summaryrefslogtreecommitdiff
path: root/tools/perf/scripts/python/netdev-times.py
diff options
context:
space:
mode:
authorMichal Kalderon <michal.kalderon@marvell.com>2020-01-27 15:26:15 +0200
committerDavid S. Miller <davem@davemloft.net>2020-01-27 14:35:32 +0100
commit0500a70d6e071040ffdaadebb966986afa83c5e9 (patch)
tree0905d51352b4a10aa351092ad5c55ead9b481991 /tools/perf/scripts/python/netdev-times.py
parent6459d93619b5bc21f775e7eb12bc4d051743d7aa (diff)
qed: FW 8.42.2.0 HSI changes
This patch contains several HSI changes. The changes are part of features like RDMA VF and OVS, the patch also contains a fix to how the init code determines if the dmae is ready to be used. Signed-off-by: Ariel Elior <ariel.elior@marvell.com> Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'tools/perf/scripts/python/netdev-times.py')
0 files changed, 0 insertions, 0 deletions
tion>mode:
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/Makefile4
-rw-r--r--drivers/gpu/drm/Kconfig426
-rw-r--r--drivers/gpu/drm/Kconfig.debug117
-rw-r--r--drivers/gpu/drm/Makefile228
-rw-r--r--drivers/gpu/drm/adp/Kconfig17
-rw-r--r--drivers/gpu/drm/adp/Makefile5
-rw-r--r--drivers/gpu/drm/adp/adp-mipi.c277
-rw-r--r--drivers/gpu/drm/adp/adp_drv.c614
-rw-r--r--drivers/gpu/drm/amd/acp/include/acp_gfx_if.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Kconfig91
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile133
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ObjectID.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aldebaran.c182
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h833
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c984
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h232
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c423
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c591
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c543
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h306
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c157
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c152
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c559
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c535
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h70
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c343
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c835
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c384
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c243
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c167
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c775
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h119
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c2293
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c128
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c638
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c246
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c263
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c132
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c201
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c329
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c591
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h105
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c1618
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h88
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c79
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c447
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c813
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c371
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c5344
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_df.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c2425
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c552
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.h16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c211
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h113
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c244
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c1434
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c61
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c241
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h69
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c388
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c119
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c504
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c439
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c296
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c664
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c2127
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h391
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c1175
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h215
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c148
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c86
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c293
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h73
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c103
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c131
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c304
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c90
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h58
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c96
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c206
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c345
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h60
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c407
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.h72
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c437
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h126
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c817
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c91
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c577
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h134
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c784
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h415
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h122
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c72
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c244
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h53
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h156
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c86
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c665
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h103
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c72
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c2528
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h221
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c392
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c4227
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h479
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c1092
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h75
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h125
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c276
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h99
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c563
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h319
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c576
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h127
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c304
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h185
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c325
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c528
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h96
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c262
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h49
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c199
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_test.c250
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h121
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c1538
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h115
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c922
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h187
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c639
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h139
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h38
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c550
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h246
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c1482
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h161
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c1011
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h77
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h91
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c230
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c398
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c1305
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h334
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c1273
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h181
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c204
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c2664
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h376
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c976
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c125
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c111
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c1018
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h111
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c615
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h92
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c1107
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h217
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c1086
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h89
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h338
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c986
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v1_0.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v1_0.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_0.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_1.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_1.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v3_0.c139
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v3_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c122
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atom.c118
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atom.h27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_crtc.c29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_dp.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_dp.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_encoders.c109
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_i2c.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c146
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_ih.c72
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_sdma.c140
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cikd.h11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h997
-rw-r--r--drivers/gpu/drm/amd/amdgpu/clearstate_gfx12.h121
-rw-r--r--drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/clearstate_si.h24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c56
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cz_ih.c66
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v10_0.c160
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.c3784
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v6_0.c501
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v8_0.c219
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v1_7.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v3_6.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v4_15.c45
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v4_15.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v4_3.c61
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v4_3.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c2918
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h91
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm125
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm124
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c7538
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c110
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm118
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h56
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c5793
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c277
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c425
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c541
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c2205
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h68
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c52
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c89
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2_cleaner_shader.asm153
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c5062
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm153
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h64
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c516
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.c521
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c674
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c179
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c513
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c501
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c611
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c1085
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v11_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c1070
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v12_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c250
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c233
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c307
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c1251
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c59
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c206
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v5_2.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c144
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v6_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c132
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v7_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/iceland_ih.c73
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v6_0.c817
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v6_0.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v6_1.c796
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v6_1.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v7_0.c787
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v7_0.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v11_0.c393
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v11_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c145
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v12_0.c406
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v12_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c191
-rw-r--r--drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h50
-rw-r--r--drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c377
-rw-r--r--drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h49
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c120
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c175
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c369
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c170
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c878
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c1486
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h74
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c872
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c733
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c1101
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h111
-rw-r--r--drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c121
-rw-r--r--drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.c121
-rw-r--r--drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mca_v3_0.c90
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mca_v3_0.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_api_def.h443
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_userqueue.c501
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_userqueue.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v10_1.c1008
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v10_1.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c1760
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v12_0.c1942
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v12_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c82
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c53
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c871
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c62
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c48
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c664
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c597
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c570
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c746
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c647
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c77
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h144
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmsch_v5_0.h144
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c178
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c282
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_ih.c92
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h64
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c554
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c72
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c634
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v4_3.h34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c400
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_11.h33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c78
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c109
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c369
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_7.h33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c696
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_9.h33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.c537
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nvd.h211
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h82
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v10_0.c112
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v11_0.c338
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c52
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v12_0.c223
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0.c660
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c355
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v14_0.c705
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v14_0.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v3_1.c85
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c136
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c151
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c648
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c2613
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c838
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c995
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c1917
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v6_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v6_0_0_pkt_open.h5672
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c1859
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v7_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.c658
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dma.c193
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_enums.h246
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_ih.c88
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sid.h1964
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c298
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c82
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c296
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c109
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.c41
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c62
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c531
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.h15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15_common.h150
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15d.h156
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.c1021
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc24.c601
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc24.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ta_ras_if.h60
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h48
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h62
-rw-r--r--drivers/gpu/drm/amd/amdgpu/tonga_ih.c88
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v12_0.c742
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v12_0.h105
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_1.c36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_1.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_7.c414
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_7.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_10.c458
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_10.h75
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_14.c160
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_14.h51
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_7.c187
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_7.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c434
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c102
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c71
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c78
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c102
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c370
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v1_0.c839
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v1_0.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v2_0.c75
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v3_0.c102
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v4_0.c336
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.c86
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.h44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c534
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c457
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c1549
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c1041
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c2315
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c2131
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c1723
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c1442
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c1729
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h39
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_ih.c81
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega20_ih.c229
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c177
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vid.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h218
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c398
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vpe_v6_1.h29
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Kconfig17
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Makefile16
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c31
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h4331
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm845
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm1136
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm339
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c2441
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c1044
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.h55
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c845
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h230
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c158
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h293
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.c1152
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.h142
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c85
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c2003
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c2485
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h156
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c153
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c57
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c90
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c90
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c79
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c207
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c242
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c751
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.h7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c96
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c386
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c418
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c428
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c106
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_iommu.c359
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_iommu.h83
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c77
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_migrate.c585
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_migrate.h19
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_module.c6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c197
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h92
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c175
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c222
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c569
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c459
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c760
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c205
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c248
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c221
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c40
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pasid.c69
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h82
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h290
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h726
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c971
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c758
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_queue.c388
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c263
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h37
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c2145
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.h99
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c1384
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.h100
-rw-r--r--drivers/gpu/drm/amd/amdkfd/soc15_int.h5
-rw-r--r--drivers/gpu/drm/amd/amdxcp/Makefile25
-rw-r--r--drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c152
-rw-r--r--drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h30
-rw-r--r--drivers/gpu/drm/amd/display/Kconfig44
-rw-r--r--drivers/gpu/drm/amd/display/Makefile20
-rw-r--r--drivers/gpu/drm/amd/display/TODO110
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/Makefile28
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c9508
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h488
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c1778
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c209
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.h36
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c687
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h70
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c804
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h51
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c1720
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h3
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c503
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h18
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c1094
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c151
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h7
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h7
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c1402
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h54
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c1950
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h68
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c257
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c171
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h7
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_quirks.c178
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c209
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.h49
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c9
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h94
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c215
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h36
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c66
-rw-r--r--drivers/gpu/drm/amd/display/dc/Makefile58
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/Makefile9
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/bw_fixed.c (renamed from drivers/gpu/drm/amd/display/dc/calcs/bw_fixed.c)27
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/calcs_logger.h (renamed from drivers/gpu/drm/amd/display/dc/calcs/calcs_logger.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/conversion.c58
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/conversion.h7
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/custom_float.c173
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/dc_common.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c (renamed from drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c)68
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c38
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/vector.c19
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser.c199
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c1294
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser_common.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser_types_internal2.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table.c301
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table2.c111
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table2.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c104
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table_helper.h8
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c15
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/dce110/command_table_helper_dce110.c104
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper2_dce112.c106
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper_dce112.c104
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/dce60/command_table_helper_dce60.c104
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/dce80/command_table_helper_dce80.c104
-rw-r--r--drivers/gpu/drm/amd/display/dc/calcs/Makefile68
-rw-r--r--drivers/gpu/drm/amd/display/dc/calcs/custom_float.c197
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile77
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c150
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c24
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c44
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c20
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dce120/dce120_clk_mgr.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c36
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_clk.c79
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c30
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c104
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c91
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c386
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h8
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c59
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c171
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.h60
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c29
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.h67
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h75
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.c36
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.c118
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c27
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c202
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c193
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h9
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c48
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c1051
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h73
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c398
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h110
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c820
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.h50
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c366
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h130
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c680
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.h49
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_smu.c344
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_smu.h139
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dalsmc.h74
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c1233
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.h42
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c308
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.h46
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h64
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c141
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c1588
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h67
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c508
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.h220
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h55
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c1631
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h117
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c472
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h41
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_smu14_driver_if.h66
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c5301
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_debug.c216
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c3701
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link.c4830
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c780
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c6128
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c246
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c962
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c272
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c530
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c906
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_resource.c4135
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_sink.c12
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_stat.c32
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_state.c1077
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_stream.c884
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_surface.c130
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc.h2514
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_bios_types.h18
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_ddc_types.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c2325
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h318
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dp_types.h769
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dsc.h23
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_fused_io.c148
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_fused_io.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_hdmi_types.h133
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_helper.c180
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_hw_types.h443
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_link.h447
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_plane.h51
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_plane_priv.h35
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_spl_translate.c231
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_spl_translate.h23
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_state.h77
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_state_priv.h128
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_stream.h275
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_stream_priv.h75
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_trace.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_types.h666
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/Makefile103
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.c)2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h526
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn201/dcn201_dccg.c (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dccg.c)3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn201/dcn201_dccg.h (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dccg.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.c (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_dccg.c)2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.h (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_dccg.h)1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn30/dcn30_dccg.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dccg.c)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn30/dcn30_dccg.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dccg.h)21
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn301/dcn301_dccg.c (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dccg.c)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn301/dcn301_dccg.h (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dccg.h)6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn302/dcn302_dccg.h (renamed from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_dccg.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn303/dcn303_dccg.h64
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.c878
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.h (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h)72
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.c407
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.h211
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn32/dcn32_dccg.c375
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn32/dcn32_dccg.h125
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c2486
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h256
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c915
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h249
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/Makefile6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_abm.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_abm.h132
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_audio.c332
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_audio.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_aux.c82
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_aux.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c323
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h41
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c48
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_i2c.c26
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_i2c.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c103
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c20
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c130
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h19
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c50
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_opp.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_opp.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c46
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h19
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_transform.c28
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_transform.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c282
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c318
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h54
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c51
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.h12
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c23
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c164
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h12
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c473
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h42
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce100/Makefile46
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c142
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h50
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/Makefile6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c16
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c3130
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h95
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_regamma_v.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c224
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce112/Makefile5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce112/dce112_compressor.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce112/dce112_hw_sequencer.c163
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce120/Makefile5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c271
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c126
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce60/Makefile5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.c432
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce80/Makefile5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c54
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/Makefile7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c73
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dwb.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dwb.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c3775
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h209
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h717
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/Makefile35
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h273
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.c17
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.h363
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c2634
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c4106
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn201/Makefile33
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.h14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn201/dcn201_mpc.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/Makefile31
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c246
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.c10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c2289
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/Makefile56
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_afmt.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c282
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c529
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c1011
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.h15
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c2918
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_vpg.h23
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn301/Makefile14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c11
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn302/Makefile43
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c1778
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn303/Makefile39
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn303/dcn303_dccg.h30
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn303/dcn303_hwseq.c45
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn303/dcn303_hwseq.h18
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c20
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.h15
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c1720
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.h17
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/Makefile41
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c54
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c660
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c619
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c287
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c65
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c2493
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h52
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/Makefile117
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c)43
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h)36
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c)48
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h)57
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_link_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c)1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_link_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h)6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c)25
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_link_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.c)3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_link_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.h)3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_stream_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c)101
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_stream_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h)53
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.c)1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c)196
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.h)32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c506
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.h355
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_link_encoder.c328
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_link_encoder.h53
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.c493
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.h206
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c190
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.h42
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c391
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h188
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c520
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.h332
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_link_encoder.c322
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_link_encoder.h134
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c856
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.h240
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_cp_psp.h7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_helpers.h68
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_pp_smu.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_services.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_services_types.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/Makefile107
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.c (renamed from drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c)22
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.h (renamed from drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_math.c (renamed from drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.c)16
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c (renamed from drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c)305
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dc_features.h9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c159
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c2459
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h55
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c43
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c27
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c27
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c40
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c739
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h73
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c447
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.h11
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c128
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c178
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c367
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c380
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c827
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h59
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c1485
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c147
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c432
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h40
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c7343
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.h44
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c1678
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.h70
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c3613
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h80
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c3762
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h64
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c6350
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h1170
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c614
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.h70
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c931
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c620
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h44
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c639
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.h19
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h121
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c44
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h20
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h182
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c223
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h307
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h17
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dsc/qp_tables.h36
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c35
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/Makefile140
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/cmntypes.h94
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.c10337
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.h204
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core_structs.h2032
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_lib_defines.h79
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.c798
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.h74
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.c929
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.h28
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.c516
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.h50
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.c466
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.h135
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/bounding_boxes/dcn4_soc_bb.h372
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml2_external_lib_deps.h10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top.h46
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_dchub_registers.h191
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_display_cfg_types.h526
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_policy_types.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_soc_parameter_types.h215
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_types.h744
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.c661
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.h12
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c13342
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.h39
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.c39
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_shared_types.h2341
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.c788
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.h43
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c785
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.c50
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.c198
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.c39
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.c706
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.h22
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c2390
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.c83
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.c147
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.h25
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_interfaces.c49
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_legacy.c10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_legacy.h9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.c1170
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.h14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_debug.h189
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_internal_shared_types.h1010
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.c1174
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.h52
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_types.h43
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_internal_types.h157
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_mall_phantom.c911
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_mall_phantom.h52
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_policy.c311
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_policy.h47
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.c1528
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.h41
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.c560
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.h149
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.c704
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.h309
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml_assert.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml_depedencies.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.c573
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.h63
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml_logging.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/Makefile83
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c)81
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h)16
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c)80
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_dscl.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c)116
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn20/dcn20_dpp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c)106
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn20/dcn20_dpp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h)14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn20/dcn20_dpp_cm.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c)135
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn201/dcn201_dpp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c)26
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn201/dcn201_dpp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c)252
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h)29
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp_cm.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c)86
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn32/dcn32_dpp.c239
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn32/dcn32_dpp.h45
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c150
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h67
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c429
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.h740
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c242
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c1186
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/Makefile35
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c495
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c)162
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h)61
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c142
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.h59
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c394
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.h346
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dsc.h (renamed from drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h)14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dscc_types.h7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c30
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c17
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/Makefile46
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_cm_common.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.h)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c)40
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h)30
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb_cm.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c)5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c57
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.h61
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/Makefile29
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_factory_dcn20.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_translate_dcn20.c17
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c30
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn30/hw_factory_dcn30.c12
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn30/hw_factory_dcn30.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn30/hw_translate_dcn30.c25
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn30/hw_translate_dcn30.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn315/hw_factory_dcn315.c260
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn315/hw_factory_dcn315.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn315/hw_translate_dcn315.c374
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn315/hw_translate_dcn315.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c271
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_translate_dcn32.c349
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_translate_dcn32.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c264
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.h11
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_translate_dcn401.c335
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_translate_dcn401.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/ddc_regs.h47
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/diagnostics/hw_factory_diag.c62
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/diagnostics/hw_factory_diag.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/diagnostics/hw_translate_diag.c41
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/diagnostics/hw_translate_diag.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c46
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hpd_regs.h10
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_ddc.c23
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c35
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_generic.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_hpd.c12
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c30
-rw-r--r--drivers/gpu/drm/amd/display/dc/hdcp/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c28
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/Makefile50
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c)30
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.h)19
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c)86
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h)12
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn32/dcn32_hpo_dp_link_encoder.c89
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn32/dcn32_hpo_dp_link_encoder.h68
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/Makefile104
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c)41
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h)132
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c)51
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h)5
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn201/dcn201_hubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubbub.c)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn201/dcn201_hubbub.h (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubbub.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn21/dcn21_hubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c)24
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn21/dcn21_hubbub.h (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.h)8
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.c)21
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.h)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn301/dcn301_hubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.c)2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn301/dcn301_hubbub.h (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c)187
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.h (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h)14
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c1062
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.h166
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c664
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.h172
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c1271
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h206
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/Makefile97
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c)87
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h)175
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c)219
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.h)81
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubp.c)16
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubp.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c)30
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c678
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.h)19
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c)33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.h)9
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c246
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.h70
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c244
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.h76
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c1097
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h373
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/Makefile202
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h)151
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce100/dce100_hwseq.c172
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce100/dce100_hwseq.h54
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c3503
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h125
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce112/dce112_hwseq.c160
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce112/dce112_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dce112/dce112_hw_sequencer.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce120/dce120_hwseq.c270
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce120/dce120_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.c433
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce80/dce80_hwseq.c55
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce80/dce80_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c4147
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h221
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c)10
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c3256
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h)41
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c)24
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.c)109
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.h)2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.c)15
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c302
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.h)5
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c)25
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c1282
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h)25
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c)31
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hwseq.c)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hwseq.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c)25
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.h)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_hwseq.c)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_hwseq.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_hwseq.c64
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_hwseq.h37
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.c40
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c717
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h)5
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c)29
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c603
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h52
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c165
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c1848
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h135
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c174
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c1728
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h112
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c184
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile28
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_hwseq.c182
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_hwseq.h41
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c172
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c4045
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h213
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c177
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.h12
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h2025
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h242
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/clock_source.h8
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/core_status.h9
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/core_types.h368
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h133
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h220
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h99
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h25
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/abm.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/audio.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/aux_engine.h10
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h88
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h187
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/cursor_reg_cache.h121
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h225
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h125
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h16
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h144
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h48
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h123
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h81
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h52
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h29
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h968
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/opp.h54
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/optc.h197
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h55
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h45
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h223
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/transform.h20
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/vpg.h53
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h273
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h164
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h21
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/link_hwss.h92
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/link_service.h350
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/reg_helper.h38
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/resource.h500
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/soc_and_ip_translator.h24
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/Makefile65
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c32
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c43
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dce60/irq_service_dce60.c43
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c45
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c52
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c72
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c61
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c93
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c69
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn302/irq_service_dcn302.c35
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c69
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.h19
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn31/irq_service_dcn31.c52
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn314/irq_service_dcn314.c406
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn314/irq_service_dcn314.h35
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn315/irq_service_dcn315.c411
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn315/irq_service_dcn315.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.c436
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.h35
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn35/irq_service_dcn35.c400
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn35/irq_service_dcn35.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn351/irq_service_dcn351.c382
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn351/irq_service_dcn351.h12
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn36/irq_service_dcn36.c381
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn36/irq_service_dcn36.h12
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.c414
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/irq_service.c92
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/irq_service.h14
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq_types.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/Makefile64
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c1019
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.h44
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.c172
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.h63
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c353
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h63
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.c199
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h37
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c176
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.h37
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c228
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.h62
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.c233
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_detection.c1644
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_detection.h43
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_dpms.c2674
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_dpms.h53
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_factory.c916
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_factory.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_frl.c62
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_resource.c114
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_resource.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_validation.c626
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_validation.h45
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c604
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.h106
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c2599
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h117
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c171
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h47
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c448
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h112
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c548
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.h41
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c210
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h60
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c1816
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h201
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_128b_132b.c267
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_128b_132b.h43
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c491
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.h62
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_auxless.c80
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_auxless.h35
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c1046
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h60
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c553
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.h45
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c250
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.h (renamed from drivers/gpu/drm/amd/display/dc/inc/link_dpcd.h)5
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c1426
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h81
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.c240
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.h54
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile54
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.c)7
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.h)7
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn32/dcn32_mmhubbub.c239
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn32/dcn32_mmhubbub.h211
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn35/dcn35_mmhubbub.c59
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn35/dcn35_mmhubbub.h75
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/Makefile72
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c)32
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c)85
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c)275
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h)271
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c1052
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.h402
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c635
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h257
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/Makefile51
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c)79
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h)8
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.c)52
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.h)12
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.c66
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.h69
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/Makefile114
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c)368
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h686
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c)117
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h)15
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.c)17
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.h)3
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c)113
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h)32
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.c190
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.h36
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c526
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h)24
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.c275
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.h262
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c384
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h195
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c538
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h83
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c552
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h196
-rw-r--r--drivers/gpu/drm/amd/display/dc/os_types.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/pg/Makefile35
-rw-r--r--drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c573
-rw-r--r--drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.h195
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/Makefile225
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c)77
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h)9
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c)41
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c)56
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c)49
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c)131
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c)107
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c)350
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h)5
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c2782
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h)48
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c)141
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c1720
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.h)11
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c2636
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h)17
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c)186
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c1526
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.h)3
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c1458
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.h38
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c2273
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h106
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c2159
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h50
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c2181
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.h44
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c2048
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.h44
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c2931
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h1281
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c780
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c2078
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.h45
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c2223
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h315
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c2196
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.h23
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c2196
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.h73
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c2282
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h656
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/Makefile19
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.c304
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h22
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.c27
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.h16
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/soc_and_ip_translator.c37
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/Makefile33
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c1925
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl.h27
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_filters.c15
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_filters.h15
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_isharp_filters.c553
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_isharp_filters.h42
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_easf_filters.c2586
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_easf_filters.h37
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_filters.c1233
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_filters.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_types.h560
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_custom_float.c151
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_custom_float.h29
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_debug.h30
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c493
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.h522
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_os_types.h56
-rw-r--r--drivers/gpu/drm/amd/display/dc/virtual/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/virtual/virtual_link_hwss.c56
-rw-r--r--drivers/gpu/drm/amd/display/dc/virtual/virtual_link_hwss.h35
-rw-r--r--drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c2
-rw-r--r--drivers/gpu/drm/amd/display/dmub/dmub_srv.h423
-rw-r--r--drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h4766
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/Makefile7
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c92
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h7
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.c6
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.h4
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.c3
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.h3
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn303.c19
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn303.h19
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c147
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h14
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.c67
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.h35
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn315.c62
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn315.h68
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn316.c62
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn316.h33
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c560
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h273
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c611
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.h290
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn351.c34
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn351.h13
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn36.c34
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn36.h13
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c678
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h290
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_reg.h1
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c824
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c20
-rw-r--r--drivers/gpu/drm/amd/display/include/audio_types.h17
-rw-r--r--drivers/gpu/drm/amd/display/include/bios_parser_types.h24
-rw-r--r--drivers/gpu/drm/amd/display/include/dal_asic_id.h54
-rw-r--r--drivers/gpu/drm/amd/display/include/dal_types.h10
-rw-r--r--drivers/gpu/drm/amd/display/include/ddc_service_types.h14
-rw-r--r--drivers/gpu/drm/amd/display/include/dpcd_defs.h54
-rw-r--r--drivers/gpu/drm/amd/display/include/fixed31_32.h10
-rw-r--r--drivers/gpu/drm/amd/display/include/gpio_service_interface.h3
-rw-r--r--drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h27
-rw-r--r--drivers/gpu/drm/amd/display/include/grph_object_defs.h2
-rw-r--r--drivers/gpu/drm/amd/display/include/grph_object_id.h14
-rw-r--r--drivers/gpu/drm/amd/display/include/hdcp_msg_types.h108
-rw-r--r--drivers/gpu/drm/amd/display/include/hdcp_types.h103
-rw-r--r--drivers/gpu/drm/amd/display/include/i2caux_interface.h82
-rw-r--r--drivers/gpu/drm/amd/display/include/link_service_types.h73
-rw-r--r--drivers/gpu/drm/amd/display/include/logger_interface.h9
-rw-r--r--drivers/gpu/drm/amd/display/include/logger_types.h150
-rw-r--r--drivers/gpu/drm/amd/display/include/set_mode_types.h8
-rw-r--r--drivers/gpu/drm/amd/display/include/signal_types.h53
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_gamma.c568
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_gamma.h14
-rw-r--r--drivers/gpu/drm/amd/display/modules/freesync/freesync.c408
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c49
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h16
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c47
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c95
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c55
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c99
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c122
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h18
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c14
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h15
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h69
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h180
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h40
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_stats.h6
-rw-r--r--drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c121
-rw-r--r--drivers/gpu/drm/amd/display/modules/power/power_helpers.c327
-rw-r--r--drivers/gpu/drm/amd/display/modules/power/power_helpers.h34
-rw-r--r--drivers/gpu/drm/amd/display/modules/vmid/vmid.c2
-rw-r--r--drivers/gpu/drm/amd/include/amd_acpi.h4
-rw-r--r--drivers/gpu/drm/amd/include/amd_cper.h269
-rw-r--r--drivers/gpu/drm/amd/include/amd_pcie.h18
-rw-r--r--drivers/gpu/drm/amd/include/amd_shared.h297
-rw-r--r--drivers/gpu/drm/amd/include/amdgpu_reg_state.h153
-rw-r--r--drivers/gpu/drm/amd/include/arct_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_1_8_0_offset.h411
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_1_8_0_sh_mask.h1807
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_3_0_0_offset.h259
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_3_0_0_sh_mask.h1246
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_4_1_0_offset.h287
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_4_1_0_sh_mask.h1348
-rw-r--r--[-rwxr-xr-x]drivers/gpu/drm/amd/include/asic_reg/clk/clk_11_0_1_offset.h0
-rw-r--r--[-rwxr-xr-x]drivers/gpu/drm/amd/include/asic_reg/clk/clk_11_0_1_sh_mask.h0
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_10_0_sh_mask.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_0_d.h1
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_0_sh_mask.h4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_2_sh_mask.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_d.h7
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h6
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_offset.h6193
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_sh_mask.h22091
-rwxr-xr-xdrivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_offset.h6193
-rwxr-xr-xdrivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h22091
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h142
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h78
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_1_sh_mask.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_2_offset.h114
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_2_sh_mask.h59
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_3_offset.h44
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_3_sh_mask.h36
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_offset.h4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_sh_mask.h21
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_4_offset.h15245
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_4_sh_mask.h61832
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_5_offset.h15195
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_5_sh_mask.h62071
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_6_offset.h15686
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_6_sh_mask.h62727
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_2_0_offset.h14737
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_2_0_sh_mask.h222948
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_2_1_offset.h14596
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_2_1_sh_mask.h56598
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_offset.h15279
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h53485
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_1_offset.h15259
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_1_sh_mask.h53464
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_6_0_offset.h15485
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_6_0_sh_mask.h61940
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_4_1_0_offset.h16662
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_4_1_0_sh_mask.h145870
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_offset.h28
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_sh_mask.h28
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_4_3_offset.h30
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_4_3_sh_mask.h157
-rw-r--r--[-rwxr-xr-x]drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_2_0_3_offset.h0
-rw-r--r--[-rwxr-xr-x]drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_2_0_3_sh_mask.h0
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_3_0_0_offset.h (renamed from drivers/gpu/drm/amd/include/asic_reg/dcn/dpcs_3_0_0_offset.h)31
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_3_0_0_sh_mask.h (renamed from drivers/gpu/drm/amd/include/asic_reg/dcn/dpcs_3_0_0_sh_mask.h)11
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_3_0_3_offset.h (renamed from drivers/gpu/drm/amd/include/asic_reg/dcn/dpcs_3_0_3_offset.h)0
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_3_0_3_sh_mask.h (renamed from drivers/gpu/drm/amd/include/asic_reg/dcn/dpcs_3_0_3_sh_mask.h)4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_3_1_4_offset.h7215
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_3_1_4_sh_mask.h55194
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_4_2_0_offset.h10
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_4_2_2_offset.h11957
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_4_2_2_sh_mask.h103633
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_4_2_3_offset.h11969
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_4_2_3_sh_mask.h136141
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h30
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h123
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h122
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h1527
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_default.h6114
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h11685
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_sh_mask.h41664
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_3_offset.h12094
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_3_sh_mask.h44690
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_offset.h10002
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_sh_mask.h36579
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_12_0_0_offset.h11061
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_12_0_0_sh_mask.h40550
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_3_offset.h7450
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_3_sh_mask.h31649
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gca/gfx_6_0_d.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_4_2_offset.h219
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_4_2_sh_mask.h663
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_5_2_1_offset.h217
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_5_2_1_sh_mask.h684
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_6_0_0_offset.h209
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_6_0_0_sh_mask.h646
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_7_0_0_offset.h219
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_7_0_0_sh_mask.h735
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/lsdma/lsdma_6_0_0_offset.h391
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/lsdma/lsdma_6_0_0_sh_mask.h1439
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/lsdma/lsdma_7_0_0_offset.h388
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/lsdma/lsdma_7_0_0_sh_mask.h1411
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_0_offset.h23
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_8_0_offset.h3366
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_8_0_sh_mask.h22628
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_0_0_offset.h1529
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_0_0_sh_mask.h7478
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_0_1_offset.h1769
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_0_1_sh_mask.h7483
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_0_2_offset.h1425
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_0_2_sh_mask.h7228
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_3_0_offset.h1395
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_3_0_sh_mask.h6722
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_4_1_0_offset.h1341
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_4_1_0_sh_mask.h6943
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_1_offset.h36
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_1_sh_mask.h56
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_offset.h7
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_sh_mask.h12
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_0_offset.h461
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_0_sh_mask.h682
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_2_offset.h48
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_2_sh_mask.h100
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_4_offset.h402
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_4_sh_mask.h595
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_5_offset.h455
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_5_sh_mask.h672
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_6_offset.h456
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_6_sh_mask.h702
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_8_offset.h410
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_8_sh_mask.h603
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_14_0_0_offset.h359
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_14_0_0_sh_mask.h534
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_14_0_2_offset.h468
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_14_0_2_sh_mask.h692
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbif/nbif_6_3_1_offset.h11287
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbif/nbif_6_3_1_sh_mask.h32806
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_4_3_0_offset.h17381
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_4_3_0_sh_mask.h82050
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h9406
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h57899
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_2_0_offset.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_2_0_sh_mask.h12
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_offset.h29660
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h154426
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_9_0_offset.h10004
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_9_0_sh_mask.h38900
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h23
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_sh_mask.h41
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_sh_mask.h11
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_4_2_offset.h263
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_4_2_sh_mask.h995
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_6_0_0_offset.h267
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_6_0_0_sh_mask.h979
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_6_1_0_offset.h279
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_6_1_0_sh_mask.h1019
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_7_0_0_offset.h279
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_7_0_0_sh_mask.h1029
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/pcie/pcie_6_1_0_offset.h630
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/pcie/pcie_6_1_0_sh_mask.h4250
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma/sdma_4_4_2_offset.h1113
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma/sdma_4_4_2_sh_mask.h3300
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_d.h44
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_sh_mask.h188
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_offset.h102
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_sh_mask.h184
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_13_0_3_offset.h177
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_13_0_3_sh_mask.h428
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_13_0_6_offset.h517
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_13_0_6_sh_mask.h1178
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_14_0_2_offset.h511
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_14_0_2_sh_mask.h1106
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/thm/thm_14_0_2_offset.h228
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/thm/thm_14_0_2_sh_mask.h940
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_12_0_0_offset.h33
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_12_0_0_sh_mask.h95
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_7_0_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_offset.h35
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_sh_mask.h97
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_offset.h29
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_sh_mask.h37
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_sh_mask.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vce/vce_1_0_d.h5
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vce/vce_1_0_sh_mask.h10
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h14
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h51
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_0_offset.h2032
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_0_sh_mask.h8937
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_3_offset.h2367
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_3_sh_mask.h10919
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_5_offset.h1797
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_5_sh_mask.h8614
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_5_0_0_offset.h1694
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_5_0_0_sh_mask.h7666
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vpe/vpe_6_1_0_offset.h1553
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vpe/vpe_6_1_0_sh_mask.h4393
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/xgmi/xgmi_6_1_0_sh_mask.h87
-rw-r--r--drivers/gpu/drm/amd/include/atom-bits.h2
-rw-r--r--drivers/gpu/drm/amd/include/atombios.h46
-rw-r--r--drivers/gpu/drm/amd/include/atomfirmware.h624
-rw-r--r--drivers/gpu/drm/amd/include/beige_goby_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/cgs_common.h23
-rw-r--r--drivers/gpu/drm/amd/include/cyan_skillfish_ip_offset.h12
-rw-r--r--drivers/gpu/drm/amd/include/dimgrey_cavefish_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/discovery.h301
-rw-r--r--drivers/gpu/drm/amd/include/dm_pp_interface.h10
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_11_0_0.h80
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_12_0_0.h74
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/isp/irqsrcs_isp_4_1.h62
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_2_0.h4
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_4_0.h44
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h47
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/vpe/irqsrcs_vpe_6_1.h40
-rw-r--r--drivers/gpu/drm/amd/include/kgd_kfd_interface.h150
-rw-r--r--drivers/gpu/drm/amd/include/kgd_pp_interface.h1042
-rw-r--r--drivers/gpu/drm/amd/include/mes_v11_api_def.h709
-rw-r--r--drivers/gpu/drm/amd/include/mes_v12_api_def.h907
-rw-r--r--drivers/gpu/drm/amd/include/navi12_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/navi14_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/pptable.h95
-rw-r--r--drivers/gpu/drm/amd/include/renoir_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/sienna_cichlid_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/soc15_hw_ip.h2
-rw-r--r--drivers/gpu/drm/amd/include/soc15_ih_clientid.h29
-rw-r--r--drivers/gpu/drm/amd/include/soc21_enum.h22477
-rw-r--r--drivers/gpu/drm/amd/include/soc24_enum.h21073
-rw-r--r--drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h446
-rw-r--r--drivers/gpu/drm/amd/include/v10_structs.h3
-rw-r--r--drivers/gpu/drm/amd/include/v11_structs.h1189
-rw-r--r--drivers/gpu/drm/amd/include/v12_structs.h1189
-rw-r--r--drivers/gpu/drm/amd/include/v9_structs.h30
-rw-r--r--drivers/gpu/drm/amd/include/vangogh_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/vega10_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/vega20_ip_offset.h78
-rw-r--r--drivers/gpu/drm/amd/include/yellow_carp_offset.h11
-rw-r--r--drivers/gpu/drm/amd/pm/Makefile14
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_dpm.c3013
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c95
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_pm.c3798
-rw-r--r--drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h457
-rw-r--r--drivers/gpu/drm/amd/pm/inc/amdgpu_dpm_internal.h28
-rw-r--r--drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h45
-rw-r--r--drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h1402
-rw-r--r--drivers/gpu/drm/amd/pm/inc/smu9_driver_if.h486
-rw-r--r--drivers/gpu/drm/amd/pm/inc/smu_v13_0.h272
-rw-r--r--drivers/gpu/drm/amd/pm/inc/vega12/smu9_driver_if.h767
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/Makefile32
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/cik_dpm.h (renamed from drivers/gpu/drm/amd/pm/powerplay/cik_dpm.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c (renamed from drivers/gpu/drm/amd/pm/powerplay/kv_dpm.c)263
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.h (renamed from drivers/gpu/drm/amd/pm/powerplay/kv_dpm.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/kv_smc.c (renamed from drivers/gpu/drm/amd/pm/powerplay/kv_smc.c)0
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c1011
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.h37
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/ppsmc.h (renamed from drivers/gpu/drm/amd/pm/powerplay/ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/r600_dpm.h (renamed from drivers/gpu/drm/amd/pm/powerplay/r600_dpm.h)3
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c (renamed from drivers/gpu/drm/amd/pm/powerplay/si_dpm.c)789
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.h465
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c (renamed from drivers/gpu/drm/amd/pm/powerplay/si_smc.c)66
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/sislands_smc.h (renamed from drivers/gpu/drm/amd/pm/powerplay/sislands_smc.h)63
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/Makefile4
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c579
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ci_baco.c21
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/common_baco.c2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/fiji_baco.c24
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/hardwaremanager.c14
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/hwmgr.c6
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/polaris_baco.c30
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c23
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c475
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.h8
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomfwctrl.c82
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomfwctrl.h6
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppevvmath.h555
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h39
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c4
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c6
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c89
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c5
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_clockpowergating.c2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_clockpowergating.h1
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c260
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_powertune.c27
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.c8
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c58
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c5
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c31
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.h2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/tonga_baco.c30
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_baco.c26
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c401
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c130
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_pptable.h42
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_processpptables.c7
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c31
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_baco.c12
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c61
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.h3
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_pptable.h11
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_processpptables.c6
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_thermal.c4
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c12
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c103
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.h6
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_pptable.h19
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_processpptables.c575
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.c10
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/amd_powerplay.h (renamed from drivers/gpu/drm/amd/pm/inc/amd_powerplay.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/cz_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/cz_ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/fiji_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/fiji_ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/hardwaremanager.h (renamed from drivers/gpu/drm/amd/pm/inc/hardwaremanager.h)6
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h (renamed from drivers/gpu/drm/amd/pm/inc/hwmgr.h)45
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/polaris10_pwrvirus.h (renamed from drivers/gpu/drm/amd/pm/inc/polaris10_pwrvirus.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/power_state.h (renamed from drivers/gpu/drm/amd/pm/inc/power_state.h)1
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/pp_debug.h (renamed from drivers/gpu/drm/amd/pm/inc/pp_debug.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/pp_endian.h (renamed from drivers/gpu/drm/amd/pm/inc/pp_endian.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/pp_thermal.h (renamed from drivers/gpu/drm/amd/pm/inc/pp_thermal.h)6
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/ppinterrupt.h (renamed from drivers/gpu/drm/amd/pm/inc/ppinterrupt.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/rv_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/rv_ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu10.h (renamed from drivers/gpu/drm/amd/pm/inc/smu10.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu10_driver_if.h (renamed from drivers/gpu/drm/amd/pm/inc/smu10_driver_if.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu11_driver_if.h (renamed from drivers/gpu/drm/amd/pm/inc/smu11_driver_if.h)2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu7.h (renamed from drivers/gpu/drm/amd/pm/inc/smu7.h)6
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu71.h (renamed from drivers/gpu/drm/amd/pm/inc/smu71.h)22
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu71_discrete.h (renamed from drivers/gpu/drm/amd/pm/inc/smu71_discrete.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu72.h (renamed from drivers/gpu/drm/amd/pm/inc/smu72.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu72_discrete.h (renamed from drivers/gpu/drm/amd/pm/inc/smu72_discrete.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu73.h (renamed from drivers/gpu/drm/amd/pm/inc/smu73.h)45
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu73_discrete.h (renamed from drivers/gpu/drm/amd/pm/inc/smu73_discrete.h)73
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu74.h (renamed from drivers/gpu/drm/amd/pm/inc/smu74.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu74_discrete.h (renamed from drivers/gpu/drm/amd/pm/inc/smu74_discrete.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu75.h (renamed from drivers/gpu/drm/amd/pm/inc/smu75.h)12
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu75_discrete.h (renamed from drivers/gpu/drm/amd/pm/inc/smu75_discrete.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu7_common.h (renamed from drivers/gpu/drm/amd/pm/inc/smu7_common.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu7_discrete.h (renamed from drivers/gpu/drm/amd/pm/inc/smu7_discrete.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu7_fusion.h (renamed from drivers/gpu/drm/amd/pm/inc/smu7_fusion.h)42
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu7_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/smu7_ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu8.h (renamed from drivers/gpu/drm/amd/pm/inc/smu8.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu8_fusion.h (renamed from drivers/gpu/drm/amd/pm/inc/smu8_fusion.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu9.h (renamed from drivers/gpu/drm/amd/pm/inc/smu9.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu9_driver_if.h488
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu_ucode_xfer_cz.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_ucode_xfer_cz.h)2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu_ucode_xfer_vi.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_ucode_xfer_vi.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smumgr.h (renamed from drivers/gpu/drm/amd/pm/inc/smumgr.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/tonga_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/tonga_ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/vega10_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/vega10_ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/vega12/smu9_driver_if.h769
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/vega12_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/vega12_ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/vega20_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/vega20_ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/si_dpm.h1015
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c10
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c9
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c12
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/polaris10_smumgr.c11
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/smu10_smumgr.c23
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c49
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.h2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/smu9_smumgr.c2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/tonga_smumgr.c9
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/vega10_smumgr.c10
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/vega12_smumgr.c28
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/vega20_smumgr.c16
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/vegam_smumgr.c7
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/Makefile2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c2378
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h1812
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/aldebaran_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h)5
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/arcturus_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/arcturus_ppsmc.h)2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_arcturus.h (renamed from drivers/gpu/drm/amd/pm/inc/smu11_driver_if_arcturus.h)2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_cyan_skillfish.h (renamed from drivers/gpu/drm/amd/pm/inc/smu11_driver_if_cyan_skillfish.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_navi10.h (renamed from drivers/gpu/drm/amd/pm/inc/smu11_driver_if_navi10.h)2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_sienna_cichlid.h (renamed from drivers/gpu/drm/amd/pm/inc/smu11_driver_if_sienna_cichlid.h)154
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_vangogh.h (renamed from drivers/gpu/drm/amd/pm/inc/smu11_driver_if_vangogh.h)4
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu12_driver_if.h (renamed from drivers/gpu/drm/amd/pm/inc/smu12_driver_if.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_aldebaran.h (renamed from drivers/gpu/drm/amd/pm/inc/smu13_driver_if_aldebaran.h)37
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h1632
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h282
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_5.h139
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_6.h243
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h1622
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_yellow_carp.h (renamed from drivers/gpu/drm/amd/pm/inc/smu13_driver_if_yellow_carp.h)2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0.h1889
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h263
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_0_7_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v11_0_7_ppsmc.h)4
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_0_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v11_0_ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_pmfw.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v11_5_pmfw.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v11_5_ppsmc.h)5
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_8_pmfw.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v11_8_pmfw.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_8_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v12_0_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v12_0_ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h150
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h378
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h146
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_1_pmfw.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v13_0_1_pmfw.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_1_ppsmc.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v13_0_1_ppsmc.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_4_pmfw.h137
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_4_ppsmc.h137
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_5_pmfw.h126
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_5_ppsmc.h74
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h471
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h123
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h140
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h194
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h141
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h150
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_11_0_cdr_table.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_11_0_cdr_table.h)6
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_types.h)118
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v11_0.h)31
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0_7_pptable.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v11_0_7_pptable.h)9
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0_pptable.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v11_0_pptable.h)24
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v12_0.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v12_0.h)2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h302
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_0_pptable.h198
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_7_pptable.h198
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_pptable.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v13_0_pptable.h)10
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h249
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h204
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c1081
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c100
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c909
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c2624
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h8
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c328
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c690
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c305
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c24
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile3
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c738
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.h8
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c1456
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c3302
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.h28
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c1073
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c1148
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.h28
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c1140
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.h29
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c4086
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h265
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c2888
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.h28
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c375
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.h1
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/Makefile30
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c1977
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c1723
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.h28
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c2932
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.h28
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c609
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h168
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu_internal.h26
-rw-r--r--drivers/gpu/drm/amd/ras/Makefile34
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/Makefile33
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c285
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.h54
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_eeprom_i2c.c182
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_eeprom_i2c.h27
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c648
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h83
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mp1_v13_0.c94
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mp1_v13_0.h30
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_nbio_v7_9.c125
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_nbio_v7_9.h30
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_process.c190
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_process.h41
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_sys.c279
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/ras_sys.h110
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/Makefile44
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras.h370
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_aca.c672
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_aca.h164
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_aca_v1_0.c379
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_aca_v1_0.h71
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_cmd.c522
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_cmd.h426
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_core.c603
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_core_status.h37
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_cper.c315
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_cper.h304
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_eeprom.c1339
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_eeprom.h197
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_gfx.c70
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_gfx.h43
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_gfx_v9_0.c426
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_gfx_v9_0.h259
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_log_ring.c317
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_log_ring.h93
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_mp1.c81
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_mp1.h50
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_mp1_v13_0.c105
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_mp1_v13_0.h30
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_nbio.c96
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_nbio.h46
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_nbio_v7_9.c123
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_nbio_v7_9.h31
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_process.c322
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_process.h53
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_psp.c750
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_psp.h145
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_psp_v13_0.c46
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_psp_v13_0.h31
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_ta_if.h231
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_umc.c707
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_umc.h166
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c511
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.h314
-rw-r--r--drivers/gpu/drm/arm/Kconfig9
-rw-r--r--drivers/gpu/drm/arm/display/Kconfig4
-rw-r--r--drivers/gpu/drm/arm/display/include/malidp_utils.h2
-rw-r--r--drivers/gpu/drm/arm/display/komeda/Makefile4
-rw-r--r--drivers/gpu/drm/arm/display/komeda/d71/d71_component.c14
-rw-r--r--drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c6
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.c5
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_crtc.c86
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_dev.c10
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_drv.c82
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c16
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h1
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_kms.c64
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_kms.h9
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c7
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c37
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_plane.c25
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c11
-rw-r--r--drivers/gpu/drm/arm/hdlcd_crtc.c61
-rw-r--r--drivers/gpu/drm/arm/hdlcd_drv.c166
-rw-r--r--drivers/gpu/drm/arm/hdlcd_drv.h2
-rw-r--r--drivers/gpu/drm/arm/malidp_crtc.c15
-rw-r--r--drivers/gpu/drm/arm/malidp_drv.c116
-rw-r--r--drivers/gpu/drm/arm/malidp_drv.h2
-rw-r--r--drivers/gpu/drm/arm/malidp_hw.c10
-rw-r--r--drivers/gpu/drm/arm/malidp_mw.c28
-rw-r--r--drivers/gpu/drm/arm/malidp_planes.c89
-rw-r--r--drivers/gpu/drm/arm/malidp_regs.h2
-rw-r--r--drivers/gpu/drm/armada/Kconfig2
-rw-r--r--drivers/gpu/drm/armada/Makefile3
-rw-r--r--drivers/gpu/drm/armada/armada_510.c1
-rw-r--r--drivers/gpu/drm/armada/armada_crtc.c31
-rw-r--r--drivers/gpu/drm/armada/armada_debugfs.c2
-rw-r--r--drivers/gpu/drm/armada/armada_drm.h13
-rw-r--r--drivers/gpu/drm/armada/armada_drv.c55
-rw-r--r--drivers/gpu/drm/armada/armada_fb.c14
-rw-r--r--drivers/gpu/drm/armada/armada_fb.h6
-rw-r--r--drivers/gpu/drm/armada/armada_fbdev.c112
-rw-r--r--drivers/gpu/drm/armada/armada_gem.c17
-rw-r--r--drivers/gpu/drm/armada/armada_overlay.c15
-rw-r--r--drivers/gpu/drm/armada/armada_plane.c10
-rw-r--r--drivers/gpu/drm/aspeed/Kconfig3
-rw-r--r--drivers/gpu/drm/aspeed/aspeed_gfx.h1
-rw-r--r--drivers/gpu/drm/aspeed/aspeed_gfx_crtc.c12
-rw-r--r--drivers/gpu/drm/aspeed/aspeed_gfx_drv.c56
-rw-r--r--drivers/gpu/drm/aspeed/aspeed_gfx_out.c2
-rw-r--r--drivers/gpu/drm/ast/Kconfig9
-rw-r--r--drivers/gpu/drm/ast/Makefile20
-rw-r--r--drivers/gpu/drm/ast/ast_2000.c257
-rw-r--r--drivers/gpu/drm/ast/ast_2100.c480
-rw-r--r--drivers/gpu/drm/ast/ast_2200.c92
-rw-r--r--drivers/gpu/drm/ast/ast_2300.c1463
-rw-r--r--drivers/gpu/drm/ast/ast_2400.c100
-rw-r--r--drivers/gpu/drm/ast/ast_2500.c675
-rw-r--r--drivers/gpu/drm/ast/ast_2600.c116
-rw-r--r--drivers/gpu/drm/ast/ast_cursor.c314
-rw-r--r--drivers/gpu/drm/ast/ast_ddc.c187
-rw-r--r--drivers/gpu/drm/ast/ast_ddc.h11
-rw-r--r--drivers/gpu/drm/ast/ast_dp.c577
-rw-r--r--drivers/gpu/drm/ast/ast_dp501.c374
-rw-r--r--drivers/gpu/drm/ast/ast_dram_tables.h207
-rw-r--r--drivers/gpu/drm/ast/ast_drv.c391
-rw-r--r--drivers/gpu/drm/ast/ast_drv.h562
-rw-r--r--drivers/gpu/drm/ast/ast_main.c475
-rw-r--r--drivers/gpu/drm/ast/ast_mm.c44
-rw-r--r--drivers/gpu/drm/ast/ast_mode.c1537
-rw-r--r--drivers/gpu/drm/ast/ast_post.c2078
-rw-r--r--drivers/gpu/drm/ast/ast_post.h50
-rw-r--r--drivers/gpu/drm/ast/ast_reg.h78
-rw-r--r--drivers/gpu/drm/ast/ast_sil164.c126
-rw-r--r--drivers/gpu/drm/ast/ast_tables.h242
-rw-r--r--drivers/gpu/drm/ast/ast_vbios.c241
-rw-r--r--drivers/gpu/drm/ast/ast_vbios.h108
-rw-r--r--drivers/gpu/drm/ast/ast_vga.c126
-rw-r--r--drivers/gpu/drm/atmel-hlcdc/Kconfig6
-rw-r--r--drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c199
-rw-r--r--drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c156
-rw-r--r--drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h133
-rw-r--r--drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c4
-rw-r--r--drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c423
-rw-r--r--drivers/gpu/drm/bridge/Kconfig174
-rw-r--r--drivers/gpu/drm/bridge/Makefile18
-rw-r--r--drivers/gpu/drm/bridge/adv7511/Kconfig5
-rw-r--r--drivers/gpu/drm/bridge/adv7511/adv7511.h99
-rw-r--r--drivers/gpu/drm/bridge/adv7511/adv7511_audio.c84
-rw-r--r--drivers/gpu/drm/bridge/adv7511/adv7511_cec.c195
-rw-r--r--drivers/gpu/drm/bridge/adv7511/adv7511_drv.c708
-rw-r--r--drivers/gpu/drm/bridge/adv7511/adv7533.c74
-rw-r--r--drivers/gpu/drm/bridge/analogix/Kconfig8
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix-anx6345.c75
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix-anx78xx.c94
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix-i2c-dptx.c4
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix_dp_core.c658
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix_dp_core.h37
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c223
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h9
-rw-r--r--drivers/gpu/drm/bridge/analogix/anx7625.c1478
-rw-r--r--drivers/gpu/drm/bridge/analogix/anx7625.h126
-rw-r--r--drivers/gpu/drm/bridge/aux-bridge.c150
-rw-r--r--drivers/gpu/drm/bridge/aux-hpd-bridge.c211
-rw-r--r--drivers/gpu/drm/bridge/cadence/Kconfig26
-rw-r--r--drivers/gpu/drm/bridge/cadence/Makefile3
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c1410
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-dsi-core.h82
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-dsi-j721e.c51
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-dsi-j721e.h16
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c277
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.h4
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c35
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.h3
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-j721e.c9
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-j721e.h2
-rw-r--r--drivers/gpu/drm/bridge/cdns-dsi.c1303
-rw-r--r--drivers/gpu/drm/bridge/chipone-icn6211.c693
-rw-r--r--drivers/gpu/drm/bridge/chrontel-ch7033.c39
-rw-r--r--drivers/gpu/drm/bridge/cros-ec-anx7688.c14
-rw-r--r--drivers/gpu/drm/bridge/display-connector.c204
-rw-r--r--drivers/gpu/drm/bridge/fsl-ldb.c405
-rw-r--r--drivers/gpu/drm/bridge/imx/Kconfig102
-rw-r--r--drivers/gpu/drm/bridge/imx/Makefile11
-rw-r--r--drivers/gpu/drm/bridge/imx/imx-ldb-helper.c230
-rw-r--r--drivers/gpu/drm/bridge/imx/imx-ldb-helper.h95
-rw-r--r--drivers/gpu/drm/bridge/imx/imx-legacy-bridge.c91
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8mp-hdmi-pai.c158
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8mp-hdmi-pvi.c207
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8mp-hdmi-tx.c213
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8qm-ldb.c591
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8qxp-ldb.c721
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8qxp-pixel-combiner.c443
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8qxp-pixel-link.c422
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8qxp-pxl2dpi.c480
-rw-r--r--drivers/gpu/drm/bridge/imx/imx93-mipi-dsi.c915
-rw-r--r--drivers/gpu/drm/bridge/ite-it6263.c930
-rw-r--r--drivers/gpu/drm/bridge/ite-it6505.c3683
-rw-r--r--drivers/gpu/drm/bridge/ite-it66121.c1011
-rw-r--r--drivers/gpu/drm/bridge/lontium-lt8912b.c273
-rw-r--r--drivers/gpu/drm/bridge/lontium-lt9211.c799
-rw-r--r--drivers/gpu/drm/bridge/lontium-lt9611.c686
-rw-r--r--drivers/gpu/drm/bridge/lontium-lt9611uxc.c241
-rw-r--r--drivers/gpu/drm/bridge/lvds-codec.c67
-rw-r--r--drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c170
-rw-r--r--drivers/gpu/drm/bridge/microchip-lvds.c229
-rw-r--r--drivers/gpu/drm/bridge/nwl-dsi.c120
-rw-r--r--drivers/gpu/drm/bridge/nwl-dsi.h4
-rw-r--r--drivers/gpu/drm/bridge/nxp-ptn3460.c66
-rw-r--r--drivers/gpu/drm/bridge/panel.c238
-rw-r--r--drivers/gpu/drm/bridge/parade-ps8622.c34
-rw-r--r--drivers/gpu/drm/bridge/parade-ps8640.c465
-rw-r--r--drivers/gpu/drm/bridge/samsung-dsim.c2321
-rw-r--r--drivers/gpu/drm/bridge/sii902x.c336
-rw-r--r--drivers/gpu/drm/bridge/sii9234.c25
-rw-r--r--drivers/gpu/drm/bridge/sil-sii8620.c31
-rw-r--r--drivers/gpu/drm/bridge/simple-bridge.c84
-rw-r--r--drivers/gpu/drm/bridge/ssd2825.c775
-rw-r--r--drivers/gpu/drm/bridge/synopsys/Kconfig41
-rw-r--r--drivers/gpu/drm/bridge/synopsys/Makefile5
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-dp.c2097
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c52
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h4
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.c45
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-gp-audio.c200
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c21
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c1343
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.h848
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi.c487
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi.h16
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c231
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi2.c1032
-rw-r--r--drivers/gpu/drm/bridge/tc358762.c111
-rw-r--r--drivers/gpu/drm/bridge/tc358764.c134
-rw-r--r--drivers/gpu/drm/bridge/tc358767.c1260
-rw-r--r--drivers/gpu/drm/bridge/tc358768.c566
-rw-r--r--drivers/gpu/drm/bridge/tc358775.c249
-rw-r--r--drivers/gpu/drm/bridge/tda998x_drv.c (renamed from drivers/gpu/drm/i2c/tda998x_drv.c)101
-rw-r--r--drivers/gpu/drm/bridge/thc63lvd1024.c38
-rw-r--r--drivers/gpu/drm/bridge/ti-dlpc3433.c417
-rw-r--r--drivers/gpu/drm/bridge/ti-sn65dsi83.c526
-rw-r--r--drivers/gpu/drm/bridge/ti-sn65dsi86.c1109
-rw-r--r--drivers/gpu/drm/bridge/ti-tdp158.c115
-rw-r--r--drivers/gpu/drm/bridge/ti-tfp410.c104
-rw-r--r--drivers/gpu/drm/bridge/ti-tpd12s015.c25
-rw-r--r--drivers/gpu/drm/bridge/waveshare-dsi.c203
-rw-r--r--drivers/gpu/drm/ci/arm.config70
-rw-r--r--drivers/gpu/drm/ci/arm64.config210
-rw-r--r--drivers/gpu/drm/ci/build-igt.sh74
-rw-r--r--drivers/gpu/drm/ci/build.sh161
-rw-r--r--drivers/gpu/drm/ci/build.yml226
-rw-r--r--drivers/gpu/drm/ci/check-devicetrees.yml50
-rwxr-xr-xdrivers/gpu/drm/ci/check-patch.py57
-rw-r--r--drivers/gpu/drm/ci/container.yml77
-rwxr-xr-xdrivers/gpu/drm/ci/dt-binding-check.sh19
-rwxr-xr-xdrivers/gpu/drm/ci/dtbs-check.sh22
-rw-r--r--drivers/gpu/drm/ci/gitlab-ci.yml349
-rwxr-xr-xdrivers/gpu/drm/ci/igt_runner.sh101
-rw-r--r--drivers/gpu/drm/ci/image-tags.yml18
-rwxr-xr-xdrivers/gpu/drm/ci/kunit.sh16
-rw-r--r--drivers/gpu/drm/ci/kunit.yml37
-rwxr-xr-xdrivers/gpu/drm/ci/lava-submit.sh105
-rwxr-xr-xdrivers/gpu/drm/ci/setup-llvm-links.sh13
-rw-r--r--drivers/gpu/drm/ci/static-checks.yml12
-rw-r--r--drivers/gpu/drm/ci/test.yml471
-rw-r--r--drivers/gpu/drm/ci/x86_64.config112
-rw-r--r--drivers/gpu/drm/ci/xfails/amdgpu-stoney-fails.txt40
-rw-r--r--drivers/gpu/drm/ci/xfails/amdgpu-stoney-flakes.txt34
-rw-r--r--drivers/gpu/drm/ci/xfails/amdgpu-stoney-skips.txt40
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-amly-fails.txt48
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-amly-flakes.txt69
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-amly-skips.txt27
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-apl-fails.txt44
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-apl-flakes.txt13
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-apl-skips.txt31
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-cml-fails.txt74
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-cml-flakes.txt34
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-cml-skips.txt27
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-glk-fails.txt47
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-glk-flakes.txt20
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-glk-skips.txt328
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-jsl-fails.txt46
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-jsl-flakes.txt6
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-jsl-skips.txt21
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-kbl-fails.txt23
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-kbl-flakes.txt6
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-kbl-skips.txt152
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-tgl-fails.txt30
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-tgl-skips.txt51
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-whl-fails.txt52
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-whl-flakes.txt13
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-whl-skips.txt24
-rw-r--r--drivers/gpu/drm/ci/xfails/mediatek-mt8173-fails.txt44
-rw-r--r--drivers/gpu/drm/ci/xfails/mediatek-mt8173-flakes.txt55
-rw-r--r--drivers/gpu/drm/ci/xfails/mediatek-mt8173-skips.txt23
-rw-r--r--drivers/gpu/drm/ci/xfails/mediatek-mt8183-fails.txt38
-rw-r--r--drivers/gpu/drm/ci/xfails/mediatek-mt8183-flakes.txt41
-rw-r--r--drivers/gpu/drm/ci/xfails/mediatek-mt8183-skips.txt23
-rw-r--r--drivers/gpu/drm/ci/xfails/meson-g12b-fails.txt12
-rw-r--r--drivers/gpu/drm/ci/xfails/meson-g12b-skips.txt18
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt5
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-apq8016-skips.txt17
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-apq8096-fails.txt2
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-apq8096-flakes.txt6
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-apq8096-skips.txt28
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-kingoftown-fails.txt17
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-kingoftown-flakes.txt41
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-kingoftown-skips.txt39
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-lazor-limozeen-fails.txt17
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-lazor-limozeen-flakes.txt20
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-lazor-limozeen-skips.txt30
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sm8350-hdk-fails.txt10
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sm8350-hdk-flakes.txt6
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sm8350-hdk-skips.txt212
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-g12b-fails.txt2
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-g12b-skips.txt24
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-mt8183-fails.txt2
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-mt8183-skips.txt24
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-rk3288-fails.txt2
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-rk3288-skips.txt27
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-rk3399-fails.txt2
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-rk3399-flakes.txt6
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-rk3399-skips.txt27
-rw-r--r--drivers/gpu/drm/ci/xfails/rockchip-rk3288-fails.txt9
-rw-r--r--drivers/gpu/drm/ci/xfails/rockchip-rk3288-flakes.txt34
-rw-r--r--drivers/gpu/drm/ci/xfails/rockchip-rk3288-skips.txt21
-rw-r--r--drivers/gpu/drm/ci/xfails/rockchip-rk3399-fails.txt74
-rw-r--r--drivers/gpu/drm/ci/xfails/rockchip-rk3399-flakes.txt146
-rw-r--r--drivers/gpu/drm/ci/xfails/rockchip-rk3399-skips.txt26
-rw-r--r--drivers/gpu/drm/ci/xfails/virtio_gpu-none-fails.txt190
-rw-r--r--drivers/gpu/drm/ci/xfails/virtio_gpu-none-skips.txt31
-rw-r--r--drivers/gpu/drm/ci/xfails/vkms-none-fails.txt25
-rw-r--r--drivers/gpu/drm/ci/xfails/vkms-none-flakes.txt118
-rw-r--r--drivers/gpu/drm/ci/xfails/vkms-none-skips.txt811
-rw-r--r--drivers/gpu/drm/clients/Kconfig123
-rw-r--r--drivers/gpu/drm/clients/Makefile8
-rw-r--r--drivers/gpu/drm/clients/drm_client_internal.h25
-rw-r--r--drivers/gpu/drm/clients/drm_client_setup.c98
-rw-r--r--drivers/gpu/drm/clients/drm_fbdev_client.c176
-rw-r--r--drivers/gpu/drm/clients/drm_log.c441
-rw-r--r--drivers/gpu/drm/display/Kconfig108
-rw-r--r--drivers/gpu/drm/display/Makefile31
-rw-r--r--drivers/gpu/drm/display/drm_bridge_connector.c887
-rw-r--r--drivers/gpu/drm/display/drm_display_helper_mod.c22
-rw-r--r--drivers/gpu/drm/display/drm_dp_aux_bus.c392
-rw-r--r--drivers/gpu/drm/display/drm_dp_aux_dev.c (renamed from drivers/gpu/drm/drm_dp_aux_dev.c)8
-rw-r--r--drivers/gpu/drm/display/drm_dp_cec.c (renamed from drivers/gpu/drm/drm_dp_cec.c)79
-rw-r--r--drivers/gpu/drm/display/drm_dp_dual_mode_helper.c (renamed from drivers/gpu/drm/drm_dp_dual_mode_helper.c)65
-rw-r--r--drivers/gpu/drm/display/drm_dp_helper.c4840
-rw-r--r--drivers/gpu/drm/display/drm_dp_helper_internal.h33
-rw-r--r--drivers/gpu/drm/display/drm_dp_mst_topology.c (renamed from drivers/gpu/drm/drm_dp_mst_topology.c)2123
-rw-r--r--drivers/gpu/drm/display/drm_dp_mst_topology_internal.h (renamed from drivers/gpu/drm/drm_dp_mst_topology_internal.h)4
-rw-r--r--drivers/gpu/drm/display/drm_dp_tunnel.c1951
-rw-r--r--drivers/gpu/drm/display/drm_dsc_helper.c1566
-rw-r--r--drivers/gpu/drm/display/drm_hdcp_helper.c421
-rw-r--r--drivers/gpu/drm/display/drm_hdmi_audio_helper.c195
-rw-r--r--drivers/gpu/drm/display/drm_hdmi_cec_helper.c193
-rw-r--r--drivers/gpu/drm/display/drm_hdmi_cec_notifier_helper.c65
-rw-r--r--drivers/gpu/drm/display/drm_hdmi_helper.c427
-rw-r--r--drivers/gpu/drm/display/drm_hdmi_state_helper.c1180
-rw-r--r--drivers/gpu/drm/display/drm_scdc_helper.c278
-rw-r--r--drivers/gpu/drm/drm_agpsupport.c451
-rw-r--r--drivers/gpu/drm/drm_aperture.c353
-rw-r--r--drivers/gpu/drm/drm_atomic.c634
-rw-r--r--drivers/gpu/drm/drm_atomic_helper.c1094
-rw-r--r--drivers/gpu/drm/drm_atomic_state_helper.c176
-rw-r--r--drivers/gpu/drm/drm_atomic_uapi.c442
-rw-r--r--drivers/gpu/drm/drm_auth.c92
-rw-r--r--drivers/gpu/drm/drm_blend.c23
-rw-r--r--drivers/gpu/drm/drm_bridge.c712
-rw-r--r--drivers/gpu/drm/drm_bridge_connector.c380
-rw-r--r--drivers/gpu/drm/drm_bridge_helper.c60
-rw-r--r--drivers/gpu/drm/drm_buddy.c1326
-rw-r--r--drivers/gpu/drm/drm_bufs.c1631
-rw-r--r--drivers/gpu/drm/drm_cache.c38
-rw-r--r--drivers/gpu/drm/drm_client.c397
-rw-r--r--drivers/gpu/drm/drm_client_event.c214
-rw-r--r--drivers/gpu/drm/drm_client_modeset.c431
-rw-r--r--drivers/gpu/drm/drm_client_sysrq.c65
-rw-r--r--drivers/gpu/drm/drm_color_mgmt.c297
-rw-r--r--drivers/gpu/drm/drm_colorop.c599
-rw-r--r--drivers/gpu/drm/drm_connector.c1398
-rw-r--r--drivers/gpu/drm/drm_context.c525
-rw-r--r--drivers/gpu/drm/drm_crtc.c227
-rw-r--r--drivers/gpu/drm/drm_crtc_helper.c152
-rw-r--r--drivers/gpu/drm/drm_crtc_helper_internal.h42
-rw-r--r--drivers/gpu/drm/drm_crtc_internal.h59
-rw-r--r--drivers/gpu/drm/drm_damage_helper.c19
-rw-r--r--drivers/gpu/drm/drm_debugfs.c621
-rw-r--r--drivers/gpu/drm/drm_debugfs_crc.c1
-rw-r--r--drivers/gpu/drm/drm_displayid.c132
-rw-r--r--drivers/gpu/drm/drm_displayid_internal.h185
-rw-r--r--drivers/gpu/drm/drm_dma.c178
-rw-r--r--drivers/gpu/drm/drm_dp_aux_bus.c323
-rw-r--r--drivers/gpu/drm/drm_dp_helper.c3613
-rw-r--r--drivers/gpu/drm/drm_draw.c157
-rw-r--r--drivers/gpu/drm/drm_draw_internal.h56
-rw-r--r--drivers/gpu/drm/drm_drv.c457
-rw-r--r--drivers/gpu/drm/drm_dsc.c408
-rw-r--r--drivers/gpu/drm/drm_dumb_buffers.c176
-rw-r--r--drivers/gpu/drm/drm_edid.c4604
-rw-r--r--drivers/gpu/drm/drm_edid_load.c272
-rw-r--r--drivers/gpu/drm/drm_eld.c57
-rw-r--r--drivers/gpu/drm/drm_encoder.c81
-rw-r--r--drivers/gpu/drm/drm_encoder_slave.c182
-rw-r--r--drivers/gpu/drm/drm_exec.c341
-rw-r--r--drivers/gpu/drm/drm_fb_cma_helper.c145
-rw-r--r--drivers/gpu/drm/drm_fb_dma_helper.c197
-rw-r--r--drivers/gpu/drm/drm_fb_helper.c1550
-rw-r--r--drivers/gpu/drm/drm_fbdev_dma.c323
-rw-r--r--drivers/gpu/drm/drm_fbdev_shmem.c203
-rw-r--r--drivers/gpu/drm/drm_fbdev_ttm.c234
-rw-r--r--drivers/gpu/drm/drm_file.c528
-rw-r--r--drivers/gpu/drm/drm_flip_work.c28
-rw-r--r--drivers/gpu/drm/drm_format_helper.c1574
-rw-r--r--drivers/gpu/drm/drm_format_internal.h174
-rw-r--r--drivers/gpu/drm/drm_fourcc.c153
-rw-r--r--drivers/gpu/drm/drm_framebuffer.c295
-rw-r--r--drivers/gpu/drm/drm_gem.c680
-rw-r--r--drivers/gpu/drm/drm_gem_atomic_helper.c181
-rw-r--r--drivers/gpu/drm/drm_gem_cma_helper.c585
-rw-r--r--drivers/gpu/drm/drm_gem_dma_helper.c608
-rw-r--r--drivers/gpu/drm/drm_gem_framebuffer_helper.c181
-rw-r--r--drivers/gpu/drm/drm_gem_shmem_helper.c737
-rw-r--r--drivers/gpu/drm/drm_gem_ttm_helper.c8
-rw-r--r--drivers/gpu/drm/drm_gem_vram_helper.c385
-rw-r--r--drivers/gpu/drm/drm_gpusvm.c1635
-rw-r--r--drivers/gpu/drm/drm_gpuvm.c3207
-rw-r--r--drivers/gpu/drm/drm_hashtab.c209
-rw-r--r--drivers/gpu/drm/drm_hdcp.c423
-rw-r--r--drivers/gpu/drm/drm_internal.h97
-rw-r--r--drivers/gpu/drm/drm_ioc32.c634
-rw-r--r--drivers/gpu/drm/drm_ioctl.c205
-rw-r--r--drivers/gpu/drm/drm_irq.c206
-rw-r--r--drivers/gpu/drm/drm_kms_helper_common.c45
-rw-r--r--drivers/gpu/drm/drm_lease.c70
-rw-r--r--drivers/gpu/drm/drm_legacy.h252
-rw-r--r--drivers/gpu/drm/drm_legacy_misc.c105
-rw-r--r--drivers/gpu/drm/drm_lock.c373
-rw-r--r--drivers/gpu/drm/drm_managed.c65
-rw-r--r--drivers/gpu/drm/drm_memory.c138
-rw-r--r--drivers/gpu/drm/drm_mipi_dbi.c311
-rw-r--r--drivers/gpu/drm/drm_mipi_dsi.c855
-rw-r--r--drivers/gpu/drm/drm_mm.c44
-rw-r--r--drivers/gpu/drm/drm_mode_config.c48
-rw-r--r--drivers/gpu/drm/drm_mode_object.c79
-rw-r--r--drivers/gpu/drm/drm_modes.c796
-rw-r--r--drivers/gpu/drm/drm_modeset_helper.c106
-rw-r--r--drivers/gpu/drm/drm_modeset_lock.c13
-rw-r--r--drivers/gpu/drm/drm_of.c245
-rw-r--r--drivers/gpu/drm/drm_pagemap.c882
-rw-r--r--drivers/gpu/drm/drm_panel.c462
-rw-r--r--drivers/gpu/drm/drm_panel_backlight_quirks.c156
-rw-r--r--drivers/gpu/drm/drm_panel_orientation_quirks.c263
-rw-r--r--drivers/gpu/drm/drm_panic.c1037
-rw-r--r--drivers/gpu/drm/drm_panic_qr.rs1016
-rw-r--r--drivers/gpu/drm/drm_pci.c205
-rw-r--r--drivers/gpu/drm/drm_plane.c466
-rw-r--r--drivers/gpu/drm/drm_plane_helper.c98
-rw-r--r--drivers/gpu/drm/drm_prime.c245
-rw-r--r--drivers/gpu/drm/drm_print.c176
-rw-r--r--drivers/gpu/drm/drm_privacy_screen.c472
-rw-r--r--drivers/gpu/drm/drm_privacy_screen_x86.c106
-rw-r--r--drivers/gpu/drm/drm_probe_helper.c694
-rw-r--r--drivers/gpu/drm/drm_property.c59
-rw-r--r--drivers/gpu/drm/drm_rect.c3
-rw-r--r--drivers/gpu/drm/drm_scatter.c220
-rw-r--r--drivers/gpu/drm/drm_scdc_helper.c249
-rw-r--r--drivers/gpu/drm/drm_self_refresh_helper.c1
-rw-r--r--drivers/gpu/drm/drm_simple_kms_helper.c53
-rw-r--r--drivers/gpu/drm/drm_suballoc.c459
-rw-r--r--drivers/gpu/drm/drm_syncobj.c333
-rw-r--r--drivers/gpu/drm/drm_sysfs.c236
-rw-r--r--drivers/gpu/drm/drm_vblank.c484
-rw-r--r--drivers/gpu/drm/drm_vblank_helper.c176
-rw-r--r--drivers/gpu/drm/drm_vblank_work.c31
-rw-r--r--drivers/gpu/drm/drm_vm.c665
-rw-r--r--drivers/gpu/drm/drm_vma_manager.c77
-rw-r--r--drivers/gpu/drm/drm_writeback.c242
-rw-r--r--drivers/gpu/drm/etnaviv/cmdstream.xml.h52
-rw-r--r--drivers/gpu/drm/etnaviv/common.xml.h12
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_buffer.c17
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c1
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c10
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_drv.c189
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_drv.h10
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_dump.c26
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem.c128
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem.h14
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c28
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c110
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gpu.c474
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gpu.h56
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_hwdb.c171
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_mmu.c136
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_mmu.h2
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_perfmon.c8
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_sched.c147
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_sched.h3
-rw-r--r--drivers/gpu/drm/etnaviv/state.xml.h103
-rw-r--r--drivers/gpu/drm/etnaviv/state_blt.xml.h22
-rw-r--r--drivers/gpu/drm/etnaviv/state_hi.xml.h95
-rw-r--r--drivers/gpu/drm/exynos/Kconfig7
-rw-r--r--drivers/gpu/drm/exynos/exynos5433_drm_decon.c26
-rw-r--r--drivers/gpu/drm/exynos/exynos7_drm_decon.c208
-rw-r--r--drivers/gpu/drm/exynos/exynos_dp.c29
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_crtc.c5
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_crtc.h3
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_dma.c11
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_dpi.c3
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_drv.c63
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_drv.h6
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_dsi.c1917
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fb.c11
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fb.h1
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fbdev.c174
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fbdev.h31
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fimc.c37
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fimd.c95
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_g2d.c20
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_g2d.h4
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_gem.c59
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_gem.h5
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_gsc.c34
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_ipp.c34
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_mic.c43
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_plane.c6
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_rotator.c19
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_scaler.c22
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_vidi.c95
-rw-r--r--drivers/gpu/drm/exynos/exynos_hdmi.c72
-rw-r--r--drivers/gpu/drm/exynos/exynos_mixer.c26
-rw-r--r--drivers/gpu/drm/exynos/regs-decon7.h15
-rw-r--r--drivers/gpu/drm/fsl-dcu/Kconfig4
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c54
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.h3
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_kms.c1
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c20
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c6
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_tcon.c2
-rw-r--r--drivers/gpu/drm/gma500/Kconfig8
-rw-r--r--drivers/gpu/drm/gma500/Makefile2
-rw-r--r--drivers/gpu/drm/gma500/backlight.c106
-rw-r--r--drivers/gpu/drm/gma500/cdv_device.c69
-rw-r--r--drivers/gpu/drm/gma500/cdv_intel_crt.c57
-rw-r--r--drivers/gpu/drm/gma500/cdv_intel_display.c11
-rw-r--r--drivers/gpu/drm/gma500/cdv_intel_dp.c33
-rw-r--r--drivers/gpu/drm/gma500/cdv_intel_hdmi.c108
-rw-r--r--drivers/gpu/drm/gma500/cdv_intel_lvds.c95
-rw-r--r--drivers/gpu/drm/gma500/fbdev.c196
-rw-r--r--drivers/gpu/drm/gma500/framebuffer.c391
-rw-r--r--drivers/gpu/drm/gma500/gem.c374
-rw-r--r--drivers/gpu/drm/gma500/gem.h39
-rw-r--r--drivers/gpu/drm/gma500/gma_display.c131
-rw-r--r--drivers/gpu/drm/gma500/gma_display.h13
-rw-r--r--drivers/gpu/drm/gma500/gtt.c567
-rw-r--r--drivers/gpu/drm/gma500/gtt.h37
-rw-r--r--drivers/gpu/drm/gma500/intel_bios.c26
-rw-r--r--drivers/gpu/drm/gma500/intel_bios.h4
-rw-r--r--drivers/gpu/drm/gma500/intel_gmbus.c5
-rw-r--r--drivers/gpu/drm/gma500/intel_i2c.c36
-rw-r--r--drivers/gpu/drm/gma500/mid_bios.c6
-rw-r--r--drivers/gpu/drm/gma500/mmu.c50
-rw-r--r--drivers/gpu/drm/gma500/mmu.h2
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_crtc.c40
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_device.c71
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_hdmi.c9
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c4
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_lvds.c72
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_lvds_i2c.c36
-rw-r--r--drivers/gpu/drm/gma500/opregion.c14
-rw-r--r--drivers/gpu/drm/gma500/power.c171
-rw-r--r--drivers/gpu/drm/gma500/power.h18
-rw-r--r--drivers/gpu/drm/gma500/psb_device.c106
-rw-r--r--drivers/gpu/drm/gma500/psb_drv.c124
-rw-r--r--drivers/gpu/drm/gma500/psb_drv.h157
-rw-r--r--drivers/gpu/drm/gma500/psb_intel_display.c60
-rw-r--r--drivers/gpu/drm/gma500/psb_intel_drv.h49
-rw-r--r--drivers/gpu/drm/gma500/psb_intel_lvds.c103
-rw-r--r--drivers/gpu/drm/gma500/psb_intel_modes.c33
-rw-r--r--drivers/gpu/drm/gma500/psb_intel_sdvo.c100
-rw-r--r--drivers/gpu/drm/gma500/psb_irq.c163
-rw-r--r--drivers/gpu/drm/gma500/psb_irq.h19
-rw-r--r--drivers/gpu/drm/gma500/psb_lid.c80
-rw-r--r--drivers/gpu/drm/gud/Kconfig1
-rw-r--r--drivers/gpu/drm/gud/gud_connector.c58
-rw-r--r--drivers/gpu/drm/gud/gud_drv.c133
-rw-r--r--drivers/gpu/drm/gud/gud_internal.h15
-rw-r--r--drivers/gpu/drm/gud/gud_pipe.c342
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/Kconfig7
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/Makefile4
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.c168
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h69
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h21
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c307
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h64
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c392
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h132
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_serdes.c71
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_debugfs.c104
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c182
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c133
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h38
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c47
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c48
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/Kconfig6
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c72
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/dw_dsi_reg.h2
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/kirin_ade_reg.h2
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c24
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c32
-rw-r--r--drivers/gpu/drm/hyperv/hyperv_drm.h4
-rw-r--r--drivers/gpu/drm/hyperv/hyperv_drm_drv.c115
-rw-r--r--drivers/gpu/drm/hyperv/hyperv_drm_modeset.c233
-rw-r--r--drivers/gpu/drm/hyperv/hyperv_drm_proto.c25
-rw-r--r--drivers/gpu/drm/i2c/Kconfig36
-rw-r--r--drivers/gpu/drm/i2c/Makefile10
-rw-r--r--drivers/gpu/drm/i2c/tda9950.c510
-rw-r--r--drivers/gpu/drm/i810/Makefile8
-rw-r--r--drivers/gpu/drm/i810/i810_dma.c1266
-rw-r--r--drivers/gpu/drm/i810/i810_drv.c101
-rw-r--r--drivers/gpu/drm/i810/i810_drv.h246
-rw-r--r--drivers/gpu/drm/i915/Kconfig103
-rw-r--r--drivers/gpu/drm/i915/Kconfig.debug41
-rw-r--r--drivers/gpu/drm/i915/Kconfig.profile26
-rw-r--r--drivers/gpu/drm/i915/Kconfig.unstable29
-rw-r--r--drivers/gpu/drm/i915/Makefile318
-rw-r--r--drivers/gpu/drm/i915/TODO.txt2
-rw-r--r--drivers/gpu/drm/i915/display/bxt_dpio_phy_regs.h273
-rw-r--r--drivers/gpu/drm/i915/display/dvo_ch7017.c20
-rw-r--r--drivers/gpu/drm/i915/display/dvo_ch7xxx.c46
-rw-r--r--drivers/gpu/drm/i915/display/dvo_ivch.c22
-rw-r--r--drivers/gpu/drm/i915/display/dvo_ns2501.c32
-rw-r--r--drivers/gpu/drm/i915/display/dvo_sil164.c37
-rw-r--r--drivers/gpu/drm/i915/display/dvo_tfp410.c24
-rw-r--r--drivers/gpu/drm/i915/display/g4x_dp.c649
-rw-r--r--drivers/gpu/drm/i915/display/g4x_dp.h31
-rw-r--r--drivers/gpu/drm/i915/display/g4x_hdmi.c445
-rw-r--r--drivers/gpu/drm/i915/display/g4x_hdmi.h23
-rw-r--r--drivers/gpu/drm/i915/display/hsw_ips.c374
-rw-r--r--drivers/gpu/drm/i915/display/hsw_ips.h62
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_display_sr.c97
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_display_sr.h14
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_plane.c869
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_plane.h44
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_plane_regs.h112
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_wm.c4190
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_wm.h37
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_wm_regs.h257
-rw-r--r--drivers/gpu/drm/i915/display/icl_dsi.c1073
-rw-r--r--drivers/gpu/drm/i915/display/icl_dsi.h17
-rw-r--r--drivers/gpu/drm/i915/display/icl_dsi_regs.h343
-rw-r--r--drivers/gpu/drm/i915/display/intel_acpi.c47
-rw-r--r--drivers/gpu/drm/i915/display/intel_acpi.h17
-rw-r--r--drivers/gpu/drm/i915/display/intel_alpm.c600
-rw-r--r--drivers/gpu/drm/i915/display/intel_alpm.h41
-rw-r--r--drivers/gpu/drm/i915/display/intel_atomic.c265
-rw-r--r--drivers/gpu/drm/i915/display/intel_atomic.h7
-rw-r--r--drivers/gpu/drm/i915/display/intel_atomic_plane.c821
-rw-r--r--drivers/gpu/drm/i915/display/intel_atomic_plane.h68
-rw-r--r--drivers/gpu/drm/i915/display/intel_audio.c1241
-rw-r--r--drivers/gpu/drm/i915/display/intel_audio.h19
-rw-r--r--drivers/gpu/drm/i915/display/intel_audio_regs.h170
-rw-r--r--drivers/gpu/drm/i915/display/intel_backlight.c805
-rw-r--r--drivers/gpu/drm/i915/display/intel_backlight_regs.h121
-rw-r--r--drivers/gpu/drm/i915/display/intel_bios.c3085
-rw-r--r--drivers/gpu/drm/i915/display/intel_bios.h255
-rw-r--r--drivers/gpu/drm/i915/display/intel_bo.c81
-rw-r--r--drivers/gpu/drm/i915/display/intel_bo.h29
-rw-r--r--drivers/gpu/drm/i915/display/intel_bw.c1373
-rw-r--r--drivers/gpu/drm/i915/display/intel_bw.h60
-rw-r--r--drivers/gpu/drm/i915/display/intel_casf.c290
-rw-r--r--drivers/gpu/drm/i915/display/intel_casf.h21
-rw-r--r--drivers/gpu/drm/i915/display/intel_casf_regs.h33
-rw-r--r--drivers/gpu/drm/i915/display/intel_cdclk.c3083
-rw-r--r--drivers/gpu/drm/i915/display/intel_cdclk.h111
-rw-r--r--drivers/gpu/drm/i915/display/intel_cmtg.c188
-rw-r--r--drivers/gpu/drm/i915/display/intel_cmtg.h13
-rw-r--r--drivers/gpu/drm/i915/display/intel_cmtg_regs.h21
-rw-r--r--drivers/gpu/drm/i915/display/intel_color.c3718
-rw-r--r--drivers/gpu/drm/i915/display/intel_color.h41
-rw-r--r--drivers/gpu/drm/i915/display/intel_color_pipeline.c99
-rw-r--r--drivers/gpu/drm/i915/display/intel_color_pipeline.h14
-rw-r--r--drivers/gpu/drm/i915/display/intel_color_regs.h348
-rw-r--r--drivers/gpu/drm/i915/display/intel_colorop.c35
-rw-r--r--drivers/gpu/drm/i915/display/intel_colorop.h15
-rw-r--r--drivers/gpu/drm/i915/display/intel_combo_phy.c272
-rw-r--r--drivers/gpu/drm/i915/display/intel_combo_phy.h8
-rw-r--r--drivers/gpu/drm/i915/display/intel_combo_phy_regs.h167
-rw-r--r--drivers/gpu/drm/i915/display/intel_connector.c163
-rw-r--r--drivers/gpu/drm/i915/display/intel_connector.h12
-rw-r--r--drivers/gpu/drm/i915/display/intel_crt.c610
-rw-r--r--drivers/gpu/drm/i915/display/intel_crt.h22
-rw-r--r--drivers/gpu/drm/i915/display/intel_crt_regs.h50
-rw-r--r--drivers/gpu/drm/i915/display/intel_crtc.c559
-rw-r--r--drivers/gpu/drm/i915/display/intel_crtc.h51
-rw-r--r--drivers/gpu/drm/i915/display/intel_crtc_state_dump.c388
-rw-r--r--drivers/gpu/drm/i915/display/intel_crtc_state_dump.h18
-rw-r--r--drivers/gpu/drm/i915/display/intel_cursor.c635
-rw-r--r--drivers/gpu/drm/i915/display/intel_cursor.h9
-rw-r--r--drivers/gpu/drm/i915/display/intel_cursor_regs.h112
-rw-r--r--drivers/gpu/drm/i915/display/intel_cx0_phy.c3642
-rw-r--r--drivers/gpu/drm/i915/display/intel_cx0_phy.h70
-rw-r--r--drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h436
-rw-r--r--drivers/gpu/drm/i915/display/intel_dbuf_bw.c295
-rw-r--r--drivers/gpu/drm/i915/display/intel_dbuf_bw.h37
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.c3680
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.h50
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c455
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi_buf_trans.h10
-rw-r--r--drivers/gpu/drm/i915/display/intel_de.h205
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c12060
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.h469
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_conversion.c21
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_conversion.h16
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_core.h627
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_debugfs.c2264
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_debugfs.h12
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_debugfs_params.c177
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_debugfs_params.h13
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_device.c1985
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_device.h326
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_driver.c797
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_driver.h42
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_irq.c2486
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_irq.h94
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_jiffies.h43
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_limits.h150
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_params.c242
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_params.h65
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power.c6129
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power.h396
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power_map.c1933
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power_map.h14
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power_well.c2069
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power_well.h181
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_reg_defs.h49
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_regs.h2934
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_reset.c127
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_reset.h20
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_rpm.c62
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_rpm.h37
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_rps.c108
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_rps.h46
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_snapshot.c79
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_snapshot.h16
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_trace.c9
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_trace.h858
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_types.h1203
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_utils.c32
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_utils.h31
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_wa.c79
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_wa.h36
-rw-r--r--drivers/gpu/drm/i915/display/intel_dkl_phy.c117
-rw-r--r--drivers/gpu/drm/i915/display/intel_dkl_phy.h25
-rw-r--r--drivers/gpu/drm/i915/display/intel_dkl_phy_regs.h207
-rw-r--r--drivers/gpu/drm/i915/display/intel_dmc.c1407
-rw-r--r--drivers/gpu/drm/i915/display/intel_dmc.h87
-rw-r--r--drivers/gpu/drm/i915/display/intel_dmc_regs.h587
-rw-r--r--drivers/gpu/drm/i915/display/intel_dmc_wl.c506
-rw-r--r--drivers/gpu/drm/i915/display/intel_dmc_wl.h41
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.c6229
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.h167
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_aux.c443
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_aux.h11
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c413
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_aux_regs.h106
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_hdcp.c314
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_link_training.c1604
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_link_training.h26
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_mst.c2077
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_mst.h19
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_test.c764
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_test.h23
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_tunnel.c814
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_tunnel.h133
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpio_phy.c757
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpio_phy.h154
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpll.c1458
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpll.h34
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpll_mgr.c3270
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpll_mgr.h271
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpt.c167
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpt.h11
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpt_common.c35
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpt_common.h13
-rw-r--r--drivers/gpu/drm/i915/display/intel_drrs.c530
-rw-r--r--drivers/gpu/drm/i915/display/intel_drrs.h33
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsb.c1099
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsb.h90
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsb_buffer.c82
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsb_buffer.h29
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsb_regs.h100
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsi.c66
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsi.h59
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c35
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsi_vbt.c776
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsi_vbt.h20
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsi_vbt_defs.h197
-rw-r--r--drivers/gpu/drm/i915/display/intel_dvo.c477
-rw-r--r--drivers/gpu/drm/i915/display/intel_dvo.h10
-rw-r--r--drivers/gpu/drm/i915/display/intel_dvo_dev.h46
-rw-r--r--drivers/gpu/drm/i915/display/intel_dvo_regs.h54
-rw-r--r--drivers/gpu/drm/i915/display/intel_encoder.c123
-rw-r--r--drivers/gpu/drm/i915/display/intel_encoder.h26
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb.c1710
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb.h85
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb_bo.c101
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb_bo.h25
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb_pin.c270
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb_pin.h21
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbc.c2535
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbc.h45
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbc_regs.h129
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbdev.c601
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbdev.h49
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbdev_fb.c106
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbdev_fb.h23
-rw-r--r--drivers/gpu/drm/i915/display/intel_fdi.c861
-rw-r--r--drivers/gpu/drm/i915/display/intel_fdi.h32
-rw-r--r--drivers/gpu/drm/i915/display/intel_fdi_regs.h151
-rw-r--r--drivers/gpu/drm/i915/display/intel_fifo_underrun.c316
-rw-r--r--drivers/gpu/drm/i915/display/intel_fifo_underrun.h20
-rw-r--r--drivers/gpu/drm/i915/display/intel_fixed.h (renamed from drivers/gpu/drm/i915/i915_fixed.h)0
-rw-r--r--drivers/gpu/drm/i915/display/intel_flipq.c472
-rw-r--r--drivers/gpu/drm/i915/display/intel_flipq.h37
-rw-r--r--drivers/gpu/drm/i915/display/intel_frontbuffer.c219
-rw-r--r--drivers/gpu/drm/i915/display/intel_frontbuffer.h73
-rw-r--r--drivers/gpu/drm/i915/display/intel_global_state.c214
-rw-r--r--drivers/gpu/drm/i915/display/intel_global_state.h49
-rw-r--r--drivers/gpu/drm/i915/display/intel_gmbus.c572
-rw-r--r--drivers/gpu/drm/i915/display/intel_gmbus.h17
-rw-r--r--drivers/gpu/drm/i915/display/intel_gmbus_regs.h81
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp.c1518
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp.h24
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp_gsc.c239
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp_gsc.h22
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp_gsc_message.c672
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp_gsc_message.h14
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp_regs.h272
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp_shim.h137
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdmi.c1510
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdmi.h35
-rw-r--r--drivers/gpu/drm/i915/display/intel_hotplug.c937
-rw-r--r--drivers/gpu/drm/i915/display/intel_hotplug.h30
-rw-r--r--drivers/gpu/drm/i915/display/intel_hotplug_irq.c1495
-rw-r--r--drivers/gpu/drm/i915/display/intel_hotplug_irq.h35
-rw-r--r--drivers/gpu/drm/i915/display/intel_hti.c43
-rw-r--r--drivers/gpu/drm/i915/display/intel_hti.h18
-rw-r--r--drivers/gpu/drm/i915/display/intel_hti_regs.h16
-rw-r--r--drivers/gpu/drm/i915/display/intel_link_bw.c497
-rw-r--r--drivers/gpu/drm/i915/display/intel_link_bw.h39
-rw-r--r--drivers/gpu/drm/i915/display/intel_load_detect.c225
-rw-r--r--drivers/gpu/drm/i915/display/intel_load_detect.h20
-rw-r--r--drivers/gpu/drm/i915/display/intel_lpe_audio.c143
-rw-r--r--drivers/gpu/drm/i915/display/intel_lpe_audio.h32
-rw-r--r--drivers/gpu/drm/i915/display/intel_lspcon.c245
-rw-r--r--drivers/gpu/drm/i915/display/intel_lspcon.h25
-rw-r--r--drivers/gpu/drm/i915/display/intel_lt_phy.c2327
-rw-r--r--drivers/gpu/drm/i915/display/intel_lt_phy.h47
-rw-r--r--drivers/gpu/drm/i915/display/intel_lt_phy_regs.h90
-rw-r--r--drivers/gpu/drm/i915/display/intel_lvds.c548
-rw-r--r--drivers/gpu/drm/i915/display/intel_lvds.h31
-rw-r--r--drivers/gpu/drm/i915/display/intel_lvds_regs.h65
-rw-r--r--drivers/gpu/drm/i915/display/intel_mg_phy_regs.h282
-rw-r--r--drivers/gpu/drm/i915/display/intel_modeset_lock.c50
-rw-r--r--drivers/gpu/drm/i915/display/intel_modeset_lock.h33
-rw-r--r--drivers/gpu/drm/i915/display/intel_modeset_setup.c1012
-rw-r--r--drivers/gpu/drm/i915/display/intel_modeset_setup.h15
-rw-r--r--drivers/gpu/drm/i915/display/intel_modeset_verify.c258
-rw-r--r--drivers/gpu/drm/i915/display/intel_modeset_verify.h16
-rw-r--r--drivers/gpu/drm/i915/display/intel_opregion.c582
-rw-r--r--drivers/gpu/drm/i915/display/intel_opregion.h109
-rw-r--r--drivers/gpu/drm/i915/display/intel_overlay.c278
-rw-r--r--drivers/gpu/drm/i915/display/intel_overlay.h66
-rw-r--r--drivers/gpu/drm/i915/display/intel_panel.c809
-rw-r--r--drivers/gpu/drm/i915/display/intel_panel.h52
-rw-r--r--drivers/gpu/drm/i915/display/intel_panic.c27
-rw-r--r--drivers/gpu/drm/i915/display/intel_panic.h14
-rw-r--r--drivers/gpu/drm/i915/display/intel_pch.c340
-rw-r--r--drivers/gpu/drm/i915/display/intel_pch.h56
-rw-r--r--drivers/gpu/drm/i915/display/intel_pch_display.c644
-rw-r--r--drivers/gpu/drm/i915/display/intel_pch_display.h98
-rw-r--r--drivers/gpu/drm/i915/display/intel_pch_refclk.c684
-rw-r--r--drivers/gpu/drm/i915/display/intel_pch_refclk.h45
-rw-r--r--drivers/gpu/drm/i915/display/intel_pfit.c731
-rw-r--r--drivers/gpu/drm/i915/display/intel_pfit.h29
-rw-r--r--drivers/gpu/drm/i915/display/intel_pfit_regs.h79
-rw-r--r--drivers/gpu/drm/i915/display/intel_pipe_crc.c171
-rw-r--r--drivers/gpu/drm/i915/display/intel_pipe_crc.h1
-rw-r--r--drivers/gpu/drm/i915/display/intel_pipe_crc_regs.h152
-rw-r--r--drivers/gpu/drm/i915/display/intel_plane.c1766
-rw-r--r--drivers/gpu/drm/i915/display/intel_plane.h91
-rw-r--r--drivers/gpu/drm/i915/display/intel_plane_initial.c273
-rw-r--r--drivers/gpu/drm/i915/display/intel_plane_initial.h4
-rw-r--r--drivers/gpu/drm/i915/display/intel_pmdemand.c675
-rw-r--r--drivers/gpu/drm/i915/display/intel_pmdemand.h39
-rw-r--r--drivers/gpu/drm/i915/display/intel_pps.c1372
-rw-r--r--drivers/gpu/drm/i915/display/intel_pps.h28
-rw-r--r--drivers/gpu/drm/i915/display/intel_pps_regs.h77
-rw-r--r--drivers/gpu/drm/i915/display/intel_psr.c3758
-rw-r--r--drivers/gpu/drm/i915/display/intel_psr.h70
-rw-r--r--drivers/gpu/drm/i915/display/intel_psr_regs.h342
-rw-r--r--drivers/gpu/drm/i915/display/intel_qp_tables.c198
-rw-r--r--drivers/gpu/drm/i915/display/intel_qp_tables.h4
-rw-r--r--drivers/gpu/drm/i915/display/intel_quirks.c143
-rw-r--r--drivers/gpu/drm/i915/display/intel_quirks.h23
-rw-r--r--drivers/gpu/drm/i915/display/intel_sbi.c92
-rw-r--r--drivers/gpu/drm/i915/display/intel_sbi.h27
-rw-r--r--drivers/gpu/drm/i915/display/intel_sbi_regs.h65
-rw-r--r--drivers/gpu/drm/i915/display/intel_sdvo.c1266
-rw-r--r--drivers/gpu/drm/i915/display/intel_sdvo.h21
-rw-r--r--drivers/gpu/drm/i915/display/intel_sdvo_regs.h4
-rw-r--r--drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c366
-rw-r--r--drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.h17
-rw-r--r--drivers/gpu/drm/i915/display/intel_snps_phy.c1603
-rw-r--r--drivers/gpu/drm/i915/display/intel_snps_phy.h17
-rw-r--r--drivers/gpu/drm/i915/display/intel_snps_phy_regs.h75
-rw-r--r--drivers/gpu/drm/i915/display/intel_sprite.c866
-rw-r--r--drivers/gpu/drm/i915/display/intel_sprite.h40
-rw-r--r--drivers/gpu/drm/i915/display/intel_sprite_regs.h388
-rw-r--r--drivers/gpu/drm/i915/display/intel_sprite_uapi.c128
-rw-r--r--drivers/gpu/drm/i915/display/intel_sprite_uapi.h15
-rw-r--r--drivers/gpu/drm/i915/display/intel_tc.c2066
-rw-r--r--drivers/gpu/drm/i915/display/intel_tc.h93
-rw-r--r--drivers/gpu/drm/i915/display/intel_tdf.h25
-rw-r--r--drivers/gpu/drm/i915/display/intel_tv.c394
-rw-r--r--drivers/gpu/drm/i915/display/intel_tv.h10
-rw-r--r--drivers/gpu/drm/i915/display/intel_tv_regs.h490
-rw-r--r--drivers/gpu/drm/i915/display/intel_vblank.c781
-rw-r--r--drivers/gpu/drm/i915/display/intel_vblank.h53
-rw-r--r--drivers/gpu/drm/i915/display/intel_vbt_defs.h1194
-rw-r--r--drivers/gpu/drm/i915/display/intel_vdsc.c1690
-rw-r--r--drivers/gpu/drm/i915/display/intel_vdsc.h20
-rw-r--r--drivers/gpu/drm/i915/display/intel_vdsc_regs.h354
-rw-r--r--drivers/gpu/drm/i915/display/intel_vga.c150
-rw-r--r--drivers/gpu/drm/i915/display/intel_vga.h12
-rw-r--r--drivers/gpu/drm/i915/display/intel_vga_regs.h36
-rw-r--r--drivers/gpu/drm/i915/display/intel_vrr.c870
-rw-r--r--drivers/gpu/drm/i915/display/intel_vrr.h34
-rw-r--r--drivers/gpu/drm/i915/display/intel_vrr_regs.h126
-rw-r--r--drivers/gpu/drm/i915/display/intel_wm.c404
-rw-r--r--drivers/gpu/drm/i915/display/intel_wm.h36
-rw-r--r--drivers/gpu/drm/i915/display/intel_wm_types.h76
-rw-r--r--drivers/gpu/drm/i915/display/skl_prefill.c157
-rw-r--r--drivers/gpu/drm/i915/display/skl_prefill.h46
-rw-r--r--drivers/gpu/drm/i915/display/skl_scaler.c1006
-rw-r--r--drivers/gpu/drm/i915/display/skl_scaler.h52
-rw-r--r--drivers/gpu/drm/i915/display/skl_universal_plane.c2094
-rw-r--r--drivers/gpu/drm/i915/display/skl_universal_plane.h19
-rw-r--r--drivers/gpu/drm/i915/display/skl_universal_plane_regs.h585
-rw-r--r--drivers/gpu/drm/i915/display/skl_watermark.c4108
-rw-r--r--drivers/gpu/drm/i915/display/skl_watermark.h86
-rw-r--r--drivers/gpu/drm/i915/display/skl_watermark_regs.h87
-rw-r--r--drivers/gpu/drm/i915/display/vlv_clock.c88
-rw-r--r--drivers/gpu/drm/i915/display/vlv_clock.h38
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dpio_phy_regs.h309
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dsi.c1088
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dsi.h31
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dsi_pll.c370
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dsi_pll.h52
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dsi_pll_regs.h109
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dsi_regs.h475
-rw-r--r--drivers/gpu/drm/i915/display/vlv_sideband.c50
-rw-r--r--drivers/gpu/drm/i915/display/vlv_sideband.h156
-rw-r--r--drivers/gpu/drm/i915/dma_resv_utils.c17
-rw-r--r--drivers/gpu/drm/i915/dma_resv_utils.h13
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_busy.c40
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_clflush.c33
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_clflush.h3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.c380
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.h5
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context_types.h23
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_create.c80
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_create.h17
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c101
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_dmabuf.h18
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_domain.c125
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_domain.h15
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c760
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_internal.c77
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_internal.h23
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ioctls.h3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_lmem.c35
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_lmem.h4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_mman.c331
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_mman.h6
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.c350
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.h409
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.c103
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h84
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_types.h226
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pages.c229
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_phys.c35
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pm.c51
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pm.h3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_region.c74
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_region.h11
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shmem.c481
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shrinker.c222
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_stolen.c623
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_stolen.h33
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_throttle.c7
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_tiling.c58
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_tiling.h20
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm.c1147
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm.h62
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c757
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h42
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c43
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_userptr.c91
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_wait.c171
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gemfs.c69
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gemfs.h6
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c8
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/huge_pages.c588
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c311
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c30
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c294
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c123
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c343
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c548
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c12
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c20
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h14
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/mock_context.c50
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/mock_context.h3
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c10
-rw-r--r--drivers/gpu/drm/i915/gt/gen2_engine_cs.c37
-rw-r--r--drivers/gpu/drm/i915/gt/gen2_engine_cs.h6
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_engine_cs.c1
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_ppgtt.c200
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_ppgtt.h2
-rw-r--r--drivers/gpu/drm/i915/gt/gen7_renderclear.c6
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.c382
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.h31
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.c442
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_breadcrumbs.c34
-rw-r--r--drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.c95
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.h88
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_sseu.c1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h58
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h68
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c900
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c50
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c81
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_regs.h269
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_stats.h33
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h206
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_user.c72
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.c304
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt.c1356
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c34
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c140
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_gmch.h27
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gpu_commands.h96
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gsc.c321
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gsc.h43
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c657
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h147
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c19
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c39
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c221
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_debugfs.c32
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_debugfs.h23
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_defines.h11
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_irq.c206
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_mcr.c870
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_mcr.h71
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.c88
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.h54
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c408
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c9
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_print.h57
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_regs.h1641
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_requests.c22
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs.c116
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs.h31
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c930
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.h15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_types.h116
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.c235
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.h204
-rw-r--r--drivers/gpu/drm/i915/gt/intel_hwconfig.h21
-rw-r--r--drivers/gpu/drm/i915/gt/intel_llc.c31
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c415
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.h81
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc_reg.h15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_migrate.c620
-rw-r--r--drivers/gpu/drm/i915/gt/intel_migrate.h22
-rw-r--r--drivers/gpu/drm/i915/gt/intel_mocs.c153
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ppgtt.c61
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.c311
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.h13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6_types.h17
-rw-r--r--drivers/gpu/drm/i915/gt/intel_region_lmem.c257
-rw-r--r--drivers/gpu/drm/i915/gt/intel_region_lmem.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_renderstate.c10
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c496
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.h8
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset_types.h7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring.c38
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring_submission.c131
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c923
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.h27
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps_types.h19
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sa_media.c48
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sa_media.h15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu.c541
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu.h120
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c73
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.c6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_tlb.c173
-rw-r--r--drivers/gpu/drm/i915/gt/intel_tlb.h29
-rw-r--r--drivers/gpu/drm/i915/gt/intel_wopcm.c324
-rw-r--r--drivers/gpu/drm/i915/gt/intel_wopcm.h (renamed from drivers/gpu/drm/i915/intel_wopcm.h)3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c1499
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds_types.h14
-rw-r--r--drivers/gpu/drm/i915/gt/mock_engine.c56
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_context.c12
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_cs.c58
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c126
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_pm.c39
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_execlists.c222
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_gt_pm.c54
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_hangcheck.c158
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_llc.c6
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_lrc.c323
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_migrate.c490
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_mocs.c14
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rc6.c69
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_reset.c26
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_ring_submission.c6
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rps.c80
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_slpc.c588
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_timeline.c28
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_tlb.c405
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_workarounds.c47
-rw-r--r--drivers/gpu/drm/i915/gt/shaders/README6
-rw-r--r--drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm2
-rw-r--r--drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm2
-rw-r--r--drivers/gpu/drm/i915/gt/shmem_utils.c45
-rw-r--r--drivers/gpu/drm/i915/gt/shmem_utils.h3
-rw-r--r--drivers/gpu/drm/i915/gt/sysfs_engines.c103
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h132
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h42
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h31
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h46
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h113
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h32
-rw-r--r--drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h226
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_binary_headers.h135
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c524
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h21
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c429
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.h18
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c358
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h85
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.c39
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.h14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c231
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h93
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c353
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h219
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c751
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h6
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c1654
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h35
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c428
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h19
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c61
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c248
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h251
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c167
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.c410
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.h47
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c62
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_print.h51
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c13
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h23
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c493
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h9
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h23
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c2515
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h6
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.c635
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.h63
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c260
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h7
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_print.h21
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c245
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.h8
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c24
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c1154
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h102
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h8
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc.c302
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c162
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c17
-rw-r--r--drivers/gpu/drm/i915/gvt/Makefile30
-rw-r--r--drivers/gpu/drm/i915/gvt/aperture_gm.c38
-rw-r--r--drivers/gpu/drm/i915/gvt/cfg_space.c98
-rw-r--r--drivers/gpu/drm/i915/gvt/cmd_parser.c65
-rw-r--r--drivers/gpu/drm/i915/gvt/debugfs.c55
-rw-r--r--drivers/gpu/drm/i915/gvt/display.c108
-rw-r--r--drivers/gpu/drm/i915/gvt/display.h55
-rw-r--r--drivers/gpu/drm/i915/gvt/dmabuf.c81
-rw-r--r--drivers/gpu/drm/i915/gvt/dmabuf.h2
-rw-r--r--drivers/gpu/drm/i915/gvt/edid.c59
-rw-r--r--drivers/gpu/drm/i915/gvt/edid.h12
-rw-r--r--drivers/gpu/drm/i915/gvt/execlist.c12
-rw-r--r--drivers/gpu/drm/i915/gvt/fb_decoder.c201
-rw-r--r--drivers/gpu/drm/i915/gvt/fb_decoder.h15
-rw-r--r--drivers/gpu/drm/i915/gvt/firmware.c60
-rw-r--r--drivers/gpu/drm/i915/gvt/gtt.c292
-rw-r--r--drivers/gpu/drm/i915/gvt/gtt.h8
-rw-r--r--drivers/gpu/drm/i915/gvt/gvt.c340
-rw-r--r--drivers/gpu/drm/i915/gvt/gvt.h206
-rw-r--r--drivers/gpu/drm/i915/gvt/handlers.c1379
-rw-r--r--drivers/gpu/drm/i915/gvt/hypercall.h82
-rw-r--r--drivers/gpu/drm/i915/gvt/interrupt.c71
-rw-r--r--drivers/gpu/drm/i915/gvt/interrupt.h35
-rw-r--r--drivers/gpu/drm/i915/gvt/kvmgt.c1684
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio.c18
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio.h6
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio_context.c43
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio_context.h12
-rw-r--r--drivers/gpu/drm/i915/gvt/mpt.h400
-rw-r--r--drivers/gpu/drm/i915/gvt/opregion.c159
-rw-r--r--drivers/gpu/drm/i915/gvt/page_track.c22
-rw-r--r--drivers/gpu/drm/i915/gvt/reg.h13
-rw-r--r--drivers/gpu/drm/i915/gvt/sched_policy.c10
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.c67
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.h4
-rw-r--r--drivers/gpu/drm/i915/gvt/trace.h4
-rw-r--r--drivers/gpu/drm/i915/gvt/vgpu.c255
-rw-r--r--drivers/gpu/drm/i915/i915_active.c180
-rw-r--r--drivers/gpu/drm/i915/i915_active.h32
-rw-r--r--drivers/gpu/drm/i915/i915_active_types.h2
-rw-r--r--drivers/gpu/drm/i915/i915_buddy.c466
-rw-r--r--drivers/gpu/drm/i915/i915_buddy.h143
-rw-r--r--drivers/gpu/drm/i915/i915_cmd_parser.c89
-rw-r--r--drivers/gpu/drm/i915/i915_cmd_parser.h26
-rw-r--r--drivers/gpu/drm/i915/i915_config.c5
-rw-r--r--drivers/gpu/drm/i915/i915_config.h23
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c345
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs_params.c47
-rw-r--r--drivers/gpu/drm/i915/i915_deps.c237
-rw-r--r--drivers/gpu/drm/i915/i915_deps.h45
-rw-r--r--drivers/gpu/drm/i915/i915_driver.c1847
-rw-r--r--drivers/gpu/drm/i915/i915_driver.h33
-rw-r--r--drivers/gpu/drm/i915/i915_drm_client.c220
-rw-r--r--drivers/gpu/drm/i915/i915_drm_client.h92
-rw-r--r--drivers/gpu/drm/i915/i915_drv.c1809
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h1933
-rw-r--r--drivers/gpu/drm/i915/i915_file_private.h111
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c342
-rw-r--r--drivers/gpu/drm/i915/i915_gem.h89
-rw-r--r--drivers/gpu/drm/i915/i915_gem_evict.c174
-rw-r--r--drivers/gpu/drm/i915/i915_gem_evict.h30
-rw-r--r--drivers/gpu/drm/i915/i915_gem_gtt.c26
-rw-r--r--drivers/gpu/drm/i915/i915_gem_gtt.h7
-rw-r--r--drivers/gpu/drm/i915/i915_getparam.c54
-rw-r--r--drivers/gpu/drm/i915/i915_getparam.h15
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c1257
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.h131
-rw-r--r--drivers/gpu/drm/i915/i915_gtt_view_types.h59
-rw-r--r--drivers/gpu/drm/i915/i915_hwmon.c994
-rw-r--r--drivers/gpu/drm/i915/i915_hwmon.h27
-rw-r--r--drivers/gpu/drm/i915/i915_ioc32.c1
-rw-r--r--drivers/gpu/drm/i915/i915_ioctl.c94
-rw-r--r--drivers/gpu/drm/i915/i915_ioctl.h14
-rw-r--r--drivers/gpu/drm/i915/i915_iosf_mbi.h36
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c4030
-rw-r--r--drivers/gpu/drm/i915/i915_irq.h100
-rw-r--r--drivers/gpu/drm/i915/i915_jiffies.h16
-rw-r--r--drivers/gpu/drm/i915/i915_list_util.h23
-rw-r--r--drivers/gpu/drm/i915/i915_memcpy.c4
-rw-r--r--drivers/gpu/drm/i915/i915_mitigations.c1
-rw-r--r--drivers/gpu/drm/i915/i915_mm.c44
-rw-r--r--drivers/gpu/drm/i915/i915_mm.h36
-rw-r--r--drivers/gpu/drm/i915/i915_mmio_range.c18
-rw-r--r--drivers/gpu/drm/i915/i915_mmio_range.h19
-rw-r--r--drivers/gpu/drm/i915/i915_module.c29
-rw-r--r--drivers/gpu/drm/i915/i915_params.c212
-rw-r--r--drivers/gpu/drm/i915/i915_params.h30
-rw-r--r--drivers/gpu/drm/i915/i915_pci.c823
-rw-r--r--drivers/gpu/drm/i915/i915_pci.h6
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c1642
-rw-r--r--drivers/gpu/drm/i915/i915_perf.h6
-rw-r--r--drivers/gpu/drm/i915/i915_perf_oa_regs.h215
-rw-r--r--drivers/gpu/drm/i915/i915_perf_types.h130
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.c619
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.h45
-rw-r--r--drivers/gpu/drm/i915/i915_ptr_util.h66
-rw-r--r--drivers/gpu/drm/i915/i915_query.c171
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h12261
-rw-r--r--drivers/gpu/drm/i915/i915_reg_defs.h229
-rw-r--r--drivers/gpu/drm/i915/i915_request.c287
-rw-r--r--drivers/gpu/drm/i915/i915_request.h92
-rw-r--r--drivers/gpu/drm/i915/i915_scatterlist.c115
-rw-r--r--drivers/gpu/drm/i915/i915_scatterlist.h117
-rw-r--r--drivers/gpu/drm/i915/i915_scheduler.c2
-rw-r--r--drivers/gpu/drm/i915/i915_scheduler.h1
-rw-r--r--drivers/gpu/drm/i915/i915_selftest.h2
-rw-r--r--drivers/gpu/drm/i915/i915_suspend.c127
-rw-r--r--drivers/gpu/drm/i915/i915_suspend.h14
-rw-r--r--drivers/gpu/drm/i915/i915_sw_fence.c105
-rw-r--r--drivers/gpu/drm/i915/i915_sw_fence.h30
-rw-r--r--drivers/gpu/drm/i915/i915_sw_fence_work.c2
-rw-r--r--drivers/gpu/drm/i915/i915_switcheroo.c18
-rw-r--r--drivers/gpu/drm/i915/i915_syncmap.c19
-rw-r--r--drivers/gpu/drm/i915/i915_sysfs.c408
-rw-r--r--drivers/gpu/drm/i915/i915_sysfs.h3
-rw-r--r--drivers/gpu/drm/i915/i915_tasklet.h43
-rw-r--r--drivers/gpu/drm/i915/i915_timer_util.c36
-rw-r--r--drivers/gpu/drm/i915/i915_timer_util.h23
-rw-r--r--drivers/gpu/drm/i915/i915_trace.h562
-rw-r--r--drivers/gpu/drm/i915/i915_ttm_buddy_manager.c270
-rw-r--r--drivers/gpu/drm/i915/i915_ttm_buddy_manager.h22
-rw-r--r--drivers/gpu/drm/i915/i915_user_extensions.c2
-rw-r--r--drivers/gpu/drm/i915/i915_utils.c92
-rw-r--r--drivers/gpu/drm/i915/i915_utils.h375
-rw-r--r--drivers/gpu/drm/i915/i915_vgpu.c4
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c1373
-rw-r--r--drivers/gpu/drm/i915/i915_vma.h179
-rw-r--r--drivers/gpu/drm/i915/i915_vma_resource.c425
-rw-r--r--drivers/gpu/drm/i915/i915_vma_resource.h261
-rw-r--r--drivers/gpu/drm/i915/i915_vma_types.h102
-rw-r--r--drivers/gpu/drm/i915/i915_wait_util.h119
-rw-r--r--drivers/gpu/drm/i915/intel_clock_gating.c801
-rw-r--r--drivers/gpu/drm/i915/intel_clock_gating.h14
-rw-r--r--drivers/gpu/drm/i915/intel_cpu_info.c44
-rw-r--r--drivers/gpu/drm/i915/intel_cpu_info.h13
-rw-r--r--drivers/gpu/drm/i915/intel_device_info.c459
-rw-r--r--drivers/gpu/drm/i915/intel_device_info.h184
-rw-r--r--drivers/gpu/drm/i915/intel_dram.c532
-rw-r--r--drivers/gpu/drm/i915/intel_dram.h14
-rw-r--r--drivers/gpu/drm/i915/intel_gvt.c259
-rw-r--r--drivers/gpu/drm/i915/intel_gvt.h32
-rw-r--r--drivers/gpu/drm/i915/intel_gvt_mmio_table.c1320
-rw-r--r--drivers/gpu/drm/i915/intel_mchbar_regs.h258
-rw-r--r--drivers/gpu/drm/i915/intel_memory_region.c242
-rw-r--r--drivers/gpu/drm/i915/intel_memory_region.h40
-rw-r--r--drivers/gpu/drm/i915/intel_pch.c267
-rw-r--r--drivers/gpu/drm/i915/intel_pch.h89
-rw-r--r--drivers/gpu/drm/i915/intel_pci_config.h110
-rw-r--r--drivers/gpu/drm/i915/intel_pcode.c170
-rw-r--r--drivers/gpu/drm/i915/intel_pcode.h33
-rw-r--r--drivers/gpu/drm/i915/intel_pm.c8222
-rw-r--r--drivers/gpu/drm/i915/intel_pm.h98
-rw-r--r--drivers/gpu/drm/i915/intel_region_ttm.c98
-rw-r--r--drivers/gpu/drm/i915/intel_region_ttm.h9
-rw-r--r--drivers/gpu/drm/i915/intel_runtime_pm.c366
-rw-r--r--drivers/gpu/drm/i915/intel_runtime_pm.h66
-rw-r--r--drivers/gpu/drm/i915/intel_sbi.c73
-rw-r--r--drivers/gpu/drm/i915/intel_sbi.h23
-rw-r--r--drivers/gpu/drm/i915/intel_step.c139
-rw-r--r--drivers/gpu/drm/i915/intel_step.h33
-rw-r--r--drivers/gpu/drm/i915/intel_uncore.c1218
-rw-r--r--drivers/gpu/drm/i915/intel_uncore.h141
-rw-r--r--drivers/gpu/drm/i915/intel_uncore_trace.c7
-rw-r--r--drivers/gpu/drm/i915/intel_uncore_trace.h49
-rw-r--r--drivers/gpu/drm/i915/intel_wakeref.c118
-rw-r--r--drivers/gpu/drm/i915/intel_wakeref.h111
-rw-r--r--drivers/gpu/drm/i915/intel_wopcm.c277
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp.c355
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp.h54
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_cmd.c8
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_42.h43
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h61
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_cmn.h42
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c55
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.h4
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c448
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h45
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_huc.c73
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_huc.h13
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_irq.c38
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_irq.h8
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_pm.c29
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_pm.h11
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_regs.h27
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_session.c65
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_session.h14
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_tee.c289
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_tee.h5
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_tee_interface.h36
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_types.h51
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_active.c15
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_buddy.c787
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem.c35
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem_evict.c60
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem_gtt.c783
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_live_selftests.h2
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_mock_selftests.h1
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_perf.c24
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_random.h2
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_request.c577
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_selftest.c91
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_sw_fence.c18
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_syncmap.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_vma.c127
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_flush_test.c38
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_live_test.c48
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_live_test.h3
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_reset.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_spinner.c53
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_memory_region.c231
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c9
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_uncore.c23
-rw-r--r--drivers/gpu/drm/i915/selftests/lib_sw_fence.c14
-rw-r--r--drivers/gpu/drm/i915/selftests/librapl.c4
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_gem_device.c112
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_gtt.c39
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_gtt.h3
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_region.c39
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_region.h3
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_request.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_uncore.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/scatterlist.c6
-rw-r--r--drivers/gpu/drm/i915/soc/intel_dram.c823
-rw-r--r--drivers/gpu/drm/i915/soc/intel_dram.h45
-rw-r--r--drivers/gpu/drm/i915/soc/intel_gmch.c188
-rw-r--r--drivers/gpu/drm/i915/soc/intel_gmch.h20
-rw-r--r--drivers/gpu/drm/i915/soc/intel_rom.c161
-rw-r--r--drivers/gpu/drm/i915/soc/intel_rom.h25
-rw-r--r--drivers/gpu/drm/i915/vlv_iosf_sb.c231
-rw-r--r--drivers/gpu/drm/i915/vlv_iosf_sb.h37
-rw-r--r--drivers/gpu/drm/i915/vlv_iosf_sb_reg.h180
-rw-r--r--drivers/gpu/drm/i915/vlv_sideband.c266
-rw-r--r--drivers/gpu/drm/i915/vlv_sideband.h123
-rw-r--r--drivers/gpu/drm/i915/vlv_suspend.c39
-rw-r--r--drivers/gpu/drm/imagination/Kconfig20
-rw-r--r--drivers/gpu/drm/imagination/Makefile35
-rw-r--r--drivers/gpu/drm/imagination/pvr_ccb.c646
-rw-r--r--drivers/gpu/drm/imagination/pvr_ccb.h71
-rw-r--r--drivers/gpu/drm/imagination/pvr_cccb.c267
-rw-r--r--drivers/gpu/drm/imagination/pvr_cccb.h110
-rw-r--r--drivers/gpu/drm/imagination/pvr_context.c485
-rw-r--r--drivers/gpu/drm/imagination/pvr_context.h226
-rw-r--r--drivers/gpu/drm/imagination/pvr_debugfs.c52
-rw-r--r--drivers/gpu/drm/imagination/pvr_debugfs.h29
-rw-r--r--drivers/gpu/drm/imagination/pvr_device.c773
-rw-r--r--drivers/gpu/drm/imagination/pvr_device.h794
-rw-r--r--drivers/gpu/drm/imagination/pvr_device_info.c255
-rw-r--r--drivers/gpu/drm/imagination/pvr_device_info.h186
-rw-r--r--drivers/gpu/drm/imagination/pvr_drv.c1535
-rw-r--r--drivers/gpu/drm/imagination/pvr_drv.h128
-rw-r--r--drivers/gpu/drm/imagination/pvr_free_list.c624
-rw-r--r--drivers/gpu/drm/imagination/pvr_free_list.h195
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw.c1515
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw.h496
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_info.h135
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_meta.c561
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_meta.h14
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_mips.c199
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_mips.h49
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_riscv.c165
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_startstop.c323
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_startstop.h13
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_trace.c466
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_trace.h76
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_util.c66
-rw-r--r--drivers/gpu/drm/imagination/pvr_gem.c416
-rw-r--r--drivers/gpu/drm/imagination/pvr_gem.h172
-rw-r--r--drivers/gpu/drm/imagination/pvr_hwrt.c546
-rw-r--r--drivers/gpu/drm/imagination/pvr_hwrt.h166
-rw-r--r--drivers/gpu/drm/imagination/pvr_job.c780
-rw-r--r--drivers/gpu/drm/imagination/pvr_job.h161
-rw-r--r--drivers/gpu/drm/imagination/pvr_mmu.c2646
-rw-r--r--drivers/gpu/drm/imagination/pvr_mmu.h108
-rw-r--r--drivers/gpu/drm/imagination/pvr_params.c147
-rw-r--r--drivers/gpu/drm/imagination/pvr_params.h72
-rw-r--r--drivers/gpu/drm/imagination/pvr_power.c703
-rw-r--r--drivers/gpu/drm/imagination/pvr_power.h59
-rw-r--r--drivers/gpu/drm/imagination/pvr_queue.c1453
-rw-r--r--drivers/gpu/drm/imagination/pvr_queue.h173
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_cr_defs.h6308
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_cr_defs_client.h159
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_defs.h179
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif.h2188
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_check.h493
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_client.h373
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_client_check.h133
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_common.h60
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_dev_info.h113
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_resetframework.h28
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_sf.h1648
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_shared.h258
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_shared_check.h108
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_stream.h78
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_heap_config.h113
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_meta.h356
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_mips.h335
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_mips_check.h58
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_mmu_defs.h136
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_riscv.h41
-rw-r--r--drivers/gpu/drm/imagination/pvr_stream.c281
-rw-r--r--drivers/gpu/drm/imagination/pvr_stream.h75
-rw-r--r--drivers/gpu/drm/imagination/pvr_stream_defs.c351
-rw-r--r--drivers/gpu/drm/imagination/pvr_stream_defs.h16
-rw-r--r--drivers/gpu/drm/imagination/pvr_sync.c289
-rw-r--r--drivers/gpu/drm/imagination/pvr_sync.h84
-rw-r--r--drivers/gpu/drm/imagination/pvr_vm.c1194
-rw-r--r--drivers/gpu/drm/imagination/pvr_vm.h70
-rw-r--r--drivers/gpu/drm/imagination/pvr_vm_mips.c237
-rw-r--r--drivers/gpu/drm/imagination/pvr_vm_mips.h22
-rw-r--r--drivers/gpu/drm/imx/Kconfig44
-rw-r--r--drivers/gpu/drm/imx/Makefile12
-rw-r--r--drivers/gpu/drm/imx/dc/Kconfig13
-rw-r--r--drivers/gpu/drm/imx/dc/Makefile7
-rw-r--r--drivers/gpu/drm/imx/dc/dc-cf.c172
-rw-r--r--drivers/gpu/drm/imx/dc/dc-crtc.c555
-rw-r--r--drivers/gpu/drm/imx/dc/dc-de.c177
-rw-r--r--drivers/gpu/drm/imx/dc/dc-de.h59
-rw-r--r--drivers/gpu/drm/imx/dc/dc-drv.c293
-rw-r--r--drivers/gpu/drm/imx/dc/dc-drv.h102
-rw-r--r--drivers/gpu/drm/imx/dc/dc-ed.c288
-rw-r--r--drivers/gpu/drm/imx/dc/dc-fg.c376
-rw-r--r--drivers/gpu/drm/imx/dc/dc-fl.c185
-rw-r--r--drivers/gpu/drm/imx/dc/dc-fu.c258
-rw-r--r--drivers/gpu/drm/imx/dc/dc-fu.h129
-rw-r--r--drivers/gpu/drm/imx/dc/dc-fw.c222
-rw-r--r--drivers/gpu/drm/imx/dc/dc-ic.c282
-rw-r--r--drivers/gpu/drm/imx/dc/dc-kms.c143
-rw-r--r--drivers/gpu/drm/imx/dc/dc-kms.h131
-rw-r--r--drivers/gpu/drm/imx/dc/dc-lb.c325
-rw-r--r--drivers/gpu/drm/imx/dc/dc-pe.c158
-rw-r--r--drivers/gpu/drm/imx/dc/dc-pe.h101
-rw-r--r--drivers/gpu/drm/imx/dc/dc-plane.c224
-rw-r--r--drivers/gpu/drm/imx/dc/dc-tc.c141
-rw-r--r--drivers/gpu/drm/imx/dcss/Kconfig8
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-blkctl.c13
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-crtc.c6
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-ctxld.c14
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-dev.c48
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-dev.h8
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-dpr.c21
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-drv.c42
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-dtg.c30
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-kms.c26
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-kms.h1
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-plane.c25
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-scaler.c25
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-ss.c12
-rw-r--r--drivers/gpu/drm/imx/imx-drm.h45
-rw-r--r--drivers/gpu/drm/imx/ipuv3/Kconfig49
-rw-r--r--drivers/gpu/drm/imx/ipuv3/Makefile11
-rw-r--r--drivers/gpu/drm/imx/ipuv3/dw_hdmi-imx.c (renamed from drivers/gpu/drm/imx/dw_hdmi-imx.c)13
-rw-r--r--drivers/gpu/drm/imx/ipuv3/imx-drm-core.c (renamed from drivers/gpu/drm/imx/imx-drm-core.c)67
-rw-r--r--drivers/gpu/drm/imx/ipuv3/imx-drm.h31
-rw-r--r--drivers/gpu/drm/imx/ipuv3/imx-ldb.c (renamed from drivers/gpu/drm/imx/imx-ldb.c)219
-rw-r--r--drivers/gpu/drm/imx/ipuv3/imx-tve.c (renamed from drivers/gpu/drm/imx/imx-tve.c)54
-rw-r--r--drivers/gpu/drm/imx/ipuv3/ipuv3-crtc.c (renamed from drivers/gpu/drm/imx/ipuv3-crtc.c)16
-rw-r--r--drivers/gpu/drm/imx/ipuv3/ipuv3-plane.c (renamed from drivers/gpu/drm/imx/ipuv3-plane.c)82
-rw-r--r--drivers/gpu/drm/imx/ipuv3/ipuv3-plane.h (renamed from drivers/gpu/drm/imx/ipuv3-plane.h)0
-rw-r--r--drivers/gpu/drm/imx/ipuv3/parallel-display.c288
-rw-r--r--drivers/gpu/drm/imx/lcdc/Kconfig10
-rw-r--r--drivers/gpu/drm/imx/lcdc/Makefile1
-rw-r--r--drivers/gpu/drm/imx/lcdc/imx-lcdc.c537
-rw-r--r--drivers/gpu/drm/imx/parallel-display.c387
-rw-r--r--drivers/gpu/drm/ingenic/Kconfig18
-rw-r--r--drivers/gpu/drm/ingenic/Makefile1
-rw-r--r--drivers/gpu/drm/ingenic/ingenic-drm-drv.c587
-rw-r--r--drivers/gpu/drm/ingenic/ingenic-drm.h41
-rw-r--r--drivers/gpu/drm/ingenic/ingenic-dw-hdmi.c103
-rw-r--r--drivers/gpu/drm/ingenic/ingenic-ipu.c154
-rw-r--r--drivers/gpu/drm/kmb/Kconfig6
-rw-r--r--drivers/gpu/drm/kmb/kmb_crtc.c1
-rw-r--r--drivers/gpu/drm/kmb/kmb_drv.c20
-rw-r--r--drivers/gpu/drm/kmb/kmb_drv.h1
-rw-r--r--drivers/gpu/drm/kmb/kmb_dsi.c5
-rw-r--r--drivers/gpu/drm/kmb/kmb_plane.c29
-rw-r--r--drivers/gpu/drm/lib/drm_random.c4
-rw-r--r--drivers/gpu/drm/lib/drm_random.h4
-rw-r--r--drivers/gpu/drm/lima/lima_bcast.c12
-rw-r--r--drivers/gpu/drm/lima/lima_bcast.h3
-rw-r--r--drivers/gpu/drm/lima/lima_ctx.c3
-rw-r--r--drivers/gpu/drm/lima/lima_ctx.h1
-rw-r--r--drivers/gpu/drm/lima/lima_devfreq.c7
-rw-r--r--drivers/gpu/drm/lima/lima_device.c3
-rw-r--r--drivers/gpu/drm/lima/lima_drv.c42
-rw-r--r--drivers/gpu/drm/lima/lima_drv.h5
-rw-r--r--drivers/gpu/drm/lima/lima_gem.c88
-rw-r--r--drivers/gpu/drm/lima/lima_gp.c49
-rw-r--r--drivers/gpu/drm/lima/lima_l2_cache.c6
-rw-r--r--drivers/gpu/drm/lima/lima_mmu.c23
-rw-r--r--drivers/gpu/drm/lima/lima_pmu.c3
-rw-r--r--drivers/gpu/drm/lima/lima_pp.c59
-rw-r--r--drivers/gpu/drm/lima/lima_sched.c86
-rw-r--r--drivers/gpu/drm/lima/lima_sched.h7
-rw-r--r--drivers/gpu/drm/lima/lima_trace.h8
-rw-r--r--drivers/gpu/drm/logicvc/Kconfig12
-rw-r--r--drivers/gpu/drm/logicvc/Makefile9
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_crtc.c280
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_crtc.h21
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_drm.c505
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_drm.h67
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_interface.c213
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_interface.h28
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_layer.c630
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_layer.h64
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_mode.c76
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_mode.h15
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_of.c185
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_of.h46
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_regs.h80
-rw-r--r--drivers/gpu/drm/loongson/Kconfig20
-rw-r--r--drivers/gpu/drm/loongson/Makefile22
-rw-r--r--drivers/gpu/drm/loongson/loongson_device.c102
-rw-r--r--drivers/gpu/drm/loongson/loongson_module.c33
-rw-r--r--drivers/gpu/drm/loongson/loongson_module.h12
-rw-r--r--drivers/gpu/drm/loongson/lsdc_benchmark.c134
-rw-r--r--drivers/gpu/drm/loongson/lsdc_benchmark.h13
-rw-r--r--drivers/gpu/drm/loongson/lsdc_crtc.c1026
-rw-r--r--drivers/gpu/drm/loongson/lsdc_debugfs.c111
-rw-r--r--drivers/gpu/drm/loongson/lsdc_drv.c463
-rw-r--r--drivers/gpu/drm/loongson/lsdc_drv.h388
-rw-r--r--drivers/gpu/drm/loongson/lsdc_gem.c290
-rw-r--r--drivers/gpu/drm/loongson/lsdc_gem.h37
-rw-r--r--drivers/gpu/drm/loongson/lsdc_gfxpll.c199
-rw-r--r--drivers/gpu/drm/loongson/lsdc_gfxpll.h52
-rw-r--r--drivers/gpu/drm/loongson/lsdc_i2c.c179
-rw-r--r--drivers/gpu/drm/loongson/lsdc_i2c.h29
-rw-r--r--drivers/gpu/drm/loongson/lsdc_irq.c75
-rw-r--r--drivers/gpu/drm/loongson/lsdc_irq.h16
-rw-r--r--drivers/gpu/drm/loongson/lsdc_output.h21
-rw-r--r--drivers/gpu/drm/loongson/lsdc_output_7a1000.c178
-rw-r--r--drivers/gpu/drm/loongson/lsdc_output_7a2000.c552
-rw-r--r--drivers/gpu/drm/loongson/lsdc_pixpll.c484
-rw-r--r--drivers/gpu/drm/loongson/lsdc_pixpll.h86
-rw-r--r--drivers/gpu/drm/loongson/lsdc_plane.c794
-rw-r--r--drivers/gpu/drm/loongson/lsdc_probe.c56
-rw-r--r--drivers/gpu/drm/loongson/lsdc_probe.h12
-rw-r--r--drivers/gpu/drm/loongson/lsdc_regs.h406
-rw-r--r--drivers/gpu/drm/loongson/lsdc_ttm.c589
-rw-r--r--drivers/gpu/drm/loongson/lsdc_ttm.h99
-rw-r--r--drivers/gpu/drm/mcde/Kconfig5
-rw-r--r--drivers/gpu/drm/mcde/mcde_clk_div.c14
-rw-r--r--drivers/gpu/drm/mcde/mcde_display.c10
-rw-r--r--drivers/gpu/drm/mcde/mcde_drv.c44
-rw-r--r--drivers/gpu/drm/mcde/mcde_dsi.c17
-rw-r--r--drivers/gpu/drm/mediatek/Kconfig45
-rw-r--r--drivers/gpu/drm/mediatek/Makefile28
-rw-r--r--drivers/gpu/drm/mediatek/mtk_cec.c44
-rw-r--r--drivers/gpu/drm/mediatek/mtk_crtc.c1186
-rw-r--r--drivers/gpu/drm/mediatek/mtk_crtc.h29
-rw-r--r--drivers/gpu/drm/mediatek/mtk_ddp_comp.c710
-rw-r--r--drivers/gpu/drm/mediatek/mtk_ddp_comp.h365
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_aal.c127
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_ccorr.c71
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_color.c37
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_drv.h94
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_gamma.c234
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_merge.c376
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_ovl.c418
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c679
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_rdma.c130
-rw-r--r--drivers/gpu/drm/mediatek/mtk_dp.c2951
-rw-r--r--drivers/gpu/drm/mediatek/mtk_dp_reg.h362
-rw-r--r--drivers/gpu/drm/mediatek/mtk_dpi.c802
-rw-r--r--drivers/gpu/drm/mediatek/mtk_dpi_regs.h27
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_crtc.c855
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_crtc.h26
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c504
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h204
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_drv.c1004
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_drv.h51
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_gem.c266
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_gem.h48
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_plane.c270
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_plane.h44
-rw-r--r--drivers/gpu/drm/mediatek/mtk_dsi.c795
-rw-r--r--drivers/gpu/drm/mediatek/mtk_ethdr.c396
-rw-r--r--drivers/gpu/drm/mediatek/mtk_ethdr.h26
-rw-r--r--drivers/gpu/drm/mediatek/mtk_gem.c289
-rw-r--r--drivers/gpu/drm/mediatek/mtk_gem.h48
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi.c887
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi.h14
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_common.c456
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_common.h198
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c34
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_ddc_v2.c396
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_regs_v2.h263
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_v2.c1521
-rw-r--r--drivers/gpu/drm/mediatek/mtk_mdp_rdma.c347
-rw-r--r--drivers/gpu/drm/mediatek/mtk_mdp_rdma.h20
-rw-r--r--drivers/gpu/drm/mediatek/mtk_padding.c154
-rw-r--r--drivers/gpu/drm/mediatek/mtk_plane.c367
-rw-r--r--drivers/gpu/drm/mediatek/mtk_plane.h54
-rw-r--r--drivers/gpu/drm/meson/Kconfig17
-rw-r--r--drivers/gpu/drm/meson/Makefile4
-rw-r--r--drivers/gpu/drm/meson/meson_drv.c205
-rw-r--r--drivers/gpu/drm/meson/meson_drv.h11
-rw-r--r--drivers/gpu/drm/meson/meson_dw_hdmi.c454
-rw-r--r--drivers/gpu/drm/meson/meson_dw_mipi_dsi.c358
-rw-r--r--drivers/gpu/drm/meson/meson_dw_mipi_dsi.h160
-rw-r--r--drivers/gpu/drm/meson/meson_encoder_cvbs.c298
-rw-r--r--drivers/gpu/drm/meson/meson_encoder_cvbs.h30
-rw-r--r--drivers/gpu/drm/meson/meson_encoder_dsi.c171
-rw-r--r--drivers/gpu/drm/meson/meson_encoder_dsi.h13
-rw-r--r--drivers/gpu/drm/meson/meson_encoder_hdmi.c496
-rw-r--r--drivers/gpu/drm/meson/meson_encoder_hdmi.h13
-rw-r--r--drivers/gpu/drm/meson/meson_osd_afbcd.c41
-rw-r--r--drivers/gpu/drm/meson/meson_osd_afbcd.h1
-rw-r--r--drivers/gpu/drm/meson/meson_overlay.c22
-rw-r--r--drivers/gpu/drm/meson/meson_plane.c35
-rw-r--r--drivers/gpu/drm/meson/meson_registers.h25
-rw-r--r--drivers/gpu/drm/meson/meson_vclk.c226
-rw-r--r--drivers/gpu/drm/meson/meson_vclk.h13
-rw-r--r--drivers/gpu/drm/meson/meson_venc.c248
-rw-r--r--drivers/gpu/drm/meson/meson_venc.h6
-rw-r--r--drivers/gpu/drm/meson/meson_venc_cvbs.c293
-rw-r--r--drivers/gpu/drm/meson/meson_venc_cvbs.h29
-rw-r--r--drivers/gpu/drm/meson/meson_viu.c29
-rw-r--r--drivers/gpu/drm/meson/meson_vpp.c2
-rw-r--r--drivers/gpu/drm/meson/meson_vpp.h2
-rw-r--r--drivers/gpu/drm/mga/Makefile11
-rw-r--r--drivers/gpu/drm/mga/mga_dma.c1168
-rw-r--r--drivers/gpu/drm/mga/mga_drv.c104
-rw-r--r--drivers/gpu/drm/mga/mga_drv.h685
-rw-r--r--drivers/gpu/drm/mga/mga_ioc32.c197
-rw-r--r--drivers/gpu/drm/mga/mga_irq.c169
-rw-r--r--drivers/gpu/drm/mga/mga_state.c1099
-rw-r--r--drivers/gpu/drm/mga/mga_warp.c167
-rw-r--r--drivers/gpu/drm/mgag200/Kconfig15
-rw-r--r--drivers/gpu/drm/mgag200/Makefile17
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_bmc.c99
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_ddc.c178
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_ddc.h11
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_drv.c436
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_drv.h323
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200.c408
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200eh.c280
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200eh3.c185
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200eh5.c205
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200er.c313
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200ev.c318
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200ew3.c203
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200se.c518
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200wb.c327
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_i2c.c159
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_mm.c116
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_mode.c1076
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_pll.c993
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_reg.h13
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_vga.c73
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_vga_bmc.c157
-rw-r--r--drivers/gpu/drm/msm/.gitignore1
-rw-r--r--drivers/gpu/drm/msm/Kconfig115
-rw-r--r--drivers/gpu/drm/msm/Makefile176
-rw-r--r--drivers/gpu/drm/msm/adreno/a2xx.xml.h3212
-rw-r--r--drivers/gpu/drm/msm/adreno/a2xx_catalog.c48
-rw-r--r--drivers/gpu/drm/msm/adreno/a2xx_gpu.c112
-rw-r--r--drivers/gpu/drm/msm/adreno/a2xx_gpu.h7
-rw-r--r--drivers/gpu/drm/msm/adreno/a2xx_gpummu.c122
-rw-r--r--drivers/gpu/drm/msm/adreno/a3xx.xml.h3247
-rw-r--r--drivers/gpu/drm/msm/adreno/a3xx_catalog.c88
-rw-r--r--drivers/gpu/drm/msm/adreno/a3xx_gpu.c105
-rw-r--r--drivers/gpu/drm/msm/adreno/a3xx_gpu.h2
-rw-r--r--drivers/gpu/drm/msm/adreno/a4xx.xml.h4275
-rw-r--r--drivers/gpu/drm/msm/adreno/a4xx_catalog.c48
-rw-r--r--drivers/gpu/drm/msm/adreno/a4xx_gpu.c91
-rw-r--r--drivers/gpu/drm/msm/adreno/a4xx_gpu.h2
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx.xml.h5483
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_catalog.c153
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_debugfs.c10
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_gpu.c314
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_gpu.h3
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_power.c6
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_preempt.c53
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx.xml.h7606
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_catalog.c1961
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gmu.c1255
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gmu.h81
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gmu.xml.h483
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu.c2640
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu.h256
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c1023
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h473
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_hfi.c476
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_hfi.h54
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_preempt.c477
-rw-r--r--drivers/gpu/drm/msm/adreno/a8xx_gpu.c1201
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_common.xml.h680
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_device.c665
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h935
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h755
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h1446
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gpu.c578
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gpu.h516
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h2355
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h453
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_12_0_sm8750.h494
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_12_2_glymur.h541
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_14_msm8937.h205
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_15_msm8917.h184
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_16_msm8953.h212
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_7_msm8996.h328
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h318
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_2_sdm660.h282
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_3_sdm630.h220
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h333
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h152
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h389
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h413
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_2_sm7150.h317
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_3_sm6150.h254
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h225
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h388
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h219
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h152
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h237
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h145
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h163
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h400
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h261
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h429
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h413
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_4_sa8775p.h453
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h408
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_1_sar2130p.h408
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h449
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.h70
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c350
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h84
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c1078
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h65
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c2072
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h170
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h231
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c523
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c353
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c697
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c866
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_formats.h67
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_blk.h25
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c1443
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h496
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.c257
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.h134
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c676
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h164
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cwb.c75
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cwb.h70
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c216
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.h79
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc_1_2.c396
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c84
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.h20
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c606
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h50
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c520
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h95
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c317
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h42
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h272
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.c59
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.h22
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c185
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h91
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c655
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h225
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c199
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.h33
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c207
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h78
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.c57
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.h14
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c249
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h83
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hwio.h31
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c187
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.h40
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c1198
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h131
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c273
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c2221
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h109
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c927
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h123
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h283
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c108
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.h22
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c144
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.h32
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4.xml.h1182
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c64
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c35
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_dtv_encoder.c37
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_irq.c9
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c265
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.h28
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c137
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c120
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_pll.c88
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_plane.c157
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5.xml.h1980
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c220
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.h18
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_cmd_encoder.c45
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c74
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c24
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h1
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c71
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_irq.c11
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c362
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h19
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_mdss.c274
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c23
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h8
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c26
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h6
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c211
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c41
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.h7
-rw-r--r--drivers/gpu/drm/msm/disp/mdp_common.xml.h112
-rw-r--r--drivers/gpu/drm/msm/disp/mdp_format.c632
-rw-r--r--drivers/gpu/drm/msm/disp/mdp_format.h77
-rw-r--r--drivers/gpu/drm/msm/disp/mdp_kms.h18
-rw-r--r--drivers/gpu/drm/msm/disp/msm_disp_snapshot.c29
-rw-r--r--drivers/gpu/drm/msm/disp/msm_disp_snapshot.h25
-rw-r--r--drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c60
-rw-r--r--drivers/gpu/drm/msm/dp/dp_audio.c656
-rw-r--r--drivers/gpu/drm/msm/dp/dp_audio.h55
-rw-r--r--drivers/gpu/drm/msm/dp/dp_aux.c447
-rw-r--r--drivers/gpu/drm/msm/dp/dp_aux.h31
-rw-r--r--drivers/gpu/drm/msm/dp/dp_catalog.c1044
-rw-r--r--drivers/gpu/drm/msm/dp/dp_catalog.h135
-rw-r--r--drivers/gpu/drm/msm/dp/dp_ctrl.c1654
-rw-r--r--drivers/gpu/drm/msm/dp/dp_ctrl.h56
-rw-r--r--drivers/gpu/drm/msm/dp/dp_debug.c166
-rw-r--r--drivers/gpu/drm/msm/dp/dp_debug.h54
-rw-r--r--drivers/gpu/drm/msm/dp/dp_display.c1713
-rw-r--r--drivers/gpu/drm/msm/dp/dp_display.h37
-rw-r--r--drivers/gpu/drm/msm/dp/dp_drm.c408
-rw-r--r--drivers/gpu/drm/msm/dp/dp_drm.h33
-rw-r--r--drivers/gpu/drm/msm/dp/dp_hpd.c69
-rw-r--r--drivers/gpu/drm/msm/dp/dp_hpd.h80
-rw-r--r--drivers/gpu/drm/msm/dp/dp_link.c766
-rw-r--r--drivers/gpu/drm/msm/dp/dp_link.h91
-rw-r--r--drivers/gpu/drm/msm/dp/dp_panel.c709
-rw-r--r--drivers/gpu/drm/msm/dp/dp_panel.h74
-rw-r--r--drivers/gpu/drm/msm/dp/dp_parser.c337
-rw-r--r--drivers/gpu/drm/msm/dp/dp_parser.h144
-rw-r--r--drivers/gpu/drm/msm/dp/dp_power.c411
-rw-r--r--drivers/gpu/drm/msm/dp/dp_power.h107
-rw-r--r--drivers/gpu/drm/msm/dp/dp_reg.h58
-rw-r--r--drivers/gpu/drm/msm/dp/dp_utils.c88
-rw-r--r--drivers/gpu/drm/msm/dp/dp_utils.h36
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi.c162
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi.h63
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi.xml.h709
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_cfg.c295
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_cfg.h16
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_host.c1019
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_manager.c540
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_phy_10nm.xml.h228
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_phy_14nm.xml.h310
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_phy_20nm.xml.h238
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_phy_28nm.xml.h385
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_phy_28nm_8960.xml.h287
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_phy_5nm.xml.h480
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_phy_7nm.xml.h482
-rw-r--r--drivers/gpu/drm/msm/dsi/mmss_cc.xml.h132
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy.c272
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy.h34
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c536
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c385
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_20nm.c123
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c527
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c292
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c988
-rw-r--r--drivers/gpu/drm/msm/dsi/sfpb.xml.h71
-rw-r--r--drivers/gpu/drm/msm/edp/edp.c198
-rw-r--r--drivers/gpu/drm/msm/edp/edp.h77
-rw-r--r--drivers/gpu/drm/msm/edp/edp.xml.h388
-rw-r--r--drivers/gpu/drm/msm/edp/edp_aux.c265
-rw-r--r--drivers/gpu/drm/msm/edp/edp_bridge.c111
-rw-r--r--drivers/gpu/drm/msm/edp/edp_connector.c132
-rw-r--r--drivers/gpu/drm/msm/edp/edp_ctrl.c1373
-rw-r--r--drivers/gpu/drm/msm/edp/edp_phy.c98
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi.c628
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi.h109
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi.xml.h1378
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_audio.c199
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_bridge.c464
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_connector.c445
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_hdcp.c2
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_hpd.c215
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_i2c.c15
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_phy.c20
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c54
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c765
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_phy_8x60.c12
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c39
-rw-r--r--drivers/gpu/drm/msm/hdmi/qfprom.xml.h62
-rw-r--r--drivers/gpu/drm/msm/msm_atomic.c61
-rw-r--r--drivers/gpu/drm/msm/msm_debugfs.c275
-rw-r--r--drivers/gpu/drm/msm/msm_drv.c1306
-rw-r--r--drivers/gpu/drm/msm/msm_drv.h516
-rw-r--r--drivers/gpu/drm/msm/msm_dsc_helper.h27
-rw-r--r--drivers/gpu/drm/msm/msm_fb.c113
-rw-r--r--drivers/gpu/drm/msm/msm_fbdev.c191
-rw-r--r--drivers/gpu/drm/msm/msm_fence.c118
-rw-r--r--drivers/gpu/drm/msm/msm_fence.h38
-rw-r--r--drivers/gpu/drm/msm/msm_gem.c1039
-rw-r--r--drivers/gpu/drm/msm/msm_gem.h436
-rw-r--r--drivers/gpu/drm/msm/msm_gem_prime.c99
-rw-r--r--drivers/gpu/drm/msm/msm_gem_shrinker.c272
-rw-r--r--drivers/gpu/drm/msm/msm_gem_submit.c765
-rw-r--r--drivers/gpu/drm/msm/msm_gem_vma.c1630
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.c480
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.h375
-rw-r--r--drivers/gpu/drm/msm/msm_gpu_devfreq.c226
-rw-r--r--drivers/gpu/drm/msm/msm_gpu_trace.h90
-rw-r--r--drivers/gpu/drm/msm/msm_gpummu.c121
-rw-r--r--drivers/gpu/drm/msm/msm_io_utils.c161
-rw-r--r--drivers/gpu/drm/msm/msm_iommu.c562
-rw-r--r--drivers/gpu/drm/msm/msm_kms.c388
-rw-r--r--drivers/gpu/drm/msm/msm_kms.h96
-rw-r--r--drivers/gpu/drm/msm/msm_mdss.c605
-rw-r--r--drivers/gpu/drm/msm/msm_mmu.h53
-rw-r--r--drivers/gpu/drm/msm/msm_perf.c10
-rw-r--r--drivers/gpu/drm/msm/msm_rd.c118
-rw-r--r--drivers/gpu/drm/msm/msm_ringbuffer.c51
-rw-r--r--drivers/gpu/drm/msm/msm_ringbuffer.h49
-rw-r--r--drivers/gpu/drm/msm/msm_submitqueue.c135
-rw-r--r--drivers/gpu/drm/msm/msm_syncobj.c172
-rw-r--r--drivers/gpu/drm/msm/msm_syncobj.h37
-rw-r--r--drivers/gpu/drm/msm/registers/.gitignore4
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a2xx.xml1865
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a3xx.xml1751
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a4xx.xml2409
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a5xx.xml3039
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a6xx.xml5021
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml158
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml429
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml267
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a6xx_perfcntrs.xml600
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a7xx_enums.xml216
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a7xx_perfcntrs.xml1030
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a8xx_descriptors.xml121
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a8xx_enums.xml299
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/adreno_common.xml412
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml2445
-rw-r--r--drivers/gpu/drm/msm/registers/display/dsi.xml390
-rw-r--r--drivers/gpu/drm/msm/registers/display/dsi_phy_10nm.xml102
-rw-r--r--drivers/gpu/drm/msm/registers/display/dsi_phy_14nm.xml135
-rw-r--r--drivers/gpu/drm/msm/registers/display/dsi_phy_20nm.xml100
-rw-r--r--drivers/gpu/drm/msm/registers/display/dsi_phy_28nm.xml180
-rw-r--r--drivers/gpu/drm/msm/registers/display/dsi_phy_28nm_8960.xml134
-rw-r--r--drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml261
-rw-r--r--drivers/gpu/drm/msm/registers/display/edp.xml239
-rw-r--r--drivers/gpu/drm/msm/registers/display/hdmi.xml1104
-rw-r--r--drivers/gpu/drm/msm/registers/display/mdp4.xml504
-rw-r--r--drivers/gpu/drm/msm/registers/display/mdp5.xml790
-rw-r--r--drivers/gpu/drm/msm/registers/display/mdp_common.xml90
-rw-r--r--drivers/gpu/drm/msm/registers/display/mdss.xml38
-rw-r--r--drivers/gpu/drm/msm/registers/display/msm.xml32
-rw-r--r--drivers/gpu/drm/msm/registers/display/sfpb.xml17
-rw-r--r--drivers/gpu/drm/msm/registers/freedreno_copyright.xml40
-rw-r--r--drivers/gpu/drm/msm/registers/gen_header.py1007
-rw-r--r--drivers/gpu/drm/msm/registers/rules-fd.xsd404
-rw-r--r--drivers/gpu/drm/mxsfb/Kconfig22
-rw-r--r--drivers/gpu/drm/mxsfb/Makefile2
-rw-r--r--drivers/gpu/drm/mxsfb/lcdif_drv.c381
-rw-r--r--drivers/gpu/drm/mxsfb/lcdif_drv.h42
-rw-r--r--drivers/gpu/drm/mxsfb/lcdif_kms.c780
-rw-r--r--drivers/gpu/drm/mxsfb/lcdif_regs.h265
-rw-r--r--drivers/gpu/drm/mxsfb/mxsfb_drv.c94
-rw-r--r--drivers/gpu/drm/mxsfb/mxsfb_drv.h3
-rw-r--r--drivers/gpu/drm/mxsfb/mxsfb_kms.c282
-rw-r--r--drivers/gpu/drm/mxsfb/mxsfb_regs.h1
-rw-r--r--drivers/gpu/drm/nouveau/Kbuild20
-rw-r--r--drivers/gpu/drm/nouveau/Kconfig45
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/Kbuild3
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/crtc.c167
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/dac.c4
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/dfp.c21
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/disp.c28
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/disp.h9
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/hw.c9
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/i2c/Kbuild5
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/i2c/ch7006_drv.c (renamed from drivers/gpu/drm/i2c/ch7006_drv.c)50
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/i2c/ch7006_mode.c (renamed from drivers/gpu/drm/i2c/ch7006_mode.c)8
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/i2c/ch7006_priv.h (renamed from drivers/gpu/drm/i2c/ch7006_priv.h)12
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/i2c/sil164_drv.c (renamed from drivers/gpu/drm/i2c/sil164_drv.c)46
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/nouveau_i2c_encoder.c145
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/overlay.c13
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/tvmodesnv17.c1
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/tvnv04.c30
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/tvnv17.c25
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/Kbuild5
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/atom.h6
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/base507c.c43
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/base827c.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/base907c.c16
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/base917c.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/core.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/core.h6
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/core507d.c13
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/corec37d.c9
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/corec57d.c6
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/coreca7d.c122
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/crc.c97
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/crc.h8
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/crc907d.c12
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/crcc37d.c49
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/crcc37d.h40
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/crcc57d.c58
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/crcca7d.c98
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/curs.c1
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/curs507a.c51
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/dac507d.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/dac907d.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/disp.c1411
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/disp.h19
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/head.c74
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/head.h8
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/head507d.c24
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/head827d.c10
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/head907d.c32
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/head917d.c7
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/headc37d.c19
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/headc57d.c25
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/headca7d.c297
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/oimm507b.c6
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/ovly507e.c28
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/ovly827e.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/ovly907e.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/pior507d.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/sor507d.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/sor907d.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/sorc37d.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/tile.h63
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wimm.c1
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c14
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wndw.c215
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wndw.h17
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wndwc37e.c43
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c19
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wndwc67e.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wndwca7e.c242
-rw-r--r--drivers/gpu/drm/nouveau/gv100_fence.c98
-rw-r--r--drivers/gpu/drm/nouveau/include/dispnv04/i2c/ch7006.h87
-rw-r--r--drivers/gpu/drm/nouveau/include/dispnv04/i2c/encoder_i2c.h220
-rw-r--r--drivers/gpu/drm/nouveau/include/dispnv04/i2c/sil164.h64
-rw-r--r--drivers/gpu/drm/nouveau/include/nvfw/acr.h85
-rw-r--r--drivers/gpu/drm/nouveau/include/nvfw/hs.h32
-rw-r--r--drivers/gpu/drm/nouveau/include/nvfw/ls.h51
-rw-r--r--drivers/gpu/drm/nouveau/include/nvfw/sec2.h45
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl907d.h3
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/clc36f.h137
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/clc57d.h69
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/clc97b.h22
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/clca7d.h868
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/clca7e.h137
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/drf.h4
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gb100/dev_hshub_base.h28
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gb10b/dev_fbhub.h18
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gb202/dev_ce.h12
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gb202/dev_therm.h17
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/dev_falcon_v4.h20
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/dev_fb.h15
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/dev_fsp_pri.h28
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/dev_mmu.h173
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/dev_riscv_pri.h14
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/dev_therm.h17
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/dev_xtl_ep_pri.h10
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/pri_nv_xal_ep.h13
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/chan.h76
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl0046.h23
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl006b.h12
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl0080.h16
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl506e.h13
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl506f.h14
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl5070.h100
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl507a.h12
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl507b.h12
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl507c.h13
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl507d.h12
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl507e.h13
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl826e.h15
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl826f.h16
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl906f.h16
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cla06f.h18
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/class.h247
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/clb069.h5
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/clc36f.h19
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/clc37b.h11
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/clc37e.h13
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/client.h11
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/conn.h38
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/device.h37
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/disp.h3
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/driver.h5
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/event.h91
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/head.h23
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0000.h10
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0002.h39
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0003.h34
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0004.h5
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if000c.h26
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if000e.h26
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0010.h14
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0011.h33
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0012.h294
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0013.h35
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0014.h13
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0020.h45
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0021.h16
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/ioctl.h98
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/list.h353
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/log.h51
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/notify.h35
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/object.h32
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/os.h21
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/outp.h116
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/printf.h9
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/push.h14
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/push906f.h1
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/pushc97b.h18
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/vmm.h19
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/client.h17
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/device.h34
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/engine.h14
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/event.h66
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/falcon.h174
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/firmware.h29
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/intr.h73
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/layout.h17
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/memory.h3
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/notify.h39
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/object.h16
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/oclass.h2
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/os.h34
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/pci.h1
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h44
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h4
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h2
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h56
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h82
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h104
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h1
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h2
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/nvenc.h1
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/pm.h29
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h5
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/acr.h29
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h4
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios.h1
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/dcb.h1
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h4
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h8
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h21
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/fsp.h24
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h6
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h484
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h23
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h7
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h16
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h6
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h35
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h3
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h66
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h1
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/vfn.h23
-rw-r--r--drivers/gpu/drm/nouveau/include/nvrm/nvtypes.h26
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_abi16.c463
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_abi16.h61
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_acpi.c26
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_acpi.h4
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_backlight.c131
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bios.c38
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bios.h1
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.c413
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.h71
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo0039.c10
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo5039.c8
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo74c1.c4
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo85b5.c6
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo9039.c8
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo90b5.c6
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_boa0b5.c6
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_chan.c493
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_chan.h38
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_connector.c389
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_connector.h22
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_crtc.h7
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_debugfs.c82
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_debugfs.h15
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_display.c217
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_display.h6
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dma.c102
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dma.h21
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dmem.c407
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dp.c381
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drm.c598
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drv.h169
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_encoder.h60
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_exec.c408
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_exec.h60
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fbcon.c612
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fbcon.h82
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fence.c292
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fence.h20
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_gem.c152
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_gem.h3
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_hwmon.c143
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_ioc32.c4
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_led.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_led.h2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_mem.c77
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_mem.h20
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_nvif.c34
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_platform.c36
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_prime.c51
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_sched.c524
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_sched.h118
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_sgdma.c3
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_svm.c133
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_svm.h3
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_ttm.c77
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_usif.c429
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_usif.h10
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_uvmm.c2005
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_uvmm.h99
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_vga.c26
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_vga.h1
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_vmm.c7
-rw-r--r--drivers/gpu/drm/nouveau/nv04_fbcon.c257
-rw-r--r--drivers/gpu/drm/nouveau/nv04_fence.c2
-rw-r--r--drivers/gpu/drm/nouveau/nv10_fence.c8
-rw-r--r--drivers/gpu/drm/nouveau/nv17_fence.c25
-rw-r--r--drivers/gpu/drm/nouveau/nv50_display.h4
-rw-r--r--drivers/gpu/drm/nouveau/nv50_fbcon.c299
-rw-r--r--drivers/gpu/drm/nouveau/nv50_fence.c17
-rw-r--r--drivers/gpu/drm/nouveau/nv84_fence.c51
-rw-r--r--drivers/gpu/drm/nouveau/nvc0_fbcon.c297
-rw-r--r--drivers/gpu/drm/nouveau/nvc0_fence.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvif/Kbuild11
-rw-r--r--drivers/gpu/drm/nouveau/nvif/chan.c159
-rw-r--r--drivers/gpu/drm/nouveau/nvif/chan506f.c72
-rw-r--r--drivers/gpu/drm/nouveau/nvif/chan906f.c93
-rw-r--r--drivers/gpu/drm/nouveau/nvif/chanc36f.c77
-rw-r--r--drivers/gpu/drm/nouveau/nvif/client.c34
-rw-r--r--drivers/gpu/drm/nouveau/nvif/conn.c87
-rw-r--r--drivers/gpu/drm/nouveau/nvif/device.c15
-rw-r--r--drivers/gpu/drm/nouveau/nvif/disp.c64
-rw-r--r--drivers/gpu/drm/nouveau/nvif/driver.c32
-rw-r--r--drivers/gpu/drm/nouveau/nvif/event.c81
-rw-r--r--drivers/gpu/drm/nouveau/nvif/head.c58
-rw-r--r--drivers/gpu/drm/nouveau/nvif/mmu.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvif/notify.c210
-rw-r--r--drivers/gpu/drm/nouveau/nvif/object.c78
-rw-r--r--drivers/gpu/drm/nouveau/nvif/outp.c556
-rw-r--r--drivers/gpu/drm/nouveau/nvif/user.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvif/vmm.c101
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/Kbuild3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/client.c197
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/engine.c85
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/enum.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/event.c162
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/firmware.c204
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/intr.c442
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/ioctl.c203
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/memory.c15
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/notify.c163
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/object.c78
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/oproxy.c58
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/subdev.c117
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/uevent.c157
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/Kbuild1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/ga100.c96
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/ga102.c50
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/gb202.c16
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/gt215.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/gv100.c24
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h14
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/tu102.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/cipher/g84.c11
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/acpi.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/base.c902
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c43
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c89
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/user.c108
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild119
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c366
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/baseg84.c74
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/basegf119.c108
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp102.c32
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/basenv50.c119
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/capsgv100.c60
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/chan.c251
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/chan.h139
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/changf119.c62
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/changv100.c34
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c364
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h193
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.c53
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.h9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg84.c111
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg94.c57
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/coregf119.c231
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/coregk104.c126
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp102.c70
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/coregv100.c205
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/corenv50.c234
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgf119.c32
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgp102.c32
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgv100.c81
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/cursnv50.c64
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dacgf119.c70
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dacnv50.c121
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c96
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp102.c64
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgv100.c79
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c137
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c717
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h49
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/g84.c313
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c343
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c122
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c1062
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c297
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c23
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c77
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c160
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c50
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gp102.c144
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gt200.c73
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c231
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c902
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagf119.c62
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c51
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagv100.c30
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmig84.c91
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigf119.c82
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigk104.c82
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigm200.c36
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigt215.c91
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c84
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/head.c42
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h24
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/headgf119.c104
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/headgv100.c105
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/headnv04.c74
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/headnv50.c99
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.c10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h125
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c39
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp89.c53
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/nv04.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c1341
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h102
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgf119.c32
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgp102.c32
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmnv50.c64
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c175
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h91
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlyg84.c71
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygf119.c95
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygk104.c97
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp102.c32
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygt200.c74
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlynv50.c107
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/piocgf119.c78
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/piocnv50.c87
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/piornv50.c139
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h80
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootg84.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootg94.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootga102.c52
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgf119.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgk104.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgk110.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm107.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm200.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp100.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp102.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgt200.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgt215.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgv100.c53
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv04.c98
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c360
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/roottu102.c53
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg84.c38
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c291
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorga102.c140
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c196
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgk104.c54
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c68
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c160
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgp100.c93
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgt215.c69
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgv100.c155
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp77.c48
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp89.c53
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sornv50.c106
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c129
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c110
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c225
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/udisp.c114
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/uhead.c127
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c665
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/wimmgv100.c82
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/wndwgv100.c184
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/falcon.c10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild31
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c501
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.c255
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.h76
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c660
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h99
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c263
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/changf100.h29
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h54
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv04.h29
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.c276
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h53
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/chid.c111
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/chid.h25
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c226
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv10.c97
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv17.c98
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv40.c254
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/g84.c253
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/g98.c70
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c626
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c294
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gb202.c14
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c942
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.h38
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c1506
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h168
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c105
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c59
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c33
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c109
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c49
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c99
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp10b.c46
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifog84.c95
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c308
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c356
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c242
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifonv50.c93
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c81
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gv100.c251
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.c344
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.h23
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv10.c94
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv17.c103
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv40.c198
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c381
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.h20
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h232
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.c430
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.h137
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/runq.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/runq.h31
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu102.c473
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/ucgrp.c125
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/uchan.c418
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/usergv100.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/usertu102.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c33
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxga102.c77
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c224
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h78
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c24
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c44
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c72
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c80
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c60
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c62
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c35
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ga102.c358
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c509
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h81
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c119
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp108.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c203
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c14
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.h4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c39
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.h4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mpeg/priv.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvdec/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvdec/ga102.c63
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvdec/gm107.c21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvdec/priv.h6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvdec/tu102.c34
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvenc/Kbuild1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvenc/base.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvenc/gm107.c13
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvenc/priv.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvenc/tu102.c34
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/Kbuild11
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/base.c868
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/g84.c165
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/gf100.c243
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/gf100.h20
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/gf108.c66
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/gf117.c80
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/gk104.c184
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/gt200.c157
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/gt215.c138
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/nv40.c123
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/nv40.h15
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/nv50.c175
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/pm/priv.h105
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec/g98.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c98
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/ga102.c203
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c116
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/r535.c54
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c34
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/base.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.c26
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/gf100.c22
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/nv04.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/nv10.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.c26
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.h4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/nvsw.c35
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/priv.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/base.c261
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/cmdq.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/fw.c363
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/ga100.c68
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/ga102.c154
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c350
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/gp102.c82
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/msgq.c10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/priv.h8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/qmgr.h9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/tu102.c28
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/v1.c235
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/nvfw/acr.c41
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/nvfw/hs.c39
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/nvfw/ls.c72
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/Kbuild4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c154
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/ga100.c49
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/ga102.c330
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c199
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm20b.c44
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c31
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp108.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp10b.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/gv100.c67
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/hsfw.c176
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/lsfw.c144
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/priv.h104
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/tu102.c41
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.c14
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/priv.h3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/tu102.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c138
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/pmu.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/power_budget.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowrom.c14
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bus/gf100.c19
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv31.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv50.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c28
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.c320
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.h24
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gp10b.c185
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gp10b.h18
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/base.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/fbmem.h4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/g84.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/g98.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/ga100.c15
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c17
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gt215.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/mcp89.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c51
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c27
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c41
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu102.c126
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fault/user.c32
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c81
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c13
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c41
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb100.c37
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb202.c33
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c28
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gh100.c33
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk110.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm20b.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c18
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp102.c110
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp10b.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c15
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c52
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h24
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/r535.c87
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.c54
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c40
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp102.c31
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/tu102.c61
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fsp/Kbuild8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fsp/base.c66
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fsp/gb100.c24
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fsp/gb202.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fsp/gh100.c275
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fsp/priv.h29
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fuse/gm107.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gpio/base.c27
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gpio/ga102.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gpio/gk104.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/Kbuild12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ad102.c66
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c118
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c360
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c75
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga102.c200
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gb100.c35
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gb202.c38
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gh100.c358
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gv100.c35
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/priv.h77
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/Kbuild19
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/ad10x.c39
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/client.c49
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/engine.c189
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/engine.h20
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/ga100.c28
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/ga1xx.c39
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/gb10x.c30
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/gb20x.c44
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/gh100.c30
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/gpu.h70
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/gr.c87
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/gr.h55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/handles.h18
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/nvdec.c33
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/nvenc.c33
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/Kbuild25
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/alloc.c112
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/bar.c202
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/ce.c46
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/client.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/ctrl.c93
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/device.c148
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/disp.c1793
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/fbsr.c327
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/fifo.c617
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gr.c356
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c2205
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvdec.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvenc.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvjpg.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/alloc.h36
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/bar.h29
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/ce.h15
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/client.h20
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/ctrl.h21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/device.h30
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/disp.h741
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/engine.h260
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/event.h47
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/fbsr.h106
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/fifo.h350
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/gr.h73
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/gsp.h825
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/msgfn.h53
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/nvdec.h17
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/nvenc.h17
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/nvjpg.h17
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/ofa.h16
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/rpcfn.h225
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/vmm.h132
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/ofa.c44
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rm.c52
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c698
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c191
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/Kbuild9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/client.c28
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/disp.c263
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/fbsr.c149
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/fifo.c217
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/gr.c191
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/gsp.c216
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/client.h21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/disp.h355
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/engine.h318
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/fbsr.h19
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/fifo.h213
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/gr.h79
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/gsp.h634
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/msgfn.h57
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/ofa.h17
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/rpcfn.h249
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/ofa.c28
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/rm.c99
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/rm.h191
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/rpc.h18
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/tu1xx.c38
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c445
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu116.c61
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/anx9805.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c212
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxch.c215
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxch.h (renamed from drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h)0
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgf119.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/base.c38
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/padg94.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/padgf119.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/padgm200.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/Kbuild1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gh100.c28
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c23
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv40.c10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c78
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/priv.h21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/ga102.c62
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gk104.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp102.c8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c130
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c35
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/g98.c35
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/gf100.c82
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk104.c38
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk20a.c9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c131
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp10b.c11
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/gt215.c63
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv04.c93
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv11.c21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv17.c23
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv44.c7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv50.c29
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h50
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/tu102.c136
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gh100.c25
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memgf100.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memnv04.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memnv50.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/priv.h8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/tu102.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c222
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c206
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h32
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c16
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgh100.c306
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c74
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c27
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c54
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/g84.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/g92.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf106.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/gh100.c30
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/gk104.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/gp100.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv04.c25
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv40.c25
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv46.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv4c.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/pcie.c7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h11
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c53
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm200.c39
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c59
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c35
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c16
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gt215.c33
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/privring/gm200.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/fanpwm.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/fantog.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/gp100.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c13
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/top/ga100.c11
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c11
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/top/priv.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/Kbuild8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/base.c60
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/ga100.c52
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/gv100.c36
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/priv.h30
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/r535.c57
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/tu102.c113
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/uvfn.c67
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.c4
-rw-r--r--drivers/gpu/drm/nova/Kconfig16
-rw-r--r--drivers/gpu/drm/nova/Makefile3
-rw-r--r--drivers/gpu/drm/nova/driver.rs71
-rw-r--r--drivers/gpu/drm/nova/file.rs69
-rw-r--r--drivers/gpu/drm/nova/gem.rs47
-rw-r--r--drivers/gpu/drm/nova/nova.rs17
-rw-r--r--drivers/gpu/drm/omapdrm/Kconfig7
-rw-r--r--drivers/gpu/drm/omapdrm/Makefile1
-rw-r--r--drivers/gpu/drm/omapdrm/dss/base.c28
-rw-r--r--drivers/gpu/drm/omapdrm/dss/dispc.c208
-rw-r--r--drivers/gpu/drm/omapdrm/dss/dpi.c13
-rw-r--r--drivers/gpu/drm/omapdrm/dss/dsi.c56
-rw-r--r--drivers/gpu/drm/omapdrm/dss/dss.c45
-rw-r--r--drivers/gpu/drm/omapdrm/dss/dss.h18
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi4.c60
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c14
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi4_cec.h14
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi4_core.c4
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi5.c50
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi5_core.c22
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi5_core.h1
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi_phy.c5
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi_pll.c4
-rw-r--r--drivers/gpu/drm/omapdrm/dss/omapdss.h3
-rw-r--r--drivers/gpu/drm/omapdrm/dss/sdi.c31
-rw-r--r--drivers/gpu/drm/omapdrm/dss/venc.c40
-rw-r--r--drivers/gpu/drm/omapdrm/dss/video-pll.c8
-rw-r--r--drivers/gpu/drm/omapdrm/omap_crtc.c2
-rw-r--r--drivers/gpu/drm/omapdrm/omap_debugfs.c8
-rw-r--r--drivers/gpu/drm/omapdrm/omap_dmm_priv.h10
-rw-r--r--drivers/gpu/drm/omapdrm/omap_dmm_tiler.c31
-rw-r--r--drivers/gpu/drm/omapdrm/omap_dmm_tiler.h10
-rw-r--r--drivers/gpu/drm/omapdrm/omap_drv.c305
-rw-r--r--drivers/gpu/drm/omapdrm/omap_drv.h28
-rw-r--r--drivers/gpu/drm/omapdrm/omap_encoder.c4
-rw-r--r--drivers/gpu/drm/omapdrm/omap_fb.c64
-rw-r--r--drivers/gpu/drm/omapdrm/omap_fb.h9
-rw-r--r--drivers/gpu/drm/omapdrm/omap_fbdev.c239
-rw-r--r--drivers/gpu/drm/omapdrm/omap_fbdev.h15
-rw-r--r--drivers/gpu/drm/omapdrm/omap_gem.c320
-rw-r--r--drivers/gpu/drm/omapdrm/omap_gem.h6
-rw-r--r--drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c51
-rw-r--r--drivers/gpu/drm/omapdrm/omap_irq.c5
-rw-r--r--drivers/gpu/drm/omapdrm/omap_overlay.c212
-rw-r--r--drivers/gpu/drm/omapdrm/omap_overlay.h35
-rw-r--r--drivers/gpu/drm/omapdrm/omap_plane.c373
-rw-r--r--drivers/gpu/drm/omapdrm/omap_plane.h1
-rw-r--r--drivers/gpu/drm/omapdrm/tcm-sita.c10
-rw-r--r--drivers/gpu/drm/panel/Kconfig695
-rw-r--r--drivers/gpu/drm/panel/Makefile55
-rw-r--r--drivers/gpu/drm/panel/panel-abt-y030xx067a.c62
-rw-r--r--drivers/gpu/drm/panel/panel-arm-versatile.c13
-rw-r--r--drivers/gpu/drm/panel/panel-asus-z00t-tm5p5-n35596.c187
-rw-r--r--drivers/gpu/drm/panel/panel-auo-a030jtn01.c307
-rw-r--r--drivers/gpu/drm/panel/panel-boe-bf060y8m-aj0.c401
-rw-r--r--drivers/gpu/drm/panel/panel-boe-himax8279d.c74
-rw-r--r--drivers/gpu/drm/panel/panel-boe-td4320.c247
-rw-r--r--drivers/gpu/drm/panel/panel-boe-th101mb31ig002-28a.c438
-rw-r--r--drivers/gpu/drm/panel/panel-boe-tv101wum-ll2.c241
-rw-r--r--drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c2457
-rw-r--r--drivers/gpu/drm/panel/panel-dsi-cm.c49
-rw-r--r--drivers/gpu/drm/panel/panel-ebbg-ft8719.c247
-rw-r--r--drivers/gpu/drm/panel/panel-edp.c915
-rw-r--r--drivers/gpu/drm/panel/panel-elida-kd35t133.c169
-rw-r--r--drivers/gpu/drm/panel/panel-feixin-k101-im2ba02.c29
-rw-r--r--drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c47
-rw-r--r--drivers/gpu/drm/panel/panel-himax-hx8279.c1296
-rw-r--r--drivers/gpu/drm/panel/panel-himax-hx83102.c1088
-rw-r--r--drivers/gpu/drm/panel/panel-himax-hx83112a.c347
-rw-r--r--drivers/gpu/drm/panel/panel-himax-hx83112b.c430
-rw-r--r--drivers/gpu/drm/panel/panel-himax-hx8394.c845
-rw-r--r--drivers/gpu/drm/panel/panel-hydis-hv101hd1.c188
-rw-r--r--drivers/gpu/drm/panel/panel-ilitek-ili9322.c27
-rw-r--r--drivers/gpu/drm/panel/panel-ilitek-ili9341.c254
-rw-r--r--drivers/gpu/drm/panel/panel-ilitek-ili9805.c405
-rw-r--r--drivers/gpu/drm/panel/panel-ilitek-ili9806e.c565
-rw-r--r--drivers/gpu/drm/panel/panel-ilitek-ili9881c.c2247
-rw-r--r--drivers/gpu/drm/panel/panel-ilitek-ili9882t.c768
-rw-r--r--drivers/gpu/drm/panel/panel-innolux-ej030na.c64
-rw-r--r--drivers/gpu/drm/panel/panel-innolux-p079zca.c358
-rw-r--r--drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c1216
-rw-r--r--drivers/gpu/drm/panel/panel-jdi-fhd-r63452.c244
-rw-r--r--drivers/gpu/drm/panel/panel-jdi-lpm102a188a.c475
-rw-r--r--drivers/gpu/drm/panel/panel-jdi-lt070me05000.c98
-rw-r--r--drivers/gpu/drm/panel/panel-khadas-ts050.c1182
-rw-r--r--drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c74
-rw-r--r--drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c400
-rw-r--r--drivers/gpu/drm/panel/panel-leadtek-ltk500hd1829.c308
-rw-r--r--drivers/gpu/drm/panel/panel-lg-lb035q02.c14
-rw-r--r--drivers/gpu/drm/panel/panel-lg-ld070wx3.c184
-rw-r--r--drivers/gpu/drm/panel/panel-lg-lg4573.c15
-rw-r--r--drivers/gpu/drm/panel/panel-lg-sw43408.c320
-rw-r--r--drivers/gpu/drm/panel/panel-lincolntech-lcd197.c261
-rw-r--r--drivers/gpu/drm/panel/panel-lvds.c88
-rw-r--r--drivers/gpu/drm/panel/panel-magnachip-d53e6ea8966.c520
-rw-r--r--drivers/gpu/drm/panel/panel-mantix-mlaf057we51.c104
-rw-r--r--drivers/gpu/drm/panel/panel-nec-nl8048hl11.c14
-rw-r--r--drivers/gpu/drm/panel/panel-newvision-nv3051d.c541
-rw-r--r--drivers/gpu/drm/panel/panel-newvision-nv3052c.c686
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt35510.c460
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt35560.c480
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt35950.c608
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt36523.c1272
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt36672a.c65
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt36672e.c608
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt37801.c340
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt39016.c36
-rw-r--r--drivers/gpu/drm/panel/panel-olimex-lcd-olinuxino.c62
-rw-r--r--drivers/gpu/drm/panel/panel-orisetech-ota5601a.c360
-rw-r--r--drivers/gpu/drm/panel/panel-orisetech-otm8009a.c39
-rw-r--r--drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c57
-rw-r--r--drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c71
-rw-r--r--drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c37
-rw-r--r--drivers/gpu/drm/panel/panel-raydium-rm67191.c41
-rw-r--r--drivers/gpu/drm/panel/panel-raydium-rm67200.c493
-rw-r--r--drivers/gpu/drm/panel/panel-raydium-rm68200.c53
-rw-r--r--drivers/gpu/drm/panel/panel-raydium-rm692e5.c373
-rw-r--r--drivers/gpu/drm/panel/panel-raydium-rm69380.c309
-rw-r--r--drivers/gpu/drm/panel/panel-renesas-r61307.c325
-rw-r--r--drivers/gpu/drm/panel/panel-renesas-r69328.c281
-rw-r--r--drivers/gpu/drm/panel/panel-ronbo-rb070d30.c33
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-ams581vf01.c283
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-ams639rq08.c329
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-atna33xc20.c140
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-db7430.c25
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-ld9040.c54
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6d16d0.c17
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6d27a1.c14
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c492
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e3fa7.c256
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e3fc2x01.c385
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e3ha2.c31
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e3ha8.c342
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c50
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e63m0-dsi.c5
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c3
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e63m0.c27
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e88a0-ams427ap24.c768
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e88a0-ams452ef01.c125
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c19
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e8aa5x01-ams561ra01.c981
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-sofef00.c240
-rw-r--r--drivers/gpu/drm/panel/panel-seiko-43wvf1g.c71
-rw-r--r--drivers/gpu/drm/panel/panel-sharp-lq079l1sx01.c225
-rw-r--r--drivers/gpu/drm/panel/panel-sharp-lq101r1sx01.c80
-rw-r--r--drivers/gpu/drm/panel/panel-sharp-ls037v7dw01.c35
-rw-r--r--drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c120
-rw-r--r--drivers/gpu/drm/panel/panel-sharp-ls060t1sx01.c99
-rw-r--r--drivers/gpu/drm/panel/panel-simple.c1998
-rw-r--r--drivers/gpu/drm/panel/panel-sitronix-st7701.c1372
-rw-r--r--drivers/gpu/drm/panel/panel-sitronix-st7703.c853
-rw-r--r--drivers/gpu/drm/panel/panel-sitronix-st7789v.c374
-rw-r--r--drivers/gpu/drm/panel/panel-sony-acx424akp.c490
-rw-r--r--drivers/gpu/drm/panel/panel-sony-acx565akm.c37
-rw-r--r--drivers/gpu/drm/panel/panel-sony-td4353-jdi.c257
-rw-r--r--drivers/gpu/drm/panel/panel-sony-tulip-truly-nt35521.c517
-rw-r--r--drivers/gpu/drm/panel/panel-startek-kd070fhfid015.c350
-rw-r--r--drivers/gpu/drm/panel/panel-summit.c134
-rw-r--r--drivers/gpu/drm/panel/panel-synaptics-r63353.c331
-rw-r--r--drivers/gpu/drm/panel/panel-synaptics-tddi.c277
-rw-r--r--drivers/gpu/drm/panel/panel-tdo-tl070wsh30.c27
-rw-r--r--drivers/gpu/drm/panel/panel-tpo-td028ttec1.c21
-rw-r--r--drivers/gpu/drm/panel/panel-tpo-td043mtea1.c28
-rw-r--r--drivers/gpu/drm/panel/panel-tpo-tpg110.c23
-rw-r--r--drivers/gpu/drm/panel/panel-truly-nt35597.c34
-rw-r--r--drivers/gpu/drm/panel/panel-visionox-g2647fb105.c280
-rw-r--r--drivers/gpu/drm/panel/panel-visionox-r66451.c349
-rw-r--r--drivers/gpu/drm/panel/panel-visionox-rm69299.c360
-rw-r--r--drivers/gpu/drm/panel/panel-visionox-rm692e5.c442
-rw-r--r--drivers/gpu/drm/panel/panel-visionox-vtdr6130.c325
-rw-r--r--drivers/gpu/drm/panel/panel-widechips-ws2401.c21
-rw-r--r--drivers/gpu/drm/panel/panel-xinpeng-xpp055c272.c253
-rw-r--r--drivers/gpu/drm/panfrost/Kconfig4
-rw-r--r--drivers/gpu/drm/panfrost/Makefile3
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_devfreq.c74
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_devfreq.h3
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_device.c234
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_device.h102
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_drv.c587
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_dump.c241
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_dump.h12
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_features.h229
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gem.c262
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gem.h78
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c41
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gpu.c250
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gpu.h6
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_issues.h21
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_job.c436
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_job.h44
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_mmu.c381
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_mmu.h4
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_perfcnt.c49
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_regs.h90
-rw-r--r--drivers/gpu/drm/panthor/Kconfig23
-rw-r--r--drivers/gpu/drm/panthor/Makefile16
-rw-r--r--drivers/gpu/drm/panthor/panthor_devfreq.c334
-rw-r--r--drivers/gpu/drm/panthor/panthor_devfreq.h23
-rw-r--r--drivers/gpu/drm/panthor/panthor_device.c612
-rw-r--r--drivers/gpu/drm/panthor/panthor_device.h548
-rw-r--r--drivers/gpu/drm/panthor/panthor_drv.c1765
-rw-r--r--drivers/gpu/drm/panthor/panthor_fw.c1506
-rw-r--r--drivers/gpu/drm/panthor/panthor_fw.h531
-rw-r--r--drivers/gpu/drm/panthor/panthor_gem.c455
-rw-r--r--drivers/gpu/drm/panthor/panthor_gem.h202
-rw-r--r--drivers/gpu/drm/panthor/panthor_gpu.c396
-rw-r--r--drivers/gpu/drm/panthor/panthor_gpu.h55
-rw-r--r--drivers/gpu/drm/panthor/panthor_heap.c632
-rw-r--r--drivers/gpu/drm/panthor/panthor_heap.h41
-rw-r--r--drivers/gpu/drm/panthor/panthor_hw.c224
-rw-r--r--drivers/gpu/drm/panthor/panthor_hw.h56
-rw-r--r--drivers/gpu/drm/panthor/panthor_mmu.c2833
-rw-r--r--drivers/gpu/drm/panthor/panthor_mmu.h106
-rw-r--r--drivers/gpu/drm/panthor/panthor_pwr.c549
-rw-r--r--drivers/gpu/drm/panthor/panthor_pwr.h23
-rw-r--r--drivers/gpu/drm/panthor/panthor_regs.h291
-rw-r--r--drivers/gpu/drm/panthor/panthor_sched.c4149
-rw-r--r--drivers/gpu/drm/panthor/panthor_sched.h57
-rw-r--r--drivers/gpu/drm/pl111/Kconfig5
-rw-r--r--drivers/gpu/drm/pl111/pl111_display.c28
-rw-r--r--drivers/gpu/drm/pl111/pl111_drm.h4
-rw-r--r--drivers/gpu/drm/pl111/pl111_drv.c33
-rw-r--r--drivers/gpu/drm/pl111/pl111_versatile.c16
-rw-r--r--drivers/gpu/drm/qxl/Kconfig4
-rw-r--r--drivers/gpu/drm/qxl/qxl_cmd.c27
-rw-r--r--drivers/gpu/drm/qxl/qxl_debugfs.c19
-rw-r--r--drivers/gpu/drm/qxl/qxl_display.c93
-rw-r--r--drivers/gpu/drm/qxl/qxl_draw.c7
-rw-r--r--drivers/gpu/drm/qxl/qxl_drv.c68
-rw-r--r--drivers/gpu/drm/qxl/qxl_drv.h46
-rw-r--r--drivers/gpu/drm/qxl/qxl_dumb.c5
-rw-r--r--drivers/gpu/drm/qxl/qxl_gem.c28
-rw-r--r--drivers/gpu/drm/qxl/qxl_image.c2
-rw-r--r--drivers/gpu/drm/qxl/qxl_ioctl.c53
-rw-r--r--drivers/gpu/drm/qxl/qxl_irq.c1
-rw-r--r--drivers/gpu/drm/qxl/qxl_kms.c11
-rw-r--r--drivers/gpu/drm/qxl/qxl_object.c44
-rw-r--r--drivers/gpu/drm/qxl/qxl_object.h8
-rw-r--r--drivers/gpu/drm/qxl/qxl_prime.c12
-rw-r--r--drivers/gpu/drm/qxl/qxl_release.c73
-rw-r--r--drivers/gpu/drm/qxl/qxl_ttm.c65
-rw-r--r--drivers/gpu/drm/r128/Makefile10
-rw-r--r--drivers/gpu/drm/r128/ati_pcigart.c228
-rw-r--r--drivers/gpu/drm/r128/ati_pcigart.h31
-rw-r--r--drivers/gpu/drm/r128/r128_cce.c944
-rw-r--r--drivers/gpu/drm/r128/r128_drv.c116
-rw-r--r--drivers/gpu/drm/r128/r128_drv.h544
-rw-r--r--drivers/gpu/drm/r128/r128_ioc32.c199
-rw-r--r--drivers/gpu/drm/r128/r128_irq.c118
-rw-r--r--drivers/gpu/drm/r128/r128_state.c1641
-rw-r--r--drivers/gpu/drm/radeon/.gitignore2
-rw-r--r--drivers/gpu/drm/radeon/Kconfig38
-rw-r--r--drivers/gpu/drm/radeon/Makefile15
-rw-r--r--drivers/gpu/drm/radeon/atom-bits.h2
-rw-r--r--drivers/gpu/drm/radeon/atom.c70
-rw-r--r--drivers/gpu/drm/radeon/atom.h4
-rw-r--r--drivers/gpu/drm/radeon/atombios.h69
-rw-r--r--drivers/gpu/drm/radeon/atombios_crtc.c43
-rw-r--r--drivers/gpu/drm/radeon/atombios_dp.c25
-rw-r--r--drivers/gpu/drm/radeon/atombios_encoders.c144
-rw-r--r--drivers/gpu/drm/radeon/atombios_i2c.c2
-rw-r--r--drivers/gpu/drm/radeon/btc_dpm.c90
-rw-r--r--drivers/gpu/drm/radeon/cayman_blit_shaders.c320
-rw-r--r--drivers/gpu/drm/radeon/cayman_blit_shaders.h294
-rw-r--r--drivers/gpu/drm/radeon/ci_dpm.c113
-rw-r--r--drivers/gpu/drm/radeon/ci_dpm.h6
-rw-r--r--drivers/gpu/drm/radeon/cik.c136
-rw-r--r--drivers/gpu/drm/radeon/cik_blit_shaders.c246
-rw-r--r--drivers/gpu/drm/radeon/cik_blit_shaders.h219
-rw-r--r--drivers/gpu/drm/radeon/clearstate_cayman.h9
-rw-r--r--drivers/gpu/drm/radeon/clearstate_ci.h3
-rw-r--r--drivers/gpu/drm/radeon/clearstate_evergreen.h8
-rw-r--r--drivers/gpu/drm/radeon/clearstate_si.h3
-rw-r--r--drivers/gpu/drm/radeon/cypress_dpm.c8
-rw-r--r--drivers/gpu/drm/radeon/dce3_1_afmt.c1
-rw-r--r--drivers/gpu/drm/radeon/dce6_afmt.c3
-rw-r--r--drivers/gpu/drm/radeon/evergreen.c43
-rw-r--r--drivers/gpu/drm/radeon/evergreen_blit_shaders.c303
-rw-r--r--drivers/gpu/drm/radeon/evergreen_blit_shaders.h278
-rw-r--r--drivers/gpu/drm/radeon/evergreen_cs.c657
-rw-r--r--drivers/gpu/drm/radeon/evergreen_hdmi.c6
-rw-r--r--drivers/gpu/drm/radeon/evergreen_reg.h10
-rw-r--r--drivers/gpu/drm/radeon/evergreen_smc.h9
-rw-r--r--drivers/gpu/drm/radeon/kv_dpm.c11
-rw-r--r--drivers/gpu/drm/radeon/kv_smc.c2
-rw-r--r--drivers/gpu/drm/radeon/ni.c37
-rw-r--r--drivers/gpu/drm/radeon/ni_dpm.c19
-rw-r--r--drivers/gpu/drm/radeon/ni_dpm.h12
-rw-r--r--drivers/gpu/drm/radeon/nislands_smc.h51
-rw-r--r--drivers/gpu/drm/radeon/pptable.h12
-rw-r--r--drivers/gpu/drm/radeon/r100.c325
-rw-r--r--drivers/gpu/drm/radeon/r200.c34
-rw-r--r--drivers/gpu/drm/radeon/r300.c85
-rw-r--r--drivers/gpu/drm/radeon/r300_reg.h4
-rw-r--r--drivers/gpu/drm/radeon/r420.c9
-rw-r--r--drivers/gpu/drm/radeon/r520.c2
-rw-r--r--drivers/gpu/drm/radeon/r600.c24
-rw-r--r--drivers/gpu/drm/radeon/r600_blit_shaders.c719
-rw-r--r--drivers/gpu/drm/radeon/r600_blit_shaders.h38
-rw-r--r--drivers/gpu/drm/radeon/r600_cs.c459
-rw-r--r--drivers/gpu/drm/radeon/r600_dpm.c10
-rw-r--r--drivers/gpu/drm/radeon/r600_dpm.h3
-rw-r--r--drivers/gpu/drm/radeon/r600_hdmi.c24
-rw-r--r--drivers/gpu/drm/radeon/radeon.h102
-rw-r--r--drivers/gpu/drm/radeon/radeon_acpi.c15
-rw-r--r--drivers/gpu/drm/radeon/radeon_acpi.h9
-rw-r--r--drivers/gpu/drm/radeon/radeon_agp.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.c9
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.h2
-rw-r--r--drivers/gpu/drm/radeon/radeon_atombios.c104
-rw-r--r--drivers/gpu/drm/radeon/radeon_atpx_handler.c26
-rw-r--r--drivers/gpu/drm/radeon/radeon_audio.c151
-rw-r--r--drivers/gpu/drm/radeon/radeon_audio.h10
-rw-r--r--drivers/gpu/drm/radeon/radeon_bios.c39
-rw-r--r--drivers/gpu/drm/radeon/radeon_combios.c49
-rw-r--r--drivers/gpu/drm/radeon/radeon_connectors.c169
-rw-r--r--drivers/gpu/drm/radeon/radeon_cs.c62
-rw-r--r--drivers/gpu/drm/radeon/radeon_device.c89
-rw-r--r--drivers/gpu/drm/radeon/radeon_display.c120
-rw-r--r--drivers/gpu/drm/radeon/radeon_dp_auxch.c5
-rw-r--r--drivers/gpu/drm/radeon/radeon_dp_mst.c778
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.c234
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.h16
-rw-r--r--drivers/gpu/drm/radeon/radeon_encoders.c55
-rw-r--r--drivers/gpu/drm/radeon/radeon_fb.c399
-rw-r--r--drivers/gpu/drm/radeon/radeon_fbdev.c293
-rw-r--r--drivers/gpu/drm/radeon/radeon_fence.c163
-rw-r--r--drivers/gpu/drm/radeon/radeon_gart.c45
-rw-r--r--drivers/gpu/drm/radeon/radeon_gem.c109
-rw-r--r--drivers/gpu/drm/radeon/radeon_i2c.c3
-rw-r--r--drivers/gpu/drm/radeon/radeon_ib.c18
-rw-r--r--drivers/gpu/drm/radeon/radeon_irq_kms.c17
-rw-r--r--drivers/gpu/drm/radeon/radeon_kms.c90
-rw-r--r--drivers/gpu/drm/radeon/radeon_legacy_crtc.c6
-rw-r--r--drivers/gpu/drm/radeon/radeon_legacy_encoders.c50
-rw-r--r--drivers/gpu/drm/radeon/radeon_legacy_tv.c7
-rw-r--r--drivers/gpu/drm/radeon/radeon_mn.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_mode.h95
-rw-r--r--drivers/gpu/drm/radeon/radeon_object.c102
-rw-r--r--drivers/gpu/drm/radeon/radeon_object.h31
-rw-r--r--drivers/gpu/drm/radeon/radeon_pm.c38
-rw-r--r--drivers/gpu/drm/radeon/radeon_prime.c29
-rw-r--r--drivers/gpu/drm/radeon/radeon_ring.c8
-rw-r--r--drivers/gpu/drm/radeon/radeon_sa.c316
-rw-r--r--drivers/gpu/drm/radeon/radeon_semaphore.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_sync.c22
-rw-r--r--drivers/gpu/drm/radeon/radeon_test.c12
-rw-r--r--drivers/gpu/drm/radeon/radeon_trace.h2
-rw-r--r--drivers/gpu/drm/radeon/radeon_ttm.c104
-rw-r--r--drivers/gpu/drm/radeon/radeon_uvd.c19
-rw-r--r--drivers/gpu/drm/radeon/radeon_vce.c14
-rw-r--r--drivers/gpu/drm/radeon/radeon_vm.c20
-rw-r--r--drivers/gpu/drm/radeon/rs400.c31
-rw-r--r--drivers/gpu/drm/radeon/rs600.c18
-rw-r--r--drivers/gpu/drm/radeon/rs690.c2
-rw-r--r--drivers/gpu/drm/radeon/rv515.c12
-rw-r--r--drivers/gpu/drm/radeon/rv6xx_dpm.h3
-rw-r--r--drivers/gpu/drm/radeon/rv740_dpm.c8
-rw-r--r--drivers/gpu/drm/radeon/rv770.c38
-rw-r--r--drivers/gpu/drm/radeon/rv770_dpm.c4
-rw-r--r--drivers/gpu/drm/radeon/rv770_smc.c36
-rw-r--r--drivers/gpu/drm/radeon/rv770_smc.h27
-rw-r--r--drivers/gpu/drm/radeon/si.c152
-rw-r--r--drivers/gpu/drm/radeon/si_blit_shaders.c253
-rw-r--r--drivers/gpu/drm/radeon/si_blit_shaders.h223
-rw-r--r--drivers/gpu/drm/radeon/si_dpm.c132
-rw-r--r--drivers/gpu/drm/radeon/si_dpm.h21
-rw-r--r--drivers/gpu/drm/radeon/sid.h2
-rw-r--r--drivers/gpu/drm/radeon/sislands_smc.h51
-rw-r--r--drivers/gpu/drm/radeon/smu7.h6
-rw-r--r--drivers/gpu/drm/radeon/smu7_discrete.h51
-rw-r--r--drivers/gpu/drm/radeon/smu7_fusion.h42
-rw-r--r--drivers/gpu/drm/radeon/sumo_dpm.c24
-rw-r--r--drivers/gpu/drm/radeon/trinity_dpm.c26
-rw-r--r--drivers/gpu/drm/radeon/trinity_dpm.h3
-rw-r--r--drivers/gpu/drm/radeon/uvd_v1_0.c2
-rw-r--r--drivers/gpu/drm/rcar-du/Kconfig52
-rw-r--r--drivers/gpu/drm/rcar-du/Makefile28
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_du_of.c323
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_du_of.h20
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7790.dts69
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7791.dts43
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7793.dts43
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7795.dts43
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7796.dts43
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_lvds.h37
-rw-r--r--drivers/gpu/drm/renesas/Kconfig5
-rw-r--r--drivers/gpu/drm/renesas/Makefile5
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/Kconfig77
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/Makefile16
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_cmm.c (renamed from drivers/gpu/drm/rcar-du/rcar_cmm.c)11
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_cmm.h (renamed from drivers/gpu/drm/rcar-du/rcar_cmm.h)2
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_crtc.c (renamed from drivers/gpu/drm/rcar-du/rcar_du_crtc.c)129
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_crtc.h (renamed from drivers/gpu/drm/rcar-du/rcar_du_crtc.h)2
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_drv.c (renamed from drivers/gpu/drm/rcar-du/rcar_du_drv.c)127
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_drv.h (renamed from drivers/gpu/drm/rcar-du/rcar_du_drv.h)7
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_encoder.c (renamed from drivers/gpu/drm/rcar-du/rcar_du_encoder.c)16
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_encoder.h (renamed from drivers/gpu/drm/rcar-du/rcar_du_encoder.h)2
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_group.c (renamed from drivers/gpu/drm/rcar-du/rcar_du_group.c)66
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_group.h (renamed from drivers/gpu/drm/rcar-du/rcar_du_group.h)2
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c (renamed from drivers/gpu/drm/rcar-du/rcar_du_kms.c)102
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.h (renamed from drivers/gpu/drm/rcar-du/rcar_du_kms.h)2
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_plane.c (renamed from drivers/gpu/drm/rcar-du/rcar_du_plane.c)75
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_plane.h (renamed from drivers/gpu/drm/rcar-du/rcar_du_plane.h)4
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_regs.h (renamed from drivers/gpu/drm/rcar-du/rcar_du_regs.h)35
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_vsp.c (renamed from drivers/gpu/drm/rcar-du/rcar_du_vsp.c)142
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_vsp.h (renamed from drivers/gpu/drm/rcar-du/rcar_du_vsp.h)2
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_writeback.c (renamed from drivers/gpu/drm/rcar-du/rcar_du_writeback.c)12
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_writeback.h (renamed from drivers/gpu/drm/rcar-du/rcar_du_writeback.h)2
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_dw_hdmi.c (renamed from drivers/gpu/drm/rcar-du/rcar_dw_hdmi.c)4
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_lvds.c (renamed from drivers/gpu/drm/rcar-du/rcar_lvds.c)264
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_lvds.h41
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_lvds_regs.h (renamed from drivers/gpu/drm/rcar-du/rcar_lvds_regs.h)2
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c1350
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.h31
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h319
-rw-r--r--drivers/gpu/drm/renesas/rz-du/Kconfig28
-rw-r--r--drivers/gpu/drm/renesas/rz-du/Makefile10
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_crtc.c422
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_crtc.h89
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_drv.c199
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_drv.h78
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_encoder.c126
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_encoder.h32
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.c479
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.h42
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_vsp.c355
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_vsp.h82
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c1084
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi_regs.h203
-rw-r--r--drivers/gpu/drm/renesas/shmobile/Kconfig16
-rw-r--r--drivers/gpu/drm/renesas/shmobile/Makefile7
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c622
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.h46
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c297
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.h52
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c189
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.h32
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.c328
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.h20
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_regs.h (renamed from drivers/gpu/drm/shmobile/shmob_drm_regs.h)0
-rw-r--r--drivers/gpu/drm/rockchip/Kconfig59
-rw-r--r--drivers/gpu/drm/rockchip/Makefile8
-rw-r--r--drivers/gpu/drm/rockchip/analogix_dp-rockchip.c312
-rw-r--r--drivers/gpu/drm/rockchip/cdn-dp-core.c327
-rw-r--r--drivers/gpu/drm/rockchip/cdn-dp-core.h12
-rw-r--r--drivers/gpu/drm/rockchip/cdn-dp-reg.c4
-rw-r--r--drivers/gpu/drm/rockchip/cdn-dp-reg.h4
-rw-r--r--drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c356
-rw-r--r--drivers/gpu/drm/rockchip/dw-mipi-dsi2-rockchip.c508
-rw-r--r--drivers/gpu/drm/rockchip/dw_dp-rockchip.c150
-rw-r--r--drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c379
-rw-r--r--drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c669
-rw-r--r--drivers/gpu/drm/rockchip/inno_hdmi.c1033
-rw-r--r--drivers/gpu/drm/rockchip/inno_hdmi.h354
-rw-r--r--drivers/gpu/drm/rockchip/rk3066_hdmi.c364
-rw-r--r--drivers/gpu/drm/rockchip/rk3066_hdmi.h2
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_drv.c211
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_drv.h53
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_fb.c70
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_fb.h8
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_fbdev.c163
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h24
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_gem.c97
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_gem.h14
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_vop.c304
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_vop.h39
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_vop2.c2811
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_vop2.h866
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_lvds.c186
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_lvds.h23
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_rgb.c26
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_rgb.h8
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_vop2_reg.c2612
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_vop_reg.c254
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_vop_reg.h6
-rw-r--r--drivers/gpu/drm/savage/Makefile9
-rw-r--r--drivers/gpu/drm/savage/savage_bci.c1082
-rw-r--r--drivers/gpu/drm/savage/savage_drv.c91
-rw-r--r--drivers/gpu/drm/savage/savage_drv.h580
-rw-r--r--drivers/gpu/drm/savage/savage_state.c1169
-rw-r--r--drivers/gpu/drm/scheduler/.kunitconfig12
-rw-r--r--drivers/gpu/drm/scheduler/Makefile2
-rw-r--r--drivers/gpu/drm/scheduler/gpu_scheduler_trace.h140
-rw-r--r--drivers/gpu/drm/scheduler/sched_entity.c444
-rw-r--r--drivers/gpu/drm/scheduler/sched_fence.c74
-rw-r--r--drivers/gpu/drm/scheduler/sched_internal.h91
-rw-r--r--drivers/gpu/drm/scheduler/sched_main.c1089
-rw-r--r--drivers/gpu/drm/scheduler/tests/Makefile7
-rw-r--r--drivers/gpu/drm/scheduler/tests/mock_scheduler.c370
-rw-r--r--drivers/gpu/drm/scheduler/tests/sched_tests.h224
-rw-r--r--drivers/gpu/drm/scheduler/tests/tests_basic.c563
-rw-r--r--drivers/gpu/drm/selftests/Makefile7
-rw-r--r--drivers/gpu/drm/selftests/drm_cmdline_selftests.h68
-rw-r--r--drivers/gpu/drm/selftests/drm_mm_selftests.h28
-rw-r--r--drivers/gpu/drm/selftests/drm_modeset_selftests.h40
-rw-r--r--drivers/gpu/drm/selftests/drm_selftest.c109
-rw-r--r--drivers/gpu/drm/selftests/drm_selftest.h41
-rw-r--r--drivers/gpu/drm/selftests/test-drm_cmdline_parser.c1141
-rw-r--r--drivers/gpu/drm/selftests/test-drm_damage_helper.c667
-rw-r--r--drivers/gpu/drm/selftests/test-drm_dp_mst_helper.c273
-rw-r--r--drivers/gpu/drm/selftests/test-drm_format.c280
-rw-r--r--drivers/gpu/drm/selftests/test-drm_framebuffer.c351
-rw-r--r--drivers/gpu/drm/selftests/test-drm_mm.c2487
-rw-r--r--drivers/gpu/drm/selftests/test-drm_modeset_common.c32
-rw-r--r--drivers/gpu/drm/selftests/test-drm_modeset_common.h52
-rw-r--r--drivers/gpu/drm/selftests/test-drm_plane_helper.c219
-rw-r--r--drivers/gpu/drm/selftests/test-drm_rect.c223
-rw-r--r--drivers/gpu/drm/shmobile/Kconfig13
-rw-r--r--drivers/gpu/drm/shmobile/Makefile8
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_backlight.c86
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_backlight.h19
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_crtc.c683
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_crtc.h55
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_drv.c302
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_drv.h42
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_kms.c150
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_kms.h29
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_plane.c259
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_plane.h19
-rw-r--r--drivers/gpu/drm/sis/Makefile10
-rw-r--r--drivers/gpu/drm/sis/sis_drv.c143
-rw-r--r--drivers/gpu/drm/sis/sis_drv.h80
-rw-r--r--drivers/gpu/drm/sis/sis_mm.c363
-rw-r--r--drivers/gpu/drm/sitronix/Kconfig42
-rw-r--r--drivers/gpu/drm/sitronix/Makefile3
-rw-r--r--drivers/gpu/drm/sitronix/st7571-i2c.c1083
-rw-r--r--drivers/gpu/drm/sitronix/st7586.c (renamed from drivers/gpu/drm/tiny/st7586.c)73
-rw-r--r--drivers/gpu/drm/sitronix/st7735r.c (renamed from drivers/gpu/drm/tiny/st7735r.c)23
-rw-r--r--drivers/gpu/drm/solomon/Kconfig32
-rw-r--r--drivers/gpu/drm/solomon/Makefile3
-rw-r--r--drivers/gpu/drm/solomon/ssd130x-i2c.c126
-rw-r--r--drivers/gpu/drm/solomon/ssd130x-spi.c193
-rw-r--r--drivers/gpu/drm/solomon/ssd130x.c2040
-rw-r--r--drivers/gpu/drm/solomon/ssd130x.h113
-rw-r--r--drivers/gpu/drm/sprd/Kconfig12
-rw-r--r--drivers/gpu/drm/sprd/Makefile8
-rw-r--r--drivers/gpu/drm/sprd/megacores_pll.c305
-rw-r--r--drivers/gpu/drm/sprd/sprd_dpu.c872
-rw-r--r--drivers/gpu/drm/sprd/sprd_dpu.h109
-rw-r--r--drivers/gpu/drm/sprd/sprd_drm.c199
-rw-r--r--drivers/gpu/drm/sprd/sprd_drm.h19
-rw-r--r--drivers/gpu/drm/sprd/sprd_dsi.c1066
-rw-r--r--drivers/gpu/drm/sprd/sprd_dsi.h126
-rw-r--r--drivers/gpu/drm/sti/Kconfig6
-rw-r--r--drivers/gpu/drm/sti/Makefile2
-rw-r--r--drivers/gpu/drm/sti/sti_compositor.c18
-rw-r--r--drivers/gpu/drm/sti/sti_crtc.c1
-rw-r--r--drivers/gpu/drm/sti/sti_cursor.c21
-rw-r--r--drivers/gpu/drm/sti/sti_drv.c54
-rw-r--r--drivers/gpu/drm/sti/sti_dvo.c57
-rw-r--r--drivers/gpu/drm/sti/sti_gdp.c28
-rw-r--r--drivers/gpu/drm/sti/sti_hda.c67
-rw-r--r--drivers/gpu/drm/sti/sti_hdmi.c119
-rw-r--r--drivers/gpu/drm/sti/sti_hdmi.h4
-rw-r--r--drivers/gpu/drm/sti/sti_hqvdp.c46
-rw-r--r--drivers/gpu/drm/sti/sti_mixer.c2
-rw-r--r--drivers/gpu/drm/sti/sti_plane.c12
-rw-r--r--drivers/gpu/drm/sti/sti_plane.h2
-rw-r--r--drivers/gpu/drm/sti/sti_tvout.c18
-rw-r--r--drivers/gpu/drm/sti/sti_vtg.c22
-rw-r--r--drivers/gpu/drm/stm/Kconfig18
-rw-r--r--drivers/gpu/drm/stm/Makefile2
-rw-r--r--drivers/gpu/drm/stm/drv.c60
-rw-r--r--drivers/gpu/drm/stm/dw_mipi_dsi-stm.c403
-rw-r--r--drivers/gpu/drm/stm/ltdc.c1374
-rw-r--r--drivers/gpu/drm/stm/ltdc.h29
-rw-r--r--drivers/gpu/drm/stm/lvds.c1222
-rw-r--r--drivers/gpu/drm/sun4i/Kconfig35
-rw-r--r--drivers/gpu/drm/sun4i/Makefile2
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_backend.c70
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_crtc.c3
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_dotclock.c206
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_drv.c62
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_framebuffer.c1
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_frontend.c36
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_frontend.h1
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_hdmi.h1
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_hdmi_ddc_clk.c12
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c208
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c7
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_layer.c20
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_tcon.c84
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_tcon.h1
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_tcon_dclk.c208
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_tcon_dclk.h (renamed from drivers/gpu/drm/sun4i/sun4i_dotclock.h)0
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_tv.c199
-rw-r--r--drivers/gpu/drm/sun4i/sun6i_drc.c4
-rw-r--r--drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c82
-rw-r--r--drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h7
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_csc.c120
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_csc.h17
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c62
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h11
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c251
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_mixer.c435
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_mixer.h118
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_tcon_top.c24
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_ui_layer.c314
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_ui_layer.h25
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_ui_scaler.c44
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_ui_scaler.h4
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_vi_layer.c345
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_vi_layer.h25
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_vi_scaler.c49
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_vi_scaler.h6
-rw-r--r--drivers/gpu/drm/sun4i/sunxi_engine.h40
-rw-r--r--drivers/gpu/drm/sysfb/Kconfig76
-rw-r--r--drivers/gpu/drm/sysfb/Makefile12
-rw-r--r--drivers/gpu/drm/sysfb/drm_sysfb.c35
-rw-r--r--drivers/gpu/drm/sysfb/drm_sysfb_helper.h218
-rw-r--r--drivers/gpu/drm/sysfb/drm_sysfb_modeset.c608
-rw-r--r--drivers/gpu/drm/sysfb/drm_sysfb_screen_info.c104
-rw-r--r--drivers/gpu/drm/sysfb/efidrm.c390
-rw-r--r--drivers/gpu/drm/sysfb/ofdrm.c1145
-rw-r--r--drivers/gpu/drm/sysfb/simpledrm.c885
-rw-r--r--drivers/gpu/drm/sysfb/vesadrm.c652
-rw-r--r--drivers/gpu/drm/tdfx/Makefile8
-rw-r--r--drivers/gpu/drm/tdfx/tdfx_drv.c90
-rw-r--r--drivers/gpu/drm/tdfx/tdfx_drv.h47
-rw-r--r--drivers/gpu/drm/tegra/Kconfig12
-rw-r--r--drivers/gpu/drm/tegra/Makefile7
-rw-r--r--drivers/gpu/drm/tegra/dc.c304
-rw-r--r--drivers/gpu/drm/tegra/dc.h12
-rw-r--r--drivers/gpu/drm/tegra/dp.c80
-rw-r--r--drivers/gpu/drm/tegra/dp.h2
-rw-r--r--drivers/gpu/drm/tegra/dpaux.c53
-rw-r--r--drivers/gpu/drm/tegra/drm.c119
-rw-r--r--drivers/gpu/drm/tegra/drm.h55
-rw-r--r--drivers/gpu/drm/tegra/dsi.c126
-rw-r--r--drivers/gpu/drm/tegra/falcon.c30
-rw-r--r--drivers/gpu/drm/tegra/falcon.h2
-rw-r--r--drivers/gpu/drm/tegra/fb.c253
-rw-r--r--drivers/gpu/drm/tegra/fbdev.c143
-rw-r--r--drivers/gpu/drm/tegra/firewall.c3
-rw-r--r--drivers/gpu/drm/tegra/gem.c294
-rw-r--r--drivers/gpu/drm/tegra/gem.h21
-rw-r--r--drivers/gpu/drm/tegra/gr2d.c156
-rw-r--r--drivers/gpu/drm/tegra/gr3d.c332
-rw-r--r--drivers/gpu/drm/tegra/hdmi.c315
-rw-r--r--drivers/gpu/drm/tegra/hub.c54
-rw-r--r--drivers/gpu/drm/tegra/hub.h4
-rw-r--r--drivers/gpu/drm/tegra/nvdec.c578
-rw-r--r--drivers/gpu/drm/tegra/nvjpg.c330
-rw-r--r--drivers/gpu/drm/tegra/output.c59
-rw-r--r--drivers/gpu/drm/tegra/plane.c157
-rw-r--r--drivers/gpu/drm/tegra/plane.h4
-rw-r--r--drivers/gpu/drm/tegra/rgb.c104
-rw-r--r--drivers/gpu/drm/tegra/riscv.c106
-rw-r--r--drivers/gpu/drm/tegra/riscv.h30
-rw-r--r--drivers/gpu/drm/tegra/sor.c82
-rw-r--r--drivers/gpu/drm/tegra/submit.c131
-rw-r--r--drivers/gpu/drm/tegra/uapi.c112
-rw-r--r--drivers/gpu/drm/tegra/uapi.h5
-rw-r--r--drivers/gpu/drm/tegra/vic.c192
-rw-r--r--drivers/gpu/drm/tests/.kunitconfig5
-rw-r--r--drivers/gpu/drm/tests/Makefile30
-rw-r--r--drivers/gpu/drm/tests/drm_atomic_state_test.c379
-rw-r--r--drivers/gpu/drm/tests/drm_atomic_test.c153
-rw-r--r--drivers/gpu/drm/tests/drm_bridge_test.c521
-rw-r--r--drivers/gpu/drm/tests/drm_buddy_test.c893
-rw-r--r--drivers/gpu/drm/tests/drm_client_modeset_test.c200
-rw-r--r--drivers/gpu/drm/tests/drm_cmdline_parser_test.c1079
-rw-r--r--drivers/gpu/drm/tests/drm_connector_test.c1818
-rw-r--r--drivers/gpu/drm/tests/drm_damage_helper_test.c640
-rw-r--r--drivers/gpu/drm/tests/drm_dp_mst_helper_test.c577
-rw-r--r--drivers/gpu/drm/tests/drm_exec_test.c222
-rw-r--r--drivers/gpu/drm/tests/drm_fixp_test.c71
-rw-r--r--drivers/gpu/drm/tests/drm_format_helper_test.c1740
-rw-r--r--drivers/gpu/drm/tests/drm_format_test.c360
-rw-r--r--drivers/gpu/drm/tests/drm_framebuffer_test.c725
-rw-r--r--drivers/gpu/drm/tests/drm_gem_shmem_test.c385
-rw-r--r--drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c2334
-rw-r--r--drivers/gpu/drm/tests/drm_kunit_edid.h864
-rw-r--r--drivers/gpu/drm/tests/drm_kunit_helpers.c400
-rw-r--r--drivers/gpu/drm/tests/drm_managed_test.c117
-rw-r--r--drivers/gpu/drm/tests/drm_mm_test.c361
-rw-r--r--drivers/gpu/drm/tests/drm_modes_test.c208
-rw-r--r--drivers/gpu/drm/tests/drm_plane_helper_test.c319
-rw-r--r--drivers/gpu/drm/tests/drm_probe_helper_test.c217
-rw-r--r--drivers/gpu/drm/tests/drm_rect_test.c530
-rw-r--r--drivers/gpu/drm/tests/drm_sysfb_modeset_test.c168
-rw-r--r--drivers/gpu/drm/tidss/Kconfig6
-rw-r--r--drivers/gpu/drm/tidss/Makefile3
-rw-r--r--drivers/gpu/drm/tidss/tidss_crtc.c76
-rw-r--r--drivers/gpu/drm/tidss/tidss_dispc.c935
-rw-r--r--drivers/gpu/drm/tidss/tidss_dispc.h42
-rw-r--r--drivers/gpu/drm/tidss/tidss_dispc_regs.h107
-rw-r--r--drivers/gpu/drm/tidss/tidss_drv.c86
-rw-r--r--drivers/gpu/drm/tidss/tidss_drv.h14
-rw-r--r--drivers/gpu/drm/tidss/tidss_encoder.c147
-rw-r--r--drivers/gpu/drm/tidss/tidss_encoder.h5
-rw-r--r--drivers/gpu/drm/tidss/tidss_irq.c84
-rw-r--r--drivers/gpu/drm/tidss/tidss_irq.h4
-rw-r--r--drivers/gpu/drm/tidss/tidss_kms.c34
-rw-r--r--drivers/gpu/drm/tidss/tidss_oldi.c619
-rw-r--r--drivers/gpu/drm/tidss/tidss_oldi.h43
-rw-r--r--drivers/gpu/drm/tidss/tidss_plane.c59
-rw-r--r--drivers/gpu/drm/tidss/tidss_plane.h4
-rw-r--r--drivers/gpu/drm/tidss/tidss_scale_coefs.h2
-rw-r--r--drivers/gpu/drm/tilcdc/Kconfig4
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_crtc.c24
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_drv.c67
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_external.c8
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_panel.c13
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_plane.c14
-rw-r--r--drivers/gpu/drm/tiny/Kconfig139
-rw-r--r--drivers/gpu/drm/tiny/Makefile10
-rw-r--r--drivers/gpu/drm/tiny/appletbdrm.c834
-rw-r--r--drivers/gpu/drm/tiny/arcpgu.c37
-rw-r--r--drivers/gpu/drm/tiny/bochs.c479
-rw-r--r--drivers/gpu/drm/tiny/cirrus-qemu.c675
-rw-r--r--drivers/gpu/drm/tiny/cirrus.c653
-rw-r--r--drivers/gpu/drm/tiny/gm12u320.c103
-rw-r--r--drivers/gpu/drm/tiny/hx8357d.c22
-rw-r--r--drivers/gpu/drm/tiny/ili9163.c222
-rw-r--r--drivers/gpu/drm/tiny/ili9225.c67
-rw-r--r--drivers/gpu/drm/tiny/ili9341.c22
-rw-r--r--drivers/gpu/drm/tiny/ili9486.c41
-rw-r--r--drivers/gpu/drm/tiny/mi0283qt.c23
-rw-r--r--drivers/gpu/drm/tiny/panel-mipi-dbi.c458
-rw-r--r--drivers/gpu/drm/tiny/pixpaper.c1166
-rw-r--r--drivers/gpu/drm/tiny/repaper.c91
-rw-r--r--drivers/gpu/drm/tiny/sharp-memory.c669
-rw-r--r--drivers/gpu/drm/tiny/simpledrm.c901
-rw-r--r--drivers/gpu/drm/ttm/Makefile3
-rw-r--r--drivers/gpu/drm/ttm/tests/.kunitconfig3
-rw-r--r--drivers/gpu/drm/ttm/tests/Makefile11
-rw-r--r--drivers/gpu/drm/ttm/tests/TODO27
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_bo_test.c637
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c1176
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_device_test.c206
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c304
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.h52
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_mock_manager.c238
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_mock_manager.h30
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_pool_test.c437
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_resource_test.c337
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_tt_test.c402
-rw-r--r--drivers/gpu/drm/ttm/ttm_agp_backend.c3
-rw-r--r--drivers/gpu/drm/ttm/ttm_backup.c182
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo.c1449
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_internal.h60
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_util.c664
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_vm.c134
-rw-r--r--drivers/gpu/drm/ttm/ttm_device.c171
-rw-r--r--drivers/gpu/drm/ttm/ttm_execbuf_util.c29
-rw-r--r--drivers/gpu/drm/ttm/ttm_module.c7
-rw-r--r--drivers/gpu/drm/ttm/ttm_pool.c900
-rw-r--r--drivers/gpu/drm/ttm/ttm_pool_internal.h25
-rw-r--r--drivers/gpu/drm/ttm/ttm_range_manager.c55
-rw-r--r--drivers/gpu/drm/ttm/ttm_resource.c723
-rw-r--r--drivers/gpu/drm/ttm/ttm_sys_manager.c3
-rw-r--r--drivers/gpu/drm/ttm/ttm_tt.c165
-rw-r--r--drivers/gpu/drm/tve200/Kconfig5
-rw-r--r--drivers/gpu/drm/tve200/tve200_display.c14
-rw-r--r--drivers/gpu/drm/tve200/tve200_drv.c41
-rw-r--r--drivers/gpu/drm/tyr/Kconfig19
-rw-r--r--drivers/gpu/drm/tyr/Makefile3
-rw-r--r--drivers/gpu/drm/tyr/driver.rs205
-rw-r--r--drivers/gpu/drm/tyr/file.rs56
-rw-r--r--drivers/gpu/drm/tyr/gem.rs18
-rw-r--r--drivers/gpu/drm/tyr/gpu.rs219
-rw-r--r--drivers/gpu/drm/tyr/regs.rs108
-rw-r--r--drivers/gpu/drm/tyr/tyr.rs22
-rw-r--r--drivers/gpu/drm/udl/Kconfig1
-rw-r--r--drivers/gpu/drm/udl/Makefile8
-rw-r--r--drivers/gpu/drm/udl/udl_connector.c138
-rw-r--r--drivers/gpu/drm/udl/udl_connector.h15
-rw-r--r--drivers/gpu/drm/udl/udl_drv.c39
-rw-r--r--drivers/gpu/drm/udl/udl_drv.h54
-rw-r--r--drivers/gpu/drm/udl/udl_edid.c81
-rw-r--r--drivers/gpu/drm/udl/udl_edid.h15
-rw-r--r--drivers/gpu/drm/udl/udl_main.c311
-rw-r--r--drivers/gpu/drm/udl/udl_modeset.c553
-rw-r--r--drivers/gpu/drm/udl/udl_proto.h68
-rw-r--r--drivers/gpu/drm/udl/udl_transfer.c61
-rw-r--r--drivers/gpu/drm/v3d/Kconfig5
-rw-r--r--drivers/gpu/drm/v3d/Makefile5
-rw-r--r--drivers/gpu/drm/v3d/v3d_bo.c124
-rw-r--r--drivers/gpu/drm/v3d/v3d_debugfs.c247
-rw-r--r--drivers/gpu/drm/v3d/v3d_drv.c225
-rw-r--r--drivers/gpu/drm/v3d/v3d_drv.h278
-rw-r--r--drivers/gpu/drm/v3d/v3d_fence.c11
-rw-r--r--drivers/gpu/drm/v3d/v3d_gem.c897
-rw-r--r--drivers/gpu/drm/v3d/v3d_gemfs.c62
-rw-r--r--drivers/gpu/drm/v3d/v3d_irq.c202
-rw-r--r--drivers/gpu/drm/v3d/v3d_mmu.c95
-rw-r--r--drivers/gpu/drm/v3d/v3d_perfmon.c319
-rw-r--r--drivers/gpu/drm/v3d/v3d_performance_counters.h33
-rw-r--r--drivers/gpu/drm/v3d/v3d_regs.h141
-rw-r--r--drivers/gpu/drm/v3d/v3d_sched.c700
-rw-r--r--drivers/gpu/drm/v3d/v3d_submit.c1407
-rw-r--r--drivers/gpu/drm/v3d/v3d_sysfs.c66
-rw-r--r--drivers/gpu/drm/v3d/v3d_trace.h57
-rw-r--r--drivers/gpu/drm/vboxvideo/Kconfig1
-rw-r--r--drivers/gpu/drm/vboxvideo/hgsmi_base.c47
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_drv.c57
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_drv.h1
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_irq.c1
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_main.c31
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_mode.c31
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_ttm.c1
-rw-r--r--drivers/gpu/drm/vboxvideo/vboxvideo.h6
-rw-r--r--drivers/gpu/drm/vboxvideo/vboxvideo_guest.h2
-rw-r--r--drivers/gpu/drm/vc4/Kconfig30
-rw-r--r--drivers/gpu/drm/vc4/Makefile7
-rw-r--r--drivers/gpu/drm/vc4/tests/.kunitconfig13
-rw-r--r--drivers/gpu/drm/vc4/tests/vc4_mock.c207
-rw-r--r--drivers/gpu/drm/vc4/tests/vc4_mock.h61
-rw-r--r--drivers/gpu/drm/vc4/tests/vc4_mock_crtc.c41
-rw-r--r--drivers/gpu/drm/vc4/tests/vc4_mock_output.c176
-rw-r--r--drivers/gpu/drm/vc4/tests/vc4_mock_plane.c25
-rw-r--r--drivers/gpu/drm/vc4/tests/vc4_test_pv_muxing.c1140
-rw-r--r--drivers/gpu/drm/vc4/vc4_bo.c172
-rw-r--r--drivers/gpu/drm/vc4/vc4_crtc.c753
-rw-r--r--drivers/gpu/drm/vc4/vc4_debugfs.c62
-rw-r--r--drivers/gpu/drm/vc4/vc4_dpi.c260
-rw-r--r--drivers/gpu/drm/vc4/vc4_drv.c230
-rw-r--r--drivers/gpu/drm/vc4/vc4_drv.h389
-rw-r--r--drivers/gpu/drm/vc4/vc4_dsi.c448
-rw-r--r--drivers/gpu/drm/vc4/vc4_gem.c326
-rw-r--r--drivers/gpu/drm/vc4/vc4_hdmi.c2581
-rw-r--r--drivers/gpu/drm/vc4/vc4_hdmi.h126
-rw-r--r--drivers/gpu/drm/vc4/vc4_hdmi_phy.c683
-rw-r--r--drivers/gpu/drm/vc4/vc4_hdmi_regs.h274
-rw-r--r--drivers/gpu/drm/vc4/vc4_hvs.c1470
-rw-r--r--drivers/gpu/drm/vc4/vc4_irq.c30
-rw-r--r--drivers/gpu/drm/vc4/vc4_kms.c492
-rw-r--r--drivers/gpu/drm/vc4/vc4_perfmon.c70
-rw-r--r--drivers/gpu/drm/vc4/vc4_plane.c1617
-rw-r--r--drivers/gpu/drm/vc4/vc4_regs.h372
-rw-r--r--drivers/gpu/drm/vc4/vc4_render_cl.c46
-rw-r--r--drivers/gpu/drm/vc4/vc4_trace.h95
-rw-r--r--drivers/gpu/drm/vc4/vc4_txp.c232
-rw-r--r--drivers/gpu/drm/vc4/vc4_v3d.c115
-rw-r--r--drivers/gpu/drm/vc4/vc4_validate.c69
-rw-r--r--drivers/gpu/drm/vc4/vc4_validate_shaders.c8
-rw-r--r--drivers/gpu/drm/vc4/vc4_vec.c647
-rw-r--r--drivers/gpu/drm/vgem/vgem_drv.c34
-rw-r--r--drivers/gpu/drm/vgem/vgem_drv.h11
-rw-r--r--drivers/gpu/drm/vgem/vgem_fence.c35
-rw-r--r--drivers/gpu/drm/via/Makefile8
-rw-r--r--drivers/gpu/drm/via/via_3d_reg.h1650
-rw-r--r--drivers/gpu/drm/via/via_dma.c744
-rw-r--r--drivers/gpu/drm/via/via_dmablit.c807
-rw-r--r--drivers/gpu/drm/via/via_dmablit.h140
-rw-r--r--drivers/gpu/drm/via/via_drv.c124
-rw-r--r--drivers/gpu/drm/via/via_drv.h229
-rw-r--r--drivers/gpu/drm/via/via_irq.c388
-rw-r--r--drivers/gpu/drm/via/via_map.c132
-rw-r--r--drivers/gpu/drm/via/via_mm.c241
-rw-r--r--drivers/gpu/drm/via/via_verifier.c1110
-rw-r--r--drivers/gpu/drm/via/via_verifier.h62
-rw-r--r--drivers/gpu/drm/via/via_video.c94
-rw-r--r--drivers/gpu/drm/virtio/Kconfig12
-rw-r--r--drivers/gpu/drm/virtio/Makefile2
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_debugfs.c5
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_display.c67
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_drv.c134
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_drv.h62
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_fence.c16
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_gem.c63
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_ioctl.c269
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_kms.c94
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_object.c123
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_plane.c294
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_prime.c189
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_submit.c542
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_trace.h26
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_vq.c239
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_vram.c11
-rw-r--r--drivers/gpu/drm/vkms/Kconfig32
-rw-r--r--drivers/gpu/drm/vkms/Makefile9
-rw-r--r--drivers/gpu/drm/vkms/tests/.kunitconfig4
-rw-r--r--drivers/gpu/drm/vkms/tests/Makefile8
-rw-r--r--drivers/gpu/drm/vkms/tests/vkms_color_test.c414
-rw-r--r--drivers/gpu/drm/vkms/tests/vkms_config_test.c1029
-rw-r--r--drivers/gpu/drm/vkms/tests/vkms_format_test.c279
-rw-r--r--drivers/gpu/drm/vkms/vkms_colorop.c120
-rw-r--r--drivers/gpu/drm/vkms/vkms_composer.c707
-rw-r--r--drivers/gpu/drm/vkms/vkms_composer.h28
-rw-r--r--drivers/gpu/drm/vkms/vkms_config.c649
-rw-r--r--drivers/gpu/drm/vkms/vkms_config.h489
-rw-r--r--drivers/gpu/drm/vkms/vkms_configfs.c843
-rw-r--r--drivers/gpu/drm/vkms/vkms_configfs.h8
-rw-r--r--drivers/gpu/drm/vkms/vkms_connector.c96
-rw-r--r--drivers/gpu/drm/vkms/vkms_connector.h35
-rw-r--r--drivers/gpu/drm/vkms/vkms_crtc.c139
-rw-r--r--drivers/gpu/drm/vkms/vkms_drv.c166
-rw-r--r--drivers/gpu/drm/vkms/vkms_drv.h271
-rw-r--r--drivers/gpu/drm/vkms/vkms_formats.c971
-rw-r--r--drivers/gpu/drm/vkms/vkms_formats.h21
-rw-r--r--drivers/gpu/drm/vkms/vkms_luts.c811
-rw-r--r--drivers/gpu/drm/vkms/vkms_luts.h12
-rw-r--r--drivers/gpu/drm/vkms/vkms_output.c182
-rw-r--r--drivers/gpu/drm/vkms/vkms_plane.c188
-rw-r--r--drivers/gpu/drm/vkms/vkms_writeback.c101
-rw-r--r--drivers/gpu/drm/vmwgfx/Kconfig13
-rw-r--r--drivers/gpu/drm/vmwgfx/Makefile8
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_cmd.h6
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_devcaps.h10
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_dx.h12
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_limits.h8
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_reg.h6
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_types.h7
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga_escape.h6
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga_overlay.h6
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga_reg.h14
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/vm_basic_types.h2
-rw-r--r--drivers/gpu/drm/vmwgfx/ttm_memory.c683
-rw-r--r--drivers/gpu/drm/vmwgfx/ttm_memory.h96
-rw-r--r--drivers/gpu/drm/vmwgfx/ttm_object.c300
-rw-r--r--drivers/gpu/drm/vmwgfx/ttm_object.h96
-rw-r--r--drivers/gpu/drm/vmwgfx/vmw_surface_cache.h10
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_binding.c46
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_binding.h4
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_blit.c151
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_bo.c1009
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_bo.h238
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c37
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c55
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c95
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_context.c66
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c126
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.c858
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.h82
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.c1
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.c429
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.h582
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c492
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_fb.c837
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_fence.c602
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_fence.h24
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_gem.c350
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c1
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c36
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c31
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_irq.c138
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_kms.c1610
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_kms.h164
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c200
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_mksstat.h2
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_mob.c67
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_msg.c274
-rw-r--r--[-rwxr-xr-x]drivers/gpu/drm/vmwgfx/vmwgfx_msg_arm64.h196
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_msg_x86.h177
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c32
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c165
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_prime.c40
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_resource.c369
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h10
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c182
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_shader.c157
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_simple_resource.c31
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_so.c29
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_so.h6
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c672
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_streamoutput.c41
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_surface.c594
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_system_manager.c90
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_thp.c184
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c319
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c136
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_va.c8
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_validation.c242
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_validation.h88
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c633
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_vkms.h75
-rw-r--r--drivers/gpu/drm/xe/.gitignore4
-rw-r--r--drivers/gpu/drm/xe/.kunitconfig18
-rw-r--r--drivers/gpu/drm/xe/Kconfig141
-rw-r--r--drivers/gpu/drm/xe/Kconfig.debug114
-rw-r--r--drivers/gpu/drm/xe/Kconfig.profile55
-rw-r--r--drivers/gpu/drm/xe/Makefile384
-rw-r--r--drivers/gpu/drm/xe/abi/gsc_command_header_abi.h46
-rw-r--r--drivers/gpu/drm/xe/abi/gsc_mkhi_commands_abi.h39
-rw-r--r--drivers/gpu/drm/xe/abi/gsc_proxy_commands_abi.h44
-rw-r--r--drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h100
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_abi.h281
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h279
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h659
-rw-r--r--drivers/gpu/drm/xe/abi/guc_capture_abi.h186
-rw-r--r--drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h129
-rw-r--r--drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h49
-rw-r--r--drivers/gpu/drm/xe/abi/guc_errors_abi.h101
-rw-r--r--drivers/gpu/drm/xe/abi/guc_klvs_abi.h428
-rw-r--r--drivers/gpu/drm/xe/abi/guc_log_abi.h75
-rw-r--r--drivers/gpu/drm/xe/abi/guc_messages_abi.h251
-rw-r--r--drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h247
-rw-r--r--drivers/gpu/drm/xe/abi/guc_relay_communication_abi.h118
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h13
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h40
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/gt/intel_gt_types.h11
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_active.h22
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h13
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_config.h19
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h37
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_gtt_view_types.h7
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_irq.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_reg_defs.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h13
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h18
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h36
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_clock_gating.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_mchbar_regs.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_pci_config.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h11
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_step.h14
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h162
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_uncore_trace.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h10
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h29
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/soc/intel_dram.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/soc/intel_gmch.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/soc/intel_rom.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb.h42
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb_reg.h6
-rw-r--r--drivers/gpu/drm/xe/display/ext/i915_irq.c85
-rw-r--r--drivers/gpu/drm/xe/display/intel_bo.c103
-rw-r--r--drivers/gpu/drm/xe/display/intel_fb_bo.c91
-rw-r--r--drivers/gpu/drm/xe/display/intel_fbdev_fb.c91
-rw-r--r--drivers/gpu/drm/xe/display/xe_display.c560
-rw-r--r--drivers/gpu/drm/xe/display/xe_display.h71
-rw-r--r--drivers/gpu/drm/xe/display/xe_display_misc.c16
-rw-r--r--drivers/gpu/drm/xe/display/xe_display_rpm.c74
-rw-r--r--drivers/gpu/drm/xe/display/xe_display_rpm.h11
-rw-r--r--drivers/gpu/drm/xe/display/xe_display_wa.c19
-rw-r--r--drivers/gpu/drm/xe/display/xe_dsb_buffer.c79
-rw-r--r--drivers/gpu/drm/xe/display/xe_fb_pin.c482
-rw-r--r--drivers/gpu/drm/xe/display/xe_hdcp_gsc.c213
-rw-r--r--drivers/gpu/drm/xe/display/xe_panic.c102
-rw-r--r--drivers/gpu/drm/xe/display/xe_plane_initial.c319
-rw-r--r--drivers/gpu/drm/xe/display/xe_stolen.c123
-rw-r--r--drivers/gpu/drm/xe/display/xe_tdf.c15
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_alu_commands.h79
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_gfx_state_commands.h18
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h164
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_gpu_commands.h77
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_gsc_commands.h36
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_instr_defs.h35
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_mfx_commands.h28
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_mi_commands.h84
-rw-r--r--drivers/gpu/drm/xe/regs/xe_bars.h12
-rw-r--r--drivers/gpu/drm/xe/regs/xe_engine_regs.h220
-rw-r--r--drivers/gpu/drm/xe/regs/xe_eu_stall_regs.h29
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gsc_regs.h58
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gt_regs.h620
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gtt_defs.h37
-rw-r--r--drivers/gpu/drm/xe/regs/xe_guc_regs.h154
-rw-r--r--drivers/gpu/drm/xe/regs/xe_hw_error_regs.h20
-rw-r--r--drivers/gpu/drm/xe/regs/xe_i2c_regs.h23
-rw-r--r--drivers/gpu/drm/xe/regs/xe_irq_regs.h96
-rw-r--r--drivers/gpu/drm/xe/regs/xe_lrc_layout.h43
-rw-r--r--drivers/gpu/drm/xe/regs/xe_mchbar_regs.h48
-rw-r--r--drivers/gpu/drm/xe/regs/xe_oa_regs.h103
-rw-r--r--drivers/gpu/drm/xe/regs/xe_pcode_regs.h27
-rw-r--r--drivers/gpu/drm/xe/regs/xe_pmt.h35
-rw-r--r--drivers/gpu/drm/xe/regs/xe_pxp_regs.h23
-rw-r--r--drivers/gpu/drm/xe/regs/xe_reg_defs.h137
-rw-r--r--drivers/gpu/drm/xe/regs/xe_regs.h65
-rw-r--r--drivers/gpu/drm/xe/tests/Makefile13
-rw-r--r--drivers/gpu/drm/xe/tests/xe_args_test.c221
-rw-r--r--drivers/gpu/drm/xe/tests/xe_bo.c637
-rw-r--r--drivers/gpu/drm/xe/tests/xe_dma_buf.c301
-rw-r--r--drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c208
-rw-r--r--drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c333
-rw-r--r--drivers/gpu/drm/xe/tests/xe_guc_db_mgr_test.c201
-rw-r--r--drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c776
-rw-r--r--drivers/gpu/drm/xe/tests/xe_guc_id_mgr_test.c136
-rw-r--r--drivers/gpu/drm/xe/tests/xe_guc_relay_test.c522
-rw-r--r--drivers/gpu/drm/xe/tests/xe_kunit_helpers.c129
-rw-r--r--drivers/gpu/drm/xe/tests/xe_kunit_helpers.h19
-rw-r--r--drivers/gpu/drm/xe/tests/xe_live_test_mod.c25
-rw-r--r--drivers/gpu/drm/xe/tests/xe_lmtt_test.c73
-rw-r--r--drivers/gpu/drm/xe/tests/xe_migrate.c791
-rw-r--r--drivers/gpu/drm/xe/tests/xe_mocs.c204
-rw-r--r--drivers/gpu/drm/xe/tests/xe_pci.c412
-rw-r--r--drivers/gpu/drm/xe/tests/xe_pci_test.c76
-rw-r--r--drivers/gpu/drm/xe/tests/xe_pci_test.h36
-rw-r--r--drivers/gpu/drm/xe/tests/xe_rtp_test.c548
-rw-r--r--drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c227
-rw-r--r--drivers/gpu/drm/xe/tests/xe_test.h63
-rw-r--r--drivers/gpu/drm/xe/tests/xe_test_mod.c10
-rw-r--r--drivers/gpu/drm/xe/tests/xe_wa_test.c73
-rw-r--r--drivers/gpu/drm/xe/xe_args.h143
-rw-r--r--drivers/gpu/drm/xe/xe_assert.h176
-rw-r--r--drivers/gpu/drm/xe/xe_bb.c148
-rw-r--r--drivers/gpu/drm/xe/xe_bb.h28
-rw-r--r--drivers/gpu/drm/xe/xe_bb_types.h20
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c3666
-rw-r--r--drivers/gpu/drm/xe/xe_bo.h454
-rw-r--r--drivers/gpu/drm/xe/xe_bo_doc.h179
-rw-r--r--drivers/gpu/drm/xe/xe_bo_evict.c352
-rw-r--r--drivers/gpu/drm/xe/xe_bo_evict.h21
-rw-r--r--drivers/gpu/drm/xe/xe_bo_types.h113
-rw-r--r--drivers/gpu/drm/xe/xe_configfs.c1291
-rw-r--r--drivers/gpu/drm/xe/xe_configfs.h47
-rw-r--r--drivers/gpu/drm/xe/xe_debugfs.c440
-rw-r--r--drivers/gpu/drm/xe/xe_debugfs.h17
-rw-r--r--drivers/gpu/drm/xe/xe_dep_job_types.h29
-rw-r--r--drivers/gpu/drm/xe/xe_dep_scheduler.c143
-rw-r--r--drivers/gpu/drm/xe/xe_dep_scheduler.h21
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump.c522
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump.h35
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump_types.h93
-rw-r--r--drivers/gpu/drm/xe/xe_device.c1297
-rw-r--r--drivers/gpu/drm/xe/xe_device.h212
-rw-r--r--drivers/gpu/drm/xe/xe_device_sysfs.c296
-rw-r--r--drivers/gpu/drm/xe/xe_device_sysfs.h13
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h726
-rw-r--r--drivers/gpu/drm/xe/xe_device_wa_oob.rules5
-rw-r--r--drivers/gpu/drm/xe/xe_dma_buf.c369
-rw-r--r--drivers/gpu/drm/xe/xe_dma_buf.h15
-rw-r--r--drivers/gpu/drm/xe/xe_drm_client.c406
-rw-r--r--drivers/gpu/drm/xe/xe_drm_client.h70
-rw-r--r--drivers/gpu/drm/xe/xe_drv.h22
-rw-r--r--drivers/gpu/drm/xe/xe_eu_stall.c989
-rw-r--r--drivers/gpu/drm/xe/xe_eu_stall.h25
-rw-r--r--drivers/gpu/drm/xe/xe_exec.c367
-rw-r--r--drivers/gpu/drm/xe/xe_exec.h14
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.c1251
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.h114
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue_types.h242
-rw-r--r--drivers/gpu/drm/xe/xe_execlist.c495
-rw-r--r--drivers/gpu/drm/xe/xe_execlist.h21
-rw-r--r--drivers/gpu/drm/xe/xe_execlist_types.h51
-rw-r--r--drivers/gpu/drm/xe/xe_force_wake.c261
-rw-r--r--drivers/gpu/drm/xe/xe_force_wake.h64
-rw-r--r--drivers/gpu/drm/xe/xe_force_wake_types.h110
-rw-r--r--drivers/gpu/drm/xe/xe_gen_wa_oob.c212
-rw-r--r--drivers/gpu/drm/xe/xe_ggtt.c1124
-rw-r--r--drivers/gpu/drm/xe/xe_ggtt.h61
-rw-r--r--drivers/gpu/drm/xe/xe_ggtt_types.h85
-rw-r--r--drivers/gpu/drm/xe/xe_gpu_scheduler.c138
-rw-r--r--drivers/gpu/drm/xe/xe_gpu_scheduler.h120
-rw-r--r--drivers/gpu/drm/xe/xe_gpu_scheduler_types.h57
-rw-r--r--drivers/gpu/drm/xe/xe_gsc.c640
-rw-r--r--drivers/gpu/drm/xe/xe_gsc.h27
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_debugfs.c71
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_debugfs.h14
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_proxy.c535
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_proxy.h21
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_submit.c220
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_submit.h32
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_types.h74
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c1069
-rw-r--r--drivers/gpu/drm/xe/xe_gt.h146
-rw-r--r--drivers/gpu/drm/xe/xe_gt_ccs_mode.c202
-rw-r--r--drivers/gpu/drm/xe/xe_gt_ccs_mode.h24
-rw-r--r--drivers/gpu/drm/xe/xe_gt_clock.c88
-rw-r--r--drivers/gpu/drm/xe/xe_gt_clock.h16
-rw-r--r--drivers/gpu/drm/xe/xe_gt_debugfs.c379
-rw-r--r--drivers/gpu/drm/xe/xe_gt_debugfs.h16
-rw-r--r--drivers/gpu/drm/xe/xe_gt_freq.c304
-rw-r--r--drivers/gpu/drm/xe/xe_gt_freq.h13
-rw-r--r--drivers/gpu/drm/xe/xe_gt_idle.c416
-rw-r--r--drivers/gpu/drm/xe/xe_gt_idle.h22
-rw-r--r--drivers/gpu/drm/xe/xe_gt_idle_types.h43
-rw-r--r--drivers/gpu/drm/xe/xe_gt_mcr.c888
-rw-r--r--drivers/gpu/drm/xe/xe_gt_mcr.h74
-rw-r--r--drivers/gpu/drm/xe/xe_gt_printk.h128
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf.c283
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf.h43
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c2938
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h92
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h59
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c2157
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h43
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h137
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c622
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h19
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h35
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c1069
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h54
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h28
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c147
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h27
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor_types.h22
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c435
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h25
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h31
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c425
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h34
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_service_types.h52
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h65
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_printk.h37
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf.c1377
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf.h43
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c72
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.h14
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h81
-rw-r--r--drivers/gpu/drm/xe/xe_gt_stats.c101
-rw-r--r--drivers/gpu/drm/xe/xe_gt_stats.h26
-rw-r--r--drivers/gpu/drm/xe/xe_gt_stats_types.h51
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sysfs.c55
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sysfs.h19
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sysfs_types.h26
-rw-r--r--drivers/gpu/drm/xe/xe_gt_throttle.c270
-rw-r--r--drivers/gpu/drm/xe/xe_gt_throttle.h17
-rw-r--r--drivers/gpu/drm/xe/xe_gt_topology.c372
-rw-r--r--drivers/gpu/drm/xe/xe_gt_topology.h56
-rw-r--r--drivers/gpu/drm/xe/xe_gt_types.h370
-rw-r--r--drivers/gpu/drm/xe/xe_guard.h119
-rw-r--r--drivers/gpu/drm/xe/xe_guc.c1719
-rw-r--r--drivers/gpu/drm/xe/xe_guc.h97
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ads.c1013
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ads.h18
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ads_types.h29
-rw-r--r--drivers/gpu/drm/xe/xe_guc_buf.c207
-rw-r--r--drivers/gpu/drm/xe/xe_guc_buf.h49
-rw-r--r--drivers/gpu/drm/xe/xe_guc_buf_types.h28
-rw-r--r--drivers/gpu/drm/xe/xe_guc_capture.c2044
-rw-r--r--drivers/gpu/drm/xe/xe_guc_capture.h61
-rw-r--r--drivers/gpu/drm/xe/xe_guc_capture_types.h70
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.c2148
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.h86
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct_types.h173
-rw-r--r--drivers/gpu/drm/xe/xe_guc_db_mgr.c267
-rw-r--r--drivers/gpu/drm/xe/xe_guc_db_mgr.h22
-rw-r--r--drivers/gpu/drm/xe/xe_guc_debugfs.c146
-rw-r--r--drivers/gpu/drm/xe/xe_guc_debugfs.h14
-rw-r--r--drivers/gpu/drm/xe/xe_guc_engine_activity.c521
-rw-r--r--drivers/gpu/drm/xe/xe_guc_engine_activity.h22
-rw-r--r--drivers/gpu/drm/xe/xe_guc_engine_activity_types.h102
-rw-r--r--drivers/gpu/drm/xe/xe_guc_exec_queue_types.h71
-rw-r--r--drivers/gpu/drm/xe/xe_guc_fwif.h366
-rw-r--r--drivers/gpu/drm/xe/xe_guc_hwconfig.c202
-rw-r--r--drivers/gpu/drm/xe/xe_guc_hwconfig.h20
-rw-r--r--drivers/gpu/drm/xe/xe_guc_hxg_helpers.h108
-rw-r--r--drivers/gpu/drm/xe/xe_guc_id_mgr.c280
-rw-r--r--drivers/gpu/drm/xe/xe_guc_id_mgr.h22
-rw-r--r--drivers/gpu/drm/xe/xe_guc_klv_helpers.c148
-rw-r--r--drivers/gpu/drm/xe/xe_guc_klv_helpers.h64
-rw-r--r--drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h71
-rw-r--r--drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h68
-rw-r--r--drivers/gpu/drm/xe/xe_guc_log.c375
-rw-r--r--drivers/gpu/drm/xe/xe_guc_log.h61
-rw-r--r--drivers/gpu/drm/xe/xe_guc_log_types.h57
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pagefault.c95
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pagefault.h15
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc.c1439
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc.h46
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc_types.h40
-rw-r--r--drivers/gpu/drm/xe/xe_guc_relay.c967
-rw-r--r--drivers/gpu/drm/xe/xe_guc_relay.h37
-rw-r--r--drivers/gpu/drm/xe/xe_guc_relay_types.h40
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c2986
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.h52
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit_types.h144
-rw-r--r--drivers/gpu/drm/xe/xe_guc_tlb_inval.c242
-rw-r--r--drivers/gpu/drm/xe/xe_guc_tlb_inval.h19
-rw-r--r--drivers/gpu/drm/xe/xe_guc_types.h129
-rw-r--r--drivers/gpu/drm/xe/xe_heci_gsc.c249
-rw-r--r--drivers/gpu/drm/xe/xe_heci_gsc.h40
-rw-r--r--drivers/gpu/drm/xe/xe_huc.c318
-rw-r--r--drivers/gpu/drm/xe/xe_huc.h28
-rw-r--r--drivers/gpu/drm/xe/xe_huc_debugfs.c71
-rw-r--r--drivers/gpu/drm/xe/xe_huc_debugfs.h14
-rw-r--r--drivers/gpu/drm/xe/xe_huc_types.h24
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine.c1111
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine.h82
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c685
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h43
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_group.c344
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_group.h29
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_group_types.h51
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_types.h185
-rw-r--r--drivers/gpu/drm/xe/xe_hw_error.c182
-rw-r--r--drivers/gpu/drm/xe/xe_hw_error.h15
-rw-r--r--drivers/gpu/drm/xe/xe_hw_fence.c268
-rw-r--r--drivers/gpu/drm/xe/xe_hw_fence.h33
-rw-r--r--drivers/gpu/drm/xe/xe_hw_fence_types.h75
-rw-r--r--drivers/gpu/drm/xe/xe_hwmon.c1334
-rw-r--r--drivers/gpu/drm/xe/xe_hwmon.h19
-rw-r--r--drivers/gpu/drm/xe/xe_i2c.c372
-rw-r--r--drivers/gpu/drm/xe/xe_i2c.h68
-rw-r--r--drivers/gpu/drm/xe/xe_irq.c1050
-rw-r--r--drivers/gpu/drm/xe/xe_irq.h26
-rw-r--r--drivers/gpu/drm/xe/xe_late_bind_fw.c464
-rw-r--r--drivers/gpu/drm/xe/xe_late_bind_fw.h17
-rw-r--r--drivers/gpu/drm/xe/xe_late_bind_fw_types.h75
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt.c573
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt.h28
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt_2l.c150
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt_ml.c161
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt_types.h63
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.c2414
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.h163
-rw-r--r--drivers/gpu/drm/xe/xe_lrc_types.h61
-rw-r--r--drivers/gpu/drm/xe/xe_macros.h22
-rw-r--r--drivers/gpu/drm/xe/xe_map.h93
-rw-r--r--drivers/gpu/drm/xe/xe_memirq.c547
-rw-r--r--drivers/gpu/drm/xe/xe_memirq.h30
-rw-r--r--drivers/gpu/drm/xe/xe_memirq_types.h37
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c2470
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.h174
-rw-r--r--drivers/gpu/drm/xe/xe_migrate_doc.h88
-rw-r--r--drivers/gpu/drm/xe/xe_mmio.c410
-rw-r--r--drivers/gpu/drm/xe/xe_mmio.h49
-rw-r--r--drivers/gpu/drm/xe/xe_mmio_gem.c226
-rw-r--r--drivers/gpu/drm/xe/xe_mmio_gem.h20
-rw-r--r--drivers/gpu/drm/xe/xe_mocs.c838
-rw-r--r--drivers/gpu/drm/xe/xe_mocs.h16
-rw-r--r--drivers/gpu/drm/xe/xe_module.c183
-rw-r--r--drivers/gpu/drm/xe/xe_module.h31
-rw-r--r--drivers/gpu/drm/xe/xe_nvm.c170
-rw-r--r--drivers/gpu/drm/xe/xe_nvm.h15
-rw-r--r--drivers/gpu/drm/xe/xe_oa.c2816
-rw-r--r--drivers/gpu/drm/xe/xe_oa.h25
-rw-r--r--drivers/gpu/drm/xe/xe_oa_types.h271
-rw-r--r--drivers/gpu/drm/xe/xe_observation.c106
-rw-r--r--drivers/gpu/drm/xe/xe_observation.h20
-rw-r--r--drivers/gpu/drm/xe/xe_pagefault.c444
-rw-r--r--drivers/gpu/drm/xe/xe_pagefault.h19
-rw-r--r--drivers/gpu/drm/xe/xe_pagefault_types.h136
-rw-r--r--drivers/gpu/drm/xe/xe_pat.c580
-rw-r--r--drivers/gpu/drm/xe/xe_pat.h61
-rw-r--r--drivers/gpu/drm/xe/xe_pci.c1255
-rw-r--r--drivers/gpu/drm/xe/xe_pci.h15
-rw-r--r--drivers/gpu/drm/xe/xe_pci_sriov.c266
-rw-r--r--drivers/gpu/drm/xe/xe_pci_sriov.h21
-rw-r--r--drivers/gpu/drm/xe/xe_pci_types.h82
-rw-r--r--drivers/gpu/drm/xe/xe_pcode.c380
-rw-r--r--drivers/gpu/drm/xe/xe_pcode.h45
-rw-r--r--drivers/gpu/drm/xe/xe_pcode_api.h94
-rw-r--r--drivers/gpu/drm/xe/xe_platform_types.h42
-rw-r--r--drivers/gpu/drm/xe/xe_pm.c980
-rw-r--r--drivers/gpu/drm/xe/xe_pm.h57
-rw-r--r--drivers/gpu/drm/xe/xe_pmu.c600
-rw-r--r--drivers/gpu/drm/xe/xe_pmu.h18
-rw-r--r--drivers/gpu/drm/xe/xe_pmu_types.h39
-rw-r--r--drivers/gpu/drm/xe/xe_preempt_fence.c184
-rw-r--r--drivers/gpu/drm/xe/xe_preempt_fence.h61
-rw-r--r--drivers/gpu/drm/xe/xe_preempt_fence_types.h34
-rw-r--r--drivers/gpu/drm/xe/xe_printk.h129
-rw-r--r--drivers/gpu/drm/xe/xe_psmi.c294
-rw-r--r--drivers/gpu/drm/xe/xe_psmi.h14
-rw-r--r--drivers/gpu/drm/xe/xe_pt.c2565
-rw-r--r--drivers/gpu/drm/xe/xe_pt.h52
-rw-r--r--drivers/gpu/drm/xe/xe_pt_types.h124
-rw-r--r--drivers/gpu/drm/xe/xe_pt_walk.c161
-rw-r--r--drivers/gpu/drm/xe/xe_pt_walk.h152
-rw-r--r--drivers/gpu/drm/xe/xe_pxp.c949
-rw-r--r--drivers/gpu/drm/xe/xe_pxp.h35
-rw-r--r--drivers/gpu/drm/xe/xe_pxp_debugfs.c129
-rw-r--r--drivers/gpu/drm/xe/xe_pxp_debugfs.h13
-rw-r--r--drivers/gpu/drm/xe/xe_pxp_submit.c602
-rw-r--r--drivers/gpu/drm/xe/xe_pxp_submit.h22
-rw-r--r--drivers/gpu/drm/xe/xe_pxp_types.h135
-rw-r--r--drivers/gpu/drm/xe/xe_query.c812
-rw-r--r--drivers/gpu/drm/xe/xe_query.h14
-rw-r--r--drivers/gpu/drm/xe/xe_range_fence.c161
-rw-r--r--drivers/gpu/drm/xe/xe_range_fence.h75
-rw-r--r--drivers/gpu/drm/xe/xe_reg_sr.c216
-rw-r--r--drivers/gpu/drm/xe/xe_reg_sr.h28
-rw-r--r--drivers/gpu/drm/xe/xe_reg_sr_types.h31
-rw-r--r--drivers/gpu/drm/xe/xe_reg_whitelist.c221
-rw-r--r--drivers/gpu/drm/xe/xe_reg_whitelist.h23
-rw-r--r--drivers/gpu/drm/xe/xe_res_cursor.h356
-rw-r--r--drivers/gpu/drm/xe/xe_ring_ops.c525
-rw-r--r--drivers/gpu/drm/xe/xe_ring_ops.h17
-rw-r--r--drivers/gpu/drm/xe/xe_ring_ops_types.h22
-rw-r--r--drivers/gpu/drm/xe/xe_rtp.c387
-rw-r--r--drivers/gpu/drm/xe/xe_rtp.h506
-rw-r--r--drivers/gpu/drm/xe/xe_rtp_helpers.h83
-rw-r--r--drivers/gpu/drm/xe/xe_rtp_types.h131
-rw-r--r--drivers/gpu/drm/xe/xe_sa.c151
-rw-r--r--drivers/gpu/drm/xe/xe_sa.h72
-rw-r--r--drivers/gpu/drm/xe/xe_sa_types.h19
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job.c359
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job.h95
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job_types.h79
-rw-r--r--drivers/gpu/drm/xe/xe_shrinker.c306
-rw-r--r--drivers/gpu/drm/xe/xe_shrinker.h16
-rw-r--r--drivers/gpu/drm/xe/xe_sriov.c176
-rw-r--r--drivers/gpu/drm/xe/xe_sriov.h48
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_packet.c520
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_packet.h30
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_packet_types.h75
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf.c283
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf.h31
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_control.c279
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_control.h22
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c395
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_debugfs.h18
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_helpers.h73
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_migration.c365
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_migration.h30
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_migration_types.h37
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_provision.c438
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_provision.h45
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_provision_types.h36
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_service.c216
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_service.h23
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_service_types.h36
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c647
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_sysfs.h16
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_types.h70
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_printk.h46
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_types.h40
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf.c211
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf.h20
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_ccs.c480
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_ccs.h35
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h51
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_types.h47
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vfio.c80
-rw-r--r--drivers/gpu/drm/xe/xe_step.c259
-rw-r--r--drivers/gpu/drm/xe/xe_step.h23
-rw-r--r--drivers/gpu/drm/xe/xe_step_types.h76
-rw-r--r--drivers/gpu/drm/xe/xe_survivability_mode.c377
-rw-r--r--drivers/gpu/drm/xe/xe_survivability_mode.h18
-rw-r--r--drivers/gpu/drm/xe/xe_survivability_mode_types.h43
-rw-r--r--drivers/gpu/drm/xe/xe_svm.c1537
-rw-r--r--drivers/gpu/drm/xe/xe_svm.h389
-rw-r--r--drivers/gpu/drm/xe/xe_sync.c406
-rw-r--r--drivers/gpu/drm/xe/xe_sync.h47
-rw-r--r--drivers/gpu/drm/xe/xe_sync_types.h31
-rw-r--r--drivers/gpu/drm/xe/xe_tile.c217
-rw-r--r--drivers/gpu/drm/xe/xe_tile.h26
-rw-r--r--drivers/gpu/drm/xe/xe_tile_debugfs.c142
-rw-r--r--drivers/gpu/drm/xe/xe_tile_debugfs.h16
-rw-r--r--drivers/gpu/drm/xe/xe_tile_printk.h127
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c253
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.h15
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_printk.h33
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_vf.c350
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_vf.h23
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_vf_types.h23
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sysfs.c61
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sysfs.h19
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sysfs_types.h27
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval.c433
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval.h46
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval_job.c285
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval_job.h34
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval_types.h130
-rw-r--r--drivers/gpu/drm/xe/xe_trace.c9
-rw-r--r--drivers/gpu/drm/xe/xe_trace.h474
-rw-r--r--drivers/gpu/drm/xe/xe_trace_bo.c9
-rw-r--r--drivers/gpu/drm/xe/xe_trace_bo.h263
-rw-r--r--drivers/gpu/drm/xe/xe_trace_guc.c9
-rw-r--r--drivers/gpu/drm/xe/xe_trace_guc.h159
-rw-r--r--drivers/gpu/drm/xe/xe_trace_lrc.c9
-rw-r--r--drivers/gpu/drm/xe/xe_trace_lrc.h52
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c351
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h21
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_sys_mgr.c120
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_sys_mgr.h13
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_vram_mgr.c480
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_vram_mgr.h46
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h48
-rw-r--r--drivers/gpu/drm/xe/xe_tuning.c247
-rw-r--r--drivers/gpu/drm/xe/xe_tuning.h19
-rw-r--r--drivers/gpu/drm/xe/xe_uc.c316
-rw-r--r--drivers/gpu/drm/xe/xe_uc.h25
-rw-r--r--drivers/gpu/drm/xe/xe_uc_debugfs.c30
-rw-r--r--drivers/gpu/drm/xe/xe_uc_debugfs.h14
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw.c956
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw.h193
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw_abi.h405
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw_types.h155
-rw-r--r--drivers/gpu/drm/xe/xe_uc_types.h28
-rw-r--r--drivers/gpu/drm/xe/xe_userptr.c322
-rw-r--r--drivers/gpu/drm/xe/xe_userptr.h107
-rw-r--r--drivers/gpu/drm/xe/xe_validation.c278
-rw-r--r--drivers/gpu/drm/xe/xe_validation.h192
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c4410
-rw-r--r--drivers/gpu/drm/xe/xe_vm.h414
-rw-r--r--drivers/gpu/drm/xe/xe_vm_doc.h555
-rw-r--r--drivers/gpu/drm/xe/xe_vm_madvise.c431
-rw-r--r--drivers/gpu/drm/xe/xe_vm_madvise.h15
-rw-r--r--drivers/gpu/drm/xe/xe_vm_types.h477
-rw-r--r--drivers/gpu/drm/xe/xe_vram.c462
-rw-r--r--drivers/gpu/drm/xe/xe_vram.h25
-rw-r--r--drivers/gpu/drm/xe/xe_vram_freq.c124
-rw-r--r--drivers/gpu/drm/xe/xe_vram_freq.h13
-rw-r--r--drivers/gpu/drm/xe/xe_vram_types.h85
-rw-r--r--drivers/gpu/drm/xe/xe_vsec.c225
-rw-r--r--drivers/gpu/drm/xe/xe_vsec.h15
-rw-r--r--drivers/gpu/drm/xe/xe_wa.c1154
-rw-r--r--drivers/gpu/drm/xe/xe_wa.h53
-rw-r--r--drivers/gpu/drm/xe/xe_wa_oob.rules78
-rw-r--r--drivers/gpu/drm/xe/xe_wait_user_fence.c183
-rw-r--r--drivers/gpu/drm/xe/xe_wait_user_fence.h15
-rw-r--r--drivers/gpu/drm/xe/xe_wopcm.c273
-rw-r--r--drivers/gpu/drm/xe/xe_wopcm.h16
-rw-r--r--drivers/gpu/drm/xe/xe_wopcm_types.h26
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front.c31
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front.h9
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front_conn.c1
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front_evtchnl.c43
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front_gem.c117
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front_gem.h13
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front_kms.c5
-rw-r--r--drivers/gpu/drm/xlnx/Kconfig17
-rw-r--r--drivers/gpu/drm/xlnx/Makefile3
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_disp.c911
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_disp.h41
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_disp_regs.h15
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_dp.c1397
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_dp.h11
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_dp_audio.c448
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_dpsub.c293
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_dpsub.h59
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_kms.c548
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_kms.h46
-rw-r--r--drivers/gpu/host1x/Kconfig8
-rw-r--r--drivers/gpu/host1x/Makefile7
-rw-r--r--drivers/gpu/host1x/bus.c150
-rw-r--r--drivers/gpu/host1x/bus.h2
-rw-r--r--drivers/gpu/host1x/cdma.c67
-rw-r--r--drivers/gpu/host1x/cdma.h2
-rw-r--r--drivers/gpu/host1x/channel.c40
-rw-r--r--drivers/gpu/host1x/channel.h4
-rw-r--r--drivers/gpu/host1x/context.c177
-rw-r--r--drivers/gpu/host1x/context.h38
-rw-r--r--drivers/gpu/host1x/context_bus.c26
-rw-r--r--drivers/gpu/host1x/debug.c68
-rw-r--r--drivers/gpu/host1x/debug.h1
-rw-r--r--drivers/gpu/host1x/dev.c429
-rw-r--r--drivers/gpu/host1x/dev.h46
-rw-r--r--drivers/gpu/host1x/fence.c120
-rw-r--r--drivers/gpu/host1x/fence.h19
-rw-r--r--drivers/gpu/host1x/hw/cdma_hw.c46
-rw-r--r--drivers/gpu/host1x/hw/channel_hw.c269
-rw-r--r--drivers/gpu/host1x/hw/debug_hw.c15
-rw-r--r--drivers/gpu/host1x/hw/host1x01_hardware.h114
-rw-r--r--drivers/gpu/host1x/hw/host1x02_hardware.h113
-rw-r--r--drivers/gpu/host1x/hw/host1x04_hardware.h113
-rw-r--r--drivers/gpu/host1x/hw/host1x05_hardware.h113
-rw-r--r--drivers/gpu/host1x/hw/host1x06_hardware.h118
-rw-r--r--drivers/gpu/host1x/hw/host1x07_hardware.h118
-rw-r--r--drivers/gpu/host1x/hw/host1x08.c33
-rw-r--r--drivers/gpu/host1x/hw/host1x08.h15
-rw-r--r--drivers/gpu/host1x/hw/host1x08_hardware.h21
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x06_uclass.h2
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x07_uclass.h2
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x08_channel.h11
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x08_common.h11
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h9
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x08_uclass.h181
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x08_vm.h36
-rw-r--r--drivers/gpu/host1x/hw/intr_hw.c136
-rw-r--r--drivers/gpu/host1x/hw/opcodes.h150
-rw-r--r--drivers/gpu/host1x/hw/syncpt_hw.c3
-rw-r--r--drivers/gpu/host1x/intr.c350
-rw-r--r--drivers/gpu/host1x/intr.h84
-rw-r--r--drivers/gpu/host1x/job.c172
-rw-r--r--drivers/gpu/host1x/job.h6
-rw-r--r--drivers/gpu/host1x/mipi.c21
-rw-r--r--drivers/gpu/host1x/syncpt.c148
-rw-r--r--drivers/gpu/host1x/syncpt.h3
-rw-r--r--drivers/gpu/ipu-v3/Kconfig2
-rw-r--r--drivers/gpu/ipu-v3/ipu-common.c53
-rw-r--r--drivers/gpu/ipu-v3/ipu-cpmem.c23
-rw-r--r--drivers/gpu/ipu-v3/ipu-csi.c114
-rw-r--r--drivers/gpu/ipu-v3/ipu-dc.c5
-rw-r--r--drivers/gpu/ipu-v3/ipu-di.c5
-rw-r--r--drivers/gpu/ipu-v3/ipu-ic.c73
-rw-r--r--drivers/gpu/ipu-v3/ipu-image-convert.c72
-rw-r--r--drivers/gpu/ipu-v3/ipu-pre.c138
-rw-r--r--drivers/gpu/ipu-v3/ipu-prg.c11
-rw-r--r--drivers/gpu/ipu-v3/ipu-prv.h5
-rw-r--r--drivers/gpu/ipu-v3/ipu-vdi.c11
-rw-r--r--drivers/gpu/nova-core/Kconfig16
-rw-r--r--drivers/gpu/nova-core/Makefile3
-rw-r--r--drivers/gpu/nova-core/bitfield.rs330
-rw-r--r--drivers/gpu/nova-core/dma.rs54
-rw-r--r--drivers/gpu/nova-core/driver.rs104
-rw-r--r--drivers/gpu/nova-core/falcon.rs664
-rw-r--r--drivers/gpu/nova-core/falcon/gsp.rs57
-rw-r--r--drivers/gpu/nova-core/falcon/hal.rs60
-rw-r--r--drivers/gpu/nova-core/falcon/hal/ga102.rs120
-rw-r--r--drivers/gpu/nova-core/falcon/sec2.rs25
-rw-r--r--drivers/gpu/nova-core/fb.rs217
-rw-r--r--drivers/gpu/nova-core/fb/hal.rs41
-rw-r--r--drivers/gpu/nova-core/fb/hal/ga100.rs63
-rw-r--r--drivers/gpu/nova-core/fb/hal/ga102.rs38
-rw-r--r--drivers/gpu/nova-core/fb/hal/tu102.rs59
-rw-r--r--drivers/gpu/nova-core/firmware.rs244
-rw-r--r--drivers/gpu/nova-core/firmware/booter.rs401
-rw-r--r--drivers/gpu/nova-core/firmware/fwsec.rs438
-rw-r--r--drivers/gpu/nova-core/firmware/gsp.rs258
-rw-r--r--drivers/gpu/nova-core/firmware/riscv.rs95
-rw-r--r--drivers/gpu/nova-core/gfw.rs71
-rw-r--r--drivers/gpu/nova-core/gpu.rs302
-rw-r--r--drivers/gpu/nova-core/gsp.rs161
-rw-r--r--drivers/gpu/nova-core/gsp/boot.rs252
-rw-r--r--drivers/gpu/nova-core/gsp/cmdq.rs679
-rw-r--r--drivers/gpu/nova-core/gsp/commands.rs227
-rw-r--r--drivers/gpu/nova-core/gsp/fw.rs928
-rw-r--r--drivers/gpu/nova-core/gsp/fw/commands.rs128
-rw-r--r--drivers/gpu/nova-core/gsp/fw/r570_144.rs31
-rw-r--r--drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs951
-rw-r--r--drivers/gpu/nova-core/gsp/sequencer.rs407
-rw-r--r--drivers/gpu/nova-core/nova_core.rs33
-rw-r--r--drivers/gpu/nova-core/num.rs217
-rw-r--r--drivers/gpu/nova-core/regs.rs411
-rw-r--r--drivers/gpu/nova-core/regs/macros.rs721
-rw-r--r--drivers/gpu/nova-core/sbuffer.rs227
-rw-r--r--drivers/gpu/nova-core/util.rs16
-rw-r--r--drivers/gpu/nova-core/vbios.rs1097
-rw-r--r--drivers/gpu/trace/Kconfig11
-rw-r--r--drivers/gpu/vga/Kconfig19
-rw-r--r--drivers/gpu/vga/Makefile1
-rw-r--r--drivers/gpu/vga/vga_switcheroo.c5
-rw-r--r--drivers/gpu/vga/vgaarb.c1567
6680 files changed, 3495036 insertions, 462567 deletions
diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile
index 835c88318cec..36a54d456630 100644
--- a/drivers/gpu/Makefile
+++ b/drivers/gpu/Makefile
@@ -2,7 +2,7 @@
# drm/tegra depends on host1x, so if both drivers are built-in care must be
# taken to initialize them in the correct order. Link order is the only way
# to ensure this currently.
-obj-$(CONFIG_TEGRA_HOST1X) += host1x/
-obj-y += drm/ vga/
+obj-y += host1x/ drm/ vga/
obj-$(CONFIG_IMX_IPUV3_CORE) += ipu-v3/
obj-$(CONFIG_TRACE_GPU_MEM) += trace/
+obj-$(CONFIG_NOVA_CORE) += nova-core/
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 0039df26854b..7e6bc0b3a589 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -10,14 +10,13 @@ menuconfig DRM
depends on (AGP || AGP=n) && !EMULATED_CMPXCHG && HAS_DMA
select DRM_PANEL_ORIENTATION_QUIRKS
select HDMI
- select FB_CMDLINE
select I2C
- select I2C_ALGOBIT
select DMA_SHARED_BUFFER
select SYNC_FILE
# gallium uses SYS_kcmp for os_same_file_description() to de-duplicate
# device and dmabuf fd. Let's make sure that is available for our userspace.
select KCMP
+ select VIDEO
help
Kernel-level support for the Direct Rendering Infrastructure (DRI)
introduced in XFree86 4.0. If you say Y here, you need to select
@@ -27,69 +26,115 @@ menuconfig DRM
details. You should also select and configure AGP
(/dev/agpgart) support if it is available for your platform.
+menu "DRM debugging options"
+depends on DRM
+source "drivers/gpu/drm/Kconfig.debug"
+endmenu
+
+if DRM
+
config DRM_MIPI_DBI
tristate
depends on DRM
+ select DRM_KMS_HELPER
config DRM_MIPI_DSI
bool
depends on DRM
-config DRM_DP_AUX_BUS
+config DRM_KMS_HELPER
tristate
depends on DRM
- depends on OF
-
-config DRM_DP_AUX_CHARDEV
- bool "DRM DP AUX Interface"
- depends on DRM
- help
- Choose this option to enable a /dev/drm_dp_auxN node that allows to
- read and write values to arbitrary DPCD registers on the DP aux
- channel.
-
-config DRM_DEBUG_MM
- bool "Insert extra checks and debug info into the DRM range managers"
- default n
- depends on DRM=y
- depends on STACKTRACE_SUPPORT
- select STACKDEPOT
+ select FB_CORE if DRM_FBDEV_EMULATION
help
- Enable allocation tracking of memory manager and leak detection on
- shutdown.
-
- Recommended for driver developers only.
+ CRTC helpers for KMS drivers.
- If in doubt, say "N".
+config DRM_DRAW
+ bool
+ depends on DRM
-config DRM_DEBUG_SELFTEST
- tristate "kselftests for DRM"
+config DRM_PANIC
+ bool "Display a user-friendly message when a kernel panic occurs"
depends on DRM
- depends on DEBUG_KERNEL
- select PRIME_NUMBERS
- select DRM_LIB_RANDOM
- select DRM_KMS_HELPER
- select DRM_EXPORT_FOR_TESTS if m
- default n
+ select FONT_SUPPORT
+ select DRM_DRAW
help
- This option provides kernel modules that can be used to run
- various selftests on parts of the DRM api. This option is not
- useful for distributions or general kernels, but only for kernel
- developers working on DRM and associated drivers.
-
+ Enable a drm panic handler, which will display a user-friendly message
+ when a kernel panic occurs. It's useful when using a user-space
+ console instead of fbcon.
+ It will only work if your graphic driver supports this feature.
+ To support Hi-DPI Display, you can enable bigger fonts like
+ FONT_TER16x32
+
+config DRM_PANIC_FOREGROUND_COLOR
+ hex "Drm panic screen foreground color, in RGB"
+ depends on DRM_PANIC
+ default 0xffffff
+
+config DRM_PANIC_BACKGROUND_COLOR
+ hex "Drm panic screen background color, in RGB"
+ depends on DRM_PANIC
+ default 0x000000
+
+config DRM_PANIC_DEBUG
+ bool "Add a debug fs entry to trigger drm_panic"
+ depends on DRM_PANIC && DEBUG_FS
+ help
+ Add dri/[device]/drm_panic_plane_x in the kernel debugfs, to force the
+ panic handler to write the panic message to this plane scanout buffer.
+ This is unsafe and should not be enabled on a production build.
If in doubt, say "N".
-config DRM_KMS_HELPER
- tristate
- depends on DRM
+config DRM_PANIC_SCREEN
+ string "Panic screen formatter"
+ default "user"
+ depends on DRM_PANIC
help
- CRTC helpers for KMS drivers.
+ This option enable to choose what will be displayed when a kernel
+ panic occurs. You can choose between "user", a short message telling
+ the user to reboot the system, or "kmsg" which will display the last
+ lines of kmsg.
+ This can also be overridden by drm.panic_screen=xxxx kernel parameter
+ or by writing to /sys/module/drm/parameters/panic_screen sysfs entry
+ Default is "user"
+
+config DRM_PANIC_SCREEN_QR_CODE
+ bool "Add a panic screen with a QR code"
+ depends on DRM_PANIC && RUST
+ select ZLIB_DEFLATE
+ help
+ This option adds a QR code generator, and a panic screen with a QR
+ code. The QR code will contain the last lines of kmsg and other debug
+ information. This should be easier for the user to report a kernel
+ panic, with all debug information available.
+ To use this panic screen, also set DRM_PANIC_SCREEN to "qr_code"
+
+config DRM_PANIC_SCREEN_QR_CODE_URL
+ string "Base URL of the QR code in the panic screen"
+ depends on DRM_PANIC_SCREEN_QR_CODE
+ help
+ This option sets the base URL to report the kernel panic. If it's set
+ the QR code will contain the URL and the kmsg compressed with zlib as
+ a URL parameter. If it's empty, the QR code will contain the kmsg as
+ uncompressed text only.
+ There is a demo code in javascript, to decode and uncompress the kmsg
+ data from the URL parameter at https://github.com/kdj0c/panic_report
+
+config DRM_PANIC_SCREEN_QR_VERSION
+ int "Maximum version (size) of the QR code."
+ depends on DRM_PANIC_SCREEN_QR_CODE
+ default 40
+ help
+ This option limits the version (or size) of the QR code. QR code
+ version ranges from Version 1 (21x21) to Version 40 (177x177).
+ Smaller QR code are easier to read, but will contain less debugging
+ data. Default is 40.
config DRM_DEBUG_DP_MST_TOPOLOGY_REFS
bool "Enable refcount backtrace history in the DP MST helpers"
depends on STACKTRACE_SUPPORT
select STACKDEPOT
- depends on DRM_KMS_HELPER
+ select DRM_KMS_HELPER
depends on DEBUG_KERNEL
depends on EXPERT
help
@@ -115,56 +160,16 @@ config DRM_DEBUG_MODESET_LOCK
If in doubt, say "N".
-config DRM_FBDEV_EMULATION
- bool "Enable legacy fbdev support for your modesetting driver"
- depends on DRM_KMS_HELPER
- depends on FB=y || FB=DRM_KMS_HELPER
- select FB_CFB_FILLRECT
- select FB_CFB_COPYAREA
- select FB_CFB_IMAGEBLIT
- select FB_DEFERRED_IO
- select FB_SYS_FOPS
- select FB_SYS_FILLRECT
- select FB_SYS_COPYAREA
- select FB_SYS_IMAGEBLIT
- select FRAMEBUFFER_CONSOLE if !EXPERT
- select FRAMEBUFFER_CONSOLE_DETECT_PRIMARY if FRAMEBUFFER_CONSOLE
- default y
- help
- Choose this option if you have a need for the legacy fbdev
- support. Note that this support also provides the linux console
- support on top of your modesetting driver.
-
- If in doubt, say "Y".
-
-config DRM_FBDEV_OVERALLOC
- int "Overallocation of the fbdev buffer"
- depends on DRM_FBDEV_EMULATION
- default 100
- help
- Defines the fbdev buffer overallocation in percent. Default
- is 100. Typical values for double buffering will be 200,
- triple buffering 300.
-
-config DRM_FBDEV_LEAK_PHYS_SMEM
- bool "Shamelessly allow leaking of fbdev physical address (DANGEROUS)"
- depends on DRM_FBDEV_EMULATION && EXPERT
- default n
+config DRM_CLIENT
+ bool
+ depends on DRM
help
- In order to keep user-space compatibility, we want in certain
- use-cases to keep leaking the fbdev physical address to the
- user-space program handling the fbdev buffer.
- This affects, not only, Amlogic, Allwinner or Rockchip devices
- with ARM Mali GPUs using an userspace Blob.
- This option is not supported by upstream developers and should be
- removed as soon as possible and be considered as a broken and
- legacy behaviour from a modern fbdev device driver.
+ Enables support for DRM clients. DRM drivers that need
+ struct drm_client_dev and its interfaces should select this
+ option. Drivers that support the default clients should
+ select DRM_CLIENT_SELECTION instead.
- Please send any bug reports when using this to your proprietary
- software vendor that requires this.
-
- If in doubt, say "N" or spread the word to your closed source
- library vendor.
+source "drivers/gpu/drm/clients/Kconfig"
config DRM_LOAD_EDID_FIRMWARE
bool "Allow to specify an EDID data set instead of probing for it"
@@ -178,25 +183,46 @@ config DRM_LOAD_EDID_FIRMWARE
default case is N. Details and instructions how to build your own
EDID data are given in Documentation/admin-guide/edid.rst.
-config DRM_DP_CEC
- bool "Enable DisplayPort CEC-Tunneling-over-AUX HDMI support"
- depends on DRM
- select CEC_CORE
- help
- Choose this option if you want to enable HDMI CEC support for
- DisplayPort/USB-C to HDMI adapters.
-
- Note: not all adapters support this feature, and even for those
- that do support this they often do not hook up the CEC pin.
+source "drivers/gpu/drm/display/Kconfig"
config DRM_TTM
tristate
depends on DRM && MMU
+ select SHMEM
help
GPU memory management subsystem for devices with multiple
GPU memory types. Will be enabled automatically if a device driver
uses it.
+config DRM_EXEC
+ tristate
+ depends on DRM
+ help
+ Execution context for command submissions
+
+config DRM_GPUVM
+ tristate
+ depends on DRM
+ select DRM_EXEC
+ help
+ GPU-VM representation providing helpers to manage a GPUs virtual
+ address space
+
+config DRM_GPUSVM
+ tristate
+ depends on DRM && DEVICE_PRIVATE
+ select HMM_MIRROR
+ select MMU_NOTIFIER
+ help
+ GPU-SVM representation providing helpers to manage a GPUs shared
+ virtual memory
+
+config DRM_BUDDY
+ tristate
+ depends on DRM
+ help
+ A page based buddy allocator
+
config DRM_VRAM_HELPER
tristate
depends on DRM
@@ -207,80 +233,54 @@ config DRM_TTM_HELPER
tristate
depends on DRM
select DRM_TTM
+ select DRM_KMS_HELPER if DRM_FBDEV_EMULATION
+ select FB_CORE if DRM_FBDEV_EMULATION
+ select FB_SYSMEM_HELPERS_DEFERRED if DRM_FBDEV_EMULATION
help
Helpers for ttm-based gem objects
-config DRM_GEM_CMA_HELPER
- bool
- depends on DRM
- help
- Choose this if you need the GEM CMA helper functions
-
-config DRM_KMS_CMA_HELPER
- bool
+config DRM_GEM_DMA_HELPER
+ tristate
depends on DRM
- select DRM_GEM_CMA_HELPER
+ select DRM_KMS_HELPER if DRM_FBDEV_EMULATION
+ select FB_CORE if DRM_FBDEV_EMULATION
+ select FB_DMAMEM_HELPERS_DEFERRED if DRM_FBDEV_EMULATION
help
- Choose this if you need the KMS CMA helper functions
+ Choose this if you need the GEM DMA helper functions
config DRM_GEM_SHMEM_HELPER
- bool
+ tristate
depends on DRM && MMU
+ select DRM_KMS_HELPER if DRM_FBDEV_EMULATION
+ select FB_CORE if DRM_FBDEV_EMULATION
+ select FB_SYSMEM_HELPERS_DEFERRED if DRM_FBDEV_EMULATION
help
Choose this if you need the GEM shmem helper functions
+config DRM_SUBALLOC_HELPER
+ tristate
+ depends on DRM
+
config DRM_SCHED
tristate
depends on DRM
-source "drivers/gpu/drm/i2c/Kconfig"
+source "drivers/gpu/drm/sysfb/Kconfig"
source "drivers/gpu/drm/arm/Kconfig"
-config DRM_RADEON
- tristate "ATI Radeon"
- depends on DRM && PCI && MMU
- depends on AGP || !AGP
- select FW_LOADER
- select DRM_KMS_HELPER
- select DRM_TTM
- select DRM_TTM_HELPER
- select POWER_SUPPLY
- select HWMON
- select BACKLIGHT_CLASS_DEVICE
- select INTERVAL_TREE
- help
- Choose this option if you have an ATI Radeon graphics card. There
- are both PCI and AGP versions. You don't need to choose this to
- run the Radeon in plain VGA mode.
-
- If M is selected, the module will be called radeon.
-
source "drivers/gpu/drm/radeon/Kconfig"
-config DRM_AMDGPU
- tristate "AMD GPU"
- depends on DRM && PCI && MMU
- select FW_LOADER
- select DRM_KMS_HELPER
- select DRM_SCHED
- select DRM_TTM
- select DRM_TTM_HELPER
- select POWER_SUPPLY
- select HWMON
- select BACKLIGHT_CLASS_DEVICE
- select INTERVAL_TREE
- help
- Choose this option if you have a recent AMD Radeon graphics card.
-
- If M is selected, the module will be called amdgpu.
-
source "drivers/gpu/drm/amd/amdgpu/Kconfig"
source "drivers/gpu/drm/nouveau/Kconfig"
+source "drivers/gpu/drm/nova/Kconfig"
+
source "drivers/gpu/drm/i915/Kconfig"
+source "drivers/gpu/drm/xe/Kconfig"
+
source "drivers/gpu/drm/kmb/Kconfig"
config DRM_VGEM
@@ -292,19 +292,7 @@ config DRM_VGEM
as used by Mesa's software renderer for enhanced performance.
If M is selected the module will be called vgem.
-config DRM_VKMS
- tristate "Virtual KMS (EXPERIMENTAL)"
- depends on DRM && MMU
- select DRM_KMS_HELPER
- select DRM_GEM_SHMEM_HELPER
- select CRC32
- default n
- help
- Virtual Kernel Mode-Setting (VKMS) is used for testing or for
- running GPU in a headless machines. Choose this option to get
- a VKMS.
-
- If M is selected the module will be called vkms.
+source "drivers/gpu/drm/vkms/Kconfig"
source "drivers/gpu/drm/exynos/Kconfig"
@@ -324,9 +312,7 @@ source "drivers/gpu/drm/armada/Kconfig"
source "drivers/gpu/drm/atmel-hlcdc/Kconfig"
-source "drivers/gpu/drm/rcar-du/Kconfig"
-
-source "drivers/gpu/drm/shmobile/Kconfig"
+source "drivers/gpu/drm/renesas/Kconfig"
source "drivers/gpu/drm/sun4i/Kconfig"
@@ -360,10 +346,14 @@ source "drivers/gpu/drm/v3d/Kconfig"
source "drivers/gpu/drm/vc4/Kconfig"
+source "drivers/gpu/drm/loongson/Kconfig"
+
source "drivers/gpu/drm/etnaviv/Kconfig"
source "drivers/gpu/drm/hisilicon/Kconfig"
+source "drivers/gpu/drm/logicvc/Kconfig"
+
source "drivers/gpu/drm/mediatek/Kconfig"
source "drivers/gpu/drm/mxsfb/Kconfig"
@@ -384,19 +374,34 @@ source "drivers/gpu/drm/lima/Kconfig"
source "drivers/gpu/drm/panfrost/Kconfig"
+source "drivers/gpu/drm/panthor/Kconfig"
+
source "drivers/gpu/drm/aspeed/Kconfig"
source "drivers/gpu/drm/mcde/Kconfig"
source "drivers/gpu/drm/tidss/Kconfig"
+source "drivers/gpu/drm/adp/Kconfig"
+
source "drivers/gpu/drm/xlnx/Kconfig"
source "drivers/gpu/drm/gud/Kconfig"
+source "drivers/gpu/drm/sitronix/Kconfig"
+
+source "drivers/gpu/drm/solomon/Kconfig"
+
+source "drivers/gpu/drm/sprd/Kconfig"
+
+source "drivers/gpu/drm/imagination/Kconfig"
+
+source "drivers/gpu/drm/tyr/Kconfig"
+
config DRM_HYPERV
tristate "DRM Support for Hyper-V synthetic video device"
- depends on DRM && PCI && MMU && HYPERV
+ depends on DRM && PCI && HYPERV_VMBUS
+ select DRM_CLIENT_SELECTION
select DRM_KMS_HELPER
select DRM_GEM_SHMEM_HELPER
help
@@ -407,91 +412,20 @@ config DRM_HYPERV
If M is selected the module will be called hyperv_drm.
-# Keep legacy drivers last
-
-menuconfig DRM_LEGACY
- bool "Enable legacy drivers (DANGEROUS)"
- depends on DRM && MMU
- help
- Enable legacy DRI1 drivers. Those drivers expose unsafe and dangerous
- APIs to user-space, which can be used to circumvent access
- restrictions and other security measures. For backwards compatibility
- those drivers are still available, but their use is highly
- inadvisable and might harm your system.
-
- You are recommended to use the safe modeset-only drivers instead, and
- perform 3D emulation in user-space.
-
- Unless you have strong reasons to go rogue, say "N".
-
-if DRM_LEGACY
-
-config DRM_TDFX
- tristate "3dfx Banshee/Voodoo3+"
- depends on DRM && PCI
- help
- Choose this option if you have a 3dfx Banshee or Voodoo3 (or later),
- graphics card. If M is selected, the module will be called tdfx.
-
-config DRM_R128
- tristate "ATI Rage 128"
- depends on DRM && PCI
- select FW_LOADER
- help
- Choose this option if you have an ATI Rage 128 graphics card. If M
- is selected, the module will be called r128. AGP support for
- this card is strongly suggested (unless you have a PCI version).
-
-config DRM_I810
- tristate "Intel I810"
- # !PREEMPTION because of missing ioctl locking
- depends on DRM && AGP && AGP_INTEL && (!PREEMPTION || BROKEN)
- help
- Choose this option if you have an Intel I810 graphics card. If M is
- selected, the module will be called i810. AGP support is required
- for this driver to work.
-
-config DRM_MGA
- tristate "Matrox g200/g400"
- depends on DRM && PCI
- select FW_LOADER
- help
- Choose this option if you have a Matrox G200, G400 or G450 graphics
- card. If M is selected, the module will be called mga. AGP
- support is required for this driver to work.
-
-config DRM_SIS
- tristate "SiS video cards"
- depends on DRM && AGP
- depends on FB_SIS || FB_SIS=n
- help
- Choose this option if you have a SiS 630 or compatible video
- chipset. If M is selected the module will be called sis. AGP
- support is required for this driver to work.
-
-config DRM_VIA
- tristate "Via unichrome video cards"
- depends on DRM && PCI
- help
- Choose this option if you have a Via unichrome or compatible video
- chipset. If M is selected the module will be called via.
-
-config DRM_SAVAGE
- tristate "Savage video cards"
- depends on DRM && PCI
- help
- Choose this option if you have a Savage3D/4/SuperSavage/Pro/Twister
- chipset. If M is selected the module will be called savage.
+# Separate option as not all DRM drivers use it
+config DRM_PANEL_BACKLIGHT_QUIRKS
+ tristate
-endif # DRM_LEGACY
+config DRM_LIB_RANDOM
+ bool
+ default n
-config DRM_EXPORT_FOR_TESTS
+config DRM_PRIVACY_SCREEN
bool
+ default n
+
+endif
# Separate option because drm_panel_orientation_quirks.c is shared with fbdev
config DRM_PANEL_ORIENTATION_QUIRKS
tristate
-
-config DRM_LIB_RANDOM
- bool
- default n
diff --git a/drivers/gpu/drm/Kconfig.debug b/drivers/gpu/drm/Kconfig.debug
new file mode 100644
index 000000000000..05dc43c0b8c5
--- /dev/null
+++ b/drivers/gpu/drm/Kconfig.debug
@@ -0,0 +1,117 @@
+config DRM_USE_DYNAMIC_DEBUG
+ bool "use dynamic debug to implement drm.debug"
+ default n
+ depends on BROKEN
+ depends on DRM
+ depends on DYNAMIC_DEBUG || DYNAMIC_DEBUG_CORE
+ depends on JUMP_LABEL
+ help
+ Use dynamic-debug to avoid drm_debug_enabled() runtime overheads.
+ Due to callsite counts in DRM drivers (~4k in amdgpu) and 56
+ bytes per callsite, the .data costs can be substantial, and
+ are therefore configurable.
+
+config DRM_WERROR
+ bool "Compile the drm subsystem with warnings as errors"
+ depends on DRM && EXPERT
+ depends on !WERROR
+ default n
+ help
+ A kernel build should not cause any compiler warnings, and this
+ enables the '-Werror' flag to enforce that rule in the drm subsystem.
+
+ The drm subsystem enables more warnings than the kernel default, so
+ this config option is disabled by default.
+
+ If in doubt, say N.
+
+config DRM_HEADER_TEST
+ bool "Ensure DRM headers are self-contained and pass kernel-doc"
+ depends on DRM && EXPERT && BROKEN
+ default n
+ help
+ Ensure the DRM subsystem headers both under drivers/gpu/drm and
+ include/drm compile, are self-contained, have header guards, and have
+ no kernel-doc warnings.
+
+ If in doubt, say N.
+
+config DRM_DEBUG_MM
+ bool "Insert extra checks and debug info into the DRM range managers"
+ default n
+ depends on DRM
+ depends on STACKTRACE_SUPPORT
+ select STACKDEPOT
+ help
+ Enable allocation tracking of memory manager and leak detection on
+ shutdown.
+
+ Recommended for driver developers only.
+
+ If in doubt, say "N".
+
+config DRM_KUNIT_TEST_HELPERS
+ tristate
+ depends on DRM && KUNIT
+ select DRM_KMS_HELPER
+ help
+ KUnit Helpers for KMS drivers.
+
+config DRM_KUNIT_TEST
+ tristate "KUnit tests for DRM" if !KUNIT_ALL_TESTS
+ depends on DRM && KUNIT && MMU
+ select DRM_BRIDGE_CONNECTOR
+ select DRM_BUDDY
+ select DRM_DISPLAY_DP_HELPER
+ select DRM_DISPLAY_HDMI_STATE_HELPER
+ select DRM_DISPLAY_HELPER
+ select DRM_EXEC
+ select DRM_EXPORT_FOR_TESTS if m
+ select DRM_GEM_SHMEM_HELPER
+ select DRM_KUNIT_TEST_HELPERS
+ select DRM_LIB_RANDOM
+ select DRM_SYSFB_HELPER
+ select PRIME_NUMBERS
+ default KUNIT_ALL_TESTS
+ help
+ This builds unit tests for DRM. This option is not useful for
+ distributions or general kernels, but only for kernel
+ developers working on DRM and associated drivers.
+
+ For more information on KUnit and unit tests in general,
+ please refer to the KUnit documentation in
+ Documentation/dev-tools/kunit/.
+
+ If in doubt, say "N".
+
+config DRM_TTM_KUNIT_TEST
+ tristate "KUnit tests for TTM" if !KUNIT_ALL_TESTS
+ default n
+ depends on DRM && KUNIT && MMU && (UML || COMPILE_TEST)
+ select DRM_TTM
+ select DRM_BUDDY
+ select DRM_EXPORT_FOR_TESTS if m
+ select DRM_KUNIT_TEST_HELPERS
+ default KUNIT_ALL_TESTS
+ help
+ Enables unit tests for TTM, a GPU memory manager subsystem used
+ to manage memory buffers. This option is mostly useful for kernel
+ developers. It depends on (UML || COMPILE_TEST) since no other driver
+ which uses TTM can be loaded while running the tests.
+
+ If in doubt, say "N".
+
+config DRM_SCHED_KUNIT_TEST
+ tristate "KUnit tests for the DRM scheduler" if !KUNIT_ALL_TESTS
+ select DRM_SCHED
+ depends on DRM && KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ Choose this option to build unit tests for the DRM scheduler.
+
+ Recommended for driver developers only.
+
+ If in doubt, say "N".
+
+config DRM_EXPORT_FOR_TESTS
+ bool
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 0dff40bb863c..0e1c668b46d2 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -3,88 +3,188 @@
# Makefile for the drm device driver. This driver provides support for the
# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
-drm-y := drm_aperture.o drm_auth.o drm_cache.o \
- drm_file.o drm_gem.o drm_ioctl.o drm_irq.o \
- drm_drv.o \
- drm_sysfs.o drm_hashtab.o drm_mm.o \
- drm_crtc.o drm_fourcc.o drm_modes.o drm_edid.o drm_displayid.o \
- drm_encoder_slave.o \
- drm_trace_points.o drm_prime.o \
- drm_rect.o drm_vma_manager.o drm_flip_work.o \
- drm_modeset_lock.o drm_atomic.o drm_bridge.o \
- drm_framebuffer.o drm_connector.o drm_blend.o \
- drm_encoder.o drm_mode_object.o drm_property.o \
- drm_plane.o drm_color_mgmt.o drm_print.o \
- drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
- drm_syncobj.o drm_lease.o drm_writeback.o drm_client.o \
- drm_client_modeset.o drm_atomic_uapi.o drm_hdcp.o \
- drm_managed.o drm_vblank_work.o
-
-drm-$(CONFIG_DRM_LEGACY) += drm_agpsupport.o drm_bufs.o drm_context.o drm_dma.o \
- drm_legacy_misc.o drm_lock.o drm_memory.o drm_scatter.o \
- drm_vm.o
+CFLAGS-$(CONFIG_DRM_USE_DYNAMIC_DEBUG) += -DDYNAMIC_DEBUG_MODULE
+
+# Unconditionally enable W=1 warnings locally
+# --- begin copy-paste W=1 warnings from scripts/Makefile.warn
+subdir-ccflags-y += -Wextra -Wunused -Wno-unused-parameter
+subdir-ccflags-y += $(call cc-option, -Wrestrict)
+subdir-ccflags-y += -Wmissing-format-attribute
+subdir-ccflags-y += -Wold-style-definition
+subdir-ccflags-y += -Wmissing-include-dirs
+subdir-ccflags-y += $(call cc-option, -Wunused-but-set-variable)
+subdir-ccflags-y += $(call cc-option, -Wunused-const-variable)
+subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned)
+subdir-ccflags-y += $(call cc-option, -Wformat-overflow)
+# FIXME: fix -Wformat-truncation warnings and uncomment
+#subdir-ccflags-y += $(call cc-option, -Wformat-truncation)
+subdir-ccflags-y += $(call cc-option, -Wstringop-truncation)
+# The following turn off the warnings enabled by -Wextra
+ifeq ($(findstring 2, $(KBUILD_EXTRA_WARN)),)
+subdir-ccflags-y += -Wno-missing-field-initializers
+subdir-ccflags-y += -Wno-type-limits
+subdir-ccflags-y += -Wno-shift-negative-value
+endif
+ifeq ($(findstring 3, $(KBUILD_EXTRA_WARN)),)
+subdir-ccflags-y += -Wno-sign-compare
+endif
+# --- end copy-paste
+
+# Enable -Werror in CI and development
+subdir-ccflags-$(CONFIG_DRM_WERROR) += -Werror
+
+drm-y := \
+ drm_atomic.o \
+ drm_atomic_uapi.o \
+ drm_auth.o \
+ drm_blend.o \
+ drm_bridge.o \
+ drm_cache.o \
+ drm_color_mgmt.o \
+ drm_colorop.o \
+ drm_connector.o \
+ drm_crtc.o \
+ drm_displayid.o \
+ drm_drv.o \
+ drm_dumb_buffers.o \
+ drm_edid.o \
+ drm_eld.o \
+ drm_encoder.o \
+ drm_file.o \
+ drm_fourcc.o \
+ drm_framebuffer.o \
+ drm_gem.o \
+ drm_ioctl.o \
+ drm_lease.o \
+ drm_managed.o \
+ drm_mm.o \
+ drm_mode_config.o \
+ drm_mode_object.o \
+ drm_modes.o \
+ drm_modeset_lock.o \
+ drm_plane.o \
+ drm_prime.o \
+ drm_print.o \
+ drm_property.o \
+ drm_rect.o \
+ drm_syncobj.o \
+ drm_sysfs.o \
+ drm_trace_points.o \
+ drm_vblank.o \
+ drm_vblank_work.o \
+ drm_vma_manager.o \
+ drm_writeback.o
+drm-$(CONFIG_DRM_CLIENT) += \
+ drm_client.o \
+ drm_client_event.o \
+ drm_client_modeset.o \
+ drm_client_sysrq.o
drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
drm-$(CONFIG_COMPAT) += drm_ioc32.o
-drm-$(CONFIG_DRM_GEM_CMA_HELPER) += drm_gem_cma_helper.o
-drm-$(CONFIG_DRM_GEM_SHMEM_HELPER) += drm_gem_shmem_helper.o
drm-$(CONFIG_DRM_PANEL) += drm_panel.o
drm-$(CONFIG_OF) += drm_of.o
drm-$(CONFIG_PCI) += drm_pci.o
-drm-$(CONFIG_DEBUG_FS) += drm_debugfs.o drm_debugfs_crc.o
+drm-$(CONFIG_DEBUG_FS) += \
+ drm_debugfs.o \
+ drm_debugfs_crc.o
drm-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o
+drm-$(CONFIG_DRM_PRIVACY_SCREEN) += \
+ drm_privacy_screen.o \
+ drm_privacy_screen_x86.o
+drm-$(CONFIG_DRM_ACCEL) += ../../accel/drm_accel.o
+drm-$(CONFIG_DRM_PANIC) += drm_panic.o
+drm-$(CONFIG_DRM_DRAW) += drm_draw.o
+drm-$(CONFIG_DRM_PANIC_SCREEN_QR_CODE) += drm_panic_qr.o
+obj-$(CONFIG_DRM) += drm.o
+
+obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o
+obj-$(CONFIG_DRM_PANEL_BACKLIGHT_QUIRKS) += drm_panel_backlight_quirks.o
+
+#
+# Memory-management helpers
+#
+#
+obj-$(CONFIG_DRM_EXEC) += drm_exec.o
+obj-$(CONFIG_DRM_GPUVM) += drm_gpuvm.o
+
+drm_gpusvm_helper-y := \
+ drm_gpusvm.o\
+ drm_pagemap.o
+obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm_helper.o
+
+obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
+
+drm_dma_helper-y := drm_gem_dma_helper.o
+drm_dma_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fbdev_dma.o
+drm_dma_helper-$(CONFIG_DRM_KMS_HELPER) += drm_fb_dma_helper.o
+obj-$(CONFIG_DRM_GEM_DMA_HELPER) += drm_dma_helper.o
-obj-$(CONFIG_DRM_DP_AUX_BUS) += drm_dp_aux_bus.o
+drm_shmem_helper-y := drm_gem_shmem_helper.o
+drm_shmem_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fbdev_shmem.o
+obj-$(CONFIG_DRM_GEM_SHMEM_HELPER) += drm_shmem_helper.o
+
+drm_suballoc_helper-y := drm_suballoc.o
+obj-$(CONFIG_DRM_SUBALLOC_HELPER) += drm_suballoc_helper.o
drm_vram_helper-y := drm_gem_vram_helper.o
obj-$(CONFIG_DRM_VRAM_HELPER) += drm_vram_helper.o
drm_ttm_helper-y := drm_gem_ttm_helper.o
+drm_ttm_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fbdev_ttm.o
obj-$(CONFIG_DRM_TTM_HELPER) += drm_ttm_helper.o
-drm_kms_helper-y := drm_bridge_connector.o drm_crtc_helper.o drm_dp_helper.o \
- drm_dsc.o drm_probe_helper.o \
- drm_plane_helper.o drm_dp_mst_topology.o drm_atomic_helper.o \
- drm_kms_helper_common.o drm_dp_dual_mode_helper.o \
- drm_simple_kms_helper.o drm_modeset_helper.o \
- drm_scdc_helper.o drm_gem_atomic_helper.o \
- drm_gem_framebuffer_helper.o \
- drm_atomic_state_helper.o drm_damage_helper.o \
- drm_format_helper.o drm_self_refresh_helper.o
+#
+# Modesetting helpers
+#
+drm_kms_helper-y := \
+ drm_atomic_helper.o \
+ drm_atomic_state_helper.o \
+ drm_bridge_helper.o \
+ drm_crtc_helper.o \
+ drm_damage_helper.o \
+ drm_flip_work.o \
+ drm_format_helper.o \
+ drm_gem_atomic_helper.o \
+ drm_gem_framebuffer_helper.o \
+ drm_kms_helper_common.o \
+ drm_modeset_helper.o \
+ drm_plane_helper.o \
+ drm_probe_helper.o \
+ drm_self_refresh_helper.o \
+ drm_simple_kms_helper.o \
+ drm_vblank_helper.o
drm_kms_helper-$(CONFIG_DRM_PANEL_BRIDGE) += bridge/panel.o
drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o
-drm_kms_helper-$(CONFIG_DRM_KMS_CMA_HELPER) += drm_fb_cma_helper.o
-drm_kms_helper-$(CONFIG_DRM_DP_AUX_CHARDEV) += drm_dp_aux_dev.o
-drm_kms_helper-$(CONFIG_DRM_DP_CEC) += drm_dp_cec.o
-
obj-$(CONFIG_DRM_KMS_HELPER) += drm_kms_helper.o
-obj-$(CONFIG_DRM_DEBUG_SELFTEST) += selftests/
-obj-$(CONFIG_DRM) += drm.o
+#
+# Drivers and the rest
+#
+
+obj-y += tests/
+
obj-$(CONFIG_DRM_MIPI_DBI) += drm_mipi_dbi.o
obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o
-obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o
obj-y += arm/
+obj-y += clients/
+obj-y += display/
obj-$(CONFIG_DRM_TTM) += ttm/
obj-$(CONFIG_DRM_SCHED) += scheduler/
-obj-$(CONFIG_DRM_TDFX) += tdfx/
-obj-$(CONFIG_DRM_R128) += r128/
obj-$(CONFIG_DRM_RADEON)+= radeon/
obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
-obj-$(CONFIG_DRM_MGA) += mga/
-obj-$(CONFIG_DRM_I810) += i810/
+obj-$(CONFIG_DRM_AMDGPU)+= amd/amdxcp/
obj-$(CONFIG_DRM_I915) += i915/
+obj-$(CONFIG_DRM_XE) += xe/
obj-$(CONFIG_DRM_KMB_DISPLAY) += kmb/
obj-$(CONFIG_DRM_MGAG200) += mgag200/
obj-$(CONFIG_DRM_V3D) += v3d/
obj-$(CONFIG_DRM_VC4) += vc4/
-obj-$(CONFIG_DRM_SIS) += sis/
-obj-$(CONFIG_DRM_SAVAGE)+= savage/
obj-$(CONFIG_DRM_VMWGFX)+= vmwgfx/
-obj-$(CONFIG_DRM_VIA) +=via/
obj-$(CONFIG_DRM_VGEM) += vgem/
obj-$(CONFIG_DRM_VKMS) += vkms/
obj-$(CONFIG_DRM_NOUVEAU) +=nouveau/
+obj-$(CONFIG_DRM_NOVA) += nova/
obj-$(CONFIG_DRM_EXYNOS) +=exynos/
obj-$(CONFIG_DRM_ROCKCHIP) +=rockchip/
obj-$(CONFIG_DRM_GMA500) += gma500/
@@ -92,8 +192,7 @@ obj-$(CONFIG_DRM_UDL) += udl/
obj-$(CONFIG_DRM_AST) += ast/
obj-$(CONFIG_DRM_ARMADA) += armada/
obj-$(CONFIG_DRM_ATMEL_HLCDC) += atmel-hlcdc/
-obj-y += rcar-du/
-obj-$(CONFIG_DRM_SHMOBILE) +=shmobile/
+obj-y += renesas/
obj-y += omapdrm/
obj-$(CONFIG_DRM_SUN4I) += sun4i/
obj-y += tilcdc/
@@ -105,25 +204,52 @@ obj-$(CONFIG_DRM_STM) += stm/
obj-$(CONFIG_DRM_STI) += sti/
obj-y += imx/
obj-$(CONFIG_DRM_INGENIC) += ingenic/
+obj-$(CONFIG_DRM_LOGICVC) += logicvc/
obj-$(CONFIG_DRM_MEDIATEK) += mediatek/
obj-$(CONFIG_DRM_MESON) += meson/
-obj-y += i2c/
obj-y += panel/
obj-y += bridge/
obj-$(CONFIG_DRM_FSL_DCU) += fsl-dcu/
obj-$(CONFIG_DRM_ETNAVIV) += etnaviv/
obj-y += hisilicon/
-obj-$(CONFIG_DRM_MXSFB) += mxsfb/
+obj-y += mxsfb/
+obj-y += sysfb/
obj-y += tiny/
obj-$(CONFIG_DRM_PL111) += pl111/
obj-$(CONFIG_DRM_TVE200) += tve200/
+obj-$(CONFIG_DRM_ADP) += adp/
obj-$(CONFIG_DRM_XEN) += xen/
obj-$(CONFIG_DRM_VBOXVIDEO) += vboxvideo/
obj-$(CONFIG_DRM_LIMA) += lima/
obj-$(CONFIG_DRM_PANFROST) += panfrost/
+obj-$(CONFIG_DRM_PANTHOR) += panthor/
+obj-$(CONFIG_DRM_TYR) += tyr/
obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/
obj-$(CONFIG_DRM_MCDE) += mcde/
obj-$(CONFIG_DRM_TIDSS) += tidss/
obj-y += xlnx/
obj-y += gud/
obj-$(CONFIG_DRM_HYPERV) += hyperv/
+obj-y += sitronix/
+obj-y += solomon/
+obj-$(CONFIG_DRM_SPRD) += sprd/
+obj-$(CONFIG_DRM_LOONGSON) += loongson/
+obj-$(CONFIG_DRM_POWERVR) += imagination/
+
+# Ensure drm headers are self-contained and pass kernel-doc
+hdrtest-files := \
+ $(shell cd $(src) && find . -maxdepth 1 -name 'drm_*.h') \
+ $(shell cd $(src) && find display lib -name '*.h')
+
+always-$(CONFIG_DRM_HEADER_TEST) += \
+ $(patsubst %.h,%.hdrtest, $(hdrtest-files))
+
+# Include the header twice to detect missing include guard.
+quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@)
+ cmd_hdrtest = \
+ $(CC) $(c_flags) -fsyntax-only -x c /dev/null -include $< -include $<; \
+ PYTHONDONTWRITEBYTECODE=1 $(PYTHON3) $(KERNELDOC) -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \
+ touch $@
+
+$(obj)/%.hdrtest: $(src)/%.h FORCE
+ $(call if_changed_dep,hdrtest)
diff --git a/drivers/gpu/drm/adp/Kconfig b/drivers/gpu/drm/adp/Kconfig
new file mode 100644
index 000000000000..9fcc27eb200d
--- /dev/null
+++ b/drivers/gpu/drm/adp/Kconfig
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+config DRM_ADP
+ tristate "DRM Support for pre-DCP Apple display controllers"
+ depends on DRM && OF && ARM64
+ depends on ARCH_APPLE || COMPILE_TEST
+ select DRM_KMS_HELPER
+ select DRM_BRIDGE_CONNECTOR
+ select DRM_DISPLAY_HELPER
+ select DRM_KMS_DMA_HELPER
+ select DRM_GEM_DMA_HELPER
+ select DRM_PANEL_BRIDGE
+ select VIDEOMODE_HELPERS
+ select DRM_MIPI_DSI
+ help
+ Choose this option if you have an Apple Arm laptop with a touchbar.
+
+ If M is selected, this module will be called adpdrm.
diff --git a/drivers/gpu/drm/adp/Makefile b/drivers/gpu/drm/adp/Makefile
new file mode 100644
index 000000000000..8e7b618edd35
--- /dev/null
+++ b/drivers/gpu/drm/adp/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+adpdrm-y := adp_drv.o
+adpdrm-mipi-y := adp-mipi.o
+obj-$(CONFIG_DRM_ADP) += adpdrm.o adpdrm-mipi.o
diff --git a/drivers/gpu/drm/adp/adp-mipi.c b/drivers/gpu/drm/adp/adp-mipi.c
new file mode 100644
index 000000000000..cba7d32150a9
--- /dev/null
+++ b/drivers/gpu/drm/adp/adp-mipi.c
@@ -0,0 +1,277 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/component.h>
+#include <linux/iopoll.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include <drm/drm_bridge.h>
+#include <drm/drm_mipi_dsi.h>
+
+#define DSI_GEN_HDR 0x6c
+#define DSI_GEN_PLD_DATA 0x70
+
+#define DSI_CMD_PKT_STATUS 0x74
+
+#define GEN_PLD_R_EMPTY BIT(4)
+#define GEN_PLD_W_FULL BIT(3)
+#define GEN_PLD_W_EMPTY BIT(2)
+#define GEN_CMD_FULL BIT(1)
+#define GEN_CMD_EMPTY BIT(0)
+#define GEN_RD_CMD_BUSY BIT(6)
+#define CMD_PKT_STATUS_TIMEOUT_US 20000
+
+struct adp_mipi_drv_private {
+ struct mipi_dsi_host dsi;
+ struct drm_bridge bridge;
+ struct drm_bridge *next_bridge;
+ void __iomem *mipi;
+};
+
+#define mipi_to_adp(x) container_of(x, struct adp_mipi_drv_private, dsi)
+
+static int adp_dsi_gen_pkt_hdr_write(struct adp_mipi_drv_private *adp, u32 hdr_val)
+{
+ int ret;
+ u32 val, mask;
+
+ ret = readl_poll_timeout(adp->mipi + DSI_CMD_PKT_STATUS,
+ val, !(val & GEN_CMD_FULL), 1000,
+ CMD_PKT_STATUS_TIMEOUT_US);
+ if (ret) {
+ dev_err(adp->dsi.dev, "failed to get available command FIFO\n");
+ return ret;
+ }
+
+ writel(hdr_val, adp->mipi + DSI_GEN_HDR);
+
+ mask = GEN_CMD_EMPTY | GEN_PLD_W_EMPTY;
+ ret = readl_poll_timeout(adp->mipi + DSI_CMD_PKT_STATUS,
+ val, (val & mask) == mask,
+ 1000, CMD_PKT_STATUS_TIMEOUT_US);
+ if (ret) {
+ dev_err(adp->dsi.dev, "failed to write command FIFO\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int adp_dsi_write(struct adp_mipi_drv_private *adp,
+ const struct mipi_dsi_packet *packet)
+{
+ const u8 *tx_buf = packet->payload;
+ int len = packet->payload_length, pld_data_bytes = sizeof(u32), ret;
+ __le32 word;
+ u32 val;
+
+ while (len) {
+ if (len < pld_data_bytes) {
+ word = 0;
+ memcpy(&word, tx_buf, len);
+ writel(le32_to_cpu(word), adp->mipi + DSI_GEN_PLD_DATA);
+ len = 0;
+ } else {
+ memcpy(&word, tx_buf, pld_data_bytes);
+ writel(le32_to_cpu(word), adp->mipi + DSI_GEN_PLD_DATA);
+ tx_buf += pld_data_bytes;
+ len -= pld_data_bytes;
+ }
+
+ ret = readl_poll_timeout(adp->mipi + DSI_CMD_PKT_STATUS,
+ val, !(val & GEN_PLD_W_FULL), 1000,
+ CMD_PKT_STATUS_TIMEOUT_US);
+ if (ret) {
+ dev_err(adp->dsi.dev,
+ "failed to get available write payload FIFO\n");
+ return ret;
+ }
+ }
+
+ word = 0;
+ memcpy(&word, packet->header, sizeof(packet->header));
+ return adp_dsi_gen_pkt_hdr_write(adp, le32_to_cpu(word));
+}
+
+static int adp_dsi_read(struct adp_mipi_drv_private *adp,
+ const struct mipi_dsi_msg *msg)
+{
+ int i, j, ret, len = msg->rx_len;
+ u8 *buf = msg->rx_buf;
+ u32 val;
+
+ /* Wait for the end of the read operation */
+ ret = readl_poll_timeout(adp->mipi + DSI_CMD_PKT_STATUS,
+ val, !(val & GEN_RD_CMD_BUSY),
+ 1000, CMD_PKT_STATUS_TIMEOUT_US);
+ if (ret) {
+ dev_err(adp->dsi.dev, "Timeout during read operation\n");
+ return ret;
+ }
+
+ for (i = 0; i < len; i += 4) {
+ /* Read fifo must not be empty before all bytes are read */
+ ret = readl_poll_timeout(adp->mipi + DSI_CMD_PKT_STATUS,
+ val, !(val & GEN_PLD_R_EMPTY),
+ 1000, CMD_PKT_STATUS_TIMEOUT_US);
+ if (ret) {
+ dev_err(adp->dsi.dev, "Read payload FIFO is empty\n");
+ return ret;
+ }
+
+ val = readl(adp->mipi + DSI_GEN_PLD_DATA);
+ for (j = 0; j < 4 && j + i < len; j++)
+ buf[i + j] = val >> (8 * j);
+ }
+
+ return ret;
+}
+
+static ssize_t adp_dsi_host_transfer(struct mipi_dsi_host *host,
+ const struct mipi_dsi_msg *msg)
+{
+ struct adp_mipi_drv_private *adp = mipi_to_adp(host);
+ struct mipi_dsi_packet packet;
+ int ret, nb_bytes;
+
+ ret = mipi_dsi_create_packet(&packet, msg);
+ if (ret) {
+ dev_err(adp->dsi.dev, "failed to create packet: %d\n", ret);
+ return ret;
+ }
+
+ ret = adp_dsi_write(adp, &packet);
+ if (ret)
+ return ret;
+
+ if (msg->rx_buf && msg->rx_len) {
+ ret = adp_dsi_read(adp, msg);
+ if (ret)
+ return ret;
+ nb_bytes = msg->rx_len;
+ } else {
+ nb_bytes = packet.size;
+ }
+
+ return nb_bytes;
+}
+
+static int adp_dsi_bind(struct device *dev, struct device *master, void *data)
+{
+ return 0;
+}
+
+static void adp_dsi_unbind(struct device *dev, struct device *master, void *data)
+{
+}
+
+static const struct component_ops adp_dsi_component_ops = {
+ .bind = adp_dsi_bind,
+ .unbind = adp_dsi_unbind,
+};
+
+static int adp_dsi_host_attach(struct mipi_dsi_host *host,
+ struct mipi_dsi_device *dev)
+{
+ struct adp_mipi_drv_private *adp = mipi_to_adp(host);
+ struct drm_bridge *next;
+ int ret;
+
+ next = devm_drm_of_get_bridge(adp->dsi.dev, adp->dsi.dev->of_node, 1, 0);
+ if (IS_ERR(next))
+ return PTR_ERR(next);
+
+ adp->next_bridge = next;
+
+ drm_bridge_add(&adp->bridge);
+
+ ret = component_add(host->dev, &adp_dsi_component_ops);
+ if (ret) {
+ pr_err("failed to add dsi_host component: %d\n", ret);
+ drm_bridge_remove(&adp->bridge);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int adp_dsi_host_detach(struct mipi_dsi_host *host,
+ struct mipi_dsi_device *dev)
+{
+ struct adp_mipi_drv_private *adp = mipi_to_adp(host);
+
+ component_del(host->dev, &adp_dsi_component_ops);
+ drm_bridge_remove(&adp->bridge);
+ return 0;
+}
+
+static const struct mipi_dsi_host_ops adp_dsi_host_ops = {
+ .transfer = adp_dsi_host_transfer,
+ .attach = adp_dsi_host_attach,
+ .detach = adp_dsi_host_detach,
+};
+
+static int adp_dsi_bridge_attach(struct drm_bridge *bridge,
+ struct drm_encoder *encoder,
+ enum drm_bridge_attach_flags flags)
+{
+ struct adp_mipi_drv_private *adp =
+ container_of(bridge, struct adp_mipi_drv_private, bridge);
+
+ return drm_bridge_attach(encoder, adp->next_bridge, bridge, flags);
+}
+
+static const struct drm_bridge_funcs adp_dsi_bridge_funcs = {
+ .attach = adp_dsi_bridge_attach,
+};
+
+static int adp_mipi_probe(struct platform_device *pdev)
+{
+ struct adp_mipi_drv_private *adp;
+
+ adp = devm_drm_bridge_alloc(&pdev->dev, struct adp_mipi_drv_private,
+ bridge, &adp_dsi_bridge_funcs);
+ if (IS_ERR(adp))
+ return PTR_ERR(adp);
+
+ adp->mipi = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(adp->mipi)) {
+ dev_err(&pdev->dev, "failed to map mipi mmio");
+ return PTR_ERR(adp->mipi);
+ }
+
+ adp->dsi.dev = &pdev->dev;
+ adp->dsi.ops = &adp_dsi_host_ops;
+ adp->bridge.of_node = pdev->dev.of_node;
+ adp->bridge.type = DRM_MODE_CONNECTOR_DSI;
+ dev_set_drvdata(&pdev->dev, adp);
+ return mipi_dsi_host_register(&adp->dsi);
+}
+
+static void adp_mipi_remove(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct adp_mipi_drv_private *adp = dev_get_drvdata(dev);
+
+ mipi_dsi_host_unregister(&adp->dsi);
+}
+
+static const struct of_device_id adp_mipi_of_match[] = {
+ { .compatible = "apple,h7-display-pipe-mipi", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, adp_mipi_of_match);
+
+static struct platform_driver adp_mipi_platform_driver = {
+ .driver = {
+ .name = "adp-mipi",
+ .of_match_table = adp_mipi_of_match,
+ },
+ .probe = adp_mipi_probe,
+ .remove = adp_mipi_remove,
+};
+
+module_platform_driver(adp_mipi_platform_driver);
+
+MODULE_DESCRIPTION("Apple Display Pipe MIPI driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/adp/adp_drv.c b/drivers/gpu/drm/adp/adp_drv.c
new file mode 100644
index 000000000000..4554cf75565e
--- /dev/null
+++ b/drivers/gpu/drm/adp/adp_drv.c
@@ -0,0 +1,614 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/component.h>
+#include <linux/iopoll.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
+#include <drm/drm_bridge_connector.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_fb_dma_helper.h>
+#include <drm/drm_framebuffer.h>
+#include <drm/drm_gem_atomic_helper.h>
+#include <drm/drm_gem_dma_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_of.h>
+#include <drm/drm_print.h>
+#include <drm/drm_probe_helper.h>
+#include <drm/drm_vblank.h>
+
+#define ADP_INT_STATUS 0x34
+#define ADP_INT_STATUS_INT_MASK 0x7
+#define ADP_INT_STATUS_VBLANK 0x1
+#define ADP_CTRL 0x100
+#define ADP_CTRL_VBLANK_ON 0x12
+#define ADP_CTRL_FIFO_ON 0x601
+#define ADP_SCREEN_SIZE 0x0c
+#define ADP_SCREEN_HSIZE GENMASK(15, 0)
+#define ADP_SCREEN_VSIZE GENMASK(31, 16)
+
+#define ADBE_FIFO 0x10c0
+#define ADBE_FIFO_SYNC 0xc0000000
+
+#define ADBE_BLEND_BYPASS 0x2020
+#define ADBE_BLEND_EN1 0x2028
+#define ADBE_BLEND_EN2 0x2074
+#define ADBE_BLEND_EN3 0x202c
+#define ADBE_BLEND_EN4 0x2034
+#define ADBE_MASK_BUF 0x2200
+
+#define ADBE_SRC_START 0x4040
+#define ADBE_SRC_SIZE 0x4048
+#define ADBE_DST_START 0x4050
+#define ADBE_DST_SIZE 0x4054
+#define ADBE_STRIDE 0x4038
+#define ADBE_FB_BASE 0x4030
+
+#define ADBE_LAYER_EN1 0x4020
+#define ADBE_LAYER_EN2 0x4068
+#define ADBE_LAYER_EN3 0x40b4
+#define ADBE_LAYER_EN4 0x40f4
+#define ADBE_SCALE_CTL 0x40ac
+#define ADBE_SCALE_CTL_BYPASS 0x100000
+
+#define ADBE_LAYER_CTL 0x1038
+#define ADBE_LAYER_CTL_ENABLE 0x10000
+
+#define ADBE_PIX_FMT 0x402c
+#define ADBE_PIX_FMT_XRGB32 0x53e4001
+
+static int adp_open(struct inode *inode, struct file *filp)
+{
+ /*
+ * The modesetting driver does not check the non-desktop connector
+ * property and keeps the device open and locked. If the touchbar daemon
+ * opens the device first, modesetting breaks the whole X session.
+ * Simply refuse to open the device for X11 server processes as a
+ * workaround.
+ */
+ if (current->comm[0] == 'X')
+ return -EBUSY;
+
+ return drm_open(inode, filp);
+}
+
+static const struct file_operations adp_fops = {
+ .owner = THIS_MODULE,
+ .open = adp_open,
+ .release = drm_release,
+ .unlocked_ioctl = drm_ioctl,
+ .compat_ioctl = drm_compat_ioctl,
+ .poll = drm_poll,
+ .read = drm_read,
+ .llseek = noop_llseek,
+ .mmap = drm_gem_mmap,
+ .fop_flags = FOP_UNSIGNED_OFFSET,
+ DRM_GEM_DMA_UNMAPPED_AREA_FOPS
+};
+
+static int adp_drm_gem_dumb_create(struct drm_file *file_priv,
+ struct drm_device *drm,
+ struct drm_mode_create_dumb *args)
+{
+ args->height = ALIGN(args->height, 64);
+ args->size = args->pitch * args->height;
+
+ return drm_gem_dma_dumb_create_internal(file_priv, drm, args);
+}
+
+static const struct drm_driver adp_driver = {
+ .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC,
+ .fops = &adp_fops,
+ DRM_GEM_DMA_DRIVER_OPS_VMAP_WITH_DUMB_CREATE(adp_drm_gem_dumb_create),
+ .name = "adp",
+ .desc = "Apple Display Pipe DRM Driver",
+ .major = 0,
+ .minor = 1,
+};
+
+struct adp_drv_private {
+ struct drm_device drm;
+ struct drm_crtc crtc;
+ struct drm_encoder *encoder;
+ struct drm_connector *connector;
+ struct drm_bridge *next_bridge;
+ void __iomem *be;
+ void __iomem *fe;
+ u32 *mask_buf;
+ u64 mask_buf_size;
+ dma_addr_t mask_iova;
+ int be_irq;
+ int fe_irq;
+ struct drm_pending_vblank_event *event;
+};
+
+#define to_adp(x) container_of(x, struct adp_drv_private, drm)
+#define crtc_to_adp(x) container_of(x, struct adp_drv_private, crtc)
+
+static int adp_plane_atomic_check(struct drm_plane *plane,
+ struct drm_atomic_state *state)
+{
+ struct drm_plane_state *new_plane_state;
+ struct drm_crtc_state *crtc_state;
+
+ new_plane_state = drm_atomic_get_new_plane_state(state, plane);
+
+ if (!new_plane_state->crtc)
+ return 0;
+
+ crtc_state = drm_atomic_get_crtc_state(state, new_plane_state->crtc);
+ if (IS_ERR(crtc_state))
+ return PTR_ERR(crtc_state);
+
+ return drm_atomic_helper_check_plane_state(new_plane_state,
+ crtc_state,
+ DRM_PLANE_NO_SCALING,
+ DRM_PLANE_NO_SCALING,
+ true, true);
+}
+
+static void adp_plane_atomic_update(struct drm_plane *plane,
+ struct drm_atomic_state *state)
+{
+ struct adp_drv_private *adp;
+ struct drm_rect src_rect;
+ struct drm_gem_dma_object *obj;
+ struct drm_framebuffer *fb;
+ struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane);
+ u32 src_pos, src_size, dst_pos, dst_size;
+
+ if (!plane || !new_state)
+ return;
+
+ fb = new_state->fb;
+ if (!fb)
+ return;
+ adp = to_adp(plane->dev);
+
+ drm_rect_fp_to_int(&src_rect, &new_state->src);
+ src_pos = src_rect.x1 << 16 | src_rect.y1;
+ dst_pos = new_state->dst.x1 << 16 | new_state->dst.y1;
+ src_size = drm_rect_width(&src_rect) << 16 | drm_rect_height(&src_rect);
+ dst_size = drm_rect_width(&new_state->dst) << 16 |
+ drm_rect_height(&new_state->dst);
+ writel(src_pos, adp->be + ADBE_SRC_START);
+ writel(src_size, adp->be + ADBE_SRC_SIZE);
+ writel(dst_pos, adp->be + ADBE_DST_START);
+ writel(dst_size, adp->be + ADBE_DST_SIZE);
+ writel(fb->pitches[0], adp->be + ADBE_STRIDE);
+ obj = drm_fb_dma_get_gem_obj(fb, 0);
+ if (obj)
+ writel(obj->dma_addr + fb->offsets[0], adp->be + ADBE_FB_BASE);
+
+ writel(BIT(0), adp->be + ADBE_LAYER_EN1);
+ writel(BIT(0), adp->be + ADBE_LAYER_EN2);
+ writel(BIT(0), adp->be + ADBE_LAYER_EN3);
+ writel(BIT(0), adp->be + ADBE_LAYER_EN4);
+ writel(ADBE_SCALE_CTL_BYPASS, adp->be + ADBE_SCALE_CTL);
+ writel(ADBE_LAYER_CTL_ENABLE | BIT(0), adp->be + ADBE_LAYER_CTL);
+ writel(ADBE_PIX_FMT_XRGB32, adp->be + ADBE_PIX_FMT);
+}
+
+static void adp_plane_atomic_disable(struct drm_plane *plane,
+ struct drm_atomic_state *state)
+{
+ struct adp_drv_private *adp = to_adp(plane->dev);
+
+ writel(0x0, adp->be + ADBE_LAYER_EN1);
+ writel(0x0, adp->be + ADBE_LAYER_EN2);
+ writel(0x0, adp->be + ADBE_LAYER_EN3);
+ writel(0x0, adp->be + ADBE_LAYER_EN4);
+ writel(ADBE_LAYER_CTL_ENABLE, adp->be + ADBE_LAYER_CTL);
+}
+
+static const struct drm_plane_helper_funcs adp_plane_helper_funcs = {
+ .atomic_check = adp_plane_atomic_check,
+ .atomic_update = adp_plane_atomic_update,
+ .atomic_disable = adp_plane_atomic_disable,
+ DRM_GEM_SHADOW_PLANE_HELPER_FUNCS
+};
+
+static const struct drm_plane_funcs adp_plane_funcs = {
+ .update_plane = drm_atomic_helper_update_plane,
+ .disable_plane = drm_atomic_helper_disable_plane,
+ DRM_GEM_SHADOW_PLANE_FUNCS
+};
+
+static const u32 plane_formats[] = {
+ DRM_FORMAT_XRGB8888,
+};
+
+#define ALL_CRTCS 1
+
+static struct drm_plane *adp_plane_new(struct adp_drv_private *adp)
+{
+ struct drm_device *drm = &adp->drm;
+ struct drm_plane *plane;
+
+ plane = __drmm_universal_plane_alloc(drm, sizeof(struct drm_plane), 0,
+ ALL_CRTCS, &adp_plane_funcs,
+ plane_formats, ARRAY_SIZE(plane_formats),
+ NULL, DRM_PLANE_TYPE_PRIMARY, "plane");
+ if (IS_ERR(plane)) {
+ drm_err(drm, "failed to allocate plane");
+ return plane;
+ }
+
+ drm_plane_helper_add(plane, &adp_plane_helper_funcs);
+ return plane;
+}
+
+static void adp_enable_vblank(struct adp_drv_private *adp)
+{
+ u32 cur_ctrl;
+
+ writel(ADP_INT_STATUS_INT_MASK, adp->fe + ADP_INT_STATUS);
+
+ cur_ctrl = readl(adp->fe + ADP_CTRL);
+ writel(cur_ctrl | ADP_CTRL_VBLANK_ON, adp->fe + ADP_CTRL);
+}
+
+static int adp_crtc_enable_vblank(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ struct adp_drv_private *adp = to_adp(dev);
+
+ adp_enable_vblank(adp);
+
+ return 0;
+}
+
+static void adp_disable_vblank(struct adp_drv_private *adp)
+{
+ u32 cur_ctrl;
+
+ cur_ctrl = readl(adp->fe + ADP_CTRL);
+ writel(cur_ctrl & ~ADP_CTRL_VBLANK_ON, adp->fe + ADP_CTRL);
+ writel(ADP_INT_STATUS_INT_MASK, adp->fe + ADP_INT_STATUS);
+}
+
+static void adp_crtc_disable_vblank(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ struct adp_drv_private *adp = to_adp(dev);
+
+ adp_disable_vblank(adp);
+}
+
+static void adp_crtc_atomic_enable(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ struct adp_drv_private *adp = crtc_to_adp(crtc);
+
+ writel(BIT(0), adp->be + ADBE_BLEND_EN2);
+ writel(BIT(4), adp->be + ADBE_BLEND_EN1);
+ writel(BIT(0), adp->be + ADBE_BLEND_EN3);
+ writel(BIT(0), adp->be + ADBE_BLEND_BYPASS);
+ writel(BIT(0), adp->be + ADBE_BLEND_EN4);
+ drm_crtc_vblank_on(crtc);
+}
+
+static void adp_crtc_atomic_disable(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ struct adp_drv_private *adp = crtc_to_adp(crtc);
+ struct drm_crtc_state *old_state = drm_atomic_get_old_crtc_state(state, crtc);
+
+ drm_atomic_helper_disable_planes_on_crtc(old_state, false);
+
+ writel(0x0, adp->be + ADBE_BLEND_EN2);
+ writel(0x0, adp->be + ADBE_BLEND_EN1);
+ writel(0x0, adp->be + ADBE_BLEND_EN3);
+ writel(0x0, adp->be + ADBE_BLEND_BYPASS);
+ writel(0x0, adp->be + ADBE_BLEND_EN4);
+ drm_crtc_vblank_off(crtc);
+}
+
+static void adp_crtc_atomic_flush(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ u32 frame_num = 1;
+ unsigned long flags;
+ struct adp_drv_private *adp = crtc_to_adp(crtc);
+ struct drm_crtc_state *new_state = drm_atomic_get_new_crtc_state(state, crtc);
+ u64 new_size = ALIGN(new_state->mode.hdisplay *
+ new_state->mode.vdisplay * 4, PAGE_SIZE);
+
+ if (new_size != adp->mask_buf_size) {
+ if (adp->mask_buf)
+ dma_free_coherent(crtc->dev->dev, adp->mask_buf_size,
+ adp->mask_buf, adp->mask_iova);
+ adp->mask_buf = NULL;
+ if (new_size != 0) {
+ adp->mask_buf = dma_alloc_coherent(crtc->dev->dev, new_size,
+ &adp->mask_iova, GFP_KERNEL);
+ memset(adp->mask_buf, 0xFF, new_size);
+ writel(adp->mask_iova, adp->be + ADBE_MASK_BUF);
+ }
+ adp->mask_buf_size = new_size;
+ }
+ writel(ADBE_FIFO_SYNC | frame_num, adp->be + ADBE_FIFO);
+ //FIXME: use adbe flush interrupt
+ if (crtc->state->event) {
+ struct drm_pending_vblank_event *event = crtc->state->event;
+
+ crtc->state->event = NULL;
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+
+ if (drm_crtc_vblank_get(crtc) != 0)
+ drm_crtc_send_vblank_event(crtc, event);
+ else
+ adp->event = event;
+
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+ }
+}
+
+/* CRTC entry points: generic atomic helpers plus the driver's vblank hooks. */
+static const struct drm_crtc_funcs adp_crtc_funcs = {
+	.reset = drm_atomic_helper_crtc_reset,
+	.destroy = drm_crtc_cleanup,
+	.set_config = drm_atomic_helper_set_config,
+	.page_flip = drm_atomic_helper_page_flip,
+	.atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
+	.enable_vblank = adp_crtc_enable_vblank,
+	.disable_vblank = adp_crtc_disable_vblank,
+};
+
+
+/* Atomic helper callbacks implemented by this driver for its CRTC. */
+static const struct drm_crtc_helper_funcs adp_crtc_helper_funcs = {
+	.atomic_flush = adp_crtc_atomic_flush,
+	.atomic_enable = adp_crtc_atomic_enable,
+	.atomic_disable = adp_crtc_atomic_disable,
+};
+
+/*
+ * Create the primary plane and initialise the CRTC with it, then install
+ * the CRTC helper callbacks.
+ *
+ * Returns 0 on success or a negative errno from plane/CRTC creation.
+ */
+static int adp_setup_crtc(struct adp_drv_private *adp)
+{
+	struct drm_plane *plane = adp_plane_new(adp);
+	int err;
+
+	if (IS_ERR(plane))
+		return PTR_ERR(plane);
+
+	err = drm_crtc_init_with_planes(&adp->drm, &adp->crtc, plane,
+					NULL, &adp_crtc_funcs, NULL);
+	if (!err)
+		drm_crtc_helper_add(&adp->crtc, &adp_crtc_helper_funcs);
+
+	return err;
+}
+
+/* Mode-config callbacks; all of them are stock DRM helpers. */
+static const struct drm_mode_config_funcs adp_mode_config_funcs = {
+	.atomic_check = drm_atomic_helper_check,
+	.atomic_commit = drm_atomic_helper_commit,
+	.fb_create = drm_gem_fb_create_with_dirty,
+};
+
+/*
+ * Initialise the KMS mode configuration: framebuffer size limits, CRTC,
+ * encoder, bridge chain, connector and vblank support, followed by a reset
+ * of the mode-config state.
+ *
+ * Returns 0 on success or a negative errno from the failing step.
+ */
+static int adp_setup_mode_config(struct adp_drv_private *adp)
+{
+	struct drm_device *drm = &adp->drm;
+	struct drm_mode_config *cfg = &drm->mode_config;
+	u32 screen;
+	int err;
+
+	err = drmm_mode_config_init(drm);
+	if (err)
+		return err;
+
+	/*
+	 * Query the screen size and restrict the framebuffer size to it,
+	 * aligned up to the next multiple of 64. This is not strictly
+	 * necessary, but it offers a simple check for non-desktop devices:
+	 * Xorg's modesetting driver ignores the connector "non-desktop"
+	 * property, whereas the maximum framebuffer width or height is easy
+	 * to inspect, so a device can be rejected if, for example, its
+	 * maximum width/height is smaller than 120.
+	 * A touchbar daemon is not limited by this small framebuffer size.
+	 */
+	screen = readl(adp->fe + ADP_SCREEN_SIZE);
+
+	cfg->min_width = 32;
+	cfg->min_height = 32;
+	cfg->max_width = ALIGN(FIELD_GET(ADP_SCREEN_HSIZE, screen), 64);
+	cfg->max_height = ALIGN(FIELD_GET(ADP_SCREEN_VSIZE, screen), 64);
+	cfg->preferred_depth = 24;
+	cfg->prefer_shadow = 0;
+	cfg->funcs = &adp_mode_config_funcs;
+
+	err = adp_setup_crtc(adp);
+	if (err) {
+		drm_err(drm, "failed to create crtc");
+		return err;
+	}
+
+	adp->encoder = drmm_plain_encoder_alloc(drm, NULL, DRM_MODE_ENCODER_DSI, NULL);
+	if (IS_ERR(adp->encoder)) {
+		drm_err(drm, "failed to init encoder");
+		return PTR_ERR(adp->encoder);
+	}
+	adp->encoder->possible_crtcs = ALL_CRTCS;
+
+	/* The connector is created separately below, not by the bridge. */
+	err = drm_bridge_attach(adp->encoder, adp->next_bridge, NULL,
+				DRM_BRIDGE_ATTACH_NO_CONNECTOR);
+	if (err) {
+		drm_err(drm, "failed to init bridge chain");
+		return err;
+	}
+
+	adp->connector = drm_bridge_connector_init(drm, adp->encoder);
+	if (IS_ERR(adp->connector))
+		return PTR_ERR(adp->connector);
+
+	drm_connector_attach_encoder(adp->connector, adp->encoder);
+
+	err = drm_vblank_init(drm, cfg->num_crtc);
+	if (err < 0) {
+		drm_err(drm, "failed to initialize vblank");
+		return err;
+	}
+
+	drm_mode_config_reset(drm);
+
+	return 0;
+}
+
+/*
+ * Map the "be" and "fe" MMIO regions and look up the "be" and "fe"
+ * interrupts of the platform device, storing the results in @adp.
+ *
+ * Returns 0 on success or a negative errno from the first failing lookup.
+ */
+static int adp_parse_of(struct platform_device *pdev, struct adp_drv_private *adp)
+{
+	adp->be = devm_platform_ioremap_resource_byname(pdev, "be");
+	if (IS_ERR(adp->be)) {
+		dev_err(&pdev->dev, "failed to map display backend mmio");
+		return PTR_ERR(adp->be);
+	}
+
+	adp->fe = devm_platform_ioremap_resource_byname(pdev, "fe");
+	if (IS_ERR(adp->fe)) {
+		dev_err(&pdev->dev, "failed to map display pipe mmio");
+		return PTR_ERR(adp->fe);
+	}
+
+	adp->be_irq = platform_get_irq_byname(pdev, "be");
+	if (adp->be_irq < 0)
+		return adp->be_irq;
+
+	adp->fe_irq = platform_get_irq_byname(pdev, "fe");
+
+	return adp->fe_irq < 0 ? adp->fe_irq : 0;
+}
+
+/*
+ * Frontend interrupt handler.
+ *
+ * On a vblank interrupt the DRM vblank machinery is notified and, if an
+ * event was parked in adp->event and bits 0xF00 of ADP_CTRL read back as
+ * 0x600, the event is sent and the vblank reference is dropped. The status
+ * bits that were read are written back to ADP_INT_STATUS afterwards.
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t adp_fe_irq(int irq, void *arg)
+{
+	struct adp_drv_private *adp = arg;
+	struct drm_crtc *crtc = &adp->crtc;
+	u32 status = readl(adp->fe + ADP_INT_STATUS);
+
+	if (status & ADP_INT_STATUS_VBLANK) {
+		drm_crtc_handle_vblank(crtc);
+
+		spin_lock(&crtc->dev->event_lock);
+		/* ADP_CTRL is only read while an event is actually pending. */
+		if (adp->event &&
+		    (readl(adp->fe + ADP_CTRL) & 0xF00) == 0x600) {
+			drm_crtc_send_vblank_event(crtc, adp->event);
+			adp->event = NULL;
+			drm_crtc_vblank_put(crtc);
+		}
+		spin_unlock(&crtc->dev->event_lock);
+	}
+
+	writel(status, adp->fe + ADP_INT_STATUS);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Component master bind callback.
+ *
+ * Writes ADP_CTRL_FIFO_ON to ADP_CTRL, looks up the next bridge from the
+ * device tree, sets up the mode configuration, requests the frontend
+ * interrupt and finally registers the DRM device.
+ *
+ * Returns 0 on success or a negative errno. The interrupt is released again
+ * if registering the DRM device fails, so a failed bind does not leave the
+ * handler installed.
+ */
+static int adp_drm_bind(struct device *dev)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct adp_drv_private *adp = to_adp(drm);
+	int err;
+
+	writel(ADP_CTRL_FIFO_ON, adp->fe + ADP_CTRL);
+
+	adp->next_bridge = drmm_of_get_bridge(&adp->drm, dev->of_node, 0, 0);
+	if (IS_ERR(adp->next_bridge)) {
+		dev_err(dev, "failed to find next bridge");
+		return PTR_ERR(adp->next_bridge);
+	}
+
+	err = adp_setup_mode_config(adp);
+	if (err < 0)
+		return err;
+
+	err = request_irq(adp->fe_irq, adp_fe_irq, 0, "adp-fe", adp);
+	if (err)
+		return err;
+
+	err = drm_dev_register(&adp->drm, 0);
+	if (err) {
+		/* request_irq() is not device-managed; undo it by hand. */
+		free_irq(adp->fe_irq, adp);
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Component master unbind callback: unregister the DRM device, shut the
+ * display pipeline down and release the frontend interrupt, in that order.
+ */
+static void adp_drm_unbind(struct device *dev)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct adp_drv_private *priv = to_adp(ddev);
+
+	drm_dev_unregister(ddev);
+	drm_atomic_helper_shutdown(ddev);
+
+	free_irq(priv->fe_irq, priv);
+}
+
+/* Bind/unbind callbacks used when registering as component master. */
+static const struct component_master_ops adp_master_ops = {
+	.unbind = adp_drm_unbind,
+	.bind = adp_drm_bind,
+};
+
+/* Component match callback: non-zero when @dev's OF node is @data. */
+static int compare_dev(struct device *dev, void *data)
+{
+	if (dev->of_node == data)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Platform probe: allocate the DRM device, parse MMIO/IRQ resources and
+ * register as component master matching the remote node of port 0,
+ * endpoint 0.
+ *
+ * Returns 0 on success, -ENODEV if there is no remote node, or a negative
+ * errno from one of the setup steps.
+ */
+static int adp_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct component_match *match = NULL;
+	struct adp_drv_private *adp;
+	struct device_node *remote;
+	int ret;
+
+	adp = devm_drm_dev_alloc(dev, &adp_driver, struct adp_drv_private, drm);
+	if (IS_ERR(adp))
+		return PTR_ERR(adp);
+
+	dev_set_drvdata(dev, &adp->drm);
+
+	ret = adp_parse_of(pdev, adp);
+	if (ret < 0)
+		return ret;
+
+	remote = of_graph_get_remote_node(dev->of_node, 0, 0);
+	if (!remote)
+		return -ENODEV;
+
+	/* The node reference is dropped right after the match entry is added. */
+	drm_of_component_match_add(dev, &match, compare_dev, remote);
+	of_node_put(remote);
+
+	return component_master_add_with_match(dev, &adp_master_ops, match);
+}
+
+/* Platform remove: drop the component master and clear the driver data. */
+static void adp_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+
+	component_master_del(dev, &adp_master_ops);
+	dev_set_drvdata(dev, NULL);
+}
+
+/* Device-tree compatibles handled by this driver. */
+static const struct of_device_id adp_of_match[] = {
+	{
+		.compatible = "apple,h7-display-pipe",
+	},
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, adp_of_match);
+
+/* Platform driver glue; matched through adp_of_match. */
+static struct platform_driver adp_platform_driver = {
+	.probe = adp_probe,
+	.remove = adp_remove,
+	.driver = {
+		.of_match_table = adp_of_match,
+		.name = "adp",
+	},
+};
+
+module_platform_driver(adp_platform_driver);
+
+MODULE_DESCRIPTION("Apple Display Pipe DRM driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h b/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
index feab8eb7f2a8..b26710cae801 100644
--- a/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
+++ b/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
@@ -19,7 +19,7 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
-*/
+ */
#ifndef _ACP_GFX_IF_H
#define _ACP_GFX_IF_H
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 74a8105fd2c0..7f515be5185d 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -1,39 +1,108 @@
# SPDX-License-Identifier: MIT
+
+config DRM_AMDGPU
+ tristate "AMD GPU"
+ depends on DRM && PCI
+ depends on !UML
+ select FW_LOADER
+ select DRM_CLIENT
+ select DRM_CLIENT_SELECTION
+ select DRM_DISPLAY_DP_HELPER
+ select DRM_DISPLAY_DSC_HELPER
+ select DRM_DISPLAY_HDMI_HELPER
+ select DRM_DISPLAY_HDCP_HELPER
+ select DRM_DISPLAY_HELPER
+ select DRM_KMS_HELPER
+ select DRM_SCHED
+ select DRM_TTM
+ select DRM_TTM_HELPER
+ select POWER_SUPPLY
+ select HWMON
+ select I2C
+ select I2C_ALGOBIT
+ select CRC16
+ select BACKLIGHT_CLASS_DEVICE
+ select INTERVAL_TREE
+ select DRM_BUDDY
+ select DRM_SUBALLOC_HELPER
+ select DRM_EXEC
+ select DRM_PANEL_BACKLIGHT_QUIRKS
+ # amdgpu depends on ACPI_VIDEO when ACPI is enabled, for select to work
+ # ACPI_VIDEO's dependencies must also be selected.
+ select INPUT if ACPI
+ select ACPI_VIDEO if ACPI
+ # On x86 ACPI_VIDEO also needs ACPI_WMI
+ select X86_PLATFORM_DEVICES if ACPI && X86
+ select ACPI_WMI if ACPI && X86
+ help
+ Choose this option if you have a recent AMD Radeon graphics card.
+
+ If M is selected, the module will be called amdgpu.
+
config DRM_AMDGPU_SI
bool "Enable amdgpu support for SI parts"
depends on DRM_AMDGPU
help
- Choose this option if you want to enable experimental support
- for SI asics.
+ Choose this option if you want to enable support
+ for SI (Southern Islands) asics.
- SI is already supported in radeon. Experimental support for SI
- in amdgpu will be disabled by default and is still provided by
- radeon. Use module options to override this:
+ SI (Southern Islands) are first generation GCN GPUs,
+ supported by both drivers: radeon (old) and amdgpu (new).
+ By default, SI dedicated GPUs are supported by amdgpu.
- radeon.si_support=0 amdgpu.si_support=1
+ Use module options to override this:
+ To use radeon for SI,
+ radeon.si_support=1 amdgpu.si_support=0
config DRM_AMDGPU_CIK
bool "Enable amdgpu support for CIK parts"
depends on DRM_AMDGPU
help
- Choose this option if you want to enable support for CIK asics.
+ Choose this option if you want to enable support for CIK (Sea
+ Islands) asics.
- CIK is already supported in radeon. Support for CIK in amdgpu
- will be disabled by default and is still provided by radeon.
- Use module options to override this:
+ CIK (Sea Islands) are second generation GCN GPUs,
+ supported by both drivers: radeon (old) and amdgpu (new).
+ By default,
+ CIK dedicated GPUs are supported by amdgpu
+ CIK APUs are supported by radeon
+ Use module options to override this:
+ To use amdgpu for CIK,
radeon.cik_support=0 amdgpu.cik_support=1
+ To use radeon for CIK,
+ radeon.cik_support=1 amdgpu.cik_support=0
config DRM_AMDGPU_USERPTR
bool "Always enable userptr write support"
depends on DRM_AMDGPU
- depends on MMU
select HMM_MIRROR
select MMU_NOTIFIER
help
This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
isn't already selected to enable full userptr support.
+config DRM_AMD_ISP
+ bool "Enable AMD Image Signal Processor IP support"
+ depends on DRM_AMDGPU && ACPI
+ select MFD_CORE
+ select PM_GENERIC_DOMAINS if PM
+ help
+ Choose this option to enable ISP IP support for AMD SOCs.
+ This adds the ISP (Image Signal Processor) IP driver and wires
+ it up into the amdgpu driver. It is required for cameras
+ on APUs which utilize mipi cameras.
+
+config DRM_AMDGPU_WERROR
+ bool "Force the compiler to throw an error instead of a warning when compiling"
+ depends on DRM_AMDGPU
+ depends on EXPERT
+ depends on !COMPILE_TEST
+ default n
+ help
+ Add -Werror to the build flags for amdgpu.ko.
+ Only enable this if you are fixing compiler warnings in amdgpu.ko.
+
source "drivers/gpu/drm/amd/acp/Kconfig"
source "drivers/gpu/drm/amd/display/Kconfig"
source "drivers/gpu/drm/amd/amdkfd/Kconfig"
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 653726588956..c88760fb52ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -1,5 +1,5 @@
#
-# Copyright 2017 Advanced Micro Devices, Inc.
+# Copyright 2017-2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
@@ -23,7 +23,7 @@
# Makefile for the drm device driver. This driver provides support for the
# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
-FULL_AMD_PATH=$(srctree)/$(src)/..
+FULL_AMD_PATH=$(src)/..
DISPLAY_FOLDER_NAME=display
FULL_AMD_DISPLAY_PATH = $(FULL_AMD_PATH)/$(DISPLAY_FOLDER_NAME)
@@ -34,31 +34,40 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_PATH)/acp/include \
-I$(FULL_AMD_DISPLAY_PATH) \
-I$(FULL_AMD_DISPLAY_PATH)/include \
+ -I$(FULL_AMD_DISPLAY_PATH)/modules/inc \
-I$(FULL_AMD_DISPLAY_PATH)/dc \
-I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \
- -I$(FULL_AMD_PATH)/amdkfd
+ -I$(FULL_AMD_PATH)/amdkfd \
+ -I$(FULL_AMD_PATH)/ras/ras_mgr
+
+# Locally disable W=1 warnings enabled in drm subsystem Makefile
+subdir-ccflags-y += -Wno-override-init
+subdir-ccflags-$(CONFIG_DRM_AMDGPU_WERROR) += -Werror
amdgpu-y := amdgpu_drv.o
# add KMS driver
-amdgpu-y += amdgpu_device.o amdgpu_kms.o \
+amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
amdgpu_atombios.o atombios_crtc.o amdgpu_connectors.o \
atom.o amdgpu_fence.o amdgpu_ttm.o amdgpu_object.o amdgpu_gart.o \
amdgpu_encoders.o amdgpu_display.o amdgpu_i2c.o \
- amdgpu_fb.o amdgpu_gem.o amdgpu_ring.o \
- amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \
+ amdgpu_gem.o amdgpu_ring.o \
+ amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
- amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
+ amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \
+ amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
- amdgpu_debugfs.o amdgpu_ids.o amdgpu_gmc.o amdgpu_mmhub.o \
+ amdgpu_debugfs.o amdgpu_ids.o amdgpu_gmc.o amdgpu_mmhub.o amdgpu_hdp.o \
amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
- amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o \
- amdgpu_eeprom.o amdgpu_mca.o
+ amdgpu_fw_attestation.o amdgpu_securedisplay.o \
+ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
+ amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \
+ amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o amdgpu_ip.o
amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
@@ -69,17 +78,23 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o \
dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o
amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o \
- uvd_v3_1.o
+ uvd_v3_1.o vce_v1_0.o
amdgpu-y += \
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \
- nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o
+ nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o soc24.o \
+ sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \
+ nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o \
+ cyan_skillfish_reg_init.o
# add DF block
amdgpu-y += \
df_v1_7.o \
- df_v3_6.o
+ df_v3_6.o \
+ df_v4_3.o \
+ df_v4_6_2.o \
+ df_v4_15.o
# add GMC block
amdgpu-y += \
@@ -87,11 +102,13 @@ amdgpu-y += \
gmc_v8_0.o \
gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \
gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o \
- mmhub_v1_7.o
+ mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o \
+ mmhub_v3_0_1.o gfxhub_v3_0_3.o gfxhub_v1_2.o mmhub_v1_8.o mmhub_v3_3.o \
+ gfxhub_v11_5_0.o mmhub_v4_1_0.o gfxhub_v12_0.o gmc_v12_0.o
# add UMC block
amdgpu-y += \
- umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o
+ umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o umc_v12_0.o umc_v8_14.o
# add IH block
amdgpu-y += \
@@ -102,7 +119,10 @@ amdgpu-y += \
cz_ih.o \
vega10_ih.o \
vega20_ih.o \
- navi10_ih.o
+ navi10_ih.o \
+ ih_v6_0.o \
+ ih_v6_1.o \
+ ih_v7_0.o
# add PSP block
amdgpu-y += \
@@ -112,12 +132,13 @@ amdgpu-y += \
psp_v11_0.o \
psp_v11_0_8.o \
psp_v12_0.o \
- psp_v13_0.o
+ psp_v13_0.o \
+ psp_v13_0_4.o \
+ psp_v14_0.o
# add DCE block
amdgpu-y += \
dce_v10_0.o \
- dce_v11_0.o \
amdgpu_vkms.o
# add GFX block
@@ -128,7 +149,14 @@ amdgpu-y += \
gfx_v9_0.o \
gfx_v9_4.o \
gfx_v9_4_2.o \
- gfx_v10_0.o
+ gfx_v9_4_3.o \
+ gfx_v10_0.o \
+ imu_v11_0.o \
+ gfx_v11_0.o \
+ gfx_v11_0_3.o \
+ imu_v11_0_3.o \
+ gfx_v12_0.o \
+ imu_v12_0.o
# add async DMA block
amdgpu-y += \
@@ -137,12 +165,20 @@ amdgpu-y += \
sdma_v3_0.o \
sdma_v4_0.o \
sdma_v4_4.o \
+ sdma_v4_4_2.o \
sdma_v5_0.o \
- sdma_v5_2.o
+ sdma_v5_2.o \
+ sdma_v6_0.o \
+ sdma_v7_0.o
# add MES block
amdgpu-y += \
- mes_v10_1.o
+ amdgpu_mes.o \
+ mes_v11_0.o \
+ mes_v12_0.o \
+
+# add GFX userqueue support
+amdgpu-y += mes_userqueue.o
# add UVD block
amdgpu-y += \
@@ -160,28 +196,55 @@ amdgpu-y += \
# add VCN and JPEG block
amdgpu-y += \
amdgpu_vcn.o \
+ vcn_sw_ring.o \
vcn_v1_0.o \
vcn_v2_0.o \
vcn_v2_5.o \
vcn_v3_0.o \
+ vcn_v4_0.o \
+ vcn_v4_0_3.o \
+ vcn_v4_0_5.o \
+ vcn_v5_0_0.o \
+ vcn_v5_0_1.o \
amdgpu_jpeg.o \
jpeg_v1_0.o \
jpeg_v2_0.o \
jpeg_v2_5.o \
- jpeg_v3_0.o
+ jpeg_v3_0.o \
+ jpeg_v4_0.o \
+ jpeg_v4_0_3.o \
+ jpeg_v4_0_5.o \
+ jpeg_v5_0_0.o \
+ jpeg_v5_0_1.o
+
+# add VPE block
+amdgpu-y += \
+ amdgpu_vpe.o \
+ vpe_v6_1.o
+
+# add UMSCH block
+amdgpu-y += \
+ amdgpu_umsch_mm.o \
+ umsch_mm_v4_0.o
+#
# add ATHUB block
amdgpu-y += \
athub_v1_0.o \
athub_v2_0.o \
- athub_v2_1.o
+ athub_v2_1.o \
+ athub_v3_0.o \
+ athub_v4_1_0.o
# add SMUIO block
amdgpu-y += \
smuio_v9_0.o \
smuio_v11_0.o \
smuio_v11_0_6.o \
- smuio_v13_0.o
+ smuio_v13_0.o \
+ smuio_v13_0_3.o \
+ smuio_v13_0_6.o \
+ smuio_v14_0_2.o
# add reset block
amdgpu-y += \
@@ -194,6 +257,8 @@ amdgpu-y += \
# add amdkfd interfaces
amdgpu-y += amdgpu_amdkfd.o
+# add gfx usermode queue
+amdgpu-y += amdgpu_userq.o
ifneq ($(CONFIG_HSA_AMD),)
AMDKFD_PATH := ../amdkfd
@@ -206,8 +271,11 @@ amdgpu-y += \
amdgpu_amdkfd_gfx_v9.o \
amdgpu_amdkfd_arcturus.o \
amdgpu_amdkfd_aldebaran.o \
+ amdgpu_amdkfd_gc_9_4_3.o \
amdgpu_amdkfd_gfx_v10.o \
- amdgpu_amdkfd_gfx_v10_3.o
+ amdgpu_amdkfd_gfx_v10_3.o \
+ amdgpu_amdkfd_gfx_v11.o \
+ amdgpu_amdkfd_gfx_v12.o
ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
amdgpu-y += amdgpu_amdkfd_gfx_v7.o
@@ -234,7 +302,7 @@ endif
amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
-amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
+amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_hmm.o
include $(FULL_AMD_PATH)/pm/Makefile
@@ -249,4 +317,17 @@ amdgpu-y += $(AMD_DISPLAY_FILES)
endif
+# add isp block
+ifneq ($(CONFIG_DRM_AMD_ISP),)
+amdgpu-y += \
+ amdgpu_isp.o \
+ isp_v4_1_0.o \
+ isp_v4_1_1.o
+endif
+
+AMD_GPU_RAS_PATH := ../ras
+AMD_GPU_RAS_FULL_PATH := $(FULL_AMD_PATH)/ras
+include $(AMD_GPU_RAS_FULL_PATH)/Makefile
+amdgpu-y += $(AMD_GPU_RAS_FILES)
+
obj-$(CONFIG_DRM_AMDGPU)+= amdgpu.o
diff --git a/drivers/gpu/drm/amd/amdgpu/ObjectID.h b/drivers/gpu/drm/amd/amdgpu/ObjectID.h
index 5b393622f592..a0f0a17e224f 100644
--- a/drivers/gpu/drm/amd/amdgpu/ObjectID.h
+++ b/drivers/gpu/drm/amd/amdgpu/ObjectID.h
@@ -119,6 +119,7 @@
#define CONNECTOR_OBJECT_ID_eDP 0x14
#define CONNECTOR_OBJECT_ID_MXM 0x15
#define CONNECTOR_OBJECT_ID_LVDS_eDP 0x16
+#define CONNECTOR_OBJECT_ID_USBC 0x17
/* deleted */
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
index bcfdb63b1d42..daa7b23bc775 100644
--- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -31,65 +31,81 @@
#include "amdgpu_psp.h"
#include "amdgpu_xgmi.h"
+static bool aldebaran_is_mode2_default(struct amdgpu_reset_control *reset_ctl)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ if ((amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2) &&
+ adev->gmc.xgmi.connected_to_cpu))
+ return true;
+
+ return false;
+}
+
static struct amdgpu_reset_handler *
aldebaran_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
struct amdgpu_reset_context *reset_context)
{
struct amdgpu_reset_handler *handler;
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ int i;
+
+ if (reset_context->method == AMD_RESET_METHOD_NONE) {
+ if (aldebaran_is_mode2_default(reset_ctl))
+ reset_context->method = AMD_RESET_METHOD_MODE2;
+ else
+ reset_context->method = amdgpu_asic_reset_method(adev);
+ }
if (reset_context->method != AMD_RESET_METHOD_NONE) {
dev_dbg(adev->dev, "Getting reset handler for method %d\n",
reset_context->method);
- list_for_each_entry(handler, &reset_ctl->reset_handlers,
- handler_list) {
+ for_each_handler(i, handler, reset_ctl) {
if (handler->reset_method == reset_context->method)
return handler;
}
}
- if (adev->gmc.xgmi.connected_to_cpu) {
- list_for_each_entry(handler, &reset_ctl->reset_handlers,
- handler_list) {
- if (handler->reset_method == AMD_RESET_METHOD_MODE2) {
- reset_context->method = AMD_RESET_METHOD_MODE2;
- return handler;
- }
- }
- }
-
dev_dbg(adev->dev, "Reset handler not found!\n");
return NULL;
}
+static inline uint32_t aldebaran_get_ip_block_mask(struct amdgpu_device *adev)
+{
+ uint32_t ip_block_mask = BIT(AMD_IP_BLOCK_TYPE_GFX) |
+ BIT(AMD_IP_BLOCK_TYPE_SDMA);
+
+ if (adev->aid_mask)
+ ip_block_mask |= BIT(AMD_IP_BLOCK_TYPE_IH);
+
+ return ip_block_mask;
+}
+
static int aldebaran_mode2_suspend_ip(struct amdgpu_device *adev)
{
+ uint32_t ip_block_mask = aldebaran_get_ip_block_mask(adev);
+ uint32_t ip_block;
int r, i;
+ /* Skip suspend of SDMA IP versions >= 4.4.2. They are multi-aid */
+ if (adev->aid_mask)
+ ip_block_mask &= ~BIT(AMD_IP_BLOCK_TYPE_SDMA);
+
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
- if (!(adev->ip_blocks[i].version->type ==
- AMD_IP_BLOCK_TYPE_GFX ||
- adev->ip_blocks[i].version->type ==
- AMD_IP_BLOCK_TYPE_SDMA))
+ ip_block = BIT(adev->ip_blocks[i].version->type);
+ if (!(ip_block_mask & ip_block))
continue;
- r = adev->ip_blocks[i].version->funcs->suspend(adev);
-
- if (r) {
- dev_err(adev->dev,
- "suspend of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
-
- adev->ip_blocks[i].status.hw = false;
}
- return r;
+ return 0;
}
static int
@@ -113,9 +129,9 @@ static void aldebaran_async_reset(struct work_struct *work)
struct amdgpu_reset_control *reset_ctl =
container_of(work, struct amdgpu_reset_control, reset_work);
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ int i;
- list_for_each_entry(handler, &reset_ctl->reset_handlers,
- handler_list) {
+ for_each_handler(i, handler, reset_ctl) {
if (handler->reset_method == reset_ctl->active_reset) {
dev_dbg(adev->dev, "Resetting device\n");
handler->do_reset(adev);
@@ -136,18 +152,23 @@ static int
aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
struct amdgpu_reset_context *reset_context)
{
- struct amdgpu_device *tmp_adev = NULL;
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ struct list_head *reset_device_list = reset_context->reset_device_list;
+ struct amdgpu_device *tmp_adev = NULL;
int r = 0;
dev_dbg(adev->dev, "aldebaran perform hw reset\n");
- if (reset_context->hive == NULL) {
+
+ if (reset_device_list == NULL)
+ return -EINVAL;
+
+ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2) &&
+ reset_context->hive == NULL) {
/* Wrong context, return error */
return -EINVAL;
}
- list_for_each_entry(tmp_adev, &reset_context->hive->device_list,
- gmc.xgmi.head) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
mutex_lock(&tmp_adev->reset_cntl->reset_lock);
tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_MODE2;
}
@@ -155,8 +176,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
* Mode2 reset doesn't need any sync between nodes in XGMI hive, instead launch
* them together so that they can be completed asynchronously on multiple nodes
*/
- list_for_each_entry(tmp_adev, &reset_context->hive->device_list,
- gmc.xgmi.head) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
/* For XGMI run all resets in parallel to speed up the process */
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
if (!queue_work(system_unbound_wq,
@@ -174,9 +194,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
/* For XGMI wait for all resets to complete before proceed */
if (!r) {
- list_for_each_entry(tmp_adev,
- &reset_context->hive->device_list,
- gmc.xgmi.head) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
flush_work(&tmp_adev->reset_cntl->reset_work);
r = tmp_adev->asic_reset_res;
@@ -186,8 +204,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
}
}
- list_for_each_entry(tmp_adev, &reset_context->hive->device_list,
- gmc.xgmi.head) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
mutex_unlock(&tmp_adev->reset_cntl->reset_lock);
tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_NONE;
}
@@ -198,8 +215,10 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev)
{
struct amdgpu_firmware_info *ucode_list[AMDGPU_UCODE_ID_MAXIMUM];
+ uint32_t ip_block_mask = aldebaran_get_ip_block_mask(adev);
struct amdgpu_firmware_info *ucode;
struct amdgpu_ip_block *cmn_block;
+ struct amdgpu_ip_block *ih_block;
int ucode_count = 0;
int i, r;
@@ -237,10 +256,22 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev)
dev_err(adev->dev, "Failed to get BIF handle\n");
return -EINVAL;
}
- r = cmn_block->version->funcs->resume(adev);
+ r = amdgpu_ip_block_resume(cmn_block);
if (r)
return r;
+ if (ip_block_mask & BIT(AMD_IP_BLOCK_TYPE_IH)) {
+ ih_block = amdgpu_device_ip_get_ip_block(adev,
+ AMD_IP_BLOCK_TYPE_IH);
+ if (unlikely(!ih_block)) {
+ dev_err(adev->dev, "Failed to get IH handle\n");
+ return -EINVAL;
+ }
+ r = amdgpu_ip_block_resume(ih_block);
+ if (r)
+ return r;
+ }
+
/* Reinit GFXHUB */
adev->gfxhub.funcs->init(adev);
r = adev->gfxhub.funcs->gart_enable(adev);
@@ -260,7 +291,7 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev)
adev->gfx.rlc.funcs->resume(adev);
/* Wait for FW reset event complete */
- r = smu_wait_for_event(adev, SMU_EVENT_RESET_COMPLETE, 0);
+ r = amdgpu_dpm_wait_for_event(adev, SMU_EVENT_RESET_COMPLETE, 0);
if (r) {
dev_err(adev->dev,
"Failed to get response from firmware after reset\n");
@@ -273,15 +304,10 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev)
adev->ip_blocks[i].version->type ==
AMD_IP_BLOCK_TYPE_SDMA))
continue;
- r = adev->ip_blocks[i].version->funcs->resume(adev);
- if (r) {
- dev_err(adev->dev,
- "resume of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- return r;
- }
- adev->ip_blocks[i].status.hw = true;
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
+ return r;
}
for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -295,7 +321,7 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->funcs->late_init) {
r = adev->ip_blocks[i].version->funcs->late_init(
- (void *)adev);
+ &adev->ip_blocks[i]);
if (r) {
dev_err(adev->dev,
"late_init of IP block <%s> failed %d after reset\n",
@@ -307,8 +333,6 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev)
adev->ip_blocks[i].status.late_initialized = true;
}
- amdgpu_ras_set_error_query_ready(adev, true);
-
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
@@ -319,18 +343,28 @@ static int
aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
struct amdgpu_reset_context *reset_context)
{
- int r;
+ struct list_head *reset_device_list = reset_context->reset_device_list;
struct amdgpu_device *tmp_adev = NULL;
+ struct amdgpu_ras *con;
+ int r;
+
+ if (reset_device_list == NULL)
+ return -EINVAL;
- if (reset_context->hive == NULL) {
+ if (amdgpu_ip_version(reset_context->reset_req_dev, MP1_HWIP, 0) ==
+ IP_VERSION(13, 0, 2) &&
+ reset_context->hive == NULL) {
/* Wrong context, return error */
return -EINVAL;
}
- list_for_each_entry(tmp_adev, &reset_context->hive->device_list,
- gmc.xgmi.head) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ amdgpu_set_init_level(tmp_adev,
+ AMDGPU_INIT_LEVEL_RESET_RECOVERY);
dev_info(tmp_adev->dev,
"GPU reset succeeded, trying to resume\n");
+ /*TBD: Ideally should clear only GFX, SDMA blocks*/
+ amdgpu_ras_clear_err_state(tmp_adev);
r = aldebaran_mode2_restore_ip(tmp_adev);
if (r)
goto end;
@@ -341,7 +375,30 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
*/
amdgpu_register_gpu_instance(tmp_adev);
- /* Resume RAS */
+ /* Resume RAS, ecc_irq */
+ con = amdgpu_ras_get_context(tmp_adev);
+ if (!amdgpu_sriov_vf(tmp_adev) && con) {
+ if (tmp_adev->sdma.ras &&
+ tmp_adev->sdma.ras->ras_block.ras_late_init) {
+ r = tmp_adev->sdma.ras->ras_block.ras_late_init(tmp_adev,
+ &tmp_adev->sdma.ras->ras_block.ras_comm);
+ if (r) {
+ dev_err(tmp_adev->dev, "SDMA failed to execute ras_late_init! ret:%d\n", r);
+ goto end;
+ }
+ }
+
+ if (tmp_adev->gfx.ras &&
+ tmp_adev->gfx.ras->ras_block.ras_late_init) {
+ r = tmp_adev->gfx.ras->ras_block.ras_late_init(tmp_adev,
+ &tmp_adev->gfx.ras->ras_block.ras_comm);
+ if (r) {
+ dev_err(tmp_adev->dev, "GFX failed to execute ras_late_init! ret:%d\n", r);
+ goto end;
+ }
+ }
+ }
+
amdgpu_ras_resume(tmp_adev);
/* Update PSP FW topology after reset */
@@ -351,6 +408,8 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
tmp_adev);
if (!r) {
+ amdgpu_set_init_level(tmp_adev,
+ AMDGPU_INIT_LEVEL_DEFAULT);
amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
r = amdgpu_ib_ring_tests(tmp_adev);
@@ -378,6 +437,12 @@ static struct amdgpu_reset_handler aldebaran_mode2_handler = {
.do_reset = aldebaran_mode2_reset,
};
+static struct amdgpu_reset_handler
+ *aldebaran_rst_handlers[AMDGPU_RESET_MAX_HANDLERS] = {
+ &aldebaran_mode2_handler,
+ &xgmi_reset_on_init_handler,
+ };
+
int aldebaran_reset_init(struct amdgpu_device *adev)
{
struct amdgpu_reset_control *reset_ctl;
@@ -391,10 +456,9 @@ int aldebaran_reset_init(struct amdgpu_device *adev)
reset_ctl->active_reset = AMD_RESET_METHOD_NONE;
reset_ctl->get_reset_handler = aldebaran_get_reset_handler;
- INIT_LIST_HEAD(&reset_ctl->reset_handlers);
INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset);
/* Only mode2 is handled through reset control now */
- amdgpu_reset_add_handler(reset_ctl, &aldebaran_mode2_handler);
+ reset_ctl->reset_handlers = &aldebaran_rst_handlers;
adev->reset_cntl = reset_ctl;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b85b67a88a3d..9f9774f58ce1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -50,23 +50,20 @@
#include <linux/hashtable.h>
#include <linux/dma-fence.h>
#include <linux/pci.h>
-#include <linux/aer.h>
-#include <drm/ttm/ttm_bo_api.h>
-#include <drm/ttm/ttm_bo_driver.h>
+#include <drm/ttm/ttm_bo.h>
#include <drm/ttm/ttm_placement.h>
-#include <drm/ttm/ttm_execbuf_util.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_gem.h>
#include <drm/drm_ioctl.h>
-#include <drm/gpu_scheduler.h>
#include <kgd_kfd_interface.h>
#include "dm_pp_interface.h"
#include "kgd_pp_interface.h"
#include "amd_shared.h"
+#include "amdgpu_utils.h"
#include "amdgpu_mode.h"
#include "amdgpu_ih.h"
#include "amdgpu_irq.h"
@@ -83,15 +80,18 @@
#include "amdgpu_vce.h"
#include "amdgpu_vcn.h"
#include "amdgpu_jpeg.h"
-#include "amdgpu_mn.h"
+#include "amdgpu_vpe.h"
+#include "amdgpu_umsch_mm.h"
#include "amdgpu_gmc.h"
#include "amdgpu_gfx.h"
#include "amdgpu_sdma.h"
+#include "amdgpu_lsdma.h"
#include "amdgpu_nbio.h"
#include "amdgpu_hdp.h"
#include "amdgpu_dm.h"
#include "amdgpu_virt.h"
#include "amdgpu_csa.h"
+#include "amdgpu_mes_ctx.h"
#include "amdgpu_gart.h"
#include "amdgpu_debugfs.h"
#include "amdgpu_job.h"
@@ -99,7 +99,6 @@
#include "amdgpu_gem.h"
#include "amdgpu_doorbell.h"
#include "amdgpu_amdkfd.h"
-#include "amdgpu_smu.h"
#include "amdgpu_discovery.h"
#include "amdgpu_mes.h"
#include "amdgpu_umc.h"
@@ -109,26 +108,33 @@
#include "amdgpu_smuio.h"
#include "amdgpu_fdinfo.h"
#include "amdgpu_mca.h"
+#include "amdgpu_aca.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_cper.h"
+#include "amdgpu_xcp.h"
+#include "amdgpu_seq64.h"
+#include "amdgpu_reg_state.h"
+#include "amdgpu_userq.h"
+#include "amdgpu_eviction_fence.h"
+#if defined(CONFIG_DRM_AMD_ISP)
+#include "amdgpu_isp.h"
+#endif
-#define MAX_GPU_INSTANCE 16
+#define MAX_GPU_INSTANCE 64
-struct amdgpu_gpu_instance
-{
+#define GFX_SLICE_PERIOD_MS 250
+
+struct amdgpu_gpu_instance {
struct amdgpu_device *adev;
int mgpu_fan_enabled;
};
-struct amdgpu_mgpu_info
-{
+struct amdgpu_mgpu_info {
struct amdgpu_gpu_instance gpu_ins[MAX_GPU_INSTANCE];
struct mutex mutex;
uint32_t num_gpu;
uint32_t num_dgpu;
uint32_t num_apu;
-
- /* delayed reset_func for XGMI configuration if necessary */
- struct delayed_work delayed_reset_work;
- bool pending_reset;
};
enum amdgpu_ss {
@@ -138,8 +144,15 @@ enum amdgpu_ss {
AMDGPU_SS_DRV_UNLOAD
};
-struct amdgpu_watchdog_timer
-{
+struct amdgpu_hwip_reg_entry {
+ u32 hwip;
+ u32 inst;
+ u32 seg;
+ u32 reg_offset;
+ const char *reg_name;
+};
+
+struct amdgpu_watchdog_timer {
bool timeout_fatal_disable;
uint32_t period; /* maxCycles = (1 << period), the number of cycles before a timeout */
};
@@ -150,13 +163,11 @@ struct amdgpu_watchdog_timer
* Modules parameters.
*/
extern int amdgpu_modeset;
-extern int amdgpu_vram_limit;
+extern unsigned int amdgpu_vram_limit;
extern int amdgpu_vis_vram_limit;
extern int amdgpu_gart_size;
extern int amdgpu_gtt_size;
extern int amdgpu_moverate;
-extern int amdgpu_benchmarking;
-extern int amdgpu_testing;
extern int amdgpu_audio;
extern int amdgpu_disp_priority;
extern int amdgpu_hw_i2c;
@@ -182,14 +193,13 @@ extern int amdgpu_sched_jobs;
extern int amdgpu_sched_hw_submission;
extern uint amdgpu_pcie_gen_cap;
extern uint amdgpu_pcie_lane_cap;
-extern uint amdgpu_cg_mask;
+extern u64 amdgpu_cg_mask;
extern uint amdgpu_pg_mask;
extern uint amdgpu_sdma_phase_quantum;
extern char *amdgpu_disable_cu;
extern char *amdgpu_virtual_display;
extern uint amdgpu_pp_feature_mask;
extern uint amdgpu_force_long_training;
-extern int amdgpu_job_hang_limit;
extern int amdgpu_lbpw;
extern int amdgpu_compute_multipipe;
extern int amdgpu_gpu_recovery;
@@ -199,8 +209,10 @@ extern int amdgpu_smu_pptable_id;
extern uint amdgpu_dc_feature_mask;
extern uint amdgpu_freesync_vid_mode;
extern uint amdgpu_dc_debug_mask;
-extern uint amdgpu_dm_abm_level;
+extern uint amdgpu_dc_visual_confirm;
+extern int amdgpu_dm_abm_level;
extern int amdgpu_backlight;
+extern int amdgpu_damage_clips;
extern struct amdgpu_mgpu_info mgpu_info;
extern int amdgpu_ras_enable;
extern uint amdgpu_ras_mask;
@@ -211,17 +223,29 @@ extern int amdgpu_async_gfx_ring;
extern int amdgpu_mcbp;
extern int amdgpu_discovery;
extern int amdgpu_mes;
+extern int amdgpu_mes_log_enable;
+extern int amdgpu_mes_kiq;
+extern int amdgpu_uni_mes;
extern int amdgpu_noretry;
extern int amdgpu_force_asic_type;
extern int amdgpu_smartshift_bias;
+extern int amdgpu_use_xgmi_p2p;
+extern int amdgpu_mtype_local;
+extern int amdgpu_enforce_isolation;
#ifdef CONFIG_HSA_AMD
extern int sched_policy;
extern bool debug_evictions;
extern bool no_system_mem_limit;
+extern int halt_if_hws_hang;
+extern uint amdgpu_svm_default_granularity;
#else
static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS;
static const bool __maybe_unused debug_evictions; /* = false */
static const bool __maybe_unused no_system_mem_limit;
+static const int __maybe_unused halt_if_hws_hang;
+#endif
+#ifdef CONFIG_HSA_AMD_P2P
+extern bool pcie_p2p;
#endif
extern int amdgpu_tmz;
@@ -235,9 +259,23 @@ extern int amdgpu_cik_support;
#endif
extern int amdgpu_num_kcq;
+#define AMDGPU_VCNFW_LOG_SIZE (32 * 1024)
+#define AMDGPU_UMSCHFW_LOG_SIZE (32 * 1024)
+extern int amdgpu_vcnfw_log;
+extern int amdgpu_sg_display;
+extern int amdgpu_umsch_mm;
+extern int amdgpu_seamless;
+extern int amdgpu_umsch_mm_fwlog;
+
+extern int amdgpu_user_partt_mode;
+extern int amdgpu_agp;
+extern int amdgpu_rebar;
+
+extern int amdgpu_wbrf;
+extern int amdgpu_user_queue;
+
#define AMDGPU_VM_MAX_NUM_CTX 4096
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
-#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
#define AMDGPU_FENCE_JIFFIES_TIMEOUT (HZ / 2)
@@ -267,18 +305,25 @@ extern int amdgpu_num_kcq;
#define AMDGPU_RESET_VCE (1 << 13)
#define AMDGPU_RESET_VCE1 (1 << 14)
+/* reset mask */
+#define AMDGPU_RESET_TYPE_FULL (1 << 0) /* full adapter reset, mode1/mode2/BACO/etc. */
+#define AMDGPU_RESET_TYPE_SOFT_RESET (1 << 1) /* IP level soft reset */
+#define AMDGPU_RESET_TYPE_PER_QUEUE (1 << 2) /* per queue */
+#define AMDGPU_RESET_TYPE_PER_PIPE (1 << 3) /* per pipe */
+
/* max cursor sizes (in pixels) */
#define CIK_CURSOR_WIDTH 128
#define CIK_CURSOR_HEIGHT 128
-/* smasrt shift bias level limits */
+/* smart shift bias level limits */
#define AMDGPU_SMARTSHIFT_MAX_BIAS (100)
#define AMDGPU_SMARTSHIFT_MIN_BIAS (-100)
+/* Extra time delay (in ms) to filter out momentary temperature fluctuations */
+#define AMDGPU_SWCTF_EXTRA_DELAY 50
+
+struct amdgpu_xcp_mgr;
struct amdgpu_device;
-struct amdgpu_ib;
-struct amdgpu_cs_parser;
-struct amdgpu_job;
struct amdgpu_irq_src;
struct amdgpu_fpriv;
struct amdgpu_bo_va_mapping;
@@ -313,7 +358,6 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
AMDGPU_CP_KIQ_IRQ_LAST
};
-
#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
#define MAX_KIQ_REG_TRY 1000
@@ -325,13 +369,18 @@ int amdgpu_device_ip_set_powergating_state(void *dev,
enum amd_ip_block_type block_type,
enum amd_powergating_state state);
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags);
+ u64 *flags);
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
enum amd_ip_block_type block_type);
-bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
+bool amdgpu_device_ip_is_hw(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type);
+bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
enum amd_ip_block_type block_type);
+int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block);
-#define AMDGPU_MAX_IP_NUM 16
+int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block);
+
+#define AMDGPU_MAX_IP_NUM AMD_IP_BLOCK_TYPE_NUM
struct amdgpu_ip_block_status {
bool valid;
@@ -349,12 +398,10 @@ struct amdgpu_ip_block_version {
const struct amd_ip_funcs *funcs;
};
-#define HW_REV(_Major, _Minor, _Rev) \
- ((((uint32_t) (_Major)) << 16) | ((uint32_t) (_Minor) << 8) | ((uint32_t) (_Rev)))
-
struct amdgpu_ip_block {
struct amdgpu_ip_block_status status;
const struct amdgpu_ip_block_version *version;
+ struct amdgpu_device *adev;
};
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
@@ -373,7 +420,9 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
*/
bool amdgpu_get_bios(struct amdgpu_device *adev);
bool amdgpu_read_bios(struct amdgpu_device *adev);
-
+bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev,
+ u8 *bios, u32 length_bytes);
+void amdgpu_bios_release(struct amdgpu_device *adev);
/*
* Clocks
*/
@@ -388,7 +437,6 @@ struct amdgpu_clock {
uint32_t default_mclk;
uint32_t default_sclk;
uint32_t default_dispclk;
- uint32_t current_dispclk;
uint32_t dp_extclk;
uint32_t max_pixel_clock;
};
@@ -417,34 +465,13 @@ struct amdgpu_clock {
* alignment).
*/
-#define AMDGPU_SA_NUM_FENCE_LISTS 32
-
struct amdgpu_sa_manager {
- wait_queue_head_t wq;
- struct amdgpu_bo *bo;
- struct list_head *hole;
- struct list_head flist[AMDGPU_SA_NUM_FENCE_LISTS];
- struct list_head olist;
- unsigned size;
- uint64_t gpu_addr;
- void *cpu_ptr;
- uint32_t domain;
- uint32_t align;
-};
-
-/* sub-allocation buffer */
-struct amdgpu_sa_bo {
- struct list_head olist;
- struct list_head flist;
- struct amdgpu_sa_manager *manager;
- unsigned soffset;
- unsigned eoffset;
- struct dma_fence *fence;
+ struct drm_suballoc_manager base;
+ struct amdgpu_bo *bo;
+ uint64_t gpu_addr;
+ void *cpu_ptr;
};
-int amdgpu_fence_slab_init(void);
-void amdgpu_fence_slab_fini(void);
-
/*
* IRQS.
*/
@@ -458,28 +485,12 @@ struct amdgpu_flip_work {
uint64_t base;
struct drm_pending_vblank_event *event;
struct amdgpu_bo *old_abo;
- struct dma_fence *excl;
unsigned shared_count;
struct dma_fence **shared;
struct dma_fence_cb cb;
bool async;
};
-
-/*
- * CP & rings.
- */
-
-struct amdgpu_ib {
- struct amdgpu_sa_bo *sa_bo;
- uint32_t length_dw;
- uint64_t gpu_addr;
- uint32_t *ptr;
- uint32_t flags;
-};
-
-extern const struct drm_sched_backend_ops amdgpu_sched_ops;
-
/*
* file private structure
*/
@@ -488,97 +499,83 @@ struct amdgpu_fpriv {
struct amdgpu_vm vm;
struct amdgpu_bo_va *prt_va;
struct amdgpu_bo_va *csa_va;
+ struct amdgpu_bo_va *seq64_va;
struct mutex bo_list_lock;
struct idr bo_list_handles;
struct amdgpu_ctx_mgr ctx_mgr;
-};
+ struct amdgpu_userq_mgr userq_mgr;
-int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
+ /* Eviction fence infra */
+ struct amdgpu_eviction_fence_mgr evf_mgr;
-int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- unsigned size,
- enum amdgpu_ib_pool_type pool,
- struct amdgpu_ib *ib);
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
- struct dma_fence *f);
-int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
- struct amdgpu_ib *ibs, struct amdgpu_job *job,
- struct dma_fence **f);
-int amdgpu_ib_pool_init(struct amdgpu_device *adev);
-void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
-int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
-
-/*
- * CS.
- */
-struct amdgpu_cs_chunk {
- uint32_t chunk_id;
- uint32_t length_dw;
- void *kdata;
+ /** GPU partition selection */
+ uint32_t xcp_id;
};
-struct amdgpu_cs_post_dep {
- struct drm_syncobj *syncobj;
- struct dma_fence_chain *chain;
- u64 point;
-};
-
-struct amdgpu_cs_parser {
- struct amdgpu_device *adev;
- struct drm_file *filp;
- struct amdgpu_ctx *ctx;
-
- /* chunks */
- unsigned nchunks;
- struct amdgpu_cs_chunk *chunks;
-
- /* scheduler job object */
- struct amdgpu_job *job;
- struct drm_sched_entity *entity;
-
- /* buffer objects */
- struct ww_acquire_ctx ticket;
- struct amdgpu_bo_list *bo_list;
- struct amdgpu_mn *mn;
- struct amdgpu_bo_list_entry vm_pd;
- struct list_head validated;
- struct dma_fence *fence;
- uint64_t bytes_moved_threshold;
- uint64_t bytes_moved_vis_threshold;
- uint64_t bytes_moved;
- uint64_t bytes_moved_vis;
-
- /* user fence */
- struct amdgpu_bo_list_entry uf_entry;
-
- unsigned num_post_deps;
- struct amdgpu_cs_post_dep *post_deps;
-};
-
-static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
- uint32_t ib_idx, int idx)
-{
- return p->job->ibs[ib_idx].ptr[idx];
-}
-
-static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
- uint32_t ib_idx, int idx,
- uint32_t value)
-{
- p->job->ibs[ib_idx].ptr[idx] = value;
-}
+int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
/*
* Writeback
*/
-#define AMDGPU_MAX_WB 256 /* Reserve at most 256 WB slots for amdgpu-owned rings. */
+#define AMDGPU_MAX_WB 1024 /* Reserve at most 1024 WB slots for amdgpu-owned rings. */
+/**
+ * amdgpu_wb - This struct is used for small GPU memory allocation.
+ *
+ * This struct is used to allocate a small amount of GPU memory that can be
+ * used to shadow certain states into the memory. This is especially useful for
+ * providing easy CPU access to some states without requiring register access
+ * (e.g., if some block is power gated, reading register may be problematic).
+ *
+ * Note: the term writeback was initially used because many of the amdgpu
+ * components had some level of writeback memory, and this struct initially
+ * described those components.
+ */
struct amdgpu_wb {
+
+ /**
+ * @wb_obj:
+ *
+ * Buffer Object used for the writeback memory.
+ */
struct amdgpu_bo *wb_obj;
- volatile uint32_t *wb;
+
+ /**
+ * @wb:
+ *
+ * Pointer to the first writeback slot. In terms of CPU address
+ * this value can be accessed directly by using the offset as an index.
+ * For the GPU address, it is necessary to use gpu_addr and the offset.
+ */
+ uint32_t *wb;
+
+ /**
+ * @gpu_addr:
+ *
+ * Writeback base address in the GPU.
+ */
uint64_t gpu_addr;
- u32 num_wb; /* Number of wb slots actually reserved for amdgpu. */
+
+ /**
+ * @num_wb:
+ *
+ * Number of writeback slots reserved for amdgpu.
+ */
+ u32 num_wb;
+
+ /**
+ * @used:
+ *
+ * Tracks which writeback slots are already in use.
+ */
unsigned long used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)];
+
+ /**
+ * @lock:
+ *
+ * Protects read and write of the used field array.
+ */
+ spinlock_t lock;
};
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
@@ -587,13 +584,7 @@ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb);
/*
* Benchmarking
*/
-void amdgpu_benchmark(struct amdgpu_device *adev, int test_number);
-
-
-/*
- * Testing
- */
-void amdgpu_test_moves(struct amdgpu_device *adev);
+int amdgpu_benchmark(struct amdgpu_device *adev, int test_number);
/*
* ASIC specific register table accessible by UMD
@@ -603,14 +594,42 @@ struct amdgpu_allowed_register_entry {
bool grbm_indexed;
};
+/**
+ * enum amd_reset_method - Methods for resetting AMD GPU devices
+ *
+ * @AMD_RESET_METHOD_NONE: The device will not be reset.
+ * @AMD_RESET_METHOD_LEGACY: Method reserved for SI, CIK and VI ASICs.
+ * @AMD_RESET_METHOD_MODE0: Reset the entire ASIC. Not currently available for
+ * any device.
+ * @AMD_RESET_METHOD_MODE1: Resets all IP blocks on the ASIC (SDMA, GFX, VCN, etc.)
+ * individually. Suitable only for some discrete GPUs, not
+ * available for all ASICs.
+ * @AMD_RESET_METHOD_MODE2: Resets a lesser level of IPs compared to MODE1. Which IPs
+ * are reset depends on the ASIC. Notably doesn't reset IPs
+ * shared with the CPU on APUs or the memory controllers (so
+ * VRAM is not lost). Not available on all ASICs.
+ * @AMD_RESET_METHOD_LINK: Triggers SW-UP link reset on other GPUs
+ * @AMD_RESET_METHOD_BACO: BACO (Bus Alive, Chip Off) method powers off and on the card
+ * but without powering off the PCI bus. Suitable only for
+ * discrete GPUs.
+ * @AMD_RESET_METHOD_PCI: Does a full bus reset using core Linux subsystem PCI reset
+ * and does a secondary bus reset or FLR, depending on what the
+ * underlying hardware supports.
+ *
+ * Methods available for AMD GPU driver for resetting the device. Not all
+ * methods are suitable for every device. User can override the method using
+ * module parameter `reset_method`.
+ */
enum amd_reset_method {
AMD_RESET_METHOD_NONE = -1,
AMD_RESET_METHOD_LEGACY = 0,
AMD_RESET_METHOD_MODE0,
AMD_RESET_METHOD_MODE1,
AMD_RESET_METHOD_MODE2,
+ AMD_RESET_METHOD_LINK,
AMD_RESET_METHOD_BACO,
AMD_RESET_METHOD_PCI,
+ AMD_RESET_METHOD_ON_INIT,
};
struct amdgpu_video_codec_info {
@@ -672,7 +691,7 @@ struct amdgpu_asic_funcs {
/* PCIe replay counter */
uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);
/* device supports BACO */
- bool (*supports_baco)(struct amdgpu_device *adev);
+ int (*supports_baco)(struct amdgpu_device *adev);
/* pre asic_init quirks */
void (*pre_asic_init)(struct amdgpu_device *adev);
/* enter/exit umd stable pstate */
@@ -680,6 +699,12 @@ struct amdgpu_asic_funcs {
/* query video codecs */
int (*query_video_codecs)(struct amdgpu_device *adev, bool encode,
const struct amdgpu_video_codecs **codecs);
+ /* encode "> 32bits" smn addressing */
+ u64 (*encode_ext_smn_addressing)(int ext_id);
+
+ ssize_t (*get_reg_state)(struct amdgpu_device *adev,
+ enum amdgpu_reg_state reg_state, void *buf,
+ size_t max_size);
};
/*
@@ -696,9 +721,9 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
/* VRAM scratch page for HDP bug, default vram page */
-struct amdgpu_vram_scratch {
+struct amdgpu_mem_scratch {
struct amdgpu_bo *robj;
- volatile uint32_t *ptr;
+ uint32_t *ptr;
u64 gpu_addr;
};
@@ -714,15 +739,22 @@ void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device);
typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t);
typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
+typedef uint32_t (*amdgpu_rreg_ext_t)(struct amdgpu_device*, uint64_t);
+typedef void (*amdgpu_wreg_ext_t)(struct amdgpu_device*, uint64_t, uint32_t);
+
typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device*, uint32_t);
typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t);
+typedef uint64_t (*amdgpu_rreg64_ext_t)(struct amdgpu_device*, uint64_t);
+typedef void (*amdgpu_wreg64_ext_t)(struct amdgpu_device*, uint64_t, uint64_t);
+
typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
struct amdgpu_mmio_remap {
u32 reg_offset;
resource_size_t bus_addr;
+ struct amdgpu_bo *bo;
};
/* Define the HW IP blocks will be used in driver , add more if necessary */
@@ -737,6 +769,7 @@ enum amd_hw_ip_block_type {
SDMA5_HWIP,
SDMA6_HWIP,
SDMA7_HWIP,
+ LSDMA_HWIP,
MMHUB_HWIP,
ATHUB_HWIP,
NBIO_HWIP,
@@ -747,6 +780,7 @@ enum amd_hw_ip_block_type {
JPEG_HWIP = VCN_HWIP,
VCN1_HWIP,
VCE_HWIP,
+ VPE_HWIP,
DF_HWIP,
DCE_HWIP,
OSSSYS_HWIP,
@@ -759,13 +793,48 @@ enum amd_hw_ip_block_type {
RSMU_HWIP,
XGMI_HWIP,
DCI_HWIP,
+ PCIE_HWIP,
+ ISP_HWIP,
MAX_HWIP
};
-#define HWIP_MAX_INSTANCE 10
+#define HWIP_MAX_INSTANCE 44
#define HW_ID_MAX 300
-#define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv))
+#define IP_VERSION_FULL(mj, mn, rv, var, srev) \
+ (((mj) << 24) | ((mn) << 16) | ((rv) << 8) | ((var) << 4) | (srev))
+#define IP_VERSION(mj, mn, rv) IP_VERSION_FULL(mj, mn, rv, 0, 0)
+#define IP_VERSION_MAJ(ver) ((ver) >> 24)
+#define IP_VERSION_MIN(ver) (((ver) >> 16) & 0xFF)
+#define IP_VERSION_REV(ver) (((ver) >> 8) & 0xFF)
+#define IP_VERSION_VARIANT(ver) (((ver) >> 4) & 0xF)
+#define IP_VERSION_SUBREV(ver) ((ver) & 0xF)
+#define IP_VERSION_MAJ_MIN_REV(ver) ((ver) >> 8)
+
+struct amdgpu_ip_map_info {
+ /* Map of logical to actual dev instances/mask */
+ uint32_t dev_inst[MAX_HWIP][HWIP_MAX_INSTANCE];
+ int8_t (*logical_to_dev_inst)(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ int8_t inst);
+ uint32_t (*logical_to_dev_mask)(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ uint32_t mask);
+};
+
+enum amdgpu_uid_type {
+ AMDGPU_UID_TYPE_XCD,
+ AMDGPU_UID_TYPE_AID,
+ AMDGPU_UID_TYPE_SOC,
+ AMDGPU_UID_TYPE_MAX
+};
+
+#define AMDGPU_UID_INST_MAX 8 /* max number of instances for each UID type */
+
+struct amdgpu_uid {
+ uint64_t uid[AMDGPU_UID_TYPE_MAX][AMDGPU_UID_INST_MAX];
+ struct amdgpu_device *adev;
+};
struct amd_powerplay {
void *pp_handle;
@@ -811,8 +880,73 @@ struct amd_powerplay {
(rid == 0x01) || \
(rid == 0x10))))
+struct amdgpu_mqd_prop {
+ uint64_t mqd_gpu_addr;
+ uint64_t hqd_base_gpu_addr;
+ uint64_t rptr_gpu_addr;
+ uint64_t wptr_gpu_addr;
+ uint32_t queue_size;
+ bool use_doorbell;
+ uint32_t doorbell_index;
+ uint64_t eop_gpu_addr;
+ uint32_t hqd_pipe_priority;
+ uint32_t hqd_queue_priority;
+ bool allow_tunneling;
+ bool hqd_active;
+ uint64_t shadow_addr;
+ uint64_t gds_bkup_addr;
+ uint64_t csa_addr;
+ uint64_t fence_address;
+ bool tmz_queue;
+ bool kernel_queue;
+};
+
+struct amdgpu_mqd {
+ unsigned mqd_size;
+ int (*init_mqd)(struct amdgpu_device *adev, void *mqd,
+ struct amdgpu_mqd_prop *p);
+};
+
+struct amdgpu_pcie_reset_ctx {
+ bool in_link_reset;
+ bool occurs_dpc;
+ bool audio_suspended;
+ struct pci_dev *swus;
+ struct pci_saved_state *swus_pcistate;
+ struct pci_saved_state *swds_pcistate;
+};
+
+/*
+ * Custom Init levels could be defined for different situations where a full
+ * initialization of all hardware blocks is not expected. Sample cases are
+ * custom init sequences after resume after S0i3/S3, reset on initialization,
+ * partial reset of blocks etc. The levels are listed in the enum below; they
+ * are described in corresponding struct definitions, e.g. amdgpu_init_default
+ * and amdgpu_init_minimal_xgmi.
+ */
+enum amdgpu_init_lvl_id {
+ AMDGPU_INIT_LEVEL_DEFAULT,
+ AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
+ AMDGPU_INIT_LEVEL_RESET_RECOVERY,
+};
+
+struct amdgpu_init_level {
+ enum amdgpu_init_lvl_id level;
+ uint32_t hwini_ip_block_mask;
+};
+
#define AMDGPU_RESET_MAGIC_NUM 64
#define AMDGPU_MAX_DF_PERFMONS 4
+struct amdgpu_reset_domain;
+struct amdgpu_fru_info;
+
+enum amdgpu_enforce_isolation_mode {
+ AMDGPU_ENFORCE_ISOLATION_DISABLE = 0,
+ AMDGPU_ENFORCE_ISOLATION_ENABLE = 1,
+ AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY = 2,
+ AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER = 3,
+};
+
struct amdgpu_device {
struct device *dev;
struct pci_dev *pdev;
@@ -822,6 +956,7 @@ struct amdgpu_device {
struct amdgpu_acp acp;
#endif
struct amdgpu_hive_info *hive;
+ struct amdgpu_xcp_mgr *xcp_mgr;
/* ASIC */
enum amd_asic_type asic_type;
uint32_t family;
@@ -835,9 +970,9 @@ struct amdgpu_device {
bool need_swiotlb;
bool accel_working;
struct notifier_block acpi_nb;
+ struct notifier_block pm_nb;
struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS];
- struct debugfs_blob_wrapper debugfs_vbios_blob;
- struct debugfs_blob_wrapper debugfs_discovery_blob;
+ struct debugfs_blob_wrapper debugfs_vbios_blob;
struct mutex srbm_mutex;
/* GRBM index mutex. Protects concurrent access to GRBM index */
struct mutex grbm_idx_mutex;
@@ -869,8 +1004,12 @@ struct amdgpu_device {
amdgpu_wreg_t pcie_wreg;
amdgpu_rreg_t pciep_rreg;
amdgpu_wreg_t pciep_wreg;
+ amdgpu_rreg_ext_t pcie_rreg_ext;
+ amdgpu_wreg_ext_t pcie_wreg_ext;
amdgpu_rreg64_t pcie_rreg64;
amdgpu_wreg64_t pcie_wreg64;
+ amdgpu_rreg64_ext_t pcie_rreg64_ext;
+ amdgpu_wreg64_ext_t pcie_wreg64_ext;
/* protects concurrent UVD register access */
spinlock_t uvd_ctx_idx_lock;
amdgpu_rreg_t uvd_ctx_rreg;
@@ -902,11 +1041,11 @@ struct amdgpu_device {
dma_addr_t dummy_page_addr;
struct amdgpu_vm_manager vm_manager;
struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS];
- unsigned num_vmhubs;
+ DECLARE_BITMAP(vmhubs_mask, AMDGPU_MAX_VMHUBS);
/* memory management */
struct amdgpu_mman mman;
- struct amdgpu_vram_scratch vram_scratch;
+ struct amdgpu_mem_scratch mem_scratch;
struct amdgpu_wb wb;
atomic64_t num_bytes_moved;
atomic64_t num_evictions;
@@ -923,12 +1062,15 @@ struct amdgpu_device {
u32 log2_max_MBps;
} mm_stats;
+ /* discovery */
+ struct amdgpu_discovery_info discovery;
+
/* display */
bool enable_virtual_display;
struct amdgpu_vkms_output *amdgpu_vkms_output;
struct amdgpu_mode_info mode_info;
/* For pre-DCE11. DCE11 and later are in "struct amdgpu_device->dm" */
- struct work_struct hotplug_work;
+ struct delayed_work hotplug_work;
struct amdgpu_irq_src crtc_irq;
struct amdgpu_irq_src vline0_irq;
struct amdgpu_irq_src vupdate_irq;
@@ -941,6 +1083,7 @@ struct amdgpu_device {
u64 fence_context;
unsigned num_rings;
struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
+ struct dma_fence __rcu *gang_submit;
bool ib_pool_ready;
struct amdgpu_sa_manager ib_pools[AMDGPU_IB_POOL_MAX];
struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX];
@@ -950,14 +1093,8 @@ struct amdgpu_device {
/* powerplay */
struct amd_powerplay powerplay;
- bool pp_force_state_enabled;
-
- /* smu */
- struct smu_context smu;
-
- /* dpm */
struct amdgpu_pm pm;
- u32 cg_flags;
+ u64 cg_flags;
u32 pg_flags;
/* nbio */
@@ -981,6 +1118,9 @@ struct amdgpu_device {
/* sdma */
struct amdgpu_sdma sdma;
+ /* lsdma */
+ struct amdgpu_lsdma lsdma;
+
/* uvd */
struct amdgpu_uvd uvd;
@@ -993,6 +1133,13 @@ struct amdgpu_device {
/* jpeg */
struct amdgpu_jpeg jpeg;
+ /* vpe */
+ struct amdgpu_vpe vpe;
+
+ /* umsch */
+ struct amdgpu_umsch_mm umsch_mm;
+ bool enable_umsch_mm;
+
/* firmwares */
struct amdgpu_firmware firmware;
@@ -1002,8 +1149,8 @@ struct amdgpu_device {
/* GDS */
struct amdgpu_gds gds;
- /* KFD */
- struct amdgpu_kfd_dev kfd;
+ /* for userq and VM fences */
+ struct amdgpu_seq64 seq64;
/* UMC */
struct amdgpu_umc umc;
@@ -1011,9 +1158,30 @@ struct amdgpu_device {
/* display related functionality */
struct amdgpu_display_manager dm;
+#if defined(CONFIG_DRM_AMD_ISP)
+ /* isp */
+ struct amdgpu_isp isp;
+#endif
+
/* mes */
bool enable_mes;
+ bool enable_mes_kiq;
+ bool enable_uni_mes;
struct amdgpu_mes mes;
+ struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM];
+ const struct amdgpu_userq_funcs *userq_funcs[AMDGPU_HW_IP_NUM];
+
+ /* xarray used to retrieve the user queue fence driver reference
+ * in the EOP interrupt handler to signal the particular user
+ * queue fence.
+ */
+ struct xarray userq_xa;
+ /**
+ * @userq_doorbell_xa: Global user queue map (doorbell index → queue)
+ * Key: doorbell_index (unique global identifier for the queue)
+ * Value: struct amdgpu_usermode_queue
+ */
+ struct xarray userq_doorbell_xa;
/* df */
struct amdgpu_df df;
@@ -1021,6 +1189,12 @@ struct amdgpu_device {
/* MCA */
struct amdgpu_mca mca;
+ /* ACA */
+ struct amdgpu_aca aca;
+
+ /* CPER */
+ struct amdgpu_cper cper;
+
struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM];
uint32_t harvest_ip_mask;
int num_ip_blocks;
@@ -1034,16 +1208,13 @@ struct amdgpu_device {
/* soc15 register offset based on ip, instance and segment */
uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
+ struct amdgpu_ip_map_info ip_map;
/* delayed work_func for deferring clockgating during resume */
struct delayed_work delayed_init_work;
struct amdgpu_virt virt;
- /* link all shadow bo */
- struct list_head shadow_list;
- struct mutex shadow_list_lock;
-
/* record hw reset is performed */
bool has_hw_reset;
u8 reset_magic[AMDGPU_RESET_MAGIC_NUM];
@@ -1053,10 +1224,9 @@ struct amdgpu_device {
bool in_s3;
bool in_s4;
bool in_s0ix;
+ suspend_state_t last_suspend_state;
- atomic_t in_gpu_reset;
enum pp_mp1_state mp1_state;
- struct rw_semaphore reset_sem;
struct amdgpu_doorbell_index doorbell_index;
struct mutex notifier_lock;
@@ -1069,36 +1239,108 @@ struct amdgpu_device {
long sdma_timeout;
long video_timeout;
long compute_timeout;
+ long psp_timeout;
uint64_t unique_id;
uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
/* enable runtime pm on the device */
- bool runpm;
bool in_runpm;
bool has_pr3;
- bool pm_sysfs_en;
bool ucode_sysfs_en;
- /* Chip product information */
- char product_number[16];
- char product_name[32];
- char serial[20];
-
+ struct amdgpu_fru_info *fru_info;
atomic_t throttling_logging_enabled;
struct ratelimit_state throttling_logging_rs;
uint32_t ras_hw_enabled;
uint32_t ras_enabled;
+ bool ras_default_ecc_enabled;
bool no_hw_access;
struct pci_saved_state *pci_state;
pci_channel_state_t pci_channel_state;
+ struct amdgpu_pcie_reset_ctx pcie_reset_ctx;
+
+ /* Track auto wait count on s_barrier settings */
+ bool barrier_has_auto_waitcnt;
+
struct amdgpu_reset_control *reset_cntl;
- uint32_t ip_versions[HW_ID_MAX][HWIP_MAX_INSTANCE];
+ uint32_t ip_versions[MAX_HWIP][HWIP_MAX_INSTANCE];
+
+ bool ram_is_direct_mapped;
+
+ struct list_head ras_list;
+
+ struct amdgpu_reset_domain *reset_domain;
+
+ struct mutex benchmark_mutex;
+
+ bool scpm_enabled;
+ uint32_t scpm_status;
+
+ struct work_struct reset_work;
+
+ bool dc_enabled;
+ /* Mask of active clusters */
+ uint32_t aid_mask;
+
+ /* Debug */
+ bool debug_vm;
+ bool debug_largebar;
+ bool debug_disable_soft_recovery;
+ bool debug_use_vram_fw_buf;
+ bool debug_enable_ras_aca;
+ bool debug_exp_resets;
+ bool debug_disable_gpu_ring_reset;
+ bool debug_vm_userptr;
+ bool debug_disable_ce_logs;
+ bool debug_enable_ce_cs;
+
+ /* Protection for the following isolation structure */
+ struct mutex enforce_isolation_mutex;
+ enum amdgpu_enforce_isolation_mode enforce_isolation[MAX_XCP];
+ struct amdgpu_isolation {
+ void *owner;
+ struct dma_fence *spearhead;
+ struct amdgpu_sync active;
+ struct amdgpu_sync prev;
+ } isolation[MAX_XCP];
+
+ struct amdgpu_init_level *init_lvl;
+
+ /* This flag is used to determine how VRAM allocations are handled for APUs
+ * in KFD: VRAM or GTT.
+ */
+ bool apu_prefer_gtt;
+
+ bool userq_halt_for_enforce_isolation;
+ struct work_struct userq_reset_work;
+ struct amdgpu_uid *uid_info;
+
+ /* KFD
+ * Must be last -- ends in a flexible-array member.
+ */
+ struct amdgpu_kfd_dev kfd;
};
+static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
+ uint8_t ip, uint8_t inst)
+{
+ /* This considers only major/minor/rev and ignores
+ * subrevision/variant fields.
+ */
+ return adev->ip_versions[ip][inst] & ~0xFFU;
+}
+
+static inline uint32_t amdgpu_ip_version_full(const struct amdgpu_device *adev,
+ uint8_t ip, uint8_t inst)
+{
+ /* This returns full version - major/minor/rev/variant/subrevision */
+ return adev->ip_versions[ip][inst];
+}
+
static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
{
return container_of(ddev, struct amdgpu_device, ddev);
@@ -1114,6 +1356,11 @@ static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_device *bdev)
return container_of(bdev, struct amdgpu_device, mman.bdev);
}
+static inline bool amdgpu_is_multi_aid(struct amdgpu_device *adev)
+{
+ return !!adev->aid_mask;
+}
+
int amdgpu_device_init(struct amdgpu_device *adev,
uint32_t flags);
void amdgpu_device_fini_hw(struct amdgpu_device *adev);
@@ -1128,38 +1375,57 @@ size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
void *buf, size_t size, bool write);
+uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
+ uint32_t inst, uint32_t reg_addr, char reg_name[],
+ uint32_t expected_value, uint32_t mask);
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t acc_flags);
+u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
+ u64 reg_addr);
+uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t acc_flags,
+ uint32_t xcc_id);
void amdgpu_device_wreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t v,
uint32_t acc_flags);
+void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
+ u64 reg_addr, u32 reg_data);
+void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t v,
+ uint32_t acc_flags,
+ uint32_t xcc_id);
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
- uint32_t reg, uint32_t v);
+ uint32_t reg, uint32_t v, uint32_t xcc_id);
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
- u32 pcie_index, u32 pcie_data,
u32 reg_addr);
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
- u32 pcie_index, u32 pcie_data,
u32 reg_addr);
+u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
+ u64 reg_addr);
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
- u32 pcie_index, u32 pcie_data,
u32 reg_addr, u32 reg_data);
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
- u32 pcie_index, u32 pcie_data,
u32 reg_addr, u64 reg_data);
-
-bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
+void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
+ u64 reg_addr, u64 reg_data);
+u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev);
+bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
+ enum amd_asic_type asic_type);
bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
+void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev);
+
int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
struct amdgpu_reset_context *reset_context);
int amdgpu_do_asic_reset(struct list_head *device_list_handle,
struct amdgpu_reset_context *reset_context);
+int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context);
+
int emu_soc_asic_init(struct amdgpu_device *adev);
/*
@@ -1171,8 +1437,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
#define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ)
-#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg))
-#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v))
+#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0)
+#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0)
#define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
#define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
@@ -1182,12 +1448,18 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0)
#define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
#define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
+#define RREG32_XCC(reg, inst) amdgpu_device_xcc_rreg(adev, (reg), 0, inst)
+#define WREG32_XCC(reg, v, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), 0, inst)
#define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
#define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
#define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
#define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v))
+#define RREG32_PCIE_EXT(reg) adev->pcie_rreg_ext(adev, (reg))
+#define WREG32_PCIE_EXT(reg, v) adev->pcie_wreg_ext(adev, (reg), (v))
#define RREG64_PCIE(reg) adev->pcie_rreg64(adev, (reg))
#define WREG64_PCIE(reg, v) adev->pcie_wreg64(adev, (reg), (v))
+#define RREG64_PCIE_EXT(reg) adev->pcie_rreg64_ext(adev, (reg))
+#define WREG64_PCIE_EXT(reg, v) adev->pcie_wreg64_ext(adev, (reg), (v))
#define RREG32_SMC(reg) adev->smc_rreg(adev, (reg))
#define WREG32_SMC(reg, v) adev->smc_wreg(adev, (reg), (v))
#define RREG32_UVD_CTX(reg) adev->uvd_ctx_rreg(adev, (reg))
@@ -1243,6 +1515,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define WREG32_FIELD_OFFSET(reg, offset, field, val) \
WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
+#define AMDGPU_GET_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> (l))
/*
* BIOS helpers.
*/
@@ -1253,7 +1526,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
/*
* ASICs macro.
*/
-#define amdgpu_asic_set_vga_state(adev, state) (adev)->asic_funcs->set_vga_state((adev), (state))
+#define amdgpu_asic_set_vga_state(adev, state) \
+ ((adev)->asic_funcs->set_vga_state ? (adev)->asic_funcs->set_vga_state((adev), (state)) : 0)
#define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev))
#define amdgpu_asic_reset_method(adev) (adev)->asic_funcs->reset_method((adev))
#define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
@@ -1266,10 +1540,6 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
#define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
#define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
-#define amdgpu_asic_flush_hdp(adev, r) \
- ((adev)->asic_funcs->flush_hdp ? (adev)->asic_funcs->flush_hdp((adev), (r)) : (adev)->hdp.funcs->flush_hdp((adev), (r)))
-#define amdgpu_asic_invalidate_hdp(adev, r) \
- ((adev)->asic_funcs->invalidate_hdp ? (adev)->asic_funcs->invalidate_hdp((adev), (r)) : (adev)->hdp.funcs->invalidate_hdp((adev), (r)))
#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
@@ -1281,18 +1551,24 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
((adev)->asic_funcs->update_umd_stable_pstate ? (adev)->asic_funcs->update_umd_stable_pstate((adev), (enter)) : 0)
#define amdgpu_asic_query_video_codecs(adev, e, c) (adev)->asic_funcs->query_video_codecs((adev), (e), (c))
-#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter));
+#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter))
-#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+/* Mask with bit @i and every higher bit set; 0 once @i reaches BITS_PER_LONG. */
+#define BIT_MASK_UPPER(i) ((i) >= BITS_PER_LONG ? 0 : ~0UL << (i))
+/*
+ * Visit the 0-based index of every set bit in @inst_mask, lowest first.
+ * ffs() yields a 1-based position (0 when no bit is left); the "i-- != 0"
+ * test ends the loop on 0 and otherwise turns the position into a 0-based
+ * index.  The step masks off bits up to and including @i before searching
+ * again.
+ */
+#define for_each_inst(i, inst_mask) \
+ for (i = ffs(inst_mask); i-- != 0; \
+ i = ffs(inst_mask & BIT_MASK_UPPER(i + 1)))
/* Common functions */
bool amdgpu_device_has_job_running(struct amdgpu_device *adev);
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
- struct amdgpu_job* job);
+ struct amdgpu_job *job,
+ struct amdgpu_reset_context *reset_context);
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
int amdgpu_device_pci_reset(struct amdgpu_device *adev);
bool amdgpu_device_need_post(struct amdgpu_device *adev);
+bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev);
+bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
u64 num_vis_bytes);
@@ -1302,44 +1578,53 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
const u32 array_size);
int amdgpu_device_mode1_reset(struct amdgpu_device *adev);
-bool amdgpu_device_supports_atpx(struct drm_device *dev);
-bool amdgpu_device_supports_px(struct drm_device *dev);
-bool amdgpu_device_supports_boco(struct drm_device *dev);
-bool amdgpu_device_supports_smart_shift(struct drm_device *dev);
-bool amdgpu_device_supports_baco(struct drm_device *dev);
+int amdgpu_device_link_reset(struct amdgpu_device *adev);
+bool amdgpu_device_supports_atpx(struct amdgpu_device *adev);
+bool amdgpu_device_supports_px(struct amdgpu_device *adev);
+bool amdgpu_device_supports_boco(struct amdgpu_device *adev);
+bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev);
+int amdgpu_device_supports_baco(struct amdgpu_device *adev);
+void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev);
bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
struct amdgpu_device *peer_adev);
-int amdgpu_device_baco_enter(struct drm_device *dev);
-int amdgpu_device_baco_exit(struct drm_device *dev);
+int amdgpu_device_baco_enter(struct amdgpu_device *adev);
+int amdgpu_device_baco_exit(struct amdgpu_device *adev);
void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
+void amdgpu_device_halt(struct amdgpu_device *adev);
+u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
+ u32 reg);
+void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
+ u32 reg, u32 v);
+struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev);
+struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
+ struct dma_fence *gang);
+struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_job *job);
+bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev);
+ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring);
+ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset);
+
/* atpx handler */
#if defined(CONFIG_VGA_SWITCHEROO)
void amdgpu_register_atpx_handler(void);
void amdgpu_unregister_atpx_handler(void);
bool amdgpu_has_atpx_dgpu_power_cntl(void);
bool amdgpu_is_atpx_hybrid(void);
-bool amdgpu_atpx_dgpu_req_power_for_displays(void);
bool amdgpu_has_atpx(void);
#else
static inline void amdgpu_register_atpx_handler(void) {}
static inline void amdgpu_unregister_atpx_handler(void) {}
static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
static inline bool amdgpu_is_atpx_hybrid(void) { return false; }
-static inline bool amdgpu_atpx_dgpu_req_power_for_displays(void) { return false; }
static inline bool amdgpu_has_atpx(void) { return false; }
#endif
-#if defined(CONFIG_VGA_SWITCHEROO) && defined(CONFIG_ACPI)
-void *amdgpu_atpx_get_dhandle(void);
-#else
-static inline void *amdgpu_atpx_get_dhandle(void) { return NULL; }
-#endif
-
/*
* KMS
*/
@@ -1348,20 +1633,18 @@ extern const int amdgpu_max_kms_ioctl;
int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags);
void amdgpu_driver_unload_kms(struct drm_device *dev);
-void amdgpu_driver_lastclose_kms(struct drm_device *dev);
int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
void amdgpu_driver_postclose_kms(struct drm_device *dev,
struct drm_file *file_priv);
void amdgpu_driver_release_kms(struct drm_device *dev);
-int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
+int amdgpu_device_prepare(struct drm_device *dev);
+void amdgpu_device_complete(struct drm_device *dev);
int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc);
int amdgpu_enable_vblank_kms(struct drm_crtc *crtc);
void amdgpu_disable_vblank_kms(struct drm_crtc *crtc);
-long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
- unsigned long arg);
int amdgpu_info_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
@@ -1386,6 +1669,12 @@ struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock);
/* amdgpu_acpi.c */
+/*
+ * Result record taken by amdgpu_acpi_get_mem_info().
+ * NOTE(review): field meanings are not visible here - presumably a memory
+ * size, an ACPI proximity domain (pxm) and a NUMA node id (nid); confirm
+ * against amdgpu_acpi.c.
+ */
+struct amdgpu_numa_info {
+ uint64_t size;
+ int pxm;
+ int nid;
+};
+
/* ATCS Device/Driver State */
#define AMDGPU_ATCS_PSC_DEV_STATE_D0 0
#define AMDGPU_ATCS_PSC_DEV_STATE_D3_HOT 3
@@ -1401,34 +1690,57 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
u8 perf_req, bool advertise);
int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
u8 dev_state, bool drv_state);
-int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state);
+int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev,
+ enum amdgpu_ss ss_state);
int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);
+int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset,
+ u64 *tmr_size);
+int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id,
+ struct amdgpu_numa_info *numa_info);
void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps);
-bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
+bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev);
void amdgpu_acpi_detect(void);
+void amdgpu_acpi_release(void);
#else
static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; }
+/*
+ * Stub from the #else branch of the surrounding conditional: no TMR
+ * information is available, so the query always fails with -EINVAL and
+ * leaves @tmr_offset / @tmr_size untouched.
+ * NOTE(review): the guarding #if is outside this hunk - presumably
+ * CONFIG_ACPI; confirm.
+ */
+static inline int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev,
+ u64 *tmr_offset, u64 *tmr_size)
+{
+ return -EINVAL;
+}
+/*
+ * Stub from the #else branch of the surrounding conditional: always fails
+ * with -EINVAL and never writes to @numa_info.
+ */
+static inline int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev,
+ int xcc_id,
+ struct amdgpu_numa_info *numa_info)
+{
+ return -EINVAL;
+}
static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
-static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; }
+static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; }
static inline void amdgpu_acpi_detect(void) { }
+static inline void amdgpu_acpi_release(void) { }
static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; }
static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
u8 dev_state, bool drv_state) { return 0; }
-static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev,
- enum amdgpu_ss ss_state) { return 0; }
+/*
+ * Stub from the #else branch of the surrounding conditional: does nothing
+ * and reports success (0).
+ */
+static inline int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev,
+ enum amdgpu_ss ss_state)
+{
+ return 0;
+}
+static inline void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps) { }
#endif
-int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
- uint64_t addr, struct amdgpu_bo **bo,
- struct amdgpu_bo_va_mapping **mapping);
-
-#if defined(CONFIG_DRM_AMD_DC)
-int amdgpu_dm_display_resume(struct amdgpu_device *adev );
+#if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
+bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev);
+bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
#else
-static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { return 0; }
+static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; }
+static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; }
#endif
+#if defined(CONFIG_DRM_AMD_ISP)
+int amdgpu_acpi_get_isp4_dev(struct acpi_device **dev);
+#endif
void amdgpu_register_gpu_instance(struct amdgpu_device *adev);
void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev);
@@ -1449,6 +1761,15 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
enum amd_powergating_state state);
+/*
+ * Return true only when amdgpu_gpu_recovery is non-zero and none of the
+ * gfx, compute, sdma and video timeouts of @adev equals
+ * MAX_SCHEDULE_TIMEOUT.
+ */
+static inline bool amdgpu_device_has_timeouts_enabled(struct amdgpu_device *adev)
+{
+	if (amdgpu_gpu_recovery == 0)
+		return false;
+
+	if (adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
+	    adev->compute_timeout == MAX_SCHEDULE_TIMEOUT)
+		return false;
+
+	return adev->sdma_timeout != MAX_SCHEDULE_TIMEOUT &&
+	       adev->video_timeout != MAX_SCHEDULE_TIMEOUT;
+}
+
#include "amdgpu_object.h"
static inline bool amdgpu_is_tmz(struct amdgpu_device *adev)
@@ -1456,8 +1777,32 @@ static inline bool amdgpu_is_tmz(struct amdgpu_device *adev)
return adev->gmc.tmz_enabled;
}
-static inline int amdgpu_in_reset(struct amdgpu_device *adev)
+int amdgpu_in_reset(struct amdgpu_device *adev);
+
+extern const struct attribute_group amdgpu_vram_mgr_attr_group;
+extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
+extern const struct attribute_group amdgpu_flash_attr_group;
+
+void amdgpu_set_init_level(struct amdgpu_device *adev,
+ enum amdgpu_init_lvl_id lvl);
+
+/*
+ * Check that the device still answers on the PCI bus by reading its
+ * PCI_COMMAND config register.
+ *
+ * Returns 0 when the read succeeds with a plausible value, -ENODEV when the
+ * read fails or PCI_POSSIBLE_ERROR() flags the value that came back.
+ */
+static inline int amdgpu_device_bus_status_check(struct amdgpu_device *adev)
{
- return atomic_read(&adev->in_gpu_reset);
+ u32 status;
+ int r;
+
+ r = pci_read_config_dword(adev->pdev, PCI_COMMAND, &status);
+ if (r || PCI_POSSIBLE_ERROR(status)) {
+ /* terminate the message so it is logged as one complete line */
+ dev_err(adev->dev, "device lost from bus!\n");
+ return -ENODEV;
+ }
+
+ return 0;
}
+
+void amdgpu_device_set_uid(struct amdgpu_uid *uid_info,
+ enum amdgpu_uid_type type, uint8_t inst,
+ uint64_t uid);
+uint64_t amdgpu_device_get_uid(struct amdgpu_uid *uid_info,
+ enum amdgpu_uid_type type, uint8_t inst);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
new file mode 100644
index 000000000000..9b3180449150
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
@@ -0,0 +1,984 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/list.h>
+#include "amdgpu.h"
+#include "amdgpu_aca.h"
+#include "amdgpu_ras.h"
+
+/* Designated initializer: slot ACA_HWIP_TYPE_<type> of a table gets {hwid, mcatype}. */
+#define ACA_BANK_HWID(type, hwid, mcatype) [ACA_HWIP_TYPE_##type] = {hwid, mcatype}
+
+/* Callback type that aca_dispatch_bank() invokes for every bank a handle accepts. */
+typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data);
+
+/*
+ * Hardware ID / MCA type pair that aca_bank_hwip_is_matched() compares with
+ * the IPID register of a bank, indexed by ACA hwip type.  Slots without an
+ * entry keep hwid == 0, which that helper treats as "never matches".
+ */
+static struct aca_hwip aca_hwid_mcatypes[ACA_HWIP_TYPE_COUNT] = {
+ ACA_BANK_HWID(SMU, 0x01, 0x01),
+ ACA_BANK_HWID(PCS_XGMI, 0x50, 0x00),
+ ACA_BANK_HWID(UMC, 0x96, 0x00),
+};
+
+/* Zero @banks and set up its (empty) list head; a NULL @banks is ignored. */
+static void aca_banks_init(struct aca_banks *banks)
+{
+	if (banks) {
+		memset(banks, 0, sizeof(*banks));
+		INIT_LIST_HEAD(&banks->list);
+	}
+}
+
+/*
+ * Append a private copy of @bank to the tail of @banks.
+ *
+ * Returns 0 on success, -EINVAL when @bank is NULL and -ENOMEM when the
+ * list node cannot be allocated.
+ */
+static int aca_banks_add_bank(struct aca_banks *banks, struct aca_bank *bank)
+{
+	struct aca_bank_node *entry;
+
+	if (bank == NULL)
+		return -EINVAL;
+
+	entry = kvzalloc(sizeof(*entry), GFP_KERNEL);
+	if (entry == NULL)
+		return -ENOMEM;
+
+	/* the node owns its own copy, so the caller may reuse @bank */
+	memcpy(&entry->bank, bank, sizeof(*bank));
+	INIT_LIST_HEAD(&entry->node);
+
+	list_add_tail(&entry->node, &banks->list);
+	banks->nr_banks += 1;
+
+	return 0;
+}
+
+/* Unlink and free every node of @banks, decrementing nr_banks per node. */
+static void aca_banks_release(struct aca_banks *banks)
+{
+	struct aca_bank_node *cur, *next;
+
+	/* the _safe iterator simply does nothing on an empty list */
+	list_for_each_entry_safe(cur, next, &banks->list, node) {
+		list_del(&cur->node);
+		kvfree(cur);
+		banks->nr_banks--;
+	}
+}
+
+/*
+ * Ask the SMU backend how many valid ACA banks of @type exist and store
+ * the answer in @count.
+ *
+ * Returns -EINVAL when @count is NULL, -EOPNOTSUPP when no
+ * get_valid_aca_count callback is installed, otherwise the callback result.
+ */
+static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count)
+{
+	const struct aca_smu_funcs *funcs = adev->aca.smu_funcs;
+
+	if (count == NULL)
+		return -EINVAL;
+
+	if (funcs && funcs->get_valid_aca_count)
+		return funcs->get_valid_aca_count(adev, type, count);
+
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Name / register-index pairs printed, in this order, by
+ * aca_smu_bank_dump().  reg_idx selects the slot of aca_bank::regs to
+ * print; note that the "MISC" label maps to ACA_REG_IDX_MISC0.
+ */
+static struct aca_regs_dump {
+ const char *name;
+ int reg_idx;
+} aca_regs[] = {
+ {"CONTROL", ACA_REG_IDX_CTL},
+ {"STATUS", ACA_REG_IDX_STATUS},
+ {"ADDR", ACA_REG_IDX_ADDR},
+ {"MISC", ACA_REG_IDX_MISC0},
+ {"CONFIG", ACA_REG_IDX_CONFIG},
+ {"IPID", ACA_REG_IDX_IPID},
+ {"SYND", ACA_REG_IDX_SYND},
+ {"DESTAT", ACA_REG_IDX_DESTAT},
+ {"DEADDR", ACA_REG_IDX_DEADDR},
+ {"CONTROL_MASK", ACA_REG_IDX_CTL_MASK},
+};
+
+/*
+ * Log every register listed in aca_regs[] for @bank.
+ *
+ * @idx is the 0-based position of the bank and @total the number of banks;
+ * the log shows them as "ACA[idx+1/total]".  The event id is taken from
+ * @qctx when one is supplied, RAS_EVENT_INVALID_ID otherwise.  Nothing is
+ * logged for a CE bank that is not deferred while
+ * adev->debug_disable_ce_logs is set.
+ */
+static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank,
+			      struct ras_query_context *qctx)
+{
+	u64 event_id = RAS_EVENT_INVALID_ID;
+	const struct aca_regs_dump *reg;
+	int n;
+
+	if (qctx)
+		event_id = qctx->evid.event_id;
+
+	if (adev->debug_disable_ce_logs &&
+	    bank->smu_err_type == ACA_SMU_TYPE_CE &&
+	    !ACA_BANK_ERR_IS_DEFFERED(bank))
+		return;
+
+	RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n");
+
+	/* banks are numbered from 1 in the output, e.g. ACA[08/08]: xxxx */
+	for (n = 0; n < ARRAY_SIZE(aca_regs); n++) {
+		reg = &aca_regs[n];
+		RAS_EVENT_LOG(adev, event_id, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n",
+			      idx + 1, total, reg->name, bank->regs[reg->reg_idx]);
+	}
+
+	if (ACA_REG__STATUS__SCRUB(bank->regs[ACA_REG_IDX_STATUS]))
+		RAS_EVENT_LOG(adev, event_id, HW_ERR "hardware error logged by the scrubber\n");
+}
+
+/*
+ * Tell whether the IPID register of @bank carries the hardware id and MCA
+ * type registered for @type in aca_hwid_mcatypes[].
+ *
+ * A NULL @bank, ACA_HWIP_TYPE_UNKNOW and table slots whose hwid is 0 never
+ * match.
+ */
+static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type type)
+{
+	const struct aca_hwip *expected;
+	int bank_hwid, bank_mcatype;
+	u64 ipid;
+
+	if (!bank || type == ACA_HWIP_TYPE_UNKNOW)
+		return false;
+
+	expected = &aca_hwid_mcatypes[type];
+	if (!expected->hwid)
+		return false;
+
+	ipid = bank->regs[ACA_REG_IDX_IPID];
+	bank_hwid = ACA_REG__IPID__HARDWAREID(ipid);
+	bank_mcatype = ACA_REG__IPID__MCATYPE(ipid);
+
+	if (expected->hwid != bank_hwid)
+		return false;
+
+	return expected->mcatype == bank_mcatype;
+}
+
+/*
+ * Read @count banks of @type, starting at index @start, from the SMU
+ * backend and append them to @banks, logging each appended bank.
+ *
+ * UE banks that have the poison status bit set but do not belong to the
+ * UMC hwip are skipped.
+ *
+ * Returns 0 on success (also when @count is 0), -EOPNOTSUPP without a
+ * get_valid_aca_bank callback, -EINVAL for an unknown @type or a range
+ * beyond the backend's bank limit, or the first error from the callback or
+ * from aca_banks_add_bank().  Banks appended before a failure stay in
+ * @banks.
+ */
+static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type,
+				       int start, int count,
+				       struct aca_banks *banks, struct ras_query_context *qctx)
+{
+	const struct aca_smu_funcs *funcs = adev->aca.smu_funcs;
+	struct aca_bank bank;
+	int idx, limit, err;
+
+	if (count == 0)
+		return 0;
+
+	if (!funcs || !funcs->get_valid_aca_bank)
+		return -EOPNOTSUPP;
+
+	if (type == ACA_SMU_TYPE_UE)
+		limit = funcs->max_ue_bank_count;
+	else if (type == ACA_SMU_TYPE_CE)
+		limit = funcs->max_ce_bank_count;
+	else
+		return -EINVAL;
+
+	if (start + count > limit)
+		return -EINVAL;
+
+	count = min_t(int, count, limit);
+	for (idx = 0; idx < count; idx++) {
+		memset(&bank, 0, sizeof(bank));
+
+		err = funcs->get_valid_aca_bank(adev, type, start + idx, &bank);
+		if (err)
+			return err;
+
+		bank.smu_err_type = type;
+
+		/*
+		 * Poison being consumed when injecting a UE while running
+		 * background workloads is unexpected, so drop such banks
+		 * unless they come from UMC.
+		 */
+		if (type == ACA_SMU_TYPE_UE &&
+		    ACA_REG__STATUS__POISON(bank.regs[ACA_REG_IDX_STATUS]) &&
+		    !aca_bank_hwip_is_matched(&bank, ACA_HWIP_TYPE_UMC))
+			continue;
+
+		aca_smu_bank_dump(adev, idx, count, &bank, qctx);
+
+		err = aca_banks_add_bank(banks, &bank);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Decide whether @handle should process @bank.
+ *
+ * Deferred banks are accepted by the UMC handle only.  Any other bank must
+ * match the handle's hwip and, when the handle provides an
+ * aca_bank_is_valid callback, must also pass that callback.
+ */
+static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type)
+{
+	const struct aca_bank_ops *ops = handle->bank_ops;
+
+	/* Parse all deferred errors with UMC aca handle */
+	if (ACA_BANK_ERR_IS_DEFFERED(bank))
+		return handle->hwip == ACA_HWIP_TYPE_UMC;
+
+	if (!aca_bank_hwip_is_matched(bank, handle->hwip))
+		return false;
+
+	/* no callback means every hwip-matched bank is accepted */
+	return !ops->aca_bank_is_valid ||
+	       ops->aca_bank_is_valid(handle, bank, type, handle->data);
+}
+
+/*
+ * Allocate a zeroed bank error record carrying a copy of @info and append
+ * it to @aerr under aerr->lock.
+ *
+ * Returns the new record, or NULL when the allocation fails.
+ */
+static struct aca_bank_error *new_bank_error(struct aca_error *aerr, struct aca_bank_info *info)
+{
+	struct aca_bank_error *rec = kvzalloc(sizeof(*rec), GFP_KERNEL);
+
+	if (rec == NULL)
+		return NULL;
+
+	memcpy(&rec->info, info, sizeof(*info));
+	INIT_LIST_HEAD(&rec->node);
+
+	mutex_lock(&aerr->lock);
+	list_add_tail(&rec->node, &aerr->list);
+	aerr->nr_errors++;
+	mutex_unlock(&aerr->lock);
+
+	return rec;
+}
+
+/*
+ * Look up, under aerr->lock, the first record in @aerr whose socket_id and
+ * die_id both equal those of @info.
+ *
+ * Returns the record, or NULL when there is none.
+ */
+static struct aca_bank_error *find_bank_error(struct aca_error *aerr, struct aca_bank_info *info)
+{
+	struct aca_bank_error *cur, *match = NULL;
+
+	mutex_lock(&aerr->lock);
+	list_for_each_entry(cur, &aerr->list, node) {
+		if (cur->info.socket_id == info->socket_id &&
+		    cur->info.die_id == info->die_id) {
+			match = cur;
+			break;
+		}
+	}
+	mutex_unlock(&aerr->lock);
+
+	return match;
+}
+
+/*
+ * Unlink @bank_error from @aerr, decrement the error count and free the
+ * record.  Does nothing when either argument is NULL.  No lock is taken
+ * here.
+ */
+static void aca_bank_error_remove(struct aca_error *aerr, struct aca_bank_error *bank_error)
+{
+	if (aerr && bank_error) {
+		list_del(&bank_error->node);
+		aerr->nr_errors--;
+		kvfree(bank_error);
+	}
+}
+
+/*
+ * Return the record of @aerr that matches @info, creating one when none
+ * exists yet.  Returns NULL when an argument is NULL or the allocation of a
+ * new record fails.
+ */
+static struct aca_bank_error *get_bank_error(struct aca_error *aerr, struct aca_bank_info *info)
+{
+	struct aca_bank_error *existing;
+
+	if (!aerr || !info)
+		return NULL;
+
+	existing = find_bank_error(aerr, info);
+
+	return existing ? existing : new_bank_error(aerr, info);
+}
+
+/*
+ * Add @count to the cached error record of @handle that matches @info for
+ * error class @type, creating the record on first use.
+ *
+ * Returns 0 on success or when @count is 0, -EINVAL for a NULL @handle /
+ * @info or an out-of-range @type, and -ENOMEM when no record could be
+ * obtained.
+ */
+int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info,
+				   enum aca_error_type type, u64 count)
+{
+	struct aca_bank_error *rec;
+	struct aca_error *aerr;
+
+	if (!handle || !info || type >= ACA_ERROR_TYPE_COUNT)
+		return -EINVAL;
+
+	if (count == 0)
+		return 0;
+
+	aerr = &handle->error_cache.errors[type];
+	rec = get_bank_error(aerr, info);
+	if (rec == NULL)
+		return -ENOMEM;
+
+	rec->count += count;
+
+	return 0;
+}
+
+/*
+ * Forward @bank to the aca_bank_parser callback of @handle.
+ *
+ * Returns -EINVAL for a NULL @bank, -EOPNOTSUPP when the handle has no
+ * parser callback, otherwise the callback result.
+ */
+static int aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type)
+{
+	const struct aca_bank_ops *ops = handle->bank_ops;
+
+	if (bank == NULL)
+		return -EINVAL;
+
+	if (ops->aca_bank_parser)
+		return ops->aca_bank_parser(handle, bank, type, handle->data);
+
+	return -EOPNOTSUPP;
+}
+
+/*
+ * bank_handler_t implementation that runs the handle's bank parser on
+ * @bank and returns its result; @data is not used.
+ */
+static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank *bank,
+				      enum aca_smu_type type, void *data)
+{
+	return aca_bank_parser(handle, bank, type);
+}
+
+/*
+ * Run @handler for every handle of @mgr that accepts @bank (see
+ * aca_bank_is_valid()).  Stops at, and returns, the first non-zero handler
+ * result; returns 0 otherwise, including when @mgr has no handles.
+ */
+static int aca_dispatch_bank(struct aca_handle_manager *mgr, struct aca_bank *bank,
+			     enum aca_smu_type type, bank_handler_t handler, void *data)
+{
+	struct aca_handle *cur;
+	int err;
+
+	/* iterating an empty list is a no-op, so no separate emptiness check */
+	list_for_each_entry(cur, &mgr->list, node) {
+		if (aca_bank_is_valid(cur, bank, type)) {
+			err = handler(cur, bank, type, data);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Dispatch every bank in @banks to the handles of @mgr through
+ * aca_dispatch_bank().
+ *
+ * Returns -EINVAL when @mgr or @banks is NULL, the first non-zero dispatch
+ * result, or 0.
+ */
+static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks *banks,
+			      enum aca_smu_type type, bank_handler_t handler, void *data)
+{
+	struct aca_bank_node *entry;
+	int err;
+
+	if (!mgr || !banks)
+		return -EINVAL;
+
+	/* pre check to avoid unnecessary operations */
+	if (list_empty(&mgr->list) || list_empty(&banks->list))
+		return 0;
+
+	list_for_each_entry(entry, &banks->list, node) {
+		err = aca_dispatch_bank(mgr, &entry->bank, type, handler, data);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Decide whether banks of @type should be fetched now.
+ *
+ * Non-UE types are always fetched.  Because the UE valid MCA count is only
+ * cleared after reset, UE banks are fetched just once while a RAS interrupt
+ * is flagged (first caller to flip ue_update_flag from 0 to 1); when no RAS
+ * interrupt is flagged the flag is reset and the fetch is allowed.
+ */
+static bool aca_bank_should_update(struct amdgpu_device *adev, enum aca_smu_type type)
+{
+	struct amdgpu_aca *aca = &adev->aca;
+
+	if (type != ACA_SMU_TYPE_UE)
+		return true;
+
+	if (amdgpu_ras_intr_triggered())
+		return atomic_cmpxchg(&aca->ue_update_flag, 0, 1) == 0;
+
+	atomic_set(&aca->ue_update_flag, 0);
+
+	return true;
+}
+
+/*
+ * Turn @banks into CPER records when adev->cper.enabled is set.
+ *
+ * For any type other than ACA_SMU_TYPE_UE all banks go into one
+ * amdgpu_cper_generate_ce_records() call.  For ACA_SMU_TYPE_UE every
+ * non-deferred bank gets its own UE record, while deferred banks are
+ * collected into a temporary list and encoded together through
+ * amdgpu_cper_generate_ce_records().  Failures are only reported with
+ * dev_warn().
+ */
+static void aca_banks_generate_cper(struct amdgpu_device *adev,
+				    enum aca_smu_type type,
+				    struct aca_banks *banks,
+				    int count)
+{
+	struct aca_bank_node *entry;
+	struct aca_banks de_banks;
+	int err;
+
+	if (!adev->cper.enabled)
+		return;
+
+	if (!banks || !count) {
+		dev_warn(adev->dev, "fail to generate cper records\n");
+		return;
+	}
+
+	if (type != ACA_SMU_TYPE_UE) {
+		/*
+		 * SMU_TYPE_CE banks are combined into 1 CPER entries,
+		 * they could be CEs or DEs or both
+		 */
+		if (amdgpu_cper_generate_ce_records(adev, banks, count))
+			dev_warn(adev->dev, "fail to generate ce cper records\n");
+		return;
+	}
+
+	/* UEs must be encoded into separate CPER entries */
+	aca_banks_init(&de_banks);
+	list_for_each_entry(entry, &banks->list, node) {
+		if (entry->bank.aca_err_type == ACA_ERROR_TYPE_DEFERRED) {
+			err = aca_banks_add_bank(&de_banks, &entry->bank);
+			if (err)
+				dev_warn(adev->dev, "fail to add de banks, ret = %d\n", err);
+			continue;
+		}
+
+		if (amdgpu_cper_generate_ue_record(adev, &entry->bank))
+			dev_warn(adev->dev, "fail to generate ue cper records\n");
+	}
+
+	if (!list_empty(&de_banks.list) &&
+	    amdgpu_cper_generate_ce_records(adev, &de_banks, de_banks.nr_banks))
+		dev_warn(adev->dev, "fail to generate de cper records\n");
+
+	aca_banks_release(&de_banks);
+}
+
+/*
+ * Fetch the currently valid banks of @type from the SMU, dispatch them to
+ * the registered handles through @handler and, when the dispatch succeeds,
+ * generate CPER records for them.
+ *
+ * Returns 0 when there is nothing to do (no handles, update suppressed by
+ * aca_bank_should_update(), no valid banks) or on success; otherwise the
+ * first error from the count query, the bank fetch or the dispatch.  The
+ * temporary bank list is always released.
+ */
+static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type,
+			    bank_handler_t handler, struct ras_query_context *qctx, void *data)
+{
+	struct amdgpu_aca *aca = &adev->aca;
+	struct aca_banks banks;
+	u32 nr_valid = 0;
+	int err;
+
+	if (list_empty(&aca->mgr.list))
+		return 0;
+
+	if (!aca_bank_should_update(adev, type))
+		return 0;
+
+	err = aca_smu_get_valid_aca_count(adev, type, &nr_valid);
+	if (err)
+		return err;
+
+	if (nr_valid == 0)
+		return 0;
+
+	aca_banks_init(&banks);
+
+	/* err is 0 here whenever the fetch succeeded but kept no bank */
+	err = aca_smu_get_valid_aca_banks(adev, type, 0, nr_valid, &banks, qctx);
+	if (err || list_empty(&banks.list))
+		goto out_release;
+
+	err = aca_dispatch_banks(&aca->mgr, &banks, type, handler, data);
+	if (!err)
+		aca_banks_generate_cper(adev, type, &banks, nr_valid);
+
+out_release:
+	aca_banks_release(&banks);
+
+	return err;
+}
+
+/*
+ * Add the count of @bank_error to the UE, CE or DE statistic of @err_data,
+ * chosen by @type, keyed by the record's socket and die id.
+ *
+ * Returns -EINVAL for an out-of-range @type, 0 otherwise (a zero count is
+ * skipped).
+ */
+static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_error_type type, struct ras_err_data *err_data)
+{
+	struct amdgpu_smuio_mcm_config_info mcm_info;
+	struct aca_bank_info *info = &bank_error->info;
+	u64 count;
+
+	if (type >= ACA_ERROR_TYPE_COUNT)
+		return -EINVAL;
+
+	count = bank_error->count;
+	if (count == 0)
+		return 0;
+
+	mcm_info.socket_id = info->socket_id;
+	mcm_info.die_id = info->die_id;
+
+	if (type == ACA_ERROR_TYPE_UE)
+		amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, count);
+	else if (type == ACA_ERROR_TYPE_CE)
+		amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, count);
+	else if (type == ACA_ERROR_TYPE_DEFERRED)
+		amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, count);
+
+	return 0;
+}
+
+/*
+ * Drain the cached records of class @type from @handle: each record is
+ * folded into @err_data and then removed, all under the class lock.
+ * Always returns 0.
+ */
+static int aca_log_aca_error(struct aca_handle *handle, enum aca_error_type type, struct ras_err_data *err_data)
+{
+	struct aca_error *aerr = &handle->error_cache.errors[type];
+	struct aca_bank_error *cur, *next;
+
+	mutex_lock(&aerr->lock);
+	/* nothing happens here when the list is empty */
+	list_for_each_entry_safe(cur, next, &aerr->list, node) {
+		aca_log_aca_error_data(cur, type, err_data);
+		aca_bank_error_remove(aerr, cur);
+	}
+	mutex_unlock(&aerr->lock);
+
+	return 0;
+}
+
+/*
+ * Refresh the error caches from the SMU and report the errors of class
+ * @type cached in @handle into @err_data.
+ *
+ * UE maps to the UE SMU bank type; CE and DEFERRED both map to the CE SMU
+ * bank type.  For UE and CE queries the deferred records are reported as
+ * well.  Returns -EINVAL for any other @type, the error from the bank
+ * update, or the result of the final aca_log_aca_error() call.
+ */
+static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, enum aca_error_type type,
+				struct ras_err_data *err_data, struct ras_query_context *qctx)
+{
+	enum aca_smu_type smu_type;
+	int err;
+
+	if (type == ACA_ERROR_TYPE_UE)
+		smu_type = ACA_SMU_TYPE_UE;
+	else if (type == ACA_ERROR_TYPE_CE || type == ACA_ERROR_TYPE_DEFERRED)
+		smu_type = ACA_SMU_TYPE_CE;
+	else
+		return -EINVAL;
+
+	/* update aca bank to aca source error_cache first */
+	err = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, qctx, NULL);
+	if (err)
+		return err;
+
+	/* DEs may be contained in CEs or UEs */
+	if (type != ACA_ERROR_TYPE_DEFERRED)
+		aca_log_aca_error(handle, ACA_ERROR_TYPE_DEFERRED, err_data);
+
+	return aca_log_aca_error(handle, type, err_data);
+}
+
+/*
+ * Return true when @handle has a non-zero mask and its list node is empty,
+ * false otherwise.
+ * NOTE(review): the only caller visible here, amdgpu_aca_get_error_data(),
+ * returns -EOPNOTSUPP when this is true, so the name reads inverted with
+ * respect to that use - confirm the intended meaning.
+ */
+static bool aca_handle_is_valid(struct aca_handle *handle)
+{
+	return handle->mask && list_empty(&handle->node);
+}
+
+/*
+ * Query the errors of class @type recorded for @handle into @err_data.
+ *
+ * Returns -EINVAL when @handle or @err_data is NULL, -EOPNOTSUPP when
+ * aca_handle_is_valid() reports true for the handle, 0 when @type is
+ * negative or not part of the handle's mask, otherwise the result of
+ * __aca_get_error_data().
+ */
+int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle,
+			      enum aca_error_type type, struct ras_err_data *err_data,
+			      struct ras_query_context *qctx)
+{
+	if (!handle || !err_data)
+		return -EINVAL;
+
+	if (aca_handle_is_valid(handle))
+		return -EOPNOTSUPP;
+
+	if ((type >= 0) && (BIT(type) & handle->mask))
+		return __aca_get_error_data(adev, handle, type, err_data, qctx);
+
+	return 0;
+}
+
+/* Prepare @aerr as an empty, unlocked error list of class @type. */
+static void aca_error_init(struct aca_error *aerr, enum aca_error_type type)
+{
+	aerr->type = type;
+	aerr->nr_errors = 0;
+	INIT_LIST_HEAD(&aerr->list);
+	mutex_init(&aerr->lock);
+}
+
+/*
+ * Initialise every error class of the handle's cache, from
+ * ACA_ERROR_TYPE_UE up to (not including) ACA_ERROR_TYPE_COUNT.
+ */
+static void aca_init_error_cache(struct aca_handle *handle)
+{
+	struct aca_error *errors = handle->error_cache.errors;
+	int t;
+
+	for (t = ACA_ERROR_TYPE_UE; t < ACA_ERROR_TYPE_COUNT; t++)
+		aca_error_init(&errors[t], t);
+}
+
+/*
+ * Tear down one error class: free every cached record under the class
+ * lock, then destroy the lock.
+ */
+static void aca_error_fini(struct aca_error *aerr)
+{
+	struct aca_bank_error *bank_error, *tmp;
+
+	mutex_lock(&aerr->lock);
+	/* the _safe iterator is a no-op on an empty list */
+	list_for_each_entry_safe(bank_error, tmp, &aerr->list, node)
+		aca_bank_error_remove(aerr, bank_error);
+	mutex_unlock(&aerr->lock);
+
+	/* a mutex must not be held when it is destroyed */
+	mutex_destroy(&aerr->lock);
+}
+
+/*
+ * Tear down every error class of the handle's cache, from
+ * ACA_ERROR_TYPE_UE up to (not including) ACA_ERROR_TYPE_COUNT.
+ */
+static void aca_fini_error_cache(struct aca_handle *handle)
+{
+	struct aca_error *errors = handle->error_cache.errors;
+	int t;
+
+	for (t = ACA_ERROR_TYPE_UE; t < ACA_ERROR_TYPE_COUNT; t++)
+		aca_error_fini(&errors[t]);
+}
+
+/*
+ * Reset @handle, fill it from @name, @ras_info and @data, initialise its
+ * error cache and append it to @mgr.  Always returns 0.
+ */
+static int add_aca_handle(struct amdgpu_device *adev, struct aca_handle_manager *mgr, struct aca_handle *handle,
+			  const char *name, const struct aca_info *ras_info, void *data)
+{
+	memset(handle, 0, sizeof(*handle));
+
+	/* identity of the handle */
+	handle->adev = adev;
+	handle->mgr = mgr;
+	handle->name = name;
+	handle->data = data;
+
+	/* what the handle listens to, copied from the caller's description */
+	handle->hwip = ras_info->hwip;
+	handle->mask = ras_info->mask;
+	handle->bank_ops = ras_info->bank_ops;
+
+	aca_init_error_cache(handle);
+
+	INIT_LIST_HEAD(&handle->node);
+	list_add_tail(&handle->node, &mgr->list);
+	mgr->nr_handles += 1;
+
+	return 0;
+}
+
+/* sysfs show callback of the per-handle "aca_<name>" attribute. */
+static ssize_t aca_sysfs_read(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct aca_handle *handle;
+
+	/* The attribute is embedded in the handle, so recover the owner. */
+	handle = container_of(attr, struct aca_handle, aca_attr);
+
+	/*
+	 * NOTE: the ACA cache is cleared automatically once it has been read,
+	 * so the driver keeps a single query entry point and forwards this
+	 * request directly to the RAS query interface.
+	 */
+	return amdgpu_ras_aca_sysfs_read(dev, attr, handle, buf, handle->data);
+}
+
+/*
+ * Build the read-only "aca_<name>" attribute embedded in @handle and add it
+ * to the "ras" attribute group of the device kobject.
+ *
+ * Returns the result of sysfs_add_file_to_group().
+ */
+static int add_aca_sysfs(struct amdgpu_device *adev, struct aca_handle *handle)
+{
+	struct attribute *attr = &handle->aca_attr.attr;
+
+	snprintf(handle->attr_name, sizeof(handle->attr_name) - 1, "aca_%s", handle->name);
+
+	handle->aca_attr.show = aca_sysfs_read;
+	attr->name = handle->attr_name;
+	attr->mode = S_IRUGO;
+	sysfs_attr_init(attr);
+
+	return sysfs_add_file_to_group(&adev->dev->kobj, attr, "ras");
+}
+
+/**
+ * amdgpu_aca_add_handle - register an ACA handle and expose it in sysfs
+ * @adev: amdgpu device owning the handle manager
+ * @handle: caller-provided storage; it is reset and filled in here
+ * @name: handle name, also used to build the sysfs attribute name
+ * @ras_info: descriptor supplying hwip, mask and bank_ops
+ * @data: opaque pointer stored in the handle
+ *
+ * Return: 0 without touching @handle when ACA is not enabled, otherwise the
+ * first error from add_aca_handle() or add_aca_sysfs(), or 0 on success.
+ *
+ * NOTE(review): when add_aca_sysfs() fails the handle stays linked on the
+ * manager list - confirm callers cope with that.
+ */
+int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle,
+			  const char *name, const struct aca_info *ras_info, void *data)
+{
+	int ret;
+
+	if (!amdgpu_aca_is_enabled(adev))
+		return 0;
+
+	ret = add_aca_handle(adev, &adev->aca.mgr, handle, name, ras_info, data);
+	if (!ret)
+		ret = add_aca_sysfs(adev, handle);
+
+	return ret;
+}
+
+/*
+ * Release the handle's error cache and unlink it from its manager.
+ *
+ * list_del_init() is used instead of list_del() so that the node reads as
+ * empty afterwards: amdgpu_aca_remove_handle() relies on
+ * list_empty(&handle->node) to skip handles that were already removed, and
+ * a poisoned node left by list_del() would defeat that guard.
+ */
+static void remove_aca_handle(struct aca_handle *handle)
+{
+	struct aca_handle_manager *mgr = handle->mgr;
+
+	aca_fini_error_cache(handle);
+	list_del_init(&handle->node);
+	mgr->nr_handles--;
+}
+
+/* Remove the handle's attribute from the "ras" group of the device kobject. */
+static void remove_aca_sysfs(struct aca_handle *handle)
+{
+	struct kobject *kobj = &handle->adev->dev->kobj;
+
+	/* Skip the removal when the device kobject has no sysfs node (sd is NULL). */
+	if (!kobj->sd)
+		return;
+
+	sysfs_remove_file_from_group(kobj, &handle->aca_attr.attr, "ras");
+}
+
+/**
+ * amdgpu_aca_remove_handle - drop an ACA handle's sysfs file and registration
+ * @handle: handle to remove; NULL or a handle with an empty list node is ignored
+ */
+void amdgpu_aca_remove_handle(struct aca_handle *handle)
+{
+	if (handle && !list_empty(&handle->node)) {
+		remove_aca_sysfs(handle);
+		remove_aca_handle(handle);
+	}
+}
+
+/* Start @mgr with no registered handles. Always returns 0. */
+static int aca_manager_init(struct aca_handle_manager *mgr)
+{
+	mgr->nr_handles = 0;
+	INIT_LIST_HEAD(&mgr->list);
+
+	return 0;
+}
+
+/* Remove every handle that is still registered with @mgr. */
+static void aca_manager_fini(struct aca_handle_manager *mgr)
+{
+	struct aca_handle *cur, *next;
+
+	/*
+	 * The _safe iterator is required because each removal unlinks the
+	 * current entry. An empty list simply yields zero iterations.
+	 */
+	list_for_each_entry_safe(cur, next, &mgr->list, node)
+		amdgpu_aca_remove_handle(cur);
+}
+
+/* ACA is in use when either the driver flag or the debug override is set. */
+bool amdgpu_aca_is_enabled(struct amdgpu_device *adev)
+{
+	if (adev->aca.is_enabled)
+		return true;
+
+	return adev->debug_enable_ras_aca;
+}
+
+/**
+ * amdgpu_aca_init - reset the UE update flag and set up the handle manager
+ * @adev: amdgpu device
+ *
+ * Return: the result of aca_manager_init().
+ */
+int amdgpu_aca_init(struct amdgpu_device *adev)
+{
+	atomic_set(&adev->aca.ue_update_flag, 0);
+
+	return aca_manager_init(&adev->aca.mgr);
+}
+
+/**
+ * amdgpu_aca_fini - remove all registered handles and clear the UE update flag
+ * @adev: amdgpu device
+ */
+void amdgpu_aca_fini(struct amdgpu_device *adev)
+{
+	aca_manager_fini(&adev->aca.mgr);
+
+	atomic_set(&adev->aca.ue_update_flag, 0);
+}
+
+/**
+ * amdgpu_aca_reset - clear the UE update flag
+ * @adev: amdgpu device
+ *
+ * Return: always 0.
+ */
+int amdgpu_aca_reset(struct amdgpu_device *adev)
+{
+	atomic_set(&adev->aca.ue_update_flag, 0);
+
+	return 0;
+}
+
+/**
+ * amdgpu_aca_set_smu_funcs - install the SMU callback table used by ACA
+ * @adev: amdgpu device
+ * @smu_funcs: callback table to install
+ *
+ * Warns when a table was already installed, then overwrites it regardless.
+ */
+void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs)
+{
+	WARN_ON(adev->aca.smu_funcs);
+	adev->aca.smu_funcs = smu_funcs;
+}
+
+/**
+ * aca_bank_info_decode - extract location and type info from a bank's IPID
+ * @bank: bank whose IPID register is decoded
+ * @info: output; hwid, mcatype, die_id and socket_id are filled in
+ *
+ * Return: 0 on success, -EINVAL when @bank or @info is NULL.
+ */
+int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info)
+{
+	u32 inst_hi, inst_lo;
+	u64 ipid;
+
+	if (!bank || !info)
+		return -EINVAL;
+
+	ipid = bank->regs[ACA_REG_IDX_IPID];
+	inst_hi = ACA_REG__IPID__INSTANCEIDHI(ipid);
+	inst_lo = ACA_REG__IPID__INSTANCEIDLO(ipid);
+
+	info->hwid = ACA_REG__IPID__HARDWAREID(ipid);
+	info->mcatype = ACA_REG__IPID__MCATYPE(ipid);
+
+	/*
+	 * Unified DieID format: SAASS (A: AID, S: Socket).
+	 *   Unified DieID[4:4] = InstanceIdLo[0:0]
+	 *   Unified DieID[3:0] = InstanceIdHi[3:0]
+	 * The two A bits form die_id; the three S bits are gathered into
+	 * socket_id, with the InstanceIdLo bit as its most significant bit.
+	 */
+	info->die_id = (inst_hi >> 2) & 0x03;
+	info->socket_id = (inst_hi & 0x03) | ((inst_lo & 0x1) << 2);
+
+	return 0;
+}
+
+/*
+ * Ask the SMU backend for the error code of @bank.
+ *
+ * Returns -EOPNOTSUPP when no parse_error_code callback is installed,
+ * otherwise whatever the callback returns.
+ */
+static int aca_bank_get_error_code(struct amdgpu_device *adev, struct aca_bank *bank)
+{
+	const struct aca_smu_funcs *funcs = adev->aca.smu_funcs;
+
+	if (funcs && funcs->parse_error_code)
+		return funcs->parse_error_code(adev, bank);
+
+	return -EOPNOTSUPP;
+}
+
+/**
+ * aca_bank_check_error_codes - test whether a bank's error code is in a set
+ * @adev: amdgpu device
+ * @bank: bank to inspect
+ * @err_codes: array of accepted error codes
+ * @size: number of entries in @err_codes
+ *
+ * Return: 0 when the bank's error code matches an entry, -EINVAL when it
+ * matches none or when @bank or @err_codes is NULL, or the negative value
+ * returned by aca_bank_get_error_code().
+ */
+int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size)
+{
+	int code, idx;
+
+	if (!bank || !err_codes)
+		return -EINVAL;
+
+	code = aca_bank_get_error_code(adev, bank);
+	if (code < 0)
+		return code;
+
+	for (idx = 0; idx < size; idx++)
+		if (code == err_codes[idx])
+			return 0;
+
+	return -EINVAL;
+}
+
+/**
+ * amdgpu_aca_smu_set_debug_mode - forward a debug-mode switch to the SMU backend
+ * @adev: amdgpu device
+ * @en: requested debug mode state
+ *
+ * Return: -EOPNOTSUPP when no set_debug_mode callback is installed, otherwise
+ * the callback's return value.
+ */
+int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en)
+{
+	const struct aca_smu_funcs *funcs = adev->aca.smu_funcs;
+
+	if (funcs && funcs->set_debug_mode)
+		return funcs->set_debug_mode(adev, en);
+
+	return -EOPNOTSUPP;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+/*
+ * Setter of the "aca_debug_mode" debugfs attribute: any non-zero @val turns
+ * ACA debug mode on, zero turns it off. @data is the amdgpu device.
+ */
+static int amdgpu_aca_smu_debug_mode_set(void *data, u64 val)
+{
+	struct amdgpu_device *adev = data;
+	bool enable = !!val;
+	int ret;
+
+	ret = amdgpu_ras_set_aca_debug_mode(adev, enable);
+	if (ret)
+		return ret;
+
+	dev_info(adev->dev, "amdgpu set smu aca debug mode %s success\n", enable ? "on" : "off");
+
+	return 0;
+}
+
+/*
+ * Print one bank to @m as entry number @idx: its type, the decoded bank info
+ * and every register listed in aca_regs. Prints nothing when the bank info
+ * cannot be decoded.
+ */
+static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_smu_type type, int idx)
+{
+	const char *type_name = (type == ACA_SMU_TYPE_UE) ? "UE" : "CE";
+	struct aca_bank_info info;
+	int i;
+
+	if (aca_bank_info_decode(bank, &info))
+		return;
+
+	seq_printf(m, "aca entry[%d].type: %s\n", idx, type_name);
+	seq_printf(m, "aca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n",
+		   idx, info.socket_id, info.die_id, info.hwid, info.mcatype);
+
+	for (i = 0; i < ARRAY_SIZE(aca_regs); i++)
+		seq_printf(m, "aca entry[%d].regs[%d]: 0x%016llx\n", idx, aca_regs[i].reg_idx, bank->regs[aca_regs[i].reg_idx]);
+}
+
+struct aca_dump_context { /* state shared across one debugfs dump pass */
+ struct seq_file *m; /* output file the entries are printed to */
+ int idx; /* number of the next entry; incremented per dumped bank */
+};
+
+/*
+ * Per-bank callback used by the debugfs dump: print the bank as the next
+ * numbered entry, then hand it to handler_aca_log_bank_error().
+ * @data is the struct aca_dump_context of the running dump.
+ */
+static int handler_aca_bank_dump(struct aca_handle *handle, struct aca_bank *bank,
+				 enum aca_smu_type type, void *data)
+{
+	struct aca_dump_context *ctx = data;
+	int entry_idx = ctx->idx++;
+
+	aca_dump_entry(ctx->m, bank, type, entry_idx);
+
+	return handler_aca_log_bank_error(handle, bank, type, NULL);
+}
+
+/*
+ * Dump all banks of the given SMU type to @m by running aca_banks_update()
+ * with handler_aca_bank_dump as the per-bank callback. Entries are numbered
+ * from 0. m->private is the amdgpu device.
+ */
+static int aca_dump_show(struct seq_file *m, enum aca_smu_type type)
+{
+	struct amdgpu_device *adev = m->private;
+	struct aca_dump_context context;
+
+	context.m = m;
+	context.idx = 0;
+
+	return aca_banks_update(adev, type, handler_aca_bank_dump, NULL, &context);
+}
+
+/* seq_file show callback of the "aca_ce_dump" debugfs file: dump CE banks. */
+static int aca_dump_ce_show(struct seq_file *m, void *unused)
+{
+	const enum aca_smu_type smu_type = ACA_SMU_TYPE_CE;
+
+	return aca_dump_show(m, smu_type);
+}
+
+/* open() of "aca_ce_dump": bind the CE show callback to the inode's private data. */
+static int aca_dump_ce_open(struct inode *inode, struct file *file)
+{
+	void *priv = inode->i_private;
+
+	return single_open(file, aca_dump_ce_show, priv);
+}
+
+static const struct file_operations aca_ce_dump_debug_fops = { /* fops of the "aca_ce_dump" debugfs file */
+ .owner = THIS_MODULE,
+ .open = aca_dump_ce_open, /* sets up a single_open() seq_file */
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release, /* pairs with single_open() in .open */
+};
+
+/* seq_file show callback of the "aca_ue_dump" debugfs file: dump UE banks. */
+static int aca_dump_ue_show(struct seq_file *m, void *unused)
+{
+	const enum aca_smu_type smu_type = ACA_SMU_TYPE_UE;
+
+	return aca_dump_show(m, smu_type);
+}
+
+/* open() of "aca_ue_dump": bind the UE show callback to the inode's private data. */
+static int aca_dump_ue_open(struct inode *inode, struct file *file)
+{
+	void *priv = inode->i_private;
+
+	return single_open(file, aca_dump_ue_show, priv);
+}
+
+static const struct file_operations aca_ue_dump_debug_fops = { /* fops of the "aca_ue_dump" debugfs file */
+ .owner = THIS_MODULE,
+ .open = aca_dump_ue_open, /* sets up a single_open() seq_file */
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release, /* pairs with single_open() in .open */
+};
+
+DEFINE_DEBUGFS_ATTRIBUTE(aca_debug_mode_fops, NULL, amdgpu_aca_smu_debug_mode_set, "%llu\n"); /* write-only: no getter is supplied */
+#endif
+
+/**
+ * amdgpu_aca_smu_debugfs_init - create the ACA debugfs files under @root
+ * @adev: amdgpu device, stored as the private data of each file
+ * @root: parent debugfs directory; nothing is created when it is NULL
+ *
+ * Creates "aca_debug_mode" (0200), "aca_ue_dump" (0400) and "aca_ce_dump"
+ * (0400). Compiles to an empty function without CONFIG_DEBUG_FS.
+ */
+void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root)
+{
+#if defined(CONFIG_DEBUG_FS)
+	if (root) {
+		debugfs_create_file("aca_debug_mode", 0200, root, adev, &aca_debug_mode_fops);
+		debugfs_create_file("aca_ue_dump", 0400, root, adev, &aca_ue_dump_debug_fops);
+		debugfs_create_file("aca_ce_dump", 0400, root, adev, &aca_ce_dump_debug_fops);
+	}
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
new file mode 100644
index 000000000000..38c88897e1ec
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
@@ -0,0 +1,232 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_ACA_H__
+#define __AMDGPU_ACA_H__
+
+#include <linux/list.h>
+
+struct ras_err_data;
+struct ras_query_context;
+
+#define ACA_MAX_REGS_COUNT (16) /* number of entries in aca_bank.regs[] */
+
+/* Extract bits [h:l] of x, shifted down to bit 0. Every argument is parenthesised. */
+#define ACA_REG_FIELD(x, h, l)			(((x) & GENMASK_ULL((h), (l))) >> (l))
+#define ACA_REG__STATUS__VAL(x) ACA_REG_FIELD(x, 63, 63) /* STATUS register fields, applied to regs[ACA_REG_IDX_STATUS] */
+#define ACA_REG__STATUS__OVERFLOW(x) ACA_REG_FIELD(x, 62, 62)
+#define ACA_REG__STATUS__UC(x) ACA_REG_FIELD(x, 61, 61)
+#define ACA_REG__STATUS__EN(x) ACA_REG_FIELD(x, 60, 60)
+#define ACA_REG__STATUS__MISCV(x) ACA_REG_FIELD(x, 59, 59)
+#define ACA_REG__STATUS__ADDRV(x) ACA_REG_FIELD(x, 58, 58)
+#define ACA_REG__STATUS__PCC(x) ACA_REG_FIELD(x, 57, 57)
+#define ACA_REG__STATUS__ERRCOREIDVAL(x) ACA_REG_FIELD(x, 56, 56)
+#define ACA_REG__STATUS__TCC(x) ACA_REG_FIELD(x, 55, 55)
+#define ACA_REG__STATUS__SYNDV(x) ACA_REG_FIELD(x, 53, 53)
+#define ACA_REG__STATUS__CECC(x) ACA_REG_FIELD(x, 46, 46)
+#define ACA_REG__STATUS__UECC(x) ACA_REG_FIELD(x, 45, 45)
+#define ACA_REG__STATUS__DEFERRED(x) ACA_REG_FIELD(x, 44, 44)
+#define ACA_REG__STATUS__POISON(x) ACA_REG_FIELD(x, 43, 43)
+#define ACA_REG__STATUS__SCRUB(x) ACA_REG_FIELD(x, 40, 40)
+#define ACA_REG__STATUS__ERRCOREID(x) ACA_REG_FIELD(x, 37, 32)
+#define ACA_REG__STATUS__ADDRLSB(x) ACA_REG_FIELD(x, 29, 24)
+#define ACA_REG__STATUS__ERRORCODEEXT(x) ACA_REG_FIELD(x, 21, 16)
+#define ACA_REG__STATUS__ERRORCODE(x) ACA_REG_FIELD(x, 15, 0)
+
+#define ACA_REG__IPID__MCATYPE(x) ACA_REG_FIELD(x, 63, 48) /* IPID register fields, applied to regs[ACA_REG_IDX_IPID] */
+#define ACA_REG__IPID__INSTANCEIDHI(x) ACA_REG_FIELD(x, 47, 44)
+#define ACA_REG__IPID__HARDWAREID(x) ACA_REG_FIELD(x, 43, 32)
+#define ACA_REG__IPID__INSTANCEIDLO(x) ACA_REG_FIELD(x, 31, 0)
+
+#define ACA_REG__MISC0__VALID(x) ACA_REG_FIELD(x, 63, 63) /* MISC0 fields; presumably for regs[ACA_REG_IDX_MISC0] - confirm at call sites */
+#define ACA_REG__MISC0__OVRFLW(x) ACA_REG_FIELD(x, 48, 48)
+#define ACA_REG__MISC0__ERRCNT(x) ACA_REG_FIELD(x, 43, 32)
+
+#define ACA_REG__SYND__ERRORINFORMATION(x) ACA_REG_FIELD(x, 17, 0) /* SYND field; presumably for regs[ACA_REG_IDX_SYND] - confirm at call sites */
+
+/* NOTE: The following codes refer to the SMU header file. */
+#define ACA_EXTERROR_CODE_CE 0x3a
+#define ACA_EXTERROR_CODE_FAULT 0x3b
+
+#define ACA_ERROR_UE_MASK BIT_MASK(ACA_ERROR_TYPE_UE) /* per-type bits tested against aca_handle.mask */
+#define ACA_ERROR_CE_MASK BIT_MASK(ACA_ERROR_TYPE_CE)
+#define ACA_ERROR_DEFERRED_MASK BIT_MASK(ACA_ERROR_TYPE_DEFERRED)
+
+#define mmSMNAID_AID0_MCA_SMU 0x03b30400 /* SMN AID AID0 */
+#define mmSMNAID_XCD0_MCA_SMU 0x36430400 /* SMN AID XCD0 */
+#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */
+#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */
+
+#define ACA_BANK_ERR_IS_DEFFERED(bank) \
+ (ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) || \
+ ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS])) /* true when STATUS has POISON or DEFERRED set */
+
+enum aca_reg_idx { /* indices into aca_bank.regs[] */
+ ACA_REG_IDX_CTL = 0,
+ ACA_REG_IDX_STATUS = 1,
+ ACA_REG_IDX_ADDR = 2,
+ ACA_REG_IDX_MISC0 = 3,
+ ACA_REG_IDX_CONFIG = 4,
+ ACA_REG_IDX_IPID = 5,
+ ACA_REG_IDX_SYND = 6,
+ ACA_REG_IDX_DESTAT = 8, /* index 7 is not named here */
+ ACA_REG_IDX_DEADDR = 9,
+ ACA_REG_IDX_CTL_MASK = 10,
+ ACA_REG_IDX_COUNT = 16, /* same value as ACA_MAX_REGS_COUNT */
+};
+
+enum aca_hwip_type { /* value stored in aca_info.hwip and aca_handle.hwip */
+ ACA_HWIP_TYPE_UNKNOW = -1,
+ ACA_HWIP_TYPE_PSP = 0,
+ ACA_HWIP_TYPE_UMC,
+ ACA_HWIP_TYPE_SMU,
+ ACA_HWIP_TYPE_PCS_XGMI,
+ ACA_HWIP_TYPE_COUNT, /* number of valid types; not a type itself */
+};
+
+enum aca_error_type { /* also the bit position of each type in aca_handle.mask */
+ ACA_ERROR_TYPE_INVALID = -1,
+ ACA_ERROR_TYPE_UE = 0,
+ ACA_ERROR_TYPE_CE,
+ ACA_ERROR_TYPE_DEFERRED,
+ ACA_ERROR_TYPE_COUNT /* size of aca_error_cache.errors[] */
+};
+
+enum aca_smu_type { /* UE/CE selector passed to the aca_smu_funcs bank callbacks */
+ ACA_SMU_TYPE_INVALID = -1,
+ ACA_SMU_TYPE_UE = 0,
+ ACA_SMU_TYPE_CE,
+ ACA_SMU_TYPE_COUNT,
+};
+
+struct aca_hwip { /* a hardware ID paired with an MCA type */
+ int hwid;
+ int mcatype;
+};
+
+struct aca_bank { /* one ACA register bank */
+ enum aca_error_type aca_err_type;
+ enum aca_smu_type smu_err_type;
+ u64 regs[ACA_MAX_REGS_COUNT]; /* raw register values, indexed by enum aca_reg_idx */
+};
+
+struct aca_bank_node { /* an aca_bank that can be linked into a list */
+ struct aca_bank bank;
+ struct list_head node; /* presumably linked into aca_banks.list - confirm */
+};
+
+struct aca_banks { /* counted list of banks */
+ int nr_banks; /* presumably the number of entries on list - confirm */
+ struct list_head list;
+};
+
+struct aca_bank_info { /* filled in by aca_bank_info_decode() from the IPID register */
+ int die_id; /* InstanceIdHi bits [3:2] */
+ int socket_id; /* InstanceIdLo bit 0 as bit 2, InstanceIdHi bits [1:0] below it */
+ int hwid; /* IPID HardwareID field */
+ int mcatype; /* IPID McaType field */
+};
+
+struct aca_bank_error { /* one cached error record */
+ struct list_head node; /* entry on aca_error.list */
+ struct aca_bank_info info;
+ u64 count;
+};
+
+struct aca_error { /* cached errors of a single error type */
+ struct list_head list; /* list of aca_bank_error.node */
+ struct mutex lock; /* held by aca_error_fini() while it drains list */
+ enum aca_error_type type;
+ int nr_errors; /* set to 0 by aca_error_init() */
+};
+
+struct aca_handle_manager { /* registry of ACA handles */
+ struct list_head list; /* list of aca_handle.node */
+ int nr_handles; /* incremented on add, decremented on remove */
+};
+
+struct aca_error_cache { /* per-handle error storage */
+ struct aca_error errors[ACA_ERROR_TYPE_COUNT]; /* one slot per enum aca_error_type */
+};
+
+struct aca_handle { /* one registered ACA client; set up by amdgpu_aca_add_handle() */
+ struct list_head node; /* entry on aca_handle_manager.list */
+ enum aca_hwip_type hwip; /* copied from aca_info.hwip */
+ struct amdgpu_device *adev;
+ struct aca_handle_manager *mgr; /* manager this handle is linked to */
+ struct aca_error_cache error_cache;
+ const struct aca_bank_ops *bank_ops; /* copied from aca_info.bank_ops */
+ struct device_attribute aca_attr; /* sysfs attribute added to the "ras" group */
+ char attr_name[64]; /* holds "aca_<name>", the sysfs attribute name */
+ const char *name;
+ u32 mask; /* bitmask of enabled error types, one bit per enum aca_error_type */
+ void *data; /* opaque pointer passed on to amdgpu_ras_aca_sysfs_read() */
+};
+
+struct aca_bank_ops { /* per-handle bank callbacks, supplied through aca_info.bank_ops */
+ int (*aca_bank_parser)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data);
+ bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type,
+ void *data);
+};
+
+struct aca_smu_funcs { /* SMU backend table installed with amdgpu_aca_set_smu_funcs() */
+ int max_ue_bank_count;
+ int max_ce_bank_count;
+ int (*set_debug_mode)(struct amdgpu_device *adev, bool enable); /* optional; called by amdgpu_aca_smu_set_debug_mode() */
+ int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count);
+ int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_smu_type type, int idx, struct aca_bank *bank);
+ int (*parse_error_code)(struct amdgpu_device *adev, struct aca_bank *bank); /* optional; a negative return is treated as an error */
+};
+
+struct amdgpu_aca { /* per-device ACA state (adev->aca) */
+ struct aca_handle_manager mgr;
+ const struct aca_smu_funcs *smu_funcs; /* NULL until amdgpu_aca_set_smu_funcs() is called */
+ atomic_t ue_update_flag; /* cleared by amdgpu_aca_init(), _fini() and _reset() */
+ bool is_enabled; /* one of the two conditions checked by amdgpu_aca_is_enabled() */
+};
+
+struct aca_info { /* registration descriptor; its fields are copied into the aca_handle */
+ enum aca_hwip_type hwip;
+ const struct aca_bank_ops *bank_ops;
+ u32 mask; /* bitmask of error types the handle reports */
+};
+
+int amdgpu_aca_init(struct amdgpu_device *adev);
+void amdgpu_aca_fini(struct amdgpu_device *adev);
+int amdgpu_aca_reset(struct amdgpu_device *adev);
+void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs);
+bool amdgpu_aca_is_enabled(struct amdgpu_device *adev);
+
+int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info);
+int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size);
+
+int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle,
+ const char *name, const struct aca_info *aca_info, void *data);
+void amdgpu_aca_remove_handle(struct aca_handle *handle);
+int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle,
+ enum aca_error_type type, struct ras_err_data *err_data,
+ struct ras_query_context *qctx);
+int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en);
+void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root);
+int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info,
+ enum aca_error_type type, u64 count);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index cc9c9f8b23b2..381ef205b0df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -29,6 +29,8 @@
#include <linux/platform_device.h>
#include <sound/designware_i2s.h>
#include <sound/pcm.h>
+#include <linux/acpi.h>
+#include <linux/dmi.h>
#include "amdgpu.h"
#include "atom.h"
@@ -36,6 +38,7 @@
#include "acp_gfx_if.h"
+#define ST_JADEITE 1
#define ACP_TILE_ON_MASK 0x03
#define ACP_TILE_OFF_MASK 0x02
#define ACP_TILE_ON_RETAIN_REG_MASK 0x1f
@@ -85,6 +88,8 @@
#define ACP_DEVS 4
#define ACP_SRC_ID 162
+static unsigned long acp_machine_id;
+
enum {
ACP_TILE_P1 = 0,
ACP_TILE_P2,
@@ -93,9 +98,9 @@ enum {
ACP_TILE_DSP2,
};
-static int acp_sw_init(void *handle)
+static int acp_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->acp.parent = adev->dev;
@@ -107,9 +112,9 @@ static int acp_sw_init(void *handle)
return 0;
}
-static int acp_sw_fini(void *handle)
+static int acp_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->acp.cgs_device)
amdgpu_cgs_destroy_device(adev->acp.cgs_device);
@@ -128,16 +133,14 @@ static int acp_poweroff(struct generic_pm_domain *genpd)
struct amdgpu_device *adev;
apd = container_of(genpd, struct acp_pm_domain, gpd);
- if (apd != NULL) {
- adev = apd->adev;
+ adev = apd->adev;
/* call smu to POWER GATE ACP block
* smu will
* 1. turn off the acp clock
* 2. power off the acp tiles
* 3. check and enter ulv state
*/
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
- }
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0);
return 0;
}
@@ -147,16 +150,14 @@ static int acp_poweron(struct generic_pm_domain *genpd)
struct amdgpu_device *adev;
apd = container_of(genpd, struct acp_pm_domain, gpd);
- if (apd != NULL) {
- adev = apd->adev;
+ adev = apd->adev;
/* call smu to UNGATE ACP block
* smu will
* 1. exit ulv
* 2. turn on acp clock
* 3. power on acp tiles
*/
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
- }
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0);
return 0;
}
@@ -184,33 +185,58 @@ static int acp_genpd_remove_device(struct device *dev, void *data)
return 0;
}
+/*
+ * DMI match callback shared by every entry of acp_quirk_table: record that
+ * the board is handled as ST_JADEITE.
+ */
+static int acp_quirk_cb(const struct dmi_system_id *id)
+{
+	acp_machine_id = ST_JADEITE;
+
+	/* A non-zero return stops dmi_check_system() at the first match. */
+	return 1;
+}
+
+/* Boards matched by board vendor and product name; all map to ST_JADEITE. */
+static const struct dmi_system_id acp_quirk_table[] = {
+	{
+		.callback = acp_quirk_cb,
+		.matches = {
+			DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMD"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Jadeite"),
+		},
+	},
+	{
+		.callback = acp_quirk_cb,
+		.matches = {
+			DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "IP3 Technology CO.,Ltd."),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN1D"),
+		},
+	},
+	{
+		.callback = acp_quirk_cb,
+		.matches = {
+			DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Standard"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN10"),
+		},
+	},
+	{}	/* terminating empty entry */
+};
+
/**
* acp_hw_init - start and test ACP block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int acp_hw_init(void *handle)
+static int acp_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- uint64_t acp_base;
+ u64 acp_base;
u32 val = 0;
u32 count = 0;
struct i2s_platform_data *i2s_pdata = NULL;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- const struct amdgpu_ip_block *ip_block =
- amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_ACP);
-
- if (!ip_block)
- return -EINVAL;
+ struct amdgpu_device *adev = ip_block->adev;
r = amd_acp_hw_init(adev->acp.cgs_device,
ip_block->version->major, ip_block->version->minor);
/* -ENODEV means board uses AZ rather than ACP */
if (r == -ENODEV) {
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0);
return 0;
} else if (r) {
return r;
@@ -220,141 +246,208 @@ static int acp_hw_init(void *handle)
return -EINVAL;
acp_base = adev->rmmio_base;
-
-
adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
- if (adev->acp.acp_genpd == NULL)
+ if (!adev->acp.acp_genpd)
return -ENOMEM;
adev->acp.acp_genpd->gpd.name = "ACP_AUDIO";
adev->acp.acp_genpd->gpd.power_off = acp_poweroff;
adev->acp.acp_genpd->gpd.power_on = acp_poweron;
-
-
adev->acp.acp_genpd->adev = adev;
pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false);
+ dmi_check_system(acp_quirk_table);
+ switch (acp_machine_id) {
+ case ST_JADEITE:
+ {
+ adev->acp.acp_cell = kcalloc(2, sizeof(struct mfd_cell),
+ GFP_KERNEL);
+ if (!adev->acp.acp_cell) {
+ r = -ENOMEM;
+ goto failure;
+ }
- adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell),
- GFP_KERNEL);
-
- if (adev->acp.acp_cell == NULL) {
- r = -ENOMEM;
- goto failure;
- }
-
- adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL);
- if (adev->acp.acp_res == NULL) {
- r = -ENOMEM;
- goto failure;
- }
+ adev->acp.acp_res = kcalloc(3, sizeof(struct resource), GFP_KERNEL);
+ if (!adev->acp.acp_res) {
+ r = -ENOMEM;
+ goto failure;
+ }
- i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL);
- if (i2s_pdata == NULL) {
- r = -ENOMEM;
- goto failure;
- }
+ i2s_pdata = kcalloc(1, sizeof(struct i2s_platform_data), GFP_KERNEL);
+ if (!i2s_pdata) {
+ r = -ENOMEM;
+ goto failure;
+ }
- switch (adev->asic_type) {
- case CHIP_STONEY:
i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
- DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ i2s_pdata[0].cap = DWC_I2S_PLAY | DWC_I2S_RECORD;
+ i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET;
+ i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET;
+
+ adev->acp.acp_res[0].name = "acp2x_dma";
+ adev->acp.acp_res[0].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[0].start = acp_base;
+ adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END;
+
+ adev->acp.acp_res[1].name = "acp2x_dw_i2s_play_cap";
+ adev->acp.acp_res[1].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[1].start = acp_base + ACP_I2S_CAP_REGS_START;
+ adev->acp.acp_res[1].end = acp_base + ACP_I2S_CAP_REGS_END;
+
+ adev->acp.acp_res[2].name = "acp2x_dma_irq";
+ adev->acp.acp_res[2].flags = IORESOURCE_IRQ;
+ adev->acp.acp_res[2].start = amdgpu_irq_create_mapping(adev, 162);
+ adev->acp.acp_res[2].end = adev->acp.acp_res[2].start;
+
+ adev->acp.acp_cell[0].name = "acp_audio_dma";
+ adev->acp.acp_cell[0].id = 0;
+ adev->acp.acp_cell[0].num_resources = 3;
+ adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
+ adev->acp.acp_cell[0].platform_data = &adev->asic_type;
+ adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
+
+ adev->acp.acp_cell[1].name = "designware-i2s";
+ adev->acp.acp_cell[1].id = 1;
+ adev->acp.acp_cell[1].num_resources = 1;
+ adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
+ adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
+ adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
+ r = mfd_add_devices(adev->acp.parent, 0, adev->acp.acp_cell, 2, NULL, 0, NULL);
+ if (r)
+ goto failure;
+ r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd,
+ acp_genpd_add_device);
+ if (r)
+ goto failure;
break;
- default:
- i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
}
- i2s_pdata[0].cap = DWC_I2S_PLAY;
- i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000;
- i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET;
- i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET;
- switch (adev->asic_type) {
- case CHIP_STONEY:
- i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
- DW_I2S_QUIRK_COMP_PARAM1 |
- DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
- break;
default:
- i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
- DW_I2S_QUIRK_COMP_PARAM1;
- }
+ adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell),
+ GFP_KERNEL);
- i2s_pdata[1].cap = DWC_I2S_RECORD;
- i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000;
- i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET;
- i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET;
+ if (!adev->acp.acp_cell) {
+ r = -ENOMEM;
+ goto failure;
+ }
- i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
- switch (adev->asic_type) {
- case CHIP_STONEY:
- i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
- break;
- default:
- break;
- }
+ adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL);
+ if (!adev->acp.acp_res) {
+ r = -ENOMEM;
+ goto failure;
+ }
+
+ i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL);
+ if (!i2s_pdata) {
+ r = -ENOMEM;
+ goto failure;
+ }
+
+ switch (adev->asic_type) {
+ case CHIP_STONEY:
+ i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
+ DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ break;
+ default:
+ i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
+ }
+ i2s_pdata[0].cap = DWC_I2S_PLAY;
+ i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET;
+ i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET;
+ switch (adev->asic_type) {
+ case CHIP_STONEY:
+ i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
+ DW_I2S_QUIRK_COMP_PARAM1 |
+ DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ break;
+ default:
+ i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
+ DW_I2S_QUIRK_COMP_PARAM1;
+ }
- i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD;
- i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000;
- i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET;
- i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET;
-
- adev->acp.acp_res[0].name = "acp2x_dma";
- adev->acp.acp_res[0].flags = IORESOURCE_MEM;
- adev->acp.acp_res[0].start = acp_base;
- adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END;
-
- adev->acp.acp_res[1].name = "acp2x_dw_i2s_play";
- adev->acp.acp_res[1].flags = IORESOURCE_MEM;
- adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START;
- adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END;
-
- adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap";
- adev->acp.acp_res[2].flags = IORESOURCE_MEM;
- adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START;
- adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END;
-
- adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap";
- adev->acp.acp_res[3].flags = IORESOURCE_MEM;
- adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START;
- adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END;
-
- adev->acp.acp_res[4].name = "acp2x_dma_irq";
- adev->acp.acp_res[4].flags = IORESOURCE_IRQ;
- adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162);
- adev->acp.acp_res[4].end = adev->acp.acp_res[4].start;
-
- adev->acp.acp_cell[0].name = "acp_audio_dma";
- adev->acp.acp_cell[0].num_resources = 5;
- adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
- adev->acp.acp_cell[0].platform_data = &adev->asic_type;
- adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
-
- adev->acp.acp_cell[1].name = "designware-i2s";
- adev->acp.acp_cell[1].num_resources = 1;
- adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
- adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
- adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
-
- adev->acp.acp_cell[2].name = "designware-i2s";
- adev->acp.acp_cell[2].num_resources = 1;
- adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2];
- adev->acp.acp_cell[2].platform_data = &i2s_pdata[1];
- adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data);
-
- adev->acp.acp_cell[3].name = "designware-i2s";
- adev->acp.acp_cell[3].num_resources = 1;
- adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3];
- adev->acp.acp_cell[3].platform_data = &i2s_pdata[2];
- adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data);
-
- r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell,
- ACP_DEVS);
- if (r)
- goto failure;
-
- r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd,
- acp_genpd_add_device);
- if (r)
- goto failure;
+ i2s_pdata[1].cap = DWC_I2S_RECORD;
+ i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET;
+ i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET;
+
+ i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
+ switch (adev->asic_type) {
+ case CHIP_STONEY:
+ i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ break;
+ default:
+ break;
+ }
+
+ i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD;
+ i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET;
+ i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET;
+
+ adev->acp.acp_res[0].name = "acp2x_dma";
+ adev->acp.acp_res[0].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[0].start = acp_base;
+ adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END;
+
+ adev->acp.acp_res[1].name = "acp2x_dw_i2s_play";
+ adev->acp.acp_res[1].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START;
+ adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END;
+
+ adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap";
+ adev->acp.acp_res[2].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START;
+ adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END;
+
+ adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap";
+ adev->acp.acp_res[3].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START;
+ adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END;
+
+ adev->acp.acp_res[4].name = "acp2x_dma_irq";
+ adev->acp.acp_res[4].flags = IORESOURCE_IRQ;
+ adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162);
+ adev->acp.acp_res[4].end = adev->acp.acp_res[4].start;
+
+ adev->acp.acp_cell[0].name = "acp_audio_dma";
+ adev->acp.acp_cell[0].id = 0;
+ adev->acp.acp_cell[0].num_resources = 5;
+ adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
+ adev->acp.acp_cell[0].platform_data = &adev->asic_type;
+ adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
+
+ adev->acp.acp_cell[1].name = "designware-i2s";
+ adev->acp.acp_cell[1].id = 1;
+ adev->acp.acp_cell[1].num_resources = 1;
+ adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
+ adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
+ adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
+
+ adev->acp.acp_cell[2].name = "designware-i2s";
+ adev->acp.acp_cell[2].id = 2;
+ adev->acp.acp_cell[2].num_resources = 1;
+ adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2];
+ adev->acp.acp_cell[2].platform_data = &i2s_pdata[1];
+ adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data);
+
+ adev->acp.acp_cell[3].name = "designware-i2s";
+ adev->acp.acp_cell[3].id = 3;
+ adev->acp.acp_cell[3].num_resources = 1;
+ adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3];
+ adev->acp.acp_cell[3].platform_data = &i2s_pdata[2];
+ adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data);
+
+ r = mfd_add_devices(adev->acp.parent, 0, adev->acp.acp_cell, ACP_DEVS, NULL, 0, NULL);
+ if (r)
+ goto failure;
+
+ r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd,
+ acp_genpd_add_device);
+ if (r)
+ goto failure;
+ }
/* Assert Soft reset of ACP */
val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
@@ -410,18 +503,18 @@ failure:
/**
* acp_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int acp_hw_fini(void *handle)
+static int acp_hw_fini(struct amdgpu_ip_block *ip_block)
{
u32 val = 0;
u32 count = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* return early if no ACP */
if (!adev->acp.acp_genpd) {
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0);
return 0;
}
@@ -472,67 +565,50 @@ static int acp_hw_fini(void *handle)
return 0;
}
-static int acp_suspend(void *handle)
+static int acp_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* power up on suspend */
if (!adev->acp.acp_cell)
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0);
return 0;
}
-static int acp_resume(void *handle)
+static int acp_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* power down again on resume */
if (!adev->acp.acp_cell)
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0);
return 0;
}
-static int acp_early_init(void *handle)
-{
- return 0;
-}
-
-static bool acp_is_idle(void *handle)
+static bool acp_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int acp_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int acp_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int acp_set_clockgating_state(void *handle,
+static int acp_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int acp_set_powergating_state(void *handle,
+static int acp_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable, 0);
return 0;
}
static const struct amd_ip_funcs acp_ip_funcs = {
.name = "acp_ip",
- .early_init = acp_early_init,
- .late_init = NULL,
.sw_init = acp_sw_init,
.sw_fini = acp_sw_fini,
.hw_init = acp_hw_init,
@@ -540,14 +616,11 @@ static const struct amd_ip_funcs acp_ip_funcs = {
.suspend = acp_suspend,
.resume = acp_resume,
.is_idle = acp_is_idle,
- .wait_for_idle = acp_wait_for_idle,
- .soft_reset = acp_soft_reset,
.set_clockgating_state = acp_set_clockgating_state,
.set_powergating_state = acp_set_powergating_state,
};
-const struct amdgpu_ip_block_version acp_ip_block =
-{
+const struct amdgpu_ip_block_version acp_ip_block = {
.type = AMD_IP_BLOCK_TYPE_ACP,
.major = 2,
.minor = 2,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 4811b0faafd9..d31460a9e958 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2012 Advanced Micro Devices, Inc.
*
@@ -23,20 +24,60 @@
#include <linux/pci.h>
#include <linux/acpi.h>
+#include <linux/backlight.h>
#include <linux/slab.h>
+#include <linux/xarray.h>
#include <linux/power_supply.h>
#include <linux/pm_runtime.h>
#include <linux/suspend.h>
#include <acpi/video.h>
#include <acpi/actbl.h>
-#include <drm/drm_crtc_helper.h>
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_display.h"
#include "amd_acpi.h"
#include "atom.h"
+/* Declare GUID for AMD _DSM method for XCCs */
+static const guid_t amd_xcc_dsm_guid = GUID_INIT(0x8267f5d5, 0xa556, 0x44f2,
+ 0xb8, 0xb4, 0x45, 0x56, 0x2e,
+ 0x8c, 0x5b, 0xec);
+
+#define AMD_XCC_HID_START 3000
+#define AMD_XCC_DSM_GET_NUM_FUNCS 0
+#define AMD_XCC_DSM_GET_SUPP_MODE 1
+#define AMD_XCC_DSM_GET_XCP_MODE 2
+#define AMD_XCC_DSM_GET_VF_XCC_MAPPING 4
+#define AMD_XCC_DSM_GET_TMR_INFO 5
+#define AMD_XCC_DSM_NUM_FUNCS 5
+
+#define AMD_XCC_MAX_HID 24
+
+struct xarray numa_info_xa;
+
+/* Encapsulates the XCD acpi object information */
+struct amdgpu_acpi_xcc_info {
+ struct list_head list;
+ struct amdgpu_numa_info *numa_info;
+ uint8_t xcp_node;
+ uint8_t phy_id;
+ acpi_handle handle;
+};
+
+struct amdgpu_acpi_dev_info {
+ struct list_head list;
+ struct list_head xcc_list;
+ uint32_t sbdf;
+ uint16_t supp_xcp_mode;
+ uint16_t xcp_mode;
+ uint16_t mem_mode;
+ uint64_t tmr_base;
+ uint64_t tmr_size;
+};
+
+struct list_head amdgpu_acpi_dev_list;
+
struct amdgpu_atif_notification_cfg {
bool enabled;
int command_code;
@@ -66,9 +107,7 @@ struct amdgpu_atif {
struct amdgpu_atif_notifications notifications;
struct amdgpu_atif_functions functions;
struct amdgpu_atif_notification_cfg notification_cfg;
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
struct backlight_device *bd;
-#endif
struct amdgpu_dm_backlight_caps backlight_caps;
};
@@ -108,6 +147,7 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif,
struct acpi_buffer *params)
{
acpi_status status;
+ union acpi_object *obj;
union acpi_object atif_arg_elements[2];
struct acpi_object_list atif_arg;
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
@@ -130,16 +170,24 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif,
status = acpi_evaluate_object(atif->handle, NULL, &atif_arg,
&buffer);
+ obj = (union acpi_object *)buffer.pointer;
- /* Fail only if calling the method fails and ATIF is supported */
- if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
+ /* Fail if calling the method fails */
+ if (ACPI_FAILURE(status)) {
DRM_DEBUG_DRIVER("failed to evaluate ATIF got %s\n",
acpi_format_exception(status));
- kfree(buffer.pointer);
+ kfree(obj);
return NULL;
}
- return buffer.pointer;
+ if (obj->type != ACPI_TYPE_BUFFER) {
+ DRM_DEBUG_DRIVER("bad object returned from ATIF: %d\n",
+ obj->type);
+ kfree(obj);
+ return NULL;
+ }
+
+ return obj;
}
/**
@@ -344,6 +392,12 @@ static int amdgpu_atif_query_backlight_caps(struct amdgpu_atif *atif)
characteristics.min_input_signal;
atif->backlight_caps.max_input_signal =
characteristics.max_input_signal;
+ atif->backlight_caps.ac_level = characteristics.ac_level;
+ atif->backlight_caps.dc_level = characteristics.dc_level;
+ atif->backlight_caps.data_points = characteristics.number_of_points;
+ memcpy(atif->backlight_caps.luminance_data,
+ characteristics.data_points,
+ sizeof(atif->backlight_caps.luminance_data));
out:
kfree(info);
return err;
@@ -436,7 +490,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count);
if (req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) {
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
if (atif->bd) {
DRM_DEBUG_DRIVER("Changing brightness to %d\n",
req.backlight_level);
@@ -447,7 +500,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
*/
backlight_device_set_brightness(atif->bd, req.backlight_level);
}
-#endif
}
if (req.pending & ATIF_DGPU_DISPLAY_EVENT) {
@@ -455,7 +507,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
pm_runtime_get_sync(adev_to_drm(adev)->dev);
/* Just fire off a uevent and let userspace tell us what to do */
drm_helper_hpd_irq_event(adev_to_drm(adev));
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
}
}
@@ -669,7 +720,7 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
atcs_input.size = sizeof(struct atcs_pref_req_input);
/* client id (bit 2-0: func num, 7-3: dev num, 15-8: bus num) */
- atcs_input.client_id = adev->pdev->devfn | (adev->pdev->bus->number << 8);
+ atcs_input.client_id = pci_dev_id(adev->pdev);
atcs_input.valid_flags_mask = ATCS_VALID_FLAGS_MASK;
atcs_input.flags = ATCS_WAIT_FOR_COMPLETION;
if (advertise)
@@ -739,7 +790,7 @@ int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
atcs_input.size = sizeof(struct atcs_pwr_shift_input);
/* dGPU id (bit 2-0: func num, 7-3: dev num, 15-8: bus num) */
- atcs_input.dgpu_id = adev->pdev->devfn | (adev->pdev->bus->number << 8);
+ atcs_input.dgpu_id = pci_dev_id(adev->pdev);
atcs_input.dev_acpi_state = dev_state;
atcs_input.drv_state = drv_state;
@@ -752,24 +803,25 @@ int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
return -EIO;
}
+ kfree(info);
return 0;
}
/**
* amdgpu_acpi_smart_shift_update - update dGPU device state to SBIOS
*
- * @dev: drm_device pointer
+ * @adev: amdgpu device pointer
* @ss_state: current smart shift event
*
* returns 0 on success,
* otherwise return error number.
*/
-int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state)
+int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev,
+ enum amdgpu_ss ss_state)
{
- struct amdgpu_device *adev = drm_to_adev(dev);
int r;
- if (!amdgpu_device_supports_smart_shift(dev))
+ if (!amdgpu_device_supports_smart_shift(adev))
return 0;
switch (ss_state) {
@@ -804,6 +856,347 @@ int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_sta
return r;
}
+#ifdef CONFIG_ACPI_NUMA
+static inline uint64_t amdgpu_acpi_get_numa_size(int nid)
+{
+ /* This is directly using si_meminfo_node implementation as the
+ * function is not exported.
+ */
+ int zone_type;
+ uint64_t managed_pages = 0;
+
+ pg_data_t *pgdat = NODE_DATA(nid);
+
+ for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
+ managed_pages +=
+ zone_managed_pages(&pgdat->node_zones[zone_type]);
+ return managed_pages * PAGE_SIZE;
+}
+
+static struct amdgpu_numa_info *amdgpu_acpi_get_numa_info(uint32_t pxm)
+{
+ struct amdgpu_numa_info *numa_info;
+ int nid;
+
+ numa_info = xa_load(&numa_info_xa, pxm);
+
+ if (!numa_info) {
+ struct sysinfo info;
+
+ numa_info = kzalloc(sizeof(*numa_info), GFP_KERNEL);
+ if (!numa_info)
+ return NULL;
+
+ nid = pxm_to_node(pxm);
+ numa_info->pxm = pxm;
+ numa_info->nid = nid;
+
+ if (numa_info->nid == NUMA_NO_NODE) {
+ si_meminfo(&info);
+ numa_info->size = info.totalram * info.mem_unit;
+ } else {
+ numa_info->size = amdgpu_acpi_get_numa_size(nid);
+ }
+ xa_store(&numa_info_xa, numa_info->pxm, numa_info, GFP_KERNEL);
+ }
+
+ return numa_info;
+}
+#endif
+
+/**
+ * amdgpu_acpi_get_node_id - obtain the NUMA node id for corresponding amdgpu
+ * acpi device handle
+ *
+ * @handle: acpi handle
+ * @numa_info: amdgpu_numa_info structure holding numa information
+ *
+ * Queries the ACPI interface to fetch the corresponding NUMA Node ID for a
+ * given amdgpu acpi device.
+ *
+ * Returns ACPI STATUS OK with Node ID on success or the corresponding failure reason
+ */
+static acpi_status amdgpu_acpi_get_node_id(acpi_handle handle,
+ struct amdgpu_numa_info **numa_info)
+{
+#ifdef CONFIG_ACPI_NUMA
+ u64 pxm;
+ acpi_status status;
+
+ if (!numa_info)
+ return_ACPI_STATUS(AE_ERROR);
+
+ status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
+
+ if (ACPI_FAILURE(status))
+ return status;
+
+ *numa_info = amdgpu_acpi_get_numa_info(pxm);
+
+ if (!*numa_info)
+ return_ACPI_STATUS(AE_ERROR);
+
+ return_ACPI_STATUS(AE_OK);
+#else
+ return_ACPI_STATUS(AE_NOT_EXIST);
+#endif
+}
+
+static struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u32 sbdf)
+{
+ struct amdgpu_acpi_dev_info *acpi_dev;
+
+ if (list_empty(&amdgpu_acpi_dev_list))
+ return NULL;
+
+ list_for_each_entry(acpi_dev, &amdgpu_acpi_dev_list, list)
+ if (acpi_dev->sbdf == sbdf)
+ return acpi_dev;
+
+ return NULL;
+}
+
+static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info,
+ struct amdgpu_acpi_xcc_info *xcc_info, u32 sbdf)
+{
+ struct amdgpu_acpi_dev_info *tmp;
+ union acpi_object *obj;
+ int ret = -ENOENT;
+
+ *dev_info = NULL;
+ tmp = kzalloc(sizeof(struct amdgpu_acpi_dev_info), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&tmp->xcc_list);
+ INIT_LIST_HEAD(&tmp->list);
+ tmp->sbdf = sbdf;
+
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_SUPP_MODE, NULL,
+ ACPI_TYPE_INTEGER);
+
+ if (!obj) {
+ acpi_handle_debug(xcc_info->handle,
+ "_DSM function %d evaluation failed",
+ AMD_XCC_DSM_GET_SUPP_MODE);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ tmp->supp_xcp_mode = obj->integer.value & 0xFFFF;
+ ACPI_FREE(obj);
+
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_XCP_MODE, NULL,
+ ACPI_TYPE_INTEGER);
+
+ if (!obj) {
+ acpi_handle_debug(xcc_info->handle,
+ "_DSM function %d evaluation failed",
+ AMD_XCC_DSM_GET_XCP_MODE);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ tmp->xcp_mode = obj->integer.value & 0xFFFF;
+ tmp->mem_mode = (obj->integer.value >> 32) & 0xFFFF;
+ ACPI_FREE(obj);
+
+ /* Evaluate DSMs and fill XCC information */
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_TMR_INFO, NULL,
+ ACPI_TYPE_PACKAGE);
+
+ if (!obj || obj->package.count < 2) {
+ acpi_handle_debug(xcc_info->handle,
+ "_DSM function %d evaluation failed",
+ AMD_XCC_DSM_GET_TMR_INFO);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ tmp->tmr_base = obj->package.elements[0].integer.value;
+ tmp->tmr_size = obj->package.elements[1].integer.value;
+ ACPI_FREE(obj);
+
+ DRM_DEBUG_DRIVER(
+ "New dev(%x): Supported xcp mode: %x curr xcp_mode : %x mem mode : %x, tmr base: %llx tmr size: %llx ",
+ tmp->sbdf, tmp->supp_xcp_mode, tmp->xcp_mode, tmp->mem_mode,
+ tmp->tmr_base, tmp->tmr_size);
+ list_add_tail(&tmp->list, &amdgpu_acpi_dev_list);
+ *dev_info = tmp;
+
+ return 0;
+
+out:
+ if (obj)
+ ACPI_FREE(obj);
+ kfree(tmp);
+
+ return ret;
+}
+
+static int amdgpu_acpi_get_xcc_info(struct amdgpu_acpi_xcc_info *xcc_info,
+ u32 *sbdf)
+{
+ union acpi_object *obj;
+ acpi_status status;
+ int ret = -ENOENT;
+
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_NUM_FUNCS, NULL,
+ ACPI_TYPE_INTEGER);
+
+ if (!obj || obj->integer.value != AMD_XCC_DSM_NUM_FUNCS)
+ goto out;
+ ACPI_FREE(obj);
+
+ /* Evaluate DSMs and fill XCC information */
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_VF_XCC_MAPPING, NULL,
+ ACPI_TYPE_INTEGER);
+
+ if (!obj) {
+ acpi_handle_debug(xcc_info->handle,
+ "_DSM function %d evaluation failed",
+ AMD_XCC_DSM_GET_VF_XCC_MAPPING);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* PF xcc id [39:32] */
+ xcc_info->phy_id = (obj->integer.value >> 32) & 0xFF;
+ /* xcp node of this xcc [47:40] */
+ xcc_info->xcp_node = (obj->integer.value >> 40) & 0xFF;
+ /* PF domain of this xcc [31:16] */
+ *sbdf = (obj->integer.value) & 0xFFFF0000;
+ /* PF bus/dev/fn of this xcc [63:48] */
+ *sbdf |= (obj->integer.value >> 48) & 0xFFFF;
+ ACPI_FREE(obj);
+ obj = NULL;
+
+ status =
+ amdgpu_acpi_get_node_id(xcc_info->handle, &xcc_info->numa_info);
+
+ /* TODO: check if this check is required */
+ if (ACPI_SUCCESS(status))
+ ret = 0;
+out:
+ if (obj)
+ ACPI_FREE(obj);
+
+ return ret;
+}
+
+static int amdgpu_acpi_enumerate_xcc(void)
+{
+ struct amdgpu_acpi_dev_info *dev_info = NULL;
+ struct amdgpu_acpi_xcc_info *xcc_info;
+ struct acpi_device *acpi_dev;
+ char hid[ACPI_ID_LEN];
+ int ret, id;
+ u32 sbdf;
+
+ INIT_LIST_HEAD(&amdgpu_acpi_dev_list);
+ xa_init(&numa_info_xa);
+
+ for (id = 0; id < AMD_XCC_MAX_HID; id++) {
+ sprintf(hid, "%s%d", "AMD", AMD_XCC_HID_START + id);
+ acpi_dev = acpi_dev_get_first_match_dev(hid, NULL, -1);
+ /* These ACPI objects are expected to be in sequential order. If
+ * one is not found, no need to check the rest.
+ */
+ if (!acpi_dev) {
+ DRM_DEBUG_DRIVER("No matching acpi device found for %s",
+ hid);
+ break;
+ }
+
+ xcc_info = kzalloc(sizeof(struct amdgpu_acpi_xcc_info),
+ GFP_KERNEL);
+ if (!xcc_info) {
+ DRM_ERROR("Failed to allocate memory for xcc info\n");
+ return -ENOMEM;
+ }
+
+ INIT_LIST_HEAD(&xcc_info->list);
+ xcc_info->handle = acpi_device_handle(acpi_dev);
+ acpi_dev_put(acpi_dev);
+
+ ret = amdgpu_acpi_get_xcc_info(xcc_info, &sbdf);
+ if (ret) {
+ kfree(xcc_info);
+ continue;
+ }
+
+ dev_info = amdgpu_acpi_get_dev(sbdf);
+
+ if (!dev_info)
+ ret = amdgpu_acpi_dev_init(&dev_info, xcc_info, sbdf);
+
+ if (ret == -ENOMEM)
+ return ret;
+
+ if (!dev_info) {
+ kfree(xcc_info);
+ continue;
+ }
+
+ list_add_tail(&xcc_info->list, &dev_info->xcc_list);
+ }
+
+ return 0;
+}
+
+int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset,
+ u64 *tmr_size)
+{
+ struct amdgpu_acpi_dev_info *dev_info;
+ u32 sbdf;
+
+ if (!tmr_offset || !tmr_size)
+ return -EINVAL;
+
+ sbdf = (pci_domain_nr(adev->pdev->bus) << 16);
+ sbdf |= pci_dev_id(adev->pdev);
+ dev_info = amdgpu_acpi_get_dev(sbdf);
+ if (!dev_info)
+ return -ENOENT;
+
+ *tmr_offset = dev_info->tmr_base;
+ *tmr_size = dev_info->tmr_size;
+
+ return 0;
+}
+
+int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id,
+ struct amdgpu_numa_info *numa_info)
+{
+ struct amdgpu_acpi_dev_info *dev_info;
+ struct amdgpu_acpi_xcc_info *xcc_info;
+ u32 sbdf;
+
+ if (!numa_info)
+ return -EINVAL;
+
+ sbdf = (pci_domain_nr(adev->pdev->bus) << 16);
+ sbdf |= pci_dev_id(adev->pdev);
+ dev_info = amdgpu_acpi_get_dev(sbdf);
+ if (!dev_info)
+ return -ENOENT;
+
+ list_for_each_entry(xcc_info, &dev_info->xcc_list, list) {
+ if (xcc_info->phy_id == xcc_id) {
+ memcpy(numa_info, xcc_info->numa_info,
+ sizeof(*numa_info));
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
/**
* amdgpu_acpi_event - handle notify events
*
@@ -849,11 +1242,11 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
{
struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif;
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
if (atif->notifications.brightness_change) {
- if (amdgpu_device_has_dc_support(adev)) {
+ if (adev->dc_enabled) {
#if defined(CONFIG_DRM_AMD_DC)
struct amdgpu_display_manager *dm = &adev->dm;
+
if (dm->backlight_dev[0])
atif->bd = dm->backlight_dev[0];
#endif
@@ -868,6 +1261,7 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
if ((enc->devices & (ATOM_DEVICE_LCD_SUPPORT)) &&
enc->enc_priv) {
struct amdgpu_encoder_atom_dig *dig = enc->enc_priv;
+
if (dig->bl_dev) {
atif->bd = dig->bl_dev;
break;
@@ -876,7 +1270,6 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
}
}
}
-#endif
adev->acpi_nb.notifier_call = amdgpu_acpi_event;
register_acpi_notifier(&adev->acpi_nb);
@@ -887,9 +1280,7 @@ void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps)
{
struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif;
- caps->caps_valid = atif->backlight_caps.caps_valid;
- caps->min_input_signal = atif->backlight_caps.min_input_signal;
- caps->max_input_signal = atif->backlight_caps.max_input_signal;
+ memcpy(caps, &atif->backlight_caps, sizeof(*caps));
}
/**
@@ -925,9 +1316,9 @@ static bool amdgpu_atif_pci_probe_handle(struct pci_dev *pdev)
return false;
status = acpi_get_handle(dhandle, "ATIF", &atif_handle);
- if (ACPI_FAILURE(status)) {
+ if (ACPI_FAILURE(status))
return false;
- }
+
amdgpu_acpi_priv.atif.handle = atif_handle;
acpi_get_name(amdgpu_acpi_priv.atif.handle, ACPI_FULL_PATHNAME, &buffer);
DRM_DEBUG_DRIVER("Found ATIF handle %s\n", acpi_method_name);
@@ -960,9 +1351,9 @@ static bool amdgpu_atcs_pci_probe_handle(struct pci_dev *pdev)
return false;
status = acpi_get_handle(dhandle, "ATCS", &atcs_handle);
- if (ACPI_FAILURE(status)) {
+ if (ACPI_FAILURE(status))
return false;
- }
+
amdgpu_acpi_priv.atcs.handle = atcs_handle;
acpi_get_name(amdgpu_acpi_priv.atcs.handle, ACPI_FULL_PATHNAME, &buffer);
DRM_DEBUG_DRIVER("Found ATCS handle %s\n", acpi_method_name);
@@ -974,6 +1365,34 @@ static bool amdgpu_atcs_pci_probe_handle(struct pci_dev *pdev)
return true;
}
+
+/**
+ * amdgpu_acpi_should_gpu_reset
+ *
+ * @adev: amdgpu_device_pointer
+ *
+ * returns true if should reset GPU, false if not
+ */
+bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev)
+{
+ if ((adev->flags & AMD_IS_APU) &&
+ adev->gfx.imu.funcs) /* Not need to do mode2 reset for IMU enabled APUs */
+ return false;
+
+ if ((adev->flags & AMD_IS_APU) &&
+ amdgpu_acpi_is_s3_active(adev))
+ return false;
+
+ if (amdgpu_sriov_vf(adev))
+ return false;
+
+#if IS_ENABLED(CONFIG_SUSPEND)
+ return pm_suspend_target_state != PM_SUSPEND_TO_IDLE;
+#else
+ return true;
+#endif
+}
+
/*
* amdgpu_acpi_detect - detect ACPI ATIF/ATCS methods
*
@@ -987,14 +1406,11 @@ void amdgpu_acpi_detect(void)
struct pci_dev *pdev = NULL;
int ret;
- while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
- if (!atif->handle)
- amdgpu_atif_pci_probe_handle(pdev);
- if (!atcs->handle)
- amdgpu_atcs_pci_probe_handle(pdev);
- }
+ while ((pdev = pci_get_base_class(PCI_BASE_CLASS_DISPLAY, pdev))) {
+ if ((pdev->class != PCI_CLASS_DISPLAY_VGA << 8) &&
+ (pdev->class != PCI_CLASS_DISPLAY_OTHER << 8))
+ continue;
- while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) {
if (!atif->handle)
amdgpu_atif_pci_probe_handle(pdev);
if (!atcs->handle)
@@ -1029,6 +1445,50 @@ void amdgpu_acpi_detect(void)
} else {
atif->backlight_caps.caps_valid = false;
}
+
+ amdgpu_acpi_enumerate_xcc();
+}
+
+void amdgpu_acpi_release(void)
+{
+ struct amdgpu_acpi_dev_info *dev_info, *dev_tmp;
+ struct amdgpu_acpi_xcc_info *xcc_info, *xcc_tmp;
+ struct amdgpu_numa_info *numa_info;
+ unsigned long index;
+
+ xa_for_each(&numa_info_xa, index, numa_info) {
+ kfree(numa_info);
+ xa_erase(&numa_info_xa, index);
+ }
+
+ if (list_empty(&amdgpu_acpi_dev_list))
+ return;
+
+ list_for_each_entry_safe(dev_info, dev_tmp, &amdgpu_acpi_dev_list,
+ list) {
+ list_for_each_entry_safe(xcc_info, xcc_tmp, &dev_info->xcc_list,
+ list) {
+ list_del(&xcc_info->list);
+ kfree(xcc_info);
+ }
+
+ list_del(&dev_info->list);
+ kfree(dev_info);
+ }
+}
+
+#if IS_ENABLED(CONFIG_SUSPEND)
+/**
+ * amdgpu_acpi_is_s3_active
+ *
+ * @adev: amdgpu_device_pointer
+ *
+ * returns true if supported, false if not.
+ */
+bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev)
+{
+ return !(adev->flags & AMD_IS_APU) ||
+ (pm_suspend_target_state == PM_SUSPEND_MEM);
}
/**
@@ -1040,11 +1500,66 @@ void amdgpu_acpi_detect(void)
*/
bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
{
-#if IS_ENABLED(CONFIG_AMD_PMC) && IS_ENABLED(CONFIG_SUSPEND)
- if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) {
- if (adev->flags & AMD_IS_APU)
- return pm_suspend_target_state == PM_SUSPEND_TO_IDLE;
+ if (!(adev->flags & AMD_IS_APU) ||
+ (pm_suspend_target_state != PM_SUSPEND_TO_IDLE))
+ return false;
+
+ if (adev->asic_type < CHIP_RAVEN)
+ return false;
+
+ if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
+ return false;
+
+ /*
+ * If ACPI_FADT_LOW_POWER_S0 is not set in the FADT, it is generally
+ * risky to do any special firmware-related preparations for entering
+ * S0ix even though the system is suspending to idle, so return false
+ * in that case.
+ */
+ if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) {
+ dev_err_once(adev->dev,
+ "Power consumption will be higher as BIOS has not been configured for suspend-to-idle.\n"
+ "To use suspend-to-idle change the sleep mode in BIOS setup.\n");
+ return false;
}
-#endif
+
+#if !IS_ENABLED(CONFIG_AMD_PMC)
+ dev_err_once(adev->dev,
+ "Power consumption will be higher as the kernel has not been compiled with CONFIG_AMD_PMC.\n");
return false;
+#else
+ return true;
+#endif /* CONFIG_AMD_PMC */
+}
+#endif /* CONFIG_SUSPEND */
+
+#if IS_ENABLED(CONFIG_DRM_AMD_ISP)
+static const struct acpi_device_id isp_sensor_ids[] = {
+ { "OMNI5C10" },
+ { }
+};
+
+static int isp_match_acpi_device_ids(struct device *dev, const void *data)
+{
+ return acpi_match_device(data, dev) ? 1 : 0;
+}
+
+int amdgpu_acpi_get_isp4_dev(struct acpi_device **dev)
+{
+ struct device *pdev __free(put_device) = NULL;
+ struct acpi_device *acpi_pdev;
+
+ pdev = bus_find_device(&platform_bus_type, NULL, isp_sensor_ids,
+ isp_match_acpi_device_ids);
+ if (!pdev)
+ return -EINVAL;
+
+ acpi_pdev = ACPI_COMPANION(pdev);
+ if (!acpi_pdev)
+ return -ENODEV;
+
+ *dev = acpi_pdev;
+
+ return 0;
}
+#endif /* CONFIG_DRM_AMD_ISP */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
index a4d65973bf7c..80771b1480ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
@@ -100,6 +100,7 @@ struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock)
amdgpu_afmt_calc_cts(clock, &res.cts_32khz, &res.n_32khz, 32000);
amdgpu_afmt_calc_cts(clock, &res.cts_44_1khz, &res.n_44_1khz, 44100);
amdgpu_afmt_calc_cts(clock, &res.cts_48khz, &res.n_48khz, 48000);
+ res.clock = clock;
return res;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 7077f21f0021..a2879d2b7c8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
@@ -27,12 +28,14 @@
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_dma_buf.h"
+#include <drm/ttm/ttm_tt.h>
#include <linux/module.h>
#include <linux/dma-buf.h>
#include "amdgpu_xgmi.h"
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"
+#include "amdgpu_reset.h"
/* Total memory size in system memory and all GPU VRAM. Used to
* estimate worst case amount of memory to reserve for page tables
@@ -51,7 +54,6 @@ int amdgpu_amdkfd_init(void)
amdgpu_amdkfd_total_mem_size *= si.mem_unit;
ret = kgd2kfd_init();
- amdgpu_amdkfd_gpuvm_init_mem_limits();
kfd_initialized = !ret;
return ret;
@@ -72,10 +74,7 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
if (!kfd_initialized)
return;
- adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev, vf);
-
- if (adev->kfd.dev)
- amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
+ adev->kfd.dev = kgd2kfd_probe(adev, vf);
}
/**
@@ -97,13 +96,24 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
size_t *start_offset)
{
/*
- * The first num_doorbells are used by amdgpu.
+ * The first num_kernel_doorbells are used by amdgpu.
* amdkfd takes whatever's left in the aperture.
*/
- if (adev->doorbell.size > adev->doorbell.num_doorbells * sizeof(u32)) {
+ if (adev->enable_mes) {
+ /*
+ * With MES enabled, we only need to initialize
+ * the base address. The size and offset are
+ * not initialized as AMDGPU manages the whole
+ * doorbell space.
+ */
+ *aperture_base = adev->doorbell.base;
+ *aperture_size = 0;
+ *start_offset = 0;
+ } else if (adev->doorbell.size > adev->doorbell.num_kernel_doorbells *
+ sizeof(u32)) {
*aperture_base = adev->doorbell.base;
*aperture_size = adev->doorbell.size;
- *start_offset = adev->doorbell.num_doorbells * sizeof(u32);
+ *start_offset = adev->doorbell.num_kernel_doorbells * sizeof(u32);
} else {
*aperture_base = 0;
*aperture_size = 0;
@@ -111,11 +121,57 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
}
}
+
+static void amdgpu_amdkfd_reset_work(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ kfd.reset_work);
+
+ struct amdgpu_reset_context reset_context;
+
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ reset_context.src = adev->enable_mes ?
+ AMDGPU_RESET_SRC_MES :
+ AMDGPU_RESET_SRC_HWS;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+}
+
+static const struct drm_client_funcs kfd_client_funcs = {
+ .unregister = drm_client_release,
+};
+
+int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev)
+{
+ int ret;
+
+ if (!adev->kfd.init_complete || adev->kfd.client.dev)
+ return 0;
+
+ ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd",
+ &kfd_client_funcs);
+ if (ret) {
+ dev_err(adev->dev, "Failed to init DRM client: %d\n",
+ ret);
+ return ret;
+ }
+
+ drm_client_register(&adev->kfd.client);
+
+ return 0;
+}
+
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
int i;
int last_valid_bit;
+ amdgpu_amdkfd_gpuvm_init_mem_limits();
+
if (adev->kfd.dev) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap =
@@ -128,15 +184,15 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
AMDGPU_GMC_HOLE_START),
.drm_render_minor = adev_to_drm(adev)->render->index,
.sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
-
+ .enable_mes = adev->enable_mes,
};
/* this is going to have a few of the MSBs set that we need to
* clear
*/
bitmap_complement(gpu_resources.cp_queue_bitmap,
- adev->gfx.mec.queue_bitmap,
- KGD_MAX_QUEUES);
+ adev->gfx.mec_bitmap[0].queue_bitmap,
+ AMDGPU_MAX_QUEUES);
/* According to linux/bitmap.h we shouldn't use bitmap_clear if
* nbits is not compile time constant
@@ -144,7 +200,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
last_valid_bit = 1 /* only first MEC can have compute queues */
* adev->gfx.mec.num_pipe_per_mec
* adev->gfx.mec.num_queue_per_pipe;
- for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
+ for (i = last_valid_bit; i < AMDGPU_MAX_QUEUES; ++i)
clear_bit(i, gpu_resources.cp_queue_bitmap);
amdgpu_doorbell_get_kfd_info(adev,
@@ -168,7 +224,11 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
}
adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
- adev_to_drm(adev), &gpu_resources);
+ &gpu_resources);
+
+ amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
+
+ INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work);
}
}
@@ -177,6 +237,7 @@ void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev)
if (adev->kfd.dev) {
kgd2kfd_device_exit(adev->kfd.dev);
adev->kfd.dev = NULL;
+ amdgpu_amdkfd_total_mem_size -= adev->gmc.real_vram_size;
}
}
@@ -187,38 +248,53 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
}
-void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm)
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc)
{
- if (adev->kfd.dev)
- kgd2kfd_suspend(adev->kfd.dev, run_pm);
+ if (adev->kfd.dev) {
+ if (adev->in_s0ix)
+ kgd2kfd_stop_sched_all_nodes(adev->kfd.dev);
+ else
+ kgd2kfd_suspend(adev->kfd.dev, suspend_proc);
+ }
}
-int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev)
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc)
{
int r = 0;
- if (adev->kfd.dev)
- r = kgd2kfd_resume_iommu(adev->kfd.dev);
+ if (adev->kfd.dev) {
+ if (adev->in_s0ix)
+ r = kgd2kfd_start_sched_all_nodes(adev->kfd.dev);
+ else
+ r = kgd2kfd_resume(adev->kfd.dev, resume_proc);
+ }
return r;
}
-int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev)
+{
+ if (adev->kfd.dev)
+ kgd2kfd_suspend_process(adev->kfd.dev);
+}
+
+int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev)
{
int r = 0;
if (adev->kfd.dev)
- r = kgd2kfd_resume(adev->kfd.dev, run_pm);
+ r = kgd2kfd_resume_process(adev->kfd.dev);
return r;
}
-int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
+int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context)
{
int r = 0;
if (adev->kfd.dev)
- r = kgd2kfd_pre_reset(adev->kfd.dev);
+ r = kgd2kfd_pre_reset(adev->kfd.dev, reset_context);
return r;
}
@@ -233,19 +309,17 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
return r;
}
-void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
+void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
if (amdgpu_device_should_recover_gpu(adev))
- amdgpu_device_gpu_recover(adev, NULL);
+ amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->kfd.reset_work);
}
-int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
+int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr, bool cp_mqd_gfx9)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_bo *bo = NULL;
struct amdgpu_bo_param bp;
int r;
@@ -314,21 +388,23 @@ allocate_mem_reserve_bo_failed:
return r;
}
-void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
+void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj)
{
- struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
+ struct amdgpu_bo **bo = (struct amdgpu_bo **) mem_obj;
- amdgpu_bo_reserve(bo, true);
- amdgpu_bo_kunmap(bo);
- amdgpu_bo_unpin(bo);
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_unref(&(bo));
+ if (!bo || !*bo)
+ return;
+
+ (void)amdgpu_bo_reserve(*bo, true);
+ amdgpu_bo_kunmap(*bo);
+ amdgpu_bo_unpin(*bo);
+ amdgpu_bo_unreserve(*bo);
+ amdgpu_bo_unref(bo);
}
-int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
+int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
void **mem_obj)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_bo *bo = NULL;
struct amdgpu_bo_user *ubo;
struct amdgpu_bo_param bp;
@@ -355,18 +431,16 @@ int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
return 0;
}
-void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj)
+void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj)
{
struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;
amdgpu_bo_unref(&bo);
}
-uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
+uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
enum kgd_engine_type type)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
switch (type) {
case KGD_ENGINE_PFP:
return adev->gfx.pfp_fw_version;
@@ -399,17 +473,27 @@ uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
return 0;
}
-void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
- struct kfd_local_mem_info *mem_info)
+void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
+ struct kfd_local_mem_info *mem_info,
+ struct amdgpu_xcp *xcp)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
memset(mem_info, 0, sizeof(*mem_info));
- mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
- mem_info->local_mem_size_private = adev->gmc.real_vram_size -
+ if (xcp) {
+ if (adev->gmc.real_vram_size == adev->gmc.visible_vram_size)
+ mem_info->local_mem_size_public =
+ KFD_XCP_MEMORY_SIZE(adev, xcp->id);
+ else
+ mem_info->local_mem_size_private =
+ KFD_XCP_MEMORY_SIZE(adev, xcp->id);
+ } else if (adev->apu_prefer_gtt) {
+ mem_info->local_mem_size_public = (ttm_tt_pages_limit() << PAGE_SHIFT);
+ mem_info->local_mem_size_private = 0;
+ } else {
+ mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
+ mem_info->local_mem_size_private = adev->gmc.real_vram_size -
adev->gmc.visible_vram_size;
-
+ }
mem_info->vram_width = adev->gmc.vram_width;
pr_debug("Address base: %pap public 0x%llx private 0x%llx\n",
@@ -417,9 +501,7 @@ void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
mem_info->local_mem_size_public,
mem_info->local_mem_size_private);
- if (amdgpu_sriov_vf(adev))
- mem_info->mem_clk_max = adev->clock.default_mclk / 100;
- else if (adev->pm.dpm_enabled) {
+ if (adev->pm.dpm_enabled) {
if (amdgpu_emu_mode == 1)
mem_info->mem_clk_max = 0;
else
@@ -428,58 +510,28 @@ void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
mem_info->mem_clk_max = 100;
}
-uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd)
+uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
if (adev->gfx.funcs->get_gpu_clock_counter)
return adev->gfx.funcs->get_gpu_clock_counter(adev);
return 0;
}
-uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
+uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
/* the sclk is in quantas of 10kHz */
- if (amdgpu_sriov_vf(adev))
- return adev->clock.default_sclk / 100;
- else if (adev->pm.dpm_enabled)
+ if (adev->pm.dpm_enabled)
return amdgpu_dpm_get_sclk(adev, false) / 100;
else
return 100;
}
-void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
- struct amdgpu_cu_info acu_info = adev->gfx.cu_info;
-
- memset(cu_info, 0, sizeof(*cu_info));
- if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap))
- return;
-
- cu_info->cu_active_number = acu_info.number;
- cu_info->cu_ao_mask = acu_info.ao_cu_mask;
- memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
- sizeof(acu_info.bitmap));
- cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
- cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
- cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
- cu_info->simd_per_cu = acu_info.simd_per_cu;
- cu_info->max_waves_per_simd = acu_info.max_waves_per_simd;
- cu_info->wave_front_size = acu_info.wave_front_size;
- cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu;
- cu_info->lds_size = acu_info.lds_size;
-}
-
-int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
- struct kgd_dev **dma_buf_kgd,
+int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
+ struct amdgpu_device **dmabuf_adev,
uint64_t *bo_size, void *metadata_buffer,
size_t buffer_size, uint32_t *metadata_size,
- uint32_t *flags)
+ uint32_t *flags, int8_t *xcp_id)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct dma_buf *dma_buf;
struct drm_gem_object *obj;
struct amdgpu_bo *bo;
@@ -507,8 +559,8 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
goto out_put;
r = 0;
- if (dma_buf_kgd)
- *dma_buf_kgd = (struct kgd_dev *)adev;
+ if (dmabuf_adev)
+ *dmabuf_adev = adev;
if (bo_size)
*bo_size = amdgpu_bo_size(bo);
if (metadata_buffer)
@@ -522,76 +574,16 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
*flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
}
+ if (xcp_id)
+ *xcp_id = bo->xcp_id;
out_put:
dma_buf_put(dma_buf);
return r;
}
-uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
- struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
-
- return amdgpu_vram_mgr_usage(vram_man);
-}
-
-uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
- return adev->gmc.xgmi.hive_id;
-}
-
-uint64_t amdgpu_amdkfd_get_unique_id(struct kgd_dev *kgd)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
- return adev->unique_id;
-}
-
-uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src)
-{
- struct amdgpu_device *peer_adev = (struct amdgpu_device *)src;
- struct amdgpu_device *adev = (struct amdgpu_device *)dst;
- int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);
-
- if (ret < 0) {
- DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
- adev->gmc.xgmi.physical_node_id,
- peer_adev->gmc.xgmi.physical_node_id, ret);
- ret = 0;
- }
- return (uint8_t)ret;
-}
-
-int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst, struct kgd_dev *src, bool is_min)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)dst, *peer_adev;
- int num_links;
-
- if (adev->asic_type != CHIP_ALDEBARAN)
- return 0;
-
- if (src)
- peer_adev = (struct amdgpu_device *)src;
-
- /* num links returns 0 for indirect peers since indirect route is unknown. */
- num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev, peer_adev);
- if (num_links < 0) {
- DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n",
- adev->gmc.xgmi.physical_node_id,
- peer_adev->gmc.xgmi.physical_node_id, num_links);
- num_links = 0;
- }
-
- /* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for bandwidth. */
- return (num_links * 16 * 25000)/BITS_PER_BYTE;
-}
-
-int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct kgd_dev *dev, bool is_min)
+int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)dev;
int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) :
fls(adev->pm.pcie_mlw_mask)) - 1;
int gen_speed_shift = (is_min ? ffs(adev->pm.pcie_gen_mask &
@@ -647,39 +639,11 @@ int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct kgd_dev *dev, bool is_min)
return (num_lanes_factor * gen_speed_mbits_factor)/BITS_PER_BYTE;
}
-uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
- return adev->rmmio_remap.bus_addr;
-}
-
-uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
- return adev->gds.gws_size;
-}
-
-uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
- return adev->rev_id;
-}
-
-int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
- return adev->gmc.noretry;
-}
-
-int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
+int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
+ enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct amdgpu_ring *ring;
@@ -702,7 +666,7 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
goto err;
}
- ret = amdgpu_job_alloc(adev, 1, &job, NULL);
+ ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, &job, 0);
if (ret)
goto err;
@@ -714,6 +678,7 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
ib->length_dw = ib_len;
/* This works for NO_HWS. TODO: need to handle without knowing VMID */
job->vmid = vmid;
+ job->num_ibs = 1;
ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
@@ -722,6 +687,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
goto err_ib_sched;
}
+ /* Drop the initial kref_init count (see drm_sched_main as example) */
+ dma_fence_put(f);
ret = dma_fence_wait(f, false);
err_ib_sched:
@@ -730,10 +697,23 @@ err:
return ret;
}
-void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
+void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
+ enum amd_powergating_state state = idle ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE;
+ if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 &&
+ ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) ||
+ (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 12)) {
+ pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled");
+ amdgpu_gfx_off_ctrl(adev, idle);
+ } else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) &&
+ (adev->flags & AMD_IS_APU)) {
+ /* Disable GFXOFF and PG. Temporary workaround
+ * to fix some compute applications issue on GFX9.
+ */
+ struct amdgpu_ip_block *gfx_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+ if (gfx_block != NULL)
+ gfx_block->version->funcs->set_powergating_state((void *)gfx_block, state);
+ }
amdgpu_dpm_switch_power_profile(adev,
PP_SMC_POWER_PROFILE_COMPUTE,
!idle);
@@ -747,49 +727,186 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
return false;
}
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
+bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+ return adev->have_atomics_support;
+}
- if (adev->family == AMDGPU_FAMILY_AI) {
- int i;
+void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev)
+{
+ amdgpu_device_flush_hdp(adev, NULL);
+}
+
+bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev)
+{
+ return amdgpu_ras_get_fed_status(adev);
+}
+
+void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset)
+{
+ amdgpu_umc_pasid_poison_handler(adev, block, pasid, pasid_fn, data, reset);
+}
+
+void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint32_t reset)
+{
+ amdgpu_umc_pasid_poison_handler(adev, block, 0, NULL, NULL, reset);
+}
+
+int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
+ uint32_t *payload)
+{
+ int ret;
+
+ /* Device or IH ring is not ready so bail. */
+ ret = amdgpu_ih_wait_on_checkpoint_process_ts(adev, &adev->irq.ih);
+ if (ret)
+ return ret;
+
+ /* Send payload to fence KFD interrupts */
+ amdgpu_amdkfd_interrupt(adev, payload);
+
+ return 0;
+}
+
+int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev)
+{
+ return kgd2kfd_check_and_lock_kfd(adev->kfd.dev);
+}
- for (i = 0; i < adev->num_vmhubs; i++)
- amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
+void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev)
+{
+ kgd2kfd_unlock_kfd(adev->kfd.dev);
+}
+
+
+u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id)
+{
+ s8 mem_id = KFD_XCP_MEM_ID(adev, xcp_id);
+ u64 tmp;
+
+ if (adev->gmc.num_mem_partitions && xcp_id >= 0 && mem_id >= 0) {
+ if (adev->gmc.is_app_apu && adev->gmc.num_mem_partitions == 1) {
+ /* In NPS1 mode, we should restrict the vram reporting
+ * tied to the ttm_pages_limit which is 1/2 of the system
+ * memory. For other partition modes, the HBM is uniformly
+ * divided already per numa node reported. If user wants to
+ * go beyond the default ttm limit and maximize the ROCm
+ * allocations, they can go up to max ttm and sysmem limits.
+ */
+
+ tmp = (ttm_tt_pages_limit() << PAGE_SHIFT) / num_online_nodes();
+ } else {
+ tmp = adev->gmc.mem_partitions[mem_id].size;
+ }
+ do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
+ return ALIGN_DOWN(tmp, PAGE_SIZE);
+ } else if (adev->apu_prefer_gtt) {
+ return (ttm_tt_pages_limit() << PAGE_SHIFT);
} else {
- amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
+ return adev->gmc.real_vram_size;
}
+}
- return 0;
+int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
+ u32 inst)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ struct amdgpu_ring_funcs *ring_funcs;
+ struct amdgpu_ring *ring;
+ int r = 0;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ if (!kiq_ring->sched.ready || amdgpu_in_reset(adev))
+ return 0;
+
+ ring_funcs = kzalloc(sizeof(*ring_funcs), GFP_KERNEL);
+ if (!ring_funcs)
+ return -ENOMEM;
+
+ ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+ if (!ring) {
+ r = -ENOMEM;
+ goto free_ring_funcs;
+ }
+
+ ring_funcs->type = AMDGPU_RING_TYPE_COMPUTE;
+ ring->doorbell_index = doorbell_off;
+ ring->funcs = ring_funcs;
+
+ spin_lock(&kiq->ring_lock);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock(&kiq->ring_lock);
+ r = -ENOMEM;
+ goto free_ring;
+ }
+
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, 0, 0);
+
+ /* Submit unmap queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that unmap queues that is submitted before got
+ * processed successfully before returning.
+ */
+ r = amdgpu_ring_test_helper(kiq_ring);
+
+ spin_unlock(&kiq->ring_lock);
+
+free_ring:
+ kfree(ring);
+
+free_ring_funcs:
+ kfree(ring_funcs);
+
+ return r;
}
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid,
- enum TLB_FLUSH_TYPE flush_type)
+/* Stop scheduling on KFD */
+int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
- bool all_hub = false;
+ if (!adev->kfd.init_complete)
+ return 0;
- if (adev->family == AMDGPU_FAMILY_AI)
- all_hub = true;
+ return kgd2kfd_stop_sched(adev->kfd.dev, node_id);
+}
+
+/* Start scheduling on KFD */
+int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id)
+{
+ if (!adev->kfd.init_complete)
+ return 0;
- return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
+ return kgd2kfd_start_sched(adev->kfd.dev, node_id);
}
-bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
+/* check if there are KFD queues active */
+bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+ if (!adev->kfd.init_complete)
+ return false;
- return adev->have_atomics_support;
+ return kgd2kfd_compute_active(adev->kfd.dev, node_id);
}
-void amdgpu_amdkfd_ras_poison_consumption_handler(struct kgd_dev *kgd)
+/* Config CGTT_SQ_CLK_CTRL */
+int amdgpu_amdkfd_config_sq_perfmon(struct amdgpu_device *adev, uint32_t xcp_id,
+ bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
- struct ras_err_data err_data = {0, 0, 0, NULL};
+ int r;
- /* CPU MCA will handle page retirement if connected_to_cpu is 1 */
- if (!adev->gmc.xgmi.connected_to_cpu)
- amdgpu_umc_process_ras_data_cb(adev, &err_data, NULL);
- else
- amdgpu_amdkfd_gpu_reset(kgd);
+ if (!adev->kfd.init_complete)
+ return 0;
+
+ r = psp_config_sq_perfmon(&adev->psp, xcp_id, core_override_enable,
+ reg_override_enable, perfmon_override_enable);
+
+ return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index a15a4787c7ee..8bdfcde2029b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -25,14 +25,18 @@
#ifndef AMDGPU_AMDKFD_H_INCLUDED
#define AMDGPU_AMDKFD_H_INCLUDED
+#include <linux/list.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/kthread.h>
#include <linux/workqueue.h>
+#include <linux/mmu_notifier.h>
+#include <linux/memremap.h>
#include <kgd_kfd_interface.h>
-#include <drm/ttm/ttm_execbuf_util.h>
+#include <drm/drm_client.h>
#include "amdgpu_sync.h"
#include "amdgpu_vm.h"
+#include "amdgpu_xcp.h"
extern uint64_t amdgpu_amdkfd_total_mem_size;
@@ -43,11 +47,14 @@ enum TLB_FLUSH_TYPE {
};
struct amdgpu_device;
+struct kfd_process_device;
+struct amdgpu_reset_context;
enum kfd_mem_attachment_type {
KFD_MEM_ATT_SHARED, /* Share kgd_mem->bo or another attachment's */
KFD_MEM_ATT_USERPTR, /* SG bo to DMA map pages from a userptr bo */
KFD_MEM_ATT_DMABUF, /* DMAbuf to DMA map TTM BOs */
+ KFD_MEM_ATT_SG /* Tag to DMA map SG BOs */
};
struct kfd_mem_attachment {
@@ -64,21 +71,22 @@ struct kgd_mem {
struct mutex lock;
struct amdgpu_bo *bo;
struct dma_buf *dmabuf;
+ struct amdgpu_hmm_range *range;
struct list_head attachments;
/* protected by amdkfd_process_info.lock */
- struct ttm_validate_buffer validate_list;
- struct ttm_validate_buffer resv_list;
+ struct list_head validate_list;
uint32_t domain;
unsigned int mapped_to_gpu_memory;
uint64_t va;
uint32_t alloc_flags;
- atomic_t invalid;
+ uint32_t invalid;
struct amdkfd_process_info *process_info;
struct amdgpu_sync sync;
+ uint32_t gem_handle;
bool aql_queue;
bool is_imported;
};
@@ -94,8 +102,18 @@ struct amdgpu_amdkfd_fence {
struct amdgpu_kfd_dev {
struct kfd_dev *dev;
- uint64_t vram_used;
+ int64_t vram_used[MAX_XCP];
+ uint64_t vram_used_aligned[MAX_XCP];
bool init_complete;
+ struct work_struct reset_work;
+
+ /* Client for KFD BO GEM handle allocations */
+ struct drm_client_dev client;
+
+ /* HMM page migration MEMORY_DEVICE_PRIVATE mapping
+ * Must be last --ends in a flexible-array member.
+ */
+ struct dev_pagemap pgmap;
};
enum kgd_engine_type {
@@ -128,38 +146,42 @@ struct amdkfd_process_info {
struct amdgpu_amdkfd_fence *eviction_fence;
/* MMU-notifier related fields */
- atomic_t evicted_bos;
+ struct mutex notifier_lock;
+ uint32_t evicted_bos;
struct delayed_work restore_userptr_work;
struct pid *pid;
+ bool block_mmu_notifications;
};
int amdgpu_amdkfd_init(void);
void amdgpu_amdkfd_fini(void);
-void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm);
-int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev);
-int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm);
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc);
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc);
+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev);
+int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev);
void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
const void *ih_ring_entry);
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev);
-int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
+int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev);
+void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev);
+int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
+ enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len);
-void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
-bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid);
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid,
- enum TLB_FLUSH_TYPE flush_type);
+void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle);
+bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
-int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev);
+int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context);
int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev);
-void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd);
+void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev);
int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
int queue_bit);
@@ -167,11 +189,20 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
struct mm_struct *mm,
struct svm_range_bo *svm_bo);
+
+int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev);
+#if defined(CONFIG_DEBUG_FS)
+int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
+#endif
#if IS_ENABLED(CONFIG_HSA_AMD)
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
-int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
-int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
+void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo);
+int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
+ unsigned long cur_seq, struct kgd_mem *mem);
+int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
+ uint32_t domain,
+ struct dma_fence *fence);
#else
static inline
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
@@ -186,49 +217,58 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
}
static inline
-int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
+void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo)
{
- return 0;
}
static inline
-int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm)
+int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
+ unsigned long cur_seq, struct kgd_mem *mem)
+{
+ return 0;
+}
+static inline
+int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
+ uint32_t domain,
+ struct dma_fence *fence)
{
return 0;
}
#endif
/* Shared API */
-int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
+int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr, bool mqd_gfx9);
-void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
-int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj);
-void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj);
+void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj);
+int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
+ void **mem_obj);
+void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj);
int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem);
int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);
-uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
+uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
enum kgd_engine_type type);
-void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
- struct kfd_local_mem_info *mem_info);
-uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd);
-
-uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
-void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
-int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
- struct kgd_dev **dmabuf_kgd,
+void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
+ struct kfd_local_mem_info *mem_info,
+ struct amdgpu_xcp *xcp);
+uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev);
+
+uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev);
+int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
+ struct amdgpu_device **dmabuf_adev,
uint64_t *bo_size, void *metadata_buffer,
size_t buffer_size, uint32_t *metadata_size,
- uint32_t *flags);
-uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
-uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
-uint64_t amdgpu_amdkfd_get_unique_id(struct kgd_dev *kgd);
-uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd);
-uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd);
-uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd);
-int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd);
-uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
-int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst, struct kgd_dev *src, bool is_min);
-int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct kgd_dev *dev, bool is_min);
+ uint32_t *flags, int8_t *xcp_id);
+int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min);
+int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
+ uint32_t *payload);
+int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
+ u32 inst);
+int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id);
+int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id);
+int amdgpu_amdkfd_config_sq_perfmon(struct amdgpu_device *adev, uint32_t xcp_id,
+ bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable);
+bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id);
+
/* Read user wptr from a specified user address space with page fault
* disabled. The memory must be pinned and mapped to the hardware when
@@ -258,45 +298,81 @@ int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct kgd_dev *dev, bool is_min);
(&((struct amdgpu_fpriv *) \
((struct drm_file *)(drm_priv))->driver_priv)->vm)
-int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
- struct file *filp, u32 pasid,
+int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
+ struct amdgpu_vm *avm,
void **process_info,
struct dma_fence **ef);
-void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv);
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
+ uint8_t xcp_id);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
- struct kgd_dev *kgd, uint64_t va, uint64_t size,
+ struct amdgpu_device *adev, uint64_t va, uint64_t size,
void *drm_priv, struct kgd_mem **mem,
- uint64_t *offset, uint32_t flags);
+ uint64_t *offset, uint32_t flags, bool criu_resume);
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
- struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
+ struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
uint64_t *size);
-int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
- struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, bool *table_freed);
+int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev,
+ struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
- struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
+ struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv);
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_sync_memory(
- struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
-int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
- struct kgd_mem *mem, void **kptr, uint64_t *size);
-void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_dev *kgd, struct kgd_mem *mem);
+ struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
+int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
+ void **kptr, uint64_t *size);
+void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem);
+
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo, struct amdgpu_bo **bo_gart);
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
- struct dma_fence **ef);
-int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
+ struct dma_fence __rcu **ef);
+int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
struct kfd_vm_fault_info *info);
-int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
- struct dma_buf *dmabuf,
- uint64_t va, void *drm_priv,
- struct kgd_mem **mem, uint64_t *size,
- uint64_t *mmap_offset);
-int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
+int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd,
+ uint64_t va, void *drm_priv,
+ struct kgd_mem **mem, uint64_t *size,
+ uint64_t *mmap_offset);
+int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem,
+ struct dma_buf **dmabuf);
+void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev);
+int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
struct tile_config *config);
-void amdgpu_amdkfd_ras_poison_consumption_handler(struct kgd_dev *kgd);
+void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint32_t reset);
+
+void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset);
+
+bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev);
+bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem);
+void amdgpu_amdkfd_block_mmu_notifications(void *p);
+int amdgpu_amdkfd_criu_resume(void *p);
+int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 alloc_flag, int8_t xcp_id);
+void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 alloc_flag, int8_t xcp_id);
+
+u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id);
+
+#define KFD_XCP_MEM_ID(adev, xcp_id) \
+ ((adev)->xcp_mgr && (xcp_id) >= 0 ?\
+ (adev)->xcp_mgr->xcp[(xcp_id)].mem_id : -1)
+
+#define KFD_XCP_MEMORY_SIZE(adev, xcp_id) amdgpu_amdkfd_xcp_memory_size((adev), (xcp_id))
+
+
#if IS_ENABLED(CONFIG_HSA_AMD)
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
+
+/**
+ * @amdgpu_amdkfd_release_notify() - Notify KFD when GEM object is released
+ *
+ * Allows KFD to release its resources associated with the GEM object.
+ */
void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
#else
@@ -316,27 +392,49 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
{
}
#endif
+
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+int kgd2kfd_init_zone_device(struct amdgpu_device *adev);
+#else
+static inline
+int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
+{
+ return 0;
+}
+#endif
+
/* KGD2KFD callbacks */
-int kgd2kfd_quiesce_mm(struct mm_struct *mm);
+int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger);
int kgd2kfd_resume_mm(struct mm_struct *mm);
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
struct dma_fence *fence);
#if IS_ENABLED(CONFIG_HSA_AMD)
int kgd2kfd_init(void);
void kgd2kfd_exit(void);
-struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf);
+struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf);
bool kgd2kfd_device_init(struct kfd_dev *kfd,
- struct drm_device *ddev,
const struct kgd2kfd_shared_resources *gpu_resources);
void kgd2kfd_device_exit(struct kfd_dev *kfd);
-void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm);
-int kgd2kfd_resume_iommu(struct kfd_dev *kfd);
-int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
-int kgd2kfd_pre_reset(struct kfd_dev *kfd);
+void kgd2kfd_suspend(struct kfd_dev *kfd, bool suspend_proc);
+int kgd2kfd_resume(struct kfd_dev *kfd, bool resume_proc);
+void kgd2kfd_suspend_process(struct kfd_dev *kfd);
+int kgd2kfd_resume_process(struct kfd_dev *kfd);
+int kgd2kfd_pre_reset(struct kfd_dev *kfd,
+ struct amdgpu_reset_context *reset_context);
int kgd2kfd_post_reset(struct kfd_dev *kfd);
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask);
+int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd);
+void kgd2kfd_unlock_kfd(struct kfd_dev *kfd);
+int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id);
+int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd);
+int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id);
+int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd);
+bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id);
+bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
+ bool retry_fault);
+
#else
static inline int kgd2kfd_init(void)
{
@@ -348,13 +446,13 @@ static inline void kgd2kfd_exit(void)
}
static inline
-struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf)
+struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
{
return NULL;
}
static inline
-bool kgd2kfd_device_init(struct kfd_dev *kfd, struct drm_device *ddev,
+bool kgd2kfd_device_init(struct kfd_dev *kfd,
const struct kgd2kfd_shared_resources *gpu_resources)
{
return false;
@@ -364,21 +462,26 @@ static inline void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
}
-static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
+static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool suspend_proc)
{
}
-static int __maybe_unused kgd2kfd_resume_iommu(struct kfd_dev *kfd)
+static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool resume_proc)
{
return 0;
}
-static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
+static inline void kgd2kfd_suspend_process(struct kfd_dev *kfd)
+{
+}
+
+static inline int kgd2kfd_resume_process(struct kfd_dev *kfd)
{
return 0;
}
-static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd)
+static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd,
+ struct amdgpu_reset_context *reset_context)
{
return 0;
}
@@ -402,5 +505,46 @@ static inline
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
{
}
+
+static inline int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd)
+{
+ return 0;
+}
+
+static inline void kgd2kfd_unlock_kfd(struct kfd_dev *kfd)
+{
+}
+
+static inline int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id)
+{
+ return 0;
+}
+
+static inline int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd)
+{
+ return 0;
+}
+
+static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id)
+{
+ return 0;
+}
+
+static inline int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd)
+{
+ return 0;
+}
+
+static inline bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id)
+{
+ return false;
+}
+
+static inline bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
+ bool retry_fault)
+{
+ return false;
+}
+
#endif
#endif /* AMDGPU_AMDKFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index 46cd4ee6bafb..7e9f7a280c1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -23,6 +23,145 @@
#include "amdgpu_amdkfd.h"
#include "amdgpu_amdkfd_arcturus.h"
#include "amdgpu_amdkfd_gfx_v9.h"
+#include "amdgpu_amdkfd_aldebaran.h"
+#include "gc/gc_9_4_2_offset.h"
+#include "gc/gc_9_4_2_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+/*
+ * Returns TRAP_EN, EXCP_EN and EXCP_REPLACE.
+ *
+ * restore_dbg_registers is ignored here but is a general interface requirement
+ * for devices that support GFXOFF and where the RLC save/restore list
+ * does not support hw registers for debugging i.e. the driver has to manually
+ * initialize the debug mode registers after it has disabled GFX off during the
+ * debug session.
+ */
+uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_aldebaran_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, keep_trap_enabled);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+static int kgd_aldebaran_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION;
+
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
+ trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
+ return -EPERM;
+
+ return 0;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_aldebaran_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev)
+
+{
+ uint32_t data = 0;
+
+ *trap_mask_prev = REG_GET_FIELD(kfd_dbg_trap_cntl_prev, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
+ trap_mask_bits = (trap_mask_bits & trap_mask_request) |
+ (*trap_mask_prev & ~trap_mask_request);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, trap_mask_bits);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
+
+ return data;
+}
+
+uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE, wave_launch_mode);
+
+ return data;
+}
+
+#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
+static uint32_t kgd_gfx_aldebaran_set_address_watch(
+ struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
+{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 6);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ return watch_address_cntl;
+}
const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
@@ -37,12 +176,22 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied,
.hqd_destroy = kgd_gfx_v9_hqd_destroy,
.hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy,
- .address_watch_disable = kgd_gfx_v9_address_watch_disable,
- .address_watch_execute = kgd_gfx_v9_address_watch_execute,
.wave_control_execute = kgd_gfx_v9_wave_control_execute,
- .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_info =
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
- .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings
+ .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
+ .enable_debug_trap = kgd_aldebaran_enable_debug_trap,
+ .disable_debug_trap = kgd_aldebaran_disable_debug_trap,
+ .validate_trap_override_request = kgd_aldebaran_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_aldebaran_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_aldebaran_set_address_watch,
+ .clear_address_watch = kgd_gfx_v9_clear_address_watch,
+ .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
+ .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
+ .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v9_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v9_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
new file mode 100644
index 000000000000..a7bdaf8d82dd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid);
+uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 5a7f680bcb3f..1105a09e55dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -20,12 +20,12 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/module.h>
-#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_amdkfd_arcturus.h"
+#include "amdgpu_reset.h"
#include "sdma0/sdma0_4_2_2_offset.h"
#include "sdma0/sdma0_4_2_2_sh_mask.h"
#include "sdma1/sdma1_4_2_2_offset.h"
@@ -48,6 +48,8 @@
#include "amdgpu_amdkfd_gfx_v9.h"
#include "gfxhub_v1_0.h"
#include "mmhub_v9_4.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
#define HQD_N_REGS 56
#define DUMP_REG(addr) do { \
@@ -57,11 +59,6 @@
(*dump)[i++][1] = RREG32(addr); \
} while (0)
-static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
-{
- return (struct amdgpu_device *)kgd;
-}
-
static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
return (struct v9_sdma_mqd *)mqd;
@@ -123,10 +120,9 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
return sdma_rlc_reg_offset;
}
-int kgd_arcturus_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+int kgd_arcturus_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
uint32_t __user *wptr, struct mm_struct *mm)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
unsigned long end_jiffies;
@@ -193,18 +189,17 @@ int kgd_arcturus_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
return 0;
}
-int kgd_arcturus_hqd_sdma_dump(struct kgd_dev *kgd,
+int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev,
uint32_t engine_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
engine_id, queue_id);
uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)
- *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -225,9 +220,9 @@ int kgd_arcturus_hqd_sdma_dump(struct kgd_dev *kgd,
return 0;
}
-bool kgd_arcturus_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+bool kgd_arcturus_hqd_sdma_is_occupied(struct amdgpu_device *adev,
+ void *mqd)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
@@ -244,10 +239,9 @@ bool kgd_arcturus_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
return false;
}
-int kgd_arcturus_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+int kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
unsigned int utimeout)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t temp;
@@ -284,6 +278,117 @@ int kgd_arcturus_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
return 0;
}
+/*
+ * Helper used to suspend/resume gfx pipe for image post process work to set
+ * barrier behaviour.
+ */
+static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool suspend)
+{
+ int i, r = 0;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+
+ if (!amdgpu_ring_sched_ready(ring))
+ continue;
+
+ /* stop scheduler and drain ring. */
+ if (suspend) {
+ drm_sched_stop(&ring->sched, NULL);
+ r = amdgpu_fence_wait_empty(ring);
+ if (r)
+ goto out;
+ } else {
+ drm_sched_start(&ring->sched, 0);
+ }
+ }
+
+out:
+ /* return on resume or failure to drain rings. */
+ if (!suspend || r)
+ return r;
+
+ return amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
+}
+
+static void set_barrier_auto_waitcnt(struct amdgpu_device *adev, bool enable_waitcnt)
+{
+ uint32_t data;
+
+ WRITE_ONCE(adev->barrier_has_auto_waitcnt, enable_waitcnt);
+
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return;
+
+ amdgpu_amdkfd_suspend(adev, true);
+
+ if (suspend_resume_compute_scheduler(adev, true))
+ goto out;
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG));
+ data = REG_SET_FIELD(data, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
+ !enable_waitcnt);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG), data);
+
+out:
+ suspend_resume_compute_scheduler(adev, false);
+
+ amdgpu_amdkfd_resume(adev, true);
+
+ up_read(&adev->reset_domain->sem);
+}
+
+/*
+ * restore_dbg_registers is ignored here but is a general interface requirement
+ * for devices that support GFXOFF and where the RLC save/restore list
+ * does not support hw registers for debugging i.e. the driver has to manually
+ * initialize the debug mode registers after it has disabled GFX off during the
+ * debug session.
+ */
+static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ set_barrier_auto_waitcnt(adev, true);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+/*
+ * keep_trap_enabled is ignored here but is a general interface requirement
+ * for devices that support multi-process debugging where the performance
+ * overhead from trap temporary setup needs to be bypassed when the debug
+ * session has ended.
+ */
+static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ set_barrier_auto_waitcnt(adev, false);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
const struct kfd2kgd_calls arcturus_kfd2kgd = {
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -297,14 +402,23 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
.hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied,
.hqd_destroy = kgd_gfx_v9_hqd_destroy,
.hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy,
- .address_watch_disable = kgd_gfx_v9_address_watch_disable,
- .address_watch_execute = kgd_gfx_v9_address_watch_execute,
.wave_control_execute = kgd_gfx_v9_wave_control_execute,
- .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_info =
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base =
kgd_gfx_v9_set_vm_context_page_table_base,
+ .enable_debug_trap = kgd_arcturus_enable_debug_trap,
+ .disable_debug_trap = kgd_arcturus_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v9_set_address_watch,
+ .clear_address_watch = kgd_gfx_v9_clear_address_watch,
+ .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
- .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings
+ .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
+ .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v9_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v9_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h
index ce08131b7b5f..756c1a5679c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h
@@ -20,11 +20,12 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-int kgd_arcturus_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+int kgd_arcturus_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
uint32_t __user *wptr, struct mm_struct *mm);
-int kgd_arcturus_hqd_sdma_dump(struct kgd_dev *kgd,
+int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev,
uint32_t engine_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs);
-bool kgd_arcturus_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
-int kgd_arcturus_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+bool kgd_arcturus_hqd_sdma_is_occupied(struct amdgpu_device *adev,
+ void *mqd);
+int kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
unsigned int utimeout);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
index 1d0dbff87d3f..1ef758ac5076 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -90,7 +90,7 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
return NULL;
fence = container_of(f, struct amdgpu_amdkfd_fence, base);
- if (fence && f->ops == &amdkfd_fence_ops)
+ if (f->ops == &amdkfd_fence_ops)
return fence;
return NULL;
@@ -159,11 +159,14 @@ static void amdkfd_fence_release(struct dma_fence *f)
}
/**
- * amdkfd_fence_check_mm - Check if @mm is same as that of the fence @f
- * if same return TRUE else return FALSE.
+ * amdkfd_fence_check_mm - Check whether to prevent eviction of @f by @mm
*
* @f: [IN] fence
* @mm: [IN] mm that needs to be verified
+ *
+ * Check if @mm is same as that of the fence @f, if same return TRUE else
+ * return FALSE.
+ * For svm bo, which support vram overcommitment, always return FALSE.
*/
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
{
@@ -171,7 +174,7 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
if (!fence)
return false;
- else if (fence->mm == mm)
+ else if (fence->mm == mm && !fence->svm_bo)
return true;
return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
new file mode 100644
index 000000000000..89a45a9218f3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -0,0 +1,559 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_gfx_v9.h"
+#include "amdgpu_amdkfd_aldebaran.h"
+#include "gc/gc_9_4_3_offset.h"
+#include "gc/gc_9_4_3_sh_mask.h"
+#include "athub/athub_1_8_0_offset.h"
+#include "athub/athub_1_8_0_sh_mask.h"
+#include "oss/osssys_4_4_2_offset.h"
+#include "oss/osssys_4_4_2_sh_mask.h"
+#include "v9_structs.h"
+#include "soc15.h"
+#include "sdma/sdma_4_4_2_offset.h"
+#include "sdma/sdma_4_4_2_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v9_sdma_mqd *)mqd;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t sdma_engine_reg_base =
+ SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, engine_id),
+ regSDMA_RLC0_RB_CNTL) -
+ regSDMA_RLC0_RB_CNTL;
+ uint32_t retval = sdma_engine_reg_base +
+ queue_id * (regSDMA_RLC1_RB_CNTL - regSDMA_RLC0_RB_CNTL);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, retval);
+ return retval;
+}
+
+static int kgd_gfx_v9_4_3_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_CONTEXT_STATUS);
+ if (data & SDMA_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int kgd_gfx_v9_4_3_hqd_sdma_dump(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+6+7+12)
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = regSDMA_RLC0_RB_CNTL; reg <= regSDMA_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA_RLC0_STATUS; reg <= regSDMA_RLC0_CSA_ADDR_HI; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA_RLC0_IB_SUB_REMAIN;
+ reg <= regSDMA_RLC0_MINOR_PTR_UPDATE; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA_RLC0_MIDCMD_DATA0;
+ reg <= regSDMA_RLC0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool kgd_gfx_v9_4_3_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int kgd_gfx_v9_4_3_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL);
+ temp = temp & ~SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL) |
+ SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr =
+ RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_HI);
+
+ return 0;
+}
+
+static int kgd_gfx_v9_4_3_set_pasid_vmid_mapping(struct amdgpu_device *adev,
+ u32 pasid, unsigned int vmid, uint32_t xcc_inst)
+{
+ unsigned long timeout;
+ unsigned int reg;
+ unsigned int phy_inst = GET_INST(GC, xcc_inst);
+ /* Every two XCCs share one AID */
+ unsigned int aid = phy_inst / 2;
+
+ /*
+ * We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+ * a mapping is in progress or because a mapping finished
+ * and the SW cleared it.
+ * So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+ ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ regATC_VMID0_PASID_MAPPING) + vmid, pasid_mapping);
+
+ timeout = jiffies + msecs_to_jiffies(10);
+ while (!(RREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ regATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+ (1U << vmid))) {
+ if (time_after(jiffies, timeout)) {
+ pr_err("Fail to program VMID-PASID mapping\n");
+ return -ETIME;
+ }
+ cpu_relax();
+ }
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ regATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+ 1U << vmid);
+
+ reg = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX));
+ /* Every 4 numbers is a cycle. 1st is AID, 2nd and 3rd are XCDs,
+ * and the 4th is reserved. Therefore "aid * 4 + (phy_inst % 2) + 1"
+ * programs _LUT for XCC and "aid * 4" for AID where the XCC connects
+ * to.
+ */
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX),
+ aid * 4 + (phy_inst % 2) + 1);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid,
+ pasid_mapping);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX),
+ aid * 4);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid,
+ pasid_mapping);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX), reg);
+
+ return 0;
+}
+
+static inline struct v9_mqd *get_mqd(void *mqd)
+{
+ return (struct v9_mqd *)mqd;
+}
+
+static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
+{
+ struct v9_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, hqd_base, hqd_end, data;
+
+ m = get_mqd(mqd);
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+
+ /* HQD registers extend to CP_HQD_AQL_DISPATCH_ID_HI */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+ hqd_base = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_MQD_BASE_ADDR);
+ hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI);
+
+ for (reg = hqd_base; reg <= hqd_end; reg++)
+ WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst);
+
+
+ /* Activate doorbell logic before triggering WPTR poll. */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+ * context may not be accessible (if this function
+ * runs in a work queue). Instead trigger a one-shot
+ * polling read from memory in the CP. This assumes
+ * that wptr is GPU-accessible in the queue's VMID via
+ * ATC or SVM. WPTR==RPTR before starting the poll so
+ * the CP starts fetching new commands from the right
+ * place.
+ *
+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+ * tricky. Assume that the queue didn't overflow. The
+ * number of valid bits in the 32-bit RPTR depends on
+ * the queue size. The remaining bits are taken from
+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
+ * queue size.
+ */
+ uint32_t queue_size =
+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+ guessed_wptr += queue_size;
+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO,
+ lower_32_bits(guessed_wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI,
+ upper_32_bits(guessed_wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR,
+ lower_32_bits((uintptr_t)wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ upper_32_bits((uintptr_t)wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1,
+ (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id));
+ }
+
+ /* Start the EOP fetcher */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR,
+ REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE, data);
+
+ kgd_gfx_v9_release_queue(adev, inst);
+
+ return 0;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v9_4_3_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+static int kgd_gfx_v9_4_3_validate_trap_override_request(
+ struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION |
+ KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START |
+ KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
+ trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
+ return -EPERM;
+
+ return 0;
+}
+
+static uint32_t trap_mask_map_sw_to_hw(uint32_t mask)
+{
+ uint32_t trap_on_start = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START) ? 1 : 0;
+ uint32_t trap_on_end = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END) ? 1 : 0;
+ uint32_t excp_en = mask & (KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION);
+ uint32_t ret;
+
+ ret = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en);
+ ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, trap_on_start);
+ ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, trap_on_end);
+
+ return ret;
+}
+
+static uint32_t trap_mask_map_hw_to_sw(uint32_t mask)
+{
+ uint32_t ret = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
+
+ if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START))
+ ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START;
+
+ if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END))
+ ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+ return ret;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v9_4_3_set_wave_launch_trap_override(
+ struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev)
+
+{
+ uint32_t data = 0;
+
+ *trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev);
+
+ data = (trap_mask_bits & trap_mask_request) |
+ (*trap_mask_prev & ~trap_mask_request);
+ data = trap_mask_map_sw_to_hw(data);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
+
+ return data;
+}
+
+#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
+static uint32_t kgd_gfx_v9_4_3_set_address_watch(
+ struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
+{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 7);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+ regTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high, inst);
+
+ WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+ regTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low, inst);
+
+ return watch_address_cntl;
+}
+
+static uint32_t kgd_gfx_v9_4_3_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ return 0;
+}
+
+static uint32_t kgd_gfx_v9_4_3_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ uint32_t reg_offset = get_sdma_rlc_reg_offset(adev, engine, queue);
+ uint32_t status = RREG32(regSDMA_RLC0_CONTEXT_STATUS + reg_offset);
+ uint32_t doorbell_off = RREG32(regSDMA_RLC0_DOORBELL_OFFSET + reg_offset);
+ bool is_active = !!REG_GET_FIELD(status, SDMA_RLC0_CONTEXT_STATUS, SELECTED);
+
+ return is_active ? doorbell_off >> 2 : 0;
+}
+
+const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
+ .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_gfx_v9_4_3_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_gfx_v9_init_interrupts,
+ .hqd_load = kgd_gfx_v9_4_3_hqd_load,
+ .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
+ .hqd_sdma_load = kgd_gfx_v9_4_3_hqd_sdma_load,
+ .hqd_dump = kgd_gfx_v9_hqd_dump,
+ .hqd_sdma_dump = kgd_gfx_v9_4_3_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_gfx_v9_4_3_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_gfx_v9_hqd_destroy,
+ .hqd_sdma_destroy = kgd_gfx_v9_4_3_hqd_sdma_destroy,
+ .wave_control_execute = kgd_gfx_v9_wave_control_execute,
+ .get_atc_vmid_pasid_mapping_info =
+ kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
+ .set_vm_context_page_table_base =
+ kgd_gfx_v9_set_vm_context_page_table_base,
+ .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
+ .program_trap_handler_settings =
+ kgd_gfx_v9_program_trap_handler_settings,
+ .build_dequeue_wait_counts_packet_info =
+ kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
+ .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+ .enable_debug_trap = kgd_aldebaran_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v9_4_3_disable_debug_trap,
+ .validate_trap_override_request =
+ kgd_gfx_v9_4_3_validate_trap_override_request,
+ .set_wave_launch_trap_override =
+ kgd_gfx_v9_4_3_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v9_4_3_set_address_watch,
+ .clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch,
+ .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v9_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v9_4_3_hqd_sdma_get_doorbell
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 960acf68150a..0239114fb6c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -21,6 +21,7 @@
*/
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_gfx_v10.h"
#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "athub/athub_2_0_0_offset.h"
@@ -31,6 +32,7 @@
#include "v10_structs.h"
#include "nv.h"
#include "nvd.h"
+#include <uapi/linux/kfd_ioctl.h>
enum hqd_dequeue_request_type {
NO_ACTION = 0,
@@ -39,37 +41,26 @@ enum hqd_dequeue_request_type {
SAVE_WAVES
};
-static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
-{
- return (struct amdgpu_device *)kgd;
-}
-
-static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
uint32_t queue, uint32_t vmid)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, mec, pipe, queue, vmid);
}
-static void unlock_srbm(struct kgd_dev *kgd)
+static void unlock_srbm(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
-static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t queue_id)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(kgd, mec, pipe, queue_id, 0);
+ lock_srbm(adev, mec, pipe, queue_id, 0);
}
static uint64_t get_queue_mask(struct amdgpu_device *adev,
@@ -81,33 +72,29 @@ static uint64_t get_queue_mask(struct amdgpu_device *adev,
return 1ull << bit;
}
-static void release_queue(struct kgd_dev *kgd)
+static void release_queue(struct amdgpu_device *adev)
{
- unlock_srbm(kgd);
+ unlock_srbm(adev);
}
-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases)
+ uint32_t sh_mem_bases, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
- lock_srbm(kgd, 0, 0, 0, vmid);
+ lock_srbm(adev, 0, 0, 0, vmid);
WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
/* APE1 no longer exists on GFX9 */
- unlock_srbm(kgd);
+ unlock_srbm(adev);
}
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
- unsigned int vmid)
+static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
+ unsigned int vmid, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
/*
* We have to assume that there is no outstanding mapping.
* The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
@@ -150,22 +137,22 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
* but still works
*/
-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
+static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec;
uint32_t pipe;
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(kgd, mec, pipe, 0, 0);
+ lock_srbm(adev, mec, pipe, 0, 0);
WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
- unlock_srbm(kgd);
+ unlock_srbm(adev);
return 0;
}
@@ -218,12 +205,11 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
return (struct v10_sdma_mqd *)mqd;
}
-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr,
- uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm)
+static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_compute_mqd *m;
uint32_t *mqd_hqd;
uint32_t reg, hqd_base, data;
@@ -231,7 +217,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
m = get_mqd(mqd);
pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
mqd_hqd = &m->cp_mqd_base_addr_lo;
@@ -296,24 +282,23 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, data);
- release_queue(kgd);
+ release_queue(adev);
return 0;
}
-static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t doorbell_off)
+ uint32_t doorbell_off, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
struct v10_compute_mqd *m;
uint32_t mec, pipe;
int r;
m = get_mqd(mqd);
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
@@ -321,7 +306,7 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
mec, pipe, queue_id);
- spin_lock(&adev->gfx.kiq.ring_lock);
+ spin_lock(&adev->gfx.kiq[0].ring_lock);
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
@@ -348,17 +333,16 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
amdgpu_ring_commit(kiq_ring);
out_unlock:
- spin_unlock(&adev->gfx.kiq.ring_lock);
- release_queue(kgd);
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
+ release_queue(adev);
return r;
}
-static int kgd_hqd_dump(struct kgd_dev *kgd,
+static int kgd_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs)
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do { \
@@ -368,17 +352,17 @@ static int kgd_hqd_dump(struct kgd_dev *kgd,
(*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \
} while (0)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
DUMP_REG(reg);
- release_queue(kgd);
+ release_queue(adev);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
@@ -386,10 +370,9 @@ static int kgd_hqd_dump(struct kgd_dev *kgd,
return 0;
}
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
uint32_t __user *wptr, struct mm_struct *mm)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
unsigned long end_jiffies;
@@ -456,18 +439,17 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
return 0;
}
-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
uint32_t engine_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
engine_id, queue_id);
uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -488,15 +470,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
return 0;
}
-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id)
+static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t act;
bool retval = false;
uint32_t low, high;
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
if (act) {
low = lower_32_bits(queue_address >> 8);
@@ -506,13 +488,12 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
retval = true;
}
- release_queue(kgd);
+ release_queue(adev);
return retval;
}
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
@@ -529,12 +510,11 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
return false;
}
-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id)
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
uint32_t temp;
@@ -548,7 +528,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
int retry;
#endif
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
if (m->cp_hqd_vmid == 0)
WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
@@ -633,20 +613,19 @@ loop:
break;
if (time_after(jiffies, end_jiffies)) {
pr_err("cp queue preemption time out.\n");
- release_queue(kgd);
+ release_queue(adev);
return -ETIME;
}
usleep_range(500, 1000);
}
- release_queue(kgd);
+ release_queue(adev);
return 0;
}
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
unsigned int utimeout)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t temp;
@@ -683,11 +662,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
return 0;
}
-static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
uint8_t vmid, uint16_t *p_pasid)
{
uint32_t value;
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ vmid);
@@ -696,25 +674,10 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
-static int kgd_address_watch_disable(struct kgd_dev *kgd)
-{
- return 0;
-}
-
-static int kgd_address_watch_execute(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- uint32_t cntl_val,
- uint32_t addr_hi,
- uint32_t addr_lo)
-{
- return 0;
-}
-
-static int kgd_wave_control_execute(struct kgd_dev *kgd,
+static int kgd_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
- uint32_t sq_cmd)
+ uint32_t sq_cmd, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t data = 0;
mutex_lock(&adev->grbm_idx_mutex);
@@ -735,18 +698,9 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
return 0;
}
-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset)
-{
- return 0;
-}
-
-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint64_t page_table_base)
+static void set_vm_context_page_table_base(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID %u\n",
vmid);
@@ -757,12 +711,344 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
}
-static void program_trap_handler_settings(struct kgd_dev *kgd,
- uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
+/*
+ * GFX10 helper for wave launch stall requirements on debug trap setting.
+ *
+ * vmid:
+ * Target VMID to stall/unstall.
+ *
+ * stall:
+ * 0-unstall wave launch (enable), 1-stall wave launch (disable).
+ * After wavefront launch has been stalled, allocated waves must drain from
+ * SPI in order for debug trap settings to take effect on those waves.
+ * This is roughly a ~3500 clock cycle wait on SPI where a read on
+ * SPI_GDBG_WAVE_CNTL translates to ~32 clock cycles.
+ * KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY indicates the number of reads required.
+ *
+ * NOTE: We can afford to clear the entire STALL_VMID field on unstall
+ * because current GFX10 chips cannot support multi-process debugging due to
+ * trap configuration and masking being limited to global scope. Always
+ * assume single process conditions.
+ *
+ */
+
+#define KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY 110
+static void kgd_gfx_v10_set_wave_launch_stall(struct amdgpu_device *adev, uint32_t vmid, bool stall)
+{
+ uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+ int i;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID,
+ stall ? 1 << vmid : 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
+
+ if (!stall)
+ return;
+
+ for (i = 0; i < KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY; i++)
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+}
+
+uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ /* assume gfx off is disabled for the debug session if rlc restore not supported. */
+ if (restore_dbg_registers) {
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+ VMID_SEL, 1 << vmid);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+ TRAP_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+ }
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+int kgd_gfx_v10_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH;
- lock_srbm(kgd, 0, 0, 0, vmid);
+ /* The SPI_GDBG_TRAP_MASK register is global and affects all
+ * processes. Only allow OR-ing the address-watch bit, since
+ * this only affects processes under the debugger. Other bits
+ * should stay 0 to avoid the debugger interfering with other
+ * processes.
+ */
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR)
+ return -EINVAL;
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev)
+{
+ uint32_t data, wave_cntl_prev;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK));
+ *trap_mask_prev = REG_GET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN);
+
+ trap_mask_bits = (trap_mask_bits & trap_mask_request) |
+ (*trap_mask_prev & ~trap_mask_request);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN, trap_mask_bits);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, REPLACE, trap_override);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
+
+ /* We need to preserve wave launch mode stall settings. */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), wave_cntl_prev);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+ bool is_mode_set = !!wave_launch_mode;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ VMID_MASK, is_mode_set ? 1 << vmid : 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ MODE, is_mode_set ? wave_launch_mode : 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
+#define SQ_WATCH_STRIDE (mmSQ_WATCH1_ADDR_H - mmSQ_WATCH0_ADDR_H)
+uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
+{
+ /* SQ_WATCH?_ADDR_* and TCP_WATCH?_ADDR_* are programmed with the
+ * same values.
+ */
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t tcp_watch_address_cntl;
+ uint32_t sq_watch_address_cntl;
+
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ tcp_watch_address_cntl = 0;
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VMID,
+ debug_vmid);
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 7);
+
+ sq_watch_address_cntl = 0;
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ VMID,
+ debug_vmid);
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 6);
+
+ /* Turning off this watch point until we set all the registers */
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 0);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ tcp_watch_address_cntl);
+
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ VALID,
+ 0);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ sq_watch_address_cntl);
+
+ /* Program {TCP,SQ}_WATCH?_ADDR* */
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_ADDR_H) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ watch_address_high);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_ADDR_L) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ watch_address_low);
+
+ /* Enable the watch point */
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ tcp_watch_address_cntl);
+
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ VALID,
+ 1);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ sq_watch_address_cntl);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ return 0;
+}
+#undef TCP_WATCH_STRIDE
+#undef SQ_WATCH_STRIDE
+
+
+/* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
+ * The values read are:
+ * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
+ * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
+ * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
+ * gws_wait_time -- Wait Count for Global Wave Syncs.
+ * que_sleep_wait_time -- Wait Count for Dequeue Retry.
+ * sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
+ * sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
+ * deq_retry_wait_time -- Wait Count for Global Wave Syncs.
+ */
+void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
+ uint32_t *wait_times,
+ uint32_t inst)
+
+{
+ *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
+}
+
+void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
+ uint32_t wait_times,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
+ uint32_t *reg_offset,
+ uint32_t *reg_data)
+{
+ *reg_data = wait_times;
+
+ if (sch_wave)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ SCH_WAVE,
+ sch_wave);
+ if (que_sleep)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ QUE_SLEEP,
+ que_sleep);
+
+ *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
+}
+
+static void program_trap_handler_settings(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
+ uint32_t inst)
+{
+ lock_srbm(adev, 0, 0, 0, vmid);
/*
* Program TBA registers
@@ -781,7 +1067,27 @@ static void program_trap_handler_settings(struct kgd_dev *kgd,
WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
upper_32_bits(tma_addr >> 8));
- unlock_srbm(kgd);
+ unlock_srbm(adev);
+}
+
+uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst)
+{ /* Stub in this version: none of the arguments is used. */
+ return 0; /* constant result */
+}
+
+uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst, unsigned int utimeout)
+{ /* Stub in this version: no register access, none of the arguments is used. */
+ return 0; /* constant result */
+}
+
+uint32_t kgd_gfx_v10_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{ /* Stub in this version: engine and queue are ignored. */
+ return 0; /* constant result */
}
const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
@@ -797,12 +1103,21 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
.hqd_destroy = kgd_hqd_destroy,
.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
- .address_watch_disable = kgd_address_watch_disable,
- .address_watch_execute = kgd_address_watch_execute,
.wave_control_execute = kgd_wave_control_execute,
- .address_watch_get_offset = kgd_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_info =
get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v10_set_address_watch,
+ .clear_address_watch = kgd_gfx_v10_clear_address_watch,
+ .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
.program_trap_handler_settings = program_trap_handler_settings,
+ .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v10_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v10_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
new file mode 100644
index 000000000000..a4c607c88178
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid);
+uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid);
+int kgd_gfx_v10_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported);
+uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev);
+uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid);
+uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst);
+uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id);
+void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
+ uint32_t *wait_times,
+ uint32_t inst);
+void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
+ uint32_t wait_times,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
+ uint32_t *reg_offset,
+ uint32_t *reg_data);
+uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,
+ uint32_t pipe_id,
+ uint32_t queue_id,
+ uint32_t inst);
+uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev,
+ uint32_t pipe_id,
+ uint32_t queue_id,
+ uint32_t inst,
+ unsigned int utimeout);
+uint32_t kgd_gfx_v10_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
index dac0d751d5af..f2278a0937ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
@@ -22,10 +22,13 @@
#include <linux/mmu_context.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_gfx_v10.h"
#include "gc/gc_10_3_0_offset.h"
#include "gc/gc_10_3_0_sh_mask.h"
#include "oss/osssys_5_0_0_offset.h"
#include "oss/osssys_5_0_0_sh_mask.h"
+#include "athub/athub_2_1_0_offset.h"
+#include "athub/athub_2_1_0_sh_mask.h"
#include "soc15_common.h"
#include "v10_structs.h"
#include "nv.h"
@@ -38,37 +41,26 @@ enum hqd_dequeue_request_type {
SAVE_WAVES
};
-static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
-{
- return (struct amdgpu_device *)kgd;
-}
-
-static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
uint32_t queue, uint32_t vmid)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, mec, pipe, queue, vmid);
}
-static void unlock_srbm(struct kgd_dev *kgd)
+static void unlock_srbm(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
-static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t queue_id)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(kgd, mec, pipe, queue_id, 0);
+ lock_srbm(adev, mec, pipe, queue_id, 0);
}
static uint64_t get_queue_mask(struct amdgpu_device *adev,
@@ -80,34 +72,30 @@ static uint64_t get_queue_mask(struct amdgpu_device *adev,
return 1ull << bit;
}
-static void release_queue(struct kgd_dev *kgd)
+static void release_queue(struct amdgpu_device *adev)
{
- unlock_srbm(kgd);
+ unlock_srbm(adev);
}
-static void program_sh_mem_settings_v10_3(struct kgd_dev *kgd, uint32_t vmid,
+static void program_sh_mem_settings_v10_3(struct amdgpu_device *adev, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases)
+ uint32_t sh_mem_bases, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
- lock_srbm(kgd, 0, 0, 0, vmid);
+ lock_srbm(adev, 0, 0, 0, vmid);
WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
/* APE1 no longer exists on GFX9 */
- unlock_srbm(kgd);
+ unlock_srbm(adev);
}
/* ATC is defeatured on Sienna_Cichlid */
-static int set_pasid_vmid_mapping_v10_3(struct kgd_dev *kgd, unsigned int pasid,
- unsigned int vmid)
+static int set_pasid_vmid_mapping_v10_3(struct amdgpu_device *adev, unsigned int pasid,
+ unsigned int vmid, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;
/* Mapping vmid to pasid also for IH block */
@@ -118,22 +106,22 @@ static int set_pasid_vmid_mapping_v10_3(struct kgd_dev *kgd, unsigned int pasid,
return 0;
}
-static int init_interrupts_v10_3(struct kgd_dev *kgd, uint32_t pipe_id)
+static int init_interrupts_v10_3(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec;
uint32_t pipe;
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(kgd, mec, pipe, 0, 0);
+ lock_srbm(adev, mec, pipe, 0, 0);
WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
- unlock_srbm(kgd);
+ unlock_srbm(adev);
return 0;
}
@@ -188,12 +176,11 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
return (struct v10_sdma_mqd *)mqd;
}
-static int hqd_load_v10_3(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr,
- uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm)
+static int hqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_compute_mqd *m;
uint32_t *mqd_hqd;
uint32_t reg, hqd_base, data;
@@ -201,7 +188,7 @@ static int hqd_load_v10_3(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
m = get_mqd(mqd);
pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
/* HIQ is set during driver init period with vmid set to 0*/
if (m->cp_hqd_vmid == 0) {
@@ -281,24 +268,23 @@ static int hqd_load_v10_3(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, data);
- release_queue(kgd);
+ release_queue(adev);
return 0;
}
-static int hiq_mqd_load_v10_3(struct kgd_dev *kgd, void *mqd,
+static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t doorbell_off)
+ uint32_t doorbell_off, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
struct v10_compute_mqd *m;
uint32_t mec, pipe;
int r;
m = get_mqd(mqd);
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
@@ -306,7 +292,7 @@ static int hiq_mqd_load_v10_3(struct kgd_dev *kgd, void *mqd,
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
mec, pipe, queue_id);
- spin_lock(&adev->gfx.kiq.ring_lock);
+ spin_lock(&adev->gfx.kiq[0].ring_lock);
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
@@ -333,17 +319,16 @@ static int hiq_mqd_load_v10_3(struct kgd_dev *kgd, void *mqd,
amdgpu_ring_commit(kiq_ring);
out_unlock:
- spin_unlock(&adev->gfx.kiq.ring_lock);
- release_queue(kgd);
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
+ release_queue(adev);
return r;
}
-static int hqd_dump_v10_3(struct kgd_dev *kgd,
+static int hqd_dump_v10_3(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs)
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do { \
@@ -353,17 +338,17 @@ static int hqd_dump_v10_3(struct kgd_dev *kgd,
(*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \
} while (0)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
DUMP_REG(reg);
- release_queue(kgd);
+ release_queue(adev);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
@@ -371,10 +356,9 @@ static int hqd_dump_v10_3(struct kgd_dev *kgd,
return 0;
}
-static int hqd_sdma_load_v10_3(struct kgd_dev *kgd, void *mqd,
+static int hqd_sdma_load_v10_3(struct amdgpu_device *adev, void *mqd,
uint32_t __user *wptr, struct mm_struct *mm)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
unsigned long end_jiffies;
@@ -441,18 +425,17 @@ static int hqd_sdma_load_v10_3(struct kgd_dev *kgd, void *mqd,
return 0;
}
-static int hqd_sdma_dump_v10_3(struct kgd_dev *kgd,
+static int hqd_sdma_dump_v10_3(struct amdgpu_device *adev,
uint32_t engine_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
engine_id, queue_id);
uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+12)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -473,15 +456,15 @@ static int hqd_sdma_dump_v10_3(struct kgd_dev *kgd,
return 0;
}
-static bool hqd_is_occupied_v10_3(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id)
+static bool hqd_is_occupied_v10_3(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t act;
bool retval = false;
uint32_t low, high;
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
if (act) {
low = lower_32_bits(queue_address >> 8);
@@ -491,13 +474,13 @@ static bool hqd_is_occupied_v10_3(struct kgd_dev *kgd, uint64_t queue_address,
high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
retval = true;
}
- release_queue(kgd);
+ release_queue(adev);
return retval;
}
-static bool hqd_sdma_is_occupied_v10_3(struct kgd_dev *kgd, void *mqd)
+static bool hqd_sdma_is_occupied_v10_3(struct amdgpu_device *adev,
+ void *mqd)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
@@ -514,18 +497,17 @@ static bool hqd_sdma_is_occupied_v10_3(struct kgd_dev *kgd, void *mqd)
return false;
}
-static int hqd_destroy_v10_3(struct kgd_dev *kgd, void *mqd,
+static int hqd_destroy_v10_3(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id)
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
uint32_t temp;
struct v10_compute_mqd *m = get_mqd(mqd);
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
if (m->cp_hqd_vmid == 0)
WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
@@ -555,20 +537,19 @@ static int hqd_destroy_v10_3(struct kgd_dev *kgd, void *mqd,
if (time_after(jiffies, end_jiffies)) {
pr_err("cp queue pipe %d queue %d preemption failed\n",
pipe_id, queue_id);
- release_queue(kgd);
+ release_queue(adev);
return -ETIME;
}
usleep_range(500, 1000);
}
- release_queue(kgd);
+ release_queue(adev);
return 0;
}
-static int hqd_sdma_destroy_v10_3(struct kgd_dev *kgd, void *mqd,
+static int hqd_sdma_destroy_v10_3(struct amdgpu_device *adev, void *mqd,
unsigned int utimeout)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t temp;
@@ -605,26 +586,10 @@ static int hqd_sdma_destroy_v10_3(struct kgd_dev *kgd, void *mqd,
return 0;
}
-
-static int address_watch_disable_v10_3(struct kgd_dev *kgd)
-{
- return 0;
-}
-
-static int address_watch_execute_v10_3(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- uint32_t cntl_val,
- uint32_t addr_hi,
- uint32_t addr_lo)
-{
- return 0;
-}
-
-static int wave_control_execute_v10_3(struct kgd_dev *kgd,
+static int wave_control_execute_v10_3(struct amdgpu_device *adev,
uint32_t gfx_index_val,
- uint32_t sq_cmd)
+ uint32_t sq_cmd, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t data = 0;
mutex_lock(&adev->grbm_idx_mutex);
@@ -645,28 +610,30 @@ static int wave_control_execute_v10_3(struct kgd_dev *kgd,
return 0;
}
-static uint32_t address_watch_get_offset_v10_3(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset)
+static bool get_atc_vmid_pasid_mapping_info_v10_3(struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
{
- return 0;
+ uint32_t value;
+
+ value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
-static void set_vm_context_page_table_base_v10_3(struct kgd_dev *kgd, uint32_t vmid,
- uint64_t page_table_base)
+static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
/* SDMA is on gfxhub as well for Navi1* series */
adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
}
-static void program_trap_handler_settings_v10_3(struct kgd_dev *kgd,
- uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
+static void program_trap_handler_settings_v10_3(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
+ uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
- lock_srbm(kgd, 0, 0, 0, vmid);
+ lock_srbm(adev, 0, 0, 0, vmid);
/*
* Program TBA registers
@@ -685,151 +652,8 @@ static void program_trap_handler_settings_v10_3(struct kgd_dev *kgd,
WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
upper_32_bits(tma_addr >> 8));
- unlock_srbm(kgd);
-}
-
-#if 0
-uint32_t enable_debug_trap_v10_3(struct kgd_dev *kgd,
- uint32_t trap_debug_wave_launch_mode,
- uint32_t vmid)
-{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- uint32_t data = 0;
- uint32_t orig_wave_cntl_value;
- uint32_t orig_stall_vmid;
-
- mutex_lock(&adev->grbm_idx_mutex);
-
- orig_wave_cntl_value = RREG32(SOC15_REG_OFFSET(GC,
- 0,
- mmSPI_GDBG_WAVE_CNTL));
- orig_stall_vmid = REG_GET_FIELD(orig_wave_cntl_value,
- SPI_GDBG_WAVE_CNTL,
- STALL_VMID);
-
- data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
-
- data = 0;
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
-
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), orig_stall_vmid);
-
- mutex_unlock(&adev->grbm_idx_mutex);
-
- return 0;
-}
-
-uint32_t disable_debug_trap_v10_3(struct kgd_dev *kgd)
-{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
- mutex_lock(&adev->grbm_idx_mutex);
-
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
-
- mutex_unlock(&adev->grbm_idx_mutex);
-
- return 0;
-}
-
-uint32_t set_wave_launch_trap_override_v10_3(struct kgd_dev *kgd,
- uint32_t trap_override,
- uint32_t trap_mask)
-{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- uint32_t data = 0;
-
- mutex_lock(&adev->grbm_idx_mutex);
-
- data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
- data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
-
- data = 0;
- data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK,
- EXCP_EN, trap_mask);
- data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK,
- REPLACE, trap_override);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
-
- data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
- data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 0);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
-
- mutex_unlock(&adev->grbm_idx_mutex);
-
- return 0;
-}
-
-uint32_t set_wave_launch_mode_v10_3(struct kgd_dev *kgd,
- uint8_t wave_launch_mode,
- uint32_t vmid)
-{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- uint32_t data = 0;
- bool is_stall_mode;
- bool is_mode_set;
-
- is_stall_mode = (wave_launch_mode == 4);
- is_mode_set = (wave_launch_mode != 0 && wave_launch_mode != 4);
-
- mutex_lock(&adev->grbm_idx_mutex);
-
- data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
- VMID_MASK, is_mode_set ? 1 << vmid : 0);
- data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
- MODE, is_mode_set ? wave_launch_mode : 0);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
-
- data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
- data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL,
- STALL_VMID, is_stall_mode ? 1 << vmid : 0);
- data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL,
- STALL_RA, is_stall_mode ? 1 : 0);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
-
- mutex_unlock(&adev->grbm_idx_mutex);
-
- return 0;
-}
-
-/* kgd_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
- * The values read are:
- * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
- * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
- * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
- * gws_wait_time -- Wait Count for Global Wave Syncs.
- * que_sleep_wait_time -- Wait Count for Dequeue Retry.
- * sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
- * sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
- * deq_retry_wait_time -- Wait Count for Global Wave Syncs.
- */
-void get_iq_wait_times_v10_3(struct kgd_dev *kgd,
- uint32_t *wait_times)
-
-{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
- *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
-}
-
-void build_grace_period_packet_info_v10_3(struct kgd_dev *kgd,
- uint32_t wait_times,
- uint32_t grace_period,
- uint32_t *reg_offset,
- uint32_t *reg_data)
-{
- *reg_data = wait_times;
-
- *reg_data = REG_SET_FIELD(*reg_data,
- CP_IQ_WAIT_TIME2,
- SCH_WAVE,
- grace_period);
-
- *reg_offset = mmCP_IQ_WAIT_TIME2;
+ unlock_srbm(adev);
}
-#endif
const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
.program_sh_mem_settings = program_sh_mem_settings_v10_3,
@@ -844,19 +668,20 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
.hqd_sdma_is_occupied = hqd_sdma_is_occupied_v10_3,
.hqd_destroy = hqd_destroy_v10_3,
.hqd_sdma_destroy = hqd_sdma_destroy_v10_3,
- .address_watch_disable = address_watch_disable_v10_3,
- .address_watch_execute = address_watch_execute_v10_3,
.wave_control_execute = wave_control_execute_v10_3,
- .address_watch_get_offset = address_watch_get_offset_v10_3,
- .get_atc_vmid_pasid_mapping_info = NULL,
+ .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info_v10_3,
.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
.program_trap_handler_settings = program_trap_handler_settings_v10_3,
-#if 0
- .enable_debug_trap = enable_debug_trap_v10_3,
- .disable_debug_trap = disable_debug_trap_v10_3,
- .set_wave_launch_trap_override = set_wave_launch_trap_override_v10_3,
- .set_wave_launch_mode = set_wave_launch_mode_v10_3,
- .get_iq_wait_times = get_iq_wait_times_v10_3,
- .build_grace_period_packet_info = build_grace_period_packet_info_v10_3,
-#endif
+ .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
+ .enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v10_set_address_watch,
+ .clear_address_watch = kgd_gfx_v10_clear_address_watch,
+ .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v10_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v10_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
new file mode 100644
index 000000000000..aaccf0b9947d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
@@ -0,0 +1,835 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <linux/mmu_context.h>
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "oss/osssys_6_0_0_offset.h"
+#include "oss/osssys_6_0_0_sh_mask.h"
+#include "soc15_common.h"
+#include "soc15d.h"
+#include "v11_structs.h"
+#include "soc21.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+enum hqd_dequeue_request_type { /* dequeue request codes; values run 0..3 in declaration order */
+ NO_ACTION = 0,
+ DRAIN_PIPE, /* 1 */
+ RESET_WAVES, /* 2 */
+ SAVE_WAVES /* 3 */
+};
+
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{ /* Take srbm_mutex, then select (mec, pipe, queue, vmid); undone by unlock_srbm(). */
+ mutex_lock(&adev->srbm_mutex); /* still held when this function returns */
+ soc21_grbm_select(adev, mec, pipe, queue, vmid);
+}
+
+static void unlock_srbm(struct amdgpu_device *adev)
+{ /* Reverse of lock_srbm(): select 0/0/0/0 first, then drop srbm_mutex. */
+ soc21_grbm_select(adev, 0, 0, 0, 0); /* done while the mutex is still held */
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id)
+{ /* Split the flat pipe_id into (mec, pipe) and lock_srbm() on that queue with vmid 0. */
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; /* the "+ 1" makes the MEC index 1-based */
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); /* pipe index within that MEC */
+
+ lock_srbm(adev, mec, pipe, queue_id, 0); /* srbm_mutex stays held on return */
+}
+
+static uint64_t get_queue_mask(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id)
+{ /* Return a 64-bit mask with exactly one bit set for (pipe_id, queue_id). */
+ unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe + /* flat queue index */
+ queue_id;
+
+ return 1ull << bit; /* NOTE(review): not range-checked; a shift by 64 or more is undefined in C */
+}
+
+static void release_queue(struct amdgpu_device *adev)
+{ /* Counterpart of acquire_queue(); a plain wrapper around unlock_srbm(). */
+ unlock_srbm(adev);
+}
+
+static void program_sh_mem_settings_v11(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases, uint32_t inst)
+{ /* Write SH_MEM_CONFIG and SH_MEM_BASES with vmid selected; the ape1 arguments and inst are unused. */
+ lock_srbm(adev, 0, 0, 0, vmid); /* only the vmid part of the selection is non-zero */
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSH_MEM_CONFIG), sh_mem_config);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSH_MEM_BASES), sh_mem_bases);
+
+ unlock_srbm(adev);
+}
+
+static int set_pasid_vmid_mapping_v11(struct amdgpu_device *adev, unsigned int pasid,
+ unsigned int vmid, uint32_t inst)
+{ /* Record pasid in the IH_VMID_0_LUT entry for vmid; inst is unused; always returns 0. */
+ uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT; /* only the PASID field is populated */
+
+ /* Mapping vmid to pasid also for IH block */
+ pr_debug("mapping vmid %d -> pasid %d in IH block for GFX client\n",
+ vmid, pasid);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid, value); /* LUT entry is indexed by adding vmid to the offset */
+
+ return 0;
+}
+
+static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
+{ /* Write the CPC_INT_CNTL enable bits with pipe_id's (mec, pipe) selected; inst is unused; always returns 0. */
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; /* 1-based MEC index, same split as acquire_queue() */
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, 0, 0); /* queue 0, vmid 0 */
+
+ WREG32_SOC15(GC, 0, regCPC_INT_CNTL, /* the value written is just these two enable masks OR-ed together */
+ CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ unlock_srbm(adev);
+
+ return 0;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{ /* Return the register offset base for queue queue_id of SDMA engine 0 or 1. */
+ uint32_t sdma_engine_reg_base = 0;
+ uint32_t sdma_rlc_reg_offset;
+
+ switch (engine_id) {
+ case 0:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ regSDMA0_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
+ break;
+ case 1: /* subtracts the SDMA0 register name here too; presumably so regSDMA0_* names can be added to the result -- confirm against callers */
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
+ regSDMA1_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
+ break;
+ default: /* any other engine_id reaches BUG() */
+ BUG();
+ }
+
+ sdma_rlc_reg_offset = sdma_engine_reg_base
+ + queue_id * (regSDMA0_QUEUE1_RB_CNTL - regSDMA0_QUEUE0_RB_CNTL); /* per-queue stride = QUEUE0-to-QUEUE1 spacing */
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, sdma_rlc_reg_offset);
+
+ return sdma_rlc_reg_offset;
+}
+
+static inline struct v11_compute_mqd *get_mqd(void *mqd)
+{ /* Reinterpret the opaque mqd pointer as a v11 compute MQD; nothing is validated. */
+ return (struct v11_compute_mqd *)mqd;
+}
+
+static inline struct v11_sdma_mqd *get_sdma_mqd(void *mqd)
+{ /* Reinterpret the opaque mqd pointer as a v11 SDMA MQD; nothing is validated. */
+ return (struct v11_sdma_mqd *)mqd;
+}
+
+static int hqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm, uint32_t inst)
+{ /* Program one compute queue's HQD registers from its MQD and set it active; always returns 0. wptr_shift, wptr_mask, mm and inst are unused. */
+ struct v11_compute_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, hqd_base, data;
+
+ m = get_mqd(mqd);
+
+ pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id); /* srbm_mutex is held until release_queue() at the end */
+
+ /* HIQ is set during driver init period with vmid set to 0*/
+ if (m->cp_hqd_vmid == 0) {
+ uint32_t value, mec, pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+ value = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_CP_SCHEDULERS)); /* read-modify-write of the scheduler1 field only */
+ value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+ ((mec << 5) | (pipe << 3) | queue_id | 0x80));
+ WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_CP_SCHEDULERS), value);
+ }
+
+ /* Copy MQD words to the registers from CP_MQD_BASE_ADDR through CP_HQD_PQ_WPTR_HI (the loop bound below). */
+ mqd_hqd = &m->cp_mqd_base_addr_lo; /* MQD is indexed as a uint32_t array from this field on */
+ hqd_base = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
+
+ for (reg = hqd_base;
+ reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
+ WREG32(reg, mqd_hqd[reg - hqd_base]); /* register N past the base takes MQD word N */
+
+
+ /* Activate doorbell logic before triggering WPTR poll. */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), data);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+ * context may not be accessible (if this function
+ * runs in a work queue). Instead trigger a one-shot
+ * polling read from memory in the CP. This assumes
+ * that wptr is GPU-accessible in the queue's VMID via
+ * ATC or SVM. WPTR==RPTR before starting the poll so
+ * the CP starts fetching new commands from the right
+ * place.
+ *
+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+ * tricky. Assume that the queue didn't overflow. The
+ * number of valid bits in the 32-bit RPTR depends on
+ * the queue size. The remaining bits are taken from
+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
+ * queue size.
+ */
+ uint32_t queue_size =
+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control, /* i.e. 2^(QUEUE_SIZE + 1) */
+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1); /* RPTR bits below the queue size */
+
+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr) /* saved WPTR's low bits are behind RPTR: treat as wrapped */
+ guessed_wptr += queue_size;
+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); /* upper bits of the saved low word */
+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; /* saved high word */
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_LO),
+ lower_32_bits(guessed_wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI),
+ upper_32_bits(guessed_wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR), /* the user pointer value itself is handed to the CP */
+ lower_32_bits((uint64_t)wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ upper_32_bits((uint64_t)wptr));
+ pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_PQ_WPTR_POLL_CNTL1),
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); /* cast keeps only the low 32 bits of the 64-bit mask */
+ }
+
+ /* Start the EOP fetcher */
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_EOP_RPTR),
+ REG_SET_FIELD(m->cp_hqd_eop_rptr,
+ CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); /* ACTIVE is the last register written */
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE), data);
+
+ release_queue(adev);
+
+ return 0;
+}
+/*
+ * hiq_mqd_load_v11 - map the HIQ by queueing a MAP_QUEUES packet on KIQ 0.
+ *
+ * Selects the target queue with acquire_queue(), then, while holding the
+ * KIQ 0 ring lock, writes one PACKET3_MAP_QUEUES packet built from pipe_id,
+ * queue_id, doorbell_off and fields of the v11 compute MQD, and commits it.
+ *
+ * Returns 0 on success or the error code from amdgpu_ring_alloc().
+ * The inst argument is not used here.
+ */
+static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t doorbell_off, uint32_t inst)
+{
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
+ struct v11_compute_mqd *m;
+ uint32_t mec, pipe;
+ int r;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; /* MEC numbering is 1-based here */
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+
+ spin_lock(&adev->gfx.kiq[0].ring_lock);
+ r = amdgpu_ring_alloc(kiq_ring, 7); /* 7 dwords: exactly the seven amdgpu_ring_write() calls below */
+ if (r) {
+ pr_err("Failed to alloc KIQ (%d).\n", r);
+ goto out_unlock;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+ PACKET3_MAP_QUEUES_PIPE(pipe) |
+ PACKET3_MAP_QUEUES_ME((mec - 1)) | /* the packet takes the 0-based ME index */
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+ amdgpu_ring_commit(kiq_ring);
+
+out_unlock: /* shared exit: drop the ring lock, then the queue selection */
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
+ release_queue(adev);
+
+ return r;
+}
+/*
+ * hqd_dump_v11 - snapshot the HQD register range of one compute queue.
+ *
+ * Allocates *dump as an array of HQD_N_REGS {address, value} pairs with
+ * kmalloc_array(GFP_KERNEL) and fills it with every register from
+ * CP_MQD_BASE_ADDR through CP_HQD_PQ_WPTR_HI, read while the queue is
+ * selected.  Column 0 holds the register offset shifted left by 2, column 1
+ * the value read.  *n_regs receives the number of pairs stored.
+ *
+ * Returns 0, or -ENOMEM when the allocation fails.  The buffer is not freed
+ * here (presumably the caller owns it -- confirm against callers).
+ * The inst argument is not used.
+ *
+ * HQD_N_REGS and DUMP_REG are defined inside this function but, being
+ * preprocessor macros, stay visible to the code that follows in the file.
+ */
+static int hqd_dump_v11(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
+{
+ uint32_t i = 0, reg; /* i: next free slot in *dump */
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ for (reg = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
+ DUMP_REG(reg);
+
+ release_queue(adev);
+
+ WARN_ON_ONCE(i != HQD_N_REGS); /* warn if the register range and HQD_N_REGS disagree */
+ *n_regs = i;
+
+ return 0;
+}
+/*
+ * hqd_sdma_load_v11 - program an SDMA queue's registers from its MQD.
+ *
+ * Sequence: write RB_CNTL with RB_ENABLE cleared, poll CONTEXT_STATUS until
+ * IDLE is set (up to 2000 ms), program doorbell offset/enable and the read
+ * pointer, program the write pointer, program ring base and rptr address,
+ * and finally write RB_CNTL with RB_ENABLE set.
+ *
+ * The write pointer comes from user memory when read_user_wptr() succeeds;
+ * otherwise the saved read pointer from the MQD is used.
+ *
+ * Returns 0 on success or -ETIME if the queue never reported idle.
+ */
+static int hqd_sdma_load_v11(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct v11_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr; /* wptr is read as a 64-bit value */
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies; /* 2 s deadline for the idle poll */
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_CONTEXT_STATUS);
+ if (data & SDMA0_QUEUE0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_QUEUE0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_MINOR_PTR_UPDATE, 1); /* both WPTR halves are written between the 1 and the 0 */
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else { /* user wptr unavailable: start with WPTR equal to the saved RPTR */
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_QUEUE0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL, data); /* last step: enable the ring */
+
+ return 0;
+}
+/*
+ * hqd_sdma_dump_v11 - snapshot the registers of one SDMA queue.
+ *
+ * Allocates *dump as HQD_N_REGS {address, value} pairs and fills it from
+ * five register ranges, each offset by the queue's register base from
+ * get_sdma_rlc_reg_offset().  *n_regs receives the number of pairs stored.
+ *
+ * Uses the DUMP_REG macro defined earlier in the file (inside
+ * hqd_dump_v11) and redefines HQD_N_REGS for the SDMA register count.
+ *
+ * Returns 0, or -ENOMEM when the allocation fails.
+ */
+static int hqd_sdma_dump_v11(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg; /* i is the slot index consumed by DUMP_REG */
+#undef HQD_N_REGS
+#define HQD_N_REGS (7+11+1+12+12)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); /* NOTE(review): the five terms presumably match the five loops below -- confirm against the register map */
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = regSDMA0_QUEUE0_RB_CNTL;
+ reg <= regSDMA0_QUEUE0_RB_WPTR_HI; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA0_QUEUE0_RB_RPTR_ADDR_HI;
+ reg <= regSDMA0_QUEUE0_DOORBELL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA0_QUEUE0_DOORBELL_LOG;
+ reg <= regSDMA0_QUEUE0_DOORBELL_LOG; reg++) /* single-register range */
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA0_QUEUE0_DOORBELL_OFFSET;
+ reg <= regSDMA0_QUEUE0_RB_PREEMPT; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA0_QUEUE0_MIDCMD_DATA0;
+ reg <= regSDMA0_QUEUE0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS); /* warn if the ranges and HQD_N_REGS disagree */
+ *n_regs = i;
+
+ return 0;
+}
+
+/*
+ * Report whether the selected HQD slot is active and bound to the queue at
+ * @queue_address.  The slot counts as occupied only when CP_HQD_ACTIVE is
+ * non-zero and CP_HQD_PQ_BASE / CP_HQD_PQ_BASE_HI equal @queue_address >> 8.
+ * @inst is not used.
+ */
+static bool hqd_is_occupied_v11(struct amdgpu_device *adev, uint64_t queue_address,
+				uint32_t pipe_id, uint32_t queue_id, uint32_t inst)
+{
+	const uint64_t pq_base = queue_address >> 8;
+	bool occupied = false;
+
+	acquire_queue(adev, pipe_id, queue_id);
+
+	/* Short-circuit keeps the base registers unread for an inactive slot. */
+	if (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE)) &&
+	    lower_32_bits(pq_base) ==
+			RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_BASE)) &&
+	    upper_32_bits(pq_base) ==
+			RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_BASE_HI)))
+		occupied = true;
+
+	release_queue(adev);
+
+	return occupied;
+}
+
+/*
+ * Report whether the SDMA queue described by @mqd is enabled, i.e. whether
+ * RB_ENABLE is set in that queue's RB_CNTL register.
+ */
+static bool hqd_sdma_is_occupied_v11(struct amdgpu_device *adev, void *mqd)
+{
+	struct v11_sdma_mqd *sdma_mqd = get_sdma_mqd(mqd);
+	uint32_t queue_regs = get_sdma_rlc_reg_offset(adev,
+						      sdma_mqd->sdma_engine_id,
+						      sdma_mqd->sdma_queue_id);
+	uint32_t rb_cntl = RREG32(queue_regs + regSDMA0_QUEUE0_RB_CNTL);
+
+	return !!(rb_cntl & SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK);
+}
+
+/*
+ * hqd_destroy_v11 - request dequeue of a compute HQD and wait until it is
+ * no longer active.
+ * @adev: device
+ * @mqd: v11 compute MQD of the queue; only cp_hqd_vmid is read here
+ * @reset_type: KFD_PREEMPT_TYPE_WAVEFRONT_RESET selects RESET_WAVES, every
+ *              other value selects DRAIN_PIPE
+ * @utimeout: how long to wait for CP_HQD_ACTIVE to clear, in milliseconds
+ * @pipe_id: pipe of the queue
+ * @queue_id: queue within the pipe
+ * @inst: not used
+ *
+ * Return: 0 once the queue is inactive, -ETIME if it is still active after
+ * @utimeout.
+ */
+static int hqd_destroy_v11(struct amdgpu_device *adev, void *mqd,
+			   enum kfd_preempt_type reset_type,
+			   unsigned int utimeout, uint32_t pipe_id,
+			   uint32_t queue_id, uint32_t inst)
+{
+	enum hqd_dequeue_request_type type;
+	unsigned long end_jiffies;
+	uint32_t temp;
+	struct v11_compute_mqd *m = get_mqd(mqd);
+
+	acquire_queue(adev, pipe_id, queue_id);
+
+	/* A queue in VMID 0 also gets the scheduler1 field cleared. */
+	if (m->cp_hqd_vmid == 0)
+		WREG32_FIELD15_PREREG(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+
+	switch (reset_type) {
+	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+		type = DRAIN_PIPE;
+		break;
+	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+		type = RESET_WAVES;
+		break;
+	default:
+		type = DRAIN_PIPE;
+		break;
+	}
+
+	WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_DEQUEUE_REQUEST), type);
+
+	/*
+	 * Use msecs_to_jiffies() rather than open-coded "utimeout * HZ / 1000":
+	 * the product is computed in 32 bits and can wrap for large timeouts,
+	 * and the division truncates short timeouts to zero jiffies when
+	 * HZ < 1000.
+	 */
+	end_jiffies = msecs_to_jiffies(utimeout) + jiffies;
+	while (true) {
+		temp = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE));
+		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+			break;
+		if (time_after(jiffies, end_jiffies)) {
+			pr_err("cp queue pipe %d queue %d preemption failed\n",
+			       pipe_id, queue_id);
+			release_queue(adev);
+			return -ETIME;
+		}
+		usleep_range(500, 1000);
+	}
+
+	release_queue(adev);
+	return 0;
+}
+
+/*
+ * hqd_sdma_destroy_v11 - stop an SDMA queue and save its read pointer.
+ * @adev: device
+ * @mqd: v11 SDMA MQD; engine/queue ids are read, rptr fields are updated
+ * @utimeout: how long to wait for the queue to report idle, in milliseconds
+ *
+ * Clears RB_ENABLE, polls CONTEXT_STATUS until IDLE is set, zeroes the
+ * doorbell register, sets RB_ENABLE again and stores the current
+ * RB_RPTR / RB_RPTR_HI values into the MQD.
+ *
+ * Return: 0 on success, -ETIME if the queue did not go idle within
+ * @utimeout (the MQD is left untouched in that case).
+ */
+static int hqd_sdma_destroy_v11(struct amdgpu_device *adev, void *mqd,
+				unsigned int utimeout)
+{
+	struct v11_sdma_mqd *m;
+	uint32_t sdma_rlc_reg_offset;
+	uint32_t temp;
+	/*
+	 * msecs_to_jiffies() instead of "utimeout * HZ / 1000": no 32-bit
+	 * wrap for large timeouts, no truncation to zero when HZ < 1000, and
+	 * the same helper hqd_sdma_load_v11() uses for its deadline.
+	 */
+	unsigned long end_jiffies = msecs_to_jiffies(utimeout) + jiffies;
+
+	m = get_sdma_mqd(mqd);
+	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+						      m->sdma_queue_id);
+
+	temp = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL);
+	temp = temp & ~SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK;
+	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL, temp);
+
+	while (true) {
+		temp = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_CONTEXT_STATUS);
+		if (temp & SDMA0_QUEUE0_CONTEXT_STATUS__IDLE_MASK)
+			break;
+		if (time_after(jiffies, end_jiffies)) {
+			pr_err("SDMA RLC not idle in %s\n", __func__);
+			return -ETIME;
+		}
+		usleep_range(500, 1000);
+	}
+
+	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL, 0);
+	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL,
+	       RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL) |
+	       SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK);
+
+	/* Save where the engine stopped so a later load can resume there. */
+	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR);
+	m->sdmax_rlcx_rb_rptr_hi =
+		RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_HI);
+
+	return 0;
+}
+
+/*
+ * Issue an SQ command to the shader target selected by @gfx_index_val.
+ *
+ * Under grbm_idx_mutex: write @gfx_index_val to GRBM_GFX_INDEX, write
+ * @sq_cmd to SQ_CMD, then rewrite GRBM_GFX_INDEX with the instance, SA and
+ * SE broadcast-write bits set.  Always returns 0; @inst is not used.
+ */
+static int wave_control_execute_v11(struct amdgpu_device *adev,
+				    uint32_t gfx_index_val,
+				    uint32_t sq_cmd, uint32_t inst)
+{
+	uint32_t broadcast = 0;
+
+	/* Pure computation, done before the lock is taken. */
+	broadcast = REG_SET_FIELD(broadcast, GRBM_GFX_INDEX,
+				  INSTANCE_BROADCAST_WRITES, 1);
+	broadcast = REG_SET_FIELD(broadcast, GRBM_GFX_INDEX,
+				  SA_BROADCAST_WRITES, 1);
+	broadcast = REG_SET_FIELD(broadcast, GRBM_GFX_INDEX,
+				  SE_BROADCAST_WRITES, 1);
+
+	mutex_lock(&adev->grbm_idx_mutex);
+	WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), gfx_index_val);
+	WREG32(SOC15_REG_OFFSET(GC, 0, regSQ_CMD), sq_cmd);
+	WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), broadcast);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return 0;
+}
+
+/*
+ * Point @vmid at @page_table_base through the gfxhub callback.  VMIDs that
+ * amdgpu_amdkfd_is_kfd_vmid() rejects are logged and left untouched.
+ */
+static void set_vm_context_page_table_base_v11(struct amdgpu_device *adev,
+		uint32_t vmid, uint64_t page_table_base)
+{
+	if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+		/* SDMA is on gfxhub as well for gfx11 adapters */
+		adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
+		return;
+	}
+
+	pr_err("trying to set page table base for wrong VMID %u\n",
+	       vmid);
+}
+
+/*
+ * Build the SPI_GDBG_PER_VMID_CNTL value for enabling the debug trap:
+ * TRAP_EN = 1, EXCP_EN = 0, EXCP_REPLACE = 0.  No register is written here;
+ * the value is returned to the caller.
+ *
+ * restore_dbg_registers is ignored here but is a general interface
+ * requirement for devices that support GFXOFF and where the RLC save/restore
+ * list does not support hw registers for debugging, i.e. the driver has to
+ * manually initialize the debug mode registers after it has disabled GFX off
+ * during the debug session.  vmid is not used either.
+ */
+static uint32_t kgd_gfx_v11_enable_debug_trap(struct amdgpu_device *adev,
+					      bool restore_dbg_registers,
+					      uint32_t vmid)
+{
+	uint32_t cntl;
+
+	cntl = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+	cntl = REG_SET_FIELD(cntl, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+	cntl = REG_SET_FIELD(cntl, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+	return cntl;
+}
+
+/*
+ * Build the SPI_GDBG_PER_VMID_CNTL value used when leaving debug mode:
+ * TRAP_EN = 1, EXCP_EN = 0, EXCP_REPLACE = 0.  The value is returned, not
+ * written.  Neither keep_trap_enabled nor vmid is consulted; TRAP_EN is set
+ * unconditionally.
+ */
+static uint32_t kgd_gfx_v11_disable_debug_trap(struct amdgpu_device *adev,
+					       bool keep_trap_enabled,
+					       uint32_t vmid)
+{
+	uint32_t cntl;
+
+	cntl = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+	cntl = REG_SET_FIELD(cntl, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+	cntl = REG_SET_FIELD(cntl, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+	return cntl;
+}
+
+/*
+ * Narrow *trap_mask_supported to the trap bits this backend handles and
+ * validate the override mode.
+ *
+ * The wave start/end trap bits are added for GC IP versions >= 11.0.4.
+ * Returns 0 when trap_override is KFD_DBG_TRAP_OVERRIDE_OR or
+ * KFD_DBG_TRAP_OVERRIDE_REPLACE, -EPERM otherwise.  The mask is updated
+ * before the mode is checked, so it is modified on the error path too.
+ */
+static int kgd_gfx_v11_validate_trap_override_request(struct amdgpu_device *adev,
+						      uint32_t trap_override,
+						      uint32_t *trap_mask_supported)
+{
+	const uint32_t common_traps = KFD_DBG_TRAP_MASK_FP_INVALID |
+				      KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+				      KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+				      KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+				      KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+				      KFD_DBG_TRAP_MASK_FP_INEXACT |
+				      KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+				      KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+				      KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION;
+
+	*trap_mask_supported &= common_traps;
+
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 4))
+		*trap_mask_supported |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START |
+					KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+	if (trap_override == KFD_DBG_TRAP_OVERRIDE_OR ||
+	    trap_override == KFD_DBG_TRAP_OVERRIDE_REPLACE)
+		return 0;
+
+	return -EPERM;
+}
+
+/*
+ * Translate a KFD_DBG_TRAP_MASK_* software mask into SPI_GDBG_PER_VMID_CNTL
+ * layout: the exception bits go into EXCP_EN, the wave start/end bits become
+ * the single-bit TRAP_ON_START / TRAP_ON_END fields.
+ */
+static uint32_t trap_mask_map_sw_to_hw(uint32_t mask)
+{
+	const uint32_t excp_bits = KFD_DBG_TRAP_MASK_FP_INVALID |
+				   KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+				   KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+				   KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+				   KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+				   KFD_DBG_TRAP_MASK_FP_INEXACT |
+				   KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+				   KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+				   KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION;
+	const uint32_t excp_en = mask & excp_bits;
+	const uint32_t on_start = !!(mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START);
+	const uint32_t on_end = !!(mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END);
+	uint32_t hw;
+
+	hw = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en);
+	hw = REG_SET_FIELD(hw, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, on_start);
+	hw = REG_SET_FIELD(hw, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, on_end);
+
+	return hw;
+}
+
+/*
+ * Inverse of trap_mask_map_sw_to_hw(): start from the EXCP_EN field of a
+ * SPI_GDBG_PER_VMID_CNTL value and add the wave start/end software bits when
+ * TRAP_ON_START / TRAP_ON_END are set.
+ */
+static uint32_t trap_mask_map_hw_to_sw(uint32_t mask)
+{
+	uint32_t sw_mask = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
+
+	sw_mask |= REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START) ?
+			KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START : 0;
+	sw_mask |= REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END) ?
+			KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END : 0;
+
+	return sw_mask;
+}
+
+/*
+ * Compute a new SPI_GDBG_PER_VMID_CNTL value for a trap override request.
+ *
+ * *trap_mask_prev receives the software view of kfd_dbg_trap_cntl_prev.
+ * Bits selected by trap_mask_request take their value from trap_mask_bits;
+ * all other bits keep their previous value.  The merged mask is converted
+ * to hardware layout, TRAP_EN is set and EXCP_REPLACE is loaded with
+ * trap_override.  The value is returned, not written; vmid is not used.
+ */
+static uint32_t kgd_gfx_v11_set_wave_launch_trap_override(struct amdgpu_device *adev,
+							   uint32_t vmid,
+							   uint32_t trap_override,
+							   uint32_t trap_mask_bits,
+							   uint32_t trap_mask_request,
+							   uint32_t *trap_mask_prev,
+							   uint32_t kfd_dbg_trap_cntl_prev)
+{
+	uint32_t requested, retained, cntl;
+
+	*trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev);
+
+	requested = trap_mask_bits & trap_mask_request;
+	retained = *trap_mask_prev & ~trap_mask_request;
+
+	cntl = trap_mask_map_sw_to_hw(requested | retained);
+	cntl = REG_SET_FIELD(cntl, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+	cntl = REG_SET_FIELD(cntl, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
+
+	return cntl;
+}
+
+/*
+ * Return a SPI_GDBG_PER_VMID_CNTL value whose LAUNCH_MODE field holds
+ * wave_launch_mode and whose other bits are zero.  vmid is not used.
+ */
+static uint32_t kgd_gfx_v11_set_wave_launch_mode(struct amdgpu_device *adev,
+						 uint8_t wave_launch_mode,
+						 uint32_t vmid)
+{
+	return REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE,
+			     wave_launch_mode);
+}
+
+#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
+/*
+ * Program the address registers of watch point watch_id and return the
+ * matching control word.
+ *
+ * TCP_WATCHn_ADDR_H receives bits 47:32 of watch_address and
+ * TCP_WATCHn_ADDR_L the low 32 bits, both written through WREG32_RLC.
+ * The returned TCP_WATCH0_CNTL-layout value carries MODE = watch_mode,
+ * MASK = watch_address_mask >> 7 and VALID = 1; it is not written here.
+ * debug_vmid and inst are not used.
+ */
+static uint32_t kgd_gfx_v11_set_address_watch(struct amdgpu_device *adev,
+					       uint64_t watch_address,
+					       uint32_t watch_address_mask,
+					       uint32_t watch_id,
+					       uint32_t watch_mode,
+					       uint32_t debug_vmid,
+					       uint32_t inst)
+{
+	const uint32_t addr_lo = lower_32_bits(watch_address);
+	const uint32_t addr_hi = upper_32_bits(watch_address) & 0xffff;
+	uint32_t cntl;
+
+	cntl = REG_SET_FIELD(0, TCP_WATCH0_CNTL, MODE, watch_mode);
+	cntl = REG_SET_FIELD(cntl, TCP_WATCH0_CNTL, MASK,
+			     watch_address_mask >> 7);
+	cntl = REG_SET_FIELD(cntl, TCP_WATCH0_CNTL, VALID, 1);
+
+	/* High half first, then low half. */
+	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
+		    (watch_id * TCP_WATCH_STRIDE)),
+		   addr_hi);
+	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
+		    (watch_id * TCP_WATCH_STRIDE)),
+		   addr_lo);
+
+	return cntl;
+}
+/*
+ * kgd_gfx_v11_clear_address_watch - stub: touches no register and always
+ * returns 0, regardless of watch_id.
+ */
+static uint32_t kgd_gfx_v11_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ return 0;
+}
+/*
+ * kgd_gfx_v11_hqd_get_pq_addr - stub: reads no hardware state and always
+ * returns 0; none of the arguments is used.
+ */
+static uint64_t kgd_gfx_v11_hqd_get_pq_addr(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst)
+{
+ return 0;
+}
+/*
+ * kgd_gfx_v11_hqd_reset - stub: performs no reset and always returns 0;
+ * none of the arguments is used.
+ */
+static uint64_t kgd_gfx_v11_hqd_reset(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst, unsigned int utimeout)
+{
+ return 0;
+}
+/*
+ * kgd_gfx_v11_hqd_sdma_get_doorbell - stub: reads no register and always
+ * returns 0 for any engine/queue pair.
+ */
+static uint32_t kgd_gfx_v11_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ return 0;
+}
+/*
+ * kfd2kgd callback table for the gfx v11 backend.  Every slot except
+ * get_atc_vmid_pasid_mapping_info, which is explicitly NULL, points at a
+ * function in this file.
+ */
+const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
+ .program_sh_mem_settings = program_sh_mem_settings_v11,
+ .set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11,
+ .init_interrupts = init_interrupts_v11,
+ .hqd_load = hqd_load_v11,
+ .hiq_mqd_load = hiq_mqd_load_v11,
+ .hqd_sdma_load = hqd_sdma_load_v11,
+ .hqd_dump = hqd_dump_v11,
+ .hqd_sdma_dump = hqd_sdma_dump_v11,
+ .hqd_is_occupied = hqd_is_occupied_v11,
+ .hqd_sdma_is_occupied = hqd_sdma_is_occupied_v11,
+ .hqd_destroy = hqd_destroy_v11,
+ .hqd_sdma_destroy = hqd_sdma_destroy_v11,
+ .wave_control_execute = wave_control_execute_v11,
+ .get_atc_vmid_pasid_mapping_info = NULL, /* no implementation provided for v11 */
+ .set_vm_context_page_table_base = set_vm_context_page_table_base_v11,
+ .enable_debug_trap = kgd_gfx_v11_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v11_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v11_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v11_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v11_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v11_set_address_watch,
+ .clear_address_watch = kgd_gfx_v11_clear_address_watch, /* stub, returns 0 */
+ .hqd_get_pq_addr = kgd_gfx_v11_hqd_get_pq_addr, /* stub, returns 0 */
+ .hqd_reset = kgd_gfx_v11_hqd_reset, /* stub, returns 0 */
+ .hqd_sdma_get_doorbell = kgd_gfx_v11_hqd_sdma_get_doorbell /* stub, returns 0 */
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c
new file mode 100644
index 000000000000..e0ceab400b2d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c
@@ -0,0 +1,384 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "soc24.h"
+#include <uapi/linux/kfd_ioctl.h>
+/*
+ * lock_srbm - take adev->srbm_mutex and select (mec, pipe, queue, vmid)
+ * through soc24_grbm_select().  The mutex is still held on return;
+ * unlock_srbm() releases it.
+ */
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, mec, pipe, queue, vmid);
+}
+/*
+ * unlock_srbm - counterpart of lock_srbm(): reset the selection to
+ * (0, 0, 0, 0) and then drop adev->srbm_mutex.
+ */
+static void unlock_srbm(struct amdgpu_device *adev)
+{
+ soc24_grbm_select(adev, 0, 0, 0, 0); /* restore the all-zero selection before unlocking */
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+/*
+ * Select a compute queue for register access.  The flat pipe_id is split
+ * into a 1-based MEC number and a pipe index within that MEC; VMID 0 is
+ * selected.  Holds srbm_mutex until release_queue() is called.
+ */
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+			  uint32_t queue_id)
+{
+	lock_srbm(adev,
+		  (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1,
+		  pipe_id % adev->gfx.mec.num_pipe_per_mec,
+		  queue_id, 0);
+}
+/*
+ * release_queue - undo acquire_queue(); thin wrapper around unlock_srbm().
+ */
+static void release_queue(struct amdgpu_device *adev)
+{
+ unlock_srbm(adev);
+}
+
+/*
+ * Enable the time-stamp and opcode-error interrupts on one compute pipe by
+ * writing CPC_INT_CNTL while that pipe (queue 0, VMID 0) is selected.
+ * Always returns 0; inst is not used.
+ */
+static int init_interrupts_v12(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t inst)
+{
+	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+	lock_srbm(adev, mec, pipe, 0, 0);
+	WREG32_SOC15(GC, 0, regCPC_INT_CNTL,
+		     CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+		     CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+	unlock_srbm(adev);
+
+	return 0;
+}
+
+/*
+ * Return the value to add to a regSDMA0_QUEUE0_* register name to reach the
+ * same register of queue queue_id on SDMA engine engine_id.
+ *
+ * Only engines 0 and 1 are handled; any other engine_id hits BUG().
+ */
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+					unsigned int engine_id,
+					unsigned int queue_id)
+{
+	uint32_t engine_base = 0;
+	uint32_t offset;
+
+	if (engine_id == 0)
+		engine_base = SOC15_REG_OFFSET(SDMA0, 0,
+				regSDMA0_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
+	else if (engine_id == 1)
+		engine_base = SOC15_REG_OFFSET(SDMA1, 0,
+				regSDMA1_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
+	else
+		BUG();
+
+	/* Queues are spaced by the QUEUE0 -> QUEUE1 register distance. */
+	offset = engine_base
+		+ queue_id * (regSDMA0_QUEUE1_RB_CNTL - regSDMA0_QUEUE0_RB_CNTL);
+
+	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+		 queue_id, offset);
+
+	return offset;
+}
+/*
+ * hqd_dump_v12 - snapshot the HQD register range of one compute queue.
+ *
+ * Allocates *dump as HQD_N_REGS {address, value} pairs with
+ * kmalloc_array(GFP_KERNEL) and fills it with every register from
+ * CP_MQD_BASE_ADDR through CP_HQD_PQ_WPTR_HI, read while the queue is
+ * selected.  Column 0 is the register offset shifted left by 2, column 1
+ * the value read.  *n_regs receives the number of pairs stored.
+ *
+ * Returns 0, or -ENOMEM when the allocation fails.  The buffer is not freed
+ * here (presumably the caller owns it -- confirm against callers).
+ * The inst argument is not used.
+ */
+static int hqd_dump_v12(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
+{
+ uint32_t i = 0, reg; /* i: next free slot in *dump */
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ for (reg = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
+ DUMP_REG(reg);
+
+ release_queue(adev);
+
+ WARN_ON_ONCE(i != HQD_N_REGS); /* warn if the register range and HQD_N_REGS disagree */
+ *n_regs = i;
+
+ return 0;
+}
+/*
+ * hqd_sdma_dump_v12 - snapshot the registers of one SDMA queue.
+ *
+ * Dumps the single contiguous range RB_CNTL .. CONTEXT_STATUS, offset by the
+ * queue's register base, into a freshly allocated array of
+ * {address, value} pairs.  *n_regs receives the number of pairs stored.
+ *
+ * HQD_N_REGS is redefined in terms of the local first_reg/last_reg, so the
+ * allocation size and the DUMP_REG bound follow the range automatically.
+ *
+ * Returns 0, or -ENOMEM when the allocation fails.
+ */
+static int hqd_sdma_dump_v12(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg; /* i is the slot index consumed by DUMP_REG */
+
+ const uint32_t first_reg = regSDMA0_QUEUE0_RB_CNTL;
+ const uint32_t last_reg = regSDMA0_QUEUE0_CONTEXT_STATUS;
+#undef HQD_N_REGS
+#define HQD_N_REGS (last_reg - first_reg + 1)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = first_reg;
+ reg <= last_reg; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+/*
+ * Issue an SQ command to the shader target selected by @gfx_index_val.
+ *
+ * Under grbm_idx_mutex: write @gfx_index_val to GRBM_GFX_INDEX, write
+ * @sq_cmd to SQ_CMD, then rewrite GRBM_GFX_INDEX with the instance, SA and
+ * SE broadcast-write bits set.  Always returns 0; @inst is not used.
+ */
+static int wave_control_execute_v12(struct amdgpu_device *adev,
+				    uint32_t gfx_index_val,
+				    uint32_t sq_cmd, uint32_t inst)
+{
+	uint32_t broadcast = 0;
+
+	/* Pure computation, done before the lock is taken. */
+	broadcast = REG_SET_FIELD(broadcast, GRBM_GFX_INDEX,
+				  INSTANCE_BROADCAST_WRITES, 1);
+	broadcast = REG_SET_FIELD(broadcast, GRBM_GFX_INDEX,
+				  SA_BROADCAST_WRITES, 1);
+	broadcast = REG_SET_FIELD(broadcast, GRBM_GFX_INDEX,
+				  SE_BROADCAST_WRITES, 1);
+
+	mutex_lock(&adev->grbm_idx_mutex);
+	WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), gfx_index_val);
+	WREG32(SOC15_REG_OFFSET(GC, 0, regSQ_CMD), sq_cmd);
+	WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), broadcast);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return 0;
+}
+
+/*
+ * Build the SPI_GDBG_PER_VMID_CNTL value for enabling the debug trap:
+ * TRAP_EN = 1, EXCP_EN = 0, EXCP_REPLACE = 0.  The value is returned, not
+ * written.  restore_dbg_registers and vmid are not used.
+ */
+static uint32_t kgd_gfx_v12_enable_debug_trap(struct amdgpu_device *adev,
+					      bool restore_dbg_registers,
+					      uint32_t vmid)
+{
+	uint32_t cntl;
+
+	cntl = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+	cntl = REG_SET_FIELD(cntl, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+	cntl = REG_SET_FIELD(cntl, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+	return cntl;
+}
+
+/*
+ * Build the SPI_GDBG_PER_VMID_CNTL value used when leaving debug mode:
+ * TRAP_EN = 1, EXCP_EN = 0, EXCP_REPLACE = 0.  The value is returned, not
+ * written.  Neither keep_trap_enabled nor vmid is consulted; TRAP_EN is set
+ * unconditionally.
+ */
+static uint32_t kgd_gfx_v12_disable_debug_trap(struct amdgpu_device *adev,
+					       bool keep_trap_enabled,
+					       uint32_t vmid)
+{
+	uint32_t cntl;
+
+	cntl = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+	cntl = REG_SET_FIELD(cntl, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+	cntl = REG_SET_FIELD(cntl, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+	return cntl;
+}
+
+/*
+ * Narrow *trap_mask_supported to the trap bits this backend handles
+ * (including the wave start/end traps) and validate the override mode.
+ *
+ * Returns 0 when trap_override is KFD_DBG_TRAP_OVERRIDE_OR or
+ * KFD_DBG_TRAP_OVERRIDE_REPLACE, -EPERM otherwise.  The mask is updated
+ * before the mode is checked, so it is modified on the error path too.
+ */
+static int kgd_gfx_v12_validate_trap_override_request(struct amdgpu_device *adev,
+						      uint32_t trap_override,
+						      uint32_t *trap_mask_supported)
+{
+	const uint32_t handled_traps = KFD_DBG_TRAP_MASK_FP_INVALID |
+				       KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+				       KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+				       KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+				       KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+				       KFD_DBG_TRAP_MASK_FP_INEXACT |
+				       KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+				       KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+				       KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION |
+				       KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START |
+				       KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+	*trap_mask_supported &= handled_traps;
+
+	if (trap_override == KFD_DBG_TRAP_OVERRIDE_OR ||
+	    trap_override == KFD_DBG_TRAP_OVERRIDE_REPLACE)
+		return 0;
+
+	return -EPERM;
+}
+
+/*
+ * Translate a KFD_DBG_TRAP_MASK_* software mask into SPI_GDBG_PER_VMID_CNTL
+ * layout: the exception bits go into EXCP_EN, the wave start/end bits become
+ * the single-bit TRAP_ON_START / TRAP_ON_END fields.
+ */
+static uint32_t trap_mask_map_sw_to_hw(uint32_t mask)
+{
+	const uint32_t excp_bits = KFD_DBG_TRAP_MASK_FP_INVALID |
+				   KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+				   KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+				   KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+				   KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+				   KFD_DBG_TRAP_MASK_FP_INEXACT |
+				   KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+				   KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+				   KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION;
+	const uint32_t excp_en = mask & excp_bits;
+	const uint32_t on_start = !!(mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START);
+	const uint32_t on_end = !!(mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END);
+	uint32_t hw;
+
+	hw = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en);
+	hw = REG_SET_FIELD(hw, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, on_start);
+	hw = REG_SET_FIELD(hw, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, on_end);
+
+	return hw;
+}
+
+/*
+ * Inverse of trap_mask_map_sw_to_hw(): start from the EXCP_EN field of a
+ * SPI_GDBG_PER_VMID_CNTL value and add the wave start/end software bits when
+ * TRAP_ON_START / TRAP_ON_END are set.
+ */
+static uint32_t trap_mask_map_hw_to_sw(uint32_t mask)
+{
+	uint32_t sw_mask = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
+
+	sw_mask |= REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START) ?
+			KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START : 0;
+	sw_mask |= REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END) ?
+			KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END : 0;
+
+	return sw_mask;
+}
+
+/*
+ * Compute a new SPI_GDBG_PER_VMID_CNTL value for a trap override request.
+ *
+ * *trap_mask_prev receives the software view of kfd_dbg_trap_cntl_prev.
+ * Bits selected by trap_mask_request take their value from trap_mask_bits;
+ * all other bits keep their previous value.  The merged mask is converted
+ * to hardware layout, TRAP_EN is set and EXCP_REPLACE is loaded with
+ * trap_override.  The value is returned, not written; vmid is not used.
+ */
+static uint32_t kgd_gfx_v12_set_wave_launch_trap_override(struct amdgpu_device *adev,
+							   uint32_t vmid,
+							   uint32_t trap_override,
+							   uint32_t trap_mask_bits,
+							   uint32_t trap_mask_request,
+							   uint32_t *trap_mask_prev,
+							   uint32_t kfd_dbg_trap_cntl_prev)
+{
+	uint32_t requested, retained, cntl;
+
+	*trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev);
+
+	requested = trap_mask_bits & trap_mask_request;
+	retained = *trap_mask_prev & ~trap_mask_request;
+
+	cntl = trap_mask_map_sw_to_hw(requested | retained);
+	cntl = REG_SET_FIELD(cntl, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+	cntl = REG_SET_FIELD(cntl, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
+
+	return cntl;
+}
+
+/*
+ * Return a SPI_GDBG_PER_VMID_CNTL value for the requested launch mode.
+ * Mode value 4 is treated as stall mode and yields STALL_VMID = 1; any other
+ * value is placed in the LAUNCH_MODE field.  vmid is not used.
+ */
+static uint32_t kgd_gfx_v12_set_wave_launch_mode(struct amdgpu_device *adev,
+						 uint8_t wave_launch_mode,
+						 uint32_t vmid)
+{
+	if (wave_launch_mode == 4)
+		return REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, STALL_VMID,
+				     1);
+
+	return REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE,
+			     wave_launch_mode);
+}
+
+#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
+/*
+ * Program the address registers of watch point watch_id and return the
+ * matching control word.
+ *
+ * TCP_WATCHn_ADDR_H receives bits 47:32 of watch_address and
+ * TCP_WATCHn_ADDR_L the low 32 bits, both written through WREG32_RLC.
+ * The returned TCP_WATCH0_CNTL-layout value carries MODE = watch_mode,
+ * MASK = watch_address_mask >> 7 and VALID = 1; it is not written here.
+ * debug_vmid and inst are not used.
+ */
+static uint32_t kgd_gfx_v12_set_address_watch(struct amdgpu_device *adev,
+					       uint64_t watch_address,
+					       uint32_t watch_address_mask,
+					       uint32_t watch_id,
+					       uint32_t watch_mode,
+					       uint32_t debug_vmid,
+					       uint32_t inst)
+{
+	const uint32_t addr_lo = lower_32_bits(watch_address);
+	const uint32_t addr_hi = upper_32_bits(watch_address) & 0xffff;
+	uint32_t cntl;
+
+	cntl = REG_SET_FIELD(0, TCP_WATCH0_CNTL, MODE, watch_mode);
+	cntl = REG_SET_FIELD(cntl, TCP_WATCH0_CNTL, MASK,
+			     watch_address_mask >> 7);
+	cntl = REG_SET_FIELD(cntl, TCP_WATCH0_CNTL, VALID, 1);
+
+	/* High half first, then low half. */
+	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
+		    (watch_id * TCP_WATCH_STRIDE)),
+		   addr_hi);
+	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
+		    (watch_id * TCP_WATCH_STRIDE)),
+		   addr_lo);
+
+	return cntl;
+}
+/*
+ * kgd_gfx_v12_clear_address_watch - stub: touches no register and always
+ * returns 0, regardless of watch_id.
+ */
+static uint32_t kgd_gfx_v12_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ return 0;
+}
+/*
+ * kgd_gfx_v12_hqd_sdma_get_doorbell - stub: reads no register and always
+ * returns 0 for any engine/queue pair.
+ */
+static uint32_t kgd_gfx_v12_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ return 0;
+}
+/*
+ * kfd2kgd callback table for the gfx v12 backend.  Only the slots listed
+ * here are initialised; get_atc_vmid_pasid_mapping_info is explicitly NULL.
+ */
+const struct kfd2kgd_calls gfx_v12_kfd2kgd = {
+ .init_interrupts = init_interrupts_v12,
+ .hqd_dump = hqd_dump_v12,
+ .hqd_sdma_dump = hqd_sdma_dump_v12,
+ .wave_control_execute = wave_control_execute_v12,
+ .get_atc_vmid_pasid_mapping_info = NULL, /* no implementation provided for v12 */
+ .enable_debug_trap = kgd_gfx_v12_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v12_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v12_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v12_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v12_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v12_set_address_watch,
+ .clear_address_watch = kgd_gfx_v12_clear_address_watch, /* stub, returns 0 */
+ .hqd_sdma_get_doorbell = kgd_gfx_v12_hqd_sdma_get_doorbell /* stub, returns 0 */
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index b91d27e39bad..df77558e03ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -45,105 +45,54 @@ enum {
MAX_WATCH_ADDRESSES = 4
};
-enum {
- ADDRESS_WATCH_REG_ADDR_HI = 0,
- ADDRESS_WATCH_REG_ADDR_LO,
- ADDRESS_WATCH_REG_CNTL,
- ADDRESS_WATCH_REG_MAX
-};
-
-/* not defined in the CI/KV reg file */
-enum {
- ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL,
- ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF,
- ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000,
- /* extend the mask to 26 bits to match the low address field */
- ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6,
- ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF
-};
-
-static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
- mmTCP_WATCH0_ADDR_H, mmTCP_WATCH0_ADDR_L, mmTCP_WATCH0_CNTL,
- mmTCP_WATCH1_ADDR_H, mmTCP_WATCH1_ADDR_L, mmTCP_WATCH1_CNTL,
- mmTCP_WATCH2_ADDR_H, mmTCP_WATCH2_ADDR_L, mmTCP_WATCH2_CNTL,
- mmTCP_WATCH3_ADDR_H, mmTCP_WATCH3_ADDR_L, mmTCP_WATCH3_CNTL
-};
-
-union TCP_WATCH_CNTL_BITS {
- struct {
- uint32_t mask:24;
- uint32_t vmid:4;
- uint32_t atc:1;
- uint32_t mode:2;
- uint32_t valid:1;
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
-{
- return (struct amdgpu_device *)kgd;
-}
-
-static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
uint32_t queue, uint32_t vmid)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
mutex_lock(&adev->srbm_mutex);
WREG32(mmSRBM_GFX_CNTL, value);
}
-static void unlock_srbm(struct kgd_dev *kgd)
+static void unlock_srbm(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
WREG32(mmSRBM_GFX_CNTL, 0);
mutex_unlock(&adev->srbm_mutex);
}
-static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t queue_id)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(kgd, mec, pipe, queue_id, 0);
+ lock_srbm(adev, mec, pipe, queue_id, 0);
}
-static void release_queue(struct kgd_dev *kgd)
+static void release_queue(struct amdgpu_device *adev)
{
- unlock_srbm(kgd);
+ unlock_srbm(adev);
}
-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases)
+ uint32_t sh_mem_bases, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
- lock_srbm(kgd, 0, 0, 0, vmid);
+ lock_srbm(adev, 0, 0, 0, vmid);
WREG32(mmSH_MEM_CONFIG, sh_mem_config);
WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
WREG32(mmSH_MEM_BASES, sh_mem_bases);
- unlock_srbm(kgd);
+ unlock_srbm(adev);
}
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
- unsigned int vmid)
+static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
+ unsigned int vmid, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
/*
* We have to assume that there is no outstanding mapping.
* The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
@@ -165,21 +114,21 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
return 0;
}
-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
+static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec;
uint32_t pipe;
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(kgd, mec, pipe, 0, 0);
+ lock_srbm(adev, mec, pipe, 0, 0);
WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
- unlock_srbm(kgd);
+ unlock_srbm(adev);
return 0;
}
@@ -207,12 +156,11 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
return (struct cik_sdma_rlc_registers *)mqd;
}
-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr,
- uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm)
+static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct cik_mqd *m;
uint32_t *mqd_hqd;
uint32_t reg, wptr_val, data;
@@ -220,7 +168,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
m = get_mqd(mqd);
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */
mqd_hqd = &m->cp_mqd_base_addr_lo;
@@ -237,27 +185,26 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
/* read_user_ptr may take the mm->mmap_lock.
* release srbm_mutex to avoid circular dependency between
- * srbm_mutex->mm_sem->reservation_ww_class_mutex->srbm_mutex.
+ * srbm_mutex->mmap_lock->reservation_ww_class_mutex->srbm_mutex.
*/
- release_queue(kgd);
+ release_queue(adev);
valid_wptr = read_user_wptr(mm, wptr, wptr_val);
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
if (valid_wptr)
WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
WREG32(mmCP_HQD_ACTIVE, data);
- release_queue(kgd);
+ release_queue(adev);
return 0;
}
-static int kgd_hqd_dump(struct kgd_dev *kgd,
+static int kgd_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs)
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t i = 0, reg;
#define HQD_N_REGS (35+4)
#define DUMP_REG(addr) do { \
@@ -267,11 +214,11 @@ static int kgd_hqd_dump(struct kgd_dev *kgd,
(*dump)[i++][1] = RREG32(addr); \
} while (0)
- *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
@@ -281,7 +228,7 @@ static int kgd_hqd_dump(struct kgd_dev *kgd,
for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
DUMP_REG(reg);
- release_queue(kgd);
+ release_queue(adev);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
@@ -289,10 +236,9 @@ static int kgd_hqd_dump(struct kgd_dev *kgd,
return 0;
}
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
uint32_t __user *wptr, struct mm_struct *mm)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct cik_sdma_rlc_registers *m;
unsigned long end_jiffies;
uint32_t sdma_rlc_reg_offset;
@@ -345,18 +291,17 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
return 0;
}
-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
uint32_t engine_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+4)
- *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -372,15 +317,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
return 0;
}
-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id)
+static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t act;
bool retval = false;
uint32_t low, high;
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
act = RREG32(mmCP_HQD_ACTIVE);
if (act) {
low = lower_32_bits(queue_address >> 8);
@@ -390,13 +335,12 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
high == RREG32(mmCP_HQD_PQ_BASE_HI))
retval = true;
}
- release_queue(kgd);
+ release_queue(adev);
return retval;
}
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct cik_sdma_rlc_registers *m;
uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
@@ -412,12 +356,11 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
return false;
}
-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id)
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t temp;
enum hqd_dequeue_request_type type;
unsigned long flags, end_jiffies;
@@ -426,7 +369,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
if (amdgpu_in_reset(adev))
return -EIO;
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
switch (reset_type) {
@@ -504,20 +447,19 @@ loop:
break;
if (time_after(jiffies, end_jiffies)) {
pr_err("cp queue preemption time out\n");
- release_queue(kgd);
+ release_queue(adev);
return -ETIME;
}
usleep_range(500, 1000);
}
- release_queue(kgd);
+ release_queue(adev);
return 0;
}
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
unsigned int utimeout)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct cik_sdma_rlc_registers *m;
uint32_t sdma_rlc_reg_offset;
uint32_t temp;
@@ -551,62 +493,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
return 0;
}
-static int kgd_address_watch_disable(struct kgd_dev *kgd)
-{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- union TCP_WATCH_CNTL_BITS cntl;
- unsigned int i;
-
- cntl.u32All = 0;
-
- cntl.bitfields.valid = 0;
- cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
- cntl.bitfields.atc = 1;
-
- /* Turning off this address until we set all the registers */
- for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
- WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX +
- ADDRESS_WATCH_REG_CNTL], cntl.u32All);
-
- return 0;
-}
-
-static int kgd_address_watch_execute(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- uint32_t cntl_val,
- uint32_t addr_hi,
- uint32_t addr_lo)
-{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- union TCP_WATCH_CNTL_BITS cntl;
-
- cntl.u32All = cntl_val;
-
- /* Turning off this watch point until we set all the registers */
- cntl.bitfields.valid = 0;
- WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
- ADDRESS_WATCH_REG_CNTL], cntl.u32All);
-
- WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
- ADDRESS_WATCH_REG_ADDR_HI], addr_hi);
-
- WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
- ADDRESS_WATCH_REG_ADDR_LO], addr_lo);
-
- /* Enable the watch point */
- cntl.bitfields.valid = 1;
-
- WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
- ADDRESS_WATCH_REG_CNTL], cntl.u32All);
-
- return 0;
-}
-
-static int kgd_wave_control_execute(struct kgd_dev *kgd,
+static int kgd_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
- uint32_t sq_cmd)
+ uint32_t sq_cmd, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t data;
mutex_lock(&adev->grbm_idx_mutex);
@@ -627,18 +517,10 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
return 0;
}
-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset)
-{
- return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
-}
-
-static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
uint8_t vmid, uint16_t *p_pasid)
{
uint32_t value;
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
@@ -646,21 +528,17 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
-static void set_scratch_backing_va(struct kgd_dev *kgd,
+static void set_scratch_backing_va(struct amdgpu_device *adev,
uint64_t va, uint32_t vmid)
{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
- lock_srbm(kgd, 0, 0, 0, vmid);
+ lock_srbm(adev, 0, 0, 0, vmid);
WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
- unlock_srbm(kgd);
+ unlock_srbm(adev);
}
-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint64_t page_table_base)
+static void set_vm_context_page_table_base(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID\n");
return;
@@ -676,15 +554,20 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
* @vmid: vmid pointer
* read vmid from register (CIK).
*/
-static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd)
+static uint32_t read_vmid_from_vmfault_reg(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
}
+static uint32_t kgd_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+
+{
+ return 0;
+}
+
const struct kfd2kgd_calls gfx_v7_kfd2kgd = {
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
@@ -697,12 +580,10 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = {
.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
.hqd_destroy = kgd_hqd_destroy,
.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
- .address_watch_disable = kgd_address_watch_disable,
- .address_watch_execute = kgd_address_watch_execute,
.wave_control_execute = kgd_wave_control_execute,
- .address_watch_get_offset = kgd_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info,
.set_scratch_backing_va = set_scratch_backing_va,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
+ .hqd_sdma_get_doorbell = kgd_hqd_sdma_get_doorbell,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 5ce0ce704a21..e68c0fa8d751 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -39,68 +39,54 @@ enum hqd_dequeue_request_type {
RESET_WAVES
};
-static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
-{
- return (struct amdgpu_device *)kgd;
-}
-
-static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
uint32_t queue, uint32_t vmid)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
mutex_lock(&adev->srbm_mutex);
WREG32(mmSRBM_GFX_CNTL, value);
}
-static void unlock_srbm(struct kgd_dev *kgd)
+static void unlock_srbm(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
WREG32(mmSRBM_GFX_CNTL, 0);
mutex_unlock(&adev->srbm_mutex);
}
-static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t queue_id)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(kgd, mec, pipe, queue_id, 0);
+ lock_srbm(adev, mec, pipe, queue_id, 0);
}
-static void release_queue(struct kgd_dev *kgd)
+static void release_queue(struct amdgpu_device *adev)
{
- unlock_srbm(kgd);
+ unlock_srbm(adev);
}
-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases)
+ uint32_t sh_mem_bases, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
- lock_srbm(kgd, 0, 0, 0, vmid);
+ lock_srbm(adev, 0, 0, 0, vmid);
WREG32(mmSH_MEM_CONFIG, sh_mem_config);
WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
WREG32(mmSH_MEM_BASES, sh_mem_bases);
- unlock_srbm(kgd);
+ unlock_srbm(adev);
}
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
- unsigned int vmid)
+static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
+ unsigned int vmid, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
/*
* We have to assume that there is no outstanding mapping.
* The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
@@ -123,21 +109,21 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
return 0;
}
-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
+static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec;
uint32_t pipe;
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(kgd, mec, pipe, 0, 0);
+ lock_srbm(adev, mec, pipe, 0, 0);
WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
- unlock_srbm(kgd);
+ unlock_srbm(adev);
return 0;
}
@@ -165,12 +151,11 @@ static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
return (struct vi_sdma_mqd *)mqd;
}
-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr,
- uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm)
+static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct vi_mqd *m;
uint32_t *mqd_hqd;
uint32_t reg, wptr_val, data;
@@ -178,7 +163,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
m = get_mqd(mqd);
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
/* HIQ is set during driver init period with vmid set to 0*/
if (m->cp_hqd_vmid == 0) {
@@ -206,7 +191,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
* on ASICs that do not support context-save.
* EOP writes/reads can start anywhere in the ring.
*/
- if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) {
+ if (adev->asic_type != CHIP_TONGA) {
WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
@@ -224,27 +209,26 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
/* read_user_ptr may take the mm->mmap_lock.
* release srbm_mutex to avoid circular dependency between
- * srbm_mutex->mm_sem->reservation_ww_class_mutex->srbm_mutex.
+ * srbm_mutex->mmap_lock->reservation_ww_class_mutex->srbm_mutex.
*/
- release_queue(kgd);
+ release_queue(adev);
valid_wptr = read_user_wptr(mm, wptr, wptr_val);
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
if (valid_wptr)
WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
WREG32(mmCP_HQD_ACTIVE, data);
- release_queue(kgd);
+ release_queue(adev);
return 0;
}
-static int kgd_hqd_dump(struct kgd_dev *kgd,
+static int kgd_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs)
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t i = 0, reg;
#define HQD_N_REGS (54+4)
#define DUMP_REG(addr) do { \
@@ -254,11 +238,11 @@ static int kgd_hqd_dump(struct kgd_dev *kgd,
(*dump)[i++][1] = RREG32(addr); \
} while (0)
- *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
@@ -268,7 +252,7 @@ static int kgd_hqd_dump(struct kgd_dev *kgd,
for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++)
DUMP_REG(reg);
- release_queue(kgd);
+ release_queue(adev);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
@@ -276,10 +260,9 @@ static int kgd_hqd_dump(struct kgd_dev *kgd,
return 0;
}
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
uint32_t __user *wptr, struct mm_struct *mm)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct vi_sdma_mqd *m;
unsigned long end_jiffies;
uint32_t sdma_rlc_reg_offset;
@@ -331,18 +314,17 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
return 0;
}
-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
uint32_t engine_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+4+2+3+7)
- *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -367,15 +349,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
return 0;
}
-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id)
+static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t act;
bool retval = false;
uint32_t low, high;
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
act = RREG32(mmCP_HQD_ACTIVE);
if (act) {
low = lower_32_bits(queue_address >> 8);
@@ -385,13 +367,12 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
high == RREG32(mmCP_HQD_PQ_BASE_HI))
retval = true;
}
- release_queue(kgd);
+ release_queue(adev);
return retval;
}
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct vi_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
@@ -407,12 +388,11 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
return false;
}
-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id)
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t temp;
enum hqd_dequeue_request_type type;
unsigned long flags, end_jiffies;
@@ -422,7 +402,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
if (amdgpu_in_reset(adev))
return -EIO;
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
if (m->cp_hqd_vmid == 0)
WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0);
@@ -502,20 +482,19 @@ loop:
break;
if (time_after(jiffies, end_jiffies)) {
pr_err("cp queue preemption time out.\n");
- release_queue(kgd);
+ release_queue(adev);
return -ETIME;
}
usleep_range(500, 1000);
}
- release_queue(kgd);
+ release_queue(adev);
return 0;
}
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
unsigned int utimeout)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct vi_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t temp;
@@ -549,11 +528,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
return 0;
}
-static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
uint8_t vmid, uint16_t *p_pasid)
{
uint32_t value;
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
@@ -561,25 +539,10 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
-static int kgd_address_watch_disable(struct kgd_dev *kgd)
-{
- return 0;
-}
-
-static int kgd_address_watch_execute(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- uint32_t cntl_val,
- uint32_t addr_hi,
- uint32_t addr_lo)
-{
- return 0;
-}
-
-static int kgd_wave_control_execute(struct kgd_dev *kgd,
+static int kgd_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
- uint32_t sq_cmd)
+ uint32_t sq_cmd, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t data = 0;
mutex_lock(&adev->grbm_idx_mutex);
@@ -600,28 +563,17 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
return 0;
}
-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset)
-{
- return 0;
-}
-
-static void set_scratch_backing_va(struct kgd_dev *kgd,
+static void set_scratch_backing_va(struct amdgpu_device *adev,
uint64_t va, uint32_t vmid)
{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
- lock_srbm(kgd, 0, 0, 0, vmid);
+ lock_srbm(adev, 0, 0, 0, vmid);
WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
- unlock_srbm(kgd);
+ unlock_srbm(adev);
}
-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint64_t page_table_base)
+static void set_vm_context_page_table_base(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID\n");
return;
@@ -630,6 +582,13 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
lower_32_bits(page_table_base));
}
+static uint32_t kgd_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+
+{
+ return 0;
+}
+
const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
@@ -642,12 +601,10 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
.hqd_destroy = kgd_hqd_destroy,
.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
- .address_watch_disable = kgd_address_watch_disable,
- .address_watch_execute = kgd_address_watch_execute,
.wave_control_execute = kgd_wave_control_execute,
- .address_watch_get_offset = kgd_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_info =
get_atc_vmid_pasid_mapping_info,
.set_scratch_backing_va = set_scratch_backing_va,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .hqd_sdma_get_doorbell = kgd_hqd_sdma_get_doorbell,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index bcc1cbeb8799..088d09cc7a72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -38,6 +38,7 @@
#include "soc15d.h"
#include "gfx_v9_0.h"
#include "amdgpu_amdkfd_gfx_v9.h"
+#include <uapi/linux/kfd_ioctl.h>
enum hqd_dequeue_request_type {
NO_ACTION = 0,
@@ -46,40 +47,29 @@ enum hqd_dequeue_request_type {
SAVE_WAVES
};
-static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
+static void kgd_gfx_v9_lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid, uint32_t inst)
{
- return (struct amdgpu_device *)kgd;
-}
-
-static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
- uint32_t queue, uint32_t vmid)
-{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
mutex_lock(&adev->srbm_mutex);
- soc15_grbm_select(adev, mec, pipe, queue, vmid);
+ soc15_grbm_select(adev, mec, pipe, queue, vmid, GET_INST(GC, inst));
}
-static void unlock_srbm(struct kgd_dev *kgd)
+static void kgd_gfx_v9_unlock_srbm(struct amdgpu_device *adev, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst));
mutex_unlock(&adev->srbm_mutex);
}
-static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t queue_id)
+void kgd_gfx_v9_acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(kgd, mec, pipe, queue_id, 0);
+ kgd_gfx_v9_lock_srbm(adev, mec, pipe, queue_id, 0, inst);
}
-static uint64_t get_queue_mask(struct amdgpu_device *adev,
+uint64_t kgd_gfx_v9_get_queue_mask(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id)
{
unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
@@ -88,33 +78,29 @@ static uint64_t get_queue_mask(struct amdgpu_device *adev,
return 1ull << bit;
}
-static void release_queue(struct kgd_dev *kgd)
+void kgd_gfx_v9_release_queue(struct amdgpu_device *adev, uint32_t inst)
{
- unlock_srbm(kgd);
+ kgd_gfx_v9_unlock_srbm(adev, inst);
}
-void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases)
+ uint32_t sh_mem_bases, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
- lock_srbm(kgd, 0, 0, 0, vmid);
+ kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst);
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmSH_MEM_CONFIG, sh_mem_config);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmSH_MEM_BASES, sh_mem_bases);
/* APE1 no longer exists on GFX9 */
- unlock_srbm(kgd);
+ kgd_gfx_v9_unlock_srbm(adev, inst);
}
-int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
- unsigned int vmid)
+int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
+ unsigned int vmid, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
/*
* We have to assume that there is no outstanding mapping.
* The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
@@ -171,22 +157,22 @@ int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
* but still works
*/
-int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
+int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec;
uint32_t pipe;
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(kgd, mec, pipe, 0, 0);
+ kgd_gfx_v9_lock_srbm(adev, mec, pipe, 0, 0, inst);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmCPC_INT_CNTL,
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
- unlock_srbm(kgd);
+ kgd_gfx_v9_unlock_srbm(adev, inst);
return 0;
}
@@ -233,33 +219,33 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
return (struct v9_sdma_mqd *)mqd;
}
-int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr,
- uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm)
+int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm,
+ uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_mqd *m;
uint32_t *mqd_hqd;
uint32_t reg, hqd_base, data;
m = get_mqd(mqd);
- acquire_queue(kgd, pipe_id, queue_id);
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
mqd_hqd = &m->cp_mqd_base_addr_lo;
- hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+ hqd_base = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_MQD_BASE_ADDR);
for (reg = hqd_base;
- reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
- WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+ reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++)
+ WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst);
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL, data);
if (wptr) {
/* Don't read wptr with get_user because the user
@@ -288,44 +274,42 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
- lower_32_bits(guessed_wptr));
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
- upper_32_bits(guessed_wptr));
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
- lower_32_bits((uintptr_t)wptr));
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
- upper_32_bits((uintptr_t)wptr));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
- (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_LO,
+ lower_32_bits(guessed_wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI,
+ upper_32_bits(guessed_wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR,
+ lower_32_bits((uintptr_t)wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ upper_32_bits((uintptr_t)wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_PQ_WPTR_POLL_CNTL1,
+ (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id));
}
/* Start the EOP fetcher */
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
- REG_SET_FIELD(m->cp_hqd_eop_rptr,
- CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_EOP_RPTR,
+ REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE, data);
- release_queue(kgd);
+ kgd_gfx_v9_release_queue(adev, inst);
return 0;
}
-int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t doorbell_off)
+ uint32_t doorbell_off, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[inst].ring;
struct v9_mqd *m;
uint32_t mec, pipe;
int r;
m = get_mqd(mqd);
- acquire_queue(kgd, pipe_id, queue_id);
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
@@ -333,7 +317,7 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
mec, pipe, queue_id);
- spin_lock(&adev->gfx.kiq.ring_lock);
+ spin_lock(&adev->gfx.kiq[inst].ring_lock);
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
@@ -360,17 +344,16 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
amdgpu_ring_commit(kiq_ring);
out_unlock:
- spin_unlock(&adev->gfx.kiq.ring_lock);
- release_queue(kgd);
+ spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+ kgd_gfx_v9_release_queue(adev, inst);
return r;
}
-int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
+int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs)
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do { \
@@ -380,17 +363,17 @@ int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
(*dump)[i++][1] = RREG32(addr); \
} while (0)
- *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
- acquire_queue(kgd, pipe_id, queue_id);
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
- for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
- reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ for (reg = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++)
DUMP_REG(reg);
- release_queue(kgd);
+ kgd_gfx_v9_release_queue(adev, inst);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
@@ -398,10 +381,9 @@ int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
return 0;
}
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
uint32_t __user *wptr, struct mm_struct *mm)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
unsigned long end_jiffies;
@@ -468,18 +450,17 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
return 0;
}
-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
uint32_t engine_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
engine_id, queue_id);
uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)
- *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -500,31 +481,30 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
return 0;
}
-bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id)
+bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t act;
bool retval = false;
uint32_t low, high;
- acquire_queue(kgd, pipe_id, queue_id);
- act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+ act = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE);
if (act) {
low = lower_32_bits(queue_address >> 8);
high = upper_32_bits(queue_address >> 8);
- if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
- high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
+ if (low == RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE) &&
+ high == RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI))
retval = true;
}
- release_queue(kgd);
+ kgd_gfx_v9_release_queue(adev, inst);
return retval;
}
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
@@ -541,12 +521,11 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
return false;
}
-int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id)
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
uint32_t temp;
@@ -555,10 +534,10 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
if (amdgpu_in_reset(adev))
return -EIO;
- acquire_queue(kgd, pipe_id, queue_id);
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
if (m->cp_hqd_vmid == 0)
- WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+ WREG32_FIELD15_RLC(GC, GET_INST(GC, inst), RLC_CP_SCHEDULERS, scheduler1, 0);
switch (reset_type) {
case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
@@ -575,29 +554,28 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
break;
}
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_DEQUEUE_REQUEST, type);
end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
- temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+ temp = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE);
if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
break;
if (time_after(jiffies, end_jiffies)) {
pr_err("cp queue preemption time out.\n");
- release_queue(kgd);
+ kgd_gfx_v9_release_queue(adev, inst);
return -ETIME;
}
usleep_range(500, 1000);
}
- release_queue(kgd);
+ kgd_gfx_v9_release_queue(adev, inst);
return 0;
}
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
unsigned int utimeout)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t temp;
@@ -634,11 +612,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
return 0;
}
-bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
uint8_t vmid, uint16_t *p_pasid)
{
uint32_t value;
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ vmid);
@@ -647,57 +624,295 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
-int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd)
+/*
+ * Issue an SQ command on one GC instance.
+ *
+ * adev:          device whose registers are written
+ * gfx_index_val: value written to GRBM_GFX_INDEX before the command
+ * sq_cmd:        value written to SQ_CMD
+ * inst:          instance index, translated with GET_INST(GC, inst)
+ *
+ * The whole sequence runs under adev->grbm_idx_mutex. After the command is
+ * written, GRBM_GFX_INDEX is rewritten with the INSTANCE, SH and SE
+ * broadcast-write fields all set to 1. Always returns 0.
+ */
+int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd, uint32_t inst)
+{
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ /* Select the target via GRBM_GFX_INDEX, then send the command. */
+ WREG32_SOC15_RLC_SHADOW(GC, GET_INST(GC, inst), mmGRBM_GFX_INDEX, gfx_index_val);
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_CMD, sq_cmd);
+
+ /* Build a GRBM_GFX_INDEX value with all three broadcast fields set. */
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SH_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ /* Put GRBM_GFX_INDEX back into broadcast mode before unlocking. */
+ WREG32_SOC15_RLC_SHADOW(GC, GET_INST(GC, inst), mmGRBM_GFX_INDEX, data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+/*
+ * GFX9 helper for wave launch stall requirements on debug trap setting.
+ *
+ * adev:
+ * Device whose SPI_GDBG_WAVE_CNTL register is updated. The register is
+ * always accessed through GC instance 0 in this helper.
+ *
+ * vmid:
+ * Target VMID to stall/unstall.
+ *
+ * stall:
+ * 0-unstall wave launch (enable), 1-stall wave launch (disable).
+ * After wavefront launch has been stalled, allocated waves must drain from
+ * SPI in order for debug trap settings to take effect on those waves.
+ * This is roughly a ~96 clock cycle wait on SPI where a read on
+ * SPI_GDBG_WAVE_CNTL translates to ~32 clock cycles.
+ * KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY indicates the number of reads required.
+ *
+ * NOTE: We can afford to clear the entire STALL_VMID field on unstall
+ * because GFX9.4.1 cannot support multi-process debugging due to trap
+ * configuration and masking being limited to global scope. Always assume
+ * single process conditions.
+ *
+ * This helper takes no lock itself; NOTE(review): callers appear to be
+ * expected to hold adev->grbm_idx_mutex - confirm against all call sites.
+ */
+#define KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY 3
+void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev,
+ uint32_t vmid,
+ bool stall)
+{
+ int i;
+ uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+
+ /* GC 9.4.1 stalls per VMID via a bitmask; other versions use STALL_RA. */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID,
+ stall ? 1 << vmid : 0);
+ else
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA,
+ stall ? 1 : 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
+
+ /* Unstalling needs no drain wait. */
+ if (!stall)
+ return;
+
+ /* Dummy reads: their values are discarded, they only provide the drain delay. */
+ for (i = 0; i < KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY; i++)
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+}
+
+/*
+ * restore_dbg_registers is ignored here but is a general interface requirement
+ * for devices that support GFXOFF and where the RLC save/restore list
+ * does not support hw registers for debugging i.e. the driver has to manually
+ * initialize the debug mode registers after it has disabled GFX off during the
+ * debug session.
+ */
+uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
return 0;
}
-int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- uint32_t cntl_val,
- uint32_t addr_hi,
- uint32_t addr_lo)
+/*
+ * keep_trap_enabled is ignored here but is a general interface requirement
+ * for devices that support multi-process debugging where the performance
+ * overhead from trap temporary setup needs to be bypassed when the debug
+ * session has ended.
+ */
+uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
return 0;
}
-int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd,
- uint32_t gfx_index_val,
- uint32_t sq_cmd)
+/*
+ * Validate a trap override request and report which trap mask bits are
+ * supported.
+ *
+ * adev:                not used by this implementation
+ * trap_override:       requested override mode
+ * trap_mask_supported: in/out; reduced to at most the
+ *                      KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH bit
+ *
+ * Returns 0 when trap_override is KFD_DBG_TRAP_OVERRIDE_OR, -EINVAL
+ * otherwise. Note that *trap_mask_supported is narrowed before the mode is
+ * checked, so it is modified even when -EINVAL is returned.
+ */
+int kgd_gfx_v9_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH;
+
+ /* The SPI_GDBG_TRAP_MASK register is global and affects all
+ * processes. Only allow OR-ing the address-watch bit, since
+ * this only affects processes under the debugger. Other bits
+ * should stay 0 to avoid the debugger interfering with other
+ * processes.
+ */
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR)
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * Update the EXCP_EN and REPLACE fields of SPI_GDBG_TRAP_MASK (GC instance 0).
+ *
+ * adev:              device whose registers are accessed
+ * vmid:              VMID passed to the wave launch stall helper
+ * trap_override:     value written to the REPLACE field
+ * trap_mask_bits:    new exception-enable bits
+ * trap_mask_request: selects which bits of trap_mask_bits are applied; bits
+ *                    outside this mask keep their previous EXCP_EN value
+ * trap_mask_prev:    out; receives the EXCP_EN value read before the update
+ * kfd_dbg_cntl_prev: not used by this implementation
+ *
+ * Runs under adev->grbm_idx_mutex with wave launch stalled during the
+ * update. Always returns 0.
+ */
+uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_cntl_prev)
+{
+ uint32_t data, wave_cntl_prev;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ /* Snapshot SPI_GDBG_WAVE_CNTL before stalling so it can be restored below. */
+ wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK));
+ *trap_mask_prev = REG_GET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN);
+
+ /* Requested bits come from trap_mask_bits, the rest from the old value. */
+ trap_mask_bits = (trap_mask_bits & trap_mask_request) |
+ (*trap_mask_prev & ~trap_mask_request);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN, trap_mask_bits);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, REPLACE, trap_override);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
+
+ /* We need to preserve wave launch mode stall settings. */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), wave_cntl_prev);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t data = 0;
+ bool is_mode_set = !!wave_launch_mode;
mutex_lock(&adev->grbm_idx_mutex);
- WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
- data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
- INSTANCE_BROADCAST_WRITES, 1);
- data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
- SH_BROADCAST_WRITES, 1);
- data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
- SE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ VMID_MASK, is_mode_set ? 1 << vmid : 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ MODE, is_mode_set ? wave_launch_mode : 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
- WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
-uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset)
+#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
+uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VMID,
+ debug_vmid);
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 6);
+
+ /* Turning off this watch point until we set all the registers */
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 0);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ /* Enable the watch point */
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
return 0;
}
-void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd,
- uint32_t vmid, uint64_t page_table_base)
+uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ return 0;
+}
+
+/* kgd_gfx_v9_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
+ * The values read are:
+ * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
+ * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
+ * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
+ * gws_wait_time -- Wait Count for Global Wave Syncs.
+ * que_sleep_wait_time -- Wait Count for Dequeue Retry.
+ * sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
+ * sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
+ * deq_retry_wait_time -- Wait Count for Global Wave Syncs.
+ *
+ * NOTE(review): the body below reads only mmCP_IQ_WAIT_TIME2 for the given
+ * instance and stores that single 32-bit value in *wait_times; nothing is
+ * read from a TIME1 register here. The descriptions of gws_wait_time and
+ * deq_retry_wait_time above are identical, which looks like a copy/paste
+ * slip - confirm the field meanings against the register specification.
+ */
+void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
+ uint32_t *wait_times,
+ uint32_t inst)
+{
+ *wait_times = RREG32_SOC15_RLC(GC, GET_INST(GC, inst),
+ mmCP_IQ_WAIT_TIME2);
+}
+
+void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
+{
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID %u\n",
vmid);
@@ -729,33 +944,34 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
*
* @adev: Handle of device whose registers are to be read
* @queue_idx: Index of queue in the queue-map bit-field
- * @wave_cnt: Output parameter updated with number of waves in flight
- * @vmid: Output parameter updated with VMID of queue whose wave count
- * is being collected
+ * @queue_cnt: Stores the wave count and doorbell offset for an active queue
+ * @inst: xcc's instance number on a multi-XCC setup
*/
static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
- int *wave_cnt, int *vmid)
+ struct kfd_cu_occupancy *queue_cnt, uint32_t inst)
{
int pipe_idx;
int queue_slot;
unsigned int reg_val;
-
+ unsigned int wave_cnt;
/*
* Program GRBM with appropriate MEID, PIPEID, QUEUEID and VMID
* parameters to read out waves in flight. Get VMID if there are
* non-zero waves in flight.
*/
- *vmid = 0xFF;
- *wave_cnt = 0;
pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
- soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0);
- reg_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
- queue_slot);
- *wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
- if (*wave_cnt != 0)
- *vmid = (RREG32_SOC15(GC, 0, mmCP_HQD_VMID) &
- CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT;
+ soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, GET_INST(GC, inst));
+ reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+ mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot);
+ wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
+ if (wave_cnt != 0) {
+ queue_cnt->wave_cnt += wave_cnt;
+ queue_cnt->doorbell_off =
+ (RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL) &
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK) >>
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ }
}
/**
@@ -764,16 +980,16 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
* process whose pasid is provided as a parameter. The process could have ZERO
* or more queues running and submitting waves to compute units.
*
- * @kgd: Handle of device from which to get number of waves in flight
- * @pasid: Identifies the process for which this query call is invoked
- * @pasid_wave_cnt: Output parameter updated with number of waves in flight that
- * belong to process with given pasid
+ * @adev: Handle of device from which to get number of waves in flight
+ * @cu_occupancy: Array that gets filled with wave_cnt and doorbell offset
+ * for comparison later.
* @max_waves_per_cu: Output parameter updated with maximum number of waves
- * possible per Compute Unit
+ * possible per Compute Unit
+ * @inst: xcc's instance number on a multi-XCC setup
*
* Note: It's possible that the device has too many queues (oversubscription)
* in which case a VMID could be remapped to a different PASID. This could lead
- * to an iaccurate wave count. Following is a high-level sequence:
+ * to an inaccurate wave count. Following is a high-level sequence:
* Time T1: vmid = getVmid(); vmid is associated with Pasid P1
* Time T2: passId = getPasId(vmid); vmid is associated with Pasid P2
* In the sequence above wave count obtained from time T1 will be incorrectly
@@ -794,118 +1010,224 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
* number of waves that are in flight for the queue at specified index. The
* index ranges from 0 to 7.
*
- * If non-zero waves are in flight, read CP_HQD_VMID register to obtain VMID
- * of the wave(s).
+ * If non-zero waves are in flight, store the corresponding doorbell offset
+ * of the queue, along with the wave count.
*
- * Determine if VMID from above step maps to pasid provided as parameter. If
- * it matches agrregate the wave count. That the VMID will not match pasid is
- * a normal condition i.e. a device is expected to support multiple queues
- * from multiple proceses.
+ * Determine if the queue belongs to the process by comparing the doorbell
+ * offset against the process's queues. If it matches, aggregate the wave
+ * count for the process.
*
* Reading registers referenced above involves programming GRBM appropriately
*/
-void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid,
- int *pasid_wave_cnt, int *max_waves_per_cu)
+void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
+ struct kfd_cu_occupancy *cu_occupancy,
+ int *max_waves_per_cu, uint32_t inst)
{
int qidx;
- int vmid;
int se_idx;
- int sh_idx;
int se_cnt;
- int sh_cnt;
- int wave_cnt;
int queue_map;
- int pasid_tmp;
int max_queue_cnt;
- int vmid_wave_cnt = 0;
- struct amdgpu_device *adev;
- DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES);
+ DECLARE_BITMAP(cp_queue_bitmap, AMDGPU_MAX_QUEUES);
- adev = get_amdgpu_device(kgd);
lock_spi_csq_mutexes(adev);
- soc15_grbm_select(adev, 1, 0, 0, 0);
+ soc15_grbm_select(adev, 1, 0, 0, 0, GET_INST(GC, inst));
/*
* Iterate through the shader engines and arrays of the device
* to get number of waves in flight
*/
- bitmap_complement(cp_queue_bitmap, adev->gfx.mec.queue_bitmap,
- KGD_MAX_QUEUES);
+ bitmap_complement(cp_queue_bitmap, adev->gfx.mec_bitmap[0].queue_bitmap,
+ AMDGPU_MAX_QUEUES);
max_queue_cnt = adev->gfx.mec.num_pipe_per_mec *
adev->gfx.mec.num_queue_per_pipe;
- sh_cnt = adev->gfx.config.max_sh_per_se;
se_cnt = adev->gfx.config.max_shader_engines;
for (se_idx = 0; se_idx < se_cnt; se_idx++) {
- for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {
+ amdgpu_gfx_select_se_sh(adev, se_idx, 0, 0xffffffff, inst);
+ queue_map = RREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_CSQ_WF_ACTIVE_STATUS);
+
+ /*
+ * Assumption: queue map encodes following schema: four
+ * pipes per each micro-engine, with each pipe mapping
+ * eight queues. This schema is true for GFX9 devices
+ * and must be verified for newer device families
+ */
+ for (qidx = 0; qidx < max_queue_cnt; qidx++) {
+ /* Skip queues that are not associated with
+ * compute functions
+ */
+ if (!test_bit(qidx, cp_queue_bitmap))
+ continue;
- gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 0xffffffff);
- queue_map = RREG32(SOC15_REG_OFFSET(GC, 0,
- mmSPI_CSQ_WF_ACTIVE_STATUS));
+ if (!(queue_map & (1 << qidx)))
+ continue;
- /*
- * Assumption: queue map encodes following schema: four
- * pipes per each micro-engine, with each pipe mapping
- * eight queues. This schema is true for GFX9 devices
- * and must be verified for newer device families
- */
- for (qidx = 0; qidx < max_queue_cnt; qidx++) {
-
- /* Skip qeueus that are not associated with
- * compute functions
- */
- if (!test_bit(qidx, cp_queue_bitmap))
- continue;
-
- if (!(queue_map & (1 << qidx)))
- continue;
-
- /* Get number of waves in flight and aggregate them */
- get_wave_count(adev, qidx, &wave_cnt, &vmid);
- if (wave_cnt != 0) {
- pasid_tmp =
- RREG32(SOC15_REG_OFFSET(OSSSYS, 0,
- mmIH_VMID_0_LUT) + vmid);
- if (pasid_tmp == pasid)
- vmid_wave_cnt += wave_cnt;
- }
- }
+ /* Get number of waves in flight and aggregate them */
+ get_wave_count(adev, qidx, &cu_occupancy[qidx],
+ inst);
}
}
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, inst);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst));
unlock_spi_csq_mutexes(adev);
/* Update the output parameters and return */
- *pasid_wave_cnt = vmid_wave_cnt;
*max_waves_per_cu = adev->gfx.cu_info.simd_per_cu *
adev->gfx.cu_info.max_waves_per_simd;
}
-void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd,
- uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
+void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
+ uint32_t wait_times,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
+ uint32_t *reg_offset,
+ uint32_t *reg_data)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ *reg_data = wait_times;
+
+ if (sch_wave)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ SCH_WAVE,
+ sch_wave);
+ if (que_sleep)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ QUE_SLEEP,
+ que_sleep);
+
+ *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
+}
- lock_srbm(kgd, 0, 0, 0, vmid);
+void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst)
+{
+ kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst);
/*
* Program TBA registers
*/
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
- lower_32_bits(tba_addr >> 8));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
- upper_32_bits(tba_addr >> 8));
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TBA_LO,
+ lower_32_bits(tba_addr >> 8));
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TBA_HI,
+ upper_32_bits(tba_addr >> 8));
/*
* Program TMA registers
*/
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TMA_LO,
lower_32_bits(tma_addr >> 8));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TMA_HI,
upper_32_bits(tma_addr >> 8));
- unlock_srbm(kgd);
+ kgd_gfx_v9_unlock_srbm(adev, inst);
+}
+
+/*
+ * Return the packet-queue base address programmed in a hardware queue.
+ *
+ * adev:     device whose registers are read
+ * pipe_id:  pipe of the queue to inspect
+ * queue_id: queue slot to inspect
+ * inst:     instance index, translated with GET_INST(GC, inst)
+ *
+ * The queue is acquired and RLC safe mode is entered for the duration of
+ * the reads; both are undone before returning. Returns 0 when CP_HQD_ACTIVE
+ * reads as zero or when CP_HQD_PQ_BASE_HI is zero; otherwise returns the
+ * 64-bit value built from CP_HQD_PQ_BASE_HI:CP_HQD_PQ_BASE shifted left by 8.
+ */
+uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst)
+{
+ uint32_t low, high;
+ uint64_t queue_addr = 0;
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, inst);
+
+ /* Nothing to report for an inactive queue. */
+ if (!RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE))
+ goto unlock_out;
+
+ low = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE);
+ high = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI);
+
+ /* only concerned with user queues. */
+ if (!high)
+ goto unlock_out;
+
+ /* queue_addr is 0 here, so this is ((high << 32) | low) << 8. */
+ queue_addr = (((queue_addr | high) << 32) | low) << 8;
+
+unlock_out:
+ amdgpu_gfx_rlc_exit_safe_mode(adev, inst);
+ kgd_gfx_v9_release_queue(adev, inst);
+
+ return queue_addr;
+}
+
+/*
+ * Poll CP_HQD_ACTIVE until its ACTIVE bit clears or the timeout expires.
+ *
+ * adev:     device whose register is polled
+ * inst:     instance index, translated with GET_INST(GC, inst)
+ * utimeout: timeout, converted to jiffies as utimeout * HZ / 1000
+ *
+ * Returns 0 once the ACTIVE bit is clear, -ETIME if the deadline passes
+ * first. Sleeps 500-1000 us between polls, so it must not be called from
+ * atomic context.
+ *
+ * assume queue acquired
+ */
+static int kgd_gfx_v9_hqd_dequeue_wait(struct amdgpu_device *adev, uint32_t inst,
+ unsigned int utimeout)
+{
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ while (true) {
+ uint32_t temp = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE);
+
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ return 0;
+
+ /* The register is checked before the deadline on every pass. */
+ if (time_after(jiffies, end_jiffies))
+ return -ETIME;
+
+ usleep_range(500, 1000);
+ }
+}
+
+/*
+ * Reset an active user hardware queue, escalating to a pipe reset if the
+ * queue reset alone does not deactivate it.
+ *
+ * adev:     device whose registers are accessed
+ * pipe_id:  pipe of the queue to reset
+ * queue_id: queue slot to reset
+ * inst:     instance index, translated with GET_INST(GC, inst)
+ * utimeout: timeout handed to kgd_gfx_v9_hqd_dequeue_wait() for each wait
+ *
+ * Returns the queue's packet-queue address (built from
+ * CP_HQD_PQ_BASE_HI:CP_HQD_PQ_BASE, shifted left by 8) when the queue went
+ * inactive after the queue reset or after the follow-up pipe reset.
+ * Returns 0 when the queue was not active, when CP_HQD_PQ_BASE_HI was zero,
+ * or when the queue was still active after the pipe reset. Note that the
+ * final debug message prints "failed!" for every 0 return, including the
+ * cases where no reset was attempted.
+ */
+uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst, unsigned int utimeout)
+{
+ uint32_t low, high, pipe_reset_data = 0;
+ uint64_t queue_addr = 0;
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, inst);
+
+ /* An inactive queue needs no reset. */
+ if (!RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE))
+ goto unlock_out;
+
+ low = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE);
+ high = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI);
+
+ /* only concerned with user queues. */
+ if (!high)
+ goto unlock_out;
+
+ /* queue_addr is 0 here, so this is ((high << 32) | low) << 8. */
+ queue_addr = (((queue_addr | high) << 32) | low) << 8;
+
+ pr_debug("Attempting queue reset on XCC %i pipe id %i queue id %i\n",
+ inst, pipe_id, queue_id);
+
+ /* assume previous dequeue request issued will take effect after reset */
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_COMPUTE_QUEUE_RESET, 0x1);
+
+ /* A zero return means the queue went inactive: queue reset was enough. */
+ if (!kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout))
+ goto unlock_out;
+
+ pr_debug("Attempting pipe reset on XCC %i pipe id %i\n", inst, pipe_id);
+
+ /* Shift the ME1 pipe0 reset bit by pipe_id to select this pipe's bit. */
+ pipe_reset_data = REG_SET_FIELD(pipe_reset_data, CP_MEC_CNTL, MEC_ME1_PIPE0_RESET, 1);
+ pipe_reset_data = pipe_reset_data << pipe_id;
+
+ /* Write the reset bit, then write 0 to CP_MEC_CNTL to clear it. */
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL, pipe_reset_data);
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL, 0);
+
+ /* Still active after the pipe reset: report failure with a 0 address. */
+ if (kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout))
+ queue_addr = 0;
+
+unlock_out:
+ pr_debug("queue reset on XCC %i pipe id %i queue id %i %s\n",
+ inst, pipe_id, queue_id, !!queue_addr ? "succeeded!" : "failed!");
+ amdgpu_gfx_rlc_exit_safe_mode(adev, inst);
+ kgd_gfx_v9_release_queue(adev, inst);
+
+ return queue_addr;
+}
+
+uint32_t kgd_gfx_v9_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+
+{
+ return 0;
}
const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
@@ -921,13 +1243,22 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
.hqd_destroy = kgd_gfx_v9_hqd_destroy,
.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
- .address_watch_disable = kgd_gfx_v9_address_watch_disable,
- .address_watch_execute = kgd_gfx_v9_address_watch_execute,
.wave_control_execute = kgd_gfx_v9_wave_control_execute,
- .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_info =
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
+ .enable_debug_trap = kgd_gfx_v9_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v9_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v9_set_address_watch,
+ .clear_address_watch = kgd_gfx_v9_clear_address_watch,
+ .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
+ .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v9_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v9_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index c63591106879..704452ca62f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -20,50 +20,97 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-
-
-void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases);
-int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
- unsigned int vmid);
-int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
-int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ uint32_t sh_mem_bases, uint32_t inst);
+int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
+ unsigned int vmid, uint32_t inst);
+int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst);
+int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm);
-int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+ struct mm_struct *mm, uint32_t inst);
+int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t doorbell_off);
-int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
+ uint32_t doorbell_off, uint32_t inst);
+int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs);
-bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id);
-int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst);
+bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst);
+int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id);
-int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd);
-int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- uint32_t cntl_val,
- uint32_t addr_hi,
- uint32_t addr_lo);
-int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd,
+ uint32_t queue_id, uint32_t inst);
+int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
- uint32_t sq_cmd);
-uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset);
-
-bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+ uint32_t sq_cmd, uint32_t inst);
+bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
uint8_t vmid, uint16_t *p_pasid);
-
-void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd,
+void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
uint32_t vmid, uint64_t page_table_base);
-void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid,
- int *pasid_wave_cnt, int *max_waves_per_cu);
-void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd,
- uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr);
+void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
+ struct kfd_cu_occupancy *cu_occupancy,
+ int *max_waves_per_cu, uint32_t inst);
+void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
+ uint32_t inst);
+void kgd_gfx_v9_acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst);
+uint64_t kgd_gfx_v9_get_queue_mask(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id);
+void kgd_gfx_v9_release_queue(struct amdgpu_device *adev, uint32_t inst);
+void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev,
+ uint32_t vmid,
+ bool stall);
+uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid);
+uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid);
+int kgd_gfx_v9_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported);
+uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid);
+uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev);
+uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst);
+uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id);
+void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
+ uint32_t *wait_times,
+ uint32_t inst);
+void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
+ uint32_t wait_times,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
+ uint32_t *reg_offset,
+ uint32_t *reg_data);
+uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
+ uint32_t pipe_id,
+ uint32_t queue_id,
+ uint32_t inst);
+uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev,
+ uint32_t pipe_id,
+ uint32_t queue_id,
+ uint32_t inst,
+ unsigned int utimeout);
+uint32_t kgd_gfx_v9_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 6348559608ce..b1c24c8fa686 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2014-2018 Advanced Micro Devices, Inc.
*
@@ -24,19 +25,32 @@
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include <drm/drm_exec.h>
#include "amdgpu_object.h"
#include "amdgpu_gem.h"
#include "amdgpu_vm.h"
+#include "amdgpu_hmm.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_dma_buf.h"
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_xgmi.h"
+#include "kfd_priv.h"
+#include "kfd_smi_events.h"
/* Userptr restore delay, just long enough to allow consecutive VM
* changes to accumulate
*/
#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
+#define AMDGPU_RESERVE_MEM_LIMIT (3UL << 29)
+
+/*
+ * Align VRAM availability to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
+ * BO chunk
+ */
+#define VRAM_AVAILABLITY_ALIGN (1 << 21)
/* Impose limit on how much memory KFD can use */
static struct {
@@ -60,12 +74,6 @@ static const char * const domain_bit_to_string[] = {
static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);
-
-static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
-{
- return (struct amdgpu_device *)kgd;
-}
-
static bool kfd_mem_is_attached(struct amdgpu_vm *avm,
struct kgd_mem *mem)
{
@@ -78,6 +86,25 @@ static bool kfd_mem_is_attached(struct amdgpu_vm *avm,
return false;
}
+/**
+ * reuse_dmamap() - Check whether adev can share the original
+ * userptr BO
+ *
+ * If both adev and bo_adev are in direct mapping or
+ * in the same iommu group, they can share the original BO.
+ *
+ * @adev: Device that wants to share the original BO
+ * @bo_adev: Device to which the allocated BO belongs
+ *
+ * Return: true if adev can share the original userptr BO,
+ * false otherwise.
+ */
+static bool reuse_dmamap(struct amdgpu_device *adev, struct amdgpu_device *bo_adev)
+{
+ return (adev->ram_is_direct_mapped && bo_adev->ram_is_direct_mapped) ||
+ (adev->dev->iommu_group == bo_adev->dev->iommu_group);
+}
+
/* Set memory usage limits. Current, limits are
* System (TTM + userptr) memory - 15/16th System RAM
* TTM memory - 3/8th System RAM
@@ -87,13 +114,21 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
struct sysinfo si;
uint64_t mem;
+ if (kfd_mem_limit.max_system_mem_limit)
+ return;
+
si_meminfo(&si);
- mem = si.freeram - si.freehigh;
+ mem = si.totalram - si.totalhigh;
mem *= si.mem_unit;
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
- kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
- kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
+ kfd_mem_limit.max_system_mem_limit = mem - (mem >> 6);
+ if (kfd_mem_limit.max_system_mem_limit < 2 * AMDGPU_RESERVE_MEM_LIMIT)
+ kfd_mem_limit.max_system_mem_limit >>= 1;
+ else
+ kfd_mem_limit.max_system_mem_limit -= AMDGPU_RESERVE_MEM_LIMIT;
+
+ kfd_mem_limit.max_ttm_mem_limit = ttm_tt_pages_limit() << PAGE_SHIFT;
pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
(kfd_mem_limit.max_system_mem_limit >> 20),
(kfd_mem_limit.max_ttm_mem_limit >> 20));
@@ -114,115 +149,215 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
* compromise that should work in most cases without reserving too
* much memory for page tables unnecessarily (factor 16K, >> 14).
*/
-#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
-
-static size_t amdgpu_amdkfd_acc_size(uint64_t size)
-{
- size >>= PAGE_SHIFT;
- size *= sizeof(dma_addr_t) + sizeof(void *);
- return __roundup_pow_of_two(sizeof(struct amdgpu_bo)) +
- __roundup_pow_of_two(sizeof(struct ttm_tt)) +
- PAGE_ALIGN(size);
-}
+#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM)
-static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
- uint64_t size, u32 domain, bool sg)
+/**
+ * amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size
+ * of buffer.
+ *
+ * @adev: Device to which allocated BO belongs to
+ * @size: Size of buffer, in bytes, encapsulated by BO. This should be
+ * equivalent to amdgpu_bo_size(BO)
+ * @alloc_flag: Flag used in allocating a BO as noted above
+ * @xcp_id: xcp_id is used to get xcp from xcp manager, one xcp is
+ * managed as one compute node in driver for app
+ *
+ * Return:
+ * -EINVAL for a VRAM request with a negative @xcp_id, -ENOMEM if the BO type
+ * is invalid or a memory limit would be exceeded, ZERO otherwise
+ */
+int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 alloc_flag, int8_t xcp_id)
{
uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
- size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint64_t reserved_for_ras = (con ? con->reserved_pages_in_bytes : 0);
+ size_t system_mem_needed, ttm_mem_needed, vram_needed;
int ret = 0;
+ uint64_t vram_size = 0;
- acc_size = amdgpu_amdkfd_acc_size(size);
-
+ system_mem_needed = 0;
+ ttm_mem_needed = 0;
vram_needed = 0;
- if (domain == AMDGPU_GEM_DOMAIN_GTT) {
- /* TTM GTT memory */
- system_mem_needed = acc_size + size;
- ttm_mem_needed = acc_size + size;
- } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
- /* Userptr */
- system_mem_needed = acc_size + size;
- ttm_mem_needed = acc_size;
- } else {
- /* VRAM and SG */
- system_mem_needed = acc_size;
- ttm_mem_needed = acc_size;
- if (domain == AMDGPU_GEM_DOMAIN_VRAM)
- vram_needed = size;
+ if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
+ system_mem_needed = size;
+ ttm_mem_needed = size;
+ } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+ /*
+ * Conservatively round up the allocation requirement to 2 MB
+ * to avoid fragmentation caused by 4K allocations in the tail
+ * 2M BO chunk.
+ */
+ vram_needed = size;
+ /*
+ * For GFX 9.4.3, get the VRAM size from XCP structs
+ */
+ if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
+ return -EINVAL;
+
+ vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id);
+ if (adev->apu_prefer_gtt) {
+ system_mem_needed = size;
+ ttm_mem_needed = size;
+ }
+ } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ system_mem_needed = size;
+ } else if (!(alloc_flag &
+ (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
+ pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
+ return -ENOMEM;
}
spin_lock(&kfd_mem_limit.mem_limit_lock);
if (kfd_mem_limit.system_mem_used + system_mem_needed >
- kfd_mem_limit.max_system_mem_limit)
+ kfd_mem_limit.max_system_mem_limit) {
pr_debug("Set no_system_mem_limit=1 if using shared memory\n");
+ if (!no_system_mem_limit) {
+ ret = -ENOMEM;
+ goto release;
+ }
+ }
- if ((kfd_mem_limit.system_mem_used + system_mem_needed >
- kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
- (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
- kfd_mem_limit.max_ttm_mem_limit) ||
- (adev->kfd.vram_used + vram_needed >
- adev->gmc.real_vram_size - reserved_for_pt)) {
+ if (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
+ kfd_mem_limit.max_ttm_mem_limit) {
ret = -ENOMEM;
- } else {
- kfd_mem_limit.system_mem_used += system_mem_needed;
- kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
- adev->kfd.vram_used += vram_needed;
+ goto release;
+ }
+
+ /* If is_app_apu is false and apu_prefer_gtt is true, this is an APU whose
+ * carve-out is smaller than GTT. VRAM allocations then go to the GTT domain, so
+ * skip the VRAM check: the TTM limit check above already covers them.
+ */
+
+ if (adev && xcp_id >= 0 && (!adev->apu_prefer_gtt || adev->gmc.is_app_apu)) {
+ uint64_t vram_available =
+ vram_size - reserved_for_pt - reserved_for_ras -
+ atomic64_read(&adev->vram_pin_size);
+ if (adev->kfd.vram_used[xcp_id] + vram_needed > vram_available) {
+ ret = -ENOMEM;
+ goto release;
+ }
}
+ /* Update memory accounting by decreasing available system
+ * memory, TTM memory and GPU memory as computed above
+ */
+ WARN_ONCE(vram_needed && !adev,
+ "adev reference can't be null when vram is used");
+ if (adev && xcp_id >= 0) {
+ adev->kfd.vram_used[xcp_id] += vram_needed;
+ adev->kfd.vram_used_aligned[xcp_id] +=
+ adev->apu_prefer_gtt ?
+ vram_needed :
+ ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
+ }
+ kfd_mem_limit.system_mem_used += system_mem_needed;
+ kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
+
+release:
spin_unlock(&kfd_mem_limit.mem_limit_lock);
return ret;
}
-static void unreserve_mem_limit(struct amdgpu_device *adev,
- uint64_t size, u32 domain, bool sg)
+void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 alloc_flag, int8_t xcp_id)
{
- size_t acc_size;
-
- acc_size = amdgpu_amdkfd_acc_size(size);
-
spin_lock(&kfd_mem_limit.mem_limit_lock);
- if (domain == AMDGPU_GEM_DOMAIN_GTT) {
- kfd_mem_limit.system_mem_used -= (acc_size + size);
- kfd_mem_limit.ttm_mem_used -= (acc_size + size);
- } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
- kfd_mem_limit.system_mem_used -= (acc_size + size);
- kfd_mem_limit.ttm_mem_used -= acc_size;
- } else {
- kfd_mem_limit.system_mem_used -= acc_size;
- kfd_mem_limit.ttm_mem_used -= acc_size;
- if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
- adev->kfd.vram_used -= size;
- WARN_ONCE(adev->kfd.vram_used < 0,
- "kfd VRAM memory accounting unbalanced");
+
+ if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
+ kfd_mem_limit.system_mem_used -= size;
+ kfd_mem_limit.ttm_mem_used -= size;
+ } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+ WARN_ONCE(!adev,
+ "adev reference can't be null when alloc mem flags vram is set");
+ if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
+ goto release;
+
+ if (adev) {
+ adev->kfd.vram_used[xcp_id] -= size;
+ if (adev->apu_prefer_gtt) {
+ adev->kfd.vram_used_aligned[xcp_id] -= size;
+ kfd_mem_limit.system_mem_used -= size;
+ kfd_mem_limit.ttm_mem_used -= size;
+ } else {
+ adev->kfd.vram_used_aligned[xcp_id] -=
+ ALIGN(size, VRAM_AVAILABLITY_ALIGN);
+ }
}
- }
- WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
- "kfd system memory accounting unbalanced");
+ } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ kfd_mem_limit.system_mem_used -= size;
+ } else if (!(alloc_flag &
+ (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
+ pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
+ goto release;
+ }
+ WARN_ONCE(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] < 0,
+ "KFD VRAM memory accounting unbalanced for xcp: %d", xcp_id);
WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
- "kfd TTM memory accounting unbalanced");
+ "KFD TTM memory accounting unbalanced");
+ WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
+ "KFD system memory accounting unbalanced");
+release:
spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- u32 domain = bo->preferred_domains;
- bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
+ u32 alloc_flags = bo->kfd_bo->alloc_flags;
+ u64 size = amdgpu_bo_size(bo);
- if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) {
- domain = AMDGPU_GEM_DOMAIN_CPU;
- sg = false;
- }
-
- unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
+ amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags,
+ bo->xcp_id);
kfree(bo->kfd_bo);
}
+/**
+ * create_dmamap_sg_bo() - Creates an amdgpu_bo object to reflect information
+ * about USERPTR or DOORBELL or MMIO BO.
+ *
+ * @adev: Device for which dmamap BO is being created
+ * @mem: BO of peer device that is being DMA mapped. Provides parameters
+ * in building the dmamap BO
+ * @bo_out: Output parameter updated with handle of dmamap BO
+ *
+ * Return: ZERO on success, the amdgpu_bo_reserve() error if mem->bo cannot be
+ * reserved, -EINVAL if the SG BO cannot be created
+ */
+static int
+create_dmamap_sg_bo(struct amdgpu_device *adev,
+ struct kgd_mem *mem, struct amdgpu_bo **bo_out)
+{
+ struct drm_gem_object *gem_obj;
+ int ret;
+ uint64_t flags = 0;
+
+ ret = amdgpu_bo_reserve(mem->bo, false);
+ if (ret)
+ return ret;
+
+ /* userptr BOs carry over the coherent/uncached creation flags of mem->bo */
+ if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)
+ flags |= mem->bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_UNCACHED);
+
+ /* the new SG BO is created on the reservation object of mem->bo */
+ ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, 1,
+ AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE | flags,
+ ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj, 0);
+
+ amdgpu_bo_unreserve(mem->bo);
+
+ if (ret) {
+ pr_err("Error in creating DMA mappable SG BO on domain: %d\n", ret);
+ return -EINVAL;
+ }
+
+ /* the dmamap BO takes a reference on mem->bo and records it as its parent */
+ *bo_out = gem_to_amdgpu_bo(gem_obj);
+ (*bo_out)->parent = amdgpu_bo_ref(mem->bo);
+ return ret;
+}
/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
* reservation object.
@@ -236,90 +371,47 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
struct amdgpu_amdkfd_fence *ef)
{
- struct dma_resv *resv = bo->tbo.base.resv;
- struct dma_resv_list *old, *new;
- unsigned int i, j, k;
+ struct dma_fence *replacement;
if (!ef)
return -EINVAL;
- old = dma_resv_shared_list(resv);
- if (!old)
- return 0;
-
- new = kmalloc(struct_size(new, shared, old->shared_max), GFP_KERNEL);
- if (!new)
- return -ENOMEM;
-
- /* Go through all the shared fences in the resevation object and sort
- * the interesting ones to the end of the list.
+ /* TODO: Instead of block before we should use the fence of the page
+ * table update and TLB flush here directly.
*/
- for (i = 0, j = old->shared_count, k = 0; i < old->shared_count; ++i) {
- struct dma_fence *f;
-
- f = rcu_dereference_protected(old->shared[i],
- dma_resv_held(resv));
-
- if (f->context == ef->base.context)
- RCU_INIT_POINTER(new->shared[--j], f);
- else
- RCU_INIT_POINTER(new->shared[k++], f);
- }
- new->shared_max = old->shared_max;
- new->shared_count = k;
-
- /* Install the new fence list, seqcount provides the barriers */
- write_seqcount_begin(&resv->seq);
- RCU_INIT_POINTER(resv->fence, new);
- write_seqcount_end(&resv->seq);
-
- /* Drop the references to the removed fences or move them to ef_list */
- for (i = j; i < old->shared_count; ++i) {
- struct dma_fence *f;
-
- f = rcu_dereference_protected(new->shared[i],
- dma_resv_held(resv));
- dma_fence_put(f);
- }
- kfree_rcu(old, rcu);
-
+ replacement = dma_fence_get_stub();
+ dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context,
+ replacement, DMA_RESV_USAGE_BOOKKEEP);
+ dma_fence_put(replacement);
return 0;
}
-int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
+/**
+ * amdgpu_amdkfd_remove_all_eviction_fences - Remove all eviction fences
+ * @bo: the BO to remove the eviction fences from.
+ *
+ * This function should only be used on release when all references to the BO
+ * are already dropped. We remove the eviction fences from the private copy of
+ * the dma_resv object here since that is what is used during release to
+ * determine if the BO is idle or not.
+ */
+void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo)
{
- struct amdgpu_bo *root = bo;
- struct amdgpu_vm_bo_base *vm_bo;
- struct amdgpu_vm *vm;
- struct amdkfd_process_info *info;
- struct amdgpu_amdkfd_fence *ef;
- int ret;
-
- /* we can always get vm_bo from root PD bo.*/
- while (root->parent)
- root = root->parent;
+ struct dma_resv *resv = &bo->tbo.base._resv;
+ struct dma_fence *fence, *stub;
+ struct dma_resv_iter cursor;
- vm_bo = root->vm_bo;
- if (!vm_bo)
- return 0;
+ dma_resv_assert_held(resv);
- vm = vm_bo->vm;
- if (!vm)
- return 0;
-
- info = vm->process_info;
- if (!info || !info->eviction_fence)
- return 0;
-
- ef = container_of(dma_fence_get(&info->eviction_fence->base),
- struct amdgpu_amdkfd_fence, base);
-
- BUG_ON(!dma_resv_trylock(bo->tbo.base.resv));
- ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
- dma_resv_unlock(bo->tbo.base.resv);
+ stub = dma_fence_get_stub();
+ dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) {
+ if (!to_amdgpu_amdkfd_fence(fence))
+ continue;
- dma_fence_put(&ef->base);
- return ret;
+ dma_resv_replace_fences(resv, fence->context, stub,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+ dma_fence_put(stub);
}
static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
@@ -332,6 +424,10 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
"Called with userptr BO"))
return -EINVAL;
+ /* BO has been pinned, no need to validate it */
+ if (bo->tbo.pin_count)
+ return 0;
+
amdgpu_bo_placement_from_domain(bo, domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
@@ -344,6 +440,32 @@ validate_fail:
return ret;
}
+/**
+ * amdgpu_amdkfd_bo_validate_and_fence() - Validate a BO and attach a fence to it
+ * @bo: BO to validate; it is reserved for the duration of the call
+ * @domain: domain passed on to amdgpu_amdkfd_bo_validate()
+ * @fence: fence added to the BO's reservation object with
+ * DMA_RESV_USAGE_BOOKKEEP usage
+ *
+ * Return: ZERO on success, otherwise the error from reserving the BO,
+ * validating it, or reserving the fence slot. The fence is only added when
+ * all of those steps succeeded.
+ */
+int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
+ uint32_t domain,
+ struct dma_fence *fence)
+{
+ int ret = amdgpu_bo_reserve(bo, false);
+
+ if (ret)
+ return ret;
+
+ ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
+ if (ret)
+ goto unreserve_out;
+
+ /* make room for one fence before adding it */
+ ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
+ if (ret)
+ goto unreserve_out;
+
+ dma_resv_add_fence(bo->tbo.base.resv, fence,
+ DMA_RESV_USAGE_BOOKKEEP);
+
+unreserve_out:
+ amdgpu_bo_unreserve(bo);
+
+ return ret;
+}
+
static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
{
return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false);
@@ -356,34 +478,22 @@ static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
* again. Page directories are only updated after updating page
* tables.
*/
-static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
+static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket)
{
struct amdgpu_bo *pd = vm->root.bo;
struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
int ret;
- ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate_vm_bo, NULL);
+ ret = amdgpu_vm_validate(adev, vm, ticket,
+ amdgpu_amdkfd_validate_vm_bo, NULL);
if (ret) {
pr_err("failed to validate PT BOs\n");
return ret;
}
- ret = amdgpu_amdkfd_validate_vm_bo(NULL, pd);
- if (ret) {
- pr_err("failed to validate PD\n");
- return ret;
- }
-
vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.bo);
- if (vm->use_cpu_for_update) {
- ret = amdgpu_bo_kmap(pd, NULL);
- if (ret) {
- pr_err("failed to kmap PD, ret=%d\n", ret);
- return ret;
- }
- }
-
return 0;
}
@@ -397,71 +507,53 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
if (ret)
return ret;
- return amdgpu_sync_fence(sync, vm->last_update);
+ return amdgpu_sync_fence(sync, vm->last_update, GFP_KERNEL);
}
-static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
+static uint64_t get_pte_flags(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct kgd_mem *mem)
{
- struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
- bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
- bool uncached = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED;
- uint32_t mapping_flags;
- uint64_t pte_flags;
- bool snoop = false;
+ uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE |
+ AMDGPU_VM_MTYPE_DEFAULT;
- mapping_flags = AMDGPU_VM_PAGE_READABLE;
if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)
mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)
mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
- switch (adev->asic_type) {
- case CHIP_ARCTURUS:
- if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
- if (bo_adev == adev)
- mapping_flags |= coherent ?
- AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
- else
- mapping_flags |= coherent ?
- AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
- } else {
- mapping_flags |= coherent ?
- AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
- }
- break;
- case CHIP_ALDEBARAN:
- if (coherent && uncached) {
- if (adev->gmc.xgmi.connected_to_cpu ||
- !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
- snoop = true;
- mapping_flags |= AMDGPU_VM_MTYPE_UC;
- } else if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
- if (bo_adev == adev) {
- mapping_flags |= coherent ?
- AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
- if (adev->gmc.xgmi.connected_to_cpu)
- snoop = true;
- } else {
- mapping_flags |= coherent ?
- AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
- if (amdgpu_xgmi_same_hive(adev, bo_adev))
- snoop = true;
- }
- } else {
- snoop = true;
- mapping_flags |= coherent ?
- AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
- }
- break;
- default:
- mapping_flags |= coherent ?
- AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
- }
+ return mapping_flags;
+}
- pte_flags = amdgpu_gem_va_map_flags(adev, mapping_flags);
- pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
+/**
+ * create_sg_table() - Create an sg_table for a contiguous DMA addr range
+ * @addr: The starting address to point to
+ * @size: Size of memory area in bytes being pointed to
+ *
+ * Allocates an instance of sg_table and initializes it to point to memory
+ * area specified by input parameters. The address used to build is assumed
+ * to be DMA mapped, if needed.
+ *
+ * DOORBELL or MMIO BOs use only one scatterlist node in their sg_table
+ * because they are physically contiguous.
+ *
+ * Return: Initialized instance of SG Table or NULL
+ */
+static struct sg_table *create_sg_table(uint64_t addr, uint32_t size)
+{
+ struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
- return pte_flags;
+ if (!sg)
+ return NULL;
+ if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
+ kfree(sg);
+ return NULL;
+ }
+ sg_dma_address(sg->sgl) = addr;
+ sg->sgl->length = size;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->sgl->dma_length = size;
+#endif
+ return sg;
}
static int
@@ -478,13 +570,13 @@ kfd_mem_dmamap_userptr(struct kgd_mem *mem,
struct ttm_tt *ttm = bo->tbo.ttm;
int ret;
+ if (WARN_ON(ttm->num_pages != src_ttm->num_pages))
+ return -EINVAL;
+
ttm->sg = kmalloc(sizeof(*ttm->sg), GFP_KERNEL);
if (unlikely(!ttm->sg))
return -ENOMEM;
- if (WARN_ON(ttm->num_pages != src_ttm->num_pages))
- return -EINVAL;
-
/* Same sequence as in amdgpu_ttm_tt_pin_userptr */
ret = sg_alloc_table_from_pages(ttm->sg, src_ttm->pages,
ttm->num_pages, 0,
@@ -497,9 +589,6 @@ kfd_mem_dmamap_userptr(struct kgd_mem *mem,
if (unlikely(ret))
goto release_sg;
- drm_prime_sg_to_dma_addr_array(ttm->sg, ttm->dma_address,
- ttm->num_pages);
-
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret)
@@ -528,6 +617,87 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}
+/**
+ * kfd_mem_dmamap_sg_bo() - Create DMA mapped sg_table to access DOORBELL or MMIO BO
+ * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
+ * @attachment: Virtual address attachment of the BO on accessing device
+ *
+ * An access request from the device that owns DOORBELL does not require DMA mapping.
+ * This is because the request doesn't go through PCIe root complex i.e. it instead
+ * loops back. The need to DMA map arises only when accessing peer device's DOORBELL
+ *
+ * In contrast, all access requests for MMIO need to be DMA mapped without regard to
+ * device ownership. This is because access requests for MMIO go through PCIe root
+ * complex.
+ *
+ * This is accomplished in two steps:
+ * - Obtain DMA mapped address of DOORBELL or MMIO memory that could be used
+ * in updating requesting device's page table
+ * - Signal TTM to mark memory pointed to by requesting device's BO as GPU
+ * accessible. This allows an update of requesting device's page table
+ * with entries associated with DOORBELL or MMIO memory
+ *
+ * This method is invoked in the following contexts:
+ * - Mapping of DOORBELL or MMIO BO of same or peer device
+ * - Validating an evicted DOORBELL or MMIO BO on device seeking access
+ *
+ * Return: ZERO if successful, NON-ZERO otherwise
+ */
+static int
+kfd_mem_dmamap_sg_bo(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
+ struct ttm_operation_ctx ctx = {.interruptible = true};
+ struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+ struct amdgpu_device *adev = attachment->adev;
+ struct ttm_tt *ttm = bo->tbo.ttm;
+ enum dma_data_direction dir;
+ dma_addr_t dma_addr;
+ bool mmio;
+ int ret;
+
+ /* Expect SG Table of dmamap BO to be NULL */
+ mmio = (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP);
+ if (unlikely(ttm->sg)) {
+ pr_err("SG Table of %d BO for peer device is UNEXPECTEDLY NON-NULL", mmio);
+ return -EINVAL;
+ }
+
+ /* writable BOs are mapped bidirectionally, all others to-device only */
+ dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+ dma_addr = mem->bo->tbo.sg->sgl->dma_address;
+ pr_debug("%d BO size: %d\n", mmio, mem->bo->tbo.sg->sgl->length);
+ pr_debug("%d BO address before DMA mapping: %llx\n", mmio, dma_addr);
+ dma_addr = dma_map_resource(adev->dev, dma_addr,
+ mem->bo->tbo.sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
+ ret = dma_mapping_error(adev->dev, dma_addr);
+ if (unlikely(ret))
+ return ret;
+ pr_debug("%d BO address after DMA mapping: %llx\n", mmio, dma_addr);
+
+ /* single-entry sg_table describing the mapped range */
+ ttm->sg = create_sg_table(dma_addr, mem->bo->tbo.sg->sgl->length);
+ if (unlikely(!ttm->sg)) {
+ ret = -ENOMEM;
+ goto unmap_sg;
+ }
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (unlikely(ret))
+ goto free_sg;
+
+ return ret;
+
+ /* error unwinding: release in reverse order of setup */
+free_sg:
+ sg_free_table(ttm->sg);
+ kfree(ttm->sg);
+ ttm->sg = NULL;
+unmap_sg:
+ dma_unmap_resource(adev->dev, dma_addr, mem->bo->tbo.sg->sgl->length,
+ dir, DMA_ATTR_SKIP_CPU_SYNC);
+ return ret;
+}
+
static int
kfd_mem_dmamap_attachment(struct kgd_mem *mem,
struct kfd_mem_attachment *attachment)
@@ -539,6 +709,8 @@ kfd_mem_dmamap_attachment(struct kgd_mem *mem,
return kfd_mem_dmamap_userptr(mem, attachment);
case KFD_MEM_ATT_DMABUF:
return kfd_mem_dmamap_dmabuf(attachment);
+ case KFD_MEM_ATT_SG:
+ return kfd_mem_dmamap_sg_bo(mem, attachment);
default:
WARN_ON_ONCE(1);
}
@@ -561,7 +733,7 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
return;
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
- ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ (void)ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
sg_free_table(ttm->sg);
@@ -572,11 +744,54 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
static void
kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
{
+ /* This is a no-op. We don't want to trigger eviction fences when
+ * unmapping DMABufs. Therefore the invalidation (moving to system
+ * domain) is done in kfd_mem_dmamap_dmabuf.
+ */
+}
+
+/**
+ * kfd_mem_dmaunmap_sg_bo() - Free DMA mapped sg_table of DOORBELL or MMIO BO
+ * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
+ * @attachment: Virtual address attachment of the BO on accessing device
+ *
+ * The method performs following steps:
+ * - Signal TTM to mark memory pointed to by BO as GPU inaccessible
+ * - Free SG Table that is used to encapsulate DMA mapped memory of
+ * peer device's DOORBELL or MMIO memory
+ *
+ * This method is invoked in the following contexts:
+ * - Unmapping of DOORBELL or MMIO BO on a device having access to its memory
+ * - Eviction of DOORBELL or MMIO BO on device having access to its memory
+ *
+ * Return: void
+ */
+static void
+kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
struct ttm_operation_ctx ctx = {.interruptible = true};
struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+ struct amdgpu_device *adev = attachment->adev;
+ struct ttm_tt *ttm = bo->tbo.ttm;
+ enum dma_data_direction dir;
+
+ if (unlikely(!ttm->sg)) {
+ pr_debug("SG Table of BO is NULL");
+ return;
+ }
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
- ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ (void)ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+
+ dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+ dma_unmap_resource(adev->dev, ttm->sg->sgl->dma_address,
+ ttm->sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
+ sg_free_table(ttm->sg);
+ kfree(ttm->sg);
+ ttm->sg = NULL;
+ bo->tbo.sg = NULL;
}
static void
@@ -592,34 +807,29 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
case KFD_MEM_ATT_DMABUF:
kfd_mem_dmaunmap_dmabuf(attachment);
break;
+ case KFD_MEM_ATT_SG:
+ kfd_mem_dmaunmap_sg_bo(mem, attachment);
+ break;
default:
WARN_ON_ONCE(1);
}
}
-static int
-kfd_mem_attach_userptr(struct amdgpu_device *adev, struct kgd_mem *mem,
- struct amdgpu_bo **bo)
+static int kfd_mem_export_dmabuf(struct kgd_mem *mem)
{
- unsigned long bo_size = mem->bo->tbo.base.size;
- struct drm_gem_object *gobj;
- int ret;
-
- ret = amdgpu_bo_reserve(mem->bo, false);
- if (ret)
- return ret;
-
- ret = amdgpu_gem_object_create(adev, bo_size, 1,
- AMDGPU_GEM_DOMAIN_CPU,
- AMDGPU_GEM_CREATE_PREEMPTIBLE,
- ttm_bo_type_sg, mem->bo->tbo.base.resv,
- &gobj);
- amdgpu_bo_unreserve(mem->bo);
- if (ret)
- return ret;
+ if (!mem->dmabuf) {
+ struct amdgpu_device *bo_adev;
+ struct dma_buf *dmabuf;
- *bo = gem_to_amdgpu_bo(gobj);
- (*bo)->parent = amdgpu_bo_ref(mem->bo);
+ bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
+ dmabuf = drm_gem_prime_handle_to_dmabuf(&bo_adev->ddev, bo_adev->kfd.client.file,
+ mem->gem_handle,
+ mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+ DRM_RDWR : 0);
+ if (IS_ERR(dmabuf))
+ return PTR_ERR(dmabuf);
+ mem->dmabuf = dmabuf;
+ }
return 0;
}
@@ -631,16 +841,9 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
struct drm_gem_object *gobj;
int ret;
- if (!mem->dmabuf) {
- mem->dmabuf = amdgpu_gem_prime_export(&mem->bo->tbo.base,
- mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
- DRM_RDWR : 0);
- if (IS_ERR(mem->dmabuf)) {
- ret = PTR_ERR(mem->dmabuf);
- mem->dmabuf = NULL;
- return ret;
- }
- }
+ ret = kfd_mem_export_dmabuf(mem);
+ if (ret)
+ return ret;
gobj = amdgpu_gem_prime_import(adev_to_drm(adev), mem->dmabuf);
if (IS_ERR(gobj))
@@ -648,7 +851,6 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
*bo = gem_to_amdgpu_bo(gobj);
(*bo)->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
- (*bo)->parent = amdgpu_bo_ref(mem->bo);
return 0;
}
@@ -674,6 +876,8 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
uint64_t va = mem->va;
struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
struct amdgpu_bo *bo[2] = {NULL, NULL};
+ struct amdgpu_bo_va *bo_va;
+ bool same_hive = false;
int i, ret;
if (!va) {
@@ -681,6 +885,24 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
return -EINVAL;
}
+ /* Determine access to VRAM, MMIO and DOORBELL BOs of peer devices
+ *
+ * The access path of MMIO and DOORBELL BOs is always over PCIe.
+ * In contrast the access path of VRAM BOs depends upon the type of
+ * link that connects the peer device. Access over PCIe is allowed
+ * if peer device has large BAR. In contrast, access over xGMI is
+ * allowed for both small and large BAR configurations of peer device
+ */
+ if ((adev != bo_adev && !adev->apu_prefer_gtt) &&
+ ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
+ (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
+ (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
+ if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM)
+ same_hive = amdgpu_xgmi_same_hive(adev, bo_adev);
+ if (!same_hive && !amdgpu_device_is_peer_accessible(bo_adev, adev))
+ return -EINVAL;
+ }
+
for (i = 0; i <= is_aql; i++) {
attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL);
if (unlikely(!attachment[i])) {
@@ -691,10 +913,13 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
va + bo_size, vm);
- if (adev == bo_adev || (mem->domain == AMDGPU_GEM_DOMAIN_VRAM &&
- amdgpu_xgmi_same_hive(adev, bo_adev))) {
- /* Mappings on the local GPU and VRAM mappings in the
- * local hive share the original BO
+ if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) ||
+ (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && reuse_dmamap(adev, bo_adev)) ||
+ (mem->domain == AMDGPU_GEM_DOMAIN_GTT && reuse_dmamap(adev, bo_adev)) ||
+ same_hive) {
+ /* Mappings on the local GPU, VRAM mappings in the local
+ * hive, and userptr or GTT mappings that can reuse the DMA
+ * map address space all share the original BO
*/
attachment[i]->type = KFD_MEM_ATT_SHARED;
bo[i] = mem->bo;
@@ -707,26 +932,30 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
} else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
/* Create an SG BO to DMA-map userptrs on other GPUs */
attachment[i]->type = KFD_MEM_ATT_USERPTR;
- ret = kfd_mem_attach_userptr(adev, mem, &bo[i]);
+ ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
if (ret)
goto unwind;
- } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT &&
- mem->bo->tbo.type != ttm_bo_type_sg) {
- /* GTT BOs use DMA-mapping ability of dynamic-attach
- * DMA bufs. TODO: The same should work for VRAM on
- * large-BAR GPUs.
- */
+ /* Handle DOORBELL BOs of peer devices and MMIO BOs of local and peer devices */
+ } else if (mem->bo->tbo.type == ttm_bo_type_sg) {
+ WARN_ONCE(!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL ||
+ mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP),
+ "Handing invalid SG BO in ATTACH request");
+ attachment[i]->type = KFD_MEM_ATT_SG;
+ ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
+ if (ret)
+ goto unwind;
+ /* Enable access to GTT and VRAM BOs of peer devices */
+ } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT ||
+ mem->domain == AMDGPU_GEM_DOMAIN_VRAM) {
attachment[i]->type = KFD_MEM_ATT_DMABUF;
ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]);
if (ret)
goto unwind;
+ pr_debug("Employ DMABUF mechanism to enable peer GPU access\n");
} else {
- /* FIXME: Need to DMA-map other BO types:
- * large-BAR VRAM, doorbells, MMIO remap
- */
- attachment[i]->type = KFD_MEM_ATT_SHARED;
- bo[i] = mem->bo;
- drm_gem_object_get(&bo[i]->tbo.base);
+ WARN_ONCE(true, "Handling invalid ATTACH request");
+ ret = -EINVAL;
+ goto unwind;
}
/* Add BO to VM internal data structures */
@@ -735,7 +964,12 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
pr_debug("Unable to reserve BO during memory attach");
goto unwind;
}
- attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+ bo_va = amdgpu_vm_bo_find(vm, bo[i]);
+ if (!bo_va)
+ bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+ else
+ ++bo_va->ref_count;
+ attachment[i]->bo_va = bo_va;
amdgpu_bo_unreserve(bo[i]);
if (unlikely(!attachment[i]->bo_va)) {
ret = -ENOMEM;
@@ -744,7 +978,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
goto unwind;
}
attachment[i]->va = va;
- attachment[i]->pte_flags = get_pte_flags(adev, mem);
+ attachment[i]->pte_flags = get_pte_flags(adev, vm, mem);
attachment[i]->adev = adev;
list_add(&attachment[i]->list, &mem->attachments);
@@ -758,8 +992,9 @@ unwind:
if (!attachment[i])
continue;
if (attachment[i]->bo_va) {
- amdgpu_bo_reserve(bo[i], true);
- amdgpu_vm_bo_rmv(adev, attachment[i]->bo_va);
+ (void)amdgpu_bo_reserve(bo[i], true);
+ if (--attachment[i]->bo_va->ref_count == 0)
+ amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
amdgpu_bo_unreserve(bo[i]);
list_del(&attachment[i]->list);
}
@@ -776,7 +1011,8 @@ static void kfd_mem_detach(struct kfd_mem_attachment *attachment)
pr_debug("\t remove VA 0x%llx in entry %p\n",
attachment->va, attachment);
- amdgpu_vm_bo_rmv(attachment->adev, attachment->bo_va);
+ if (--attachment->bo_va->ref_count == 0)
+ amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
drm_gem_object_put(&bo->tbo.base);
list_del(&attachment->list);
kfree(attachment);
@@ -786,28 +1022,20 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
struct amdkfd_process_info *process_info,
bool userptr)
{
- struct ttm_validate_buffer *entry = &mem->validate_list;
- struct amdgpu_bo *bo = mem->bo;
-
- INIT_LIST_HEAD(&entry->head);
- entry->num_shared = 1;
- entry->bo = &bo->tbo;
mutex_lock(&process_info->lock);
if (userptr)
- list_add_tail(&entry->head, &process_info->userptr_valid_list);
+ list_add_tail(&mem->validate_list,
+ &process_info->userptr_valid_list);
else
- list_add_tail(&entry->head, &process_info->kfd_bo_list);
+ list_add_tail(&mem->validate_list, &process_info->kfd_bo_list);
mutex_unlock(&process_info->lock);
}
static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
struct amdkfd_process_info *process_info)
{
- struct ttm_validate_buffer *bo_list_entry;
-
- bo_list_entry = &mem->validate_list;
mutex_lock(&process_info->lock);
- list_del(&bo_list_entry->head);
+ list_del(&mem->validate_list);
mutex_unlock(&process_info->lock);
}
@@ -823,11 +1051,13 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
*
* Returns 0 for success, negative errno for errors.
*/
-static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
+static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
+ bool criu_resume)
{
struct amdkfd_process_info *process_info = mem->process_info;
struct amdgpu_bo *bo = mem->bo;
struct ttm_operation_ctx ctx = { true, false };
+ struct amdgpu_hmm_range *range;
int ret = 0;
mutex_lock(&process_info->lock);
@@ -838,16 +1068,40 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
goto out;
}
- ret = amdgpu_mn_register(bo, user_addr);
+ ret = amdgpu_hmm_register(bo, user_addr);
if (ret) {
pr_err("%s: Failed to register MMU notifier: %d\n",
__func__, ret);
goto out;
}
- ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
+ if (criu_resume) {
+ /*
+ * During a CRIU restore operation, the userptr buffer objects
+ * will be validated in the restore_userptr_work worker at a
+ * later stage when it is scheduled by another ioctl called by
+ * CRIU master process for the target pid for restore.
+ */
+ mutex_lock(&process_info->notifier_lock);
+ mem->invalid++;
+ mutex_unlock(&process_info->notifier_lock);
+ mutex_unlock(&process_info->lock);
+ return 0;
+ }
+
+ range = amdgpu_hmm_range_alloc(NULL);
+ if (unlikely(!range)) {
+ ret = -ENOMEM;
+ goto unregister_out;
+ }
+
+ ret = amdgpu_ttm_tt_get_user_pages(bo, range);
if (ret) {
- pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
+ amdgpu_hmm_range_free(range);
+ if (ret == -EAGAIN)
+ pr_debug("Failed to get user pages, try again\n");
+ else
+ pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
goto unregister_out;
}
@@ -856,6 +1110,9 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
pr_err("%s: Failed to reserve BO\n", __func__);
goto release_out;
}
+
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+
amdgpu_bo_placement_from_domain(bo, mem->domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret)
@@ -863,10 +1120,10 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
amdgpu_bo_unreserve(bo);
release_out:
- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+ amdgpu_hmm_range_free(range);
unregister_out:
if (ret)
- amdgpu_mn_unregister(bo);
+ amdgpu_hmm_unregister(bo);
out:
mutex_unlock(&process_info->lock);
return ret;
@@ -878,13 +1135,12 @@ out:
* object can track VM updates.
*/
struct bo_vm_reservation_context {
- struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */
- unsigned int n_vms; /* Number of VMs reserved */
- struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries */
- struct ww_acquire_ctx ticket; /* Reservation ticket */
- struct list_head list, duplicates; /* BO lists */
- struct amdgpu_sync *sync; /* Pointer to sync object */
- bool reserved; /* Whether BOs are reserved */
+ /* DRM execution context for the reservation */
+ struct drm_exec exec;
+ /* Number of VMs reserved */
+ unsigned int n_vms;
+ /* Pointer to sync object */
+ struct amdgpu_sync *sync;
};
enum bo_vm_match {
@@ -908,35 +1164,26 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
WARN_ON(!vm);
- ctx->reserved = false;
ctx->n_vms = 1;
ctx->sync = &mem->sync;
-
- INIT_LIST_HEAD(&ctx->list);
- INIT_LIST_HEAD(&ctx->duplicates);
-
- ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL);
- if (!ctx->vm_pd)
- return -ENOMEM;
-
- ctx->kfd_bo.priority = 0;
- ctx->kfd_bo.tv.bo = &bo->tbo;
- ctx->kfd_bo.tv.num_shared = 1;
- list_add(&ctx->kfd_bo.tv.head, &ctx->list);
-
- amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
-
- ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
- false, &ctx->duplicates);
- if (ret) {
- pr_err("Failed to reserve buffers in ttm.\n");
- kfree(ctx->vm_pd);
- ctx->vm_pd = NULL;
- return ret;
+ drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+ drm_exec_until_all_locked(&ctx->exec) {
+ ret = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
+ drm_exec_retry_on_contention(&ctx->exec);
+ if (unlikely(ret))
+ goto error;
+
+ ret = drm_exec_prepare_obj(&ctx->exec, &bo->tbo.base, 1);
+ drm_exec_retry_on_contention(&ctx->exec);
+ if (unlikely(ret))
+ goto error;
}
-
- ctx->reserved = true;
return 0;
+
+error:
+ pr_err("Failed to reserve buffers in ttm.\n");
+ drm_exec_fini(&ctx->exec);
+ return ret;
}
/**
@@ -953,63 +1200,40 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
struct amdgpu_vm *vm, enum bo_vm_match map_type,
struct bo_vm_reservation_context *ctx)
{
- struct amdgpu_bo *bo = mem->bo;
struct kfd_mem_attachment *entry;
- unsigned int i;
+ struct amdgpu_bo *bo = mem->bo;
int ret;
- ctx->reserved = false;
- ctx->n_vms = 0;
- ctx->vm_pd = NULL;
ctx->sync = &mem->sync;
+ drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&ctx->exec) {
+ ctx->n_vms = 0;
+ list_for_each_entry(entry, &mem->attachments, list) {
+ if ((vm && vm != entry->bo_va->base.vm) ||
+ (entry->is_mapped != map_type
+ && map_type != BO_VM_ALL))
+ continue;
- INIT_LIST_HEAD(&ctx->list);
- INIT_LIST_HEAD(&ctx->duplicates);
-
- list_for_each_entry(entry, &mem->attachments, list) {
- if ((vm && vm != entry->bo_va->base.vm) ||
- (entry->is_mapped != map_type
- && map_type != BO_VM_ALL))
- continue;
-
- ctx->n_vms++;
- }
-
- if (ctx->n_vms != 0) {
- ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd),
- GFP_KERNEL);
- if (!ctx->vm_pd)
- return -ENOMEM;
- }
-
- ctx->kfd_bo.priority = 0;
- ctx->kfd_bo.tv.bo = &bo->tbo;
- ctx->kfd_bo.tv.num_shared = 1;
- list_add(&ctx->kfd_bo.tv.head, &ctx->list);
-
- i = 0;
- list_for_each_entry(entry, &mem->attachments, list) {
- if ((vm && vm != entry->bo_va->base.vm) ||
- (entry->is_mapped != map_type
- && map_type != BO_VM_ALL))
- continue;
-
- amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list,
- &ctx->vm_pd[i]);
- i++;
- }
+ ret = amdgpu_vm_lock_pd(entry->bo_va->base.vm,
+ &ctx->exec, 2);
+ drm_exec_retry_on_contention(&ctx->exec);
+ if (unlikely(ret))
+ goto error;
+ ++ctx->n_vms;
+ }
- ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
- false, &ctx->duplicates);
- if (ret) {
- pr_err("Failed to reserve buffers in ttm.\n");
- kfree(ctx->vm_pd);
- ctx->vm_pd = NULL;
- return ret;
+ ret = drm_exec_prepare_obj(&ctx->exec, &bo->tbo.base, 1);
+ drm_exec_retry_on_contention(&ctx->exec);
+ if (unlikely(ret))
+ goto error;
}
-
- ctx->reserved = true;
return 0;
+
+error:
+ pr_err("Failed to reserve buffers in ttm.\n");
+ drm_exec_fini(&ctx->exec);
+ return ret;
}
/**
@@ -1030,19 +1254,12 @@ static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
if (wait)
ret = amdgpu_sync_wait(ctx->sync, intr);
- if (ctx->reserved)
- ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list);
- kfree(ctx->vm_pd);
-
+ drm_exec_fini(&ctx->exec);
ctx->sync = NULL;
-
- ctx->reserved = false;
- ctx->vm_pd = NULL;
-
return ret;
}
-static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
+static int unmap_bo_from_gpuvm(struct kgd_mem *mem,
struct kfd_mem_attachment *entry,
struct amdgpu_sync *sync)
{
@@ -1050,19 +1267,27 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
struct amdgpu_device *adev = entry->adev;
struct amdgpu_vm *vm = bo_va->base.vm;
- amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
+ if (bo_va->queue_refcount) {
+ pr_debug("bo_va->queue_refcount %d\n", bo_va->queue_refcount);
+ return -EBUSY;
+ }
- amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
+ (void)amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
- amdgpu_sync_fence(sync, bo_va->last_pt_update);
+ /* VM entity stopped if process killed, don't clear freed pt bo */
+ if (!amdgpu_vm_ready(vm))
+ return 0;
- kfd_mem_dmaunmap_attachment(mem, entry);
+ (void)amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
+
+ (void)amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL);
+
+ return 0;
}
static int update_gpuvm_pte(struct kgd_mem *mem,
struct kfd_mem_attachment *entry,
- struct amdgpu_sync *sync,
- bool *table_freed)
+ struct amdgpu_sync *sync)
{
struct amdgpu_bo_va *bo_va = entry->bo_va;
struct amdgpu_device *adev = entry->adev;
@@ -1073,20 +1298,19 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
return ret;
/* Update the page tables */
- ret = amdgpu_vm_bo_update(adev, bo_va, false, table_freed);
+ ret = amdgpu_vm_bo_update(adev, bo_va, false);
if (ret) {
pr_err("amdgpu_vm_bo_update failed\n");
return ret;
}
- return amdgpu_sync_fence(sync, bo_va->last_pt_update);
+ return amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL);
}
static int map_bo_to_gpuvm(struct kgd_mem *mem,
struct kfd_mem_attachment *entry,
struct amdgpu_sync *sync,
- bool no_update_pte,
- bool *table_freed)
+ bool no_update_pte)
{
int ret;
@@ -1103,7 +1327,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem,
if (no_update_pte)
return 0;
- ret = update_gpuvm_pte(mem, entry, sync, table_freed);
+ ret = update_gpuvm_pte(mem, entry, sync);
if (ret) {
pr_err("update_gpuvm_pte() failed\n");
goto update_gpuvm_pte_failed;
@@ -1113,35 +1337,19 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem,
update_gpuvm_pte_failed:
unmap_bo_from_gpuvm(mem, entry, sync);
+ kfd_mem_dmaunmap_attachment(mem, entry);
return ret;
}
-static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
-{
- struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
-
- if (!sg)
- return NULL;
- if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
- kfree(sg);
- return NULL;
- }
- sg->sgl->dma_address = addr;
- sg->sgl->length = size;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
- sg->sgl->dma_length = size;
-#endif
- return sg;
-}
-
-static int process_validate_vms(struct amdkfd_process_info *process_info)
+static int process_validate_vms(struct amdkfd_process_info *process_info,
+ struct ww_acquire_ctx *ticket)
{
struct amdgpu_vm *peer_vm;
int ret;
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
- ret = vm_validate_pt_pd_bos(peer_vm);
+ ret = vm_validate_pt_pd_bos(peer_vm, ticket);
if (ret)
return ret;
}
@@ -1197,6 +1405,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
return -ENOMEM;
mutex_init(&info->lock);
+ mutex_init(&info->notifier_lock);
INIT_LIST_HEAD(&info->vm_list_head);
INIT_LIST_HEAD(&info->kfd_bo_list);
INIT_LIST_HEAD(&info->userptr_valid_list);
@@ -1213,12 +1422,10 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
}
info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
- atomic_set(&info->evicted_bos, 0);
INIT_DELAYED_WORK(&info->restore_userptr_work,
amdgpu_amdkfd_restore_userptr_worker);
*process_info = info;
- *ef = dma_fence_get(&info->eviction_fence->base);
}
vm->process_info = *process_info;
@@ -1227,7 +1434,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
ret = amdgpu_bo_reserve(vm->root.bo, true);
if (ret)
goto reserve_pd_fail;
- ret = vm_validate_pt_pd_bos(vm);
+ ret = vm_validate_pt_pd_bos(vm, NULL);
if (ret) {
pr_err("validate_pt_pd_bos() failed\n");
goto validate_pd_fail;
@@ -1236,11 +1443,12 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
AMDGPU_FENCE_OWNER_KFD, false);
if (ret)
goto wait_pd_fail;
- ret = dma_resv_reserve_shared(vm->root.bo->tbo.base.resv, 1);
+ ret = dma_resv_reserve_fences(vm->root.bo->tbo.base.resv, 1);
if (ret)
goto reserve_shared_fail;
- amdgpu_bo_fence(vm->root.bo,
- &vm->process_info->eviction_fence->base, true);
+ dma_resv_add_fence(vm->root.bo->tbo.base.resv,
+ &vm->process_info->eviction_fence->base,
+ DMA_RESV_USAGE_BOOKKEEP);
amdgpu_bo_unreserve(vm->root.bo);
/* Update process info */
@@ -1248,6 +1456,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
list_add_tail(&vm->vm_list_node,
&(vm->process_info->vm_list_head));
vm->process_info->n_vms++;
+ if (ef)
+ *ef = dma_fence_get(&vm->process_info->eviction_fence->base);
mutex_unlock(&vm->process_info->lock);
return 0;
@@ -1259,54 +1469,99 @@ validate_pd_fail:
reserve_pd_fail:
vm->process_info = NULL;
if (info) {
- /* Two fence references: one in info and one in *ef */
dma_fence_put(&info->eviction_fence->base);
- dma_fence_put(*ef);
- *ef = NULL;
*process_info = NULL;
put_pid(info->pid);
create_evict_fence_fail:
mutex_destroy(&info->lock);
+ mutex_destroy(&info->notifier_lock);
kfree(info);
}
return ret;
}
-int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
- struct file *filp, u32 pasid,
+/**
+ * amdgpu_amdkfd_gpuvm_pin_bo() - Pins a BO using following criteria
+ * @bo: Handle of buffer object being pinned
+ * @domain: Domain into which BO should be pinned
+ *
+ * - USERPTR BOs are UNPINNABLE and will return error
+ * - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their
+ * PIN count incremented. It is valid to PIN a BO multiple times
+ *
+ * Return: ZERO if successful in pinning, Non-Zero in case of error.
+ */
+static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
+{
+ int ret = 0;
+
+ ret = amdgpu_bo_reserve(bo, false);
+ if (unlikely(ret))
+ return ret;
+
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) {
+ /*
+ * If bo is not contiguous on VRAM, move to system memory first to ensure
+ * we can get contiguous VRAM space after evicting other BOs.
+ */
+ if (!(bo->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {
+ struct ttm_operation_ctx ctx = { true, false };
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (unlikely(ret)) {
+ pr_debug("validate bo 0x%p to GTT failed %d\n", &bo->tbo, ret);
+ goto out;
+ }
+ }
+ }
+
+ ret = amdgpu_bo_pin(bo, domain);
+ if (ret)
+ pr_err("Error in Pinning BO to domain: %d\n", domain);
+
+ amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
+out:
+ amdgpu_bo_unreserve(bo);
+ return ret;
+}
+
+/**
+ * amdgpu_amdkfd_gpuvm_unpin_bo() - Unpins BO using following criteria
+ * @bo: Handle of buffer object being unpinned
+ *
+ * - Is an illegal request for USERPTR BOs and is ignored
+ * - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their
+ * PIN count decremented. Calls to UNPIN must balance calls to PIN
+ */
+static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
+{
+ int ret = 0;
+
+ ret = amdgpu_bo_reserve(bo, false);
+ if (unlikely(ret))
+ return;
+
+ amdgpu_bo_unpin(bo);
+ amdgpu_bo_unreserve(bo);
+}
+
+int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
+ struct amdgpu_vm *avm,
void **process_info,
struct dma_fence **ef)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct amdgpu_fpriv *drv_priv;
- struct amdgpu_vm *avm;
int ret;
- ret = amdgpu_file_to_fpriv(filp, &drv_priv);
- if (ret)
- return ret;
- avm = &drv_priv->vm;
-
/* Already a compute VM? */
if (avm->process_info)
return -EINVAL;
- /* Free the original amdgpu allocated pasid,
- * will be replaced with kfd allocated pasid.
- */
- if (avm->pasid) {
- amdgpu_pasid_free(avm->pasid);
- amdgpu_vm_set_pasid(adev, avm, 0);
- }
-
/* Convert VM into a compute VM */
ret = amdgpu_vm_make_compute(adev, avm);
if (ret)
return ret;
- ret = amdgpu_vm_set_pasid(adev, avm, pasid);
- if (ret)
- return ret;
/* Initialize KFD part of the VM and process info */
ret = init_kfd_vm(avm, process_info, ef);
if (ret)
@@ -1321,16 +1576,10 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
{
struct amdkfd_process_info *process_info = vm->process_info;
- struct amdgpu_bo *pd = vm->root.bo;
if (!process_info)
return;
- /* Release eviction fence from PD */
- amdgpu_bo_reserve(pd, false);
- amdgpu_bo_fence(pd, NULL, false);
- amdgpu_bo_unreserve(pd);
-
/* Update process info */
mutex_lock(&process_info->lock);
process_info->n_vms--;
@@ -1349,31 +1598,11 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
cancel_delayed_work_sync(&process_info->restore_userptr_work);
put_pid(process_info->pid);
mutex_destroy(&process_info->lock);
+ mutex_destroy(&process_info->notifier_lock);
kfree(process_info);
}
}
-void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv)
-{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct amdgpu_vm *avm;
-
- if (WARN_ON(!kgd || !drm_priv))
- return;
-
- avm = drm_priv_to_vm(drm_priv);
-
- pr_debug("Releasing process vm %p\n", avm);
-
- /* The original pasid of amdgpu vm has already been
- * released during making a amdgpu vm to a compute vm
- * The current pasid is managed by kfd and will be
- * released on kfd process destroy. Set amdgpu pasid
- * to 0 to avoid duplicate release.
- */
- amdgpu_vm_release_compute(adev, avm);
-}
-
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
{
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
@@ -1385,19 +1614,98 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
return avm->pd_phys_addr;
}
+void amdgpu_amdkfd_block_mmu_notifications(void *p)
+{
+ struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
+
+ mutex_lock(&pinfo->lock);
+ WRITE_ONCE(pinfo->block_mmu_notifications, true);
+ mutex_unlock(&pinfo->lock);
+}
+
+int amdgpu_amdkfd_criu_resume(void *p)
+{
+ int ret = 0;
+ struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
+
+ mutex_lock(&pinfo->lock);
+ pr_debug("scheduling work\n");
+ mutex_lock(&pinfo->notifier_lock);
+ pinfo->evicted_bos++;
+ mutex_unlock(&pinfo->notifier_lock);
+ if (!READ_ONCE(pinfo->block_mmu_notifications)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ WRITE_ONCE(pinfo->block_mmu_notifications, false);
+ queue_delayed_work(system_freezable_wq,
+ &pinfo->restore_userptr_work, 0);
+
+out_unlock:
+ mutex_unlock(&pinfo->lock);
+ return ret;
+}
+
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
+ uint8_t xcp_id)
+{
+ uint64_t reserved_for_pt =
+ ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint64_t reserved_for_ras = (con ? con->reserved_pages_in_bytes : 0);
+ ssize_t available;
+ uint64_t vram_available, system_mem_available, ttm_mem_available;
+
+ spin_lock(&kfd_mem_limit.mem_limit_lock);
+ if (adev->apu_prefer_gtt && !adev->gmc.is_app_apu)
+ vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
+ - adev->kfd.vram_used_aligned[xcp_id];
+ else
+ vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
+ - adev->kfd.vram_used_aligned[xcp_id]
+ - atomic64_read(&adev->vram_pin_size)
+ - reserved_for_pt
+ - reserved_for_ras;
+
+ if (adev->apu_prefer_gtt) {
+ system_mem_available = no_system_mem_limit ?
+ kfd_mem_limit.max_system_mem_limit :
+ kfd_mem_limit.max_system_mem_limit -
+ kfd_mem_limit.system_mem_used;
+
+ ttm_mem_available = kfd_mem_limit.max_ttm_mem_limit -
+ kfd_mem_limit.ttm_mem_used;
+
+ available = min3(system_mem_available, ttm_mem_available,
+ vram_available);
+ available = ALIGN_DOWN(available, PAGE_SIZE);
+ } else {
+ available = ALIGN_DOWN(vram_available, VRAM_AVAILABLITY_ALIGN);
+ }
+
+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
+
+ if (available < 0)
+ available = 0;
+
+ return available;
+}
+
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
- struct kgd_dev *kgd, uint64_t va, uint64_t size,
+ struct amdgpu_device *adev, uint64_t va, uint64_t size,
void *drm_priv, struct kgd_mem **mem,
- uint64_t *offset, uint32_t flags)
+ uint64_t *offset, uint32_t flags, bool criu_resume)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
+ struct amdgpu_fpriv *fpriv = container_of(avm, struct amdgpu_fpriv, vm);
enum ttm_bo_type bo_type = ttm_bo_type_device;
struct sg_table *sg = NULL;
uint64_t user_addr = 0;
struct amdgpu_bo *bo;
struct drm_gem_object *gobj = NULL;
u32 domain, alloc_domain;
+ uint64_t aligned_size;
+ int8_t xcp_id = -1;
u64 alloc_flags;
int ret;
@@ -1406,34 +1714,54 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
*/
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
- alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
- alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
+
+ if (adev->apu_prefer_gtt) {
+ domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_flags = 0;
+ } else {
+ alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+ alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
+
+ /* For contiguous VRAM allocation */
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS)
+ alloc_flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ }
+ xcp_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ?
+ 0 : fpriv->xcp_id;
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_flags = 0;
- } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ } else {
domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
alloc_flags = AMDGPU_GEM_CREATE_PREEMPTIBLE;
- if (!offset || !*offset)
- return -EINVAL;
- user_addr = untagged_addr(*offset);
- } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
- KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
- domain = AMDGPU_GEM_DOMAIN_GTT;
- alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
- bo_type = ttm_bo_type_sg;
- alloc_flags = 0;
- if (size > UINT_MAX)
+
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ if (!offset || !*offset)
+ return -EINVAL;
+ user_addr = untagged_addr(*offset);
+ } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
+ bo_type = ttm_bo_type_sg;
+ if (size > UINT_MAX)
+ return -EINVAL;
+ sg = create_sg_table(*offset, size);
+ if (!sg)
+ return -ENOMEM;
+ } else {
return -EINVAL;
- sg = create_doorbell_sg(*offset, size);
- if (!sg)
- return -ENOMEM;
- } else {
- return -EINVAL;
+ }
}
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
+ alloc_flags |= AMDGPU_GEM_CREATE_COHERENT;
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT)
+ alloc_flags |= AMDGPU_GEM_CREATE_EXT_COHERENT;
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED)
+ alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED;
+
*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
if (!*mem) {
ret = -ENOMEM;
@@ -1448,23 +1776,26 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
* the memory.
*/
if ((*mem)->aql_queue)
- size = size >> 1;
+ size >>= 1;
+ aligned_size = PAGE_ALIGN(size);
(*mem)->alloc_flags = flags;
amdgpu_sync_create(&(*mem)->sync);
- ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
+ ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags,
+ xcp_id);
if (ret) {
pr_debug("Insufficient memory\n");
goto err_reserve_limit;
}
- pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
- va, size, domain_string(alloc_domain));
+ pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s xcp_id %d\n",
+ va, (*mem)->aql_queue ? size << 1 : size,
+ domain_string(alloc_domain), xcp_id);
- ret = amdgpu_gem_object_create(adev, size, 1, alloc_domain, alloc_flags,
- bo_type, NULL, &gobj);
+ ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags,
+ bo_type, NULL, &gobj, xcp_id + 1);
if (ret) {
pr_debug("Failed to create BO on domain %s. ret %d\n",
domain_string(alloc_domain), ret);
@@ -1475,6 +1806,9 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
pr_debug("Failed to allow vma node access. ret %d\n", ret);
goto err_node_allow;
}
+ ret = drm_gem_handle_create(adev->kfd.client.file, gobj, &(*mem)->gem_handle);
+ if (ret)
+ goto err_gem_handle_create;
bo = gem_to_amdgpu_bo(gobj);
if (bo_type == ttm_bo_type_sg) {
bo->tbo.sg = sg;
@@ -1489,12 +1823,32 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
(*mem)->domain = domain;
(*mem)->mapped_to_gpu_memory = 0;
(*mem)->process_info = avm->process_info;
+
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
if (user_addr) {
- ret = init_user_pages(*mem, user_addr);
+ pr_debug("creating userptr BO for user_addr = %llx\n", user_addr);
+ ret = init_user_pages(*mem, user_addr, criu_resume);
if (ret)
goto allocate_init_user_pages_failed;
+ } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
+ ret = amdgpu_amdkfd_gpuvm_pin_bo(bo, AMDGPU_GEM_DOMAIN_GTT);
+ if (ret) {
+ pr_err("Pinning MMIO/DOORBELL BO during ALLOC FAILED\n");
+ goto err_pin_bo;
+ }
+ bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+ bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+ } else {
+ mutex_lock(&avm->process_info->lock);
+ if (avm->process_info->eviction_fence &&
+ !dma_fence_is_signaled(&avm->process_info->eviction_fence->base))
+ ret = amdgpu_amdkfd_bo_validate_and_fence(bo, domain,
+ &avm->process_info->eviction_fence->base);
+ mutex_unlock(&avm->process_info->lock);
+ if (ret)
+ goto err_validate_bo;
}
if (offset)
@@ -1503,14 +1857,19 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
return 0;
allocate_init_user_pages_failed:
+err_pin_bo:
+err_validate_bo:
remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+ drm_gem_handle_delete(adev->kfd.client.file, (*mem)->gem_handle);
+err_gem_handle_create:
drm_vma_node_revoke(&gobj->vma_node, drm_priv);
err_node_allow:
/* Don't unreserve system mem limit twice */
goto err_reserve_limit;
err_bo_create:
- unreserve_mem_limit(adev, size, alloc_domain, !!sg);
+ amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id);
err_reserve_limit:
+ amdgpu_sync_free(&(*mem)->sync);
mutex_destroy(&(*mem)->lock);
if (gobj)
drm_gem_object_put(gobj);
@@ -1525,19 +1884,27 @@ err:
}
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
- struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
+ struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
uint64_t *size)
{
struct amdkfd_process_info *process_info = mem->process_info;
unsigned long bo_size = mem->bo->tbo.base.size;
+ bool use_release_notifier = (mem->bo->kfd_bo == mem);
struct kfd_mem_attachment *entry, *tmp;
struct bo_vm_reservation_context ctx;
- struct ttm_validate_buffer *bo_list_entry;
unsigned int mapped_to_gpu_memory;
int ret;
bool is_imported = false;
mutex_lock(&mem->lock);
+
+ /* Unpin MMIO/DOORBELL BOs that were pinned during allocation */
+ if (mem->alloc_flags &
+ (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
+ amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
+ }
+
mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
is_imported = mem->is_imported;
mutex_unlock(&mem->lock);
@@ -1552,30 +1919,32 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
}
/* Make sure restore workers don't access the BO any more */
- bo_list_entry = &mem->validate_list;
mutex_lock(&process_info->lock);
- list_del(&bo_list_entry->head);
+ list_del(&mem->validate_list);
mutex_unlock(&process_info->lock);
- /* No more MMU notifiers */
- amdgpu_mn_unregister(mem->bo);
+ /* Cleanup user pages and MMU notifiers */
+ if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
+ amdgpu_hmm_unregister(mem->bo);
+ mutex_lock(&process_info->notifier_lock);
+ amdgpu_hmm_range_free(mem->range);
+ mutex_unlock(&process_info->notifier_lock);
+ }
ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
if (unlikely(ret))
return ret;
- /* The eviction fence should be removed by the last unmap.
- * TODO: Log an error condition if the bo still has the eviction fence
- * attached
- */
amdgpu_amdkfd_remove_eviction_fence(mem->bo,
process_info->eviction_fence);
pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
mem->va + bo_size * (1 + mem->aql_queue));
/* Remove from VM internal data structures */
- list_for_each_entry_safe(entry, tmp, &mem->attachments, list)
+ list_for_each_entry_safe(entry, tmp, &mem->attachments, list) {
+ kfd_mem_dmaunmap_attachment(mem, entry);
kfd_mem_detach(entry);
+ }
ret = unreserve_bo_and_vms(&ctx, false, false);
@@ -1591,11 +1960,12 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
}
/* Update the size of the BO being freed if it was allocated from
- * VRAM and is not imported.
+ * VRAM and is not imported. For APP APUs, VRAM allocations are done
+ * in the GTT domain.
*/
if (size) {
- if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) &&
- (!is_imported))
+ if (!is_imported &&
+ mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
*size = bo_size;
else
*size = 0;
@@ -1603,8 +1973,11 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
/* Free the BO*/
drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
- if (mem->dmabuf)
+ drm_gem_handle_delete(adev->kfd.client.file, mem->gem_handle);
+ if (mem->dmabuf) {
dma_buf_put(mem->dmabuf);
+ mem->dmabuf = NULL;
+ }
mutex_destroy(&mem->lock);
/* If this releases the last reference, it will end up calling
@@ -1613,14 +1986,20 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
*/
drm_gem_object_put(&mem->bo->tbo.base);
+ /*
+ * For kgd_mem allocated in amdgpu_amdkfd_gpuvm_import_dmabuf(),
+ * explicitly free it here.
+ */
+ if (!use_release_notifier)
+ kfree(mem);
+
return ret;
}
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
- struct kgd_dev *kgd, struct kgd_mem *mem,
- void *drm_priv, bool *table_freed)
+ struct amdgpu_device *adev, struct kgd_mem *mem,
+ void *drm_priv)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
int ret;
struct amdgpu_bo *bo;
@@ -1642,14 +2021,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
*/
mutex_lock(&mem->process_info->lock);
- /* Lock mmap-sem. If we find an invalid userptr BO, we can be
+ /* Lock notifier lock. If we find an invalid userptr BO, we can be
* sure that the MMU notifier is no longer running
* concurrently and the queues are actually stopped
*/
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
- mmap_write_lock(current->mm);
- is_invalid_userptr = atomic_read(&mem->invalid);
- mmap_write_unlock(current->mm);
+ mutex_lock(&mem->process_info->notifier_lock);
+ is_invalid_userptr = !!mem->invalid;
+ mutex_unlock(&mem->process_info->notifier_lock);
}
mutex_lock(&mem->lock);
@@ -1681,23 +2060,10 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
is_invalid_userptr = true;
- ret = vm_validate_pt_pd_bos(avm);
+ ret = vm_validate_pt_pd_bos(avm, NULL);
if (unlikely(ret))
goto out_unreserve;
- if (mem->mapped_to_gpu_memory == 0 &&
- !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
- /* Validate BO only once. The eviction fence gets added to BO
- * the first time it is mapped. Validate will wait for all
- * background evictions to complete.
- */
- ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
- if (ret) {
- pr_debug("Validate failed\n");
- goto out_unreserve;
- }
- }
-
list_for_each_entry(entry, &mem->attachments, list) {
if (entry->bo_va->base.vm != avm || entry->is_mapped)
continue;
@@ -1706,7 +2072,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
entry->va, entry->va + bo_size, entry);
ret = map_bo_to_gpuvm(mem, entry, ctx.sync,
- is_invalid_userptr, table_freed);
+ is_invalid_userptr);
if (ret) {
pr_err("Failed to map bo to gpuvm\n");
goto out_unreserve;
@@ -1724,18 +2090,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
mem->mapped_to_gpu_memory);
}
- if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count)
- amdgpu_bo_fence(bo,
- &avm->process_info->eviction_fence->base,
- true);
ret = unreserve_bo_and_vms(&ctx, false, false);
- /* Only apply no TLB flush on Aldebaran to
- * workaround regressions on other Asics.
- */
- if (table_freed && (adev->asic_type != CHIP_ALDEBARAN))
- *table_freed = true;
-
goto out;
out_unreserve:
@@ -1746,11 +2102,41 @@ out:
return ret;
}
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)
+{
+ struct kfd_mem_attachment *entry;
+ struct amdgpu_vm *vm;
+ int ret;
+
+ vm = drm_priv_to_vm(drm_priv);
+
+ mutex_lock(&mem->lock);
+
+ ret = amdgpu_bo_reserve(mem->bo, true);
+ if (ret)
+ goto out;
+
+ list_for_each_entry(entry, &mem->attachments, list) {
+ if (entry->bo_va->base.vm != vm)
+ continue;
+ if (entry->bo_va->base.bo->tbo.ttm &&
+ !entry->bo_va->base.bo->tbo.ttm->sg)
+ continue;
+
+ kfd_mem_dmaunmap_attachment(mem, entry);
+ }
+
+ amdgpu_bo_unreserve(mem->bo);
+out:
+ mutex_unlock(&mem->lock);
+
+ return ret;
+}
+
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
- struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv)
+ struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv)
{
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
- struct amdkfd_process_info *process_info = avm->process_info;
unsigned long bo_size = mem->bo->tbo.base.size;
struct kfd_mem_attachment *entry;
struct bo_vm_reservation_context ctx;
@@ -1767,7 +2153,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
goto unreserve_out;
}
- ret = vm_validate_pt_pd_bos(avm);
+ ret = vm_validate_pt_pd_bos(avm, NULL);
if (unlikely(ret))
goto unreserve_out;
@@ -1783,7 +2169,10 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
entry->va, entry->va + bo_size, entry);
- unmap_bo_from_gpuvm(mem, entry, ctx.sync);
+ ret = unmap_bo_from_gpuvm(mem, entry, ctx.sync);
+ if (ret)
+ goto unreserve_out;
+
entry->is_mapped = false;
mem->mapped_to_gpu_memory--;
@@ -1791,15 +2180,6 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
mem->mapped_to_gpu_memory);
}
- /* If BO is unmapped from all VMs, unfence it. It can be evicted if
- * required.
- */
- if (mem->mapped_to_gpu_memory == 0 &&
- !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) &&
- !mem->bo->tbo.pin_count)
- amdgpu_amdkfd_remove_eviction_fence(mem->bo,
- process_info->eviction_fence);
-
unreserve_out:
unreserve_bo_and_vms(&ctx, false, false);
out:
@@ -1808,7 +2188,7 @@ out:
}
int amdgpu_amdkfd_gpuvm_sync_memory(
- struct kgd_dev *kgd, struct kgd_mem *mem, bool intr)
+ struct amdgpu_device *adev, struct kgd_mem *mem, bool intr)
{
struct amdgpu_sync sync;
int ret;
@@ -1824,8 +2204,69 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
return ret;
}
-int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
- struct kgd_mem *mem, void **kptr, uint64_t *size)
+/**
+ * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count
+ * @bo: Buffer object to be mapped
+ * @bo_gart: Return bo reference
+ *
+ * Before return, bo reference count is incremented. To release the reference and unpin/
+ * unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
+ */
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo, struct amdgpu_bo **bo_gart)
+{
+ int ret;
+
+ ret = amdgpu_bo_reserve(bo, true);
+ if (ret) {
+ pr_err("Failed to reserve bo. ret %d\n", ret);
+ goto err_reserve_bo_failed;
+ }
+
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ if (ret) {
+ pr_err("Failed to pin bo. ret %d\n", ret);
+ goto err_pin_bo_failed;
+ }
+
+ ret = amdgpu_ttm_alloc_gart(&bo->tbo);
+ if (ret) {
+ pr_err("Failed to bind bo to GART. ret %d\n", ret);
+ goto err_map_bo_gart_failed;
+ }
+
+ amdgpu_amdkfd_remove_eviction_fence(
+ bo, bo->vm_bo->vm->process_info->eviction_fence);
+
+ amdgpu_bo_unreserve(bo);
+
+ *bo_gart = amdgpu_bo_ref(bo);
+
+ return 0;
+
+err_map_bo_gart_failed:
+ amdgpu_bo_unpin(bo);
+err_pin_bo_failed:
+ amdgpu_bo_unreserve(bo);
+err_reserve_bo_failed:
+
+ return ret;
+}
+
+/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Map a GTT BO for kernel CPU access
+ *
+ * @mem: Buffer object to be mapped for CPU access
+ * @kptr[out]: pointer in kernel CPU address space
+ * @size[out]: size of the buffer
+ *
+ * Pins the BO and maps it for kernel CPU access. The eviction fence is removed
+ * from the BO, since pinned BOs cannot be evicted. The bo must remain on the
+ * validate_list, so the GPU mapping can be restored after a page table was
+ * evicted.
+ *
+ * Return: 0 on success, error code on failure
+ */
+int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
+ void **kptr, uint64_t *size)
{
int ret;
struct amdgpu_bo *bo = mem->bo;
@@ -1835,9 +2276,6 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
return -EINVAL;
}
- /* delete kgd_mem from kfd_bo_list to avoid re-validating
- * this BO in BO's restoring after eviction.
- */
mutex_lock(&mem->process_info->lock);
ret = amdgpu_bo_reserve(bo, true);
@@ -1860,7 +2298,6 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
amdgpu_amdkfd_remove_eviction_fence(
bo, mem->process_info->eviction_fence);
- list_del_init(&mem->validate_list.head);
if (size)
*size = amdgpu_bo_size(bo);
@@ -1880,51 +2317,45 @@ bo_reserve_failed:
return ret;
}
-void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_dev *kgd, struct kgd_mem *mem)
+/** amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel() - Unmap a GTT BO from kernel CPU access
+ *
+ * @mem: Buffer object to be unmapped for CPU access
+ *
+ * Removes the kernel CPU mapping and unpins the BO. It does not restore the
+ * eviction fence, so this function should only be used for cleanup before the
+ * BO is destroyed.
+ */
+void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem)
{
struct amdgpu_bo *bo = mem->bo;
- amdgpu_bo_reserve(bo, true);
+ (void)amdgpu_bo_reserve(bo, true);
amdgpu_bo_kunmap(bo);
amdgpu_bo_unpin(bo);
amdgpu_bo_unreserve(bo);
}
-int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
- struct kfd_vm_fault_info *mem)
+int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
+ struct kfd_vm_fault_info *mem)
{
- struct amdgpu_device *adev;
-
- adev = (struct amdgpu_device *)kgd;
- if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
+ if (atomic_read_acquire(&adev->gmc.vm_fault_info_updated) == 1) {
*mem = *adev->gmc.vm_fault_info;
- mb();
- atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+ atomic_set_release(&adev->gmc.vm_fault_info_updated, 0);
}
return 0;
}
-int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
- struct dma_buf *dma_buf,
- uint64_t va, void *drm_priv,
- struct kgd_mem **mem, uint64_t *size,
- uint64_t *mmap_offset)
+static int import_obj_create(struct amdgpu_device *adev,
+ struct dma_buf *dma_buf,
+ struct drm_gem_object *obj,
+ uint64_t va, void *drm_priv,
+ struct kgd_mem **mem, uint64_t *size,
+ uint64_t *mmap_offset)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
- struct drm_gem_object *obj;
struct amdgpu_bo *bo;
int ret;
- if (dma_buf->ops != &amdgpu_dmabuf_ops)
- /* Can't handle non-graphics buffers */
- return -EINVAL;
-
- obj = dma_buf->priv;
- if (drm_to_adev(obj->dev) != adev)
- /* Can't handle buffers from other devices */
- return -EINVAL;
-
bo = gem_to_amdgpu_bo(obj);
if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT)))
@@ -1936,10 +2367,8 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
return -ENOMEM;
ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
- if (ret) {
- kfree(mem);
- return ret;
- }
+ if (ret)
+ goto err_free_mem;
if (size)
*size = amdgpu_bo_size(bo);
@@ -1956,48 +2385,131 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
| KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
- drm_gem_object_get(&bo->tbo.base);
+ get_dma_buf(dma_buf);
+ (*mem)->dmabuf = dma_buf;
(*mem)->bo = bo;
(*mem)->va = va;
- (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
- AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+ (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) &&
+ !adev->apu_prefer_gtt ?
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+
(*mem)->mapped_to_gpu_memory = 0;
(*mem)->process_info = avm->process_info;
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
amdgpu_sync_create(&(*mem)->sync);
(*mem)->is_imported = true;
+ mutex_lock(&avm->process_info->lock);
+ if (avm->process_info->eviction_fence &&
+ !dma_fence_is_signaled(&avm->process_info->eviction_fence->base))
+ ret = amdgpu_amdkfd_bo_validate_and_fence(bo, (*mem)->domain,
+ &avm->process_info->eviction_fence->base);
+ mutex_unlock(&avm->process_info->lock);
+ if (ret)
+ goto err_remove_mem;
+
+ return 0;
+
+err_remove_mem:
+ remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+ drm_vma_node_revoke(&obj->vma_node, drm_priv);
+err_free_mem:
+ kfree(*mem);
+ return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd,
+ uint64_t va, void *drm_priv,
+ struct kgd_mem **mem, uint64_t *size,
+ uint64_t *mmap_offset)
+{
+ struct drm_gem_object *obj;
+ uint32_t handle;
+ int ret;
+
+ ret = drm_gem_prime_fd_to_handle(&adev->ddev, adev->kfd.client.file, fd,
+ &handle);
+ if (ret)
+ return ret;
+ obj = drm_gem_object_lookup(adev->kfd.client.file, handle);
+ if (!obj) {
+ ret = -EINVAL;
+ goto err_release_handle;
+ }
+
+ ret = import_obj_create(adev, obj->dma_buf, obj, va, drm_priv, mem, size,
+ mmap_offset);
+ if (ret)
+ goto err_put_obj;
+
+ (*mem)->gem_handle = handle;
+
return 0;
+
+err_put_obj:
+ drm_gem_object_put(obj);
+err_release_handle:
+ drm_gem_handle_delete(adev->kfd.client.file, handle);
+ return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem,
+ struct dma_buf **dma_buf)
+{
+ int ret;
+
+ mutex_lock(&mem->lock);
+ ret = kfd_mem_export_dmabuf(mem);
+ if (ret)
+ goto out;
+
+ get_dma_buf(mem->dmabuf);
+ *dma_buf = mem->dmabuf;
+out:
+ mutex_unlock(&mem->lock);
+ return ret;
}
/* Evict a userptr BO by stopping the queues if necessary
*
* Runs in MMU notifier, may be in RECLAIM_FS context. This means it
* cannot do any memory allocations, and cannot take any locks that
- * are held elsewhere while allocating memory. Therefore this is as
- * simple as possible, using atomic counters.
+ * are held elsewhere while allocating memory.
*
* It doesn't do anything to the BO itself. The real work happens in
* restore, where we get updated page addresses. This function only
* ensures that GPU access to the BO is stopped.
*/
-int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
- struct mm_struct *mm)
+int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
+ unsigned long cur_seq, struct kgd_mem *mem)
{
struct amdkfd_process_info *process_info = mem->process_info;
- int evicted_bos;
int r = 0;
- atomic_inc(&mem->invalid);
- evicted_bos = atomic_inc_return(&process_info->evicted_bos);
- if (evicted_bos == 1) {
+ /* Do not process MMU notifications during CRIU restore until
+ * KFD_CRIU_OP_RESUME IOCTL is received
+ */
+ if (READ_ONCE(process_info->block_mmu_notifications))
+ return 0;
+
+ mutex_lock(&process_info->notifier_lock);
+ mmu_interval_set_seq(mni, cur_seq);
+
+ mem->invalid++;
+ if (++process_info->evicted_bos == 1) {
/* First eviction, stop the queues */
- r = kgd2kfd_quiesce_mm(mm);
- if (r)
+ r = kgd2kfd_quiesce_mm(mni->mm,
+ KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
+
+ if (r && r != -ESRCH)
pr_err("Failed to quiesce KFD\n");
- schedule_delayed_work(&process_info->restore_userptr_work,
- msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+
+ if (r != -ESRCH)
+ queue_delayed_work(system_freezable_wq,
+ &process_info->restore_userptr_work,
+ msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
}
+ mutex_unlock(&process_info->notifier_lock);
return r;
}
@@ -2014,52 +2526,63 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
struct kgd_mem *mem, *tmp_mem;
struct amdgpu_bo *bo;
struct ttm_operation_ctx ctx = { false, false };
- int invalid, ret;
+ uint32_t invalid;
+ int ret = 0;
- /* Move all invalidated BOs to the userptr_inval_list and
- * release their user pages by migration to the CPU domain
- */
+ mutex_lock(&process_info->notifier_lock);
+
+ /* Move all invalidated BOs to the userptr_inval_list */
list_for_each_entry_safe(mem, tmp_mem,
&process_info->userptr_valid_list,
- validate_list.head) {
- if (!atomic_read(&mem->invalid))
- continue; /* BO is still valid */
-
- bo = mem->bo;
-
- if (amdgpu_bo_reserve(bo, true))
- return -EAGAIN;
- amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
- ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- amdgpu_bo_unreserve(bo);
- if (ret) {
- pr_err("%s: Failed to invalidate userptr BO\n",
- __func__);
- return -EAGAIN;
- }
-
- list_move_tail(&mem->validate_list.head,
- &process_info->userptr_inval_list);
- }
-
- if (list_empty(&process_info->userptr_inval_list))
- return 0; /* All evicted userptr BOs were freed */
+ validate_list)
+ if (mem->invalid)
+ list_move_tail(&mem->validate_list,
+ &process_info->userptr_inval_list);
/* Go through userptr_inval_list and update any invalid user_pages */
list_for_each_entry(mem, &process_info->userptr_inval_list,
- validate_list.head) {
- invalid = atomic_read(&mem->invalid);
+ validate_list) {
+ invalid = mem->invalid;
if (!invalid)
/* BO hasn't been invalidated since the last
- * revalidation attempt. Keep its BO list.
+ * revalidation attempt. Keep its page list.
*/
continue;
bo = mem->bo;
+ amdgpu_hmm_range_free(mem->range);
+ mem->range = NULL;
+
+ /* BO reservations and getting user pages (hmm_range_fault)
+ * must happen outside the notifier lock
+ */
+ mutex_unlock(&process_info->notifier_lock);
+
+ /* Move the BO to system (CPU) domain if necessary to unmap
+ * and free the SG table
+ */
+ if (bo->tbo.resource->mem_type != TTM_PL_SYSTEM) {
+ if (amdgpu_bo_reserve(bo, true))
+ return -EAGAIN;
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ amdgpu_bo_unreserve(bo);
+ if (ret) {
+ pr_err("%s: Failed to invalidate userptr BO\n",
+ __func__);
+ return -EAGAIN;
+ }
+ }
+
+ mem->range = amdgpu_hmm_range_alloc(NULL);
+ if (unlikely(!mem->range))
+ return -ENOMEM;
/* Get updated user pages */
- ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
+ ret = amdgpu_ttm_tt_get_user_pages(bo, mem->range);
if (ret) {
+ amdgpu_hmm_range_free(mem->range);
+ mem->range = NULL;
pr_debug("Failed %d to get user pages\n", ret);
/* Return -EFAULT bad address error as success. It will
@@ -2071,86 +2594,100 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
*/
if (ret != -EFAULT)
return ret;
- } else {
- /*
- * FIXME: Cannot ignore the return code, must hold
- * notifier_lock
+ /* If applications unmap memory before destroying the userptr
+ * from the KFD, trigger a segmentation fault in VM debug mode.
*/
- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+ if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) {
+ struct kfd_process *p;
+
+ pr_err("Pid %d unmapped memory before destroying userptr at GPU addr 0x%llx\n",
+ pid_nr(process_info->pid), mem->va);
+
+ // Send GPU VM fault to user space
+ p = kfd_lookup_process_by_pid(process_info->pid);
+ if (p) {
+ kfd_signal_vm_fault_event_with_userptr(p, mem->va);
+ kfd_unref_process(p);
+ }
+ }
+
+ ret = 0;
}
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->range);
+
+ mutex_lock(&process_info->notifier_lock);
+
/* Mark the BO as valid unless it was invalidated
* again concurrently.
*/
- if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
- return -EAGAIN;
+ if (mem->invalid != invalid) {
+ ret = -EAGAIN;
+ goto unlock_out;
+ }
+ /* set mem valid if mem has hmm range associated */
+ if (mem->range)
+ mem->invalid = 0;
}
- return 0;
+unlock_out:
+ mutex_unlock(&process_info->notifier_lock);
+
+ return ret;
}
/* Validate invalid userptr BOs
*
- * Validates BOs on the userptr_inval_list, and moves them back to the
- * userptr_valid_list. Also updates GPUVM page tables with new page
- * addresses and waits for the page table updates to complete.
+ * Validates BOs on the userptr_inval_list. Also updates GPUVM page tables
+ * with new page addresses and waits for the page table updates to complete.
*/
static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
{
- struct amdgpu_bo_list_entry *pd_bo_list_entries;
- struct list_head resv_list, duplicates;
- struct ww_acquire_ctx ticket;
+ struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_sync sync;
+ struct drm_exec exec;
struct amdgpu_vm *peer_vm;
struct kgd_mem *mem, *tmp_mem;
struct amdgpu_bo *bo;
- struct ttm_operation_ctx ctx = { false, false };
- int i, ret;
-
- pd_bo_list_entries = kcalloc(process_info->n_vms,
- sizeof(struct amdgpu_bo_list_entry),
- GFP_KERNEL);
- if (!pd_bo_list_entries) {
- pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
- ret = -ENOMEM;
- goto out_no_mem;
- }
-
- INIT_LIST_HEAD(&resv_list);
- INIT_LIST_HEAD(&duplicates);
+ int ret;
- /* Get all the page directory BOs that need to be reserved */
- i = 0;
- list_for_each_entry(peer_vm, &process_info->vm_list_head,
- vm_list_node)
- amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
- &pd_bo_list_entries[i++]);
- /* Add the userptr_inval_list entries to resv_list */
- list_for_each_entry(mem, &process_info->userptr_inval_list,
- validate_list.head) {
- list_add_tail(&mem->resv_list.head, &resv_list);
- mem->resv_list.bo = mem->validate_list.bo;
- mem->resv_list.num_shared = mem->validate_list.num_shared;
- }
+ amdgpu_sync_create(&sync);
+ drm_exec_init(&exec, 0, 0);
/* Reserve all BOs and page tables for validation */
- ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
- WARN(!list_empty(&duplicates), "Duplicates should be empty");
- if (ret)
- goto out_free;
+ drm_exec_until_all_locked(&exec) {
+ /* Reserve all the page directories */
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto unreserve_out;
+ }
- amdgpu_sync_create(&sync);
+ /* Reserve the BOs of the userptr_inval_list entries */
+ list_for_each_entry(mem, &process_info->userptr_inval_list,
+ validate_list) {
+ struct drm_gem_object *gobj;
- ret = process_validate_vms(process_info);
+ gobj = &mem->bo->tbo.base;
+ ret = drm_exec_prepare_obj(&exec, gobj, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto unreserve_out;
+ }
+ }
+
+ ret = process_validate_vms(process_info, NULL);
if (ret)
goto unreserve_out;
/* Validate BOs and update GPUVM page tables */
list_for_each_entry_safe(mem, tmp_mem,
&process_info->userptr_inval_list,
- validate_list.head) {
+ validate_list) {
struct kfd_mem_attachment *attachment;
bo = mem->bo;
@@ -2165,9 +2702,6 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
}
}
- list_move_tail(&mem->validate_list.head,
- &process_info->userptr_valid_list);
-
/* Update mapping. If the BO was not validated
* (because we couldn't get user pages), this will
* clear the page table entries, which will result in
@@ -2179,11 +2713,13 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
continue;
kfd_mem_dmaunmap_attachment(mem, attachment);
- ret = update_gpuvm_pte(mem, attachment, &sync, NULL);
+ ret = update_gpuvm_pte(mem, attachment, &sync);
if (ret) {
pr_err("%s: update PTE failed\n", __func__);
/* make sure this gets validated again */
- atomic_inc(&mem->invalid);
+ mutex_lock(&process_info->notifier_lock);
+ mem->invalid++;
+ mutex_unlock(&process_info->notifier_lock);
goto unreserve_out;
}
}
@@ -2193,12 +2729,51 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
ret = process_update_pds(process_info, &sync);
unreserve_out:
- ttm_eu_backoff_reservation(&ticket, &resv_list);
+ drm_exec_fini(&exec);
amdgpu_sync_wait(&sync, false);
amdgpu_sync_free(&sync);
-out_free:
- kfree(pd_bo_list_entries);
-out_no_mem:
+
+ return ret;
+}
+
+/* Confirm that all user pages are valid while holding the notifier lock
+ *
+ * Moves valid BOs from the userptr_inval_list back to userptr_valid_list.
+ */
+static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_info)
+{
+ struct kgd_mem *mem, *tmp_mem;
+ int ret = 0;
+
+ list_for_each_entry_safe(mem, tmp_mem,
+ &process_info->userptr_inval_list,
+ validate_list) {
+ bool valid;
+
+ /* keep mem without hmm range at userptr_inval_list */
+ if (!mem->range)
+ continue;
+
+ /* Only check mem with hmm range associated */
+ valid = amdgpu_hmm_range_valid(mem->range);
+ amdgpu_hmm_range_free(mem->range);
+
+ mem->range = NULL;
+ if (!valid) {
+ WARN(!mem->invalid, "Invalid BO not marked invalid");
+ ret = -EAGAIN;
+ continue;
+ }
+
+ if (mem->invalid) {
+ WARN(1, "Valid BO is marked invalid");
+ ret = -EAGAIN;
+ continue;
+ }
+
+ list_move_tail(&mem->validate_list,
+ &process_info->userptr_valid_list);
+ }
return ret;
}
@@ -2217,9 +2792,11 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
restore_userptr_work);
struct task_struct *usertask;
struct mm_struct *mm;
- int evicted_bos;
+ uint32_t evicted_bos;
- evicted_bos = atomic_read(&process_info->evicted_bos);
+ mutex_lock(&process_info->notifier_lock);
+ evicted_bos = process_info->evicted_bos;
+ mutex_unlock(&process_info->notifier_lock);
if (!evicted_bos)
return;
@@ -2242,9 +2819,6 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
* and we can just restart the queues.
*/
if (!list_empty(&process_info->userptr_inval_list)) {
- if (atomic_read(&process_info->evicted_bos) != evicted_bos)
- goto unlock_out; /* Concurrent eviction, try again */
-
if (validate_invalid_user_pages(process_info))
goto unlock_out;
}
@@ -2253,10 +2827,17 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
* be a first eviction that calls quiesce_mm. The eviction
* reference counting inside KFD will handle this case.
*/
- if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
- evicted_bos)
- goto unlock_out;
- evicted_bos = 0;
+ mutex_lock(&process_info->notifier_lock);
+ if (process_info->evicted_bos != evicted_bos)
+ goto unlock_notifier_out;
+
+ if (confirm_valid_user_pages_locked(process_info)) {
+ WARN(1, "User pages unexpectedly invalid");
+ goto unlock_notifier_out;
+ }
+
+ process_info->evicted_bos = evicted_bos = 0;
+
if (kgd2kfd_resume_mm(mm)) {
pr_err("%s: Failed to resume KFD\n", __func__);
/* No recovery from this failure. Probably the CP is
@@ -2264,15 +2845,38 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
*/
}
+unlock_notifier_out:
+ mutex_unlock(&process_info->notifier_lock);
unlock_out:
mutex_unlock(&process_info->lock);
- mmput(mm);
- put_task_struct(usertask);
/* If validation failed, reschedule another attempt */
- if (evicted_bos)
- schedule_delayed_work(&process_info->restore_userptr_work,
+ if (evicted_bos) {
+ queue_delayed_work(system_freezable_wq,
+ &process_info->restore_userptr_work,
msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+
+ kfd_smi_event_queue_restore_rescheduled(mm);
+ }
+ mmput(mm);
+ put_task_struct(usertask);
+}
+
+static void replace_eviction_fence(struct dma_fence __rcu **ef,
+ struct dma_fence *new_ef)
+{
+ struct dma_fence *old_ef = rcu_replace_pointer(*ef, new_ef, true
+ /* protected by process_info->lock */);
+
+ /* If we're replacing an unsignaled eviction fence, that fence will
+ * never be signaled, and if anyone is still waiting on that fence,
+ * they will hang forever. This should never happen. We should only
+ * replace the fence in restore_work that only gets scheduled after
+ * eviction work signaled the fence.
+ */
+ WARN_ONCE(!dma_fence_is_signaled(old_ef),
+ "Replacing unsignaled eviction fence");
+ dma_fence_put(old_ef);
}
/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
@@ -2293,74 +2897,61 @@ unlock_out:
* 7. Add fence to all PD and PT BOs.
* 8. Unreserve all BOs
*/
-int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
+int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu **ef)
{
- struct amdgpu_bo_list_entry *pd_bo_list;
struct amdkfd_process_info *process_info = info;
struct amdgpu_vm *peer_vm;
struct kgd_mem *mem;
- struct bo_vm_reservation_context ctx;
- struct amdgpu_amdkfd_fence *new_fence;
- int ret = 0, i;
struct list_head duplicate_save;
struct amdgpu_sync sync_obj;
unsigned long failed_size = 0;
unsigned long total_size = 0;
+ struct drm_exec exec;
+ int ret;
INIT_LIST_HEAD(&duplicate_save);
- INIT_LIST_HEAD(&ctx.list);
- INIT_LIST_HEAD(&ctx.duplicates);
- pd_bo_list = kcalloc(process_info->n_vms,
- sizeof(struct amdgpu_bo_list_entry),
- GFP_KERNEL);
- if (!pd_bo_list)
- return -ENOMEM;
-
- i = 0;
mutex_lock(&process_info->lock);
- list_for_each_entry(peer_vm, &process_info->vm_list_head,
- vm_list_node)
- amdgpu_vm_get_pd_bo(peer_vm, &ctx.list, &pd_bo_list[i++]);
- /* Reserve all BOs and page tables/directory. Add all BOs from
- * kfd_bo_list to ctx.list
- */
- list_for_each_entry(mem, &process_info->kfd_bo_list,
- validate_list.head) {
-
- list_add_tail(&mem->resv_list.head, &ctx.list);
- mem->resv_list.bo = mem->validate_list.bo;
- mem->resv_list.num_shared = mem->validate_list.num_shared;
- }
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&exec) {
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret)) {
+ pr_err("Locking VM PD failed, ret: %d\n", ret);
+ goto ttm_reserve_fail;
+ }
+ }
- ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
- false, &duplicate_save);
- if (ret) {
- pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
- goto ttm_reserve_fail;
+ /* Reserve all BOs and page tables/directory. Prepare all BOs from
+ * kfd_bo_list in the drm_exec context
+ */
+ list_for_each_entry(mem, &process_info->kfd_bo_list,
+ validate_list) {
+ struct drm_gem_object *gobj;
+
+ gobj = &mem->bo->tbo.base;
+ ret = drm_exec_prepare_obj(&exec, gobj, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret)) {
+ pr_err("drm_exec_prepare_obj failed, ret: %d\n", ret);
+ goto ttm_reserve_fail;
+ }
+ }
}
amdgpu_sync_create(&sync_obj);
- /* Validate PDs and PTs */
- ret = process_validate_vms(process_info);
- if (ret)
- goto validate_map_fail;
-
- ret = process_sync_pds_resv(process_info, &sync_obj);
- if (ret) {
- pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
- goto validate_map_fail;
- }
-
- /* Validate BOs and map them to GPUVM (update VM page tables). */
+ /* Validate BOs managed by KFD */
list_for_each_entry(mem, &process_info->kfd_bo_list,
- validate_list.head) {
+ validate_list) {
struct amdgpu_bo *bo = mem->bo;
uint32_t domain = mem->domain;
- struct kfd_mem_attachment *attachment;
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
total_size += amdgpu_bo_size(bo);
@@ -2375,17 +2966,39 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
goto validate_map_fail;
}
}
- ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving);
- if (ret) {
- pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
- goto validate_map_fail;
+ dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
+ DMA_RESV_USAGE_KERNEL, fence) {
+ ret = amdgpu_sync_fence(&sync_obj, fence, GFP_KERNEL);
+ if (ret) {
+ pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
+ goto validate_map_fail;
+ }
}
+ }
+
+ if (failed_size)
+ pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
+
+ /* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO
+ * validations above would invalidate DMABuf imports again.
+ */
+ ret = process_validate_vms(process_info, &exec.ticket);
+ if (ret) {
+ pr_debug("Validating VMs failed, ret: %d\n", ret);
+ goto validate_map_fail;
+ }
+
+ /* Update mappings managed by KFD. */
+ list_for_each_entry(mem, &process_info->kfd_bo_list,
+ validate_list) {
+ struct kfd_mem_attachment *attachment;
+
list_for_each_entry(attachment, &mem->attachments, list) {
if (!attachment->is_mapped)
continue;
kfd_mem_dmaunmap_attachment(mem, attachment);
- ret = update_gpuvm_pte(mem, attachment, &sync_obj, NULL);
+ ret = update_gpuvm_pte(mem, attachment, &sync_obj);
if (ret) {
pr_debug("Memory eviction: update PTE failed. Try again\n");
goto validate_map_fail;
@@ -2393,8 +3006,31 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
}
}
- if (failed_size)
- pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
+ /* Update mappings not managed by KFD */
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ struct amdgpu_device *adev = amdgpu_ttm_adev(
+ peer_vm->root.bo->tbo.bdev);
+
+ struct amdgpu_fpriv *fpriv =
+ container_of(peer_vm, struct amdgpu_fpriv, vm);
+
+ ret = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
+ if (ret) {
+ dev_dbg(adev->dev,
+ "Memory eviction: handle PRT moved failed, pid %8d. Try again.\n",
+ pid_nr(process_info->pid));
+ goto validate_map_fail;
+ }
+
+ ret = amdgpu_vm_handle_moved(adev, peer_vm, &exec.ticket);
+ if (ret) {
+ dev_dbg(adev->dev,
+ "Memory eviction: handle moved failed, pid %8d. Try again.\n",
+ pid_nr(process_info->pid));
+ goto validate_map_fail;
+ }
+ }
/* Update page directories */
ret = process_update_pds(process_info, &sync_obj);
@@ -2403,46 +3039,72 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
goto validate_map_fail;
}
+ /* Sync with fences on all the page tables. They implicitly depend on any
+ * move fences from amdgpu_vm_handle_moved above.
+ */
+ ret = process_sync_pds_resv(process_info, &sync_obj);
+ if (ret) {
+ pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
+ goto validate_map_fail;
+ }
+
/* Wait for validate and PT updates to finish */
amdgpu_sync_wait(&sync_obj, false);
- /* Release old eviction fence and create new one, because fence only
- * goes from unsignaled to signaled, fence cannot be reused.
- * Use context and mm from the old fence.
+ /* The old eviction fence may be unsignaled if restore happens
+ * after a GPU reset or suspend/resume. Keep the old fence in that
+ * case. Otherwise release the old eviction fence and create new
+ * one, because fence only goes from unsignaled to signaled once
+ * and cannot be reused. Use context and mm from the old fence.
+ *
+ * If an old eviction fence signals after this check, that's OK.
+ * Anyone signaling an eviction fence must stop the queues first
+ * and schedule another restore worker.
*/
- new_fence = amdgpu_amdkfd_fence_create(
+ if (dma_fence_is_signaled(&process_info->eviction_fence->base)) {
+ struct amdgpu_amdkfd_fence *new_fence =
+ amdgpu_amdkfd_fence_create(
process_info->eviction_fence->base.context,
process_info->eviction_fence->mm,
NULL);
- if (!new_fence) {
- pr_err("Failed to create eviction fence\n");
- ret = -ENOMEM;
- goto validate_map_fail;
+
+ if (!new_fence) {
+ pr_err("Failed to create eviction fence\n");
+ ret = -ENOMEM;
+ goto validate_map_fail;
+ }
+ dma_fence_put(&process_info->eviction_fence->base);
+ process_info->eviction_fence = new_fence;
+ replace_eviction_fence(ef, dma_fence_get(&new_fence->base));
+ } else {
+ WARN_ONCE(*ef != &process_info->eviction_fence->base,
+ "KFD eviction fence doesn't match KGD process_info");
}
- dma_fence_put(&process_info->eviction_fence->base);
- process_info->eviction_fence = new_fence;
- *ef = dma_fence_get(&new_fence->base);
- /* Attach new eviction fence to all BOs */
- list_for_each_entry(mem, &process_info->kfd_bo_list,
- validate_list.head)
- amdgpu_bo_fence(mem->bo,
- &process_info->eviction_fence->base, true);
+ /* Attach new eviction fence to all BOs except pinned ones */
+ list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) {
+ if (mem->bo->tbo.pin_count)
+ continue;
- /* Attach eviction fence to PD / PT BOs */
+ dma_resv_add_fence(mem->bo->tbo.base.resv,
+ &process_info->eviction_fence->base,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+ /* Attach eviction fence to PD / PT BOs and DMABuf imports */
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
struct amdgpu_bo *bo = peer_vm->root.bo;
- amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
+ dma_resv_add_fence(bo->tbo.base.resv,
+ &process_info->eviction_fence->base,
+ DMA_RESV_USAGE_BOOKKEEP);
}
validate_map_fail:
- ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list);
amdgpu_sync_free(&sync_obj);
ttm_reserve_fail:
+ drm_exec_fini(&exec);
mutex_unlock(&process_info->lock);
- kfree(pd_bo_list);
return ret;
}
@@ -2485,10 +3147,12 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem
* Add process eviction fence to bo so they can
* evict each other.
*/
- ret = dma_resv_reserve_shared(gws_bo->tbo.base.resv, 1);
+ ret = dma_resv_reserve_fences(gws_bo->tbo.base.resv, 1);
if (ret)
goto reserve_shared_fail;
- amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
+ dma_resv_add_fence(gws_bo->tbo.base.resv,
+ &process_info->eviction_fence->base,
+ DMA_RESV_USAGE_BOOKKEEP);
amdgpu_bo_unreserve(gws_bo);
mutex_unlock(&(*mem)->process_info->lock);
@@ -2537,11 +3201,9 @@ int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
}
/* Returns GPU-specific tiling mode information */
-int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
+int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
struct tile_config *config)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
config->gb_addr_config = adev->gfx.config.gb_addr_config;
config->tile_config_ptr = adev->gfx.config.tile_mode_array;
config->num_tile_configs =
@@ -2557,3 +3219,34 @@ int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
return 0;
}
+
+bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem)
+{
+ struct amdgpu_vm *vm = drm_priv_to_vm(drm_priv);
+ struct kfd_mem_attachment *entry;
+
+ list_for_each_entry(entry, &mem->attachments, list) {
+ if (entry->is_mapped && entry->bo_va->base.vm == vm)
+ return true;
+ }
+ return false;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data)
+{
+
+ spin_lock(&kfd_mem_limit.mem_limit_lock);
+ seq_printf(m, "System mem used %lldM out of %lluM\n",
+ (kfd_mem_limit.system_mem_used >> 20),
+ (kfd_mem_limit.max_system_mem_limit >> 20));
+ seq_printf(m, "TTM mem used %lldM out of %lluM\n",
+ (kfd_mem_limit.ttm_mem_used >> 20),
+ (kfd_mem_limit.max_ttm_mem_limit >> 20));
+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
+
+ return 0;
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 12a6b1c99c93..763f2b8dcf13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -36,13 +36,6 @@
#include "atombios_encoders.h"
#include "bif/bif_4_1_d.h"
-static void amdgpu_atombios_lookup_i2c_gpio_quirks(struct amdgpu_device *adev,
- ATOM_GPIO_I2C_ASSIGMENT *gpio,
- u8 index)
-{
-
-}
-
static struct amdgpu_i2c_bus_rec amdgpu_atombios_get_bus_rec_for_i2c_gpio(ATOM_GPIO_I2C_ASSIGMENT *gpio)
{
struct amdgpu_i2c_bus_rec i2c;
@@ -108,9 +101,6 @@ struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device *
gpio = &i2c_info->asGPIO_Info[0];
for (i = 0; i < num_indices; i++) {
-
- amdgpu_atombios_lookup_i2c_gpio_quirks(adev, gpio, i);
-
if (gpio->sucI2cId.ucAccess == id) {
i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio);
break;
@@ -142,8 +132,6 @@ void amdgpu_atombios_i2c_init(struct amdgpu_device *adev)
gpio = &i2c_info->asGPIO_Info[0];
for (i = 0; i < num_indices; i++) {
- amdgpu_atombios_lookup_i2c_gpio_quirks(adev, gpio, i);
-
i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio);
if (i2c.valid) {
@@ -156,6 +144,38 @@ void amdgpu_atombios_i2c_init(struct amdgpu_device *adev)
}
}
+void amdgpu_atombios_oem_i2c_init(struct amdgpu_device *adev, u8 i2c_id)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+ ATOM_GPIO_I2C_ASSIGMENT *gpio;
+ struct amdgpu_i2c_bus_rec i2c;
+ int index = GetIndexIntoMasterTable(DATA, GPIO_I2C_Info);
+ struct _ATOM_GPIO_I2C_INFO *i2c_info;
+ uint16_t data_offset, size;
+ int i, num_indices;
+ char stmp[32];
+
+ if (amdgpu_atom_parse_data_header(ctx, index, &size, NULL, NULL, &data_offset)) {
+ i2c_info = (struct _ATOM_GPIO_I2C_INFO *)(ctx->bios + data_offset);
+
+ num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
+ sizeof(ATOM_GPIO_I2C_ASSIGMENT);
+
+ gpio = &i2c_info->asGPIO_Info[0];
+ for (i = 0; i < num_indices; i++) {
+ i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio);
+
+ if (i2c.valid && i2c.i2c_id == i2c_id) {
+ sprintf(stmp, "OEM 0x%x", i2c.i2c_id);
+ adev->i2c_bus[i] = amdgpu_i2c_create(adev_to_drm(adev), &i2c, stmp);
+ break;
+ }
+ gpio = (ATOM_GPIO_I2C_ASSIGMENT *)
+ ((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT));
+ }
+ }
+}
+
struct amdgpu_gpio_rec
amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev,
u8 id)
@@ -686,7 +706,6 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev)
}
adev->clock.dp_extclk =
le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq);
- adev->clock.current_dispclk = adev->clock.default_dispclk;
adev->clock.max_pixel_clock = le16_to_cpu(firmware_info->info.usMaxPixelClock);
if (adev->clock.max_pixel_clock == 0)
@@ -1018,7 +1037,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
if (clock_type == COMPUTE_ENGINE_PLL_PARAM) {
args.v3.ulClockParams = cpu_to_le32((clock_type << 24) | clock);
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
dividers->post_div = args.v3.ucPostDiv;
dividers->enable_post_div = (args.v3.ucCntlFlag &
@@ -1038,7 +1059,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
if (strobe_mode)
args.v5.ucInputFlag = ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
dividers->post_div = args.v5.ucPostDiv;
dividers->enable_post_div = (args.v5.ucCntlFlag &
@@ -1056,7 +1079,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
/* fusion */
args.v4.ulClock = cpu_to_le32(clock); /* 10 khz */
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
dividers->post_divider = dividers->post_div = args.v4.ucPostDiv;
dividers->real_clock = le32_to_cpu(args.v4.ulClock);
@@ -1067,7 +1092,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
args.v6_in.ulClock.ulComputeClockFlag = clock_type;
args.v6_in.ulClock.ulClockFreq = cpu_to_le32(clock); /* 10 khz */
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
dividers->whole_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDiv);
dividers->frac_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDivFrac);
@@ -1083,6 +1110,7 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
return 0;
}
+#ifdef CONFIG_DRM_AMDGPU_SI
int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev,
u32 clock,
bool strobe_mode,
@@ -1108,7 +1136,9 @@ int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev,
if (strobe_mode)
args.ucInputFlag |= MPLL_INPUT_FLAG_STROBE_MODE_EN;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
mpll_param->clkfrac = le16_to_cpu(args.ulFbDiv.usFbDivFrac);
mpll_param->clkf = le16_to_cpu(args.ulFbDiv.usFbDiv);
@@ -1134,8 +1164,8 @@ int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev,
return 0;
}
-void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
- u32 eng_clock, u32 mem_clock)
+int amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
+ u32 eng_clock, u32 mem_clock)
{
SET_ENGINE_CLOCK_PS_ALLOCATION args;
int index = GetIndexIntoMasterTable(COMMAND, DynamicMemorySettings);
@@ -1150,7 +1180,8 @@ void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
if (mem_clock)
args.sReserved.ulClock = cpu_to_le32(mem_clock & SET_CLOCK_FREQ_MASK);
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ return amdgpu_atom_execute_table(adev->mode_info.atom_context, index,
+ (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev,
@@ -1204,7 +1235,9 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
args.v2.ucVoltageMode = 0;
args.v2.usVoltageLevel = 0;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
*voltage = le16_to_cpu(args.v2.usVoltageLevel);
break;
@@ -1213,7 +1246,9 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
args.v3.ucVoltageMode = ATOM_GET_VOLTAGE_LEVEL;
args.v3.usVoltageLevel = cpu_to_le16(voltage_id);
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
*voltage = le16_to_cpu(args.v3.usVoltageLevel);
break;
@@ -1475,6 +1510,8 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
(u32)le32_to_cpu(*((u32 *)reg_data + j));
j++;
} else if ((reg_table->mc_reg_address[i].pre_reg_data & LOW_NIBBLE_MASK) == DATA_EQU_PREV) {
+ if (i == 0)
+ continue;
reg_table->mc_reg_table_entry[num_ranges].mc_data[i] =
reg_table->mc_reg_table_entry[num_ranges].mc_data[i - 1];
}
@@ -1503,6 +1540,7 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
}
return -EINVAL;
}
+#endif
bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev)
{
@@ -1774,21 +1812,57 @@ static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev,
struct amdgpu_device *adev = drm_to_adev(ddev);
struct atom_context *ctx = adev->mode_info.atom_context;
- return sysfs_emit(buf, "%s\n", ctx->vbios_version);
+ return sysfs_emit(buf, "%s\n", ctx->vbios_pn);
+}
+
+static ssize_t amdgpu_atombios_get_vbios_build(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct atom_context *ctx = adev->mode_info.atom_context;
+
+ return sysfs_emit(buf, "%s\n", ctx->build_num);
}
static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version,
NULL);
+static DEVICE_ATTR(vbios_build, 0444, amdgpu_atombios_get_vbios_build, NULL);
static struct attribute *amdgpu_vbios_version_attrs[] = {
- &dev_attr_vbios_version.attr,
- NULL
+ &dev_attr_vbios_version.attr, &dev_attr_vbios_build.attr, NULL
};
+static umode_t amdgpu_vbios_version_attrs_is_visible(struct kobject *kobj,
+ struct attribute *attr,
+ int index)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct atom_context *ctx = adev->mode_info.atom_context;
+
+ if (attr == &dev_attr_vbios_build.attr && !strlen(ctx->build_num))
+ return 0;
+
+ return attr->mode;
+}
+
const struct attribute_group amdgpu_vbios_version_attr_group = {
- .attrs = amdgpu_vbios_version_attrs
+ .attrs = amdgpu_vbios_version_attrs,
+ .is_visible = amdgpu_vbios_version_attrs_is_visible,
};
+int amdgpu_atombios_sysfs_init(struct amdgpu_device *adev)
+{
+ if (adev->mode_info.atom_context)
+ return devm_device_add_group(adev->dev,
+ &amdgpu_vbios_version_attr_group);
+
+ return 0;
+}
+
/**
* amdgpu_atombios_fini - free the driver info and callbacks for atombios
*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
index 27e74b1fc260..867bc5c5ce67 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
@@ -89,8 +89,7 @@ struct atom_memory_info {
#define MAX_AC_TIMING_ENTRIES 16
-struct atom_memory_clock_range_table
-{
+struct atom_memory_clock_range_table {
u8 num_entries;
u8 rsv[3];
u32 mclk[MAX_AC_TIMING_ENTRIES];
@@ -118,14 +117,12 @@ struct atom_mc_reg_table {
#define MAX_VOLTAGE_ENTRIES 32
-struct atom_voltage_table_entry
-{
+struct atom_voltage_table_entry {
u16 value;
u32 smio_low;
};
-struct atom_voltage_table
-{
+struct atom_voltage_table {
u32 count;
u32 mask_low;
u32 phase_delay;
@@ -139,6 +136,7 @@ amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev,
struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device *adev,
uint8_t id);
void amdgpu_atombios_i2c_init(struct amdgpu_device *adev);
+void amdgpu_atombios_oem_i2c_init(struct amdgpu_device *adev, u8 i2c_id);
bool amdgpu_atombios_has_dce_engine_info(struct amdgpu_device *adev);
@@ -160,13 +158,14 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
bool strobe_mode,
struct atom_clock_dividers *dividers);
+#ifdef CONFIG_DRM_AMDGPU_SI
int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev,
u32 clock,
bool strobe_mode,
struct atom_mpll_param *mpll_param);
-void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
- u32 eng_clock, u32 mem_clock);
+int amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
+ u32 eng_clock, u32 mem_clock);
bool
amdgpu_atombios_is_voltage_gpio(struct amdgpu_device *adev,
@@ -179,6 +178,17 @@ int amdgpu_atombios_get_voltage_table(struct amdgpu_device *adev,
int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
u8 module_index,
struct atom_mc_reg_table *reg_table);
+int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
+ u16 voltage_id, u16 *voltage);
+int amdgpu_atombios_get_leakage_vddc_based_on_leakage_idx(struct amdgpu_device *adev,
+ u16 *voltage,
+ u16 leakage_idx);
+void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev,
+ u16 *vddc, u16 *vddci, u16 *mvdd);
+int amdgpu_atombios_get_svi2_info(struct amdgpu_device *adev,
+ u8 voltage_type,
+ u8 *svd_gpio_id, u8 *svc_gpio_id);
+#endif
bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev);
@@ -190,21 +200,11 @@ void amdgpu_atombios_scratch_regs_set_backlight_level(struct amdgpu_device *adev
bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev);
void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le);
-int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
- u16 voltage_id, u16 *voltage);
-int amdgpu_atombios_get_leakage_vddc_based_on_leakage_idx(struct amdgpu_device *adev,
- u16 *voltage,
- u16 leakage_idx);
-void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev,
- u16 *vddc, u16 *vddci, u16 *mvdd);
int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
u8 clock_type,
u32 clock,
bool strobe_mode,
struct atom_clock_dividers *dividers);
-int amdgpu_atombios_get_svi2_info(struct amdgpu_device *adev,
- u8 voltage_type,
- u8 *svd_gpio_id, u8 *svc_gpio_id);
int amdgpu_atombios_get_data_table(struct amdgpu_device *adev,
uint32_t table,
@@ -215,5 +215,6 @@ int amdgpu_atombios_get_data_table(struct amdgpu_device *adev,
void amdgpu_atombios_fini(struct amdgpu_device *adev);
int amdgpu_atombios_init(struct amdgpu_device *adev);
+int amdgpu_atombios_sysfs_init(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 97178b307ed6..636385c80f64 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -34,6 +34,7 @@ union firmware_info {
struct atom_firmware_info_v3_2 v32;
struct atom_firmware_info_v3_3 v33;
struct atom_firmware_info_v3_4 v34;
+ struct atom_firmware_info_v3_5 v35;
};
/*
@@ -58,7 +59,7 @@ uint32_t amdgpu_atomfirmware_query_firmware_capability(struct amdgpu_device *ade
if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context,
index, &size, &frev, &crev, &data_offset)) {
/* support firmware_info 3.1 + */
- if ((frev == 3 && crev >=1) || (frev > 3)) {
+ if ((frev == 3 && crev >= 1) || (frev > 3)) {
firmware_info = (union firmware_info *)
(mode_info->atom_context->bios + data_offset);
fw_cap = le32_to_cpu(firmware_info->v31.firmware_capability);
@@ -101,39 +102,104 @@ void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev)
}
}
+static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev,
+ struct vram_usagebyfirmware_v2_1 *fw_usage, int *usage_bytes)
+{
+ u32 start_addr, fw_size, drv_size;
+
+ start_addr = le32_to_cpu(fw_usage->start_address_in_kb);
+ fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb);
+ drv_size = le16_to_cpu(fw_usage->used_by_driver_in_kb);
+
+ DRM_DEBUG("atom firmware v2_1 requested %08x %dkb fw %dkb drv\n",
+ start_addr,
+ fw_size,
+ drv_size);
+
+ if ((start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) ==
+ (u32)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
+ ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
+ /* Firmware request VRAM reservation for SR-IOV */
+ adev->mman.fw_vram_usage_start_offset = (start_addr &
+ (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+ adev->mman.fw_vram_usage_size = fw_size << 10;
+ /* Use the default scratch size */
+ *usage_bytes = 0;
+ } else {
+ *usage_bytes = drv_size << 10;
+ }
+ return 0;
+}
+
+static int amdgpu_atomfirmware_allocate_fb_v2_2(struct amdgpu_device *adev,
+ struct vram_usagebyfirmware_v2_2 *fw_usage, int *usage_bytes)
+{
+ u32 fw_start_addr, fw_size, drv_start_addr, drv_size;
+
+ fw_start_addr = le32_to_cpu(fw_usage->fw_region_start_address_in_kb);
+ fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb);
+
+ drv_start_addr = le32_to_cpu(fw_usage->driver_region0_start_address_in_kb);
+ drv_size = le32_to_cpu(fw_usage->used_by_driver_region0_in_kb);
+
+ DRM_DEBUG("atom requested fw start at %08x %dkb and drv start at %08x %dkb\n",
+ fw_start_addr,
+ fw_size,
+ drv_start_addr,
+ drv_size);
+
+ if (amdgpu_sriov_vf(adev) &&
+ ((fw_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION <<
+ ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) {
+ /* Firmware request VRAM reservation for SR-IOV */
+ adev->mman.fw_vram_usage_start_offset = (fw_start_addr &
+ (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+ adev->mman.fw_vram_usage_size = fw_size << 10;
+ }
+
+ if (amdgpu_sriov_vf(adev) &&
+ ((drv_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION <<
+ ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) {
+ /* driver request VRAM reservation for SR-IOV */
+ adev->mman.drv_vram_usage_start_offset = (drv_start_addr &
+ (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+ adev->mman.drv_vram_usage_size = drv_size << 10;
+ }
+
+ *usage_bytes = 0;
+ return 0;
+}
+
int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
{
struct atom_context *ctx = adev->mode_info.atom_context;
int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
vram_usagebyfirmware);
- struct vram_usagebyfirmware_v2_1 *firmware_usage;
- uint32_t start_addr, size;
- uint16_t data_offset;
+ struct vram_usagebyfirmware_v2_1 *fw_usage_v2_1;
+ struct vram_usagebyfirmware_v2_2 *fw_usage_v2_2;
+ u16 data_offset;
+ u8 frev, crev;
int usage_bytes = 0;
- if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) {
- firmware_usage = (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
- DRM_DEBUG("atom firmware requested %08x %dkb fw %dkb drv\n",
- le32_to_cpu(firmware_usage->start_address_in_kb),
- le16_to_cpu(firmware_usage->used_by_firmware_in_kb),
- le16_to_cpu(firmware_usage->used_by_driver_in_kb));
-
- start_addr = le32_to_cpu(firmware_usage->start_address_in_kb);
- size = le16_to_cpu(firmware_usage->used_by_firmware_in_kb);
-
- if ((uint32_t)(start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) ==
- (uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
- ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
- /* Firmware request VRAM reservation for SR-IOV */
- adev->mman.fw_vram_usage_start_offset = (start_addr &
- (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
- adev->mman.fw_vram_usage_size = size << 10;
- /* Use the default scratch size */
- usage_bytes = 0;
- } else {
- usage_bytes = le16_to_cpu(firmware_usage->used_by_driver_in_kb) << 10;
+ /* Skip atomfirmware allocation for SRIOV VFs when dynamic crit regn is enabled */
+ if (!(amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled)) {
+ if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) {
+ if (frev == 2 && crev == 1) {
+ fw_usage_v2_1 =
+ (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
+ amdgpu_atomfirmware_allocate_fb_v2_1(adev,
+ fw_usage_v2_1,
+ &usage_bytes);
+ } else if (frev >= 2 && crev >= 2) {
+ fw_usage_v2_2 =
+ (struct vram_usagebyfirmware_v2_2 *)(ctx->bios + data_offset);
+ amdgpu_atomfirmware_allocate_fb_v2_2(adev,
+ fw_usage_v2_2,
+ &usage_bytes);
+ }
}
}
+
ctx->scratch_size_bytes = 0;
if (usage_bytes == 0)
usage_bytes = 20 * 1024;
@@ -149,12 +215,14 @@ union igp_info {
struct atom_integrated_system_info_v1_11 v11;
struct atom_integrated_system_info_v1_12 v12;
struct atom_integrated_system_info_v2_1 v21;
+ struct atom_integrated_system_info_v2_3 v23;
};
union umc_info {
struct atom_umc_info_v3_1 v31;
struct atom_umc_info_v3_2 v32;
struct atom_umc_info_v3_3 v33;
+ struct atom_umc_info_v4_0 v40;
};
union vram_info {
@@ -162,12 +230,14 @@ union vram_info {
struct atom_vram_info_header_v2_4 v24;
struct atom_vram_info_header_v2_5 v25;
struct atom_vram_info_header_v2_6 v26;
+ struct atom_vram_info_header_v3_0 v30;
};
union vram_module {
struct atom_vram_module_v9 v9;
struct atom_vram_module_v10 v10;
struct atom_vram_module_v11 v11;
+ struct atom_vram_module_v3_0 v30;
};
static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
@@ -186,13 +256,17 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
vram_type = AMDGPU_VRAM_TYPE_DDR3;
break;
case Ddr4MemType:
- case LpDdr4MemType:
vram_type = AMDGPU_VRAM_TYPE_DDR4;
break;
+ case LpDdr4MemType:
+ vram_type = AMDGPU_VRAM_TYPE_LPDDR4;
+ break;
case Ddr5MemType:
- case LpDdr5MemType:
vram_type = AMDGPU_VRAM_TYPE_DDR5;
break;
+ case LpDdr5MemType:
+ vram_type = AMDGPU_VRAM_TYPE_LPDDR5;
+ break;
default:
vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
break;
@@ -204,11 +278,15 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
break;
case ATOM_DGPU_VRAM_TYPE_HBM2:
case ATOM_DGPU_VRAM_TYPE_HBM2E:
+ case ATOM_DGPU_VRAM_TYPE_HBM3:
vram_type = AMDGPU_VRAM_TYPE_HBM;
break;
case ATOM_DGPU_VRAM_TYPE_GDDR6:
vram_type = AMDGPU_VRAM_TYPE_GDDR6;
break;
+ case ATOM_DGPU_VRAM_TYPE_HBM3E:
+ vram_type = AMDGPU_VRAM_TYPE_HBM3E;
+ break;
default:
vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
break;
@@ -218,7 +296,6 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
return vram_type;
}
-
int
amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
int *vram_width, int *vram_type,
@@ -229,6 +306,7 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
u16 data_offset, size;
union igp_info *igp_info;
union vram_info *vram_info;
+ union umc_info *umc_info;
union vram_module *vram_module;
u8 frev, crev;
u8 mem_type;
@@ -240,10 +318,16 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
if (adev->flags & AMD_IS_APU)
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
integratedsysteminfo);
- else
- index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
- vram_info);
-
+ else {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, umc_info);
+ break;
+ default:
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, vram_info);
+ }
+ }
if (amdgpu_atom_parse_data_header(mode_info->atom_context,
index, &size,
&frev, &crev, &data_offset)) {
@@ -258,10 +342,13 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
mem_channel_number = igp_info->v11.umachannelnumber;
if (!mem_channel_number)
mem_channel_number = 1;
- /* channel width is 64 */
- if (vram_width)
- *vram_width = mem_channel_number * 64;
mem_type = igp_info->v11.memorytype;
+ if (mem_type == LpDdr5MemType)
+ mem_channel_width = 32;
+ else
+ mem_channel_width = 64;
+ if (vram_width)
+ *vram_width = mem_channel_number * mem_channel_width;
if (vram_type)
*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
break;
@@ -276,10 +363,27 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
mem_channel_number = igp_info->v21.umachannelnumber;
if (!mem_channel_number)
mem_channel_number = 1;
- /* channel width is 64 */
- if (vram_width)
- *vram_width = mem_channel_number * 64;
mem_type = igp_info->v21.memorytype;
+ if (mem_type == LpDdr5MemType)
+ mem_channel_width = 32;
+ else
+ mem_channel_width = 64;
+ if (vram_width)
+ *vram_width = mem_channel_number * mem_channel_width;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ break;
+ case 3:
+ mem_channel_number = igp_info->v23.umachannelnumber;
+ if (!mem_channel_number)
+ mem_channel_number = 1;
+ mem_type = igp_info->v23.memorytype;
+ if (mem_type == LpDdr5MemType)
+ mem_channel_width = 32;
+ else
+ mem_channel_width = 64;
+ if (vram_width)
+ *vram_width = mem_channel_number * mem_channel_width;
if (vram_type)
*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
break;
@@ -291,95 +395,150 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
return -EINVAL;
}
} else {
- vram_info = (union vram_info *)
- (mode_info->atom_context->bios + data_offset);
- module_id = (RREG32(adev->bios_scratch_reg_offset + 4) & 0x00ff0000) >> 16;
- switch (crev) {
- case 3:
- if (module_id > vram_info->v23.vram_module_num)
- module_id = 0;
- vram_module = (union vram_module *)vram_info->v23.vram_module;
- while (i < module_id) {
- vram_module = (union vram_module *)
- ((u8 *)vram_module + vram_module->v9.vram_module_size);
- i++;
- }
- mem_type = vram_module->v9.memory_type;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- mem_channel_number = vram_module->v9.channel_num;
- mem_channel_width = vram_module->v9.channel_width;
- if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
- mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
- if (vram_vendor)
- *vram_vendor = mem_vendor;
- break;
- case 4:
- if (module_id > vram_info->v24.vram_module_num)
- module_id = 0;
- vram_module = (union vram_module *)vram_info->v24.vram_module;
- while (i < module_id) {
- vram_module = (union vram_module *)
- ((u8 *)vram_module + vram_module->v10.vram_module_size);
- i++;
- }
- mem_type = vram_module->v10.memory_type;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- mem_channel_number = vram_module->v10.channel_num;
- mem_channel_width = vram_module->v10.channel_width;
- if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
- mem_vendor = (vram_module->v10.vender_rev_id) & 0xF;
- if (vram_vendor)
- *vram_vendor = mem_vendor;
- break;
- case 5:
- if (module_id > vram_info->v25.vram_module_num)
- module_id = 0;
- vram_module = (union vram_module *)vram_info->v25.vram_module;
- while (i < module_id) {
- vram_module = (union vram_module *)
- ((u8 *)vram_module + vram_module->v11.vram_module_size);
- i++;
- }
- mem_type = vram_module->v11.memory_type;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- mem_channel_number = vram_module->v11.channel_num;
- mem_channel_width = vram_module->v11.channel_width;
- if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
- mem_vendor = (vram_module->v11.vender_rev_id) & 0xF;
- if (vram_vendor)
- *vram_vendor = mem_vendor;
- break;
- case 6:
- if (module_id > vram_info->v26.vram_module_num)
- module_id = 0;
- vram_module = (union vram_module *)vram_info->v26.vram_module;
- while (i < module_id) {
- vram_module = (union vram_module *)
- ((u8 *)vram_module + vram_module->v9.vram_module_size);
- i++;
- }
- mem_type = vram_module->v9.memory_type;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- mem_channel_number = vram_module->v9.channel_num;
- mem_channel_width = vram_module->v9.channel_width;
- if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
- mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
- if (vram_vendor)
- *vram_vendor = mem_vendor;
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ umc_info = (union umc_info *)(mode_info->atom_context->bios + data_offset);
+
+ if (frev == 4) {
+ switch (crev) {
+ case 0:
+ mem_channel_number = le32_to_cpu(umc_info->v40.channel_num);
+ mem_type = le32_to_cpu(umc_info->v40.vram_type);
+ mem_channel_width = le32_to_cpu(umc_info->v40.channel_width);
+ mem_vendor = RREG32(adev->bios_scratch_reg_offset + 4) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ break;
+ default:
+ return -EINVAL;
+ }
+ } else
+ return -EINVAL;
break;
default:
- return -EINVAL;
+ vram_info = (union vram_info *)
+ (mode_info->atom_context->bios + data_offset);
+
+ module_id = (RREG32(adev->bios_scratch_reg_offset + 4) & 0x00ff0000) >> 16;
+ if (frev == 3) {
+ switch (crev) {
+ /* v30 */
+ case 0:
+ vram_module = (union vram_module *)vram_info->v30.vram_module;
+ mem_vendor = (vram_module->v30.dram_vendor_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ mem_type = vram_info->v30.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_info->v30.channel_num;
+ mem_channel_width = vram_info->v30.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * 16;
+ break;
+ default:
+ return -EINVAL;
+ }
+ } else if (frev == 2) {
+ switch (crev) {
+ /* v23 */
+ case 3:
+ if (module_id > vram_info->v23.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v23.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v9.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v9.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v9.channel_num;
+ mem_channel_width = vram_module->v9.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ /* v24 */
+ case 4:
+ if (module_id > vram_info->v24.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v24.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v10.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v10.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v10.channel_num;
+ mem_channel_width = vram_module->v10.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v10.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ /* v25 */
+ case 5:
+ if (module_id > vram_info->v25.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v25.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v11.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v11.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v11.channel_num;
+ mem_channel_width = vram_module->v11.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v11.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ /* v26 */
+ case 6:
+ if (module_id > vram_info->v26.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v26.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v9.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v9.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v9.channel_num;
+ mem_channel_width = vram_module->v9.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ default:
+ return -EINVAL;
+ }
+ } else {
+ /* invalid frev */
+ return -EINVAL;
+ }
}
}
-
}
return 0;
@@ -396,44 +555,63 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
u16 data_offset, size;
union umc_info *umc_info;
u8 frev, crev;
- bool ecc_default_enabled = false;
+ bool mem_ecc_enabled = false;
u8 umc_config;
u32 umc_config1;
+ adev->ras_default_ecc_enabled = false;
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
umc_info);
if (amdgpu_atom_parse_data_header(mode_info->atom_context,
index, &size, &frev, &crev, &data_offset)) {
+ umc_info = (union umc_info *)(mode_info->atom_context->bios + data_offset);
if (frev == 3) {
- umc_info = (union umc_info *)
- (mode_info->atom_context->bios + data_offset);
switch (crev) {
case 1:
umc_config = le32_to_cpu(umc_info->v31.umc_config);
- ecc_default_enabled =
+ mem_ecc_enabled =
(umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
break;
case 2:
umc_config = le32_to_cpu(umc_info->v32.umc_config);
- ecc_default_enabled =
+ mem_ecc_enabled =
(umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
break;
case 3:
umc_config = le32_to_cpu(umc_info->v33.umc_config);
umc_config1 = le32_to_cpu(umc_info->v33.umc_config1);
- ecc_default_enabled =
+ mem_ecc_enabled =
((umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ||
(umc_config1 & UMC_CONFIG1__ENABLE_ECC_CAPABLE)) ? true : false;
+ adev->ras_default_ecc_enabled =
+ (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
+ break;
+ default:
+ /* unsupported crev */
+ return false;
+ }
+ } else if (frev == 4) {
+ switch (crev) {
+ case 0:
+ umc_config = le32_to_cpu(umc_info->v40.umc_config);
+ umc_config1 = le32_to_cpu(umc_info->v40.umc_config1);
+ mem_ecc_enabled =
+ (umc_config1 & UMC_CONFIG1__ENABLE_ECC_CAPABLE) ? true : false;
+ adev->ras_default_ecc_enabled =
+ (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
break;
default:
/* unsupported crev */
return false;
}
+ } else {
+ /* unsupported frev */
+ return false;
}
}
- return ecc_default_enabled;
+ return mem_ecc_enabled;
}
/*
@@ -470,8 +648,8 @@ bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *ade
/**
* amdgpu_atomfirmware_ras_rom_addr -- Get the RAS EEPROM addr from VBIOS
- * adev: amdgpu_device pointer
- * i2c_address: pointer to u8; if not NULL, will contain
+ * @adev: amdgpu_device pointer
+ * @i2c_address: pointer to u8; if not NULL, will contain
* the RAS EEPROM address if the function returns true
*
* Return true if VBIOS supports RAS EEPROM address reporting,
@@ -494,7 +672,7 @@ bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev,
index, &size, &frev, &crev,
&data_offset)) {
/* support firmware_info 3.4 + */
- if ((frev == 3 && crev >=4) || (frev > 3)) {
+ if ((frev == 3 && crev >= 4) || (frev > 3)) {
firmware_info = (union firmware_info *)
(mode_info->atom_context->bios + data_offset);
/* The ras_rom_i2c_slave_addr should ideally
@@ -526,6 +704,14 @@ bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev,
union smu_info {
struct atom_smu_info_v3_1 v31;
+ struct atom_smu_info_v4_0 v40;
+};
+
+union gfx_info {
+ struct atom_gfx_info_v2_2 v22;
+ struct atom_gfx_info_v2_4 v24;
+ struct atom_gfx_info_v2_7 v27;
+ struct atom_gfx_info_v3_0 v30;
};
int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
@@ -565,7 +751,10 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
data_offset);
/* system clock */
- spll->reference_freq = le32_to_cpu(smu_info->v31.core_refclk_10khz);
+ if (frev == 3)
+ spll->reference_freq = le32_to_cpu(smu_info->v31.core_refclk_10khz);
+ else if (frev == 4)
+ spll->reference_freq = le32_to_cpu(smu_info->v40.core_refclk_10khz);
spll->reference_div = 0;
spll->min_post_div = 1;
@@ -609,22 +798,26 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
gfx_info);
if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
&frev, &crev, &data_offset)) {
- struct atom_gfx_info_v2_2 *gfx_info = (struct atom_gfx_info_v2_2*)
+ union gfx_info *gfx_info = (union gfx_info *)
(mode_info->atom_context->bios + data_offset);
- if ((frev == 2) && (crev >= 2))
- spll->reference_freq = le32_to_cpu(gfx_info->rlc_gpu_timer_refclk);
- ret = 0;
+ if ((frev == 3) ||
+ (frev == 2 && crev == 6)) {
+ spll->reference_freq = le32_to_cpu(gfx_info->v30.golden_tsc_count_lower_refclk);
+ ret = 0;
+ } else if ((frev == 2) &&
+ (crev >= 2) &&
+ (crev != 6)) {
+ spll->reference_freq = le32_to_cpu(gfx_info->v22.rlc_gpu_timer_refclk);
+ ret = 0;
+ } else {
+ BUG();
+ }
}
}
return ret;
}
-union gfx_info {
- struct atom_gfx_info_v2_4 v24;
- struct atom_gfx_info_v2_7 v27;
-};
-
int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev)
{
struct amdgpu_mode_info *mode_info = &adev->mode_info;
@@ -638,42 +831,58 @@ int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev)
&frev, &crev, &data_offset)) {
union gfx_info *gfx_info = (union gfx_info *)
(mode_info->atom_context->bios + data_offset);
- switch (crev) {
- case 4:
- adev->gfx.config.max_shader_engines = gfx_info->v24.max_shader_engines;
- adev->gfx.config.max_cu_per_sh = gfx_info->v24.max_cu_per_sh;
- adev->gfx.config.max_sh_per_se = gfx_info->v24.max_sh_per_se;
- adev->gfx.config.max_backends_per_se = gfx_info->v24.max_backends_per_se;
- adev->gfx.config.max_texture_channel_caches = gfx_info->v24.max_texture_channel_caches;
- adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v24.gc_num_gprs);
- adev->gfx.config.max_gs_threads = gfx_info->v24.gc_num_max_gs_thds;
- adev->gfx.config.gs_vgt_table_depth = gfx_info->v24.gc_gs_table_depth;
- adev->gfx.config.gs_prim_buffer_depth =
- le16_to_cpu(gfx_info->v24.gc_gsprim_buff_depth);
- adev->gfx.config.double_offchip_lds_buf =
- gfx_info->v24.gc_double_offchip_lds_buffer;
- adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v24.gc_wave_size);
- adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v24.gc_max_waves_per_simd);
- adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v24.gc_max_scratch_slots_per_cu;
- adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v24.gc_lds_size);
- return 0;
- case 7:
- adev->gfx.config.max_shader_engines = gfx_info->v27.max_shader_engines;
- adev->gfx.config.max_cu_per_sh = gfx_info->v27.max_cu_per_sh;
- adev->gfx.config.max_sh_per_se = gfx_info->v27.max_sh_per_se;
- adev->gfx.config.max_backends_per_se = gfx_info->v27.max_backends_per_se;
- adev->gfx.config.max_texture_channel_caches = gfx_info->v27.max_texture_channel_caches;
- adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v27.gc_num_gprs);
- adev->gfx.config.max_gs_threads = gfx_info->v27.gc_num_max_gs_thds;
- adev->gfx.config.gs_vgt_table_depth = gfx_info->v27.gc_gs_table_depth;
- adev->gfx.config.gs_prim_buffer_depth = le16_to_cpu(gfx_info->v27.gc_gsprim_buff_depth);
- adev->gfx.config.double_offchip_lds_buf = gfx_info->v27.gc_double_offchip_lds_buffer;
- adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v27.gc_wave_size);
- adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v27.gc_max_waves_per_simd);
- adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v27.gc_max_scratch_slots_per_cu;
- adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v27.gc_lds_size);
- return 0;
- default:
+ if (frev == 2) {
+ switch (crev) {
+ case 4:
+ adev->gfx.config.max_shader_engines = gfx_info->v24.max_shader_engines;
+ adev->gfx.config.max_cu_per_sh = gfx_info->v24.max_cu_per_sh;
+ adev->gfx.config.max_sh_per_se = gfx_info->v24.max_sh_per_se;
+ adev->gfx.config.max_backends_per_se = gfx_info->v24.max_backends_per_se;
+ adev->gfx.config.max_texture_channel_caches = gfx_info->v24.max_texture_channel_caches;
+ adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v24.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = gfx_info->v24.gc_num_max_gs_thds;
+ adev->gfx.config.gs_vgt_table_depth = gfx_info->v24.gc_gs_table_depth;
+ adev->gfx.config.gs_prim_buffer_depth =
+ le16_to_cpu(gfx_info->v24.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf =
+ gfx_info->v24.gc_double_offchip_lds_buffer;
+ adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v24.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v24.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v24.gc_max_scratch_slots_per_cu;
+ adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v24.gc_lds_size);
+ return 0;
+ case 7:
+ adev->gfx.config.max_shader_engines = gfx_info->v27.max_shader_engines;
+ adev->gfx.config.max_cu_per_sh = gfx_info->v27.max_cu_per_sh;
+ adev->gfx.config.max_sh_per_se = gfx_info->v27.max_sh_per_se;
+ adev->gfx.config.max_backends_per_se = gfx_info->v27.max_backends_per_se;
+ adev->gfx.config.max_texture_channel_caches = gfx_info->v27.max_texture_channel_caches;
+ adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v27.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = gfx_info->v27.gc_num_max_gs_thds;
+ adev->gfx.config.gs_vgt_table_depth = gfx_info->v27.gc_gs_table_depth;
+ adev->gfx.config.gs_prim_buffer_depth = le16_to_cpu(gfx_info->v27.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf = gfx_info->v27.gc_double_offchip_lds_buffer;
+ adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v27.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v27.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v27.gc_max_scratch_slots_per_cu;
+ adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v27.gc_lds_size);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ } else if (frev == 3) {
+ switch (crev) {
+ case 0:
+ adev->gfx.config.max_shader_engines = gfx_info->v30.max_shader_engines;
+ adev->gfx.config.max_cu_per_sh = gfx_info->v30.max_cu_per_sh;
+ adev->gfx.config.max_sh_per_se = gfx_info->v30.max_sh_per_se;
+ adev->gfx.config.max_backends_per_se = gfx_info->v30.max_backends_per_se;
+ adev->gfx.config.max_texture_channel_caches = gfx_info->v30.max_texture_channel_caches;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ } else {
return -EINVAL;
}
@@ -716,7 +925,7 @@ int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev)
firmware_info = (union firmware_info *)(ctx->bios + data_offset);
- if (frev !=3)
+ if (frev != 3)
return -EINVAL;
switch (crev) {
@@ -724,6 +933,10 @@ int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev)
fw_reserved_fb_size =
(firmware_info->v34.fw_reserved_size_in_kb << 10);
break;
+ case 5:
+ fw_reserved_fb_size =
+ (firmware_info->v35.fw_reserved_size_in_kb << 10);
+ break;
default:
fw_reserved_fb_size = 0;
break;
@@ -731,3 +944,68 @@ int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev)
return fw_reserved_fb_size;
}
+
+/*
+ * Helper function to execute asic_init table
+ *
+ * @adev: amdgpu_device pointer
+ * @fb_reset: flag to indicate whether fb is reset or not
+ *
+ * Return 0 if succeed, otherwise failed
+ */
+int amdgpu_atomfirmware_asic_init(struct amdgpu_device *adev, bool fb_reset)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ struct atom_context *ctx;
+ uint8_t frev, crev;
+ uint16_t data_offset;
+ uint32_t bootup_sclk_in10khz, bootup_mclk_in10khz;
+ struct asic_init_ps_allocation_v2_1 asic_init_ps_v2_1;
+ int index;
+
+ if (!mode_info)
+ return -EINVAL;
+
+ ctx = mode_info->atom_context;
+ if (!ctx)
+ return -EINVAL;
+
+ /* query bootup sclk/mclk from firmware_info table */
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ firmwareinfo);
+ if (amdgpu_atom_parse_data_header(ctx, index, NULL,
+ &frev, &crev, &data_offset)) {
+ union firmware_info *firmware_info =
+ (union firmware_info *)(ctx->bios +
+ data_offset);
+
+ bootup_sclk_in10khz =
+ le32_to_cpu(firmware_info->v31.bootup_sclk_in10khz);
+ bootup_mclk_in10khz =
+ le32_to_cpu(firmware_info->v31.bootup_mclk_in10khz);
+ } else {
+ return -EINVAL;
+ }
+
+ index = get_index_into_master_table(atom_master_list_of_command_functions_v2_1,
+ asic_init);
+ if (amdgpu_atom_parse_cmd_header(mode_info->atom_context, index, &frev, &crev)) {
+ if (frev == 2 && crev >= 1) {
+ memset(&asic_init_ps_v2_1, 0, sizeof(asic_init_ps_v2_1));
+ asic_init_ps_v2_1.param.engineparam.sclkfreqin10khz = bootup_sclk_in10khz;
+ asic_init_ps_v2_1.param.memparam.mclkfreqin10khz = bootup_mclk_in10khz;
+ asic_init_ps_v2_1.param.engineparam.engineflag = b3NORMAL_ENGINE_INIT;
+ if (!fb_reset)
+ asic_init_ps_v2_1.param.memparam.memflag = b3DRAM_SELF_REFRESH_EXIT;
+ else
+ asic_init_ps_v2_1.param.memparam.memflag = 0;
+ } else {
+ return -EINVAL;
+ }
+ } else {
+ return -EINVAL;
+ }
+
+ return amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, (uint32_t *)&asic_init_ps_v2_1,
+ sizeof(asic_init_ps_v2_1));
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
index 751248b253de..649b5530d8ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -36,9 +36,10 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev);
-bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t* i2c_address);
+bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t *i2c_address);
bool amdgpu_atomfirmware_mem_training_supported(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_asic_init(struct amdgpu_device *adev, bool fb_reset);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 7abe9500c0c6..3893e6fc2f03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -11,6 +11,7 @@
#include <linux/pci.h>
#include <linux/delay.h>
+#include "amdgpu.h"
#include "amd_acpi.h"
#define AMDGPU_PX_QUIRK_FORCE_ATPX (1 << 0)
@@ -73,28 +74,21 @@ struct atpx_mux {
u16 mux;
} __packed;
-bool amdgpu_has_atpx(void) {
+bool amdgpu_has_atpx(void)
+{
return amdgpu_atpx_priv.atpx_detected;
}
-bool amdgpu_has_atpx_dgpu_power_cntl(void) {
+bool amdgpu_has_atpx_dgpu_power_cntl(void)
+{
return amdgpu_atpx_priv.atpx.functions.power_cntl;
}
-bool amdgpu_is_atpx_hybrid(void) {
+bool amdgpu_is_atpx_hybrid(void)
+{
return amdgpu_atpx_priv.atpx.is_hybrid;
}
-bool amdgpu_atpx_dgpu_req_power_for_displays(void) {
- return amdgpu_atpx_priv.atpx.dgpu_req_power_for_displays;
-}
-
-#if defined(CONFIG_ACPI)
-void *amdgpu_atpx_get_dhandle(void) {
- return amdgpu_atpx_priv.dhandle;
-}
-#endif
-
/**
* amdgpu_atpx_call - call an ATPX method
*
@@ -133,7 +127,7 @@ static union acpi_object *amdgpu_atpx_call(acpi_handle handle, int function,
/* Fail only if calling the method fails and ATPX is supported */
if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
- printk("failed to evaluate ATPX got %s\n",
+ pr_err("failed to evaluate ATPX got %s\n",
acpi_format_exception(status));
kfree(buffer.pointer);
return NULL;
@@ -165,7 +159,7 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas
}
/**
- * amdgpu_atpx_validate_functions - validate ATPX functions
+ * amdgpu_atpx_validate - validate ATPX functions
*
* @atpx: amdgpu atpx struct
*
@@ -189,7 +183,7 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
size = *(u16 *) info->buffer.pointer;
if (size < 10) {
- printk("ATPX buffer is too small: %zu\n", size);
+ pr_err("ATPX buffer is too small: %zu\n", size);
kfree(info);
return -EINVAL;
}
@@ -222,11 +216,11 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
atpx->is_hybrid = false;
if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
if (amdgpu_atpx_priv.quirks & AMDGPU_PX_QUIRK_FORCE_ATPX) {
- printk("ATPX Hybrid Graphics, forcing to ATPX\n");
+ pr_warn("ATPX Hybrid Graphics, forcing to ATPX\n");
atpx->functions.power_cntl = true;
atpx->is_hybrid = false;
} else {
- printk("ATPX Hybrid Graphics\n");
+ pr_notice("ATPX Hybrid Graphics\n");
/*
* Disable legacy PM methods only when pcie port PM is usable,
* otherwise the device might fail to power off or power on.
@@ -268,7 +262,7 @@ static int amdgpu_atpx_verify_interface(struct amdgpu_atpx *atpx)
size = *(u16 *) info->buffer.pointer;
if (size < 8) {
- printk("ATPX buffer is too small: %zu\n", size);
+ pr_err("ATPX buffer is too small: %zu\n", size);
err = -EINVAL;
goto out;
}
@@ -277,8 +271,8 @@ static int amdgpu_atpx_verify_interface(struct amdgpu_atpx *atpx)
memcpy(&output, info->buffer.pointer, size);
/* TODO: check version? */
- printk("ATPX version %u, functions 0x%08x\n",
- output.version, output.function_bits);
+ pr_notice("ATPX version %u, functions 0x%08x\n",
+ output.version, output.function_bits);
amdgpu_atpx_parse_functions(&atpx->functions, output.function_bits);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 313517f7cf10..199693369c7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -29,18 +29,17 @@
#define AMDGPU_BENCHMARK_COMMON_MODES_N 17
static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
- uint64_t saddr, uint64_t daddr, int n)
+ uint64_t saddr, uint64_t daddr, int n, s64 *time_ms)
{
- unsigned long start_jiffies;
- unsigned long end_jiffies;
+ ktime_t stime, etime;
struct dma_fence *fence;
int i, r;
- start_jiffies = jiffies;
+ stime = ktime_get();
for (i = 0; i < n; i++) {
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence,
- false, false, false);
+ false, false, 0);
if (r)
goto exit_do_move;
r = dma_fence_wait(fence, false);
@@ -48,120 +47,81 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
if (r)
goto exit_do_move;
}
- end_jiffies = jiffies;
- r = jiffies_to_msecs(end_jiffies - start_jiffies);
exit_do_move:
+ etime = ktime_get();
+ *time_ms = ktime_ms_delta(etime, stime);
+
return r;
}
-static void amdgpu_benchmark_log_results(int n, unsigned size,
- unsigned int time,
+static void amdgpu_benchmark_log_results(struct amdgpu_device *adev,
+ int n, unsigned size,
+ s64 time_ms,
unsigned sdomain, unsigned ddomain,
char *kind)
{
- unsigned int throughput = (n * (size >> 10)) / time;
- DRM_INFO("amdgpu: %s %u bo moves of %u kB from"
- " %d to %d in %u ms, throughput: %u Mb/s or %u MB/s\n",
- kind, n, size >> 10, sdomain, ddomain, time,
+ s64 throughput = (n * (size >> 10));
+
+ throughput = div64_s64(throughput, time_ms);
+
+ dev_info(adev->dev, "amdgpu: %s %u bo moves of %u kB from"
+ " %d to %d in %lld ms, throughput: %lld Mb/s or %lld MB/s\n",
+ kind, n, size >> 10, sdomain, ddomain, time_ms,
throughput * 8, throughput);
}
-static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
- unsigned sdomain, unsigned ddomain)
+static int amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
+ unsigned sdomain, unsigned ddomain)
{
struct amdgpu_bo *dobj = NULL;
struct amdgpu_bo *sobj = NULL;
- struct amdgpu_bo_param bp;
uint64_t saddr, daddr;
+ s64 time_ms;
int r, n;
- int time;
-
- memset(&bp, 0, sizeof(bp));
- bp.size = size;
- bp.byte_align = PAGE_SIZE;
- bp.domain = sdomain;
- bp.flags = 0;
- bp.type = ttm_bo_type_kernel;
- bp.resv = NULL;
- bp.bo_ptr_size = sizeof(struct amdgpu_bo);
n = AMDGPU_BENCHMARK_ITERATIONS;
- r = amdgpu_bo_create(adev, &bp, &sobj);
- if (r) {
- goto out_cleanup;
- }
- r = amdgpu_bo_reserve(sobj, false);
- if (unlikely(r != 0))
- goto out_cleanup;
- r = amdgpu_bo_pin(sobj, sdomain);
- if (r) {
- amdgpu_bo_unreserve(sobj);
- goto out_cleanup;
- }
- r = amdgpu_ttm_alloc_gart(&sobj->tbo);
- amdgpu_bo_unreserve(sobj);
- if (r) {
- goto out_cleanup;
- }
- saddr = amdgpu_bo_gpu_offset(sobj);
- bp.domain = ddomain;
- r = amdgpu_bo_create(adev, &bp, &dobj);
- if (r) {
- goto out_cleanup;
- }
- r = amdgpu_bo_reserve(dobj, false);
- if (unlikely(r != 0))
- goto out_cleanup;
- r = amdgpu_bo_pin(dobj, ddomain);
- if (r) {
- amdgpu_bo_unreserve(sobj);
+
+ r = amdgpu_bo_create_kernel(adev, size,
+ PAGE_SIZE, sdomain,
+ &sobj,
+ &saddr,
+ NULL);
+ if (r)
goto out_cleanup;
- }
- r = amdgpu_ttm_alloc_gart(&dobj->tbo);
- amdgpu_bo_unreserve(dobj);
- if (r) {
+ r = amdgpu_bo_create_kernel(adev, size,
+ PAGE_SIZE, ddomain,
+ &dobj,
+ &daddr,
+ NULL);
+ if (r)
goto out_cleanup;
- }
- daddr = amdgpu_bo_gpu_offset(dobj);
if (adev->mman.buffer_funcs) {
- time = amdgpu_benchmark_do_move(adev, size, saddr, daddr, n);
- if (time < 0)
+ r = amdgpu_benchmark_do_move(adev, size, saddr, daddr, n, &time_ms);
+ if (r)
goto out_cleanup;
- if (time > 0)
- amdgpu_benchmark_log_results(n, size, time,
+ else
+ amdgpu_benchmark_log_results(adev, n, size, time_ms,
sdomain, ddomain, "dma");
}
out_cleanup:
/* Check error value now. The value can be overwritten when clean up.*/
- if (r) {
- DRM_ERROR("Error while benchmarking BO move.\n");
- }
+ if (r < 0)
+ dev_info(adev->dev, "Error while benchmarking BO move.\n");
- if (sobj) {
- r = amdgpu_bo_reserve(sobj, true);
- if (likely(r == 0)) {
- amdgpu_bo_unpin(sobj);
- amdgpu_bo_unreserve(sobj);
- }
- amdgpu_bo_unref(&sobj);
- }
- if (dobj) {
- r = amdgpu_bo_reserve(dobj, true);
- if (likely(r == 0)) {
- amdgpu_bo_unpin(dobj);
- amdgpu_bo_unreserve(dobj);
- }
- amdgpu_bo_unref(&dobj);
- }
+ if (sobj)
+ amdgpu_bo_free_kernel(&sobj, &saddr, NULL);
+ if (dobj)
+ amdgpu_bo_free_kernel(&dobj, &daddr, NULL);
+ return r;
}
-void amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
+int amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
{
- int i;
+ int i, r;
static const int common_modes[AMDGPU_BENCHMARK_COMMON_MODES_N] = {
640 * 480 * 4,
720 * 480 * 4,
@@ -182,63 +142,119 @@ void amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
1920 * 1200 * 4
};
+ mutex_lock(&adev->benchmark_mutex);
switch (test_number) {
case 1:
+ dev_info(adev->dev,
+ "benchmark test: %d (simple test, VRAM to GTT and GTT to VRAM)\n",
+ test_number);
/* simple test, VRAM to GTT and GTT to VRAM */
- amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_GTT,
- AMDGPU_GEM_DOMAIN_VRAM);
- amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_DOMAIN_GTT);
+ r = amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
+ r = amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_GTT);
+ if (r)
+ goto done;
break;
case 2:
+ dev_info(adev->dev,
+ "benchmark test: %d (simple test, VRAM to VRAM)\n",
+ test_number);
/* simple test, VRAM to VRAM */
- amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_DOMAIN_VRAM);
+ r = amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
break;
case 3:
+ dev_info(adev->dev,
+ "benchmark test: %d (GTT to VRAM, buffer size sweep, powers of 2)\n",
+ test_number);
/* GTT to VRAM, buffer size sweep, powers of 2 */
- for (i = 1; i <= 16384; i <<= 1)
- amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT,
- AMDGPU_GEM_DOMAIN_VRAM);
+ for (i = 1; i <= 16384; i <<= 1) {
+ r = amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
+ }
break;
case 4:
+ dev_info(adev->dev,
+ "benchmark test: %d (VRAM to GTT, buffer size sweep, powers of 2)\n",
+ test_number);
/* VRAM to GTT, buffer size sweep, powers of 2 */
- for (i = 1; i <= 16384; i <<= 1)
- amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_DOMAIN_GTT);
+ for (i = 1; i <= 16384; i <<= 1) {
+ r = amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_GTT);
+ if (r)
+ goto done;
+ }
break;
case 5:
+ dev_info(adev->dev,
+ "benchmark test: %d (VRAM to VRAM, buffer size sweep, powers of 2)\n",
+ test_number);
/* VRAM to VRAM, buffer size sweep, powers of 2 */
- for (i = 1; i <= 16384; i <<= 1)
- amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_DOMAIN_VRAM);
+ for (i = 1; i <= 16384; i <<= 1) {
+ r = amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
+ }
break;
case 6:
+ dev_info(adev->dev,
+ "benchmark test: %d (GTT to VRAM, buffer size sweep, common modes)\n",
+ test_number);
/* GTT to VRAM, buffer size sweep, common modes */
- for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++)
- amdgpu_benchmark_move(adev, common_modes[i],
- AMDGPU_GEM_DOMAIN_GTT,
- AMDGPU_GEM_DOMAIN_VRAM);
+ for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) {
+ r = amdgpu_benchmark_move(adev, common_modes[i],
+ AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
+ }
break;
case 7:
+ dev_info(adev->dev,
+ "benchmark test: %d (VRAM to GTT, buffer size sweep, common modes)\n",
+ test_number);
/* VRAM to GTT, buffer size sweep, common modes */
- for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++)
- amdgpu_benchmark_move(adev, common_modes[i],
- AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_DOMAIN_GTT);
+ for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) {
+ r = amdgpu_benchmark_move(adev, common_modes[i],
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_GTT);
+ if (r)
+ goto done;
+ }
break;
case 8:
+ dev_info(adev->dev,
+ "benchmark test: %d (VRAM to VRAM, buffer size sweep, common modes)\n",
+ test_number);
/* VRAM to VRAM, buffer size sweep, common modes */
- for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++)
- amdgpu_benchmark_move(adev, common_modes[i],
+ for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) {
+ r = amdgpu_benchmark_move(adev, common_modes[i],
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
+ }
break;
default:
- DRM_ERROR("Unknown benchmark\n");
+ dev_info(adev->dev, "Unknown benchmark %d\n", test_number);
+ r = -EINVAL;
+ break;
}
+
+done:
+ mutex_unlock(&adev->benchmark_mutex);
+
+ return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 27b19503773b..35d04e69aec0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -29,6 +29,7 @@
#include "amdgpu.h"
#include "atom.h"
+#include <linux/device.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/acpi.h>
@@ -46,52 +47,63 @@
/* Check if current bios is an ATOM BIOS.
* Return true if it is ATOM BIOS. Otherwise, return false.
*/
-static bool check_atom_bios(uint8_t *bios, size_t size)
+static bool check_atom_bios(struct amdgpu_device *adev, size_t size)
{
uint16_t tmp, bios_header_start;
+ uint8_t *bios = adev->bios;
if (!bios || size < 0x49) {
- DRM_INFO("vbios mem is null or mem size is wrong\n");
+ dev_dbg(adev->dev, "VBIOS mem is null or mem size is wrong\n");
return false;
}
if (!AMD_IS_VALID_VBIOS(bios)) {
- DRM_INFO("BIOS signature incorrect %x %x\n", bios[0], bios[1]);
+ dev_dbg(adev->dev, "VBIOS signature incorrect %x %x\n", bios[0],
+ bios[1]);
return false;
}
bios_header_start = bios[0x48] | (bios[0x49] << 8);
if (!bios_header_start) {
- DRM_INFO("Can't locate bios header\n");
+ dev_dbg(adev->dev, "Can't locate VBIOS header\n");
return false;
}
tmp = bios_header_start + 4;
if (size < tmp) {
- DRM_INFO("BIOS header is broken\n");
+ dev_dbg(adev->dev, "VBIOS header is broken\n");
return false;
}
if (!memcmp(bios + tmp, "ATOM", 4) ||
!memcmp(bios + tmp, "MOTA", 4)) {
- DRM_DEBUG("ATOMBIOS detected\n");
+ dev_dbg(adev->dev, "ATOMBIOS detected\n");
return true;
}
return false;
}
+void amdgpu_bios_release(struct amdgpu_device *adev)
+{
+ kfree(adev->bios);
+ adev->bios = NULL;
+ adev->bios_size = 0;
+}
+
/* If you boot an IGP board with a discrete card as the primary,
* the IGP rom is not accessible via the rom bar as the IGP rom is
* part of the system bios. On boot, the system bios puts a
* copy of the igp rom at the start of vram if a discrete card is
* present.
+ * For SR-IOV, if dynamic critical region is not enabled,
+ * the vbios image is also put at the start of VRAM in the VF.
*/
-static bool igp_read_bios_from_vram(struct amdgpu_device *adev)
+static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
{
- uint8_t __iomem *bios;
+ uint8_t __iomem *bios = NULL;
resource_size_t vram_base;
- resource_size_t size = 256 * 1024; /* ??? */
+ u32 size = 256U * 1024U; /* ??? */
if (!(adev->flags & AMD_IS_APU))
if (amdgpu_device_need_post(adev))
@@ -103,22 +115,36 @@ static bool igp_read_bios_from_vram(struct amdgpu_device *adev)
adev->bios = NULL;
vram_base = pci_resource_start(adev->pdev, 0);
- bios = ioremap_wc(vram_base, size);
- if (!bios) {
- return false;
- }
adev->bios = kmalloc(size, GFP_KERNEL);
- if (!adev->bios) {
- iounmap(bios);
+ if (!adev->bios)
return false;
+
+ /* For SRIOV with dynamic critical region is enabled,
+ * the vbios image is put at a dynamic offset of VRAM in the VF.
+ * If dynamic critical region is disabled, follow the existing logic as on baremetal.
+ */
+ if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) {
+ if (amdgpu_virt_get_dynamic_data_info(adev,
+ AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID, adev->bios, &size)) {
+ amdgpu_bios_release(adev);
+ return false;
+ }
+ } else {
+ bios = ioremap_wc(vram_base, size);
+ if (!bios) {
+ amdgpu_bios_release(adev);
+ return false;
+ }
+
+ memcpy_fromio(adev->bios, bios, size);
+ iounmap(bios);
}
+
adev->bios_size = size;
- memcpy_fromio(adev->bios, bios, size);
- iounmap(bios);
- if (!check_atom_bios(adev->bios, size)) {
- kfree(adev->bios);
+ if (!check_atom_bios(adev, size)) {
+ amdgpu_bios_release(adev);
return false;
}
@@ -133,9 +159,8 @@ bool amdgpu_read_bios(struct amdgpu_device *adev)
adev->bios = NULL;
/* XXX: some cards may return 0 for rom size? ddx has a workaround */
bios = pci_map_rom(adev->pdev, &size);
- if (!bios) {
+ if (!bios)
return false;
- }
adev->bios = kzalloc(size, GFP_KERNEL);
if (adev->bios == NULL) {
@@ -146,8 +171,8 @@ bool amdgpu_read_bios(struct amdgpu_device *adev)
memcpy_fromio(adev->bios, bios, size);
pci_unmap_rom(adev->pdev, bios);
- if (!check_atom_bios(adev->bios, size)) {
- kfree(adev->bios);
+ if (!check_atom_bios(adev, size)) {
+ amdgpu_bios_release(adev);
return false;
}
@@ -168,9 +193,9 @@ static bool amdgpu_read_bios_from_rom(struct amdgpu_device *adev)
header[AMD_VBIOS_SIGNATURE_END] = 0;
if ((!AMD_IS_VALID_VBIOS(header)) ||
- 0 != memcmp((char *)&header[AMD_VBIOS_SIGNATURE_OFFSET],
- AMD_VBIOS_SIGNATURE,
- strlen(AMD_VBIOS_SIGNATURE)))
+ memcmp((char *)&header[AMD_VBIOS_SIGNATURE_OFFSET],
+ AMD_VBIOS_SIGNATURE,
+ strlen(AMD_VBIOS_SIGNATURE)) != 0)
return false;
/* valid vbios, go on */
@@ -186,8 +211,8 @@ static bool amdgpu_read_bios_from_rom(struct amdgpu_device *adev)
/* read complete BIOS */
amdgpu_asic_read_bios_from_rom(adev, adev->bios, len);
- if (!check_atom_bios(adev->bios, len)) {
- kfree(adev->bios);
+ if (!check_atom_bios(adev, len)) {
+ amdgpu_bios_release(adev);
return false;
}
@@ -216,14 +241,15 @@ static bool amdgpu_read_platform_bios(struct amdgpu_device *adev)
memcpy_fromio(adev->bios, bios, romlen);
iounmap(bios);
- if (!check_atom_bios(adev->bios, romlen))
+ if (!check_atom_bios(adev, romlen))
goto free_bios;
adev->bios_size = romlen;
return true;
free_bios:
- kfree(adev->bios);
+ amdgpu_bios_release(adev);
+
return false;
}
@@ -264,7 +290,7 @@ static int amdgpu_atrm_call(acpi_handle atrm_handle, uint8_t *bios,
status = acpi_evaluate_object(atrm_handle, NULL, &atrm_arg, &buffer);
if (ACPI_FAILURE(status)) {
- printk("failed to evaluate ATRM got %s\n", acpi_format_exception(status));
+ DRM_ERROR("failed to evaluate ATRM got %s\n", acpi_format_exception(status));
return -ENODEV;
}
@@ -285,11 +311,15 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
acpi_status status;
bool found = false;
- /* ATRM is for the discrete card only */
- if (adev->flags & AMD_IS_APU)
+ /* ATRM is for on-platform devices only */
+ if (dev_is_removable(&adev->pdev->dev))
return false;
- while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
+ while ((pdev = pci_get_base_class(PCI_BASE_CLASS_DISPLAY, pdev))) {
+ if ((pdev->class != PCI_CLASS_DISPLAY_VGA << 8) &&
+ (pdev->class != PCI_CLASS_DISPLAY_OTHER << 8))
+ continue;
+
dhandle = ACPI_HANDLE(&pdev->dev);
if (!dhandle)
continue;
@@ -301,22 +331,9 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
}
}
- if (!found) {
- while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) {
- dhandle = ACPI_HANDLE(&pdev->dev);
- if (!dhandle)
- continue;
-
- status = acpi_get_handle(dhandle, "ATRM", &atrm_handle);
- if (ACPI_SUCCESS(status)) {
- found = true;
- break;
- }
- }
- }
-
if (!found)
return false;
+ pci_dev_put(pdev);
adev->bios = kmalloc(size, GFP_KERNEL);
if (!adev->bios) {
@@ -333,8 +350,8 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
break;
}
- if (!check_atom_bios(adev->bios, size)) {
- kfree(adev->bios);
+ if (!check_atom_bios(adev, size)) {
+ amdgpu_bios_release(adev);
return false;
}
adev->bios_size = size;
@@ -349,11 +366,8 @@ static inline bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
static bool amdgpu_read_disabled_bios(struct amdgpu_device *adev)
{
- if (adev->flags & AMD_IS_APU)
- return igp_read_bios_from_vram(adev);
- else
- return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ?
- false : amdgpu_asic_read_disabled_bios(adev);
+ return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ?
+ false : amdgpu_asic_read_disabled_bios(adev);
}
#ifdef CONFIG_ACPI
@@ -362,7 +376,7 @@ static bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
struct acpi_table_header *hdr;
acpi_size tbl_size;
UEFI_ACPI_VFCT *vfct;
- unsigned offset;
+ unsigned int offset;
if (!ACPI_SUCCESS(acpi_get_table("VFCT", 1, &hdr)))
return false;
@@ -401,8 +415,8 @@ static bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
vhdr->ImageLength,
GFP_KERNEL);
- if (!check_atom_bios(adev->bios, vhdr->ImageLength)) {
- kfree(adev->bios);
+ if (!check_atom_bios(adev, vhdr->ImageLength)) {
+ amdgpu_bios_release(adev);
return false;
}
adev->bios_size = vhdr->ImageLength;
@@ -420,7 +434,43 @@ static inline bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
}
#endif
-bool amdgpu_get_bios(struct amdgpu_device *adev)
+static bool amdgpu_get_bios_apu(struct amdgpu_device *adev)
+{
+ if (amdgpu_acpi_vfct_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from VFCT\n");
+ goto success;
+ }
+
+ if (amdgpu_read_bios_from_vram(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n");
+ goto success;
+ }
+
+ if (amdgpu_read_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n");
+ goto success;
+ }
+
+ if (amdgpu_read_platform_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from platform\n");
+ goto success;
+ }
+
+ dev_err(adev->dev, "Unable to locate a BIOS ROM\n");
+ return false;
+
+success:
+ return true;
+}
+
+static bool amdgpu_prefer_rom_resource(struct amdgpu_device *adev)
+{
+ struct resource *res = &adev->pdev->resource[PCI_ROM_RESOURCE];
+
+ return (res->flags & IORESOURCE_ROM_SHADOW);
+}
+
+static bool amdgpu_get_bios_dgpu(struct amdgpu_device *adev)
{
if (amdgpu_atrm_get_bios(adev)) {
dev_info(adev->dev, "Fetched VBIOS from ATRM\n");
@@ -432,14 +482,33 @@ bool amdgpu_get_bios(struct amdgpu_device *adev)
goto success;
}
- if (igp_read_bios_from_vram(adev)) {
+ /* this is required for SR-IOV */
+ if (amdgpu_read_bios_from_vram(adev)) {
dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n");
goto success;
}
- if (amdgpu_read_bios(adev)) {
- dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n");
- goto success;
+ if (amdgpu_prefer_rom_resource(adev)) {
+ if (amdgpu_read_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n");
+ goto success;
+ }
+
+ if (amdgpu_read_platform_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from platform\n");
+ goto success;
+ }
+
+ } else {
+ if (amdgpu_read_platform_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from platform\n");
+ goto success;
+ }
+
+ if (amdgpu_read_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n");
+ goto success;
+ }
}
if (amdgpu_read_bios_from_rom(adev)) {
@@ -452,15 +521,71 @@ bool amdgpu_get_bios(struct amdgpu_device *adev)
goto success;
}
- if (amdgpu_read_platform_bios(adev)) {
- dev_info(adev->dev, "Fetched VBIOS from platform\n");
- goto success;
- }
-
dev_err(adev->dev, "Unable to locate a BIOS ROM\n");
return false;
success:
- adev->is_atom_fw = (adev->asic_type >= CHIP_VEGA10) ? true : false;
+ return true;
+}
+
+bool amdgpu_get_bios(struct amdgpu_device *adev)
+{
+ bool found;
+
+ if (adev->flags & AMD_IS_APU)
+ found = amdgpu_get_bios_apu(adev);
+ else
+ found = amdgpu_get_bios_dgpu(adev);
+
+ if (found)
+ adev->is_atom_fw = adev->asic_type >= CHIP_VEGA10;
+
+ return found;
+}
+
+/* helper function for soc15 and onwards to read bios from rom */
+bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev,
+ u8 *bios, u32 length_bytes)
+{
+ u32 *dw_ptr;
+ u32 i, length_dw;
+ u32 rom_offset;
+ u32 rom_index_offset;
+ u32 rom_data_offset;
+
+ if (bios == NULL)
+ return false;
+ if (length_bytes == 0)
+ return false;
+ /* APU vbios image is part of sbios image */
+ if (adev->flags & AMD_IS_APU)
+ return false;
+ if (!adev->smuio.funcs ||
+ !adev->smuio.funcs->get_rom_index_offset ||
+ !adev->smuio.funcs->get_rom_data_offset)
+ return false;
+
+ dw_ptr = (u32 *)bios;
+ length_dw = ALIGN(length_bytes, 4) / 4;
+
+ rom_index_offset =
+ adev->smuio.funcs->get_rom_index_offset(adev);
+ rom_data_offset =
+ adev->smuio.funcs->get_rom_data_offset(adev);
+
+ if (adev->nbio.funcs &&
+ adev->nbio.funcs->get_rom_offset) {
+ rom_offset = adev->nbio.funcs->get_rom_offset(adev);
+ rom_offset = rom_offset << 17;
+ } else {
+ rom_offset = 0;
+ }
+
+ /* set rom index to rom_offset */
+ WREG32(rom_index_offset, rom_offset);
+ /* read out the rom data */
+ for (i = 0; i < length_dw; i++)
+ dw_ptr[i] = RREG32(rom_data_offset);
+
return true;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 714178f1b6c6..66fb37b64388 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -28,6 +28,7 @@
* Christian König <deathsimple@vodafone.de>
*/
+#include <linux/sort.h>
#include <linux/uaccess.h>
#include "amdgpu.h"
@@ -40,7 +41,7 @@ static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu)
{
struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list,
rhead);
-
+ mutex_destroy(&list->bo_list_mutex);
kvfree(list);
}
@@ -50,13 +51,20 @@ static void amdgpu_bo_list_free(struct kref *ref)
refcount);
struct amdgpu_bo_list_entry *e;
- amdgpu_bo_list_for_each_entry(e, list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+ amdgpu_bo_list_for_each_entry(e, list)
+ amdgpu_bo_unref(&e->bo);
+ call_rcu(&list->rhead, amdgpu_bo_list_free_rcu);
+}
- amdgpu_bo_unref(&bo);
- }
+static int amdgpu_bo_list_entry_cmp(const void *_a, const void *_b)
+{
+ const struct amdgpu_bo_list_entry *a = _a, *b = _b;
- call_rcu(&list->rhead, amdgpu_bo_list_free_rcu);
+ if (a->priority > b->priority)
+ return 1;
+ if (a->priority < b->priority)
+ return -1;
+ return 0;
}
int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
@@ -67,27 +75,17 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
struct amdgpu_bo_list_entry *array;
struct amdgpu_bo_list *list;
uint64_t total_size = 0;
- size_t size;
unsigned i;
int r;
- if (num_entries > (SIZE_MAX - sizeof(struct amdgpu_bo_list))
- / sizeof(struct amdgpu_bo_list_entry))
- return -EINVAL;
-
- size = sizeof(struct amdgpu_bo_list);
- size += num_entries * sizeof(struct amdgpu_bo_list_entry);
- list = kvmalloc(size, GFP_KERNEL);
+ list = kvzalloc(struct_size(list, entries, num_entries), GFP_KERNEL);
if (!list)
return -ENOMEM;
kref_init(&list->refcount);
- list->gds_obj = NULL;
- list->gws_obj = NULL;
- list->oa_obj = NULL;
- array = amdgpu_bo_list_array_entry(list, 0);
- memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
+ list->num_entries = num_entries;
+ array = list->entries;
for (i = 0; i < num_entries; ++i) {
struct amdgpu_bo_list_entry *entry;
@@ -118,7 +116,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
entry->priority = min(info[i].bo_priority,
AMDGPU_BO_LIST_MAX_PRIORITY);
- entry->tv.bo = &bo->tbo;
+ entry->bo = bo;
if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
list->gds_obj = bo;
@@ -132,24 +130,20 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
}
list->first_userptr = first_userptr;
- list->num_entries = num_entries;
+ sort(array, last_entry, sizeof(struct amdgpu_bo_list_entry),
+ amdgpu_bo_list_entry_cmp, NULL);
trace_amdgpu_cs_bo_status(list->num_entries, total_size);
+ mutex_init(&list->bo_list_mutex);
*result = list;
return 0;
error_free:
- for (i = 0; i < last_entry; ++i) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo);
-
- amdgpu_bo_unref(&bo);
- }
- for (i = first_userptr; i < num_entries; ++i) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo);
-
- amdgpu_bo_unref(&bo);
- }
+ for (i = 0; i < last_entry; ++i)
+ amdgpu_bo_unref(&array[i].bo);
+ for (i = first_userptr; i < num_entries; ++i)
+ amdgpu_bo_unref(&array[i].bo);
kvfree(list);
return r;
@@ -178,43 +172,10 @@ int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
}
rcu_read_unlock();
+ *result = NULL;
return -ENOENT;
}
-void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
- struct list_head *validated)
-{
- /* This is based on the bucket sort with O(n) time complexity.
- * An item with priority "i" is added to bucket[i]. The lists are then
- * concatenated in descending order.
- */
- struct list_head bucket[AMDGPU_BO_LIST_NUM_BUCKETS];
- struct amdgpu_bo_list_entry *e;
- unsigned i;
-
- for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++)
- INIT_LIST_HEAD(&bucket[i]);
-
- /* Since buffers which appear sooner in the relocation list are
- * likely to be used more often than buffers which appear later
- * in the list, the sort mustn't change the ordering of buffers
- * with the same priority, i.e. it must be stable.
- */
- amdgpu_bo_list_for_each_entry(e, list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
- unsigned priority = e->priority;
-
- if (!bo->parent)
- list_add_tail(&e->tv.head, &bucket[priority]);
-
- e->user_pages = NULL;
- }
-
- /* Connect the sorted buckets in the output list. */
- for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++)
- list_splice(&bucket[i], validated);
-}
-
void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
{
kref_put(&list->refcount, amdgpu_bo_list_free);
@@ -223,43 +184,36 @@ void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
struct drm_amdgpu_bo_list_entry **info_param)
{
- const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
+ const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
+ const uint32_t bo_info_size = in->bo_info_size;
+ const uint32_t bo_number = in->bo_number;
struct drm_amdgpu_bo_list_entry *info;
- int r;
-
- info = kvmalloc_array(in->bo_number, info_size, GFP_KERNEL);
- if (!info)
- return -ENOMEM;
/* copy the handle array from userspace to a kernel buffer */
- r = -EFAULT;
- if (likely(info_size == in->bo_info_size)) {
- unsigned long bytes = in->bo_number *
- in->bo_info_size;
-
- if (copy_from_user(info, uptr, bytes))
- goto error_free;
-
+ if (likely(info_size == bo_info_size)) {
+ info = vmemdup_array_user(uptr, bo_number, info_size);
+ if (IS_ERR(info))
+ return PTR_ERR(info);
} else {
- unsigned long bytes = min(in->bo_info_size, info_size);
+ const uint32_t bytes = min(bo_info_size, info_size);
unsigned i;
- memset(info, 0, in->bo_number * info_size);
- for (i = 0; i < in->bo_number; ++i) {
- if (copy_from_user(&info[i], uptr, bytes))
- goto error_free;
+ info = kvmalloc_array(bo_number, info_size, GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
- uptr += in->bo_info_size;
+ memset(info, 0, bo_number * info_size);
+ for (i = 0; i < bo_number; ++i, uptr += bo_info_size) {
+ if (copy_from_user(&info[i], uptr, bytes)) {
+ kvfree(info);
+ return -EFAULT;
+ }
}
}
*info_param = info;
return 0;
-
-error_free:
- kvfree(info);
- return r;
}
int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index 044b41f0bfd9..2b5e7c46a39d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -23,20 +23,22 @@
#ifndef __AMDGPU_BO_LIST_H__
#define __AMDGPU_BO_LIST_H__
-#include <drm/ttm/ttm_execbuf_util.h>
#include <drm/amdgpu_drm.h>
+struct hmm_range;
+
+struct drm_file;
+
struct amdgpu_device;
struct amdgpu_bo;
struct amdgpu_bo_va;
struct amdgpu_fpriv;
struct amdgpu_bo_list_entry {
- struct ttm_validate_buffer tv;
+ struct amdgpu_bo *bo;
struct amdgpu_bo_va *bo_va;
- struct dma_fence_chain *chain;
uint32_t priority;
- struct page **user_pages;
+ struct amdgpu_hmm_range *range;
bool user_invalidated;
};
@@ -48,12 +50,16 @@ struct amdgpu_bo_list {
struct amdgpu_bo *oa_obj;
unsigned first_userptr;
unsigned num_entries;
+
+ /* Protect access during command submission.
+ */
+ struct mutex bo_list_mutex;
+
+ struct amdgpu_bo_list_entry entries[] __counted_by(num_entries);
};
int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
struct amdgpu_bo_list **result);
-void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
- struct list_head *validated);
void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
struct drm_amdgpu_bo_list_entry **info_param);
@@ -64,22 +70,14 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev,
size_t num_entries,
struct amdgpu_bo_list **list);
-static inline struct amdgpu_bo_list_entry *
-amdgpu_bo_list_array_entry(struct amdgpu_bo_list *list, unsigned index)
-{
- struct amdgpu_bo_list_entry *array = (void *)&list[1];
-
- return &array[index];
-}
-
#define amdgpu_bo_list_for_each_entry(e, list) \
- for (e = amdgpu_bo_list_array_entry(list, 0); \
- e != amdgpu_bo_list_array_entry(list, (list)->num_entries); \
+ for (e = list->entries; \
+ e != &list->entries[list->num_entries]; \
++e)
#define amdgpu_bo_list_for_each_userptr_entry(e, list) \
- for (e = amdgpu_bo_list_array_entry(list, (list)->first_userptr); \
- e != amdgpu_bo_list_array_entry(list, (list)->num_entries); \
+ for (e = &list->entries[list->first_userptr]; \
+ e != &list->entries[list->num_entries]; \
++e)
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index f1a050379190..004a6a9d6b9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -41,13 +41,13 @@ struct amdgpu_cgs_device {
((struct amdgpu_cgs_device *)cgs_device)->adev
-static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned offset)
+static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned int offset)
{
CGS_FUNC_ADEV;
return RREG32(offset);
}
-static void amdgpu_cgs_write_register(struct cgs_device *cgs_device, unsigned offset,
+static void amdgpu_cgs_write_register(struct cgs_device *cgs_device, unsigned int offset,
uint32_t value)
{
CGS_FUNC_ADEV;
@@ -56,7 +56,7 @@ static void amdgpu_cgs_write_register(struct cgs_device *cgs_device, unsigned of
static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device,
enum cgs_ind_reg space,
- unsigned index)
+ unsigned int index)
{
CGS_FUNC_ADEV;
switch (space) {
@@ -84,7 +84,7 @@ static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device,
static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device,
enum cgs_ind_reg space,
- unsigned index, uint32_t value)
+ unsigned int index, uint32_t value)
{
CGS_FUNC_ADEV;
switch (space) {
@@ -163,38 +163,38 @@ static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device,
uint16_t fw_version = 0;
switch (type) {
- case CGS_UCODE_ID_SDMA0:
- fw_version = adev->sdma.instance[0].fw_version;
- break;
- case CGS_UCODE_ID_SDMA1:
- fw_version = adev->sdma.instance[1].fw_version;
- break;
- case CGS_UCODE_ID_CP_CE:
- fw_version = adev->gfx.ce_fw_version;
- break;
- case CGS_UCODE_ID_CP_PFP:
- fw_version = adev->gfx.pfp_fw_version;
- break;
- case CGS_UCODE_ID_CP_ME:
- fw_version = adev->gfx.me_fw_version;
- break;
- case CGS_UCODE_ID_CP_MEC:
- fw_version = adev->gfx.mec_fw_version;
- break;
- case CGS_UCODE_ID_CP_MEC_JT1:
- fw_version = adev->gfx.mec_fw_version;
- break;
- case CGS_UCODE_ID_CP_MEC_JT2:
- fw_version = adev->gfx.mec_fw_version;
- break;
- case CGS_UCODE_ID_RLC_G:
- fw_version = adev->gfx.rlc_fw_version;
- break;
- case CGS_UCODE_ID_STORAGE:
- break;
- default:
- DRM_ERROR("firmware type %d do not have version\n", type);
- break;
+ case CGS_UCODE_ID_SDMA0:
+ fw_version = adev->sdma.instance[0].fw_version;
+ break;
+ case CGS_UCODE_ID_SDMA1:
+ fw_version = adev->sdma.instance[1].fw_version;
+ break;
+ case CGS_UCODE_ID_CP_CE:
+ fw_version = adev->gfx.ce_fw_version;
+ break;
+ case CGS_UCODE_ID_CP_PFP:
+ fw_version = adev->gfx.pfp_fw_version;
+ break;
+ case CGS_UCODE_ID_CP_ME:
+ fw_version = adev->gfx.me_fw_version;
+ break;
+ case CGS_UCODE_ID_CP_MEC:
+ fw_version = adev->gfx.mec_fw_version;
+ break;
+ case CGS_UCODE_ID_CP_MEC_JT1:
+ fw_version = adev->gfx.mec_fw_version;
+ break;
+ case CGS_UCODE_ID_CP_MEC_JT2:
+ fw_version = adev->gfx.mec_fw_version;
+ break;
+ case CGS_UCODE_ID_RLC_G:
+ fw_version = adev->gfx.rlc_fw_version;
+ break;
+ case CGS_UCODE_ID_STORAGE:
+ break;
+ default:
+ DRM_ERROR("firmware type %d do not have version\n", type);
+ break;
}
return fw_version;
}
@@ -205,7 +205,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
{
CGS_FUNC_ADEV;
- if ((CGS_UCODE_ID_SMU != type) && (CGS_UCODE_ID_SMU_SK != type)) {
+ if (type != CGS_UCODE_ID_SMU && type != CGS_UCODE_ID_SMU_SK) {
uint64_t gpu_addr;
uint32_t data_size;
const struct gfx_firmware_header_v1_0 *header;
@@ -213,6 +213,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
struct amdgpu_firmware_info *ucode;
id = fw_type_convert(cgs_device, type);
+ if (id >= AMDGPU_UCODE_ID_MAXIMUM)
+ return -EINVAL;
+
ucode = &adev->firmware.ucode[id];
if (ucode->fw == NULL)
return -EINVAL;
@@ -232,7 +235,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
info->mc_addr = gpu_addr;
info->version = (uint16_t)le32_to_cpu(header->header.ucode_version);
- if (CGS_UCODE_ID_CP_MEC == type)
+ if (type == CGS_UCODE_ID_CP_MEC)
info->image_size = le32_to_cpu(header->jt_offset) << 2;
info->fw_version = amdgpu_get_firmware_version(cgs_device, type);
@@ -249,83 +252,22 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
if (!adev->pm.fw) {
switch (adev->asic_type) {
- case CHIP_TAHITI:
- strcpy(fw_name, "radeon/tahiti_smc.bin");
- break;
- case CHIP_PITCAIRN:
- if ((adev->pdev->revision == 0x81) &&
- ((adev->pdev->device == 0x6810) ||
- (adev->pdev->device == 0x6811))) {
- info->is_kicker = true;
- strcpy(fw_name, "radeon/pitcairn_k_smc.bin");
- } else {
- strcpy(fw_name, "radeon/pitcairn_smc.bin");
- }
- break;
- case CHIP_VERDE:
- if (((adev->pdev->device == 0x6820) &&
- ((adev->pdev->revision == 0x81) ||
- (adev->pdev->revision == 0x83))) ||
- ((adev->pdev->device == 0x6821) &&
- ((adev->pdev->revision == 0x83) ||
- (adev->pdev->revision == 0x87))) ||
- ((adev->pdev->revision == 0x87) &&
- ((adev->pdev->device == 0x6823) ||
- (adev->pdev->device == 0x682b)))) {
- info->is_kicker = true;
- strcpy(fw_name, "radeon/verde_k_smc.bin");
- } else {
- strcpy(fw_name, "radeon/verde_smc.bin");
- }
- break;
- case CHIP_OLAND:
- if (((adev->pdev->revision == 0x81) &&
- ((adev->pdev->device == 0x6600) ||
- (adev->pdev->device == 0x6604) ||
- (adev->pdev->device == 0x6605) ||
- (adev->pdev->device == 0x6610))) ||
- ((adev->pdev->revision == 0x83) &&
- (adev->pdev->device == 0x6610))) {
- info->is_kicker = true;
- strcpy(fw_name, "radeon/oland_k_smc.bin");
- } else {
- strcpy(fw_name, "radeon/oland_smc.bin");
- }
- break;
- case CHIP_HAINAN:
- if (((adev->pdev->revision == 0x81) &&
- (adev->pdev->device == 0x6660)) ||
- ((adev->pdev->revision == 0x83) &&
- ((adev->pdev->device == 0x6660) ||
- (adev->pdev->device == 0x6663) ||
- (adev->pdev->device == 0x6665) ||
- (adev->pdev->device == 0x6667)))) {
- info->is_kicker = true;
- strcpy(fw_name, "radeon/hainan_k_smc.bin");
- } else if ((adev->pdev->revision == 0xc3) &&
- (adev->pdev->device == 0x6665)) {
- info->is_kicker = true;
- strcpy(fw_name, "radeon/banks_k_2_smc.bin");
- } else {
- strcpy(fw_name, "radeon/hainan_smc.bin");
- }
- break;
case CHIP_BONAIRE:
if ((adev->pdev->revision == 0x80) ||
(adev->pdev->revision == 0x81) ||
(adev->pdev->device == 0x665f)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/bonaire_k_smc.bin");
+ strscpy(fw_name, "amdgpu/bonaire_k_smc.bin");
} else {
- strcpy(fw_name, "amdgpu/bonaire_smc.bin");
+ strscpy(fw_name, "amdgpu/bonaire_smc.bin");
}
break;
case CHIP_HAWAII:
if (adev->pdev->revision == 0x80) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/hawaii_k_smc.bin");
+ strscpy(fw_name, "amdgpu/hawaii_k_smc.bin");
} else {
- strcpy(fw_name, "amdgpu/hawaii_smc.bin");
+ strscpy(fw_name, "amdgpu/hawaii_smc.bin");
}
break;
case CHIP_TOPAZ:
@@ -335,93 +277,88 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD1)) ||
((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD3))) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/topaz_k_smc.bin");
+ strscpy(fw_name, "amdgpu/topaz_k_smc.bin");
} else
- strcpy(fw_name, "amdgpu/topaz_smc.bin");
+ strscpy(fw_name, "amdgpu/topaz_smc.bin");
break;
case CHIP_TONGA:
if (((adev->pdev->device == 0x6939) && (adev->pdev->revision == 0xf1)) ||
((adev->pdev->device == 0x6938) && (adev->pdev->revision == 0xf1))) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/tonga_k_smc.bin");
+ strscpy(fw_name, "amdgpu/tonga_k_smc.bin");
} else
- strcpy(fw_name, "amdgpu/tonga_smc.bin");
+ strscpy(fw_name, "amdgpu/tonga_smc.bin");
break;
case CHIP_FIJI:
- strcpy(fw_name, "amdgpu/fiji_smc.bin");
+ strscpy(fw_name, "amdgpu/fiji_smc.bin");
break;
case CHIP_POLARIS11:
if (type == CGS_UCODE_ID_SMU) {
if (ASICID_IS_P21(adev->pdev->device, adev->pdev->revision)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/polaris11_k_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris11_k_smc.bin");
} else if (ASICID_IS_P31(adev->pdev->device, adev->pdev->revision)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/polaris11_k2_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris11_k2_smc.bin");
} else {
- strcpy(fw_name, "amdgpu/polaris11_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris11_smc.bin");
}
} else if (type == CGS_UCODE_ID_SMU_SK) {
- strcpy(fw_name, "amdgpu/polaris11_smc_sk.bin");
+ strscpy(fw_name, "amdgpu/polaris11_smc_sk.bin");
}
break;
case CHIP_POLARIS10:
if (type == CGS_UCODE_ID_SMU) {
if (ASICID_IS_P20(adev->pdev->device, adev->pdev->revision)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/polaris10_k_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris10_k_smc.bin");
} else if (ASICID_IS_P30(adev->pdev->device, adev->pdev->revision)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/polaris10_k2_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris10_k2_smc.bin");
} else {
- strcpy(fw_name, "amdgpu/polaris10_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris10_smc.bin");
}
} else if (type == CGS_UCODE_ID_SMU_SK) {
- strcpy(fw_name, "amdgpu/polaris10_smc_sk.bin");
+ strscpy(fw_name, "amdgpu/polaris10_smc_sk.bin");
}
break;
case CHIP_POLARIS12:
if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/polaris12_k_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris12_k_smc.bin");
} else {
- strcpy(fw_name, "amdgpu/polaris12_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris12_smc.bin");
}
break;
case CHIP_VEGAM:
- strcpy(fw_name, "amdgpu/vegam_smc.bin");
+ strscpy(fw_name, "amdgpu/vegam_smc.bin");
break;
case CHIP_VEGA10:
if ((adev->pdev->device == 0x687f) &&
((adev->pdev->revision == 0xc0) ||
(adev->pdev->revision == 0xc1) ||
(adev->pdev->revision == 0xc3)))
- strcpy(fw_name, "amdgpu/vega10_acg_smc.bin");
+ strscpy(fw_name, "amdgpu/vega10_acg_smc.bin");
else
- strcpy(fw_name, "amdgpu/vega10_smc.bin");
+ strscpy(fw_name, "amdgpu/vega10_smc.bin");
break;
case CHIP_VEGA12:
- strcpy(fw_name, "amdgpu/vega12_smc.bin");
+ strscpy(fw_name, "amdgpu/vega12_smc.bin");
break;
case CHIP_VEGA20:
- strcpy(fw_name, "amdgpu/vega20_smc.bin");
+ strscpy(fw_name, "amdgpu/vega20_smc.bin");
break;
default:
DRM_ERROR("SMC firmware not supported\n");
return -EINVAL;
}
- err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
- if (err) {
- DRM_ERROR("Failed to request firmware\n");
- return err;
- }
-
- err = amdgpu_ucode_validate(adev->pm.fw);
+ err = amdgpu_ucode_request(adev, &adev->pm.fw,
+ AMDGPU_UCODE_REQUIRED,
+ "%s", fw_name);
if (err) {
DRM_ERROR("Failed to load firmware \"%s\"", fw_name);
- release_firmware(adev->pm.fw);
- adev->pm.fw = NULL;
+ amdgpu_ucode_release(&adev->pm.fw);
return err;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 0de66f59adb8..9f96d568acf2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -24,9 +24,10 @@
* Alex Deucher
*/
+#include <drm/display/drm_dp_helper.h>
+#include <drm/drm_crtc_helper.h>
#include <drm/drm_edid.h>
-#include <drm/drm_fb_helper.h>
-#include <drm/drm_dp_helper.h>
+#include <drm/drm_modeset_helper_vtables.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
@@ -102,13 +103,13 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector)
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
struct amdgpu_connector_atom_dig *dig_connector;
int bpc = 8;
- unsigned mode_clock, max_tmds_clock;
+ unsigned int mode_clock, max_tmds_clock;
switch (connector->connector_type) {
case DRM_MODE_CONNECTOR_DVII:
case DRM_MODE_CONNECTOR_HDMIB:
if (amdgpu_connector->use_digital) {
- if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) {
+ if (connector->display_info.is_hdmi) {
if (connector->display_info.bpc)
bpc = connector->display_info.bpc;
}
@@ -116,7 +117,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector)
break;
case DRM_MODE_CONNECTOR_DVID:
case DRM_MODE_CONNECTOR_HDMIA:
- if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) {
+ if (connector->display_info.is_hdmi) {
if (connector->display_info.bpc)
bpc = connector->display_info.bpc;
}
@@ -125,7 +126,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector)
dig_connector = amdgpu_connector->con_priv;
if ((dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) ||
(dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP) ||
- drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) {
+ connector->display_info.is_hdmi) {
if (connector->display_info.bpc)
bpc = connector->display_info.bpc;
}
@@ -149,7 +150,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector)
break;
}
- if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) {
+ if (connector->display_info.is_hdmi) {
/*
* Pre DCE-8 hw can't handle > 12 bpc, and more than 12 bpc doesn't make
* much sense without support for > 12 bpc framebuffers. RGB 4:4:4 at
@@ -175,7 +176,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector)
/* Check if bpc is within clock limit. Try to degrade gracefully otherwise */
if ((bpc == 12) && (mode_clock * 3/2 > max_tmds_clock)) {
- if ((connector->display_info.edid_hdmi_dc_modes & DRM_EDID_HDMI_DC_30) &&
+ if ((connector->display_info.edid_hdmi_rgb444_dc_modes & DRM_EDID_HDMI_DC_30) &&
(mode_clock * 5/4 <= max_tmds_clock))
bpc = 10;
else
@@ -245,36 +246,10 @@ amdgpu_connector_find_encoder(struct drm_connector *connector,
return NULL;
}
-struct edid *amdgpu_connector_edid(struct drm_connector *connector)
-{
- struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
- struct drm_property_blob *edid_blob = connector->edid_blob_ptr;
-
- if (amdgpu_connector->edid) {
- return amdgpu_connector->edid;
- } else if (edid_blob) {
- struct edid *edid = kmemdup(edid_blob->data, edid_blob->length, GFP_KERNEL);
- if (edid)
- amdgpu_connector->edid = edid;
- }
- return amdgpu_connector->edid;
-}
-
static struct edid *
amdgpu_connector_get_hardcoded_edid(struct amdgpu_device *adev)
{
- struct edid *edid;
-
- if (adev->mode_info.bios_hardcoded_edid) {
- edid = kmalloc(adev->mode_info.bios_hardcoded_edid_size, GFP_KERNEL);
- if (edid) {
- memcpy((unsigned char *)edid,
- (unsigned char *)adev->mode_info.bios_hardcoded_edid,
- adev->mode_info.bios_hardcoded_edid_size);
- return edid;
- }
- }
- return NULL;
+ return drm_edid_duplicate(drm_edid_raw(adev->mode_info.bios_hardcoded_edid));
}
static void amdgpu_connector_get_edid(struct drm_connector *connector)
@@ -315,8 +290,10 @@ static void amdgpu_connector_get_edid(struct drm_connector *connector)
if (!amdgpu_connector->edid) {
/* some laptops provide a hardcoded edid in rom for LCDs */
if (((connector->connector_type == DRM_MODE_CONNECTOR_LVDS) ||
- (connector->connector_type == DRM_MODE_CONNECTOR_eDP)))
+ (connector->connector_type == DRM_MODE_CONNECTOR_eDP))) {
amdgpu_connector->edid = amdgpu_connector_get_hardcoded_edid(adev);
+ drm_connector_update_edid_property(connector, amdgpu_connector->edid);
+ }
}
}
@@ -387,6 +364,9 @@ amdgpu_connector_lcd_native_mode(struct drm_encoder *encoder)
native_mode->vdisplay != 0 &&
native_mode->clock != 0) {
mode = drm_mode_duplicate(dev, native_mode);
+ if (!mode)
+ return NULL;
+
mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
drm_mode_set_name(mode);
@@ -401,6 +381,9 @@ amdgpu_connector_lcd_native_mode(struct drm_encoder *encoder)
* simpler.
*/
mode = drm_cvt_mode(dev, native_mode->hdisplay, native_mode->vdisplay, 60, true, false, false);
+ if (!mode)
+ return NULL;
+
mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
DRM_DEBUG_KMS("Adding cvt approximation of native panel mode %s\n", mode->name);
}
@@ -415,30 +398,28 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder,
struct drm_display_mode *mode = NULL;
struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
int i;
- static const struct mode_size {
+ int n;
+ struct mode_size {
+ char name[DRM_DISPLAY_MODE_LEN];
int w;
int h;
- } common_modes[17] = {
- { 640, 480},
- { 720, 480},
- { 800, 600},
- { 848, 480},
- {1024, 768},
- {1152, 768},
- {1280, 720},
- {1280, 800},
- {1280, 854},
- {1280, 960},
- {1280, 1024},
- {1440, 900},
- {1400, 1050},
- {1680, 1050},
- {1600, 1200},
- {1920, 1080},
- {1920, 1200}
+ } common_modes[] = {
+ { "640x480", 640, 480},
+ { "800x600", 800, 600},
+ { "1024x768", 1024, 768},
+ { "1280x720", 1280, 720},
+ { "1280x800", 1280, 800},
+ {"1280x1024", 1280, 1024},
+ { "1440x900", 1440, 900},
+ {"1680x1050", 1680, 1050},
+ {"1600x1200", 1600, 1200},
+ {"1920x1080", 1920, 1080},
+ {"1920x1200", 1920, 1200}
};
- for (i = 0; i < 17; i++) {
+ n = ARRAY_SIZE(common_modes);
+
+ for (i = 0; i < n; i++) {
if (amdgpu_encoder->devices & (ATOM_DEVICE_TV_SUPPORT)) {
if (common_modes[i].w > 1024 ||
common_modes[i].h > 768)
@@ -451,10 +432,12 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder,
common_modes[i].h == native_mode->vdisplay))
continue;
}
- if (common_modes[i].w < 320 || common_modes[i].h < 200)
- continue;
mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false);
+ if (!mode)
+ return;
+ strscpy(mode->name, common_modes[i].name, DRM_DISPLAY_MODE_LEN);
+
drm_mode_probed_add(connector, mode);
}
}
@@ -579,16 +562,26 @@ static int amdgpu_connector_set_property(struct drm_connector *connector,
amdgpu_encoder = to_amdgpu_encoder(connector->encoder);
} else {
const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
+
amdgpu_encoder = to_amdgpu_encoder(connector_funcs->best_encoder(connector));
}
switch (val) {
default:
- case DRM_MODE_SCALE_NONE: rmx_type = RMX_OFF; break;
- case DRM_MODE_SCALE_CENTER: rmx_type = RMX_CENTER; break;
- case DRM_MODE_SCALE_ASPECT: rmx_type = RMX_ASPECT; break;
- case DRM_MODE_SCALE_FULLSCREEN: rmx_type = RMX_FULL; break;
+ case DRM_MODE_SCALE_NONE:
+ rmx_type = RMX_OFF;
+ break;
+ case DRM_MODE_SCALE_CENTER:
+ rmx_type = RMX_CENTER;
+ break;
+ case DRM_MODE_SCALE_ASPECT:
+ rmx_type = RMX_ASPECT;
+ break;
+ case DRM_MODE_SCALE_FULLSCREEN:
+ rmx_type = RMX_FULL;
+ break;
}
+
if (amdgpu_encoder->rmx_type == rmx_type)
return 0;
@@ -617,7 +610,7 @@ amdgpu_connector_fixup_lcd_native_mode(struct drm_encoder *encoder,
if (mode->type & DRM_MODE_TYPE_PREFERRED) {
if (mode->hdisplay != native_mode->hdisplay ||
mode->vdisplay != native_mode->vdisplay)
- memcpy(native_mode, mode, sizeof(*mode));
+ drm_mode_copy(native_mode, mode);
}
}
@@ -626,7 +619,7 @@ amdgpu_connector_fixup_lcd_native_mode(struct drm_encoder *encoder,
list_for_each_entry_safe(mode, t, &connector->probed_modes, head) {
if (mode->hdisplay == native_mode->hdisplay &&
mode->vdisplay == native_mode->vdisplay) {
- *native_mode = *mode;
+ drm_mode_copy(native_mode, mode);
drm_mode_set_crtcinfo(native_mode, CRTC_INTERLACE_HALVE_V);
DRM_DEBUG_KMS("Determined LVDS native mode details from EDID\n");
break;
@@ -678,7 +671,7 @@ static int amdgpu_connector_lvds_get_modes(struct drm_connector *connector)
}
static enum drm_mode_status amdgpu_connector_lvds_mode_valid(struct drm_connector *connector,
- struct drm_display_mode *mode)
+ const struct drm_display_mode *mode)
{
struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
@@ -741,10 +734,8 @@ amdgpu_connector_lvds_detect(struct drm_connector *connector, bool force)
amdgpu_connector_update_scratch_regs(connector, ret);
- if (!drm_kms_helper_is_poll_worker()) {
- pm_runtime_mark_last_busy(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker())
pm_runtime_put_autosuspend(connector->dev->dev);
- }
return ret;
}
@@ -786,16 +777,26 @@ static int amdgpu_connector_set_lcd_property(struct drm_connector *connector,
amdgpu_encoder = to_amdgpu_encoder(connector->encoder);
else {
const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
+
amdgpu_encoder = to_amdgpu_encoder(connector_funcs->best_encoder(connector));
}
switch (value) {
- case DRM_MODE_SCALE_NONE: rmx_type = RMX_OFF; break;
- case DRM_MODE_SCALE_CENTER: rmx_type = RMX_CENTER; break;
- case DRM_MODE_SCALE_ASPECT: rmx_type = RMX_ASPECT; break;
+ case DRM_MODE_SCALE_NONE:
+ rmx_type = RMX_OFF;
+ break;
+ case DRM_MODE_SCALE_CENTER:
+ rmx_type = RMX_CENTER;
+ break;
+ case DRM_MODE_SCALE_ASPECT:
+ rmx_type = RMX_ASPECT;
+ break;
default:
- case DRM_MODE_SCALE_FULLSCREEN: rmx_type = RMX_FULL; break;
+ case DRM_MODE_SCALE_FULLSCREEN:
+ rmx_type = RMX_FULL;
+ break;
}
+
if (amdgpu_encoder->rmx_type == rmx_type)
return 0;
@@ -833,7 +834,7 @@ static int amdgpu_connector_vga_get_modes(struct drm_connector *connector)
}
static enum drm_mode_status amdgpu_connector_vga_mode_valid(struct drm_connector *connector,
- struct drm_display_mode *mode)
+ const struct drm_display_mode *mode)
{
struct drm_device *dev = connector->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
@@ -916,10 +917,8 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force)
amdgpu_connector_update_scratch_regs(connector, ret);
out:
- if (!drm_kms_helper_is_poll_worker()) {
- pm_runtime_mark_last_busy(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker())
pm_runtime_put_autosuspend(connector->dev->dev);
- }
return ret;
}
@@ -959,6 +958,41 @@ amdgpu_connector_check_hpd_status_unchanged(struct drm_connector *connector)
return false;
}
+static void amdgpu_connector_shared_ddc(enum drm_connector_status *status,
+ struct drm_connector *connector,
+ struct amdgpu_connector *amdgpu_connector)
+{
+ struct drm_connector *list_connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *list_amdgpu_connector;
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ if (amdgpu_connector->shared_ddc && *status == connector_status_connected) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(list_connector,
+ &iter) {
+ if (connector == list_connector)
+ continue;
+ list_amdgpu_connector = to_amdgpu_connector(list_connector);
+ if (list_amdgpu_connector->shared_ddc &&
+ list_amdgpu_connector->ddc_bus->rec.i2c_id ==
+ amdgpu_connector->ddc_bus->rec.i2c_id) {
+ /* cases where both connectors are digital */
+ if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) {
+ /* hpd is our only option in this case */
+ if (!amdgpu_display_hpd_sense(adev,
+ amdgpu_connector->hpd.hpd)) {
+ amdgpu_connector_free_edid(connector);
+ *status = connector_status_disconnected;
+ }
+ }
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+ }
+}
+
/*
* DVI is complicated
* Do a DDC probe, if DDC probe passes, get the full EDID so
@@ -989,13 +1023,33 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
}
}
+ if (amdgpu_connector->detected_hpd_without_ddc) {
+ force = true;
+ amdgpu_connector->detected_hpd_without_ddc = false;
+ }
+
if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) {
ret = connector->status;
goto exit;
}
- if (amdgpu_connector->ddc_bus)
+ if (amdgpu_connector->ddc_bus) {
dret = amdgpu_display_ddc_probe(amdgpu_connector, false);
+
+ /* Sometimes the pins required for the DDC probe on DVI
+ * connectors don't make contact at the same time that the ones
+ * for HPD do. If the DDC probe fails even though we had an HPD
+ * signal, try again later
+ */
+ if (!dret && !force &&
+ amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
+ DRM_DEBUG_KMS("hpd detected without ddc, retrying in 1 second\n");
+ amdgpu_connector->detected_hpd_without_ddc = true;
+ schedule_delayed_work(&adev->hotplug_work,
+ msecs_to_jiffies(1000));
+ goto exit;
+ }
+ }
if (dret) {
amdgpu_connector->detected_by_load = false;
amdgpu_connector_free_edid(connector);
@@ -1025,32 +1079,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
* DDC line. The latter is more complex because with DVI<->HDMI adapters
* you don't really know what's connected to which port as both are digital.
*/
- if (amdgpu_connector->shared_ddc && (ret == connector_status_connected)) {
- struct drm_connector *list_connector;
- struct drm_connector_list_iter iter;
- struct amdgpu_connector *list_amdgpu_connector;
-
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(list_connector,
- &iter) {
- if (connector == list_connector)
- continue;
- list_amdgpu_connector = to_amdgpu_connector(list_connector);
- if (list_amdgpu_connector->shared_ddc &&
- (list_amdgpu_connector->ddc_bus->rec.i2c_id ==
- amdgpu_connector->ddc_bus->rec.i2c_id)) {
- /* cases where both connectors are digital */
- if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) {
- /* hpd is our only option in this case */
- if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
- amdgpu_connector_free_edid(connector);
- ret = connector_status_disconnected;
- }
- }
- }
- }
- drm_connector_list_iter_end(&iter);
- }
+ amdgpu_connector_shared_ddc(&ret, connector, amdgpu_connector);
}
}
@@ -1098,7 +1127,8 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
/* assume digital unless load detected otherwise */
amdgpu_connector->use_digital = true;
lret = encoder_funcs->detect(encoder, connector);
- DRM_DEBUG_KMS("load_detect %x returned: %x\n",encoder->encoder_type,lret);
+ DRM_DEBUG_KMS("load_detect %x returned: %x\n",
+ encoder->encoder_type, lret);
if (lret == connector_status_connected)
amdgpu_connector->use_digital = false;
}
@@ -1112,10 +1142,8 @@ out:
amdgpu_connector_update_scratch_regs(connector, ret);
exit:
- if (!drm_kms_helper_is_poll_worker()) {
- pm_runtime_mark_last_busy(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker())
pm_runtime_put_autosuspend(connector->dev->dev);
- }
return ret;
}
@@ -1151,35 +1179,76 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector)
static void amdgpu_connector_dvi_force(struct drm_connector *connector)
{
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
if (connector->force == DRM_FORCE_ON)
amdgpu_connector->use_digital = false;
if (connector->force == DRM_FORCE_ON_DIGITAL)
amdgpu_connector->use_digital = true;
}
+/**
+ * amdgpu_max_hdmi_pixel_clock - Return max supported HDMI (TMDS) pixel clock
+ * @adev: pointer to amdgpu_device
+ *
+ * Return: maximum supported HDMI (TMDS) pixel clock in KHz.
+ */
+static int amdgpu_max_hdmi_pixel_clock(const struct amdgpu_device *adev)
+{
+ if (adev->asic_type >= CHIP_POLARIS10)
+ return 600000;
+ else if (adev->asic_type >= CHIP_TONGA)
+ return 300000;
+ else
+ return 297000;
+}
+
+/**
+ * amdgpu_connector_dvi_mode_valid - Validate a mode on DVI/HDMI connectors
+ * @connector: DRM connector to validate the mode on
+ * @mode: display mode to validate
+ *
+ * Validate the given display mode on DVI and HDMI connectors, including
+ * analog signals on DVI-I.
+ *
+ * Return: drm_mode_status indicating whether the mode is valid.
+ */
static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector *connector,
- struct drm_display_mode *mode)
+ const struct drm_display_mode *mode)
{
struct drm_device *dev = connector->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ const int max_hdmi_pixel_clock = amdgpu_max_hdmi_pixel_clock(adev);
+ const int max_dvi_single_link_pixel_clock = 165000;
+ int max_digital_pixel_clock_khz;
/* XXX check mode bandwidth */
- if (amdgpu_connector->use_digital && (mode->clock > 165000)) {
- if ((amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I) ||
- (amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D) ||
- (amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_HDMI_TYPE_B)) {
- return MODE_OK;
- } else if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) {
- /* HDMI 1.3+ supports max clock of 340 Mhz */
- if (mode->clock > 340000)
- return MODE_CLOCK_HIGH;
- else
- return MODE_OK;
- } else {
- return MODE_CLOCK_HIGH;
+ if (amdgpu_connector->use_digital) {
+ switch (amdgpu_connector->connector_object_id) {
+ case CONNECTOR_OBJECT_ID_HDMI_TYPE_A:
+ max_digital_pixel_clock_khz = max_hdmi_pixel_clock;
+ break;
+ case CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_I:
+ case CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D:
+ max_digital_pixel_clock_khz = max_dvi_single_link_pixel_clock;
+ break;
+ case CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I:
+ case CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D:
+ case CONNECTOR_OBJECT_ID_HDMI_TYPE_B:
+ max_digital_pixel_clock_khz = max_dvi_single_link_pixel_clock * 2;
+ break;
}
+
+ /* When the display EDID claims that it's an HDMI display,
+ * we use the HDMI encoder mode of the display HW,
+ * so we should verify against the max HDMI clock here.
+ */
+ if (connector->display_info.is_hdmi)
+ max_digital_pixel_clock_khz = max_hdmi_pixel_clock;
+
+ if (mode->clock > max_digital_pixel_clock_khz)
+ return MODE_CLOCK_HIGH;
}
/* check against the max pixel clock */
@@ -1385,6 +1454,7 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
ret = connector_status_connected;
else if (amdgpu_connector->dac_load_detect) { /* try load detection */
const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
+
ret = encoder_funcs->detect(encoder, connector);
}
}
@@ -1410,10 +1480,8 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
amdgpu_connector_update_scratch_regs(connector, ret);
out:
- if (!drm_kms_helper_is_poll_worker()) {
- pm_runtime_mark_last_busy(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker())
pm_runtime_put_autosuspend(connector->dev->dev);
- }
if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
connector->connector_type == DRM_MODE_CONNECTOR_eDP)
@@ -1425,7 +1493,7 @@ out:
}
static enum drm_mode_status amdgpu_connector_dp_mode_valid(struct drm_connector *connector,
- struct drm_display_mode *mode)
+ const struct drm_display_mode *mode)
{
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
struct amdgpu_connector_atom_dig *amdgpu_dig_connector = amdgpu_connector->con_priv;
@@ -1463,7 +1531,7 @@ static enum drm_mode_status amdgpu_connector_dp_mode_valid(struct drm_connector
(amdgpu_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP)) {
return amdgpu_atombios_dp_mode_valid_helper(connector, mode);
} else {
- if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) {
+ if (connector->display_info.is_hdmi) {
/* HDMI 1.3+ supports max clock of 340 Mhz */
if (mode->clock > 340000)
return MODE_CLOCK_HIGH;
@@ -1665,10 +1733,12 @@ amdgpu_connector_add(struct amdgpu_device *adev,
adev->mode_info.dither_property,
AMDGPU_FMT_DITHER_DISABLE);
- if (amdgpu_audio != 0)
+ if (amdgpu_audio != 0) {
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.audio_property,
AMDGPU_AUDIO_AUTO);
+ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
+ }
subpixel_order = SubPixelHorizontalRGB;
connector->interlace_allowed = true;
@@ -1790,6 +1860,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.audio_property,
AMDGPU_AUDIO_AUTO);
+ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
}
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.dither_property,
@@ -1843,6 +1914,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.audio_property,
AMDGPU_AUDIO_AUTO);
+ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
}
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.dither_property,
@@ -1893,6 +1965,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.audio_property,
AMDGPU_AUDIO_AUTO);
+ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
}
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.dither_property,
@@ -1957,7 +2030,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
if (amdgpu_connector->hpd.hpd == AMDGPU_HPD_NONE) {
if (i2c_bus->valid) {
connector->polled = DRM_CONNECTOR_POLL_CONNECT |
- DRM_CONNECTOR_POLL_DISCONNECT;
+ DRM_CONNECTOR_POLL_DISCONNECT;
}
} else
connector->polled = DRM_CONNECTOR_POLL_HPD;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h
index 61fcef15ad72..eff833b6ed31 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h
@@ -24,7 +24,6 @@
#ifndef __AMDGPU_CONNECTORS_H__
#define __AMDGPU_CONNECTORS_H__
-struct edid *amdgpu_connector_edid(struct drm_connector *connector);
void amdgpu_connector_hotplug(struct drm_connector *connector);
int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector);
u16 amdgpu_connector_encoder_get_dp_bridge_encoder_id(struct drm_connector *connector);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
new file mode 100644
index 000000000000..425a3e564360
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
@@ -0,0 +1,591 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/list.h>
+#include "amdgpu.h"
+
+/* GUIDs assigned to hdr->notify_type when a CPER record header is filled. */
+static const guid_t MCE = CPER_NOTIFY_MCE;
+static const guid_t CMC = CPER_NOTIFY_CMC;
+static const guid_t BOOT = BOOT_TYPE;
+
+/*
+ * GUIDs passed as the section type of a section descriptor: CRASHDUMP for
+ * fatal sections, RUNTIME for non-standard error sections (RUNTIME is also
+ * stored as the error_type of those sections).
+ */
+static const guid_t CRASHDUMP = AMD_CRASHDUMP;
+static const guid_t RUNTIME = AMD_GPU_NONSTANDARD_ERROR;
+
+/* Account for @size more bytes in the record length kept in the header. */
+static void __inc_entry_length(struct cper_hdr *hdr, uint32_t size)
+{
+	hdr->record_length = hdr->record_length + size;
+}
+
+/*
+ * Fill @timestamp from the current wall-clock time (offset 0 is passed to
+ * time64_to_tm). The year is split into century and two-digit year; the
+ * month is stored 1-based.
+ */
+static void amdgpu_cper_get_timestamp(struct cper_timestamp *timestamp)
+{
+	struct tm broken_down;
+	long full_year;
+
+	time64_to_tm(ktime_get_real_seconds(), 0, &broken_down);
+	full_year = 1900 + broken_down.tm_year;
+
+	timestamp->century = full_year / 100;
+	timestamp->year = full_year % 100;
+	timestamp->month = 1 + broken_down.tm_mon;
+	timestamp->day = broken_down.tm_mday;
+	timestamp->hours = broken_down.tm_hour;
+	timestamp->minutes = broken_down.tm_min;
+	timestamp->seconds = broken_down.tm_sec;
+	timestamp->flag = 0;
+}
+
+/**
+ * amdgpu_cper_entry_fill_hdr - populate the header of a CPER record
+ * @adev: device the record is generated for
+ * @hdr: header located at the start of the record buffer
+ * @type: kind of record; selects the notify type GUID
+ * @sev: severity stored in the header
+ *
+ * Writes the "CPER" signature, revision, severity, timestamp, record id,
+ * platform id, creator id and notify type, then adds HDR_LEN to
+ * hdr->record_length.
+ */
+void amdgpu_cper_entry_fill_hdr(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ enum amdgpu_cper_type type,
+ enum cper_error_severity sev)
+{
+ char record_id[16];
+
+ /* The four signature bytes are written without a terminating NUL. */
+ hdr->signature[0] = 'C';
+ hdr->signature[1] = 'P';
+ hdr->signature[2] = 'E';
+ hdr->signature[3] = 'R';
+ hdr->revision = CPER_HDR_REV_1;
+ hdr->signature_end = 0xFFFFFFFF;
+ hdr->error_severity = sev;
+
+ hdr->valid_bits.platform_id = 1;
+ hdr->valid_bits.timestamp = 1;
+
+ amdgpu_cper_get_timestamp(&hdr->timestamp);
+
+ /*
+ * Record id is "<socket id>:<unique id in hex>". The socket id falls
+ * back to 0 when no get_socket_id callback is available. snprintf is
+ * capped at 9 so at most 8 characters plus the NUL are produced, and
+ * only those 8 bytes are copied into the header.
+ */
+ snprintf(record_id, 9, "%d:%X",
+ (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ?
+ adev->smuio.funcs->get_socket_id(adev) :
+ 0,
+ atomic_inc_return(&adev->cper.unique_id));
+ memcpy(hdr->record_id, record_id, 8);
+
+ /* Platform id is built from the PCI vendor and device fields of adev->pdev. */
+ snprintf(hdr->platform_id, 16, "0x%04X:0x%04X",
+ adev->pdev->vendor, adev->pdev->device);
+ /* pmfw version should be part of creator_id according to CPER spec */
+ snprintf(hdr->creator_id, 16, "%s", CPER_CREATOR_ID_AMDGPU);
+
+ switch (type) {
+ case AMDGPU_CPER_TYPE_BOOT:
+ hdr->notify_type = BOOT;
+ break;
+ case AMDGPU_CPER_TYPE_FATAL:
+ case AMDGPU_CPER_TYPE_BP_THRESHOLD:
+ hdr->notify_type = MCE;
+ break;
+ case AMDGPU_CPER_TYPE_RUNTIME:
+ /* Corrected runtime errors are reported as CMC, everything else as MCE. */
+ if (sev == CPER_SEV_NON_FATAL_CORRECTED)
+ hdr->notify_type = CMC;
+ else
+ hdr->notify_type = MCE;
+ break;
+ default:
+ /*
+ * notify_type is left untouched for an unknown type; the header
+ * length is still accounted for below.
+ */
+ dev_err(adev->dev, "Unknown CPER Type\n");
+ break;
+ }
+
+ __inc_entry_length(hdr, HDR_LEN);
+}
+
+/*
+ * Fill one CPER section descriptor.
+ *
+ * @section_offset and @section_length describe where the section body lives
+ * inside the record. @bp_threshold sets the exceed_err_threshold flag and
+ * @poison sets the latent_err flag; neither flag is written when the
+ * corresponding argument is false. Always returns 0.
+ */
+static int amdgpu_cper_entry_fill_section_desc(struct amdgpu_device *adev,
+					       struct cper_sec_desc *section_desc,
+					       bool bp_threshold,
+					       bool poison,
+					       enum cper_error_severity sev,
+					       guid_t sec_type,
+					       uint32_t section_length,
+					       uint32_t section_offset)
+{
+	/* What the section is and where its body can be found. */
+	section_desc->sec_type = sec_type;
+	section_desc->severity = sev;
+	section_desc->sec_length = section_length;
+	section_desc->sec_offset = section_offset;
+
+	section_desc->revision_major = CPER_SEC_MAJOR_REV_22;
+	section_desc->revision_minor = CPER_SEC_MINOR_REV_1;
+
+	section_desc->flag_bits.primary = 1;
+	if (poison)
+		section_desc->flag_bits.latent_err = 1;
+	if (bp_threshold)
+		section_desc->flag_bits.exceed_err_threshold = 1;
+
+	/* FRU text is "OAM<socket id>"; socket id is 0 without a callback. */
+	section_desc->valid_bits.fru_text = 1;
+	snprintf(section_desc->fru_text, 20, "OAM%d",
+		 (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ?
+		 adev->smuio.funcs->get_socket_id(adev) :
+		 0);
+
+	return 0;
+}
+
+/**
+ * amdgpu_cper_entry_fill_fatal_section - fill one crashdump section
+ * @adev: device the record is generated for
+ * @hdr: start of the record buffer
+ * @idx: index of the section inside the record
+ * @reg_data: register values copied into the section body
+ *
+ * Fills descriptor @idx and the matching crashdump body, then adds the size
+ * of both to hdr->record_length.
+ *
+ * Return: always 0.
+ */
+int amdgpu_cper_entry_fill_fatal_section(struct amdgpu_device *adev,
+					 struct cper_hdr *hdr,
+					 uint32_t idx,
+					 struct cper_sec_crashdump_reg_data reg_data)
+{
+	size_t body_off = FATAL_SEC_OFFSET(hdr->sec_cnt, idx);
+	uint8_t *base = (uint8_t *)hdr;
+	struct cper_sec_crashdump_fatal *body_sec;
+	struct cper_sec_desc *desc;
+
+	desc = (struct cper_sec_desc *)(base + SEC_DESC_OFFSET(idx));
+	body_sec = (struct cper_sec_crashdump_fatal *)(base + body_off);
+
+	amdgpu_cper_entry_fill_section_desc(adev, desc, false, false,
+					    CPER_SEV_FATAL, CRASHDUMP, FATAL_SEC_LEN,
+					    body_off);
+
+	body_sec->body.data = reg_data;
+	body_sec->body.reg_arr_size = sizeof(reg_data);
+	body_sec->body.reg_ctx_type = CPER_CTX_TYPE_CRASH;
+
+	__inc_entry_length(hdr, SEC_DESC_LEN + FATAL_SEC_LEN);
+
+	return 0;
+}
+
+/**
+ * amdgpu_cper_entry_fill_runtime_section - fill one non-standard error section
+ * @adev: device the record is generated for
+ * @hdr: start of the record buffer
+ * @idx: index of the section inside the record
+ * @sev: severity of this section
+ * @reg_dump: register values to copy into the section
+ * @reg_count: number of 32-bit values in @reg_dump; clamped to
+ *             CPER_ACA_REG_COUNT
+ *
+ * Any severity other than CPER_SEV_NON_FATAL_CORRECTED marks the descriptor
+ * with the latent_err flag. Adds the descriptor and section size to
+ * hdr->record_length.
+ *
+ * Return: always 0.
+ */
+int amdgpu_cper_entry_fill_runtime_section(struct amdgpu_device *adev,
+					   struct cper_hdr *hdr,
+					   uint32_t idx,
+					   enum cper_error_severity sev,
+					   uint32_t *reg_dump,
+					   uint32_t reg_count)
+{
+	size_t body_off = NONSTD_SEC_OFFSET(hdr->sec_cnt, idx);
+	bool poison = (sev != CPER_SEV_NON_FATAL_CORRECTED);
+	uint8_t *base = (uint8_t *)hdr;
+	struct cper_sec_nonstd_err *err_sec;
+	struct cper_sec_desc *desc;
+
+	desc = (struct cper_sec_desc *)(base + SEC_DESC_OFFSET(idx));
+	err_sec = (struct cper_sec_nonstd_err *)(base + body_off);
+
+	amdgpu_cper_entry_fill_section_desc(adev, desc, false, poison,
+					    sev, RUNTIME, NONSTD_SEC_LEN,
+					    body_off);
+
+	err_sec->hdr.valid_bits.err_info_cnt = 1;
+	err_sec->hdr.valid_bits.err_context_cnt = 1;
+
+	err_sec->info.error_type = RUNTIME;
+	err_sec->info.ms_chk_bits.err_type_valid = 1;
+
+	err_sec->ctx.reg_ctx_type = CPER_CTX_TYPE_CRASH;
+	err_sec->ctx.reg_arr_size = sizeof(err_sec->ctx.reg_dump);
+
+	/* Never copy more registers than the caller was clamped to. */
+	reg_count = umin(reg_count, CPER_ACA_REG_COUNT);
+	memcpy(err_sec->ctx.reg_dump, reg_dump, reg_count * sizeof(uint32_t));
+
+	__inc_entry_length(hdr, SEC_DESC_LEN + NONSTD_SEC_LEN);
+
+	return 0;
+}
+
+/**
+ * amdgpu_cper_entry_fill_bad_page_threshold_section - fill a bad page
+ * threshold section
+ * @adev: device the record is generated for
+ * @hdr: start of the record buffer
+ * @idx: index of the section inside the record
+ *
+ * Fills descriptor @idx (fatal severity, exceed_err_threshold flag set) and
+ * a non-standard error section whose register dump is a fixed image; only
+ * the IPID words depend on the socket id. Adds the descriptor and section
+ * size to hdr->record_length.
+ *
+ * Return: always 0.
+ */
+int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx)
+{
+ struct cper_sec_desc *section_desc;
+ struct cper_sec_nonstd_err *section;
+ uint32_t socket_id;
+
+ /* Descriptors follow the header; section bodies follow all descriptors. */
+ section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx));
+ section = (struct cper_sec_nonstd_err *)((uint8_t *)hdr +
+ NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ amdgpu_cper_entry_fill_section_desc(adev, section_desc, true, false,
+ CPER_SEV_FATAL, RUNTIME, NONSTD_SEC_LEN,
+ NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ section->hdr.valid_bits.err_info_cnt = 1;
+ section->hdr.valid_bits.err_context_cnt = 1;
+
+ section->info.error_type = RUNTIME;
+ section->info.valid_bits.ms_chk = 1;
+ section->info.ms_chk_bits.err_type_valid = 1;
+ section->info.ms_chk_bits.err_type = 1;
+ section->info.ms_chk_bits.pcc = 1;
+ section->ctx.reg_ctx_type = CPER_CTX_TYPE_CRASH;
+ section->ctx.reg_arr_size = sizeof(section->ctx.reg_dump);
+
+ /* Hardcoded Reg dump for bad page threshold CPER */
+ /* Socket id falls back to 0 when no get_socket_id callback is available. */
+ socket_id = (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ?
+ adev->smuio.funcs->get_socket_id(adev) :
+ 0;
+ section->ctx.reg_dump[CPER_ACA_REG_CTL_LO] = 0x1;
+ section->ctx.reg_dump[CPER_ACA_REG_CTL_HI] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_STATUS_LO] = 0x137;
+ section->ctx.reg_dump[CPER_ACA_REG_STATUS_HI] = 0xB0000000;
+ section->ctx.reg_dump[CPER_ACA_REG_ADDR_LO] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_ADDR_HI] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_MISC0_LO] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_MISC0_HI] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_CONFIG_LO] = 0x2;
+ section->ctx.reg_dump[CPER_ACA_REG_CONFIG_HI] = 0x1ff;
+ /*
+ * socket_id / 4 (one bit) goes into IPID_LO and socket_id % 4 into
+ * bits 13:12 of IPID_HI.
+ * NOTE(review): the meaning of these fields and of the constants above
+ * is defined by the hardware register layout, not visible here.
+ */
+ section->ctx.reg_dump[CPER_ACA_REG_IPID_LO] = (socket_id / 4) & 0x01;
+ section->ctx.reg_dump[CPER_ACA_REG_IPID_HI] = 0x096 | (((socket_id % 4) & 0x3) << 12);
+ section->ctx.reg_dump[CPER_ACA_REG_SYND_LO] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_SYND_HI] = 0x0;
+
+ __inc_entry_length(hdr, SEC_DESC_LEN + NONSTD_SEC_LEN);
+
+ return 0;
+}
+
+/**
+ * amdgpu_cper_alloc_entry - allocate a zeroed buffer for one CPER record
+ * @adev: device, used for error reporting
+ * @type: record type; determines the size of each section body
+ * @section_count: number of sections the record will hold
+ *
+ * The buffer holds the header, @section_count descriptors and
+ * @section_count section bodies. hdr->sec_cnt is set before returning so the
+ * section offset macros can be used right away.
+ *
+ * Return: the new record, or NULL for an unknown @type or on allocation
+ * failure. The caller frees it with kfree().
+ */
+struct cper_hdr *amdgpu_cper_alloc_entry(struct amdgpu_device *adev,
+					 enum amdgpu_cper_type type,
+					 uint16_t section_count)
+{
+	struct cper_hdr *entry;
+	size_t body_len;
+	uint32_t size;
+
+	switch (type) {
+	case AMDGPU_CPER_TYPE_RUNTIME:
+	case AMDGPU_CPER_TYPE_BP_THRESHOLD:
+		body_len = NONSTD_SEC_LEN;
+		break;
+	case AMDGPU_CPER_TYPE_FATAL:
+		body_len = FATAL_SEC_LEN;
+		break;
+	case AMDGPU_CPER_TYPE_BOOT:
+		body_len = BOOT_SEC_LEN;
+		break;
+	default:
+		dev_err(adev->dev, "Unknown CPER Type!\n");
+		return NULL;
+	}
+
+	size = HDR_LEN + (SEC_DESC_LEN * section_count) +
+	       (body_len * section_count);
+
+	entry = kzalloc(size, GFP_KERNEL);
+	if (entry == NULL)
+		return NULL;
+
+	/* Save this early */
+	entry->sec_cnt = section_count;
+
+	return entry;
+}
+
+/**
+ * amdgpu_cper_generate_ue_record - emit a fatal CPER record for one ACA bank
+ * @adev: device the record is generated for
+ * @bank: ACA bank whose STATUS, ADDR, IPID and SYND registers are recorded
+ *
+ * Builds a single-section fatal record in a temporary buffer and writes it
+ * to the CPER ring. The temporary buffer is released on every path.
+ *
+ * Return: 0 on success, -ENOMEM if the record cannot be allocated, or the
+ * error returned while filling the section.
+ */
+int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
+				   struct aca_bank *bank)
+{
+	struct cper_hdr *fatal = NULL;
+	struct cper_sec_crashdump_reg_data reg_data = { 0 };
+	struct amdgpu_ring *ring = &adev->cper.ring_buf;
+	int ret;
+
+	fatal = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_FATAL, 1);
+	if (!fatal) {
+		dev_err(adev->dev, "fail to alloc cper entry for ue record\n");
+		return -ENOMEM;
+	}
+
+	/* Each 64-bit bank register is stored as a low/high pair of 32-bit words. */
+	reg_data.status_lo = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+	reg_data.status_hi = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+	reg_data.addr_lo = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+	reg_data.addr_hi = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+	reg_data.ipid_lo = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+	reg_data.ipid_hi = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+	reg_data.synd_lo = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+	reg_data.synd_hi = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+
+	amdgpu_cper_entry_fill_hdr(adev, fatal, AMDGPU_CPER_TYPE_FATAL, CPER_SEV_FATAL);
+	ret = amdgpu_cper_entry_fill_fatal_section(adev, fatal, 0, reg_data);
+	if (ret)
+		goto out_free;
+
+	amdgpu_cper_ring_write(ring, fatal, fatal->record_length);
+
+out_free:
+	/* The record is only a staging buffer; do not leak it on failure. */
+	kfree(fatal);
+
+	return ret;
+}
+
+/**
+ * amdgpu_cper_generate_bp_threshold_record - emit a bad page threshold record
+ * @adev: device the record is generated for
+ *
+ * Builds a single-section record in a temporary buffer and writes it to the
+ * CPER ring. The temporary buffer is released on every path.
+ *
+ * Return: 0 on success, -ENOMEM if the record cannot be allocated, or the
+ * error returned while filling the section.
+ */
+int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev)
+{
+	struct cper_hdr *bp_threshold = NULL;
+	struct amdgpu_ring *ring = &adev->cper.ring_buf;
+	int ret;
+
+	bp_threshold = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_BP_THRESHOLD, 1);
+	if (!bp_threshold) {
+		dev_err(adev->dev, "fail to alloc cper entry for bad page threshold record\n");
+		return -ENOMEM;
+	}
+
+	amdgpu_cper_entry_fill_hdr(adev, bp_threshold,
+				   AMDGPU_CPER_TYPE_BP_THRESHOLD,
+				   CPER_SEV_FATAL);
+	ret = amdgpu_cper_entry_fill_bad_page_threshold_section(adev, bp_threshold, 0);
+	if (ret)
+		goto out_free;
+
+	amdgpu_cper_ring_write(ring, bp_threshold, bp_threshold->record_length);
+
+out_free:
+	/* The record is only a staging buffer; do not leak it on failure. */
+	kfree(bp_threshold);
+
+	return ret;
+}
+
+/*
+ * Map an ACA error type to a CPER severity. Unknown types are logged and
+ * treated as fatal.
+ */
+static enum cper_error_severity amdgpu_aca_err_type_to_cper_sev(struct amdgpu_device *adev,
+								enum aca_error_type aca_err_type)
+{
+	enum cper_error_severity sev = CPER_SEV_FATAL;
+
+	switch (aca_err_type) {
+	case ACA_ERROR_TYPE_UE:
+		break;
+	case ACA_ERROR_TYPE_CE:
+		sev = CPER_SEV_NON_FATAL_CORRECTED;
+		break;
+	case ACA_ERROR_TYPE_DEFERRED:
+		sev = CPER_SEV_NON_FATAL_UNCORRECTED;
+		break;
+	default:
+		dev_err(adev->dev, "Unknown ACA error type!\n");
+		break;
+	}
+
+	return sev;
+}
+
+/**
+ * amdgpu_cper_generate_ce_records - emit one runtime CPER record for a list
+ * of ACA banks
+ * @adev: device the record is generated for
+ * @banks: list of ACA banks (corrected and deferred errors)
+ * @bank_count: number of sections to allocate; expected to match the number
+ *              of banks on the list
+ *
+ * The header severity is raised to non-fatal uncorrected if any bank on the
+ * list is a deferred error. Each bank becomes one section. At most
+ * @bank_count banks are encoded, because the record buffer is sized for
+ * exactly that many sections. The temporary buffer is released on every path.
+ *
+ * Return: 0 on success, -ENOMEM if the record cannot be allocated, or the
+ * error returned while filling a section.
+ */
+int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev,
+				    struct aca_banks *banks,
+				    uint16_t bank_count)
+{
+	struct cper_hdr *corrected = NULL;
+	enum cper_error_severity sev = CPER_SEV_NON_FATAL_CORRECTED;
+	struct amdgpu_ring *ring = &adev->cper.ring_buf;
+	uint32_t reg_data[CPER_ACA_REG_COUNT] = { 0 };
+	struct aca_bank_node *node;
+	struct aca_bank *bank;
+	uint32_t i = 0;
+	int ret = 0;
+
+	corrected = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_RUNTIME, bank_count);
+	if (!corrected) {
+		dev_err(adev->dev, "fail to allocate cper entry for ce records\n");
+		return -ENOMEM;
+	}
+
+	/* Raise severity if any DE is detected in the ACA bank list */
+	list_for_each_entry(node, &banks->list, node) {
+		bank = &node->bank;
+		if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) {
+			sev = CPER_SEV_NON_FATAL_UNCORRECTED;
+			break;
+		}
+	}
+
+	amdgpu_cper_entry_fill_hdr(adev, corrected, AMDGPU_CPER_TYPE_RUNTIME, sev);
+
+	/* Combine CE and DE in cper record */
+	list_for_each_entry(node, &banks->list, node) {
+		/*
+		 * The record was allocated for bank_count sections; writing
+		 * another one would run past the end of the buffer.
+		 */
+		if (i >= bank_count) {
+			dev_warn(adev->dev,
+				 "more ACA banks than cper sections (%u), dropping the rest\n",
+				 bank_count);
+			break;
+		}
+
+		bank = &node->bank;
+		reg_data[CPER_ACA_REG_CTL_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CTL]);
+		reg_data[CPER_ACA_REG_CTL_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CTL]);
+		reg_data[CPER_ACA_REG_STATUS_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+		reg_data[CPER_ACA_REG_STATUS_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+		reg_data[CPER_ACA_REG_ADDR_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+		reg_data[CPER_ACA_REG_ADDR_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+		reg_data[CPER_ACA_REG_MISC0_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_MISC0]);
+		reg_data[CPER_ACA_REG_MISC0_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_MISC0]);
+		reg_data[CPER_ACA_REG_CONFIG_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CONFIG]);
+		reg_data[CPER_ACA_REG_CONFIG_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CONFIG]);
+		reg_data[CPER_ACA_REG_IPID_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+		reg_data[CPER_ACA_REG_IPID_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+		reg_data[CPER_ACA_REG_SYND_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+		reg_data[CPER_ACA_REG_SYND_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+
+		ret = amdgpu_cper_entry_fill_runtime_section(adev, corrected, i++,
+				amdgpu_aca_err_type_to_cper_sev(adev, bank->aca_err_type),
+				reg_data, CPER_ACA_REG_COUNT);
+		if (ret)
+			goto out_free;
+	}
+
+	amdgpu_cper_ring_write(ring, corrected, corrected->record_length);
+
+out_free:
+	/* The record is only a staging buffer; do not leak it on failure. */
+	kfree(corrected);
+
+	return ret;
+}
+
+/*
+ * Report whether a CPER record header starts at ring index @pos.
+ *
+ * NOTE(review): the signature is filled as four bytes without a NUL, so this
+ * strcmp() only matches when the byte after the signature is zero - confirm
+ * that this is intended rather than a bounded compare.
+ */
+static bool amdgpu_cper_is_hdr(struct amdgpu_ring *ring, u64 pos)
+{
+	struct cper_hdr *candidate = (struct cper_hdr *)&(ring->ring[pos]);
+
+	return !strcmp(candidate->signature, "CPER");
+}
+
+/*
+ * Return the size in bytes of the entry that starts at ring index @pos,
+ * never more than the distance from @pos to the end of the ring.
+ *
+ * - If a CPER header sits at @pos, its record_length is used.
+ * - Otherwise, if ring->count_dw is non-zero, the rest of the ring is returned.
+ * - Otherwise the ring is scanned forward for the next header and the
+ *   distance to it is used; if none is found, the rest of the ring is
+ *   returned.
+ */
+static u32 amdgpu_cper_ring_get_ent_sz(struct amdgpu_ring *ring, u64 pos)
+{
+ struct cper_hdr *chdr;
+ u64 p;
+ u32 chunk, rec_len = 0;
+
+ chdr = (struct cper_hdr *)&(ring->ring[pos]);
+ /* @pos is a ring index; << 2 turns it into a byte offset. */
+ chunk = ring->ring_size - (pos << 2);
+
+ if (!strcmp(chdr->signature, "CPER")) {
+ rec_len = chdr->record_length;
+ goto calc;
+ }
+
+ /* ring buffer is not full, no cper data after ring->wptr */
+ if (ring->count_dw)
+ goto calc;
+
+ /* Look for the next header between @pos and the last ring index. */
+ for (p = pos + 1; p <= ring->buf_mask; p++) {
+ chdr = (struct cper_hdr *)&(ring->ring[p]);
+ if (!strcmp(chdr->signature, "CPER")) {
+ rec_len = (p - pos) << 2;
+ goto calc;
+ }
+ }
+
+calc:
+ /* rec_len == 0 means no entry boundary was found. */
+ if (!rec_len)
+ return chunk;
+ else
+ return umin(rec_len, chunk);
+}
+
+/**
+ * amdgpu_cper_ring_write - copy one CPER record into the CPER ring
+ * @ring: the CPER ring
+ * @src: record to copy
+ * @count: size of the record in bytes
+ *
+ * Records that do not fit (@count >= ring_size - 4) are rejected with an
+ * error message. The copy runs under cper.ring_lock and may wrap around the
+ * end of the ring. If the write passes over the read pointer, the read
+ * pointer is advanced until it points at a record header again.
+ */
+void amdgpu_cper_ring_write(struct amdgpu_ring *ring, void *src, int count)
+{
+ u64 pos, wptr_old, rptr;
+ /* Record size in ring index units (count / 4). */
+ int rec_cnt_dw = count >> 2;
+ u32 chunk, ent_sz;
+ u8 *s = (u8 *)src;
+
+ if (count >= ring->ring_size - 4) {
+ dev_err(ring->adev->dev,
+ "CPER data size(%d) is larger than ring size(%d)\n",
+ count, ring->ring_size - 4);
+
+ return;
+ }
+
+ mutex_lock(&ring->adev->cper.ring_lock);
+
+ /* Remember where the write started so an overrun of rptr can be detected. */
+ wptr_old = ring->wptr;
+ rptr = *ring->rptr_cpu_addr & ring->ptr_mask;
+
+ /*
+ * Copy in pieces: each piece is limited to the entry size reported at
+ * the current wptr, which never extends past the end of the ring.
+ */
+ while (count) {
+ ent_sz = amdgpu_cper_ring_get_ent_sz(ring, ring->wptr);
+ chunk = umin(ent_sz, count);
+
+ memcpy(&ring->ring[ring->wptr], s, chunk);
+
+ ring->wptr += (chunk >> 2);
+ ring->wptr &= ring->ptr_mask;
+ count -= chunk;
+ s += chunk;
+ }
+
+ /* Fewer free units than the record needed: the ring is now full. */
+ if (ring->count_dw < rec_cnt_dw)
+ ring->count_dw = 0;
+
+ /* the buffer is overflow, adjust rptr */
+ /*
+ * The three cases cover the write crossing rptr without wrapping, and
+ * the two orderings that are possible once wptr has wrapped.
+ */
+ if (((wptr_old < rptr) && (rptr <= ring->wptr)) ||
+ ((ring->wptr < wptr_old) && (wptr_old < rptr)) ||
+ ((rptr <= ring->wptr) && (ring->wptr < wptr_old))) {
+ pos = (ring->wptr + 1) & ring->ptr_mask;
+
+ /* Skip whole entries until rptr lands on a record header. */
+ do {
+ ent_sz = amdgpu_cper_ring_get_ent_sz(ring, pos);
+
+ rptr += (ent_sz >> 2);
+ rptr &= ring->ptr_mask;
+ *ring->rptr_cpu_addr = rptr;
+
+ pos = rptr;
+ } while (!amdgpu_cper_is_hdr(ring, rptr));
+ }
+
+ /* Consume free space only while some is still accounted for. */
+ if (ring->count_dw >= rec_cnt_dw)
+ ring->count_dw -= rec_cnt_dw;
+ mutex_unlock(&ring->adev->cper.ring_lock);
+}
+
+/* Ring callback: read the current read pointer from its CPU-visible slot. */
+static u64 amdgpu_cper_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	u64 rptr = *ring->rptr_cpu_addr;
+
+	return rptr;
+}
+
+/* Ring callback: report the write pointer kept in the ring structure. */
+static u64 amdgpu_cper_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	u64 wptr = ring->wptr;
+
+	return wptr;
+}
+
+/*
+ * Ring callbacks for the CPER ring. Only the pointer getters are provided;
+ * records are written through amdgpu_cper_ring_write().
+ */
+static const struct amdgpu_ring_funcs cper_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_CPER,
+ .align_mask = 0xff,
+ .support_64bit_ptrs = false,
+ .get_rptr = amdgpu_cper_ring_get_rptr,
+ .get_wptr = amdgpu_cper_ring_get_wptr,
+};
+
+/*
+ * Set up adev->cper.ring_buf as a ring of CPER_MAX_RING_SIZE named "cper",
+ * without doorbell and without scheduler, and initialise its lock.
+ *
+ * Returns the result of amdgpu_ring_init().
+ */
+static int amdgpu_cper_ring_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &(adev->cper.ring_buf);
+
+ mutex_init(&adev->cper.ring_lock);
+
+ /* NOTE(review): presumably amdgpu_ring_init() fills in ring->adev - confirm. */
+ ring->adev = NULL;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = false;
+ ring->no_scheduler = true;
+ ring->funcs = &cper_ring_funcs;
+
+ sprintf(ring->name, "cper");
+ return amdgpu_ring_init(adev, ring, CPER_MAX_RING_SIZE, NULL, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+/**
+ * amdgpu_cper_init - set up CPER support for a device
+ * @adev: device to initialise
+ *
+ * Does nothing when neither ACA nor SR-IOV RAS CPER is enabled. Otherwise
+ * creates the CPER ring, initialises cper_lock and marks CPER as enabled.
+ *
+ * Return: 0 on success or when CPER is not applicable, otherwise the error
+ * from the ring initialisation.
+ */
+int amdgpu_cper_init(struct amdgpu_device *adev)
+{
+	int err;
+
+	if (!(amdgpu_aca_is_enabled(adev) || amdgpu_sriov_ras_cper_en(adev)))
+		return 0;
+
+	err = amdgpu_cper_ring_init(adev);
+	if (err != 0) {
+		dev_err(adev->dev, "failed to initialize cper ring, r = %d\n", err);
+		return err;
+	}
+
+	mutex_init(&adev->cper.cper_lock);
+
+	adev->cper.max_count = CPER_MAX_ALLOWED_COUNT;
+	adev->cper.enabled = true;
+
+	return 0;
+}
+
+/**
+ * amdgpu_cper_fini - tear down CPER support for a device
+ * @adev: device to clean up
+ *
+ * Does nothing when neither ACA nor SR-IOV RAS CPER is enabled. Otherwise
+ * marks CPER as disabled, tears down the CPER ring and resets the counters.
+ *
+ * Return: always 0.
+ */
+int amdgpu_cper_fini(struct amdgpu_device *adev)
+{
+	if (!(amdgpu_aca_is_enabled(adev) || amdgpu_sriov_ras_cper_en(adev)))
+		return 0;
+
+	adev->cper.enabled = false;
+
+	amdgpu_ring_fini(&adev->cper.ring_buf);
+	adev->cper.wptr = 0;
+	adev->cper.count = 0;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h
new file mode 100644
index 000000000000..353421807387
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_CPER_H__
+#define __AMDGPU_CPER_H__
+
+#include "amd_cper.h"
+#include "amdgpu_aca.h"
+
+/* Limits for the CPER bookkeeping and the size of the CPER ring. */
+#define CPER_MAX_ALLOWED_COUNT 0x1000
+#define CPER_MAX_RING_SIZE 0x100000
+
+/* Sizes of the record header and of one section descriptor. */
+#define HDR_LEN (sizeof(struct cper_hdr))
+#define SEC_DESC_LEN (sizeof(struct cper_sec_desc))
+
+/* Sizes of one section body, per section kind. */
+#define BOOT_SEC_LEN (sizeof(struct cper_sec_crashdump_boot))
+#define FATAL_SEC_LEN (sizeof(struct cper_sec_crashdump_fatal))
+#define NONSTD_SEC_LEN (sizeof(struct cper_sec_nonstd_err))
+
+/*
+ * Record layout: header, then all section descriptors, then all section
+ * bodies. Offsets are in bytes from the start of the header. Arguments are
+ * parenthesised so that expressions such as "i + 1" expand correctly.
+ */
+#define SEC_DESC_OFFSET(idx) (HDR_LEN + (SEC_DESC_LEN * (idx)))
+
+#define BOOT_SEC_OFFSET(count, idx) (HDR_LEN + (SEC_DESC_LEN * (count)) + (BOOT_SEC_LEN * (idx)))
+#define FATAL_SEC_OFFSET(count, idx) (HDR_LEN + (SEC_DESC_LEN * (count)) + (FATAL_SEC_LEN * (idx)))
+#define NONSTD_SEC_OFFSET(count, idx) (HDR_LEN + (SEC_DESC_LEN * (count)) + (NONSTD_SEC_LEN * (idx)))
+
+/*
+ * Kind of CPER record. The type selects the section body size when a record
+ * is allocated and the notify type when its header is filled.
+ */
+enum amdgpu_cper_type {
+ /* non-standard error sections; notify type depends on severity */
+ AMDGPU_CPER_TYPE_RUNTIME,
+ /* crashdump (fatal) sections */
+ AMDGPU_CPER_TYPE_FATAL,
+ /* boot crashdump sections */
+ AMDGPU_CPER_TYPE_BOOT,
+ /* bad page threshold; uses non-standard error sections */
+ AMDGPU_CPER_TYPE_BP_THRESHOLD,
+};
+
+/* Per-device CPER state. */
+struct amdgpu_cper {
+ /* set by amdgpu_cper_init(), cleared by amdgpu_cper_fini() */
+ bool enabled;
+
+ /* incremented for every record header; forms part of the record id */
+ atomic_t unique_id;
+ struct mutex cper_lock;
+
+ /* Lifetime CPERs generated */
+ uint32_t count;
+ /* set to CPER_MAX_ALLOWED_COUNT by amdgpu_cper_init() */
+ uint32_t max_count;
+
+ uint32_t wptr;
+
+ void *ring[CPER_MAX_ALLOWED_COUNT];
+ /* ring that amdgpu_cper_ring_write() copies records into */
+ struct amdgpu_ring ring_buf;
+ /* held by amdgpu_cper_ring_write() while ring_buf is updated */
+ struct mutex ring_lock;
+};
+
+void amdgpu_cper_entry_fill_hdr(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ enum amdgpu_cper_type type,
+ enum cper_error_severity sev);
+int amdgpu_cper_entry_fill_fatal_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx,
+ struct cper_sec_crashdump_reg_data reg_data);
+int amdgpu_cper_entry_fill_runtime_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx,
+ enum cper_error_severity sev,
+ uint32_t *reg_dump,
+ uint32_t reg_count);
+int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t section_idx);
+
+struct cper_hdr *amdgpu_cper_alloc_entry(struct amdgpu_device *adev,
+ enum amdgpu_cper_type type,
+ uint16_t section_count);
+/* UE must be encoded into separated cper entries, 1 UE 1 cper */
+int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
+ struct aca_bank *bank);
+/* CEs and DEs are combined into 1 cper entry */
+int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev,
+ struct aca_banks *banks,
+ uint16_t bank_count);
+/* Bad page threshold is encoded into separated cper entry */
+int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev);
+void amdgpu_cper_ring_write(struct amdgpu_ring *ring,
+ void *src, int count);
+int amdgpu_cper_init(struct amdgpu_device *adev);
+int amdgpu_cper_fini(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 0311d799a010..ecdfe6cb36cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -32,58 +32,127 @@
#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include "amdgpu_cs.h"
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_gmc.h"
#include "amdgpu_gem.h"
#include "amdgpu_ras.h"
+#include "amdgpu_hmm.h"
+
+/*
+ * Reset the parser and bind it to the device, the DRM file and the context
+ * named in the submission.
+ *
+ * Returns -EINVAL for a submission without chunks or an unknown context,
+ * -ECANCELED when the context is marked guilty, and 0 otherwise.
+ */
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
+				 struct amdgpu_device *adev,
+				 struct drm_file *filp,
+				 union drm_amdgpu_cs *cs)
+{
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+
+	if (!cs->in.num_chunks)
+		return -EINVAL;
+
+	memset(p, 0, sizeof(*p));
+	p->filp = filp;
+	p->adev = adev;
+
+	p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
+	if (p->ctx == NULL)
+		return -EINVAL;
+
+	/* Drop the context reference again when the context is guilty. */
+	if (atomic_read(&p->ctx->guilty) != 0) {
+		amdgpu_ctx_put(p->ctx);
+		return -ECANCELED;
+	}
+
+	amdgpu_sync_create(&p->sync);
+	drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+		      DRM_EXEC_IGNORE_DUPLICATES, 0);
+
+	return 0;
+}
+
+static int amdgpu_cs_job_idx(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_ib *chunk_ib)
+{
+ struct drm_sched_entity *entity;
+ unsigned int i;
+ int r;
+
+ r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type,
+ chunk_ib->ip_instance,
+ chunk_ib->ring, &entity);
+ if (r)
+ return r;
+
+ /*
+ * Abort if there is no run queue associated with this entity.
+ * Possibly because of disabled HW IP.
+ */
+ if (entity->rq == NULL)
+ return -EINVAL;
+
+ /* Check if we can add this IB to some existing job */
+ for (i = 0; i < p->gang_size; ++i)
+ if (p->entities[i] == entity)
+ return i;
+
+ /* If not increase the gang size if possible */
+ if (i == AMDGPU_CS_GANG_SIZE)
+ return -EINVAL;
-static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
- struct drm_amdgpu_cs_chunk_fence *data,
- uint32_t *offset)
+ p->entities[i] = entity;
+ p->gang_size = i + 1;
+ return i;
+}
+
+/*
+ * First pass over an IB chunk: find the job the IB belongs to, count it in
+ * @num_ibs and remember that job as the gang leader.
+ *
+ * Returns a negative error from the job lookup, -EINVAL when the job already
+ * holds the maximum number of IBs for the IP type, and 0 otherwise.
+ */
+static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
+			   struct drm_amdgpu_cs_chunk_ib *chunk_ib,
+			   unsigned int *num_ibs)
+{
+	int job_idx = amdgpu_cs_job_idx(p, chunk_ib);
+
+	if (job_idx < 0)
+		return job_idx;
+
+	if (num_ibs[job_idx] >= amdgpu_ring_max_ibs(chunk_ib->ip_type))
+		return -EINVAL;
+
+	num_ibs[job_idx]++;
+	p->gang_leader_idx = job_idx;
+
+	return 0;
+}
+
+static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_fence *data,
+ uint32_t *offset)
{
struct drm_gem_object *gobj;
- struct amdgpu_bo *bo;
unsigned long size;
- int r;
gobj = drm_gem_object_lookup(p->filp, data->handle);
if (gobj == NULL)
return -EINVAL;
- bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
- p->uf_entry.priority = 0;
- p->uf_entry.tv.bo = &bo->tbo;
- /* One for TTM and one for the CS job */
- p->uf_entry.tv.num_shared = 2;
-
+ p->uf_bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
drm_gem_object_put(gobj);
- size = amdgpu_bo_size(bo);
- if (size != PAGE_SIZE || (data->offset + 8) > size) {
- r = -EINVAL;
- goto error_unref;
- }
+ size = amdgpu_bo_size(p->uf_bo);
+ if (size != PAGE_SIZE || data->offset > (size - 8))
+ return -EINVAL;
- if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
- r = -EINVAL;
- goto error_unref;
- }
+ if (amdgpu_ttm_tt_get_usermm(p->uf_bo->tbo.ttm))
+ return -EINVAL;
*offset = data->offset;
-
return 0;
-
-error_unref:
- amdgpu_bo_unref(&bo);
- return r;
}
-static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
- struct drm_amdgpu_bo_list_in *data)
+static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_bo_list_in *data)
{
+ struct drm_amdgpu_bo_list_entry *info;
int r;
- struct drm_amdgpu_bo_list_entry *info = NULL;
r = amdgpu_bo_create_list_entry_array(data, &info);
if (r)
@@ -103,45 +172,24 @@ error_free:
return r;
}
-static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
+/* Copy the data from userspace and go over it the first time */
+static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
+ union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { };
struct amdgpu_vm *vm = &fpriv->vm;
- uint64_t *chunk_array_user;
uint64_t *chunk_array;
- unsigned size, num_ibs = 0;
uint32_t uf_offset = 0;
- int i;
+ size_t size;
int ret;
+ int i;
- if (cs->in.num_chunks == 0)
- return 0;
-
- chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
- if (!chunk_array)
- return -ENOMEM;
-
- p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
- if (!p->ctx) {
- ret = -EINVAL;
- goto free_chunk;
- }
-
- mutex_lock(&p->ctx->lock);
-
- /* skip guilty context job */
- if (atomic_read(&p->ctx->guilty) == 1) {
- ret = -ECANCELED;
- goto free_chunk;
- }
-
- /* get chunks */
- chunk_array_user = u64_to_user_ptr(cs->in.chunks);
- if (copy_from_user(chunk_array, chunk_array_user,
- sizeof(uint64_t)*cs->in.num_chunks)) {
- ret = -EFAULT;
- goto free_chunk;
- }
+ chunk_array = memdup_array_user(u64_to_user_ptr(cs->in.chunks),
+ cs->in.num_chunks,
+ sizeof(uint64_t));
+ if (IS_ERR(chunk_array))
+ return PTR_ERR(chunk_array);
p->nchunks = cs->in.num_chunks;
p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
@@ -152,9 +200,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
}
for (i = 0; i < p->nchunks; i++) {
- struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
+ struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
struct drm_amdgpu_cs_chunk user_chunk;
- uint32_t __user *cdata;
chunk_ptr = u64_to_user_ptr(chunk_array[i]);
if (copy_from_user(&user_chunk, chunk_ptr,
@@ -167,50 +214,50 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
p->chunks[i].length_dw = user_chunk.length_dw;
size = p->chunks[i].length_dw;
- cdata = u64_to_user_ptr(user_chunk.chunk_data);
- p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
- if (p->chunks[i].kdata == NULL) {
- ret = -ENOMEM;
+ p->chunks[i].kdata = vmemdup_array_user(u64_to_user_ptr(user_chunk.chunk_data),
+ size,
+ sizeof(uint32_t));
+ if (IS_ERR(p->chunks[i].kdata)) {
+ ret = PTR_ERR(p->chunks[i].kdata);
i--;
goto free_partial_kdata;
}
size *= sizeof(uint32_t);
- if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
- ret = -EFAULT;
- goto free_partial_kdata;
- }
+ /* Assume the worst on the following checks */
+ ret = -EINVAL;
switch (p->chunks[i].chunk_id) {
case AMDGPU_CHUNK_ID_IB:
- ++num_ibs;
+ if (size < sizeof(struct drm_amdgpu_cs_chunk_ib))
+ goto free_partial_kdata;
+
+ ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs);
+ if (ret)
+ goto free_partial_kdata;
break;
case AMDGPU_CHUNK_ID_FENCE:
- size = sizeof(struct drm_amdgpu_cs_chunk_fence);
- if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
- ret = -EINVAL;
+ if (size < sizeof(struct drm_amdgpu_cs_chunk_fence))
goto free_partial_kdata;
- }
- ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata,
- &uf_offset);
+ ret = amdgpu_cs_p1_user_fence(p, p->chunks[i].kdata,
+ &uf_offset);
if (ret)
goto free_partial_kdata;
-
break;
case AMDGPU_CHUNK_ID_BO_HANDLES:
- size = sizeof(struct drm_amdgpu_bo_list_in);
- if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
- ret = -EINVAL;
+ if (size < sizeof(struct drm_amdgpu_bo_list_in))
goto free_partial_kdata;
- }
- ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
- if (ret)
+ /* Only a single BO list is allowed to simplify handling. */
+ if (p->bo_list)
goto free_partial_kdata;
+ ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata);
+ if (ret)
+ goto free_partial_kdata;
break;
case AMDGPU_CHUNK_ID_DEPENDENCIES:
@@ -219,25 +266,54 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+ case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
break;
default:
- ret = -EINVAL;
goto free_partial_kdata;
}
}
- ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
- if (ret)
+ if (!p->gang_size || (amdgpu_sriov_vf(p->adev) && p->gang_size > 1)) {
+ ret = -EINVAL;
goto free_all_kdata;
+ }
+
+ for (i = 0; i < p->gang_size; ++i) {
+ ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm,
+ num_ibs[i], &p->jobs[i],
+ p->filp->client_id);
+ if (ret)
+ goto free_all_kdata;
+ switch (p->adev->enforce_isolation[fpriv->xcp_id]) {
+ case AMDGPU_ENFORCE_ISOLATION_DISABLE:
+ default:
+ p->jobs[i]->enforce_isolation = false;
+ p->jobs[i]->run_cleaner_shader = false;
+ break;
+ case AMDGPU_ENFORCE_ISOLATION_ENABLE:
+ p->jobs[i]->enforce_isolation = true;
+ p->jobs[i]->run_cleaner_shader = true;
+ break;
+ case AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY:
+ p->jobs[i]->enforce_isolation = true;
+ p->jobs[i]->run_cleaner_shader = false;
+ break;
+ case AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER:
+ p->jobs[i]->enforce_isolation = true;
+ p->jobs[i]->run_cleaner_shader = false;
+ break;
+ }
+ }
+ p->gang_leader = p->jobs[p->gang_leader_idx];
- if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
+ if (p->ctx->generation != p->gang_leader->generation) {
ret = -ECANCELED;
goto free_all_kdata;
}
- if (p->uf_entry.tv.bo)
- p->job->uf_addr = uf_offset;
+ if (p->uf_bo)
+ p->gang_leader->uf_addr = uf_offset;
kvfree(chunk_array);
/* Use this opportunity to fill in task info for the vm */
@@ -259,6 +335,331 @@ free_chunk:
return ret;
}
+/**
+ * amdgpu_cs_p2_ib() - second-pass handling of one AMDGPU_CHUNK_ID_IB chunk
+ * @p: command submission parser
+ * @chunk: chunk whose kdata is a struct drm_amdgpu_cs_chunk_ib
+ * @ce_preempt: running count of preemptible IBs flagged AMDGPU_IB_FLAG_CE
+ * @de_preempt: running count of preemptible IBs without that flag
+ *
+ * Looks up the job for this chunk through amdgpu_cs_job_idx(), takes the
+ * next IB slot of that job, applies the per-ring and per-flag restrictions
+ * and then fills the IB from the values supplied in the chunk.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
+			   struct amdgpu_cs_chunk *chunk,
+			   unsigned int *ce_preempt,
+			   unsigned int *de_preempt)
+{
+	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	struct drm_amdgpu_cs_chunk_ib *info = chunk->kdata;
+	struct amdgpu_ring *ring;
+	struct amdgpu_job *job;
+	struct amdgpu_ib *ib;
+	int idx, r;
+
+	idx = amdgpu_cs_job_idx(p, info);
+	if (idx < 0)
+		return idx;
+
+	job = p->jobs[idx];
+	ring = amdgpu_job_ring(job);
+
+	/* The slot is counted before any of the checks below can bail out. */
+	ib = &job->ibs[job->num_ibs];
+	job->num_ibs++;
+
+	/* User space may not submit to this ring. */
+	if (ring->no_user_submission)
+		return -EINVAL;
+
+	/* A user fence BO was supplied but the ring cannot signal one. */
+	if (p->uf_bo && ring->funcs->no_user_fence)
+		return -EINVAL;
+
+	if ((info->flags & AMDGPU_IB_FLAG_CE) &&
+	    !p->adev->debug_enable_ce_cs) {
+		dev_err_ratelimited(p->adev->dev, "CE CS is blocked, use debug=0x400 to override\n");
+		return -EINVAL;
+	}
+
+	if (info->ip_type == AMDGPU_HW_IP_GFX &&
+	    (info->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+		unsigned int *count;
+
+		count = (info->flags & AMDGPU_IB_FLAG_CE) ?
+			ce_preempt : de_preempt;
+		++*count;
+
+		/* At most one preemptible IB each for CE and DE per submit. */
+		if (*ce_preempt > 1 || *de_preempt > 1)
+			return -EINVAL;
+	}
+
+	if (info->flags & AMDGPU_IB_FLAG_PREAMBLE)
+		job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
+
+	/* A size is only requested when the ring has a parse_cs callback. */
+	if (ring->funcs->parse_cs)
+		r = amdgpu_ib_get(p->adev, &fpriv->vm, info->ib_bytes,
+				  AMDGPU_IB_POOL_DELAYED, ib);
+	else
+		r = amdgpu_ib_get(p->adev, &fpriv->vm, 0,
+				  AMDGPU_IB_POOL_DELAYED, ib);
+	if (r) {
+		drm_err(adev_to_drm(p->adev), "Failed to get ib !\n");
+		return r;
+	}
+
+	ib->flags = info->flags;
+	ib->length_dw = info->ib_bytes / 4;
+	ib->gpu_addr = info->va_start;
+	return 0;
+}
+
+/**
+ * amdgpu_cs_p2_dependencies() - add fence dependencies listed in a chunk
+ * @p: command submission parser
+ * @chunk: AMDGPU_CHUNK_ID_DEPENDENCIES or
+ *         AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES chunk; kdata is an array of
+ *         struct drm_amdgpu_cs_chunk_dep
+ *
+ * For every array entry the referenced context and entity are looked up, the
+ * fence for the given handle is fetched and added to @p->sync. For the
+ * SCHEDULED variant the scheduler fence's "scheduled" fence is used in place
+ * of the fence that was looked up.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
+				     struct amdgpu_cs_chunk *chunk)
+{
+	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	struct drm_amdgpu_cs_chunk_dep *dep = chunk->kdata;
+	struct drm_amdgpu_cs_chunk_dep *end;
+	unsigned int count;
+	int r;
+
+	count = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_dep);
+	end = dep + count;
+
+	for (; dep != end; ++dep) {
+		struct drm_sched_entity *entity;
+		struct dma_fence *fence;
+		struct amdgpu_ctx *ctx;
+
+		ctx = amdgpu_ctx_get(fpriv, dep->ctx_id);
+		if (!ctx)
+			return -EINVAL;
+
+		r = amdgpu_ctx_get_entity(ctx, dep->ip_type,
+					  dep->ip_instance,
+					  dep->ring, &entity);
+		if (r) {
+			amdgpu_ctx_put(ctx);
+			return r;
+		}
+
+		/* The context reference is only needed for the lookup. */
+		fence = amdgpu_ctx_get_fence(ctx, entity, dep->handle);
+		amdgpu_ctx_put(ctx);
+
+		if (IS_ERR(fence))
+			return PTR_ERR(fence);
+
+		/* No fence for this handle, nothing to wait for. */
+		if (!fence)
+			continue;
+
+		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
+			struct dma_fence *looked_up = fence;
+
+			/* Swap in the "scheduled" fence, drop the other one. */
+			fence = dma_fence_get(
+				&to_drm_sched_fence(looked_up)->scheduled);
+			dma_fence_put(looked_up);
+		}
+
+		r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
+		dma_fence_put(fence);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+/**
+ * amdgpu_syncobj_lookup_and_add() - add a syncobj's fence to the parser sync
+ * @p: command submission parser
+ * @handle: syncobj handle, resolved against @p->filp
+ * @point: point passed through to drm_syncobj_find_fence()
+ * @flags: flags passed through to drm_syncobj_find_fence()
+ *
+ * Return: 0 on success, otherwise the error from the fence lookup or from
+ * amdgpu_sync_fence().
+ */
+static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
+					 uint32_t handle, u64 point,
+					 u64 flags)
+{
+	struct dma_fence *fence;
+	int r;
+
+	r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
+	if (!r) {
+		/* The local reference is dropped once the fence was added. */
+		r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
+		dma_fence_put(fence);
+		return r;
+	}
+
+	drm_err(adev_to_drm(p->adev), "syncobj %u failed to find fence @ %llu (%d)!\n",
+		handle, point, r);
+	return r;
+}
+
+/**
+ * amdgpu_cs_p2_syncobj_in() - wait on the syncobjs listed in a chunk
+ * @p: command submission parser
+ * @chunk: chunk whose kdata is an array of struct drm_amdgpu_cs_chunk_sem
+ *
+ * Every handle is looked up with point 0 and flags 0 and its fence is added
+ * to @p->sync.
+ *
+ * Return: 0 on success, or the first lookup/add error.
+ */
+static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p,
+				   struct amdgpu_cs_chunk *chunk)
+{
+	struct drm_amdgpu_cs_chunk_sem *sems = chunk->kdata;
+	unsigned int count = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_sem);
+	int n, r;
+
+	for (n = 0; n < count; ++n) {
+		r = amdgpu_syncobj_lookup_and_add(p, sems[n].handle, 0, 0);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+/**
+ * amdgpu_cs_p2_syncobj_timeline_wait() - wait on timeline syncobj points
+ * @p: command submission parser
+ * @chunk: chunk whose kdata is an array of
+ *         struct drm_amdgpu_cs_chunk_syncobj
+ *
+ * Same as the plain syncobj wait, except that the point and flags of every
+ * entry come from the chunk instead of being zero.
+ *
+ * Return: 0 on success, or the first lookup/add error.
+ */
+static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p,
+					      struct amdgpu_cs_chunk *chunk)
+{
+	struct drm_amdgpu_cs_chunk_syncobj *entries = chunk->kdata;
+	unsigned int count = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+	int n, r;
+
+	for (n = 0; n < count; ++n) {
+		struct drm_amdgpu_cs_chunk_syncobj *entry = &entries[n];
+
+		r = amdgpu_syncobj_lookup_and_add(p, entry->handle,
+						  entry->point,
+						  entry->flags);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+/**
+ * amdgpu_cs_p2_syncobj_out() - collect the syncobjs to signal after submit
+ * @p: command submission parser
+ * @chunk: chunk whose kdata is an array of struct drm_amdgpu_cs_chunk_sem
+ *
+ * Allocates @p->post_deps and fills one entry per handle, each with no chain
+ * and point 0. Only one chunk may populate @p->post_deps; a second one is
+ * rejected with -EINVAL.
+ *
+ * Return: 0 on success, -EINVAL for a duplicate chunk or an unknown handle,
+ * -ENOMEM if the array cannot be allocated.
+ */
+static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p,
+				    struct amdgpu_cs_chunk *chunk)
+{
+	struct drm_amdgpu_cs_chunk_sem *sems = chunk->kdata;
+	unsigned int count = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_sem);
+	int n;
+
+	if (p->post_deps)
+		return -EINVAL;
+
+	p->num_post_deps = 0;
+	p->post_deps = kmalloc_array(count, sizeof(*p->post_deps),
+				     GFP_KERNEL);
+	if (!p->post_deps)
+		return -ENOMEM;
+
+	for (n = 0; n < count; ++n) {
+		struct amdgpu_cs_post_dep *dep = &p->post_deps[n];
+
+		dep->syncobj = drm_syncobj_find(p->filp, sems[n].handle);
+		if (!dep->syncobj)
+			return -EINVAL;
+
+		dep->chain = NULL;
+		dep->point = 0;
+
+		/* Counts only entries that were filled in completely. */
+		p->num_post_deps++;
+	}
+
+	return 0;
+}
+
+/**
+ * amdgpu_cs_p2_syncobj_timeline_signal() - collect timeline points to signal
+ * @p: command submission parser
+ * @chunk: chunk whose kdata is an array of
+ *         struct drm_amdgpu_cs_chunk_syncobj
+ *
+ * Allocates @p->post_deps and fills one entry per array element. A fence
+ * chain node is allocated only for entries with a non-zero point. Only one
+ * chunk may populate @p->post_deps; a second one is rejected with -EINVAL.
+ *
+ * Return: 0 on success, -EINVAL for a duplicate chunk or an unknown handle,
+ * -ENOMEM if an allocation fails.
+ */
+static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
+						struct amdgpu_cs_chunk *chunk)
+{
+	struct drm_amdgpu_cs_chunk_syncobj *entries = chunk->kdata;
+	unsigned int count = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+	int n;
+
+	if (p->post_deps)
+		return -EINVAL;
+
+	p->num_post_deps = 0;
+	p->post_deps = kmalloc_array(count, sizeof(*p->post_deps),
+				     GFP_KERNEL);
+	if (!p->post_deps)
+		return -ENOMEM;
+
+	for (n = 0; n < count; ++n) {
+		struct drm_amdgpu_cs_chunk_syncobj *in = &entries[n];
+		struct amdgpu_cs_post_dep *out = &p->post_deps[n];
+
+		/* Entries with point 0 get no chain node. */
+		out->chain = in->point ? dma_fence_chain_alloc() : NULL;
+		if (in->point && !out->chain)
+			return -ENOMEM;
+
+		out->syncobj = drm_syncobj_find(p->filp, in->handle);
+		if (!out->syncobj) {
+			/* Entry is not counted yet, so release its chain here. */
+			dma_fence_chain_free(out->chain);
+			return -EINVAL;
+		}
+
+		out->point = in->point;
+		p->num_post_deps++;
+	}
+
+	return 0;
+}
+
+/**
+ * amdgpu_cs_p2_shadow() - apply a AMDGPU_CHUNK_ID_CP_GFX_SHADOW chunk
+ * @p: command submission parser
+ * @chunk: chunk whose kdata is a struct drm_amdgpu_cs_chunk_cp_gfx_shadow
+ *
+ * Copies the shadow, CSA and GDS addresses plus the init-shadow flag from
+ * the chunk into every job of the gang.
+ *
+ * Return: 0 on success, -EINVAL if any flag other than
+ * AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW is set.
+ */
+static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
+			       struct amdgpu_cs_chunk *chunk)
+{
+	struct drm_amdgpu_cs_chunk_cp_gfx_shadow *info = chunk->kdata;
+	int n;
+
+	if (info->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW)
+		return -EINVAL;
+
+	for (n = 0; n < p->gang_size; ++n) {
+		struct amdgpu_job *job = p->jobs[n];
+
+		job->init_shadow =
+			info->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;
+		job->gds_va = info->gds_va;
+		job->csa_va = info->csa_va;
+		job->shadow_va = info->shadow_va;
+	}
+
+	return 0;
+}
+
+/**
+ * amdgpu_cs_pass2() - second pass over all chunks of a submission
+ * @p: command submission parser
+ *
+ * Walks @p->chunks in order and hands every chunk to the handler matching
+ * its chunk_id. Chunk ids without a handler here are skipped. The walk stops
+ * at the first handler that fails.
+ *
+ * Return: 0 on success, or the error of the failing handler.
+ */
+static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
+{
+	/* Preemptible IB counters shared by all IB chunks of this submit. */
+	unsigned int ce_preempt = 0, de_preempt = 0;
+	int i;
+
+	for (i = 0; i < p->nchunks; ++i) {
+		struct amdgpu_cs_chunk *chunk = &p->chunks[i];
+		int r = 0;
+
+		switch (chunk->chunk_id) {
+		case AMDGPU_CHUNK_ID_IB:
+			r = amdgpu_cs_p2_ib(p, chunk, &ce_preempt, &de_preempt);
+			break;
+		case AMDGPU_CHUNK_ID_DEPENDENCIES:
+		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
+			r = amdgpu_cs_p2_dependencies(p, chunk);
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
+			r = amdgpu_cs_p2_syncobj_in(p, chunk);
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
+			r = amdgpu_cs_p2_syncobj_out(p, chunk);
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+			r = amdgpu_cs_p2_syncobj_timeline_wait(p, chunk);
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+			r = amdgpu_cs_p2_syncobj_timeline_signal(p, chunk);
+			break;
+		case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
+			r = amdgpu_cs_p2_shadow(p, chunk);
+			break;
+		default:
+			/* Nothing to do for this chunk in the second pass. */
+			break;
+		}
+
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
/* Convert microseconds to bytes. */
static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
{
@@ -298,7 +699,6 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
{
s64 time_us, increment_us;
u64 free_vram, total_vram, used_vram;
- struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
/* Allow a maximum of 200 accumulated ms. This is basically per-IB
* throttling.
*
@@ -308,14 +708,14 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
*/
const s64 us_upper_bound = 200000;
- if (!adev->mm_stats.log2_max_MBps) {
+ if ((!adev->mm_stats.log2_max_MBps) || !ttm_resource_manager_used(&adev->mman.vram_mgr.manager)) {
*max_bytes = 0;
*max_vis_bytes = 0;
return;
}
total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
- used_vram = amdgpu_vram_mgr_usage(vram_man);
+ used_vram = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
spin_lock(&adev->mm_stats.lock);
@@ -342,7 +742,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
s64 min_us;
- /* Be more aggresive on dGPUs. Try to fill a portion of free
+ /* Be more aggressive on dGPUs. Try to fill a portion of free
* VRAM now.
*/
if (!(adev->flags & AMD_IS_APU))
@@ -362,10 +762,11 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
u64 total_vis_vram = adev->gmc.visible_vram_size;
u64 used_vis_vram =
- amdgpu_vram_mgr_vis_usage(vram_man);
+ amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
if (used_vis_vram < total_vis_vram) {
u64 free_vis_vram = total_vis_vram - used_vis_vram;
+
adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
increment_us, us_upper_bound);
@@ -440,7 +841,7 @@ retry:
p->bytes_moved += ctx.bytes_moved;
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
- amdgpu_bo_in_cpu_visible_vram(bo))
+ amdgpu_res_cpu_visible(adev, bo->tbo.resource))
p->bytes_moved_vis += ctx.bytes_moved;
if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
@@ -451,57 +852,18 @@ retry:
return r;
}
-static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
- struct list_head *validated)
-{
- struct ttm_operation_ctx ctx = { true, false };
- struct amdgpu_bo_list_entry *lobj;
- int r;
-
- list_for_each_entry(lobj, validated, tv.head) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
- struct mm_struct *usermm;
-
- usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
- if (usermm && usermm != current->mm)
- return -EPERM;
-
- if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
- lobj->user_invalidated && lobj->user_pages) {
- amdgpu_bo_placement_from_domain(bo,
- AMDGPU_GEM_DOMAIN_CPU);
- r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- if (r)
- return r;
-
- amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
- lobj->user_pages);
- }
-
- r = amdgpu_cs_bo_validate(p, bo);
- if (r)
- return r;
-
- kvfree(lobj->user_pages);
- lobj->user_pages = NULL;
- }
- return 0;
-}
-
static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct ttm_operation_ctx ctx = { true, false };
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_list_entry *e;
- struct list_head duplicates;
- struct amdgpu_bo *gds;
- struct amdgpu_bo *gws;
- struct amdgpu_bo *oa;
+ struct drm_gem_object *obj;
+ unsigned long index;
+ unsigned int i;
int r;
- INIT_LIST_HEAD(&p->validated);
-
/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
if (cs->in.bo_list_handle) {
if (p->bo_list)
@@ -519,44 +881,27 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
return r;
}
- /* One for TTM and one for the CS job */
- amdgpu_bo_list_for_each_entry(e, p->bo_list)
- e->tv.num_shared = 2;
-
- amdgpu_bo_list_get_list(p->bo_list, &p->validated);
-
- INIT_LIST_HEAD(&duplicates);
- amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
-
- if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
- list_add(&p->uf_entry.tv.head, &p->validated);
+ mutex_lock(&p->bo_list->bo_list_mutex);
/* Get userptr backing pages. If pages are updated after registered
* in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
* amdgpu_ttm_backend_bind() to flush and invalidate new pages
*/
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
bool userpage_invalidated = false;
- int i;
+ struct amdgpu_bo *bo = e->bo;
- e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
- sizeof(struct page *),
- GFP_KERNEL | __GFP_ZERO);
- if (!e->user_pages) {
- DRM_ERROR("kvmalloc_array failure\n");
+ e->range = amdgpu_hmm_range_alloc(NULL);
+ if (unlikely(!e->range))
return -ENOMEM;
- }
- r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages);
- if (r) {
- kvfree(e->user_pages);
- e->user_pages = NULL;
- return r;
- }
+ r = amdgpu_ttm_tt_get_user_pages(bo, e->range);
+ if (r)
+ goto out_free_user_pages;
for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
- if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
+ if (bo->tbo.ttm->pages[i] !=
+ hmm_pfn_to_page(e->range->hmm_range.hmm_pfns[i])) {
userpage_invalidated = true;
break;
}
@@ -564,25 +909,52 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
e->user_invalidated = userpage_invalidated;
}
- r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
- &duplicates);
- if (unlikely(r != 0)) {
- if (r != -ERESTARTSYS)
- DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
- goto out;
+ drm_exec_until_all_locked(&p->exec) {
+ r = amdgpu_vm_lock_pd(&fpriv->vm, &p->exec, 1 + p->gang_size);
+ drm_exec_retry_on_contention(&p->exec);
+ if (unlikely(r))
+ goto out_free_user_pages;
+
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ /* One fence for TTM and one for each CS job */
+ r = drm_exec_prepare_obj(&p->exec, &e->bo->tbo.base,
+ 1 + p->gang_size);
+ drm_exec_retry_on_contention(&p->exec);
+ if (unlikely(r))
+ goto out_free_user_pages;
+
+ e->bo_va = amdgpu_vm_bo_find(vm, e->bo);
+ }
+
+ if (p->uf_bo) {
+ r = drm_exec_prepare_obj(&p->exec, &p->uf_bo->tbo.base,
+ 1 + p->gang_size);
+ drm_exec_retry_on_contention(&p->exec);
+ if (unlikely(r))
+ goto out_free_user_pages;
+ }
}
- amdgpu_bo_list_for_each_entry(e, p->bo_list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ struct mm_struct *usermm;
+
+ usermm = amdgpu_ttm_tt_get_usermm(e->bo->tbo.ttm);
+ if (usermm && usermm != current->mm) {
+ r = -EPERM;
+ goto out_free_user_pages;
+ }
- e->bo_va = amdgpu_vm_bo_find(vm, bo);
+ if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) &&
+ e->user_invalidated) {
+ amdgpu_bo_placement_from_domain(e->bo,
+ AMDGPU_GEM_DOMAIN_CPU);
+ r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement,
+ &ctx);
+ if (r)
+ goto out_free_user_pages;
- if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
- e->chain = dma_fence_chain_alloc();
- if (!e->chain) {
- r = -ENOMEM;
- goto error_validate;
- }
+ amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm,
+ e->range);
}
}
@@ -591,254 +963,209 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
p->bytes_moved = 0;
p->bytes_moved_vis = 0;
- r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
- amdgpu_cs_bo_validate, p);
+ r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL,
+ amdgpu_cs_bo_validate, p);
if (r) {
- DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
- goto error_validate;
+ drm_err(adev_to_drm(p->adev), "amdgpu_vm_validate() failed.\n");
+ goto out_free_user_pages;
}
- r = amdgpu_cs_list_validate(p, &duplicates);
- if (r)
- goto error_validate;
-
- r = amdgpu_cs_list_validate(p, &p->validated);
- if (r)
- goto error_validate;
-
- amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
- p->bytes_moved_vis);
+ drm_exec_for_each_locked_object(&p->exec, index, obj) {
+ r = amdgpu_cs_bo_validate(p, gem_to_amdgpu_bo(obj));
+ if (unlikely(r))
+ goto out_free_user_pages;
+ }
- gds = p->bo_list->gds_obj;
- gws = p->bo_list->gws_obj;
- oa = p->bo_list->oa_obj;
+ if (p->uf_bo) {
+ r = amdgpu_ttm_alloc_gart(&p->uf_bo->tbo);
+ if (unlikely(r))
+ goto out_free_user_pages;
- if (gds) {
- p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
- p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
- }
- if (gws) {
- p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
- p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
- }
- if (oa) {
- p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
- p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
+ p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(p->uf_bo);
}
- if (!r && p->uf_entry.tv.bo) {
- struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
+ amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
+ p->bytes_moved_vis);
- r = amdgpu_ttm_alloc_gart(&uf->tbo);
- p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
- }
+ for (i = 0; i < p->gang_size; ++i)
+ amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj,
+ p->bo_list->gws_obj,
+ p->bo_list->oa_obj);
+ return 0;
-error_validate:
- if (r) {
- amdgpu_bo_list_for_each_entry(e, p->bo_list) {
- dma_fence_chain_free(e->chain);
- e->chain = NULL;
- }
- ttm_eu_backoff_reservation(&p->ticket, &p->validated);
+out_free_user_pages:
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ amdgpu_hmm_range_free(e->range);
+ e->range = NULL;
}
-out:
+ mutex_unlock(&p->bo_list->bo_list_mutex);
return r;
}
-static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
+static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *p)
{
- struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- struct amdgpu_bo_list_entry *e;
- int r;
+ int i, j;
- list_for_each_entry(e, &p->validated, tv.head) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
- struct dma_resv *resv = bo->tbo.base.resv;
- enum amdgpu_sync_mode sync_mode;
+ if (!trace_amdgpu_cs_enabled())
+ return;
- sync_mode = amdgpu_bo_explicit_sync(bo) ?
- AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
- r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode,
- &fpriv->vm);
- if (r)
- return r;
+ for (i = 0; i < p->gang_size; ++i) {
+ struct amdgpu_job *job = p->jobs[i];
+
+ for (j = 0; j < job->num_ibs; ++j)
+ trace_amdgpu_cs(p, job, &job->ibs[j]);
}
- return 0;
}
-/**
- * amdgpu_cs_parser_fini() - clean parser states
- * @parser: parser structure holding parsing context.
- * @error: error number
- * @backoff: indicator to backoff the reservation
- *
- * If error is set then unvalidate buffer, otherwise just free memory
- * used by parsing context.
- **/
-static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
- bool backoff)
+static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job)
{
- unsigned i;
+ struct amdgpu_ring *ring = amdgpu_job_ring(job);
+ unsigned int i;
+ int r;
+
+ /* Only for UVD/VCE VM emulation */
+ if (!ring->funcs->parse_cs && !ring->funcs->patch_cs_in_place)
+ return 0;
- if (error && backoff) {
- struct amdgpu_bo_list_entry *e;
+ for (i = 0; i < job->num_ibs; ++i) {
+ struct amdgpu_ib *ib = &job->ibs[i];
+ struct amdgpu_bo_va_mapping *m;
+ struct amdgpu_bo *aobj;
+ uint64_t va_start;
+ uint8_t *kptr;
- amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
- dma_fence_chain_free(e->chain);
- e->chain = NULL;
+ va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK;
+ r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
+ if (r) {
+ drm_err(adev_to_drm(p->adev), "IB va_start is invalid\n");
+ return r;
}
- ttm_eu_backoff_reservation(&parser->ticket,
- &parser->validated);
- }
+ if ((va_start + ib->length_dw * 4) >
+ (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+ drm_err(adev_to_drm(p->adev), "IB va_start+ib_bytes is invalid\n");
+ return -EINVAL;
+ }
- for (i = 0; i < parser->num_post_deps; i++) {
- drm_syncobj_put(parser->post_deps[i].syncobj);
- kfree(parser->post_deps[i].chain);
- }
- kfree(parser->post_deps);
+ /* the IB should be reserved at this point */
+ r = amdgpu_bo_kmap(aobj, (void **)&kptr);
+ if (r)
+ return r;
- dma_fence_put(parser->fence);
+ kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE);
- if (parser->ctx) {
- mutex_unlock(&parser->ctx->lock);
- amdgpu_ctx_put(parser->ctx);
+ if (ring->funcs->parse_cs) {
+ memcpy(ib->ptr, kptr, ib->length_dw * 4);
+ amdgpu_bo_kunmap(aobj);
+
+ r = amdgpu_ring_parse_cs(ring, p, job, ib);
+ if (r)
+ return r;
+
+ if (ib->sa_bo)
+ ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
+ } else {
+ ib->ptr = (uint32_t *)kptr;
+ r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib);
+ amdgpu_bo_kunmap(aobj);
+ if (r)
+ return r;
+ }
}
- if (parser->bo_list)
- amdgpu_bo_list_put(parser->bo_list);
- for (i = 0; i < parser->nchunks; i++)
- kvfree(parser->chunks[i].kdata);
- kvfree(parser->chunks);
- if (parser->job)
- amdgpu_job_free(parser->job);
- if (parser->uf_entry.tv.bo) {
- struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
+ return 0;
+}
- amdgpu_bo_unref(&uf);
+static int amdgpu_cs_patch_jobs(struct amdgpu_cs_parser *p)
+{
+ unsigned int i;
+ int r;
+
+ for (i = 0; i < p->gang_size; ++i) {
+ r = amdgpu_cs_patch_ibs(p, p->jobs[i]);
+ if (r)
+ return r;
}
+ return 0;
}
static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
{
- struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_job *job = p->gang_leader;
struct amdgpu_device *adev = p->adev;
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_list_entry *e;
struct amdgpu_bo_va *bo_va;
- struct amdgpu_bo *bo;
+ unsigned int i;
int r;
- /* Only for UVD/VCE VM emulation */
- if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
- unsigned i, j;
-
- for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
- struct drm_amdgpu_cs_chunk_ib *chunk_ib;
- struct amdgpu_bo_va_mapping *m;
- struct amdgpu_bo *aobj = NULL;
- struct amdgpu_cs_chunk *chunk;
- uint64_t offset, va_start;
- struct amdgpu_ib *ib;
- uint8_t *kptr;
-
- chunk = &p->chunks[i];
- ib = &p->job->ibs[j];
- chunk_ib = chunk->kdata;
-
- if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
- continue;
-
- va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
- r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
- if (r) {
- DRM_ERROR("IB va_start is invalid\n");
- return r;
- }
+ /*
+ * We can't use gang submit with reserved VMIDs when the VM changes
+ * can't be invalidated by more than one engine at the same time.
+ */
+ if (p->gang_size > 1 && !adev->vm_manager.concurrent_flush) {
+ for (i = 0; i < p->gang_size; ++i) {
+ struct drm_sched_entity *entity = p->entities[i];
+ struct drm_gpu_scheduler *sched = entity->rq->sched;
+ struct amdgpu_ring *ring = to_amdgpu_ring(sched);
- if ((va_start + chunk_ib->ib_bytes) >
- (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
- DRM_ERROR("IB va_start+ib_bytes is invalid\n");
+ if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub))
return -EINVAL;
- }
-
- /* the IB should be reserved at this point */
- r = amdgpu_bo_kmap(aobj, (void **)&kptr);
- if (r) {
- return r;
- }
-
- offset = m->start * AMDGPU_GPU_PAGE_SIZE;
- kptr += va_start - offset;
-
- if (ring->funcs->parse_cs) {
- memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
- amdgpu_bo_kunmap(aobj);
-
- r = amdgpu_ring_parse_cs(ring, p, j);
- if (r)
- return r;
- } else {
- ib->ptr = (uint32_t *)kptr;
- r = amdgpu_ring_patch_cs_in_place(ring, p, j);
- amdgpu_bo_kunmap(aobj);
- if (r)
- return r;
- }
-
- j++;
}
}
- if (!p->job->vm)
- return amdgpu_cs_sync_rings(p);
-
+ if (!amdgpu_vm_ready(vm))
+ return -EINVAL;
r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
return r;
- r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false, NULL);
+ r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
if (r)
return r;
- r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
+ r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update,
+ GFP_KERNEL);
if (r)
return r;
- if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
+ if (fpriv->csa_va) {
bo_va = fpriv->csa_va;
BUG_ON(!bo_va);
- r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
return r;
- r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
+ r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
+ GFP_KERNEL);
if (r)
return r;
}
+ /* FIXME: In theory this loop shouldn't be needed any more when
+ * amdgpu_vm_handle_moved handles all moved BOs that are reserved
+ * with p->exec.ticket. But removing it caused test regressions, so I'm
+ * leaving it here for now.
+ */
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
- /* ignore duplicates */
- bo = ttm_to_amdgpu_bo(e->tv.bo);
- if (!bo)
- continue;
-
bo_va = e->bo_va;
if (bo_va == NULL)
continue;
- r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
return r;
- r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
+ r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
+ GFP_KERNEL);
if (r)
return r;
}
- r = amdgpu_vm_handle_moved(adev, vm);
+ r = amdgpu_vm_handle_moved(adev, vm, &p->exec.ticket);
if (r)
return r;
@@ -846,345 +1173,93 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
+ r = amdgpu_sync_fence(&p->sync, vm->last_update, GFP_KERNEL);
if (r)
return r;
- p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
+ for (i = 0; i < p->gang_size; ++i) {
+ job = p->jobs[i];
+
+ if (!job->vm)
+ continue;
+
+ job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
+ }
- if (amdgpu_vm_debug) {
+ if (adev->debug_vm) {
/* Invalidate all BOs to test for userspace bugs */
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+ struct amdgpu_bo *bo = e->bo;
/* ignore duplicates */
if (!bo)
continue;
- amdgpu_vm_bo_invalidate(adev, bo, false);
- }
- }
-
- return amdgpu_cs_sync_rings(p);
-}
-
-static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
- struct amdgpu_cs_parser *parser)
-{
- struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
- struct amdgpu_vm *vm = &fpriv->vm;
- int r, ce_preempt = 0, de_preempt = 0;
- struct amdgpu_ring *ring;
- int i, j;
-
- for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
- struct amdgpu_cs_chunk *chunk;
- struct amdgpu_ib *ib;
- struct drm_amdgpu_cs_chunk_ib *chunk_ib;
- struct drm_sched_entity *entity;
-
- chunk = &parser->chunks[i];
- ib = &parser->job->ibs[j];
- chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
-
- if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
- continue;
-
- if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
- (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
- if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
- if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
- ce_preempt++;
- else
- de_preempt++;
- }
-
- /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
- if (ce_preempt > 1 || de_preempt > 1)
- return -EINVAL;
+ amdgpu_vm_bo_invalidate(bo, false);
}
-
- r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
- chunk_ib->ip_instance, chunk_ib->ring,
- &entity);
- if (r)
- return r;
-
- if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
- parser->job->preamble_status |=
- AMDGPU_PREAMBLE_IB_PRESENT;
-
- if (parser->entity && parser->entity != entity)
- return -EINVAL;
-
- /* Return if there is no run queue associated with this entity.
- * Possibly because of disabled HW IP*/
- if (entity->rq == NULL)
- return -EINVAL;
-
- parser->entity = entity;
-
- ring = to_amdgpu_ring(entity->rq->sched);
- r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
- chunk_ib->ib_bytes : 0,
- AMDGPU_IB_POOL_DELAYED, ib);
- if (r) {
- DRM_ERROR("Failed to get ib !\n");
- return r;
- }
-
- ib->gpu_addr = chunk_ib->va_start;
- ib->length_dw = chunk_ib->ib_bytes / 4;
- ib->flags = chunk_ib->flags;
-
- j++;
}
- /* MM engine doesn't support user fences */
- ring = to_amdgpu_ring(parser->entity->rq->sched);
- if (parser->job->uf_addr && ring->funcs->no_user_fence)
- return -EINVAL;
-
- return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
-}
-
-static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
-{
- struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- unsigned num_deps;
- int i, r;
- struct drm_amdgpu_cs_chunk_dep *deps;
-
- deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_dep);
-
- for (i = 0; i < num_deps; ++i) {
- struct amdgpu_ctx *ctx;
- struct drm_sched_entity *entity;
- struct dma_fence *fence;
-
- ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
- if (ctx == NULL)
- return -EINVAL;
-
- r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
- deps[i].ip_instance,
- deps[i].ring, &entity);
- if (r) {
- amdgpu_ctx_put(ctx);
- return r;
- }
-
- fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
- amdgpu_ctx_put(ctx);
-
- if (IS_ERR(fence))
- return PTR_ERR(fence);
- else if (!fence)
- continue;
-
- if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
- struct drm_sched_fence *s_fence;
- struct dma_fence *old = fence;
-
- s_fence = to_drm_sched_fence(fence);
- fence = dma_fence_get(&s_fence->scheduled);
- dma_fence_put(old);
- }
-
- r = amdgpu_sync_fence(&p->job->sync, fence);
- dma_fence_put(fence);
- if (r)
- return r;
- }
return 0;
}
-static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
- uint32_t handle, u64 point,
- u64 flags)
+static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct drm_gpu_scheduler *sched;
+ struct drm_gem_object *obj;
struct dma_fence *fence;
+ unsigned long index;
+ unsigned int i;
int r;
- r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
+ r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
if (r) {
- DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
- handle, point, r);
+ if (r != -ERESTARTSYS)
+ drm_err(adev_to_drm(p->adev), "amdgpu_ctx_wait_prev_fence failed.\n");
return r;
}
- r = amdgpu_sync_fence(&p->job->sync, fence);
- dma_fence_put(fence);
-
- return r;
-}
+ drm_exec_for_each_locked_object(&p->exec, index, obj) {
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
-{
- struct drm_amdgpu_cs_chunk_sem *deps;
- unsigned num_deps;
- int i, r;
+ struct dma_resv *resv = bo->tbo.base.resv;
+ enum amdgpu_sync_mode sync_mode;
- deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_sem);
- for (i = 0; i < num_deps; ++i) {
- r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
- 0, 0);
+ sync_mode = amdgpu_bo_explicit_sync(bo) ?
+ AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
+ r = amdgpu_sync_resv(p->adev, &p->sync, resv, sync_mode,
+ &fpriv->vm);
if (r)
return r;
}
- return 0;
-}
-
-
-static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
-{
- struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
- unsigned num_deps;
- int i, r;
-
- syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_syncobj);
- for (i = 0; i < num_deps; ++i) {
- r = amdgpu_syncobj_lookup_and_add_to_sync(p,
- syncobj_deps[i].handle,
- syncobj_deps[i].point,
- syncobj_deps[i].flags);
+ for (i = 0; i < p->gang_size; ++i) {
+ r = amdgpu_sync_push_to_job(&p->sync, p->jobs[i]);
if (r)
return r;
}
- return 0;
-}
-
-static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
-{
- struct drm_amdgpu_cs_chunk_sem *deps;
- unsigned num_deps;
- int i;
-
- deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_sem);
-
- if (p->post_deps)
- return -EINVAL;
-
- p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
- GFP_KERNEL);
- p->num_post_deps = 0;
-
- if (!p->post_deps)
- return -ENOMEM;
-
-
- for (i = 0; i < num_deps; ++i) {
- p->post_deps[i].syncobj =
- drm_syncobj_find(p->filp, deps[i].handle);
- if (!p->post_deps[i].syncobj)
- return -EINVAL;
- p->post_deps[i].chain = NULL;
- p->post_deps[i].point = 0;
- p->num_post_deps++;
- }
-
- return 0;
-}
-
+ sched = p->gang_leader->base.entity->rq->sched;
+ while ((fence = amdgpu_sync_get_fence(&p->sync))) {
+ struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);
-static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
-{
- struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
- unsigned num_deps;
- int i;
-
- syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_syncobj);
-
- if (p->post_deps)
- return -EINVAL;
-
- p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
- GFP_KERNEL);
- p->num_post_deps = 0;
-
- if (!p->post_deps)
- return -ENOMEM;
-
- for (i = 0; i < num_deps; ++i) {
- struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
-
- dep->chain = NULL;
- if (syncobj_deps[i].point) {
- dep->chain = dma_fence_chain_alloc();
- if (!dep->chain)
- return -ENOMEM;
- }
-
- dep->syncobj = drm_syncobj_find(p->filp,
- syncobj_deps[i].handle);
- if (!dep->syncobj) {
- dma_fence_chain_free(dep->chain);
- return -EINVAL;
+ /*
+ * When we have a dependency it might be necessary to insert a
+ * pipeline sync to make sure that all caches etc are flushed and the
+ * next job actually sees the results from the previous one
+ * before we start executing on the same scheduler ring.
+ */
+ if (!s_fence || s_fence->sched != sched) {
+ dma_fence_put(fence);
+ continue;
}
- dep->point = syncobj_deps[i].point;
- p->num_post_deps++;
- }
- return 0;
-}
-
-static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
- struct amdgpu_cs_parser *p)
-{
- int i, r;
-
- for (i = 0; i < p->nchunks; ++i) {
- struct amdgpu_cs_chunk *chunk;
-
- chunk = &p->chunks[i];
-
- switch (chunk->chunk_id) {
- case AMDGPU_CHUNK_ID_DEPENDENCIES:
- case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
- r = amdgpu_cs_process_fence_dep(p, chunk);
- if (r)
- return r;
- break;
- case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
- r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
- if (r)
- return r;
- break;
- case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
- r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
- if (r)
- return r;
- break;
- case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
- r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
- if (r)
- return r;
- break;
- case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
- r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
- if (r)
- return r;
- break;
- }
+ r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence,
+ GFP_KERNEL);
+ dma_fence_put(fence);
+ if (r)
+ return r;
}
-
return 0;
}
@@ -1209,20 +1284,36 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- struct drm_sched_entity *entity = p->entity;
+ struct amdgpu_job *leader = p->gang_leader;
struct amdgpu_bo_list_entry *e;
- struct amdgpu_job *job;
+ struct drm_gem_object *gobj;
+ unsigned long index;
+ unsigned int i;
uint64_t seq;
int r;
- job = p->job;
- p->job = NULL;
+ for (i = 0; i < p->gang_size; ++i)
+ drm_sched_job_arm(&p->jobs[i]->base);
- r = drm_sched_job_init(&job->base, entity, &fpriv->vm);
- if (r)
- goto error_unlock;
+ for (i = 0; i < p->gang_size; ++i) {
+ struct dma_fence *fence;
+
+ if (p->jobs[i] == leader)
+ continue;
+
+ fence = &p->jobs[i]->base.s_fence->scheduled;
+ dma_fence_get(fence);
+ r = drm_sched_job_add_dependency(&leader->base, fence);
+ if (r) {
+ dma_fence_put(fence);
+ return r;
+ }
+ }
- drm_sched_job_arm(&job->base);
+ if (p->gang_size > 1) {
+ for (i = 0; i < p->gang_size; ++i)
+ amdgpu_job_set_gang_leader(p->jobs[i], leader);
+ }
/* No memory allocation is allowed while holding the notifier lock.
* The lock is held until amdgpu_cs_submit is finished and fence is
@@ -1233,91 +1324,100 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
* -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
*/
+ r = 0;
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
-
- r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+ r |= !amdgpu_hmm_range_valid(e->range);
+ amdgpu_hmm_range_free(e->range);
+ e->range = NULL;
}
if (r) {
r = -EAGAIN;
- goto error_abort;
+ mutex_unlock(&p->adev->notifier_lock);
+ return r;
}
- p->fence = dma_fence_get(&job->base.s_fence->finished);
+ p->fence = dma_fence_get(&leader->base.s_fence->finished);
+ drm_exec_for_each_locked_object(&p->exec, index, gobj) {
+
+ ttm_bo_move_to_lru_tail_unlocked(&gem_to_amdgpu_bo(gobj)->tbo);
+
+ /* Everybody except for the gang leader uses READ */
+ for (i = 0; i < p->gang_size; ++i) {
+ if (p->jobs[i] == leader)
+ continue;
- amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
+ dma_resv_add_fence(gobj->resv,
+ &p->jobs[i]->base.s_fence->finished,
+ DMA_RESV_USAGE_READ);
+ }
+
+ /* The gang leader is remembered as writer */
+ dma_resv_add_fence(gobj->resv, p->fence, DMA_RESV_USAGE_WRITE);
+ }
+
+ seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],
+ p->fence);
amdgpu_cs_post_dependencies(p);
- if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
+ if ((leader->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
!p->ctx->preamble_presented) {
- job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
+ leader->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
p->ctx->preamble_presented = true;
}
cs->out.handle = seq;
- job->uf_sequence = seq;
-
- amdgpu_job_free_resources(job);
-
- trace_amdgpu_cs_ioctl(job);
- amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
- drm_sched_entity_push_job(&job->base);
+ leader->uf_sequence = seq;
+
+ amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->exec.ticket);
+ for (i = 0; i < p->gang_size; ++i) {
+ amdgpu_job_free_resources(p->jobs[i]);
+ trace_amdgpu_cs_ioctl(p->jobs[i]);
+ drm_sched_entity_push_job(&p->jobs[i]->base);
+ p->jobs[i] = NULL;
+ }
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
- amdgpu_bo_list_for_each_entry(e, p->bo_list) {
- struct dma_resv *resv = e->tv.bo->base.resv;
- struct dma_fence_chain *chain = e->chain;
-
- if (!chain)
- continue;
-
- /*
- * Work around dma_resv shortcommings by wrapping up the
- * submission in a dma_fence_chain and add it as exclusive
- * fence, but first add the submission as shared fence to make
- * sure that shared fences never signal before the exclusive
- * one.
- */
- dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
- dma_fence_get(p->fence), 1);
-
- dma_resv_add_shared_fence(resv, p->fence);
- rcu_assign_pointer(resv->fence_excl, &chain->base);
- e->chain = NULL;
- }
-
- ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
mutex_unlock(&p->adev->notifier_lock);
-
+ mutex_unlock(&p->bo_list->bo_list_mutex);
return 0;
-
-error_abort:
- drm_sched_job_cleanup(&job->base);
- mutex_unlock(&p->adev->notifier_lock);
-
-error_unlock:
- amdgpu_job_free(job);
- return r;
}
-static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser)
+/* Cleanup the parser structure */
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
{
- int i;
+ unsigned int i;
- if (!trace_amdgpu_cs_enabled())
- return;
+ amdgpu_sync_free(&parser->sync);
+ drm_exec_fini(&parser->exec);
+
+ for (i = 0; i < parser->num_post_deps; i++) {
+ drm_syncobj_put(parser->post_deps[i].syncobj);
+ kfree(parser->post_deps[i].chain);
+ }
+ kfree(parser->post_deps);
- for (i = 0; i < parser->job->num_ibs; i++)
- trace_amdgpu_cs(parser, i);
+ dma_fence_put(parser->fence);
+
+ if (parser->ctx)
+ amdgpu_ctx_put(parser->ctx);
+ if (parser->bo_list)
+ amdgpu_bo_list_put(parser->bo_list);
+
+ for (i = 0; i < parser->nchunks; i++)
+ kvfree(parser->chunks[i].kdata);
+ kvfree(parser->chunks);
+ for (i = 0; i < parser->gang_size; ++i) {
+ if (parser->jobs[i])
+ amdgpu_job_free(parser->jobs[i]);
+ }
+ amdgpu_bo_unref(&parser->uf_bo);
}
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
struct amdgpu_device *adev = drm_to_adev(dev);
- union drm_amdgpu_cs *cs = data;
- struct amdgpu_cs_parser parser = {};
- bool reserved_buffers = false;
+ struct amdgpu_cs_parser parser;
int r;
if (amdgpu_ras_intr_triggered())
@@ -1326,48 +1426,55 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (!adev->accel_working)
return -EBUSY;
- parser.adev = adev;
- parser.filp = filp;
-
- r = amdgpu_cs_parser_init(&parser, data);
+ r = amdgpu_cs_parser_init(&parser, adev, filp, data);
if (r) {
- if (printk_ratelimit())
- DRM_ERROR("Failed to initialize parser %d!\n", r);
- goto out;
+ drm_err_ratelimited(dev, "Failed to initialize parser %d!\n", r);
+ return r;
}
- r = amdgpu_cs_ib_fill(adev, &parser);
+ r = amdgpu_cs_pass1(&parser, data);
if (r)
- goto out;
+ goto error_fini;
- r = amdgpu_cs_dependencies(adev, &parser);
- if (r) {
- DRM_ERROR("Failed in the dependencies handling %d!\n", r);
- goto out;
- }
+ r = amdgpu_cs_pass2(&parser);
+ if (r)
+ goto error_fini;
r = amdgpu_cs_parser_bos(&parser, data);
if (r) {
if (r == -ENOMEM)
- DRM_ERROR("Not enough memory for command submission!\n");
+ drm_err(dev, "Not enough memory for command submission!\n");
else if (r != -ERESTARTSYS && r != -EAGAIN)
- DRM_ERROR("Failed to process the buffer list %d!\n", r);
- goto out;
+ drm_dbg(dev, "Failed to process the buffer list %d!\n", r);
+ goto error_fini;
}
- reserved_buffers = true;
+ r = amdgpu_cs_patch_jobs(&parser);
+ if (r)
+ goto error_backoff;
+
+ r = amdgpu_cs_vm_handling(&parser);
+ if (r)
+ goto error_backoff;
+
+ r = amdgpu_cs_sync_rings(&parser);
+ if (r)
+ goto error_backoff;
trace_amdgpu_cs_ibs(&parser);
- r = amdgpu_cs_vm_handling(&parser);
+ r = amdgpu_cs_submit(&parser, data);
if (r)
- goto out;
+ goto error_backoff;
- r = amdgpu_cs_submit(&parser, cs);
+ amdgpu_cs_parser_fini(&parser);
+ return 0;
-out:
- amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+error_backoff:
+ mutex_unlock(&parser.bo_list->bo_list_mutex);
+error_fini:
+ amdgpu_cs_parser_fini(&parser);
return r;
}
@@ -1510,6 +1617,7 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
return 0;
default:
+ dma_fence_put(fence);
return -EINVAL;
}
}
@@ -1542,15 +1650,15 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
continue;
r = dma_fence_wait_timeout(fence, true, timeout);
+ if (r > 0 && fence->error)
+ r = fence->error;
+
dma_fence_put(fence);
if (r < 0)
return r;
if (r == 0)
break;
-
- if (fence->error)
- return fence->error;
}
memset(wait, 0, sizeof(*wait));
@@ -1636,30 +1744,21 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
{
struct amdgpu_device *adev = drm_to_adev(dev);
union drm_amdgpu_wait_fences *wait = data;
- uint32_t fence_count = wait->in.fence_count;
- struct drm_amdgpu_fence *fences_user;
struct drm_amdgpu_fence *fences;
int r;
/* Get the fences from userspace */
- fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
- GFP_KERNEL);
- if (fences == NULL)
- return -ENOMEM;
-
- fences_user = u64_to_user_ptr(wait->in.fences);
- if (copy_from_user(fences, fences_user,
- sizeof(struct drm_amdgpu_fence) * fence_count)) {
- r = -EFAULT;
- goto err_free_fences;
- }
+ fences = memdup_array_user(u64_to_user_ptr(wait->in.fences),
+ wait->in.fence_count,
+ sizeof(struct drm_amdgpu_fence));
+ if (IS_ERR(fences))
+ return PTR_ERR(fences);
if (wait->in.wait_all)
r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
else
r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);
-err_free_fences:
kfree(fences);
return r;
@@ -1685,7 +1784,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_va_mapping *mapping;
- int r;
+ int i, r;
addr /= AMDGPU_GPU_PAGE_SIZE;
@@ -1697,12 +1796,17 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
*map = mapping;
/* Double check that the BO is reserved by this CS */
- if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket)
+ if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket)
return -EINVAL;
- if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
- (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ /* Make sure VRAM is allocated contiguously */
+ (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ if ((*bo)->tbo.resource->mem_type == TTM_PL_VRAM &&
+ !((*bo)->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {
+
amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
+ for (i = 0; i < (*bo)->placement.num_placement; i++)
+ (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
new file mode 100644
index 000000000000..39c33ad100cb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_CS_H__
+#define __AMDGPU_CS_H__
+
+#include <linux/ww_mutex.h>
+#include <drm/drm_exec.h>
+
+#include "amdgpu_job.h"
+#include "amdgpu_bo_list.h"
+#include "amdgpu_ring.h"
+
+#define AMDGPU_CS_GANG_SIZE 4
+
+struct amdgpu_bo_va_mapping;
+
+struct amdgpu_cs_chunk {
+ uint32_t chunk_id;
+ uint32_t length_dw;
+ void *kdata;
+};
+
+struct amdgpu_cs_post_dep {
+ struct drm_syncobj *syncobj;
+ struct dma_fence_chain *chain;
+ u64 point;
+};
+
+struct amdgpu_cs_parser {
+ struct amdgpu_device *adev;
+ struct drm_file *filp;
+ struct amdgpu_ctx *ctx;
+
+ /* chunks */
+ unsigned nchunks;
+ struct amdgpu_cs_chunk *chunks;
+
+ /* scheduler job objects */
+ unsigned int gang_size;
+ unsigned int gang_leader_idx;
+ struct drm_sched_entity *entities[AMDGPU_CS_GANG_SIZE];
+ struct amdgpu_job *jobs[AMDGPU_CS_GANG_SIZE];
+ struct amdgpu_job *gang_leader;
+
+ /* buffer objects */
+ struct drm_exec exec;
+ struct amdgpu_bo_list *bo_list;
+ struct amdgpu_mn *mn;
+ struct dma_fence *fence;
+ uint64_t bytes_moved_threshold;
+ uint64_t bytes_moved_vis_threshold;
+ uint64_t bytes_moved;
+ uint64_t bytes_moved_vis;
+
+ /* user fence */
+ struct amdgpu_bo *uf_bo;
+
+ unsigned num_post_deps;
+ struct amdgpu_cs_post_dep *post_deps;
+
+ struct amdgpu_sync sync;
+};
+
+int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
+ uint64_t addr, struct amdgpu_bo **bo,
+ struct amdgpu_bo_va_mapping **mapping);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index da21e60bb827..02138aa55793 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -22,13 +22,14 @@
* * Author: Monk.liu@amd.com
*/
+#include <drm/drm_exec.h>
+
#include "amdgpu.h"
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
{
- uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
+ uint64_t addr = AMDGPU_VA_RESERVED_CSA_START(adev);
- addr -= AMDGPU_VA_RESERVED_SIZE;
addr = amdgpu_gmc_sign_extend(addr);
return addr;
@@ -65,31 +66,25 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
uint64_t csa_addr, uint32_t size)
{
- struct ww_acquire_ctx ticket;
- struct list_head list;
- struct amdgpu_bo_list_entry pd;
- struct ttm_validate_buffer csa_tv;
+ struct drm_exec exec;
int r;
- INIT_LIST_HEAD(&list);
- INIT_LIST_HEAD(&csa_tv.head);
- csa_tv.bo = &bo->tbo;
- csa_tv.num_shared = 1;
-
- list_add(&csa_tv.head, &list);
- amdgpu_vm_get_pd_bo(vm, &list, &pd);
-
- r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
- if (r) {
- DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
- return r;
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = amdgpu_vm_lock_pd(vm, &exec, 0);
+ if (likely(!r))
+ r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r)) {
+ DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
+ goto error;
+ }
}
*bo_va = amdgpu_vm_bo_add(adev, vm, bo);
if (!*bo_va) {
- ttm_eu_backoff_reservation(&ticket, &list);
- DRM_ERROR("failed to create bo_va for static CSA\n");
- return -ENOMEM;
+ r = -ENOMEM;
+ goto error;
}
r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
@@ -98,11 +93,43 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (r) {
DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
- amdgpu_vm_bo_rmv(adev, *bo_va);
- ttm_eu_backoff_reservation(&ticket, &list);
- return r;
+ amdgpu_vm_bo_del(adev, *bo_va);
+ goto error;
}
- ttm_eu_backoff_reservation(&ticket, &list);
- return 0;
+error:
+ drm_exec_fini(&exec);
+ return r;
+}
+
+int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va,
+ uint64_t csa_addr)
+{
+ struct drm_exec exec;
+ int r;
+
+ drm_exec_init(&exec, 0, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = amdgpu_vm_lock_pd(vm, &exec, 0);
+ if (likely(!r))
+ r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r)) {
+ DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
+ goto error;
+ }
+ }
+
+ r = amdgpu_vm_bo_unmap(adev, bo_va, csa_addr);
+ if (r) {
+ DRM_ERROR("failed to do bo_unmap on static CSA, err=%d\n", r);
+ goto error;
+ }
+
+ amdgpu_vm_bo_del(adev, bo_va);
+
+error:
+ drm_exec_fini(&exec);
+ return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h
index 524b4437a021..7dfc1f2012eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h
@@ -34,6 +34,9 @@ int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
uint64_t csa_addr, uint32_t size);
+int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va,
+ uint64_t csa_addr);
void amdgpu_free_static_csa(struct amdgpu_bo **bo);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 468003583b2a..afedea02188d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -23,6 +23,7 @@
*/
#include <drm/drm_auth.h>
+#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"
#include "amdgpu_ras.h"
@@ -41,12 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
[AMDGPU_HW_IP_VCN_DEC] = 1,
[AMDGPU_HW_IP_VCN_ENC] = 1,
[AMDGPU_HW_IP_VCN_JPEG] = 1,
+ [AMDGPU_HW_IP_VPE] = 1,
};
bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
{
switch (ctx_prio) {
- case AMDGPU_CTX_PRIORITY_UNSET:
case AMDGPU_CTX_PRIORITY_VERY_LOW:
case AMDGPU_CTX_PRIORITY_LOW:
case AMDGPU_CTX_PRIORITY_NORMAL:
@@ -54,6 +55,11 @@ bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
case AMDGPU_CTX_PRIORITY_VERY_HIGH:
return true;
default:
+ case AMDGPU_CTX_PRIORITY_UNSET:
+ /* UNSET priority is not valid and we don't carry that
+ * around, but set it to NORMAL in the only place this
+ * function is called, amdgpu_ctx_ioctl().
+ */
return false;
}
}
@@ -63,13 +69,14 @@ amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
{
switch (ctx_prio) {
case AMDGPU_CTX_PRIORITY_UNSET:
- return DRM_SCHED_PRIORITY_UNSET;
+ pr_warn_once("AMD-->DRM context priority value UNSET-->NORMAL");
+ return DRM_SCHED_PRIORITY_NORMAL;
case AMDGPU_CTX_PRIORITY_VERY_LOW:
- return DRM_SCHED_PRIORITY_MIN;
+ return DRM_SCHED_PRIORITY_LOW;
case AMDGPU_CTX_PRIORITY_LOW:
- return DRM_SCHED_PRIORITY_MIN;
+ return DRM_SCHED_PRIORITY_LOW;
case AMDGPU_CTX_PRIORITY_NORMAL:
return DRM_SCHED_PRIORITY_NORMAL;
@@ -93,9 +100,6 @@ amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
int32_t priority)
{
- if (!amdgpu_ctx_priority_is_valid(priority))
- return -EINVAL;
-
/* NORMAL and below are accessible by everyone */
if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
return 0;
@@ -109,7 +113,7 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
return -EACCES;
}
-static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_compute_prio(int32_t prio)
+static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_gfx_pipe_prio(int32_t prio)
{
switch (prio) {
case AMDGPU_CTX_PRIORITY_HIGH:
@@ -134,16 +138,17 @@ static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_
static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
{
- struct amdgpu_device *adev = ctx->adev;
- int32_t ctx_prio;
+ struct amdgpu_device *adev = ctx->mgr->adev;
unsigned int hw_prio;
+ int32_t ctx_prio;
ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
ctx->init_priority : ctx->override_priority;
switch (hw_ip) {
+ case AMDGPU_HW_IP_GFX:
case AMDGPU_HW_IP_COMPUTE:
- hw_prio = amdgpu_ctx_prio_to_compute_prio(ctx_prio);
+ hw_prio = amdgpu_ctx_prio_to_gfx_pipe_prio(ctx_prio);
break;
case AMDGPU_HW_IP_VCE:
case AMDGPU_HW_IP_VCN_ENC:
@@ -161,17 +166,50 @@ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
return hw_prio;
}
+/* Calculate the time spent on the hw */
+static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
+{
+ struct drm_sched_fence *s_fence;
+
+ if (!fence)
+ return ns_to_ktime(0);
+
+ /* When the fence is not even scheduled it can't have spent time */
+ s_fence = to_drm_sched_fence(fence);
+ if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags))
+ return ns_to_ktime(0);
+
+ /* When it is still running account for how much was already spent */
+ if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags))
+ return ktime_sub(ktime_get(), s_fence->scheduled.timestamp);
+
+ return ktime_sub(s_fence->finished.timestamp,
+ s_fence->scheduled.timestamp);
+}
+
+static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
+ struct amdgpu_ctx_entity *centity)
+{
+ ktime_t res = ns_to_ktime(0);
+ uint32_t i;
+
+ spin_lock(&ctx->ring_lock);
+ for (i = 0; i < amdgpu_sched_jobs; i++) {
+ res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i]));
+ }
+ spin_unlock(&ctx->ring_lock);
+ return res;
+}
static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
const u32 ring)
{
- struct amdgpu_device *adev = ctx->adev;
- struct amdgpu_ctx_entity *entity;
struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
- unsigned num_scheds = 0;
- int32_t ctx_prio;
- unsigned int hw_prio;
+ struct amdgpu_device *adev = ctx->mgr->adev;
+ struct amdgpu_ctx_entity *entity;
enum drm_sched_priority drm_prio;
+ unsigned int hw_prio, num_scheds;
+ int32_t ctx_prio;
int r;
entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
@@ -181,13 +219,25 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
ctx->init_priority : ctx->override_priority;
+ entity->hw_ip = hw_ip;
entity->sequence = 1;
hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);
hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
- scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
- num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+
+ if (!(adev)->xcp_mgr) {
+ scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
+ num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+ } else {
+ struct amdgpu_fpriv *fpriv;
+
+ fpriv = container_of(ctx->ctx_mgr, struct amdgpu_fpriv, ctx_mgr);
+ r = amdgpu_xcp_select_scheds(adev, hw_ip, hw_prio, fpriv,
+ &num_scheds, &scheds);
+ if (r)
+ goto error_free_entity;
+ }
/* disable load balance if the hw engine retains context among dependent jobs */
if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
@@ -204,20 +254,74 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
if (r)
goto error_free_entity;
- ctx->entities[hw_ip][ring] = entity;
+ /* It's not an error if we fail to install the new entity */
+ if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity))
+ goto cleanup_entity;
+
return 0;
+cleanup_entity:
+ drm_sched_entity_fini(&entity->entity);
+
error_free_entity:
kfree(entity);
return r;
}
-static int amdgpu_ctx_init(struct amdgpu_device *adev,
- int32_t priority,
- struct drm_file *filp,
- struct amdgpu_ctx *ctx)
+static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_device *adev,
+ struct amdgpu_ctx_entity *entity)
{
+ ktime_t res = ns_to_ktime(0);
+ int i;
+
+ if (!entity)
+ return res;
+
+ for (i = 0; i < amdgpu_sched_jobs; ++i) {
+ res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i]));
+ dma_fence_put(entity->fences[i]);
+ }
+
+ amdgpu_xcp_release_sched(adev, entity);
+
+ kfree(entity);
+ return res;
+}
+
+static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
+ u32 *stable_pstate)
+{
+ struct amdgpu_device *adev = ctx->mgr->adev;
+ enum amd_dpm_forced_level current_level;
+
+ current_level = amdgpu_dpm_get_performance_level(adev);
+
+ switch (current_level) {
+ case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
+ *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_STANDARD;
+ break;
+ case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
+ *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK;
+ break;
+ case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
+ *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK;
+ break;
+ case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
+ *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_PEAK;
+ break;
+ default:
+ *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
+ break;
+ }
+ return 0;
+}
+
+static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
+ struct drm_file *filp, struct amdgpu_ctx *ctx)
+{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ u32 current_stable_pstate;
int r;
r = amdgpu_ctx_priority_permit(filp, priority);
@@ -226,52 +330,104 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
memset(ctx, 0, sizeof(*ctx));
- ctx->adev = adev;
-
kref_init(&ctx->refcount);
+ ctx->mgr = mgr;
spin_lock_init(&ctx->ring_lock);
- mutex_init(&ctx->lock);
- ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+ ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
ctx->reset_counter_query = ctx->reset_counter;
- ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+ ctx->generation = amdgpu_vm_generation(mgr->adev, &fpriv->vm);
ctx->init_priority = priority;
ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
+ r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
+ if (r)
+ return r;
+
+ if (mgr->adev->pm.stable_pstate_ctx)
+ ctx->stable_pstate = mgr->adev->pm.stable_pstate_ctx->stable_pstate;
+ else
+ ctx->stable_pstate = current_stable_pstate;
+
+ ctx->ctx_mgr = &(fpriv->ctx_mgr);
return 0;
}
-static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
+static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
+ u32 stable_pstate)
{
+ struct amdgpu_device *adev = ctx->mgr->adev;
+ enum amd_dpm_forced_level level;
+ u32 current_stable_pstate;
+ int r;
- int i;
+ mutex_lock(&adev->pm.stable_pstate_ctx_lock);
+ if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) {
+ r = -EBUSY;
+ goto done;
+ }
- if (!entity)
- return;
+ r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
+ if (r || (stable_pstate == current_stable_pstate))
+ goto done;
- for (i = 0; i < amdgpu_sched_jobs; ++i)
- dma_fence_put(entity->fences[i]);
+ switch (stable_pstate) {
+ case AMDGPU_CTX_STABLE_PSTATE_NONE:
+ level = AMD_DPM_FORCED_LEVEL_AUTO;
+ break;
+ case AMDGPU_CTX_STABLE_PSTATE_STANDARD:
+ level = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD;
+ break;
+ case AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK:
+ level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK;
+ break;
+ case AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK:
+ level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK;
+ break;
+ case AMDGPU_CTX_STABLE_PSTATE_PEAK:
+ level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
+ break;
+ default:
+ r = -EINVAL;
+ goto done;
+ }
- kfree(entity);
+ r = amdgpu_dpm_force_performance_level(adev, level);
+
+ if (level == AMD_DPM_FORCED_LEVEL_AUTO)
+ adev->pm.stable_pstate_ctx = NULL;
+ else
+ adev->pm.stable_pstate_ctx = ctx;
+done:
+ mutex_unlock(&adev->pm.stable_pstate_ctx_lock);
+
+ return r;
}
static void amdgpu_ctx_fini(struct kref *ref)
{
struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
- struct amdgpu_device *adev = ctx->adev;
- unsigned i, j;
+ struct amdgpu_ctx_mgr *mgr = ctx->mgr;
+ struct amdgpu_device *adev = mgr->adev;
+ unsigned i, j, idx;
if (!adev)
return;
for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
- amdgpu_ctx_fini_entity(ctx->entities[i][j]);
- ctx->entities[i][j] = NULL;
+ ktime_t spend;
+
+ spend = amdgpu_ctx_fini_entity(adev, ctx->entities[i][j]);
+ atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
}
}
- mutex_destroy(&ctx->lock);
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate);
+ drm_dev_exit(idx);
+ }
+
kfree(ctx);
}
@@ -279,6 +435,7 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
u32 ring, struct drm_sched_entity **entity)
{
int r;
+ struct drm_sched_entity *ctx_entity;
if (hw_ip >= AMDGPU_HW_IP_NUM) {
DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
@@ -302,7 +459,14 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
return r;
}
- *entity = &ctx->entities[hw_ip][ring]->entity;
+ ctx_entity = &ctx->entities[hw_ip][ring]->entity;
+ r = drm_sched_entity_error(ctx_entity);
+ if (r) {
+ DRM_DEBUG("error entity %p\n", ctx_entity);
+ return r;
+ }
+
+ *entity = ctx_entity;
return 0;
}
@@ -329,7 +493,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
}
*id = (uint32_t)r;
- r = amdgpu_ctx_init(adev, priority, filp, ctx);
+ r = amdgpu_ctx_init(mgr, priority, filp, ctx);
if (r) {
idr_remove(&mgr->ctx_handles, *id);
*id = 0;
@@ -433,12 +597,15 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;
- if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
+ if (ctx->generation != amdgpu_vm_generation(adev, &fpriv->vm))
out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;
if (atomic_read(&ctx->guilty))
out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
+ if (amdgpu_in_reset(adev))
+ out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS;
+
if (adev->ras_enabled && con) {
/* Return the cached values in O(1),
* and schedule delayed work to cache
@@ -467,11 +634,39 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
return 0;
}
+static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev,
+ struct amdgpu_fpriv *fpriv, uint32_t id,
+ bool set, u32 *stable_pstate)
+{
+ struct amdgpu_ctx *ctx;
+ struct amdgpu_ctx_mgr *mgr;
+ int r;
+
+ if (!fpriv)
+ return -EINVAL;
+
+ mgr = &fpriv->ctx_mgr;
+ mutex_lock(&mgr->lock);
+ ctx = idr_find(&mgr->ctx_handles, id);
+ if (!ctx) {
+ mutex_unlock(&mgr->lock);
+ return -EINVAL;
+ }
+
+ if (set)
+ r = amdgpu_ctx_set_stable_pstate(ctx, *stable_pstate);
+ else
+ r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate);
+
+ mutex_unlock(&mgr->lock);
+ return r;
+}
+
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
int r;
- uint32_t id;
+ uint32_t id, stable_pstate;
int32_t priority;
union drm_amdgpu_ctx *args = data;
@@ -481,25 +676,50 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
id = args->in.ctx_id;
priority = args->in.priority;
- /* For backwards compatibility reasons, we need to accept
- * ioctls with garbage in the priority field */
+ /* For backwards compatibility, we need to accept ioctls with garbage
+ * in the priority field. Garbage values in the priority field, result
+ * in the priority being set to NORMAL.
+ */
if (!amdgpu_ctx_priority_is_valid(priority))
priority = AMDGPU_CTX_PRIORITY_NORMAL;
switch (args->in.op) {
case AMDGPU_CTX_OP_ALLOC_CTX:
+ if (args->in.flags)
+ return -EINVAL;
r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
args->out.alloc.ctx_id = id;
break;
case AMDGPU_CTX_OP_FREE_CTX:
+ if (args->in.flags)
+ return -EINVAL;
r = amdgpu_ctx_free(fpriv, id);
break;
case AMDGPU_CTX_OP_QUERY_STATE:
+ if (args->in.flags)
+ return -EINVAL;
r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
break;
case AMDGPU_CTX_OP_QUERY_STATE2:
+ if (args->in.flags)
+ return -EINVAL;
r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
break;
+ case AMDGPU_CTX_OP_GET_STABLE_PSTATE:
+ if (args->in.flags)
+ return -EINVAL;
+ r = amdgpu_ctx_stable_pstate(adev, fpriv, id, false, &stable_pstate);
+ if (!r)
+ args->out.pstate.flags = stable_pstate;
+ break;
+ case AMDGPU_CTX_OP_SET_STABLE_PSTATE:
+ if (args->in.flags & ~AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK)
+ return -EINVAL;
+ stable_pstate = args->in.flags & AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK;
+ if (stable_pstate > AMDGPU_CTX_STABLE_PSTATE_PEAK)
+ return -EINVAL;
+ r = amdgpu_ctx_stable_pstate(adev, fpriv, id, true, &stable_pstate);
+ break;
default:
return -EINVAL;
}
@@ -534,9 +754,9 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
return 0;
}
-void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
- struct drm_sched_entity *entity,
- struct dma_fence *fence, uint64_t *handle)
+uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity,
+ struct dma_fence *fence)
{
struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
uint64_t seq = centity->sequence;
@@ -545,8 +765,7 @@ void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
idx = seq & (amdgpu_sched_jobs - 1);
other = centity->fences[idx];
- if (other)
- BUG_ON(!dma_fence_is_signaled(other));
+ WARN_ON(other && !dma_fence_is_signaled(other));
dma_fence_get(fence);
@@ -555,9 +774,11 @@ void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
centity->sequence++;
spin_unlock(&ctx->ring_lock);
+ atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)),
+ &ctx->mgr->time_spend[centity->hw_ip]);
+
dma_fence_put(other);
- if (handle)
- *handle = seq;
+ return seq;
}
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
@@ -594,7 +815,7 @@ static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
int hw_ip,
int32_t priority)
{
- struct amdgpu_device *adev = ctx->adev;
+ struct amdgpu_device *adev = ctx->mgr->adev;
unsigned int hw_prio;
struct drm_gpu_scheduler **scheds = NULL;
unsigned num_scheds;
@@ -604,7 +825,7 @@ static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
amdgpu_ctx_to_drm_sched_prio(priority));
/* set hw priority */
- if (hw_ip == AMDGPU_HW_IP_COMPUTE) {
+ if (hw_ip == AMDGPU_HW_IP_COMPUTE || hw_ip == AMDGPU_HW_IP_GFX) {
hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
@@ -659,10 +880,17 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
return r;
}
-void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
+void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
+ struct amdgpu_device *adev)
{
+ unsigned int i;
+
+ mgr->adev = adev;
mutex_init(&mgr->lock);
- idr_init(&mgr->ctx_handles);
+ idr_init_base(&mgr->ctx_handles, 1);
+
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+ atomic64_set(&mgr->time_spend[i], 0);
}
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
@@ -691,7 +919,7 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
return timeout;
}
-void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
+static void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
struct amdgpu_ctx *ctx;
struct idr *idp;
@@ -716,102 +944,49 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
drm_sched_entity_fini(entity);
}
}
+ kref_put(&ctx->refcount, amdgpu_ctx_fini);
}
}
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
- struct amdgpu_ctx *ctx;
- struct idr *idp;
- uint32_t id;
-
amdgpu_ctx_mgr_entity_fini(mgr);
-
- idp = &mgr->ctx_handles;
-
- idr_for_each_entry(idp, ctx, id) {
- if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
- DRM_ERROR("ctx %p is still alive\n", ctx);
- }
-
idr_destroy(&mgr->ctx_handles);
mutex_destroy(&mgr->lock);
}
-static void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx,
- struct amdgpu_ctx_entity *centity, ktime_t *total, ktime_t *max)
+void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
+ ktime_t usage[AMDGPU_HW_IP_NUM])
{
- ktime_t now, t1;
- uint32_t i;
-
- *total = *max = 0;
-
- now = ktime_get();
- for (i = 0; i < amdgpu_sched_jobs; i++) {
- struct dma_fence *fence;
- struct drm_sched_fence *s_fence;
-
- spin_lock(&ctx->ring_lock);
- fence = dma_fence_get(centity->fences[i]);
- spin_unlock(&ctx->ring_lock);
- if (!fence)
- continue;
- s_fence = to_drm_sched_fence(fence);
- if (!dma_fence_is_signaled(&s_fence->scheduled)) {
- dma_fence_put(fence);
- continue;
- }
- t1 = s_fence->scheduled.timestamp;
- if (!ktime_before(t1, now)) {
- dma_fence_put(fence);
- continue;
- }
- if (dma_fence_is_signaled(&s_fence->finished) &&
- s_fence->finished.timestamp < now)
- *total += ktime_sub(s_fence->finished.timestamp, t1);
- else
- *total += ktime_sub(now, t1);
- t1 = ktime_sub(now, t1);
- dma_fence_put(fence);
- *max = max(t1, *max);
- }
-}
-
-ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
- uint32_t idx, uint64_t *elapsed)
-{
- struct idr *idp;
struct amdgpu_ctx *ctx;
+ unsigned int hw_ip, i;
uint32_t id;
- struct amdgpu_ctx_entity *centity;
- ktime_t total = 0, max = 0;
- if (idx >= AMDGPU_MAX_ENTITY_NUM)
- return 0;
- idp = &mgr->ctx_handles;
+ /*
+ * This is a little bit racy because it can be that a ctx or a fence are
+ * destroyed just in the moment we try to account them. But that is ok
+ * since exactly that case is explicitely allowed by the interface.
+ */
mutex_lock(&mgr->lock);
- idr_for_each_entry(idp, ctx, id) {
- ktime_t ttotal, tmax;
+ for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
+ uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);
- if (!ctx->entities[hwip][idx])
- continue;
-
- centity = ctx->entities[hwip][idx];
- amdgpu_ctx_fence_time(ctx, centity, &ttotal, &tmax);
+ usage[hw_ip] = ns_to_ktime(ns);
+ }
- /* Harmonic mean approximation diverges for very small
- * values. If ratio < 0.01% ignore
- */
- if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal))
- continue;
+ idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
+ for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
+ for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
+ struct amdgpu_ctx_entity *centity;
+ ktime_t spend;
- total = ktime_add(total, ttotal);
- max = ktime_after(tmax, max) ? tmax : max;
+ centity = ctx->entities[hw_ip][i];
+ if (!centity)
+ continue;
+ spend = amdgpu_ctx_entity_time(ctx, centity);
+ usage[hw_ip] = ktime_add(usage[hw_ip], spend);
+ }
+ }
}
-
mutex_unlock(&mgr->lock);
- if (elapsed)
- *elapsed = max;
-
- return total;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index a44b8b8ed39c..090dfe86f75b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -23,16 +23,20 @@
#ifndef __AMDGPU_CTX_H__
#define __AMDGPU_CTX_H__
+#include <linux/ktime.h>
+#include <linux/types.h>
+
#include "amdgpu_ring.h"
struct drm_device;
struct drm_file;
struct amdgpu_fpriv;
+struct amdgpu_ctx_mgr;
#define AMDGPU_MAX_ENTITY_NUM 4
-#define AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(max, total) ((max) > 16384ULL*(total))
struct amdgpu_ctx_entity {
+ uint32_t hw_ip;
uint64_t sequence;
struct drm_sched_entity entity;
struct dma_fence *fences[];
@@ -40,19 +44,20 @@ struct amdgpu_ctx_entity {
struct amdgpu_ctx {
struct kref refcount;
- struct amdgpu_device *adev;
+ struct amdgpu_ctx_mgr *mgr;
unsigned reset_counter;
unsigned reset_counter_query;
- uint32_t vram_lost_counter;
+ uint64_t generation;
spinlock_t ring_lock;
struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
bool preamble_presented;
int32_t init_priority;
int32_t override_priority;
- struct mutex lock;
atomic_t guilty;
unsigned long ras_counter_ce;
unsigned long ras_counter_ue;
+ uint32_t stable_pstate;
+ struct amdgpu_ctx_mgr *ctx_mgr;
};
struct amdgpu_ctx_mgr {
@@ -60,6 +65,7 @@ struct amdgpu_ctx_mgr {
struct mutex lock;
/* protected by lock */
struct idr ctx_handles;
+ atomic64_t time_spend[AMDGPU_HW_IP_NUM];
};
extern const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM];
@@ -69,9 +75,9 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
u32 ring, struct drm_sched_entity **entity);
-void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
- struct drm_sched_entity *entity,
- struct dma_fence *fence, uint64_t *seq);
+uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity,
+ struct dma_fence *fence);
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity,
uint64_t seq);
@@ -84,10 +90,11 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity);
-void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
-void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
+ struct amdgpu_device *adev);
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
-ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
- uint32_t idx, uint64_t *elapsed);
+void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
+ ktime_t usage[AMDGPU_HW_IP_NUM]);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 164d6a9e9fbb..62d43b8cbe58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -37,6 +37,9 @@
#include "amdgpu_fw_attestation.h"
#include "amdgpu_umr.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_psp_ta.h"
+
#if defined(CONFIG_DEBUG_FS)
/**
@@ -53,14 +56,14 @@
*
* Bit 62: Indicates a GRBM bank switch is needed
* Bit 61: Indicates a SRBM bank switch is needed (implies bit 62 is
- * zero)
+ * zero)
* Bits 24..33: The SE or ME selector if needed
* Bits 34..43: The SH (or SA) or PIPE selector if needed
* Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed
*
* Bit 23: Indicates that the PM power gating lock should be held
- * This is necessary to read registers that might be
- * unreliable during a power gating transistion.
+ * This is necessary to read registers that might be
+ * unreliable during a power gating transistion.
*
* The lower bits are the BYTE offset of the register to read. This
* allows reading multiple registers in a single call and having
@@ -73,7 +76,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
ssize_t result = 0;
int r;
bool pm_pg_lock, use_bank, use_ring;
- unsigned instance_bank, sh_bank, se_bank, me, pipe, queue, vmid;
+ unsigned int instance_bank, sh_bank, se_bank, me, pipe, queue, vmid;
pm_pg_lock = use_bank = use_ring = false;
instance_bank = sh_bank = se_bank = me = pipe = queue = vmid = 0;
@@ -126,17 +129,16 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
if (use_bank) {
if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
(se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return -EINVAL;
}
mutex_lock(&adev->grbm_idx_mutex);
amdgpu_gfx_select_se_sh(adev, se_bank,
- sh_bank, instance_bank);
+ sh_bank, instance_bank, 0);
} else if (use_ring) {
mutex_lock(&adev->srbm_mutex);
- amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid);
+ amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid, 0);
}
if (pm_pg_lock)
@@ -151,7 +153,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
} else {
r = get_user(value, (uint32_t *)buf);
if (!r)
- amdgpu_mm_wreg_mmio_rlc(adev, *pos >> 2, value);
+ amdgpu_mm_wreg_mmio_rlc(adev, *pos >> 2, value, 0);
}
if (r) {
result = r;
@@ -166,17 +168,16 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
end:
if (use_bank) {
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
} else if (use_ring) {
- amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0);
+ amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
if (pm_pg_lock)
mutex_unlock(&adev->pm.mutex);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
@@ -205,7 +206,7 @@ static int amdgpu_debugfs_regs2_open(struct inode *inode, struct file *file)
{
struct amdgpu_debugfs_regs2_data *rd;
- rd = kzalloc(sizeof *rd, GFP_KERNEL);
+ rd = kzalloc(sizeof(*rd), GFP_KERNEL);
if (!rd)
return -ENOMEM;
rd->adev = file_inode(file)->i_private;
@@ -218,6 +219,7 @@ static int amdgpu_debugfs_regs2_open(struct inode *inode, struct file *file)
static int amdgpu_debugfs_regs2_release(struct inode *inode, struct file *file)
{
struct amdgpu_debugfs_regs2_data *rd = file->private_data;
+
mutex_destroy(&rd->lock);
kfree(file->private_data);
return 0;
@@ -251,7 +253,6 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off
if (rd->id.use_grbm) {
if ((rd->id.grbm.sh != 0xFFFFFFFF && rd->id.grbm.sh >= adev->gfx.config.max_sh_per_se) ||
(rd->id.grbm.se != 0xFFFFFFFF && rd->id.grbm.se >= adev->gfx.config.max_shader_engines)) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
mutex_unlock(&rd->lock);
@@ -259,14 +260,14 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off
}
mutex_lock(&adev->grbm_idx_mutex);
amdgpu_gfx_select_se_sh(adev, rd->id.grbm.se,
- rd->id.grbm.sh,
- rd->id.grbm.instance);
+ rd->id.grbm.sh,
+ rd->id.grbm.instance, rd->id.xcc_id);
}
if (rd->id.use_srbm) {
mutex_lock(&adev->srbm_mutex);
amdgpu_gfx_select_me_pipe_q(adev, rd->id.srbm.me, rd->id.srbm.pipe,
- rd->id.srbm.queue, rd->id.srbm.vmid);
+ rd->id.srbm.queue, rd->id.srbm.vmid, rd->id.xcc_id);
}
if (rd->id.pg_lock)
@@ -279,7 +280,7 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off
} else {
r = get_user(value, (uint32_t *)buf);
if (!r)
- amdgpu_mm_wreg_mmio_rlc(adev, offset >> 2, value);
+ amdgpu_mm_wreg_mmio_rlc(adev, offset >> 2, value, rd->id.xcc_id);
}
if (r) {
result = r;
@@ -292,12 +293,12 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off
}
end:
if (rd->id.use_grbm) {
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, rd->id.xcc_id);
mutex_unlock(&adev->grbm_idx_mutex);
}
if (rd->id.use_srbm) {
- amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0);
+ amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, rd->id.xcc_id);
mutex_unlock(&adev->srbm_mutex);
}
@@ -306,7 +307,6 @@ end:
mutex_unlock(&rd->lock);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
@@ -316,18 +316,45 @@ end:
static long amdgpu_debugfs_regs2_ioctl(struct file *f, unsigned int cmd, unsigned long data)
{
struct amdgpu_debugfs_regs2_data *rd = f->private_data;
+ struct amdgpu_debugfs_regs2_iocdata v1_data;
int r;
+ mutex_lock(&rd->lock);
+
switch (cmd) {
+ case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE_V2:
+ r = copy_from_user(&rd->id, (struct amdgpu_debugfs_regs2_iocdata_v2 *)data,
+ sizeof(rd->id));
+ if (r)
+ r = -EINVAL;
+ goto done;
case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE:
- mutex_lock(&rd->lock);
- r = copy_from_user(&rd->id, (struct amdgpu_debugfs_regs2_iocdata *)data, sizeof rd->id);
- mutex_unlock(&rd->lock);
- return r ? -EINVAL : 0;
+ r = copy_from_user(&v1_data, (struct amdgpu_debugfs_regs2_iocdata *)data,
+ sizeof(v1_data));
+ if (r) {
+ r = -EINVAL;
+ goto done;
+ }
+ goto v1_copy;
default:
- return -EINVAL;
- }
- return 0;
+ r = -EINVAL;
+ goto done;
+ }
+
+v1_copy:
+ rd->id.use_srbm = v1_data.use_srbm;
+ rd->id.use_grbm = v1_data.use_grbm;
+ rd->id.pg_lock = v1_data.pg_lock;
+ rd->id.grbm.se = v1_data.grbm.se;
+ rd->id.grbm.sh = v1_data.grbm.sh;
+ rd->id.grbm.instance = v1_data.grbm.instance;
+ rd->id.srbm.me = v1_data.srbm.me;
+ rd->id.srbm.pipe = v1_data.srbm.pipe;
+ rd->id.srbm.queue = v1_data.srbm.queue;
+ rd->id.xcc_id = 0;
+done:
+ mutex_unlock(&rd->lock);
+ return r;
}
static ssize_t amdgpu_debugfs_regs2_read(struct file *f, char __user *buf, size_t size, loff_t *pos)
@@ -340,6 +367,136 @@ static ssize_t amdgpu_debugfs_regs2_write(struct file *f, const char __user *buf
return amdgpu_debugfs_regs2_op(f, (char __user *)buf, *pos, size, 1);
}
+static int amdgpu_debugfs_gprwave_open(struct inode *inode, struct file *file)
+{
+ struct amdgpu_debugfs_gprwave_data *rd;
+
+ rd = kzalloc(sizeof(*rd), GFP_KERNEL);
+ if (!rd)
+ return -ENOMEM;
+ rd->adev = file_inode(file)->i_private;
+ file->private_data = rd;
+ mutex_init(&rd->lock);
+
+ return 0;
+}
+
+static int amdgpu_debugfs_gprwave_release(struct inode *inode, struct file *file)
+{
+ struct amdgpu_debugfs_gprwave_data *rd = file->private_data;
+
+ mutex_destroy(&rd->lock);
+ kfree(file->private_data);
+ return 0;
+}
+
+static ssize_t amdgpu_debugfs_gprwave_read(struct file *f, char __user *buf, size_t size, loff_t *pos)
+{
+ struct amdgpu_debugfs_gprwave_data *rd = f->private_data;
+ struct amdgpu_device *adev = rd->adev;
+ ssize_t result = 0;
+ int r;
+ uint32_t *data, x;
+
+ if (size > 4096 || size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ data = kcalloc(1024, sizeof(*data), GFP_KERNEL);
+ if (!data) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return -ENOMEM;
+ }
+
+ /* switch to the specific se/sh/cu */
+ mutex_lock(&adev->grbm_idx_mutex);
+ amdgpu_gfx_select_se_sh(adev, rd->id.se, rd->id.sh, rd->id.cu, rd->id.xcc_id);
+
+ if (!rd->id.gpr_or_wave) {
+ x = 0;
+ if (adev->gfx.funcs->read_wave_data)
+ adev->gfx.funcs->read_wave_data(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, data, &x);
+ } else {
+ x = size >> 2;
+ if (rd->id.gpr.vpgr_or_sgpr) {
+ if (adev->gfx.funcs->read_wave_vgprs)
+ adev->gfx.funcs->read_wave_vgprs(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, rd->id.gpr.thread, *pos, size>>2, data);
+ } else {
+ if (adev->gfx.funcs->read_wave_sgprs)
+ adev->gfx.funcs->read_wave_sgprs(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, *pos, size>>2, data);
+ }
+ }
+
+ amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, rd->id.xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ if (!x) {
+ result = -EINVAL;
+ goto done;
+ }
+
+ while (size && (*pos < x * 4)) {
+ uint32_t value;
+
+ value = data[*pos >> 2];
+ r = put_user(value, (uint32_t *)buf);
+ if (r) {
+ result = r;
+ goto done;
+ }
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+done:
+ amdgpu_virt_disable_access_debugfs(adev);
+ kfree(data);
+ return result;
+}
+
+static long amdgpu_debugfs_gprwave_ioctl(struct file *f, unsigned int cmd, unsigned long data)
+{
+ struct amdgpu_debugfs_gprwave_data *rd = f->private_data;
+ int r = 0;
+
+ mutex_lock(&rd->lock);
+
+ switch (cmd) {
+ case AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE:
+ if (copy_from_user(&rd->id,
+ (struct amdgpu_debugfs_gprwave_iocdata *)data,
+ sizeof(rd->id)))
+ r = -EFAULT;
+ goto done;
+ default:
+ r = -EINVAL;
+ goto done;
+ }
+
+done:
+ mutex_unlock(&rd->lock);
+ return r;
+}
+
+
+
/**
* amdgpu_debugfs_regs_pcie_read - Read from a PCIE register
@@ -378,14 +535,14 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
while (size) {
uint32_t value;
- value = RREG32_PCIE(*pos);
+ if (upper_32_bits(*pos))
+ value = RREG32_PCIE_EXT(*pos);
+ else
+ value = RREG32_PCIE(*pos);
+
r = put_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- amdgpu_virt_disable_access_debugfs(adev);
- return r;
- }
+ if (r)
+ goto out;
result += 4;
buf += 4;
@@ -393,11 +550,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
size -= 4;
}
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ r = result;
+out:
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-
amdgpu_virt_disable_access_debugfs(adev);
- return result;
+ return r;
}
/**
@@ -438,14 +595,13 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
uint32_t value;
r = get_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- amdgpu_virt_disable_access_debugfs(adev);
- return r;
- }
+ if (r)
+ goto out;
- WREG32_PCIE(*pos, value);
+ if (upper_32_bits(*pos))
+ WREG32_PCIE_EXT(*pos, value);
+ else
+ WREG32_PCIE(*pos, value);
result += 4;
buf += 4;
@@ -453,11 +609,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
size -= 4;
}
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ r = result;
+out:
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-
amdgpu_virt_disable_access_debugfs(adev);
- return result;
+ return r;
}
/**
@@ -482,6 +638,9 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
+ if (!adev->didt_rreg)
+ return -EOPNOTSUPP;
+
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (r < 0) {
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -499,12 +658,8 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
value = RREG32_DIDT(*pos >> 2);
r = put_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- amdgpu_virt_disable_access_debugfs(adev);
- return r;
- }
+ if (r)
+ goto out;
result += 4;
buf += 4;
@@ -512,11 +667,11 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
size -= 4;
}
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ r = result;
+out:
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-
amdgpu_virt_disable_access_debugfs(adev);
- return result;
+ return r;
}
/**
@@ -541,6 +696,9 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
+ if (!adev->didt_wreg)
+ return -EOPNOTSUPP;
+
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (r < 0) {
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -557,12 +715,8 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
uint32_t value;
r = get_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- amdgpu_virt_disable_access_debugfs(adev);
- return r;
- }
+ if (r)
+ goto out;
WREG32_DIDT(*pos >> 2, value);
@@ -572,11 +726,11 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
size -= 4;
}
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ r = result;
+out:
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-
amdgpu_virt_disable_access_debugfs(adev);
- return result;
+ return r;
}
/**
@@ -598,6 +752,9 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
ssize_t result = 0;
int r;
+ if (!adev->smc_rreg)
+ return -EOPNOTSUPP;
+
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
@@ -618,12 +775,8 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
value = RREG32_SMC(*pos);
r = put_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- amdgpu_virt_disable_access_debugfs(adev);
- return r;
- }
+ if (r)
+ goto out;
result += 4;
buf += 4;
@@ -631,11 +784,11 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
size -= 4;
}
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ r = result;
+out:
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-
amdgpu_virt_disable_access_debugfs(adev);
- return result;
+ return r;
}
/**
@@ -657,6 +810,9 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
ssize_t result = 0;
int r;
+ if (!adev->smc_wreg)
+ return -EOPNOTSUPP;
+
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
@@ -676,12 +832,8 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
uint32_t value;
r = get_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- amdgpu_virt_disable_access_debugfs(adev);
- return r;
- }
+ if (r)
+ goto out;
WREG32_SMC(*pos, value);
@@ -691,11 +843,11 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
size -= 4;
}
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ r = result;
+out:
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-
amdgpu_virt_disable_access_debugfs(adev);
- return result;
+ return r;
}
/**
@@ -728,7 +880,7 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
return -ENOMEM;
/* version, increment each time something is added */
- config[no_regs++] = 3;
+ config[no_regs++] = 5;
config[no_regs++] = adev->gfx.config.max_shader_engines;
config[no_regs++] = adev->gfx.config.max_tile_pipes;
config[no_regs++] = adev->gfx.config.max_cu_per_sh;
@@ -756,7 +908,7 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
/* rev==1 */
config[no_regs++] = adev->rev_id;
config[no_regs++] = adev->pg_flags;
- config[no_regs++] = adev->cg_flags;
+ config[no_regs++] = lower_32_bits(adev->cg_flags);
/* rev==2 */
config[no_regs++] = adev->family;
@@ -768,6 +920,13 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
config[no_regs++] = adev->pdev->subsystem_device;
config[no_regs++] = adev->pdev->subsystem_vendor;
+ /* rev==4 APU flag */
+ config[no_regs++] = adev->flags & AMD_IS_APU ? 1 : 0;
+
+ /* rev==5 PG/CG flag upper 32bit */
+ config[no_regs++] = 0;
+ config[no_regs++] = upper_32_bits(adev->cg_flags);
+
while (size && (*pos < no_regs * 4)) {
uint32_t value;
@@ -833,7 +992,6 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
if (r) {
@@ -871,7 +1029,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
* The offset being sought changes which wave that the status data
* will be returned for. The bits are used as follows:
*
- * Bits 0..6: Byte offset into data
+ * Bits 0..6: Byte offset into data
* Bits 7..14: SE selector
* Bits 15..22: SH/SA selector
* Bits 23..30: CU/{WGP+SIMD} selector
@@ -915,16 +1073,15 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
/* switch to the specific se/sh/cu */
mutex_lock(&adev->grbm_idx_mutex);
- amdgpu_gfx_select_se_sh(adev, se, sh, cu);
+ amdgpu_gfx_select_se_sh(adev, se, sh, cu, 0);
x = 0;
if (adev->gfx.funcs->read_wave_data)
- adev->gfx.funcs->read_wave_data(adev, simd, wave, data, &x);
+ adev->gfx.funcs->read_wave_data(adev, 0, simd, wave, data, &x);
- amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+ amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0);
mutex_unlock(&adev->grbm_idx_mutex);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
if (!x) {
@@ -1009,20 +1166,19 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
/* switch to the specific se/sh/cu */
mutex_lock(&adev->grbm_idx_mutex);
- amdgpu_gfx_select_se_sh(adev, se, sh, cu);
+ amdgpu_gfx_select_se_sh(adev, se, sh, cu, 0);
if (bank == 0) {
if (adev->gfx.funcs->read_wave_vgprs)
- adev->gfx.funcs->read_wave_vgprs(adev, simd, wave, thread, offset, size>>2, data);
+ adev->gfx.funcs->read_wave_vgprs(adev, 0, simd, wave, thread, offset, size>>2, data);
} else {
if (adev->gfx.funcs->read_wave_sgprs)
- adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, offset, size>>2, data);
+ adev->gfx.funcs->read_wave_sgprs(adev, 0, simd, wave, offset, size>>2, data);
}
- amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+ amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0);
mutex_unlock(&adev->grbm_idx_mutex);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
while (size) {
@@ -1051,6 +1207,154 @@ err:
}
/**
+ * amdgpu_debugfs_gfxoff_residency_read - Read GFXOFF residency
+ *
+ * @f: open file handle
+ * @buf: user buffer that receives the data
+ * @size: number of bytes to read; must be a multiple of 4
+ * @pos: file offset; must be a multiple of 4, advanced by the bytes copied
+ *
+ * Every 32-bit word requested is filled with the value returned by
+ * amdgpu_get_gfx_off_residency(). The value doesn't auto update, one needs
+ * to stop logging before getting the current value.
+ *
+ * Return: number of bytes copied, or a negative error code.
+ */
+static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f, char __user *buf,
+						    size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = file_inode(f)->i_private;
+	struct device *kdev = adev_to_drm(adev)->dev;
+	ssize_t copied = 0;
+	int ret;
+
+	if ((size & 0x3) || (*pos & 0x3))
+		return -EINVAL;
+
+	/* A failed get still holds a usage count, so it shares the put below. */
+	ret = pm_runtime_get_sync(kdev);
+	if (ret < 0)
+		goto out;
+
+	for (; size; size -= 4, buf += 4) {
+		uint32_t residency;
+
+		ret = amdgpu_get_gfx_off_residency(adev, &residency);
+		if (ret)
+			goto out;
+
+		ret = put_user(residency, (uint32_t *)buf);
+		if (ret)
+			goto out;
+
+		copied += 4;
+		*pos += 4;
+	}
+
+	ret = copied;
+out:
+	pm_runtime_put_autosuspend(kdev);
+
+	return ret;
+}
+
+/**
+ * amdgpu_debugfs_gfxoff_residency_write - Log GFXOFF Residency
+ *
+ * @f: open file handle
+ * @buf: user buffer holding the 32-bit control words
+ * @size: number of bytes to write; must be a multiple of 4
+ * @pos: file offset; must be a multiple of 4, advanced by the bytes consumed
+ *
+ * Write a 32-bit non-zero to start logging; write a 32-bit zero to stop.
+ * Each word in the buffer is applied in turn.
+ *
+ * Return: number of bytes consumed, or a negative error code.
+ */
+static ssize_t amdgpu_debugfs_gfxoff_residency_write(struct file *f, const char __user *buf,
+						     size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = file_inode(f)->i_private;
+	struct device *kdev = adev_to_drm(adev)->dev;
+	ssize_t consumed = 0;
+	int ret;
+
+	if ((size & 0x3) || (*pos & 0x3))
+		return -EINVAL;
+
+	/* A failed get still holds a usage count, so it shares the put below. */
+	ret = pm_runtime_get_sync(kdev);
+	if (ret < 0)
+		goto out;
+
+	for (; size; size -= 4, buf += 4) {
+		u32 enable;
+
+		ret = get_user(enable, (uint32_t *)buf);
+		if (ret)
+			goto out;
+
+		amdgpu_set_gfx_off_residency(adev, enable != 0);
+
+		consumed += 4;
+		*pos += 4;
+	}
+
+	ret = consumed;
+out:
+	pm_runtime_put_autosuspend(kdev);
+
+	return ret;
+}
+
+
+/**
+ * amdgpu_debugfs_gfxoff_count_read - Read GFXOFF entry count
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read; must be a multiple of 4
+ * @pos: Offset to seek to; must be a multiple of 4
+ *
+ * The entry count is a 64-bit value. It is copied out in chunks of at most
+ * sizeof(u64) bytes, never more than the caller asked for, and the cursor
+ * advances by exactly the number of bytes copied. Storing a full u64 for
+ * every 4-byte step would write past the end of the requested range and
+ * make consecutive values overlap.
+ *
+ * Return: number of bytes copied, or a negative error code.
+ */
+static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f, char __user *buf,
+						size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = file_inode(f)->i_private;
+	ssize_t result = 0;
+	int r;
+
+	if (size & 0x3 || *pos & 0x3)
+		return -EINVAL;
+
+	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+	if (r < 0) {
+		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+		return r;
+	}
+
+	while (size) {
+		u64 value = 0;
+		/* size is a multiple of 4, so this is either 4 or 8 bytes */
+		size_t chunk = min_t(size_t, size, sizeof(value));
+
+		r = amdgpu_get_gfx_off_entrycount(adev, &value);
+		if (r)
+			goto out;
+
+		/* copy only what was requested; never overrun the user buffer */
+		if (copy_to_user(buf, &value, chunk)) {
+			r = -EFAULT;
+			goto out;
+		}
+
+		result += chunk;
+		buf += chunk;
+		*pos += chunk;
+		size -= chunk;
+	}
+
+	r = result;
+out:
+	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+	return r;
+}
+
+/**
* amdgpu_debugfs_gfxoff_write - Enable/disable GFXOFF
*
* @f: open file handle
@@ -1080,11 +1384,8 @@ static ssize_t amdgpu_debugfs_gfxoff_write(struct file *f, const char __user *bu
uint32_t value;
r = get_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return r;
- }
+ if (r)
+ goto out;
amdgpu_gfx_off_ctrl(adev, value ? true : false);
@@ -1094,10 +1395,11 @@ static ssize_t amdgpu_debugfs_gfxoff_write(struct file *f, const char __user *bu
size -= 4;
}
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ r = result;
+out:
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return result;
+ return r;
}
@@ -1120,25 +1422,57 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
return -EINVAL;
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
+ }
while (size) {
- uint32_t value;
+ u32 value = adev->gfx.gfx_off_state;
+
+ r = put_user(value, (u32 *)buf);
+ if (r)
+ goto out;
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ u32 value;
r = amdgpu_get_gfx_off_status(adev, &value);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return r;
- }
+ if (r)
+ goto out;
- r = put_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return r;
- }
+ r = put_user(value, (u32 *)buf);
+ if (r)
+ goto out;
result += 4;
buf += 4;
@@ -1146,10 +1480,11 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
size -= 4;
}
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ r = result;
+out:
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return result;
+ return r;
}
static const struct file_operations amdgpu_debugfs_regs2_fops = {
@@ -1162,6 +1497,15 @@ static const struct file_operations amdgpu_debugfs_regs2_fops = {
.llseek = default_llseek
};
+/*
+ * File operations for the GPR/wave debugfs node: ioctl, read, open and
+ * release handlers plus the default llseek. Listed in debugfs_regs[] in the
+ * same slot as the "amdgpu_gprwave" name.
+ */
+static const struct file_operations amdgpu_debugfs_gprwave_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = amdgpu_debugfs_gprwave_ioctl,
+ .read = amdgpu_debugfs_gprwave_read,
+ .open = amdgpu_debugfs_gprwave_open,
+ .release = amdgpu_debugfs_gprwave_release,
+ .llseek = default_llseek
+};
+
static const struct file_operations amdgpu_debugfs_regs_fops = {
.owner = THIS_MODULE,
.read = amdgpu_debugfs_regs_read,
@@ -1217,9 +1561,29 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = {
.llseek = default_llseek
};
+/* Read-only file operations backed by amdgpu_debugfs_gfxoff_status_read(). */
+static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_gfxoff_status_read,
+ .llseek = default_llseek
+};
+
+/* Read-only file operations backed by amdgpu_debugfs_gfxoff_count_read(). */
+static const struct file_operations amdgpu_debugfs_gfxoff_count_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_gfxoff_count_read,
+ .llseek = default_llseek
+};
+
+/*
+ * File operations for GFXOFF residency: reads return the logged value,
+ * writes start (non-zero) or stop (zero) logging.
+ */
+static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_gfxoff_residency_read,
+ .write = amdgpu_debugfs_gfxoff_residency_write,
+ .llseek = default_llseek
+};
+
static const struct file_operations *debugfs_regs[] = {
&amdgpu_debugfs_regs_fops,
&amdgpu_debugfs_regs2_fops,
+ &amdgpu_debugfs_gprwave_fops,
&amdgpu_debugfs_regs_didt_fops,
&amdgpu_debugfs_regs_pcie_fops,
&amdgpu_debugfs_regs_smc_fops,
@@ -1228,11 +1592,15 @@ static const struct file_operations *debugfs_regs[] = {
&amdgpu_debugfs_wave_fops,
&amdgpu_debugfs_gpr_fops,
&amdgpu_debugfs_gfxoff_fops,
+ &amdgpu_debugfs_gfxoff_status_fops,
+ &amdgpu_debugfs_gfxoff_count_fops,
+ &amdgpu_debugfs_gfxoff_residency_fops,
};
-static const char *debugfs_regs_names[] = {
+static const char * const debugfs_regs_names[] = {
"amdgpu_regs",
"amdgpu_regs2",
+ "amdgpu_gprwave",
"amdgpu_regs_didt",
"amdgpu_regs_pcie",
"amdgpu_regs_smc",
@@ -1241,11 +1609,14 @@ static const char *debugfs_regs_names[] = {
"amdgpu_wave",
"amdgpu_gpr",
"amdgpu_gfxoff",
+ "amdgpu_gfxoff_status",
+ "amdgpu_gfxoff_count",
+ "amdgpu_gfxoff_residency",
};
/**
* amdgpu_debugfs_regs_init - Initialize debugfs entries that provide
- * register access.
+ * register access.
*
* @adev: The device to attach the debugfs entries to
*/
@@ -1257,7 +1628,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
ent = debugfs_create_file(debugfs_regs_names[i],
- S_IFREG | S_IRUGO, root,
+ S_IFREG | 0400, root,
adev, debugfs_regs[i]);
if (!i && !IS_ERR_OR_NULL(ent))
i_size_write(ent->d_inode, adev->rmmio_size);
@@ -1268,7 +1639,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct amdgpu_device *adev = m->private;
struct drm_device *dev = adev_to_drm(adev);
int r = 0, i;
@@ -1279,7 +1650,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
}
/* Avoid accidently unparking the sched thread during GPU reset */
- r = down_write_killable(&adev->reset_sem);
+ r = down_write_killable(&adev->reset_domain->sem);
if (r)
return r;
@@ -1287,30 +1658,29 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!amdgpu_ring_sched_ready(ring))
continue;
- kthread_park(ring->sched.thread);
+ drm_sched_wqueue_stop(&ring->sched);
}
- seq_printf(m, "run ib test:\n");
+ seq_puts(m, "run ib test:\n");
r = amdgpu_ib_ring_tests(adev);
if (r)
seq_printf(m, "ib ring tests failed (%d).\n", r);
else
- seq_printf(m, "ib ring tests passed.\n");
+ seq_puts(m, "ib ring tests passed.\n");
/* go on the scheduler */
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!amdgpu_ring_sched_ready(ring))
continue;
- kthread_unpark(ring->sched.thread);
+ drm_sched_wqueue_start(&ring->sched);
}
- up_write(&adev->reset_sem);
+ up_write(&adev->reset_domain->sem);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return 0;
@@ -1330,7 +1700,6 @@ static int amdgpu_debugfs_evict_vram(void *data, u64 *val)
*val = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return 0;
@@ -1351,16 +1720,33 @@ static int amdgpu_debugfs_evict_gtt(void *data, u64 *val)
*val = amdgpu_ttm_evict_resources(adev, TTM_PL_TT);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return 0;
}
+/*
+ * debugfs setter: run amdgpu_benchmark() with the written value while the
+ * device holds a runtime-PM reference. Returns the benchmark result, or the
+ * negative error from pm_runtime_get_sync().
+ */
+static int amdgpu_debugfs_benchmark(void *data, u64 val)
+{
+	struct amdgpu_device *adev = data;
+	struct device *kdev = adev_to_drm(adev)->dev;
+	int ret;
+
+	ret = pm_runtime_get_sync(kdev);
+	if (ret >= 0)
+		ret = amdgpu_benchmark(adev, val);
+
+	/* the reference is dropped on both the success and the failure path */
+	pm_runtime_put_autosuspend(kdev);
+
+	return ret;
+}
static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct amdgpu_device *adev = m->private;
struct drm_device *dev = adev_to_drm(adev);
struct drm_file *file;
int r;
@@ -1372,9 +1758,14 @@ static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused)
list_for_each_entry(file, &dev->filelist, lhead) {
struct amdgpu_fpriv *fpriv = file->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_task_info *ti;
+
+ ti = amdgpu_vm_get_task_info_vm(vm);
+ if (ti) {
+ seq_printf(m, "pid:%d\tProcess:%s ----------\n", ti->task.pid, ti->process_name);
+ amdgpu_vm_put_task_info(ti);
+ }
- seq_printf(m, "pid:%d\tProcess:%s ----------\n",
- vm->task_info.pid, vm->task_info.process_name);
r = amdgpu_bo_reserve(vm->root.bo, true);
if (r)
break;
@@ -1393,6 +1784,8 @@ DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_evict_vram_fops, amdgpu_debugfs_evict_vram,
NULL, "%lld\n");
DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_evict_gtt_fops, amdgpu_debugfs_evict_gtt,
NULL, "%lld\n");
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_benchmark_fops, NULL, amdgpu_debugfs_benchmark,
+ "%lld\n");
static void amdgpu_ib_preempt_fences_swap(struct amdgpu_ring *ring,
struct dma_fence **fences)
@@ -1485,7 +1878,7 @@ no_preempt:
continue;
}
job = to_amdgpu_job(s_job);
- if (preempted && (&job->hw_fence) == fence)
+ if (preempted && (&job->hw_fence->base) == fence)
/* mark the job as preempted */
job->preemption_status |= AMDGPU_IB_PREEMPTED;
}
@@ -1494,7 +1887,7 @@ no_preempt:
static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
{
- int r, resched, length;
+ int r, length;
struct amdgpu_ring *ring;
struct dma_fence **fences = NULL;
struct amdgpu_device *adev = (struct amdgpu_device *)data;
@@ -1504,7 +1897,8 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
ring = adev->rings[val];
- if (!ring || !ring->funcs->preempt_ib || !ring->sched.thread)
+ if (!amdgpu_ring_sched_ready(ring) ||
+ !ring->funcs->preempt_ib)
return -EINVAL;
/* the last preemption failed */
@@ -1517,14 +1911,12 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
return -ENOMEM;
/* Avoid accidently unparking the sched thread during GPU reset */
- r = down_read_killable(&adev->reset_sem);
+ r = down_read_killable(&adev->reset_domain->sem);
if (r)
goto pro_end;
/* stop the scheduler */
- kthread_park(ring->sched.thread);
-
- resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
+ drm_sched_wqueue_stop(&ring->sched);
/* preempt the IB */
r = amdgpu_ring_preempt_ib(ring);
@@ -1558,11 +1950,9 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
failure:
/* restart the scheduler */
- kthread_unpark(ring->sched.thread);
+ drm_sched_wqueue_start(&ring->sched);
- up_read(&adev->reset_sem);
-
- ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
+ up_read(&adev->reset_domain->sem);
pro_end:
kfree(fences);
@@ -1576,7 +1966,7 @@ static int amdgpu_debugfs_sclk_set(void *data, u64 val)
uint32_t max_freq, min_freq;
struct amdgpu_device *adev = (struct amdgpu_device *)data;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ if (amdgpu_sriov_multi_vf_mode(adev))
return -EINVAL;
ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
@@ -1585,22 +1975,24 @@ static int amdgpu_debugfs_sclk_set(void *data, u64 val)
return ret;
}
- if (is_support_sw_smu(adev)) {
- ret = smu_get_dpm_freq_range(&adev->smu, SMU_SCLK, &min_freq, &max_freq);
- if (ret || val > max_freq || val < min_freq)
- return -EINVAL;
- ret = smu_set_soft_freq_range(&adev->smu, SMU_SCLK, (uint32_t)val, (uint32_t)val);
- } else {
- return 0;
+ ret = amdgpu_dpm_get_dpm_freq_range(adev, PP_SCLK, &min_freq, &max_freq);
+ if (ret == -EOPNOTSUPP) {
+ ret = 0;
+ goto out;
+ }
+ if (ret || val > max_freq || val < min_freq) {
+ ret = -EINVAL;
+ goto out;
}
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-
+ ret = amdgpu_dpm_set_soft_freq_range(adev, PP_SCLK, (uint32_t)val, (uint32_t)val);
if (ret)
- return -EINVAL;
+ ret = -EINVAL;
- return 0;
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return ret;
}
DEFINE_DEBUGFS_ATTRIBUTE(fops_ib_preempt, NULL,
@@ -1618,6 +2010,9 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
if (!debugfs_initialized())
return 0;
+ debugfs_create_x32("amdgpu_smu_debug", 0600, root,
+ &adev->pm.smu_debug_mask);
+
ent = debugfs_create_file("amdgpu_preempt_ib", 0600, root, adev,
&fops_ib_preempt);
if (IS_ERR(ent)) {
@@ -1644,9 +2039,12 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
DRM_ERROR("registering register debugfs failed (%d).\n", r);
amdgpu_debugfs_firmware_init(adev);
+ amdgpu_ta_if_debugfs_init(adev);
+
+ amdgpu_debugfs_mes_event_log_init(adev);
#if defined(CONFIG_DRM_AMD_DC)
- if (amdgpu_device_has_dc_support(adev))
+ if (adev->dc_enabled)
dtn_debugfs_init(adev);
#endif
@@ -1659,33 +2057,109 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
amdgpu_debugfs_ring_init(adev, ring);
}
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (!amdgpu_vcnfw_log)
+ break;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ amdgpu_debugfs_vcn_fwlog_init(adev, i, &adev->vcn.inst[i]);
+ }
+
+ if (amdgpu_umsch_mm & amdgpu_umsch_mm_fwlog)
+ amdgpu_debugfs_umsch_fwlog_init(adev, &adev->umsch_mm);
+
+ amdgpu_debugfs_vcn_sched_mask_init(adev);
+ amdgpu_debugfs_jpeg_sched_mask_init(adev);
+ amdgpu_debugfs_gfx_sched_mask_init(adev);
+ amdgpu_debugfs_compute_sched_mask_init(adev);
+ amdgpu_debugfs_sdma_sched_mask_init(adev);
+
amdgpu_ras_debugfs_create_all(adev);
amdgpu_rap_debugfs_init(adev);
amdgpu_securedisplay_debugfs_init(adev);
amdgpu_fw_attestation_debugfs_init(adev);
+ amdgpu_psp_debugfs_init(adev);
- debugfs_create_file("amdgpu_evict_vram", 0444, root, adev,
+ debugfs_create_file("amdgpu_evict_vram", 0400, root, adev,
&amdgpu_evict_vram_fops);
- debugfs_create_file("amdgpu_evict_gtt", 0444, root, adev,
+ debugfs_create_file("amdgpu_evict_gtt", 0400, root, adev,
&amdgpu_evict_gtt_fops);
- debugfs_create_file("amdgpu_test_ib", 0444, root, adev,
+ debugfs_create_file("amdgpu_test_ib", 0400, root, adev,
&amdgpu_debugfs_test_ib_fops);
debugfs_create_file("amdgpu_vm_info", 0444, root, adev,
&amdgpu_debugfs_vm_info_fops);
+ debugfs_create_file("amdgpu_benchmark", 0200, root, adev,
+ &amdgpu_benchmark_fops);
adev->debugfs_vbios_blob.data = adev->bios;
adev->debugfs_vbios_blob.size = adev->bios_size;
debugfs_create_blob("amdgpu_vbios", 0444, root,
&adev->debugfs_vbios_blob);
- adev->debugfs_discovery_blob.data = adev->mman.discovery_bin;
- adev->debugfs_discovery_blob.size = adev->mman.discovery_tmr_size;
- debugfs_create_blob("amdgpu_discovery", 0444, root,
- &adev->debugfs_discovery_blob);
+ if (adev->discovery.debugfs_blob.size)
+ debugfs_create_blob("amdgpu_discovery", 0444, root,
+ &adev->discovery.debugfs_blob);
+
+ return 0;
+}
+
+/*
+ * seq_file show callback for the per-client "vm_pagetable_info" node.
+ *
+ * m->private is the drm_file the node was created for. Prints the page
+ * directory address of that client's VM together with the device-wide
+ * vm_manager parameters. The root BO is referenced and reserved while the
+ * values are printed.
+ *
+ * Returns 0 on success, -EINVAL if there is no drm_file or the root BO
+ * cannot be reserved, -ENODEV if the client has no VM root BO.
+ */
+static int amdgpu_pt_info_read(struct seq_file *m, void *unused)
+{
+ struct drm_file *file;
+ struct amdgpu_fpriv *fpriv;
+ struct amdgpu_bo *root_bo;
+ struct amdgpu_device *adev;
+ int r;
+
+ file = m->private;
+ if (!file)
+ return -EINVAL;
+
+ adev = drm_to_adev(file->minor->dev);
+ fpriv = file->driver_priv;
+ if (!fpriv || !fpriv->vm.root.bo)
+ return -ENODEV;
+
+ /* take a reference so the BO can be unreserved and released below */
+ root_bo = amdgpu_bo_ref(fpriv->vm.root.bo);
+ r = amdgpu_bo_reserve(root_bo, true);
+ if (r) {
+ amdgpu_bo_unref(&root_bo);
+ /* the reserve error code is not propagated; -EINVAL is returned instead */
+ return -EINVAL;
+ }
+
+ seq_printf(m, "pd_address: 0x%llx\n", amdgpu_gmc_pd_addr(fpriv->vm.root.bo));
+ seq_printf(m, "max_pfn: 0x%llx\n", adev->vm_manager.max_pfn);
+ seq_printf(m, "num_level: 0x%x\n", adev->vm_manager.num_level);
+ seq_printf(m, "block_size: 0x%x\n", adev->vm_manager.block_size);
+ seq_printf(m, "fragment_size: 0x%x\n", adev->vm_manager.fragment_size);
+
+ amdgpu_bo_unreserve(root_bo);
+ amdgpu_bo_unref(&root_bo);
return 0;
}
+/* open handler: bind amdgpu_pt_info_read() to the inode's private data via single_open(). */
+static int amdgpu_pt_info_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, amdgpu_pt_info_read, inode->i_private);
+}
+
+/* seq_file based file operations for the "vm_pagetable_info" debugfs node. */
+static const struct file_operations amdgpu_pt_info_fops = {
+ .owner = THIS_MODULE,
+ .open = amdgpu_pt_info_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+/*
+ * Create the read-only "vm_pagetable_info" file in the client's debugfs
+ * directory (file->debugfs_client); the drm_file itself is the file's
+ * private data.
+ */
+void amdgpu_debugfs_vm_init(struct drm_file *file)
+{
+ debugfs_create_file("vm_pagetable_info", 0444, file->debugfs_client, file,
+ &amdgpu_pt_info_fops);
+}
+
#else
int amdgpu_debugfs_init(struct amdgpu_device *adev)
{
@@ -1695,4 +2169,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
{
return 0;
}
+/* No-op variant used in the #else branch, where debugfs support is compiled out. */
+void amdgpu_debugfs_vm_init(struct drm_file *file)
+{
+}
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
index 371a6f0deb29..e7b3c38e5186 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
@@ -32,3 +32,6 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev);
void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_vm_init(struct drm_file *file);
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
new file mode 100644
index 000000000000..4e2fe6674db8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <generated/utsrelease.h>
+#include <linux/devcoredump.h>
+#include "amdgpu_dev_coredump.h"
+#include "atom.h"
+
+#ifndef CONFIG_DEV_COREDUMP
+/* No-op variant built when CONFIG_DEV_COREDUMP is not defined. */
+void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
+ bool vram_lost, struct amdgpu_job *job)
+{
+}
+#else
+
+/*
+ * Printable names for hardware IP types, indexed by the *_HWIP value.
+ * Used when the coredump lists HW IP versions. Indices without an
+ * initializer here are left NULL.
+ */
+const char *hw_ip_names[MAX_HWIP] = {
+ [GC_HWIP] = "GC",
+ [HDP_HWIP] = "HDP",
+ [SDMA0_HWIP] = "SDMA0",
+ [SDMA1_HWIP] = "SDMA1",
+ [SDMA2_HWIP] = "SDMA2",
+ [SDMA3_HWIP] = "SDMA3",
+ [SDMA4_HWIP] = "SDMA4",
+ [SDMA5_HWIP] = "SDMA5",
+ [SDMA6_HWIP] = "SDMA6",
+ [SDMA7_HWIP] = "SDMA7",
+ [LSDMA_HWIP] = "LSDMA",
+ [MMHUB_HWIP] = "MMHUB",
+ [ATHUB_HWIP] = "ATHUB",
+ [NBIO_HWIP] = "NBIO",
+ [MP0_HWIP] = "MP0",
+ [MP1_HWIP] = "MP1",
+ [UVD_HWIP] = "UVD/JPEG/VCN",
+ [VCN1_HWIP] = "VCN1",
+ [VCE_HWIP] = "VCE",
+ [VPE_HWIP] = "VPE",
+ [DF_HWIP] = "DF",
+ [DCE_HWIP] = "DCE",
+ [OSSSYS_HWIP] = "OSSSYS",
+ [SMUIO_HWIP] = "SMUIO",
+ [PWR_HWIP] = "PWR",
+ [NBIF_HWIP] = "NBIF",
+ [THM_HWIP] = "THM",
+ [CLK_HWIP] = "CLK",
+ [UMC_HWIP] = "UMC",
+ [RSMU_HWIP] = "RSMU",
+ [XGMI_HWIP] = "XGMI",
+ [DCI_HWIP] = "DCI",
+ [PCIE_HWIP] = "PCIE",
+};
+
+/*
+ * Print the feature and firmware versions of the device's firmware
+ * components, followed by the VBIOS identification strings, to @p.
+ *
+ * Several entries pass a literal 0 as the feature version and print only
+ * the firmware version field.
+ */
+static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev,
+ struct drm_printer *p)
+{
+ uint32_t version;
+ uint32_t feature;
+ uint8_t smu_program, smu_major, smu_minor, smu_debug;
+ /* NOTE(review): ctx is dereferenced below without a NULL check - confirm atom_context is always set here */
+ struct atom_context *ctx = adev->mode_info.atom_context;
+
+ drm_printf(p, "VCE feature version: %u, fw version: 0x%08x\n",
+ adev->vce.fb_version, adev->vce.fw_version);
+ drm_printf(p, "UVD feature version: %u, fw version: 0x%08x\n", 0,
+ adev->uvd.fw_version);
+ drm_printf(p, "GMC feature version: %u, fw version: 0x%08x\n", 0,
+ adev->gmc.fw_version);
+ drm_printf(p, "ME feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.me_feature_version, adev->gfx.me_fw_version);
+ drm_printf(p, "PFP feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.pfp_feature_version, adev->gfx.pfp_fw_version);
+ drm_printf(p, "CE feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.ce_feature_version, adev->gfx.ce_fw_version);
+ drm_printf(p, "RLC feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlc_feature_version, adev->gfx.rlc_fw_version);
+
+ drm_printf(p, "RLC SRLC feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlc_srlc_feature_version,
+ adev->gfx.rlc_srlc_fw_version);
+ drm_printf(p, "RLC SRLG feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlc_srlg_feature_version,
+ adev->gfx.rlc_srlg_fw_version);
+ drm_printf(p, "RLC SRLS feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlc_srls_feature_version,
+ adev->gfx.rlc_srls_fw_version);
+ drm_printf(p, "RLCP feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlcp_ucode_feature_version,
+ adev->gfx.rlcp_ucode_version);
+ drm_printf(p, "RLCV feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlcv_ucode_feature_version,
+ adev->gfx.rlcv_ucode_version);
+ drm_printf(p, "MEC feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.mec_feature_version, adev->gfx.mec_fw_version);
+
+ /* MEC2 is reported only when a separate MEC2 firmware is present */
+ if (adev->gfx.mec2_fw)
+ drm_printf(p, "MEC2 feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.mec2_feature_version,
+ adev->gfx.mec2_fw_version);
+
+ drm_printf(p, "IMU feature version: %u, fw version: 0x%08x\n", 0,
+ adev->gfx.imu_fw_version);
+ drm_printf(p, "PSP SOS feature version: %u, fw version: 0x%08x\n",
+ adev->psp.sos.feature_version, adev->psp.sos.fw_version);
+ drm_printf(p, "PSP ASD feature version: %u, fw version: 0x%08x\n",
+ adev->psp.asd_context.bin_desc.feature_version,
+ adev->psp.asd_context.bin_desc.fw_version);
+
+ /* the TA entries print the feature version in hex, unlike the rest */
+ drm_printf(p, "TA XGMI feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.xgmi_context.context.bin_desc.feature_version,
+ adev->psp.xgmi_context.context.bin_desc.fw_version);
+ drm_printf(p, "TA RAS feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.ras_context.context.bin_desc.feature_version,
+ adev->psp.ras_context.context.bin_desc.fw_version);
+ drm_printf(p, "TA HDCP feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.hdcp_context.context.bin_desc.feature_version,
+ adev->psp.hdcp_context.context.bin_desc.fw_version);
+ drm_printf(p, "TA DTM feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.dtm_context.context.bin_desc.feature_version,
+ adev->psp.dtm_context.context.bin_desc.fw_version);
+ drm_printf(p, "TA RAP feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.rap_context.context.bin_desc.feature_version,
+ adev->psp.rap_context.context.bin_desc.fw_version);
+ drm_printf(p,
+ "TA SECURE DISPLAY feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.securedisplay_context.context.bin_desc.feature_version,
+ adev->psp.securedisplay_context.context.bin_desc.fw_version);
+
+ /* SMC firmware */
+ version = adev->pm.fw_version;
+
+ /* split the 32-bit version into four bytes, most significant first */
+ smu_program = (version >> 24) & 0xff;
+ smu_major = (version >> 16) & 0xff;
+ smu_minor = (version >> 8) & 0xff;
+ smu_debug = (version >> 0) & 0xff;
+ drm_printf(p,
+ "SMC feature version: %u, program: %d, fw version: 0x%08x (%d.%d.%d)\n",
+ 0, smu_program, version, smu_major, smu_minor, smu_debug);
+
+ /* SDMA firmware */
+ for (int i = 0; i < adev->sdma.num_instances; i++) {
+ drm_printf(p,
+ "SDMA%d feature version: %u, firmware version: 0x%08x\n",
+ i, adev->sdma.instance[i].feature_version,
+ adev->sdma.instance[i].fw_version);
+ }
+
+ drm_printf(p, "VCN feature version: %u, fw version: 0x%08x\n", 0,
+ adev->vcn.fw_version);
+ drm_printf(p, "DMCU feature version: %u, fw version: 0x%08x\n", 0,
+ adev->dm.dmcu_fw_version);
+ drm_printf(p, "DMCUB feature version: %u, fw version: 0x%08x\n", 0,
+ adev->dm.dmcub_fw_version);
+ drm_printf(p, "PSP TOC feature version: %u, fw version: 0x%08x\n",
+ adev->psp.toc.feature_version, adev->psp.toc.fw_version);
+
+ /* kiq_version holds both fields; separate them with the MES masks */
+ version = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK;
+ feature = (adev->mes.kiq_version & AMDGPU_MES_FEAT_VERSION_MASK) >>
+ AMDGPU_MES_FEAT_VERSION_SHIFT;
+ drm_printf(p, "MES_KIQ feature version: %u, fw version: 0x%08x\n",
+ feature, version);
+
+ /* same split for the scheduler firmware version */
+ version = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
+ feature = (adev->mes.sched_version & AMDGPU_MES_FEAT_VERSION_MASK) >>
+ AMDGPU_MES_FEAT_VERSION_SHIFT;
+ drm_printf(p, "MES feature version: %u, fw version: 0x%08x\n", feature,
+ version);
+
+ drm_printf(p, "VPE feature version: %u, fw version: 0x%08x\n",
+ adev->vpe.feature_version, adev->vpe.fw_version);
+
+ drm_printf(p, "\nVBIOS Information\n");
+ drm_printf(p, "vbios name : %s\n", ctx->name);
+ drm_printf(p, "vbios pn : %s\n", ctx->vbios_pn);
+ drm_printf(p, "vbios version : %d\n", ctx->version);
+ drm_printf(p, "vbios ver_str : %s\n", ctx->vbios_ver_str);
+ drm_printf(p, "vbios date : %s\n", ctx->date);
+}
+
+/*
+ * devcoredump read callback: format the coredump text into @buffer.
+ *
+ * @buffer: destination for this read
+ * @offset: position in the formatted text where this read starts
+ * @count: capacity of @buffer
+ * @data: the struct amdgpu_coredump_info handed to dev_coredumpm()
+ * @datalen: not used
+ *
+ * The whole report is produced on every call; the coredump printer keeps
+ * only the window described by @offset and @count.
+ *
+ * Return: number of bytes placed in @buffer.
+ */
+static ssize_t
+amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
+			void *data, size_t datalen)
+{
+	struct amdgpu_coredump_info *coredump = data;
+	struct amdgpu_device *adev = coredump->adev;
+	struct drm_print_iterator iter = {
+		.data = buffer,
+		.offset = 0,
+		.start = offset,
+		.remain = count,
+	};
+	struct drm_printer p = drm_coredump_printer(&iter);
+	struct amdgpu_vm_fault_info *fault_info = &adev->vm_manager.fault_info;
+	struct amdgpu_ip_block *ip_block;
+	int ver;
+
+	/* Header */
+	drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
+	drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n");
+	drm_printf(&p, "kernel: " UTS_RELEASE "\n");
+	drm_printf(&p, "module: " KBUILD_MODNAME "\n");
+	drm_printf(&p, "time: %ptSp\n", &coredump->reset_time);
+
+	if (coredump->reset_task_info.task.pid)
+		drm_printf(&p, "process_name: %s PID: %d\n",
+			   coredump->reset_task_info.process_name,
+			   coredump->reset_task_info.task.pid);
+
+	/* SOC Information */
+	drm_printf(&p, "\nSOC Information\n");
+	drm_printf(&p, "SOC Device id: %d\n", adev->pdev->device);
+	drm_printf(&p, "SOC PCI Revision id: %d\n", adev->pdev->revision);
+	drm_printf(&p, "SOC Family: %d\n", adev->family);
+	drm_printf(&p, "SOC Revision id: %d\n", adev->rev_id);
+	drm_printf(&p, "SOC External Revision id: %d\n", adev->external_rev_id);
+
+	/* Memory Information */
+	drm_printf(&p, "\nSOC Memory Information\n");
+	drm_printf(&p, "real vram size: %llu\n", adev->gmc.real_vram_size);
+	drm_printf(&p, "visible vram size: %llu\n", adev->gmc.visible_vram_size);
+	drm_printf(&p, "gtt size: %llu\n", adev->mman.gtt_mgr.manager.size);
+
+	/*
+	 * GDS Config
+	 *
+	 * NOTE(review): "compute partition size" prints gds_size again, and
+	 * the "os" label prints oa_size (probably meant "oa"). Both are kept
+	 * as-is because tools may parse this text; confirm before changing.
+	 */
+	drm_printf(&p, "\nGDS Config\n");
+	drm_printf(&p, "gds: total size: %d\n", adev->gds.gds_size);
+	drm_printf(&p, "gds: compute partition size: %d\n", adev->gds.gds_size);
+	drm_printf(&p, "gds: gws per compute partition: %d\n", adev->gds.gws_size);
+	drm_printf(&p, "gds: os per compute partition: %d\n", adev->gds.oa_size);
+
+	/* HWIP Version Information: only non-zero entries are listed */
+	drm_printf(&p, "\nHW IP Version Information\n");
+	for (int hwip = 1; hwip < MAX_HWIP; hwip++) {
+		for (int inst = 0; inst < HWIP_MAX_INSTANCE; inst++) {
+			ver = adev->ip_versions[hwip][inst];
+			if (!ver)
+				continue;
+
+			drm_printf(&p, "HWIP: %s[%d][%d]: v%d.%d.%d.%d.%d\n",
+				   hw_ip_names[hwip], hwip, inst,
+				   IP_VERSION_MAJ(ver),
+				   IP_VERSION_MIN(ver),
+				   IP_VERSION_REV(ver),
+				   IP_VERSION_VARIANT(ver),
+				   IP_VERSION_SUBREV(ver));
+		}
+	}
+
+	/* IP firmware information */
+	drm_printf(&p, "\nIP Firmwares\n");
+	amdgpu_devcoredump_fw_info(adev, &p);
+
+	/* Ring of the job that triggered the dump, when one was recorded */
+	if (coredump->ring) {
+		drm_printf(&p, "\nRing timed out details\n");
+		drm_printf(&p, "IP Type: %d Ring Name: %s\n",
+			   coredump->ring->funcs->type,
+			   coredump->ring->name);
+	}
+
+	/* Page fault information */
+	drm_printf(&p, "\n[%s] Page fault observed\n",
+		   fault_info->vmhub ? "mmhub" : "gfxhub");
+	drm_printf(&p, "Faulty page starting at address: 0x%016llx\n", fault_info->addr);
+	drm_printf(&p, "Protection fault status register: 0x%x\n\n", fault_info->status);
+
+	/* Dump the state of every IP block that provides a printer */
+	drm_printf(&p, "IP Dump\n");
+	for (int blk = 0; blk < adev->num_ip_blocks; blk++) {
+		ip_block = &adev->ip_blocks[blk];
+		if (!ip_block->version->funcs->print_ip_state)
+			continue;
+
+		drm_printf(&p, "IP: %s\n", ip_block->version->funcs->name);
+		ip_block->version->funcs->print_ip_state(ip_block, &p);
+		drm_printf(&p, "\n");
+	}
+
+	/* Ring buffer information: pointers, then one line per dword */
+	drm_printf(&p, "Ring buffer information\n");
+	for (int idx = 0; idx < adev->num_rings; idx++) {
+		struct amdgpu_ring *ring = adev->rings[idx];
+		int byte_off;
+
+		drm_printf(&p, "ring name: %s\n", ring->name);
+		drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: %x\n",
+			   amdgpu_ring_get_rptr(ring),
+			   amdgpu_ring_get_wptr(ring),
+			   ring->buf_mask);
+		drm_printf(&p, "Ring size in dwords: %d\n",
+			   ring->ring_size / 4);
+		drm_printf(&p, "Ring contents\n");
+		drm_printf(&p, "Offset \t Value\n");
+
+		for (byte_off = 0; byte_off < ring->ring_size; byte_off += 4)
+			drm_printf(&p, "0x%x \t 0x%x\n", byte_off,
+				   ring->ring[byte_off / 4]);
+	}
+
+	if (coredump->skip_vram_check)
+		drm_printf(&p, "VRAM lost check is skipped!\n");
+	else if (coredump->reset_vram_lost)
+		drm_printf(&p, "VRAM is lost due to GPU reset!\n");
+
+	return count - iter.remain;
+}
+
+/* devcoredump free callback: release the amdgpu_coredump_info allocated in amdgpu_coredump(). */
+static void amdgpu_devcoredump_free(void *data)
+{
+ kfree(data);
+}
+
+/*
+ * Capture reset context and register a device coredump for @adev.
+ *
+ * @adev: device being dumped
+ * @skip_vram_check: recorded as-is and reported in the dump
+ * @vram_lost: recorded as-is and reported in the dump
+ * @job: job associated with the reset, may be NULL
+ *
+ * When @job is given, its scheduler's ring is recorded, and if the job has
+ * a pasid the matching task info is copied. The text itself is generated
+ * later by amdgpu_devcoredump_read(). Allocation uses GFP_NOWAIT; on
+ * failure an error is logged and no dump is created.
+ */
+void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
+		     bool vram_lost, struct amdgpu_job *job)
+{
+	struct drm_device *ddev = adev_to_drm(adev);
+	struct amdgpu_coredump_info *info;
+
+	info = kzalloc(sizeof(*info), GFP_NOWAIT);
+	if (!info) {
+		DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__);
+		return;
+	}
+
+	info->adev = adev;
+	info->skip_vram_check = skip_vram_check;
+	info->reset_vram_lost = vram_lost;
+
+	if (job) {
+		if (job->pasid) {
+			struct amdgpu_task_info *ti;
+
+			ti = amdgpu_vm_get_task_info_pasid(adev, job->pasid);
+			if (ti) {
+				/* keep a copy; the reference is dropped right away */
+				info->reset_task_info = *ti;
+				amdgpu_vm_put_task_info(ti);
+			}
+		}
+
+		info->ring = to_amdgpu_ring(job->base.sched);
+	}
+
+	ktime_get_ts64(&info->reset_time);
+
+	dev_coredumpm(ddev->dev, THIS_MODULE, info, 0, GFP_NOWAIT,
+		      amdgpu_devcoredump_read, amdgpu_devcoredump_free);
+
+	drm_info(ddev, "AMDGPU device coredump file has been created\n");
+	drm_info(ddev, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
+		 ddev->primary->index);
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
new file mode 100644
index 000000000000..ef9772c6bcc9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_DEV_COREDUMP_H__
+#define __AMDGPU_DEV_COREDUMP_H__
+
+#include "amdgpu.h"
+
+#ifdef CONFIG_DEV_COREDUMP
+
+#define AMDGPU_COREDUMP_VERSION "1"
+
+/*
+ * State captured by amdgpu_coredump() and passed to dev_coredumpm() as the
+ * data for the devcoredump read and free callbacks.
+ */
+struct amdgpu_coredump_info {
+ /* device the coredump was taken for */
+ struct amdgpu_device *adev;
+ /* copy of the task info looked up from the job's pasid, if any */
+ struct amdgpu_task_info reset_task_info;
+ /* timestamp taken with ktime_get_ts64() when the dump is created */
+ struct timespec64 reset_time;
+ /* caller-supplied flag: the VRAM lost check was skipped */
+ bool skip_vram_check;
+ /* caller-supplied flag: VRAM was lost due to the GPU reset */
+ bool reset_vram_lost;
+ /* ring the job was scheduled on; left NULL when no job is given */
+ struct amdgpu_ring *ring;
+};
+#endif
+
+void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
+ bool vram_lost, struct amdgpu_job *job);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1e651b959141..58c3ffe707d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -25,15 +25,24 @@
* Alex Deucher
* Jerome Glisse
*/
+
+#include <linux/aperture.h>
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>
+#include <linux/iommu.h>
+#include <linux/pci.h>
+#include <linux/pci-p2pdma.h>
+#include <linux/apple-gmux.h>
#include <drm/drm_atomic_helper.h>
+#include <drm/drm_client_event.h>
+#include <drm/drm_crtc_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
+#include <linux/device.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
@@ -54,7 +63,6 @@
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
-#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"
@@ -63,9 +71,12 @@
#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
+#include "amdgpu_ras_mgr.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"
+#include "amdgpu_virt.h"
+#include "amdgpu_dev_coredump.h"
#include <linux/suspend.h>
#include <drm/task_barrier.h>
@@ -73,20 +84,31 @@
#include <drm/drm_drv.h>
+#if IS_ENABLED(CONFIG_X86)
+#include <asm/intel-family.h>
+#include <asm/cpu_device_id.h>
+#endif
+
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
-MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
-MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
-MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
-MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin");
-MODULE_FIRMWARE("amdgpu/yellow_carp_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish_gpu_info.bin");
#define AMDGPU_RESUME_MS 2000
+#define AMDGPU_MAX_RETRY_LIMIT 2
+#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
+#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
+#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
+#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
+
+#define AMDGPU_VBIOS_SKIP (1U << 0)
+#define AMDGPU_VBIOS_OPTIONAL (1U << 1)
+
+static const struct drm_driver amdgpu_kms_driver;
const char *amdgpu_asic_name[] = {
"TAHITI",
@@ -129,13 +151,76 @@ const char *amdgpu_asic_name[] = {
"LAST",
};
+#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM - 1, 0)
+/*
+ * Default init level where all blocks are expected to be initialized. This is
+ * the level of initialization expected by default and also after a full reset
+ * of the device.
+ */
+struct amdgpu_init_level amdgpu_init_default = {
+ .level = AMDGPU_INIT_LEVEL_DEFAULT,
+ .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
+};
+
+/*
+ * Init level selected for AMDGPU_INIT_LEVEL_RESET_RECOVERY. As with the
+ * default level, every IP block is part of the hardware init mask.
+ */
+struct amdgpu_init_level amdgpu_init_recovery = {
+ .level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
+ .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
+};
+
+/*
+ * Minimal blocks needed to be initialized before a XGMI hive can be reset. This
+ * is used for cases like reset on initialization where the entire hive needs to
+ * be reset before first use.
+ */
+struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
+ .level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
+ .hwini_ip_block_mask =
+ BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
+ BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH) |
+ BIT(AMD_IP_BLOCK_TYPE_PSP)
+};
+
+static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev);
+static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev);
+static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev);
+
+static void amdgpu_device_load_switch_state(struct amdgpu_device *adev);
+
+/*
+ * Report whether @block is set in the hardware init mask of the init level
+ * currently selected for @adev.
+ */
+static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
+					     enum amd_ip_block_type block)
+{
+	const unsigned int block_bit = 1U << block;
+
+	return !!(adev->init_lvl->hwini_ip_block_mask & block_bit);
+}
+
+/*
+ * Select the init level descriptor for @adev. Any level other than
+ * MINIMAL_XGMI or RESET_RECOVERY falls back to the default descriptor.
+ */
+void amdgpu_set_init_level(struct amdgpu_device *adev,
+			   enum amdgpu_init_lvl_id lvl)
+{
+	struct amdgpu_init_level *selected = &amdgpu_init_default;
+
+	if (lvl == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
+		selected = &amdgpu_init_minimal_xgmi;
+	else if (lvl == AMDGPU_INIT_LEVEL_RESET_RECOVERY)
+		selected = &amdgpu_init_recovery;
+
+	adev->init_lvl = selected;
+}
+
+static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
+static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
+ void *data);
+
/**
* DOC: pcie_replay_count
*
* The amdgpu driver provides a sysfs API for reporting the total number
- * of PCIe replays (NAKs)
+ * of PCIe replays (NAKs).
* The file pcie_replay_count is used for this and returns the total
- * number of replays as a sum of the NAKs generated and NAKs received
+ * number of replays as a sum of the NAKs generated and NAKs received.
*/
static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
@@ -148,89 +233,202 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
return sysfs_emit(buf, "%llu\n", cnt);
}
-static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
+static DEVICE_ATTR(pcie_replay_count, 0444,
amdgpu_device_get_pcie_replay_count, NULL);
-static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
+static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev)
+{
+ int ret = 0;
-/**
- * DOC: product_name
- *
- * The amdgpu driver provides a sysfs API for reporting the product name
- * for the device
- * The file serial_number is used for this and returns the product name
- * as returned from the FRU.
- * NOTE: This is only available for certain server cards
- */
+ if (amdgpu_nbio_is_replay_cnt_supported(adev))
+ ret = sysfs_create_file(&adev->dev->kobj,
+ &dev_attr_pcie_replay_count.attr);
-static ssize_t amdgpu_device_get_product_name(struct device *dev,
- struct device_attribute *attr, char *buf)
+ return ret;
+}
+
+static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (amdgpu_nbio_is_replay_cnt_supported(adev))
+ sysfs_remove_file(&adev->dev->kobj,
+ &dev_attr_pcie_replay_count.attr);
+}
+
+static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
+ const struct bin_attribute *attr, char *buf,
+ loff_t ppos, size_t count)
{
+ struct device *dev = kobj_to_dev(kobj);
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
+ ssize_t bytes_read;
+
+ switch (ppos) {
+ case AMDGPU_SYS_REG_STATE_XGMI:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
+ break;
+ case AMDGPU_SYS_REG_STATE_WAFL:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
+ break;
+ case AMDGPU_SYS_REG_STATE_PCIE:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
+ break;
+ case AMDGPU_SYS_REG_STATE_USR:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
+ break;
+ case AMDGPU_SYS_REG_STATE_USR_1:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
+ break;
+ default:
+ return -EINVAL;
+ }
- return sysfs_emit(buf, "%s\n", adev->product_name);
+ return bytes_read;
}
-static DEVICE_ATTR(product_name, S_IRUGO,
- amdgpu_device_get_product_name, NULL);
+static const BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
+ AMDGPU_SYS_REG_STATE_END);
+
+/*
+ * Create the reg_state binary sysfs file when the ASIC supports register
+ * state reporting. Returns 0 when unsupported, otherwise the result of
+ * sysfs_create_bin_file().
+ */
+int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
+{
+	if (amdgpu_asic_get_reg_state_supported(adev))
+		return sysfs_create_bin_file(&adev->dev->kobj,
+					     &bin_attr_reg_state);
+
+	return 0;
+}
+
+/*
+ * Remove the reg_state binary sysfs file; does nothing when the ASIC does
+ * not support register state reporting.
+ */
+void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
+{
+	if (amdgpu_asic_get_reg_state_supported(adev))
+		sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
+}
+
+/*
+ * Call the IP block's suspend hook, if it has one, and clear status.hw on
+ * success. Returns 0 on success or the hook's error code, in which case
+ * status.hw is left untouched.
+ */
+int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block)
+{
+	int ret = 0;
+
+	/* A block without a suspend hook is simply marked as not in hw. */
+	if (ip_block->version->funcs->suspend)
+		ret = ip_block->version->funcs->suspend(ip_block);
+
+	if (ret) {
+		dev_err(ip_block->adev->dev,
+			"suspend of IP block <%s> failed %d\n",
+			ip_block->version->funcs->name, ret);
+		return ret;
+	}
+
+	ip_block->status.hw = false;
+	return 0;
+}
+
+/*
+ * Call the IP block's resume hook, if it has one, and set status.hw on
+ * success. Returns 0 on success or the hook's error code, in which case
+ * status.hw is left untouched.
+ */
+int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block)
+{
+	int ret = 0;
+
+	/* A block without a resume hook is simply marked as in hw. */
+	if (ip_block->version->funcs->resume)
+		ret = ip_block->version->funcs->resume(ip_block);
+
+	if (ret) {
+		dev_err(ip_block->adev->dev,
+			"resume of IP block <%s> failed %d\n",
+			ip_block->version->funcs->name, ret);
+		return ret;
+	}
+
+	ip_block->status.hw = true;
+	return 0;
+}
/**
- * DOC: product_number
+ * DOC: board_info
+ *
+ * The amdgpu driver provides a sysfs API for giving board related information.
+ * It provides the form factor information in the format
+ *
+ * type : form factor
+ *
+ * Possible form factor values
+ *
+ * - "cem" - PCIE CEM card
+ * - "oam" - Open Compute Accelerator Module
+ * - "unknown" - Not known
*
- * The amdgpu driver provides a sysfs API for reporting the part number
- * for the device
- * The file serial_number is used for this and returns the part number
- * as returned from the FRU.
- * NOTE: This is only available for certain server cards
*/
-static ssize_t amdgpu_device_get_product_number(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t amdgpu_device_get_board_info(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
+ enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
+ const char *pkg;
+
+ if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
+ pkg_type = adev->smuio.funcs->get_pkg_type(adev);
- return sysfs_emit(buf, "%s\n", adev->product_number);
+ switch (pkg_type) {
+ case AMDGPU_PKG_TYPE_CEM:
+ pkg = "cem";
+ break;
+ case AMDGPU_PKG_TYPE_OAM:
+ pkg = "oam";
+ break;
+ default:
+ pkg = "unknown";
+ break;
+ }
+
+ return sysfs_emit(buf, "%s : %s\n", "type", pkg);
}
-static DEVICE_ATTR(product_number, S_IRUGO,
- amdgpu_device_get_product_number, NULL);
+static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
-/**
- * DOC: serial_number
- *
- * The amdgpu driver provides a sysfs API for reporting the serial number
- * for the device
- * The file serial_number is used for this and returns the serial number
- * as returned from the FRU.
- * NOTE: This is only available for certain server cards
- */
+static struct attribute *amdgpu_board_attrs[] = {
+ &dev_attr_board_info.attr,
+ NULL,
+};
-static ssize_t amdgpu_device_get_serial_number(struct device *dev,
- struct device_attribute *attr, char *buf)
+static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
{
+ struct device *dev = kobj_to_dev(kobj);
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- return sysfs_emit(buf, "%s\n", adev->serial);
+ if (adev->flags & AMD_IS_APU)
+ return 0;
+
+ return attr->mode;
}
-static DEVICE_ATTR(serial_number, S_IRUGO,
- amdgpu_device_get_serial_number, NULL);
+static const struct attribute_group amdgpu_board_attrs_group = {
+ .attrs = amdgpu_board_attrs,
+ .is_visible = amdgpu_board_attrs_is_visible
+};
+
+static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
/**
* amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
*
- * @dev: drm_device pointer
+ * @adev: amdgpu device pointer
*
* Returns true if the device is a dGPU with ATPX power control,
* otherwise return false.
*/
-bool amdgpu_device_supports_px(struct drm_device *dev)
+bool amdgpu_device_supports_px(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = drm_to_adev(dev);
-
if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
return true;
return false;
@@ -239,14 +437,15 @@ bool amdgpu_device_supports_px(struct drm_device *dev)
/**
* amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
*
- * @dev: drm_device pointer
+ * @adev: amdgpu device pointer
*
* Returns true if the device is a dGPU with ACPI power control,
* otherwise return false.
*/
-bool amdgpu_device_supports_boco(struct drm_device *dev)
+bool amdgpu_device_supports_boco(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = drm_to_adev(dev);
+ if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
+ return false;
if (adev->has_pr3 ||
((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
@@ -257,30 +456,105 @@ bool amdgpu_device_supports_boco(struct drm_device *dev)
/**
* amdgpu_device_supports_baco - Does the device support BACO
*
- * @dev: drm_device pointer
+ * @adev: amdgpu device pointer
*
- * Returns true if the device supporte BACO,
- * otherwise return false.
+ * Return:
+ * 1 if the device supports BACO;
+ * 3 if the device supports MACO (only works if BACO is supported)
+ * otherwise return 0.
*/
-bool amdgpu_device_supports_baco(struct drm_device *dev)
+int amdgpu_device_supports_baco(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = drm_to_adev(dev);
-
return amdgpu_asic_supports_baco(adev);
}
+/**
+ * amdgpu_device_detect_runtime_pm_mode - pick the runtime PM mode
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Resets adev->pm.rpm_mode to AMDGPU_RUNPM_NONE and then selects a mode
+ * from the value of amdgpu_runtime_pm:
+ *
+ * - 2: BAMACO if MACO is supported, otherwise BACO if only BACO is supported
+ * - 1: BACO if supported
+ * - -1, -2: PX if supported, else BOCO if supported, else BACO/BAMACO
+ *   subject to the per-ASIC checks below
+ * - 0: runtime pm is reported as manually disabled
+ *
+ * If no mode was selected, "Runtime PM not available" is logged.
+ */
+void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
+{
+ int bamaco_support;
+
+ adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
+ /* tested below as a bit mask against BACO_SUPPORT / MACO_SUPPORT */
+ bamaco_support = amdgpu_device_supports_baco(adev);
+
+ switch (amdgpu_runtime_pm) {
+ case 2:
+ if (bamaco_support & MACO_SUPPORT) {
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
+ dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
+ } else if (bamaco_support == BACO_SUPPORT) {
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n");
+ }
+ break;
+ case 1:
+ if (bamaco_support & BACO_SUPPORT) {
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ dev_info(adev->dev, "Forcing BACO for runtime pm\n");
+ }
+ break;
+ case -1:
+ case -2:
+ if (amdgpu_device_supports_px(adev)) {
+ /* enable PX as runtime mode */
+ adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
+ dev_info(adev->dev, "Using ATPX for runtime pm\n");
+ } else if (amdgpu_device_supports_boco(adev)) {
+ /* enable boco as runtime mode */
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
+ dev_info(adev->dev, "Using BOCO for runtime pm\n");
+ } else {
+ /* neither BACO nor MACO reported: nothing left to try */
+ if (!bamaco_support)
+ goto no_runtime_pm;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ /* BACO is not supported on vega20 and arcturus */
+ break;
+ case CHIP_VEGA10:
+ /* enable BACO as runpm mode if noretry=0 */
+ if (!adev->gmc.noretry && !amdgpu_passthrough(adev))
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ break;
+ default:
+ /* enable BACO as runpm mode on CI+ */
+ if (!amdgpu_passthrough(adev))
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ break;
+ }
+
+ /* upgrade the BACO choice to BAMACO when MACO is also supported */
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
+ if (bamaco_support & MACO_SUPPORT) {
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
+ dev_info(adev->dev, "Using BAMACO for runtime pm\n");
+ } else {
+ dev_info(adev->dev, "Using BACO for runtime pm\n");
+ }
+ }
+ }
+ break;
+ case 0:
+ dev_info(adev->dev, "runtime pm is manually disabled\n");
+ break;
+ default:
+ break;
+ }
+
+no_runtime_pm:
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
+ dev_info(adev->dev, "Runtime PM not available\n");
+}
/**
* amdgpu_device_supports_smart_shift - Is the device dGPU with
* smart shift support
*
- * @dev: drm_device pointer
+ * @adev: amdgpu device pointer
*
* Returns true if the device is a dGPU with Smart Shift support,
* otherwise returns false.
*/
-bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
+bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev)
{
- return (amdgpu_device_supports_boco(dev) &&
+ return (amdgpu_device_supports_boco(adev) &&
amdgpu_acpi_is_power_shift_control_supported());
}
@@ -331,7 +605,7 @@ void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
}
/**
- * amdgpu_device_vram_access - access vram by vram aperature
+ * amdgpu_device_aper_access - access vram by vram aperture
*
* @adev: amdgpu_device pointer
* @pos: offset of the buffer in vram
@@ -359,10 +633,16 @@ size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
if (write) {
memcpy_toio(addr, buf, count);
+ /* Make sure HDP write cache flush happens without any reordering
+ * after the system memory contents are sent over PCIe device
+ */
mb();
amdgpu_device_flush_hdp(adev, NULL);
} else {
amdgpu_device_invalidate_hdp(adev, NULL);
+ /* Make sure HDP read cache is invalidated before issuing a read
+ * to the PCIe device
+ */
mb();
memcpy_fromio(buf, addr, count);
}
@@ -416,17 +696,17 @@ bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
* here is that the GPU reset is not running on another thread in parallel.
*
* For this we trylock the read side of the reset semaphore, if that succeeds
- * we know that the reset is not running in paralell.
+ * we know that the reset is not running in parallel.
*
* If the trylock fails we assert that we are either already holding the read
* side of the lock or are the reset thread itself and hold the write side of
* the lock.
*/
if (in_task()) {
- if (down_read_trylock(&adev->reset_sem))
- up_read(&adev->reset_sem);
+ if (down_read_trylock(&adev->reset_domain->sem))
+ up_read(&adev->reset_domain->sem);
else
- lockdep_assert_held(&adev->reset_sem);
+ lockdep_assert_held(&adev->reset_domain->sem);
}
#endif
return false;
@@ -452,9 +732,9 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
if ((reg * 4) < adev->rmmio_size) {
if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
amdgpu_sriov_runtime(adev) &&
- down_read_trylock(&adev->reset_sem)) {
- ret = amdgpu_kiq_rreg(adev, reg);
- up_read(&adev->reset_sem);
+ down_read_trylock(&adev->reset_domain->sem)) {
+ ret = amdgpu_kiq_rreg(adev, reg, 0);
+ up_read(&adev->reset_domain->sem);
} else {
ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
}
@@ -470,8 +750,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
/*
* MMIO register read with bytes helper functions
* @offset:bytes offset from MMIO start
- *
-*/
+ */
/**
* amdgpu_mm_rreg8 - read a memory mapped IO register
@@ -491,12 +770,55 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
BUG();
}
+
+/**
+ * amdgpu_device_xcc_rreg - read a register for a specific XCC
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @acc_flags: access flags which require special behavior
+ * @xcc_id: xcc accelerated compute core id
+ *
+ * Offsets at or beyond rmmio_size are read through adev->pcie_rreg().
+ * Offsets inside it use the RLCG path, the KIQ path or a plain readl(),
+ * tried in that order.
+ *
+ * Returns the 32 bit value from the offset specified, or 0 when hardware
+ * access is being skipped.
+ */
+uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
+				uint32_t reg, uint32_t acc_flags,
+				uint32_t xcc_id)
+{
+	uint32_t val, rlcg_flag;
+
+	if (amdgpu_device_skip_hw_access(adev))
+		return 0;
+
+	/* Outside the mapped MMIO range: use the indirect accessor. */
+	if ((reg * 4) >= adev->rmmio_size)
+		return adev->pcie_rreg(adev, reg * 4);
+
+	if (amdgpu_sriov_vf(adev) &&
+	    !amdgpu_sriov_runtime(adev) &&
+	    adev->gfx.rlc.rlcg_reg_access_supported &&
+	    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
+						 GC_HWIP, false,
+						 &rlcg_flag))
+		return amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag,
+					       GET_INST(GC, xcc_id));
+
+	/* The KIQ read runs with the reset semaphore held for reading. */
+	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
+	    amdgpu_sriov_runtime(adev) &&
+	    down_read_trylock(&adev->reset_domain->sem)) {
+		val = amdgpu_kiq_rreg(adev, reg, xcc_id);
+		up_read(&adev->reset_domain->sem);
+		return val;
+	}
+
+	return readl(((void __iomem *)adev->rmmio) + (reg * 4));
+}
+
/*
* MMIO register write with bytes helper functions
* @offset:bytes offset from MMIO start
* @value: the value want to be written to the register
- *
-*/
+ */
+
/**
* amdgpu_mm_wreg8 - read a memory mapped IO register
*
@@ -537,9 +859,9 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
if ((reg * 4) < adev->rmmio_size) {
if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
amdgpu_sriov_runtime(adev) &&
- down_read_trylock(&adev->reset_sem)) {
- amdgpu_kiq_wreg(adev, reg, v);
- up_read(&adev->reset_sem);
+ down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_kiq_wreg(adev, reg, v, 0);
+ up_read(&adev->reset_domain->sem);
} else {
writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
}
@@ -550,13 +872,19 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
}
-/*
- * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
+/**
+ * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
*
- * this function is invoked only the debugfs register access
- * */
+ * @adev: amdgpu_device pointer
+ * @reg: mmio/rlc register
+ * @v: value to write
+ * @xcc_id: xcc accelerated compute core id
+ *
+ * this function is invoked only for the debugfs register access
+ */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
- uint32_t reg, uint32_t v)
+ uint32_t reg, uint32_t v,
+ uint32_t xcc_id)
{
if (amdgpu_device_skip_hw_access(adev))
return;
@@ -565,126 +893,133 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
adev->gfx.rlc.funcs &&
adev->gfx.rlc.funcs->is_rlcg_access_range) {
if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
- return adev->gfx.rlc.funcs->sriov_wreg(adev, reg, v, 0, 0);
+ return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
+ } else if ((reg * 4) >= adev->rmmio_size) {
+ adev->pcie_wreg(adev, reg * 4, v);
} else {
writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
}
}
/**
- * amdgpu_mm_rdoorbell - read a doorbell dword
+ * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
*
* @adev: amdgpu_device pointer
- * @index: doorbell index
+ * @reg: dword aligned register offset
+ * @v: 32 bit value to write to the register
+ * @acc_flags: access flags which require special behavior
+ * @xcc_id: xcc accelerated compute core id
*
- * Returns the value in the doorbell aperture at the
- * requested doorbell index (CIK).
+ * Writes the value specified to the offset specified.
*/
-u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
+void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t v,
+ uint32_t acc_flags, uint32_t xcc_id)
{
- if (amdgpu_device_skip_hw_access(adev))
- return 0;
-
- if (index < adev->doorbell.num_doorbells) {
- return readl(adev->doorbell.ptr + index);
- } else {
- DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
- return 0;
- }
-}
+ uint32_t rlcg_flag;
-/**
- * amdgpu_mm_wdoorbell - write a doorbell dword
- *
- * @adev: amdgpu_device pointer
- * @index: doorbell index
- * @v: value to write
- *
- * Writes @v to the doorbell aperture at the
- * requested doorbell index (CIK).
- */
-void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
-{
if (amdgpu_device_skip_hw_access(adev))
return;
- if (index < adev->doorbell.num_doorbells) {
- writel(v, adev->doorbell.ptr + index);
+ if ((reg * 4) < adev->rmmio_size) {
+ if (amdgpu_sriov_vf(adev) &&
+ !amdgpu_sriov_runtime(adev) &&
+ adev->gfx.rlc.rlcg_reg_access_supported &&
+ amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
+ GC_HWIP, true,
+ &rlcg_flag)) {
+ amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, GET_INST(GC, xcc_id));
+ } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
+ amdgpu_sriov_runtime(adev) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_kiq_wreg(adev, reg, v, xcc_id);
+ up_read(&adev->reset_domain->sem);
+ } else {
+ writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
+ }
} else {
- DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
+ adev->pcie_wreg(adev, reg * 4, v);
}
}
/**
- * amdgpu_mm_rdoorbell64 - read a doorbell Qword
+ * amdgpu_device_indirect_rreg - read an indirect register
*
* @adev: amdgpu_device pointer
- * @index: doorbell index
+ * @reg_addr: indirect register address to read from
*
- * Returns the value in the doorbell aperture at the
- * requested doorbell index (VEGA10+).
+ * Returns the value of indirect register @reg_addr
*/
-u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
+u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
+ u32 reg_addr)
{
- if (amdgpu_device_skip_hw_access(adev))
- return 0;
+ unsigned long flags, pcie_index, pcie_data;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_data_offset;
+ u32 r;
- if (index < adev->doorbell.num_doorbells) {
- return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
- } else {
- DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
- return 0;
- }
-}
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
-/**
- * amdgpu_mm_wdoorbell64 - write a doorbell Qword
- *
- * @adev: amdgpu_device pointer
- * @index: doorbell index
- * @v: value to write
- *
- * Writes @v to the doorbell aperture at the
- * requested doorbell index (VEGA10+).
- */
-void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
-{
- if (amdgpu_device_skip_hw_access(adev))
- return;
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
- if (index < adev->doorbell.num_doorbells) {
- atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
- } else {
- DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
- }
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ r = readl(pcie_data_offset);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+ return r;
}
-/**
- * amdgpu_device_indirect_rreg - read an indirect register
- *
- * @adev: amdgpu_device pointer
- * @pcie_index: mmio register offset
- * @pcie_data: mmio register offset
- * @reg_addr: indirect register address to read from
- *
- * Returns the value of indirect register @reg_addr
- */
-u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
- u32 pcie_index, u32 pcie_data,
- u32 reg_addr)
+u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
+ u64 reg_addr)
{
- unsigned long flags;
+ unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
u32 r;
void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
void __iomem *pcie_data_offset;
+ if (unlikely(!adev->nbio.funcs)) {
+ pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
+ pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
+ } else {
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ }
+
+ if (reg_addr >> 32) {
+ if (unlikely(!adev->nbio.funcs))
+ pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
+ else
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+ } else {
+ pcie_index_hi = 0;
+ }
+
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset = (void __iomem *)adev->rmmio +
+ pcie_index_hi * 4;
writel(reg_addr, pcie_index_offset);
readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
r = readl(pcie_data_offset);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
return r;
@@ -694,33 +1029,83 @@ u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
* amdgpu_device_indirect_rreg64 - read a 64bits indirect register
*
* @adev: amdgpu_device pointer
- * @pcie_index: mmio register offset
- * @pcie_data: mmio register offset
* @reg_addr: indirect register address to read from
*
* Returns the value of indirect register @reg_addr
*/
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
- u32 pcie_index, u32 pcie_data,
u32 reg_addr)
{
- unsigned long flags;
+ unsigned long flags, pcie_index, pcie_data;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_data_offset;
u64 r;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+
+ /* read low 32 bits */
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ r = readl(pcie_data_offset);
+ /* read high 32 bits */
+ writel(reg_addr + 4, pcie_index_offset);
+ readl(pcie_index_offset);
+ r |= ((u64)readl(pcie_data_offset) << 32);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+ return r;
+}
+
+u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
+ u64 reg_addr)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ unsigned long pcie_index_hi = 0;
void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
void __iomem *pcie_data_offset;
+ u64 r;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset = (void __iomem *)adev->rmmio +
+ pcie_index_hi * 4;
/* read low 32 bits */
writel(reg_addr, pcie_index_offset);
readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
r = readl(pcie_data_offset);
/* read high 32 bits */
writel(reg_addr + 4, pcie_index_offset);
readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
r |= ((u64)readl(pcie_data_offset) << 32);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
return r;
@@ -730,20 +1115,20 @@ u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
* amdgpu_device_indirect_wreg - write an indirect register address
*
* @adev: amdgpu_device pointer
- * @pcie_index: mmio register offset
- * @pcie_data: mmio register offset
* @reg_addr: indirect register offset
* @reg_data: indirect register data
*
*/
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
- u32 pcie_index, u32 pcie_data,
u32 reg_addr, u32 reg_data)
{
- unsigned long flags;
+ unsigned long flags, pcie_index, pcie_data;
void __iomem *pcie_index_offset;
void __iomem *pcie_data_offset;
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
@@ -755,24 +1140,64 @@ void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
+/**
+ * amdgpu_device_indirect_wreg_ext - write an indirect register at an extended address
+ *
+ * @adev: amdgpu_device pointer
+ * @reg_addr: indirect register offset; may have bits set above bit 31
+ * @reg_data: indirect register data
+ *
+ * Writes @reg_data through the index/data register pair whose offsets are
+ * reported by the nbio callbacks. The high index register is used only when
+ * @reg_addr has bits set above bit 31 and the nbio block provides a
+ * get_pcie_index_hi_offset() callback. The whole sequence runs between
+ * spin_lock_irqsave() and spin_unlock_irqrestore() on adev->pcie_idx_lock.
+ */
+void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
+ u64 reg_addr, u32 reg_data)
+{
+ unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
+ void __iomem *pcie_data_offset;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ /* pcie_index_hi == 0 doubles as "do not touch the high index register" */
+ if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+ else
+ pcie_index_hi = 0;
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ /* the callbacks' offsets are scaled by 4 to address into the rmmio mapping */
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ /* pcie_index_hi_offset is assigned, and later used, only when pcie_index_hi != 0 */
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset = (void __iomem *)adev->rmmio +
+ pcie_index_hi * 4;
+
+ /*
+ * Every write below is followed by a read of the same register;
+ * presumably this flushes the posted write before the next access
+ * -- TODO confirm.
+ */
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ /* only bits 39:32 of the address are placed in the high index register */
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ writel(reg_data, pcie_data_offset);
+ readl(pcie_data_offset);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
/**
* amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
*
* @adev: amdgpu_device pointer
- * @pcie_index: mmio register offset
- * @pcie_data: mmio register offset
* @reg_addr: indirect register offset
* @reg_data: indirect register data
*
*/
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
- u32 pcie_index, u32 pcie_data,
u32 reg_addr, u64 reg_data)
{
- unsigned long flags;
+ unsigned long flags, pcie_index, pcie_data;
void __iomem *pcie_index_offset;
void __iomem *pcie_data_offset;
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
@@ -790,6 +1215,67 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
+/**
+ * amdgpu_device_indirect_wreg64_ext - write a 64bits indirect register at an extended address
+ *
+ * @adev: amdgpu_device pointer
+ * @reg_addr: indirect register offset; may have bits set above bit 31
+ * @reg_data: indirect register data
+ *
+ * Writes @reg_data as two 32-bit accesses through the index/data register
+ * pair reported by the nbio callbacks: the low half at @reg_addr, then the
+ * high half at @reg_addr + 4. The high index register is used only when
+ * @reg_addr has bits set above bit 31 and the nbio block provides a
+ * get_pcie_index_hi_offset() callback. The whole sequence runs between
+ * spin_lock_irqsave() and spin_unlock_irqrestore() on adev->pcie_idx_lock.
+ */
+void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
+ u64 reg_addr, u64 reg_data)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ /* 0 doubles as "do not touch the high index register" */
+ unsigned long pcie_index_hi = 0;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
+ void __iomem *pcie_data_offset;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ /* the callbacks' offsets are scaled by 4 to address into the rmmio mapping */
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ /* pcie_index_hi_offset is assigned, and later used, only when pcie_index_hi != 0 */
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset = (void __iomem *)adev->rmmio +
+ pcie_index_hi * 4;
+
+ /*
+ * Every write below is followed by a read of the same register;
+ * presumably this flushes the posted write before the next access
+ * -- TODO confirm.
+ */
+ /* write low 32 bits */
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ /* only bits 39:32 of the address are placed in the high index register */
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
+ readl(pcie_data_offset);
+ /* write high 32 bits */
+ writel(reg_addr + 4, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ /* the high index register is reprogrammed with the same bits 39:32 of reg_addr */
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ writel((u32)(reg_data >> 32), pcie_data_offset);
+ readl(pcie_data_offset);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+/**
+ * amdgpu_device_get_rev_id - query device rev_id
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Thin wrapper around the nbio block's get_rev_id() callback; the callback
+ * pointer is dereferenced without a NULL check.
+ *
+ * Returns the value reported by adev->nbio.funcs->get_rev_id().
+ */
+u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
+{
+ return adev->nbio.funcs->get_rev_id(adev);
+}
+
/**
* amdgpu_invalid_rreg - dummy reg read function
*
@@ -802,7 +1288,14 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
*/
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
- DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
+ dev_err(adev->dev, "Invalid callback to read register 0x%04X\n", reg);
+ BUG();
+ return 0;
+}
+
+/*
+ * amdgpu_invalid_rreg_ext - dummy read function for a 64-bit register offset
+ *
+ * Logs the offending register offset against adev->dev and then hits BUG().
+ * Presumably installed as a placeholder read callback -- verify against
+ * the assignments in device init. The trailing "return 0" only satisfies
+ * the function's return type.
+ */
+static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
+{
+ dev_err(adev->dev, "Invalid callback to read register 0x%llX\n", reg);
BUG();
return 0;
}
@@ -819,8 +1312,17 @@ static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
*/
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
- DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
- reg, v);
+ dev_err(adev->dev,
+ "Invalid callback to write register 0x%04X with 0x%08X\n", reg,
+ v);
+ BUG();
+}
+
+/*
+ * amdgpu_invalid_wreg_ext - dummy write function for a 64-bit register offset
+ *
+ * Logs the offending register offset and the 32-bit value against adev->dev
+ * and then hits BUG(). Presumably installed as a placeholder write callback
+ * -- verify against the assignments in device init.
+ */
+static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
+{
+ dev_err(adev->dev,
+ "Invalid callback to write register 0x%llX with 0x%08X\n", reg,
+ v);
BUG();
}
@@ -836,7 +1338,15 @@ static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32
*/
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
- DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
+ dev_err(adev->dev, "Invalid callback to read 64 bit register 0x%04X\n",
+ reg);
+ BUG();
+ return 0;
+}
+
+/*
+ * amdgpu_invalid_rreg64_ext - dummy 64-bit read function for a 64-bit register offset
+ *
+ * Logs the offending register offset against adev->dev and then hits BUG().
+ * The message names the access as a 64 bit read, matching the wording of
+ * amdgpu_invalid_rreg64() and amdgpu_invalid_wreg64_ext(), so the log line
+ * can be told apart from the 32-bit amdgpu_invalid_rreg_ext() one.
+ * The trailing "return 0" only satisfies the function's return type.
+ */
+static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
+{
+ dev_err(adev->dev, "Invalid callback to read 64 bit register 0x%llX\n", reg);
BUG();
return 0;
}
@@ -853,8 +1363,17 @@ static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
*/
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
- DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
- reg, v);
+ dev_err(adev->dev,
+ "Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
+ reg, v);
+ BUG();
+}
+
+/*
+ * amdgpu_invalid_wreg64_ext - dummy 64-bit write function for a 64-bit register offset
+ *
+ * Logs the offending register offset and the 64-bit value against adev->dev
+ * and then hits BUG(). Presumably installed as a placeholder write callback
+ * -- verify against the assignments in device init.
+ */
+static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
+{
+ dev_err(adev->dev,
+ "Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
+ reg, v);
BUG();
}
@@ -872,8 +1391,9 @@ static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
uint32_t block, uint32_t reg)
{
- DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
- reg, block);
+ dev_err(adev->dev,
+ "Invalid callback to read register 0x%04X in block 0x%04X\n",
+ reg, block);
BUG();
return 0;
}
@@ -893,11 +1413,23 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
uint32_t block,
uint32_t reg, uint32_t v)
{
- DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
- reg, block, v);
+ dev_err(adev->dev,
+ "Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
+ reg, block, v);
BUG();
}
+/*
+ * amdgpu_device_get_vbios_flags - derive the VBIOS handling flags
+ *
+ * Returns AMDGPU_VBIOS_SKIP for an APU with bits set in aid_mask,
+ * AMDGPU_VBIOS_OPTIONAL for a passthrough device with bits set in aid_mask,
+ * and 0 in every other case.
+ */
+static uint32_t amdgpu_device_get_vbios_flags(struct amdgpu_device *adev)
+{
+	/* both special cases require at least one bit in aid_mask */
+	if (!hweight32(adev->aid_mask))
+		return 0;
+
+	/* the APU case takes precedence over passthrough */
+	if (adev->flags & AMD_IS_APU)
+		return AMDGPU_VBIOS_SKIP;
+
+	return amdgpu_passthrough(adev) ? AMDGPU_VBIOS_OPTIONAL : 0;
+}
+
/**
* amdgpu_device_asic_init - Wrapper for atom asic_init
*
@@ -907,38 +1439,62 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
*/
static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{
+ uint32_t flags;
+ bool optional;
+ int ret;
+
amdgpu_asic_pre_asic_init(adev);
+ flags = amdgpu_device_get_vbios_flags(adev);
+ optional = !!(flags & (AMDGPU_VBIOS_OPTIONAL | AMDGPU_VBIOS_SKIP));
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
+ amdgpu_psp_wait_for_bootloader(adev);
+ if (optional && !adev->bios)
+ return 0;
- return amdgpu_atom_asic_init(adev->mode_info.atom_context);
+ ret = amdgpu_atomfirmware_asic_init(adev, true);
+ return ret;
+ } else {
+ if (optional && !adev->bios)
+ return 0;
+
+ return amdgpu_atom_asic_init(adev->mode_info.atom_context);
+ }
+
+ return 0;
}
/**
- * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
+ * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
*
* @adev: amdgpu_device pointer
*
* Allocates a scratch page of VRAM for use by various things in the
* driver.
*/
-static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
+static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
{
- return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
- &adev->vram_scratch.robj,
- &adev->vram_scratch.gpu_addr,
- (void **)&adev->vram_scratch.ptr);
+ return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mem_scratch.robj,
+ &adev->mem_scratch.gpu_addr,
+ (void **)&adev->mem_scratch.ptr);
}
/**
- * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
+ * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
*
* @adev: amdgpu_device pointer
*
* Frees the VRAM scratch page.
*/
-static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
+static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
{
- amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
}
/**
@@ -948,7 +1504,7 @@ static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
* @registers: pointer to the register array
* @array_size: size of the register array
*
- * Programs an array or registers with and and or masks.
+ * Programs an array of registers with AND and OR masks.
* This is a helper for setting golden registers.
*/
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
@@ -961,7 +1517,7 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
if (array_size % 3)
return;
- for (i = 0; i < array_size; i +=3) {
+ for (i = 0; i < array_size; i += 3) {
reg = registers[i + 0];
and_mask = registers[i + 1];
or_mask = registers[i + 2];
@@ -1006,76 +1562,6 @@ int amdgpu_device_pci_reset(struct amdgpu_device *adev)
}
/*
- * GPU doorbell aperture helpers function.
- */
-/**
- * amdgpu_device_doorbell_init - Init doorbell driver information.
- *
- * @adev: amdgpu_device pointer
- *
- * Init doorbell driver information (CIK)
- * Returns 0 on success, error on failure.
- */
-static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
-{
-
- /* No doorbell on SI hardware generation */
- if (adev->asic_type < CHIP_BONAIRE) {
- adev->doorbell.base = 0;
- adev->doorbell.size = 0;
- adev->doorbell.num_doorbells = 0;
- adev->doorbell.ptr = NULL;
- return 0;
- }
-
- if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
- return -EINVAL;
-
- amdgpu_asic_init_doorbell_index(adev);
-
- /* doorbell bar mapping */
- adev->doorbell.base = pci_resource_start(adev->pdev, 2);
- adev->doorbell.size = pci_resource_len(adev->pdev, 2);
-
- adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
- adev->doorbell_index.max_assignment+1);
- if (adev->doorbell.num_doorbells == 0)
- return -EINVAL;
-
- /* For Vega, reserve and map two pages on doorbell BAR since SDMA
- * paging queue doorbell use the second page. The
- * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
- * doorbells are in the first page. So with paging queue enabled,
- * the max num_doorbells should + 1 page (0x400 in dword)
- */
- if (adev->asic_type >= CHIP_VEGA10)
- adev->doorbell.num_doorbells += 0x400;
-
- adev->doorbell.ptr = ioremap(adev->doorbell.base,
- adev->doorbell.num_doorbells *
- sizeof(u32));
- if (adev->doorbell.ptr == NULL)
- return -ENOMEM;
-
- return 0;
-}
-
-/**
- * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
- *
- * @adev: amdgpu_device pointer
- *
- * Tear down doorbell driver information (CIK)
- */
-static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
-{
- iounmap(adev->doorbell.ptr);
- adev->doorbell.ptr = NULL;
-}
-
-
-
-/*
* amdgpu_device_wb_*()
* Writeback is the method by which the GPU updates special pages in memory
* with the status of certain GPU events (fences, ring pointers,etc.).
@@ -1100,7 +1586,7 @@ static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
}
/**
- * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
+ * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
*
* @adev: amdgpu_device pointer
*
@@ -1144,13 +1630,17 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev)
*/
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
- unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
+ unsigned long flags, offset;
+ spin_lock_irqsave(&adev->wb.lock, flags);
+ offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
if (offset < adev->wb.num_wb) {
__set_bit(offset, adev->wb.used);
+ spin_unlock_irqrestore(&adev->wb.lock, flags);
*wb = offset << 3; /* convert to dw offset */
return 0;
} else {
+ spin_unlock_irqrestore(&adev->wb.lock, flags);
return -EINVAL;
}
}
@@ -1165,9 +1655,13 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
*/
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
+ unsigned long flags;
+
wb >>= 3;
+ spin_lock_irqsave(&adev->wb.lock, flags);
if (wb < adev->wb.num_wb)
__clear_bit(wb, adev->wb.used);
+ spin_unlock_irqrestore(&adev->wb.lock, flags);
}
/**
@@ -1184,14 +1678,33 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
struct pci_bus *root;
struct resource *res;
- unsigned i;
+ int max_size, r;
+ unsigned int i;
u16 cmd;
- int r;
+
+ if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
+ return 0;
/* Bypass for VF */
if (amdgpu_sriov_vf(adev))
return 0;
+ if (!amdgpu_rebar)
+ return 0;
+
+ /* resizing on Dell G5 SE platforms causes problems with runtime pm */
+ if ((amdgpu_runtime_pm != 0) &&
+ adev->pdev->vendor == PCI_VENDOR_ID_ATI &&
+ adev->pdev->device == 0x731f &&
+ adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)
+ return 0;
+
+ /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
+ if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
+ dev_warn(
+ adev->dev,
+ "System can't access extended configuration space, please check!!\n");
+
/* skip if the bios has already enabled large BAR */
if (adev->gmc.real_vram_size &&
(pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
@@ -1213,33 +1726,32 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
return 0;
/* Limit the BAR size to what is available */
- rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
- rbar_size);
+ max_size = pci_rebar_get_max_size(adev->pdev, 0);
+ if (max_size < 0)
+ return 0;
+ rbar_size = min(max_size, rbar_size);
/* Disable memory decoding while we change the BAR addresses and size */
pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
pci_write_config_word(adev->pdev, PCI_COMMAND,
cmd & ~PCI_COMMAND_MEMORY);
- /* Free the VRAM and doorbell BAR, we most likely need to move both. */
- amdgpu_device_doorbell_fini(adev);
- if (adev->asic_type >= CHIP_BONAIRE)
- pci_release_resource(adev->pdev, 2);
-
- pci_release_resource(adev->pdev, 0);
+ /* Tear down doorbell as resizing will release BARs */
+ amdgpu_doorbell_fini(adev);
- r = pci_resize_resource(adev->pdev, 0, rbar_size);
+ r = pci_resize_resource(adev->pdev, 0, rbar_size,
+ (adev->asic_type >= CHIP_BONAIRE) ? 1 << 5
+ : 1 << 2);
if (r == -ENOSPC)
- DRM_INFO("Not enough PCI address space for a large BAR.");
+ dev_info(adev->dev,
+ "Not enough PCI address space for a large BAR.");
else if (r && r != -ENOTSUPP)
- DRM_ERROR("Problem resizing BAR0 (%d).", r);
-
- pci_assign_unassigned_bus_resources(adev->pdev->bus);
+ dev_err(adev->dev, "Problem resizing BAR0 (%d).", r);
/* When the doorbell or fb BAR isn't available we have no chance of
* using the device.
*/
- r = amdgpu_device_doorbell_init(adev);
+ r = amdgpu_doorbell_init(adev);
if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
return -ENODEV;
@@ -1262,11 +1774,17 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
*/
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
- uint32_t reg;
+ uint32_t reg, flags;
if (amdgpu_sriov_vf(adev))
return false;
+ flags = amdgpu_device_get_vbios_flags(adev);
+ if (flags & AMDGPU_VBIOS_SKIP)
+ return false;
+ if ((flags & AMDGPU_VBIOS_OPTIONAL) && !adev->bios)
+ return false;
+
if (amdgpu_passthrough(adev)) {
/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
* some old smc fw still need driver do vPost otherwise gpu hang, while
@@ -1276,19 +1794,21 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
if (adev->asic_type == CHIP_FIJI) {
int err;
uint32_t fw_ver;
+
err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
- /* force vPost if error occured */
+ /* force vPost if error occurred */
if (err)
return true;
fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
+ release_firmware(adev->pm.fw);
if (fw_ver < 0x00160e00)
return true;
}
}
/* Don't post if we need to reset whole hive on init */
- if (adev->gmc.xgmi.pending_reset)
+ if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
return false;
if (adev->has_hw_reset) {
@@ -1309,6 +1829,125 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
return true;
}
+/*
+ * Decide whether seamless boot may be used.
+ *
+ * The amdgpu.seamless module parameter wins when it is 0 or 1; any value
+ * other than -1, 0 or 1 is reported and treated as "off". With -1 the answer
+ * is derived from the hardware: only APUs that do not keep the stolen VGA
+ * memory and whose DCE IP version is 3.0.0 or later qualify. If users report
+ * that it works on older ASICs as well, this may be loosened.
+ */
+bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
+{
+	/* explicit user choice */
+	if (amdgpu_seamless == 1)
+		return true;
+	if (amdgpu_seamless == 0)
+		return false;
+
+	/* anything except "auto" (-1) is a bad parameter value */
+	if (amdgpu_seamless != -1) {
+		dev_err(adev->dev, "Invalid value for amdgpu.seamless: %d\n",
+			amdgpu_seamless);
+		return false;
+	}
+
+	/* auto: dGPUs and devices keeping stolen VGA memory are excluded */
+	if (!(adev->flags & AMD_IS_APU) || adev->mman.keep_stolen_vga_memory)
+		return false;
+
+	return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
+}
+
+/*
+ * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
+ * don't support dynamic speed switching. Until we have confirmation from Intel
+ * that a specific host supports it, it's safer that we keep it disabled for all.
+ *
+ * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
+ *
+ * Returns false only on x86 builds when the device is not removable and the
+ * vendor recorded for CPU 0 is Intel; returns true in every other case,
+ * including all non-x86 builds.
+ */
+static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
+{
+#if IS_ENABLED(CONFIG_X86)
+ /* the vendor check below looks at CPU 0's cpuinfo only */
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ /* eGPU change speeds based on USB4 fabric conditions */
+ if (dev_is_removable(adev->dev))
+ return true;
+
+ if (c->x86_vendor == X86_VENDOR_INTEL)
+ return false;
+#endif
+ return true;
+}
+
+/*
+ * amdgpu_device_aspm_support_quirk - check for a known ASPM quirk
+ *
+ * Returns true when a quirk applies; amdgpu_device_should_use_aspm() treats
+ * that as "do not use ASPM". A quirk applies for every SI family device and,
+ * on x86 builds only, for GC IP 12.0.0 / 12.0.1 devices whose pcie_gen_mask
+ * advertises Gen5 link speed when CPU 0 is family 6 with one of the
+ * Alder Lake / Raptor Lake models listed below. Returns false otherwise.
+ */
+static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev)
+{
+ /* Enabling ASPM causes random hangs on Tahiti and Oland on Zen4.
+ * It's unclear if this is a platform-specific or GPU-specific issue.
+ * Disable ASPM on SI for the time being.
+ */
+ if (adev->family == AMDGPU_FAMILY_SI)
+ return true;
+
+#if IS_ENABLED(CONFIG_X86)
+ /* the CPU checks below look at CPU 0's cpuinfo only */
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ /* the host-CPU quirk is limited to GC 12.0.0 and 12.0.1 */
+ if (!(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1)))
+ return false;
+
+ /* family 6 host CPU combined with Gen5 link speed support on the GPU */
+ if (c->x86 == 6 &&
+ adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) {
+ switch (c->x86_model) {
+ case VFM_MODEL(INTEL_ALDERLAKE):
+ case VFM_MODEL(INTEL_ALDERLAKE_L):
+ case VFM_MODEL(INTEL_RAPTORLAKE):
+ case VFM_MODEL(INTEL_RAPTORLAKE_P):
+ case VFM_MODEL(INTEL_RAPTORLAKE_S):
+ return true;
+ default:
+ return false;
+ }
+ } else {
+ return false;
+ }
+#else
+ return false;
+#endif
+}
+
+/**
+ * amdgpu_device_should_use_aspm - check if the device should program ASPM
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * The amdgpu_aspm module parameter decides directly when it is anything but
+ * -1: a value of 1 enables ASPM, every other value disables it. With -1 the
+ * decision is automatic: APUs and devices hit by an ASPM quirk never use
+ * ASPM, everything else follows what the PCIe core reports for the device.
+ *
+ * Returns true if it should be used or false if not.
+ */
+bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
+{
+	/* explicit enable */
+	if (amdgpu_aspm == 1)
+		return true;
+
+	/* explicit disable (0) and any unrecognized value */
+	if (amdgpu_aspm != -1)
+		return false;
+
+	/* auto: the quirk helper is consulted only for non-APU devices */
+	if ((adev->flags & AMD_IS_APU) ||
+	    amdgpu_device_aspm_support_quirk(adev))
+		return false;
+
+	return pcie_aspm_enabled(adev->pdev);
+}
+
/* if we get transitioned to only one device, take VGA back */
/**
* amdgpu_device_vga_set_decode - enable/disable vga decode
@@ -1323,6 +1962,7 @@ static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
bool state)
{
struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
+
amdgpu_asic_set_vga_state(adev, state);
if (state)
return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
@@ -1345,7 +1985,8 @@ static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
/* defines number of bits in page table versus page directory,
* a page is 4KB so we have 12 bits offset, minimum 9 bits in the
- * page table and the remaining bits are in the page directory */
+ * page table and the remaining bits are in the page directory
+ */
if (amdgpu_vm_block_size == -1)
return;
@@ -1389,7 +2030,7 @@ static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
return;
if (!is_os_64) {
- DRM_WARN("Not 64-bit OS, feature not supported\n");
+ dev_warn(adev->dev, "Not 64-bit OS, feature not supported\n");
goto def_value;
}
si_meminfo(&si);
@@ -1404,7 +2045,7 @@ static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
if (total_memory < dram_size_seven_GB)
goto def_value1;
} else {
- DRM_WARN("Smu memory pool size not supported\n");
+ dev_warn(adev->dev, "Smu memory pool size not supported\n");
goto def_value;
}
adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
@@ -1412,7 +2053,7 @@ static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
return;
def_value1:
- DRM_WARN("No enough system memory\n");
+ dev_warn(adev->dev, "No enough system memory\n");
def_value:
adev->pm.smu_prv_buffer_size = 0;
}
@@ -1443,11 +2084,12 @@ static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
case CHIP_YELLOW_CARP:
break;
case CHIP_CYAN_SKILLFISH:
- if (adev->pdev->device == 0x13FE)
+ if ((adev->pdev->device == 0x13FE) ||
+ (adev->pdev->device == 0x143F))
adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
break;
default:
- return -EINVAL;
+ break;
}
return 0;
@@ -1463,11 +2105,13 @@ static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
*/
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
+ int i;
+
if (amdgpu_sched_jobs < 4) {
dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
amdgpu_sched_jobs);
amdgpu_sched_jobs = 4;
- } else if (!is_power_of_2(amdgpu_sched_jobs)){
+ } else if (!is_power_of_2(amdgpu_sched_jobs)) {
dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
amdgpu_sched_jobs);
amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
@@ -1504,6 +2148,11 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
}
+ if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
+ dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
+ amdgpu_reset_method = -1;
+ }
+
amdgpu_device_check_smu_prv_buffer_size(adev);
amdgpu_device_check_vm_size(adev);
@@ -1512,9 +2161,31 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
- amdgpu_gmc_tmz_set(adev);
-
- amdgpu_gmc_noretry_set(adev);
+ for (i = 0; i < MAX_XCP; i++) {
+ switch (amdgpu_enforce_isolation) {
+ case -1:
+ case 0:
+ default:
+ /* disable */
+ adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
+ break;
+ case 1:
+ /* enable */
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_ENABLE;
+ break;
+ case 2:
+ /* enable legacy mode */
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
+ break;
+ case 3:
+ /* enable only process isolation without submitting cleaner shader */
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
+ break;
+ }
+ }
return 0;
}
@@ -1525,7 +2196,7 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
* @pdev: pci dev pointer
* @state: vga_switcheroo state
*
- * Callback for the switcheroo driver. Suspends or resumes the
+ * Callback for the switcheroo driver. Suspends or resumes
* the asics before or after it is powered up using ACPI methods.
*/
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
@@ -1534,7 +2205,8 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
struct drm_device *dev = pci_get_drvdata(pdev);
int r;
- if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
+ if (amdgpu_device_supports_px(drm_to_adev(dev)) &&
+ state == VGA_SWITCHEROO_OFF)
return;
if (state == VGA_SWITCHEROO_ON) {
@@ -1546,13 +2218,15 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
amdgpu_device_load_pci_state(pdev);
r = pci_enable_device(pdev);
if (r)
- DRM_WARN("pci_enable_device failed (%d)\n", r);
+ dev_warn(&pdev->dev, "pci_enable_device failed (%d)\n",
+ r);
amdgpu_device_resume(dev, true);
dev->switch_power_state = DRM_SWITCH_POWER_ON;
} else {
- pr_info("switched off\n");
+ dev_info(&pdev->dev, "switched off\n");
dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+ amdgpu_device_prepare(dev);
amdgpu_device_suspend(dev, true);
amdgpu_device_cache_pci_state(pdev);
/* Shut down the device */
@@ -1575,7 +2249,7 @@ static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
- /*
+ /*
* FIXME: open_count is protected by drm_global_mutex but that would lead to
* locking inversion with the driver load path. And the access here is
* completely racy anyway. So don't bother with locking for now.
@@ -1615,10 +2289,11 @@ int amdgpu_device_ip_set_clockgating_state(void *dev,
if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
continue;
r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
- (void *)adev, state);
+ &adev->ip_blocks[i], state);
if (r)
- DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "set_clockgating_state of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
}
return r;
}
@@ -1649,10 +2324,11 @@ int amdgpu_device_ip_set_powergating_state(void *dev,
if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
continue;
r = adev->ip_blocks[i].version->funcs->set_powergating_state(
- (void *)adev, state);
+ &adev->ip_blocks[i], state);
if (r)
- DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "set_powergating_state of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
}
return r;
}
@@ -1669,7 +2345,7 @@ int amdgpu_device_ip_set_powergating_state(void *dev,
* clockgating is enabled.
*/
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
int i;
@@ -1677,7 +2353,8 @@ void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
if (!adev->ip_blocks[i].status.valid)
continue;
if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
- adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
+ adev->ip_blocks[i].version->funcs->get_clockgating_state(
+ &adev->ip_blocks[i], flags);
}
}
@@ -1699,9 +2376,12 @@ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
if (!adev->ip_blocks[i].status.valid)
continue;
if (adev->ip_blocks[i].version->type == block_type) {
- r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
- if (r)
- return r;
+ if (adev->ip_blocks[i].version->funcs->wait_for_idle) {
+ r = adev->ip_blocks[i].version->funcs->wait_for_idle(
+ &adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
break;
}
}
@@ -1710,26 +2390,45 @@ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
}
/**
- * amdgpu_device_ip_is_idle - is the hardware IP idle
+ * amdgpu_device_ip_is_hw - is the hardware IP enabled
*
* @adev: amdgpu_device pointer
* @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
*
- * Check if the hardware IP is idle or not.
- * Returns true if it the IP is idle, false if not.
+ * Check if the hardware IP is enabled or not.
+ * Returns true if the IP is enabled, false if not.
*/
-bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
- enum amd_ip_block_type block_type)
+bool amdgpu_device_ip_is_hw(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type)
{
int i;
for (i = 0; i < adev->num_ip_blocks; i++) {
- if (!adev->ip_blocks[i].status.valid)
- continue;
if (adev->ip_blocks[i].version->type == block_type)
- return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
+ return adev->ip_blocks[i].status.hw;
}
- return true;
+ return false;
+}
+
+/**
+ * amdgpu_device_ip_is_valid - is the hardware IP valid
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Looks up the first registered IP block of @block_type.
+ * Returns that block's status.valid flag, or false when no block of that
+ * type is registered.
+ */
+bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type)
+{
+ int i;
+
+ /* the first block whose type matches decides the result */
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (adev->ip_blocks[i].version->type == block_type)
+ return adev->ip_blocks[i].status.valid;
+ }
+ return false;
}
@@ -1780,6 +2479,34 @@ int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
return 1;
}
+/*
+ * Printable names for the IP block types, indexed by enum amd_ip_block_type.
+ * The designated initializers leave any index without an entry as NULL.
+ */
+static const char * const ip_block_names[] = {
+	[AMD_IP_BLOCK_TYPE_COMMON] = "common",
+	[AMD_IP_BLOCK_TYPE_GMC] = "gmc",
+	[AMD_IP_BLOCK_TYPE_IH] = "ih",
+	[AMD_IP_BLOCK_TYPE_SMC] = "smu",
+	[AMD_IP_BLOCK_TYPE_PSP] = "psp",
+	[AMD_IP_BLOCK_TYPE_DCE] = "dce",
+	[AMD_IP_BLOCK_TYPE_GFX] = "gfx",
+	[AMD_IP_BLOCK_TYPE_SDMA] = "sdma",
+	[AMD_IP_BLOCK_TYPE_UVD] = "uvd",
+	[AMD_IP_BLOCK_TYPE_VCE] = "vce",
+	[AMD_IP_BLOCK_TYPE_ACP] = "acp",
+	[AMD_IP_BLOCK_TYPE_VCN] = "vcn",
+	[AMD_IP_BLOCK_TYPE_MES] = "mes",
+	[AMD_IP_BLOCK_TYPE_JPEG] = "jpeg",
+	[AMD_IP_BLOCK_TYPE_VPE] = "vpe",
+	[AMD_IP_BLOCK_TYPE_UMSCH_MM] = "umsch_mm",
+	[AMD_IP_BLOCK_TYPE_ISP] = "isp",
+	[AMD_IP_BLOCK_TYPE_RAS] = "ras",
+};
+
+/*
+ * ip_block_name - map an IP block type to a printable name
+ *
+ * @adev: amdgpu_device pointer (not used by the lookup)
+ * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Returns the ip_block_names entry for @type, or "unknown" when @type lies
+ * outside the table or selects a slot the table leaves empty. Never returns
+ * NULL, so the result can be passed straight to a "%s" format.
+ */
+static const char *ip_block_name(struct amdgpu_device *adev, enum amd_ip_block_type type)
+{
+	/* unsigned index: a negative enum value lands out of range, as before */
+	unsigned int idx = (unsigned int)type;
+
+	if (idx >= ARRAY_SIZE(ip_block_names) || !ip_block_names[idx])
+		return "unknown";
+
+	return ip_block_names[idx];
+}
+
/**
* amdgpu_device_ip_block_add
*
@@ -1808,8 +2535,15 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
break;
}
- DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
- ip_block_version->funcs->name);
+ dev_info(adev->dev, "detected ip block number %d <%s_v%d_%d_%d> (%s)\n",
+ adev->num_ip_blocks,
+ ip_block_name(adev, ip_block_version->type),
+ ip_block_version->major,
+ ip_block_version->minor,
+ ip_block_version->rev,
+ ip_block_version->funcs->name);
+
+ adev->ip_blocks[adev->num_ip_blocks].adev = adev;
adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
@@ -1825,7 +2559,7 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
* the module parameter virtual_display. This feature provides a virtual
* display hardware on headless boards or in virtualized environments.
* This function parses and validates the configuration string specified by
- * the user and configues the virtual display configuration (number of
+ * the user and configures the virtual display configuration (number of
* virtual connectors, crtcs, etc.) specified.
*/
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
@@ -1864,75 +2598,46 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
}
}
- DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
- amdgpu_virtual_display, pci_address_name,
- adev->enable_virtual_display, adev->mode_info.num_crtc);
+ dev_info(
+ adev->dev,
+ "virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
+ amdgpu_virtual_display, pci_address_name,
+ adev->enable_virtual_display, adev->mode_info.num_crtc);
kfree(pciaddstr);
}
}
+void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
+ adev->mode_info.num_crtc = 1;
+ adev->enable_virtual_display = true;
+ dev_info(adev->dev, "virtual_display:%d, num_crtc:%d\n",
+ adev->enable_virtual_display,
+ adev->mode_info.num_crtc);
+ }
+}
+
/**
* amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
*
* @adev: amdgpu_device pointer
*
* Parses the asic configuration parameters specified in the gpu info
- * firmware and makes them availale to the driver for use in configuring
+ * firmware and makes them available to the driver for use in configuring
* the asic.
* Returns 0 on success, -EINVAL on failure.
*/
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[40];
int err;
const struct gpu_info_firmware_header_v1_0 *hdr;
adev->firmware.gpu_info_fw = NULL;
- if (adev->mman.discovery_bin) {
- amdgpu_discovery_get_gfx_info(adev);
-
- /*
- * FIXME: The bounding box is still needed by Navi12, so
- * temporarily read it from gpu_info firmware. Should be droped
- * when DAL no longer needs it.
- */
- if (adev->asic_type != CHIP_NAVI12)
- return 0;
- }
-
switch (adev->asic_type) {
-#ifdef CONFIG_DRM_AMDGPU_SI
- case CHIP_VERDE:
- case CHIP_TAHITI:
- case CHIP_PITCAIRN:
- case CHIP_OLAND:
- case CHIP_HAINAN:
-#endif
-#ifdef CONFIG_DRM_AMDGPU_CIK
- case CHIP_BONAIRE:
- case CHIP_HAWAII:
- case CHIP_KAVERI:
- case CHIP_KABINI:
- case CHIP_MULLINS:
-#endif
- case CHIP_TOPAZ:
- case CHIP_TONGA:
- case CHIP_FIJI:
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- case CHIP_VEGAM:
- case CHIP_CARRIZO:
- case CHIP_STONEY:
- case CHIP_VEGA20:
- case CHIP_ALDEBARAN:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_BEIGE_GOBY:
default:
return 0;
case CHIP_VEGA10:
@@ -1952,42 +2657,25 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
case CHIP_ARCTURUS:
chip_name = "arcturus";
break;
- case CHIP_RENOIR:
- if (adev->apu_flags & AMD_APU_IS_RENOIR)
- chip_name = "renoir";
- else
- chip_name = "green_sardine";
- break;
- case CHIP_NAVI10:
- chip_name = "navi10";
- break;
- case CHIP_NAVI14:
- chip_name = "navi14";
- break;
case CHIP_NAVI12:
+ if (adev->discovery.bin)
+ return 0;
chip_name = "navi12";
break;
- case CHIP_VANGOGH:
- chip_name = "vangogh";
- break;
- case CHIP_YELLOW_CARP:
- chip_name = "yellow_carp";
+ case CHIP_CYAN_SKILLFISH:
+ if (adev->discovery.bin)
+ return 0;
+ chip_name = "cyan_skillfish";
break;
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
- err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
+ AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_gpu_info.bin", chip_name);
if (err) {
dev_err(adev->dev,
- "Failed to load gpu_info firmware \"%s\"\n",
- fw_name);
- goto out;
- }
- err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
- if (err) {
- dev_err(adev->dev,
- "Failed to validate gpu_info firmware \"%s\"\n",
- fw_name);
+ "Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n",
+ chip_name);
goto out;
}
@@ -2002,7 +2690,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
/*
- * Should be droped when DAL no longer needs it.
+ * Should be dropped when DAL no longer needs it.
*/
if (adev->asic_type == CHIP_NAVI12)
goto parse_soc_bounding_box;
@@ -2058,6 +2746,24 @@ out:
return err;
}
+static void amdgpu_uid_init(struct amdgpu_device *adev)
+{
+ /* Initialize the UID for the device */
+ adev->uid_info = kzalloc(sizeof(struct amdgpu_uid), GFP_KERNEL);
+ if (!adev->uid_info) {
+ dev_warn(adev->dev, "Failed to allocate memory for UID\n");
+ return;
+ }
+ adev->uid_info->adev = adev;
+}
+
+static void amdgpu_uid_fini(struct amdgpu_device *adev)
+{
+ /* Free the UID memory */
+ kfree(adev->uid_info);
+ adev->uid_info = NULL;
+}
+
/**
* amdgpu_device_ip_early_init - run early init for hardware IPs
*
@@ -2070,6 +2776,10 @@ out:
*/
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
+ struct amdgpu_ip_block *ip_block;
+ struct pci_dev *parent;
+ bool total, skip_bios;
+ uint32_t bios_flags;
int i, r;
amdgpu_device_enable_virtual_display(adev);
@@ -2078,6 +2788,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
r = amdgpu_virt_request_full_gpu(adev, true);
if (r)
return r;
+
+ r = amdgpu_virt_init_critical_region(adev);
+ if (r)
+ return r;
}
switch (adev->asic_type) {
@@ -2134,34 +2848,62 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
break;
}
- amdgpu_amdkfd_device_probe(adev);
+ /* Check for IP version 9.4.3 with A0 hardware */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
+ !amdgpu_device_get_rev_id(adev)) {
+ dev_err(adev->dev, "Unsupported A0 hardware\n");
+ return -ENODEV; /* device unsupported - no device error */
+ }
+
+ if (amdgpu_has_atpx() &&
+ (amdgpu_is_atpx_hybrid() ||
+ amdgpu_has_atpx_dgpu_power_cntl()) &&
+ ((adev->flags & AMD_IS_APU) == 0) &&
+ !dev_is_removable(&adev->pdev->dev))
+ adev->flags |= AMD_IS_PX;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ parent = pcie_find_root_port(adev->pdev);
+ adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
+ }
adev->pm.pp_feature = amdgpu_pp_feature_mask;
if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
+ if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
+ adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
+
+ adev->virt.is_xgmi_node_migrate_enabled = false;
+ if (amdgpu_sriov_vf(adev)) {
+ adev->virt.is_xgmi_node_migrate_enabled =
+ amdgpu_ip_version((adev), GC_HWIP, 0) == IP_VERSION(9, 4, 4);
+ }
+ total = true;
for (i = 0; i < adev->num_ip_blocks; i++) {
+ ip_block = &adev->ip_blocks[i];
+
if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
- DRM_ERROR("disabled ip block: %d <%s>\n",
- i, adev->ip_blocks[i].version->funcs->name);
+ dev_warn(adev->dev, "disabled ip block: %d <%s>\n", i,
+ adev->ip_blocks[i].version->funcs->name);
adev->ip_blocks[i].status.valid = false;
- } else {
- if (adev->ip_blocks[i].version->funcs->early_init) {
- r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
- if (r == -ENOENT) {
- adev->ip_blocks[i].status.valid = false;
- } else if (r) {
- DRM_ERROR("early_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- return r;
- } else {
- adev->ip_blocks[i].status.valid = true;
- }
+ } else if (ip_block->version->funcs->early_init) {
+ r = ip_block->version->funcs->early_init(ip_block);
+ if (r == -ENOENT) {
+ adev->ip_blocks[i].status.valid = false;
+ } else if (r) {
+ dev_err(adev->dev,
+ "early_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ total = false;
} else {
adev->ip_blocks[i].status.valid = true;
}
+ } else {
+ adev->ip_blocks[i].status.valid = true;
}
/* get the vbios after the asic_funcs are set up */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
@@ -2169,15 +2911,32 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
if (r)
return r;
+ bios_flags = amdgpu_device_get_vbios_flags(adev);
+ skip_bios = !!(bios_flags & AMDGPU_VBIOS_SKIP);
/* Read BIOS */
- if (!amdgpu_get_bios(adev))
- return -EINVAL;
-
- r = amdgpu_atombios_init(adev);
- if (r) {
- dev_err(adev->dev, "amdgpu_atombios_init failed\n");
- amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
- return r;
+ if (!skip_bios) {
+ bool optional =
+ !!(bios_flags & AMDGPU_VBIOS_OPTIONAL);
+ if (!amdgpu_get_bios(adev) && !optional)
+ return -EINVAL;
+
+ if (optional && !adev->bios)
+ dev_info(
+ adev->dev,
+ "VBIOS image optional, proceeding without VBIOS image");
+
+ if (adev->bios) {
+ r = amdgpu_atombios_init(adev);
+ if (r) {
+ dev_err(adev->dev,
+ "amdgpu_atombios_init failed\n");
+ amdgpu_vf_error_put(
+ adev,
+ AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL,
+ 0, 0);
+ return r;
+ }
+ }
}
/*get pf2vf msg info at it's earliest time*/
@@ -2186,6 +2945,17 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
}
}
+ if (!total)
+ return -ENODEV;
+
+ if (adev->gmc.xgmi.supported)
+ amdgpu_xgmi_early_init(adev);
+
+ if (amdgpu_is_multi_aid(adev))
+ amdgpu_uid_init(adev);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+ if (ip_block->status.valid != false)
+ amdgpu_amdkfd_device_probe(adev);
adev->cg_flags &= amdgpu_cg_mask;
adev->pg_flags &= amdgpu_pg_mask;
@@ -2202,13 +2972,18 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].status.hw)
continue;
+ if (!amdgpu_ip_member_of_hwini(
+ adev, adev->ip_blocks[i].version->type))
+ continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
(amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
- r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
if (r) {
- DRM_ERROR("hw_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "hw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
return r;
}
adev->ip_blocks[i].status.hw = true;
@@ -2227,10 +3002,14 @@ static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].status.hw)
continue;
- r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+ if (!amdgpu_ip_member_of_hwini(
+ adev, adev->ip_blocks[i].version->type))
+ continue;
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
if (r) {
- DRM_ERROR("hw_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "hw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
return r;
}
adev->ip_blocks[i].status.hw = true;
@@ -2250,6 +3029,10 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
continue;
+ if (!amdgpu_ip_member_of_hwini(adev,
+ AMD_IP_BLOCK_TYPE_PSP))
+ break;
+
if (!adev->ip_blocks[i].status.sw)
continue;
@@ -2258,22 +3041,21 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
break;
if (amdgpu_in_reset(adev) || adev->in_suspend) {
- r = adev->ip_blocks[i].version->funcs->resume(adev);
- if (r) {
- DRM_ERROR("resume of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
} else {
- r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
if (r) {
- DRM_ERROR("hw_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "hw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i]
+ .version->funcs->name,
+ r);
return r;
}
+ adev->ip_blocks[i].status.hw = true;
}
-
- adev->ip_blocks[i].status.hw = true;
break;
}
}
@@ -2284,6 +3066,74 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
return r;
}
+static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
+{
+ struct drm_sched_init_args args = {
+ .ops = &amdgpu_sched_ops,
+ .num_rqs = DRM_SCHED_PRIORITY_COUNT,
+ .timeout_wq = adev->reset_domain->wq,
+ .dev = adev->dev,
+ };
+ long timeout;
+ int r, i;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ /* No need to setup the GPU scheduler for rings that don't need it */
+ if (!ring || ring->no_scheduler)
+ continue;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ timeout = adev->gfx_timeout;
+ break;
+ case AMDGPU_RING_TYPE_COMPUTE:
+ timeout = adev->compute_timeout;
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ timeout = adev->sdma_timeout;
+ break;
+ default:
+ timeout = adev->video_timeout;
+ break;
+ }
+
+ args.timeout = timeout;
+ args.credit_limit = ring->num_hw_submission;
+ args.score = ring->sched_score;
+ args.name = ring->name;
+
+ r = drm_sched_init(&ring->sched, &args);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed to create scheduler on ring %s.\n",
+ ring->name);
+ return r;
+ }
+ r = amdgpu_uvd_entity_init(adev, ring);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed to create UVD scheduling entity on ring %s.\n",
+ ring->name);
+ return r;
+ }
+ r = amdgpu_vce_entity_init(adev, ring);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed to create VCE scheduling entity on ring %s.\n",
+ ring->name);
+ return r;
+ }
+ }
+
+ if (adev->xcp_mgr)
+ amdgpu_xcp_update_partition_sched_list(adev);
+
+ return 0;
+}
+
+
/**
* amdgpu_device_ip_init - run init for hardware IPs
*
@@ -2297,6 +3147,7 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
*/
static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
+ bool init_badpage;
int i, r;
r = amdgpu_ras_init(adev);
@@ -2306,43 +3157,77 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
- r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
- if (r) {
- DRM_ERROR("sw_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- goto init_failed;
+ if (adev->ip_blocks[i].version->funcs->sw_init) {
+ r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]);
+ if (r) {
+ dev_err(adev->dev,
+ "sw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ goto init_failed;
+ }
}
adev->ip_blocks[i].status.sw = true;
- /* need to do gmc hw init early so we can allocate gpu mem */
- if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
- r = amdgpu_device_vram_scratch_init(adev);
+ if (!amdgpu_ip_member_of_hwini(
+ adev, adev->ip_blocks[i].version->type))
+ continue;
+
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
+ /* need to do common hw init early so everything is set up for gmc */
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
if (r) {
- DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
+ dev_err(adev->dev, "hw_init %d failed %d\n", i,
+ r);
goto init_failed;
}
- r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
+ adev->ip_blocks[i].status.hw = true;
+ } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
+ /* need to do gmc hw init early so we can allocate gpu mem */
+ /* Try to reserve bad pages early */
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_exchange_data(adev);
+
+ r = amdgpu_device_mem_scratch_init(adev);
+ if (r) {
+ dev_err(adev->dev,
+ "amdgpu_mem_scratch_init failed %d\n",
+ r);
+ goto init_failed;
+ }
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
if (r) {
- DRM_ERROR("hw_init %d failed %d\n", i, r);
+ dev_err(adev->dev, "hw_init %d failed %d\n", i,
+ r);
goto init_failed;
}
r = amdgpu_device_wb_init(adev);
if (r) {
- DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
+ dev_err(adev->dev,
+ "amdgpu_device_wb_init failed %d\n", r);
goto init_failed;
}
adev->ip_blocks[i].status.hw = true;
/* right after GMC hw init, we create CSA */
- if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
+ if (adev->gfx.mcbp) {
r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
- AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_CSA_SIZE);
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_CSA_SIZE);
if (r) {
- DRM_ERROR("allocate CSA failed %d\n", r);
+ dev_err(adev->dev,
+ "allocate CSA failed %d\n", r);
goto init_failed;
}
}
+
+ r = amdgpu_seq64_init(adev);
+ if (r) {
+ dev_err(adev->dev, "allocate seq64 failed %d\n",
+ r);
+ goto init_failed;
+ }
}
}
@@ -2387,22 +3272,58 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
* Note: theoretically, this should be called before all vram allocations
* to protect retired page from abusing
*/
- r = amdgpu_ras_recovery_init(adev);
+ init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
+ r = amdgpu_ras_recovery_init(adev, init_badpage);
if (r)
goto init_failed;
- if (adev->gmc.xgmi.num_physical_nodes > 1)
- amdgpu_xgmi_add_device(adev);
+ /**
+ * In case of XGMI grab extra reference for reset domain for this device
+ */
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ if (amdgpu_xgmi_add_device(adev) == 0) {
+ if (!amdgpu_sriov_vf(adev)) {
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+
+ if (WARN_ON(!hive)) {
+ r = -ENOENT;
+ goto init_failed;
+ }
+
+ if (!hive->reset_domain ||
+ !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
+ r = -ENOENT;
+ amdgpu_put_xgmi_hive(hive);
+ goto init_failed;
+ }
+
+ /* Drop the early temporary reset domain we created for device */
+ amdgpu_reset_put_reset_domain(adev->reset_domain);
+ adev->reset_domain = hive->reset_domain;
+ amdgpu_put_xgmi_hive(hive);
+ }
+ }
+ }
+
+ r = amdgpu_device_init_schedulers(adev);
+ if (r)
+ goto init_failed;
+
+ if (adev->mman.buffer_funcs_ring->sched.ready)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
/* Don't init kfd if whole hive need to be reset during init */
- if (!adev->gmc.xgmi.pending_reset)
+ if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
+ kgd2kfd_init_zone_device(adev);
amdgpu_amdkfd_device_init(adev);
+ }
amdgpu_fru_get_product_info(adev);
+ if (!amdgpu_sriov_vf(adev) || amdgpu_sriov_ras_cper_en(adev))
+ r = amdgpu_cper_init(adev);
+
init_failed:
- if (amdgpu_sriov_vf(adev))
- amdgpu_virt_release_full_gpu(adev, true);
return r;
}
@@ -2414,7 +3335,7 @@ init_failed:
*
* Writes a reset magic value to the gart pointer in VRAM. The driver calls
* this function before a GPU reset. If the value is retained after a
- * GPU reset, VRAM has not been lost. Some GPU resets may destry VRAM contents.
+ * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
*/
static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
{
@@ -2445,6 +3366,8 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
* always assumed to be lost.
*/
switch (amdgpu_asic_reset_method(adev)) {
+ case AMD_RESET_METHOD_LEGACY:
+ case AMD_RESET_METHOD_LINK:
case AMD_RESET_METHOD_BACO:
case AMD_RESET_METHOD_MODE1:
return true;
@@ -2478,9 +3401,10 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
if (!adev->ip_blocks[i].status.late_initialized)
continue;
- /* skip CG for GFX on S0ix */
+ /* skip CG for GFX, SDMA on S0ix */
if (adev->in_s0ix &&
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
continue;
/* skip CG for VCE/UVD, it's handled specially */
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
@@ -2489,11 +3413,13 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* enable clockgating to save power */
- r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
+ r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i],
state);
if (r) {
- DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "set_clockgating_state(gate) of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
return r;
}
}
@@ -2514,22 +3440,26 @@ int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
if (!adev->ip_blocks[i].status.late_initialized)
continue;
- /* skip PG for GFX on S0ix */
+ /* skip PG for GFX, SDMA on S0ix */
if (adev->in_s0ix &&
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
continue;
- /* skip CG for VCE/UVD, it's handled specially */
+ /* skip CG for VCE/UVD/VPE, it's handled specially */
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VPE &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
adev->ip_blocks[i].version->funcs->set_powergating_state) {
/* enable powergating to save power */
- r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
+ r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i],
state);
if (r) {
- DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "set_powergating_state(gate) of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
return r;
}
}
@@ -2556,7 +3486,7 @@ static int amdgpu_device_enable_mgpu_fan_boost(void)
for (i = 0; i < mgpu_info.num_dgpu; i++) {
gpu_ins = &(mgpu_info.gpu_ins[i]);
adev = gpu_ins->adev;
- if (!(adev->flags & AMD_IS_APU) &&
+ if (!(adev->flags & AMD_IS_APU || amdgpu_sriov_multi_vf_mode(adev)) &&
!gpu_ins->mgpu_fan_enabled) {
ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
if (ret)
@@ -2593,17 +3523,26 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (!adev->ip_blocks[i].status.hw)
continue;
if (adev->ip_blocks[i].version->funcs->late_init) {
- r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
+ r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]);
if (r) {
- DRM_ERROR("late_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "late_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
return r;
}
}
adev->ip_blocks[i].status.late_initialized = true;
}
- amdgpu_ras_set_error_query_ready(adev, true);
+ r = amdgpu_ras_late_init(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_ras_late_init failed %d", r);
+ return r;
+ }
+
+ if (!amdgpu_reset_in_recovery(adev))
+ amdgpu_ras_set_error_query_ready(adev, true);
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
@@ -2612,13 +3551,13 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
r = amdgpu_device_enable_mgpu_fan_boost();
if (r)
- DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+ dev_err(adev->dev, "enable mgpu fan boost failed (%d).\n", r);
- /* For XGMI + passthrough configuration on arcturus, enable light SBR */
- if (adev->asic_type == CHIP_ARCTURUS &&
- amdgpu_passthrough(adev) &&
- adev->gmc.xgmi.num_physical_nodes > 1)
- smu_set_light_sbr(&adev->smu, true);
+ /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
+ if (amdgpu_passthrough(adev) &&
+ ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
+ adev->asic_type == CHIP_ALDEBARAN))
+ amdgpu_dpm_handle_passthrough_sbr(adev, true);
if (adev->gmc.xgmi.num_physical_nodes > 1) {
mutex_lock(&mgpu_info.mutex);
@@ -2645,7 +3584,9 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
AMDGPU_XGMI_PSTATE_MIN);
if (r) {
- DRM_ERROR("pstate setting failed (%d).\n", r);
+ dev_err(adev->dev,
+ "pstate setting failed (%d).\n",
+ r);
break;
}
}
@@ -2657,59 +3598,101 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
return 0;
}
-static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
+static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block)
{
- int i, r;
-
- for (i = 0; i < adev->num_ip_blocks; i++) {
- if (!adev->ip_blocks[i].version->funcs->early_fini)
- continue;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
- r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
+ if (!ip_block->version->funcs->hw_fini) {
+ dev_err(adev->dev, "hw_fini of IP block <%s> not defined\n",
+ ip_block->version->funcs->name);
+ } else {
+ r = ip_block->version->funcs->hw_fini(ip_block);
+ /* XXX handle errors */
if (r) {
- DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_dbg(adev->dev,
+ "hw_fini of IP block <%s> failed %d\n",
+ ip_block->version->funcs->name, r);
}
}
- amdgpu_amdkfd_suspend(adev, false);
+ ip_block->status.hw = false;
+}
- amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
- amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
+/**
+ * amdgpu_device_smu_fini_early - smu hw_fini wrapper
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * For ASICs need to disable SMC first
+ */
+static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
+ return;
- /* need to disable SMC first */
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.hw)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
- r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
- /* XXX handle errors */
- if (r) {
- DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- }
- adev->ip_blocks[i].status.hw = false;
+ amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
break;
}
}
+}
- for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
- if (!adev->ip_blocks[i].status.hw)
+static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].version->funcs->early_fini)
continue;
- r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
- /* XXX handle errors */
+ r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]);
if (r) {
- DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_dbg(adev->dev,
+ "early_fini of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
}
+ }
+
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
- adev->ip_blocks[i].status.hw = false;
+ amdgpu_amdkfd_suspend(adev, true);
+ amdgpu_userq_suspend(adev);
+
+ /* Workaround for ASICs need to disable SMC first */
+ amdgpu_device_smu_fini_early(adev);
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!adev->ip_blocks[i].status.hw)
+ continue;
+
+ amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
}
if (amdgpu_sriov_vf(adev)) {
if (amdgpu_virt_release_full_gpu(adev, false))
- DRM_ERROR("failed to release exclusive mode on fini\n");
+ dev_err(adev->dev,
+ "failed to release exclusive mode on fini\n");
+ }
+
+ /*
+ * Driver reload on the APU can fail due to firmware validation because
+ * the PSP is always running, as it is shared across the whole SoC.
+ * This same issue does not occur on dGPU because it has a mechanism
+ * that checks whether the PSP is running. A solution for those issues
+ * in the APU is to trigger a GPU reset, but this should be done during
+ * the unload phase to avoid adding boot latency and screen flicker.
+ */
+ if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu) {
+ r = amdgpu_asic_reset(adev);
+ if (r)
+ dev_err(adev->dev, "asic reset on %s failed\n", __func__);
}
return 0;
@@ -2730,11 +3713,11 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
{
int i, r;
+ amdgpu_cper_fini(adev);
+
if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
amdgpu_virt_release_ras_err_handler_data(adev);
- amdgpu_ras_pre_fini(adev);
-
if (adev->gmc.xgmi.num_physical_nodes > 1)
amdgpu_xgmi_remove_device(adev);
@@ -2748,15 +3731,20 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
amdgpu_ucode_free_bo(adev);
amdgpu_free_static_csa(&adev->virt.csa_obj);
amdgpu_device_wb_fini(adev);
- amdgpu_device_vram_scratch_fini(adev);
+ amdgpu_device_mem_scratch_fini(adev);
amdgpu_ib_pool_fini(adev);
+ amdgpu_seq64_fini(adev);
+ amdgpu_doorbell_fini(adev);
}
-
- r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
- /* XXX handle errors */
- if (r) {
- DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ if (adev->ip_blocks[i].version->funcs->sw_fini) {
+ r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]);
+ /* XXX handle errors */
+ if (r) {
+ dev_dbg(adev->dev,
+ "sw_fini of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ }
}
adev->ip_blocks[i].status.sw = false;
adev->ip_blocks[i].status.valid = false;
@@ -2766,11 +3754,12 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
if (!adev->ip_blocks[i].status.late_initialized)
continue;
if (adev->ip_blocks[i].version->funcs->late_fini)
- adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
+ adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]);
adev->ip_blocks[i].status.late_initialized = false;
}
amdgpu_ras_fini(adev);
+ amdgpu_uid_fini(adev);
return 0;
}
@@ -2788,7 +3777,7 @@ static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
r = amdgpu_ib_ring_tests(adev);
if (r)
- DRM_ERROR("ib ring test failed (%d).\n", r);
+ dev_err(adev->dev, "ib ring test failed (%d).\n", r);
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
@@ -2799,7 +3788,7 @@ static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
WARN_ON_ONCE(adev->gfx.gfx_off_state);
WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
- if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
+ if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true, 0))
adev->gfx.gfx_off_state = true;
}
@@ -2816,11 +3805,19 @@ static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
*/
static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
{
- int i, r;
+ int i, r, rec;
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
+ /*
+ * Per PMFW team's suggestion, driver needs to handle gfxoff
+ * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
+ * scenario. Add the missing df cstate disablement here.
+ */
+ if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
+ dev_warn(adev->dev, "Failed to disallow df cstate");
+
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid)
continue;
@@ -2829,19 +3826,25 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
continue;
- /* XXX handle errors */
- r = adev->ip_blocks[i].version->funcs->suspend(adev);
- /* XXX handle errors */
- if (r) {
- DRM_ERROR("suspend of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- return r;
- }
-
- adev->ip_blocks[i].status.hw = false;
+ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ if (r)
+ goto unwind;
}
return 0;
+unwind:
+ rec = amdgpu_device_ip_resume_phase3(adev);
+ if (rec)
+ dev_err(adev->dev,
+ "amdgpu_device_ip_resume_phase3 failed during unwind: %d\n",
+ rec);
+
+ amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW);
+
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
+
+ return r;
}
/**
@@ -2857,10 +3860,10 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
*/
static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
{
- int i, r;
+ int i, r, rec;
if (adev->in_s0ix)
- amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);
+ amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid)
@@ -2876,47 +3879,94 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
}
/* skip unnecessary suspend if we do not initialize them yet */
- if (adev->gmc.xgmi.pending_reset &&
- !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
- adev->ip_blocks[i].status.hw = false;
+ if (!amdgpu_ip_member_of_hwini(
+ adev, adev->ip_blocks[i].version->type))
continue;
- }
- /* skip suspend of gfx and psp for S0ix
+ /* Since we skip suspend for S0i3, we need to cancel the delayed
+ * idle work here as the suspend callback never gets called.
+ */
+ if (adev->in_s0ix &&
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
+ /* skip suspend of gfx/mes and psp for S0ix
* gfx is in gfxoff state, so on resume it will exit gfxoff just
* like at runtime. PSP is also part of the always on hardware
* so no need to suspend it.
*/
if (adev->in_s0ix &&
(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX))
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
continue;
- /* XXX handle errors */
- r = adev->ip_blocks[i].version->funcs->suspend(adev);
- /* XXX handle errors */
- if (r) {
- DRM_ERROR("suspend of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- }
- adev->ip_blocks[i].status.hw = false;
+ /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
+ if (adev->in_s0ix &&
+ (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
+ IP_VERSION(5, 0, 0)) &&
+ (adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+
+ /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
+ * These are in TMR, hence are expected to be reused by PSP-TOS to reload
+ * from this location and RLC Autoload automatically also gets loaded
+ * from here based on PMFW -> PSP message during re-init sequence.
+ * Therefore, the psp suspend & resume should be skipped to avoid destroying
+ * the TMR and reloading FWs again for IMU enabled APU ASICs.
+ */
+ if (amdgpu_in_reset(adev) &&
+ (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
+ continue;
+
+ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ if (r)
+ goto unwind;
+
/* handle putting the SMC in the appropriate state */
- if(!amdgpu_sriov_vf(adev)){
+ if (!amdgpu_sriov_vf(adev)) {
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
if (r) {
- DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
- adev->mp1_state, r);
- return r;
+ dev_err(adev->dev,
+ "SMC failed to set mp1 state %d, %d\n",
+ adev->mp1_state, r);
+ goto unwind;
}
}
}
}
return 0;
+unwind:
+ /* suspend phase 2 = resume phase 1 + resume phase 2 */
+ rec = amdgpu_device_ip_resume_phase1(adev);
+ if (rec) {
+ dev_err(adev->dev,
+ "amdgpu_device_ip_resume_phase1 failed during unwind: %d\n",
+ rec);
+ return r;
+ }
+
+ rec = amdgpu_device_fw_loading(adev);
+ if (rec) {
+ dev_err(adev->dev,
+ "amdgpu_device_fw_loading failed during unwind: %d\n",
+ rec);
+ return r;
+ }
+
+ rec = amdgpu_device_ip_resume_phase2(adev);
+ if (rec) {
+ dev_err(adev->dev,
+ "amdgpu_device_ip_resume_phase2 failed during unwind: %d\n",
+ rec);
+ return r;
+ }
+
+ return r;
}
/**
@@ -2930,7 +3980,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
* in each IP into a state suitable for suspend.
* Returns 0 on success, negative error code on failure.
*/
-int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
+static int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
{
int r;
@@ -2939,6 +3989,8 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
amdgpu_virt_request_full_gpu(adev, false);
}
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+
r = amdgpu_device_ip_suspend_phase1(adev);
if (r)
return r;
@@ -2955,8 +4007,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
int i, r;
static enum amd_ip_block_type ip_order[] = {
- AMD_IP_BLOCK_TYPE_GMC,
AMD_IP_BLOCK_TYPE_COMMON,
+ AMD_IP_BLOCK_TYPE_GMC,
AMD_IP_BLOCK_TYPE_PSP,
AMD_IP_BLOCK_TYPE_IH,
};
@@ -2974,10 +4026,12 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
!block->status.valid)
continue;
- r = block->version->funcs->hw_init(adev);
- DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
- if (r)
+ r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
+ if (r) {
+ dev_err(adev->dev, "RE-INIT-early: %s failed\n",
+ block->version->funcs->name);
return r;
+ }
block->status.hw = true;
}
}
@@ -2987,43 +4041,44 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
{
- int i, r;
+ struct amdgpu_ip_block *block;
+ int i, r = 0;
static enum amd_ip_block_type ip_order[] = {
AMD_IP_BLOCK_TYPE_SMC,
AMD_IP_BLOCK_TYPE_DCE,
AMD_IP_BLOCK_TYPE_GFX,
AMD_IP_BLOCK_TYPE_SDMA,
+ AMD_IP_BLOCK_TYPE_MES,
AMD_IP_BLOCK_TYPE_UVD,
AMD_IP_BLOCK_TYPE_VCE,
- AMD_IP_BLOCK_TYPE_VCN
+ AMD_IP_BLOCK_TYPE_VCN,
+ AMD_IP_BLOCK_TYPE_JPEG
};
for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
- int j;
- struct amdgpu_ip_block *block;
+ block = amdgpu_device_ip_get_ip_block(adev, ip_order[i]);
- for (j = 0; j < adev->num_ip_blocks; j++) {
- block = &adev->ip_blocks[j];
-
- if (block->version->type != ip_order[i] ||
- !block->status.valid ||
- block->status.hw)
- continue;
+ if (!block)
+ continue;
- if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
- r = block->version->funcs->resume(adev);
- else
- r = block->version->funcs->hw_init(adev);
+ if (block->status.valid && !block->status.hw) {
+ if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) {
+ r = amdgpu_ip_block_resume(block);
+ } else {
+ r = block->version->funcs->hw_init(block);
+ }
- DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
- if (r)
- return r;
+ if (r) {
+ dev_err(adev->dev, "RE-INIT-late: %s failed\n",
+ block->version->funcs->name);
+ break;
+ }
block->status.hw = true;
}
}
- return 0;
+ return r;
}
/**
@@ -3047,15 +4102,12 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
- r = adev->ip_blocks[i].version->funcs->resume(adev);
- if (r) {
- DRM_ERROR("resume of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
- adev->ip_blocks[i].status.hw = true;
}
}
@@ -3067,7 +4119,7 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
*
* @adev: amdgpu_device pointer
*
- * First resume function for hardware IPs. The list of all the hardware
+ * Second resume function for hardware IPs. The list of all the hardware
* IPs that make up the asic is walked and the resume callbacks are run for
* all blocks except COMMON, GMC, and IH. resume puts the hardware into a
* functional state after a suspend and updates the software state as
@@ -3085,15 +4137,42 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
continue;
- r = adev->ip_blocks[i].version->funcs->resume(adev);
- if (r) {
- DRM_ERROR("resume of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
return r;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Third resume function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the resume callbacks are run for
+ * all DCE. resume puts the hardware into a functional state after a suspend
+ * and updates the software state as necessary. This function is also used
+ * for restoring the GPU after a GPU reset.
+ *
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
+ continue;
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
+ return r;
}
- adev->ip_blocks[i].status.hw = true;
}
return 0;
@@ -3106,7 +4185,7 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
*
* Main resume function for hardware IPs. The hardware IPs
* are split into two resume functions because they are
- * are also used in in recovering from a GPU reset and some additional
+ * also used in recovering from a GPU reset and some additional
* steps need to be take between them. In this case (S3/S4) they are
* run sequentially.
* Returns 0 on success, negative error code on failure.
@@ -3115,10 +4194,6 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
{
int r;
- r = amdgpu_amdkfd_resume_iommu(adev);
- if (r)
- return r;
-
r = amdgpu_device_ip_resume_phase1(adev);
if (r)
return r;
@@ -3129,6 +4204,16 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
r = amdgpu_device_ip_resume_phase2(adev);
+ if (adev->mman.buffer_funcs_ring->sched.ready)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+
+ if (r)
+ return r;
+
+ amdgpu_fence_driver_hw_init(adev);
+
+ r = amdgpu_device_ip_resume_phase3(adev);
+
return r;
}
@@ -3158,76 +4243,47 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
/**
* amdgpu_device_asic_has_dc_support - determine if DC supports the asic
*
+ * @pdev : pci device context
* @asic_type: AMD asic type
*
* Check if there is DC (new modesetting infrastructre) support for an asic.
* returns true if DC has support, false if not.
*/
-bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
+bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
+ enum amd_asic_type asic_type)
{
switch (asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_HAINAN:
+#endif
+ case CHIP_TOPAZ:
+ /* chips with no display hardware */
+ return false;
#if defined(CONFIG_DRM_AMD_DC)
case CHIP_TAHITI:
case CHIP_PITCAIRN:
case CHIP_VERDE:
case CHIP_OLAND:
- /*
- * We have systems in the wild with these ASICs that require
- * LVDS and VGA support which is not supported with DC.
- *
- * Fallback to the non-DC driver here by default so as not to
- * cause regressions.
- */
-#if defined(CONFIG_DRM_AMD_DC_SI)
- return amdgpu_dc > 0;
-#else
- return false;
-#endif
- case CHIP_BONAIRE:
+ return amdgpu_dc != 0 && IS_ENABLED(CONFIG_DRM_AMD_DC_SI);
case CHIP_KAVERI:
case CHIP_KABINI:
case CHIP_MULLINS:
/*
* We have systems in the wild with these ASICs that require
- * LVDS and VGA support which is not supported with DC.
+ * TRAVIS and NUTMEG support which is not supported with DC.
*
* Fallback to the non-DC driver here by default so as not to
* cause regressions.
*/
return amdgpu_dc > 0;
- case CHIP_HAWAII:
- case CHIP_CARRIZO:
- case CHIP_STONEY:
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- case CHIP_VEGAM:
- case CHIP_TONGA:
- case CHIP_FIJI:
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- case CHIP_RAVEN:
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- case CHIP_RENOIR:
- case CHIP_CYAN_SKILLFISH:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_BEIGE_GOBY:
- case CHIP_VANGOGH:
- case CHIP_YELLOW_CARP:
-#endif
default:
return amdgpu_dc != 0;
#else
default:
if (amdgpu_dc > 0)
- DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
- "but isn't supported by ASIC, ignoring\n");
+ dev_info_once(
+ &pdev->dev,
+ "Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
return false;
#endif
}
@@ -3242,12 +4298,11 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
*/
bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
{
- if (amdgpu_sriov_vf(adev) ||
- adev->enable_virtual_display ||
+ if (adev->enable_virtual_display ||
(adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
return false;
- return amdgpu_device_asic_has_dc_support(adev->asic_type);
+ return amdgpu_device_asic_has_dc_support(adev->pdev, adev->asic_type);
}
static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
@@ -3269,20 +4324,18 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
task_barrier_enter(&hive->tb);
- adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
+ adev->asic_reset_res = amdgpu_device_baco_enter(adev);
if (adev->asic_reset_res)
goto fail;
task_barrier_exit(&hive->tb);
- adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
+ adev->asic_reset_res = amdgpu_device_baco_exit(adev);
if (adev->asic_reset_res)
goto fail;
- if (adev->mmhub.ras_funcs &&
- adev->mmhub.ras_funcs->reset_ras_error_count)
- adev->mmhub.ras_funcs->reset_ras_error_count(adev);
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
} else {
task_barrier_full(&hive->tb);
@@ -3291,7 +4344,8 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
fail:
if (adev->asic_reset_res)
- DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
+ dev_warn(adev->dev,
+ "ASIC reset failed with error, %d for drm dev, %s",
adev->asic_reset_res, adev_to_drm(adev)->unique);
amdgpu_put_xgmi_hive(hive);
}
@@ -3304,76 +4358,155 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
long timeout;
int ret = 0;
- /*
- * By default timeout for non compute jobs is 10000
- * and 60000 for compute jobs.
- * In SR-IOV or passthrough mode, timeout for compute
- * jobs are 60000 by default.
- */
- adev->gfx_timeout = msecs_to_jiffies(10000);
- adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
- if (amdgpu_sriov_vf(adev))
- adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
- msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
- else
- adev->compute_timeout = msecs_to_jiffies(60000);
+ /* By default timeout for all queues is 2 sec */
+ adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
+ adev->video_timeout = msecs_to_jiffies(2000);
- if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
- while ((timeout_setting = strsep(&input, ",")) &&
- strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
- ret = kstrtol(timeout_setting, 0, &timeout);
- if (ret)
- return ret;
+ if (!strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH))
+ return 0;
- if (timeout == 0) {
- index++;
- continue;
- } else if (timeout < 0) {
- timeout = MAX_SCHEDULE_TIMEOUT;
- dev_warn(adev->dev, "lockup timeout disabled");
- add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
- } else {
- timeout = msecs_to_jiffies(timeout);
- }
+ while ((timeout_setting = strsep(&input, ",")) &&
+ strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
+ ret = kstrtol(timeout_setting, 0, &timeout);
+ if (ret)
+ return ret;
- switch (index++) {
- case 0:
- adev->gfx_timeout = timeout;
- break;
- case 1:
- adev->compute_timeout = timeout;
- break;
- case 2:
- adev->sdma_timeout = timeout;
- break;
- case 3:
- adev->video_timeout = timeout;
- break;
- default:
- break;
- }
+ if (timeout == 0) {
+ index++;
+ continue;
+ } else if (timeout < 0) {
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ dev_warn(adev->dev, "lockup timeout disabled");
+ add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
+ } else {
+ timeout = msecs_to_jiffies(timeout);
}
- /*
- * There is only one value specified and
- * it should apply to all non-compute jobs.
- */
- if (index == 1) {
- adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
- if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
- adev->compute_timeout = adev->gfx_timeout;
+
+ switch (index++) {
+ case 0:
+ adev->gfx_timeout = timeout;
+ break;
+ case 1:
+ adev->compute_timeout = timeout;
+ break;
+ case 2:
+ adev->sdma_timeout = timeout;
+ break;
+ case 3:
+ adev->video_timeout = timeout;
+ break;
+ default:
+ break;
}
}
+ /* When only one value specified apply it to all queues. */
+ if (index == 1)
+ adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
+ adev->video_timeout = timeout;
+
return ret;
}
-static const struct attribute *amdgpu_dev_attributes[] = {
- &dev_attr_product_name.attr,
- &dev_attr_product_number.attr,
- &dev_attr_serial_number.attr,
- &dev_attr_pcie_replay_count.attr,
- NULL
-};
+/**
+ * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in pass-through mode
+ */
+static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
+{
+ struct iommu_domain *domain;
+
+ domain = iommu_get_domain_for_dev(adev->dev);
+ if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
+ adev->ram_is_direct_mapped = true;
+}
+
+#if defined(CONFIG_HSA_AMD_P2P)
+/**
+ * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Return: true if the IOMMU is remapping the BAR address (DMA or DMA-FQ domain), false otherwise
+ */
+static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
+{
+ struct iommu_domain *domain;
+
+ domain = iommu_get_domain_for_dev(adev->dev);
+ if (domain && (domain->type == IOMMU_DOMAIN_DMA ||
+ domain->type == IOMMU_DOMAIN_DMA_FQ))
+ return true;
+
+ return false;
+}
+#endif
+
+static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
+{
+ if (amdgpu_mcbp == 1)
+ adev->gfx.mcbp = true;
+ else if (amdgpu_mcbp == 0)
+ adev->gfx.mcbp = false;
+
+ if (amdgpu_sriov_vf(adev))
+ adev->gfx.mcbp = true;
+
+ if (adev->gfx.mcbp)
+ dev_info(adev->dev, "MCBP is enabled\n");
+}
+
+static int amdgpu_device_sys_interface_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_atombios_sysfs_init(adev);
+ if (r)
+ drm_err(&adev->ddev,
+ "registering atombios sysfs failed (%d).\n", r);
+
+ r = amdgpu_pm_sysfs_init(adev);
+ if (r)
+ dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r);
+
+ r = amdgpu_ucode_sysfs_init(adev);
+ if (r) {
+ adev->ucode_sysfs_en = false;
+ dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r);
+ } else
+ adev->ucode_sysfs_en = true;
+
+ r = amdgpu_device_attr_sysfs_init(adev);
+ if (r)
+ dev_err(adev->dev, "Could not create amdgpu device attr\n");
+
+ r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
+ if (r)
+ dev_err(adev->dev,
+ "Could not create amdgpu board attributes\n");
+
+ amdgpu_fru_sysfs_init(adev);
+ amdgpu_reg_state_sysfs_init(adev);
+ amdgpu_xcp_sysfs_init(adev);
+
+ return r;
+}
+
+static void amdgpu_device_sys_interface_fini(struct amdgpu_device *adev)
+{
+ if (adev->pm.sysfs_initialized)
+ amdgpu_pm_sysfs_fini(adev);
+ if (adev->ucode_sysfs_en)
+ amdgpu_ucode_sysfs_fini(adev);
+ amdgpu_device_attr_sysfs_fini(adev);
+ amdgpu_fru_sysfs_fini(adev);
+
+ amdgpu_reg_state_sysfs_fini(adev);
+ amdgpu_xcp_sysfs_fini(adev);
+}
/**
* amdgpu_device_init - initialize the driver
@@ -3388,11 +4521,11 @@ static const struct attribute *amdgpu_dev_attributes[] = {
int amdgpu_device_init(struct amdgpu_device *adev,
uint32_t flags)
{
- struct drm_device *ddev = adev_to_drm(adev);
struct pci_dev *pdev = adev->pdev;
int r, i;
bool px = false;
u32 max_MBps;
+ int tmp;
adev->shutdown = false;
adev->flags = flags;
@@ -3408,6 +4541,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->gmc.gart_size = 512 * 1024 * 1024;
adev->accel_working = false;
adev->num_rings = 0;
+ RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
adev->mman.buffer_funcs = NULL;
adev->mman.buffer_funcs_ring = NULL;
adev->vm_manager.vm_pte_funcs = NULL;
@@ -3421,10 +4555,14 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->smc_wreg = &amdgpu_invalid_wreg;
adev->pcie_rreg = &amdgpu_invalid_rreg;
adev->pcie_wreg = &amdgpu_invalid_wreg;
+ adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
+ adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
adev->pciep_rreg = &amdgpu_invalid_rreg;
adev->pciep_wreg = &amdgpu_invalid_wreg;
adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
+ adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
+ adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
adev->didt_rreg = &amdgpu_invalid_rreg;
@@ -3434,30 +4572,43 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
- DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
- amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
- pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
+ dev_info(
+ adev->dev,
+ "initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
+ amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
+ pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
/* mutex initialization are all done here so we
- * can recall function without having locking issues */
+ * can recall function without having locking issues
+ */
mutex_init(&adev->firmware.mutex);
mutex_init(&adev->pm.mutex);
mutex_init(&adev->gfx.gpu_clock_mutex);
mutex_init(&adev->srbm_mutex);
mutex_init(&adev->gfx.pipe_reserve_mutex);
mutex_init(&adev->gfx.gfx_off_mutex);
+ mutex_init(&adev->gfx.partition_mutex);
mutex_init(&adev->grbm_idx_mutex);
mutex_init(&adev->mn_lock);
mutex_init(&adev->virt.vf_errors.lock);
hash_init(adev->mn_hash);
- atomic_set(&adev->in_gpu_reset, 0);
- init_rwsem(&adev->reset_sem);
mutex_init(&adev->psp.mutex);
mutex_init(&adev->notifier_lock);
-
- r = amdgpu_device_init_apu_flags(adev);
- if (r)
- return r;
+ mutex_init(&adev->pm.stable_pstate_ctx_lock);
+ mutex_init(&adev->benchmark_mutex);
+ mutex_init(&adev->gfx.reset_sem_mutex);
+ /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
+ mutex_init(&adev->enforce_isolation_mutex);
+ for (i = 0; i < MAX_XCP; ++i) {
+ adev->isolation[i].spearhead = dma_fence_get_stub();
+ amdgpu_sync_create(&adev->isolation[i].active);
+ amdgpu_sync_create(&adev->isolation[i].prev);
+ }
+ mutex_init(&adev->gfx.userq_sch_mutex);
+ mutex_init(&adev->gfx.workload_profile_mutex);
+ mutex_init(&adev->vcn.workload_profile_mutex);
+
+ amdgpu_device_init_apu_flags(adev);
r = amdgpu_device_check_arguments(adev);
if (r)
@@ -3472,20 +4623,45 @@ int amdgpu_device_init(struct amdgpu_device *adev,
spin_lock_init(&adev->se_cac_idx_lock);
spin_lock_init(&adev->audio_endpt_idx_lock);
spin_lock_init(&adev->mm_stats.lock);
+ spin_lock_init(&adev->virt.rlcg_reg_lock);
+ spin_lock_init(&adev->wb.lock);
- INIT_LIST_HEAD(&adev->shadow_list);
- mutex_init(&adev->shadow_list_lock);
+ xa_init_flags(&adev->userq_xa, XA_FLAGS_LOCK_IRQ);
INIT_LIST_HEAD(&adev->reset_list);
+ INIT_LIST_HEAD(&adev->ras_list);
+
+ INIT_LIST_HEAD(&adev->pm.od_kobj_list);
+
+ xa_init(&adev->userq_doorbell_xa);
+
INIT_DELAYED_WORK(&adev->delayed_init_work,
amdgpu_device_delayed_init_work_handler);
INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
amdgpu_device_delay_enable_gfx_off);
+ /*
+ * Initialize the enforce_isolation work structures for each XCP
+ * partition. This work handler is responsible for enforcing shader
+ * isolation on AMD GPUs. It counts the number of emitted fences for
+ * each GFX and compute ring. If there are any fences, it schedules
+ * the `enforce_isolation_work` to be run after a delay. If there are
+ * no fences, it signals the Kernel Fusion Driver (KFD) to resume the
+ * runqueue.
+ */
+ for (i = 0; i < MAX_XCP; i++) {
+ INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work,
+ amdgpu_gfx_enforce_isolation_handler);
+ adev->gfx.enforce_isolation[i].adev = adev;
+ adev->gfx.enforce_isolation[i].xcp_id = i;
+ }
INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
+ INIT_WORK(&adev->userq_reset_work, amdgpu_userq_reset_work);
adev->gfx.gfx_off_req_count = 1;
+ adev->gfx.gfx_off_residency = 0;
+ adev->gfx.gfx_off_entrycount = 0;
adev->pm.ac_power = power_supply_is_system_supplied() > 0;
atomic_set(&adev->throttling_logging_enabled, 1);
@@ -3497,6 +4673,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
* for throttling interrupt) = 60 seconds.
*/
ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
+
ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
/* Registers mapping */
@@ -3513,22 +4690,27 @@ int amdgpu_device_init(struct amdgpu_device *adev,
atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
- if (adev->rmmio == NULL) {
+ if (!adev->rmmio)
return -ENOMEM;
- }
- DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
- DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
-
- amdgpu_device_get_pcie_info(adev);
- if (amdgpu_mcbp)
- DRM_INFO("MCBP is enabled\n");
+ dev_info(adev->dev, "register mmio base: 0x%08X\n",
+ (uint32_t)adev->rmmio_base);
+ dev_info(adev->dev, "register mmio size: %u\n",
+ (unsigned int)adev->rmmio_size);
- if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
- adev->enable_mes = true;
+ /*
+ * Reset domain needs to be present early, before XGMI hive discovered
+ * (if any) and initialized to use reset sem and in_gpu reset flag
+ * early on during init and before calling to RREG32.
+ */
+ adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
+ if (!adev->reset_domain)
+ return -ENOMEM;
/* detect hw virtualization here */
- amdgpu_detect_virtualization(adev);
+ amdgpu_virt_init(adev);
+
+ amdgpu_device_get_pcie_info(adev);
r = amdgpu_device_get_job_timeout_settings(adev);
if (r) {
@@ -3536,26 +4718,74 @@ int amdgpu_device_init(struct amdgpu_device *adev,
return r;
}
+ amdgpu_device_set_mcbp(adev);
+
+ /*
+ * By default, use default mode where all blocks are expected to be
+ * initialized. At present a 'swinit' of blocks is required to be
+ * completed before the need for a different level is detected.
+ */
+ amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
/* early init functions */
r = amdgpu_device_ip_early_init(adev);
if (r)
return r;
+ /*
+ * No need to remove conflicting FBs for non-display class devices.
+ * This prevents the sysfb from being freed accidentally.
+ */
+ if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
+ (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
+ /* Get rid of things like offb */
+ r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name);
+ if (r)
+ return r;
+ }
+
+ /* Enable TMZ based on IP_VERSION */
+ amdgpu_gmc_tmz_set(adev);
+
+ if (amdgpu_sriov_vf(adev) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
+ /* VF MMIO access (except mailbox range) from CPU
+ * will be blocked during sriov runtime
+ */
+ adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
+
+ amdgpu_gmc_noretry_set(adev);
+ /* Need to get xgmi info early to decide the reset behavior */
+ if (adev->gmc.xgmi.supported) {
+ r = adev->gfxhub.funcs->get_xgmi_info(adev);
+ if (r)
+ return r;
+ }
+
/* enable PCIE atomic ops */
- if (amdgpu_sriov_vf(adev))
- adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
- adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_enabled_flags ==
- (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
- else
+ if (amdgpu_sriov_vf(adev)) {
+ if (adev->virt.fw_reserve.p_pf2vf)
+ adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
+ adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
+ (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+ /* APUs with gfx9 onwards don't rely on PCIe atomics; rather, an internal
+ * path natively supports atomics, so set have_atomics_support to true.
+ */
+ } else if ((adev->flags & AMD_IS_APU) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) >
+ IP_VERSION(9, 0, 0))) {
+ adev->have_atomics_support = true;
+ } else {
adev->have_atomics_support =
!pci_enable_atomic_ops_to_root(adev->pdev,
PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+ }
+
if (!adev->have_atomics_support)
dev_info(adev->dev, "PCIE atomic ops is not supported\n");
/* doorbell bar mapping and doorbell index init*/
- amdgpu_device_doorbell_init(adev);
+ amdgpu_doorbell_init(adev);
if (amdgpu_emu_mode == 1) {
/* post the asic on emulation mode */
@@ -3566,7 +4796,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
amdgpu_reset_init(adev);
/* detect if we are with an SRIOV vbios */
- amdgpu_device_detect_sriov_bios(adev);
+ if (adev->bios)
+ amdgpu_device_detect_sriov_bios(adev);
/* check if we need to reset the asic
* E.g., driver was not cleanly unloaded previously, etc.
@@ -3574,30 +4805,26 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
if (adev->gmc.xgmi.num_physical_nodes) {
dev_info(adev->dev, "Pending hive reset.\n");
- adev->gmc.xgmi.pending_reset = true;
- /* Only need to init necessary block for SMU to handle the reset */
- for (i = 0; i < adev->num_ip_blocks; i++) {
- if (!adev->ip_blocks[i].status.valid)
- continue;
- if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
- DRM_DEBUG("IP %s disabled for hw_init.\n",
- adev->ip_blocks[i].version->funcs->name);
- adev->ip_blocks[i].status.hw = true;
- }
- }
+ amdgpu_set_init_level(adev,
+ AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
+ } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
+ !amdgpu_device_has_display_hardware(adev)) {
+ r = psp_gpu_reset(adev);
} else {
- r = amdgpu_asic_reset(adev);
- if (r) {
- dev_err(adev->dev, "asic reset on init failed\n");
- goto failed;
- }
+ tmp = amdgpu_reset_method;
+ /* It should do a default reset when loading or reloading the driver,
+ * regardless of the module parameter reset_method.
+ */
+ amdgpu_reset_method = AMD_RESET_METHOD_NONE;
+ r = amdgpu_asic_reset(adev);
+ amdgpu_reset_method = tmp;
}
- }
- pci_enable_pcie_error_reporting(adev->pdev);
+ if (r) {
+ dev_err(adev->dev, "asic reset on init failed\n");
+ goto failed;
+ }
+ }
/* Post card if necessary */
if (amdgpu_device_need_post(adev)) {
@@ -3606,7 +4833,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
r = -EINVAL;
goto failed;
}
- DRM_INFO("GPU posting now...\n");
+ dev_info(adev->dev, "GPU posting now...\n");
r = amdgpu_device_asic_init(adev);
if (r) {
dev_err(adev->dev, "gpu post error!\n");
@@ -3614,25 +4841,26 @@ int amdgpu_device_init(struct amdgpu_device *adev,
}
}
- if (adev->is_atom_fw) {
- /* Initialize clocks */
- r = amdgpu_atomfirmware_get_clock_info(adev);
- if (r) {
- dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
- amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
- goto failed;
- }
- } else {
- /* Initialize clocks */
- r = amdgpu_atombios_get_clock_info(adev);
- if (r) {
- dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
- amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
- goto failed;
+ if (adev->bios) {
+ if (adev->is_atom_fw) {
+ /* Initialize clocks */
+ r = amdgpu_atomfirmware_get_clock_info(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
+ goto failed;
+ }
+ } else {
+ /* Initialize clocks */
+ r = amdgpu_atombios_get_clock_info(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
+ goto failed;
+ }
+ /* init i2c buses */
+ amdgpu_i2c_init(adev);
}
- /* init i2c buses */
- if (!amdgpu_device_has_dc_support(adev))
- amdgpu_atombios_i2c_init(adev);
}
fence_driver_init:
@@ -3649,18 +4877,6 @@ fence_driver_init:
r = amdgpu_device_ip_init(adev);
if (r) {
- /* failed in exclusive mode due to timeout */
- if (amdgpu_sriov_vf(adev) &&
- !amdgpu_sriov_runtime(adev) &&
- amdgpu_virt_mmio_blocked(adev) &&
- !amdgpu_virt_wait_reset(adev)) {
- dev_err(adev->dev, "VF exclusive mode timeout\n");
- /* Don't send request since VF is inactive. */
- adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
- adev->virt.ops = NULL;
- r = -EAGAIN;
- goto release_ras_con;
- }
dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
goto release_ras_con;
@@ -3687,35 +4903,6 @@ fence_driver_init:
/* Get a log2 for easy divisions. */
adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
- amdgpu_fbdev_init(adev);
-
- r = amdgpu_pm_sysfs_init(adev);
- if (r) {
- adev->pm_sysfs_en = false;
- DRM_ERROR("registering pm debugfs failed (%d).\n", r);
- } else
- adev->pm_sysfs_en = true;
-
- r = amdgpu_ucode_sysfs_init(adev);
- if (r) {
- adev->ucode_sysfs_en = false;
- DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
- } else
- adev->ucode_sysfs_en = true;
-
- if ((amdgpu_testing & 1)) {
- if (adev->accel_working)
- amdgpu_test_moves(adev);
- else
- DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
- }
- if (amdgpu_benchmarking) {
- if (adev->accel_working)
- amdgpu_benchmark(adev, amdgpu_benchmarking);
- else
- DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
- }
-
/*
* Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
* Otherwise the mgpu fan boost feature will be skipped due to the
@@ -3726,7 +4913,7 @@ fence_driver_init:
/* enable clockgating, etc. after ib tests, etc. since some blocks require
* explicit gating rather than handling it automatically.
*/
- if (!adev->gmc.xgmi.pending_reset) {
+ if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
r = amdgpu_device_ip_late_init(adev);
if (r) {
dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
@@ -3739,12 +4926,19 @@ fence_driver_init:
msecs_to_jiffies(AMDGPU_RESUME_MS));
}
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_release_full_gpu(adev, true);
flush_delayed_work(&adev->delayed_init_work);
+ }
- r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
- if (r)
- dev_err(adev->dev, "Could not create amdgpu device attr\n");
+ if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
+ amdgpu_xgmi_reset_on_init(adev);
+ /*
+ * Register the sysfs interfaces after `late_init`, as some of the
+ * operations performed in `late_init` might affect the creation of
+ * the sysfs interfaces.
+ */
+ r = amdgpu_device_sys_interface_init(adev);
if (IS_ENABLED(CONFIG_PERF_EVENTS))
r = amdgpu_pmu_init(adev);
@@ -3757,24 +4951,45 @@ fence_driver_init:
/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
/* this will fail for cards that aren't VGA class devices, just
- * ignore it */
+ * ignore it
+ */
if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
- if (amdgpu_device_supports_px(ddev)) {
- px = true;
+ px = amdgpu_device_supports_px(adev);
+
+ if (px || (!dev_is_removable(&adev->pdev->dev) &&
+ apple_gmux_detect(NULL, NULL)))
vga_switcheroo_register_client(adev->pdev,
&amdgpu_switcheroo_ops, px);
+
+ if (px)
vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
- }
- if (adev->gmc.xgmi.pending_reset)
- queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
- msecs_to_jiffies(AMDGPU_RESUME_MS));
+ amdgpu_device_check_iommu_direct_map(adev);
+
+ adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
+ r = register_pm_notifier(&adev->pm_nb);
+ if (r)
+ goto failed;
return 0;
release_ras_con:
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_release_full_gpu(adev, true);
+
+ /* failed in exclusive mode due to timeout */
+ if (amdgpu_sriov_vf(adev) &&
+ !amdgpu_sriov_runtime(adev) &&
+ amdgpu_virt_mmio_blocked(adev) &&
+ !amdgpu_virt_wait_reset(adev)) {
+ dev_err(adev->dev, "VF exclusive mode timeout\n");
+ /* Don't send request since VF is inactive. */
+ adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
+ adev->virt.ops = NULL;
+ r = -EAGAIN;
+ }
amdgpu_release_ras_context(adev);
failed:
@@ -3785,11 +5000,12 @@ failed:
static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
{
+
/* Clear all CPU mappings pointing to this device */
unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
/* Unmap all mapped bars - Doorbell, registers and VRAM */
- amdgpu_device_doorbell_fini(adev);
+ amdgpu_doorbell_fini(adev);
iounmap(adev->rmmio);
adev->rmmio = NULL;
@@ -3798,14 +5014,14 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
adev->mman.aper_base_kaddr = NULL;
/* Memory manager related */
- if (!adev->gmc.xgmi.connected_to_cpu) {
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
arch_phys_wc_del(adev->gmc.vram_mtrr);
arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
}
}
/**
- * amdgpu_device_fini - tear down the driver
+ * amdgpu_device_fini_hw - tear down the driver
*
* @adev: amdgpu_device pointer
*
@@ -3816,15 +5032,16 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
{
dev_info(adev->dev, "amdgpu: finishing device.\n");
flush_delayed_work(&adev->delayed_init_work);
- if (adev->mman.initialized) {
- flush_delayed_work(&adev->mman.bdev.wq);
- ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
- }
+
+ if (adev->mman.initialized)
+ drain_workqueue(adev->mman.bdev.wq);
adev->shutdown = true;
+ unregister_pm_notifier(&adev->pm_nb);
+
/* make sure IB test finished before entering exclusive mode
* to avoid preemption on IB test
- * */
+ */
if (amdgpu_sriov_vf(adev)) {
amdgpu_virt_request_full_gpu(adev, false);
amdgpu_virt_fini_data_exchange(adev);
@@ -3832,7 +5049,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
/* disable all interrupts */
amdgpu_irq_disable_all(adev);
- if (adev->mode_info.mode_config_initialized){
+ if (adev->mode_info.mode_config_initialized) {
if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
drm_helper_force_disable_all(adev_to_drm(adev));
else
@@ -3840,58 +5057,90 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
}
amdgpu_fence_driver_hw_fini(adev);
- if (adev->pm_sysfs_en)
- amdgpu_pm_sysfs_fini(adev);
- if (adev->ucode_sysfs_en)
- amdgpu_ucode_sysfs_fini(adev);
- sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
+ amdgpu_device_sys_interface_fini(adev);
+
+ /* the ras feature must be disabled before hw fini */
+ amdgpu_ras_pre_fini(adev);
- amdgpu_fbdev_fini(adev);
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
amdgpu_device_ip_fini_early(adev);
amdgpu_irq_fini_hw(adev);
- ttm_device_clear_dma_mappings(&adev->mman.bdev);
+ if (adev->mman.initialized)
+ ttm_device_clear_dma_mappings(&adev->mman.bdev);
amdgpu_gart_dummy_page_fini(adev);
- amdgpu_device_unmap_mmio(adev);
+ if (drm_dev_is_unplugged(adev_to_drm(adev)))
+ amdgpu_device_unmap_mmio(adev);
+
}
void amdgpu_device_fini_sw(struct amdgpu_device *adev)
{
- amdgpu_fence_driver_sw_fini(adev);
+ int i, idx;
+ bool px;
+
amdgpu_device_ip_fini(adev);
- release_firmware(adev->firmware.gpu_info_fw);
- adev->firmware.gpu_info_fw = NULL;
+ amdgpu_fence_driver_sw_fini(adev);
+ amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
adev->accel_working = false;
+ dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
+ for (i = 0; i < MAX_XCP; ++i) {
+ dma_fence_put(adev->isolation[i].spearhead);
+ amdgpu_sync_free(&adev->isolation[i].active);
+ amdgpu_sync_free(&adev->isolation[i].prev);
+ }
amdgpu_reset_fini(adev);
/* free i2c buses */
- if (!amdgpu_device_has_dc_support(adev))
- amdgpu_i2c_fini(adev);
+ amdgpu_i2c_fini(adev);
+
+ if (adev->bios) {
+ if (amdgpu_emu_mode != 1)
+ amdgpu_atombios_fini(adev);
+ amdgpu_bios_release(adev);
+ }
+
+ kfree(adev->fru_info);
+ adev->fru_info = NULL;
- if (amdgpu_emu_mode != 1)
- amdgpu_atombios_fini(adev);
+ kfree(adev->xcp_mgr);
+ adev->xcp_mgr = NULL;
- kfree(adev->bios);
- adev->bios = NULL;
- if (amdgpu_device_supports_px(adev_to_drm(adev))) {
+ px = amdgpu_device_supports_px(adev);
+
+ if (px || (!dev_is_removable(&adev->pdev->dev) &&
+ apple_gmux_detect(NULL, NULL)))
vga_switcheroo_unregister_client(adev->pdev);
+
+ if (px)
vga_switcheroo_fini_domain_pm_ops(adev->dev);
- }
+
if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
vga_client_unregister(adev->pdev);
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+
+ iounmap(adev->rmmio);
+ adev->rmmio = NULL;
+ drm_dev_exit(idx);
+ }
+
if (IS_ENABLED(CONFIG_PERF_EVENTS))
amdgpu_pmu_fini(adev);
- if (adev->mman.discovery_bin)
+ if (adev->discovery.bin)
amdgpu_discovery_fini(adev);
- kfree(adev->pci_state);
+ amdgpu_reset_put_reset_domain(adev->reset_domain);
+ adev->reset_domain = NULL;
+ kfree(adev->pci_state);
+ kfree(adev->pcie_reset_ctx.swds_pcistate);
+ kfree(adev->pcie_reset_ctx.swus_pcistate);
}
/**
@@ -3903,66 +5152,260 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
* at suspend time.
*
*/
-static void amdgpu_device_evict_resources(struct amdgpu_device *adev)
+static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
{
- /* No need to evict vram on APUs for suspend to ram */
- if (adev->in_s3 && (adev->flags & AMD_IS_APU))
- return;
+ int ret;
- if (amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM))
- DRM_WARN("evicting device resources failed\n");
+ /* No need to evict vram on APUs unless going to S4 */
+ if (!adev->in_s4 && (adev->flags & AMD_IS_APU))
+ return 0;
+
+ /* No need to evict when going to S5 through S4 callbacks */
+ if (system_state == SYSTEM_POWER_OFF)
+ return 0;
+ ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
+ if (ret) {
+ dev_warn(adev->dev, "evicting device resources failed\n");
+ return ret;
+ }
+
+ if (adev->in_s4) {
+ ret = ttm_device_prepare_hibernation(&adev->mman.bdev);
+ if (ret)
+ dev_err(adev->dev, "prepare hibernation failed, %d\n", ret);
+ }
+ return ret;
}
/*
* Suspend & resume.
*/
 /**
+ * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events
+ * @nb: notifier block embedded in struct amdgpu_device as pm_nb
+ * @mode: PM event being signalled (e.g. PM_HIBERNATION_PREPARE)
+ * @data: unused
+ *
+ * This function is called when the system is about to suspend or hibernate.
+ * It is used to set the appropriate flags so that eviction can be optimized
+ * in the pm prepare callback.
+ *
+ * Only the hibernation events are acted on: adev->in_s4 is set on
+ * PM_HIBERNATION_PREPARE and cleared on PM_POST_HIBERNATION. Every other
+ * event leaves the device state untouched.
+ *
+ * Return: always NOTIFY_DONE.
+ */
+static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
+ void *data)
+{
+ struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb);
+
+ switch (mode) {
+ case PM_HIBERNATION_PREPARE:
+ adev->in_s4 = true;
+ break;
+ case PM_POST_HIBERNATION:
+ adev->in_s4 = false;
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+/**
+ * amdgpu_device_prepare - prepare for device suspend
+ *
+ * @dev: drm dev pointer
+ *
+ * Prepare to put the hw in the suspend state (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver suspend.
+ *
+ * Does nothing and returns 0 when the device's switch_power_state is
+ * DRM_SWITCH_POWER_OFF. Otherwise the steps are, in order: evict device
+ * resources, flush gfx.gfx_off_delay_work, then call the optional
+ * prepare_suspend() hook of every valid IP block. The first step or hook
+ * that fails ends the sequence and its error code is returned as-is.
+ */
+int amdgpu_device_prepare(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int i, r;
+
+ if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+ return 0;
+
+ /* Evict the majority of BOs before starting suspend sequence */
+ r = amdgpu_device_evict_resources(adev);
+ if (r)
+ return r;
+
+ flush_delayed_work(&adev->gfx.gfx_off_delay_work);
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
+ continue;
+ r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_complete - complete power state transition
+ *
+ * @dev: drm dev pointer
+ *
+ * Undo the changes from amdgpu_device_prepare. This will be
+ * called on all resume transitions, including those that failed.
+ *
+ * Walks ip_blocks[] in ascending index order and calls the optional
+ * complete() hook of every valid IP block; blocks that are not valid or
+ * that provide no hook are skipped. No status is collected from the hooks.
+ */
+void amdgpu_device_complete(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int i;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (!adev->ip_blocks[i].version->funcs->complete)
+ continue;
+ adev->ip_blocks[i].version->funcs->complete(&adev->ip_blocks[i]);
+ }
+}
+
+/**
* amdgpu_device_suspend - initiate device suspend
*
* @dev: drm dev pointer
- * @fbcon : notify the fbdev of suspend
+ * @notify_clients: notify in-kernel DRM clients
*
* Puts the hw in the suspend state (all asics).
* Returns 0 for success or an error on failure.
* Called at driver suspend.
*/
-int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
+int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
{
struct amdgpu_device *adev = drm_to_adev(dev);
+ int r, rec;
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
adev->in_suspend = true;
- if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
- DRM_WARN("smart shift update failed\n");
+ if (amdgpu_sriov_vf(adev)) {
+ if (!adev->in_runpm)
+ amdgpu_amdkfd_suspend_process(adev);
+ amdgpu_virt_fini_data_exchange(adev);
+ r = amdgpu_virt_request_full_gpu(adev, false);
+ if (r)
+ return r;
+ }
- drm_kms_helper_poll_disable(dev);
+ r = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3);
+ if (r)
+ goto unwind_sriov;
- if (fbcon)
- amdgpu_fbdev_set_suspend(adev, 1);
+ if (notify_clients)
+ drm_client_dev_suspend(adev_to_drm(adev));
cancel_delayed_work_sync(&adev->delayed_init_work);
amdgpu_ras_suspend(adev);
- amdgpu_device_ip_suspend_phase1(adev);
+ r = amdgpu_device_ip_suspend_phase1(adev);
+ if (r)
+ goto unwind_smartshift;
- if (!adev->in_s0ix)
- amdgpu_amdkfd_suspend(adev, adev->in_runpm);
+ amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
+ r = amdgpu_userq_suspend(adev);
+ if (r)
+ goto unwind_ip_phase1;
+
+ r = amdgpu_device_evict_resources(adev);
+ if (r)
+ goto unwind_userq;
- /* First evict vram memory */
- amdgpu_device_evict_resources(adev);
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
amdgpu_fence_driver_hw_fini(adev);
- amdgpu_device_ip_suspend_phase2(adev);
- /* This second call to evict device resources is to evict
- * the gart page table using the CPU.
+ r = amdgpu_device_ip_suspend_phase2(adev);
+ if (r)
+ goto unwind_evict;
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_release_full_gpu(adev, false);
+
+ return 0;
+
+unwind_evict:
+ if (adev->mman.buffer_funcs_ring->sched.ready)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ amdgpu_fence_driver_hw_init(adev);
+
+unwind_userq:
+ rec = amdgpu_userq_resume(adev);
+ if (rec) {
+ dev_warn(adev->dev, "failed to re-initialize user queues: %d\n", rec);
+ return r;
+ }
+ rec = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
+ if (rec) {
+ dev_warn(adev->dev, "failed to re-initialize kfd: %d\n", rec);
+ return r;
+ }
+
+unwind_ip_phase1:
+ /* suspend phase 1 = resume phase 3 */
+ rec = amdgpu_device_ip_resume_phase3(adev);
+ if (rec) {
+ dev_warn(adev->dev, "failed to re-initialize IPs phase1: %d\n", rec);
+ return r;
+ }
+
+unwind_smartshift:
+ rec = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0);
+ if (rec) {
+ dev_warn(adev->dev, "failed to re-update smart shift: %d\n", rec);
+ return r;
+ }
+
+ if (notify_clients)
+ drm_client_dev_resume(adev_to_drm(adev));
+
+ amdgpu_ras_resume(adev);
+
+unwind_sriov:
+ if (amdgpu_sriov_vf(adev)) {
+ rec = amdgpu_virt_request_full_gpu(adev, true);
+ if (rec) {
+ dev_warn(adev->dev, "failed to reinitialize sriov: %d\n", rec);
+ return r;
+ }
+ }
+
+ adev->in_suspend = adev->in_s0ix = adev->in_s3 = false;
+
+ return r;
+}
+
+static inline int amdgpu_virt_resume(struct amdgpu_device *adev)
+{
+ int r;
+ unsigned int prev_physical_node_id = adev->gmc.xgmi.physical_node_id;
+
+ /* During VM resume, QEMU programming of VF MSIX table (register GFXMSIX_VECT0_ADDR_LO)
+ * may not work. The access could be blocked by nBIF protection as VF isn't in
+ * exclusive access mode. Exclusive access is enabled now, disable/enable MSIX
+ * so that QEMU reprograms MSIX table.
*/
- amdgpu_device_evict_resources(adev);
+ amdgpu_restore_msix(adev);
+
+ r = adev->gfxhub.funcs->get_xgmi_info(adev);
+ if (r)
+ return r;
+
+ dev_info(adev->dev, "xgmi node, old id %d, new id %d\n",
+ prev_physical_node_id, adev->gmc.xgmi.physical_node_id);
+
+ adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
+ adev->vm_manager.vram_base_offset +=
+ adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
return 0;
}
@@ -3971,22 +5414,34 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
* amdgpu_device_resume - initiate device resume
*
* @dev: drm dev pointer
- * @fbcon : notify the fbdev of resume
+ * @notify_clients: notify in-kernel DRM clients
*
* Bring the hw back to operating state (all asics).
* Returns 0 for success or an error on failure.
* Called at driver resume.
*/
-int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
+int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
{
struct amdgpu_device *adev = drm_to_adev(dev);
int r = 0;
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_virt_request_full_gpu(adev, true);
+ if (r)
+ return r;
+ }
+
+ if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
+ r = amdgpu_virt_resume(adev);
+ if (r)
+ goto exit;
+ }
+
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
if (adev->in_s0ix)
- amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry);
+ amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
/* post card */
if (amdgpu_device_need_post(adev)) {
@@ -3996,58 +5451,73 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
}
r = amdgpu_device_ip_resume(adev);
+
if (r) {
dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
- return r;
+ goto exit;
}
- amdgpu_fence_driver_hw_init(adev);
+
+ r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
+ if (r)
+ goto exit;
+
+ r = amdgpu_userq_resume(adev);
+ if (r)
+ goto exit;
r = amdgpu_device_ip_late_init(adev);
if (r)
- return r;
+ goto exit;
queue_delayed_work(system_wq, &adev->delayed_init_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
+exit:
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_init_data_exchange(adev);
+ amdgpu_virt_release_full_gpu(adev, true);
- if (!adev->in_s0ix) {
- r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
- if (r)
- return r;
+ if (!r && !adev->in_runpm)
+ r = amdgpu_amdkfd_resume_process(adev);
}
+ if (r)
+ return r;
+
/* Make sure IB tests flushed */
flush_delayed_work(&adev->delayed_init_work);
- if (fbcon)
- amdgpu_fbdev_set_suspend(adev, 0);
-
- drm_kms_helper_poll_enable(dev);
+ if (notify_clients)
+ drm_client_dev_resume(adev_to_drm(adev));
amdgpu_ras_resume(adev);
- /*
- * Most of the connector probing functions try to acquire runtime pm
- * refs to ensure that the GPU is powered on when connector polling is
- * performed. Since we're calling this from a runtime PM callback,
- * trying to acquire rpm refs will cause us to deadlock.
- *
- * Since we're guaranteed to be holding the rpm lock, it's safe to
- * temporarily disable the rpm helpers so this doesn't deadlock us.
- */
+ if (adev->mode_info.num_crtc) {
+ /*
+ * Most of the connector probing functions try to acquire runtime pm
+ * refs to ensure that the GPU is powered on when connector polling is
+ * performed. Since we're calling this from a runtime PM callback,
+ * trying to acquire rpm refs will cause us to deadlock.
+ *
+ * Since we're guaranteed to be holding the rpm lock, it's safe to
+ * temporarily disable the rpm helpers so this doesn't deadlock us.
+ */
#ifdef CONFIG_PM
- dev->dev->power.disable_depth++;
+ dev->dev->power.disable_depth++;
#endif
- if (!amdgpu_device_has_dc_support(adev))
- drm_helper_hpd_irq_event(dev);
- else
- drm_kms_helper_hotplug_event(dev);
+ if (!adev->dc_enabled)
+ drm_helper_hpd_irq_event(dev);
+ else
+ drm_kms_helper_hotplug_event(dev);
#ifdef CONFIG_PM
- dev->dev->power.disable_depth--;
+ dev->dev->power.disable_depth--;
#endif
+ }
+
+ amdgpu_vram_mgr_clear_reset_blocks(adev);
adev->in_suspend = false;
- if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
- DRM_WARN("smart shift update failed\n");
+ if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0))
+ dev_warn(adev->dev, "smart shift update failed\n");
return 0;
}
@@ -4078,7 +5548,8 @@ static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].version->funcs->check_soft_reset)
adev->ip_blocks[i].status.hang =
- adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
+ adev->ip_blocks[i].version->funcs->check_soft_reset(
+ &adev->ip_blocks[i]);
if (adev->ip_blocks[i].status.hang) {
dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
asic_hang = true;
@@ -4107,7 +5578,7 @@ static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].status.hang &&
adev->ip_blocks[i].version->funcs->pre_soft_reset) {
- r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
+ r = adev->ip_blocks[i].version->funcs->pre_soft_reset(&adev->ip_blocks[i]);
if (r)
return r;
}
@@ -4169,7 +5640,7 @@ static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].status.hang &&
adev->ip_blocks[i].version->funcs->soft_reset) {
- r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
+ r = adev->ip_blocks[i].version->funcs->soft_reset(&adev->ip_blocks[i]);
if (r)
return r;
}
@@ -4198,7 +5669,7 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].status.hang &&
adev->ip_blocks[i].version->funcs->post_soft_reset)
- r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
+ r = adev->ip_blocks[i].version->funcs->post_soft_reset(&adev->ip_blocks[i]);
if (r)
return r;
}
@@ -4207,105 +5678,44 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
}
/**
- * amdgpu_device_recover_vram - Recover some VRAM contents
- *
- * @adev: amdgpu_device pointer
- *
- * Restores the contents of VRAM buffers from the shadows in GTT. Used to
- * restore things like GPUVM page tables after a GPU reset where
- * the contents of VRAM might be lost.
- *
- * Returns:
- * 0 on success, negative error code on failure.
- */
-static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
-{
- struct dma_fence *fence = NULL, *next = NULL;
- struct amdgpu_bo *shadow;
- struct amdgpu_bo_vm *vmbo;
- long r = 1, tmo;
-
- if (amdgpu_sriov_runtime(adev))
- tmo = msecs_to_jiffies(8000);
- else
- tmo = msecs_to_jiffies(100);
-
- dev_info(adev->dev, "recover vram bo from shadow start\n");
- mutex_lock(&adev->shadow_list_lock);
- list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
- shadow = &vmbo->bo;
- /* No need to recover an evicted BO */
- if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
- shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
- shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
- continue;
-
- r = amdgpu_bo_restore_shadow(shadow, &next);
- if (r)
- break;
-
- if (fence) {
- tmo = dma_fence_wait_timeout(fence, false, tmo);
- dma_fence_put(fence);
- fence = next;
- if (tmo == 0) {
- r = -ETIMEDOUT;
- break;
- } else if (tmo < 0) {
- r = tmo;
- break;
- }
- } else {
- fence = next;
- }
- }
- mutex_unlock(&adev->shadow_list_lock);
-
- if (fence)
- tmo = dma_fence_wait_timeout(fence, false, tmo);
- dma_fence_put(fence);
-
- if (r < 0 || tmo <= 0) {
- dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
- return -EIO;
- }
-
- dev_info(adev->dev, "recover vram bo from shadow done\n");
- return 0;
-}
-
-
-/**
* amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
*
* @adev: amdgpu_device pointer
- * @from_hypervisor: request from hypervisor
+ * @reset_context: amdgpu reset context pointer
*
* do VF FLR and reinitialize Asic
* return 0 means succeeded otherwise failed
*/
static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
- bool from_hypervisor)
+ struct amdgpu_reset_context *reset_context)
{
int r;
+ struct amdgpu_hive_info *hive = NULL;
- amdgpu_amdkfd_pre_reset(adev);
-
- if (from_hypervisor)
+ if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
+ if (!amdgpu_ras_get_fed_status(adev))
+ amdgpu_virt_ready_to_reset(adev);
+ amdgpu_virt_wait_reset(adev);
+ clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
r = amdgpu_virt_request_full_gpu(adev, true);
- else
+ } else {
r = amdgpu_virt_reset_gpu(adev);
+ }
if (r)
return r;
+ amdgpu_ras_clear_err_state(adev);
+ amdgpu_irq_gpu_reset_resume_helper(adev);
+
+ /* some sw clean up VF needs to do before recover */
+ amdgpu_virt_post_reset(adev);
+
/* Resume IP prior to SMC */
r = amdgpu_device_ip_reinit_early_sriov(adev);
if (r)
- goto error;
+ return r;
amdgpu_virt_init_data_exchange(adev);
- /* we need recover gart prior to run SMC/CP/SDMA resume */
- amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev, TTM_PL_TT));
r = amdgpu_device_fw_loading(adev);
if (r)
@@ -4314,45 +5724,64 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
/* now we are okay to resume SMC/CP/SDMA */
r = amdgpu_device_ip_reinit_late_sriov(adev);
if (r)
- goto error;
+ return r;
+
+ hive = amdgpu_get_xgmi_hive(adev);
+ /* Update PSP FW topology after reset */
+ if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
+ r = amdgpu_xgmi_update_topology(hive, adev);
+ if (hive)
+ amdgpu_put_xgmi_hive(hive);
+ if (r)
+ return r;
- amdgpu_irq_gpu_reset_resume_helper(adev);
r = amdgpu_ib_ring_tests(adev);
- amdgpu_amdkfd_post_reset(adev);
+ if (r)
+ return r;
-error:
- if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
+ if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
amdgpu_inc_vram_lost(adev);
- r = amdgpu_device_recover_vram(adev);
- }
+
+ /* need to be called during full access so we can't do it later like
+ * bare-metal does.
+ */
+ amdgpu_amdkfd_post_reset(adev);
amdgpu_virt_release_full_gpu(adev, true);
- return r;
+ /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so ras needs to be resumed during reset */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
+ amdgpu_ras_resume(adev);
+
+ amdgpu_virt_ras_telemetry_post_reset(adev);
+
+ return 0;
}
/**
- * amdgpu_device_has_job_running - check if there is any job in mirror list
+ * amdgpu_device_has_job_running - check if there is any unfinished job
*
* @adev: amdgpu_device pointer
*
- * check if there is any job in mirror list
+ * check if there is any job running on the device when guest driver receives
+ * FLR notification from host driver. If there are still jobs running, then
+ * the guest driver will not respond to the FLR reset. Instead, let the job hit
+ * the timeout and the guest driver then issues the reset request.
*/
bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
{
int i;
- struct drm_sched_job *job;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!amdgpu_ring_sched_ready(ring))
continue;
- spin_lock(&ring->sched.job_list_lock);
- job = list_first_entry_or_null(&ring->sched.pending_list,
- struct drm_sched_job, list);
- spin_unlock(&ring->sched.job_list_lock);
- if (job)
+ if (amdgpu_fence_count_emitted(ring))
return true;
}
return false;
@@ -4368,46 +5797,37 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
*/
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
{
- if (!amdgpu_device_ip_check_soft_reset(adev)) {
- dev_info(adev->dev, "Timeout, but no hardware hang detected.\n");
- return false;
- }
if (amdgpu_gpu_recovery == 0)
goto disabled;
+ /* Skip soft reset check in fatal error mode */
+ if (!amdgpu_ras_is_poison_mode_supported(adev))
+ return true;
+
if (amdgpu_sriov_vf(adev))
return true;
if (amdgpu_gpu_recovery == -1) {
switch (adev->asic_type) {
- case CHIP_BONAIRE:
- case CHIP_HAWAII:
- case CHIP_TOPAZ:
- case CHIP_TONGA:
- case CHIP_FIJI:
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- case CHIP_VEGAM:
- case CHIP_VEGA20:
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_RAVEN:
- case CHIP_ARCTURUS:
- case CHIP_RENOIR:
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_BEIGE_GOBY:
- case CHIP_VANGOGH:
- case CHIP_ALDEBARAN:
- break;
- default:
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_VERDE:
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+#endif
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ case CHIP_CYAN_SKILLFISH:
goto disabled;
+ default:
+ break;
}
}
@@ -4420,88 +5840,121 @@ disabled:
int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
{
- u32 i;
- int ret = 0;
+ u32 i;
+ int ret = 0;
- amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+ if (adev->bios)
+ amdgpu_atombios_scratch_regs_engine_hung(adev, true);
- dev_info(adev->dev, "GPU mode1 reset\n");
+ dev_info(adev->dev, "GPU mode1 reset\n");
- /* disable BM */
- pci_clear_master(adev->pdev);
+ /* Cache the state before bus master disable. The saved config space
+ * values are used in other cases like restore after mode-2 reset.
+ */
+ amdgpu_device_cache_pci_state(adev->pdev);
- amdgpu_device_cache_pci_state(adev->pdev);
+ /* disable BM */
+ pci_clear_master(adev->pdev);
- if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
- dev_info(adev->dev, "GPU smu mode1 reset\n");
- ret = amdgpu_dpm_mode1_reset(adev);
- } else {
- dev_info(adev->dev, "GPU psp mode1 reset\n");
- ret = psp_gpu_reset(adev);
- }
+ if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
+ dev_info(adev->dev, "GPU smu mode1 reset\n");
+ ret = amdgpu_dpm_mode1_reset(adev);
+ } else {
+ dev_info(adev->dev, "GPU psp mode1 reset\n");
+ ret = psp_gpu_reset(adev);
+ }
- if (ret)
- dev_err(adev->dev, "GPU mode1 reset failed\n");
+ if (ret)
+ goto mode1_reset_failed;
- amdgpu_device_load_pci_state(adev->pdev);
+ amdgpu_device_load_pci_state(adev->pdev);
+ ret = amdgpu_psp_wait_for_bootloader(adev);
+ if (ret)
+ goto mode1_reset_failed;
- /* wait for asic to come out of reset */
- for (i = 0; i < adev->usec_timeout; i++) {
- u32 memsize = adev->nbio.funcs->get_memsize(adev);
+ /* wait for asic to come out of reset */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ u32 memsize = adev->nbio.funcs->get_memsize(adev);
- if (memsize != 0xffffffff)
- break;
- udelay(1);
- }
+ if (memsize != 0xffffffff)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ ret = -ETIMEDOUT;
+ goto mode1_reset_failed;
+ }
+
+ if (adev->bios)
+ amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+
+ return 0;
- amdgpu_atombios_scratch_regs_engine_hung(adev, false);
- return ret;
+mode1_reset_failed:
+ dev_err(adev->dev, "GPU mode1 reset failed\n");
+ return ret;
+}
+
+int amdgpu_device_link_reset(struct amdgpu_device *adev)
+{
+ int ret = 0;
+
+ dev_info(adev->dev, "GPU link reset\n");
+
+ if (!amdgpu_reset_in_dpc(adev))
+ ret = amdgpu_dpm_link_reset(adev);
+
+ if (ret)
+ goto link_reset_failed;
+
+ ret = amdgpu_psp_wait_for_bootloader(adev);
+ if (ret)
+ goto link_reset_failed;
+
+ return 0;
+
+link_reset_failed:
+ dev_err(adev->dev, "GPU link reset failed\n");
+ return ret;
}
int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
struct amdgpu_reset_context *reset_context)
{
- int i, j, r = 0;
+ int i, r = 0;
struct amdgpu_job *job = NULL;
+ struct amdgpu_device *tmp_adev = reset_context->reset_req_dev;
bool need_full_reset =
test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
if (reset_context->reset_req_dev == adev)
job = reset_context->job;
- if (amdgpu_sriov_vf(adev)) {
- /* stop the data exchange thread */
- amdgpu_virt_fini_data_exchange(adev);
- }
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_pre_reset(adev);
+
+ amdgpu_fence_driver_isr_toggle(adev, true);
/* block all schedulers and reset given job's ring */
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!amdgpu_ring_sched_ready(ring))
continue;
- /*clear job fence from fence drv to avoid force_completion
- *leave NULL and vm flush fence in fence drv */
- for (j = 0; j <= ring->fence_drv.num_fences_mask; j++) {
- struct dma_fence *old, **ptr;
-
- ptr = &ring->fence_drv.fences[j];
- old = rcu_dereference_protected(*ptr, 1);
- if (old && test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &old->flags)) {
- RCU_INIT_POINTER(*ptr, NULL);
- }
- }
/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
amdgpu_fence_driver_force_completion(ring);
}
+ amdgpu_fence_driver_isr_toggle(adev, false);
+
if (job && job->vm)
drm_sched_increase_karma(&job->base);
r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
/* If reset handler not implemented, continue; otherwise return */
- if (r == -ENOSYS)
+ if (r == -EOPNOTSUPP)
r = 0;
else
return r;
@@ -4512,7 +5965,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
if (!need_full_reset)
need_full_reset = amdgpu_device_ip_need_full_reset(adev);
- if (!need_full_reset) {
+ if (!need_full_reset && amdgpu_gpu_recovery &&
+ amdgpu_device_ip_check_soft_reset(adev)) {
amdgpu_device_ip_pre_soft_reset(adev);
r = amdgpu_device_ip_soft_reset(adev);
amdgpu_device_ip_post_soft_reset(adev);
@@ -4522,6 +5976,16 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
}
}
+ if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
+ dev_info(tmp_adev->dev, "Dumping IP State\n");
+ /* Trigger ip dump before we reset the asic */
+ for (i = 0; i < tmp_adev->num_ip_blocks; i++)
+ if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
+ tmp_adev->ip_blocks[i].version->funcs
+ ->dump_ip_state((void *)&tmp_adev->ip_blocks[i]);
+ dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
+ }
+
if (need_full_reset)
r = amdgpu_device_ip_suspend(adev);
if (need_full_reset)
@@ -4534,106 +5998,78 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
return r;
}
-int amdgpu_do_asic_reset(struct list_head *device_list_handle,
- struct amdgpu_reset_context *reset_context)
+int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
{
- struct amdgpu_device *tmp_adev = NULL;
- bool need_full_reset, skip_hw_reset, vram_lost = false;
- int r = 0;
+ struct list_head *device_list_handle;
+ bool full_reset, vram_lost = false;
+ struct amdgpu_device *tmp_adev;
+ int r, init_level;
- /* Try reset handler method first */
- tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
- reset_list);
- r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
- /* If reset handler not implemented, continue; otherwise return */
- if (r == -ENOSYS)
- r = 0;
- else
- return r;
-
- /* Reset handler not implemented, use the default method */
- need_full_reset =
- test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
- skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
-
- /*
- * ASIC reset has to be done on all XGMI hive nodes ASAP
- * to allow proper links negotiation in FW (within 1 sec)
- */
- if (!skip_hw_reset && need_full_reset) {
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
- /* For XGMI run all resets in parallel to speed up the process */
- if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
- tmp_adev->gmc.xgmi.pending_reset = false;
- if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
- r = -EALREADY;
- } else
- r = amdgpu_asic_reset(tmp_adev);
+ device_list_handle = reset_context->reset_device_list;
- if (r) {
- dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
- r, adev_to_drm(tmp_adev)->unique);
- break;
- }
- }
-
- /* For XGMI wait for all resets to complete before proceed */
- if (!r) {
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
- if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
- flush_work(&tmp_adev->xgmi_reset_work);
- r = tmp_adev->asic_reset_res;
- if (r)
- break;
- }
- }
- }
- }
+ if (!device_list_handle)
+ return -EINVAL;
- if (!r && amdgpu_ras_intr_triggered()) {
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
- if (tmp_adev->mmhub.ras_funcs &&
- tmp_adev->mmhub.ras_funcs->reset_ras_error_count)
- tmp_adev->mmhub.ras_funcs->reset_ras_error_count(tmp_adev);
- }
+ full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
- amdgpu_ras_intr_cleared();
- }
+ /**
+ * If it's reset on init, it's default init level, otherwise keep level
+ * as recovery level.
+ */
+ if (reset_context->method == AMD_RESET_METHOD_ON_INIT)
+ init_level = AMDGPU_INIT_LEVEL_DEFAULT;
+ else
+ init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY;
+ r = 0;
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
- if (need_full_reset) {
+ amdgpu_set_init_level(tmp_adev, init_level);
+ if (full_reset) {
/* post card */
+ amdgpu_reset_set_dpc_status(tmp_adev, false);
+ amdgpu_ras_clear_err_state(tmp_adev);
r = amdgpu_device_asic_init(tmp_adev);
if (r) {
dev_warn(tmp_adev->dev, "asic atom init failed!");
} else {
dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
- r = amdgpu_amdkfd_resume_iommu(tmp_adev);
- if (r)
- goto out;
r = amdgpu_device_ip_resume_phase1(tmp_adev);
if (r)
goto out;
vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
+
+ if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
+ amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job);
+
if (vram_lost) {
- DRM_INFO("VRAM is lost due to GPU reset!\n");
+ dev_info(
+ tmp_adev->dev,
+ "VRAM is lost due to GPU reset!\n");
amdgpu_inc_vram_lost(tmp_adev);
}
- r = amdgpu_gtt_mgr_recover(ttm_manager_type(&tmp_adev->mman.bdev, TTM_PL_TT));
- if (r)
- goto out;
-
r = amdgpu_device_fw_loading(tmp_adev);
if (r)
return r;
+ r = amdgpu_xcp_restore_partition_mode(
+ tmp_adev->xcp_mgr);
+ if (r)
+ goto out;
+
r = amdgpu_device_ip_resume_phase2(tmp_adev);
if (r)
goto out;
+ if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
+ amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
+
+ r = amdgpu_device_ip_resume_phase3(tmp_adev);
+ if (r)
+ goto out;
+
if (vram_lost)
amdgpu_device_fill_reset_magic(tmp_adev);
@@ -4651,7 +6087,11 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
if (r)
goto out;
- amdgpu_fbdev_set_suspend(tmp_adev, 0);
+ r = amdgpu_userq_post_reset(tmp_adev, vram_lost);
+ if (r)
+ goto out;
+
+ drm_client_dev_resume(adev_to_drm(tmp_adev));
/*
* The GPU enters bad state once faulty pages
@@ -4663,7 +6103,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
* bad_page_threshold value to fix this once
* probing driver again.
*/
- if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
+ if (!amdgpu_ras_is_rma(tmp_adev)) {
/* must succeed. */
amdgpu_ras_resume(tmp_adev);
} else {
@@ -4681,44 +6121,111 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
out:
if (!r) {
+ /* IP init is complete now, set level as default */
+ amdgpu_set_init_level(tmp_adev,
+ AMDGPU_INIT_LEVEL_DEFAULT);
amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
r = amdgpu_ib_ring_tests(tmp_adev);
if (r) {
dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
- need_full_reset = true;
r = -EAGAIN;
goto end;
}
}
- if (!r)
- r = amdgpu_device_recover_vram(tmp_adev);
- else
+ if (r)
tmp_adev->asic_reset_res = r;
}
end:
- if (need_full_reset)
+ return r;
+}
+
+int amdgpu_do_asic_reset(struct list_head *device_list_handle,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ bool need_full_reset, skip_hw_reset;
+ int r = 0;
+
+ /* Try reset handler method first */
+ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
+ reset_list);
+
+ reset_context->reset_device_list = device_list_handle;
+ r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
+ /* If reset handler not implemented, continue; otherwise return */
+ if (r == -EOPNOTSUPP)
+ r = 0;
+ else
+ return r;
+
+ /* Reset handler not implemented, use the default method */
+ need_full_reset =
+ test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+ skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
+
+ /*
+ * ASIC reset has to be done on all XGMI hive nodes ASAP
+ * to allow proper links negotiation in FW (within 1 sec)
+ */
+ if (!skip_hw_reset && need_full_reset) {
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ /* For XGMI run all resets in parallel to speed up the process */
+ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
+ if (!queue_work(system_unbound_wq,
+ &tmp_adev->xgmi_reset_work))
+ r = -EALREADY;
+ } else
+ r = amdgpu_asic_reset(tmp_adev);
+
+ if (r) {
+ dev_err(tmp_adev->dev,
+ "ASIC reset failed with error, %d for drm dev, %s",
+ r, adev_to_drm(tmp_adev)->unique);
+ goto out;
+ }
+ }
+
+ /* For XGMI wait for all resets to complete before proceed */
+ if (!r) {
+ list_for_each_entry(tmp_adev, device_list_handle,
+ reset_list) {
+ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
+ flush_work(&tmp_adev->xgmi_reset_work);
+ r = tmp_adev->asic_reset_res;
+ if (r)
+ break;
+ }
+ }
+ }
+ }
+
+ if (!r && amdgpu_ras_intr_triggered()) {
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ amdgpu_ras_reset_error_count(tmp_adev,
+ AMDGPU_RAS_BLOCK__MMHUB);
+ }
+
+ amdgpu_ras_intr_cleared();
+ }
+
+ r = amdgpu_device_reinit_after_reset(reset_context);
+ if (r == -EAGAIN)
set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
else
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+
+out:
return r;
}
-static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
- struct amdgpu_hive_info *hive)
+static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
{
- if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
- return false;
-
- if (hive) {
- down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
- } else {
- down_write(&adev->reset_sem);
- }
switch (amdgpu_asic_reset_method(adev)) {
case AMD_RESET_METHOD_MODE1:
+ case AMD_RESET_METHOD_LINK:
adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
break;
case AMD_RESET_METHOD_MODE2:
@@ -4728,56 +6235,12 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
adev->mp1_state = PP_MP1_STATE_NONE;
break;
}
-
- return true;
}
-static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
+static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
{
amdgpu_vf_error_trans_all(adev);
adev->mp1_state = PP_MP1_STATE_NONE;
- atomic_set(&adev->in_gpu_reset, 0);
- up_write(&adev->reset_sem);
-}
-
-/*
- * to lockup a list of amdgpu devices in a hive safely, if not a hive
- * with multiple nodes, it will be similar as amdgpu_device_lock_adev.
- *
- * unlock won't require roll back.
- */
-static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct amdgpu_hive_info *hive)
-{
- struct amdgpu_device *tmp_adev = NULL;
-
- if (adev->gmc.xgmi.num_physical_nodes > 1) {
- if (!hive) {
- dev_err(adev->dev, "Hive is NULL while device has multiple xgmi nodes");
- return -ENODEV;
- }
- list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
- if (!amdgpu_device_lock_adev(tmp_adev, hive))
- goto roll_back;
- }
- } else if (!amdgpu_device_lock_adev(adev, hive))
- return -EAGAIN;
-
- return 0;
-roll_back:
- if (!list_is_first(&tmp_adev->gmc.xgmi.head, &hive->device_list)) {
- /*
- * if the lockup iteration break in the middle of a hive,
- * it may means there may has a race issue,
- * or a hive device locked up independently.
- * we may be in trouble and may not, so will try to roll back
- * the lock and give out a warnning.
- */
- dev_warn(tmp_adev->dev, "Hive lock iteration broke in the middle. Rolling back to unlock");
- list_for_each_entry_continue_reverse(tmp_adev, &hive->device_list, gmc.xgmi.head) {
- amdgpu_device_unlock_adev(tmp_adev);
- }
- }
- return -EAGAIN;
}
static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
@@ -4790,6 +6253,8 @@ static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
pm_runtime_enable(&(p->dev));
pm_runtime_resume(&(p->dev));
}
+
+ pci_dev_put(p);
}
static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
@@ -4828,6 +6293,7 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
if (expires < ktime_get_mono_fast_ns()) {
dev_warn(adev->dev, "failed to suspend display audio\n");
+ pci_dev_put(p);
/* TODO: abort the succeeding gpu reset? */
return -ETIMEDOUT;
}
@@ -4835,187 +6301,107 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
pm_runtime_disable(&(p->dev));
+ pci_dev_put(p);
return 0;
}
-static void amdgpu_device_recheck_guilty_jobs(
- struct amdgpu_device *adev, struct list_head *device_list_handle,
- struct amdgpu_reset_context *reset_context)
+static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
{
- int i, r = 0;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- struct amdgpu_ring *ring = adev->rings[i];
- int ret = 0;
- struct drm_sched_job *s_job;
+#if defined(CONFIG_DEBUG_FS)
+ if (!amdgpu_sriov_vf(adev))
+ cancel_work(&adev->reset_work);
+#endif
+ cancel_work(&adev->userq_reset_work);
- if (!ring || !ring->sched.thread)
- continue;
+ if (adev->kfd.dev)
+ cancel_work(&adev->kfd.reset_work);
- s_job = list_first_entry_or_null(&ring->sched.pending_list,
- struct drm_sched_job, list);
- if (s_job == NULL)
- continue;
+ if (amdgpu_sriov_vf(adev))
+ cancel_work(&adev->virt.flr_work);
- /* clear job's guilty and depend the folowing step to decide the real one */
- drm_sched_reset_karma(s_job);
- /* for the real bad job, it will be resubmitted twice, adding a dma_fence_get
- * to make sure fence is balanced */
- dma_fence_get(s_job->s_fence->parent);
- drm_sched_resubmit_jobs_ext(&ring->sched, 1);
-
- ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout);
- if (ret == 0) { /* timeout */
- DRM_ERROR("Found the real bad job! ring:%s, job_id:%llx\n",
- ring->sched.name, s_job->id);
-
- /* set guilty */
- drm_sched_increase_karma(s_job);
-retry:
- /* do hw reset */
- if (amdgpu_sriov_vf(adev)) {
- amdgpu_virt_fini_data_exchange(adev);
- r = amdgpu_device_reset_sriov(adev, false);
- if (r)
- adev->asic_reset_res = r;
- } else {
- clear_bit(AMDGPU_SKIP_HW_RESET,
- &reset_context->flags);
- r = amdgpu_do_asic_reset(device_list_handle,
- reset_context);
- if (r && r == -EAGAIN)
- goto retry;
- }
+ if (con && adev->ras_enabled)
+ cancel_work(&con->recovery_work);
- /*
- * add reset counter so that the following
- * resubmitted job could flush vmid
- */
- atomic_inc(&adev->gpu_reset_counter);
- continue;
- }
+}
- /* got the hw fence, signal finished fence */
- atomic_dec(ring->sched.score);
- dma_fence_put(s_job->s_fence->parent);
- dma_fence_get(&s_job->s_fence->finished);
- dma_fence_signal(&s_job->s_fence->finished);
- dma_fence_put(&s_job->s_fence->finished);
+static int amdgpu_device_health_check(struct list_head *device_list_handle)
+{
+ struct amdgpu_device *tmp_adev;
+ int ret = 0;
- /* remove node from list and free the job */
- spin_lock(&ring->sched.job_list_lock);
- list_del_init(&s_job->list);
- spin_unlock(&ring->sched.job_list_lock);
- ring->sched.ops->free_job(s_job);
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ ret |= amdgpu_device_bus_status_check(tmp_adev);
}
-}
-/**
- * amdgpu_device_gpu_recover - reset the asic and recover scheduler
- *
- * @adev: amdgpu_device pointer
- * @job: which job trigger hang
- *
- * Attempt to reset the GPU if it has hung (all asics).
- * Attempt to do soft-reset or full-reset and reinitialize Asic
- * Returns 0 for success or an error on failure.
- */
+ return ret;
+}
-int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
- struct amdgpu_job *job)
+static void amdgpu_device_recovery_prepare(struct amdgpu_device *adev,
+ struct list_head *device_list,
+ struct amdgpu_hive_info *hive)
{
- struct list_head device_list, *device_list_handle = NULL;
- bool job_signaled = false;
- struct amdgpu_hive_info *hive = NULL;
struct amdgpu_device *tmp_adev = NULL;
- int i, r = 0;
- bool need_emergency_restart = false;
- bool audio_suspended = false;
- int tmp_vram_lost_counter;
- struct amdgpu_reset_context reset_context;
-
- memset(&reset_context, 0, sizeof(reset_context));
-
- /*
- * Special case: RAS triggered and full reset isn't supported
- */
- need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
-
- /*
- * Flush RAM to disk so that after reboot
- * the user can read log and see why the system rebooted.
- */
- if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
- DRM_WARN("Emergency reboot.");
-
- ksys_sync_helper();
- emergency_restart();
- }
-
- dev_info(adev->dev, "GPU %s begin!\n",
- need_emergency_restart ? "jobs stop":"reset");
/*
- * Here we trylock to avoid chain of resets executing from
- * either trigger by jobs on different adevs in XGMI hive or jobs on
- * different schedulers for same device while this TO handler is running.
- * We always reset all schedulers for device and all devices for XGMI
- * hive so that should take care of them too.
+ * Build list of devices to reset.
+ * In case we are in XGMI hive mode, resort the device list
+ * to put adev in the 1st position.
*/
- hive = amdgpu_get_xgmi_hive(adev);
- if (hive) {
- if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
- DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
- job ? job->base.id : -1, hive->hive_id);
- amdgpu_put_xgmi_hive(hive);
- if (job && job->vm)
- drm_sched_increase_karma(&job->base);
- return 0;
+ if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ list_add_tail(&tmp_adev->reset_list, device_list);
+ if (adev->shutdown)
+ tmp_adev->shutdown = true;
+ if (amdgpu_reset_in_dpc(adev))
+ tmp_adev->pcie_reset_ctx.in_link_reset = true;
}
- mutex_lock(&hive->hive_lock);
+ if (!list_is_first(&adev->reset_list, device_list))
+ list_rotate_to_front(&adev->reset_list, device_list);
+ } else {
+ list_add_tail(&adev->reset_list, device_list);
}
+}
- reset_context.method = AMD_RESET_METHOD_NONE;
- reset_context.reset_req_dev = adev;
- reset_context.job = job;
- reset_context.hive = hive;
- clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev,
+ struct list_head *device_list)
+{
+ struct amdgpu_device *tmp_adev = NULL;
- /*
- * lock the device before we try to operate the linked list
- * if didn't get the device lock, don't touch the linked list since
- * others may iterating it.
- */
- r = amdgpu_device_lock_hive_adev(adev, hive);
- if (r) {
- dev_info(adev->dev, "Bailing on TDR for s_job:%llx, as another already in progress",
- job ? job->base.id : -1);
+ if (list_empty(device_list))
+ return;
+ tmp_adev =
+ list_first_entry(device_list, struct amdgpu_device, reset_list);
+ amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
+}
- /* even we skipped this reset, still need to set the job to guilty */
- if (job && job->vm)
- drm_sched_increase_karma(&job->base);
- goto skip_recovery;
- }
+static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev,
+ struct list_head *device_list)
+{
+ struct amdgpu_device *tmp_adev = NULL;
- /*
- * Build list of devices to reset.
- * In case we are in XGMI hive mode, resort the device list
- * to put adev in the 1st position.
- */
- INIT_LIST_HEAD(&device_list);
- if (adev->gmc.xgmi.num_physical_nodes > 1) {
- list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
- list_add_tail(&tmp_adev->reset_list, &device_list);
- if (!list_is_first(&adev->reset_list, &device_list))
- list_rotate_to_front(&adev->reset_list, &device_list);
- device_list_handle = &device_list;
- } else {
- list_add_tail(&adev->reset_list, &device_list);
- device_list_handle = &device_list;
- }
+ if (list_empty(device_list))
+ return;
+ tmp_adev =
+ list_first_entry(device_list, struct amdgpu_device, reset_list);
+ amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
+}
+
+static void amdgpu_device_halt_activities(struct amdgpu_device *adev,
+ struct amdgpu_job *job,
+ struct amdgpu_reset_context *reset_context,
+ struct list_head *device_list,
+ struct amdgpu_hive_info *hive,
+ bool need_emergency_restart)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ int i;
/* block all schedulers and reset given job's ring */
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
+ amdgpu_device_set_mp1_state(tmp_adev);
+
/*
* Try to put the audio codec into suspend state
* before gpu reset started.
@@ -5027,32 +6413,33 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
* some audio codec errors.
*/
if (!amdgpu_device_suspend_display_audio(tmp_adev))
- audio_suspended = true;
+ tmp_adev->pcie_reset_ctx.audio_suspended = true;
amdgpu_ras_set_error_query_ready(tmp_adev, false);
cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
- if (!amdgpu_sriov_vf(tmp_adev))
- amdgpu_amdkfd_pre_reset(tmp_adev);
+ amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
/*
- * Mark these ASICs to be reseted as untracked first
+ * Mark these ASICs to be reset as untracked first
* And add them back after reset completed
*/
amdgpu_unregister_gpu_instance(tmp_adev);
- amdgpu_fbdev_set_suspend(tmp_adev, 1);
+ drm_client_dev_suspend(adev_to_drm(tmp_adev));
/* disable ras on ALL IPs */
- if (!need_emergency_restart &&
- amdgpu_device_ip_need_full_reset(tmp_adev))
+ if (!need_emergency_restart && !amdgpu_reset_in_dpc(adev) &&
+ amdgpu_device_ip_need_full_reset(tmp_adev))
amdgpu_ras_suspend(tmp_adev);
+ amdgpu_userq_pre_reset(tmp_adev);
+
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!amdgpu_ring_sched_ready(ring))
continue;
drm_sched_stop(&ring->sched, job ? &job->base : NULL);
@@ -5062,26 +6449,19 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
}
atomic_inc(&tmp_adev->gpu_reset_counter);
}
+}
- if (need_emergency_restart)
- goto skip_sched_resume;
-
- /*
- * Must check guilty signal here since after this point all old
- * HW fences are force signaled.
- *
- * job->base holds a reference to parent fence
- */
- if (job && job->base.s_fence->parent &&
- dma_fence_is_signaled(job->base.s_fence->parent)) {
- job_signaled = true;
- dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
- goto skip_hw_reset;
- }
+static int amdgpu_device_asic_reset(struct amdgpu_device *adev,
+ struct list_head *device_list,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
+ int r = 0;
retry: /* Rest of adevs pre asic reset from XGMI hive. */
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
- r = amdgpu_device_pre_asic_reset(tmp_adev, &reset_context);
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
+ r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
/*TODO Should we stop ?*/
if (r) {
dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
@@ -5090,68 +6470,107 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
}
}
- tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter));
/* Actual ASIC resets if needed.*/
/* Host driver will handle XGMI hive reset for SRIOV */
if (amdgpu_sriov_vf(adev)) {
- r = amdgpu_device_reset_sriov(adev, job ? false : true);
+
+ /* Bail out of reset early */
+ if (amdgpu_ras_is_rma(adev))
+ return -ENODEV;
+
+ if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) {
+ dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
+ amdgpu_ras_set_fed(adev, true);
+ set_bit(AMDGPU_HOST_FLR, &reset_context->flags);
+ }
+
+ r = amdgpu_device_reset_sriov(adev, reset_context);
+ if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) {
+ amdgpu_virt_release_full_gpu(adev, true);
+ goto retry;
+ }
if (r)
adev->asic_reset_res = r;
} else {
- r = amdgpu_do_asic_reset(device_list_handle, &reset_context);
+ r = amdgpu_do_asic_reset(device_list, reset_context);
if (r && r == -EAGAIN)
goto retry;
}
-skip_hw_reset:
-
- /* Post ASIC reset for all devs .*/
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
-
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
/*
- * Sometimes a later bad compute job can block a good gfx job as gfx
- * and compute ring share internal GC HW mutually. We add an additional
- * guilty jobs recheck step to find the real guilty job, it synchronously
- * submits and pends for the first job being signaled. If it gets timeout,
- * we identify it as a real guilty job.
+ * Drop any pending non scheduler resets queued before reset is done.
+ * Any reset scheduled after this point would be valid. Scheduler resets
+ * were already dropped during drm_sched_stop and no new ones can come
+ * in before drm_sched_start.
*/
- if (amdgpu_gpu_recovery == 2 &&
- !(tmp_vram_lost_counter < atomic_read(&adev->vram_lost_counter)))
- amdgpu_device_recheck_guilty_jobs(
- tmp_adev, device_list_handle, &reset_context);
+ amdgpu_device_stop_pending_resets(tmp_adev);
+ }
+
+ return r;
+}
+
+static int amdgpu_device_sched_resume(struct list_head *device_list,
+ struct amdgpu_reset_context *reset_context,
+ bool job_signaled)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ int i, r = 0;
+
+ /* Post ASIC reset for all devs .*/
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!amdgpu_ring_sched_ready(ring))
continue;
- /* No point to resubmit jobs if we didn't HW reset*/
- if (!tmp_adev->asic_reset_res && !job_signaled)
- drm_sched_resubmit_jobs(&ring->sched);
-
- drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
+ drm_sched_start(&ring->sched, 0);
}
- if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
+ if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
- }
-
- tmp_adev->asic_reset_res = 0;
- if (r) {
- /* bad news, how to tell it to userspace ? */
- dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
- amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
+ if (tmp_adev->asic_reset_res) {
+ /* bad news, how to tell it to userspace ?
+ * for ras error, we should report GPU bad status instead of
+ * reset failure
+ */
+ if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
+ !amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
+ dev_info(
+ tmp_adev->dev,
+ "GPU reset(%d) failed with error %d \n",
+ atomic_read(
+ &tmp_adev->gpu_reset_counter),
+ tmp_adev->asic_reset_res);
+ amdgpu_vf_error_put(tmp_adev,
+ AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0,
+ tmp_adev->asic_reset_res);
+ if (!r)
+ r = tmp_adev->asic_reset_res;
+ tmp_adev->asic_reset_res = 0;
} else {
- dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
- if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
- DRM_WARN("smart shift update failed\n");
+ dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n",
+ atomic_read(&tmp_adev->gpu_reset_counter));
+ if (amdgpu_acpi_smart_shift_update(tmp_adev,
+ AMDGPU_SS_DEV_D0))
+ dev_warn(tmp_adev->dev,
+ "smart shift update failed\n");
}
}
-skip_sched_resume:
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ return r;
+}
+
+static void amdgpu_device_gpu_resume(struct amdgpu_device *adev,
+ struct list_head *device_list,
+ bool need_emergency_restart)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
/* unlock kfd: SRIOV would do it separately */
if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
amdgpu_amdkfd_post_reset(tmp_adev);
@@ -5162,37 +6581,239 @@ skip_sched_resume:
if (!adev->kfd.init_complete)
amdgpu_amdkfd_device_init(adev);
- if (audio_suspended)
+ if (tmp_adev->pcie_reset_ctx.audio_suspended)
amdgpu_device_resume_display_audio(tmp_adev);
- amdgpu_device_unlock_adev(tmp_adev);
+
+ amdgpu_device_unset_mp1_state(tmp_adev);
+
+ amdgpu_ras_set_error_query_ready(tmp_adev, true);
+
+ }
+}
+
+
+/**
+ * amdgpu_device_gpu_recover - reset the asic and recover scheduler
+ *
+ * @adev: amdgpu_device pointer
+ * @job: which job triggered the hang
+ * @reset_context: amdgpu reset context pointer
+ *
+ * Attempt to reset the GPU if it has hung (all asics).
+ * Attempt to do soft-reset or full-reset and reinitialize Asic
+ * Returns 0 for success or an error on failure.
+ */
+
+int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+ struct amdgpu_job *job,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct list_head device_list;
+ bool job_signaled = false;
+ struct amdgpu_hive_info *hive = NULL;
+ int r = 0;
+ bool need_emergency_restart = false;
+
+ /*
+ * If it reaches here because of hang/timeout and a RAS error is
+ * detected at the same time, let RAS recovery take care of it.
+ */
+ if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) &&
+ !amdgpu_sriov_vf(adev) &&
+ reset_context->src != AMDGPU_RESET_SRC_RAS) {
+ dev_dbg(adev->dev,
+ "Gpu recovery from source: %d yielding to RAS error recovery handling",
+ reset_context->src);
+ return 0;
}
-skip_recovery:
+ /*
+ * Special case: RAS triggered and full reset isn't supported
+ */
+ need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
+
+ /*
+ * Flush RAM to disk so that after reboot
+ * the user can read log and see why the system rebooted.
+ */
+ if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
+ amdgpu_ras_get_context(adev)->reboot) {
+ dev_warn(adev->dev, "Emergency reboot.");
+
+ ksys_sync_helper();
+ emergency_restart();
+ }
+
+ dev_info(adev->dev, "GPU %s begin!. Source: %d\n",
+ need_emergency_restart ? "jobs stop" : "reset",
+ reset_context->src);
+
+ if (!amdgpu_sriov_vf(adev))
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive)
+ mutex_lock(&hive->hive_lock);
+
+ reset_context->job = job;
+ reset_context->hive = hive;
+ INIT_LIST_HEAD(&device_list);
+
+ amdgpu_device_recovery_prepare(adev, &device_list, hive);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ r = amdgpu_device_health_check(&device_list);
+ if (r)
+ goto end_reset;
+ }
+
+ /* Cannot be called after locking reset domain */
+ amdgpu_ras_pre_reset(adev, &device_list);
+
+ /* We need to lock reset domain only once both for XGMI and single device */
+ amdgpu_device_recovery_get_reset_lock(adev, &device_list);
+
+ amdgpu_device_halt_activities(adev, job, reset_context, &device_list,
+ hive, need_emergency_restart);
+ if (need_emergency_restart)
+ goto skip_sched_resume;
+ /*
+ * Must check guilty signal here since after this point all old
+ * HW fences are force signaled.
+ *
+ * job->base holds a reference to parent fence
+ */
+ if (job && dma_fence_is_signaled(&job->hw_fence->base)) {
+ job_signaled = true;
+ dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
+ goto skip_hw_reset;
+ }
+
+ r = amdgpu_device_asic_reset(adev, &device_list, reset_context);
+ if (r)
+ goto reset_unlock;
+skip_hw_reset:
+ r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled);
+ if (r)
+ goto reset_unlock;
+skip_sched_resume:
+ amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart);
+reset_unlock:
+ amdgpu_device_recovery_put_reset_lock(adev, &device_list);
+ amdgpu_ras_post_reset(adev, &device_list);
+end_reset:
if (hive) {
- atomic_set(&hive->in_reset, 0);
mutex_unlock(&hive->hive_lock);
amdgpu_put_xgmi_hive(hive);
}
- if (r && r != -EAGAIN)
+ if (r)
dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
+
+ atomic_set(&adev->reset_domain->reset_res, r);
+
+ if (!r) {
+ struct amdgpu_task_info *ti = NULL;
+
+ if (job)
+ ti = amdgpu_vm_get_task_info_pasid(adev, job->pasid);
+
+ drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE,
+ ti ? &ti->task : NULL);
+
+ amdgpu_vm_put_task_info(ti);
+ }
+
return r;
}
/**
+ * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
+ *
+ * @adev: amdgpu_device pointer
+ * @speed: pointer to the speed of the link
+ * @width: pointer to the width of the link
+ *
+ * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
+ * first physical partner to an AMD dGPU.
+ * This will exclude any virtual switches and links.
+ */
+static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
+ enum pci_bus_speed *speed,
+ enum pcie_link_width *width)
+{
+ struct pci_dev *parent = adev->pdev;
+
+ if (!speed || !width)
+ return;
+
+ *speed = PCI_SPEED_UNKNOWN;
+ *width = PCIE_LNK_WIDTH_UNKNOWN;
+
+ if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
+ while ((parent = pci_upstream_bridge(parent))) {
+ /* skip upstream/downstream switches internal to dGPU*/
+ if (parent->vendor == PCI_VENDOR_ID_ATI)
+ continue;
+ *speed = pcie_get_speed_cap(parent);
+ *width = pcie_get_width_cap(parent);
+ break;
+ }
+ } else {
+ /* use the current speeds rather than max if switching is not supported */
+ pcie_bandwidth_available(adev->pdev, NULL, speed, width);
+ }
+}
+
+/**
+ * amdgpu_device_gpu_bandwidth - find the bandwidth of the GPU
+ *
+ * @adev: amdgpu_device pointer
+ * @speed: pointer to the speed of the link
+ * @width: pointer to the width of the link
+ *
+ * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
+ * AMD dGPU which may be a virtual upstream bridge.
+ */
+static void amdgpu_device_gpu_bandwidth(struct amdgpu_device *adev,
+ enum pci_bus_speed *speed,
+ enum pcie_link_width *width)
+{
+ struct pci_dev *parent = adev->pdev;
+
+ if (!speed || !width)
+ return;
+
+ parent = pci_upstream_bridge(parent);
+ if (parent && parent->vendor == PCI_VENDOR_ID_ATI) {
+ /* use the upstream/downstream switches internal to dGPU */
+ *speed = pcie_get_speed_cap(parent);
+ *width = pcie_get_width_cap(parent);
+ while ((parent = pci_upstream_bridge(parent))) {
+ if (parent->vendor == PCI_VENDOR_ID_ATI) {
+ /* use the upstream/downstream switches internal to dGPU */
+ *speed = pcie_get_speed_cap(parent);
+ *width = pcie_get_width_cap(parent);
+ }
+ }
+ } else {
+ /* use the device itself */
+ *speed = pcie_get_speed_cap(adev->pdev);
+ *width = pcie_get_width_cap(adev->pdev);
+ }
+}
+
+/**
* amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
*
* @adev: amdgpu_device pointer
*
- * Fetchs and stores in the driver the PCIE capabilities (gen speed
+ * Fetches and stores in the driver the PCIE capabilities (gen speed
* and lanes) of the slot the device is in. Handles APUs and
* virtualized environments where PCIE config space may not be available.
*/
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
{
- struct pci_dev *pdev;
enum pci_bus_speed speed_cap, platform_speed_cap;
- enum pcie_link_width platform_link_width;
+ enum pcie_link_width platform_link_width, link_width;
if (amdgpu_pcie_gen_cap)
adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
@@ -5201,7 +6822,7 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
/* covers APUs as well */
- if (pci_is_root_bus(adev->pdev->bus)) {
+ if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
if (adev->pm.pcie_gen_mask == 0)
adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
if (adev->pm.pcie_mlw_mask == 0)
@@ -5212,13 +6833,12 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
return;
- pcie_bandwidth_available(adev->pdev, NULL,
- &platform_speed_cap, &platform_link_width);
+ amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
+ &platform_link_width);
+ amdgpu_device_gpu_bandwidth(adev, &speed_cap, &link_width);
if (adev->pm.pcie_gen_mask == 0) {
/* asic caps */
- pdev = adev->pdev;
- speed_cap = pcie_get_speed_cap(pdev);
if (speed_cap == PCI_SPEED_UNKNOWN) {
adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
@@ -5274,51 +6894,103 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
}
}
if (adev->pm.pcie_mlw_mask == 0) {
+ /* asic caps */
+ if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
+ adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK;
+ } else {
+ switch (link_width) {
+ case PCIE_LNK_X32:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X16:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X12:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X8:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X4:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X2:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X1:
+ adev->pm.pcie_mlw_mask |= CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1;
+ break;
+ default:
+ break;
+ }
+ }
+ /* platform caps */
if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
} else {
switch (platform_link_width) {
case PCIE_LNK_X32:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X16:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X12:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X8:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X4:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X2:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X1:
- adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
+ adev->pm.pcie_mlw_mask |= CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
break;
default:
break;
@@ -5327,12 +6999,51 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
}
}
-int amdgpu_device_baco_enter(struct drm_device *dev)
+/**
+ * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
+ *
+ * @adev: amdgpu_device pointer
+ * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
+ *
+ * Return true if @peer_adev can access (DMA) @adev through the PCIe
+ * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
+ * @peer_adev.
+ */
+bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev)
+{
+#ifdef CONFIG_HSA_AMD_P2P
+ bool p2p_access =
+ !adev->gmc.xgmi.connected_to_cpu &&
+ !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
+ if (!p2p_access)
+ dev_info(adev->dev, "PCIe P2P access from peer device %s is not supported by the chipset\n",
+ pci_name(peer_adev->pdev));
+
+ bool is_large_bar = adev->gmc.visible_vram_size &&
+ adev->gmc.real_vram_size == adev->gmc.visible_vram_size;
+ bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev);
+
+ if (!p2p_addressable) {
+ uint64_t address_mask = peer_adev->dev->dma_mask ?
+ ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
+ resource_size_t aper_limit =
+ adev->gmc.aper_base + adev->gmc.aper_size - 1;
+
+ p2p_addressable = !(adev->gmc.aper_base & address_mask ||
+ aper_limit & address_mask);
+ }
+ return pcie_p2p && is_large_bar && p2p_access && p2p_addressable;
+#else
+ return false;
+#endif
+}
+
+int amdgpu_device_baco_enter(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
- if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
+ if (!amdgpu_device_supports_baco(adev))
return -ENOTSUPP;
if (ras && adev->ras_enabled &&
@@ -5342,13 +7053,12 @@ int amdgpu_device_baco_enter(struct drm_device *dev)
return amdgpu_dpm_baco_enter(adev);
}
-int amdgpu_device_baco_exit(struct drm_device *dev)
+int amdgpu_device_baco_exit(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
int ret = 0;
- if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
+ if (!amdgpu_device_supports_baco(adev))
return -ENOTSUPP;
ret = amdgpu_dpm_baco_exit(adev);
@@ -5359,27 +7069,13 @@ int amdgpu_device_baco_exit(struct drm_device *dev)
adev->nbio.funcs->enable_doorbell_interrupt)
adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
- if (amdgpu_passthrough(adev) &&
+ if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
adev->nbio.funcs->clear_doorbell_interrupt)
adev->nbio.funcs->clear_doorbell_interrupt(adev);
return 0;
}
-static void amdgpu_cancel_all_tdr(struct amdgpu_device *adev)
-{
- int i;
-
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- struct amdgpu_ring *ring = adev->rings[i];
-
- if (!ring || !ring->sched.thread)
- continue;
-
- cancel_delayed_work_sync(&ring->sched.work_tdr);
- }
-}
-
/**
* amdgpu_pci_error_detected - Called when a PCI error is detected.
* @pdev: PCI device struct
@@ -5393,48 +7089,52 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
{
struct drm_device *dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(dev);
- int i;
-
- DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
+ struct amdgpu_hive_info *hive __free(xgmi_put_hive) =
+ amdgpu_get_xgmi_hive(adev);
+ struct amdgpu_reset_context reset_context;
+ struct list_head device_list;
- if (adev->gmc.xgmi.num_physical_nodes > 1) {
- DRM_WARN("No support for XGMI hive yet...");
- return PCI_ERS_RESULT_DISCONNECT;
- }
+ dev_info(adev->dev, "PCI error: detected callback!!\n");
adev->pci_channel_state = state;
switch (state) {
case pci_channel_io_normal:
+ dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state);
return PCI_ERS_RESULT_CAN_RECOVER;
- /* Fatal error, prepare for slot reset */
case pci_channel_io_frozen:
- /*
- * Cancel and wait for all TDRs in progress if failing to
- * set adev->in_gpu_reset in amdgpu_device_lock_adev
- *
- * Locking adev->reset_sem will prevent any external access
- * to GPU during PCI error recovery
- */
- while (!amdgpu_device_lock_adev(adev, NULL))
- amdgpu_cancel_all_tdr(adev);
-
- /*
- * Block any work scheduling as we do for regular GPU reset
- * for the duration of the recovery
- */
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- struct amdgpu_ring *ring = adev->rings[i];
-
- if (!ring || !ring->sched.thread)
- continue;
+ /* Fatal error, prepare for slot reset */
+ dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state);
+ if (hive) {
+ /* Hive devices should be able to support FW based
+ * link reset on other devices, if not return.
+ */
+ if (!amdgpu_dpm_is_link_reset_supported(adev)) {
+ dev_warn(adev->dev,
+ "No support for XGMI hive yet...\n");
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+ /* Set dpc status only if device is part of hive
+ * Non-hive devices should be able to recover after
+ * link reset.
+ */
+ amdgpu_reset_set_dpc_status(adev, true);
- drm_sched_stop(&ring->sched, NULL);
+ mutex_lock(&hive->hive_lock);
}
- atomic_inc(&adev->gpu_reset_counter);
+ memset(&reset_context, 0, sizeof(reset_context));
+ INIT_LIST_HEAD(&device_list);
+
+ amdgpu_device_recovery_prepare(adev, &device_list, hive);
+ amdgpu_device_recovery_get_reset_lock(adev, &device_list);
+ amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list,
+ hive, false);
+ if (hive)
+ mutex_unlock(&hive->hive_lock);
return PCI_ERS_RESULT_NEED_RESET;
case pci_channel_io_perm_failure:
/* Permanent error, prepare for device removal */
+ dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state);
return PCI_ERS_RESULT_DISCONNECT;
}
@@ -5447,8 +7147,10 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
*/
pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(dev);
- DRM_INFO("PCI error: mmio enabled callback!!\n");
+ dev_info(adev->dev, "PCI error: mmio enabled callback!!\n");
/* TODO - dump whatever for debugging purposes */
@@ -5472,21 +7174,38 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(dev);
- int r, i;
struct amdgpu_reset_context reset_context;
- u32 memsize;
+ struct amdgpu_device *tmp_adev;
+ struct amdgpu_hive_info *hive;
struct list_head device_list;
+ struct pci_dev *link_dev;
+ int r = 0, i, timeout;
+ u32 memsize;
+ u16 status;
- DRM_INFO("PCI error: slot reset callback!!\n");
+ dev_info(adev->dev, "PCI error: slot reset callback!!\n");
memset(&reset_context, 0, sizeof(reset_context));
- INIT_LIST_HEAD(&device_list);
- list_add_tail(&adev->reset_list, &device_list);
-
- /* wait for asic to come out of reset */
- msleep(500);
+ if (adev->pcie_reset_ctx.swus)
+ link_dev = adev->pcie_reset_ctx.swus;
+ else
+ link_dev = adev->pdev;
+ /* wait for asic to come out of reset, timeout = 10s */
+ timeout = 10000;
+ do {
+ usleep_range(10000, 10500);
+ r = pci_read_config_word(link_dev, PCI_VENDOR_ID, &status);
+ timeout -= 10;
+ } while (timeout > 0 && (status != PCI_VENDOR_ID_ATI) &&
+ (status != PCI_VENDOR_ID_AMD));
+
+ if ((status != PCI_VENDOR_ID_ATI) && (status != PCI_VENDOR_ID_AMD)) {
+ r = -ETIME;
+ goto out;
+ }
+ amdgpu_device_load_switch_state(adev);
/* Restore PCI confspace */
amdgpu_device_load_pci_state(pdev);
@@ -5506,25 +7225,40 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
- set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
-
- adev->no_hw_access = true;
- r = amdgpu_device_pre_asic_reset(adev, &reset_context);
- adev->no_hw_access = false;
- if (r)
- goto out;
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
+ INIT_LIST_HEAD(&device_list);
- r = amdgpu_do_asic_reset(&device_list, &reset_context);
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ mutex_lock(&hive->hive_lock);
+ reset_context.hive = hive;
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ tmp_adev->pcie_reset_ctx.in_link_reset = true;
+ list_add_tail(&tmp_adev->reset_list, &device_list);
+ }
+ } else {
+ set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
+ list_add_tail(&adev->reset_list, &device_list);
+ }
+ r = amdgpu_device_asic_reset(adev, &device_list, &reset_context);
out:
if (!r) {
if (amdgpu_device_cache_pci_state(adev->pdev))
pci_restore_state(adev->pdev);
-
- DRM_INFO("PCIe error recovery succeeded\n");
+ dev_info(adev->dev, "PCIe error recovery succeeded\n");
} else {
- DRM_ERROR("PCIe error recovery failed, err:%d", r);
- amdgpu_device_unlock_adev(adev);
+ dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r);
+ if (hive) {
+ list_for_each_entry(tmp_adev, &device_list, reset_list)
+ amdgpu_device_unset_mp1_state(tmp_adev);
+ }
+ amdgpu_device_recovery_put_reset_lock(adev, &device_list);
+ }
+
+ if (hive) {
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_put_xgmi_hive(hive);
}
return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
@@ -5541,27 +7275,95 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(dev);
- int i;
-
+ struct list_head device_list;
+ struct amdgpu_hive_info *hive = NULL;
+ struct amdgpu_device *tmp_adev = NULL;
- DRM_INFO("PCI error: resume callback!!\n");
+ dev_info(adev->dev, "PCI error: resume callback!!\n");
/* Only continue execution for the case of pci_channel_io_frozen */
if (adev->pci_channel_state != pci_channel_io_frozen)
return;
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- struct amdgpu_ring *ring = adev->rings[i];
+ INIT_LIST_HEAD(&device_list);
- if (!ring || !ring->sched.thread)
- continue;
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ mutex_lock(&hive->hive_lock);
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ tmp_adev->pcie_reset_ctx.in_link_reset = false;
+ list_add_tail(&tmp_adev->reset_list, &device_list);
+ }
+ } else
+ list_add_tail(&adev->reset_list, &device_list);
+ amdgpu_device_sched_resume(&device_list, NULL, NULL);
+ amdgpu_device_gpu_resume(adev, &device_list, false);
+ amdgpu_device_recovery_put_reset_lock(adev, &device_list);
- drm_sched_resubmit_jobs(&ring->sched);
- drm_sched_start(&ring->sched, true);
+ if (hive) {
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_put_xgmi_hive(hive);
}
+}
+
+static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev)
+{
+ struct pci_dev *swus, *swds;
+ int r;
- amdgpu_device_unlock_adev(adev);
+ swds = pci_upstream_bridge(adev->pdev);
+ if (!swds || swds->vendor != PCI_VENDOR_ID_ATI ||
+ pci_pcie_type(swds) != PCI_EXP_TYPE_DOWNSTREAM)
+ return;
+ swus = pci_upstream_bridge(swds);
+ if (!swus ||
+ (swus->vendor != PCI_VENDOR_ID_ATI &&
+ swus->vendor != PCI_VENDOR_ID_AMD) ||
+ pci_pcie_type(swus) != PCI_EXP_TYPE_UPSTREAM)
+ return;
+
+ /* If already saved, return */
+ if (adev->pcie_reset_ctx.swus)
+ return;
+ /* Upstream bridge is ATI, assume it's SWUS/DS architecture */
+ r = pci_save_state(swds);
+ if (r)
+ return;
+ adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(swds);
+
+ r = pci_save_state(swus);
+ if (r)
+ return;
+ adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(swus);
+
+ adev->pcie_reset_ctx.swus = swus;
+}
+
+static void amdgpu_device_load_switch_state(struct amdgpu_device *adev)
+{
+ struct pci_dev *pdev;
+ int r;
+
+ if (!adev->pcie_reset_ctx.swds_pcistate ||
+ !adev->pcie_reset_ctx.swus_pcistate)
+ return;
+
+ pdev = adev->pcie_reset_ctx.swus;
+ r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swus_pcistate);
+ if (!r) {
+ pci_restore_state(pdev);
+ } else {
+ dev_warn(adev->dev, "Failed to load SWUS state, err:%d\n", r);
+ return;
+ }
+
+ pdev = pci_upstream_bridge(adev->pdev);
+ r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swds_pcistate);
+ if (!r)
+ pci_restore_state(pdev);
+ else
+ dev_warn(adev->dev, "Failed to load SWDS state, err:%d\n", r);
}
bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
@@ -5570,6 +7372,9 @@ bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
struct amdgpu_device *adev = drm_to_adev(dev);
int r;
+ if (amdgpu_sriov_vf(adev))
+ return false;
+
r = pci_save_state(pdev);
if (!r) {
kfree(adev->pci_state);
@@ -5577,14 +7382,16 @@ bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
adev->pci_state = pci_store_saved_state(pdev);
if (!adev->pci_state) {
- DRM_ERROR("Failed to store PCI saved state");
+ dev_err(adev->dev, "Failed to store PCI saved state");
return false;
}
} else {
- DRM_WARN("Failed to save PCI state, err:%d\n", r);
+ dev_warn(adev->dev, "Failed to save PCI state, err:%d\n", r);
return false;
}
+ amdgpu_device_cache_switch_state(adev);
+
return true;
}
@@ -5602,7 +7409,7 @@ bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
if (!r) {
pci_restore_state(pdev);
} else {
- DRM_WARN("Failed to load PCI state, err:%d\n", r);
+ dev_warn(adev->dev, "Failed to load PCI state, err:%d\n", r);
return false;
}
@@ -5613,27 +7420,418 @@ void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
#ifdef CONFIG_X86_64
- if (adev->flags & AMD_IS_APU)
+ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
return;
#endif
if (adev->gmc.xgmi.connected_to_cpu)
return;
- if (ring && ring->funcs->emit_hdp_flush)
+ if (ring && ring->funcs->emit_hdp_flush) {
amdgpu_ring_emit_hdp_flush(ring);
- else
- amdgpu_asic_flush_hdp(adev, ring);
+ return;
+ }
+
+ if (!ring && amdgpu_sriov_runtime(adev)) {
+ if (!amdgpu_kiq_hdp_flush(adev))
+ return;
+ }
+
+ amdgpu_hdp_flush(adev, ring);
}
void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
#ifdef CONFIG_X86_64
- if (adev->flags & AMD_IS_APU)
+ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
return;
#endif
if (adev->gmc.xgmi.connected_to_cpu)
return;
- amdgpu_asic_invalidate_hdp(adev, ring);
+ amdgpu_hdp_invalidate(adev, ring);
+}
+
+int amdgpu_in_reset(struct amdgpu_device *adev)
+{
+ return atomic_read(&adev->reset_domain->in_gpu_reset);
+}
+
+/**
+ * amdgpu_device_halt() - bring hardware to some kind of halt state
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Bring hardware to some kind of halt state so that no one can touch it
+ * any more. This helps to preserve the error context when an error occurs.
+ * Compared to a simple hang, the system stays stable, at least for SSH
+ * access, so it should be trivial to inspect the hardware state and
+ * see what's going on. Implemented as follows:
+ *
+ * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
+ * clears all CPU mappings to device, disallows remappings through page faults
+ * 2. amdgpu_irq_disable_all() disables all interrupts
+ * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
+ * 4. set adev->no_hw_access to avoid potential crashes after step 5
+ * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
+ * 6. pci_disable_device() and pci_wait_for_pending_transaction()
+ * flush any in flight DMA operations
+ */
+void amdgpu_device_halt(struct amdgpu_device *adev)
+{
+ struct pci_dev *pdev = adev->pdev;
+ struct drm_device *ddev = adev_to_drm(adev);
+
+ amdgpu_xcp_dev_unplug(adev);
+ drm_dev_unplug(ddev);
+
+ amdgpu_irq_disable_all(adev);
+
+ amdgpu_fence_driver_hw_fini(adev);
+
+ adev->no_hw_access = true;
+
+ amdgpu_device_unmap_mmio(adev);
+
+ pci_disable_device(pdev);
+ pci_wait_for_pending_transaction(pdev);
+}
+
+u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
+ u32 reg)
+{
+ unsigned long flags, address, data;
+ u32 r;
+
+ address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, reg * 4);
+ (void)RREG32(address);
+ r = RREG32(data);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ return r;
+}
+
+void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
+ u32 reg, u32 v)
+{
+ unsigned long flags, address, data;
+
+ address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, reg * 4);
+ (void)RREG32(address);
+ WREG32(data, v);
+ (void)RREG32(data);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+/**
+ * amdgpu_device_get_gang - return a reference to the current gang
+ * @adev: amdgpu_device pointer
+ *
+ * Returns: A new reference to the current gang leader.
+ */
+struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev)
+{
+ struct dma_fence *fence;
+
+ rcu_read_lock();
+ fence = dma_fence_get_rcu_safe(&adev->gang_submit);
+ rcu_read_unlock();
+ return fence;
+}
+
+/**
+ * amdgpu_device_switch_gang - switch to a new gang
+ * @adev: amdgpu_device pointer
+ * @gang: the gang to switch to
+ *
+ * Try to switch to a new gang.
+ * Returns: NULL if we switched to the new gang or a reference to the current
+ * gang leader.
+ */
+struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
+ struct dma_fence *gang)
+{
+ struct dma_fence *old = NULL;
+
+ dma_fence_get(gang);
+ do {
+ dma_fence_put(old);
+ old = amdgpu_device_get_gang(adev);
+ if (old == gang)
+ break;
+
+ if (!dma_fence_is_signaled(old)) {
+ dma_fence_put(gang);
+ return old;
+ }
+
+ } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
+ old, gang) != old);
+
+ /*
+ * Drop it once for the exchanged reference in adev and once for the
+ * thread local reference acquired in amdgpu_device_get_gang().
+ */
+ dma_fence_put(old);
+ dma_fence_put(old);
+ return NULL;
+}
+
+/**
+ * amdgpu_device_enforce_isolation - enforce HW isolation
+ * @adev: the amdgpu device pointer
+ * @ring: the HW ring the job is supposed to run on
+ * @job: the job which is about to be pushed to the HW ring
+ *
+ * Makes sure that only one client at a time can use the GFX block.
+ * Returns: The dependency to wait on before the job can be pushed to the HW.
+ * The function is called multiple times until NULL is returned.
+ */
+struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_job *job)
+{
+ struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
+ struct drm_sched_fence *f = job->base.s_fence;
+ struct dma_fence *dep;
+ void *owner;
+ int r;
+
+ /*
+ * For now enforce isolation only for the GFX block since we only need
+ * the cleaner shader on those rings.
+ */
+ if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
+ ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
+ return NULL;
+
+ /*
+ * All submissions where enforce isolation is false are handled as if
+ * they come from a single client. Use ~0l as the owner to distinguish it
+ * from kernel submissions where the owner is NULL.
+ */
+ owner = job->enforce_isolation ? f->owner : (void *)~0l;
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+
+ /*
+ * The "spearhead" submission is the first one which changes the
+ * ownership to its client. We always need to wait for it to be
+ * pushed to the HW before proceeding with anything.
+ */
+ if (&f->scheduled != isolation->spearhead &&
+ !dma_fence_is_signaled(isolation->spearhead)) {
+ dep = isolation->spearhead;
+ goto out_grab_ref;
+ }
+
+ if (isolation->owner != owner) {
+
+ /*
+ * Wait for any gang to be assembled before switching to a
+ * different owner or otherwise we could deadlock the
+ * submissions.
+ */
+ if (!job->gang_submit) {
+ dep = amdgpu_device_get_gang(adev);
+ if (!dma_fence_is_signaled(dep))
+ goto out_return_dep;
+ dma_fence_put(dep);
+ }
+
+ dma_fence_put(isolation->spearhead);
+ isolation->spearhead = dma_fence_get(&f->scheduled);
+ amdgpu_sync_move(&isolation->active, &isolation->prev);
+ trace_amdgpu_isolation(isolation->owner, owner);
+ isolation->owner = owner;
+ }
+
+ /*
+ * Specifying the ring here helps to pipeline submissions even when
+ * isolation is enabled. If that is not desired for testing NULL can be
+ * used instead of the ring to enforce a CPU round trip while switching
+ * between clients.
+ */
+ dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
+ r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
+ if (r)
+ dev_warn(adev->dev, "OOM tracking isolation\n");
+
+out_grab_ref:
+ dma_fence_get(dep);
+out_return_dep:
+ mutex_unlock(&adev->enforce_isolation_mutex);
+ return dep;
+}
+
+bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_HAINAN:
+#endif
+ case CHIP_TOPAZ:
+ /* chips with no display hardware */
+ return false;
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+#endif
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_VEGAM:
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ /* chips with display hardware */
+ return true;
+ default:
+ /* IP discovery */
+ if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
+ (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
+ return false;
+ return true;
+ }
+}
+
+uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
+ uint32_t inst, uint32_t reg_addr, char reg_name[],
+ uint32_t expected_value, uint32_t mask)
+{
+ uint32_t ret = 0;
+ uint32_t old_ = 0;
+ uint32_t tmp_ = RREG32(reg_addr);
+ uint32_t loop = adev->usec_timeout;
+
+ while ((tmp_ & (mask)) != (expected_value)) {
+ if (old_ != tmp_) {
+ loop = adev->usec_timeout;
+ old_ = tmp_;
+ } else
+ udelay(1);
+ tmp_ = RREG32(reg_addr);
+ loop--;
+ if (!loop) {
+ dev_warn(
+ adev->dev,
+ "Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
+ inst, reg_name, (uint32_t)expected_value,
+ (uint32_t)(tmp_ & (mask)));
+ ret = -ETIMEDOUT;
+ break;
+ }
+ }
+ return ret;
+}
+
+ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring)
+{
+ ssize_t size = 0;
+
+ if (!ring || !ring->adev)
+ return size;
+
+ if (amdgpu_device_should_recover_gpu(ring->adev))
+ size |= AMDGPU_RESET_TYPE_FULL;
+
+ if (unlikely(!ring->adev->debug_disable_soft_recovery) &&
+ !amdgpu_sriov_vf(ring->adev) && ring->funcs->soft_recovery)
+ size |= AMDGPU_RESET_TYPE_SOFT_RESET;
+
+ return size;
+}
+
+ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset)
+{
+ ssize_t size = 0;
+
+ if (supported_reset == 0) {
+ size += sysfs_emit_at(buf, size, "unsupported");
+ size += sysfs_emit_at(buf, size, "\n");
+ return size;
+
+ }
+
+ if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET)
+ size += sysfs_emit_at(buf, size, "soft ");
+
+ if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
+ size += sysfs_emit_at(buf, size, "queue ");
+
+ if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
+ size += sysfs_emit_at(buf, size, "pipe ");
+
+ if (supported_reset & AMDGPU_RESET_TYPE_FULL)
+ size += sysfs_emit_at(buf, size, "full ");
+
+ size += sysfs_emit_at(buf, size, "\n");
+ return size;
+}
+
+void amdgpu_device_set_uid(struct amdgpu_uid *uid_info,
+ enum amdgpu_uid_type type, uint8_t inst,
+ uint64_t uid)
+{
+ if (!uid_info)
+ return;
+
+ if (type >= AMDGPU_UID_TYPE_MAX) {
+ dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
+ type);
+ return;
+ }
+
+ if (inst >= AMDGPU_UID_INST_MAX) {
+ dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
+ inst);
+ return;
+ }
+
+ if (uid_info->uid[type][inst] != 0) {
+ dev_warn_once(
+ uid_info->adev->dev,
+ "Overwriting existing UID %llu for type %d instance %d\n",
+ uid_info->uid[type][inst], type, inst);
+ }
+
+ uid_info->uid[type][inst] = uid;
+}
+
+u64 amdgpu_device_get_uid(struct amdgpu_uid *uid_info,
+ enum amdgpu_uid_type type, uint8_t inst)
+{
+ if (!uid_info)
+ return 0;
+
+ if (type >= AMDGPU_UID_TYPE_MAX) {
+ dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
+ type);
+ return 0;
+ }
+
+ if (inst >= AMDGPU_UID_INST_MAX) {
+ dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
+ inst);
+ return 0;
+ }
+
+ return uid_info->uid[type][inst];
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
index 6b25837955c4..eb605e79ae0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
@@ -33,6 +33,7 @@ struct amdgpu_df_hash_status {
struct amdgpu_df_funcs {
void (*sw_init)(struct amdgpu_device *adev);
void (*sw_fini)(struct amdgpu_device *adev);
+ void (*hw_init)(struct amdgpu_device *adev);
void (*enable_broadcast_mode)(struct amdgpu_device *adev,
bool enable);
u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
@@ -40,7 +41,7 @@ struct amdgpu_df_funcs {
void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
bool enable);
void (*get_clockgating_state)(struct amdgpu_device *adev,
- u32 *flags);
+ u64 *flags);
void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
bool enable);
int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index ea00090b3fb3..fa2a22dfa048 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2018-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -27,19 +27,27 @@
#include "amdgpu_discovery.h"
#include "soc15_hw_ip.h"
#include "discovery.h"
+#include "amdgpu_ras.h"
#include "soc15.h"
#include "gfx_v9_0.h"
+#include "gfx_v9_4_3.h"
#include "gmc_v9_0.h"
#include "df_v1_7.h"
#include "df_v3_6.h"
+#include "df_v4_3.h"
+#include "df_v4_6_2.h"
+#include "df_v4_15.h"
#include "nbio_v6_1.h"
#include "nbio_v7_0.h"
#include "nbio_v7_4.h"
+#include "nbio_v7_9.h"
+#include "nbio_v7_11.h"
#include "hdp_v4_0.h"
#include "vega10_ih.h"
#include "vega20_ih.h"
#include "sdma_v4_0.h"
+#include "sdma_v4_4_2.h"
#include "uvd_v7_0.h"
#include "vce_v4_0.h"
#include "vcn_v1_0.h"
@@ -47,29 +55,78 @@
#include "jpeg_v2_5.h"
#include "smuio_v9_0.h"
#include "gmc_v10_0.h"
+#include "gmc_v11_0.h"
+#include "gmc_v12_0.h"
#include "gfxhub_v2_0.h"
#include "mmhub_v2_0.h"
#include "nbio_v2_3.h"
+#include "nbio_v4_3.h"
#include "nbio_v7_2.h"
+#include "nbio_v7_7.h"
+#include "nbif_v6_3_1.h"
#include "hdp_v5_0.h"
+#include "hdp_v5_2.h"
+#include "hdp_v6_0.h"
+#include "hdp_v7_0.h"
#include "nv.h"
+#include "soc21.h"
+#include "soc24.h"
#include "navi10_ih.h"
+#include "ih_v6_0.h"
+#include "ih_v6_1.h"
+#include "ih_v7_0.h"
#include "gfx_v10_0.h"
+#include "gfx_v11_0.h"
+#include "gfx_v12_0.h"
#include "sdma_v5_0.h"
#include "sdma_v5_2.h"
+#include "sdma_v6_0.h"
+#include "sdma_v7_0.h"
+#include "lsdma_v6_0.h"
+#include "lsdma_v7_0.h"
#include "vcn_v2_0.h"
#include "jpeg_v2_0.h"
#include "vcn_v3_0.h"
#include "jpeg_v3_0.h"
+#include "vcn_v4_0.h"
+#include "jpeg_v4_0.h"
+#include "vcn_v4_0_3.h"
+#include "jpeg_v4_0_3.h"
+#include "vcn_v4_0_5.h"
+#include "jpeg_v4_0_5.h"
#include "amdgpu_vkms.h"
-#include "mes_v10_1.h"
+#include "mes_v11_0.h"
+#include "mes_v12_0.h"
#include "smuio_v11_0.h"
#include "smuio_v11_0_6.h"
#include "smuio_v13_0.h"
+#include "smuio_v13_0_3.h"
+#include "smuio_v13_0_6.h"
+#include "smuio_v14_0_2.h"
+#include "vcn_v5_0_0.h"
+#include "vcn_v5_0_1.h"
+#include "jpeg_v5_0_0.h"
+#include "jpeg_v5_0_1.h"
+#include "amdgpu_ras_mgr.h"
+
+#include "amdgpu_vpe.h"
+#if defined(CONFIG_DRM_AMD_ISP)
+#include "amdgpu_isp.h"
+#endif
MODULE_FIRMWARE("amdgpu/ip_discovery.bin");
-
+MODULE_FIRMWARE("amdgpu/vega10_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/vega12_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/vega20_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/raven_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/raven2_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/picasso_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_ip_discovery.bin");
+
+#define mmIP_DISCOVERY_VERSION 0x16A00
#define mmRCC_CONFIG_MEMSIZE 0xde3
+#define mmMP0_SMN_C2PMSG_33 0x16061
#define mmMM_INDEX 0x0
#define mmMM_INDEX_HI 0x6
#define mmMM_DATA 0x1
@@ -110,6 +167,7 @@ static const char *hw_id_names[HW_ID_MAX] = {
[SDMA1_HWID] = "SDMA1",
[SDMA2_HWID] = "SDMA2",
[SDMA3_HWID] = "SDMA3",
+ [LSDMA_HWID] = "LSDMA",
[ISP_HWID] = "ISP",
[DBGU_IO_HWID] = "DBGU_IO",
[DF_HWID] = "DF",
@@ -150,6 +208,7 @@ static const char *hw_id_names[HW_ID_MAX] = {
[XGMI_HWID] = "XGMI",
[XGBE_HWID] = "XGBE",
[MP0_HWID] = "MP0",
+ [VPE_HWID] = "VPE",
};
static int hw_id_map[MAX_HWIP] = {
@@ -159,6 +218,7 @@ static int hw_id_map[MAX_HWIP] = {
[SDMA1_HWIP] = SDMA1_HWID,
[SDMA2_HWIP] = SDMA2_HWID,
[SDMA3_HWIP] = SDMA3_HWID,
+ [LSDMA_HWIP] = LSDMA_HWID,
[MMHUB_HWIP] = MMHUB_HWID,
[ATHUB_HWIP] = ATHUB_HWID,
[NBIO_HWIP] = NBIF_HWID,
@@ -177,15 +237,124 @@ static int hw_id_map[MAX_HWIP] = {
[UMC_HWIP] = UMC_HWID,
[XGMI_HWIP] = XGMI_HWID,
[DCI_HWIP] = DCI_HWID,
+ [PCIE_HWIP] = PCIE_HWID,
+ [VPE_HWIP] = VPE_HWID,
+ [ISP_HWIP] = ISP_HWID,
};
-static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary)
+static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, uint8_t *binary)
+{
+ u64 tmr_offset, tmr_size, pos;
+ void *discv_regn;
+ int ret;
+
+ ret = amdgpu_acpi_get_tmr_info(adev, &tmr_offset, &tmr_size);
+ if (ret)
+ return ret;
+
+ pos = tmr_offset + tmr_size - DISCOVERY_TMR_OFFSET;
+
+ /* This region is read-only and reserved from system use */
+ discv_regn = memremap(pos, adev->discovery.size, MEMREMAP_WC);
+ if (discv_regn) {
+ memcpy(binary, discv_regn, adev->discovery.size);
+ memunmap(discv_regn);
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+#define IP_DISCOVERY_V2 2
+#define IP_DISCOVERY_V4 4
+
+static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
+ uint8_t *binary)
+{
+ bool sz_valid = true;
+ uint64_t vram_size;
+ int i, ret = 0;
+ u32 msg;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* It can take up to two seconds for IFWI init to complete on some dGPUs,
+ * but generally it should be in the 60-100ms range. Normally this starts
+ * as soon as the device gets power so by the time the OS loads this has long
+ * completed. However, when a card is hotplugged via e.g., USB4, we need to
+ * wait for this to complete. Once the C2PMSG is updated, we can
+ * continue.
+ */
+
+ for (i = 0; i < 2000; i++) {
+ msg = RREG32(mmMP0_SMN_C2PMSG_33);
+ if (msg & 0x80000000)
+ break;
+ msleep(1);
+ }
+ }
+
+ vram_size = RREG32(mmRCC_CONFIG_MEMSIZE);
+ if (!vram_size || vram_size == U32_MAX)
+ sz_valid = false;
+ else
+ vram_size <<= 20;
+
+ /*
+ * If in VRAM, discovery TMR is marked for reservation. If it is in system mem,
+ * then it is not required to be reserved.
+ */
+ if (sz_valid) {
+ if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) {
+ /* For SRIOV VFs with dynamic critical region enabled,
+ * we will get the IPD binary via below call.
+ * If the dynamic critical region is disabled, fall through to the normal sequence.
+ */
+ if (amdgpu_virt_get_dynamic_data_info(adev,
+ AMD_SRIOV_MSG_IPD_TABLE_ID, binary,
+ &adev->discovery.size)) {
+ dev_err(adev->dev,
+ "failed to read discovery info from dynamic critical region.");
+ ret = -EINVAL;
+ goto exit;
+ }
+ } else {
+ uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
+
+ amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
+ adev->discovery.size, false);
+ adev->discovery.reserve_tmr = true;
+ }
+ } else {
+ ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary);
+ }
+
+ if (ret)
+ dev_err(adev->dev,
+ "failed to read discovery info from memory, vram size read: %llx",
+ vram_size);
+exit:
+ return ret;
+}
+
+static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev,
+ uint8_t *binary,
+ const char *fw_name)
{
- uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
- uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
+ const struct firmware *fw;
+ int r;
+
+ r = firmware_request_nowarn(&fw, fw_name, adev->dev);
+ if (r) {
+ if (amdgpu_discovery == 2)
+ dev_err(adev->dev, "can't load firmware \"%s\"\n", fw_name);
+ else
+ drm_info(&adev->ddev, "Optional firmware \"%s\" was not found\n", fw_name);
+ return r;
+ }
+
+ memcpy((u8 *)binary, (u8 *)fw->data, fw->size);
+ release_firmware(fw);
- amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
- adev->mman.discovery_tmr_size, false);
return 0;
}
@@ -206,56 +375,152 @@ static inline bool amdgpu_discovery_verify_checksum(uint8_t *data, uint32_t size
return !!(amdgpu_discovery_calculate_checksum(data, size) == expected);
}
+static inline bool amdgpu_discovery_verify_binary_signature(uint8_t *binary)
+{
+ struct binary_header *bhdr;
+ bhdr = (struct binary_header *)binary;
+
+ return (le32_to_cpu(bhdr->binary_signature) == BINARY_SIGNATURE);
+}
+
+static void amdgpu_discovery_harvest_config_quirk(struct amdgpu_device *adev)
+{
+ /*
+ * So far, apply this quirk only on those Navy Flounder boards which
+ * have a bad harvest table of VCN config.
+ */
+ if ((amdgpu_ip_version(adev, UVD_HWIP, 1) == IP_VERSION(3, 0, 1)) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 2))) {
+ switch (adev->pdev->revision) {
+ case 0xC1:
+ case 0xC2:
+ case 0xC3:
+ case 0xC5:
+ case 0xC7:
+ case 0xCF:
+ case 0xDF:
+ adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
+ adev->vcn.inst_mask &= ~AMDGPU_VCN_HARVEST_VCN1;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev,
+ struct binary_header *bhdr)
+{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct table_info *info;
+ uint16_t checksum;
+ uint16_t offset;
+
+ info = &bhdr->table_list[NPS_INFO];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ struct nps_info_header *nhdr =
+ (struct nps_info_header *)(discovery_bin + offset);
+
+ if (le32_to_cpu(nhdr->table_id) != NPS_INFO_TABLE_ID) {
+ dev_dbg(adev->dev, "invalid ip discovery nps info table id\n");
+ return -EINVAL;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(discovery_bin + offset,
+ le32_to_cpu(nhdr->size_bytes),
+ checksum)) {
+ dev_dbg(adev->dev, "invalid nps info data table checksum\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const char *amdgpu_discovery_get_fw_name(struct amdgpu_device *adev)
+{
+ if (amdgpu_discovery == 2) {
+ /* Assume there is valid discovery TMR in VRAM even if binary is sideloaded */
+ adev->discovery.reserve_tmr = true;
+ return "amdgpu/ip_discovery.bin";
+ }
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ return "amdgpu/vega10_ip_discovery.bin";
+ case CHIP_VEGA12:
+ return "amdgpu/vega12_ip_discovery.bin";
+ case CHIP_RAVEN:
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ return "amdgpu/raven2_ip_discovery.bin";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ return "amdgpu/picasso_ip_discovery.bin";
+ else
+ return "amdgpu/raven_ip_discovery.bin";
+ case CHIP_VEGA20:
+ return "amdgpu/vega20_ip_discovery.bin";
+ case CHIP_ARCTURUS:
+ return "amdgpu/arcturus_ip_discovery.bin";
+ case CHIP_ALDEBARAN:
+ return "amdgpu/aldebaran_ip_discovery.bin";
+ default:
+ return NULL;
+ }
+}
+
static int amdgpu_discovery_init(struct amdgpu_device *adev)
{
struct table_info *info;
struct binary_header *bhdr;
- struct ip_discovery_header *ihdr;
- struct gpu_info_header *ghdr;
- const struct firmware *fw;
+ uint8_t *discovery_bin;
+ const char *fw_name;
uint16_t offset;
uint16_t size;
uint16_t checksum;
int r;
- adev->mman.discovery_tmr_size = DISCOVERY_TMR_SIZE;
- adev->mman.discovery_bin = kzalloc(adev->mman.discovery_tmr_size, GFP_KERNEL);
- if (!adev->mman.discovery_bin)
+ adev->discovery.bin = kzalloc(DISCOVERY_TMR_SIZE, GFP_KERNEL);
+ if (!adev->discovery.bin)
return -ENOMEM;
-
- if (amdgpu_discovery == 2) {
- r = request_firmware(&fw, "amdgpu/ip_discovery.bin", adev->dev);
+ adev->discovery.size = DISCOVERY_TMR_SIZE;
+ adev->discovery.debugfs_blob.data = adev->discovery.bin;
+ adev->discovery.debugfs_blob.size = adev->discovery.size;
+
+ discovery_bin = adev->discovery.bin;
+ /* Read from file if it is the preferred option */
+ fw_name = amdgpu_discovery_get_fw_name(adev);
+ if (fw_name != NULL) {
+ drm_dbg(&adev->ddev, "use ip discovery information from file");
+ r = amdgpu_discovery_read_binary_from_file(adev, discovery_bin,
+ fw_name);
if (r)
- goto get_from_vram;
- dev_info(adev->dev, "Using IP discovery from file\n");
- memcpy((u8 *)adev->mman.discovery_bin, (u8 *)fw->data,
- adev->mman.discovery_tmr_size);
- release_firmware(fw);
+ goto out;
} else {
-get_from_vram:
- r = amdgpu_discovery_read_binary(adev, adev->mman.discovery_bin);
- if (r) {
- DRM_ERROR("failed to read ip discovery binary\n");
+ drm_dbg(&adev->ddev, "use ip discovery information from memory");
+ r = amdgpu_discovery_read_binary_from_mem(adev, discovery_bin);
+ if (r)
goto out;
- }
}
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
-
- if (le32_to_cpu(bhdr->binary_signature) != BINARY_SIGNATURE) {
- DRM_ERROR("invalid ip discovery binary signature\n");
+ /* check the ip discovery binary signature */
+ if (!amdgpu_discovery_verify_binary_signature(discovery_bin)) {
+ dev_err(adev->dev,
+ "get invalid ip discovery binary signature\n");
r = -EINVAL;
goto out;
}
+ bhdr = (struct binary_header *)discovery_bin;
+
offset = offsetof(struct binary_header, binary_checksum) +
sizeof(bhdr->binary_checksum);
size = le16_to_cpu(bhdr->binary_size) - offset;
checksum = le16_to_cpu(bhdr->binary_checksum);
- if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
- size, checksum)) {
- DRM_ERROR("invalid ip discovery binary checksum\n");
+ if (!amdgpu_discovery_verify_checksum(discovery_bin + offset, size,
+ checksum)) {
+ dev_err(adev->dev, "invalid ip discovery binary checksum\n");
r = -EINVAL;
goto out;
}
@@ -263,95 +528,880 @@ get_from_vram:
info = &bhdr->table_list[IP_DISCOVERY];
offset = le16_to_cpu(info->offset);
checksum = le16_to_cpu(info->checksum);
- ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin + offset);
- if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) {
- DRM_ERROR("invalid ip discovery data table signature\n");
- r = -EINVAL;
- goto out;
- }
+ if (offset) {
+ struct ip_discovery_header *ihdr =
+ (struct ip_discovery_header *)(discovery_bin + offset);
+ if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) {
+ dev_err(adev->dev, "invalid ip discovery data table signature\n");
+ r = -EINVAL;
+ goto out;
+ }
- if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
- le16_to_cpu(ihdr->size), checksum)) {
- DRM_ERROR("invalid ip discovery data table checksum\n");
- r = -EINVAL;
- goto out;
+ if (!amdgpu_discovery_verify_checksum(discovery_bin + offset,
+ le16_to_cpu(ihdr->size),
+ checksum)) {
+ dev_err(adev->dev, "invalid ip discovery data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
}
info = &bhdr->table_list[GC];
offset = le16_to_cpu(info->offset);
checksum = le16_to_cpu(info->checksum);
- ghdr = (struct gpu_info_header *)(adev->mman.discovery_bin + offset);
- if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
- le32_to_cpu(ghdr->size), checksum)) {
- DRM_ERROR("invalid gc data table checksum\n");
- r = -EINVAL;
- goto out;
+ if (offset) {
+ struct gpu_info_header *ghdr =
+ (struct gpu_info_header *)(discovery_bin + offset);
+
+ if (le32_to_cpu(ghdr->table_id) != GC_TABLE_ID) {
+ dev_err(adev->dev, "invalid ip discovery gc table id\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(discovery_bin + offset,
+ le32_to_cpu(ghdr->size),
+ checksum)) {
+ dev_err(adev->dev, "invalid gc data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+ }
+
+ info = &bhdr->table_list[HARVEST_INFO];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ if (offset) {
+ struct harvest_info_header *hhdr =
+ (struct harvest_info_header *)(discovery_bin + offset);
+
+ if (le32_to_cpu(hhdr->signature) != HARVEST_TABLE_SIGNATURE) {
+ dev_err(adev->dev, "invalid ip discovery harvest table signature\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(
+ discovery_bin + offset,
+ sizeof(struct harvest_table), checksum)) {
+ dev_err(adev->dev, "invalid harvest data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+ }
+
+ info = &bhdr->table_list[VCN_INFO];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ if (offset) {
+ struct vcn_info_header *vhdr =
+ (struct vcn_info_header *)(discovery_bin + offset);
+
+ if (le32_to_cpu(vhdr->table_id) != VCN_INFO_TABLE_ID) {
+ dev_err(adev->dev, "invalid ip discovery vcn table id\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(
+ discovery_bin + offset,
+ le32_to_cpu(vhdr->size_bytes), checksum)) {
+ dev_err(adev->dev, "invalid vcn data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+ }
+
+ info = &bhdr->table_list[MALL_INFO];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ if (0 && offset) {
+ struct mall_info_header *mhdr =
+ (struct mall_info_header *)(discovery_bin + offset);
+
+ if (le32_to_cpu(mhdr->table_id) != MALL_INFO_TABLE_ID) {
+ dev_err(adev->dev, "invalid ip discovery mall table id\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(
+ discovery_bin + offset,
+ le32_to_cpu(mhdr->size_bytes), checksum)) {
+ dev_err(adev->dev, "invalid mall data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
}
return 0;
out:
- kfree(adev->mman.discovery_bin);
- adev->mman.discovery_bin = NULL;
-
+ kfree(adev->discovery.bin);
+ adev->discovery.bin = NULL;
+ if ((amdgpu_discovery != 2) &&
+ (RREG32(mmIP_DISCOVERY_VERSION) == 4))
+ amdgpu_ras_query_boot_status(adev, 4);
return r;
}
+static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev);
+
void amdgpu_discovery_fini(struct amdgpu_device *adev)
{
- kfree(adev->mman.discovery_bin);
- adev->mman.discovery_bin = NULL;
+ amdgpu_discovery_sysfs_fini(adev);
+ kfree(adev->discovery.bin);
+ adev->discovery.bin = NULL;
}
-static int amdgpu_discovery_validate_ip(const struct ip *ip)
+static int amdgpu_discovery_validate_ip(struct amdgpu_device *adev,
+ uint8_t instance, uint16_t hw_id)
{
- if (ip->number_instance >= HWIP_MAX_INSTANCE) {
- DRM_ERROR("Unexpected number_instance (%d) from ip discovery blob\n",
- ip->number_instance);
+ if (instance >= HWIP_MAX_INSTANCE) {
+ dev_err(adev->dev,
+ "Unexpected instance_number (%d) from ip discovery blob\n",
+ instance);
return -EINVAL;
}
- if (le16_to_cpu(ip->hw_id) >= HW_ID_MAX) {
- DRM_ERROR("Unexpected hw_id (%d) from ip discovery blob\n",
- le16_to_cpu(ip->hw_id));
+ if (hw_id >= HW_ID_MAX) {
+ dev_err(adev->dev,
+ "Unexpected hw_id (%d) from ip discovery blob\n",
+ hw_id);
return -EINVAL;
}
return 0;
}
-int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
+/*
+ * Walk every IP entry of every die in the discovery binary and apply the
+ * per-IP harvest bit (ip->harvest == 1) to the device state.
+ *
+ * @adev: device whose discovery binary is scanned and whose masks are updated
+ * @vcn_harvest_count: incremented once for each harvested VCN entry
+ *
+ * Entries rejected by amdgpu_discovery_validate_ip() are skipped, but their
+ * size is still added to ip_offset so the walk stays aligned.
+ */
+static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
+ uint32_t *vcn_harvest_count)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
struct binary_header *bhdr;
struct ip_discovery_header *ihdr;
struct die_header *dhdr;
struct ip *ip;
+ uint16_t die_offset, ip_offset, num_dies, num_ips;
+ uint16_t hw_id;
+ uint8_t inst;
+ int i, j;
+
+ bhdr = (struct binary_header *)discovery_bin;
+ ihdr = (struct ip_discovery_header
+ *)(discovery_bin +
+ le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+ num_dies = le16_to_cpu(ihdr->num_dies);
+
+ /* scan harvest bit of all IP data structures */
+ for (i = 0; i < num_dies; i++) {
+ die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
+ dhdr = (struct die_header *)(discovery_bin + die_offset);
+ num_ips = le16_to_cpu(dhdr->num_ips);
+ ip_offset = die_offset + sizeof(*dhdr);
+
+ for (j = 0; j < num_ips; j++) {
+ ip = (struct ip *)(discovery_bin + ip_offset);
+ inst = ip->number_instance;
+ hw_id = le16_to_cpu(ip->hw_id);
+ if (amdgpu_discovery_validate_ip(adev, inst, hw_id))
+ goto next_ip;
+
+ if (ip->harvest == 1) {
+ switch (hw_id) {
+ case VCN_HWID:
+ (*vcn_harvest_count)++;
+ /* Instance 0 maps to the VCN0 bit; every other instance is recorded as VCN1. */
+ if (inst == 0) {
+ adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN0;
+ adev->vcn.inst_mask &=
+ ~AMDGPU_VCN_HARVEST_VCN0;
+ adev->jpeg.inst_mask &=
+ ~AMDGPU_VCN_HARVEST_VCN0;
+ } else {
+ adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
+ adev->vcn.inst_mask &=
+ ~AMDGPU_VCN_HARVEST_VCN1;
+ adev->jpeg.inst_mask &=
+ ~AMDGPU_VCN_HARVEST_VCN1;
+ }
+ break;
+ case DMU_HWID:
+ adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
+ break;
+ default:
+ break;
+ }
+ }
+next_ip:
+ /* Entries are variable length: header plus num_base_address base addresses. */
+ ip_offset += struct_size(ip, base_address,
+ ip->num_base_address);
+ }
+ }
+}
+
+/*
+ * Apply the HARVEST_INFO table of the discovery binary to the device state.
+ *
+ * @adev: device whose discovery binary is read and whose masks are updated
+ * @vcn_harvest_count: incremented once per VCN entry found in the table
+ * @umc_harvest_count: incremented once per UMC entry found in the table
+ *
+ * At most 32 list entries are examined; an entry with hw_id == 0 ends the
+ * list. If the table offset is 0 an error is logged and nothing is changed.
+ * On the normal path adev->umc.active_mask is rewritten from
+ * adev->umc.node_inst_num and the harvested UMC instances.
+ */
+static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev,
+						     uint32_t *vcn_harvest_count,
+						     uint32_t *umc_harvest_count)
+{
+	uint8_t *discovery_bin = adev->discovery.bin;
+	struct binary_header *bhdr;
+	struct harvest_table *harvest_info;
+	u16 offset;
+	u16 hw_id;
+	int i;
+	uint32_t umc_harvest_config = 0;
+
+	bhdr = (struct binary_header *)discovery_bin;
+	offset = le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset);
+
+	if (!offset) {
+		dev_err(adev->dev, "invalid harvest table offset\n");
+		return;
+	}
+
+	harvest_info = (struct harvest_table *)(discovery_bin + offset);
+
+	for (i = 0; i < 32; i++) {
+		/* Decode the ID once; a zero ID terminates the list. */
+		hw_id = le16_to_cpu(harvest_info->list[i].hw_id);
+		if (hw_id == 0)
+			break;
+
+		/*
+		 * All bit masks below are built with 1U so that instance 31
+		 * does not shift into the sign bit of a signed int.
+		 */
+		switch (hw_id) {
+		case VCN_HWID:
+			(*vcn_harvest_count)++;
+			adev->vcn.harvest_config |=
+				(1U << harvest_info->list[i].number_instance);
+			adev->jpeg.harvest_config |=
+				(1U << harvest_info->list[i].number_instance);
+
+			adev->vcn.inst_mask &=
+				~(1U << harvest_info->list[i].number_instance);
+			adev->jpeg.inst_mask &=
+				~(1U << harvest_info->list[i].number_instance);
+			break;
+		case DMU_HWID:
+			adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
+			break;
+		case UMC_HWID:
+			/*
+			 * NOTE(review): number_instance is used without
+			 * le16_to_cpu() in every other case of this switch;
+			 * confirm the conversion here matches the field width.
+			 */
+			umc_harvest_config |=
+				1U << (le16_to_cpu(harvest_info->list[i].number_instance));
+			(*umc_harvest_count)++;
+			break;
+		case GC_HWID:
+			adev->gfx.xcc_mask &=
+				~(1U << harvest_info->list[i].number_instance);
+			break;
+		case SDMA0_HWID:
+			adev->sdma.sdma_mask &=
+				~(1U << harvest_info->list[i].number_instance);
+			break;
+#if defined(CONFIG_DRM_AMD_ISP)
+		case ISP_HWID:
+			/*
+			 * NOTE(review): this ORs in every bit except the
+			 * instance bit; confirm that is intended rather than
+			 * "|= (1U << instance)".
+			 */
+			adev->isp.harvest_config |=
+				~(1U << harvest_info->list[i].number_instance);
+			break;
+#endif
+		default:
+			break;
+		}
+	}
+
+	/* Active UMCs: the low node_inst_num bits minus the harvested ones. */
+	adev->umc.active_mask = ((1U << adev->umc.node_inst_num) - 1) &
+				~umc_harvest_config;
+}
+
+/* ================================================== */
+
+/*
+ * One IP instance as exposed in sysfs. The object is allocated with room for
+ * num_base_addresses entries of base_addr[] and is freed from the release
+ * callback of its embedded kobject.
+ */
+struct ip_hw_instance {
+ struct kobject kobj; /* ip_discovery/die/#die/#hw_id/#instance/<attrs...> */
+
+ int hw_id;
+ u8 num_instance;
+ u8 major, minor, revision;
+ u8 harvest;
+
+ int num_base_addresses;
+ u32 base_addr[] __counted_by(num_base_addresses);
+};
+
+/* Directory for one hardware ID within a die; its kset holds the instances. */
+struct ip_hw_id {
+ struct kset hw_id_kset; /* ip_discovery/die/#die/#hw_id/, contains ip_hw_instance */
+ int hw_id;
+};
+
+/* Directory for one die; its kset holds the per-hardware-ID directories. */
+struct ip_die_entry {
+ struct kset ip_kset; /* ip_discovery/die/#die/, contains ip_hw_id */
+ u16 num_ips;
+};
+
+/* -------------------------------------------------- */
+
+/* A sysfs attribute of an ip_hw_instance together with its typed show callback. */
+struct ip_hw_instance_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct ip_hw_instance *ip_hw_instance, char *buf);
+};
+
+/* Emit the instance's hardware ID as a decimal number followed by a newline. */
+static ssize_t hw_id_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+	const int hw_id = ip_hw_instance->hw_id;
+
+	return sysfs_emit(buf, "%d\n", hw_id);
+}
+
+/* Emit the instance number as a decimal number followed by a newline. */
+static ssize_t num_instance_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+	const u8 num_instance = ip_hw_instance->num_instance;
+
+	return sysfs_emit(buf, "%d\n", num_instance);
+}
+
+/* Emit the major version as a decimal number followed by a newline. */
+static ssize_t major_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+	const u8 major = ip_hw_instance->major;
+
+	return sysfs_emit(buf, "%d\n", major);
+}
+
+/* Emit the minor version as a decimal number followed by a newline. */
+static ssize_t minor_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+	const u8 minor = ip_hw_instance->minor;
+
+	return sysfs_emit(buf, "%d\n", minor);
+}
+
+/* Emit the revision as a decimal number followed by a newline. */
+static ssize_t revision_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+	const u8 revision = ip_hw_instance->revision;
+
+	return sysfs_emit(buf, "%d\n", revision);
+}
+
+/* Emit the harvest value in upper-case hex with a "0x" prefix and a newline. */
+static ssize_t harvest_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+	const u8 harvest = ip_hw_instance->harvest;
+
+	return sysfs_emit(buf, "0x%01X\n", harvest);
+}
+
+/* Emit the number of base addresses as a decimal number followed by a newline. */
+static ssize_t num_base_addresses_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+	const int count = ip_hw_instance->num_base_addresses;
+
+	return sysfs_emit(buf, "%d\n", count);
+}
+
+/*
+ * Emit every base address of the instance, one "0x%08X" value per line.
+ *
+ * Returns the number of bytes written, or the negative value reported by
+ * sysfs_emit_at() if a write fails. Output stops early once another entry
+ * might not fit in the page.
+ */
+static ssize_t base_addr_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+	ssize_t written = 0;
+	ssize_t pos = 0;
+	int idx;
+
+	for (idx = 0; idx < ip_hw_instance->num_base_addresses; idx++) {
+		/* Reserve 12 bytes per entry so that pos + size <= PAGE_SIZE. */
+		if (pos + 12 > PAGE_SIZE)
+			break;
+
+		written = sysfs_emit_at(buf, pos, "0x%08X\n",
+					ip_hw_instance->base_addr[idx]);
+		if (written <= 0)
+			break;
+
+		pos += written;
+	}
+
+	if (written < 0)
+		return written;
+
+	return pos;
+}
+
+/* Read-only attributes of an ip_hw_instance; each name binds to <name>_show(). */
+static struct ip_hw_instance_attr ip_hw_attr[] = {
+ __ATTR_RO(hw_id),
+ __ATTR_RO(num_instance),
+ __ATTR_RO(major),
+ __ATTR_RO(minor),
+ __ATTR_RO(revision),
+ __ATTR_RO(harvest),
+ __ATTR_RO(num_base_addresses),
+ __ATTR_RO(base_addr),
+};
+
+/*
+ * NULL-terminated attribute pointer list for ip_hw_instance; one slot per
+ * ip_hw_attr[] entry plus the terminator. It is filled at run time by
+ * amdgpu_discovery_sysfs_init().
+ */
+static struct attribute *ip_hw_instance_attrs[ARRAY_SIZE(ip_hw_attr) + 1];
+ATTRIBUTE_GROUPS(ip_hw_instance);
+
+/* Map an embedded kobject / attribute back to its containing object. */
+#define to_ip_hw_instance(x) container_of(x, struct ip_hw_instance, kobj)
+#define to_ip_hw_instance_attr(x) container_of(x, struct ip_hw_instance_attr, attr)
+
+/*
+ * sysfs show dispatcher for ip_hw_instance objects: recover the typed
+ * attribute and instance from the generic pointers and call the attribute's
+ * show callback. Returns -EIO when the attribute has no show callback.
+ */
+static ssize_t ip_hw_instance_attr_show(struct kobject *kobj,
+					struct attribute *attr,
+					char *buf)
+{
+	struct ip_hw_instance *instance = to_ip_hw_instance(kobj);
+	struct ip_hw_instance_attr *hw_attr = to_ip_hw_instance_attr(attr);
+
+	if (hw_attr->show)
+		return hw_attr->show(instance, buf);
+
+	return -EIO;
+}
+
+/* ip_hw_instance attributes are read-only: only .show is provided. */
+static const struct sysfs_ops ip_hw_instance_sysfs_ops = {
+ .show = ip_hw_instance_attr_show,
+};
+
+/* kobject release callback: free the ip_hw_instance that embeds @kobj. */
+static void ip_hw_instance_release(struct kobject *kobj)
+{
+	kfree(to_ip_hw_instance(kobj));
+}
+
+/* kobject type of ip_hw_instance: release hook, show dispatcher and default attributes. */
+static const struct kobj_type ip_hw_instance_ktype = {
+ .release = ip_hw_instance_release,
+ .sysfs_ops = &ip_hw_instance_sysfs_ops,
+ .default_groups = ip_hw_instance_groups,
+};
+
+/* -------------------------------------------------- */
+
+#define to_ip_hw_id(x) container_of(to_kset(x), struct ip_hw_id, hw_id_kset)
+
+/*
+ * kobject release callback for an ip_hw_id directory: complain if its kset
+ * still lists members, then free the containing ip_hw_id.
+ */
+static void ip_hw_id_release(struct kobject *kobj)
+{
+	struct kset *kset = to_kset(kobj);
+	struct ip_hw_id *entry = container_of(kset, struct ip_hw_id, hw_id_kset);
+
+	if (!list_empty(&kset->list))
+		DRM_ERROR("ip_hw_id->hw_id_kset is not empty");
+
+	kfree(entry);
+}
+
+/* kobject type of the per-hardware-ID directory; it uses the generic kobj_sysfs_ops. */
+static const struct kobj_type ip_hw_id_ktype = {
+ .release = ip_hw_id_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+/* -------------------------------------------------- */
+
+/* Release callbacks referenced by the ktypes below; defined later in the file. */
+static void die_kobj_release(struct kobject *kobj);
+static void ip_disc_release(struct kobject *kobj);
+
+/* A sysfs attribute of an ip_die_entry together with its typed show callback. */
+struct ip_die_entry_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct ip_die_entry *ip_die_entry, char *buf);
+};
+
+#define to_ip_die_entry_attr(x) container_of(x, struct ip_die_entry_attribute, attr)
+
+/* Emit the die's IP count as a decimal number followed by a newline. */
+static ssize_t num_ips_show(struct ip_die_entry *ip_die_entry, char *buf)
+{
+	const u16 num_ips = ip_die_entry->num_ips;
+
+	return sysfs_emit(buf, "%d\n", num_ips);
+}
+
+/* If there are more ip_die_entry attrs, other than the number of IPs,
+ * we can make this into an array of attrs, and then initialize
+ * ip_die_entry_attrs in a loop.
+ */
+static struct ip_die_entry_attribute num_ips_attr =
+ __ATTR_RO(num_ips);
+
+/* NULL-terminated default attribute list of a die directory. */
+static struct attribute *ip_die_entry_attrs[] = {
+ &num_ips_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(ip_die_entry); /* ip_die_entry_groups */
+
+/* Map the kobject embedded in ip_kset back to its ip_die_entry. */
+#define to_ip_die_entry(x) container_of(to_kset(x), struct ip_die_entry, ip_kset)
+
+/*
+ * sysfs show dispatcher for die directories: recover the typed attribute and
+ * die entry from the generic pointers and call the attribute's show callback.
+ * Returns -EIO when the attribute has no show callback.
+ */
+static ssize_t ip_die_entry_attr_show(struct kobject *kobj,
+				      struct attribute *attr,
+				      char *buf)
+{
+	struct ip_die_entry_attribute *die_attr = to_ip_die_entry_attr(attr);
+	struct ip_die_entry *die = to_ip_die_entry(kobj);
+
+	if (die_attr->show)
+		return die_attr->show(die, buf);
+
+	return -EIO;
+}
+
+/*
+ * kobject release callback for a die directory: complain if its kset still
+ * lists members, then free the containing ip_die_entry.
+ */
+static void ip_die_entry_release(struct kobject *kobj)
+{
+	struct kset *kset = to_kset(kobj);
+	struct ip_die_entry *die = container_of(kset, struct ip_die_entry, ip_kset);
+
+	if (!list_empty(&kset->list))
+		DRM_ERROR("ip_die_entry->ip_kset is not empty");
+
+	kfree(die);
+}
+
+/* Die directory attributes are read-only: only .show is provided. */
+static const struct sysfs_ops ip_die_entry_sysfs_ops = {
+ .show = ip_die_entry_attr_show,
+};
+
+/* kobject type of a die directory (ip_discovery/die/#die/). */
+static const struct kobj_type ip_die_entry_ktype = {
+ .release = ip_die_entry_release,
+ .sysfs_ops = &ip_die_entry_sysfs_ops,
+ .default_groups = ip_die_entry_groups,
+};
+
+/* kobject type of the "die" kset embedded in ip_discovery_top. */
+static const struct kobj_type die_kobj_ktype = {
+ .release = die_kobj_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+/* kobject type of the top-level ip_discovery/ directory. */
+static const struct kobj_type ip_discovery_ktype = {
+ .release = ip_disc_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+/*
+ * Root of the ip_discovery sysfs hierarchy. It is allocated by
+ * amdgpu_discovery_sysfs_init() and freed by ip_disc_release() when the last
+ * reference to kobj is dropped.
+ */
+struct ip_discovery_top {
+ struct kobject kobj; /* ip_discovery/ */
+ struct kset die_kset; /* ip_discovery/die/, contains ip_die_entry */
+ struct amdgpu_device *adev;
+};
+
+/*
+ * kobject release callback of the "die" kset. The kset is embedded in
+ * ip_discovery_top, so nothing is freed here; only a non-empty member list
+ * is reported.
+ */
+static void die_kobj_release(struct kobject *kobj)
+{
+	struct kset *die_kset = to_kset(kobj);
+
+	if (list_empty(&die_kset->list))
+		return;
+
+	DRM_ERROR("ip_top->die_kset is not empty");
+}
+
+/*
+ * kobject release callback of the top-level ip_discovery object: free the
+ * containing ip_discovery_top and clear the device's pointer to it.
+ */
+static void ip_disc_release(struct kobject *kobj)
+{
+	struct ip_discovery_top *top;
+	struct amdgpu_device *adev;
+
+	top = container_of(kobj, struct ip_discovery_top, kobj);
+	/* Read the back-pointer before the structure is freed. */
+	adev = top->adev;
+
+	kfree(top);
+	adev->discovery.ip_top = NULL;
+}
+
+/*
+ * Report whether instance @inst of hardware block @hw_id is harvested,
+ * derived from the instance masks kept in @adev.
+ *
+ * Returns 1 when the instance is considered harvested and 0 otherwise.
+ * Hardware IDs that are not handled here, including UMC, always yield 0.
+ */
+static uint8_t amdgpu_discovery_get_harvest_info(struct amdgpu_device *adev,
+						 uint16_t hw_id, uint8_t inst)
+{
+	/* Until a uniform way is figured, get mask based on hwid */
+	switch (hw_id) {
+	case VCN_HWID:
+		/* VCN vs UVD+VCE */
+		if (amdgpu_ip_version(adev, VCE_HWIP, 0))
+			return 0;
+		return !((1 << inst) & adev->vcn.inst_mask);
+	case DMU_HWID:
+		return (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK) ? 0x1 : 0;
+	case GC_HWID:
+		return !((1 << inst) & adev->gfx.xcc_mask);
+	case SDMA0_HWID:
+		return !((1 << inst) & adev->sdma.sdma_mask);
+	case UMC_HWID:
+		/* TODO: It needs another parsing; for now, ignore.*/
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Populate one die directory with #hw_id/#instance/<attributes> entries.
+ *
+ * @adev: device whose discovery binary is parsed
+ * @ip_die_entry: die directory that receives the per-hardware-ID ksets
+ * @_ip_offset: byte offset of the die's first IP entry in the binary
+ * @num_ips: number of IP entries that follow @_ip_offset
+ * @reg_base_64: true when the entries carry 64-bit base addresses
+ *
+ * For every hardware ID below HW_ID_MAX the die's IP list is rescanned from
+ * @_ip_offset; the first match registers the hardware-ID kset (plus a named
+ * symlink when hw_id_names[] has an entry) and every match adds an instance
+ * kobject beneath it.
+ *
+ * Returns 0 once all IDs were scanned, -ENOMEM on allocation failure, or the
+ * error from kset_register(). A failed kobject_add() is logged and that
+ * instance is dropped; scanning continues.
+ */
+static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
+				      struct ip_die_entry *ip_die_entry,
+				      const size_t _ip_offset, const int num_ips,
+				      bool reg_base_64)
+{
+	uint8_t *discovery_bin = adev->discovery.bin;
+	int ii, jj, kk, res;
+	uint16_t hw_id;
+	uint8_t inst;
+
+	DRM_DEBUG("num_ips:%d", num_ips);
+
+	/* Find all IPs of a given HW ID, and add their instance to
+	 * #die/#hw_id/#instance/<attributes>
+	 */
+	for (ii = 0; ii < HW_ID_MAX; ii++) {
+		struct ip_hw_id *ip_hw_id = NULL;
+		size_t ip_offset = _ip_offset;
+
+		for (jj = 0; jj < num_ips; jj++) {
+			struct ip_v4 *ip;
+			struct ip_hw_instance *ip_hw_instance;
+
+			ip = (struct ip_v4 *)(discovery_bin + ip_offset);
+			inst = ip->instance_number;
+			hw_id = le16_to_cpu(ip->hw_id);
+			if (amdgpu_discovery_validate_ip(adev, inst, hw_id) ||
+			    hw_id != ii)
+				goto next_ip;
+
+			DRM_DEBUG("match:%d @ ip_offset:%zu", ii, ip_offset);
+
+			/* We have a hw_id match; register the hw
+			 * block if not yet registered.
+			 */
+			if (!ip_hw_id) {
+				ip_hw_id = kzalloc(sizeof(*ip_hw_id), GFP_KERNEL);
+				if (!ip_hw_id)
+					return -ENOMEM;
+				ip_hw_id->hw_id = ii;
+
+				kobject_set_name(&ip_hw_id->hw_id_kset.kobj, "%d", ii);
+				ip_hw_id->hw_id_kset.kobj.kset = &ip_die_entry->ip_kset;
+				ip_hw_id->hw_id_kset.kobj.ktype = &ip_hw_id_ktype;
+				res = kset_register(&ip_hw_id->hw_id_kset);
+				if (res) {
+					DRM_ERROR("Couldn't register ip_hw_id kset");
+					kfree(ip_hw_id);
+					return res;
+				}
+				/* A failed symlink is only reported; the numeric directory stays usable. */
+				if (hw_id_names[ii]) {
+					res = sysfs_create_link(&ip_die_entry->ip_kset.kobj,
+								&ip_hw_id->hw_id_kset.kobj,
+								hw_id_names[ii]);
+					if (res) {
+						DRM_ERROR("Couldn't create IP link %s in IP Die:%s\n",
+							  hw_id_names[ii],
+							  kobject_name(&ip_die_entry->ip_kset.kobj));
+					}
+				}
+			}
+
+			/* Now register its instance.
+			 */
+			ip_hw_instance = kzalloc(struct_size(ip_hw_instance,
+							     base_addr,
+							     ip->num_base_address),
+						 GFP_KERNEL);
+			if (!ip_hw_instance) {
+				DRM_ERROR("no memory for ip_hw_instance");
+				return -ENOMEM;
+			}
+			ip_hw_instance->hw_id = le16_to_cpu(ip->hw_id); /* == ii */
+			ip_hw_instance->num_instance = ip->instance_number;
+			ip_hw_instance->major = ip->major;
+			ip_hw_instance->minor = ip->minor;
+			ip_hw_instance->revision = ip->revision;
+			ip_hw_instance->harvest =
+				amdgpu_discovery_get_harvest_info(
+					adev, ip_hw_instance->hw_id,
+					ip_hw_instance->num_instance);
+			/* Set the count before filling base_addr[], which is __counted_by() it. */
+			ip_hw_instance->num_base_addresses = ip->num_base_address;
+
+			for (kk = 0; kk < ip_hw_instance->num_base_addresses; kk++) {
+				if (reg_base_64)
+					ip_hw_instance->base_addr[kk] =
+						lower_32_bits(le64_to_cpu(ip->base_address_64[kk])) & 0x3FFFFFFF;
+				else
+					ip_hw_instance->base_addr[kk] = ip->base_address[kk];
+			}
+
+			kobject_init(&ip_hw_instance->kobj, &ip_hw_instance_ktype);
+			ip_hw_instance->kobj.kset = &ip_hw_id->hw_id_kset;
+			res = kobject_add(&ip_hw_instance->kobj, NULL,
+					  "%d", ip_hw_instance->num_instance);
+			if (res) {
+				/*
+				 * An initialised kobject that failed to be
+				 * added must be dropped with kobject_put(),
+				 * which frees it through the ktype release
+				 * hook; otherwise the instance is leaked.
+				 */
+				DRM_ERROR("Couldn't add ip_hw_instance kobject\n");
+				kobject_put(&ip_hw_instance->kobj);
+			}
+next_ip:
+			if (reg_base_64)
+				ip_offset += struct_size(ip, base_address_64,
+							 ip->num_base_address);
+			else
+				ip_offset += struct_size(ip, base_address,
+							 ip->num_base_address);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Create one ip_discovery/die/#die/ directory for every die listed in the
+ * discovery binary and fill it through amdgpu_discovery_sysfs_ips().
+ *
+ * Returns 0 on success, -ENOMEM if a die entry cannot be allocated, or the
+ * error from kset_register(). Dies registered before a failure are left in
+ * place.
+ */
+static int amdgpu_discovery_sysfs_recurse(struct amdgpu_device *adev)
+{
+ struct ip_discovery_top *ip_top = adev->discovery.ip_top;
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct binary_header *bhdr;
+ struct ip_discovery_header *ihdr;
+ struct die_header *dhdr;
+ struct kset *die_kset = &ip_top->die_kset;
+ u16 num_dies, die_offset, num_ips;
+ size_t ip_offset;
+ int ii, res;
+
+ bhdr = (struct binary_header *)discovery_bin;
+ ihdr = (struct ip_discovery_header
+ *)(discovery_bin +
+ le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+ num_dies = le16_to_cpu(ihdr->num_dies);
+
+ DRM_DEBUG("number of dies: %d\n", num_dies);
+
+ for (ii = 0; ii < num_dies; ii++) {
+ struct ip_die_entry *ip_die_entry;
+
+ die_offset = le16_to_cpu(ihdr->die_info[ii].die_offset);
+ dhdr = (struct die_header *)(discovery_bin + die_offset);
+ num_ips = le16_to_cpu(dhdr->num_ips);
+ ip_offset = die_offset + sizeof(*dhdr);
+
+ /* Add the die to the kset.
+ *
+ * dhdr->die_id == ii, which was checked in
+ * amdgpu_discovery_reg_base_init().
+ */
+
+ ip_die_entry = kzalloc(sizeof(*ip_die_entry), GFP_KERNEL);
+ if (!ip_die_entry)
+ return -ENOMEM;
+
+ ip_die_entry->num_ips = num_ips;
+
+ kobject_set_name(&ip_die_entry->ip_kset.kobj, "%d", le16_to_cpu(dhdr->die_id));
+ ip_die_entry->ip_kset.kobj.kset = die_kset;
+ ip_die_entry->ip_kset.kobj.ktype = &ip_die_entry_ktype;
+ res = kset_register(&ip_die_entry->ip_kset);
+ if (res) {
+ DRM_ERROR("Couldn't register ip_die_entry kset");
+ kfree(ip_die_entry);
+ return res;
+ }
+
+ /* NOTE(review): the return value is ignored here, so a failure while adding IPs does not abort the walk - confirm this is intended. */
+ amdgpu_discovery_sysfs_ips(adev, ip_die_entry, ip_offset, num_ips, !!ihdr->base_addr_64_bit);
+ }
+
+ return 0;
+}
+
+/*
+ * Build the ip_discovery sysfs hierarchy under the device's kobject.
+ *
+ * Allocates the top-level object, adds "ip_discovery", registers the "die"
+ * kset beneath it, fills the ip_hw_instance attribute list and then creates
+ * the per-die directories.
+ *
+ * Returns -EINVAL when no discovery binary is loaded, -ENOMEM on allocation
+ * failure, the error of the failing kobject/kset call, or the result of
+ * amdgpu_discovery_sysfs_recurse(). When adding the top object or the die
+ * kset fails, the top-level reference is dropped before returning.
+ */
+static int amdgpu_discovery_sysfs_init(struct amdgpu_device *adev)
+{
+	struct ip_discovery_top *top;
+	struct kset *kset;
+	int ret, idx;
+
+	if (!adev->discovery.bin)
+		return -EINVAL;
+
+	top = kzalloc(sizeof(*top), GFP_KERNEL);
+	if (!top)
+		return -ENOMEM;
+
+	top->adev = adev;
+	adev->discovery.ip_top = top;
+	ret = kobject_init_and_add(&top->kobj, &ip_discovery_ktype,
+				   &adev->dev->kobj, "ip_discovery");
+	if (ret) {
+		DRM_ERROR("Couldn't init and add ip_discovery/");
+		goto err_put_top;
+	}
+
+	kset = &top->die_kset;
+	kobject_set_name(&kset->kobj, "%s", "die");
+	kset->kobj.parent = &top->kobj;
+	kset->kobj.ktype = &die_kobj_ktype;
+	ret = kset_register(kset);
+	if (ret) {
+		DRM_ERROR("Couldn't register die_kset");
+		goto err_put_top;
+	}
+
+	/* Fill the NULL-terminated attribute list from the ip_hw_attr[] table. */
+	for (idx = 0; idx < ARRAY_SIZE(ip_hw_attr); idx++)
+		ip_hw_instance_attrs[idx] = &ip_hw_attr[idx].attr;
+	ip_hw_instance_attrs[idx] = NULL;
+
+	return amdgpu_discovery_sysfs_recurse(adev);
+
+err_put_top:
+	/* Dropping the reference runs the ktype release hook, which frees top. */
+	kobject_put(&top->kobj);
+	return ret;
+}
+
+/* -------------------------------------------------- */
+
+/* Map a kset list node back to the kobject that embeds it. */
+#define list_to_kobj(el) container_of(el, struct kobject, entry)
+
+/*
+ * Drop every instance kobject listed in @ip_hw_id's kset, then drop the
+ * reference on the kset itself.
+ */
+static void amdgpu_discovery_sysfs_ip_hw_free(struct ip_hw_id *ip_hw_id)
+{
+ struct list_head *el, *tmp;
+ struct kset *hw_id_kset;
+
+ hw_id_kset = &ip_hw_id->hw_id_kset;
+ spin_lock(&hw_id_kset->list_lock);
+ list_for_each_prev_safe(el, tmp, &hw_id_kset->list) {
+ list_del_init(el);
+ /* list_lock is released around kobject_put() and re-taken before the walk continues. */
+ spin_unlock(&hw_id_kset->list_lock);
+ /* kobject is embedded in ip_hw_instance */
+ kobject_put(list_to_kobj(el));
+ spin_lock(&hw_id_kset->list_lock);
+ }
+ spin_unlock(&hw_id_kset->list_lock);
+ kobject_put(&ip_hw_id->hw_id_kset.kobj);
+}
+
+/*
+ * Free every hardware-ID directory listed in @ip_die_entry's kset, then drop
+ * the reference on the die's kset itself.
+ */
+static void amdgpu_discovery_sysfs_die_free(struct ip_die_entry *ip_die_entry)
+{
+ struct list_head *el, *tmp;
+ struct kset *ip_kset;
+
+ ip_kset = &ip_die_entry->ip_kset;
+ spin_lock(&ip_kset->list_lock);
+ list_for_each_prev_safe(el, tmp, &ip_kset->list) {
+ list_del_init(el);
+ /* list_lock is released around the nested free and re-taken before the walk continues. */
+ spin_unlock(&ip_kset->list_lock);
+ amdgpu_discovery_sysfs_ip_hw_free(to_ip_hw_id(list_to_kobj(el)));
+ spin_lock(&ip_kset->list_lock);
+ }
+ spin_unlock(&ip_kset->list_lock);
+ kobject_put(&ip_die_entry->ip_kset.kobj);
+}
+
+/*
+ * Remove the whole ip_discovery sysfs hierarchy of @adev.
+ *
+ * Every die directory is unlinked from the "die" kset and freed, then the
+ * references on the die kset and on the top-level kobject are dropped.
+ *
+ * adev->discovery.ip_top is NULL when amdgpu_discovery_sysfs_init() never
+ * allocated it or already released it on an error path (the release hook
+ * clears the pointer). In that case there is nothing to tear down.
+ */
+static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev)
+{
+	struct ip_discovery_top *ip_top = adev->discovery.ip_top;
+	struct list_head *el, *tmp;
+	struct kset *die_kset;
+
+	/* The hierarchy was never created or is already gone. */
+	if (!ip_top)
+		return;
+
+	die_kset = &ip_top->die_kset;
+	spin_lock(&die_kset->list_lock);
+	list_for_each_prev_safe(el, tmp, &die_kset->list) {
+		list_del_init(el);
+		/* Release list_lock around the nested free; re-take it to continue. */
+		spin_unlock(&die_kset->list_lock);
+		amdgpu_discovery_sysfs_die_free(to_ip_die_entry(list_to_kobj(el)));
+		spin_lock(&die_kset->list_lock);
+	}
+	spin_unlock(&die_kset->list_lock);
+	kobject_put(&ip_top->die_kset.kobj);
+	kobject_put(&ip_top->kobj);
+}
+
+/* ================================================== */
+
+static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
+{
+ uint8_t num_base_address, subrev, variant;
+ struct binary_header *bhdr;
+ struct ip_discovery_header *ihdr;
+ struct die_header *dhdr;
+ uint8_t *discovery_bin;
+ struct ip_v4 *ip;
uint16_t die_offset;
uint16_t ip_offset;
uint16_t num_dies;
+ uint32_t wafl_ver;
uint16_t num_ips;
- uint8_t num_base_address;
+ uint16_t hw_id;
+ uint8_t inst;
int hw_ip;
int i, j, k;
int r;
r = amdgpu_discovery_init(adev);
- if (r) {
- DRM_ERROR("amdgpu_discovery_init failed\n");
+ if (r)
return r;
- }
-
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
- ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
- le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+ discovery_bin = adev->discovery.bin;
+ wafl_ver = 0;
+ adev->gfx.xcc_mask = 0;
+ adev->sdma.sdma_mask = 0;
+ adev->vcn.inst_mask = 0;
+ adev->jpeg.inst_mask = 0;
+ bhdr = (struct binary_header *)discovery_bin;
+ ihdr = (struct ip_discovery_header
+ *)(discovery_bin +
+ le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
num_dies = le16_to_cpu(ihdr->num_dies);
DRM_DEBUG("number of dies: %d\n", num_dies);
for (i = 0; i < num_dies; i++) {
die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
- dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
+ dhdr = (struct die_header *)(discovery_bin + die_offset);
num_ips = le16_to_cpu(dhdr->num_ips);
ip_offset = die_offset + sizeof(*dhdr);
@@ -365,9 +1415,11 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
le16_to_cpu(dhdr->die_id), num_ips);
for (j = 0; j < num_ips; j++) {
- ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);
+ ip = (struct ip_v4 *)(discovery_bin + ip_offset);
- if (amdgpu_discovery_validate_ip(ip))
+ inst = ip->instance_number;
+ hw_id = le16_to_cpu(ip->hw_id);
+ if (amdgpu_discovery_validate_ip(adev, inst, hw_id))
goto next_ip;
num_base_address = ip->num_base_address;
@@ -375,32 +1427,99 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n",
hw_id_names[le16_to_cpu(ip->hw_id)],
le16_to_cpu(ip->hw_id),
- ip->number_instance,
+ ip->instance_number,
ip->major, ip->minor,
ip->revision);
- if (le16_to_cpu(ip->hw_id) == VCN_HWID)
- adev->vcn.num_vcn_inst++;
+ if (le16_to_cpu(ip->hw_id) == VCN_HWID) {
+ /* Bit [5:0]: original revision value
+ * Bit [7:6]: en/decode capability:
+ * 0b00 : VCN function normally
+ * 0b10 : encode is disabled
+ * 0b01 : decode is disabled
+ */
+ if (adev->vcn.num_vcn_inst <
+ AMDGPU_MAX_VCN_INSTANCES) {
+ adev->vcn.inst[adev->vcn.num_vcn_inst].vcn_config =
+ ip->revision & 0xc0;
+ adev->vcn.num_vcn_inst++;
+ adev->vcn.inst_mask |=
+ (1U << ip->instance_number);
+ adev->jpeg.inst_mask |=
+ (1U << ip->instance_number);
+ } else {
+ dev_err(adev->dev, "Too many VCN instances: %d vs %d\n",
+ adev->vcn.num_vcn_inst + 1,
+ AMDGPU_MAX_VCN_INSTANCES);
+ }
+ ip->revision &= ~0xc0;
+ }
if (le16_to_cpu(ip->hw_id) == SDMA0_HWID ||
le16_to_cpu(ip->hw_id) == SDMA1_HWID ||
le16_to_cpu(ip->hw_id) == SDMA2_HWID ||
- le16_to_cpu(ip->hw_id) == SDMA3_HWID)
- adev->sdma.num_instances++;
+ le16_to_cpu(ip->hw_id) == SDMA3_HWID) {
+ if (adev->sdma.num_instances <
+ AMDGPU_MAX_SDMA_INSTANCES) {
+ adev->sdma.num_instances++;
+ adev->sdma.sdma_mask |=
+ (1U << ip->instance_number);
+ } else {
+ dev_err(adev->dev, "Too many SDMA instances: %d vs %d\n",
+ adev->sdma.num_instances + 1,
+ AMDGPU_MAX_SDMA_INSTANCES);
+ }
+ }
+
+ if (le16_to_cpu(ip->hw_id) == VPE_HWID) {
+ if (adev->vpe.num_instances < AMDGPU_MAX_VPE_INSTANCES)
+ adev->vpe.num_instances++;
+ else
+ dev_err(adev->dev, "Too many VPE instances: %d vs %d\n",
+ adev->vpe.num_instances + 1,
+ AMDGPU_MAX_VPE_INSTANCES);
+ }
+
+ if (le16_to_cpu(ip->hw_id) == UMC_HWID) {
+ adev->gmc.num_umc++;
+ adev->umc.node_inst_num++;
+ }
+
+ if (le16_to_cpu(ip->hw_id) == GC_HWID)
+ adev->gfx.xcc_mask |=
+ (1U << ip->instance_number);
+
+ if (!wafl_ver && le16_to_cpu(ip->hw_id) == WAFLC_HWID)
+ wafl_ver = IP_VERSION_FULL(ip->major, ip->minor,
+ ip->revision, 0, 0);
for (k = 0; k < num_base_address; k++) {
/*
* convert the endianness of base addresses in place,
* so that we don't need to convert them when accessing adev->reg_offset.
*/
- ip->base_address[k] = le32_to_cpu(ip->base_address[k]);
+ if (ihdr->base_addr_64_bit)
+ /* Truncate the 64bit base address from ip discovery
+ * and only store lower 32bit ip base in reg_offset[].
+ * Bits > 32 follows ASIC specific format, thus just
+ * discard them and handle it within specific ASIC.
+ * By this way reg_offset[] and related helpers can
+ * stay unchanged.
+ * The base address is in dwords, thus clear the
+ * highest 2 bits to store.
+ */
+ ip->base_address[k] =
+ lower_32_bits(le64_to_cpu(ip->base_address_64[k])) & 0x3FFFFFFF;
+ else
+ ip->base_address[k] = le32_to_cpu(ip->base_address[k]);
DRM_DEBUG("\t0x%08x\n", ip->base_address[k]);
}
for (hw_ip = 0; hw_ip < MAX_HWIP; hw_ip++) {
- if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id)) {
+ if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id) &&
+ hw_id_map[hw_ip] != 0) {
DRM_DEBUG("set register base offset for %s\n",
hw_id_names[le16_to_cpu(ip->hw_id)]);
- adev->reg_offset[hw_ip][ip->number_instance] =
+ adev->reg_offset[hw_ip][ip->instance_number] =
ip->base_address;
/* Instance support is somewhat inconsistent.
* SDMA is a good example. Sienna cichlid has 4 total
@@ -411,153 +1530,411 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
* example. On most chips there are multiple instances
* with the same HWID.
*/
- adev->ip_versions[hw_ip][ip->number_instance] =
- IP_VERSION(ip->major, ip->minor, ip->revision);
+
+ if (ihdr->version < 3) {
+ subrev = 0;
+ variant = 0;
+ } else {
+ subrev = ip->sub_revision;
+ variant = ip->variant;
+ }
+
+ adev->ip_versions[hw_ip]
+ [ip->instance_number] =
+ IP_VERSION_FULL(ip->major,
+ ip->minor,
+ ip->revision,
+ variant,
+ subrev);
}
}
next_ip:
- ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
+ if (ihdr->base_addr_64_bit)
+ ip_offset += struct_size(ip, base_address_64, ip->num_base_address);
+ else
+ ip_offset += struct_size(ip, base_address, ip->num_base_address);
}
}
+ if (wafl_ver && !adev->ip_versions[XGMI_HWIP][0])
+ adev->ip_versions[XGMI_HWIP][0] = wafl_ver;
+
return 0;
}
-int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int number_instance,
- int *major, int *minor, int *revision)
+/*
+ * Determine which IP instances are harvested and update the device masks.
+ *
+ * When the GC IP version is below 10.2.0 and the discovery header version is
+ * at most 2, the per-IP harvest bits are read, and only for the listed PCI
+ * device/revision pairs; otherwise the harvest table is used. After the quirk
+ * hook has run, VCN and JPEG are marked harvested if every VCN instance was
+ * counted as harvested, and the UMC count is reduced by the harvested UMCs.
+ */
+static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
{
- struct binary_header *bhdr;
+ uint8_t *discovery_bin = adev->discovery.bin;
struct ip_discovery_header *ihdr;
- struct die_header *dhdr;
- struct ip *ip;
- uint16_t die_offset;
- uint16_t ip_offset;
- uint16_t num_dies;
- uint16_t num_ips;
- int i, j;
+ struct binary_header *bhdr;
+ /* uint32_t to match the uint32_t * parameters of the two readers below. */
+ uint32_t vcn_harvest_count = 0;
+ uint32_t umc_harvest_count = 0;
+ uint16_t offset, ihdr_ver;
+
+ bhdr = (struct binary_header *)discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset);
+ ihdr = (struct ip_discovery_header *)(discovery_bin + offset);
+ ihdr_ver = le16_to_cpu(ihdr->version);
+ /*
+ * Harvest table does not fit Navi1x and legacy GPUs,
+ * so read harvest bit per IP data structure to set
+ * harvest configuration.
+ */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 2, 0) &&
+ ihdr_ver <= 2) {
+ if ((adev->pdev->device == 0x731E &&
+ (adev->pdev->revision == 0xC6 ||
+ adev->pdev->revision == 0xC7)) ||
+ (adev->pdev->device == 0x7340 &&
+ adev->pdev->revision == 0xC9) ||
+ (adev->pdev->device == 0x7360 &&
+ adev->pdev->revision == 0xC7))
+ amdgpu_discovery_read_harvest_bit_per_ip(adev,
+ &vcn_harvest_count);
+ } else {
+ amdgpu_discovery_read_from_harvest_table(adev,
+ &vcn_harvest_count,
+ &umc_harvest_count);
+ }
- if (!adev->mman.discovery_bin) {
- DRM_ERROR("ip discovery uninitialized\n");
- return -EINVAL;
+ amdgpu_discovery_harvest_config_quirk(adev);
+
+ if (vcn_harvest_count == adev->vcn.num_vcn_inst) {
+ adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK;
+ adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK;
}
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
- ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
- le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
- num_dies = le16_to_cpu(ihdr->num_dies);
+ if (umc_harvest_count < adev->gmc.num_umc) {
+ adev->gmc.num_umc -= umc_harvest_count;
+ }
+}
- for (i = 0; i < num_dies; i++) {
- die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
- dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
- num_ips = le16_to_cpu(dhdr->num_ips);
- ip_offset = die_offset + sizeof(*dhdr);
+/* Overlay of the supported GC info table layouts; the header version fields select the member to read. */
+union gc_info {
+ struct gc_info_v1_0 v1;
+ struct gc_info_v1_1 v1_1;
+ struct gc_info_v1_2 v1_2;
+ struct gc_info_v1_3 v1_3;
+ struct gc_info_v2_0 v2;
+ struct gc_info_v2_1 v2_1;
+};
- for (j = 0; j < num_ips; j++) {
- ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);
-
- if ((le16_to_cpu(ip->hw_id) == hw_id) && (ip->number_instance == number_instance)) {
- if (major)
- *major = ip->major;
- if (minor)
- *minor = ip->minor;
- if (revision)
- *revision = ip->revision;
- return 0;
- }
- ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
- }
+/*
+ * Fill adev->gfx.config and adev->gfx.cu_info from the GC table of the
+ * discovery binary.
+ *
+ * Returns -EINVAL when no discovery binary is loaded or when the table's
+ * major version is neither 1 nor 2. Returns 0 otherwise, including when the
+ * GC table offset is 0, in which case nothing is read.
+ */
+static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
+{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct binary_header *bhdr;
+ union gc_info *gc_info;
+ u16 offset;
+
+ if (!discovery_bin) {
+ DRM_ERROR("ip discovery uninitialized\n");
+ return -EINVAL;
}
- return -EINVAL;
+ bhdr = (struct binary_header *)discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[GC].offset);
+
+ if (!offset)
+ return 0;
+
+ gc_info = (union gc_info *)(discovery_bin + offset);
+
+ switch (le16_to_cpu(gc_info->v1.header.version_major)) {
+ case 1:
+ adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v1.gc_num_se);
+ adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->v1.gc_num_wgp0_per_sa) +
+ le32_to_cpu(gc_info->v1.gc_num_wgp1_per_sa));
+ adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
+ adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v1.gc_num_rb_per_se);
+ adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v1.gc_num_gl2c);
+ adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v1.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v1.gc_num_max_gs_thds);
+ adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v1.gc_gs_table_depth);
+ adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v1.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v1.gc_double_offchip_lds_buffer);
+ adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v1.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v1.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v1.gc_max_scratch_slots_per_cu);
+ adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v1.gc_lds_size);
+ adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) /
+ le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
+ adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc);
+ /* The fields below are read only when the table's minor version is at least 1, 2 and 3 respectively. */
+ if (le16_to_cpu(gc_info->v1.header.version_minor) >= 1) {
+ adev->gfx.config.gc_num_tcp_per_sa = le32_to_cpu(gc_info->v1_1.gc_num_tcp_per_sa);
+ adev->gfx.config.gc_num_sdp_interface = le32_to_cpu(gc_info->v1_1.gc_num_sdp_interface);
+ adev->gfx.config.gc_num_tcps = le32_to_cpu(gc_info->v1_1.gc_num_tcps);
+ }
+ if (le16_to_cpu(gc_info->v1.header.version_minor) >= 2) {
+ adev->gfx.config.gc_num_tcp_per_wpg = le32_to_cpu(gc_info->v1_2.gc_num_tcp_per_wpg);
+ adev->gfx.config.gc_tcp_l1_size = le32_to_cpu(gc_info->v1_2.gc_tcp_l1_size);
+ adev->gfx.config.gc_num_sqc_per_wgp = le32_to_cpu(gc_info->v1_2.gc_num_sqc_per_wgp);
+ adev->gfx.config.gc_l1_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v1_2.gc_l1_instruction_cache_size_per_sqc);
+ adev->gfx.config.gc_l1_data_cache_size_per_sqc = le32_to_cpu(gc_info->v1_2.gc_l1_data_cache_size_per_sqc);
+ adev->gfx.config.gc_gl1c_per_sa = le32_to_cpu(gc_info->v1_2.gc_gl1c_per_sa);
+ adev->gfx.config.gc_gl1c_size_per_instance = le32_to_cpu(gc_info->v1_2.gc_gl1c_size_per_instance);
+ adev->gfx.config.gc_gl2c_per_gpu = le32_to_cpu(gc_info->v1_2.gc_gl2c_per_gpu);
+ }
+ if (le16_to_cpu(gc_info->v1.header.version_minor) >= 3) {
+ adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v1_3.gc_tcp_size_per_cu);
+ adev->gfx.config.gc_tcp_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcp_cache_line_size);
+ adev->gfx.config.gc_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_size_per_sqc);
+ adev->gfx.config.gc_instruction_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_line_size);
+ adev->gfx.config.gc_scalar_data_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_size_per_sqc);
+ adev->gfx.config.gc_scalar_data_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_line_size);
+ adev->gfx.config.gc_tcc_size = le32_to_cpu(gc_info->v1_3.gc_tcc_size);
+ adev->gfx.config.gc_tcc_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcc_cache_line_size);
+ }
+ break;
+ case 2:
+ adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se);
+ adev->gfx.config.max_cu_per_sh = le32_to_cpu(gc_info->v2.gc_num_cu_per_sh);
+ adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
+ adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v2.gc_num_rb_per_se);
+ adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v2.gc_num_tccs);
+ adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v2.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v2.gc_num_max_gs_thds);
+ adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v2.gc_gs_table_depth);
+ adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v2.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v2.gc_double_offchip_lds_buffer);
+ adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v2.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v2.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v2.gc_max_scratch_slots_per_cu);
+ adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v2.gc_lds_size);
+ adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) /
+ le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
+ adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc);
+ /* Unlike the v1 path, the v2.1 fields are read only for minor version exactly 1. */
+ if (le16_to_cpu(gc_info->v2.header.version_minor) == 1) {
+ adev->gfx.config.gc_num_tcp_per_sa = le32_to_cpu(gc_info->v2_1.gc_num_tcp_per_sh);
+ adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v2_1.gc_tcp_size_per_cu);
+ adev->gfx.config.gc_num_sdp_interface = le32_to_cpu(gc_info->v2_1.gc_num_sdp_interface); /* per XCD */
+ adev->gfx.config.gc_num_cu_per_sqc = le32_to_cpu(gc_info->v2_1.gc_num_cu_per_sqc);
+ adev->gfx.config.gc_l1_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v2_1.gc_instruction_cache_size_per_sqc);
+ adev->gfx.config.gc_l1_data_cache_size_per_sqc = le32_to_cpu(gc_info->v2_1.gc_scalar_data_cache_size_per_sqc);
+ adev->gfx.config.gc_tcc_size = le32_to_cpu(gc_info->v2_1.gc_tcc_size); /* per XCD */
+ }
+ break;
+ default:
+ dev_err(adev->dev,
+ "Unhandled GC info table %d.%d\n",
+ le16_to_cpu(gc_info->v1.header.version_major),
+ le16_to_cpu(gc_info->v1.header.version_minor));
+ return -EINVAL;
+ }
+ return 0;
}
+union mall_info {
+ struct mall_info_v1_0 v1;
+ struct mall_info_v2_0 v2;
+};
-int amdgpu_discovery_get_vcn_version(struct amdgpu_device *adev, int vcn_instance,
- int *major, int *minor, int *revision)
+static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
{
- return amdgpu_discovery_get_ip_version(adev, VCN_HWID,
- vcn_instance, major, minor, revision);
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct binary_header *bhdr;
+ union mall_info *mall_info;
+ u32 u, mall_size_per_umc, m_s_present, half_use;
+ u64 mall_size;
+ u16 offset;
+
+ if (!discovery_bin) {
+ DRM_ERROR("ip discovery uninitialized\n");
+ return -EINVAL;
+ }
+
+ bhdr = (struct binary_header *)discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[MALL_INFO].offset);
+
+ if (!offset)
+ return 0;
+
+ mall_info = (union mall_info *)(discovery_bin + offset);
+
+ switch (le16_to_cpu(mall_info->v1.header.version_major)) {
+ case 1:
+ mall_size = 0;
+ mall_size_per_umc = le32_to_cpu(mall_info->v1.mall_size_per_m);
+ m_s_present = le32_to_cpu(mall_info->v1.m_s_present);
+ half_use = le32_to_cpu(mall_info->v1.m_half_use);
+ for (u = 0; u < adev->gmc.num_umc; u++) {
+ if (m_s_present & (1 << u))
+ mall_size += mall_size_per_umc * 2;
+ else if (half_use & (1 << u))
+ mall_size += mall_size_per_umc / 2;
+ else
+ mall_size += mall_size_per_umc;
+ }
+ adev->gmc.mall_size = mall_size;
+ adev->gmc.m_half_use = half_use;
+ break;
+ case 2:
+ mall_size_per_umc = le32_to_cpu(mall_info->v2.mall_size_per_umc);
+ adev->gmc.mall_size = (uint64_t)mall_size_per_umc * adev->gmc.num_umc;
+ break;
+ default:
+ dev_err(adev->dev,
+ "Unhandled MALL info table %d.%d\n",
+ le16_to_cpu(mall_info->v1.header.version_major),
+ le16_to_cpu(mall_info->v1.header.version_minor));
+ return -EINVAL;
+ }
+ return 0;
}
-void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
+union vcn_info {
+ struct vcn_info_v1_0 v1;
+};
+
+static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
struct binary_header *bhdr;
- struct harvest_table *harvest_info;
- int i, vcn_harvest_count = 0;
+ union vcn_info *vcn_info;
+ u16 offset;
+ int v;
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
- harvest_info = (struct harvest_table *)(adev->mman.discovery_bin +
- le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset));
+ if (!discovery_bin) {
+ DRM_ERROR("ip discovery uninitialized\n");
+ return -EINVAL;
+ }
- for (i = 0; i < 32; i++) {
- if (le16_to_cpu(harvest_info->list[i].hw_id) == 0)
- break;
+ /* num_vcn_inst is currently limited to AMDGPU_MAX_VCN_INSTANCES
+ * which is smaller than VCN_INFO_TABLE_MAX_NUM_INSTANCES
+ * but that may change in the future with new GPUs so keep this
+ * check for defensive purposes.
+ */
+ if (adev->vcn.num_vcn_inst > VCN_INFO_TABLE_MAX_NUM_INSTANCES) {
+ dev_err(adev->dev, "invalid vcn instances\n");
+ return -EINVAL;
+ }
- switch (le16_to_cpu(harvest_info->list[i].hw_id)) {
- case VCN_HWID:
- vcn_harvest_count++;
- if (harvest_info->list[i].number_instance == 0)
- adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN0;
- else
- adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
- break;
- case DMU_HWID:
- adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
- break;
- default:
- break;
+ bhdr = (struct binary_header *)discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[VCN_INFO].offset);
+
+ if (!offset)
+ return 0;
+
+ vcn_info = (union vcn_info *)(discovery_bin + offset);
+
+ switch (le16_to_cpu(vcn_info->v1.header.version_major)) {
+ case 1:
+ /* num_vcn_inst is currently limited to AMDGPU_MAX_VCN_INSTANCES
+ * so this won't overflow.
+ */
+ for (v = 0; v < adev->vcn.num_vcn_inst; v++) {
+ adev->vcn.inst[v].vcn_codec_disable_mask =
+ le32_to_cpu(vcn_info->v1.instance_info[v].fuse_data.all_bits);
}
+ break;
+ default:
+ dev_err(adev->dev,
+ "Unhandled VCN info table %d.%d\n",
+ le16_to_cpu(vcn_info->v1.header.version_major),
+ le16_to_cpu(vcn_info->v1.header.version_minor));
+ return -EINVAL;
}
- /* some IP discovery tables on Navy Flounder don't have this set correctly */
- if ((adev->ip_versions[UVD_HWIP][1] == IP_VERSION(3, 0, 1)) &&
- (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 2)))
- adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
- if (vcn_harvest_count == adev->vcn.num_vcn_inst) {
- adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK;
- adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK;
- }
- if ((adev->pdev->device == 0x731E &&
- (adev->pdev->revision == 0xC6 || adev->pdev->revision == 0xC7)) ||
- (adev->pdev->device == 0x7340 && adev->pdev->revision == 0xC9) ||
- (adev->pdev->device == 0x7360 && adev->pdev->revision == 0xC7)) {
- adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK;
- adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK;
+ return 0;
+}
+
+union nps_info {
+ struct nps_info_v1_0 v1;
+};
+
+static int amdgpu_discovery_refresh_nps_info(struct amdgpu_device *adev,
+ union nps_info *nps_data)
+{
+ uint64_t vram_size, pos, offset;
+ struct nps_info_header *nhdr;
+ struct binary_header bhdr;
+ uint16_t checksum;
+
+ vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
+ pos = vram_size - DISCOVERY_TMR_OFFSET;
+ amdgpu_device_vram_access(adev, pos, &bhdr, sizeof(bhdr), false);
+
+ offset = le16_to_cpu(bhdr.table_list[NPS_INFO].offset);
+ checksum = le16_to_cpu(bhdr.table_list[NPS_INFO].checksum);
+
+ amdgpu_device_vram_access(adev, (pos + offset), nps_data,
+ sizeof(*nps_data), false);
+
+ nhdr = (struct nps_info_header *)(nps_data);
+ if (!amdgpu_discovery_verify_checksum((uint8_t *)nps_data,
+ le32_to_cpu(nhdr->size_bytes),
+ checksum)) {
+ dev_err(adev->dev, "nps data refresh, checksum mismatch\n");
+ return -EINVAL;
}
+
+ return 0;
}
-int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
+int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
+ uint32_t *nps_type,
+ struct amdgpu_gmc_memrange **ranges,
+ int *range_cnt, bool refresh)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct amdgpu_gmc_memrange *mem_ranges;
struct binary_header *bhdr;
- struct gc_info_v1_0 *gc_info;
+ union nps_info *nps_info;
+ union nps_info nps_data;
+ u16 offset;
+ int i, r;
- if (!adev->mman.discovery_bin) {
- DRM_ERROR("ip discovery uninitialized\n");
+ if (!nps_type || !range_cnt || !ranges)
return -EINVAL;
+
+ if (refresh) {
+ r = amdgpu_discovery_refresh_nps_info(adev, &nps_data);
+ if (r)
+ return r;
+ nps_info = &nps_data;
+ } else {
+ if (!discovery_bin) {
+ dev_err(adev->dev,
+ "fetch mem range failed, ip discovery uninitialized\n");
+ return -EINVAL;
+ }
+
+ bhdr = (struct binary_header *)discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[NPS_INFO].offset);
+
+ if (!offset)
+ return -ENOENT;
+
+ /* If verification fails, return as if NPS table doesn't exist */
+ if (amdgpu_discovery_verify_npsinfo(adev, bhdr))
+ return -ENOENT;
+
+ nps_info = (union nps_info *)(discovery_bin + offset);
}
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
- gc_info = (struct gc_info_v1_0 *)(adev->mman.discovery_bin +
- le16_to_cpu(bhdr->table_list[GC].offset));
-
- adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se);
- adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) +
- le32_to_cpu(gc_info->gc_num_wgp1_per_sa));
- adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se);
- adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se);
- adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c);
- adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs);
- adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds);
- adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth);
- adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth);
- adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer);
- adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size);
- adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd);
- adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu);
- adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size);
- adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) /
- le32_to_cpu(gc_info->gc_num_sa_per_se);
- adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc);
+ switch (le16_to_cpu(nps_info->v1.header.version_major)) {
+ case 1:
+ mem_ranges = kvcalloc(nps_info->v1.count,
+ sizeof(*mem_ranges),
+ GFP_KERNEL);
+ if (!mem_ranges)
+ return -ENOMEM;
+ *nps_type = nps_info->v1.nps_type;
+ *range_cnt = nps_info->v1.count;
+ for (i = 0; i < *range_cnt; i++) {
+ mem_ranges[i].base_address =
+ nps_info->v1.instance_info[i].base_address;
+ mem_ranges[i].limit_address =
+ nps_info->v1.instance_info[i].limit_address;
+ mem_ranges[i].nid_mask = -1;
+ mem_ranges[i].flags = 0;
+ }
+ *ranges = mem_ranges;
+ break;
+ default:
+ dev_err(adev->dev, "Unhandled NPS info table %d.%d\n",
+ le16_to_cpu(nps_info->v1.header.version_major),
+ le16_to_cpu(nps_info->v1.header.version_minor));
+ return -EINVAL;
+ }
return 0;
}
@@ -565,7 +1942,7 @@ int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
{
/* what IP to use for this? */
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 1):
@@ -574,24 +1951,45 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(9, 4, 0):
case IP_VERSION(9, 4, 1):
case IP_VERSION(9, 4, 2):
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
break;
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ amdgpu_device_ip_block_add(adev, &soc21_common_ip_block);
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_device_ip_block_add(adev, &soc24_common_ip_block);
+ break;
default:
dev_err(adev->dev,
"Failed to add common ip block(GC_HWIP:0x%x)\n",
- adev->ip_versions[GC_HWIP][0]);
+ amdgpu_ip_version(adev, GC_HWIP, 0));
return -EINVAL;
}
return 0;
@@ -600,7 +1998,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
{
/* use GC or MMHUB IP version */
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 1):
@@ -609,24 +2007,44 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(9, 4, 0):
case IP_VERSION(9, 4, 1):
case IP_VERSION(9, 4, 2):
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
break;
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block);
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_device_ip_block_add(adev, &gmc_v12_0_ip_block);
+ break;
default:
- dev_err(adev->dev,
- "Failed to add gmc ip block(GC_HWIP:0x%x)\n",
- adev->ip_versions[GC_HWIP][0]);
+ dev_err(adev->dev, "Failed to add gmc ip block(GC_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, GC_HWIP, 0));
return -EINVAL;
}
return 0;
@@ -634,7 +2052,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[OSSSYS_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, OSSSYS_HWIP, 0)) {
case IP_VERSION(4, 0, 0):
case IP_VERSION(4, 0, 1):
case IP_VERSION(4, 1, 0):
@@ -645,6 +2063,8 @@ static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(4, 2, 0):
case IP_VERSION(4, 2, 1):
case IP_VERSION(4, 4, 0):
+ case IP_VERSION(4, 4, 2):
+ case IP_VERSION(4, 4, 5):
amdgpu_device_ip_block_add(adev, &vega20_ih_ip_block);
break;
case IP_VERSION(5, 0, 0):
@@ -655,10 +2075,21 @@ static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(5, 2, 1):
amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
break;
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 1):
+ case IP_VERSION(6, 0, 2):
+ amdgpu_device_ip_block_add(adev, &ih_v6_0_ip_block);
+ break;
+ case IP_VERSION(6, 1, 0):
+ amdgpu_device_ip_block_add(adev, &ih_v6_1_ip_block);
+ break;
+ case IP_VERSION(7, 0, 0):
+ amdgpu_device_ip_block_add(adev, &ih_v7_0_ip_block);
+ break;
default:
dev_err(adev->dev,
"Failed to add ih ip block(OSSSYS_HWIP:0x%x)\n",
- adev->ip_versions[OSSSYS_HWIP][0]);
+ amdgpu_ip_version(adev, OSSSYS_HWIP, 0));
return -EINVAL;
}
return 0;
@@ -666,7 +2097,7 @@ static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev)
static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[MP0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(9, 0, 0):
amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
break;
@@ -684,6 +2115,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 12):
case IP_VERSION(11, 0, 13):
case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
break;
case IP_VERSION(11, 0, 8):
@@ -693,15 +2125,35 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(12, 0, 1):
amdgpu_device_ip_block_add(adev, &psp_v12_0_ip_block);
break;
+ case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 1):
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 3):
+ case IP_VERSION(13, 0, 5):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 7):
+ case IP_VERSION(13, 0, 8):
+ case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 11):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
+ case IP_VERSION(14, 0, 4):
amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block);
break;
+ case IP_VERSION(13, 0, 4):
+ amdgpu_device_ip_block_add(adev, &psp_v13_0_4_ip_block);
+ break;
+ case IP_VERSION(14, 0, 2):
+ case IP_VERSION(14, 0, 3):
+ case IP_VERSION(14, 0, 5):
+ amdgpu_device_ip_block_add(adev, &psp_v14_0_ip_block);
+ break;
default:
dev_err(adev->dev,
"Failed to add psp ip block(MP0_HWIP:0x%x)\n",
- adev->ip_versions[MP0_HWIP][0]);
+ amdgpu_ip_version(adev, MP0_HWIP, 0));
return -EINVAL;
}
return 0;
@@ -709,7 +2161,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[MP1_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(9, 0, 0):
case IP_VERSION(10, 0, 0):
case IP_VERSION(10, 0, 1):
@@ -723,38 +2175,74 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 5):
case IP_VERSION(11, 0, 9):
case IP_VERSION(11, 0, 7):
- case IP_VERSION(11, 0, 8):
case IP_VERSION(11, 0, 11):
case IP_VERSION(11, 0, 12):
case IP_VERSION(11, 0, 13):
case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
break;
+ case IP_VERSION(11, 0, 8):
+ if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2)
+ amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
+ break;
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block);
break;
+ case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 1):
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 3):
+ case IP_VERSION(13, 0, 4):
+ case IP_VERSION(13, 0, 5):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 7):
+ case IP_VERSION(13, 0, 8):
+ case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 11):
+ case IP_VERSION(13, 0, 14):
+ case IP_VERSION(13, 0, 12):
amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block);
break;
+ case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
+ case IP_VERSION(14, 0, 2):
+ case IP_VERSION(14, 0, 3):
+ case IP_VERSION(14, 0, 4):
+ case IP_VERSION(14, 0, 5):
+ amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block);
+ break;
default:
dev_err(adev->dev,
"Failed to add smu ip block(MP1_HWIP:0x%x)\n",
- adev->ip_versions[MP1_HWIP][0]);
+ amdgpu_ip_version(adev, MP1_HWIP, 0));
return -EINVAL;
}
return 0;
}
+#if defined(CONFIG_DRM_AMD_DC)
+static void amdgpu_discovery_set_sriov_display(struct amdgpu_device *adev)
+{
+ amdgpu_device_set_sriov_virtual_display(adev);
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+}
+#endif
+
static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
{
- if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) {
+ if (adev->enable_virtual_display) {
amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+ return 0;
+ }
+
+ if (!amdgpu_device_has_dc_support(adev))
+ return 0;
+
#if defined(CONFIG_DRM_AMD_DC)
- } else if (adev->ip_versions[DCE_HWIP][0]) {
- switch (adev->ip_versions[DCE_HWIP][0]) {
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(1, 0, 0):
case IP_VERSION(1, 0, 1):
case IP_VERSION(2, 0, 2):
@@ -767,35 +2255,54 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(3, 0, 1):
case IP_VERSION(3, 1, 2):
case IP_VERSION(3, 1, 3):
- amdgpu_device_ip_block_add(adev, &dm_ip_block);
+ case IP_VERSION(3, 1, 4):
+ case IP_VERSION(3, 1, 5):
+ case IP_VERSION(3, 1, 6):
+ case IP_VERSION(3, 2, 0):
+ case IP_VERSION(3, 2, 1):
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 6, 0):
+ case IP_VERSION(4, 1, 0):
+ /* TODO: Fix IP version. DC code expects version 4.0.1 */
+ if (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(4, 1, 0))
+ adev->ip_versions[DCE_HWIP][0] = IP_VERSION(4, 0, 1);
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_discovery_set_sriov_display(adev);
+ else
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
break;
default:
dev_err(adev->dev,
"Failed to add dm ip block(DCE_HWIP:0x%x)\n",
- adev->ip_versions[DCE_HWIP][0]);
+ amdgpu_ip_version(adev, DCE_HWIP, 0));
return -EINVAL;
}
- } else if (adev->ip_versions[DCI_HWIP][0]) {
- switch (adev->ip_versions[DCI_HWIP][0]) {
+ } else if (amdgpu_ip_version(adev, DCI_HWIP, 0)) {
+ switch (amdgpu_ip_version(adev, DCI_HWIP, 0)) {
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
case IP_VERSION(12, 1, 0):
- amdgpu_device_ip_block_add(adev, &dm_ip_block);
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_discovery_set_sriov_display(adev);
+ else
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
break;
default:
dev_err(adev->dev,
"Failed to add dm ip block(DCI_HWIP:0x%x)\n",
- adev->ip_versions[DCI_HWIP][0]);
+ amdgpu_ip_version(adev, DCI_HWIP, 0));
return -EINVAL;
}
-#endif
}
+#endif
return 0;
}
static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 1):
@@ -806,22 +2313,44 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(9, 4, 2):
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
break;
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block);
+ break;
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 2):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block);
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_device_ip_block_add(adev, &gfx_v12_0_ip_block);
+ break;
default:
- dev_err(adev->dev,
- "Failed to add gfx ip block(GC_HWIP:0x%x)\n",
- adev->ip_versions[GC_HWIP][0]);
+ dev_err(adev->dev, "Failed to add gfx ip block(GC_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, GC_HWIP, 0));
return -EINVAL;
}
return 0;
@@ -829,7 +2358,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(4, 0, 0):
case IP_VERSION(4, 0, 1):
case IP_VERSION(4, 1, 0):
@@ -840,6 +2369,11 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(4, 4, 0):
amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
break;
+ case IP_VERSION(4, 4, 2):
+ case IP_VERSION(4, 4, 5):
+ case IP_VERSION(4, 4, 4):
+ amdgpu_device_ip_block_add(adev, &sdma_v4_4_2_ip_block);
+ break;
case IP_VERSION(5, 0, 0):
case IP_VERSION(5, 0, 1):
case IP_VERSION(5, 0, 2):
@@ -850,23 +2384,54 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(5, 2, 2):
case IP_VERSION(5, 2, 4):
case IP_VERSION(5, 2, 5):
+ case IP_VERSION(5, 2, 6):
case IP_VERSION(5, 2, 3):
case IP_VERSION(5, 2, 1):
+ case IP_VERSION(5, 2, 7):
amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block);
break;
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 1):
+ case IP_VERSION(6, 0, 2):
+ case IP_VERSION(6, 0, 3):
+ case IP_VERSION(6, 1, 0):
+ case IP_VERSION(6, 1, 1):
+ case IP_VERSION(6, 1, 2):
+ case IP_VERSION(6, 1, 3):
+ amdgpu_device_ip_block_add(adev, &sdma_v6_0_ip_block);
+ break;
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ amdgpu_device_ip_block_add(adev, &sdma_v7_0_ip_block);
+ break;
default:
dev_err(adev->dev,
"Failed to add sdma ip block(SDMA0_HWIP:0x%x)\n",
- adev->ip_versions[SDMA0_HWIP][0]);
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0));
return -EINVAL;
}
+
+ return 0;
+}
+
+static int amdgpu_discovery_set_ras_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ amdgpu_device_ip_block_add(adev, &ras_v1_0_ip_block);
+ break;
+ default:
+ break;
+ }
return 0;
}
static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
{
- if (adev->ip_versions[VCE_HWIP][0]) {
- switch (adev->ip_versions[UVD_HWIP][0]) {
+ if (amdgpu_ip_version(adev, VCE_HWIP, 0)) {
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
case IP_VERSION(7, 0, 0):
case IP_VERSION(7, 2, 0):
/* UVD is not supported on vega20 SR-IOV */
@@ -876,10 +2441,10 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
default:
dev_err(adev->dev,
"Failed to add uvd v7 ip block(UVD_HWIP:0x%x)\n",
- adev->ip_versions[UVD_HWIP][0]);
+ amdgpu_ip_version(adev, UVD_HWIP, 0));
return -EINVAL;
}
- switch (adev->ip_versions[VCE_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, VCE_HWIP, 0)) {
case IP_VERSION(4, 0, 0):
case IP_VERSION(4, 1, 0):
/* VCE is not supported on vega20 SR-IOV */
@@ -889,11 +2454,11 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
default:
dev_err(adev->dev,
"Failed to add VCE v4 ip block(VCE_HWIP:0x%x)\n",
- adev->ip_versions[VCE_HWIP][0]);
+ amdgpu_ip_version(adev, VCE_HWIP, 0));
return -EINVAL;
}
} else {
- switch (adev->ip_versions[UVD_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
case IP_VERSION(1, 0, 0):
case IP_VERSION(1, 0, 1):
amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block);
@@ -917,10 +2482,9 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
break;
case IP_VERSION(3, 0, 0):
case IP_VERSION(3, 0, 16):
- case IP_VERSION(3, 0, 64):
case IP_VERSION(3, 1, 1):
+ case IP_VERSION(3, 1, 2):
case IP_VERSION(3, 0, 2):
- case IP_VERSION(3, 0, 192):
amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
if (!amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block);
@@ -928,10 +2492,33 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(3, 0, 33):
amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
break;
+ case IP_VERSION(4, 0, 0):
+ case IP_VERSION(4, 0, 2):
+ case IP_VERSION(4, 0, 4):
+ amdgpu_device_ip_block_add(adev, &vcn_v4_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v4_0_ip_block);
+ break;
+ case IP_VERSION(4, 0, 3):
+ amdgpu_device_ip_block_add(adev, &vcn_v4_0_3_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v4_0_3_ip_block);
+ break;
+ case IP_VERSION(4, 0, 5):
+ case IP_VERSION(4, 0, 6):
+ amdgpu_device_ip_block_add(adev, &vcn_v4_0_5_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v4_0_5_ip_block);
+ break;
+ case IP_VERSION(5, 0, 0):
+ amdgpu_device_ip_block_add(adev, &vcn_v5_0_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v5_0_0_ip_block);
+ break;
+ case IP_VERSION(5, 0, 1):
+ amdgpu_device_ip_block_add(adev, &vcn_v5_0_1_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v5_0_1_ip_block);
+ break;
default:
dev_err(adev->dev,
"Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n",
- adev->ip_versions[UVD_HWIP][0]);
+ amdgpu_ip_version(adev, UVD_HWIP, 0));
return -EINVAL;
}
}
@@ -940,33 +2527,113 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
- case IP_VERSION(10, 1, 10):
- case IP_VERSION(10, 1, 1):
- case IP_VERSION(10, 1, 2):
- case IP_VERSION(10, 1, 3):
- case IP_VERSION(10, 3, 0):
- case IP_VERSION(10, 3, 1):
- case IP_VERSION(10, 3, 2):
- case IP_VERSION(10, 3, 3):
- case IP_VERSION(10, 3, 4):
- case IP_VERSION(10, 3, 5):
- amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block);
+ adev->enable_mes = true;
+ adev->enable_mes_kiq = true;
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_device_ip_block_add(adev, &mes_v12_0_ip_block);
+ adev->enable_mes = true;
+ adev->enable_mes_kiq = true;
+ if (amdgpu_uni_mes)
+ adev->enable_uni_mes = true;
break;
default:
- break;;
+ break;
}
return 0;
}
+static void amdgpu_discovery_init_soc_config(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ aqua_vanjaram_init_soc_config(adev);
+ break;
+ default:
+ break;
+ }
+}
+
+static int amdgpu_discovery_set_vpe_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) {
+ case IP_VERSION(6, 1, 0):
+ case IP_VERSION(6, 1, 1):
+ case IP_VERSION(6, 1, 3):
+ amdgpu_device_ip_block_add(adev, &vpe_v6_1_ip_block);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int amdgpu_discovery_set_umsch_mm_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
+ case IP_VERSION(4, 0, 5):
+ case IP_VERSION(4, 0, 6):
+ if (amdgpu_umsch_mm & 0x1) {
+ amdgpu_device_ip_block_add(adev, &umsch_mm_v4_0_ip_block);
+ adev->enable_umsch_mm = true;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int amdgpu_discovery_set_isp_ip_blocks(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DRM_AMD_ISP)
+ switch (amdgpu_ip_version(adev, ISP_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ amdgpu_device_ip_block_add(adev, &isp_v4_1_0_ip_block);
+ break;
+ case IP_VERSION(4, 1, 1):
+ amdgpu_device_ip_block_add(adev, &isp_v4_1_1_ip_block);
+ break;
+ default:
+ break;
+ }
+#endif
+
+ return 0;
+}
+
int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
{
int r;
switch (adev->asic_type) {
case CHIP_VEGA10:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
vega10_reg_base_init(adev);
adev->sdma.num_instances = 2;
+ adev->sdma.sdma_mask = 3;
+ adev->gmc.num_umc = 4;
+ adev->gfx.xcc_mask = 1;
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 0, 0);
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 0, 0);
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 0);
@@ -986,8 +2653,16 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 0);
break;
case CHIP_VEGA12:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
vega10_reg_base_init(adev);
adev->sdma.num_instances = 2;
+ adev->sdma.sdma_mask = 3;
+ adev->gmc.num_umc = 4;
+ adev->gfx.xcc_mask = 1;
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 3, 0);
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 3, 0);
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 1);
@@ -1007,9 +2682,17 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 1);
break;
case CHIP_RAVEN:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
vega10_reg_base_init(adev);
adev->sdma.num_instances = 1;
+ adev->sdma.sdma_mask = 1;
adev->vcn.num_vcn_inst = 1;
+ adev->gmc.num_umc = 2;
+ adev->gfx.xcc_mask = 1;
if (adev->apu_flags & AMD_APU_IS_RAVEN2) {
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 2, 0);
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 2, 0);
@@ -1026,6 +2709,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 2, 2);
adev->ip_versions[UVD_HWIP][0] = IP_VERSION(1, 0, 1);
adev->ip_versions[DCE_HWIP][0] = IP_VERSION(1, 0, 1);
+ adev->ip_versions[ISP_HWIP][0] = IP_VERSION(2, 0, 0);
} else {
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 1, 0);
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 1, 0);
@@ -1042,11 +2726,20 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 1, 0);
adev->ip_versions[UVD_HWIP][0] = IP_VERSION(1, 0, 0);
adev->ip_versions[DCE_HWIP][0] = IP_VERSION(1, 0, 0);
+ adev->ip_versions[ISP_HWIP][0] = IP_VERSION(2, 0, 0);
}
break;
case CHIP_VEGA20:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
vega20_reg_base_init(adev);
adev->sdma.num_instances = 2;
+ adev->sdma.sdma_mask = 3;
+ adev->gmc.num_umc = 8;
+ adev->gfx.xcc_mask = 1;
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 0);
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 0);
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 0);
@@ -1067,9 +2760,17 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 1, 0);
break;
case CHIP_ARCTURUS:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
arct_reg_base_init(adev);
adev->sdma.num_instances = 8;
+ adev->sdma.sdma_mask = 0xff;
adev->vcn.num_vcn_inst = 2;
+ adev->gmc.num_umc = 8;
+ adev->gfx.xcc_mask = 1;
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 1);
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 1);
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 1);
@@ -1094,9 +2795,17 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 5, 0);
break;
case CHIP_ALDEBARAN:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
aldebaran_reg_base_init(adev);
adev->sdma.num_instances = 5;
+ adev->sdma.sdma_mask = 0x1f;
adev->vcn.num_vcn_inst = 2;
+ adev->gmc.num_umc = 4;
+ adev->gfx.xcc_mask = 1;
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 2);
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 2);
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 4, 0);
@@ -1118,26 +2827,64 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 6, 0);
adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0);
break;
+ case CHIP_CYAN_SKILLFISH:
+ if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) {
+ r = amdgpu_discovery_reg_base_init(adev);
+ if (r)
+ return -EINVAL;
+
+ amdgpu_discovery_harvest_ip(adev);
+ amdgpu_discovery_get_gfx_info(adev);
+ amdgpu_discovery_get_mall_info(adev);
+ amdgpu_discovery_get_vcn_info(adev);
+ } else {
+ cyan_skillfish_reg_base_init(adev);
+ adev->sdma.num_instances = 2;
+ adev->sdma.sdma_mask = 3;
+ adev->gfx.xcc_mask = 1;
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(2, 0, 3);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(2, 0, 3);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(5, 0, 1);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(5, 0, 1);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(5, 0, 1);
+ adev->ip_versions[SDMA1_HWIP][1] = IP_VERSION(5, 0, 1);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(3, 5, 0);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(2, 1, 1);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(8, 1, 1);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(11, 0, 8);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(11, 0, 8);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(11, 0, 1);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(11, 0, 8);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(10, 1, 3);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(2, 0, 3);
+ }
+ break;
default:
r = amdgpu_discovery_reg_base_init(adev);
- if (r)
- return -EINVAL;
+ if (r) {
+ drm_err(&adev->ddev, "discovery failed: %d\n", r);
+ return r;
+ }
amdgpu_discovery_harvest_ip(adev);
-
- if (!adev->mman.discovery_bin) {
- DRM_ERROR("ip discovery uninitialized\n");
- return -EINVAL;
- }
+ amdgpu_discovery_get_gfx_info(adev);
+ amdgpu_discovery_get_mall_info(adev);
+ amdgpu_discovery_get_vcn_info(adev);
break;
}
- switch (adev->ip_versions[GC_HWIP][0]) {
+ amdgpu_discovery_init_soc_config(adev);
+ amdgpu_discovery_sysfs_init(adev);
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
case IP_VERSION(9, 2, 1):
case IP_VERSION(9, 4, 0):
case IP_VERSION(9, 4, 1):
case IP_VERSION(9, 4, 2):
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
adev->family = AMDGPU_FAMILY_AI;
break;
case IP_VERSION(9, 1, 0):
@@ -1149,6 +2896,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 4):
@@ -1157,19 +2905,64 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
break;
case IP_VERSION(10, 3, 1):
adev->family = AMDGPU_FAMILY_VGH;
+ adev->apu_flags |= AMD_APU_IS_VANGOGH;
break;
case IP_VERSION(10, 3, 3):
adev->family = AMDGPU_FAMILY_YC;
break;
+ case IP_VERSION(10, 3, 6):
+ adev->family = AMDGPU_FAMILY_GC_10_3_6;
+ break;
+ case IP_VERSION(10, 3, 7):
+ adev->family = AMDGPU_FAMILY_GC_10_3_7;
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ adev->family = AMDGPU_FAMILY_GC_11_0_0;
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ adev->family = AMDGPU_FAMILY_GC_11_0_1;
+ break;
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ adev->family = AMDGPU_FAMILY_GC_11_5_0;
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ adev->family = AMDGPU_FAMILY_GC_12_0_0;
+ break;
default:
return -EINVAL;
}
- if (adev->ip_versions[XGMI_HWIP][0] == IP_VERSION(4, 8, 0))
- adev->gmc.xgmi.supported = true;
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ adev->flags |= AMD_IS_APU;
+ break;
+ default:
+ break;
+ }
/* set NBIO version */
- switch (adev->ip_versions[NBIO_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(6, 1, 0):
case IP_VERSION(6, 2, 0):
adev->nbio.funcs = &nbio_v6_1_funcs;
@@ -1183,16 +2976,27 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
break;
case IP_VERSION(7, 4, 0):
case IP_VERSION(7, 4, 1):
+ case IP_VERSION(7, 4, 4):
adev->nbio.funcs = &nbio_v7_4_funcs;
adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg;
break;
- case IP_VERSION(7, 4, 4):
- adev->nbio.funcs = &nbio_v7_4_funcs;
- adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg_ald;
+ case IP_VERSION(7, 9, 0):
+ case IP_VERSION(7, 9, 1):
+ adev->nbio.funcs = &nbio_v7_9_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_9_hdp_flush_reg;
+ break;
+ case IP_VERSION(7, 11, 0):
+ case IP_VERSION(7, 11, 1):
+ case IP_VERSION(7, 11, 2):
+ case IP_VERSION(7, 11, 3):
+ adev->nbio.funcs = &nbio_v7_11_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_11_hdp_flush_reg;
break;
case IP_VERSION(7, 2, 0):
case IP_VERSION(7, 2, 1):
+ case IP_VERSION(7, 3, 0):
case IP_VERSION(7, 5, 0):
+ case IP_VERSION(7, 5, 1):
adev->nbio.funcs = &nbio_v7_2_funcs;
adev->nbio.hdp_flush_reg = &nbio_v7_2_hdp_flush_reg;
break;
@@ -1200,21 +3004,35 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(2, 3, 0):
case IP_VERSION(2, 3, 1):
case IP_VERSION(2, 3, 2):
- adev->nbio.funcs = &nbio_v2_3_funcs;
- adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg;
- break;
case IP_VERSION(3, 3, 0):
case IP_VERSION(3, 3, 1):
case IP_VERSION(3, 3, 2):
case IP_VERSION(3, 3, 3):
adev->nbio.funcs = &nbio_v2_3_funcs;
- adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg_sc;
+ adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg;
+ break;
+ case IP_VERSION(4, 3, 0):
+ case IP_VERSION(4, 3, 1):
+ if (amdgpu_sriov_vf(adev))
+ adev->nbio.funcs = &nbio_v4_3_sriov_funcs;
+ else
+ adev->nbio.funcs = &nbio_v4_3_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v4_3_hdp_flush_reg;
+ break;
+ case IP_VERSION(7, 7, 0):
+ case IP_VERSION(7, 7, 1):
+ adev->nbio.funcs = &nbio_v7_7_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg;
+ break;
+ case IP_VERSION(6, 3, 1):
+ adev->nbio.funcs = &nbif_v6_3_1_funcs;
+ adev->nbio.hdp_flush_reg = &nbif_v6_3_1_hdp_flush_reg;
break;
default:
break;
}
- switch (adev->ip_versions[HDP_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, HDP_HWIP, 0)) {
case IP_VERSION(4, 0, 0):
case IP_VERSION(4, 0, 1):
case IP_VERSION(4, 1, 0):
@@ -1223,6 +3041,8 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(4, 2, 0):
case IP_VERSION(4, 2, 1):
case IP_VERSION(4, 4, 0):
+ case IP_VERSION(4, 4, 2):
+ case IP_VERSION(4, 4, 5):
adev->hdp.funcs = &hdp_v4_0_funcs;
break;
case IP_VERSION(5, 0, 0):
@@ -1233,11 +3053,22 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(5, 2, 0):
adev->hdp.funcs = &hdp_v5_0_funcs;
break;
+ case IP_VERSION(5, 2, 1):
+ adev->hdp.funcs = &hdp_v5_2_funcs;
+ break;
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 1):
+ case IP_VERSION(6, 1, 0):
+ adev->hdp.funcs = &hdp_v6_0_funcs;
+ break;
+ case IP_VERSION(7, 0, 0):
+ adev->hdp.funcs = &hdp_v7_0_funcs;
+ break;
default:
break;
}
- switch (adev->ip_versions[DF_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, DF_HWIP, 0)) {
case IP_VERSION(3, 6, 0):
case IP_VERSION(3, 6, 1):
case IP_VERSION(3, 6, 2):
@@ -1250,11 +3081,21 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(3, 5, 2):
adev->df.funcs = &df_v1_7_funcs;
break;
+ case IP_VERSION(4, 3, 0):
+ adev->df.funcs = &df_v4_3_funcs;
+ break;
+ case IP_VERSION(4, 6, 2):
+ adev->df.funcs = &df_v4_6_2_funcs;
+ break;
+ case IP_VERSION(4, 15, 0):
+ case IP_VERSION(4, 15, 1):
+ adev->df.funcs = &df_v4_15_funcs;
+ break;
default:
break;
}
- switch (adev->ip_versions[SMUIO_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SMUIO_HWIP, 0)) {
case IP_VERSION(9, 0, 0):
case IP_VERSION(9, 0, 1):
case IP_VERSION(10, 0, 0):
@@ -1274,12 +3115,46 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 10):
case IP_VERSION(11, 0, 11):
case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
case IP_VERSION(13, 0, 1):
+ case IP_VERSION(13, 0, 9):
+ case IP_VERSION(13, 0, 10):
adev->smuio.funcs = &smuio_v11_0_6_funcs;
break;
case IP_VERSION(13, 0, 2):
adev->smuio.funcs = &smuio_v13_0_funcs;
break;
+ case IP_VERSION(13, 0, 3):
+ case IP_VERSION(13, 0, 11):
+ adev->smuio.funcs = &smuio_v13_0_3_funcs;
+ if (adev->smuio.funcs->get_pkg_type(adev) == AMDGPU_PKG_TYPE_APU) {
+ adev->flags |= AMD_IS_APU;
+ }
+ break;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 8):
+ case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
+ adev->smuio.funcs = &smuio_v13_0_6_funcs;
+ break;
+ case IP_VERSION(14, 0, 2):
+ adev->smuio.funcs = &smuio_v14_0_2_funcs;
+ break;
+ default:
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) {
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 1):
+ case IP_VERSION(6, 0, 2):
+ case IP_VERSION(6, 0, 3):
+ adev->lsdma.funcs = &lsdma_v6_0_funcs;
+ break;
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ adev->lsdma.funcs = &lsdma_v7_0_funcs;
+ break;
default:
break;
}
@@ -1330,8 +3205,13 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
if (r)
return r;
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
- !amdgpu_sriov_vf(adev)) {
+ r = amdgpu_discovery_set_ras_ip_blocks(adev);
+ if (r)
+ return r;
+
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
+ !amdgpu_sriov_vf(adev)) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
r = amdgpu_discovery_set_smu_ip_blocks(adev);
if (r)
return r;
@@ -1341,12 +3221,21 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
if (r)
return r;
- if (adev->enable_mes) {
- r = amdgpu_discovery_set_mes_ip_blocks(adev);
- if (r)
- return r;
- }
+ r = amdgpu_discovery_set_mes_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_vpe_ip_blocks(adev);
+ if (r)
+ return r;
+ r = amdgpu_discovery_set_umsch_mm_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_isp_ip_blocks(adev);
+ if (r)
+ return r;
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
index 0ea029e3b850..4ce04486cc31 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -24,18 +24,27 @@
#ifndef __AMDGPU_DISCOVERY__
#define __AMDGPU_DISCOVERY__
-#define DISCOVERY_TMR_SIZE (4 << 10)
+#include <linux/debugfs.h>
+
+#define DISCOVERY_TMR_SIZE (10 << 10)
#define DISCOVERY_TMR_OFFSET (64 << 10)
+struct ip_discovery_top;
+
+struct amdgpu_discovery_info {
+ struct debugfs_blob_wrapper debugfs_blob;
+ struct ip_discovery_top *ip_top;
+ uint32_t size;
+ uint8_t *bin;
+ bool reserve_tmr;
+};
+
void amdgpu_discovery_fini(struct amdgpu_device *adev);
-int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev);
-void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev);
-int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int number_instance,
- int *major, int *minor, int *revision);
-
-int amdgpu_discovery_get_vcn_version(struct amdgpu_device *adev, int vcn_instance,
- int *major, int *minor, int *revision);
-int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev);
int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev);
+int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
+ uint32_t *nps_type,
+ struct amdgpu_gmc_memrange **ranges,
+ int *range_cnt, bool refresh);
+
#endif /* __AMDGPU_DISCOVERY__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index dc50c05f23fc..b5d34797d606 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -30,17 +30,64 @@
#include "atom.h"
#include "amdgpu_connectors.h"
#include "amdgpu_display.h"
+#include "soc15_common.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "bif/bif_4_1_d.h"
#include <asm/div64.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <drm/drm_crtc_helper.h>
+#include <drm/drm_damage_helper.h>
+#include <drm/drm_drv.h>
#include <drm/drm_edid.h>
-#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_fb_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_fourcc.h>
+#include <drm/drm_modeset_helper.h>
#include <drm/drm_vblank.h>
+/**
+ * amdgpu_display_hotplug_work_func - work handler for display hotplug event
+ *
+ * @work: work struct pointer
+ *
+ * This is the hotplug event work handler (all ASICs).
+ * The work gets scheduled from the IRQ handler if there
+ * was a hotplug interrupt. It walks through the connector table
+ * and calls hotplug handler for each connector. After this, it sends
+ * a DRM hotplug event to alert userspace.
+ *
+ * This design approach is required in order to defer hotplug event handling
+ * from the IRQ handler to a work handler because hotplug handler has to use
+ * mutexes which cannot be locked in an IRQ handler (since &mutex_lock may
+ * sleep).
+ */
+void amdgpu_display_hotplug_work_func(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ hotplug_work.work);
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_mode_config *mode_config = &dev->mode_config;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+
+ mutex_lock(&mode_config->mutex);
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter)
+ amdgpu_connector_hotplug(connector);
+ drm_connector_list_iter_end(&iter);
+ mutex_unlock(&mode_config->mutex);
+ /* Just fire off a uevent and let userspace tell us what to do */
+ drm_helper_hpd_irq_event(dev);
+}
+
+static int amdgpu_display_framebuffer_init(struct drm_device *dev,
+ struct amdgpu_framebuffer *rfb,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_gem_object *obj);
+
static void amdgpu_display_flip_callback(struct dma_fence *f,
struct dma_fence_cb *cb)
{
@@ -54,7 +101,7 @@ static void amdgpu_display_flip_callback(struct dma_fence *f,
static bool amdgpu_display_flip_handle_fence(struct amdgpu_flip_work *work,
struct dma_fence **f)
{
- struct dma_fence *fence= *f;
+ struct dma_fence *fence = *f;
if (fence == NULL)
return false;
@@ -80,12 +127,9 @@ static void amdgpu_display_flip_work_func(struct work_struct *__work)
struct drm_crtc *crtc = &amdgpu_crtc->base;
unsigned long flags;
- unsigned i;
+ unsigned int i;
int vpos, hpos;
- if (amdgpu_display_flip_handle_fence(work, &work->excl))
- return;
-
for (i = 0; i < work->shared_count; ++i)
if (amdgpu_display_flip_handle_fence(work, &work->shared[i]))
return;
@@ -116,8 +160,9 @@ static void amdgpu_display_flip_work_func(struct work_struct *__work)
spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
- DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_SUBMITTED, work: %p,\n",
- amdgpu_crtc->crtc_id, amdgpu_crtc, work);
+ drm_dbg_vbl(adev_to_drm(adev),
+ "crtc:%d[%p], pflip_stat:AMDGPU_FLIP_SUBMITTED, work: %p,\n",
+ amdgpu_crtc->crtc_id, amdgpu_crtc, work);
}
@@ -159,7 +204,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
u64 tiling_flags;
int i, r;
- work = kzalloc(sizeof *work, GFP_KERNEL);
+ work = kzalloc(sizeof(*work), GFP_KERNEL);
if (work == NULL)
return -ENOMEM;
@@ -189,6 +234,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
}
if (!adev->enable_virtual_display) {
+ new_abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(new_abo,
amdgpu_display_supported_domains(adev, new_abo->flags));
if (unlikely(r != 0)) {
@@ -203,8 +249,9 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
goto unpin;
}
- r = dma_resv_get_fences(new_abo->tbo.base.resv, &work->excl,
- &work->shared_count, &work->shared);
+ r = dma_resv_get_fences(new_abo->tbo.base.resv, DMA_RESV_USAGE_WRITE,
+ &work->shared_count,
+ &work->shared);
if (unlikely(r != 0)) {
DRM_ERROR("failed to get fences for buffer\n");
goto unpin;
@@ -253,7 +300,6 @@ unreserve:
cleanup:
amdgpu_bo_unref(&work->old_abo);
- dma_fence_put(work->excl);
for (i = 0; i < work->shared_count; ++i)
dma_fence_put(work->shared[i]);
kfree(work->shared);
@@ -286,22 +332,19 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
if (crtc->enabled)
active = true;
- pm_runtime_mark_last_busy(dev->dev);
-
adev = drm_to_adev(dev);
/* if we have active crtcs and we don't have a power ref,
- take the current one */
+ * take the current one
+ */
if (active && !adev->have_disp_power_ref) {
adev->have_disp_power_ref = true;
return ret;
}
- /* if we have no active crtcs, then drop the power ref
- we got before */
- if (!active && adev->have_disp_power_ref) {
- pm_runtime_put_autosuspend(dev->dev);
+ /* if we have no active crtcs, then go to
+ * drop the power ref we got before
+ */
+ if (!active && adev->have_disp_power_ref)
adev->have_disp_power_ref = false;
- }
-
out:
/* drop the power reference we got coming in here */
pm_runtime_put_autosuspend(dev->dev);
@@ -465,11 +508,10 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
if (amdgpu_connector->router.ddc_valid)
amdgpu_i2c_router_select_ddc_port(amdgpu_connector);
- if (use_aux) {
+ if (use_aux)
ret = i2c_transfer(&amdgpu_connector->ddc_bus->aux.ddc, msgs, 2);
- } else {
+ else
ret = i2c_transfer(&amdgpu_connector->ddc_bus->adapter, msgs, 2);
- }
if (ret != 2)
/* Couldn't find an accessible DDC on this connector */
@@ -478,20 +520,40 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
* EDID header starts with:
* 0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x00.
* Only the first 6 bytes must be valid as
- * drm_edid_block_valid() can fix the last 2 bytes */
+ * drm_edid_block_valid() can fix the last 2 bytes
+ */
if (drm_edid_header_is_valid(buf) < 6) {
/* Couldn't find an accessible EDID on this
- * connector */
+ * connector
+ */
return false;
}
return true;
}
+static int amdgpu_dirtyfb(struct drm_framebuffer *fb, struct drm_file *file,
+ unsigned int flags, unsigned int color,
+ struct drm_clip_rect *clips, unsigned int num_clips)
+{
+
+ if (file)
+ return -ENOSYS;
+
+ return drm_atomic_helper_dirtyfb(fb, file, flags, color, clips,
+ num_clips);
+}
+
static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
.destroy = drm_gem_fb_destroy,
.create_handle = drm_gem_fb_create_handle,
};
+static const struct drm_framebuffer_funcs amdgpu_fb_funcs_atomic = {
+ .destroy = drm_gem_fb_destroy,
+ .create_handle = drm_gem_fb_create_handle,
+ .dirty = amdgpu_dirtyfb
+};
+
uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
uint64_t bo_flags)
{
@@ -508,28 +570,9 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
*/
if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
amdgpu_bo_support_uswc(bo_flags) &&
- amdgpu_device_asic_has_dc_support(adev->asic_type)) {
- switch (adev->asic_type) {
- case CHIP_CARRIZO:
- case CHIP_STONEY:
- domain |= AMDGPU_GEM_DOMAIN_GTT;
- break;
- case CHIP_RAVEN:
- /* enable S/G on PCO and RV2 */
- if ((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
- (adev->apu_flags & AMD_APU_IS_PICASSO))
- domain |= AMDGPU_GEM_DOMAIN_GTT;
- break;
- case CHIP_RENOIR:
- case CHIP_VANGOGH:
- case CHIP_YELLOW_CARP:
- domain |= AMDGPU_GEM_DOMAIN_GTT;
- break;
-
- default:
- break;
- }
- }
+ adev->dc_enabled &&
+ adev->mode_info.gpu_vm_support)
+ domain |= AMDGPU_GEM_DOMAIN_GTT;
#endif
return domain;
@@ -611,6 +654,10 @@ amdgpu_lookup_format_info(u32 format, uint64_t modifier)
if (!IS_AMD_FMT_MOD(modifier))
return NULL;
+ if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) < AMD_FMT_MOD_TILE_VER_GFX9 ||
+ AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX12)
+ return NULL;
+
if (AMD_FMT_MOD_GET(DCC_RETILE, modifier))
return lookup_format_info(dcc_retile_formats,
ARRAY_SIZE(dcc_retile_formats),
@@ -675,10 +722,39 @@ extract_render_dcc_offset(struct amdgpu_device *adev,
return 0;
}
+static int convert_tiling_flags_to_modifier_gfx12(struct amdgpu_framebuffer *afb)
+{
+ u64 modifier = 0;
+ int swizzle_mode = AMDGPU_TILING_GET(afb->tiling_flags, GFX12_SWIZZLE_MODE);
+
+ if (!swizzle_mode) {
+ modifier = DRM_FORMAT_MOD_LINEAR;
+ } else {
+ int max_comp_block =
+ AMDGPU_TILING_GET(afb->tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK);
+
+ modifier =
+ AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX12) |
+ AMD_FMT_MOD_SET(TILE, swizzle_mode) |
+ AMD_FMT_MOD_SET(DCC, afb->gfx12_dcc) |
+ AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, max_comp_block);
+ }
+
+ afb->base.modifier = modifier;
+ afb->base.flags |= DRM_MODE_FB_MODIFIERS;
+ return 0;
+}
+
static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
{
struct amdgpu_device *adev = drm_to_adev(afb->base.dev);
uint64_t modifier = 0;
+ int num_pipes = 0;
+ int num_pkrs = 0;
+
+ num_pkrs = adev->gfx.config.gb_addr_config_fields.num_pkrs;
+ num_pipes = adev->gfx.config.gb_addr_config_fields.num_pipes;
if (!afb->tiling_flags || !AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE)) {
modifier = DRM_FORMAT_MOD_LINEAR;
@@ -691,7 +767,7 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
int bank_xor_bits = 0;
int packers = 0;
int rb = 0;
- int pipes = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes);
+ int pipes = ilog2(num_pipes);
uint32_t dcc_offset = AMDGPU_TILING_GET(afb->tiling_flags, DCC_OFFSET_256B);
switch (swizzle >> 2) {
@@ -707,14 +783,21 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
case 6: /* 64 KiB _X */
block_size_bits = 16;
break;
+ case 7: /* 256 KiB */
+ block_size_bits = 18;
+ break;
default:
/* RESERVED or VAR */
return -EINVAL;
}
- if (adev->asic_type >= CHIP_SIENNA_CICHLID)
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0))
+ version = AMD_FMT_MOD_TILE_VER_GFX11;
+ else if (amdgpu_ip_version(adev, GC_HWIP, 0) >=
+ IP_VERSION(10, 3, 0))
version = AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS;
- else if (adev->family == AMDGPU_FAMILY_NV)
+ else if (amdgpu_ip_version(adev, GC_HWIP, 0) >=
+ IP_VERSION(10, 0, 0))
version = AMD_FMT_MOD_TILE_VER_GFX10;
else
version = AMD_FMT_MOD_TILE_VER_GFX9;
@@ -723,19 +806,34 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
case 0: /* Z microtiling */
return -EINVAL;
case 1: /* S microtiling */
- if (!has_xor)
- version = AMD_FMT_MOD_TILE_VER_GFX9;
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) <
+ IP_VERSION(11, 0, 0)) {
+ if (!has_xor)
+ version = AMD_FMT_MOD_TILE_VER_GFX9;
+ }
break;
case 2:
- if (!has_xor && afb->base.format->cpp[0] != 4)
- version = AMD_FMT_MOD_TILE_VER_GFX9;
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) <
+ IP_VERSION(11, 0, 0)) {
+ if (!has_xor && afb->base.format->cpp[0] != 4)
+ version = AMD_FMT_MOD_TILE_VER_GFX9;
+ }
break;
case 3:
break;
}
if (has_xor) {
+ if (num_pipes == num_pkrs && num_pkrs == 0) {
+ DRM_ERROR("invalid number of pipes and packers\n");
+ return -EINVAL;
+ }
+
switch (version) {
+ case AMD_FMT_MOD_TILE_VER_GFX11:
+ pipe_xor_bits = min(block_size_bits - 8, pipes);
+ packers = ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs);
+ break;
case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS:
pipe_xor_bits = min(block_size_bits - 8, pipes);
packers = min(block_size_bits - 8 - pipe_xor_bits,
@@ -769,9 +867,12 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
u64 render_dcc_offset;
/* Enable constant encode on RAVEN2 and later. */
- bool dcc_constant_encode = adev->asic_type > CHIP_RAVEN ||
- (adev->asic_type == CHIP_RAVEN &&
- adev->external_rev_id >= 0x81);
+ bool dcc_constant_encode =
+ (adev->asic_type > CHIP_RAVEN ||
+ (adev->asic_type == CHIP_RAVEN &&
+ adev->external_rev_id >= 0x81)) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) <
+ IP_VERSION(11, 0, 0);
int max_cblock_size = dcc_i64b ? AMD_FMT_MOD_DCC_BLOCK_64B :
dcc_i128b ? AMD_FMT_MOD_DCC_BLOCK_128B :
@@ -808,7 +909,9 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
if (adev->family >= AMDGPU_FAMILY_NV) {
int extra_pipe = 0;
- if (adev->asic_type >= CHIP_SIENNA_CICHLID &&
+ if ((amdgpu_ip_version(adev, GC_HWIP,
+ 0) >=
+ IP_VERSION(10, 3, 0)) &&
pipes == packers && pipes > 1)
extra_pipe = 1;
@@ -842,8 +945,7 @@ static int check_tiling_flags_gfx6(struct amdgpu_framebuffer *afb)
{
u64 micro_tile_mode;
- /* Zero swizzle mode means linear */
- if (AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE) == 0)
+ if (AMDGPU_TILING_GET(afb->tiling_flags, ARRAY_MODE) == 1) /* LINEAR_ALIGNED */
return 0;
micro_tile_mode = AMDGPU_TILING_GET(afb->tiling_flags, MICRO_TILE_MODE);
@@ -886,10 +988,11 @@ static unsigned int get_dcc_block_size(uint64_t modifier, bool rb_aligned,
return max(10 + (rb_aligned ? (int)AMD_FMT_MOD_GET(RB, modifier) : 0), 12);
}
case AMD_FMT_MOD_TILE_VER_GFX10:
- case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS: {
+ case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS:
+ case AMD_FMT_MOD_TILE_VER_GFX11: {
int pipes_log2 = AMD_FMT_MOD_GET(PIPE_XOR_BITS, modifier);
- if (ver == AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && pipes_log2 > 1 &&
+ if (ver >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && pipes_log2 > 1 &&
AMD_FMT_MOD_GET(PACKERS, modifier) == pipes_log2)
++pipes_log2;
@@ -958,7 +1061,7 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb)
int ret;
unsigned int i, block_width, block_height, block_size_log2;
- if (!rfb->base.dev->mode_config.allow_fb_modifiers)
+ if (rfb->base.dev->mode_config.fb_modifiers_not_supported)
return 0;
for (i = 0; i < format_info->num_planes; ++i) {
@@ -966,6 +1069,30 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb)
block_width = 256 / format_info->cpp[i];
block_height = 1;
block_size_log2 = 8;
+ } else if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX12) {
+ int swizzle = AMD_FMT_MOD_GET(TILE, modifier);
+
+ switch (swizzle) {
+ case AMD_FMT_MOD_TILE_GFX12_256B_2D:
+ block_size_log2 = 8;
+ break;
+ case AMD_FMT_MOD_TILE_GFX12_4K_2D:
+ block_size_log2 = 12;
+ break;
+ case AMD_FMT_MOD_TILE_GFX12_64K_2D:
+ block_size_log2 = 16;
+ break;
+ case AMD_FMT_MOD_TILE_GFX12_256K_2D:
+ block_size_log2 = 18;
+ break;
+ default:
+ drm_dbg_kms(rfb->base.dev,
+ "Gfx12 swizzle mode with unknown block size: %d\n", swizzle);
+ return -EINVAL;
+ }
+
+ get_block_dimensions(block_size_log2, format_info->cpp[i],
+ &block_width, &block_height);
} else {
int swizzle = AMD_FMT_MOD_GET(TILE, modifier);
@@ -982,6 +1109,9 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb)
case DC_SW_64KB_S_X:
block_size_log2 = 16;
break;
+ case DC_SW_VAR_S_X:
+ block_size_log2 = 18;
+ break;
default:
drm_dbg_kms(rfb->base.dev,
"Swizzle mode with unknown block size: %d\n", swizzle);
@@ -998,7 +1128,8 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb)
return ret;
}
- if (AMD_FMT_MOD_GET(DCC, modifier)) {
+ if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) <= AMD_FMT_MOD_TILE_VER_GFX11 &&
+ AMD_FMT_MOD_GET(DCC, modifier)) {
if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) {
block_size_log2 = get_dcc_block_size(modifier, false, false);
get_block_dimensions(block_size_log2 + 8, format_info->cpp[0],
@@ -1028,7 +1159,8 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb)
}
static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb,
- uint64_t *tiling_flags, bool *tmz_surface)
+ uint64_t *tiling_flags, bool *tmz_surface,
+ bool *gfx12_dcc)
{
struct amdgpu_bo *rbo;
int r;
@@ -1036,6 +1168,7 @@ static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb
if (!amdgpu_fb) {
*tiling_flags = 0;
*tmz_surface = false;
+ *gfx12_dcc = false;
return 0;
}
@@ -1049,51 +1182,26 @@ static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb
return r;
}
- if (tiling_flags)
- amdgpu_bo_get_tiling_flags(rbo, tiling_flags);
-
- if (tmz_surface)
- *tmz_surface = amdgpu_bo_encrypted(rbo);
+ amdgpu_bo_get_tiling_flags(rbo, tiling_flags);
+ *tmz_surface = amdgpu_bo_encrypted(rbo);
+ *gfx12_dcc = rbo->flags & AMDGPU_GEM_CREATE_GFX12_DCC;
amdgpu_bo_unreserve(rbo);
return r;
}
-int amdgpu_display_gem_fb_init(struct drm_device *dev,
- struct amdgpu_framebuffer *rfb,
- const struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_gem_object *obj)
-{
- int ret;
-
- rfb->base.obj[0] = obj;
- drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd);
-
- ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj);
- if (ret)
- goto err;
-
- ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
- if (ret)
- goto err;
-
- return 0;
-err:
- drm_dbg_kms(dev, "Failed to init gem fb: %d\n", ret);
- rfb->base.obj[0] = NULL;
- return ret;
-}
-
-int amdgpu_display_gem_fb_verify_and_init(
- struct drm_device *dev, struct amdgpu_framebuffer *rfb,
- struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_gem_object *obj)
+static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev,
+ struct amdgpu_framebuffer *rfb,
+ struct drm_file *file_priv,
+ const struct drm_format_info *info,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_gem_object *obj)
{
int ret;
rfb->base.obj[0] = obj;
- drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd);
+ drm_helper_mode_fill_fb_struct(dev, &rfb->base, info, mode_cmd);
/* Verify that the modifier is supported. */
if (!drm_any_plane_has_format(dev, mode_cmd->pixel_format,
mode_cmd->modifier[0])) {
@@ -1109,7 +1217,12 @@ int amdgpu_display_gem_fb_verify_and_init(
if (ret)
goto err;
- ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
+ if (drm_drv_uses_atomic_modeset(dev))
+ ret = drm_framebuffer_init(dev, &rfb->base,
+ &amdgpu_fb_funcs_atomic);
+ else
+ ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
+
if (ret)
goto err;
@@ -1120,10 +1233,10 @@ err:
return ret;
}
-int amdgpu_display_framebuffer_init(struct drm_device *dev,
- struct amdgpu_framebuffer *rfb,
- const struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_gem_object *obj)
+static int amdgpu_display_framebuffer_init(struct drm_device *dev,
+ struct amdgpu_framebuffer *rfb,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_gem_object *obj)
{
struct amdgpu_device *adev = drm_to_adev(dev);
int ret, i;
@@ -1141,11 +1254,12 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev,
}
}
- ret = amdgpu_display_get_fb_info(rfb, &rfb->tiling_flags, &rfb->tmz_surface);
+ ret = amdgpu_display_get_fb_info(rfb, &rfb->tiling_flags, &rfb->tmz_surface,
+ &rfb->gfx12_dcc);
if (ret)
return ret;
- if (!dev->mode_config.allow_fb_modifiers) {
+ if (dev->mode_config.fb_modifiers_not_supported && !adev->enable_virtual_display) {
drm_WARN_ONCE(dev, adev->family >= AMDGPU_FAMILY_AI,
"GFX9+ requires FB check based on format modifier\n");
ret = check_tiling_flags_gfx6(rfb);
@@ -1153,9 +1267,13 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev,
return ret;
}
- if (dev->mode_config.allow_fb_modifiers &&
+ if (!dev->mode_config.fb_modifiers_not_supported &&
!(rfb->base.flags & DRM_MODE_FB_MODIFIERS)) {
- ret = convert_tiling_flags_to_modifier(rfb);
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0))
+ ret = convert_tiling_flags_to_modifier_gfx12(rfb);
+ else
+ ret = convert_tiling_flags_to_modifier(rfb);
+
if (ret) {
drm_dbg_kms(dev, "Failed to convert tiling flags 0x%llX to a modifier",
rfb->tiling_flags);
@@ -1178,6 +1296,7 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev,
struct drm_framebuffer *
amdgpu_display_user_framebuffer_create(struct drm_device *dev,
struct drm_file *file_priv,
+ const struct drm_format_info *info,
const struct drm_mode_fb_cmd2 *mode_cmd)
{
struct amdgpu_framebuffer *amdgpu_fb;
@@ -1188,15 +1307,17 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev,
obj = drm_gem_object_lookup(file_priv, mode_cmd->handles[0]);
if (obj == NULL) {
- drm_dbg_kms(dev, "No GEM object associated to handle 0x%08X, "
- "can't create framebuffer\n", mode_cmd->handles[0]);
+ drm_dbg_kms(dev,
+ "No GEM object associated to handle 0x%08X, can't create framebuffer\n",
+ mode_cmd->handles[0]);
+
return ERR_PTR(-ENOENT);
}
/* Handle is imported dma-buf, so cannot be migrated to VRAM for scanout */
bo = gem_to_amdgpu_bo(obj);
domains = amdgpu_display_supported_domains(drm_to_adev(dev), bo->flags);
- if (obj->import_attach && !(domains & AMDGPU_GEM_DOMAIN_GTT)) {
+ if (drm_gem_is_imported(obj) && !(domains & AMDGPU_GEM_DOMAIN_GTT)) {
drm_dbg_kms(dev, "Cannot create framebuffer from imported dma_buf\n");
drm_gem_object_put(obj);
return ERR_PTR(-EINVAL);
@@ -1209,7 +1330,7 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev,
}
ret = amdgpu_display_gem_fb_verify_and_init(dev, amdgpu_fb, file_priv,
- mode_cmd, obj);
+ info, mode_cmd, obj);
if (ret) {
kfree(amdgpu_fb);
drm_gem_object_put(obj);
@@ -1222,27 +1343,84 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev,
const struct drm_mode_config_funcs amdgpu_mode_funcs = {
.fb_create = amdgpu_display_user_framebuffer_create,
- .output_poll_changed = drm_fb_helper_output_poll_changed,
};
-static const struct drm_prop_enum_list amdgpu_underscan_enum_list[] =
-{ { UNDERSCAN_OFF, "off" },
+static const struct drm_prop_enum_list amdgpu_underscan_enum_list[] = {
+ { UNDERSCAN_OFF, "off" },
{ UNDERSCAN_ON, "on" },
{ UNDERSCAN_AUTO, "auto" },
};
-static const struct drm_prop_enum_list amdgpu_audio_enum_list[] =
-{ { AMDGPU_AUDIO_DISABLE, "off" },
+static const struct drm_prop_enum_list amdgpu_audio_enum_list[] = {
+ { AMDGPU_AUDIO_DISABLE, "off" },
{ AMDGPU_AUDIO_ENABLE, "on" },
{ AMDGPU_AUDIO_AUTO, "auto" },
};
/* XXX support different dither options? spatial, temporal, both, etc. */
-static const struct drm_prop_enum_list amdgpu_dither_enum_list[] =
-{ { AMDGPU_FMT_DITHER_DISABLE, "off" },
+static const struct drm_prop_enum_list amdgpu_dither_enum_list[] = {
+ { AMDGPU_FMT_DITHER_DISABLE, "off" },
{ AMDGPU_FMT_DITHER_ENABLE, "on" },
};
+/**
+ * DOC: property for adaptive backlight modulation
+ *
+ * The 'adaptive backlight modulation' property is used for the compositor to
+ * directly control the adaptive backlight modulation power savings feature
+ * that is part of DCN hardware.
+ *
+ * The property will be attached specifically to eDP panels that support it.
+ *
+ * The property is by default set to 'sysfs' to allow the sysfs file 'panel_power_savings'
+ * to be able to control it.
+ * If set to 'off' the compositor will ensure it stays off.
+ * The other values 'min', 'bias min', 'bias max', and 'max' will control the
+ * intensity of the power savings.
+ *
+ * Modifying this value can have implications on color accuracy, so tread
+ * carefully.
+ */
+static int amdgpu_display_setup_abm_prop(struct amdgpu_device *adev)
+{
+ const struct drm_prop_enum_list props[] = {
+ { ABM_SYSFS_CONTROL, "sysfs" },
+ { ABM_LEVEL_OFF, "off" },
+ { ABM_LEVEL_MIN, "min" },
+ { ABM_LEVEL_BIAS_MIN, "bias min" },
+ { ABM_LEVEL_BIAS_MAX, "bias max" },
+ { ABM_LEVEL_MAX, "max" },
+ };
+ struct drm_property *prop;
+ int i;
+
+ if (!adev->dc_enabled)
+ return 0;
+
+ prop = drm_property_create(adev_to_drm(adev), DRM_MODE_PROP_ENUM,
+ "adaptive backlight modulation",
+ 6);
+ if (!prop)
+ return -ENOMEM;
+
+ for (i = 0; i < ARRAY_SIZE(props); i++) {
+ int ret;
+
+ ret = drm_property_add_enum(prop, props[i].type,
+ props[i].name);
+
+ if (ret) {
+ drm_property_destroy(adev_to_drm(adev), prop);
+
+ return ret;
+ }
+ }
+
+ adev->mode_info.abm_level_property = prop;
+
+ return 0;
+}
+
int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
{
int sz;
@@ -1289,15 +1467,7 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
"dither",
amdgpu_dither_enum_list, sz);
- if (amdgpu_device_has_dc_support(adev)) {
- adev->mode_info.abm_level_property =
- drm_property_create_range(adev_to_drm(adev), 0,
- "abm level", 0, 4);
- if (!adev->mode_info.abm_level_property)
- return -ENOMEM;
- }
-
- return 0;
+ return amdgpu_display_setup_abm_prop(adev);
}
void amdgpu_display_update_priority(struct amdgpu_device *adev)
@@ -1364,7 +1534,7 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
if ((!(mode->flags & DRM_MODE_FLAG_INTERLACE)) &&
((amdgpu_encoder->underscan_type == UNDERSCAN_ON) ||
((amdgpu_encoder->underscan_type == UNDERSCAN_AUTO) &&
- drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) &&
+ connector && connector->display_info.is_hdmi &&
amdgpu_display_is_hdtv_mode(mode)))) {
if (amdgpu_encoder->underscan_hborder != 0)
amdgpu_crtc->h_border = amdgpu_encoder->underscan_hborder;
@@ -1383,6 +1553,7 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
}
if (amdgpu_crtc->rmx_type != RMX_OFF) {
fixed20_12 a, b;
+
a.full = dfixed_const(src_v);
b.full = dfixed_const(dst_v);
amdgpu_crtc->vsc.full = dfixed_div(a, b);
@@ -1402,7 +1573,7 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
*
* \param dev Device to query.
* \param pipe Crtc to query.
- * \param flags Flags from caller (DRM_CALLED_FROM_VBLIRQ or 0).
+ * \param flags from caller (DRM_CALLED_FROM_VBLIRQ or 0).
* For driver internal use only also supports these flags:
*
* USE_REAL_VBLANKSTART to use the real start of vblank instead
@@ -1469,8 +1640,7 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
ret |= DRM_SCANOUTPOS_ACCURATE;
vbl_start = vbl & 0x1fff;
vbl_end = (vbl >> 16) & 0x1fff;
- }
- else {
+ } else {
/* No: Fake something reasonable which gives at least ok results. */
vbl_start = mode->crtc_vdisplay;
vbl_end = 0;
@@ -1478,8 +1648,8 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
/* Called from driver internal vblank counter query code? */
if (flags & GET_DISTANCE_TO_VBLANKSTART) {
- /* Caller wants distance from real vbl_start in *hpos */
- *hpos = *vpos - vbl_start;
+ /* Caller wants distance from real vbl_start in *hpos */
+ *hpos = *vpos - vbl_start;
}
/* Fudge vblank to start a few scanlines earlier to handle the
@@ -1501,7 +1671,7 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
/* In vblank? */
if (in_vbl)
- ret |= DRM_SCANOUTPOS_IN_VBLANK;
+ ret |= DRM_SCANOUTPOS_IN_VBLANK;
/* Called from driver internal vblank counter query code? */
if (flags & GET_DISTANCE_TO_VBLANKSTART) {
@@ -1568,6 +1738,21 @@ bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc,
stime, etime, mode);
}
+static bool
+amdgpu_display_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_fb_helper *fb_helper = dev->fb_helper;
+
+ if (!fb_helper || !fb_helper->buffer)
+ return false;
+
+ if (gem_to_amdgpu_bo(fb_helper->buffer->gem) != robj)
+ return false;
+
+ return true;
+}
+
int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
{
struct drm_device *dev = adev_to_drm(adev);
@@ -1576,6 +1761,8 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
struct drm_connector_list_iter iter;
int r;
+ drm_kms_helper_poll_disable(dev);
+
/* turn off display hw */
drm_modeset_lock_all(dev);
drm_connector_list_iter_begin(dev, &iter);
@@ -1592,6 +1779,7 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
+
r = amdgpu_bo_reserve(aobj, true);
if (r == 0) {
amdgpu_bo_unpin(aobj);
@@ -1599,12 +1787,11 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
}
}
- if (fb == NULL || fb->obj[0] == NULL) {
+ if (!fb || !fb->obj[0])
continue;
- }
+
robj = gem_to_amdgpu_bo(fb->obj[0]);
- /* don't unpin kernel fb objects */
- if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
+ if (!amdgpu_display_robj_is_fb(adev, robj)) {
r = amdgpu_bo_reserve(robj, true);
if (r == 0) {
amdgpu_bo_unpin(robj);
@@ -1629,8 +1816,10 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev)
if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
+
r = amdgpu_bo_reserve(aobj, true);
if (r == 0) {
+ aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
if (r != 0)
dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r);
@@ -1653,6 +1842,87 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev)
drm_modeset_unlock_all(dev);
+ drm_kms_helper_poll_enable(dev);
+
return 0;
}
+/* panic_abo is set in amdgpu_display_get_scanout_buffer() and only used in amdgpu_display_set_pixel().
+ * Both are called from the panic handler, and protected by the drm_panic spinlock.
+ */
+static struct amdgpu_bo *panic_abo;
+
+/* Use the indirect MMIO to write each pixel to the GPU VRAM.
+ * This is a simplified version of amdgpu_device_mm_access().
+ */
+static void amdgpu_display_set_pixel(struct drm_scanout_buffer *sb,
+ unsigned int x,
+ unsigned int y,
+ u32 color)
+{
+ struct amdgpu_res_cursor cursor;
+ unsigned long offset;
+ struct amdgpu_bo *abo = panic_abo;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+ uint32_t tmp;
+
+ offset = x * 4 + y * sb->pitch[0];
+ amdgpu_res_first(abo->tbo.resource, offset, 4, &cursor);
+
+ tmp = cursor.start >> 31;
+ WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t) cursor.start) | 0x80000000);
+ if (tmp != 0xffffffff)
+ WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
+ WREG32_NO_KIQ(mmMM_DATA, color);
+}
+
+int amdgpu_display_get_scanout_buffer(struct drm_plane *plane,
+ struct drm_scanout_buffer *sb)
+{
+ struct amdgpu_bo *abo;
+ struct drm_framebuffer *fb = plane->state->fb;
+
+ if (!fb)
+ return -EINVAL;
+
+ DRM_DEBUG_KMS("Framebuffer %dx%d %p4cc\n", fb->width, fb->height, &fb->format->format);
+
+ abo = gem_to_amdgpu_bo(fb->obj[0]);
+ if (!abo)
+ return -EINVAL;
+
+ sb->width = fb->width;
+ sb->height = fb->height;
+ /* Use the generic linear format, because tiling will be disabled in panic_flush() */
+ sb->format = drm_format_info(fb->format->format);
+ if (!sb->format)
+ return -EINVAL;
+
+ sb->pitch[0] = fb->pitches[0];
+
+ if (abo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) {
+ if (abo->tbo.resource->mem_type != TTM_PL_VRAM) {
+ drm_warn(plane->dev, "amdgpu panic, framebuffer not in VRAM\n");
+ return -EINVAL;
+ }
+ /* Only handle 32bits format, to simplify mmio access */
+ if (fb->format->cpp[0] != 4) {
+ drm_warn(plane->dev, "amdgpu panic, pixel format is not 32bits\n");
+ return -EINVAL;
+ }
+ sb->set_pixel = amdgpu_display_set_pixel;
+ panic_abo = abo;
+ return 0;
+ }
+ if (!abo->kmap.virtual &&
+ ttm_bo_kmap(&abo->tbo, 0, PFN_UP(abo->tbo.base.size), &abo->kmap)) {
+ drm_warn(plane->dev, "amdgpu bo map failed, panic won't be displayed\n");
+ return -ENOMEM;
+ }
+ if (abo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK)
+ iosys_map_set_vaddr_iomem(&sb->map[0], abo->kmap.virtual);
+ else
+ iosys_map_set_vaddr(&sb->map[0], abo->kmap.virtual);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
index 7b6d83e2b13c..49a29bf47a37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
@@ -23,6 +23,8 @@
#ifndef __AMDGPU_DISPLAY_H__
#define __AMDGPU_DISPLAY_H__
+#include <drm/drm_panic.h>
+
#define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
#define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l))
#define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e))
@@ -35,14 +37,14 @@
#define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c))
#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
-int amdgpu_display_freesync_ioctl(struct drm_device *dev, void *data,
- struct drm_file *filp);
+void amdgpu_display_hotplug_work_func(struct work_struct *work);
void amdgpu_display_update_priority(struct amdgpu_device *adev);
uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
uint64_t bo_flags);
struct drm_framebuffer *
amdgpu_display_user_framebuffer_create(struct drm_device *dev,
struct drm_file *file_priv,
+ const struct drm_format_info *info,
const struct drm_mode_fb_cmd2 *mode_cmd);
const struct drm_format_info *
amdgpu_lookup_format_info(u32 format, uint64_t modifier);
@@ -50,4 +52,14 @@ amdgpu_lookup_format_info(u32 format, uint64_t modifier);
int amdgpu_display_suspend_helper(struct amdgpu_device *adev);
int amdgpu_display_resume_helper(struct amdgpu_device *adev);
+int amdgpu_display_get_scanout_buffer(struct drm_plane *plane,
+ struct drm_scanout_buffer *sb);
+
+#define ABM_SYSFS_CONTROL -1
+#define ABM_LEVEL_OFF 0
+#define ABM_LEVEL_MIN 1
+#define ABM_LEVEL_BIAS_MIN 2
+#define ABM_LEVEL_BIAS_MAX 3
+#define ABM_LEVEL_MAX 4
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index ae6ab93c868b..e22cfa7c6d32 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -36,11 +36,35 @@
#include "amdgpu_gem.h"
#include "amdgpu_dma_buf.h"
#include "amdgpu_xgmi.h"
+#include "amdgpu_vm.h"
#include <drm/amdgpu_drm.h>
+#include <drm/ttm/ttm_tt.h>
#include <linux/dma-buf.h>
#include <linux/dma-fence-array.h>
#include <linux/pci-p2pdma.h>
-#include <linux/pm_runtime.h>
+
+static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops;
+
+/**
+ * dma_buf_attach_adev - Helper to get adev of an attachment
+ *
+ * @attach: attachment
+ *
+ * Returns:
+ * A struct amdgpu_device * if the attaching device is an amdgpu device or
+ * partition, NULL otherwise.
+ */
+static struct amdgpu_device *dma_buf_attach_adev(struct dma_buf_attachment *attach)
+{
+ if (attach->importer_ops == &amdgpu_dma_buf_attach_ops) {
+ struct drm_gem_object *obj = attach->importer_priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ return amdgpu_ttm_adev(bo->tbo.bdev);
+ }
+
+ return NULL;
+}
/**
* amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
@@ -53,45 +77,49 @@
static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
struct dma_buf_attachment *attach)
{
+ struct amdgpu_device *attach_adev = dma_buf_attach_adev(attach);
struct drm_gem_object *obj = dmabuf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
int r;
- if (pci_p2pdma_distance_many(adev->pdev, &attach->dev, 1, true) < 0)
+ /*
+ * Disable peer-to-peer access for DCC-enabled VRAM surfaces on GFX12+.
+ * Such buffers cannot be safely accessed over P2P due to device-local
+ * compression metadata. Fall back to the system-memory path instead.
+ * This applies when the device supports GFX12 (GC 12.x or newer) and the
+ * BO was created with the AMDGPU_GEM_CREATE_GFX12_DCC flag.
+ *
+ */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0) &&
+ bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
attach->peer2peer = false;
- if (attach->dev->driver == adev->dev->driver)
- return 0;
+ /*
+ * Disable peer-to-peer access for DCC-enabled VRAM surfaces on GFX12+.
+ * Such buffers cannot be safely accessed over P2P due to device-local
+ * compression metadata. Fall back to the system-memory path instead.
+ * This applies when the device supports GFX12 (GC 12.x or newer) and the
+ * BO was created with the AMDGPU_GEM_CREATE_GFX12_DCC flag.
+ *
+ */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0) &&
+ bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
+ attach->peer2peer = false;
- r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
- if (r < 0)
- goto out;
+ if (!amdgpu_dmabuf_is_xgmi_accessible(attach_adev, bo) &&
+ pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
+ attach->peer2peer = false;
- return 0;
+ r = dma_resv_lock(bo->tbo.base.resv, NULL);
+ if (r)
+ return r;
-out:
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return r;
-}
+ amdgpu_vm_bo_update_shared(bo);
-/**
- * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation
- *
- * @dmabuf: DMA-buf where we remove the attachment from
- * @attach: the attachment to remove
- *
- * Called when an attachment is removed from the DMA-buf.
- */
-static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
- struct dma_buf_attachment *attach)
-{
- struct drm_gem_object *obj = dmabuf->priv;
- struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ dma_resv_unlock(bo->tbo.base.resv);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return 0;
}
/**
@@ -103,23 +131,35 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
*/
static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
{
- struct drm_gem_object *obj = attach->dmabuf->priv;
- struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- int r;
+ struct dma_buf *dmabuf = attach->dmabuf;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(dmabuf->priv);
+ u32 domains = bo->allowed_domains;
+
+ dma_resv_assert_held(dmabuf->resv);
+
+ /* Try pinning into VRAM to allow P2P with RDMA NICs without ODP
+ * support if all attachments can do P2P. If any attachment can't do
+ * P2P just pin into GTT instead.
+ *
+ * To avoid conflicting pinnings between GPUs and RDMA when move
+ * notifiers are disabled, only allow pinning in VRAM when move
+ * notifiers are enabled.
+ */
+ if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) {
+ domains &= ~AMDGPU_GEM_DOMAIN_VRAM;
+ } else {
+ list_for_each_entry(attach, &dmabuf->attachments, node)
+ if (!attach->peer2peer)
+ domains &= ~AMDGPU_GEM_DOMAIN_VRAM;
+ }
- /* pin buffer into GTT */
- r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
- if (r)
- return r;
+ if (domains & AMDGPU_GEM_DOMAIN_VRAM)
+ bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- if (bo->tbo.moving) {
- r = dma_fence_wait(bo->tbo.moving, true);
- if (r) {
- amdgpu_bo_unpin(bo);
- return r;
- }
- }
- return 0;
+ if (WARN_ON(!domains))
+ return -EINVAL;
+
+ return amdgpu_bo_pin(bo, domains);
}
/**
@@ -163,7 +203,7 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
if (!bo->tbo.pin_count) {
/* move buffer into GTT or VRAM */
struct ttm_operation_ctx ctx = { false, false };
- unsigned domains = AMDGPU_GEM_DOMAIN_GTT;
+ unsigned int domains = AMDGPU_GEM_DOMAIN_GTT;
if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM &&
attach->peer2peer) {
@@ -174,10 +214,6 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
return ERR_PTR(r);
-
- } else if (!(amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type) &
- AMDGPU_GEM_DOMAIN_GTT)) {
- return ERR_PTR(-EBUSY);
}
switch (bo->tbo.resource->mem_type) {
@@ -194,6 +230,11 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
break;
case TTM_PL_VRAM:
+ /* XGMI-accessible memory should never be DMA-mapped */
+ if (WARN_ON(amdgpu_dmabuf_is_xgmi_accessible(
+ dma_buf_attach_adev(attach), bo)))
+ return ERR_PTR(-EINVAL);
+
r = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, 0,
bo->tbo.base.size, attach->dev,
dir, &sgt);
@@ -225,7 +266,7 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
struct sg_table *sgt,
enum dma_data_direction dir)
{
- if (sgt->sgl->page_link) {
+ if (sg_page(sgt->sgl)) {
dma_unmap_sgtable(attach->dev, sgt, dir, 0);
sg_free_table(sgt);
kfree(sgt);
@@ -275,9 +316,38 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
return ret;
}
+static int amdgpu_dma_buf_vmap(struct dma_buf *dma_buf, struct iosys_map *map)
+{
+ struct drm_gem_object *obj = dma_buf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ int ret;
+
+ /*
+ * Pin to keep buffer in place while it's vmap'ed. The actual
+ * domain is not that important as long as it's mappable. Using
+ * GTT and VRAM should be compatible with most use cases.
+ */
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM);
+ if (ret)
+ return ret;
+ ret = drm_gem_dmabuf_vmap(dma_buf, map);
+ if (ret)
+ amdgpu_bo_unpin(bo);
+
+ return ret;
+}
+
+static void amdgpu_dma_buf_vunmap(struct dma_buf *dma_buf, struct iosys_map *map)
+{
+ struct drm_gem_object *obj = dma_buf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ drm_gem_dmabuf_vunmap(dma_buf, map);
+ amdgpu_bo_unpin(bo);
+}
+
const struct dma_buf_ops amdgpu_dmabuf_ops = {
.attach = amdgpu_dma_buf_attach,
- .detach = amdgpu_dma_buf_detach,
.pin = amdgpu_dma_buf_pin,
.unpin = amdgpu_dma_buf_unpin,
.map_dma_buf = amdgpu_dma_buf_map,
@@ -285,8 +355,8 @@ const struct dma_buf_ops amdgpu_dmabuf_ops = {
.release = drm_gem_dmabuf_release,
.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
.mmap = drm_gem_dmabuf_mmap,
- .vmap = drm_gem_dmabuf_vmap,
- .vunmap = drm_gem_dmabuf_vunmap,
+ .vmap = amdgpu_dma_buf_vmap,
+ .vunmap = amdgpu_dma_buf_vunmap,
};
/**
@@ -304,11 +374,23 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
struct dma_buf *buf;
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = true,
+ /* We opt to avoid OOM on system pages allocations */
+ .gfp_retry_mayfail = true,
+ .allow_res_evict = false,
+ };
+ int ret;
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
return ERR_PTR(-EPERM);
+ ret = ttm_bo_setup_export(&bo->tbo, &ctx);
+ if (ret)
+ return ERR_PTR(ret);
+
buf = drm_gem_prime_export(gobj, flags);
if (!IS_ERR(buf))
buf->ops = &amdgpu_dmabuf_ops;
@@ -343,12 +425,15 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
if (dma_buf->ops == &amdgpu_dmabuf_ops) {
struct amdgpu_bo *other = gem_to_amdgpu_bo(dma_buf->priv);
- flags |= other->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+ flags |= other->flags & (AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+ AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_EXT_COHERENT |
+ AMDGPU_GEM_CREATE_UNCACHED);
}
ret = amdgpu_gem_object_create(adev, dma_buf->size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_CPU, flags,
- ttm_bo_type_sg, resv, &gobj);
+ ttm_bo_type_sg, resv, &gobj, 0);
if (ret)
goto error;
@@ -384,7 +469,11 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
struct amdgpu_vm_bo_base *bo_base;
int r;
- if (bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
+ /* FIXME: This should be after the "if", but needs a fix to make sure
+ * DMABuf imports are initialized in the right VM list.
+ */
+ amdgpu_vm_bo_invalidate(bo, false);
+ if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
return;
r = ttm_bo_validate(&bo->tbo, &placement, &ctx);
@@ -415,9 +504,12 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
continue;
}
- r = amdgpu_vm_clear_freed(adev, vm, NULL);
+ /* Reserve fences for two SDMA page table updates */
+ r = dma_resv_reserve_fences(resv, 2);
if (!r)
- r = amdgpu_vm_handle_moved(adev, vm);
+ r = amdgpu_vm_clear_freed(adev, vm, NULL);
+ if (!r)
+ r = amdgpu_vm_handle_moved(adev, vm, ticket);
if (r && r != -EBUSY)
DRM_ERROR("Failed to invalidate VM page tables (%d))\n",
@@ -491,7 +583,10 @@ bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev,
struct drm_gem_object *obj = &bo->tbo.base;
struct drm_gem_object *gobj;
- if (obj->import_attach) {
+ if (!adev)
+ return false;
+
+ if (drm_gem_is_imported(obj)) {
struct dma_buf *dma_buf = obj->import_attach->dmabuf;
if (dma_buf->ops != &amdgpu_dmabuf_ops)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index 89e6ad30396f..2675689ef70f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -21,6 +21,9 @@
*
*/
+#ifndef AMDGPU_DOORBELL_H
+#define AMDGPU_DOORBELL_H
+
/*
* GPU doorbell structures, functions & helpers
*/
@@ -28,8 +31,15 @@ struct amdgpu_doorbell {
/* doorbell mmio */
resource_size_t base;
resource_size_t size;
- u32 __iomem *ptr;
- u32 num_doorbells; /* Number of doorbells actually reserved for amdgpu. */
+
+ /* Number of doorbells reserved for amdgpu kernel driver */
+ u32 num_kernel_doorbells;
+
+ /* Kernel doorbells */
+ struct amdgpu_bo *kernel_doorbells;
+
+ /* For CPU access of doorbells */
+ uint32_t *cpu_addr;
};
/* Reserved doorbells for amdgpu (including multimedia).
@@ -52,8 +62,11 @@ struct amdgpu_doorbell_index {
uint32_t userqueue_end;
uint32_t gfx_ring0;
uint32_t gfx_ring1;
- uint32_t sdma_engine[8];
- uint32_t mes_ring;
+ uint32_t gfx_userqueue_start;
+ uint32_t gfx_userqueue_end;
+ uint32_t sdma_engine[16];
+ uint32_t mes_ring0;
+ uint32_t mes_ring1;
uint32_t ih;
union {
struct {
@@ -73,15 +86,17 @@ struct amdgpu_doorbell_index {
uint32_t vce_ring6_7;
} uvd_vce;
};
+ uint32_t vpe_ring;
uint32_t first_non_cp;
uint32_t last_non_cp;
uint32_t max_assignment;
/* Per engine SDMA doorbell size in dword */
uint32_t sdma_doorbell_range;
+ /* Per xcc doorbell size for KIQ/KCQ */
+ uint32_t xcc_doorbell_range;
};
-typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
-{
+enum AMDGPU_DOORBELL_ASSIGNMENT {
AMDGPU_DOORBELL_KIQ = 0x000,
AMDGPU_DOORBELL_HIQ = 0x001,
AMDGPU_DOORBELL_DIQ = 0x002,
@@ -99,10 +114,10 @@ typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
AMDGPU_DOORBELL_IH = 0x1E8,
AMDGPU_DOORBELL_MAX_ASSIGNMENT = 0x3FF,
AMDGPU_DOORBELL_INVALID = 0xFFFF
-} AMDGPU_DOORBELL_ASSIGNMENT;
+};
+
+enum AMDGPU_VEGA20_DOORBELL_ASSIGNMENT {
-typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
-{
/* Compute + GFX: 0~255 */
AMDGPU_VEGA20_DOORBELL_KIQ = 0x000,
AMDGPU_VEGA20_DOORBELL_HIQ = 0x001,
@@ -156,12 +171,20 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0,
AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7,
- AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x18F,
+ /* kiq/kcq from second XCD. Max 8 XCDs */
+ AMDGPU_VEGA20_DOORBELL_XCC1_KIQ_START = 0x190,
+ /* 8 compute rings per GC. Max to 0x1CE */
+ AMDGPU_VEGA20_DOORBELL_XCC1_MEC_RING0_START = 0x197,
+
+ /* AID1 SDMA: 0x1D0 ~ 0x1F7 */
+ AMDGPU_VEGA20_DOORBELL_AID1_sDMA_START = 0x1D0,
+
+ AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x1F7,
AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF
-} AMDGPU_VEGA20_DOORBELL_ASSIGNMENT;
+};
+
+enum AMDGPU_NAVI10_DOORBELL_ASSIGNMENT {
-typedef enum _AMDGPU_NAVI10_DOORBELL_ASSIGNMENT
-{
/* Compute + GFX: 0~255 */
AMDGPU_NAVI10_DOORBELL_KIQ = 0x000,
AMDGPU_NAVI10_DOORBELL_HIQ = 0x001,
@@ -174,11 +197,15 @@ typedef enum _AMDGPU_NAVI10_DOORBELL_ASSIGNMENT
AMDGPU_NAVI10_DOORBELL_MEC_RING5 = 0x008,
AMDGPU_NAVI10_DOORBELL_MEC_RING6 = 0x009,
AMDGPU_NAVI10_DOORBELL_MEC_RING7 = 0x00A,
- AMDGPU_NAVI10_DOORBELL_USERQUEUE_START = 0x00B,
+ AMDGPU_NAVI10_DOORBELL_MES_RING0 = 0x00B,
+ AMDGPU_NAVI10_DOORBELL_MES_RING1 = 0x00C,
+ AMDGPU_NAVI10_DOORBELL_USERQUEUE_START = 0x00D,
AMDGPU_NAVI10_DOORBELL_USERQUEUE_END = 0x08A,
AMDGPU_NAVI10_DOORBELL_GFX_RING0 = 0x08B,
AMDGPU_NAVI10_DOORBELL_GFX_RING1 = 0x08C,
- AMDGPU_NAVI10_DOORBELL_MES_RING = 0x090,
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_START = 0x08D,
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_END = 0x0FF,
+
/* SDMA:256~335*/
AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0 = 0x100,
AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1 = 0x10A,
@@ -200,18 +227,19 @@ typedef enum _AMDGPU_NAVI10_DOORBELL_ASSIGNMENT
AMDGPU_NAVI10_DOORBELL64_VCNc_d = 0x18E,
AMDGPU_NAVI10_DOORBELL64_VCNe_f = 0x18F,
+ AMDGPU_NAVI10_DOORBELL64_VPE = 0x190,
+
AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0,
- AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP = AMDGPU_NAVI10_DOORBELL64_VCNe_f,
+ AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP = AMDGPU_NAVI10_DOORBELL64_VPE,
- AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT = 0x18F,
+ AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT = AMDGPU_NAVI10_DOORBELL64_VPE,
AMDGPU_NAVI10_DOORBELL_INVALID = 0xFFFF
-} AMDGPU_NAVI10_DOORBELL_ASSIGNMENT;
+};
/*
* 64bit doorbell, offset are in QWORD, occupy 2KB doorbell space
*/
-typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
-{
+enum AMDGPU_DOORBELL64_ASSIGNMENT {
/*
* All compute related doorbells: kiq, hiq, diq, traditional compute queue, user queue, should locate in
* a continues range so that programming CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover this range.
@@ -287,15 +315,58 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF,
AMDGPU_DOORBELL64_INVALID = 0xFFFF
-} AMDGPU_DOORBELL64_ASSIGNMENT;
+};
+
+/*
+ * Doorbell index assignment, "layout 1".
+ *
+ * CP entries (KIQ/HIQ/DIQ, MEC rings, user queues) use indices below
+ * AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE (0x20); non-CP entries (SDMA, IH, VCN)
+ * start at 0x100 and end at AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT (0x1E8).
+ */
+enum AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 {
+
+ /*
+  * Each XCC is given AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE (0x20) entries.
+  * NOTE(review): presumably XCC0: 0x00 ~ 0x1F, XCC1: 0x20 ~ 0x3F ...
+  * (the original comment read "XCC0: 0x00 ~20, XCC1: 20 ~ 2F") - confirm.
+  */
+
+ /* KIQ/HIQ/DIQ */
+ AMDGPU_DOORBELL_LAYOUT1_KIQ_START = 0x000,
+ AMDGPU_DOORBELL_LAYOUT1_HIQ = 0x001,
+ AMDGPU_DOORBELL_LAYOUT1_DIQ = 0x002,
+ /* Compute: 0x08 ~ 0x1F (MEC rings 0x08 ~ 0x0F, user queues 0x10 ~ 0x1F) */
+ AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START = 0x008,
+ AMDGPU_DOORBELL_LAYOUT1_MEC_RING_END = 0x00F,
+ AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START = 0x010,
+ AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END = 0x01F,
+ /* Number of doorbell entries per XCC (not a doorbell index itself) */
+ AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE = 0x020,
+
+ /* SDMA: 0x100 ~ 0x19F */
+ AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START = 0x100,
+ AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F,
+ /* IH: 0x1A0 ~ 0x1AF */
+ AMDGPU_DOORBELL_LAYOUT1_IH = 0x1A0,
+ /* VCN: 0x1B0 ~ 0x1E8 */
+ AMDGPU_DOORBELL_LAYOUT1_VCN_START = 0x1B0,
+ AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1E8,
+
+ /* Bounds of the non-CP doorbell range (SDMA start through VCN end) */
+ AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START,
+ AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_VCN_END,
+
+ /* Highest assigned index (equal to VCN_END) and the invalid marker */
+ AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1E8,
+ AMDGPU_DOORBELL_LAYOUT1_INVALID = 0xFFFF
+};
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index);
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
+/*
+ * GPU doorbell aperture helper functions.
+ */
+int amdgpu_doorbell_init(struct amdgpu_device *adev);
+void amdgpu_doorbell_fini(struct amdgpu_device *adev);
+int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev);
+uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev,
+ struct amdgpu_bo *db_bo,
+ uint32_t doorbell_index,
+ uint32_t db_size);
+
#define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index))
#define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v))
#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index))
#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v))
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
new file mode 100644
index 000000000000..3040437d99c2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+
+/**
+ * amdgpu_mm_rdoorbell - read a doorbell dword
+ *
+ * @adev: amdgpu_device pointer
+ * @index: doorbell index
+ *
+ * Reads the doorbell at @index from the mapped doorbell aperture (CIK).
+ *
+ * Returns the value read. Returns 0 when hardware access is being skipped,
+ * or (after logging an error) when @index is not below the number of
+ * kernel doorbells.
+ */
+u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
+{
+	if (amdgpu_device_skip_hw_access(adev))
+		return 0;
+
+	/* Reject out-of-range indices before touching the aperture */
+	if (index >= adev->doorbell.num_kernel_doorbells) {
+		dev_err(adev->dev, "reading beyond doorbell aperture: 0x%08x!\n",
+			index);
+		return 0;
+	}
+
+	return readl(adev->doorbell.cpu_addr + index);
+}
+
+/**
+ * amdgpu_mm_wdoorbell - write a doorbell dword
+ *
+ * @adev: amdgpu_device pointer
+ * @index: doorbell index
+ * @v: value to write
+ *
+ * Stores @v into the doorbell at @index in the mapped doorbell aperture
+ * (CIK). Nothing is written when hardware access is being skipped; an
+ * out-of-range @index is logged as an error and otherwise ignored.
+ */
+void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
+{
+	if (amdgpu_device_skip_hw_access(adev))
+		return;
+
+	/* Reject out-of-range indices before touching the aperture */
+	if (index >= adev->doorbell.num_kernel_doorbells) {
+		dev_err(adev->dev,
+			"writing beyond doorbell aperture: 0x%08x!\n", index);
+		return;
+	}
+
+	writel(v, adev->doorbell.cpu_addr + index);
+}
+
+/**
+ * amdgpu_mm_rdoorbell64 - read a doorbell Qword
+ *
+ * @adev: amdgpu_device pointer
+ * @index: doorbell index
+ *
+ * Reads the 64-bit doorbell at @index from the mapped doorbell aperture
+ * (VEGA10+) with a single atomic64 read.
+ *
+ * Returns the value read. Returns 0 when hardware access is being skipped,
+ * or (after logging an error) when @index is not below the number of
+ * kernel doorbells.
+ */
+u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
+{
+	atomic64_t *slot;
+
+	if (amdgpu_device_skip_hw_access(adev))
+		return 0;
+
+	/* Reject out-of-range indices before touching the aperture */
+	if (index >= adev->doorbell.num_kernel_doorbells) {
+		dev_err(adev->dev, "reading beyond doorbell aperture: 0x%08x!\n",
+			index);
+		return 0;
+	}
+
+	slot = (atomic64_t *)(adev->doorbell.cpu_addr + index);
+	return atomic64_read(slot);
+}
+
+/**
+ * amdgpu_mm_wdoorbell64 - write a doorbell Qword
+ *
+ * @adev: amdgpu_device pointer
+ * @index: doorbell index
+ * @v: value to write
+ *
+ * Stores @v into the 64-bit doorbell at @index in the mapped doorbell
+ * aperture (VEGA10+) with a single atomic64 store. Nothing is written when
+ * hardware access is being skipped; an out-of-range @index is logged as an
+ * error and otherwise ignored.
+ */
+void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
+{
+	atomic64_t *slot;
+
+	if (amdgpu_device_skip_hw_access(adev))
+		return;
+
+	/* Reject out-of-range indices before touching the aperture */
+	if (index >= adev->doorbell.num_kernel_doorbells) {
+		dev_err(adev->dev,
+			"writing beyond doorbell aperture: 0x%08x!\n", index);
+		return;
+	}
+
+	slot = (atomic64_t *)(adev->doorbell.cpu_addr + index);
+	atomic64_set(slot, v);
+}
+
+/**
+ * amdgpu_doorbell_index_on_bar - Find doorbell's absolute offset in BAR
+ *
+ * @adev: amdgpu_device pointer
+ * @db_bo: doorbell object's bo
+ * @doorbell_index: doorbell relative index in this doorbell object
+ * @db_size: size of one doorbell, in bytes
+ *
+ * Returns the doorbell's absolute index in the BAR, expressed in dwords:
+ * the bo's offset converted to dwords plus @doorbell_index scaled by the
+ * number of dwords one doorbell occupies.
+ */
+uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev,
+				      struct amdgpu_bo *db_bo,
+				      uint32_t doorbell_index,
+				      uint32_t db_size)
+{
+	int db_bo_offset = amdgpu_bo_gpu_offset_no_check(db_bo);
+	/* Dword position of the doorbell object itself */
+	size_t bo_base_dw = db_bo_offset / sizeof(u32);
+	/*
+	 * The doorbell index is counted in 32-bit units but a doorbell can
+	 * be 32 or 64 bits wide, so scale by the doorbell size in dwords.
+	 */
+	uint32_t dw_per_doorbell = DIV_ROUND_UP(db_size, 4);
+
+	return bo_base_dw + doorbell_index * dw_per_doorbell;
+}
+
+/**
+ * amdgpu_doorbell_create_kernel_doorbells - Create kernel doorbells for graphics
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocates and CPU-maps a doorbell-domain buffer object for the kernel
+ * driver: the existing num_kernel_doorbells rounded up to a page, plus one
+ * extra page for MES. On success adev->doorbell.num_kernel_doorbells is
+ * updated to the whole allocation size in dwords and
+ * adev->mes.db_start_dw_offset holds the dword offset of the extra MES page.
+ * Does nothing when num_kernel_doorbells is 0.
+ *
+ * Returns 0 on success, the error from amdgpu_bo_create_kernel() otherwise.
+ */
+int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev)
+{
+ int r;
+ int size;
+
+ /* SI HW does not have doorbells, skip allocation */
+ if (adev->doorbell.num_kernel_doorbells == 0)
+ return 0;
+
+ /* Reserve first num_kernel_doorbells (page-aligned) for kernel ops */
+ size = ALIGN(adev->doorbell.num_kernel_doorbells * sizeof(u32), PAGE_SIZE);
+
+ /*
+  * Allocate an extra page for MES kernel usages (ring test).
+  * The MES offset is recorded before the size grows, so it points at
+  * the first dword of that extra page.
+  */
+ adev->mes.db_start_dw_offset = size / sizeof(u32);
+ size += PAGE_SIZE;
+
+ r = amdgpu_bo_create_kernel(adev,
+ size,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_DOORBELL,
+ &adev->doorbell.kernel_doorbells,
+ NULL,
+ (void **)&adev->doorbell.cpu_addr);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed to allocate kernel doorbells, err=%d\n", r);
+ return r;
+ }
+
+ /* The bounds checks in the doorbell accessors now cover the whole bo */
+ adev->doorbell.num_kernel_doorbells = size / sizeof(u32);
+ return 0;
+}
+
+/*
+ * GPU doorbell aperture helper functions.
+ */
+/**
+ * amdgpu_doorbell_init - Init doorbell driver information.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Init doorbell driver information (CIK): records the base and size of
+ * PCI resource 2 (the doorbell BAR) and derives the number of kernel
+ * doorbells from the BAR size and the ASIC's max doorbell assignment.
+ * On ASICs older than CHIP_BONAIRE all doorbell fields are zeroed instead.
+ *
+ * Returns 0 on success, -EINVAL if the doorbell BAR is unset or no kernel
+ * doorbell fits in it.
+ */
+int amdgpu_doorbell_init(struct amdgpu_device *adev)
+{
+
+ /* No doorbell on SI hardware generation */
+ if (adev->asic_type < CHIP_BONAIRE) {
+ adev->doorbell.base = 0;
+ adev->doorbell.size = 0;
+ adev->doorbell.num_kernel_doorbells = 0;
+ return 0;
+ }
+
+ /* BAR 2 must have been assigned an address */
+ if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
+ return -EINVAL;
+
+ amdgpu_asic_init_doorbell_index(adev);
+
+ /* doorbell bar mapping */
+ adev->doorbell.base = pci_resource_start(adev->pdev, 2);
+ adev->doorbell.size = pci_resource_len(adev->pdev, 2);
+
+ /* Use whichever is smaller: the BAR size in dwords or max_assignment + 1 */
+ adev->doorbell.num_kernel_doorbells =
+ min_t(u32, adev->doorbell.size / sizeof(u32),
+ adev->doorbell_index.max_assignment + 1);
+ if (adev->doorbell.num_kernel_doorbells == 0)
+ return -EINVAL;
+
+ /*
+ * For Vega, reserve and map two pages on doorbell BAR since SDMA
+ * paging queue doorbell use the second page. The
+ * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
+ * doorbells are in the first page. So with paging queue enabled,
+ * num_kernel_doorbells is increased by one page (0x400 dwords).
+ */
+ if (adev->asic_type >= CHIP_VEGA10)
+ adev->doorbell.num_kernel_doorbells += 0x400;
+
+ return 0;
+}
+
+/**
+ * amdgpu_doorbell_fini - Tear down doorbell driver information.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tear down doorbell driver information (CIK): frees the kernel doorbell
+ * buffer object and releases its CPU mapping, passing both
+ * adev->doorbell.kernel_doorbells and adev->doorbell.cpu_addr to
+ * amdgpu_bo_free_kernel().
+ */
+void amdgpu_doorbell_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->doorbell.kernel_doorbells,
+ NULL,
+ (void **)&adev->doorbell.cpu_addr);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index ad95de6399af..2dfbddcef9ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -23,33 +23,37 @@
*/
#include <drm/amdgpu_drm.h>
-#include <drm/drm_aperture.h>
+#include <drm/clients/drm_client_setup.h>
#include <drm/drm_drv.h>
+#include <drm/drm_fbdev_ttm.h>
#include <drm/drm_gem.h>
-#include <drm/drm_vblank.h>
#include <drm/drm_managed.h>
-#include "amdgpu_drv.h"
-
#include <drm/drm_pciids.h>
-#include <linux/console.h>
-#include <linux/module.h>
-#include <linux/pm_runtime.h>
-#include <linux/vga_switcheroo.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_vblank.h>
+
+#include <linux/cc_platform.h>
+#include <linux/dynamic_debug.h>
+#include <linux/module.h>
#include <linux/mmu_notifier.h>
+#include <linux/pm_runtime.h>
#include <linux/suspend.h>
-#include <linux/cc_platform.h>
+#include <linux/vga_switcheroo.h>
#include "amdgpu.h"
-#include "amdgpu_irq.h"
+#include "amdgpu_amdkfd.h"
#include "amdgpu_dma_buf.h"
-#include "amdgpu_sched.h"
+#include "amdgpu_drv.h"
#include "amdgpu_fdinfo.h"
-#include "amdgpu_amdkfd.h"
-
+#include "amdgpu_irq.h"
+#include "amdgpu_psp.h"
#include "amdgpu_ras.h"
-#include "amdgpu_xgmi.h"
#include "amdgpu_reset.h"
+#include "amdgpu_sched.h"
+#include "amdgpu_xgmi.h"
+#include "amdgpu_userq.h"
+#include "amdgpu_userq_fence.h"
+#include "../amdxcp/amdgpu_xcp_drv.h"
/*
* KMS wrapper.
@@ -81,7 +85,7 @@
* - 3.24.0 - Add high priority compute support for gfx9
* - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
* - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
- * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
+ * - 3.27.0 - Add new chunk to AMDGPU_CS to enable BO_LIST creation.
* - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES
* - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID
* - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE.
@@ -99,18 +103,56 @@
* - 3.42.0 - Add 16bpc fixed point display support
* - 3.43.0 - Add device hot plug/unplug support
* - 3.44.0 - DCN3 supports DCC independent block settings: !64B && 128B, 64B && 128B
+ * - 3.45.0 - Add context ioctl stable pstate interface
+ * - 3.46.0 - To enable hot plug amdgpu tests in libdrm
+ * - 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags
+ * - 3.48.0 - Add IP discovery version info to HW INFO
+ * - 3.49.0 - Add gang submit into CS IOCTL
+ * - 3.50.0 - Update AMDGPU_INFO_DEV_INFO IOCTL for minimum engine and memory clock
+ * Update AMDGPU_INFO_SENSOR IOCTL for PEAK_PSTATE engine and memory clock
+ * - 3.51.0 - Return the PCIe gen and lanes from the INFO ioctl
+ * - 3.52.0 - Add AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD, add device_info fields:
+ *            tcp_cache_size, num_sqc_per_wgp, sqc_data_cache_size, sqc_inst_cache_size,
+ *            gl1c_cache_size, gl2c_cache_size, mall_size, enabled_rb_pipes_mask_hi
+ * - 3.53.0 - Support for GFX11 CP GFX shadowing
+ * - 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support
+ * - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query
+ * - 3.56.0 - Update IB start address and size alignment for decode and encode
+ * - 3.57.0 - Compute tunneling on GFX10+
+ * - 3.58.0 - Add GFX12 DCC support
+ * - 3.59.0 - Cleared VRAM
+ * - 3.60.0 - Add AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE (Vulkan requirement)
+ * - 3.61.0 - Contains fix for RV/PCO compute queues
+ * - 3.62.0 - Add AMDGPU_IDS_FLAGS_MODE_PF, AMDGPU_IDS_FLAGS_MODE_VF & AMDGPU_IDS_FLAGS_MODE_PT
+ * - 3.63.0 - GFX12 display DCC supports 256B max compressed block size
+ * - 3.64.0 - Userq IP support query
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 44
+#define KMS_DRIVER_MINOR 64
#define KMS_DRIVER_PATCHLEVEL 0
-int amdgpu_vram_limit;
+/*
+ * amdgpu.debug module options. All are disabled by default.
+ * Each option occupies its own bit (BIT(0) through BIT(10)), so several
+ * options can be combined in a single mask value.
+ */
+enum AMDGPU_DEBUG_MASK {
+ AMDGPU_DEBUG_VM = BIT(0),
+ AMDGPU_DEBUG_LARGEBAR = BIT(1),
+ AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2),
+ AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3),
+ AMDGPU_DEBUG_ENABLE_RAS_ACA = BIT(4),
+ AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5),
+ AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6),
+ AMDGPU_DEBUG_SMU_POOL = BIT(7),
+ AMDGPU_DEBUG_VM_USERPTR = BIT(8),
+ AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9),
+ AMDGPU_DEBUG_ENABLE_CE_CS = BIT(10)
+};
+
+unsigned int amdgpu_vram_limit = UINT_MAX;
int amdgpu_vis_vram_limit;
int amdgpu_gart_size = -1; /* auto */
int amdgpu_gtt_size = -1; /* auto */
int amdgpu_moverate = -1; /* auto */
-int amdgpu_benchmarking;
-int amdgpu_testing;
int amdgpu_audio = -1;
int amdgpu_disp_priority;
int amdgpu_hw_i2c;
@@ -128,7 +170,6 @@ int amdgpu_vm_size = -1;
int amdgpu_vm_fragment_size = -1;
int amdgpu_vm_block_size = -1;
int amdgpu_vm_fault_stop;
-int amdgpu_vm_debug;
int amdgpu_vm_update_mode = -1;
int amdgpu_exp_hw_support;
int amdgpu_dc = -1;
@@ -136,11 +177,22 @@ int amdgpu_sched_jobs = 32;
int amdgpu_sched_hw_submission = 2;
uint amdgpu_pcie_gen_cap;
uint amdgpu_pcie_lane_cap;
-uint amdgpu_cg_mask = 0xffffffff;
+u64 amdgpu_cg_mask = 0xffffffffffffffff;
uint amdgpu_pg_mask = 0xffffffff;
uint amdgpu_sdma_phase_quantum = 32;
-char *amdgpu_disable_cu = NULL;
-char *amdgpu_virtual_display = NULL;
+char *amdgpu_disable_cu;
+char *amdgpu_virtual_display;
+int amdgpu_enforce_isolation = -1;
+int amdgpu_modeset = -1;
+
+/* Specifies the default granularity for SVM, used in buffer
+ * migration and restoration of backing memory when handling
+ * recoverable page faults.
+ *
+ * The value is given as log2 of the number of pages in the buffer; for
+ * a 2 MiB buffer of 4 KiB pages it is 9
+ */
+uint amdgpu_svm_default_granularity = 9;
/*
* OverDrive(bit 14) disabled by default
@@ -148,7 +200,6 @@ char *amdgpu_virtual_display = NULL;
*/
uint amdgpu_pp_feature_mask = 0xfff7bfff;
uint amdgpu_force_long_training;
-int amdgpu_job_hang_limit;
int amdgpu_lbpw = -1;
int amdgpu_compute_multipipe = -1;
int amdgpu_gpu_recovery = -1; /* auto */
@@ -167,10 +218,14 @@ int amdgpu_smu_pptable_id = -1;
*/
uint amdgpu_dc_feature_mask = 2;
uint amdgpu_dc_debug_mask;
+uint amdgpu_dc_visual_confirm;
int amdgpu_async_gfx_ring = 1;
-int amdgpu_mcbp;
+int amdgpu_mcbp = -1;
int amdgpu_discovery = -1;
int amdgpu_mes;
+int amdgpu_mes_log_enable = 0;
+int amdgpu_mes_kiq;
+int amdgpu_uni_mes = 1;
int amdgpu_noretry = -1;
int amdgpu_force_asic_type = -1;
int amdgpu_tmz = -1; /* auto */
@@ -178,14 +233,34 @@ uint amdgpu_freesync_vid_mode;
int amdgpu_reset_method = -1; /* auto */
int amdgpu_num_kcq = -1;
int amdgpu_smartshift_bias;
-
-static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
+int amdgpu_use_xgmi_p2p = 1;
+int amdgpu_vcnfw_log;
+int amdgpu_sg_display = -1; /* auto */
+int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
+int amdgpu_umsch_mm;
+int amdgpu_seamless = -1; /* auto */
+uint amdgpu_debug_mask;
+int amdgpu_agp = -1; /* auto */
+int amdgpu_wbrf = -1;
+int amdgpu_damage_clips = -1; /* auto */
+int amdgpu_umsch_mm_fwlog;
+int amdgpu_rebar = -1; /* auto */
+int amdgpu_user_queue = -1;
+
+DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0,
+ "DRM_UT_CORE",
+ "DRM_UT_DRIVER",
+ "DRM_UT_KMS",
+ "DRM_UT_PRIME",
+ "DRM_UT_ATOMIC",
+ "DRM_UT_VBL",
+ "DRM_UT_STATE",
+ "DRM_UT_LEASE",
+ "DRM_UT_DP",
+ "DRM_UT_DRMRES");
struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
- .delayed_reset_work = __DELAYED_WORK_INITIALIZER(
- mgpu_info.delayed_reset_work,
- amdgpu_drv_delayed_reset_work_handler, 0),
};
int amdgpu_ras_enable = -1;
uint amdgpu_ras_mask = 0xffffffff;
@@ -211,17 +286,19 @@ module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444);
/**
* DOC: gartsize (uint)
- * Restrict the size of GART in Mib (32, 64, etc.) for testing. The default is -1 (The size depends on asic).
+ * Restrict the size of GART (for kernel use) in MiB (32, 64, etc.) for testing.
+ * The default is -1 (The size depends on asic).
*/
-MODULE_PARM_DESC(gartsize, "Size of GART to setup in megabytes (32, 64, etc., -1=auto)");
+MODULE_PARM_DESC(gartsize, "Size of kernel GART to setup in megabytes (32, 64, etc., -1=auto)");
module_param_named(gartsize, amdgpu_gart_size, uint, 0600);
/**
* DOC: gttsize (int)
- * Restrict the size of GTT domain in MiB for testing. The default is -1 (It's VRAM size if 3GB < VRAM < 3/4 RAM,
- * otherwise 3/4 RAM size).
+ * Restrict the size of GTT domain (for userspace use) in MiB for testing.
+ * The default is -1 (Use value specified by TTM).
+ * This parameter is deprecated and will be removed in the future.
*/
-MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)");
+MODULE_PARM_DESC(gttsize, "Size of the GTT userspace domain in megabytes (-1 = auto)");
module_param_named(gttsize, amdgpu_gtt_size, int, 0600);
/**
@@ -232,24 +309,10 @@ MODULE_PARM_DESC(moverate, "Maximum buffer migration rate in MB/s. (32, 64, etc.
module_param_named(moverate, amdgpu_moverate, int, 0600);
/**
- * DOC: benchmark (int)
- * Run benchmarks. The default is 0 (Skip benchmarks).
- */
-MODULE_PARM_DESC(benchmark, "Run benchmark");
-module_param_named(benchmark, amdgpu_benchmarking, int, 0444);
-
-/**
- * DOC: test (int)
- * Test BO GTT->VRAM and VRAM->GTT GPU copies. The default is 0 (Skip test, only set 1 to run test).
- */
-MODULE_PARM_DESC(test, "Run tests");
-module_param_named(test, amdgpu_testing, int, 0444);
-
-/**
* DOC: audio (int)
* Set HDMI/DP audio. Only affects non-DC display handling. The default is -1 (Enabled), set 0 to disable it.
*/
-MODULE_PARM_DESC(audio, "Audio enable (-1 = auto, 0 = disable, 1 = enable)");
+MODULE_PARM_DESC(audio, "HDMI/DP Audio enable for non DC displays (-1 = auto, 0 = disable, 1 = enable)");
module_param_named(audio, amdgpu_audio, int, 0444);
/**
@@ -281,27 +344,26 @@ MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(msi, amdgpu_msi, int, 0444);
/**
+ * DOC: svm_default_granularity (uint)
+ * Used in buffer migration and handling of recoverable page faults
+ */
+MODULE_PARM_DESC(svm_default_granularity, "SVM's default granularity in log(2^Pages), default 9 = 2^9 = 2 MiB");
+module_param_named(svm_default_granularity, amdgpu_svm_default_granularity, uint, 0644);
+
+/**
* DOC: lockup_timeout (string)
* Set GPU scheduler timeout value in ms.
*
- * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or
- * multiple values specified. 0 and negative values are invalidated. They will be adjusted
- * to the default timeout.
- *
- * - With one value specified, the setting will apply to all non-compute jobs.
- * - With multiple values specified, the first one will be for GFX.
- * The second one is for Compute. The third and fourth ones are
- * for SDMA and Video.
+ * The format can be [single value] for setting all timeouts at once or
+ * [GFX,Compute,SDMA,Video] to set individual timeouts.
+ * Negative values mean infinity.
*
- * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video)
- * jobs is 10000. The timeout for compute is 60000.
+ * By default(with no lockup_timeout settings), the timeout for all queues is 2000.
*/
-MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and 60000 for compute jobs; "
- "for passthrough or sriov, 10000 for all jobs."
- " 0: keep default value. negative: infinity timeout), "
- "format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
- "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video].");
-module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
+MODULE_PARM_DESC(lockup_timeout,
+ "GPU lockup timeout in ms (default: 2000. 0: keep default value. negative: infinity timeout), format: [single value for all] or [GFX,Compute,SDMA,Video].");
+module_param_string(lockup_timeout, amdgpu_lockup_timeout,
+ sizeof(amdgpu_lockup_timeout), 0444);
/**
* DOC: dpm (int)
@@ -314,9 +376,12 @@ module_param_named(dpm, amdgpu_dpm, int, 0444);
/**
* DOC: fw_load_type (int)
- * Set different firmware loading type for debugging (0 = direct, 1 = SMU, 2 = PSP). The default is -1 (auto).
+ * Set different firmware loading type for debugging, if supported.
+ * Set to 0 to force direct loading if supported by the ASIC. Set
+ * to -1 to select the default loading mode for the ASIC, as defined
+ * by the driver. The default is -1 (auto).
*/
-MODULE_PARM_DESC(fw_load_type, "firmware loading type (0 = direct, 1 = SMU, 2 = PSP, -1 = auto)");
+MODULE_PARM_DESC(fw_load_type, "firmware loading type (3 = rlc backdoor autoload if supported, 2 = smu load if supported, 1 = psp load, 0 = force direct if supported, -1 = auto)");
module_param_named(fw_load_type, amdgpu_fw_load_type, int, 0444);
/**
@@ -328,10 +393,12 @@ module_param_named(aspm, amdgpu_aspm, int, 0444);
/**
* DOC: runpm (int)
- * Override for runtime power management control for dGPUs in PX/HG laptops. The amdgpu driver can dynamically power down
- * the dGPU on PX/HG laptops when it is idle. The default is -1 (auto enable). Setting the value to 0 disables this functionality.
+ * Override for runtime power management control for dGPUs. The amdgpu driver can dynamically power down
+ * the dGPUs when they are idle if supported. The default is -1 (auto enable).
+ * Setting the value to 0 disables this functionality.
+ * Setting the value to -2 is auto enabled with power down when displays are attached.
*/
-MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = PX only default)");
+MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto, -2 = auto with displays)");
module_param_named(runpm, amdgpu_runtime_pm, int, 0444);
/**
@@ -342,7 +409,7 @@ module_param_named(runpm, amdgpu_runtime_pm, int, 0444);
* the kernel log for the list of IPs on the asic. The default is 0xffffffff (enable all blocks on a device).
*/
MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))");
-module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444);
+module_param_named_unsafe(ip_block_mask, amdgpu_ip_block_mask, uint, 0444);
/**
* DOC: bapm (int)
@@ -388,13 +455,6 @@ MODULE_PARM_DESC(vm_fault_stop, "Stop on VM fault (0 = never (default), 1 = prin
module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444);
/**
- * DOC: vm_debug (int)
- * Debug VM handling (0 = disabled, 1 = enabled). The default is 0 (Disabled).
- */
-MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)");
-module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
-
-/**
* DOC: vm_update_mode (int)
* Override VM update mode. VM updated by using CPU (0 = never, 1 = Graphics only, 2 = Compute only, 3 = Both). The default
* is -1 (Only in large BAR(LB) systems Compute VM tables will be updated by CPU, otherwise 0, never).
@@ -407,7 +467,7 @@ module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444);
* Enable experimental hw support (1 = enable). The default is 0 (disabled).
*/
MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))");
-module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
+module_param_named_unsafe(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
/**
* DOC: dc (int)
@@ -463,12 +523,12 @@ MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))");
module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444);
/**
- * DOC: cg_mask (uint)
+ * DOC: cg_mask (ullong)
* Override Clockgating features enabled on GPU (0 = disable clock gating). See the AMD_CG_SUPPORT flags in
- * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffff (all enabled).
+ * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffffffffffff (all enabled).
*/
MODULE_PARM_DESC(cg_mask, "Clockgating flags mask (0 = disable clock gating)");
-module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444);
+module_param_named(cg_mask, amdgpu_cg_mask, ullong, 0444);
/**
* DOC: pg_mask (uint)
@@ -504,13 +564,6 @@ MODULE_PARM_DESC(virtual_display,
module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444);
/**
- * DOC: job_hang_limit (int)
- * Set how much time allow a job hang and not drop it. The default is 0.
- */
-MODULE_PARM_DESC(job_hang_limit, "how much time allow a job hang and not drop it (default 0)");
-module_param_named(job_hang_limit, amdgpu_job_hang_limit, int ,0444);
-
-/**
* DOC: lbpw (int)
* Override Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable). The default is -1 (auto, enabled).
*/
@@ -524,15 +577,15 @@ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
* DOC: gpu_recovery (int)
* Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV).
*/
-MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (2 = advanced tdr mode, 1 = enable, 0 = disable, -1 = auto)");
-module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
+MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)");
+module_param_named_unsafe(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
/**
* DOC: emu_mode (int)
* Set value 1 to enable emulation mode. This is only needed when running on an emulator. The default is 0 (disabled).
*/
MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)");
-module_param_named(emu_mode, amdgpu_emu_mode, int, 0444);
+module_param_named_unsafe(emu_mode, amdgpu_emu_mode, int, 0444);
/**
* DOC: ras_enable (int)
@@ -565,39 +618,39 @@ module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644);
/**
* DOC: si_support (int)
- * Set SI support driver. This parameter works after set config CONFIG_DRM_AMDGPU_SI. For SI asic, when radeon driver is enabled,
- * set value 0 to use radeon driver, while set value 1 to use amdgpu driver. The default is using radeon driver when it available,
- * otherwise using amdgpu driver.
- */
+ * 1 = enabled, 0 = disabled, -1 = default
+ *
+ * SI (Southern Islands) are first generation GCN GPUs, supported by both
+ * drivers: radeon (old) and amdgpu (new). This parameter controls whether
+ * amdgpu should support SI.
+ * By default, SI dedicated GPUs are supported by amdgpu.
+ * Only relevant when CONFIG_DRM_AMDGPU_SI is enabled to build SI support in amdgpu.
+ * See also radeon.si_support which should be disabled when amdgpu.si_support is
+ * enabled, and vice versa.
+ */
+int amdgpu_si_support = -1;
#ifdef CONFIG_DRM_AMDGPU_SI
-
-#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
-int amdgpu_si_support = 0;
-MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))");
-#else
-int amdgpu_si_support = 1;
-MODULE_PARM_DESC(si_support, "SI support (1 = enabled (default), 0 = disabled)");
-#endif
-
+MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled, -1 = default)");
module_param_named(si_support, amdgpu_si_support, int, 0444);
#endif
/**
* DOC: cik_support (int)
- * Set CIK support driver. This parameter works after set config CONFIG_DRM_AMDGPU_CIK. For CIK asic, when radeon driver is enabled,
- * set value 0 to use radeon driver, while set value 1 to use amdgpu driver. The default is using radeon driver when it available,
- * otherwise using amdgpu driver.
- */
+ * 1 = enabled, 0 = disabled, -1 = default
+ *
+ * CIK (Sea Islands) are second generation GCN GPUs, supported by both
+ * drivers: radeon (old) and amdgpu (new). This parameter controls whether
+ * amdgpu should support CIK.
+ * By default:
+ * - CIK dedicated GPUs are supported by amdgpu.
+ * - CIK APUs are supported by radeon (except when radeon is not built).
+ * Only relevant when CONFIG_DRM_AMDGPU_CIK is enabled to build CIK support in amdgpu.
+ * See also radeon.cik_support which should be disabled when amdgpu.cik_support is
+ * enabled, and vice versa.
+ */
+int amdgpu_cik_support = -1;
#ifdef CONFIG_DRM_AMDGPU_CIK
-
-#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
-int amdgpu_cik_support = 0;
-MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))");
-#else
-int amdgpu_cik_support = 1;
-MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)");
-#endif
-
+MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled, -1 = default)");
module_param_named(cik_support, amdgpu_cik_support, int, 0444);
#endif
@@ -607,8 +660,7 @@ module_param_named(cik_support, amdgpu_cik_support, int, 0444);
* E.g. 0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte. The default is 0 (disabled).
*/
MODULE_PARM_DESC(smu_memory_pool_size,
- "reserve gtt for smu debug usage, 0 = disable,"
- "0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte");
+ "reserve gtt for smu debug usage, 0 = disable,0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte");
module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444);
/**
@@ -621,10 +673,10 @@ module_param_named(async_gfx_ring, amdgpu_async_gfx_ring, int, 0444);
/**
* DOC: mcbp (int)
- * It is used to enable mid command buffer preemption. (0 = disabled (default), 1 = enabled)
+ * It is used to enable mid command buffer preemption. (0 = disabled, 1 = enabled, -1 = auto (default))
*/
MODULE_PARM_DESC(mcbp,
- "Enable Mid-command buffer preemption (0 = disabled (default), 1 = enabled)");
+ "Enable Mid-command buffer preemption (0 = disabled, 1 = enabled, -1 = auto (default))");
module_param_named(mcbp, amdgpu_mcbp, int, 0444);
/**
@@ -646,6 +698,33 @@ MODULE_PARM_DESC(mes,
module_param_named(mes, amdgpu_mes, int, 0444);
/**
+ * DOC: mes_log_enable (int)
+ * Enable Micro Engine Scheduler log. This is used to enable/disable MES internal log.
+ * (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(mes_log_enable,
+ "Enable Micro Engine Scheduler log (0 = disabled (default), 1 = enabled)");
+module_param_named(mes_log_enable, amdgpu_mes_log_enable, int, 0444);
+
+/**
+ * DOC: mes_kiq (int)
+ * Enable Micro Engine Scheduler KIQ. This is a new engine pipe for kiq.
+ * (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(mes_kiq,
+ "Enable Micro Engine Scheduler KIQ (0 = disabled (default), 1 = enabled)");
+module_param_named(mes_kiq, amdgpu_mes_kiq, int, 0444);
+
+/**
+ * DOC: uni_mes (int)
+ * Enable Unified Micro Engine Scheduler. This is a new engine pipe for unified scheduler.
+ * (0 = disabled, 1 = enabled (default))
+ */
+MODULE_PARM_DESC(uni_mes,
+ "Enable Unified Micro Engine Scheduler (0 = disabled, 1 = enabled (default))");
+module_param_named(uni_mes, amdgpu_uni_mes, int, 0444);
+
+/**
* DOC: noretry (int)
* Disable XNACK retry in the SQ by default on GFXv9 hardware. On ASICs that
* do not support per-process XNACK this also disables retry page faults.
@@ -661,8 +740,15 @@ module_param_named(noretry, amdgpu_noretry, int, 0644);
*/
MODULE_PARM_DESC(force_asic_type,
"A non negative value used to specify the asic type for all supported GPUs");
-module_param_named(force_asic_type, amdgpu_force_asic_type, int, 0444);
+module_param_named_unsafe(force_asic_type, amdgpu_force_asic_type, int, 0444);
+/**
+ * DOC: use_xgmi_p2p (int)
+ * Enables/disables XGMI P2P interface (0 = disable, 1 = enable).
+ */
+MODULE_PARM_DESC(use_xgmi_p2p,
+ "Enable XGMI P2P interface (0 = disable; 1 = enable (default))");
+module_param_named(use_xgmi_p2p, amdgpu_use_xgmi_p2p, int, 0444);
#ifdef CONFIG_HSA_AMD
@@ -673,7 +759,7 @@ module_param_named(force_asic_type, amdgpu_force_asic_type, int, 0444);
* assigns queues to HQDs.
*/
int sched_policy = KFD_SCHED_POLICY_HWS;
-module_param(sched_policy, int, 0444);
+module_param_unsafe(sched_policy, int, 0444);
MODULE_PARM_DESC(sched_policy,
"Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
@@ -682,7 +768,7 @@ MODULE_PARM_DESC(sched_policy,
* Maximum number of processes that HWS can schedule concurrently. The maximum is the
* number of VMIDs assigned to the HWS, which is also the default.
*/
-int hws_max_conc_proc = 8;
+int hws_max_conc_proc = -1;
module_param(hws_max_conc_proc, int, 0444);
MODULE_PARM_DESC(hws_max_conc_proc,
"Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))");
@@ -718,38 +804,12 @@ MODULE_PARM_DESC(send_sigterm,
"Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
/**
- * DOC: debug_largebar (int)
- * Set debug_largebar as 1 to enable simulating large-bar capability on non-large bar
- * system. This limits the VRAM size reported to ROCm applications to the visible
- * size, usually 256MB.
- * Default value is 0, diabled.
- */
-int debug_largebar;
-module_param(debug_largebar, int, 0444);
-MODULE_PARM_DESC(debug_largebar,
- "Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)");
-
-/**
- * DOC: ignore_crat (int)
- * Ignore CRAT table during KFD initialization. By default, KFD uses the ACPI CRAT
- * table to get information about AMD APUs. This option can serve as a workaround on
- * systems with a broken CRAT table.
- *
- * Default is auto (according to asic type, iommu_v2, and crat table, to decide
- * whehter use CRAT)
- */
-int ignore_crat;
-module_param(ignore_crat, int, 0444);
-MODULE_PARM_DESC(ignore_crat,
- "Ignore CRAT table during KFD initialization (0 = auto (default), 1 = ignore CRAT)");
-
-/**
* DOC: halt_if_hws_hang (int)
* Halt if HWS hang is detected. Default value, 0, disables the halt on hang.
* Setting 1 enables halt on hang.
*/
int halt_if_hws_hang;
-module_param(halt_if_hws_hang, int, 0644);
+module_param_unsafe(halt_if_hws_hang, int, 0644);
MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
/**
@@ -758,13 +818,13 @@ MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (defau
* check says. Default value: false (rely on MEC2 firmware version check).
*/
bool hws_gws_support;
-module_param(hws_gws_support, bool, 0444);
+module_param_unsafe(hws_gws_support, bool, 0444);
MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false = rely on FW version check (Default), true = force supported)");
/**
- * DOC: queue_preemption_timeout_ms (int)
- * queue preemption timeout in ms (1 = Minimum, 9000 = default)
- */
+ * DOC: queue_preemption_timeout_ms (int)
+ * queue preemption timeout in ms (1 = Minimum, 9000 = default)
+ */
int queue_preemption_timeout_ms = 9000;
module_param(queue_preemption_timeout_ms, int, 0644);
MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption timeout in ms (1 = Minimum, 9000 = default)");
@@ -789,9 +849,26 @@ MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = defa
* DOC: no_queue_eviction_on_vm_fault (int)
* If set, process queues will not be evicted on gpuvm fault. This is to keep the wavefront context for debugging (0 = queue eviction, 1 = no queue eviction). The default is 0 (queue eviction).
*/
-int amdgpu_no_queue_eviction_on_vm_fault = 0;
+int amdgpu_no_queue_eviction_on_vm_fault;
MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (0 = queue eviction, 1 = no queue eviction)");
-module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444);
+module_param_named_unsafe(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444);
+#endif
+
+/**
+ * DOC: mtype_local (int)
+ */
+int amdgpu_mtype_local;
+MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_RW (default), 1 = MTYPE_NC, 2 = MTYPE_CC)");
+module_param_named_unsafe(mtype_local, amdgpu_mtype_local, int, 0444);
+
+/**
+ * DOC: pcie_p2p (bool)
+ * Enable PCIe P2P (requires large-BAR). Default value: true (on)
+ */
+#ifdef CONFIG_HSA_AMD_P2P
+bool pcie_p2p = true;
+module_param(pcie_p2p, bool, 0444);
+MODULE_PARM_DESC(pcie_p2p, "Enable PCIe P2P (requires large-BAR). (N = off, Y = on(default))");
#endif
/**
@@ -804,11 +881,14 @@ module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444);
/**
* DOC: dcdebugmask (uint)
- * Override display features enabled. See enum DC_DEBUG_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
+ * Display debug options. See enum DC_DEBUG_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
*/
MODULE_PARM_DESC(dcdebugmask, "all debug options disabled (default))");
module_param_named(dcdebugmask, amdgpu_dc_debug_mask, uint, 0444);
+MODULE_PARM_DESC(visualconfirm, "Visual confirm (0 = off (default), 1 = MPO, 5 = PSR)");
+module_param_named(visualconfirm, amdgpu_dc_visual_confirm, uint, 0444);
+
/**
* DOC: abmlevel (uint)
* Override the default ABM (Adaptive Backlight Management) level used for DC
@@ -818,18 +898,31 @@ module_param_named(dcdebugmask, amdgpu_dc_debug_mask, uint, 0444);
* the ABM algorithm, with 1 being the least reduction and 4 being the most
* reduction.
*
- * Defaults to 0, or disabled. Userspace can still override this level later
- * after boot.
+ * Defaults to -1, or auto. Userspace can only override this level after
+ * boot if it's set to auto.
*/
-uint amdgpu_dm_abm_level;
-MODULE_PARM_DESC(abmlevel, "ABM level (0 = off (default), 1-4 = backlight reduction level) ");
-module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444);
+int amdgpu_dm_abm_level = -1;
+MODULE_PARM_DESC(abmlevel,
+ "ABM level (0 = off, 1-4 = backlight reduction level, -1 auto (default))");
+module_param_named(abmlevel, amdgpu_dm_abm_level, int, 0444);
int amdgpu_backlight = -1;
MODULE_PARM_DESC(backlight, "Backlight control (0 = pwm, 1 = aux, -1 auto (default))");
module_param_named(backlight, amdgpu_backlight, bint, 0444);
/**
+ * DOC: damageclips (int)
+ * Enable or disable damage clips support. If damage clips support is disabled,
+ * we will force full frame updates, irrespective of what user space sends to
+ * us.
+ *
+ * Defaults to -1 (where it is enabled unless a PSR-SU display is detected).
+ */
+MODULE_PARM_DESC(damageclips,
+ "Damage clips support (0 = disable, 1 = enable, -1 auto (default))");
+module_param_named(damageclips, amdgpu_damage_clips, int, 0444);
+
+/**
* DOC: tmz (int)
* Trusted Memory Zone (TMZ) is a method to protect data being written
* to or read from memory.
@@ -862,15 +955,15 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
*/
MODULE_PARM_DESC(
freesync_video,
- "Enable freesync modesetting optimization feature (0 = off (default), 1 = on)");
+ "Adds additional modes via VRR for refresh changes without a full modeset (0 = off (default), 1 = on)");
module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444);
/**
* DOC: reset_method (int)
- * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco, 5 = pci)
+ * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)
*/
-MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco, 5 = pci)");
-module_param_named(reset_method, amdgpu_reset_method, int, 0444);
+MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco)");
+module_param_named_unsafe(reset_method, amdgpu_reset_method, int, 0644);
/**
* DOC: bad_page_threshold (int) Bad page threshold is specifies the
@@ -878,13 +971,46 @@ module_param_named(reset_method, amdgpu_reset_method, int, 0444);
* result in the GPU entering bad status when the number of total
* faulty pages by ECC exceeds the threshold value.
*/
-MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = auto(default value), 0 = disable bad page retirement, -2 = ignore bad page threshold)");
+MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = ignore threshold (default value), 0 = disable bad page retirement, -2 = threshold determined by a formula, 0 < threshold < max records, user-defined threshold)");
module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444);
MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");
module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
/**
+ * DOC: vcnfw_log (int)
+ * Enable vcnfw log output for debugging, the default is disabled.
+ */
+MODULE_PARM_DESC(vcnfw_log, "Enable vcnfw log(0 = disable (default value), 1 = enable)");
+module_param_named(vcnfw_log, amdgpu_vcnfw_log, int, 0444);
+
+/**
+ * DOC: sg_display (int)
+ * Disable S/G (scatter/gather) display (i.e., display from system memory).
+ * This option is only relevant on APUs. Set this option to 0 to disable
+ * S/G display if you experience flickering or other issues under memory
+ * pressure and report the issue.
+ */
+MODULE_PARM_DESC(sg_display, "S/G Display (-1 = auto (default), 0 = disable)");
+module_param_named(sg_display, amdgpu_sg_display, int, 0444);
+
+/**
+ * DOC: umsch_mm (int)
+ * Enable Multi Media User Mode Scheduler. This is a HW scheduling engine for VCN and VPE.
+ * (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(umsch_mm,
+ "Enable Multi Media User Mode Scheduler (0 = disabled (default), 1 = enabled)");
+module_param_named(umsch_mm, amdgpu_umsch_mm, int, 0444);
+
+/**
+ * DOC: umsch_mm_fwlog (int)
+ * Enable umschfw log output for debugging, the default is disabled.
+ */
+MODULE_PARM_DESC(umsch_mm_fwlog, "Enable umschfw log(0 = disable (default value), 1 = enable)");
+module_param_named(umsch_mm_fwlog, amdgpu_umsch_mm_fwlog, int, 0444);
+
+/**
* DOC: smu_pptable_id (int)
* Used to override pptable id. id = 0 use VBIOS pptable.
* id > 0 use the soft pptable with specicfied id.
@@ -893,6 +1019,109 @@ MODULE_PARM_DESC(smu_pptable_id,
"specify pptable id to be used (-1 = auto(default) value, 0 = use pptable from vbios, > 0 = soft pptable id)");
module_param_named(smu_pptable_id, amdgpu_smu_pptable_id, int, 0444);
+/**
+ * DOC: user_partt_mode (uint)
+ * Used to override the default SPX mode.
+ */
+MODULE_PARM_DESC(
+ user_partt_mode,
+ "specify partition mode to be used (-2 = AMDGPU_AUTO_COMPUTE_PARTITION_MODE(default value) \
+ 0 = AMDGPU_SPX_PARTITION_MODE, \
+ 1 = AMDGPU_DPX_PARTITION_MODE, \
+ 2 = AMDGPU_TPX_PARTITION_MODE, \
+ 3 = AMDGPU_QPX_PARTITION_MODE, \
+ 4 = AMDGPU_CPX_PARTITION_MODE)");
+module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444);
+
+
+/**
+ * DOC: enforce_isolation (int)
+ * enforce process isolation between graphics and compute.
+ * (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode, 3 = enable without cleaner shader)
+ */
+module_param_named(enforce_isolation, amdgpu_enforce_isolation, int, 0444);
+MODULE_PARM_DESC(enforce_isolation,
+"enforce process isolation between graphics and compute. (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode, 3 = enable without cleaner shader)");
+
+/**
+ * DOC: modeset (int)
+ * Override nomodeset (1 = override, -1 = auto). The default is -1 (auto).
+ */
+MODULE_PARM_DESC(modeset, "Override nomodeset (1 = enable, -1 = auto)");
+module_param_named(modeset, amdgpu_modeset, int, 0444);
+
+/**
+ * DOC: seamless (int)
+ * Seamless boot will keep the image on the screen during the boot process.
+ */
+MODULE_PARM_DESC(seamless, "Seamless boot (-1 = auto (default), 0 = disable, 1 = enable)");
+module_param_named(seamless, amdgpu_seamless, int, 0444);
+
+/**
+ * DOC: debug_mask (uint)
+ * Debug options for amdgpu, work as a binary mask with the following options:
+ *
+ * - 0x1: Debug VM handling
+ * - 0x2: Enable simulating large-bar capability on non-large bar system. This
+ * limits the VRAM size reported to ROCm applications to the visible
+ * size, usually 256MB.
+ * - 0x4: Disable GPU soft recovery, always do a full reset
+ * - 0x8: Use VRAM for firmware loading
+ * - 0x10: Enable ACA based RAS logging
+ * - 0x20: Enable experimental resets
+ * - 0x40: Disable ring resets
+ * - 0x80: Use VRAM for SMU pool
+ */
+MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default");
+module_param_named_unsafe(debug_mask, amdgpu_debug_mask, uint, 0444);
+
+/**
+ * DOC: agp (int)
+ * Enable the AGP aperture. This provides an aperture in the GPU's internal
+ * address space for direct access to system memory. Note that these accesses
+ * are non-snooped, so they are only used for access to uncached memory.
+ */
+MODULE_PARM_DESC(agp, "AGP (-1 = auto (default), 0 = disable, 1 = enable)");
+module_param_named(agp, amdgpu_agp, int, 0444);
+
+/**
+ * DOC: wbrf (int)
+ * Enable Wifi RFI interference mitigation feature.
+ * Due to electrical and mechanical constraints there may be interference of
+ * relatively high-powered harmonics of the (G-)DDR memory clocks with local radio
+ * module frequency bands used by Wifi 6/6e/7. To mitigate the possible RFI interference,
+ * with this feature enabled, PMFW will use either “shadowed P-State” or “P-State” based
+ * on active list of frequencies in-use (to be avoided) as part of initial setting or
+ * P-state transition. However, there may be potential performance impact with this
+ * feature enabled.
+ * (0 = disabled, 1 = enabled, -1 = auto (default setting, will be enabled if supported))
+ */
+MODULE_PARM_DESC(wbrf,
+ "Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)");
+module_param_named(wbrf, amdgpu_wbrf, int, 0444);
+
+/**
+ * DOC: rebar (int)
+ * Allow BAR resizing. Disable this to prevent the driver from attempting
+ * to resize the BAR if the GPU supports it and there is available MMIO space.
+ * Note that this just prevents the driver from resizing the BAR. The BIOS
+ * may have already resized the BAR at boot time.
+ */
+MODULE_PARM_DESC(rebar, "Resizable BAR (-1 = auto (default), 0 = disable, 1 = enable)");
+module_param_named(rebar, amdgpu_rebar, int, 0444);
+
+/**
+ * DOC: user_queue (int)
+ * Enable user queues on systems that support user queues. Possible values:
+ *
+ * - -1 = auto (ASIC specific default)
+ * - 0 = user queues disabled
+ * - 1 = user queues enabled and kernel queues enabled (if supported)
+ * - 2 = user queues enabled and kernel queues disabled
+ */
+MODULE_PARM_DESC(user_queue, "Enable user queues (-1 = auto (default), 0 = disable, 1 = enable, 2 = enable UQs and disable KQs)");
+module_param_named(user_queue, amdgpu_user_queue, int, 0444);
+
/* These devices are not supported by amdgpu.
* They are supported by the mach64, r128, radeon drivers
*/
@@ -1521,10 +1750,91 @@ static const u16 amdgpu_unsupported_pciidlist[] = {
0x99A0,
0x99A2,
0x99A4,
+ /* radeon secondary ids */
+ 0x3171,
+ 0x3e70,
+ 0x4164,
+ 0x4165,
+ 0x4166,
+ 0x4168,
+ 0x4170,
+ 0x4171,
+ 0x4172,
+ 0x4173,
+ 0x496e,
+ 0x4a69,
+ 0x4a6a,
+ 0x4a6b,
+ 0x4a70,
+ 0x4a74,
+ 0x4b69,
+ 0x4b6b,
+ 0x4b6c,
+ 0x4c6e,
+ 0x4e64,
+ 0x4e65,
+ 0x4e66,
+ 0x4e67,
+ 0x4e68,
+ 0x4e69,
+ 0x4e6a,
+ 0x4e71,
+ 0x4f73,
+ 0x5569,
+ 0x556b,
+ 0x556d,
+ 0x556f,
+ 0x5571,
+ 0x5854,
+ 0x5874,
+ 0x5940,
+ 0x5941,
+ 0x5b70,
+ 0x5b72,
+ 0x5b73,
+ 0x5b74,
+ 0x5b75,
+ 0x5d44,
+ 0x5d45,
+ 0x5d6d,
+ 0x5d6f,
+ 0x5d72,
+ 0x5d77,
+ 0x5e6b,
+ 0x5e6d,
+ 0x7120,
+ 0x7124,
+ 0x7129,
+ 0x712e,
+ 0x712f,
+ 0x7162,
+ 0x7163,
+ 0x7166,
+ 0x7167,
+ 0x7172,
+ 0x7173,
+ 0x71a0,
+ 0x71a1,
+ 0x71a3,
+ 0x71a7,
+ 0x71bb,
+ 0x71e0,
+ 0x71e1,
+ 0x71e2,
+ 0x71e6,
+ 0x71e7,
+ 0x71f2,
+ 0x7269,
+ 0x726b,
+ 0x726e,
+ 0x72a0,
+ 0x72a8,
+ 0x72b1,
+ 0x72b3,
+ 0x793f,
};
static const struct pci_device_id pciidlist[] = {
-#ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x6784, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x6788, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
@@ -1597,8 +1907,6 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x6665, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
{0x1002, 0x6667, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
{0x1002, 0x666F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
-#endif
-#ifdef CONFIG_DRM_AMDGPU_CIK
/* Kaveri */
{0x1002, 0x1304, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x1305, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
@@ -1681,7 +1989,6 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x985D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x985E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x985F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
-#endif
/* topaz */
{0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
{0x1002, 0x6901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
@@ -1824,9 +2131,6 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x73AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
{0x1002, 0x73BF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
- /* Van Gogh */
- {0x1002, 0x163F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VANGOGH|AMD_IS_APU},
-
/* Yellow Carp */
{0x1002, 0x164D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU},
{0x1002, 0x1681, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU},
@@ -1857,19 +2161,26 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x73FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
/* Aldebaran */
- {0x1002, 0x7408, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
- {0x1002, 0x740C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
- {0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
- {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x7408, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN},
+ {0x1002, 0x740C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN},
+ {0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN},
+ {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN},
/* CYAN_SKILLFISH */
+ {0x1002, 0x13DB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+ {0x1002, 0x13F9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+ {0x1002, 0x13FA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+ {0x1002, 0x13FB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+ {0x1002, 0x13FC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
{0x1002, 0x13FE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+ {0x1002, 0x143F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
/* BEIGE_GOBY */
{0x1002, 0x7420, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
{0x1002, 0x7421, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
{0x1002, 0x7422, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
{0x1002, 0x7423, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+ {0x1002, 0x7424, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
{0x1002, 0x743F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
{ PCI_DEVICE(0x1002, PCI_ANY_ID),
@@ -1882,13 +2193,185 @@ static const struct pci_device_id pciidlist[] = {
.class_mask = 0xffffff,
.driver_data = CHIP_IP_DISCOVERY },
+ { PCI_DEVICE(0x1002, PCI_ANY_ID),
+ .class = PCI_CLASS_ACCELERATOR_PROCESSING << 8,
+ .class_mask = 0xffffff,
+ .driver_data = CHIP_IP_DISCOVERY },
+
{0, 0, 0}
};
MODULE_DEVICE_TABLE(pci, pciidlist);
+static const struct amdgpu_asic_type_quirk asic_type_quirks[] = {
+ /* differentiate between P10 and P11 asics with the same DID */
+ {0x67FF, 0xE3, CHIP_POLARIS10},
+ {0x67FF, 0xE7, CHIP_POLARIS10},
+ {0x67FF, 0xF3, CHIP_POLARIS10},
+ {0x67FF, 0xF7, CHIP_POLARIS10},
+};
+
static const struct drm_driver amdgpu_kms_driver;
+static void amdgpu_get_secondary_funcs(struct amdgpu_device *adev)
+{
+ struct pci_dev *p = NULL;
+ int i;
+
+ /* 0 - GPU
+ * 1 - audio
+ * 2 - USB
+ * 3 - UCSI
+ */
+ for (i = 1; i < 4; i++) {
+ p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
+ adev->pdev->bus->number, i);
+ if (p) {
+ pm_runtime_get_sync(&p->dev);
+ pm_runtime_put_autosuspend(&p->dev);
+ pci_dev_put(p);
+ }
+ }
+}
+
+static void amdgpu_init_debug_options(struct amdgpu_device *adev)
+{
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_VM) {
+ pr_info("debug: VM handling debug enabled\n");
+ adev->debug_vm = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_LARGEBAR) {
+ pr_info("debug: enabled simulating large-bar capability on non-large bar system\n");
+ adev->debug_largebar = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY) {
+ pr_info("debug: soft reset for GPU recovery disabled\n");
+ adev->debug_disable_soft_recovery = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_USE_VRAM_FW_BUF) {
+ pr_info("debug: place fw in vram for frontdoor loading\n");
+ adev->debug_use_vram_fw_buf = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_RAS_ACA) {
+ pr_info("debug: enable RAS ACA\n");
+ adev->debug_enable_ras_aca = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_EXP_RESETS) {
+ pr_info("debug: enable experimental reset features\n");
+ adev->debug_exp_resets = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_GPU_RING_RESET) {
+ pr_info("debug: ring reset disabled\n");
+ adev->debug_disable_gpu_ring_reset = true;
+ }
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_SMU_POOL) {
+ pr_info("debug: use vram for smu pool\n");
+ adev->pm.smu_debug_mask |= SMU_DEBUG_POOL_USE_VRAM;
+ }
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_VM_USERPTR) {
+ pr_info("debug: VM mode debug for userptr is enabled\n");
+ adev->debug_vm_userptr = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_RAS_CE_LOG) {
+ pr_info("debug: disable kernel logs of correctable errors\n");
+ adev->debug_disable_ce_logs = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_CE_CS) {
+ pr_info("debug: allowing command submission to CE engine\n");
+ adev->debug_enable_ce_cs = true;
+ }
+}
+
+static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(asic_type_quirks); i++) {
+ if (pdev->device == asic_type_quirks[i].device &&
+ pdev->revision == asic_type_quirks[i].revision) {
+ flags &= ~AMD_ASIC_MASK;
+ flags |= asic_type_quirks[i].type;
+ break;
+ }
+ }
+
+ return flags;
+}
+
+static bool amdgpu_support_enabled(struct device *dev,
+ const enum amd_asic_type family)
+{
+ const char *gen;
+ const char *param;
+ int module_param = -1;
+ bool radeon_support_built = IS_ENABLED(CONFIG_DRM_RADEON);
+ bool amdgpu_support_built = false;
+ bool support_by_default = false;
+
+ switch (family) {
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+ gen = "SI";
+ param = "si_support";
+ module_param = amdgpu_si_support;
+ amdgpu_support_built = IS_ENABLED(CONFIG_DRM_AMDGPU_SI);
+ support_by_default = true;
+ break;
+
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ support_by_default = true;
+ fallthrough;
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ gen = "CIK";
+ param = "cik_support";
+ module_param = amdgpu_cik_support;
+ amdgpu_support_built = IS_ENABLED(CONFIG_DRM_AMDGPU_CIK);
+ break;
+
+ default:
+ /* All other chips are supported by amdgpu only */
+ return true;
+ }
+
+ if (!amdgpu_support_built) {
+ dev_info(dev, "amdgpu built without %s support\n", gen);
+ return false;
+ }
+
+ if ((module_param == -1 && (support_by_default || !radeon_support_built)) ||
+ module_param == 1) {
+ if (radeon_support_built)
+ dev_info(dev, "%s support provided by amdgpu.\n"
+ "Use radeon.%s=1 amdgpu.%s=0 to override.\n",
+ gen, param, param);
+
+ return true;
+ }
+
+ if (radeon_support_built)
+ dev_info(dev, "%s support provided by radeon.\n"
+ "Use radeon.%s=0 amdgpu.%s=1 to override.\n",
+ gen, param, param);
+ else if (module_param == 0)
+ dev_info(dev, "%s support disabled by module param\n", gen);
+
+ return false;
+}
+
static int amdgpu_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
@@ -1898,19 +2381,23 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
int ret, retry = 0, i;
bool supports_atomic = false;
+ if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
+ (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
+ if (drm_firmware_drivers_only() && amdgpu_modeset == -1)
+ return -EINVAL;
+ }
+
/* skip devices which are owned by radeon */
for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) {
if (amdgpu_unsupported_pciidlist[i] == pdev->device)
return -ENODEV;
}
- if (flags == 0) {
- DRM_INFO("Unsupported asic. Remove me when IP discovery init is in place.\n");
- return -ENODEV;
- }
+ if (amdgpu_aspm == -1 && !pcie_aspm_enabled(pdev))
+ amdgpu_aspm = 0;
if (amdgpu_virtual_display ||
- amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
+ amdgpu_device_asic_has_dc_support(pdev, flags & AMD_ASIC_MASK))
supports_atomic = true;
if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) {
@@ -1919,6 +2406,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
return -ENODEV;
}
+ flags = amdgpu_fix_asic_type(pdev, flags);
+
/* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU mapping,
* however, SME requires an indirect IOMMU mapping because the encryption
* bit is beyond the DMA mask of the chip.
@@ -1930,45 +2419,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
return -ENOTSUPP;
}
-#ifdef CONFIG_DRM_AMDGPU_SI
- if (!amdgpu_si_support) {
- switch (flags & AMD_ASIC_MASK) {
- case CHIP_TAHITI:
- case CHIP_PITCAIRN:
- case CHIP_VERDE:
- case CHIP_OLAND:
- case CHIP_HAINAN:
- dev_info(&pdev->dev,
- "SI support provided by radeon.\n");
- dev_info(&pdev->dev,
- "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n"
- );
- return -ENODEV;
- }
- }
-#endif
-#ifdef CONFIG_DRM_AMDGPU_CIK
- if (!amdgpu_cik_support) {
- switch (flags & AMD_ASIC_MASK) {
- case CHIP_KAVERI:
- case CHIP_BONAIRE:
- case CHIP_HAWAII:
- case CHIP_KABINI:
- case CHIP_MULLINS:
- dev_info(&pdev->dev,
- "CIK support provided by radeon.\n");
- dev_info(&pdev->dev,
- "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n"
- );
- return -ENODEV;
- }
- }
-#endif
-
- /* Get rid of things like offb */
- ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &amdgpu_kms_driver);
- if (ret)
- return ret;
+ if (!amdgpu_support_enabled(&pdev->dev, flags & AMD_ASIC_MASK))
+ return -ENODEV;
adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev);
if (IS_ERR(adev))
@@ -1987,12 +2439,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
pci_set_drvdata(pdev, ddev);
- ret = amdgpu_driver_load_kms(adev, ent->driver_data);
+ amdgpu_init_debug_options(adev);
+
+ ret = amdgpu_driver_load_kms(adev, flags);
if (ret)
goto err_pci;
retry_init:
- ret = drm_dev_register(ddev, ent->driver_data);
+ ret = drm_dev_register(ddev, flags);
if (ret == -EAGAIN && ++retry <= 3) {
DRM_INFO("retry init %d\n", retry);
/* Don't request EX mode too frequently which is attacking */
@@ -2002,10 +2456,78 @@ retry_init:
goto err_pci;
}
+ ret = amdgpu_xcp_dev_register(adev, ent);
+ if (ret)
+ goto err_pci;
+
+ ret = amdgpu_amdkfd_drm_client_create(adev);
+ if (ret)
+ goto err_pci;
+
+ /*
+ * 1. don't init fbdev on hw without DCE
+ * 2. don't init fbdev if there are no connectors
+ */
+ if (adev->mode_info.mode_config_initialized &&
+ !list_empty(&adev_to_drm(adev)->mode_config.connector_list)) {
+ const struct drm_format_info *format;
+
+ /* select 8 bpp console on low vram cards */
+ if (adev->gmc.real_vram_size <= (32*1024*1024))
+ format = drm_format_info(DRM_FORMAT_C8);
+ else
+ format = NULL;
+
+ drm_client_setup(adev_to_drm(adev), format);
+ }
+
ret = amdgpu_debugfs_init(adev);
if (ret)
DRM_ERROR("Creating debugfs files failed (%d).\n", ret);
+ if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) {
+ /* only need to skip on ATPX */
+ if (amdgpu_device_supports_px(adev))
+ dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
+ /* we want direct complete for BOCO */
+ if (amdgpu_device_supports_boco(adev))
+ dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_SMART_PREPARE |
+ DPM_FLAG_SMART_SUSPEND |
+ DPM_FLAG_MAY_SKIP_RESUME);
+ pm_runtime_use_autosuspend(ddev->dev);
+ pm_runtime_set_autosuspend_delay(ddev->dev, 5000);
+
+ pm_runtime_allow(ddev->dev);
+
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ pci_wake_from_d3(pdev, TRUE);
+
+ /*
+ * For runpm implemented via BACO, PMFW will handle the
+ * timing for BACO in and out:
+ * - put ASIC into BACO state only when both video and
+ * audio functions are in D3 state.
+ * - pull ASIC out of BACO state when either video or
+ * audio function is in D0 state.
+ * Also, at startup, PMFW assumes both functions are in
+ * D0 state.
+ *
+ * So if snd driver was loaded prior to amdgpu driver
+ * and audio function was put into D3 state, there will
+ * be no PMFW-aware D-state transition(D0->D3) on runpm
+ * suspend. Thus BACO will not be kicked in correctly.
+ *
+ * Via amdgpu_get_secondary_funcs(), the audio dev is put
+ * into D0 state. Then there will be a PMFW-aware D-state
+ * transition(D0->D3) on runpm suspend.
+ */
+ if (amdgpu_device_supports_baco(adev) &&
+ !(adev->flags & AMD_IS_APU) &&
+ adev->asic_type >= CHIP_NAVI10)
+ amdgpu_get_secondary_funcs(adev);
+ }
+
return 0;
err_pci:
@@ -2017,8 +2539,18 @@ static void
amdgpu_pci_remove(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ amdgpu_ras_eeprom_check_and_recover(adev);
+ amdgpu_xcp_dev_unplug(adev);
+ amdgpu_gmc_prepare_nps_mode_change(adev);
drm_dev_unplug(dev);
+
+ if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) {
+ pm_runtime_get_sync(dev->dev);
+ pm_runtime_forbid(dev->dev);
+ }
+
amdgpu_driver_unload_kms(dev);
/*
@@ -2039,6 +2571,10 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
if (amdgpu_ras_intr_triggered())
return;
+ /* the device may not have been resumed yet; return immediately in that case */
+ if (adev->in_s4 && adev->in_suspend)
+ return;
+
/* if we are running in a VM, make sure the device
* torn down properly on reboot/shutdown.
* unfortunately we can't detect certain
@@ -2046,115 +2582,85 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
*/
if (!amdgpu_passthrough(adev))
adev->mp1_state = PP_MP1_STATE_UNLOAD;
- amdgpu_device_ip_suspend(adev);
+ amdgpu_device_prepare(dev);
+ amdgpu_device_suspend(dev, true);
adev->mp1_state = PP_MP1_STATE_NONE;
}
-/**
- * amdgpu_drv_delayed_reset_work_handler - work handler for reset
- *
- * @work: work_struct.
- */
-static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work)
-{
- struct list_head device_list;
- struct amdgpu_device *adev;
- int i, r;
- struct amdgpu_reset_context reset_context;
-
- memset(&reset_context, 0, sizeof(reset_context));
-
- mutex_lock(&mgpu_info.mutex);
- if (mgpu_info.pending_reset == true) {
- mutex_unlock(&mgpu_info.mutex);
- return;
- }
- mgpu_info.pending_reset = true;
- mutex_unlock(&mgpu_info.mutex);
-
- /* Use a common context, just need to make sure full reset is done */
- reset_context.method = AMD_RESET_METHOD_NONE;
- set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-
- for (i = 0; i < mgpu_info.num_dgpu; i++) {
- adev = mgpu_info.gpu_ins[i].adev;
- reset_context.reset_req_dev = adev;
- r = amdgpu_device_pre_asic_reset(adev, &reset_context);
- if (r) {
- dev_err(adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
- r, adev_to_drm(adev)->unique);
- }
- if (!queue_work(system_unbound_wq, &adev->xgmi_reset_work))
- r = -EALREADY;
- }
- for (i = 0; i < mgpu_info.num_dgpu; i++) {
- adev = mgpu_info.gpu_ins[i].adev;
- flush_work(&adev->xgmi_reset_work);
- adev->gmc.xgmi.pending_reset = false;
- }
-
- /* reset function will rebuild the xgmi hive info , clear it now */
- for (i = 0; i < mgpu_info.num_dgpu; i++)
- amdgpu_xgmi_remove_device(mgpu_info.gpu_ins[i].adev);
-
- INIT_LIST_HEAD(&device_list);
-
- for (i = 0; i < mgpu_info.num_dgpu; i++)
- list_add_tail(&mgpu_info.gpu_ins[i].adev->reset_list, &device_list);
-
- /* unregister the GPU first, reset function will add them back */
- list_for_each_entry(adev, &device_list, reset_list)
- amdgpu_unregister_gpu_instance(adev);
-
- /* Use a common context, just need to make sure full reset is done */
- set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
- r = amdgpu_do_asic_reset(&device_list, &reset_context);
-
- if (r) {
- DRM_ERROR("reinit gpus failure");
- return;
- }
- for (i = 0; i < mgpu_info.num_dgpu; i++) {
- adev = mgpu_info.gpu_ins[i].adev;
- if (!adev->kfd.init_complete)
- amdgpu_amdkfd_device_init(adev);
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
- }
- return;
-}
-
static int amdgpu_pmops_prepare(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ /* device maybe not resumed here, return immediately in this case */
+ if (adev->in_s4 && adev->in_suspend)
+ return 0;
/* Return a positive number here so
* DPM_FLAG_SMART_SUSPEND works properly
*/
- if (amdgpu_device_supports_boco(drm_dev))
- return pm_runtime_suspended(dev) &&
- pm_suspend_via_firmware();
+ if (amdgpu_device_supports_boco(adev) && pm_runtime_suspended(dev))
+ return 1;
- return 0;
+ /* if we will not support s3 or s2i for the device
+ * then skip suspend
+ */
+ if (!amdgpu_acpi_is_s0ix_active(adev) &&
+ !amdgpu_acpi_is_s3_active(adev))
+ return 1;
+
+ return amdgpu_device_prepare(drm_dev);
}
static void amdgpu_pmops_complete(struct device *dev)
{
- /* nothing to do */
+ amdgpu_device_complete(dev_get_drvdata(dev));
}
static int amdgpu_pmops_suspend(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(drm_dev);
- int r;
if (amdgpu_acpi_is_s0ix_active(adev))
adev->in_s0ix = true;
- adev->in_s3 = true;
- r = amdgpu_device_suspend(drm_dev, true);
- adev->in_s3 = false;
+ else if (amdgpu_acpi_is_s3_active(adev))
+ adev->in_s3 = true;
+ if (!adev->in_s0ix && !adev->in_s3) {
+#if IS_ENABLED(CONFIG_SUSPEND)
+ /* don't allow going deep first time followed by s2idle the next time */
+ if (adev->last_suspend_state != PM_SUSPEND_ON &&
+ adev->last_suspend_state != pm_suspend_target_state) {
+ drm_err_once(drm_dev, "Unsupported suspend state %d\n",
+ pm_suspend_target_state);
+ return -EINVAL;
+ }
+#endif
+ return 0;
+ }
- return r;
+#if IS_ENABLED(CONFIG_SUSPEND)
+ /* cache the state last used for suspend */
+ adev->last_suspend_state = pm_suspend_target_state;
+#endif
+
+ return amdgpu_device_suspend(drm_dev, true);
+}
+
+static int amdgpu_pmops_suspend_noirq(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ int r;
+
+ if (amdgpu_acpi_should_gpu_reset(adev)) {
+ amdgpu_device_lock_reset_domain(adev->reset_domain);
+ r = amdgpu_asic_reset(adev);
+ amdgpu_device_unlock_reset_domain(adev->reset_domain);
+ return r;
+ }
+
+ return 0;
}
static int amdgpu_pmops_resume(struct device *dev)
@@ -2163,6 +2669,9 @@ static int amdgpu_pmops_resume(struct device *dev)
struct amdgpu_device *adev = drm_to_adev(drm_dev);
int r;
+ if (!adev->in_s0ix && !adev->in_s3)
+ return 0;
+
/* Avoids registers access if device is physically gone */
if (!pci_device_is_present(adev->pdev))
adev->no_hw_access = true;
@@ -2170,6 +2679,8 @@ static int amdgpu_pmops_resume(struct device *dev)
r = amdgpu_device_resume(drm_dev, true);
if (amdgpu_acpi_is_s0ix_active(adev))
adev->in_s0ix = false;
+ else
+ adev->in_s3 = false;
return r;
}
@@ -2179,24 +2690,34 @@ static int amdgpu_pmops_freeze(struct device *dev)
struct amdgpu_device *adev = drm_to_adev(drm_dev);
int r;
- adev->in_s4 = true;
r = amdgpu_device_suspend(drm_dev, true);
- adev->in_s4 = false;
if (r)
return r;
- return amdgpu_asic_reset(adev);
+
+ if (amdgpu_acpi_should_gpu_reset(adev))
+ return amdgpu_asic_reset(adev);
+ return 0;
}
static int amdgpu_pmops_thaw(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
+ /* do not resume device if it's normal hibernation */
+ if (!pm_hibernate_is_recovering() && !pm_hibernation_mode_is_suspend())
+ return 0;
+
return amdgpu_device_resume(drm_dev, true);
}
static int amdgpu_pmops_poweroff(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ /* device maybe not resumed here, return immediately in this case */
+ if (adev->in_s4 && adev->in_suspend)
+ return 0;
return amdgpu_device_suspend(drm_dev, true);
}
@@ -2208,6 +2729,82 @@ static int amdgpu_pmops_restore(struct device *dev)
return amdgpu_device_resume(drm_dev, true);
}
+static int amdgpu_runtime_idle_check_display(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ if (adev->mode_info.num_crtc) {
+ struct drm_connector *list_connector;
+ struct drm_connector_list_iter iter;
+ int ret = 0;
+
+ if (amdgpu_runtime_pm != -2) {
+ /* XXX: Return busy if any displays are connected to avoid
+ * possible display wakeups after runtime resume due to
+ * hotplug events in case any displays were connected while
+ * the GPU was in suspend. Remove this once that is fixed.
+ */
+ mutex_lock(&drm_dev->mode_config.mutex);
+ drm_connector_list_iter_begin(drm_dev, &iter);
+ drm_for_each_connector_iter(list_connector, &iter) {
+ if (list_connector->status == connector_status_connected) {
+ ret = -EBUSY;
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+ mutex_unlock(&drm_dev->mode_config.mutex);
+
+ if (ret)
+ return ret;
+ }
+
+ if (adev->dc_enabled) {
+ struct drm_crtc *crtc;
+
+ drm_for_each_crtc(crtc, drm_dev) {
+ drm_modeset_lock(&crtc->mutex, NULL);
+ if (crtc->state->active)
+ ret = -EBUSY;
+ drm_modeset_unlock(&crtc->mutex);
+ if (ret < 0)
+ break;
+ }
+ } else {
+ mutex_lock(&drm_dev->mode_config.mutex);
+ drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL);
+
+ drm_connector_list_iter_begin(drm_dev, &iter);
+ drm_for_each_connector_iter(list_connector, &iter) {
+ if (list_connector->dpms == DRM_MODE_DPMS_ON) {
+ ret = -EBUSY;
+ break;
+ }
+ }
+
+ drm_connector_list_iter_end(&iter);
+
+ drm_modeset_unlock(&drm_dev->mode_config.connection_mutex);
+ mutex_unlock(&drm_dev->mode_config.mutex);
+ }
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int amdgpu_runtime_idle_check_userq(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ return xa_empty(&adev->userq_doorbell_xa) ? 0 : -EBUSY;
+}
+
static int amdgpu_pmops_runtime_suspend(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
@@ -2215,14 +2812,22 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
struct amdgpu_device *adev = drm_to_adev(drm_dev);
int ret, i;
- if (!adev->runpm) {
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) {
pm_runtime_forbid(dev);
return -EBUSY;
}
+ ret = amdgpu_runtime_idle_check_display(dev);
+ if (ret)
+ return ret;
+ ret = amdgpu_runtime_idle_check_userq(dev);
+ if (ret)
+ return ret;
+
/* wait for all rings to drain before suspending */
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
+
if (ring && ring->sched.ready) {
ret = amdgpu_fence_wait_empty(ring);
if (ret)
@@ -2231,16 +2836,34 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
}
adev->in_runpm = true;
- if (amdgpu_device_supports_px(drm_dev))
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX)
drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+ /*
+ * By setting mp1_state as PP_MP1_STATE_UNLOAD, MP1 will do some
+ * proper cleanups and put itself into a state ready for PNP. That
+ * can address some random resuming failure observed on BOCO capable
+ * platforms.
+ * TODO: this may be also needed for PX capable platform.
+ */
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO)
+ adev->mp1_state = PP_MP1_STATE_UNLOAD;
+
+ ret = amdgpu_device_prepare(drm_dev);
+ if (ret)
+ return ret;
ret = amdgpu_device_suspend(drm_dev, false);
if (ret) {
adev->in_runpm = false;
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO)
+ adev->mp1_state = PP_MP1_STATE_NONE;
return ret;
}
- if (amdgpu_device_supports_px(drm_dev)) {
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO)
+ adev->mp1_state = PP_MP1_STATE_NONE;
+
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) {
/* Only need to handle PCI state in the driver for ATPX
* PCI core handles it for _PR3.
*/
@@ -2249,12 +2872,15 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
pci_ignore_hotplug(pdev);
pci_set_power_state(pdev, PCI_D3cold);
drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
- } else if (amdgpu_device_supports_boco(drm_dev)) {
+ } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) {
/* nothing to do */
- } else if (amdgpu_device_supports_baco(drm_dev)) {
- amdgpu_device_baco_enter(drm_dev);
+ } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) ||
+ (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) {
+ amdgpu_device_baco_enter(adev);
}
+ dev_dbg(&pdev->dev, "asic/device is runtime suspended\n");
+
return 0;
}
@@ -2265,14 +2891,14 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
struct amdgpu_device *adev = drm_to_adev(drm_dev);
int ret;
- if (!adev->runpm)
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
return -EINVAL;
/* Avoids registers access if device is physically gone */
if (!pci_device_is_present(adev->pdev))
adev->no_hw_access = true;
- if (amdgpu_device_supports_px(drm_dev)) {
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) {
drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
/* Only need to handle PCI state in the driver for ATPX
@@ -2284,19 +2910,23 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
if (ret)
return ret;
pci_set_master(pdev);
- } else if (amdgpu_device_supports_boco(drm_dev)) {
+ } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) {
/* Only need to handle PCI state in the driver for ATPX
* PCI core handles it for _PR3.
*/
pci_set_master(pdev);
- } else if (amdgpu_device_supports_baco(drm_dev)) {
- amdgpu_device_baco_exit(drm_dev);
+ } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) ||
+ (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) {
+ amdgpu_device_baco_exit(adev);
}
ret = amdgpu_device_resume(drm_dev, false);
- if (ret)
+ if (ret) {
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX)
+ pci_disable_device(pdev);
return ret;
+ }
- if (amdgpu_device_supports_px(drm_dev))
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX)
drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
adev->in_runpm = false;
return 0;
@@ -2306,53 +2936,38 @@ static int amdgpu_pmops_runtime_idle(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(drm_dev);
- /* we don't want the main rpm_idle to call suspend - we want to autosuspend */
- int ret = 1;
+ int ret;
- if (!adev->runpm) {
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) {
pm_runtime_forbid(dev);
return -EBUSY;
}
- if (amdgpu_device_has_dc_support(adev)) {
- struct drm_crtc *crtc;
-
- drm_for_each_crtc(crtc, drm_dev) {
- drm_modeset_lock(&crtc->mutex, NULL);
- if (crtc->state->active)
- ret = -EBUSY;
- drm_modeset_unlock(&crtc->mutex);
- if (ret < 0)
- break;
- }
-
- } else {
- struct drm_connector *list_connector;
- struct drm_connector_list_iter iter;
-
- mutex_lock(&drm_dev->mode_config.mutex);
- drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL);
-
- drm_connector_list_iter_begin(drm_dev, &iter);
- drm_for_each_connector_iter(list_connector, &iter) {
- if (list_connector->dpms == DRM_MODE_DPMS_ON) {
- ret = -EBUSY;
- break;
- }
- }
+ ret = amdgpu_runtime_idle_check_display(dev);
+ if (ret)
+ goto done;
- drm_connector_list_iter_end(&iter);
+ ret = amdgpu_runtime_idle_check_userq(dev);
+done:
+ pm_runtime_autosuspend(dev);
+ return ret;
+}
- drm_modeset_unlock(&drm_dev->mode_config.connection_mutex);
- mutex_unlock(&drm_dev->mode_config.mutex);
+static int amdgpu_drm_release(struct inode *inode, struct file *filp)
+{
+ struct drm_file *file_priv = filp->private_data;
+ struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
+ struct drm_device *dev = file_priv->minor->dev;
+ int idx;
+
+ if (fpriv && drm_dev_enter(dev, &idx)) {
+ fpriv->evf_mgr.fd_closing = true;
+ amdgpu_eviction_fence_destroy(&fpriv->evf_mgr);
+ amdgpu_userq_mgr_fini(&fpriv->userq_mgr);
+ drm_dev_exit(idx);
}
- if (ret == -EBUSY)
- DRM_DEBUG_DRIVER("failing to power off - crtc active\n");
-
- pm_runtime_mark_last_busy(dev);
- pm_runtime_autosuspend(dev);
- return ret;
+ return drm_release(inode, filp);
}
long amdgpu_drm_ioctl(struct file *filp,
@@ -2361,6 +2976,7 @@ long amdgpu_drm_ioctl(struct file *filp,
struct drm_file *file_priv = filp->private_data;
struct drm_device *dev;
long ret;
+
dev = file_priv->minor->dev;
ret = pm_runtime_get_sync(dev->dev);
if (ret < 0)
@@ -2368,21 +2984,21 @@ long amdgpu_drm_ioctl(struct file *filp,
ret = drm_ioctl(filp, cmd, arg);
- pm_runtime_mark_last_busy(dev->dev);
out:
pm_runtime_put_autosuspend(dev->dev);
return ret;
}
static const struct dev_pm_ops amdgpu_pm_ops = {
- .prepare = amdgpu_pmops_prepare,
- .complete = amdgpu_pmops_complete,
- .suspend = amdgpu_pmops_suspend,
- .resume = amdgpu_pmops_resume,
- .freeze = amdgpu_pmops_freeze,
- .thaw = amdgpu_pmops_thaw,
- .poweroff = amdgpu_pmops_poweroff,
- .restore = amdgpu_pmops_restore,
+ .prepare = pm_sleep_ptr(amdgpu_pmops_prepare),
+ .complete = pm_sleep_ptr(amdgpu_pmops_complete),
+ .suspend = pm_sleep_ptr(amdgpu_pmops_suspend),
+ .suspend_noirq = pm_sleep_ptr(amdgpu_pmops_suspend_noirq),
+ .resume = pm_sleep_ptr(amdgpu_pmops_resume),
+ .freeze = pm_sleep_ptr(amdgpu_pmops_freeze),
+ .thaw = pm_sleep_ptr(amdgpu_pmops_thaw),
+ .poweroff = pm_sleep_ptr(amdgpu_pmops_poweroff),
+ .restore = pm_sleep_ptr(amdgpu_pmops_restore),
.runtime_suspend = amdgpu_pmops_runtime_suspend,
.runtime_resume = amdgpu_pmops_runtime_resume,
.runtime_idle = amdgpu_pmops_runtime_idle,
@@ -2404,7 +3020,7 @@ static const struct file_operations amdgpu_driver_kms_fops = {
.owner = THIS_MODULE,
.open = drm_open,
.flush = amdgpu_flush,
- .release = drm_release,
+ .release = amdgpu_drm_release,
.unlocked_ioctl = amdgpu_drm_ioctl,
.mmap = drm_gem_mmap,
.poll = drm_poll,
@@ -2413,8 +3029,9 @@ static const struct file_operations amdgpu_driver_kms_fops = {
.compat_ioctl = amdgpu_kms_compat_ioctl,
#endif
#ifdef CONFIG_PROC_FS
- .show_fdinfo = amdgpu_show_fdinfo
+ .show_fdinfo = drm_show_fdinfo,
#endif
+ .fop_flags = FOP_UNSIGNED_OFFSET,
};
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
@@ -2424,9 +3041,8 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
if (!filp)
return -EINVAL;
- if (filp->f_op != &amdgpu_driver_kms_fops) {
+ if (filp->f_op != &amdgpu_driver_kms_fops)
return -EINVAL;
- }
file = filp->private_data;
*fpriv = file->driver_priv;
@@ -2451,6 +3067,10 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_LIST_HANDLES, amdgpu_gem_list_handles_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
};
static const struct drm_driver amdgpu_kms_driver = {
@@ -2461,22 +3081,44 @@ static const struct drm_driver amdgpu_kms_driver = {
DRIVER_SYNCOBJ_TIMELINE,
.open = amdgpu_driver_open_kms,
.postclose = amdgpu_driver_postclose_kms,
- .lastclose = amdgpu_driver_lastclose_kms,
.ioctls = amdgpu_ioctls_kms,
.num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms),
.dumb_create = amdgpu_mode_dumb_create,
.dumb_map_offset = amdgpu_mode_dumb_mmap,
+ DRM_FBDEV_TTM_DRIVER_OPS,
+ .fops = &amdgpu_driver_kms_fops,
+ .release = &amdgpu_driver_release_kms,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = amdgpu_show_fdinfo,
+#endif
+
+ .gem_prime_import = amdgpu_gem_prime_import,
+
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .major = KMS_DRIVER_MAJOR,
+ .minor = KMS_DRIVER_MINOR,
+ .patchlevel = KMS_DRIVER_PATCHLEVEL,
+};
+
+const struct drm_driver amdgpu_partition_driver = {
+ .driver_features =
+ DRIVER_GEM | DRIVER_RENDER | DRIVER_SYNCOBJ |
+ DRIVER_SYNCOBJ_TIMELINE,
+ .open = amdgpu_driver_open_kms,
+ .postclose = amdgpu_driver_postclose_kms,
+ .ioctls = amdgpu_ioctls_kms,
+ .num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms),
+ .dumb_create = amdgpu_mode_dumb_create,
+ .dumb_map_offset = amdgpu_mode_dumb_mmap,
+ DRM_FBDEV_TTM_DRIVER_OPS,
.fops = &amdgpu_driver_kms_fops,
.release = &amdgpu_driver_release_kms,
- .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
- .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_import = amdgpu_gem_prime_import,
- .gem_prime_mmap = drm_gem_prime_mmap,
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
- .date = DRIVER_DATE,
.major = KMS_DRIVER_MAJOR,
.minor = KMS_DRIVER_MINOR,
.patchlevel = KMS_DRIVER_PATCHLEVEL,
@@ -2489,25 +3131,20 @@ static struct pci_error_handlers amdgpu_pci_err_handler = {
.resume = amdgpu_pci_resume,
};
-extern const struct attribute_group amdgpu_vram_mgr_attr_group;
-extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
-extern const struct attribute_group amdgpu_vbios_version_attr_group;
-
static const struct attribute_group *amdgpu_sysfs_groups[] = {
&amdgpu_vram_mgr_attr_group,
&amdgpu_gtt_mgr_attr_group,
- &amdgpu_vbios_version_attr_group,
+ &amdgpu_flash_attr_group,
NULL,
};
-
static struct pci_driver amdgpu_kms_pci_driver = {
.name = DRIVER_NAME,
.id_table = pciidlist,
.probe = amdgpu_pci_probe,
.remove = amdgpu_pci_remove,
.shutdown = amdgpu_pci_shutdown,
- .driver.pm = &amdgpu_pm_ops,
+ .driver.pm = pm_ptr(&amdgpu_pm_ops),
.err_handler = &amdgpu_pci_err_handler,
.dev_groups = amdgpu_sysfs_groups,
};
@@ -2516,16 +3153,11 @@ static int __init amdgpu_init(void)
{
int r;
- if (vgacon_text_force()) {
- DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n");
- return -EINVAL;
- }
-
r = amdgpu_sync_init();
if (r)
goto error_sync;
- r = amdgpu_fence_slab_init();
+ r = amdgpu_userq_fence_slab_init();
if (r)
goto error_fence;
@@ -2536,6 +3168,12 @@ static int __init amdgpu_init(void)
/* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */
amdgpu_amdkfd_init();
+ if (amdgpu_pp_feature_mask & PP_OVERDRIVE_MASK) {
+ add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+ pr_crit("Overdrive is enabled, please disable it before "
+ "reporting any bugs unrelated to overdrive.\n");
+ }
+
/* let modprobe override vga console setting */
return pci_register_driver(&amdgpu_kms_pci_driver);
@@ -2551,9 +3189,11 @@ static void __exit amdgpu_exit(void)
amdgpu_amdkfd_fini();
pci_unregister_driver(&amdgpu_kms_pci_driver);
amdgpu_unregister_atpx_handler();
+ amdgpu_acpi_release();
amdgpu_sync_fini();
- amdgpu_fence_slab_fini();
+ amdgpu_userq_fence_slab_fini();
mmu_notifier_synchronize();
+ amdgpu_xcp_drv_release();
}
module_init(amdgpu_init);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h
index e3a4f7048042..2d86cc6f7f4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h
@@ -40,9 +40,13 @@
#define DRIVER_NAME "amdgpu"
#define DRIVER_DESC "AMD GPU"
-#define DRIVER_DATE "20150101"
+
+extern const struct drm_driver amdgpu_partition_driver;
long amdgpu_drm_ioctl(struct file *filp,
unsigned int cmd, unsigned long arg);
+long amdgpu_kms_compat_ioctl(struct file *filp,
+ unsigned int cmd, unsigned long arg);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c
index 4d9eb0137f8c..8cd69836dd99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c
@@ -79,16 +79,18 @@
* That is, for an I2C EEPROM driver everything is controlled by
* the "eeprom_addr".
*
+ * See also top of amdgpu_ras_eeprom.c.
+ *
* P.S. If you need to write, lock and read the Identification Page,
* (M24M02-DR device only, which we do not use), change the "7" to
* "0xF" in the macro below, and let the client set bit 20 to 1 in
* "eeprom_addr", and set A10 to 0 to write into it, and A10 and A1 to
* 1 to lock it permanently.
*/
-#define MAKE_I2C_ADDR(_aa) ((0xA << 3) | (((_aa) >> 16) & 7))
+#define MAKE_I2C_ADDR(_aa) ((0xA << 3) | (((_aa) >> 16) & 0xF))
static int __amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
- u8 *eeprom_buf, u16 buf_size, bool read)
+ u8 *eeprom_buf, u32 buf_size, bool read)
{
u8 eeprom_offset_buf[EEPROM_OFFSET_SIZE];
struct i2c_msg msgs[] = {
@@ -131,15 +133,15 @@ static int __amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
* cycle begins. This is implied for the
* "i2c_transfer()" abstraction.
*/
- len = min(EEPROM_PAGE_SIZE - (eeprom_addr &
- EEPROM_PAGE_MASK),
- (u32)buf_size);
+ len = min(EEPROM_PAGE_SIZE - (eeprom_addr & EEPROM_PAGE_MASK),
+ buf_size);
} else {
/* Reading from the EEPROM has no limitation
* on the number of bytes read from the EEPROM
* device--they are simply sequenced out.
+ * Keep in mind that i2c_msg.len is u16 type.
*/
- len = buf_size;
+ len = min(U16_MAX, buf_size);
}
msgs[1].len = len;
msgs[1].buf = eeprom_buf;
@@ -177,10 +179,12 @@ static int __amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
* Returns the number of bytes read/written; -errno on error.
*/
static int amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
- u8 *eeprom_buf, u16 buf_size, bool read)
+ u8 *eeprom_buf, u32 buf_size, bool read)
{
const struct i2c_adapter_quirks *quirks = i2c_adap->quirks;
u16 limit;
+ u16 ps; /* Partial size */
+ int res = 0, r;
if (!quirks)
limit = 0;
@@ -196,35 +200,32 @@ static int amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
dev_err_ratelimited(&i2c_adap->dev,
"maddr:0x%04X size:0x%02X:quirk max_%s_len must be > %d",
eeprom_addr, buf_size,
- read ? "read" : "write", EEPROM_OFFSET_SIZE);
+ str_read_write(read), EEPROM_OFFSET_SIZE);
return -EINVAL;
- } else {
- u16 ps; /* Partial size */
- int res = 0, r;
-
- /* The "limit" includes all data bytes sent/received,
- * which would include the EEPROM_OFFSET_SIZE bytes.
- * Account for them here.
- */
- limit -= EEPROM_OFFSET_SIZE;
- for ( ; buf_size > 0;
- buf_size -= ps, eeprom_addr += ps, eeprom_buf += ps) {
- ps = min(limit, buf_size);
-
- r = __amdgpu_eeprom_xfer(i2c_adap, eeprom_addr,
- eeprom_buf, ps, read);
- if (r < 0)
- return r;
- res += r;
- }
+ }
- return res;
+ /* The "limit" includes all data bytes sent/received,
+ * which would include the EEPROM_OFFSET_SIZE bytes.
+ * Account for them here.
+ */
+ limit -= EEPROM_OFFSET_SIZE;
+ for ( ; buf_size > 0;
+ buf_size -= ps, eeprom_addr += ps, eeprom_buf += ps) {
+ ps = min(limit, buf_size);
+
+ r = __amdgpu_eeprom_xfer(i2c_adap, eeprom_addr,
+ eeprom_buf, ps, read);
+ if (r < 0)
+ return r;
+ res += r;
}
+
+ return res;
}
int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap,
u32 eeprom_addr, u8 *eeprom_buf,
- u16 bytes)
+ u32 bytes)
{
return amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, eeprom_buf, bytes,
true);
@@ -232,7 +233,7 @@ int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap,
int amdgpu_eeprom_write(struct i2c_adapter *i2c_adap,
u32 eeprom_addr, u8 *eeprom_buf,
- u16 bytes)
+ u32 bytes)
{
return amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, eeprom_buf, bytes,
false);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h
index 6935adb2be1f..8083b8253ef4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h
@@ -28,10 +28,10 @@
int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap,
u32 eeprom_addr, u8 *eeprom_buf,
- u16 bytes);
+ u32 bytes);
int amdgpu_eeprom_write(struct i2c_adapter *i2c_adap,
u32 eeprom_addr, u8 *eeprom_buf,
- u16 bytes);
+ u32 bytes);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
index af4ef84e27a7..3aaeed2d3562 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
@@ -24,7 +24,6 @@
* Alex Deucher
*/
-#include <drm/drm_crtc_helper.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_connectors.h"
@@ -71,6 +70,7 @@ void amdgpu_encoder_set_active_device(struct drm_encoder *encoder)
drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
amdgpu_encoder->active_device = amdgpu_encoder->devices & amdgpu_connector->devices;
DRM_DEBUG_KMS("setting active device to %08x from %08x %08x for encoder %d\n",
amdgpu_encoder->active_device, amdgpu_encoder->devices,
@@ -166,12 +166,12 @@ void amdgpu_panel_mode_fixup(struct drm_encoder *encoder,
{
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
- unsigned hblank = native_mode->htotal - native_mode->hdisplay;
- unsigned vblank = native_mode->vtotal - native_mode->vdisplay;
- unsigned hover = native_mode->hsync_start - native_mode->hdisplay;
- unsigned vover = native_mode->vsync_start - native_mode->vdisplay;
- unsigned hsync_width = native_mode->hsync_end - native_mode->hsync_start;
- unsigned vsync_width = native_mode->vsync_end - native_mode->vsync_start;
+ unsigned int hblank = native_mode->htotal - native_mode->hdisplay;
+ unsigned int vblank = native_mode->vtotal - native_mode->vdisplay;
+ unsigned int hover = native_mode->hsync_start - native_mode->hdisplay;
+ unsigned int vover = native_mode->vsync_start - native_mode->vdisplay;
+ unsigned int hsync_width = native_mode->hsync_end - native_mode->hsync_start;
+ unsigned int vsync_width = native_mode->vsync_end - native_mode->vsync_start;
adjusted_mode->clock = native_mode->clock;
adjusted_mode->flags = native_mode->flags;
@@ -222,7 +222,7 @@ bool amdgpu_dig_monitor_is_duallink(struct drm_encoder *encoder,
case DRM_MODE_CONNECTOR_HDMIB:
if (amdgpu_connector->use_digital) {
/* HDMI 1.3 supports up to 340 Mhz over single link */
- if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) {
+ if (connector->display_info.is_hdmi) {
if (pixel_clock > 340000)
return true;
else
@@ -244,7 +244,7 @@ bool amdgpu_dig_monitor_is_duallink(struct drm_encoder *encoder,
return false;
else {
/* HDMI 1.3 supports up to 340 Mhz over single link */
- if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) {
+ if (connector->display_info.is_hdmi) {
if (pixel_clock > 340000)
return true;
else
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
new file mode 100644
index 000000000000..23d7d0b0d625
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/sched.h>
+#include <drm/drm_exec.h>
+#include "amdgpu.h"
+
+/* Map a work_struct pointer (member @name) back to the eviction fence manager that embeds it. */
+#define work_to_evf_mgr(w, name) container_of(w, struct amdgpu_eviction_fence_mgr, name)
+/* Map an eviction fence manager back to the amdgpu_fpriv that embeds it as evf_mgr. */
+#define evf_mgr_to_fpriv(e) container_of(e, struct amdgpu_fpriv, evf_mgr)
+
+/*
+ * .get_driver_name hook of amdgpu_eviction_fence_ops: every eviction fence
+ * reports the same fixed driver name; @fence is not inspected.
+ */
+static const char *
+amdgpu_eviction_fence_get_driver_name(struct dma_fence *fence)
+{
+ return "amdgpu_eviction_fence";
+}
+
+/*
+ * .get_timeline_name hook of amdgpu_eviction_fence_ops: return the timeline
+ * name stored in the eviction fence that embeds @f as its base member.
+ */
+static const char *
+amdgpu_eviction_fence_get_timeline_name(struct dma_fence *f)
+{
+	return container_of(f, struct amdgpu_eviction_fence, base)->timeline_name;
+}
+
+/**
+ * amdgpu_eviction_fence_replace_fence - install a fresh eviction fence
+ * @evf_mgr: eviction fence manager whose current fence is replaced
+ * @exec: drm_exec context; the new fence is attached to every object locked in it
+ *
+ * Does nothing when the manager already holds an unsignaled fence. Otherwise
+ * a new fence is created, published in @evf_mgr, attached to each locked BO,
+ * and the manager's reference on the previous fence is dropped.
+ *
+ * If attaching fails, the previous fence pointer is restored and the new
+ * fence is signaled and unreferenced rather than freed directly: the fence is
+ * refcounted, and BOs visited before the failure may already carry it in
+ * their reservation objects.
+ *
+ * Returns 0 on success, -ENOMEM if no fence could be created, or the error
+ * returned by amdgpu_eviction_fence_attach().
+ */
+int
+amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
+				    struct drm_exec *exec)
+{
+	struct amdgpu_eviction_fence *old_ef, *new_ef;
+	struct drm_gem_object *obj;
+	unsigned long index;
+	int ret;
+
+	/* A live (unsignaled) fence is still valid, nothing to replace. */
+	if (evf_mgr->ev_fence &&
+	    !dma_fence_is_signaled(&evf_mgr->ev_fence->base))
+		return 0;
+	/*
+	 * Steps to replace eviction fence:
+	 * * lock all objects in exec (caller)
+	 * * create a new eviction fence
+	 * * update new eviction fence in evf_mgr
+	 * * attach the new eviction fence to BOs
+	 * * release the old fence
+	 * * unlock the objects (caller)
+	 */
+	new_ef = amdgpu_eviction_fence_create(evf_mgr);
+	if (!new_ef) {
+		DRM_ERROR("Failed to create new eviction fence\n");
+		return -ENOMEM;
+	}
+
+	/* Update the eviction fence now */
+	spin_lock(&evf_mgr->ev_fence_lock);
+	old_ef = evf_mgr->ev_fence;
+	evf_mgr->ev_fence = new_ef;
+	spin_unlock(&evf_mgr->ev_fence_lock);
+
+	/* Attach the new fence */
+	drm_exec_for_each_locked_object(exec, index, obj) {
+		struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+		if (!bo)
+			continue;
+		ret = amdgpu_eviction_fence_attach(evf_mgr, bo);
+		if (ret) {
+			DRM_ERROR("Failed to attch new eviction fence\n");
+			goto free_err;
+		}
+	}
+
+	/* Free old fence */
+	if (old_ef)
+		dma_fence_put(&old_ef->base);
+	return 0;
+
+free_err:
+	/*
+	 * Roll back: new_ef is already published in evf_mgr and may already sit
+	 * in the reservation objects of the BOs handled so far, so it must not
+	 * be kfree()d. Put the previous fence back so the manager never points
+	 * at a fence it no longer owns.
+	 */
+	spin_lock(&evf_mgr->ev_fence_lock);
+	evf_mgr->ev_fence = old_ef;
+	spin_unlock(&evf_mgr->ev_fence_lock);
+
+	/*
+	 * Signal the abandoned fence so nothing can wait on it forever, then
+	 * drop the creation reference; the memory is released once the last
+	 * holder lets go.
+	 * NOTE(review): assumes callers treat a failure here as "no valid
+	 * eviction fence installed" - confirm against the user queue code.
+	 */
+	amdgpu_eviction_fence_signal(evf_mgr, new_ef);
+	dma_fence_put(&new_ef->base);
+	return ret;
+}
+
+/*
+ * Delayed-work handler behind suspend_work: take a reference on the manager's
+ * current eviction fence and hand it to amdgpu_userq_evict(). The whole
+ * operation runs under the user queue manager's userq_mutex; the fence
+ * pointer itself is only sampled under ev_fence_lock.
+ */
+static void
+amdgpu_eviction_fence_suspend_worker(struct work_struct *work)
+{
+	struct amdgpu_eviction_fence_mgr *evf_mgr = work_to_evf_mgr(work, suspend_work.work);
+	struct amdgpu_fpriv *fpriv = evf_mgr_to_fpriv(evf_mgr);
+	struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
+	struct amdgpu_eviction_fence *ef;
+
+	mutex_lock(&uq_mgr->userq_mutex);
+
+	/* Sample the current fence and pin it while the spinlock is held. */
+	spin_lock(&evf_mgr->ev_fence_lock);
+	ef = evf_mgr->ev_fence;
+	if (ef)
+		dma_fence_get(&ef->base);
+	spin_unlock(&evf_mgr->ev_fence_lock);
+
+	/* No fence installed: nothing to evict. */
+	if (!ef) {
+		mutex_unlock(&uq_mgr->userq_mutex);
+		return;
+	}
+
+	amdgpu_userq_evict(uq_mgr, ef);
+
+	mutex_unlock(&uq_mgr->userq_mutex);
+	dma_fence_put(&ef->base);
+}
+
+/*
+ * .enable_signaling hook of amdgpu_eviction_fence_ops: queue the owning
+ * manager's suspend work with no delay. Always returns true; a NULL @f is
+ * tolerated and schedules nothing.
+ */
+static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f)
+{
+	if (f)
+		schedule_delayed_work(&to_ev_fence(f)->evf_mgr->suspend_work, 0);
+
+	return true;
+}
+
+/* dma_fence_ops used for every eviction fence; only the name hooks and enable_signaling are set. */
+static const struct dma_fence_ops amdgpu_eviction_fence_ops = {
+ .get_driver_name = amdgpu_eviction_fence_get_driver_name,
+ .get_timeline_name = amdgpu_eviction_fence_get_timeline_name,
+ .enable_signaling = amdgpu_eviction_fence_enable_signaling,
+};
+
+/**
+ * amdgpu_eviction_fence_signal - signal an eviction fence
+ * @evf_mgr: manager whose ev_fence_lock is held around the signal
+ * @ev_fence: fence to signal
+ */
+void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_eviction_fence *ev_fence)
+{
+ spin_lock(&evf_mgr->ev_fence_lock);
+ dma_fence_signal(&ev_fence->base);
+ spin_unlock(&evf_mgr->ev_fence_lock);
+}
+
+/**
+ * amdgpu_eviction_fence_create - allocate and initialise an eviction fence
+ * @evf_mgr: manager supplying the fence context and the sequence counter
+ *
+ * The timeline name is copied from the calling task's comm and the sequence
+ * number is the manager's counter after incrementing it. The fence is only
+ * returned here; it is not stored in @evf_mgr.
+ *
+ * Returns the new fence, or NULL if the allocation fails.
+ */
+struct amdgpu_eviction_fence *
+amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+	struct amdgpu_eviction_fence *ef = kzalloc(sizeof(*ef), GFP_KERNEL);
+
+	if (!ef)
+		return NULL;
+
+	spin_lock_init(&ef->lock);
+	get_task_comm(ef->timeline_name, current);
+	ef->evf_mgr = evf_mgr;
+
+	dma_fence_init64(&ef->base, &amdgpu_eviction_fence_ops, &ef->lock,
+			 evf_mgr->ev_fence_ctx,
+			 atomic_inc_return(&evf_mgr->ev_fence_seq));
+	return ef;
+}
+
+/**
+ * amdgpu_eviction_fence_destroy - tear down the manager's eviction fence
+ * @evf_mgr: eviction fence manager being torn down
+ *
+ * Flushes the suspend work, then waits (non-interruptibly) for the current
+ * fence, if any, and drops the manager's reference on it.
+ */
+void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+ struct amdgpu_eviction_fence *ev_fence;
+
+ /* Wait for any pending work to execute */
+ flush_delayed_work(&evf_mgr->suspend_work);
+
+ /* Sample the fence pointer under the lock that guards it */
+ spin_lock(&evf_mgr->ev_fence_lock);
+ ev_fence = evf_mgr->ev_fence;
+ spin_unlock(&evf_mgr->ev_fence_lock);
+
+ if (!ev_fence)
+ return;
+
+ dma_fence_wait(&ev_fence->base, false);
+
+ /*
+  * NOTE(review): evf_mgr->ev_fence is not cleared here, so it still points
+  * at the fence after this put - confirm nothing uses the manager afterwards.
+  */
+ /* Last unref of ev_fence */
+ dma_fence_put(&ev_fence->base);
+}
+
+/**
+ * amdgpu_eviction_fence_attach - add the current eviction fence to a BO
+ * @evf_mgr: manager providing the current eviction fence
+ * @bo: buffer object whose reservation object receives the fence
+ *
+ * Reserves one fence slot in the BO's reservation object and, if the manager
+ * currently has a fence, adds it with DMA_RESV_USAGE_BOOKKEEP.
+ *
+ * Returns 0 on success (including when there is no reservation object or no
+ * current fence), or the error from dma_resv_reserve_fences().
+ */
+int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_bo *bo)
+{
+ struct amdgpu_eviction_fence *ev_fence;
+ struct dma_resv *resv = bo->tbo.base.resv;
+ int ret;
+
+ if (!resv)
+ return 0;
+
+ /* Slot must be reserved before dma_resv_add_fence() below */
+ ret = dma_resv_reserve_fences(resv, 1);
+ if (ret) {
+ DRM_DEBUG_DRIVER("Failed to resv fence space\n");
+ return ret;
+ }
+
+ /* ev_fence_lock keeps the manager's fence pointer stable while it is added */
+ spin_lock(&evf_mgr->ev_fence_lock);
+ ev_fence = evf_mgr->ev_fence;
+ if (ev_fence)
+ dma_resv_add_fence(resv, &ev_fence->base, DMA_RESV_USAGE_BOOKKEEP);
+ spin_unlock(&evf_mgr->ev_fence_lock);
+
+ return 0;
+}
+
+/**
+ * amdgpu_eviction_fence_detach - remove this manager's eviction fences from a BO
+ * @evf_mgr: manager whose fence context identifies the fences to replace
+ * @bo: buffer object whose reservation object is updated
+ *
+ * Replaces every fence of the manager's context in the BO's reservation
+ * object with the stub fence returned by dma_fence_get_stub().
+ */
+void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_bo *bo)
+{
+ struct dma_fence *stub = dma_fence_get_stub();
+
+ dma_resv_replace_fences(bo->tbo.base.resv, evf_mgr->ev_fence_ctx,
+ stub, DMA_RESV_USAGE_BOOKKEEP);
+ /* Drop the local reference obtained from dma_fence_get_stub() */
+ dma_fence_put(stub);
+}
+
+/**
+ * amdgpu_eviction_fence_init - initialise an eviction fence manager
+ * @evf_mgr: manager to initialise
+ *
+ * Resets the sequence counter, allocates a fence context, and sets up the
+ * fence lock and the suspend work item. No fence is created here.
+ *
+ * Always returns 0.
+ */
+int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+ /* This needs to be done one time per open */
+ atomic_set(&evf_mgr->ev_fence_seq, 0);
+ evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1);
+ spin_lock_init(&evf_mgr->ev_fence_lock);
+
+ INIT_DELAYED_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker);
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
new file mode 100644
index 000000000000..fcd867b7147d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_EV_FENCE_H_
+#define AMDGPU_EV_FENCE_H_
+
+/* One eviction fence: a dma_fence plus the state its ops need. */
+struct amdgpu_eviction_fence {
+ /* Embedded fence; container_of() on this member recovers the wrapper */
+ struct dma_fence base;
+ /* Lock handed to the fence at init time */
+ spinlock_t lock;
+ /* Filled from the creating task's comm; reported as the timeline name */
+ char timeline_name[TASK_COMM_LEN];
+ /* Manager that created this fence */
+ struct amdgpu_eviction_fence_mgr *evf_mgr;
+};
+
+/* Eviction fence manager; initialised by amdgpu_eviction_fence_init(). */
+struct amdgpu_eviction_fence_mgr {
+ /* Fence context shared by all fences created from this manager */
+ u64 ev_fence_ctx;
+ /* Sequence counter, incremented for each fence created */
+ atomic_t ev_fence_seq;
+ /* Taken around reads and updates of ev_fence */
+ spinlock_t ev_fence_lock;
+ /* Current eviction fence, or NULL if none has been installed */
+ struct amdgpu_eviction_fence *ev_fence;
+ /* Scheduled when signaling is enabled on a fence of this manager */
+ struct delayed_work suspend_work;
+ /* NOTE(review): not referenced in amdgpu_eviction_fence.c - presumably set on file close; confirm */
+ uint8_t fd_closing;
+};
+
+/* Eviction fence helper functions */
+struct amdgpu_eviction_fence *
+amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+void
+amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+int
+amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_bo *bo);
+
+void
+amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_bo *bo);
+
+int
+amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+void
+amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_eviction_fence *ev_fence);
+
+int
+amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct drm_exec *exec);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
deleted file mode 100644
index cd0acbea75da..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ /dev/null
@@ -1,388 +0,0 @@
-/*
- * Copyright © 2007 David Airlie
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * David Airlie
- */
-
-#include <linux/module.h>
-#include <linux/pm_runtime.h>
-#include <linux/slab.h>
-#include <linux/vga_switcheroo.h>
-
-#include <drm/amdgpu_drm.h>
-#include <drm/drm_crtc.h>
-#include <drm/drm_crtc_helper.h>
-#include <drm/drm_fb_helper.h>
-#include <drm/drm_fourcc.h>
-
-#include "amdgpu.h"
-#include "cikd.h"
-#include "amdgpu_gem.h"
-
-#include "amdgpu_display.h"
-
-/* object hierarchy -
- this contains a helper + a amdgpu fb
- the helper contains a pointer to amdgpu framebuffer baseclass.
-*/
-
-static int
-amdgpufb_open(struct fb_info *info, int user)
-{
- struct drm_fb_helper *fb_helper = info->par;
- int ret = pm_runtime_get_sync(fb_helper->dev->dev);
- if (ret < 0 && ret != -EACCES) {
- pm_runtime_mark_last_busy(fb_helper->dev->dev);
- pm_runtime_put_autosuspend(fb_helper->dev->dev);
- return ret;
- }
- return 0;
-}
-
-static int
-amdgpufb_release(struct fb_info *info, int user)
-{
- struct drm_fb_helper *fb_helper = info->par;
-
- pm_runtime_mark_last_busy(fb_helper->dev->dev);
- pm_runtime_put_autosuspend(fb_helper->dev->dev);
- return 0;
-}
-
-static const struct fb_ops amdgpufb_ops = {
- .owner = THIS_MODULE,
- DRM_FB_HELPER_DEFAULT_OPS,
- .fb_open = amdgpufb_open,
- .fb_release = amdgpufb_release,
- .fb_fillrect = drm_fb_helper_cfb_fillrect,
- .fb_copyarea = drm_fb_helper_cfb_copyarea,
- .fb_imageblit = drm_fb_helper_cfb_imageblit,
-};
-
-
-int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int cpp, bool tiled)
-{
- int aligned = width;
- int pitch_mask = 0;
-
- switch (cpp) {
- case 1:
- pitch_mask = 255;
- break;
- case 2:
- pitch_mask = 127;
- break;
- case 3:
- case 4:
- pitch_mask = 63;
- break;
- }
-
- aligned += pitch_mask;
- aligned &= ~pitch_mask;
- return aligned * cpp;
-}
-
-static void amdgpufb_destroy_pinned_object(struct drm_gem_object *gobj)
-{
- struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
- int ret;
-
- ret = amdgpu_bo_reserve(abo, true);
- if (likely(ret == 0)) {
- amdgpu_bo_kunmap(abo);
- amdgpu_bo_unpin(abo);
- amdgpu_bo_unreserve(abo);
- }
- drm_gem_object_put(gobj);
-}
-
-static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
- struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_gem_object **gobj_p)
-{
- const struct drm_format_info *info;
- struct amdgpu_device *adev = rfbdev->adev;
- struct drm_gem_object *gobj = NULL;
- struct amdgpu_bo *abo = NULL;
- bool fb_tiled = false; /* useful for testing */
- u32 tiling_flags = 0, domain;
- int ret;
- int aligned_size, size;
- int height = mode_cmd->height;
- u32 cpp;
- u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
- AMDGPU_GEM_CREATE_VRAM_CLEARED;
-
- info = drm_get_format_info(adev_to_drm(adev), mode_cmd);
- cpp = info->cpp[0];
-
- /* need to align pitch with crtc limits */
- mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp,
- fb_tiled);
- domain = amdgpu_display_supported_domains(adev, flags);
- height = ALIGN(mode_cmd->height, 8);
- size = mode_cmd->pitches[0] * height;
- aligned_size = ALIGN(size, PAGE_SIZE);
- ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, flags,
- ttm_bo_type_device, NULL, &gobj);
- if (ret) {
- pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
- return -ENOMEM;
- }
- abo = gem_to_amdgpu_bo(gobj);
-
- if (fb_tiled)
- tiling_flags = AMDGPU_TILING_SET(ARRAY_MODE, GRPH_ARRAY_2D_TILED_THIN1);
-
- ret = amdgpu_bo_reserve(abo, false);
- if (unlikely(ret != 0))
- goto out_unref;
-
- if (tiling_flags) {
- ret = amdgpu_bo_set_tiling_flags(abo,
- tiling_flags);
- if (ret)
- dev_err(adev->dev, "FB failed to set tiling flags\n");
- }
-
- ret = amdgpu_bo_pin(abo, domain);
- if (ret) {
- amdgpu_bo_unreserve(abo);
- goto out_unref;
- }
-
- ret = amdgpu_ttm_alloc_gart(&abo->tbo);
- if (ret) {
- amdgpu_bo_unreserve(abo);
- dev_err(adev->dev, "%p bind failed\n", abo);
- goto out_unref;
- }
-
- ret = amdgpu_bo_kmap(abo, NULL);
- amdgpu_bo_unreserve(abo);
- if (ret) {
- goto out_unref;
- }
-
- *gobj_p = gobj;
- return 0;
-out_unref:
- amdgpufb_destroy_pinned_object(gobj);
- *gobj_p = NULL;
- return ret;
-}
-
-static int amdgpufb_create(struct drm_fb_helper *helper,
- struct drm_fb_helper_surface_size *sizes)
-{
- struct amdgpu_fbdev *rfbdev = (struct amdgpu_fbdev *)helper;
- struct amdgpu_device *adev = rfbdev->adev;
- struct fb_info *info;
- struct drm_framebuffer *fb = NULL;
- struct drm_mode_fb_cmd2 mode_cmd;
- struct drm_gem_object *gobj = NULL;
- struct amdgpu_bo *abo = NULL;
- int ret;
-
- memset(&mode_cmd, 0, sizeof(mode_cmd));
- mode_cmd.width = sizes->surface_width;
- mode_cmd.height = sizes->surface_height;
-
- if (sizes->surface_bpp == 24)
- sizes->surface_bpp = 32;
-
- mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
- sizes->surface_depth);
-
- ret = amdgpufb_create_pinned_object(rfbdev, &mode_cmd, &gobj);
- if (ret) {
- DRM_ERROR("failed to create fbcon object %d\n", ret);
- return ret;
- }
-
- abo = gem_to_amdgpu_bo(gobj);
-
- /* okay we have an object now allocate the framebuffer */
- info = drm_fb_helper_alloc_fbi(helper);
- if (IS_ERR(info)) {
- ret = PTR_ERR(info);
- goto out;
- }
-
- ret = amdgpu_display_gem_fb_init(adev_to_drm(adev), &rfbdev->rfb,
- &mode_cmd, gobj);
- if (ret) {
- DRM_ERROR("failed to initialize framebuffer %d\n", ret);
- goto out;
- }
-
- fb = &rfbdev->rfb.base;
-
- /* setup helper */
- rfbdev->helper.fb = fb;
-
- info->fbops = &amdgpufb_ops;
-
- info->fix.smem_start = amdgpu_gmc_vram_cpu_pa(adev, abo);
- info->fix.smem_len = amdgpu_bo_size(abo);
- info->screen_base = amdgpu_bo_kptr(abo);
- info->screen_size = amdgpu_bo_size(abo);
-
- drm_fb_helper_fill_info(info, &rfbdev->helper, sizes);
-
- /* setup aperture base/size for vesafb takeover */
- info->apertures->ranges[0].base = adev_to_drm(adev)->mode_config.fb_base;
- info->apertures->ranges[0].size = adev->gmc.aper_size;
-
- /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */
-
- if (info->screen_base == NULL) {
- ret = -ENOSPC;
- goto out;
- }
-
- DRM_INFO("fb mappable at 0x%lX\n", info->fix.smem_start);
- DRM_INFO("vram apper at 0x%lX\n", (unsigned long)adev->gmc.aper_base);
- DRM_INFO("size %lu\n", (unsigned long)amdgpu_bo_size(abo));
- DRM_INFO("fb depth is %d\n", fb->format->depth);
- DRM_INFO(" pitch is %d\n", fb->pitches[0]);
-
- vga_switcheroo_client_fb_set(adev->pdev, info);
- return 0;
-
-out:
- if (fb && ret) {
- drm_gem_object_put(gobj);
- drm_framebuffer_unregister_private(fb);
- drm_framebuffer_cleanup(fb);
- kfree(fb);
- }
- return ret;
-}
-
-static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfbdev)
-{
- struct amdgpu_framebuffer *rfb = &rfbdev->rfb;
- int i;
-
- drm_fb_helper_unregister_fbi(&rfbdev->helper);
-
- if (rfb->base.obj[0]) {
- for (i = 0; i < rfb->base.format->num_planes; i++)
- drm_gem_object_put(rfb->base.obj[0]);
- amdgpufb_destroy_pinned_object(rfb->base.obj[0]);
- rfb->base.obj[0] = NULL;
- drm_framebuffer_unregister_private(&rfb->base);
- drm_framebuffer_cleanup(&rfb->base);
- }
- drm_fb_helper_fini(&rfbdev->helper);
-
- return 0;
-}
-
-static const struct drm_fb_helper_funcs amdgpu_fb_helper_funcs = {
- .fb_probe = amdgpufb_create,
-};
-
-int amdgpu_fbdev_init(struct amdgpu_device *adev)
-{
- struct amdgpu_fbdev *rfbdev;
- int bpp_sel = 32;
- int ret;
-
- /* don't init fbdev on hw without DCE */
- if (!adev->mode_info.mode_config_initialized)
- return 0;
-
- /* don't init fbdev if there are no connectors */
- if (list_empty(&adev_to_drm(adev)->mode_config.connector_list))
- return 0;
-
- /* select 8 bpp console on low vram cards */
- if (adev->gmc.real_vram_size <= (32*1024*1024))
- bpp_sel = 8;
-
- rfbdev = kzalloc(sizeof(struct amdgpu_fbdev), GFP_KERNEL);
- if (!rfbdev)
- return -ENOMEM;
-
- rfbdev->adev = adev;
- adev->mode_info.rfbdev = rfbdev;
-
- drm_fb_helper_prepare(adev_to_drm(adev), &rfbdev->helper,
- &amdgpu_fb_helper_funcs);
-
- ret = drm_fb_helper_init(adev_to_drm(adev), &rfbdev->helper);
- if (ret) {
- kfree(rfbdev);
- return ret;
- }
-
- /* disable all the possible outputs/crtcs before entering KMS mode */
- if (!amdgpu_device_has_dc_support(adev) && !amdgpu_virtual_display)
- drm_helper_disable_unused_functions(adev_to_drm(adev));
-
- drm_fb_helper_initial_config(&rfbdev->helper, bpp_sel);
- return 0;
-}
-
-void amdgpu_fbdev_fini(struct amdgpu_device *adev)
-{
- if (!adev->mode_info.rfbdev)
- return;
-
- amdgpu_fbdev_destroy(adev_to_drm(adev), adev->mode_info.rfbdev);
- kfree(adev->mode_info.rfbdev);
- adev->mode_info.rfbdev = NULL;
-}
-
-void amdgpu_fbdev_set_suspend(struct amdgpu_device *adev, int state)
-{
- if (adev->mode_info.rfbdev)
- drm_fb_helper_set_suspend_unlocked(&adev->mode_info.rfbdev->helper,
- state);
-}
-
-int amdgpu_fbdev_total_size(struct amdgpu_device *adev)
-{
- struct amdgpu_bo *robj;
- int size = 0;
-
- if (!adev->mode_info.rfbdev)
- return 0;
-
- robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]);
- size += amdgpu_bo_size(robj);
- return size;
-}
-
-bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
-{
- if (!adev->mode_info.rfbdev)
- return false;
- if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]))
- return true;
- return false;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
index 5a6857c44bb6..b349bb3676d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
@@ -32,6 +32,8 @@
#include <drm/amdgpu_drm.h>
#include <drm/drm_debugfs.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
#include "amdgpu.h"
#include "amdgpu_vm.h"
@@ -50,62 +52,73 @@ static const char *amdgpu_ip_name[AMDGPU_HW_IP_NUM] = {
[AMDGPU_HW_IP_VCN_DEC] = "dec",
[AMDGPU_HW_IP_VCN_ENC] = "enc",
[AMDGPU_HW_IP_VCN_JPEG] = "jpeg",
+ [AMDGPU_HW_IP_VPE] = "vpe",
};
-void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
+void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
{
- struct amdgpu_fpriv *fpriv;
- uint32_t bus, dev, fn, i, domain;
- uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0;
- struct drm_file *file = f->private_data;
- struct amdgpu_device *adev = drm_to_adev(file->minor->dev);
- struct amdgpu_bo *root;
- int ret;
-
- ret = amdgpu_file_to_fpriv(f, &fpriv);
- if (ret)
- return;
- bus = adev->pdev->bus->number;
- domain = pci_domain_nr(adev->pdev->bus);
- dev = PCI_SLOT(adev->pdev->devfn);
- fn = PCI_FUNC(adev->pdev->devfn);
-
- root = amdgpu_bo_ref(fpriv->vm.root.bo);
- if (!root)
- return;
-
- ret = amdgpu_bo_reserve(root, false);
- if (ret) {
- DRM_ERROR("Fail to reserve bo\n");
- return;
+ struct amdgpu_fpriv *fpriv = file->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM];
+ ktime_t usage[AMDGPU_HW_IP_NUM];
+ const char *pl_name[] = {
+ [TTM_PL_VRAM] = "vram",
+ [TTM_PL_TT] = "gtt",
+ [TTM_PL_SYSTEM] = "cpu",
+ [AMDGPU_PL_GDS] = "gds",
+ [AMDGPU_PL_GWS] = "gws",
+ [AMDGPU_PL_OA] = "oa",
+ [AMDGPU_PL_DOORBELL] = "doorbell",
+ [AMDGPU_PL_MMIO_REMAP] = "mmioremap",
+ };
+ unsigned int hw_ip, i;
+
+ amdgpu_vm_get_memory(vm, stats);
+ amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage);
+
+ /*
+ * ******************************************************************
+ * For text output format description please see drm-usage-stats.rst!
+ * ******************************************************************
+ */
+
+ drm_printf(p, "pasid:\t%u\n", fpriv->vm.pasid);
+
+ for (i = 0; i < ARRAY_SIZE(pl_name); i++) {
+ if (!pl_name[i])
+ continue;
+
+ drm_print_memory_stats(p,
+ &stats[i].drm,
+ DRM_GEM_OBJECT_RESIDENT |
+ DRM_GEM_OBJECT_PURGEABLE,
+ pl_name[i]);
}
- amdgpu_vm_get_memory(&fpriv->vm, &vram_mem, &gtt_mem, &cpu_mem);
- amdgpu_bo_unreserve(root);
- amdgpu_bo_unref(&root);
-
- seq_printf(m, "pdev:\t%04x:%02x:%02x.%d\npasid:\t%u\n", domain, bus,
- dev, fn, fpriv->vm.pasid);
- seq_printf(m, "vram mem:\t%llu kB\n", vram_mem/1024UL);
- seq_printf(m, "gtt mem:\t%llu kB\n", gtt_mem/1024UL);
- seq_printf(m, "cpu mem:\t%llu kB\n", cpu_mem/1024UL);
- for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
- uint32_t count = amdgpu_ctx_num_entities[i];
- int idx = 0;
- uint64_t total = 0, min = 0;
- uint32_t perc, frac;
-
- for (idx = 0; idx < count; idx++) {
- total = amdgpu_ctx_mgr_fence_usage(&fpriv->ctx_mgr,
- i, idx, &min);
- if ((total == 0) || (min == 0))
- continue;
-
- perc = div64_u64(10000 * total, min);
- frac = perc % 100;
-
- seq_printf(m, "%s%d:\t%d.%d%%\n",
- amdgpu_ip_name[i],
- idx, perc/100, frac);
- }
+
+ /* Legacy amdgpu keys, alias to drm-resident-memory-: */
+ drm_printf(p, "drm-memory-vram:\t%llu KiB\n",
+ stats[TTM_PL_VRAM].drm.resident/1024UL);
+ drm_printf(p, "drm-memory-gtt: \t%llu KiB\n",
+ stats[TTM_PL_TT].drm.resident/1024UL);
+ drm_printf(p, "drm-memory-cpu: \t%llu KiB\n",
+ stats[TTM_PL_SYSTEM].drm.resident/1024UL);
+
+ /* Amdgpu specific memory accounting keys: */
+ drm_printf(p, "amd-evicted-vram:\t%llu KiB\n",
+ stats[TTM_PL_VRAM].evicted/1024UL);
+ drm_printf(p, "amd-requested-vram:\t%llu KiB\n",
+ (stats[TTM_PL_VRAM].drm.shared +
+ stats[TTM_PL_VRAM].drm.private) / 1024UL);
+ drm_printf(p, "amd-requested-gtt:\t%llu KiB\n",
+ (stats[TTM_PL_TT].drm.shared +
+ stats[TTM_PL_TT].drm.private) / 1024UL);
+
+ for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
+ if (!usage[hw_ip])
+ continue;
+
+ drm_printf(p, "drm-engine-%s:\t%lld ns\n", amdgpu_ip_name[hw_ip],
+ ktime_to_ns(usage[hw_ip]));
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h
index 41a4c7056729..0398f5a159ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h
@@ -30,7 +30,6 @@
#include <linux/rbtree.h>
#include <drm/gpu_scheduler.h>
#include <drm/drm_file.h>
-#include <drm/ttm/ttm_bo_driver.h>
#include <linux/sched/mm.h>
#include "amdgpu_sync.h"
@@ -38,6 +37,6 @@
#include "amdgpu_ids.h"
uint32_t amdgpu_get_ip_count(struct amdgpu_device *adev, int id);
-void amdgpu_show_fdinfo(struct seq_file *m, struct file *f);
+void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 3b7e86ea7167..c7843e336310 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -39,41 +39,9 @@
#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
+#include "amdgpu_reset.h"
/*
- * Fences
- * Fences mark an event in the GPUs pipeline and are used
- * for GPU/CPU synchronization. When the fence is written,
- * it is expected that all buffers associated with that fence
- * are no longer in use by the associated ring on the GPU and
- * that the the relevant GPU caches have been flushed.
- */
-
-struct amdgpu_fence {
- struct dma_fence base;
-
- /* RB, DMA, etc. */
- struct amdgpu_ring *ring;
-};
-
-static struct kmem_cache *amdgpu_fence_slab;
-
-int amdgpu_fence_slab_init(void)
-{
- amdgpu_fence_slab = kmem_cache_create(
- "amdgpu_fence", sizeof(struct amdgpu_fence), 0,
- SLAB_HWCACHE_ALIGN, NULL);
- if (!amdgpu_fence_slab)
- return -ENOMEM;
- return 0;
-}
-
-void amdgpu_fence_slab_fini(void)
-{
- rcu_barrier();
- kmem_cache_destroy(amdgpu_fence_slab);
-}
-/*
* Cast helper
*/
static const struct dma_fence_ops amdgpu_fence_ops;
@@ -81,10 +49,7 @@ static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
{
struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
- if (__f->base.ops == &amdgpu_fence_ops)
- return __f;
-
- return NULL;
+ return __f;
}
/**
@@ -128,53 +93,32 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
* amdgpu_fence_emit - emit a fence on the requested ring
*
* @ring: ring the fence is associated with
- * @f: resulting fence object
- * @job: job the fence is embedded in
+ * @af: amdgpu fence input
* @flags: flags to pass into the subordinate .emit_fence() call
*
* Emits a fence command on the requested ring (all asics).
* Returns 0 on success, -ENOMEM on failure.
*/
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amdgpu_job *job,
- unsigned flags)
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
+ unsigned int flags)
{
struct amdgpu_device *adev = ring->adev;
struct dma_fence *fence;
- struct amdgpu_fence *am_fence;
struct dma_fence __rcu **ptr;
uint32_t seq;
int r;
- if (job == NULL) {
- /* create a sperate hw fence */
- am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC);
- if (am_fence == NULL)
- return -ENOMEM;
- fence = &am_fence->base;
- am_fence->ring = ring;
- } else {
- /* take use of job-embedded fence */
- fence = &job->hw_fence;
- }
+ fence = &af->base;
+ af->ring = ring;
seq = ++ring->fence_drv.sync_seq;
- if (job != NULL && job->job_run_counter) {
- /* reinit seq for resubmitted jobs */
- fence->seqno = seq;
- } else {
- dma_fence_init(fence, &amdgpu_fence_ops,
- &ring->fence_drv.lock,
- adev->fence_context + ring->idx,
- seq);
- }
-
- if (job != NULL) {
- /* mark this fence has a parent job */
- set_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &fence->flags);
- }
+ dma_fence_init(fence, &amdgpu_fence_ops,
+ &ring->fence_drv.lock,
+ adev->fence_context + ring->idx, seq);
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
seq, flags | AMDGPU_FENCE_FLAG_INT);
+ amdgpu_fence_save_wptr(af);
pm_runtime_get_noresume(adev_to_drm(adev)->dev);
ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
if (unlikely(rcu_dereference_protected(*ptr, 1))) {
@@ -192,13 +136,13 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
}
}
+ to_amdgpu_fence(fence)->start_timestamp = ktime_get();
+
/* This function can't be called concurrently anyway, otherwise
* emitting the fence would mess up the hardware ring buffer.
*/
rcu_assign_pointer(*ptr, dma_fence_get(fence));
- *f = fence;
-
return 0;
}
@@ -273,7 +217,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
} while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
- if (del_timer(&ring->fence_drv.fallback_timer) &&
+ if (timer_delete(&ring->fence_drv.fallback_timer) &&
seq != ring->fence_drv.sync_seq)
amdgpu_fence_schedule_fallback(ring);
@@ -285,6 +229,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
do {
struct dma_fence *fence, **ptr;
+ struct amdgpu_fence *am_fence;
++last_seq;
last_seq &= drv->num_fences_mask;
@@ -297,9 +242,14 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
if (!fence)
continue;
+ /* Save the wptr in the fence driver so we know what the last processed
+ * wptr was. This is required for re-emitting the ring state for
+ * queues that are reset but are not guilty and thus have no guilty fence.
+ */
+ am_fence = container_of(fence, struct amdgpu_fence, base);
+ drv->signalled_wptr = am_fence->wptr;
dma_fence_signal(fence);
dma_fence_put(fence);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
} while (last_seq != seq);
@@ -315,11 +265,13 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
*/
static void amdgpu_fence_fallback(struct timer_list *t)
{
- struct amdgpu_ring *ring = from_timer(ring, t,
- fence_drv.fallback_timer);
+ struct amdgpu_ring *ring = timer_container_of(ring, t,
+ fence_drv.fallback_timer);
if (amdgpu_fence_process(ring))
- DRM_WARN("Fence fallback timer expired on ring %s\n", ring->name);
+ dev_warn(ring->adev->dev,
+ "Fence fallback timer expired on ring %s\n",
+ ring->name);
}
/**
@@ -367,14 +319,11 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
uint32_t wait_seq,
signed long timeout)
{
- uint32_t seq;
-
- do {
- seq = amdgpu_fence_read(ring);
- udelay(5);
- timeout -= 5;
- } while ((int32_t)(wait_seq - seq) > 0 && timeout > 0);
+ while ((int32_t)(wait_seq - amdgpu_fence_read(ring)) > 0 && timeout > 0) {
+ udelay(2);
+ timeout -= 2;
+ }
return timeout > 0 ? timeout : 0;
}
/**
@@ -386,14 +335,13 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
* Returns the number of emitted fences on the ring. Used by the
* dynpm code to ring track activity.
*/
-unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
+unsigned int amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
{
uint64_t emitted;
/* We are not protected by ring lock when reading the last sequence
* but it's ok to report slightly wrong fence count here.
*/
- amdgpu_fence_process(ring);
emitted = 0x100000000ull;
emitted -= atomic_read(&ring->fence_drv.last_seq);
emitted += READ_ONCE(ring->fence_drv.sync_seq);
@@ -401,6 +349,57 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
}
/**
+ * amdgpu_fence_last_unsignaled_time_us - the time fence emitted until now
+ * @ring: ring the fence is associated with
+ *
+ * Find the earliest fence that has not signaled yet and calculate the time
+ * elapsed between when that fence was emitted and now.
+ */
+u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ struct dma_fence *fence;
+ uint32_t last_seq, sync_seq;
+
+ last_seq = atomic_read(&ring->fence_drv.last_seq);
+ sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
+ if (last_seq == sync_seq)
+ return 0;
+
+ ++last_seq;
+ last_seq &= drv->num_fences_mask;
+ fence = drv->fences[last_seq];
+ if (!fence)
+ return 0;
+
+ return ktime_us_delta(ktime_get(),
+ to_amdgpu_fence(fence)->start_timestamp);
+}
+
+/**
+ * amdgpu_fence_update_start_timestamp - update the timestamp of the fence
+ * @ring: ring the fence is associated with
+ * @seq: the fence seq number to update.
+ * @timestamp: the start timestamp to update.
+ *
+ * This function is called when the fence and its related IB are about to be
+ * resubmitted to the GPU in the MCBP scenario, so races with
+ * amdgpu_fence_process() modifying the same fence are not considered.
+ */
+void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, ktime_t timestamp)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ struct dma_fence *fence;
+
+ seq &= drv->num_fences_mask;
+ fence = drv->fences[seq];
+ if (!fence)
+ return;
+
+ to_amdgpu_fence(fence)->start_timestamp = timestamp;
+}
+
+/**
* amdgpu_fence_driver_start_ring - make the fence driver
* ready for use on the requested ring.
*
@@ -415,14 +414,14 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
*/
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq_src,
- unsigned irq_type)
+ unsigned int irq_type)
{
struct amdgpu_device *adev = ring->adev;
uint64_t index;
if (ring->funcs->type != AMDGPU_RING_TYPE_UVD) {
- ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs];
- ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4);
+ ring->fence_drv.cpu_addr = ring->fence_cpu_addr;
+ ring->fence_drv.gpu_addr = ring->fence_gpu_addr;
} else {
/* put fence directly behind firmware */
index = ALIGN(adev->uvd.fw->size, 8);
@@ -445,24 +444,18 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
* for the requested ring.
*
* @ring: ring to init the fence driver on
- * @num_hw_submission: number of entries on the hardware queue
- * @sched_score: optional score atomic shared with other schedulers
*
* Init the fence driver for the requested ring (all asics).
* Helper function for amdgpu_fence_driver_init().
*/
-int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
- unsigned num_hw_submission,
- atomic_t *sched_score)
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- long timeout;
- int r;
if (!adev)
return -EINVAL;
- if (!is_power_of_2(num_hw_submission))
+ if (!is_power_of_2(ring->num_hw_submission))
return -EINVAL;
ring->fence_drv.cpu_addr = NULL;
@@ -473,41 +466,14 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
timer_setup(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, 0);
- ring->fence_drv.num_fences_mask = num_hw_submission * 2 - 1;
+ ring->fence_drv.num_fences_mask = ring->num_hw_submission * 2 - 1;
spin_lock_init(&ring->fence_drv.lock);
- ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *),
+ ring->fence_drv.fences = kcalloc(ring->num_hw_submission * 2, sizeof(void *),
GFP_KERNEL);
+
if (!ring->fence_drv.fences)
return -ENOMEM;
- /* No need to setup the GPU scheduler for rings that don't need it */
- if (ring->no_scheduler)
- return 0;
-
- switch (ring->funcs->type) {
- case AMDGPU_RING_TYPE_GFX:
- timeout = adev->gfx_timeout;
- break;
- case AMDGPU_RING_TYPE_COMPUTE:
- timeout = adev->compute_timeout;
- break;
- case AMDGPU_RING_TYPE_SDMA:
- timeout = adev->sdma_timeout;
- break;
- default:
- timeout = adev->video_timeout;
- break;
- }
-
- r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
- num_hw_submission, amdgpu_job_hang_limit,
- timeout, NULL, sched_score, ring->name);
- if (r) {
- DRM_ERROR("Failed to create scheduler on ring %s.\n",
- ring->name);
- return r;
- }
-
return 0;
}
@@ -529,6 +495,42 @@ int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev)
}
/**
+ * amdgpu_fence_need_ring_interrupt_restore - helper function to check whether
+ * fence driver interrupts need to be restored.
+ *
+ * @ring: ring to be checked
+ *
+ * Interrupts for rings that belong to GFX IP don't need to be restored
+ * when the target power state is s0ix.
+ *
+ * Return true if need to restore interrupts, false otherwise.
+ */
+static bool amdgpu_fence_need_ring_interrupt_restore(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool is_gfx_power_domain = false;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_SDMA:
+ /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
+ IP_VERSION(5, 0, 0))
+ is_gfx_power_domain = true;
+ break;
+ case AMDGPU_RING_TYPE_GFX:
+ case AMDGPU_RING_TYPE_COMPUTE:
+ case AMDGPU_RING_TYPE_KIQ:
+ case AMDGPU_RING_TYPE_MES:
+ is_gfx_power_domain = true;
+ break;
+ default:
+ break;
+ }
+
+ return !(adev->in_s0ix && is_gfx_power_domain);
+}
+
+/**
* amdgpu_fence_driver_hw_fini - tear down the fence driver
* for all possible rings.
*
@@ -546,9 +548,6 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
if (!ring || !ring->fence_drv.initialized)
continue;
- if (!ring->no_scheduler)
- drm_sched_stop(&ring->sched, NULL);
-
/* You can't wait for HW to signal if it's gone */
if (!drm_dev_is_unplugged(adev_to_drm(adev)))
r = amdgpu_fence_wait_empty(ring);
@@ -558,11 +557,31 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
if (r)
amdgpu_fence_driver_force_completion(ring);
- if (ring->fence_drv.irq_src)
+ if (!drm_dev_is_unplugged(adev_to_drm(adev)) &&
+ ring->fence_drv.irq_src &&
+ amdgpu_fence_need_ring_interrupt_restore(ring))
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
- del_timer_sync(&ring->fence_drv.fallback_timer);
+ timer_delete_sync(&ring->fence_drv.fallback_timer);
+ }
+}
+
+/* Will either stop and flush handlers for amdgpu interrupt or re-enable it */
+void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop)
+{
+ int i;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!ring || !ring->fence_drv.initialized || !ring->fence_drv.irq_src)
+ continue;
+
+ if (stop)
+ disable_irq(adev->irq.irq);
+ else
+ enable_irq(adev->irq.irq);
}
}
@@ -576,7 +595,13 @@ void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev)
if (!ring || !ring->fence_drv.initialized)
continue;
- if (!ring->no_scheduler)
+ /*
+ * Notice we check for sched.ops since there's some
+ * override on the meaning of sched.ready by amdgpu.
+ * The natural check would be sched.ready, which is
+ * set as drm_sched_init() finishes...
+ */
+ if (ring->sched.ops)
drm_sched_fini(&ring->sched);
for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
@@ -605,22 +630,43 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
+
if (!ring || !ring->fence_drv.initialized)
continue;
- if (!ring->no_scheduler) {
- drm_sched_resubmit_jobs(&ring->sched);
- drm_sched_start(&ring->sched, true);
- }
-
/* enable the interrupt */
- if (ring->fence_drv.irq_src)
+ if (ring->fence_drv.irq_src &&
+ amdgpu_fence_need_ring_interrupt_restore(ring))
amdgpu_irq_get(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
}
}
/**
+ * amdgpu_fence_driver_set_error - set error code on fences
+ * @ring: the ring which contains the fences
+ * @error: the error code to set
+ *
+ * Set an error code to all the fences pending on the ring.
+ */
+void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&drv->lock, flags);
+ for (unsigned int i = 0; i <= drv->num_fences_mask; ++i) {
+ struct dma_fence *fence;
+
+ fence = rcu_dereference_protected(drv->fences[i],
+ lockdep_is_held(&drv->lock));
+ if (fence && !dma_fence_is_signaled_locked(fence))
+ dma_fence_set_error(fence, error);
+ }
+ spin_unlock_irqrestore(&drv->lock, flags);
+}
+
+/**
* amdgpu_fence_driver_force_completion - force signal latest fence of ring
*
* @ring: fence of the ring to signal
@@ -628,10 +674,125 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
*/
void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
{
+ amdgpu_fence_driver_set_error(ring, -ECANCELED);
amdgpu_fence_write(ring, ring->fence_drv.sync_seq);
amdgpu_fence_process(ring);
}
+
+/*
+ * Kernel queue reset handling
+ *
+ * The driver can reset individual queues for most engines, but those queues
+ * may contain work from multiple contexts. Resetting the queue will
+ * lose all of that state. In order to minimize the collateral damage, the
+ * driver will save the ring contents which are not associated with the guilty
+ * context prior to resetting the queue. After resetting the queue the queue
+ * contents from the other contexts is re-emitted to the rings so that it can
+ * be processed by the engine. To handle this, we save the queue's write
+ * pointer (wptr) in the fences associated with each context. If we get a
+ * queue timeout, we can then use the wptrs from the fences to determine
+ * which data needs to be saved out of the queue's ring buffer.
+ */
+
+/**
+ * amdgpu_fence_driver_guilty_force_completion - force signal of specified sequence
+ *
+ * @af: fence of the ring to signal
+ *
+ */
+void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
+{
+ struct dma_fence *unprocessed;
+ struct dma_fence __rcu **ptr;
+ struct amdgpu_fence *fence;
+ struct amdgpu_ring *ring = af->ring;
+ unsigned long flags;
+ u32 seq, last_seq;
+
+ last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
+ seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
+
+ /* mark all fences from the guilty context with an error */
+ spin_lock_irqsave(&ring->fence_drv.lock, flags);
+ do {
+ last_seq++;
+ last_seq &= ring->fence_drv.num_fences_mask;
+
+ ptr = &ring->fence_drv.fences[last_seq];
+ rcu_read_lock();
+ unprocessed = rcu_dereference(*ptr);
+
+ if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) {
+ fence = container_of(unprocessed, struct amdgpu_fence, base);
+
+ if (fence == af)
+ dma_fence_set_error(&fence->base, -ETIME);
+ else if (fence->context == af->context)
+ dma_fence_set_error(&fence->base, -ECANCELED);
+ }
+ rcu_read_unlock();
+ } while (last_seq != seq);
+ spin_unlock_irqrestore(&ring->fence_drv.lock, flags);
+ /* signal the guilty fence */
+ amdgpu_fence_write(ring, (u32)af->base.seqno);
+ amdgpu_fence_process(ring);
+}
+
+void amdgpu_fence_save_wptr(struct amdgpu_fence *af)
+{
+ af->wptr = af->ring->wptr;
+}
+
+static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring,
+ u64 start_wptr, u32 end_wptr)
+{
+ unsigned int first_idx = start_wptr & ring->buf_mask;
+ unsigned int last_idx = end_wptr & ring->buf_mask;
+ unsigned int i;
+
+ /* Backup the contents of the ring buffer. */
+ for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask)
+ ring->ring_backup[ring->ring_backup_entries_to_copy++] = ring->ring[i];
+}
+
+void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence)
+{
+ struct dma_fence *unprocessed;
+ struct dma_fence __rcu **ptr;
+ struct amdgpu_fence *fence;
+ u64 wptr;
+ u32 seq, last_seq;
+
+ last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
+ seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
+ wptr = ring->fence_drv.signalled_wptr;
+ ring->ring_backup_entries_to_copy = 0;
+
+ do {
+ last_seq++;
+ last_seq &= ring->fence_drv.num_fences_mask;
+
+ ptr = &ring->fence_drv.fences[last_seq];
+ rcu_read_lock();
+ unprocessed = rcu_dereference(*ptr);
+
+ if (unprocessed && !dma_fence_is_signaled(unprocessed)) {
+ fence = container_of(unprocessed, struct amdgpu_fence, base);
+
+ /* save everything if the ring is not guilty, otherwise
+ * just save the content from other contexts.
+ */
+ if (!guilty_fence || (fence->context != guilty_fence->context))
+ amdgpu_ring_backup_unprocessed_command(ring, wptr,
+ fence->wptr);
+ wptr = fence->wptr;
+ }
+ rcu_read_unlock();
+ } while (last_seq != seq);
+}
+
/*
* Common fence implementation
*/
@@ -643,16 +804,7 @@ static const char *amdgpu_fence_get_driver_name(struct dma_fence *fence)
static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
{
- struct amdgpu_ring *ring;
-
- if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) {
- struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
-
- ring = to_amdgpu_ring(job->base.sched);
- } else {
- ring = to_amdgpu_fence(f)->ring;
- }
- return (const char *)ring->name;
+ return (const char *)to_amdgpu_fence(f)->ring->name;
}
/**
@@ -665,18 +817,8 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
*/
static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
{
- struct amdgpu_ring *ring;
-
- if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) {
- struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
-
- ring = to_amdgpu_ring(job->base.sched);
- } else {
- ring = to_amdgpu_fence(f)->ring;
- }
-
- if (!timer_pending(&ring->fence_drv.fallback_timer))
- amdgpu_fence_schedule_fallback(ring);
+ if (!timer_pending(&to_amdgpu_fence(f)->ring->fence_drv.fallback_timer))
+ amdgpu_fence_schedule_fallback(to_amdgpu_fence(f)->ring);
return true;
}
@@ -692,19 +834,8 @@ static void amdgpu_fence_free(struct rcu_head *rcu)
{
struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
- if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) {
- /* free job if fence has a parent job */
- struct amdgpu_job *job;
-
- job = container_of(f, struct amdgpu_job, hw_fence);
- kfree(job);
- } else {
/* free fence_slab if it's separated fence*/
- struct amdgpu_fence *fence;
-
- fence = to_amdgpu_fence(f);
- kmem_cache_free(amdgpu_fence_slab, fence);
- }
+ kfree(to_amdgpu_fence(f));
}
/**
@@ -727,18 +858,18 @@ static const struct dma_fence_ops amdgpu_fence_ops = {
.release = amdgpu_fence_release,
};
-
/*
* Fence debugfs
*/
#if defined(CONFIG_DEBUG_FS)
static int amdgpu_debugfs_fence_info_show(struct seq_file *m, void *unused)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct amdgpu_device *adev = m->private;
int i;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
+
if (!ring || !ring->fence_drv.initialized)
continue;
@@ -791,9 +922,11 @@ static int gpu_recover_get(void *data, u64 *val)
return 0;
}
- *val = amdgpu_device_gpu_recover(adev, NULL);
+ if (amdgpu_reset_domain_schedule(adev->reset_domain, &adev->reset_work))
+ flush_work(&adev->reset_work);
+
+ *val = atomic_read(&adev->reset_domain->reset_res);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return 0;
@@ -803,6 +936,24 @@ DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_fence_info);
DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get, NULL,
"%lld\n");
+static void amdgpu_debugfs_reset_work(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ reset_work);
+
+ struct amdgpu_reset_context reset_context;
+
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ reset_context.src = AMDGPU_RESET_SRC_USER;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+}
+
#endif
void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
@@ -814,9 +965,12 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
debugfs_create_file("amdgpu_fence_info", 0444, root, adev,
&amdgpu_debugfs_fence_info_fops);
- if (!amdgpu_sriov_vf(adev))
+ if (!amdgpu_sriov_vf(adev)) {
+
+ INIT_WORK(&adev->reset_work, amdgpu_debugfs_reset_work);
debugfs_create_file("amdgpu_gpu_recover", 0444, root, adev,
&amdgpu_debugfs_gpu_recover_fops);
+ }
#endif
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
index 7709caeb233d..b0082aa7f3c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
@@ -29,10 +29,11 @@
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_eeprom.h"
-#define FRU_EEPROM_MADDR 0x60000
-#define I2C_PRODUCT_INFO_OFFSET 0xC0
+#define FRU_EEPROM_MADDR_6 0x60000
+#define FRU_EEPROM_MADDR_8 0x80000
+#define FRU_EEPROM_MADDR_INV 0xFFFFF
-static bool is_fru_eeprom_supported(struct amdgpu_device *adev)
+static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr)
{
/* Only server cards have the FRU EEPROM
* TODO: See if we can figure this out dynamically instead of
@@ -40,152 +41,368 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev)
*/
struct atom_context *atom_ctx = adev->mode_info.atom_context;
- /* VBIOS is of the format ###-DXXXYY-##. For SKU identification,
+ /* The i2c access is blocked on VF
+ * TODO: Need other way to get the info
+ * Also, FRU not valid for APU devices.
+ */
+ if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
+ return false;
+
+ /* The default I2C EEPROM address of the FRU.
+ */
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_8;
+
+ /* VBIOS is of the format ###-DXXXYYYY-##. For SKU identification,
* we can use just the "DXXX" portion. If there were more models, we
* could convert the 3 characters to a hex integer and use a switch
* for ease/speed/readability. For now, 2 string comparisons are
* reasonable and not too expensive
*/
- switch (adev->asic_type) {
- case CHIP_VEGA20:
- /* D161 and D163 are the VG20 server SKUs */
- if (strnstr(atom_ctx->vbios_version, "D161",
- sizeof(atom_ctx->vbios_version)) ||
- strnstr(atom_ctx->vbios_version, "D163",
- sizeof(atom_ctx->vbios_version)))
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(11, 0, 2):
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ /* D161 and D163 are the VG20 server SKUs */
+ if (atom_ctx && (strnstr(atom_ctx->vbios_pn, "D161",
+ sizeof(atom_ctx->vbios_pn)) ||
+ strnstr(atom_ctx->vbios_pn, "D163",
+ sizeof(atom_ctx->vbios_pn)))) {
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_6;
+ return true;
+ } else {
+ return false;
+ }
+ case CHIP_ARCTURUS:
+ default:
+ return false;
+ }
+ case IP_VERSION(11, 0, 7):
+ if (atom_ctx && strnstr(atom_ctx->vbios_pn, "D603",
+ sizeof(atom_ctx->vbios_pn))) {
+ if (strnstr(atom_ctx->vbios_pn, "D603GLXE",
+ sizeof(atom_ctx->vbios_pn))) {
+ return false;
+ }
+
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_6;
return true;
- else
+
+ } else {
return false;
+ }
+ case IP_VERSION(13, 0, 2):
+ /* All Aldebaran SKUs have an FRU */
+ if (atom_ctx && !strnstr(atom_ctx->vbios_pn, "D673",
+ sizeof(atom_ctx->vbios_pn)))
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_6;
+ return true;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 14):
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_8;
+ return true;
+ case IP_VERSION(13, 0, 12):
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_INV;
+ return true;
default:
return false;
}
}
-static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr,
- unsigned char *buff)
+int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
{
- int ret, size;
+ struct amdgpu_fru_info *fru_info;
+ unsigned char buf[8], *pia;
+ u32 addr, fru_addr;
+ int size, len;
+ u8 csum;
- ret = amdgpu_eeprom_read(&adev->pm.smu_i2c, addrptr, buff, 1);
- if (ret < 1) {
- DRM_WARN("FRU: Failed to get size field");
- return ret;
- }
+ if (!is_fru_eeprom_supported(adev, &fru_addr))
+ return 0;
- /* The size returned by the i2c requires subtraction of 0xC0 since the
- * size apparently always reports as 0xC0+actual size.
- */
- size = buff[0] - I2C_PRODUCT_INFO_OFFSET;
+ /* FRU data available, but no direct EEPROM access */
+ if (fru_addr == FRU_EEPROM_MADDR_INV)
+ return 0;
- ret = amdgpu_eeprom_read(&adev->pm.smu_i2c, addrptr + 1, buff, size);
- if (ret < 1) {
- DRM_WARN("FRU: Failed to get data field");
- return ret;
+ if (!adev->fru_info) {
+ adev->fru_info = kzalloc(sizeof(*adev->fru_info), GFP_KERNEL);
+ if (!adev->fru_info)
+ return -ENOMEM;
}
- return size;
-}
-
-int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
-{
- unsigned char buff[34];
- u32 addrptr;
- int size, len;
-
- if (!is_fru_eeprom_supported(adev))
- return 0;
+ fru_info = adev->fru_info;
+ /* For Arcturus-and-later, default value of serial_number is unique_id
+ * so convert it to a 16-digit HEX string for convenience and
+ * backwards-compatibility.
+ */
+ sprintf(fru_info->serial, "%llx", adev->unique_id);
/* If algo exists, it means that the i2c_adapter's initialized */
- if (!adev->pm.smu_i2c.algo) {
- DRM_WARN("Cannot access FRU, EEPROM accessor not initialized");
+ if (!adev->pm.fru_eeprom_i2c_bus || !adev->pm.fru_eeprom_i2c_bus->algo) {
+ dev_warn(adev->dev,
+ "Cannot access FRU, EEPROM accessor not initialized");
return -ENODEV;
}
- /* There's a lot of repetition here. This is due to the FRU having
- * variable-length fields. To get the information, we have to find the
- * size of each field, and then keep reading along and reading along
- * until we get all of the data that we want. We use addrptr to track
- * the address as we go
- */
-
- /* The first fields are all of size 1-byte, from 0-7 are offsets that
- * contain information that isn't useful to us.
- * Bytes 8-a are all 1-byte and refer to the size of the entire struct,
- * and the language field, so just start from 0xb, manufacturer size
- */
- addrptr = FRU_EEPROM_MADDR + 0xb;
- size = amdgpu_fru_read_eeprom(adev, addrptr, buff);
- if (size < 1) {
- DRM_ERROR("Failed to read FRU Manufacturer, ret:%d", size);
- return -EINVAL;
+ /* Read the IPMI Common header */
+ len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, fru_addr, buf,
+ sizeof(buf));
+ if (len != 8) {
+ dev_err(adev->dev, "Couldn't read the IPMI Common Header: %d",
+ len);
+ return len < 0 ? len : -EIO;
}
- /* Increment the addrptr by the size of the field, and 1 due to the
- * size field being 1 byte. This pattern continues below.
- */
- addrptr += size + 1;
- size = amdgpu_fru_read_eeprom(adev, addrptr, buff);
- if (size < 1) {
- DRM_ERROR("Failed to read FRU product name, ret:%d", size);
- return -EINVAL;
+ if (buf[0] != 1) {
+ dev_err(adev->dev, "Bad IPMI Common Header version: 0x%02x",
+ buf[0]);
+ return -EIO;
}
- len = size;
- /* Product name should only be 32 characters. Any more,
- * and something could be wrong. Cap it at 32 to be safe
- */
- if (len >= sizeof(adev->product_name)) {
- DRM_WARN("FRU Product Number is larger than 32 characters. This is likely a mistake");
- len = sizeof(adev->product_name) - 1;
+ for (csum = 0; len > 0; len--)
+ csum += buf[len - 1];
+ if (csum) {
+ dev_err(adev->dev, "Bad IPMI Common Header checksum: 0x%02x",
+ csum);
+ return -EIO;
}
- /* Start at 2 due to buff using fields 0 and 1 for the address */
- memcpy(adev->product_name, &buff[2], len);
- adev->product_name[len] = '\0';
-
- addrptr += size + 1;
- size = amdgpu_fru_read_eeprom(adev, addrptr, buff);
- if (size < 1) {
- DRM_ERROR("Failed to read FRU product number, ret:%d", size);
- return -EINVAL;
+
+ /* Get the offset to the Product Info Area (PIA). */
+ addr = buf[4] * 8;
+ if (!addr)
+ return 0;
+
+ /* Get the absolute address to the PIA. */
+ addr += fru_addr;
+
+ /* Read the header of the PIA. */
+ len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addr, buf, 3);
+ if (len != 3) {
+ dev_err(adev->dev,
+ "Couldn't read the Product Info Area header: %d", len);
+ return len < 0 ? len : -EIO;
}
- len = size;
- /* Product number should only be 16 characters. Any more,
- * and something could be wrong. Cap it at 16 to be safe
- */
- if (len >= sizeof(adev->product_number)) {
- DRM_WARN("FRU Product Number is larger than 16 characters. This is likely a mistake");
- len = sizeof(adev->product_number) - 1;
+ if (buf[0] != 1) {
+ dev_err(adev->dev, "Bad IPMI Product Info Area version: 0x%02x",
+ buf[0]);
+ return -EIO;
}
- memcpy(adev->product_number, &buff[2], len);
- adev->product_number[len] = '\0';
- addrptr += size + 1;
- size = amdgpu_fru_read_eeprom(adev, addrptr, buff);
+ size = buf[1] * 8;
+ pia = kzalloc(size, GFP_KERNEL);
+ if (!pia)
+ return -ENOMEM;
- if (size < 1) {
- DRM_ERROR("Failed to read FRU product version, ret:%d", size);
- return -EINVAL;
+ /* Read the whole PIA. */
+ len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addr, pia, size);
+ if (len != size) {
+ kfree(pia);
+ dev_err(adev->dev, "Couldn't read the Product Info Area: %d",
+ len);
+ return len < 0 ? len : -EIO;
}
- addrptr += size + 1;
- size = amdgpu_fru_read_eeprom(adev, addrptr, buff);
-
- if (size < 1) {
- DRM_ERROR("Failed to read FRU serial number, ret:%d", size);
- return -EINVAL;
+ for (csum = 0; size > 0; size--)
+ csum += pia[size - 1];
+ if (csum) {
+ dev_err(adev->dev, "Bad Product Info Area checksum: 0x%02x",
+ csum);
+ kfree(pia);
+ return -EIO;
}
- len = size;
- /* Serial number should only be 16 characters. Any more,
- * and something could be wrong. Cap it at 16 to be safe
+ /* Now extract useful information from the PIA.
+ *
+ * Read the Manufacturer Name field, whose length byte is at offset 3.
*/
- if (len >= sizeof(adev->serial)) {
- DRM_WARN("FRU Serial Number is larger than 16 characters. This is likely a mistake");
- len = sizeof(adev->serial) - 1;
- }
- memcpy(adev->serial, &buff[2], len);
- adev->serial[len] = '\0';
+ addr = 3;
+ if (addr + 1 >= len)
+ goto Out;
+ memcpy(fru_info->manufacturer_name, pia + addr + 1,
+ min_t(size_t, sizeof(fru_info->manufacturer_name),
+ pia[addr] & 0x3F));
+ fru_info->manufacturer_name[sizeof(fru_info->manufacturer_name) - 1] =
+ '\0';
+
+ /* Read Product Name field. */
+ addr += 1 + (pia[addr] & 0x3F);
+ if (addr + 1 >= len)
+ goto Out;
+ memcpy(fru_info->product_name, pia + addr + 1,
+ min_t(size_t, sizeof(fru_info->product_name), pia[addr] & 0x3F));
+ fru_info->product_name[sizeof(fru_info->product_name) - 1] = '\0';
+
+ /* Go to the Product Part/Model Number field. */
+ addr += 1 + (pia[addr] & 0x3F);
+ if (addr + 1 >= len)
+ goto Out;
+ memcpy(fru_info->product_number, pia + addr + 1,
+ min_t(size_t, sizeof(fru_info->product_number),
+ pia[addr] & 0x3F));
+ fru_info->product_number[sizeof(fru_info->product_number) - 1] = '\0';
+
+ /* Go to the Product Version field. */
+ addr += 1 + (pia[addr] & 0x3F);
+
+ /* Go to the Product Serial Number field. */
+ addr += 1 + (pia[addr] & 0x3F);
+ if (addr + 1 >= len)
+ goto Out;
+ memcpy(fru_info->serial, pia + addr + 1,
+ min_t(size_t, sizeof(fru_info->serial), pia[addr] & 0x3F));
+ fru_info->serial[sizeof(fru_info->serial) - 1] = '\0';
+
+ /* Asset Tag field */
+ addr += 1 + (pia[addr] & 0x3F);
+ /* FRU File Id field. This could be 'null'. */
+ addr += 1 + (pia[addr] & 0x3F);
+ if ((addr + 1 >= len) || !(pia[addr] & 0x3F))
+ goto Out;
+ memcpy(fru_info->fru_id, pia + addr + 1,
+ min_t(size_t, sizeof(fru_info->fru_id), pia[addr] & 0x3F));
+ fru_info->fru_id[sizeof(fru_info->fru_id) - 1] = '\0';
+
+Out:
+ kfree(pia);
return 0;
}
+
+/**
+ * DOC: product_name
+ *
+ * The amdgpu driver provides a sysfs API for reporting the product name
+ * for the device
+ * The file product_name is used for this and returns the product name
+ * as returned from the FRU.
+ * NOTE: This is only available for certain server cards
+ */
+
+static ssize_t amdgpu_fru_product_name_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%s\n", adev->fru_info->product_name);
+}
+
+static DEVICE_ATTR(product_name, 0444, amdgpu_fru_product_name_show, NULL);
+
+/**
+ * DOC: product_number
+ *
+ * The amdgpu driver provides a sysfs API for reporting the part number
+ * for the device
+ * The file product_number is used for this and returns the part number
+ * as returned from the FRU.
+ * NOTE: This is only available for certain server cards
+ */
+
+static ssize_t amdgpu_fru_product_number_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%s\n", adev->fru_info->product_number);
+}
+
+static DEVICE_ATTR(product_number, 0444, amdgpu_fru_product_number_show, NULL);
+
+/**
+ * DOC: serial_number
+ *
+ * The amdgpu driver provides a sysfs API for reporting the serial number
+ * for the device
+ * The file serial_number is used for this and returns the serial number
+ * as returned from the FRU.
+ * NOTE: This is only available for certain server cards
+ */
+
+static ssize_t amdgpu_fru_serial_number_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%s\n", adev->fru_info->serial);
+}
+
+static DEVICE_ATTR(serial_number, 0444, amdgpu_fru_serial_number_show, NULL);
+
+/**
+ * DOC: fru_id
+ *
+ * The amdgpu driver provides a sysfs API for reporting FRU File Id
+ * for the device.
+ * The file fru_id is used for this and returns the File Id value
+ * as returned from the FRU.
+ * NOTE: This is only available for certain server cards
+ */
+
+static ssize_t amdgpu_fru_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%s\n", adev->fru_info->fru_id);
+}
+
+static DEVICE_ATTR(fru_id, 0444, amdgpu_fru_id_show, NULL);
+
+/**
+ * DOC: manufacturer
+ *
+ * The amdgpu driver provides a sysfs API for reporting manufacturer name from
+ * FRU information.
+ * The file manufacturer returns the value as returned from the FRU.
+ * NOTE: This is only available for certain server cards
+ */
+
+static ssize_t amdgpu_fru_manufacturer_name_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%s\n", adev->fru_info->manufacturer_name);
+}
+
+static DEVICE_ATTR(manufacturer, 0444, amdgpu_fru_manufacturer_name_show, NULL);
+
+static const struct attribute *amdgpu_fru_attributes[] = {
+ &dev_attr_product_name.attr,
+ &dev_attr_product_number.attr,
+ &dev_attr_serial_number.attr,
+ &dev_attr_fru_id.attr,
+ &dev_attr_manufacturer.attr,
+ NULL
+};
+
+int amdgpu_fru_sysfs_init(struct amdgpu_device *adev)
+{
+ if (!is_fru_eeprom_supported(adev, NULL) || !adev->fru_info)
+ return 0;
+
+ return sysfs_create_files(&adev->dev->kobj, amdgpu_fru_attributes);
+}
+
+void amdgpu_fru_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (!adev->fru_info)
+ return;
+
+ sysfs_remove_files(&adev->dev->kobj, amdgpu_fru_attributes);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
index 1308d976d60e..98f3196599ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
@@ -24,6 +24,19 @@
#ifndef __AMDGPU_FRU_EEPROM_H__
#define __AMDGPU_FRU_EEPROM_H__
+#define AMDGPU_PRODUCT_NAME_LEN 64
+
+/* FRU product information */
+struct amdgpu_fru_info {
+ char product_number[20];
+ char product_name[AMDGPU_PRODUCT_NAME_LEN];
+ char serial[20];
+ char manufacturer_name[32];
+ char fru_id[50];
+};
+
int amdgpu_fru_get_product_info(struct amdgpu_device *adev);
+int amdgpu_fru_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_fru_sysfs_fini(struct amdgpu_device *adev);
#endif // __AMDGPU_FRU_EEPROM_H__
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
index 2ca3c329de6d..328a1b963548 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
@@ -32,17 +32,15 @@
#include "soc15_common.h"
#define FW_ATTESTATION_DB_COOKIE 0x143b6a37
-#define FW_ATTESTATION_RECORD_VALID 1
+#define FW_ATTESTATION_RECORD_VALID 1
#define FW_ATTESTATION_MAX_SIZE 4096
-typedef struct FW_ATT_DB_HEADER
-{
+struct FW_ATT_DB_HEADER {
uint32_t AttDbVersion; /* version of the fwar feature */
uint32_t AttDbCookie; /* cookie as an extra check for corrupt data */
-} FW_ATT_DB_HEADER;
+};
-typedef struct FW_ATT_RECORD
-{
+struct FW_ATT_RECORD {
uint16_t AttFwIdV1; /* Legacy FW Type field */
uint16_t AttFwIdV2; /* V2 FW ID field */
uint32_t AttFWVersion; /* FW Version */
@@ -50,7 +48,7 @@ typedef struct FW_ATT_RECORD
uint8_t AttSource; /* FW source indicator */
uint8_t RecordValid; /* Indicates whether the record is a valid entry */
uint32_t AttFwTaId; /* Ta ID (only in TA Attestation Table) */
-} FW_ATT_RECORD;
+};
static ssize_t amdgpu_fw_attestation_debugfs_read(struct file *f,
char __user *buf,
@@ -60,15 +58,15 @@ static ssize_t amdgpu_fw_attestation_debugfs_read(struct file *f,
struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
uint64_t records_addr = 0;
uint64_t vram_pos = 0;
- FW_ATT_DB_HEADER fw_att_hdr = {0};
- FW_ATT_RECORD fw_att_record = {0};
+ struct FW_ATT_DB_HEADER fw_att_hdr = {0};
+ struct FW_ATT_RECORD fw_att_record = {0};
- if (size < sizeof(FW_ATT_RECORD)) {
+ if (size < sizeof(struct FW_ATT_RECORD)) {
DRM_WARN("FW attestation input buffer not enough memory");
return -EINVAL;
}
- if ((*pos + sizeof(FW_ATT_DB_HEADER)) >= FW_ATTESTATION_MAX_SIZE) {
+ if ((*pos + sizeof(struct FW_ATT_DB_HEADER)) >= FW_ATTESTATION_MAX_SIZE) {
DRM_WARN("FW attestation out of bounds");
return 0;
}
@@ -83,8 +81,8 @@ static ssize_t amdgpu_fw_attestation_debugfs_read(struct file *f,
if (*pos == 0) {
amdgpu_device_vram_access(adev,
vram_pos,
- (uint32_t*)&fw_att_hdr,
- sizeof(FW_ATT_DB_HEADER),
+ (uint32_t *)&fw_att_hdr,
+ sizeof(struct FW_ATT_DB_HEADER),
false);
if (fw_att_hdr.AttDbCookie != FW_ATTESTATION_DB_COOKIE) {
@@ -96,20 +94,20 @@ static ssize_t amdgpu_fw_attestation_debugfs_read(struct file *f,
}
amdgpu_device_vram_access(adev,
- vram_pos + sizeof(FW_ATT_DB_HEADER) + *pos,
- (uint32_t*)&fw_att_record,
- sizeof(FW_ATT_RECORD),
+ vram_pos + sizeof(struct FW_ATT_DB_HEADER) + *pos,
+ (uint32_t *)&fw_att_record,
+ sizeof(struct FW_ATT_RECORD),
false);
if (fw_att_record.RecordValid != FW_ATTESTATION_RECORD_VALID)
return 0;
- if (copy_to_user(buf, (void*)&fw_att_record, sizeof(FW_ATT_RECORD)))
+ if (copy_to_user(buf, (void *)&fw_att_record, sizeof(struct FW_ATT_RECORD)))
return -EINVAL;
- *pos += sizeof(FW_ATT_RECORD);
+ *pos += sizeof(struct FW_ATT_RECORD);
- return sizeof(FW_ATT_RECORD);
+ return sizeof(struct FW_ATT_RECORD);
}
static const struct file_operations amdgpu_fw_attestation_debugfs_ops = {
@@ -124,6 +122,10 @@ static int amdgpu_is_fw_attestation_supported(struct amdgpu_device *adev)
if (adev->flags & AMD_IS_APU)
return 0;
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(14, 0, 2) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(14, 0, 3))
+ return 0;
+
if (adev->asic_type >= CHIP_SIENNA_CICHLID)
return 1;
@@ -136,7 +138,7 @@ void amdgpu_fw_attestation_debugfs_init(struct amdgpu_device *adev)
return;
debugfs_create_file("amdgpu_fw_attestation",
- S_IRUSR,
+ 0400,
adev_to_drm(adev)->primary->debugfs_root,
adev,
&amdgpu_fw_attestation_debugfs_ops);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index d3e4203f6217..d2237ce9da70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -34,7 +34,9 @@
#include <asm/set_memory.h>
#endif
#include "amdgpu.h"
+#include "amdgpu_reset.h"
#include <drm/drm_drv.h>
+#include <drm/ttm/ttm_tt.h>
/*
* GART
@@ -76,8 +78,9 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
if (adev->dummy_page_addr)
return 0;
- adev->dummy_page_addr = dma_map_page(&adev->pdev->dev, dummy_page, 0,
- PAGE_SIZE, DMA_BIDIRECTIONAL);
+ adev->dummy_page_addr = dma_map_page_attrs(&adev->pdev->dev, dummy_page, 0,
+ PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
if (dma_mapping_error(&adev->pdev->dev, adev->dummy_page_addr)) {
dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n");
adev->dummy_page_addr = 0;
@@ -97,97 +100,174 @@ void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
{
if (!adev->dummy_page_addr)
return;
- dma_unmap_page(&adev->pdev->dev, adev->dummy_page_addr, PAGE_SIZE,
- DMA_BIDIRECTIONAL);
+ dma_unmap_page_attrs(&adev->pdev->dev, adev->dummy_page_addr, PAGE_SIZE,
+ DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
adev->dummy_page_addr = 0;
}
/**
- * amdgpu_gart_table_vram_alloc - allocate vram for gart page table
+ * amdgpu_gart_table_ram_alloc - allocate system ram for gart page table
*
* @adev: amdgpu_device pointer
*
- * Allocate video memory for GART page table
- * (pcie r4xx, r5xx+). These asics require the
- * gart table to be in video memory.
+ * Allocate system memory for GART page table for ASICs that don't have
+ * dedicated VRAM.
* Returns 0 for success, error for failure.
*/
-int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
+int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev)
{
- int r;
+ unsigned int order = get_order(adev->gart.table_size);
+ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO;
+ struct amdgpu_bo *bo = NULL;
+ struct sg_table *sg = NULL;
+ struct amdgpu_bo_param bp;
+ dma_addr_t dma_addr;
+ struct page *p;
+ unsigned long x;
+ int ret;
+
+ if (adev->gart.bo != NULL)
+ return 0;
- if (adev->gart.bo == NULL) {
- struct amdgpu_bo_param bp;
-
- memset(&bp, 0, sizeof(bp));
- bp.size = adev->gart.table_size;
- bp.byte_align = PAGE_SIZE;
- bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
- bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
- bp.type = ttm_bo_type_kernel;
- bp.resv = NULL;
- bp.bo_ptr_size = sizeof(struct amdgpu_bo);
-
- r = amdgpu_bo_create(adev, &bp, &adev->gart.bo);
- if (r) {
- return r;
- }
+ p = alloc_pages(gfp_flags, order);
+ if (!p)
+ return -ENOMEM;
+
+ /* assign pages to this device */
+ for (x = 0; x < (1UL << order); x++)
+ p[x].mapping = adev->mman.bdev.dev_mapping;
+
+ /* If the hardware does not support UTCL2 snooping of the CPU caches
+ * then set_memory_wc() could be used as a workaround to mark the pages
+ * as write combine memory.
+ */
+ dma_addr = dma_map_page(&adev->pdev->dev, p, 0, adev->gart.table_size,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(&adev->pdev->dev, dma_addr)) {
+ dev_err(&adev->pdev->dev, "Failed to DMA MAP the GART BO page\n");
+ __free_pages(p, order);
+ p = NULL;
+ return -EFAULT;
+ }
+
+ dev_info(adev->dev, "%s dma_addr:%pad\n", __func__, &dma_addr);
+ /* Create SG table */
+ sg = kmalloc(sizeof(*sg), GFP_KERNEL);
+ if (!sg) {
+ ret = -ENOMEM;
+ goto error;
}
+ ret = sg_alloc_table(sg, 1, GFP_KERNEL);
+ if (ret)
+ goto error;
+
+ sg_dma_address(sg->sgl) = dma_addr;
+ sg->sgl->length = adev->gart.table_size;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->sgl->dma_length = adev->gart.table_size;
+#endif
+ /* Create SG BO */
+ memset(&bp, 0, sizeof(bp));
+ bp.size = adev->gart.table_size;
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_CPU;
+ bp.type = ttm_bo_type_sg;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+ bp.flags = 0;
+ ret = amdgpu_bo_create(adev, &bp, &bo);
+ if (ret)
+ goto error;
+
+ bo->tbo.sg = sg;
+ bo->tbo.ttm->sg = sg;
+ bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+ bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+
+ ret = amdgpu_bo_reserve(bo, true);
+ if (ret) {
+ dev_err(adev->dev, "(%d) failed to reserve bo for GART system bo\n", ret);
+ goto error;
+ }
+
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ WARN(ret, "Pinning the GART table failed");
+ if (ret)
+ goto error_resv;
+
+ adev->gart.bo = bo;
+ adev->gart.ptr = page_to_virt(p);
+ /* Make GART table accessible in VMID0 */
+ ret = amdgpu_ttm_alloc_gart(&adev->gart.bo->tbo);
+ if (ret)
+ amdgpu_gart_table_ram_free(adev);
+ amdgpu_bo_unreserve(bo);
+
return 0;
+
+error_resv:
+ amdgpu_bo_unreserve(bo);
+error:
+ amdgpu_bo_unref(&bo);
+ if (sg) {
+ sg_free_table(sg);
+ kfree(sg);
+ }
+ __free_pages(p, order);
+ return ret;
}
/**
- * amdgpu_gart_table_vram_pin - pin gart page table in vram
+ * amdgpu_gart_table_ram_free - free gart page table system ram
*
* @adev: amdgpu_device pointer
*
- * Pin the GART page table in vram so it will not be moved
- * by the memory manager (pcie r4xx, r5xx+). These asics require the
- * gart table to be in video memory.
- * Returns 0 for success, error for failure.
+ * Free the system memory used for the GART page table on ASICs that don't
+ * have dedicated VRAM.
*/
-int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev)
+void amdgpu_gart_table_ram_free(struct amdgpu_device *adev)
{
- int r;
-
- r = amdgpu_bo_reserve(adev->gart.bo, false);
- if (unlikely(r != 0))
- return r;
- r = amdgpu_bo_pin(adev->gart.bo, AMDGPU_GEM_DOMAIN_VRAM);
- if (r) {
+ unsigned int order = get_order(adev->gart.table_size);
+ struct sg_table *sg = adev->gart.bo->tbo.sg;
+ struct page *p;
+ unsigned long x;
+ int ret;
+
+ ret = amdgpu_bo_reserve(adev->gart.bo, false);
+ if (!ret) {
+ amdgpu_bo_unpin(adev->gart.bo);
amdgpu_bo_unreserve(adev->gart.bo);
- return r;
}
- r = amdgpu_bo_kmap(adev->gart.bo, &adev->gart.ptr);
- if (r)
- amdgpu_bo_unpin(adev->gart.bo);
- amdgpu_bo_unreserve(adev->gart.bo);
- return r;
+ amdgpu_bo_unref(&adev->gart.bo);
+ sg_free_table(sg);
+ kfree(sg);
+ p = virt_to_page(adev->gart.ptr);
+ for (x = 0; x < (1UL << order); x++)
+ p[x].mapping = NULL;
+ __free_pages(p, order);
+
+ adev->gart.ptr = NULL;
}
/**
- * amdgpu_gart_table_vram_unpin - unpin gart page table in vram
+ * amdgpu_gart_table_vram_alloc - allocate vram for gart page table
*
* @adev: amdgpu_device pointer
*
- * Unpin the GART page table in vram (pcie r4xx, r5xx+).
- * These asics require the gart table to be in video memory.
+ * Allocate video memory for GART page table
+ * (pcie r4xx, r5xx+). These asics require the
+ * gart table to be in video memory.
+ * Returns 0 for success, error for failure.
*/
-void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
+int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
{
- int r;
+ if (adev->gart.bo != NULL)
+ return 0;
- if (adev->gart.bo == NULL) {
- return;
- }
- r = amdgpu_bo_reserve(adev->gart.bo, true);
- if (likely(r == 0)) {
- amdgpu_bo_kunmap(adev->gart.bo);
- amdgpu_bo_unpin(adev->gart.bo);
- amdgpu_bo_unreserve(adev->gart.bo);
- adev->gart.ptr = NULL;
- }
+ return amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo,
+ NULL, (void *)&adev->gart.ptr);
}
/**
@@ -201,11 +281,7 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
*/
void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
{
- if (adev->gart.bo == NULL) {
- return;
- }
- amdgpu_bo_unref(&adev->gart.bo);
- adev->gart.ptr = NULL;
+ amdgpu_bo_free_kernel(&adev->gart.bo, NULL, (void *)&adev->gart.ptr);
}
/*
@@ -222,28 +298,24 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
* replaces them with the dummy page (all asics).
* Returns 0 for success, -EINVAL for failure.
*/
-int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
+void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
int pages)
{
unsigned t;
- unsigned p;
int i, j;
u64 page_base;
/* Starting from VEGA10, system bit must be 0 to mean invalid. */
uint64_t flags = 0;
int idx;
- if (!adev->gart.ready) {
- WARN(1, "trying to unbind memory from uninitialized GART !\n");
- return -EINVAL;
- }
+ if (!adev->gart.ptr)
+ return;
if (!drm_dev_enter(adev_to_drm(adev), &idx))
- return 0;
+ return;
t = offset / AMDGPU_GPU_PAGE_SIZE;
- p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
- for (i = 0; i < pages; i++, p++) {
+ for (i = 0; i < pages; i++) {
page_base = adev->dummy_page_addr;
if (!adev->gart.ptr)
continue;
@@ -254,13 +326,9 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
page_base += AMDGPU_GPU_PAGE_SIZE;
}
}
- mb();
- amdgpu_device_flush_hdp(adev, NULL);
- for (i = 0; i < adev->num_vmhubs; i++)
- amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
+ amdgpu_gart_invalidate_tlb(adev);
drm_dev_exit(idx);
- return 0;
}
/**
@@ -276,7 +344,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
* Map the dma_addresses into GART entries (all asics).
* Returns 0 for success, -EINVAL for failure.
*/
-int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
+void amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
int pages, dma_addr_t *dma_addr, uint64_t flags,
void *dst)
{
@@ -284,13 +352,8 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
unsigned i, j, t;
int idx;
- if (!adev->gart.ready) {
- WARN(1, "trying to bind memory to uninitialized GART !\n");
- return -EINVAL;
- }
-
if (!drm_dev_enter(adev_to_drm(adev), &idx))
- return 0;
+ return;
t = offset / AMDGPU_GPU_PAGE_SIZE;
@@ -302,7 +365,42 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
}
}
drm_dev_exit(idx);
- return 0;
+}
+
+/**
+ * amdgpu_gart_map_vram_range - map VRAM pages into the GART page table
+ *
+ * @adev: amdgpu_device pointer
+ * @pa: physical address of the first page to be mapped
+ * @start_page: first page to map in the GART aperture
+ * @num_pages: number of pages to be mapped
+ * @flags: page table entry flags
+ * @dst: CPU address of the GART table
+ *
+ * Binds a BO that is allocated in VRAM to the GART page table
+ * (all ASICs).
+ *
+ * Useful when a kernel BO is located in VRAM but
+ * needs to be accessed from the GART address space.
+ */
+void amdgpu_gart_map_vram_range(struct amdgpu_device *adev, uint64_t pa,
+ uint64_t start_page, uint64_t num_pages,
+ uint64_t flags, void *dst)
+{
+ u32 i, idx;
+
+ /* The SYSTEM flag indicates the pages aren't in VRAM. */
+ WARN_ON_ONCE(flags & AMDGPU_PTE_SYSTEM);
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return;
+
+ for (i = 0; i < num_pages; ++i) {
+ amdgpu_gmc_set_pte_pde(adev, adev->gart.ptr,
+ start_page + i, pa + AMDGPU_GPU_PAGE_SIZE * i, flags);
+ }
+
+ drm_dev_exit(idx);
}
/**
@@ -318,20 +416,14 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
* (all asics).
* Returns 0 for success, -EINVAL for failure.
*/
-int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
+void amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
int pages, dma_addr_t *dma_addr,
uint64_t flags)
{
- if (!adev->gart.ready) {
- WARN(1, "trying to bind memory to uninitialized GART !\n");
- return -EINVAL;
- }
-
if (!adev->gart.ptr)
- return 0;
+ return;
- return amdgpu_gart_map(adev, offset, pages, dma_addr, flags,
- adev->gart.ptr);
+ amdgpu_gart_map(adev, offset, pages, dma_addr, flags, adev->gart.ptr);
}
/**
@@ -346,9 +438,15 @@ void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev)
{
int i;
+ if (!adev->gart.ptr)
+ return;
+
mb();
- amdgpu_device_flush_hdp(adev, NULL);
- for (i = 0; i < adev->num_vmhubs; i++)
+ if (down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_device_flush_hdp(adev, NULL);
+ up_read(&adev->reset_domain->sem);
+ }
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index 78895413cf9f..d3118275ddae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -46,24 +46,26 @@ struct amdgpu_gart {
unsigned num_gpu_pages;
unsigned num_cpu_pages;
unsigned table_size;
- bool ready;
/* Asic default pte flags */
uint64_t gart_pte_flags;
};
+int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev);
+void amdgpu_gart_table_ram_free(struct amdgpu_device *adev);
int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev);
void amdgpu_gart_table_vram_free(struct amdgpu_device *adev);
-int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
-void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
int amdgpu_gart_init(struct amdgpu_device *adev);
void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
-int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
- int pages);
-int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
- int pages, dma_addr_t *dma_addr, uint64_t flags,
- void *dst);
-int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
- int pages, dma_addr_t *dma_addr, uint64_t flags);
+void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
+ int pages);
+void amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
+ int pages, dma_addr_t *dma_addr, uint64_t flags,
+ void *dst);
+void amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
+ int pages, dma_addr_t *dma_addr, uint64_t flags);
+void amdgpu_gart_map_vram_range(struct amdgpu_device *adev, uint64_t pa,
+ uint64_t start_page, uint64_t num_pages,
+ uint64_t flags, void *dst);
void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index a1e63ba4c54a..3e38c5db2987 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -33,14 +33,125 @@
#include <drm/amdgpu_drm.h>
#include <drm/drm_drv.h>
+#include <drm/drm_exec.h>
#include <drm/drm_gem_ttm_helper.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/drm_syncobj.h>
#include "amdgpu.h"
#include "amdgpu_display.h"
#include "amdgpu_dma_buf.h"
+#include "amdgpu_hmm.h"
#include "amdgpu_xgmi.h"
+#include "amdgpu_vm.h"
-static const struct drm_gem_object_funcs amdgpu_gem_object_funcs;
+static int
+amdgpu_gem_add_input_fence(struct drm_file *filp,
+ uint64_t syncobj_handles_array,
+ uint32_t num_syncobj_handles)
+{
+ struct dma_fence *fence;
+ uint32_t *syncobj_handles;
+ int ret, i;
+
+ if (!num_syncobj_handles)
+ return 0;
+
+ syncobj_handles = memdup_user(u64_to_user_ptr(syncobj_handles_array),
+ size_mul(sizeof(uint32_t), num_syncobj_handles));
+ if (IS_ERR(syncobj_handles))
+ return PTR_ERR(syncobj_handles);
+
+ for (i = 0; i < num_syncobj_handles; i++) {
+
+ if (!syncobj_handles[i]) {
+ ret = -EINVAL;
+ goto free_memdup;
+ }
+
+ ret = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, 0, &fence);
+ if (ret)
+ goto free_memdup;
+
+ dma_fence_wait(fence, false);
+
+ /* TODO: optimize async handling */
+ dma_fence_put(fence);
+ }
+
+free_memdup:
+ kfree(syncobj_handles);
+ return ret;
+}
+
+static int
+amdgpu_gem_update_timeline_node(struct drm_file *filp,
+ uint32_t syncobj_handle,
+ uint64_t point,
+ struct drm_syncobj **syncobj,
+ struct dma_fence_chain **chain)
+{
+ if (!syncobj_handle)
+ return 0;
+
+ /* Find the sync object */
+ *syncobj = drm_syncobj_find(filp, syncobj_handle);
+ if (!*syncobj)
+ return -ENOENT;
+
+ if (!point)
+ return 0;
+
+ /* Allocate the chain node */
+ *chain = dma_fence_chain_alloc();
+ if (!*chain) {
+ drm_syncobj_put(*syncobj);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void
+amdgpu_gem_update_bo_mapping(struct drm_file *filp,
+ struct amdgpu_bo_va *bo_va,
+ uint32_t operation,
+ uint64_t point,
+ struct dma_fence *fence,
+ struct drm_syncobj *syncobj,
+ struct dma_fence_chain *chain)
+{
+ struct amdgpu_bo *bo = bo_va ? bo_va->base.bo : NULL;
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct dma_fence *last_update;
+
+ if (!syncobj)
+ return;
+
+ /* Find the last update fence */
+ switch (operation) {
+ case AMDGPU_VA_OP_MAP:
+ case AMDGPU_VA_OP_REPLACE:
+ if (bo && (bo->tbo.base.resv == vm->root.bo->tbo.base.resv))
+ last_update = vm->last_update;
+ else
+ last_update = bo_va->last_pt_update;
+ break;
+ case AMDGPU_VA_OP_UNMAP:
+ case AMDGPU_VA_OP_CLEAR:
+ last_update = fence;
+ break;
+ default:
+ return;
+ }
+
+ /* Add fence to timeline */
+ if (!point)
+ drm_syncobj_replace_fence(syncobj, last_update);
+ else
+ drm_syncobj_add_point(syncobj, chain, last_update, point);
+}
static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf)
{
@@ -60,10 +171,10 @@ static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf)
goto unlock;
}
- ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
- TTM_BO_VM_NUM_PREFAULT);
+ ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
+ TTM_BO_VM_NUM_PREFAULT);
- drm_dev_exit(idx);
+ drm_dev_exit(idx);
} else {
ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
}
@@ -84,19 +195,17 @@ static const struct vm_operations_struct amdgpu_gem_vm_ops = {
static void amdgpu_gem_object_free(struct drm_gem_object *gobj)
{
- struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj);
+ struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj);
- if (robj) {
- amdgpu_mn_unregister(robj);
- amdgpu_bo_unref(&robj);
- }
+ amdgpu_hmm_unregister(aobj);
+ ttm_bo_fini(&aobj->tbo);
}
int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
int alignment, u32 initial_domain,
u64 flags, enum ttm_bo_type type,
struct dma_resv *resv,
- struct drm_gem_object **obj)
+ struct drm_gem_object **obj, int8_t xcp_id_plus1)
{
struct amdgpu_bo *bo;
struct amdgpu_bo_user *ubo;
@@ -105,6 +214,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
memset(&bp, 0, sizeof(bp));
*obj = NULL;
+ flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
bp.size = size;
bp.byte_align = alignment;
@@ -114,6 +224,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
bp.flags = flags;
bp.domain = initial_domain;
bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+ bp.xcp_id_plus1 = xcp_id_plus1;
r = amdgpu_bo_create_user(adev, &bp, &ubo);
if (r)
@@ -121,7 +232,6 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
bo = &ubo->bo;
*obj = &bo->tbo.base;
- (*obj)->funcs = &amdgpu_gem_object_funcs;
return 0;
}
@@ -170,21 +280,63 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
return -EPERM;
if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID &&
- abo->tbo.base.resv != vm->root.bo->tbo.base.resv)
+ !amdgpu_vm_is_bo_always_valid(vm, abo))
return -EPERM;
r = amdgpu_bo_reserve(abo, false);
if (r)
return r;
+ amdgpu_vm_bo_update_shared(abo);
bo_va = amdgpu_vm_bo_find(vm, abo);
- if (!bo_va) {
+ if (!bo_va)
bo_va = amdgpu_vm_bo_add(adev, vm, abo);
- } else {
+ else
++bo_va->ref_count;
+
+ /* attach gfx eviction fence */
+ r = amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo);
+ if (r) {
+ DRM_DEBUG_DRIVER("Failed to attach eviction fence to BO\n");
+ amdgpu_bo_unreserve(abo);
+ return r;
}
+
amdgpu_bo_unreserve(abo);
- return 0;
+
+ /* Validate and add eviction fence to DMABuf imports with dynamic
+ * attachment in compute VMs. Re-validation will be done by
+ * amdgpu_vm_validate. Fences are on the reservation shared with the
+ * export, which is currently required to be validated and fenced
+ * already by amdgpu_amdkfd_gpuvm_restore_process_bos.
+ *
+ * Nested locking below for the case that a GEM object is opened in
+ * kfd_mem_export_dmabuf. Since the lock below is only taken for imports,
+ * but not for export, this is a different lock class that cannot lead to
+ * circular lock dependencies.
+ */
+ if (!vm->is_compute_context || !vm->process_info)
+ return 0;
+ if (!drm_gem_is_imported(obj) ||
+ !dma_buf_is_dynamic(obj->import_attach->dmabuf))
+ return 0;
+ mutex_lock_nested(&vm->process_info->lock, 1);
+ if (!WARN_ON(!vm->process_info->eviction_fence)) {
+ r = amdgpu_amdkfd_bo_validate_and_fence(abo, AMDGPU_GEM_DOMAIN_GTT,
+ &vm->process_info->eviction_fence->base);
+ if (r) {
+ struct amdgpu_task_info *ti = amdgpu_vm_get_task_info_vm(vm);
+
+ dev_warn(adev->dev, "validate_and_fence failed: %d\n", r);
+ if (ti) {
+ dev_warn(adev->dev, "pid %d\n", ti->task.pid);
+ amdgpu_vm_put_task_info(ti);
+ }
+ }
+ }
+ mutex_unlock(&vm->process_info->lock);
+
+ return r;
}
static void amdgpu_gem_object_close(struct drm_gem_object *obj,
@@ -195,44 +347,40 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
- struct amdgpu_bo_list_entry vm_pd;
- struct list_head list, duplicates;
struct dma_fence *fence = NULL;
- struct ttm_validate_buffer tv;
- struct ww_acquire_ctx ticket;
struct amdgpu_bo_va *bo_va;
+ struct drm_exec exec;
long r;
- INIT_LIST_HEAD(&list);
- INIT_LIST_HEAD(&duplicates);
-
- tv.bo = &bo->tbo;
- tv.num_shared = 2;
- list_add(&tv.head, &list);
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto out_unlock;
+
+ r = amdgpu_vm_lock_pd(vm, &exec, 0);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto out_unlock;
+ }
- amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
+ if (!amdgpu_vm_is_bo_always_valid(vm, bo))
+ amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo);
- r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
- if (r) {
- dev_err(adev->dev, "leaking bo va because "
- "we fail to reserve bo (%ld)\n", r);
- return;
- }
bo_va = amdgpu_vm_bo_find(vm, bo);
if (!bo_va || --bo_va->ref_count)
goto out_unlock;
- amdgpu_vm_bo_rmv(adev, bo_va);
+ amdgpu_vm_bo_del(adev, bo_va);
+ amdgpu_vm_bo_update_shared(bo);
if (!amdgpu_vm_ready(vm))
goto out_unlock;
- fence = dma_resv_excl_fence(bo->tbo.base.resv);
- if (fence) {
- amdgpu_bo_fence(bo, fence, true);
- fence = NULL;
- }
-
r = amdgpu_vm_clear_freed(adev, vm, &fence);
+ if (unlikely(r < 0))
+ dev_err(adev->dev, "failed to clear page "
+ "tables on GEM object close (%ld)\n", r);
if (r || !fence)
goto out_unlock;
@@ -240,10 +388,9 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
dma_fence_put(fence);
out_unlock:
- if (unlikely(r < 0))
- dev_err(adev->dev, "failed to clear page "
- "tables on GEM object close (%ld)\n", r);
- ttm_eu_backoff_reservation(&ticket, &list);
+ if (r)
+ dev_err(adev->dev, "leaking bo va (%ld)\n", r);
+ drm_exec_fini(&exec);
}
static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
@@ -261,13 +408,13 @@ static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_str
* becoming writable and makes is_cow_mapping(vm_flags) false.
*/
if (is_cow_mapping(vma->vm_flags) &&
- !(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
- vma->vm_flags &= ~VM_MAYWRITE;
+ !(vma->vm_flags & VM_ACCESS_FLAGS))
+ vm_flags_clear(vma, VM_MAYWRITE);
return drm_gem_ttm_mmap(obj, vma);
}
-static const struct drm_gem_object_funcs amdgpu_gem_object_funcs = {
+const struct drm_gem_object_funcs amdgpu_gem_object_funcs = {
.free = amdgpu_gem_object_free,
.open = amdgpu_gem_object_open,
.close = amdgpu_gem_object_close,
@@ -296,14 +443,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
int r;
/* reject invalid gem flags */
- if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
- AMDGPU_GEM_CREATE_CPU_GTT_USWC |
- AMDGPU_GEM_CREATE_VRAM_CLEARED |
- AMDGPU_GEM_CREATE_VM_ALWAYS_VALID |
- AMDGPU_GEM_CREATE_EXPLICIT_SYNC |
- AMDGPU_GEM_CREATE_ENCRYPTED))
-
+ if (flags & ~AMDGPU_GEM_CREATE_SETTABLE_MASK)
return -EINVAL;
/* reject invalid gem domains */
@@ -315,6 +455,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
+ /* always clear VRAM */
+ flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
+
+ if (args->in.domains & AMDGPU_GEM_DOMAIN_MMIO_REMAP)
+ return -EINVAL;
+
/* create a gem object to contain this object in */
if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS |
AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
@@ -340,7 +486,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
retry:
r = amdgpu_gem_object_create(adev, size, args->in.alignment,
initial_domain,
- flags, ttm_bo_type_device, resv, &gobj);
+ flags, ttm_bo_type_device, resv, &gobj, fpriv->xcp_id + 1);
if (r && r != -ERESTARTSYS) {
if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
@@ -383,7 +529,9 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
struct ttm_operation_ctx ctx = { true, false };
struct amdgpu_device *adev = drm_to_adev(dev);
struct drm_amdgpu_gem_userptr *args = data;
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct drm_gem_object *gobj;
+ struct amdgpu_hmm_range *range;
struct amdgpu_bo *bo;
uint32_t handle;
int r;
@@ -408,7 +556,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
/* create a gem object to contain this object in */
r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
- 0, ttm_bo_type_device, NULL, &gobj);
+ 0, ttm_bo_type_device, NULL, &gobj, fpriv->xcp_id + 1);
if (r)
return r;
@@ -419,21 +567,25 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
if (r)
goto release_object;
- if (args->flags & AMDGPU_GEM_USERPTR_REGISTER) {
- r = amdgpu_mn_register(bo, args->addr);
- if (r)
- goto release_object;
- }
+ r = amdgpu_hmm_register(bo, args->addr);
+ if (r)
+ goto release_object;
if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
- r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
- if (r)
+ range = amdgpu_hmm_range_alloc(NULL);
+ if (unlikely(!range))
+ return -ENOMEM;
+ r = amdgpu_ttm_tt_get_user_pages(bo, range);
+ if (r) {
+ amdgpu_hmm_range_free(range);
goto release_object;
-
+ }
r = amdgpu_bo_reserve(bo, true);
if (r)
goto user_pages_done;
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
amdgpu_bo_unreserve(bo);
@@ -449,8 +601,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
user_pages_done:
if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
-
+ amdgpu_hmm_range_free(range);
release_object:
drm_gem_object_put(gobj);
@@ -465,9 +616,9 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp,
struct amdgpu_bo *robj;
gobj = drm_gem_object_lookup(filp, handle);
- if (gobj == NULL) {
+ if (!gobj)
return -ENOENT;
- }
+
robj = gem_to_amdgpu_bo(gobj);
if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm) ||
(robj->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
@@ -484,6 +635,7 @@ int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
{
union drm_amdgpu_gem_mmap *args = data;
uint32_t handle = args->in.handle;
+
memset(args, 0, sizeof(*args));
return amdgpu_mode_dumb_mmap(filp, dev, handle, &args->out.addr_ptr);
}
@@ -510,7 +662,7 @@ unsigned long amdgpu_gem_timeout(uint64_t timeout_ns)
timeout_jiffies = nsecs_to_jiffies(ktime_to_ns(timeout));
/* clamp timeout to avoid unsigned-> signed overflow */
- if (timeout_jiffies > MAX_SCHEDULE_TIMEOUT )
+ if (timeout_jiffies > MAX_SCHEDULE_TIMEOUT)
return MAX_SCHEDULE_TIMEOUT - 1;
return timeout_jiffies;
@@ -528,11 +680,12 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
long ret;
gobj = drm_gem_object_lookup(filp, handle);
- if (gobj == NULL) {
+ if (!gobj)
return -ENOENT;
- }
+
robj = gem_to_amdgpu_bo(gobj);
- ret = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, timeout);
+ ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ,
+ true, timeout);
/* ret == 0 means not signaled,
* ret > 0 means signaled
@@ -556,7 +709,7 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
struct amdgpu_bo *robj;
int r = -1;
- DRM_DEBUG("%d \n", args->handle);
+ DRM_DEBUG("%d\n", args->handle);
gobj = drm_gem_object_lookup(filp, args->handle);
if (gobj == NULL)
return -ENOENT;
@@ -601,24 +754,29 @@ out:
*
* Update the bo_va directly after setting its address. Errors are not
* vital here, so they are not reported back to userspace.
+ *
+ * Returns resulting fence if freed BO(s) got cleared from the PT.
+ * otherwise stub fence in case of error.
*/
-static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_bo_va *bo_va,
- uint32_t operation)
+static struct dma_fence *
+amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo_va *bo_va,
+ uint32_t operation)
{
+ struct dma_fence *fence = dma_fence_get_stub();
int r;
if (!amdgpu_vm_ready(vm))
- return;
+ return fence;
- r = amdgpu_vm_clear_freed(adev, vm, NULL);
+ r = amdgpu_vm_clear_freed(adev, vm, &fence);
if (r)
goto error;
if (operation == AMDGPU_VA_OP_MAP ||
operation == AMDGPU_VA_OP_REPLACE) {
- r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
goto error;
}
@@ -628,34 +786,8 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
error:
if (r && r != -ERESTARTSYS)
DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
-}
-/**
- * amdgpu_gem_va_map_flags - map GEM UAPI flags into hardware flags
- *
- * @adev: amdgpu_device pointer
- * @flags: GEM UAPI flags
- *
- * Returns the GEM UAPI flags mapped into hardware for the ASIC.
- */
-uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags)
-{
- uint64_t pte_flag = 0;
-
- if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
- pte_flag |= AMDGPU_PTE_EXECUTABLE;
- if (flags & AMDGPU_VM_PAGE_READABLE)
- pte_flag |= AMDGPU_PTE_READABLE;
- if (flags & AMDGPU_VM_PAGE_WRITEABLE)
- pte_flag |= AMDGPU_PTE_WRITEABLE;
- if (flags & AMDGPU_VM_PAGE_PRT)
- pte_flag |= AMDGPU_PTE_PRT;
-
- if (adev->gmc.gmc_funcs->map_mtype)
- pte_flag |= amdgpu_gmc_map_mtype(adev,
- flags & AMDGPU_VM_MTYPE_MASK);
-
- return pte_flag;
+ return fence;
}
int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
@@ -663,7 +795,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
{
const uint32_t valid_flags = AMDGPU_VM_DELAY_UPDATE |
AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
- AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK;
+ AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK |
+ AMDGPU_VM_PAGE_NOALLOC;
const uint32_t prt_flags = AMDGPU_VM_DELAY_UPDATE |
AMDGPU_VM_PAGE_PRT;
@@ -673,25 +806,24 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct amdgpu_bo *abo;
struct amdgpu_bo_va *bo_va;
- struct amdgpu_bo_list_entry vm_pd;
- struct ttm_validate_buffer tv;
- struct ww_acquire_ctx ticket;
- struct list_head list, duplicates;
- uint64_t va_flags;
+ struct drm_syncobj *timeline_syncobj = NULL;
+ struct dma_fence_chain *timeline_chain = NULL;
+ struct dma_fence *fence;
+ struct drm_exec exec;
uint64_t vm_size;
int r = 0;
- if (args->va_address < AMDGPU_VA_RESERVED_SIZE) {
+ if (args->va_address < AMDGPU_VA_RESERVED_BOTTOM) {
dev_dbg(dev->dev,
- "va_address 0x%LX is in reserved area 0x%LX\n",
- args->va_address, AMDGPU_VA_RESERVED_SIZE);
+ "va_address 0x%llx is in reserved area 0x%llx\n",
+ args->va_address, AMDGPU_VA_RESERVED_BOTTOM);
return -EINVAL;
}
if (args->va_address >= AMDGPU_GMC_HOLE_START &&
args->va_address < AMDGPU_GMC_HOLE_END) {
dev_dbg(dev->dev,
- "va_address 0x%LX is in VA hole 0x%LX-0x%LX\n",
+ "va_address 0x%llx is in VA hole 0x%llx-0x%llx\n",
args->va_address, AMDGPU_GMC_HOLE_START,
AMDGPU_GMC_HOLE_END);
return -EINVAL;
@@ -700,7 +832,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
args->va_address &= AMDGPU_GMC_HOLE_MASK;
vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
- vm_size -= AMDGPU_VA_RESERVED_SIZE;
+ vm_size -= AMDGPU_VA_RESERVED_TOP;
if (args->va_address + args->map_size > vm_size) {
dev_dbg(dev->dev,
"va_address 0x%llx is in top reserved area 0x%llx\n",
@@ -726,36 +858,44 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
- INIT_LIST_HEAD(&list);
- INIT_LIST_HEAD(&duplicates);
if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
!(args->flags & AMDGPU_VM_PAGE_PRT)) {
gobj = drm_gem_object_lookup(filp, args->handle);
if (gobj == NULL)
return -ENOENT;
abo = gem_to_amdgpu_bo(gobj);
- tv.bo = &abo->tbo;
- if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
- tv.num_shared = 1;
- else
- tv.num_shared = 0;
- list_add(&tv.head, &list);
} else {
gobj = NULL;
abo = NULL;
}
- amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
-
- r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
+ r = amdgpu_gem_add_input_fence(filp,
+ args->input_fence_syncobj_handles,
+ args->num_syncobj_handles);
if (r)
- goto error_unref;
+ goto error_put_gobj;
+
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&exec) {
+ if (gobj) {
+ r = drm_exec_lock_obj(&exec, gobj);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto error;
+ }
+
+ r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 2);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto error;
+ }
if (abo) {
bo_va = amdgpu_vm_bo_find(&fpriv->vm, abo);
if (!bo_va) {
r = -ENOENT;
- goto error_backoff;
+ goto error;
}
} else if (args->operation != AMDGPU_VA_OP_CLEAR) {
bo_va = fpriv->prt_va;
@@ -763,12 +903,19 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
bo_va = NULL;
}
+ r = amdgpu_gem_update_timeline_node(filp,
+ args->vm_timeline_syncobj_out,
+ args->vm_timeline_point,
+ &timeline_syncobj,
+ &timeline_chain);
+ if (r)
+ goto error;
+
switch (args->operation) {
case AMDGPU_VA_OP_MAP:
- va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
args->offset_in_bo, args->map_size,
- va_flags);
+ args->flags);
break;
case AMDGPU_VA_OP_UNMAP:
r = amdgpu_vm_bo_unmap(adev, bo_va, args->va_address);
@@ -780,22 +927,31 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
args->map_size);
break;
case AMDGPU_VA_OP_REPLACE:
- va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
args->offset_in_bo, args->map_size,
- va_flags);
+ args->flags);
break;
default:
break;
}
- if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
- amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
- args->operation);
+ if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) {
+ fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
+ args->operation);
+
+ if (timeline_syncobj)
+ amdgpu_gem_update_bo_mapping(filp, bo_va,
+ args->operation,
+ args->vm_timeline_point,
+ fence, timeline_syncobj,
+ timeline_chain);
+ else
+ dma_fence_put(fence);
-error_backoff:
- ttm_eu_backoff_reservation(&ticket, &list);
+ }
-error_unref:
+error:
+ drm_exec_fini(&exec);
+error_put_gobj:
drm_gem_object_put(gobj);
return r;
}
@@ -803,22 +959,38 @@ error_unref:
int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
- struct amdgpu_device *adev = drm_to_adev(dev);
struct drm_amdgpu_gem_op *args = data;
struct drm_gem_object *gobj;
struct amdgpu_vm_bo_base *base;
struct amdgpu_bo *robj;
+ struct drm_exec exec;
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
int r;
+ if (args->padding)
+ return -EINVAL;
+
gobj = drm_gem_object_lookup(filp, args->handle);
- if (gobj == NULL) {
+ if (!gobj)
return -ENOENT;
- }
+
robj = gem_to_amdgpu_bo(gobj);
- r = amdgpu_bo_reserve(robj, false);
- if (unlikely(r))
- goto out;
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = drm_exec_lock_obj(&exec, gobj);
+ drm_exec_retry_on_contention(&exec);
+ if (r)
+ goto out_exec;
+
+ if (args->op == AMDGPU_GEM_OP_GET_MAPPING_INFO) {
+ r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 0);
+ drm_exec_retry_on_contention(&exec);
+ if (r)
+ goto out_exec;
+ }
+ }
switch (args->op) {
case AMDGPU_GEM_OP_GET_GEM_CREATE_INFO: {
@@ -829,29 +1001,26 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
info.alignment = robj->tbo.page_alignment << PAGE_SHIFT;
info.domains = robj->preferred_domains;
info.domain_flags = robj->flags;
- amdgpu_bo_unreserve(robj);
+ drm_exec_fini(&exec);
if (copy_to_user(out, &info, sizeof(info)))
r = -EFAULT;
break;
}
case AMDGPU_GEM_OP_SET_PLACEMENT:
- if (robj->tbo.base.import_attach &&
+ if (drm_gem_is_imported(&robj->tbo.base) &&
args->value & AMDGPU_GEM_DOMAIN_VRAM) {
r = -EINVAL;
- amdgpu_bo_unreserve(robj);
- break;
+ goto out_exec;
}
if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm)) {
r = -EPERM;
- amdgpu_bo_unreserve(robj);
- break;
+ goto out_exec;
}
for (base = robj->vm_bo; base; base = base->next)
if (amdgpu_xgmi_same_hive(amdgpu_ttm_adev(robj->tbo.bdev),
amdgpu_ttm_adev(base->vm->root.bo->tbo.bdev))) {
r = -EINVAL;
- amdgpu_bo_unreserve(robj);
- goto out;
+ goto out_exec;
}
@@ -863,18 +1032,173 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
if (robj->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
- amdgpu_vm_bo_invalidate(adev, robj, true);
+ amdgpu_vm_bo_invalidate(robj, true);
+ drm_exec_fini(&exec);
+ break;
+ case AMDGPU_GEM_OP_GET_MAPPING_INFO: {
+ struct amdgpu_bo_va *bo_va = amdgpu_vm_bo_find(&fpriv->vm, robj);
+ struct drm_amdgpu_gem_vm_entry *vm_entries;
+ struct amdgpu_bo_va_mapping *mapping;
+ int num_mappings = 0;
+ /*
+ * num_entries is set as an input to the size of the user-allocated array of
+ * drm_amdgpu_gem_vm_entry stored at args->value.
+ * num_entries is sent back as output as the number of mappings the bo has.
+ * If that number is larger than the size of the array, the ioctl must
+ * be retried.
+ */
+ vm_entries = kvcalloc(args->num_entries, sizeof(*vm_entries), GFP_KERNEL);
+ if (!vm_entries)
+ return -ENOMEM;
+
+ amdgpu_vm_bo_va_for_each_valid_mapping(bo_va, mapping) {
+ if (num_mappings < args->num_entries) {
+ vm_entries[num_mappings].addr = mapping->start * AMDGPU_GPU_PAGE_SIZE;
+ vm_entries[num_mappings].size = (mapping->last - mapping->start + 1) * AMDGPU_GPU_PAGE_SIZE;
+ vm_entries[num_mappings].offset = mapping->offset;
+ vm_entries[num_mappings].flags = mapping->flags;
+ }
+ num_mappings += 1;
+ }
+
+ amdgpu_vm_bo_va_for_each_invalid_mapping(bo_va, mapping) {
+ if (num_mappings < args->num_entries) {
+ vm_entries[num_mappings].addr = mapping->start * AMDGPU_GPU_PAGE_SIZE;
+ vm_entries[num_mappings].size = (mapping->last - mapping->start + 1) * AMDGPU_GPU_PAGE_SIZE;
+ vm_entries[num_mappings].offset = mapping->offset;
+ vm_entries[num_mappings].flags = mapping->flags;
+ }
+ num_mappings += 1;
+ }
+
+ drm_exec_fini(&exec);
+
+ if (num_mappings > 0 && num_mappings <= args->num_entries)
+ if (copy_to_user(u64_to_user_ptr(args->value), vm_entries, num_mappings * sizeof(*vm_entries)))
+ r = -EFAULT;
- amdgpu_bo_unreserve(robj);
+ args->num_entries = num_mappings;
+
+ kvfree(vm_entries);
break;
+ }
default:
- amdgpu_bo_unreserve(robj);
+ drm_exec_fini(&exec);
r = -EINVAL;
}
-out:
drm_gem_object_put(gobj);
return r;
+out_exec:
+ drm_exec_fini(&exec);
+ drm_gem_object_put(gobj);
+ return r;
+}
+
+/**
+ * amdgpu_gem_list_handles_ioctl - get information about a process' buffer objects
+ *
+ * @dev: drm device pointer
+ * @data: drm_amdgpu_gem_list_handles
+ * @filp: drm file pointer
+ *
+ * num_entries is set as an input to the size of the entries array.
+ * num_entries is sent back as output as the number of bos in the process.
+ * If that number is larger than the size of the array, the ioctl must
+ * be retried.
+ *
+ * Returns:
+ * 0 for success, -errno for errors.
+ */
+int amdgpu_gem_list_handles_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct drm_amdgpu_gem_list_handles *args = data;
+ struct drm_amdgpu_gem_list_handles_entry *bo_entries;
+ struct drm_gem_object *gobj;
+ int id, ret = 0;
+ int bo_index = 0;
+ int num_bos = 0;
+
+ spin_lock(&filp->table_lock);
+ idr_for_each_entry(&filp->object_idr, gobj, id)
+ num_bos += 1;
+ spin_unlock(&filp->table_lock);
+
+ if (args->num_entries < num_bos) {
+ args->num_entries = num_bos;
+ return 0;
+ }
+
+ if (num_bos == 0) {
+ args->num_entries = 0;
+ return 0;
+ }
+
+ bo_entries = kvcalloc(num_bos, sizeof(*bo_entries), GFP_KERNEL);
+ if (!bo_entries)
+ return -ENOMEM;
+
+ spin_lock(&filp->table_lock);
+ idr_for_each_entry(&filp->object_idr, gobj, id) {
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
+ struct drm_amdgpu_gem_list_handles_entry *bo_entry;
+
+ if (bo_index >= num_bos) {
+ ret = -EAGAIN;
+ break;
+ }
+
+ bo_entry = &bo_entries[bo_index];
+
+ bo_entry->size = amdgpu_bo_size(bo);
+ bo_entry->alloc_flags = bo->flags & AMDGPU_GEM_CREATE_SETTABLE_MASK;
+ bo_entry->preferred_domains = bo->preferred_domains;
+ bo_entry->gem_handle = id;
+ bo_entry->alignment = bo->tbo.page_alignment;
+
+ if (bo->tbo.base.import_attach)
+ bo_entry->flags |= AMDGPU_GEM_LIST_HANDLES_FLAG_IS_IMPORT;
+
+ bo_index += 1;
+ }
+ spin_unlock(&filp->table_lock);
+
+ args->num_entries = bo_index;
+
+ if (!ret)
+ if (copy_to_user(u64_to_user_ptr(args->entries), bo_entries, num_bos * sizeof(*bo_entries)))
+ ret = -EFAULT;
+
+ kvfree(bo_entries);
+
+ return ret;
+}
+
+static int amdgpu_gem_align_pitch(struct amdgpu_device *adev,
+ int width,
+ int cpp,
+ bool tiled)
+{
+ int aligned = width;
+ int pitch_mask = 0;
+
+ switch (cpp) {
+ case 1:
+ pitch_mask = 255;
+ break;
+ case 2:
+ pitch_mask = 127;
+ break;
+ case 3:
+ case 4:
+ pitch_mask = 63;
+ break;
+ }
+
+ aligned += pitch_mask;
+ aligned &= ~pitch_mask;
+ return aligned * cpp;
}
int amdgpu_mode_dumb_create(struct drm_file *file_priv,
@@ -882,10 +1206,12 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
struct drm_mode_create_dumb *args)
{
struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
struct drm_gem_object *gobj;
uint32_t handle;
u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
u32 domain;
int r;
@@ -897,23 +1223,23 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
if (adev->mman.buffer_funcs_enabled)
flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
- args->pitch = amdgpu_align_pitch(adev, args->width,
- DIV_ROUND_UP(args->bpp, 8), 0);
+ args->pitch = amdgpu_gem_align_pitch(adev, args->width,
+ DIV_ROUND_UP(args->bpp, 8), 0);
args->size = (u64)args->pitch * args->height;
args->size = ALIGN(args->size, PAGE_SIZE);
domain = amdgpu_bo_get_preferred_domain(adev,
amdgpu_display_supported_domains(adev, flags));
r = amdgpu_gem_object_create(adev, args->size, 0, domain, flags,
- ttm_bo_type_device, NULL, &gobj);
+ ttm_bo_type_device, NULL, &gobj, fpriv->xcp_id + 1);
if (r)
return -ENOMEM;
r = drm_gem_handle_create(file_priv, gobj, &handle);
/* drop reference from allocate - handle holds it now */
drm_gem_object_put(gobj);
- if (r) {
+ if (r)
return r;
- }
+
args->handle = handle;
return 0;
}
@@ -921,7 +1247,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
#if defined(CONFIG_DEBUG_FS)
static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct amdgpu_device *adev = m->private;
struct drm_device *dev = adev_to_drm(adev);
struct drm_file *file;
int r;
@@ -933,6 +1259,7 @@ static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused)
list_for_each_entry(file, &dev->filelist, lhead) {
struct task_struct *task;
struct drm_gem_object *gobj;
+ struct pid *pid;
int id;
/*
@@ -942,8 +1269,9 @@ static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused)
* Therefore, we need to protect this ->comm access using RCU.
*/
rcu_read_lock();
- task = pid_task(file->pid, PIDTYPE_PID);
- seq_printf(m, "pid %8d command %s:\n", pid_nr(file->pid),
+ pid = rcu_dereference(file->pid);
+ task = pid_task(pid, PIDTYPE_TGID);
+ seq_printf(m, "pid %8d command %s:\n", pid_nr(pid),
task ? task->comm : "<unknown>");
rcu_read_unlock();
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
index 637bf51dbf06..b558336bc4c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
@@ -33,6 +33,8 @@
#define AMDGPU_GEM_DOMAIN_MAX 0x3
#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, tbo.base)
+extern const struct drm_gem_object_funcs amdgpu_gem_object_funcs;
+
unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
/*
@@ -43,8 +45,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
int alignment, u32 initial_domain,
u64 flags, enum ttm_bo_type type,
struct dma_resv *resv,
- struct drm_gem_object **obj);
-
+ struct drm_gem_object **obj, int8_t xcp_id_plus1);
int amdgpu_mode_dumb_create(struct drm_file *file_priv,
struct drm_device *dev,
struct drm_mode_create_dumb *args);
@@ -62,13 +63,28 @@ int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
-uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags);
int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
+int amdgpu_gem_list_handles_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
+#define AMDGPU_GEM_CREATE_SETTABLE_MASK (AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | \
+ AMDGPU_GEM_CREATE_NO_CPU_ACCESS | \
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC | \
+ AMDGPU_GEM_CREATE_VRAM_CLEARED | \
+ AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | \
+ AMDGPU_GEM_CREATE_EXPLICIT_SYNC | \
+ AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE | \
+ AMDGPU_GEM_CREATE_ENCRYPTED | \
+ AMDGPU_GEM_CREATE_GFX12_DCC | \
+ AMDGPU_GEM_CREATE_DISCARDABLE | \
+ AMDGPU_GEM_CREATE_COHERENT | \
+ AMDGPU_GEM_CREATE_UNCACHED | \
+ AMDGPU_GEM_CREATE_EXT_COHERENT)
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 1916ec84dd71..8b118c53f351 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -23,10 +23,18 @@
*
*/
+#include <linux/firmware.h>
+#include <linux/pm_runtime.h>
+
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_xcp.h"
+#include "amdgpu_xgmi.h"
+#include "amdgpu_mes.h"
+#include "nvd.h"
/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
@@ -62,35 +70,26 @@ void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
}
bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
- int mec, int pipe, int queue)
+ int xcc_id, int mec, int pipe, int queue)
{
return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
- adev->gfx.mec.queue_bitmap);
+ adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
}
-int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
- int me, int pipe, int queue)
+static int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
+ int me, int pipe, int queue)
{
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
int bit = 0;
bit += me * adev->gfx.me.num_pipe_per_me
- * adev->gfx.me.num_queue_per_pipe;
- bit += pipe * adev->gfx.me.num_queue_per_pipe;
+ * num_queue_per_pipe;
+ bit += pipe * num_queue_per_pipe;
bit += queue;
return bit;
}
-void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
- int *me, int *pipe, int *queue)
-{
- *queue = bit % adev->gfx.me.num_queue_per_pipe;
- *pipe = (bit / adev->gfx.me.num_queue_per_pipe)
- % adev->gfx.me.num_pipe_per_me;
- *me = (bit / adev->gfx.me.num_queue_per_pipe)
- / adev->gfx.me.num_pipe_per_me;
-}
-
bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
int me, int pipe, int queue)
{
@@ -99,42 +98,6 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
}
/**
- * amdgpu_gfx_scratch_get - Allocate a scratch register
- *
- * @adev: amdgpu_device pointer
- * @reg: scratch register mmio offset
- *
- * Allocate a CP scratch register for use by the driver (all asics).
- * Returns 0 on success or -EINVAL on failure.
- */
-int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg)
-{
- int i;
-
- i = ffs(adev->gfx.scratch.free_mask);
- if (i != 0 && i <= adev->gfx.scratch.num_reg) {
- i--;
- adev->gfx.scratch.free_mask &= ~(1u << i);
- *reg = adev->gfx.scratch.reg_base + i;
- return 0;
- }
- return -EINVAL;
-}
-
-/**
- * amdgpu_gfx_scratch_free - Free a scratch register
- *
- * @adev: amdgpu_device pointer
- * @reg: scratch register mmio offset
- *
- * Free a CP scratch register allocated for use by the driver (all asics)
- */
-void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg)
-{
- adev->gfx.scratch.free_mask |= 1u << (reg - adev->gfx.scratch.reg_base);
-}
-
-/**
* amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
*
* @mask: array in which the per-shader array disable masks will be stored
@@ -144,9 +107,9 @@ void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg)
* The bitmask of CUs to be disabled in the shader array determined by se and
* sh is stored in mask[se * max_sh + sh].
*/
-void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh)
+void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsigned int max_sh)
{
- unsigned se, sh, cu;
+ unsigned int se, sh, cu;
const char *p;
memset(mask, 0, sizeof(*mask) * max_se * max_sh);
@@ -158,6 +121,7 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s
for (;;) {
char *next;
int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
+
if (ret < 3) {
DRM_ERROR("amdgpu: could not parse disable_cu\n");
return;
@@ -178,14 +142,22 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s
}
}
-static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
+static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
+{
+ return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
+}
+
+static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
{
if (amdgpu_compute_multipipe != -1) {
- DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
+ dev_info(adev->dev, "amdgpu: forcing compute pipe policy %d\n",
amdgpu_compute_multipipe);
return amdgpu_compute_multipipe == 1;
}
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
+ return true;
+
/* FIXME: spreading the queues across pipes causes perf regressions
* on POLARIS11 compute workloads */
if (adev->asic_type == CHIP_POLARIS11)
@@ -194,6 +166,28 @@ static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
return adev->gfx.mec.num_mec > 1;
}
+bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ int queue = ring->queue;
+ int pipe = ring->pipe;
+
+ /* Policy: use pipe1 queue0 as high priority graphics queue if we
+ * have more than one gfx pipe.
+ */
+ if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
+ adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
+ int me = ring->me;
+ int bit;
+
+ bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
+ if (ring == &adev->gfx.gfx_ring[bit])
+ return true;
+ }
+
+ return false;
+}
+
bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
@@ -209,55 +203,71 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
{
- int i, queue, pipe;
- bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
+ int i, j, queue, pipe;
+ bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
adev->gfx.mec.num_queue_per_pipe,
adev->gfx.num_compute_rings);
+ int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
if (multipipe_policy) {
- /* policy: make queues evenly cross all pipes on MEC1 only */
- for (i = 0; i < max_queues_per_mec; i++) {
- pipe = i % adev->gfx.mec.num_pipe_per_mec;
- queue = (i / adev->gfx.mec.num_pipe_per_mec) %
- adev->gfx.mec.num_queue_per_pipe;
-
- set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
- adev->gfx.mec.queue_bitmap);
+ /* policy: make queues evenly cross all pipes on MEC1 only
+ * for multiple xcc, just use the original policy for simplicity */
+ for (j = 0; j < num_xcc; j++) {
+ for (i = 0; i < max_queues_per_mec; i++) {
+ pipe = i % adev->gfx.mec.num_pipe_per_mec;
+ queue = (i / adev->gfx.mec.num_pipe_per_mec) %
+ adev->gfx.mec.num_queue_per_pipe;
+
+ set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
+ adev->gfx.mec_bitmap[j].queue_bitmap);
+ }
}
} else {
/* policy: amdgpu owns all queues in the given pipe */
- for (i = 0; i < max_queues_per_mec; ++i)
- set_bit(i, adev->gfx.mec.queue_bitmap);
+ for (j = 0; j < num_xcc; j++) {
+ for (i = 0; i < max_queues_per_mec; ++i)
+ set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
+ }
}
- dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
+ for (j = 0; j < num_xcc; j++) {
+ dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
+ bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
+ }
}
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
{
- int i, queue, me;
-
- for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) {
- queue = i % adev->gfx.me.num_queue_per_pipe;
- me = (i / adev->gfx.me.num_queue_per_pipe)
- / adev->gfx.me.num_pipe_per_me;
+ int i, queue, pipe;
+ bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
+ int max_queues_per_me = adev->gfx.me.num_pipe_per_me * num_queue_per_pipe;
- if (me >= adev->gfx.me.num_me)
- break;
+ if (multipipe_policy) {
/* policy: amdgpu owns the first queue per pipe at this stage
* will extend to mulitple queues per pipe later */
- if (me == 0 && queue < 1)
+ for (i = 0; i < max_queues_per_me; i++) {
+ pipe = i % adev->gfx.me.num_pipe_per_me;
+ queue = (i / adev->gfx.me.num_pipe_per_me) %
+ num_queue_per_pipe;
+
+ set_bit(pipe * num_queue_per_pipe + queue,
+ adev->gfx.me.queue_bitmap);
+ }
+ } else {
+ for (i = 0; i < max_queues_per_me; ++i)
set_bit(i, adev->gfx.me.queue_bitmap);
}
/* update the number of active graphics rings */
- adev->gfx.num_gfx_rings =
- bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
+ if (adev->gfx.num_gfx_rings)
+ adev->gfx.num_gfx_rings =
+ bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
}
static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
+ struct amdgpu_ring *ring, int xcc_id)
{
int queue_bit;
int mec, pipe, queue;
@@ -266,8 +276,8 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
* adev->gfx.mec.num_pipe_per_mec
* adev->gfx.mec.num_queue_per_pipe;
- while (queue_bit-- >= 0) {
- if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
+ while (--queue_bit >= 0) {
+ if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
continue;
amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
@@ -291,11 +301,11 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
return -EINVAL;
}
-int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
- struct amdgpu_ring *ring,
- struct amdgpu_irq_src *irq)
+int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id)
{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_irq_src *irq = &kiq->irq;
+ struct amdgpu_ring *ring = &kiq->ring;
int r = 0;
spin_lock_init(&kiq->ring_lock);
@@ -303,15 +313,22 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
ring->adev = NULL;
ring->ring_obj = NULL;
ring->use_doorbell = true;
- ring->doorbell_index = adev->doorbell_index.kiq;
-
- r = amdgpu_gfx_kiq_acquire(adev, ring);
+ ring->xcc_id = xcc_id;
+ ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
+ ring->doorbell_index =
+ (adev->doorbell_index.kiq +
+ xcc_id * adev->doorbell_index.xcc_doorbell_range)
+ << 1;
+
+ r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
if (r)
return r;
ring->eop_gpu_addr = kiq->eop_gpu_addr;
ring->no_scheduler = true;
- sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+ snprintf(ring->name, sizeof(ring->name), "kiq_%hhu.%hhu.%hhu.%hhu",
+ (unsigned char)xcc_id, (unsigned char)ring->me,
+ (unsigned char)ring->pipe, (unsigned char)ring->queue);
r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
@@ -325,19 +342,19 @@ void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
amdgpu_ring_fini(ring);
}
-void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev)
+void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}
int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
- unsigned hpd_size)
+ unsigned int hpd_size, int xcc_id)
{
int r;
u32 *hpd;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
@@ -360,31 +377,44 @@ int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
/* create MQD for each compute/gfx queue */
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
- unsigned mqd_size)
+ unsigned int mqd_size, int xcc_id)
{
- struct amdgpu_ring *ring = NULL;
- int r, i;
+ int r, i, j;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *ring = &kiq->ring;
+ u32 domain = AMDGPU_GEM_DOMAIN_GTT;
+
+#if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
+ /* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
+ domain |= AMDGPU_GEM_DOMAIN_VRAM;
+#endif
/* create MQD for KIQ */
- ring = &adev->gfx.kiq.ring;
- if (!ring->mqd_obj) {
+ if (!adev->enable_mes_kiq && !ring->mqd_obj) {
/* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must
* otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD
* deallocated and gart_unbind, to strict diverage we decide to use VRAM domain for
* KIQ MQD no matter SRIOV or Bare-metal
*/
r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM, &ring->mqd_obj,
- &ring->mqd_gpu_addr, &ring->mqd_ptr);
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &ring->mqd_obj,
+ &ring->mqd_gpu_addr,
+ &ring->mqd_ptr);
if (r) {
dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
return r;
}
/* prepare MQD backup */
- adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL);
- if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
- dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
+ kiq->mqd_backup = kzalloc(mqd_size, GFP_KERNEL);
+ if (!kiq->mqd_backup) {
+ dev_warn(adev->dev,
+ "no memory to create MQD backup for ring %s\n", ring->name);
+ return -ENOMEM;
+ }
}
if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
@@ -393,47 +423,55 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
ring = &adev->gfx.gfx_ring[i];
if (!ring->mqd_obj) {
r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+ domain, &ring->mqd_obj,
&ring->mqd_gpu_addr, &ring->mqd_ptr);
if (r) {
dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
return r;
}
+ ring->mqd_size = mqd_size;
/* prepare MQD backup */
- adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
- if (!adev->gfx.me.mqd_backup[i])
+ adev->gfx.me.mqd_backup[i] = kzalloc(mqd_size, GFP_KERNEL);
+ if (!adev->gfx.me.mqd_backup[i]) {
dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
+ return -ENOMEM;
+ }
}
}
}
/* create MQD for each KCQ */
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ ring = &adev->gfx.compute_ring[j];
if (!ring->mqd_obj) {
r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+ domain, &ring->mqd_obj,
&ring->mqd_gpu_addr, &ring->mqd_ptr);
if (r) {
dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
return r;
}
+ ring->mqd_size = mqd_size;
/* prepare MQD backup */
- adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
- if (!adev->gfx.mec.mqd_backup[i])
+ adev->gfx.mec.mqd_backup[j] = kzalloc(mqd_size, GFP_KERNEL);
+ if (!adev->gfx.mec.mqd_backup[j]) {
dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
+ return -ENOMEM;
+ }
}
}
return 0;
}
-void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
+void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
{
struct amdgpu_ring *ring = NULL;
- int i;
+ int i, j;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
@@ -446,41 +484,121 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
}
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
- kfree(adev->gfx.mec.mqd_backup[i]);
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ ring = &adev->gfx.compute_ring[j];
+ kfree(adev->gfx.mec.mqd_backup[j]);
amdgpu_bo_free_kernel(&ring->mqd_obj,
&ring->mqd_gpu_addr,
&ring->mqd_ptr);
}
- ring = &adev->gfx.kiq.ring;
- kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
+ ring = &kiq->ring;
+ kfree(kiq->mqd_backup);
amdgpu_bo_free_kernel(&ring->mqd_obj,
&ring->mqd_gpu_addr,
&ring->mqd_ptr);
}
-int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
+int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
struct amdgpu_ring *kiq_ring = &kiq->ring;
- int i, r;
+ int i, r = 0;
+ int j;
+
+ if (adev->enable_mes) {
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ amdgpu_mes_unmap_legacy_queue(adev,
+ &adev->gfx.compute_ring[j],
+ RESET_QUEUES, 0, 0);
+ }
+ return 0;
+ }
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
- spin_lock(&adev->gfx.kiq.ring_lock);
+ if (!kiq_ring->sched.ready || amdgpu_in_reset(adev))
+ return 0;
+
+ spin_lock(&kiq->ring_lock);
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
adev->gfx.num_compute_rings)) {
- spin_unlock(&adev->gfx.kiq.ring_lock);
+ spin_unlock(&kiq->ring_lock);
return -ENOMEM;
}
- for (i = 0; i < adev->gfx.num_compute_rings; i++)
- kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ kiq->pmf->kiq_unmap_queues(kiq_ring,
+ &adev->gfx.compute_ring[j],
RESET_QUEUES, 0, 0);
+ }
+ /* Submit unmap queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that unmap queues that is submitted before got
+ * processed successfully before returning.
+ */
r = amdgpu_ring_test_helper(kiq_ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
+
+ spin_unlock(&kiq->ring_lock);
+
+ return r;
+}
+
+int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ int i, r = 0;
+ int j;
+
+ if (adev->enable_mes) {
+ if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_gfx_rings;
+ amdgpu_mes_unmap_legacy_queue(adev,
+ &adev->gfx.gfx_ring[j],
+ PREEMPT_QUEUES, 0, 0);
+ }
+ }
+ return 0;
+ }
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ if (!adev->gfx.kiq[0].ring.sched.ready || amdgpu_in_reset(adev))
+ return 0;
+
+ if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+ spin_lock(&kiq->ring_lock);
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
+ adev->gfx.num_gfx_rings)) {
+ spin_unlock(&kiq->ring_lock);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_gfx_rings;
+ kiq->pmf->kiq_unmap_queues(kiq_ring,
+ &adev->gfx.gfx_ring[j],
+ PREEMPT_QUEUES, 0, 0);
+ }
+ /* Submit unmap queue packet */
+ amdgpu_ring_commit(kiq_ring);
+
+ /*
+ * Ring test will do a basic scratch register change check.
+ * Just run this to ensure that unmap queues that is submitted
+ * before got processed successfully before returning.
+ */
+ r = amdgpu_ring_test_helper(kiq_ring);
+ spin_unlock(&kiq->ring_lock);
+ }
return r;
}
@@ -498,67 +616,167 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
return set_resource_bit;
}
-int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
+static int amdgpu_gfx_mes_enable_kcq(struct amdgpu_device *adev, int xcc_id)
{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ uint64_t queue_mask = ~0ULL;
+ int r, i, j;
+
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ if (!adev->enable_uni_mes) {
+ spin_lock(&kiq->ring_lock);
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->set_resources_size);
+ if (r) {
+ dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
+ spin_unlock(&kiq->ring_lock);
+ return r;
+ }
+
+ kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
+ r = amdgpu_ring_test_helper(kiq_ring);
+ spin_unlock(&kiq->ring_lock);
+ if (r)
+ dev_err(adev->dev, "KIQ failed to set resources\n");
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ r = amdgpu_mes_map_legacy_queue(adev,
+ &adev->gfx.compute_ring[j]);
+ if (r) {
+ dev_err(adev->dev, "failed to map compute queue\n");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
uint64_t queue_mask = 0;
- int r, i;
+ int r, i, j;
+
+ if (adev->mes.enable_legacy_queue_map)
+ return amdgpu_gfx_mes_enable_kcq(adev, xcc_id);
if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
return -EINVAL;
for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
- if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+ if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
continue;
/* This situation may be hit in the future if a new HW
* generation exposes more than 64 queues. If so, the
* definition of queue_mask needs updating */
if (WARN_ON(i > (sizeof(queue_mask)*8))) {
- DRM_ERROR("Invalid KCQ enabled: %d\n", i);
+ dev_err(adev->dev, "Invalid KCQ enabled: %d\n", i);
break;
}
queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
}
- DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
- kiq_ring->queue);
- spin_lock(&adev->gfx.kiq.ring_lock);
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ dev_info(adev->dev, "kiq ring mec %d pipe %d q %d\n", kiq_ring->me,
+ kiq_ring->pipe, kiq_ring->queue);
+
+ spin_lock(&kiq->ring_lock);
r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
adev->gfx.num_compute_rings +
kiq->pmf->set_resources_size);
if (r) {
- DRM_ERROR("Failed to lock KIQ (%d).\n", r);
- spin_unlock(&adev->gfx.kiq.ring_lock);
+ dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
+ spin_unlock(&kiq->ring_lock);
return r;
}
kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
- for (i = 0; i < adev->gfx.num_compute_rings; i++)
- kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]);
-
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ kiq->pmf->kiq_map_queues(kiq_ring,
+ &adev->gfx.compute_ring[j]);
+ }
+ /* Submit map queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that map queues that is submitted before got
+ * processed successfully before returning.
+ */
r = amdgpu_ring_test_helper(kiq_ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
+ spin_unlock(&kiq->ring_lock);
if (r)
- DRM_ERROR("KCQ enable failed\n");
+ dev_err(adev->dev, "KCQ enable failed\n");
return r;
}
-/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
- *
- * @adev: amdgpu_device pointer
- * @bool enable true: enable gfx off feature, false: disable gfx off feature
- *
- * 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled.
- * 2. other client can send request to disable gfx off feature, the request should be honored.
- * 3. other client can cancel their request of disable gfx off feature
- * 4. other client should not send request to enable gfx off feature before disable gfx off feature.
- */
+int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ int r, i, j;
-void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
+ if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
+ return -EINVAL;
+
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ if (adev->mes.enable_legacy_queue_map) {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_gfx_rings;
+ r = amdgpu_mes_map_legacy_queue(adev,
+ &adev->gfx.gfx_ring[j]);
+ if (r) {
+ dev_err(adev->dev, "failed to map gfx queue\n");
+ return r;
+ }
+ }
+
+ return 0;
+ }
+
+ spin_lock(&kiq->ring_lock);
+ /* No need to map kcq on the slave */
+ if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
+ adev->gfx.num_gfx_rings);
+ if (r) {
+ dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
+ spin_unlock(&kiq->ring_lock);
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_gfx_rings;
+ kiq->pmf->kiq_map_queues(kiq_ring,
+ &adev->gfx.gfx_ring[j]);
+ }
+ }
+ /* Submit map queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that map queues that is submitted before got
+ * processed successfully before returning.
+ */
+ r = amdgpu_ring_test_helper(kiq_ring);
+ spin_unlock(&kiq->ring_lock);
+ if (r)
+ dev_err(adev->dev, "KGQ enable failed\n");
+
+ return r;
+}
+
+static void amdgpu_gfx_do_off_ctrl(struct amdgpu_device *adev, bool enable,
+ bool no_delay)
{
unsigned long delay = GFX_OFF_DELAY_ENABLE;
@@ -580,17 +798,21 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
if (adev->gfx.gfx_off_req_count == 0 &&
!adev->gfx.gfx_off_state) {
/* If going to s2idle, no need to wait */
- if (adev->in_s0ix)
- delay = GFX_OFF_NO_DELAY;
- schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
+ if (no_delay) {
+ if (!amdgpu_dpm_set_powergating_by_smu(adev,
+ AMD_IP_BLOCK_TYPE_GFX, true, 0))
+ adev->gfx.gfx_off_state = true;
+ } else {
+ schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
delay);
+ }
}
} else {
if (adev->gfx.gfx_off_req_count == 0) {
cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
if (adev->gfx.gfx_off_state &&
- !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
+ !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false, 0)) {
adev->gfx.gfx_off_state = false;
if (adev->gfx.funcs->init_spm_golden) {
@@ -608,6 +830,82 @@ unlock:
mutex_unlock(&adev->gfx.gfx_off_mutex);
}
+/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
+ *
+ * @adev: amdgpu_device pointer
+ * @bool enable true: enable gfx off feature, false: disable gfx off feature
+ *
+ * 1. gfx off feature will be enabled by gfx ip after gfx cg pg enabled.
+ * 2. other client can send request to disable gfx off feature, the request should be honored.
+ * 3. other client can cancel their request of disable gfx off feature
+ * 4. other client should not send request to enable gfx off feature before disable gfx off feature.
+ *
+ * gfx off allow will be delayed by GFX_OFF_DELAY_ENABLE ms.
+ */
+void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
+{
+ /* If going to s2idle, no need to wait */
+ bool no_delay = adev->in_s0ix ? true : false;
+
+ amdgpu_gfx_do_off_ctrl(adev, enable, no_delay);
+}
+
+/* amdgpu_gfx_off_ctrl_immediate - Handle gfx off feature enable/disable
+ *
+ * @adev: amdgpu_device pointer
+ * @bool enable true: enable gfx off feature, false: disable gfx off feature
+ *
+ * 1. gfx off feature will be enabled by gfx ip after gfx cg pg enabled.
+ * 2. other client can send request to disable gfx off feature, the request should be honored.
+ * 3. other client can cancel their request of disable gfx off feature
+ * 4. other client should not send request to enable gfx off feature before disable gfx off feature.
+ *
+ * gfx off allow will be issued immediately.
+ */
+void amdgpu_gfx_off_ctrl_immediate(struct amdgpu_device *adev, bool enable)
+{
+ amdgpu_gfx_do_off_ctrl(adev, enable, true);
+}
+
+int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value)
+{
+ int r = 0;
+
+ mutex_lock(&adev->gfx.gfx_off_mutex);
+
+ r = amdgpu_dpm_set_residency_gfxoff(adev, value);
+
+ mutex_unlock(&adev->gfx.gfx_off_mutex);
+
+ return r;
+}
+
+int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value)
+{
+ int r = 0;
+
+ mutex_lock(&adev->gfx.gfx_off_mutex);
+
+ r = amdgpu_dpm_get_residency_gfxoff(adev, value);
+
+ mutex_unlock(&adev->gfx.gfx_off_mutex);
+
+ return r;
+}
+
+int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value)
+{
+ int r = 0;
+
+ mutex_lock(&adev->gfx.gfx_off_mutex);
+
+ r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
+
+ mutex_unlock(&adev->gfx.gfx_off_mutex);
+
+ return r;
+}
+
int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
{
@@ -615,72 +913,88 @@ int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
mutex_lock(&adev->gfx.gfx_off_mutex);
- r = smu_get_status_gfxoff(adev, value);
+ r = amdgpu_dpm_get_status_gfxoff(adev, value);
mutex_unlock(&adev->gfx.gfx_off_mutex);
return r;
}
-int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev)
+int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
int r;
- struct ras_fs_if fs_info = {
- .sysfs_name = "gfx_err_count",
- };
- struct ras_ih_if ih_info = {
- .cb = amdgpu_gfx_process_ras_data_cb,
- };
- if (!adev->gfx.ras_if) {
- adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
- if (!adev->gfx.ras_if)
- return -ENOMEM;
- adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX;
- adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
- adev->gfx.ras_if->sub_block_index = 0;
- }
- fs_info.head = ih_info.head = *adev->gfx.ras_if;
- r = amdgpu_ras_late_init(adev, adev->gfx.ras_if,
- &fs_info, &ih_info);
- if (r)
- goto free;
-
- if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) {
- if (!amdgpu_persistent_edc_harvesting_supported(adev))
- amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
+ if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
+ r = amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
+ if (r)
+ return r;
+ }
- r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
+ r = amdgpu_ras_block_late_init(adev, ras_block);
if (r)
- goto late_fini;
+ return r;
+
+ if (amdgpu_sriov_vf(adev))
+ return r;
+
+ if (adev->gfx.cp_ecc_error_irq.funcs) {
+ r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
+ if (r)
+ goto late_fini;
+ }
} else {
- /* free gfx ras_if if ras is not supported */
- r = 0;
- goto free;
+ amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
}
return 0;
late_fini:
- amdgpu_ras_late_fini(adev, adev->gfx.ras_if, &ih_info);
-free:
- kfree(adev->gfx.ras_if);
- adev->gfx.ras_if = NULL;
+ amdgpu_ras_block_late_fini(adev, ras_block);
return r;
}
-void amdgpu_gfx_ras_fini(struct amdgpu_device *adev)
+int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev)
{
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
- adev->gfx.ras_if) {
- struct ras_common_if *ras_if = adev->gfx.ras_if;
- struct ras_ih_if ih_info = {
- .head = *ras_if,
- .cb = amdgpu_gfx_process_ras_data_cb,
- };
+ int err = 0;
+ struct amdgpu_gfx_ras *ras = NULL;
+
+ /* adev->gfx.ras is NULL, which means gfx does not
+ * support ras function, then do nothing here.
+ */
+ if (!adev->gfx.ras)
+ return 0;
+
+ ras = adev->gfx.ras;
- amdgpu_ras_late_fini(adev, ras_if, &ih_info);
- kfree(ras_if);
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register gfx ras block!\n");
+ return err;
}
+
+ strcpy(ras->ras_block.ras_comm.name, "gfx");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->gfx.ras_if = &ras->ras_block.ras_comm;
+
+ /* If not define special ras_late_init function, use gfx default ras_late_init */
+ if (!ras->ras_block.ras_late_init)
+ ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
+
+ /* If not defined special ras_cb function, use default ras_cb */
+ if (!ras->ras_block.ras_cb)
+ ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
+
+ return 0;
+}
+
+int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler)
+ return adev->gfx.ras->poison_consumption_handler(adev, entry);
+
+ return 0;
}
int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
@@ -695,9 +1009,9 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
*/
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
- if (adev->gfx.ras_funcs &&
- adev->gfx.ras_funcs->query_ras_error_count)
- adev->gfx.ras_funcs->query_ras_error_count(adev, err_data);
+ if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops &&
+ adev->gfx.ras->ras_block.hw_ops->query_ras_error_count)
+ adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
amdgpu_ras_reset_gpu(adev);
}
return AMDGPU_RAS_SUCCESS;
@@ -717,22 +1031,44 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
ih_data.head = *ras_if;
- DRM_ERROR("CP ECC ERROR IRQ\n");
+ dev_err(adev->dev, "CP ECC ERROR IRQ\n");
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
return 0;
}
-uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
+void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
+ void *ras_error_status,
+ void (*func)(struct amdgpu_device *adev, void *ras_error_status,
+ int xcc_id))
+{
+ int i;
+ int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
+ uint32_t xcc_mask = GENMASK(num_xcc - 1, 0);
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ if (err_data) {
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+ }
+
+ for_each_inst(i, xcc_mask)
+ func(adev, ras_error_status, i);
+}
+
+uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id)
{
signed long r, cnt = 0;
unsigned long flags;
uint32_t seq, reg_val_offs = 0, value = 0;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
struct amdgpu_ring *ring = &kiq->ring;
if (amdgpu_device_skip_hw_access(adev))
return 0;
+ if (adev->mes.ring[0].sched.ready)
+ return amdgpu_mes_rreg(adev, reg);
+
BUG_ON(!ring->funcs->emit_rreg);
spin_lock_irqsave(&kiq->ring_lock, flags);
@@ -740,7 +1076,10 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
pr_err("critical bug! too many kiq readers\n");
goto failed_unlock;
}
- amdgpu_ring_alloc(ring, 32);
+ r = amdgpu_ring_alloc(ring, 32);
+ if (r)
+ goto failed_unlock;
+
amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
if (r)
@@ -764,6 +1103,9 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ if (amdgpu_in_reset(adev))
+ goto failed_kiq_read;
+
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
}
@@ -787,12 +1129,12 @@ failed_kiq_read:
return ~0;
}
-void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
+void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id)
{
signed long r, cnt = 0;
unsigned long flags;
uint32_t seq;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
struct amdgpu_ring *ring = &kiq->ring;
BUG_ON(!ring->funcs->emit_wreg);
@@ -800,8 +1142,16 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
if (amdgpu_device_skip_hw_access(adev))
return;
+ if (adev->mes.ring[0].sched.ready) {
+ amdgpu_mes_wreg(adev, reg, v);
+ return;
+ }
+
spin_lock_irqsave(&kiq->ring_lock, flags);
- amdgpu_ring_alloc(ring, 32);
+ r = amdgpu_ring_alloc(ring, 32);
+ if (r)
+ goto failed_unlock;
+
amdgpu_ring_emit_wreg(ring, reg, v);
r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
if (r)
@@ -825,6 +1175,8 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ if (amdgpu_in_reset(adev))
+ goto failed_kiq_write;
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
@@ -837,11 +1189,81 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
failed_undo:
amdgpu_ring_undo(ring);
+failed_unlock:
spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_write:
dev_err(adev->dev, "failed to write reg:%x\n", reg);
}
+int amdgpu_kiq_hdp_flush(struct amdgpu_device *adev)
+{
+ signed long r, cnt = 0;
+ unsigned long flags;
+ uint32_t seq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *ring = &kiq->ring;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if (adev->enable_mes_kiq && adev->mes.ring[0].sched.ready)
+ return amdgpu_mes_hdp_flush(adev);
+
+ if (!ring->funcs->emit_hdp_flush) {
+ return -EOPNOTSUPP;
+ }
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ r = amdgpu_ring_alloc(ring, 32);
+ if (r)
+ goto failed_unlock;
+
+ amdgpu_ring_emit_hdp_flush(ring);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ /* don't wait anymore for gpu reset case because this way may
+ * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+ * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+ * never return if we keep waiting in virt_kiq_rreg, which cause
+ * gpu_recover() hang there.
+ *
+ * also don't wait anymore for IRQ context
+ * */
+ if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+ goto failed_kiq_hdp_flush;
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ if (amdgpu_in_reset(adev))
+ goto failed_kiq_hdp_flush;
+
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY) {
+ dev_err(adev->dev, "failed to flush HDP via KIQ timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+
+failed_undo:
+ amdgpu_ring_undo(ring);
+failed_unlock:
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+failed_kiq_hdp_flush:
+ dev_err(adev->dev, "failed to flush HDP via KIQ\n");
+ return r < 0 ? r : -EIO;
+}
+
int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
{
if (amdgpu_num_kcq == -1) {
@@ -853,18 +1275,1283 @@ int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
return amdgpu_num_kcq;
}
-/* amdgpu_gfx_state_change_set - Handle gfx power state change set
+void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
+ uint32_t ucode_id)
+{
+ const struct gfx_firmware_header_v1_0 *cp_hdr;
+ const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0;
+ struct amdgpu_firmware_info *info = NULL;
+ const struct firmware *ucode_fw;
+ unsigned int fw_size;
+
+ switch (ucode_id) {
+ case AMDGPU_UCODE_ID_CP_PFP:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.pfp_fw->data;
+ adev->gfx.pfp_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.pfp_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.pfp_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+ adev->gfx.pfp_fw_version =
+ le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
+ adev->gfx.pfp_feature_version =
+ le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
+ ucode_fw = adev->gfx.pfp_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+ ucode_fw = adev->gfx.pfp_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_ME:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.me_fw->data;
+ adev->gfx.me_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.me_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.me_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ adev->gfx.me_fw_version =
+ le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
+ adev->gfx.me_feature_version =
+ le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
+ ucode_fw = adev->gfx.me_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ ucode_fw = adev->gfx.me_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_CE:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.ce_fw->data;
+ adev->gfx.ce_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.ce_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.ce_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec_fw->data;
+ adev->gfx.mec_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.mec_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
+ le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1_JT:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec_fw->data;
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC2:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec2_fw->data;
+ adev->gfx.mec2_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.mec2_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.mec2_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
+ le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC2_JT:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec2_fw->data;
+ ucode_fw = adev->gfx.mec2_fw;
+ fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ adev->gfx.mec_fw_version =
+ le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
+ adev->gfx.mec_feature_version =
+ le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
+ break;
+ default:
+ dev_err(adev->dev, "Invalid ucode id %u\n", ucode_id);
+ return;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ info = &adev->firmware.ucode[ucode_id];
+ info->ucode_id = ucode_id;
+ info->fw = ucode_fw;
+ adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE);
+ }
+}
+
+bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id)
+{
+ return !(xcc_id % (adev->gfx.num_xcc_per_xcp ?
+ adev->gfx.num_xcc_per_xcp : 1));
+}
+
+static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev,
+ struct device_attribute *addr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ int mode;
+
+ /* Only minimal precaution taken to reject requests while in reset.*/
+ if (amdgpu_in_reset(adev))
+ return -EPERM;
+
+ mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+ AMDGPU_XCP_FL_NONE);
+
+ return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode));
+}
+
+static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
+ struct device_attribute *addr,
+ const char *buf, size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ enum amdgpu_gfx_partition mode;
+ int ret = 0, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ if (num_xcc % 2 != 0)
+ return -EINVAL;
+
+ if (!strncasecmp("SPX", buf, strlen("SPX"))) {
+ mode = AMDGPU_SPX_PARTITION_MODE;
+ } else if (!strncasecmp("DPX", buf, strlen("DPX"))) {
+ /*
+ * DPX mode needs AIDs to be in multiple of 2.
+ * Each AID connects 2 XCCs.
+ */
+ if (num_xcc%4)
+ return -EINVAL;
+ mode = AMDGPU_DPX_PARTITION_MODE;
+ } else if (!strncasecmp("TPX", buf, strlen("TPX"))) {
+ if (num_xcc != 6)
+ return -EINVAL;
+ mode = AMDGPU_TPX_PARTITION_MODE;
+ } else if (!strncasecmp("QPX", buf, strlen("QPX"))) {
+ if (num_xcc != 8)
+ return -EINVAL;
+ mode = AMDGPU_QPX_PARTITION_MODE;
+ } else if (!strncasecmp("CPX", buf, strlen("CPX"))) {
+ mode = AMDGPU_CPX_PARTITION_MODE;
+ } else {
+ return -EINVAL;
+ }
+
+ /* Don't allow a switch while under reset */
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return -EPERM;
+
+ ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode);
+
+ up_read(&adev->reset_domain->sem);
+
+ if (ret)
+ return ret;
+
+ return count;
+}
+
+static const char *xcp_desc[] = {
+ [AMDGPU_SPX_PARTITION_MODE] = "SPX",
+ [AMDGPU_DPX_PARTITION_MODE] = "DPX",
+ [AMDGPU_TPX_PARTITION_MODE] = "TPX",
+ [AMDGPU_QPX_PARTITION_MODE] = "QPX",
+ [AMDGPU_CPX_PARTITION_MODE] = "CPX",
+};
+
+/*
+ * sysfs show callback for available_compute_partition.
+ *
+ * Emits the names of all modes set in xcp_mgr->avail_xcp_modes as a
+ * ", "-separated list, or "Not supported" when there is no XCP manager
+ * or no available mode.
+ */
+static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
+							  struct device_attribute *addr,
+							  char *buf)
+{
+	struct amdgpu_device *adev = drm_to_adev(dev_get_drvdata(dev));
+	struct amdgpu_xcp_mgr *mgr = adev->xcp_mgr;
+	char *delim = "";
+	int len = 0;
+	int mode;
+
+	if (!(mgr && mgr->avail_xcp_modes))
+		return sysfs_emit(buf, "Not supported\n");
+
+	for_each_inst(mode, mgr->avail_xcp_modes) {
+		len += sysfs_emit_at(buf, len, "%s%s", delim, xcp_desc[mode]);
+		/* Every entry after the first is preceded by a separator. */
+		delim = ", ";
+	}
+
+	len += sysfs_emit_at(buf, len, "\n");
+
+	return len;
+}
+
+/*
+ * Submit a NOP-only job on @ring that forces the cleaner shader to run and
+ * wait for it to complete.
+ *
+ * A temporary scheduler entity is set up on the ring's scheduler for the
+ * submission. The entity is torn down on every path once it has been
+ * initialized, and the fence reference returned by amdgpu_job_submit() is
+ * always dropped, including when the wait fails.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct drm_gpu_scheduler *sched = &ring->sched;
+	struct drm_sched_entity entity;
+	static atomic_t counter;
+	struct dma_fence *f;
+	struct amdgpu_job *job;
+	struct amdgpu_ib *ib;
+	void *owner;
+	int i, r;
+
+	/* Initialize the scheduler entity */
+	r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL,
+				  &sched, 1, NULL);
+	if (r) {
+		dev_err(adev->dev, "Failed setting up GFX kernel entity.\n");
+		return r;
+	}
+
+	/*
+	 * Use some unique dummy value as the owner to make sure we execute
+	 * the cleaner shader on each submission. The value just need to change
+	 * for each submission and is otherwise meaningless.
+	 */
+	owner = (void *)(unsigned long)atomic_inc_return(&counter);
+
+	r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner,
+				     64, 0, &job,
+				     AMDGPU_KERNEL_JOB_ID_CLEANER_SHADER);
+	if (r)
+		goto out_entity;
+
+	job->enforce_isolation = true;
+	/* always run the cleaner shader */
+	job->run_cleaner_shader = true;
+
+	/* Fill the IB with NOPs up to the ring's alignment requirement. */
+	ib = &job->ibs[0];
+	for (i = 0; i <= ring->funcs->align_mask; ++i)
+		ib->ptr[i] = ring->funcs->nop;
+	ib->length_dw = ring->funcs->align_mask + 1;
+
+	f = amdgpu_job_submit(job);
+
+	r = dma_fence_wait(f, false);
+	/* Drop our fence reference whether or not the wait succeeded. */
+	dma_fence_put(f);
+
+out_entity:
+	/* Clean up the scheduler entity */
+	drm_sched_entity_destroy(&entity);
+	return r;
+}
+
+/*
+ * Run the cleaner shader once per XCC on the first ready compute ring that
+ * belongs to partition @xcp_id.
+ *
+ * Returns 0 when the expected number of XCCs was handled, -ENOENT when the
+ * count does not match, or the error of a failed job submission.
+ */
+static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id)
+{
+	int xcc_total = NUM_XCC(adev->gfx.xcc_mask);
+	int remaining = adev->gfx.num_xcc_per_xcp ? adev->gfx.num_xcc_per_xcp : 1;
+	struct amdgpu_ring *ring;
+	int xcc_id, i, r;
+
+	for (xcc_id = 0; xcc_id < xcc_total; xcc_id++) {
+		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+			ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
+			if (ring->xcp_id != xcp_id || !ring->sched.ready)
+				continue;
+
+			r = amdgpu_gfx_run_cleaner_shader_job(ring);
+			if (r)
+				return r;
+
+			/* One ring per XCC is enough; move on to the next XCC. */
+			remaining--;
+			break;
+		}
+	}
+
+	return remaining ? -ENOENT : 0;
+}
+
+/**
+ * amdgpu_gfx_set_run_cleaner_shader - Execute the AMDGPU GFX Cleaner Shader
+ * @dev: The device structure
+ * @attr: The device attribute structure
+ * @buf: The buffer containing the input data
+ * @count: The size of the input data
+ *
+ * Provides the sysfs interface to manually run a cleaner shader, which is
+ * used to clear the GPU state between different tasks. Writing a value to the
+ * 'run_cleaner_shader' sysfs file triggers the cleaner shader execution.
+ * The value written corresponds to the partition index on multi-partition
+ * devices. On single-partition devices, the value should be '0'.
+ *
+ * The cleaner shader clears the Local Data Store (LDS) and General Purpose
+ * Registers (GPRs) to ensure data isolation between GPU workloads.
+ *
+ * The request is rejected with -EPERM while the device is in reset, while it
+ * is suspended outside of runtime PM, or when adev->gfx.disable_kq is set.
+ * Input that does not parse as a number, is negative, or is out of range for
+ * the partition count is rejected with -EINVAL. A runtime PM reference is
+ * held for the duration of the shader run.
+ *
+ * NOTE(review): without an XCP manager the range check accepts both 0 and 1
+ * ("value > 1" is rejected), although the text above says the value should
+ * be '0' - confirm whether 1 is meant to be accepted.
+ *
+ * Return: @count on success, a negative error code otherwise.
+ */
+static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ int ret;
+ long value;
+
+ if (amdgpu_in_reset(adev))
+ return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
+
+ if (adev->gfx.disable_kq)
+ return -EPERM;
+
+ ret = kstrtol(buf, 0, &value);
+
+ if (ret)
+ return -EINVAL;
+
+ if (value < 0)
+ return -EINVAL;
+
+ if (adev->xcp_mgr) {
+ if (value >= adev->xcp_mgr->num_xcps)
+ return -EINVAL;
+ } else {
+ if (value > 1)
+ return -EINVAL;
+ }
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
+ return ret;
+ }
+
+ ret = amdgpu_gfx_run_cleaner_shader(adev, value);
+
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ if (ret)
+ return ret;
+
+ return count;
+}
+
+/**
+ * amdgpu_gfx_get_enforce_isolation - Query AMDGPU GFX Enforce Isolation Settings
+ * @dev: The device structure
+ * @attr: The device attribute structure
+ * @buf: The buffer to store the output data
+ *
+ * Provides the sysfs read interface to get the current settings of the
+ * 'enforce_isolation' feature for each GPU partition. Reading from the
+ * 'enforce_isolation' sysfs file returns the isolation settings for all
+ * partitions as a space-separated list terminated by a newline, where '0'
+ * indicates disabled, '1' indicates enabled, '2' indicates enabled in legacy
+ * mode, and '3' indicates enabled without cleaner shader. Without an XCP
+ * manager only the setting of index 0 is reported.
+ *
+ * Return: The number of bytes written to @buf.
+ */
+static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
+						struct device_attribute *attr,
+						char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+	int i;
+	ssize_t size = 0;
+
+	if (adev->xcp_mgr) {
+		for (i = 0; i < adev->xcp_mgr->num_xcps; i++) {
+			size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]);
+			if (i < (adev->xcp_mgr->num_xcps - 1))
+				size += sysfs_emit_at(buf, size, " ");
+		}
+		/*
+		 * Emit the terminator through sysfs_emit_at() as well so the
+		 * write stays inside the page like every other write here,
+		 * instead of poking buf[] directly.
+		 */
+		size += sysfs_emit_at(buf, size, "\n");
+	} else {
+		size = sysfs_emit_at(buf, 0, "%u\n", adev->enforce_isolation[0]);
+	}
+
+	return size;
+}
+
+/**
+ * amdgpu_gfx_set_enforce_isolation - Control AMDGPU GFX Enforce Isolation
+ * @dev: The device structure
+ * @attr: The device attribute structure
+ * @buf: The buffer containing the input data
+ * @count: The size of the input data
+ *
+ * This function allows control over the 'enforce_isolation' feature, which
+ * serializes access to the graphics engine. Writing '0' to disable, '1' to
+ * enable isolation with cleaner shader, '2' to enable legacy isolation without
+ * cleaner shader, or '3' to enable process isolation without submitting the
+ * cleaner shader to the 'enforce_isolation' sysfs file sets the isolation mode
+ * for each partition. The input should specify the setting for all
+ * partitions.
+ *
+ * Values are parsed one at a time with sscanf("%ld") and are expected to be
+ * separated by a single space character. The number of values must match the
+ * number of partitions (exactly one when there is no XCP manager) and each
+ * value must be 0, 1, 2 or 3; otherwise nothing is changed and -EINVAL is
+ * returned. The new settings are stored under enforce_isolation_mutex and
+ * amdgpu_mes_update_enforce_isolation() is called afterwards.
+ *
+ * Return: @count on success, -EINVAL on malformed input.
+ */
+static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ long partition_values[MAX_XCP] = {0};
+ int ret, i, num_partitions;
+ const char *input_buf = buf;
+
+ for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
+ ret = sscanf(input_buf, "%ld", &partition_values[i]);
+ if (ret <= 0)
+ break;
+
+ /* Move the pointer to the next value in the string */
+ input_buf = strchr(input_buf, ' ');
+ if (input_buf) {
+ input_buf++;
+ } else {
+ i++;
+ break;
+ }
+ }
+ num_partitions = i;
+
+ if (adev->xcp_mgr && num_partitions != adev->xcp_mgr->num_xcps)
+ return -EINVAL;
+
+ if (!adev->xcp_mgr && num_partitions != 1)
+ return -EINVAL;
+
+ for (i = 0; i < num_partitions; i++) {
+ if (partition_values[i] != 0 &&
+ partition_values[i] != 1 &&
+ partition_values[i] != 2 &&
+ partition_values[i] != 3)
+ return -EINVAL;
+ }
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+ for (i = 0; i < num_partitions; i++) {
+ switch (partition_values[i]) {
+ case 0:
+ default:
+ adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
+ break;
+ case 1:
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_ENABLE;
+ break;
+ case 2:
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
+ break;
+ case 3:
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
+ break;
+ }
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+
+ amdgpu_mes_update_enforce_isolation(adev);
+
+ return count;
+}
+
+static ssize_t amdgpu_gfx_get_gfx_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->gfx.gfx_supported_reset);
+}
+
+static ssize_t amdgpu_gfx_get_compute_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->gfx.compute_supported_reset);
+}
+
+static DEVICE_ATTR(run_cleaner_shader, 0200,
+ NULL, amdgpu_gfx_set_run_cleaner_shader);
+
+static DEVICE_ATTR(enforce_isolation, 0644,
+ amdgpu_gfx_get_enforce_isolation,
+ amdgpu_gfx_set_enforce_isolation);
+
+static DEVICE_ATTR(current_compute_partition, 0644,
+ amdgpu_gfx_get_current_compute_partition,
+ amdgpu_gfx_set_compute_partition);
+
+static DEVICE_ATTR(available_compute_partition, 0444,
+ amdgpu_gfx_get_available_compute_partition, NULL);
+static DEVICE_ATTR(gfx_reset_mask, 0444,
+ amdgpu_gfx_get_gfx_reset_mask, NULL);
+
+static DEVICE_ATTR(compute_reset_mask, 0444,
+ amdgpu_gfx_get_compute_reset_mask, NULL);
+
+/*
+ * Create the compute partition sysfs files. Nothing is created without an
+ * XCP manager. When the manager has no switch_partition_mode callback the
+ * current_compute_partition file is made read-only and the
+ * available_compute_partition file is not created.
+ */
+static int amdgpu_gfx_sysfs_xcp_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_xcp_mgr *mgr = adev->xcp_mgr;
+	bool can_switch;
+	int err;
+
+	if (!mgr)
+		return 0;
+
+	can_switch = (mgr->funcs && mgr->funcs->switch_partition_mode);
+	if (!can_switch)
+		dev_attr_current_compute_partition.attr.mode &=
+			~(S_IWUSR | S_IWGRP | S_IWOTH);
+
+	err = device_create_file(adev->dev, &dev_attr_current_compute_partition);
+	if (err || !can_switch)
+		return err;
+
+	return device_create_file(adev->dev,
+				  &dev_attr_available_compute_partition);
+}
+
+static void amdgpu_gfx_sysfs_xcp_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
+ bool xcp_switch_supported;
+
+ if (!xcp_mgr)
+ return;
+
+ xcp_switch_supported =
+ (xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode);
+ device_remove_file(adev->dev, &dev_attr_current_compute_partition);
+
+ if (xcp_switch_supported)
+ device_remove_file(adev->dev,
+ &dev_attr_available_compute_partition);
+}
+
+/*
+ * Create the enforce_isolation sysfs file and, when the cleaner shader is
+ * enabled, the run_cleaner_shader file as well.
+ */
+static int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev)
+{
+	int err = device_create_file(adev->dev, &dev_attr_enforce_isolation);
+
+	if (err || !adev->gfx.enable_cleaner_shader)
+		return err;
+
+	return device_create_file(adev->dev, &dev_attr_run_cleaner_shader);
+}
+
+static void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev)
+{
+ device_remove_file(adev->dev, &dev_attr_enforce_isolation);
+ if (adev->gfx.enable_cleaner_shader)
+ device_remove_file(adev->dev, &dev_attr_run_cleaner_shader);
+}
+
+/*
+ * Create the gfx_reset_mask and compute_reset_mask sysfs files for the ring
+ * types that exist. Nothing is created when amdgpu_gpu_recovery is zero.
+ */
+static int amdgpu_gfx_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+	int err;
+
+	if (!amdgpu_gpu_recovery)
+		return 0;
+
+	if (adev->gfx.num_gfx_rings) {
+		err = device_create_file(adev->dev, &dev_attr_gfx_reset_mask);
+		if (err)
+			return err;
+	}
+
+	if (adev->gfx.num_compute_rings)
+		return device_create_file(adev->dev, &dev_attr_compute_reset_mask);
+
+	return 0;
+}
+
+static void amdgpu_gfx_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+ if (!amdgpu_gpu_recovery)
+ return;
+
+ if (adev->gfx.num_gfx_rings)
+ device_remove_file(adev->dev, &dev_attr_gfx_reset_mask);
+
+ if (adev->gfx.num_compute_rings)
+ device_remove_file(adev->dev, &dev_attr_compute_reset_mask);
+}
+
+/*
+ * Create all gfx sysfs files: compute partition, isolation/cleaner shader
+ * and reset mask attributes.
+ *
+ * A failure to create the partition files aborts immediately. A failure to
+ * create the isolation files is only logged: its error code is overwritten
+ * by the result of the reset mask step, which is what gets returned.
+ * NOTE(review): confirm that ignoring the isolation error is intentional.
+ */
+int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
+{
+	int r;
+
+	r = amdgpu_gfx_sysfs_xcp_init(adev);
+	if (r) {
+		dev_err(adev->dev, "failed to create xcp sysfs files\n");
+		return r;
+	}
+
+	r = amdgpu_gfx_sysfs_isolation_shader_init(adev);
+	if (r)
+		dev_err(adev->dev, "failed to create isolation sysfs files\n");
+
+	r = amdgpu_gfx_sysfs_reset_mask_init(adev);
+	if (r)
+		dev_err(adev->dev, "failed to create reset mask sysfs files\n");
+
+	return r;
+}
+
+/*
+ * Remove all gfx sysfs files, provided the device still has a sysfs
+ * directory (kobj.sd is set).
+ */
+void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
+{
+	if (!adev->dev->kobj.sd)
+		return;
+
+	amdgpu_gfx_sysfs_xcp_fini(adev);
+	amdgpu_gfx_sysfs_isolation_shader_fini(adev);
+	amdgpu_gfx_sysfs_reset_mask_fini(adev);
+}
+
+int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
+ unsigned int cleaner_shader_size)
+{
+ if (!adev->gfx.enable_cleaner_shader)
+ return -EOPNOTSUPP;
+
+ return amdgpu_bo_create_kernel(adev, cleaner_shader_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.cleaner_shader_obj,
+ &adev->gfx.cleaner_shader_gpu_addr,
+ (void **)&adev->gfx.cleaner_shader_cpu_ptr);
+}
+
+void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev)
+{
+ if (!adev->gfx.enable_cleaner_shader)
+ return;
+
+ amdgpu_bo_free_kernel(&adev->gfx.cleaner_shader_obj,
+ &adev->gfx.cleaner_shader_gpu_addr,
+ (void **)&adev->gfx.cleaner_shader_cpu_ptr);
+}
+
+/*
+ * Copy @cleaner_shader_size bytes of shader code from @cleaner_shader_ptr
+ * into the cleaner shader buffer. Does nothing when the cleaner shader is
+ * disabled, the buffer has no CPU mapping, or no source pointer is given.
+ */
+void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
+				    unsigned int cleaner_shader_size,
+				    const void *cleaner_shader_ptr)
+{
+	if (!adev->gfx.enable_cleaner_shader)
+		return;
+
+	if (!adev->gfx.cleaner_shader_cpu_ptr || !cleaner_shader_ptr)
+		return;
+
+	memcpy_toio(adev->gfx.cleaner_shader_cpu_ptr, cleaner_shader_ptr,
+		    cleaner_shader_size);
+}
+
+/**
+ * amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Graphics Driver)
+ * @adev: amdgpu_device pointer
+ * @idx: Index of the scheduler to control
+ * @enable: Whether to enable or disable the KFD scheduler
+ *
+ * This function is used to control the KFD (Kernel Fusion Driver) scheduler
+ * from the KGD. It is part of the cleaner shader feature. This function plays
+ * a key role in enforcing process isolation on the GPU.
+ *
+ * The function uses a reference count (userq_sch_req_count[idx]) to keep
+ * track of the outstanding requests to disable the scheduler. When a request
+ * to enable the scheduler is made, the reference count is decremented.
+ * When the reference count reaches zero and the scheduler is currently marked
+ * inactive (userq_sch_inactive[idx]), the enforce isolation delayed work is
+ * scheduled to run after enforce_isolation_time[idx] milliseconds.
+ *
+ * When a request to disable the scheduler is made, the function first
+ * checks if the reference count is zero. If it is, it cancels the delayed work
+ * for enforcing isolation and, unless the scheduler is already marked
+ * inactive, stops the user queue scheduler, stops the KFD scheduler when
+ * adev->kfd.init_complete is set, and marks the scheduler inactive. Then, it
+ * increments the reference count.
+ *
+ * The function is synchronized using userq_sch_mutex so that the scheduler
+ * state and reference count are updated together.
+ *
+ * Note: If the reference count is already zero when a request to enable the
+ * scheduler is made, it means there's an imbalance bug somewhere. The
+ * function triggers a warning, logs an error and leaves the count untouched
+ * in this case.
+ */
+static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx,
+ bool enable)
+{
+ mutex_lock(&adev->gfx.userq_sch_mutex);
+
+ if (enable) {
+ /* If the count is already 0, it means there's an imbalance bug somewhere.
+ * Note that the bug may be in a different caller than the one which triggers the
+ * WARN_ON_ONCE.
+ */
+ if (WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx] == 0)) {
+ dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n");
+ goto unlock;
+ }
+
+ adev->gfx.userq_sch_req_count[idx]--;
+
+ if (adev->gfx.userq_sch_req_count[idx] == 0 &&
+ adev->gfx.userq_sch_inactive[idx]) {
+ schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
+ msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx]));
+ }
+ } else {
+ if (adev->gfx.userq_sch_req_count[idx] == 0) {
+ cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work);
+ if (!adev->gfx.userq_sch_inactive[idx]) {
+ amdgpu_userq_stop_sched_for_enforce_isolation(adev, idx);
+ if (adev->kfd.init_complete)
+ amdgpu_amdkfd_stop_sched(adev, idx);
+ adev->gfx.userq_sch_inactive[idx] = true;
+ }
+ }
+
+ adev->gfx.userq_sch_req_count[idx]++;
+ }
+
+unlock:
+ mutex_unlock(&adev->gfx.userq_sch_mutex);
+}
+
+/**
+ * amdgpu_gfx_enforce_isolation_handler - work handler for enforcing shader isolation
+ *
+ * @work: work_struct.
+ *
+ * This function is the work handler for enforcing shader isolation on AMD GPUs.
+ * It counts the number of emitted fences on every GFX and compute ring whose
+ * xcp_id matches the partition of the work item. If there are any fences, it
+ * re-schedules itself to run again after 1 ms. If there are no fences, it
+ * restarts the user queue scheduler, tells the Kernel Fusion Driver (KFD) to
+ * resume the runqueue when adev->kfd.init_complete is set, and clears the
+ * inactive flag for the partition. The function is synchronized using the
+ * `enforce_isolation_mutex`.
+ *
+ * Work items with no partition (AMDGPU_XCP_NO_PARTITION) use index 0; the
+ * handler returns without doing anything when the index is not below MAX_XCP.
+ */
+void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
+{
+ struct amdgpu_isolation_work *isolation_work =
+ container_of(work, struct amdgpu_isolation_work, work.work);
+ struct amdgpu_device *adev = isolation_work->adev;
+ u32 i, idx, fences = 0;
+
+ if (isolation_work->xcp_id == AMDGPU_XCP_NO_PARTITION)
+ idx = 0;
+ else
+ idx = isolation_work->xcp_id;
+
+ if (idx >= MAX_XCP)
+ return;
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+ for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) {
+ if (isolation_work->xcp_id == adev->gfx.gfx_ring[i].xcp_id)
+ fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
+ }
+ for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) {
+ if (isolation_work->xcp_id == adev->gfx.compute_ring[i].xcp_id)
+ fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
+ }
+ if (fences) {
+ /* we've already had our timeslice, so let's wrap this up */
+ schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
+ msecs_to_jiffies(1));
+ } else {
+ /* Tell KFD to resume the runqueue */
+ WARN_ON_ONCE(!adev->gfx.userq_sch_inactive[idx]);
+ WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx]);
+
+ amdgpu_userq_start_sched_for_enforce_isolation(adev, idx);
+ if (adev->kfd.init_complete)
+ amdgpu_amdkfd_start_sched(adev, idx);
+ adev->gfx.userq_sch_inactive[idx] = false;
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+}
+
+/**
+ * amdgpu_gfx_enforce_isolation_wait_for_kfd - Manage KFD wait period for process isolation
* @adev: amdgpu_device pointer
- * @state: gfx power state(1 -sGpuChangeState_D0Entry and 2 -sGpuChangeState_D3Entry)
+ * @idx: Index of the GPU partition
+ *
+ * When kernel submissions come in, the jobs are given a time slice and once
+ * that time slice is up, if there are KFD user queues active, kernel
+ * submissions are blocked until KFD has had its time slice. Once the KFD time
+ * slice is up, KFD user queues are preempted and kernel submissions are
+ * unblocked and allowed to run again.
+ */
+static void
+amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev,
+ u32 idx)
+{
+ unsigned long cjiffies;
+ bool wait = false;
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+ if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) {
+ /* set the initial values if nothing is set */
+ if (!adev->gfx.enforce_isolation_jiffies[idx]) {
+ adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
+ adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
+ }
+ /* Make sure KFD gets a chance to run */
+ if (amdgpu_amdkfd_compute_active(adev, idx)) {
+ cjiffies = jiffies;
+ if (time_after(cjiffies, adev->gfx.enforce_isolation_jiffies[idx])) {
+ cjiffies -= adev->gfx.enforce_isolation_jiffies[idx];
+ if ((jiffies_to_msecs(cjiffies) >= GFX_SLICE_PERIOD_MS)) {
+ /* if our time is up, let KGD work drain before scheduling more */
+ wait = true;
+ /* reset the timer period */
+ adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
+ } else {
+ /* set the timer period to what's left in our time slice */
+ adev->gfx.enforce_isolation_time[idx] =
+ GFX_SLICE_PERIOD_MS - jiffies_to_msecs(cjiffies);
+ }
+ } else {
+ /* if jiffies wrap around we will just wait a little longer */
+ adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
+ }
+ } else {
+ /* if there is no KFD work, then set the full slice period */
+ adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
+ adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
+ }
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+
+ if (wait)
+ msleep(GFX_SLICE_PERIOD_MS);
+}
+
+/**
+ * amdgpu_gfx_enforce_isolation_ring_begin_use - Begin use of a ring with enforced isolation
+ * @ring: Pointer to the amdgpu_ring structure
*
+ * Ring begin_use helper implementation for gfx which serializes access to the
+ * gfx IP between kernel submission IOCTLs and KFD user queues when isolation
+ * enforcement is enabled. The kernel submission IOCTLs and KFD user queues
+ * each get a time slice when both are active.
*/
+void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 idx;
+ bool sched_work = false;
+
+ if (!adev->gfx.enable_cleaner_shader)
+ return;
+
+ if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
+ idx = 0;
+ else
+ idx = ring->xcp_id;
-void amdgpu_gfx_state_change_set(struct amdgpu_device *adev, enum gfx_change_state state)
+ if (idx >= MAX_XCP)
+ return;
+
+ /* Don't submit more work until KFD has had some time */
+ amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx);
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+ if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) {
+ if (adev->kfd.init_complete)
+ sched_work = true;
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+
+ if (sched_work)
+ amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);
+}
+
+/**
+ * amdgpu_gfx_enforce_isolation_ring_end_use - End use of a ring with enforced isolation
+ * @ring: Pointer to the amdgpu_ring structure
+ *
+ * Ring end_use helper implementation for gfx which serializes access to the
+ * gfx IP between kernel submission IOCTLs and KFD user queues when isolation
+ * enforcement is enabled. The kernel submission IOCTLs and KFD user queues
+ * each get a time slice when both are active.
+ */
+void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
{
- mutex_lock(&adev->pm.mutex);
- if (adev->powerplay.pp_funcs &&
- adev->powerplay.pp_funcs->gfx_state_change_set)
- ((adev)->powerplay.pp_funcs->gfx_state_change_set(
- (adev)->powerplay.pp_handle, state));
- mutex_unlock(&adev->pm.mutex);
+ struct amdgpu_device *adev = ring->adev;
+ u32 idx;
+ bool sched_work = false;
+
+ if (!adev->gfx.enable_cleaner_shader)
+ return;
+
+ if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
+ idx = 0;
+ else
+ idx = ring->xcp_id;
+
+ if (idx >= MAX_XCP)
+ return;
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+ if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) {
+ if (adev->kfd.init_complete)
+ sched_work = true;
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+
+ if (sched_work)
+ amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);
}
+
+/*
+ * Delayed work that drops the gfx/compute workload power profile once no
+ * fences are outstanding on any gfx or compute ring and no submission is in
+ * progress. Otherwise it re-arms itself after GFX_PROFILE_IDLE_TIMEOUT.
+ */
+void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work)
+{
+	struct amdgpu_device *adev =
+		container_of(work, struct amdgpu_device, gfx.idle_work.work);
+	enum PP_SMC_POWER_PROFILE profile;
+	u32 ring_idx, pending = 0;
+	int r;
+
+	profile = adev->gfx.num_gfx_rings ? PP_SMC_POWER_PROFILE_FULLSCREEN3D :
+					    PP_SMC_POWER_PROFILE_COMPUTE;
+
+	for (ring_idx = 0; ring_idx < AMDGPU_MAX_GFX_RINGS; ++ring_idx)
+		pending += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[ring_idx]);
+	for (ring_idx = 0; ring_idx < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++ring_idx)
+		pending += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[ring_idx]);
+
+	/* Still busy: check again later. */
+	if (pending || atomic_read(&adev->gfx.total_submission_cnt)) {
+		schedule_delayed_work(&adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT);
+		return;
+	}
+
+	mutex_lock(&adev->gfx.workload_profile_mutex);
+	if (adev->gfx.workload_profile_active) {
+		r = amdgpu_dpm_switch_power_profile(adev, profile, false);
+		if (r)
+			dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
+				 profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
+				 "fullscreen 3D" : "compute");
+		adev->gfx.workload_profile_active = false;
+	}
+	mutex_unlock(&adev->gfx.workload_profile_mutex);
+}
+
+/*
+ * Ring begin_use helper that switches on the gfx (fullscreen 3D) or compute
+ * workload power profile when the first submission arrives. Does nothing
+ * when overdrive is enabled.
+ */
+void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	enum PP_SMC_POWER_PROFILE profile;
+	int r;
+
+	if (amdgpu_dpm_is_overdrive_enabled(adev))
+		return;
+
+	if (adev->gfx.num_gfx_rings)
+		profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
+	else
+		profile = PP_SMC_POWER_PROFILE_COMPUTE;
+
+	atomic_inc(&adev->gfx.total_submission_cnt);
+
+	cancel_delayed_work_sync(&adev->gfx.idle_work);
+
+	/* We can safely return early here because we've cancelled the
+	 * delayed work so there is no one else to set it to false
+	 * and we don't care if someone else sets it to true.
+	 */
+	if (adev->gfx.workload_profile_active)
+		return;
+
+	mutex_lock(&adev->gfx.workload_profile_mutex);
+	/* Re-check under the lock: another submitter may have enabled it. */
+	if (!adev->gfx.workload_profile_active) {
+		r = amdgpu_dpm_switch_power_profile(adev, profile, true);
+		if (r)
+			dev_warn(adev->dev, "(%d) failed to enable %s power profile mode\n", r,
+				 profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
+				 "fullscreen 3D" : "compute");
+		adev->gfx.workload_profile_active = true;
+	}
+	mutex_unlock(&adev->gfx.workload_profile_mutex);
+}
+
+/*
+ * Ring end_use helper: drop the in-flight submission count and arm the idle
+ * work that will later switch the workload profile off. Does nothing when
+ * overdrive is enabled.
+ */
+void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (amdgpu_dpm_is_overdrive_enabled(adev))
+		return;
+
+	atomic_dec(&adev->gfx.total_submission_cnt);
+	schedule_delayed_work(&adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT);
+}
+
+/**
+ * amdgpu_gfx_csb_preamble_start - Set CSB preamble start
+ *
+ * @buffer: This is an output variable that gets the PACKET3 preamble setup.
+ *
+ * Return:
+ * return the latest index.
+ */
+u32 amdgpu_gfx_csb_preamble_start(u32 *buffer)
+{
+ u32 count = 0;
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ buffer[count++] = cpu_to_le32(0x80000000);
+ buffer[count++] = cpu_to_le32(0x80000000);
+
+ return count;
+}
+
+/**
+ * amdgpu_gfx_csb_data_parser - Parse CS data
+ *
+ * @adev: amdgpu_device pointer used to get the CS data and other gfx info.
+ * @buffer: This is an output variable that gets one PACKET3_SET_CONTEXT_REG
+ *          packet (header, register offset and register values) for every
+ *          extent of every SECT_CONTEXT section in adev->gfx.rlc.cs_data.
+ * @count: Index in @buffer at which to start writing.
+ *
+ * Return:
+ * return the latest index.
+ */
+u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count)
+{
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+ u32 i;
+
+ for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
+ buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
+
+ for (i = 0; i < ext->reg_count; i++)
+ buffer[count++] = cpu_to_le32(ext->extent[i]);
+ }
+ }
+ }
+
+ return count;
+}
+
+/**
+ * amdgpu_gfx_csb_preamble_end - Set CSB preamble end
+ *
+ * @buffer: This is an output variable that gets the PACKET3 preamble end.
+ * @count: Index in @buffer at which to start writing the preamble end.
+ *
+ * Writes four dwords starting at @count: a PREAMBLE_CNTL packet carrying
+ * PACKET3_PREAMBLE_END_CLEAR_STATE followed by a CLEAR_STATE packet.
+ */
+void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count)
+{
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
+ buffer[count++] = cpu_to_le32(0);
+}
+
+/*
+ * debugfs for to enable/disable gfx job submission to specific core.
+ */
+#if defined(CONFIG_DEBUG_FS)
+/*
+ * debugfs setter: bit i of @val enables (1) or disables (0) job submission
+ * on gfx ring i by updating its sched.ready flag. At least one existing
+ * ring must stay enabled, otherwise -EINVAL is returned.
+ */
+static int amdgpu_debugfs_gfx_sched_mask_set(void *data, u64 val)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)data;
+	u32 i;
+	u64 mask = 0;
+	struct amdgpu_ring *ring;
+
+	if (!adev)
+		return -ENODEV;
+
+	mask = (1ULL << adev->gfx.num_gfx_rings) - 1;
+	if ((val & mask) == 0)
+		return -EINVAL;
+
+	for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
+		ring = &adev->gfx.gfx_ring[i];
+		/* 64-bit shift to match the u64 mask, same as the getter */
+		ring->sched.ready = !!(val & (1ULL << i));
+	}
+	/*
+	 * publish sched.ready flag update effective immediately across smp;
+	 * these are stores, so a write barrier is the one that orders them
+	 */
+	smp_wmb();
+	return 0;
+}
+
+/*
+ * debugfs getter: report in *@val a bitmask with bit i set for every gfx
+ * ring i whose sched.ready flag is set.
+ */
+static int amdgpu_debugfs_gfx_sched_mask_get(void *data, u64 *val)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)data;
+	u64 ready_mask = 0;
+	u32 idx;
+
+	if (!adev)
+		return -ENODEV;
+
+	for (idx = 0; idx < adev->gfx.num_gfx_rings; ++idx) {
+		if (adev->gfx.gfx_ring[idx].sched.ready)
+			ready_mask |= 1ULL << idx;
+	}
+
+	*val = ready_mask;
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gfx_sched_mask_fops,
+ amdgpu_debugfs_gfx_sched_mask_get,
+ amdgpu_debugfs_gfx_sched_mask_set, "%llx\n");
+
+#endif
+
+/*
+ * Create the amdgpu_gfx_sched_mask debugfs file under the primary DRM
+ * minor's debugfs root. Only done when there is more than one gfx ring.
+ */
+void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	struct dentry *root = adev_to_drm(adev)->primary->debugfs_root;
+
+	if (adev->gfx.num_gfx_rings <= 1)
+		return;
+
+	debugfs_create_file("amdgpu_gfx_sched_mask", 0600, root, adev,
+			    &amdgpu_debugfs_gfx_sched_mask_fops);
+#endif
+}
+
+/*
+ * debugfs for to enable/disable compute job submission to specific core.
+ */
+#if defined(CONFIG_DEBUG_FS)
+/*
+ * debugfs setter: bit i of @val enables (1) or disables (0) job submission
+ * on compute ring i by updating its sched.ready flag. At least one existing
+ * ring must stay enabled, otherwise -EINVAL is returned.
+ */
+static int amdgpu_debugfs_compute_sched_mask_set(void *data, u64 val)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)data;
+	u32 i;
+	u64 mask = 0;
+	struct amdgpu_ring *ring;
+
+	if (!adev)
+		return -ENODEV;
+
+	mask = (1ULL << adev->gfx.num_compute_rings) - 1;
+	if ((val & mask) == 0)
+		return -EINVAL;
+
+	for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
+		ring = &adev->gfx.compute_ring[i];
+		/* 64-bit shift to match the u64 mask, same as the getter */
+		ring->sched.ready = !!(val & (1ULL << i));
+	}
+
+	/*
+	 * publish sched.ready flag update effective immediately across smp;
+	 * these are stores, so a write barrier is the one that orders them
+	 */
+	smp_wmb();
+	return 0;
+}
+
+/*
+ * debugfs getter: report in *@val a bitmask with bit i set for every
+ * compute ring i (of the first num_compute_rings) whose sched.ready flag
+ * is set.
+ */
+static int amdgpu_debugfs_compute_sched_mask_get(void *data, u64 *val)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)data;
+	u64 ready_mask = 0;
+	u32 idx;
+
+	if (!adev)
+		return -ENODEV;
+
+	for (idx = 0; idx < adev->gfx.num_compute_rings; ++idx) {
+		if (adev->gfx.compute_ring[idx].sched.ready)
+			ready_mask |= 1ULL << idx;
+	}
+
+	*val = ready_mask;
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_compute_sched_mask_fops,
+ amdgpu_debugfs_compute_sched_mask_get,
+ amdgpu_debugfs_compute_sched_mask_set, "%llx\n");
+
+#endif
+
+/*
+ * Create the amdgpu_compute_sched_mask debugfs file under the primary DRM
+ * minor's debugfs root. Only done when there is more than one compute ring.
+ */
+void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	struct dentry *root = adev_to_drm(adev)->primary->debugfs_root;
+
+	if (adev->gfx.num_compute_rings <= 1)
+		return;
+
+	debugfs_create_file("amdgpu_compute_sched_mask", 0600, root, adev,
+			    &amdgpu_debugfs_compute_sched_mask_fops);
+#endif
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index f851196c83a5..efd61a1ccc66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -30,7 +30,11 @@
#include "clearstate_defs.h"
#include "amdgpu_ring.h"
#include "amdgpu_rlc.h"
+#include "amdgpu_imu.h"
#include "soc15.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_ring_mux.h"
+#include "amdgpu_xcp.h"
/* GFX current status */
#define AMDGPU_GFX_NORMAL_MODE 0x00000000L
@@ -39,33 +43,77 @@
#define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L
#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
-#define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES
-#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
+#define AMDGPU_MAX_GC_INSTANCES 8
+#define AMDGPU_MAX_QUEUES 128
+
+#define AMDGPU_MAX_GFX_QUEUES AMDGPU_MAX_QUEUES
+#define AMDGPU_MAX_COMPUTE_QUEUES AMDGPU_MAX_QUEUES
enum amdgpu_gfx_pipe_priority {
AMDGPU_GFX_PIPE_PRIO_NORMAL = AMDGPU_RING_PRIO_1,
AMDGPU_GFX_PIPE_PRIO_HIGH = AMDGPU_RING_PRIO_2
};
-/* Argument for PPSMC_MSG_GpuChangeState */
-enum gfx_change_state {
- sGpuChangeState_D0Entry = 1,
- sGpuChangeState_D3Entry,
-};
-
#define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM 0
#define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM 15
+/* 1 second timeout */
+#define GFX_PROFILE_IDLE_TIMEOUT msecs_to_jiffies(1000)
+
+/*
+ * Compute partition modes of the GFX block. The non-negative values are the
+ * modes that amdgpu_gfx_compute_mode_desc() reports as "SPX", "DPX", "TPX",
+ * "QPX" and "CPX". The negative values are special: -1 marks an unknown mode
+ * and -2 requests automatic selection.
+ */
+enum amdgpu_gfx_partition {
+ AMDGPU_SPX_PARTITION_MODE = 0,
+ AMDGPU_DPX_PARTITION_MODE = 1,
+ AMDGPU_TPX_PARTITION_MODE = 2,
+ AMDGPU_QPX_PARTITION_MODE = 3,
+ AMDGPU_CPX_PARTITION_MODE = 4,
+ AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE = -1,
+ /* Automatically choose the right mode */
+ AMDGPU_AUTO_COMPUTE_PARTITION_MODE = -2,
+};
+
+#define NUM_XCC(x) hweight16(x)
+
+/*
+ * Identifiers for the GFX memory types referenced by RAS register entries
+ * (see the mem_id_type field of struct amdgpu_gfx_ras_reg_entry). Values are
+ * consecutive starting at 0; AMDGPU_GFX_MEM_TYPE_NUM is the number of types
+ * and must stay last.
+ */
+enum amdgpu_gfx_ras_mem_id_type {
+ AMDGPU_GFX_CP_MEM = 0,
+ AMDGPU_GFX_GCEA_MEM,
+ AMDGPU_GFX_GC_CANE_MEM,
+ AMDGPU_GFX_GCUTCL2_MEM,
+ AMDGPU_GFX_GDS_MEM,
+ AMDGPU_GFX_LDS_MEM,
+ AMDGPU_GFX_RLC_MEM,
+ AMDGPU_GFX_SP_MEM,
+ AMDGPU_GFX_SPI_MEM,
+ AMDGPU_GFX_SQC_MEM,
+ AMDGPU_GFX_SQ_MEM,
+ AMDGPU_GFX_TA_MEM,
+ AMDGPU_GFX_TCC_MEM,
+ AMDGPU_GFX_TCA_MEM,
+ AMDGPU_GFX_TCI_MEM,
+ AMDGPU_GFX_TCP_MEM,
+ AMDGPU_GFX_TD_MEM,
+ AMDGPU_GFX_TCX_MEM,
+ AMDGPU_GFX_ATC_L2_MEM,
+ AMDGPU_GFX_UTCL2_MEM,
+ AMDGPU_GFX_VML2_MEM,
+ AMDGPU_GFX_VML2_WALKER_MEM,
+ AMDGPU_GFX_MEM_TYPE_NUM
+};
+
struct amdgpu_mec {
struct amdgpu_bo *hpd_eop_obj;
u64 hpd_eop_gpu_addr;
struct amdgpu_bo *mec_fw_obj;
u64 mec_fw_gpu_addr;
+ struct amdgpu_bo *mec_fw_data_obj;
+ u64 mec_fw_data_gpu_addr;
+
u32 num_mec;
u32 num_pipe_per_mec;
u32 num_queue_per_pipe;
- void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
+ void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES];
+};
+struct amdgpu_mec_bitmap {
/* These are the resources for which amdgpu takes ownership */
DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
};
@@ -94,6 +142,10 @@ struct kiq_pm4_funcs {
void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
uint16_t pasid, uint32_t flush_type,
bool all_hub);
+ void (*kiq_reset_hw_queue)(struct amdgpu_ring *kiq_ring,
+ uint32_t queue_type, uint32_t me_id,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t xcc_id, uint32_t vmid);
/* Packet sizes */
int set_resources_size;
int map_queues_size;
@@ -109,15 +161,7 @@ struct amdgpu_kiq {
struct amdgpu_ring ring;
struct amdgpu_irq_src irq;
const struct kiq_pm4_funcs *pmf;
-};
-
-/*
- * GPU scratch registers structures, functions & helpers
- */
-struct amdgpu_scratch {
- unsigned num_reg;
- uint32_t reg_base;
- uint32_t free_mask;
+ void *mqd_backup;
};
/*
@@ -126,10 +170,46 @@ struct amdgpu_scratch {
#define AMDGPU_GFX_MAX_SE 4
#define AMDGPU_GFX_MAX_SH_PER_SE 2
+/**
+ * amdgpu_rb_config - Configure a single Render Backend (RB)
+ *
+ * Bad RBs are fused off and there is a harvest register the driver reads to
+ * determine which RB(s) are fused off so that the driver can configure the
+ * hardware state so that nothing gets sent to them. There are also user
+ * harvest registers that the driver can program to disable additional RBs,
+ * etc., for testing purposes.
+ */
struct amdgpu_rb_config {
+ /**
+ * @rb_backend_disable:
+ *
+ * The value captured from register RB_BACKEND_DISABLE indicates if the
+ * RB backend is disabled or not.
+ */
uint32_t rb_backend_disable;
+
+ /**
+ * @user_rb_backend_disable:
+ *
+ * The value captured from register USER_RB_BACKEND_DISABLE indicates
+ * if the User RB backend is disabled or not.
+ */
uint32_t user_rb_backend_disable;
+
+ /**
+ * @raster_config:
+ *
+ * To set up all of the states, it is necessary to have two registers
+ * to keep all of the states. This field holds the first register.
+ */
uint32_t raster_config;
+
+ /**
+ * @raster_config_1:
+ *
+ * To set up all of the states, it is necessary to have two registers
+ * to keep all of the states. This field holds the second register.
+ */
uint32_t raster_config_1;
};
@@ -177,6 +257,13 @@ struct amdgpu_gfx_config {
uint32_t macrotile_mode_array[16];
struct gb_addr_config gb_addr_config_fields;
+
+ /**
+ * @rb_config:
+ *
+ * Matrix that keeps all the Render Backend (color and depth buffer
+ * handling) configuration on the 3D engine.
+ */
struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE];
/* gfx configure feature */
@@ -187,7 +274,29 @@ struct amdgpu_gfx_config {
uint32_t num_sc_per_sh;
uint32_t num_packer_per_sc;
uint32_t pa_sc_tile_steering_override;
+ /* Whether texture coordinate truncation is conformant. */
+ bool ta_cntl2_truncate_coord_mode;
uint64_t tcc_disabled_mask;
+ uint32_t gc_num_tcp_per_sa;
+ uint32_t gc_num_sdp_interface;
+ uint32_t gc_num_tcps;
+ uint32_t gc_num_tcp_per_wpg;
+ uint32_t gc_tcp_l1_size;
+ uint32_t gc_num_sqc_per_wgp;
+ uint32_t gc_l1_instruction_cache_size_per_sqc;
+ uint32_t gc_l1_data_cache_size_per_sqc;
+ uint32_t gc_gl1c_per_sa;
+ uint32_t gc_gl1c_size_per_instance;
+ uint32_t gc_gl2c_per_gpu;
+ uint32_t gc_tcp_size_per_cu;
+ uint32_t gc_num_cu_per_sqc;
+ uint32_t gc_tcc_size;
+ uint32_t gc_tcp_cache_line_size;
+ uint32_t gc_instruction_cache_size_per_sqc;
+ uint32_t gc_instruction_cache_line_size;
+ uint32_t gc_scalar_data_cache_size_per_sqc;
+ uint32_t gc_scalar_data_cache_line_size;
+ uint32_t gc_tcc_cache_line_size;
};
struct amdgpu_cu_info {
@@ -201,39 +310,52 @@ struct amdgpu_cu_info {
uint32_t number;
uint32_t ao_cu_mask;
uint32_t ao_cu_bitmap[4][4];
- uint32_t bitmap[4][4];
+ uint32_t bitmap[AMDGPU_MAX_GC_INSTANCES][4][4];
};
-struct amdgpu_gfx_ras_funcs {
- int (*ras_late_init)(struct amdgpu_device *adev);
- void (*ras_fini)(struct amdgpu_device *adev);
- int (*ras_error_inject)(struct amdgpu_device *adev,
- void *inject_if);
- int (*query_ras_error_count)(struct amdgpu_device *adev,
- void *ras_error_status);
- void (*reset_ras_error_count)(struct amdgpu_device *adev);
- void (*query_ras_error_status)(struct amdgpu_device *adev);
- void (*reset_ras_error_status)(struct amdgpu_device *adev);
+struct amdgpu_gfx_ras {
+ struct amdgpu_ras_block_object ras_block;
void (*enable_watchdog_timer)(struct amdgpu_device *adev);
+ int (*rlc_gc_fed_irq)(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+ int (*poison_consumption_handler)(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry);
+};
+
+/*
+ * Size and alignment values for the gfx shadow and CSA areas. A pointer to
+ * this structure is passed to the get_gfx_shadow_info() callback of
+ * struct amdgpu_gfx_funcs.
+ * NOTE(review): units are presumably bytes -- confirm against the IP code.
+ */
+struct amdgpu_gfx_shadow_info {
+ u32 shadow_size;
+ u32 shadow_alignment;
+ u32 csa_size;
+ u32 csa_alignment;
+};
struct amdgpu_gfx_funcs {
/* get the gpu clock counter */
uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num,
- u32 sh_num, u32 instance);
- void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd,
+ u32 sh_num, u32 instance, int xcc_id);
+ void (*read_wave_data)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t *dst, int *no_fields);
- void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd,
+ void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t thread, uint32_t start,
uint32_t size, uint32_t *dst);
- void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd,
+ void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t start, uint32_t size,
uint32_t *dst);
void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe,
- u32 queue, u32 vmid);
+ u32 queue, u32 vmid, u32 xcc_id);
void (*init_spm_golden)(struct amdgpu_device *adev);
void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable);
+ int (*get_gfx_shadow_info)(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info,
+ bool skip_check);
+ enum amdgpu_gfx_partition
+ (*query_partition_mode)(struct amdgpu_device *adev);
+ int (*switch_partition_mode)(struct amdgpu_device *adev,
+ int num_xccs_per_xcp);
+ int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node);
+ int (*get_xccs_per_xcp)(struct amdgpu_device *adev);
};
struct sq_work {
@@ -245,6 +367,10 @@ struct amdgpu_pfp {
struct amdgpu_bo *pfp_fw_obj;
uint64_t pfp_fw_gpu_addr;
uint32_t *pfp_fw_ptr;
+
+ struct amdgpu_bo *pfp_fw_data_obj;
+ uint64_t pfp_fw_data_gpu_addr;
+ uint32_t *pfp_fw_data_ptr;
};
struct amdgpu_ce {
@@ -257,6 +383,11 @@ struct amdgpu_me {
struct amdgpu_bo *me_fw_obj;
uint64_t me_fw_gpu_addr;
uint32_t *me_fw_ptr;
+
+ struct amdgpu_bo *me_fw_data_obj;
+ uint64_t me_fw_data_gpu_addr;
+ uint32_t *me_fw_data_ptr;
+
uint32_t num_me;
uint32_t num_pipe_per_me;
uint32_t num_queue_per_pipe;
@@ -266,6 +397,12 @@ struct amdgpu_me {
DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
};
+/*
+ * Delayed work item tagged with the owning device and a partition (XCP) id.
+ * struct amdgpu_gfx keeps an array of these, enforce_isolation[MAX_XCP].
+ * NOTE(review): presumably serviced by amdgpu_gfx_enforce_isolation_handler()
+ * -- confirm where the work is initialised.
+ */
+struct amdgpu_isolation_work {
+ struct amdgpu_device *adev;
+ u32 xcp_id;
+ struct delayed_work work;
+};
+
struct amdgpu_gfx {
struct mutex gpu_clock_mutex;
struct amdgpu_gfx_config config;
@@ -274,8 +411,10 @@ struct amdgpu_gfx {
struct amdgpu_ce ce;
struct amdgpu_me me;
struct amdgpu_mec mec;
- struct amdgpu_kiq kiq;
- struct amdgpu_scratch scratch;
+ struct amdgpu_mec_bitmap mec_bitmap[AMDGPU_MAX_GC_INSTANCES];
+ struct amdgpu_kiq kiq[AMDGPU_MAX_GC_INSTANCES];
+ struct amdgpu_imu imu;
+ bool rs64_enable; /* firmware format */
const struct firmware *me_fw; /* ME firmware */
uint32_t me_fw_version;
const struct firmware *pfp_fw; /* PFP firmware */
@@ -288,6 +427,8 @@ struct amdgpu_gfx {
uint32_t mec_fw_version;
const struct firmware *mec2_fw; /* MEC2 firmware */
uint32_t mec2_fw_version;
+ const struct firmware *imu_fw; /* IMU firmware */
+ uint32_t imu_fw_version;
uint32_t me_feature_version;
uint32_t ce_feature_version;
uint32_t pfp_feature_version;
@@ -298,6 +439,10 @@ struct amdgpu_gfx {
uint32_t rlc_srlg_feature_version;
uint32_t rlc_srls_fw_version;
uint32_t rlc_srls_feature_version;
+ uint32_t rlcp_ucode_version;
+ uint32_t rlcp_ucode_feature_version;
+ uint32_t rlcv_ucode_version;
+ uint32_t rlcv_ucode_feature_version;
uint32_t mec_feature_version;
uint32_t mec2_feature_version;
bool mec_fw_write_wait;
@@ -305,13 +450,15 @@ struct amdgpu_gfx {
bool cp_fw_write_wait;
struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
unsigned num_gfx_rings;
- struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
+ struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES];
unsigned num_compute_rings;
struct amdgpu_irq_src eop_irq;
struct amdgpu_irq_src priv_reg_irq;
struct amdgpu_irq_src priv_inst_irq;
+ struct amdgpu_irq_src bad_op_irq;
struct amdgpu_irq_src cp_ecc_error_irq;
struct amdgpu_irq_src sq_irq;
+ struct amdgpu_irq_src rlc_gc_fed_irq;
struct sq_work sq_work;
/* gfx status */
@@ -324,26 +471,86 @@ struct amdgpu_gfx {
/* reset mask */
uint32_t grbm_soft_reset;
uint32_t srbm_soft_reset;
+ uint32_t gfx_supported_reset;
+ uint32_t compute_supported_reset;
/* gfx off */
- bool gfx_off_state; /* true: enabled, false: disabled */
- struct mutex gfx_off_mutex;
- uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */
- struct delayed_work gfx_off_delay_work;
+ bool gfx_off_state; /* true: enabled, false: disabled */
+ struct mutex gfx_off_mutex; /* mutex to change gfxoff state */
+ uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */
+ struct delayed_work gfx_off_delay_work; /* async work to set gfx block off */
+ uint32_t gfx_off_residency; /* last logged residency */
+ uint64_t gfx_off_entrycount; /* count of times GPU has get into GFXOFF state */
/* pipe reservation */
struct mutex pipe_reserve_mutex;
DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
/*ras */
- struct ras_common_if *ras_if;
- const struct amdgpu_gfx_ras_funcs *ras_funcs;
+ struct ras_common_if *ras_if;
+ struct amdgpu_gfx_ras *ras;
+
+ bool is_poweron;
+
+ struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
+ struct amdgpu_ring_mux muxer;
+
+ bool cp_gfx_shadow; /* for gfx11 */
+
+ uint16_t xcc_mask;
+ uint32_t num_xcc_per_xcp;
+ struct mutex partition_mutex;
+ bool mcbp; /* mid command buffer preemption */
+
+ /* IP reg dump */
+ uint32_t *ip_dump_core;
+ uint32_t *ip_dump_compute_queues;
+ uint32_t *ip_dump_gfx_queues;
+
+ struct mutex reset_sem_mutex;
+
+ /* cleaner shader */
+ struct amdgpu_bo *cleaner_shader_obj;
+ unsigned int cleaner_shader_size;
+ u64 cleaner_shader_gpu_addr;
+ void *cleaner_shader_cpu_ptr;
+ const void *cleaner_shader_ptr;
+ bool enable_cleaner_shader;
+ struct amdgpu_isolation_work enforce_isolation[MAX_XCP];
+ /* Mutex for synchronizing KFD scheduler operations */
+ struct mutex userq_sch_mutex;
+ u64 userq_sch_req_count[MAX_XCP];
+ bool userq_sch_inactive[MAX_XCP];
+ unsigned long enforce_isolation_jiffies[MAX_XCP];
+ unsigned long enforce_isolation_time[MAX_XCP];
+
+ atomic_t total_submission_cnt;
+ struct delayed_work idle_work;
+ bool workload_profile_active;
+ struct mutex workload_profile_mutex;
+
+ bool disable_kq;
+ bool disable_uq;
};
+/*
+ * A RAS error status register entry together with the GFX memory type it
+ * belongs to.
+ * NOTE(review): se_num is presumably a shader engine count -- confirm
+ * against the users of this structure.
+ */
+struct amdgpu_gfx_ras_reg_entry {
+ struct amdgpu_ras_err_status_reg_entry reg_entry;
+ enum amdgpu_gfx_ras_mem_id_type mem_id_type;
+ uint32_t se_num;
+};
+
+/*
+ * Reference to a table of RAS memory id entries plus its length. The
+ * AMDGPU_GFX_MEMID_ENT() initializer fills mem_id_ent with the array and
+ * size with ARRAY_SIZE() of that array, i.e. size counts elements.
+ */
+struct amdgpu_gfx_ras_mem_id_entry {
+ const struct amdgpu_ras_memory_id_entry *mem_id_ent;
+ uint32_t size;
+};
+
+#define AMDGPU_GFX_MEMID_ENT(x) {(x), ARRAY_SIZE(x)},
+
#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
-#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
-#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid))
+#define amdgpu_gfx_select_se_sh(adev, se, sh, instance, xcc_id) ((adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance), (xcc_id)))
+#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid, xcc_id) ((adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid), (xcc_id)))
#define amdgpu_gfx_init_spm_golden(adev) (adev)->gfx.funcs->init_spm_golden((adev))
+#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si), false))
/**
* amdgpu_gfx_create_bitmask - create a bitmask
@@ -358,27 +565,24 @@ static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width)
return (u32)((1ULL << bit_width) - 1);
}
-int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg);
-void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg);
-
void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se,
unsigned max_sh);
-int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
- struct amdgpu_ring *ring,
- struct amdgpu_irq_src *irq);
+int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id);
void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring);
-void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev);
+void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id);
int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
- unsigned hpd_size);
+ unsigned hpd_size, int xcc_id);
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
- unsigned mqd_size);
-void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev);
-int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev);
-int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev);
+ unsigned mqd_size, int xcc_id);
+void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id);
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev);
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev);
@@ -387,28 +591,81 @@ int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
int pipe, int queue);
void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
int *mec, int *pipe, int *queue);
-bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec,
- int pipe, int queue);
+bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int xcc_id,
+ int mec, int pipe, int queue);
bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
-int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me,
- int pipe, int queue);
-void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
- int *me, int *pipe, int *queue);
+bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me,
int pipe, int queue);
void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
+void amdgpu_gfx_off_ctrl_immediate(struct amdgpu_device *adev, bool enable);
int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value);
-int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev);
+int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
+int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value);
+int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *residency);
+int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value);
int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
struct amdgpu_iv_entry *entry);
int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry);
-uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
-void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
+uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id);
+void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id);
+int amdgpu_kiq_hdp_flush(struct amdgpu_device *adev);
int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev);
-void amdgpu_gfx_state_change_set(struct amdgpu_device *adev, enum gfx_change_state state);
+void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id);
+
+int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry);
+
+bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev);
+void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
+ void *ras_error_status,
+ void (*func)(struct amdgpu_device *adev, void *ras_error_status,
+ int xcc_id));
+int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
+ unsigned int cleaner_shader_size);
+void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev);
+void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
+ unsigned int cleaner_shader_size,
+ const void *cleaner_shader_ptr);
+void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work);
+void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring);
+
+void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work);
+void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring);
+u32 amdgpu_gfx_csb_preamble_start(u32 *buffer);
+u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count);
+void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count);
+
+void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev);
+
+/*
+ * Map a compute partition mode to its short name. Any value other than the
+ * five named partition modes yields "UNKNOWN".
+ */
+static inline const char *amdgpu_gfx_compute_mode_desc(int mode)
+{
+	if (mode == AMDGPU_SPX_PARTITION_MODE)
+		return "SPX";
+	if (mode == AMDGPU_DPX_PARTITION_MODE)
+		return "DPX";
+	if (mode == AMDGPU_TPX_PARTITION_MODE)
+		return "TPX";
+	if (mode == AMDGPU_QPX_PARTITION_MODE)
+		return "QPX";
+	if (mode == AMDGPU_CPX_PARTITION_MODE)
+		return "CPX";
+
+	return "UNKNOWN";
+}
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h
index beabab515836..c7b44aeb671b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h
@@ -35,6 +35,9 @@ struct amdgpu_gfxhub_funcs {
void (*init)(struct amdgpu_device *adev);
int (*get_xgmi_info)(struct amdgpu_device *adev);
void (*utcl2_harvest)(struct amdgpu_device *adev);
+ void (*mode2_save_regs)(struct amdgpu_device *adev);
+ void (*mode2_restore_regs)(struct amdgpu_device *adev);
+ void (*halt)(struct amdgpu_device *adev);
};
struct amdgpu_gfxhub {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 08478fce00f2..869bceb0fe2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -25,13 +25,25 @@
*/
#include <linux/io-64-nonatomic-lo-hi.h>
+#ifdef CONFIG_X86
+#include <asm/hypervisor.h>
+#endif
#include "amdgpu.h"
#include "amdgpu_gmc.h"
#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
#include "amdgpu_xgmi.h"
#include <drm/drm_drv.h>
+#include <drm/ttm/ttm_tt.h>
+
+static const u64 four_gb = 0x100000000ULL;
+
+/**
+ * amdgpu_gmc_is_pdb0_enabled - check whether PDB0 is in use
+ * @adev: amdgpu device
+ *
+ * Returns true when the XGMI link is connected to the CPU or when
+ * amdgpu_virt_xgmi_migrate_enabled() reports true for @adev.
+ */
+bool amdgpu_gmc_is_pdb0_enabled(struct amdgpu_device *adev)
+{
+	if (adev->gmc.xgmi.connected_to_cpu)
+		return true;
+
+	return amdgpu_virt_xgmi_migrate_enabled(adev);
+}
/**
* amdgpu_gmc_pdb0_alloc - allocate vram for pdb0
@@ -47,7 +59,7 @@ int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev)
struct amdgpu_bo_param bp;
u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
uint32_t pde0_page_shift = adev->gmc.vmid0_page_table_block_size + 21;
- uint32_t npdes = (vram_size + (1ULL << pde0_page_shift) -1) >> pde0_page_shift;
+ uint32_t npdes = (vram_size + (1ULL << pde0_page_shift) - 1) >> pde0_page_shift;
memset(&bp, 0, sizeof(bp));
bp.size = PAGE_ALIGN((npdes + 1) * 8);
@@ -176,6 +188,9 @@ uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ if (!bo->ttm)
+ return AMDGPU_BO_INVALID_OFFSET;
+
if (bo->ttm->num_pages != 1 || bo->ttm->caching == ttm_cached)
return AMDGPU_BO_INVALID_OFFSET;
@@ -198,13 +213,20 @@ uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
u64 base)
{
+ uint64_t vis_limit = (uint64_t)amdgpu_vis_vram_limit << 20;
uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
mc->vram_start = base;
mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
- if (limit && limit < mc->real_vram_size)
+ if (limit < mc->real_vram_size)
mc->real_vram_size = limit;
+ if (vis_limit && vis_limit < mc->visible_vram_size)
+ mc->visible_vram_size = vis_limit;
+
+ if (mc->real_vram_size < mc->visible_vram_size)
+ mc->visible_vram_size = mc->real_vram_size;
+
if (mc->xgmi.num_physical_nodes == 0) {
mc->fb_start = mc->vram_start;
mc->fb_end = mc->vram_end;
@@ -236,10 +258,20 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc
u64 hive_vram_end = mc->xgmi.node_segment_size * mc->xgmi.num_physical_nodes - 1;
mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id;
mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1;
- mc->gart_start = hive_vram_end + 1;
+ /* node_segment_size may not 4GB aligned on SRIOV, align up is needed. */
+ mc->gart_start = ALIGN(hive_vram_end + 1, four_gb);
mc->gart_end = mc->gart_start + mc->gart_size - 1;
- mc->fb_start = hive_vram_start;
- mc->fb_end = hive_vram_end;
+ if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
+ /* set mc->vram_start to 0 to switch the returned GPU address of
+ * amdgpu_bo_create_reserved() from FB aperture to GART aperture.
+ */
+ mc->vram_start = 0;
+ mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
+ mc->visible_vram_size = min(mc->visible_vram_size, mc->real_vram_size);
+ } else {
+ mc->fb_start = hive_vram_start;
+ mc->fb_end = hive_vram_end;
+ }
dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
mc->mc_vram_size >> 20, mc->vram_start,
mc->vram_end, mc->real_vram_size >> 20);
@@ -252,14 +284,15 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc
*
* @adev: amdgpu device structure holding all necessary information
* @mc: memory controller structure holding memory information
+ * @gart_placement: GART placement policy with respect to VRAM
*
- * Function will place try to place GART before or after VRAM.
+ * Function will try to place GART before or after VRAM.
* If GART size is bigger than space left then we ajust GART size.
* Thus function will never fails.
*/
-void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
+void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
+ enum amdgpu_gart_placement gart_placement)
{
- const uint64_t four_gb = 0x100000000ULL;
u64 size_af, size_bf;
/*To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START*/
u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);
@@ -275,11 +308,22 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
mc->gart_size = max(size_bf, size_af);
}
- if ((size_bf >= mc->gart_size && size_bf < size_af) ||
- (size_af < mc->gart_size))
- mc->gart_start = 0;
- else
+ switch (gart_placement) {
+ case AMDGPU_GART_PLACEMENT_HIGH:
mc->gart_start = max_mc_address - mc->gart_size + 1;
+ break;
+ case AMDGPU_GART_PLACEMENT_LOW:
+ mc->gart_start = 0;
+ break;
+ case AMDGPU_GART_PLACEMENT_BEST_FIT:
+ default:
+ if ((size_bf >= mc->gart_size && size_bf < size_af) ||
+ (size_af < mc->gart_size))
+ mc->gart_start = 0;
+ else
+ mc->gart_start = max_mc_address - mc->gart_size + 1;
+ break;
+ }
mc->gart_start &= ~(four_gb - 1);
mc->gart_end = mc->gart_start + mc->gart_size - 1;
@@ -304,14 +348,6 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
u64 size_af, size_bf;
- if (amdgpu_sriov_vf(adev)) {
- mc->agp_start = 0xffffffffffff;
- mc->agp_end = 0x0;
- mc->agp_size = 0;
-
- return;
- }
-
if (mc->fb_start > mc->gart_start) {
size_bf = (mc->fb_start & sixteen_gb_mask) -
ALIGN(mc->gart_end + 1, sixteen_gb);
@@ -336,6 +372,25 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
}
/**
+ * amdgpu_gmc_set_agp_default - Set the default AGP aperture value.
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
+ *
+ * To disable the AGP aperture, you need to set the start to a larger
+ * value than the end. This function sets the default value which
+ * can then be overridden using amdgpu_gmc_agp_location() if you want
+ * to enable the AGP aperture on a specific chip.
+ *
+ */
+void amdgpu_gmc_set_agp_default(struct amdgpu_device *adev,
+				struct amdgpu_gmc *mc)
+{
+	/* Empty aperture: zero size, with the start placed above the end. */
+	mc->agp_size = 0;
+	mc->agp_end = 0;
+	mc->agp_start = 0xffffffffffff;
+}
+
+/**
* amdgpu_gmc_fault_key - get hask key from vm fault address and pasid
*
* @addr: 48 bit physical address, page aligned (36 significant bits)
@@ -350,6 +405,7 @@ static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid)
* amdgpu_gmc_filter_faults - filter VM faults
*
* @adev: amdgpu device structure
+ * @ih: interrupt ring that the fault was received from
* @addr: address of the VM fault
* @pasid: PASID of the process causing the fault
* @timestamp: timestamp of the fault
@@ -358,7 +414,8 @@ static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid)
* True if the fault was filtered and should not be processed further.
* False if the fault is a new one and needs to be handled.
*/
-bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
+bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih, uint64_t addr,
uint16_t pasid, uint64_t timestamp)
{
struct amdgpu_gmc *gmc = &adev->gmc;
@@ -366,6 +423,10 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
struct amdgpu_gmc_fault *fault;
uint32_t hash;
+ /* Stale retry fault if timestamp goes backward */
+ if (amdgpu_ih_ts_after(timestamp, ih->processed_timestamp))
+ return true;
+
/* If we don't have space left in the ring buffer return immediately */
stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) -
AMDGPU_GMC_FAULT_TIMEOUT;
@@ -378,8 +439,21 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
while (fault->timestamp >= stamp) {
uint64_t tmp;
- if (atomic64_read(&fault->key) == key)
- return true;
+ if (atomic64_read(&fault->key) == key) {
+ /*
+ * if we get a fault which is already present in
+ * the fault_ring and the timestamp of
+ * the fault is after the expired timestamp,
+ * then this is a new fault that needs to be added
+ * into the fault ring.
+ */
+ if (fault->timestamp_expiry != 0 &&
+ amdgpu_ih_ts_after(fault->timestamp_expiry,
+ timestamp))
+ break;
+ else
+ return true;
+ }
tmp = fault->timestamp;
fault = &gmc->fault_ring[fault->next];
@@ -415,97 +489,89 @@ void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
{
struct amdgpu_gmc *gmc = &adev->gmc;
uint64_t key = amdgpu_gmc_fault_key(addr, pasid);
+ struct amdgpu_ih_ring *ih;
struct amdgpu_gmc_fault *fault;
+ uint32_t last_wptr;
+ uint64_t last_ts;
uint32_t hash;
uint64_t tmp;
+ if (adev->irq.retry_cam_enabled)
+ return;
+
+ ih = &adev->irq.ih1;
+ /* Get the WPTR of the last entry in IH ring */
+ last_wptr = amdgpu_ih_get_wptr(adev, ih);
+ /* Order wptr with ring data. */
+ rmb();
+ /* Get the timetamp of the last entry in IH ring */
+ last_ts = amdgpu_ih_decode_iv_ts(adev, ih, last_wptr, -1);
+
hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
do {
- if (atomic64_cmpxchg(&fault->key, key, 0) == key)
+ if (atomic64_read(&fault->key) == key) {
+ /*
+ * Update the timestamp when this fault
+ * expired.
+ */
+ fault->timestamp_expiry = last_ts;
break;
+ }
tmp = fault->timestamp;
fault = &gmc->fault_ring[fault->next];
} while (fault->timestamp < tmp);
}
-int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
+int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev)
{
int r;
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->ras_late_init) {
- r = adev->umc.ras_funcs->ras_late_init(adev);
- if (r)
- return r;
- }
+ /* umc ras block */
+ r = amdgpu_umc_ras_sw_init(adev);
+ if (r)
+ return r;
- if (adev->mmhub.ras_funcs &&
- adev->mmhub.ras_funcs->ras_late_init) {
- r = adev->mmhub.ras_funcs->ras_late_init(adev);
- if (r)
- return r;
- }
+ /* mmhub ras block */
+ r = amdgpu_mmhub_ras_sw_init(adev);
+ if (r)
+ return r;
- if (!adev->gmc.xgmi.connected_to_cpu)
- adev->gmc.xgmi.ras_funcs = &xgmi_ras_funcs;
+ /* hdp ras block */
+ r = amdgpu_hdp_ras_sw_init(adev);
+ if (r)
+ return r;
- if (adev->gmc.xgmi.ras_funcs &&
- adev->gmc.xgmi.ras_funcs->ras_late_init) {
- r = adev->gmc.xgmi.ras_funcs->ras_late_init(adev);
- if (r)
- return r;
- }
+ /* mca.x ras block */
+ r = amdgpu_mca_mp0_ras_sw_init(adev);
+ if (r)
+ return r;
- if (adev->hdp.ras_funcs &&
- adev->hdp.ras_funcs->ras_late_init) {
- r = adev->hdp.ras_funcs->ras_late_init(adev);
- if (r)
- return r;
- }
+ r = amdgpu_mca_mp1_ras_sw_init(adev);
+ if (r)
+ return r;
- if (adev->mca.mp0.ras_funcs &&
- adev->mca.mp0.ras_funcs->ras_late_init) {
- r = adev->mca.mp0.ras_funcs->ras_late_init(adev);
- if (r)
- return r;
- }
+ r = amdgpu_mca_mpio_ras_sw_init(adev);
+ if (r)
+ return r;
- if (adev->mca.mp1.ras_funcs &&
- adev->mca.mp1.ras_funcs->ras_late_init) {
- r = adev->mca.mp1.ras_funcs->ras_late_init(adev);
- if (r)
- return r;
- }
+ /* xgmi ras block */
+ r = amdgpu_xgmi_ras_sw_init(adev);
+ if (r)
+ return r;
- if (adev->mca.mpio.ras_funcs &&
- adev->mca.mpio.ras_funcs->ras_late_init) {
- r = adev->mca.mpio.ras_funcs->ras_late_init(adev);
- if (r)
- return r;
- }
+ return 0;
+}
+int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
+{
return 0;
}
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
{
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->ras_fini)
- adev->umc.ras_funcs->ras_fini(adev);
-
- if (adev->mmhub.ras_funcs &&
- adev->mmhub.ras_funcs->ras_fini)
- adev->mmhub.ras_funcs->ras_fini(adev);
-
- if (adev->gmc.xgmi.ras_funcs &&
- adev->gmc.xgmi.ras_funcs->ras_fini)
- adev->gmc.xgmi.ras_funcs->ras_fini(adev);
- if (adev->hdp.ras_funcs &&
- adev->hdp.ras_funcs->ras_fini)
- adev->hdp.ras_funcs->ras_fini(adev);
}
/*
@@ -515,23 +581,42 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
* subject to change when ring number changes
* Engine 17: Gart flushes
*/
-#define GFXHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3
-#define MMHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3
+#define AMDGPU_VMHUB_INV_ENG_BITMAP 0x1FFF3
int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring;
- unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
- {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP,
- GFXHUB_FREE_VM_INV_ENGS_BITMAP};
+ unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = {0};
unsigned i;
unsigned vmhub, inv_eng;
+ struct amdgpu_ring *shared_ring;
+
+ /* init the vm inv eng for all vmhubs */
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
+ vm_inv_engs[i] = AMDGPU_VMHUB_INV_ENG_BITMAP;
+ /* reserve engine 5 for firmware */
+ if (adev->enable_mes)
+ vm_inv_engs[i] &= ~(1 << 5);
+ /* reserve engine 6 for uni mes */
+ if (adev->enable_uni_mes)
+ vm_inv_engs[i] &= ~(1 << 6);
+ /* reserve mmhub engine 3 for firmware */
+ if (adev->enable_umsch_mm)
+ vm_inv_engs[i] &= ~(1 << 3);
+ }
for (i = 0; i < adev->num_rings; ++i) {
ring = adev->rings[i];
- vmhub = ring->funcs->vmhub;
+ vmhub = ring->vm_hub;
+
+ if (ring == &adev->mes.ring[0] ||
+ ring == &adev->mes.ring[1] ||
+ ring == &adev->umsch_mm.ring ||
+ ring == &adev->cper.ring_buf)
+ continue;
- if (ring == &adev->mes.ring)
+ /* Skip if the ring is a shared ring */
+ if (amdgpu_sdma_is_shared_inv_eng(adev, ring))
continue;
inv_eng = ffs(vm_inv_engs[vmhub]);
@@ -545,12 +630,230 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng);
dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
- ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
+ ring->name, ring->vm_inv_eng, ring->vm_hub);
+ /* SDMA has a special packet which allows it to use the same
+ * invalidation engine for all the rings in one instance.
+ * Therefore, we do not allocate a separate VM invalidation engine
+ * for SDMA page rings. Instead, they share the VM invalidation
+ * engine with the SDMA gfx ring. This change ensures efficient
+ * resource management and avoids the issue of insufficient VM
+ * invalidation engines.
+ */
+ shared_ring = amdgpu_sdma_get_shared_ring(adev, ring);
+ if (shared_ring) {
+ shared_ring->vm_inv_eng = ring->vm_inv_eng;
+ dev_info(adev->dev, "ring %s shares VM invalidation engine %u with ring %s on hub %u\n",
+ ring->name, ring->vm_inv_eng, shared_ring->name, ring->vm_hub);
+ continue;
+ }
}
return 0;
}
+void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
+{
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+ struct dma_fence *fence;
+ struct amdgpu_job *job;
+ int r;
+
+ if (!hub->sdma_invalidation_workaround || vmid ||
+ !adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready ||
+ !ring->sched.ready) {
+ /*
+ * A GPU reset should flush all TLBs anyway, so no need to do
+ * this while one is ongoing.
+ */
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return;
+
+ if (adev->gmc.flush_tlb_needs_extra_type_2)
+ adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid,
+ vmhub, 2);
+
+ if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2)
+ adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid,
+ vmhub, 0);
+
+ adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub,
+ flush_type);
+ up_read(&adev->reset_domain->sem);
+ return;
+ }
+
+ /* The SDMA on Navi 1x has a bug which can theoretically result in memory
+ * corruption if an invalidation happens at the same time as an VA
+ * translation. Avoid this by doing the invalidation from the SDMA
+ * itself at least for GART.
+ */
+ mutex_lock(&adev->mman.gtt_window_lock);
+ r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ 16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
+ &job, AMDGPU_KERNEL_JOB_ID_FLUSH_GPU_TLB);
+ if (r)
+ goto error_alloc;
+
+ job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
+ job->vm_needs_flush = true;
+ job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+ fence = amdgpu_job_submit(job);
+ mutex_unlock(&adev->mman.gtt_window_lock);
+
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+
+ return;
+
+error_alloc:
+ mutex_unlock(&adev->mman.gtt_window_lock);
+ dev_err(adev->dev, "Error flushing GPU TLB using the SDMA (%d)!\n", r);
+}
+
+int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
+ uint32_t flush_type, bool all_hub,
+ uint32_t inst)
+{
+ struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
+ unsigned int ndw;
+ int r, cnt = 0;
+ uint32_t seq;
+
+ /*
+ * A GPU reset should flush all TLBs anyway, so no need to do
+ * this while one is ongoing.
+ */
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return 0;
+
+ if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready) {
+ if (adev->gmc.flush_tlb_needs_extra_type_2)
+ adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
+ 2, all_hub,
+ inst);
+
+ if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2)
+ adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
+ 0, all_hub,
+ inst);
+
+ adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
+ flush_type, all_hub,
+ inst);
+ r = 0;
+ } else {
+ /* 2 dwords flush + 8 dwords fence */
+ ndw = kiq->pmf->invalidate_tlbs_size + 8;
+
+ if (adev->gmc.flush_tlb_needs_extra_type_2)
+ ndw += kiq->pmf->invalidate_tlbs_size;
+
+ if (adev->gmc.flush_tlb_needs_extra_type_0)
+ ndw += kiq->pmf->invalidate_tlbs_size;
+
+ spin_lock(&adev->gfx.kiq[inst].ring_lock);
+ r = amdgpu_ring_alloc(ring, ndw);
+ if (r) {
+ spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+ goto error_unlock_reset;
+ }
+ if (adev->gmc.flush_tlb_needs_extra_type_2)
+ kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub);
+
+ if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0)
+ kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub);
+
+ kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r) {
+ amdgpu_ring_undo(ring);
+ spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+ goto error_unlock_reset;
+ }
+
+ amdgpu_ring_commit(ring);
+ spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY &&
+ !amdgpu_reset_pending(adev->reset_domain)) {
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY) {
+ dev_err(adev->dev, "timeout waiting for kiq fence\n");
+ r = -ETIME;
+ } else
+ r = 0;
+ }
+
+error_unlock_reset:
+ up_read(&adev->reset_domain->sem);
+ return r;
+}
+
+void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask,
+ uint32_t xcc_inst)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_inst];
+ struct amdgpu_ring *ring = &kiq->ring;
+ signed long r, cnt = 0;
+ unsigned long flags;
+ uint32_t seq;
+
+ if (adev->mes.ring[0].sched.ready) {
+ amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1,
+ ref, mask);
+ return;
+ }
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ amdgpu_ring_alloc(ring, 32);
+ amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
+ ref, mask);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ /* don't wait anymore for IRQ context */
+ if (r < 1 && in_interrupt())
+ goto failed_kiq;
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY &&
+ !amdgpu_reset_pending(adev->reset_domain)) {
+
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY)
+ goto failed_kiq;
+
+ return;
+
+failed_undo:
+ amdgpu_ring_undo(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+failed_kiq:
+ dev_err(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1);
+}
+
/**
* amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
* @adev: amdgpu_device pointer
@@ -560,9 +863,16 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
*/
void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
{
- switch (adev->asic_type) {
- case CHIP_RAVEN:
- case CHIP_RENOIR:
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ /* RAVEN */
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ /* RENOIR looks like RAVEN */
+ case IP_VERSION(9, 3, 0):
+ /* GC 10.3.7 */
+ case IP_VERSION(10, 3, 7):
+ /* GC 11.0.1 */
+ case IP_VERSION(11, 0, 1):
if (amdgpu_tmz == 0) {
adev->gmc.tmz_enabled = false;
dev_info(adev->dev,
@@ -573,11 +883,24 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
"Trusted Memory Zone (TMZ) feature enabled\n");
}
break;
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- case CHIP_VANGOGH:
- case CHIP_YELLOW_CARP:
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ /* VANGOGH */
+ case IP_VERSION(10, 3, 1):
+ /* YELLOW_CARP*/
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
/* Don't enable it by default yet.
*/
if (amdgpu_tmz < 1) {
@@ -608,40 +931,20 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
{
struct amdgpu_gmc *gmc = &adev->gmc;
-
- switch (adev->asic_type) {
- case CHIP_VEGA10:
- case CHIP_VEGA20:
- case CHIP_ARCTURUS:
- case CHIP_ALDEBARAN:
- /*
- * noretry = 0 will cause kfd page fault tests fail
- * for some ASICs, so set default to 1 for these ASICs.
- */
- if (amdgpu_noretry == -1)
- gmc->noretry = 1;
- else
- gmc->noretry = amdgpu_noretry;
- break;
- case CHIP_RAVEN:
- default:
- /* Raven currently has issues with noretry
- * regardless of what we decide for other
- * asics, we should leave raven with
- * noretry = 0 until we root cause the
- * issues.
- *
- * default this to 0 for now, but we may want
- * to change this in the future for certain
- * GPUs as it can increase performance in
- * certain cases.
- */
- if (amdgpu_noretry == -1)
- gmc->noretry = 0;
- else
- gmc->noretry = amdgpu_noretry;
- break;
- }
+ uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+ bool noretry_default = (gc_ver == IP_VERSION(9, 0, 1) ||
+ gc_ver == IP_VERSION(9, 4, 0) ||
+ gc_ver == IP_VERSION(9, 4, 1) ||
+ gc_ver == IP_VERSION(9, 4, 2) ||
+ gc_ver == IP_VERSION(9, 4, 3) ||
+ gc_ver == IP_VERSION(9, 4, 4) ||
+ gc_ver == IP_VERSION(9, 5, 0) ||
+ gc_ver >= IP_VERSION(10, 3, 0));
+
+ if (!amdgpu_sriov_xnack_support(adev))
+ gmc->noretry = 1;
+ else
+ gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry;
}
void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
@@ -654,7 +957,7 @@ void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + hub->ctx_distance * i;
- tmp = (hub_type == AMDGPU_GFXHUB_0) ?
+ tmp = (hub_type == AMDGPU_GFXHUB(0)) ?
RREG32_SOC15_IP(GC, reg) :
RREG32_SOC15_IP(MMHUB, reg);
@@ -663,7 +966,7 @@ void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
else
tmp &= ~hub->vm_cntx_cntl_vm_fault;
- (hub_type == AMDGPU_GFXHUB_0) ?
+ (hub_type == AMDGPU_GFXHUB(0)) ?
WREG32_SOC15_IP(GC, reg, tmp) :
WREG32_SOC15_IP(MMHUB, reg, tmp);
}
@@ -674,6 +977,13 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)
unsigned size;
/*
+ * Some ASICs need to reserve a region of video memory to avoid access
+ * from driver
+ */
+ adev->mman.stolen_reserved_offset = 0;
+ adev->mman.stolen_reserved_size = 0;
+
+ /*
* TODO:
* Currently there is a bug where some memory client outside
* of the driver writes to first 8M of VRAM on S3 resume,
@@ -683,6 +993,17 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)
*/
switch (adev->asic_type) {
case CHIP_VEGA10:
+ adev->mman.keep_stolen_vga_memory = true;
+ /*
+ * VEGA10 SRIOV VF with MS_HYPERV host needs some firmware reserved area.
+ */
+#ifdef CONFIG_X86
+ if (amdgpu_sriov_vf(adev) && hypervisor_is_type(X86_HYPER_MS_HYPERV)) {
+ adev->mman.stolen_reserved_offset = 0x500000;
+ adev->mman.stolen_reserved_size = 0x200000;
+ }
+#endif
+ break;
case CHIP_RAVEN:
case CHIP_RENOIR:
adev->mman.keep_stolen_vga_memory = true;
@@ -693,7 +1014,7 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)
}
if (amdgpu_sriov_vf(adev) ||
- !amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE)) {
+ !amdgpu_device_has_display_hardware(adev)) {
size = 0;
} else {
size = amdgpu_gmc_get_vbios_fb_size(adev);
@@ -739,9 +1060,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
*/
u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
u64 pde0_page_size = (1ULL<<adev->gmc.vmid0_page_table_block_size)<<21;
- u64 vram_addr = adev->vm_manager.vram_base_offset -
- adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
- u64 vram_end = vram_addr + vram_size;
+ u64 vram_addr, vram_end;
u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
int idx;
@@ -752,7 +1071,12 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
flags |= AMDGPU_PTE_WRITEABLE;
flags |= AMDGPU_PTE_SNOOPED;
flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1));
- flags |= AMDGPU_PDE_PTE;
+ flags |= AMDGPU_PDE_PTE_FLAG(adev);
+
+ vram_addr = adev->vm_manager.vram_base_offset;
+ if (!amdgpu_virt_xgmi_migrate_enabled(adev))
+ vram_addr -= adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+ vram_end = vram_addr + vram_size;
/* The first n PDE0 entries are used as PTE,
* pointing to vram
@@ -765,7 +1089,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
* pointing to a 4K system page
*/
flags = AMDGPU_PTE_VALID;
- flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
+ flags |= AMDGPU_PTE_SNOOPED | AMDGPU_PDE_BFS_FLAG(adev, 0);
/* Requires gart_ptb_gpu_pa to be 4K aligned */
amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
drm_dev_exit(idx);
@@ -795,33 +1119,564 @@ uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo)
return amdgpu_gmc_vram_mc2pa(adev, amdgpu_bo_gpu_offset(bo));
}
-/**
- * amdgpu_gmc_vram_cpu_pa - calculate vram buffer object's physical address
- * from CPU's view
- *
- * @adev: amdgpu_device pointer
- * @bo: amdgpu buffer object
- */
-uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo)
+int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
+{
+ struct amdgpu_bo *vram_bo = NULL;
+ uint64_t vram_gpu = 0;
+ void *vram_ptr = NULL;
+
+ int ret, size = 0x100000;
+ uint8_t cptr[10];
+
+ ret = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &vram_bo,
+ &vram_gpu,
+ &vram_ptr);
+ if (ret)
+ return ret;
+
+ memset(vram_ptr, 0x86, size);
+ memset(cptr, 0x86, 10);
+
+ /**
+ * Check the start, the mid, and the end of the memory if the content of
+ * each byte is the pattern "0x86". If yes, we suppose the vram bo is
+ * workable.
+ *
+ * Note: If check the each byte of whole 1M bo, it will cost too many
+ * seconds, so here, we just pick up three parts for emulation.
+ */
+ ret = memcmp(vram_ptr, cptr, 10);
+ if (ret) {
+ ret = -EIO;
+ goto release_buffer;
+ }
+
+ ret = memcmp(vram_ptr + (size / 2), cptr, 10);
+ if (ret) {
+ ret = -EIO;
+ goto release_buffer;
+ }
+
+ ret = memcmp(vram_ptr + size - 10, cptr, 10);
+ if (ret) {
+ ret = -EIO;
+ goto release_buffer;
+ }
+
+release_buffer:
+ amdgpu_bo_free_kernel(&vram_bo, &vram_gpu,
+ &vram_ptr);
+
+ return ret;
+}
+
+static const char *nps_desc[] = {
+ [AMDGPU_NPS1_PARTITION_MODE] = "NPS1",
+ [AMDGPU_NPS2_PARTITION_MODE] = "NPS2",
+ [AMDGPU_NPS3_PARTITION_MODE] = "NPS3",
+ [AMDGPU_NPS4_PARTITION_MODE] = "NPS4",
+ [AMDGPU_NPS6_PARTITION_MODE] = "NPS6",
+ [AMDGPU_NPS8_PARTITION_MODE] = "NPS8",
+};
+
+static ssize_t available_memory_partition_show(struct device *dev,
+ struct device_attribute *addr,
+ char *buf)
{
- return amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start + adev->gmc.aper_base;
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ int size = 0, mode;
+ char *sep = "";
+
+ for_each_inst(mode, adev->gmc.supported_nps_modes) {
+ size += sysfs_emit_at(buf, size, "%s%s", sep, nps_desc[mode]);
+ sep = ", ";
+ }
+ size += sysfs_emit_at(buf, size, "\n");
+
+ return size;
}
-void amdgpu_gmc_get_reserved_allocation(struct amdgpu_device *adev)
+static ssize_t current_memory_partition_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
- /* Some ASICs need to reserve a region of video memory to avoid access
- * from driver */
- adev->mman.stolen_reserved_offset = 0;
- adev->mman.stolen_reserved_size = 0;
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ enum amdgpu_memory_partition mode;
+ struct amdgpu_hive_info *hive;
+ int i;
- switch (adev->asic_type) {
- case CHIP_YELLOW_CARP:
- if (amdgpu_discovery == 0) {
- adev->mman.stolen_reserved_offset = 0x1ffb0000;
- adev->mman.stolen_reserved_size = 64 * PAGE_SIZE;
+ mode = UNKNOWN_MEMORY_PARTITION_MODE;
+ for_each_inst(i, adev->gmc.supported_nps_modes) {
+ if (!strncasecmp(nps_desc[i], buf, strlen(nps_desc[i]))) {
+ mode = i;
+ break;
+ }
+ }
+
+ if (mode == UNKNOWN_MEMORY_PARTITION_MODE)
+ return -EINVAL;
+
+ if (mode == adev->gmc.gmc_funcs->query_mem_partition_mode(adev)) {
+ dev_info(
+ adev->dev,
+ "requested NPS mode is same as current NPS mode, skipping\n");
+ return count;
+ }
+
+ /* If device is part of hive, all devices in the hive should request the
+ * same mode. Hence store the requested mode in hive.
+ */
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ atomic_set(&hive->requested_nps_mode, mode);
+ amdgpu_put_xgmi_hive(hive);
+ } else {
+ adev->gmc.requested_nps_mode = mode;
+ }
+
+ dev_info(
+ adev->dev,
+ "NPS mode change requested, please remove and reload the driver\n");
+
+ return count;
+}
+
+static ssize_t current_memory_partition_show(
+ struct device *dev, struct device_attribute *addr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ enum amdgpu_memory_partition mode;
+
+ /* Only minimal precaution taken to reject requests while in reset */
+ if (amdgpu_in_reset(adev))
+ return -EPERM;
+
+ mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ if ((mode >= ARRAY_SIZE(nps_desc)) ||
+ (BIT(mode) & AMDGPU_ALL_NPS_MASK) != BIT(mode))
+ return sysfs_emit(buf, "UNKNOWN\n");
+
+ return sysfs_emit(buf, "%s\n", nps_desc[mode]);
+}
+
+static DEVICE_ATTR_RW(current_memory_partition);
+static DEVICE_ATTR_RO(available_memory_partition);
+
+int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev)
+{
+ bool nps_switch_support;
+ int r = 0;
+
+ if (!adev->gmc.gmc_funcs->query_mem_partition_mode)
+ return 0;
+
+ nps_switch_support = (hweight32(adev->gmc.supported_nps_modes &
+ AMDGPU_ALL_NPS_MASK) > 1);
+ if (!nps_switch_support)
+ dev_attr_current_memory_partition.attr.mode &=
+ ~(S_IWUSR | S_IWGRP | S_IWOTH);
+ else
+ r = device_create_file(adev->dev,
+ &dev_attr_available_memory_partition);
+
+ if (r)
+ return r;
+
+ return device_create_file(adev->dev,
+ &dev_attr_current_memory_partition);
+}
+
+void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (!adev->gmc.gmc_funcs->query_mem_partition_mode)
+ return;
+
+ device_remove_file(adev->dev, &dev_attr_current_memory_partition);
+ device_remove_file(adev->dev, &dev_attr_available_memory_partition);
+}
+
+int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges,
+ uint8_t *exp_ranges)
+{
+ struct amdgpu_gmc_memrange *ranges;
+ int range_cnt, ret, i, j;
+ uint32_t nps_type;
+ bool refresh;
+
+ if (!mem_ranges || !exp_ranges)
+ return -EINVAL;
+
+ refresh = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) &&
+ (adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS);
+ ret = amdgpu_discovery_get_nps_info(adev, &nps_type, &ranges,
+ &range_cnt, refresh);
+
+ if (ret)
+ return ret;
+
+ /* TODO: For now, expect ranges and partition count to be the same.
+ * Adjust if there are holes expected in any NPS domain.
+ */
+ if (*exp_ranges && (range_cnt != *exp_ranges)) {
+ dev_warn(
+ adev->dev,
+ "NPS config mismatch - expected ranges: %d discovery - nps mode: %d, nps ranges: %d",
+ *exp_ranges, nps_type, range_cnt);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ for (i = 0; i < range_cnt; ++i) {
+ if (ranges[i].base_address >= ranges[i].limit_address) {
+ dev_warn(
+ adev->dev,
+ "Invalid NPS range - nps mode: %d, range[%d]: base: %llx limit: %llx",
+ nps_type, i, ranges[i].base_address,
+ ranges[i].limit_address);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ /* Check for overlaps, not expecting any now */
+ for (j = i - 1; j >= 0; j--) {
+ if (max(ranges[j].base_address,
+ ranges[i].base_address) <=
+ min(ranges[j].limit_address,
+ ranges[i].limit_address)) {
+ dev_warn(
+ adev->dev,
+ "overlapping ranges detected [ %llx - %llx ] | [%llx - %llx]",
+ ranges[j].base_address,
+ ranges[j].limit_address,
+ ranges[i].base_address,
+ ranges[i].limit_address);
+ ret = -EINVAL;
+ goto err;
+ }
+ }
+
+ mem_ranges[i].range.fpfn =
+ (ranges[i].base_address -
+ adev->vm_manager.vram_base_offset) >>
+ AMDGPU_GPU_PAGE_SHIFT;
+ mem_ranges[i].range.lpfn =
+ (ranges[i].limit_address -
+ adev->vm_manager.vram_base_offset) >>
+ AMDGPU_GPU_PAGE_SHIFT;
+ mem_ranges[i].size =
+ ranges[i].limit_address - ranges[i].base_address + 1;
+ }
+
+ if (!*exp_ranges)
+ *exp_ranges = range_cnt;
+err:
+ kfree(ranges);
+
+ return ret;
+}
+
+int amdgpu_gmc_request_memory_partition(struct amdgpu_device *adev,
+ int nps_mode)
+{
+ /* Not supported on VF devices and APUs */
+ if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
+ return -EOPNOTSUPP;
+
+ if (!adev->psp.funcs) {
+ dev_err(adev->dev,
+ "PSP interface not available for nps mode change request");
+ return -EINVAL;
+ }
+
+ return psp_memory_partition(&adev->psp, nps_mode);
+}
+
+static inline bool amdgpu_gmc_need_nps_switch_req(struct amdgpu_device *adev,
+ int req_nps_mode,
+ int cur_nps_mode)
+{
+ return (((BIT(req_nps_mode) & adev->gmc.supported_nps_modes) ==
+ BIT(req_nps_mode)) &&
+ req_nps_mode != cur_nps_mode);
+}
+
+void amdgpu_gmc_prepare_nps_mode_change(struct amdgpu_device *adev)
+{
+ int req_nps_mode, cur_nps_mode, r;
+ struct amdgpu_hive_info *hive;
+
+ if (amdgpu_sriov_vf(adev) || !adev->gmc.supported_nps_modes ||
+ !adev->gmc.gmc_funcs->request_mem_partition_mode)
+ return;
+
+ cur_nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ req_nps_mode = atomic_read(&hive->requested_nps_mode);
+ if (!amdgpu_gmc_need_nps_switch_req(adev, req_nps_mode,
+ cur_nps_mode)) {
+ amdgpu_put_xgmi_hive(hive);
+ return;
}
+ r = amdgpu_xgmi_request_nps_change(adev, hive, req_nps_mode);
+ amdgpu_put_xgmi_hive(hive);
+ goto out;
+ }
+
+ req_nps_mode = adev->gmc.requested_nps_mode;
+ if (!amdgpu_gmc_need_nps_switch_req(adev, req_nps_mode, cur_nps_mode))
+ return;
+
+ /* even if this fails, we should let driver unload w/o blocking */
+ r = adev->gmc.gmc_funcs->request_mem_partition_mode(adev, req_nps_mode);
+out:
+ if (r)
+ dev_err(adev->dev, "NPS mode change request failed\n");
+ else
+ dev_info(
+ adev->dev,
+ "NPS mode change request done, reload driver to complete the change\n");
+}
+
+bool amdgpu_gmc_need_reset_on_init(struct amdgpu_device *adev)
+{
+ if (adev->gmc.gmc_funcs->need_reset_on_init)
+ return adev->gmc.gmc_funcs->need_reset_on_init(adev);
+
+ return false;
+}
+
+enum amdgpu_memory_partition
+amdgpu_gmc_get_vf_memory_partition(struct amdgpu_device *adev)
+{
+ switch (adev->gmc.num_mem_partitions) {
+ case 0:
+ return UNKNOWN_MEMORY_PARTITION_MODE;
+ case 1:
+ return AMDGPU_NPS1_PARTITION_MODE;
+ case 2:
+ return AMDGPU_NPS2_PARTITION_MODE;
+ case 4:
+ return AMDGPU_NPS4_PARTITION_MODE;
+ case 8:
+ return AMDGPU_NPS8_PARTITION_MODE;
+ default:
+ return AMDGPU_NPS1_PARTITION_MODE;
+ }
+}
+
+enum amdgpu_memory_partition
+amdgpu_gmc_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes)
+{
+ enum amdgpu_memory_partition mode = UNKNOWN_MEMORY_PARTITION_MODE;
+
+ if (adev->nbio.funcs &&
+ adev->nbio.funcs->get_memory_partition_mode)
+ mode = adev->nbio.funcs->get_memory_partition_mode(adev,
+ supp_modes);
+ else
+ dev_warn(adev->dev, "memory partition mode query is not supported\n");
+
+ return mode;
+}
+
+enum amdgpu_memory_partition
+amdgpu_gmc_query_memory_partition(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev))
+ return amdgpu_gmc_get_vf_memory_partition(adev);
+ else
+ return amdgpu_gmc_get_memory_partition(adev, NULL);
+}
+
+static bool amdgpu_gmc_validate_partition_info(struct amdgpu_device *adev)
+{
+ enum amdgpu_memory_partition mode;
+ u32 supp_modes;
+ bool valid;
+
+ mode = amdgpu_gmc_get_memory_partition(adev, &supp_modes);
+
+ /* Mode detected by hardware not present in supported modes */
+ if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) &&
+ !(BIT(mode - 1) & supp_modes))
+ return false;
+
+ switch (mode) {
+ case UNKNOWN_MEMORY_PARTITION_MODE:
+ case AMDGPU_NPS1_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 1);
+ break;
+ case AMDGPU_NPS2_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 2);
+ break;
+ case AMDGPU_NPS4_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 3 ||
+ adev->gmc.num_mem_partitions == 4);
+ break;
+ case AMDGPU_NPS8_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 8);
break;
default:
+ valid = false;
+ }
+
+ return valid;
+}
+
+static bool amdgpu_gmc_is_node_present(int *node_ids, int num_ids, int nid)
+{
+ int i;
+
+ /* Check if node with id 'nid' is present in 'node_ids' array */
+ for (i = 0; i < num_ids; ++i)
+ if (node_ids[i] == nid)
+ return true;
+
+ return false;
+}
+
+static void
+amdgpu_gmc_init_acpi_mem_ranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges)
+{
+ struct amdgpu_numa_info numa_info;
+ int node_ids[AMDGPU_MAX_MEM_RANGES];
+ int num_ranges = 0, ret;
+ int num_xcc, xcc_id;
+ uint32_t xcc_mask;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ xcc_mask = (1U << num_xcc) - 1;
+
+ for_each_inst(xcc_id, xcc_mask) {
+ ret = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info);
+ if (ret)
+ continue;
+
+ if (numa_info.nid == NUMA_NO_NODE) {
+ mem_ranges[0].size = numa_info.size;
+ mem_ranges[0].numa.node = numa_info.nid;
+ num_ranges = 1;
+ break;
+ }
+
+ if (amdgpu_gmc_is_node_present(node_ids, num_ranges,
+ numa_info.nid))
+ continue;
+
+ node_ids[num_ranges] = numa_info.nid;
+ mem_ranges[num_ranges].numa.node = numa_info.nid;
+ mem_ranges[num_ranges].size = numa_info.size;
+ ++num_ranges;
+ }
+
+ adev->gmc.num_mem_partitions = num_ranges;
+}
+
+void amdgpu_gmc_init_sw_mem_ranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges)
+{
+ enum amdgpu_memory_partition mode;
+ u32 start_addr = 0, size;
+ int i, r, l;
+
+ mode = amdgpu_gmc_query_memory_partition(adev);
+
+ switch (mode) {
+ case UNKNOWN_MEMORY_PARTITION_MODE:
+ adev->gmc.num_mem_partitions = 0;
+ break;
+ case AMDGPU_NPS1_PARTITION_MODE:
+ adev->gmc.num_mem_partitions = 1;
+ break;
+ case AMDGPU_NPS2_PARTITION_MODE:
+ adev->gmc.num_mem_partitions = 2;
+ break;
+ case AMDGPU_NPS4_PARTITION_MODE:
+ if (adev->flags & AMD_IS_APU)
+ adev->gmc.num_mem_partitions = 3;
+ else
+ adev->gmc.num_mem_partitions = 4;
+ break;
+ case AMDGPU_NPS8_PARTITION_MODE:
+ adev->gmc.num_mem_partitions = 8;
+ break;
+ default:
+ adev->gmc.num_mem_partitions = 1;
break;
}
+
+ /* Use NPS range info, if populated */
+ r = amdgpu_gmc_get_nps_memranges(adev, mem_ranges,
+ &adev->gmc.num_mem_partitions);
+ if (!r) {
+ l = 0;
+ for (i = 1; i < adev->gmc.num_mem_partitions; ++i) {
+ if (mem_ranges[i].range.lpfn >
+ mem_ranges[i - 1].range.lpfn)
+ l = i;
+ }
+
+ } else {
+ if (!adev->gmc.num_mem_partitions) {
+ dev_warn(adev->dev,
+ "Not able to detect NPS mode, fall back to NPS1\n");
+ adev->gmc.num_mem_partitions = 1;
+ }
+ /* Fallback to sw based calculation */
+ size = (adev->gmc.real_vram_size + SZ_16M) >> AMDGPU_GPU_PAGE_SHIFT;
+ size /= adev->gmc.num_mem_partitions;
+
+ for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
+ mem_ranges[i].range.fpfn = start_addr;
+ mem_ranges[i].size =
+ ((u64)size << AMDGPU_GPU_PAGE_SHIFT);
+ mem_ranges[i].range.lpfn = start_addr + size - 1;
+ start_addr += size;
+ }
+
+ l = adev->gmc.num_mem_partitions - 1;
+ }
+
+ /* Adjust the last one */
+ mem_ranges[l].range.lpfn =
+ (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) - 1;
+ mem_ranges[l].size =
+ adev->gmc.real_vram_size -
+ ((u64)mem_ranges[l].range.fpfn << AMDGPU_GPU_PAGE_SHIFT);
+}
+
+int amdgpu_gmc_init_mem_ranges(struct amdgpu_device *adev)
+{
+ bool valid;
+
+ adev->gmc.mem_partitions = kcalloc(AMDGPU_MAX_MEM_RANGES,
+ sizeof(struct amdgpu_mem_partition_info),
+ GFP_KERNEL);
+ if (!adev->gmc.mem_partitions)
+ return -ENOMEM;
+
+ if (adev->gmc.is_app_apu)
+ amdgpu_gmc_init_acpi_mem_ranges(adev, adev->gmc.mem_partitions);
+ else
+ amdgpu_gmc_init_sw_mem_ranges(adev, adev->gmc.mem_partitions);
+
+ if (amdgpu_sriov_vf(adev))
+ valid = true;
+ else
+ valid = amdgpu_gmc_validate_partition_info(adev);
+ if (!valid) {
+ /* TODO: handle invalid case */
+ dev_warn(adev->dev,
+ "Mem ranges not matching with hardware config\n");
+ }
+
+ return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index e55201134a01..727342689d4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -29,6 +29,8 @@
#include <linux/types.h>
#include "amdgpu_irq.h"
+#include "amdgpu_xgmi.h"
+#include "amdgpu_ras.h"
/* VA hole for 48bit addresses on Vega10 */
#define AMDGPU_GMC_HOLE_START 0x0000800000000000ULL
@@ -60,8 +62,35 @@
*/
#define AMDGPU_GMC_FAULT_TIMEOUT 5000ULL
+/* XNACK flags */
+#define AMDGPU_GMC_XNACK_FLAG_CHAIN BIT(0)
+
struct firmware;
+enum amdgpu_memory_partition {
+ UNKNOWN_MEMORY_PARTITION_MODE = 0,
+ AMDGPU_NPS1_PARTITION_MODE = 1,
+ AMDGPU_NPS2_PARTITION_MODE = 2,
+ AMDGPU_NPS3_PARTITION_MODE = 3,
+ AMDGPU_NPS4_PARTITION_MODE = 4,
+ AMDGPU_NPS6_PARTITION_MODE = 6,
+ AMDGPU_NPS8_PARTITION_MODE = 8,
+};
+
+#define AMDGPU_ALL_NPS_MASK \
+ (BIT(AMDGPU_NPS1_PARTITION_MODE) | BIT(AMDGPU_NPS2_PARTITION_MODE) | \
+ BIT(AMDGPU_NPS3_PARTITION_MODE) | BIT(AMDGPU_NPS4_PARTITION_MODE) | \
+ BIT(AMDGPU_NPS6_PARTITION_MODE) | BIT(AMDGPU_NPS8_PARTITION_MODE))
+
+#define AMDGPU_GMC_INIT_RESET_NPS BIT(0)
+
+#define AMDGPU_MAX_MEM_RANGES 8
+
+#define AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY 0x80
+#define AMDGPU_GMC9_FAULT_SOURCE_DATA_READ 0x40
+#define AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE 0x20
+#define AMDGPU_GMC9_FAULT_SOURCE_DATA_EXE 0x10
+
/*
* GMC page fault information
*/
@@ -69,6 +98,7 @@ struct amdgpu_gmc_fault {
uint64_t timestamp:48;
uint64_t next:AMDGPU_GMC_FAULT_RING_ORDER;
atomic64_t key;
+ uint64_t timestamp_expiry:48;
};
/*
@@ -99,7 +129,13 @@ struct amdgpu_vmhub {
uint32_t eng_distance;
uint32_t eng_addr_distance; /* include LO32/HI32 */
+ uint32_t vm_cntx_cntl;
uint32_t vm_cntx_cntl_vm_fault;
+ uint32_t vm_l2_bank_select_reserved_cid2;
+
+ uint32_t vm_contexts_disable;
+
+ bool sdma_invalidation_workaround;
const struct amdgpu_vmhub_funcs *vmhub_funcs;
};
@@ -112,8 +148,9 @@ struct amdgpu_gmc_funcs {
void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type);
/* flush the vm tlb via pasid */
- int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
- uint32_t flush_type, bool all_hub);
+ void (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
+ uint32_t flush_type, bool all_hub,
+ uint32_t inst);
/* flush the vm tlb via ring */
uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
uint64_t pd_addr);
@@ -122,44 +159,58 @@ struct amdgpu_gmc_funcs {
unsigned pasid);
/* enable/disable PRT support */
void (*set_prt)(struct amdgpu_device *adev, bool enable);
- /* map mtype to hardware flags */
- uint64_t (*map_mtype)(struct amdgpu_device *adev, uint32_t flags);
/* get the pde for a given mc addr */
void (*get_vm_pde)(struct amdgpu_device *adev, int level,
u64 *dst, u64 *flags);
- /* get the pte flags to use for a BO VA mapping */
+ /* get the pte flags to use for PTEs */
void (*get_vm_pte)(struct amdgpu_device *adev,
- struct amdgpu_bo_va_mapping *mapping,
- uint64_t *flags);
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
+ uint64_t *pte_flags);
+ /* override per-page pte flags */
+ void (*override_vm_pte_flags)(struct amdgpu_device *dev,
+ struct amdgpu_vm *vm,
+ uint64_t addr, uint64_t *flags);
/* get the amount of memory used by the vbios for pre-OS console */
unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
+ /* get the DCC buffer alignment */
+ unsigned int (*get_dcc_alignment)(struct amdgpu_device *adev);
+
+ enum amdgpu_memory_partition (*query_mem_partition_mode)(
+ struct amdgpu_device *adev);
+ /* Request NPS mode */
+ int (*request_mem_partition_mode)(struct amdgpu_device *adev,
+ int nps_mode);
+ bool (*need_reset_on_init)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_mem_partition_info {
+ union {
+ struct {
+ uint32_t fpfn;
+ uint32_t lpfn;
+ } range;
+ struct {
+ int node;
+ } numa;
+ };
+ uint64_t size;
};
-struct amdgpu_xgmi_ras_funcs {
- int (*ras_late_init)(struct amdgpu_device *adev);
- void (*ras_fini)(struct amdgpu_device *adev);
- int (*query_ras_error_count)(struct amdgpu_device *adev,
- void *ras_error_status);
- void (*reset_ras_error_count)(struct amdgpu_device *adev);
+#define INVALID_PFN -1
+
+struct amdgpu_gmc_memrange {
+ uint64_t base_address;
+ uint64_t limit_address;
+ uint32_t flags;
+ int nid_mask;
};
-struct amdgpu_xgmi {
- /* from psp */
- u64 node_id;
- u64 hive_id;
- /* fixed per family */
- u64 node_segment_size;
- /* physical node (0-3) */
- unsigned physical_node_id;
- /* number of nodes (0-4) */
- unsigned num_physical_nodes;
- /* gpu list in the same hive */
- struct list_head head;
- bool supported;
- struct ras_common_if *ras_if;
- bool connected_to_cpu;
- bool pending_reset;
- const struct amdgpu_xgmi_ras_funcs *ras_funcs;
+enum amdgpu_gart_placement {
+ AMDGPU_GART_PLACEMENT_BEST_FIT = 0,
+ AMDGPU_GART_PLACEMENT_HIGH,
+ AMDGPU_GART_PLACEMENT_LOW,
};
struct amdgpu_gmc {
@@ -248,30 +299,80 @@ struct amdgpu_gmc {
uint64_t last_fault:AMDGPU_GMC_FAULT_RING_ORDER;
bool tmz_enabled;
+ bool is_app_apu;
+ struct amdgpu_mem_partition_info *mem_partitions;
+ uint8_t num_mem_partitions;
const struct amdgpu_gmc_funcs *gmc_funcs;
+ enum amdgpu_memory_partition requested_nps_mode;
+ uint32_t supported_nps_modes;
+ uint32_t reset_flags;
struct amdgpu_xgmi xgmi;
struct amdgpu_irq_src ecc_irq;
int noretry;
+ uint32_t xnack_flags;
uint32_t vmid0_page_table_block_size;
uint32_t vmid0_page_table_depth;
struct amdgpu_bo *pdb0_bo;
/* CPU kmapped address of pdb0*/
void *ptr_pdb0;
+
+ /* MALL size */
+ u64 mall_size;
+ uint32_t m_half_use;
+
+ /* number of UMC instances */
+ int num_umc;
+ /* mode2 save restore */
+ u64 VM_L2_CNTL;
+ u64 VM_L2_CNTL2;
+ u64 VM_DUMMY_PAGE_FAULT_CNTL;
+ u64 VM_DUMMY_PAGE_FAULT_ADDR_LO32;
+ u64 VM_DUMMY_PAGE_FAULT_ADDR_HI32;
+ u64 VM_L2_PROTECTION_FAULT_CNTL;
+ u64 VM_L2_PROTECTION_FAULT_CNTL2;
+ u64 VM_L2_PROTECTION_FAULT_MM_CNTL3;
+ u64 VM_L2_PROTECTION_FAULT_MM_CNTL4;
+ u64 VM_L2_PROTECTION_FAULT_ADDR_LO32;
+ u64 VM_L2_PROTECTION_FAULT_ADDR_HI32;
+ u64 VM_DEBUG;
+ u64 VM_L2_MM_GROUP_RT_CLASSES;
+ u64 VM_L2_BANK_SELECT_RESERVED_CID;
+ u64 VM_L2_BANK_SELECT_RESERVED_CID2;
+ u64 VM_L2_CACHE_PARITY_CNTL;
+ u64 VM_L2_IH_LOG_CNTL;
+ u64 VM_CONTEXT_CNTL[16];
+ u64 VM_CONTEXT_PAGE_TABLE_BASE_ADDR_LO32[16];
+ u64 VM_CONTEXT_PAGE_TABLE_BASE_ADDR_HI32[16];
+ u64 VM_CONTEXT_PAGE_TABLE_START_ADDR_LO32[16];
+ u64 VM_CONTEXT_PAGE_TABLE_START_ADDR_HI32[16];
+ u64 VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[16];
+ u64 VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[16];
+ u64 MC_VM_MX_L1_TLB_CNTL;
+
+ u64 noretry_flags;
+
+ bool flush_tlb_needs_extra_type_0;
+ bool flush_tlb_needs_extra_type_2;
+ bool flush_pasid_uses_kiq;
};
-#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
-#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
- ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
- ((adev), (pasid), (type), (allhub)))
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
-#define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
-#define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags))
+#define amdgpu_gmc_get_vm_pte(adev, vm, bo, vm_flags, pte_flags) \
+ ((adev)->gmc.gmc_funcs->get_vm_pte((adev), (vm), (bo), (vm_flags), \
+ (pte_flags)))
+#define amdgpu_gmc_override_vm_pte_flags(adev, vm, addr, pte_flags) \
+ (adev)->gmc.gmc_funcs->override_vm_pte_flags \
+ ((adev), (vm), (addr), (pte_flags))
#define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
+#define amdgpu_gmc_get_dcc_alignment(adev) ({ \
+ typeof(adev) _adev = (adev); \
+ _adev->gmc.gmc_funcs->get_dcc_alignment(_adev); \
+})
/**
* amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR
@@ -301,6 +402,7 @@ static inline uint64_t amdgpu_gmc_sign_extend(uint64_t addr)
return addr;
}
+bool amdgpu_gmc_is_pdb0_enabled(struct amdgpu_device *adev);
int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev);
void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
uint64_t *addr, uint64_t *flags);
@@ -313,16 +415,30 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc
void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
u64 base);
void amdgpu_gmc_gart_location(struct amdgpu_device *adev,
- struct amdgpu_gmc *mc);
+ struct amdgpu_gmc *mc,
+ enum amdgpu_gart_placement gart_placement);
void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc);
-bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
+void amdgpu_gmc_set_agp_default(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc);
+bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih, uint64_t addr,
uint16_t pasid, uint64_t timestamp);
void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
uint16_t pasid);
+int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
+void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type);
+int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
+ uint32_t flush_type, bool all_hub,
+ uint32_t inst);
+void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask,
+ uint32_t xcc_inst);
extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev);
extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev);
@@ -332,10 +448,29 @@ amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
bool enable);
void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev);
-void amdgpu_gmc_get_reserved_allocation(struct amdgpu_device *adev);
void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev);
uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr);
uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo);
-uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo);
+int amdgpu_gmc_vram_checking(struct amdgpu_device *adev);
+int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev);
+
+int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges,
+ uint8_t *exp_ranges);
+
+int amdgpu_gmc_request_memory_partition(struct amdgpu_device *adev,
+ int nps_mode);
+void amdgpu_gmc_prepare_nps_mode_change(struct amdgpu_device *adev);
+bool amdgpu_gmc_need_reset_on_init(struct amdgpu_device *adev);
+enum amdgpu_memory_partition
+amdgpu_gmc_get_vf_memory_partition(struct amdgpu_device *adev);
+enum amdgpu_memory_partition
+amdgpu_gmc_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes);
+enum amdgpu_memory_partition
+amdgpu_gmc_query_memory_partition(struct amdgpu_device *adev);
+int amdgpu_gmc_init_mem_ranges(struct amdgpu_device *adev);
+void amdgpu_gmc_init_sw_mem_ranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 675a72ef305d..895c1e4c6747 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -26,23 +26,12 @@
#include "amdgpu.h"
-struct amdgpu_gtt_node {
- struct ttm_buffer_object *tbo;
- struct ttm_range_mgr_node base;
-};
-
static inline struct amdgpu_gtt_mgr *
to_gtt_mgr(struct ttm_resource_manager *man)
{
return container_of(man, struct amdgpu_gtt_mgr, manager);
}
-static inline struct amdgpu_gtt_node *
-to_amdgpu_gtt_node(struct ttm_resource *res)
-{
- return container_of(res, struct amdgpu_gtt_node, base.base);
-}
-
/**
* DOC: mem_info_gtt_total
*
@@ -60,7 +49,7 @@ static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev,
struct ttm_resource_manager *man;
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
- return sysfs_emit(buf, "%llu\n", man->size * PAGE_SIZE);
+ return sysfs_emit(buf, "%llu\n", man->size);
}
/**
@@ -77,10 +66,9 @@ static ssize_t amdgpu_mem_info_gtt_used_show(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- struct ttm_resource_manager *man;
+ struct ttm_resource_manager *man = &adev->mman.gtt_mgr.manager;
- man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
- return sysfs_emit(buf, "%llu\n", amdgpu_gtt_mgr_usage(man));
+ return sysfs_emit(buf, "%llu\n", ttm_resource_manager_usage(man));
}
static DEVICE_ATTR(mem_info_gtt_total, S_IRUGO,
@@ -107,9 +95,9 @@ const struct attribute_group amdgpu_gtt_mgr_attr_group = {
*/
bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *res)
{
- struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(res);
+ struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
- return drm_mm_node_allocated(&node->base.mm_nodes[0]);
+ return drm_mm_node_allocated(&node->mm_nodes[0]);
}
/**
@@ -129,28 +117,23 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
{
struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
uint32_t num_pages = PFN_UP(tbo->base.size);
- struct amdgpu_gtt_node *node;
+ struct ttm_range_mgr_node *node;
int r;
- if (!(place->flags & TTM_PL_FLAG_TEMPORARY) &&
- atomic64_add_return(num_pages, &mgr->used) > man->size) {
- atomic64_sub(num_pages, &mgr->used);
- return -ENOSPC;
- }
+ node = kzalloc(struct_size(node, mm_nodes, 1), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
- node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL);
- if (!node) {
- r = -ENOMEM;
- goto err_out;
+ ttm_resource_init(tbo, place, &node->base);
+ if (!(place->flags & TTM_PL_FLAG_TEMPORARY) &&
+ ttm_resource_manager_usage(man) > man->size) {
+ r = -ENOSPC;
+ goto err_free;
}
- node->tbo = tbo;
- ttm_resource_init(tbo, place, &node->base.base);
-
if (place->lpfn) {
spin_lock(&mgr->lock);
- r = drm_mm_insert_node_in_range(&mgr->mm,
- &node->base.mm_nodes[0],
+ r = drm_mm_insert_node_in_range(&mgr->mm, &node->mm_nodes[0],
num_pages, tbo->page_alignment,
0, place->fpfn, place->lpfn,
DRM_MM_INSERT_BEST);
@@ -158,23 +141,19 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
if (unlikely(r))
goto err_free;
- node->base.base.start = node->base.mm_nodes[0].start;
+ node->base.start = node->mm_nodes[0].start;
} else {
- node->base.mm_nodes[0].start = 0;
- node->base.mm_nodes[0].size = node->base.base.num_pages;
- node->base.base.start = AMDGPU_BO_INVALID_OFFSET;
+ node->mm_nodes[0].start = 0;
+ node->mm_nodes[0].size = PFN_UP(node->base.size);
+ node->base.start = AMDGPU_BO_INVALID_OFFSET;
}
- *res = &node->base.base;
+ *res = &node->base;
return 0;
err_free:
+ ttm_resource_fini(man, &node->base);
kfree(node);
-
-err_out:
- if (!(place->flags & TTM_PL_FLAG_TEMPORARY))
- atomic64_sub(num_pages, &mgr->used);
-
return r;
}
@@ -189,62 +168,74 @@ err_out:
static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man,
struct ttm_resource *res)
{
- struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(res);
+ struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
spin_lock(&mgr->lock);
- if (drm_mm_node_allocated(&node->base.mm_nodes[0]))
- drm_mm_remove_node(&node->base.mm_nodes[0]);
+ if (drm_mm_node_allocated(&node->mm_nodes[0]))
+ drm_mm_remove_node(&node->mm_nodes[0]);
spin_unlock(&mgr->lock);
- if (!(res->placement & TTM_PL_FLAG_TEMPORARY))
- atomic64_sub(res->num_pages, &mgr->used);
-
+ ttm_resource_fini(man, res);
kfree(node);
}
/**
- * amdgpu_gtt_mgr_usage - return usage of GTT domain
- *
- * @man: TTM memory type manager
- *
- * Return how many bytes are used in the GTT domain
- */
-uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man)
-{
- struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
-
- return atomic64_read(&mgr->used) * PAGE_SIZE;
-}
-
-/**
* amdgpu_gtt_mgr_recover - re-init gart
*
- * @man: TTM memory type manager
+ * @mgr: amdgpu_gtt_mgr pointer
*
* Re-init the gart for each known BO in the GTT.
*/
-int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man)
+void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr)
{
- struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
- struct amdgpu_device *adev;
- struct amdgpu_gtt_node *node;
+ struct ttm_range_mgr_node *node;
struct drm_mm_node *mm_node;
- int r = 0;
+ struct amdgpu_device *adev;
adev = container_of(mgr, typeof(*adev), mman.gtt_mgr);
spin_lock(&mgr->lock);
drm_mm_for_each_node(mm_node, &mgr->mm) {
- node = container_of(mm_node, typeof(*node), base.mm_nodes[0]);
- r = amdgpu_ttm_recover_gart(node->tbo);
- if (r)
- break;
+ node = container_of(mm_node, typeof(*node), mm_nodes[0]);
+ amdgpu_ttm_recover_gart(node->base.bo);
}
spin_unlock(&mgr->lock);
+}
- amdgpu_gart_invalidate_tlb(adev);
+/**
+ * amdgpu_gtt_mgr_intersects - test for intersection
+ *
+ * @man: Our manager object (not used by this implementation)
+ * @res: The resource to test
+ * @place: The place for the new allocation
+ * @size: The size of the new allocation (not used by this implementation)
+ *
+ * Simplified intersection test, only interesting if we need GART or not.
+ * No address-range comparison is performed.
+ *
+ * Return: true if @place->lpfn is zero, or if
+ * amdgpu_gtt_mgr_has_gart_addr() reports true for @res; false otherwise.
+ */
+static bool amdgpu_gtt_mgr_intersects(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+ return !place->lpfn || amdgpu_gtt_mgr_has_gart_addr(res);
+}
- return r;
+/**
+ * amdgpu_gtt_mgr_compatible - test for compatibility
+ *
+ * @man: Our manager object (not used by this implementation)
+ * @res: The resource to test
+ * @place: The place for the new allocation
+ * @size: The size of the new allocation (not used by this implementation)
+ *
+ * Simplified compatibility test. It evaluates the same condition as
+ * amdgpu_gtt_mgr_intersects().
+ *
+ * Return: true if @place->lpfn is zero, or if
+ * amdgpu_gtt_mgr_has_gart_addr() reports true for @res; false otherwise.
+ */
+static bool amdgpu_gtt_mgr_compatible(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+ return !place->lpfn || amdgpu_gtt_mgr_has_gart_addr(res);
+}
/**
@@ -263,14 +254,13 @@ static void amdgpu_gtt_mgr_debug(struct ttm_resource_manager *man,
spin_lock(&mgr->lock);
drm_mm_print(&mgr->mm, printer);
spin_unlock(&mgr->lock);
-
- drm_printf(printer, "man size:%llu pages, gtt used:%llu pages\n",
- man->size, atomic64_read(&mgr->used));
}
static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func = {
.alloc = amdgpu_gtt_mgr_new,
.free = amdgpu_gtt_mgr_del,
+ .intersects = amdgpu_gtt_mgr_intersects,
+ .compatible = amdgpu_gtt_mgr_compatible,
.debug = amdgpu_gtt_mgr_debug
};
@@ -291,13 +281,13 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size)
man->use_tt = true;
man->func = &amdgpu_gtt_mgr_func;
- ttm_resource_manager_init(man, gtt_size >> PAGE_SHIFT);
+ ttm_resource_manager_init(man, &adev->mman.bdev, gtt_size);
start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
+ start += amdgpu_vce_required_gart_pages(adev);
size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
drm_mm_init(&mgr->mm, start, size);
spin_lock_init(&mgr->lock);
- atomic64_set(&mgr->used, 0);
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager);
ttm_resource_manager_set_used(man, true);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
index a766e1aad2b9..5a60d69a3e1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2021 Advanced Micro Devices, Inc.
+ * Copyright 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -20,49 +20,65 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-
#include "amdgpu.h"
#include "amdgpu_ras.h"
+#include <uapi/linux/kfd_ioctl.h>
-int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev)
+int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev)
{
- int r;
- struct ras_ih_if ih_info = {
- .cb = NULL,
- };
- struct ras_fs_if fs_info = {
- .sysfs_name = "hdp_err_count",
- };
+ int err;
+ struct amdgpu_hdp_ras *ras;
- if (!adev->hdp.ras_if) {
- adev->hdp.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
- if (!adev->hdp.ras_if)
- return -ENOMEM;
- adev->hdp.ras_if->block = AMDGPU_RAS_BLOCK__HDP;
- adev->hdp.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
- adev->hdp.ras_if->sub_block_index = 0;
- }
- ih_info.head = fs_info.head = *adev->hdp.ras_if;
- r = amdgpu_ras_late_init(adev, adev->hdp.ras_if,
- &fs_info, &ih_info);
- if (r || !amdgpu_ras_is_supported(adev, adev->hdp.ras_if->block)) {
- kfree(adev->hdp.ras_if);
- adev->hdp.ras_if = NULL;
+ if (!adev->hdp.ras)
+ return 0;
+
+ ras = adev->hdp.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register hdp ras block!\n");
+ return err;
}
- return r;
+ strcpy(ras->ras_block.ras_comm.name, "hdp");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__HDP;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->hdp.ras_if = &ras->ras_block.ras_comm;
+
+ /* hdp ras follows amdgpu_ras_block_late_init_default for late init */
+ return 0;
}
-void amdgpu_hdp_ras_fini(struct amdgpu_device *adev)
+void amdgpu_hdp_generic_flush(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
{
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) &&
- adev->hdp.ras_if) {
- struct ras_common_if *ras_if = adev->hdp.ras_if;
- struct ras_ih_if ih_info = {
- .cb = NULL,
- };
-
- amdgpu_ras_late_fini(adev, ras_if, &ih_info);
- kfree(ras_if);
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32((adev->rmmio_remap.reg_offset +
+ KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >>
+ 2,
+ 0);
+ if (adev->nbio.funcs->get_memsize)
+ adev->nbio.funcs->get_memsize(adev);
+ } else {
+ amdgpu_ring_emit_wreg(ring,
+ (adev->rmmio_remap.reg_offset +
+ KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >>
+ 2,
+ 0);
}
}
+/**
+ * amdgpu_hdp_invalidate - dispatch an HDP invalidate request
+ *
+ * @adev: amdgpu device pointer
+ * @ring: ring pointer, passed through unchanged to the selected callback
+ *
+ * Calls adev->asic_funcs->invalidate_hdp() when that callback is set;
+ * otherwise falls back to adev->hdp.funcs->invalidate_hdp(). If neither
+ * callback is available, nothing is done.
+ */
+void amdgpu_hdp_invalidate(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ if (adev->asic_funcs && adev->asic_funcs->invalidate_hdp)
+ adev->asic_funcs->invalidate_hdp(adev, ring);
+ else if (adev->hdp.funcs && adev->hdp.funcs->invalidate_hdp)
+ adev->hdp.funcs->invalidate_hdp(adev, ring);
+}
+/**
+ * amdgpu_hdp_flush - dispatch an HDP flush request
+ *
+ * @adev: amdgpu device pointer
+ * @ring: ring pointer, passed through unchanged to the selected callback
+ *
+ * Calls adev->asic_funcs->flush_hdp() when that callback is set; otherwise
+ * falls back to adev->hdp.funcs->flush_hdp(). If neither callback is
+ * available, nothing is done.
+ */
+void amdgpu_hdp_flush(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ if (adev->asic_funcs && adev->asic_funcs->flush_hdp)
+ adev->asic_funcs->flush_hdp(adev, ring);
+ else if (adev->hdp.funcs && adev->hdp.funcs->flush_hdp)
+ adev->hdp.funcs->flush_hdp(adev, ring);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index 7ec99d591584..d9f488fa76b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -22,13 +22,10 @@
*/
#ifndef __AMDGPU_HDP_H__
#define __AMDGPU_HDP_H__
+#include "amdgpu_ras.h"
-struct amdgpu_hdp_ras_funcs {
- int (*ras_late_init)(struct amdgpu_device *adev);
- void (*ras_fini)(struct amdgpu_device *adev);
- void (*query_ras_error_count)(struct amdgpu_device *adev,
- void *ras_error_status);
- void (*reset_ras_error_count)(struct amdgpu_device *adev);
+struct amdgpu_hdp_ras {
+ struct amdgpu_ras_block_object ras_block;
};
struct amdgpu_hdp_funcs {
@@ -36,16 +33,21 @@ struct amdgpu_hdp_funcs {
void (*invalidate_hdp)(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
void (*update_clock_gating)(struct amdgpu_device *adev, bool enable);
- void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags);
+ void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags);
void (*init_registers)(struct amdgpu_device *adev);
};
struct amdgpu_hdp {
struct ras_common_if *ras_if;
const struct amdgpu_hdp_funcs *funcs;
- const struct amdgpu_hdp_ras_funcs *ras_funcs;
+ struct amdgpu_hdp_ras *ras;
};
-int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev);
-void amdgpu_hdp_ras_fini(struct amdgpu_device *adev);
+int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev);
+void amdgpu_hdp_generic_flush(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+void amdgpu_hdp_invalidate(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+void amdgpu_hdp_flush(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
#endif /* __AMDGPU_HDP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
new file mode 100644
index 000000000000..90d26d820bac
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -0,0 +1,293 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ */
+
+/**
+ * DOC: MMU Notifier
+ *
+ * For coherent userptr handling, the driver registers an MMU notifier that
+ * informs it about updates to the page tables of a process.
+ *
+ * When somebody tries to invalidate the page tables, we block the update until
+ * all operations on the pages in question are completed; then those pages are
+ * marked as accessed, and also as dirty if it wasn't a read-only access.
+ *
+ * New command submissions using the userptrs in question are delayed until all
+ * page table invalidations are completed and we once more see a coherent
+ * process address space.
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <drm/drm.h>
+
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_hmm.h"
+
+#define MAX_WALK_BYTE (2UL << 30)
+
+/**
+ * amdgpu_hmm_invalidate_gfx - callback to notify about mm change
+ *
+ * @mni: interval notifier embedded in the affected &struct amdgpu_bo
+ * @range: details on the invalidation
+ * @cur_seq: Value to pass to mmu_interval_set_seq()
+ *
+ * Under adev->notifier_lock, record @cur_seq on @mni and then wait, with
+ * MAX_SCHEDULE_TIMEOUT, on the BO's reservation object
+ * (DMA_RESV_USAGE_BOOKKEEP) so that operations on the BO can finish.
+ * A wait result <= 0 is only logged; it does not change the return value.
+ *
+ * NOTE(review): earlier documentation said the pages are also marked as
+ * accessed and potentially dirty. Nothing in this function does that
+ * directly - confirm where it happens.
+ *
+ * Return: false if @range is not blockable (nothing is waited for), true
+ * otherwise.
+ */
+static bool amdgpu_hmm_invalidate_gfx(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ long r;
+
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+
+ mutex_lock(&adev->notifier_lock);
+
+ mmu_interval_set_seq(mni, cur_seq);
+
+ r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT);
+ mutex_unlock(&adev->notifier_lock);
+ if (r <= 0)
+ DRM_ERROR("(%ld) failed to wait for user bo\n", r);
+ return true;
+}
+/*
+ * Interval-notifier callbacks installed by amdgpu_hmm_register() for BOs
+ * that have no kfd_bo attached.
+ */
+static const struct mmu_interval_notifier_ops amdgpu_hmm_gfx_ops = {
+ .invalidate = amdgpu_hmm_invalidate_gfx,
+};
+
+/**
+ * amdgpu_hmm_invalidate_hsa - callback to notify about mm change
+ *
+ * @mni: interval notifier embedded in the affected &struct amdgpu_bo
+ * @range: details on the invalidation
+ * @cur_seq: sequence value, forwarded to amdgpu_amdkfd_evict_userptr()
+ *
+ * We temporarily evict the BO attached to this range. This necessitates
+ * evicting all user-mode queues of the process. The eviction itself is
+ * delegated to amdgpu_amdkfd_evict_userptr(), which receives @mni, @cur_seq
+ * and the BO's kfd_bo.
+ *
+ * Return: false if @range is not blockable (no eviction is requested), true
+ * otherwise.
+ */
+static bool amdgpu_hmm_invalidate_hsa(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
+
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+
+ amdgpu_amdkfd_evict_userptr(mni, cur_seq, bo->kfd_bo);
+
+ return true;
+}
+/*
+ * Interval-notifier callbacks installed by amdgpu_hmm_register() for BOs
+ * that have a kfd_bo attached.
+ */
+static const struct mmu_interval_notifier_ops amdgpu_hmm_hsa_ops = {
+ .invalidate = amdgpu_hmm_invalidate_hsa,
+};
+
+/**
+ * amdgpu_hmm_register - register a BO for notifier updates
+ *
+ * @bo: amdgpu buffer object
+ * @addr: userptr addr we should monitor
+ *
+ * Registers an interval notifier on current->mm for the given BO, covering
+ * amdgpu_bo_size(@bo) bytes starting at @addr. BOs with a kfd_bo get the
+ * HSA callbacks (amdgpu_hmm_hsa_ops); all others get the GFX callbacks
+ * (amdgpu_hmm_gfx_ops).
+ *
+ * On failure bo->notifier.mm is cleared so that a later
+ * amdgpu_hmm_unregister() is a no-op.
+ *
+ * Returns 0 on success, -ERRNO if anything goes wrong.
+ */
+int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr)
+{
+ int r;
+
+ if (bo->kfd_bo)
+ r = mmu_interval_notifier_insert(&bo->notifier, current->mm,
+ addr, amdgpu_bo_size(bo),
+ &amdgpu_hmm_hsa_ops);
+ else
+ r = mmu_interval_notifier_insert(&bo->notifier, current->mm, addr,
+ amdgpu_bo_size(bo),
+ &amdgpu_hmm_gfx_ops);
+ if (r)
+ /*
+ * Make sure amdgpu_hmm_unregister() doesn't call
+ * mmu_interval_notifier_remove() when the notifier isn't properly
+ * initialized.
+ */
+ bo->notifier.mm = NULL;
+
+ return r;
+}
+
+/**
+ * amdgpu_hmm_unregister - unregister a BO for notifier updates
+ *
+ * @bo: amdgpu buffer object
+ *
+ * Remove any registration of mmu notifier updates from the buffer object.
+ * Does nothing when bo->notifier.mm is NULL, i.e. when the BO was never
+ * registered, registration failed, or it was already unregistered. After
+ * removal bo->notifier.mm is reset to NULL, so calling this twice is safe.
+ */
+void amdgpu_hmm_unregister(struct amdgpu_bo *bo)
+{
+ if (!bo->notifier.mm)
+ return;
+ mmu_interval_notifier_remove(&bo->notifier);
+ bo->notifier.mm = NULL;
+}
+/**
+ * amdgpu_hmm_range_get_pages - fault in and collect the pfns of a range
+ *
+ * @notifier: interval notifier covering the range
+ * @start: start address of the range
+ * @npages: number of pages in the range
+ * @readonly: if false, HMM_PFN_REQ_WRITE is requested in addition to
+ * HMM_PFN_REQ_FAULT
+ * @owner: stored in hmm_range.dev_private_owner
+ * @range: caller-provided range object whose embedded hmm_range is filled in
+ *
+ * Allocates an array of @npages pfn entries and calls hmm_range_fault() on
+ * the range in chunks of at most MAX_WALK_BYTE (2UL << 30 bytes). For each
+ * chunk, -EBUSY from hmm_range_fault() is retried until
+ * HMM_RANGE_DEFAULT_TIMEOUT milliseconds have elapsed. The notifier sequence
+ * is re-read before every attempt, so on return hmm_range.notifier_seq holds
+ * the value read for the last chunk.
+ *
+ * On success hmm_range.start is reset to @start and hmm_range.hmm_pfns points
+ * at the beginning of the allocated array; the array is released by
+ * amdgpu_hmm_range_free(). If hmm_range_fault() fails, the array is freed
+ * here and hmm_range.hmm_pfns is set to NULL.
+ *
+ * Return: 0 on success, -ENOMEM if the pfn array cannot be allocated,
+ * -EAGAIN if hmm_range_fault() was still returning -EBUSY at the timeout,
+ * or any other error returned by hmm_range_fault().
+ */
+int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
+ uint64_t start, uint64_t npages, bool readonly,
+ void *owner,
+ struct amdgpu_hmm_range *range)
+{
+ unsigned long end;
+ unsigned long timeout;
+ unsigned long *pfns;
+ int r = 0;
+ struct hmm_range *hmm_range = &range->hmm_range;
+
+ pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
+ if (unlikely(!pfns)) {
+ r = -ENOMEM;
+ goto out_free_range;
+ }
+
+ hmm_range->notifier = notifier;
+ hmm_range->default_flags = HMM_PFN_REQ_FAULT;
+ if (!readonly)
+ hmm_range->default_flags |= HMM_PFN_REQ_WRITE;
+ hmm_range->hmm_pfns = pfns;
+ hmm_range->start = start;
+ end = start + npages * PAGE_SIZE;
+ hmm_range->dev_private_owner = owner;
+
+ do { /* one iteration per chunk of at most MAX_WALK_BYTE */
+ hmm_range->end = min(hmm_range->start + MAX_WALK_BYTE, end);
+
+ pr_debug("hmm range: start = 0x%lx, end = 0x%lx",
+ hmm_range->start, hmm_range->end);
+
+ timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); /* per-chunk retry deadline */
+
+retry:
+ hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
+ r = hmm_range_fault(hmm_range);
+ if (unlikely(r)) {
+ if (r == -EBUSY && !time_after(jiffies, timeout))
+ goto retry;
+ goto out_free_pfns;
+ }
+
+ if (hmm_range->end == end)
+ break;
+ hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT; /* next chunk's slice of the pfn array */
+ hmm_range->start = hmm_range->end;
+ } while (hmm_range->end < end);
+
+ hmm_range->start = start; /* undo the per-chunk cursor updates */
+ hmm_range->hmm_pfns = pfns;
+
+ return 0;
+
+out_free_pfns:
+ kvfree(pfns);
+ hmm_range->hmm_pfns = NULL;
+out_free_range:
+ if (r == -EBUSY) /* retries timed out: report -EAGAIN instead */
+ r = -EAGAIN;
+ return r;
+}
+
+/**
+ * amdgpu_hmm_range_valid - check if an HMM range is still valid
+ * @range: pointer to the &struct amdgpu_hmm_range to validate
+ *
+ * Determines whether the given HMM range @range is still valid by
+ * checking for invalidations via the MMU notifier sequence: the notifier
+ * and notifier_seq stored in @range->hmm_range are passed to
+ * mmu_interval_read_retry(). This is typically used to verify that the
+ * range has not been invalidated by concurrent address space updates
+ * before it is accessed.
+ *
+ * Return:
+ * * true if @range is valid and can be used safely
+ * * false if @range is NULL or has been invalidated
+ */
+bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range)
+{
+ if (!range)
+ return false;
+
+ return !mmu_interval_read_retry(range->hmm_range.notifier,
+ range->hmm_range.notifier_seq);
+}
+
+/**
+ * amdgpu_hmm_range_alloc - allocate and initialize an AMDGPU HMM range
+ * @bo: optional buffer object to associate with this HMM range
+ *
+ * Allocates zero-initialized memory for an amdgpu_hmm_range and associates
+ * it with the @bo passed. The reference count of the @bo is incremented.
+ * The returned object is released with amdgpu_hmm_range_free().
+ *
+ * Return:
+ * Pointer to a newly allocated struct amdgpu_hmm_range on success,
+ * or NULL if memory allocation fails.
+ */
+struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo)
+{
+ struct amdgpu_hmm_range *range;
+
+ range = kzalloc(sizeof(*range), GFP_KERNEL);
+ if (!range)
+ return NULL;
+
+ range->bo = amdgpu_bo_ref(bo);
+ return range;
+}
+
+/**
+ * amdgpu_hmm_range_free - release an AMDGPU HMM range
+ * @range: pointer to the range object to free; may be NULL
+ *
+ * Releases all resources held by @range: frees the hmm_pfns array, drops
+ * the reference on the associated bo, if any, and frees @range itself.
+ * Passing NULL is a no-op.
+ *
+ * Return: void
+ */
+void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range)
+{
+ if (!range)
+ return;
+
+ kvfree(range->hmm_range.hmm_pfns);
+ amdgpu_bo_unref(&range->bo);
+ kfree(range);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
new file mode 100644
index 000000000000..140bc9cd57b4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+#ifndef __AMDGPU_MN_H__
+#define __AMDGPU_MN_H__
+
+#include <linux/types.h>
+#include <linux/hmm.h>
+#include <linux/rwsem.h>
+#include <linux/workqueue.h>
+#include <linux/interval_tree.h>
+#include <linux/mmu_notifier.h>
+
+struct amdgpu_hmm_range {
+ struct hmm_range hmm_range;
+ struct amdgpu_bo *bo;
+};
+
+int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
+ uint64_t start, uint64_t npages, bool readonly,
+ void *owner,
+ struct amdgpu_hmm_range *range);
+
+#if defined(CONFIG_HMM_MIRROR)
+bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range);
+struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo);
+void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range);
+int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr);
+void amdgpu_hmm_unregister(struct amdgpu_bo *bo);
+#else
+static inline int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr)
+{
+ DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, "
+ "add CONFIG_ZONE_DEVICE=y in config file to fix this\n");
+ return -ENODEV;
+}
+
+static inline void amdgpu_hmm_unregister(struct amdgpu_bo *bo) {}
+
+static inline bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range)
+{
+ return false;
+}
+
+static inline struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo)
+{
+ return NULL;
+}
+
+static inline void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range) {}
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
index 82608df43396..9cb72f0c5277 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
@@ -24,7 +24,6 @@
* Alex Deucher
*/
-#include <linux/export.h>
#include <linux/pci.h>
#include <drm/drm_edid.h>
@@ -175,7 +174,6 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev,
i2c->rec = *rec;
i2c->adapter.owner = THIS_MODULE;
- i2c->adapter.class = I2C_CLASS_DDC;
i2c->adapter.dev.parent = dev->dev;
i2c->dev = dev;
i2c_set_adapdata(&i2c->adapter, i2c);
@@ -186,7 +184,7 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev,
snprintf(i2c->adapter.name, sizeof(i2c->adapter.name),
"AMDGPU i2c hw bus %s", name);
i2c->adapter.algo = &amdgpu_atombios_i2c_algo;
- ret = i2c_add_adapter(&i2c->adapter);
+ ret = devm_i2c_add_adapter(dev->dev, &i2c->adapter);
if (ret)
goto out_free;
} else {
@@ -217,22 +215,23 @@ out_free:
}
-void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c)
-{
- if (!i2c)
- return;
- WARN_ON(i2c->has_aux);
- i2c_del_adapter(&i2c->adapter);
- kfree(i2c);
-}
-
-/* Add the default buses */
void amdgpu_i2c_init(struct amdgpu_device *adev)
{
- if (amdgpu_hw_i2c)
- DRM_INFO("hw_i2c forced on, you may experience display detection problems!\n");
-
- amdgpu_atombios_i2c_init(adev);
+ if (!adev->is_atom_fw) {
+ if (!amdgpu_device_has_dc_support(adev)) {
+ amdgpu_atombios_i2c_init(adev);
+ } else {
+ switch (adev->asic_type) {
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ amdgpu_atombios_oem_i2c_init(adev, 0x97);
+ break;
+ default:
+ break;
+ }
+ }
+ }
}
/* remove all the buses */
@@ -240,28 +239,9 @@ void amdgpu_i2c_fini(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < AMDGPU_MAX_I2C_BUS; i++) {
- if (adev->i2c_bus[i]) {
- amdgpu_i2c_destroy(adev->i2c_bus[i]);
+ for (i = 0; i < AMDGPU_MAX_I2C_BUS; i++)
+ if (adev->i2c_bus[i])
adev->i2c_bus[i] = NULL;
- }
- }
-}
-
-/* Add additional buses */
-void amdgpu_i2c_add(struct amdgpu_device *adev,
- const struct amdgpu_i2c_bus_rec *rec,
- const char *name)
-{
- struct drm_device *dev = adev_to_drm(adev);
- int i;
-
- for (i = 0; i < AMDGPU_MAX_I2C_BUS; i++) {
- if (!adev->i2c_bus[i]) {
- adev->i2c_bus[i] = amdgpu_i2c_create(dev, rec, name);
- return;
- }
- }
}
/* looks up bus based on id */
@@ -280,7 +260,7 @@ amdgpu_i2c_lookup(struct amdgpu_device *adev,
return NULL;
}
-static void amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus,
+static int amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus,
u8 slave_addr,
u8 addr,
u8 *val)
@@ -305,16 +285,18 @@ static void amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus,
out_buf[0] = addr;
out_buf[1] = 0;
- if (i2c_transfer(&i2c_bus->adapter, msgs, 2) == 2) {
- *val = in_buf[0];
- DRM_DEBUG("val = 0x%02x\n", *val);
- } else {
- DRM_DEBUG("i2c 0x%02x 0x%02x read failed\n",
- addr, *val);
+ if (i2c_transfer(&i2c_bus->adapter, msgs, 2) != 2) {
+ DRM_DEBUG("i2c 0x%02x read failed\n", addr);
+ return -EIO;
}
+
+ *val = in_buf[0];
+ DRM_DEBUG("val = 0x%02x\n", *val);
+
+ return 0;
}
-static void amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus,
+static int amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus,
u8 slave_addr,
u8 addr,
u8 val)
@@ -330,9 +312,12 @@ static void amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus,
out_buf[0] = addr;
out_buf[1] = val;
- if (i2c_transfer(&i2c_bus->adapter, &msg, 1) != 1)
- DRM_DEBUG("i2c 0x%02x 0x%02x write failed\n",
- addr, val);
+ if (i2c_transfer(&i2c_bus->adapter, &msg, 1) != 1) {
+ DRM_DEBUG("i2c 0x%02x 0x%02x write failed\n", addr, val);
+ return -EIO;
+ }
+
+ return 0;
}
/* ddc router switching */
@@ -347,16 +332,18 @@ amdgpu_i2c_router_select_ddc_port(const struct amdgpu_connector *amdgpu_connecto
if (!amdgpu_connector->router_bus)
return;
- amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
- 0x3, &val);
+ 0x3, &val))
+ return;
val &= ~amdgpu_connector->router.ddc_mux_control_pin;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
0x3, val);
- amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
- 0x1, &val);
+ 0x1, &val))
+ return;
val &= ~amdgpu_connector->router.ddc_mux_control_pin;
val |= amdgpu_connector->router.ddc_mux_state;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
@@ -376,16 +363,18 @@ amdgpu_i2c_router_select_cd_port(const struct amdgpu_connector *amdgpu_connector
if (!amdgpu_connector->router_bus)
return;
- amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
- 0x3, &val);
+ 0x3, &val))
+ return;
val &= ~amdgpu_connector->router.cd_mux_control_pin;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
0x3, val);
- amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
- 0x1, &val);
+ 0x1, &val))
+ return;
val &= ~amdgpu_connector->router.cd_mux_control_pin;
val |= amdgpu_connector->router.cd_mux_state;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h
index 63c2ff7499e1..1d3d3806e0dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h
@@ -30,9 +30,6 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev,
void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c);
void amdgpu_i2c_init(struct amdgpu_device *adev);
void amdgpu_i2c_fini(struct amdgpu_device *adev);
-void amdgpu_i2c_add(struct amdgpu_device *adev,
- const struct amdgpu_i2c_bus_rec *rec,
- const char *name);
struct amdgpu_i2c_chan *
amdgpu_i2c_lookup(struct amdgpu_device *adev,
const struct amdgpu_i2c_bus_rec *i2c_bus);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index bc1297dcdf97..586a58facca1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -62,14 +62,14 @@
* Returns 0 on success, error on failure.
*/
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- unsigned size, enum amdgpu_ib_pool_type pool_type,
+ unsigned int size, enum amdgpu_ib_pool_type pool_type,
struct amdgpu_ib *ib)
{
int r;
if (size) {
r = amdgpu_sa_bo_new(&adev->ib_pools[pool_type],
- &ib->sa_bo, size, 256);
+ &ib->sa_bo, size);
if (r) {
dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
return r;
@@ -89,16 +89,14 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
/**
* amdgpu_ib_free - free an IB (Indirect Buffer)
*
- * @adev: amdgpu_device pointer
* @ib: IB object to free
* @f: the fence SA bo need wait on for the ib alloation
*
* Free an IB (all asics).
*/
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
- struct dma_fence *f)
+void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f)
{
- amdgpu_sa_bo_free(adev, &ib->sa_bo, f);
+ amdgpu_sa_bo_free(&ib->sa_bo, f);
}
/**
@@ -123,24 +121,26 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
* a CONST_IB), it will be put on the ring prior to the DE IB. Prior
* to SI there was just a DE IB.
*/
-int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
+int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
struct amdgpu_ib *ibs, struct amdgpu_job *job,
struct dma_fence **f)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib *ib = &ibs[0];
struct dma_fence *tmp = NULL;
+ struct amdgpu_fence *af;
bool need_ctx_switch;
- unsigned patch_offset = ~0;
struct amdgpu_vm *vm;
uint64_t fence_ctx;
uint32_t status = 0, alloc_size;
- unsigned fence_flags = 0;
- bool secure;
-
- unsigned i;
- int r = 0;
+ unsigned int fence_flags = 0;
+ bool secure, init_shadow;
+ u64 shadow_va, csa_va, gds_va;
+ int vmid = AMDGPU_JOB_GET_VMID(job);
bool need_pipe_sync = false;
+ unsigned int cond_exec;
+ unsigned int i;
+ int r = 0;
if (num_ibs == 0)
return -EINVAL;
@@ -149,26 +149,48 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
if (job) {
vm = job->vm;
fence_ctx = job->base.s_fence ?
- job->base.s_fence->scheduled.context : 0;
+ job->base.s_fence->finished.context : 0;
+ shadow_va = job->shadow_va;
+ csa_va = job->csa_va;
+ gds_va = job->gds_va;
+ init_shadow = job->init_shadow;
+ af = job->hw_fence;
+ /* Save the context of the job for reset handling.
+ * The driver needs this so it can skip the ring
+ * contents for guilty contexts.
+ */
+ af->context = fence_ctx;
+ /* the vm fence is also part of the job's context */
+ job->hw_vm_fence->context = fence_ctx;
} else {
vm = NULL;
fence_ctx = 0;
+ shadow_va = 0;
+ csa_va = 0;
+ gds_va = 0;
+ init_shadow = false;
+ af = kzalloc(sizeof(*af), GFP_ATOMIC);
+ if (!af)
+ return -ENOMEM;
}
if (!ring->sched.ready) {
dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
- return -EINVAL;
+ r = -EINVAL;
+ goto free_fence;
}
if (vm && !job->vmid) {
dev_err(adev->dev, "VM IB without ID\n");
- return -EINVAL;
+ r = -EINVAL;
+ goto free_fence;
}
if ((ib->flags & AMDGPU_IB_FLAGS_SECURE) &&
- (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)) {
- dev_err(adev->dev, "secure submissions not supported on compute rings\n");
- return -EINVAL;
+ (!ring->funcs->secure_submission_supported)) {
+ dev_err(adev->dev, "secure submissions not supported on ring <%s>\n", ring->name);
+ r = -EINVAL;
+ goto free_fence;
}
alloc_size = ring->funcs->emit_frame_size + num_ibs *
@@ -177,14 +199,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
r = amdgpu_ring_alloc(ring, alloc_size);
if (r) {
dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
- return r;
+ goto free_fence;
}
need_ctx_switch = ring->current_ctx != fence_ctx;
if (ring->funcs->emit_pipeline_sync && job &&
- ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
- (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
- amdgpu_vm_need_pipeline_sync(ring, job))) {
+ ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) ||
+ need_ctx_switch || amdgpu_vm_need_pipeline_sync(ring, job))) {
+
need_pipe_sync = true;
if (tmp)
@@ -211,8 +233,15 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
}
}
- if (job && ring->funcs->init_cond_exec)
- patch_offset = amdgpu_ring_init_cond_exec(ring);
+ amdgpu_ring_ib_begin(ring);
+
+ if (ring->funcs->emit_gfx_shadow)
+ amdgpu_ring_emit_gfx_shadow(ring, shadow_va, csa_va, gds_va,
+ init_shadow, vmid);
+
+ if (ring->funcs->init_cond_exec)
+ cond_exec = amdgpu_ring_init_cond_exec(ring,
+ ring->cond_exe_gpu_addr);
amdgpu_device_flush_hdp(adev, ring);
@@ -262,31 +291,54 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
fence_flags | AMDGPU_FENCE_FLAG_64BIT);
}
- r = amdgpu_fence_emit(ring, f, job, fence_flags);
+ if (ring->funcs->emit_gfx_shadow && ring->funcs->init_cond_exec) {
+ amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0);
+ amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr);
+ }
+
+ r = amdgpu_fence_emit(ring, af, fence_flags);
if (r) {
dev_err(adev->dev, "failed to emit fence (%d)\n", r);
if (job && job->vmid)
- amdgpu_vmid_reset(adev, ring->funcs->vmhub, job->vmid);
+ amdgpu_vmid_reset(adev, ring->vm_hub, job->vmid);
amdgpu_ring_undo(ring);
return r;
}
+ *f = &af->base;
+ /* get a ref for the job */
+ if (job)
+ dma_fence_get(*f);
if (ring->funcs->insert_end)
ring->funcs->insert_end(ring);
- if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
- amdgpu_ring_patch_cond_exec(ring, patch_offset);
+ amdgpu_ring_patch_cond_exec(ring, cond_exec);
ring->current_ctx = fence_ctx;
- if (vm && ring->funcs->emit_switch_buffer)
+ if (job && ring->funcs->emit_switch_buffer)
amdgpu_ring_emit_switch_buffer(ring);
if (ring->funcs->emit_wave_limit &&
ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
ring->funcs->emit_wave_limit(ring, false);
+ /* Save the wptr associated with this fence.
+ * This must be last for resets to work properly
+ * as we need to save the wptr associated with this
+ * fence so we know which ring contents to back up
+ * after we reset the queue.
+ */
+ amdgpu_fence_save_wptr(af);
+
+ amdgpu_ring_ib_end(ring);
amdgpu_ring_commit(ring);
+
return 0;
+
+free_fence:
+ if (!job)
+ kfree(af);
+ return r;
}
/**
@@ -307,8 +359,7 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i],
- AMDGPU_IB_POOL_SIZE,
- AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_IB_POOL_SIZE, 256,
AMDGPU_GEM_DOMAIN_GTT);
if (r)
goto error;
@@ -357,7 +408,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
{
long tmo_gfx, tmo_mm;
int r, ret = 0;
- unsigned i;
+ unsigned int i;
tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT;
if (amdgpu_sriov_vf(adev)) {
@@ -374,7 +425,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
/* for CP & SDMA engines since they are scheduled together so
* need to make the timeout width enough to cover the time
* cost waiting for it coming back under RUNTIME only
- */
+ */
tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT;
} else if (adev->gmc.xgmi.hive_id) {
tmo_gfx = AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT;
@@ -390,6 +441,10 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
if (!ring->sched.ready || !ring->funcs->test_ib)
continue;
+ if (adev->enable_mes &&
+ ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ continue;
+
/* MM engine need more time */
if (ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
ring->funcs->type == AMDGPU_RING_TYPE_VCE ||
@@ -431,15 +486,15 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
static int amdgpu_debugfs_sa_info_show(struct seq_file *m, void *unused)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct amdgpu_device *adev = m->private;
- seq_printf(m, "--------------------- DELAYED --------------------- \n");
+ seq_puts(m, "--------------------- DELAYED ---------------------\n");
amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DELAYED],
m);
- seq_printf(m, "-------------------- IMMEDIATE -------------------- \n");
+ seq_puts(m, "-------------------- IMMEDIATE --------------------\n");
amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_IMMEDIATE],
m);
- seq_printf(m, "--------------------- DIRECT ---------------------- \n");
+ seq_puts(m, "--------------------- DIRECT ----------------------\n");
amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DIRECT], m);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index b7fb72bff2c1..9cab36322c16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -62,9 +62,8 @@ int amdgpu_pasid_alloc(unsigned int bits)
int pasid = -EINVAL;
for (bits = min(bits, 31U); bits > 0; bits--) {
- pasid = ida_simple_get(&amdgpu_pasid_ida,
- 1U << (bits - 1), 1U << bits,
- GFP_KERNEL);
+ pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1),
+ (1U << bits) - 1, GFP_KERNEL);
if (pasid != -ENOSPC)
break;
}
@@ -82,7 +81,7 @@ int amdgpu_pasid_alloc(unsigned int bits)
void amdgpu_pasid_free(u32 pasid)
{
trace_amdgpu_pasid_freed(pasid);
- ida_simple_remove(&amdgpu_pasid_ida, pasid);
+ ida_free(&amdgpu_pasid_ida, pasid);
}
static void amdgpu_pasid_free_cb(struct dma_fence *fence,
@@ -107,36 +106,19 @@ static void amdgpu_pasid_free_cb(struct dma_fence *fence,
void amdgpu_pasid_free_delayed(struct dma_resv *resv,
u32 pasid)
{
- struct dma_fence *fence, **fences;
struct amdgpu_pasid_cb *cb;
- unsigned count;
+ struct dma_fence *fence;
int r;
- r = dma_resv_get_fences(resv, NULL, &count, &fences);
+ r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_BOOKKEEP, &fence);
if (r)
goto fallback;
- if (count == 0) {
+ if (!fence) {
amdgpu_pasid_free(pasid);
return;
}
- if (count == 1) {
- fence = fences[0];
- kfree(fences);
- } else {
- uint64_t context = dma_fence_context_alloc(1);
- struct dma_fence_array *array;
-
- array = dma_fence_array_create(count, fences, context,
- 1, false);
- if (!array) {
- kfree(fences);
- goto fallback;
- }
- fence = &array->base;
- }
-
cb = kmalloc(sizeof(*cb), GFP_KERNEL);
if (!cb) {
/* Last resort when we are OOM */
@@ -156,7 +138,8 @@ fallback:
/* Not enough memory for the delayed delete, as last resort
* block for all the fences to complete.
*/
- dma_resv_wait_timeout(resv, true, false, MAX_SCHEDULE_TIMEOUT);
+ dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT);
amdgpu_pasid_free(pasid);
}
@@ -181,76 +164,71 @@ bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
atomic_read(&adev->gpu_reset_counter);
}
+/* Check if we need to switch to another set of resources */
+static bool amdgpu_vmid_gds_switch_needed(struct amdgpu_vmid *id,
+ struct amdgpu_job *job)
+{
+ return id->gds_base != job->gds_base ||
+ id->gds_size != job->gds_size ||
+ id->gws_base != job->gws_base ||
+ id->gws_size != job->gws_size ||
+ id->oa_base != job->oa_base ||
+ id->oa_size != job->oa_size;
+}
+
+/* Check if the id is compatible with the job */
+static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id,
+ struct amdgpu_job *job)
+{
+ return id->pd_gpu_addr == job->vm_pd_addr &&
+ !amdgpu_vmid_gds_switch_needed(id, job);
+}
+
/**
* amdgpu_vmid_grab_idle - grab idle VMID
*
- * @vm: vm to allocate id for
* @ring: ring we want to submit job to
- * @sync: sync object where we add dependencies
* @idle: resulting idle VMID
+ * @fence: fence to wait for if no id could be grabbed
*
* Try to find an idle VMID, if none is idle add a fence to wait to the sync
* object. Returns -ENOMEM when we are out of memory.
*/
-static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
- struct amdgpu_ring *ring,
- struct amdgpu_sync *sync,
- struct amdgpu_vmid **idle)
+static int amdgpu_vmid_grab_idle(struct amdgpu_ring *ring,
+ struct amdgpu_vmid **idle,
+ struct dma_fence **fence)
{
struct amdgpu_device *adev = ring->adev;
- unsigned vmhub = ring->funcs->vmhub;
+ unsigned vmhub = ring->vm_hub;
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
- struct dma_fence **fences;
- unsigned i;
- int r;
-
- if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait))
- return amdgpu_sync_fence(sync, ring->vmid_wait);
- fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL);
- if (!fences)
- return -ENOMEM;
+ /* If anybody is waiting for a VMID let everybody wait for fairness */
+ if (!dma_fence_is_signaled(ring->vmid_wait)) {
+ *fence = dma_fence_get(ring->vmid_wait);
+ return 0;
+ }
/* Check if we have an idle VMID */
- i = 0;
- list_for_each_entry((*idle), &id_mgr->ids_lru, list) {
+ list_for_each_entry_reverse((*idle), &id_mgr->ids_lru, list) {
/* Don't use per engine and per process VMID at the same time */
struct amdgpu_ring *r = adev->vm_manager.concurrent_flush ?
NULL : ring;
- fences[i] = amdgpu_sync_peek_fence(&(*idle)->active, r);
- if (!fences[i])
- break;
- ++i;
+ *fence = amdgpu_sync_peek_fence(&(*idle)->active, r);
+ if (!(*fence))
+ return 0;
}
- /* If we can't find a idle VMID to use, wait till one becomes available */
- if (&(*idle)->list == &id_mgr->ids_lru) {
- u64 fence_context = adev->vm_manager.fence_context + ring->idx;
- unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
- struct dma_fence_array *array;
- unsigned j;
-
- *idle = NULL;
- for (j = 0; j < i; ++j)
- dma_fence_get(fences[j]);
-
- array = dma_fence_array_create(i, fences, fence_context,
- seqno, true);
- if (!array) {
- for (j = 0; j < i; ++j)
- dma_fence_put(fences[j]);
- kfree(fences);
- return -ENOMEM;
- }
-
- r = amdgpu_sync_fence(sync, &array->base);
- dma_fence_put(ring->vmid_wait);
- ring->vmid_wait = &array->base;
- return r;
- }
- kfree(fences);
+ /*
+ * If we can't find an idle VMID to use, wait on a fence from the least
+ * recently used in the hope that it will be available soon.
+ */
+ *idle = NULL;
+ dma_fence_put(ring->vmid_wait);
+ ring->vmid_wait = dma_fence_get(*fence);
+ /* This is the reference we return */
+ dma_fence_get(*fence);
return 0;
}
@@ -259,41 +237,41 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
*
* @vm: vm to allocate id for
* @ring: ring we want to submit job to
- * @sync: sync object where we add dependencies
- * @fence: fence protecting ID from reuse
* @job: job who wants to use the VMID
* @id: resulting VMID
+ * @fence: fence to wait for if no id could be grabbed
*
* Try to assign a reserved VMID.
*/
static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
struct amdgpu_ring *ring,
- struct amdgpu_sync *sync,
- struct dma_fence *fence,
struct amdgpu_job *job,
- struct amdgpu_vmid **id)
+ struct amdgpu_vmid **id,
+ struct dma_fence **fence)
{
struct amdgpu_device *adev = ring->adev;
- unsigned vmhub = ring->funcs->vmhub;
+ unsigned vmhub = ring->vm_hub;
uint64_t fence_context = adev->fence_context + ring->idx;
- struct dma_fence *updates = sync->last_vm_update;
bool needs_flush = vm->use_cpu_for_update;
- int r = 0;
+ uint64_t updates = amdgpu_vm_tlb_seq(vm);
+ int r;
*id = vm->reserved_vmid[vmhub];
- if (updates && (*id)->flushed_updates &&
- updates->context == (*id)->flushed_updates->context &&
- !dma_fence_is_later(updates, (*id)->flushed_updates))
- updates = NULL;
-
if ((*id)->owner != vm->immediate.fence_context ||
- job->vm_pd_addr != (*id)->pd_gpu_addr ||
- updates || !(*id)->last_flush ||
+ !amdgpu_vmid_compatible(*id, job) ||
+ (*id)->flushed_updates < updates ||
+ !(*id)->last_flush ||
((*id)->last_flush->context != fence_context &&
- !dma_fence_is_signaled((*id)->last_flush))) {
+ !dma_fence_is_signaled((*id)->last_flush)))
+ needs_flush = true;
+
+ if ((*id)->owner != vm->immediate.fence_context ||
+ (!adev->vm_manager.concurrent_flush && needs_flush)) {
struct dma_fence *tmp;
- /* Don't use per engine and per process VMID at the same time */
+ /* Don't use per engine and per process VMID at the
+ * same time
+ */
if (adev->vm_manager.concurrent_flush)
ring = NULL;
@@ -302,24 +280,21 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
if (tmp) {
*id = NULL;
- r = amdgpu_sync_fence(sync, tmp);
- return r;
+ *fence = dma_fence_get(tmp);
+ return 0;
}
- needs_flush = true;
}
/* Good we can use this VMID. Remember this submission as
* user of the VMID.
*/
- r = amdgpu_sync_fence(&(*id)->active, fence);
+ r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished,
+ GFP_ATOMIC);
if (r)
return r;
- if (updates) {
- dma_fence_put((*id)->flushed_updates);
- (*id)->flushed_updates = dma_fence_get(updates);
- }
job->vm_needs_flush = needs_flush;
+ job->spm_update_needed = true;
return 0;
}
@@ -328,8 +303,6 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
*
* @vm: vm to allocate id for
* @ring: ring we want to submit job to
- * @sync: sync object where we add dependencies
- * @fence: fence protecting ID from reuse
* @job: job who wants to use the VMID
* @id: resulting VMID
*
@@ -337,16 +310,14 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
*/
static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
struct amdgpu_ring *ring,
- struct amdgpu_sync *sync,
- struct dma_fence *fence,
struct amdgpu_job *job,
struct amdgpu_vmid **id)
{
struct amdgpu_device *adev = ring->adev;
- unsigned vmhub = ring->funcs->vmhub;
+ unsigned vmhub = ring->vm_hub;
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
uint64_t fence_context = adev->fence_context + ring->idx;
- struct dma_fence *updates = sync->last_vm_update;
+ uint64_t updates = amdgpu_vm_tlb_seq(vm);
int r;
job->vm_needs_flush = vm->use_cpu_for_update;
@@ -354,13 +325,12 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
/* Check if we can use a VMID already assigned to this VM */
list_for_each_entry_reverse((*id), &id_mgr->ids_lru, list) {
bool needs_flush = vm->use_cpu_for_update;
- struct dma_fence *flushed;
/* Check all the prerequisites to using this VMID */
if ((*id)->owner != vm->immediate.fence_context)
continue;
- if ((*id)->pd_gpu_addr != job->vm_pd_addr)
+ if (!amdgpu_vmid_compatible(*id, job))
continue;
if (!(*id)->last_flush ||
@@ -368,8 +338,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
!dma_fence_is_signaled((*id)->last_flush)))
needs_flush = true;
- flushed = (*id)->flushed_updates;
- if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
+ if ((*id)->flushed_updates < updates)
needs_flush = true;
if (needs_flush && !adev->vm_manager.concurrent_flush)
@@ -378,15 +347,12 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
/* Good, we can use this VMID. Remember this submission as
* user of the VMID.
*/
- r = amdgpu_sync_fence(&(*id)->active, fence);
+ r = amdgpu_sync_fence(&(*id)->active,
+ &job->base.s_fence->finished,
+ GFP_ATOMIC);
if (r)
return r;
- if (updates && (!flushed || dma_fence_is_later(updates, flushed))) {
- dma_fence_put((*id)->flushed_updates);
- (*id)->flushed_updates = dma_fence_get(updates);
- }
-
job->vm_needs_flush |= needs_flush;
return 0;
}
@@ -400,65 +366,70 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
*
* @vm: vm to allocate id for
* @ring: ring we want to submit job to
- * @sync: sync object where we add dependencies
- * @fence: fence protecting ID from reuse
* @job: job who wants to use the VMID
+ * @fence: fence to wait for if no id could be grabbed
*
* Allocate an id for the vm, adding fences to the sync obj as necessary.
*/
int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
- struct amdgpu_sync *sync, struct dma_fence *fence,
- struct amdgpu_job *job)
+ struct amdgpu_job *job, struct dma_fence **fence)
{
struct amdgpu_device *adev = ring->adev;
- unsigned vmhub = ring->funcs->vmhub;
+ unsigned vmhub = ring->vm_hub;
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
struct amdgpu_vmid *idle = NULL;
struct amdgpu_vmid *id = NULL;
int r = 0;
mutex_lock(&id_mgr->lock);
- r = amdgpu_vmid_grab_idle(vm, ring, sync, &idle);
+ r = amdgpu_vmid_grab_idle(ring, &idle, fence);
if (r || !idle)
goto error;
- if (vm->reserved_vmid[vmhub]) {
- r = amdgpu_vmid_grab_reserved(vm, ring, sync, fence, job, &id);
+ if (amdgpu_vmid_uses_reserved(vm, vmhub)) {
+ r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);
if (r || !id)
goto error;
} else {
- r = amdgpu_vmid_grab_used(vm, ring, sync, fence, job, &id);
+ r = amdgpu_vmid_grab_used(vm, ring, job, &id);
if (r)
goto error;
if (!id) {
- struct dma_fence *updates = sync->last_vm_update;
-
/* Still no ID to use? Then use the idle one found earlier */
id = idle;
/* Remember this submission as user of the VMID */
- r = amdgpu_sync_fence(&id->active, fence);
+ r = amdgpu_sync_fence(&id->active,
+ &job->base.s_fence->finished,
+ GFP_ATOMIC);
if (r)
goto error;
- dma_fence_put(id->flushed_updates);
- id->flushed_updates = dma_fence_get(updates);
job->vm_needs_flush = true;
}
list_move_tail(&id->list, &id_mgr->ids_lru);
}
- id->pd_gpu_addr = job->vm_pd_addr;
- id->owner = vm->immediate.fence_context;
-
+ job->gds_switch_needed = amdgpu_vmid_gds_switch_needed(id, job);
if (job->vm_needs_flush) {
+ id->flushed_updates = amdgpu_vm_tlb_seq(vm);
dma_fence_put(id->last_flush);
id->last_flush = NULL;
}
job->vmid = id - id_mgr->ids;
job->pasid = vm->pasid;
+
+ id->gds_base = job->gds_base;
+ id->gds_size = job->gds_size;
+ id->gws_base = job->gws_base;
+ id->gws_size = job->gws_size;
+ id->oa_base = job->oa_base;
+ id->oa_size = job->oa_size;
+ id->pd_gpu_addr = job->vm_pd_addr;
+ id->owner = vm->immediate.fence_context;
+
trace_amdgpu_vm_grab_id(vm, ring, job);
error:
@@ -466,29 +437,47 @@ error:
return r;
}
-int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
+/*
+ * amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID
+ * @vm: the VM to check
+ * @vmhub: the VMHUB which will be used
+ *
+ * Returns: True if the VM will use a reserved VMID.
+ */
+bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub)
+{
+ return vm->reserved_vmid[vmhub];
+}
+
+/*
+ * amdgpu_vmid_alloc_reserved - reserve a specific VMID for this vm
+ * @adev: amdgpu device structure
+ * @vm: the VM to reserve an ID for
+ * @vmhub: the VMHUB which should be used
+ *
+ * Mostly used to have a reserved VMID for debugging and SPM.
+ *
+ * Returns: 0 for success, -ENOENT if an ID is already reserved.
+ */
+int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned vmhub)
{
- struct amdgpu_vmid_mgr *id_mgr;
- struct amdgpu_vmid *idle;
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+ struct amdgpu_vmid *id;
int r = 0;
- id_mgr = &adev->vm_manager.id_mgr[vmhub];
mutex_lock(&id_mgr->lock);
if (vm->reserved_vmid[vmhub])
goto unlock;
- if (atomic_inc_return(&id_mgr->reserved_vmid_num) >
- AMDGPU_VM_MAX_RESERVED_VMID) {
- DRM_ERROR("Over limitation of reserved vmid\n");
- atomic_dec(&id_mgr->reserved_vmid_num);
- r = -EINVAL;
+ if (id_mgr->reserved_vmid) {
+ r = -ENOENT;
goto unlock;
}
- /* Select the first entry VMID */
- idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list);
- list_del_init(&idle->list);
- vm->reserved_vmid[vmhub] = idle;
+ /* Remove from normal round robin handling */
+ id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list);
+ list_del_init(&id->list);
+ vm->reserved_vmid[vmhub] = id;
+ id_mgr->reserved_vmid = true;
mutex_unlock(&id_mgr->lock);
return 0;
@@ -497,8 +486,13 @@ unlock:
return r;
}
-void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
+/*
+ * amdgpu_vmid_free_reserved - free up a reserved VMID again
+ * @adev: amdgpu device structure
+ * @vm: the VM with the reserved ID
+ * @vmhub: the VMHUB which should be used
+ */
+void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned vmhub)
{
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
@@ -508,7 +502,7 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
list_add(&vm->reserved_vmid[vmhub]->list,
&id_mgr->ids_lru);
vm->reserved_vmid[vmhub] = NULL;
- atomic_dec(&id_mgr->reserved_vmid_num);
+ id_mgr->reserved_vmid = false;
}
mutex_unlock(&id_mgr->lock);
}
@@ -576,10 +570,17 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
mutex_init(&id_mgr->lock);
INIT_LIST_HEAD(&id_mgr->ids_lru);
- atomic_set(&id_mgr->reserved_vmid_num, 0);
- /* manage only VMIDs not used by KFD */
- id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;
+ /* for GC <10, SDMA uses MMHUB so use first_kfd_vmid for both GC and MM */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0))
+ /* manage only VMIDs not used by KFD */
+ id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;
+ else if (AMDGPU_IS_MMHUB0(i) ||
+ AMDGPU_IS_MMHUB1(i))
+ id_mgr->num_ids = 16;
+ else
+ /* manage only VMIDs not used by KFD */
+ id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;
/* skip over VMID 0, since it is the system VM */
for (j = 1; j < id_mgr->num_ids; ++j) {
@@ -610,7 +611,6 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev)
struct amdgpu_vmid *id = &id_mgr->ids[j];
amdgpu_sync_free(&id->active);
- dma_fence_put(id->flushed_updates);
dma_fence_put(id->last_flush);
dma_fence_put(id->pasid_mapping);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index 0c3b4fa1f936..b3649cd3af56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -47,7 +47,7 @@ struct amdgpu_vmid {
uint64_t pd_gpu_addr;
/* last flushed PD/PT update */
- struct dma_fence *flushed_updates;
+ uint64_t flushed_updates;
uint32_t current_gpu_reset_count;
@@ -67,7 +67,7 @@ struct amdgpu_vmid_mgr {
unsigned num_ids;
struct list_head ids_lru;
struct amdgpu_vmid ids[AMDGPU_NUM_VMID];
- atomic_t reserved_vmid_num;
+ bool reserved_vmid;
};
int amdgpu_pasid_alloc(unsigned int bits);
@@ -77,15 +77,13 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
struct amdgpu_vmid *id);
-int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
+bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub);
+int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned vmhub);
-void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
+void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned vmhub);
int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
- struct amdgpu_sync *sync, struct dma_fence *fence,
- struct amdgpu_job *job);
+ struct amdgpu_job *job, struct dma_fence **fence);
void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
unsigned vmid);
void amdgpu_vmid_reset_all(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index 0c7963dfacad..a6419246e9c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -25,6 +25,7 @@
#include "amdgpu.h"
#include "amdgpu_ih.h"
+#include "amdgpu_reset.h"
/**
* amdgpu_ih_ring_init - initialize the IH state
@@ -138,6 +139,7 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
/**
* amdgpu_ih_ring_write - write IV to the ring buffer
*
+ * @adev: amdgpu_device pointer
* @ih: ih ring to write to
* @iv: the iv to write
* @num_dw: size of the iv in dw
@@ -145,8 +147,8 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
* Writes an IV to the ring buffer using the CPU and increment the wptr.
* Used for testing and delegating IVs to a software ring.
*/
-void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
- unsigned int num_dw)
+void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ const uint32_t *iv, unsigned int num_dw)
{
uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2;
unsigned int i;
@@ -161,55 +163,38 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
if (wptr != READ_ONCE(ih->rptr)) {
wmb();
WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
+ } else if (adev->irq.retry_cam_enabled) {
+ dev_warn_once(adev->dev, "IH soft ring buffer overflow 0x%X, 0x%X\n",
+ wptr, ih->rptr);
}
}
-/* Waiter helper that checks current rptr matches or passes checkpoint wptr */
-static bool amdgpu_ih_has_checkpoint_processed(struct amdgpu_device *adev,
- struct amdgpu_ih_ring *ih,
- uint32_t checkpoint_wptr,
- uint32_t *prev_rptr)
-{
- uint32_t cur_rptr = ih->rptr | (*prev_rptr & ~ih->ptr_mask);
-
- /* rptr has wrapped. */
- if (cur_rptr < *prev_rptr)
- cur_rptr += ih->ptr_mask + 1;
- *prev_rptr = cur_rptr;
-
- /* check ring is empty to workaround missing wptr overflow flag */
- return cur_rptr >= checkpoint_wptr ||
- (cur_rptr & ih->ptr_mask) == amdgpu_ih_get_wptr(adev, ih);
-}
-
/**
- * amdgpu_ih_wait_on_checkpoint_process - wait to process IVs up to checkpoint
+ * amdgpu_ih_wait_on_checkpoint_process_ts - wait to process IVs up to checkpoint
*
* @adev: amdgpu_device pointer
* @ih: ih ring to process
*
* Used to ensure ring has processed IVs up to the checkpoint write pointer.
*/
-int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev,
+int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih)
{
- uint32_t checkpoint_wptr, rptr;
+ uint32_t checkpoint_wptr;
+ uint64_t checkpoint_ts;
+ long timeout = HZ;
if (!ih->enabled || adev->shutdown)
return -ENODEV;
checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
- /* Order wptr with rptr. */
+ /* Order wptr with ring data. */
rmb();
- rptr = READ_ONCE(ih->rptr);
-
- /* wptr has wrapped. */
- if (rptr > checkpoint_wptr)
- checkpoint_wptr += ih->ptr_mask + 1;
+ checkpoint_ts = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
- return wait_event_interruptible(ih->wait_process,
- amdgpu_ih_has_checkpoint_processed(adev, ih,
- checkpoint_wptr, &rptr));
+ return wait_event_interruptible_timeout(ih->wait_process,
+ amdgpu_ih_ts_after(checkpoint_ts, ih->processed_timestamp) ||
+ ih->rptr == amdgpu_ih_get_wptr(adev, ih), timeout);
}
/**
@@ -233,7 +218,7 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
restart_ih:
count = AMDGPU_IH_MAX_NUM_IVS;
- DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
+ dev_dbg(adev->dev, "%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
/* Order reading of wptr vs. reading of IH ring data */
rmb();
@@ -243,13 +228,23 @@ restart_ih:
ih->rptr &= ih->ptr_mask;
}
- amdgpu_ih_set_rptr(adev, ih);
+ if (!ih->overflow)
+ amdgpu_ih_set_rptr(adev, ih);
+
wake_up_all(&ih->wait_process);
/* make sure wptr hasn't changed while processing */
wptr = amdgpu_ih_get_wptr(adev, ih);
if (wptr != ih->rptr)
- goto restart_ih;
+ if (!ih->overflow)
+ goto restart_ih;
+
+ if (ih->overflow)
+ if (amdgpu_sriov_runtime(adev))
+ WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
return IRQ_HANDLED;
}
@@ -262,7 +257,7 @@ restart_ih:
* @entry: IV entry
*
* Decodes the interrupt vector at the current rptr
- * position and also advance the position for for Vega10
+ * position and also advance the position for Vega10
* and later GPUs.
*/
void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
@@ -290,7 +285,7 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32);
entry->timestamp_src = dw[2] >> 31;
entry->pasid = dw[3] & 0xffff;
- entry->pasid_src = dw[3] >> 31;
+ entry->node_id = (dw[3] >> 16) & 0xff;
entry->src_data[0] = dw[4];
entry->src_data[1] = dw[5];
entry->src_data[2] = dw[6];
@@ -299,3 +294,24 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
/* wptr/rptr are in bytes! */
ih->rptr += 32;
}
+
+uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
+ signed int offset)
+{
+ uint32_t iv_size = 32;
+ uint32_t ring_index;
+ uint32_t dw1, dw2;
+
+ rptr += iv_size * offset;
+ ring_index = (rptr & ih->ptr_mask) >> 2;
+
+ dw1 = le32_to_cpu(ih->ring[ring_index + 1]);
+ dw2 = le32_to_cpu(ih->ring[ring_index + 2]);
+ return dw1 | ((u64)(dw2 & 0xffff) << 32);
+}
+
+const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
+{
+ return ih == &adev->irq.ih ? "ih" : ih == &adev->irq.ih_soft ? "sw ih" :
+ ih == &adev->irq.ih1 ? "ih1" : ih == &adev->irq.ih2 ? "ih2" : "unknown";
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index 0649b59830a5..f58b6be7fccc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -27,6 +27,9 @@
/* Maximum number of IVs processed at once */
#define AMDGPU_IH_MAX_NUM_IVS 32
+#define IH_RING_SIZE (256 * 1024)
+#define IH_SW_RING_SIZE (16 * 1024) /* enough for 512 CAM entries */
+
struct amdgpu_device;
struct amdgpu_iv_entry;
@@ -53,14 +56,14 @@ struct amdgpu_ih_ring {
bool use_bus_addr;
struct amdgpu_bo *ring_obj;
- volatile uint32_t *ring;
+ uint32_t *ring;
uint64_t gpu_addr;
uint64_t wptr_addr;
- volatile uint32_t *wptr_cpu;
+ uint32_t *wptr_cpu;
uint64_t rptr_addr;
- volatile uint32_t *rptr_cpu;
+ uint32_t *rptr_cpu;
bool enabled;
unsigned rptr;
@@ -68,31 +71,48 @@ struct amdgpu_ih_ring {
/* For waiting on IH processing at checkpoint. */
wait_queue_head_t wait_process;
+ uint64_t processed_timestamp;
+ bool overflow;
};
+/* return true if time stamp t2 is after t1 with 48bit wrap around */
+#define amdgpu_ih_ts_after(t1, t2) \
+ (((int64_t)((t2) << 16) - (int64_t)((t1) << 16)) > 0LL)
+
+#define amdgpu_ih_ts_after_or_equal(t1, t2) \
+ (((int64_t)((t2) << 16) - (int64_t)((t1) << 16)) >= 0LL)
+
/* provided by the ih block */
struct amdgpu_ih_funcs {
/* ring read/write ptr handling, called from interrupt context */
u32 (*get_wptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
void (*decode_iv)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
struct amdgpu_iv_entry *entry);
+ uint64_t (*decode_iv_ts)(struct amdgpu_ih_ring *ih, u32 rptr,
+ signed int offset);
void (*set_rptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
};
#define amdgpu_ih_get_wptr(adev, ih) (adev)->irq.ih_funcs->get_wptr((adev), (ih))
#define amdgpu_ih_decode_iv(adev, iv) \
(adev)->irq.ih_funcs->decode_iv((adev), (ih), (iv))
+#define amdgpu_ih_decode_iv_ts(adev, ih, rptr, offset) \
+ (WARN_ON_ONCE(!(adev)->irq.ih_funcs->decode_iv_ts) ? 0 : \
+ (adev)->irq.ih_funcs->decode_iv_ts((ih), (rptr), (offset)))
#define amdgpu_ih_set_rptr(adev, ih) (adev)->irq.ih_funcs->set_rptr((adev), (ih))
int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
unsigned ring_size, bool use_bus_addr);
void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
-void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
- unsigned int num_dw);
-int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev,
- struct amdgpu_ih_ring *ih);
+void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ const uint32_t *iv, unsigned int num_dw);
+int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih);
int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih,
struct amdgpu_iv_entry *entry);
+uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
+ signed int offset);
+const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h
new file mode 100644
index 000000000000..484e936812e4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_IMU_H__
+#define __AMDGPU_IMU_H__
+
+enum imu_work_mode {
+ DEBUG_MODE,
+ MISSION_MODE
+};
+
+struct amdgpu_imu_funcs {
+ int (*init_microcode)(struct amdgpu_device *adev);
+ int (*load_microcode)(struct amdgpu_device *adev);
+ void (*setup_imu)(struct amdgpu_device *adev);
+ int (*start_imu)(struct amdgpu_device *adev);
+ void (*program_rlc_ram)(struct amdgpu_device *adev);
+ int (*wait_for_reset_status)(struct amdgpu_device *adev);
+};
+
+struct imu_rlc_ram_golden {
+ u32 hwip;
+ u32 instance;
+ u32 segment;
+ u32 reg;
+ u32 data;
+ u32 addr_mask;
+};
+
+#define IMU_RLC_RAM_GOLDEN_VALUE(ip, inst, reg, data, addr_mask) \
+ { ip##_HWIP, inst, reg##_BASE_IDX, reg, data, addr_mask }
+
+struct amdgpu_imu {
+ const struct amdgpu_imu_funcs *funcs;
+ enum imu_work_mode mode;
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
index 5cf142e849bb..a1cbd7c3deb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
@@ -1,4 +1,4 @@
-/**
+/*
* \file amdgpu_ioc32.c
*
* 32-bit ioctl compatibility routines for the AMDGPU DRM.
@@ -37,12 +37,9 @@
long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
unsigned int nr = DRM_IOCTL_NR(cmd);
- int ret;
if (nr < DRM_COMMAND_BASE)
return drm_compat_ioctl(filp, cmd, arg);
- ret = amdgpu_drm_ioctl(filp, cmd, arg);
-
- return ret;
+ return amdgpu_drm_ioctl(filp, cmd, arg);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c
new file mode 100644
index 000000000000..99e1cf4fc955
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_ip.h"
+
+static int8_t amdgpu_logical_to_dev_inst(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ int8_t inst)
+{
+ int8_t dev_inst;
+
+ switch (block) {
+ case GC_HWIP:
+ case SDMA0_HWIP:
+ /* Both JPEG and VCN as JPEG is only alias of VCN */
+ case VCN_HWIP:
+ dev_inst = adev->ip_map.dev_inst[block][inst];
+ break;
+ default:
+ /* For rest of the IPs, no look up required.
+ * Assume 'logical instance == physical instance' for all configs. */
+ dev_inst = inst;
+ break;
+ }
+
+ return dev_inst;
+}
+
+static uint32_t amdgpu_logical_to_dev_mask(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ uint32_t mask)
+{
+ uint32_t dev_mask = 0;
+ int8_t log_inst, dev_inst;
+
+ while (mask) {
+ log_inst = ffs(mask) - 1;
+ dev_inst = amdgpu_logical_to_dev_inst(adev, block, log_inst);
+ dev_mask |= (1 << dev_inst);
+ mask &= ~(1 << log_inst);
+ }
+
+ return dev_mask;
+}
+
+static void amdgpu_populate_ip_map(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type ip_block,
+ uint32_t inst_mask)
+{
+ int l = 0, i;
+
+ while (inst_mask) {
+ i = ffs(inst_mask) - 1;
+ adev->ip_map.dev_inst[ip_block][l++] = i;
+ inst_mask &= ~(1 << i);
+ }
+ for (; l < HWIP_MAX_INSTANCE; l++)
+ adev->ip_map.dev_inst[ip_block][l] = -1;
+}
+
+void amdgpu_ip_map_init(struct amdgpu_device *adev)
+{
+ u32 ip_map[][2] = {
+ { GC_HWIP, adev->gfx.xcc_mask },
+ { SDMA0_HWIP, adev->sdma.sdma_mask },
+ { VCN_HWIP, adev->vcn.inst_mask },
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ip_map); ++i)
+ amdgpu_populate_ip_map(adev, ip_map[i][0], ip_map[i][1]);
+
+ adev->ip_map.logical_to_dev_inst = amdgpu_logical_to_dev_inst;
+ adev->ip_map.logical_to_dev_mask = amdgpu_logical_to_dev_mask;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h
new file mode 100644
index 000000000000..2490fd322aec
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_IP_H__
+#define __AMDGPU_IP_H__
+
+void amdgpu_ip_map_init(struct amdgpu_device *adev);
+
+#endif /* __AMDGPU_IP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index cc2e0c9cfe0a..8112ffc85995 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -45,7 +45,6 @@
#include <linux/irq.h>
#include <linux/pci.h>
-#include <drm/drm_crtc_helper.h>
#include <drm/drm_vblank.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_drv.h>
@@ -100,40 +99,20 @@ const char *soc15_ih_clientid_name[] = {
"MP1"
};
-/**
- * amdgpu_hotplug_work_func - work handler for display hotplug event
- *
- * @work: work struct pointer
- *
- * This is the hotplug event work handler (all ASICs).
- * The work gets scheduled from the IRQ handler if there
- * was a hotplug interrupt. It walks through the connector table
- * and calls hotplug handler for each connector. After this, it sends
- * a DRM hotplug event to alert userspace.
- *
- * This design approach is required in order to defer hotplug event handling
- * from the IRQ handler to a work handler because hotplug handler has to use
- * mutexes which cannot be locked in an IRQ handler (since &mutex_lock may
- * sleep).
- */
-static void amdgpu_hotplug_work_func(struct work_struct *work)
-{
- struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
- hotplug_work);
- struct drm_device *dev = adev_to_drm(adev);
- struct drm_mode_config *mode_config = &dev->mode_config;
- struct drm_connector *connector;
- struct drm_connector_list_iter iter;
-
- mutex_lock(&mode_config->mutex);
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(connector, &iter)
- amdgpu_connector_hotplug(connector);
- drm_connector_list_iter_end(&iter);
- mutex_unlock(&mode_config->mutex);
- /* Just fire off a uevent and let userspace tell us what to do */
- drm_helper_hpd_irq_event(dev);
-}
+const int node_id_to_phys_map[NODEID_MAX] = {
+ [AID0_NODEID] = 0,
+ [XCD0_NODEID] = 0,
+ [XCD1_NODEID] = 1,
+ [AID1_NODEID] = 1,
+ [XCD2_NODEID] = 2,
+ [XCD3_NODEID] = 3,
+ [AID2_NODEID] = 2,
+ [XCD4_NODEID] = 4,
+ [XCD5_NODEID] = 5,
+ [AID3_NODEID] = 3,
+ [XCD6_NODEID] = 6,
+ [XCD7_NODEID] = 7,
+};
/**
* amdgpu_irq_disable_all - disable *all* interrupts
@@ -145,7 +124,7 @@ static void amdgpu_hotplug_work_func(struct work_struct *work)
void amdgpu_irq_disable_all(struct amdgpu_device *adev)
{
unsigned long irqflags;
- unsigned i, j, k;
+ unsigned int i, j, k;
int r;
spin_lock_irqsave(&adev->irq.lock, irqflags);
@@ -160,12 +139,12 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev)
continue;
for (k = 0; k < src->num_types; ++k) {
- atomic_set(&src->enabled_types[k], 0);
r = src->funcs->set(adev, src, k,
AMDGPU_IRQ_STATE_DISABLE);
if (r)
- DRM_ERROR("error disabling interrupt (%d)\n",
- r);
+ dev_err(adev->dev,
+ "error disabling interrupt (%d)\n",
+ r);
}
}
}
@@ -193,20 +172,7 @@ static irqreturn_t amdgpu_irq_handler(int irq, void *arg)
if (ret == IRQ_HANDLED)
pm_runtime_mark_last_busy(dev->dev);
- /* For the hardware that cannot enable bif ring for both ras_controller_irq
- * and ras_err_evnet_athub_irq ih cookies, the driver has to poll status
- * register to check whether the interrupt is triggered or not, and properly
- * ack the interrupt if it is there
- */
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF)) {
- if (adev->nbio.ras_funcs &&
- adev->nbio.ras_funcs->handle_ras_controller_intr_no_bifring)
- adev->nbio.ras_funcs->handle_ras_controller_intr_no_bifring(adev);
-
- if (adev->nbio.ras_funcs &&
- adev->nbio.ras_funcs->handle_ras_err_event_athub_intr_no_bifring)
- adev->nbio.ras_funcs->handle_ras_err_event_athub_intr_no_bifring(adev);
- }
+ amdgpu_ras_interrupt_fatal_error_handler(adev);
return ret;
}
@@ -277,7 +243,7 @@ static bool amdgpu_msi_ok(struct amdgpu_device *adev)
return true;
}
-static void amdgpu_restore_msix(struct amdgpu_device *adev)
+void amdgpu_restore_msix(struct amdgpu_device *adev)
{
u16 ctrl;
@@ -305,44 +271,29 @@ static void amdgpu_restore_msix(struct amdgpu_device *adev)
*/
int amdgpu_irq_init(struct amdgpu_device *adev)
{
- int r = 0;
- unsigned int irq;
+ unsigned int irq, flags;
+ int r;
spin_lock_init(&adev->irq.lock);
/* Enable MSI if not disabled by module parameter */
adev->irq.msi_enabled = false;
- if (amdgpu_msi_ok(adev)) {
- int nvec = pci_msix_vec_count(adev->pdev);
- unsigned int flags;
+ if (!amdgpu_msi_ok(adev))
+ flags = PCI_IRQ_INTX;
+ else
+ flags = PCI_IRQ_ALL_TYPES;
- if (nvec <= 0) {
- flags = PCI_IRQ_MSI;
- } else {
- flags = PCI_IRQ_MSI | PCI_IRQ_MSIX;
- }
- /* we only need one vector */
- nvec = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags);
- if (nvec > 0) {
- adev->irq.msi_enabled = true;
- dev_dbg(adev->dev, "using MSI/MSI-X.\n");
- }
+ /* we only need one vector */
+ r = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags);
+ if (r < 0) {
+ dev_err(adev->dev, "Failed to alloc msi vectors\n");
+ return r;
}
- if (!amdgpu_device_has_dc_support(adev)) {
- if (!adev->enable_virtual_display)
- /* Disable vblank IRQs aggressively for power-saving */
- /* XXX: can this be enabled for DC? */
- adev_to_drm(adev)->vblank_disable_immediate = true;
-
- r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
- if (r)
- return r;
-
- /* Pre-DCE11 */
- INIT_WORK(&adev->hotplug_work,
- amdgpu_hotplug_work_func);
+ if (amdgpu_msi_ok(adev)) {
+ adev->irq.msi_enabled = true;
+ dev_dbg(adev->dev, "using MSI/MSI-X.\n");
}
INIT_WORK(&adev->irq.ih1_work, amdgpu_irq_handle_ih1);
@@ -352,25 +303,29 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
/* Use vector 0 for MSI-X. */
r = pci_irq_vector(adev->pdev, 0);
if (r < 0)
- return r;
+ goto free_vectors;
irq = r;
/* PCI devices require shared interrupts. */
r = request_irq(irq, amdgpu_irq_handler, IRQF_SHARED, adev_to_drm(adev)->driver->name,
adev_to_drm(adev));
- if (r) {
- if (!amdgpu_device_has_dc_support(adev))
- flush_work(&adev->hotplug_work);
- return r;
- }
+ if (r)
+ goto free_vectors;
+
adev->irq.installed = true;
adev->irq.irq = irq;
adev_to_drm(adev)->max_vblank_count = 0x00ffffff;
- DRM_DEBUG("amdgpu: irq initialized.\n");
+ dev_dbg(adev->dev, "amdgpu: irq initialized.\n");
return 0;
-}
+free_vectors:
+ if (adev->irq.msi_enabled)
+ pci_free_irq_vectors(adev->pdev);
+
+ adev->irq.msi_enabled = false;
+ return r;
+}
void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
{
@@ -379,9 +334,6 @@ void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
adev->irq.installed = false;
if (adev->irq.msi_enabled)
pci_free_irq_vectors(adev->pdev);
-
- if (!amdgpu_device_has_dc_support(adev))
- flush_work(&adev->hotplug_work);
}
amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft);
@@ -391,7 +343,7 @@ void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
}
/**
- * amdgpu_irq_fini - shut down interrupt handling
+ * amdgpu_irq_fini_sw - shut down interrupt handling
*
* @adev: amdgpu device pointer
*
@@ -401,7 +353,7 @@ void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
*/
void amdgpu_irq_fini_sw(struct amdgpu_device *adev)
{
- unsigned i, j;
+ unsigned int i, j;
for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
if (!adev->irq.client[i].sources)
@@ -435,7 +387,7 @@ void amdgpu_irq_fini_sw(struct amdgpu_device *adev)
* 0 on success or error code otherwise
*/
int amdgpu_irq_add_id(struct amdgpu_device *adev,
- unsigned client_id, unsigned src_id,
+ unsigned int client_id, unsigned int src_id,
struct amdgpu_irq_src *source)
{
if (client_id >= AMDGPU_IRQ_CLIENTID_MAX)
@@ -487,13 +439,21 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
{
u32 ring_index = ih->rptr >> 2;
struct amdgpu_iv_entry entry;
- unsigned client_id, src_id;
+ unsigned int client_id, src_id;
struct amdgpu_irq_src *src;
bool handled = false;
int r;
entry.ih = ih;
entry.iv_entry = (const uint32_t *)&ih->ring[ring_index];
+
+ /*
+ * timestamp is not supported on some legacy SOCs (cik, cz, iceland,
+ * si and tonga), so initialize timestamp and timestamp_src to 0
+ */
+ entry.timestamp = 0;
+ entry.timestamp_src = 0;
+
amdgpu_ih_decode_iv(adev, &entry);
trace_amdgpu_iv(ih - &adev->irq.ih, &entry);
@@ -502,33 +462,41 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
src_id = entry.src_id;
if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) {
- DRM_DEBUG("Invalid client_id in IV: %d\n", client_id);
+ dev_dbg(adev->dev, "Invalid client_id in IV: %d\n", client_id);
} else if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) {
- DRM_DEBUG("Invalid src_id in IV: %d\n", src_id);
+ dev_dbg(adev->dev, "Invalid src_id in IV: %d\n", src_id);
- } else if ((client_id == AMDGPU_IRQ_CLIENTID_LEGACY) &&
+ } else if (((client_id == AMDGPU_IRQ_CLIENTID_LEGACY) ||
+ (client_id == SOC15_IH_CLIENTID_ISP)) &&
adev->irq.virq[src_id]) {
generic_handle_domain_irq(adev->irq.domain, src_id);
} else if (!adev->irq.client[client_id].sources) {
- DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n",
- client_id, src_id);
+ dev_dbg(adev->dev,
+ "Unregistered interrupt client_id: %d src_id: %d\n",
+ client_id, src_id);
} else if ((src = adev->irq.client[client_id].sources[src_id])) {
r = src->funcs->process(adev, src, &entry);
if (r < 0)
- DRM_ERROR("error processing interrupt (%d)\n", r);
+ dev_err(adev->dev, "error processing interrupt (%d)\n",
+ r);
else if (r)
handled = true;
} else {
- DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id);
+ dev_dbg(adev->dev,
+ "Unregistered interrupt src_id: %d of client_id:%d\n",
+ src_id, client_id);
}
/* Send it to amdkfd as well if it isn't already handled */
if (!handled)
amdgpu_amdkfd_interrupt(adev, entry.iv_entry);
+
+ if (amdgpu_ih_ts_after(ih->processed_timestamp, entry.timestamp))
+ ih->processed_timestamp = entry.timestamp;
}
/**
@@ -545,7 +513,7 @@ void amdgpu_irq_delegate(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry,
unsigned int num_dw)
{
- amdgpu_ih_ring_write(&adev->irq.ih_soft, entry->iv_entry, num_dw);
+ amdgpu_ih_ring_write(adev, &adev->irq.ih_soft, entry->iv_entry, num_dw);
schedule_work(&adev->irq.ih_soft_work);
}
@@ -559,7 +527,7 @@ void amdgpu_irq_delegate(struct amdgpu_device *adev,
* Updates interrupt state for the specific source (all ASICs).
*/
int amdgpu_irq_update(struct amdgpu_device *adev,
- struct amdgpu_irq_src *src, unsigned type)
+ struct amdgpu_irq_src *src, unsigned int type)
{
unsigned long irqflags;
enum amdgpu_interrupt_state state;
@@ -568,7 +536,8 @@ int amdgpu_irq_update(struct amdgpu_device *adev,
spin_lock_irqsave(&adev->irq.lock, irqflags);
/* We need to determine after taking the lock, otherwise
- we might disable just enabled interrupts again */
+ * we might disable just enabled interrupts again
+ */
if (amdgpu_irq_enabled(adev, src, type))
state = AMDGPU_IRQ_STATE_ENABLE;
else
@@ -622,7 +591,7 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
* 0 on success or error code otherwise
*/
int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
- unsigned type)
+ unsigned int type)
{
if (!adev->irq.installed)
return -ENOENT;
@@ -652,8 +621,12 @@ int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
* 0 on success or error code otherwise
*/
int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
- unsigned type)
+ unsigned int type)
{
+ /* When the threshold is reached,the interrupt source may not be enabled.return -EINVAL */
+ if (amdgpu_ras_is_rma(adev) && !amdgpu_irq_enabled(adev, src, type))
+ return -EINVAL;
+
if (!adev->irq.installed)
return -ENOENT;
@@ -663,6 +636,9 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
if (!src->enabled_types || !src->funcs->set)
return -EINVAL;
+ if (WARN_ON(!amdgpu_irq_enabled(adev, src, type)))
+ return -EINVAL;
+
if (atomic_dec_and_test(&src->enabled_types[type]))
return amdgpu_irq_update(adev, src, type);
@@ -683,7 +659,7 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
* invalid parameters
*/
bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
- unsigned type)
+ unsigned int type)
{
if (!adev->irq.installed)
return false;
@@ -757,10 +733,10 @@ static const struct irq_domain_ops amdgpu_hw_irqdomain_ops = {
*/
int amdgpu_irq_add_domain(struct amdgpu_device *adev)
{
- adev->irq.domain = irq_domain_add_linear(NULL, AMDGPU_MAX_IRQ_SRC_ID,
- &amdgpu_hw_irqdomain_ops, adev);
+ adev->irq.domain = irq_domain_create_linear(NULL, AMDGPU_MAX_IRQ_SRC_ID,
+ &amdgpu_hw_irqdomain_ops, adev);
if (!adev->irq.domain) {
- DRM_ERROR("GPU irq add domain failed\n");
+ dev_err(adev->dev, "GPU irq add domain failed\n");
return -ENODEV;
}
@@ -796,7 +772,7 @@ void amdgpu_irq_remove_domain(struct amdgpu_device *adev)
* Returns:
* Linux IRQ
*/
-unsigned amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned src_id)
+unsigned int amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned int src_id)
{
adev->irq.virq[src_id] = irq_create_mapping(adev->irq.domain, src_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
index e9f2c11ea416..9f0417456abd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -53,7 +53,7 @@ struct amdgpu_iv_entry {
uint64_t timestamp;
unsigned timestamp_src;
unsigned pasid;
- unsigned pasid_src;
+ unsigned node_id;
unsigned src_data[AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW];
const uint32_t *iv_entry;
};
@@ -98,8 +98,28 @@ struct amdgpu_irq {
struct irq_domain *domain; /* GPU irq controller domain */
unsigned virq[AMDGPU_MAX_IRQ_SRC_ID];
uint32_t srbm_soft_reset;
+ u32 retry_cam_doorbell_index;
+ bool retry_cam_enabled;
};
+enum interrupt_node_id_per_aid { /* node ids come in groups of four: AIDn at 4*n, followed by its two XCDs */
+ AID0_NODEID = 0,
+ XCD0_NODEID = 1,
+ XCD1_NODEID = 2,
+ AID1_NODEID = 4, /* value 3 is not assigned */
+ XCD2_NODEID = 5,
+ XCD3_NODEID = 6,
+ AID2_NODEID = 8, /* value 7 is not assigned */
+ XCD4_NODEID = 9,
+ XCD5_NODEID = 10,
+ AID3_NODEID = 12, /* value 11 is not assigned */
+ XCD6_NODEID = 13,
+ XCD7_NODEID = 14,
+ NODEID_MAX, /* one past XCD7_NODEID (15); used as the size of node_id_to_phys_map[] */
+};
+
+extern const int node_id_to_phys_map[NODEID_MAX];
+
void amdgpu_irq_disable_all(struct amdgpu_device *adev);
int amdgpu_irq_init(struct amdgpu_device *adev);
@@ -126,5 +146,6 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev);
int amdgpu_irq_add_domain(struct amdgpu_device *adev);
void amdgpu_irq_remove_domain(struct amdgpu_device *adev);
unsigned amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned src_id);
+void amdgpu_restore_msix(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c
new file mode 100644
index 000000000000..37270c4dab8d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c
@@ -0,0 +1,345 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/mfd/core.h>
+
+#include "amdgpu.h"
+#include "amdgpu_isp.h"
+#include "isp_v4_1_0.h"
+#include "isp_v4_1_1.h"
+
+#define ISP_MC_ADDR_ALIGN (1024 * 32)
+
+/**
+ * isp_hw_init - start and test isp block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Forwards to the hw_init callback installed in isp->funcs.
+ *
+ * Returns:
+ * The callback's result, or -ENODEV when no hw_init callback is set.
+ */
+static int isp_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_isp *isp = &adev->isp;
+
+ if (isp->funcs->hw_init != NULL)
+ return isp->funcs->hw_init(isp);
+
+ return -ENODEV;
+}
+
+/**
+ * isp_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Forwards to the hw_fini callback installed in isp->funcs.
+ *
+ * Returns:
+ * The callback's result, or -ENODEV when no hw_fini callback is set.
+ */
+static int isp_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_isp *isp = &ip_block->adev->isp;
+
+ if (isp->funcs->hw_fini != NULL)
+ return isp->funcs->hw_fini(isp);
+
+ return -ENODEV;
+}
+
+static int isp_load_fw_by_psp(struct amdgpu_device *adev) /* request the isp fw and record it in adev->firmware; returns 0 or the request error */
+{
+ const struct common_firmware_header *hdr;
+ char ucode_prefix[10];
+ int r = 0;
+
+ /* get isp fw binary name and path (prefix is derived from the ISP_HWIP version) */
+ amdgpu_ucode_ip_version_decode(adev, ISP_HWIP, ucode_prefix,
+ sizeof(ucode_prefix));
+
+ /* read isp fw from "amdgpu/<prefix>.bin"; on failure release adev->isp.fw and propagate the error */
+ r = amdgpu_ucode_request(adev, &adev->isp.fw, AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s.bin", ucode_prefix);
+ if (r) {
+ amdgpu_ucode_release(&adev->isp.fw);
+ return r;
+ }
+
+ hdr = (const struct common_firmware_header *)adev->isp.fw->data;
+ /* register the image in the AMDGPU_UCODE_ID_ISP slot of the ucode table */
+ adev->firmware.ucode[AMDGPU_UCODE_ID_ISP].ucode_id =
+ AMDGPU_UCODE_ID_ISP;
+ adev->firmware.ucode[AMDGPU_UCODE_ID_ISP].fw = adev->isp.fw;
+ /* account for the ucode size from the header, rounded up to PAGE_SIZE */
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
+
+ return r;
+}
+
+static int isp_early_init(struct amdgpu_ip_block *ip_block)
+{
+ /* pick version-specific callbacks, set back-pointers, then request the isp fw */
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_isp *isp = &adev->isp;
+ /* only ISP 4.1.0 and 4.1.1 are handled; any other version is rejected with -EINVAL */
+ switch (amdgpu_ip_version(adev, ISP_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ isp_v4_1_0_set_isp_funcs(isp);
+ break;
+ case IP_VERSION(4, 1, 1):
+ isp_v4_1_1_set_isp_funcs(isp);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ isp->adev = adev;
+ isp->parent = adev->dev;
+ /* any fw load failure is reported as -ENOENT; the original error code is not propagated */
+ if (isp_load_fw_by_psp(adev)) {
+ DRM_DEBUG_DRIVER("%s: isp fw load failed\n", __func__);
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static bool isp_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ return true; /* stub: always reports idle; ip_block is unused */
+}
+
+static int isp_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0; /* stub: nothing is programmed here; always reports success */
+}
+
+static int isp_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0; /* stub: nothing is programmed here; always reports success */
+}
+
+static int is_valid_isp_device(struct device *isp_parent, struct device *amdgpu_dev)
+{
+ if (isp_parent != amdgpu_dev) /* the isp device's parent must be this amdgpu device */
+ return -EINVAL;
+
+ return 0; /* 0 when the pointers match, -EINVAL otherwise */
+}
+
+/**
+ * isp_user_buffer_alloc - create user buffer object (BO) for isp
+ *
+ * @dev: isp device handle
+ * @dmabuf: DMABUF handle for isp buffer allocated in system memory
+ * @buf_obj: GPU buffer object handle to initialize
+ * @buf_addr: GPU addr of the pinned BO to initialize
+ *
+ * Imports isp DMABUF to allocate and pin a user BO for isp internal use. It does
+ * GART alloc to generate GPU addr for BO to make it accessible through the
+ * GART aperture for ISP HW.
+ *
+ * This function is exported to allow the V4L2 isp device external to drm device
+ * to create and access the isp user BO.
+ *
+ * @buf_obj and @buf_addr are written only on success.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise: -EINVAL for a NULL output
+ * pointer or a device whose parent is not the amdgpu device, -ENODEV when
+ * the platform device or its MFD cell is missing, or the error returned by
+ * amdgpu_bo_create_isp_user().
+ */
+int isp_user_buffer_alloc(struct device *dev, void *dmabuf,
+ void **buf_obj, u64 *buf_addr)
+{
+ struct platform_device *ispdev = to_platform_device(dev);
+ const struct isp_platform_data *isp_pdata;
+ struct amdgpu_device *adev;
+ struct mfd_cell *mfd_cell;
+ struct amdgpu_bo *bo;
+ u64 gpu_addr;
+ int ret;
+
+ if (WARN_ON(!ispdev))
+ return -ENODEV;
+
+ if (WARN_ON(!buf_obj))
+ return -EINVAL;
+
+ if (WARN_ON(!buf_addr))
+ return -EINVAL;
+ /* NOTE(review): this is the address of element 0, so the NULL test presumably only fires when ispdev->mfd_cell itself is NULL - confirm */
+ mfd_cell = &ispdev->mfd_cell[0];
+ if (!mfd_cell)
+ return -ENODEV;
+ /* NOTE(review): platform_data is dereferenced without a NULL check - confirm it is always set */
+ isp_pdata = mfd_cell->platform_data;
+ adev = isp_pdata->adev;
+ /* reject isp devices that are not children of this amdgpu device */
+ ret = is_valid_isp_device(ispdev->dev.parent, adev->dev);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_bo_create_isp_user(adev, dmabuf,
+ AMDGPU_GEM_DOMAIN_GTT, &bo, &gpu_addr);
+ if (ret) {
+ drm_err(&adev->ddev, "failed to alloc gart user buffer (%d)", ret);
+ return ret;
+ }
+ /* hand the BO back as an opaque pointer together with its GPU address */
+ *buf_obj = (void *)bo;
+ *buf_addr = gpu_addr;
+
+ return 0;
+}
+EXPORT_SYMBOL(isp_user_buffer_alloc);
+
+/**
+ * isp_user_buffer_free - free isp user buffer object (BO)
+ *
+ * @buf_obj: amdgpu isp user BO to free
+ *
+ * Unpins and unrefs the BO used for isp internal use by passing @buf_obj
+ * straight to amdgpu_bo_free_isp_user().
+ *
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to free the isp user BO.
+ */
+void isp_user_buffer_free(void *buf_obj)
+{
+ amdgpu_bo_free_isp_user(buf_obj);
+}
+EXPORT_SYMBOL(isp_user_buffer_free);
+
+/**
+ * isp_kernel_buffer_alloc - create kernel buffer object (BO) for isp
+ *
+ * @dev: isp device handle
+ * @size: size for the new BO
+ * @buf_obj: GPU BO handle to initialize
+ * @gpu_addr: GPU addr of the pinned BO
+ * @cpu_addr: CPU address mapping of BO
+ *
+ * Allocates and pins a kernel BO for internal isp firmware use. The BO is
+ * requested in the GTT domain with ISP_MC_ADDR_ALIGN alignment.
+ *
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to create and access the kernel BO.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise: -EINVAL for a NULL output
+ * pointer or a device whose parent is not the amdgpu device, -ENODEV when
+ * the platform device or its MFD cell is missing, or the error returned by
+ * amdgpu_bo_create_kernel().
+ */
+int isp_kernel_buffer_alloc(struct device *dev, u64 size,
+ void **buf_obj, u64 *gpu_addr, void **cpu_addr)
+{
+ struct platform_device *ispdev = to_platform_device(dev);
+ struct amdgpu_bo **bo = (struct amdgpu_bo **)buf_obj;
+ const struct isp_platform_data *isp_pdata;
+ struct amdgpu_device *adev;
+ struct mfd_cell *mfd_cell;
+ int ret;
+
+ if (WARN_ON(!ispdev))
+ return -ENODEV;
+
+ if (WARN_ON(!buf_obj))
+ return -EINVAL;
+
+ if (WARN_ON(!gpu_addr))
+ return -EINVAL;
+
+ if (WARN_ON(!cpu_addr))
+ return -EINVAL;
+ /* NOTE(review): this is the address of element 0, so the NULL test presumably only fires when ispdev->mfd_cell itself is NULL - confirm */
+ mfd_cell = &ispdev->mfd_cell[0];
+ if (!mfd_cell)
+ return -ENODEV;
+ /* NOTE(review): platform_data is dereferenced without a NULL check - confirm it is always set */
+ isp_pdata = mfd_cell->platform_data;
+ adev = isp_pdata->adev;
+ /* reject isp devices that are not children of this amdgpu device */
+ ret = is_valid_isp_device(ispdev->dev.parent, adev->dev);
+ if (ret)
+ return ret;
+
+ /* Ensure *bo is NULL so a new BO will be created */
+ *bo = NULL;
+ ret = amdgpu_bo_create_kernel(adev,
+ size,
+ ISP_MC_ADDR_ALIGN,
+ AMDGPU_GEM_DOMAIN_GTT,
+ bo,
+ gpu_addr,
+ cpu_addr);
+ if (!cpu_addr || ret) { /* NOTE(review): a NULL cpu_addr was already rejected above, so only ret can trigger this branch */
+ drm_err(&adev->ddev, "failed to alloc gart kernel buffer (%d)", ret);
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(isp_kernel_buffer_alloc);
+
+/**
+ * isp_kernel_buffer_free - free isp kernel buffer object (BO)
+ *
+ * @buf_obj: amdgpu isp kernel BO to free
+ * @gpu_addr: GPU addr of isp kernel BO
+ * @cpu_addr: CPU addr of isp kernel BO
+ *
+ * Unmaps and unpins an isp kernel BO by forwarding all three pointers to
+ * amdgpu_bo_free_kernel().
+ *
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to free the kernel BO.
+ */
+void isp_kernel_buffer_free(void **buf_obj, u64 *gpu_addr, void **cpu_addr)
+{
+ struct amdgpu_bo **bo = (struct amdgpu_bo **)buf_obj;
+
+ amdgpu_bo_free_kernel(bo, gpu_addr, cpu_addr);
+}
+EXPORT_SYMBOL(isp_kernel_buffer_free);
+
+static const struct amd_ip_funcs isp_ip_funcs = { /* IP callbacks shared by the 4.1.0 and 4.1.1 block descriptors */
+ .name = "isp_ip",
+ .early_init = isp_early_init,
+ .hw_init = isp_hw_init,
+ .hw_fini = isp_hw_fini,
+ .is_idle = isp_is_idle,
+ .set_clockgating_state = isp_set_clockgating_state,
+ .set_powergating_state = isp_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version isp_v4_1_0_ip_block = { /* ISP 4.1.0 block descriptor */
+ .type = AMD_IP_BLOCK_TYPE_ISP,
+ .major = 4,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &isp_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version isp_v4_1_1_ip_block = { /* ISP 4.1.1 block descriptor; differs from 4.1.0 only in .rev */
+ .type = AMD_IP_BLOCK_TYPE_ISP,
+ .major = 4,
+ .minor = 1,
+ .rev = 1,
+ .funcs = &isp_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h
new file mode 100644
index 000000000000..d6f4ffa4c97c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#ifndef __AMDGPU_ISP_H__
+#define __AMDGPU_ISP_H__
+
+#include <drm/amd/isp.h>
+#include <linux/pm_domain.h>
+
+#define ISP_REGS_OFFSET_END 0x629A4
+
+struct amdgpu_isp;
+
+struct isp_funcs { /* version-specific isp callbacks; either pointer may be NULL */
+ int (*hw_init)(struct amdgpu_isp *isp); /* invoked by isp_hw_init() when set */
+ int (*hw_fini)(struct amdgpu_isp *isp); /* invoked by isp_hw_fini() when set */
+};
+
+struct amdgpu_isp { /* per-device isp state, embedded in the amdgpu device as adev->isp */
+ struct device *parent; /* set to adev->dev in isp_early_init() */
+ struct amdgpu_device *adev; /* owning amdgpu device, set in isp_early_init() */
+ const struct isp_funcs *funcs; /* version-specific callbacks used by isp_hw_init()/isp_hw_fini() */
+ struct mfd_cell *isp_cell;
+ struct resource *isp_res;
+ struct resource *isp_i2c_res;
+ struct resource *isp_gpio_res;
+ struct isp_platform_data *isp_pdata;
+ unsigned int harvest_config;
+ const struct firmware *fw; /* firmware image requested in isp_load_fw_by_psp() */
+ struct generic_pm_domain ispgpd;
+};
+
+extern const struct amdgpu_ip_block_version isp_v4_1_0_ip_block;
+extern const struct amdgpu_ip_block_version isp_v4_1_1_ip_block;
+
+#endif /* __AMDGPU_ISP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index bfc47bea23db..0a0dcbf0798d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -29,41 +29,144 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_dev_coredump.h"
+#include "amdgpu_xgmi.h"
+
+static void amdgpu_job_do_core_dump(struct amdgpu_device *adev, /* dump IP state for one device, then take its coredump */
+ struct amdgpu_job *job)
+{
+ int i;
+
+ dev_info(adev->dev, "Dumping IP State\n");
+ for (i = 0; i < adev->num_ip_blocks; i++) /* blocks without a dump_ip_state callback are skipped */
+ if (adev->ip_blocks[i].version->funcs->dump_ip_state)
+ adev->ip_blocks[i].version->funcs
+ ->dump_ip_state((void *)&adev->ip_blocks[i]);
+ dev_info(adev->dev, "Dumping IP State Completed\n");
+ /* NOTE(review): meaning of the true/false flags is defined by amdgpu_coredump() - confirm against its prototype */
+ amdgpu_coredump(adev, true, false, job);
+}
+
+static void amdgpu_job_core_dump(struct amdgpu_device *adev, /* core dump adev and, for a multi-node XGMI hive, every device in the hive */
+ struct amdgpu_job *job)
+{
+ struct list_head device_list, *device_list_handle = NULL;
+ struct amdgpu_device *tmp_adev = NULL;
+ struct amdgpu_hive_info *hive = NULL;
+ /* the hive is only looked up on non-SRIOV devices; hive_lock is held until all dumps are done */
+ if (!amdgpu_sriov_vf(adev))
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive)
+ mutex_lock(&hive->hive_lock);
+ /*
+ * Reuse the logic in amdgpu_device_gpu_recover() to build list of
+ * devices for core dump
+ */
+ INIT_LIST_HEAD(&device_list);
+ if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
+ list_add_tail(&tmp_adev->reset_list, &device_list);
+ if (!list_is_first(&adev->reset_list, &device_list)) /* make adev the first entry */
+ list_rotate_to_front(&adev->reset_list, &device_list);
+ device_list_handle = &device_list;
+ } else {
+ list_add_tail(&adev->reset_list, &device_list); /* no multi-node hive: dump adev only */
+ device_list_handle = &device_list;
+ }
+
+ /* Do the coredump for each device */
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list)
+ amdgpu_job_do_core_dump(tmp_adev, job);
+
+ if (hive) { /* drop the lock and the hive reference taken above */
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_put_xgmi_hive(hive);
+ }
+}
static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
{
struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
struct amdgpu_job *job = to_amdgpu_job(s_job);
- struct amdgpu_task_info ti;
+ struct drm_wedge_task_info *info = NULL;
+ struct amdgpu_task_info *ti = NULL;
struct amdgpu_device *adev = ring->adev;
- int idx;
+ int idx, r;
if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
- DRM_INFO("%s - device unplugged skipping recovery on scheduler:%s",
+ dev_info(adev->dev, "%s - device unplugged skipping recovery on scheduler:%s",
__func__, s_job->sched->name);
/* Effectively the job is aborted as the device is gone */
return DRM_GPU_SCHED_STAT_ENODEV;
}
- memset(&ti, 0, sizeof(struct amdgpu_task_info));
+ /*
+ * Do the coredump immediately after a job timeout to get a very
+ * close dump/snapshot/representation of GPU's current error status
+ * Skip it for SRIOV, since VF FLR will be triggered by host driver
+ * before job timeout
+ */
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_job_core_dump(adev, job);
if (amdgpu_gpu_recovery &&
+ amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_SOFT_RESET) &&
amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
- DRM_ERROR("ring %s timeout, but soft recovered\n",
- s_job->sched->name);
+ dev_err(adev->dev, "ring %s timeout, but soft recovered\n",
+ s_job->sched->name);
goto exit;
}
- amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
- DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
- job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
- ring->fence_drv.sync_seq);
- DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
- ti.process_name, ti.tgid, ti.task_name, ti.pid);
+ dev_err(adev->dev, "ring %s timeout, signaled seq=%u, emitted seq=%u\n",
+ job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
+ ring->fence_drv.sync_seq);
+
+ ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid);
+ if (ti) {
+ amdgpu_vm_print_task_info(adev, ti);
+ info = &ti->task;
+ }
+
+ /* attempt a per ring reset */
+ if (amdgpu_gpu_recovery &&
+ amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_QUEUE) &&
+ ring->funcs->reset) {
+ dev_err(adev->dev, "Starting %s ring reset\n",
+ s_job->sched->name);
+ r = amdgpu_ring_reset(ring, job->vmid, job->hw_fence);
+ if (!r) {
+ atomic_inc(&ring->adev->gpu_reset_counter);
+ dev_err(adev->dev, "Ring %s reset succeeded\n",
+ ring->sched.name);
+ drm_dev_wedged_event(adev_to_drm(adev),
+ DRM_WEDGE_RECOVERY_NONE, info);
+ goto exit;
+ }
+ dev_err(adev->dev, "Ring %s reset failed\n", ring->sched.name);
+ }
+
+ dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
if (amdgpu_device_should_recover_gpu(ring->adev)) {
- amdgpu_device_gpu_recover(ring->adev, job);
+ struct amdgpu_reset_context reset_context;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ reset_context.src = AMDGPU_RESET_SRC_JOB;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ /*
+ * To avoid an unnecessary extra coredump, as we have already
+ * got the very close representation of GPU's error status
+ */
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
+
+ r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
+ if (r)
+ dev_err(adev->dev, "GPU Recovery Failed: %d\n", r);
} else {
drm_sched_suspend_timeout(&ring->sched);
if (amdgpu_sriov_vf(adev))
@@ -71,74 +174,119 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
}
exit:
+ amdgpu_vm_put_task_info(ti);
drm_dev_exit(idx);
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
}
-int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
- struct amdgpu_job **job, struct amdgpu_vm *vm)
+int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct drm_sched_entity *entity, void *owner,
+ unsigned int num_ibs, struct amdgpu_job **job,
+ u64 drm_client_id)
{
- size_t size = sizeof(struct amdgpu_job);
+ struct amdgpu_fence *af;
+ int r;
if (num_ibs == 0)
return -EINVAL;
- size += sizeof(struct amdgpu_ib) * num_ibs;
-
- *job = kzalloc(size, GFP_KERNEL);
+ *job = kzalloc(struct_size(*job, ibs, num_ibs), GFP_KERNEL);
if (!*job)
return -ENOMEM;
- /*
- * Initialize the scheduler to at least some ring so that we always
- * have a pointer to adev.
- */
- (*job)->base.sched = &adev->rings[0]->sched;
+ af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+ if (!af) {
+ r = -ENOMEM;
+ goto err_job;
+ }
+ (*job)->hw_fence = af;
+
+ af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+ if (!af) {
+ r = -ENOMEM;
+ goto err_fence;
+ }
+ (*job)->hw_vm_fence = af;
+
(*job)->vm = vm;
- (*job)->ibs = (void *)&(*job)[1];
- (*job)->num_ibs = num_ibs;
- amdgpu_sync_create(&(*job)->sync);
- amdgpu_sync_create(&(*job)->sched_sync);
- (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+ amdgpu_sync_create(&(*job)->explicit_sync);
+ (*job)->generation = amdgpu_vm_generation(adev, vm);
(*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET;
- return 0;
+ if (!entity)
+ return 0;
+
+ return drm_sched_job_init(&(*job)->base, entity, 1, owner,
+ drm_client_id);
+
+err_fence:
+ kfree((*job)->hw_fence);
+err_job:
+ kfree(*job);
+ *job = NULL;
+
+ return r;
}
-int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
- enum amdgpu_ib_pool_type pool_type,
- struct amdgpu_job **job)
+int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
+ struct drm_sched_entity *entity, void *owner,
+ size_t size, enum amdgpu_ib_pool_type pool_type,
+ struct amdgpu_job **job, u64 k_job_id)
{
int r;
- r = amdgpu_job_alloc(adev, 1, job, NULL);
+ r = amdgpu_job_alloc(adev, NULL, entity, owner, 1, job,
+ k_job_id);
if (r)
return r;
+ (*job)->num_ibs = 1;
r = amdgpu_ib_get(adev, NULL, size, pool_type, &(*job)->ibs[0]);
- if (r)
+ if (r) {
+ if (entity)
+ drm_sched_job_cleanup(&(*job)->base);
+ kfree((*job)->hw_vm_fence);
+ kfree((*job)->hw_fence);
kfree(*job);
+ *job = NULL;
+ }
return r;
}
+void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds, /* record base/size of the optional gds, gws and oa BOs in the job */
+ struct amdgpu_bo *gws, struct amdgpu_bo *oa)
+{
+ if (gds) { /* a NULL BO leaves the corresponding job fields untouched */
+ job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT; /* offsets and sizes are stored shifted right by PAGE_SHIFT */
+ job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
+ }
+ if (gws) {
+ job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
+ job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
+ }
+ if (oa) {
+ job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
+ job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
+ }
+}
+
void amdgpu_job_free_resources(struct amdgpu_job *job)
{
- struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
struct dma_fence *f;
- struct dma_fence *hw_fence;
unsigned i;
- if (job->hw_fence.ops == NULL)
- hw_fence = job->external_hw_fence;
+ /* Check if any fences were initialized */
+ if (job->base.s_fence && job->base.s_fence->finished.ops)
+ f = &job->base.s_fence->finished;
+ else if (job->hw_fence && job->hw_fence->base.ops)
+ f = &job->hw_fence->base;
else
- hw_fence = &job->hw_fence;
+ f = NULL;
- /* use sched fence if available */
- f = job->base.s_fence ? &job->base.s_fence->finished : hw_fence;
for (i = 0; i < job->num_ibs; ++i)
- amdgpu_ib_free(ring->adev, &job->ibs[i], f);
+ amdgpu_ib_free(&job->ibs[i], f);
}
static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
@@ -147,48 +295,68 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
drm_sched_job_cleanup(s_job);
- amdgpu_sync_free(&job->sync);
- amdgpu_sync_free(&job->sched_sync);
+ amdgpu_sync_free(&job->explicit_sync);
- /* only put the hw fence if has embedded fence */
- if (job->hw_fence.ops != NULL)
- dma_fence_put(&job->hw_fence);
+ if (job->hw_fence->base.ops)
+ dma_fence_put(&job->hw_fence->base);
else
- kfree(job);
+ kfree(job->hw_fence);
+ if (job->hw_vm_fence->base.ops)
+ dma_fence_put(&job->hw_vm_fence->base);
+ else
+ kfree(job->hw_vm_fence);
+
+ kfree(job);
+}
+
+void amdgpu_job_set_gang_leader(struct amdgpu_job *job, /* store the leader's scheduled fence in job->gang_submit */
+ struct amdgpu_job *leader)
+{
+ struct dma_fence *fence = &leader->base.s_fence->scheduled;
+
+ WARN_ON(job->gang_submit); /* warn if a gang leader was already set for this job */
+
+ /*
+ * Don't add a reference when we are the gang leader to avoid a
+ * circular dependency.
+ */
+ if (job != leader)
+ dma_fence_get(fence);
+ job->gang_submit = fence;
+}
void amdgpu_job_free(struct amdgpu_job *job)
{
+ if (job->base.entity)
+ drm_sched_job_cleanup(&job->base);
+
amdgpu_job_free_resources(job);
- amdgpu_sync_free(&job->sync);
- amdgpu_sync_free(&job->sched_sync);
+ amdgpu_sync_free(&job->explicit_sync);
+ if (job->gang_submit != &job->base.s_fence->scheduled)
+ dma_fence_put(job->gang_submit);
- /* only put the hw fence if has embedded fence */
- if (job->hw_fence.ops != NULL)
- dma_fence_put(&job->hw_fence);
+ if (job->hw_fence->base.ops)
+ dma_fence_put(&job->hw_fence->base);
+ else
+ kfree(job->hw_fence);
+ if (job->hw_vm_fence->base.ops)
+ dma_fence_put(&job->hw_vm_fence->base);
else
- kfree(job);
+ kfree(job->hw_vm_fence);
+
+ kfree(job);
}
-int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
- void *owner, struct dma_fence **f)
+struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job)
{
- int r;
-
- if (!f)
- return -EINVAL;
-
- r = drm_sched_job_init(&job->base, entity, owner);
- if (r)
- return r;
+ struct dma_fence *f;
drm_sched_job_arm(&job->base);
-
- *f = dma_fence_get(&job->base.s_fence->finished);
+ f = dma_fence_get(&job->base.s_fence->finished);
amdgpu_job_free_resources(job);
drm_sched_entity_push_job(&job->base);
- return 0;
+ return f;
}
int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
@@ -197,50 +365,58 @@ int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
int r;
job->base.sched = &ring->sched;
- r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, NULL, fence);
- /* record external_hw_fence for direct submit */
- job->external_hw_fence = dma_fence_get(*fence);
+ r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, fence);
+
if (r)
return r;
amdgpu_job_free(job);
- dma_fence_put(*fence);
-
return 0;
}
-static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
- struct drm_sched_entity *s_entity)
+static struct dma_fence *
+amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
+ struct drm_sched_entity *s_entity)
{
struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched);
struct amdgpu_job *job = to_amdgpu_job(sched_job);
- struct amdgpu_vm *vm = job->vm;
struct dma_fence *fence;
int r;
- fence = amdgpu_sync_get_fence(&job->sync);
- if (fence && drm_sched_dependency_optimized(fence, s_entity)) {
- r = amdgpu_sync_fence(&job->sched_sync, fence);
- if (r)
- DRM_ERROR("Error adding fence (%d)\n", r);
+ r = drm_sched_entity_error(s_entity);
+ if (r)
+ goto error;
+
+ if (job->gang_submit) {
+ fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit);
+ if (fence)
+ return fence;
}
- while (fence == NULL && vm && !job->vmid) {
- r = amdgpu_vmid_grab(vm, ring, &job->sync,
- &job->base.s_fence->finished,
- job);
- if (r)
- DRM_ERROR("Error getting VM ID (%d)\n", r);
+ fence = amdgpu_device_enforce_isolation(ring->adev, ring, job);
+ if (fence)
+ return fence;
- fence = amdgpu_sync_get_fence(&job->sync);
+ if (job->vm && !job->vmid) {
+ r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
+ if (r) {
+ dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r);
+ goto error;
+ }
+ return fence;
}
- return fence;
+ return NULL;
+
+error:
+ dma_fence_set_error(&job->base.s_fence->finished, r);
+ return NULL;
}
static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
{
struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched);
+ struct amdgpu_device *adev = ring->adev;
struct dma_fence *fence = NULL, *finished;
struct amdgpu_job *job;
int r = 0;
@@ -248,26 +424,25 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
job = to_amdgpu_job(sched_job);
finished = &job->base.s_fence->finished;
- BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
-
trace_amdgpu_sched_run_job(job);
- if (job->vram_lost_counter != atomic_read(&ring->adev->vram_lost_counter))
- dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */
+ /* Skip job if VRAM is lost and never resubmit gangs */
+ if (job->generation != amdgpu_vm_generation(adev, job->vm) ||
+ (job->job_run_counter && job->gang_submit))
+ dma_fence_set_error(finished, -ECANCELED);
if (finished->error < 0) {
- DRM_INFO("Skip scheduling IBs!\n");
+ dev_dbg(adev->dev, "Skip scheduling IBs in ring(%s)",
+ ring->name);
} else {
r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
&fence);
if (r)
- DRM_ERROR("Error scheduling IBs (%d)\n", r);
+ dev_err(adev->dev,
+ "Error scheduling IBs (%d) in ring(%s)", r,
+ ring->name);
}
- if (!job->job_run_counter)
- dma_fence_get(fence);
- else if (finished->error < 0)
- dma_fence_put(&job->hw_fence);
job->job_run_counter++;
amdgpu_job_free_resources(job);
@@ -275,8 +450,24 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
return fence;
}
-#define to_drm_sched_job(sched_job) \
- container_of((sched_job), struct drm_sched_job, queue_node)
+/*
+ * This is a duplicate function from DRM scheduler sched_internal.h.
+ * Plan is to remove it when amdgpu_job_stop_all_jobs_on_sched is removed, due
+ * to the latter being incorrect and racy.
+ *
+ * See https://lore.kernel.org/amd-gfx/44edde63-7181-44fb-a4f7-94e50514f539@amd.com/
+ *
+ * Pops the next node from the entity's job_queue and returns the
+ * drm_sched_job that embeds it, or NULL when the queue is empty.
+ */
+static struct drm_sched_job *
+drm_sched_entity_queue_pop(struct drm_sched_entity *entity)
+{
+ struct spsc_node *node;
+
+ node = spsc_queue_pop(&entity->job_queue);
+ if (!node)
+ return NULL;
+
+ return container_of(node, struct drm_sched_job, queue_node);
+}
void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
{
@@ -285,15 +476,11 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
int i;
/* Signal all jobs not yet scheduled */
- for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
- struct drm_sched_rq *rq = &sched->sched_rq[i];
-
- if (!rq)
- continue;
-
+ for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
+ struct drm_sched_rq *rq = sched->sched_rq[i];
spin_lock(&rq->lock);
list_for_each_entry(s_entity, &rq->entities, list) {
- while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) {
+ while ((s_job = drm_sched_entity_queue_pop(s_entity))) {
struct drm_sched_fence *s_fence = s_job->s_fence;
dma_fence_signal(&s_fence->scheduled);
@@ -314,7 +501,7 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
}
const struct drm_sched_backend_ops amdgpu_sched_ops = {
- .dependency = amdgpu_job_dependency,
+ .prepare_job = amdgpu_job_prepare_job,
.run_job = amdgpu_job_run,
.timedout_job = amdgpu_job_timedout,
.free_job = amdgpu_job_free_cb
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 9e65730193b8..7abf069d17d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -23,6 +23,10 @@
#ifndef __AMDGPU_JOB_H__
#define __AMDGPU_JOB_H__
+#include <drm/gpu_scheduler.h>
+#include "amdgpu_sync.h"
+#include "amdgpu_ring.h"
+
/* bit set means command submit involves a preamble IB */
#define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0)
/* bit set means preamble IB is first presented in belonging context */
@@ -40,42 +44,84 @@
struct amdgpu_fence;
enum amdgpu_ib_pool_type;
+/* Internal kernel job ids. (decreasing values, starting from U64_MAX). */
+#define AMDGPU_KERNEL_JOB_ID_VM_UPDATE (18446744073709551615ULL)
+#define AMDGPU_KERNEL_JOB_ID_VM_UPDATE_PDES (18446744073709551614ULL)
+#define AMDGPU_KERNEL_JOB_ID_VM_UPDATE_RANGE (18446744073709551613ULL)
+#define AMDGPU_KERNEL_JOB_ID_VM_PT_CLEAR (18446744073709551612ULL)
+#define AMDGPU_KERNEL_JOB_ID_TTM_MAP_BUFFER (18446744073709551611ULL)
+#define AMDGPU_KERNEL_JOB_ID_TTM_ACCESS_MEMORY_SDMA (18446744073709551610ULL)
+#define AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER (18446744073709551609ULL)
+#define AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE (18446744073709551608ULL)
+#define AMDGPU_KERNEL_JOB_ID_MOVE_BLIT (18446744073709551607ULL)
+#define AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER (18446744073709551606ULL)
+#define AMDGPU_KERNEL_JOB_ID_CLEANER_SHADER (18446744073709551605ULL)
+#define AMDGPU_KERNEL_JOB_ID_FLUSH_GPU_TLB (18446744073709551604ULL)
+#define AMDGPU_KERNEL_JOB_ID_KFD_GART_MAP (18446744073709551603ULL)
+#define AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST (18446744073709551602ULL)
+
struct amdgpu_job {
struct drm_sched_job base;
struct amdgpu_vm *vm;
- struct amdgpu_sync sync;
- struct amdgpu_sync sched_sync;
- struct amdgpu_ib *ibs;
- struct dma_fence hw_fence;
- struct dma_fence *external_hw_fence;
+ struct amdgpu_sync explicit_sync;
+ struct amdgpu_fence *hw_fence;
+ struct amdgpu_fence *hw_vm_fence;
+ struct dma_fence *gang_submit;
uint32_t preamble_status;
uint32_t preemption_status;
- uint32_t num_ibs;
bool vm_needs_flush;
+ bool gds_switch_needed;
+ bool spm_update_needed;
uint64_t vm_pd_addr;
unsigned vmid;
unsigned pasid;
uint32_t gds_base, gds_size;
uint32_t gws_base, gws_size;
uint32_t oa_base, oa_size;
- uint32_t vram_lost_counter;
+ uint64_t generation;
/* user fence handling */
uint64_t uf_addr;
uint64_t uf_sequence;
+ /* virtual addresses for shadow/GDS/CSA */
+ uint64_t shadow_va;
+ uint64_t csa_va;
+ uint64_t gds_va;
+ bool init_shadow;
+
/* job_run_counter >= 1 means a resubmit job */
uint32_t job_run_counter;
+
+ /* enforce isolation */
+ bool enforce_isolation;
+ bool run_cleaner_shader;
+
+ uint32_t num_ibs;
+ struct amdgpu_ib ibs[];
};
-int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
- struct amdgpu_job **job, struct amdgpu_vm *vm);
-int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
- enum amdgpu_ib_pool_type pool, struct amdgpu_job **job);
+static inline struct amdgpu_ring *amdgpu_job_ring(struct amdgpu_job *job)
+{
+ return to_amdgpu_ring(job->base.entity->rq->sched);
+}
+
+int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct drm_sched_entity *entity, void *owner,
+ unsigned int num_ibs, struct amdgpu_job **job,
+ u64 drm_client_id);
+int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
+ struct drm_sched_entity *entity, void *owner,
+ size_t size, enum amdgpu_ib_pool_type pool_type,
+ struct amdgpu_job **job,
+ u64 k_job_id);
+void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds,
+ struct amdgpu_bo *gws, struct amdgpu_bo *oa);
void amdgpu_job_free_resources(struct amdgpu_job *job);
+void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
+ struct amdgpu_job *leader);
void amdgpu_job_free(struct amdgpu_job *job);
-int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
- void *owner, struct dma_fence **f);
+struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job);
int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
struct dma_fence **fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
index 9342aa23ebd2..63ee6ba6a931 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
@@ -33,27 +33,62 @@
#define JPEG_IDLE_TIMEOUT msecs_to_jiffies(1000)
static void amdgpu_jpeg_idle_work_handler(struct work_struct *work);
+static void amdgpu_jpeg_reg_dump_fini(struct amdgpu_device *adev);
int amdgpu_jpeg_sw_init(struct amdgpu_device *adev)
{
+ int i, r;
+
INIT_DELAYED_WORK(&adev->jpeg.idle_work, amdgpu_jpeg_idle_work_handler);
mutex_init(&adev->jpeg.jpeg_pg_lock);
atomic_set(&adev->jpeg.total_submission_cnt, 0);
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
+ (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG))
+ adev->jpeg.indirect_sram = true;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ if (adev->jpeg.harvest_config & (1U << i))
+ continue;
+
+ if (adev->jpeg.indirect_sram) {
+ r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->jpeg.inst[i].dpg_sram_bo,
+ &adev->jpeg.inst[i].dpg_sram_gpu_addr,
+ &adev->jpeg.inst[i].dpg_sram_cpu_addr);
+ if (r) {
+ dev_err(adev->dev,
+ "JPEG %d (%d) failed to allocate DPG bo\n", i, r);
+ return r;
+ }
+ }
+ }
+
return 0;
}
int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev)
{
- int i;
+ int i, j;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
- if (adev->jpeg.harvest_config & (1 << i))
+ if (adev->jpeg.harvest_config & (1U << i))
continue;
- amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec);
+ amdgpu_bo_free_kernel(
+ &adev->jpeg.inst[i].dpg_sram_bo,
+ &adev->jpeg.inst[i].dpg_sram_gpu_addr,
+ (void **)&adev->jpeg.inst[i].dpg_sram_cpu_addr);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j)
+ amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec[j]);
}
+ if (adev->jpeg.reg_list)
+ amdgpu_jpeg_reg_dump_fini(adev);
+
mutex_destroy(&adev->jpeg.jpeg_pg_lock);
return 0;
@@ -76,19 +111,22 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work)
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, jpeg.idle_work.work);
unsigned int fences = 0;
- unsigned int i;
+ unsigned int i, j;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
- if (adev->jpeg.harvest_config & (1 << i))
+ if (adev->jpeg.harvest_config & (1U << i))
continue;
- fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec);
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j)
+ fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec[j]);
}
- if (!fences && !atomic_read(&adev->jpeg.total_submission_cnt))
+ if (!fences && !atomic_read(&adev->jpeg.total_submission_cnt)) {
+ mutex_lock(&adev->jpeg.jpeg_pg_lock);
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
AMD_PG_STATE_GATE);
- else
+ mutex_unlock(&adev->jpeg.jpeg_pg_lock);
+ } else
schedule_delayed_work(&adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT);
}
@@ -118,18 +156,25 @@ int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring)
unsigned i;
int r;
- WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD);
+ /* JPEG in SRIOV does not support direct register read/write */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
r = amdgpu_ring_alloc(ring, 3);
if (r)
return r;
- amdgpu_ring_write(ring, PACKET0(adev->jpeg.internal.jpeg_pitch, 0));
- amdgpu_ring_write(ring, 0xDEADBEEF);
+ WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe], 0xCAFEDEAD);
+ /* Read the register back to make sure the register write has been executed. */
+ RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
+
+ amdgpu_ring_write(ring, PACKET0(adev->jpeg.internal.jpeg_pitch[ring->pipe], 0));
+ amdgpu_ring_write(ring, 0xABADCAFE);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch);
- if (tmp == 0xDEADBEEF)
+ tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
+ if (tmp == 0xABADCAFE)
break;
udelay(1);
}
@@ -150,14 +195,15 @@ static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle,
const unsigned ib_size_dw = 16;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
ib = &job->ibs[0];
- ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch, 0, 0, PACKETJ_TYPE0);
+ ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch[ring->pipe], 0, 0, PACKETJ_TYPE0);
ib->ptr[1] = 0xDEADBEEF;
for (i = 2; i < 16; i += 2) {
ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
@@ -202,17 +248,360 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = 0;
}
- for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch);
- if (tmp == 0xDEADBEEF)
- break;
- udelay(1);
+ if (!amdgpu_sriov_vf(adev)) {
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ if (amdgpu_emu_mode == 1)
+ udelay(10);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
}
- if (i >= adev->usec_timeout)
- r = -ETIMEDOUT;
-
dma_fence_put(fence);
error:
return r;
}
+
+int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_common_if *ras_if = adev->jpeg.ras_if;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ if (!ras_if)
+ return 0;
+
+ ih_data.head = *ras_if;
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+
+ return 0;
+}
+
+int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r, i;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i) ||
+ !adev->jpeg.inst[i].ras_poison_irq.funcs)
+ continue;
+
+ r = amdgpu_irq_get(adev, &adev->jpeg.inst[i].ras_poison_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+ }
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+ return r;
+}
+
+int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_jpeg_ras *ras;
+
+ if (!adev->jpeg.ras)
+ return 0;
+
+ ras = adev->jpeg.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register jpeg ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "jpeg");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__JPEG;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON;
+ adev->jpeg.ras_if = &ras->ras_block.ras_comm;
+
+ if (!ras->ras_block.ras_late_init)
+ ras->ras_block.ras_late_init = amdgpu_jpeg_ras_late_init;
+
+ return 0;
+}
+
+int amdgpu_jpeg_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
+ enum AMDGPU_UCODE_ID ucode_id)
+{
+ struct amdgpu_firmware_info ucode = {
+ .ucode_id = AMDGPU_UCODE_ID_JPEG_RAM,
+ .mc_addr = adev->jpeg.inst[inst_idx].dpg_sram_gpu_addr,
+ .ucode_size = ((uintptr_t)adev->jpeg.inst[inst_idx].dpg_sram_curr_addr -
+ (uintptr_t)adev->jpeg.inst[inst_idx].dpg_sram_cpu_addr),
+ };
+
+ return psp_execute_ip_fw_load(&adev->psp, &ucode);
+}
+
+/*
+ * debugfs interface to enable/disable jpeg job submission to a specific core.
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_jpeg_sched_mask_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i, j;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+
+ mask = (1ULL << (adev->jpeg.num_jpeg_inst * adev->jpeg.num_jpeg_rings)) - 1;
+ if ((val & mask) == 0)
+ return -EINVAL;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ if (val & (BIT_ULL((i * adev->jpeg.num_jpeg_rings) + j)))
+ ring->sched.ready = true;
+ else
+ ring->sched.ready = false;
+ }
+ }
+ /* publish the sched.ready flag updates so they take effect immediately across SMP */
+ smp_rmb();
+ return 0;
+}
+
+static int amdgpu_debugfs_jpeg_sched_mask_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i, j;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ if (ring->sched.ready)
+ mask |= 1ULL << ((i * adev->jpeg.num_jpeg_rings) + j);
+ }
+ }
+ *val = mask;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_jpeg_sched_mask_fops,
+ amdgpu_debugfs_jpeg_sched_mask_get,
+ amdgpu_debugfs_jpeg_sched_mask_set, "%llx\n");
+
+#endif
+
+void amdgpu_debugfs_jpeg_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ if (!(adev->jpeg.num_jpeg_inst > 1) && !(adev->jpeg.num_jpeg_rings > 1))
+ return;
+ sprintf(name, "amdgpu_jpeg_sched_mask");
+ debugfs_create_file(name, 0600, root, adev,
+ &amdgpu_debugfs_jpeg_sched_mask_fops);
+#endif
+}
+
+static ssize_t amdgpu_get_jpeg_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->jpeg.supported_reset);
+}
+
+static DEVICE_ATTR(jpeg_reset_mask, 0444,
+ amdgpu_get_jpeg_reset_mask, NULL);
+
+int amdgpu_jpeg_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (adev->jpeg.num_jpeg_inst) {
+ r = device_create_file(adev->dev, &dev_attr_jpeg_reset_mask);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+void amdgpu_jpeg_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+ if (adev->dev->kobj.sd) {
+ if (adev->jpeg.num_jpeg_inst)
+ device_remove_file(adev->dev, &dev_attr_jpeg_reset_mask);
+ }
+}
+
+int amdgpu_jpeg_reg_dump_init(struct amdgpu_device *adev,
+ const struct amdgpu_hwip_reg_entry *reg, u32 count)
+{
+ adev->jpeg.ip_dump = kcalloc(adev->jpeg.num_jpeg_inst * count,
+ sizeof(uint32_t), GFP_KERNEL);
+ if (!adev->jpeg.ip_dump) {
+ dev_err(adev->dev,
+ "Failed to allocate memory for JPEG IP Dump\n");
+ return -ENOMEM;
+ }
+ adev->jpeg.reg_list = reg;
+ adev->jpeg.reg_count = count;
+
+ return 0;
+}
+
+static void amdgpu_jpeg_reg_dump_fini(struct amdgpu_device *adev)
+{
+ kfree(adev->jpeg.ip_dump);
+ adev->jpeg.reg_list = NULL;
+ adev->jpeg.reg_count = 0;
+}
+
+void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 inst_off, inst_id, is_powered;
+ int i, j;
+
+ if (!adev->jpeg.ip_dump)
+ return;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ inst_id = GET_INST(JPEG, i);
+ inst_off = i * adev->jpeg.reg_count;
+ /* check power status from UVD_JPEG_POWER_STATUS */
+ adev->jpeg.ip_dump[inst_off] =
+ RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->jpeg.reg_list[0],
+ inst_id));
+ is_powered = ((adev->jpeg.ip_dump[inst_off] & 0x1) != 1);
+
+ if (is_powered)
+ for (j = 1; j < adev->jpeg.reg_count; j++)
+ adev->jpeg.ip_dump[inst_off + j] =
+ RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->jpeg.reg_list[j],
+ inst_id));
+ }
+}
+
+void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 inst_off, is_powered;
+ int i, j;
+
+ if (!adev->jpeg.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->jpeg.num_jpeg_inst);
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ if (adev->jpeg.harvest_config & (1 << i)) {
+ drm_printf(p, "\nHarvested Instance:JPEG%d Skipping dump\n", i);
+ continue;
+ }
+
+ inst_off = i * adev->jpeg.reg_count;
+ is_powered = ((adev->jpeg.ip_dump[inst_off] & 0x1) != 1);
+
+ if (is_powered) {
+ drm_printf(p, "Active Instance:JPEG%d\n", i);
+ for (j = 0; j < adev->jpeg.reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", adev->jpeg.reg_list[j].reg_name,
+ adev->jpeg.ip_dump[inst_off + j]);
+ } else
+ drm_printf(p, "\nInactive Instance:JPEG%d\n", i);
+ }
+}
+
+static inline bool amdgpu_jpeg_reg_valid(u32 reg)
+{
+ if (reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END ||
+ (reg >= JPEG_ATOMIC_RANGE_START && reg <= JPEG_ATOMIC_RANGE_END))
+ return false;
+ else
+ return true;
+}
+
+/**
+ * amdgpu_jpeg_dec_parse_cs - command submission parser
+ *
+ * @parser: Command submission parser context
+ * @job: the job to parse
+ * @ib: the IB to parse
+ *
+ * Parse the command stream, return -EINVAL for invalid packet,
+ * 0 otherwise
+ */
+
+int amdgpu_jpeg_dec_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
+{
+ u32 i, reg, res, cond, type;
+ struct amdgpu_device *adev = parser->adev;
+
+ for (i = 0; i < ib->length_dw ; i += 2) {
+ reg = CP_PACKETJ_GET_REG(ib->ptr[i]);
+ res = CP_PACKETJ_GET_RES(ib->ptr[i]);
+ cond = CP_PACKETJ_GET_COND(ib->ptr[i]);
+ type = CP_PACKETJ_GET_TYPE(ib->ptr[i]);
+
+ if (res) /* only support 0 at the moment */
+ return -EINVAL;
+
+ switch (type) {
+ case PACKETJ_TYPE0:
+ if (cond != PACKETJ_CONDITION_CHECK0 ||
+ !amdgpu_jpeg_reg_valid(reg)) {
+ dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+ return -EINVAL;
+ }
+ break;
+ case PACKETJ_TYPE3:
+ if (cond != PACKETJ_CONDITION_CHECK3 ||
+ !amdgpu_jpeg_reg_valid(reg)) {
+ dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+ return -EINVAL;
+ }
+ break;
+ case PACKETJ_TYPE6:
+ if (ib->ptr[i] == CP_PACKETJ_NOP)
+ continue;
+ dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+ return -EINVAL;
+ default:
+ dev_err(adev->dev, "Unknown packet type %d !\n", type);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
index 55fbff2be761..346ae0ab09d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
@@ -24,30 +24,131 @@
#ifndef __AMDGPU_JPEG_H__
#define __AMDGPU_JPEG_H__
-#define AMDGPU_MAX_JPEG_INSTANCES 2
+#include "amdgpu_ras.h"
+#include "amdgpu_cs.h"
+
+#define AMDGPU_MAX_JPEG_INSTANCES 4
+#define AMDGPU_MAX_JPEG_RINGS 10
+#define AMDGPU_MAX_JPEG_RINGS_4_0_3 8
+
+#define JPEG_REG_RANGE_START 0x4000
+#define JPEG_REG_RANGE_END 0x41c2
+#define JPEG_ATOMIC_RANGE_START 0x4120
+#define JPEG_ATOMIC_RANGE_END 0x412A
+
#define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0)
#define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1)
+#define WREG32_SOC15_JPEG_DPG_MODE(inst_idx, offset, value, indirect) \
+ do { \
+ if (!indirect) { \
+ WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \
+ mmUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15( \
+ JPEG, GET_INST(JPEG, inst_idx), \
+ mmUVD_DPG_LMA_CTL, \
+ (UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT | \
+ indirect << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
+ } else { \
+ *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = \
+ offset; \
+ *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = \
+ value; \
+ } \
+ } while (0)
+
+#define RREG32_SOC15_JPEG_DPG_MODE(inst_idx, offset, mask_en) \
+ ({ \
+ WREG32_SOC15(JPEG, inst_idx, mmUVD_DPG_LMA_CTL, \
+ (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
+ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ RREG32_SOC15(JPEG, inst_idx, mmUVD_DPG_LMA_DATA); \
+ })
+
+#define WREG32_SOC24_JPEG_DPG_MODE(inst_idx, offset, value, indirect) \
+ do { \
+ WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \
+ regUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \
+ regUVD_DPG_LMA_MASK, 0xFFFFFFFF); \
+ WREG32_SOC15( \
+ JPEG, GET_INST(JPEG, inst_idx), \
+ regUVD_DPG_LMA_CTL, \
+ (UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT | \
+ indirect << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
+ } while (0)
+
+#define RREG32_SOC24_JPEG_DPG_MODE(inst_idx, offset, mask_en) \
+ do { \
+ WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \
+ regUVD_DPG_LMA_MASK, 0xFFFFFFFF); \
+ WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \
+ regUVD_DPG_LMA_CTL, \
+ (UVD_DPG_LMA_CTL__MASK_EN_MASK | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ RREG32_SOC15(JPEG, inst_idx, regUVD_DPG_LMA_DATA); \
+ } while (0)
+
+#define ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, offset, value, indirect) \
+ do { \
+ *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = offset; \
+ *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = value; \
+ } while (0)
+
+struct amdgpu_hwip_reg_entry;
+
+enum amdgpu_jpeg_caps {
+ AMDGPU_JPEG_RRMT_ENABLED,
+};
+
+#define AMDGPU_JPEG_CAPS(caps) BIT(AMDGPU_JPEG_##caps)
+
struct amdgpu_jpeg_reg{
- unsigned jpeg_pitch;
+ unsigned jpeg_pitch[AMDGPU_MAX_JPEG_RINGS];
};
struct amdgpu_jpeg_inst {
- struct amdgpu_ring ring_dec;
+ struct amdgpu_ring ring_dec[AMDGPU_MAX_JPEG_RINGS];
struct amdgpu_irq_src irq;
+ struct amdgpu_irq_src ras_poison_irq;
struct amdgpu_jpeg_reg external;
+ struct amdgpu_bo *dpg_sram_bo;
+ struct dpg_pause_state pause_state;
+ void *dpg_sram_cpu_addr;
+ uint64_t dpg_sram_gpu_addr;
+ uint32_t *dpg_sram_curr_addr;
+ uint8_t aid_id;
+};
+
+struct amdgpu_jpeg_ras {
+ struct amdgpu_ras_block_object ras_block;
};
struct amdgpu_jpeg {
uint8_t num_jpeg_inst;
struct amdgpu_jpeg_inst inst[AMDGPU_MAX_JPEG_INSTANCES];
+ unsigned num_jpeg_rings;
struct amdgpu_jpeg_reg internal;
unsigned harvest_config;
struct delayed_work idle_work;
enum amd_powergating_state cur_state;
struct mutex jpeg_pg_lock;
atomic_t total_submission_cnt;
+ struct ras_common_if *ras_if;
+ struct amdgpu_jpeg_ras *ras;
+
+ uint16_t inst_mask;
+ uint8_t num_inst_per_aid;
+ bool indirect_sram;
+ uint32_t supported_reset;
+ uint32_t caps;
+ u32 *ip_dump;
+ u32 reg_count;
+ const struct amdgpu_hwip_reg_entry *reg_list;
};
int amdgpu_jpeg_sw_init(struct amdgpu_device *adev);
@@ -61,4 +162,23 @@ void amdgpu_jpeg_ring_end_use(struct amdgpu_ring *ring);
int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block);
+int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_jpeg_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
+ enum AMDGPU_UCODE_ID ucode_id);
+void amdgpu_debugfs_jpeg_sched_mask_init(struct amdgpu_device *adev);
+int amdgpu_jpeg_sysfs_reset_mask_init(struct amdgpu_device *adev);
+void amdgpu_jpeg_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+int amdgpu_jpeg_reg_dump_init(struct amdgpu_device *adev,
+ const struct amdgpu_hwip_reg_entry *reg, u32 count);
+void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block);
+void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p);
+int amdgpu_jpeg_dec_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
+
#endif /*__AMDGPU_JPEG_H__*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 651c7abfde03..6ee77f431d56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -29,6 +29,7 @@
#include "amdgpu.h"
#include <drm/amdgpu_drm.h>
#include <drm/drm_drv.h>
+#include <drm/drm_fb_helper.h>
#include "amdgpu_uvd.h"
#include "amdgpu_vce.h"
#include "atom.h"
@@ -42,6 +43,9 @@
#include "amdgpu_gem.h"
#include "amdgpu_display.h"
#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
+#include "amd_pcie.h"
+#include "amdgpu_userq.h"
void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
{
@@ -87,12 +91,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
if (adev->rmmio == NULL)
return;
- if (adev->runpm) {
- pm_runtime_get_sync(dev->dev);
- pm_runtime_forbid(dev->dev);
- }
-
- if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DRV_UNLOAD))
+ if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DRV_UNLOAD))
DRM_WARN("smart shift update failed\n");
amdgpu_acpi_fini(adev);
@@ -124,22 +123,6 @@ void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
mutex_unlock(&mgpu_info.mutex);
}
-static void amdgpu_get_audio_func(struct amdgpu_device *adev)
-{
- struct pci_dev *p = NULL;
-
- p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
- adev->pdev->bus->number, 1);
- if (p) {
- pm_runtime_get_sync(&p->dev);
-
- pm_runtime_mark_last_busy(&p->dev);
- pm_runtime_put_autosuspend(&p->dev);
-
- pci_dev_put(p);
- }
-}
-
/**
* amdgpu_driver_load_kms - Main load function for KMS.
*
@@ -152,21 +135,10 @@ static void amdgpu_get_audio_func(struct amdgpu_device *adev)
int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
{
struct drm_device *dev;
- struct pci_dev *parent;
int r, acpi_status;
dev = adev_to_drm(adev);
- if (amdgpu_has_atpx() &&
- (amdgpu_is_atpx_hybrid() ||
- amdgpu_has_atpx_dgpu_power_cntl()) &&
- ((flags & AMD_IS_APU) == 0) &&
- !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
- flags |= AMD_IS_PX;
-
- parent = pci_upstream_bridge(adev->pdev);
- adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
-
/* amdgpu_device_init should report only fatal error
* like memory allocation failure or iomapping failure,
* or memory manager initialization failure, it must
@@ -179,36 +151,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
goto out;
}
- if (amdgpu_device_supports_px(dev) &&
- (amdgpu_runtime_pm != 0)) { /* enable runpm by default for atpx */
- adev->runpm = true;
- dev_info(adev->dev, "Using ATPX for runtime pm\n");
- } else if (amdgpu_device_supports_boco(dev) &&
- (amdgpu_runtime_pm != 0)) { /* enable runpm by default for boco */
- adev->runpm = true;
- dev_info(adev->dev, "Using BOCO for runtime pm\n");
- } else if (amdgpu_device_supports_baco(dev) &&
- (amdgpu_runtime_pm != 0)) {
- switch (adev->asic_type) {
- case CHIP_VEGA20:
- case CHIP_ARCTURUS:
- /* enable runpm if runpm=1 */
- if (amdgpu_runtime_pm > 0)
- adev->runpm = true;
- break;
- case CHIP_VEGA10:
- /* turn runpm on if noretry=0 */
- if (!adev->gmc.noretry)
- adev->runpm = true;
- break;
- default:
- /* enable runpm on CI+ */
- adev->runpm = true;
- break;
- }
- if (adev->runpm)
- dev_info(adev->dev, "Using BACO for runtime pm\n");
- }
+ amdgpu_device_detect_runtime_pm_mode(adev);
/* Call ACPI methods: require modeset init
* but failure is not fatal
@@ -218,62 +161,54 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
if (acpi_status)
dev_dbg(dev->dev, "Error during ACPI methods call\n");
- if (adev->runpm) {
- /* only need to skip on ATPX */
- if (amdgpu_device_supports_px(dev))
- dev_pm_set_driver_flags(dev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
- /* we want direct complete for BOCO */
- if (amdgpu_device_supports_boco(dev))
- dev_pm_set_driver_flags(dev->dev, DPM_FLAG_SMART_PREPARE |
- DPM_FLAG_SMART_SUSPEND |
- DPM_FLAG_MAY_SKIP_RESUME);
- pm_runtime_use_autosuspend(dev->dev);
- pm_runtime_set_autosuspend_delay(dev->dev, 5000);
-
- pm_runtime_allow(dev->dev);
-
- pm_runtime_mark_last_busy(dev->dev);
- pm_runtime_put_autosuspend(dev->dev);
-
- /*
- * For runpm implemented via BACO, PMFW will handle the
- * timing for BACO in and out:
- * - put ASIC into BACO state only when both video and
- * audio functions are in D3 state.
- * - pull ASIC out of BACO state when either video or
- * audio function is in D0 state.
- * Also, at startup, PMFW assumes both functions are in
- * D0 state.
- *
- * So if snd driver was loaded prior to amdgpu driver
- * and audio function was put into D3 state, there will
- * be no PMFW-aware D-state transition(D0->D3) on runpm
- * suspend. Thus the BACO will be not correctly kicked in.
- *
- * Via amdgpu_get_audio_func(), the audio dev is put
- * into D0 state. Then there will be a PMFW-aware D-state
- * transition(D0->D3) on runpm suspend.
- */
- if (amdgpu_device_supports_baco(dev) &&
- !(adev->flags & AMD_IS_APU) &&
- (adev->asic_type >= CHIP_NAVI10))
- amdgpu_get_audio_func(adev);
- }
-
- if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DRV_LOAD))
+ if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DRV_LOAD))
DRM_WARN("smart shift update failed\n");
out:
- if (r) {
- /* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */
- if (adev->rmmio && adev->runpm)
- pm_runtime_put_noidle(dev->dev);
+ if (r)
amdgpu_driver_unload_kms(dev);
- }
return r;
}
+static enum amd_ip_block_type
+ amdgpu_ip_get_block_type(struct amdgpu_device *adev, uint32_t ip)
+{
+ enum amd_ip_block_type type;
+
+ switch (ip) {
+ case AMDGPU_HW_IP_GFX:
+ type = AMD_IP_BLOCK_TYPE_GFX;
+ break;
+ case AMDGPU_HW_IP_COMPUTE:
+ type = AMD_IP_BLOCK_TYPE_GFX;
+ break;
+ case AMDGPU_HW_IP_DMA:
+ type = AMD_IP_BLOCK_TYPE_SDMA;
+ break;
+ case AMDGPU_HW_IP_UVD:
+ case AMDGPU_HW_IP_UVD_ENC:
+ type = AMD_IP_BLOCK_TYPE_UVD;
+ break;
+ case AMDGPU_HW_IP_VCE:
+ type = AMD_IP_BLOCK_TYPE_VCE;
+ break;
+ case AMDGPU_HW_IP_VCN_DEC:
+ case AMDGPU_HW_IP_VCN_ENC:
+ type = AMD_IP_BLOCK_TYPE_VCN;
+ break;
+ case AMDGPU_HW_IP_VCN_JPEG:
+ type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
+ AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN;
+ break;
+ default:
+ type = AMD_IP_BLOCK_TYPE_NUM;
+ break;
+ }
+
+ return type;
+}
+
static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
struct drm_amdgpu_query_fw *query_fw,
struct amdgpu_device *adev)
@@ -323,6 +258,14 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
fw_info->ver = adev->gfx.rlc_srls_fw_version;
fw_info->feature = adev->gfx.rlc_srls_feature_version;
break;
+ case AMDGPU_INFO_FW_GFX_RLCP:
+ fw_info->ver = adev->gfx.rlcp_ucode_version;
+ fw_info->feature = adev->gfx.rlcp_ucode_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_RLCV:
+ fw_info->ver = adev->gfx.rlcv_ucode_version;
+ fw_info->feature = adev->gfx.rlcv_ucode_feature_version;
+ break;
case AMDGPU_INFO_FW_GFX_MEC:
if (query_fw->index == 0) {
fw_info->ver = adev->gfx.mec_fw_version;
@@ -400,12 +343,54 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
fw_info->ver = adev->psp.toc.fw_version;
fw_info->feature = adev->psp.toc.feature_version;
break;
+ case AMDGPU_INFO_FW_CAP:
+ fw_info->ver = adev->psp.cap_fw_version;
+ fw_info->feature = adev->psp.cap_feature_version;
+ break;
+ case AMDGPU_INFO_FW_MES_KIQ:
+ fw_info->ver = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK;
+ fw_info->feature = (adev->mes.kiq_version & AMDGPU_MES_FEAT_VERSION_MASK)
+ >> AMDGPU_MES_FEAT_VERSION_SHIFT;
+ break;
+ case AMDGPU_INFO_FW_MES:
+ fw_info->ver = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
+ fw_info->feature = (adev->mes.sched_version & AMDGPU_MES_FEAT_VERSION_MASK)
+ >> AMDGPU_MES_FEAT_VERSION_SHIFT;
+ break;
+ case AMDGPU_INFO_FW_IMU:
+ fw_info->ver = adev->gfx.imu_fw_version;
+ fw_info->feature = 0;
+ break;
+ case AMDGPU_INFO_FW_VPE:
+ fw_info->ver = adev->vpe.fw_version;
+ fw_info->feature = adev->vpe.feature_version;
+ break;
default:
return -EINVAL;
}
return 0;
}
+static int amdgpu_userq_metadata_info_gfx(struct amdgpu_device *adev,
+ struct drm_amdgpu_info *info,
+ struct drm_amdgpu_info_uq_metadata_gfx *meta)
+{
+ int ret = -EOPNOTSUPP;
+
+ if (adev->gfx.funcs->get_gfx_shadow_info) {
+ struct amdgpu_gfx_shadow_info shadow = {};
+
+ adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow, true);
+ meta->shadow_size = shadow.shadow_size;
+ meta->shadow_alignment = shadow.shadow_alignment;
+ meta->csa_size = shadow.csa_size;
+ meta->csa_alignment = shadow.csa_alignment;
+ ret = 0;
+ }
+
+ return ret;
+}
+
static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
struct drm_amdgpu_info *info,
struct drm_amdgpu_info_hw_ip *result)
@@ -414,6 +399,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
uint32_t ib_size_alignment = 0;
enum amd_ip_block_type type;
unsigned int num_rings = 0;
+ uint32_t num_slots = 0;
unsigned int i, j;
if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
@@ -423,24 +409,45 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_GFX:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- if (adev->gfx.gfx_ring[i].sched.ready)
+ if (adev->gfx.gfx_ring[i].sched.ready &&
+ !adev->gfx.gfx_ring[i].no_user_submission)
++num_rings;
+
+ if (!adev->gfx.disable_uq) {
+ for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++)
+ num_slots += hweight32(adev->mes.gfx_hqd_mask[i]);
+ }
+
ib_start_alignment = 32;
ib_size_alignment = 32;
break;
case AMDGPU_HW_IP_COMPUTE:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_compute_rings; i++)
- if (adev->gfx.compute_ring[i].sched.ready)
+ if (adev->gfx.compute_ring[i].sched.ready &&
+ !adev->gfx.compute_ring[i].no_user_submission)
++num_rings;
+
+ /* Compute rings live under adev->gfx, so its user-queue switch applies. */
+ if (!adev->gfx.disable_uq) {
+ for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++)
+ num_slots += hweight32(adev->mes.compute_hqd_mask[i]);
+ }
+
ib_start_alignment = 32;
ib_size_alignment = 32;
break;
case AMDGPU_HW_IP_DMA:
type = AMD_IP_BLOCK_TYPE_SDMA;
for (i = 0; i < adev->sdma.num_instances; i++)
- if (adev->sdma.instance[i].ring.sched.ready)
+ if (adev->sdma.instance[i].ring.sched.ready &&
+ !adev->sdma.instance[i].ring.no_user_submission)
++num_rings;
+
+ /* SDMA slots are only reported when SDMA user queues are enabled. */
+ if (!adev->sdma.disable_uq) {
+ for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++)
+ num_slots += hweight32(adev->mes.sdma_hqd_mask[i]);
+ }
+
ib_start_alignment = 256;
ib_size_alignment = 4;
break;
@@ -450,19 +457,21 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->uvd.harvest_config & (1 << i))
continue;
- if (adev->uvd.inst[i].ring.sched.ready)
+ if (adev->uvd.inst[i].ring.sched.ready &&
+ !adev->uvd.inst[i].ring.no_user_submission)
++num_rings;
}
- ib_start_alignment = 64;
+ ib_start_alignment = 256;
ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VCE:
type = AMD_IP_BLOCK_TYPE_VCE;
for (i = 0; i < adev->vce.num_rings; i++)
- if (adev->vce.ring[i].sched.ready)
+ if (adev->vce.ring[i].sched.ready &&
+ !adev->vce.ring[i].no_user_submission)
++num_rings;
- ib_start_alignment = 4;
- ib_size_alignment = 1;
+ ib_start_alignment = 256;
+ ib_size_alignment = 4;
break;
case AMDGPU_HW_IP_UVD_ENC:
type = AMD_IP_BLOCK_TYPE_UVD;
@@ -471,36 +480,39 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
continue;
for (j = 0; j < adev->uvd.num_enc_rings; j++)
- if (adev->uvd.inst[i].ring_enc[j].sched.ready)
+ if (adev->uvd.inst[i].ring_enc[j].sched.ready &&
+ !adev->uvd.inst[i].ring_enc[j].no_user_submission)
++num_rings;
}
- ib_start_alignment = 64;
- ib_size_alignment = 64;
+ ib_start_alignment = 256;
+ ib_size_alignment = 4;
break;
case AMDGPU_HW_IP_VCN_DEC:
type = AMD_IP_BLOCK_TYPE_VCN;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- if (adev->uvd.harvest_config & (1 << i))
+ if (adev->vcn.harvest_config & (1 << i))
continue;
- if (adev->vcn.inst[i].ring_dec.sched.ready)
+ if (adev->vcn.inst[i].ring_dec.sched.ready &&
+ !adev->vcn.inst[i].ring_dec.no_user_submission)
++num_rings;
}
- ib_start_alignment = 16;
- ib_size_alignment = 16;
+ ib_start_alignment = 256;
+ ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VCN_ENC:
type = AMD_IP_BLOCK_TYPE_VCN;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- if (adev->uvd.harvest_config & (1 << i))
+ if (adev->vcn.harvest_config & (1 << i))
continue;
- for (j = 0; j < adev->vcn.num_enc_rings; j++)
- if (adev->vcn.inst[i].ring_enc[j].sched.ready)
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; j++)
+ if (adev->vcn.inst[i].ring_enc[j].sched.ready &&
+ !adev->vcn.inst[i].ring_enc[j].no_user_submission)
++num_rings;
}
- ib_start_alignment = 64;
- ib_size_alignment = 1;
+ ib_start_alignment = 256;
+ ib_size_alignment = 4;
break;
case AMDGPU_HW_IP_VCN_JPEG:
type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
@@ -510,11 +522,21 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->jpeg.harvest_config & (1 << i))
continue;
- if (adev->jpeg.inst[i].ring_dec.sched.ready)
- ++num_rings;
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; j++)
+ if (adev->jpeg.inst[i].ring_dec[j].sched.ready &&
+ !adev->jpeg.inst[i].ring_dec[j].no_user_submission)
+ ++num_rings;
}
- ib_start_alignment = 16;
- ib_size_alignment = 16;
+ ib_start_alignment = 256;
+ ib_size_alignment = 64;
+ break;
+ case AMDGPU_HW_IP_VPE:
+ type = AMD_IP_BLOCK_TYPE_VPE;
+ if (adev->vpe.ring.sched.ready &&
+ !adev->vpe.ring.no_user_submission)
+ ++num_rings;
+ ib_start_alignment = 256;
+ ib_size_alignment = 4;
break;
default:
return -EINVAL;
@@ -533,8 +555,41 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
result->hw_ip_version_major = adev->ip_blocks[i].version->major;
result->hw_ip_version_minor = adev->ip_blocks[i].version->minor;
+
+ if (adev->asic_type >= CHIP_VEGA10) {
+ switch (type) {
+ case AMD_IP_BLOCK_TYPE_GFX:
+ result->ip_discovery_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, GC_HWIP, 0));
+ break;
+ case AMD_IP_BLOCK_TYPE_SDMA:
+ result->ip_discovery_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, SDMA0_HWIP, 0));
+ break;
+ case AMD_IP_BLOCK_TYPE_UVD:
+ case AMD_IP_BLOCK_TYPE_VCN:
+ case AMD_IP_BLOCK_TYPE_JPEG:
+ result->ip_discovery_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, UVD_HWIP, 0));
+ break;
+ case AMD_IP_BLOCK_TYPE_VCE:
+ result->ip_discovery_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, VCE_HWIP, 0));
+ break;
+ case AMD_IP_BLOCK_TYPE_VPE:
+ result->ip_discovery_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, VPE_HWIP, 0));
+ break;
+ default:
+ result->ip_discovery_version = 0;
+ break;
+ }
+ } else {
+ result->ip_discovery_version = 0;
+ }
result->capabilities_flags = 0;
result->available_rings = (1 << num_rings) - 1;
+ result->userq_num_slots = num_slots;
result->ib_start_alignment = ib_start_alignment;
result->ib_size_alignment = ib_size_alignment;
return 0;
@@ -561,11 +616,16 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
struct drm_amdgpu_info *info = data;
struct amdgpu_mode_info *minfo = &adev->mode_info;
void __user *out = (void __user *)(uintptr_t)info->return_pointer;
+ struct amdgpu_fpriv *fpriv;
+ struct amdgpu_ip_block *ip_block;
+ enum amd_ip_block_type type;
+ struct amdgpu_xcp *xcp;
+ u32 count, inst_mask;
uint32_t size = info->return_size;
struct drm_crtc *crtc;
uint32_t ui32 = 0;
uint64_t ui64 = 0;
- int i, found;
+ int i, found, ret;
int ui32_size = sizeof(ui32);
if (!info->return_size || !info->return_pointer)
@@ -580,6 +640,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
crtc = (struct drm_crtc *)minfo->crtcs[i];
if (crtc && crtc->base.id == info->mode_crtc.id) {
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
ui32 = amdgpu_crtc->crtc_id;
found = 1;
break;
@@ -592,56 +653,82 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
case AMDGPU_INFO_HW_IP_INFO: {
struct drm_amdgpu_info_hw_ip ip = {};
- int ret;
ret = amdgpu_hw_ip_info(adev, info, &ip);
if (ret)
return ret;
- ret = copy_to_user(out, &ip, min((size_t)size, sizeof(ip)));
+ ret = copy_to_user(out, &ip, min_t(size_t, size, sizeof(ip)));
return ret ? -EFAULT : 0;
}
case AMDGPU_INFO_HW_IP_COUNT: {
- enum amd_ip_block_type type;
- uint32_t count = 0;
+ fpriv = (struct amdgpu_fpriv *)filp->driver_priv;
+ type = amdgpu_ip_get_block_type(adev, info->query_hw_ip.type);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, type);
- switch (info->query_hw_ip.type) {
- case AMDGPU_HW_IP_GFX:
- type = AMD_IP_BLOCK_TYPE_GFX;
- break;
- case AMDGPU_HW_IP_COMPUTE:
- type = AMD_IP_BLOCK_TYPE_GFX;
- break;
- case AMDGPU_HW_IP_DMA:
- type = AMD_IP_BLOCK_TYPE_SDMA;
- break;
- case AMDGPU_HW_IP_UVD:
- type = AMD_IP_BLOCK_TYPE_UVD;
+ if (!ip_block || !ip_block->status.valid)
+ return -EINVAL;
+
+ if (adev->xcp_mgr && adev->xcp_mgr->num_xcps > 0 &&
+ fpriv->xcp_id < adev->xcp_mgr->num_xcps) {
+ xcp = &adev->xcp_mgr->xcp[fpriv->xcp_id];
+ switch (type) {
+ case AMD_IP_BLOCK_TYPE_GFX:
+ ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &inst_mask);
+ if (ret)
+ return ret;
+ count = hweight32(inst_mask);
+ break;
+ case AMD_IP_BLOCK_TYPE_SDMA:
+ ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_SDMA, &inst_mask);
+ if (ret)
+ return ret;
+ count = hweight32(inst_mask);
+ break;
+ case AMD_IP_BLOCK_TYPE_JPEG:
+ ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_VCN, &inst_mask);
+ if (ret)
+ return ret;
+ count = hweight32(inst_mask) * adev->jpeg.num_jpeg_rings;
+ break;
+ case AMD_IP_BLOCK_TYPE_VCN:
+ ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_VCN, &inst_mask);
+ if (ret)
+ return ret;
+ count = hweight32(inst_mask);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return copy_to_user(out, &count, min(size, 4u)) ? -EFAULT : 0;
+ }
+
+ switch (type) {
+ case AMD_IP_BLOCK_TYPE_GFX:
+ case AMD_IP_BLOCK_TYPE_VCE:
+ count = 1;
break;
- case AMDGPU_HW_IP_VCE:
- type = AMD_IP_BLOCK_TYPE_VCE;
+ case AMD_IP_BLOCK_TYPE_SDMA:
+ count = adev->sdma.num_instances;
break;
- case AMDGPU_HW_IP_UVD_ENC:
- type = AMD_IP_BLOCK_TYPE_UVD;
+ case AMD_IP_BLOCK_TYPE_JPEG:
+ count = adev->jpeg.num_jpeg_inst * adev->jpeg.num_jpeg_rings;
break;
- case AMDGPU_HW_IP_VCN_DEC:
- case AMDGPU_HW_IP_VCN_ENC:
- type = AMD_IP_BLOCK_TYPE_VCN;
+ case AMD_IP_BLOCK_TYPE_VCN:
+ count = adev->vcn.num_vcn_inst;
break;
- case AMDGPU_HW_IP_VCN_JPEG:
- type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
- AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN;
+ case AMD_IP_BLOCK_TYPE_UVD:
+ count = adev->uvd.num_uvd_inst;
break;
+ /* For all other IP block types not listed in the switch statement
+ * the ip status is valid here and the instance count is one.
+ */
default:
- return -EINVAL;
+ count = 1;
+ break;
}
- for (i = 0; i < adev->num_ip_blocks; i++)
- if (adev->ip_blocks[i].version->type == type &&
- adev->ip_blocks[i].status.valid &&
- count < AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
- count++;
-
return copy_to_user(out, &count, min(size, 4u)) ? -EFAULT : 0;
}
case AMDGPU_INFO_TIMESTAMP:
@@ -649,7 +736,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_FW_VERSION: {
struct drm_amdgpu_info_firmware fw_info;
- int ret;
/* We only support one instance of each IP block right now. */
if (info->query_fw.ip_instance != 0)
@@ -672,13 +758,14 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
ui64 = atomic64_read(&adev->num_vram_cpu_page_faults);
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_VRAM_USAGE:
- ui64 = amdgpu_vram_mgr_usage(ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM));
+ ui64 = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ?
+ ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) : 0;
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_VIS_VRAM_USAGE:
- ui64 = amdgpu_vram_mgr_vis_usage(ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM));
+ ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_GTT_USAGE:
- ui64 = amdgpu_gtt_mgr_usage(ttm_manager_type(&adev->mman.bdev, TTM_PL_TT));
+ ui64 = ttm_resource_manager_usage(&adev->mman.gtt_mgr.manager);
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_GDS_CONFIG: {
struct drm_amdgpu_info_gds gds_info;
@@ -702,24 +789,24 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
atomic64_read(&adev->visible_pin_size),
vram_gtt.vram_size);
vram_gtt.gtt_size = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT)->size;
- vram_gtt.gtt_size *= PAGE_SIZE;
vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size);
return copy_to_user(out, &vram_gtt,
min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0;
}
case AMDGPU_INFO_MEMORY: {
struct drm_amdgpu_memory_info mem;
- struct ttm_resource_manager *vram_man =
- ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
struct ttm_resource_manager *gtt_man =
- ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
+ &adev->mman.gtt_mgr.manager;
+ struct ttm_resource_manager *vram_man =
+ &adev->mman.vram_mgr.manager;
+
memset(&mem, 0, sizeof(mem));
mem.vram.total_heap_size = adev->gmc.real_vram_size;
mem.vram.usable_heap_size = adev->gmc.real_vram_size -
atomic64_read(&adev->vram_pin_size) -
AMDGPU_VM_RESERVED_VRAM;
- mem.vram.heap_usage =
- amdgpu_vram_mgr_usage(vram_man);
+ mem.vram.heap_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ?
+ ttm_resource_manager_usage(vram_man) : 0;
mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
mem.cpu_accessible_vram.total_heap_size =
@@ -729,16 +816,14 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
atomic64_read(&adev->visible_pin_size),
mem.vram.usable_heap_size);
mem.cpu_accessible_vram.heap_usage =
- amdgpu_vram_mgr_vis_usage(vram_man);
+ amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
mem.cpu_accessible_vram.max_allocation =
mem.cpu_accessible_vram.usable_heap_size * 3 / 4;
mem.gtt.total_heap_size = gtt_man->size;
- mem.gtt.total_heap_size *= PAGE_SIZE;
mem.gtt.usable_heap_size = mem.gtt.total_heap_size -
atomic64_read(&adev->gart_pin_size);
- mem.gtt.heap_usage =
- amdgpu_gtt_mgr_usage(gtt_man);
+ mem.gtt.heap_usage = ttm_resource_manager_usage(gtt_man);
mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4;
return copy_to_user(out, &mem,
@@ -746,32 +831,47 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
? -EFAULT : 0;
}
case AMDGPU_INFO_READ_MMR_REG: {
- unsigned n, alloc_size;
+ int ret = 0;
+ unsigned int n, alloc_size;
uint32_t *regs;
- unsigned se_num = (info->read_mmr_reg.instance >>
+ unsigned int se_num = (info->read_mmr_reg.instance >>
AMDGPU_INFO_MMR_SE_INDEX_SHIFT) &
AMDGPU_INFO_MMR_SE_INDEX_MASK;
- unsigned sh_num = (info->read_mmr_reg.instance >>
+ unsigned int sh_num = (info->read_mmr_reg.instance >>
AMDGPU_INFO_MMR_SH_INDEX_SHIFT) &
AMDGPU_INFO_MMR_SH_INDEX_MASK;
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return -ENOENT;
+
/* set full masks if the userspace set all bits
- * in the bitfields */
- if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK)
+ * in the bitfields
+ */
+ if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) {
se_num = 0xffffffff;
- else if (se_num >= AMDGPU_GFX_MAX_SE)
- return -EINVAL;
- if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK)
+ } else if (se_num >= AMDGPU_GFX_MAX_SE) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) {
sh_num = 0xffffffff;
- else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE)
- return -EINVAL;
+ } else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) {
+ ret = -EINVAL;
+ goto out;
+ }
- if (info->read_mmr_reg.count > 128)
- return -EINVAL;
+ if (info->read_mmr_reg.count > 128) {
+ ret = -EINVAL;
+ goto out;
+ }
regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL);
- if (!regs)
- return -ENOMEM;
+ if (!regs) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
alloc_size = info->read_mmr_reg.count * sizeof(*regs);
amdgpu_gfx_off_ctrl(adev, false);
@@ -783,18 +883,22 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
info->read_mmr_reg.dword_offset + i);
kfree(regs);
amdgpu_gfx_off_ctrl(adev, true);
- return -EFAULT;
+ ret = -EFAULT;
+ goto out;
}
}
amdgpu_gfx_off_ctrl(adev, true);
n = copy_to_user(out, regs, min(size, alloc_size));
kfree(regs);
- return n ? -EFAULT : 0;
+ ret = (n ? -EFAULT : 0);
+out:
+ up_read(&adev->reset_domain->sem);
+ return ret;
}
case AMDGPU_INFO_DEV_INFO: {
struct drm_amdgpu_info_device *dev_info;
uint64_t vm_size;
- int ret;
+ uint32_t pcie_gen_mask, pcie_width_mask;
dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL);
if (!dev_info)
@@ -812,32 +916,52 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (adev->pm.dpm_enabled) {
dev_info->max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 10;
dev_info->max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 10;
+ dev_info->min_engine_clock = amdgpu_dpm_get_sclk(adev, true) * 10;
+ dev_info->min_memory_clock = amdgpu_dpm_get_mclk(adev, true) * 10;
} else {
- dev_info->max_engine_clock = adev->clock.default_sclk * 10;
- dev_info->max_memory_clock = adev->clock.default_mclk * 10;
+ dev_info->max_engine_clock =
+ dev_info->min_engine_clock =
+ adev->clock.default_sclk * 10;
+ dev_info->max_memory_clock =
+ dev_info->min_memory_clock =
+ adev->clock.default_mclk * 10;
}
dev_info->enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
dev_info->num_rb_pipes = adev->gfx.config.max_backends_per_se *
adev->gfx.config.max_shader_engines;
dev_info->num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts;
- dev_info->_pad = 0;
dev_info->ids_flags = 0;
if (adev->flags & AMD_IS_APU)
dev_info->ids_flags |= AMDGPU_IDS_FLAGS_FUSION;
- if (amdgpu_mcbp || amdgpu_sriov_vf(adev))
+ if (adev->gfx.mcbp)
dev_info->ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
if (amdgpu_is_tmz(adev))
dev_info->ids_flags |= AMDGPU_IDS_FLAGS_TMZ;
+ if (adev->gfx.config.ta_cntl2_truncate_coord_mode)
+ dev_info->ids_flags |= AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD;
+
+ /* Gang submit is not supported under SRIOV currently */
+ if (!amdgpu_sriov_vf(adev))
+ dev_info->ids_flags |= AMDGPU_IDS_FLAGS_GANG_SUBMIT;
+
+ if (amdgpu_passthrough(adev))
+ dev_info->ids_flags |= (AMDGPU_IDS_FLAGS_MODE_PT <<
+ AMDGPU_IDS_FLAGS_MODE_SHIFT) &
+ AMDGPU_IDS_FLAGS_MODE_MASK;
+ else if (amdgpu_sriov_vf(adev))
+ dev_info->ids_flags |= (AMDGPU_IDS_FLAGS_MODE_VF <<
+ AMDGPU_IDS_FLAGS_MODE_SHIFT) &
+ AMDGPU_IDS_FLAGS_MODE_MASK;
vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
- vm_size -= AMDGPU_VA_RESERVED_SIZE;
+ vm_size -= AMDGPU_VA_RESERVED_TOP;
/* Older VCE FW versions are buggy and can handle only 40bits */
if (adev->vce.fw_version &&
adev->vce.fw_version < AMDGPU_VCE_FW_53_45)
vm_size = min(vm_size, 1ULL << 40);
- dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
+ dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_BOTTOM;
dev_info->virtual_address_max =
min(vm_size, AMDGPU_GMC_HOLE_START);
@@ -854,7 +978,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
memcpy(&dev_info->cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0],
sizeof(adev->gfx.cu_info.ao_cu_bitmap));
memcpy(&dev_info->cu_bitmap[0], &adev->gfx.cu_info.bitmap[0],
- sizeof(adev->gfx.cu_info.bitmap));
+ sizeof(dev_info->cu_bitmap));
dev_info->vram_type = adev->gmc.vram_type;
dev_info->vram_bit_width = adev->gmc.vram_width;
dev_info->vce_harvest_config = adev->vce.harvest_config;
@@ -874,13 +998,51 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
dev_info->tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask;
+ /* Combine the chip gen mask with the platform (CPU/mobo) mask. */
+ pcie_gen_mask = adev->pm.pcie_gen_mask &
+ (adev->pm.pcie_gen_mask >> CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT);
+ pcie_width_mask = adev->pm.pcie_mlw_mask &
+ (adev->pm.pcie_mlw_mask >> CAIL_PCIE_LINK_WIDTH_SUPPORT_SHIFT);
+ dev_info->pcie_gen = fls(pcie_gen_mask);
+ dev_info->pcie_num_lanes =
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 ? 32 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 ? 16 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 ? 12 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 ? 8 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 ? 4 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 ? 2 : 1;
+
+ dev_info->tcp_cache_size = adev->gfx.config.gc_tcp_l1_size;
+ dev_info->num_sqc_per_wgp = adev->gfx.config.gc_num_sqc_per_wgp;
+ dev_info->sqc_data_cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
+ dev_info->sqc_inst_cache_size = adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
+ dev_info->gl1c_cache_size = adev->gfx.config.gc_gl1c_size_per_instance *
+ adev->gfx.config.gc_gl1c_per_sa;
+ dev_info->gl2c_cache_size = adev->gfx.config.gc_gl2c_per_gpu;
+ dev_info->mall_size = adev->gmc.mall_size;
+
+
+ if (adev->gfx.funcs->get_gfx_shadow_info) {
+ struct amdgpu_gfx_shadow_info shadow_info;
+
+ ret = amdgpu_gfx_get_gfx_shadow_info(adev, &shadow_info);
+ if (!ret) {
+ dev_info->shadow_size = shadow_info.shadow_size;
+ dev_info->shadow_alignment = shadow_info.shadow_alignment;
+ dev_info->csa_size = shadow_info.csa_size;
+ dev_info->csa_alignment = shadow_info.csa_alignment;
+ }
+ }
+
+ dev_info->userq_ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+
ret = copy_to_user(out, dev_info,
min((size_t)size, sizeof(*dev_info))) ? -EFAULT : 0;
kfree(dev_info);
return ret;
}
case AMDGPU_INFO_VCE_CLOCK_TABLE: {
- unsigned i;
+ unsigned int i;
struct drm_amdgpu_info_vce_clock_table vce_clk_table = {};
struct amd_vce_state *vce_state;
@@ -922,12 +1084,17 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
struct atom_context *atom_context;
atom_context = adev->mode_info.atom_context;
- memcpy(vbios_info.name, atom_context->name, sizeof(atom_context->name));
- memcpy(vbios_info.vbios_pn, atom_context->vbios_pn, sizeof(atom_context->vbios_pn));
- vbios_info.version = atom_context->version;
- memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str,
- sizeof(atom_context->vbios_ver_str));
- memcpy(vbios_info.date, atom_context->date, sizeof(atom_context->date));
+ if (atom_context) {
+ memcpy(vbios_info.name, atom_context->name,
+ sizeof(atom_context->name));
+ memcpy(vbios_info.vbios_pn, atom_context->vbios_pn,
+ sizeof(atom_context->vbios_pn));
+ vbios_info.version = atom_context->version;
+ memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str,
+ sizeof(atom_context->vbios_ver_str));
+ memcpy(vbios_info.date, atom_context->date,
+ sizeof(atom_context->date));
+ }
return copy_to_user(out, &vbios_info,
min((size_t)size, sizeof(vbios_info))) ? -EFAULT : 0;
@@ -1001,7 +1168,21 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
case AMDGPU_INFO_SENSOR_GPU_AVG_POWER:
/* get average GPU power */
if (amdgpu_dpm_read_sensor(adev,
- AMDGPU_PP_SENSOR_GPU_POWER,
+ AMDGPU_PP_SENSOR_GPU_AVG_POWER,
+ (void *)&ui32, &ui32_size)) {
+ /* fall back to input power for backwards compat */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GPU_INPUT_POWER,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ }
+ ui32 >>= 8;
+ break;
+ case AMDGPU_INFO_SENSOR_GPU_INPUT_POWER:
+ /* get input GPU power */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GPU_INPUT_POWER,
(void *)&ui32, &ui32_size)) {
return -EINVAL;
}
@@ -1041,6 +1222,24 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
}
ui32 /= 100;
break;
+ case AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_SCLK:
+ /* get peak pstate sclk in Mhz */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_PEAK_PSTATE_SCLK,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ ui32 /= 100;
+ break;
+ case AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_MCLK:
+ /* get peak pstate mclk in Mhz */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_PEAK_PSTATE_MCLK,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ ui32 /= 100;
+ break;
default:
DRM_DEBUG_KMS("Invalid request %d\n",
info->sensor_info.type);
@@ -1068,6 +1267,9 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
struct drm_amdgpu_info_video_caps *caps;
int r;
+ if (!adev->asic_funcs->query_video_codecs)
+ return -EINVAL;
+
switch (info->video_cap.type) {
case AMDGPU_INFO_VIDEO_CAPS_DECODE:
r = amdgpu_asic_query_video_codecs(adev, false, &codecs);
@@ -1120,6 +1322,51 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
kfree(caps);
return r;
}
+ case AMDGPU_INFO_MAX_IBS: {
+ uint32_t max_ibs[AMDGPU_HW_IP_NUM];
+
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+ max_ibs[i] = amdgpu_ring_max_ibs(i);
+
+ return copy_to_user(out, max_ibs,
+ min((size_t)size, sizeof(max_ibs))) ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_GPUVM_FAULT: {
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct drm_amdgpu_info_gpuvm_fault gpuvm_fault;
+ unsigned long flags;
+
+ if (!vm)
+ return -EINVAL;
+
+ memset(&gpuvm_fault, 0, sizeof(gpuvm_fault));
+
+ xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+ gpuvm_fault.addr = vm->fault_info.addr;
+ gpuvm_fault.status = vm->fault_info.status;
+ gpuvm_fault.vmhub = vm->fault_info.vmhub;
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+
+ return copy_to_user(out, &gpuvm_fault,
+ min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_UQ_FW_AREAS: {
+ struct drm_amdgpu_info_uq_metadata meta_info = {};
+
+ switch (info->query_hw_ip.type) {
+ case AMDGPU_HW_IP_GFX:
+ ret = amdgpu_userq_metadata_info_gfx(adev, info, &meta_info.gfx);
+ if (ret)
+ return ret;
+
+ /* Propagate -EFAULT when the copy to userspace fails. */
+ ret = copy_to_user(out, &meta_info,
+ min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0;
+ return ret;
+ default:
+ return -EINVAL;
+ }
+ }
default:
DRM_DEBUG_KMS("Invalid request %d\n", info->query);
return -EINVAL;
@@ -1127,23 +1374,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return 0;
}
-
-/*
- * Outdated mess for old drm with Xorg being in charge (void function now).
- */
-/**
- * amdgpu_driver_lastclose_kms - drm callback for last close
- *
- * @dev: drm dev pointer
- *
- * Switch vga_switcheroo state after last close (all asics).
- */
-void amdgpu_driver_lastclose_kms(struct drm_device *dev)
-{
- drm_fb_helper_lastclose(dev);
- vga_switcheroo_process_delayed_switch();
-}
-
/**
* amdgpu_driver_open_kms - drm callback for open
*
@@ -1186,13 +1416,15 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
pasid = 0;
}
- r = amdgpu_vm_init(adev, &fpriv->vm);
+ r = amdgpu_xcp_open_device(adev, fpriv, file_priv);
if (r)
goto error_pasid;
- r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid);
+ amdgpu_debugfs_vm_init(file_priv);
+
+ r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id, pasid);
if (r)
- goto error_vm;
+ goto error_pasid;
fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
if (!fpriv->prt_va) {
@@ -1200,7 +1432,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
goto error_vm;
}
- if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
+ if (adev->gfx.mcbp) {
uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj,
@@ -1209,10 +1441,22 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
goto error_vm;
}
+ r = amdgpu_seq64_map(adev, &fpriv->vm, &fpriv->seq64_va);
+ if (r)
+ goto error_vm;
+
mutex_init(&fpriv->bo_list_lock);
- idr_init(&fpriv->bo_list_handles);
+ idr_init_base(&fpriv->bo_list_handles, 1);
+
+ r = amdgpu_userq_mgr_init(&fpriv->userq_mgr, file_priv, adev);
+ if (r)
+ DRM_WARN("Can't setup usermode queues, use legacy workload submission only\n");
- amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
+ r = amdgpu_eviction_fence_init(&fpriv->evf_mgr);
+ if (r)
+ goto error_vm;
+
+ amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev);
file_priv->driver_priv = fpriv;
goto out_suspend;
@@ -1221,15 +1465,12 @@ error_vm:
amdgpu_vm_fini(adev, &fpriv->vm);
error_pasid:
- if (pasid) {
+ if (pasid)
amdgpu_pasid_free(pasid);
- amdgpu_vm_set_pasid(adev, &fpriv->vm, 0);
- }
kfree(fpriv);
out_suspend:
- pm_runtime_mark_last_busy(dev->dev);
pm_put:
pm_runtime_put_autosuspend(dev->dev);
@@ -1264,18 +1505,22 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE) != NULL)
amdgpu_vce_free_handles(adev, file_priv);
- amdgpu_vm_bo_rmv(adev, fpriv->prt_va);
+ if (fpriv->csa_va) {
+ uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
- if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
- /* TODO: how to handle reserve failure */
- BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true));
- amdgpu_vm_bo_rmv(adev, fpriv->csa_va);
+ WARN_ON(amdgpu_unmap_static_csa(adev, &fpriv->vm, adev->virt.csa_obj,
+ fpriv->csa_va, csa_addr));
fpriv->csa_va = NULL;
- amdgpu_bo_unreserve(adev->virt.csa_obj);
}
+ amdgpu_seq64_unmap(adev, fpriv);
+
pasid = fpriv->vm.pasid;
pd = amdgpu_bo_ref(fpriv->vm.root.bo);
+ if (!WARN_ON(amdgpu_bo_reserve(pd, true))) {
+ amdgpu_vm_bo_del(adev, fpriv->prt_va);
+ amdgpu_bo_unreserve(pd);
+ }
amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
amdgpu_vm_fini(adev, &fpriv->vm);
@@ -1293,7 +1538,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
kfree(fpriv);
file_priv->driver_priv = NULL;
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
}
@@ -1419,16 +1663,15 @@ void amdgpu_disable_vblank_kms(struct drm_crtc *crtc)
static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct amdgpu_device *adev = m->private;
struct drm_amdgpu_info_firmware fw_info;
struct drm_amdgpu_query_fw query_fw;
struct atom_context *ctx = adev->mode_info.atom_context;
- uint8_t smu_minor, smu_debug;
- uint16_t smu_major;
+ uint8_t smu_program, smu_major, smu_minor, smu_debug;
int ret, i;
static const char *ta_fw_name[TA_FW_TYPE_MAX_INDEX] = {
-#define TA_FW_NAME(type) [TA_FW_TYPE_PSP_##type] = #type
+#define TA_FW_NAME(type)[TA_FW_TYPE_PSP_##type] = #type
TA_FW_NAME(XGMI),
TA_FW_NAME(RAS),
TA_FW_NAME(HDCP),
@@ -1518,6 +1761,22 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused)
seq_printf(m, "RLC SRLS feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
+ /* RLCP */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLCP;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLCP feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* RLCV */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLCV;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLCV feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
/* MEC */
query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC;
query_fw.index = 0;
@@ -1537,6 +1796,15 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused)
fw_info.feature, fw_info.ver);
}
+ /* IMU */
+ query_fw.fw_type = AMDGPU_INFO_FW_IMU;
+ query_fw.index = 0;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "IMU feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
/* PSP SOS */
query_fw.fw_type = AMDGPU_INFO_FW_SOS;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
@@ -1570,11 +1838,12 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused)
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
return ret;
- smu_major = (fw_info.ver >> 16) & 0xffff;
+ smu_program = (fw_info.ver >> 24) & 0xff;
+ smu_major = (fw_info.ver >> 16) & 0xff;
smu_minor = (fw_info.ver >> 8) & 0xff;
smu_debug = (fw_info.ver >> 0) & 0xff;
- seq_printf(m, "SMC feature version: %u, firmware version: 0x%08x (%d.%d.%d)\n",
- fw_info.feature, fw_info.ver, smu_major, smu_minor, smu_debug);
+ seq_printf(m, "SMC feature version: %u, program: %d, firmware version: 0x%08x (%d.%d.%d)\n",
+ fw_info.feature, smu_program, fw_info.ver, smu_major, smu_minor, smu_debug);
/* SDMA */
query_fw.fw_type = AMDGPU_INFO_FW_SDMA;
@@ -1619,7 +1888,41 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused)
seq_printf(m, "TOC feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
- seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version);
+ /* CAP */
+ if (adev->psp.cap_fw) {
+ query_fw.fw_type = AMDGPU_INFO_FW_CAP;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "CAP feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+ }
+
+ /* MES_KIQ */
+ query_fw.fw_type = AMDGPU_INFO_FW_MES_KIQ;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "MES_KIQ feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* MES */
+ query_fw.fw_type = AMDGPU_INFO_FW_MES;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "MES feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* VPE */
+ query_fw.fw_type = AMDGPU_INFO_FW_VPE;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "VPE feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ seq_printf(m, "VBIOS version: %s\n", ctx->vbios_pn);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c
new file mode 100644
index 000000000000..4d1d4994ea3f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_lsdma.h"
+
+#define AMDGPU_LSDMA_MAX_SIZE 0x2000000ULL
+
+int amdgpu_lsdma_wait_for(struct amdgpu_device *adev,
+ uint32_t reg_index, uint32_t reg_val,
+ uint32_t mask)
+{
+ uint32_t val;
+ int i;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ val = RREG32(reg_index);
+ if ((val & mask) == reg_val)
+ return 0;
+ udelay(1);
+ }
+
+ return -ETIME;
+}
+
+int amdgpu_lsdma_copy_mem(struct amdgpu_device *adev,
+ uint64_t src_addr,
+ uint64_t dst_addr,
+ uint64_t mem_size)
+{
+ int ret;
+
+ if (mem_size == 0)
+ return -EINVAL;
+
+ while (mem_size > 0) {
+ uint64_t current_copy_size = min(mem_size, AMDGPU_LSDMA_MAX_SIZE);
+
+ ret = adev->lsdma.funcs->copy_mem(adev, src_addr, dst_addr, current_copy_size);
+ if (ret)
+ return ret;
+ src_addr += current_copy_size;
+ dst_addr += current_copy_size;
+ mem_size -= current_copy_size;
+ }
+
+ return 0;
+}
+
+int amdgpu_lsdma_fill_mem(struct amdgpu_device *adev,
+ uint64_t dst_addr,
+ uint32_t data,
+ uint64_t mem_size)
+{
+ int ret;
+
+ if (mem_size == 0)
+ return -EINVAL;
+
+ while (mem_size > 0) {
+ uint64_t current_fill_size = min(mem_size, AMDGPU_LSDMA_MAX_SIZE);
+
+ ret = adev->lsdma.funcs->fill_mem(adev, dst_addr, data, current_fill_size);
+ if (ret)
+ return ret;
+ dst_addr += current_fill_size;
+ mem_size -= current_fill_size;
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h
new file mode 100644
index 000000000000..c61ba58c5ee0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_LSDMA_H__
+#define __AMDGPU_LSDMA_H__
+
+struct amdgpu_lsdma {
+ const struct amdgpu_lsdma_funcs *funcs;
+};
+
+struct amdgpu_lsdma_funcs {
+ int (*copy_mem)(struct amdgpu_device *adev, uint64_t src_addr,
+ uint64_t dst_addr, uint64_t size);
+ int (*fill_mem)(struct amdgpu_device *adev, uint64_t dst_addr,
+ uint32_t data, uint64_t size);
+ void (*update_memory_power_gating)(struct amdgpu_device *adev, bool enable);
+};
+
+int amdgpu_lsdma_copy_mem(struct amdgpu_device *adev, uint64_t src_addr,
+ uint64_t dst_addr, uint64_t mem_size);
+int amdgpu_lsdma_fill_mem(struct amdgpu_device *adev, uint64_t dst_addr,
+ uint32_t data, uint64_t mem_size);
+int amdgpu_lsdma_wait_for(struct amdgpu_device *adev, uint32_t reg_index,
+ uint32_t reg_val, uint32_t mask);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index ce538f4819f9..3ca03b5e0f91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -27,6 +27,16 @@
#include "umc/umc_6_7_0_offset.h"
#include "umc/umc_6_7_0_sh_mask.h"
+static bool amdgpu_mca_is_deferred_error(struct amdgpu_device *adev,
+ uint64_t mc_status)
+{
+ if (adev->umc.ras->check_ecc_err_status)
+ return adev->umc.ras->check_ecc_err_status(adev,
+ AMDGPU_MCA_ERROR_TYPE_DE, &mc_status);
+
+ return false;
+}
+
void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev,
uint64_t mc_status_addr,
unsigned long *error_count)
@@ -71,47 +81,550 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
amdgpu_mca_reset_error_count(adev, mc_status_addr);
}
-int amdgpu_mca_ras_late_init(struct amdgpu_device *adev,
- struct amdgpu_mca_ras *mca_dev)
+int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev)
{
- int r;
- struct ras_ih_if ih_info = {
- .cb = NULL,
- };
- struct ras_fs_if fs_info = {
- .sysfs_name = mca_dev->ras_funcs->sysfs_name,
- };
+ int err;
+ struct amdgpu_mca_ras_block *ras;
+
+ if (!adev->mca.mp0.ras)
+ return 0;
+
+ ras = adev->mca.mp0.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register mca.mp0 ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "mca.mp0");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->mca.mp0.ras_if = &ras->ras_block.ras_comm;
+
+ return 0;
+}
+
+int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_mca_ras_block *ras;
+
+ if (!adev->mca.mp1.ras)
+ return 0;
+
+ ras = adev->mca.mp1.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register mca.mp1 ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "mca.mp1");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->mca.mp1.ras_if = &ras->ras_block.ras_comm;
+
+ return 0;
+}
+
+int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_mca_ras_block *ras;
+
+ if (!adev->mca.mpio.ras)
+ return 0;
+
+ ras = adev->mca.mpio.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register mca.mpio ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "mca.mpio");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->mca.mpio.ras_if = &ras->ras_block.ras_comm;
+
+ return 0;
+}
+
+static void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set)
+{
+ if (!mca_set)
+ return;
+
+ memset(mca_set, 0, sizeof(*mca_set));
+ INIT_LIST_HEAD(&mca_set->list);
+}
+
+static int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry)
+{
+ struct mca_bank_node *node;
+
+ if (!entry)
+ return -EINVAL;
+
+ node = kvzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ memcpy(&node->entry, entry, sizeof(*entry));
+
+ INIT_LIST_HEAD(&node->node);
+ list_add_tail(&node->node, &mca_set->list);
+
+ mca_set->nr_entries++;
+
+ return 0;
+}
+
+static int amdgpu_mca_bank_set_merge(struct mca_bank_set *mca_set, struct mca_bank_set *new)
+{
+ struct mca_bank_node *node;
+
+ list_for_each_entry(node, &new->list, node)
+ amdgpu_mca_bank_set_add_entry(mca_set, &node->entry);
+
+ return 0;
+}
+
+static void amdgpu_mca_bank_set_remove_node(struct mca_bank_set *mca_set, struct mca_bank_node *node)
+{
+ if (!node)
+ return;
+
+ list_del(&node->node);
+ kvfree(node);
+
+ mca_set->nr_entries--;
+}
+
+static void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set)
+{
+ struct mca_bank_node *node, *tmp;
+
+ if (list_empty(&mca_set->list))
+ return;
+
+ list_for_each_entry_safe(node, tmp, &mca_set->list, node)
+ amdgpu_mca_bank_set_remove_node(mca_set, node);
+}
+
+void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs)
+{
+ struct amdgpu_mca *mca = &adev->mca;
+
+ mca->mca_funcs = mca_funcs;
+}
+
+int amdgpu_mca_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_mca *mca = &adev->mca;
+ struct mca_bank_cache *mca_cache;
+ int i;
+
+ atomic_set(&mca->ue_update_flag, 0);
+
+ for (i = 0; i < ARRAY_SIZE(mca->mca_caches); i++) {
+ mca_cache = &mca->mca_caches[i];
+ mutex_init(&mca_cache->lock);
+ amdgpu_mca_bank_set_init(&mca_cache->mca_set);
+ }
+
+ return 0;
+}
+
+void amdgpu_mca_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_mca *mca = &adev->mca;
+ struct mca_bank_cache *mca_cache;
+ int i;
+
+ atomic_set(&mca->ue_update_flag, 0);
+
+ for (i = 0; i < ARRAY_SIZE(mca->mca_caches); i++) {
+ mca_cache = &mca->mca_caches[i];
+ amdgpu_mca_bank_set_release(&mca_cache->mca_set);
+ mutex_destroy(&mca_cache->lock);
+ }
+}
+
+int amdgpu_mca_reset(struct amdgpu_device *adev)
+{
+ amdgpu_mca_fini(adev);
+
+ return amdgpu_mca_init(adev);
+}
+
+int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
+{
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+
+ if (mca_funcs && mca_funcs->mca_set_debug_mode)
+ return mca_funcs->mca_set_debug_mode(adev, enable);
+
+ return -EOPNOTSUPP;
+}
+
+static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry,
+ struct ras_query_context *qctx)
+{
+ u64 event_id = qctx ? qctx->evid.event_id : RAS_EVENT_INVALID_ID;
+
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n");
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].STATUS=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_STATUS]);
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].ADDR=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_ADDR]);
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].MISC0=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_MISC0]);
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].IPID=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_IPID]);
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].SYND=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_SYND]);
+}
+
+static int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count)
+{
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+
+ if (!count)
+ return -EINVAL;
+
+ if (mca_funcs && mca_funcs->mca_get_valid_mca_count)
+ return mca_funcs->mca_get_valid_mca_count(adev, type, count);
+
+ return -EOPNOTSUPP;
+}
+
+static int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+ int idx, struct mca_bank_entry *entry)
+{
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+ int count;
+
+ if (!mca_funcs || !mca_funcs->mca_get_mca_entry)
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case AMDGPU_MCA_ERROR_TYPE_UE:
+ count = mca_funcs->max_ue_count;
+ break;
+ case AMDGPU_MCA_ERROR_TYPE_CE:
+ count = mca_funcs->max_ce_count;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (idx >= count)
+ return -EINVAL;
+
+ return mca_funcs->mca_get_mca_entry(adev, type, idx, entry);
+}
+
+static bool amdgpu_mca_bank_should_update(struct amdgpu_device *adev, enum amdgpu_mca_error_type type)
+{
+ struct amdgpu_mca *mca = &adev->mca;
+ bool ret = true;
+
+ /*
+ * The UE valid MCA count is only cleared by a reset, so to avoid
+ * counting the same errors more than once, the bank data is
+ * updated only once during the GPU recovery stage.
+ */
+ if (type == AMDGPU_MCA_ERROR_TYPE_UE) {
+ if (amdgpu_ras_intr_triggered())
+ ret = atomic_cmpxchg(&mca->ue_update_flag, 0, 1) == 0;
+ else
+ atomic_set(&mca->ue_update_flag, 0);
+ }
+
+ return ret;
+}
+
+static bool amdgpu_mca_bank_should_dump(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+ struct mca_bank_entry *entry)
+{
+ bool ret;
+
+ switch (type) {
+ case AMDGPU_MCA_ERROR_TYPE_CE:
+ ret = amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS]);
+ break;
+ case AMDGPU_MCA_ERROR_TYPE_UE:
+ default:
+ ret = true;
+ break;
+ }
+
+ return ret;
+}
+
+static int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set,
+ struct ras_query_context *qctx)
+{
+ struct mca_bank_entry entry;
+ uint32_t count = 0, i;
+ int ret;
+
+ if (!mca_set)
+ return -EINVAL;
+
+ if (!amdgpu_mca_bank_should_update(adev, type))
+ return 0;
+
+ ret = amdgpu_mca_smu_get_valid_mca_count(adev, type, &count);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < count; i++) {
+ memset(&entry, 0, sizeof(entry));
+ ret = amdgpu_mca_smu_get_mca_entry(adev, type, i, &entry);
+ if (ret)
+ return ret;
- if (!mca_dev->ras_if) {
- mca_dev->ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
- if (!mca_dev->ras_if)
- return -ENOMEM;
- mca_dev->ras_if->block = mca_dev->ras_funcs->ras_block;
- mca_dev->ras_if->sub_block_index = mca_dev->ras_funcs->ras_sub_block;
- mca_dev->ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ amdgpu_mca_bank_set_add_entry(mca_set, &entry);
+
+ if (amdgpu_mca_bank_should_dump(adev, type, &entry))
+ amdgpu_mca_smu_mca_bank_dump(adev, i, &entry, qctx);
}
- ih_info.head = fs_info.head = *mca_dev->ras_if;
- r = amdgpu_ras_late_init(adev, mca_dev->ras_if,
- &fs_info, &ih_info);
- if (r || !amdgpu_ras_is_supported(adev, mca_dev->ras_if->block)) {
- kfree(mca_dev->ras_if);
- mca_dev->ras_if = NULL;
+
+ return 0;
+}
+
+static int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+ enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count)
+{
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+
+ if (!count || !entry)
+ return -EINVAL;
+
+ if (!mca_funcs || !mca_funcs->mca_parse_mca_error_count)
+ return -EOPNOTSUPP;
+
+ return mca_funcs->mca_parse_mca_error_count(adev, blk, type, entry, count);
+}
+
+static int amdgpu_mca_dispatch_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
+ struct mca_bank_set *mca_set, struct ras_err_data *err_data)
+{
+ struct amdgpu_smuio_mcm_config_info mcm_info;
+ struct mca_bank_node *node, *tmp;
+ struct mca_bank_entry *entry;
+ uint32_t count;
+ int ret;
+
+ if (!mca_set)
+ return -EINVAL;
+
+ if (!mca_set->nr_entries)
+ return 0;
+
+ list_for_each_entry_safe(node, tmp, &mca_set->list, node) {
+ entry = &node->entry;
+
+ count = 0;
+ ret = amdgpu_mca_smu_parse_mca_error_count(adev, blk, type, entry, &count);
+ if (ret && ret != -EOPNOTSUPP)
+ return ret;
+
+ if (!count)
+ continue;
+
+ memset(&mcm_info, 0, sizeof(mcm_info));
+
+ mcm_info.socket_id = entry->info.socket_id;
+ mcm_info.die_id = entry->info.aid;
+
+ if (type == AMDGPU_MCA_ERROR_TYPE_UE) {
+ amdgpu_ras_error_statistic_ue_count(err_data,
+ &mcm_info, (uint64_t)count);
+ } else {
+ if (amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS]))
+ amdgpu_ras_error_statistic_de_count(err_data,
+ &mcm_info, (uint64_t)count);
+ else
+ amdgpu_ras_error_statistic_ce_count(err_data,
+ &mcm_info, (uint64_t)count);
+ }
+
+ amdgpu_mca_bank_set_remove_node(mca_set, node);
}
- return r;
+ return 0;
+}
+
+static int amdgpu_mca_add_mca_set_to_cache(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, struct mca_bank_set *new)
+{
+ struct mca_bank_cache *mca_cache = &adev->mca.mca_caches[type];
+ int ret;
+
+ mutex_lock(&mca_cache->lock);
+ ret = amdgpu_mca_bank_set_merge(&mca_cache->mca_set, new);
+ mutex_unlock(&mca_cache->lock);
+
+ return ret;
}
-void amdgpu_mca_ras_fini(struct amdgpu_device *adev,
- struct amdgpu_mca_ras *mca_dev)
+int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
+ struct ras_err_data *err_data, struct ras_query_context *qctx)
{
- struct ras_ih_if ih_info = {
- .cb = NULL,
+ struct mca_bank_set mca_set;
+ struct mca_bank_cache *mca_cache = &adev->mca.mca_caches[type];
+ int ret;
+
+ amdgpu_mca_bank_set_init(&mca_set);
+
+ ret = amdgpu_mca_smu_get_mca_set(adev, type, &mca_set, qctx);
+ if (ret)
+ goto out_mca_release;
+
+ ret = amdgpu_mca_dispatch_mca_set(adev, blk, type, &mca_set, err_data);
+ if (ret)
+ goto out_mca_release;
+
+ /* add the remaining mca banks to the mca cache */
+ if (mca_set.nr_entries) {
+ ret = amdgpu_mca_add_mca_set_to_cache(adev, type, &mca_set);
+ if (ret)
+ goto out_mca_release;
+ }
+
+ /* dispatch mca set again if mca cache has valid data */
+ mutex_lock(&mca_cache->lock);
+ if (mca_cache->mca_set.nr_entries)
+ ret = amdgpu_mca_dispatch_mca_set(adev, blk, type, &mca_cache->mca_set, err_data);
+ mutex_unlock(&mca_cache->lock);
+
+out_mca_release:
+ amdgpu_mca_bank_set_release(&mca_set);
+
+ return ret;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_mca_smu_debug_mode_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ int ret;
+
+ ret = amdgpu_ras_set_mca_debug_mode(adev, val ? true : false);
+ if (ret)
+ return ret;
+
+ dev_info(adev->dev, "amdgpu set smu mca debug mode %s success\n", val ? "on" : "off");
+
+ return 0;
+}
+
+static void mca_dump_entry(struct seq_file *m, struct mca_bank_entry *entry)
+{
+ int i, idx = entry->idx;
+ int reg_idx_array[] = {
+ MCA_REG_IDX_STATUS,
+ MCA_REG_IDX_ADDR,
+ MCA_REG_IDX_MISC0,
+ MCA_REG_IDX_IPID,
+ MCA_REG_IDX_SYND,
};
- if (!mca_dev->ras_if)
+ seq_printf(m, "mca entry[%d].type: %s\n", idx, entry->type == AMDGPU_MCA_ERROR_TYPE_UE ? "UE" : "CE");
+ seq_printf(m, "mca entry[%d].ip: %d\n", idx, entry->ip);
+ seq_printf(m, "mca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n",
+ idx, entry->info.socket_id, entry->info.aid, entry->info.hwid, entry->info.mcatype);
+
+ for (i = 0; i < ARRAY_SIZE(reg_idx_array); i++)
+ seq_printf(m, "mca entry[%d].regs[%d]: 0x%016llx\n", idx, reg_idx_array[i], entry->regs[reg_idx_array[i]]);
+}
+
+static int mca_dump_show(struct seq_file *m, enum amdgpu_mca_error_type type)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct mca_bank_node *node;
+ struct mca_bank_set mca_set;
+ struct ras_query_context qctx;
+ int ret;
+
+ amdgpu_mca_bank_set_init(&mca_set);
+
+ qctx.evid.event_id = RAS_EVENT_INVALID_ID;
+ ret = amdgpu_mca_smu_get_mca_set(adev, type, &mca_set, &qctx);
+ if (ret)
+ goto err_free_mca_set;
+
+ seq_printf(m, "amdgpu smu %s valid mca count: %d\n",
+ type == AMDGPU_MCA_ERROR_TYPE_UE ? "UE" : "CE", mca_set.nr_entries);
+
+ if (!mca_set.nr_entries)
+ goto err_free_mca_set;
+
+ list_for_each_entry(node, &mca_set.list, node)
+ mca_dump_entry(m, &node->entry);
+
+ /* add the dumped mca banks to the mca bank cache */
+ ret = amdgpu_mca_add_mca_set_to_cache(adev, type, &mca_set);
+
+err_free_mca_set:
+ amdgpu_mca_bank_set_release(&mca_set);
+
+ return ret;
+}
+
+static int mca_dump_ce_show(struct seq_file *m, void *unused)
+{
+ return mca_dump_show(m, AMDGPU_MCA_ERROR_TYPE_CE);
+}
+
+static int mca_dump_ce_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, mca_dump_ce_show, inode->i_private);
+}
+
+static const struct file_operations mca_ce_dump_debug_fops = {
+ .owner = THIS_MODULE,
+ .open = mca_dump_ce_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int mca_dump_ue_show(struct seq_file *m, void *unused)
+{
+ return mca_dump_show(m, AMDGPU_MCA_ERROR_TYPE_UE);
+}
+
+static int mca_dump_ue_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, mca_dump_ue_show, inode->i_private);
+}
+
+static const struct file_operations mca_ue_dump_debug_fops = {
+ .owner = THIS_MODULE,
+ .open = mca_dump_ue_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+DEFINE_DEBUGFS_ATTRIBUTE(mca_debug_mode_fops, NULL, amdgpu_mca_smu_debug_mode_set, "%llu\n");
+#endif
+
+void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root)
+{
+#if defined(CONFIG_DEBUG_FS)
+ if (!root)
return;
- amdgpu_ras_late_fini(adev, mca_dev->ras_if, &ih_info);
- kfree(mca_dev->ras_if);
- mca_dev->ras_if = NULL;
-}
\ No newline at end of file
+ debugfs_create_file("mca_debug_mode", 0200, root, adev, &mca_debug_mode_fops);
+ debugfs_create_file("mca_ue_dump", 0400, root, adev, &mca_ue_dump_debug_fops);
+ debugfs_create_file("mca_ce_dump", 0400, root, adev, &mca_ce_dump_debug_fops);
+#endif
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
index c74bc7177066..e80323ff90c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -21,32 +21,120 @@
#ifndef __AMDGPU_MCA_H__
#define __AMDGPU_MCA_H__
-struct amdgpu_mca_ras_funcs {
- int (*ras_late_init)(struct amdgpu_device *adev);
- void (*ras_fini)(struct amdgpu_device *adev);
- void (*query_ras_error_count)(struct amdgpu_device *adev,
- void *ras_error_status);
- void (*query_ras_error_address)(struct amdgpu_device *adev,
- void *ras_error_status);
- uint32_t ras_block;
- uint32_t ras_sub_block;
- const char* sysfs_name;
+#include "amdgpu_ras.h"
+
+#define MCA_MAX_REGS_COUNT (16)
+
+#define MCA_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> l)
+#define MCA_REG__STATUS__VAL(x) MCA_REG_FIELD(x, 63, 63)
+#define MCA_REG__STATUS__OVERFLOW(x) MCA_REG_FIELD(x, 62, 62)
+#define MCA_REG__STATUS__UC(x) MCA_REG_FIELD(x, 61, 61)
+#define MCA_REG__STATUS__EN(x) MCA_REG_FIELD(x, 60, 60)
+#define MCA_REG__STATUS__MISCV(x) MCA_REG_FIELD(x, 59, 59)
+#define MCA_REG__STATUS__ADDRV(x) MCA_REG_FIELD(x, 58, 58)
+#define MCA_REG__STATUS__PCC(x) MCA_REG_FIELD(x, 57, 57)
+#define MCA_REG__STATUS__ERRCOREIDVAL(x) MCA_REG_FIELD(x, 56, 56)
+#define MCA_REG__STATUS__TCC(x) MCA_REG_FIELD(x, 55, 55)
+#define MCA_REG__STATUS__SYNDV(x) MCA_REG_FIELD(x, 53, 53)
+#define MCA_REG__STATUS__CECC(x) MCA_REG_FIELD(x, 46, 46)
+#define MCA_REG__STATUS__UECC(x) MCA_REG_FIELD(x, 45, 45)
+#define MCA_REG__STATUS__DEFERRED(x) MCA_REG_FIELD(x, 44, 44)
+#define MCA_REG__STATUS__POISON(x) MCA_REG_FIELD(x, 43, 43)
+#define MCA_REG__STATUS__SCRUB(x) MCA_REG_FIELD(x, 40, 40)
+#define MCA_REG__STATUS__ERRCOREID(x) MCA_REG_FIELD(x, 37, 32)
+#define MCA_REG__STATUS__ADDRLSB(x) MCA_REG_FIELD(x, 29, 24)
+#define MCA_REG__STATUS__ERRORCODEEXT(x) MCA_REG_FIELD(x, 21, 16)
+#define MCA_REG__STATUS__ERRORCODE(x) MCA_REG_FIELD(x, 15, 0)
+
+#define MCA_REG__MISC0__ERRCNT(x) MCA_REG_FIELD(x, 43, 32)
+
+#define MCA_REG__SYND__ERRORINFORMATION(x) MCA_REG_FIELD(x, 17, 0)
+
+enum amdgpu_mca_ip {
+ AMDGPU_MCA_IP_UNKNOW = -1,
+ AMDGPU_MCA_IP_PSP = 0,
+ AMDGPU_MCA_IP_SDMA,
+ AMDGPU_MCA_IP_GC,
+ AMDGPU_MCA_IP_SMU,
+ AMDGPU_MCA_IP_MP5,
+ AMDGPU_MCA_IP_UMC,
+ AMDGPU_MCA_IP_PCS_XGMI,
+ AMDGPU_MCA_IP_COUNT,
+};
+
+enum amdgpu_mca_error_type {
+ AMDGPU_MCA_ERROR_TYPE_UE = 0,
+ AMDGPU_MCA_ERROR_TYPE_CE,
+ AMDGPU_MCA_ERROR_TYPE_DE,
+};
+
+struct amdgpu_mca_ras_block {
+ struct amdgpu_ras_block_object ras_block;
};
struct amdgpu_mca_ras {
struct ras_common_if *ras_if;
- const struct amdgpu_mca_ras_funcs *ras_funcs;
+ struct amdgpu_mca_ras_block *ras;
+};
+
+struct mca_bank_set {
+ int nr_entries;
+ struct list_head list;
};
-struct amdgpu_mca_funcs {
- void (*init)(struct amdgpu_device *adev);
+struct mca_bank_cache {
+ struct mca_bank_set mca_set;
+ struct mutex lock;
};
struct amdgpu_mca {
- const struct amdgpu_mca_funcs *funcs;
struct amdgpu_mca_ras mp0;
struct amdgpu_mca_ras mp1;
struct amdgpu_mca_ras mpio;
+ const struct amdgpu_mca_smu_funcs *mca_funcs;
+ struct mca_bank_cache mca_caches[AMDGPU_MCA_ERROR_TYPE_DE];
+ atomic_t ue_update_flag;
+};
+
+enum mca_reg_idx {
+ MCA_REG_IDX_STATUS = 1,
+ MCA_REG_IDX_ADDR = 2,
+ MCA_REG_IDX_MISC0 = 3,
+ MCA_REG_IDX_IPID = 5,
+ MCA_REG_IDX_SYND = 6,
+ MCA_REG_IDX_COUNT = 16,
+};
+
+struct mca_bank_info {
+ int socket_id;
+ int aid;
+ int hwid;
+ int mcatype;
+};
+
+struct mca_bank_entry {
+ int idx;
+ enum amdgpu_mca_error_type type;
+ enum amdgpu_mca_ip ip;
+ struct mca_bank_info info;
+ uint64_t regs[MCA_MAX_REGS_COUNT];
+};
+
+struct mca_bank_node {
+ struct mca_bank_entry entry;
+ struct list_head node;
+};
+
+struct amdgpu_mca_smu_funcs {
+ int max_ue_count;
+ int max_ce_count;
+ int (*mca_set_debug_mode)(struct amdgpu_device *adev, bool enable);
+ int (*mca_parse_mca_error_count)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
+ struct mca_bank_entry *entry, uint32_t *count);
+ int (*mca_get_valid_mca_count)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+ uint32_t *count);
+ int (*mca_get_mca_entry)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+ int idx, struct mca_bank_entry *entry);
};
void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev,
@@ -63,11 +151,19 @@ void amdgpu_mca_reset_error_count(struct amdgpu_device *adev,
void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
uint64_t mc_status_addr,
void *ras_error_status);
+int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev);
-int amdgpu_mca_ras_late_init(struct amdgpu_device *adev,
- struct amdgpu_mca_ras *mca_dev);
-
-void amdgpu_mca_ras_fini(struct amdgpu_device *adev,
- struct amdgpu_mca_ras *mca_dev);
+void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs);
+int amdgpu_mca_init(struct amdgpu_device *adev);
+void amdgpu_mca_fini(struct amdgpu_device *adev);
+int amdgpu_mca_reset(struct amdgpu_device *adev);
+int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable);
+int amdgpu_mca_smu_get_mca_set_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+ enum amdgpu_mca_error_type type, uint32_t *total);
+void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root);
+int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
+ struct ras_err_data *err_data, struct ras_query_context *qctx);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
new file mode 100644
index 000000000000..9c182ce501af
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -0,0 +1,784 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <drm/drm_exec.h>
+
+#include "amdgpu_mes.h"
+#include "amdgpu.h"
+#include "soc15_common.h"
+#include "amdgpu_mes_ctx.h"
+
+#define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
+#define AMDGPU_ONE_DOORBELL_SIZE 8
+
+/*
+ * Size of the doorbell slice reserved for one process: one doorbell for
+ * each of the AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS queues, rounded up
+ * to a multiple of PAGE_SIZE.  @adev is not consulted.
+ */
+int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)
+{
+	const int slice_size = AMDGPU_ONE_DOORBELL_SIZE *
+			       AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS;
+
+	return roundup(slice_size, PAGE_SIZE);
+}
+
+/*
+ * Set up the MES kernel doorbell bookkeeping: allocate the allocation
+ * bitmap, record how many doorbells MES owns and reserve one aggregated
+ * doorbell per priority level.
+ *
+ * Returns 0 on success or -ENOMEM if the bitmap cannot be allocated.
+ */
+static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_mes *mes = &adev->mes;
+	int prio;
+
+	/* Bitmap for dynamic allocation of kernel doorbells */
+	mes->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE / sizeof(u32), GFP_KERNEL);
+	if (!mes->doorbell_bitmap) {
+		dev_err(adev->dev, "Failed to allocate MES doorbell bitmap\n");
+		return -ENOMEM;
+	}
+
+	mes->num_mes_dbs = PAGE_SIZE / AMDGPU_ONE_DOORBELL_SIZE;
+
+	/*
+	 * The aggregated doorbells occupy the first bitmap slots and sit two
+	 * dwords apart, starting at db_start_dw_offset.
+	 */
+	for (prio = 0; prio < AMDGPU_MES_PRIORITY_NUM_LEVELS; prio++) {
+		mes->aggregated_doorbells[prio] = mes->db_start_dw_offset + prio * 2;
+		set_bit(prio, mes->doorbell_bitmap);
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate and zero the VRAM buffer that backs the MES event log.
+ *
+ * Does nothing and reports success when amdgpu_mes_log_enable is unset.
+ * Returns 0 on success or the error from amdgpu_bo_create_kernel().
+ */
+static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_mes *mes = &adev->mes;
+	int ret;
+
+	if (!amdgpu_mes_log_enable)
+		return 0;
+
+	ret = amdgpu_bo_create_kernel(adev, mes->event_log_size, PAGE_SIZE,
+				      AMDGPU_GEM_DOMAIN_VRAM,
+				      &mes->event_log_gpu_obj,
+				      &mes->event_log_gpu_addr,
+				      &mes->event_log_cpu_addr);
+	if (ret) {
+		dev_warn(adev->dev, "failed to create MES event log buffer (%d)", ret);
+		return ret;
+	}
+
+	memset(mes->event_log_cpu_addr, 0, mes->event_log_size);
+	return 0;
+}
+
+/* Release the doorbell bitmap allocated by amdgpu_mes_doorbell_init(). */
+static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
+{
+	struct amdgpu_mes *mes = &adev->mes;
+
+	bitmap_free(mes->doorbell_bitmap);
+}
+
+/*
+ * Initialise the software state of the MES block: ID allocators and locks,
+ * VMID and HQD masks, per-pipe writeback slots, the doorbell bitmap, the
+ * optional event log and the optional hung-queue doorbell array.
+ *
+ * Returns 0 on success.  On failure everything set up so far is torn down
+ * again and the negative error code of the failing step is returned.
+ */
+int amdgpu_mes_init(struct amdgpu_device *adev)
+{
+	int i, r, num_pipes;
+
+	adev->mes.adev = adev;
+
+	idr_init(&adev->mes.pasid_idr);
+	idr_init(&adev->mes.gang_id_idr);
+	idr_init(&adev->mes.queue_id_idr);
+	ida_init(&adev->mes.doorbell_ida);
+	spin_lock_init(&adev->mes.queue_id_lock);
+	mutex_init(&adev->mes.mutex_hidden);
+
+	for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++)
+		spin_lock_init(&adev->mes.ring_lock[i]);
+
+	adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+	adev->mes.vmid_mask_mmhub = 0xFF00;
+	adev->mes.vmid_mask_gfxhub = adev->gfx.disable_kq ? 0xFFFE : 0xFF00;
+
+	num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me;
+	if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES)
+		dev_warn(adev->dev, "more gfx pipes than supported by MES! (%d vs %d)\n",
+			 num_pipes, AMDGPU_MES_MAX_GFX_PIPES);
+
+	for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) {
+		if (i >= num_pipes)
+			break;
+		if (amdgpu_ip_version(adev, GC_HWIP, 0) >=
+		    IP_VERSION(12, 0, 0))
+			/*
+			 * GFX V12 has only one GFX pipe, but 8 queues in it.
+			 * GFX pipe 0 queue 0 is being used by Kernel queue.
+			 * Set GFX pipe 0 queue 1-7 for MES scheduling
+			 * mask = 1111 1110b
+			 */
+			adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0xFF : 0xFE;
+		else
+			/*
+			 * GFX pipe 0 queue 0 is being used by Kernel queue.
+			 * Set GFX pipe 0 queue 1 for MES scheduling
+			 * mask = 10b
+			 */
+			adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0x3 : 0x2;
+	}
+
+	num_pipes = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec;
+	if (num_pipes > AMDGPU_MES_MAX_COMPUTE_PIPES)
+		dev_warn(adev->dev, "more compute pipes than supported by MES! (%d vs %d)\n",
+			 num_pipes, AMDGPU_MES_MAX_COMPUTE_PIPES);
+
+	for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) {
+		if (i >= num_pipes)
+			break;
+		adev->mes.compute_hqd_mask[i] = adev->gfx.disable_kq ? 0xF : 0xC;
+	}
+
+	num_pipes = adev->sdma.num_instances;
+	if (num_pipes > AMDGPU_MES_MAX_SDMA_PIPES)
+		dev_warn(adev->dev, "more SDMA pipes than supported by MES! (%d vs %d)\n",
+			 num_pipes, AMDGPU_MES_MAX_SDMA_PIPES);
+
+	for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
+		if (i >= num_pipes)
+			break;
+		adev->mes.sdma_hqd_mask[i] = 0xfc;
+	}
+
+	/* Two writeback slots per pipe: scheduler context and status fence. */
+	for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+		r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]);
+		if (r) {
+			dev_err(adev->dev,
+				"(%d) ring trail_fence_offs wb alloc failed\n",
+				r);
+			goto error;
+		}
+		adev->mes.sch_ctx_gpu_addr[i] =
+			adev->wb.gpu_addr + (adev->mes.sch_ctx_offs[i] * 4);
+		adev->mes.sch_ctx_ptr[i] =
+			(uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs[i]];
+
+		r = amdgpu_device_wb_get(adev,
+					 &adev->mes.query_status_fence_offs[i]);
+		if (r) {
+			dev_err(adev->dev,
+				"(%d) query_status_fence_offs wb alloc failed\n",
+				r);
+			goto error;
+		}
+		adev->mes.query_status_fence_gpu_addr[i] = adev->wb.gpu_addr +
+			(adev->mes.query_status_fence_offs[i] * 4);
+		adev->mes.query_status_fence_ptr[i] =
+			(uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]];
+	}
+
+	r = amdgpu_mes_doorbell_init(adev);
+	if (r)
+		goto error;
+
+	r = amdgpu_mes_event_log_init(adev);
+	if (r)
+		goto error_doorbell;
+
+	if (adev->mes.hung_queue_db_array_size) {
+		r = amdgpu_bo_create_kernel(adev,
+					    adev->mes.hung_queue_db_array_size * sizeof(u32),
+					    PAGE_SIZE,
+					    AMDGPU_GEM_DOMAIN_GTT,
+					    &adev->mes.hung_queue_db_array_gpu_obj,
+					    &adev->mes.hung_queue_db_array_gpu_addr,
+					    &adev->mes.hung_queue_db_array_cpu_addr);
+		if (r) {
+			dev_warn(adev->dev, "failed to create MES hung db array buffer (%d)", r);
+			/* The event log BO already exists; unwind it too. */
+			goto error_event_log;
+		}
+	}
+
+	return 0;
+
+error_event_log:
+	/*
+	 * Same call amdgpu_mes_fini() makes unconditionally, so it must also
+	 * cope with the log being disabled (no BO allocated).
+	 */
+	amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj,
+			      &adev->mes.event_log_gpu_addr,
+			      &adev->mes.event_log_cpu_addr);
+error_doorbell:
+	amdgpu_mes_doorbell_free(adev);
+error:
+	/* Only slots whose CPU pointer was set were actually allocated. */
+	for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+		if (adev->mes.sch_ctx_ptr[i])
+			amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]);
+		if (adev->mes.query_status_fence_ptr[i])
+			amdgpu_device_wb_free(adev,
+					      adev->mes.query_status_fence_offs[i]);
+	}
+
+	idr_destroy(&adev->mes.pasid_idr);
+	idr_destroy(&adev->mes.gang_id_idr);
+	idr_destroy(&adev->mes.queue_id_idr);
+	ida_destroy(&adev->mes.doorbell_ida);
+	mutex_destroy(&adev->mes.mutex_hidden);
+	return r;
+}
+
+/*
+ * Tear down everything amdgpu_mes_init() set up: the hung-queue doorbell
+ * array, the event log, the per-pipe writeback slots, the doorbell bitmap
+ * and the ID allocators.
+ */
+void amdgpu_mes_fini(struct amdgpu_device *adev)
+{
+	struct amdgpu_mes *mes = &adev->mes;
+	int pipe;
+
+	amdgpu_bo_free_kernel(&mes->hung_queue_db_array_gpu_obj,
+			      &mes->hung_queue_db_array_gpu_addr,
+			      &mes->hung_queue_db_array_cpu_addr);
+
+	amdgpu_bo_free_kernel(&mes->event_log_gpu_obj,
+			      &mes->event_log_gpu_addr,
+			      &mes->event_log_cpu_addr);
+
+	/* A writeback slot is released only if its CPU pointer was set. */
+	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+		if (mes->sch_ctx_ptr[pipe])
+			amdgpu_device_wb_free(adev, mes->sch_ctx_offs[pipe]);
+		if (mes->query_status_fence_ptr[pipe])
+			amdgpu_device_wb_free(adev,
+					      mes->query_status_fence_offs[pipe]);
+	}
+
+	amdgpu_mes_doorbell_free(adev);
+
+	idr_destroy(&mes->pasid_idr);
+	idr_destroy(&mes->gang_id_idr);
+	idr_destroy(&mes->queue_id_idr);
+	ida_destroy(&mes->doorbell_ida);
+	mutex_destroy(&mes->mutex_hidden);
+}
+
+/*
+ * Ask MES to suspend all gangs at once.
+ *
+ * Returns 0 without doing anything when suspend/resume-all is not
+ * supported, otherwise the result of the suspend_gang backend call.
+ */
+int amdgpu_mes_suspend(struct amdgpu_device *adev)
+{
+	struct mes_suspend_gang_input input;
+	int ret;
+
+	if (!amdgpu_mes_suspend_resume_all_supported(adev))
+		return 0;
+
+	memset(&input, 0x0, sizeof(input));
+	input.suspend_all_gangs = 1;
+
+	/*
+	 * Avoid taking any other locks under MES lock to avoid circular
+	 * lock dependencies.
+	 */
+	amdgpu_mes_lock(&adev->mes);
+	ret = adev->mes.funcs->suspend_gang(&adev->mes, &input);
+	amdgpu_mes_unlock(&adev->mes);
+
+	if (ret)
+		dev_err(adev->dev, "failed to suspend all gangs");
+	return ret;
+}
+
+/*
+ * Ask MES to resume all gangs at once.
+ *
+ * Returns 0 without doing anything when suspend/resume-all is not
+ * supported, otherwise the result of the resume_gang backend call.
+ */
+int amdgpu_mes_resume(struct amdgpu_device *adev)
+{
+	struct mes_resume_gang_input input;
+	int ret;
+
+	if (!amdgpu_mes_suspend_resume_all_supported(adev))
+		return 0;
+
+	memset(&input, 0x0, sizeof(input));
+	input.resume_all_gangs = 1;
+
+	/*
+	 * Avoid taking any other locks under MES lock to avoid circular
+	 * lock dependencies.
+	 */
+	amdgpu_mes_lock(&adev->mes);
+	ret = adev->mes.funcs->resume_gang(&adev->mes, &input);
+	amdgpu_mes_unlock(&adev->mes);
+
+	if (ret)
+		dev_err(adev->dev, "failed to resume all gangs");
+	return ret;
+}
+
+/*
+ * Map a kernel ring through MES.  The request is described by the ring's
+ * type, doorbell index, pipe, queue, MQD GPU offset and wptr GPU address.
+ *
+ * Returns the result of the map_legacy_queue backend call.
+ */
+int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
+				struct amdgpu_ring *ring)
+{
+	struct amdgpu_mes *mes = &adev->mes;
+	struct mes_map_legacy_queue_input req;
+	int ret;
+
+	memset(&req, 0, sizeof(req));
+	req.queue_type = ring->funcs->type;
+	req.doorbell_offset = ring->doorbell_index;
+	req.pipe_id = ring->pipe;
+	req.queue_id = ring->queue;
+	req.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+	req.wptr_addr = ring->wptr_gpu_addr;
+
+	amdgpu_mes_lock(mes);
+	ret = mes->funcs->map_legacy_queue(mes, &req);
+	amdgpu_mes_unlock(mes);
+
+	if (ret)
+		dev_err(adev->dev, "failed to map legacy queue\n");
+	return ret;
+}
+
+/*
+ * Unmap a kernel ring through MES.
+ *
+ * @action:   unmap action forwarded to the backend
+ * @gpu_addr: forwarded as the trail fence address
+ * @seq:      forwarded as the trail fence data
+ *
+ * Returns the result of the unmap_legacy_queue backend call.
+ */
+int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
+				  struct amdgpu_ring *ring,
+				  enum amdgpu_unmap_queues_action action,
+				  u64 gpu_addr, u64 seq)
+{
+	struct mes_unmap_legacy_queue_input queue_input;
+	int r;
+
+	/*
+	 * Zero the request first, as the map and reset helpers do, so no
+	 * stack garbage (padding, or a field added later) reaches the backend.
+	 */
+	memset(&queue_input, 0, sizeof(queue_input));
+
+	queue_input.action = action;
+	queue_input.queue_type = ring->funcs->type;
+	queue_input.doorbell_offset = ring->doorbell_index;
+	queue_input.pipe_id = ring->pipe;
+	queue_input.queue_id = ring->queue;
+	queue_input.trail_fence_addr = gpu_addr;
+	queue_input.trail_fence_data = seq;
+
+	amdgpu_mes_lock(&adev->mes);
+	r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input);
+	amdgpu_mes_unlock(&adev->mes);
+	if (r)
+		dev_err(adev->dev, "failed to unmap legacy queue\n");
+
+	return r;
+}
+
+/*
+ * Reset a kernel ring through MES.
+ *
+ * @vmid:     VMID forwarded to the backend
+ * @use_mmio: forwarded to the backend unchanged
+ *
+ * The request is always flagged as a kernel queue; legacy_gfx is set only
+ * for GFX rings.  Returns the result of the reset_hw_queue backend call.
+ */
+int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev,
+				  struct amdgpu_ring *ring,
+				  unsigned int vmid,
+				  bool use_mmio)
+{
+	struct amdgpu_mes *mes = &adev->mes;
+	struct mes_reset_queue_input req;
+	int ret;
+
+	memset(&req, 0, sizeof(req));
+	req.queue_type = ring->funcs->type;
+	req.doorbell_offset = ring->doorbell_index;
+	req.me_id = ring->me;
+	req.pipe_id = ring->pipe;
+	req.queue_id = ring->queue;
+	/* A ring without an MQD object reports address 0. */
+	if (ring->mqd_obj)
+		req.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+	else
+		req.mqd_addr = 0;
+	req.wptr_addr = ring->wptr_gpu_addr;
+	req.vmid = vmid;
+	req.use_mmio = use_mmio;
+	req.is_kq = true;
+	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX)
+		req.legacy_gfx = true;
+
+	amdgpu_mes_lock(mes);
+	ret = mes->funcs->reset_hw_queue(mes, &req);
+	amdgpu_mes_unlock(mes);
+
+	if (ret)
+		dev_err(adev->dev, "failed to reset legacy queue\n");
+	return ret;
+}
+
+/* Number of u32 entries in the hung-queue doorbell array. */
+int amdgpu_mes_get_hung_queue_db_array_size(struct amdgpu_device *adev)
+{
+	const struct amdgpu_mes *mes = &adev->mes;
+
+	return mes->hung_queue_db_array_size;
+}
+
+/*
+ * Run the backend's hung-queue detection (and optionally reset) for one
+ * ring type and report the doorbells it flagged.
+ *
+ * @queue_type:    AMDGPU_RING_TYPE_GFX, _COMPUTE or _SDMA
+ * @detect_only:   forwarded to the backend unchanged
+ * @hung_db_num:   out, number of valid doorbell entries found
+ * @hung_db_array: out, entry i receives doorbell i when it is valid
+ *
+ * Returns -EINVAL for a NULL output pointer or an unsupported queue type,
+ * otherwise the result of the backend call.  The outputs are written only
+ * on success.
+ *
+ * NOTE(review): unlike the other helpers in this file the backend is
+ * called without amdgpu_mes_lock() - confirm that is intended.
+ * NOTE(review): entries are copied index-for-index, so @hung_db_array may
+ * have gaps while @hung_db_num only counts the valid ones - verify against
+ * callers.
+ */
+int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
+					    int queue_type,
+					    bool detect_only,
+					    unsigned int *hung_db_num,
+					    u32 *hung_db_array)
+{
+	struct mes_detect_and_reset_queue_input input;
+	u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr;
+	int ret, idx;
+
+	if (!hung_db_num || !hung_db_array)
+		return -EINVAL;
+
+	switch (queue_type) {
+	case AMDGPU_RING_TYPE_GFX:
+	case AMDGPU_RING_TYPE_COMPUTE:
+	case AMDGPU_RING_TYPE_SDMA:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/*
+	 * Clear the doorbell array before detection.  memset() uses only the
+	 * low byte of its fill value (0xff), which still leaves 0xffffffff,
+	 * i.e. AMDGPU_MES_INVALID_DB_OFFSET, in every u32 slot.
+	 */
+	memset(db_array, AMDGPU_MES_INVALID_DB_OFFSET,
+	       adev->mes.hung_queue_db_array_size * sizeof(u32));
+
+	input.queue_type = queue_type;
+	input.detect_only = detect_only;
+
+	ret = adev->mes.funcs->detect_and_reset_hung_queues(&adev->mes,
+							    &input);
+	if (ret) {
+		dev_err(adev->dev, "failed to detect and reset\n");
+		return ret;
+	}
+
+	*hung_db_num = 0;
+	for (idx = 0; idx < adev->mes.hung_queue_hqd_info_offset; idx++) {
+		if (db_array[idx] == AMDGPU_MES_INVALID_DB_OFFSET)
+			continue;
+
+		hung_db_array[idx] = db_array[idx];
+		*hung_db_num += 1;
+	}
+
+	/*
+	 * TODO: return HQD info for MES scheduled user compute queue reset cases
+	 * stored in hung_db_array hqd info offset to full array size
+	 */
+
+	return ret;
+}
+
+/*
+ * Read register @reg through the MES misc_op interface.
+ *
+ * A writeback slot is borrowed as the landing buffer for the value and is
+ * always returned before leaving.
+ *
+ * Returns the register value, or 0 when misc_op is not implemented, no
+ * writeback slot is available or the read itself fails.
+ */
+uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
+{
+	struct mes_misc_op_input op_input = {0};
+	uint32_t addr_offset = 0;
+	uint32_t *read_val_ptr;
+	uint32_t val = 0;
+	int r;
+
+	if (!adev->mes.funcs->misc_op) {
+		dev_err(adev->dev, "mes rreg is not supported!\n");
+		return 0;
+	}
+
+	if (amdgpu_device_wb_get(adev, &addr_offset)) {
+		dev_err(adev->dev, "critical bug! too many mes readers\n");
+		return 0;
+	}
+
+	read_val_ptr = (uint32_t *)&adev->wb.wb[addr_offset];
+	op_input.op = MES_MISC_OP_READ_REG;
+	op_input.read_reg.reg_offset = reg;
+	op_input.read_reg.buffer_addr = adev->wb.gpu_addr + (addr_offset * 4);
+
+	amdgpu_mes_lock(&adev->mes);
+	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+	amdgpu_mes_unlock(&adev->mes);
+	if (r)
+		dev_err(adev->dev, "failed to read reg (0x%x)\n", reg);
+	else
+		val = *(read_val_ptr);
+
+	/*
+	 * Free unconditionally: offset 0 is a valid slot, so the offset value
+	 * cannot double as an "allocated" flag.
+	 */
+	amdgpu_device_wb_free(adev, addr_offset);
+	return val;
+}
+
+/*
+ * Write @val to register @reg through the MES misc_op interface.
+ *
+ * Returns -EINVAL when misc_op is not implemented, otherwise the result of
+ * the backend call.
+ */
+int amdgpu_mes_wreg(struct amdgpu_device *adev,
+		    uint32_t reg, uint32_t val)
+{
+	/* Zeroed like the other misc_op callers; only write_reg is used. */
+	struct mes_misc_op_input op_input = {0};
+	int r;
+
+	if (!adev->mes.funcs->misc_op) {
+		dev_err(adev->dev, "mes wreg is not supported!\n");
+		return -EINVAL;
+	}
+
+	op_input.op = MES_MISC_OP_WRITE_REG;
+	op_input.write_reg.reg_offset = reg;
+	op_input.write_reg.reg_value = val;
+
+	amdgpu_mes_lock(&adev->mes);
+	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+	amdgpu_mes_unlock(&adev->mes);
+	if (r)
+		dev_err(adev->dev, "failed to write reg (0x%x)\n", reg);
+
+	return r;
+}
+
+/*
+ * Issue a MES_MISC_OP_WRM_REG_WR_WAIT request with the given register
+ * pair, reference value and mask.
+ *
+ * Returns -EINVAL when misc_op is not implemented, otherwise the result of
+ * the backend call.
+ */
+int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
+				  uint32_t reg0, uint32_t reg1,
+				  uint32_t ref, uint32_t mask)
+{
+	/* Zeroed like the other misc_op callers; only wrm_reg is used. */
+	struct mes_misc_op_input op_input = {0};
+	int r;
+
+	if (!adev->mes.funcs->misc_op) {
+		dev_err(adev->dev, "mes reg_write_reg_wait is not supported!\n");
+		return -EINVAL;
+	}
+
+	op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT;
+	op_input.wrm_reg.reg0 = reg0;
+	op_input.wrm_reg.reg1 = reg1;
+	op_input.wrm_reg.ref = ref;
+	op_input.wrm_reg.mask = mask;
+
+	amdgpu_mes_lock(&adev->mes);
+	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+	amdgpu_mes_unlock(&adev->mes);
+	if (r)
+		dev_err(adev->dev, "failed to reg_write_reg_wait\n");
+
+	return r;
+}
+
+/*
+ * Flush HDP through MES: hand the NBIO flush request/done register
+ * offsets to amdgpu_mes_reg_write_reg_wait(), using the CP0 ref_and_mask
+ * value as both reference and mask.
+ */
+int amdgpu_mes_hdp_flush(struct amdgpu_device *adev)
+{
+	const uint32_t req_reg = adev->nbio.funcs->get_hdp_flush_req_offset(adev);
+	const uint32_t done_reg = adev->nbio.funcs->get_hdp_flush_done_offset(adev);
+	const uint32_t ref_and_mask = adev->nbio.hdp_flush_reg->ref_and_mask_cp0;
+
+	return amdgpu_mes_reg_write_reg_wait(adev, req_reg, done_reg,
+					     ref_and_mask, ref_and_mask);
+}
+
+/*
+ * Program the shader debugger settings of one process through MES.
+ *
+ * @process_context_addr:   process context the settings apply to
+ * @spi_gdbg_per_vmid_cntl: forwarded to the backend unchanged
+ * @tcp_watch_cntl:         source for the request's tcp_watch_cntl array
+ * @flags:                  raw value for the request's flag word
+ * @trap_en:                forwarded only when the MES API version is >= 14
+ *
+ * Returns -EINVAL when misc_op is not implemented or when @flags has the
+ * process_ctx_flush bit set, otherwise the result of the backend call.
+ */
+int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
+				   uint64_t process_context_addr,
+				   uint32_t spi_gdbg_per_vmid_cntl,
+				   const uint32_t *tcp_watch_cntl,
+				   uint32_t flags,
+				   bool trap_en)
+{
+	struct amdgpu_mes *mes = &adev->mes;
+	struct mes_misc_op_input op_input = {0};
+	uint32_t api_version;
+	int ret;
+
+	if (!mes->funcs->misc_op) {
+		dev_err(adev->dev,
+			"mes set shader debugger is not supported!\n");
+		return -EINVAL;
+	}
+
+	op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
+	op_input.set_shader_debugger.process_context_addr = process_context_addr;
+	op_input.set_shader_debugger.flags.u32all = flags;
+
+	/* use amdgpu mes_flush_shader_debugger instead */
+	if (op_input.set_shader_debugger.flags.process_ctx_flush)
+		return -EINVAL;
+
+	op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl;
+	memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl,
+	       sizeof(op_input.set_shader_debugger.tcp_watch_cntl));
+
+	api_version = (mes->sched_version & AMDGPU_MES_API_VERSION_MASK) >>
+		      AMDGPU_MES_API_VERSION_SHIFT;
+	if (api_version >= 14)
+		op_input.set_shader_debugger.trap_en = trap_en;
+
+	amdgpu_mes_lock(mes);
+	ret = mes->funcs->misc_op(mes, &op_input);
+	if (ret)
+		dev_err(adev->dev, "failed to set_shader_debugger\n");
+	amdgpu_mes_unlock(mes);
+
+	return ret;
+}
+
+/*
+ * Send a SET_SHADER_DEBUGGER request that carries only the
+ * process_ctx_flush flag for @process_context_addr.
+ *
+ * Returns -EINVAL when misc_op is not implemented, otherwise the result of
+ * the backend call.
+ */
+int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
+				     uint64_t process_context_addr)
+{
+	struct amdgpu_mes *mes = &adev->mes;
+	struct mes_misc_op_input op_input = {0};
+	int ret;
+
+	if (!mes->funcs->misc_op) {
+		dev_err(adev->dev,
+			"mes flush shader debugger is not supported!\n");
+		return -EINVAL;
+	}
+
+	op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
+	op_input.set_shader_debugger.process_context_addr = process_context_addr;
+	op_input.set_shader_debugger.flags.process_ctx_flush = true;
+
+	amdgpu_mes_lock(mes);
+	ret = mes->funcs->misc_op(mes, &op_input);
+	if (ret)
+		dev_err(adev->dev, "failed to set_shader_debugger\n");
+	amdgpu_mes_unlock(mes);
+
+	return ret;
+}
+
+/*
+ * Look up the aggregated doorbell assigned to priority level @prio.
+ * @prio is used as an array index without a range check.
+ */
+uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
+						  enum amdgpu_mes_priority_level prio)
+{
+	const uint32_t *doorbells = adev->mes.aggregated_doorbells;
+
+	return doorbells[prio];
+}
+
+/*
+ * Request the MES firmware image for @pipe, cache the start addresses and
+ * version found in it and, for PSP loading, register its ucode and data
+ * sections with the firmware table.
+ *
+ * Firmware name selection:
+ *   - unified MES:       amdgpu/<prefix>_uni_mes.bin
+ *   - GC 11.x:           <prefix>_mes_2.bin (sched pipe) or <prefix>_mes1.bin,
+ *                        with a sched-pipe fallback to <prefix>_mes.bin
+ *   - everything else:   <prefix>_mes.bin (sched pipe) or <prefix>_mes1.bin
+ *
+ * Returns 0 on success.  If the request fails the firmware reference is
+ * released and the error is returned.
+ */
+int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
+{
+	const bool is_sched_pipe = (pipe == AMDGPU_MES_SCHED_PIPE);
+	const struct mes_firmware_header_v1_0 *mes_hdr;
+	struct amdgpu_firmware_info *info;
+	bool need_retry = false;
+	char ucode_prefix[30];
+	char fw_name[50];
+	u32 *ucode_ptr;
+	int r;
+
+	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix,
+				       sizeof(ucode_prefix));
+
+	if (adev->enable_uni_mes) {
+		snprintf(fw_name, sizeof(fw_name),
+			 "amdgpu/%s_uni_mes.bin", ucode_prefix);
+	} else {
+		const uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+		const char *suffix;
+
+		if (gc_ver >= IP_VERSION(11, 0, 0) &&
+		    gc_ver < IP_VERSION(12, 0, 0)) {
+			suffix = is_sched_pipe ? "_2" : "1";
+			need_retry = true;
+		} else {
+			suffix = is_sched_pipe ? "" : "1";
+		}
+		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
+			 ucode_prefix, suffix);
+	}
+
+	r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], AMDGPU_UCODE_REQUIRED,
+				 "%s", fw_name);
+	if (r && need_retry && is_sched_pipe) {
+		dev_info(adev->dev, "try to fall back to %s_mes.bin\n", ucode_prefix);
+		r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe],
+					 AMDGPU_UCODE_REQUIRED,
+					 "amdgpu/%s_mes.bin", ucode_prefix);
+	}
+
+	if (r) {
+		amdgpu_ucode_release(&adev->mes.fw[pipe]);
+		return r;
+	}
+
+	mes_hdr = (const struct mes_firmware_header_v1_0 *)
+		adev->mes.fw[pipe]->data;
+
+	/* The 64-bit start addresses are stored as lo/hi 32-bit halves. */
+	adev->mes.uc_start_addr[pipe] =
+		le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
+		((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
+	adev->mes.data_start_addr[pipe] =
+		le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
+		((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);
+
+	/* The version is read from dword 24 after the common header union. */
+	ucode_ptr = (u32 *)(adev->mes.fw[pipe]->data +
+			    sizeof(union amdgpu_firmware_header));
+	adev->mes.fw_version[pipe] =
+		le32_to_cpu(ucode_ptr[24]) & AMDGPU_MES_VERSION_MASK;
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+		int ucode, ucode_data;
+
+		if (is_sched_pipe) {
+			ucode = AMDGPU_UCODE_ID_CP_MES;
+			ucode_data = AMDGPU_UCODE_ID_CP_MES_DATA;
+		} else {
+			ucode = AMDGPU_UCODE_ID_CP_MES1;
+			ucode_data = AMDGPU_UCODE_ID_CP_MES1_DATA;
+		}
+
+		info = &adev->firmware.ucode[ucode];
+		info->ucode_id = ucode;
+		info->fw = adev->mes.fw[pipe];
+		adev->firmware.fw_size +=
+			ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
+			      PAGE_SIZE);
+
+		info = &adev->firmware.ucode[ucode_data];
+		info->ucode_id = ucode_data;
+		info->fw = adev->mes.fw[pipe];
+		adev->firmware.fw_size +=
+			ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
+			      PAGE_SIZE);
+	}
+
+	return 0;
+}
+
+/*
+ * Whether suspend/resume of all gangs is supported: always on GC 12 and
+ * newer, and on GC 11.x only when the low 12 bits of the scheduler version
+ * are at least 0x63.
+ */
+bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev)
+{
+	const uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
+	const uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+
+	if (gc_ver >= IP_VERSION(12, 0, 0))
+		return true;
+
+	return gc_ver >= IP_VERSION(11, 0, 0) && mes_rev >= 0x63;
+}
+
+/*
+ * Send a CHANGE_CONFIG request that sets limit_single_process to @enable.
+ *
+ * FIXME: @node_id is currently unused; it is meant to select the matching
+ * MES instance in the future.
+ *
+ * Returns -EINVAL when misc_op is not implemented, otherwise the result of
+ * the backend call.
+ */
+static int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev,
+					    uint32_t node_id, bool enable)
+{
+	struct amdgpu_mes *mes = &adev->mes;
+	struct mes_misc_op_input op_input = {0};
+	int ret;
+
+	op_input.op = MES_MISC_OP_CHANGE_CONFIG;
+	op_input.change_config.option.limit_single_process = enable ? 1 : 0;
+
+	if (!mes->funcs->misc_op) {
+		dev_err(adev->dev, "mes change config is not supported!\n");
+		return -EINVAL;
+	}
+
+	amdgpu_mes_lock(mes);
+	ret = mes->funcs->misc_op(mes, &op_input);
+	amdgpu_mes_unlock(mes);
+
+	if (ret)
+		dev_err(adev->dev, "failed to change_config.\n");
+	return ret;
+}
+
+/*
+ * Push the current enforce-isolation setting of every partition to MES.
+ *
+ * Nothing is done unless MES and the cleaner shader are both enabled.  All
+ * partitions are attempted even if one fails.
+ *
+ * Returns 0 on success, otherwise the error code of the first partition
+ * that failed.
+ */
+int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev)
+{
+	int i, num_xcps, ret, r = 0;
+
+	if (!adev->enable_mes || !adev->gfx.enable_cleaner_shader)
+		return 0;
+
+	mutex_lock(&adev->enforce_isolation_mutex);
+	/* Without a partition manager there is a single partition. */
+	num_xcps = adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1;
+	for (i = 0; i < num_xcps; i++) {
+		bool enable = adev->enforce_isolation[i] ==
+			      AMDGPU_ENFORCE_ISOLATION_ENABLE;
+
+		ret = amdgpu_mes_set_enforce_isolation(adev, i, enable);
+		/*
+		 * OR-ing negative errnos together would yield a meaningless
+		 * code, so remember only the first failure.
+		 */
+		if (ret && !r)
+			r = ret;
+	}
+	mutex_unlock(&adev->enforce_isolation_mutex);
+
+	return r;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+/*
+ * debugfs show callback: hex-dump the whole MES event log buffer, 32 bytes
+ * per row in 4-byte groups, prefixed with the offset.
+ */
+static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
+{
+	struct amdgpu_device *adev = m->private;
+	const struct amdgpu_mes *mes = &adev->mes;
+
+	seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
+		     mes->event_log_cpu_addr, mes->event_log_size, false);
+
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
+
+#endif
+
+/*
+ * Create the read-only "amdgpu_mes_event_log" debugfs file under the
+ * primary DRM minor.  Compiles to an empty function without
+ * CONFIG_DEBUG_FS; the file is created only when MES and MES logging are
+ * both enabled.
+ */
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	struct dentry *root = adev_to_drm(adev)->primary->debugfs_root;
+
+	if (!adev->enable_mes || !amdgpu_mes_log_enable)
+		return;
+
+	debugfs_create_file("amdgpu_mes_event_log", 0444, root,
+			    adev, &amdgpu_debugfs_mes_event_log_fops);
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 7334982ea702..e989225b354b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -24,10 +24,25 @@
#ifndef __AMDGPU_MES_H__
#define __AMDGPU_MES_H__
+#include "amdgpu_irq.h"
+#include "kgd_kfd_interface.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_doorbell.h"
+#include <linux/sched/mm.h>
+
#define AMDGPU_MES_MAX_COMPUTE_PIPES 8
#define AMDGPU_MES_MAX_GFX_PIPES 2
#define AMDGPU_MES_MAX_SDMA_PIPES 2
+#define AMDGPU_MES_API_VERSION_SHIFT 12
+#define AMDGPU_MES_FEAT_VERSION_SHIFT 24
+
+#define AMDGPU_MES_VERSION_MASK 0x00000fff
+#define AMDGPU_MES_API_VERSION_MASK 0x00fff000
+#define AMDGPU_MES_FEAT_VERSION_MASK 0xff000000
+#define AMDGPU_MES_MSCRATCH_SIZE 0x40000
+#define AMDGPU_MES_INVALID_DB_OFFSET 0xffffffff
+
enum amdgpu_mes_priority_level {
AMDGPU_MES_PRIORITY_LEVEL_LOW = 0,
AMDGPU_MES_PRIORITY_LEVEL_NORMAL = 1,
@@ -37,57 +52,159 @@ enum amdgpu_mes_priority_level {
AMDGPU_MES_PRIORITY_NUM_LEVELS
};
+#define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */
+#define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */
+
struct amdgpu_mes_funcs;
+enum amdgpu_mes_pipe { /* index into the per-pipe arrays of struct amdgpu_mes */
+ AMDGPU_MES_SCHED_PIPE = 0,
+ AMDGPU_MES_KIQ_PIPE, /* implicitly 1 */
+ AMDGPU_MAX_MES_PIPES = 2, /* pipe count; used as the array bound */
+};
+
struct amdgpu_mes {
struct amdgpu_device *adev;
+ struct mutex mutex_hidden;
+
+ struct idr pasid_idr;
+ struct idr gang_id_idr;
+ struct idr queue_id_idr;
+ struct ida doorbell_ida;
+
+ spinlock_t queue_id_lock;
+
+ uint32_t sched_version;
+ uint32_t kiq_version;
+ uint32_t fw_version[AMDGPU_MAX_MES_PIPES];
+ bool enable_legacy_queue_map;
+
uint32_t total_max_queue;
- uint32_t doorbell_id_offset;
uint32_t max_doorbell_slices;
uint64_t default_process_quantum;
uint64_t default_gang_quantum;
- struct amdgpu_ring ring;
+ struct amdgpu_ring ring[AMDGPU_MAX_MES_PIPES];
+ spinlock_t ring_lock[AMDGPU_MAX_MES_PIPES];
- const struct firmware *fw;
+ const struct firmware *fw[AMDGPU_MAX_MES_PIPES];
/* mes ucode */
- struct amdgpu_bo *ucode_fw_obj;
- uint64_t ucode_fw_gpu_addr;
- uint32_t *ucode_fw_ptr;
- uint32_t ucode_fw_version;
- uint64_t uc_start_addr;
+ struct amdgpu_bo *ucode_fw_obj[AMDGPU_MAX_MES_PIPES];
+ uint64_t ucode_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ uint32_t *ucode_fw_ptr[AMDGPU_MAX_MES_PIPES];
+ uint64_t uc_start_addr[AMDGPU_MAX_MES_PIPES];
/* mes ucode data */
- struct amdgpu_bo *data_fw_obj;
- uint64_t data_fw_gpu_addr;
- uint32_t *data_fw_ptr;
- uint32_t data_fw_version;
- uint64_t data_start_addr;
+ struct amdgpu_bo *data_fw_obj[AMDGPU_MAX_MES_PIPES];
+ uint64_t data_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ uint32_t *data_fw_ptr[AMDGPU_MAX_MES_PIPES];
+ uint64_t data_start_addr[AMDGPU_MAX_MES_PIPES];
/* eop gpu obj */
- struct amdgpu_bo *eop_gpu_obj;
- uint64_t eop_gpu_addr;
+ struct amdgpu_bo *eop_gpu_obj[AMDGPU_MAX_MES_PIPES];
+ uint64_t eop_gpu_addr[AMDGPU_MAX_MES_PIPES];
- void *mqd_backup;
+ void *mqd_backup[AMDGPU_MAX_MES_PIPES];
+ struct amdgpu_irq_src irq[AMDGPU_MAX_MES_PIPES];
uint32_t vmid_mask_gfxhub;
uint32_t vmid_mask_mmhub;
- uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES];
+ uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES];
- uint32_t agreegated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
- uint32_t sch_ctx_offs;
- uint64_t sch_ctx_gpu_addr;
- uint64_t *sch_ctx_ptr;
- uint32_t query_status_fence_offs;
- uint64_t query_status_fence_gpu_addr;
- uint64_t *query_status_fence_ptr;
+ uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
+ uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES];
+ uint64_t sch_ctx_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ uint64_t *sch_ctx_ptr[AMDGPU_MAX_MES_PIPES];
+ uint32_t query_status_fence_offs[AMDGPU_MAX_MES_PIPES];
+ uint64_t query_status_fence_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ uint64_t *query_status_fence_ptr[AMDGPU_MAX_MES_PIPES];
+
+ uint32_t saved_flags;
+
+ /* initialize kiq pipe */
+ int (*kiq_hw_init)(struct amdgpu_device *adev);
+ int (*kiq_hw_fini)(struct amdgpu_device *adev);
+
+ /* MES doorbells */
+ uint32_t db_start_dw_offset;
+ uint32_t num_mes_dbs;
+ unsigned long *doorbell_bitmap;
+
+ /* MES event log buffer */
+ uint32_t event_log_size;
+ struct amdgpu_bo *event_log_gpu_obj;
+ uint64_t event_log_gpu_addr;
+ void *event_log_cpu_addr;
/* ip specific functions */
const struct amdgpu_mes_funcs *funcs;
+
+ /* mes resource_1 bo*/
+ struct amdgpu_bo *resource_1[AMDGPU_MAX_MES_PIPES];
+ uint64_t resource_1_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ void *resource_1_addr[AMDGPU_MAX_MES_PIPES];
+
+ int hung_queue_db_array_size;
+ int hung_queue_hqd_info_offset;
+ struct amdgpu_bo *hung_queue_db_array_gpu_obj;
+ uint64_t hung_queue_db_array_gpu_addr;
+ void *hung_queue_db_array_cpu_addr;
+};
+
+struct amdgpu_mes_gang { /* NOTE(review): not referenced by amdgpu_mes.c in this patch; presumably per-gang bookkeeping - confirm remaining users */
+ int gang_id;
+ int priority;
+ int inprocess_gang_priority;
+ int global_priority_level;
+ struct list_head list;
+ struct amdgpu_mes_process *process;
+ struct amdgpu_bo *gang_ctx_bo;
+ uint64_t gang_ctx_gpu_addr;
+ void *gang_ctx_cpu_ptr;
+ uint64_t gang_quantum;
+ struct list_head queue_list;
+};
+
+struct amdgpu_mes_queue { /* NOTE(review): not referenced by amdgpu_mes.c in this patch; presumably per-queue bookkeeping - confirm remaining users */
+ struct list_head list;
+ struct amdgpu_mes_gang *gang; /* gang this queue points back to */
+ int queue_id;
+ uint64_t doorbell_off;
+ struct amdgpu_bo *mqd_obj;
+ void *mqd_cpu_ptr;
+ uint64_t mqd_gpu_addr;
+ uint64_t wptr_gpu_addr;
+ int queue_type;
+ int paging;
+ struct amdgpu_ring *ring;
+};
+
+struct amdgpu_mes_queue_properties { /* NOTE(review): not referenced by amdgpu_mes.c in this patch; presumably queue-creation parameters - confirm remaining users */
+ int queue_type;
+ uint64_t hqd_base_gpu_addr;
+ uint64_t rptr_gpu_addr;
+ uint64_t wptr_gpu_addr;
+ uint64_t wptr_mc_addr;
+ uint32_t queue_size;
+ uint64_t eop_gpu_addr;
+ uint32_t hqd_pipe_priority;
+ uint32_t hqd_queue_priority;
+ bool paging;
+ struct amdgpu_ring *ring;
+ /* out */
+ uint64_t doorbell_off; /* the only field marked as an output */
+};
+
+struct amdgpu_mes_gang_properties { /* NOTE(review): not referenced by amdgpu_mes.c in this patch; presumably gang-creation parameters - confirm remaining users */
+ uint32_t priority;
+ uint32_t gang_quantum;
+ uint32_t inprocess_gang_priority;
+ uint32_t priority_level;
+ int global_priority_level;
};
struct mes_add_queue_input {
@@ -104,13 +221,44 @@ struct mes_add_queue_input {
uint32_t doorbell_offset;
uint64_t mqd_addr;
uint64_t wptr_addr;
+ uint64_t wptr_mc_addr;
uint32_t queue_type;
uint32_t paging;
+ uint32_t gws_base;
+ uint32_t gws_size;
+ uint64_t tba_addr;
+ uint64_t tma_addr;
+ uint32_t trap_en;
+ uint32_t skip_process_ctx_clear;
+ uint32_t is_kfd_process;
+ uint32_t is_aql_queue;
+ uint32_t queue_size;
+ uint32_t exclusively_scheduled;
};
struct mes_remove_queue_input {
uint32_t doorbell_offset;
uint64_t gang_context_addr;
+ bool remove_queue_after_reset;
+};
+
+struct mes_map_legacy_queue_input { /* request for amdgpu_mes_funcs.map_legacy_queue; filled by amdgpu_mes_map_legacy_queue() */
+ uint32_t queue_type; /* ring->funcs->type */
+ uint32_t doorbell_offset; /* ring->doorbell_index */
+ uint32_t pipe_id; /* ring->pipe */
+ uint32_t queue_id; /* ring->queue */
+ uint64_t mqd_addr; /* GPU offset of ring->mqd_obj */
+ uint64_t wptr_addr; /* ring->wptr_gpu_addr */
+};
+
+struct mes_unmap_legacy_queue_input { /* request for amdgpu_mes_funcs.unmap_legacy_queue; filled by amdgpu_mes_unmap_legacy_queue() */
+ enum amdgpu_unmap_queues_action action; /* caller-supplied unmap action */
+ uint32_t queue_type; /* ring->funcs->type */
+ uint32_t doorbell_offset; /* ring->doorbell_index */
+ uint32_t pipe_id; /* ring->pipe */
+ uint32_t queue_id; /* ring->queue */
+ uint64_t trail_fence_addr; /* caller's gpu_addr argument */
+ uint64_t trail_fence_data; /* caller's seq argument */
};
struct mes_suspend_gang_input {
@@ -125,6 +273,95 @@ struct mes_resume_gang_input {
uint64_t gang_context_addr;
};
+struct mes_reset_queue_input { /* request for amdgpu_mes_funcs.reset_hw_queue; amdgpu_mes_reset_legacy_queue() fills it from a ring */
+ uint32_t queue_type; /* ring->funcs->type */
+ uint32_t doorbell_offset; /* ring->doorbell_index */
+ bool use_mmio; /* passed through from the caller */
+ uint32_t me_id; /* ring->me */
+ uint32_t pipe_id; /* ring->pipe */
+ uint32_t queue_id; /* ring->queue */
+ uint64_t mqd_addr; /* GPU offset of ring->mqd_obj, 0 when the ring has none */
+ uint64_t wptr_addr; /* ring->wptr_gpu_addr */
+ uint32_t vmid;
+ bool legacy_gfx; /* set by the legacy reset helper for GFX rings only */
+ bool is_kq; /* always true on the legacy (kernel ring) reset path */
+};
+
+struct mes_detect_and_reset_queue_input { /* request for amdgpu_mes_funcs.detect_and_reset_hung_queues */
+ uint32_t queue_type; /* the helper only passes GFX, COMPUTE or SDMA ring types */
+ bool detect_only; /* passed through from the caller */
+};
+
+struct mes_inv_tlbs_pasid_input { /* request for amdgpu_mes_funcs.invalidate_tlbs_pasid */
+ uint32_t xcc_id;
+ uint16_t pasid;
+ uint8_t hub_id;
+ uint8_t flush_type;
+};
+
+enum mes_misc_opcode { /* selects the operation carried by struct mes_misc_op_input */
+ MES_MISC_OP_WRITE_REG, /* issued by amdgpu_mes_wreg() */
+ MES_MISC_OP_READ_REG, /* issued by amdgpu_mes_rreg() */
+ MES_MISC_OP_WRM_REG_WAIT, /* no user in amdgpu_mes.c */
+ MES_MISC_OP_WRM_REG_WR_WAIT, /* issued by amdgpu_mes_reg_write_reg_wait() */
+ MES_MISC_OP_SET_SHADER_DEBUGGER, /* issued by the set/flush shader debugger helpers */
+ MES_MISC_OP_CHANGE_CONFIG, /* issued by amdgpu_mes_set_enforce_isolation() */
+};
+
+struct mes_misc_op_input { /* argument of amdgpu_mes_funcs.misc_op */
+ enum mes_misc_opcode op; /* callers in amdgpu_mes.c fill the union member matching this opcode */
+
+ union {
+ struct {
+ uint32_t reg_offset; /* register to read */
+ uint64_t buffer_addr; /* GPU address that receives the value (a writeback slot in amdgpu_mes_rreg()) */
+ } read_reg;
+
+ struct {
+ uint32_t reg_offset;
+ uint32_t reg_value;
+ } write_reg;
+
+ struct {
+ uint32_t ref;
+ uint32_t mask;
+ uint32_t reg0;
+ uint32_t reg1;
+ } wrm_reg;
+
+ struct {
+ uint64_t process_context_addr;
+ union {
+ struct {
+ uint32_t single_memop : 1;
+ uint32_t single_alu_op : 1;
+ uint32_t reserved: 29;
+ uint32_t process_ctx_flush: 1; /* set only by amdgpu_mes_flush_shader_debugger(); rejected by the set helper */
+ };
+ uint32_t u32all; /* the same flags as one 32-bit word */
+ } flags;
+ uint32_t spi_gdbg_per_vmid_cntl;
+ uint32_t tcp_watch_cntl[4];
+ uint32_t trap_en; /* filled by the set helper only when the MES API version is >= 14 */
+ } set_shader_debugger;
+
+ struct {
+ union {
+ struct {
+ uint32_t limit_single_process : 1; /* set from the enforce-isolation state */
+ uint32_t enable_hws_logging_buffer : 1;
+ uint32_t reserved : 30;
+ };
+ uint32_t all;
+ } option;
+ struct {
+ uint32_t tdr_level;
+ uint32_t tdr_delay;
+ } tdr_config;
+ } change_config;
+ };
+};
+
struct amdgpu_mes_funcs {
int (*add_hw_queue)(struct amdgpu_mes *mes,
struct mes_add_queue_input *input);
@@ -132,11 +369,141 @@ struct amdgpu_mes_funcs {
int (*remove_hw_queue)(struct amdgpu_mes *mes,
struct mes_remove_queue_input *input);
+ int (*map_legacy_queue)(struct amdgpu_mes *mes,
+ struct mes_map_legacy_queue_input *input);
+
+ int (*unmap_legacy_queue)(struct amdgpu_mes *mes,
+ struct mes_unmap_legacy_queue_input *input);
+
int (*suspend_gang)(struct amdgpu_mes *mes,
struct mes_suspend_gang_input *input);
int (*resume_gang)(struct amdgpu_mes *mes,
struct mes_resume_gang_input *input);
+
+ int (*misc_op)(struct amdgpu_mes *mes,
+ struct mes_misc_op_input *input);
+
+ int (*reset_hw_queue)(struct amdgpu_mes *mes,
+ struct mes_reset_queue_input *input);
+
+ int (*detect_and_reset_hung_queues)(struct amdgpu_mes *mes,
+ struct mes_detect_and_reset_queue_input *input);
+
+
+ int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes,
+ struct mes_inv_tlbs_pasid_input *input);
};
+#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
+#define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev))
+
+int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
+int amdgpu_mes_init(struct amdgpu_device *adev);
+void amdgpu_mes_fini(struct amdgpu_device *adev);
+
+int amdgpu_mes_suspend(struct amdgpu_device *adev);
+int amdgpu_mes_resume(struct amdgpu_device *adev);
+
+int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq);
+int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ unsigned int vmid,
+ bool use_mmio);
+
+int amdgpu_mes_get_hung_queue_db_array_size(struct amdgpu_device *adev);
+int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
+ int queue_type,
+ bool detect_only,
+ unsigned int *hung_db_num,
+ u32 *hung_db_array);
+
+uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg);
+int amdgpu_mes_wreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t val);
+int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask);
+int amdgpu_mes_hdp_flush(struct amdgpu_device *adev);
+int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
+ uint64_t process_context_addr,
+ uint32_t spi_gdbg_per_vmid_cntl,
+ const uint32_t *tcp_watch_cntl,
+ uint32_t flags,
+ bool trap_en);
+int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
+ uint64_t process_context_addr);
+
+uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
+ enum amdgpu_mes_priority_level prio);
+
+int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev);
+
+/*
+ * MES lock can be taken in MMU notifiers.
+ *
+ * A bit more detail about why to set no-FS reclaim with MES lock:
+ *
+ * The purpose of the MMU notifier is to stop GPU access to memory so
+ * that the Linux VM subsystem can move pages around safely. This is
+ * done by preempting user mode queues for the affected process. When
+ * MES is used, MES lock needs to be taken to preempt the queues.
+ *
+ * The MMU notifier callback entry point in the driver is
+ * amdgpu_mn_invalidate_range_start_hsa. The relevant call chain from
+ * there is:
+ * amdgpu_amdkfd_evict_userptr -> kgd2kfd_quiesce_mm ->
+ * kfd_process_evict_queues -> pdd->dev->dqm->ops.evict_process_queues
+ *
+ * The last part of the chain is a function pointer where we take the
+ * MES lock.
+ *
+ * The problem with taking locks in the MMU notifier is that MMU
+ * notifiers can be called in reclaim-FS context. That's where the
+ * kernel frees up pages to make room for new page allocations under
+ * memory pressure. While we are running in reclaim-FS context, we must
+ * not trigger another memory reclaim operation because that would
+ * recursively reenter the reclaim code and cause a deadlock. The
+ * memalloc_noreclaim_save/restore calls guarantee that.
+ *
+ * In addition we also need to avoid lock dependencies on other locks taken
+ * under the MES lock, for example reservation locks. Here is a possible
+ * scenario of a deadlock:
+ * Thread A: takes and holds reservation lock | triggers reclaim-FS |
+ * MMU notifier | blocks trying to take MES lock
+ * Thread B: takes and holds MES lock | blocks trying to take reservation lock
+ *
+ * In this scenario Thread B gets involved in a deadlock even without
+ * triggering a reclaim-FS operation itself.
+ * To fix this and break the lock dependency chain you'd need to either:
+ * 1. protect reservation locks with memalloc_nofs_save/restore, or
+ * 2. avoid taking reservation locks under the MES lock.
+ *
+ * Reservation locks are taken all over the kernel in different subsystems, we
+ * have no control over them and their lock dependencies. So the only workable
+ * solution is to avoid taking other locks under the MES lock.
+ * As a result, make sure no reclaim-FS happens while holding this lock anywhere
+ * to prevent deadlocks when an MMU notifier runs in reclaim-FS context.
+ */
+static inline void amdgpu_mes_lock(struct amdgpu_mes *mes)
+{
+ mutex_lock(&mes->mutex_hidden);
+ mes->saved_flags = memalloc_noreclaim_save();
+}
+
+static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
+{
+ memalloc_noreclaim_restore(mes->saved_flags);
+ mutex_unlock(&mes->mutex_hidden);
+}
+
+bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev);
+
+int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev);
+
#endif /* __AMDGPU_MES_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h
new file mode 100644
index 000000000000..912a5be2ece6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_MES_CTX_H__
+#define __AMDGPU_MES_CTX_H__
+
+#include "v10_structs.h"
+
+enum {
+ AMDGPU_MES_CTX_RPTR_OFFS = 0,
+ AMDGPU_MES_CTX_WPTR_OFFS,
+ AMDGPU_MES_CTX_FENCE_OFFS,
+ AMDGPU_MES_CTX_COND_EXE_OFFS,
+ AMDGPU_MES_CTX_TRAIL_FENCE_OFFS,
+ AMDGPU_MES_CTX_MAX_OFFS,
+};
+
+enum {
+ AMDGPU_MES_CTX_RING_OFFS = AMDGPU_MES_CTX_MAX_OFFS,
+ AMDGPU_MES_CTX_IB_OFFS,
+ AMDGPU_MES_CTX_PADDING_OFFS,
+};
+
+#define AMDGPU_MES_CTX_MAX_GFX_RINGS 1
+#define AMDGPU_MES_CTX_MAX_COMPUTE_RINGS 4
+#define AMDGPU_MES_CTX_MAX_SDMA_RINGS 2
+#define AMDGPU_MES_CTX_MAX_RINGS \
+ (AMDGPU_MES_CTX_MAX_GFX_RINGS + \
+ AMDGPU_MES_CTX_MAX_COMPUTE_RINGS + \
+ AMDGPU_MES_CTX_MAX_SDMA_RINGS)
+
+#define AMDGPU_CSA_SDMA_SIZE 64
+#define GFX10_MEC_HPD_SIZE 2048
+
+struct amdgpu_wb_slot {
+ uint32_t data[8];
+};
+
+struct amdgpu_mes_ctx_meta_data {
+ struct {
+ uint8_t ring[PAGE_SIZE * 4];
+
+ /* gfx csa */
+ struct v10_gfx_meta_data gfx_meta_data;
+
+ uint8_t gds_backup[64 * 1024];
+
+ struct amdgpu_wb_slot slots[AMDGPU_MES_CTX_MAX_OFFS];
+
+ /* only for ib test */
+ uint32_t ib[256] __aligned(256);
+
+ uint32_t padding[64];
+
+ } __aligned(PAGE_SIZE) gfx[AMDGPU_MES_CTX_MAX_GFX_RINGS];
+
+ struct {
+ uint8_t ring[PAGE_SIZE * 4];
+
+ uint8_t mec_hpd[GFX10_MEC_HPD_SIZE];
+
+ struct amdgpu_wb_slot slots[AMDGPU_MES_CTX_MAX_OFFS];
+
+ /* only for ib test */
+ uint32_t ib[256] __aligned(256);
+
+ uint32_t padding[64];
+
+ } __aligned(PAGE_SIZE) compute[AMDGPU_MES_CTX_MAX_COMPUTE_RINGS];
+
+ struct {
+ uint8_t ring[PAGE_SIZE * 4];
+
+ /* sdma csa for mcbp */
+ uint8_t sdma_meta_data[AMDGPU_CSA_SDMA_SIZE];
+
+ struct amdgpu_wb_slot slots[AMDGPU_MES_CTX_MAX_OFFS];
+
+ /* only for ib test */
+ uint32_t ib[256] __aligned(256);
+
+ uint32_t padding[64];
+
+ } __aligned(PAGE_SIZE) sdma[AMDGPU_MES_CTX_MAX_SDMA_RINGS];
+};
+
+struct amdgpu_mes_ctx_data {
+ struct amdgpu_bo *meta_data_obj;
+ uint64_t meta_data_gpu_addr;
+ uint64_t meta_data_mc_addr;
+ struct amdgpu_bo_va *meta_data_va;
+ void *meta_data_ptr;
+ uint32_t gang_ids[AMDGPU_HW_IP_DMA+1];
+};
+
+#define AMDGPU_FENCE_MES_QUEUE_FLAG 0x1000000u
+#define AMDGPU_FENCE_MES_QUEUE_ID_MASK (AMDGPU_FENCE_MES_QUEUE_FLAG - 1)
+
+#define AMDGPU_FENCE_MES_QUEUE_FLAG 0x1000000u
+#define AMDGPU_FENCE_MES_QUEUE_ID_MASK (AMDGPU_FENCE_MES_QUEUE_FLAG - 1)
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
index 24297dc51434..0f6b1021fef3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2019 Advanced Micro Devices, Inc.
+ * Copyright (C) 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -8,61 +8,39 @@
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
*
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-
#include "amdgpu.h"
#include "amdgpu_ras.h"
-int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev)
+int amdgpu_mmhub_ras_sw_init(struct amdgpu_device *adev)
{
- int r;
- struct ras_ih_if ih_info = {
- .cb = NULL,
- };
- struct ras_fs_if fs_info = {
- .sysfs_name = "mmhub_err_count",
- };
+ int err;
+ struct amdgpu_mmhub_ras *ras;
- if (!adev->mmhub.ras_if) {
- adev->mmhub.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
- if (!adev->mmhub.ras_if)
- return -ENOMEM;
- adev->mmhub.ras_if->block = AMDGPU_RAS_BLOCK__MMHUB;
- adev->mmhub.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
- adev->mmhub.ras_if->sub_block_index = 0;
- }
- ih_info.head = fs_info.head = *adev->mmhub.ras_if;
- r = amdgpu_ras_late_init(adev, adev->mmhub.ras_if,
- &fs_info, &ih_info);
- if (r || !amdgpu_ras_is_supported(adev, adev->mmhub.ras_if->block)) {
- kfree(adev->mmhub.ras_if);
- adev->mmhub.ras_if = NULL;
- }
+ if (!adev->mmhub.ras)
+ return 0;
- return r;
-}
+ ras = adev->mmhub.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register mmhub ras block!\n");
+ return err;
+ }
-void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev)
-{
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) &&
- adev->mmhub.ras_if) {
- struct ras_common_if *ras_if = adev->mmhub.ras_if;
- struct ras_ih_if ih_info = {
- .cb = NULL,
- };
+ strcpy(ras->ras_block.ras_comm.name, "mmhub");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MMHUB;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->mmhub.ras_if = &ras->ras_block.ras_comm;
- amdgpu_ras_late_fini(adev, ras_if, &ih_info);
- kfree(ras_if);
- }
+ /* mmhub ras follows amdgpu_ras_block_late_init_default for late init */
+ return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
index b27fcbccce2b..1ca9d4ed8063 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
@@ -21,18 +21,36 @@
#ifndef __AMDGPU_MMHUB_H__
#define __AMDGPU_MMHUB_H__
-struct amdgpu_mmhub_ras_funcs {
- int (*ras_late_init)(struct amdgpu_device *adev);
- void (*ras_fini)(struct amdgpu_device *adev);
- void (*query_ras_error_count)(struct amdgpu_device *adev,
- void *ras_error_status);
- void (*query_ras_error_status)(struct amdgpu_device *adev);
- void (*reset_ras_error_count)(struct amdgpu_device *adev);
- void (*reset_ras_error_status)(struct amdgpu_device *adev);
+enum amdgpu_mmhub_ras_memory_id {
+ AMDGPU_MMHUB_WGMI_PAGEMEM = 0,
+ AMDGPU_MMHUB_RGMI_PAGEMEM = 1,
+ AMDGPU_MMHUB_WDRAM_PAGEMEM = 2,
+ AMDGPU_MMHUB_RDRAM_PAGEMEM = 3,
+ AMDGPU_MMHUB_WIO_CMDMEM = 4,
+ AMDGPU_MMHUB_RIO_CMDMEM = 5,
+ AMDGPU_MMHUB_WGMI_CMDMEM = 6,
+ AMDGPU_MMHUB_RGMI_CMDMEM = 7,
+ AMDGPU_MMHUB_WDRAM_CMDMEM = 8,
+ AMDGPU_MMHUB_RDRAM_CMDMEM = 9,
+ AMDGPU_MMHUB_MAM_DMEM0 = 10,
+ AMDGPU_MMHUB_MAM_DMEM1 = 11,
+ AMDGPU_MMHUB_MAM_DMEM2 = 12,
+ AMDGPU_MMHUB_MAM_DMEM3 = 13,
+ AMDGPU_MMHUB_WRET_TAGMEM = 19,
+ AMDGPU_MMHUB_RRET_TAGMEM = 20,
+ AMDGPU_MMHUB_WIO_DATAMEM = 21,
+ AMDGPU_MMHUB_WGMI_DATAMEM = 22,
+ AMDGPU_MMHUB_WDRAM_DATAMEM = 23,
+ AMDGPU_MMHUB_MEMORY_BLOCK_LAST,
+};
+
+struct amdgpu_mmhub_ras {
+ struct amdgpu_ras_block_object ras_block;
};
struct amdgpu_mmhub_funcs {
u64 (*get_fb_location)(struct amdgpu_device *adev);
+ u64 (*get_mc_fb_offset)(struct amdgpu_device *adev);
void (*init)(struct amdgpu_device *adev);
int (*gart_enable)(struct amdgpu_device *adev);
void (*set_fault_enable_default)(struct amdgpu_device *adev,
@@ -40,7 +58,7 @@ struct amdgpu_mmhub_funcs {
void (*gart_disable)(struct amdgpu_device *adev);
int (*set_clockgating)(struct amdgpu_device *adev,
enum amd_clockgating_state state);
- void (*get_clockgating)(struct amdgpu_device *adev, u32 *flags);
+ void (*get_clockgating)(struct amdgpu_device *adev, u64 *flags);
void (*setup_vm_pt_regs)(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base);
void (*update_power_gating)(struct amdgpu_device *adev,
@@ -50,10 +68,10 @@ struct amdgpu_mmhub_funcs {
struct amdgpu_mmhub {
struct ras_common_if *ras_if;
const struct amdgpu_mmhub_funcs *funcs;
- const struct amdgpu_mmhub_ras_funcs *ras_funcs;
+ struct amdgpu_mmhub_ras *ras;
};
-int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev);
-void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev);
+int amdgpu_mmhub_ras_sw_init(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
deleted file mode 100644
index 4b153daf283d..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- */
-/*
- * Authors:
- * Christian König <christian.koenig@amd.com>
- */
-
-/**
- * DOC: MMU Notifier
- *
- * For coherent userptr handling registers an MMU notifier to inform the driver
- * about updates on the page tables of a process.
- *
- * When somebody tries to invalidate the page tables we block the update until
- * all operations on the pages in question are completed, then those pages are
- * marked as accessed and also dirty if it wasn't a read only access.
- *
- * New command submissions using the userptrs in question are delayed until all
- * page table invalidation are completed and we once more see a coherent process
- * address space.
- */
-
-#include <linux/firmware.h>
-#include <linux/module.h>
-#include <drm/drm.h>
-
-#include "amdgpu.h"
-#include "amdgpu_amdkfd.h"
-
-/**
- * amdgpu_mn_invalidate_gfx - callback to notify about mm change
- *
- * @mni: the range (mm) is about to update
- * @range: details on the invalidation
- * @cur_seq: Value to pass to mmu_interval_set_seq()
- *
- * Block for operations on BOs to finish and mark pages as accessed and
- * potentially dirty.
- */
-static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni,
- const struct mmu_notifier_range *range,
- unsigned long cur_seq)
-{
- struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- long r;
-
- if (!mmu_notifier_range_blockable(range))
- return false;
-
- mutex_lock(&adev->notifier_lock);
-
- mmu_interval_set_seq(mni, cur_seq);
-
- r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
- MAX_SCHEDULE_TIMEOUT);
- mutex_unlock(&adev->notifier_lock);
- if (r <= 0)
- DRM_ERROR("(%ld) failed to wait for user bo\n", r);
- return true;
-}
-
-static const struct mmu_interval_notifier_ops amdgpu_mn_gfx_ops = {
- .invalidate = amdgpu_mn_invalidate_gfx,
-};
-
-/**
- * amdgpu_mn_invalidate_hsa - callback to notify about mm change
- *
- * @mni: the range (mm) is about to update
- * @range: details on the invalidation
- * @cur_seq: Value to pass to mmu_interval_set_seq()
- *
- * We temporarily evict the BO attached to this range. This necessitates
- * evicting all user-mode queues of the process.
- */
-static bool amdgpu_mn_invalidate_hsa(struct mmu_interval_notifier *mni,
- const struct mmu_notifier_range *range,
- unsigned long cur_seq)
-{
- struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-
- if (!mmu_notifier_range_blockable(range))
- return false;
-
- mutex_lock(&adev->notifier_lock);
-
- mmu_interval_set_seq(mni, cur_seq);
-
- amdgpu_amdkfd_evict_userptr(bo->kfd_bo, bo->notifier.mm);
- mutex_unlock(&adev->notifier_lock);
-
- return true;
-}
-
-static const struct mmu_interval_notifier_ops amdgpu_mn_hsa_ops = {
- .invalidate = amdgpu_mn_invalidate_hsa,
-};
-
-/**
- * amdgpu_mn_register - register a BO for notifier updates
- *
- * @bo: amdgpu buffer object
- * @addr: userptr addr we should monitor
- *
- * Registers a mmu_notifier for the given BO at the specified address.
- * Returns 0 on success, -ERRNO if anything goes wrong.
- */
-int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
-{
- if (bo->kfd_bo)
- return mmu_interval_notifier_insert(&bo->notifier, current->mm,
- addr, amdgpu_bo_size(bo),
- &amdgpu_mn_hsa_ops);
- return mmu_interval_notifier_insert(&bo->notifier, current->mm, addr,
- amdgpu_bo_size(bo),
- &amdgpu_mn_gfx_ops);
-}
-
-/**
- * amdgpu_mn_unregister - unregister a BO for notifier updates
- *
- * @bo: amdgpu buffer object
- *
- * Remove any registration of mmu notifier updates from the buffer object.
- */
-void amdgpu_mn_unregister(struct amdgpu_bo *bo)
-{
- if (!bo->notifier.mm)
- return;
- mmu_interval_notifier_remove(&bo->notifier);
- bo->notifier.mm = NULL;
-}
-
-int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
- struct mm_struct *mm, struct page **pages,
- uint64_t start, uint64_t npages,
- struct hmm_range **phmm_range, bool readonly,
- bool mmap_locked, void *owner)
-{
- struct hmm_range *hmm_range;
- unsigned long timeout;
- unsigned long i;
- unsigned long *pfns;
- int r = 0;
-
- hmm_range = kzalloc(sizeof(*hmm_range), GFP_KERNEL);
- if (unlikely(!hmm_range))
- return -ENOMEM;
-
- pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
- if (unlikely(!pfns)) {
- r = -ENOMEM;
- goto out_free_range;
- }
-
- hmm_range->notifier = notifier;
- hmm_range->default_flags = HMM_PFN_REQ_FAULT;
- if (!readonly)
- hmm_range->default_flags |= HMM_PFN_REQ_WRITE;
- hmm_range->hmm_pfns = pfns;
- hmm_range->start = start;
- hmm_range->end = start + npages * PAGE_SIZE;
- hmm_range->dev_private_owner = owner;
-
- /* Assuming 512MB takes maxmium 1 second to fault page address */
- timeout = max(npages >> 17, 1ULL) * HMM_RANGE_DEFAULT_TIMEOUT;
- timeout = jiffies + msecs_to_jiffies(timeout);
-
-retry:
- hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
-
- if (likely(!mmap_locked))
- mmap_read_lock(mm);
-
- r = hmm_range_fault(hmm_range);
-
- if (likely(!mmap_locked))
- mmap_read_unlock(mm);
- if (unlikely(r)) {
- /*
- * FIXME: This timeout should encompass the retry from
- * mmu_interval_read_retry() as well.
- */
- if (r == -EBUSY && !time_after(jiffies, timeout))
- goto retry;
- goto out_free_pfns;
- }
-
- /*
- * Due to default_flags, all pages are HMM_PFN_VALID or
- * hmm_range_fault() fails. FIXME: The pages cannot be touched outside
- * the notifier_lock, and mmu_interval_read_retry() must be done first.
- */
- for (i = 0; pages && i < npages; i++)
- pages[i] = hmm_pfn_to_page(pfns[i]);
-
- *phmm_range = hmm_range;
-
- return 0;
-
-out_free_pfns:
- kvfree(pfns);
-out_free_range:
- kfree(hmm_range);
-
- return r;
-}
-
-int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range)
-{
- int r;
-
- r = mmu_interval_read_retry(hmm_range->notifier,
- hmm_range->notifier_seq);
- kvfree(hmm_range->hmm_pfns);
- kfree(hmm_range);
-
- return r;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
deleted file mode 100644
index 14a3c1864085..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright 2017 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Christian König
- */
-#ifndef __AMDGPU_MN_H__
-#define __AMDGPU_MN_H__
-
-#include <linux/types.h>
-#include <linux/hmm.h>
-#include <linux/rwsem.h>
-#include <linux/workqueue.h>
-#include <linux/interval_tree.h>
-
-int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
- struct mm_struct *mm, struct page **pages,
- uint64_t start, uint64_t npages,
- struct hmm_range **phmm_range, bool readonly,
- bool mmap_locked, void *owner);
-int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
-
-#if defined(CONFIG_HMM_MIRROR)
-int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
-void amdgpu_mn_unregister(struct amdgpu_bo *bo);
-#else
-static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
-{
- DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, "
- "add CONFIG_ZONE_DEVICE=y in config file to fix this\n");
- return -ENODEV;
-}
-static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}
-#endif
-
-#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 89fb372ed49c..dc8d2f52c7d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -30,21 +30,18 @@
#ifndef AMDGPU_MODE_H
#define AMDGPU_MODE_H
+#include <drm/display/drm_dp_helper.h>
#include <drm/drm_crtc.h>
-#include <drm/drm_edid.h>
#include <drm/drm_encoder.h>
-#include <drm/drm_dp_helper.h>
#include <drm/drm_fixed.h>
-#include <drm/drm_crtc_helper.h>
-#include <drm/drm_fb_helper.h>
-#include <drm/drm_plane_helper.h>
+#include <drm/drm_framebuffer.h>
#include <drm/drm_probe_helper.h>
#include <linux/i2c.h>
#include <linux/i2c-algo-bit.h>
#include <linux/hrtimer.h>
#include "amdgpu_irq.h"
-#include <drm/drm_dp_mst_helper.h>
+#include <drm/display/drm_dp_mst_helper.h>
#include "modules/inc/mod_freesync.h"
#include "amdgpu_dm_irq_params.h"
@@ -53,6 +50,8 @@ struct amdgpu_device;
struct amdgpu_encoder;
struct amdgpu_router;
struct amdgpu_hpd;
+struct edid;
+struct drm_edid;
#define to_amdgpu_crtc(x) container_of(x, struct amdgpu_crtc, base)
#define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base)
@@ -232,8 +231,6 @@ struct amdgpu_i2c_chan {
struct mutex mutex;
};
-struct amdgpu_fbdev;
-
struct amdgpu_afmt {
bool enabled;
int offset;
@@ -304,18 +301,12 @@ struct amdgpu_framebuffer {
uint64_t tiling_flags;
bool tmz_surface;
+ bool gfx12_dcc;
/* caching for later use */
uint64_t address;
};
-struct amdgpu_fbdev {
- struct drm_fb_helper helper;
- struct amdgpu_framebuffer rfb;
- struct list_head fbdev_list;
- struct amdgpu_device *adev;
-};
-
struct amdgpu_mode_info {
struct atom_context *atom_context;
struct card_info *atom_card_info;
@@ -338,11 +329,8 @@ struct amdgpu_mode_info {
/* Adaptive Backlight Modulation (power feature) */
struct drm_property *abm_level_property;
/* hardcoded DFP edid from BIOS */
- struct edid *bios_hardcoded_edid;
- int bios_hardcoded_edid_size;
+ const struct drm_edid *bios_hardcoded_edid;
- /* pointer to fbdev info structure */
- struct amdgpu_fbdev *rfbdev;
/* firmware flags */
u32 firmware_flags;
/* pointer to backlight encoder */
@@ -352,22 +340,110 @@ struct amdgpu_mode_info {
int num_crtc; /* number of crtcs */
int num_hpd; /* number of hpd pins */
int num_dig; /* number of dig blocks */
+ bool gpu_vm_support; /* supports display from GTT */
int disp_priority;
const struct amdgpu_display_funcs *funcs;
const enum drm_plane_type *plane_type;
+
+ /* Driver-private color mgmt props */
+
+ /* @plane_degamma_lut_property: Plane property to set a degamma LUT to
+ * convert encoded values to light linear values before sampling or
+ * blending.
+ */
+ struct drm_property *plane_degamma_lut_property;
+ /* @plane_degamma_lut_size_property: Plane property to define the max
+ * size of degamma LUT as supported by the driver (read-only).
+ */
+ struct drm_property *plane_degamma_lut_size_property;
+ /**
+ * @plane_degamma_tf_property: Plane pre-defined transfer function to
+ * go from scanout/encoded values to linear values.
+ */
+ struct drm_property *plane_degamma_tf_property;
+ /**
+ * @plane_hdr_mult_property:
+ */
+ struct drm_property *plane_hdr_mult_property;
+
+ struct drm_property *plane_ctm_property;
+ /**
+ * @plane_shaper_lut_property: Plane property to set pre-blending shaper LUT
+ * that converts color content before 3D LUT. If
+ * plane_shaper_tf_property != Identity TF, AMD color module will
+ * combine the user LUT values with pre-defined TF into the LUT
+ * parameters to be programmed.
+ */
+ struct drm_property *plane_shaper_lut_property;
+ /**
+ * @plane_shaper_lut_size_property: Plane property for the size of
+ * pre-blending shaper LUT as supported by the driver (read-only).
+ */
+ struct drm_property *plane_shaper_lut_size_property;
+ /**
+ * @plane_shaper_tf_property: Plane property to set a predefined
+ * transfer function for pre-blending shaper (before applying 3D LUT)
+ * with or without LUT. There is no shaper ROM, but we can use AMD
+ * color modules to program LUT parameters from predefined TF (or
+ * from a combination of pre-defined TF and the custom 1D LUT).
+ */
+ struct drm_property *plane_shaper_tf_property;
+ /**
+ * @plane_lut3d_property: Plane property for color transformation using
+ * a 3D LUT (pre-blending), a three-dimensional array where each
+ * element is an RGB triplet. Each dimension has the size of
+ * lut3d_size. The array contains samples from the approximated
+ * function. On AMD, values between samples are estimated by
+ * tetrahedral interpolation. The array is accessed with three indices,
+ * one for each input dimension (color channel), blue being the
+ * outermost dimension, red the innermost.
+ */
+ struct drm_property *plane_lut3d_property;
+ /**
+ * @plane_lut3d_size_property: Plane property to define the max
+ * size of 3D LUT as supported by the driver (read-only). The max size
+ * is the max size of one dimension and, therefore, the max number of
+ * entries for 3D LUT array is the 3D LUT size cubed;
+ */
+ struct drm_property *plane_lut3d_size_property;
+ /**
+ * @plane_blend_lut_property: Plane property for output gamma before
+ * blending. Userspace sets a blend LUT to convert colors after 3D LUT
+ * conversion. It works as a post-3DLUT 1D LUT. With shaper LUT, they
+ * are sandwiching 3D LUT with two 1D LUT. If plane_blend_tf_property
+ * != Identity TF, AMD color module will combine the user LUT values
+ * with pre-defined TF into the LUT parameters to be programmed.
+ */
+ struct drm_property *plane_blend_lut_property;
+ /**
+ * @plane_blend_lut_size_property: Plane property to define the max
+ * size of blend LUT as supported by the driver (read-only).
+ */
+ struct drm_property *plane_blend_lut_size_property;
+ /**
+ * @plane_blend_tf_property: Plane property to set a predefined
+ * transfer function for pre-blending blend/out_gamma (after applying
+ * 3D LUT) with or without LUT. There is no blend ROM, but we can use
+ * AMD color modules to program LUT parameters from predefined TF (or
+ * from a combination of pre-defined TF and the custom 1D LUT).
+ */
+ struct drm_property *plane_blend_tf_property;
+ /* @regamma_tf_property: Transfer function for CRTC regamma
+ * (post-blending). Possible values are defined by `enum
+ * amdgpu_transfer_function`. There is no regamma ROM, but we can use
+ * AMD color modules to program LUT parameters from predefined TF (or
+ * from a combination of pre-defined TF and the custom 1D LUT).
+ */
+ struct drm_property *regamma_tf_property;
};
#define AMDGPU_MAX_BL_LEVEL 0xFF
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
-
struct amdgpu_backlight_privdata {
struct amdgpu_encoder *encoder;
uint8_t negative;
};
-#endif
-
struct amdgpu_atom_ss {
uint16_t percentage;
uint16_t percentage_divider;
@@ -422,8 +498,6 @@ struct amdgpu_crtc {
struct drm_connector *connector;
/* for dpm */
u32 line_time;
- u32 wm_low;
- u32 wm_high;
u32 lb_vblank_lead_lines;
struct drm_display_mode hw_mode;
/* for virtual dce */
@@ -432,6 +506,10 @@ struct amdgpu_crtc {
int otg_inst;
struct drm_pending_vblank_event *event;
+
+ bool wb_pending;
+ bool wb_enabled;
+ struct drm_writeback_connector *wb_conn;
};
struct amdgpu_encoder_atom_dig {
@@ -531,6 +609,7 @@ struct amdgpu_i2c_adapter {
struct i2c_adapter base;
struct ddc_service *ddc_service;
+ bool oem;
};
#define TO_DM_AUX(x) container_of((x), struct amdgpu_dm_dp_aux, aux)
@@ -549,6 +628,7 @@ struct amdgpu_connector {
void *con_priv;
bool dac_load_detect;
bool detected_by_load; /* if the connection status was determined by load */
+ bool detected_hpd_without_ddc; /* if an HPD signal was detected on DVI, but ddc probing failed */
uint16_t connector_object_id;
struct amdgpu_hpd hpd;
struct amdgpu_router router;
@@ -564,8 +644,8 @@ struct amdgpu_mst_connector {
struct drm_dp_mst_topology_mgr mst_mgr;
struct amdgpu_dm_dp_aux dm_dp_aux;
- struct drm_dp_mst_port *port;
- struct amdgpu_connector *mst_port;
+ struct drm_dp_mst_port *mst_output_port;
+ struct amdgpu_connector *mst_root;
bool is_mst_connector;
struct amdgpu_encoder *mst_encoder;
};
@@ -602,19 +682,6 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
int *hpos, ktime_t *stime, ktime_t *etime,
const struct drm_display_mode *mode);
-int amdgpu_display_gem_fb_init(struct drm_device *dev,
- struct amdgpu_framebuffer *rfb,
- const struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_gem_object *obj);
-int amdgpu_display_gem_fb_verify_and_init(
- struct drm_device *dev, struct amdgpu_framebuffer *rfb,
- struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_gem_object *obj);
-int amdgpu_display_framebuffer_init(struct drm_device *dev,
- struct amdgpu_framebuffer *rfb,
- const struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_gem_object *obj);
-
int amdgpufb_remove(struct drm_device *dev, struct drm_framebuffer *fb);
void amdgpu_enc_destroy(struct drm_encoder *encoder);
@@ -631,15 +698,6 @@ bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc,
int *hpos, ktime_t *stime, ktime_t *etime,
const struct drm_display_mode *mode);
-/* fbdev layer */
-int amdgpu_fbdev_init(struct amdgpu_device *adev);
-void amdgpu_fbdev_fini(struct amdgpu_device *adev);
-void amdgpu_fbdev_set_suspend(struct amdgpu_device *adev, int state);
-int amdgpu_fbdev_total_size(struct amdgpu_device *adev);
-bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj);
-
-int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int bpp, bool tiled);
-
/* amdgpu_display.c */
void amdgpu_display_print_display_setup(struct drm_device *dev);
int amdgpu_display_modeset_create_props(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
index 6afb02fef8cf..a974265837f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
@@ -22,61 +22,65 @@
#include "amdgpu.h"
#include "amdgpu_ras.h"
-int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev)
+int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev)
{
- int r;
- struct ras_ih_if ih_info = {
- .cb = NULL,
- };
- struct ras_fs_if fs_info = {
- .sysfs_name = "pcie_bif_err_count",
- };
+ int err;
+ struct amdgpu_nbio_ras *ras;
+
+ if (!adev->nbio.ras)
+ return 0;
- if (!adev->nbio.ras_if) {
- adev->nbio.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
- if (!adev->nbio.ras_if)
- return -ENOMEM;
- adev->nbio.ras_if->block = AMDGPU_RAS_BLOCK__PCIE_BIF;
- adev->nbio.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
- adev->nbio.ras_if->sub_block_index = 0;
+ ras = adev->nbio.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register pcie_bif ras block!\n");
+ return err;
}
- ih_info.head = fs_info.head = *adev->nbio.ras_if;
- r = amdgpu_ras_late_init(adev, adev->nbio.ras_if,
- &fs_info, &ih_info);
+
+ strcpy(ras->ras_block.ras_comm.name, "pcie_bif");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__PCIE_BIF;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->nbio.ras_if = &ras->ras_block.ras_comm;
+
+ return 0;
+}
+
+u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+ if (adev->nbio.funcs && adev->nbio.funcs->get_pcie_replay_count)
+ return adev->nbio.funcs->get_pcie_replay_count(adev);
+
+ return 0;
+}
+
+bool amdgpu_nbio_is_replay_cnt_supported(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev) || !adev->asic_funcs ||
+ !adev->asic_funcs->get_pcie_replay_count ||
+ (!adev->nbio.funcs || !adev->nbio.funcs->get_pcie_replay_count))
+ return false;
+
+ return true;
+}
+
+int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+ r = amdgpu_ras_block_late_init(adev, ras_block);
if (r)
- goto free;
+ return r;
- if (amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) {
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
r = amdgpu_irq_get(adev, &adev->nbio.ras_controller_irq, 0);
if (r)
goto late_fini;
r = amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0);
if (r)
goto late_fini;
- } else {
- r = 0;
- goto free;
}
return 0;
late_fini:
- amdgpu_ras_late_fini(adev, adev->nbio.ras_if, &ih_info);
-free:
- kfree(adev->nbio.ras_if);
- adev->nbio.ras_if = NULL;
+ amdgpu_ras_block_late_fini(adev, ras_block);
return r;
}
-
-void amdgpu_nbio_ras_fini(struct amdgpu_device *adev)
-{
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF) &&
- adev->nbio.ras_if) {
- struct ras_common_if *ras_if = adev->nbio.ras_if;
- struct ras_ih_if ih_info = {
- .cb = NULL,
- };
-
- amdgpu_ras_late_fini(adev, ras_if, &ih_info);
- kfree(ras_if);
- }
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index 843052205bd5..b528de6a01f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -47,15 +47,12 @@ struct nbio_hdp_flush_reg {
u32 ref_and_mask_sdma7;
};
-struct amdgpu_nbio_ras_funcs {
+struct amdgpu_nbio_ras {
+ struct amdgpu_ras_block_object ras_block;
void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device *adev);
void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device *adev);
int (*init_ras_controller_interrupt)(struct amdgpu_device *adev);
int (*init_ras_err_event_athub_interrupt)(struct amdgpu_device *adev);
- void (*query_ras_error_count)(struct amdgpu_device *adev,
- void *ras_error_status);
- int (*ras_late_init)(struct amdgpu_device *adev);
- void (*ras_fini)(struct amdgpu_device *adev);
};
struct amdgpu_nbio_funcs {
@@ -64,6 +61,7 @@ struct amdgpu_nbio_funcs {
u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev);
u32 (*get_pcie_index_offset)(struct amdgpu_device *adev);
u32 (*get_pcie_data_offset)(struct amdgpu_device *adev);
+ u32 (*get_pcie_index_hi_offset)(struct amdgpu_device *adev);
u32 (*get_pcie_port_index_offset)(struct amdgpu_device *adev);
u32 (*get_pcie_port_data_offset)(struct amdgpu_device *adev);
u32 (*get_rev_id)(struct amdgpu_device *adev);
@@ -71,8 +69,11 @@ struct amdgpu_nbio_funcs {
u32 (*get_memsize)(struct amdgpu_device *adev);
void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
bool use_doorbell, int doorbell_index, int doorbell_size);
+ void (*vpe_doorbell_range)(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index, int doorbell_size);
void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell,
int doorbell_index, int instance);
+ void (*gc_doorbell_init)(struct amdgpu_device *adev);
void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
bool enable);
void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,
@@ -86,7 +87,7 @@ struct amdgpu_nbio_funcs {
void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev,
bool enable);
void (*get_clockgating_state)(struct amdgpu_device *adev,
- u32 *flags);
+ u64 *flags);
void (*ih_control)(struct amdgpu_device *adev);
void (*init_registers)(struct amdgpu_device *adev);
void (*remap_hdp_registers)(struct amdgpu_device *adev);
@@ -96,6 +97,13 @@ struct amdgpu_nbio_funcs {
void (*apply_lc_spc_mode_wa)(struct amdgpu_device *adev);
void (*apply_l1_link_width_reconfig_wa)(struct amdgpu_device *adev);
void (*clear_doorbell_interrupt)(struct amdgpu_device *adev);
+ u32 (*get_rom_offset)(struct amdgpu_device *adev);
+ int (*get_compute_partition_mode)(struct amdgpu_device *adev);
+ u32 (*get_memory_partition_mode)(struct amdgpu_device *adev,
+ u32 *supp_modes);
+ bool (*is_nps_switch_requested)(struct amdgpu_device *adev);
+ u64 (*get_pcie_replay_count)(struct amdgpu_device *adev);
+ void (*set_reg_remap)(struct amdgpu_device *adev);
};
struct amdgpu_nbio {
@@ -104,9 +112,13 @@ struct amdgpu_nbio {
struct amdgpu_irq_src ras_err_event_athub_irq;
struct ras_common_if *ras_if;
const struct amdgpu_nbio_funcs *funcs;
- const struct amdgpu_nbio_ras_funcs *ras_funcs;
+ struct amdgpu_nbio_ras *ras;
};
-int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev);
-void amdgpu_nbio_ras_fini(struct amdgpu_device *adev);
+int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev);
+
+bool amdgpu_nbio_is_replay_cnt_supported(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 4fcfc2313b8c..e08f58de4b17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -32,12 +32,17 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/dma-buf.h>
+#include <linux/export.h>
+#include <drm/drm_drv.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_cache.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_dma_buf.h"
/**
* DOC: amdgpu_object
@@ -58,7 +63,7 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
amdgpu_bo_kunmap(bo);
- if (bo->tbo.base.import_attach)
+ if (drm_gem_is_imported(&bo->tbo.base))
drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg);
drm_gem_object_release(&bo->tbo.base);
amdgpu_bo_unref(&bo->parent);
@@ -75,23 +80,6 @@ static void amdgpu_bo_user_destroy(struct ttm_buffer_object *tbo)
amdgpu_bo_destroy(tbo);
}
-static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo)
-{
- struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
- struct amdgpu_bo_vm *vmbo;
-
- vmbo = to_amdgpu_bo_vm(bo);
- /* in case amdgpu_device_recover_vram got NULL of bo->parent */
- if (!list_empty(&vmbo->shadow_list)) {
- mutex_lock(&adev->shadow_list_lock);
- list_del_init(&vmbo->shadow_list);
- mutex_unlock(&adev->shadow_list_lock);
- }
-
- amdgpu_bo_destroy(tbo);
-}
-
/**
* amdgpu_bo_is_amdgpu_bo - check if the buffer object is an &amdgpu_bo
* @bo: buffer object to be checked
@@ -105,8 +93,7 @@ static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo)
bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
{
if (bo->destroy == &amdgpu_bo_destroy ||
- bo->destroy == &amdgpu_bo_user_destroy ||
- bo->destroy == &amdgpu_bo_vm_destroy)
+ bo->destroy == &amdgpu_bo_user_destroy)
return true;
return false;
@@ -129,20 +116,48 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
u32 c = 0;
if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
- unsigned visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
-
- places[c].fpfn = 0;
- places[c].lpfn = 0;
+ unsigned int visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
+ int8_t mem_id = KFD_XCP_MEM_ID(adev, abo->xcp_id);
+
+ if (adev->gmc.mem_partitions && mem_id >= 0) {
+ places[c].fpfn = adev->gmc.mem_partitions[mem_id].range.fpfn;
+ /*
+ * memory partition range lpfn is inclusive start + size - 1
+ * TTM place lpfn is exclusive start + size
+ */
+ places[c].lpfn = adev->gmc.mem_partitions[mem_id].range.lpfn + 1;
+ } else {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ }
places[c].mem_type = TTM_PL_VRAM;
places[c].flags = 0;
if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
- places[c].lpfn = visible_pfn;
+ places[c].lpfn = min_not_zero(places[c].lpfn, visible_pfn);
else
places[c].flags |= TTM_PL_FLAG_TOPDOWN;
- if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+ if (abo->tbo.type == ttm_bo_type_kernel &&
+ flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
+
+ c++;
+ }
+
+ if (domain & AMDGPU_GEM_DOMAIN_DOORBELL) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type = AMDGPU_PL_DOORBELL;
+ places[c].flags = 0;
+ c++;
+ }
+
+ if (domain & AMDGPU_GEM_DOMAIN_MMIO_REMAP) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type = AMDGPU_PL_MMIO_REMAP;
+ places[c].flags = 0;
c++;
}
@@ -153,6 +168,13 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
abo->flags & AMDGPU_GEM_CREATE_PREEMPTIBLE ?
AMDGPU_PL_PREEMPT : TTM_PL_TT;
places[c].flags = 0;
+ /*
+ * When GTT is just an alternative to VRAM make sure that we
+ * only use it as fallback and still try to fill up VRAM first.
+ */
+ if (abo->tbo.resource && !(adev->flags & AMD_IS_APU) &&
+ domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM)
+ places[c].flags |= TTM_PL_FLAG_FALLBACK;
c++;
}
@@ -200,9 +222,6 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
placement->num_placement = c;
placement->placement = places;
-
- placement->num_busy_placement = c;
- placement->busy_placement = places;
}
/**
@@ -315,6 +334,9 @@ error_free:
*
* Allocates and pins a BO for kernel internal use.
*
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to create and access the kernel BO.
+ *
* Note: For bo_ptr new BO is only created if bo_ptr points to NULL.
*
* Returns:
@@ -340,22 +362,89 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
}
/**
+ * amdgpu_bo_create_isp_user - create user BO for isp
+ *
+ * @adev: amdgpu device object
+ * @dma_buf: DMABUF handle for isp buffer
+ * @domain: where to place it
+ * @bo: used to initialize BOs in structures
+ * @gpu_addr: GPU addr of the pinned BO
+ *
+ * Imports isp DMABUF to allocate and pin a user BO for isp internal use. It does
+ * GART alloc to generate gpu_addr for BO to make it accessible through the
+ * GART aperture for ISP HW.
+ *
+ * This function is exported to allow the V4L2 isp device external to drm device
+ * to create and access the isp user BO.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
+ */
+int amdgpu_bo_create_isp_user(struct amdgpu_device *adev,
+ struct dma_buf *dma_buf, u32 domain, struct amdgpu_bo **bo,
+ u64 *gpu_addr)
+
+{
+ struct drm_gem_object *gem_obj;
+ int r;
+
+ gem_obj = amdgpu_gem_prime_import(&adev->ddev, dma_buf);
+ *bo = gem_to_amdgpu_bo(gem_obj);
+ if (!(*bo)) {
+ dev_err(adev->dev, "failed to get valid isp user bo\n");
+ return -EINVAL;
+ }
+
+ r = amdgpu_bo_reserve(*bo, false);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to reserve isp user bo\n", r);
+ return r;
+ }
+
+ r = amdgpu_bo_pin(*bo, domain);
+ if (r) {
+ dev_err(adev->dev, "(%d) isp user bo pin failed\n", r);
+ goto error_unreserve;
+ }
+
+ r = amdgpu_ttm_alloc_gart(&(*bo)->tbo);
+ if (r) {
+ dev_err(adev->dev, "%p bind failed\n", *bo);
+ goto error_unpin;
+ }
+
+ if (!WARN_ON(!gpu_addr))
+ *gpu_addr = amdgpu_bo_gpu_offset(*bo);
+
+ amdgpu_bo_unreserve(*bo);
+
+ return 0;
+
+error_unpin:
+ amdgpu_bo_unpin(*bo);
+error_unreserve:
+ amdgpu_bo_unreserve(*bo);
+ amdgpu_bo_unref(bo);
+
+ return r;
+}
+
+/**
* amdgpu_bo_create_kernel_at - create BO for kernel use at specific location
*
* @adev: amdgpu device object
* @offset: offset of the BO
* @size: size of the BO
- * @domain: where to place it
* @bo_ptr: used to initialize BOs in structures
* @cpu_addr: optional CPU address mapping
*
- * Creates a kernel BO at a specific offset in the address space of the domain.
+ * Creates a kernel BO at a specific offset in VRAM.
*
* Returns:
* 0 on success, negative error code otherwise.
*/
int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
- uint64_t offset, uint64_t size, uint32_t domain,
+ uint64_t offset, uint64_t size,
struct amdgpu_bo **bo_ptr, void **cpu_addr)
{
struct ttm_operation_ctx ctx = { false, false };
@@ -365,8 +454,9 @@ int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
offset &= PAGE_MASK;
size = ALIGN(size, PAGE_SIZE);
- r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE, domain, bo_ptr,
- NULL, cpu_addr);
+ r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM, bo_ptr, NULL,
+ cpu_addr);
if (r)
return r;
@@ -414,6 +504,9 @@ error:
* @cpu_addr: pointer to where the BO's CPU memory space address was stored
*
* unmaps and unpin a BO for kernel internal use.
+ *
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to free the kernel BO.
*/
void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
void **cpu_addr)
@@ -421,6 +514,8 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
if (*bo == NULL)
return;
+ WARN_ON(amdgpu_ttm_adev((*bo)->tbo.bdev)->in_suspend);
+
if (likely(amdgpu_bo_reserve(*bo, true) == 0)) {
if (cpu_addr)
amdgpu_bo_kunmap(*bo);
@@ -437,7 +532,29 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
*cpu_addr = NULL;
}
-/* Validate bo size is bit bigger then the request domain */
+/**
+ * amdgpu_bo_free_isp_user - free BO for isp use
+ *
+ * @bo: amdgpu isp user BO to free
+ *
+ * unpin and unref BO for isp internal use.
+ *
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to free the isp user BO.
+ */
+void amdgpu_bo_free_isp_user(struct amdgpu_bo *bo)
+{
+ if (bo == NULL)
+ return;
+
+ if (amdgpu_bo_reserve(bo, true) == 0) {
+ amdgpu_bo_unpin(bo);
+ amdgpu_bo_unreserve(bo);
+ }
+ amdgpu_bo_unref(&bo);
+}
+
+/* Validate that the BO size fits within the requested domain */
static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
unsigned long size, u32 domain)
{
@@ -445,33 +562,26 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
/*
* If GTT is part of requested domains the check must succeed to
- * allow fall back to GTT
+ * allow fall back to GTT.
*/
- if (domain & AMDGPU_GEM_DOMAIN_GTT) {
+ if (domain & AMDGPU_GEM_DOMAIN_GTT)
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
-
- if (size < (man->size << PAGE_SHIFT))
- return true;
- else
- goto fail;
- }
-
- if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
+ else if (domain & AMDGPU_GEM_DOMAIN_VRAM)
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
+ else
+ return true;
- if (size < (man->size << PAGE_SHIFT))
- return true;
- else
- goto fail;
+ if (!man) {
+ if (domain & AMDGPU_GEM_DOMAIN_GTT)
+ WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized");
+ return false;
}
+ /* TODO: add more domain checks, such as AMDGPU_GEM_DOMAIN_CPU, _DOMAIN_DOORBELL */
+ if (size < man->size)
+ return true;
- /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */
- return true;
-
-fail:
- DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size,
- man->size << PAGE_SHIFT);
+ DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, man->size);
return false;
}
@@ -541,6 +651,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
/* GWS and OA don't need any alignment. */
page_align = bp->byte_align;
size <<= PAGE_SHIFT;
+
} else if (bp->domain & AMDGPU_GEM_DOMAIN_GDS) {
/* Both size and alignment must be a multiple of 4. */
page_align = ALIGN(bp->byte_align, 4);
@@ -561,16 +672,25 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (bo == NULL)
return -ENOMEM;
drm_gem_private_object_init(adev_to_drm(adev), &bo->tbo.base, size);
+ bo->tbo.base.funcs = &amdgpu_gem_object_funcs;
bo->vm_bo = NULL;
bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
bp->domain;
bo->allowed_domains = bo->preferred_domains;
if (bp->type != ttm_bo_type_kernel &&
+ !(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE) &&
bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
bo->flags = bp->flags;
+ if (adev->gmc.mem_partitions)
+ /* For GPUs with spatial partitioning, bo->xcp_id=-1 means any partition */
+ bo->xcp_id = bp->xcp_id_plus1 - 1;
+ else
+ /* For GPUs without spatial partitioning */
+ bo->xcp_id = 0;
+
if (!amdgpu_bo_support_uswc(bo->flags))
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
@@ -581,20 +701,21 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
else
amdgpu_bo_placement_from_domain(bo, bp->domain);
if (bp->type == ttm_bo_type_kernel)
+ bo->tbo.priority = 2;
+ else if (!(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE))
bo->tbo.priority = 1;
if (!bp->destroy)
bp->destroy = &amdgpu_bo_destroy;
- r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, bp->type,
+ r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, bp->type,
&bo->placement, page_align, &ctx, NULL,
bp->resv, bp->destroy);
if (unlikely(r != 0))
return r;
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
- bo->tbo.resource->mem_type == TTM_PL_VRAM &&
- bo->tbo.resource->start < adev->gmc.visible_vram_size >> PAGE_SHIFT)
+ amdgpu_res_cpu_visible(adev, bo->tbo.resource))
amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
ctx.bytes_moved);
else
@@ -604,13 +725,12 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
struct dma_fence *fence;
- r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence);
+ r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
if (unlikely(r))
goto fail_unreserve;
- amdgpu_bo_fence(bo, fence, false);
- dma_fence_put(bo->tbo.moving);
- bo->tbo.moving = dma_fence_get(fence);
+ dma_resv_add_fence(bo->tbo.base.resv, fence,
+ DMA_RESV_USAGE_KERNEL);
dma_fence_put(fence);
}
if (!bp->resv)
@@ -684,61 +804,15 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev,
* num of amdgpu_vm_pt entries.
*/
BUG_ON(bp->bo_ptr_size < sizeof(struct amdgpu_bo_vm));
- bp->destroy = &amdgpu_bo_vm_destroy;
r = amdgpu_bo_create(adev, bp, &bo_ptr);
if (r)
return r;
*vmbo_ptr = to_amdgpu_bo_vm(bo_ptr);
- INIT_LIST_HEAD(&(*vmbo_ptr)->shadow_list);
return r;
}
/**
- * amdgpu_bo_add_to_shadow_list - add a BO to the shadow list
- *
- * @vmbo: BO that will be inserted into the shadow list
- *
- * Insert a BO to the shadow list.
- */
-void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo)
-{
- struct amdgpu_device *adev = amdgpu_ttm_adev(vmbo->bo.tbo.bdev);
-
- mutex_lock(&adev->shadow_list_lock);
- list_add_tail(&vmbo->shadow_list, &adev->shadow_list);
- mutex_unlock(&adev->shadow_list_lock);
-}
-
-/**
- * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow
- *
- * @shadow: &amdgpu_bo shadow to be restored
- * @fence: dma_fence associated with the operation
- *
- * Copies a buffer object's shadow content back to the object.
- * This is used for recovering a buffer from its shadow in case of a gpu
- * reset where vram context may be lost.
- *
- * Returns:
- * 0 for success or a negative error code on failure.
- */
-int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence)
-
-{
- struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev);
- struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
- uint64_t shadow_addr, parent_addr;
-
- shadow_addr = amdgpu_bo_gpu_offset(shadow);
- parent_addr = amdgpu_bo_gpu_offset(shadow->parent);
-
- return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
- amdgpu_bo_size(shadow), NULL, fence,
- true, false, false);
-}
-
-/**
* amdgpu_bo_kmap - map an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object to be mapped
* @ptr: kernel virtual address to be returned
@@ -757,6 +831,11 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
return -EPERM;
+ r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL,
+ false, MAX_SCHEDULE_TIMEOUT);
+ if (r < 0)
+ return r;
+
kptr = amdgpu_bo_kptr(bo);
if (kptr) {
if (ptr)
@@ -764,12 +843,7 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
return 0;
}
- r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false,
- MAX_SCHEDULE_TIMEOUT);
- if (r < 0)
- return r;
-
- r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.resource->num_pages, &bo->kmap);
+ r = ttm_bo_kmap(&bo->tbo, 0, PFN_UP(bo->tbo.base.size), &bo->kmap);
if (r)
return r;
@@ -821,7 +895,7 @@ struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
if (bo == NULL)
return NULL;
- ttm_bo_get(&bo->tbo);
+ drm_gem_object_get(&bo->tbo.base);
return bo;
}
@@ -833,40 +907,30 @@ struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
*/
void amdgpu_bo_unref(struct amdgpu_bo **bo)
{
- struct ttm_buffer_object *tbo;
-
if ((*bo) == NULL)
return;
- tbo = &((*bo)->tbo);
- ttm_bo_put(tbo);
+ drm_gem_object_put(&(*bo)->tbo.base);
*bo = NULL;
}
/**
- * amdgpu_bo_pin_restricted - pin an &amdgpu_bo buffer object
+ * amdgpu_bo_pin - pin an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object to be pinned
* @domain: domain to be pinned to
- * @min_offset: the start of requested address range
- * @max_offset: the end of requested address range
*
- * Pins the buffer object according to requested domain and address range. If
- * the memory is unbound gart memory, binds the pages into gart table. Adjusts
- * pin_count and pin_size accordingly.
+ * Pins the buffer object according to requested domain. If the memory is
+ * unbound gart memory, binds the pages into gart table. Adjusts pin_count and
+ * pin_size accordingly.
*
* Pinning means to lock pages in memory along with keeping them at a fixed
* offset. It is required when a buffer can not be moved, for example, when
* a display buffer is being scanned out.
*
- * Compared with amdgpu_bo_pin(), this function gives more flexibility on
- * where to pin a buffer if there are specific restrictions on where a buffer
- * must be located.
- *
* Returns:
* 0 for success or a negative error code on failure.
*/
-int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
- u64 min_offset, u64 max_offset)
+int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_operation_ctx ctx = { false, false };
@@ -875,11 +939,12 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
return -EPERM;
- if (WARN_ON_ONCE(min_offset > max_offset))
- return -EINVAL;
+ /* Check domain to be pinned to against preferred domains */
+ if (bo->preferred_domains & domain)
+ domain = bo->preferred_domains & domain;
/* A shared bo cannot be migrated to VRAM */
- if (bo->tbo.base.import_attach) {
+ if (drm_gem_is_imported(&bo->tbo.base)) {
if (domain & AMDGPU_GEM_DOMAIN_GTT)
domain = AMDGPU_GEM_DOMAIN_GTT;
else
@@ -899,14 +964,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
return -EINVAL;
ttm_bo_pin(&bo->tbo);
-
- if (max_offset != 0) {
- u64 domain_start = amdgpu_ttm_domain_start(adev,
- mem_type);
- WARN_ON_ONCE(max_offset <
- (amdgpu_bo_gpu_offset(bo) - domain_start));
- }
-
return 0;
}
@@ -915,7 +972,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
*/
domain = amdgpu_bo_get_preferred_domain(adev, domain);
- if (bo->tbo.base.import_attach)
+ if (drm_gem_is_imported(&bo->tbo.base))
dma_buf_pin(bo->tbo.base.import_attach);
/* force to pin into visible video ram */
@@ -923,16 +980,9 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
amdgpu_bo_placement_from_domain(bo, domain);
for (i = 0; i < bo->placement.num_placement; i++) {
- unsigned fpfn, lpfn;
-
- fpfn = min_offset >> PAGE_SHIFT;
- lpfn = max_offset >> PAGE_SHIFT;
-
- if (fpfn > bo->placements[i].fpfn)
- bo->placements[i].fpfn = fpfn;
- if (!bo->placements[i].lpfn ||
- (lpfn && lpfn < bo->placements[i].lpfn))
- bo->placements[i].lpfn = lpfn;
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
+ bo->placements[i].mem_type == TTM_PL_VRAM)
+ bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
}
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
@@ -943,12 +993,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
ttm_bo_pin(&bo->tbo);
- domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
- if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
+ if (bo->tbo.resource->mem_type == TTM_PL_VRAM) {
atomic64_add(amdgpu_bo_size(bo), &adev->vram_pin_size);
atomic64_add(amdgpu_vram_mgr_bo_visible_size(bo),
&adev->visible_pin_size);
- } else if (domain == AMDGPU_GEM_DOMAIN_GTT) {
+ } else if (bo->tbo.resource->mem_type == TTM_PL_TT) {
atomic64_add(amdgpu_bo_size(bo), &adev->gart_pin_size);
}
@@ -957,24 +1006,6 @@ error:
}
/**
- * amdgpu_bo_pin - pin an &amdgpu_bo buffer object
- * @bo: &amdgpu_bo buffer object to be pinned
- * @domain: domain to be pinned to
- *
- * A simple wrapper to amdgpu_bo_pin_restricted().
- * Provides a simpler API for buffers that do not have any strict restrictions
- * on where a buffer must be located.
- *
- * Returns:
- * 0 for success or a negative error code on failure.
- */
-int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain)
-{
- bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
- return amdgpu_bo_pin_restricted(bo, domain, 0, 0);
-}
-
-/**
* amdgpu_bo_unpin - unpin an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object to be unpinned
*
@@ -992,7 +1023,7 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo)
if (bo->tbo.pin_count)
return;
- if (bo->tbo.base.import_attach)
+ if (drm_gem_is_imported(&bo->tbo.base))
dma_buf_unpin(bo->tbo.base.import_attach);
if (bo->tbo.resource->mem_type == TTM_PL_VRAM) {
@@ -1002,9 +1033,10 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo)
} else if (bo->tbo.resource->mem_type == TTM_PL_TT) {
atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size);
}
+
}
-static const char *amdgpu_vram_names[] = {
+static const char * const amdgpu_vram_names[] = {
"UNKNOWN",
"GDDR1",
"DDR2",
@@ -1015,7 +1047,10 @@ static const char *amdgpu_vram_names[] = {
"DDR3",
"DDR4",
"GDDR6",
- "DDR5"
+ "DDR5",
+ "LPDDR4",
+ "LPDDR5",
+ "HBM3E"
};
/**
@@ -1030,11 +1065,16 @@ static const char *amdgpu_vram_names[] = {
int amdgpu_bo_init(struct amdgpu_device *adev)
{
/* On A+A platform, VRAM can be mapped as WB */
- if (!adev->gmc.xgmi.connected_to_cpu) {
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
/* reserve PAT memory space to WC for VRAM */
- arch_io_reserve_memtype_wc(adev->gmc.aper_base,
+ int r = arch_io_reserve_memtype_wc(adev->gmc.aper_base,
adev->gmc.aper_size);
+ if (r) {
+ DRM_ERROR("Unable to set WC memtype for the aperture base\n");
+ return r;
+ }
+
/* Add an MTRR for the VRAM */
adev->gmc.vram_mtrr = arch_phys_wc_add(adev->gmc.aper_base,
adev->gmc.aper_size);
@@ -1056,7 +1096,17 @@ int amdgpu_bo_init(struct amdgpu_device *adev)
*/
void amdgpu_bo_fini(struct amdgpu_device *adev)
{
+ int idx;
+
amdgpu_ttm_fini(adev);
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
+ arch_phys_wc_del(adev->gmc.vram_mtrr);
+ arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
+ }
+ drm_dev_exit(idx);
+ }
}
/**
@@ -1118,8 +1168,8 @@ void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
* Returns:
* 0 for success or a negative error code on failure.
*/
-int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
- uint32_t metadata_size, uint64_t flags)
+int amdgpu_bo_set_metadata(struct amdgpu_bo *bo, void *metadata,
+ u32 metadata_size, uint64_t flags)
{
struct amdgpu_bo_user *ubo;
void *buffer;
@@ -1197,7 +1247,7 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
* amdgpu_bo_move_notify - notification about a memory move
* @bo: pointer to a buffer object
* @evict: if this move is evicting the buffer from the graphics address space
- * @new_mem: new information of the bufer object
+ * @new_mem: new resource for backing the BO
*
* Marks the corresponding &amdgpu_bo buffer object as invalid, also performs
* bookkeeping.
@@ -1207,52 +1257,24 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
bool evict,
struct ttm_resource *new_mem)
{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
- struct amdgpu_bo *abo;
struct ttm_resource *old_mem = bo->resource;
+ struct amdgpu_bo *abo;
if (!amdgpu_bo_is_amdgpu_bo(bo))
return;
abo = ttm_to_amdgpu_bo(bo);
- amdgpu_vm_bo_invalidate(adev, abo, evict);
+ amdgpu_vm_bo_move(abo, new_mem, evict);
amdgpu_bo_kunmap(abo);
- if (abo->tbo.base.dma_buf && !abo->tbo.base.import_attach &&
- bo->resource->mem_type != TTM_PL_SYSTEM)
+ if (abo->tbo.base.dma_buf && !drm_gem_is_imported(&abo->tbo.base) &&
+ old_mem && old_mem->mem_type != TTM_PL_SYSTEM)
dma_buf_move_notify(abo->tbo.base.dma_buf);
- /* remember the eviction */
- if (evict)
- atomic64_inc(&adev->num_evictions);
-
- /* update statistics */
- if (!new_mem)
- return;
-
/* move_notify is called before move happens */
- trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
-}
-
-void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem,
- uint64_t *gtt_mem, uint64_t *cpu_mem)
-{
- unsigned int domain;
-
- domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
- switch (domain) {
- case AMDGPU_GEM_DOMAIN_VRAM:
- *vram_mem += amdgpu_bo_size(bo);
- break;
- case AMDGPU_GEM_DOMAIN_GTT:
- *gtt_mem += amdgpu_bo_size(bo);
- break;
- case AMDGPU_GEM_DOMAIN_CPU:
- default:
- *cpu_mem += amdgpu_bo_size(bo);
- break;
- }
+ trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1,
+ old_mem ? old_mem->mem_type : -1);
}
/**
@@ -1264,6 +1286,7 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem,
*/
void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct dma_fence *fence = NULL;
struct amdgpu_bo *abo;
int r;
@@ -1273,28 +1296,42 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
abo = ttm_to_amdgpu_bo(bo);
+ WARN_ON(abo->vm_bo);
+
if (abo->kfd_bo)
amdgpu_amdkfd_release_notify(abo);
- /* We only remove the fence if the resv has individualized. */
- WARN_ON_ONCE(bo->type == ttm_bo_type_kernel
- && bo->base.resv != &bo->base._resv);
- if (bo->base.resv == &bo->base._resv)
- amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo);
-
- if (bo->resource->mem_type != TTM_PL_VRAM ||
- !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE))
+ /*
+ * We lock the private dma_resv object here and since the BO is about to
+ * be released nobody else should have a pointer to it.
+ * So when this locking here fails something is wrong with the reference
+ * counting.
+ */
+ if (WARN_ON_ONCE(!dma_resv_trylock(&bo->base._resv)))
return;
- dma_resv_lock(bo->base.resv, NULL);
+ amdgpu_amdkfd_remove_all_eviction_fences(abo);
- r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence);
- if (!WARN_ON(r)) {
- amdgpu_bo_fence(abo, fence, false);
- dma_fence_put(fence);
- }
+ if (!bo->resource || bo->resource->mem_type != TTM_PL_VRAM ||
+ !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE) ||
+ adev->in_suspend || drm_dev_is_unplugged(adev_to_drm(adev)))
+ goto out;
- dma_resv_unlock(bo->base.resv);
+ r = dma_resv_reserve_fences(&bo->base._resv, 1);
+ if (r)
+ goto out;
+
+ r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true,
+ AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
+ if (WARN_ON(r))
+ goto out;
+
+ amdgpu_vram_mgr_set_cleared(bo->resource);
+ dma_resv_add_fence(&bo->base._resv, fence, DMA_RESV_USAGE_KERNEL);
+ dma_fence_put(fence);
+
+out:
+ dma_resv_unlock(&bo->base._resv);
}
/**
@@ -1313,17 +1350,12 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
- unsigned long offset;
int r;
/* Remember that this BO was accessed by the CPU */
abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- if (bo->resource->mem_type != TTM_PL_VRAM)
- return 0;
-
- offset = bo->resource->start << PAGE_SHIFT;
- if ((offset + bo->base.size) <= adev->gmc.visible_vram_size)
+ if (amdgpu_res_cpu_visible(adev, bo->resource))
return 0;
/* Can't move a pinned BO to visible VRAM */
@@ -1336,8 +1368,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
AMDGPU_GEM_DOMAIN_GTT);
/* Avoid costly evictions; only set GTT as a busy placement */
- abo->placement.num_busy_placement = 1;
- abo->placement.busy_placement = &abo->placements[1];
+ abo->placements[0].flags |= TTM_PL_FLAG_DESIRED;
r = ttm_bo_validate(bo, &abo->placement, &ctx);
if (unlikely(r == -EBUSY || r == -ERESTARTSYS))
@@ -1345,10 +1376,9 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
else if (unlikely(r))
return VM_FAULT_SIGBUS;
- offset = bo->resource->start << PAGE_SHIFT;
/* this should never happen */
if (bo->resource->mem_type == TTM_PL_VRAM &&
- (offset + bo->base.size) > adev->gmc.visible_vram_size)
+ !amdgpu_res_cpu_visible(adev, bo->resource))
return VM_FAULT_SIGBUS;
ttm_bo_move_to_lru_tail_unlocked(bo);
@@ -1367,11 +1397,17 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
bool shared)
{
struct dma_resv *resv = bo->tbo.base.resv;
+ int r;
- if (shared)
- dma_resv_add_shared_fence(resv, fence);
- else
- dma_resv_add_excl_fence(resv, fence);
+ r = dma_resv_reserve_fences(resv, 1);
+ if (r) {
+ /* As last resort on OOM we block for the fence */
+ dma_fence_wait(fence, false);
+ return;
+ }
+
+ dma_resv_add_fence(resv, fence, shared ? DMA_RESV_USAGE_READ :
+ DMA_RESV_USAGE_WRITE);
}
/**
@@ -1443,6 +1479,26 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
}
/**
+ * amdgpu_bo_fb_aper_addr - return FB aperture GPU offset of the VRAM bo
+ * @bo: amdgpu VRAM buffer object for which we query the offset
+ *
+ * Returns:
+ * current FB aperture GPU offset of the object.
+ */
+u64 amdgpu_bo_fb_aper_addr(struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ uint64_t offset, fb_base;
+
+ WARN_ON_ONCE(bo->tbo.resource->mem_type != TTM_PL_VRAM);
+
+ fb_base = adev->gmc.fb_start;
+ fb_base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+ offset = (bo->tbo.resource->start << PAGE_SHIFT) + fb_base;
+ return amdgpu_gmc_sign_extend(offset);
+}
+
+/**
* amdgpu_bo_gpu_offset_no_check - return GPU offset of bo
* @bo: amdgpu object for which we query the offset
*
@@ -1452,15 +1508,60 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- uint64_t offset;
+ uint64_t offset = AMDGPU_BO_INVALID_OFFSET;
- offset = (bo->tbo.resource->start << PAGE_SHIFT) +
- amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type);
+ if (bo->tbo.resource->mem_type == TTM_PL_TT)
+ offset = amdgpu_gmc_agp_addr(&bo->tbo);
+
+ if (offset == AMDGPU_BO_INVALID_OFFSET)
+ offset = (bo->tbo.resource->start << PAGE_SHIFT) +
+ amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type);
return amdgpu_gmc_sign_extend(offset);
}
/**
+ * amdgpu_bo_mem_stats_placement - bo placement for memory accounting
+ * @bo: the buffer object we should look at
+ *
+ * BO can have multiple preferred placements, to avoid double counting we want
+ * to file it under a single placement for memory stats.
+ * Luckily, if we take the highest set bit in preferred_domains the result is
+ * quite sensible.
+ *
+ * Returns:
+ * Which of the placements should the BO be accounted under.
+ */
+uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo)
+{
+ uint32_t domain = bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK;
+
+ if (!domain)
+ return TTM_PL_SYSTEM;
+
+ switch (rounddown_pow_of_two(domain)) {
+ case AMDGPU_GEM_DOMAIN_CPU:
+ return TTM_PL_SYSTEM;
+ case AMDGPU_GEM_DOMAIN_GTT:
+ return TTM_PL_TT;
+ case AMDGPU_GEM_DOMAIN_VRAM:
+ return TTM_PL_VRAM;
+ case AMDGPU_GEM_DOMAIN_GDS:
+ return AMDGPU_PL_GDS;
+ case AMDGPU_GEM_DOMAIN_GWS:
+ return AMDGPU_PL_GWS;
+ case AMDGPU_GEM_DOMAIN_OA:
+ return AMDGPU_PL_OA;
+ case AMDGPU_GEM_DOMAIN_DOORBELL:
+ return AMDGPU_PL_DOORBELL;
+ case AMDGPU_GEM_DOMAIN_MMIO_REMAP:
+ return AMDGPU_PL_MMIO_REMAP;
+ default:
+ return TTM_PL_SYSTEM;
+ }
+}
+
+/**
* amdgpu_bo_get_preferred_domain - get preferred domain
* @adev: amdgpu device object
* @domain: allowed :ref:`memory domains <amdgpu_memory_domains>`
@@ -1471,7 +1572,8 @@ u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo)
uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
uint32_t domain)
{
- if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) {
+ if ((domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) &&
+ ((adev->asic_type == CHIP_CARRIZO) || (adev->asic_type == CHIP_STONEY))) {
domain = AMDGPU_GEM_DOMAIN_VRAM;
if (adev->gmc.real_vram_size <= AMDGPU_SG_THRESHOLD)
domain = AMDGPU_GEM_DOMAIN_GTT;
@@ -1501,25 +1603,54 @@ uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
*/
u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct dma_buf_attachment *attachment;
struct dma_buf *dma_buf;
- unsigned int domain;
const char *placement;
unsigned int pin_count;
u64 size;
- domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
- switch (domain) {
- case AMDGPU_GEM_DOMAIN_VRAM:
- placement = "VRAM";
- break;
- case AMDGPU_GEM_DOMAIN_GTT:
- placement = " GTT";
- break;
- case AMDGPU_GEM_DOMAIN_CPU:
- default:
- placement = " CPU";
- break;
+ if (dma_resv_trylock(bo->tbo.base.resv)) {
+ if (!bo->tbo.resource) {
+ placement = "NONE";
+ } else {
+ switch (bo->tbo.resource->mem_type) {
+ case TTM_PL_VRAM:
+ if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
+ placement = "VRAM VISIBLE";
+ else
+ placement = "VRAM";
+ break;
+ case TTM_PL_TT:
+ placement = "GTT";
+ break;
+ case AMDGPU_PL_GDS:
+ placement = "GDS";
+ break;
+ case AMDGPU_PL_GWS:
+ placement = "GWS";
+ break;
+ case AMDGPU_PL_OA:
+ placement = "OA";
+ break;
+ case AMDGPU_PL_PREEMPT:
+ placement = "PREEMPTIBLE";
+ break;
+ case AMDGPU_PL_DOORBELL:
+ placement = "DOORBELL";
+ break;
+ case AMDGPU_PL_MMIO_REMAP:
+ placement = "MMIO REMAP";
+ break;
+ case TTM_PL_SYSTEM:
+ default:
+ placement = "CPU";
+ break;
+ }
+ }
+ dma_resv_unlock(bo->tbo.base.resv);
+ } else {
+ placement = "UNKNOWN";
}
size = amdgpu_bo_size(bo);
@@ -1534,9 +1665,9 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
attachment = READ_ONCE(bo->tbo.base.import_attach);
if (attachment)
- seq_printf(m, " imported from %p", dma_buf);
+ seq_printf(m, " imported from ino:%lu", file_inode(dma_buf->file)->i_ino);
else if (dma_buf)
- seq_printf(m, " exported as %p", dma_buf);
+ seq_printf(m, " exported as ino:%lu", file_inode(dma_buf->file)->i_ino);
amdgpu_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED);
amdgpu_bo_print_flag(m, bo, NO_CPU_ACCESS);
@@ -1545,7 +1676,11 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
amdgpu_bo_print_flag(m, bo, VRAM_CONTIGUOUS);
amdgpu_bo_print_flag(m, bo, VM_ALWAYS_VALID);
amdgpu_bo_print_flag(m, bo, EXPLICIT_SYNC);
-
+ /* Add the gem obj resv fence dump*/
+ if (dma_resv_trylock(bo->tbo.base.resv)) {
+ dma_resv_describe(bo->tbo.base.resv, m);
+ dma_resv_unlock(bo->tbo.base.resv);
+ }
seq_puts(m, "\n");
return size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 4c9cbdc66995..52c2d1731aab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -41,7 +41,6 @@
/* BO flag to indicate a KFD userptr BO */
#define AMDGPU_AMDKFD_CREATE_USERPTR_BO (1ULL << 63)
-#define AMDGPU_AMDKFD_CREATE_SVM_BO (1ULL << 62)
#define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo)
#define to_amdgpu_bo_vm(abo) container_of((abo), struct amdgpu_bo_vm, bo)
@@ -57,6 +56,8 @@ struct amdgpu_bo_param {
bool no_wait_gpu;
struct dma_resv *resv;
void (*destroy)(struct ttm_buffer_object *bo);
+ /* xcp partition number plus 1, 0 means any partition */
+ int8_t xcp_id_plus1;
};
/* bo virtual addresses in a vm */
@@ -68,7 +69,7 @@ struct amdgpu_bo_va_mapping {
uint64_t last;
uint64_t __subtree_last;
uint64_t offset;
- uint64_t flags;
+ uint32_t flags;
};
/* User space allocated BO in a VM */
@@ -89,6 +90,13 @@ struct amdgpu_bo_va {
bool cleared;
bool is_xgmi;
+
+ /*
+ * protected by vm reservation lock
+ * if non-zero, cannot unmap from GPU because user queues may still access it
+ */
+ unsigned int queue_refcount;
+ atomic_t userq_va_mapped;
};
struct amdgpu_bo {
@@ -109,6 +117,13 @@ struct amdgpu_bo {
struct mmu_interval_notifier notifier;
#endif
struct kgd_mem *kfd_bo;
+
+ /*
+ * For GPUs with spatial partitioning, xcp partition number, -1 means
+ * any partition. For other ASICs without spatial partition, always 0
+ * for memory accounting.
+ */
+ int8_t xcp_id;
};
struct amdgpu_bo_user {
@@ -122,8 +137,6 @@ struct amdgpu_bo_user {
struct amdgpu_bo_vm {
struct amdgpu_bo bo;
- struct amdgpu_bo *shadow;
- struct list_head shadow_list;
struct amdgpu_vm_bo_base entries[];
};
@@ -153,6 +166,10 @@ static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type)
return AMDGPU_GEM_DOMAIN_GWS;
case AMDGPU_PL_OA:
return AMDGPU_GEM_DOMAIN_OA;
+ case AMDGPU_PL_DOORBELL:
+ return AMDGPU_GEM_DOMAIN_DOORBELL;
+ case AMDGPU_PL_MMIO_REMAP:
+ return AMDGPU_GEM_DOMAIN_MMIO_REMAP;
default:
break;
}
@@ -214,28 +231,6 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo)
}
/**
- * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM
- */
-static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo)
-{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- struct amdgpu_res_cursor cursor;
-
- if (bo->tbo.resource->mem_type != TTM_PL_VRAM)
- return false;
-
- amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
- while (cursor.remaining) {
- if (cursor.start < adev->gmc.visible_vram_size)
- return true;
-
- amdgpu_res_next(&cursor, cursor.size);
- }
-
- return false;
-}
-
-/**
* amdgpu_bo_explicit_sync - return whether the bo is explicitly synced
*/
static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
@@ -254,22 +249,6 @@ static inline bool amdgpu_bo_encrypted(struct amdgpu_bo *bo)
return bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED;
}
-/**
- * amdgpu_bo_shadowed - check if the BO is shadowed
- *
- * @bo: BO to be tested.
- *
- * Returns:
- * NULL if not shadowed or else return a BO pointer.
- */
-static inline struct amdgpu_bo *amdgpu_bo_shadowed(struct amdgpu_bo *bo)
-{
- if (bo->tbo.type == ttm_bo_type_kernel)
- return to_amdgpu_bo_vm(bo)->shadow;
-
- return NULL;
-}
-
bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
@@ -284,8 +263,12 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
unsigned long size, int align,
u32 domain, struct amdgpu_bo **bo_ptr,
u64 *gpu_addr, void **cpu_addr);
+int amdgpu_bo_create_isp_user(struct amdgpu_device *adev,
+ struct dma_buf *dbuf, u32 domain,
+ struct amdgpu_bo **bo,
+ u64 *gpu_addr);
int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
- uint64_t offset, uint64_t size, uint32_t domain,
+ uint64_t offset, uint64_t size,
struct amdgpu_bo **bo_ptr, void **cpu_addr);
int amdgpu_bo_create_user(struct amdgpu_device *adev,
struct amdgpu_bo_param *bp,
@@ -295,14 +278,13 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev,
struct amdgpu_bo_vm **ubo_ptr);
void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
void **cpu_addr);
+void amdgpu_bo_free_isp_user(struct amdgpu_bo *bo);
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
void *amdgpu_bo_kptr(struct amdgpu_bo *bo);
void amdgpu_bo_kunmap(struct amdgpu_bo *bo);
struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo);
void amdgpu_bo_unref(struct amdgpu_bo **bo);
int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain);
-int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
- u64 min_offset, u64 max_offset);
void amdgpu_bo_unpin(struct amdgpu_bo *bo);
int amdgpu_bo_init(struct amdgpu_device *adev);
void amdgpu_bo_fini(struct amdgpu_device *adev);
@@ -325,27 +307,31 @@ int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, struct dma_resv *resv,
bool intr);
int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr);
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
+u64 amdgpu_bo_fb_aper_addr(struct amdgpu_bo *bo);
u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo);
-void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem,
- uint64_t *gtt_mem, uint64_t *cpu_mem);
-void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo);
-int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
- struct dma_fence **fence);
+uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo);
uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
uint32_t domain);
/*
* sub allocation
*/
+static inline struct amdgpu_sa_manager *
+to_amdgpu_sa_manager(struct drm_suballoc_manager *manager)
+{
+ return container_of(manager, struct amdgpu_sa_manager, base);
+}
-static inline uint64_t amdgpu_sa_bo_gpu_addr(struct amdgpu_sa_bo *sa_bo)
+static inline uint64_t amdgpu_sa_bo_gpu_addr(struct drm_suballoc *sa_bo)
{
- return sa_bo->manager->gpu_addr + sa_bo->soffset;
+ return to_amdgpu_sa_manager(sa_bo->manager)->gpu_addr +
+ drm_suballoc_soffset(sa_bo);
}
-static inline void * amdgpu_sa_bo_cpu_addr(struct amdgpu_sa_bo *sa_bo)
+static inline void *amdgpu_sa_bo_cpu_addr(struct drm_suballoc *sa_bo)
{
- return sa_bo->manager->cpu_ptr + sa_bo->soffset;
+ return to_amdgpu_sa_manager(sa_bo->manager)->cpu_ptr +
+ drm_suballoc_soffset(sa_bo);
}
int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
@@ -356,11 +342,10 @@ void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
struct amdgpu_sa_manager *sa_manager);
int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
- struct amdgpu_sa_bo **sa_bo,
- unsigned size, unsigned align);
-void amdgpu_sa_bo_free(struct amdgpu_device *adev,
- struct amdgpu_sa_bo **sa_bo,
- struct dma_fence *fence);
+ struct drm_suballoc **sa_bo,
+ unsigned int size);
+void amdgpu_sa_bo_free(struct drm_suballoc **sa_bo,
+ struct dma_fence *fence);
#if defined(CONFIG_DEBUG_FS)
void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
struct seq_file *m);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
index 4eaec446b49d..675aa138ea11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
@@ -69,6 +69,7 @@ static void amdgpu_pll_reduce_ratio(unsigned *nom, unsigned *den,
/**
* amdgpu_pll_get_fb_ref_div - feedback and ref divider calculation
*
+ * @adev: amdgpu_device pointer
* @nom: nominator
* @den: denominator
* @post_div: post divider
@@ -93,7 +94,7 @@ static void amdgpu_pll_get_fb_ref_div(struct amdgpu_device *adev, unsigned int n
ref_div_max = min(128 / post_div, ref_div_max);
/* get matching reference and feedback divider */
- *ref_div = min(max(DIV_ROUND_CLOSEST(den, post_div), 1u), ref_div_max);
+ *ref_div = clamp(DIV_ROUND_CLOSEST(den, post_div), 1u, ref_div_max);
*fb_div = DIV_ROUND_CLOSEST(nom * *ref_div * post_div, den);
/* limit fb divider to its maximum */
@@ -106,6 +107,7 @@ static void amdgpu_pll_get_fb_ref_div(struct amdgpu_device *adev, unsigned int n
/**
* amdgpu_pll_compute - compute PLL paramaters
*
+ * @adev: amdgpu_device pointer
* @pll: information about the PLL
* @freq: requested frequency
* @dot_clock_p: resulting pixel clock
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
index 82e9ecf84352..6e91ea1de5aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
@@ -233,6 +233,10 @@ static void amdgpu_perf_start(struct perf_event *event, int flags)
if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
return;
+ if ((!pe->adev->df.funcs) ||
+ (!pe->adev->df.funcs->pmc_start))
+ return;
+
WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
hwc->state = 0;
@@ -268,9 +272,12 @@ static void amdgpu_perf_read(struct perf_event *event)
pmu);
u64 count, prev;
- do {
- prev = local64_read(&hwc->prev_count);
+ if ((!pe->adev->df.funcs) ||
+ (!pe->adev->df.funcs->pmc_get_count))
+ return;
+ prev = local64_read(&hwc->prev_count);
+ do {
switch (hwc->config_base) {
case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF:
case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI:
@@ -281,7 +288,7 @@ static void amdgpu_perf_read(struct perf_event *event)
count = 0;
break;
}
- } while (local64_cmpxchg(&hwc->prev_count, prev, count) != prev);
+ } while (!local64_try_cmpxchg(&hwc->prev_count, &prev, count));
local64_add(count - prev, &event->count);
}
@@ -297,6 +304,10 @@ static void amdgpu_perf_stop(struct perf_event *event, int flags)
if (hwc->state & PERF_HES_UPTODATE)
return;
+ if ((!pe->adev->df.funcs) ||
+ (!pe->adev->df.funcs->pmc_stop))
+ return;
+
switch (hwc->config_base) {
case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF:
case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI:
@@ -326,6 +337,10 @@ static int amdgpu_perf_add(struct perf_event *event, int flags)
struct amdgpu_pmu_entry,
pmu);
+ if ((!pe->adev->df.funcs) ||
+ (!pe->adev->df.funcs->pmc_start))
+ return -EINVAL;
+
switch (pe->pmu_perf_type) {
case AMDGPU_PMU_PERF_TYPE_DF:
hwc->config_base = AMDGPU_PMU_EVENT_CONFIG_TYPE_DF;
@@ -371,6 +386,9 @@ static void amdgpu_perf_del(struct perf_event *event, int flags)
struct amdgpu_pmu_entry *pe = container_of(event->pmu,
struct amdgpu_pmu_entry,
pmu);
+ if ((!pe->adev->df.funcs) ||
+ (!pe->adev->df.funcs->pmc_stop))
+ return;
amdgpu_perf_stop(event, PERF_EF_UPDATE);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
index d02c8637f909..34b5e22b44e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
@@ -25,12 +25,6 @@
#include "amdgpu.h"
-static inline struct amdgpu_preempt_mgr *
-to_preempt_mgr(struct ttm_resource_manager *man)
-{
- return container_of(man, struct amdgpu_preempt_mgr, manager);
-}
-
/**
* DOC: mem_info_preempt_used
*
@@ -45,10 +39,9 @@ static ssize_t mem_info_preempt_used_show(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- struct ttm_resource_manager *man;
+ struct ttm_resource_manager *man = &adev->mman.preempt_mgr;
- man = ttm_manager_type(&adev->mman.bdev, AMDGPU_PL_PREEMPT);
- return sysfs_emit(buf, "%llu\n", amdgpu_preempt_mgr_usage(man));
+ return sysfs_emit(buf, "%llu\n", ttm_resource_manager_usage(man));
}
static DEVICE_ATTR_RO(mem_info_preempt_used);
@@ -59,7 +52,7 @@ static DEVICE_ATTR_RO(mem_info_preempt_used);
* @man: TTM memory type manager
* @tbo: TTM BO we need this range for
* @place: placement flags and restrictions
- * @mem: the resulting mem object
+ * @res: TTM memory object
*
* Dummy, just count the space used without allocating resources or any limit.
*/
@@ -68,16 +61,12 @@ static int amdgpu_preempt_mgr_new(struct ttm_resource_manager *man,
const struct ttm_place *place,
struct ttm_resource **res)
{
- struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
-
*res = kzalloc(sizeof(**res), GFP_KERNEL);
if (!*res)
return -ENOMEM;
ttm_resource_init(tbo, place, *res);
(*res)->start = AMDGPU_BO_INVALID_OFFSET;
-
- atomic64_add((*res)->num_pages, &mgr->used);
return 0;
}
@@ -85,55 +74,20 @@ static int amdgpu_preempt_mgr_new(struct ttm_resource_manager *man,
* amdgpu_preempt_mgr_del - free ranges
*
* @man: TTM memory type manager
- * @mem: TTM memory object
+ * @res: TTM memory object
*
* Free the allocated GTT again.
*/
static void amdgpu_preempt_mgr_del(struct ttm_resource_manager *man,
struct ttm_resource *res)
{
- struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
-
- atomic64_sub(res->num_pages, &mgr->used);
+ ttm_resource_fini(man, res);
kfree(res);
}
-/**
- * amdgpu_preempt_mgr_usage - return usage of PREEMPT domain
- *
- * @man: TTM memory type manager
- *
- * Return how many bytes are used in the GTT domain
- */
-uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man)
-{
- struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
- s64 result = atomic64_read(&mgr->used);
-
- return (result > 0 ? result : 0) * PAGE_SIZE;
-}
-
-/**
- * amdgpu_preempt_mgr_debug - dump VRAM table
- *
- * @man: TTM memory type manager
- * @printer: DRM printer to use
- *
- * Dump the table content using printk.
- */
-static void amdgpu_preempt_mgr_debug(struct ttm_resource_manager *man,
- struct drm_printer *printer)
-{
- struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
-
- drm_printf(printer, "man size:%llu pages, preempt used:%lld pages\n",
- man->size, (u64)atomic64_read(&mgr->used));
-}
-
static const struct ttm_resource_manager_func amdgpu_preempt_mgr_func = {
.alloc = amdgpu_preempt_mgr_new,
.free = amdgpu_preempt_mgr_del,
- .debug = amdgpu_preempt_mgr_debug
};
/**
@@ -145,16 +99,13 @@ static const struct ttm_resource_manager_func amdgpu_preempt_mgr_func = {
*/
int amdgpu_preempt_mgr_init(struct amdgpu_device *adev)
{
- struct amdgpu_preempt_mgr *mgr = &adev->mman.preempt_mgr;
- struct ttm_resource_manager *man = &mgr->manager;
+ struct ttm_resource_manager *man = &adev->mman.preempt_mgr;
int ret;
man->use_tt = true;
man->func = &amdgpu_preempt_mgr_func;
- ttm_resource_manager_init(man, (1 << 30));
-
- atomic64_set(&mgr->used, 0);
+ ttm_resource_manager_init(man, &adev->mman.bdev, (1 << 30));
ret = device_create_file(adev->dev, &dev_attr_mem_info_preempt_used);
if (ret) {
@@ -162,8 +113,7 @@ int amdgpu_preempt_mgr_init(struct amdgpu_device *adev)
return ret;
}
- ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT,
- &mgr->manager);
+ ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, man);
ttm_resource_manager_set_used(man, true);
return 0;
}
@@ -178,8 +128,7 @@ int amdgpu_preempt_mgr_init(struct amdgpu_device *adev)
*/
void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev)
{
- struct amdgpu_preempt_mgr *mgr = &adev->mman.preempt_mgr;
- struct ttm_resource_manager *man = &mgr->manager;
+ struct ttm_resource_manager *man = &adev->mman.preempt_mgr;
int ret;
ttm_resource_manager_set_used(man, false);
@@ -188,7 +137,8 @@ void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev)
if (ret)
return;
- device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used);
+ if (adev->dev->kobj.sd)
+ device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used);
ttm_resource_manager_cleanup(man);
ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, NULL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index c641f84649d6..0b10497d487c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -37,20 +37,46 @@
#include "psp_v11_0_8.h"
#include "psp_v12_0.h"
#include "psp_v13_0.h"
+#include "psp_v13_0_4.h"
+#include "psp_v14_0.h"
#include "amdgpu_ras.h"
#include "amdgpu_securedisplay.h"
#include "amdgpu_atomfirmware.h"
-static int psp_sysfs_init(struct amdgpu_device *adev);
-static void psp_sysfs_fini(struct amdgpu_device *adev);
+#define AMD_VBIOS_FILE_MAX_SIZE_B (1024*1024*16)
static int psp_load_smu_fw(struct psp_context *psp);
-static int psp_ta_unload(struct psp_context *psp, struct ta_context *context);
-static int psp_ta_load(struct psp_context *psp, struct ta_context *context);
static int psp_rap_terminate(struct psp_context *psp);
static int psp_securedisplay_terminate(struct psp_context *psp);
+static int psp_ring_init(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct psp_ring *ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ ring = &psp->km_ring;
+
+ ring->ring_type = ring_type;
+
+ /* allocate 4k Page of Local Frame Buffer memory for ring */
+ ring->ring_size = 0x1000;
+ ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->firmware.rbuf,
+ &ring->ring_mem_mc_addr,
+ (void **)&ring->ring_mem);
+ if (ret) {
+ ring->ring_size = 0;
+ return ret;
+ }
+
+ return 0;
+}
+
/*
* Due to DF Cstate management centralized to PMFW, the firmware
* loading sequence will be updated as below:
@@ -75,7 +101,7 @@ static void psp_check_pmfw_centralized_cstate_management(struct psp_context *psp
return;
}
- switch (adev->ip_versions[MP0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 4):
case IP_VERSION(11, 0, 5):
@@ -84,7 +110,9 @@ static void psp_check_pmfw_centralized_cstate_management(struct psp_context *psp
case IP_VERSION(11, 0, 11):
case IP_VERSION(11, 0, 12):
case IP_VERSION(11, 0, 13):
+ case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 7):
psp->pmfw_centralized_cstate_management = true;
break;
default:
@@ -93,54 +121,143 @@ static void psp_check_pmfw_centralized_cstate_management(struct psp_context *psp
}
}
-static int psp_early_init(void *handle)
+static int psp_init_sriov_microcode(struct psp_context *psp)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = psp->adev;
+ char ucode_prefix[30];
+ int ret = 0;
+
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(11, 0, 9):
+ adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
+ ret = psp_init_cap_microcode(psp, ucode_prefix);
+ break;
+ case IP_VERSION(13, 0, 2):
+ adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
+ ret = psp_init_cap_microcode(psp, ucode_prefix);
+ ret &= psp_init_ta_microcode(psp, ucode_prefix);
+ break;
+ case IP_VERSION(13, 0, 0):
+ adev->virt.autoload_ucode_id = 0;
+ break;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 14):
+ ret = psp_init_cap_microcode(psp, ucode_prefix);
+ ret &= psp_init_ta_microcode(psp, ucode_prefix);
+ break;
+ case IP_VERSION(13, 0, 10):
+ adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA;
+ ret = psp_init_cap_microcode(psp, ucode_prefix);
+ break;
+ case IP_VERSION(13, 0, 12):
+ ret = psp_init_ta_microcode(psp, ucode_prefix);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return ret;
+}
+
+static int psp_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
- switch (adev->ip_versions[MP0_HWIP][0]) {
+ psp->autoload_supported = true;
+ psp->boot_time_tmr = true;
+
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(9, 0, 0):
psp_v3_1_set_psp_funcs(psp);
psp->autoload_supported = false;
+ psp->boot_time_tmr = false;
break;
case IP_VERSION(10, 0, 0):
case IP_VERSION(10, 0, 1):
psp_v10_0_set_psp_funcs(psp);
psp->autoload_supported = false;
+ psp->boot_time_tmr = false;
break;
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 4):
psp_v11_0_set_psp_funcs(psp);
psp->autoload_supported = false;
+ psp->boot_time_tmr = false;
break;
case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 7):
+ adev->psp.sup_pd_fw_up = !amdgpu_sriov_vf(adev);
+ fallthrough;
case IP_VERSION(11, 0, 5):
case IP_VERSION(11, 0, 9):
- case IP_VERSION(11, 0, 7):
case IP_VERSION(11, 0, 11):
case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
case IP_VERSION(11, 0, 12):
case IP_VERSION(11, 0, 13):
psp_v11_0_set_psp_funcs(psp);
- psp->autoload_supported = true;
+ psp->boot_time_tmr = false;
break;
case IP_VERSION(11, 0, 3):
case IP_VERSION(12, 0, 1):
psp_v12_0_set_psp_funcs(psp);
+ psp->autoload_supported = false;
+ psp->boot_time_tmr = false;
break;
case IP_VERSION(13, 0, 2):
+ psp->boot_time_tmr = false;
+ fallthrough;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 14):
+ psp_v13_0_set_psp_funcs(psp);
+ psp->autoload_supported = false;
+ break;
+ case IP_VERSION(13, 0, 12):
psp_v13_0_set_psp_funcs(psp);
+ psp->autoload_supported = false;
+ adev->psp.sup_ifwi_up = !amdgpu_sriov_vf(adev);
break;
case IP_VERSION(13, 0, 1):
case IP_VERSION(13, 0, 3):
+ case IP_VERSION(13, 0, 5):
+ case IP_VERSION(13, 0, 8):
+ case IP_VERSION(13, 0, 11):
+ case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
+ case IP_VERSION(14, 0, 4):
psp_v13_0_set_psp_funcs(psp);
- psp->autoload_supported = true;
+ psp->boot_time_tmr = false;
break;
case IP_VERSION(11, 0, 8):
if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) {
psp_v11_0_8_set_psp_funcs(psp);
- psp->autoload_supported = false;
}
+ psp->autoload_supported = false;
+ psp->boot_time_tmr = false;
+ break;
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 7):
+ case IP_VERSION(13, 0, 10):
+ psp_v13_0_set_psp_funcs(psp);
+ adev->psp.sup_ifwi_up = !amdgpu_sriov_vf(adev);
+ psp->boot_time_tmr = false;
+ break;
+ case IP_VERSION(13, 0, 4):
+ psp_v13_0_4_set_psp_funcs(psp);
+ psp->boot_time_tmr = false;
+ break;
+ case IP_VERSION(14, 0, 2):
+ case IP_VERSION(14, 0, 3):
+ adev->psp.sup_ifwi_up = !amdgpu_sriov_vf(adev);
+ psp_v14_0_set_psp_funcs(psp);
+ break;
+ case IP_VERSION(14, 0, 5):
+ psp_v14_0_set_psp_funcs(psp);
+ psp->boot_time_tmr = false;
break;
default:
return -EINVAL;
@@ -148,9 +265,52 @@ static int psp_early_init(void *handle)
psp->adev = adev;
+ adev->psp_timeout = 20000;
+
psp_check_pmfw_centralized_cstate_management(psp);
- return 0;
+ if (amdgpu_sriov_vf(adev))
+ return psp_init_sriov_microcode(psp);
+ else
+ return psp_init_microcode(psp);
+}
+
+/*
+ * Release the shared buffer that belongs to one TA memory context.
+ *
+ * The buffer object, its MC address and its CPU mapping pointer are all
+ * handed to amdgpu_bo_free_kernel(); shared_bo is then explicitly reset
+ * to NULL.
+ */
+void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx)
+{
+ amdgpu_bo_free_kernel(&mem_ctx->shared_bo, &mem_ctx->shared_mc_addr,
+ &mem_ctx->shared_buf);
+ mem_ctx->shared_bo = NULL;
+}
+
+/*
+ * Free the TMR buffer object and the shared buffers of the xgmi, ras,
+ * hdcp, dtm, rap and securedisplay TA contexts of @psp.
+ */
+static void psp_free_shared_bufs(struct psp_context *psp)
+{
+ void *tmr_buf;
+ void **pptr;
+
+ /* free TMR memory buffer */
+ /* a CPU-address slot is only passed for SR-IOV VFs, NULL otherwise;
+ * psp_tmr_init() makes the same choice when it creates the TMR BO
+ */
+ pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
+ amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr);
+ psp->tmr_bo = NULL;
+
+ /* free xgmi shared memory */
+ psp_ta_free_shared_buf(&psp->xgmi_context.context.mem_context);
+
+ /* free ras shared memory */
+ psp_ta_free_shared_buf(&psp->ras_context.context.mem_context);
+
+ /* free hdcp shared memory */
+ psp_ta_free_shared_buf(&psp->hdcp_context.context.mem_context);
+
+ /* free dtm shared memory */
+ psp_ta_free_shared_buf(&psp->dtm_context.context.mem_context);
+
+ /* free rap shared memory */
+ psp_ta_free_shared_buf(&psp->rap_context.context.mem_context);
+
+ /* free securedisplay shared memory */
+ psp_ta_free_shared_buf(&psp->securedisplay_context.context.mem_context);
+
+
 }
static void psp_memory_training_fini(struct psp_context *psp)
@@ -168,21 +328,22 @@ static int psp_memory_training_init(struct psp_context *psp)
struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
if (ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) {
- DRM_DEBUG("memory training is not supported!\n");
+ dev_dbg(psp->adev->dev, "memory training is not supported!\n");
return 0;
}
ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL);
if (ctx->sys_cache == NULL) {
- DRM_ERROR("alloc mem_train_ctx.sys_cache failed!\n");
+ dev_err(psp->adev->dev, "alloc mem_train_ctx.sys_cache failed!\n");
ret = -ENOMEM;
goto Err_out;
}
- DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
- ctx->train_data_size,
- ctx->p2c_train_data_offset,
- ctx->c2p_train_data_offset);
+ dev_dbg(psp->adev->dev,
+ "train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
+ ctx->train_data_size,
+ ctx->p2c_train_data_offset,
+ ctx->c2p_train_data_offset);
ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS;
return 0;
@@ -211,6 +372,11 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device *adev,
bool ret = false;
int i;
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14))
+ return false;
+
db_header_pos = adev->gmc.mc_vram_size - PSP_RUNTIME_DB_OFFSET;
db_dir_pos = db_header_pos + sizeof(struct psp_runtime_data_header);
@@ -220,7 +386,7 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device *adev,
if (db_header.cookie != PSP_RUNTIME_DB_COOKIE_ID) {
/* runtime db doesn't exist, exit */
- dev_warn(adev->dev, "PSP runtime database doesn't exist\n");
+ dev_dbg(adev->dev, "PSP runtime database doesn't exist\n");
return false;
}
@@ -241,7 +407,7 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device *adev,
case PSP_RUNTIME_ENTRY_TYPE_BOOT_CONFIG:
if (db_dir.entry_list[i].size < sizeof(struct psp_runtime_boot_cfg_entry)) {
/* invalid db entry size */
- dev_warn(adev->dev, "Invalid PSP runtime database entry size\n");
+ dev_warn(adev->dev, "Invalid PSP runtime database boot cfg entry size\n");
return false;
}
/* read runtime database entry */
@@ -249,6 +415,17 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device *adev,
(uint32_t *)db_entry, sizeof(struct psp_runtime_boot_cfg_entry), false);
ret = true;
break;
+ case PSP_RUNTIME_ENTRY_TYPE_PPTABLE_ERR_STATUS:
+ if (db_dir.entry_list[i].size < sizeof(struct psp_runtime_scpm_entry)) {
+ /* invalid db entry size */
+ dev_warn(adev->dev, "Invalid PSP runtime database scpm entry size\n");
+ return false;
+ }
+ /* read runtime database entry */
+ amdgpu_device_vram_access(adev, db_header_pos + db_dir.entry_list[i].offset,
+ (uint32_t *)db_entry, sizeof(struct psp_runtime_scpm_entry), false);
+ ret = true;
+ break;
default:
ret = false;
break;
@@ -259,35 +436,39 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device *adev,
return ret;
}
-static int psp_sw_init(void *handle)
+static int psp_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
int ret;
struct psp_runtime_boot_cfg_entry boot_cfg_entry;
struct psp_memory_training_context *mem_training_ctx = &psp->mem_train_ctx;
+ struct psp_runtime_scpm_entry scpm_entry;
psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!psp->cmd) {
- DRM_ERROR("Failed to allocate memory to command buffer!\n");
- ret = -ENOMEM;
+ dev_err(adev->dev, "Failed to allocate memory to command buffer!\n");
+ return -ENOMEM;
}
- if (!amdgpu_sriov_vf(adev)) {
- ret = psp_init_microcode(psp);
- if (ret) {
- DRM_ERROR("Failed to load psp firmware!\n");
- return ret;
- }
- } else if (amdgpu_sriov_vf(adev) &&
- adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 2)) {
- ret = psp_init_ta_microcode(psp, "aldebaran");
- if (ret) {
- DRM_ERROR("Failed to initialize ta microcode!\n");
- return ret;
- }
+ adev->psp.xgmi_context.supports_extended_data =
+ !adev->gmc.xgmi.connected_to_cpu &&
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 2);
+
+ memset(&scpm_entry, 0, sizeof(scpm_entry));
+ if ((psp_get_runtime_db_entry(adev,
+ PSP_RUNTIME_ENTRY_TYPE_PPTABLE_ERR_STATUS,
+ &scpm_entry)) &&
+ (scpm_entry.scpm_status != SCPM_DISABLE)) {
+ adev->scpm_enabled = true;
+ adev->scpm_status = scpm_entry.scpm_status;
+ } else {
+ adev->scpm_enabled = false;
+ adev->scpm_status = SCPM_DISABLE;
}
+ /* TODO: stop gpu driver services and print alarm if scpm is enabled with error status */
+
memset(&boot_cfg_entry, 0, sizeof(boot_cfg_entry));
if (psp_get_runtime_db_entry(adev,
PSP_RUNTIME_ENTRY_TYPE_BOOT_CONFIG,
@@ -298,75 +479,107 @@ static int psp_sw_init(void *handle)
/* If psp runtime database exists, then
* only enable two stage memory training
* when TWO_STAGE_DRAM_TRAINING bit is set
- * in runtime database */
+ * in runtime database
+ */
mem_training_ctx->enable_mem_training = true;
}
} else {
- /* If psp runtime database doesn't exist or
- * is invalid, force enable two stage memory
- * training */
+ /* If psp runtime database doesn't exist or is
+ * invalid, force enable two stage memory training
+ */
mem_training_ctx->enable_mem_training = true;
}
if (mem_training_ctx->enable_mem_training) {
ret = psp_memory_training_init(psp);
if (ret) {
- DRM_ERROR("Failed to initialize memory training!\n");
+ dev_err(adev->dev, "Failed to initialize memory training!\n");
return ret;
}
ret = psp_mem_training(psp, PSP_MEM_TRAIN_COLD_BOOT);
if (ret) {
- DRM_ERROR("Failed to process memory training!\n");
+ dev_err(adev->dev, "Failed to process memory training!\n");
return ret;
}
}
- if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 0) ||
- adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7)) {
- ret= psp_sysfs_init(adev);
- if (ret) {
- return ret;
- }
- }
+ ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
+ (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ?
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
+ &psp->fw_pri_bo,
+ &psp->fw_pri_mc_addr,
+ &psp->fw_pri_buf);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &psp->fence_buf_bo,
+ &psp->fence_buf_mc_addr,
+ &psp->fence_buf);
+ if (ret)
+ goto failed1;
+
+ ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
+ (void **)&psp->cmd_buf_mem);
+ if (ret)
+ goto failed2;
return 0;
+
+failed2:
+ amdgpu_bo_free_kernel(&psp->fence_buf_bo,
+ &psp->fence_buf_mc_addr, &psp->fence_buf);
+failed1:
+ amdgpu_bo_free_kernel(&psp->fw_pri_bo,
+ &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
+ return ret;
}
-static int psp_sw_fini(void *handle)
+static int psp_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
- struct psp_gfx_cmd_resp *cmd = psp->cmd;
psp_memory_training_fini(psp);
- if (psp->sos_fw) {
- release_firmware(psp->sos_fw);
- psp->sos_fw = NULL;
- }
- if (psp->asd_fw) {
- release_firmware(psp->asd_fw);
- psp->asd_fw = NULL;
- }
- if (psp->ta_fw) {
- release_firmware(psp->ta_fw);
- psp->ta_fw = NULL;
- }
- if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 0) ||
- adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7))
- psp_sysfs_fini(adev);
+ amdgpu_ucode_release(&psp->sos_fw);
+ amdgpu_ucode_release(&psp->asd_fw);
+ amdgpu_ucode_release(&psp->ta_fw);
+ amdgpu_ucode_release(&psp->cap_fw);
+ amdgpu_ucode_release(&psp->toc_fw);
+
+ kfree(psp->cmd);
+ psp->cmd = NULL;
- kfree(cmd);
- cmd = NULL;
+ psp_free_shared_bufs(psp);
+
+ if (psp->km_ring.ring_mem)
+ amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+ &psp->km_ring.ring_mem_mc_addr,
+ (void **)&psp->km_ring.ring_mem);
+
+ amdgpu_bo_free_kernel(&psp->fw_pri_bo,
+ &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
+ amdgpu_bo_free_kernel(&psp->fence_buf_bo,
+ &psp->fence_buf_mc_addr, &psp->fence_buf);
+ amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
+ (void **)&psp->cmd_buf_mem);
return 0;
}
-int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
- uint32_t reg_val, uint32_t mask, bool check_changed)
+int psp_wait_for(struct psp_context *psp, uint32_t reg_index, uint32_t reg_val,
+ uint32_t mask, uint32_t flags)
{
+ bool check_changed = flags & PSP_WAITREG_CHANGED;
+ bool verbose = !(flags & PSP_WAITREG_NOVERBOSE);
uint32_t val;
int i;
struct amdgpu_device *adev = psp->adev;
@@ -386,6 +599,31 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
udelay(1);
}
+ if (verbose)
+ dev_err(adev->dev,
+ "psp reg (0x%x) wait timed out, mask: %x, read: %x exp: %x",
+ reg_index, mask, val, reg_val);
+
+ return -ETIME;
+}
+
+/*
+ * Poll register @reg_index until (value & @mask) equals @reg_val.
+ *
+ * The register is read at most @msec_timeout times with msleep(1) between
+ * reads. Nothing is logged when the wait expires.
+ *
+ * Returns 0 on a match, or immediately when adev->no_hw_access is set;
+ * -ETIME if no read matched.
+ */
+int psp_wait_for_spirom_update(struct psp_context *psp, uint32_t reg_index,
+ uint32_t reg_val, uint32_t mask, uint32_t msec_timeout)
+{
+ uint32_t val;
+ int i;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* hardware is not reachable: report success without touching it */
+ if (psp->adev->no_hw_access)
+ return 0;
+
+ for (i = 0; i < msec_timeout; i++) {
+ val = RREG32(reg_index);
+ if ((val & mask) == reg_val)
+ return 0;
+ msleep(1);
+ }
+
 return -ETIME;
 }
@@ -422,28 +660,49 @@ static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id)
return "AUTOLOAD_RLC";
case GFX_CMD_ID_BOOT_CFG:
return "BOOT_CFG";
+ case GFX_CMD_ID_CONFIG_SQ_PERFMON:
+ return "CONFIG_SQ_PERFMON";
+ case GFX_CMD_ID_FB_FW_RESERV_ADDR:
+ return "FB_FW_RESERV_ADDR";
+ case GFX_CMD_ID_FB_FW_RESERV_EXT_ADDR:
+ return "FB_FW_RESERV_EXT_ADDR";
+ case GFX_CMD_ID_SRIOV_SPATIAL_PART:
+ return "SPATIAL_PARTITION";
+ case GFX_CMD_ID_FB_NPS_MODE:
+ return "NPS_MODE_CHANGE";
default:
return "UNKNOWN CMD";
}
}
+/*
+ * Decide whether a failed PSP command should produce a warning.
+ *
+ * Inspects the command currently held in psp->cmd_buf_mem. Returns false
+ * only for one combination: MP0 IP version 13.0.2, a LOAD_IP_FW command
+ * of firmware type REG_LIST, and a TEE_ERROR_CANCEL response status.
+ * Returns true in every other case. psp_cmd_submit_buf() uses the result
+ * to gate its "psp gfx command ... failed" message.
+ */
+static bool psp_err_warn(struct psp_context *psp)
+{
+ struct psp_gfx_cmd_resp *cmd = psp->cmd_buf_mem;
+
+ /* This response indicates reg list is already loaded */
+ if (amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 2) &&
+ cmd->cmd_id == GFX_CMD_ID_LOAD_IP_FW &&
+ cmd->cmd.cmd_load_ip_fw.fw_type == GFX_FW_TYPE_REG_LIST &&
+ cmd->resp.status == TEE_ERROR_CANCEL)
+ return false;
+
+ return true;
+}
+
static int
psp_cmd_submit_buf(struct psp_context *psp,
struct amdgpu_firmware_info *ucode,
struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr)
{
int ret;
- int index, idx;
- int timeout = 20000;
+ int index;
+ int timeout = psp->adev->psp_timeout;
bool ras_intr = false;
bool skip_unsupport = false;
if (psp->adev->no_hw_access)
return 0;
- if (!drm_dev_enter(adev_to_drm(psp->adev), &idx))
- return 0;
-
memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE);
memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp));
@@ -475,7 +734,7 @@ psp_cmd_submit_buf(struct psp_context *psp,
skip_unsupport = (psp->cmd_buf_mem->resp.status == TEE_ERROR_NOT_SUPPORTED ||
psp->cmd_buf_mem->resp.status == PSP_ERR_UNKNOWN_COMMAND) && amdgpu_sriov_vf(psp->adev);
- memcpy((void*)&cmd->resp, (void*)&psp->cmd_buf_mem->resp, sizeof(struct psp_gfx_resp));
+ memcpy(&cmd->resp, &psp->cmd_buf_mem->resp, sizeof(struct psp_gfx_resp));
/* In some cases, psp response status is not 0 even there is no
* problem while the command is submitted. Some version of PSP FW
@@ -486,12 +745,21 @@ psp_cmd_submit_buf(struct psp_context *psp,
*/
if (!skip_unsupport && (psp->cmd_buf_mem->resp.status || !timeout) && !ras_intr) {
if (ucode)
- DRM_WARN("failed to load ucode %s(0x%X) ",
- amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id);
- DRM_WARN("psp gfx command %s(0x%X) failed and response status is (0x%X)\n",
- psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), psp->cmd_buf_mem->cmd_id,
- psp->cmd_buf_mem->resp.status);
- if (!timeout) {
+ dev_warn(psp->adev->dev,
+ "failed to load ucode %s(0x%X) ",
+ amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id);
+ if (psp_err_warn(psp))
+ dev_warn(
+ psp->adev->dev,
+ "psp gfx command %s(0x%X) failed and response status is (0x%X)\n",
+ psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id),
+ psp->cmd_buf_mem->cmd_id,
+ psp->cmd_buf_mem->resp.status);
+ /* If any firmware (including CAP) load fails under SRIOV, it should
+ * return failure to stop the VF from initializing.
+ * Also return failure in case of timeout
+ */
+ if ((ucode && amdgpu_sriov_vf(psp->adev)) || !timeout) {
ret = -EINVAL;
goto exit;
}
@@ -503,7 +771,6 @@ psp_cmd_submit_buf(struct psp_context *psp,
}
exit:
- drm_dev_exit(idx);
return ret;
}
@@ -518,7 +785,7 @@ static struct psp_gfx_cmd_resp *acquire_psp_cmd_buf(struct psp_context *psp)
return cmd;
}
-void release_psp_cmd_buf(struct psp_context *psp)
+static void release_psp_cmd_buf(struct psp_context *psp)
{
mutex_unlock(&psp->mutex);
}
@@ -528,8 +795,13 @@ static void psp_prep_tmr_cmd_buf(struct psp_context *psp,
uint64_t tmr_mc, struct amdgpu_bo *tmr_bo)
{
struct amdgpu_device *adev = psp->adev;
- uint32_t size = amdgpu_bo_size(tmr_bo);
- uint64_t tmr_pa = amdgpu_gmc_vram_pa(adev, tmr_bo);
+ uint32_t size = 0;
+ uint64_t tmr_pa = 0;
+
+ if (tmr_bo) {
+ size = amdgpu_bo_size(tmr_bo);
+ tmr_pa = amdgpu_gmc_vram_pa(adev, tmr_bo);
+ }
if (amdgpu_sriov_vf(psp->adev))
cmd->cmd_id = GFX_CMD_ID_SETUP_VMR;
@@ -577,7 +849,7 @@ static int psp_load_toc(struct psp_context *psp,
/* Set up Trusted Memory Region */
static int psp_tmr_init(struct psp_context *psp)
{
- int ret;
+ int ret = 0;
int tmr_size;
void *tmr_buf;
void **pptr;
@@ -592,32 +864,43 @@ static int psp_tmr_init(struct psp_context *psp)
tmr_size = PSP_TMR_SIZE(psp->adev);
/* For ASICs support RLC autoload, psp will parse the toc
- * and calculate the total size of TMR needed */
+ * and calculate the total size of TMR needed
+ */
if (!amdgpu_sriov_vf(psp->adev) &&
psp->toc.start_addr &&
psp->toc.size_bytes &&
psp->fw_pri_buf) {
ret = psp_load_toc(psp, &tmr_size);
if (ret) {
- DRM_ERROR("Failed to load toc\n");
+ dev_err(psp->adev->dev, "Failed to load toc\n");
return ret;
}
}
- pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
- ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE(psp->adev),
- AMDGPU_GEM_DOMAIN_VRAM,
- &psp->tmr_bo, &psp->tmr_mc_addr, pptr);
+ if (!psp->tmr_bo && !psp->boot_time_tmr) {
+ pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
+ ret = amdgpu_bo_create_kernel(psp->adev, tmr_size,
+ PSP_TMR_ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM,
+ &psp->tmr_bo, &psp->tmr_mc_addr,
+ pptr);
+ }
+ if (amdgpu_virt_xgmi_migrate_enabled(psp->adev) && psp->tmr_bo)
+ psp->tmr_mc_addr = amdgpu_bo_fb_aper_addr(psp->tmr_bo);
return ret;
}
static bool psp_skip_tmr(struct psp_context *psp)
{
- switch (psp->adev->ip_versions[MP0_HWIP][0]) {
+ switch (amdgpu_ip_version(psp->adev, MP0_HWIP, 0)) {
case IP_VERSION(11, 0, 9):
case IP_VERSION(11, 0, 7):
case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
return true;
default:
return false;
@@ -638,8 +921,9 @@ static int psp_tmr_load(struct psp_context *psp)
cmd = acquire_psp_cmd_buf(psp);
psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, psp->tmr_bo);
- DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n",
- amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr);
+ if (psp->tmr_bo)
+ dev_info(psp->adev->dev, "reserve 0x%lx from 0x%llx for PSP TMR\n",
+ amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
@@ -650,7 +934,7 @@ static int psp_tmr_load(struct psp_context *psp)
}
static void psp_prep_tmr_unload_cmd_buf(struct psp_context *psp,
- struct psp_gfx_cmd_resp *cmd)
+ struct psp_gfx_cmd_resp *cmd)
{
if (amdgpu_sriov_vf(psp->adev))
cmd->cmd_id = GFX_CMD_ID_DESTROY_VMR;
@@ -661,10 +945,18 @@ static void psp_prep_tmr_unload_cmd_buf(struct psp_context *psp,
static int psp_tmr_unload(struct psp_context *psp)
{
int ret;
- struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
+ struct psp_gfx_cmd_resp *cmd;
+
+ /* skip TMR unload for Navi12 and CHIP_SIENNA_CICHLID SRIOV,
+ * as TMR is not loaded at all
+ */
+ if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
psp_prep_tmr_unload_cmd_buf(psp, cmd);
- DRM_INFO("free PSP TMR buffer\n");
+ dev_dbg(psp->adev->dev, "free PSP TMR buffer\n");
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
@@ -676,19 +968,7 @@ static int psp_tmr_unload(struct psp_context *psp)
static int psp_tmr_terminate(struct psp_context *psp)
{
- int ret;
- void *tmr_buf;
- void **pptr;
-
- ret = psp_tmr_unload(psp);
- if (ret)
- return ret;
-
- /* free TMR memory buffer */
- pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
- amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr);
-
- return 0;
+ return psp_tmr_unload(psp);
}
int psp_get_fw_attestation_records_addr(struct psp_context *psp,
@@ -720,6 +1000,106 @@ int psp_get_fw_attestation_records_addr(struct psp_context *psp,
return ret;
}
+/*
+ * Submit the command @cmd_id to the PSP and read a firmware reservation
+ * (base address and size) out of its response.
+ *
+ * @addr receives the 64-bit base address assembled from the hi/lo halves
+ * of resp.uresp.fw_reserve_info; @size receives reserve_size.
+ *
+ * Returns the psp_cmd_submit_buf() error if submission fails (@addr and
+ * @size are then left untouched). Returns 0 otherwise; when the response
+ * status is PSP_ERR_UNKNOWN_COMMAND, @addr and @size are both set to 0.
+ * The command buffer is released on every exit path.
+ */
+static int psp_get_fw_reservation_info(struct psp_context *psp,
+ uint32_t cmd_id,
+ uint64_t *addr,
+ uint32_t *size)
+{
+ int ret;
+ uint32_t status;
+ struct psp_gfx_cmd_resp *cmd;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = cmd_id;
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+ if (ret) {
+ release_psp_cmd_buf(psp);
+ return ret;
+ }
+
+ /* an unknown command is not an error here: report an empty region */
+ status = cmd->resp.status;
+ if (status == PSP_ERR_UNKNOWN_COMMAND) {
+ release_psp_cmd_buf(psp);
+ *addr = 0;
+ *size = 0;
+ return 0;
+ }
+
+ /* combine the two 32-bit halves into one 64-bit base address */
+ *addr = (uint64_t)cmd->resp.uresp.fw_reserve_info.reserve_base_address_hi << 32 |
+ cmd->resp.uresp.fw_reserve_info.reserve_base_address_lo;
+ *size = cmd->resp.uresp.fw_reserve_info.reserve_size;
+
+ release_psp_cmd_buf(psp);
+
+ return 0;
+}
+
+/*
+ * Re-create the firmware reserved VRAM regions from the addresses and
+ * sizes reported by the PSP.
+ *
+ * Does nothing and returns 0 for SR-IOV VFs, for MP0 IP versions other
+ * than 14.0.2 / 14.0.3, and for SOS firmware older than 0x3b0e0d (14.0.2)
+ * or 0x3a0e14 (14.0.3). Also returns 0, after a warning, when the
+ * reported base address is not real_vram_size - reserv_size.
+ *
+ * Otherwise frees adev->mman.fw_reserved_memory and re-creates it at the
+ * reported address, then creates adev->mman.fw_reserved_memory_extend;
+ * both sizes are rounded up to SZ_1M.
+ *
+ * Returns 0 on success or the error of the failing query/allocation.
+ */
+int psp_update_fw_reservation(struct psp_context *psp)
+{
+ int ret;
+ uint64_t reserv_addr, reserv_addr_ext;
+ uint32_t reserv_size, reserv_size_ext, mp0_ip_ver;
+ struct amdgpu_device *adev = psp->adev;
+
+ mp0_ip_ver = amdgpu_ip_version(adev, MP0_HWIP, 0);
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ /* only listed IP versions with a new enough SOS firmware continue */
+ switch (mp0_ip_ver) {
+ case IP_VERSION(14, 0, 2):
+ if (adev->psp.sos.fw_version < 0x3b0e0d)
+ return 0;
+ break;
+
+ case IP_VERSION(14, 0, 3):
+ if (adev->psp.sos.fw_version < 0x3a0e14)
+ return 0;
+ break;
+
+ default:
+ return 0;
+ }
+
+ ret = psp_get_fw_reservation_info(psp, GFX_CMD_ID_FB_FW_RESERV_ADDR, &reserv_addr, &reserv_size);
+ if (ret)
+ return ret;
+ ret = psp_get_fw_reservation_info(psp, GFX_CMD_ID_FB_FW_RESERV_EXT_ADDR, &reserv_addr_ext, &reserv_size_ext);
+ if (ret)
+ return ret;
+
+ /* the region must end exactly at the end of VRAM; otherwise leave
+ * the existing reservation in place
+ */
+ if (reserv_addr != adev->gmc.real_vram_size - reserv_size) {
+ dev_warn(adev->dev, "reserve fw region is not valid!\n");
+ return 0;
+ }
+
+ /* drop the current reservation before re-creating it */
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, NULL);
+
+ reserv_size = roundup(reserv_size, SZ_1M);
+
+ ret = amdgpu_bo_create_kernel_at(adev, reserv_addr, reserv_size, &adev->mman.fw_reserved_memory, NULL);
+ if (ret) {
+ dev_err(adev->dev, "reserve fw region failed(%d)!\n", ret);
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, NULL);
+ return ret;
+ }
+
+ reserv_size_ext = roundup(reserv_size_ext, SZ_1M);
+
+ ret = amdgpu_bo_create_kernel_at(adev, reserv_addr_ext, reserv_size_ext,
+ &adev->mman.fw_reserved_memory_extend, NULL);
+ if (ret) {
+ dev_err(adev->dev, "reserve extend fw region failed(%d)!\n", ret);
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory_extend, NULL, NULL);
+ return ret;
+ }
+
+ return 0;
+}
+
static int psp_boot_config_get(struct amdgpu_device *adev, uint32_t *boot_cfg)
{
struct psp_context *psp = &adev->psp;
@@ -795,9 +1175,50 @@ static int psp_rl_load(struct amdgpu_device *adev)
return ret;
}
-static int psp_asd_load(struct psp_context *psp)
+/*
+ * Ask the PSP to switch the memory partition (NPS) mode.
+ *
+ * Submits GFX_CMD_ID_FB_NPS_MODE with @mode as the requested mode.
+ * SR-IOV VFs skip the request and return 0.
+ *
+ * Returns the result of psp_cmd_submit_buf(); a failure is also logged.
+ */
+int psp_memory_partition(struct psp_context *psp, int mode)
 {
- return psp_ta_load(psp, &psp->asd_context);
+ struct psp_gfx_cmd_resp *cmd;
+ int ret;
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_FB_NPS_MODE;
+ cmd->cmd.cmd_memory_part.mode = mode;
+
+ /* terminate the message with a newline so the log line is complete
+ * on its own, as the dev_err() below already does
+ */
+ dev_info(psp->adev->dev,
+ "Requesting %d memory partition change through PSP\n", mode);
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "PSP request failed to change to NPS%d mode\n", mode);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+/*
+ * Ask the PSP to apply a spatial partition mode.
+ *
+ * Submits GFX_CMD_ID_SRIOV_SPATIAL_PART with @mode as the requested mode.
+ * SR-IOV VFs skip the request and return 0.
+ *
+ * Returns the result of psp_cmd_submit_buf().
+ */
+int psp_spatial_partition(struct psp_context *psp, int mode)
+{
+ struct psp_gfx_cmd_resp *cmd;
+ int ret;
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_SRIOV_SPATIAL_PART;
+ cmd->cmd.cmd_spatial_part.mode = mode;
+
+ /* terminate the message with a newline so the log line is complete
+ * on its own
+ */
+ dev_info(psp->adev->dev, "Requesting %d partitions through PSP\n", mode);
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
 }
static int psp_asd_initialize(struct psp_context *psp)
@@ -811,11 +1232,16 @@ static int psp_asd_initialize(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev) || !psp->asd_context.bin_desc.size_bytes)
return 0;
+ /* bypass asd if display hardware is not available */
+ if (!amdgpu_device_has_display_hardware(psp->adev) &&
+ amdgpu_ip_version(psp->adev, MP0_HWIP, 0) >= IP_VERSION(13, 0, 10))
+ return 0;
+
psp->asd_context.mem_context.shared_mc_addr = 0;
psp->asd_context.mem_context.shared_mem_size = PSP_ASD_SHARED_MEM_SIZE;
psp->asd_context.ta_load_type = GFX_CMD_ID_LOAD_ASD;
- ret = psp_asd_load(psp);
+ ret = psp_ta_load(psp, &psp->asd_context);
if (!ret)
psp->asd_context.initialized = true;
@@ -829,7 +1255,7 @@ static void psp_prep_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd,
cmd->cmd.cmd_unload_ta.session_id = session_id;
}
-static int psp_ta_unload(struct psp_context *psp, struct ta_context *context)
+int psp_ta_unload(struct psp_context *psp, struct ta_context *context)
{
int ret;
struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
@@ -838,16 +1264,13 @@ static int psp_ta_unload(struct psp_context *psp, struct ta_context *context)
ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+ context->resp_status = cmd->resp.status;
+
release_psp_cmd_buf(psp);
return ret;
}
-static int psp_asd_unload(struct psp_context *psp)
-{
- return psp_ta_unload(psp, &psp->asd_context);
-}
-
static int psp_asd_terminate(struct psp_context *psp)
{
int ret;
@@ -858,8 +1281,7 @@ static int psp_asd_terminate(struct psp_context *psp)
if (!psp->asd_context.initialized)
return 0;
- ret = psp_asd_unload(psp);
-
+ ret = psp_ta_unload(psp, &psp->asd_context);
if (!ret)
psp->asd_context.initialized = false;
@@ -888,7 +1310,7 @@ int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
psp_prep_reg_prog_cmd_buf(cmd, reg, value);
ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
if (ret)
- DRM_ERROR("PSP failed to program reg id %d", reg);
+ dev_err(psp->adev->dev, "PSP failed to program reg id %d\n", reg);
release_psp_cmd_buf(psp);
@@ -900,7 +1322,7 @@ static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
struct ta_context *context)
{
cmd->cmd_id = context->ta_load_type;
- cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ta_bin_mc);
+ cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ta_bin_mc);
cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ta_bin_mc);
cmd->cmd.cmd_load_ta.app_len = context->bin_desc.size_bytes;
@@ -911,33 +1333,19 @@ static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
cmd->cmd.cmd_load_ta.cmd_buf_len = context->mem_context.shared_mem_size;
}
-static int psp_ta_init_shared_buf(struct psp_context *psp,
+int psp_ta_init_shared_buf(struct psp_context *psp,
struct ta_mem_context *mem_ctx)
{
- int ret;
-
/*
- * Allocate 16k memory aligned to 4k from Frame Buffer (local
- * physical) for ta to host memory
- */
- ret = amdgpu_bo_create_kernel(psp->adev, mem_ctx->shared_mem_size,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
+ * Allocate 16k memory aligned to 4k from Frame Buffer (local
+ * physical) for ta to host memory
+ */
+ return amdgpu_bo_create_kernel(psp->adev, mem_ctx->shared_mem_size,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
&mem_ctx->shared_bo,
&mem_ctx->shared_mc_addr,
&mem_ctx->shared_buf);
-
- return ret;
-}
-
-static void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx)
-{
- amdgpu_bo_free_kernel(&mem_ctx->shared_bo, &mem_ctx->shared_mc_addr,
- &mem_ctx->shared_buf);
-}
-
-static int psp_xgmi_init_shared_buf(struct psp_context *psp)
-{
- return psp_ta_init_shared_buf(psp, &psp->xgmi_context.context.mem_context);
}
static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
@@ -949,7 +1357,7 @@ static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id;
}
-static int psp_ta_invoke(struct psp_context *psp,
+int psp_ta_invoke(struct psp_context *psp,
uint32_t ta_cmd_id,
struct ta_context *context)
{
@@ -961,12 +1369,14 @@ static int psp_ta_invoke(struct psp_context *psp,
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
+ context->resp_status = cmd->resp.status;
+
release_psp_cmd_buf(psp);
return ret;
}
-static int psp_ta_load(struct psp_context *psp, struct ta_context *context)
+int psp_ta_load(struct psp_context *psp, struct ta_context *context)
{
int ret;
struct psp_gfx_cmd_resp *cmd;
@@ -976,30 +1386,26 @@ static int psp_ta_load(struct psp_context *psp, struct ta_context *context)
psp_copy_fw(psp, context->bin_desc.start_addr,
context->bin_desc.size_bytes);
+ if (amdgpu_virt_xgmi_migrate_enabled(psp->adev) &&
+ context->mem_context.shared_bo)
+ context->mem_context.shared_mc_addr =
+ amdgpu_bo_fb_aper_addr(context->mem_context.shared_bo);
+
psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, context);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
- if (!ret) {
+ context->resp_status = cmd->resp.status;
+
+ if (!ret)
context->session_id = cmd->resp.session_id;
- }
release_psp_cmd_buf(psp);
return ret;
}
-static int psp_xgmi_load(struct psp_context *psp)
-{
- return psp_ta_load(psp, &psp->xgmi_context.context);
-}
-
-static int psp_xgmi_unload(struct psp_context *psp)
-{
- return psp_ta_unload(psp, &psp->xgmi_context.context);
-}
-
int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
{
return psp_ta_invoke(psp, ta_cmd_id, &psp->xgmi_context.context);
@@ -1011,24 +1417,19 @@ int psp_xgmi_terminate(struct psp_context *psp)
struct amdgpu_device *adev = psp->adev;
/* XGMI TA unload currently is not supported on Arcturus/Aldebaran A+A */
- if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 4) ||
- (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 2) &&
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(11, 0, 4) ||
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 2) &&
adev->gmc.xgmi.connected_to_cpu))
return 0;
if (!psp->xgmi_context.context.initialized)
return 0;
- ret = psp_xgmi_unload(psp);
- if (ret)
- return ret;
+ ret = psp_ta_unload(psp, &psp->xgmi_context.context);
psp->xgmi_context.context.initialized = false;
- /* free xgmi shared memory */
- psp_ta_free_shared_buf(&psp->xgmi_context.context.mem_context);
-
- return 0;
+ return ret;
}
int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool load_ta)
@@ -1047,14 +1448,14 @@ int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool lo
psp->xgmi_context.context.mem_context.shared_mem_size = PSP_XGMI_SHARED_MEM_SIZE;
psp->xgmi_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
- if (!psp->xgmi_context.context.initialized) {
- ret = psp_xgmi_init_shared_buf(psp);
+ if (!psp->xgmi_context.context.mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &psp->xgmi_context.context.mem_context);
if (ret)
return ret;
}
/* Load XGMI TA */
- ret = psp_xgmi_load(psp);
+ ret = psp_ta_load(psp, &psp->xgmi_context.context);
if (!ret)
psp->xgmi_context.context.initialized = true;
else
@@ -1068,6 +1469,8 @@ invoke:
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__INITIALIZE;
ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
+ /* note down the capability flag for XGMI TA */
+ psp->xgmi_context.xgmi_ta_caps = xgmi_cmd->caps_flag;
return ret;
}
@@ -1114,8 +1517,11 @@ int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id)
static bool psp_xgmi_peer_link_info_supported(struct psp_context *psp)
{
- return psp->adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 2) &&
- psp->xgmi_context.context.bin_desc.fw_version >= 0x2000000b;
+ return (amdgpu_ip_version(psp->adev, MP0_HWIP, 0) ==
+ IP_VERSION(13, 0, 2) &&
+ psp->xgmi_context.context.bin_desc.fw_version >= 0x2000000b) ||
+ amdgpu_ip_version(psp->adev, MP0_HWIP, 0) >=
+ IP_VERSION(13, 0, 6);
}
/*
@@ -1133,9 +1539,13 @@ static void psp_xgmi_reflect_topology_info(struct psp_context *psp,
uint64_t src_node_id = psp->adev->gmc.xgmi.node_id;
uint64_t dst_node_id = node_info.node_id;
uint8_t dst_num_hops = node_info.num_hops;
+ uint8_t dst_is_sharing_enabled = node_info.is_sharing_enabled;
uint8_t dst_num_links = node_info.num_links;
hive = amdgpu_get_xgmi_hive(psp->adev);
+ if (WARN_ON(!hive))
+ return;
+
list_for_each_entry(mirror_adev, &hive->device_list, gmc.xgmi.head) {
struct psp_xgmi_topology_info *mirror_top_info;
int j;
@@ -1149,19 +1559,28 @@ static void psp_xgmi_reflect_topology_info(struct psp_context *psp,
continue;
mirror_top_info->nodes[j].num_hops = dst_num_hops;
- /*
- * prevent 0 num_links value re-reflection since reflection
+ mirror_top_info->nodes[j].is_sharing_enabled = dst_is_sharing_enabled;
+ /* prevent 0 num_links value re-reflection since reflection
* criteria is based on num_hops (direct or indirect).
- *
*/
- if (dst_num_links)
+ if (dst_num_links) {
mirror_top_info->nodes[j].num_links = dst_num_links;
+ /* swap src and dst due to frame of reference */
+ for (int k = 0; k < dst_num_links; k++) {
+ mirror_top_info->nodes[j].port_num[k].src_xgmi_port_num =
+ node_info.port_num[k].dst_xgmi_port_num;
+ mirror_top_info->nodes[j].port_num[k].dst_xgmi_port_num =
+ node_info.port_num[k].src_xgmi_port_num;
+ }
+ }
break;
}
break;
}
+
+ amdgpu_put_xgmi_hive(hive);
}
int psp_xgmi_get_topology_info(struct psp_context *psp,
@@ -1184,7 +1603,7 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
/* Fill in the shared memory with topology information as input */
topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info;
- xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO;
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_TOPOLOGY_INFO;
topology_info_input->num_nodes = number_devices;
for (i = 0; i < topology_info_input->num_nodes; i++) {
@@ -1195,7 +1614,7 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
}
/* Invoke xgmi ta to get the topology information */
- ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO);
+ ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_TOPOLOGY_INFO);
if (ret)
return ret;
@@ -1220,26 +1639,64 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
/* Invoke xgmi ta again to get the link information */
if (psp_xgmi_peer_link_info_supported(psp)) {
- struct ta_xgmi_cmd_get_peer_link_info_output *link_info_output;
+ struct ta_xgmi_cmd_get_peer_link_info *link_info_output;
+ struct ta_xgmi_cmd_get_extend_peer_link_info *link_extend_info_output;
+ bool requires_reflection =
+ (psp->xgmi_context.supports_extended_data &&
+ get_extended_data) ||
+ amdgpu_ip_version(psp->adev, MP0_HWIP, 0) ==
+ IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(psp->adev, MP0_HWIP, 0) ==
+ IP_VERSION(13, 0, 14) ||
+ amdgpu_sriov_vf(psp->adev);
+ bool ta_port_num_support = psp->xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG ||
+ amdgpu_sriov_xgmi_ta_ext_peer_link_en(psp->adev);
+
+ /* populate the shared output buffer rather than the cmd input buffer
+ * with node_ids as the input for GET_PEER_LINKS command execution.
+ * This is required for GET_PEER_LINKS per xgmi ta implementation.
+ * The same requirement for GET_EXTEND_PEER_LINKS command.
+ */
+ if (ta_port_num_support) {
+ link_extend_info_output = &xgmi_cmd->xgmi_out_message.get_extend_link_info;
- xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS;
+ for (i = 0; i < topology->num_nodes; i++)
+ link_extend_info_output->nodes[i].node_id = topology->nodes[i].node_id;
- ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_PEER_LINKS);
+ link_extend_info_output->num_nodes = topology->num_nodes;
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_EXTEND_PEER_LINKS;
+ } else {
+ link_info_output = &xgmi_cmd->xgmi_out_message.get_link_info;
+ for (i = 0; i < topology->num_nodes; i++)
+ link_info_output->nodes[i].node_id = topology->nodes[i].node_id;
+
+ link_info_output->num_nodes = topology->num_nodes;
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS;
+ }
+
+ ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
if (ret)
return ret;
- link_info_output = &xgmi_cmd->xgmi_out_message.get_link_info;
for (i = 0; i < topology->num_nodes; i++) {
+ uint8_t node_num_links = ta_port_num_support ?
+ link_extend_info_output->nodes[i].num_links : link_info_output->nodes[i].num_links;
/* accumulate num_links on extended data */
- topology->nodes[i].num_links = get_extended_data ?
- topology->nodes[i].num_links +
- link_info_output->nodes[i].num_links :
- link_info_output->nodes[i].num_links;
+ if (get_extended_data) {
+ topology->nodes[i].num_links = topology->nodes[i].num_links + node_num_links;
+ } else {
+ topology->nodes[i].num_links = (requires_reflection && topology->nodes[i].num_links) ?
+ topology->nodes[i].num_links : node_num_links;
+ }
+ /* populate the connected port num info if supported and available */
+ if (ta_port_num_support && topology->nodes[i].num_links) {
+ memcpy(topology->nodes[i].port_num, link_extend_info_output->nodes[i].port_num,
+ sizeof(struct xgmi_connected_port_num) * TA_XGMI__MAX_PORT_NUM);
+ }
/* reflect the topology information for bi-directionality */
- if (psp->xgmi_context.supports_extended_data &&
- get_extended_data && topology->nodes[i].num_hops)
+ if (requires_reflection && topology->nodes[i].num_hops)
psp_xgmi_reflect_topology_info(psp, topology->nodes[i]);
}
}
@@ -1277,21 +1734,6 @@ int psp_xgmi_set_topology_info(struct psp_context *psp,
}
// ras begin
-static int psp_ras_init_shared_buf(struct psp_context *psp)
-{
- return psp_ta_init_shared_buf(psp, &psp->ras_context.context.mem_context);
-}
-
-static int psp_ras_load(struct psp_context *psp)
-{
- return psp_ta_load(psp, &psp->ras_context.context);
-}
-
-static int psp_ras_unload(struct psp_context *psp)
-{
- return psp_ta_unload(psp, &psp->ras_context.context);
-}
-
static void psp_ras_ta_check_status(struct psp_context *psp)
{
struct ta_ras_shared_memory *ras_cmd =
@@ -1300,21 +1742,86 @@ static void psp_ras_ta_check_status(struct psp_context *psp)
switch (ras_cmd->ras_status) {
case TA_RAS_STATUS__ERROR_UNSUPPORTED_IP:
dev_warn(psp->adev->dev,
- "RAS WARNING: cmd failed due to unsupported ip\n");
+ "RAS WARNING: cmd failed due to unsupported ip\n");
break;
case TA_RAS_STATUS__ERROR_UNSUPPORTED_ERROR_INJ:
dev_warn(psp->adev->dev,
- "RAS WARNING: cmd failed due to unsupported error injection\n");
+ "RAS WARNING: cmd failed due to unsupported error injection\n");
break;
case TA_RAS_STATUS__SUCCESS:
break;
+ case TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED:
+ if (ras_cmd->cmd_id == TA_RAS_COMMAND__TRIGGER_ERROR)
+ dev_warn(psp->adev->dev,
+ "RAS WARNING: Inject error to critical region is not allowed\n");
+ break;
default:
dev_warn(psp->adev->dev,
- "RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status);
+ "RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status);
break;
}
}
+static int psp_ras_send_cmd(struct psp_context *psp,
+ enum ras_command cmd_id, void *in, void *out)
+{
+ struct ta_ras_shared_memory *ras_cmd;
+ uint32_t cmd = cmd_id;
+ int ret = 0;
+
+ if (!in)
+ return -EINVAL;
+
+ mutex_lock(&psp->ras_context.mutex);
+ ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
+ memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
+
+ switch (cmd) {
+ case TA_RAS_COMMAND__ENABLE_FEATURES:
+ case TA_RAS_COMMAND__DISABLE_FEATURES:
+ memcpy(&ras_cmd->ras_in_message,
+ in, sizeof(ras_cmd->ras_in_message));
+ break;
+ case TA_RAS_COMMAND__TRIGGER_ERROR:
+ memcpy(&ras_cmd->ras_in_message.trigger_error,
+ in, sizeof(ras_cmd->ras_in_message.trigger_error));
+ break;
+ case TA_RAS_COMMAND__QUERY_ADDRESS:
+ memcpy(&ras_cmd->ras_in_message.address,
+ in, sizeof(ras_cmd->ras_in_message.address));
+ break;
+ default:
+ dev_err(psp->adev->dev, "Invalid ras cmd id: %u\n", cmd);
+ ret = -EINVAL;
+ goto err_out;
+ }
+
+ ras_cmd->cmd_id = cmd;
+ ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+
+ switch (cmd) {
+ case TA_RAS_COMMAND__TRIGGER_ERROR:
+ if (!ret && out)
+ memcpy(out, &ras_cmd->ras_status, sizeof(ras_cmd->ras_status));
+ break;
+ case TA_RAS_COMMAND__QUERY_ADDRESS:
+ if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
+ ret = -EINVAL;
+ else if (out)
+ memcpy(out,
+ &ras_cmd->ras_out_message.address,
+ sizeof(ras_cmd->ras_out_message.address));
+ break;
+ default:
+ break;
+ }
+
+err_out:
+ mutex_unlock(&psp->ras_context.mutex);
+
+ return ret;
+}
+
int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
{
struct ta_ras_shared_memory *ras_cmd;
@@ -1333,9 +1840,8 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
if (amdgpu_ras_intr_triggered())
return ret;
- if (ras_cmd->if_version > RAS_TA_HOST_IF_VER)
- {
- DRM_WARN("RAS: Unsupported Interface");
+ if (ras_cmd->if_version > RAS_TA_HOST_IF_VER) {
+ dev_warn(psp->adev->dev, "RAS: Unsupported Interface\n");
return -EINVAL;
}
@@ -1344,8 +1850,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
dev_warn(psp->adev->dev, "ECC switch disabled\n");
ras_cmd->ras_status = TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE;
- }
- else if (ras_cmd->ras_out_message.flags.reg_access_failure_flag)
+ } else if (ras_cmd->ras_out_message.flags.reg_access_failure_flag)
dev_warn(psp->adev->dev,
"RAS internal register access blocked\n");
@@ -1358,30 +1863,22 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable)
{
- struct ta_ras_shared_memory *ras_cmd;
+ enum ras_command cmd_id;
int ret;
- if (!psp->ras_context.context.initialized)
+ if (!psp->ras_context.context.initialized || !info)
return -EINVAL;
- ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
- memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
-
- if (enable)
- ras_cmd->cmd_id = TA_RAS_COMMAND__ENABLE_FEATURES;
- else
- ras_cmd->cmd_id = TA_RAS_COMMAND__DISABLE_FEATURES;
-
- ras_cmd->ras_in_message = *info;
-
- ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+ cmd_id = enable ?
+ TA_RAS_COMMAND__ENABLE_FEATURES : TA_RAS_COMMAND__DISABLE_FEATURES;
+ ret = psp_ras_send_cmd(psp, cmd_id, info, NULL);
if (ret)
return -EINVAL;
return 0;
}
-static int psp_ras_terminate(struct psp_context *psp)
+int psp_ras_terminate(struct psp_context *psp)
{
int ret;
@@ -1394,19 +1891,16 @@ static int psp_ras_terminate(struct psp_context *psp)
if (!psp->ras_context.context.initialized)
return 0;
- ret = psp_ras_unload(psp);
- if (ret)
- return ret;
+ ret = psp_ta_unload(psp, &psp->ras_context.context);
psp->ras_context.context.initialized = false;
- /* free ras shared memory */
- psp_ta_free_shared_buf(&psp->ras_context.context.mem_context);
+ mutex_destroy(&psp->ras_context.mutex);
- return 0;
+ return ret;
}
-static int psp_ras_initialize(struct psp_context *psp)
+int psp_ras_initialize(struct psp_context *psp)
{
int ret;
uint32_t boot_cfg = 0xFF;
@@ -1433,35 +1927,47 @@ static int psp_ras_initialize(struct psp_context *psp)
if (ret)
dev_warn(adev->dev, "PSP get boot config failed\n");
- if (!amdgpu_ras_is_supported(psp->adev, AMDGPU_RAS_BLOCK__UMC)) {
- if (!boot_cfg) {
- dev_info(adev->dev, "GECC is disabled\n");
- } else {
- /* disable GECC in next boot cycle if ras is
- * disabled by module parameter amdgpu_ras_enable
- * and/or amdgpu_ras_mask, or boot_config_get call
- * is failed
- */
- ret = psp_boot_config_set(adev, 0);
- if (ret)
- dev_warn(adev->dev, "PSP set boot config failed\n");
- else
- dev_warn(adev->dev, "GECC will be disabled in next boot cycle "
- "if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n");
- }
+ if (boot_cfg == 1 && !adev->ras_default_ecc_enabled &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) {
+ dev_warn(adev->dev, "GECC is currently enabled, which may affect performance\n");
+ dev_warn(adev->dev,
+ "To disable GECC, please reboot the system and load the amdgpu driver with the parameter amdgpu_ras_enable=0\n");
} else {
- if (1 == boot_cfg) {
- dev_info(adev->dev, "GECC is enabled\n");
+ if ((adev->ras_default_ecc_enabled || amdgpu_ras_enable == 1) &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) {
+ if (boot_cfg == 1) {
+ dev_info(adev->dev, "GECC is enabled\n");
+ } else {
+ /* enable GECC in next boot cycle if it is disabled
+ * in boot config, or force enable GECC if failed to
+ * get boot configuration
+ */
+ ret = psp_boot_config_set(adev, BOOT_CONFIG_GECC);
+ if (ret)
+ dev_warn(adev->dev, "PSP set boot config failed\n");
+ else
+ dev_warn(adev->dev, "GECC will be enabled in next boot cycle\n");
+ }
} else {
- /* enable GECC in next boot cycle if it is disabled
- * in boot config, or force enable GECC if failed to
- * get boot configuration
- */
- ret = psp_boot_config_set(adev, BOOT_CONFIG_GECC);
- if (ret)
- dev_warn(adev->dev, "PSP set boot config failed\n");
- else
- dev_warn(adev->dev, "GECC will be enabled in next boot cycle\n");
+ if (!boot_cfg) {
+ if (!adev->ras_default_ecc_enabled &&
+ amdgpu_ras_enable != 1 &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+ dev_warn(adev->dev, "GECC is disabled, set amdgpu_ras_enable=1 to enable GECC in next boot cycle if needed\n");
+ else
+ dev_info(adev->dev, "GECC is disabled\n");
+ } else {
+ /* disable GECC in next boot cycle if ras is
+ * disabled by module parameter amdgpu_ras_enable
+ * and/or amdgpu_ras_mask, or boot_config_get call
+ * is failed
+ */
+ ret = psp_boot_config_set(adev, 0);
+ if (ret)
+ dev_warn(adev->dev, "PSP set boot config failed\n");
+ else
+ dev_warn(adev->dev, "GECC will be disabled in next boot cycle if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n");
+ }
}
}
}
@@ -1469,8 +1975,8 @@ static int psp_ras_initialize(struct psp_context *psp)
psp->ras_context.context.mem_context.shared_mem_size = PSP_RAS_SHARED_MEM_SIZE;
psp->ras_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
- if (!psp->ras_context.context.initialized) {
- ret = psp_ras_init_shared_buf(psp);
+ if (!psp->ras_context.context.mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &psp->ras_context.context.mem_context);
if (ret)
return ret;
}
@@ -1480,64 +1986,101 @@ static int psp_ras_initialize(struct psp_context *psp)
if (amdgpu_ras_is_poison_mode_supported(adev))
ras_cmd->ras_in_message.init_flags.poison_mode_en = 1;
- if (!adev->gmc.xgmi.connected_to_cpu)
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu)
ras_cmd->ras_in_message.init_flags.dgpu_mode = 1;
+ ras_cmd->ras_in_message.init_flags.xcc_mask =
+ adev->gfx.xcc_mask;
+ ras_cmd->ras_in_message.init_flags.channel_dis_num = hweight32(adev->gmc.m_half_use) * 2;
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ ras_cmd->ras_in_message.init_flags.nps_mode =
+ adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ ras_cmd->ras_in_message.init_flags.active_umc_mask = adev->umc.active_mask;
- ret = psp_ras_load(psp);
+ ret = psp_ta_load(psp, &psp->ras_context.context);
- if (!ret && !ras_cmd->ras_status)
+ if (!ret && !ras_cmd->ras_status) {
psp->ras_context.context.initialized = true;
- else {
+ mutex_init(&psp->ras_context.mutex);
+ } else {
if (ras_cmd->ras_status)
- dev_warn(psp->adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status);
- amdgpu_ras_fini(psp->adev);
+ dev_warn(adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status);
+
+ /* fail to load RAS TA */
+ psp->ras_context.context.initialized = false;
}
return ret;
}
int psp_ras_trigger_error(struct psp_context *psp,
- struct ta_ras_trigger_error_input *info)
+ struct ta_ras_trigger_error_input *info, uint32_t instance_mask)
{
- struct ta_ras_shared_memory *ras_cmd;
+ struct amdgpu_device *adev = psp->adev;
int ret;
+ uint32_t dev_mask;
+ uint32_t ras_status = 0;
- if (!psp->ras_context.context.initialized)
+ if (!psp->ras_context.context.initialized || !info)
return -EINVAL;
- ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
- memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
+ switch (info->block_id) {
+ case TA_RAS_BLOCK__GFX:
+ dev_mask = GET_MASK(GC, instance_mask);
+ break;
+ case TA_RAS_BLOCK__SDMA:
+ dev_mask = GET_MASK(SDMA0, instance_mask);
+ break;
+ case TA_RAS_BLOCK__VCN:
+ case TA_RAS_BLOCK__JPEG:
+ dev_mask = GET_MASK(VCN, instance_mask);
+ break;
+ default:
+ dev_mask = instance_mask;
+ break;
+ }
- ras_cmd->cmd_id = TA_RAS_COMMAND__TRIGGER_ERROR;
- ras_cmd->ras_in_message.trigger_error = *info;
+ /* reuse sub_block_index for backward compatibility */
+ dev_mask <<= AMDGPU_RAS_INST_SHIFT;
+ dev_mask &= AMDGPU_RAS_INST_MASK;
+ info->sub_block_index |= dev_mask;
- ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+ ret = psp_ras_send_cmd(psp,
+ TA_RAS_COMMAND__TRIGGER_ERROR, info, &ras_status);
if (ret)
return -EINVAL;
/* If err_event_athub occurs error inject was successful, however
- return status from TA is no long reliable */
+ * return status from TA is no longer reliable
+ */
if (amdgpu_ras_intr_triggered())
return 0;
- if (ras_cmd->ras_status)
+ if (ras_status == TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED)
+ return -EACCES;
+ else if (ras_status)
return -EINVAL;
return 0;
}
-// ras end
-// HDCP start
-static int psp_hdcp_init_shared_buf(struct psp_context *psp)
+int psp_ras_query_address(struct psp_context *psp,
+ struct ta_ras_query_address_input *addr_in,
+ struct ta_ras_query_address_output *addr_out)
{
- return psp_ta_init_shared_buf(psp, &psp->hdcp_context.context.mem_context);
-}
+ int ret;
-static int psp_hdcp_load(struct psp_context *psp)
-{
- return psp_ta_load(psp, &psp->hdcp_context.context);
+ if (!psp->ras_context.context.initialized ||
+ !addr_in || !addr_out)
+ return -EINVAL;
+
+ ret = psp_ras_send_cmd(psp,
+ TA_RAS_COMMAND__QUERY_ADDRESS, addr_in, addr_out);
+
+ return ret;
}
+// ras end
+// HDCP start
static int psp_hdcp_initialize(struct psp_context *psp)
{
int ret;
@@ -1548,6 +2091,10 @@ static int psp_hdcp_initialize(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev))
return 0;
+ /* bypass hdcp initialization if dmu is harvested */
+ if (!amdgpu_device_has_display_hardware(psp->adev))
+ return 0;
+
if (!psp->hdcp_context.context.bin_desc.size_bytes ||
!psp->hdcp_context.context.bin_desc.start_addr) {
dev_info(psp->adev->dev, "HDCP: optional hdcp ta ucode is not available\n");
@@ -1557,13 +2104,13 @@ static int psp_hdcp_initialize(struct psp_context *psp)
psp->hdcp_context.context.mem_context.shared_mem_size = PSP_HDCP_SHARED_MEM_SIZE;
psp->hdcp_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
- if (!psp->hdcp_context.context.initialized) {
- ret = psp_hdcp_init_shared_buf(psp);
+ if (!psp->hdcp_context.context.mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &psp->hdcp_context.context.mem_context);
if (ret)
return ret;
}
- ret = psp_hdcp_load(psp);
+ ret = psp_ta_load(psp, &psp->hdcp_context.context);
if (!ret) {
psp->hdcp_context.context.initialized = true;
mutex_init(&psp->hdcp_context.mutex);
@@ -1572,11 +2119,6 @@ static int psp_hdcp_initialize(struct psp_context *psp)
return ret;
}
-static int psp_hdcp_unload(struct psp_context *psp)
-{
- return psp_ta_unload(psp, &psp->hdcp_context.context);
-}
-
int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
{
/*
@@ -1585,6 +2127,9 @@ int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
if (amdgpu_sriov_vf(psp->adev))
return 0;
+ if (!psp->hdcp_context.context.initialized)
+ return 0;
+
return psp_ta_invoke(psp, ta_cmd_id, &psp->hdcp_context.context);
}
@@ -1598,38 +2143,18 @@ static int psp_hdcp_terminate(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev))
return 0;
- if (!psp->hdcp_context.context.initialized) {
- if (psp->hdcp_context.context.mem_context.shared_buf)
- goto out;
- else
- return 0;
- }
+ if (!psp->hdcp_context.context.initialized)
+ return 0;
- ret = psp_hdcp_unload(psp);
- if (ret)
- return ret;
+ ret = psp_ta_unload(psp, &psp->hdcp_context.context);
psp->hdcp_context.context.initialized = false;
-out:
- /* free hdcp shared memory */
- psp_ta_free_shared_buf(&psp->hdcp_context.context.mem_context);
-
- return 0;
+ return ret;
}
// HDCP end
// DTM start
-static int psp_dtm_init_shared_buf(struct psp_context *psp)
-{
- return psp_ta_init_shared_buf(psp, &psp->dtm_context.context.mem_context);
-}
-
-static int psp_dtm_load(struct psp_context *psp)
-{
- return psp_ta_load(psp, &psp->dtm_context.context);
-}
-
static int psp_dtm_initialize(struct psp_context *psp)
{
int ret;
@@ -1640,6 +2165,10 @@ static int psp_dtm_initialize(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev))
return 0;
+ /* bypass dtm initialization if dmu is harvested */
+ if (!amdgpu_device_has_display_hardware(psp->adev))
+ return 0;
+
if (!psp->dtm_context.context.bin_desc.size_bytes ||
!psp->dtm_context.context.bin_desc.start_addr) {
dev_info(psp->adev->dev, "DTM: optional dtm ta ucode is not available\n");
@@ -1649,13 +2178,13 @@ static int psp_dtm_initialize(struct psp_context *psp)
psp->dtm_context.context.mem_context.shared_mem_size = PSP_DTM_SHARED_MEM_SIZE;
psp->dtm_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
- if (!psp->dtm_context.context.initialized) {
- ret = psp_dtm_init_shared_buf(psp);
+ if (!psp->dtm_context.context.mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &psp->dtm_context.context.mem_context);
if (ret)
return ret;
}
- ret = psp_dtm_load(psp);
+ ret = psp_ta_load(psp, &psp->dtm_context.context);
if (!ret) {
psp->dtm_context.context.initialized = true;
mutex_init(&psp->dtm_context.mutex);
@@ -1664,11 +2193,6 @@ static int psp_dtm_initialize(struct psp_context *psp)
return ret;
}
-static int psp_dtm_unload(struct psp_context *psp)
-{
- return psp_ta_unload(psp, &psp->dtm_context.context);
-}
-
int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
{
/*
@@ -1677,6 +2201,9 @@ int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
if (amdgpu_sriov_vf(psp->adev))
return 0;
+ if (!psp->dtm_context.context.initialized)
+ return 0;
+
return psp_ta_invoke(psp, ta_cmd_id, &psp->dtm_context.context);
}
@@ -1690,43 +2217,18 @@ static int psp_dtm_terminate(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev))
return 0;
- if (!psp->dtm_context.context.initialized) {
- if (psp->dtm_context.context.mem_context.shared_buf)
- goto out;
- else
- return 0;
- }
+ if (!psp->dtm_context.context.initialized)
+ return 0;
- ret = psp_dtm_unload(psp);
- if (ret)
- return ret;
+ ret = psp_ta_unload(psp, &psp->dtm_context.context);
psp->dtm_context.context.initialized = false;
-out:
- /* free dtm shared memory */
- psp_ta_free_shared_buf(&psp->dtm_context.context.mem_context);
-
- return 0;
+ return ret;
}
// DTM end
// RAP start
-static int psp_rap_init_shared_buf(struct psp_context *psp)
-{
- return psp_ta_init_shared_buf(psp, &psp->rap_context.context.mem_context);
-}
-
-static int psp_rap_load(struct psp_context *psp)
-{
- return psp_ta_load(psp, &psp->rap_context.context);
-}
-
-static int psp_rap_unload(struct psp_context *psp)
-{
- return psp_ta_unload(psp, &psp->rap_context.context);
-}
-
static int psp_rap_initialize(struct psp_context *psp)
{
int ret;
@@ -1747,13 +2249,13 @@ static int psp_rap_initialize(struct psp_context *psp)
psp->rap_context.context.mem_context.shared_mem_size = PSP_RAP_SHARED_MEM_SIZE;
psp->rap_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
- if (!psp->rap_context.context.initialized) {
- ret = psp_rap_init_shared_buf(psp);
+ if (!psp->rap_context.context.mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &psp->rap_context.context.mem_context);
if (ret)
return ret;
}
- ret = psp_rap_load(psp);
+ ret = psp_ta_load(psp, &psp->rap_context.context);
if (!ret) {
psp->rap_context.context.initialized = true;
mutex_init(&psp->rap_context.mutex);
@@ -1763,6 +2265,8 @@ static int psp_rap_initialize(struct psp_context *psp)
ret = psp_rap_invoke(psp, TA_CMD_RAP__INITIALIZE, &status);
if (ret || status != TA_RAP_STATUS__SUCCESS) {
psp_rap_terminate(psp);
+ /* free rap shared memory */
+ psp_ta_free_shared_buf(&psp->rap_context.context.mem_context);
dev_warn(psp->adev->dev, "RAP TA initialize fail (%d) status %d.\n",
ret, status);
@@ -1780,13 +2284,10 @@ static int psp_rap_terminate(struct psp_context *psp)
if (!psp->rap_context.context.initialized)
return 0;
- ret = psp_rap_unload(psp);
+ ret = psp_ta_unload(psp, &psp->rap_context.context);
psp->rap_context.context.initialized = false;
- /* free rap shared memory */
- psp_ta_free_shared_buf(&psp->rap_context.context.mem_context);
-
return ret;
}
@@ -1826,26 +2327,10 @@ out_unlock:
// RAP end
/* securedisplay start */
-static int psp_securedisplay_init_shared_buf(struct psp_context *psp)
-{
- return psp_ta_init_shared_buf(
- psp, &psp->securedisplay_context.context.mem_context);
-}
-
-static int psp_securedisplay_load(struct psp_context *psp)
-{
- return psp_ta_load(psp, &psp->securedisplay_context.context);
-}
-
-static int psp_securedisplay_unload(struct psp_context *psp)
-{
- return psp_ta_unload(psp, &psp->securedisplay_context.context);
-}
-
static int psp_securedisplay_initialize(struct psp_context *psp)
{
int ret;
- struct securedisplay_cmd *securedisplay_cmd;
+ struct ta_securedisplay_cmd *securedisplay_cmd;
/*
* TODO: bypass the initialize in sriov for now
@@ -1853,9 +2338,14 @@ static int psp_securedisplay_initialize(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev))
return 0;
+ /* bypass securedisplay initialization if dmu is harvested */
+ if (!amdgpu_device_has_display_hardware(psp->adev))
+ return 0;
+
if (!psp->securedisplay_context.context.bin_desc.size_bytes ||
!psp->securedisplay_context.context.bin_desc.start_addr) {
- dev_info(psp->adev->dev, "SECUREDISPLAY: securedisplay ta ucode is not available\n");
+ dev_info(psp->adev->dev,
+ "SECUREDISPLAY: optional securedisplay ta ucode is not available\n");
return 0;
}
@@ -1864,24 +2354,35 @@ static int psp_securedisplay_initialize(struct psp_context *psp)
psp->securedisplay_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
if (!psp->securedisplay_context.context.initialized) {
- ret = psp_securedisplay_init_shared_buf(psp);
+ ret = psp_ta_init_shared_buf(psp,
+ &psp->securedisplay_context.context.mem_context);
if (ret)
return ret;
}
- ret = psp_securedisplay_load(psp);
- if (!ret) {
+ ret = psp_ta_load(psp, &psp->securedisplay_context.context);
+ if (!ret && !psp->securedisplay_context.context.resp_status) {
psp->securedisplay_context.context.initialized = true;
mutex_init(&psp->securedisplay_context.mutex);
- } else
+ } else {
+ /* don't try again */
+ psp->securedisplay_context.context.bin_desc.size_bytes = 0;
return ret;
+ }
+
+ mutex_lock(&psp->securedisplay_context.mutex);
psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
TA_SECUREDISPLAY_COMMAND__QUERY_TA);
ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__QUERY_TA);
+
+ mutex_unlock(&psp->securedisplay_context.mutex);
+
if (ret) {
psp_securedisplay_terminate(psp);
+ /* free securedisplay shared memory */
+ psp_ta_free_shared_buf(&psp->securedisplay_context.context.mem_context);
dev_err(psp->adev->dev, "SECUREDISPLAY TA initialize fail.\n");
return -EINVAL;
}
@@ -1890,6 +2391,8 @@ static int psp_securedisplay_initialize(struct psp_context *psp)
psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status);
dev_err(psp->adev->dev, "SECUREDISPLAY: query securedisplay TA failed. ret 0x%x\n",
securedisplay_cmd->securedisplay_out_message.query_ta.query_cmd_ret);
+ /* don't try again */
+ psp->securedisplay_context.context.bin_desc.size_bytes = 0;
}
return 0;
@@ -1908,15 +2411,10 @@ static int psp_securedisplay_terminate(struct psp_context *psp)
if (!psp->securedisplay_context.context.initialized)
return 0;
- ret = psp_securedisplay_unload(psp);
- if (ret)
- return ret;
+ ret = psp_ta_unload(psp, &psp->securedisplay_context.context);
psp->securedisplay_context.context.initialized = false;
- /* free securedisplay shared memory */
- psp_ta_free_shared_buf(&psp->securedisplay_context.context.mem_context);
-
return ret;
}
@@ -1928,30 +2426,77 @@ int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
return -EINVAL;
if (ta_cmd_id != TA_SECUREDISPLAY_COMMAND__QUERY_TA &&
- ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC)
+ ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC &&
+ ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2)
return -EINVAL;
- mutex_lock(&psp->securedisplay_context.mutex);
-
ret = psp_ta_invoke(psp, ta_cmd_id, &psp->securedisplay_context.context);
- mutex_unlock(&psp->securedisplay_context.mutex);
-
return ret;
}
/* SECUREDISPLAY end */
+int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev)
+{
+ struct psp_context *psp = &adev->psp;
+ int ret = 0;
+
+ if (!amdgpu_sriov_vf(adev) && psp->funcs && psp->funcs->wait_for_bootloader != NULL)
+ ret = psp->funcs->wait_for_bootloader(psp);
+
+ return ret;
+}
+
+bool amdgpu_psp_get_ras_capability(struct psp_context *psp)
+{
+ if (psp->funcs &&
+ psp->funcs->get_ras_capability) {
+ return psp->funcs->get_ras_capability(psp);
+ } else {
+ return false;
+ }
+}
+
+bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev)
+{
+ struct psp_context *psp = &adev->psp;
+
+ if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
+ return false;
+
+ if (psp->funcs && psp->funcs->is_reload_needed)
+ return psp->funcs->is_reload_needed(psp);
+
+ return false;
+}
+
+static void psp_update_gpu_addresses(struct amdgpu_device *adev)
+{
+ struct psp_context *psp = &adev->psp;
+
+ if (psp->cmd_buf_bo && psp->cmd_buf_mem) {
+ psp->fw_pri_mc_addr = amdgpu_bo_fb_aper_addr(psp->fw_pri_bo);
+ psp->fence_buf_mc_addr = amdgpu_bo_fb_aper_addr(psp->fence_buf_bo);
+ psp->cmd_buf_mc_addr = amdgpu_bo_fb_aper_addr(psp->cmd_buf_bo);
+ }
+ if (adev->firmware.rbuf && psp->km_ring.ring_mem)
+ psp->km_ring.ring_mem_mc_addr = amdgpu_bo_fb_aper_addr(adev->firmware.rbuf);
+}
+
static int psp_hw_start(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
int ret;
+ if (amdgpu_virt_xgmi_migrate_enabled(adev))
+ psp_update_gpu_addresses(adev);
+
if (!amdgpu_sriov_vf(adev)) {
if ((is_psp_fw_valid(psp->kdb)) &&
(psp->funcs->bootloader_load_kdb != NULL)) {
ret = psp_bootloader_load_kdb(psp);
if (ret) {
- DRM_ERROR("PSP load kdb failed!\n");
+ dev_err(adev->dev, "PSP load kdb failed!\n");
return ret;
}
}
@@ -1960,7 +2505,7 @@ static int psp_hw_start(struct psp_context *psp)
(psp->funcs->bootloader_load_spl != NULL)) {
ret = psp_bootloader_load_spl(psp);
if (ret) {
- DRM_ERROR("PSP load spl failed!\n");
+ dev_err(adev->dev, "PSP load spl failed!\n");
return ret;
}
}
@@ -1969,7 +2514,7 @@ static int psp_hw_start(struct psp_context *psp)
(psp->funcs->bootloader_load_sysdrv != NULL)) {
ret = psp_bootloader_load_sysdrv(psp);
if (ret) {
- DRM_ERROR("PSP load sys drv failed!\n");
+ dev_err(adev->dev, "PSP load sys drv failed!\n");
return ret;
}
}
@@ -1978,7 +2523,7 @@ static int psp_hw_start(struct psp_context *psp)
(psp->funcs->bootloader_load_soc_drv != NULL)) {
ret = psp_bootloader_load_soc_drv(psp);
if (ret) {
- DRM_ERROR("PSP load soc drv failed!\n");
+ dev_err(adev->dev, "PSP load soc drv failed!\n");
return ret;
}
}
@@ -1987,7 +2532,7 @@ static int psp_hw_start(struct psp_context *psp)
(psp->funcs->bootloader_load_intf_drv != NULL)) {
ret = psp_bootloader_load_intf_drv(psp);
if (ret) {
- DRM_ERROR("PSP load intf drv failed!\n");
+ dev_err(adev->dev, "PSP load intf drv failed!\n");
return ret;
}
}
@@ -1996,7 +2541,34 @@ static int psp_hw_start(struct psp_context *psp)
(psp->funcs->bootloader_load_dbg_drv != NULL)) {
ret = psp_bootloader_load_dbg_drv(psp);
if (ret) {
- DRM_ERROR("PSP load dbg drv failed!\n");
+ dev_err(adev->dev, "PSP load dbg drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->ras_drv)) &&
+ (psp->funcs->bootloader_load_ras_drv != NULL)) {
+ ret = psp_bootloader_load_ras_drv(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load ras_drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->ipkeymgr_drv)) &&
+ (psp->funcs->bootloader_load_ipkeymgr_drv != NULL)) {
+ ret = psp_bootloader_load_ipkeymgr_drv(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load ipkeymgr_drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->spdm_drv)) &&
+ (psp->funcs->bootloader_load_spdm_drv != NULL)) {
+ ret = psp_bootloader_load_spdm_drv(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load spdm_drv failed!\n");
return ret;
}
}
@@ -2005,7 +2577,7 @@ static int psp_hw_start(struct psp_context *psp)
(psp->funcs->bootloader_load_sos != NULL)) {
ret = psp_bootloader_load_sos(psp);
if (ret) {
- DRM_ERROR("PSP load sos failed!\n");
+ dev_err(adev->dev, "PSP load sos failed!\n");
return ret;
}
}
@@ -2013,16 +2585,30 @@ static int psp_hw_start(struct psp_context *psp)
ret = psp_ring_create(psp, PSP_RING_TYPE__KM);
if (ret) {
- DRM_ERROR("PSP create ring failed!\n");
+ dev_err(adev->dev, "PSP create ring failed!\n");
return ret;
}
- ret = psp_tmr_init(psp);
- if (ret) {
- DRM_ERROR("PSP tmr init failed!\n");
- return ret;
+ if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ ret = psp_update_fw_reservation(psp);
+ if (ret) {
+ dev_err(adev->dev, "update fw reservation failed!\n");
+ return ret;
+ }
+ }
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
+ goto skip_pin_bo;
+
+ if (!psp->boot_time_tmr || psp->autoload_supported) {
+ ret = psp_tmr_init(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP tmr init failed!\n");
+ return ret;
+ }
}
+skip_pin_bo:
/*
* For ASICs with DF Cstate management centralized
* to PMFW, TMR setup should be performed after PMFW
@@ -2034,10 +2620,12 @@ static int psp_hw_start(struct psp_context *psp)
return ret;
}
- ret = psp_tmr_load(psp);
- if (ret) {
- DRM_ERROR("PSP load tmr failed!\n");
- return ret;
+ if (!psp->boot_time_tmr || !psp->autoload_supported) {
+ ret = psp_tmr_load(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load tmr failed!\n");
+ return ret;
+ }
}
return 0;
@@ -2047,6 +2635,9 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
enum psp_gfx_fw_type *type)
{
switch (ucode->ucode_id) {
+ case AMDGPU_UCODE_ID_CAP:
+ *type = GFX_FW_TYPE_CAP;
+ break;
case AMDGPU_UCODE_ID_SDMA0:
*type = GFX_FW_TYPE_SDMA0;
break;
@@ -2077,6 +2668,12 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
case AMDGPU_UCODE_ID_CP_MES_DATA:
*type = GFX_FW_TYPE_MES_STACK;
break;
+ case AMDGPU_UCODE_ID_CP_MES1:
+ *type = GFX_FW_TYPE_CP_MES_KIQ;
+ break;
+ case AMDGPU_UCODE_ID_CP_MES1_DATA:
+ *type = GFX_FW_TYPE_MES_KIQ_STACK;
+ break;
case AMDGPU_UCODE_ID_CP_CE:
*type = GFX_FW_TYPE_CP_CE;
break;
@@ -2098,6 +2695,12 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
case AMDGPU_UCODE_ID_CP_MEC2_JT:
*type = GFX_FW_TYPE_CP_MEC_ME2;
break;
+ case AMDGPU_UCODE_ID_RLC_P:
+ *type = GFX_FW_TYPE_RLC_P;
+ break;
+ case AMDGPU_UCODE_ID_RLC_V:
+ *type = GFX_FW_TYPE_RLC_V;
+ break;
case AMDGPU_UCODE_ID_RLC_G:
*type = GFX_FW_TYPE_RLC_G;
break;
@@ -2116,9 +2719,27 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
case AMDGPU_UCODE_ID_RLC_DRAM:
*type = GFX_FW_TYPE_RLC_DRAM_BOOT;
break;
+ case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS:
+ *type = GFX_FW_TYPE_GLOBAL_TAP_DELAYS;
+ break;
+ case AMDGPU_UCODE_ID_SE0_TAP_DELAYS:
+ *type = GFX_FW_TYPE_SE0_TAP_DELAYS;
+ break;
+ case AMDGPU_UCODE_ID_SE1_TAP_DELAYS:
+ *type = GFX_FW_TYPE_SE1_TAP_DELAYS;
+ break;
+ case AMDGPU_UCODE_ID_SE2_TAP_DELAYS:
+ *type = GFX_FW_TYPE_SE2_TAP_DELAYS;
+ break;
+ case AMDGPU_UCODE_ID_SE3_TAP_DELAYS:
+ *type = GFX_FW_TYPE_SE3_TAP_DELAYS;
+ break;
case AMDGPU_UCODE_ID_SMC:
*type = GFX_FW_TYPE_SMU;
break;
+ case AMDGPU_UCODE_ID_PPTABLE:
+ *type = GFX_FW_TYPE_PPTABLE;
+ break;
case AMDGPU_UCODE_ID_UVD:
*type = GFX_FW_TYPE_UVD;
break;
@@ -2149,6 +2770,79 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
case AMDGPU_UCODE_ID_DMCUB:
*type = GFX_FW_TYPE_DMUB;
break;
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH0:
+ case AMDGPU_UCODE_ID_SDMA_RS64:
+ *type = GFX_FW_TYPE_SDMA_UCODE_TH0;
+ break;
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH1:
+ *type = GFX_FW_TYPE_SDMA_UCODE_TH1;
+ break;
+ case AMDGPU_UCODE_ID_IMU_I:
+ *type = GFX_FW_TYPE_IMU_I;
+ break;
+ case AMDGPU_UCODE_ID_IMU_D:
+ *type = GFX_FW_TYPE_IMU_D;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP:
+ *type = GFX_FW_TYPE_RS64_PFP;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME:
+ *type = GFX_FW_TYPE_RS64_ME;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC:
+ *type = GFX_FW_TYPE_RS64_MEC;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
+ *type = GFX_FW_TYPE_RS64_PFP_P0_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
+ *type = GFX_FW_TYPE_RS64_PFP_P1_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
+ *type = GFX_FW_TYPE_RS64_ME_P0_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
+ *type = GFX_FW_TYPE_RS64_ME_P1_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
+ *type = GFX_FW_TYPE_RS64_MEC_P0_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
+ *type = GFX_FW_TYPE_RS64_MEC_P1_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
+ *type = GFX_FW_TYPE_RS64_MEC_P2_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
+ *type = GFX_FW_TYPE_RS64_MEC_P3_STACK;
+ break;
+ case AMDGPU_UCODE_ID_VPE_CTX:
+ *type = GFX_FW_TYPE_VPEC_FW1;
+ break;
+ case AMDGPU_UCODE_ID_VPE_CTL:
+ *type = GFX_FW_TYPE_VPEC_FW2;
+ break;
+ case AMDGPU_UCODE_ID_VPE:
+ *type = GFX_FW_TYPE_VPE;
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_UCODE:
+ *type = GFX_FW_TYPE_UMSCH_UCODE;
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_DATA:
+ *type = GFX_FW_TYPE_UMSCH_DATA;
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER:
+ *type = GFX_FW_TYPE_UMSCH_CMD_BUFFER;
+ break;
+ case AMDGPU_UCODE_ID_P2S_TABLE:
+ *type = GFX_FW_TYPE_P2S_TABLE;
+ break;
+ case AMDGPU_UCODE_ID_JPEG_RAM:
+ *type = GFX_FW_TYPE_JPEG_RAM;
+ break;
+ case AMDGPU_UCODE_ID_ISP:
+ *type = GFX_FW_TYPE_ISP;
+ break;
case AMDGPU_UCODE_ID_MAXIMUM:
default:
return -EINVAL;
@@ -2205,7 +2899,8 @@ static void psp_print_fw_hdr(struct psp_context *psp,
}
}
-static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode,
+static int psp_prep_load_ip_fw_cmd_buf(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode,
struct psp_gfx_cmd_resp *cmd)
{
int ret;
@@ -2218,18 +2913,18 @@ static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode,
ret = psp_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type);
if (ret)
- DRM_ERROR("Unknown firmware type\n");
+ dev_err(psp->adev->dev, "Unknown firmware type\n");
return ret;
}
-static int psp_execute_non_psp_fw_load(struct psp_context *psp,
- struct amdgpu_firmware_info *ucode)
+int psp_execute_ip_fw_load(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode)
{
int ret = 0;
struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
- ret = psp_prep_load_ip_fw_cmd_buf(ucode, cmd);
+ ret = psp_prep_load_ip_fw_cmd_buf(psp, ucode, cmd);
if (!ret) {
ret = psp_cmd_submit_buf(psp, ucode, cmd,
psp->fence_buf_mc_addr);
@@ -2240,6 +2935,33 @@ static int psp_execute_non_psp_fw_load(struct psp_context *psp,
return ret;
}
+static int psp_load_p2s_table(struct psp_context *psp)
+{
+ int ret;
+ struct amdgpu_device *adev = psp->adev;
+ struct amdgpu_firmware_info *ucode =
+ &adev->firmware.ucode[AMDGPU_UCODE_ID_P2S_TABLE];
+
+ if (adev->in_runpm && ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) ||
+ (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)))
+ return 0;
+
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) {
+ uint32_t supp_vers = adev->flags & AMD_IS_APU ? 0x0036013D :
+ 0x0036003C;
+ if (psp->sos.fw_version < supp_vers)
+ return 0;
+ }
+
+ if (!ucode->fw || amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ ret = psp_execute_ip_fw_load(psp, ucode);
+
+ return ret;
+}
+
static int psp_load_smu_fw(struct psp_context *psp)
{
int ret;
@@ -2248,23 +2970,29 @@ static int psp_load_smu_fw(struct psp_context *psp)
&adev->firmware.ucode[AMDGPU_UCODE_ID_SMC];
struct amdgpu_ras *ras = psp->ras_context.ras;
+ /*
+ * Skip SMU FW reloading in case of using BACO for runpm only,
+ * as SMU is always alive.
+ */
+ if (adev->in_runpm && ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) ||
+ (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)))
+ return 0;
+
if (!ucode->fw || amdgpu_sriov_vf(psp->adev))
return 0;
- if ((amdgpu_in_reset(adev) &&
- ras && adev->ras_enabled &&
- (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 4) ||
- adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 2)))) {
+ if ((amdgpu_in_reset(adev) && ras && adev->ras_enabled &&
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(11, 0, 4) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(11, 0, 2)))) {
ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD);
- if (ret) {
- DRM_WARN("Failed to set MP1 state prepare for reload\n");
- }
+ if (ret)
+ dev_err(adev->dev, "Failed to set MP1 state prepare for reload\n");
}
- ret = psp_execute_non_psp_fw_load(psp, ucode);
+ ret = psp_execute_ip_fw_load(psp, ucode);
if (ret)
- DRM_ERROR("PSP load smu failed!\n");
+ dev_err(adev->dev, "PSP load smu failed!\n");
return ret;
}
@@ -2272,7 +3000,10 @@ static int psp_load_smu_fw(struct psp_context *psp)
static bool fw_load_skip_check(struct psp_context *psp,
struct amdgpu_firmware_info *ucode)
{
- if (!ucode->fw)
+ if (!ucode->fw || !ucode->ucode_size)
+ return true;
+
+ if (ucode->ucode_id == AMDGPU_UCODE_ID_P2S_TABLE)
return true;
if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&
@@ -2282,20 +3013,7 @@ static bool fw_load_skip_check(struct psp_context *psp,
return true;
if (amdgpu_sriov_vf(psp->adev) &&
- (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA0
- || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1
- || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2
- || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3
- || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA4
- || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA5
- || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA6
- || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA7
- || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G
- || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL
- || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM
- || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM
- || ucode->ucode_id == AMDGPU_UCODE_ID_SMC))
- /*skip ucode loading in SRIOV VF */
+ amdgpu_virt_fw_load_skip_check(psp->adev, ucode->ucode_id))
return true;
if (psp->autoload_supported &&
@@ -2316,7 +3034,7 @@ int psp_load_fw_list(struct psp_context *psp,
for (i = 0; i < ucode_count; ++i) {
ucode = ucode_list[i];
psp_print_fw_hdr(psp, ucode);
- ret = psp_execute_non_psp_fw_load(psp, ucode);
+ ret = psp_execute_ip_fw_load(psp, ucode);
if (ret)
return ret;
}
@@ -2336,6 +3054,9 @@ static int psp_load_non_psp_fw(struct psp_context *psp)
return ret;
}
+ /* Load P2S table first if it's available */
+ psp_load_p2s_table(psp);
+
for (i = 0; i < adev->firmware.max_ucodes; i++) {
ucode = &adev->firmware.ucode[i];
@@ -2351,28 +3072,32 @@ static int psp_load_non_psp_fw(struct psp_context *psp)
continue;
if (psp->autoload_supported &&
- (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7) ||
- adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 11) ||
- adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 12)) &&
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) ==
+ IP_VERSION(11, 0, 7) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) ==
+ IP_VERSION(11, 0, 11) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) ==
+ IP_VERSION(11, 0, 12)) &&
(ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 ||
ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 ||
ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3))
/* PSP only receive one SDMA fw for sienna_cichlid,
- * as all four sdma fw are same */
+ * as all four sdma fw are same
+ */
continue;
psp_print_fw_hdr(psp, ucode);
- ret = psp_execute_non_psp_fw_load(psp, ucode);
+ ret = psp_execute_ip_fw_load(psp, ucode);
if (ret)
return ret;
- /* Start rlc autoload after psp recieved all the gfx firmware */
+ /* Start rlc autoload after psp received all the gfx firmware */
if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ?
- AMDGPU_UCODE_ID_CP_MEC2 : AMDGPU_UCODE_ID_RLC_G)) {
+ adev->virt.autoload_ucode_id : AMDGPU_UCODE_ID_RLC_G)) {
ret = psp_rlc_autoload_start(psp);
if (ret) {
- DRM_ERROR("Failed to start rlc autoload\n");
+ dev_err(adev->dev, "Failed to start rlc autoload\n");
return ret;
}
}
@@ -2387,76 +3112,55 @@ static int psp_load_fw(struct amdgpu_device *adev)
struct psp_context *psp = &adev->psp;
if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) {
- psp_ring_stop(psp, PSP_RING_TYPE__KM); /* should not destroy ring, only stop */
- goto skip_memalloc;
- }
-
- if (amdgpu_sriov_vf(adev)) {
- ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
- AMDGPU_GEM_DOMAIN_VRAM,
- &psp->fw_pri_bo,
- &psp->fw_pri_mc_addr,
- &psp->fw_pri_buf);
+ /* should not destroy ring, only stop */
+ psp_ring_stop(psp, PSP_RING_TYPE__KM);
} else {
- ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
- AMDGPU_GEM_DOMAIN_GTT,
- &psp->fw_pri_bo,
- &psp->fw_pri_mc_addr,
- &psp->fw_pri_buf);
- }
-
- if (ret)
- goto failed;
-
- ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &psp->fence_buf_bo,
- &psp->fence_buf_mc_addr,
- &psp->fence_buf);
- if (ret)
- goto failed;
-
- ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
- (void **)&psp->cmd_buf_mem);
- if (ret)
- goto failed;
-
- memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE);
+ memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE);
- ret = psp_ring_init(psp, PSP_RING_TYPE__KM);
- if (ret) {
- DRM_ERROR("PSP ring init failed!\n");
- goto failed;
+ ret = psp_ring_init(psp, PSP_RING_TYPE__KM);
+ if (ret) {
+ dev_err(adev->dev, "PSP ring init failed!\n");
+ goto failed;
+ }
}
-skip_memalloc:
ret = psp_hw_start(psp);
if (ret)
goto failed;
ret = psp_load_non_psp_fw(psp);
if (ret)
- goto failed;
+ goto failed1;
ret = psp_asd_initialize(psp);
if (ret) {
- DRM_ERROR("PSP load asd failed!\n");
- return ret;
+ dev_err(adev->dev, "PSP load asd failed!\n");
+ goto failed1;
}
ret = psp_rl_load(adev);
if (ret) {
- DRM_ERROR("PSP load RL failed!\n");
- return ret;
+ dev_err(adev->dev, "PSP load RL failed!\n");
+ goto failed1;
+ }
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) {
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ ret = psp_xgmi_initialize(psp, false, true);
+ /* Warn about the XGMI session initialization failure
+ * instead of stopping driver initialization
+ */
+ if (ret)
+ dev_err(psp->adev->dev,
+ "XGMI: Failed to initialize XGMI session\n");
+ }
}
if (psp->ta_fw) {
ret = psp_ras_initialize(psp);
if (ret)
dev_err(psp->adev->dev,
- "RAS: Failed to initialize RAS\n");
+ "RAS: Failed to initialize RAS\n");
ret = psp_hdcp_initialize(psp);
if (ret)
@@ -2481,32 +3185,32 @@ skip_memalloc:
return 0;
+failed1:
+ psp_free_shared_bufs(psp);
failed:
/*
* all cleanup jobs (xgmi terminate, ras terminate,
* ring destroy, cmd/fence/fw buffers destory,
* psp->cmd destory) are delayed to psp_hw_fini
*/
+ psp_ring_destroy(psp, PSP_RING_TYPE__KM);
return ret;
}
-static int psp_hw_init(void *handle)
+static int psp_hw_init(struct amdgpu_ip_block *ip_block)
{
int ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
mutex_lock(&adev->firmware.mutex);
- /*
- * This sequence is just used on hw_init only once, no need on
- * resume.
- */
+
ret = amdgpu_ucode_init_bo(adev);
if (ret)
goto failed;
ret = psp_load_fw(adev);
if (ret) {
- DRM_ERROR("PSP firmware loading failed\n");
+ dev_err(adev->dev, "PSP firmware loading failed\n");
goto failed;
}
@@ -2519,9 +3223,9 @@ failed:
return -EINVAL;
}
-static int psp_hw_fini(void *handle)
+static int psp_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
if (psp->ta_fw) {
@@ -2530,105 +3234,104 @@ static int psp_hw_fini(void *handle)
psp_rap_terminate(psp);
psp_dtm_terminate(psp);
psp_hdcp_terminate(psp);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1)
+ psp_xgmi_terminate(psp);
}
psp_asd_terminate(psp);
-
psp_tmr_terminate(psp);
- psp_ring_destroy(psp, PSP_RING_TYPE__KM);
- amdgpu_bo_free_kernel(&psp->fw_pri_bo,
- &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
- amdgpu_bo_free_kernel(&psp->fence_buf_bo,
- &psp->fence_buf_mc_addr, &psp->fence_buf);
- amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
- (void **)&psp->cmd_buf_mem);
+ psp_ring_destroy(psp, PSP_RING_TYPE__KM);
return 0;
}
-static int psp_suspend(void *handle)
+static int psp_suspend(struct amdgpu_ip_block *ip_block)
{
- int ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret = 0;
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
if (adev->gmc.xgmi.num_physical_nodes > 1 &&
psp->xgmi_context.context.initialized) {
ret = psp_xgmi_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate xgmi ta\n");
- return ret;
+ dev_err(adev->dev, "Failed to terminate xgmi ta\n");
+ goto out;
}
}
if (psp->ta_fw) {
ret = psp_ras_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate ras ta\n");
- return ret;
+ dev_err(adev->dev, "Failed to terminate ras ta\n");
+ goto out;
}
ret = psp_hdcp_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate hdcp ta\n");
- return ret;
+ dev_err(adev->dev, "Failed to terminate hdcp ta\n");
+ goto out;
}
ret = psp_dtm_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate dtm ta\n");
- return ret;
+ dev_err(adev->dev, "Failed to terminate dtm ta\n");
+ goto out;
}
ret = psp_rap_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate rap ta\n");
- return ret;
+ dev_err(adev->dev, "Failed to terminate rap ta\n");
+ goto out;
}
ret = psp_securedisplay_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate securedisplay ta\n");
- return ret;
+ dev_err(adev->dev, "Failed to terminate securedisplay ta\n");
+ goto out;
}
}
ret = psp_asd_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate asd\n");
- return ret;
+ dev_err(adev->dev, "Failed to terminate asd\n");
+ goto out;
}
ret = psp_tmr_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate tmr\n");
- return ret;
+ dev_err(adev->dev, "Failed to terminate tmr\n");
+ goto out;
}
ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
- if (ret) {
- DRM_ERROR("PSP ring stop failed\n");
- return ret;
- }
+ if (ret)
+ dev_err(adev->dev, "PSP ring stop failed\n");
- return 0;
+out:
+ return ret;
}
-static int psp_resume(void *handle)
+static int psp_resume(struct amdgpu_ip_block *ip_block)
{
int ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
- DRM_INFO("PSP is resuming...\n");
+ dev_info(adev->dev, "PSP is resuming...\n");
if (psp->mem_train_ctx.enable_mem_training) {
ret = psp_mem_training(psp, PSP_MEM_TRAIN_RESUME);
if (ret) {
- DRM_ERROR("Failed to process memory training!\n");
+ dev_err(adev->dev, "Failed to process memory training!\n");
return ret;
}
}
mutex_lock(&adev->firmware.mutex);
+ ret = amdgpu_ucode_init_bo(adev);
+ if (ret)
+ goto failed;
+
ret = psp_hw_start(psp);
if (ret)
goto failed;
@@ -2639,7 +3342,7 @@ static int psp_resume(void *handle)
ret = psp_asd_initialize(psp);
if (ret) {
- DRM_ERROR("PSP load asd failed!\n");
+ dev_err(adev->dev, "PSP load asd failed!\n");
goto failed;
}
@@ -2663,7 +3366,7 @@ static int psp_resume(void *handle)
ret = psp_ras_initialize(psp);
if (ret)
dev_err(psp->adev->dev,
- "RAS: Failed to initialize RAS\n");
+ "RAS: Failed to initialize RAS\n");
ret = psp_hdcp_initialize(psp);
if (ret)
@@ -2691,7 +3394,7 @@ static int psp_resume(void *handle)
return 0;
failed:
- DRM_ERROR("PSP resume failed\n");
+ dev_err(adev->dev, "PSP resume failed\n");
mutex_unlock(&adev->firmware.mutex);
return ret;
}
@@ -2725,19 +3428,6 @@ int psp_rlc_autoload_start(struct psp_context *psp)
return ret;
}
-int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx,
- uint64_t cmd_gpu_addr, int cmd_size)
-{
- struct amdgpu_firmware_info ucode = {0};
-
- ucode.ucode_id = inst_idx ? AMDGPU_UCODE_ID_VCN1_RAM :
- AMDGPU_UCODE_ID_VCN0_RAM;
- ucode.mc_addr = cmd_gpu_addr;
- ucode.ucode_size = cmd_size;
-
- return psp_execute_non_psp_fw_load(&adev->psp, &ucode);
-}
-
int psp_ring_cmd_submit(struct psp_context *psp,
uint64_t cmd_buf_mc_addr,
uint64_t fence_mc_addr,
@@ -2765,9 +3455,11 @@ int psp_ring_cmd_submit(struct psp_context *psp,
write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
/* Check invalid write_frame ptr address */
if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
- DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
- ring_buffer_start, ring_buffer_end, write_frame);
- DRM_ERROR("write_frame is pointing to address out of bounds\n");
+ dev_err(adev->dev,
+ "ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
+ ring_buffer_start, ring_buffer_end, write_frame);
+ dev_err(adev->dev,
+ "write_frame is pointing to address out of bounds\n");
return -EINVAL;
}
@@ -2788,25 +3480,14 @@ int psp_ring_cmd_submit(struct psp_context *psp,
return 0;
}
-int psp_init_asd_microcode(struct psp_context *psp,
- const char *chip_name)
+int psp_init_asd_microcode(struct psp_context *psp, const char *chip_name)
{
struct amdgpu_device *adev = psp->adev;
- char fw_name[PSP_FW_NAME_LEN];
const struct psp_firmware_header_v1_0 *asd_hdr;
int err = 0;
- if (!chip_name) {
- dev_err(adev->dev, "invalid chip name for asd microcode\n");
- return -EINVAL;
- }
-
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name);
- err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev);
- if (err)
- goto out;
-
- err = amdgpu_ucode_validate(adev->psp.asd_fw);
+ err = amdgpu_ucode_request(adev, &adev->psp.asd_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_asd.bin", chip_name);
if (err)
goto out;
@@ -2818,31 +3499,18 @@ int psp_init_asd_microcode(struct psp_context *psp,
le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes);
return 0;
out:
- dev_err(adev->dev, "fail to initialize asd microcode\n");
- release_firmware(adev->psp.asd_fw);
- adev->psp.asd_fw = NULL;
+ amdgpu_ucode_release(&adev->psp.asd_fw);
return err;
}
-int psp_init_toc_microcode(struct psp_context *psp,
- const char *chip_name)
+int psp_init_toc_microcode(struct psp_context *psp, const char *chip_name)
{
struct amdgpu_device *adev = psp->adev;
- char fw_name[PSP_FW_NAME_LEN];
const struct psp_firmware_header_v1_0 *toc_hdr;
int err = 0;
- if (!chip_name) {
- dev_err(adev->dev, "invalid chip name for toc microcode\n");
- return -EINVAL;
- }
-
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", chip_name);
- err = request_firmware(&adev->psp.toc_fw, fw_name, adev->dev);
- if (err)
- goto out;
-
- err = amdgpu_ucode_validate(adev->psp.toc_fw);
+ err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_toc.bin", chip_name);
if (err)
goto out;
@@ -2854,9 +3522,7 @@ int psp_init_toc_microcode(struct psp_context *psp,
le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
return 0;
out:
- dev_err(adev->dev, "fail to request/validate toc microcode\n");
- release_firmware(adev->psp.toc_fw);
- adev->psp.toc_fw = NULL;
+ amdgpu_ucode_release(&adev->psp.toc_fw);
return err;
}
@@ -2878,7 +3544,7 @@ static int parse_sos_bin_descriptor(struct psp_context *psp,
psp->sos.fw_version = le32_to_cpu(desc->fw_version);
psp->sos.feature_version = le32_to_cpu(desc->fw_version);
psp->sos.size_bytes = le32_to_cpu(desc->size_bytes);
- psp->sos.start_addr = ucode_start_addr;
+ psp->sos.start_addr = ucode_start_addr;
break;
case PSP_FW_TYPE_PSP_SYS_DRV:
psp->sys.fw_version = le32_to_cpu(desc->fw_version);
@@ -2928,6 +3594,24 @@ static int parse_sos_bin_descriptor(struct psp_context *psp,
psp->dbg_drv.size_bytes = le32_to_cpu(desc->size_bytes);
psp->dbg_drv.start_addr = ucode_start_addr;
break;
+ case PSP_FW_TYPE_PSP_RAS_DRV:
+ psp->ras_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->ras_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->ras_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->ras_drv.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_IPKEYMGR_DRV:
+ psp->ipkeymgr_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->ipkeymgr_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->ipkeymgr_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->ipkeymgr_drv.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_SPDM_DRV:
+ psp->spdm_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->spdm_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->spdm_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->spdm_drv.start_addr = ucode_start_addr;
+ break;
default:
dev_warn(psp->adev->dev, "Unsupported PSP FW type: %d\n", desc->fw_type);
break;
@@ -2947,7 +3631,7 @@ static int psp_init_sos_base_fw(struct amdgpu_device *adev)
le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes);
if (adev->gmc.xgmi.connected_to_cpu ||
- (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(13, 0, 2))) {
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2))) {
adev->psp.sos.fw_version = le32_to_cpu(sos_hdr->header.ucode_version);
adev->psp.sos.feature_version = le32_to_cpu(sos_hdr->sos.fw_version);
@@ -2957,7 +3641,6 @@ static int psp_init_sos_base_fw(struct amdgpu_device *adev)
adev->psp.sos.size_bytes = le32_to_cpu(sos_hdr->sos.size_bytes);
adev->psp.sos.start_addr = ucode_array_start_addr +
le32_to_cpu(sos_hdr->sos.offset_bytes);
- adev->psp.xgmi_context.supports_extended_data = false;
} else {
/* Load alternate PSP SOS FW */
sos_hdr_v1_3 = (const struct psp_firmware_header_v1_3 *)adev->psp.sos_fw->data;
@@ -2972,7 +3655,6 @@ static int psp_init_sos_base_fw(struct amdgpu_device *adev)
adev->psp.sos.size_bytes = le32_to_cpu(sos_hdr_v1_3->sos_aux.size_bytes);
adev->psp.sos.start_addr = ucode_array_start_addr +
le32_to_cpu(sos_hdr_v1_3->sos_aux.offset_bytes);
- adev->psp.xgmi_context.supports_extended_data = true;
}
if ((adev->psp.sys.size_bytes == 0) || (adev->psp.sos.size_bytes == 0)) {
@@ -2983,31 +3665,26 @@ static int psp_init_sos_base_fw(struct amdgpu_device *adev)
return 0;
}
-int psp_init_sos_microcode(struct psp_context *psp,
- const char *chip_name)
+int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name)
{
struct amdgpu_device *adev = psp->adev;
- char fw_name[PSP_FW_NAME_LEN];
const struct psp_firmware_header_v1_0 *sos_hdr;
const struct psp_firmware_header_v1_1 *sos_hdr_v1_1;
const struct psp_firmware_header_v1_2 *sos_hdr_v1_2;
const struct psp_firmware_header_v1_3 *sos_hdr_v1_3;
const struct psp_firmware_header_v2_0 *sos_hdr_v2_0;
- int err = 0;
+ const struct psp_firmware_header_v2_1 *sos_hdr_v2_1;
+ int fw_index, fw_bin_count, start_index = 0;
+ const struct psp_fw_bin_desc *fw_bin;
uint8_t *ucode_array_start_addr;
- int fw_index = 0;
-
- if (!chip_name) {
- dev_err(adev->dev, "invalid chip name for sos microcode\n");
- return -EINVAL;
- }
-
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sos.bin", chip_name);
- err = request_firmware(&adev->psp.sos_fw, fw_name, adev->dev);
- if (err)
- goto out;
+ int err = 0;
- err = amdgpu_ucode_validate(adev->psp.sos_fw);
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sos_kicker.bin", chip_name);
+ else
+ err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sos.bin", chip_name);
if (err)
goto out;
@@ -3056,15 +3733,30 @@ int psp_init_sos_microcode(struct psp_context *psp,
case 2:
sos_hdr_v2_0 = (const struct psp_firmware_header_v2_0 *)adev->psp.sos_fw->data;
- if (le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count) >= UCODE_MAX_PSP_PACKAGING) {
+ fw_bin_count = le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count);
+
+ if (fw_bin_count >= UCODE_MAX_PSP_PACKAGING) {
dev_err(adev->dev, "packed SOS count exceeds maximum limit\n");
err = -EINVAL;
goto out;
}
- for (fw_index = 0; fw_index < le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count); fw_index++) {
- err = parse_sos_bin_descriptor(psp,
- &sos_hdr_v2_0->psp_fw_bin[fw_index],
+ if (sos_hdr_v2_0->header.header_version_minor == 1) {
+ sos_hdr_v2_1 = (const struct psp_firmware_header_v2_1 *)adev->psp.sos_fw->data;
+
+ fw_bin = sos_hdr_v2_1->psp_fw_bin;
+
+ if (psp_is_aux_sos_load_required(psp))
+ start_index = le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index);
+ else
+ fw_bin_count -= le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index);
+
+ } else {
+ fw_bin = sos_hdr_v2_0->psp_fw_bin;
+ }
+
+ for (fw_index = start_index; fw_index < fw_bin_count; fw_index++) {
+ err = parse_sos_bin_descriptor(psp, fw_bin + fw_index,
sos_hdr_v2_0);
if (err)
goto out;
@@ -3079,14 +3771,41 @@ int psp_init_sos_microcode(struct psp_context *psp,
return 0;
out:
- dev_err(adev->dev,
- "failed to init sos firmware\n");
- release_firmware(adev->psp.sos_fw);
- adev->psp.sos_fw = NULL;
+ amdgpu_ucode_release(&adev->psp.sos_fw);
return err;
}
+static bool is_ta_fw_applicable(struct psp_context *psp,
+ const struct psp_fw_bin_desc *desc)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t fw_version;
+
+ switch (desc->fw_type) {
+ case TA_FW_TYPE_PSP_XGMI:
+ case TA_FW_TYPE_PSP_XGMI_AUX:
+ /* for now, AUX TA only exists on 13.0.6 ta bin,
+ * from v20.00.0x.14
+ */
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) ==
+ IP_VERSION(13, 0, 6)) {
+ fw_version = le32_to_cpu(desc->fw_version);
+
+ if (adev->flags & AMD_IS_APU &&
+ (fw_version & 0xff) >= 0x14)
+ return desc->fw_type == TA_FW_TYPE_PSP_XGMI_AUX;
+ else
+ return desc->fw_type == TA_FW_TYPE_PSP_XGMI;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return true;
+}
+
static int parse_ta_bin_descriptor(struct psp_context *psp,
const struct psp_fw_bin_desc *desc,
const struct ta_firmware_header_v2_0 *ta_hdr)
@@ -3096,6 +3815,9 @@ static int parse_ta_bin_descriptor(struct psp_context *psp,
if (!psp || !desc || !ta_hdr)
return -EINVAL;
+ if (!is_ta_fw_applicable(psp, desc))
+ return 0;
+
ucode_start_addr = (uint8_t *)ta_hdr +
le32_to_cpu(desc->offset_bytes) +
le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
@@ -3108,6 +3830,7 @@ static int parse_ta_bin_descriptor(struct psp_context *psp,
psp->asd_context.bin_desc.start_addr = ucode_start_addr;
break;
case TA_FW_TYPE_PSP_XGMI:
+ case TA_FW_TYPE_PSP_XGMI_AUX:
psp->xgmi_context.context.bin_desc.fw_version = le32_to_cpu(desc->fw_version);
psp->xgmi_context.context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes);
psp->xgmi_context.context.bin_desc.start_addr = ucode_start_addr;
@@ -3148,41 +3871,76 @@ static int parse_ta_bin_descriptor(struct psp_context *psp,
return 0;
}
-int psp_init_ta_microcode(struct psp_context *psp,
- const char *chip_name)
+static int parse_ta_v1_microcode(struct psp_context *psp)
{
+ const struct ta_firmware_header_v1_0 *ta_hdr;
struct amdgpu_device *adev = psp->adev;
- char fw_name[PSP_FW_NAME_LEN];
- const struct ta_firmware_header_v2_0 *ta_hdr;
- int err = 0;
- int ta_index = 0;
- if (!chip_name) {
- dev_err(adev->dev, "invalid chip name for ta microcode\n");
+ ta_hdr = (const struct ta_firmware_header_v1_0 *) adev->psp.ta_fw->data;
+
+ if (le16_to_cpu(ta_hdr->header.header_version_major) != 1)
return -EINVAL;
- }
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
- err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
- if (err)
- goto out;
+ adev->psp.xgmi_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->xgmi.fw_version);
+ adev->psp.xgmi_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->xgmi.size_bytes);
+ adev->psp.xgmi_context.context.bin_desc.start_addr =
+ (uint8_t *)ta_hdr +
+ le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+
+ adev->psp.ras_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->ras.fw_version);
+ adev->psp.ras_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->ras.size_bytes);
+ adev->psp.ras_context.context.bin_desc.start_addr =
+ (uint8_t *)adev->psp.xgmi_context.context.bin_desc.start_addr +
+ le32_to_cpu(ta_hdr->ras.offset_bytes);
+
+ adev->psp.hdcp_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->hdcp.fw_version);
+ adev->psp.hdcp_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->hdcp.size_bytes);
+ adev->psp.hdcp_context.context.bin_desc.start_addr =
+ (uint8_t *)ta_hdr +
+ le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+
+ adev->psp.dtm_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->dtm.fw_version);
+ adev->psp.dtm_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->dtm.size_bytes);
+ adev->psp.dtm_context.context.bin_desc.start_addr =
+ (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
+ le32_to_cpu(ta_hdr->dtm.offset_bytes);
+
+ adev->psp.securedisplay_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->securedisplay.fw_version);
+ adev->psp.securedisplay_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->securedisplay.size_bytes);
+ adev->psp.securedisplay_context.context.bin_desc.start_addr =
+ (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
+ le32_to_cpu(ta_hdr->securedisplay.offset_bytes);
+
+ adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
- err = amdgpu_ucode_validate(adev->psp.ta_fw);
- if (err)
- goto out;
+ return 0;
+}
+
+static int parse_ta_v2_microcode(struct psp_context *psp)
+{
+ const struct ta_firmware_header_v2_0 *ta_hdr;
+ struct amdgpu_device *adev = psp->adev;
+ int err = 0;
+ int ta_index = 0;
ta_hdr = (const struct ta_firmware_header_v2_0 *)adev->psp.ta_fw->data;
- if (le16_to_cpu(ta_hdr->header.header_version_major) != 2) {
- dev_err(adev->dev, "unsupported TA header version\n");
- err = -EINVAL;
- goto out;
- }
+ if (le16_to_cpu(ta_hdr->header.header_version_major) != 2)
+ return -EINVAL;
if (le32_to_cpu(ta_hdr->ta_fw_bin_count) >= UCODE_MAX_PSP_PACKAGING) {
dev_err(adev->dev, "packed TA count exceeds maximum limit\n");
- err = -EINVAL;
- goto out;
+ return -EINVAL;
}
for (ta_index = 0; ta_index < le32_to_cpu(ta_hdr->ta_fw_bin_count); ta_index++) {
@@ -3190,24 +3948,131 @@ int psp_init_ta_microcode(struct psp_context *psp,
&ta_hdr->ta_fw_bin[ta_index],
ta_hdr);
if (err)
- goto out;
+ return err;
}
return 0;
+}
+
+int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name)
+{
+ const struct common_firmware_header *hdr;
+ struct amdgpu_device *adev = psp->adev;
+ int err;
+
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ta_kicker.bin", chip_name);
+ else
+ err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ta.bin", chip_name);
+ if (err)
+ return err;
+
+ hdr = (const struct common_firmware_header *)adev->psp.ta_fw->data;
+ switch (le16_to_cpu(hdr->header_version_major)) {
+ case 1:
+ err = parse_ta_v1_microcode(psp);
+ break;
+ case 2:
+ err = parse_ta_v2_microcode(psp);
+ break;
+ default:
+ dev_err(adev->dev, "unsupported TA header version\n");
+ err = -EINVAL;
+ }
+
+ if (err)
+ amdgpu_ucode_release(&adev->psp.ta_fw);
+
+ return err;
+}
+
+int psp_init_cap_microcode(struct psp_context *psp, const char *chip_name)
+{
+ struct amdgpu_device *adev = psp->adev;
+ const struct psp_firmware_header_v1_0 *cap_hdr_v1_0;
+ struct amdgpu_firmware_info *info = NULL;
+ int err = 0;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ dev_err(adev->dev, "cap microcode should only be loaded under SRIOV\n");
+ return -EINVAL;
+ }
+
+ err = amdgpu_ucode_request(adev, &adev->psp.cap_fw, AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_cap.bin", chip_name);
+ if (err) {
+ if (err == -ENODEV) {
+ dev_warn(adev->dev, "cap microcode does not exist, skip\n");
+ err = 0;
+ } else {
+ dev_err(adev->dev, "fail to initialize cap microcode\n");
+ }
+ goto out;
+ }
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CAP];
+ info->ucode_id = AMDGPU_UCODE_ID_CAP;
+ info->fw = adev->psp.cap_fw;
+ cap_hdr_v1_0 = (const struct psp_firmware_header_v1_0 *)
+ adev->psp.cap_fw->data;
+ adev->firmware.fw_size += ALIGN(
+ le32_to_cpu(cap_hdr_v1_0->header.ucode_size_bytes), PAGE_SIZE);
+ adev->psp.cap_fw_version = le32_to_cpu(cap_hdr_v1_0->header.ucode_version);
+ adev->psp.cap_feature_version = le32_to_cpu(cap_hdr_v1_0->sos.fw_version);
+ adev->psp.cap_ucode_size = le32_to_cpu(cap_hdr_v1_0->header.ucode_size_bytes);
+
+ return 0;
+
out:
- dev_err(adev->dev, "fail to initialize ta microcode\n");
- release_firmware(adev->psp.ta_fw);
- adev->psp.ta_fw = NULL;
+ amdgpu_ucode_release(&adev->psp.cap_fw);
return err;
}
-static int psp_set_clockgating_state(void *handle,
- enum amd_clockgating_state state)
+int psp_config_sq_perfmon(struct psp_context *psp,
+ uint32_t xcp_id, bool core_override_enable,
+ bool reg_override_enable, bool perfmon_override_enable)
+{
+ int ret;
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (xcp_id > MAX_XCP) {
+ dev_err(psp->adev->dev, "invalid xcp_id %d\n", xcp_id);
+ return -EINVAL;
+ }
+
+ if (amdgpu_ip_version(psp->adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) {
+ dev_err(psp->adev->dev, "Unsupported MP0 version 0x%x for CONFIG_SQ_PERFMON command\n",
+ amdgpu_ip_version(psp->adev, MP0_HWIP, 0));
+ return -EINVAL;
+ }
+ struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_CONFIG_SQ_PERFMON;
+ cmd->cmd.config_sq_perfmon.gfx_xcp_mask = BIT_MASK(xcp_id);
+ cmd->cmd.config_sq_perfmon.core_override = core_override_enable;
+ cmd->cmd.config_sq_perfmon.reg_override = reg_override_enable;
+ cmd->cmd.config_sq_perfmon.perfmon_override = perfmon_override_enable;
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+ if (ret)
+ dev_warn(psp->adev->dev, "PSP failed to config sq: xcp%d core%d reg%d perfmon%d\n",
+ xcp_id, core_override_enable, reg_override_enable, perfmon_override_enable);
+
+ release_psp_cmd_buf(psp);
+ return ret;
+}
+
+static int psp_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
{
return 0;
}
-static int psp_set_powergating_state(void *handle,
+static int psp_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -3219,11 +4084,13 @@ static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct amdgpu_ip_block *ip_block;
uint32_t fw_ver;
int ret;
- if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) {
- DRM_INFO("PSP block is not ready yet.");
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);
+ if (!ip_block || !ip_block->status.late_initialized) {
+ dev_info(adev->dev, "PSP block is not ready yet\n.");
return -EBUSY;
}
@@ -3232,7 +4099,7 @@ static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev,
mutex_unlock(&adev->psp.mutex);
if (ret) {
- DRM_ERROR("Failed to read USBC PD FW, err = %d", ret);
+ dev_err(adev->dev, "Failed to read USBC PD FW, err = %d\n", ret);
return ret;
}
@@ -3247,31 +4114,32 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
int ret, idx;
- char fw_name[100];
const struct firmware *usbc_pd_fw;
struct amdgpu_bo *fw_buf_bo = NULL;
uint64_t fw_pri_mc_addr;
void *fw_pri_cpu_addr;
+ struct amdgpu_ip_block *ip_block;
- if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) {
- DRM_INFO("PSP block is not ready yet.");
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);
+ if (!ip_block || !ip_block->status.late_initialized) {
+ dev_err(adev->dev, "PSP block is not ready yet.");
return -EBUSY;
}
if (!drm_dev_enter(ddev, &idx))
return -ENODEV;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s", buf);
- ret = request_firmware(&usbc_pd_fw, fw_name, adev->dev);
+ ret = amdgpu_ucode_request(adev, &usbc_pd_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s", buf);
if (ret)
goto fail;
/* LFB address which is aligned to 1MB boundary per PSP request */
ret = amdgpu_bo_create_kernel(adev, usbc_pd_fw->size, 0x100000,
- AMDGPU_GEM_DOMAIN_VRAM,
- &fw_buf_bo,
- &fw_pri_mc_addr,
- &fw_pri_cpu_addr);
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &fw_buf_bo, &fw_pri_mc_addr,
+ &fw_pri_cpu_addr);
if (ret)
goto rel_buf;
@@ -3284,10 +4152,10 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev,
amdgpu_bo_free_kernel(&fw_buf_bo, &fw_pri_mc_addr, &fw_pri_cpu_addr);
rel_buf:
- release_firmware(usbc_pd_fw);
+ amdgpu_ucode_release(&usbc_pd_fw);
fail:
if (ret) {
- DRM_ERROR("Failed to load USBC PD FW, err = %d", ret);
+ dev_err(adev->dev, "Failed to load USBC PD FW, err = %d", ret);
count = ret;
}
@@ -3308,7 +4176,12 @@ void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size
drm_dev_exit(idx);
}
-static DEVICE_ATTR(usbc_pd_fw, S_IRUGO | S_IWUSR,
+/**
+ * DOC: usbc_pd_fw
+ * Reading from this file will retrieve the USB-C PD firmware version. Writing to
+ * this file will trigger the update process.
+ */
+static DEVICE_ATTR(usbc_pd_fw, 0644,
psp_usbc_pd_fw_sysfs_read,
psp_usbc_pd_fw_sysfs_write);
@@ -3317,41 +4190,285 @@ int is_psp_fw_valid(struct psp_bin_desc bin)
return bin.size_bytes;
}
-const struct amd_ip_funcs psp_ip_funcs = {
- .name = "psp",
- .early_init = psp_early_init,
- .late_init = NULL,
- .sw_init = psp_sw_init,
- .sw_fini = psp_sw_fini,
- .hw_init = psp_hw_init,
- .hw_fini = psp_hw_fini,
- .suspend = psp_suspend,
- .resume = psp_resume,
- .is_idle = NULL,
- .check_soft_reset = NULL,
- .wait_for_idle = NULL,
- .soft_reset = NULL,
- .set_clockgating_state = psp_set_clockgating_state,
- .set_powergating_state = psp_set_powergating_state,
+static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj,
+ const struct bin_attribute *bin_attr,
+ char *buffer, loff_t pos, size_t count)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ adev->psp.vbflash_done = false;
+
+ /* Safeguard against memory drain */
+ if (adev->psp.vbflash_image_size > AMD_VBIOS_FILE_MAX_SIZE_B) {
+ dev_err(adev->dev, "File size cannot exceed %u\n", AMD_VBIOS_FILE_MAX_SIZE_B);
+ kvfree(adev->psp.vbflash_tmp_buf);
+ adev->psp.vbflash_tmp_buf = NULL;
+ adev->psp.vbflash_image_size = 0;
+ return -ENOMEM;
+ }
+
+ /* TODO Just allocate max for now and optimize to realloc later if needed */
+ if (!adev->psp.vbflash_tmp_buf) {
+ adev->psp.vbflash_tmp_buf = kvmalloc(AMD_VBIOS_FILE_MAX_SIZE_B, GFP_KERNEL);
+ if (!adev->psp.vbflash_tmp_buf)
+ return -ENOMEM;
+ }
+
+ mutex_lock(&adev->psp.mutex);
+ memcpy(adev->psp.vbflash_tmp_buf + pos, buffer, count);
+ adev->psp.vbflash_image_size += count;
+ mutex_unlock(&adev->psp.mutex);
+
+ dev_dbg(adev->dev, "IFWI staged for update\n");
+
+ return count;
+}
+
+static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj,
+ const struct bin_attribute *bin_attr, char *buffer,
+ loff_t pos, size_t count)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct amdgpu_bo *fw_buf_bo = NULL;
+ uint64_t fw_pri_mc_addr;
+ void *fw_pri_cpu_addr;
+ int ret;
+
+ if (adev->psp.vbflash_image_size == 0)
+ return -EINVAL;
+
+ dev_dbg(adev->dev, "PSP IFWI flash process initiated\n");
+
+ ret = amdgpu_bo_create_kernel(adev, adev->psp.vbflash_image_size,
+ AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &fw_buf_bo,
+ &fw_pri_mc_addr,
+ &fw_pri_cpu_addr);
+ if (ret)
+ goto rel_buf;
+
+ memcpy_toio(fw_pri_cpu_addr, adev->psp.vbflash_tmp_buf, adev->psp.vbflash_image_size);
+
+ mutex_lock(&adev->psp.mutex);
+ ret = psp_update_spirom(&adev->psp, fw_pri_mc_addr);
+ mutex_unlock(&adev->psp.mutex);
+
+ amdgpu_bo_free_kernel(&fw_buf_bo, &fw_pri_mc_addr, &fw_pri_cpu_addr);
+
+rel_buf:
+ kvfree(adev->psp.vbflash_tmp_buf);
+ adev->psp.vbflash_tmp_buf = NULL;
+ adev->psp.vbflash_image_size = 0;
+
+ if (ret) {
+ dev_err(adev->dev, "Failed to load IFWI, err = %d\n", ret);
+ return ret;
+ }
+
+ dev_dbg(adev->dev, "PSP IFWI flash process done\n");
+ return 0;
+}
+
+/**
+ * DOC: psp_vbflash
+ * Writing to this file will stage an IFWI for update. Reading from this file
+ * will trigger the update process.
+ */
+static const struct bin_attribute psp_vbflash_bin_attr = {
+ .attr = {.name = "psp_vbflash", .mode = 0660},
+ .size = 0,
+ .write = amdgpu_psp_vbflash_write,
+ .read = amdgpu_psp_vbflash_read,
+};
+
+/**
+ * DOC: psp_vbflash_status
+ * The status of the flash process.
+ * 0: IFWI flash not complete.
+ * 1: IFWI flash complete.
+ */
+static ssize_t amdgpu_psp_vbflash_status(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ uint32_t vbflash_status;
+
+ vbflash_status = psp_vbflash_status(&adev->psp);
+ if (!adev->psp.vbflash_done)
+ vbflash_status = 0;
+ else if (adev->psp.vbflash_done && !(vbflash_status & 0x80000000))
+ vbflash_status = 1;
+
+ return sysfs_emit(buf, "0x%x\n", vbflash_status);
+}
+static DEVICE_ATTR(psp_vbflash_status, 0440, amdgpu_psp_vbflash_status, NULL);
+
+static const struct bin_attribute *const bin_flash_attrs[] = {
+ &psp_vbflash_bin_attr,
+ NULL
+};
+
+static struct attribute *flash_attrs[] = {
+ &dev_attr_psp_vbflash_status.attr,
+ &dev_attr_usbc_pd_fw.attr,
+ NULL
+};
+
+static umode_t amdgpu_flash_attr_is_visible(struct kobject *kobj, struct attribute *attr, int idx)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (attr == &dev_attr_usbc_pd_fw.attr)
+ return adev->psp.sup_pd_fw_up ? 0660 : 0;
+
+ return adev->psp.sup_ifwi_up ? 0440 : 0;
+}
+
+static umode_t amdgpu_bin_flash_attr_is_visible(struct kobject *kobj,
+ const struct bin_attribute *attr,
+ int idx)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return adev->psp.sup_ifwi_up ? 0660 : 0;
+}
+
+const struct attribute_group amdgpu_flash_attr_group = {
+ .attrs = flash_attrs,
+ .bin_attrs = bin_flash_attrs,
+ .is_bin_visible = amdgpu_bin_flash_attr_is_visible,
+ .is_visible = amdgpu_flash_attr_is_visible,
};
-static int psp_sysfs_init(struct amdgpu_device *adev)
+#if defined(CONFIG_DEBUG_FS)
+static int psp_read_spirom_debugfs_open(struct inode *inode, struct file *filp)
{
- int ret = device_create_file(adev->dev, &dev_attr_usbc_pd_fw);
+ struct amdgpu_device *adev = filp->f_inode->i_private;
+ struct spirom_bo *bo_triplet;
+ int ret;
+
+ /* serialize the open() file calling */
+ if (!mutex_trylock(&adev->psp.mutex))
+ return -EBUSY;
+
+ /*
+ * make sure only one userpace process is alive for dumping so that
+ * only one memory buffer of AMD_VBIOS_FILE_MAX_SIZE * 2 is consumed.
+ * let's say the case where one process try opening the file while
+ * another one has proceeded to read or release. In this way, eliminate
+ * the use of mutex for read() or release() callback as well.
+ */
+ if (adev->psp.spirom_dump_trip) {
+ mutex_unlock(&adev->psp.mutex);
+ return -EBUSY;
+ }
+
+ bo_triplet = kzalloc(sizeof(struct spirom_bo), GFP_KERNEL);
+ if (!bo_triplet) {
+ mutex_unlock(&adev->psp.mutex);
+ return -ENOMEM;
+ }
+
+ ret = amdgpu_bo_create_kernel(adev, AMD_VBIOS_FILE_MAX_SIZE_B * 2,
+ AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &bo_triplet->bo,
+ &bo_triplet->mc_addr,
+ &bo_triplet->cpu_addr);
+ if (ret)
+ goto rel_trip;
+ ret = psp_dump_spirom(&adev->psp, bo_triplet->mc_addr);
if (ret)
- DRM_ERROR("Failed to create USBC PD FW control file!");
+ goto rel_bo;
+ adev->psp.spirom_dump_trip = bo_triplet;
+ mutex_unlock(&adev->psp.mutex);
+ return 0;
+rel_bo:
+ amdgpu_bo_free_kernel(&bo_triplet->bo, &bo_triplet->mc_addr,
+ &bo_triplet->cpu_addr);
+rel_trip:
+ kfree(bo_triplet);
+ mutex_unlock(&adev->psp.mutex);
+ dev_err(adev->dev, "Trying IFWI dump fails, err = %d\n", ret);
return ret;
}
-static void psp_sysfs_fini(struct amdgpu_device *adev)
+static ssize_t psp_read_spirom_debugfs_read(struct file *filp, char __user *buf, size_t size,
+ loff_t *pos)
{
- device_remove_file(adev->dev, &dev_attr_usbc_pd_fw);
+ struct amdgpu_device *adev = filp->f_inode->i_private;
+ struct spirom_bo *bo_triplet = adev->psp.spirom_dump_trip;
+
+ if (!bo_triplet)
+ return -EINVAL;
+
+ return simple_read_from_buffer(buf,
+ size,
+ pos, bo_triplet->cpu_addr,
+ AMD_VBIOS_FILE_MAX_SIZE_B * 2);
}
-const struct amdgpu_ip_block_version psp_v3_1_ip_block =
+static int psp_read_spirom_debugfs_release(struct inode *inode, struct file *filp)
+{
+ struct amdgpu_device *adev = filp->f_inode->i_private;
+ struct spirom_bo *bo_triplet = adev->psp.spirom_dump_trip;
+
+ if (bo_triplet) {
+ amdgpu_bo_free_kernel(&bo_triplet->bo, &bo_triplet->mc_addr,
+ &bo_triplet->cpu_addr);
+ kfree(bo_triplet);
+ }
+
+ adev->psp.spirom_dump_trip = NULL;
+ return 0;
+}
+
+static const struct file_operations psp_dump_spirom_debugfs_ops = {
+ .owner = THIS_MODULE,
+ .open = psp_read_spirom_debugfs_open,
+ .read = psp_read_spirom_debugfs_read,
+ .release = psp_read_spirom_debugfs_release,
+ .llseek = default_llseek,
+};
+#endif
+
+void amdgpu_psp_debugfs_init(struct amdgpu_device *adev)
{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+
+ debugfs_create_file_size("psp_spirom_dump", 0444, minor->debugfs_root,
+ adev, &psp_dump_spirom_debugfs_ops, AMD_VBIOS_FILE_MAX_SIZE_B * 2);
+#endif
+}
+
+const struct amd_ip_funcs psp_ip_funcs = {
+ .name = "psp",
+ .early_init = psp_early_init,
+ .sw_init = psp_sw_init,
+ .sw_fini = psp_sw_fini,
+ .hw_init = psp_hw_init,
+ .hw_fini = psp_hw_fini,
+ .suspend = psp_suspend,
+ .resume = psp_resume,
+ .set_clockgating_state = psp_set_clockgating_state,
+ .set_powergating_state = psp_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version psp_v3_1_ip_block = {
.type = AMD_IP_BLOCK_TYPE_PSP,
.major = 3,
.minor = 1,
@@ -3359,8 +4476,7 @@ const struct amdgpu_ip_block_version psp_v3_1_ip_block =
.funcs = &psp_ip_funcs,
};
-const struct amdgpu_ip_block_version psp_v10_0_ip_block =
-{
+const struct amdgpu_ip_block_version psp_v10_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_PSP,
.major = 10,
.minor = 0,
@@ -3368,8 +4484,7 @@ const struct amdgpu_ip_block_version psp_v10_0_ip_block =
.funcs = &psp_ip_funcs,
};
-const struct amdgpu_ip_block_version psp_v11_0_ip_block =
-{
+const struct amdgpu_ip_block_version psp_v11_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_PSP,
.major = 11,
.minor = 0,
@@ -3385,8 +4500,7 @@ const struct amdgpu_ip_block_version psp_v11_0_8_ip_block = {
.funcs = &psp_ip_funcs,
};
-const struct amdgpu_ip_block_version psp_v12_0_ip_block =
-{
+const struct amdgpu_ip_block_version psp_v12_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_PSP,
.major = 12,
.minor = 0,
@@ -3401,3 +4515,19 @@ const struct amdgpu_ip_block_version psp_v13_0_ip_block = {
.rev = 0,
.funcs = &psp_ip_funcs,
};
+
+const struct amdgpu_ip_block_version psp_v13_0_4_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 13,
+ .minor = 0,
+ .rev = 4,
+ .funcs = &psp_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version psp_v14_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 14,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &psp_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index f29afabbff1f..237b624aa51c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -36,8 +36,34 @@
#define PSP_CMD_BUFFER_SIZE 0x1000
#define PSP_1_MEG 0x100000
#define PSP_TMR_SIZE(adev) ((adev)->asic_type == CHIP_ALDEBARAN ? 0x800000 : 0x400000)
+#define PSP_TMR_ALIGNMENT 0x100000
#define PSP_FW_NAME_LEN 0x24
+/* VBIOS gfl defines */
+#define MBOX_READY_MASK 0x80000000
+#define MBOX_STATUS_MASK 0x0000FFFF
+#define MBOX_COMMAND_MASK 0x00FF0000
+#define MBOX_READY_FLAG 0x80000000
+#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO 0x2
+#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI 0x3
+#define C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE 0x4
+#define C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_LO 0xf
+#define C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_HI 0x10
+#define C2PMSG_CMD_SPI_GET_FLASH_IMAGE 0x11
+
+/* Command register bit 31 set to indicate readiness */
+#define MBOX_TOS_READY_FLAG (GFX_FLAG_RESPONSE)
+#define MBOX_TOS_READY_MASK (GFX_CMD_RESPONSE_MASK | GFX_CMD_STATUS_MASK)
+
+/* Values to check for a successful GFX_CMD response wait. Check against
+ * both status bits and response state - helps to detect a command failure
+ * or other unexpected cases like a device drop reading all 0xFFs
+ */
+#define MBOX_TOS_RESP_FLAG (GFX_FLAG_RESPONSE)
+#define MBOX_TOS_RESP_MASK (GFX_CMD_RESPONSE_MASK | GFX_CMD_STATUS_MASK)
+
+extern const struct attribute_group amdgpu_flash_attr_group;
+
enum psp_shared_mem_size {
PSP_ASD_SHARED_MEM_SIZE = 0x0,
PSP_XGMI_SHARED_MEM_SIZE = 0x4000,
@@ -48,6 +74,17 @@ enum psp_shared_mem_size {
PSP_SECUREDISPLAY_SHARED_MEM_SIZE = 0x4000,
};
+enum ta_type_id {
+ TA_TYPE_XGMI = 1,
+ TA_TYPE_RAS,
+ TA_TYPE_HDCP,
+ TA_TYPE_DTM,
+ TA_TYPE_RAP,
+ TA_TYPE_SECUREDISPLAY,
+
+ TA_TYPE_MAX_INDEX,
+};
+
struct psp_context;
struct psp_xgmi_node_info;
struct psp_xgmi_topology_info;
@@ -58,15 +95,18 @@ enum psp_bootloader_cmd {
PSP_BL__LOAD_SOSDRV = 0x20000,
PSP_BL__LOAD_KEY_DATABASE = 0x80000,
PSP_BL__LOAD_SOCDRV = 0xB0000,
- PSP_BL__LOAD_INTFDRV = 0xC0000,
- PSP_BL__LOAD_DBGDRV = 0xD0000,
+ PSP_BL__LOAD_DBGDRV = 0xC0000,
+ PSP_BL__LOAD_HADDRV = PSP_BL__LOAD_DBGDRV,
+ PSP_BL__LOAD_INTFDRV = 0xD0000,
+ PSP_BL__LOAD_RASDRV = 0xE0000,
+ PSP_BL__LOAD_IPKEYMGRDRV = 0xF0000,
PSP_BL__DRAM_LONG_TRAIN = 0x100000,
PSP_BL__DRAM_SHORT_TRAIN = 0x200000,
PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000,
+ PSP_BL__LOAD_SPDMDRV = 0x20000000,
};
-enum psp_ring_type
-{
+enum psp_ring_type {
PSP_RING_TYPE__INVALID = 0,
/*
* These values map to the way the PSP kernel identifies the
@@ -76,8 +116,7 @@ enum psp_ring_type
PSP_RING_TYPE__KM = 2 /* Kernel mode ring (formerly called GPCOM) */
};
-struct psp_ring
-{
+struct psp_ring {
enum psp_ring_type ring_type;
struct psp_gfx_rb_frame *ring_mem;
uint64_t ring_mem_mc_addr;
@@ -91,20 +130,26 @@ enum psp_reg_prog_id {
PSP_REG_IH_RB_CNTL = 0, /* register IH_RB_CNTL */
PSP_REG_IH_RB_CNTL_RING1 = 1, /* register IH_RB_CNTL_RING1 */
PSP_REG_IH_RB_CNTL_RING2 = 2, /* register IH_RB_CNTL_RING2 */
+ PSP_REG_MMHUB_L1_TLB_CNTL = 25,
PSP_REG_LAST
};
-struct psp_funcs
-{
+#define PSP_WAITREG_CHANGED BIT(0) /* check if the value has changed */
+#define PSP_WAITREG_NOVERBOSE BIT(1) /* No error verbose */
+
+struct psp_funcs {
int (*init_microcode)(struct psp_context *psp);
+ int (*wait_for_bootloader)(struct psp_context *psp);
int (*bootloader_load_kdb)(struct psp_context *psp);
int (*bootloader_load_spl)(struct psp_context *psp);
int (*bootloader_load_sysdrv)(struct psp_context *psp);
int (*bootloader_load_soc_drv)(struct psp_context *psp);
int (*bootloader_load_intf_drv)(struct psp_context *psp);
int (*bootloader_load_dbg_drv)(struct psp_context *psp);
+ int (*bootloader_load_ras_drv)(struct psp_context *psp);
+ int (*bootloader_load_ipkeymgr_drv)(struct psp_context *psp);
+ int (*bootloader_load_spdm_drv)(struct psp_context *psp);
int (*bootloader_load_sos)(struct psp_context *psp);
- int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type);
int (*ring_create)(struct psp_context *psp,
enum psp_ring_type ring_type);
int (*ring_stop)(struct psp_context *psp,
@@ -118,6 +163,21 @@ struct psp_funcs
void (*ring_set_wptr)(struct psp_context *psp, uint32_t value);
int (*load_usbc_pd_fw)(struct psp_context *psp, uint64_t fw_pri_mc_addr);
int (*read_usbc_pd_fw)(struct psp_context *psp, uint32_t *fw_ver);
+ int (*update_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr);
+ int (*dump_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr);
+ int (*vbflash_stat)(struct psp_context *psp);
+ int (*fatal_error_recovery_quirk)(struct psp_context *psp);
+ bool (*get_ras_capability)(struct psp_context *psp);
+ bool (*is_aux_sos_load_required)(struct psp_context *psp);
+ bool (*is_reload_needed)(struct psp_context *psp);
+ int (*reg_program_no_ring)(struct psp_context *psp, uint32_t val,
+ enum psp_reg_prog_id id);
+};
+
+struct ta_funcs {
+ int (*fn_ta_initialize)(struct psp_context *psp);
+ int (*fn_ta_invoke)(struct psp_context *psp, uint32_t ta_cmd_id);
+ int (*fn_ta_terminate)(struct psp_context *psp);
};
#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
@@ -127,6 +187,7 @@ struct psp_xgmi_node_info {
uint8_t is_sharing_enabled;
enum ta_xgmi_assigned_sdma_engine sdma_engine;
uint8_t num_links;
+ struct xgmi_connected_port_num port_num[TA_XGMI__MAX_PORT_NUM];
};
struct psp_xgmi_topology_info {
@@ -151,9 +212,11 @@ struct ta_mem_context {
struct ta_context {
bool initialized;
uint32_t session_id;
+ uint32_t resp_status;
struct ta_mem_context mem_context;
struct psp_bin_desc bin_desc;
enum psp_gfx_cmd_id ta_load_type;
+ enum ta_type_id ta_type;
};
struct ta_cp_context {
@@ -165,18 +228,20 @@ struct psp_xgmi_context {
struct ta_context context;
struct psp_xgmi_topology_info top_info;
bool supports_extended_data;
+ uint8_t xgmi_ta_caps;
};
struct psp_ras_context {
struct ta_context context;
struct amdgpu_ras *ras;
+ struct mutex mutex;
};
#define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942
#define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES 0x1000
#define GDDR6_MEM_TRAINING_OFFSET 0x8000
/*Define the VRAM size that will be encroached by BIST training.*/
-#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE 0x2000000
+#define BIST_MEM_TRAINING_ENCROACHED_SIZE 0x2000000
enum psp_memory_training_init_flag {
PSP_MEM_TRAIN_NOT_SUPPORT = 0x0,
@@ -231,6 +296,7 @@ enum psp_runtime_entry_type {
PSP_RUNTIME_ENTRY_TYPE_MGPU_WAFL = 0x3, /* WAFL runtime data */
PSP_RUNTIME_ENTRY_TYPE_MGPU_XGMI = 0x4, /* XGMI runtime data */
PSP_RUNTIME_ENTRY_TYPE_BOOT_CONFIG = 0x5, /* Boot Config runtime data */
+ PSP_RUNTIME_ENTRY_TYPE_PPTABLE_ERR_STATUS = 0x6, /* SCPM validation data */
};
/* PSP runtime DB header */
@@ -265,19 +331,39 @@ enum psp_runtime_boot_cfg_feature {
BOOT_CFG_FEATURE_TWO_STAGE_DRAM_TRAINING = 0x2,
};
+/* PSP run time DB SCPM authentication defines */
+enum psp_runtime_scpm_authentication {
+ SCPM_DISABLE = 0x0,
+ SCPM_ENABLE = 0x1,
+ SCPM_ENABLE_WITH_SCPM_ERR = 0x2,
+};
+
/* PSP runtime DB boot config entry */
struct psp_runtime_boot_cfg_entry {
uint32_t boot_cfg_bitmask;
uint32_t reserved;
};
-struct psp_context
-{
- struct amdgpu_device *adev;
- struct psp_ring km_ring;
+/* PSP runtime DB SCPM entry */
+struct psp_runtime_scpm_entry {
+ enum psp_runtime_scpm_authentication scpm_status;
+};
+
+#if defined(CONFIG_DEBUG_FS)
+struct spirom_bo {
+ struct amdgpu_bo *bo;
+ uint64_t mc_addr;
+ void *cpu_addr;
+};
+#endif
+
+struct psp_context {
+ struct amdgpu_device *adev;
+ struct psp_ring km_ring;
struct psp_gfx_cmd_resp *cmd;
const struct psp_funcs *funcs;
+ const struct ta_funcs *ta_funcs;
/* firmware buffer */
struct amdgpu_bo *fw_pri_bo;
@@ -295,17 +381,23 @@ struct psp_context
struct psp_bin_desc soc_drv;
struct psp_bin_desc intf_drv;
struct psp_bin_desc dbg_drv;
+ struct psp_bin_desc ras_drv;
+ struct psp_bin_desc ipkeymgr_drv;
+ struct psp_bin_desc spdm_drv;
/* tmr buffer */
struct amdgpu_bo *tmr_bo;
uint64_t tmr_mc_addr;
/* asd firmware */
- const struct firmware *asd_fw;
+ const struct firmware *asd_fw;
/* toc firmware */
const struct firmware *toc_fw;
+ /* cap firmware */
+ const struct firmware *cap_fw;
+
/* fence buffer */
struct amdgpu_bo *fence_buf_bo;
uint64_t fence_buf_mc_addr;
@@ -320,6 +412,8 @@ struct psp_context
atomic_t fence_value;
/* flag to mark whether gfx fw autoload is supported or not */
bool autoload_supported;
+ /* flag to mark whether psp use runtime TMR or boottime TMR */
+ bool boot_time_tmr;
/* flag to mark whether df cstate management centralized to PMFW */
bool pmfw_centralized_cstate_management;
@@ -327,6 +421,10 @@ struct psp_context
const struct firmware *ta_fw;
uint32_t ta_fw_version;
+ uint32_t cap_fw_version;
+ uint32_t cap_feature_version;
+ uint32_t cap_ucode_size;
+
struct ta_context asd_context;
struct psp_xgmi_context xgmi_context;
struct psp_ras_context ras_context;
@@ -338,6 +436,17 @@ struct psp_context
struct psp_memory_training_context mem_train_ctx;
uint32_t boot_cfg_bitmask;
+
+ /* firmware upgrades supported */
+ bool sup_pd_fw_up;
+ bool sup_ifwi_up;
+
+ char *vbflash_tmp_buf;
+ size_t vbflash_image_size;
+ bool vbflash_done;
+#if defined(CONFIG_DEBUG_FS)
+ struct spirom_bo *spirom_dump_trip;
+#endif
};
struct amdgpu_psp_funcs {
@@ -346,7 +455,6 @@ struct amdgpu_psp_funcs {
};
-#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
#define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type))
#define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type)))
@@ -364,6 +472,15 @@ struct amdgpu_psp_funcs {
((psp)->funcs->bootloader_load_intf_drv ? (psp)->funcs->bootloader_load_intf_drv((psp)) : 0)
#define psp_bootloader_load_dbg_drv(psp) \
((psp)->funcs->bootloader_load_dbg_drv ? (psp)->funcs->bootloader_load_dbg_drv((psp)) : 0)
+#define psp_bootloader_load_ras_drv(psp) \
+ ((psp)->funcs->bootloader_load_ras_drv ? \
+ (psp)->funcs->bootloader_load_ras_drv((psp)) : 0)
+#define psp_bootloader_load_ipkeymgr_drv(psp) \
+ ((psp)->funcs->bootloader_load_ipkeymgr_drv ? \
+ (psp)->funcs->bootloader_load_ipkeymgr_drv((psp)) : 0)
+#define psp_bootloader_load_spdm_drv(psp) \
+ ((psp)->funcs->bootloader_load_spdm_drv ? \
+ (psp)->funcs->bootloader_load_spdm_drv((psp)) : 0)
#define psp_bootloader_load_sos(psp) \
((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0)
#define psp_smu_reload_quirk(psp) \
@@ -384,6 +501,29 @@ struct amdgpu_psp_funcs {
((psp)->funcs->read_usbc_pd_fw ? \
(psp)->funcs->read_usbc_pd_fw((psp), fw_ver) : -EINVAL)
+#define psp_update_spirom(psp, fw_pri_mc_addr) \
+ ((psp)->funcs->update_spirom ? \
+ (psp)->funcs->update_spirom((psp), fw_pri_mc_addr) : -EINVAL)
+
+#define psp_dump_spirom(psp, fw_pri_mc_addr) \
+ ((psp)->funcs->dump_spirom ? \
+ (psp)->funcs->dump_spirom((psp), fw_pri_mc_addr) : -EINVAL)
+
+#define psp_vbflash_status(psp) \
+ ((psp)->funcs->vbflash_stat ? \
+ (psp)->funcs->vbflash_stat((psp)) : -EINVAL)
+
+#define psp_fatal_error_recovery_quirk(psp) \
+ ((psp)->funcs->fatal_error_recovery_quirk ? \
+ (psp)->funcs->fatal_error_recovery_quirk((psp)) : 0)
+
+#define psp_is_aux_sos_load_required(psp) \
+ ((psp)->funcs->is_aux_sos_load_required ? (psp)->funcs->is_aux_sos_load_required((psp)) : 0)
+
+#define psp_reg_program_no_ring(psp, val, id) \
+ ((psp)->funcs->reg_program_no_ring ? \
+ (psp)->funcs->reg_program_no_ring((psp), val, id) : -EINVAL)
+
extern const struct amd_ip_funcs psp_ip_funcs;
extern const struct amdgpu_ip_block_version psp_v3_1_ip_block;
@@ -392,13 +532,27 @@ extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
extern const struct amdgpu_ip_block_version psp_v11_0_8_ip_block;
extern const struct amdgpu_ip_block_version psp_v12_0_ip_block;
extern const struct amdgpu_ip_block_version psp_v13_0_ip_block;
+extern const struct amdgpu_ip_block_version psp_v13_0_4_ip_block;
+extern const struct amdgpu_ip_block_version psp_v14_0_ip_block;
+
+int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
+ uint32_t field_val, uint32_t mask, uint32_t flags);
+extern int psp_wait_for_spirom_update(struct psp_context *psp, uint32_t reg_index,
+ uint32_t field_val, uint32_t mask, uint32_t msec_timeout);
-extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
- uint32_t field_val, uint32_t mask, bool check_changed);
+int psp_execute_ip_fw_load(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode);
int psp_gpu_reset(struct amdgpu_device *adev);
-int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx,
- uint64_t cmd_gpu_addr, int cmd_size);
+
+int psp_ta_init_shared_buf(struct psp_context *psp,
+ struct ta_mem_context *mem_ctx);
+void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx);
+int psp_ta_unload(struct psp_context *psp, struct ta_context *context);
+int psp_ta_load(struct psp_context *psp, struct ta_context *context);
+int psp_ta_invoke(struct psp_context *psp,
+ uint32_t ta_cmd_id,
+ struct ta_context *context);
int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool load_ta);
int psp_xgmi_terminate(struct psp_context *psp);
@@ -412,12 +566,16 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
int psp_xgmi_set_topology_info(struct psp_context *psp,
int number_devices,
struct psp_xgmi_topology_info *topology);
-
+int psp_ras_initialize(struct psp_context *psp);
int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable);
int psp_ras_trigger_error(struct psp_context *psp,
- struct ta_ras_trigger_error_input *info);
+ struct ta_ras_trigger_error_input *info, uint32_t instance_mask);
+int psp_ras_terminate(struct psp_context *psp);
+int psp_ras_query_address(struct psp_context *psp,
+ struct ta_ras_query_address_input *addr_in,
+ struct ta_ras_query_address_output *addr_out);
int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
@@ -440,12 +598,29 @@ int psp_init_sos_microcode(struct psp_context *psp,
const char *chip_name);
int psp_init_ta_microcode(struct psp_context *psp,
const char *chip_name);
+int psp_init_cap_microcode(struct psp_context *psp,
+ const char *chip_name);
int psp_get_fw_attestation_records_addr(struct psp_context *psp,
uint64_t *output_ptr);
-
+int psp_update_fw_reservation(struct psp_context *psp);
int psp_load_fw_list(struct psp_context *psp,
struct amdgpu_firmware_info **ucode_list, int ucode_count);
void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size);
+int psp_spatial_partition(struct psp_context *psp, int mode);
+int psp_memory_partition(struct psp_context *psp, int mode);
+
int is_psp_fw_valid(struct psp_bin_desc bin);
+
+int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev);
+bool amdgpu_psp_get_ras_capability(struct psp_context *psp);
+
+int psp_config_sq_perfmon(struct psp_context *psp, uint32_t xcp_id,
+ bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable);
+bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev);
+int amdgpu_psp_reg_program_no_ring(struct psp_context *psp, uint32_t val,
+ enum psp_reg_prog_id id);
+void amdgpu_psp_debugfs_init(struct amdgpu_device *adev);
+
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
new file mode 100644
index 000000000000..6e8aad91bcd3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
@@ -0,0 +1,392 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_psp_ta.h"
+
+#if defined(CONFIG_DEBUG_FS)
+
+static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf,
+ size_t len, loff_t *off);
+static ssize_t ta_if_unload_debugfs_write(struct file *fp, const char *buf,
+ size_t len, loff_t *off);
+static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf,
+ size_t len, loff_t *off);
+
+/*
+ * Return the ucode_version field of the common firmware header found at
+ * the start of the TA binary image @bin.
+ */
+static uint32_t get_bin_version(const uint8_t *bin)
+{
+	return ((const struct common_firmware_header *)bin)->ucode_version;
+}
+
+/*
+ * Stage a caller-supplied buffer in the TA shared memory: the whole shared
+ * region is zeroed first, then @shared_buf_len bytes of @shared_buf are
+ * copied to its start.
+ *
+ * Returns -EINVAL when @shared_buf_len exceeds the shared region size,
+ * 0 otherwise.
+ */
+static int prep_ta_mem_context(struct ta_mem_context *mem_context,
+			       uint8_t *shared_buf,
+			       uint32_t shared_buf_len)
+{
+	void *dst;
+
+	if (shared_buf_len > mem_context->shared_mem_size)
+		return -EINVAL;
+
+	dst = (void *)mem_context->shared_buf;
+	memset(dst, 0, mem_context->shared_mem_size);
+	memcpy(dst, shared_buf, shared_buf_len);
+
+	return 0;
+}
+
+/* Tell whether @ta_type is accepted by this interface; only TA_TYPE_RAS is. */
+static bool is_ta_type_valid(enum ta_type_id ta_type)
+{
+	return ta_type == TA_TYPE_RAS;
+}
+
+/* Callback table installed by set_ta_context_funcs() for TA_TYPE_RAS. */
+static const struct ta_funcs ras_ta_funcs = {
+	.fn_ta_terminate  = psp_ras_terminate,
+	.fn_ta_invoke     = psp_ras_invoke,
+	.fn_ta_initialize = psp_ras_initialize,
+};
+
+/*
+ * Select the TA context and callback table matching @ta_type.
+ *
+ * For TA_TYPE_RAS, *@pcontext is pointed at the RAS TA context and
+ * psp->ta_funcs at ras_ta_funcs. For any other type neither is touched,
+ * so callers that pre-set *@pcontext to NULL can detect the miss.
+ */
+static void set_ta_context_funcs(struct psp_context *psp,
+				 enum ta_type_id ta_type,
+				 struct ta_context **pcontext)
+{
+	if (ta_type != TA_TYPE_RAS)
+		return;
+
+	*pcontext = &psp->ras_context.context;
+	psp->ta_funcs = &ras_ta_funcs;
+}
+
+/* File operations of the write-only ta_if/ta_load debugfs entry. */
+static const struct file_operations ta_load_debugfs_fops = {
+	.owner  = THIS_MODULE,
+	.llseek = default_llseek,
+	.write  = ta_if_load_debugfs_write,
+};
+
+/* File operations of the write-only ta_if/ta_unload debugfs entry. */
+static const struct file_operations ta_unload_debugfs_fops = {
+	.owner  = THIS_MODULE,
+	.llseek = default_llseek,
+	.write  = ta_if_unload_debugfs_write,
+};
+
+/* File operations of the write-only ta_if/ta_invoke debugfs entry. */
+static const struct file_operations ta_invoke_debugfs_fops = {
+	.owner  = THIS_MODULE,
+	.llseek = default_llseek,
+	.write  = ta_if_invoke_debugfs_write,
+};
+
+/*
+ * DOC: AMDGPU TA debugfs interfaces
+ *
+ * Three debugfs interfaces can be opened by a program to
+ * load/invoke/unload TA,
+ *
+ * - /sys/kernel/debug/dri/<N>/ta_if/ta_load
+ * - /sys/kernel/debug/dri/<N>/ta_if/ta_invoke
+ * - /sys/kernel/debug/dri/<N>/ta_if/ta_unload
+ *
+ * How to use the interfaces in a program?
+ *
+ * A program needs to provide transmit buffer to the interfaces
+ * and will receive buffer from the interfaces below,
+ *
+ * - For TA load debugfs interface:
+ * Transmit buffer:
+ * - TA type (4bytes)
+ * - TA bin length (4bytes)
+ * - TA bin
+ * Receive buffer:
+ * - TA ID (4bytes)
+ *
+ * - For TA invoke debugfs interface:
+ * Transmit buffer:
+ * - TA type (4bytes)
+ * - TA ID (4bytes)
+ * - TA CMD ID (4bytes)
+ * - TA shared buf length
+ * (4bytes, value not beyond TA shared memory size)
+ * - TA shared buf
+ * Receive buffer:
+ * - TA shared buf
+ *
+ * - For TA unload debugfs interface:
+ * Transmit buffer:
+ * - TA type (4bytes)
+ * - TA ID (4bytes)
+ */
+
+/*
+ * debugfs write handler for ta_if/ta_load.
+ *
+ * The user buffer @buf carries, in order: TA type (4 bytes), TA binary
+ * length (4 bytes) and the TA binary itself. The TA currently associated
+ * with that type is terminated, the supplied binary is loaded in its place
+ * and, on success, the new session ID is written back to the first 4 bytes
+ * of @buf. @len and @off are not used.
+ *
+ * Returns 0 on success or a negative errno:
+ *   -EINVAL      NULL buffer, binary length out of range, or the TA
+ *                reported a non-zero response status
+ *   -EFAULT      user copy failed or the TA type is not supported
+ *   -EOPNOTSUPP  a required TA callback is missing
+ *   -ENOMEM      the TA shared buffer could not be allocated
+ */
+static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off)
+{
+	uint32_t ta_type = 0;
+	uint32_t ta_bin_len = 0;
+	uint8_t *ta_bin = NULL;
+	uint32_t copy_pos = 0;
+	int ret = 0;
+
+	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private;
+	struct psp_context *psp = &adev->psp;
+	struct ta_context *context = NULL;
+
+	if (!buf)
+		return -EINVAL;
+
+	ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t));
+	if (ret || (!is_ta_type_valid(ta_type)))
+		return -EFAULT;
+
+	copy_pos += sizeof(uint32_t);
+
+	ret = copy_from_user((void *)&ta_bin_len, &buf[copy_pos], sizeof(uint32_t));
+	if (ret)
+		return -EFAULT;
+
+	/*
+	 * get_bin_version() below reads a struct common_firmware_header from
+	 * the start of the image, so reject images too short to hold one as
+	 * well as images above the 1 MiB cap. Without the lower bound a zero
+	 * or tiny length leads to an out-of-bounds read of the copied buffer.
+	 */
+	if (ta_bin_len < sizeof(struct common_firmware_header) ||
+	    ta_bin_len > PSP_1_MEG)
+		return -EINVAL;
+
+	copy_pos += sizeof(uint32_t);
+
+	ta_bin = memdup_user(&buf[copy_pos], ta_bin_len);
+	if (IS_ERR(ta_bin))
+		return PTR_ERR(ta_bin);
+
+	/* Set TA context and functions */
+	set_ta_context_funcs(psp, ta_type, &context);
+
+	if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_terminate) {
+		dev_err(adev->dev, "Unsupported function to terminate TA\n");
+		ret = -EOPNOTSUPP;
+		goto err_free_bin;
+	}
+
+	/*
+	 * Allocate TA shared buf in case shared buf was freed
+	 * due to loading TA failed before.
+	 */
+	if (!context->mem_context.shared_buf) {
+		ret = psp_ta_init_shared_buf(psp, &context->mem_context);
+		if (ret) {
+			ret = -ENOMEM;
+			goto err_free_bin;
+		}
+	}
+
+	/* The TA already loaded for this type has to go before the new one. */
+	ret = psp_fn_ta_terminate(psp);
+	if (ret || context->resp_status) {
+		dev_err(adev->dev,
+			"Failed to unload embedded TA (%d) and status (0x%X)\n",
+			ret, context->resp_status);
+		if (!ret)
+			ret = -EINVAL;
+		goto err_free_ta_shared_buf;
+	}
+
+	/*
+	 * Prepare TA context for TA initialization.
+	 * NOTE(review): bin_desc.start_addr keeps pointing at ta_bin, which is
+	 * freed before this function returns - confirm nothing dereferences
+	 * it afterwards.
+	 */
+	context->ta_type = ta_type;
+	context->bin_desc.fw_version = get_bin_version(ta_bin);
+	context->bin_desc.size_bytes = ta_bin_len;
+	context->bin_desc.start_addr = ta_bin;
+
+	if (!psp->ta_funcs->fn_ta_initialize) {
+		dev_err(adev->dev, "Unsupported function to initialize TA\n");
+		ret = -EOPNOTSUPP;
+		goto err_free_ta_shared_buf;
+	}
+
+	ret = psp_fn_ta_initialize(psp);
+	if (ret || context->resp_status) {
+		dev_err(adev->dev, "Failed to load TA via debugfs (%d) and status (0x%X)\n",
+			ret, context->resp_status);
+		if (!ret)
+			ret = -EINVAL;
+		goto err_free_ta_shared_buf;
+	}
+
+	/* Hand the new session ID back through the start of the user buffer. */
+	if (copy_to_user((char *)buf, (void *)&context->session_id, sizeof(uint32_t)))
+		ret = -EFAULT;
+
+err_free_ta_shared_buf:
+	/* Only free TA shared buf when returns error code */
+	if (ret && context->mem_context.shared_buf)
+		psp_ta_free_shared_buf(&context->mem_context);
+err_free_bin:
+	kfree(ta_bin);
+
+	return ret;
+}
+
+/*
+ * debugfs write handler for ta_if/ta_unload.
+ *
+ * The user buffer @buf carries the TA type (4 bytes) followed by the TA
+ * session ID (4 bytes). The TA is terminated and its shared buffer, if
+ * any, is released whether or not termination succeeded. @len and @off
+ * are not used.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static ssize_t ta_if_unload_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private;
+	struct psp_context *psp = &adev->psp;
+	struct ta_context *context = NULL;
+	uint32_t ta_type = 0;
+	uint32_t ta_id = 0;
+	int ret;
+
+	if (!buf)
+		return -EINVAL;
+
+	/* First word: TA type. An unsupported type is reported as -EFAULT too. */
+	if (copy_from_user((void *)&ta_type, &buf[0], sizeof(uint32_t)))
+		return -EFAULT;
+	if (!is_ta_type_valid(ta_type))
+		return -EFAULT;
+
+	/* Second word: session ID of the TA to unload. */
+	if (copy_from_user((void *)&ta_id, &buf[sizeof(uint32_t)], sizeof(uint32_t)))
+		return -EFAULT;
+
+	set_ta_context_funcs(psp, ta_type, &context);
+	context->session_id = ta_id;
+
+	if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_terminate) {
+		dev_err(adev->dev, "Unsupported function to terminate TA\n");
+		return -EOPNOTSUPP;
+	}
+
+	ret = psp_fn_ta_terminate(psp);
+	if (ret || context->resp_status) {
+		dev_err(adev->dev, "Failed to unload TA via debugfs (%d) and status (0x%X)\n",
+			ret, context->resp_status);
+		/* A bad TA status with a clean return code still counts as failure. */
+		ret = ret ? ret : -EINVAL;
+	}
+
+	if (context->mem_context.shared_buf)
+		psp_ta_free_shared_buf(&context->mem_context);
+
+	return ret;
+}
+
+/*
+ * debugfs write handler for ta_if/ta_invoke.
+ *
+ * The user buffer @buf carries, in order: TA type, TA session ID, TA
+ * command ID and shared buffer length (4 bytes each), followed by the
+ * shared buffer contents. The contents are staged in the TA shared memory,
+ * the command is invoked, and the shared memory is copied back over the
+ * shared buffer area of @buf. @len and @off are not used.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off)
+{
+	uint32_t ta_type = 0;
+	uint32_t ta_id = 0;
+	uint32_t cmd_id = 0;
+	uint32_t shared_buf_len = 0;
+	uint8_t *shared_buf = NULL;
+	uint32_t copy_pos = 0;
+	int ret = 0;
+
+	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private;
+	struct psp_context *psp = &adev->psp;
+	struct ta_context *context = NULL;
+
+	if (!buf)
+		return -EINVAL;
+
+	ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t));
+	if (ret)
+		return -EFAULT;
+	copy_pos += sizeof(uint32_t);
+
+	ret = copy_from_user((void *)&ta_id, &buf[copy_pos], sizeof(uint32_t));
+	if (ret)
+		return -EFAULT;
+	copy_pos += sizeof(uint32_t);
+
+	ret = copy_from_user((void *)&cmd_id, &buf[copy_pos], sizeof(uint32_t));
+	if (ret)
+		return -EFAULT;
+	copy_pos += sizeof(uint32_t);
+
+	ret = copy_from_user((void *)&shared_buf_len, &buf[copy_pos], sizeof(uint32_t));
+	if (ret)
+		return -EFAULT;
+	copy_pos += sizeof(uint32_t);
+
+	shared_buf = memdup_user(&buf[copy_pos], shared_buf_len);
+	if (IS_ERR(shared_buf))
+		return PTR_ERR(shared_buf);
+
+	/* An unsupported TA type leaves context NULL, which is rejected below. */
+	set_ta_context_funcs(psp, ta_type, &context);
+
+	if (!context || !context->initialized) {
+		dev_err(adev->dev, "TA is not initialized\n");
+		ret = -EINVAL;
+		goto err_free_shared_buf;
+	}
+
+	if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_invoke) {
+		dev_err(adev->dev, "Unsupported function to invoke TA\n");
+		ret = -EOPNOTSUPP;
+		goto err_free_shared_buf;
+	}
+
+	context->session_id = ta_id;
+
+	/*
+	 * Everything from here on runs under the RAS context mutex. Failures
+	 * must leave through err_unlock; the earlier failures above must not,
+	 * because the mutex is not held yet at that point.
+	 */
+	mutex_lock(&psp->ras_context.mutex);
+	ret = prep_ta_mem_context(&context->mem_context, shared_buf, shared_buf_len);
+	if (ret)
+		goto err_unlock;
+
+	ret = psp_fn_ta_invoke(psp, cmd_id);
+	if (ret || context->resp_status) {
+		dev_err(adev->dev, "Failed to invoke TA via debugfs (%d) and status (0x%X)\n",
+			ret, context->resp_status);
+		if (!ret) {
+			ret = -EINVAL;
+			goto err_unlock;
+		}
+	}
+
+	/* Return the (possibly updated) shared memory to the caller's buffer. */
+	if (copy_to_user((char *)&buf[copy_pos], context->mem_context.shared_buf, shared_buf_len))
+		ret = -EFAULT;
+
+err_unlock:
+	mutex_unlock(&psp->ras_context.mutex);
+err_free_shared_buf:
+	kfree(shared_buf);
+
+	return ret;
+}
+
+/*
+ * Create the "ta_if" directory under the device's primary DRM minor
+ * debugfs root and populate it with the write-only ta_load, ta_unload and
+ * ta_invoke files.
+ */
+void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev)
+{
+	struct dentry *root = adev_to_drm(adev)->primary->debugfs_root;
+	struct dentry *dir = debugfs_create_dir("ta_if", root);
+
+	debugfs_create_file("ta_load", 0200, dir, adev, &ta_load_debugfs_fops);
+	debugfs_create_file("ta_unload", 0200, dir, adev, &ta_unload_debugfs_fops);
+	debugfs_create_file("ta_invoke", 0200, dir, adev, &ta_invoke_debugfs_fops);
+}
+
+#else
+/* Variant built when CONFIG_DEBUG_FS is not defined: nothing is created. */
+void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev)
+{
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h
new file mode 100644
index 000000000000..14cd1c81c3e6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_PSP_TA_H__
+#define __AMDGPU_PSP_TA_H__
+
+/* Calling set_ta_context_funcs is required before using the following macros */
+#define psp_fn_ta_initialize(psp) ((psp)->ta_funcs->fn_ta_initialize((psp)))
+#define psp_fn_ta_invoke(psp, ta_cmd_id) ((psp)->ta_funcs->fn_ta_invoke((psp), (ta_cmd_id)))
+#define psp_fn_ta_terminate(psp) ((psp)->ta_funcs->fn_ta_terminate((psp)))
+
+void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c
index 12010c988c8b..bacf888735db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c
@@ -101,7 +101,6 @@ static ssize_t amdgpu_rap_debugfs_write(struct file *f, const char __user *buf,
}
amdgpu_gfx_off_ctrl(adev, true);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return size;
@@ -116,7 +115,6 @@ static const struct file_operations amdgpu_rap_debugfs_ops = {
void amdgpu_rap_debugfs_init(struct amdgpu_device *adev)
{
-#if defined(CONFIG_DEBUG_FS)
struct drm_minor *minor = adev_to_drm(adev)->primary;
if (!adev->psp.rap_context.context.initialized)
@@ -124,5 +122,4 @@ void amdgpu_rap_debugfs_init(struct amdgpu_device *adev)
debugfs_create_file("rap_test", S_IWUSR, minor->debugfs_root,
adev, &amdgpu_rap_debugfs_ops);
-#endif
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 08133de21fdd..2a6cf7963dde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -28,13 +28,21 @@
#include <linux/reboot.h>
#include <linux/syscalls.h>
#include <linux/pm_runtime.h>
+#include <linux/list_sort.h>
#include "amdgpu.h"
#include "amdgpu_ras.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_xgmi.h"
#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
+#include "nbio_v4_3.h"
+#include "nbif_v6_3_1.h"
+#include "nbio_v7_9.h"
#include "atom.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ras_mgr.h"
+
#ifdef CONFIG_X86_MCE_AMD
#include <asm/mce.h>
@@ -66,6 +74,11 @@ const char *ras_block_string[] = {
"mp1",
"fuse",
"mca",
+ "vcn",
+ "jpeg",
+ "ih",
+ "mpio",
+ "mmsch",
};
const char *ras_mca_block_string[] = {
@@ -75,12 +88,20 @@ const char *ras_mca_block_string[] = {
"mca_iohc",
};
+/* One entry of the adev->ras_list list; each entry refers to a RAS block object. */
+struct amdgpu_ras_block_list {
+ /* ras block link */
+ struct list_head node;
+
+ /* RAS block object this entry refers to */
+ struct amdgpu_ras_block_object *ras_obj;
+};
+
const char *get_ras_block_str(struct ras_common_if *ras_block)
{
if (!ras_block)
return "NULL";
- if (ras_block->block >= AMDGPU_RAS_BLOCK_COUNT)
+ if (ras_block->block >= AMDGPU_RAS_BLOCK_COUNT ||
+ ras_block->block >= ARRAY_SIZE(ras_block_string))
return "OUT OF RANGE";
if (ras_block->block == AMDGPU_RAS_BLOCK__MCA)
@@ -89,6 +110,9 @@ const char *get_ras_block_str(struct ras_common_if *ras_block)
return ras_block_string[ras_block->block];
}
+#define ras_block_str(_BLOCK_) \
+ (((_BLOCK_) < ARRAY_SIZE(ras_block_string)) ? ras_block_string[_BLOCK_] : "Out Of Range")
+
#define ras_err_str(i) (ras_error_string[ffs(i)])
#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
@@ -99,6 +123,15 @@ const char *get_ras_block_str(struct ras_common_if *ras_block)
/* typical ECC bad page rate is 1 bad page per 100MB VRAM */
#define RAS_BAD_PAGE_COVER (100 * 1024 * 1024ULL)
+#define MAX_UMC_POISON_POLLING_TIME_ASYNC 10
+
+#define AMDGPU_RAS_RETIRE_PAGE_INTERVAL 100 //ms
+
+#define MAX_FLUSH_RETIRE_DWORK_TIMES 100
+
+#define BYPASS_ALLOCATED_ADDRESS 0x0
+#define BYPASS_INITIALIZATION_ADDRESS 0x1
+
enum amdgpu_ras_retire_page_reservation {
AMDGPU_RAS_RETIRE_PAGE_RESERVED,
AMDGPU_RAS_RETIRE_PAGE_PENDING,
@@ -107,12 +140,18 @@ enum amdgpu_ras_retire_page_reservation {
atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
-static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
+static int amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
uint64_t addr);
-static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
+static int amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
uint64_t addr);
+
+static void amdgpu_ras_critical_region_init(struct amdgpu_device *adev);
+static void amdgpu_ras_critical_region_fini(struct amdgpu_device *adev);
+
#ifdef CONFIG_X86_MCE_AMD
static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev);
+static void
+amdgpu_unregister_bad_pages_mca_notifier(struct amdgpu_device *adev);
struct mce_notifier_adev_list {
struct amdgpu_device *devs[MAX_GPU_INSTANCE];
int num_gpu;
@@ -136,40 +175,39 @@ static bool amdgpu_ras_get_error_query_ready(struct amdgpu_device *adev)
static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t address)
{
- struct ras_err_data err_data = {0, 0, 0, NULL};
+ struct ras_err_data err_data;
struct eeprom_table_record err_rec;
+ int ret;
- if ((address >= adev->gmc.mc_vram_size) ||
- (address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
+ ret = amdgpu_ras_check_bad_page(adev, address);
+ if (ret == -EINVAL) {
dev_warn(adev->dev,
- "RAS WARN: input address 0x%llx is invalid.\n",
- address);
+ "RAS WARN: input address 0x%llx is invalid.\n",
+ address);
return -EINVAL;
- }
-
- if (amdgpu_ras_check_bad_page(adev, address)) {
+ } else if (ret == 1) {
dev_warn(adev->dev,
- "RAS WARN: 0x%llx has already been marked as bad page!\n",
- address);
+ "RAS WARN: 0x%llx has already been marked as bad page!\n",
+ address);
return 0;
}
- memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
-
- err_rec.address = address;
- err_rec.retired_page = address >> AMDGPU_GPU_PAGE_SHIFT;
- err_rec.ts = (uint64_t)ktime_get_real_seconds();
- err_rec.err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
+ ret = amdgpu_ras_error_data_init(&err_data);
+ if (ret)
+ return ret;
+ memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
err_data.err_addr = &err_rec;
- err_data.err_addr_cnt = 1;
+ amdgpu_umc_fill_error_record(&err_data, address, address, 0, 0);
if (amdgpu_bad_page_threshold != 0) {
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
- err_data.err_addr_cnt);
- amdgpu_ras_save_bad_pages(adev);
+ err_data.err_addr_cnt, false);
+ amdgpu_ras_save_bad_pages(adev, NULL);
}
+ amdgpu_ras_error_data_fini(&err_data);
+
dev_warn(adev->dev, "WARNING: THIS IS ONLY FOR TEST PURPOSES AND WILL CORRUPT RAS EEPROM\n");
dev_warn(adev->dev, "Clear EEPROM:\n");
dev_warn(adev->dev, " echo 1 > /sys/kernel/debug/dri/0/ras/ras_eeprom_reset\n");
@@ -177,6 +215,56 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre
return 0;
}
+/*
+ * Check whether @address may be used, according to @flags
+ * (BYPASS_ALLOCATED_ADDRESS or BYPASS_INITIALIZATION_ADDRESS).
+ *
+ * Returns 0 when the address is accepted or when no check applies
+ * (UMC IP version below 12.0.0), otherwise:
+ *   -EPERM   the VF critical-region query failed, or no pages were found
+ *            for the address
+ *   -EACCES  the address hits a VF critical region, or a VRAM block
+ *            covering it has an owner that @flags does not allow
+ *   -EFAULT  the address is beyond the VRAM size or the inject limit
+ */
+static int amdgpu_check_address_validity(struct amdgpu_device *adev,
+ uint64_t address, uint64_t flags)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_vram_block_info blk_info;
+ uint64_t page_pfns[32] = {0};
+ int i, ret, count;
+ bool hit = false;
+
+ /* Only UMC 12.0.0 and newer are checked; older parts accept any address. */
+ if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0))
+ return 0;
+
+ /* On an SR-IOV VF the decision is delegated to the critical-region query. */
+ if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_virt_check_vf_critical_region(adev, address, &hit))
+ return -EPERM;
+ return hit ? -EACCES : 0;
+ }
+
+ if ((address >= adev->gmc.mc_vram_size) ||
+ (address >= RAS_UMC_INJECT_ADDR_LIMIT))
+ return -EFAULT;
+
+ /* Collect the page frame numbers associated with the address (at most 32). */
+ count = amdgpu_umc_lookup_bad_pages_in_a_row(adev,
+ address, page_pfns, ARRAY_SIZE(page_pfns));
+ if (count <= 0)
+ return -EPERM;
+
+ for (i = 0; i < count; i++) {
+ memset(&blk_info, 0, sizeof(blk_info));
+ ret = amdgpu_vram_mgr_query_address_block_info(&adev->mman.vram_mgr,
+ page_pfns[i] << AMDGPU_GPU_PAGE_SHIFT, &blk_info);
+ /* Owner checks only run when the block-info query succeeded. */
+ if (!ret) {
+ /* The input address that needs to be checked is allocated by
+ * current calling process, so it is necessary to exclude
+ * the calling process.
+ */
+ if ((flags == BYPASS_ALLOCATED_ADDRESS) &&
+ ((blk_info.task.pid != task_pid_nr(current)) ||
+ strncmp(blk_info.task.comm, current->comm, TASK_COMM_LEN)))
+ return -EACCES;
+ /* Reject blocks whose owner matches the recorded init task pid and comm. */
+ else if ((flags == BYPASS_INITIALIZATION_ADDRESS) &&
+ (blk_info.task.pid == con->init_task_pid) &&
+ !strncmp(blk_info.task.comm, con->init_task_comm, TASK_COMM_LEN))
+ return -EACCES;
+ }
+ }
+
+ return 0;
+}
+
static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -190,6 +278,13 @@ static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
if (amdgpu_ras_query_error_status(obj->adev, &info))
return -EINVAL;
+ /* Hardware counter will be reset automatically after the query on Vega20 and Arcturus */
+ if (amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) &&
+ amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 4)) {
+ if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
+ dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
+ }
+
s = snprintf(val, sizeof(val), "%s: %lu\n%s: %lu\n",
"ue", info.ue_count,
"ce", info.ce_count);
@@ -239,6 +334,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
int block_id;
uint32_t sub_block;
u64 address, value;
+ /* default value is 0 if the mask is not set by user */
+ u32 instance_mask = 0;
if (*pos)
return -EINVAL;
@@ -258,6 +355,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
op = 2;
else if (strstr(str, "retire_page") != NULL)
op = 3;
+ else if (strstr(str, "check_address") != NULL)
+ op = 4;
else if (str[0] && str[1] && str[2] && str[3])
/* ascii string, but commands are not matched. */
return -EINVAL;
@@ -272,24 +371,39 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
data->inject.address = address;
return 0;
+ } else if (op == 4) {
+ if (sscanf(str, "%*s 0x%llx 0x%llx", &address, &value) != 2 &&
+ sscanf(str, "%*s %llu %llu", &address, &value) != 2)
+ return -EINVAL;
+
+ data->op = op;
+ data->inject.address = address;
+ data->inject.value = value;
+ return 0;
}
if (amdgpu_ras_find_block_id_by_name(block_name, &block_id))
return -EINVAL;
data->head.block = block_id;
- /* only ue and ce errors are supported */
+ /* only ue, ce and poison errors are supported */
if (!memcmp("ue", err, 2))
data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
else if (!memcmp("ce", err, 2))
data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
+ else if (!memcmp("poison", err, 6))
+ data->head.type = AMDGPU_RAS_ERROR__POISON;
else
return -EINVAL;
data->op = op;
if (op == 2) {
- if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx",
+ if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx 0x%x",
+ &sub_block, &address, &value, &instance_mask) != 4 &&
+ sscanf(str, "%*s %*s %*s %u %llu %llu %u",
+ &sub_block, &address, &value, &instance_mask) != 4 &&
+ sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx",
&sub_block, &address, &value) != 3 &&
sscanf(str, "%*s %*s %*s %u %llu %llu",
&sub_block, &address, &value) != 3)
@@ -297,6 +411,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
data->head.sub_block_index = sub_block;
data->inject.address = address;
data->inject.value = value;
+ data->inject.instance_mask = instance_mask;
}
} else {
if (size < sizeof(*data))
@@ -309,6 +424,46 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
return 0;
}
+/*
+ * Sanitize the instance mask of a debugfs inject request in place.
+ *
+ * With a single XCC a non-zero mask is forced to 0. Otherwise the mask is
+ * limited to the instances of the targeted block (GFX: XCCs, SDMA: SDMA
+ * instances, VCN/JPEG: VCN instances); masks for other blocks are left
+ * unchanged. Any adjustment is reported through dev_dbg().
+ */
+static void amdgpu_ras_instance_mask_check(struct amdgpu_device *adev,
+					    struct ras_debug_if *data)
+{
+	uint32_t requested = data->inject.instance_mask;
+	uint32_t allowed;
+	int num_xcc = 1;
+
+	if (adev->gfx.xcc_mask)
+		num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+	/* no need to set instance mask if there is only one instance */
+	if (requested && num_xcc <= 1) {
+		data->inject.instance_mask = 0;
+		dev_dbg(adev->dev,
+			"RAS inject mask(0x%x) isn't supported and force it to 0.\n",
+			requested);
+
+		return;
+	}
+
+	if (data->head.block == AMDGPU_RAS_BLOCK__GFX)
+		allowed = GENMASK(num_xcc - 1, 0);
+	else if (data->head.block == AMDGPU_RAS_BLOCK__SDMA)
+		allowed = GENMASK(adev->sdma.num_instances - 1, 0);
+	else if (data->head.block == AMDGPU_RAS_BLOCK__VCN ||
+		 data->head.block == AMDGPU_RAS_BLOCK__JPEG)
+		allowed = GENMASK(adev->vcn.num_vcn_inst - 1, 0);
+	else
+		allowed = requested;
+
+	/* remove invalid bits in instance mask */
+	data->inject.instance_mask &= allowed;
+	if (data->inject.instance_mask != requested)
+		dev_dbg(adev->dev,
+			"Adjust RAS inject mask 0x%x to 0x%x\n",
+			requested, data->inject.instance_mask);
+}
+
/**
* DOC: AMDGPU RAS debugfs control interface
*
@@ -324,7 +479,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
* sub_block_index: some IPs have subcomponets. say, GFX, sDMA.
* name: the name of IP.
*
- * inject has two more members than head, they are address, value.
+ * inject has three more members than head, they are address, value and mask.
* As their names indicate, inject operation will write the
* value to the address.
*
@@ -348,7 +503,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
*
* echo "disable <block>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
* echo "enable <block> <error>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
- * echo "inject <block> <error> <sub-block> <address> <value> > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
+ * echo "inject <block> <error> <sub-block> <address> <value> <mask>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
*
* Where N, is the card which you want to affect.
*
@@ -359,19 +514,21 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
* The block is one of: umc, sdma, gfx, etc.
* see ras_block_string[] for details
*
- * The error type is one of: ue, ce, where,
+ * The error type is one of: ue, ce and poison where,
* ue is multi-uncorrectable
* ce is single-correctable
+ * poison is poison
*
* The sub-block is a the sub-block index, pass 0 if there is no sub-block.
* The address and value are hexadecimal numbers, leading 0x is optional.
+ * The mask means instance mask, is optional, default value is 0x1.
*
* For instance,
*
* .. code-block:: bash
*
* echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
- * echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
+ * echo inject umc ce 0 0 0 3 > /sys/kernel/debug/dri/0/ras/ras_ctrl
* echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl
*
* How to check the result of the operation?
@@ -412,6 +569,9 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
return size;
else
return ret;
+ } else if (data.op == 4) {
+ ret = amdgpu_check_address_validity(adev, data.inject.address, data.inject.value);
+ return ret ? ret : size;
}
if (!amdgpu_ras_is_supported(adev, data.head.block))
@@ -425,24 +585,21 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
break;
case 2:
- if ((data.inject.address >= adev->gmc.mc_vram_size) ||
- (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
- dev_warn(adev->dev, "RAS WARN: input address "
- "0x%llx is invalid.",
+ /* umc ce/ue error injection for a bad page is not allowed */
+ if (data.head.block == AMDGPU_RAS_BLOCK__UMC)
+ ret = amdgpu_ras_check_bad_page(adev, data.inject.address);
+ if (ret == -EINVAL) {
+ dev_warn(adev->dev, "RAS WARN: input address 0x%llx is invalid.",
data.inject.address);
- ret = -EINVAL;
break;
- }
-
- /* umc ce/ue error injection for a bad page is not allowed */
- if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) &&
- amdgpu_ras_check_bad_page(adev, data.inject.address)) {
- dev_warn(adev->dev, "RAS WARN: inject: 0x%llx has "
- "already been marked as bad!\n",
- data.inject.address);
+ } else if (ret == 1) {
+ dev_warn(adev->dev, "RAS WARN: inject: 0x%llx has already been marked as bad!\n",
+ data.inject.address);
break;
}
+ amdgpu_ras_instance_mask_check(adev, &data);
+
/* data.inject.address is offset instead of absolute gpu address */
ret = amdgpu_ras_error_inject(adev, &data.inject);
break;
@@ -452,11 +609,13 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
}
if (ret)
- return -EINVAL;
+ return ret;
return size;
}
+static int amdgpu_uniras_clear_badpages_info(struct amdgpu_device *adev);
+
/**
* DOC: AMDGPU RAS debugfs EEPROM table reset interface
*
@@ -481,6 +640,11 @@ static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f,
(struct amdgpu_device *)file_inode(f)->i_private;
int ret;
+ if (amdgpu_uniras_enabled(adev)) {
+ ret = amdgpu_uniras_clear_badpages_info(adev);
+ return ret ? ret : size;
+ }
+
ret = amdgpu_ras_eeprom_reset_table(
&(amdgpu_ras_get_context(adev)->eeprom_control));
@@ -543,13 +707,18 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
if (amdgpu_ras_query_error_status(obj->adev, &info))
return -EINVAL;
- if (obj->adev->asic_type == CHIP_ALDEBARAN) {
+ if (amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) &&
+ amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 4)) {
if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
- DRM_WARN("Failed to reset error counter and error status");
+ dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
}
- return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count,
- "ce", info.ce_count);
+ if (info.head.block == AMDGPU_RAS_BLOCK__UMC)
+ return sysfs_emit(buf, "%s: %lu\n%s: %lu\n%s: %lu\n", "ue", info.ue_count,
+ "ce", info.ce_count, "de", info.de_count);
+ else
+ return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count,
+ "ce", info.ce_count);
}
/* obj begin */
@@ -559,8 +728,11 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
static inline void put_obj(struct ras_manager *obj)
{
- if (obj && (--obj->use == 0))
+ if (obj && (--obj->use == 0)) {
list_del(&obj->node);
+ amdgpu_ras_error_data_fini(&obj->err_data);
+ }
+
if (obj && (obj->use < 0))
DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", get_ras_block_str(&obj->head));
}
@@ -590,6 +762,9 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,
if (alive_obj(obj))
return NULL;
+ if (amdgpu_ras_error_data_init(&obj->err_data))
+ return NULL;
+
obj->head = *head;
obj->adev = adev;
list_add(&obj->node, &con->head);
@@ -700,42 +875,50 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
if (!con)
return -EINVAL;
- info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL);
- if (!info)
- return -ENOMEM;
-
- if (!enable) {
- info->disable_features = (struct ta_ras_disable_features_input) {
- .block_id = amdgpu_ras_block_to_ta(head->block),
- .error_type = amdgpu_ras_error_to_ta(head->type),
- };
- } else {
- info->enable_features = (struct ta_ras_enable_features_input) {
- .block_id = amdgpu_ras_block_to_ta(head->block),
- .error_type = amdgpu_ras_error_to_ta(head->type),
- };
- }
+ /* For non-gfx ip, do not enable ras feature if it is not allowed */
+ /* For gfx ip, regardless of feature support status, */
+ /* Force issue enable or disable ras feature commands */
+ if (head->block != AMDGPU_RAS_BLOCK__GFX &&
+ !amdgpu_ras_is_feature_allowed(adev, head))
+ return 0;
- /* Do not enable if it is not allowed. */
- WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head));
+ /* Only enable gfx ras feature from host side */
+ if (head->block == AMDGPU_RAS_BLOCK__GFX &&
+ !amdgpu_sriov_vf(adev) &&
+ !amdgpu_ras_intr_triggered()) {
+ info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ if (!enable) {
+ info->disable_features = (struct ta_ras_disable_features_input) {
+ .block_id = amdgpu_ras_block_to_ta(head->block),
+ .error_type = amdgpu_ras_error_to_ta(head->type),
+ };
+ } else {
+ info->enable_features = (struct ta_ras_enable_features_input) {
+ .block_id = amdgpu_ras_block_to_ta(head->block),
+ .error_type = amdgpu_ras_error_to_ta(head->type),
+ };
+ }
- if (!amdgpu_ras_intr_triggered()) {
ret = psp_ras_enable_features(&adev->psp, info, enable);
if (ret) {
dev_err(adev->dev, "ras %s %s failed poison:%d ret:%d\n",
enable ? "enable":"disable",
get_ras_block_str(head),
amdgpu_ras_is_poison_mode_supported(adev), ret);
- goto out;
+ kfree(info);
+ return ret;
}
+
+ kfree(info);
}
/* setup the obj */
__amdgpu_ras_feature_enable(adev, head, enable);
- ret = 0;
-out:
- kfree(info);
- return ret;
+
+ return 0;
}
/* Only used in device probe stage and called only once. */
@@ -774,7 +957,7 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
if (ret)
return ret;
- /* gfx block ras dsiable cmd must send to ras-ta */
+ /* gfx block ras disable cmd must send to ras-ta */
if (head->block == AMDGPU_RAS_BLOCK__GFX)
con->features |= BIT(head->block);
@@ -866,220 +1049,626 @@ static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev,
}
/* feature ctl end */
+static int amdgpu_ras_block_match_default(struct amdgpu_ras_block_object *block_obj,
+ enum amdgpu_ras_block block)
+{
+ if (!block_obj)
+ return -EINVAL;
+
+ if (block_obj->ras_comm.block == block)
+ return 0;
+
+ return -EINVAL;
+}
-void amdgpu_ras_mca_query_error_status(struct amdgpu_device *adev,
- struct ras_common_if *ras_block,
- struct ras_err_data *err_data)
+static struct amdgpu_ras_block_object *amdgpu_ras_get_ras_block(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint32_t sub_block_index)
{
- switch (ras_block->sub_block_index) {
- case AMDGPU_RAS_MCA_BLOCK__MP0:
- if (adev->mca.mp0.ras_funcs &&
- adev->mca.mp0.ras_funcs->query_ras_error_count)
- adev->mca.mp0.ras_funcs->query_ras_error_count(adev, &err_data);
- break;
- case AMDGPU_RAS_MCA_BLOCK__MP1:
- if (adev->mca.mp1.ras_funcs &&
- adev->mca.mp1.ras_funcs->query_ras_error_count)
- adev->mca.mp1.ras_funcs->query_ras_error_count(adev, &err_data);
- break;
- case AMDGPU_RAS_MCA_BLOCK__MPIO:
- if (adev->mca.mpio.ras_funcs &&
- adev->mca.mpio.ras_funcs->query_ras_error_count)
- adev->mca.mpio.ras_funcs->query_ras_error_count(adev, &err_data);
- break;
- default:
- break;
+ struct amdgpu_ras_block_list *node, *tmp;
+ struct amdgpu_ras_block_object *obj;
+
+ if (block >= AMDGPU_RAS_BLOCK__LAST)
+ return NULL;
+
+ list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
+ if (!node->ras_obj) {
+ dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
+ continue;
+ }
+
+ obj = node->ras_obj;
+ if (obj->ras_block_match) {
+ if (obj->ras_block_match(obj, block, sub_block_index) == 0)
+ return obj;
+ } else {
+ if (amdgpu_ras_block_match_default(obj, block) == 0)
+ return obj;
+ }
}
+
+ return NULL;
}
-/* query/inject/cure begin */
-int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
- struct ras_query_if *info)
+static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_data *err_data)
{
- struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
- struct ras_err_data err_data = {0, 0, 0, NULL};
- int i;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int ret = 0;
- if (!obj)
- return -EINVAL;
+ /*
+ * choosing right query method according to
+ * whether smu support query error information
+ */
+ ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(ras->umc_ecc));
+ if (ret == -EOPNOTSUPP) {
+ if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
- switch (info->head.block) {
- case AMDGPU_RAS_BLOCK__UMC:
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->query_ras_error_count)
- adev->umc.ras_funcs->query_ras_error_count(adev, &err_data);
/* umc query_ras_error_address is also responsible for clearing
* error status
*/
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->query_ras_error_address)
- adev->umc.ras_funcs->query_ras_error_address(adev, &err_data);
- break;
- case AMDGPU_RAS_BLOCK__SDMA:
- if (adev->sdma.funcs->query_ras_error_count) {
- for (i = 0; i < adev->sdma.num_instances; i++)
- adev->sdma.funcs->query_ras_error_count(adev, i,
- &err_data);
+ if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_address)
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, err_data);
+ } else if (!ret) {
+ if (adev->umc.ras &&
+ adev->umc.ras->ecc_info_query_ras_error_count)
+ adev->umc.ras->ecc_info_query_ras_error_count(adev, err_data);
+
+ if (adev->umc.ras &&
+ adev->umc.ras->ecc_info_query_ras_error_address)
+ adev->umc.ras->ecc_info_query_ras_error_address(adev, err_data);
+ }
+}
+
+static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
+ struct ras_manager *ras_mgr,
+ struct ras_err_data *err_data,
+ struct ras_query_context *qctx,
+ const char *blk_name,
+ bool is_ue,
+ bool is_de)
+{
+ struct amdgpu_smuio_mcm_config_info *mcm_info;
+ struct ras_err_node *err_node;
+ struct ras_err_info *err_info;
+ u64 event_id = qctx->evid.event_id;
+
+ if (is_ue) {
+ for_each_ras_error(err_node, err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ if (err_info->ue_count) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld new uncorrectable hardware errors detected in %s block\n",
+ mcm_info->socket_id,
+ mcm_info->die_id,
+ err_info->ue_count,
+ blk_name);
+ }
}
- break;
- case AMDGPU_RAS_BLOCK__GFX:
- if (adev->gfx.ras_funcs &&
- adev->gfx.ras_funcs->query_ras_error_count)
- adev->gfx.ras_funcs->query_ras_error_count(adev, &err_data);
- if (adev->gfx.ras_funcs &&
- adev->gfx.ras_funcs->query_ras_error_status)
- adev->gfx.ras_funcs->query_ras_error_status(adev);
- break;
- case AMDGPU_RAS_BLOCK__MMHUB:
- if (adev->mmhub.ras_funcs &&
- adev->mmhub.ras_funcs->query_ras_error_count)
- adev->mmhub.ras_funcs->query_ras_error_count(adev, &err_data);
-
- if (adev->mmhub.ras_funcs &&
- adev->mmhub.ras_funcs->query_ras_error_status)
- adev->mmhub.ras_funcs->query_ras_error_status(adev);
- break;
- case AMDGPU_RAS_BLOCK__PCIE_BIF:
- if (adev->nbio.ras_funcs &&
- adev->nbio.ras_funcs->query_ras_error_count)
- adev->nbio.ras_funcs->query_ras_error_count(adev, &err_data);
- break;
- case AMDGPU_RAS_BLOCK__XGMI_WAFL:
- if (adev->gmc.xgmi.ras_funcs &&
- adev->gmc.xgmi.ras_funcs->query_ras_error_count)
- adev->gmc.xgmi.ras_funcs->query_ras_error_count(adev, &err_data);
- break;
- case AMDGPU_RAS_BLOCK__HDP:
- if (adev->hdp.ras_funcs &&
- adev->hdp.ras_funcs->query_ras_error_count)
- adev->hdp.ras_funcs->query_ras_error_count(adev, &err_data);
- break;
- case AMDGPU_RAS_BLOCK__MCA:
- amdgpu_ras_mca_query_error_status(adev, &info->head, &err_data);
- break;
- default:
- break;
+ for_each_ras_error(err_node, &ras_mgr->err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld uncorrectable hardware errors detected in total in %s block\n",
+ mcm_info->socket_id, mcm_info->die_id, err_info->ue_count, blk_name);
+ }
+
+ } else {
+ if (is_de) {
+ for_each_ras_error(err_node, err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ if (err_info->de_count) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld new deferred hardware errors detected in %s block\n",
+ mcm_info->socket_id,
+ mcm_info->die_id,
+ err_info->de_count,
+ blk_name);
+ }
+ }
+
+ for_each_ras_error(err_node, &ras_mgr->err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld deferred hardware errors detected in total in %s block\n",
+ mcm_info->socket_id, mcm_info->die_id,
+ err_info->de_count, blk_name);
+ }
+ } else {
+ if (adev->debug_disable_ce_logs)
+ return;
+
+ for_each_ras_error(err_node, err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ if (err_info->ce_count) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld new correctable hardware errors detected in %s block\n",
+ mcm_info->socket_id,
+ mcm_info->die_id,
+ err_info->ce_count,
+ blk_name);
+ }
+ }
+
+ for_each_ras_error(err_node, &ras_mgr->err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld correctable hardware errors detected in total in %s block\n",
+ mcm_info->socket_id, mcm_info->die_id,
+ err_info->ce_count, blk_name);
+ }
+ }
}
+}
- obj->err_data.ue_count += err_data.ue_count;
- obj->err_data.ce_count += err_data.ce_count;
+static inline bool err_data_has_source_info(struct ras_err_data *data)
+{
+ return !list_empty(&data->err_node_list);
+}
- info->ue_count = obj->err_data.ue_count;
- info->ce_count = obj->err_data.ce_count;
+static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev,
+ struct ras_query_if *query_if,
+ struct ras_err_data *err_data,
+ struct ras_query_context *qctx)
+{
+ struct ras_manager *ras_mgr = amdgpu_ras_find_obj(adev, &query_if->head);
+ const char *blk_name = get_ras_block_str(&query_if->head);
+ u64 event_id = qctx->evid.event_id;
+
+ if (err_data->ce_count) {
+ if (err_data_has_source_info(err_data)) {
+ amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx,
+ blk_name, false, false);
+ } else if (!adev->aid_mask &&
+ adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id &&
+ adev->smuio.funcs->get_die_id) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d "
+ "%ld correctable hardware errors "
+ "detected in %s block\n",
+ adev->smuio.funcs->get_socket_id(adev),
+ adev->smuio.funcs->get_die_id(adev),
+ ras_mgr->err_data.ce_count,
+ blk_name);
+ } else {
+ RAS_EVENT_LOG(adev, event_id, "%ld correctable hardware errors "
+ "detected in %s block\n",
+ ras_mgr->err_data.ce_count,
+ blk_name);
+ }
+ }
- if (err_data.ce_count) {
- if (adev->smuio.funcs &&
- adev->smuio.funcs->get_socket_id &&
- adev->smuio.funcs->get_die_id) {
- dev_info(adev->dev, "socket: %d, die: %d "
- "%ld correctable hardware errors "
- "detected in %s block, no user "
- "action is needed.\n",
- adev->smuio.funcs->get_socket_id(adev),
- adev->smuio.funcs->get_die_id(adev),
- obj->err_data.ce_count,
- get_ras_block_str(&info->head));
+ if (err_data->ue_count) {
+ if (err_data_has_source_info(err_data)) {
+ amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx,
+ blk_name, true, false);
+ } else if (!adev->aid_mask &&
+ adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id &&
+ adev->smuio.funcs->get_die_id) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d "
+ "%ld uncorrectable hardware errors "
+ "detected in %s block\n",
+ adev->smuio.funcs->get_socket_id(adev),
+ adev->smuio.funcs->get_die_id(adev),
+ ras_mgr->err_data.ue_count,
+ blk_name);
} else {
- dev_info(adev->dev, "%ld correctable hardware errors "
- "detected in %s block, no user "
- "action is needed.\n",
- obj->err_data.ce_count,
- get_ras_block_str(&info->head));
- }
- }
- if (err_data.ue_count) {
- if (adev->smuio.funcs &&
- adev->smuio.funcs->get_socket_id &&
- adev->smuio.funcs->get_die_id) {
- dev_info(adev->dev, "socket: %d, die: %d "
- "%ld uncorrectable hardware errors "
- "detected in %s block\n",
- adev->smuio.funcs->get_socket_id(adev),
- adev->smuio.funcs->get_die_id(adev),
- obj->err_data.ue_count,
- get_ras_block_str(&info->head));
+ RAS_EVENT_LOG(adev, event_id, "%ld uncorrectable hardware errors "
+ "detected in %s block\n",
+ ras_mgr->err_data.ue_count,
+ blk_name);
+ }
+ }
+
+ if (err_data->de_count) {
+ if (err_data_has_source_info(err_data)) {
+ amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx,
+ blk_name, false, true);
+ } else if (!adev->aid_mask &&
+ adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id &&
+ adev->smuio.funcs->get_die_id) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d "
+ "%ld deferred hardware errors "
+ "detected in %s block\n",
+ adev->smuio.funcs->get_socket_id(adev),
+ adev->smuio.funcs->get_die_id(adev),
+ ras_mgr->err_data.de_count,
+ blk_name);
} else {
- dev_info(adev->dev, "%ld uncorrectable hardware errors "
- "detected in %s block\n",
- obj->err_data.ue_count,
- get_ras_block_str(&info->head));
+ RAS_EVENT_LOG(adev, event_id, "%ld deferred hardware errors "
+ "detected in %s block\n",
+ ras_mgr->err_data.de_count,
+ blk_name);
}
}
+}
- if (!amdgpu_persistent_edc_harvesting_supported(adev))
- amdgpu_ras_reset_error_status(adev, info->head.block);
+static void amdgpu_ras_virt_error_generate_report(struct amdgpu_device *adev,
+ struct ras_query_if *query_if,
+ struct ras_err_data *err_data,
+ struct ras_query_context *qctx)
+{
+ unsigned long new_ue, new_ce, new_de;
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &query_if->head);
+ const char *blk_name = get_ras_block_str(&query_if->head);
+ u64 event_id = qctx->evid.event_id;
+
+ new_ce = err_data->ce_count - obj->err_data.ce_count;
+ new_ue = err_data->ue_count - obj->err_data.ue_count;
+ new_de = err_data->de_count - obj->err_data.de_count;
+
+ if (new_ce) {
+ RAS_EVENT_LOG(adev, event_id, "%lu correctable hardware errors "
+ "detected in %s block\n",
+ new_ce,
+ blk_name);
+ }
+
+ if (new_ue) {
+ RAS_EVENT_LOG(adev, event_id, "%lu uncorrectable hardware errors "
+ "detected in %s block\n",
+ new_ue,
+ blk_name);
+ }
+
+ if (new_de) {
+ RAS_EVENT_LOG(adev, event_id, "%lu deferred hardware errors "
+ "detected in %s block\n",
+ new_de,
+ blk_name);
+ }
+}
+
+static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, struct ras_err_data *err_data)
+{
+ struct ras_err_node *err_node;
+ struct ras_err_info *err_info;
+
+ if (err_data_has_source_info(err_data)) {
+ for_each_ras_error(err_node, err_data) {
+ err_info = &err_node->err_info;
+ amdgpu_ras_error_statistic_de_count(&obj->err_data,
+ &err_info->mcm_info, err_info->de_count);
+ amdgpu_ras_error_statistic_ce_count(&obj->err_data,
+ &err_info->mcm_info, err_info->ce_count);
+ amdgpu_ras_error_statistic_ue_count(&obj->err_data,
+ &err_info->mcm_info, err_info->ue_count);
+ }
+ } else {
+ /* for legacy asic path which doesn't has error source info */
+ obj->err_data.ue_count += err_data->ue_count;
+ obj->err_data.ce_count += err_data->ce_count;
+ obj->err_data.de_count += err_data->de_count;
+ }
+}
+
+static void amdgpu_ras_mgr_virt_error_data_statistics_update(struct ras_manager *obj,
+ struct ras_err_data *err_data)
+{
+ /* Host reports absolute counts */
+ obj->err_data.ue_count = err_data->ue_count;
+ obj->err_data.ce_count = err_data->ce_count;
+ obj->err_data.de_count = err_data->de_count;
+}
+
+static struct ras_manager *get_ras_manager(struct amdgpu_device *adev, enum amdgpu_ras_block blk)
+{
+ struct ras_common_if head;
+
+ memset(&head, 0, sizeof(head));
+ head.block = blk;
+
+ return amdgpu_ras_find_obj(adev, &head);
+}
+
+int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+ const struct aca_info *aca_info, void *data)
+{
+ struct ras_manager *obj;
+
+ /* in resume phase, no need to create aca fs node */
+ if (adev->in_suspend || amdgpu_reset_in_recovery(adev))
+ return 0;
+
+ obj = get_ras_manager(adev, blk);
+ if (!obj)
+ return -EINVAL;
+
+ return amdgpu_aca_add_handle(adev, &obj->aca_handle, ras_block_str(blk), aca_info, data);
+}
+
+int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk)
+{
+ struct ras_manager *obj;
+
+ obj = get_ras_manager(adev, blk);
+ if (!obj)
+ return -EINVAL;
+
+ amdgpu_aca_remove_handle(&obj->aca_handle);
return 0;
}
-int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
- enum amdgpu_ras_block block)
+static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+ enum aca_error_type type, struct ras_err_data *err_data,
+ struct ras_query_context *qctx)
+{
+ struct ras_manager *obj;
+
+ obj = get_ras_manager(adev, blk);
+ if (!obj)
+ return -EINVAL;
+
+ return amdgpu_aca_get_error_data(adev, &obj->aca_handle, type, err_data, qctx);
+}
+
+ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr,
+ struct aca_handle *handle, char *buf, void *data)
{
- if (!amdgpu_ras_is_supported(adev, block))
+ struct ras_manager *obj = container_of(handle, struct ras_manager, aca_handle);
+ struct ras_query_if info = {
+ .head = obj->head,
+ };
+
+ if (!amdgpu_ras_get_error_query_ready(obj->adev))
+ return sysfs_emit(buf, "Query currently inaccessible\n");
+
+ if (amdgpu_ras_query_error_status(obj->adev, &info))
return -EINVAL;
- switch (block) {
- case AMDGPU_RAS_BLOCK__GFX:
- if (adev->gfx.ras_funcs &&
- adev->gfx.ras_funcs->reset_ras_error_count)
- adev->gfx.ras_funcs->reset_ras_error_count(adev);
+ return sysfs_emit(buf, "%s: %lu\n%s: %lu\n%s: %lu\n", "ue", info.ue_count,
+ "ce", info.ce_count, "de", info.de_count);
+}
- if (adev->gfx.ras_funcs &&
- adev->gfx.ras_funcs->reset_ras_error_status)
- adev->gfx.ras_funcs->reset_ras_error_status(adev);
- break;
- case AMDGPU_RAS_BLOCK__MMHUB:
- if (adev->mmhub.ras_funcs &&
- adev->mmhub.ras_funcs->reset_ras_error_count)
- adev->mmhub.ras_funcs->reset_ras_error_count(adev);
-
- if (adev->mmhub.ras_funcs &&
- adev->mmhub.ras_funcs->reset_ras_error_status)
- adev->mmhub.ras_funcs->reset_ras_error_status(adev);
- break;
- case AMDGPU_RAS_BLOCK__SDMA:
- if (adev->sdma.funcs->reset_ras_error_count)
- adev->sdma.funcs->reset_ras_error_count(adev);
- break;
- case AMDGPU_RAS_BLOCK__HDP:
- if (adev->hdp.ras_funcs &&
- adev->hdp.ras_funcs->reset_ras_error_count)
- adev->hdp.ras_funcs->reset_ras_error_count(adev);
- break;
- default:
- break;
+static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
+ struct ras_query_if *info,
+ struct ras_err_data *err_data,
+ struct ras_query_context *qctx,
+ unsigned int error_query_mode)
+{
+ enum amdgpu_ras_block blk = info ? info->head.block : AMDGPU_RAS_BLOCK_COUNT;
+ struct amdgpu_ras_block_object *block_obj = NULL;
+ int ret;
+
+ if (blk == AMDGPU_RAS_BLOCK_COUNT)
+ return -EINVAL;
+
+ if (error_query_mode == AMDGPU_RAS_INVALID_ERROR_QUERY)
+ return -EINVAL;
+
+ if (error_query_mode == AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY) {
+ return amdgpu_virt_req_ras_err_count(adev, blk, err_data);
+ } else if (error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) {
+ if (info->head.block == AMDGPU_RAS_BLOCK__UMC) {
+ amdgpu_ras_get_ecc_info(adev, err_data);
+ } else {
+ block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0);
+ if (!block_obj || !block_obj->hw_ops) {
+ dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
+ get_ras_block_str(&info->head));
+ return -EINVAL;
+ }
+
+ if (block_obj->hw_ops->query_ras_error_count)
+ block_obj->hw_ops->query_ras_error_count(adev, err_data);
+
+ if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) ||
+ (info->head.block == AMDGPU_RAS_BLOCK__GFX) ||
+ (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) {
+ if (block_obj->hw_ops->query_ras_error_status)
+ block_obj->hw_ops->query_ras_error_status(adev);
+ }
+ }
+ } else {
+ if (amdgpu_aca_is_enabled(adev)) {
+ ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_UE, err_data, qctx);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data, qctx);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_DEFERRED, err_data, qctx);
+ if (ret)
+ return ret;
+ } else {
+ /* FIXME: add code to check return value later */
+ amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data, qctx);
+ amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data, qctx);
+ }
}
return 0;
}
-/* Trigger XGMI/WAFL error */
-static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
- struct ta_ras_trigger_error_input *block_info)
+/* query/inject/cure begin */
+static int amdgpu_ras_query_error_status_with_event(struct amdgpu_device *adev,
+ struct ras_query_if *info,
+ enum ras_event_type type)
{
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
+ struct ras_err_data err_data;
+ struct ras_query_context qctx;
+ unsigned int error_query_mode;
int ret;
- if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
- dev_warn(adev->dev, "Failed to disallow df cstate");
+ if (!obj)
+ return -EINVAL;
+
+ ret = amdgpu_ras_error_data_init(&err_data);
+ if (ret)
+ return ret;
+
+ if (!amdgpu_ras_get_error_query_mode(adev, &error_query_mode))
+ return -EINVAL;
+
+ memset(&qctx, 0, sizeof(qctx));
+ qctx.evid.type = type;
+ qctx.evid.event_id = amdgpu_ras_acquire_event_id(adev, type);
- if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
- dev_warn(adev->dev, "Failed to disallow XGMI power down");
+ if (!down_read_trylock(&adev->reset_domain->sem)) {
+ ret = -EIO;
+ goto out_fini_err_data;
+ }
- ret = psp_ras_trigger_error(&adev->psp, block_info);
+ ret = amdgpu_ras_query_error_status_helper(adev, info,
+ &err_data,
+ &qctx,
+ error_query_mode);
+ up_read(&adev->reset_domain->sem);
+ if (ret)
+ goto out_fini_err_data;
- if (amdgpu_ras_intr_triggered())
+ if (error_query_mode != AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY) {
+ amdgpu_rasmgr_error_data_statistic_update(obj, &err_data);
+ amdgpu_ras_error_generate_report(adev, info, &err_data, &qctx);
+ } else {
+ /* Host provides absolute error counts. First generate the report
+ * using the previous VF internal count against new host count.
+ * Then Update VF internal count.
+ */
+ amdgpu_ras_virt_error_generate_report(adev, info, &err_data, &qctx);
+ amdgpu_ras_mgr_virt_error_data_statistics_update(obj, &err_data);
+ }
+
+ info->ue_count = obj->err_data.ue_count;
+ info->ce_count = obj->err_data.ce_count;
+ info->de_count = obj->err_data.de_count;
+
+out_fini_err_data:
+ amdgpu_ras_error_data_fini(&err_data);
+
+ return ret;
+}
+
+static int amdgpu_uniras_clear_badpages_info(struct amdgpu_device *adev)
+{
+ struct ras_cmd_dev_handle req = {0};
+ int ret;
+
+ ret = amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__CLEAR_BAD_PAGE_INFO,
+ &req, sizeof(req), NULL, 0);
+ if (ret) {
+ dev_err(adev->dev, "Failed to clear bad pages info, ret: %d\n", ret);
return ret;
+ }
+
+ return 0;
+}
+
+static int amdgpu_uniras_query_block_ecc(struct amdgpu_device *adev,
+ struct ras_query_if *info)
+{
+ struct ras_cmd_block_ecc_info_req req = {0};
+ struct ras_cmd_block_ecc_info_rsp rsp = {0};
+ int ret;
+
+ if (!info)
+ return -EINVAL;
- if (amdgpu_dpm_allow_xgmi_power_down(adev, true))
- dev_warn(adev->dev, "Failed to allow XGMI power down");
+ req.block_id = info->head.block;
+ req.subblock_id = info->head.sub_block_index;
- if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
- dev_warn(adev->dev, "Failed to allow df cstate");
+ ret = amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__GET_BLOCK_ECC_STATUS,
+ &req, sizeof(req), &rsp, sizeof(rsp));
+ if (!ret) {
+ info->ce_count = rsp.ce_count;
+ info->ue_count = rsp.ue_count;
+ info->de_count = rsp.de_count;
+ }
return ret;
}
+int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info)
+{
+ if (amdgpu_uniras_enabled(adev))
+ return amdgpu_uniras_query_block_ecc(adev, info);
+ else
+ return amdgpu_ras_query_error_status_with_event(adev, info, RAS_EVENT_TYPE_INVALID);
+}
+
+int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
+{
+ struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+ const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs;
+
+ if (!block_obj || !block_obj->hw_ops) {
+ dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
+ ras_block_str(block));
+ return -EOPNOTSUPP;
+ }
+
+ if (!amdgpu_ras_is_supported(adev, block) ||
+ !amdgpu_ras_get_aca_debug_mode(adev))
+ return -EOPNOTSUPP;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EOPNOTSUPP;
+
+ /* skip ras error reset in gpu reset */
+ if ((amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) &&
+ ((smu_funcs && smu_funcs->set_debug_mode) ||
+ (mca_funcs && mca_funcs->mca_set_debug_mode)))
+ return -EOPNOTSUPP;
+
+ if (block_obj->hw_ops->reset_ras_error_count)
+ block_obj->hw_ops->reset_ras_error_count(adev);
+
+ return 0;
+}
+
+int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
+{
+ struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
+
+ if (amdgpu_ras_reset_error_count(adev, block) == -EOPNOTSUPP)
+ return 0;
+
+ if ((block == AMDGPU_RAS_BLOCK__GFX) ||
+ (block == AMDGPU_RAS_BLOCK__MMHUB)) {
+ if (block_obj->hw_ops->reset_ras_error_status)
+ block_obj->hw_ops->reset_ras_error_status(adev);
+ }
+
+ return 0;
+}
+
+static int amdgpu_uniras_error_inject(struct amdgpu_device *adev,
+ struct ras_inject_if *info)
+{
+ struct ras_cmd_inject_error_req inject_req;
+ struct ras_cmd_inject_error_rsp rsp;
+
+ if (!info)
+ return -EINVAL;
+
+ memset(&inject_req, 0, sizeof(inject_req));
+ inject_req.block_id = info->head.block;
+ inject_req.subblock_id = info->head.sub_block_index;
+ inject_req.address = info->address;
+ inject_req.error_type = info->head.type;
+ inject_req.instance_mask = info->instance_mask;
+ inject_req.method = info->value;
+
+ return amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__INJECT_ERROR,
+ &inject_req, sizeof(inject_req), &rsp, sizeof(rsp));
+}
+
/* wrapper of psp_ras_trigger_error */
int amdgpu_ras_error_inject(struct amdgpu_device *adev,
struct ras_inject_if *info)
@@ -1092,40 +1681,44 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
.address = info->address,
.value = info->value,
};
- int ret = 0;
+ int ret = -EINVAL;
+ struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev,
+ info->head.block,
+ info->head.sub_block_index);
+
+ if (amdgpu_uniras_enabled(adev))
+ return amdgpu_uniras_error_inject(adev, info);
+
+ /* inject on guest isn't allowed, return success directly */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
if (!obj)
return -EINVAL;
+ if (!block_obj || !block_obj->hw_ops) {
+ dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
+ get_ras_block_str(&info->head));
+ return -EINVAL;
+ }
+
/* Calculate XGMI relative offset */
- if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ if (adev->gmc.xgmi.num_physical_nodes > 1 &&
+ info->head.block != AMDGPU_RAS_BLOCK__GFX) {
block_info.address =
amdgpu_xgmi_get_relative_phy_addr(adev,
block_info.address);
}
- switch (info->head.block) {
- case AMDGPU_RAS_BLOCK__GFX:
- if (adev->gfx.ras_funcs &&
- adev->gfx.ras_funcs->ras_error_inject)
- ret = adev->gfx.ras_funcs->ras_error_inject(adev, info);
- else
- ret = -EINVAL;
- break;
- case AMDGPU_RAS_BLOCK__UMC:
- case AMDGPU_RAS_BLOCK__SDMA:
- case AMDGPU_RAS_BLOCK__MMHUB:
- case AMDGPU_RAS_BLOCK__PCIE_BIF:
- case AMDGPU_RAS_BLOCK__MCA:
- ret = psp_ras_trigger_error(&adev->psp, &block_info);
- break;
- case AMDGPU_RAS_BLOCK__XGMI_WAFL:
- ret = amdgpu_ras_error_inject_xgmi(adev, &block_info);
- break;
- default:
- dev_info(adev->dev, "%s error injection is not supported yet\n",
- get_ras_block_str(&info->head));
- ret = -EINVAL;
+ if (block_obj->hw_ops->ras_error_inject) {
+ if (info->head.block == AMDGPU_RAS_BLOCK__GFX)
+ ret = block_obj->hw_ops->ras_error_inject(adev, info, info->instance_mask);
+ else /* Special ras_error_inject is defined (e.g: xgmi) */
+ ret = block_obj->hw_ops->ras_error_inject(adev, &block_info,
+ info->instance_mask);
+ } else {
+ /* default path */
+ ret = psp_ras_trigger_error(&adev->psp, &block_info, info->instance_mask);
}
if (ret)
@@ -1136,11 +1729,54 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
}
/**
- * amdgpu_ras_query_error_count -- Get error counts of all IPs
- * adev: pointer to AMD GPU device
- * ce_count: pointer to an integer to be set to the count of correctible errors.
- * ue_count: pointer to an integer to be set to the count of uncorrectible
+ * amdgpu_ras_query_error_count_helper -- Get error counter for specific IP
+ * @adev: pointer to AMD GPU device
+ * @ce_count: pointer to an integer to be set to the count of correctible errors.
+ * @ue_count: pointer to an integer to be set to the count of uncorrectible errors.
+ * @query_info: pointer to ras_query_if
+ *
+ * Return 0 for query success or do nothing, otherwise return an error
+ * on failures
+ */
+static int amdgpu_ras_query_error_count_helper(struct amdgpu_device *adev,
+ unsigned long *ce_count,
+ unsigned long *ue_count,
+ struct ras_query_if *query_info)
+{
+ int ret;
+
+ if (!query_info)
+ /* do nothing if query_info is not specified */
+ return 0;
+
+ ret = amdgpu_ras_query_error_status(adev, query_info);
+ if (ret)
+ return ret;
+
+ *ce_count += query_info->ce_count;
+ *ue_count += query_info->ue_count;
+
+ /* some hardware/IP supports read to clear
+ * no need to explictly reset the err status after the query call */
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) &&
+ amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 4)) {
+ if (amdgpu_ras_reset_error_status(adev, query_info->head.block))
+ dev_warn(adev->dev,
+ "Failed to reset error counter and error status\n");
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_ras_query_error_count -- Get error counts of all IPs or specific IP
+ * @adev: pointer to AMD GPU device
+ * @ce_count: pointer to an integer to be set to the count of correctible errors.
+ * @ue_count: pointer to an integer to be set to the count of uncorrectible
* errors.
+ * @query_info: pointer to ras_query_if if the query request is only for
+ * specific ip block; if info is NULL, then the qurey request is for
+ * all the ip blocks that support query ras error counters/status
*
* If set, @ce_count or @ue_count, count and return the corresponding
* error counts in those integer pointers. Return 0 if the device
@@ -1148,11 +1784,13 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
*/
int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
unsigned long *ce_count,
- unsigned long *ue_count)
+ unsigned long *ue_count,
+ struct ras_query_if *query_info)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj;
unsigned long ce, ue;
+ int ret;
if (!adev->ras_enabled || !con)
return -EOPNOTSUPP;
@@ -1164,20 +1802,23 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
ce = 0;
ue = 0;
- list_for_each_entry(obj, &con->head, node) {
- struct ras_query_if info = {
- .head = obj->head,
- };
- int res;
-
- res = amdgpu_ras_query_error_status(adev, &info);
- if (res)
- return res;
-
- ce += info.ce_count;
- ue += info.ue_count;
+ if (!query_info) {
+ /* query all the ip blocks that support ras query interface */
+ list_for_each_entry(obj, &con->head, node) {
+ struct ras_query_if info = {
+ .head = obj->head,
+ };
+
+ ret = amdgpu_ras_query_error_count_helper(adev, &ce, &ue, &info);
+ }
+ } else {
+ /* query specific ip block */
+ ret = amdgpu_ras_query_error_count_helper(adev, &ce, &ue, query_info);
}
+ if (ret)
+ return ret;
+
if (ce_count)
*ce_count = ce;
@@ -1192,7 +1833,9 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
/* sysfs begin */
static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
- struct ras_badpage **bps, unsigned int *count);
+ struct ras_badpage *bps, uint32_t count, uint32_t start);
+static int amdgpu_uniras_badpages_read(struct amdgpu_device *adev,
+ struct ras_badpage *bps, uint32_t count, uint32_t start);
static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
{
@@ -1238,7 +1881,7 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
*/
static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
- struct kobject *kobj, struct bin_attribute *attr,
+ struct kobject *kobj, const struct bin_attribute *attr,
char *buf, loff_t ppos, size_t count)
{
struct amdgpu_ras *con =
@@ -1250,19 +1893,50 @@ static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
unsigned int end = div64_ul(ppos + count - 1, element_size);
ssize_t s = 0;
struct ras_badpage *bps = NULL;
- unsigned int bps_count = 0;
+ int bps_count = 0, i, status;
+ uint64_t address;
memset(buf, 0, count);
- if (amdgpu_ras_badpages_read(adev, &bps, &bps_count))
+ bps_count = end - start;
+ bps = kmalloc_array(bps_count, sizeof(*bps), GFP_KERNEL);
+ if (!bps)
+ return 0;
+
+ memset(bps, 0, sizeof(*bps) * bps_count);
+
+ if (amdgpu_uniras_enabled(adev))
+ bps_count = amdgpu_uniras_badpages_read(adev, bps, bps_count, start);
+ else
+ bps_count = amdgpu_ras_badpages_read(adev, bps, bps_count, start);
+
+ if (bps_count <= 0) {
+ kfree(bps);
return 0;
+ }
+
+ for (i = 0; i < bps_count; i++) {
+ address = ((uint64_t)bps[i].bp) << AMDGPU_GPU_PAGE_SHIFT;
+ if (amdgpu_ras_check_critical_address(adev, address))
+ continue;
+
+ bps[i].size = AMDGPU_GPU_PAGE_SIZE;
+
+ status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr,
+ address);
+ if (status == -EBUSY)
+ bps[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
+ else if (status == -ENOENT)
+ bps[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
+ else
+ bps[i].flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED;
- for (; start < end && start < bps_count; start++)
s += scnprintf(&buf[s], element_size + 1,
"0x%08x : 0x%08x : %1s\n",
- bps[start].bp,
- bps[start].size,
- amdgpu_ras_badpage_flags_str(bps[start].flags));
+ bps[i].bp,
+ bps[i].size,
+ amdgpu_ras_badpage_flags_str(bps[i].flags));
+ }
kfree(bps);
@@ -1275,23 +1949,106 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
struct amdgpu_ras *con =
container_of(attr, struct amdgpu_ras, features_attr);
- return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features);
+ return sysfs_emit(buf, "feature mask: 0x%x\n", con->features);
+}
+
+/*
+ * Report the version of the RAS IP block registered on @adev.
+ *
+ * Returns true and fills @major, @minor and @rev when unified RAS is
+ * enabled and an IP block of type AMD_IP_BLOCK_TYPE_RAS is found.
+ * Returns false, leaving the outputs untouched, when any argument is NULL,
+ * unified RAS is not enabled, or no such IP block exists.
+ */
+static bool amdgpu_ras_get_version_info(struct amdgpu_device *adev, u32 *major,
+			u32 *minor, u32 *rev)
+{
+	int idx;
+
+	if (!adev || !major || !minor || !rev)
+		return false;
+
+	if (!amdgpu_uniras_enabled(adev))
+		return false;
+
+	for (idx = 0; idx < adev->num_ip_blocks; idx++) {
+		if (adev->ip_blocks[idx].version->type != AMD_IP_BLOCK_TYPE_RAS)
+			continue;
+
+		/* first RAS block wins */
+		*major = adev->ip_blocks[idx].version->major;
+		*minor = adev->ip_blocks[idx].version->minor;
+		*rev = adev->ip_blocks[idx].version->rev;
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * sysfs "version" show callback.
+ *
+ * Always emits the EEPROM table header version; appends a
+ * "ras version: major.minor.rev" line only when
+ * amdgpu_ras_get_version_info() succeeds. Returns the number of bytes
+ * written to @buf.
+ */
+static ssize_t amdgpu_ras_sysfs_version_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct amdgpu_ras *con =
+		container_of(attr, struct amdgpu_ras, version_attr);
+	u32 major, minor, rev;
+	ssize_t len;
+
+	len = sysfs_emit_at(buf, 0, "table version: 0x%x\n",
+			    con->eeprom_control.tbl_hdr.version);
+
+	if (!amdgpu_ras_get_version_info(con->adev, &major, &minor, &rev))
+		return len;
+
+	len += sysfs_emit_at(buf, len, "ras version: %u.%u.%u\n",
+			     major, minor, rev);
+
+	return len;
+}
+
+/*
+ * sysfs "schema" show callback: recovers the owning amdgpu_ras from @attr
+ * and emits its schema field as "schema: 0x<hex>". Returns whatever
+ * sysfs_emit() returns.
+ */
+static ssize_t amdgpu_ras_sysfs_schema_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct amdgpu_ras *con =
+ container_of(attr, struct amdgpu_ras, schema_attr);
+ return sysfs_emit(buf, "schema: 0x%x\n", con->schema);
+}
+
+/*
+ * Event types reported through the event_state sysfs node, each paired with
+ * the label printed for it. The table is a read-only lookup (nothing in the
+ * visible code writes to it), so it is const-qualified.
+ */
+static const struct {
+	enum ras_event_type type;
+	const char *name;
+} dump_event[] = {
+	{RAS_EVENT_TYPE_FATAL, "Fatal Error"},
+	{RAS_EVENT_TYPE_POISON_CREATION, "Poison Creation"},
+	{RAS_EVENT_TYPE_POISON_CONSUMPTION, "Poison Consumption"},
+};
+
+/*
+ * sysfs "event_state" show callback.
+ *
+ * Emits the event manager's current sequence number followed by one line
+ * per dump_event[] entry giving that event's count and last sequence
+ * number. Returns the number of bytes written, or -EINVAL when the RAS
+ * context has no event manager.
+ */
+static ssize_t amdgpu_ras_sysfs_event_state_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct amdgpu_ras *con =
+		container_of(attr, struct amdgpu_ras, event_state_attr);
+	struct ras_event_manager *mgr = con->event_mgr;
+	struct ras_event_state *state;
+	int idx, len;
+
+	if (!mgr)
+		return -EINVAL;
+
+	len = sysfs_emit_at(buf, 0, "current seqno: %llu\n",
+			    atomic64_read(&mgr->seqno));
+
+	for (idx = 0; idx < ARRAY_SIZE(dump_event); idx++) {
+		state = &mgr->event_state[dump_event[idx].type];
+		len += sysfs_emit_at(buf, len,
+				     "%s: count:%llu, last_seqno:%llu\n",
+				     dump_event[idx].name,
+				     atomic64_read(&state->count),
+				     state->last_seqno);
+	}
+
+	return (ssize_t)len;
 }
static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- sysfs_remove_file_from_group(&adev->dev->kobj,
+ if (adev->dev->kobj.sd)
+ sysfs_remove_file_from_group(&adev->dev->kobj,
&con->badpages_attr.attr,
RAS_FS_NAME);
}
-static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
+static int amdgpu_ras_sysfs_remove_dev_attr_node(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct attribute *attrs[] = {
&con->features_attr.attr,
+ &con->version_attr.attr,
+ &con->schema_attr.attr,
+ &con->event_state_attr.attr,
NULL
};
struct attribute_group group = {
@@ -1299,24 +2056,30 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
.attrs = attrs,
};
- sysfs_remove_group(&adev->dev->kobj, &group);
+ if (adev->dev->kobj.sd)
+ sysfs_remove_group(&adev->dev->kobj, &group);
return 0;
}
int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
- struct ras_fs_if *head)
+ struct ras_common_if *head)
{
- struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head);
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+
+ if (amdgpu_aca_is_enabled(adev))
+ return 0;
if (!obj || obj->attr_inuse)
return -EINVAL;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_virt_ras_telemetry_block_en(adev, head->block))
+ return 0;
+
get_obj(obj);
- memcpy(obj->fs_data.sysfs_name,
- head->sysfs_name,
- sizeof(obj->fs_data.sysfs_name));
+ snprintf(obj->fs_data.sysfs_name, sizeof(obj->fs_data.sysfs_name),
+ "%s_err_count", head->name);
obj->sysfs_attr = (struct device_attribute){
.attr = {
@@ -1344,10 +2107,14 @@ int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
{
struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+ if (amdgpu_aca_is_enabled(adev))
+ return 0;
+
if (!obj || !obj->attr_inuse)
return -EINVAL;
- sysfs_remove_file_from_group(&adev->dev->kobj,
+ if (adev->dev->kobj.sd)
+ sysfs_remove_file_from_group(&adev->dev->kobj,
&obj->sysfs_attr.attr,
RAS_FS_NAME);
obj->attr_inuse = 0;
@@ -1368,7 +2135,7 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
if (amdgpu_bad_page_threshold != 0)
amdgpu_ras_sysfs_remove_bad_page_node(adev);
- amdgpu_ras_sysfs_remove_feature_node(adev);
+ amdgpu_ras_sysfs_remove_dev_attr_node(adev);
return 0;
}
@@ -1396,6 +2163,7 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *eeprom = &con->eeprom_control;
struct drm_minor *minor = adev_to_drm(adev)->primary;
struct dentry *dir;
@@ -1406,6 +2174,7 @@ static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *
&amdgpu_ras_debugfs_eeprom_ops);
debugfs_create_u32("bad_page_cnt_threshold", 0444, dir,
&con->bad_page_cnt_threshold);
+ debugfs_create_u32("ras_num_recs", 0444, dir, &eeprom->ras_num_recs);
debugfs_create_x32("ras_hw_enabled", 0444, dir, &adev->ras_hw_enabled);
debugfs_create_x32("ras_enabled", 0444, dir, &adev->ras_enabled);
debugfs_create_file("ras_eeprom_size", S_IRUGO, dir, adev,
@@ -1453,6 +2222,24 @@ static void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
obj, &amdgpu_ras_debugfs_ops);
}
+/*
+ * Tell whether the MP0 IP version of @adev is one of those listed below
+ * (13.0.6, 13.0.12 or 13.0.14).
+ */
+static bool amdgpu_ras_aca_is_supported(struct amdgpu_device *adev)
+{
+	switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+	case IP_VERSION(13, 0, 6):
+	case IP_VERSION(13, 0, 12):
+	case IP_VERSION(13, 0, 14):
+		return true;
+	default:
+		return false;
+	}
+}
+
void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -1478,15 +2265,28 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
amdgpu_ras_debugfs_create(adev, &fs_info, dir);
}
}
+
+ if (amdgpu_ras_aca_is_supported(adev)) {
+ if (amdgpu_aca_is_enabled(adev))
+ amdgpu_aca_smu_debugfs_init(adev, dir);
+ else
+ amdgpu_mca_smu_debugfs_init(adev, dir);
+ }
}
/* debugfs end */
/* ras fs */
-static BIN_ATTR(gpu_vram_bad_pages, S_IRUGO,
- amdgpu_ras_sysfs_badpages_read, NULL, 0);
+static const BIN_ATTR(gpu_vram_bad_pages, S_IRUGO,
+ amdgpu_ras_sysfs_badpages_read, NULL, 0);
static DEVICE_ATTR(features, S_IRUGO,
amdgpu_ras_sysfs_features_read, NULL);
+static DEVICE_ATTR(version, 0444,
+ amdgpu_ras_sysfs_version_show, NULL);
+static DEVICE_ATTR(schema, 0444,
+ amdgpu_ras_sysfs_schema_show, NULL);
+static DEVICE_ATTR(event_state, 0444,
+ amdgpu_ras_sysfs_event_state_show, NULL);
static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -1495,26 +2295,41 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
};
struct attribute *attrs[] = {
&con->features_attr.attr,
+ &con->version_attr.attr,
+ &con->schema_attr.attr,
+ &con->event_state_attr.attr,
NULL
};
- struct bin_attribute *bin_attrs[] = {
+ const struct bin_attribute *bin_attrs[] = {
NULL,
NULL,
};
int r;
+ group.attrs = attrs;
+
/* add features entry */
con->features_attr = dev_attr_features;
- group.attrs = attrs;
sysfs_attr_init(attrs[0]);
+ /* add version entry */
+ con->version_attr = dev_attr_version;
+ sysfs_attr_init(attrs[1]);
+
+ /* add schema entry */
+ con->schema_attr = dev_attr_schema;
+ sysfs_attr_init(attrs[2]);
+
+ /* add event_state entry */
+ con->event_state_attr = dev_attr_event_state;
+ sysfs_attr_init(attrs[3]);
+
if (amdgpu_bad_page_threshold != 0) {
/* add bad_page_features entry */
- bin_attr_gpu_vram_bad_pages.private = NULL;
con->badpages_attr = bin_attr_gpu_vram_bad_pages;
+ sysfs_bin_attr_init(&con->badpages_attr);
bin_attrs[0] = &con->badpages_attr;
group.bin_attrs = bin_attrs;
- sysfs_bin_attr_init(bin_attrs[0]);
}
r = sysfs_create_group(&adev->dev->kobj, &group);
@@ -1543,12 +2358,162 @@ static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
/* ras fs end */
/* ih begin */
+
+/* For hardware that cannot enable the bif ring for both the
+ * ras_controller_irq and ras_err_event_athub_irq ih cookies, the driver
+ * has to poll the status register to check whether the interrupt has been
+ * triggered, and properly ack the interrupt if it has.
+ */
+void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
+{
+ /* Fatal error events are handled on host side */
+ if (amdgpu_sriov_vf(adev))
+ return;
+ /*
+ * If the current interrupt is caused by a non-fatal RAS error, skip
+ * check for fatal error. For fatal errors, FED status of all devices
+ * in XGMI hive gets set when the first device gets fatal error
+ * interrupt. The error gets propagated to other devices as well, so
+ * make sure to ack the interrupt regardless of FED status.
+ */
+ if (!amdgpu_ras_get_fed_status(adev) &&
+ amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY))
+ return;
+
+ /* With unified RAS enabled the RAS manager takes over; nothing else runs */
+ if (amdgpu_uniras_enabled(adev)) {
+ amdgpu_ras_mgr_handle_fatal_interrupt(adev, NULL);
+ return;
+ }
+
+ /*
+ * Otherwise call the NBIO no-bifring handlers. Both callbacks are
+ * optional, and adev->nbio.ras is re-checked before each call.
+ */
+ if (adev->nbio.ras &&
+ adev->nbio.ras->handle_ras_controller_intr_no_bifring)
+ adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev);
+
+ if (adev->nbio.ras &&
+ adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring)
+ adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring(adev);
+}
+
+/*
+ * Handle a poison-consumption interrupt for the RAS block described by @obj.
+ *
+ * Marks a RAS_EVENT_TYPE_POISON_CONSUMPTION event, flags the block's poison
+ * error state, optionally confirms the poison status with the block's
+ * hw_ops, runs the UMC poison handler, and then either issues a GPU reset
+ * or falls back to the GFX poison consumption handler. @entry is only
+ * forwarded to that GFX handler.
+ */
+static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *obj,
+ struct amdgpu_iv_entry *entry)
+{
+ bool poison_stat = false;
+ struct amdgpu_device *adev = obj->adev;
+ struct amdgpu_ras_block_object *block_obj =
+ amdgpu_ras_get_ras_block(adev, obj->head.block, 0);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ enum ras_event_type type = RAS_EVENT_TYPE_POISON_CONSUMPTION;
+ u64 event_id;
+ int ret;
+
+ /* con is only NULL-checked here; it is not used further in this function */
+ if (!block_obj || !con)
+ return;
+
+ /* a non-zero return from marking the event aborts the handling */
+ ret = amdgpu_ras_mark_ras_event(adev, type);
+ if (ret)
+ return;
+
+ amdgpu_ras_set_err_poison(adev, block_obj->ras_comm.block);
+ /* both query_poison_status and handle_poison_consumption are optional,
+ * but at least one of them should be implemented if we need poison
+ * consumption handler
+ */
+ if (block_obj->hw_ops && block_obj->hw_ops->query_poison_status) {
+ poison_stat = block_obj->hw_ops->query_poison_status(adev);
+ if (!poison_stat) {
+ /* Not poison consumption interrupt, no need to handle it */
+ dev_info(adev->dev, "No RAS poison status in %s poison IH.\n",
+ block_obj->ras_comm.name);
+
+ return;
+ }
+ }
+
+ amdgpu_umc_poison_handler(adev, obj->head.block, 0);
+
+ /*
+ * poison_stat is reused: from here on it holds the result of
+ * handle_poison_consumption (when implemented) rather than the
+ * query result above.
+ */
+ if (block_obj->hw_ops && block_obj->hw_ops->handle_poison_consumption)
+ poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
+
+ /* gpu reset is fallback for failed and default cases.
+ * For RMA case, amdgpu_umc_poison_handler will handle gpu reset.
+ */
+ if (poison_stat && !amdgpu_ras_is_rma(adev)) {
+ event_id = amdgpu_ras_acquire_event_id(adev, type);
+ RAS_EVENT_LOG(adev, event_id,
+ "GPU reset for %s RAS poison consumption is issued!\n",
+ block_obj->ras_comm.name);
+ amdgpu_ras_reset_gpu(adev);
+ }
+
+ /* poison_stat false at this point: defer to the GFX handler */
+ if (!poison_stat)
+ amdgpu_gfx_poison_consumption_handler(adev, entry);
+}
+
+/*
+ * Handle a poison-creation interrupt for the RAS block described by @obj.
+ *
+ * Marks and logs a RAS_EVENT_TYPE_POISON_CREATION event. For UMC IP
+ * version 12.0.0 and newer it also bumps the page-retirement request and
+ * poison-creation counters and wakes the page-retirement wait queue.
+ * @entry is not used.
+ */
+static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj,
+				struct amdgpu_iv_entry *entry)
+{
+	enum ras_event_type type = RAS_EVENT_TYPE_POISON_CREATION;
+	struct amdgpu_device *adev = obj->adev;
+	struct amdgpu_ras *con;
+	u64 event_id;
+
+	/* a non-zero return from marking the event aborts the handling */
+	if (amdgpu_ras_mark_ras_event(adev, type))
+		return;
+
+	event_id = amdgpu_ras_acquire_event_id(adev, type);
+	RAS_EVENT_LOG(adev, event_id, "Poison is created\n");
+
+	if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0))
+		return;
+
+	con = amdgpu_ras_get_context(adev);
+
+	atomic_inc(&con->page_retirement_req_cnt);
+	atomic_inc(&con->poison_creation_count);
+
+	wake_up(&con->page_retirement_wq);
+}
+
+/*
+ * Dispatch one interrupt entry to the callback registered for @obj.
+ *
+ * Does nothing when no callback is registered or when the temporary error
+ * data cannot be initialised. Otherwise sets the FED status on the device,
+ * runs the callback, and on AMDGPU_RAS_SUCCESS adds the ue/ce/de counts it
+ * reported to the totals kept in @obj.
+ */
+static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj,
+				struct amdgpu_iv_entry *entry)
+{
+	struct ras_ih_data *ih = &obj->ih_data;
+	struct ras_err_data err;
+
+	if (!ih->cb)
+		return;
+
+	if (amdgpu_ras_error_data_init(&err))
+		return;
+
+	amdgpu_ras_set_fed(obj->adev, true);
+
+	/*
+	 * The IP block processes its own data. An uncorrectable error that
+	 * raised the interrupt needs a reset to recover the system, but
+	 * that recovery is left to the IP block; here the error is only
+	 * dispatched. The counts may stay at 0 for blocks that do not
+	 * count errors.
+	 */
+	if (ih->cb(obj->adev, &err, entry) == AMDGPU_RAS_SUCCESS) {
+		obj->err_data.ue_count += err.ue_count;
+		obj->err_data.ce_count += err.ce_count;
+		obj->err_data.de_count += err.de_count;
+	}
+
+	amdgpu_ras_error_data_fini(&err);
+}
+
static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
{
struct ras_ih_data *data = &obj->ih_data;
struct amdgpu_iv_entry entry;
- int ret;
- struct ras_err_data err_data = {0, 0, 0, NULL};
while (data->rptr != data->wptr) {
rmb();
@@ -1559,29 +2524,17 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
data->rptr = (data->aligned_element_size +
data->rptr) % data->ring_size;
- if (data->cb) {
- if (amdgpu_ras_is_poison_mode_supported(obj->adev) &&
- obj->head.block == AMDGPU_RAS_BLOCK__UMC)
- dev_info(obj->adev->dev,
- "Poison is created, no user action is needed.\n");
- else {
- /* Let IP handle its data, maybe we need get the output
- * from the callback to udpate the error type/count, etc
- */
- ret = data->cb(obj->adev, &err_data, &entry);
- /* ue will trigger an interrupt, and in that case
- * we need do a reset to recovery the whole system.
- * But leave IP do that recovery, here we just dispatch
- * the error.
- */
- if (ret == AMDGPU_RAS_SUCCESS) {
- /* these counts could be left as 0 if
- * some blocks do not count error number
- */
- obj->err_data.ue_count += err_data.ue_count;
- obj->err_data.ce_count += err_data.ce_count;
- }
- }
+ if (amdgpu_ras_is_poison_mode_supported(obj->adev)) {
+ if (obj->head.block == AMDGPU_RAS_BLOCK__UMC)
+ amdgpu_ras_interrupt_poison_creation_handler(obj, &entry);
+ else
+ amdgpu_ras_interrupt_poison_consumption_handler(obj, &entry);
+ } else {
+ if (obj->head.block == AMDGPU_RAS_BLOCK__UMC)
+ amdgpu_ras_interrupt_umc_handler(obj, &entry);
+ else
+ dev_warn(obj->adev->dev,
+ "No RAS interrupt handler for non-UMC block with poison disabled.\n");
}
}
}
@@ -1599,12 +2552,25 @@ static void amdgpu_ras_interrupt_process_handler(struct work_struct *work)
int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
struct ras_dispatch_if *info)
{
- struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
- struct ras_ih_data *data = &obj->ih_data;
+ struct ras_manager *obj;
+ struct ras_ih_data *data;
+
+ if (amdgpu_uniras_enabled(adev)) {
+ struct ras_ih_info ih_info;
+ memset(&ih_info, 0, sizeof(ih_info));
+ ih_info.block = info->head.block;
+ memcpy(&ih_info.iv_entry, info->entry, sizeof(struct amdgpu_iv_entry));
+
+ return amdgpu_ras_mgr_handle_controller_interrupt(adev, &ih_info);
+ }
+
+ obj = amdgpu_ras_find_obj(adev, &info->head);
if (!obj)
return -EINVAL;
+ data = &obj->ih_data;
+
if (data->inuse == 0)
return 0;
@@ -1622,9 +2588,9 @@ int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
}
int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
- struct ras_ih_if *info)
+ struct ras_common_if *head)
{
- struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
struct ras_ih_data *data;
if (!obj)
@@ -1644,24 +2610,27 @@ int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
}
int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev,
- struct ras_ih_if *info)
+ struct ras_common_if *head)
{
- struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
struct ras_ih_data *data;
+ struct amdgpu_ras_block_object *ras_obj;
if (!obj) {
/* in case we registe the IH before enable ras feature */
- obj = amdgpu_ras_create_obj(adev, &info->head);
+ obj = amdgpu_ras_create_obj(adev, head);
if (!obj)
return -EINVAL;
} else
get_obj(obj);
+ ras_obj = container_of(head, struct amdgpu_ras_block_object, ras_comm);
+
data = &obj->ih_data;
/* add the callback.etc */
*data = (struct ras_ih_data) {
.inuse = 0,
- .cb = info->cb,
+ .cb = ras_obj->ras_cb,
.element_size = sizeof(struct amdgpu_iv_entry),
.rptr = 0,
.wptr = 0,
@@ -1690,10 +2659,7 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
struct ras_manager *obj, *tmp;
list_for_each_entry_safe(obj, tmp, &con->head, node) {
- struct ras_ih_if info = {
- .head = obj->head,
- };
- amdgpu_ras_interrupt_remove_handler(adev, &info);
+ amdgpu_ras_interrupt_remove_handler(adev, &obj->head);
}
return 0;
@@ -1701,7 +2667,7 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
/* ih end */
/* traversal all IPs except NBIO to query error counter */
-static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
+static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev, enum ras_event_type type)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj;
@@ -1723,7 +2689,28 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
if (info.head.block == AMDGPU_RAS_BLOCK__PCIE_BIF)
continue;
- amdgpu_ras_query_error_status(adev, &info);
+ /*
+ * This is a workaround for aldebaran: skip sending the message
+ * to smu to get the ecc_info table, because smu temporarily
+ * fails to handle that request.
+ * It should be removed once smu fixes its ecc_info table handling.
+ */
+ if ((info.head.block == AMDGPU_RAS_BLOCK__UMC) &&
+ (amdgpu_ip_version(adev, MP1_HWIP, 0) ==
+ IP_VERSION(13, 0, 2)))
+ continue;
+
+ amdgpu_ras_query_error_status_with_event(adev, &info, type);
+
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) !=
+ IP_VERSION(11, 0, 2) &&
+ amdgpu_ip_version(adev, MP0_HWIP, 0) !=
+ IP_VERSION(11, 0, 4) &&
+ amdgpu_ip_version(adev, MP0_HWIP, 0) !=
+ IP_VERSION(13, 0, 0)) {
+ if (amdgpu_ras_reset_error_status(adev, info.head.block))
+ dev_warn(adev->dev, "Failed to reset error counter and error status");
+ }
}
}
@@ -1731,24 +2718,28 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
static void amdgpu_ras_error_status_query(struct amdgpu_device *adev,
struct ras_query_if *info)
{
+ struct amdgpu_ras_block_object *block_obj;
/*
* Only two block need to query read/write
* RspStatus at current state
*/
- switch (info->head.block) {
- case AMDGPU_RAS_BLOCK__GFX:
- if (adev->gfx.ras_funcs &&
- adev->gfx.ras_funcs->query_ras_error_status)
- adev->gfx.ras_funcs->query_ras_error_status(adev);
- break;
- case AMDGPU_RAS_BLOCK__MMHUB:
- if (adev->mmhub.ras_funcs &&
- adev->mmhub.ras_funcs->query_ras_error_status)
- adev->mmhub.ras_funcs->query_ras_error_status(adev);
- break;
- default:
- break;
+ if ((info->head.block != AMDGPU_RAS_BLOCK__GFX) &&
+ (info->head.block != AMDGPU_RAS_BLOCK__MMHUB))
+ return;
+
+ block_obj = amdgpu_ras_get_ras_block(adev,
+ info->head.block,
+ info->head.sub_block_index);
+
+ if (!block_obj || !block_obj->hw_ops) {
+ dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
+ get_ras_block_str(&info->head));
+ return;
}
+
+ if (block_obj->hw_ops->query_ras_error_status)
+ block_obj->hw_ops->query_ras_error_status(adev);
+
}
static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
@@ -1768,55 +2759,121 @@ static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
}
}
-/* recovery begin */
-
-/* return 0 on success.
- * caller need free bps.
- */
static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
- struct ras_badpage **bps, unsigned int *count)
+ struct ras_badpage *bps, uint32_t count, uint32_t start)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data;
- int i = 0;
- int ret = 0, status;
+ int r = 0;
+ uint32_t i;
if (!con || !con->eh_data || !bps || !count)
return -EINVAL;
mutex_lock(&con->recovery_lock);
data = con->eh_data;
- if (!data || data->count == 0) {
- *bps = NULL;
- ret = -EINVAL;
- goto out;
+ if (start < data->count) {
+ for (i = start; i < data->count; i++) {
+ if (!data->bps[i].ts)
+ continue;
+
+ bps[r].bp = data->bps[i].retired_page;
+ r++;
+ if (r >= count)
+ break;
+ }
}
+ mutex_unlock(&con->recovery_lock);
- *bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL);
- if (!*bps) {
- ret = -ENOMEM;
- goto out;
- }
+ return r;
+}
- for (; i < data->count; i++) {
- (*bps)[i] = (struct ras_badpage){
- .bp = data->bps[i].retired_page,
- .size = AMDGPU_GPU_PAGE_SIZE,
- .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
- };
- status = amdgpu_vram_mgr_query_page_status(
- ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM),
- data->bps[i].retired_page);
- if (status == -EBUSY)
- (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
- else if (status == -ENOENT)
- (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
+/*
+ * Read bad-page records through the RAS manager command interface.
+ *
+ * Records are fetched one group at a time with RAS_CMD__GET_BAD_PAGES,
+ * beginning at record index @start. The retired_page of every record with
+ * a non-zero ts is stored into @bps, up to @count entries.
+ *
+ * Returns the number of entries stored in @bps (possibly fewer than @count
+ * if a command fails or the records run out), -EINVAL when @bps is NULL or
+ * @count is 0, or -ENOMEM when the response buffer cannot be allocated.
+ */
+static int amdgpu_uniras_badpages_read(struct amdgpu_device *adev,
+ struct ras_badpage *bps, uint32_t count, uint32_t start)
+{
+ struct ras_cmd_bad_pages_info_req cmd_input;
+ struct ras_cmd_bad_pages_info_rsp *output;
+ uint32_t group, start_group, end_group;
+ uint32_t pos, pos_in_group;
+ int r = 0, i;
+
+ if (!bps || !count)
+ return -EINVAL;
+
+ /* response buffer is heap-allocated here and freed at the out label */
+ output = kmalloc(sizeof(*output), GFP_KERNEL);
+ if (!output)
+ return -ENOMEM;
+
+ memset(&cmd_input, 0, sizeof(cmd_input));
+
+ /* groups covering [start, start + count); end_group is exclusive */
+ start_group = start / RAS_CMD_MAX_BAD_PAGES_PER_GROUP;
+ end_group = (start + count + RAS_CMD_MAX_BAD_PAGES_PER_GROUP - 1) /
+ RAS_CMD_MAX_BAD_PAGES_PER_GROUP;
+
+ pos = start;
+ for (group = start_group; group < end_group; group++) {
+ memset(output, 0, sizeof(*output));
+ cmd_input.group_index = group;
+ /* on command failure, return what has been collected so far */
+ if (amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__GET_BAD_PAGES,
+ &cmd_input, sizeof(cmd_input), output, sizeof(*output)))
+ goto out;
+
+ if (pos >= output->bp_total_cnt)
+ goto out;
+
+ /*
+ * NOTE(review): pos only advances inside the inner loop, so this
+ * presumably relies on every non-final group reporting
+ * RAS_CMD_MAX_BAD_PAGES_PER_GROUP records - TODO confirm.
+ */
+ pos_in_group = pos - group * RAS_CMD_MAX_BAD_PAGES_PER_GROUP;
+ for (i = pos_in_group; i < output->bp_in_group; i++, pos++) {
+ /* records with a zero ts are skipped */
+ if (!output->records[i].ts)
+ continue;
+
+ bps[r].bp = output->records[i].retired_page;
+ r++;
+ if (r >= count)
+ goto out;
+ }
 }
- *count = data->count;
 out:
- mutex_unlock(&con->recovery_lock);
- return ret;
+ kfree(output);
+ return r;
+}
+
+/*
+ * Set the FED status to @status on every device in @hive, or on @adev alone
+ * when @hive is NULL.
+ */
+static void amdgpu_ras_set_fed_all(struct amdgpu_device *adev,
+				struct amdgpu_hive_info *hive, bool status)
+{
+	struct amdgpu_device *member;
+
+	if (!hive) {
+		amdgpu_ras_set_fed(adev, status);
+		return;
+	}
+
+	list_for_each_entry(member, &hive->device_list, gmc.xgmi.head)
+		amdgpu_ras_set_fed(member, status);
+}
+
+/*
+ * Report whether RAS recovery is in progress for @adev.
+ *
+ * Returns true when a RAS context exists and either its in_recovery flag
+ * or the ras_recovery flag of the device's XGMI hive (if any) is set.
+ * The hive reference taken for the check is dropped before returning.
+ */
+bool amdgpu_ras_in_recovery(struct amdgpu_device *adev)
+{
+	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	bool hive_recovering = false;
+
+	if (hive) {
+		hive_recovering = atomic_read(&hive->ras_recovery) != 0;
+		amdgpu_put_xgmi_hive(hive);
+	}
+
+	if (!ras)
+		return false;
+
+	return atomic_read(&ras->in_recovery) || hive_recovering;
+}
+
+/*
+ * Pick the event type used for error logging during recovery:
+ * RAS_EVENT_TYPE_FATAL when amdgpu_ras_intr_triggered() reports true,
+ * RAS_EVENT_TYPE_POISON_CONSUMPTION otherwise. @adev is not used.
+ */
+static enum ras_event_type amdgpu_ras_get_fatal_error_event(struct amdgpu_device *adev)
+{
+	return amdgpu_ras_intr_triggered() ? RAS_EVENT_TYPE_FATAL :
+					     RAS_EVENT_TYPE_POISON_CONSUMPTION;
 }
static void amdgpu_ras_do_recovery(struct work_struct *work)
@@ -1826,9 +2883,26 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
struct amdgpu_device *remote_adev = NULL;
struct amdgpu_device *adev = ras->adev;
struct list_head device_list, *device_list_handle = NULL;
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+ unsigned int error_query_mode;
+ enum ras_event_type type;
+
+ if (hive) {
+ atomic_set(&hive->ras_recovery, 1);
+ /* If any device which is part of the hive received RAS fatal
+ * error interrupt, set fatal error status on all. This
+ * condition will need a recovery, and flag will be cleared
+ * as part of recovery.
+ */
+ list_for_each_entry(remote_adev, &hive->device_list,
+ gmc.xgmi.head)
+ if (amdgpu_ras_get_fed_status(remote_adev)) {
+ amdgpu_ras_set_fed_all(adev, hive, true);
+ break;
+ }
+ }
if (!ras->disable_ras_err_cnt_harvest) {
- struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
/* Build list of devices to query RAS related errors */
if (hive && adev->gmc.xgmi.num_physical_nodes > 1) {
@@ -1839,18 +2913,64 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
device_list_handle = &device_list;
}
+ if (amdgpu_ras_get_error_query_mode(adev, &error_query_mode)) {
+ if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY) {
+ /* wait 500ms to ensure pmfw polling mca bank info done */
+ msleep(500);
+ }
+ }
+
+ type = amdgpu_ras_get_fatal_error_event(adev);
list_for_each_entry(remote_adev,
device_list_handle, gmc.xgmi.head) {
- amdgpu_ras_query_err_status(remote_adev);
- amdgpu_ras_log_on_err_counter(remote_adev);
+ if (amdgpu_uniras_enabled(remote_adev)) {
+ amdgpu_ras_mgr_update_ras_ecc(remote_adev);
+ } else {
+ amdgpu_ras_query_err_status(remote_adev);
+ amdgpu_ras_log_on_err_counter(remote_adev, type);
+ }
}
- amdgpu_put_xgmi_hive(hive);
}
- if (amdgpu_device_should_recover_gpu(ras->adev))
- amdgpu_device_gpu_recover(ras->adev, NULL);
+ if (amdgpu_device_should_recover_gpu(ras->adev)) {
+ struct amdgpu_reset_context reset_context;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ reset_context.src = AMDGPU_RESET_SRC_RAS;
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
+
+ /* Perform full reset in fatal error mode */
+ if (!amdgpu_ras_is_poison_mode_supported(ras->adev))
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ else {
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET) {
+ ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ reset_context.method = AMD_RESET_METHOD_MODE2;
+ }
+
+ /* Fatal error occurs in poison mode, mode1 reset is used to
+ * recover gpu.
+ */
+ if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) {
+ ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ psp_fatal_error_recovery_quirk(&adev->psp);
+ }
+ }
+
+ amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
+ }
atomic_set(&ras->in_recovery, 0);
+ if (hive) {
+ atomic_set(&hive->ras_recovery, 0);
+ amdgpu_put_xgmi_hive(hive);
+ }
}
/* alloc/realloc bps array */
@@ -1860,10 +2980,9 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
unsigned int old_space = data->count + data->space_left;
unsigned int new_space = old_space + pages;
unsigned int align_space = ALIGN(new_space, 512);
- void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
+ void *bps = kmalloc_array(align_space, sizeof(*data->bps), GFP_KERNEL);
if (!bps) {
- kfree(bps);
return -ENOMEM;
}
@@ -1878,44 +2997,283 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
return 0;
}
-/* it deal with vram only. */
-int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
- struct eeprom_table_record *bps, int pages)
+static int amdgpu_ras_mca2pa_by_idx(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps,
+ struct ras_err_data *err_data)
{
- struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- struct ras_err_handler_data *data;
+ struct ta_ras_query_address_input addr_in;
+ uint32_t socket = 0;
int ret = 0;
- uint32_t i;
- if (!con || !con->eh_data || !bps || pages <= 0)
- return 0;
+ if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id)
+ socket = adev->smuio.funcs->get_socket_id(adev);
- mutex_lock(&con->recovery_lock);
- data = con->eh_data;
- if (!data)
- goto out;
+ /* reinit err_data */
+ err_data->err_addr_cnt = 0;
+ err_data->err_addr_len = adev->umc.retire_unit;
+
+ memset(&addr_in, 0, sizeof(addr_in));
+ addr_in.ma.err_addr = bps->address;
+ addr_in.ma.socket_id = socket;
+ addr_in.ma.ch_inst = bps->mem_channel;
+ if (!amdgpu_ras_smu_eeprom_supported(adev)) {
+ /* tell RAS TA the node instance is not used */
+ addr_in.ma.node_inst = TA_RAS_INV_NODE;
+ } else {
+ addr_in.ma.umc_inst = bps->mcumc_id;
+ addr_in.ma.node_inst = bps->cu;
+ }
+
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
+ ret = adev->umc.ras->convert_ras_err_addr(adev, err_data,
+ &addr_in, NULL, false);
+
+ return ret;
+}
+
+static int amdgpu_ras_mca2pa(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps,
+ struct ras_err_data *err_data)
+{
+ struct ta_ras_query_address_input addr_in;
+ uint32_t die_id, socket = 0;
+
+ if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id)
+ socket = adev->smuio.funcs->get_socket_id(adev);
- for (i = 0; i < pages; i++) {
+ /* although the die id is derived from the PA in nps1 mode, the id is
+ * suitable for any nps mode
+ */
+ if (adev->umc.ras && adev->umc.ras->get_die_id_from_pa)
+ die_id = adev->umc.ras->get_die_id_from_pa(adev, bps->address,
+ bps->retired_page << AMDGPU_GPU_PAGE_SHIFT);
+ else
+ return -EINVAL;
+
+ /* reinit err_data */
+ err_data->err_addr_cnt = 0;
+ err_data->err_addr_len = adev->umc.retire_unit;
+
+ memset(&addr_in, 0, sizeof(addr_in));
+ addr_in.ma.err_addr = bps->address;
+ addr_in.ma.ch_inst = bps->mem_channel;
+ addr_in.ma.umc_inst = bps->mcumc_id;
+ addr_in.ma.node_inst = die_id;
+ addr_in.ma.socket_id = socket;
+
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
+ return adev->umc.ras->convert_ras_err_addr(adev, err_data,
+ &addr_in, NULL, false);
+ else
+ return -EINVAL;
+}
+
+static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, int count)
+{
+ int j;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_handler_data *data = con->eh_data;
+
+ for (j = 0; j < count; j++) {
if (amdgpu_ras_check_bad_page_unlock(con,
- bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+ bps[j].retired_page << AMDGPU_GPU_PAGE_SHIFT)) {
+ data->count++;
+ data->space_left--;
continue;
+ }
if (!data->space_left &&
- amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
- ret = -ENOMEM;
- goto out;
+ amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
+ return -ENOMEM;
}
- amdgpu_vram_mgr_reserve_range(
- ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM),
- bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT,
- AMDGPU_GPU_PAGE_SIZE);
+ amdgpu_ras_reserve_page(adev, bps[j].retired_page);
- memcpy(&data->bps[data->count], &bps[i], sizeof(*data->bps));
+ memcpy(&data->bps[data->count], &(bps[j]),
+ sizeof(struct eeprom_table_record));
data->count++;
data->space_left--;
+ con->bad_page_num++;
+ }
+
+ return 0;
+}
+
+static int __amdgpu_ras_convert_rec_array_from_rom(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, struct ras_err_data *err_data,
+ enum amdgpu_memory_partition nps)
+{
+ int i = 0;
+ enum amdgpu_memory_partition save_nps;
+
+ save_nps = (bps[0].retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
+
+ /* old ASICs only have the PA in the eeprom */
+ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) {
+ memcpy(err_data->err_addr, bps,
+ sizeof(struct eeprom_table_record) * adev->umc.retire_unit);
+ goto out;
+ }
+
+ for (i = 0; i < adev->umc.retire_unit; i++)
+ bps[i].retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
+
+ if (save_nps) {
+ if (save_nps == nps) {
+ if (amdgpu_umc_pages_in_a_row(adev, err_data,
+ bps[0].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+ return -EINVAL;
+ for (i = 0; i < adev->umc.retire_unit; i++) {
+ err_data->err_addr[i].address = bps[0].address;
+ err_data->err_addr[i].mem_channel = bps[0].mem_channel;
+ err_data->err_addr[i].bank = bps[0].bank;
+ err_data->err_addr[i].err_type = bps[0].err_type;
+ err_data->err_addr[i].mcumc_id = bps[0].mcumc_id;
+ }
+ } else {
+ if (amdgpu_ras_mca2pa_by_idx(adev, &bps[0], err_data))
+ return -EINVAL;
+ }
+ } else {
+ if (bps[0].address == 0) {
+ /* for specific old eeprom data, mca address is not stored,
+ * calc it from pa
+ */
+ if (amdgpu_umc_pa2mca(adev, bps[0].retired_page << AMDGPU_GPU_PAGE_SHIFT,
+ &(bps[0].address), AMDGPU_NPS1_PARTITION_MODE))
+ return -EINVAL;
+ }
+
+ if (amdgpu_ras_mca2pa(adev, &bps[0], err_data)) {
+ if (nps == AMDGPU_NPS1_PARTITION_MODE)
+ memcpy(err_data->err_addr, bps,
+ sizeof(struct eeprom_table_record) * adev->umc.retire_unit);
+ else
+ return -EOPNOTSUPP;
+ }
}
+
out:
+ return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr, adev->umc.retire_unit);
+}
+
+static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, struct ras_err_data *err_data,
+ enum amdgpu_memory_partition nps)
+{
+ int i = 0;
+ enum amdgpu_memory_partition save_nps;
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev)) {
+ save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
+ bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
+ } else {
+ /* if pmfw manages the eeprom, save_nps is not stored in the eeprom
+ * and the mca address must always be converted into a physical
+ * address, so make save_nps different from nps
+ */
+ save_nps = nps + 1;
+ }
+
+ if (save_nps == nps) {
+ if (amdgpu_umc_pages_in_a_row(adev, err_data,
+ bps->retired_page << AMDGPU_GPU_PAGE_SHIFT))
+ return -EINVAL;
+ for (i = 0; i < adev->umc.retire_unit; i++) {
+ err_data->err_addr[i].address = bps->address;
+ err_data->err_addr[i].mem_channel = bps->mem_channel;
+ err_data->err_addr[i].bank = bps->bank;
+ err_data->err_addr[i].err_type = bps->err_type;
+ err_data->err_addr[i].mcumc_id = bps->mcumc_id;
+ }
+ } else {
+ if (bps->address) {
+ if (amdgpu_ras_mca2pa_by_idx(adev, bps, err_data))
+ return -EINVAL;
+ } else {
+ /* for specific old eeprom data, mca address is not stored,
+ * calc it from pa
+ */
+ if (amdgpu_umc_pa2mca(adev, bps->retired_page << AMDGPU_GPU_PAGE_SHIFT,
+ &(bps->address), AMDGPU_NPS1_PARTITION_MODE))
+ return -EINVAL;
+
+ if (amdgpu_ras_mca2pa(adev, bps, err_data))
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr,
+ adev->umc.retire_unit);
+}
+
+/* it deals with vram only. */
+int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, int pages, bool from_rom)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_data err_data;
+ struct amdgpu_ras_eeprom_control *control =
+ &adev->psp.ras_context.ras->eeprom_control;
+ enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE;
+ int ret = 0;
+ uint32_t i = 0;
+
+ if (!con || !con->eh_data || !bps || pages <= 0)
+ return 0;
+
+ if (from_rom) {
+ err_data.err_addr =
+ kcalloc(adev->umc.retire_unit,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+ if (!err_data.err_addr) {
+ dev_warn(adev->dev, "Failed to alloc UMC error address record in mca2pa conversion!\n");
+ return -ENOMEM;
+ }
+
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ }
+
+ mutex_lock(&con->recovery_lock);
+
+ if (from_rom) {
+ /* there are no pa recs in V3, so skip pa recs processing */
+ if ((control->tbl_hdr.version < RAS_TABLE_VER_V3) &&
+ !amdgpu_ras_smu_eeprom_supported(adev)) {
+ for (i = 0; i < pages; i++) {
+ if (control->ras_num_recs - i >= adev->umc.retire_unit) {
+ if ((bps[i].address == bps[i + 1].address) &&
+ (bps[i].mem_channel == bps[i + 1].mem_channel)) {
+ /* deal with retire_unit records at a time */
+ ret = __amdgpu_ras_convert_rec_array_from_rom(adev,
+ &bps[i], &err_data, nps);
+ if (ret)
+ con->bad_page_num -= adev->umc.retire_unit;
+ i += (adev->umc.retire_unit - 1);
+ } else {
+ break;
+ }
+ } else {
+ break;
+ }
+ }
+ }
+ for (; i < pages; i++) {
+ ret = __amdgpu_ras_convert_rec_from_rom(adev,
+ &bps[i], &err_data, nps);
+ if (ret)
+ con->bad_page_num -= adev->umc.retire_unit;
+ }
+
+ con->eh_data->count_saved = con->eh_data->count;
+ } else {
+ ret = __amdgpu_ras_restore_bad_pages(adev, bps, pages);
+ }
+
+ if (from_rom)
+ kfree(err_data.err_addr);
mutex_unlock(&con->recovery_lock);
return ret;
@@ -1924,30 +3282,70 @@ out:
/*
* write error record array to eeprom, the function should be
* protected by recovery_lock
+ * new_cnt: newly added UE count, excluding reserved bad pages, can be NULL
*/
-int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
+int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
+ unsigned long *new_cnt)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data;
struct amdgpu_ras_eeprom_control *control;
- int save_count;
+ int save_count, unit_num, i;
+
+ if (!con || !con->eh_data) {
+ if (new_cnt)
+ *new_cnt = 0;
+
+ return 0;
+ }
+
+ if (!con->eeprom_control.is_eeprom_valid) {
+ dev_warn(adev->dev,
+ "Failed to save EEPROM table data because of EEPROM data corruption!");
+ if (new_cnt)
+ *new_cnt = 0;
- if (!con || !con->eh_data)
return 0;
+ }
+ mutex_lock(&con->recovery_lock);
control = &con->eeprom_control;
data = con->eh_data;
- save_count = data->count - control->ras_num_recs;
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ unit_num = control->ras_num_recs -
+ control->ras_num_recs_old;
+ else
+ unit_num = data->count / adev->umc.retire_unit -
+ control->ras_num_recs;
+
+ save_count = con->bad_page_num - control->ras_num_bad_pages;
+ mutex_unlock(&con->recovery_lock);
+
+ if (new_cnt)
+ *new_cnt = unit_num;
+
/* only new entries are saved */
- if (save_count > 0) {
- if (amdgpu_ras_eeprom_append(control,
- &data->bps[control->ras_num_recs],
- save_count)) {
- dev_err(adev->dev, "Failed to save EEPROM table data!");
- return -EIO;
+ if (unit_num && save_count) {
+ /* old ASICs only save the PA to the eeprom, as before */
+ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) {
+ if (amdgpu_ras_eeprom_append(control,
+ &data->bps[data->count_saved], unit_num)) {
+ dev_err(adev->dev, "Failed to save EEPROM table data!");
+ return -EIO;
+ }
+ } else {
+ for (i = 0; i < unit_num; i++) {
+ if (amdgpu_ras_eeprom_append(control,
+ &data->bps[data->count_saved +
+ i * adev->umc.retire_unit], 1)) {
+ dev_err(adev->dev, "Failed to save EEPROM table data!");
+ return -EIO;
+ }
+ }
}
dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count);
+ data->count_saved = data->count;
}
return 0;
@@ -1962,7 +3360,7 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
struct amdgpu_ras_eeprom_control *control =
&adev->psp.ras_context.ras->eeprom_control;
struct eeprom_table_record *bps;
- int ret;
+ int ret, i = 0;
/* no bad page record, skip eeprom access */
if (control->ras_num_recs == 0 || amdgpu_bad_page_threshold == 0)
@@ -1973,27 +3371,72 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
return -ENOMEM;
ret = amdgpu_ras_eeprom_read(control, bps, control->ras_num_recs);
- if (ret)
+ if (ret) {
dev_err(adev->dev, "Failed to load EEPROM table records!");
- else
- ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs);
+ } else {
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
+ /* In V3 there are no pa recs, but some records (when address == 0) may
+ * be parsed as pa recs, so add a version check to avoid that.
+ */
+ if ((control->tbl_hdr.version < RAS_TABLE_VER_V3) &&
+ !amdgpu_ras_smu_eeprom_supported(adev)) {
+ for (i = 0; i < control->ras_num_recs; i++) {
+ if ((control->ras_num_recs - i) >= adev->umc.retire_unit) {
+ if ((bps[i].address == bps[i + 1].address) &&
+ (bps[i].mem_channel == bps[i + 1].mem_channel)) {
+ control->ras_num_pa_recs += adev->umc.retire_unit;
+ i += (adev->umc.retire_unit - 1);
+ } else {
+ control->ras_num_mca_recs +=
+ (control->ras_num_recs - i);
+ break;
+ }
+ } else {
+ control->ras_num_mca_recs += (control->ras_num_recs - i);
+ break;
+ }
+ }
+ } else {
+ control->ras_num_mca_recs = control->ras_num_recs;
+ }
+ }
+
+ ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs, true);
+ if (ret)
+ goto out;
+
+ ret = amdgpu_ras_eeprom_check(control);
+ if (ret)
+ goto out;
+ /* HW not usable */
+ if (amdgpu_ras_is_rma(adev))
+ ret = -EHWPOISON;
+ }
+
+out:
kfree(bps);
return ret;
}
-static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
+static int amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
uint64_t addr)
{
struct ras_err_handler_data *data = con->eh_data;
+ struct amdgpu_device *adev = con->adev;
int i;
+ if ((addr >= adev->gmc.mc_vram_size &&
+ adev->gmc.mc_vram_size) ||
+ (addr >= RAS_UMC_INJECT_ADDR_LIMIT))
+ return -EINVAL;
+
addr >>= AMDGPU_GPU_PAGE_SHIFT;
for (i = 0; i < data->count; i++)
if (addr == data->bps[i].retired_page)
- return true;
+ return 1;
- return false;
+ return 0;
}
/*
@@ -2001,11 +3444,11 @@ static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
*
* Note: this check is only for umc block
*/
-static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
+static int amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
uint64_t addr)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- bool ret = false;
+ int ret = 0;
if (!con || !con->eh_data)
return ret;
@@ -2022,45 +3465,382 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
/*
- * Justification of value bad_page_cnt_threshold in ras structure
- *
- * Generally, -1 <= amdgpu_bad_page_threshold <= max record length
- * in eeprom, and introduce two scenarios accordingly.
- *
- * Bad page retirement enablement:
- * - If amdgpu_bad_page_threshold = -1,
- * bad_page_cnt_threshold = typical value by formula.
- *
- * - When the value from user is 0 < amdgpu_bad_page_threshold <
- * max record length in eeprom, use it directly.
- *
- * Bad page retirement disablement:
- * - If amdgpu_bad_page_threshold = 0, bad page retirement
- * functionality is disabled, and bad_page_cnt_threshold will
- * take no effect.
+ * amdgpu_bad_page_threshold is used to config
+ * the threshold for the number of bad pages.
+ * -1: Threshold is set to default value
+ * Driver will issue a warning message when threshold is reached
+ * and continue runtime services.
+ * 0: Disable bad page retirement
+ * Driver will not retire bad pages
+ * which is intended for debugging purpose.
+ * -2: Threshold is determined by a formula
+ * that assumes 1 bad page per 100M of local memory.
+ * Driver will continue runtime services when threshold is reached.
+ * 0 < threshold < max number of bad page records in EEPROM,
+ * A user-defined threshold is set
+ * Driver will halt runtime services when this custom threshold is reached.
*/
-
- if (amdgpu_bad_page_threshold < 0) {
+ if (amdgpu_bad_page_threshold == -2) {
u64 val = adev->gmc.mc_vram_size;
do_div(val, RAS_BAD_PAGE_COVER);
con->bad_page_cnt_threshold = min(lower_32_bits(val),
max_count);
+ } else if (amdgpu_bad_page_threshold == -1) {
+ con->bad_page_cnt_threshold = ((con->reserved_pages_in_bytes) >> 21) << 4;
} else {
con->bad_page_cnt_threshold = min_t(int, max_count,
amdgpu_bad_page_threshold);
}
}
-int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
+int amdgpu_ras_put_poison_req(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset)
+{
+ int ret = 0;
+ struct ras_poison_msg poison_msg;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ memset(&poison_msg, 0, sizeof(poison_msg));
+ poison_msg.block = block;
+ poison_msg.pasid = pasid;
+ poison_msg.reset = reset;
+ poison_msg.pasid_fn = pasid_fn;
+ poison_msg.data = data;
+
+ ret = kfifo_put(&con->poison_fifo, poison_msg);
+ if (!ret) {
+ dev_err(adev->dev, "Poison message fifo is full!\n");
+ return -ENOSPC;
+ }
+
+ return 0;
+}
+
+static int amdgpu_ras_get_poison_req(struct amdgpu_device *adev,
+ struct ras_poison_msg *poison_msg)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ return kfifo_get(&con->poison_fifo, poison_msg);
+}
+
+static void amdgpu_ras_ecc_log_init(struct ras_ecc_log_info *ecc_log)
+{
+ mutex_init(&ecc_log->lock);
+
+ INIT_RADIX_TREE(&ecc_log->de_page_tree, GFP_KERNEL);
+ ecc_log->de_queried_count = 0;
+ ecc_log->consumption_q_count = 0;
+}
+
+static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log)
+{
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+ struct ras_ecc_err *ecc_err;
+
+ mutex_lock(&ecc_log->lock);
+ radix_tree_for_each_slot(slot, &ecc_log->de_page_tree, &iter, 0) {
+ ecc_err = radix_tree_deref_slot(slot);
+ kfree(ecc_err->err_pages.pfn);
+ kfree(ecc_err);
+ radix_tree_iter_delete(&ecc_log->de_page_tree, &iter, slot);
+ }
+ mutex_unlock(&ecc_log->lock);
+
+ mutex_destroy(&ecc_log->lock);
+ ecc_log->de_queried_count = 0;
+ ecc_log->consumption_q_count = 0;
+}
+
+static bool amdgpu_ras_schedule_retirement_dwork(struct amdgpu_ras *con,
+ uint32_t delayed_ms)
+{
+ int ret;
+
+ mutex_lock(&con->umc_ecc_log.lock);
+ ret = radix_tree_tagged(&con->umc_ecc_log.de_page_tree,
+ UMC_ECC_NEW_DETECTED_TAG);
+ mutex_unlock(&con->umc_ecc_log.lock);
+
+ if (ret)
+ schedule_delayed_work(&con->page_retirement_dwork,
+ msecs_to_jiffies(delayed_ms));
+
+ return ret ? true : false;
+}
+
+static void amdgpu_ras_do_page_retirement(struct work_struct *work)
+{
+ struct amdgpu_ras *con = container_of(work, struct amdgpu_ras,
+ page_retirement_dwork.work);
+ struct amdgpu_device *adev = con->adev;
+ struct ras_err_data err_data;
+
+ /* If gpu reset is ongoing, delay retiring the bad pages */
+ if (amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) {
+ amdgpu_ras_schedule_retirement_dwork(con,
+ AMDGPU_RAS_RETIRE_PAGE_INTERVAL * 3);
+ return;
+ }
+
+ amdgpu_ras_error_data_init(&err_data);
+
+ amdgpu_umc_handle_bad_pages(adev, &err_data);
+
+ amdgpu_ras_error_data_fini(&err_data);
+
+ amdgpu_ras_schedule_retirement_dwork(con,
+ AMDGPU_RAS_RETIRE_PAGE_INTERVAL);
+}
+
+static int amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
+ uint32_t poison_creation_count)
+{
+ int ret = 0;
+ struct ras_ecc_log_info *ecc_log;
+ struct ras_query_if info;
+ u32 timeout = MAX_UMC_POISON_POLLING_TIME_ASYNC;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ u64 de_queried_count;
+ u64 consumption_q_count;
+ enum ras_event_type type = RAS_EVENT_TYPE_POISON_CREATION;
+
+ memset(&info, 0, sizeof(info));
+ info.head.block = AMDGPU_RAS_BLOCK__UMC;
+
+ ecc_log = &ras->umc_ecc_log;
+ ecc_log->de_queried_count = 0;
+ ecc_log->consumption_q_count = 0;
+
+ do {
+ ret = amdgpu_ras_query_error_status_with_event(adev, &info, type);
+ if (ret)
+ return ret;
+
+ de_queried_count = ecc_log->de_queried_count;
+ consumption_q_count = ecc_log->consumption_q_count;
+
+ if (de_queried_count && consumption_q_count)
+ break;
+
+ msleep(100);
+ } while (--timeout);
+
+ if (de_queried_count)
+ schedule_delayed_work(&ras->page_retirement_dwork, 0);
+
+ if (amdgpu_ras_is_rma(adev) && atomic_cmpxchg(&ras->rma_in_recovery, 0, 1) == 0)
+ amdgpu_ras_reset_gpu(adev);
+
+ return 0;
+}
+
+static void amdgpu_ras_clear_poison_fifo(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_poison_msg msg;
+ int ret;
+
+ do {
+ ret = kfifo_get(&con->poison_fifo, &msg);
+ } while (ret);
+}
+
+static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev,
+ uint32_t msg_count, uint32_t *gpu_reset)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint32_t reset_flags = 0, reset = 0;
+ struct ras_poison_msg msg;
+ int ret, i;
+
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+
+ for (i = 0; i < msg_count; i++) {
+ ret = amdgpu_ras_get_poison_req(adev, &msg);
+ if (!ret)
+ continue;
+
+ if (msg.pasid_fn)
+ msg.pasid_fn(adev, msg.pasid, msg.data);
+
+ reset_flags |= msg.reset;
+ }
+
+ /*
+ * Try to ensure poison creation handler is completed first
+ * to set rma if the bad page count exceeds the threshold.
+ */
+ flush_delayed_work(&con->page_retirement_dwork);
+
+ /* for RMA, amdgpu_ras_poison_creation_handler will trigger gpu reset */
+ if (reset_flags && !amdgpu_ras_is_rma(adev)) {
+ if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET)
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ else
+ reset = reset_flags;
+
+ con->gpu_reset_flags |= reset;
+ amdgpu_ras_reset_gpu(adev);
+
+ *gpu_reset = reset;
+
+ /* Wait for gpu recovery to complete */
+ flush_work(&con->recovery_work);
+ }
+
+ return 0;
+}
+
+static int amdgpu_ras_page_retirement_thread(void *param)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)param;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint32_t poison_creation_count, msg_count;
+ uint32_t gpu_reset;
+ int ret;
+
+ while (!kthread_should_stop()) {
+
+ wait_event_interruptible(con->page_retirement_wq,
+ kthread_should_stop() ||
+ atomic_read(&con->page_retirement_req_cnt));
+
+ if (kthread_should_stop())
+ break;
+
+ mutex_lock(&con->poison_lock);
+ gpu_reset = 0;
+
+ do {
+ poison_creation_count = atomic_read(&con->poison_creation_count);
+ ret = amdgpu_ras_poison_creation_handler(adev, poison_creation_count);
+ if (ret == -EIO)
+ break;
+
+ if (poison_creation_count) {
+ atomic_sub(poison_creation_count, &con->poison_creation_count);
+ atomic_sub(poison_creation_count, &con->page_retirement_req_cnt);
+ }
+ } while (atomic_read(&con->poison_creation_count) &&
+ !atomic_read(&con->poison_consumption_count));
+
+ if (ret != -EIO) {
+ msg_count = kfifo_len(&con->poison_fifo);
+ if (msg_count) {
+ ret = amdgpu_ras_poison_consumption_handler(adev,
+ msg_count, &gpu_reset);
+ if ((ret != -EIO) &&
+ (gpu_reset != AMDGPU_RAS_GPU_RESET_MODE1_RESET))
+ atomic_sub(msg_count, &con->page_retirement_req_cnt);
+ }
+ }
+
+ if ((ret == -EIO) || (gpu_reset == AMDGPU_RAS_GPU_RESET_MODE1_RESET)) {
+ /* gpu mode-1 reset is ongoing or just completed ras mode-1 reset */
+ /* Clear poison creation request */
+ atomic_set(&con->poison_creation_count, 0);
+ atomic_set(&con->poison_consumption_count, 0);
+
+ /* Clear poison fifo */
+ amdgpu_ras_clear_poison_fifo(adev);
+
+ /* Clear all poison requests */
+ atomic_set(&con->page_retirement_req_cnt, 0);
+
+ if (ret == -EIO) {
+ /* Wait for mode-1 reset to complete */
+ down_read(&adev->reset_domain->sem);
+ up_read(&adev->reset_domain->sem);
+ }
+
+ /* Wake up work to save bad pages to eeprom */
+ schedule_delayed_work(&con->page_retirement_dwork, 0);
+ } else if (gpu_reset) {
+ /* gpu just completed mode-2 reset or other reset */
+ /* Clear poison consumption messages cached in fifo */
+ msg_count = kfifo_len(&con->poison_fifo);
+ if (msg_count) {
+ amdgpu_ras_clear_poison_fifo(adev);
+ atomic_sub(msg_count, &con->page_retirement_req_cnt);
+ }
+
+ atomic_set(&con->poison_consumption_count, 0);
+
+ /* Wake up work to save bad pages to eeprom */
+ schedule_delayed_work(&con->page_retirement_dwork, 0);
+ }
+ mutex_unlock(&con->poison_lock);
+ }
+
+ return 0;
+}
+
+int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *control;
+ int ret;
+
+ if (!con || amdgpu_sriov_vf(adev))
+ return 0;
+
+ if (amdgpu_uniras_enabled(adev))
+ return 0;
+
+ control = &con->eeprom_control;
+ con->ras_smu_drv = amdgpu_dpm_get_ras_smu_driver(adev);
+
+ ret = amdgpu_ras_eeprom_init(control);
+ control->is_eeprom_valid = !ret;
+
+ if (!adev->umc.ras || !adev->umc.ras->convert_ras_err_addr)
+ control->ras_num_pa_recs = control->ras_num_recs;
+
+ if (adev->umc.ras &&
+ adev->umc.ras->get_retire_flip_bits)
+ adev->umc.ras->get_retire_flip_bits(adev);
+
+ if (control->ras_num_recs && control->is_eeprom_valid) {
+ ret = amdgpu_ras_load_bad_pages(adev);
+ if (ret) {
+ control->is_eeprom_valid = false;
+ return 0;
+ }
+
+ amdgpu_dpm_send_hbm_bad_pages_num(
+ adev, control->ras_num_bad_pages);
+
+ if (con->update_channel_flag == true) {
+ amdgpu_dpm_send_hbm_bad_channel_flag(
+ adev, control->bad_channel_bitmap);
+ con->update_channel_flag = false;
+ }
+
+ /* The format action is only applied to new ASICs */
+ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) >= 12 &&
+ control->tbl_hdr.version < RAS_TABLE_VER_V3)
+ if (!amdgpu_ras_eeprom_reset_table(control))
+ if (amdgpu_ras_save_bad_pages(adev, NULL))
+ dev_warn(adev->dev, "Failed to format RAS EEPROM data in V3 version!\n");
+ }
+
+ return 0;
+}
+
+int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data **data;
u32 max_eeprom_records_count = 0;
- bool exc_err_limit = false;
int ret;
- if (!con)
+ if (!con || amdgpu_sriov_vf(adev))
return 0;
/* Allow access to RAS EEPROM via debugfs, when the ASIC
@@ -2074,42 +3854,44 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
return 0;
data = &con->eh_data;
- *data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO);
+ *data = kzalloc(sizeof(**data), GFP_KERNEL);
if (!*data) {
ret = -ENOMEM;
goto out;
}
mutex_init(&con->recovery_lock);
+ mutex_init(&con->poison_lock);
INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery);
atomic_set(&con->in_recovery, 0);
+ atomic_set(&con->rma_in_recovery, 0);
+ con->eeprom_control.bad_channel_bitmap = 0;
- max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count();
+ max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(&con->eeprom_control);
amdgpu_ras_validate_threshold(adev, max_eeprom_records_count);
- /* Todo: During test the SMU might fail to read the eeprom through I2C
- * when the GPU is pending on XGMI reset during probe time
- * (Mostly after second bus reset), skip it now
- */
- if (adev->gmc.xgmi.pending_reset)
- return 0;
- ret = amdgpu_ras_eeprom_init(&con->eeprom_control, &exc_err_limit);
- /*
- * This calling fails when exc_err_limit is true or
- * ret != 0.
- */
- if (exc_err_limit || ret)
- goto free;
-
- if (con->eeprom_control.ras_num_recs) {
- ret = amdgpu_ras_load_bad_pages(adev);
+ if (init_bp_info) {
+ ret = amdgpu_ras_init_badpage_info(adev);
if (ret)
goto free;
+ }
- if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->send_hbm_bad_pages_num)
- adev->smu.ppt_funcs->send_hbm_bad_pages_num(&adev->smu, con->eeprom_control.ras_num_recs);
+ mutex_init(&con->page_rsv_lock);
+ INIT_KFIFO(con->poison_fifo);
+ mutex_init(&con->page_retirement_lock);
+ init_waitqueue_head(&con->page_retirement_wq);
+ atomic_set(&con->page_retirement_req_cnt, 0);
+ atomic_set(&con->poison_creation_count, 0);
+ atomic_set(&con->poison_consumption_count, 0);
+ con->page_retirement_thread =
+ kthread_run(amdgpu_ras_page_retirement_thread, adev, "umc_page_retirement");
+ if (IS_ERR(con->page_retirement_thread)) {
+ con->page_retirement_thread = NULL;
+ dev_warn(adev->dev, "Failed to create umc_page_retirement thread!!!\n");
}
+ INIT_DELAYED_WORK(&con->page_retirement_dwork, amdgpu_ras_do_page_retirement);
+ amdgpu_ras_ecc_log_init(&con->umc_ecc_log);
#ifdef CONFIG_X86_MCE_AMD
if ((adev->asic_type == CHIP_ALDEBARAN) &&
(adev->gmc.xgmi.connected_to_cpu))
@@ -2128,7 +3910,7 @@ out:
* Except error threshold exceeding case, other failure cases in this
* function would not fail amdgpu driver init.
*/
- if (!exc_err_limit)
+ if (!amdgpu_ras_is_rma(adev))
ret = 0;
else
ret = -EINVAL;
@@ -2140,25 +3922,75 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data = con->eh_data;
+ int max_flush_timeout = MAX_FLUSH_RETIRE_DWORK_TIMES;
+ bool ret;
/* recovery_init failed to init it, fini is useless */
if (!data)
return 0;
+ /* Save all cached bad pages to eeprom */
+ do {
+ flush_delayed_work(&con->page_retirement_dwork);
+ ret = amdgpu_ras_schedule_retirement_dwork(con, 0);
+ } while (ret && max_flush_timeout--);
+
+ if (con->page_retirement_thread)
+ kthread_stop(con->page_retirement_thread);
+
+ atomic_set(&con->page_retirement_req_cnt, 0);
+ atomic_set(&con->poison_creation_count, 0);
+
+ mutex_destroy(&con->page_rsv_lock);
+
cancel_work_sync(&con->recovery_work);
+ cancel_delayed_work_sync(&con->page_retirement_dwork);
+
+ amdgpu_ras_ecc_log_fini(&con->umc_ecc_log);
+
mutex_lock(&con->recovery_lock);
con->eh_data = NULL;
kfree(data->bps);
kfree(data);
mutex_unlock(&con->recovery_lock);
+ amdgpu_ras_critical_region_init(adev);
+#ifdef CONFIG_X86_MCE_AMD
+ amdgpu_unregister_bad_pages_mca_notifier(adev);
+#endif
return 0;
}
/* recovery end */
static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
{
+ if (amdgpu_sriov_vf(adev)) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ if (adev->asic_type == CHIP_IP_DISCOVERY) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ case IP_VERSION(14, 0, 3):
+ return true;
+ default:
+ return false;
+ }
+ }
+
return adev->asic_type == CHIP_VEGA10 ||
adev->asic_type == CHIP_VEGA20 ||
adev->asic_type == CHIP_ARCTURUS ||
@@ -2178,13 +4010,95 @@ static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
if (!ctx)
return;
- if (strnstr(ctx->vbios_version, "D16406",
- sizeof(ctx->vbios_version)) ||
- strnstr(ctx->vbios_version, "D36002",
- sizeof(ctx->vbios_version)))
+ if (strnstr(ctx->vbios_pn, "D16406",
+ sizeof(ctx->vbios_pn)) ||
+ strnstr(ctx->vbios_pn, "D36002",
+ sizeof(ctx->vbios_pn)))
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX);
}
+/* Query ras capability via atomfirmware interface */
+static void amdgpu_ras_query_ras_capablity_from_vbios(struct amdgpu_device *adev)
+{
+ /* mem_ecc cap */
+ if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
+ dev_info(adev->dev, "MEM ECC is active.\n");
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC |
+ 1 << AMDGPU_RAS_BLOCK__DF);
+ } else {
+ dev_info(adev->dev, "MEM ECC is not presented.\n");
+ }
+
+ /* sram_ecc cap */
+ if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
+ dev_info(adev->dev, "SRAM ECC is active.\n");
+ if (!amdgpu_sriov_vf(adev))
+ adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
+ 1 << AMDGPU_RAS_BLOCK__DF);
+ else
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF |
+ 1 << AMDGPU_RAS_BLOCK__SDMA |
+ 1 << AMDGPU_RAS_BLOCK__GFX);
+
+ /*
+ * VCN/JPEG RAS can be supported on both bare metal and
+ * SRIOV environment
+ */
+ if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(2, 6, 0) ||
+ amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 0) ||
+ amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 3) ||
+ amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(5, 0, 1))
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
+ 1 << AMDGPU_RAS_BLOCK__JPEG);
+ else
+ adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |
+ 1 << AMDGPU_RAS_BLOCK__JPEG);
+
+ /*
+ * XGMI RAS is not supported if xgmi num physical nodes
+ * is zero
+ */
+ if (!adev->gmc.xgmi.num_physical_nodes)
+ adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__XGMI_WAFL);
+ } else {
+ dev_info(adev->dev, "SRAM ECC is not presented.\n");
+ }
+}
+
+/* Query poison mode from umc/df IP callbacks */
+static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ bool df_poison, umc_poison;
+
+ /* poison setting is useless on SRIOV guest */
+ if (amdgpu_sriov_vf(adev) || !con)
+ return;
+
+ /* Init poison supported flag, the default value is false */
+ if (adev->gmc.xgmi.connected_to_cpu ||
+ adev->gmc.is_app_apu) {
+ /* enabled by default when GPU is connected to CPU */
+ con->poison_supported = true;
+ } else if (adev->df.funcs &&
+ adev->df.funcs->query_ras_poison_mode &&
+ adev->umc.ras &&
+ adev->umc.ras->query_ras_poison_mode) {
+ df_poison =
+ adev->df.funcs->query_ras_poison_mode(adev);
+ umc_poison =
+ adev->umc.ras->query_ras_poison_mode(adev);
+
+ /* Only when poison is set in both DF and UMC can we support it */
+ if (df_poison && umc_poison)
+ con->poison_supported = true;
+ else if (df_poison != umc_poison)
+ dev_warn(adev->dev,
+ "Poison setting is inconsistent in DF/UMC(%d:%d)!\n",
+ df_poison, umc_poison);
+ }
+}
+
/*
* check hardware's ras ability which will be saved in hw_supported.
* if hardware does not support ras, we can skip some ras initializtion and
@@ -2198,26 +4112,21 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
{
adev->ras_hw_enabled = adev->ras_enabled = 0;
- if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
- !amdgpu_ras_asic_supported(adev))
+ if (!amdgpu_ras_asic_supported(adev))
return;
- if (!adev->gmc.xgmi.connected_to_cpu) {
- if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
- dev_info(adev->dev, "MEM ECC is active.\n");
- adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC |
- 1 << AMDGPU_RAS_BLOCK__DF);
- } else {
- dev_info(adev->dev, "MEM ECC is not presented.\n");
- }
+ if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_virt_get_ras_capability(adev))
+ goto init_ras_enabled_flag;
+ }
- if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
- dev_info(adev->dev, "SRAM ECC is active.\n");
- adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
- 1 << AMDGPU_RAS_BLOCK__DF);
- } else {
- dev_info(adev->dev, "SRAM ECC is not presented.\n");
- }
+ /* query ras capability from psp */
+ if (amdgpu_psp_get_ras_capability(&adev->psp))
+ goto init_ras_enabled_flag;
+
+ /* query ras capability from bios */
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
+ amdgpu_ras_query_ras_capablity_from_vbios(adev);
} else {
/* driver only manages a few IP blocks RAS feature
* when GPU is connected cpu through XGMI */
@@ -2226,13 +4135,31 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
1 << AMDGPU_RAS_BLOCK__MMHUB);
}
+ /* apply asic specific settings (vega20 only for now) */
amdgpu_ras_get_quirks(adev);
+ /* query poison mode from umc/df ip callback */
+ amdgpu_ras_query_poison_mode(adev);
+
+init_ras_enabled_flag:
/* hw_supported needs to be aligned with RAS block mask. */
adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK;
adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
adev->ras_hw_enabled & amdgpu_ras_mask;
+
+ /* aca is disabled by default except for psp v13_0_6/v13_0_12/v13_0_14 */
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->aca.is_enabled =
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14));
+ }
+
+ /* bad page feature is not applicable to specific app platform */
+ if (adev->gmc.is_app_apu &&
+ amdgpu_ip_version(adev, UMC_HWIP, 0) == IP_VERSION(12, 0, 0))
+ amdgpu_bad_page_threshold = 0;
}
static void amdgpu_ras_counte_dw(struct work_struct *work)
@@ -2250,29 +4177,92 @@ static void amdgpu_ras_counte_dw(struct work_struct *work)
/* Cache new values.
*/
- if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count) == 0) {
+ if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, NULL) == 0) {
atomic_set(&con->ras_ce_count, ce_count);
atomic_set(&con->ras_ue_count, ue_count);
}
- pm_runtime_mark_last_busy(dev->dev);
Out:
pm_runtime_put_autosuspend(dev->dev);
}
+/*
+ * Build the RAS error-type schema mask for this device.
+ *
+ * Correctable, uncorrectable and parity error types are always part of the
+ * mask; the poison error type is added only when
+ * amdgpu_ras_is_poison_mode_supported() reports support.
+ */
+static int amdgpu_get_ras_schema(struct amdgpu_device *adev)
+{
+ /*
+ * The conditional has to be parenthesized: "?:" binds looser than "|",
+ * so without the parentheses the three unconditional flags would end
+ * up inside the false branch and be dropped whenever poison mode is
+ * supported.
+ */
+ return (amdgpu_ras_is_poison_mode_supported(adev) ? AMDGPU_RAS_ERROR__POISON : 0) |
+ AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE |
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE |
+ AMDGPU_RAS_ERROR__PARITY;
+}
+
+/*
+ * Reset a RAS event manager to its initial state: the whole structure is
+ * zeroed, the global sequence number starts at 0, and every per-type event
+ * state gets an invalid last_seqno and a zero count.
+ */
+static void ras_event_mgr_init(struct ras_event_manager *mgr)
+{
+ struct ras_event_state *event_state;
+ int i;
+
+ memset(mgr, 0, sizeof(*mgr));
+ atomic64_set(&mgr->seqno, 0);
+
+ for (i = 0; i < ARRAY_SIZE(mgr->event_state); i++) {
+ event_state = &mgr->event_state[i];
+ /* the memset above left 0 here; mark the slot as "no event seen" */
+ event_state->last_seqno = RAS_EVENT_INVALID_ID;
+ atomic64_set(&event_state->count, 0);
+ }
+}
+
+/*
+ * Select and (when appropriate) initialize the RAS event manager used by
+ * this device. Devices for which amdgpu_get_xgmi_hive() returns a hive use
+ * the manager embedded in that hive; all others use the per-device
+ * ras->__event_mgr. Does nothing when there is no RAS context.
+ */
+static void amdgpu_ras_event_mgr_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ struct amdgpu_hive_info *hive;
+
+ if (!ras)
+ return;
+
+ hive = amdgpu_get_xgmi_hive(adev);
+ ras->event_mgr = hive ? &hive->event_mgr : &ras->__event_mgr;
+
+ /* init event manager with node 0 on xgmi system */
+ /* the manager is not re-initialized while a reset is in recovery */
+ if (!amdgpu_reset_in_recovery(adev)) {
+ if (!hive || adev->gmc.xgmi.node_id == 0)
+ ras_event_mgr_init(ras->event_mgr);
+ }
+
+ /* pairs with amdgpu_get_xgmi_hive() above */
+ if (hive)
+ amdgpu_put_xgmi_hive(hive);
+}
+
+/*
+ * Set con->reserved_pages_in_bytes according to the MP0 IP version.
+ * Nothing is done when there is no RAS context or the device is an APU,
+ * and the field is left unchanged for MP0 versions not listed below.
+ */
+static void amdgpu_ras_init_reserved_vram_size(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!con || (adev->flags & AMD_IS_APU))
+ return;
+
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT;
+ break;
+ case IP_VERSION(13, 0, 14):
+ /* twice the default size */
+ con->reserved_pages_in_bytes = (AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT << 1);
+ break;
+ default:
+ break;
+ }
+}
+
int amdgpu_ras_init(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
int r;
- bool df_poison, umc_poison;
if (con)
return 0;
- con = kmalloc(sizeof(struct amdgpu_ras) +
+ con = kzalloc(sizeof(*con) +
sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT +
sizeof(struct ras_manager) * AMDGPU_RAS_MCA_BLOCK_COUNT,
- GFP_KERNEL|__GFP_ZERO);
+ GFP_KERNEL);
if (!con)
return -ENOMEM;
@@ -2301,7 +4291,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
goto release_con;
}
+ con->update_channel_flag = false;
con->features = 0;
+ con->schema = 0;
INIT_LIST_HEAD(&con->head);
/* Might need get this flag from vbios. */
con->flags = RAS_DEFAULT_FLAGS;
@@ -2309,54 +4301,95 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
/* initialize nbio ras function ahead of any other
* ras functions so hardware fatal error interrupt
* can be enabled as early as possible */
- switch (adev->asic_type) {
- case CHIP_VEGA20:
- case CHIP_ARCTURUS:
- case CHIP_ALDEBARAN:
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(7, 4, 0):
+ case IP_VERSION(7, 4, 1):
+ case IP_VERSION(7, 4, 4):
if (!adev->gmc.xgmi.connected_to_cpu)
- adev->nbio.ras_funcs = &nbio_v7_4_ras_funcs;
+ adev->nbio.ras = &nbio_v7_4_ras;
+ break;
+ case IP_VERSION(4, 3, 0):
+ if (adev->ras_hw_enabled & (1 << AMDGPU_RAS_BLOCK__DF))
+ /* unlike other generation of nbio ras,
+ * nbio v4_3 only support fatal error interrupt
+ * to inform software that DF is freezed due to
+ * system fatal error event. driver should not
+ * enable nbio ras in such case. Instead,
+ * check DF RAS */
+ adev->nbio.ras = &nbio_v4_3_ras;
+ break;
+ case IP_VERSION(6, 3, 1):
+ if (adev->ras_hw_enabled & (1 << AMDGPU_RAS_BLOCK__DF))
+ /* unlike other generation of nbio ras,
+ * nbif v6_3_1 only support fatal error interrupt
+ * to inform software that DF is freezed due to
+ * system fatal error event. driver should not
+ * enable nbio ras in such case. Instead,
+ * check DF RAS
+ */
+ adev->nbio.ras = &nbif_v6_3_1_ras;
+ break;
+ case IP_VERSION(7, 9, 0):
+ case IP_VERSION(7, 9, 1):
+ if (!adev->gmc.is_app_apu)
+ adev->nbio.ras = &nbio_v7_9_ras;
break;
default:
/* nbio ras is not available */
break;
}
- if (adev->nbio.ras_funcs &&
- adev->nbio.ras_funcs->init_ras_controller_interrupt) {
- r = adev->nbio.ras_funcs->init_ras_controller_interrupt(adev);
+ /* nbio ras block needs to be enabled ahead of other ras blocks
+ * to handle fatal error */
+ r = amdgpu_nbio_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ if (adev->nbio.ras &&
+ adev->nbio.ras->init_ras_controller_interrupt) {
+ r = adev->nbio.ras->init_ras_controller_interrupt(adev);
if (r)
goto release_con;
}
- if (adev->nbio.ras_funcs &&
- adev->nbio.ras_funcs->init_ras_err_event_athub_interrupt) {
- r = adev->nbio.ras_funcs->init_ras_err_event_athub_interrupt(adev);
+ if (adev->nbio.ras &&
+ adev->nbio.ras->init_ras_err_event_athub_interrupt) {
+ r = adev->nbio.ras->init_ras_err_event_athub_interrupt(adev);
if (r)
goto release_con;
}
- /* Init poison supported flag, the default value is false */
- if (adev->df.funcs &&
- adev->df.funcs->query_ras_poison_mode &&
- adev->umc.ras_funcs &&
- adev->umc.ras_funcs->query_ras_poison_mode) {
- df_poison =
- adev->df.funcs->query_ras_poison_mode(adev);
- umc_poison =
- adev->umc.ras_funcs->query_ras_poison_mode(adev);
- /* Only poison is set in both DF and UMC, we can support it */
- if (df_poison && umc_poison)
- con->poison_supported = true;
- else if (df_poison != umc_poison)
- dev_warn(adev->dev, "Poison setting is inconsistent in DF/UMC(%d:%d)!\n",
- df_poison, umc_poison);
- }
+ /* Packed socket_id to ras feature mask bits[31:29] */
+ if (adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id)
+ con->features |= ((adev->smuio.funcs->get_socket_id(adev)) <<
+ AMDGPU_RAS_FEATURES_SOCKETID_SHIFT);
+
+ /* Get RAS schema for particular SOC */
+ con->schema = amdgpu_get_ras_schema(adev);
+
+ amdgpu_ras_init_reserved_vram_size(adev);
if (amdgpu_ras_fs_init(adev)) {
r = -EINVAL;
goto release_con;
}
+ if (amdgpu_ras_aca_is_supported(adev)) {
+ if (amdgpu_aca_is_enabled(adev))
+ r = amdgpu_aca_init(adev);
+ else
+ r = amdgpu_mca_init(adev);
+ if (r)
+ goto release_con;
+ }
+
+ con->init_task_pid = task_pid_nr(current);
+ get_task_comm(con->init_task_comm, current);
+
+ mutex_init(&con->critical_region_lock);
+ INIT_LIST_HEAD(&con->critical_region_head);
+
dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
"hardware ability[%x] ras_mask[%x]\n",
adev->ras_hw_enabled, adev->ras_enabled);
@@ -2371,7 +4404,8 @@ release_con:
int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev)
{
- if (adev->gmc.xgmi.connected_to_cpu)
+ if (adev->gmc.xgmi.connected_to_cpu ||
+ adev->gmc.is_app_apu)
return 1;
return 0;
}
@@ -2406,12 +4440,12 @@ bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev)
}
/* helper function to handle common stuff in ip late init phase */
-int amdgpu_ras_late_init(struct amdgpu_device *adev,
- struct ras_common_if *ras_block,
- struct ras_fs_if *fs_info,
- struct ras_ih_if *ih_info)
+int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
{
+ struct amdgpu_ras_block_object *ras_obj = NULL;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_query_if *query_info;
unsigned long ue_count, ce_count;
int r;
@@ -2423,7 +4457,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
r = amdgpu_ras_feature_enable_on_boot(adev, ras_block, 1);
if (r) {
- if (adev->in_suspend || amdgpu_in_reset(adev)) {
+ if (adev->in_suspend || amdgpu_reset_in_recovery(adev)) {
/* in resume phase, if fail to enable ras,
* clean up all ras fs nodes, and disable ras */
goto cleanup;
@@ -2435,49 +4469,75 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
amdgpu_persistent_edc_harvesting(adev, ras_block);
/* in resume phase, no need to create ras fs node */
- if (adev->in_suspend || amdgpu_in_reset(adev))
+ if (adev->in_suspend || amdgpu_reset_in_recovery(adev))
return 0;
- if (ih_info->cb) {
- r = amdgpu_ras_interrupt_add_handler(adev, ih_info);
+ ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
+ if (ras_obj->ras_cb || (ras_obj->hw_ops &&
+ (ras_obj->hw_ops->query_poison_status ||
+ ras_obj->hw_ops->handle_poison_consumption))) {
+ r = amdgpu_ras_interrupt_add_handler(adev, ras_block);
if (r)
- goto interrupt;
+ goto cleanup;
}
- r = amdgpu_ras_sysfs_create(adev, fs_info);
- if (r)
- goto sysfs;
+ if (ras_obj->hw_ops &&
+ (ras_obj->hw_ops->query_ras_error_count ||
+ ras_obj->hw_ops->query_ras_error_status)) {
+ r = amdgpu_ras_sysfs_create(adev, ras_block);
+ if (r)
+ goto interrupt;
- /* Those are the cached values at init.
- */
- if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count) == 0) {
- atomic_set(&con->ras_ce_count, ce_count);
- atomic_set(&con->ras_ue_count, ue_count);
+ /* Those are the cached values at init.
+ */
+ query_info = kzalloc(sizeof(*query_info), GFP_KERNEL);
+ if (!query_info)
+ return -ENOMEM;
+ memcpy(&query_info->head, ras_block, sizeof(struct ras_common_if));
+
+ if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, query_info) == 0) {
+ atomic_set(&con->ras_ce_count, ce_count);
+ atomic_set(&con->ras_ue_count, ue_count);
+ }
+
+ kfree(query_info);
}
return 0;
-cleanup:
- amdgpu_ras_sysfs_remove(adev, ras_block);
-sysfs:
- if (ih_info->cb)
- amdgpu_ras_interrupt_remove_handler(adev, ih_info);
+
interrupt:
+ if (ras_obj->ras_cb)
+ amdgpu_ras_interrupt_remove_handler(adev, ras_block);
+cleanup:
amdgpu_ras_feature_enable(adev, ras_block, 0);
return r;
}
+/*
+ * Default late-init hook: forwards to amdgpu_ras_block_late_init() and
+ * returns its result unchanged.
+ */
+static int amdgpu_ras_block_late_init_default(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
+{
+ return amdgpu_ras_block_late_init(adev, ras_block);
+}
+
/* helper function to remove ras fs node and interrupt handler */
-void amdgpu_ras_late_fini(struct amdgpu_device *adev,
- struct ras_common_if *ras_block,
- struct ras_ih_if *ih_info)
+void amdgpu_ras_block_late_fini(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
{
- if (!ras_block || !ih_info)
+ struct amdgpu_ras_block_object *ras_obj;
+ if (!ras_block)
return;
amdgpu_ras_sysfs_remove(adev, ras_block);
- if (ih_info->cb)
- amdgpu_ras_interrupt_remove_handler(adev, ih_info);
- amdgpu_ras_feature_enable(adev, ras_block, 0);
+
+ ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
+ if (ras_obj->ras_cb)
+ amdgpu_ras_interrupt_remove_handler(adev, ras_block);
+}
+
+static void amdgpu_ras_block_late_fini_default(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
+{
+ return amdgpu_ras_block_late_fini(adev, ras_block);
}
/* do some init work after IP late init as dependence.
@@ -2526,10 +4586,64 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev)
amdgpu_ras_disable_all_features(adev, 0);
/* Make sure all ras objects are disabled. */
- if (con->features)
+ if (AMDGPU_RAS_GET_FEATURES(con->features))
amdgpu_ras_disable_all_features(adev, 1);
}
+/*
+ * Device-level RAS late init: sets up the event manager, resets/configures
+ * the ACA or MCA backend, then runs the late-init hook of every registered
+ * and supported RAS block.
+ *
+ * Returns 0 on success, or the first non-zero value returned by the ACA/MCA
+ * reset or by a block's own ras_late_init callback.
+ */
+int amdgpu_ras_late_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras_block_list *node, *tmp;
+ struct amdgpu_ras_block_object *obj;
+ int r;
+
+ amdgpu_ras_event_mgr_init(adev);
+
+ if (amdgpu_ras_aca_is_supported(adev)) {
+ /* the backend is only reset while a reset is in recovery */
+ if (amdgpu_reset_in_recovery(adev)) {
+ if (amdgpu_aca_is_enabled(adev))
+ r = amdgpu_aca_reset(adev);
+ else
+ r = amdgpu_mca_reset(adev);
+ if (r)
+ return r;
+ }
+
+ /* debug mode is switched off on non-VF devices; result is ignored */
+ if (!amdgpu_sriov_vf(adev)) {
+ if (amdgpu_aca_is_enabled(adev))
+ amdgpu_ras_set_aca_debug_mode(adev, false);
+ else
+ amdgpu_ras_set_mca_debug_mode(adev, false);
+ }
+ }
+
+ /* Guest side doesn't need init ras feature */
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_ras_telemetry_en(adev))
+ return 0;
+
+ list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
+ obj = node->ras_obj;
+ if (!obj) {
+ dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
+ continue;
+ }
+
+ if (!amdgpu_ras_is_supported(adev, obj->ras_comm.block))
+ continue;
+
+ if (obj->ras_late_init) {
+ r = obj->ras_late_init(adev, &obj->ras_comm);
+ if (r) {
+ dev_err(adev->dev, "%s failed to execute ras_late_init! ret:%d\n",
+ obj->ras_comm.name, r);
+ return r;
+ }
+ } else
+ /*
+ * NOTE(review): unlike the custom-hook path above, the
+ * return value of the default late init is discarded here
+ * - confirm this asymmetry is intentional.
+ */
+ amdgpu_ras_block_late_init_default(adev, &obj->ras_comm);
+ }
+
+ return 0;
+}
+
/* do some fini work before IP fini as dependence */
int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
{
@@ -2540,25 +4654,53 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
/* Need disable ras on all IPs here before ip [hw/sw]fini */
- amdgpu_ras_disable_all_features(adev, 0);
+ if (AMDGPU_RAS_GET_FEATURES(con->features))
+ amdgpu_ras_disable_all_features(adev, 0);
amdgpu_ras_recovery_fini(adev);
return 0;
}
int amdgpu_ras_fini(struct amdgpu_device *adev)
{
+ struct amdgpu_ras_block_list *ras_node, *tmp;
+ struct amdgpu_ras_block_object *obj = NULL;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
if (!adev->ras_enabled || !con)
return 0;
+ amdgpu_ras_critical_region_fini(adev);
+ mutex_destroy(&con->critical_region_lock);
+
+ list_for_each_entry_safe(ras_node, tmp, &adev->ras_list, node) {
+ if (ras_node->ras_obj) {
+ obj = ras_node->ras_obj;
+ if (amdgpu_ras_is_supported(adev, obj->ras_comm.block) &&
+ obj->ras_fini)
+ obj->ras_fini(adev, &obj->ras_comm);
+ else
+ amdgpu_ras_block_late_fini_default(adev, &obj->ras_comm);
+ }
+
+ /* Clear ras blocks from ras_list and free ras block list node */
+ list_del(&ras_node->node);
+ kfree(ras_node);
+ }
+
amdgpu_ras_fs_fini(adev);
amdgpu_ras_interrupt_remove_all(adev);
- WARN(con->features, "Feature mask is not cleared");
+ if (amdgpu_ras_aca_is_supported(adev)) {
+ if (amdgpu_aca_is_enabled(adev))
+ amdgpu_aca_fini(adev);
+ else
+ amdgpu_mca_fini(adev);
+ }
- if (con->features)
- amdgpu_ras_disable_all_features(adev, 1);
+ WARN(AMDGPU_RAS_GET_FEATURES(con->features), "Feature mask is not cleared");
+
+ if (AMDGPU_RAS_GET_FEATURES(con->features))
+ amdgpu_ras_disable_all_features(adev, 0);
cancel_delayed_work_sync(&con->ras_counte_delay_work);
@@ -2568,18 +4710,161 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
return 0;
}
-void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
+bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev)
{
- amdgpu_ras_check_supported(adev);
- if (!adev->ras_hw_enabled)
- return;
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (!ras)
+ return false;
+
+ return test_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state);
+}
+
+/*
+ * Set (status == true) or clear (status == false) the FED status, which is
+ * stored as bit AMDGPU_RAS_BLOCK__LAST of ras->ras_err_state. No-op when
+ * the device has no RAS context.
+ */
+void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (ras) {
+ if (status)
+ set_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state);
+ else
+ clear_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state);
+ }
+}
+
+/*
+ * Clear all recorded RAS error state bits and all pending GPU reset flags.
+ * No-op when the device has no RAS context.
+ */
+void amdgpu_ras_clear_err_state(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (ras) {
+ ras->ras_err_state = 0;
+ ras->gpu_reset_flags = 0;
+ }
+}
+/*
+ * Mark @block as being in error state by setting its bit in
+ * ras->ras_err_state. No-op when the device has no RAS context.
+ */
+void amdgpu_ras_set_err_poison(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (ras)
+ set_bit(block, &ras->ras_err_state);
+}
+
+/*
+ * Report whether @block is in error state.
+ *
+ * With AMDGPU_RAS_BLOCK__ANY the result is true if any bit of
+ * ras->ras_err_state is set. For a specific block the result is true if
+ * either that block's bit or the AMDGPU_RAS_BLOCK__LAST bit is set.
+ * Returns false when the device has no RAS context.
+ */
+bool amdgpu_ras_is_err_state(struct amdgpu_device *adev, int block)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (ras) {
+ if (block == AMDGPU_RAS_BLOCK__ANY)
+ return (ras->ras_err_state != 0);
+ else
+ return test_bit(block, &ras->ras_err_state) ||
+ test_bit(AMDGPU_RAS_BLOCK__LAST,
+ &ras->ras_err_state);
+ }
+
+ return false;
+}
+
+/*
+ * Return the event manager currently attached to the device's RAS context,
+ * or NULL when there is no RAS context. The returned pointer is whatever
+ * ras->event_mgr holds and is not checked here.
+ */
+static struct ras_event_manager *__get_ras_event_mgr(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (!ras)
+ return NULL;
+
+ return ras->event_mgr;
+}
+
+/*
+ * Record a new RAS event of @type: the manager-wide sequence number is
+ * incremented and stored as the type's last_seqno, and the type's event
+ * count is incremented.
+ *
+ * @caller is only used for the warning printed on failure (via %ps); no
+ * warning is printed when it is NULL.
+ *
+ * Returns 0 on success or when amdgpu_uniras_enabled() is true (in which
+ * case nothing is recorded), -EINVAL when @type is out of range or no
+ * event manager is available.
+ */
+int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_type type,
+ const void *caller)
+{
+ struct ras_event_manager *event_mgr;
+ struct ras_event_state *event_state;
+ int ret = 0;
+
+ if (amdgpu_uniras_enabled(adev))
+ return 0;
+
+ /* also guards the event_state[] index below */
+ if (type >= RAS_EVENT_TYPE_COUNT) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ event_mgr = __get_ras_event_mgr(adev);
+ if (!event_mgr) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ event_state = &event_mgr->event_state[type];
+ event_state->last_seqno = atomic64_inc_return(&event_mgr->seqno);
+ atomic64_inc(&event_state->count);
+
+out:
+ if (ret && caller)
+ dev_warn(adev->dev, "failed mark ras event (%d) in %ps, ret:%d\n",
+ (int)type, caller, ret);
+
+ return ret;
+}
+
+/*
+ * Return the sequence number of the most recently marked event of @type.
+ *
+ * Only fatal, poison-creation and poison-consumption events carry an id;
+ * RAS_EVENT_INVALID_ID is returned for any other type, for an out-of-range
+ * type, or when no event manager is available.
+ */
+u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type)
+{
+ struct ras_event_manager *event_mgr;
+ u64 id;
+
+ if (type >= RAS_EVENT_TYPE_COUNT)
+ return RAS_EVENT_INVALID_ID;
+
+ switch (type) {
+ case RAS_EVENT_TYPE_FATAL:
+ case RAS_EVENT_TYPE_POISON_CREATION:
+ case RAS_EVENT_TYPE_POISON_CONSUMPTION:
+ event_mgr = __get_ras_event_mgr(adev);
+ if (!event_mgr)
+ return RAS_EVENT_INVALID_ID;
+
+ id = event_mgr->event_state[type].last_seqno;
+ break;
+ case RAS_EVENT_TYPE_INVALID:
+ default:
+ id = RAS_EVENT_INVALID_ID;
+ break;
+ }
+
+ return id;
+}
+
+int amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
+{
if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
- dev_info(adev->dev, "uncorrectable hardware error"
- "(ERREVENT_ATHUB_INTERRUPT) detected!\n");
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ enum ras_event_type type = RAS_EVENT_TYPE_FATAL;
+ u64 event_id = RAS_EVENT_INVALID_ID;
+
+ if (amdgpu_uniras_enabled(adev))
+ return 0;
+ if (!amdgpu_ras_mark_ras_event(adev, type))
+ event_id = amdgpu_ras_acquire_event_id(adev, type);
+
+ RAS_EVENT_LOG(adev, event_id, "uncorrectable hardware error"
+ "(ERREVENT_ATHUB_INTERRUPT) detected!\n");
+
+ amdgpu_ras_set_fed(adev, true);
+ ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
amdgpu_ras_reset_gpu(adev);
}
+
+ return -EBUSY;
}
bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev)
@@ -2636,18 +4921,14 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
struct mce *m = (struct mce *)data;
struct amdgpu_device *adev = NULL;
uint32_t gpu_id = 0;
- uint32_t umc_inst = 0;
- uint32_t ch_inst, channel_index = 0;
- struct ras_err_data err_data = {0, 0, 0, NULL};
- struct eeprom_table_record err_rec;
- uint64_t retired_page;
+ uint32_t umc_inst = 0, ch_inst = 0;
/*
* If the error was generated in UMC_V2, which belongs to GPU UMCs,
* and error occurred in DramECC (Extended error code = 0) then only
* process the error, else bail out.
*/
- if (!m || !((smca_get_bank_type(m->bank) == SMCA_UMC_V2) &&
+ if (!m || !((smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC_V2) &&
(XEC(m->status, 0x3f) == 0x0)))
return NOTIFY_DONE;
@@ -2679,37 +4960,10 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d",
umc_inst, ch_inst);
- memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
-
- /*
- * Translate UMC channel address to Physical address
- */
- channel_index =
- adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num
- + ch_inst];
-
- retired_page = ADDR_OF_8KB_BLOCK(m->addr) |
- ADDR_OF_256B_BLOCK(channel_index) |
- OFFSET_IN_256B_BLOCK(m->addr);
-
- err_rec.address = m->addr;
- err_rec.retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
- err_rec.ts = (uint64_t)ktime_get_real_seconds();
- err_rec.err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
- err_rec.cu = 0;
- err_rec.mem_channel = channel_index;
- err_rec.mcumc_id = umc_inst;
-
- err_data.err_addr = &err_rec;
- err_data.err_addr_cnt = 1;
-
- if (amdgpu_bad_page_threshold != 0) {
- amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
- err_data.err_addr_cnt);
- amdgpu_ras_save_bad_pages(adev);
- }
-
- return NOTIFY_OK;
+ if (!amdgpu_umc_page_retirement_mca(adev, m->addr, ch_inst, umc_inst))
+ return NOTIFY_OK;
+ else
+ return NOTIFY_DONE;
}
static struct notifier_block amdgpu_bad_page_nb = {
@@ -2738,4 +4992,757 @@ static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev)
notifier_registered = true;
}
}
+/*
+ * Remove @adev from mce_adev_list and, once every slot in the list is
+ * empty, reset the list and unregister the bad-page notifier from the MCE
+ * decode chain.
+ */
+static void amdgpu_unregister_bad_pages_mca_notifier(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ /* nothing to do only if the notifier is unregistered AND the list is empty */
+ if (!notifier_registered && !mce_adev_list.num_gpu)
+ return;
+ /* clear this device's slot; j counts the slots that are now NULL */
+ for (i = 0, j = 0; i < mce_adev_list.num_gpu; i++) {
+ if (mce_adev_list.devs[i] == adev)
+ mce_adev_list.devs[i] = NULL;
+ if (!mce_adev_list.devs[i])
+ ++j;
+ }
+
+ /* every slot is empty: this was the last registered device */
+ if (j == mce_adev_list.num_gpu) {
+ mce_adev_list.num_gpu = 0;
+ /* Unregister x86 notifier with MCE subsystem. */
+ if (notifier_registered) {
+ mce_unregister_decode_chain(&amdgpu_bad_page_nb);
+ notifier_registered = false;
+ }
+ }
+}
#endif
+
+/*
+ * Return the RAS context stored in adev->psp.ras_context.ras, or NULL when
+ * @adev itself is NULL. The stored pointer may also be NULL.
+ */
+struct amdgpu_ras *amdgpu_ras_get_context(struct amdgpu_device *adev)
+{
+ if (!adev)
+ return NULL;
+
+ return adev->psp.ras_context.ras;
+}
+
+/*
+ * Store @ras_con as the device's RAS context (adev->psp.ras_context.ras).
+ * @ras_con is stored as-is, so passing NULL clears the context.
+ *
+ * Returns 0 on success, -EINVAL when @adev is NULL.
+ */
+int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con)
+{
+ if (!adev)
+ return -EINVAL;
+
+ adev->psp.ras_context.ras = ras_con;
+ return 0;
+}
+
+/*
+ * check if ras is supported on block, say, sdma, gfx
+ *
+ * Returns non-zero when RAS is usable on @block, 0 otherwise (including
+ * when @block is out of range). A block counts as supported when a RAS
+ * context exists and its bit is set in adev->ras_enabled, or through the
+ * poison-mode fallback described in the body.
+ */
+int amdgpu_ras_is_supported(struct amdgpu_device *adev,
+ unsigned int block)
+{
+ int ret = 0;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ /* also keeps the (1 << block) shifts below within the block range */
+ if (block >= AMDGPU_RAS_BLOCK_COUNT)
+ return 0;
+
+ ret = ras && (adev->ras_enabled & (1 << block));
+
+ /* For the special asic with mem ecc enabled but sram ecc
+ * not enabled, even if the ras block is not supported on
+ * .ras_enabled, if the asic supports poison mode and the
+ * ras block has ras configuration, it can be considered
+ * that the ras block supports ras function.
+ */
+ if (!ret &&
+ (block == AMDGPU_RAS_BLOCK__GFX ||
+ block == AMDGPU_RAS_BLOCK__SDMA ||
+ block == AMDGPU_RAS_BLOCK__VCN ||
+ block == AMDGPU_RAS_BLOCK__JPEG) &&
+ (amdgpu_ras_mask & (1 << block)) &&
+ amdgpu_ras_is_poison_mode_supported(adev) &&
+ amdgpu_ras_get_ras_block(adev, block, 0))
+ ret = 1;
+
+ return ret;
+}
+
+/*
+ * Schedule the RAS recovery work on the device's reset domain.
+ *
+ * Always returns 0.
+ *
+ * NOTE(review): ras is dereferenced below without a NULL check although
+ * amdgpu_ras_get_context() can return NULL - presumably callers only
+ * reach this with RAS initialized; confirm.
+ */
+int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ /* mode1 is the only selection for RMA status */
+ if (amdgpu_ras_is_rma(adev)) {
+ ras->gpu_reset_flags = 0;
+ ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ }
+
+ /* first caller to flip in_recovery 0 -> 1 owns the scheduling decision */
+ if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) {
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+ int hive_ras_recovery = 0;
+
+ if (hive) {
+ hive_ras_recovery = atomic_read(&hive->ras_recovery);
+ amdgpu_put_xgmi_hive(hive);
+ }
+ /* In the case of multiple GPUs, after a GPU has started
+ * resetting all GPUs on hive, other GPUs do not need to
+ * trigger GPU reset again.
+ */
+ if (!hive_ras_recovery)
+ amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
+ else
+ atomic_set(&ras->in_recovery, 0);
+ } else {
+ /* recovery already flagged: wait for the queued work, then queue again */
+ flush_work(&ras->recovery_work);
+ amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
+ }
+
+ return 0;
+}
+
+/*
+ * Enable or disable MCA debug mode through the SMU.
+ *
+ * Returns the result of amdgpu_mca_smu_set_debug_mode(), or 0 without
+ * doing anything when the device has no RAS context.
+ */
+int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int ret = 0;
+
+ if (con) {
+ ret = amdgpu_mca_smu_set_debug_mode(adev, enable);
+ /* the MCA path records its state in the same flag as the ACA path */
+ if (!ret)
+ con->is_aca_debug_mode = enable;
+ }
+
+ return ret;
+}
+
+/*
+ * Enable or disable debug mode through the SMU, using the ACA interface
+ * when ACA is enabled and the MCA interface otherwise. On success the new
+ * state is cached in con->is_aca_debug_mode.
+ *
+ * Returns the SMU call's result, or 0 without doing anything when the
+ * device has no RAS context.
+ */
+int amdgpu_ras_set_aca_debug_mode(struct amdgpu_device *adev, bool enable)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int ret = 0;
+
+ if (con) {
+ if (amdgpu_aca_is_enabled(adev))
+ ret = amdgpu_aca_smu_set_debug_mode(adev, enable);
+ else
+ ret = amdgpu_mca_smu_set_debug_mode(adev, enable);
+ if (!ret)
+ con->is_aca_debug_mode = enable;
+ }
+
+ return ret;
+}
+
+/*
+ * Report whether debug mode is in effect.
+ *
+ * Returns false when there is no RAS context. When the active backend
+ * (ACA if enabled, otherwise MCA) provides a set-debug-mode callback, the
+ * cached con->is_aca_debug_mode is returned; when it does not, true is
+ * returned.
+ */
+bool amdgpu_ras_get_aca_debug_mode(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs;
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+
+ if (!con)
+ return false;
+
+ if ((amdgpu_aca_is_enabled(adev) && smu_funcs && smu_funcs->set_debug_mode) ||
+ (!amdgpu_aca_is_enabled(adev) && mca_funcs && mca_funcs->mca_set_debug_mode))
+ return con->is_aca_debug_mode;
+ else
+ return true;
+}
+
+/*
+ * Determine how RAS error counts should be queried and store the result in
+ * *@error_query_mode.
+ *
+ * Returns false (with the mode set to AMDGPU_RAS_INVALID_ERROR_QUERY) when
+ * there is no RAS context, true otherwise.
+ */
+bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev,
+ unsigned int *error_query_mode)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+ const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs;
+
+ if (!con) {
+ *error_query_mode = AMDGPU_RAS_INVALID_ERROR_QUERY;
+ return false;
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ *error_query_mode = AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY;
+ } else if ((smu_funcs && smu_funcs->set_debug_mode) || (mca_funcs && mca_funcs->mca_set_debug_mode)) {
+ /* a debug-mode callback exists: debug mode selects direct, else firmware */
+ *error_query_mode =
+ (con->is_aca_debug_mode) ? AMDGPU_RAS_DIRECT_ERROR_QUERY : AMDGPU_RAS_FIRMWARE_ERROR_QUERY;
+ } else {
+ /* no debug-mode callback at all: direct query */
+ *error_query_mode = AMDGPU_RAS_DIRECT_ERROR_QUERY;
+ }
+
+ return true;
+}
+
+/*
+ * Register each ip ras block into amdgpu ras
+ *
+ * Allocates a list node pointing at @ras_block_obj and appends it to
+ * adev->ras_list. Only the pointer is stored; the object is not copied.
+ *
+ * Returns 0 on success, -EINVAL when either argument is NULL, -ENOMEM when
+ * the list node cannot be allocated.
+ */
+int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
+ struct amdgpu_ras_block_object *ras_block_obj)
+{
+ struct amdgpu_ras_block_list *ras_node;
+ if (!adev || !ras_block_obj)
+ return -EINVAL;
+
+ ras_node = kzalloc(sizeof(*ras_node), GFP_KERNEL);
+ if (!ras_node)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&ras_node->node);
+ ras_node->ras_obj = ras_block_obj;
+ list_add_tail(&ras_node->node, &adev->ras_list);
+
+ return 0;
+}
+
+/*
+ * Write a human-readable name for @err_type into @err_type_name:
+ * "correctable", "uncorrectable", or "unknown" for any other value.
+ *
+ * The buffer size is not passed in, so the caller must provide room for
+ * the longest string written here ("uncorrectable" plus the terminating
+ * NUL, 14 bytes). Nothing is written when @err_type_name is NULL.
+ */
+void amdgpu_ras_get_error_type_name(uint32_t err_type, char *err_type_name)
+{
+ if (!err_type_name)
+ return;
+
+ switch (err_type) {
+ case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE:
+ sprintf(err_type_name, "correctable");
+ break;
+ case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE:
+ sprintf(err_type_name, "uncorrectable");
+ break;
+ default:
+ sprintf(err_type_name, "unknown");
+ break;
+ }
+}
+
+/*
+ * Read the "lo" error status register described by @reg_entry for
+ * @instance and extract its MEMORY_ID field into *@memory_id.
+ *
+ * Returns false (leaving *@memory_id untouched) when @reg_entry is NULL,
+ * or when the entry is flagged AMDGPU_RAS_ERR_STATUS_VALID and the
+ * register's ERR_STATUS_VALID_FLAG is clear; true otherwise.
+ */
+bool amdgpu_ras_inst_get_memory_id_field(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_entry,
+ uint32_t instance,
+ uint32_t *memory_id)
+{
+ uint32_t err_status_lo_data, err_status_lo_offset;
+
+ if (!reg_entry)
+ return false;
+
+ err_status_lo_offset =
+ AMDGPU_RAS_REG_ENTRY_OFFSET(reg_entry->hwip, instance,
+ reg_entry->seg_lo, reg_entry->reg_lo);
+ err_status_lo_data = RREG32(err_status_lo_offset);
+
+ /* the valid flag is only enforced for entries that ask for it */
+ if ((reg_entry->flags & AMDGPU_RAS_ERR_STATUS_VALID) &&
+ !REG_GET_FIELD(err_status_lo_data, ERR_STATUS_LO, ERR_STATUS_VALID_FLAG))
+ return false;
+
+ *memory_id = REG_GET_FIELD(err_status_lo_data, ERR_STATUS_LO, MEMORY_ID);
+
+ return true;
+}
+
+/*
+ * Read the "hi" error status register described by @reg_entry for
+ * @instance and extract its ERR_CNT field into *@err_cnt.
+ *
+ * Returns false only when @reg_entry is NULL. A clear ERR_INFO_VALID_FLAG
+ * is merely logged at debug level; the count is still read and true is
+ * still returned.
+ */
+bool amdgpu_ras_inst_get_err_cnt_field(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_entry,
+ uint32_t instance,
+ unsigned long *err_cnt)
+{
+ uint32_t err_status_hi_data, err_status_hi_offset;
+
+ if (!reg_entry)
+ return false;
+
+ err_status_hi_offset =
+ AMDGPU_RAS_REG_ENTRY_OFFSET(reg_entry->hwip, instance,
+ reg_entry->seg_hi, reg_entry->reg_hi);
+ err_status_hi_data = RREG32(err_status_hi_offset);
+
+ if ((reg_entry->flags & AMDGPU_RAS_ERR_INFO_VALID) &&
+ !REG_GET_FIELD(err_status_hi_data, ERR_STATUS_HI, ERR_INFO_VALID_FLAG))
+ /* keep the check here in case we need to refer to the result later */
+ dev_dbg(adev->dev, "Invalid err_info field\n");
+
+ /* read err count */
+ *err_cnt = REG_GET_FIELD(err_status_hi_data, ERR_STATUS, ERR_CNT);
+
+ return true;
+}
+
+/*
+ * Walk @reg_list for one hardware @instance, add every non-zero error
+ * count found to *@err_count and log each hit.
+ *
+ * @reg_list/@reg_list_size: error status registers to inspect.
+ * @mem_list/@mem_list_size: optional memory_id -> name table. When
+ *   @mem_list is NULL the raw memory_id is logged; otherwise a hit is only
+ *   logged if its memory_id is found in the table (it is still counted).
+ * @err_type: only used to pick the error-type name shown in the log.
+ * @err_count: accumulator; it is added to, never reset, so the caller
+ *   must initialize it.
+ */
+void amdgpu_ras_inst_query_ras_error_count(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_list,
+ uint32_t reg_list_size,
+ const struct amdgpu_ras_memory_id_entry *mem_list,
+ uint32_t mem_list_size,
+ uint32_t instance,
+ uint32_t err_type,
+ unsigned long *err_count)
+{
+ uint32_t memory_id;
+ unsigned long err_cnt;
+ char err_type_name[16];
+ uint32_t i, j;
+
+ /* err_type does not change inside the loop, so resolve its name once */
+ amdgpu_ras_get_error_type_name(err_type, err_type_name);
+
+ for (i = 0; i < reg_list_size; i++) {
+ /* query memory_id from err_status_lo */
+ if (!amdgpu_ras_inst_get_memory_id_field(adev, &reg_list[i],
+ instance, &memory_id))
+ continue;
+
+ /* query err_cnt from err_status_hi */
+ if (!amdgpu_ras_inst_get_err_cnt_field(adev, &reg_list[i],
+ instance, &err_cnt) ||
+ !err_cnt)
+ continue;
+
+ *err_count += err_cnt;
+
+ /* log the errors; all printed integers are unsigned (%lu / %u) */
+ if (!mem_list) {
+ /* memory_list is not supported */
+ dev_info(adev->dev,
+ "%lu %s hardware errors detected in %s, instance: %u, memory_id: %u\n",
+ err_cnt, err_type_name,
+ reg_list[i].block_name,
+ instance, memory_id);
+ } else {
+ for (j = 0; j < mem_list_size; j++) {
+ if (memory_id == mem_list[j].memory_id) {
+ dev_info(adev->dev,
+ "%lu %s hardware errors detected in %s, instance: %u, memory block: %s\n",
+ err_cnt, err_type_name,
+ reg_list[i].block_name,
+ instance, mem_list[j].name);
+ break;
+ }
+ }
+ }
+ }
+}
+
+/*
+ * Clear the error status of one hardware @instance by writing 0 to both
+ * the "lo" and "hi" status register of every entry in @reg_list.
+ */
+void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_list,
+ uint32_t reg_list_size,
+ uint32_t instance)
+{
+ uint32_t err_status_lo_offset, err_status_hi_offset;
+ uint32_t i;
+
+ for (i = 0; i < reg_list_size; i++) {
+ err_status_lo_offset =
+ AMDGPU_RAS_REG_ENTRY_OFFSET(reg_list[i].hwip, instance,
+ reg_list[i].seg_lo, reg_list[i].reg_lo);
+ err_status_hi_offset =
+ AMDGPU_RAS_REG_ENTRY_OFFSET(reg_list[i].hwip, instance,
+ reg_list[i].seg_hi, reg_list[i].reg_hi);
+ WREG32(err_status_lo_offset, 0);
+ WREG32(err_status_hi_offset, 0);
+ }
+}
+
+/*
+ * Initialize @err_data: zero the whole structure and set up its (empty)
+ * error node list. Always returns 0.
+ */
+int amdgpu_ras_error_data_init(struct ras_err_data *err_data)
+{
+ memset(err_data, 0, sizeof(*err_data));
+
+ /* must come after the memset, which would wipe the list head */
+ INIT_LIST_HEAD(&err_data->err_node_list);
+
+ return 0;
+}
+
+/*
+ * Unlink @err_node from the list it is on and free it. NULL is accepted
+ * and ignored.
+ */
+static void amdgpu_ras_error_node_release(struct ras_err_node *err_node)
+{
+ if (!err_node)
+ return;
+
+ list_del(&err_node->node);
+ kvfree(err_node);
+}
+
+void amdgpu_ras_error_data_fini(struct ras_err_data *err_data)
+{
+ struct ras_err_node *err_node, *tmp;
+
+ list_for_each_entry_safe(err_node, tmp, &err_data->err_node_list, node)
+ amdgpu_ras_error_node_release(err_node);
+}
+
+/*
+ * Look up the error node whose mcm_info matches @mcm_info on both socket_id
+ * and die_id.
+ *
+ * Returns the first matching node, or NULL when either argument is NULL or
+ * no node matches.
+ */
+static struct ras_err_node *amdgpu_ras_error_find_node_by_id(struct ras_err_data *err_data,
+		struct amdgpu_smuio_mcm_config_info *mcm_info)
+{
+	struct ras_err_node *cur;
+
+	if (!err_data || !mcm_info)
+		return NULL;
+
+	for_each_ras_error(cur, err_data) {
+		const struct amdgpu_smuio_mcm_config_info *id = &cur->err_info.mcm_info;
+
+		if (id->socket_id != mcm_info->socket_id)
+			continue;
+		if (id->die_id != mcm_info->die_id)
+			continue;
+
+		return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Allocate a zeroed error node with an initialised (self-linked) list head.
+ * Returns NULL when the allocation fails.
+ */
+static struct ras_err_node *amdgpu_ras_error_node_new(void)
+{
+	struct ras_err_node *new_node;
+
+	new_node = kvzalloc(sizeof(struct ras_err_node), GFP_KERNEL);
+	if (new_node)
+		INIT_LIST_HEAD(&new_node->node);
+
+	return new_node;
+}
+
+/*
+ * list_sort() comparison callback for ras_err_node entries.
+ *
+ * Nodes are ordered by socket_id first and by die_id when the socket ids are
+ * equal. The result is the difference of the compared ids, so it is zero
+ * only when both socket_id and die_id match. @priv is unused.
+ */
+static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct list_head *b)
+{
+	struct ras_err_node *nodea = container_of(a, struct ras_err_node, node);
+	struct ras_err_node *nodeb = container_of(b, struct ras_err_node, node);
+	struct amdgpu_smuio_mcm_config_info *infoa = &nodea->err_info.mcm_info;
+	struct amdgpu_smuio_mcm_config_info *infob = &nodeb->err_info.mcm_info;
+
+	if (unlikely(infoa->socket_id != infob->socket_id))
+		return infoa->socket_id - infob->socket_id;
+
+	/* Same socket: the die id decides (yields 0 for identical ids). */
+	return infoa->die_id - infob->die_id;
+}
+
+/*
+ * Return the ras_err_info record for @mcm_info, creating it on first use.
+ *
+ * When no node matches @mcm_info, a new node is allocated, @mcm_info is
+ * copied into it, err_list_count is incremented, the node is appended to the
+ * list and the list is re-sorted with ras_err_info_cmp().
+ *
+ * Returns NULL when a new node is needed but cannot be allocated.
+ */
+static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data,
+				struct amdgpu_smuio_mcm_config_info *mcm_info)
+{
+	struct ras_err_node *node;
+
+	node = amdgpu_ras_error_find_node_by_id(err_data, mcm_info);
+	if (!node) {
+		node = amdgpu_ras_error_node_new();
+		if (!node)
+			return NULL;
+
+		memcpy(&node->err_info.mcm_info, mcm_info, sizeof(*mcm_info));
+
+		err_data->err_list_count++;
+		list_add_tail(&node->node, &err_data->err_node_list);
+		list_sort(NULL, &err_data->err_node_list, ras_err_info_cmp);
+	}
+
+	return &node->err_info;
+}
+
+/*
+ * Add @count to the ue_count of the record matching @mcm_info and to the
+ * ue_count total in @err_data.
+ *
+ * Returns 0 on success or when @count is zero (nothing is recorded then),
+ * -EINVAL when an argument is NULL or the record cannot be obtained.
+ */
+int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
+					struct amdgpu_smuio_mcm_config_info *mcm_info,
+					u64 count)
+{
+	struct ras_err_info *info;
+
+	if (!(err_data && mcm_info))
+		return -EINVAL;
+
+	if (count == 0)
+		return 0;
+
+	info = amdgpu_ras_error_get_info(err_data, mcm_info);
+	if (info == NULL)
+		return -EINVAL;
+
+	err_data->ue_count += count;
+	info->ue_count += count;
+
+	return 0;
+}
+
+/*
+ * Add @count to the ce_count of the record matching @mcm_info and to the
+ * ce_count total in @err_data.
+ *
+ * Returns 0 on success or when @count is zero (nothing is recorded then),
+ * -EINVAL when an argument is NULL or the record cannot be obtained.
+ */
+int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
+					struct amdgpu_smuio_mcm_config_info *mcm_info,
+					u64 count)
+{
+	struct ras_err_info *info;
+
+	if (!(err_data && mcm_info))
+		return -EINVAL;
+
+	if (count == 0)
+		return 0;
+
+	info = amdgpu_ras_error_get_info(err_data, mcm_info);
+	if (info == NULL)
+		return -EINVAL;
+
+	err_data->ce_count += count;
+	info->ce_count += count;
+
+	return 0;
+}
+
+/*
+ * Add @count to the de_count of the record matching @mcm_info and to the
+ * de_count total in @err_data.
+ *
+ * Returns 0 on success or when @count is zero (nothing is recorded then),
+ * -EINVAL when an argument is NULL or the record cannot be obtained.
+ */
+int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data,
+					struct amdgpu_smuio_mcm_config_info *mcm_info,
+					u64 count)
+{
+	struct ras_err_info *info;
+
+	if (!(err_data && mcm_info))
+		return -EINVAL;
+
+	if (count == 0)
+		return 0;
+
+	info = amdgpu_ras_error_get_info(err_data, mcm_info);
+	if (info == NULL)
+		return -EINVAL;
+
+	err_data->de_count += count;
+	info->de_count += count;
+
+	return 0;
+}
+
+#define mmMP0_SMN_C2PMSG_92 0x1609C
+#define mmMP0_SMN_C2PMSG_126 0x160BE
+/*
+ * Read the boot status (C2PMSG_92) and boot error (C2PMSG_126) registers of
+ * @instance and log one message for every error flag set in the boot error
+ * word. Socket, AID and HBM ids are decoded from the same word.
+ */
+static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev,
+						 u32 instance)
+{
+	u32 socket, aid, hbm;
+	u32 status;
+	u32 err;
+	u64 smn_addr;
+
+	/*
+	 * The SMN addressing pattern of other SOCs may differ from the one
+	 * used by aqua_vanjaram. If it does, this code needs revisiting and
+	 * the aqua_vanjaram helper should be replaced with a common one.
+	 */
+	smn_addr = (mmMP0_SMN_C2PMSG_92 << 2) +
+		   aqua_vanjaram_encode_ext_smn_addressing(instance);
+	status = amdgpu_device_indirect_rreg_ext(adev, smn_addr);
+
+	smn_addr = (mmMP0_SMN_C2PMSG_126 << 2) +
+		   aqua_vanjaram_encode_ext_smn_addressing(instance);
+	err = amdgpu_device_indirect_rreg_ext(adev, smn_addr);
+
+	socket = AMDGPU_RAS_GPU_ERR_SOCKET_ID(err);
+	aid = AMDGPU_RAS_GPU_ERR_AID_ID(err);
+
+	/* A raw HBM id field of 1 is reported as hbm 0, anything else as hbm 1. */
+	if (AMDGPU_RAS_GPU_ERR_HBM_ID(err) == 1)
+		hbm = 0;
+	else
+		hbm = 1;
+
+	if (AMDGPU_RAS_GPU_ERR_MEM_TRAINING(err))
+		dev_info(adev->dev,
+			 "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, memory training failed\n",
+			 socket, aid, hbm, status);
+
+	if (AMDGPU_RAS_GPU_ERR_FW_LOAD(err))
+		dev_info(adev->dev,
+			 "socket: %d, aid: %d, fw_status: 0x%x, firmware load failed at boot time\n",
+			 socket, aid, status);
+
+	if (AMDGPU_RAS_GPU_ERR_WAFL_LINK_TRAINING(err))
+		dev_info(adev->dev,
+			 "socket: %d, aid: %d, fw_status: 0x%x, wafl link training failed\n",
+			 socket, aid, status);
+
+	if (AMDGPU_RAS_GPU_ERR_XGMI_LINK_TRAINING(err))
+		dev_info(adev->dev,
+			 "socket: %d, aid: %d, fw_status: 0x%x, xgmi link training failed\n",
+			 socket, aid, status);
+
+	if (AMDGPU_RAS_GPU_ERR_USR_CP_LINK_TRAINING(err))
+		dev_info(adev->dev,
+			 "socket: %d, aid: %d, fw_status: 0x%x, usr cp link training failed\n",
+			 socket, aid, status);
+
+	if (AMDGPU_RAS_GPU_ERR_USR_DP_LINK_TRAINING(err))
+		dev_info(adev->dev,
+			 "socket: %d, aid: %d, fw_status: 0x%x, usr dp link training failed\n",
+			 socket, aid, status);
+
+	if (AMDGPU_RAS_GPU_ERR_HBM_MEM_TEST(err))
+		dev_info(adev->dev,
+			 "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, hbm memory test failed\n",
+			 socket, aid, hbm, status);
+
+	if (AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(err))
+		dev_info(adev->dev,
+			 "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, hbm bist test failed\n",
+			 socket, aid, hbm, status);
+
+	if (AMDGPU_RAS_GPU_ERR_DATA_ABORT(err))
+		dev_info(adev->dev,
+			 "socket: %d, aid: %d, fw_status: 0x%x, data abort exception\n",
+			 socket, aid, status);
+
+	if (AMDGPU_RAS_GPU_ERR_GENERIC(err))
+		dev_info(adev->dev,
+			 "socket: %d, aid: %d, fw_status: 0x%x, Boot Controller Generic Error\n",
+			 socket, aid, status);
+}
+
+/*
+ * Poll the boot status register (C2PMSG_92) of @instance.
+ *
+ * The register is read up to AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT times,
+ * sleeping 1 ms after each read that does not show the steady status.
+ *
+ * Returns false as soon as the masked status equals
+ * AMDGPU_RAS_BOOT_STEADY_STATUS, true when the polling limit is exhausted.
+ */
+static bool amdgpu_ras_boot_error_detected(struct amdgpu_device *adev,
+					   u32 instance)
+{
+	u64 status_addr;
+	u32 status;
+	int attempt = 0;
+
+	status_addr = (mmMP0_SMN_C2PMSG_92 << 2) +
+		      aqua_vanjaram_encode_ext_smn_addressing(instance);
+
+	while (attempt < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT) {
+		status = amdgpu_device_indirect_rreg_ext(adev, status_addr);
+		if ((status & AMDGPU_RAS_BOOT_STATUS_MASK) == AMDGPU_RAS_BOOT_STEADY_STATUS)
+			return false;
+
+		msleep(1);
+		attempt++;
+	}
+
+	return true;
+}
+
+/*
+ * Check each of the @num_instances instances for a boot error and log the
+ * decoded boot errors of every instance on which one is detected.
+ */
+void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances)
+{
+	u32 inst;
+
+	for (inst = 0; inst < num_instances; inst++) {
+		if (!amdgpu_ras_boot_error_detected(adev, inst))
+			continue;
+
+		amdgpu_ras_boot_time_error_reporting(adev, inst);
+	}
+}
+
+/*
+ * Reserve the GPU page with page frame number @pfn in the VRAM manager.
+ *
+ * Pages inside a recorded critical region are left alone and 0 is returned.
+ * Otherwise, under page_rsv_lock, the page status is queried and the page is
+ * reserved only when the query reports -ENOENT.
+ *
+ * Returns the query result, or the reservation result when a reservation
+ * was attempted.
+ */
+int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct amdgpu_vram_mgr *vram_mgr = &adev->mman.vram_mgr;
+	uint64_t addr = pfn << AMDGPU_GPU_PAGE_SHIFT;
+	int status;
+
+	if (amdgpu_ras_check_critical_address(adev, addr))
+		return 0;
+
+	mutex_lock(&con->page_rsv_lock);
+
+	status = amdgpu_vram_mgr_query_page_status(vram_mgr, addr);
+	if (status == -ENOENT)
+		status = amdgpu_vram_mgr_reserve_range(vram_mgr, addr,
+						       AMDGPU_GPU_PAGE_SIZE);
+
+	mutex_unlock(&con->page_rsv_lock);
+
+	return status;
+}
+
+/*
+ * printf-style RAS logger emitting at KERN_INFO on @adev's device.
+ *
+ * When @event_id is valid per RAS_EVENT_ID_IS_VALID(), the message is
+ * prefixed with "{<event_id>}"; otherwise it is printed without a prefix.
+ */
+void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id,
+				const char *fmt, ...)
+{
+	va_list ap;
+	struct va_format vaf;
+
+	va_start(ap, fmt);
+
+	/* %pV lets dev_printk() expand the caller's format and arguments. */
+	vaf.fmt = fmt;
+	vaf.va = &ap;
+
+	if (!RAS_EVENT_ID_IS_VALID(event_id))
+		dev_printk(KERN_INFO, adev->dev, "%pV", &vaf);
+	else
+		dev_printk(KERN_INFO, adev->dev, "{%llu}%pV", event_id, &vaf);
+
+	va_end(ap);
+}
+
+/*
+ * Report the RMA state of @adev.
+ *
+ * With uniras enabled the answer comes from amdgpu_ras_mgr_is_rma();
+ * otherwise it is the is_rma flag of the RAS context, or false when there is
+ * no RAS context.
+ */
+bool amdgpu_ras_is_rma(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+	if (amdgpu_uniras_enabled(adev))
+		return amdgpu_ras_mgr_is_rma(adev);
+
+	return con ? con->is_rma : false;
+}
+
+/*
+ * Record the VRAM blocks backing @bo as critical regions.
+ *
+ * Under critical_region_lock: if @bo already appears on the critical region
+ * list nothing is added; otherwise one ras_critical_region (bo, start, size)
+ * is appended for every block of the bo's VRAM manager resource.
+ *
+ * Returns 0 on success (including the already-recorded case), -EINVAL when
+ * @bo or its resource is NULL, -ENOMEM when a region cannot be allocated.
+ * Regions appended before an allocation failure stay on the list.
+ */
+int amdgpu_ras_add_critical_region(struct amdgpu_device *adev,
+				   struct amdgpu_bo *bo)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct amdgpu_vram_mgr_resource *vram_res;
+	struct ras_critical_region *entry;
+	struct drm_buddy_block *blk;
+	int err = 0;
+
+	if (!bo || !bo->tbo.resource)
+		return -EINVAL;
+
+	vram_res = to_amdgpu_vram_mgr_resource(bo->tbo.resource);
+
+	mutex_lock(&con->critical_region_lock);
+
+	/* Nothing to do when this bo has been recorded before. */
+	list_for_each_entry(entry, &con->critical_region_head, node) {
+		if (entry->bo == bo)
+			goto unlock;
+	}
+
+	/* One region per VRAM block of the bo. */
+	list_for_each_entry(blk, &vram_res->blocks, link) {
+		entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+		if (!entry) {
+			err = -ENOMEM;
+			break;
+		}
+
+		entry->bo = bo;
+		entry->start = amdgpu_vram_mgr_block_start(blk);
+		entry->size = amdgpu_vram_mgr_block_size(blk);
+		list_add_tail(&entry->node, &con->critical_region_head);
+	}
+
+unlock:
+	mutex_unlock(&con->critical_region_lock);
+
+	return err;
+}
+
+/*
+ * Register the bo at adev->mman.fw_reserved_memory as a critical region.
+ * The result of the registration is not checked.
+ */
+static void amdgpu_ras_critical_region_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_bo *fw_bo = adev->mman.fw_reserved_memory;
+
+	amdgpu_ras_add_critical_region(adev, fw_bo);
+}
+
+/* Unlink and free every recorded critical region under critical_region_lock. */
+static void amdgpu_ras_critical_region_fini(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct ras_critical_region *entry;
+
+	mutex_lock(&con->critical_region_lock);
+
+	/* Drain from the front until the list is empty. */
+	while (!list_empty(&con->critical_region_head)) {
+		entry = list_first_entry(&con->critical_region_head,
+					 struct ras_critical_region, node);
+		list_del(&entry->node);
+		kfree(entry);
+	}
+
+	mutex_unlock(&con->critical_region_lock);
+}
+
+/*
+ * Tell whether @addr lies inside any recorded critical region, i.e. in
+ * [start, start + size) of some region. The list is walked under
+ * critical_region_lock.
+ */
+bool amdgpu_ras_check_critical_address(struct amdgpu_device *adev, uint64_t addr)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct ras_critical_region *entry;
+	bool hit = false;
+
+	mutex_lock(&con->critical_region_lock);
+
+	list_for_each_entry(entry, &con->critical_region_head, node) {
+		if (addr < entry->start)
+			continue;
+		if (addr >= (entry->start + entry->size))
+			continue;
+
+		hit = true;
+		break;
+	}
+
+	mutex_unlock(&con->critical_region_lock);
+
+	return hit;
+}
+
+/*
+ * Call amdgpu_ras_mgr_pre_reset() for every device on @device_list (linked
+ * through reset_list) that has uniras enabled. @adev itself is not used.
+ */
+void amdgpu_ras_pre_reset(struct amdgpu_device *adev,
+			  struct list_head *device_list)
+{
+	struct amdgpu_device *dev;
+
+	list_for_each_entry(dev, device_list, reset_list) {
+		if (!amdgpu_uniras_enabled(dev))
+			continue;
+
+		amdgpu_ras_mgr_pre_reset(dev);
+	}
+}
+
+/*
+ * Call amdgpu_ras_mgr_post_reset() for every device on @device_list (linked
+ * through reset_list) that has uniras enabled. @adev itself is not used.
+ */
+void amdgpu_ras_post_reset(struct amdgpu_device *adev,
+			   struct list_head *device_list)
+{
+	struct amdgpu_device *dev;
+
+	list_for_each_entry(dev, device_list, reset_list) {
+		if (!amdgpu_uniras_enabled(dev))
+			continue;
+
+		amdgpu_ras_mgr_post_reset(dev);
+	}
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index e36f4de9fa55..ff44190d7d98 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -26,12 +26,57 @@
#include <linux/debugfs.h>
#include <linux/list.h>
-#include "amdgpu.h"
-#include "amdgpu_psp.h"
+#include <linux/kfifo.h>
+#include <linux/radix-tree.h>
#include "ta_ras_if.h"
#include "amdgpu_ras_eeprom.h"
+#include "amdgpu_smuio.h"
+#include "amdgpu_aca.h"
+
+struct amdgpu_iv_entry;
+
+#define AMDGPU_RAS_GPU_ERR_MEM_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 0, 0)
+#define AMDGPU_RAS_GPU_ERR_FW_LOAD(x) AMDGPU_GET_REG_FIELD(x, 1, 1)
+#define AMDGPU_RAS_GPU_ERR_WAFL_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 2, 2)
+#define AMDGPU_RAS_GPU_ERR_XGMI_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 3, 3)
+#define AMDGPU_RAS_GPU_ERR_USR_CP_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 4, 4)
+#define AMDGPU_RAS_GPU_ERR_USR_DP_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 5, 5)
+#define AMDGPU_RAS_GPU_ERR_HBM_MEM_TEST(x) AMDGPU_GET_REG_FIELD(x, 6, 6)
+#define AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(x) AMDGPU_GET_REG_FIELD(x, 7, 7)
+#define AMDGPU_RAS_GPU_ERR_SOCKET_ID(x) AMDGPU_GET_REG_FIELD(x, 10, 8)
+#define AMDGPU_RAS_GPU_ERR_AID_ID(x) AMDGPU_GET_REG_FIELD(x, 12, 11)
+#define AMDGPU_RAS_GPU_ERR_HBM_ID(x) AMDGPU_GET_REG_FIELD(x, 14, 13)
+#define AMDGPU_RAS_GPU_ERR_DATA_ABORT(x) AMDGPU_GET_REG_FIELD(x, 29, 29)
+#define AMDGPU_RAS_GPU_ERR_GENERIC(x) AMDGPU_GET_REG_FIELD(x, 30, 30)
+
+#define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 100
+#define AMDGPU_RAS_BOOT_STEADY_STATUS 0xBA
+#define AMDGPU_RAS_BOOT_STATUS_MASK 0xFF
#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0)
+/* position of instance value in sub_block_index of
+ * ta_ras_trigger_error_input, the sub block uses lower 12 bits
+ */
+#define AMDGPU_RAS_INST_MASK 0xfffff000
+#define AMDGPU_RAS_INST_SHIFT 0xc
+
+#define AMDGPU_RAS_FEATURES_SOCKETID_SHIFT 29
+#define AMDGPU_RAS_FEATURES_SOCKETID_MASK 0xe0000000
+
+/* Reserve 8 physical DRAM rows for possible retirement.
+ * In the worst case, this costs 8 * 2MB of memory in the VRAM domain. */
+#define AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT (16ULL << 20)
+/* The high three bits indicate the socket id */
+#define AMDGPU_RAS_GET_FEATURES(val) ((val) & ~AMDGPU_RAS_FEATURES_SOCKETID_MASK)
+
+#define RAS_EVENT_INVALID_ID (BIT_ULL(63))
+#define RAS_EVENT_ID_IS_VALID(x) (!((x) & BIT_ULL(63)))
+
+#define RAS_EVENT_LOG(adev, id, fmt, ...) \
+ amdgpu_ras_event_log_print((adev), (id), (fmt), ##__VA_ARGS__)
+
+#define amdgpu_ras_mark_ras_event(adev, type) \
+ (amdgpu_ras_mark_ras_event_caller((adev), (type), __builtin_return_address(0)))
enum amdgpu_ras_block {
AMDGPU_RAS_BLOCK__UMC = 0,
@@ -49,8 +94,14 @@ enum amdgpu_ras_block {
AMDGPU_RAS_BLOCK__MP1,
AMDGPU_RAS_BLOCK__FUSE,
AMDGPU_RAS_BLOCK__MCA,
-
- AMDGPU_RAS_BLOCK__LAST
+ AMDGPU_RAS_BLOCK__VCN,
+ AMDGPU_RAS_BLOCK__JPEG,
+ AMDGPU_RAS_BLOCK__IH,
+ AMDGPU_RAS_BLOCK__MPIO,
+ AMDGPU_RAS_BLOCK__MMSCH,
+
+ AMDGPU_RAS_BLOCK__LAST,
+ AMDGPU_RAS_BLOCK__ANY = -1
};
enum amdgpu_ras_mca_block {
@@ -312,6 +363,53 @@ enum amdgpu_ras_ret {
AMDGPU_RAS_PT,
};
+enum amdgpu_ras_error_query_mode {
+ AMDGPU_RAS_INVALID_ERROR_QUERY = 0,
+ AMDGPU_RAS_DIRECT_ERROR_QUERY = 1,
+ AMDGPU_RAS_FIRMWARE_ERROR_QUERY = 2,
+ AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY = 3,
+};
+
+/* ras error status register fields */
+#define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG__SHIFT 0x0
+#define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG_MASK 0x00000001L
+#define ERR_STATUS_LO__MEMORY_ID__SHIFT 0x18
+#define ERR_STATUS_LO__MEMORY_ID_MASK 0xFF000000L
+#define ERR_STATUS_HI__ERR_INFO_VALID_FLAG__SHIFT 0x2
+#define ERR_STATUS_HI__ERR_INFO_VALID_FLAG_MASK 0x00000004L
+#define ERR_STATUS__ERR_CNT__SHIFT 0x17
+#define ERR_STATUS__ERR_CNT_MASK 0x03800000L
+
+#define AMDGPU_RAS_REG_ENTRY(ip, inst, reg_lo, reg_hi) \
+ ip##_HWIP, inst, reg_lo##_BASE_IDX, reg_lo, reg_hi##_BASE_IDX, reg_hi
+
+#define AMDGPU_RAS_REG_ENTRY_OFFSET(hwip, ip_inst, segment, reg) \
+ (adev->reg_offset[hwip][ip_inst][segment] + (reg))
+
+#define AMDGPU_RAS_ERR_INFO_VALID (1 << 0)
+#define AMDGPU_RAS_ERR_STATUS_VALID (1 << 1)
+#define AMDGPU_RAS_ERR_ADDRESS_VALID (1 << 2)
+
+#define AMDGPU_RAS_GPU_RESET_MODE2_RESET (0x1 << 0)
+#define AMDGPU_RAS_GPU_RESET_MODE1_RESET (0x1 << 1)
+
+struct amdgpu_ras_err_status_reg_entry {
+ uint32_t hwip;
+ uint32_t ip_inst;
+ uint32_t seg_lo;
+ uint32_t reg_lo;
+ uint32_t seg_hi;
+ uint32_t reg_hi;
+ uint32_t reg_inst;
+ uint32_t flags;
+ const char *block_name;
+};
+
+struct amdgpu_ras_memory_id_entry {
+ uint32_t memory_id;
+ const char *name;
+};
+
struct ras_common_if {
enum amdgpu_ras_block block;
enum amdgpu_ras_error_type type;
@@ -319,13 +417,130 @@ struct ras_common_if {
char name[32];
};
+#define MAX_UMC_CHANNEL_NUM 32
+
+struct ecc_info_per_ch {
+ uint16_t ce_count_lo_chip;
+ uint16_t ce_count_hi_chip;
+ uint64_t mca_umc_status;
+ uint64_t mca_umc_addr;
+ uint64_t mca_ceumc_addr;
+};
+
+struct umc_ecc_info {
+ struct ecc_info_per_ch ecc[MAX_UMC_CHANNEL_NUM];
+
+ /* Indicates whether the SMU ECC table supports
+ * recording correctable error addresses
+ */
+ int record_ce_addr_supported;
+};
+
+enum ras_event_type {
+ RAS_EVENT_TYPE_INVALID = 0,
+ RAS_EVENT_TYPE_FATAL,
+ RAS_EVENT_TYPE_POISON_CREATION,
+ RAS_EVENT_TYPE_POISON_CONSUMPTION,
+ RAS_EVENT_TYPE_COUNT,
+};
+
+struct ras_event_state {
+ u64 last_seqno;
+ atomic64_t count;
+};
+
+struct ras_event_manager {
+ atomic64_t seqno;
+ struct ras_event_state event_state[RAS_EVENT_TYPE_COUNT];
+};
+
+struct ras_event_id {
+ enum ras_event_type type;
+ u64 event_id;
+};
+
+struct ras_query_context {
+ struct ras_event_id evid;
+};
+
+typedef int (*pasid_notify)(struct amdgpu_device *adev,
+ uint16_t pasid, void *data);
+
+struct ras_poison_msg {
+ enum amdgpu_ras_block block;
+ uint16_t pasid;
+ uint32_t reset;
+ pasid_notify pasid_fn;
+ void *data;
+};
+
+struct ras_err_pages {
+ uint32_t count;
+ uint64_t *pfn;
+};
+
+struct ras_ecc_err {
+ uint64_t status;
+ uint64_t ipid;
+ uint64_t addr;
+ uint64_t pa_pfn;
+ /* save global channel index across all UMC instances */
+ uint32_t channel_idx;
+ struct ras_err_pages err_pages;
+};
+
+struct ras_ecc_log_info {
+ struct mutex lock;
+ struct radix_tree_root de_page_tree;
+ uint64_t de_queried_count;
+ uint64_t consumption_q_count;
+};
+
+struct ras_critical_region {
+ struct list_head node;
+ struct amdgpu_bo *bo;
+ uint64_t start;
+ uint64_t size;
+};
+
+struct ras_eeprom_table_version {
+ uint32_t minor : 16;
+ uint32_t major : 16;
+};
+
+struct ras_eeprom_smu_funcs {
+ int (*get_ras_table_version)(struct amdgpu_device *adev,
+ uint32_t *table_version);
+ int (*get_badpage_count)(struct amdgpu_device *adev, uint32_t *count, uint32_t timeout);
+ int (*get_badpage_mca_addr)(struct amdgpu_device *adev, uint16_t index, uint64_t *mca_addr);
+ int (*set_timestamp)(struct amdgpu_device *adev, uint64_t timestamp);
+ int (*get_timestamp)(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *timestamp);
+ int (*get_badpage_ipid)(struct amdgpu_device *adev, uint16_t index, uint64_t *ipid);
+ int (*erase_ras_table)(struct amdgpu_device *adev, uint32_t *result);
+};
+
+enum ras_smu_feature_flags {
+ RAS_SMU_FEATURE_BIT__RAS_EEPROM = BIT_ULL(0),
+};
+
+struct ras_smu_drv {
+ const struct ras_eeprom_smu_funcs *smu_eeprom_funcs;
+ void (*ras_smu_feature_flags)(struct amdgpu_device *adev, uint64_t *flags);
+};
+
struct amdgpu_ras {
+ void *ras_mgr;
/* ras infrastructure */
/* for ras itself. */
uint32_t features;
+ uint32_t schema;
struct list_head head;
/* sysfs */
struct device_attribute features_attr;
+ struct device_attribute version_attr;
+ struct device_attribute schema_attr;
+ struct device_attribute event_state_attr;
struct bin_attribute badpages_attr;
struct dentry *de_ras_eeprom_table;
/* block array */
@@ -334,6 +549,7 @@ struct amdgpu_ras {
/* gpu recovery */
struct work_struct recovery_work;
atomic_t in_recovery;
+ atomic_t rma_in_recovery;
struct amdgpu_device *adev;
/* error handler data */
struct ras_err_handler_data *eh_data;
@@ -358,25 +574,92 @@ struct amdgpu_ras {
struct delayed_work ras_counte_delay_work;
atomic_t ras_ue_count;
atomic_t ras_ce_count;
+
+ /* record umc error info queried from smu */
+ struct umc_ecc_info umc_ecc;
+
+ /* Indicates whether the SMU needs to update the bad channel info */
+ bool update_channel_flag;
+ /* Record status of smu mca debug mode */
+ bool is_aca_debug_mode;
+ bool is_rma;
+
+ /* Record special requirements of gpu reset caller */
+ uint32_t gpu_reset_flags;
+
+ struct task_struct *page_retirement_thread;
+ wait_queue_head_t page_retirement_wq;
+ struct mutex page_retirement_lock;
+ atomic_t page_retirement_req_cnt;
+ atomic_t poison_creation_count;
+ atomic_t poison_consumption_count;
+ struct mutex page_rsv_lock;
+ DECLARE_KFIFO(poison_fifo, struct ras_poison_msg, 128);
+ struct ras_ecc_log_info umc_ecc_log;
+ struct delayed_work page_retirement_dwork;
+
+ /* ras errors detected */
+ unsigned long ras_err_state;
+
+ /* RAS event manager */
+ struct ras_event_manager __event_mgr;
+ struct ras_event_manager *event_mgr;
+
+ uint64_t reserved_pages_in_bytes;
+
+ pid_t init_task_pid;
+ char init_task_comm[TASK_COMM_LEN];
+
+ int bad_page_num;
+
+ struct list_head critical_region_head;
+ struct mutex critical_region_lock;
+
+ /* Protect poison injection */
+ struct mutex poison_lock;
+
+ /* Disable/Enable uniras switch */
+ bool uniras_enabled;
+ const struct ras_smu_drv *ras_smu_drv;
};
struct ras_fs_data {
- char sysfs_name[32];
+ char sysfs_name[48];
char debugfs_name[32];
};
+struct ras_err_info {
+ struct amdgpu_smuio_mcm_config_info mcm_info;
+ u64 ce_count;
+ u64 ue_count;
+ u64 de_count;
+};
+
+struct ras_err_node {
+ struct list_head node;
+ struct ras_err_info err_info;
+};
+
struct ras_err_data {
unsigned long ue_count;
unsigned long ce_count;
+ unsigned long de_count;
unsigned long err_addr_cnt;
struct eeprom_table_record *err_addr;
+ unsigned long err_addr_len;
+ u32 err_list_count;
+ struct list_head err_node_list;
};
+#define for_each_ras_error(err_node, err_data) \
+ list_for_each_entry(err_node, &(err_data)->err_node_list, node)
+
struct ras_err_handler_data {
/* point to bad page records array */
struct eeprom_table_record *bps;
/* the count of entries */
int count;
+ int count_saved;
/* the space can place new entries */
int space_left;
};
@@ -419,6 +702,8 @@ struct ras_manager {
struct ras_ih_data ih_data;
struct ras_err_data err_data;
+
+ struct aca_handle aca_handle;
};
struct ras_badpage {
@@ -438,12 +723,14 @@ struct ras_query_if {
struct ras_common_if head;
unsigned long ue_count;
unsigned long ce_count;
+ unsigned long de_count;
};
struct ras_inject_if {
struct ras_common_if head;
uint64_t address;
uint64_t value;
+ uint32_t instance_mask;
};
struct ras_cure_if {
@@ -468,6 +755,30 @@ struct ras_debug_if {
};
int op;
};
+
+struct amdgpu_ras_block_object {
+ struct ras_common_if ras_comm;
+
+ int (*ras_block_match)(struct amdgpu_ras_block_object *block_obj,
+ enum amdgpu_ras_block block, uint32_t sub_block_index);
+ int (*ras_late_init)(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+ void (*ras_fini)(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+ ras_ih_cb ras_cb;
+ const struct amdgpu_ras_block_hw_ops *hw_ops;
+};
+
+struct amdgpu_ras_block_hw_ops {
+ int (*ras_error_inject)(struct amdgpu_device *adev,
+ void *inject_if, uint32_t instance_mask);
+ void (*query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status);
+ void (*query_ras_error_status)(struct amdgpu_device *adev);
+ void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status);
+ void (*reset_ras_error_count)(struct amdgpu_device *adev);
+ void (*reset_ras_error_status)(struct amdgpu_device *adev);
+ bool (*query_poison_status)(struct amdgpu_device *adev);
+ bool (*handle_poison_consumption)(struct amdgpu_device *adev);
+};
+
/* work flow
* vbios
* 1: ras feature enable (enabled by default)
@@ -482,43 +793,23 @@ struct ras_debug_if {
* 8: feature disable
*/
-#define amdgpu_ras_get_context(adev) ((adev)->psp.ras_context.ras)
-#define amdgpu_ras_set_context(adev, ras_con) ((adev)->psp.ras_context.ras = (ras_con))
-
-/* check if ras is supported on block, say, sdma, gfx */
-static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
- unsigned int block)
-{
- struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
-
- if (block >= AMDGPU_RAS_BLOCK_COUNT)
- return 0;
- return ras && (adev->ras_enabled & (1 << block));
-}
-
-int amdgpu_ras_recovery_init(struct amdgpu_device *adev);
+int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev);
+int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info);
void amdgpu_ras_resume(struct amdgpu_device *adev);
void amdgpu_ras_suspend(struct amdgpu_device *adev);
int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
unsigned long *ce_count,
- unsigned long *ue_count);
+ unsigned long *ue_count,
+ struct ras_query_if *query_info);
/* error handling functions */
int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
- struct eeprom_table_record *bps, int pages);
-
-int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev);
-
-static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
-{
- struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ struct eeprom_table_record *bps, int pages, bool from_rom);
- if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
- schedule_work(&ras->recovery_work);
- return 0;
-}
+int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
+ unsigned long *new_cnt);
static inline enum ta_ras_block
amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) {
@@ -553,6 +844,16 @@ amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) {
return TA_RAS_BLOCK__FUSE;
case AMDGPU_RAS_BLOCK__MCA:
return TA_RAS_BLOCK__MCA;
+ case AMDGPU_RAS_BLOCK__VCN:
+ return TA_RAS_BLOCK__VCN;
+ case AMDGPU_RAS_BLOCK__JPEG:
+ return TA_RAS_BLOCK__JPEG;
+ case AMDGPU_RAS_BLOCK__IH:
+ return TA_RAS_BLOCK__IH;
+ case AMDGPU_RAS_BLOCK__MPIO:
+ return TA_RAS_BLOCK__MPIO;
+ case AMDGPU_RAS_BLOCK__MMSCH:
+ return TA_RAS_BLOCK__MMSCH;
default:
WARN_ONCE(1, "RAS ERROR: unexpected block id %d\n", block);
return TA_RAS_BLOCK__UMC;
@@ -580,15 +881,15 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) {
/* called in ip_init and ip_fini */
int amdgpu_ras_init(struct amdgpu_device *adev);
+int amdgpu_ras_late_init(struct amdgpu_device *adev);
int amdgpu_ras_fini(struct amdgpu_device *adev);
int amdgpu_ras_pre_fini(struct amdgpu_device *adev);
-int amdgpu_ras_late_init(struct amdgpu_device *adev,
- struct ras_common_if *ras_block,
- struct ras_fs_if *fs_info,
- struct ras_ih_if *ih_info);
-void amdgpu_ras_late_fini(struct amdgpu_device *adev,
- struct ras_common_if *ras_block,
- struct ras_ih_if *ih_info);
+
+int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block);
+
+void amdgpu_ras_block_late_fini(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block);
int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
struct ras_common_if *head, bool enable);
@@ -597,7 +898,7 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
struct ras_common_if *head, bool enable);
int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
- struct ras_fs_if *head);
+ struct ras_common_if *head);
int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
struct ras_common_if *head);
@@ -607,6 +908,8 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev);
int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
struct ras_query_if *info);
+int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
enum amdgpu_ras_block block);
@@ -614,10 +917,10 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
struct ras_inject_if *info);
int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev,
- struct ras_ih_if *info);
+ struct ras_common_if *head);
int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
- struct ras_ih_if *info);
+ struct ras_common_if *head);
int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
struct ras_dispatch_if *info);
@@ -637,7 +940,7 @@ static inline void amdgpu_ras_intr_cleared(void)
atomic_set(&amdgpu_ras_in_intr, 0);
}
-void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev);
+int amdgpu_ras_global_ras_isr(struct amdgpu_device *adev);
void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready);
@@ -651,4 +954,94 @@ const char *get_ras_block_str(struct ras_common_if *ras_block);
bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev);
+int amdgpu_ras_is_supported(struct amdgpu_device *adev, unsigned int block);
+
+int amdgpu_ras_reset_gpu(struct amdgpu_device *adev);
+
+struct amdgpu_ras* amdgpu_ras_get_context(struct amdgpu_device *adev);
+
+int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con);
+
+int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable);
+int amdgpu_ras_set_aca_debug_mode(struct amdgpu_device *adev, bool enable);
+bool amdgpu_ras_get_aca_debug_mode(struct amdgpu_device *adev);
+bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev,
+ unsigned int *mode);
+
+int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
+ struct amdgpu_ras_block_object *ras_block_obj);
+void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev);
+void amdgpu_ras_get_error_type_name(uint32_t err_type, char *err_type_name);
+bool amdgpu_ras_inst_get_memory_id_field(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_entry,
+ uint32_t instance,
+ uint32_t *memory_id);
+bool amdgpu_ras_inst_get_err_cnt_field(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_entry,
+ uint32_t instance,
+ unsigned long *err_cnt);
+void amdgpu_ras_inst_query_ras_error_count(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_list,
+ uint32_t reg_list_size,
+ const struct amdgpu_ras_memory_id_entry *mem_list,
+ uint32_t mem_list_size,
+ uint32_t instance,
+ uint32_t err_type,
+ unsigned long *err_count);
+void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_list,
+ uint32_t reg_list_size,
+ uint32_t instance);
+
+int amdgpu_ras_error_data_init(struct ras_err_data *err_data);
+void amdgpu_ras_error_data_fini(struct ras_err_data *err_data);
+int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
+ struct amdgpu_smuio_mcm_config_info *mcm_info,
+ u64 count);
+int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
+ struct amdgpu_smuio_mcm_config_info *mcm_info,
+ u64 count);
+int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data,
+ struct amdgpu_smuio_mcm_config_info *mcm_info,
+ u64 count);
+void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances);
+int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+ const struct aca_info *aca_info, void *data);
+int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk);
+
+ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr,
+ struct aca_handle *handle, char *buf, void *data);
+
+void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status);
+bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev);
+void amdgpu_ras_set_err_poison(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
+void amdgpu_ras_clear_err_state(struct amdgpu_device *adev);
+bool amdgpu_ras_is_err_state(struct amdgpu_device *adev, int block);
+
+u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type);
+int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_type type,
+ const void *caller);
+
+int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn);
+
+int amdgpu_ras_add_critical_region(struct amdgpu_device *adev, struct amdgpu_bo *bo);
+bool amdgpu_ras_check_critical_address(struct amdgpu_device *adev, uint64_t addr);
+
+int amdgpu_ras_put_poison_req(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset);
+
+bool amdgpu_ras_in_recovery(struct amdgpu_device *adev);
+
+__printf(3, 4)
+void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id,
+ const char *fmt, ...);
+
+bool amdgpu_ras_is_rma(struct amdgpu_device *adev);
+
+void amdgpu_ras_pre_reset(struct amdgpu_device *adev,
+ struct list_head *device_list);
+void amdgpu_ras_post_reset(struct amdgpu_device *adev,
+ struct list_head *device_list);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 05117eda105b..64dd7a81bff5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -31,14 +31,35 @@
#include <linux/debugfs.h>
#include <linux/uaccess.h>
-#define EEPROM_I2C_MADDR_VEGA20 0x0
-#define EEPROM_I2C_MADDR_ARCTURUS 0x40000
-#define EEPROM_I2C_MADDR_ARCTURUS_D342 0x0
-#define EEPROM_I2C_MADDR_SIENNA_CICHLID 0x0
-#define EEPROM_I2C_MADDR_ALDEBARAN 0x0
+#include "amdgpu_reset.h"
+#include "amdgpu_ras_mgr.h"
+
+/* These are memory addresses as would be seen by one or more EEPROM
+ * chips strung on the I2C bus, usually by manipulating pins 1-3 of a
+ * set of EEPROM devices. They form a continuous memory space.
+ *
+ * The I2C device address includes the device type identifier, 1010b,
+ * which is a reserved value and indicates that this is an I2C EEPROM
+ * device. It also includes the top 3 bits of the 19 bit EEPROM memory
+ * address, namely bits 18, 17, and 16. This makes up the 7 bit
+ * address sent on the I2C bus with bit 0 being the direction bit,
+ * which is not represented here, and sent by the hardware directly.
+ *
+ * For instance,
+ * 50h = 1010000b => device type identifier 1010b, bits 18:16 = 000b, address 0.
+ * 54h = 1010100b => --"--, bits 18:16 = 100b, address 40000h.
+ * 56h = 1010110b => --"--, bits 18:16 = 110b, address 60000h.
+ * Depending on the size of the I2C EEPROM device(s) and the status of
+ * pins 1-3, bits 18:16 may address memory within a device or select a
+ * device on the I2C bus. See top of amdgpu_eeprom.c.
+ *
+ * The RAS table lives either at address 0 or address 40000h of EEPROM.
+ */
+#define EEPROM_I2C_MADDR_0 0x0
+#define EEPROM_I2C_MADDR_4 0x40000
/*
- * The 2 macros bellow represent the actual size in bytes that
+ * The 2 macros below represent the actual size in bytes that
* those entities occupy in the EEPROM memory.
* RAS_TABLE_RECORD_SIZE is different than sizeof(eeprom_table_record) which
* uses uint64 to store 6b fields such as retired_page.
@@ -48,11 +69,24 @@
/* Table hdr is 'AMDR' */
#define RAS_TABLE_HDR_VAL 0x414d4452
-#define RAS_TABLE_VER 0x00010000
/* Bad GPU tag ‘BADG’ */
#define RAS_TABLE_HDR_BAD 0x42414447
+/*
+ * EEPROM Table structure v1
+ * ---------------------------------
+ * | |
+ * | EEPROM TABLE HEADER |
+ * | ( size 20 Bytes ) |
+ * | |
+ * ---------------------------------
+ * | |
+ * | BAD PAGE RECORD AREA |
+ * | |
+ * ---------------------------------
+ */
+
/* Assume 2-Mbit size EEPROM and take up the whole space. */
#define RAS_TBL_SIZE_BYTES (256 * 1024)
#define RAS_TABLE_START 0
@@ -61,6 +95,37 @@
#define RAS_MAX_RECORD_COUNT ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE) \
/ RAS_TABLE_RECORD_SIZE)
+/*
+ * EEPROM Table structure v2.1
+ * ---------------------------------
+ * | |
+ * | EEPROM TABLE HEADER |
+ * | ( size 20 Bytes ) |
+ * | |
+ * ---------------------------------
+ * | |
+ * | EEPROM TABLE RAS INFO |
+ * | (available info size 4 Bytes) |
+ * | ( reserved size 252 Bytes ) |
+ * | |
+ * ---------------------------------
+ * | |
+ * | BAD PAGE RECORD AREA |
+ * | |
+ * ---------------------------------
+ */
+
+/* EEPROM Table V2_1 */
+#define RAS_TABLE_V2_1_INFO_SIZE 256
+#define RAS_TABLE_V2_1_INFO_START RAS_TABLE_HEADER_SIZE
+#define RAS_RECORD_START_V2_1 (RAS_HDR_START + RAS_TABLE_HEADER_SIZE + \
+ RAS_TABLE_V2_1_INFO_SIZE)
+#define RAS_MAX_RECORD_COUNT_V2_1 ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE - \
+ RAS_TABLE_V2_1_INFO_SIZE) \
+ / RAS_TABLE_RECORD_SIZE)
+
+#define RAS_SMU_MESSAGE_TIMEOUT_MS 1000 /* 1s */
+
/* Given a zero-based index of an EEPROM RAS record, yields the EEPROM
* offset off of RAS_TABLE_START. That is, this is something you can
* add to control->i2c_address, and then tell I2C layer to read
@@ -83,43 +148,40 @@
#define RAS_NUM_RECS(_tbl_hdr) (((_tbl_hdr)->tbl_size - \
RAS_TABLE_HEADER_SIZE) / RAS_TABLE_RECORD_SIZE)
-#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev
+#define RAS_NUM_RECS_V2_1(_tbl_hdr) (((_tbl_hdr)->tbl_size - \
+ RAS_TABLE_HEADER_SIZE - \
+ RAS_TABLE_V2_1_INFO_SIZE) / RAS_TABLE_RECORD_SIZE)
-static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
-{
- return adev->asic_type == CHIP_VEGA20 ||
- adev->asic_type == CHIP_ARCTURUS ||
- adev->asic_type == CHIP_SIENNA_CICHLID ||
- adev->asic_type == CHIP_ALDEBARAN;
-}
+#define to_amdgpu_device(x) ((container_of(x, struct amdgpu_ras, eeprom_control))->adev)
-static bool __get_eeprom_i2c_addr_arct(struct amdgpu_device *adev,
- struct amdgpu_ras_eeprom_control *control)
+static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
{
- struct atom_context *atom_ctx = adev->mode_info.atom_context;
-
- if (!control || !atom_ctx)
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(11, 0, 2): /* VEGA20 and ARCTURUS */
+ case IP_VERSION(11, 0, 7): /* Sienna cichlid */
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 2): /* Aldebaran */
+ case IP_VERSION(13, 0, 10):
+ return true;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ return (adev->gmc.is_app_apu) ? false : true;
+ default:
return false;
-
- if (strnstr(atom_ctx->vbios_version,
- "D342",
- sizeof(atom_ctx->vbios_version)))
- control->i2c_address = EEPROM_I2C_MADDR_ARCTURUS_D342;
- else
- control->i2c_address = EEPROM_I2C_MADDR_ARCTURUS;
-
- return true;
+ }
}
static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
struct amdgpu_ras_eeprom_control *control)
{
+ struct atom_context *atom_ctx = adev->mode_info.atom_context;
u8 i2c_addr;
if (!control)
return false;
- if (amdgpu_atomfirmware_ras_rom_addr(adev, &i2c_addr)) {
+ if (adev->bios && amdgpu_atomfirmware_ras_rom_addr(adev, &i2c_addr)) {
/* The address given by VBIOS is an 8-bit, wire-format
* address, i.e. the most significant byte.
*
@@ -134,27 +196,44 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
return true;
}
- switch (adev->asic_type) {
- case CHIP_VEGA20:
- control->i2c_address = EEPROM_I2C_MADDR_VEGA20;
- break;
-
- case CHIP_ARCTURUS:
- return __get_eeprom_i2c_addr_arct(adev, control);
-
- case CHIP_SIENNA_CICHLID:
- control->i2c_address = EEPROM_I2C_MADDR_SIENNA_CICHLID;
- break;
-
- case CHIP_ALDEBARAN:
- control->i2c_address = EEPROM_I2C_MADDR_ALDEBARAN;
- break;
-
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(11, 0, 2):
+ /* VEGA20 and ARCTURUS */
+ if (adev->asic_type == CHIP_VEGA20)
+ control->i2c_address = EEPROM_I2C_MADDR_0;
+ else if (strnstr(atom_ctx->vbios_pn,
+ "D342",
+ sizeof(atom_ctx->vbios_pn)))
+ control->i2c_address = EEPROM_I2C_MADDR_0;
+ else
+ control->i2c_address = EEPROM_I2C_MADDR_4;
+ return true;
+ case IP_VERSION(11, 0, 7):
+ control->i2c_address = EEPROM_I2C_MADDR_0;
+ return true;
+ case IP_VERSION(13, 0, 2):
+ if (strnstr(atom_ctx->vbios_pn, "D673",
+ sizeof(atom_ctx->vbios_pn)))
+ control->i2c_address = EEPROM_I2C_MADDR_4;
+ else
+ control->i2c_address = EEPROM_I2C_MADDR_0;
+ return true;
+ case IP_VERSION(13, 0, 0):
+ if (strnstr(atom_ctx->vbios_pn, "D707",
+ sizeof(atom_ctx->vbios_pn)))
+ control->i2c_address = EEPROM_I2C_MADDR_0;
+ else
+ control->i2c_address = EEPROM_I2C_MADDR_4;
+ return true;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ control->i2c_address = EEPROM_I2C_MADDR_4;
+ return true;
default:
return false;
}
-
- return true;
}
static void
@@ -193,23 +272,89 @@ static int __write_table_header(struct amdgpu_ras_eeprom_control *control)
__encode_table_header_to_buf(&control->tbl_hdr, buf);
/* i2c may be unstable in gpu reset */
- down_read(&adev->reset_sem);
- res = amdgpu_eeprom_write(&adev->pm.smu_i2c,
+ down_read(&adev->reset_domain->sem);
+ res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus,
control->i2c_address +
control->ras_header_offset,
buf, RAS_TABLE_HEADER_SIZE);
- up_read(&adev->reset_sem);
+ up_read(&adev->reset_domain->sem);
if (res < 0) {
- DRM_ERROR("Failed to write EEPROM table header:%d", res);
+ dev_err(adev->dev, "Failed to write EEPROM table header:%d",
+ res);
} else if (res < RAS_TABLE_HEADER_SIZE) {
- DRM_ERROR("Short write:%d out of %d\n",
- res, RAS_TABLE_HEADER_SIZE);
+ dev_err(adev->dev, "Short write:%d out of %d\n", res,
+ RAS_TABLE_HEADER_SIZE);
+ res = -EIO;
+ } else {
+ res = 0;
+ }
+
+ return res;
+}
+
+/*
+ * Pack the RAS info fields of @rai into the first four bytes of @buf as
+ * one little-endian 32-bit word:
+ *   bits  7:0  - rma_status
+ *   bits 15:8  - health_percent
+ *   bits 31:16 - ecc_page_threshold
+ * Only that first word of @buf is written here.
+ */
+static void
+__encode_table_ras_info_to_buf(struct amdgpu_ras_eeprom_table_ras_info *rai,
+			       unsigned char *buf)
+{
+	u32 *out = (uint32_t *)buf;
+	u32 word;
+
+	/* Build the word one field at a time, lowest bits first. */
+	word = (uint32_t)(rai->rma_status) & 0xFF;
+	word |= ((uint32_t)(rai->health_percent) << 8) & 0xFF00;
+	word |= ((uint32_t)(rai->ecc_page_threshold) << 16) & 0xFFFF0000;
+
+	out[0] = cpu_to_le32(word);
+}
+
+/*
+ * Inverse of __encode_table_ras_info_to_buf(): read the first
+ * little-endian 32-bit word of @buf and split it into the fields of @rai
+ * (bits 7:0 rma_status, bits 15:8 health_percent, bits 31:16
+ * ecc_page_threshold).
+ */
+static void
+__decode_table_ras_info_from_buf(struct amdgpu_ras_eeprom_table_ras_info *rai,
+				 unsigned char *buf)
+{
+	u32 *in = (uint32_t *)buf;
+	u32 word = le32_to_cpu(in[0]);
+
+	rai->ecc_page_threshold = (word >> 16) & 0xFFFF;
+	rai->health_percent = (word >> 8) & 0xFF;
+	rai->rma_status = word & 0xFF;
+}
+
+/*
+ * Write the in-memory RAS info (control->tbl_rai) to the EEPROM.
+ *
+ * A zeroed scratch buffer of RAS_TABLE_V2_1_INFO_SIZE bytes is allocated,
+ * the RAS info is encoded into it, and the whole buffer is written at
+ * control->i2c_address + control->ras_info_offset.
+ *
+ * Returns 0 when the full buffer was written, -ENOMEM if the scratch
+ * buffer cannot be allocated, -EIO on a short write, or the negative
+ * value returned by amdgpu_eeprom_write().
+ */
+static int __write_table_ras_info(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ u8 *buf;
+ int res;
+
+ buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL);
+ if (!buf) {
+ dev_err(adev->dev,
+ "Failed to alloc buf to write table ras info\n");
+ return -ENOMEM;
+ }
+
+ /* Only the leading bytes are encoded; the rest of buf stays zero. */
+ __encode_table_ras_info_to_buf(&control->tbl_rai, buf);
+
+ /* i2c may be unstable in gpu reset */
+ down_read(&adev->reset_domain->sem);
+ res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address +
+ control->ras_info_offset,
+ buf, RAS_TABLE_V2_1_INFO_SIZE);
+ up_read(&adev->reset_domain->sem);
+
+ /* Collapse the byte count from the write into 0 / negative errno. */
+ if (res < 0) {
+ dev_err(adev->dev, "Failed to write EEPROM table ras info:%d",
+ res);
+ } else if (res < RAS_TABLE_V2_1_INFO_SIZE) {
+ dev_err(adev->dev, "Short write:%d out of %d\n", res,
+ RAS_TABLE_V2_1_INFO_SIZE);
res = -EIO;
} else {
res = 0;
}
+ kfree(buf);
+
return res;
}
@@ -229,6 +374,21 @@ static u8 __calc_hdr_byte_sum(const struct amdgpu_ras_eeprom_control *control)
return csum;
}
+/*
+ * Return the 8-bit wrapping sum of every byte of the in-memory RAS info
+ * structure (control->tbl_rai).
+ */
+static u8 __calc_ras_info_byte_sum(const struct amdgpu_ras_eeprom_control *control)
+{
+	const u8 *byte = (const u8 *)&control->tbl_rai;
+	const u8 *end = byte + sizeof(control->tbl_rai);
+	u8 sum = 0;
+
+	while (byte < end)
+		sum += *byte++;
+
+	return sum;
+}
+
static int amdgpu_ras_eeprom_correct_header_tag(
struct amdgpu_ras_eeprom_control *control,
uint32_t header)
@@ -254,6 +414,25 @@ static int amdgpu_ras_eeprom_correct_header_tag(
return res;
}
+/*
+ * Select the EEPROM table version from the UMC IP version and store it
+ * in control->tbl_hdr.version: UMC 8.10.0 uses V2.1, UMC 12.0.0 and
+ * 12.5.0 use V3, everything else uses V1.
+ */
+static void amdgpu_ras_set_eeprom_table_version(struct amdgpu_ras_eeprom_control *control)
+{
+	struct amdgpu_device *adev = to_amdgpu_device(control);
+
+	switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
+	case IP_VERSION(8, 10, 0):
+		control->tbl_hdr.version = RAS_TABLE_VER_V2_1;
+		break;
+	case IP_VERSION(12, 0, 0):
+	case IP_VERSION(12, 5, 0):
+		control->tbl_hdr.version = RAS_TABLE_VER_V3;
+		break;
+	default:
+		control->tbl_hdr.version = RAS_TABLE_VER_V1;
+		break;
+	}
+}
+
/**
* amdgpu_ras_eeprom_reset_table -- Reset the RAS EEPROM table
* @control: pointer to control structure
@@ -263,25 +442,73 @@ static int amdgpu_ras_eeprom_correct_header_tag(
*/
int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ struct amdgpu_ras_eeprom_table_ras_info *rai = &control->tbl_rai;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ u32 erase_res = 0;
u8 csum;
int res;
mutex_lock(&control->ras_tbl_mutex);
- hdr->header = RAS_TABLE_HDR_VAL;
- hdr->version = RAS_TABLE_VER;
- hdr->first_rec_offset = RAS_RECORD_START;
- hdr->tbl_size = RAS_TABLE_HEADER_SIZE;
+ if (!amdgpu_ras_smu_eeprom_supported(adev)) {
+ hdr->header = RAS_TABLE_HDR_VAL;
+ amdgpu_ras_set_eeprom_table_version(control);
+
+ if (hdr->version >= RAS_TABLE_VER_V2_1) {
+ hdr->first_rec_offset = RAS_RECORD_START_V2_1;
+ hdr->tbl_size = RAS_TABLE_HEADER_SIZE +
+ RAS_TABLE_V2_1_INFO_SIZE;
+ rai->rma_status = GPU_HEALTH_USABLE;
+
+ control->ras_record_offset = RAS_RECORD_START_V2_1;
+ control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1;
+ /**
+ * GPU health represented as a percentage.
+ * 0 means worst health, 100 means fully healthy.
+ */
+ rai->health_percent = 100;
+ /* ecc_page_threshold = 0 means disable bad page retirement */
+ rai->ecc_page_threshold = con->bad_page_cnt_threshold;
+ } else {
+ hdr->first_rec_offset = RAS_RECORD_START;
+ hdr->tbl_size = RAS_TABLE_HEADER_SIZE;
+
+ control->ras_record_offset = RAS_RECORD_START;
+ control->ras_max_record_count = RAS_MAX_RECORD_COUNT;
+ }
- csum = __calc_hdr_byte_sum(control);
- csum = -csum;
- hdr->checksum = csum;
- res = __write_table_header(control);
+ csum = __calc_hdr_byte_sum(control);
+ if (hdr->version >= RAS_TABLE_VER_V2_1)
+ csum += __calc_ras_info_byte_sum(control);
+ csum = -csum;
+ hdr->checksum = csum;
+ res = __write_table_header(control);
+ if (!res && hdr->version > RAS_TABLE_VER_V1)
+ res = __write_table_ras_info(control);
+ } else {
+ res = amdgpu_ras_smu_erase_ras_table(adev, &erase_res);
+ if (res || erase_res) {
+ dev_warn(adev->dev, "RAS EEPROM reset failed, res:%d result:%d",
+ res, erase_res);
+ if (!res)
+ res = -EIO;
+ }
+ }
control->ras_num_recs = 0;
+ control->ras_num_bad_pages = 0;
+ control->ras_num_mca_recs = 0;
+ control->ras_num_pa_recs = 0;
control->ras_fri = 0;
+ amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_bad_pages);
+
+ control->bad_channel_bitmap = 0;
+ amdgpu_dpm_send_hbm_bad_channel_flag(adev, control->bad_channel_bitmap);
+ con->update_channel_flag = false;
+
amdgpu_ras_debugfs_set_ret_size(control);
mutex_unlock(&control->ras_tbl_mutex);
@@ -349,7 +576,11 @@ bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- if (!__is_ras_eeprom_supported(adev))
+ if (amdgpu_uniras_enabled(adev))
+ return amdgpu_ras_mgr_check_eeprom_safety_watermark(adev);
+
+ if (!__is_ras_eeprom_supported(adev) ||
+ !amdgpu_bad_page_threshold)
return false;
/* skip check eeprom table for VEGA20 Gaming */
@@ -360,10 +591,19 @@ bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev)
return false;
if (con->eeprom_control.tbl_hdr.header == RAS_TABLE_HDR_BAD) {
- dev_warn(adev->dev, "This GPU is in BAD status.");
- dev_warn(adev->dev, "Please retire it or set a larger "
- "threshold value when reloading driver.\n");
- return true;
+ if (con->eeprom_control.ras_num_bad_pages > con->bad_page_cnt_threshold)
+ dev_warn(adev->dev, "RAS records:%d exceed threshold:%d",
+ con->eeprom_control.ras_num_bad_pages, con->bad_page_cnt_threshold);
+ if ((amdgpu_bad_page_threshold == -1) ||
+ (amdgpu_bad_page_threshold == -2)) {
+ dev_warn(adev->dev,
+ "Please consult AMD Service Action Guide (SAG) for appropriate service procedures.\n");
+ return false;
+ } else {
+ dev_warn(adev->dev,
+ "Please consider adjusting the customized threshold.\n");
+ return true;
+ }
}
return false;
@@ -387,21 +627,21 @@ static int __amdgpu_ras_eeprom_write(struct amdgpu_ras_eeprom_control *control,
int res;
/* i2c may be unstable in gpu reset */
- down_read(&adev->reset_sem);
+ down_read(&adev->reset_domain->sem);
buf_size = num * RAS_TABLE_RECORD_SIZE;
- res = amdgpu_eeprom_write(&adev->pm.smu_i2c,
+ res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus,
control->i2c_address +
RAS_INDEX_TO_OFFSET(control, fri),
buf, buf_size);
- up_read(&adev->reset_sem);
+ up_read(&adev->reset_domain->sem);
if (res < 0) {
- DRM_ERROR("Writing %d EEPROM table records error:%d",
- num, res);
+ dev_err(adev->dev, "Writing %d EEPROM table records error:%d",
+ num, res);
} else if (res < buf_size) {
/* Short write, return error.
*/
- DRM_ERROR("Wrote %d records out of %d",
- res / RAS_TABLE_RECORD_SIZE, num);
+ dev_err(adev->dev, "Wrote %d records out of %d",
+ res / RAS_TABLE_RECORD_SIZE, num);
res = -EIO;
} else {
res = 0;
@@ -415,6 +655,8 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control,
struct eeprom_table_record *record,
const u32 num)
{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(to_amdgpu_device(control));
+ struct amdgpu_device *adev = to_amdgpu_device(control);
u32 a, b, i;
u8 *buf, *pp;
int res;
@@ -426,9 +668,17 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control,
/* Encode all of them in one go.
*/
pp = buf;
- for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE)
+ for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) {
__encode_table_record_to_buf(control, &record[i], pp);
+ /* update bad channel bitmap */
+ if ((record[i].mem_channel < BITS_PER_TYPE(control->bad_channel_bitmap)) &&
+ !(control->bad_channel_bitmap & (1 << record[i].mem_channel))) {
+ control->bad_channel_bitmap |= 1 << record[i].mem_channel;
+ con->update_channel_flag = true;
+ }
+ }
+
/* a, first record index to write into.
* b, last record index to write into.
* a = first index to read (fri) + number of records in the table,
@@ -509,6 +759,14 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control,
control->ras_num_recs = 1 + (control->ras_max_record_count + b
- control->ras_fri)
% control->ras_max_record_count;
+
+ /* Older ASICs (UMC major version < 12) save only PA records to the EEPROM, as before. */
+ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12)
+ control->ras_num_pa_recs += num;
+ else
+ control->ras_num_mca_recs += num;
+
+ control->ras_num_bad_pages = con->bad_page_num;
Out:
kfree(buf);
return res;
@@ -526,44 +784,75 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
/* Modify the header if it exceeds.
*/
if (amdgpu_bad_page_threshold != 0 &&
- control->ras_num_recs >= ras->bad_page_cnt_threshold) {
+ control->ras_num_bad_pages > ras->bad_page_cnt_threshold) {
dev_warn(adev->dev,
"Saved bad pages %d reaches threshold value %d\n",
- control->ras_num_recs, ras->bad_page_cnt_threshold);
- control->tbl_hdr.header = RAS_TABLE_HDR_BAD;
+ control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
+
+ if (adev->cper.enabled && !amdgpu_uniras_enabled(adev) &&
+ amdgpu_cper_generate_bp_threshold_record(adev))
+ dev_warn(adev->dev, "fail to generate bad page threshold cper records\n");
+
+ if ((amdgpu_bad_page_threshold != -1) &&
+ (amdgpu_bad_page_threshold != -2)) {
+ control->tbl_hdr.header = RAS_TABLE_HDR_BAD;
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) {
+ control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD;
+ control->tbl_rai.health_percent = 0;
+ }
+ ras->is_rma = true;
+ }
+
+ /* ignore the -ENOTSUPP return value */
+ amdgpu_dpm_send_rma_reason(adev);
}
- control->tbl_hdr.version = RAS_TABLE_VER;
- control->tbl_hdr.first_rec_offset = RAS_INDEX_TO_OFFSET(control, control->ras_fri);
- control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE + control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
+ control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
+ RAS_TABLE_V2_1_INFO_SIZE +
+ control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ else
+ control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
+ control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
control->tbl_hdr.checksum = 0;
buf_size = control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
buf = kcalloc(control->ras_num_recs, RAS_TABLE_RECORD_SIZE, GFP_KERNEL);
if (!buf) {
- DRM_ERROR("allocating memory for table of size %d bytes failed\n",
- control->tbl_hdr.tbl_size);
+ dev_err(adev->dev,
+ "allocating memory for table of size %d bytes failed\n",
+ control->tbl_hdr.tbl_size);
res = -ENOMEM;
goto Out;
}
- down_read(&adev->reset_sem);
- res = amdgpu_eeprom_read(&adev->pm.smu_i2c,
+ down_read(&adev->reset_domain->sem);
+ res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
control->i2c_address +
control->ras_record_offset,
buf, buf_size);
- up_read(&adev->reset_sem);
+ up_read(&adev->reset_domain->sem);
if (res < 0) {
- DRM_ERROR("EEPROM failed reading records:%d\n",
- res);
+ dev_err(adev->dev, "EEPROM failed reading records:%d\n", res);
goto Out;
} else if (res < buf_size) {
- DRM_ERROR("EEPROM read %d out of %d bytes\n",
- res, buf_size);
+ dev_err(adev->dev, "EEPROM read %d out of %d bytes\n", res,
+ buf_size);
res = -EIO;
goto Out;
}
+ /**
+ * bad page records have been stored in eeprom,
+ * now calculate gpu health percent
+ */
+ if (amdgpu_bad_page_threshold != 0 &&
+ control->tbl_hdr.version >= RAS_TABLE_VER_V2_1 &&
+ control->ras_num_bad_pages <= ras->bad_page_cnt_threshold)
+ control->tbl_rai.health_percent = ((ras->bad_page_cnt_threshold -
+ control->ras_num_bad_pages) * 100) /
+ ras->bad_page_cnt_threshold;
+
/* Recalc the checksum.
*/
csum = 0;
@@ -571,15 +860,84 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
csum += *pp;
csum += __calc_hdr_byte_sum(control);
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
+ csum += __calc_ras_info_byte_sum(control);
/* avoid sign extension when assigning to "checksum" */
csum = -csum;
control->tbl_hdr.checksum = csum;
res = __write_table_header(control);
+ if (!res && control->tbl_hdr.version > RAS_TABLE_VER_V1)
+ res = __write_table_ras_info(control);
Out:
kfree(buf);
return res;
}
+/*
+ * Refresh control->ras_num_recs from the SMU bad page count.
+ *
+ * Does nothing and returns 0 when the SMU EEPROM path is not supported.
+ * Otherwise the previous count is saved in control->ras_num_recs_old and
+ * the SMU is polled, up to 20 times with a 50 ms sleep between attempts,
+ * until the query fails or the count differs from the saved one.
+ *
+ * Returns 0 when the count changed, the query's error code when it
+ * failed, or -EINVAL when the count never changed.
+ */
+int amdgpu_ras_eeprom_update_record_num(struct amdgpu_ras_eeprom_control *control)
+{
+	struct amdgpu_device *adev = to_amdgpu_device(control);
+	int ret, retry = 20;
+
+	if (!amdgpu_ras_smu_eeprom_supported(adev))
+		return 0;
+
+	control->ras_num_recs_old = control->ras_num_recs;
+
+	do {
+		/* 1000ms timeout is long enough, smu_get_badpage_count won't
+		 * return -EBUSY before timeout.
+		 */
+		ret = amdgpu_ras_smu_get_badpage_count(adev,
+				&(control->ras_num_recs), RAS_SMU_MESSAGE_TIMEOUT_MS);
+		if (ret || control->ras_num_recs_old != control->ras_num_recs)
+			break;
+
+		/* The record number update in PMFW needs some time, so the
+		 * query may return before the count changes; wait a little
+		 * and ask again.
+		 */
+		msleep(50);
+	} while (--retry);
+
+	if (ret)
+		return ret;
+
+	/* An unchanged record number is not a real failure, so no warning
+	 * is printed; the caller only gets -EINVAL.
+	 */
+	if (control->ras_num_recs_old == control->ras_num_recs)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Append-time bookkeeping for the SMU EEPROM path.
+ *
+ * Copies the RAS context's bad page number into
+ * control->ras_num_bad_pages. If amdgpu_bad_page_threshold is non-zero
+ * and that number exceeds the context's bad page count threshold, a
+ * warning is logged, a CPER bad-page-threshold record is generated when
+ * CPER is enabled, and the context is flagged as RMA unless
+ * amdgpu_bad_page_threshold is -1 or -2.
+ *
+ * Always returns 0.
+ */
+static int amdgpu_ras_smu_eeprom_append(struct amdgpu_ras_eeprom_control *control)
+{
+	struct amdgpu_device *adev = to_amdgpu_device(control);
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+	if (!amdgpu_ras_smu_eeprom_supported(adev) || !con)
+		return 0;
+
+	control->ras_num_bad_pages = con->bad_page_num;
+
+	/* Threshold handling is skipped when the module parameter is 0 or
+	 * the limit has not been exceeded.
+	 */
+	if (amdgpu_bad_page_threshold == 0 ||
+	    control->ras_num_bad_pages <= con->bad_page_cnt_threshold)
+		return 0;
+
+	dev_warn(adev->dev,
+		 "Saved bad pages %d reaches threshold value %d\n",
+		 control->ras_num_bad_pages, con->bad_page_cnt_threshold);
+
+	if (adev->cper.enabled && amdgpu_cper_generate_bp_threshold_record(adev))
+		dev_warn(adev->dev, "fail to generate bad page threshold cper records\n");
+
+	if ((amdgpu_bad_page_threshold != -1) &&
+	    (amdgpu_bad_page_threshold != -2))
+		con->is_rma = true;
+
+	return 0;
+}
+
/**
* amdgpu_ras_eeprom_append -- append records to the EEPROM RAS table
* @control: pointer to control structure
@@ -598,20 +956,32 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
const u32 num)
{
struct amdgpu_device *adev = to_amdgpu_device(control);
- int res;
+ int res, i;
+ uint64_t nps = AMDGPU_NPS1_PARTITION_MODE;
if (!__is_ras_eeprom_supported(adev))
return 0;
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return amdgpu_ras_smu_eeprom_append(control);
+
if (num == 0) {
- DRM_ERROR("will not append 0 records\n");
+ dev_err(adev->dev, "will not append 0 records\n");
return -EINVAL;
} else if (num > control->ras_max_record_count) {
- DRM_ERROR("cannot append %d records than the size of table %d\n",
- num, control->ras_max_record_count);
+ dev_err(adev->dev,
+ "cannot append %d records than the size of table %d\n",
+ num, control->ras_max_record_count);
return -EINVAL;
}
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+
+ /* set the new channel index flag */
+ for (i = 0; i < num; i++)
+ record[i].retired_page |= (nps << UMC_NPS_SHIFT);
+
mutex_lock(&control->ras_tbl_mutex);
res = amdgpu_ras_eeprom_append_table(control, record, num);
@@ -621,6 +991,11 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
amdgpu_ras_debugfs_set_ret_size(control);
mutex_unlock(&control->ras_tbl_mutex);
+
+ /* clear channel index flag, the flag is only saved on eeprom */
+ for (i = 0; i < num; i++)
+ record[i].retired_page &= ~(nps << UMC_NPS_SHIFT);
+
return res;
}
@@ -642,21 +1017,21 @@ static int __amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
int res;
/* i2c may be unstable in gpu reset */
- down_read(&adev->reset_sem);
+ down_read(&adev->reset_domain->sem);
buf_size = num * RAS_TABLE_RECORD_SIZE;
- res = amdgpu_eeprom_read(&adev->pm.smu_i2c,
+ res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
control->i2c_address +
RAS_INDEX_TO_OFFSET(control, fri),
buf, buf_size);
- up_read(&adev->reset_sem);
+ up_read(&adev->reset_domain->sem);
if (res < 0) {
- DRM_ERROR("Reading %d EEPROM table records error:%d",
- num, res);
+ dev_err(adev->dev, "Reading %d EEPROM table records error:%d",
+ num, res);
} else if (res < buf_size) {
/* Short read, return error.
*/
- DRM_ERROR("Read %d records out of %d",
- res / RAS_TABLE_RECORD_SIZE, num);
+ dev_err(adev->dev, "Read %d records out of %d",
+ res / RAS_TABLE_RECORD_SIZE, num);
res = -EIO;
} else {
res = 0;
@@ -665,6 +1040,50 @@ static int __amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
return res;
}
+/**
+ * amdgpu_ras_eeprom_read_idx -- read bad page records through the SMU
+ * @control: pointer to control structure
+ * @record: array to receive @num records
+ * @rec_idx: index of the first record to query
+ * @num: number of records to query
+ *
+ * For each index in [@rec_idx, @rec_idx + @num) the MCA address, IPID
+ * and timestamp are queried from the SMU and stored in the matching
+ * entry of @record. The IPID is decoded with the UMC mca_ipid_parse
+ * callback into the cu, mem_channel and mcumc_id fields.
+ *
+ * Returns 0 when the SMU EEPROM path is not supported (nothing is read)
+ * or when all records were filled, -EOPNOTSUPP when no mca_ipid_parse
+ * callback is available, or the error of the first failing SMU query.
+ */
+int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control,
+			       struct eeprom_table_record *record, u32 rec_idx,
+			       const u32 num)
+{
+	struct amdgpu_device *adev = to_amdgpu_device(control);
+	struct eeprom_table_record *rec;
+	u32 cu, mem_channel, mcumc_id;
+	u64 mca, ipid;
+	uint64_t ts;
+	int ret;
+	u32 i;
+
+	if (!amdgpu_ras_smu_eeprom_supported(adev))
+		return 0;
+
+	if (!adev->umc.ras || !adev->umc.ras->mca_ipid_parse)
+		return -EOPNOTSUPP;
+
+	for (i = 0; i < num; i++) {
+		rec = &record[i];
+
+		ret = amdgpu_ras_smu_get_badpage_mca_addr(adev, rec_idx + i, &mca);
+		if (ret)
+			return ret;
+
+		ret = amdgpu_ras_smu_get_badpage_ipid(adev, rec_idx + i, &ipid);
+		if (ret)
+			return ret;
+
+		ret = amdgpu_ras_smu_get_timestamp(adev, rec_idx + i, &ts);
+		if (ret)
+			return ret;
+
+		rec->address = mca;
+		/* retired_page (pa) is unused now */
+		rec->retired_page = 0x1ULL;
+		rec->ts = ts;
+		rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
+
+		/* Parse into full-width temporaries rather than casting the
+		 * record fields to uint32_t pointers: a 32-bit store through
+		 * such a cast touches bytes outside a field that is not
+		 * exactly 32 bits wide. Seeding the temporaries with the
+		 * current field values keeps a field unchanged when the
+		 * callback does not write it.
+		 */
+		cu = rec->cu;
+		mem_channel = rec->mem_channel;
+		mcumc_id = rec->mcumc_id;
+
+		adev->umc.ras->mca_ipid_parse(adev, ipid,
+					      &cu, &mem_channel, &mcumc_id, NULL);
+
+		rec->cu = cu;
+		rec->mem_channel = mem_channel;
+		rec->mcumc_id = mcumc_id;
+	}
+
+	return 0;
+}
+
/**
* amdgpu_ras_eeprom_read -- read EEPROM
* @control: pointer to control structure
@@ -681,19 +1100,23 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
const u32 num)
{
struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
int i, res;
u8 *buf, *pp;
u32 g0, g1;
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return amdgpu_ras_eeprom_read_idx(control, record, 0, num);
+
if (!__is_ras_eeprom_supported(adev))
return 0;
if (num == 0) {
- DRM_ERROR("will not read 0 records\n");
+ dev_err(adev->dev, "will not read 0 records\n");
return -EINVAL;
} else if (num > control->ras_num_recs) {
- DRM_ERROR("too many records to read:%d available:%d\n",
- num, control->ras_num_recs);
+ dev_err(adev->dev, "too many records to read:%d available:%d\n",
+ num, control->ras_num_recs);
return -EINVAL;
}
@@ -748,8 +1171,16 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
/* Read up everything? Then transform.
*/
pp = buf;
- for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE)
+ for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) {
__decode_table_record_from_buf(control, &record[i], pp);
+
+ /* update bad channel bitmap */
+ if ((record[i].mem_channel < BITS_PER_TYPE(control->bad_channel_bitmap)) &&
+ !(control->bad_channel_bitmap & (1 << record[i].mem_channel))) {
+ control->bad_channel_bitmap |= 1 << record[i].mem_channel;
+ con->update_channel_flag = true;
+ }
+ }
Out:
kfree(buf);
mutex_unlock(&control->ras_tbl_mutex);
@@ -757,9 +1188,15 @@ Out:
return res;
}
-uint32_t amdgpu_ras_eeprom_max_record_count(void)
+uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control)
{
- return RAS_MAX_RECORD_COUNT;
+ /* get available eeprom table version first before eeprom table init */
+ amdgpu_ras_set_eeprom_table_version(control);
+
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
+ return RAS_MAX_RECORD_COUNT_V2_1;
+ else
+ return RAS_MAX_RECORD_COUNT;
}
static ssize_t
@@ -842,6 +1279,10 @@ static ssize_t amdgpu_ras_debugfs_table_read(struct file *f, char __user *buf,
int res = -EFAULT;
size_t data_len;
+ /* pmfw manages eeprom data by itself */
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return 0;
+
mutex_lock(&control->ras_tbl_mutex);
/* We want *pos - data_len > 0, which means there's
@@ -1001,20 +1442,27 @@ static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control
int buf_size, res;
u8 csum, *buf, *pp;
- buf_size = RAS_TABLE_HEADER_SIZE +
- control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
+ buf_size = RAS_TABLE_HEADER_SIZE +
+ RAS_TABLE_V2_1_INFO_SIZE +
+ control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ else
+ buf_size = RAS_TABLE_HEADER_SIZE +
+ control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+
buf = kzalloc(buf_size, GFP_KERNEL);
if (!buf) {
- DRM_ERROR("Out of memory checking RAS table checksum.\n");
+ dev_err(adev->dev,
+ "Out of memory checking RAS table checksum.\n");
return -ENOMEM;
}
- res = amdgpu_eeprom_read(&adev->pm.smu_i2c,
+ res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
control->i2c_address +
control->ras_header_offset,
buf, buf_size);
if (res < buf_size) {
- DRM_ERROR("Partial read for checksum, res:%d\n", res);
+ dev_err(adev->dev, "Partial read for checksum, res:%d\n", res);
/* On partial reads, return -EIO.
*/
if (res >= 0)
@@ -1030,8 +1478,78 @@ Out:
return res < 0 ? res : csum;
}
-int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
- bool *exceed_err_limit)
+/*
+ * Read the RAS info area of the EEPROM table into control->tbl_rai.
+ *
+ * RAS_TABLE_V2_1_INFO_SIZE bytes are read from
+ * control->i2c_address + control->ras_info_offset into a scratch buffer
+ * and decoded. Returns 0 when exactly that many bytes were read,
+ * -ENOMEM if the scratch buffer cannot be allocated, -EIO on a short
+ * read, or the negative value returned by amdgpu_eeprom_read().
+ */
+static int __read_table_ras_info(struct amdgpu_ras_eeprom_control *control)
+{
+	struct amdgpu_device *adev = to_amdgpu_device(control);
+	unsigned char *buf;
+	int res;
+
+	buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL);
+	if (!buf) {
+		dev_err(adev->dev,
+			"Failed to alloc buf to read EEPROM table ras info\n");
+		return -ENOMEM;
+	}
+
+	/* EEPROM table V2_1 carries a ras info area; fetch it whole. */
+	res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
+				 control->i2c_address + control->ras_info_offset,
+				 buf, RAS_TABLE_V2_1_INFO_SIZE);
+	if (res >= RAS_TABLE_V2_1_INFO_SIZE) {
+		__decode_table_ras_info_from_buf(&control->tbl_rai, buf);
+		if (res == RAS_TABLE_V2_1_INFO_SIZE)
+			res = 0;
+	} else {
+		dev_err(adev->dev,
+			"Failed to read EEPROM table ras info, res:%d", res);
+		if (res >= 0)
+			res = -EIO;
+	}
+
+	kfree(buf);
+	return res;
+}
+
+/*
+ * Initialise the EEPROM control structure for the SMU EEPROM path.
+ *
+ * Clears the RMA flag of the RAS context, and returns 0 without further
+ * work when RAS EEPROM is not supported on this device. Otherwise it
+ * sets up the table mutex, reads the table version and bad page count
+ * from the SMU, passes the current wall-clock time (in seconds) to the
+ * SMU, and resets the MCA/PA record counters.
+ *
+ * Returns 0 on success or the error of the first failing SMU request.
+ */
+static int amdgpu_ras_smu_eeprom_init(struct amdgpu_ras_eeprom_control *control)
+{
+	struct amdgpu_device *adev = to_amdgpu_device(control);
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	int res;
+
+	ras->is_rma = false;
+
+	if (!__is_ras_eeprom_supported(adev))
+		return 0;
+
+	mutex_init(&control->ras_tbl_mutex);
+
+	/* Each request is only issued if the previous one succeeded. */
+	res = amdgpu_ras_smu_get_table_version(adev, &(control->tbl_hdr.version));
+	if (!res)
+		/* NOTE(review): 100 is presumably a timeout in ms — confirm. */
+		res = amdgpu_ras_smu_get_badpage_count(adev,
+				&(control->ras_num_recs), 100);
+	if (!res)
+		res = amdgpu_ras_smu_set_timestamp(adev,
+				(uint64_t)ktime_get_real_seconds());
+	if (res)
+		return res;
+
+	control->ras_num_pa_recs = 0;
+	control->ras_num_mca_recs = 0;
+	control->ras_max_record_count = 4000;
+
+	return 0;
+}
+
+int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
{
struct amdgpu_device *adev = to_amdgpu_device(control);
unsigned char buf[RAS_TABLE_HEADER_SIZE] = { 0 };
@@ -1039,58 +1557,177 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
int res;
- *exceed_err_limit = false;
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return amdgpu_ras_smu_eeprom_init(control);
+
+ ras->is_rma = false;
if (!__is_ras_eeprom_supported(adev))
return 0;
/* Verify i2c adapter is initialized */
- if (!adev->pm.smu_i2c.algo)
+ if (!adev->pm.ras_eeprom_i2c_bus || !adev->pm.ras_eeprom_i2c_bus->algo)
return -ENOENT;
if (!__get_eeprom_i2c_addr(adev, control))
return -EINVAL;
control->ras_header_offset = RAS_HDR_START;
- control->ras_record_offset = RAS_RECORD_START;
- control->ras_max_record_count = RAS_MAX_RECORD_COUNT;
+ control->ras_info_offset = RAS_TABLE_V2_1_INFO_START;
mutex_init(&control->ras_tbl_mutex);
/* Read the table header from EEPROM address */
- res = amdgpu_eeprom_read(&adev->pm.smu_i2c,
+ res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
control->i2c_address + control->ras_header_offset,
buf, RAS_TABLE_HEADER_SIZE);
if (res < RAS_TABLE_HEADER_SIZE) {
- DRM_ERROR("Failed to read EEPROM table header, res:%d", res);
+ dev_err(adev->dev, "Failed to read EEPROM table header, res:%d",
+ res);
return res >= 0 ? -EIO : res;
}
__decode_table_header_from_buf(hdr, buf);
- control->ras_num_recs = RAS_NUM_RECS(hdr);
+ if (hdr->header != RAS_TABLE_HDR_VAL &&
+ hdr->header != RAS_TABLE_HDR_BAD) {
+ dev_info(adev->dev, "Creating a new EEPROM table");
+ return amdgpu_ras_eeprom_reset_table(control);
+ }
+
+ switch (hdr->version) {
+ case RAS_TABLE_VER_V2_1:
+ case RAS_TABLE_VER_V3:
+ control->ras_num_recs = RAS_NUM_RECS_V2_1(hdr);
+ control->ras_record_offset = RAS_RECORD_START_V2_1;
+ control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1;
+ break;
+ case RAS_TABLE_VER_V1:
+ control->ras_num_recs = RAS_NUM_RECS(hdr);
+ control->ras_record_offset = RAS_RECORD_START;
+ control->ras_max_record_count = RAS_MAX_RECORD_COUNT;
+ break;
+ default:
+ dev_err(adev->dev,
+ "RAS header invalid, unsupported version: %u",
+ hdr->version);
+ return -EINVAL;
+ }
+
+ if (control->ras_num_recs > control->ras_max_record_count) {
+ dev_err(adev->dev,
+ "RAS header invalid, records in header: %u max allowed :%u",
+ control->ras_num_recs, control->ras_max_record_count);
+ return -EINVAL;
+ }
+
control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset);
+ control->ras_num_mca_recs = 0;
+ control->ras_num_pa_recs = 0;
+ return 0;
+}
+
+/*
+ * amdgpu_ras_smu_eeprom_check - compare the bad page count to the threshold
+ * @control: EEPROM control; ras_num_bad_pages is refreshed from the RAS
+ *           context's bad_page_num
+ *
+ * When the bad page count exceeds the threshold (and the
+ * amdgpu_bad_page_threshold module setting is non-zero) a warning is
+ * logged; for settings other than -1 and -2 the device is additionally
+ * flagged as RMA.  Otherwise a warning is logged once the count reaches
+ * 90% of the threshold.
+ *
+ * Return: always 0.
+ */
+static int amdgpu_ras_smu_eeprom_check(struct amdgpu_ras_eeprom_control *control)
+{
+	struct amdgpu_device *adev = to_amdgpu_device(control);
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	bool over_threshold;
+
+	if (!__is_ras_eeprom_supported(adev))
+		return 0;
+
+	control->ras_num_bad_pages = ras->bad_page_num;
+
+	over_threshold = amdgpu_bad_page_threshold != 0 &&
+			 ras->bad_page_cnt_threshold < control->ras_num_bad_pages;
+	if (!over_threshold) {
+		dev_dbg(adev->dev,
+			"Found existing EEPROM table with %d records",
+			control->ras_num_bad_pages);
+
+		/* Warn if we are at 90% of the threshold or above. */
+		if (10 * control->ras_num_bad_pages >= 9 * ras->bad_page_cnt_threshold)
+			dev_warn(adev->dev, "RAS records:%u exceeds 90%% of threshold:%d",
+				 control->ras_num_bad_pages,
+				 ras->bad_page_cnt_threshold);
+		return 0;
+	}
+
+	dev_warn(adev->dev,
+		 "RAS records:%d exceed threshold:%d\n",
+		 control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
+
+	switch (amdgpu_bad_page_threshold) {
+	case -1:
+	case -2:
+		dev_warn(adev->dev,
+			 "Please consult AMD Service Action Guide (SAG) for appropriate service procedures\n");
+		break;
+	default:
+		/* Any other non-zero setting marks the device as RMA. */
+		ras->is_rma = true;
+		dev_warn(adev->dev,
+			 "User defined threshold is set, runtime service will be halt when threshold is reached\n");
+		break;
+	}
+
+	return 0;
+}
+
+int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int res = 0;
+
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return amdgpu_ras_smu_eeprom_check(control);
+
+ if (!__is_ras_eeprom_supported(adev))
+ return 0;
+
+ /* Verify i2c adapter is initialized */
+ if (!adev->pm.ras_eeprom_i2c_bus || !adev->pm.ras_eeprom_i2c_bus->algo)
+ return -ENOENT;
+
+ if (!__get_eeprom_i2c_addr(adev, control))
+ return -EINVAL;
+
+ control->ras_num_bad_pages = ras->bad_page_num;
if (hdr->header == RAS_TABLE_HDR_VAL) {
- DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
- control->ras_num_recs);
+ dev_dbg(adev->dev,
+ "Found existing EEPROM table with %d records",
+ control->ras_num_bad_pages);
+
+ if (hdr->version >= RAS_TABLE_VER_V2_1) {
+ res = __read_table_ras_info(control);
+ if (res)
+ return res;
+ }
+
res = __verify_ras_table_checksum(control);
if (res)
- DRM_ERROR("RAS table incorrect checksum or error:%d\n",
- res);
+ dev_err(adev->dev,
+ "RAS table incorrect checksum or error:%d\n",
+ res);
/* Warn if we are at 90% of the threshold or above
*/
- if (10 * control->ras_num_recs >= 9 * ras->bad_page_cnt_threshold)
+ if (10 * control->ras_num_bad_pages >= 9 * ras->bad_page_cnt_threshold)
dev_warn(adev->dev, "RAS records:%u exceeds 90%% of threshold:%d",
- control->ras_num_recs,
+ control->ras_num_bad_pages,
ras->bad_page_cnt_threshold);
} else if (hdr->header == RAS_TABLE_HDR_BAD &&
amdgpu_bad_page_threshold != 0) {
+ if (hdr->version >= RAS_TABLE_VER_V2_1) {
+ res = __read_table_ras_info(control);
+ if (res)
+ return res;
+ }
+
res = __verify_ras_table_checksum(control);
- if (res)
- DRM_ERROR("RAS Table incorrect checksum or error:%d\n",
- res);
- if (ras->bad_page_cnt_threshold > control->ras_num_recs) {
+ if (res) {
+ dev_err(adev->dev,
+ "RAS Table incorrect checksum or error:%d\n",
+ res);
+ return -EINVAL;
+ }
+ if (ras->bad_page_cnt_threshold >= control->ras_num_bad_pages) {
/* This means that, the threshold was increased since
* the last time the system was booted, and now,
* ras->bad_page_cnt_threshold - control->num_recs > 0,
@@ -1100,29 +1737,194 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
dev_info(adev->dev,
"records:%d threshold:%d, resetting "
"RAS table header signature",
- control->ras_num_recs,
+ control->ras_num_bad_pages,
ras->bad_page_cnt_threshold);
res = amdgpu_ras_eeprom_correct_header_tag(control,
RAS_TABLE_HDR_VAL);
} else {
- dev_err(adev->dev, "RAS records:%d exceed threshold:%d",
- control->ras_num_recs, ras->bad_page_cnt_threshold);
- if (amdgpu_bad_page_threshold == -2) {
- dev_warn(adev->dev, "GPU will be initialized due to bad_page_threshold = -2.");
+ dev_warn(adev->dev,
+ "RAS records:%d exceed threshold:%d\n",
+ control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
+ if ((amdgpu_bad_page_threshold == -1) ||
+ (amdgpu_bad_page_threshold == -2)) {
res = 0;
+ dev_warn(adev->dev,
+ "Please consult AMD Service Action Guide (SAG) for appropriate service procedures\n");
} else {
- *exceed_err_limit = true;
- dev_err(adev->dev,
- "RAS records:%d exceed threshold:%d, "
- "GPU will not be initialized. Replace this GPU or increase the threshold",
- control->ras_num_recs, ras->bad_page_cnt_threshold);
+ ras->is_rma = true;
+ dev_warn(adev->dev,
+ "User defined threshold is set, runtime service will be halt when threshold is reached\n");
}
}
- } else {
- DRM_INFO("Creating a new EEPROM table");
-
- res = amdgpu_ras_eeprom_reset_table(control);
}
return res < 0 ? res : 0;
}
+
+/*
+ * amdgpu_ras_eeprom_check_and_recover - verify the RAS table, rebuild on error
+ * @adev: device whose RAS EEPROM table is checked
+ *
+ * Does nothing when the RAS EEPROM is unsupported, there is no RAS context,
+ * the EEPROM is handled through the SMU, or the EEPROM is already marked
+ * invalid.  On a checksum failure the table is reset, the bad pages are
+ * saved again and the checksum is verified once more; if any of these
+ * steps fails the EEPROM is marked invalid.
+ */
+void amdgpu_ras_eeprom_check_and_recover(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	struct amdgpu_ras_eeprom_control *control;
+	int err;
+
+	if (!__is_ras_eeprom_supported(adev) || !ras ||
+	    amdgpu_ras_smu_eeprom_supported(adev))
+		return;
+
+	control = &ras->eeprom_control;
+	if (!control->is_eeprom_valid)
+		return;
+
+	err = __verify_ras_table_checksum(control);
+	if (!err)
+		return;
+
+	dev_warn(adev->dev,
+		 "RAS table incorrect checksum or error:%d, try to recover\n",
+		 err);
+
+	/* Each recovery step runs only if the previous one returned 0. */
+	if (!amdgpu_ras_eeprom_reset_table(control) &&
+	    !amdgpu_ras_save_bad_pages(adev, NULL) &&
+	    !__verify_ras_table_checksum(control)) {
+		dev_info(adev->dev, "RAS table recovery succeed\n");
+		return;
+	}
+
+	dev_err(adev->dev, "RAS table recovery failed\n");
+	control->is_eeprom_valid = false;
+}
+
+/*
+ * Return the SMU RAS driver attached to @adev's RAS context, or NULL when
+ * the device has no RAS context.
+ */
+static const struct ras_smu_drv *amdgpu_ras_get_smu_ras_drv(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *ras_ctx = amdgpu_ras_get_context(adev);
+
+	return ras_ctx ? ras_ctx->ras_smu_drv : NULL;
+}
+
+/*
+ * Query the SMU RAS feature flags for @adev.
+ *
+ * Return: the flags written by the driver's ras_smu_feature_flags callback,
+ * or 0 when there is no SMU RAS driver or it lacks that callback.
+ */
+static uint64_t amdgpu_ras_smu_get_feature_flags(struct amdgpu_device *adev)
+{
+	const struct ras_smu_drv *drv = amdgpu_ras_get_smu_ras_drv(adev);
+	uint64_t flags = 0ULL;
+
+	if (drv && drv->ras_smu_feature_flags)
+		drv->ras_smu_feature_flags(adev, &flags);
+
+	return flags;
+}
+
+/*
+ * amdgpu_ras_smu_eeprom_supported - is the RAS EEPROM accessed via the SMU?
+ * @adev: device to query
+ *
+ * Return: true only when the RAS EEPROM is supported, an SMU RAS driver
+ * with EEPROM callbacks is present, and the SMU feature flags contain
+ * RAS_SMU_FEATURE_BIT__RAS_EEPROM.
+ */
+bool amdgpu_ras_smu_eeprom_supported(struct amdgpu_device *adev)
+{
+	const struct ras_smu_drv *drv = amdgpu_ras_get_smu_ras_drv(adev);
+	uint64_t features;
+
+	if (!__is_ras_eeprom_supported(adev) || !drv ||
+	    !drv->smu_eeprom_funcs)
+		return false;
+
+	features = amdgpu_ras_smu_get_feature_flags(adev);
+
+	return (features & RAS_SMU_FEATURE_BIT__RAS_EEPROM) != 0;
+}
+
+/*
+ * Forward a table-version query to the SMU EEPROM callbacks.
+ *
+ * Return: the callback's result, or -EOPNOTSUPP when SMU EEPROM access is
+ * unsupported or the get_ras_table_version callback is not provided.
+ */
+int amdgpu_ras_smu_get_table_version(struct amdgpu_device *adev,
+				     uint32_t *table_version)
+{
+	const struct ras_smu_drv *drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+	if (!amdgpu_ras_smu_eeprom_supported(adev))
+		return -EOPNOTSUPP;
+
+	if (!drv->smu_eeprom_funcs->get_ras_table_version)
+		return -EOPNOTSUPP;
+
+	return drv->smu_eeprom_funcs->get_ras_table_version(adev,
+							     table_version);
+}
+
+/*
+ * Forward a bad-page-count query (with @timeout) to the SMU EEPROM callbacks.
+ *
+ * Return: the callback's result, or -EOPNOTSUPP when SMU EEPROM access is
+ * unsupported or the get_badpage_count callback is not provided.
+ */
+int amdgpu_ras_smu_get_badpage_count(struct amdgpu_device *adev,
+				     uint32_t *count, uint32_t timeout)
+{
+	const struct ras_smu_drv *drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+	if (!amdgpu_ras_smu_eeprom_supported(adev))
+		return -EOPNOTSUPP;
+
+	if (!drv->smu_eeprom_funcs->get_badpage_count)
+		return -EOPNOTSUPP;
+
+	return drv->smu_eeprom_funcs->get_badpage_count(adev, count, timeout);
+}
+
+/*
+ * Forward an MCA-address query for record @index to the SMU EEPROM callbacks.
+ *
+ * Return: the callback's result, or -EOPNOTSUPP when SMU EEPROM access is
+ * unsupported or the get_badpage_mca_addr callback is not provided.
+ */
+int amdgpu_ras_smu_get_badpage_mca_addr(struct amdgpu_device *adev,
+					uint16_t index, uint64_t *mca_addr)
+{
+	const struct ras_smu_drv *drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+	if (!amdgpu_ras_smu_eeprom_supported(adev))
+		return -EOPNOTSUPP;
+
+	if (!drv->smu_eeprom_funcs->get_badpage_mca_addr)
+		return -EOPNOTSUPP;
+
+	return drv->smu_eeprom_funcs->get_badpage_mca_addr(adev, index,
+							    mca_addr);
+}
+
+/*
+ * Forward @timestamp to the SMU EEPROM set_timestamp callback.
+ *
+ * Return: the callback's result, or -EOPNOTSUPP when SMU EEPROM access is
+ * unsupported or the set_timestamp callback is not provided.
+ */
+int amdgpu_ras_smu_set_timestamp(struct amdgpu_device *adev,
+				 uint64_t timestamp)
+{
+	const struct ras_smu_drv *drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+	if (!amdgpu_ras_smu_eeprom_supported(adev))
+		return -EOPNOTSUPP;
+
+	if (!drv->smu_eeprom_funcs->set_timestamp)
+		return -EOPNOTSUPP;
+
+	return drv->smu_eeprom_funcs->set_timestamp(adev, timestamp);
+}
+
+/*
+ * Forward a timestamp query for record @index to the SMU EEPROM callbacks.
+ *
+ * Return: the callback's result, or -EOPNOTSUPP when SMU EEPROM access is
+ * unsupported or the get_timestamp callback is not provided.
+ */
+int amdgpu_ras_smu_get_timestamp(struct amdgpu_device *adev,
+				 uint16_t index, uint64_t *timestamp)
+{
+	const struct ras_smu_drv *drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+	if (!amdgpu_ras_smu_eeprom_supported(adev))
+		return -EOPNOTSUPP;
+
+	if (!drv->smu_eeprom_funcs->get_timestamp)
+		return -EOPNOTSUPP;
+
+	return drv->smu_eeprom_funcs->get_timestamp(adev, index, timestamp);
+}
+
+/*
+ * Forward an IPID query for record @index to the SMU EEPROM callbacks.
+ *
+ * Return: the callback's result, or -EOPNOTSUPP when SMU EEPROM access is
+ * unsupported or the get_badpage_ipid callback is not provided.
+ */
+int amdgpu_ras_smu_get_badpage_ipid(struct amdgpu_device *adev,
+				    uint16_t index, uint64_t *ipid)
+{
+	const struct ras_smu_drv *drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+	if (!amdgpu_ras_smu_eeprom_supported(adev))
+		return -EOPNOTSUPP;
+
+	if (!drv->smu_eeprom_funcs->get_badpage_ipid)
+		return -EOPNOTSUPP;
+
+	return drv->smu_eeprom_funcs->get_badpage_ipid(adev, index, ipid);
+}
+
+/*
+ * Forward a table-erase request to the SMU EEPROM callbacks.
+ *
+ * Return: the callback's result, or -EOPNOTSUPP when SMU EEPROM access is
+ * unsupported or the erase_ras_table callback is not provided.
+ */
+int amdgpu_ras_smu_erase_ras_table(struct amdgpu_device *adev,
+				   uint32_t *result)
+{
+	const struct ras_smu_drv *drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+	if (!amdgpu_ras_smu_eeprom_supported(adev))
+		return -EOPNOTSUPP;
+
+	if (!drv->smu_eeprom_funcs->erase_ras_table)
+		return -EOPNOTSUPP;
+
+	return drv->smu_eeprom_funcs->erase_ras_table(adev, result);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
index 6bb00578bfbb..2e5d63957e71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
@@ -26,8 +26,17 @@
#include <linux/i2c.h>
+#define RAS_TABLE_VER_V1 0x00010000
+#define RAS_TABLE_VER_V2_1 0x00021000
+#define RAS_TABLE_VER_V3 0x00030000
+
struct amdgpu_device;
+enum amdgpu_ras_gpu_health_status {
+ GPU_HEALTH_USABLE = 0,
+ GPU_RETIRED__ECC_REACH_THRESHOLD = 2,
+};
+
enum amdgpu_ras_eeprom_err_type {
AMDGPU_RAS_EEPROM_ERR_NA,
AMDGPU_RAS_EEPROM_ERR_RECOVERABLE,
@@ -43,9 +52,18 @@ struct amdgpu_ras_eeprom_table_header {
uint32_t checksum;
} __packed;
+/*
+ * RAS info block of the EEPROM table.  The structure is packed: the three
+ * leading fields occupy 4 bytes and the padding array adds 63 * 4 bytes,
+ * for 256 bytes in total.
+ */
+struct amdgpu_ras_eeprom_table_ras_info {
+ u8 rma_status;
+ u8 health_percent;
+ u16 ecc_page_threshold;
+ /* Pads the block out to 64 32-bit words. */
+ u32 padding[64 - 1];
+} __packed;
+
struct amdgpu_ras_eeprom_control {
struct amdgpu_ras_eeprom_table_header tbl_hdr;
+ struct amdgpu_ras_eeprom_table_ras_info tbl_rai;
+
/* Base I2C EEPPROM 19-bit memory address,
* where the table is located. For more information,
* see top of amdgpu_eeprom.c.
@@ -58,11 +76,24 @@ struct amdgpu_ras_eeprom_control {
* right after the header.
*/
u32 ras_header_offset;
+ u32 ras_info_offset;
u32 ras_record_offset;
/* Number of records in the table.
*/
u32 ras_num_recs;
+ u32 ras_num_recs_old;
+
+ /* the bad page number is ras_num_recs or
+ * ras_num_recs * umc.retire_unit
+ */
+ u32 ras_num_bad_pages;
+
+ /* Number of records store mca address */
+ u32 ras_num_mca_recs;
+
+ /* Number of records store physical address */
+ u32 ras_num_pa_recs;
/* First record index to read, 0-based.
* Range is [0, num_recs-1]. This is
@@ -80,6 +111,12 @@ struct amdgpu_ras_eeprom_control {
/* Protect table access via this mutex.
*/
struct mutex ras_tbl_mutex;
+
+ /* Record channel info which occurred bad pages
+ */
+ u32 bad_channel_bitmap;
+
+ bool is_eeprom_valid;
};
/*
@@ -107,8 +144,7 @@ struct eeprom_table_record {
unsigned char mcumc_id;
} __packed;
-int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
- bool *exceed_err_limit);
+int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control);
int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control);
@@ -120,10 +156,43 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
struct eeprom_table_record *records, const u32 num);
-uint32_t amdgpu_ras_eeprom_max_record_count(void);
+uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control);
void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control);
+int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control);
+
+void amdgpu_ras_eeprom_check_and_recover(struct amdgpu_device *adev);
+
+bool amdgpu_ras_smu_eeprom_supported(struct amdgpu_device *adev);
+
+int amdgpu_ras_smu_get_table_version(struct amdgpu_device *adev,
+ uint32_t *table_version);
+
+int amdgpu_ras_smu_get_badpage_count(struct amdgpu_device *adev,
+ uint32_t *count, uint32_t timeout);
+
+int amdgpu_ras_smu_get_badpage_mca_addr(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *mca_addr);
+
+int amdgpu_ras_smu_set_timestamp(struct amdgpu_device *adev,
+ uint64_t timestamp);
+
+int amdgpu_ras_smu_get_timestamp(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *timestamp);
+
+int amdgpu_ras_smu_get_badpage_ipid(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *ipid);
+
+int amdgpu_ras_smu_erase_ras_table(struct amdgpu_device *adev,
+ uint32_t *result);
+
+int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record, u32 rec_idx,
+ const u32 num);
+
+int amdgpu_ras_eeprom_update_record_num(struct amdgpu_ras_eeprom_control *control);
+
extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops;
extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index acfa207cf970..be2e56ce1355 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -30,12 +30,15 @@
#include <drm/ttm/ttm_resource.h>
#include <drm/ttm/ttm_range_manager.h>
+#include "amdgpu_vram_mgr.h"
+
/* state back for walking over vram_mgr and gtt_mgr allocations */
struct amdgpu_res_cursor {
uint64_t start;
uint64_t size;
uint64_t remaining;
- struct drm_mm_node *node;
+ void *node;
+ uint32_t mem_type;
};
/**
@@ -52,27 +55,64 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
uint64_t start, uint64_t size,
struct amdgpu_res_cursor *cur)
{
+ struct drm_buddy_block *block;
+ struct list_head *head, *next;
struct drm_mm_node *node;
- if (!res || res->mem_type == TTM_PL_SYSTEM) {
- cur->start = start;
- cur->size = size;
+ if (!res)
+ goto fallback;
+
+ BUG_ON(start + size > res->size);
+
+ cur->mem_type = res->mem_type;
+
+ switch (cur->mem_type) {
+ case TTM_PL_VRAM:
+ head = &to_amdgpu_vram_mgr_resource(res)->blocks;
+
+ block = list_first_entry_or_null(head,
+ struct drm_buddy_block,
+ link);
+ if (!block)
+ goto fallback;
+
+ while (start >= amdgpu_vram_mgr_block_size(block)) {
+ start -= amdgpu_vram_mgr_block_size(block);
+
+ next = block->link.next;
+ if (next != head)
+ block = list_entry(next, struct drm_buddy_block, link);
+ }
+
+ cur->start = amdgpu_vram_mgr_block_start(block) + start;
+ cur->size = min(amdgpu_vram_mgr_block_size(block) - start, size);
cur->remaining = size;
- cur->node = NULL;
- WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT);
- return;
- }
+ cur->node = block;
+ break;
+ case TTM_PL_TT:
+ case AMDGPU_PL_DOORBELL:
+ case AMDGPU_PL_MMIO_REMAP:
+ node = to_ttm_range_mgr_node(res)->mm_nodes;
+ while (start >= node->size << PAGE_SHIFT)
+ start -= node++->size << PAGE_SHIFT;
- BUG_ON(start + size > res->num_pages << PAGE_SHIFT);
+ cur->start = (node->start << PAGE_SHIFT) + start;
+ cur->size = min((node->size << PAGE_SHIFT) - start, size);
+ cur->remaining = size;
+ cur->node = node;
+ break;
+ default:
+ goto fallback;
+ }
- node = to_ttm_range_mgr_node(res)->mm_nodes;
- while (start >= node->size << PAGE_SHIFT)
- start -= node++->size << PAGE_SHIFT;
+ return;
- cur->start = (node->start << PAGE_SHIFT) + start;
- cur->size = min((node->size << PAGE_SHIFT) - start, size);
+fallback:
+ cur->start = start;
+ cur->size = size;
cur->remaining = size;
- cur->node = node;
+ cur->node = NULL;
+ WARN_ON(res && start + size > res->size);
}
/**
@@ -85,7 +125,9 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
*/
static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
{
- struct drm_mm_node *node = cur->node;
+ struct drm_buddy_block *block;
+ struct drm_mm_node *node;
+ struct list_head *next;
BUG_ON(size > cur->remaining);
@@ -99,9 +141,54 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
return;
}
- cur->node = ++node;
- cur->start = node->start << PAGE_SHIFT;
- cur->size = min(node->size << PAGE_SHIFT, cur->remaining);
+ switch (cur->mem_type) {
+ case TTM_PL_VRAM:
+ block = cur->node;
+
+ next = block->link.next;
+ block = list_entry(next, struct drm_buddy_block, link);
+
+ cur->node = block;
+ cur->start = amdgpu_vram_mgr_block_start(block);
+ cur->size = min(amdgpu_vram_mgr_block_size(block), cur->remaining);
+ break;
+ case TTM_PL_TT:
+ case AMDGPU_PL_DOORBELL:
+ case AMDGPU_PL_MMIO_REMAP:
+ node = cur->node;
+
+ cur->node = ++node;
+ cur->start = node->start << PAGE_SHIFT;
+ cur->size = min(node->size << PAGE_SHIFT, cur->remaining);
+ break;
+ default:
+ return;
+ }
+}
+
+/**
+ * amdgpu_res_cleared - report whether the cursor's current block is cleared
+ *
+ * @cur: cursor positioned on the block to inspect
+ *
+ * Only a VRAM cursor can report a cleared block; every other memory type
+ * yields false.
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+	struct drm_buddy_block *block;
+
+	if (cur->mem_type != TTM_PL_VRAM)
+		return false;
+
+	/* For VRAM the cursor node is the current buddy block. */
+	block = cur->node;
+
+	return amdgpu_vram_mgr_is_cleared(block);
}
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index 02afd4115675..28c4ad62f50e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -23,23 +23,176 @@
#include "amdgpu_reset.h"
#include "aldebaran.h"
+#include "sienna_cichlid.h"
+#include "smu_v13_0_10.h"
-int amdgpu_reset_add_handler(struct amdgpu_reset_control *reset_ctl,
- struct amdgpu_reset_handler *handler)
+/*
+ * Suspend the IP blocks of @adev ahead of an XGMI reset-on-init.
+ *
+ * Walks the IP blocks from last to first, skipping blocks that are not
+ * valid, not hardware-initialised, or of type DCE, suspends each remaining
+ * block and clears its status.hw flag.  The VCN vcpu buffer object is saved
+ * afterwards.  Suspend errors are currently ignored, so this always
+ * returns 0.
+ */
+static int amdgpu_reset_xgmi_reset_on_init_suspend(struct amdgpu_device *adev)
{
- /* TODO: Check if handler exists? */
- list_add_tail(&handler->handler_list, &reset_ctl->reset_handlers);
+ int i;
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (!adev->ip_blocks[i].status.hw)
+ continue;
+ /* displays are handled in phase1 */
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
+ continue;
+
+ /* XXX handle errors */
+ amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ adev->ip_blocks[i].status.hw = false;
+ }
+
+ /* VCN FW shared region is in framebuffer, there are some flags
+ * initialized in that region during sw_init. Make sure the region is
+ * backed up.
+ */
+ amdgpu_vcn_save_vcpu_bo(adev);
+
return 0;
}
+/*
+ * Prepare every device on the reset list for an XGMI reset-on-init.
+ *
+ * Each device is unregistered as a GPU instance and its IP blocks are
+ * suspended.  The walk stops at the first device whose suspend fails.
+ *
+ * Return: 0 on success (including an empty device list), otherwise the
+ * error from the failing suspend.
+ */
+static int amdgpu_reset_xgmi_reset_on_init_prep_hwctxt(
+	struct amdgpu_reset_control *reset_ctl,
+	struct amdgpu_reset_context *reset_context)
+{
+	struct list_head *reset_device_list = reset_context->reset_device_list;
+	struct amdgpu_device *tmp_adev;
+	/* Initialised so an empty list does not return an indeterminate value. */
+	int r = 0;
+
+	list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+		amdgpu_unregister_gpu_instance(tmp_adev);
+		r = amdgpu_reset_xgmi_reset_on_init_suspend(tmp_adev);
+		if (r) {
+			dev_err(tmp_adev->dev,
+				"xgmi reset on init: prepare for reset failed");
+			return r;
+		}
+	}
+
+	return r;
+}
+
+/*
+ * Restore hardware context after an XGMI reset-on-init.
+ *
+ * Re-initialises the devices through amdgpu_device_reinit_after_reset()
+ * and then brings up KFD on every listed device whose KFD initialisation
+ * has not completed yet.
+ *
+ * Return: 0 on success, otherwise the re-initialisation error.
+ */
+static int amdgpu_reset_xgmi_reset_on_init_restore_hwctxt(
+	struct amdgpu_reset_control *reset_ctl,
+	struct amdgpu_reset_context *reset_context)
+{
+	struct list_head *devices = reset_context->reset_device_list;
+	struct amdgpu_device *dev;
+	int ret;
+
+	ret = amdgpu_device_reinit_after_reset(reset_context);
+	if (ret)
+		return ret;
+
+	list_for_each_entry(dev, devices, reset_list) {
+		if (dev->kfd.init_complete)
+			continue;
+
+		kgd2kfd_init_zone_device(dev);
+		amdgpu_amdkfd_device_init(dev);
+		amdgpu_amdkfd_drm_client_create(dev);
+	}
+
+	return 0;
+}
+
+/*
+ * Perform the hardware reset for an XGMI reset-on-init.
+ *
+ * Takes every listed device's reset_lock and records the active reset
+ * method, queues each device's xgmi_reset_work on system_unbound_wq, waits
+ * for the queued work to finish, and finally clears the active reset
+ * method and drops the locks.
+ *
+ * Return: 0 on success, -EALREADY if a device's reset work was already
+ * queued, or the first non-zero asic_reset_res reported by a device.
+ */
+static int amdgpu_reset_xgmi_reset_on_init_perform_reset(
+	struct amdgpu_reset_control *reset_ctl,
+	struct amdgpu_reset_context *reset_context)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+	struct list_head *reset_device_list = reset_context->reset_device_list;
+	struct amdgpu_device *tmp_adev = NULL;
+	int r = 0;
+
+	dev_dbg(adev->dev, "xgmi roi - hw reset\n");
+
+	list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+		mutex_lock(&tmp_adev->reset_cntl->reset_lock);
+		tmp_adev->reset_cntl->active_reset =
+			amdgpu_asic_reset_method(adev);
+	}
+
+	/* Mode1 reset needs to be triggered on all devices together */
+	list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+		/* For XGMI run all resets in parallel to speed up the process */
+		if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
+			r = -EALREADY;
+		if (r) {
+			dev_err(tmp_adev->dev,
+				"xgmi reset on init: reset failed with error, %d",
+				r);
+			break;
+		}
+	}
+
+	/* For XGMI wait for all resets to complete before proceed */
+	if (!r) {
+		list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+			flush_work(&tmp_adev->xgmi_reset_work);
+			r = tmp_adev->asic_reset_res;
+			if (r)
+				break;
+		}
+	}
+
+	list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+		/*
+		 * active_reset was set while holding reset_lock, so clear it
+		 * before the lock is released rather than after.
+		 */
+		tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_NONE;
+		mutex_unlock(&tmp_adev->reset_cntl->reset_lock);
+	}
+
+	return r;
+}
+
+/*
+ * amdgpu_reset_do_xgmi_reset_on_init - run the XGMI reset-on-init sequence
+ * @reset_context: reset context carrying the list of devices to reset
+ *
+ * The sequence is driven through the first device on the list: its
+ * hardware context is prepared and then the reset is performed.
+ *
+ * Return: -EINVAL when the device list is missing, empty or holds a single
+ * device; otherwise the result of the prepare or perform step.
+ */
+int amdgpu_reset_do_xgmi_reset_on_init(
+	struct amdgpu_reset_context *reset_context)
+{
+	struct list_head *devices = reset_context->reset_device_list;
+	struct amdgpu_device *first;
+	int ret;
+
+	if (!devices || list_empty(devices) || list_is_singular(devices))
+		return -EINVAL;
+
+	first = list_first_entry(devices, struct amdgpu_device, reset_list);
+
+	ret = amdgpu_reset_prepare_hwcontext(first, reset_context);
+	if (ret)
+		return ret;
+
+	return amdgpu_reset_perform_reset(first, reset_context);
+}
+
+/*
+ * Reset handler for AMD_RESET_METHOD_ON_INIT.  Only the hardware-context
+ * prepare/restore and perform-reset callbacks are provided; the environment
+ * callbacks and do_reset are left NULL.
+ */
+struct amdgpu_reset_handler xgmi_reset_on_init_handler = {
+ .reset_method = AMD_RESET_METHOD_ON_INIT,
+ .prepare_env = NULL,
+ .prepare_hwcontext = amdgpu_reset_xgmi_reset_on_init_prep_hwctxt,
+ .perform_reset = amdgpu_reset_xgmi_reset_on_init_perform_reset,
+ .restore_hwcontext = amdgpu_reset_xgmi_reset_on_init_restore_hwctxt,
+ .restore_env = NULL,
+ .do_reset = NULL,
+};
+
int amdgpu_reset_init(struct amdgpu_device *adev)
{
int ret = 0;
- switch (adev->asic_type) {
- case CHIP_ALDEBARAN:
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
ret = aldebaran_reset_init(adev);
break;
+ case IP_VERSION(11, 0, 7):
+ ret = sienna_cichlid_reset_init(adev);
+ break;
+ case IP_VERSION(13, 0, 10):
+ ret = smu_v13_0_10_reset_init(adev);
+ break;
default:
break;
}
@@ -51,10 +204,19 @@ int amdgpu_reset_fini(struct amdgpu_device *adev)
{
int ret = 0;
- switch (adev->asic_type) {
- case CHIP_ALDEBARAN:
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
ret = aldebaran_reset_fini(adev);
break;
+ case IP_VERSION(11, 0, 7):
+ ret = sienna_cichlid_reset_fini(adev);
+ break;
+ case IP_VERSION(13, 0, 10):
+ ret = smu_v13_0_10_reset_fini(adev);
+ break;
default:
break;
}
@@ -71,7 +233,7 @@ int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev,
reset_handler = adev->reset_cntl->get_reset_handler(
adev->reset_cntl, reset_context);
if (!reset_handler)
- return -ENOSYS;
+ return -EOPNOTSUPP;
return reset_handler->prepare_hwcontext(adev->reset_cntl,
reset_context);
@@ -87,7 +249,7 @@ int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
reset_handler = adev->reset_cntl->get_reset_handler(
adev->reset_cntl, reset_context);
if (!reset_handler)
- return -ENOSYS;
+ return -EOPNOTSUPP;
ret = reset_handler->perform_reset(adev->reset_cntl, reset_context);
if (ret)
@@ -96,3 +258,97 @@ int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
return reset_handler->restore_hwcontext(adev->reset_cntl,
reset_context);
}
+
+
+/*
+ * kref release callback for a reset domain: destroys the domain's
+ * workqueue, if one was created, and frees the domain.
+ */
+void amdgpu_reset_destroy_reset_domain(struct kref *ref)
+{
+	struct amdgpu_reset_domain *domain;
+
+	domain = container_of(ref, struct amdgpu_reset_domain, refcount);
+
+	if (domain->wq)
+		destroy_workqueue(domain->wq);
+
+	kvfree(domain);
+}
+
+/*
+ * amdgpu_reset_create_reset_domain - allocate and initialise a reset domain
+ * @type: kind of domain to create
+ * @wq_name: name given to the domain's single-threaded workqueue
+ *
+ * Return: the new domain holding one reference, or NULL when the domain or
+ * its workqueue cannot be allocated.
+ */
+struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_domain_type type,
+							     char *wq_name)
+{
+	struct amdgpu_reset_domain *domain;
+
+	domain = kvzalloc(sizeof(*domain), GFP_KERNEL);
+	if (!domain) {
+		DRM_ERROR("Failed to allocate amdgpu_reset_domain!");
+		return NULL;
+	}
+
+	domain->type = type;
+	kref_init(&domain->refcount);
+	atomic_set(&domain->in_gpu_reset, 0);
+	atomic_set(&domain->reset_res, 0);
+	init_rwsem(&domain->sem);
+
+	domain->wq = create_singlethread_workqueue(wq_name);
+	if (domain->wq)
+		return domain;
+
+	DRM_ERROR("Failed to allocate wq for amdgpu_reset_domain!");
+	/* Dropping the only reference frees the partially built domain. */
+	amdgpu_reset_put_reset_domain(domain);
+	return NULL;
+}
+
+/*
+ * Mark @reset_domain as being in GPU reset, then take its semaphore for
+ * writing.  The flag is set before the semaphore is acquired.
+ */
+void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain)
+{
+ atomic_set(&reset_domain->in_gpu_reset, 1);
+ down_write(&reset_domain->sem);
+}
+
+
+/*
+ * Clear @reset_domain's in-GPU-reset flag, then release the write side of
+ * its semaphore.
+ */
+void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain)
+{
+ atomic_set(&reset_domain->in_gpu_reset, 0);
+ up_write(&reset_domain->sem);
+}
+
+/*
+ * amdgpu_reset_get_desc - write a short description of the reset source
+ * @rst_ctxt: reset context whose source (and job, if any) is described
+ * @buf: destination buffer; nothing is written when it is NULL
+ * @len: size of @buf; nothing is written when it is 0
+ *
+ * A job-triggered reset with a job attached names the scheduler ring of
+ * that job; every other case copies a fixed string.
+ */
+void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf,
+			   size_t len)
+{
+	const char *desc;
+
+	if (!buf || !len)
+		return;
+
+	switch (rst_ctxt->src) {
+	case AMDGPU_RESET_SRC_JOB:
+		if (rst_ctxt->job) {
+			snprintf(buf, len, "job hang on ring:%s",
+				 rst_ctxt->job->base.sched->name);
+			return;
+		}
+		desc = "job hang";
+		break;
+	case AMDGPU_RESET_SRC_RAS:
+		desc = "RAS error";
+		break;
+	case AMDGPU_RESET_SRC_MES:
+		desc = "MES hang";
+		break;
+	case AMDGPU_RESET_SRC_HWS:
+		desc = "HWS hang";
+		break;
+	case AMDGPU_RESET_SRC_USER:
+		desc = "user trigger";
+		break;
+	case AMDGPU_RESET_SRC_USERQ:
+		desc = "user queue trigger";
+		break;
+	default:
+		desc = "unknown";
+		break;
+	}
+
+	strscpy(buf, desc, len);
+}
+
+/* Return true when @adev's init level is AMDGPU_INIT_LEVEL_RESET_RECOVERY. */
+bool amdgpu_reset_in_recovery(struct amdgpu_device *adev)
+{
+	return adev->init_lvl->level == AMDGPU_INIT_LEVEL_RESET_RECOVERY;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index e00d38d9160a..07b4d37f1db6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -26,10 +26,24 @@
#include "amdgpu.h"
+#define AMDGPU_RESET_MAX_HANDLERS 5
+
enum AMDGPU_RESET_FLAGS {
AMDGPU_NEED_FULL_RESET = 0,
AMDGPU_SKIP_HW_RESET = 1,
+ AMDGPU_SKIP_COREDUMP = 2,
+ AMDGPU_HOST_FLR = 3,
+};
+
+enum AMDGPU_RESET_SRCS {
+ AMDGPU_RESET_SRC_UNKNOWN,
+ AMDGPU_RESET_SRC_JOB,
+ AMDGPU_RESET_SRC_RAS,
+ AMDGPU_RESET_SRC_MES,
+ AMDGPU_RESET_SRC_HWS,
+ AMDGPU_RESET_SRC_USER,
+ AMDGPU_RESET_SRC_USERQ,
};
struct amdgpu_reset_context {
@@ -37,12 +51,13 @@ struct amdgpu_reset_context {
struct amdgpu_device *reset_req_dev;
struct amdgpu_job *job;
struct amdgpu_hive_info *hive;
+ struct list_head *reset_device_list;
unsigned long flags;
+ enum AMDGPU_RESET_SRCS src;
};
struct amdgpu_reset_handler {
enum amd_reset_method reset_method;
- struct list_head handler_list;
int (*prepare_env)(struct amdgpu_reset_control *reset_ctl,
struct amdgpu_reset_context *context);
int (*prepare_hwcontext)(struct amdgpu_reset_control *reset_ctl,
@@ -61,7 +76,8 @@ struct amdgpu_reset_control {
void *handle;
struct work_struct reset_work;
struct mutex reset_lock;
- struct list_head reset_handlers;
+ struct amdgpu_reset_handler *(
+ *reset_handlers)[AMDGPU_RESET_MAX_HANDLERS];
atomic_t in_reset;
enum amd_reset_method active_reset;
struct amdgpu_reset_handler *(*get_reset_handler)(
@@ -70,6 +86,21 @@ struct amdgpu_reset_control {
void (*async_reset)(struct work_struct *work);
};
+
+enum amdgpu_reset_domain_type {
+ SINGLE_DEVICE,
+ XGMI_HIVE
+};
+
+/* Reference-counted state shared by the devices of one reset domain. */
+struct amdgpu_reset_domain {
+ /* Released through amdgpu_reset_destroy_reset_domain(). */
+ struct kref refcount;
+ /* Workqueue on which the domain's reset work is queued. */
+ struct workqueue_struct *wq;
+ enum amdgpu_reset_domain_type type;
+ /* Taken for writing by amdgpu_device_lock_reset_domain(). */
+ struct rw_semaphore sem;
+ /* Set to 1 when the domain is locked for reset and 0 when unlocked. */
+ atomic_t in_gpu_reset;
+ atomic_t reset_res;
+};
+
int amdgpu_reset_init(struct amdgpu_device *adev);
int amdgpu_reset_fini(struct amdgpu_device *adev);
@@ -79,7 +110,67 @@ int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev,
int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
struct amdgpu_reset_context *reset_context);
-int amdgpu_reset_add_handler(struct amdgpu_reset_control *reset_ctl,
- struct amdgpu_reset_handler *handler);
+int amdgpu_reset_prepare_env(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context);
+int amdgpu_reset_restore_env(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context);
+
+struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_domain_type type,
+ char *wq_name);
+
+void amdgpu_reset_destroy_reset_domain(struct kref *ref);
+
+/*
+ * Try to take a reference on @domain.
+ *
+ * Returns true when the reference was taken and false when the refcount had
+ * already reached zero. @domain is dereferenced without a NULL check, so the
+ * caller must pass a valid pointer.
+ */
+static inline bool amdgpu_reset_get_reset_domain(struct amdgpu_reset_domain *domain)
+{
+ return kref_get_unless_zero(&domain->refcount) != 0;
+}
+
+/*
+ * Drop a reference on @domain; amdgpu_reset_destroy_reset_domain() is the
+ * release callback handed to kref_put(). A NULL @domain is ignored.
+ */
+static inline void amdgpu_reset_put_reset_domain(struct amdgpu_reset_domain *domain)
+{
+ if (domain)
+ kref_put(&domain->refcount, amdgpu_reset_destroy_reset_domain);
+}
+
+/*
+ * Queue @work on the workqueue owned by @domain.
+ *
+ * Returns the result of queue_work(), i.e. false when @work was already
+ * pending on a queue and true otherwise.
+ */
+static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *domain,
+ struct work_struct *work)
+{
+ return queue_work(domain->wq, work);
+}
+
+/*
+ * Report whether @domain->sem is currently contended.
+ *
+ * The caller must hold @domain->sem; this is asserted through lockdep.
+ * NOTE(review): presumably contention is taken as a hint that a reset is
+ * waiting for the semaphore - confirm with the callers.
+ */
+static inline bool amdgpu_reset_pending(struct amdgpu_reset_domain *domain)
+{
+ lockdep_assert_held(&domain->sem);
+ return rwsem_is_contended(&domain->sem);
+}
+
+void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);
+
+void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain);
+
+void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf,
+ size_t len);
+
+/*
+ * Iterate over the handlers registered in @reset_ctl->reset_handlers.
+ *
+ * @i: integer lvalue used as the loop index
+ * @handler: struct amdgpu_reset_handler * lvalue set on every iteration
+ * @reset_ctl: pointer to the struct amdgpu_reset_control to walk
+ *
+ * The walk stops after AMDGPU_RESET_MAX_HANDLERS entries or at the first
+ * NULL entry, whichever comes first. Every macro argument is parenthesized
+ * so that expression arguments expand with the intended precedence.
+ */
+#define for_each_handler(i, handler, reset_ctl) \
+ for ((i) = 0; ((i) < AMDGPU_RESET_MAX_HANDLERS) && \
+ ((handler) = (*(reset_ctl)->reset_handlers)[(i)]); \
+ ++(i))
+
+extern struct amdgpu_reset_handler xgmi_reset_on_init_handler;
+int amdgpu_reset_do_xgmi_reset_on_init(
+ struct amdgpu_reset_context *reset_context);
+
+bool amdgpu_reset_in_recovery(struct amdgpu_device *adev);
+
+/*
+ * Record the DPC status of @adev: @status is stored in
+ * adev->pcie_reset_ctx.occurs_dpc and mirrored into adev->no_hw_access.
+ */
+static inline void amdgpu_reset_set_dpc_status(struct amdgpu_device *adev,
+ bool status)
+{
+ adev->pcie_reset_ctx.occurs_dpc = status;
+ adev->no_hw_access = status;
+}
+
+/*
+ * Return the DPC status last stored by amdgpu_reset_set_dpc_status()
+ * (adev->pcie_reset_ctx.occurs_dpc).
+ */
+static inline bool amdgpu_reset_in_dpc(struct amdgpu_device *adev)
+{
+ return adev->pcie_reset_ctx.occurs_dpc;
+}
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index ab2351ba9574..c596b6df2e2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -33,6 +33,7 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
+#include "amdgpu_ras_mgr.h"
#include "atom.h"
/*
@@ -50,6 +51,26 @@
*/
/**
+ * amdgpu_ring_max_ibs - Return max IBs that fit in a single submission.
+ *
+ * @type: ring type for which to return the limit.
+ *
+ * Return: 192 for GFX rings, 125 for compute rings, 16 for VCN JPEG rings
+ * and 49 for every other ring type.
+ */
+unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type)
+{
+ switch (type) {
+ case AMDGPU_RING_TYPE_GFX:
+ /* Need to keep at least 192 on GFX7+ for old radv. */
+ return 192;
+ case AMDGPU_RING_TYPE_COMPUTE:
+ return 125;
+ case AMDGPU_RING_TYPE_VCN_JPEG:
+ return 16;
+ default:
+ return 49;
+ }
+}
+
+/**
* amdgpu_ring_alloc - allocate space on the ring buffer
*
* @ring: amdgpu_ring structure holding ring information
@@ -58,7 +79,7 @@
* Allocate @ndw dwords in the ring buffer (all asics).
* Returns 0 on success, error on failure.
*/
-int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
+int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
{
/* Align requested size with padding so unlock_commit can
* pad safely */
@@ -79,6 +100,29 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
return 0;
}
+/**
+ * amdgpu_ring_alloc_reemit - allocate space on the ring buffer for reemit
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ndw: number of dwords to allocate in the ring buffer
+ *
+ * Allocate @ndw dwords in the ring buffer (all asics).
+ * This variant doesn't check the max_dw limit as we may be reemitting
+ * several submissions. It rounds @ndw up to the ring alignment, stores the
+ * result in count_dw, saves the current wptr in wptr_old and calls the
+ * optional begin_use() hook.
+ */
+static void amdgpu_ring_alloc_reemit(struct amdgpu_ring *ring, unsigned int ndw)
+{
+ /* Align requested size with padding so unlock_commit can
+ * pad safely */
+ ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
+
+ ring->count_dw = ndw;
+ ring->wptr_old = ring->wptr;
+
+ if (ring->funcs->begin_use)
+ ring->funcs->begin_use(ring);
+}
+
/** amdgpu_ring_insert_nop - insert NOP packets
*
* @ring: amdgpu_ring structure holding ring information
@@ -88,10 +132,22 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
*/
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
- int i;
+ uint32_t occupied, chunk1, chunk2;
+
+ occupied = ring->wptr & ring->buf_mask;
+ chunk1 = ring->buf_mask + 1 - occupied;
+ chunk1 = (chunk1 >= count) ? count : chunk1;
+ chunk2 = count - chunk1;
+
+ if (chunk1)
+ memset32(&ring->ring[occupied], ring->funcs->nop, chunk1);
- for (i = 0; i < count; i++)
- amdgpu_ring_write(ring, ring->funcs->nop);
+ if (chunk2)
+ memset32(ring->ring, ring->funcs->nop, chunk2);
+
+ ring->wptr += count;
+ ring->wptr &= ring->ptr_mask;
+ ring->count_dw -= count;
}
/**
@@ -104,8 +160,16 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
*/
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
- while (ib->length_dw & ring->funcs->align_mask)
- ib->ptr[ib->length_dw++] = ring->funcs->nop;
+ u32 align_mask = ring->funcs->align_mask;
+ u32 count = ib->length_dw & align_mask;
+
+ if (count) {
+ count = align_mask + 1 - count;
+
+ memset32(&ib->ptr[ib->length_dw], ring->funcs->nop, count);
+
+ ib->length_dw += count;
+ }
}
/**
@@ -121,11 +185,16 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
{
uint32_t count;
+ if (ring->count_dw < 0)
+ DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
+
/* We pad to match fetch size */
count = ring->funcs->align_mask + 1 -
(ring->wptr & ring->funcs->align_mask);
- count %= ring->funcs->align_mask + 1;
- ring->funcs->insert_nop(ring, count);
+ count &= ring->funcs->align_mask;
+
+ if (count != 0)
+ ring->funcs->insert_nop(ring, count);
mb();
amdgpu_ring_set_wptr(ring);
@@ -149,6 +218,12 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
ring->funcs->end_use(ring);
}
+/*
+ * GPU address of writeback slot @offset for @ring's device; the slot index
+ * is scaled by 4 bytes. The arguments are parenthesized so that an
+ * expression such as "base + 1" is scaled as a whole.
+ */
+#define amdgpu_ring_get_gpu_addr(ring, offset) \
+ ((ring)->adev->wb.gpu_addr + (offset) * 4)
+
+/* CPU pointer to writeback slot @offset for @ring's device. */
+#define amdgpu_ring_get_cpu_addr(ring, offset) \
+ (&(ring)->adev->wb.wb[(offset)])
+
/**
* amdgpu_ring_init - init driver ring struct.
*
@@ -172,6 +247,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
int sched_hw_submission = amdgpu_sched_hw_submission;
u32 *num_sched;
u32 hw_ip;
+ unsigned int max_ibs_dw;
/* Set the hw submission limit higher for KIQ because
* it's used for a number of gfx/compute tasks by both
@@ -181,6 +257,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
*/
if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
sched_hw_submission = max(sched_hw_submission, 256);
+ if (ring->funcs->type == AMDGPU_RING_TYPE_MES)
+ sched_hw_submission = 8;
else if (ring == &adev->sdma.instance[0].page)
sched_hw_submission = 256;
@@ -189,10 +267,14 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
return -EINVAL;
ring->adev = adev;
+ ring->num_hw_submission = sched_hw_submission;
+ ring->sched_score = sched_score;
+ ring->vmid_wait = dma_fence_get_stub();
+
ring->idx = adev->num_rings++;
adev->rings[ring->idx] = ring;
- r = amdgpu_fence_driver_init_ring(ring, sched_hw_submission,
- sched_score);
+
+ r = amdgpu_fence_driver_init_ring(ring);
if (r)
return r;
}
@@ -217,44 +299,89 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
if (r) {
- dev_err(adev->dev,
- "(%d) ring trail_fence_offs wb alloc failed\n", r);
+ dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r);
return r;
}
- ring->trail_fence_gpu_addr =
- adev->wb.gpu_addr + (ring->trail_fence_offs * 4);
- ring->trail_fence_cpu_addr = &adev->wb.wb[ring->trail_fence_offs];
r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
if (r) {
dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
return r;
}
- ring->cond_exe_gpu_addr = adev->wb.gpu_addr + (ring->cond_exe_offs * 4);
- ring->cond_exe_cpu_addr = &adev->wb.wb[ring->cond_exe_offs];
+
+ ring->fence_gpu_addr =
+ amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
+ ring->fence_cpu_addr =
+ amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);
+
+ ring->rptr_gpu_addr =
+ amdgpu_ring_get_gpu_addr(ring, ring->rptr_offs);
+ ring->rptr_cpu_addr =
+ amdgpu_ring_get_cpu_addr(ring, ring->rptr_offs);
+
+ ring->wptr_gpu_addr =
+ amdgpu_ring_get_gpu_addr(ring, ring->wptr_offs);
+ ring->wptr_cpu_addr =
+ amdgpu_ring_get_cpu_addr(ring, ring->wptr_offs);
+
+ ring->trail_fence_gpu_addr =
+ amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
+ ring->trail_fence_cpu_addr =
+ amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);
+
+ ring->cond_exe_gpu_addr =
+ amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
+ ring->cond_exe_cpu_addr =
+ amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);
+
/* always set cond_exec_polling to CONTINUE */
*ring->cond_exe_cpu_addr = 1;
- r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
- if (r) {
- dev_err(adev->dev, "failed initializing fences (%d).\n", r);
- return r;
- }
+ if (ring->funcs->type != AMDGPU_RING_TYPE_CPER) {
+ r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
+ if (r) {
+ dev_err(adev->dev, "failed initializing fences (%d).\n", r);
+ return r;
+ }
+
+ max_ibs_dw = ring->funcs->emit_frame_size +
+ amdgpu_ring_max_ibs(ring->funcs->type) * ring->funcs->emit_ib_size;
+ max_ibs_dw = (max_ibs_dw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
+
+ if (WARN_ON(max_ibs_dw > max_dw))
+ max_dw = max_ibs_dw;
- ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
+ ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
+ } else {
+ ring->ring_size = roundup_pow_of_two(max_dw * 4);
+ ring->count_dw = (ring->ring_size - 4) >> 2;
+ /* ring buffer is empty now */
+ ring->wptr = *ring->rptr_cpu_addr = 0;
+ }
ring->buf_mask = (ring->ring_size / 4) - 1;
ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
0xffffffffffffffff : ring->buf_mask;
+ /* Initialize cached_rptr to 0 */
+ ring->cached_rptr = 0;
+
+ if (!ring->ring_backup) {
+ ring->ring_backup = kvzalloc(ring->ring_size, GFP_KERNEL);
+ if (!ring->ring_backup)
+ return -ENOMEM;
+ }
+
/* Allocate ring buffer */
if (ring->ring_obj == NULL) {
- r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE,
+ r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_bytes,
+ PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&ring->ring_obj,
&ring->gpu_addr,
(void **)&ring->ring);
if (r) {
dev_err(adev->dev, "(%d) ring create failed\n", r);
+ kvfree(ring->ring_backup);
return r;
}
amdgpu_ring_clear_ring(ring);
@@ -263,7 +390,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
ring->max_dw = max_dw;
ring->hw_prio = hw_prio;
- if (!ring->no_scheduler) {
+ if (!ring->no_scheduler && ring->funcs->type < AMDGPU_HW_IP_NUM) {
hw_ip = ring->funcs->type;
num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
@@ -298,12 +425,12 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
amdgpu_bo_free_kernel(&ring->ring_obj,
&ring->gpu_addr,
(void **)&ring->ring);
+ kvfree(ring->ring_backup);
+ ring->ring_backup = NULL;
dma_fence_put(ring->vmid_wait);
ring->vmid_wait = NULL;
ring->me = 0;
-
- ring->adev->rings[ring->idx] = NULL;
}
/**
@@ -338,17 +465,30 @@ void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
struct dma_fence *fence)
{
- ktime_t deadline = ktime_add_us(ktime_get(), 10000);
+ unsigned long flags;
+ ktime_t deadline;
+ bool ret;
+
+ deadline = ktime_add_us(ktime_get(), 10000);
if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
return false;
- atomic_inc(&ring->adev->gpu_reset_counter);
+ spin_lock_irqsave(fence->lock, flags);
+ if (!dma_fence_is_signaled_locked(fence))
+ dma_fence_set_error(fence, -ENODATA);
+ spin_unlock_irqrestore(fence->lock, flags);
+
while (!dma_fence_is_signaled(fence) &&
ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0)
ring->funcs->soft_recovery(ring, vmid);
- return dma_fence_is_signaled(fence);
+ ret = dma_fence_is_signaled(fence);
+ /* increment the counter only if soft reset worked */
+ if (ret)
+ atomic_inc(&ring->adev->gpu_reset_counter);
+
+ return ret;
}
/*
@@ -356,6 +496,66 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
*/
#if defined(CONFIG_DEBUG_FS)
+/*
+ * debugfs read handler that fetches CPER records through the RAS manager.
+ *
+ * @f: debugfs file; the inode's i_private is the struct amdgpu_ring
+ * @buf: user buffer to fill
+ * @size: size of @buf in bytes
+ * @offset: file position; here it counts CPER records already returned,
+ * not bytes (it is added to the snapshot's start_cper_id and advanced by
+ * real_cper_num)
+ *
+ * On the first read (*offset == 0) a 12 byte header is copied to @buf ahead
+ * of the records. The header buffer comes from kzalloc() and is never
+ * written, so the header is all zeroes.
+ *
+ * Returns the number of bytes reported by the RAS manager (plus the header
+ * size on the first read), 0 when the snapshot holds no records, or a
+ * negative error code. All temporary buffers are released automatically
+ * through __free(kfree).
+ */
+static ssize_t amdgpu_ras_cper_debugfs_read(struct file *f, char __user *buf,
+ size_t size, loff_t *offset)
+{
+ const uint8_t ring_header_size = 12;
+ struct amdgpu_ring *ring = file_inode(f)->i_private;
+ struct ras_cmd_cper_snapshot_req *snapshot_req __free(kfree) =
+ kzalloc(sizeof(struct ras_cmd_cper_snapshot_req), GFP_KERNEL);
+ struct ras_cmd_cper_snapshot_rsp *snapshot_rsp __free(kfree) =
+ kzalloc(sizeof(struct ras_cmd_cper_snapshot_rsp), GFP_KERNEL);
+ struct ras_cmd_cper_record_req *record_req __free(kfree) =
+ kzalloc(sizeof(struct ras_cmd_cper_record_req), GFP_KERNEL);
+ struct ras_cmd_cper_record_rsp *record_rsp __free(kfree) =
+ kzalloc(sizeof(struct ras_cmd_cper_record_rsp), GFP_KERNEL);
+ uint8_t *ring_header __free(kfree) =
+ kzalloc(ring_header_size, GFP_KERNEL);
+ uint32_t total_cper_num;
+ uint64_t start_cper_id;
+ int r;
+
+ if (!snapshot_req || !snapshot_rsp || !record_req || !record_rsp ||
+ !ring_header)
+ return -ENOMEM;
+
+ if (!(*offset)) {
+ /* Need at least 12 bytes for the header on the first read */
+ if (size < ring_header_size)
+ return -EINVAL;
+
+ if (copy_to_user(buf, ring_header, ring_header_size))
+ return -EFAULT;
+ buf += ring_header_size;
+ size -= ring_header_size;
+ }
+
+ r = amdgpu_ras_mgr_handle_ras_cmd(ring->adev,
+ RAS_CMD__GET_CPER_SNAPSHOT,
+ snapshot_req, sizeof(struct ras_cmd_cper_snapshot_req),
+ snapshot_rsp, sizeof(struct ras_cmd_cper_snapshot_rsp));
+ /*
+ * NOTE(review): with an empty snapshot this returns 0 even when the
+ * header was already copied to user space above - confirm that
+ * reporting EOF here is intended.
+ */
+ if (r || !snapshot_rsp->total_cper_num)
+ return r;
+
+ start_cper_id = snapshot_rsp->start_cper_id;
+ total_cper_num = snapshot_rsp->total_cper_num;
+
+ /* The user pointer is handed to the RAS manager, which fills it. */
+ record_req->buf_ptr = (uint64_t)(uintptr_t)buf;
+ record_req->buf_size = size;
+ record_req->cper_start_id = start_cper_id + *offset;
+ record_req->cper_num = total_cper_num;
+ r = amdgpu_ras_mgr_handle_ras_cmd(ring->adev, RAS_CMD__GET_CPER_RECORD,
+ record_req, sizeof(struct ras_cmd_cper_record_req),
+ record_rsp, sizeof(struct ras_cmd_cper_record_rsp));
+ if (r)
+ return r;
+
+ /* The first read also accounts for the header written above. */
+ r = *offset ? record_rsp->real_data_size : record_rsp->real_data_size + ring_header_size;
+ (*offset) += record_rsp->real_cper_num;
+
+ return r;
+}
+
/* Layout of file is 12 bytes consisting of
* - rptr
* - wptr
@@ -367,8 +567,13 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
struct amdgpu_ring *ring = file_inode(f)->i_private;
- int r, i;
uint32_t value, result, early[3];
+ uint64_t p;
+ loff_t i;
+ int r;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_CPER && amdgpu_uniras_enabled(ring->adev))
+ return amdgpu_ras_cper_debugfs_read(f, buf, size, pos);
if (*pos & 3 || size & 3)
return -EINVAL;
@@ -376,13 +581,18 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
result = 0;
if (*pos < 12) {
+ if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
+ mutex_lock(&ring->adev->cper.ring_lock);
+
early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask;
early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
early[2] = ring->wptr & ring->buf_mask;
for (i = *pos / 4; i < 3 && size; i++) {
r = put_user(early[i], (uint32_t *)buf);
- if (r)
- return r;
+ if (r) {
+ result = r;
+ goto out;
+ }
buf += 4;
result += 4;
size -= 4;
@@ -390,29 +600,113 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
}
}
- while (size) {
- if (*pos >= (ring->ring_size + 12))
- return result;
+ if (ring->funcs->type != AMDGPU_RING_TYPE_CPER) {
+ while (size) {
+ if (*pos >= (ring->ring_size + 12))
+ return result;
- value = ring->ring[(*pos - 12)/4];
- r = put_user(value, (uint32_t *)buf);
- if (r)
- return r;
- buf += 4;
- result += 4;
- size -= 4;
- *pos += 4;
+ value = ring->ring[(*pos - 12)/4];
+ r = put_user(value, (uint32_t *)buf);
+ if (r)
+ return r;
+ buf += 4;
+ result += 4;
+ size -= 4;
+ *pos += 4;
+ }
+ } else {
+ p = early[0];
+ if (early[0] <= early[1])
+ size = (early[1] - early[0]);
+ else
+ size = ring->ring_size - (early[0] - early[1]);
+
+ while (size) {
+ if (p == early[1])
+ goto out;
+
+ value = ring->ring[p];
+ r = put_user(value, (uint32_t *)buf);
+ if (r) {
+ result = r;
+ goto out;
+ }
+
+ buf += 4;
+ result += 4;
+ size--;
+ p++;
+ p &= ring->ptr_mask;
+ }
}
+out:
+ if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
+ mutex_unlock(&ring->adev->cper.ring_lock);
+
return result;
}
+/*
+ * debugfs read handler used for the ring file when running as an SR-IOV VF.
+ *
+ * Rejects positions and sizes that are not multiples of 4 with -EINVAL. For
+ * CPER rings it first calls amdgpu_virt_req_ras_cper_dump() and then defers
+ * to amdgpu_debugfs_ring_read() for the actual read.
+ */
+static ssize_t amdgpu_debugfs_virt_ring_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_ring *ring = file_inode(f)->i_private;
+
+ if (*pos & 3 || size & 3)
+ return -EINVAL;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
+ amdgpu_virt_req_ras_cper_dump(ring->adev, false);
+
+ return amdgpu_debugfs_ring_read(f, buf, size, pos);
+}
+
static const struct file_operations amdgpu_debugfs_ring_fops = {
.owner = THIS_MODULE,
.read = amdgpu_debugfs_ring_read,
.llseek = default_llseek
};
+/* File operations for the ring debugfs file on SR-IOV virtual functions. */
+static const struct file_operations amdgpu_debugfs_virt_ring_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_virt_ring_read,
+ .llseek = default_llseek
+};
+
+/*
+ * debugfs read handler exposing the ring's MQD (ring->mqd_size bytes
+ * starting at ring->mqd_ptr).
+ *
+ * @f: debugfs file; the inode's i_private is the struct amdgpu_ring
+ * @buf: user buffer to fill
+ * @size: maximum number of bytes to copy
+ * @pos: file position, advanced by the number of bytes copied
+ *
+ * The bounds handling is left to simple_read_from_buffer(): it validates
+ * *pos before any pointer is derived from it, clamps the length to what is
+ * left in the MQD, returns 0 at or past the end and returns -EFAULT only
+ * when nothing could be copied.
+ */
+static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_ring *ring = file_inode(f)->i_private;
+
+ return simple_read_from_buffer(buf, size, pos, ring->mqd_ptr,
+ ring->mqd_size);
+}
+
+/* File operations for the per-ring "amdgpu_mqd_<ring name>" debugfs file. */
+static const struct file_operations amdgpu_debugfs_mqd_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_mqd_read,
+ .llseek = default_llseek
+};
+
+/*
+ * Setter for the per-ring error debugfs attribute: forwards the written
+ * value to amdgpu_fence_driver_set_error() and always reports success.
+ */
+static int amdgpu_debugfs_ring_error(void *data, u64 val)
+{
+ struct amdgpu_ring *ring = data;
+
+ amdgpu_fence_driver_set_error(ring, val);
+ return 0;
+}
+
+/* Signed attribute with no getter (NULL), so the file is write-only. */
+DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(amdgpu_debugfs_error_fops, NULL,
+ amdgpu_debugfs_ring_error, "%lld\n");
+
#endif
void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
@@ -424,9 +718,25 @@ void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
char name[32];
sprintf(name, "amdgpu_ring_%s", ring->name);
- debugfs_create_file_size(name, S_IFREG | S_IRUGO, root, ring,
- &amdgpu_debugfs_ring_fops,
- ring->ring_size + 12);
+ if (amdgpu_sriov_vf(adev))
+ debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
+ &amdgpu_debugfs_virt_ring_fops,
+ ring->ring_size + 12);
+ else
+ debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
+ &amdgpu_debugfs_ring_fops,
+ ring->ring_size + 12);
+
+ if (ring->mqd_obj) {
+ sprintf(name, "amdgpu_mqd_%s", ring->name);
+ debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
+ &amdgpu_debugfs_mqd_fops,
+ ring->mqd_size);
+ }
+
+ sprintf(name, "amdgpu_error_%s", ring->name);
+ debugfs_create_file(name, 0200, root, ring,
+ &amdgpu_debugfs_error_fops);
#endif
}
@@ -454,5 +764,164 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
ring->name);
ring->sched.ready = !r;
+
return r;
}
+
+/*
+ * Fill @prop with the MQD properties derived from @ring.
+ *
+ * @prop is zeroed first, then the addresses, queue size and doorbell
+ * settings are copied from @ring. High priority compute and graphics queues
+ * additionally get the high pipe priority and the maximum queue priority;
+ * tunneling is allowed for high priority compute queues only.
+ */
+static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool is_high_prio_compute = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
+ amdgpu_gfx_is_high_priority_compute_queue(adev, ring);
+ bool is_high_prio_gfx = ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
+ amdgpu_gfx_is_high_priority_graphics_queue(adev, ring);
+
+ memset(prop, 0, sizeof(*prop));
+
+ prop->mqd_gpu_addr = ring->mqd_gpu_addr;
+ prop->hqd_base_gpu_addr = ring->gpu_addr;
+ prop->rptr_gpu_addr = ring->rptr_gpu_addr;
+ prop->wptr_gpu_addr = ring->wptr_gpu_addr;
+ prop->queue_size = ring->ring_size;
+ prop->eop_gpu_addr = ring->eop_gpu_addr;
+ prop->use_doorbell = ring->use_doorbell;
+ prop->doorbell_index = ring->doorbell_index;
+ prop->kernel_queue = true;
+
+ /* map_queues packet doesn't need activate the queue,
+ * so only kiq need set this field.
+ */
+ prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ;
+
+ prop->allow_tunneling = is_high_prio_compute;
+ if (is_high_prio_compute || is_high_prio_gfx) {
+ prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
+ prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
+ }
+}
+
+/*
+ * Initialize the MQD of @ring.
+ *
+ * Builds the MQD properties from @ring, resets ring->wptr to 0 and calls
+ * the init_mqd() callback of the MQD manager selected by the ring type; KIQ
+ * rings use the compute (AMDGPU_HW_IP_COMPUTE) manager.
+ *
+ * Returns the result of the init_mqd() callback.
+ */
+int amdgpu_ring_init_mqd(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_mqd *mqd_mgr;
+ struct amdgpu_mqd_prop prop;
+
+ amdgpu_ring_to_mqd_prop(ring, &prop);
+
+ ring->wptr = 0;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ mqd_mgr = &adev->mqds[AMDGPU_HW_IP_COMPUTE];
+ else
+ mqd_mgr = &adev->mqds[ring->funcs->type];
+
+ return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop);
+}
+
+/* Forward to amdgpu_sw_ring_ib_begin() for software rings; no-op otherwise. */
+void amdgpu_ring_ib_begin(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_begin(ring);
+}
+
+/* Forward to amdgpu_sw_ring_ib_end() for software rings; no-op otherwise. */
+void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_end(ring);
+}
+
+/*
+ * For software rings, mark the current offset with
+ * AMDGPU_MUX_OFFSET_TYPE_CONTROL; no-op otherwise.
+ */
+void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CONTROL);
+}
+
+/*
+ * For software rings, mark the current offset with
+ * AMDGPU_MUX_OFFSET_TYPE_CE; no-op otherwise.
+ */
+void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CE);
+}
+
+/*
+ * For software rings, mark the current offset with
+ * AMDGPU_MUX_OFFSET_TYPE_DE; no-op otherwise.
+ */
+void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE);
+}
+
+/*
+ * Return true when @ring is non-NULL, uses a scheduler (no_scheduler is not
+ * set) and drm_sched_wqueue_ready() reports its scheduler as ready.
+ */
+bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring)
+{
+ if (!ring)
+ return false;
+
+ if (ring->no_scheduler || !drm_sched_wqueue_ready(&ring->sched))
+ return false;
+
+ return true;
+}
+
+/*
+ * First half of a ring reset: stop the ring's scheduler work queue and back
+ * up the unprocessed commands with respect to @guilty_fence.
+ * amdgpu_ring_reset_helper_end() is the counterpart that restarts the
+ * scheduler.
+ */
+void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence)
+{
+ /* Stop the scheduler to prevent anybody else from touching the ring buffer. */
+ drm_sched_wqueue_stop(&ring->sched);
+ /* back up the non-guilty commands */
+ amdgpu_ring_backup_unprocessed_commands(ring, guilty_fence);
+}
+
+/*
+ * Second half of a ring reset.
+ *
+ * Tests the ring, force-completes @guilty_fence (when non-NULL), re-emits
+ * the ring_backup_entries_to_copy dwords saved in ring->ring_backup and
+ * restarts the scheduler work queue.
+ *
+ * Returns 0 on success. If the ring test fails its error is returned right
+ * away, in which case nothing is re-emitted and the scheduler work queue is
+ * not restarted here.
+ */
+int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence)
+{
+ unsigned int i;
+ int r;
+
+ /* verify that the ring is functional */
+ r = amdgpu_ring_test_ring(ring);
+ if (r)
+ return r;
+
+ /* signal the guilty fence and set an error on all fences from the context */
+ if (guilty_fence)
+ amdgpu_fence_driver_guilty_force_completion(guilty_fence);
+ /* Re-emit the non-guilty commands */
+ if (ring->ring_backup_entries_to_copy) {
+ amdgpu_ring_alloc_reemit(ring, ring->ring_backup_entries_to_copy);
+ for (i = 0; i < ring->ring_backup_entries_to_copy; i++)
+ amdgpu_ring_write(ring, ring->ring_backup[i]);
+ amdgpu_ring_commit(ring);
+ }
+ /* Start the scheduler again */
+ drm_sched_wqueue_start(&ring->sched);
+ return 0;
+}
+
+/*
+ * Check whether @reset_type is supported for the IP that owns @ring.
+ *
+ * @reset_type is tested as a bit mask against the supported-reset mask of
+ * the matching block: gfx, compute, sdma, vcn (for both VCN_DEC and VCN_ENC
+ * rings) or jpeg. Returns true when at least one bit matches and false
+ * otherwise, including for every other ring type.
+ */
+bool amdgpu_ring_is_reset_type_supported(struct amdgpu_ring *ring,
+ u32 reset_type)
+{
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ if (ring->adev->gfx.gfx_supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_COMPUTE:
+ if (ring->adev->gfx.compute_supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ if (ring->adev->sdma.supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_VCN_DEC:
+ case AMDGPU_RING_TYPE_VCN_ENC:
+ if (ring->adev->vcn.supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_VCN_JPEG:
+ if (ring->adev->jpeg.supported_reset & reset_type)
+ return true;
+ break;
+ default:
+ break;
+ }
+ return false;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 4d380e79752c..7a27c6c4bb44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -27,14 +27,24 @@
#include <drm/amdgpu_drm.h>
#include <drm/gpu_scheduler.h>
#include <drm/drm_print.h>
+#include <drm/drm_suballoc.h>
+
+struct amdgpu_device;
+struct amdgpu_ring;
+struct amdgpu_ib;
+struct amdgpu_cs_parser;
+struct amdgpu_job;
+struct amdgpu_vm;
/* max number of rings */
-#define AMDGPU_MAX_RINGS 28
-#define AMDGPU_MAX_HWIP_RINGS 8
+#define AMDGPU_MAX_RINGS 149
+#define AMDGPU_MAX_HWIP_RINGS 64
#define AMDGPU_MAX_GFX_RINGS 2
+#define AMDGPU_MAX_SW_GFX_RINGS 2
#define AMDGPU_MAX_COMPUTE_RINGS 8
#define AMDGPU_MAX_VCE_RINGS 3
#define AMDGPU_MAX_UVD_ENC_RINGS 2
+#define AMDGPU_MAX_VPE_RINGS 2
enum amdgpu_ring_priority_level {
AMDGPU_RING_PRIO_0,
@@ -52,9 +62,7 @@ enum amdgpu_ring_priority_level {
#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
#define AMDGPU_FENCE_FLAG_INT (1 << 1)
#define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2)
-
-/* fence flag bit to indicate the face is embedded in job*/
-#define AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT (DMA_FENCE_FLAG_USER_BITS + 1)
+#define AMDGPU_FENCE_FLAG_EXEC (1 << 3)
#define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
@@ -70,8 +78,12 @@ enum amdgpu_ring_type {
AMDGPU_RING_TYPE_VCN_DEC = AMDGPU_HW_IP_VCN_DEC,
AMDGPU_RING_TYPE_VCN_ENC = AMDGPU_HW_IP_VCN_ENC,
AMDGPU_RING_TYPE_VCN_JPEG = AMDGPU_HW_IP_VCN_JPEG,
+ AMDGPU_RING_TYPE_VPE = AMDGPU_HW_IP_VPE,
AMDGPU_RING_TYPE_KIQ,
- AMDGPU_RING_TYPE_MES
+ AMDGPU_RING_TYPE_MES,
+ AMDGPU_RING_TYPE_UMSCH_MM,
+ AMDGPU_RING_TYPE_CPER,
+ AMDGPU_RING_TYPE_MAX,
};
enum amdgpu_ib_pool_type {
@@ -85,11 +97,13 @@ enum amdgpu_ib_pool_type {
AMDGPU_IB_POOL_MAX
};
-struct amdgpu_device;
-struct amdgpu_ring;
-struct amdgpu_ib;
-struct amdgpu_cs_parser;
-struct amdgpu_job;
+/*
+ * Indirect buffer descriptor.
+ *
+ * @ptr is indexed in dwords and @length_dw is the number of dwords
+ * currently used (amdgpu_ring_generic_pad_ib() appends NOPs at
+ * ptr[length_dw] and bumps length_dw).
+ * NOTE(review): @sa_bo is presumably the suballocation backing the buffer
+ * and @gpu_addr its GPU address - confirm against the IB allocation code.
+ */
+struct amdgpu_ib {
+ struct drm_suballoc *sa_bo;
+ uint32_t length_dw;
+ uint64_t gpu_addr;
+ uint32_t *ptr;
+ uint32_t flags;
+};
struct amdgpu_sched {
u32 num_scheds;
@@ -101,10 +115,11 @@ struct amdgpu_sched {
*/
struct amdgpu_fence_driver {
uint64_t gpu_addr;
- volatile uint32_t *cpu_addr;
+ uint32_t *cpu_addr;
/* sync_seq is protected by ring emission lock */
uint32_t sync_seq;
atomic_t last_seq;
+ u64 signalled_wptr;
bool initialized;
struct amdgpu_irq_src *irq_src;
unsigned irq_type;
@@ -114,11 +129,35 @@ struct amdgpu_fence_driver {
struct dma_fence **fences;
};
+/*
+ * Fences mark an event in the GPUs pipeline and are used
+ * for GPU/CPU synchronization. When the fence is written,
+ * it is expected that all buffers associated with that fence
+ * are no longer in use by the associated ring on the GPU and
+ * that the relevant GPU caches have been flushed.
+ */
+
+/* amdgpu fence object; embeds the generic struct dma_fence as @base. */
+struct amdgpu_fence {
+ struct dma_fence base;
+
+ /* RB, DMA, etc. */
+ struct amdgpu_ring *ring;
+ ktime_t start_timestamp;
+
+ /* wptr for the fence for resets */
+ u64 wptr;
+ /* fence context for resets */
+ u64 context;
+};
+
+extern const struct drm_sched_backend_ops amdgpu_sched_ops;
+
+void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
+void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af);
+void amdgpu_fence_save_wptr(struct amdgpu_fence *af);
-int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
- unsigned num_hw_submission,
- atomic_t *sched_score);
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq_src,
unsigned irq_type);
@@ -126,8 +165,8 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev);
void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev);
int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev);
void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev);
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, struct amdgpu_job *job,
- unsigned flags);
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
+ unsigned int flags);
int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
uint32_t timeout);
bool amdgpu_fence_process(struct amdgpu_ring *ring);
@@ -137,27 +176,64 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
signed long timeout);
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
+void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop);
+
+u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring);
+void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq,
+ ktime_t timestamp);
+
/*
* Rings.
*/
/* provided by hw blocks that expose a ring buffer for commands */
struct amdgpu_ring_funcs {
+ /**
+ * @type:
+ *
+ * GFX, Compute, SDMA, UVD, VCE, VCN, VPE, KIQ, MES, UMSCH, and CPER
+ * use ring buffers. The type field just identifies which component the
+ * ring buffer is associated with.
+ */
enum amdgpu_ring_type type;
uint32_t align_mask;
+
+ /**
+ * @nop:
+ *
+ * Every block in the amdgpu has no-op instructions (e.g., GFX 10
+ * uses PACKET3(PACKET3_NOP, 0x3FFF), VCN 5 uses VCN_ENC_CMD_NO_OP,
+ * etc). This field receives the specific no-op for the component
+ * that initializes the ring.
+ */
u32 nop;
bool support_64bit_ptrs;
bool no_user_fence;
- unsigned vmhub;
- unsigned extra_dw;
+ bool secure_submission_supported;
+
+ /**
+ * @extra_bytes:
+ *
+ * Optional extra space in bytes that is added to the ring size
+ * when allocating the BO that holds the contents of the ring.
+ * This space isn't used for command submission to the ring,
+ * but is just there to satisfy some hardware requirements or
+ * implement workarounds. It's up to the implementation of each
+ * specific ring to initialize this space.
+ */
+ unsigned extra_bytes;
/* ring read/write ptr handling */
u64 (*get_rptr)(struct amdgpu_ring *ring);
u64 (*get_wptr)(struct amdgpu_ring *ring);
void (*set_wptr)(struct amdgpu_ring *ring);
/* validating and patching of IBs */
- int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
- int (*patch_cs_in_place)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
+ int (*parse_cs)(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
+ int (*patch_cs_in_place)(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
/* constants to calculate how many DW are needed for an emit */
unsigned emit_frame_size;
unsigned emit_ib_size;
@@ -185,13 +261,14 @@ struct amdgpu_ring_funcs {
void (*insert_end)(struct amdgpu_ring *ring);
/* pad the indirect buffer to the necessary number of dw */
void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
- unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
- void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset);
+ unsigned (*init_cond_exec)(struct amdgpu_ring *ring, uint64_t addr);
/* note usage for clock and power gating */
void (*begin_use)(struct amdgpu_ring *ring);
void (*end_use)(struct amdgpu_ring *ring);
void (*emit_switch_buffer) (struct amdgpu_ring *ring);
void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
+ void (*emit_gfx_shadow)(struct amdgpu_ring *ring, u64 shadow_va, u64 csa_va,
+ u64 gds_va, bool init_shadow, int vmid);
void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg,
uint32_t reg_val_offs);
void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
@@ -207,8 +284,17 @@ struct amdgpu_ring_funcs {
int (*preempt_ib)(struct amdgpu_ring *ring);
void (*emit_mem_sync)(struct amdgpu_ring *ring);
void (*emit_wave_limit)(struct amdgpu_ring *ring, bool enable);
+ void (*patch_cntl)(struct amdgpu_ring *ring, unsigned offset);
+ void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset);
+ void (*patch_de)(struct amdgpu_ring *ring, unsigned offset);
+ int (*reset)(struct amdgpu_ring *ring, unsigned int vmid,
+ struct amdgpu_fence *timedout_fence);
+ void (*emit_cleaner_shader)(struct amdgpu_ring *ring);
};
+/**
+ * amdgpu_ring - Holds ring information
+ */
struct amdgpu_ring {
struct amdgpu_device *adev;
const struct amdgpu_ring_funcs *funcs;
@@ -216,49 +302,128 @@ struct amdgpu_ring {
struct drm_gpu_scheduler sched;
struct amdgpu_bo *ring_obj;
- volatile uint32_t *ring;
+ uint32_t *ring;
+ /* backups for resets */
+ uint32_t *ring_backup;
+ unsigned int ring_backup_entries_to_copy;
unsigned rptr_offs;
+ u64 rptr_gpu_addr;
+ u32 *rptr_cpu_addr;
+
+ /**
+ * @wptr:
+ *
+ * This is part of the Ring buffer implementation and represents the
+ * write pointer. The wptr determines where the host has written.
+ */
u64 wptr;
+
+ /**
+ * @wptr_old:
+ *
+ * Before update wptr with the new value, usually the old value is
+ * stored in the wptr_old.
+ */
u64 wptr_old;
unsigned ring_size;
+
+ /**
+ * @max_dw:
+ *
+ * Maximum number of DWords for ring allocation. This information is
+ * provided at the ring initialization time, and each IP block can
+ * specify a specific value. Check places that invoke
+ * amdgpu_ring_init() to see the maximum size per block.
+ */
unsigned max_dw;
+
+ /**
+ * @count_dw:
+ *
+ * This value starts with the maximum amount of DWords supported by the
+ * ring. This value is updated based on the ring manipulation.
+ */
int count_dw;
uint64_t gpu_addr;
+
+ /**
+ * @ptr_mask:
+ *
+ * Some IPs provide support for 64-bit pointers and others for 32-bit
+ * only; this behavior is component-specific and defined by the field
+ * support_64bit_ptr. If the IP block supports 64-bits, the mask
+ * 0xffffffffffffffff is set; otherwise, this value assumes buf_mask.
+ * Notice that this field is used to keep wptr under a valid range.
+ */
uint64_t ptr_mask;
+
+ /**
+ * @buf_mask:
+ *
+ * Buffer mask is a value used to keep wptr count under its
+ * thresholding. Buffer mask initialized during the ring buffer
+ * initialization time, and it is defined as (ring_size / 4) -1.
+ */
uint32_t buf_mask;
u32 idx;
+ u32 xcc_id;
+ u32 xcp_id;
u32 me;
u32 pipe;
u32 queue;
struct amdgpu_bo *mqd_obj;
uint64_t mqd_gpu_addr;
void *mqd_ptr;
+ unsigned mqd_size;
uint64_t eop_gpu_addr;
u32 doorbell_index;
bool use_doorbell;
bool use_pollmem;
unsigned wptr_offs;
+ u64 wptr_gpu_addr;
+
+ /**
+ * @wptr_cpu_addr:
+ *
+ * This is the CPU address pointer in the writeback slot. This is used
+ * to commit changes to the GPU.
+ */
+ u32 *wptr_cpu_addr;
unsigned fence_offs;
+ u64 fence_gpu_addr;
+ u32 *fence_cpu_addr;
uint64_t current_ctx;
char name[16];
u32 trail_seq;
unsigned trail_fence_offs;
u64 trail_fence_gpu_addr;
- volatile u32 *trail_fence_cpu_addr;
+ u32 *trail_fence_cpu_addr;
unsigned cond_exe_offs;
u64 cond_exe_gpu_addr;
- volatile u32 *cond_exe_cpu_addr;
+ u32 *cond_exe_cpu_addr;
+ unsigned int set_q_mode_offs;
+ u32 *set_q_mode_ptr;
+ u64 set_q_mode_token;
+ unsigned vm_hub;
unsigned vm_inv_eng;
struct dma_fence *vmid_wait;
bool has_compute_vm_bug;
bool no_scheduler;
+ bool no_user_submission;
int hw_prio;
+ unsigned num_hw_submission;
+ atomic_t *sched_score;
+
+ bool is_sw_ring;
+ unsigned int entry_index;
+ /* store the cached rptr to restore after reset */
+ uint64_t cached_rptr;
};
-#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
-#define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib)))
+#define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
+#define amdgpu_ring_patch_cs_in_place(r, p, job, ib) ((r)->funcs->patch_cs_in_place((p), (job), (ib)))
#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
-#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
+#define amdgpu_ring_test_ib(r, t) ((r)->funcs->test_ib ? (r)->funcs->test_ib((r), (t)) : 0)
#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
@@ -270,24 +435,35 @@ struct amdgpu_ring {
#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
+#define amdgpu_ring_emit_gfx_shadow(r, s, c, g, i, v) ((r)->funcs->emit_gfx_shadow((r), (s), (c), (g), (i), (v)))
#define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), (d), (o))
#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
-#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
-#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
+#define amdgpu_ring_init_cond_exec(r, a) (r)->funcs->init_cond_exec((r), (a))
#define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
+#define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o)))
+#define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o)))
+#define amdgpu_ring_patch_de(r, o) ((r)->funcs->patch_de((r), (o)))
+#define amdgpu_ring_reset(r, v, f) (r)->funcs->reset((r), (v), (f))
+unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type);
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
+void amdgpu_ring_ib_begin(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_end(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring);
+
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
void amdgpu_ring_commit(struct amdgpu_ring *ring);
void amdgpu_ring_undo(struct amdgpu_ring *ring);
int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
- unsigned int ring_size, struct amdgpu_irq_src *irq_src,
- unsigned int irq_type, unsigned int prio,
+ unsigned int max_dw, struct amdgpu_irq_src *irq_src,
+ unsigned int irq_type, unsigned int hw_prio,
atomic_t *sched_score);
void amdgpu_ring_fini(struct amdgpu_ring *ring);
void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
@@ -304,16 +480,11 @@ static inline void amdgpu_ring_set_preempt_cond_exec(struct amdgpu_ring *ring,
static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
{
- int i = 0;
- while (i <= ring->buf_mask)
- ring->ring[i++] = ring->funcs->nop;
-
+ memset32(ring->ring, ring->funcs->nop, ring->buf_mask + 1);
}
static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
{
- if (ring->count_dw <= 0)
- DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
ring->ring[ring->wptr++ & ring->buf_mask] = v;
ring->wptr &= ring->ptr_mask;
ring->count_dw--;
@@ -323,26 +494,20 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
void *src, int count_dw)
{
unsigned occupied, chunk1, chunk2;
- void *dst;
-
- if (unlikely(ring->count_dw < count_dw))
- DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
occupied = ring->wptr & ring->buf_mask;
- dst = (void *)&ring->ring[occupied];
chunk1 = ring->buf_mask + 1 - occupied;
- chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
+ chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1;
chunk2 = count_dw - chunk1;
chunk1 <<= 2;
chunk2 <<= 2;
if (chunk1)
- memcpy(dst, src, chunk1);
+ memcpy(&ring->ring[occupied], src, chunk1);
if (chunk2) {
src += chunk1;
- dst = (void *)ring->ring;
- memcpy(dst, src, chunk2);
+ memcpy(ring->ring, src, chunk2);
}
ring->wptr += count_dw;
@@ -350,8 +515,66 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
ring->count_dw -= count_dw;
}
+/**
+ * amdgpu_ring_patch_cond_exec - patch dw count of conditional execute
+ * @ring: amdgpu_ring structure
+ * @offset: offset returned by amdgpu_ring_init_cond_exec
+ *
+ * Calculate the dw count and patch it into a cond_exec command.
+ * Nothing is patched when the ring has no init_cond_exec callback or when
+ * @offset does not lie inside the ring buffer.
+ */
+static inline void amdgpu_ring_patch_cond_exec(struct amdgpu_ring *ring,
+					       unsigned int offset)
+{
+	unsigned int cur;
+
+	if (!ring->funcs->init_cond_exec)
+		return;
+
+	/* Warn about a bad offset and never index the ring with it. */
+	if (WARN_ON(offset > ring->buf_mask))
+		return;
+	WARN_ON(ring->ring[offset] != 0);
+
+	cur = (ring->wptr - 1) & ring->buf_mask;
+	/* The masked write position is behind @offset: account for the wrap. */
+	if (cur < offset)
+		cur += ring->ring_size >> 2;
+	ring->ring[offset] = cur - offset;
+}
+
int amdgpu_ring_test_helper(struct amdgpu_ring *ring);
void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
+
+int amdgpu_ring_init_mqd(struct amdgpu_ring *ring);
+
+/* Return the value stored at index @idx of @ib->ptr. */
+static inline u32 amdgpu_ib_get_value(struct amdgpu_ib *ib, int idx)
+{
+	u32 value = ib->ptr[idx];
+
+	return value;
+}
+
+/* Store @value at index @idx of @ib->ptr. */
+static inline void amdgpu_ib_set_value(struct amdgpu_ib *ib, int idx,
+				       uint32_t value)
+{
+	*(ib->ptr + idx) = value;
+}
+
+int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ unsigned size,
+ enum amdgpu_ib_pool_type pool,
+ struct amdgpu_ib *ib);
+void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f);
+int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
+ struct amdgpu_ib *ibs, struct amdgpu_job *job,
+ struct dma_fence **f);
+int amdgpu_ib_pool_init(struct amdgpu_device *adev);
+void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
+int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
+bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring);
+void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence);
+void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence);
+int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence);
+bool amdgpu_ring_is_reset_type_supported(struct amdgpu_ring *ring,
+ u32 reset_type);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
new file mode 100644
index 000000000000..7e7d6c3865bc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -0,0 +1,576 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/slab.h>
+#include <drm/drm_print.h>
+
+#include "amdgpu_ring_mux.h"
+#include "amdgpu_ring.h"
+#include "amdgpu.h"
+
+#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)
+#define AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US 10000
+
+/*
+ * Table describing the software rings: one entry per ring, giving its
+ * hardware priority and its name. It is read by index through
+ * amdgpu_sw_ring_priority() and amdgpu_sw_ring_name().
+ */
+static const struct ring_info {
+ unsigned int hw_pio; /* hardware priority of the ring (field name is spelled "hw_pio") */
+ const char *ring_name; /* name of the ring */
+} sw_ring_info[] = {
+ { AMDGPU_RING_PRIO_DEFAULT, "gfx_low"},
+ { AMDGPU_RING_PRIO_2, "gfx_high"},
+};
+
+static struct kmem_cache *amdgpu_mux_chunk_slab;
+
+/*
+ * Look up the mux entry that belongs to a software ring.
+ * Returns NULL when the ring's entry_index is outside the entry array.
+ */
+static inline struct amdgpu_mux_entry *amdgpu_ring_mux_sw_entry(struct amdgpu_ring_mux *mux,
+								struct amdgpu_ring *ring)
+{
+	unsigned int slot = ring->entry_index;
+
+	if (slot >= mux->ring_entry_size)
+		return NULL;
+
+	return &mux->ring_entry[slot];
+}
+
+/* Copy the packets in the sw ring range [s_start, s_end) onto the real ring. */
+static void amdgpu_ring_mux_copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
+						  struct amdgpu_ring *ring,
+						  u64 s_start, u64 s_end)
+{
+	struct amdgpu_ring *hw_ring = mux->real_ring;
+	u64 first = s_start & ring->buf_mask;
+	u64 last = s_end & ring->buf_mask;
+	u64 ring_dw;
+
+	if (first == last) {
+		DRM_ERROR("no more data copied from sw ring\n");
+		return;
+	}
+
+	if (first < last) {
+		/* Contiguous range: one copy is enough. */
+		amdgpu_ring_alloc(hw_ring, last - first);
+		amdgpu_ring_write_multiple(hw_ring, (void *)&ring->ring[first], last - first);
+		return;
+	}
+
+	/* The range wraps: copy the tail of the sw ring first, then its head. */
+	ring_dw = ring->ring_size >> 2;
+	amdgpu_ring_alloc(hw_ring, ring_dw + last - first);
+	amdgpu_ring_write_multiple(hw_ring, (void *)&ring->ring[first],
+				   ring_dw - first);
+	amdgpu_ring_write_multiple(hw_ring, (void *)&ring->ring[0], last);
+}
+
+/*
+ * Resubmit the chunks of the low priority software ring that are still
+ * unsignaled, by copying them onto the real ring again.
+ *
+ * Does nothing unless mux->s_resubmit is set. Chunks whose sync_seq lies in
+ * (last signaled seqno, mux->seqno_to_resubmit] are copied and committed one
+ * by one. The resubmit timer is then deleted and s_resubmit cleared.
+ * Every caller in this file takes mux->lock before calling this function.
+ */
+static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux)
+{
+ struct amdgpu_mux_entry *e = NULL;
+ struct amdgpu_mux_chunk *chunk;
+ uint32_t seq, last_seq;
+ int i;
+
+ /* Nothing to do unless a resubmission has been requested. */
+ if (!mux->s_resubmit)
+ return;
+
+ /* Find the first entry whose ring has default (or lower) priority. */
+ for (i = 0; i < mux->num_ring_entries; i++) {
+ if (mux->ring_entry[i].ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
+ e = &mux->ring_entry[i];
+ break;
+ }
+ }
+
+ if (!e) {
+ DRM_ERROR("%s no low priority ring found\n", __func__);
+ return;
+ }
+
+ last_seq = atomic_read(&e->ring->fence_drv.last_seq);
+ seq = mux->seqno_to_resubmit;
+ if (last_seq < seq) {
+ /* Resubmit all the fences in (last_seq, seq]. */
+ list_for_each_entry(chunk, &e->list, entry) {
+ if (chunk->sync_seq > last_seq && chunk->sync_seq <= seq) {
+ amdgpu_fence_update_start_timestamp(e->ring,
+ chunk->sync_seq,
+ ktime_get());
+ /*
+ * NOTE(review): the dword at cpu_addr + 2 is presumably the
+ * seqno of the preempted IB - confirm against the fence code.
+ * Only that chunk gets its recorded offsets patched; an offset
+ * above buf_mask means it was never marked.
+ */
+ if (chunk->sync_seq ==
+ le32_to_cpu(*(e->ring->fence_drv.cpu_addr + 2))) {
+ if (chunk->cntl_offset <= e->ring->buf_mask)
+ amdgpu_ring_patch_cntl(e->ring,
+ chunk->cntl_offset);
+ if (chunk->ce_offset <= e->ring->buf_mask)
+ amdgpu_ring_patch_ce(e->ring, chunk->ce_offset);
+ if (chunk->de_offset <= e->ring->buf_mask)
+ amdgpu_ring_patch_de(e->ring, chunk->de_offset);
+ }
+ amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, e->ring,
+ chunk->start,
+ chunk->end);
+ /* Remember how far the sw ring has been copied by resubmission. */
+ mux->wptr_resubmit = chunk->end;
+ amdgpu_ring_commit(mux->real_ring);
+ }
+ }
+ }
+
+ timer_delete(&mux->resubmit_timer);
+ mux->s_resubmit = false;
+}
+
+/* (Re)arm the resubmit timer to fire after the resubmit timeout. */
+static void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux)
+{
+	unsigned long expires = jiffies + AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT;
+
+	mod_timer(&mux->resubmit_timer, expires);
+}
+
+/*
+ * Timer callback: resubmit the pending chunks if the mux lock can be taken
+ * without waiting, otherwise re-arm the timer and try again later.
+ */
+static void amdgpu_mux_resubmit_fallback(struct timer_list *t)
+{
+	struct amdgpu_ring_mux *mux = timer_container_of(mux, t,
+							 resubmit_timer);
+
+	if (spin_trylock(&mux->lock)) {
+		amdgpu_mux_resubmit_chunks(mux);
+		spin_unlock(&mux->lock);
+		return;
+	}
+
+	/* Lock is busy: postpone the resubmission. */
+	amdgpu_ring_mux_schedule_resubmit(mux);
+	DRM_ERROR("reschedule resubmit\n");
+}
+
+/**
+ * amdgpu_ring_mux_init - initialize a ring multiplexer
+ * @mux: the multiplexer to initialize
+ * @ring: the real ring the software rings are copied onto
+ * @entry_size: maximum number of software rings that can be added
+ *
+ * Allocates the entry array and the chunk slab cache, then sets up the lock
+ * and the resubmit timer.
+ *
+ * Return: 0 on success, -ENOMEM if an allocation fails. On failure nothing
+ * allocated by this function is left behind.
+ */
+int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
+			 unsigned int entry_size)
+{
+	mux->real_ring = ring;
+	mux->num_ring_entries = 0;
+
+	mux->ring_entry = kcalloc(entry_size, sizeof(struct amdgpu_mux_entry), GFP_KERNEL);
+	if (!mux->ring_entry)
+		return -ENOMEM;
+
+	mux->ring_entry_size = entry_size;
+	mux->s_resubmit = false;
+
+	amdgpu_mux_chunk_slab = KMEM_CACHE(amdgpu_mux_chunk, SLAB_HWCACHE_ALIGN);
+	if (!amdgpu_mux_chunk_slab) {
+		DRM_ERROR("create amdgpu_mux_chunk cache failed\n");
+		/* Do not leak the entry array allocated above. */
+		kfree(mux->ring_entry);
+		mux->ring_entry = NULL;
+		mux->ring_entry_size = 0;
+		return -ENOMEM;
+	}
+
+	spin_lock_init(&mux->lock);
+	timer_setup(&mux->resubmit_timer, amdgpu_mux_resubmit_fallback, 0);
+
+	return 0;
+}
+
+/**
+ * amdgpu_ring_mux_fini - tear down a ring multiplexer
+ * @mux: the multiplexer to tear down
+ *
+ * Stops the resubmit timer, frees every chunk still queued on the software
+ * ring entries, destroys the chunk slab cache and releases the entry array.
+ *
+ * NOTE(review): assumes this is called from a context that may sleep and
+ * only after amdgpu_ring_mux_init() succeeded - confirm against the callers.
+ */
+void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
+{
+	struct amdgpu_mux_entry *e;
+	struct amdgpu_mux_chunk *chunk, *chunk2;
+	int i;
+
+	/*
+	 * The timer callback walks mux->ring_entry and the chunk lists, so it
+	 * must be neither pending nor running (nor able to re-arm itself)
+	 * before those are freed below.
+	 */
+	timer_shutdown_sync(&mux->resubmit_timer);
+
+	for (i = 0; i < mux->num_ring_entries; i++) {
+		e = &mux->ring_entry[i];
+		list_for_each_entry_safe(chunk, chunk2, &e->list, entry) {
+			list_del(&chunk->entry);
+			kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
+		}
+	}
+	kmem_cache_destroy(amdgpu_mux_chunk_slab);
+	kfree(mux->ring_entry);
+	mux->ring_entry = NULL;
+	mux->num_ring_entries = 0;
+	mux->ring_entry_size = 0;
+}
+
+/*
+ * Attach a software ring to the multiplexer, using the next free entry.
+ * Returns 0 on success or -ENOENT when all entries are already in use.
+ */
+int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+	unsigned int slot = mux->num_ring_entries;
+	struct amdgpu_mux_entry *entry;
+
+	if (slot >= mux->ring_entry_size) {
+		DRM_ERROR("add sw ring exceeding max entry size\n");
+		return -ENOENT;
+	}
+
+	ring->entry_index = slot;
+
+	entry = &mux->ring_entry[slot];
+	entry->ring = ring;
+	INIT_LIST_HEAD(&entry->list);
+
+	mux->num_ring_entries = slot + 1;
+	return 0;
+}
+
+/*
+ * Commit the packets a software ring has written up to @wptr by copying
+ * them onto the real ring, under mux->lock.
+ *
+ * For a default (or lower) priority ring, pending resubmissions are flushed
+ * first, and the copy is skipped while pending_trailing_fence_signaled is
+ * set or when the range has already been copied by the resubmit path.
+ */
+void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr)
+{
+ struct amdgpu_mux_entry *e;
+
+ spin_lock(&mux->lock);
+
+ if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT)
+ amdgpu_mux_resubmit_chunks(mux);
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry for sw ring\n");
+ spin_unlock(&mux->lock);
+ return;
+ }
+
+ /* Skip this set_wptr while a preemption is in progress. */
+ if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && mux->pending_trailing_fence_signaled) {
+ spin_unlock(&mux->lock);
+ return;
+ }
+
+ /* The previous write pointer is where the next copy starts. */
+ e->sw_cptr = e->sw_wptr;
+ /* Advance cptr if the packets were already copied by the resubmit functions. */
+ if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && e->sw_cptr < mux->wptr_resubmit)
+ e->sw_cptr = mux->wptr_resubmit;
+ e->sw_wptr = wptr;
+ e->start_ptr_in_hw_ring = mux->real_ring->wptr;
+
+ /* Skip copying the packets that were already resubmitted. */
+ if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT || mux->wptr_resubmit < wptr) {
+ amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr);
+ e->end_ptr_in_hw_ring = mux->real_ring->wptr;
+ amdgpu_ring_commit(mux->real_ring);
+ } else {
+ /* Nothing copied: start and end in the hw ring coincide. */
+ e->end_ptr_in_hw_ring = mux->real_ring->wptr;
+ }
+ spin_unlock(&mux->lock);
+}
+
+/*
+ * Return the write pointer recorded for a software ring, or 0 when the ring
+ * has no entry in the multiplexer.
+ */
+u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+	struct amdgpu_mux_entry *entry = amdgpu_ring_mux_sw_entry(mux, ring);
+
+	if (entry)
+		return entry->sw_wptr;
+
+	DRM_ERROR("cannot find entry for sw ring\n");
+	return 0;
+}
+
+/**
+ * amdgpu_ring_mux_get_rptr - get the readptr of the software ring
+ * @mux: the multiplexer the software rings attach to
+ * @ring: the software ring of which we calculate the readptr
+ *
+ * The returned readptr is not precise, because the other rings may write
+ * data onto the real ring buffer as well. Once the real ring has been
+ * overwritten, we cannot tell whether our packets have already been executed
+ * or have not been read yet. However, this function is only called by tools
+ * such as umr to collect the latest packets for hang analysis. We assume the
+ * hang happens near our latest submission, so the following logic gives a
+ * usable hint:
+ * If the readptr is between start and end, then we return the copy pointer
+ * plus the distance from start to readptr. If the readptr is before start, we
+ * return the copy pointer. Lastly, if the readptr is past end, we return the
+ * write pointer.
+ *
+ * Return: the estimated read pointer, or 0 if @ring has no mux entry.
+ */
+u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ struct amdgpu_mux_entry *e;
+ u64 readp, offset, start, end;
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("no sw entry found!\n");
+ return 0;
+ }
+
+ readp = amdgpu_ring_get_rptr(mux->real_ring);
+
+ start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
+ end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
+ /* The copied range wraps in the real ring: unwrap end (and readp if needed). */
+ if (start > end) {
+ if (readp <= end)
+ readp += mux->real_ring->ring_size >> 2;
+ end += mux->real_ring->ring_size >> 2;
+ }
+
+ if (start <= readp && readp <= end) {
+ offset = readp - start;
+ e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
+ } else if (readp < start) {
+ e->sw_rptr = e->sw_cptr;
+ } else {
+ /* end < readptr */
+ e->sw_rptr = e->sw_wptr;
+ }
+
+ return e->sw_rptr;
+}
+
+/* Read pointer of a gfx software ring, as estimated by the gfx multiplexer. */
+u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring)
+{
+	WARN_ON(!ring->is_sw_ring);
+
+	return amdgpu_ring_mux_get_rptr(&ring->adev->gfx.muxer, ring);
+}
+
+/* Write pointer of a gfx software ring, as recorded by the gfx multiplexer. */
+u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+{
+	WARN_ON(!ring->is_sw_ring);
+
+	return amdgpu_ring_mux_get_wptr(&ring->adev->gfx.muxer, ring);
+}
+
+/* Hand the current write pointer of a gfx software ring to the gfx multiplexer. */
+void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+	WARN_ON(!ring->is_sw_ring);
+
+	amdgpu_ring_mux_set_wptr(&ring->adev->gfx.muxer, ring, ring->wptr);
+}
+
+/*
+ * Override insert_nop to prevent emitting nops to the software rings.
+ * @count is deliberately ignored: nothing is written to the ring. The only
+ * effect is a warning when @ring is not a software ring.
+ */
+void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ WARN_ON(!ring->is_sw_ring);
+}
+
+/* Name of software ring @idx, or NULL when @idx is not in sw_ring_info. */
+const char *amdgpu_sw_ring_name(int idx)
+{
+	if (idx < ARRAY_SIZE(sw_ring_info))
+		return sw_ring_info[idx].ring_name;
+
+	return NULL;
+}
+
+/*
+ * Hardware priority of software ring @idx; AMDGPU_RING_PRIO_DEFAULT when
+ * @idx is not in sw_ring_info.
+ */
+unsigned int amdgpu_sw_ring_priority(int idx)
+{
+	if (idx < ARRAY_SIZE(sw_ring_info))
+		return sw_ring_info[idx].hw_pio;
+
+	return AMDGPU_RING_PRIO_DEFAULT;
+}
+
+/*
+ * Decide whether a preemption should be triggered.
+ *
+ * Returns non-zero only when no resubmission is pending, a low priority ring
+ * has had an unsignaled fence for longer than the threshold, and no high
+ * priority ring seen so far has emitted fences.
+ */
+static int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux)
+{
+	struct amdgpu_ring *ring;
+	bool stalled = false;
+	int i;
+
+	for (i = 0; i < mux->num_ring_entries; i++) {
+		ring = mux->ring_entry[i].ring;
+
+		if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
+			/* A busy high priority ring rules preemption out. */
+			if (amdgpu_fence_count_emitted(ring) > 0)
+				return 0;
+		} else if (amdgpu_fence_last_unsignaled_time_us(ring) >
+			   AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US) {
+			stalled = true;
+		}
+	}
+
+	return stalled && !mux->s_resubmit;
+}
+
+/*
+ * Trigger Mid-Command Buffer Preemption (MCBP) on the real ring.
+ * Marks the trailing fence as pending and returns the result of the
+ * preempt_ib call; both happen under mux->lock.
+ */
+static int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux)
+{
+	int ret;
+
+	spin_lock(&mux->lock);
+
+	mux->pending_trailing_fence_signaled = true;
+	ret = amdgpu_ring_preempt_ib(mux->real_ring);
+
+	spin_unlock(&mux->lock);
+
+	return ret;
+}
+
+/*
+ * Called when an IB starts on a software ring. With MCBP enabled, a high
+ * priority ring only checks whether a preemption must be triggered; every
+ * other case starts recording a chunk for the IB.
+ */
+void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+	WARN_ON(!ring->is_sw_ring);
+
+	if (!adev->gfx.mcbp || ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
+		amdgpu_ring_mux_start_ib(mux, ring);
+		return;
+	}
+
+	if (amdgpu_mcbp_scan(mux) > 0)
+		amdgpu_mcbp_trigger_preempt(mux);
+}
+
+/*
+ * Called when an IB ends on a software ring. Finishes the chunk of the IB,
+ * except for a high priority ring while MCBP is enabled.
+ */
+void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+	WARN_ON(!ring->is_sw_ring);
+
+	if (!adev->gfx.mcbp || ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT)
+		amdgpu_ring_mux_end_ib(mux, ring);
+}
+
+/*
+ * Record the current (masked) write position of a software ring as the
+ * offset of type @type. High priority rings are ignored.
+ */
+void amdgpu_sw_ring_ib_mark_offset(struct amdgpu_ring *ring, enum amdgpu_ring_mux_offset_type type)
+{
+	struct amdgpu_ring_mux *mux = &ring->adev->gfx.muxer;
+	unsigned int pos;
+
+	if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
+		pos = ring->wptr & ring->buf_mask;
+		amdgpu_ring_mux_ib_mark_offset(mux, ring, pos, type);
+	}
+}
+
+/**
+ * amdgpu_ring_mux_start_ib - start recording the chunk of a new IB
+ * @mux: the multiplexer the software ring is attached to
+ * @ring: the software ring the IB is written to
+ *
+ * Flushes pending resubmissions, then allocates a chunk that starts at the
+ * ring's current write pointer and appends it to the ring's chunk list.
+ * The chunk is completed later by amdgpu_ring_mux_end_ib().
+ */
+void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+	struct amdgpu_mux_entry *e;
+	struct amdgpu_mux_chunk *chunk;
+
+	spin_lock(&mux->lock);
+	amdgpu_mux_resubmit_chunks(mux);
+	spin_unlock(&mux->lock);
+
+	e = amdgpu_ring_mux_sw_entry(mux, ring);
+	if (!e) {
+		DRM_ERROR("cannot find entry!\n");
+		return;
+	}
+
+	chunk = kmem_cache_alloc(amdgpu_mux_chunk_slab, GFP_KERNEL);
+	if (!chunk) {
+		DRM_ERROR("alloc amdgpu_mux_chunk_slab failed\n");
+		return;
+	}
+
+	chunk->start = ring->wptr;
+	/*
+	 * kmem_cache_alloc() does not zero the object. Give end and sync_seq
+	 * defined values, so that a resubmit scan running before
+	 * amdgpu_ring_mux_end_ib() fills them in does not act on garbage: a
+	 * sync_seq of 0 never falls inside the (last_seq, seq] resubmit window.
+	 */
+	chunk->end = ring->wptr;
+	chunk->sync_seq = 0;
+	/* Out-of-range offsets mark "not set by the IB submission". */
+	chunk->cntl_offset = ring->buf_mask + 1;
+	chunk->de_offset = ring->buf_mask + 1;
+	chunk->ce_offset = ring->buf_mask + 1;
+	list_add_tail(&chunk->entry, &e->list);
+}
+
+/* Free every chunk of @ring whose fence seqno has already signaled. */
+static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+	struct amdgpu_mux_entry *entry = amdgpu_ring_mux_sw_entry(mux, ring);
+	struct amdgpu_mux_chunk *cur, *next;
+	uint32_t signaled;
+
+	if (!entry) {
+		DRM_ERROR("cannot find entry!\n");
+		return;
+	}
+
+	signaled = atomic_read(&ring->fence_drv.last_seq);
+
+	list_for_each_entry_safe(cur, next, &entry->list, entry) {
+		/* Still in flight: keep it. */
+		if (cur->sync_seq > signaled)
+			continue;
+
+		list_del(&cur->entry);
+		kmem_cache_free(amdgpu_mux_chunk_slab, cur);
+	}
+}
+
+/**
+ * amdgpu_ring_mux_ib_mark_offset - record an offset in the newest chunk
+ * @mux: the multiplexer the software ring is attached to
+ * @ring: the software ring the IB is written to
+ * @offset: position on the software ring to record
+ * @type: which of the chunk's offsets (control, DE or CE) to set
+ *
+ * Stores @offset in the most recently added chunk of @ring. Logs an error
+ * and does nothing when the ring has no entry or no chunk, or when @type is
+ * unknown.
+ */
+void amdgpu_ring_mux_ib_mark_offset(struct amdgpu_ring_mux *mux,
+				    struct amdgpu_ring *ring, u64 offset,
+				    enum amdgpu_ring_mux_offset_type type)
+{
+	struct amdgpu_mux_entry *e;
+	struct amdgpu_mux_chunk *chunk;
+
+	e = amdgpu_ring_mux_sw_entry(mux, ring);
+	if (!e) {
+		DRM_ERROR("cannot find entry!\n");
+		return;
+	}
+
+	/*
+	 * list_last_entry() never returns NULL; on an empty list it yields a
+	 * pointer computed from the list head itself, so test for emptiness
+	 * explicitly.
+	 */
+	if (list_empty(&e->list)) {
+		DRM_ERROR("cannot find chunk!\n");
+		return;
+	}
+	chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
+
+	switch (type) {
+	case AMDGPU_MUX_OFFSET_TYPE_CONTROL:
+		chunk->cntl_offset = offset;
+		break;
+	case AMDGPU_MUX_OFFSET_TYPE_DE:
+		chunk->de_offset = offset;
+		break;
+	case AMDGPU_MUX_OFFSET_TYPE_CE:
+		chunk->ce_offset = offset;
+		break;
+	default:
+		DRM_ERROR("invalid type (%d)\n", type);
+		break;
+	}
+}
+
+/**
+ * amdgpu_ring_mux_end_ib - complete the chunk of the current IB
+ * @mux: the multiplexer the software ring is attached to
+ * @ring: the software ring the IB was written to
+ *
+ * Records the ring's current write pointer and fence sync_seq in the most
+ * recently added chunk, then frees the chunks that have already signaled.
+ * Logs an error and does nothing when the ring has no entry or no chunk.
+ */
+void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+	struct amdgpu_mux_entry *e;
+	struct amdgpu_mux_chunk *chunk;
+
+	e = amdgpu_ring_mux_sw_entry(mux, ring);
+	if (!e) {
+		DRM_ERROR("cannot find entry!\n");
+		return;
+	}
+
+	/*
+	 * list_last_entry() never returns NULL; on an empty list it yields a
+	 * pointer computed from the list head itself, so test for emptiness
+	 * explicitly.
+	 */
+	if (list_empty(&e->list)) {
+		DRM_ERROR("cannot find chunk!\n");
+		return;
+	}
+	chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
+
+	chunk->end = ring->wptr;
+	chunk->sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
+
+	scan_and_remove_signaled_chunk(mux, ring);
+}
+
+/*
+ * Handle the trailing fence of a preemption.
+ *
+ * Returns false when no preemption is pending, when the trailing fence value
+ * does not match trail_seq yet, or when no low priority ring exists.
+ * Otherwise processes the fences of the low priority ring, schedules a
+ * resubmission if that ring still has emitted fences, clears the pending
+ * flag and returns true.
+ */
+bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux)
+{
+	struct amdgpu_ring *low_ring = NULL;
+	struct amdgpu_ring *candidate;
+	int i;
+
+	if (!mux->pending_trailing_fence_signaled)
+		return false;
+
+	if (mux->real_ring->trail_seq != le32_to_cpu(*mux->real_ring->trail_fence_cpu_addr))
+		return false;
+
+	/* Pick the first ring with default (or lower) priority. */
+	for (i = 0; i < mux->num_ring_entries; i++) {
+		candidate = mux->ring_entry[i].ring;
+		if (candidate->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
+			continue;
+
+		low_ring = candidate;
+		break;
+	}
+
+	if (!low_ring) {
+		DRM_ERROR("cannot find low priority ring\n");
+		return false;
+	}
+
+	amdgpu_fence_process(low_ring);
+	if (amdgpu_fence_count_emitted(low_ring) > 0) {
+		/* Unsignaled work remains: it has to be resubmitted. */
+		mux->s_resubmit = true;
+		mux->seqno_to_resubmit = low_ring->fence_drv.sync_seq;
+		amdgpu_ring_mux_schedule_resubmit(mux);
+	}
+
+	mux->pending_trailing_fence_signaled = false;
+	return true;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
new file mode 100644
index 000000000000..d3186b570b82
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_RING_MUX__
+#define __AMDGPU_RING_MUX__
+
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include "amdgpu_ring.h"
+
+struct amdgpu_ring;
+
+/**
+ * struct amdgpu_mux_entry - per software ring bookkeeping of the copies made
+ * onto the hardware ring.
+ * @ring: the pointer to the software ring.
+ * @start_ptr_in_hw_ring: last start location copied to in the hardware ring.
+ * @end_ptr_in_hw_ring: last end location copied to in the hardware ring.
+ * @sw_cptr: the position of the copy pointer in the sw ring.
+ * @sw_rptr: the read pointer in software ring.
+ * @sw_wptr: the write pointer in software ring.
+ * @list: list head of the &struct amdgpu_mux_chunk recorded for this ring.
+ */
+struct amdgpu_mux_entry {
+ struct amdgpu_ring *ring;
+ u64 start_ptr_in_hw_ring;
+ u64 end_ptr_in_hw_ring;
+ u64 sw_cptr;
+ u64 sw_rptr;
+ u64 sw_wptr;
+ struct list_head list;
+};
+
+/*
+ * Selects which offset of a struct amdgpu_mux_chunk is recorded by
+ * amdgpu_ring_mux_ib_mark_offset(): cntl_offset, de_offset or ce_offset.
+ */
+enum amdgpu_ring_mux_offset_type {
+ AMDGPU_MUX_OFFSET_TYPE_CONTROL,
+ AMDGPU_MUX_OFFSET_TYPE_DE,
+ AMDGPU_MUX_OFFSET_TYPE_CE,
+};
+
+/* Completion state of an indirect buffer (IB). */
+enum ib_complete_status {
+ /* IB not started/reset value, default value. */
+ IB_COMPLETION_STATUS_DEFAULT = 0,
+ /* IB preempted, started but not completed. */
+ IB_COMPLETION_STATUS_PREEMPTED = 1,
+ /* IB completed. */
+ IB_COMPLETION_STATUS_COMPLETED = 2,
+};
+
+/**
+ * struct amdgpu_ring_mux - multiplexer copying software rings onto one real ring.
+ * @real_ring: the ring the packets of the software rings are copied to.
+ * @ring_entry: array of entries, one per attached software ring.
+ * @num_ring_entries: number of entries of @ring_entry currently in use.
+ * @ring_entry_size: number of entries allocated in @ring_entry.
+ * @lock: the lock for copying data from different software rings.
+ * @s_resubmit: true while chunks are waiting to be resubmitted.
+ * @seqno_to_resubmit: chunks up to and including this fence seqno are resubmitted.
+ * @wptr_resubmit: end position on the software ring of the last resubmitted chunk.
+ * @resubmit_timer: timer that retries the resubmission.
+ * @pending_trailing_fence_signaled: set when a preemption has been triggered,
+ * cleared once its trailing fence has been handled.
+ */
+struct amdgpu_ring_mux {
+ struct amdgpu_ring *real_ring;
+
+ struct amdgpu_mux_entry *ring_entry;
+ unsigned int num_ring_entries;
+ unsigned int ring_entry_size;
+ /* the lock for copying data from different software rings */
+ spinlock_t lock;
+ bool s_resubmit;
+ uint32_t seqno_to_resubmit;
+ u64 wptr_resubmit;
+ struct timer_list resubmit_timer;
+
+ bool pending_trailing_fence_signaled;
+};
+
+/**
+ * struct amdgpu_mux_chunk - save the location of an indirect buffer's packets on software rings.
+ * @entry: the list entry.
+ * @sync_seq: the fence seqno related with the saved IB.
+ * @start: start location on the software ring.
+ * @end: end location on the software ring.
+ * @cntl_offset: the PRE_RESUME bit position used for resubmission.
+ * @de_offset: the anchor in write_data for de meta of resubmission.
+ * @ce_offset: the anchor in write_data for ce meta of resubmission.
+ */
+struct amdgpu_mux_chunk {
+ struct list_head entry;
+ uint32_t sync_seq;
+ u64 start;
+ u64 end;
+ u64 cntl_offset;
+ u64 de_offset;
+ u64 ce_offset;
+};
+
+int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
+ unsigned int entry_size);
+void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux);
+int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr);
+u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_ib_mark_offset(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
+ u64 offset, enum amdgpu_ring_mux_offset_type type);
+bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux);
+
+u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
+u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
+void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_ib_mark_offset(struct amdgpu_ring *ring, enum amdgpu_ring_mux_offset_type type);
+const char *amdgpu_sw_ring_name(int idx);
+unsigned int amdgpu_sw_ring_priority(int idx);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
index 6373bfb47d55..5aa830a02d80 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -31,12 +31,13 @@
* amdgpu_gfx_rlc_enter_safe_mode - Set RLC into safe mode
*
* @adev: amdgpu_device pointer
+ * @xcc_id: xcc accelerated compute core id
*
* Set RLC enter into safe mode if RLC is enabled and haven't in safe mode.
*/
-void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
+void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
- if (adev->gfx.rlc.in_safe_mode)
+ if (adev->gfx.rlc.in_safe_mode[xcc_id])
return;
/* if RLC is not enabled, do nothing */
@@ -46,8 +47,8 @@ void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
if (adev->cg_flags &
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_3D_CGCG)) {
- adev->gfx.rlc.funcs->set_safe_mode(adev);
- adev->gfx.rlc.in_safe_mode = true;
+ adev->gfx.rlc.funcs->set_safe_mode(adev, xcc_id);
+ adev->gfx.rlc.in_safe_mode[xcc_id] = true;
}
}
@@ -55,12 +56,13 @@ void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
* amdgpu_gfx_rlc_exit_safe_mode - Set RLC out of safe mode
*
* @adev: amdgpu_device pointer
+ * @xcc_id: xcc accelerated compute core id
*
* Set RLC exit safe mode if RLC is enabled and have entered into safe mode.
*/
-void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev)
+void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
- if (!(adev->gfx.rlc.in_safe_mode))
+ if (!(adev->gfx.rlc.in_safe_mode[xcc_id]))
return;
/* if RLC is not enabled, do nothing */
@@ -70,8 +72,8 @@ void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev)
if (adev->cg_flags &
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_3D_CGCG)) {
- adev->gfx.rlc.funcs->unset_safe_mode(adev);
- adev->gfx.rlc.in_safe_mode = false;
+ adev->gfx.rlc.funcs->unset_safe_mode(adev, xcc_id);
+ adev->gfx.rlc.in_safe_mode[xcc_id] = false;
}
}
@@ -87,13 +89,14 @@ void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev)
int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws)
{
const u32 *src_ptr;
- volatile u32 *dst_ptr;
+ u32 *dst_ptr;
u32 i;
int r;
/* allocate save restore block */
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
&adev->gfx.rlc.save_restore_obj,
&adev->gfx.rlc.save_restore_gpu_addr,
(void **)&adev->gfx.rlc.sr_ptr);
@@ -130,7 +133,8 @@ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
/* allocate clear state block */
adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev);
r = amdgpu_bo_create_kernel(adev, dws * 4, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
(void **)&adev->gfx.rlc.cs_ptr);
@@ -156,7 +160,8 @@ int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev)
int r;
r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
&adev->gfx.rlc.cp_table_obj,
&adev->gfx.rlc.cp_table_gpu_addr,
(void **)&adev->gfx.rlc.cp_table_ptr);
@@ -184,7 +189,7 @@ int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev)
void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev)
{
const __le32 *fw_data;
- volatile u32 *dst_ptr;
+ u32 *dst_ptr;
int me, i, max_me;
u32 bo_offset = 0;
u32 table_offset, table_size;
@@ -236,7 +241,7 @@ void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev)
table_size = le32_to_cpu(hdr->jt_size);
}
- for (i = 0; i < table_size; i ++) {
+ for (i = 0; i < table_size; i++) {
dst_ptr[bo_offset + i] =
cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
}
@@ -272,3 +277,274 @@ void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev)
&adev->gfx.rlc.cp_table_gpu_addr,
(void **)&adev->gfx.rlc.cp_table_ptr);
}
+
+/*
+ * amdgpu_gfx_rlc_init_microcode_v2_0 - parse the v2.0 part of the RLC header
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Reads the v2.0 RLC firmware header from adev->gfx.rlc_fw->data, caches the
+ * header fields (converted to CPU endianness) in adev->gfx and adev->gfx.rlc,
+ * and copies the register list format array and the register restore array
+ * into a single kmalloc'ed buffer: register_list_format points at its start
+ * and register_restore points right behind the format array.
+ * When the firmware is loaded through PSP, the RLC_G ucode entry is
+ * registered and its page-aligned size is added to adev->firmware.fw_size.
+ *
+ * Returns 0 on success, -ENOMEM if the list buffer cannot be allocated.
+ */
+static int amdgpu_gfx_rlc_init_microcode_v2_0(struct amdgpu_device *adev)
+{
+	const struct common_firmware_header *common_hdr;
+	const struct rlc_firmware_header_v2_0 *rlc_hdr;
+	struct amdgpu_firmware_info *info;
+	unsigned int *tmp;
+	unsigned int i;
+
+	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+
+	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
+	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
+	adev->gfx.rlc.save_and_restore_offset =
+		le32_to_cpu(rlc_hdr->save_and_restore_offset);
+	adev->gfx.rlc.clear_state_descriptor_offset =
+		le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
+	adev->gfx.rlc.avail_scratch_ram_locations =
+		le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
+	adev->gfx.rlc.reg_restore_list_size =
+		le32_to_cpu(rlc_hdr->reg_restore_list_size);
+	adev->gfx.rlc.reg_list_format_start =
+		le32_to_cpu(rlc_hdr->reg_list_format_start);
+	adev->gfx.rlc.reg_list_format_separate_start =
+		le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
+	adev->gfx.rlc.starting_offsets_start =
+		le32_to_cpu(rlc_hdr->starting_offsets_start);
+	adev->gfx.rlc.reg_list_format_size_bytes =
+		le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
+	adev->gfx.rlc.reg_list_size_bytes =
+		le32_to_cpu(rlc_hdr->reg_list_size_bytes);
+
+	/* one buffer holds both arrays: format list first, restore list after */
+	adev->gfx.rlc.register_list_format =
+		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
+			adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
+	if (!adev->gfx.rlc.register_list_format) {
+		dev_err(adev->dev, "failed to allocate memory for rlc register_list_format\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * NOTE(review): the array offsets come straight from the firmware
+	 * header and are not checked against the firmware size here; confirm
+	 * the blob is validated before this function is called.
+	 */
+	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
+	/*
+	 * Use the CPU-endian size cached above as the loop bound. The raw
+	 * header field is little-endian and would give a wrong element count
+	 * (and overflow the buffer) on big-endian hosts.
+	 */
+	for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
+		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
+
+	/* i now equals the number of dwords in the format array */
+	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
+
+	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
+	/* same endianness consideration as for the format array above */
+	for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
+		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
+		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
+		info->fw = adev->gfx.rlc_fw;
+		if (info->fw) {
+			common_hdr = (const struct common_firmware_header *)info->fw->data;
+			adev->firmware.fw_size +=
+				ALIGN(le32_to_cpu(common_hdr->ucode_size_bytes), PAGE_SIZE);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * amdgpu_gfx_rlc_init_microcode_v2_1 - parse the v2.1 part of the RLC header
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Caches version, size and location of the three save/restore lists
+ * (cntl, gpm, srm) plus the direct register list length. With PSP firmware
+ * loading, every list with a non-zero size gets a ucode entry and its
+ * page-aligned size is added to adev->firmware.fw_size.
+ */
+static void amdgpu_gfx_rlc_init_microcode_v2_1(struct amdgpu_device *adev)
+{
+	const struct rlc_firmware_header_v2_1 *hdr =
+		(const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
+	struct amdgpu_rlc *rlc = &adev->gfx.rlc;
+	struct amdgpu_firmware_info *ucode;
+	u8 *base = (u8 *)hdr;
+
+	/* save/restore list: cntl */
+	adev->gfx.rlc_srlc_fw_version =
+		le32_to_cpu(hdr->save_restore_list_cntl_ucode_ver);
+	adev->gfx.rlc_srlc_feature_version =
+		le32_to_cpu(hdr->save_restore_list_cntl_feature_ver);
+	rlc->save_restore_list_cntl_size_bytes =
+		le32_to_cpu(hdr->save_restore_list_cntl_size_bytes);
+	rlc->save_restore_list_cntl =
+		base + le32_to_cpu(hdr->save_restore_list_cntl_offset_bytes);
+
+	/* save/restore list: gpm */
+	adev->gfx.rlc_srlg_fw_version =
+		le32_to_cpu(hdr->save_restore_list_gpm_ucode_ver);
+	adev->gfx.rlc_srlg_feature_version =
+		le32_to_cpu(hdr->save_restore_list_gpm_feature_ver);
+	rlc->save_restore_list_gpm_size_bytes =
+		le32_to_cpu(hdr->save_restore_list_gpm_size_bytes);
+	rlc->save_restore_list_gpm =
+		base + le32_to_cpu(hdr->save_restore_list_gpm_offset_bytes);
+
+	/* save/restore list: srm */
+	adev->gfx.rlc_srls_fw_version =
+		le32_to_cpu(hdr->save_restore_list_srm_ucode_ver);
+	adev->gfx.rlc_srls_feature_version =
+		le32_to_cpu(hdr->save_restore_list_srm_feature_ver);
+	rlc->save_restore_list_srm_size_bytes =
+		le32_to_cpu(hdr->save_restore_list_srm_size_bytes);
+	rlc->save_restore_list_srm =
+		base + le32_to_cpu(hdr->save_restore_list_srm_offset_bytes);
+
+	rlc->reg_list_format_direct_reg_list_length =
+		le32_to_cpu(hdr->reg_list_format_direct_reg_list_length);
+
+	/* ucode entries are only registered for PSP loading */
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+		return;
+
+	if (rlc->save_restore_list_cntl_size_bytes) {
+		ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
+		ucode->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
+		ucode->fw = adev->gfx.rlc_fw;
+		adev->firmware.fw_size +=
+			ALIGN(rlc->save_restore_list_cntl_size_bytes, PAGE_SIZE);
+	}
+
+	if (rlc->save_restore_list_gpm_size_bytes) {
+		ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
+		ucode->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
+		ucode->fw = adev->gfx.rlc_fw;
+		adev->firmware.fw_size +=
+			ALIGN(rlc->save_restore_list_gpm_size_bytes, PAGE_SIZE);
+	}
+
+	if (rlc->save_restore_list_srm_size_bytes) {
+		ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
+		ucode->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
+		ucode->fw = adev->gfx.rlc_fw;
+		adev->firmware.fw_size +=
+			ALIGN(rlc->save_restore_list_srm_size_bytes, PAGE_SIZE);
+	}
+}
+
+/*
+ * amdgpu_gfx_rlc_init_microcode_v2_2 - parse the v2.2 part of the RLC header
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Caches size and location of the RLC IRAM and DRAM ucode images. With PSP
+ * firmware loading, each image with a non-zero size gets a ucode entry and
+ * its page-aligned size is added to adev->firmware.fw_size.
+ */
+static void amdgpu_gfx_rlc_init_microcode_v2_2(struct amdgpu_device *adev)
+{
+	const struct rlc_firmware_header_v2_2 *hdr =
+		(const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+	struct amdgpu_rlc *rlc = &adev->gfx.rlc;
+	struct amdgpu_firmware_info *ucode;
+	u8 *base = (u8 *)hdr;
+
+	rlc->rlc_iram_ucode_size_bytes =
+		le32_to_cpu(hdr->rlc_iram_ucode_size_bytes);
+	rlc->rlc_iram_ucode =
+		base + le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes);
+	rlc->rlc_dram_ucode_size_bytes =
+		le32_to_cpu(hdr->rlc_dram_ucode_size_bytes);
+	rlc->rlc_dram_ucode =
+		base + le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes);
+
+	/* ucode entries are only registered for PSP loading */
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+		return;
+
+	if (rlc->rlc_iram_ucode_size_bytes) {
+		ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM];
+		ucode->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM;
+		ucode->fw = adev->gfx.rlc_fw;
+		adev->firmware.fw_size +=
+			ALIGN(rlc->rlc_iram_ucode_size_bytes, PAGE_SIZE);
+	}
+
+	if (rlc->rlc_dram_ucode_size_bytes) {
+		ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM];
+		ucode->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM;
+		ucode->fw = adev->gfx.rlc_fw;
+		adev->firmware.fw_size +=
+			ALIGN(rlc->rlc_dram_ucode_size_bytes, PAGE_SIZE);
+	}
+}
+
+/*
+ * amdgpu_gfx_rlc_init_microcode_v2_3 - parse the v2.3 part of the RLC header
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Caches version, size and location of the RLCP and RLCV ucode images. With
+ * PSP firmware loading, each image with a non-zero size gets a ucode entry
+ * and its page-aligned size is added to adev->firmware.fw_size.
+ */
+static void amdgpu_gfx_rlc_init_microcode_v2_3(struct amdgpu_device *adev)
+{
+	const struct rlc_firmware_header_v2_3 *hdr =
+		(const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
+	struct amdgpu_rlc *rlc = &adev->gfx.rlc;
+	struct amdgpu_firmware_info *ucode;
+	u8 *base = (u8 *)hdr;
+
+	/* RLCP image */
+	adev->gfx.rlcp_ucode_version = le32_to_cpu(hdr->rlcp_ucode_version);
+	adev->gfx.rlcp_ucode_feature_version =
+		le32_to_cpu(hdr->rlcp_ucode_feature_version);
+	rlc->rlcp_ucode_size_bytes = le32_to_cpu(hdr->rlcp_ucode_size_bytes);
+	rlc->rlcp_ucode = base + le32_to_cpu(hdr->rlcp_ucode_offset_bytes);
+
+	/* RLCV image */
+	adev->gfx.rlcv_ucode_version = le32_to_cpu(hdr->rlcv_ucode_version);
+	adev->gfx.rlcv_ucode_feature_version =
+		le32_to_cpu(hdr->rlcv_ucode_feature_version);
+	rlc->rlcv_ucode_size_bytes = le32_to_cpu(hdr->rlcv_ucode_size_bytes);
+	rlc->rlcv_ucode = base + le32_to_cpu(hdr->rlcv_ucode_offset_bytes);
+
+	/* ucode entries are only registered for PSP loading */
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+		return;
+
+	if (rlc->rlcp_ucode_size_bytes) {
+		ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_P];
+		ucode->ucode_id = AMDGPU_UCODE_ID_RLC_P;
+		ucode->fw = adev->gfx.rlc_fw;
+		adev->firmware.fw_size +=
+			ALIGN(rlc->rlcp_ucode_size_bytes, PAGE_SIZE);
+	}
+
+	if (rlc->rlcv_ucode_size_bytes) {
+		ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_V];
+		ucode->ucode_id = AMDGPU_UCODE_ID_RLC_V;
+		ucode->fw = adev->gfx.rlc_fw;
+		adev->firmware.fw_size +=
+			ALIGN(rlc->rlcv_ucode_size_bytes, PAGE_SIZE);
+	}
+}
+
+/*
+ * amdgpu_gfx_rlc_init_microcode_v2_4 - parse the v2.4 part of the RLC header
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Caches size and location of the global and per-SE (se0..se3) tap delays
+ * ucode images. With PSP firmware loading, each image with a non-zero size
+ * gets a ucode entry and its page-aligned size is added to
+ * adev->firmware.fw_size.
+ */
+static void amdgpu_gfx_rlc_init_microcode_v2_4(struct amdgpu_device *adev)
+{
+	const struct rlc_firmware_header_v2_4 *hdr =
+		(const struct rlc_firmware_header_v2_4 *)adev->gfx.rlc_fw->data;
+	struct amdgpu_rlc *rlc = &adev->gfx.rlc;
+	struct amdgpu_firmware_info *ucode;
+	u8 *base = (u8 *)hdr;
+
+	rlc->global_tap_delays_ucode_size_bytes =
+		le32_to_cpu(hdr->global_tap_delays_ucode_size_bytes);
+	rlc->global_tap_delays_ucode =
+		base + le32_to_cpu(hdr->global_tap_delays_ucode_offset_bytes);
+
+	rlc->se0_tap_delays_ucode_size_bytes =
+		le32_to_cpu(hdr->se0_tap_delays_ucode_size_bytes);
+	rlc->se0_tap_delays_ucode =
+		base + le32_to_cpu(hdr->se0_tap_delays_ucode_offset_bytes);
+
+	rlc->se1_tap_delays_ucode_size_bytes =
+		le32_to_cpu(hdr->se1_tap_delays_ucode_size_bytes);
+	rlc->se1_tap_delays_ucode =
+		base + le32_to_cpu(hdr->se1_tap_delays_ucode_offset_bytes);
+
+	rlc->se2_tap_delays_ucode_size_bytes =
+		le32_to_cpu(hdr->se2_tap_delays_ucode_size_bytes);
+	rlc->se2_tap_delays_ucode =
+		base + le32_to_cpu(hdr->se2_tap_delays_ucode_offset_bytes);
+
+	rlc->se3_tap_delays_ucode_size_bytes =
+		le32_to_cpu(hdr->se3_tap_delays_ucode_size_bytes);
+	rlc->se3_tap_delays_ucode =
+		base + le32_to_cpu(hdr->se3_tap_delays_ucode_offset_bytes);
+
+	/* ucode entries are only registered for PSP loading */
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+		return;
+
+	if (rlc->global_tap_delays_ucode_size_bytes) {
+		ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS];
+		ucode->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS;
+		ucode->fw = adev->gfx.rlc_fw;
+		adev->firmware.fw_size +=
+			ALIGN(rlc->global_tap_delays_ucode_size_bytes, PAGE_SIZE);
+	}
+
+	if (rlc->se0_tap_delays_ucode_size_bytes) {
+		ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS];
+		ucode->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS;
+		ucode->fw = adev->gfx.rlc_fw;
+		adev->firmware.fw_size +=
+			ALIGN(rlc->se0_tap_delays_ucode_size_bytes, PAGE_SIZE);
+	}
+
+	if (rlc->se1_tap_delays_ucode_size_bytes) {
+		ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS];
+		ucode->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS;
+		ucode->fw = adev->gfx.rlc_fw;
+		adev->firmware.fw_size +=
+			ALIGN(rlc->se1_tap_delays_ucode_size_bytes, PAGE_SIZE);
+	}
+
+	if (rlc->se2_tap_delays_ucode_size_bytes) {
+		ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS];
+		ucode->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS;
+		ucode->fw = adev->gfx.rlc_fw;
+		adev->firmware.fw_size +=
+			ALIGN(rlc->se2_tap_delays_ucode_size_bytes, PAGE_SIZE);
+	}
+
+	if (rlc->se3_tap_delays_ucode_size_bytes) {
+		ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS];
+		ucode->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS;
+		ucode->fw = adev->gfx.rlc_fw;
+		adev->firmware.fw_size +=
+			ALIGN(rlc->se3_tap_delays_ucode_size_bytes, PAGE_SIZE);
+	}
+}
+
+/**
+ * amdgpu_gfx_rlc_init_microcode - parse the RLC firmware header
+ *
+ * @adev: amdgpu_device pointer
+ * @version_major: major version of the RLC firmware header
+ * @version_minor: minor version of the RLC firmware header
+ *
+ * Always runs the v2.0 parser, then the v2.1 parser for minor >= 1, the v2.2
+ * parser for minor >= 2, the v2.3 parser for minor == 3 only and the v2.4
+ * parser for minor == 4 only.
+ *
+ * Returns 0 on success, -EINVAL if the header major version is below 2, or
+ * the error returned by the v2.0 parser.
+ */
+int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev,
+				  uint16_t version_major,
+				  uint16_t version_minor)
+{
+	int ret;
+
+	/* only support rlc_hdr v2.x and onwards */
+	if (version_major < 2) {
+		dev_err(adev->dev, "unsupported rlc fw hdr\n");
+		return -EINVAL;
+	}
+
+	/* is_rlc_v2_1 is still used in APU code path */
+	if (version_major == 2 && version_minor == 1)
+		adev->gfx.rlc.is_rlc_v2_1 = true;
+
+	ret = amdgpu_gfx_rlc_init_microcode_v2_0(adev);
+	if (ret) {
+		dev_err(adev->dev, "fail to init rlc v2_0 microcode\n");
+		return ret;
+	}
+
+	/* v2.1 and v2.2 parsers apply to every later minor version as well */
+	if (version_minor >= 1) {
+		amdgpu_gfx_rlc_init_microcode_v2_1(adev);
+		if (version_minor >= 2)
+			amdgpu_gfx_rlc_init_microcode_v2_2(adev);
+	}
+
+	/* v2.3 and v2.4 parsers are selected by exact minor version */
+	switch (version_minor) {
+	case 3:
+		amdgpu_gfx_rlc_init_microcode_v2_3(adev);
+		break;
+	case 4:
+		amdgpu_gfx_rlc_init_microcode_v2_4(adev);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
index 00afd0dcae86..2ce310b31942 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -26,6 +26,8 @@
#include "clearstate_defs.h"
+#define AMDGPU_MAX_RLC_INSTANCES 8
+
/* firmware ID used in rlc toc */
typedef enum _FIRMWARE_ID_ {
FIRMWARE_ID_INVALID = 0,
@@ -69,6 +71,94 @@ typedef enum _FIRMWARE_ID_ {
FIRMWARE_ID_MAX = 38,
} FIRMWARE_ID;
+/*
+ * SOC21 firmware IDs. Values are contiguous from 0 (INVALID) up to
+ * SOC21_FIRMWARE_ID_MAX, which is one past the last valid ID.
+ * NOTE(review): presumably these index entries of the rlc toc on SOC21
+ * parts, like FIRMWARE_ID above - confirm against the users of this enum.
+ */
+typedef enum _SOC21_FIRMWARE_ID_ {
+ SOC21_FIRMWARE_ID_INVALID = 0,
+ SOC21_FIRMWARE_ID_RLC_G_UCODE = 1,
+ SOC21_FIRMWARE_ID_RLC_TOC = 2,
+ SOC21_FIRMWARE_ID_RLCG_SCRATCH = 3,
+ SOC21_FIRMWARE_ID_RLC_SRM_ARAM = 4,
+ SOC21_FIRMWARE_ID_RLC_P_UCODE = 5,
+ SOC21_FIRMWARE_ID_RLC_V_UCODE = 6,
+ SOC21_FIRMWARE_ID_RLX6_UCODE = 7,
+ SOC21_FIRMWARE_ID_RLX6_UCODE_CORE1 = 8,
+ SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT = 9,
+ SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT_CORE1 = 10,
+ SOC21_FIRMWARE_ID_SDMA_UCODE_TH0 = 11,
+ SOC21_FIRMWARE_ID_SDMA_UCODE_TH1 = 12,
+ SOC21_FIRMWARE_ID_CP_PFP = 13,
+ SOC21_FIRMWARE_ID_CP_ME = 14,
+ SOC21_FIRMWARE_ID_CP_MEC = 15,
+ SOC21_FIRMWARE_ID_RS64_MES_P0 = 16,
+ SOC21_FIRMWARE_ID_RS64_MES_P1 = 17,
+ SOC21_FIRMWARE_ID_RS64_PFP = 18,
+ SOC21_FIRMWARE_ID_RS64_ME = 19,
+ SOC21_FIRMWARE_ID_RS64_MEC = 20,
+ SOC21_FIRMWARE_ID_RS64_MES_P0_STACK = 21,
+ SOC21_FIRMWARE_ID_RS64_MES_P1_STACK = 22,
+ SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK = 23,
+ SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK = 24,
+ SOC21_FIRMWARE_ID_RS64_ME_P0_STACK = 25,
+ SOC21_FIRMWARE_ID_RS64_ME_P1_STACK = 26,
+ SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK = 27,
+ SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK = 28,
+ SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK = 29,
+ SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK = 30,
+ SOC21_FIRMWARE_ID_RLC_SRM_DRAM_SR = 31,
+ SOC21_FIRMWARE_ID_RLCG_SCRATCH_SR = 32,
+ SOC21_FIRMWARE_ID_RLCP_SCRATCH_SR = 33,
+ SOC21_FIRMWARE_ID_RLCV_SCRATCH_SR = 34,
+ SOC21_FIRMWARE_ID_RLX6_DRAM_SR = 35,
+ SOC21_FIRMWARE_ID_RLX6_DRAM_SR_CORE1 = 36,
+ SOC21_FIRMWARE_ID_MAX = 37
+} SOC21_FIRMWARE_ID;
+
+/*
+ * SOC24 firmware IDs. IDs 0..36 carry the same names and values as their
+ * SOC21_FIRMWARE_ID counterparts; IDs 37..42 (RLC debug log, SRIOV debug/CSA
+ * and UMF zone pad entries) exist only here. SOC24_FIRMWARE_ID_MAX is one
+ * past the last valid ID.
+ * NOTE(review): presumably these index entries of the rlc toc on SOC24
+ * parts - confirm against the users of this enum.
+ */
+typedef enum _SOC24_FIRMWARE_ID_ {
+ SOC24_FIRMWARE_ID_INVALID = 0,
+ SOC24_FIRMWARE_ID_RLC_G_UCODE = 1,
+ SOC24_FIRMWARE_ID_RLC_TOC = 2,
+ SOC24_FIRMWARE_ID_RLCG_SCRATCH = 3,
+ SOC24_FIRMWARE_ID_RLC_SRM_ARAM = 4,
+ SOC24_FIRMWARE_ID_RLC_P_UCODE = 5,
+ SOC24_FIRMWARE_ID_RLC_V_UCODE = 6,
+ SOC24_FIRMWARE_ID_RLX6_UCODE = 7,
+ SOC24_FIRMWARE_ID_RLX6_UCODE_CORE1 = 8,
+ SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT = 9,
+ SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT_CORE1 = 10,
+ SOC24_FIRMWARE_ID_SDMA_UCODE_TH0 = 11,
+ SOC24_FIRMWARE_ID_SDMA_UCODE_TH1 = 12,
+ SOC24_FIRMWARE_ID_CP_PFP = 13,
+ SOC24_FIRMWARE_ID_CP_ME = 14,
+ SOC24_FIRMWARE_ID_CP_MEC = 15,
+ SOC24_FIRMWARE_ID_RS64_MES_P0 = 16,
+ SOC24_FIRMWARE_ID_RS64_MES_P1 = 17,
+ SOC24_FIRMWARE_ID_RS64_PFP = 18,
+ SOC24_FIRMWARE_ID_RS64_ME = 19,
+ SOC24_FIRMWARE_ID_RS64_MEC = 20,
+ SOC24_FIRMWARE_ID_RS64_MES_P0_STACK = 21,
+ SOC24_FIRMWARE_ID_RS64_MES_P1_STACK = 22,
+ SOC24_FIRMWARE_ID_RS64_PFP_P0_STACK = 23,
+ SOC24_FIRMWARE_ID_RS64_PFP_P1_STACK = 24,
+ SOC24_FIRMWARE_ID_RS64_ME_P0_STACK = 25,
+ SOC24_FIRMWARE_ID_RS64_ME_P1_STACK = 26,
+ SOC24_FIRMWARE_ID_RS64_MEC_P0_STACK = 27,
+ SOC24_FIRMWARE_ID_RS64_MEC_P1_STACK = 28,
+ SOC24_FIRMWARE_ID_RS64_MEC_P2_STACK = 29,
+ SOC24_FIRMWARE_ID_RS64_MEC_P3_STACK = 30,
+ SOC24_FIRMWARE_ID_RLC_SRM_DRAM_SR = 31,
+ SOC24_FIRMWARE_ID_RLCG_SCRATCH_SR = 32,
+ SOC24_FIRMWARE_ID_RLCP_SCRATCH_SR = 33,
+ SOC24_FIRMWARE_ID_RLCV_SCRATCH_SR = 34,
+ SOC24_FIRMWARE_ID_RLX6_DRAM_SR = 35,
+ SOC24_FIRMWARE_ID_RLX6_DRAM_SR_CORE1 = 36,
+ SOC24_FIRMWARE_ID_RLCDEBUGLOG = 37,
+ SOC24_FIRMWARE_ID_SRIOV_DEBUG = 38,
+ SOC24_FIRMWARE_ID_SRIOV_CSA_RLC = 39,
+ SOC24_FIRMWARE_ID_SRIOV_CSA_SDMA = 40,
+ SOC24_FIRMWARE_ID_SRIOV_CSA_CP = 41,
+ SOC24_FIRMWARE_ID_UMF_ZONE_PAD = 42,
+ SOC24_FIRMWARE_ID_MAX = 43
+} SOC24_FIRMWARE_ID;
+
typedef struct _RLC_TABLE_OF_CONTENT {
union {
unsigned int DW0;
@@ -112,47 +202,96 @@ typedef struct _RLC_TABLE_OF_CONTENT {
};
} RLC_TABLE_OF_CONTENT;
+/*
+ * Version 2 layout of one RLC table-of-contents entry: two dwords, each
+ * accessible either as a raw value (DW0/DW1) or through bit-fields.
+ * The bit-fields of each dword add up to exactly 32 bits.
+ */
+typedef struct _RLC_TABLE_OF_CONTENT_V2 {
+ union {
+ unsigned int DW0;
+ struct {
+ uint32_t offset : 25;
+ uint32_t id : 7;
+ };
+ };
+
+ union {
+ unsigned int DW1;
+ struct {
+ uint32_t reserved0 : 1;
+ uint32_t reserved1 : 1;
+ uint32_t reserved2 : 1;
+ uint32_t memory_destination : 2;
+ uint32_t vfflr_image_code : 4;
+ uint32_t reserved9 : 1;
+ uint32_t reserved10 : 1;
+ uint32_t reserved11 : 1;
+ /* NOTE(review): presumably flags that size is in 16x units - confirm */
+ uint32_t size_x16 : 1;
+ uint32_t reserved13 : 1;
+ uint32_t size : 18;
+ };
+ };
+} RLC_TABLE_OF_CONTENT_V2;
+
#define RLC_TOC_MAX_SIZE 64
struct amdgpu_rlc_funcs {
bool (*is_rlc_enabled)(struct amdgpu_device *adev);
- void (*set_safe_mode)(struct amdgpu_device *adev);
- void (*unset_safe_mode)(struct amdgpu_device *adev);
+ void (*set_safe_mode)(struct amdgpu_device *adev, int xcc_id);
+ void (*unset_safe_mode)(struct amdgpu_device *adev, int xcc_id);
int (*init)(struct amdgpu_device *adev);
u32 (*get_csb_size)(struct amdgpu_device *adev);
- void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer);
+
+ /**
+ * @get_csb_buffer: Get the clear state to be put into the hardware.
+ *
+ * The parameter adev is used to get the CS data and other gfx info,
+ * and buffer is the RLC CS pointer
+ *
+ * Sometimes, the user space puts a request to clear the state in the
+ * command buffer; this function provides the clear state that gets put
+ * into the hardware. Note that the driver programs Clear State
+ * Indirect Buffer (CSB) explicitly when it sets up the kernel rings,
+ * and it also provides a pointer to it which is used by the firmware
+ * to load the clear state in some cases.
+ */
+ void (*get_csb_buffer)(struct amdgpu_device *adev, u32 *buffer);
int (*get_cp_table_num)(struct amdgpu_device *adev);
int (*resume)(struct amdgpu_device *adev);
void (*stop)(struct amdgpu_device *adev);
void (*reset)(struct amdgpu_device *adev);
void (*start)(struct amdgpu_device *adev);
- void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid);
- void (*sriov_wreg)(struct amdgpu_device *adev, u32 offset, u32 v, u32 acc_flags, u32 hwip);
- u32 (*sriov_rreg)(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip);
+ void (*update_spm_vmid)(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid);
bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg);
};
+/*
+ * Registers used for rlcg indirect register access: four scratch registers,
+ * the GRBM control and index registers and a spare interrupt register.
+ * NOTE(review): the fields presumably hold register offsets filled in by the
+ * IP-specific gfx code - confirm against the code that initializes them.
+ */
+struct amdgpu_rlcg_reg_access_ctrl {
+ uint32_t scratch_reg0;
+ uint32_t scratch_reg1;
+ uint32_t scratch_reg2;
+ uint32_t scratch_reg3;
+ uint32_t grbm_cntl;
+ uint32_t grbm_idx;
+ uint32_t spare_int;
+};
+
struct amdgpu_rlc {
/* for power gating */
struct amdgpu_bo *save_restore_obj;
uint64_t save_restore_gpu_addr;
- volatile uint32_t *sr_ptr;
+ uint32_t *sr_ptr;
const u32 *reg_list;
u32 reg_list_size;
/* for clear state */
struct amdgpu_bo *clear_state_obj;
uint64_t clear_state_gpu_addr;
- volatile uint32_t *cs_ptr;
+ uint32_t *cs_ptr;
const struct cs_section_def *cs_data;
u32 clear_state_size;
/* for cp tables */
struct amdgpu_bo *cp_table_obj;
uint64_t cp_table_gpu_addr;
- volatile uint32_t *cp_table_ptr;
+ uint32_t *cp_table_ptr;
u32 cp_table_size;
/* safe mode for updating CG/PG state */
- bool in_safe_mode;
+ bool in_safe_mode[AMDGPU_MAX_RLC_INSTANCES];
const struct amdgpu_rlc_funcs *funcs;
/* for firmware data */
@@ -171,6 +310,13 @@ struct amdgpu_rlc {
u32 save_restore_list_srm_size_bytes;
u32 rlc_iram_ucode_size_bytes;
u32 rlc_dram_ucode_size_bytes;
+ u32 rlcp_ucode_size_bytes;
+ u32 rlcv_ucode_size_bytes;
+ u32 global_tap_delays_ucode_size_bytes;
+ u32 se0_tap_delays_ucode_size_bytes;
+ u32 se1_tap_delays_ucode_size_bytes;
+ u32 se2_tap_delays_ucode_size_bytes;
+ u32 se3_tap_delays_ucode_size_bytes;
u32 *register_list_format;
u32 *register_restore;
@@ -179,6 +325,13 @@ struct amdgpu_rlc {
u8 *save_restore_list_srm;
u8 *rlc_iram_ucode;
u8 *rlc_dram_ucode;
+ u8 *rlcp_ucode;
+ u8 *rlcv_ucode;
+ u8 *global_tap_delays_ucode;
+ u8 *se0_tap_delays_ucode;
+ u8 *se1_tap_delays_ucode;
+ u8 *se2_tap_delays_ucode;
+ u8 *se3_tap_delays_ucode;
bool is_rlc_v2_1;
@@ -191,14 +344,20 @@ struct amdgpu_rlc {
struct amdgpu_bo *rlc_toc_bo;
uint64_t rlc_toc_gpu_addr;
void *rlc_toc_buf;
+
+ bool rlcg_reg_access_supported;
+ /* registers for rlcg indirect reg access */
+ struct amdgpu_rlcg_reg_access_ctrl reg_access_ctrl[AMDGPU_MAX_RLC_INSTANCES];
};
-void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev);
-void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev);
+void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev, int xcc_id);
+void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev, int xcc_id);
int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws);
int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev);
int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev);
void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev);
void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev);
-
+int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev,
+ uint16_t version_major,
+ uint16_t version_minor);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index 524d10b21041..39070b2a4c04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -44,327 +44,62 @@
#include "amdgpu.h"
-static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo);
-static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager);
-
int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
struct amdgpu_sa_manager *sa_manager,
- unsigned size, u32 align, u32 domain)
+ unsigned int size, u32 suballoc_align, u32 domain)
{
- int i, r;
-
- init_waitqueue_head(&sa_manager->wq);
- sa_manager->bo = NULL;
- sa_manager->size = size;
- sa_manager->domain = domain;
- sa_manager->align = align;
- sa_manager->hole = &sa_manager->olist;
- INIT_LIST_HEAD(&sa_manager->olist);
- for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
- INIT_LIST_HEAD(&sa_manager->flist[i]);
+ int r;
- r = amdgpu_bo_create_kernel(adev, size, align, domain, &sa_manager->bo,
- &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
+ r = amdgpu_bo_create_kernel(adev, size, AMDGPU_GPU_PAGE_SIZE, domain,
+ &sa_manager->bo, &sa_manager->gpu_addr,
+ &sa_manager->cpu_ptr);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r);
return r;
}
- memset(sa_manager->cpu_ptr, 0, sa_manager->size);
+ memset(sa_manager->cpu_ptr, 0, size);
+ drm_suballoc_manager_init(&sa_manager->base, size, suballoc_align);
return r;
}
void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
struct amdgpu_sa_manager *sa_manager)
{
- struct amdgpu_sa_bo *sa_bo, *tmp;
-
if (sa_manager->bo == NULL) {
dev_err(adev->dev, "no bo for sa manager\n");
return;
}
- if (!list_empty(&sa_manager->olist)) {
- sa_manager->hole = &sa_manager->olist,
- amdgpu_sa_bo_try_free(sa_manager);
- if (!list_empty(&sa_manager->olist)) {
- dev_err(adev->dev, "sa_manager is not empty, clearing anyway\n");
- }
- }
- list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
- amdgpu_sa_bo_remove_locked(sa_bo);
- }
+ drm_suballoc_manager_fini(&sa_manager->base);
amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
- sa_manager->size = 0;
}
-static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
-{
- struct amdgpu_sa_manager *sa_manager = sa_bo->manager;
- if (sa_manager->hole == &sa_bo->olist) {
- sa_manager->hole = sa_bo->olist.prev;
- }
- list_del_init(&sa_bo->olist);
- list_del_init(&sa_bo->flist);
- dma_fence_put(sa_bo->fence);
- kfree(sa_bo);
-}
-
-static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager)
+int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
+ struct drm_suballoc **sa_bo,
+ unsigned int size)
{
- struct amdgpu_sa_bo *sa_bo, *tmp;
+ struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base, size,
+ GFP_KERNEL, false, 0);
- if (sa_manager->hole->next == &sa_manager->olist)
- return;
+ if (IS_ERR(sa)) {
+ *sa_bo = NULL;
- sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist);
- list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
- if (sa_bo->fence == NULL ||
- !dma_fence_is_signaled(sa_bo->fence)) {
- return;
- }
- amdgpu_sa_bo_remove_locked(sa_bo);
+ return PTR_ERR(sa);
}
-}
-static inline unsigned amdgpu_sa_bo_hole_soffset(struct amdgpu_sa_manager *sa_manager)
-{
- struct list_head *hole = sa_manager->hole;
-
- if (hole != &sa_manager->olist) {
- return list_entry(hole, struct amdgpu_sa_bo, olist)->eoffset;
- }
+ *sa_bo = sa;
return 0;
}
-static inline unsigned amdgpu_sa_bo_hole_eoffset(struct amdgpu_sa_manager *sa_manager)
-{
- struct list_head *hole = sa_manager->hole;
-
- if (hole->next != &sa_manager->olist) {
- return list_entry(hole->next, struct amdgpu_sa_bo, olist)->soffset;
- }
- return sa_manager->size;
-}
-
-static bool amdgpu_sa_bo_try_alloc(struct amdgpu_sa_manager *sa_manager,
- struct amdgpu_sa_bo *sa_bo,
- unsigned size, unsigned align)
+void amdgpu_sa_bo_free(struct drm_suballoc **sa_bo, struct dma_fence *fence)
{
- unsigned soffset, eoffset, wasted;
-
- soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
- eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
- wasted = (align - (soffset % align)) % align;
-
- if ((eoffset - soffset) >= (size + wasted)) {
- soffset += wasted;
-
- sa_bo->manager = sa_manager;
- sa_bo->soffset = soffset;
- sa_bo->eoffset = soffset + size;
- list_add(&sa_bo->olist, sa_manager->hole);
- INIT_LIST_HEAD(&sa_bo->flist);
- sa_manager->hole = &sa_bo->olist;
- return true;
- }
- return false;
-}
-
-/**
- * amdgpu_sa_event - Check if we can stop waiting
- *
- * @sa_manager: pointer to the sa_manager
- * @size: number of bytes we want to allocate
- * @align: alignment we need to match
- *
- * Check if either there is a fence we can wait for or
- * enough free memory to satisfy the allocation directly
- */
-static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager,
- unsigned size, unsigned align)
-{
- unsigned soffset, eoffset, wasted;
- int i;
-
- for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
- if (!list_empty(&sa_manager->flist[i]))
- return true;
-
- soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
- eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
- wasted = (align - (soffset % align)) % align;
-
- if ((eoffset - soffset) >= (size + wasted)) {
- return true;
- }
-
- return false;
-}
-
-static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
- struct dma_fence **fences,
- unsigned *tries)
-{
- struct amdgpu_sa_bo *best_bo = NULL;
- unsigned i, soffset, best, tmp;
-
- /* if hole points to the end of the buffer */
- if (sa_manager->hole->next == &sa_manager->olist) {
- /* try again with its beginning */
- sa_manager->hole = &sa_manager->olist;
- return true;
- }
-
- soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
- /* to handle wrap around we add sa_manager->size */
- best = sa_manager->size * 2;
- /* go over all fence list and try to find the closest sa_bo
- * of the current last
- */
- for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
- struct amdgpu_sa_bo *sa_bo;
-
- fences[i] = NULL;
-
- if (list_empty(&sa_manager->flist[i]))
- continue;
-
- sa_bo = list_first_entry(&sa_manager->flist[i],
- struct amdgpu_sa_bo, flist);
-
- if (!dma_fence_is_signaled(sa_bo->fence)) {
- fences[i] = sa_bo->fence;
- continue;
- }
-
- /* limit the number of tries each ring gets */
- if (tries[i] > 2) {
- continue;
- }
-
- tmp = sa_bo->soffset;
- if (tmp < soffset) {
- /* wrap around, pretend it's after */
- tmp += sa_manager->size;
- }
- tmp -= soffset;
- if (tmp < best) {
- /* this sa bo is the closest one */
- best = tmp;
- best_bo = sa_bo;
- }
- }
-
- if (best_bo) {
- uint32_t idx = best_bo->fence->context;
-
- idx %= AMDGPU_SA_NUM_FENCE_LISTS;
- ++tries[idx];
- sa_manager->hole = best_bo->olist.prev;
-
- /* we knew that this one is signaled,
- so it's save to remote it */
- amdgpu_sa_bo_remove_locked(best_bo);
- return true;
- }
- return false;
-}
-
-int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
- struct amdgpu_sa_bo **sa_bo,
- unsigned size, unsigned align)
-{
- struct dma_fence *fences[AMDGPU_SA_NUM_FENCE_LISTS];
- unsigned tries[AMDGPU_SA_NUM_FENCE_LISTS];
- unsigned count;
- int i, r;
- signed long t;
-
- if (WARN_ON_ONCE(align > sa_manager->align))
- return -EINVAL;
-
- if (WARN_ON_ONCE(size > sa_manager->size))
- return -EINVAL;
-
- *sa_bo = kmalloc(sizeof(struct amdgpu_sa_bo), GFP_KERNEL);
- if (!(*sa_bo))
- return -ENOMEM;
- (*sa_bo)->manager = sa_manager;
- (*sa_bo)->fence = NULL;
- INIT_LIST_HEAD(&(*sa_bo)->olist);
- INIT_LIST_HEAD(&(*sa_bo)->flist);
-
- spin_lock(&sa_manager->wq.lock);
- do {
- for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
- tries[i] = 0;
-
- do {
- amdgpu_sa_bo_try_free(sa_manager);
-
- if (amdgpu_sa_bo_try_alloc(sa_manager, *sa_bo,
- size, align)) {
- spin_unlock(&sa_manager->wq.lock);
- return 0;
- }
-
- /* see if we can skip over some allocations */
- } while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
-
- for (i = 0, count = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
- if (fences[i])
- fences[count++] = dma_fence_get(fences[i]);
-
- if (count) {
- spin_unlock(&sa_manager->wq.lock);
- t = dma_fence_wait_any_timeout(fences, count, false,
- MAX_SCHEDULE_TIMEOUT,
- NULL);
- for (i = 0; i < count; ++i)
- dma_fence_put(fences[i]);
-
- r = (t > 0) ? 0 : t;
- spin_lock(&sa_manager->wq.lock);
- } else {
- /* if we have nothing to wait for block */
- r = wait_event_interruptible_locked(
- sa_manager->wq,
- amdgpu_sa_event(sa_manager, size, align)
- );
- }
-
- } while (!r);
-
- spin_unlock(&sa_manager->wq.lock);
- kfree(*sa_bo);
- *sa_bo = NULL;
- return r;
-}
-
-void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
- struct dma_fence *fence)
-{
- struct amdgpu_sa_manager *sa_manager;
-
if (sa_bo == NULL || *sa_bo == NULL) {
return;
}
- sa_manager = (*sa_bo)->manager;
- spin_lock(&sa_manager->wq.lock);
- if (fence && !dma_fence_is_signaled(fence)) {
- uint32_t idx;
-
- (*sa_bo)->fence = dma_fence_get(fence);
- idx = fence->context % AMDGPU_SA_NUM_FENCE_LISTS;
- list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
- } else {
- amdgpu_sa_bo_remove_locked(*sa_bo);
- }
- wake_up_all_locked(&sa_manager->wq);
- spin_unlock(&sa_manager->wq.lock);
+ drm_suballoc_free(*sa_bo, fence);
*sa_bo = NULL;
}
@@ -373,26 +108,8 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
struct seq_file *m)
{
- struct amdgpu_sa_bo *i;
-
- spin_lock(&sa_manager->wq.lock);
- list_for_each_entry(i, &sa_manager->olist, olist) {
- uint64_t soffset = i->soffset + sa_manager->gpu_addr;
- uint64_t eoffset = i->eoffset + sa_manager->gpu_addr;
- if (&i->olist == sa_manager->hole) {
- seq_printf(m, ">");
- } else {
- seq_printf(m, " ");
- }
- seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
- soffset, eoffset, eoffset - soffset);
+ struct drm_printer p = drm_seq_file_printer(m);
- if (i->fence)
- seq_printf(m, " protected by 0x%016llx on context %llu",
- i->fence->seqno, i->fence->context);
-
- seq_printf(m, "\n");
- }
- spin_unlock(&sa_manager->wq.lock);
+ drm_suballoc_dump_debug_info(&sa_manager->base, &p, sa_manager->gpu_addr);
}
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index e9b45089a28a..341beec59537 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -22,7 +22,6 @@
* Authors: Andres Rodriguez <andresx7@gmail.com>
*/
-#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/pid.h>
@@ -36,25 +35,26 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
int fd,
int32_t priority)
{
- struct fd f = fdget(fd);
+ CLASS(fd, f)(fd);
struct amdgpu_fpriv *fpriv;
+ struct amdgpu_ctx_mgr *mgr;
struct amdgpu_ctx *ctx;
uint32_t id;
int r;
- if (!f.file)
+ if (fd_empty(f))
return -EINVAL;
- r = amdgpu_file_to_fpriv(f.file, &fpriv);
- if (r) {
- fdput(f);
+ r = amdgpu_file_to_fpriv(fd_file(f), &fpriv);
+ if (r)
return r;
- }
- idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id)
+ mgr = &fpriv->ctx_mgr;
+ mutex_lock(&mgr->lock);
+ idr_for_each_entry(&mgr->ctx_handles, ctx, id)
amdgpu_ctx_priority_override(ctx, priority);
+ mutex_unlock(&mgr->lock);
- fdput(f);
return 0;
}
@@ -63,31 +63,25 @@ static int amdgpu_sched_context_priority_override(struct amdgpu_device *adev,
unsigned ctx_id,
int32_t priority)
{
- struct fd f = fdget(fd);
+ CLASS(fd, f)(fd);
struct amdgpu_fpriv *fpriv;
struct amdgpu_ctx *ctx;
int r;
- if (!f.file)
+ if (fd_empty(f))
return -EINVAL;
- r = amdgpu_file_to_fpriv(f.file, &fpriv);
- if (r) {
- fdput(f);
+ r = amdgpu_file_to_fpriv(fd_file(f), &fpriv);
+ if (r)
return r;
- }
ctx = amdgpu_ctx_get(fpriv, ctx_id);
- if (!ctx) {
- fdput(f);
+ if (!ctx)
return -EINVAL;
- }
amdgpu_ctx_priority_override(ctx, priority);
amdgpu_ctx_put(ctx);
- fdput(f);
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 65debb65a5df..8b8a04138711 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -21,9 +21,13 @@
*
*/
+#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_sdma.h"
#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_3_0_sh_mask.h"
#define AMDGPU_CSA_SDMA_SIZE 64
/* SDMA CSA reside in the 3rd page of CSA */
@@ -63,7 +67,7 @@ int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index)
}
uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring,
- unsigned vmid)
+ unsigned int vmid)
{
struct amdgpu_device *adev = ring->adev;
uint64_t csa_mc_addr;
@@ -71,7 +75,7 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring,
int r;
/* don't enable OS preemption on SDMA under SRIOV */
- if (amdgpu_sriov_vf(adev) || vmid == 0 || !amdgpu_mcbp)
+ if (amdgpu_sriov_vf(adev) || vmid == 0 || !adev->gfx.mcbp)
return 0;
r = amdgpu_sdma_get_index_from_ring(ring, &index);
@@ -87,78 +91,39 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring,
}
int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
- void *ras_ih_info)
+ struct ras_common_if *ras_block)
{
int r, i;
- struct ras_ih_if *ih_info = (struct ras_ih_if *)ras_ih_info;
- struct ras_fs_if fs_info = {
- .sysfs_name = "sdma_err_count",
- };
-
- if (!ih_info)
- return -EINVAL;
-
- if (!adev->sdma.ras_if) {
- adev->sdma.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
- if (!adev->sdma.ras_if)
- return -ENOMEM;
- adev->sdma.ras_if->block = AMDGPU_RAS_BLOCK__SDMA;
- adev->sdma.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
- adev->sdma.ras_if->sub_block_index = 0;
- }
- fs_info.head = ih_info->head = *adev->sdma.ras_if;
- r = amdgpu_ras_late_init(adev, adev->sdma.ras_if,
- &fs_info, ih_info);
+ r = amdgpu_ras_block_late_init(adev, ras_block);
if (r)
- goto free;
+ return r;
- if (amdgpu_ras_is_supported(adev, adev->sdma.ras_if->block)) {
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
for (i = 0; i < adev->sdma.num_instances; i++) {
r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq,
AMDGPU_SDMA_IRQ_INSTANCE0 + i);
if (r)
goto late_fini;
}
- } else {
- r = 0;
- goto free;
}
return 0;
late_fini:
- amdgpu_ras_late_fini(adev, adev->sdma.ras_if, ih_info);
-free:
- kfree(adev->sdma.ras_if);
- adev->sdma.ras_if = NULL;
+ amdgpu_ras_block_late_fini(adev, ras_block);
return r;
}
-void amdgpu_sdma_ras_fini(struct amdgpu_device *adev)
-{
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) &&
- adev->sdma.ras_if) {
- struct ras_common_if *ras_if = adev->sdma.ras_if;
- struct ras_ih_if ih_info = {
- .head = *ras_if,
- /* the cb member will not be used by
- * amdgpu_ras_interrupt_remove_handler, init it only
- * to cheat the check in ras_late_fini
- */
- .cb = amdgpu_sdma_process_ras_data_cb,
- };
-
- amdgpu_ras_late_fini(adev, ras_if, &ih_info);
- kfree(ras_if);
- }
-}
-
int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
struct amdgpu_iv_entry *entry)
{
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+
+ if (amdgpu_sriov_vf(adev))
+ return AMDGPU_RAS_SUCCESS;
+
amdgpu_ras_reset_gpu(adev);
return AMDGPU_RAS_SUCCESS;
@@ -181,3 +146,466 @@ int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
return 0;
}
+
+static int amdgpu_sdma_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
+{
+ uint16_t version_major;
+ const struct common_firmware_header *header = NULL;
+ const struct sdma_firmware_header_v1_0 *hdr;
+ const struct sdma_firmware_header_v2_0 *hdr_v2;
+ const struct sdma_firmware_header_v3_0 *hdr_v3;
+
+ header = (const struct common_firmware_header *)
+ sdma_inst->fw->data;
+ version_major = le16_to_cpu(header->header_version_major);
+
+ switch (version_major) {
+ case 1:
+ hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data;
+ sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version);
+ sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version);
+ break;
+ case 2:
+ hdr_v2 = (const struct sdma_firmware_header_v2_0 *)sdma_inst->fw->data;
+ sdma_inst->fw_version = le32_to_cpu(hdr_v2->header.ucode_version);
+ sdma_inst->feature_version = le32_to_cpu(hdr_v2->ucode_feature_version);
+ break;
+ case 3:
+ hdr_v3 = (const struct sdma_firmware_header_v3_0 *)sdma_inst->fw->data;
+ sdma_inst->fw_version = le32_to_cpu(hdr_v3->header.ucode_version);
+ sdma_inst->feature_version = le32_to_cpu(hdr_v3->ucode_feature_version);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (sdma_inst->feature_version >= 20)
+ sdma_inst->burst_nop = true;
+
+ return 0;
+}
+
+void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
+ bool duplicate)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
+ if (duplicate)
+ break;
+ }
+
+ memset((void *)adev->sdma.instance, 0,
+ sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
+}
+
+int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
+ u32 instance, bool duplicate)
+{
+ struct amdgpu_firmware_info *info = NULL;
+ const struct common_firmware_header *header = NULL;
+ int err, i;
+ const struct sdma_firmware_header_v2_0 *sdma_hdr;
+ const struct sdma_firmware_header_v3_0 *sdma_hv3;
+ uint16_t version_major;
+ char ucode_prefix[30];
+
+ amdgpu_ucode_ip_version_decode(adev, SDMA0_HWIP, ucode_prefix, sizeof(ucode_prefix));
+ if (instance == 0)
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s.bin", ucode_prefix);
+ else
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s%d.bin", ucode_prefix, instance);
+ if (err)
+ goto out;
+
+ header = (const struct common_firmware_header *)
+ adev->sdma.instance[instance].fw->data;
+ version_major = le16_to_cpu(header->header_version_major);
+
+ if ((duplicate && instance) || (!duplicate && version_major > 1)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = amdgpu_sdma_init_inst_ctx(&adev->sdma.instance[instance]);
+ if (err)
+ goto out;
+
+ if (duplicate) {
+ for (i = 1; i < adev->sdma.num_instances; i++)
+ memcpy((void *)&adev->sdma.instance[i],
+ (void *)&adev->sdma.instance[0],
+ sizeof(struct amdgpu_sdma_instance));
+ }
+
+ DRM_DEBUG("psp_load == '%s'\n",
+ adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ switch (version_major) {
+ case 1:
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (!duplicate && (instance != i))
+ continue;
+ else {
+ /* Use a single copy per SDMA firmware type. PSP uses the same instance for all
+ * groups of SDMAs */
+ if ((amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 4, 2) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 4, 4) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 4, 5)) &&
+ adev->firmware.load_type ==
+ AMDGPU_FW_LOAD_PSP &&
+ adev->sdma.num_inst_per_aid == i) {
+ break;
+ }
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
+ info->fw = adev->sdma.instance[i].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ }
+ }
+ break;
+ case 2:
+ sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
+ adev->sdma.instance[0].fw->data;
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_UCODE_TH0];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA_UCODE_TH0;
+ info->fw = adev->sdma.instance[0].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes), PAGE_SIZE);
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_UCODE_TH1];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA_UCODE_TH1;
+ info->fw = adev->sdma.instance[0].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes), PAGE_SIZE);
+ break;
+ case 3:
+ sdma_hv3 = (const struct sdma_firmware_header_v3_0 *)
+ adev->sdma.instance[0].fw->data;
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_RS64];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA_RS64;
+ info->fw = adev->sdma.instance[0].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(sdma_hv3->ucode_size_bytes), PAGE_SIZE);
+ break;
+ default:
+ err = -EINVAL;
+ }
+ }
+
+out:
+ if (err)
+ amdgpu_sdma_destroy_inst_ctx(adev, duplicate);
+ return err;
+}
+
+int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err = 0;
+ struct amdgpu_sdma_ras *ras = NULL;
+
+ /* adev->sdma.ras is NULL, which means sdma does not
+ * support ras function, then do nothing here.
+ */
+ if (!adev->sdma.ras)
+ return 0;
+
+ ras = adev->sdma.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register sdma ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "sdma");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__SDMA;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->sdma.ras_if = &ras->ras_block.ras_comm;
+
+ /* If not define special ras_late_init function, use default ras_late_init */
+ if (!ras->ras_block.ras_late_init)
+ ras->ras_block.ras_late_init = amdgpu_sdma_ras_late_init;
+
+ /* If not defined special ras_cb function, use default ras_cb */
+ if (!ras->ras_block.ras_cb)
+ ras->ras_block.ras_cb = amdgpu_sdma_process_ras_data_cb;
+
+ return 0;
+}
+
+/*
+ * debugfs for to enable/disable sdma job submission to specific core.
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_sdma_sched_mask_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u64 i, num_ring;
+ u64 mask = 0;
+ struct amdgpu_ring *ring, *page = NULL;
+
+ if (!adev)
+ return -ENODEV;
+
+ /* Determine the number of rings per SDMA instance
+ * (1 for sdma gfx ring, 2 if page queue exists)
+ */
+ if (adev->sdma.has_page_queue)
+ num_ring = 2;
+ else
+ num_ring = 1;
+
+ /* Calculate the maximum possible mask value
+ * based on the number of SDMA instances and rings
+ */
+ mask = BIT_ULL(adev->sdma.num_instances * num_ring) - 1;
+
+ if ((val & mask) == 0)
+ return -EINVAL;
+
+ for (i = 0; i < adev->sdma.num_instances; ++i) {
+ ring = &adev->sdma.instance[i].ring;
+ if (adev->sdma.has_page_queue)
+ page = &adev->sdma.instance[i].page;
+ if (val & BIT_ULL(i * num_ring))
+ ring->sched.ready = true;
+ else
+ ring->sched.ready = false;
+
+ if (page) {
+ if (val & BIT_ULL(i * num_ring + 1))
+ page->sched.ready = true;
+ else
+ page->sched.ready = false;
+ }
+ }
+ /* publish sched.ready flag update effective immediately across smp */
+ smp_rmb();
+ return 0;
+}
+
+static int amdgpu_debugfs_sdma_sched_mask_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u64 i, num_ring;
+ u64 mask = 0;
+ struct amdgpu_ring *ring, *page = NULL;
+
+ if (!adev)
+ return -ENODEV;
+
+ /* Determine the number of rings per SDMA instance
+ * (1 for sdma gfx ring, 2 if page queue exists)
+ */
+ if (adev->sdma.has_page_queue)
+ num_ring = 2;
+ else
+ num_ring = 1;
+
+ for (i = 0; i < adev->sdma.num_instances; ++i) {
+ ring = &adev->sdma.instance[i].ring;
+ if (adev->sdma.has_page_queue)
+ page = &adev->sdma.instance[i].page;
+
+ if (ring->sched.ready)
+ mask |= BIT_ULL(i * num_ring);
+ else
+ mask &= ~BIT_ULL(i * num_ring);
+
+ if (page) {
+ if (page->sched.ready)
+ mask |= BIT_ULL(i * num_ring + 1);
+ else
+ mask &= ~BIT_ULL(i * num_ring + 1);
+ }
+ }
+
+ *val = mask;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_sdma_sched_mask_fops,
+ amdgpu_debugfs_sdma_sched_mask_get,
+ amdgpu_debugfs_sdma_sched_mask_set, "%llx\n");
+
+#endif
+
+void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ if (!(adev->sdma.num_instances > 1))
+ return;
+ sprintf(name, "amdgpu_sdma_sched_mask");
+ debugfs_create_file(name, 0600, root, adev,
+ &amdgpu_debugfs_sdma_sched_mask_fops);
+#endif
+}
+
+static ssize_t amdgpu_get_sdma_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->sdma.supported_reset);
+}
+
+static DEVICE_ATTR(sdma_reset_mask, 0444,
+ amdgpu_get_sdma_reset_mask, NULL);
+
+int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (!amdgpu_gpu_recovery)
+ return r;
+
+ if (adev->sdma.num_instances) {
+ r = device_create_file(adev->dev, &dev_attr_sdma_reset_mask);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+ if (!amdgpu_gpu_recovery)
+ return;
+
+ if (adev->dev->kobj.sd) {
+ if (adev->sdma.num_instances)
+ device_remove_file(adev->dev, &dev_attr_sdma_reset_mask);
+ }
+}
+
+struct amdgpu_ring *amdgpu_sdma_get_shared_ring(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ if (adev->sdma.has_page_queue &&
+ (ring->me < adev->sdma.num_instances) &&
+ (ring == &adev->sdma.instance[ring->me].ring))
+ return &adev->sdma.instance[ring->me].page;
+ else
+ return NULL;
+}
+
+/**
+* amdgpu_sdma_is_shared_inv_eng - Check if a ring is an SDMA ring that shares a VM invalidation engine
+* @adev: Pointer to the AMDGPU device structure
+* @ring: Pointer to the ring structure to check
+*
+* This function checks if the given ring is an SDMA ring that shares a VM invalidation engine.
+* It returns true if the ring is such an SDMA ring, false otherwise.
+*/
+bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ int i = ring->me;
+
+ if (!adev->sdma.has_page_queue || i >= adev->sdma.num_instances)
+ return false;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
+ return (ring == &adev->sdma.instance[i].page);
+ else
+ return false;
+}
+
+static int amdgpu_sdma_soft_reset(struct amdgpu_device *adev, u32 instance_id)
+{
+ struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
+
+ if (sdma_instance->funcs->soft_reset_kernel_queue)
+ return sdma_instance->funcs->soft_reset_kernel_queue(adev, instance_id);
+
+ return -EOPNOTSUPP;
+}
+
+/**
+ * amdgpu_sdma_reset_engine - Reset a specific SDMA engine
+ * @adev: Pointer to the AMDGPU device
+ * @instance_id: Logical ID of the SDMA engine instance to reset
+ * @caller_handles_kernel_queues: Skip kernel queue processing. Caller
+ * will handle it.
+ *
+ * Returns: 0 on success, or a negative error code on failure.
+ */
+int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id,
+ bool caller_handles_kernel_queues)
+{
+ int ret = 0;
+ struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
+ struct amdgpu_ring *gfx_ring = &sdma_instance->ring;
+ struct amdgpu_ring *page_ring = &sdma_instance->page;
+
+ mutex_lock(&sdma_instance->engine_reset_mutex);
+
+ if (!caller_handles_kernel_queues) {
+ /* Stop the scheduler's work queue for the GFX and page rings if they are running.
+ * This ensures that no new tasks are submitted to the queues while
+ * the reset is in progress.
+ */
+ drm_sched_wqueue_stop(&gfx_ring->sched);
+
+ if (adev->sdma.has_page_queue)
+ drm_sched_wqueue_stop(&page_ring->sched);
+ }
+
+ if (sdma_instance->funcs->stop_kernel_queue) {
+ sdma_instance->funcs->stop_kernel_queue(gfx_ring);
+ if (adev->sdma.has_page_queue)
+ sdma_instance->funcs->stop_kernel_queue(page_ring);
+ }
+
+ /* Perform the SDMA reset for the specified instance */
+ ret = amdgpu_sdma_soft_reset(adev, instance_id);
+ if (ret) {
+ dev_err(adev->dev, "Failed to reset SDMA logical instance %u\n", instance_id);
+ goto exit;
+ }
+
+ if (sdma_instance->funcs->start_kernel_queue) {
+ sdma_instance->funcs->start_kernel_queue(gfx_ring);
+ if (adev->sdma.has_page_queue)
+ sdma_instance->funcs->start_kernel_queue(page_ring);
+ }
+
+exit:
+ if (!caller_handles_kernel_queues) {
+ /* Restart the scheduler's work queue for the GFX and page rings
+ * if they were stopped by this function. This allows new tasks
+ * to be submitted to the queues after the reset is complete.
+ */
+ if (!ret) {
+ amdgpu_fence_driver_force_completion(gfx_ring);
+ drm_sched_wqueue_start(&gfx_ring->sched);
+ if (adev->sdma.has_page_queue) {
+ amdgpu_fence_driver_force_completion(page_ring);
+ drm_sched_wqueue_start(&page_ring->sched);
+ }
+ }
+ }
+ mutex_unlock(&sdma_instance->engine_reset_mutex);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index f8fb755e3aa6..34311f32be4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -23,9 +23,10 @@
#ifndef __AMDGPU_SDMA_H__
#define __AMDGPU_SDMA_H__
+#include "amdgpu_ras.h"
/* max number of IP instances */
-#define AMDGPU_MAX_SDMA_INSTANCES 8
+#define AMDGPU_MAX_SDMA_INSTANCES 16
enum amdgpu_sdma_irq {
AMDGPU_SDMA_IRQ_INSTANCE0 = 0,
@@ -36,9 +37,25 @@ enum amdgpu_sdma_irq {
AMDGPU_SDMA_IRQ_INSTANCE5,
AMDGPU_SDMA_IRQ_INSTANCE6,
AMDGPU_SDMA_IRQ_INSTANCE7,
+ AMDGPU_SDMA_IRQ_INSTANCE8,
+ AMDGPU_SDMA_IRQ_INSTANCE9,
+ AMDGPU_SDMA_IRQ_INSTANCE10,
+ AMDGPU_SDMA_IRQ_INSTANCE11,
+ AMDGPU_SDMA_IRQ_INSTANCE12,
+ AMDGPU_SDMA_IRQ_INSTANCE13,
+ AMDGPU_SDMA_IRQ_INSTANCE14,
+ AMDGPU_SDMA_IRQ_INSTANCE15,
AMDGPU_SDMA_IRQ_LAST
};
+#define NUM_SDMA(x) hweight32(x)
+
+struct amdgpu_sdma_funcs {
+ int (*stop_kernel_queue)(struct amdgpu_ring *ring);
+ int (*start_kernel_queue)(struct amdgpu_ring *ring);
+ int (*soft_reset_kernel_queue)(struct amdgpu_device *adev, u32 instance_id);
+};
+
struct amdgpu_sdma_instance {
/* SDMA firmware */
const struct firmware *fw;
@@ -48,32 +65,74 @@ struct amdgpu_sdma_instance {
struct amdgpu_ring ring;
struct amdgpu_ring page;
bool burst_nop;
+ uint32_t aid_id;
+
+ struct amdgpu_bo *sdma_fw_obj;
+ uint64_t sdma_fw_gpu_addr;
+ uint32_t *sdma_fw_ptr;
+ struct mutex engine_reset_mutex;
+ /* track guilty state of GFX and PAGE queues */
+ bool gfx_guilty;
+ bool page_guilty;
+ const struct amdgpu_sdma_funcs *funcs;
};
-struct amdgpu_sdma_ras_funcs {
- int (*ras_late_init)(struct amdgpu_device *adev,
- void *ras_ih_info);
- void (*ras_fini)(struct amdgpu_device *adev);
- int (*query_ras_error_count)(struct amdgpu_device *adev,
- uint32_t instance, void *ras_error_status);
- void (*reset_ras_error_count)(struct amdgpu_device *adev);
+enum amdgpu_sdma_ras_memory_id {
+ AMDGPU_SDMA_MBANK_DATA_BUF0 = 1,
+ AMDGPU_SDMA_MBANK_DATA_BUF1 = 2,
+ AMDGPU_SDMA_MBANK_DATA_BUF2 = 3,
+ AMDGPU_SDMA_MBANK_DATA_BUF3 = 4,
+ AMDGPU_SDMA_MBANK_DATA_BUF4 = 5,
+ AMDGPU_SDMA_MBANK_DATA_BUF5 = 6,
+ AMDGPU_SDMA_MBANK_DATA_BUF6 = 7,
+ AMDGPU_SDMA_MBANK_DATA_BUF7 = 8,
+ AMDGPU_SDMA_MBANK_DATA_BUF8 = 9,
+ AMDGPU_SDMA_MBANK_DATA_BUF9 = 10,
+ AMDGPU_SDMA_MBANK_DATA_BUF10 = 11,
+ AMDGPU_SDMA_MBANK_DATA_BUF11 = 12,
+ AMDGPU_SDMA_MBANK_DATA_BUF12 = 13,
+ AMDGPU_SDMA_MBANK_DATA_BUF13 = 14,
+ AMDGPU_SDMA_MBANK_DATA_BUF14 = 15,
+ AMDGPU_SDMA_MBANK_DATA_BUF15 = 16,
+ AMDGPU_SDMA_UCODE_BUF = 17,
+ AMDGPU_SDMA_RB_CMD_BUF = 18,
+ AMDGPU_SDMA_IB_CMD_BUF = 19,
+ AMDGPU_SDMA_UTCL1_RD_FIFO = 20,
+ AMDGPU_SDMA_UTCL1_RDBST_FIFO = 21,
+ AMDGPU_SDMA_UTCL1_WR_FIFO = 22,
+ AMDGPU_SDMA_DATA_LUT_FIFO = 23,
+ AMDGPU_SDMA_SPLIT_DAT_BUF = 24,
+ AMDGPU_SDMA_MEMORY_BLOCK_LAST,
+};
+
+struct amdgpu_sdma_ras {
+ struct amdgpu_ras_block_object ras_block;
};
struct amdgpu_sdma {
struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
struct amdgpu_irq_src trap_irq;
struct amdgpu_irq_src illegal_inst_irq;
+ struct amdgpu_irq_src fence_irq;
struct amdgpu_irq_src ecc_irq;
struct amdgpu_irq_src vm_hole_irq;
struct amdgpu_irq_src doorbell_invalid_irq;
struct amdgpu_irq_src pool_timeout_irq;
struct amdgpu_irq_src srbm_write_irq;
+ struct amdgpu_irq_src ctxt_empty_irq;
int num_instances;
+ uint32_t sdma_mask;
+ int num_inst_per_aid;
uint32_t srbm_soft_reset;
bool has_page_queue;
struct ras_common_if *ras_if;
- const struct amdgpu_sdma_ras_funcs *funcs;
+ struct amdgpu_sdma_ras *ras;
+ uint32_t *ip_dump;
+ uint32_t supported_reset;
+ struct list_head reset_callback_list;
+ bool no_user_submission;
+ bool disable_uq;
};
/*
@@ -95,7 +154,7 @@ struct amdgpu_buffer_funcs {
uint64_t dst_offset,
/* number of byte to transfer */
uint32_t byte_count,
- bool tmz);
+ uint32_t copy_flags);
/* maximum bytes in a single operation */
uint32_t fill_max_bytes;
@@ -113,6 +172,9 @@ struct amdgpu_buffer_funcs {
uint32_t byte_count);
};
+int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id,
+ bool caller_handles_kernel_queues);
+
#define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t))
#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
@@ -121,12 +183,22 @@ amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring);
int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index);
uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, unsigned vmid);
int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
- void *ras_ih_info);
-void amdgpu_sdma_ras_fini(struct amdgpu_device *adev);
+ struct ras_common_if *ras_block);
int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
struct amdgpu_iv_entry *entry);
int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry);
+int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance,
+ bool duplicate);
+void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
+ bool duplicate);
+int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev);
+int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev);
+void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+struct amdgpu_ring *amdgpu_sdma_get_shared_ring(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
index cc7597a15fe9..3739be1b71e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
@@ -77,11 +77,11 @@ void psp_securedisplay_parse_resp_status(struct psp_context *psp,
}
}
-void psp_prep_securedisplay_cmd_buf(struct psp_context *psp, struct securedisplay_cmd **cmd,
+void psp_prep_securedisplay_cmd_buf(struct psp_context *psp, struct ta_securedisplay_cmd **cmd,
enum ta_securedisplay_command command_id)
{
- *cmd = (struct securedisplay_cmd *)psp->securedisplay_context.context.mem_context.shared_buf;
- memset(*cmd, 0, sizeof(struct securedisplay_cmd));
+ *cmd = (struct ta_securedisplay_cmd *)psp->securedisplay_context.context.mem_context.shared_buf;
+ memset(*cmd, 0, sizeof(struct ta_securedisplay_cmd));
(*cmd)->status = TA_SECUREDISPLAY_STATUS__GENERIC_FAILURE;
(*cmd)->cmd_id = command_id;
}
@@ -93,7 +93,7 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u
{
struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
struct psp_context *psp = &adev->psp;
- struct securedisplay_cmd *securedisplay_cmd;
+ struct ta_securedisplay_cmd *securedisplay_cmd;
struct drm_device *dev = adev_to_drm(adev);
uint32_t phy_id;
uint32_t op;
@@ -121,6 +121,7 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u
switch (op) {
case 1:
+ mutex_lock(&psp->securedisplay_context.mutex);
psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
TA_SECUREDISPLAY_COMMAND__QUERY_TA);
ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__QUERY_TA);
@@ -131,8 +132,14 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u
else
psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status);
}
+ mutex_unlock(&psp->securedisplay_context.mutex);
break;
case 2:
+ if (size < 3 || phy_id >= TA_SECUREDISPLAY_MAX_PHY) {
+ dev_err(adev->dev, "Invalid input: %s\n", str);
+ return -EINVAL;
+ }
+ mutex_lock(&psp->securedisplay_context.mutex);
psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
securedisplay_cmd->securedisplay_in_message.send_roi_crc.phy_id = phy_id;
@@ -146,12 +153,12 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u
psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status);
}
}
+ mutex_unlock(&psp->securedisplay_context.mutex);
break;
default:
dev_err(adev->dev, "Invalid input: %s\n", str);
}
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.h
index fe98574748f4..456ad68ed4b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.h
@@ -30,7 +30,7 @@
void amdgpu_securedisplay_debugfs_init(struct amdgpu_device *adev);
void psp_securedisplay_parse_resp_status(struct psp_context *psp,
enum ta_securedisplay_status status);
-void psp_prep_securedisplay_cmd_buf(struct psp_context *psp, struct securedisplay_cmd **cmd,
+void psp_prep_securedisplay_cmd_buf(struct psp_context *psp, struct ta_securedisplay_cmd **cmd,
enum ta_securedisplay_command command_id);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
new file mode 100644
index 000000000000..a0b479d5fff1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_seq64.h"
+
+#include <drm/drm_exec.h>
+
+/**
+ * DOC: amdgpu_seq64
+ *
+ * amdgpu_seq64 allocates one 64-bit slot per request (lowest free slot first).
+ * The seq64 driver is required for user queue fence memory allocation, TLB
+ * counters and VM updates. It has at most AMDGPU_MAX_SEQ64_SLOTS 64-bit slots.
+ */
+
+/**
+ * amdgpu_seq64_get_va_base - Get the seq64 va base address
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns:
+ * va base address on success
+ */
+static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev)
+{
+ u64 addr = AMDGPU_VA_RESERVED_SEQ64_START(adev);
+
+ addr = amdgpu_gmc_sign_extend(addr);
+
+ return addr;
+}
+
+/**
+ * amdgpu_seq64_map - Map the seq64 memory to VM
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: vm pointer
+ * @bo_va: bo_va pointer
+ *
+ * Map the seq64 memory to the given VM.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure
+ */
+int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo_va **bo_va)
+{
+ struct amdgpu_bo *bo;
+ struct drm_exec exec;
+ u64 seq64_addr;
+ int r;
+
+ bo = adev->seq64.sbo;
+ if (!bo)
+ return -EINVAL;
+
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = amdgpu_vm_lock_pd(vm, &exec, 0);
+ if (likely(!r))
+ r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto error;
+ }
+
+ *bo_va = amdgpu_vm_bo_add(adev, vm, bo);
+ if (!*bo_va) {
+ r = -ENOMEM;
+ goto error;
+ }
+
+ seq64_addr = amdgpu_seq64_get_va_base(adev) & AMDGPU_GMC_HOLE_MASK;
+
+ r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0,
+ AMDGPU_VA_RESERVED_SEQ64_SIZE,
+ AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_UC);
+ if (r) {
+ DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r);
+ amdgpu_vm_bo_del(adev, *bo_va);
+ goto error;
+ }
+
+ r = amdgpu_vm_bo_update(adev, *bo_va, false);
+ if (r) {
+ DRM_ERROR("failed to do vm_bo_update on userq sem\n");
+ amdgpu_vm_bo_del(adev, *bo_va);
+ goto error;
+ }
+
+error:
+ drm_exec_fini(&exec);
+ return r;
+}
+
+/**
+ * amdgpu_seq64_unmap - Unmap the seq64 memory
+ *
+ * @adev: amdgpu_device pointer
+ * @fpriv: DRM file private
+ *
+ * Unmap the seq64 memory from the given VM.
+ */
+void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv)
+{
+ struct amdgpu_vm *vm;
+ struct amdgpu_bo *bo;
+ struct drm_exec exec;
+ int r;
+
+ if (!fpriv->seq64_va)
+ return;
+
+ bo = adev->seq64.sbo;
+ if (!bo)
+ return;
+
+ vm = &fpriv->vm;
+
+ drm_exec_init(&exec, 0, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = amdgpu_vm_lock_pd(vm, &exec, 0);
+ if (likely(!r))
+ r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto error;
+ }
+
+ amdgpu_vm_bo_del(adev, fpriv->seq64_va);
+
+ fpriv->seq64_va = NULL;
+
+error:
+ drm_exec_fini(&exec);
+}
+
+/**
+ * amdgpu_seq64_alloc - Allocate a 64 bit memory
+ *
+ * @adev: amdgpu_device pointer
+ * @va: VA to access the seq in process address space
+ * @gpu_addr: GPU address to access the seq
+ * @cpu_addr: CPU address to access the seq
+ *
+ * Alloc a 64 bit memory from seq64 pool.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure
+ */
+int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va,
+ u64 *gpu_addr, u64 **cpu_addr)
+{
+ unsigned long bit_pos;
+
+ bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem);
+ if (bit_pos >= adev->seq64.num_sem)
+ return -ENOSPC;
+
+ __set_bit(bit_pos, adev->seq64.used);
+
+ *va = bit_pos * sizeof(u64) + amdgpu_seq64_get_va_base(adev);
+
+ if (gpu_addr)
+ *gpu_addr = bit_pos * sizeof(u64) + adev->seq64.gpu_addr;
+
+ *cpu_addr = bit_pos + adev->seq64.cpu_base_addr;
+
+ return 0;
+}
+
+/**
+ * amdgpu_seq64_free - Free the given 64 bit memory
+ *
+ * @adev: amdgpu_device pointer
+ * @va: VA of the slot to free, as returned in @va by amdgpu_seq64_alloc()
+ *
+ * Free the given 64 bit memory from seq64 pool.
+ */
+void amdgpu_seq64_free(struct amdgpu_device *adev, u64 va)
+{
+ unsigned long bit_pos;
+
+ bit_pos = (va - amdgpu_seq64_get_va_base(adev)) / sizeof(u64);
+ if (bit_pos < adev->seq64.num_sem)
+ __clear_bit(bit_pos, adev->seq64.used);
+}
+
+/**
+ * amdgpu_seq64_fini - Cleanup seq64 driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Free the memory space allocated for seq64.
+ *
+ */
+void amdgpu_seq64_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->seq64.sbo,
+ NULL,
+ (void **)&adev->seq64.cpu_base_addr);
+}
+
+/**
+ * amdgpu_seq64_init - Initialize seq64 driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate the required memory space for seq64.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure
+ */
+int amdgpu_seq64_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->seq64.sbo)
+ return 0;
+
+ /*
+ * AMDGPU_VA_RESERVED_SEQ64_SIZE bytes / sizeof(u64) =
+ * AMDGPU_MAX_SEQ64_SLOTS 64bit slots
+ */
+ r = amdgpu_bo_create_kernel(adev, AMDGPU_VA_RESERVED_SEQ64_SIZE,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->seq64.sbo, &adev->seq64.gpu_addr,
+ (void **)&adev->seq64.cpu_base_addr);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create seq64 failed\n", r);
+ return r;
+ }
+
+ memset(adev->seq64.cpu_base_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE);
+
+ adev->seq64.num_sem = AMDGPU_MAX_SEQ64_SLOTS;
+ memset(&adev->seq64.used, 0, sizeof(adev->seq64.used));
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
new file mode 100644
index 000000000000..26a249aaaee1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_SEQ64_H__
+#define __AMDGPU_SEQ64_H__
+
+#include "amdgpu_vm.h"
+
+#define AMDGPU_MAX_SEQ64_SLOTS (AMDGPU_VA_RESERVED_SEQ64_SIZE / sizeof(u64))
+
+struct amdgpu_seq64 {
+ struct amdgpu_bo *sbo;
+ u32 num_sem;
+ u64 gpu_addr;
+ u64 *cpu_base_addr;
+ DECLARE_BITMAP(used, AMDGPU_MAX_SEQ64_SLOTS);
+};
+
+void amdgpu_seq64_fini(struct amdgpu_device *adev);
+int amdgpu_seq64_init(struct amdgpu_device *adev);
+int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 *gpu_addr, u64 **cpu_addr);
+void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr);
+int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo_va **bo_va);
+void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv);
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
index 484bb3dcec47..ec9d12f85f39 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
@@ -23,14 +23,28 @@
#ifndef __AMDGPU_SMUIO_H__
#define __AMDGPU_SMUIO_H__
+enum amdgpu_pkg_type {
+ AMDGPU_PKG_TYPE_APU = 2,
+ AMDGPU_PKG_TYPE_CEM = 3,
+ AMDGPU_PKG_TYPE_OAM = 4,
+ AMDGPU_PKG_TYPE_UNKNOWN,
+};
+
+struct amdgpu_smuio_mcm_config_info {
+ int socket_id;
+ int die_id;
+};
+
struct amdgpu_smuio_funcs {
u32 (*get_rom_index_offset)(struct amdgpu_device *adev);
u32 (*get_rom_data_offset)(struct amdgpu_device *adev);
void (*update_rom_clock_gating)(struct amdgpu_device *adev, bool enable);
- void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags);
+ void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags);
u32 (*get_die_id)(struct amdgpu_device *adev);
u32 (*get_socket_id)(struct amdgpu_device *adev);
+ enum amdgpu_pkg_type (*get_pkg_type)(struct amdgpu_device *adev);
bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev);
+ u64 (*get_gpu_clock_counter)(struct amdgpu_device *adev);
};
struct amdgpu_smuio {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 862eb3c1c4c5..d6ae9974c952 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2014 Advanced Micro Devices, Inc.
* All Rights Reserved.
@@ -51,7 +52,6 @@ static struct kmem_cache *amdgpu_sync_slab;
void amdgpu_sync_create(struct amdgpu_sync *sync)
{
hash_init(sync->fences);
- sync->last_vm_update = NULL;
}
/**
@@ -135,11 +135,16 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
struct amdgpu_sync_entry *e;
hash_for_each_possible(sync->fences, e, node, f->context) {
- if (unlikely(e->fence->context != f->context))
- continue;
+ if (dma_fence_is_signaled(e->fence)) {
+ dma_fence_put(e->fence);
+ e->fence = dma_fence_get(f);
+ return true;
+ }
- amdgpu_sync_keep_later(&e->fence, f);
- return true;
+ if (likely(e->fence->context == f->context)) {
+ amdgpu_sync_keep_later(&e->fence, f);
+ return true;
+ }
}
return false;
}
@@ -149,10 +154,12 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
*
* @sync: sync object to add fence to
* @f: fence to sync to
+ * @flags: memory allocation flags to use when allocating sync entry
*
* Add the fence to the sync object.
*/
-int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+ gfp_t flags)
{
struct amdgpu_sync_entry *e;
@@ -162,7 +169,7 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
if (amdgpu_sync_add_later(sync, f))
return 0;
- e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
+ e = kmem_cache_alloc(amdgpu_sync_slab, flags);
if (!e)
return -ENOMEM;
@@ -171,23 +178,6 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
return 0;
}
-/**
- * amdgpu_sync_vm_fence - remember to sync to this VM fence
- *
- * @sync: sync object to add fence to
- * @fence: the VM fence to add
- *
- * Add the fence to the sync object and remember it as VM update.
- */
-int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
-{
- if (!fence)
- return 0;
-
- amdgpu_sync_keep_later(&sync->last_vm_update, fence);
- return amdgpu_sync_fence(sync, fence);
-}
-
/* Determine based on the owner and mode if we should sync to a fence or not */
static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
enum amdgpu_sync_mode mode,
@@ -208,7 +198,8 @@ static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
/* Never sync to VM updates either. */
if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
- owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+ owner != AMDGPU_FENCE_OWNER_UNDEFINED &&
+ owner != AMDGPU_FENCE_OWNER_KFD)
return false;
/* Ignore fences depending on the sync mode */
@@ -252,44 +243,65 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct dma_resv *resv, enum amdgpu_sync_mode mode,
void *owner)
{
- struct dma_resv_list *flist;
+ struct dma_resv_iter cursor;
struct dma_fence *f;
- unsigned i;
- int r = 0;
+ int r;
if (resv == NULL)
return -EINVAL;
-
- /* always sync to the exclusive fence */
- f = dma_resv_excl_fence(resv);
- dma_fence_chain_for_each(f, f) {
- struct dma_fence_chain *chain = to_dma_fence_chain(f);
-
- if (amdgpu_sync_test_fence(adev, mode, owner, chain ?
- chain->fence : f)) {
- r = amdgpu_sync_fence(sync, f);
- dma_fence_put(f);
- if (r)
- return r;
- break;
+ /* Implicitly sync only to KERNEL, WRITE and READ */
+ dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, f) {
+ dma_fence_chain_for_each(f, f) {
+ struct dma_fence *tmp = dma_fence_chain_contained(f);
+
+ if (amdgpu_sync_test_fence(adev, mode, owner, tmp)) {
+ r = amdgpu_sync_fence(sync, f, GFP_KERNEL);
+ dma_fence_put(f);
+ if (r)
+ return r;
+ break;
+ }
}
}
+ return 0;
+}
- flist = dma_resv_shared_list(resv);
- if (!flist)
- return 0;
+/**
+ * amdgpu_sync_kfd - sync to KFD fences
+ *
+ * @sync: sync object to add KFD fences to
+ * @resv: reservation object with KFD fences
+ *
+ * Extract all KFD fences and add them to the sync object.
+ */
+int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv)
+{
+ struct dma_resv_iter cursor;
+ struct dma_fence *f;
+ int r = 0;
- for (i = 0; i < flist->shared_count; ++i) {
- f = rcu_dereference_protected(flist->shared[i],
- dma_resv_held(resv));
+ dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP);
+ dma_resv_for_each_fence_unlocked(&cursor, f) {
+ void *fence_owner = amdgpu_sync_get_owner(f);
- if (amdgpu_sync_test_fence(adev, mode, owner, f)) {
- r = amdgpu_sync_fence(sync, f);
- if (r)
- return r;
- }
+ if (fence_owner != AMDGPU_FENCE_OWNER_KFD)
+ continue;
+
+ r = amdgpu_sync_fence(sync, f, GFP_KERNEL);
+ if (r)
+ break;
}
- return 0;
+ dma_resv_iter_end(&cursor);
+
+ return r;
+}
+
+/* Free the entry back to the slab */
+static void amdgpu_sync_entry_free(struct amdgpu_sync_entry *e)
+{
+ hash_del(&e->node);
+ dma_fence_put(e->fence);
+ kmem_cache_free(amdgpu_sync_slab, e);
}
/**
@@ -313,9 +325,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
if (dma_fence_is_signaled(f)) {
- hash_del(&e->node);
- dma_fence_put(f);
- kmem_cache_free(amdgpu_sync_slab, e);
+ amdgpu_sync_entry_free(e);
continue;
}
if (ring && s_fence) {
@@ -349,6 +359,7 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
struct hlist_node *tmp;
struct dma_fence *f;
int i;
+
hash_for_each_safe(sync->fences, i, tmp, e, node) {
f = e->fence;
@@ -383,19 +394,64 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
hash_for_each_safe(source->fences, i, tmp, e, node) {
f = e->fence;
if (!dma_fence_is_signaled(f)) {
- r = amdgpu_sync_fence(clone, f);
+ r = amdgpu_sync_fence(clone, f, GFP_KERNEL);
if (r)
return r;
} else {
- hash_del(&e->node);
- dma_fence_put(f);
- kmem_cache_free(amdgpu_sync_slab, e);
+ amdgpu_sync_entry_free(e);
}
}
- dma_fence_put(clone->last_vm_update);
- clone->last_vm_update = dma_fence_get(source->last_vm_update);
+ return 0;
+}
+
+/**
+ * amdgpu_sync_move - move all fences from src to dst
+ *
+ * @src: source of the fences, empty after function
+ * @dst: destination for the fences
+ *
+ * Moves all fences from source to destination. All fences in destination are
+ * freed and source is empty after the function call.
+ */
+void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst)
+{
+ unsigned int i;
+
+ amdgpu_sync_free(dst);
+
+ for (i = 0; i < HASH_SIZE(src->fences); ++i)
+ hlist_move_list(&src->fences[i], &dst->fences[i]);
+}
+
+/**
+ * amdgpu_sync_push_to_job - push fences into job
+ * @sync: sync object to get the fences from
+ * @job: job to push the fences into
+ *
+ * Add all unsignaled fences from sync to job.
+ */
+int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job)
+{
+ struct amdgpu_sync_entry *e;
+ struct hlist_node *tmp;
+ struct dma_fence *f;
+ int i, r;
+
+ hash_for_each_safe(sync->fences, i, tmp, e, node) {
+ f = e->fence;
+ if (dma_fence_is_signaled(f)) {
+ amdgpu_sync_entry_free(e);
+ continue;
+ }
+ dma_fence_get(f);
+ r = drm_sched_job_add_dependency(&job->base, f);
+ if (r) {
+ dma_fence_put(f);
+ return r;
+ }
+ }
return 0;
}
@@ -410,9 +466,7 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
if (r)
return r;
- hash_del(&e->node);
- dma_fence_put(e->fence);
- kmem_cache_free(amdgpu_sync_slab, e);
+ amdgpu_sync_entry_free(e);
}
return 0;
@@ -429,15 +483,10 @@ void amdgpu_sync_free(struct amdgpu_sync *sync)
{
struct amdgpu_sync_entry *e;
struct hlist_node *tmp;
- unsigned i;
-
- hash_for_each_safe(sync->fences, i, tmp, e, node) {
- hash_del(&e->node);
- dma_fence_put(e->fence);
- kmem_cache_free(amdgpu_sync_slab, e);
- }
+ unsigned int i;
- dma_fence_put(sync->last_vm_update);
+ hash_for_each_safe(sync->fences, i, tmp, e, node)
+ amdgpu_sync_entry_free(e);
}
/**
@@ -447,9 +496,7 @@ void amdgpu_sync_free(struct amdgpu_sync *sync)
*/
int amdgpu_sync_init(void)
{
- amdgpu_sync_slab = kmem_cache_create(
- "amdgpu_sync", sizeof(struct amdgpu_sync_entry), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ amdgpu_sync_slab = KMEM_CACHE(amdgpu_sync_entry, SLAB_HWCACHE_ALIGN);
if (!amdgpu_sync_slab)
return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index 7c0fe20c470d..51eb4382c91e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -30,6 +30,7 @@ struct dma_fence;
struct dma_resv;
struct amdgpu_device;
struct amdgpu_ring;
+struct amdgpu_job;
enum amdgpu_sync_mode {
AMDGPU_SYNC_ALWAYS,
@@ -43,19 +44,21 @@ enum amdgpu_sync_mode {
*/
struct amdgpu_sync {
DECLARE_HASHTABLE(fences, 4);
- struct dma_fence *last_vm_update;
};
void amdgpu_sync_create(struct amdgpu_sync *sync);
-int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
-int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence);
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+ gfp_t flags);
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct dma_resv *resv, enum amdgpu_sync_mode mode,
void *owner);
+int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv);
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct amdgpu_ring *ring);
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
+void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst);
+int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job);
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
void amdgpu_sync_free(struct amdgpu_sync *sync);
int amdgpu_sync_init(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
deleted file mode 100644
index 909d830b513e..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ /dev/null
@@ -1,250 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR MIT
-/*
- * Copyright 2009 VMware, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Michel Dänzer
- */
-
-#include <drm/amdgpu_drm.h>
-#include "amdgpu.h"
-#include "amdgpu_uvd.h"
-#include "amdgpu_vce.h"
-
-/* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */
-static void amdgpu_do_test_moves(struct amdgpu_device *adev)
-{
- struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
- struct amdgpu_bo *vram_obj = NULL;
- struct amdgpu_bo **gtt_obj = NULL;
- struct amdgpu_bo_param bp;
- uint64_t gart_addr, vram_addr;
- unsigned n, size;
- int i, r;
-
- size = 1024 * 1024;
-
- /* Number of tests =
- * (Total GTT - gart_pin_size - (2 transfer windows for buffer moves)) / test size
- */
- n = adev->gmc.gart_size - atomic64_read(&adev->gart_pin_size);
- n -= AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS *
- AMDGPU_GPU_PAGE_SIZE;
- n /= size;
-
- gtt_obj = kcalloc(n, sizeof(*gtt_obj), GFP_KERNEL);
- if (!gtt_obj) {
- DRM_ERROR("Failed to allocate %d pointers\n", n);
- r = 1;
- goto out_cleanup;
- }
- memset(&bp, 0, sizeof(bp));
- bp.size = size;
- bp.byte_align = PAGE_SIZE;
- bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
- bp.flags = 0;
- bp.type = ttm_bo_type_kernel;
- bp.resv = NULL;
- bp.bo_ptr_size = sizeof(struct amdgpu_bo);
-
- r = amdgpu_bo_create(adev, &bp, &vram_obj);
- if (r) {
- DRM_ERROR("Failed to create VRAM object\n");
- goto out_cleanup;
- }
- r = amdgpu_bo_reserve(vram_obj, false);
- if (unlikely(r != 0))
- goto out_unref;
- r = amdgpu_bo_pin(vram_obj, AMDGPU_GEM_DOMAIN_VRAM);
- if (r) {
- DRM_ERROR("Failed to pin VRAM object\n");
- goto out_unres;
- }
- vram_addr = amdgpu_bo_gpu_offset(vram_obj);
- for (i = 0; i < n; i++) {
- void *gtt_map, *vram_map;
- void **gart_start, **gart_end;
- void **vram_start, **vram_end;
- struct dma_fence *fence = NULL;
-
- bp.domain = AMDGPU_GEM_DOMAIN_GTT;
- r = amdgpu_bo_create(adev, &bp, gtt_obj + i);
- if (r) {
- DRM_ERROR("Failed to create GTT object %d\n", i);
- goto out_lclean;
- }
-
- r = amdgpu_bo_reserve(gtt_obj[i], false);
- if (unlikely(r != 0))
- goto out_lclean_unref;
- r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT);
- if (r) {
- DRM_ERROR("Failed to pin GTT object %d\n", i);
- goto out_lclean_unres;
- }
- r = amdgpu_ttm_alloc_gart(&gtt_obj[i]->tbo);
- if (r) {
- DRM_ERROR("%p bind failed\n", gtt_obj[i]);
- goto out_lclean_unpin;
- }
- gart_addr = amdgpu_bo_gpu_offset(gtt_obj[i]);
-
- r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
- if (r) {
- DRM_ERROR("Failed to map GTT object %d\n", i);
- goto out_lclean_unpin;
- }
-
- for (gart_start = gtt_map, gart_end = gtt_map + size;
- gart_start < gart_end;
- gart_start++)
- *gart_start = gart_start;
-
- amdgpu_bo_kunmap(gtt_obj[i]);
-
- r = amdgpu_copy_buffer(ring, gart_addr, vram_addr,
- size, NULL, &fence, false, false, false);
-
- if (r) {
- DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
- goto out_lclean_unpin;
- }
-
- r = dma_fence_wait(fence, false);
- if (r) {
- DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i);
- goto out_lclean_unpin;
- }
-
- dma_fence_put(fence);
- fence = NULL;
-
- r = amdgpu_bo_kmap(vram_obj, &vram_map);
- if (r) {
- DRM_ERROR("Failed to map VRAM object after copy %d\n", i);
- goto out_lclean_unpin;
- }
-
- for (gart_start = gtt_map, gart_end = gtt_map + size,
- vram_start = vram_map, vram_end = vram_map + size;
- vram_start < vram_end;
- gart_start++, vram_start++) {
- if (*vram_start != gart_start) {
- DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, "
- "expected 0x%p (GTT/VRAM offset "
- "0x%16llx/0x%16llx)\n",
- i, *vram_start, gart_start,
- (unsigned long long)
- (gart_addr - adev->gmc.gart_start +
- (void *)gart_start - gtt_map),
- (unsigned long long)
- (vram_addr - adev->gmc.vram_start +
- (void *)gart_start - gtt_map));
- amdgpu_bo_kunmap(vram_obj);
- goto out_lclean_unpin;
- }
- *vram_start = vram_start;
- }
-
- amdgpu_bo_kunmap(vram_obj);
-
- r = amdgpu_copy_buffer(ring, vram_addr, gart_addr,
- size, NULL, &fence, false, false, false);
-
- if (r) {
- DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
- goto out_lclean_unpin;
- }
-
- r = dma_fence_wait(fence, false);
- if (r) {
- DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i);
- goto out_lclean_unpin;
- }
-
- dma_fence_put(fence);
- fence = NULL;
-
- r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
- if (r) {
- DRM_ERROR("Failed to map GTT object after copy %d\n", i);
- goto out_lclean_unpin;
- }
-
- for (gart_start = gtt_map, gart_end = gtt_map + size,
- vram_start = vram_map, vram_end = vram_map + size;
- gart_start < gart_end;
- gart_start++, vram_start++) {
- if (*gart_start != vram_start) {
- DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, "
- "expected 0x%p (VRAM/GTT offset "
- "0x%16llx/0x%16llx)\n",
- i, *gart_start, vram_start,
- (unsigned long long)
- (vram_addr - adev->gmc.vram_start +
- (void *)vram_start - vram_map),
- (unsigned long long)
- (gart_addr - adev->gmc.gart_start +
- (void *)vram_start - vram_map));
- amdgpu_bo_kunmap(gtt_obj[i]);
- goto out_lclean_unpin;
- }
- }
-
- amdgpu_bo_kunmap(gtt_obj[i]);
-
- DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n",
- gart_addr - adev->gmc.gart_start);
- continue;
-
-out_lclean_unpin:
- amdgpu_bo_unpin(gtt_obj[i]);
-out_lclean_unres:
- amdgpu_bo_unreserve(gtt_obj[i]);
-out_lclean_unref:
- amdgpu_bo_unref(&gtt_obj[i]);
-out_lclean:
- for (--i; i >= 0; --i) {
- amdgpu_bo_unpin(gtt_obj[i]);
- amdgpu_bo_unreserve(gtt_obj[i]);
- amdgpu_bo_unref(&gtt_obj[i]);
- }
- if (fence)
- dma_fence_put(fence);
- break;
- }
-
- amdgpu_bo_unpin(vram_obj);
-out_unres:
- amdgpu_bo_unreserve(vram_obj);
-out_unref:
- amdgpu_bo_unref(&vram_obj);
-out_cleanup:
- kfree(gtt_obj);
- if (r) {
- pr_warn("Error while testing BO move\n");
- }
-}
-
-void amdgpu_test_moves(struct amdgpu_device *adev)
-{
- if (adev->mman.buffer_funcs)
- amdgpu_do_test_moves(adev);
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index d855cb53c7e0..d13e64a69e25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -127,7 +127,7 @@ TRACE_EVENT(amdgpu_bo_create,
TP_fast_assign(
__entry->bo = bo;
- __entry->pages = bo->tbo.resource->num_pages;
+ __entry->pages = PFN_UP(bo->tbo.resource->size);
__entry->type = bo->tbo.resource->mem_type;
__entry->prefer = bo->preferred_domains;
__entry->allow = bo->allowed_domains;
@@ -140,8 +140,10 @@ TRACE_EVENT(amdgpu_bo_create,
);
TRACE_EVENT(amdgpu_cs,
- TP_PROTO(struct amdgpu_cs_parser *p, int i),
- TP_ARGS(p, i),
+ TP_PROTO(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib),
+ TP_ARGS(p, job, ib),
TP_STRUCT__entry(
__field(struct amdgpu_bo_list *, bo_list)
__field(u32, ring)
@@ -151,10 +153,10 @@ TRACE_EVENT(amdgpu_cs,
TP_fast_assign(
__entry->bo_list = p->bo_list;
- __entry->ring = to_amdgpu_ring(p->entity->rq->sched)->idx;
- __entry->dw = p->job->ibs[i].length_dw;
+ __entry->ring = to_amdgpu_ring(job->base.entity->rq->sched)->idx;
+ __entry->dw = ib->length_dw;
__entry->fences = amdgpu_fence_count_emitted(
- to_amdgpu_ring(p->entity->rq->sched));
+ to_amdgpu_ring(job->base.entity->rq->sched));
),
TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
__entry->bo_list, __entry->ring, __entry->dw,
@@ -165,25 +167,23 @@ TRACE_EVENT(amdgpu_cs_ioctl,
TP_PROTO(struct amdgpu_job *job),
TP_ARGS(job),
TP_STRUCT__entry(
- __field(uint64_t, sched_job_id)
__string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
- __field(unsigned int, context)
- __field(unsigned int, seqno)
+ __field(u64, context)
+ __field(u64, seqno)
__field(struct dma_fence *, fence)
__string(ring, to_amdgpu_ring(job->base.sched)->name)
__field(u32, num_ibs)
),
TP_fast_assign(
- __entry->sched_job_id = job->base.id;
- __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job));
+ __assign_str(timeline);
__entry->context = job->base.s_fence->finished.context;
__entry->seqno = job->base.s_fence->finished.seqno;
- __assign_str(ring, to_amdgpu_ring(job->base.sched)->name);
+ __assign_str(ring);
__entry->num_ibs = job->num_ibs;
),
- TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
- __entry->sched_job_id, __get_str(timeline), __entry->context,
+ TP_printk("timeline=%s, fence=%llu:%llu, ring_name=%s, num_ibs=%u",
+ __get_str(timeline), __entry->context,
__entry->seqno, __get_str(ring), __entry->num_ibs)
);
@@ -191,24 +191,22 @@ TRACE_EVENT(amdgpu_sched_run_job,
TP_PROTO(struct amdgpu_job *job),
TP_ARGS(job),
TP_STRUCT__entry(
- __field(uint64_t, sched_job_id)
__string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
- __field(unsigned int, context)
- __field(unsigned int, seqno)
+ __field(u64, context)
+ __field(u64, seqno)
__string(ring, to_amdgpu_ring(job->base.sched)->name)
__field(u32, num_ibs)
),
TP_fast_assign(
- __entry->sched_job_id = job->base.id;
- __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job));
+ __assign_str(timeline);
__entry->context = job->base.s_fence->finished.context;
__entry->seqno = job->base.s_fence->finished.seqno;
- __assign_str(ring, to_amdgpu_ring(job->base.sched)->name);
+ __assign_str(ring);
__entry->num_ibs = job->num_ibs;
),
- TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
- __entry->sched_job_id, __get_str(timeline), __entry->context,
+ TP_printk("timeline=%s, fence=%llu:%llu, ring_name=%s, num_ibs=%u",
+ __get_str(timeline), __entry->context,
__entry->seqno, __get_str(ring), __entry->num_ibs)
);
@@ -229,9 +227,9 @@ TRACE_EVENT(amdgpu_vm_grab_id,
TP_fast_assign(
__entry->pasid = vm->pasid;
- __assign_str(ring, ring->name);
+ __assign_str(ring);
__entry->vmid = job->vmid;
- __entry->vm_hub = ring->funcs->vmhub,
+ __entry->vm_hub = ring->vm_hub,
__entry->pd_addr = job->vm_pd_addr;
__entry->needs_flush = job->vm_needs_flush;
),
@@ -358,11 +356,10 @@ TRACE_EVENT(amdgpu_vm_update_ptes,
}
),
TP_printk("pid:%u vm_ctx:0x%llx start:0x%010llx end:0x%010llx,"
- " flags:0x%llx, incr:%llu, dst:\n%s%s", __entry->pid,
+ " flags:0x%llx, incr:%llu, dst:\n%s", __entry->pid,
__entry->vm_ctx, __entry->start, __entry->end,
__entry->flags, __entry->incr, __print_array(
- __get_dynamic_array(dst), min(__entry->nptes, 32u), 8),
- __entry->nptes > 32 ? "..." : "")
+ __get_dynamic_array(dst), __entry->nptes, 8))
);
TRACE_EVENT(amdgpu_vm_set_ptes,
@@ -424,14 +421,14 @@ TRACE_EVENT(amdgpu_vm_flush,
),
TP_fast_assign(
- __assign_str(ring, ring->name);
+ __assign_str(ring);
__entry->vmid = vmid;
- __entry->vm_hub = ring->funcs->vmhub;
+ __entry->vm_hub = ring->vm_hub;
__entry->pd_addr = pd_addr;
),
TP_printk("ring=%s, id=%u, hub=%u, pd_addr=%010Lx",
__get_str(ring), __entry->vmid,
- __entry->vm_hub,__entry->pd_addr)
+ __entry->vm_hub, __entry->pd_addr)
);
DECLARE_EVENT_CLASS(amdgpu_pasid,
@@ -456,6 +453,38 @@ DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_freed,
TP_ARGS(pasid)
);
+TRACE_EVENT(amdgpu_isolation,
+ TP_PROTO(void *prev, void *next),
+ TP_ARGS(prev, next),
+ TP_STRUCT__entry(
+ __field(void *, prev)
+ __field(void *, next)
+ ),
+
+ TP_fast_assign(
+ __entry->prev = prev;
+ __entry->next = next;
+ ),
+ TP_printk("prev=%p, next=%p",
+ __entry->prev,
+ __entry->next)
+);
+
+TRACE_EVENT(amdgpu_cleaner_shader,
+ TP_PROTO(struct amdgpu_ring *ring, struct dma_fence *fence),
+ TP_ARGS(ring, fence),
+ TP_STRUCT__entry(
+ __string(ring, ring->name)
+ __field(u64, seqno)
+ ),
+
+ TP_fast_assign(
+ __assign_str(ring);
+ __entry->seqno = fence->seqno;
+ ),
+ TP_printk("ring=%s, seqno=%Lu", __get_str(ring), __entry->seqno)
+);
+
TRACE_EVENT(amdgpu_bo_list_set,
TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo),
TP_ARGS(list, bo),
@@ -493,7 +522,7 @@ TRACE_EVENT(amdgpu_cs_bo_status,
);
TRACE_EVENT(amdgpu_bo_move,
- TP_PROTO(struct amdgpu_bo* bo, uint32_t new_placement, uint32_t old_placement),
+ TP_PROTO(struct amdgpu_bo *bo, uint32_t new_placement, uint32_t old_placement),
TP_ARGS(bo, new_placement, old_placement),
TP_STRUCT__entry(
__field(struct amdgpu_bo *, bo)
@@ -518,23 +547,35 @@ TRACE_EVENT(amdgpu_ib_pipe_sync,
TP_ARGS(sched_job, fence),
TP_STRUCT__entry(
__string(ring, sched_job->base.sched->name)
- __field(uint64_t, id)
__field(struct dma_fence *, fence)
- __field(uint64_t, ctx)
- __field(unsigned, seqno)
+ __field(u64, ctx)
+ __field(u64, seqno)
),
TP_fast_assign(
- __assign_str(ring, sched_job->base.sched->name);
- __entry->id = sched_job->base.id;
+ __assign_str(ring);
__entry->fence = fence;
__entry->ctx = fence->context;
__entry->seqno = fence->seqno;
),
- TP_printk("job ring=%s, id=%llu, need pipe sync to fence=%p, context=%llu, seq=%u",
- __get_str(ring), __entry->id,
- __entry->fence, __entry->ctx,
- __entry->seqno)
+ TP_printk("job ring=%s need pipe sync to fence=%llu:%llu",
+ __get_str(ring), __entry->ctx, __entry->seqno)
+);
+
+TRACE_EVENT(amdgpu_reset_reg_dumps,
+ TP_PROTO(uint32_t address, uint32_t value),
+ TP_ARGS(address, value),
+ TP_STRUCT__entry(
+ __field(uint32_t, address)
+ __field(uint32_t, value)
+ ),
+ TP_fast_assign(
+ __entry->address = address;
+ __entry->value = value;
+ ),
+ TP_printk("amdgpu register dump 0x%x: 0x%x",
+ __entry->address,
+ __entry->value)
);
#undef AMDGPU_JOB_GET_TIMELINE_NAME
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
index 57c6c39ba064..b96d885f6e33 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
@@ -23,6 +23,7 @@
*/
#include <drm/amdgpu_drm.h>
+#include "amdgpu_cs.h"
#include "amdgpu.h"
#define CREATE_TRACE_POINTS
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c875f1cdd2af..2b931e855abd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -38,15 +38,15 @@
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/swap.h>
-#include <linux/swiotlb.h>
#include <linux/dma-buf.h>
#include <linux/sizes.h>
#include <linux/module.h>
-#include <drm/ttm/ttm_bo_api.h>
-#include <drm/ttm/ttm_bo_driver.h>
+#include <drm/drm_drv.h>
+#include <drm/ttm/ttm_bo.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_range_manager.h>
+#include <drm/ttm/ttm_tt.h>
#include <drm/amdgpu_drm.h>
@@ -56,13 +56,14 @@
#include "amdgpu_amdkfd.h"
#include "amdgpu_sdma.h"
#include "amdgpu_ras.h"
+#include "amdgpu_hmm.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_res_cursor.h"
#include "bif/bif_4_1_d.h"
-MODULE_IMPORT_NS(DMA_BUF);
+MODULE_IMPORT_NS("DMA_BUF");
-#define AMDGPU_TTM_VRAM_MAX_DW_READ (size_t)128
+#define AMDGPU_TTM_VRAM_MAX_DW_READ ((size_t)128)
static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
struct ttm_tt *ttm,
@@ -101,32 +102,19 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
/* Don't handle scatter gather BOs */
if (bo->type == ttm_bo_type_sg) {
placement->num_placement = 0;
- placement->num_busy_placement = 0;
return;
}
/* Object isn't an AMDGPU object so ignore */
if (!amdgpu_bo_is_amdgpu_bo(bo)) {
placement->placement = &placements;
- placement->busy_placement = &placements;
placement->num_placement = 1;
- placement->num_busy_placement = 1;
return;
}
abo = ttm_to_amdgpu_bo(bo);
- if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) {
- struct dma_fence *fence;
- struct dma_resv *resv = &bo->base._resv;
-
- rcu_read_lock();
- fence = rcu_dereference(resv->fence_excl);
- if (fence && !fence->ops->signaled)
- dma_fence_enable_sw_signaling(fence);
-
+ if (abo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) {
placement->num_placement = 0;
- placement->num_busy_placement = 0;
- rcu_read_unlock();
return;
}
@@ -134,17 +122,19 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
case AMDGPU_PL_GDS:
case AMDGPU_PL_GWS:
case AMDGPU_PL_OA:
+ case AMDGPU_PL_DOORBELL:
+ case AMDGPU_PL_MMIO_REMAP:
placement->num_placement = 0;
- placement->num_busy_placement = 0;
return;
case TTM_PL_VRAM:
if (!adev->mman.buffer_funcs_enabled) {
/* Move to system memory */
amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
+
} else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
!(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
- amdgpu_bo_in_cpu_visible_vram(abo)) {
+ amdgpu_res_cpu_visible(adev, bo->resource)) {
/* Try evicting to the CPU inaccessible part of VRAM
* first, but only set GTT as busy placement, so this
@@ -156,8 +146,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
AMDGPU_GEM_DOMAIN_CPU);
abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
abo->placements[0].lpfn = 0;
- abo->placement.busy_placement = &abo->placements[1];
- abo->placement.num_busy_placement = 1;
+ abo->placements[0].flags |= TTM_PL_FLAG_DESIRED;
} else {
/* Move to GTT memory */
amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT |
@@ -178,10 +167,10 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
* @bo: buffer object to map
* @mem: memory object to map
* @mm_cur: range to map
- * @num_pages: number of pages to map
* @window: which GART window to use
* @ring: DMA ring to use for the copy
* @tmz: if we should setup a TMZ enabled mapping
+ * @size: in: number of bytes to map; out: number of bytes actually mapped
* @addr: resulting address inside the MC address space
*
* Setup one of the GART windows to access a specific piece of memory or return
@@ -190,23 +179,22 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
struct ttm_resource *mem,
struct amdgpu_res_cursor *mm_cur,
- unsigned num_pages, unsigned window,
- struct amdgpu_ring *ring, bool tmz,
- uint64_t *addr)
+ unsigned int window, struct amdgpu_ring *ring,
+ bool tmz, uint64_t *size, uint64_t *addr)
{
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_job *job;
- unsigned num_dw, num_bytes;
- struct dma_fence *fence;
+ unsigned int offset, num_pages, num_dw, num_bytes;
uint64_t src_addr, dst_addr;
+ struct amdgpu_job *job;
void *cpu_addr;
uint64_t flags;
- unsigned int i;
int r;
BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
- BUG_ON(mem->mem_type == AMDGPU_PL_PREEMPT);
+
+ if (WARN_ON(mem->mem_type == AMDGPU_PL_PREEMPT))
+ return -EINVAL;
/* Map only what can't be accessed directly */
if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
@@ -215,16 +203,31 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
return 0;
}
+
+ /*
+ * If start begins at an offset inside the page, then adjust the size
+ * and addr accordingly
+ */
+ offset = mm_cur->start & ~PAGE_MASK;
+
+ num_pages = PFN_UP(*size + offset);
+ num_pages = min_t(uint32_t, num_pages, AMDGPU_GTT_MAX_TRANSFER_SIZE);
+
+ *size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset);
+
*addr = adev->gmc.gart_start;
*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
AMDGPU_GPU_PAGE_SIZE;
- *addr += mm_cur->start & ~PAGE_MASK;
+ *addr += offset;
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
- r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
- AMDGPU_IB_POOL_DELAYED, &job);
+ r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ num_dw * 4 + num_bytes,
+ AMDGPU_IB_POOL_DELAYED, &job,
+ AMDGPU_KERNEL_JOB_ID_TTM_MAP_BUFFER);
if (r)
return r;
@@ -234,7 +237,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
- dst_addr, num_bytes, false);
+ dst_addr, num_bytes, 0);
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
@@ -249,38 +252,15 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
dma_addr_t *dma_addr;
dma_addr = &bo->ttm->dma_address[mm_cur->start >> PAGE_SHIFT];
- r = amdgpu_gart_map(adev, 0, num_pages, dma_addr, flags,
- cpu_addr);
- if (r)
- goto error_free;
+ amdgpu_gart_map(adev, 0, num_pages, dma_addr, flags, cpu_addr);
} else {
- dma_addr_t dma_address;
+ u64 pa = mm_cur->start + adev->vm_manager.vram_base_offset;
- dma_address = mm_cur->start;
- dma_address += adev->vm_manager.vram_base_offset;
-
- for (i = 0; i < num_pages; ++i) {
- r = amdgpu_gart_map(adev, i << PAGE_SHIFT, 1,
- &dma_address, flags, cpu_addr);
- if (r)
- goto error_free;
-
- dma_address += PAGE_SIZE;
- }
+ amdgpu_gart_map_vram_range(adev, pa, 0, num_pages, flags, cpu_addr);
}
- r = amdgpu_job_submit(job, &adev->mman.entity,
- AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
- if (r)
- goto error_free;
-
- dma_fence_put(fence);
-
- return r;
-
-error_free:
- amdgpu_job_free(job);
- return r;
+ dma_fence_put(amdgpu_job_submit(job));
+ return 0;
}
/**
@@ -298,23 +278,24 @@ error_free:
* move and different for a BO to BO copy.
*
*/
-int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
- const struct amdgpu_copy_mem *src,
- const struct amdgpu_copy_mem *dst,
- uint64_t size, bool tmz,
- struct dma_resv *resv,
- struct dma_fence **f)
+__attribute__((nonnull))
+static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+ const struct amdgpu_copy_mem *src,
+ const struct amdgpu_copy_mem *dst,
+ uint64_t size, bool tmz,
+ struct dma_resv *resv,
+ struct dma_fence **f)
{
- const uint32_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
- AMDGPU_GPU_PAGE_SIZE);
-
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct amdgpu_res_cursor src_mm, dst_mm;
struct dma_fence *fence = NULL;
int r = 0;
+ uint32_t copy_flags = 0;
+ struct amdgpu_bo *abo_src, *abo_dst;
if (!adev->mman.buffer_funcs_enabled) {
- DRM_ERROR("Trying to move memory with ring turned off.\n");
+ dev_err(adev->dev,
+ "Trying to move memory with ring turned off.\n");
return -EINVAL;
}
@@ -323,34 +304,49 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
mutex_lock(&adev->mman.gtt_window_lock);
while (src_mm.remaining) {
- uint32_t src_page_offset = src_mm.start & ~PAGE_MASK;
- uint32_t dst_page_offset = dst_mm.start & ~PAGE_MASK;
+ uint64_t from, to, cur_size, tiling_flags;
+ uint32_t num_type, data_format, max_com, write_compress_disable;
struct dma_fence *next;
- uint32_t cur_size;
- uint64_t from, to;
- /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
- * begins at an offset, then adjust the size accordingly
- */
- cur_size = max(src_page_offset, dst_page_offset);
- cur_size = min(min3(src_mm.size, dst_mm.size, size),
- (uint64_t)(GTT_MAX_BYTES - cur_size));
+ /* Never copy more than 256MiB at once to avoid a timeout */
+ cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);
/* Map src to window 0 and dst to window 1. */
r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
- PFN_UP(cur_size + src_page_offset),
- 0, ring, tmz, &from);
+ 0, ring, tmz, &cur_size, &from);
if (r)
goto error;
r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
- PFN_UP(cur_size + dst_page_offset),
- 1, ring, tmz, &to);
+ 1, ring, tmz, &cur_size, &to);
if (r)
goto error;
- r = amdgpu_copy_buffer(ring, from, to, cur_size,
- resv, &next, false, true, tmz);
+ abo_src = ttm_to_amdgpu_bo(src->bo);
+ abo_dst = ttm_to_amdgpu_bo(dst->bo);
+ if (tmz)
+ copy_flags |= AMDGPU_COPY_FLAGS_TMZ;
+ if ((abo_src->flags & AMDGPU_GEM_CREATE_GFX12_DCC) &&
+ (abo_src->tbo.resource->mem_type == TTM_PL_VRAM))
+ copy_flags |= AMDGPU_COPY_FLAGS_READ_DECOMPRESSED;
+ if ((abo_dst->flags & AMDGPU_GEM_CREATE_GFX12_DCC) &&
+ (dst->mem->mem_type == TTM_PL_VRAM)) {
+ copy_flags |= AMDGPU_COPY_FLAGS_WRITE_COMPRESSED;
+ amdgpu_bo_get_tiling_flags(abo_dst, &tiling_flags);
+ max_com = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK);
+ num_type = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_NUMBER_TYPE);
+ data_format = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_DATA_FORMAT);
+ write_compress_disable =
+ AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_WRITE_COMPRESS_DISABLE);
+ copy_flags |= (AMDGPU_COPY_FLAGS_SET(MAX_COMPRESSED, max_com) |
+ AMDGPU_COPY_FLAGS_SET(NUMBER_TYPE, num_type) |
+ AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format) |
+ AMDGPU_COPY_FLAGS_SET(WRITE_COMPRESS_DISABLE,
+ write_compress_disable));
+ }
+
+ r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
+ &next, false, true, copy_flags);
if (r)
goto error;
@@ -362,9 +358,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
}
error:
mutex_unlock(&adev->mman.gtt_window_lock);
- if (f)
- *f = dma_fence_get(fence);
- dma_fence_put(fence);
+ *f = fence;
return r;
}
@@ -393,7 +387,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
dst.offset = 0;
r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
- new_mem->num_pages << PAGE_SHIFT,
+ new_mem->size,
amdgpu_bo_encrypted(abo),
bo->base.resv, &fence);
if (r)
@@ -404,11 +398,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
struct dma_fence *wipe_fence = NULL;
- r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON,
- NULL, &wipe_fence);
+ r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
+ false, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
if (r) {
goto error;
} else if (wipe_fence) {
+ amdgpu_vram_mgr_set_cleared(bo->resource);
dma_fence_put(fence);
fence = wipe_fence;
}
@@ -429,30 +424,56 @@ error:
return r;
}
-/*
- * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
+/**
+ * amdgpu_res_cpu_visible - Check that resource can be accessed by CPU
+ * @adev: amdgpu device
+ * @res: the resource to check
*
- * Called by amdgpu_bo_move()
+ * Returns: true if the full resource is CPU visible, false otherwise.
*/
-static bool amdgpu_mem_visible(struct amdgpu_device *adev,
- struct ttm_resource *mem)
+bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
+ struct ttm_resource *res)
{
- uint64_t mem_size = (u64)mem->num_pages << PAGE_SHIFT;
struct amdgpu_res_cursor cursor;
- if (mem->mem_type == TTM_PL_SYSTEM ||
- mem->mem_type == TTM_PL_TT)
+ if (!res)
+ return false;
+
+ if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT ||
+ res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL ||
+ res->mem_type == AMDGPU_PL_MMIO_REMAP)
return true;
- if (mem->mem_type != TTM_PL_VRAM)
+
+ if (res->mem_type != TTM_PL_VRAM)
return false;
- amdgpu_res_first(mem, 0, mem_size, &cursor);
+ amdgpu_res_first(res, 0, res->size, &cursor);
+ while (cursor.remaining) {
+ if ((cursor.start + cursor.size) > adev->gmc.visible_vram_size)
+ return false;
+ amdgpu_res_next(&cursor, cursor.size);
+ }
+
+ return true;
+}
+
+/*
+ * amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy
+ *
+ * Called by amdgpu_bo_move()
+ */
+static bool amdgpu_res_copyable(struct amdgpu_device *adev,
+ struct ttm_resource *mem)
+{
+ if (!amdgpu_res_cpu_visible(adev, mem))
+ return false;
/* ttm_resource_ioremap only supports contiguous memory */
- if (cursor.size != mem_size)
+ if (mem->mem_type == TTM_PL_VRAM &&
+ !(mem->placement & TTM_PL_FLAG_CONTIGUOUS))
return false;
- return cursor.start + cursor.size <= adev->gmc.visible_vram_size;
+ return true;
}
/*
@@ -477,22 +498,21 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
return r;
}
- /* Can't move a pinned BO */
abo = ttm_to_amdgpu_bo(bo);
- if (WARN_ON_ONCE(abo->tbo.pin_count > 0))
- return -EINVAL;
-
adev = amdgpu_ttm_adev(bo->bdev);
- if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
+ if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
+ bo->ttm == NULL)) {
+ amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_bo_move_null(bo, new_mem);
- goto out;
+ return 0;
}
if (old_mem->mem_type == TTM_PL_SYSTEM &&
(new_mem->mem_type == TTM_PL_TT ||
new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
+ amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_bo_move_null(bo, new_mem);
- goto out;
+ return 0;
}
if ((old_mem->mem_type == TTM_PL_TT ||
old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
@@ -502,20 +522,26 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
return r;
amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
+ amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_resource_free(bo, &bo->resource);
ttm_bo_assign_mem(bo, new_mem);
- goto out;
+ return 0;
}
if (old_mem->mem_type == AMDGPU_PL_GDS ||
old_mem->mem_type == AMDGPU_PL_GWS ||
old_mem->mem_type == AMDGPU_PL_OA ||
+ old_mem->mem_type == AMDGPU_PL_DOORBELL ||
+ old_mem->mem_type == AMDGPU_PL_MMIO_REMAP ||
new_mem->mem_type == AMDGPU_PL_GDS ||
new_mem->mem_type == AMDGPU_PL_GWS ||
- new_mem->mem_type == AMDGPU_PL_OA) {
+ new_mem->mem_type == AMDGPU_PL_OA ||
+ new_mem->mem_type == AMDGPU_PL_DOORBELL ||
+ new_mem->mem_type == AMDGPU_PL_MMIO_REMAP) {
/* Nothing to save here */
+ amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_bo_move_null(bo, new_mem);
- goto out;
+ return 0;
}
if (bo->type == ttm_bo_type_device &&
@@ -527,27 +553,28 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
}
- if (adev->mman.buffer_funcs_enabled) {
- if (((old_mem->mem_type == TTM_PL_SYSTEM &&
- new_mem->mem_type == TTM_PL_VRAM) ||
- (old_mem->mem_type == TTM_PL_VRAM &&
- new_mem->mem_type == TTM_PL_SYSTEM))) {
- hop->fpfn = 0;
- hop->lpfn = 0;
- hop->mem_type = TTM_PL_TT;
- hop->flags = TTM_PL_FLAG_TEMPORARY;
- return -EMULTIHOP;
- }
+ if (adev->mman.buffer_funcs_enabled &&
+ ((old_mem->mem_type == TTM_PL_SYSTEM &&
+ new_mem->mem_type == TTM_PL_VRAM) ||
+ (old_mem->mem_type == TTM_PL_VRAM &&
+ new_mem->mem_type == TTM_PL_SYSTEM))) {
+ hop->fpfn = 0;
+ hop->lpfn = 0;
+ hop->mem_type = TTM_PL_TT;
+ hop->flags = TTM_PL_FLAG_TEMPORARY;
+ return -EMULTIHOP;
+ }
+ amdgpu_bo_move_notify(bo, evict, new_mem);
+ if (adev->mman.buffer_funcs_enabled)
r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
- } else {
+ else
r = -ENODEV;
- }
if (r) {
/* Check that all memory is CPU accessible */
- if (!amdgpu_mem_visible(adev, old_mem) ||
- !amdgpu_mem_visible(adev, new_mem)) {
+ if (!amdgpu_res_copyable(adev, old_mem) ||
+ !amdgpu_res_copyable(adev, new_mem)) {
pr_err("Move buffer fallback to memcpy unavailable\n");
return r;
}
@@ -557,10 +584,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
return r;
}
-out:
- /* update statistics */
+ /* update statistics after the move */
+ if (evict)
+ atomic64_inc(&adev->num_evictions);
atomic64_add(bo->base.size, &adev->num_bytes_moved);
- amdgpu_bo_move_notify(bo, evict, new_mem);
return 0;
}
@@ -573,7 +600,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
struct ttm_resource *mem)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- size_t bus_size = (size_t)mem->num_pages << PAGE_SHIFT;
switch (mem->mem_type) {
case TTM_PL_SYSTEM:
@@ -584,9 +610,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
break;
case TTM_PL_VRAM:
mem->bus.offset = mem->start << PAGE_SHIFT;
- /* check if it's visible */
- if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
- return -EINVAL;
if (adev->mman.aper_base_kaddr &&
mem->placement & TTM_PL_FLAG_CONTIGUOUS)
@@ -596,6 +619,18 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
mem->bus.offset += adev->gmc.aper_base;
mem->bus.is_iomem = true;
break;
+ case AMDGPU_PL_DOORBELL:
+ mem->bus.offset = mem->start << PAGE_SHIFT;
+ mem->bus.offset += adev->doorbell.base;
+ mem->bus.is_iomem = true;
+ mem->bus.caching = ttm_uncached;
+ break;
+ case AMDGPU_PL_MMIO_REMAP:
+ mem->bus.offset = mem->start << PAGE_SHIFT;
+ mem->bus.offset += adev->rmmio_remap.bus_addr;
+ mem->bus.is_iomem = true;
+ mem->bus.caching = ttm_uncached;
+ break;
default:
return -EINVAL;
}
@@ -610,6 +645,12 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
amdgpu_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0,
&cursor);
+
+ if (bo->resource->mem_type == AMDGPU_PL_DOORBELL)
+ return ((uint64_t)(adev->doorbell.base + cursor.start)) >> PAGE_SHIFT;
+ else if (bo->resource->mem_type == AMDGPU_PL_MMIO_REMAP)
+ return ((uint64_t)(adev->rmmio_remap.bus_addr + cursor.start)) >> PAGE_SHIFT;
+
return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT;
}
@@ -645,23 +686,25 @@ struct amdgpu_ttm_tt {
struct task_struct *usertask;
uint32_t userflags;
bool bound;
-#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
- struct hmm_range *range;
-#endif
+ int32_t pool_id;
};
+#define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm)
+
#ifdef CONFIG_DRM_AMDGPU_USERPTR
/*
* amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
* memory and start HMM tracking CPU page table update
*
* Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
- * once afterwards to stop HMM tracking
+ * once afterwards to stop HMM tracking. It's the caller's responsibility to
+ * ensure that range is valid memory and that it is freed too.
*/
-int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
+ struct amdgpu_hmm_range *range)
{
struct ttm_tt *ttm = bo->tbo.ttm;
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
unsigned long start = gtt->userptr;
struct vm_area_struct *vma;
struct mm_struct *mm;
@@ -674,10 +717,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
return -EFAULT;
}
- /* Another get_user_pages is running at the same time?? */
- if (WARN_ON(gtt->range))
- return -EFAULT;
-
if (!mmget_not_zero(mm)) /* Happens during process shutdown */
return -ESRCH;
@@ -694,9 +733,8 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
}
readonly = amdgpu_ttm_tt_is_readonly(ttm);
- r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start,
- ttm->num_pages, &gtt->range, readonly,
- true, NULL);
+ r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages,
+ readonly, NULL, range);
out_unlock:
mmap_read_unlock(mm);
if (r)
@@ -707,37 +745,6 @@ out_unlock:
return r;
}
-/*
- * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
- * Check if the pages backing this ttm range have been invalidated
- *
- * Returns: true if pages are still valid
- */
-bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
-{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
- bool r = false;
-
- if (!gtt || !gtt->userptr)
- return false;
-
- DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n",
- gtt->userptr, ttm->num_pages);
-
- WARN_ONCE(!gtt->range || !gtt->range->hmm_pfns,
- "No user pages to check\n");
-
- if (gtt->range) {
- /*
- * FIXME: Must always hold notifier_lock for this, and must
- * not ignore the return code.
- */
- r = amdgpu_hmm_range_get_pages_done(gtt->range);
- gtt->range = NULL;
- }
-
- return !r;
-}
#endif
/*
@@ -747,12 +754,12 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
* that backs user memory and will ultimately be mapped into the device
* address space.
*/
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct amdgpu_hmm_range *range)
{
unsigned long i;
for (i = 0; i < ttm->num_pages; ++i)
- ttm->pages[i] = pages ? pages[i] : NULL;
+ ttm->pages[i] = range ? hmm_pfn_to_page(range->hmm_range.hmm_pfns[i]) : NULL;
}
/*
@@ -764,7 +771,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
@@ -780,7 +787,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
/* Map SG to device */
r = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
if (r)
- goto release_sg;
+ goto release_sg_table;
/* convert SG to linear array of pages and dma addresses */
drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
@@ -788,6 +795,8 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
return 0;
+release_sg_table:
+ sg_free_table(ttm->sg);
release_sg:
kfree(ttm->sg);
ttm->sg = NULL;
@@ -801,7 +810,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
@@ -813,64 +822,63 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
/* unmap the pages mapped to the device */
dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
sg_free_table(ttm->sg);
+}
-#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
- if (gtt->range) {
- unsigned long i;
+/*
+ * total_pages is constructed as MQD0+CtrlStack0 + MQD1+CtrlStack1 + ...
+ * MQDn+CtrlStackn where n is the number of XCCs per partition.
+ * pages_per_xcc is the size of one MQD+CtrlStack. The first page is the MQD
+ * and uses the default memory type, UC. The remaining pages of each
+ * pages_per_xcc chunk are the Ctrl stack; their memory type is changed to NC.
+ */
+static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
+ struct ttm_tt *ttm, uint64_t flags)
+{
+ struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ uint64_t total_pages = ttm->num_pages;
+ int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
+ uint64_t page_idx, pages_per_xcc;
+ int i;
+ uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);
- for (i = 0; i < ttm->num_pages; i++) {
- if (ttm->pages[i] !=
- hmm_pfn_to_page(gtt->range->hmm_pfns[i]))
- break;
- }
+ pages_per_xcc = total_pages;
+ do_div(pages_per_xcc, num_xcc);
- WARN((i == ttm->num_pages), "Missing get_user_page_done\n");
+ for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
+ /* MQD page: use default flags */
+ amdgpu_gart_bind(adev,
+ gtt->offset + (page_idx << PAGE_SHIFT),
+ 1, &gtt->ttm.dma_address[page_idx], flags);
+ /*
+ * Ctrl pages - modify the memory type to NC (ctrl_flags) from
+ * the second page of this XCC's MQD+CtrlStack chunk onward.
+ */
+ amdgpu_gart_bind(adev,
+ gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
+ pages_per_xcc - 1,
+ &gtt->ttm.dma_address[page_idx + 1],
+ ctrl_flags);
}
-#endif
}
-static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
- struct ttm_buffer_object *tbo,
- uint64_t flags)
+static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
+ struct ttm_buffer_object *tbo,
+ uint64_t flags)
{
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
struct ttm_tt *ttm = tbo->ttm;
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
- int r;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (amdgpu_bo_encrypted(abo))
flags |= AMDGPU_PTE_TMZ;
if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
- uint64_t page_idx = 1;
-
- r = amdgpu_gart_bind(adev, gtt->offset, page_idx,
- gtt->ttm.dma_address, flags);
- if (r)
- goto gart_bind_fail;
-
- /* The memory type of the first page defaults to UC. Now
- * modify the memory type to NC from the second page of
- * the BO onward.
- */
- flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
- flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
-
- r = amdgpu_gart_bind(adev,
- gtt->offset + (page_idx << PAGE_SHIFT),
- ttm->num_pages - page_idx,
- &(gtt->ttm.dma_address[page_idx]), flags);
+ amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags);
} else {
- r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
- gtt->ttm.dma_address, flags);
+ amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
+ gtt->ttm.dma_address, flags);
}
-
-gart_bind_fail:
- if (r)
- DRM_ERROR("failed to bind %u pages at 0x%08llX\n",
- ttm->num_pages, gtt->offset);
-
- return r;
+ gtt->bound = true;
}
/*
@@ -884,9 +892,9 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
struct ttm_resource *bo_mem)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void*)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
uint64_t flags;
- int r = 0;
+ int r;
if (!bo_mem)
return -EINVAL;
@@ -897,7 +905,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
if (gtt->userptr) {
r = amdgpu_ttm_tt_pin_userptr(bdev, ttm);
if (r) {
- DRM_ERROR("failed to pin userptr\n");
+ dev_err(adev->dev, "failed to pin userptr\n");
return r;
}
} else if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) {
@@ -922,11 +930,6 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
ttm->num_pages, bo_mem, ttm);
}
- if (bo_mem->mem_type == AMDGPU_PL_GDS ||
- bo_mem->mem_type == AMDGPU_PL_GWS ||
- bo_mem->mem_type == AMDGPU_PL_OA)
- return -EINVAL;
-
if (bo_mem->mem_type != TTM_PL_TT ||
!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
gtt->offset = AMDGPU_BO_INVALID_OFFSET;
@@ -938,14 +941,10 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
/* bind pages into GART page tables */
gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
- r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
- gtt->ttm.dma_address, flags);
-
- if (r)
- DRM_ERROR("failed to bind %u pages at 0x%08llX\n",
- ttm->num_pages, gtt->offset);
+ amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
+ gtt->ttm.dma_address, flags);
gtt->bound = true;
- return r;
+ return 0;
}
/*
@@ -960,7 +959,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct ttm_operation_ctx ctx = { false, false };
- struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
struct ttm_placement placement;
struct ttm_place placements;
struct ttm_resource *tmp;
@@ -971,16 +970,12 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
return 0;
addr = amdgpu_gmc_agp_addr(bo);
- if (addr != AMDGPU_BO_INVALID_OFFSET) {
- bo->resource->start = addr >> PAGE_SHIFT;
+ if (addr != AMDGPU_BO_INVALID_OFFSET)
return 0;
- }
/* allocate GART space */
placement.num_placement = 1;
placement.placement = &placements;
- placement.num_busy_placement = 1;
- placement.busy_placement = &placements;
placements.fpfn = 0;
placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
placements.mem_type = TTM_PL_TT;
@@ -995,12 +990,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
/* Bind pages */
gtt->offset = (u64)tmp->start << PAGE_SHIFT;
- r = amdgpu_ttm_gart_bind(adev, bo, flags);
- if (unlikely(r)) {
- ttm_resource_free(bo, &tmp);
- return r;
- }
-
+ amdgpu_ttm_gart_bind(adev, bo, flags);
amdgpu_gart_invalidate_tlb(adev);
ttm_resource_free(bo, &bo->resource);
ttm_bo_assign_mem(bo, tmp);
@@ -1014,19 +1004,16 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
* Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to
* rebind GTT pages during a GPU reset.
*/
-int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
+void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
uint64_t flags;
- int r;
if (!tbo->ttm)
- return 0;
+ return;
flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, tbo->resource);
- r = amdgpu_ttm_gart_bind(adev, tbo, flags);
-
- return r;
+ amdgpu_ttm_gart_bind(adev, tbo, flags);
}
/*
@@ -1039,13 +1026,12 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
- int r;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
/* if the pages have userptr pinning then clear that first */
if (gtt->userptr) {
amdgpu_ttm_tt_unpin_userptr(bdev, ttm);
- } else if (ttm->sg && gtt->gobj->import_attach) {
+ } else if (ttm->sg && drm_gem_is_imported(gtt->gobj)) {
struct dma_buf_attachment *attach;
attach = gtt->gobj->import_attach;
@@ -1060,17 +1046,14 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
return;
/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
- r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
- if (r)
- DRM_ERROR("failed to unbind %u pages at 0x%08llX\n",
- gtt->ttm.num_pages, gtt->offset);
+ amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
gtt->bound = false;
}
static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (gtt->usertask)
put_task_struct(gtt->usertask);
@@ -1090,15 +1073,20 @@ static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
uint32_t page_flags)
{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
struct amdgpu_ttm_tt *gtt;
enum ttm_caching caching;
gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
- if (gtt == NULL) {
+ if (!gtt)
return NULL;
- }
+
gtt->gobj = &bo->base;
+ if (adev->gmc.mem_partitions && abo->xcp_id >= 0)
+ gtt->pool_id = KFD_XCP_MEM_ID(adev, abo->xcp_id);
+ else
+ gtt->pool_id = abo->xcp_id;
if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
caching = ttm_write_combined;
@@ -1124,7 +1112,8 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
struct ttm_operation_ctx *ctx)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+ struct ttm_pool *pool;
pgoff_t i;
int ret;
@@ -1139,7 +1128,11 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
return 0;
- ret = ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx);
+ if (adev->mman.ttm_pools && gtt->pool_id >= 0)
+ pool = &adev->mman.ttm_pools[gtt->pool_id];
+ else
+ pool = &adev->mman.bdev.pool;
+ ret = ttm_pool_alloc(pool, ttm, ctx);
if (ret)
return ret;
@@ -1158,8 +1151,9 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
struct amdgpu_device *adev;
+ struct ttm_pool *pool;
pgoff_t i;
amdgpu_ttm_backend_unbind(bdev, ttm);
@@ -1178,7 +1172,33 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
ttm->pages[i]->mapping = NULL;
adev = amdgpu_ttm_adev(bdev);
- return ttm_pool_free(&adev->mman.bdev.pool, ttm);
+
+ if (adev->mman.ttm_pools && gtt->pool_id >= 0)
+ pool = &adev->mman.ttm_pools[gtt->pool_id];
+ else
+ pool = &adev->mman.bdev.pool;
+
+ return ttm_pool_free(pool, ttm);
+}
+
+/**
+ * amdgpu_ttm_tt_get_userptr - Return the userptr GTT ttm_tt for the current
+ * task
+ *
+ * @tbo: The ttm_buffer_object that contains the userptr
+ * @user_addr: The returned value
+ */
+int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
+ uint64_t *user_addr)
+{
+ struct amdgpu_ttm_tt *gtt;
+
+ if (!tbo->ttm)
+ return -EINVAL;
+
+ gtt = (void *)tbo->ttm;
+ *user_addr = gtt->userptr;
+ return 0;
}
/**
@@ -1189,8 +1209,9 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
* @addr: The address in the current tasks VM space to use
* @flags: Requirements of userptr object.
*
- * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages
- * to current task
+ * Called by amdgpu_gem_userptr_ioctl() and kfd_ioctl_alloc_memory_of_gpu() to
+ * bind userptr pages to current task and by kfd_ioctl_acquire_vm() to
+ * initialize GPU VM for a KFD process.
*/
int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
uint64_t addr, uint32_t flags)
@@ -1207,7 +1228,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
/* Set TTM_TT_FLAG_EXTERNAL before populate but after create. */
bo->ttm->page_flags |= TTM_TT_FLAG_EXTERNAL;
- gtt = (void *)bo->ttm;
+ gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
gtt->userptr = addr;
gtt->userflags = flags;
@@ -1224,7 +1245,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
*/
struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (gtt == NULL)
return NULL;
@@ -1243,7 +1264,7 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
unsigned long end, unsigned long *userptr)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
unsigned long size;
if (gtt == NULL || !gtt->userptr)
@@ -1266,7 +1287,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
*/
bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (gtt == NULL || !gtt->userptr)
return false;
@@ -1279,7 +1300,7 @@ bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
*/
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (gtt == NULL)
return false;
@@ -1303,10 +1324,12 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
flags |= AMDGPU_PTE_VALID;
if (mem && (mem->mem_type == TTM_PL_TT ||
- mem->mem_type == AMDGPU_PL_PREEMPT)) {
+ mem->mem_type == AMDGPU_PL_DOORBELL ||
+ mem->mem_type == AMDGPU_PL_PREEMPT ||
+ mem->mem_type == AMDGPU_PL_MMIO_REMAP)) {
flags |= AMDGPU_PTE_SYSTEM;
- if (ttm->caching == ttm_cached)
+ if (ttm && ttm->caching == ttm_cached)
flags |= AMDGPU_PTE_SNOOPED;
}
@@ -1352,11 +1375,11 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
const struct ttm_place *place)
{
- unsigned long num_pages = bo->resource->num_pages;
- struct amdgpu_res_cursor cursor;
- struct dma_resv_list *flist;
+ struct dma_resv_iter resv_cursor;
struct dma_fence *f;
- int i;
+
+ if (!amdgpu_bo_is_amdgpu_bo(bo))
+ return ttm_bo_eviction_valuable(bo, place);
/* Swapout? */
if (bo->resource->mem_type == TTM_PL_SYSTEM)
@@ -1370,50 +1393,27 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
* If true, then return false as any KFD process needs all its BOs to
* be resident to run successfully
*/
- flist = dma_resv_shared_list(bo->base.resv);
- if (flist) {
- for (i = 0; i < flist->shared_count; ++i) {
- f = rcu_dereference_protected(flist->shared[i],
- dma_resv_held(bo->base.resv));
- if (amdkfd_fence_check_mm(f, current->mm))
- return false;
- }
+ dma_resv_for_each_fence(&resv_cursor, bo->base.resv,
+ DMA_RESV_USAGE_BOOKKEEP, f) {
+ if (amdkfd_fence_check_mm(f, current->mm) &&
+ !(place->flags & TTM_PL_FLAG_CONTIGUOUS))
+ return false;
}
- switch (bo->resource->mem_type) {
- case AMDGPU_PL_PREEMPT:
- /* Preemptible BOs don't own system resources managed by the
- * driver (pages, VRAM, GART space). They point to resources
- * owned by someone else (e.g. pageable memory in user mode
- * or a DMABuf). They are used in a preemptible context so we
- * can guarantee no deadlocks and good QoS in case of MMU
- * notifiers or DMABuf move notifiers from the resource owner.
- */
+ /* Preemptible BOs don't own system resources managed by the
+ * driver (pages, VRAM, GART space). They point to resources
+ * owned by someone else (e.g. pageable memory in user mode
+ * or a DMABuf). They are used in a preemptible context so we
+ * can guarantee no deadlocks and good QoS in case of MMU
+ * notifiers or DMABuf move notifiers from the resource owner.
+ */
+ if (bo->resource->mem_type == AMDGPU_PL_PREEMPT)
return false;
- case TTM_PL_TT:
- if (amdgpu_bo_is_amdgpu_bo(bo) &&
- amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
- return false;
- return true;
-
- case TTM_PL_VRAM:
- /* Check each drm MM node individually */
- amdgpu_res_first(bo->resource, 0, (u64)num_pages << PAGE_SHIFT,
- &cursor);
- while (cursor.remaining) {
- if (place->fpfn < PFN_DOWN(cursor.start + cursor.size)
- && !(place->lpfn &&
- place->lpfn <= PFN_DOWN(cursor.start)))
- return true;
- amdgpu_res_next(&cursor, cursor.size);
- }
+ if (bo->resource->mem_type == TTM_PL_TT &&
+ amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
return false;
- default:
- break;
- }
-
return ttm_bo_eviction_valuable(bo, place);
}
@@ -1452,6 +1452,68 @@ static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
}
}
+static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
+ unsigned long offset, void *buf,
+ int len, int write)
+{
+ struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+ struct amdgpu_res_cursor src_mm;
+ struct amdgpu_job *job;
+ struct dma_fence *fence;
+ uint64_t src_addr, dst_addr;
+ unsigned int num_dw;
+ int r, idx;
+
+ if (len != PAGE_SIZE)
+ return -EINVAL;
+
+ if (!adev->mman.sdma_access_ptr)
+ return -EACCES;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return -ENODEV;
+
+ if (write)
+ memcpy(adev->mman.sdma_access_ptr, buf, len);
+
+ num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
+ r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ num_dw * 4, AMDGPU_IB_POOL_DELAYED,
+ &job,
+ AMDGPU_KERNEL_JOB_ID_TTM_ACCESS_MEMORY_SDMA);
+ if (r)
+ goto out;
+
+ mutex_lock(&adev->mman.gtt_window_lock);
+ amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
+ src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
+ src_mm.start;
+ dst_addr = amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo);
+ if (write)
+ swap(src_addr, dst_addr);
+
+ amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
+ PAGE_SIZE, 0);
+
+ amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
+ WARN_ON(job->ibs[0].length_dw > num_dw);
+
+ fence = amdgpu_job_submit(job);
+ mutex_unlock(&adev->mman.gtt_window_lock);
+
+ if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
+ r = -ETIMEDOUT;
+ dma_fence_put(fence);
+
+ if (!(r || write))
+ memcpy(buf, adev->mman.sdma_access_ptr, len);
+out:
+ drm_dev_exit(idx);
+ return r;
+}
+
/**
* amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
*
@@ -1476,6 +1538,10 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
if (bo->resource->mem_type != TTM_PL_VRAM)
return -EIO;
+ if (amdgpu_device_has_timeouts_enabled(adev) &&
+ !amdgpu_ttm_access_memory_sdma(bo, offset, buf, len, write))
+ return len;
+
amdgpu_res_first(bo->resource, offset, len, &cursor);
while (cursor.remaining) {
size_t count, size = cursor.size;
@@ -1517,7 +1583,6 @@ static struct ttm_device_funcs amdgpu_bo_driver = {
.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
.access_memory = &amdgpu_ttm_access_memory,
- .del_from_lru_notify = &amdgpu_vm_del_from_lru_notify
};
/*
@@ -1536,6 +1601,23 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
NULL, &adev->mman.fw_vram_usage_va);
}
+/*
+ * Driver Reservation functions
+ */
+/**
+ * amdgpu_ttm_drv_reserve_vram_fini - free drv reserved vram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * free drv reserved vram if it has been reserved.
+ */
+static void amdgpu_ttm_drv_reserve_vram_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->mman.drv_vram_usage_reserved_bo,
+ NULL,
+ &adev->mman.drv_vram_usage_va);
+}
+
/**
* amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw
*
@@ -1557,11 +1639,35 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
return amdgpu_bo_create_kernel_at(adev,
adev->mman.fw_vram_usage_start_offset,
adev->mman.fw_vram_usage_size,
- AMDGPU_GEM_DOMAIN_VRAM,
&adev->mman.fw_vram_usage_reserved_bo,
&adev->mman.fw_vram_usage_va);
}
+/**
+ * amdgpu_ttm_drv_reserve_vram_init - create bo vram reservation from driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * create bo vram reservation from drv.
+ */
+static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev)
+{
+ u64 vram_size = adev->gmc.visible_vram_size;
+
+ adev->mman.drv_vram_usage_va = NULL;
+ adev->mman.drv_vram_usage_reserved_bo = NULL;
+
+ if (adev->mman.drv_vram_usage_size == 0 ||
+ adev->mman.drv_vram_usage_size > vram_size)
+ return 0;
+
+ return amdgpu_bo_create_kernel_at(adev,
+ adev->mman.drv_vram_usage_start_offset,
+ adev->mman.drv_vram_usage_size,
+ &adev->mman.drv_vram_usage_reserved_bo,
+ &adev->mman.drv_vram_usage_va);
+}
+
/*
* Memory training reservation functions
*/
@@ -1584,14 +1690,15 @@ static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
return 0;
}
-static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
+static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev,
+ uint32_t reserve_size)
{
struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
memset(ctx, 0, sizeof(*ctx));
ctx->c2p_train_data_offset =
- ALIGN((adev->gmc.mc_vram_size - adev->mman.discovery_tmr_size - SZ_1M), SZ_1M);
+ ALIGN((adev->gmc.mc_vram_size - reserve_size - SZ_1M), SZ_1M);
ctx->p2c_train_data_offset =
(adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
ctx->train_data_size =
@@ -1609,11 +1716,12 @@ static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
*/
static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
{
- int ret;
struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
bool mem_train_support = false;
+ uint32_t reserve_size = 0;
+ int ret;
- if (!amdgpu_sriov_vf(adev)) {
+ if (adev->bios && !amdgpu_sriov_vf(adev)) {
if (amdgpu_atomfirmware_mem_training_supported(adev))
mem_train_support = true;
else
@@ -1627,43 +1735,135 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
* Otherwise, fallback to legacy approach to check and reserve tmr block for ip
* discovery data and G6 memory training data respectively
*/
- adev->mman.discovery_tmr_size =
- amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
- if (!adev->mman.discovery_tmr_size)
- adev->mman.discovery_tmr_size = DISCOVERY_TMR_OFFSET;
+ if (adev->bios)
+ reserve_size =
+ amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
+
+ if (!adev->bios &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)))
+ reserve_size = max(reserve_size, (uint32_t)280 << 20);
+ else if (!reserve_size)
+ reserve_size = DISCOVERY_TMR_OFFSET;
if (mem_train_support) {
/* reserve vram for mem train according to TMR location */
- amdgpu_ttm_training_data_block_init(adev);
+ amdgpu_ttm_training_data_block_init(adev, reserve_size);
ret = amdgpu_bo_create_kernel_at(adev,
- ctx->c2p_train_data_offset,
- ctx->train_data_size,
- AMDGPU_GEM_DOMAIN_VRAM,
- &ctx->c2p_bo,
- NULL);
+ ctx->c2p_train_data_offset,
+ ctx->train_data_size,
+ &ctx->c2p_bo,
+ NULL);
if (ret) {
- DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
+ dev_err(adev->dev, "alloc c2p_bo failed(%d)!\n", ret);
amdgpu_ttm_training_reserve_vram_fini(adev);
return ret;
}
ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
}
- ret = amdgpu_bo_create_kernel_at(adev,
- adev->gmc.real_vram_size - adev->mman.discovery_tmr_size,
- adev->mman.discovery_tmr_size,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->mman.discovery_memory,
- NULL);
+ ret = amdgpu_bo_create_kernel_at(
+ adev, adev->gmc.real_vram_size - reserve_size, reserve_size,
+ &adev->mman.fw_reserved_memory, NULL);
if (ret) {
- DRM_ERROR("alloc tmr failed(%d)!\n", ret);
- amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
+ dev_err(adev->dev, "alloc tmr failed(%d)!\n", ret);
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
+ NULL);
return ret;
}
return 0;
}
+static int amdgpu_ttm_pools_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!adev->gmc.is_app_apu || !adev->gmc.num_mem_partitions)
+ return 0;
+
+ adev->mman.ttm_pools = kcalloc(adev->gmc.num_mem_partitions,
+ sizeof(*adev->mman.ttm_pools),
+ GFP_KERNEL);
+ if (!adev->mman.ttm_pools)
+ return -ENOMEM;
+
+ for (i = 0; i < adev->gmc.num_mem_partitions; i++) {
+ ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev,
+ adev->gmc.mem_partitions[i].numa.node,
+ TTM_ALLOCATION_POOL_BENEFICIAL_ORDER(get_order(SZ_2M)));
+ }
+ return 0;
+}
+
+static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!adev->gmc.is_app_apu || !adev->mman.ttm_pools)
+ return;
+
+ for (i = 0; i < adev->gmc.num_mem_partitions; i++)
+ ttm_pool_fini(&adev->mman.ttm_pools[i]);
+
+ kfree(adev->mman.ttm_pools);
+ adev->mman.ttm_pools = NULL;
+}
+
+/**
+ * amdgpu_ttm_mmio_remap_bo_init - Allocate the singleton 4K MMIO_REMAP BO
+ * @adev: amdgpu device
+ *
+ * Allocates a one-page (4K) GEM BO in AMDGPU_GEM_DOMAIN_MMIO_REMAP when the
+ * hardware exposes a remap base (adev->rmmio_remap.bus_addr) and the host
+ * PAGE_SIZE is <= AMDGPU_GPU_PAGE_SIZE (4K). The BO is created as a regular
+ * GEM object (amdgpu_bo_create).
+ *
+ * Return:
+ * * 0 on success or intentional skip (feature not present/unsupported)
+ * * negative errno on allocation failure
+ */
+static int amdgpu_ttm_mmio_remap_bo_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_bo_param bp;
+ int r;
+
+ /* Skip if HW doesn't expose remap, or if PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE (4K). */
+ if (!adev->rmmio_remap.bus_addr || PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE)
+ return 0;
+
+ memset(&bp, 0, sizeof(bp));
+
+ /* Create exactly one GEM BO in the MMIO_REMAP domain. */
+ bp.type = ttm_bo_type_device; /* userspace-mappable GEM */
+ bp.size = AMDGPU_GPU_PAGE_SIZE; /* 4K */
+ bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_MMIO_REMAP;
+ bp.flags = 0;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+ r = amdgpu_bo_create(adev, &bp, &adev->rmmio_remap.bo);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * amdgpu_ttm_mmio_remap_bo_fini - Free the singleton MMIO_REMAP BO
+ * @adev: amdgpu device
+ *
+ * Frees the kernel-owned MMIO_REMAP BO if it was allocated by
+ * amdgpu_ttm_mmio_remap_bo_init().
+ */
+static void amdgpu_ttm_mmio_remap_bo_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_unref(&adev->rmmio_remap.bo);
+ adev->rmmio_remap.bo = NULL;
+}
+
/*
* amdgpu_ttm_init - Init the memory management (ttm) as well as various
* gtt/vram related fields.
@@ -1677,34 +1877,40 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
{
uint64_t gtt_size;
int r;
- u64 vis_vram_limit;
mutex_init(&adev->mman.gtt_window_lock);
+ dma_set_max_seg_size(adev->dev, UINT_MAX);
/* No others user of address space so set it to 0 */
r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
adev_to_drm(adev)->anon_inode->i_mapping,
adev_to_drm(adev)->vma_offset_manager,
- adev->need_swiotlb,
- dma_addressing_limited(adev->dev));
+ (adev->need_swiotlb ?
+ TTM_ALLOCATION_POOL_USE_DMA_ALLOC : 0) |
+ (dma_addressing_limited(adev->dev) ?
+ TTM_ALLOCATION_POOL_USE_DMA32 : 0) |
+ TTM_ALLOCATION_POOL_BENEFICIAL_ORDER(get_order(SZ_2M)));
if (r) {
- DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
+ dev_err(adev->dev,
+ "failed initializing buffer object driver(%d).\n", r);
return r;
}
- adev->mman.initialized = true;
- /* Initialize VRAM pool with all of VRAM divided into pages */
- r = amdgpu_vram_mgr_init(adev);
+ r = amdgpu_ttm_pools_init(adev);
if (r) {
- DRM_ERROR("Failed initializing VRAM heap.\n");
+ dev_err(adev->dev, "failed to init ttm pools(%d).\n", r);
return r;
}
+ adev->mman.initialized = true;
- /* Reduce size of CPU-visible VRAM if requested */
- vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024;
- if (amdgpu_vis_vram_limit > 0 &&
- vis_vram_limit <= adev->gmc.visible_vram_size)
- adev->gmc.visible_vram_size = vis_vram_limit;
+ if (!adev->gmc.is_app_apu) {
+ /* Initialize VRAM pool with all of VRAM divided into pages */
+ r = amdgpu_vram_mgr_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing VRAM heap.\n");
+ return r;
+ }
+ }
/* Change the size here instead of the init above so only lpfn is affected */
amdgpu_ttm_set_buffer_funcs_status(adev, false);
@@ -1714,6 +1920,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
adev->gmc.visible_vram_size);
+ else if (adev->gmc.is_app_apu)
+ DRM_DEBUG_DRIVER(
+ "No need to ioremap when real vram size is 0\n");
else
#endif
adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
@@ -1725,16 +1934,23 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
*place on the VRAM, so reserve it early.
*/
r = amdgpu_ttm_fw_reserve_vram_init(adev);
- if (r) {
+ if (r)
+ return r;
+
+ /*
+ * The reserved VRAM for the driver must be pinned to a specific
+ * location in VRAM, so reserve it early.
+ */
+ r = amdgpu_ttm_drv_reserve_vram_init(adev);
+ if (r)
return r;
- }
/*
- * only NAVI10 and onwards ASIC support for IP discovery.
- * If IP discovery enabled, a block of memory should be
- * reserved for IP discovey.
+ * only NAVI10 and later ASICs support IP discovery.
+ * If IP discovery is enabled, a block of memory should be
+ * reserved for it.
*/
- if (adev->mman.discovery_bin) {
+ if (adev->discovery.reserve_tmr) {
r = amdgpu_ttm_reserve_tmr(adev);
if (r)
return r;
@@ -1743,78 +1959,125 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
/* allocate memory as required for VGA
* This is used for VGA emulation and pre-OS scanout buffers to
* avoid display artifacts while transitioning between pre-OS
- * and driver. */
- r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->mman.stolen_vga_memory,
- NULL);
- if (r)
- return r;
- r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
- adev->mman.stolen_extended_size,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->mman.stolen_extended_memory,
- NULL);
- if (r)
- return r;
- r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset,
- adev->mman.stolen_reserved_size,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->mman.stolen_reserved_memory,
- NULL);
- if (r)
- return r;
+ * and driver.
+ */
+ if (!adev->gmc.is_app_apu) {
+ r = amdgpu_bo_create_kernel_at(adev, 0,
+ adev->mman.stolen_vga_size,
+ &adev->mman.stolen_vga_memory,
+ NULL);
+ if (r)
+ return r;
+
+ r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
+ adev->mman.stolen_extended_size,
+ &adev->mman.stolen_extended_memory,
+ NULL);
+
+ if (r)
+ return r;
+
+ r = amdgpu_bo_create_kernel_at(adev,
+ adev->mman.stolen_reserved_offset,
+ adev->mman.stolen_reserved_size,
+ &adev->mman.stolen_reserved_memory,
+ NULL);
+ if (r)
+ return r;
+ } else {
+ DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n");
+ }
- DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
- (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
+ dev_info(adev->dev, "amdgpu: %uM of VRAM memory ready\n",
+ (unsigned int)(adev->gmc.real_vram_size / (1024 * 1024)));
- /* Compute GTT size, either bsaed on 3/4th the size of RAM size
- * or whatever the user passed on module init */
- if (amdgpu_gtt_size == -1) {
- struct sysinfo si;
+ /* Compute GTT size, either based on TTM limit
+ * or whatever the user passed on module init.
+ */
+ gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT;
+ if (amdgpu_gtt_size != -1) {
+ uint64_t configured_size = (uint64_t)amdgpu_gtt_size << 20;
+
+ drm_warn(&adev->ddev,
+ "Configuring gttsize via module parameter is deprecated, please use ttm.pages_limit\n");
+ if (gtt_size != configured_size)
+ drm_warn(&adev->ddev,
+ "GTT size has been set as %llu but TTM size has been set as %llu, this is unusual\n",
+ configured_size, gtt_size);
- si_meminfo(&si);
- gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
- adev->gmc.mc_vram_size),
- ((uint64_t)si.totalram * si.mem_unit * 3/4));
+ gtt_size = configured_size;
}
- else
- gtt_size = (uint64_t)amdgpu_gtt_size << 20;
/* Initialize GTT memory pool */
r = amdgpu_gtt_mgr_init(adev, gtt_size);
if (r) {
- DRM_ERROR("Failed initializing GTT heap.\n");
+ dev_err(adev->dev, "Failed initializing GTT heap.\n");
+ return r;
+ }
+ dev_info(adev->dev, "amdgpu: %uM of GTT memory ready.\n",
+ (unsigned int)(gtt_size / (1024 * 1024)));
+
+ if (adev->flags & AMD_IS_APU) {
+ if (adev->gmc.real_vram_size < gtt_size)
+ adev->apu_prefer_gtt = true;
+ }
+
+ /* Initialize doorbell pool on PCI BAR */
+ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing doorbell heap.\n");
+ return r;
+ }
+
+ /* Create a doorbell page for kernel usage */
+ r = amdgpu_doorbell_create_kernel_doorbells(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to initialize kernel doorbells.\n");
+ return r;
+ }
+
+ /* Initialize MMIO-remap pool (single page 4K) */
+ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_MMIO_REMAP, 1);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing MMIO-remap heap.\n");
return r;
}
- DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
- (unsigned)(gtt_size / (1024 * 1024)));
+
+ /* Allocate the singleton MMIO_REMAP BO (4K) if supported */
+ r = amdgpu_ttm_mmio_remap_bo_init(adev);
+ if (r)
+ return r;
/* Initialize preemptible memory pool */
r = amdgpu_preempt_mgr_init(adev);
if (r) {
- DRM_ERROR("Failed initializing PREEMPT heap.\n");
+ dev_err(adev->dev, "Failed initializing PREEMPT heap.\n");
return r;
}
/* Initialize various on-chip memory pools */
r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size);
if (r) {
- DRM_ERROR("Failed initializing GDS heap.\n");
+ dev_err(adev->dev, "Failed initializing GDS heap.\n");
return r;
}
r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GWS, adev->gds.gws_size);
if (r) {
- DRM_ERROR("Failed initializing gws heap.\n");
+ dev_err(adev->dev, "Failed initializing gws heap.\n");
return r;
}
r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_OA, adev->gds.oa_size);
if (r) {
- DRM_ERROR("Failed initializing oa heap.\n");
+ dev_err(adev->dev, "Failed initializing oa heap.\n");
return r;
}
+ if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mman.sdma_access_bo, NULL,
+ &adev->mman.sdma_access_ptr))
+ DRM_WARN("Debug VRAM access will use slowpath MM access\n");
return 0;
}
@@ -1824,29 +2087,57 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
*/
void amdgpu_ttm_fini(struct amdgpu_device *adev)
{
+ int idx;
+
if (!adev->mman.initialized)
return;
+ amdgpu_ttm_pools_fini(adev);
+
amdgpu_ttm_training_reserve_vram_fini(adev);
/* return the stolen vga memory back to VRAM */
- amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
- amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
- /* return the IP Discovery TMR memory back to VRAM */
- amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
- if (adev->mman.stolen_reserved_size)
- amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
- NULL, NULL);
+ if (!adev->gmc.is_app_apu) {
+ amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
+ /* return the FW reserved memory back to VRAM */
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
+ NULL);
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory_extend, NULL,
+ NULL);
+ if (adev->mman.stolen_reserved_size)
+ amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
+ NULL, NULL);
+ }
+ amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
+ &adev->mman.sdma_access_ptr);
+
+ amdgpu_ttm_mmio_remap_bo_fini(adev);
amdgpu_ttm_fw_reserve_vram_fini(adev);
+ amdgpu_ttm_drv_reserve_vram_fini(adev);
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+
+ if (adev->mman.aper_base_kaddr)
+ iounmap(adev->mman.aper_base_kaddr);
+ adev->mman.aper_base_kaddr = NULL;
- amdgpu_vram_mgr_fini(adev);
+ drm_dev_exit(idx);
+ }
+
+ if (!adev->gmc.is_app_apu)
+ amdgpu_vram_mgr_fini(adev);
amdgpu_gtt_mgr_fini(adev);
amdgpu_preempt_mgr_fini(adev);
+ amdgpu_doorbell_fini(adev);
+
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DOORBELL);
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_MMIO_REMAP);
ttm_device_fini(&adev->mman.bdev);
adev->mman.initialized = false;
- DRM_INFO("amdgpu: ttm finalized\n");
+ dev_info(adev->dev, "amdgpu: ttm finalized\n");
}
/**
@@ -1865,7 +2156,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
int r;
if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
- adev->mman.buffer_funcs_enabled == enable)
+ adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu)
return;
if (enable) {
@@ -1874,18 +2165,32 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
ring = adev->mman.buffer_funcs_ring;
sched = &ring->sched;
- r = drm_sched_entity_init(&adev->mman.entity,
+ r = drm_sched_entity_init(&adev->mman.high_pr,
DRM_SCHED_PRIORITY_KERNEL, &sched,
1, NULL);
if (r) {
- DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
- r);
+ dev_err(adev->dev,
+ "Failed setting up TTM BO move entity (%d)\n",
+ r);
return;
}
+
+ r = drm_sched_entity_init(&adev->mman.low_pr,
+ DRM_SCHED_PRIORITY_NORMAL, &sched,
+ 1, NULL);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed setting up TTM BO move entity (%d)\n",
+ r);
+ goto error_free_entity;
+ }
} else {
- drm_sched_entity_destroy(&adev->mman.entity);
- dma_fence_put(man->move);
- man->move = NULL;
+ drm_sched_entity_destroy(&adev->mman.high_pr);
+ drm_sched_entity_destroy(&adev->mman.low_pr);
+ /* Drop all the old fences since re-creating the scheduler entities
+ * will allocate new contexts.
+ */
+ ttm_resource_manager_cleanup(man);
}
/* this just adjusts TTM size idea, which sets lpfn to the correct value */
@@ -1893,60 +2198,81 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
size = adev->gmc.real_vram_size;
else
size = adev->gmc.visible_vram_size;
- man->size = size >> PAGE_SHIFT;
+ man->size = size;
adev->mman.buffer_funcs_enabled = enable;
+
+ return;
+
+error_free_entity:
+ drm_sched_entity_destroy(&adev->mman.high_pr);
+}
+
+static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
+ bool direct_submit,
+ unsigned int num_dw,
+ struct dma_resv *resv,
+ bool vm_needs_flush,
+ struct amdgpu_job **job,
+ bool delayed, u64 k_job_id)
+{
+ enum amdgpu_ib_pool_type pool = direct_submit ?
+ AMDGPU_IB_POOL_DIRECT :
+ AMDGPU_IB_POOL_DELAYED;
+ int r;
+ struct drm_sched_entity *entity = delayed ? &adev->mman.low_pr :
+ &adev->mman.high_pr;
+ r = amdgpu_job_alloc_with_ib(adev, entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ num_dw * 4, pool, job, k_job_id);
+ if (r)
+ return r;
+
+ if (vm_needs_flush) {
+ (*job)->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?
+ adev->gmc.pdb0_bo :
+ adev->gart.bo);
+ (*job)->vm_needs_flush = true;
+ }
+ if (!resv)
+ return 0;
+
+ return drm_sched_job_add_resv_dependencies(&(*job)->base, resv,
+ DMA_RESV_USAGE_BOOKKEEP);
}
int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence, bool direct_submit,
- bool vm_needs_flush, bool tmz)
+ bool vm_needs_flush, uint32_t copy_flags)
{
- enum amdgpu_ib_pool_type pool = direct_submit ? AMDGPU_IB_POOL_DIRECT :
- AMDGPU_IB_POOL_DELAYED;
struct amdgpu_device *adev = ring->adev;
+ unsigned int num_loops, num_dw;
struct amdgpu_job *job;
-
uint32_t max_bytes;
- unsigned num_loops, num_dw;
- unsigned i;
+ unsigned int i;
int r;
- if (direct_submit && !ring->sched.ready) {
- DRM_ERROR("Trying to move memory with ring turned off.\n");
+ if (!direct_submit && !ring->sched.ready) {
+ dev_err(adev->dev,
+ "Trying to move memory with ring turned off.\n");
return -EINVAL;
}
max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
num_loops = DIV_ROUND_UP(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
-
- r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, &job);
+ r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
+ resv, vm_needs_flush, &job, false,
+ AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER);
if (r)
return r;
- if (vm_needs_flush) {
- job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?
- adev->gmc.pdb0_bo : adev->gart.bo);
- job->vm_needs_flush = true;
- }
- if (resv) {
- r = amdgpu_sync_resv(adev, &job->sync, resv,
- AMDGPU_SYNC_ALWAYS,
- AMDGPU_FENCE_OWNER_UNDEFINED);
- if (r) {
- DRM_ERROR("sync failed (%d).\n", r);
- goto error_free;
- }
- }
-
for (i = 0; i < num_loops; i++) {
uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
- dst_offset, cur_size_in_bytes, tmz);
-
+ dst_offset, cur_size_in_bytes, copy_flags);
src_offset += cur_size_in_bytes;
dst_offset += cur_size_in_bytes;
byte_count -= cur_size_in_bytes;
@@ -1957,8 +2283,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
if (direct_submit)
r = amdgpu_job_submit_direct(job, ring, fence);
else
- r = amdgpu_job_submit(job, &adev->mman.entity,
- AMDGPU_FENCE_OWNER_UNDEFINED, fence);
+ *fence = amdgpu_job_submit(job);
if (r)
goto error_free;
@@ -1966,94 +2291,163 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
error_free:
amdgpu_job_free(job);
- DRM_ERROR("Error scheduling IBs (%d)\n", r);
+ dev_err(adev->dev, "Error scheduling IBs (%d)\n", r);
return r;
}
-int amdgpu_fill_buffer(struct amdgpu_bo *bo,
- uint32_t src_data,
- struct dma_resv *resv,
- struct dma_fence **fence)
+static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
+ uint64_t dst_addr, uint32_t byte_count,
+ struct dma_resv *resv,
+ struct dma_fence **fence,
+ bool vm_needs_flush, bool delayed,
+ u64 k_job_id)
{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
- struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
-
- struct amdgpu_res_cursor cursor;
+ struct amdgpu_device *adev = ring->adev;
unsigned int num_loops, num_dw;
- uint64_t num_bytes;
-
struct amdgpu_job *job;
+ uint32_t max_bytes;
+ unsigned int i;
int r;
- if (!adev->mman.buffer_funcs_enabled) {
- DRM_ERROR("Trying to clear memory with ring turned off.\n");
- return -EINVAL;
+ max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
+ num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
+ num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
+ r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush,
+ &job, delayed, k_job_id);
+ if (r)
+ return r;
+
+ for (i = 0; i < num_loops; i++) {
+ uint32_t cur_size = min(byte_count, max_bytes);
+
+ amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, dst_addr,
+ cur_size);
+
+ dst_addr += cur_size;
+ byte_count -= cur_size;
}
- if (bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT) {
- DRM_ERROR("Trying to clear preemptible memory.\n");
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+ WARN_ON(job->ibs[0].length_dw > num_dw);
+ *fence = amdgpu_job_submit(job);
+ return 0;
+}
+
+/**
+ * amdgpu_ttm_clear_buffer - clear memory buffers
+ * @bo: amdgpu buffer object
+ * @resv: reservation object
+ * @fence: dma_fence associated with the operation
+ *
+ * Clear the memory buffer resource.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+ struct dma_resv *resv,
+ struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct amdgpu_res_cursor cursor;
+ u64 addr;
+ int r = 0;
+
+ if (!adev->mman.buffer_funcs_enabled)
return -EINVAL;
- }
- if (bo->tbo.resource->mem_type == TTM_PL_TT) {
- r = amdgpu_ttm_alloc_gart(&bo->tbo);
- if (r)
- return r;
- }
+ if (!fence)
+ return -EINVAL;
- num_bytes = bo->tbo.resource->num_pages << PAGE_SHIFT;
- num_loops = 0;
+ *fence = dma_fence_get_stub();
- amdgpu_res_first(bo->tbo.resource, 0, num_bytes, &cursor);
+ amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
+
+ mutex_lock(&adev->mman.gtt_window_lock);
while (cursor.remaining) {
- num_loops += DIV_ROUND_UP_ULL(cursor.size, max_bytes);
- amdgpu_res_next(&cursor, cursor.size);
- }
- num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;
+ struct dma_fence *next = NULL;
+ u64 size;
- /* for IB padding */
- num_dw += 64;
+ if (amdgpu_res_cleared(&cursor)) {
+ amdgpu_res_next(&cursor, cursor.size);
+ continue;
+ }
- r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED,
- &job);
- if (r)
- return r;
+ /* Never clear more than 256MiB at once to avoid timeouts */
+ size = min(cursor.size, 256ULL << 20);
- if (resv) {
- r = amdgpu_sync_resv(adev, &job->sync, resv,
- AMDGPU_SYNC_ALWAYS,
- AMDGPU_FENCE_OWNER_UNDEFINED);
- if (r) {
- DRM_ERROR("sync failed (%d).\n", r);
- goto error_free;
- }
+ r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
+ 1, ring, false, &size, &addr);
+ if (r)
+ goto err;
+
+ r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
+ &next, true, true,
+ AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
+ if (r)
+ goto err;
+
+ dma_fence_put(*fence);
+ *fence = next;
+
+ amdgpu_res_next(&cursor, size);
}
+err:
+ mutex_unlock(&adev->mman.gtt_window_lock);
- amdgpu_res_first(bo->tbo.resource, 0, num_bytes, &cursor);
- while (cursor.remaining) {
- uint32_t cur_size = min_t(uint64_t, cursor.size, max_bytes);
- uint64_t dst_addr = cursor.start;
+ return r;
+}
- dst_addr += amdgpu_ttm_domain_start(adev,
- bo->tbo.resource->mem_type);
- amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, dst_addr,
- cur_size);
+int amdgpu_fill_buffer(struct amdgpu_bo *bo,
+ uint32_t src_data,
+ struct dma_resv *resv,
+ struct dma_fence **f,
+ bool delayed,
+ u64 k_job_id)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct dma_fence *fence = NULL;
+ struct amdgpu_res_cursor dst;
+ int r;
- amdgpu_res_next(&cursor, cur_size);
+ if (!adev->mman.buffer_funcs_enabled) {
+ dev_err(adev->dev,
+ "Trying to clear memory with ring turned off.\n");
+ return -EINVAL;
}
- amdgpu_ring_pad_ib(ring, &job->ibs[0]);
- WARN_ON(job->ibs[0].length_dw > num_dw);
- r = amdgpu_job_submit(job, &adev->mman.entity,
- AMDGPU_FENCE_OWNER_UNDEFINED, fence);
- if (r)
- goto error_free;
+ amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &dst);
- return 0;
+ mutex_lock(&adev->mman.gtt_window_lock);
+ while (dst.remaining) {
+ struct dma_fence *next;
+ uint64_t cur_size, to;
-error_free:
- amdgpu_job_free(job);
+ /* Never fill more than 256MiB at once to avoid timeouts */
+ cur_size = min(dst.size, 256ULL << 20);
+
+ r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &dst,
+ 1, ring, false, &cur_size, &to);
+ if (r)
+ goto error;
+
+ r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
+ &next, true, delayed, k_job_id);
+ if (r)
+ goto error;
+
+ dma_fence_put(fence);
+ fence = next;
+
+ amdgpu_res_next(&dst, cur_size);
+ }
+error:
+ mutex_unlock(&adev->mman.gtt_window_lock);
+ if (f)
+ *f = dma_fence_get(fence);
+ dma_fence_put(fence);
return r;
}
@@ -2080,7 +2474,7 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type)
man = ttm_manager_type(&adev->mman.bdev, mem_type);
break;
default:
- DRM_ERROR("Trying to evict invalid memory type\n");
+ dev_err(adev->dev, "Trying to evict invalid memory type\n");
return -EINVAL;
}
@@ -2089,73 +2483,13 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type)
#if defined(CONFIG_DEBUG_FS)
-static int amdgpu_mm_vram_table_show(struct seq_file *m, void *unused)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
- struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
- TTM_PL_VRAM);
- struct drm_printer p = drm_seq_file_printer(m);
-
- man->func->debug(man, &p);
- return 0;
-}
-
static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct amdgpu_device *adev = m->private;
return ttm_pool_debugfs(&adev->mman.bdev.pool, m);
}
-static int amdgpu_mm_tt_table_show(struct seq_file *m, void *unused)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
- struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
- TTM_PL_TT);
- struct drm_printer p = drm_seq_file_printer(m);
-
- man->func->debug(man, &p);
- return 0;
-}
-
-static int amdgpu_mm_gds_table_show(struct seq_file *m, void *unused)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
- struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
- AMDGPU_PL_GDS);
- struct drm_printer p = drm_seq_file_printer(m);
-
- man->func->debug(man, &p);
- return 0;
-}
-
-static int amdgpu_mm_gws_table_show(struct seq_file *m, void *unused)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
- struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
- AMDGPU_PL_GWS);
- struct drm_printer p = drm_seq_file_printer(m);
-
- man->func->debug(man, &p);
- return 0;
-}
-
-static int amdgpu_mm_oa_table_show(struct seq_file *m, void *unused)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
- struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
- AMDGPU_PL_OA);
- struct drm_printer p = drm_seq_file_printer(m);
-
- man->func->debug(man, &p);
- return 0;
-}
-
-DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_vram_table);
-DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_tt_table);
-DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_gds_table);
-DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_gws_table);
-DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_oa_table);
DEFINE_SHOW_ATTRIBUTE(amdgpu_ttm_page_pool);
/*
@@ -2265,7 +2599,7 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
struct page *p;
void *ptr;
- bytes = bytes < size ? bytes : size;
+ bytes = min(bytes, size);
/* Translate the bus address to a physical address. If
* the domain is NULL it means there is no IOMMU active
@@ -2281,9 +2615,9 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
if (p->mapping != adev->mman.bdev.dev_mapping)
return -EPERM;
- ptr = kmap(p);
+ ptr = kmap_local_page(p);
r = copy_to_user(buf, ptr + off, bytes);
- kunmap(p);
+ kunmap_local(ptr);
if (r)
return -EFAULT;
@@ -2320,7 +2654,7 @@ static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
struct page *p;
void *ptr;
- bytes = bytes < size ? bytes : size;
+ bytes = min(bytes, size);
addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
@@ -2332,9 +2666,9 @@ static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
if (p->mapping != adev->mman.bdev.dev_mapping)
return -EPERM;
- ptr = kmap(p);
+ ptr = kmap_local_page(p);
r = copy_from_user(ptr + off, buf, bytes);
- kunmap(p);
+ kunmap_local(ptr);
if (r)
return -EFAULT;
@@ -2365,17 +2699,23 @@ void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
&amdgpu_ttm_vram_fops, adev->gmc.mc_vram_size);
debugfs_create_file("amdgpu_iomem", 0444, root, adev,
&amdgpu_ttm_iomem_fops);
- debugfs_create_file("amdgpu_vram_mm", 0444, root, adev,
- &amdgpu_mm_vram_table_fops);
- debugfs_create_file("amdgpu_gtt_mm", 0444, root, adev,
- &amdgpu_mm_tt_table_fops);
- debugfs_create_file("amdgpu_gds_mm", 0444, root, adev,
- &amdgpu_mm_gds_table_fops);
- debugfs_create_file("amdgpu_gws_mm", 0444, root, adev,
- &amdgpu_mm_gws_table_fops);
- debugfs_create_file("amdgpu_oa_mm", 0444, root, adev,
- &amdgpu_mm_oa_table_fops);
debugfs_create_file("ttm_page_pool", 0444, root, adev,
&amdgpu_ttm_page_pool_fops);
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ TTM_PL_VRAM),
+ root, "amdgpu_vram_mm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ TTM_PL_TT),
+ root, "amdgpu_gtt_mm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ AMDGPU_PL_GDS),
+ root, "amdgpu_gds_mm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ AMDGPU_PL_GWS),
+ root, "amdgpu_gws_mm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ AMDGPU_PL_OA),
+ root, "amdgpu_oa_mm");
+
#endif
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 7346ecff4438..577ee04ce0bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -26,42 +26,35 @@
#include <linux/dma-direction.h>
#include <drm/gpu_scheduler.h>
-#include "amdgpu.h"
+#include <drm/ttm/ttm_placement.h>
+#include "amdgpu_vram_mgr.h"
+#include "amdgpu_hmm.h"
#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
#define AMDGPU_PL_GWS (TTM_PL_PRIV + 1)
#define AMDGPU_PL_OA (TTM_PL_PRIV + 2)
#define AMDGPU_PL_PREEMPT (TTM_PL_PRIV + 3)
+#define AMDGPU_PL_DOORBELL (TTM_PL_PRIV + 4)
+#define AMDGPU_PL_MMIO_REMAP (TTM_PL_PRIV + 5)
+#define __AMDGPU_PL_NUM (TTM_PL_PRIV + 6)
#define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
-#define AMDGPU_POISON 0xd0bed0be
+extern const struct attribute_group amdgpu_vram_mgr_attr_group;
+extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
-struct amdgpu_vram_mgr {
- struct ttm_resource_manager manager;
- struct drm_mm mm;
- spinlock_t lock;
- struct list_head reservations_pending;
- struct list_head reserved_pages;
- atomic64_t usage;
- atomic64_t vis_usage;
-};
+struct hmm_range;
struct amdgpu_gtt_mgr {
struct ttm_resource_manager manager;
struct drm_mm mm;
spinlock_t lock;
- atomic64_t used;
-};
-
-struct amdgpu_preempt_mgr {
- struct ttm_resource_manager manager;
- atomic64_t used;
};
struct amdgpu_mman {
struct ttm_device bdev;
+ struct ttm_pool *ttm_pools;
bool initialized;
void __iomem *aper_base_kaddr;
@@ -71,12 +64,14 @@ struct amdgpu_mman {
bool buffer_funcs_enabled;
struct mutex gtt_window_lock;
- /* Scheduler entity for buffer moves */
- struct drm_sched_entity entity;
+ /* High priority scheduler entity for buffer moves */
+ struct drm_sched_entity high_pr;
+ /* Low priority scheduler entity for VRAM clearing */
+ struct drm_sched_entity low_pr;
struct amdgpu_vram_mgr vram_mgr;
struct amdgpu_gtt_mgr gtt_mgr;
- struct amdgpu_preempt_mgr preempt_mgr;
+ struct ttm_resource_manager preempt_mgr;
uint64_t stolen_vga_size;
struct amdgpu_bo *stolen_vga_memory;
@@ -88,16 +83,25 @@ struct amdgpu_mman {
uint64_t stolen_reserved_offset;
uint64_t stolen_reserved_size;
- /* discovery */
- uint8_t *discovery_bin;
- uint32_t discovery_tmr_size;
- struct amdgpu_bo *discovery_memory;
+ /* fw reserved memory */
+ struct amdgpu_bo *fw_reserved_memory;
+ struct amdgpu_bo *fw_reserved_memory_extend;
/* firmware VRAM reservation */
u64 fw_vram_usage_start_offset;
u64 fw_vram_usage_size;
struct amdgpu_bo *fw_vram_usage_reserved_bo;
void *fw_vram_usage_va;
+
+ /* driver VRAM reservation */
+ u64 drv_vram_usage_start_offset;
+ u64 drv_vram_usage_size;
+ struct amdgpu_bo *drv_vram_usage_reserved_bo;
+ void *drv_vram_usage_va;
+
+ /* PAGE_SIZE'd BO for process memory r/w over SDMA. */
+ struct amdgpu_bo *sdma_access_bo;
+ void *sdma_access_ptr;
};
struct amdgpu_copy_mem {
@@ -106,6 +110,23 @@ struct amdgpu_copy_mem {
unsigned long offset;
};
+#define AMDGPU_COPY_FLAGS_TMZ (1 << 0)
+#define AMDGPU_COPY_FLAGS_READ_DECOMPRESSED (1 << 1)
+#define AMDGPU_COPY_FLAGS_WRITE_COMPRESSED (1 << 2)
+#define AMDGPU_COPY_FLAGS_MAX_COMPRESSED_SHIFT 3
+#define AMDGPU_COPY_FLAGS_MAX_COMPRESSED_MASK 0x03
+#define AMDGPU_COPY_FLAGS_NUMBER_TYPE_SHIFT 5
+#define AMDGPU_COPY_FLAGS_NUMBER_TYPE_MASK 0x07
+#define AMDGPU_COPY_FLAGS_DATA_FORMAT_SHIFT 8
+#define AMDGPU_COPY_FLAGS_DATA_FORMAT_MASK 0x3f
+#define AMDGPU_COPY_FLAGS_WRITE_COMPRESS_DISABLE_SHIFT 14
+#define AMDGPU_COPY_FLAGS_WRITE_COMPRESS_DISABLE_MASK 0x1
+
+#define AMDGPU_COPY_FLAGS_SET(field, value) \
+ (((__u32)(value) & AMDGPU_COPY_FLAGS_##field##_MASK) << AMDGPU_COPY_FLAGS_##field##_SHIFT)
+#define AMDGPU_COPY_FLAGS_GET(value, field) \
+ (((__u32)(value) >> AMDGPU_COPY_FLAGS_##field##_SHIFT) & AMDGPU_COPY_FLAGS_##field##_MASK)
+
int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size);
void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev);
int amdgpu_preempt_mgr_init(struct amdgpu_device *adev);
@@ -114,8 +135,7 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev);
void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem);
-uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man);
-int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man);
+void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr);
uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man);
@@ -129,54 +149,53 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
void amdgpu_vram_mgr_free_sgt(struct device *dev,
enum dma_data_direction dir,
struct sg_table *sgt);
-uint64_t amdgpu_vram_mgr_usage(struct ttm_resource_manager *man);
-uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_resource_manager *man);
-int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man,
+uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr);
+int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
uint64_t start, uint64_t size);
-int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man,
+int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
uint64_t start);
+void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev);
+
+bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
+ struct ttm_resource *res);
int amdgpu_ttm_init(struct amdgpu_device *adev);
void amdgpu_ttm_fini(struct amdgpu_device *adev);
void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
bool enable);
-
int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence, bool direct_submit,
- bool vm_needs_flush, bool tmz);
-int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
- const struct amdgpu_copy_mem *src,
- const struct amdgpu_copy_mem *dst,
- uint64_t size, bool tmz,
- struct dma_resv *resv,
- struct dma_fence **f);
+ bool vm_needs_flush, uint32_t copy_flags);
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+ struct dma_resv *resv,
+ struct dma_fence **fence);
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data,
struct dma_resv *resv,
- struct dma_fence **fence);
+ struct dma_fence **fence,
+ bool delayed,
+ u64 k_job_id);
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
-int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
+void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages);
-bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
+ struct amdgpu_hmm_range *range);
#else
static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
- struct page **pages)
+ struct amdgpu_hmm_range *range)
{
return -EPERM;
}
-static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
-{
- return false;
-}
#endif
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct amdgpu_hmm_range *range);
+int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
+ uint64_t *user_addr);
int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
uint64_t addr, uint32_t flags);
bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index ca3350502618..e96f24e9ad57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -28,6 +28,13 @@
#include "amdgpu.h"
#include "amdgpu_ucode.h"
+#define AMDGPU_UCODE_NAME_MAX (128)
+
+static const struct kicker_device kicker_device_list[] = {
+ {0x744B, 0x00},
+ {0x7551, 0xC8}
+};
+
static void amdgpu_ucode_print_common_hdr(const struct common_firmware_header *hdr)
{
DRM_DEBUG("size_bytes: %u\n", le32_to_cpu(hdr->size_bytes));
@@ -115,6 +122,12 @@ void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr)
le32_to_cpu(gfx_hdr->ucode_feature_version));
DRM_DEBUG("jt_offset: %u\n", le32_to_cpu(gfx_hdr->jt_offset));
DRM_DEBUG("jt_size: %u\n", le32_to_cpu(gfx_hdr->jt_size));
+ } else if (version_major == 2) {
+ const struct gfx_firmware_header_v2_0 *gfx_hdr =
+ container_of(hdr, struct gfx_firmware_header_v2_0, header);
+
+ DRM_DEBUG("ucode_feature_version: %u\n",
+ le32_to_cpu(gfx_hdr->ucode_feature_version));
} else {
DRM_ERROR("Unknown GFX ucode version: %u.%u\n", version_major, version_minor);
}
@@ -145,70 +158,138 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr)
} else if (version_major == 2) {
const struct rlc_firmware_header_v2_0 *rlc_hdr =
container_of(hdr, struct rlc_firmware_header_v2_0, header);
+ const struct rlc_firmware_header_v2_1 *rlc_hdr_v2_1 =
+ container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0);
+ const struct rlc_firmware_header_v2_2 *rlc_hdr_v2_2 =
+ container_of(rlc_hdr_v2_1, struct rlc_firmware_header_v2_2, v2_1);
+ const struct rlc_firmware_header_v2_3 *rlc_hdr_v2_3 =
+ container_of(rlc_hdr_v2_2, struct rlc_firmware_header_v2_3, v2_2);
+ const struct rlc_firmware_header_v2_4 *rlc_hdr_v2_4 =
+ container_of(rlc_hdr_v2_3, struct rlc_firmware_header_v2_4, v2_3);
- DRM_DEBUG("ucode_feature_version: %u\n",
- le32_to_cpu(rlc_hdr->ucode_feature_version));
- DRM_DEBUG("jt_offset: %u\n", le32_to_cpu(rlc_hdr->jt_offset));
- DRM_DEBUG("jt_size: %u\n", le32_to_cpu(rlc_hdr->jt_size));
- DRM_DEBUG("save_and_restore_offset: %u\n",
- le32_to_cpu(rlc_hdr->save_and_restore_offset));
- DRM_DEBUG("clear_state_descriptor_offset: %u\n",
- le32_to_cpu(rlc_hdr->clear_state_descriptor_offset));
- DRM_DEBUG("avail_scratch_ram_locations: %u\n",
- le32_to_cpu(rlc_hdr->avail_scratch_ram_locations));
- DRM_DEBUG("reg_restore_list_size: %u\n",
- le32_to_cpu(rlc_hdr->reg_restore_list_size));
- DRM_DEBUG("reg_list_format_start: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_format_start));
- DRM_DEBUG("reg_list_format_separate_start: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_format_separate_start));
- DRM_DEBUG("starting_offsets_start: %u\n",
- le32_to_cpu(rlc_hdr->starting_offsets_start));
- DRM_DEBUG("reg_list_format_size_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_format_size_bytes));
- DRM_DEBUG("reg_list_format_array_offset_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
- DRM_DEBUG("reg_list_size_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_size_bytes));
- DRM_DEBUG("reg_list_array_offset_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
- DRM_DEBUG("reg_list_format_separate_size_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_format_separate_size_bytes));
- DRM_DEBUG("reg_list_format_separate_array_offset_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes));
- DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
- DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes));
- if (version_minor == 1) {
- const struct rlc_firmware_header_v2_1 *v2_1 =
- container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0);
+ switch (version_minor) {
+ case 0:
+ /* rlc_hdr v2_0 */
+ DRM_DEBUG("ucode_feature_version: %u\n",
+ le32_to_cpu(rlc_hdr->ucode_feature_version));
+ DRM_DEBUG("jt_offset: %u\n", le32_to_cpu(rlc_hdr->jt_offset));
+ DRM_DEBUG("jt_size: %u\n", le32_to_cpu(rlc_hdr->jt_size));
+ DRM_DEBUG("save_and_restore_offset: %u\n",
+ le32_to_cpu(rlc_hdr->save_and_restore_offset));
+ DRM_DEBUG("clear_state_descriptor_offset: %u\n",
+ le32_to_cpu(rlc_hdr->clear_state_descriptor_offset));
+ DRM_DEBUG("avail_scratch_ram_locations: %u\n",
+ le32_to_cpu(rlc_hdr->avail_scratch_ram_locations));
+ DRM_DEBUG("reg_restore_list_size: %u\n",
+ le32_to_cpu(rlc_hdr->reg_restore_list_size));
+ DRM_DEBUG("reg_list_format_start: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_start));
+ DRM_DEBUG("reg_list_format_separate_start: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_separate_start));
+ DRM_DEBUG("starting_offsets_start: %u\n",
+ le32_to_cpu(rlc_hdr->starting_offsets_start));
+ DRM_DEBUG("reg_list_format_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_size_bytes));
+ DRM_DEBUG("reg_list_format_array_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
+ DRM_DEBUG("reg_list_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_size_bytes));
+ DRM_DEBUG("reg_list_array_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
+ DRM_DEBUG("reg_list_format_separate_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_separate_size_bytes));
+ DRM_DEBUG("reg_list_format_separate_array_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes));
+ DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
+ DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes));
+ break;
+ case 1:
+ /* rlc_hdr v2_1 */
DRM_DEBUG("reg_list_format_direct_reg_list_length: %u\n",
- le32_to_cpu(v2_1->reg_list_format_direct_reg_list_length));
+ le32_to_cpu(rlc_hdr_v2_1->reg_list_format_direct_reg_list_length));
DRM_DEBUG("save_restore_list_cntl_ucode_ver: %u\n",
- le32_to_cpu(v2_1->save_restore_list_cntl_ucode_ver));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_ucode_ver));
DRM_DEBUG("save_restore_list_cntl_feature_ver: %u\n",
- le32_to_cpu(v2_1->save_restore_list_cntl_feature_ver));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_feature_ver));
DRM_DEBUG("save_restore_list_cntl_size_bytes %u\n",
- le32_to_cpu(v2_1->save_restore_list_cntl_size_bytes));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_size_bytes));
DRM_DEBUG("save_restore_list_cntl_offset_bytes: %u\n",
- le32_to_cpu(v2_1->save_restore_list_cntl_offset_bytes));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_offset_bytes));
DRM_DEBUG("save_restore_list_gpm_ucode_ver: %u\n",
- le32_to_cpu(v2_1->save_restore_list_gpm_ucode_ver));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_ucode_ver));
DRM_DEBUG("save_restore_list_gpm_feature_ver: %u\n",
- le32_to_cpu(v2_1->save_restore_list_gpm_feature_ver));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_feature_ver));
DRM_DEBUG("save_restore_list_gpm_size_bytes %u\n",
- le32_to_cpu(v2_1->save_restore_list_gpm_size_bytes));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_size_bytes));
DRM_DEBUG("save_restore_list_gpm_offset_bytes: %u\n",
- le32_to_cpu(v2_1->save_restore_list_gpm_offset_bytes));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_offset_bytes));
DRM_DEBUG("save_restore_list_srm_ucode_ver: %u\n",
- le32_to_cpu(v2_1->save_restore_list_srm_ucode_ver));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_ucode_ver));
DRM_DEBUG("save_restore_list_srm_feature_ver: %u\n",
- le32_to_cpu(v2_1->save_restore_list_srm_feature_ver));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_feature_ver));
DRM_DEBUG("save_restore_list_srm_size_bytes %u\n",
- le32_to_cpu(v2_1->save_restore_list_srm_size_bytes));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_size_bytes));
DRM_DEBUG("save_restore_list_srm_offset_bytes: %u\n",
- le32_to_cpu(v2_1->save_restore_list_srm_offset_bytes));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_offset_bytes));
+ break;
+ case 2:
+ /* rlc_hdr v2_2 */
+ DRM_DEBUG("rlc_iram_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_2->rlc_iram_ucode_size_bytes));
+ DRM_DEBUG("rlc_iram_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_2->rlc_iram_ucode_offset_bytes));
+ DRM_DEBUG("rlc_dram_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_2->rlc_dram_ucode_size_bytes));
+ DRM_DEBUG("rlc_dram_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_2->rlc_dram_ucode_offset_bytes));
+ break;
+ case 3:
+ /* rlc_hdr v2_3 */
+ DRM_DEBUG("rlcp_ucode_version: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_version));
+ DRM_DEBUG("rlcp_ucode_feature_version: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_feature_version));
+ DRM_DEBUG("rlcp_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_size_bytes));
+ DRM_DEBUG("rlcp_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_offset_bytes));
+ DRM_DEBUG("rlcv_ucode_version: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_version));
+ DRM_DEBUG("rlcv_ucode_feature_version: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_feature_version));
+ DRM_DEBUG("rlcv_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_size_bytes));
+ DRM_DEBUG("rlcv_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_offset_bytes));
+ break;
+ case 4:
+ /* rlc_hdr v2_4 */
+ DRM_DEBUG("global_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->global_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("global_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->global_tap_delays_ucode_offset_bytes));
+ DRM_DEBUG("se0_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se0_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("se0_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se0_tap_delays_ucode_offset_bytes));
+ DRM_DEBUG("se1_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se1_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("se1_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se1_tap_delays_ucode_offset_bytes));
+ DRM_DEBUG("se2_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se2_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("se2_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se2_tap_delays_ucode_offset_bytes));
+ DRM_DEBUG("se3_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se3_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("se3_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se3_tap_delays_ucode_offset_bytes));
+ break;
+ default:
+ DRM_ERROR("Unknown RLC v2 ucode: v2.%u\n", version_minor);
+ break;
}
} else {
DRM_ERROR("Unknown RLC ucode version: %u.%u\n", version_major, version_minor);
@@ -238,6 +319,23 @@ void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr)
container_of(sdma_hdr, struct sdma_firmware_header_v1_1, v1_0);
DRM_DEBUG("digest_size: %u\n", le32_to_cpu(sdma_v1_1_hdr->digest_size));
}
+ } else if (version_major == 2) {
+ const struct sdma_firmware_header_v2_0 *sdma_hdr =
+ container_of(hdr, struct sdma_firmware_header_v2_0, header);
+
+ DRM_DEBUG("ucode_feature_version: %u\n",
+ le32_to_cpu(sdma_hdr->ucode_feature_version));
+ DRM_DEBUG("ctx_jt_offset: %u\n", le32_to_cpu(sdma_hdr->ctx_jt_offset));
+ DRM_DEBUG("ctx_jt_size: %u\n", le32_to_cpu(sdma_hdr->ctx_jt_size));
+ DRM_DEBUG("ctl_ucode_offset: %u\n", le32_to_cpu(sdma_hdr->ctl_ucode_offset));
+ DRM_DEBUG("ctl_jt_offset: %u\n", le32_to_cpu(sdma_hdr->ctl_jt_offset));
+ DRM_DEBUG("ctl_jt_size: %u\n", le32_to_cpu(sdma_hdr->ctl_jt_size));
+ } else if (version_major == 3) {
+ const struct sdma_firmware_header_v3_0 *sdma_hdr =
+ container_of(hdr, struct sdma_firmware_header_v3_0, header);
+
+ DRM_DEBUG("ucode_reversion: %u\n",
+ le32_to_cpu(sdma_hdr->ucode_feature_version));
} else {
DRM_ERROR("Unknown SDMA ucode version: %u.%u\n",
version_major, version_minor);
@@ -248,6 +346,8 @@ void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr)
{
uint16_t version_major = le16_to_cpu(hdr->header_version_major);
uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
+ uint32_t fw_index;
+ const struct psp_fw_bin_desc *desc;
DRM_DEBUG("PSP\n");
amdgpu_ucode_print_common_hdr(hdr);
@@ -312,6 +412,77 @@ void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr)
DRM_DEBUG("spl_size_bytes: %u\n",
le32_to_cpu(psp_hdr_v1_3->spl.size_bytes));
}
+ } else if (version_major == 2) {
+ const struct psp_firmware_header_v2_0 *psp_hdr_v2_0 =
+ container_of(hdr, struct psp_firmware_header_v2_0, header);
+ for (fw_index = 0; fw_index < le32_to_cpu(psp_hdr_v2_0->psp_fw_bin_count); fw_index++) {
+ desc = &(psp_hdr_v2_0->psp_fw_bin[fw_index]);
+ switch (desc->fw_type) {
+ case PSP_FW_TYPE_PSP_SOS:
+ DRM_DEBUG("psp_sos_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_sos_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_SYS_DRV:
+ DRM_DEBUG("psp_sys_drv_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_sys_drv_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_KDB:
+ DRM_DEBUG("psp_kdb_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_kdb_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_TOC:
+ DRM_DEBUG("psp_toc_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_toc_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_SPL:
+ DRM_DEBUG("psp_spl_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_spl_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_RL:
+ DRM_DEBUG("psp_rl_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_rl_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_SOC_DRV:
+ DRM_DEBUG("psp_soc_drv_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_soc_drv_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_INTF_DRV:
+ DRM_DEBUG("psp_intf_drv_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_intf_drv_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_DBG_DRV:
+ DRM_DEBUG("psp_dbg_drv_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_dbg_drv_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_RAS_DRV:
+ DRM_DEBUG("psp_ras_drv_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_ras_drv_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ default:
+ DRM_DEBUG("Unsupported PSP fw type: %d\n", desc->fw_type);
+ break;
+ }
+ }
} else {
DRM_ERROR("Unknown PSP ucode version: %u.%u\n",
version_major, version_minor);
@@ -339,7 +510,7 @@ void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr)
}
}
-int amdgpu_ucode_validate(const struct firmware *fw)
+static int amdgpu_ucode_validate(const struct firmware *fw)
{
const struct common_firmware_header *hdr =
(const struct common_firmware_header *)fw->data;
@@ -355,8 +526,8 @@ bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
{
if ((hdr->common.header_version_major == hdr_major) &&
(hdr->common.header_version_minor == hdr_minor))
- return false;
- return true;
+ return true;
+ return false;
}
enum amdgpu_firmware_load_type
@@ -389,26 +560,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
case CHIP_POLARIS12:
case CHIP_VEGAM:
return AMDGPU_FW_LOAD_SMU;
- case CHIP_VEGA10:
- case CHIP_RAVEN:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_ARCTURUS:
- case CHIP_RENOIR:
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_ALDEBARAN:
- case CHIP_BEIGE_GOBY:
- case CHIP_YELLOW_CARP:
- if (!load_type)
- return AMDGPU_FW_LOAD_DIRECT;
- else
- return AMDGPU_FW_LOAD_PSP;
case CHIP_CYAN_SKILLFISH:
if (!(load_type &&
adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2))
@@ -418,6 +569,8 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
default:
if (!load_type)
return AMDGPU_FW_LOAD_DIRECT;
+ else if (load_type == 3)
+ return AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO;
else
return AMDGPU_FW_LOAD_PSP;
}
@@ -442,6 +595,10 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id)
return "SDMA6";
case AMDGPU_UCODE_ID_SDMA7:
return "SDMA7";
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH0:
+ return "SDMA_CTX";
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH1:
+ return "SDMA_CTL";
case AMDGPU_UCODE_ID_CP_CE:
return "CP_CE";
case AMDGPU_UCODE_ID_CP_PFP:
@@ -460,6 +617,10 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id)
return "CP_MES";
case AMDGPU_UCODE_ID_CP_MES_DATA:
return "CP_MES_DATA";
+ case AMDGPU_UCODE_ID_CP_MES1:
+ return "CP_MES_KIQ";
+ case AMDGPU_UCODE_ID_CP_MES1_DATA:
+ return "CP_MES_KIQ_DATA";
case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
return "RLC_RESTORE_LIST_CNTL";
case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
@@ -472,10 +633,32 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id)
return "RLC_DRAM";
case AMDGPU_UCODE_ID_RLC_G:
return "RLC_G";
+ case AMDGPU_UCODE_ID_RLC_P:
+ return "RLC_P";
+ case AMDGPU_UCODE_ID_RLC_V:
+ return "RLC_V";
+ case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS:
+ return "GLOBAL_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_SE0_TAP_DELAYS:
+ return "SE0_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_SE1_TAP_DELAYS:
+ return "SE1_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_SE2_TAP_DELAYS:
+ return "SE2_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_SE3_TAP_DELAYS:
+ return "SE3_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_IMU_I:
+ return "IMU_I";
+ case AMDGPU_UCODE_ID_IMU_D:
+ return "IMU_D";
case AMDGPU_UCODE_ID_STORAGE:
return "STORAGE";
case AMDGPU_UCODE_ID_SMC:
return "SMC";
+ case AMDGPU_UCODE_ID_PPTABLE:
+ return "PPTABLE";
+ case AMDGPU_UCODE_ID_P2S_TABLE:
+ return "P2STABLE";
case AMDGPU_UCODE_ID_UVD:
return "UVD";
case AMDGPU_UCODE_ID_UVD1:
@@ -496,20 +679,72 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id)
return "VCN1_RAM";
case AMDGPU_UCODE_ID_DMCUB:
return "DMCUB";
+ case AMDGPU_UCODE_ID_CAP:
+ return "CAP";
+ case AMDGPU_UCODE_ID_VPE_CTX:
+ return "VPE_CTX";
+ case AMDGPU_UCODE_ID_VPE_CTL:
+ return "VPE_CTL";
+ case AMDGPU_UCODE_ID_VPE:
+ return "VPE";
+ case AMDGPU_UCODE_ID_UMSCH_MM_UCODE:
+ return "UMSCH_MM_UCODE";
+ case AMDGPU_UCODE_ID_UMSCH_MM_DATA:
+ return "UMSCH_MM_DATA";
+ case AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER:
+ return "UMSCH_MM_CMD_BUFFER";
+ case AMDGPU_UCODE_ID_JPEG_RAM:
+ return "JPEG";
+ case AMDGPU_UCODE_ID_SDMA_RS64:
+ return "RS64_SDMA";
+ case AMDGPU_UCODE_ID_CP_RS64_PFP:
+ return "RS64_PFP";
+ case AMDGPU_UCODE_ID_CP_RS64_ME:
+ return "RS64_ME";
+ case AMDGPU_UCODE_ID_CP_RS64_MEC:
+ return "RS64_MEC";
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
+ return "RS64_PFP_P0_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
+ return "RS64_PFP_P1_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
+ return "RS64_ME_P0_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
+ return "RS64_ME_P1_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
+ return "RS64_MEC_P0_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
+ return "RS64_MEC_P1_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
+ return "RS64_MEC_P2_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
+ return "RS64_MEC_P3_STACK";
+ case AMDGPU_UCODE_ID_ISP:
+ return "ISP";
default:
return "UNKNOWN UCODE";
}
}
+static inline int amdgpu_ucode_is_valid(uint32_t fw_version)
+{
+ if (!fw_version)
+ return -EINVAL;
+
+ return 0;
+}
+
#define FW_VERSION_ATTR(name, mode, field) \
static ssize_t show_##name(struct device *dev, \
- struct device_attribute *attr, \
- char *buf) \
+ struct device_attribute *attr, char *buf) \
{ \
struct drm_device *ddev = dev_get_drvdata(dev); \
struct amdgpu_device *adev = drm_to_adev(ddev); \
\
- return sysfs_emit(buf, "0x%08x\n", adev->field); \
+ if (!buf) \
+ return amdgpu_ucode_is_valid(adev->field); \
+ \
+ return sysfs_emit(buf, "0x%08x\n", adev->field); \
} \
static DEVICE_ATTR(name, mode, show_##name, NULL)
@@ -525,6 +760,7 @@ FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version);
FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version);
FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version);
FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version);
+FW_VERSION_ATTR(imu_fw_version, 0444, gfx.imu_fw_version);
FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos.fw_version);
FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_context.bin_desc.fw_version);
FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ras_context.context.bin_desc.fw_version);
@@ -534,6 +770,10 @@ FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version);
FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version);
FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version);
FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version);
+FW_VERSION_ATTR(dmcub_fw_version, 0444, dm.dmcub_fw_version);
+FW_VERSION_ATTR(mes_fw_version, 0444, mes.sched_version & AMDGPU_MES_VERSION_MASK);
+FW_VERSION_ATTR(mes_kiq_fw_version, 0444, mes.kiq_version & AMDGPU_MES_VERSION_MASK);
+FW_VERSION_ATTR(pldm_fw_version, 0444, firmware.pldm_version);
static struct attribute *fw_attrs[] = {
&dev_attr_vce_fw_version.attr, &dev_attr_uvd_fw_version.attr,
@@ -546,12 +786,30 @@ static struct attribute *fw_attrs[] = {
&dev_attr_ta_ras_fw_version.attr, &dev_attr_ta_xgmi_fw_version.attr,
&dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr,
&dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr,
- &dev_attr_dmcu_fw_version.attr, NULL
+ &dev_attr_dmcu_fw_version.attr, &dev_attr_dmcub_fw_version.attr,
+ &dev_attr_imu_fw_version.attr, &dev_attr_mes_fw_version.attr,
+ &dev_attr_mes_kiq_fw_version.attr, &dev_attr_pldm_fw_version.attr,
+ NULL
};
+#define to_dev_attr(x) container_of(x, struct device_attribute, attr)
+
+static umode_t amdgpu_ucode_sys_visible(struct kobject *kobj,
+ struct attribute *attr, int idx)
+{
+ struct device_attribute *dev_attr = to_dev_attr(attr);
+ struct device *dev = kobj_to_dev(kobj);
+
+ if (dev_attr->show(dev, dev_attr, NULL) == -EINVAL)
+ return 0;
+
+ return attr->mode;
+}
+
static const struct attribute_group fw_attr_group = {
.name = "fw_version",
- .attrs = fw_attrs
+ .attrs = fw_attrs,
+ .is_visible = amdgpu_ucode_sys_visible
};
int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev)
@@ -570,12 +828,18 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
{
const struct common_firmware_header *header = NULL;
const struct gfx_firmware_header_v1_0 *cp_hdr = NULL;
+ const struct gfx_firmware_header_v2_0 *cpv2_hdr = NULL;
const struct dmcu_firmware_header_v1_0 *dmcu_hdr = NULL;
const struct dmcub_firmware_header_v1_0 *dmcub_hdr = NULL;
const struct mes_firmware_header_v1_0 *mes_hdr = NULL;
+ const struct sdma_firmware_header_v2_0 *sdma_hdr = NULL;
+ const struct sdma_firmware_header_v3_0 *sdmav3_hdr = NULL;
+ const struct imu_firmware_header_v1_0 *imu_hdr = NULL;
+ const struct vpe_firmware_header_v1_0 *vpe_hdr = NULL;
+ const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr = NULL;
u8 *ucode_addr;
- if (NULL == ucode->fw)
+ if (!ucode->fw)
return 0;
ucode->mc_addr = mc_addr;
@@ -586,12 +850,33 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
header = (const struct common_firmware_header *)ucode->fw->data;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)ucode->fw->data;
dmcu_hdr = (const struct dmcu_firmware_header_v1_0 *)ucode->fw->data;
dmcub_hdr = (const struct dmcub_firmware_header_v1_0 *)ucode->fw->data;
mes_hdr = (const struct mes_firmware_header_v1_0 *)ucode->fw->data;
+ sdma_hdr = (const struct sdma_firmware_header_v2_0 *)ucode->fw->data;
+ sdmav3_hdr = (const struct sdma_firmware_header_v3_0 *)ucode->fw->data;
+ imu_hdr = (const struct imu_firmware_header_v1_0 *)ucode->fw->data;
+ vpe_hdr = (const struct vpe_firmware_header_v1_0 *)ucode->fw->data;
+ umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *)ucode->fw->data;
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
switch (ucode->ucode_id) {
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH0:
+ ucode->ucode_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH1:
+ ucode->ucode_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(sdma_hdr->ctl_ucode_offset);
+ break;
+ case AMDGPU_UCODE_ID_SDMA_RS64:
+ ucode->ucode_size = le32_to_cpu(sdmav3_hdr->ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(sdmav3_hdr->header.ucode_array_offset_bytes);
+ break;
case AMDGPU_UCODE_ID_CP_MEC1:
case AMDGPU_UCODE_ID_CP_MEC2:
ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) -
@@ -626,6 +911,34 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
ucode->ucode_size = adev->gfx.rlc.rlc_dram_ucode_size_bytes;
ucode_addr = adev->gfx.rlc.rlc_dram_ucode;
break;
+ case AMDGPU_UCODE_ID_RLC_P:
+ ucode->ucode_size = adev->gfx.rlc.rlcp_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.rlcp_ucode;
+ break;
+ case AMDGPU_UCODE_ID_RLC_V:
+ ucode->ucode_size = adev->gfx.rlc.rlcv_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.rlcv_ucode;
+ break;
+ case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.global_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.global_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_SE0_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.se0_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.se0_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_SE1_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.se1_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.se1_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_SE2_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.se2_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.se2_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_SE3_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.se3_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.se3_tap_delays_ucode;
+ break;
case AMDGPU_UCODE_ID_CP_MES:
ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
@@ -636,6 +949,16 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes);
break;
+ case AMDGPU_UCODE_ID_CP_MES1:
+ ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(mes_hdr->mes_ucode_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_MES1_DATA:
+ ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes);
+ break;
case AMDGPU_UCODE_ID_DMCU_ERAM:
ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) -
le32_to_cpu(dmcu_hdr->intv_size_bytes);
@@ -653,6 +976,100 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(header->ucode_array_offset_bytes);
break;
+ case AMDGPU_UCODE_ID_PPTABLE:
+ ucode->ucode_size = ucode->fw->size;
+ ucode_addr = (u8 *)ucode->fw->data;
+ break;
+ case AMDGPU_UCODE_ID_P2S_TABLE:
+ ucode->ucode_size = ucode->fw->size;
+ ucode_addr = (u8 *)ucode->fw->data;
+ break;
+ case AMDGPU_UCODE_ID_IMU_I:
+ ucode->ucode_size = le32_to_cpu(imu_hdr->imu_iram_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(imu_hdr->header.ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_IMU_D:
+ ucode->ucode_size = le32_to_cpu(imu_hdr->imu_dram_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(imu_hdr->header.ucode_array_offset_bytes) +
+ le32_to_cpu(imu_hdr->imu_iram_ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_VPE_CTX:
+ ucode->ucode_size = le32_to_cpu(vpe_hdr->ctx_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(vpe_hdr->header.ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_VPE_CTL:
+ ucode->ucode_size = le32_to_cpu(vpe_hdr->ctl_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(vpe_hdr->ctl_ucode_offset);
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_UCODE:
+ ucode->ucode_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(umsch_mm_hdr->header.ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_DATA:
+ ucode->ucode_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_offset_bytes);
+ break;
default:
ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
@@ -678,7 +1095,7 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode,
uint8_t *src_addr = NULL;
uint8_t *dst_addr = NULL;
- if (NULL == ucode->fw)
+ if (!ucode->fw)
return 0;
comm_hdr = (const struct common_firmware_header *)ucode->fw->data;
@@ -696,9 +1113,11 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode,
int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
{
- if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) {
+ if ((adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) &&
+ (adev->firmware.load_type != AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)) {
amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
- amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
+ (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ?
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
&adev->firmware.fw_buf,
&adev->firmware.fw_buf_mc,
&adev->firmware.fw_buf_ptr);
@@ -714,8 +1133,7 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
void amdgpu_ucode_free_bo(struct amdgpu_device *adev)
{
- if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT)
- amdgpu_bo_free_kernel(&adev->firmware.fw_buf,
+ amdgpu_bo_free_kernel(&adev->firmware.fw_buf,
&adev->firmware.fw_buf_mc,
&adev->firmware.fw_buf_ptr);
}
@@ -742,6 +1160,9 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM;
}
+ if (amdgpu_virt_xgmi_migrate_enabled(adev) && adev->firmware.fw_buf)
+ adev->firmware.fw_buf_mc = amdgpu_bo_fb_aper_addr(adev->firmware.fw_buf);
+
for (i = 0; i < adev->firmware.max_ucodes; i++) {
ucode = &adev->firmware.ucode[i];
if (ucode->fw) {
@@ -750,6 +1171,7 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
if (i == AMDGPU_UCODE_ID_CP_MEC1 &&
adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
const struct gfx_firmware_header_v1_0 *cp_hdr;
+
cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
amdgpu_ucode_patch_jt(ucode, adev->firmware.fw_buf_mc + fw_offset,
adev->firmware.fw_buf_ptr + fw_offset);
@@ -760,3 +1182,335 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
}
return 0;
}
+
+static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int block_type)
+{
+ if (block_type == MP0_HWIP) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ return "vega10";
+ case CHIP_VEGA12:
+ return "vega12";
+ default:
+ return NULL;
+ }
+ case IP_VERSION(10, 0, 0):
+ case IP_VERSION(10, 0, 1):
+ if (adev->asic_type == CHIP_RAVEN) {
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ return "raven2";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ return "picasso";
+ return "raven";
+ }
+ break;
+ case IP_VERSION(11, 0, 0):
+ return "navi10";
+ case IP_VERSION(11, 0, 2):
+ return "vega20";
+ case IP_VERSION(11, 0, 3):
+ return "renoir";
+ case IP_VERSION(11, 0, 4):
+ return "arcturus";
+ case IP_VERSION(11, 0, 5):
+ return "navi14";
+ case IP_VERSION(11, 0, 7):
+ return "sienna_cichlid";
+ case IP_VERSION(11, 0, 9):
+ return "navi12";
+ case IP_VERSION(11, 0, 11):
+ return "navy_flounder";
+ case IP_VERSION(11, 0, 12):
+ return "dimgrey_cavefish";
+ case IP_VERSION(11, 0, 13):
+ return "beige_goby";
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
+ return "vangogh";
+ case IP_VERSION(12, 0, 1):
+ return "green_sardine";
+ case IP_VERSION(13, 0, 2):
+ return "aldebaran";
+ case IP_VERSION(13, 0, 1):
+ case IP_VERSION(13, 0, 3):
+ return "yellow_carp";
+ }
+ } else if (block_type == MP1_HWIP) {
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
+ case IP_VERSION(10, 0, 0):
+ case IP_VERSION(10, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ if (adev->asic_type == CHIP_ARCTURUS)
+ return "arcturus_smc";
+ return NULL;
+ case IP_VERSION(11, 0, 0):
+ return "navi10_smc";
+ case IP_VERSION(11, 0, 5):
+ return "navi14_smc";
+ case IP_VERSION(11, 0, 9):
+ return "navi12_smc";
+ case IP_VERSION(11, 0, 7):
+ return "sienna_cichlid_smc";
+ case IP_VERSION(11, 0, 11):
+ return "navy_flounder_smc";
+ case IP_VERSION(11, 0, 12):
+ return "dimgrey_cavefish_smc";
+ case IP_VERSION(11, 0, 13):
+ return "beige_goby_smc";
+ case IP_VERSION(13, 0, 2):
+ return "aldebaran_smc";
+ }
+ } else if (block_type == SDMA0_HWIP) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(4, 0, 0):
+ return "vega10_sdma";
+ case IP_VERSION(4, 0, 1):
+ return "vega12_sdma";
+ case IP_VERSION(4, 1, 0):
+ case IP_VERSION(4, 1, 1):
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ return "raven2_sdma";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ return "picasso_sdma";
+ return "raven_sdma";
+ case IP_VERSION(4, 1, 2):
+ if (adev->apu_flags & AMD_APU_IS_RENOIR)
+ return "renoir_sdma";
+ return "green_sardine_sdma";
+ case IP_VERSION(4, 2, 0):
+ return "vega20_sdma";
+ case IP_VERSION(4, 2, 2):
+ return "arcturus_sdma";
+ case IP_VERSION(4, 4, 0):
+ return "aldebaran_sdma";
+ case IP_VERSION(5, 0, 0):
+ return "navi10_sdma";
+ case IP_VERSION(5, 0, 1):
+ return "cyan_skillfish2_sdma";
+ case IP_VERSION(5, 0, 2):
+ return "navi14_sdma";
+ case IP_VERSION(5, 0, 5):
+ return "navi12_sdma";
+ case IP_VERSION(5, 2, 0):
+ return "sienna_cichlid_sdma";
+ case IP_VERSION(5, 2, 2):
+ return "navy_flounder_sdma";
+ case IP_VERSION(5, 2, 4):
+ return "dimgrey_cavefish_sdma";
+ case IP_VERSION(5, 2, 5):
+ return "beige_goby_sdma";
+ case IP_VERSION(5, 2, 3):
+ return "yellow_carp_sdma";
+ case IP_VERSION(5, 2, 1):
+ return "vangogh_sdma";
+ }
+ } else if (block_type == UVD_HWIP) {
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
+ case IP_VERSION(1, 0, 0):
+ case IP_VERSION(1, 0, 1):
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ return "raven2_vcn";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ return "picasso_vcn";
+ return "raven_vcn";
+ case IP_VERSION(2, 5, 0):
+ return "arcturus_vcn";
+ case IP_VERSION(2, 2, 0):
+ if (adev->apu_flags & AMD_APU_IS_RENOIR)
+ return "renoir_vcn";
+ return "green_sardine_vcn";
+ case IP_VERSION(2, 6, 0):
+ return "aldebaran_vcn";
+ case IP_VERSION(2, 0, 0):
+ return "navi10_vcn";
+ case IP_VERSION(2, 0, 2):
+ if (adev->asic_type == CHIP_NAVI12)
+ return "navi12_vcn";
+ return "navi14_vcn";
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 64):
+ case IP_VERSION(3, 0, 192):
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 0))
+ return "sienna_cichlid_vcn";
+ return "navy_flounder_vcn";
+ case IP_VERSION(3, 0, 2):
+ return "vangogh_vcn";
+ case IP_VERSION(3, 0, 16):
+ return "dimgrey_cavefish_vcn";
+ case IP_VERSION(3, 0, 33):
+ return "beige_goby_vcn";
+ case IP_VERSION(3, 1, 1):
+ return "yellow_carp_vcn";
+ }
+ } else if (block_type == GC_HWIP) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
+ return "vega10";
+ case IP_VERSION(9, 2, 1):
+ return "vega12";
+ case IP_VERSION(9, 4, 0):
+ return "vega20";
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ return "raven2";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ return "picasso";
+ return "raven";
+ case IP_VERSION(9, 4, 1):
+ return "arcturus";
+ case IP_VERSION(9, 3, 0):
+ if (adev->apu_flags & AMD_APU_IS_RENOIR)
+ return "renoir";
+ return "green_sardine";
+ case IP_VERSION(9, 4, 2):
+ return "aldebaran";
+ case IP_VERSION(10, 1, 10):
+ return "navi10";
+ case IP_VERSION(10, 1, 1):
+ return "navi14";
+ case IP_VERSION(10, 1, 2):
+ return "navi12";
+ case IP_VERSION(10, 3, 0):
+ return "sienna_cichlid";
+ case IP_VERSION(10, 3, 2):
+ return "navy_flounder";
+ case IP_VERSION(10, 3, 1):
+ return "vangogh";
+ case IP_VERSION(10, 3, 4):
+ return "dimgrey_cavefish";
+ case IP_VERSION(10, 3, 5):
+ return "beige_goby";
+ case IP_VERSION(10, 3, 3):
+ return "yellow_carp";
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ return "cyan_skillfish2";
+ }
+ }
+ return NULL;
+}
+
+bool amdgpu_is_kicker_fw(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(kicker_device_list); i++) {
+ if (adev->pdev->device == kicker_device_list[i].device &&
+ adev->pdev->revision == kicker_device_list[i].revision)
+ return true;
+ }
+
+ return false;
+}
+
+void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len)
+{
+ int maj, min, rev;
+ char *ip_name;
+ const char *legacy;
+ uint32_t version = amdgpu_ip_version(adev, block_type, 0);
+
+ legacy = amdgpu_ucode_legacy_naming(adev, block_type);
+ if (legacy) {
+ snprintf(ucode_prefix, len, "%s", legacy);
+ return;
+ }
+
+ switch (block_type) {
+ case GC_HWIP:
+ ip_name = "gc";
+ break;
+ case SDMA0_HWIP:
+ ip_name = "sdma";
+ break;
+ case MP0_HWIP:
+ ip_name = "psp";
+ break;
+ case MP1_HWIP:
+ ip_name = "smu";
+ break;
+ case UVD_HWIP:
+ ip_name = "vcn";
+ break;
+ case VPE_HWIP:
+ ip_name = "vpe";
+ break;
+ case ISP_HWIP:
+ ip_name = "isp";
+ break;
+ default:
+ BUG();
+ }
+
+ maj = IP_VERSION_MAJ(version);
+ min = IP_VERSION_MIN(version);
+ rev = IP_VERSION_REV(version);
+
+ snprintf(ucode_prefix, len, "%s_%d_%d_%d", ip_name, maj, min, rev);
+}
+
+/*
+ * amdgpu_ucode_request - Fetch and validate amdgpu microcode
+ *
+ * @adev: amdgpu device
+ * @fw: pointer to load firmware to
+ * @required: whether the firmware is required
+ * @fmt: firmware name format string
+ * @...: variable arguments
+ *
+ * This is a helper that will use request_firmware and amdgpu_ucode_validate
+ * to load and run basic validation on firmware. If the load fails, remap
+ * the error code to -ENODEV, so that early_init functions will fail to load.
+ */
+int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw,
+ enum amdgpu_ucode_required required, const char *fmt, ...)
+{
+ char fname[AMDGPU_UCODE_NAME_MAX];
+ va_list ap;
+ int r;
+
+ va_start(ap, fmt);
+ r = vsnprintf(fname, sizeof(fname), fmt, ap);
+ va_end(ap);
+ if (r == sizeof(fname)) {
+ dev_warn(adev->dev, "amdgpu firmware name buffer overflow\n");
+ return -EOVERFLOW;
+ }
+
+ if (required == AMDGPU_UCODE_REQUIRED)
+ r = request_firmware(fw, fname, adev->dev);
+ else {
+ r = firmware_request_nowarn(fw, fname, adev->dev);
+ if (r)
+ drm_info(&adev->ddev, "Optional firmware \"%s\" was not found\n", fname);
+ }
+ if (r)
+ return -ENODEV;
+
+ r = amdgpu_ucode_validate(*fw);
+ if (r)
+ /*
+ * The amdgpu_ucode_request() should be paired with amdgpu_ucode_release()
+ * regardless of success/failure, and the amdgpu_ucode_release() takes care of
+ * firmware release and need to avoid redundant release FW operation here.
+ */
+ dev_dbg(adev->dev, "\"%s\" failed to validate\n", fname);
+
+ return r;
+}
+
+/*
+ * amdgpu_ucode_release - Release firmware microcode
+ *
+ * @fw: pointer to firmware to release
+ */
+void amdgpu_ucode_release(const struct firmware **fw)
+{
+ release_firmware(*fw);
+ *fw = NULL;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 7c2538db3cd5..6349aad6da35 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -25,6 +25,8 @@
#include "amdgpu_socbb.h"
+#define RS64_FW_UC_START_ADDR_LO 0x3000
+
struct common_firmware_header {
uint32_t size_bytes; /* size of the entire header+image(s) in bytes */
uint32_t header_size_bytes; /* size of just the header in bytes */
@@ -124,6 +126,10 @@ enum psp_fw_type {
PSP_FW_TYPE_PSP_SOC_DRV,
PSP_FW_TYPE_PSP_INTF_DRV,
PSP_FW_TYPE_PSP_DBG_DRV,
+ PSP_FW_TYPE_PSP_RAS_DRV,
+ PSP_FW_TYPE_PSP_IPKEYMGR_DRV,
+ PSP_FW_TYPE_PSP_SPDM_DRV,
+ PSP_FW_TYPE_MAX_INDEX,
};
/* version_major=2, version_minor=0 */
@@ -133,6 +139,14 @@ struct psp_firmware_header_v2_0 {
struct psp_fw_bin_desc psp_fw_bin[];
};
+/* version_major=2, version_minor=1 */
+struct psp_firmware_header_v2_1 {
+ struct common_firmware_header header;
+ uint32_t psp_fw_bin_count;
+ uint32_t psp_aux_fw_bin_index;
+ struct psp_fw_bin_desc psp_fw_bin[];
+};
+
/* version_major=1, version_minor=0 */
struct ta_firmware_header_v1_0 {
struct common_firmware_header header;
@@ -152,6 +166,7 @@ enum ta_fw_type {
TA_FW_TYPE_PSP_DTM,
TA_FW_TYPE_PSP_RAP,
TA_FW_TYPE_PSP_SECUREDISPLAY,
+ TA_FW_TYPE_PSP_XGMI_AUX,
TA_FW_TYPE_MAX_INDEX,
};
@@ -170,6 +185,18 @@ struct gfx_firmware_header_v1_0 {
uint32_t jt_size; /* size of jt */
};
+/* version_major=2, version_minor=0 */
+struct gfx_firmware_header_v2_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t ucode_size_bytes;
+ uint32_t ucode_offset_bytes;
+ uint32_t data_size_bytes;
+ uint32_t data_offset_bytes;
+ uint32_t ucode_start_addr_lo;
+ uint32_t ucode_start_addr_hi;
+};
+
/* version_major=1, version_minor=0 */
struct mes_firmware_header_v1_0 {
struct common_firmware_header header;
@@ -236,7 +263,7 @@ struct rlc_firmware_header_v2_1 {
uint32_t save_restore_list_srm_offset_bytes;
};
-/* version_major=2, version_minor=1 */
+/* version_major=2, version_minor=2 */
struct rlc_firmware_header_v2_2 {
struct rlc_firmware_header_v2_1 v2_1;
uint32_t rlc_iram_ucode_size_bytes;
@@ -245,6 +272,34 @@ struct rlc_firmware_header_v2_2 {
uint32_t rlc_dram_ucode_offset_bytes;
};
+/* version_major=2, version_minor=3
+ *
+ * Extends the v2.2 RLC header: the v2.2 layout is embedded as the first
+ * member and followed by version / feature version / size / offset
+ * fields for two more images, "rlcp" and "rlcv".
+ */
+struct rlc_firmware_header_v2_3 {
+ struct rlc_firmware_header_v2_2 v2_2; /* first member, so a v2_3 header starts with a full v2_2 header */
+ uint32_t rlcp_ucode_version;
+ uint32_t rlcp_ucode_feature_version;
+ uint32_t rlcp_ucode_size_bytes;
+ uint32_t rlcp_ucode_offset_bytes;
+ uint32_t rlcv_ucode_version;
+ uint32_t rlcv_ucode_feature_version;
+ uint32_t rlcv_ucode_size_bytes;
+ uint32_t rlcv_ucode_offset_bytes;
+};
+
+/* version_major=2, version_minor=4
+ *
+ * Extends the v2.3 RLC header: the v2.3 layout is embedded as the first
+ * member and followed by five size/offset pairs for the "tap delays"
+ * images (one global, one each for se0..se3).
+ */
+struct rlc_firmware_header_v2_4 {
+ struct rlc_firmware_header_v2_3 v2_3; /* first member, so a v2_4 header starts with a full v2_3 header */
+ uint32_t global_tap_delays_ucode_size_bytes;
+ uint32_t global_tap_delays_ucode_offset_bytes;
+ uint32_t se0_tap_delays_ucode_size_bytes;
+ uint32_t se0_tap_delays_ucode_offset_bytes;
+ uint32_t se1_tap_delays_ucode_size_bytes;
+ uint32_t se1_tap_delays_ucode_offset_bytes;
+ uint32_t se2_tap_delays_ucode_size_bytes;
+ uint32_t se2_tap_delays_ucode_offset_bytes;
+ uint32_t se3_tap_delays_ucode_size_bytes;
+ uint32_t se3_tap_delays_ucode_offset_bytes;
+};
+
/* version_major=1, version_minor=0 */
struct sdma_firmware_header_v1_0 {
struct common_firmware_header header;
@@ -260,6 +315,57 @@ struct sdma_firmware_header_v1_1 {
uint32_t digest_size;
};
+/* version_major=2, version_minor=0
+ *
+ * SDMA firmware header describing two threads, "context" (ctx_*) and
+ * "control" (ctl_*), each with a ucode size and a jump table (jt)
+ * offset/size. Only the control thread has an explicit ucode offset field.
+ */
+struct sdma_firmware_header_v2_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t ctx_ucode_size_bytes; /* context thread ucode size */
+ uint32_t ctx_jt_offset; /* context thread jt location */
+ uint32_t ctx_jt_size; /* context thread size of jt */
+ uint32_t ctl_ucode_offset; /* NOTE(review): presumably where the control thread ucode starts -- confirm the unit and base */
+ uint32_t ctl_ucode_size_bytes; /* control thread ucode size */
+ uint32_t ctl_jt_offset; /* control thread jt location */
+ uint32_t ctl_jt_size; /* control thread size of jt */
+};
+
+/* version_major=1, version_minor=0
+ *
+ * VPE firmware header. The member list is field-for-field the same as
+ * struct sdma_firmware_header_v2_0: a context (ctx_*) and a control
+ * (ctl_*) thread, each with a ucode size and a jump table offset/size.
+ */
+struct vpe_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t ctx_ucode_size_bytes; /* context thread ucode size */
+ uint32_t ctx_jt_offset; /* context thread jt location */
+ uint32_t ctx_jt_size; /* context thread size of jt */
+ uint32_t ctl_ucode_offset; /* NOTE(review): presumably where the control thread ucode starts -- confirm the unit and base */
+ uint32_t ctl_ucode_size_bytes; /* control thread ucode size */
+ uint32_t ctl_jt_offset; /* control thread jt location */
+ uint32_t ctl_jt_size; /* control thread size of jt */
+};
+
+/* version_major=1, version_minor=0
+ *
+ * UMSCH_MM firmware header: version / size / offset triples for the
+ * ucode image and for the ucode data image, followed by three start
+ * addresses (irq, uc, data), each split into lo/hi 32-bit words.
+ */
+struct umsch_mm_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t umsch_mm_ucode_version;
+ uint32_t umsch_mm_ucode_size_bytes;
+ uint32_t umsch_mm_ucode_offset_bytes;
+ uint32_t umsch_mm_ucode_data_version;
+ uint32_t umsch_mm_ucode_data_size_bytes;
+ uint32_t umsch_mm_ucode_data_offset_bytes;
+ uint32_t umsch_mm_irq_start_addr_lo;
+ uint32_t umsch_mm_irq_start_addr_hi;
+ uint32_t umsch_mm_uc_start_addr_lo;
+ uint32_t umsch_mm_uc_start_addr_hi;
+ uint32_t umsch_mm_data_start_addr_lo;
+ uint32_t umsch_mm_data_start_addr_hi;
+};
+
+/* version_major=3, version_minor=0
+ *
+ * SDMA firmware header describing a single ucode image. Note the field
+ * order: the offset comes before the size here, the reverse of
+ * struct gfx_firmware_header_v2_0.
+ */
+struct sdma_firmware_header_v3_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t ucode_offset_bytes;
+ uint32_t ucode_size_bytes;
+};
+
/* gpu info payload */
struct gpu_info_firmware_v1_0 {
uint32_t gc_num_se;
@@ -313,6 +419,15 @@ struct dmcub_firmware_header_v1_0 {
uint32_t bss_data_bytes; /* size of bss/data region, in bytes */
};
+/* version_major=1, version_minor=0
+ *
+ * IMU firmware header: one size/offset pair for the "iram" ucode image
+ * and one for the "dram" ucode image.
+ */
+struct imu_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t imu_iram_ucode_size_bytes;
+ uint32_t imu_iram_ucode_offset_bytes;
+ uint32_t imu_dram_ucode_size_bytes;
+ uint32_t imu_dram_ucode_offset_bytes;
+};
+
/* header is fixed size */
union amdgpu_firmware_header {
struct common_firmware_header common;
@@ -323,27 +438,36 @@ union amdgpu_firmware_header {
struct psp_firmware_header_v1_1 psp_v1_1;
struct psp_firmware_header_v1_3 psp_v1_3;
struct psp_firmware_header_v2_0 psp_v2_0;
+ struct psp_firmware_header_v2_0 psp_v2_1;
struct ta_firmware_header_v1_0 ta;
struct ta_firmware_header_v2_0 ta_v2_0;
struct gfx_firmware_header_v1_0 gfx;
+ struct gfx_firmware_header_v2_0 gfx_v2_0;
struct rlc_firmware_header_v1_0 rlc;
struct rlc_firmware_header_v2_0 rlc_v2_0;
struct rlc_firmware_header_v2_1 rlc_v2_1;
+ struct rlc_firmware_header_v2_2 rlc_v2_2;
+ struct rlc_firmware_header_v2_3 rlc_v2_3;
+ struct rlc_firmware_header_v2_4 rlc_v2_4;
struct sdma_firmware_header_v1_0 sdma;
struct sdma_firmware_header_v1_1 sdma_v1_1;
+ struct sdma_firmware_header_v2_0 sdma_v2_0;
+ struct sdma_firmware_header_v3_0 sdma_v3_0;
struct gpu_info_firmware_header_v1_0 gpu_info;
struct dmcu_firmware_header_v1_0 dmcu;
struct dmcub_firmware_header_v1_0 dmcub;
+ struct imu_firmware_header_v1_0 imu;
uint8_t raw[0x100];
};
-#define UCODE_MAX_PSP_PACKAGING ((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc))
+#define UCODE_MAX_PSP_PACKAGING (((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc)) * 2)
/*
* fw loading support
*/
enum AMDGPU_UCODE_ID {
- AMDGPU_UCODE_ID_SDMA0 = 0,
+ AMDGPU_UCODE_ID_CAP = 0,
+ AMDGPU_UCODE_ID_SDMA0,
AMDGPU_UCODE_ID_SDMA1,
AMDGPU_UCODE_ID_SDMA2,
AMDGPU_UCODE_ID_SDMA3,
@@ -351,23 +475,49 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_SDMA5,
AMDGPU_UCODE_ID_SDMA6,
AMDGPU_UCODE_ID_SDMA7,
+ AMDGPU_UCODE_ID_SDMA_UCODE_TH0,
+ AMDGPU_UCODE_ID_SDMA_UCODE_TH1,
+ AMDGPU_UCODE_ID_SDMA_RS64,
AMDGPU_UCODE_ID_CP_CE,
AMDGPU_UCODE_ID_CP_PFP,
AMDGPU_UCODE_ID_CP_ME,
+ AMDGPU_UCODE_ID_CP_RS64_PFP,
+ AMDGPU_UCODE_ID_CP_RS64_ME,
+ AMDGPU_UCODE_ID_CP_RS64_MEC,
+ AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK,
AMDGPU_UCODE_ID_CP_MEC1,
AMDGPU_UCODE_ID_CP_MEC1_JT,
AMDGPU_UCODE_ID_CP_MEC2,
AMDGPU_UCODE_ID_CP_MEC2_JT,
AMDGPU_UCODE_ID_CP_MES,
AMDGPU_UCODE_ID_CP_MES_DATA,
+ AMDGPU_UCODE_ID_CP_MES1,
+ AMDGPU_UCODE_ID_CP_MES1_DATA,
+ AMDGPU_UCODE_ID_IMU_I,
+ AMDGPU_UCODE_ID_IMU_D,
+ AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS,
+ AMDGPU_UCODE_ID_SE0_TAP_DELAYS,
+ AMDGPU_UCODE_ID_SE1_TAP_DELAYS,
+ AMDGPU_UCODE_ID_SE2_TAP_DELAYS,
+ AMDGPU_UCODE_ID_SE3_TAP_DELAYS,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM,
AMDGPU_UCODE_ID_RLC_IRAM,
AMDGPU_UCODE_ID_RLC_DRAM,
+ AMDGPU_UCODE_ID_RLC_P,
+ AMDGPU_UCODE_ID_RLC_V,
AMDGPU_UCODE_ID_RLC_G,
AMDGPU_UCODE_ID_STORAGE,
AMDGPU_UCODE_ID_SMC,
+ AMDGPU_UCODE_ID_PPTABLE,
AMDGPU_UCODE_ID_UVD,
AMDGPU_UCODE_ID_UVD1,
AMDGPU_UCODE_ID_VCE,
@@ -378,6 +528,15 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_VCN0_RAM,
AMDGPU_UCODE_ID_VCN1_RAM,
AMDGPU_UCODE_ID_DMCUB,
+ AMDGPU_UCODE_ID_VPE_CTX,
+ AMDGPU_UCODE_ID_VPE_CTL,
+ AMDGPU_UCODE_ID_VPE,
+ AMDGPU_UCODE_ID_UMSCH_MM_UCODE,
+ AMDGPU_UCODE_ID_UMSCH_MM_DATA,
+ AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER,
+ AMDGPU_UCODE_ID_P2S_TABLE,
+ AMDGPU_UCODE_ID_JPEG_RAM,
+ AMDGPU_UCODE_ID_ISP,
AMDGPU_UCODE_ID_MAXIMUM,
};
@@ -390,11 +549,16 @@ enum AMDGPU_UCODE_STATUS {
enum amdgpu_firmware_load_type { /* only DIRECT has an explicit value; the rest number upwards in declaration order */
AMDGPU_FW_LOAD_DIRECT = 0,
- AMDGPU_FW_LOAD_SMU,
AMDGPU_FW_LOAD_PSP, /* value 1 after this change (was 2 while SMU was declared first) */
+ AMDGPU_FW_LOAD_SMU, /* value 2 after this change (was 1) */
AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO,
};
+enum amdgpu_ucode_required { /* type of the 'required' argument of amdgpu_ucode_request() */
+ AMDGPU_UCODE_OPTIONAL, /* first enumerator, so its value is 0 */
+ AMDGPU_UCODE_REQUIRED, /* value 1 */
+};
+
/* conform to smu_ucode_xfer_cz.h */
#define AMDGPU_SDMA0_UCODE_LOADED 0x00000001
#define AMDGPU_SDMA1_UCODE_LOADED 0x00000002
@@ -438,16 +602,26 @@ struct amdgpu_firmware {
void *fw_buf_ptr;
uint64_t fw_buf_mc;
+ uint32_t pldm_version;
+};
+
+struct kicker_device{ /* NOTE(review): presumably one entry of a table consulted by amdgpu_is_kicker_fw() -- confirm */
+ unsigned short device; /* presumably a PCI device ID -- confirm */
+ u8 revision; /* presumably the PCI revision ID paired with 'device' -- confirm */
};
void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr);
+void amdgpu_ucode_print_imu_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr);
-int amdgpu_ucode_validate(const struct firmware *fw);
+__printf(4, 5)
+int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw,
+ enum amdgpu_ucode_required required, const char *fmt, ...);
+void amdgpu_ucode_release(const struct firmware **fw);
bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
uint16_t hdr_major, uint16_t hdr_minor);
@@ -462,4 +636,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type);
const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id);
+void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len);
+bool amdgpu_is_kicker_fw(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index a90029ee9733..3f0b0e9af4f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -21,126 +21,363 @@
*
*/
-#include "amdgpu_ras.h"
+#include <linux/sort.h>
+#include "amdgpu.h"
+#include "umc_v6_7.h"
+#include "amdgpu_ras_mgr.h"
+#define MAX_UMC_POISON_POLLING_TIME_SYNC 20 //ms
-int amdgpu_umc_ras_late_init(struct amdgpu_device *adev)
+#define MAX_UMC_HASH_STRING_SIZE 256
+
+/*
+ * Convert a UMC channel error address through the version specific helper.
+ * Only UMC IP 6.7.0 has a converter here; every other version warns and
+ * returns AMDGPU_RAS_FAIL. Returns AMDGPU_RAS_SUCCESS otherwise.
+ */
+static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev,
+		struct ras_err_data *err_data, uint64_t err_addr,
+		uint32_t ch_inst, uint32_t umc_inst)
{
- int r;
- struct ras_fs_if fs_info = {
- .sysfs_name = "umc_err_count",
- };
- struct ras_ih_if ih_info = {
- .cb = amdgpu_umc_process_ras_data_cb,
- };
+	if (amdgpu_ip_version(adev, UMC_HWIP, 0) != IP_VERSION(6, 7, 0)) {
+		dev_warn(adev->dev,
+			 "UMC address to Physical address translation is not supported\n");
+		return AMDGPU_RAS_FAIL;
+	}
+
+	umc_v6_7_convert_error_address(adev, err_data, err_addr,
+				       ch_inst, umc_inst);
+
+	return AMDGPU_RAS_SUCCESS;
+}
+
+/**
+ * amdgpu_umc_page_retirement_mca - retire the pages behind one UMC error address
+ * @adev: amdgpu device pointer
+ * @err_addr: UMC channel error address to translate
+ * @ch_inst: channel instance the address belongs to
+ * @umc_inst: UMC instance the address belongs to
+ *
+ * Allocates a temporary record array, translates @err_addr with
+ * amdgpu_umc_convert_error_address() and, when amdgpu_bad_page_threshold
+ * is non-zero, adds and saves the resulting bad pages.
+ *
+ * Return: the amdgpu_ras_error_data_init() error if that fails,
+ * AMDGPU_RAS_FAIL if the allocation fails, otherwise the value returned
+ * by amdgpu_umc_convert_error_address().
+ */
+int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
+ uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst)
+{
+ struct ras_err_data err_data;
+ int ret;
+
+ ret = amdgpu_ras_error_data_init(&err_data);
+ if (ret)
+ return ret;
- if (!adev->umc.ras_if) {
- adev->umc.ras_if =
- kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
- if (!adev->umc.ras_if)
- return -ENOMEM;
- adev->umc.ras_if->block = AMDGPU_RAS_BLOCK__UMC;
- adev->umc.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
- adev->umc.ras_if->sub_block_index = 0;
+ /* zeroed scratch array, one slot per possible record of a single query */
+ err_data.err_addr =
+ kcalloc(adev->umc.max_ras_err_cnt_per_query,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+ if (!err_data.err_addr) {
+ dev_warn(adev->dev,
+ "Failed to alloc memory for umc error record in MCA notifier!\n");
+ ret = AMDGPU_RAS_FAIL;
+ goto out_fini_err_data;
}
- ih_info.head = fs_info.head = *adev->umc.ras_if;
- r = amdgpu_ras_late_init(adev, adev->umc.ras_if,
- &fs_info, &ih_info);
- if (r)
- goto free;
+ err_data.err_addr_len = adev->umc.max_ras_err_cnt_per_query;
- if (amdgpu_ras_is_supported(adev, adev->umc.ras_if->block)) {
- r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0);
- if (r)
- goto late_fini;
- } else {
- r = 0;
- goto free;
+ /*
+ * Translate UMC channel address to Physical address
+ */
+ ret = amdgpu_umc_convert_error_address(adev, &err_data, err_addr,
+ ch_inst, umc_inst);
+ if (ret)
+ goto out_free_err_addr;
+
+ /* the records are only persisted when bad page retirement is enabled */
+ if (amdgpu_bad_page_threshold != 0) {
+ amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
+ err_data.err_addr_cnt, false);
+ amdgpu_ras_save_bad_pages(adev, NULL);
}
- /* ras init of specific umc version */
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->err_cnt_init)
- adev->umc.ras_funcs->err_cnt_init(adev);
+ /* the success path falls through both labels, so cleanup always runs */
+out_free_err_addr:
+ kfree(err_data.err_addr);
- return 0;
+out_fini_err_data:
+ amdgpu_ras_error_data_fini(&err_data);
-late_fini:
- amdgpu_ras_late_fini(adev, adev->umc.ras_if, &ih_info);
-free:
- kfree(adev->umc.ras_if);
- adev->umc.ras_if = NULL;
- return r;
+ return ret;
}
-void amdgpu_umc_ras_fini(struct amdgpu_device *adev)
+/**
+ * amdgpu_umc_handle_bad_pages - query UMC errors and retire the bad pages
+ * @adev: amdgpu device pointer
+ * @ras_error_status: caller-owned struct ras_err_data, passed as void *
+ *
+ * Allocates one scratch array of adev->umc.max_ras_err_cnt_per_query
+ * eeprom_table_record entries and fills the error counts and addresses in
+ * @ras_error_status through one of three paths: the direct hw_ops query,
+ * the ecc_info query, or the SMU EEPROM record table. When ue_count or
+ * de_count is non-zero, amdgpu_bad_page_threshold is non-zero and
+ * addresses were collected, the bad pages are added and saved and the
+ * totals are reported through amdgpu_dpm_send_hbm_*().
+ *
+ * The scratch array is freed and err_data->err_addr reset to NULL before
+ * returning, so only the counters remain valid for the caller. Everything
+ * after the allocation runs under con->page_retirement_lock.
+ */
+void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
+		void *ras_error_status)
{
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) &&
- adev->umc.ras_if) {
- struct ras_common_if *ras_if = adev->umc.ras_if;
- struct ras_ih_if ih_info = {
- .head = *ras_if,
- .cb = amdgpu_umc_process_ras_data_cb,
- };
+	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct amdgpu_ras_eeprom_control *control = &con->eeprom_control;
+	unsigned int error_query_mode;
+	int ret = 0;
+	unsigned long err_count;
+
+	amdgpu_ras_get_error_query_mode(adev, &error_query_mode);
- amdgpu_ras_late_fini(adev, ras_if, &ih_info);
- kfree(ras_if);
+	/*
+	 * The record array is allocated exactly once, here. The query paths
+	 * below must not allocate it again: overwriting err_data->err_addr
+	 * would leak this buffer, because only the pointer held at the end
+	 * of the function is passed to kfree().
+	 */
+	err_data->err_addr =
+		kcalloc(adev->umc.max_ras_err_cnt_per_query,
+			sizeof(struct eeprom_table_record), GFP_KERNEL);
+
+	/* still call query_ras_error_address to clear error status
+	 * even NOMEM error is encountered
+	 */
+	if (!err_data->err_addr)
+		dev_warn(adev->dev,
+			"Failed to alloc memory for umc error address record!\n");
+	else
+		err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query;
+
+	mutex_lock(&con->page_retirement_lock);
+	if (!amdgpu_ras_smu_eeprom_supported(adev)) {
+		ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc));
+		if (ret == -EOPNOTSUPP &&
+		    error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) {
+			/* no ecc_info table available: query through hw_ops */
+			if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+			    adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
+				adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev,
+						ras_error_status);
+
+			if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+			    adev->umc.ras->ras_block.hw_ops->query_ras_error_address &&
+			    adev->umc.max_ras_err_cnt_per_query) {
+				/* umc query_ras_error_address is also responsible for clearing
+				 * error status
+				 */
+				adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev,
+						ras_error_status);
+			}
+		} else if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY ||
+			   (!ret && error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY)) {
+			/* ecc_info table was fetched (or firmware query mode): use it */
+			if (adev->umc.ras &&
+			    adev->umc.ras->ecc_info_query_ras_error_count)
+				adev->umc.ras->ecc_info_query_ras_error_count(adev,
+						ras_error_status);
+
+			if (adev->umc.ras &&
+			    adev->umc.ras->ecc_info_query_ras_error_address &&
+			    adev->umc.max_ras_err_cnt_per_query) {
+				/* umc query_ras_error_address is also responsible for clearing
+				 * error status
+				 */
+				adev->umc.ras->ecc_info_query_ras_error_address(adev,
+						ras_error_status);
+			}
+		}
+	} else {
+		/*
+		 * SMU EEPROM path: the new records are the ones appended since
+		 * ras_num_recs_old.
+		 * NOTE(review): err_data->err_addr may be NULL here after an
+		 * allocation failure, and de_count is not clamped to
+		 * err_addr_len -- confirm amdgpu_ras_eeprom_read_idx() copes.
+		 */
+		if (!amdgpu_ras_eeprom_update_record_num(control)) {
+			err_data->err_addr_cnt = err_data->de_count =
+				control->ras_num_recs - control->ras_num_recs_old;
+			amdgpu_ras_eeprom_read_idx(control, err_data->err_addr,
+				control->ras_num_recs_old, err_data->de_count);
+		}
}
+
+	/* only uncorrectable error needs gpu reset */
+	if (err_data->ue_count || err_data->de_count) {
+		err_count = err_data->ue_count + err_data->de_count;
+		if ((amdgpu_bad_page_threshold != 0) &&
+		    err_data->err_addr_cnt) {
+			amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
+				err_data->err_addr_cnt, amdgpu_ras_smu_eeprom_supported(adev));
+			amdgpu_ras_save_bad_pages(adev, &err_count);
+
+			amdgpu_dpm_send_hbm_bad_pages_num(adev,
+				con->eeprom_control.ras_num_bad_pages);
+
+			if (con->update_channel_flag == true) {
+				amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap);
+				con->update_channel_flag = false;
+			}
+		}
+	}
+
+	/* the scratch array never outlives this call */
+	kfree(err_data->err_addr);
+	err_data->err_addr = NULL;
+
+	mutex_unlock(&con->page_retirement_lock);
}
-int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
+/*
+ * Common page retirement path: flag the SRAM ECC event to KFD, let
+ * amdgpu_umc_handle_bad_pages() fill ras_error_status and retire pages,
+ * then trigger a GPU reset when errors were found and either 'reset' is
+ * non-zero or amdgpu_ras_is_rma() is true. 'entry' is not read here.
+ * Always returns AMDGPU_RAS_SUCCESS.
+ */
+static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
void *ras_error_status,
- struct amdgpu_iv_entry *entry)
+ struct amdgpu_iv_entry *entry,
+ uint32_t reset)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->query_ras_error_count)
- adev->umc.ras_funcs->query_ras_error_count(adev, ras_error_status);
-
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->query_ras_error_address &&
- adev->umc.max_ras_err_cnt_per_query) {
- err_data->err_addr =
- kcalloc(adev->umc.max_ras_err_cnt_per_query,
- sizeof(struct eeprom_table_record), GFP_KERNEL);
+ amdgpu_umc_handle_bad_pages(adev, ras_error_status);
+
+ /* reset only if something was actually found (ue or de count non-zero) */
+ if ((err_data->ue_count || err_data->de_count) &&
+ (reset || amdgpu_ras_is_rma(adev))) {
+ /* 'reset' is OR-ed in as a flag mask; it may be 0 on the RMA path */
+ con->gpu_reset_flags |= reset;
+ amdgpu_ras_reset_gpu(adev);
+ }
- /* still call query_ras_error_address to clear error status
- * even NOMEM error is encountered
- */
- if(!err_data->err_addr)
- dev_warn(adev->dev, "Failed to alloc memory for "
- "umc error address record!\n");
+ return AMDGPU_RAS_SUCCESS;
+}
+
+/**
+ * amdgpu_umc_pasid_poison_handler - handle a poison consumption event
+ * @adev: amdgpu device pointer
+ * @block: RAS block that consumed the poison
+ * @pasid: PASID forwarded to the poison request / interrupt info
+ * @pasid_fn: callback forwarded together with @pasid
+ * @data: opaque pointer forwarded together with @pasid_fn
+ * @reset: reset flags; non-zero requests a GPU reset
+ *
+ * Dispatches on the platform:
+ *  - connected_to_cpu or app APU: only resets the GPU (when @reset is set);
+ *  - bare metal, UMC IP older than 12.0.0: runs page retirement inline and
+ *    folds the counts into the UMC ras_manager object;
+ *  - bare metal, uniras enabled: forwards to the RAS manager;
+ *  - bare metal, otherwise: queues a poison request and wakes the page
+ *    retirement wait queue;
+ *  - SR-IOV VF: delegates to virt.ops->ras_poison_handler if present.
+ *
+ * Return: AMDGPU_RAS_SUCCESS, or the error from
+ * amdgpu_ras_error_data_init() / amdgpu_umc_do_page_retirement() on the
+ * inline path.
+ */
+int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset)
+{
+ int ret = AMDGPU_RAS_SUCCESS;
- /* umc query_ras_error_address is also responsible for clearing
- * error status
- */
- adev->umc.ras_funcs->query_ras_error_address(adev, ras_error_status);
+ if (adev->gmc.xgmi.connected_to_cpu ||
+ adev->gmc.is_app_apu) {
+ if (reset) {
+ /* MCA poison handler is only responsible for GPU reset,
+ * let MCA notifier do page retirement.
+ */
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ amdgpu_ras_reset_gpu(adev);
+ }
+ return ret;
}
- /* only uncorrectable error needs gpu reset */
- if (err_data->ue_count) {
- dev_info(adev->dev, "%ld uncorrectable hardware errors "
- "detected in UMC block\n",
- err_data->ue_count);
+ if (!amdgpu_sriov_vf(adev)) {
+ if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0)) {
+ struct ras_err_data err_data;
+ struct ras_common_if head = {
+ .block = AMDGPU_RAS_BLOCK__UMC,
+ };
+ /* obj may be NULL; it is checked before the counters are merged */
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
- if ((amdgpu_bad_page_threshold != 0) &&
- err_data->err_addr_cnt) {
- amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
- err_data->err_addr_cnt);
- amdgpu_ras_save_bad_pages(adev);
+ ret = amdgpu_ras_error_data_init(&err_data);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset);
+
+ /* accumulate this event's counts into the per-block totals */
+ if (ret == AMDGPU_RAS_SUCCESS && obj) {
+ obj->err_data.ue_count += err_data.ue_count;
+ obj->err_data.ce_count += err_data.ce_count;
+ obj->err_data.de_count += err_data.de_count;
+ }
+
+ amdgpu_ras_error_data_fini(&err_data);
+ } else if (amdgpu_uniras_enabled(adev)) {
+ struct ras_ih_info ih_info = {0};
- if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->send_hbm_bad_pages_num)
- adev->smu.ppt_funcs->send_hbm_bad_pages_num(&adev->smu, con->eeprom_control.ras_num_recs);
+ ih_info.block = block;
+ ih_info.pasid = pasid;
+ ih_info.reset = reset;
+ ih_info.pasid_fn = pasid_fn;
+ ih_info.data = data;
+ /* return value of the RAS manager call is not propagated */
+ amdgpu_ras_mgr_handle_consumer_interrupt(adev, &ih_info);
+ } else {
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ /*
+ * NOTE(review): this 'ret' shadows the function-level one, so a
+ * failing amdgpu_ras_put_poison_req() is not reported to the
+ * caller (the outer ret stays AMDGPU_RAS_SUCCESS) -- confirm
+ * that this is intended.
+ */
+ int ret;
+
+ ret = amdgpu_ras_put_poison_req(adev,
+ block, pasid, pasid_fn, data, reset);
+ if (!ret) {
+ atomic_inc(&con->page_retirement_req_cnt);
+ atomic_inc(&con->poison_consumption_count);
+ wake_up(&con->page_retirement_wq);
+ }
}
+ } else {
+ if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
+ adev->virt.ops->ras_poison_handler(adev, block);
+ else
+ dev_warn(adev->dev,
+ "No ras_poison_handler interface in SRIOV!\n");
+ }
- amdgpu_ras_reset_gpu(adev);
+ return ret;
+}
+
+/*
+ * Poison handler for callers without PASID context: forwards to
+ * amdgpu_umc_pasid_poison_handler() with PASID 0 and no notify
+ * callback or callback data.
+ */
+int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
+		enum amdgpu_ras_block block, uint32_t reset)
+{
+	const uint16_t no_pasid = 0;
+
+	return amdgpu_umc_pasid_poison_handler(adev, block, no_pasid,
+					       NULL, NULL, reset);
+}
+
+/*
+ * RAS callback for the UMC block: runs the common page retirement path
+ * and always asks for a mode-1 reset.
+ */
+int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
+		void *ras_error_status,
+		struct amdgpu_iv_entry *entry)
+{
+	const uint32_t reset_mode = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+
+	return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry,
+					     reset_mode);
+}
+
+/*
+ * Register the UMC RAS block (if the IP provides one) and fill in its
+ * common fields. The late-init hook and RAS callback fall back to the
+ * generic UMC implementations when the IP did not set its own.
+ * Returns 0 when there is nothing to register or on success, otherwise
+ * the registration error.
+ */
+int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_umc_ras *ras = adev->umc.ras;
+	struct amdgpu_ras_block_object *blk;
+	int err;
+
+	if (!ras)
+		return 0;
+
+	blk = &ras->ras_block;
+
+	err = amdgpu_ras_register_ras_block(adev, blk);
+	if (err) {
+		dev_err(adev->dev, "Failed to register umc ras block!\n");
+		return err;
}
- kfree(err_data->err_addr);
- return AMDGPU_RAS_SUCCESS;
+	strcpy(blk->ras_comm.name, "umc");
+	blk->ras_comm.block = AMDGPU_RAS_BLOCK__UMC;
+	blk->ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+	adev->umc.ras_if = &blk->ras_comm;
+
+	/* keep IP specific hooks, only fill the ones left unset */
+	if (!blk->ras_late_init)
+		blk->ras_late_init = amdgpu_umc_ras_late_init;
+
+	if (!blk->ras_cb)
+		blk->ras_cb = amdgpu_umc_process_ras_data_cb;
+
+	return 0;
+}
+
+/*
+ * Late init for the UMC RAS block: run the generic block late init,
+ * then (bare metal only) take a reference on the ECC interrupt when RAS
+ * is supported for the block and run the IP specific error counter init.
+ * If the interrupt cannot be taken, the block late init is undone and
+ * the error is returned.
+ */
+int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+	int r = amdgpu_ras_block_late_init(adev, ras_block);
+
+	if (r)
+		return r;
+
+	/* nothing more to do on a VF; r is 0 at this point */
+	if (amdgpu_sriov_vf(adev))
+		return r;
+
+	if (amdgpu_ras_is_supported(adev, ras_block->block)) {
+		r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0);
+		if (r) {
+			amdgpu_ras_block_late_fini(adev, ras_block);
+			return r;
+		}
+	}
+
+	/* ras init of specific umc version */
+	if (adev->umc.ras && adev->umc.ras->err_cnt_init)
+		adev->umc.ras->err_cnt_init(adev);
+
+	return 0;
}
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
@@ -160,3 +397,231 @@ int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
return 0;
}
+
+/*
+ * Append one record to err_data->err_addr[] and bump err_addr_cnt.
+ * Returns -EINVAL when err_data or its record array is missing or the
+ * array is already full (err_addr_cnt >= err_addr_len), 0 otherwise.
+ */
+int amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
+		uint64_t err_addr,
+		uint64_t retired_page,
+		uint32_t channel_index,
+		uint32_t umc_inst)
+{
+	struct eeprom_table_record *rec;
+
+	if (!err_data || !err_data->err_addr)
+		return -EINVAL;
+
+	if (err_data->err_addr_cnt >= err_data->err_addr_len)
+		return -EINVAL;
+
+	rec = &err_data->err_addr[err_data->err_addr_cnt];
+
+	rec->address = err_addr;
+	/* page frame address is saved */
+	rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
+	rec->ts = (uint64_t)ktime_get_real_seconds();
+	rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
+	rec->cu = 0;
+	rec->mem_channel = channel_index;
+	rec->mcumc_id = umc_inst;
+
+	err_data->err_addr_cnt++;
+
+	return 0;
+}
+
+/*
+ * Call @func for every channel of every UMC instance whose bit is set in
+ * umc.active_mask. Stops at the first non-zero return from @func, logs
+ * it and hands it back; returns 0 when every call succeeded.
+ */
+static int amdgpu_umc_loop_all_aid(struct amdgpu_device *adev, umc_func func,
+				   void *data)
+{
+	uint32_t flat_idx;
+	uint32_t node;
+	uint32_t umc;
+	uint32_t ch;
+	int rc;
+
+	/*
+	 * umc.active_mask is indexed by a flat (node, umc) number:
+	 *   umc.umc_inst_num  = maximum number of umc instances per node
+	 *   umc.node_inst_num = maximum number of node instances
+	 * so bit b belongs to node b / umc_inst_num, umc b % umc_inst_num.
+	 * Channel instances are not assumed to be harvested.
+	 */
+	dev_dbg(adev->dev, "active umcs :%lx umc_inst per node: %d",
+		adev->umc.active_mask, adev->umc.umc_inst_num);
+
+	for_each_set_bit(flat_idx, &(adev->umc.active_mask),
+			 adev->umc.node_inst_num * adev->umc.umc_inst_num) {
+		node = flat_idx / adev->umc.umc_inst_num;
+		umc = flat_idx % adev->umc.umc_inst_num;
+
+		for (ch = 0; ch < adev->umc.channel_inst_num; ch++) {
+			dev_dbg(adev->dev,
+				"node_inst :%d umc_inst: %d ch_inst: %d",
+				node, umc, ch);
+
+			rc = func(adev, node, umc, ch, data);
+			if (!rc)
+				continue;
+
+			dev_err(adev->dev,
+				"Node %d umc %d ch %d func returns %d\n",
+				node, umc, ch, rc);
+			return rc;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Call @func once per UMC channel. Three layouts are handled:
+ *  - adev->aid_mask set: delegate to amdgpu_umc_loop_all_aid();
+ *  - umc.node_inst_num non-zero: iterate node, umc and channel instances;
+ *  - otherwise: iterate umc and channel instances with node 0.
+ * The first non-zero return from @func is logged and returned.
+ */
+int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
+		umc_func func, void *data)
+{
+	uint32_t node = 0;
+	uint32_t umc = 0;
+	uint32_t ch = 0;
+	int rc = 0;
+
+	if (adev->aid_mask)
+		return amdgpu_umc_loop_all_aid(adev, func, data);
+
+	if (!adev->umc.node_inst_num) {
+		LOOP_UMC_INST_AND_CH(umc, ch) {
+			rc = func(adev, 0, umc, ch, data);
+			if (rc) {
+				dev_err(adev->dev, "Umc %d ch %d func returns %d\n",
+					umc, ch, rc);
+				return rc;
+			}
+		}
+
+		return 0;
+	}
+
+	LOOP_UMC_EACH_NODE_INST_AND_CH(node, umc, ch) {
+		rc = func(adev, node, umc, ch, data);
+		if (rc) {
+			dev_err(adev->dev, "Node %d umc %d ch %d func returns %d\n",
+				node, umc, ch, rc);
+			return rc;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * amdgpu_umc_update_ecc_status - forward an ECC status update to the UMC IP
+ * @adev: amdgpu device pointer
+ * @status: status value passed through to the IP callback
+ * @ipid: ipid value passed through to the IP callback
+ * @addr: address value passed through to the IP callback
+ *
+ * Return: the result of umc.ras->update_ecc_status(), or 0 when the UMC
+ * RAS object or the callback is not provided.
+ */
+int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev,
+		uint64_t status, uint64_t ipid, uint64_t addr)
+{
+	/*
+	 * adev->umc.ras is optional (amdgpu_umc_ras_sw_init() returns early
+	 * when it is NULL), so test it before the callback, as the other
+	 * helpers in this file do.
+	 */
+	if (adev->umc.ras && adev->umc.ras->update_ecc_status)
+		return adev->umc.ras->update_ecc_status(adev,
+					status, ipid, addr);
+	return 0;
+}
+
+/*
+ * Insert @ecc_err into @ecc_tree keyed by its pa_pfn and, on success, tag
+ * the entry with UMC_ECC_NEW_DETECTED_TAG. Both steps run under the
+ * umc_ecc_log lock. Returns the radix_tree_insert() result.
+ */
+int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev,
+		struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct ras_ecc_log_info *log = &con->umc_ecc_log;
+	int err;
+
+	mutex_lock(&log->lock);
+
+	err = radix_tree_insert(ecc_tree, ecc_err->pa_pfn, ecc_err);
+	if (err == 0)
+		radix_tree_tag_set(ecc_tree, ecc_err->pa_pfn,
+				   UMC_ECC_NEW_DETECTED_TAG);
+
+	mutex_unlock(&log->lock);
+
+	return err;
+}
+
+/**
+ * amdgpu_umc_pages_in_a_row - expand one physical address into its retire unit
+ * @adev: amdgpu device pointer
+ * @err_data: record container; err_addr must already point at an array of
+ *            at least adev->umc.retire_unit entries
+ * @pa_addr: physical address to expand
+ *
+ * Resets the record count of @err_data, sets its capacity to
+ * adev->umc.retire_unit and lets the IP's convert_ras_err_addr() callback
+ * fill the records for @pa_addr.
+ *
+ * Return: the callback's result, or -EINVAL when the UMC RAS object or
+ * the callback is not provided.
+ */
+int amdgpu_umc_pages_in_a_row(struct amdgpu_device *adev,
+		struct ras_err_data *err_data, uint64_t pa_addr)
+{
+	struct ta_ras_query_address_output addr_out;
+
+	/* reinit err_data */
+	err_data->err_addr_cnt = 0;
+	err_data->err_addr_len = adev->umc.retire_unit;
+
+	/*
+	 * Only pa.pa is filled in below, but the whole structure is handed
+	 * to the callback: zero it first so no other field carries stack
+	 * garbage.
+	 */
+	memset(&addr_out, 0, sizeof(addr_out));
+	addr_out.pa.pa = pa_addr;
+
+	if (!adev->umc.ras || !adev->umc.ras->convert_ras_err_addr)
+		return -EINVAL;
+
+	/* addr_in is NULL: the address is supplied through addr_out.pa.pa */
+	return adev->umc.ras->convert_ras_err_addr(adev, err_data, NULL,
+						   &addr_out, false);
+}
+
+/**
+ * amdgpu_umc_lookup_bad_pages_in_a_row - list the pfns retired with an address
+ * @adev: amdgpu device pointer
+ * @pa_addr: physical address to look up
+ * @pfns: output array receiving the retired page frame numbers
+ * @len: number of entries @pfns can hold
+ *
+ * Return: adev->umc.retire_unit (the number of pfns stored) on success;
+ * 0 when the scratch allocation fails or when @len is smaller than the
+ * retire unit (in which case the first @len entries of @pfns have still
+ * been written); otherwise the error from amdgpu_umc_pages_in_a_row().
+ */
+int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
+		uint64_t pa_addr, uint64_t *pfns, int len)
+{
+	int i, ret;
+	struct ras_err_data err_data;
+
+	/*
+	 * err_data lives on the stack and is passed on to the IP conversion
+	 * callback; zero it so that the fields not set here or in
+	 * amdgpu_umc_pages_in_a_row() are not indeterminate.
+	 */
+	memset(&err_data, 0, sizeof(err_data));
+
+	err_data.err_addr = kcalloc(adev->umc.retire_unit,
+			sizeof(struct eeprom_table_record), GFP_KERNEL);
+	if (!err_data.err_addr) {
+		dev_warn(adev->dev, "Failed to alloc memory in bad page lookup!\n");
+		return 0;
+	}
+
+	ret = amdgpu_umc_pages_in_a_row(adev, &err_data, pa_addr);
+	if (ret)
+		goto out;
+
+	for (i = 0; i < adev->umc.retire_unit; i++) {
+		/* caller's array is too small: bail out with ret still 0 */
+		if (i >= len)
+			goto out;
+
+		pfns[i] = err_data.err_addr[i].retired_page;
+	}
+	ret = i;
+	adev->umc.err_addr_cnt = err_data.err_addr_cnt;
+
+out:
+	kfree(err_data.err_addr);
+	return ret;
+}
+
+/*
+ * Build an MCA-address query from the given error address and
+ * channel/umc/node/socket coordinates and pass it to the IP's
+ * convert_ras_err_addr() callback, which fills @addr_out.
+ * Returns the callback's result, or 0 when the UMC RAS object or the
+ * callback is not provided (in which case @addr_out is left untouched).
+ */
+int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
+		uint64_t err_addr, uint32_t ch, uint32_t umc,
+		uint32_t node, uint32_t socket,
+		struct ta_ras_query_address_output *addr_out, bool dump_addr)
+{
+	struct ta_ras_query_address_input query;
+
+	memset(&query, 0, sizeof(query));
+	query.ma.err_addr = err_addr;
+	query.ma.ch_inst = ch;
+	query.ma.umc_inst = umc;
+	query.ma.node_inst = node;
+	query.ma.socket_id = socket;
+
+	if (!adev->umc.ras || !adev->umc.ras->convert_ras_err_addr)
+		return 0;
+
+	return adev->umc.ras->convert_ras_err_addr(adev, NULL, &query,
+						   addr_out, dump_addr);
+}
+
+/**
+ * amdgpu_umc_pa2mca - translate a physical address into an MCA error address
+ * @adev: amdgpu device pointer
+ * @pa: physical address to translate
+ * @mca: output, receives addr_out.ma.err_addr on success
+ * @nps: memory partition mode @pa belongs to
+ *
+ * Return: 0 on success, otherwise the psp_ras_query_address() error
+ * (in which case @mca is not written).
+ */
+int amdgpu_umc_pa2mca(struct amdgpu_device *adev,
+		uint64_t pa, uint64_t *mca, enum amdgpu_memory_partition nps)
+{
+	struct ta_ras_query_address_input addr_in;
+	struct ta_ras_query_address_output addr_out;
+	int ret;
+
+	/*
+	 * Zero both messages, as amdgpu_umc_mca_to_addr() does for its
+	 * input: only pa.pa and addr_type are set below, and the remaining
+	 * bytes would otherwise reach psp_ras_query_address() as stack
+	 * garbage.
+	 */
+	memset(&addr_in, 0, sizeof(addr_in));
+	memset(&addr_out, 0, sizeof(addr_out));
+
+	/* the nps mode the pa belongs to is carried in bits 58 and up */
+	addr_in.pa.pa = pa | ((uint64_t)nps << 58);
+	addr_in.addr_type = TA_RAS_PA_TO_MCA;
+	ret = psp_ras_query_address(&adev->psp, &addr_in, &addr_out);
+	if (ret) {
+		dev_warn(adev->dev, "Failed to query RAS MCA address for 0x%llx",
+			pa);
+
+		return ret;
+	}
+
+	*mca = addr_out.ma.err_addr;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 1f5fe2315236..28dff750c47e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -20,7 +20,8 @@
*/
#ifndef __AMDGPU_UMC_H__
#define __AMDGPU_UMC_H__
-
+#include "amdgpu_ras.h"
+#include "amdgpu_mca.h"
/*
* (addr / 256) * 4096, the higher 26 bits in ErrorAddr
* is the index of 4KB block
@@ -31,6 +32,11 @@
* is the index of 8KB block
*/
#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5)
+/*
+ * (addr / 256) * 32768, the higher 26 bits in ErrorAddr
+ * is the index of 32KB block
+ */
+#define ADDR_OF_32KB_BLOCK(addr) (((addr) & ~0xffULL) << 7)
/* channel index is the index of 256B block */
#define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8)
/* offset in 256B block */
@@ -40,15 +46,75 @@
#define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++)
#define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst))
-struct amdgpu_umc_ras_funcs {
+#define LOOP_UMC_NODE_INST(node_inst) \
+ for_each_set_bit((node_inst), &(adev->umc.active_mask), adev->umc.node_inst_num)
+
+#define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \
+ LOOP_UMC_NODE_INST((node_inst)) LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst))
+
+/* Page retirement tag */
+#define UMC_ECC_NEW_DETECTED_TAG 0x1
+/*
+ * a flag to indicate v2 of channel index stored in eeprom
+ *
+ * v1 (legacy way): store channel index within a umc instance in eeprom
+ * range in UMC v12: 0 ~ 7
+ * v2: store global channel index in eeprom
+ * range in UMC v12: 0 ~ 127
+ *
+ * NOTE: it's better to store it in eeprom_table_record.mem_channel,
+ * but there is only 8 bits in mem_channel, and the channel number may
+ * increase in the future, we decide to save it in
+ * eeprom_table_record.retired_page. retired_page is useless in v2,
+ * we depend on eeprom_table_record.address instead of retired_page in v2.
+ * Only 48 bits are saved on eeprom, use bit 47 here.
+ */
+#define UMC_CHANNEL_IDX_V2 BIT_ULL(47)
+
+/*
+ * save nps value to eeprom_table_record.retired_page[47:40],
+ * the channel index flag above will be retired.
+ */
+#define UMC_NPS_SHIFT 40
+#define UMC_NPS_MASK 0xffULL
+
+/* three column bits and one row bit in MCA address flip
+ * in bad page retirement
+ */
+#define RETIRE_FLIP_BITS_NUM 4
+
+struct amdgpu_umc_flip_bits { /* stored in struct amdgpu_umc as 'flip_bits' */
+ uint32_t flip_bits_in_pa[RETIRE_FLIP_BITS_NUM]; /* one slot per flipped bit (RETIRE_FLIP_BITS_NUM entries) */
+ uint32_t flip_row_bit; /* NOTE(review): presumably the position of the flipped row bit -- confirm */
+ uint32_t r13_in_pa; /* NOTE(review): presumably where row bit 13 sits in the physical address -- confirm */
+ uint32_t bit_num; /* NOTE(review): presumably how many flip_bits_in_pa[] entries are in use -- confirm */
+};
+
+/*
+ * Per-channel callback type used by amdgpu_umc_loop_channels(): invoked
+ * with the node, umc and channel instance being visited plus the caller's
+ * opaque data pointer. A non-zero return stops the iteration and is
+ * returned to the caller of the loop.
+ */
+typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst,
+ uint32_t umc_inst, uint32_t ch_inst, void *data);
+
+struct amdgpu_umc_ras { /* per-IP UMC RAS object; every callback is optional and NULL-checked by the amdgpu_umc.c callers, except update_ecc_status */
+ struct amdgpu_ras_block_object ras_block; /* registered by amdgpu_umc_ras_sw_init() */
void (*err_cnt_init)(struct amdgpu_device *adev); /* called at the end of amdgpu_umc_ras_late_init() */
- int (*ras_late_init)(struct amdgpu_device *adev);
- void (*ras_fini)(struct amdgpu_device *adev);
- void (*query_ras_error_count)(struct amdgpu_device *adev,
+ bool (*query_ras_poison_mode)(struct amdgpu_device *adev);
+ void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev, /* ecc_info path of amdgpu_umc_handle_bad_pages() */
void *ras_error_status);
- void (*query_ras_error_address)(struct amdgpu_device *adev,
+ void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev, /* ecc_info path of amdgpu_umc_handle_bad_pages() */
void *ras_error_status);
- bool (*query_ras_poison_mode)(struct amdgpu_device *adev);
+ bool (*check_ecc_err_status)(struct amdgpu_device *adev,
+ enum amdgpu_mca_error_type type, void *ras_error_status);
+ int (*update_ecc_status)(struct amdgpu_device *adev, /* reached through amdgpu_umc_update_ecc_status() */
+ uint64_t status, uint64_t ipid, uint64_t addr);
+ int (*convert_ras_err_addr)(struct amdgpu_device *adev, /* called with err_data == NULL by amdgpu_umc_mca_to_addr() and with addr_in == NULL by amdgpu_umc_pages_in_a_row() */
+ struct ras_err_data *err_data,
+ struct ta_ras_query_address_input *addr_in,
+ struct ta_ras_query_address_output *addr_out,
+ bool dump_addr);
+ uint32_t (*get_die_id_from_pa)(struct amdgpu_device *adev,
+ uint64_t mca_addr, uint64_t retired_page);
+ void (*get_retire_flip_bits)(struct amdgpu_device *adev);
+ void (*mca_ipid_parse)(struct amdgpu_device *adev, uint64_t ipid,
+ uint32_t *did, uint32_t *ch, uint32_t *umc_inst, uint32_t *sid);
};
struct amdgpu_umc_funcs {
@@ -62,22 +128,69 @@ struct amdgpu_umc {
uint32_t channel_inst_num;
/* number of umc instance with memory map register access */
uint32_t umc_inst_num;
+
+ /* Total number of UMC node instances, including harvested ones */
+ uint32_t node_inst_num;
+
/* UMC register per channel offset */
uint32_t channel_offs;
+ /* how many pages are retired in one UE */
+ uint32_t retire_unit;
/* channel index table of interleaved memory */
const uint32_t *channel_idx_tbl;
struct ras_common_if *ras_if;
const struct amdgpu_umc_funcs *funcs;
- const struct amdgpu_umc_ras_funcs *ras_funcs;
+ struct amdgpu_umc_ras *ras;
+
+ /* active mask for umc node instance */
+ unsigned long active_mask;
+
+ struct amdgpu_umc_flip_bits flip_bits;
+
+ unsigned long err_addr_cnt;
};
-int amdgpu_umc_ras_late_init(struct amdgpu_device *adev);
-void amdgpu_umc_ras_fini(struct amdgpu_device *adev);
-int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
- void *ras_error_status,
- struct amdgpu_iv_entry *entry);
+int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint32_t reset);
+int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset);
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry);
+int amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
+ uint64_t err_addr,
+ uint64_t retired_page,
+ uint32_t channel_index,
+ uint32_t umc_inst);
+
+int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
+ void *ras_error_status,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
+ uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst);
+
+int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
+ umc_func func, void *data);
+
+int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev,
+ uint64_t status, uint64_t ipid, uint64_t addr);
+int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev,
+ struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err);
+
+void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
+ void *ras_error_status);
+int amdgpu_umc_pages_in_a_row(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t pa_addr);
+int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
+ uint64_t pa_addr, uint64_t *pfns, int len);
+int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
+ uint64_t err_addr, uint32_t ch, uint32_t umc,
+ uint32_t node, uint32_t socket,
+ struct ta_ras_query_address_output *addr_out, bool dump_addr);
+int amdgpu_umc_pa2mca(struct amdgpu_device *adev,
+ uint64_t pa, uint64_t *mca, enum amdgpu_memory_partition nps);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h
index 919d9d401750..5b27fc41ffbf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h
@@ -35,17 +35,51 @@ struct amdgpu_debugfs_regs2_iocdata {
} srbm;
};
+struct amdgpu_debugfs_regs2_iocdata_v2 {
+ __u32 use_srbm, use_grbm, pg_lock;
+ struct {
+ __u32 se, sh, instance;
+ } grbm;
+ struct {
+ __u32 me, pipe, queue, vmid;
+ } srbm;
+ u32 xcc_id;
+};
+
+struct amdgpu_debugfs_gprwave_iocdata {
+ u32 gpr_or_wave, se, sh, cu, wave, simd, xcc_id;
+ struct {
+ u32 thread, vpgr_or_sgpr;
+ } gpr;
+};
+
/*
* MMIO debugfs state data (per file* handle)
*/
struct amdgpu_debugfs_regs2_data {
struct amdgpu_device *adev;
struct mutex lock;
- struct amdgpu_debugfs_regs2_iocdata id;
+ struct amdgpu_debugfs_regs2_iocdata_v2 id;
+};
+
+struct amdgpu_debugfs_gprwave_data {
+ struct amdgpu_device *adev;
+ struct mutex lock;
+ struct amdgpu_debugfs_gprwave_iocdata id;
};
enum AMDGPU_DEBUGFS_REGS2_CMDS {
- AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0,
+ AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE = 0,
+ AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2,
+};
+
+enum AMDGPU_DEBUGFS_GPRWAVE_CMDS {
+ AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE = 0,
};
+//reg2 interface
#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE, struct amdgpu_debugfs_regs2_iocdata)
+#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE_V2 _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2, struct amdgpu_debugfs_regs2_iocdata_v2)
+
+//gprwave interface
+#define AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE, struct amdgpu_debugfs_gprwave_iocdata)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
new file mode 100644
index 000000000000..cd707d70a0bf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
@@ -0,0 +1,550 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <drm/drm_exec.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_umsch_mm.h"
+#include "umsch_mm_v4_0.h"
+
+MODULE_FIRMWARE("amdgpu/umsch_mm_4_0_0.bin");
+
+/**
+ * amdgpu_umsch_mm_submit_pkt - write a packet to the UMSCH ring and commit it
+ * @umsch: UMSCH MM instance whose embedded ring receives the packet
+ * @pkt: packet data, handed to amdgpu_ring_write_multiple() with @ndws
+ * @ndws: number of dwords reserved on the ring and written from @pkt
+ *
+ * Return: 0 once the packet has been committed, -ENOMEM if
+ * amdgpu_ring_alloc() reports any failure.
+ */
+int amdgpu_umsch_mm_submit_pkt(struct amdgpu_umsch_mm *umsch, void *pkt, int ndws)
+{
+	int r;
+
+	r = amdgpu_ring_alloc(&umsch->ring, ndws);
+	if (r)
+		return -ENOMEM;
+
+	amdgpu_ring_write_multiple(&umsch->ring, pkt, ndws);
+	amdgpu_ring_commit(&umsch->ring);
+
+	return 0;
+}
+
+/**
+ * amdgpu_umsch_mm_query_fence - poll for the last emitted UMSCH fence
+ * @umsch: UMSCH MM instance whose ring fence is polled
+ *
+ * Polls the ring's fence driver for sync_seq, bounded by the device's
+ * usec_timeout.
+ *
+ * Return: 0 if the poll returned at least 1, -ETIMEDOUT otherwise.
+ */
+int amdgpu_umsch_mm_query_fence(struct amdgpu_umsch_mm *umsch)
+{
+	struct amdgpu_ring *ring = &umsch->ring;
+	struct amdgpu_device *adev = ring->adev;
+	int r;
+
+	r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, adev->usec_timeout);
+	if (r >= 1)
+		return 0;
+
+	dev_err(adev->dev, "ring umsch timeout, emitted fence %u\n",
+		ring->fence_drv.sync_seq);
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * Publish the ring write pointer, shifted left by two, either through the
+ * ring's doorbell or through the rb_wptr register when no doorbell is used.
+ */
+static void umsch_mm_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	/* adev is referenced implicitly by the register/doorbell macros */
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_umsch_mm *umsch =
+		container_of(ring, struct amdgpu_umsch_mm, ring);
+
+	if (!ring->use_doorbell) {
+		WREG32(umsch->rb_wptr, ring->wptr << 2);
+		return;
+	}
+
+	WDOORBELL32(ring->doorbell_index, ring->wptr << 2);
+}
+
+/* Return the current value of the ring's read-pointer register (rb_rptr). */
+static u64 umsch_mm_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	/* adev is referenced implicitly by RREG32() */
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_umsch_mm *umsch =
+		container_of(ring, struct amdgpu_umsch_mm, ring);
+
+	return RREG32(umsch->rb_rptr);
+}
+
+/* Return the current value of the ring's write-pointer register (rb_wptr). */
+static u64 umsch_mm_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	/* adev is referenced implicitly by RREG32() */
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_umsch_mm *umsch =
+		container_of(ring, struct amdgpu_umsch_mm, ring);
+
+	return RREG32(umsch->rb_wptr);
+}
+
+/*
+ * Ring callbacks for the UMSCH MM ring; installed on the ring by
+ * umsch_mm_early_init(). 64-bit ring pointers are not used, and no
+ * alignment mask or NOP packet value is configured.
+ */
+static const struct amdgpu_ring_funcs umsch_v4_0_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_UMSCH_MM,
+ .align_mask = 0,
+ .nop = 0,
+ .support_64bit_ptrs = false,
+ .get_rptr = umsch_mm_ring_get_rptr,
+ .get_wptr = umsch_mm_ring_get_wptr,
+ .set_wptr = umsch_mm_ring_set_wptr,
+ .insert_nop = amdgpu_ring_insert_nop,
+};
+
+/**
+ * amdgpu_umsch_mm_ring_init - configure and register the UMSCH ring
+ * @umsch: UMSCH MM instance embedded in struct amdgpu_device
+ *
+ * Names the ring "umsch", marks it as doorbell driven and scheduler-less,
+ * binds it to MMHUB0 and initializes it with amdgpu_ring_init().
+ *
+ * Return: the result of amdgpu_ring_init().
+ */
+int amdgpu_umsch_mm_ring_init(struct amdgpu_umsch_mm *umsch)
+{
+	struct amdgpu_ring *ring = &umsch->ring;
+	struct amdgpu_device *adev;
+
+	adev = container_of(umsch, struct amdgpu_device, umsch_mm);
+
+	snprintf(ring->name, sizeof(ring->name), "umsch");
+
+	ring->no_scheduler = true;
+	ring->use_doorbell = true;
+	ring->doorbell_index = (AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1) + 6;
+	ring->vm_hub = AMDGPU_MMHUB0(0);
+
+	return amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+/**
+ * amdgpu_umsch_mm_init_microcode - request and parse the UMSCH MM firmware
+ * @umsch: UMSCH MM instance; the owning device is taken from its ring
+ *
+ * Selects the firmware name from the VCN IP version, requests the image and
+ * caches the ucode/data sizes plus the IRQ, ucode and data start addresses
+ * found in its header. When firmware is loaded through PSP, the ucode and
+ * data sections are also registered in adev->firmware.ucode[] and their
+ * page-aligned sizes are added to adev->firmware.fw_size.
+ *
+ * Return: 0 on success, -EINVAL for an unsupported VCN IP version, or the
+ * error returned by amdgpu_ucode_request().
+ */
+int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch)
+{
+	const struct umsch_mm_firmware_header_v1_0 *hdr;
+	struct amdgpu_device *adev = umsch->ring.adev;
+	struct amdgpu_firmware_info *info;
+	const char *fw_name = NULL;
+	int r;
+
+	switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
+	case IP_VERSION(4, 0, 5):
+	case IP_VERSION(4, 0, 6):
+		fw_name = "4_0_0";
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, AMDGPU_UCODE_REQUIRED,
+				 "amdgpu/umsch_mm_%s.bin", fw_name);
+	if (r) {
+		release_firmware(adev->umsch_mm.fw);
+		adev->umsch_mm.fw = NULL;
+		return r;
+	}
+
+	hdr = (const struct umsch_mm_firmware_header_v1_0 *)adev->umsch_mm.fw->data;
+
+	adev->umsch_mm.ucode_size = le32_to_cpu(hdr->umsch_mm_ucode_size_bytes);
+	adev->umsch_mm.data_size = le32_to_cpu(hdr->umsch_mm_ucode_data_size_bytes);
+
+	/* Each start address is stored as two little-endian 32-bit halves. */
+	adev->umsch_mm.irq_start_addr =
+		((uint64_t)le32_to_cpu(hdr->umsch_mm_irq_start_addr_hi) << 32) |
+		le32_to_cpu(hdr->umsch_mm_irq_start_addr_lo);
+	adev->umsch_mm.uc_start_addr =
+		((uint64_t)le32_to_cpu(hdr->umsch_mm_uc_start_addr_hi) << 32) |
+		le32_to_cpu(hdr->umsch_mm_uc_start_addr_lo);
+	adev->umsch_mm.data_start_addr =
+		((uint64_t)le32_to_cpu(hdr->umsch_mm_data_start_addr_hi) << 32) |
+		le32_to_cpu(hdr->umsch_mm_data_start_addr_lo);
+
+	/* Only PSP loading needs the sections registered in the ucode table. */
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+		return 0;
+
+	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_UMSCH_MM_UCODE];
+	info->ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_UCODE;
+	info->fw = adev->umsch_mm.fw;
+	adev->firmware.fw_size +=
+		ALIGN(le32_to_cpu(hdr->umsch_mm_ucode_size_bytes), PAGE_SIZE);
+
+	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_UMSCH_MM_DATA];
+	info->ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_DATA;
+	info->fw = adev->umsch_mm.fw;
+	adev->firmware.fw_size +=
+		ALIGN(le32_to_cpu(hdr->umsch_mm_ucode_data_size_bytes), PAGE_SIZE);
+
+	return 0;
+}
+
+/**
+ * amdgpu_umsch_mm_allocate_ucode_buffer - copy the ucode section into VRAM
+ * @umsch: UMSCH MM instance; the owning device is taken from its ring
+ *
+ * Creates a 4 KiB aligned VRAM buffer object sized for the ucode section of
+ * the already requested firmware image, copies the section into it, then
+ * unmaps and unreserves the buffer object.
+ *
+ * Return: 0 on success or the error from amdgpu_bo_create_reserved().
+ */
+int amdgpu_umsch_mm_allocate_ucode_buffer(struct amdgpu_umsch_mm *umsch)
+{
+	const struct umsch_mm_firmware_header_v1_0 *hdr;
+	struct amdgpu_device *adev = umsch->ring.adev;
+	const __le32 *section;
+	uint32_t section_bytes;
+	int r;
+
+	hdr = (const struct umsch_mm_firmware_header_v1_0 *)adev->umsch_mm.fw->data;
+
+	section_bytes = le32_to_cpu(hdr->umsch_mm_ucode_size_bytes);
+	section = (const __le32 *)(adev->umsch_mm.fw->data +
+				   le32_to_cpu(hdr->umsch_mm_ucode_offset_bytes));
+
+	r = amdgpu_bo_create_reserved(adev, section_bytes,
+				      4 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+				      &adev->umsch_mm.ucode_fw_obj,
+				      &adev->umsch_mm.ucode_fw_gpu_addr,
+				      (void **)&adev->umsch_mm.ucode_fw_ptr);
+	if (r) {
+		dev_err(adev->dev, "(%d) failed to create umsch_mm fw ucode bo\n", r);
+		return r;
+	}
+
+	memcpy(adev->umsch_mm.ucode_fw_ptr, section, section_bytes);
+
+	amdgpu_bo_kunmap(adev->umsch_mm.ucode_fw_obj);
+	amdgpu_bo_unreserve(adev->umsch_mm.ucode_fw_obj);
+
+	return 0;
+}
+
+/**
+ * amdgpu_umsch_mm_allocate_ucode_data_buffer - copy the data section into VRAM
+ * @umsch: UMSCH MM instance; the owning device is taken from its ring
+ *
+ * Creates a 64 KiB aligned VRAM buffer object sized for the ucode data
+ * section of the already requested firmware image, copies the section into
+ * it, then unmaps and unreserves the buffer object.
+ *
+ * Return: 0 on success or the error from amdgpu_bo_create_reserved().
+ */
+int amdgpu_umsch_mm_allocate_ucode_data_buffer(struct amdgpu_umsch_mm *umsch)
+{
+	const struct umsch_mm_firmware_header_v1_0 *hdr;
+	struct amdgpu_device *adev = umsch->ring.adev;
+	const __le32 *section;
+	uint32_t section_bytes;
+	int r;
+
+	hdr = (const struct umsch_mm_firmware_header_v1_0 *)adev->umsch_mm.fw->data;
+
+	section_bytes = le32_to_cpu(hdr->umsch_mm_ucode_data_size_bytes);
+	section = (const __le32 *)(adev->umsch_mm.fw->data +
+				   le32_to_cpu(hdr->umsch_mm_ucode_data_offset_bytes));
+
+	r = amdgpu_bo_create_reserved(adev, section_bytes,
+				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+				      &adev->umsch_mm.data_fw_obj,
+				      &adev->umsch_mm.data_fw_gpu_addr,
+				      (void **)&adev->umsch_mm.data_fw_ptr);
+	if (r) {
+		dev_err(adev->dev, "(%d) failed to create umsch_mm fw data bo\n", r);
+		return r;
+	}
+
+	memcpy(adev->umsch_mm.data_fw_ptr, section, section_bytes);
+
+	amdgpu_bo_kunmap(adev->umsch_mm.data_fw_obj);
+	amdgpu_bo_unreserve(adev->umsch_mm.data_fw_obj);
+
+	return 0;
+}
+
+/**
+ * amdgpu_umsch_mm_psp_execute_cmd_buf - hand the command buffer to PSP
+ * @umsch: UMSCH MM instance; the owning device is taken from its ring
+ *
+ * Describes the filled part of the command buffer (from cmd_buf_ptr up to
+ * cmd_buf_curr_ptr) as a firmware info entry and passes it to
+ * psp_execute_ip_fw_load().
+ *
+ * Return: the result of psp_execute_ip_fw_load().
+ */
+int amdgpu_umsch_mm_psp_execute_cmd_buf(struct amdgpu_umsch_mm *umsch)
+{
+	struct amdgpu_device *adev = umsch->ring.adev;
+	uintptr_t buf_start = (uintptr_t)adev->umsch_mm.cmd_buf_ptr;
+	uintptr_t buf_end = (uintptr_t)adev->umsch_mm.cmd_buf_curr_ptr;
+	struct amdgpu_firmware_info ucode = {
+		.ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER,
+		.mc_addr = adev->umsch_mm.cmd_buf_gpu_addr,
+		.ucode_size = buf_end - buf_start,
+	};
+
+	return psp_execute_ip_fw_load(&adev->psp, &ucode);
+}
+
+/*
+ * Fill umsch_mm.agdb_index[] with one consecutive index per context
+ * priority level. The first index is the doorbell max_assignment plus one,
+ * rounded up to a multiple of 1024, plus twice the
+ * AMDGPU_NAVI10_DOORBELL64_VCN0_1 value.
+ */
+static void umsch_mm_agdb_index_init(struct amdgpu_device *adev)
+{
+	uint32_t first_index;
+	int level;
+
+	first_index = adev->doorbell_index.max_assignment + 1;
+	first_index = roundup(first_index, 1024);
+	first_index += (AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1);
+
+	for (level = 0; level < CONTEXT_PRIORITY_NUM_LEVELS; level++)
+		adev->umsch_mm.agdb_index[level] = first_index + level;
+}
+
+/*
+ * Allocate the software state of the UMSCH MM block: a writeback slot for
+ * the scheduler context, a one-page GTT command buffer and the firmware
+ * debug-log buffer, then initialize the mutex and the aggregated doorbell
+ * indices.
+ *
+ * Everything allocated here is released again if a later allocation fails,
+ * so a failed call leaves nothing behind.
+ *
+ * Return: 0 on success or the error of the failing allocation.
+ */
+static int umsch_mm_init(struct amdgpu_device *adev)
+{
+	int r;
+
+	adev->umsch_mm.vmid_mask_mm_vpe = 0xf00;
+	adev->umsch_mm.engine_mask = (1 << UMSCH_SWIP_ENGINE_TYPE_VPE);
+	adev->umsch_mm.vpe_hqd_mask = 0xfe;
+
+	r = amdgpu_device_wb_get(adev, &adev->umsch_mm.wb_index);
+	if (r) {
+		dev_err(adev->dev, "failed to alloc wb for umsch: %d\n", r);
+		return r;
+	}
+
+	adev->umsch_mm.sch_ctx_gpu_addr = adev->wb.gpu_addr +
+					  (adev->umsch_mm.wb_index * 4);
+
+	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_GTT,
+				    &adev->umsch_mm.cmd_buf_obj,
+				    &adev->umsch_mm.cmd_buf_gpu_addr,
+				    (void **)&adev->umsch_mm.cmd_buf_ptr);
+	if (r) {
+		dev_err(adev->dev, "failed to allocate cmdbuf bo %d\n", r);
+		goto err_free_wb;
+	}
+
+	r = amdgpu_bo_create_kernel(adev, AMDGPU_UMSCHFW_LOG_SIZE, PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_VRAM |
+				    AMDGPU_GEM_DOMAIN_GTT,
+				    &adev->umsch_mm.dbglog_bo,
+				    &adev->umsch_mm.log_gpu_addr,
+				    &adev->umsch_mm.log_cpu_addr);
+	if (r) {
+		dev_err(adev->dev, "(%d) failed to allocate umsch debug bo\n", r);
+		goto err_free_cmd_buf;
+	}
+
+	mutex_init(&adev->umsch_mm.mutex_hidden);
+
+	umsch_mm_agdb_index_init(adev);
+
+	return 0;
+
+err_free_cmd_buf:
+	/* the command buffer would otherwise leak on a debug-log BO failure */
+	amdgpu_bo_free_kernel(&adev->umsch_mm.cmd_buf_obj,
+			      &adev->umsch_mm.cmd_buf_gpu_addr,
+			      (void **)&adev->umsch_mm.cmd_buf_ptr);
+err_free_wb:
+	amdgpu_device_wb_free(adev, adev->umsch_mm.wb_index);
+	return r;
+}
+
+
+/*
+ * Early-init hook: install the v4.0 UMSCH callbacks and the ring callbacks
+ * for VCN IP 4.0.5 / 4.0.6, then let the callbacks set up the register
+ * offsets.
+ *
+ * Return: 0 on success, -EINVAL for any other VCN IP version.
+ */
+static int umsch_mm_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_umsch_mm *umsch = &adev->umsch_mm;
+
+	switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
+	case IP_VERSION(4, 0, 5):
+	case IP_VERSION(4, 0, 6):
+		umsch_mm_v4_0_set_funcs(umsch);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	umsch->ring.funcs = &umsch_v4_0_ring_funcs;
+	umsch_mm_set_regs(umsch);
+
+	return 0;
+}
+
+/*
+ * Late-init hook. There is currently no late initialization work for this
+ * block, so the former reset/suspend check (whose two outcomes were both
+ * "return 0") is dropped.
+ *
+ * Return: always 0.
+ */
+static int umsch_mm_late_init(struct amdgpu_ip_block *ip_block)
+{
+	return 0;
+}
+
+/*
+ * Software-init hook: allocate the block's buffers, initialize the firmware
+ * log header, then run the ring_init and init_microcode callbacks.
+ *
+ * Return: 0 on success or the first error encountered.
+ */
+static int umsch_mm_sw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_umsch_mm *umsch = &adev->umsch_mm;
+	int r;
+
+	r = umsch_mm_init(adev);
+	if (r)
+		return r;
+
+	amdgpu_umsch_fwlog_init(umsch);
+
+	r = umsch_mm_ring_init(umsch);
+	if (r)
+		return r;
+
+	r = umsch_mm_init_microcode(umsch);
+	if (r)
+		return r;
+
+	return 0;
+}
+
+/*
+ * Software-fini hook: release the firmware image, tear down the ring and
+ * the mutex, and free the command buffer, the debug-log buffer and the
+ * writeback slot.
+ *
+ * Return: always 0.
+ */
+static int umsch_mm_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_umsch_mm *umsch = &adev->umsch_mm;
+
+	release_firmware(umsch->fw);
+	umsch->fw = NULL;
+
+	amdgpu_ring_fini(&umsch->ring);
+
+	mutex_destroy(&umsch->mutex_hidden);
+
+	amdgpu_bo_free_kernel(&umsch->cmd_buf_obj,
+			      &umsch->cmd_buf_gpu_addr,
+			      (void **)&umsch->cmd_buf_ptr);
+
+	amdgpu_bo_free_kernel(&umsch->dbglog_bo,
+			      &umsch->log_gpu_addr,
+			      (void **)&umsch->log_cpu_addr);
+
+	amdgpu_device_wb_free(adev, umsch->wb_index);
+
+	return 0;
+}
+
+/*
+ * Hardware-init hook: load the microcode, start the ring and program the
+ * hardware resources through the block's callbacks.
+ *
+ * Return: 0 on success or the error of the first failing step.
+ */
+static int umsch_mm_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int r;
+
+	r = umsch_mm_load_microcode(&adev->umsch_mm);
+	if (r)
+		return r;
+
+	/* ring_start returns int; do not program resources on a dead ring */
+	r = umsch_mm_ring_start(&adev->umsch_mm);
+	if (r)
+		return r;
+
+	r = umsch_mm_set_hw_resources(&adev->umsch_mm);
+	if (r)
+		return r;
+
+	return 0;
+}
+
+/*
+ * Hardware-fini hook: stop the ring, then free the firmware data and ucode
+ * buffer objects.
+ *
+ * Return: always 0.
+ */
+static int umsch_mm_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_umsch_mm *umsch = &ip_block->adev->umsch_mm;
+
+	umsch_mm_ring_stop(umsch);
+
+	amdgpu_bo_free_kernel(&umsch->data_fw_obj,
+			      &umsch->data_fw_gpu_addr,
+			      (void **)&umsch->data_fw_ptr);
+
+	amdgpu_bo_free_kernel(&umsch->ucode_fw_obj,
+			      &umsch->ucode_fw_gpu_addr,
+			      (void **)&umsch->ucode_fw_ptr);
+
+	return 0;
+}
+
+/* Suspend hook: performs exactly the hardware teardown of umsch_mm_hw_fini(). */
+static int umsch_mm_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return umsch_mm_hw_fini(ip_block);
+}
+
+/* Resume hook: re-runs the hardware bring-up of umsch_mm_hw_init(). */
+static int umsch_mm_resume(struct amdgpu_ip_block *ip_block)
+{
+ return umsch_mm_hw_init(ip_block);
+}
+
+/**
+ * amdgpu_umsch_fwlog_init - reset the firmware log header
+ * @umsch_mm: UMSCH MM instance whose log buffer (log_cpu_addr) is set up
+ *
+ * With CONFIG_DEBUG_FS enabled, writes the header size and total buffer
+ * size into the header at the start of the log buffer, points both the read
+ * and write offsets just past the header and clears the wrapped flag.
+ * Without CONFIG_DEBUG_FS this is a no-op.
+ */
+void amdgpu_umsch_fwlog_init(struct amdgpu_umsch_mm *umsch_mm)
+{
+#if defined(CONFIG_DEBUG_FS)
+	volatile struct amdgpu_umsch_fwlog *hdr = umsch_mm->log_cpu_addr;
+
+	hdr->header_size = sizeof(struct amdgpu_umsch_fwlog);
+	hdr->buffer_size = AMDGPU_UMSCHFW_LOG_SIZE;
+	/* an empty log has rptr == wptr, both just past the header */
+	hdr->rptr = hdr->header_size;
+	hdr->wptr = hdr->header_size;
+	hdr->wrapped = 0;
+#endif
+}
+
+/*
+ * debugfs for mapping umsch firmware log buffer.
+ */
+#if defined(CONFIG_DEBUG_FS)
+/*
+ * debugfs read handler for the UMSCH firmware log.
+ *
+ * The log buffer starts with a struct amdgpu_umsch_fwlog header followed by
+ * a circular data area. Data between the header's rptr and wptr is copied
+ * to user space in at most two segments: from rptr towards the end of the
+ * buffer and, if the data wraps, from just past the header up to wptr.
+ * On success rptr is advanced by the amount copied.
+ *
+ * Return: number of bytes copied (0 if @size is 0 or the log is empty),
+ * -ENODEV if the inode carries no UMSCH instance, -EFAULT if the log buffer
+ * is not mapped, the offsets are out of range, or any part of the copy to
+ * user space fails.
+ */
+static ssize_t amdgpu_debugfs_umsch_fwlog_read(struct file *f, char __user *buf,
+					       size_t size, loff_t *pos)
+{
+	struct amdgpu_umsch_mm *umsch_mm;
+	void *log_buf;
+	volatile struct amdgpu_umsch_fwlog *plog;
+	unsigned int read_pos, write_pos, available, i, read_bytes = 0;
+	unsigned int read_num[2] = {0};
+
+	umsch_mm = file_inode(f)->i_private;
+	if (!umsch_mm)
+		return -ENODEV;
+
+	if (!umsch_mm->log_cpu_addr)
+		return -EFAULT;
+
+	log_buf = umsch_mm->log_cpu_addr;
+
+	plog = (volatile struct amdgpu_umsch_fwlog *)log_buf;
+	read_pos = plog->rptr;
+	write_pos = plog->wptr;
+
+	if (read_pos > AMDGPU_UMSCHFW_LOG_SIZE || write_pos > AMDGPU_UMSCHFW_LOG_SIZE)
+		return -EFAULT;
+
+	if (!size || (read_pos == write_pos))
+		return 0;
+
+	if (write_pos > read_pos) {
+		/* contiguous data: a single segment */
+		available = write_pos - read_pos;
+		read_num[0] = min_t(size_t, size, available);
+	} else {
+		/* wrapped data: tail of the buffer, then the part after the header */
+		read_num[0] = AMDGPU_UMSCHFW_LOG_SIZE - read_pos;
+		available = read_num[0] + write_pos - plog->header_size;
+		if (size > available)
+			read_num[1] = write_pos - plog->header_size;
+		else if (size > read_num[0])
+			read_num[1] = size - read_num[0];
+		else
+			read_num[0] = size;
+	}
+
+	for (i = 0; i < 2; i++) {
+		if (!read_num[i])
+			continue;
+
+		if (read_pos == AMDGPU_UMSCHFW_LOG_SIZE)
+			read_pos = plog->header_size;
+
+		/*
+		 * copy_to_user() returns the number of bytes it could NOT
+		 * copy, so any non-zero result is a (possibly partial)
+		 * failure and must not be counted as data delivered.
+		 */
+		if (copy_to_user(buf + read_bytes, log_buf + read_pos, read_num[i]))
+			return -EFAULT;
+
+		read_bytes += read_num[i];
+		read_pos += read_num[i];
+	}
+
+	plog->rptr = read_pos;
+	*pos += read_bytes;
+	return read_bytes;
+}
+
+/* File operations of the firmware log debugfs node: read and seek only. */
+static const struct file_operations amdgpu_debugfs_umschfwlog_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_umsch_fwlog_read,
+ .llseek = default_llseek
+};
+#endif
+
+/**
+ * amdgpu_debugfs_umsch_fwlog_init - create the firmware log debugfs node
+ * @adev: device whose primary DRM minor hosts the debugfs directory
+ * @umsch_mm: UMSCH MM instance stored as the file's private data
+ *
+ * With CONFIG_DEBUG_FS enabled, creates the read-only file
+ * "amdgpu_umsch_fwlog" with a size of AMDGPU_UMSCHFW_LOG_SIZE.
+ * Without CONFIG_DEBUG_FS this is a no-op.
+ */
+void amdgpu_debugfs_umsch_fwlog_init(struct amdgpu_device *adev,
+				     struct amdgpu_umsch_mm *umsch_mm)
+{
+#if defined(CONFIG_DEBUG_FS)
+	struct drm_minor *minor = adev_to_drm(adev)->primary;
+	struct dentry *root = minor->debugfs_root;
+
+	/* the name is constant: no need to sprintf() it into a stack buffer */
+	debugfs_create_file_size("amdgpu_umsch_fwlog", S_IFREG | 0444, root,
+				 umsch_mm, &amdgpu_debugfs_umschfwlog_fops,
+				 AMDGPU_UMSCHFW_LOG_SIZE);
+#endif
+}
+
+/* IP-block lifecycle callbacks for UMSCH MM v4.0, all implemented in this file. */
+static const struct amd_ip_funcs umsch_mm_v4_0_ip_funcs = {
+ .name = "umsch_mm_v4_0",
+ .early_init = umsch_mm_early_init,
+ .late_init = umsch_mm_late_init,
+ .sw_init = umsch_mm_sw_init,
+ .sw_fini = umsch_mm_sw_fini,
+ .hw_init = umsch_mm_hw_init,
+ .hw_fini = umsch_mm_hw_fini,
+ .suspend = umsch_mm_suspend,
+ .resume = umsch_mm_resume,
+};
+
+/* Exported IP block descriptor: UMSCH MM version 4.0.0 with the callbacks above. */
+const struct amdgpu_ip_block_version umsch_mm_v4_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_UMSCH_MM,
+ .major = 4,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &umsch_mm_v4_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h
new file mode 100644
index 000000000000..2c771a753778
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_UMSCH_MM_H__
+#define __AMDGPU_UMSCH_MM_H__
+
+enum UMSCH_SWIP_ENGINE_TYPE {
+ UMSCH_SWIP_ENGINE_TYPE_VCN0 = 0,
+ UMSCH_SWIP_ENGINE_TYPE_VCN1 = 1,
+ UMSCH_SWIP_ENGINE_TYPE_VCN = 2,
+ UMSCH_SWIP_ENGINE_TYPE_VPE = 3,
+ UMSCH_SWIP_ENGINE_TYPE_MAX
+};
+
+enum UMSCH_CONTEXT_PRIORITY_LEVEL {
+ CONTEXT_PRIORITY_LEVEL_IDLE = 0,
+ CONTEXT_PRIORITY_LEVEL_NORMAL = 1,
+ CONTEXT_PRIORITY_LEVEL_FOCUS = 2,
+ CONTEXT_PRIORITY_LEVEL_REALTIME = 3,
+ CONTEXT_PRIORITY_NUM_LEVELS
+};
+
+struct umsch_mm_set_resource_input {
+ uint32_t vmid_mask_mm_vcn;
+ uint32_t vmid_mask_mm_vpe;
+ uint32_t collaboration_mask_vpe;
+ uint32_t logging_vmid;
+ uint32_t engine_mask;
+ union {
+ struct {
+ uint32_t disable_reset : 1;
+ uint32_t disable_umsch_mm_log : 1;
+ uint32_t use_rs64mem_for_proc_ctx_csa : 1;
+ uint32_t reserved : 29;
+ };
+ uint32_t uint32_all;
+ };
+};
+
+/*
+ * Header located at the start of the UMSCH firmware log buffer
+ * (amdgpu_umsch_mm.log_cpu_addr). All offsets are byte offsets from the
+ * start of the buffer.
+ */
+struct amdgpu_umsch_fwlog {
+ /* read offset; initialized to header_size and advanced by the debugfs reader */
+ uint32_t rptr;
+ /* write offset; initialized to header_size (presumably advanced by firmware - TODO confirm) */
+ uint32_t wptr;
+ /* total size of the log buffer; set to AMDGPU_UMSCHFW_LOG_SIZE by the driver */
+ uint32_t buffer_size;
+ /* size of this header; log data starts right after it */
+ uint32_t header_size;
+ /* cleared to 0 by the driver at init; not otherwise touched by the driver code shown here */
+ uint32_t wrapped;
+};
+
+struct umsch_mm_add_queue_input {
+ uint32_t process_id;
+ uint64_t page_table_base_addr;
+ uint64_t process_va_start;
+ uint64_t process_va_end;
+ uint64_t process_quantum;
+ uint64_t process_csa_addr;
+ uint64_t context_quantum;
+ uint64_t context_csa_addr;
+ uint32_t inprocess_context_priority;
+ enum UMSCH_CONTEXT_PRIORITY_LEVEL context_global_priority_level;
+ uint32_t doorbell_offset_0;
+ uint32_t doorbell_offset_1;
+ enum UMSCH_SWIP_ENGINE_TYPE engine_type;
+ uint32_t affinity;
+ uint64_t mqd_addr;
+ uint64_t h_context;
+ uint64_t h_queue;
+ uint32_t vm_context_cntl;
+
+ uint32_t process_csa_array_index;
+ uint32_t context_csa_array_index;
+
+ struct {
+ uint32_t is_context_suspended : 1;
+ uint32_t collaboration_mode : 1;
+ uint32_t reserved : 30;
+ };
+};
+
+struct umsch_mm_remove_queue_input {
+ uint32_t doorbell_offset_0;
+ uint32_t doorbell_offset_1;
+ uint64_t context_csa_addr;
+ uint32_t context_csa_array_index;
+};
+
+struct MQD_INFO {
+ uint32_t rb_base_hi;
+ uint32_t rb_base_lo;
+ uint32_t rb_size;
+ uint32_t wptr_val;
+ uint32_t rptr_val;
+ uint32_t unmapped;
+ uint32_t vmid;
+};
+
+struct amdgpu_umsch_mm;
+
+struct umsch_mm_funcs {
+ int (*set_hw_resources)(struct amdgpu_umsch_mm *umsch);
+ int (*add_queue)(struct amdgpu_umsch_mm *umsch,
+ struct umsch_mm_add_queue_input *input);
+ int (*remove_queue)(struct amdgpu_umsch_mm *umsch,
+ struct umsch_mm_remove_queue_input *input);
+ int (*set_regs)(struct amdgpu_umsch_mm *umsch);
+ int (*init_microcode)(struct amdgpu_umsch_mm *umsch);
+ int (*load_microcode)(struct amdgpu_umsch_mm *umsch);
+ int (*ring_init)(struct amdgpu_umsch_mm *umsch);
+ int (*ring_start)(struct amdgpu_umsch_mm *umsch);
+ int (*ring_stop)(struct amdgpu_umsch_mm *umsch);
+ int (*ring_fini)(struct amdgpu_umsch_mm *umsch);
+};
+
+struct amdgpu_umsch_mm {
+ struct amdgpu_ring ring;
+
+ uint32_t rb_wptr;
+ uint32_t rb_rptr;
+
+ const struct umsch_mm_funcs *funcs;
+
+ const struct firmware *fw;
+ uint32_t fw_version;
+ uint32_t feature_version;
+
+ struct amdgpu_bo *ucode_fw_obj;
+ uint64_t ucode_fw_gpu_addr;
+ uint32_t *ucode_fw_ptr;
+ uint64_t irq_start_addr;
+ uint64_t uc_start_addr;
+ uint32_t ucode_size;
+
+ struct amdgpu_bo *data_fw_obj;
+ uint64_t data_fw_gpu_addr;
+ uint32_t *data_fw_ptr;
+ uint64_t data_start_addr;
+ uint32_t data_size;
+
+ struct amdgpu_bo *cmd_buf_obj;
+ uint64_t cmd_buf_gpu_addr;
+ uint32_t *cmd_buf_ptr;
+ uint32_t *cmd_buf_curr_ptr;
+
+ uint32_t wb_index;
+ uint64_t sch_ctx_gpu_addr;
+ uint32_t *sch_ctx_cpu_addr;
+
+ uint32_t vmid_mask_mm_vcn;
+ uint32_t vmid_mask_mm_vpe;
+ uint32_t engine_mask;
+ uint32_t vcn0_hqd_mask;
+ uint32_t vcn1_hqd_mask;
+ uint32_t vcn_hqd_mask[2];
+ uint32_t vpe_hqd_mask;
+ uint32_t agdb_index[CONTEXT_PRIORITY_NUM_LEVELS];
+
+ struct mutex mutex_hidden;
+ struct amdgpu_bo *dbglog_bo;
+ void *log_cpu_addr;
+ uint64_t log_gpu_addr;
+ uint32_t mem_size;
+ uint32_t log_offset;
+};
+
+int amdgpu_umsch_mm_submit_pkt(struct amdgpu_umsch_mm *umsch, void *pkt, int ndws);
+int amdgpu_umsch_mm_query_fence(struct amdgpu_umsch_mm *umsch);
+
+int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch);
+int amdgpu_umsch_mm_allocate_ucode_buffer(struct amdgpu_umsch_mm *umsch);
+int amdgpu_umsch_mm_allocate_ucode_data_buffer(struct amdgpu_umsch_mm *umsch);
+
+int amdgpu_umsch_mm_psp_execute_cmd_buf(struct amdgpu_umsch_mm *umsch);
+
+int amdgpu_umsch_mm_ring_init(struct amdgpu_umsch_mm *umsch);
+
+void amdgpu_debugfs_umsch_fwlog_init(struct amdgpu_device *adev,
+ struct amdgpu_umsch_mm *umsch);
+
+void amdgpu_umsch_fwlog_init(struct amdgpu_umsch_mm *umsch_mm);
+
+#define WREG32_SOC15_UMSCH(reg, value) \
+ do { \
+ uint32_t reg_offset = adev->reg_offset[VCN_HWIP][0][reg##_BASE_IDX] + reg; \
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { \
+ *adev->umsch_mm.cmd_buf_curr_ptr++ = (reg_offset << 2); \
+ *adev->umsch_mm.cmd_buf_curr_ptr++ = value; \
+ } else { \
+ WREG32(reg_offset, value); \
+ } \
+ } while (0)
+
+#define umsch_mm_set_hw_resources(umsch) \
+ ((umsch)->funcs->set_hw_resources ? (umsch)->funcs->set_hw_resources((umsch)) : 0)
+#define umsch_mm_add_queue(umsch, input) \
+ ((umsch)->funcs->add_queue ? (umsch)->funcs->add_queue((umsch), (input)) : 0)
+#define umsch_mm_remove_queue(umsch, input) \
+ ((umsch)->funcs->remove_queue ? (umsch)->funcs->remove_queue((umsch), (input)) : 0)
+
+#define umsch_mm_set_regs(umsch) \
+ ((umsch)->funcs->set_regs ? (umsch)->funcs->set_regs((umsch)) : 0)
+#define umsch_mm_init_microcode(umsch) \
+ ((umsch)->funcs->init_microcode ? (umsch)->funcs->init_microcode((umsch)) : 0)
+#define umsch_mm_load_microcode(umsch) \
+ ((umsch)->funcs->load_microcode ? (umsch)->funcs->load_microcode((umsch)) : 0)
+
+#define umsch_mm_ring_init(umsch) \
+ ((umsch)->funcs->ring_init ? (umsch)->funcs->ring_init((umsch)) : 0)
+#define umsch_mm_ring_start(umsch) \
+ ((umsch)->funcs->ring_start ? (umsch)->funcs->ring_start((umsch)) : 0)
+#define umsch_mm_ring_stop(umsch) \
+ ((umsch)->funcs->ring_stop ? (umsch)->funcs->ring_stop((umsch)) : 0)
+#define umsch_mm_ring_fini(umsch) \
+ ((umsch)->funcs->ring_fini ? (umsch)->funcs->ring_fini((umsch)) : 0)
+
+/* Acquire the UMSCH MM instance mutex (mutex_hidden); may sleep. */
+static inline void amdgpu_umsch_mm_lock(struct amdgpu_umsch_mm *umsch)
+{
+ mutex_lock(&umsch->mutex_hidden);
+}
+
+/* Release the UMSCH MM instance mutex taken by amdgpu_umsch_mm_lock(). */
+static inline void amdgpu_umsch_mm_unlock(struct amdgpu_umsch_mm *umsch)
+{
+ mutex_unlock(&umsch->mutex_hidden);
+}
+
+extern const struct amdgpu_ip_block_version umsch_mm_v4_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
new file mode 100644
index 000000000000..9a969175900e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -0,0 +1,1482 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <drm/drm_auth.h>
+#include <drm/drm_exec.h>
+#include <linux/pm_runtime.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_userq.h"
+#include "amdgpu_hmm.h"
+#include "amdgpu_userq_fence.h"
+
+/**
+ * amdgpu_userq_get_supported_ip_mask - report which HW IPs offer user queues
+ * @adev: amdgpu device
+ *
+ * Return: bitmask with bit N set for every HW IP index N below
+ * AMDGPU_HW_IP_NUM that has a non-NULL adev->userq_funcs[N] entry.
+ */
+u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
+{
+	u32 mask = 0;
+	int ip;
+
+	for (ip = 0; ip < AMDGPU_HW_IP_NUM; ip++) {
+		if (!adev->userq_funcs[ip])
+			continue;
+		mask |= (1 << ip);
+	}
+
+	return mask;
+}
+
+/**
+ * amdgpu_userq_is_reset_type_supported - check a reset type against an IP
+ * @adev: amdgpu device
+ * @ring_type: ring type whose supported-reset mask is consulted
+ * @reset_type: reset type bit(s) to test
+ *
+ * Return: true when the supported-reset mask of the IP behind @ring_type has
+ * any bit of @reset_type set; false for out-of-range or unhandled ring types.
+ */
+static bool amdgpu_userq_is_reset_type_supported(struct amdgpu_device *adev,
+				enum amdgpu_ring_type ring_type, int reset_type)
+{
+	bool supported = false;
+
+	if (ring_type < 0 || ring_type >= AMDGPU_RING_TYPE_MAX)
+		return false;
+
+	switch (ring_type) {
+	case AMDGPU_RING_TYPE_GFX:
+		supported = !!(adev->gfx.gfx_supported_reset & reset_type);
+		break;
+	case AMDGPU_RING_TYPE_COMPUTE:
+		supported = !!(adev->gfx.compute_supported_reset & reset_type);
+		break;
+	case AMDGPU_RING_TYPE_SDMA:
+		supported = !!(adev->sdma.supported_reset & reset_type);
+		break;
+	case AMDGPU_RING_TYPE_VCN_DEC:
+	case AMDGPU_RING_TYPE_VCN_ENC:
+		supported = !!(adev->vcn.supported_reset & reset_type);
+		break;
+	case AMDGPU_RING_TYPE_VCN_JPEG:
+		supported = !!(adev->jpeg.supported_reset & reset_type);
+		break;
+	default:
+		break;
+	}
+
+	return supported;
+}
+
+/*
+ * Queue the device's userq reset work on its reset domain and block until
+ * that work has finished. Does nothing when
+ * amdgpu_device_should_recover_gpu() says recovery should not be attempted.
+ */
+static void amdgpu_userq_gpu_reset(struct amdgpu_device *adev)
+{
+	if (!amdgpu_device_should_recover_gpu(adev))
+		return;
+
+	amdgpu_reset_domain_schedule(adev->reset_domain,
+				     &adev->userq_reset_work);
+	/* Block until the scheduled reset job has run to completion */
+	flush_work(&adev->userq_reset_work);
+}
+
+/**
+ * amdgpu_userq_detect_and_reset_queues - per-queue hang detection and reset
+ * @uq_mgr: user queue manager; its userq_mutex is expected to be held
+ *
+ * Walks the COMPUTE, GFX and SDMA queue types in that order. For each type
+ * that supports per-queue reset and has at least one queue in @uq_mgr, the
+ * per-IP detect_and_reset hook is called. The first hook failure stops the
+ * walk and triggers amdgpu_userq_gpu_reset().
+ *
+ * Return: 0 when nothing was done or every hook succeeded, otherwise the
+ * error returned by the failing hook.
+ */
+static int
+amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr)
+{
+	struct amdgpu_device *adev = uq_mgr->adev;
+	const int queue_types[] = {
+		AMDGPU_RING_TYPE_COMPUTE,
+		AMDGPU_RING_TYPE_GFX,
+		AMDGPU_RING_TYPE_SDMA
+	};
+	const struct amdgpu_userq_funcs *funcs;
+	unsigned int idx;
+	int ring_type;
+	int r = 0;
+
+	/* Callers must hold the per-process userq mutex */
+	WARN_ON(!mutex_is_locked(&uq_mgr->userq_mutex));
+
+	if (unlikely(adev->debug_disable_gpu_ring_reset)) {
+		dev_err(adev->dev, "userq reset disabled by debug mask\n");
+		return 0;
+	}
+
+	/* With GPU recovery disabled system-wide there is nothing to detect */
+	if (!amdgpu_gpu_recovery)
+		return 0;
+
+	for (idx = 0; idx < ARRAY_SIZE(queue_types); idx++) {
+		ring_type = queue_types[idx];
+		funcs = adev->userq_funcs[ring_type];
+
+		if (!amdgpu_userq_is_reset_type_supported(adev, ring_type, AMDGPU_RESET_TYPE_PER_QUEUE))
+			continue;
+
+		if (atomic_read(&uq_mgr->userq_count[ring_type]) <= 0)
+			continue;
+
+		if (!funcs || !funcs->detect_and_reset)
+			continue;
+
+		r = funcs->detect_and_reset(adev, ring_type);
+		if (r) {
+			/* Per-queue reset failed: escalate to a GPU reset */
+			amdgpu_userq_gpu_reset(adev);
+			break;
+		}
+	}
+
+	return r;
+}
+
+/**
+ * amdgpu_userq_buffer_va_list_add - remember a validated queue buffer VA
+ * @queue: usermode queue that owns the buffer
+ * @va_map: VM mapping the address was found in
+ * @addr: address to record in the cursor's gpu_addr field
+ *
+ * Allocates a cursor for @addr, marks @va_map->bo_va as userq-mapped and
+ * links the cursor into @queue->userq_va_list.
+ *
+ * Return: 0 on success, -ENOMEM if the cursor cannot be allocated.
+ */
+static int amdgpu_userq_buffer_va_list_add(struct amdgpu_usermode_queue *queue,
+					   struct amdgpu_bo_va_mapping *va_map, u64 addr)
+{
+	struct amdgpu_userq_va_cursor *va_cursor;
+
+	va_cursor = kzalloc(sizeof(*va_cursor), GFP_KERNEL);
+	if (!va_cursor)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&va_cursor->list);
+	va_cursor->gpu_addr = addr;
+	atomic_set(&va_map->bo_va->userq_va_mapped, 1);
+	list_add(&va_cursor->list, &queue->userq_va_list);
+
+	return 0;
+}
+
+/**
+ * amdgpu_userq_input_va_validate - validate and track a user supplied VA
+ * @queue: usermode queue the address belongs to
+ * @addr: GPU virtual address supplied by userspace
+ * @expected_size: number of bytes that must be mapped starting at @addr
+ *
+ * Looks up the VM mapping that contains @addr (in GPU page units, with the
+ * GMC hole bits masked off) while holding the VM root BO reservation. When
+ * the mapping covers the requested size, the address is recorded on the
+ * queue's VA list.
+ *
+ * Return: 0 on success, -EINVAL when the range is not mapped, or the error
+ * from reserving the root BO or from recording the address (-ENOMEM).
+ */
+int amdgpu_userq_input_va_validate(struct amdgpu_usermode_queue *queue,
+				   u64 addr, u64 expected_size)
+{
+	struct amdgpu_bo_va_mapping *va_map;
+	struct amdgpu_vm *vm = queue->vm;
+	u64 user_addr;
+	u64 size;
+	int r;
+
+	user_addr = (addr & AMDGPU_GMC_HOLE_MASK) >> AMDGPU_GPU_PAGE_SHIFT;
+	size = expected_size >> AMDGPU_GPU_PAGE_SHIFT;
+
+	r = amdgpu_bo_reserve(vm->root.bo, false);
+	if (r)
+		return r;
+
+	va_map = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
+	if (!va_map) {
+		r = -EINVAL;
+		goto out_unreserve;
+	}
+
+	/* The whole requested range has to sit inside the mapping */
+	if (user_addr < va_map->start ||
+	    va_map->last - user_addr + 1 < size) {
+		r = -EINVAL;
+		goto out_unreserve;
+	}
+
+	/*
+	 * Report a failed cursor allocation instead of claiming success with
+	 * an address that is not tracked on the queue's VA list.
+	 */
+	r = amdgpu_userq_buffer_va_list_add(queue, va_map, user_addr);
+
+out_unreserve:
+	amdgpu_bo_unreserve(vm->root.bo);
+	return r;
+}
+
+/*
+ * Report whether @addr resolves to a VM mapping whose bo_va is flagged as
+ * userq-mapped. The lookup runs under the VM root BO reservation; failing to
+ * take the reservation is reported as "not mapped".
+ */
+static bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr)
+{
+	struct amdgpu_bo_va_mapping *va_map;
+	bool mapped = false;
+
+	if (amdgpu_bo_reserve(vm->root.bo, false))
+		return false;
+
+	va_map = amdgpu_vm_bo_lookup_mapping(vm, addr);
+	if (!IS_ERR_OR_NULL(va_map))
+		mapped = atomic_read(&va_map->bo_va->userq_va_mapped) != 0;
+
+	amdgpu_bo_unreserve(vm->root.bo);
+
+	return mapped;
+}
+
+/*
+ * Check the VAs recorded on @queue->userq_va_list.
+ *
+ * Returns true as soon as at least one recorded address is still
+ * userq-mapped; every entry is visited so the debug output is complete.
+ * NOTE(review): "any mapped" rather than "all mapped" - confirm this is the
+ * intended policy.
+ */
+static bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_usermode_queue *queue)
+{
+	struct amdgpu_userq_va_cursor *cur, *next;
+	int mapped = 0;
+
+	list_for_each_entry_safe(cur, next, &queue->userq_va_list, list) {
+		if (amdgpu_userq_buffer_va_mapped(queue->vm, cur->gpu_addr))
+			mapped++;
+		dev_dbg(queue->userq_mgr->adev->dev,
+			"validate the userq mapping:%p va:%llx r:%d\n",
+			queue, cur->gpu_addr, mapped);
+	}
+
+	return mapped != 0;
+}
+
+/*
+ * Drop one tracked queue VA: clear the userq-mapped flag on the mapping's
+ * bo_va, unlink @va_cursor from its list and free it. @va_cursor must not be
+ * used afterwards.
+ */
+static void amdgpu_userq_buffer_va_list_del(struct amdgpu_bo_va_mapping *mapping,
+ struct amdgpu_userq_va_cursor *va_cursor)
+{
+ atomic_set(&mapping->bo_va->userq_va_mapped, 0);
+ list_del(&va_cursor->list);
+ kfree(va_cursor);
+}
+
+/**
+ * amdgpu_userq_buffer_vas_list_cleanup - release all VAs tracked for a queue
+ * @adev: amdgpu device (used for debug output)
+ * @queue: usermode queue whose userq_va_list is emptied
+ *
+ * Runs under the VM root BO reservation. Stops at the first recorded address
+ * that no longer resolves to a mapping, leaving the remaining entries on the
+ * list.
+ *
+ * Return: 0 on success, -EINVAL when a mapping lookup fails, or the error
+ * from reserving the root BO.
+ */
+static int amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev,
+						struct amdgpu_usermode_queue *queue)
+{
+	struct amdgpu_userq_va_cursor *cur, *next;
+	struct amdgpu_bo_va_mapping *va_map;
+	int r;
+
+	r = amdgpu_bo_reserve(queue->vm->root.bo, false);
+	if (r)
+		return r;
+
+	list_for_each_entry_safe(cur, next, &queue->userq_va_list, list) {
+		va_map = amdgpu_vm_bo_lookup_mapping(queue->vm, cur->gpu_addr);
+		if (!va_map) {
+			r = -EINVAL;
+			break;
+		}
+
+		dev_dbg(adev->dev, "delete the userq:%p va:%llx\n",
+			queue, cur->gpu_addr);
+		amdgpu_userq_buffer_va_list_del(va_map, cur);
+	}
+
+	amdgpu_bo_unreserve(queue->vm->root.bo);
+	return r;
+}
+
+/*
+ * Preempt @queue if it is currently MAPPED.
+ *
+ * On success the queue becomes PREEMPTED. On failure it is marked HUNG and
+ * hang detection/reset is run for the manager. Queues in any other state are
+ * left untouched. Returns 0 or the error from the per-IP preempt hook.
+ */
+static int
+amdgpu_userq_preempt_helper(struct amdgpu_userq_mgr *uq_mgr,
+			    struct amdgpu_usermode_queue *queue)
+{
+	struct amdgpu_device *adev = uq_mgr->adev;
+	const struct amdgpu_userq_funcs *userq_funcs =
+		adev->userq_funcs[queue->queue_type];
+	int r;
+
+	if (queue->state != AMDGPU_USERQ_STATE_MAPPED)
+		return 0;
+
+	r = userq_funcs->preempt(uq_mgr, queue);
+	if (!r) {
+		queue->state = AMDGPU_USERQ_STATE_PREEMPTED;
+		return 0;
+	}
+
+	queue->state = AMDGPU_USERQ_STATE_HUNG;
+	amdgpu_userq_detect_and_reset_queues(uq_mgr);
+
+	return r;
+}
+
+/*
+ * Restore @queue if it is currently PREEMPTED.
+ *
+ * On success the queue becomes MAPPED, on failure it is marked HUNG. Queues
+ * in any other state are left untouched. Returns 0 or the error from the
+ * per-IP restore hook.
+ */
+static int
+amdgpu_userq_restore_helper(struct amdgpu_userq_mgr *uq_mgr,
+			    struct amdgpu_usermode_queue *queue)
+{
+	struct amdgpu_device *adev = uq_mgr->adev;
+	const struct amdgpu_userq_funcs *userq_funcs =
+		adev->userq_funcs[queue->queue_type];
+	int r;
+
+	if (queue->state != AMDGPU_USERQ_STATE_PREEMPTED)
+		return 0;
+
+	r = userq_funcs->restore(uq_mgr, queue);
+	queue->state = r ? AMDGPU_USERQ_STATE_HUNG : AMDGPU_USERQ_STATE_MAPPED;
+
+	return r;
+}
+
+/*
+ * Unmap @queue if it is currently MAPPED or PREEMPTED.
+ *
+ * On success the queue becomes UNMAPPED. On failure it is marked HUNG and
+ * hang detection/reset is run for the manager. Queues in any other state are
+ * left untouched. Returns 0 or the error from the per-IP unmap hook.
+ */
+static int
+amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
+			  struct amdgpu_usermode_queue *queue)
+{
+	struct amdgpu_device *adev = uq_mgr->adev;
+	const struct amdgpu_userq_funcs *userq_funcs =
+		adev->userq_funcs[queue->queue_type];
+	int r;
+
+	if (queue->state != AMDGPU_USERQ_STATE_MAPPED &&
+	    queue->state != AMDGPU_USERQ_STATE_PREEMPTED)
+		return 0;
+
+	r = userq_funcs->unmap(uq_mgr, queue);
+	if (!r) {
+		queue->state = AMDGPU_USERQ_STATE_UNMAPPED;
+		return 0;
+	}
+
+	queue->state = AMDGPU_USERQ_STATE_HUNG;
+	amdgpu_userq_detect_and_reset_queues(uq_mgr);
+
+	return r;
+}
+
+/*
+ * Map @queue if it is currently UNMAPPED.
+ *
+ * On success the queue becomes MAPPED. On failure it is marked HUNG and hang
+ * detection/reset is run for the manager. Queues in any other state are left
+ * untouched. Returns 0 or the error from the per-IP map hook.
+ */
+static int
+amdgpu_userq_map_helper(struct amdgpu_userq_mgr *uq_mgr,
+			struct amdgpu_usermode_queue *queue)
+{
+	struct amdgpu_device *adev = uq_mgr->adev;
+	const struct amdgpu_userq_funcs *userq_funcs =
+		adev->userq_funcs[queue->queue_type];
+	int r;
+
+	if (queue->state != AMDGPU_USERQ_STATE_UNMAPPED)
+		return 0;
+
+	r = userq_funcs->map(uq_mgr, queue);
+	if (!r) {
+		queue->state = AMDGPU_USERQ_STATE_MAPPED;
+		return 0;
+	}
+
+	queue->state = AMDGPU_USERQ_STATE_HUNG;
+	amdgpu_userq_detect_and_reset_queues(uq_mgr);
+
+	return r;
+}
+
+/**
+ * amdgpu_userq_wait_for_last_fence - wait for a queue's last fence to signal
+ * @uq_mgr: user queue manager (used for error reporting)
+ * @queue: queue whose last_fence is waited on
+ *
+ * Waits interruptibly and without a timeout limit (MAX_SCHEDULE_TIMEOUT).
+ * When the wait does not complete successfully the queue is marked HUNG.
+ *
+ * Return: 0 when there is no fence, it is already signaled or the wait
+ * succeeded; -ETIME when the wait failed or was interrupted.
+ */
+static int
+amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr,
+				 struct amdgpu_usermode_queue *queue)
+{
+	struct dma_fence *f = queue->last_fence;
+	/*
+	 * dma_fence_wait_timeout() returns a signed long. Keeping it in an int
+	 * truncates large "remaining jiffies" values: with
+	 * MAX_SCHEDULE_TIMEOUT a successful wait could be narrowed to a
+	 * negative number and be misreported as a timeout.
+	 */
+	long ret;
+
+	if (!f || dma_fence_is_signaled(f))
+		return 0;
+
+	ret = dma_fence_wait_timeout(f, true, MAX_SCHEDULE_TIMEOUT);
+	if (ret <= 0) {
+		drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
+			     f->context, f->seqno);
+		queue->state = AMDGPU_USERQ_STATE_HUNG;
+		return -ETIME;
+	}
+
+	return 0;
+}
+
+/*
+ * Tear down a usermode queue and free it.
+ *
+ * With adev->reset_domain->sem held for reading, this releases the VA cursors
+ * tracked for the queue, destroys the MQD through the per-IP mqd_destroy
+ * hook, frees the fence driver state, removes the queue from the manager's
+ * queue XArray and from the device doorbell XArray, and frees @queue.
+ * @queue must not be used after this returns.
+ */
+static void
+amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue,
+ int queue_id)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type];
+
+ /* Wait for mode-1 reset to complete */
+ down_read(&adev->reset_domain->sem);
+
+ /* Release the VA cursors tracked for this queue; the result is ignored. */
+ amdgpu_userq_buffer_vas_list_cleanup(adev, queue);
+ uq_funcs->mqd_destroy(uq_mgr, queue);
+ amdgpu_userq_fence_driver_free(queue);
+ /* Use interrupt-safe locking since IRQ handlers may access these XArrays */
+ xa_erase_irq(&uq_mgr->userq_mgr_xa, (unsigned long)queue_id);
+ xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index);
+ queue->userq_mgr = NULL;
+ /*
+  * NOTE(review): cursors left on the list by a failed cleanup above are
+  * not freed here; only the list head is unlinked.
+  */
+ list_del(&queue->userq_va_list);
+ kfree(queue);
+
+ up_read(&adev->reset_domain->sem);
+}
+
+/* Look up queue id @qid in the manager's queue XArray; NULL when absent. */
+static struct amdgpu_usermode_queue *
+amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
+{
+ return xa_load(&uq_mgr->userq_mgr_xa, qid);
+}
+
+/**
+ * amdgpu_userq_ensure_ev_fence - wait until a live eviction fence exists
+ * @uq_mgr: user queue manager
+ * @evf_mgr: eviction fence manager of the same client
+ *
+ * Flushes pending resume work and checks the current eviction fence. When it
+ * is missing or already signaled, resume work is scheduled and the check is
+ * repeated.
+ *
+ * Context: returns with @uq_mgr->userq_mutex HELD; the caller must unlock it.
+ */
+void
+amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr,
+			     struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+	struct amdgpu_eviction_fence *ev_fence;
+
+	for (;;) {
+		/* Let any pending resume work create the eviction fence first */
+		flush_delayed_work(&uq_mgr->resume_work);
+
+		mutex_lock(&uq_mgr->userq_mutex);
+		spin_lock(&evf_mgr->ev_fence_lock);
+		ev_fence = evf_mgr->ev_fence;
+		spin_unlock(&evf_mgr->ev_fence_lock);
+
+		/* Valid, unsignaled fence: done, keep userq_mutex locked */
+		if (ev_fence && !dma_fence_is_signaled(&ev_fence->base))
+			return;
+
+		mutex_unlock(&uq_mgr->userq_mutex);
+		/*
+		 * No resume work was pending; kick one off so that a valid
+		 * eviction fence gets created, then look again.
+		 */
+		schedule_delayed_work(&uq_mgr->resume_work, 0);
+	}
+}
+
+/*
+ * Allocate a zero-filled, CPU-mapped kernel BO in GTT for a user queue.
+ *
+ * On success @userq_obj->obj, ->cpu_ptr and ->gpu_addr are filled in and the
+ * first @size bytes of the CPU mapping are cleared. On failure the BO
+ * reference is dropped again.
+ *
+ * Returns 0 on success or the negative error code of the failing step.
+ */
+int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_userq_obj *userq_obj,
+ int size)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_bo_param bp;
+ int r;
+
+ memset(&bp, 0, sizeof(bp));
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+ bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ bp.type = ttm_bo_type_kernel;
+ bp.size = size;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+ r = amdgpu_bo_create(adev, &bp, &userq_obj->obj);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to allocate BO for userqueue (%d)", r);
+ return r;
+ }
+
+ /* GART allocation, kmap and offset query are done under the reservation */
+ r = amdgpu_bo_reserve(userq_obj->obj, true);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to reserve BO to map (%d)", r);
+ goto free_obj;
+ }
+
+ r = amdgpu_ttm_alloc_gart(&(userq_obj->obj)->tbo);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to alloc GART for userqueue object (%d)", r);
+ goto unresv;
+ }
+
+ r = amdgpu_bo_kmap(userq_obj->obj, &userq_obj->cpu_ptr);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to map BO for userqueue (%d)", r);
+ goto unresv;
+ }
+
+ userq_obj->gpu_addr = amdgpu_bo_gpu_offset(userq_obj->obj);
+ amdgpu_bo_unreserve(userq_obj->obj);
+ memset(userq_obj->cpu_ptr, 0, size);
+ return 0;
+
+unresv:
+ amdgpu_bo_unreserve(userq_obj->obj);
+
+free_obj:
+ amdgpu_bo_unref(&userq_obj->obj);
+ return r;
+}
+
+/*
+ * Counterpart of amdgpu_userq_create_object(): drop the CPU mapping of
+ * @userq_obj->obj and release the BO reference. @uq_mgr is not used here.
+ */
+void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_userq_obj *userq_obj)
+{
+ amdgpu_bo_kunmap(userq_obj->obj);
+ amdgpu_bo_unref(&userq_obj->obj);
+}
+
+/**
+ * amdgpu_userq_get_doorbell_index - turn a doorbell handle into a BAR index
+ * @uq_mgr: user queue manager
+ * @db_info: doorbell description (GEM handle, offset, queue type, target obj)
+ * @filp: DRM file the GEM handle belongs to
+ *
+ * Takes a reference on the doorbell BO and pins it in the doorbell domain;
+ * both are stored in @db_info->db_obj and are expected to be released when
+ * the queue is destroyed.
+ *
+ * Return: the absolute doorbell index on success. On failure a negative
+ * errno is returned converted to uint64_t, and the BO reference and pin
+ * taken here are dropped again.
+ */
+uint64_t
+amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
+				struct amdgpu_db_info *db_info,
+				struct drm_file *filp)
+{
+	uint64_t index;
+	struct drm_gem_object *gobj;
+	struct amdgpu_userq_obj *db_obj = db_info->db_obj;
+	int r, db_size;
+
+	gobj = drm_gem_object_lookup(filp, db_info->doorbell_handle);
+	if (gobj == NULL) {
+		drm_file_err(uq_mgr->file, "Can't find GEM object for doorbell\n");
+		return -EINVAL;
+	}
+
+	/* Hold our own BO reference; the lookup reference can go right away */
+	db_obj->obj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+	drm_gem_object_put(gobj);
+
+	r = amdgpu_bo_reserve(db_obj->obj, true);
+	if (r) {
+		/* This step reserves the BO; pinning only happens below */
+		drm_file_err(uq_mgr->file, "[Usermode queues] Failed to reserve doorbell object\n");
+		goto unref_bo;
+	}
+
+	/* Pin the BO before generating the index, unpin in queue destroy */
+	r = amdgpu_bo_pin(db_obj->obj, AMDGPU_GEM_DOMAIN_DOORBELL);
+	if (r) {
+		drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n");
+		goto unresv_bo;
+	}
+
+	switch (db_info->queue_type) {
+	case AMDGPU_HW_IP_GFX:
+	case AMDGPU_HW_IP_COMPUTE:
+	case AMDGPU_HW_IP_DMA:
+		db_size = sizeof(u64);
+		break;
+	default:
+		drm_file_err(uq_mgr->file, "[Usermode queues] IP %d not support\n",
+			     db_info->queue_type);
+		r = -EINVAL;
+		goto unpin_bo;
+	}
+
+	index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj,
+					     db_info->doorbell_offset, db_size);
+	/* index is unsigned, so print it as such */
+	drm_dbg_driver(adev_to_drm(uq_mgr->adev),
+		       "[Usermode queues] doorbell index=%llu\n", index);
+	amdgpu_bo_unreserve(db_obj->obj);
+	return index;
+
+unpin_bo:
+	amdgpu_bo_unpin(db_obj->obj);
+unresv_bo:
+	amdgpu_bo_unreserve(db_obj->obj);
+unref_bo:
+	amdgpu_bo_unref(&db_obj->obj);
+	return r;
+}
+
+/**
+ * amdgpu_userq_destroy - destroy the usermode queue with id @queue_id
+ * @filp: DRM file of the calling client
+ * @queue_id: id previously returned by queue creation
+ *
+ * Waits for the queue's last fence, unpins and releases the doorbell BO,
+ * unmaps the queue from the hardware and frees all of its resources. The
+ * runtime PM reference is dropped at the end.
+ *
+ * Return: 0 on success, -EINVAL for an unknown @queue_id, or the error from
+ * unmapping the queue (the queue is still cleaned up in that case).
+ */
+static int
+amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
+{
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
+	struct amdgpu_device *adev = uq_mgr->adev;
+	struct amdgpu_usermode_queue *queue;
+	int r = 0;
+
+	cancel_delayed_work_sync(&uq_mgr->resume_work);
+	mutex_lock(&uq_mgr->userq_mutex);
+
+	queue = amdgpu_userq_find(uq_mgr, queue_id);
+	if (!queue) {
+		drm_dbg_driver(adev_to_drm(uq_mgr->adev), "Invalid queue id to destroy\n");
+		mutex_unlock(&uq_mgr->userq_mutex);
+		return -EINVAL;
+	}
+	amdgpu_userq_wait_for_last_fence(uq_mgr, queue);
+
+	/* Undo the doorbell pin; the unpin is skipped if the reserve fails */
+	r = amdgpu_bo_reserve(queue->db_obj.obj, true);
+	if (!r) {
+		amdgpu_bo_unpin(queue->db_obj.obj);
+		amdgpu_bo_unreserve(queue->db_obj.obj);
+	}
+	amdgpu_bo_unref(&queue->db_obj.obj);
+	atomic_dec(&uq_mgr->userq_count[queue->queue_type]);
+#if defined(CONFIG_DEBUG_FS)
+	debugfs_remove_recursive(queue->debugfs_queue);
+#endif
+	amdgpu_userq_detect_and_reset_queues(uq_mgr);
+	r = amdgpu_userq_unmap_helper(uq_mgr, queue);
+	/*TODO: It requires a reset for userq hw unmap error*/
+	/*
+	 * r is an errno from the unmap helper, not a queue state, so test it
+	 * for failure instead of comparing it with a state enumerator.
+	 */
+	if (unlikely(r)) {
+		drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n");
+		queue->state = AMDGPU_USERQ_STATE_HUNG;
+	}
+	amdgpu_userq_cleanup(uq_mgr, queue, queue_id);
+	mutex_unlock(&uq_mgr->userq_mutex);
+
+	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+	return r;
+}
+
+/*
+ * Decide whether the caller may create a queue with @priority.
+ *
+ * Priorities below AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH are always
+ * allowed; anything else needs CAP_SYS_NICE or the current DRM master.
+ * Returns 0 when permitted, -EACCES otherwise.
+ */
+static int amdgpu_userq_priority_permit(struct drm_file *filp,
+					int priority)
+{
+	if (priority < AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH ||
+	    capable(CAP_SYS_NICE) ||
+	    drm_is_current_master(filp))
+		return 0;
+
+	return -EACCES;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+/*
+ * seq_file show callback for the per-queue "mqd_info" debugfs file: prints
+ * the queue type and the GPU address of the MQD BO. The BO is referenced and
+ * reserved while its address is read. Returns 0, or -EINVAL when the queue or
+ * its MQD is missing or the BO cannot be reserved.
+ */
+static int amdgpu_mqd_info_read(struct seq_file *m, void *unused)
+{
+	struct amdgpu_usermode_queue *queue = m->private;
+	struct amdgpu_bo *mqd_bo;
+
+	if (!queue || !queue->mqd.obj)
+		return -EINVAL;
+
+	mqd_bo = amdgpu_bo_ref(queue->mqd.obj);
+	if (amdgpu_bo_reserve(mqd_bo, true)) {
+		amdgpu_bo_unref(&mqd_bo);
+		return -EINVAL;
+	}
+
+	seq_printf(m, "queue_type: %d\n", queue->queue_type);
+	seq_printf(m, "mqd_gpu_address: 0x%llx\n", amdgpu_bo_gpu_offset(queue->mqd.obj));
+
+	amdgpu_bo_unreserve(mqd_bo);
+	amdgpu_bo_unref(&mqd_bo);
+
+	return 0;
+}
+
+/* debugfs open: bind amdgpu_mqd_info_read() to the queue kept in i_private. */
+static int amdgpu_mqd_info_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, amdgpu_mqd_info_read, inode->i_private);
+}
+
+/* File operations of the per-queue "mqd_info" debugfs file (single_open seq_file). */
+static const struct file_operations amdgpu_mqd_info_fops = {
+ .owner = THIS_MODULE,
+ .open = amdgpu_mqd_info_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+#endif
+
+/**
+ * amdgpu_userq_create - create a usermode queue and, unless halted, map it
+ * @filp: DRM file of the calling client
+ * @args: ioctl arguments; on success @args->out.queue_id holds the new id
+ *
+ * Takes a runtime PM reference that is kept for the lifetime of the queue
+ * (amdgpu_userq_destroy() drops it). Every failure path unwinds exactly what
+ * has been set up so far: XArray entries, MQD, fence driver, doorbell pin and
+ * reference, tracked VAs, the queue itself and the runtime PM reference.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+static int
+amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
+{
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
+	struct amdgpu_device *adev = uq_mgr->adev;
+	const struct amdgpu_userq_funcs *uq_funcs;
+	struct amdgpu_usermode_queue *queue;
+	struct amdgpu_db_info db_info;
+	bool skip_map_queue;
+	u32 qid;
+	uint64_t index;
+	int r = 0;
+	int priority =
+		(args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >>
+		AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT;
+
+	r = amdgpu_userq_priority_permit(filp, priority);
+	if (r)
+		return r;
+
+	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+	if (r < 0) {
+		drm_file_err(uq_mgr->file, "pm_runtime_get_sync() failed for userqueue create\n");
+		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+		return r;
+	}
+
+	/*
+	 * There could be a situation that we are creating a new queue while
+	 * the other queues under this UQ_mgr are suspended. So if there is any
+	 * resume work pending, wait for it to get done.
+	 *
+	 * This will also make sure we have a valid eviction fence ready to be
+	 * used. Note that this returns with uq_mgr->userq_mutex held.
+	 */
+	amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
+
+	uq_funcs = adev->userq_funcs[args->in.ip_type];
+	if (!uq_funcs) {
+		drm_file_err(uq_mgr->file, "Usermode queue is not supported for this IP (%u)\n",
+			     args->in.ip_type);
+		r = -EINVAL;
+		goto err_unlock;
+	}
+
+	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+	if (!queue) {
+		drm_file_err(uq_mgr->file, "Failed to allocate memory for queue\n");
+		r = -ENOMEM;
+		goto err_unlock;
+	}
+
+	INIT_LIST_HEAD(&queue->userq_va_list);
+	queue->doorbell_handle = args->in.doorbell_handle;
+	queue->queue_type = args->in.ip_type;
+	queue->vm = &fpriv->vm;
+	queue->priority = priority;
+
+	db_info.queue_type = queue->queue_type;
+	db_info.doorbell_handle = queue->doorbell_handle;
+	db_info.db_obj = &queue->db_obj;
+	db_info.doorbell_offset = args->in.doorbell_offset;
+
+	/*
+	 * Validate the userq virtual addresses. Each successful call records a
+	 * cursor on queue->userq_va_list, so a later failure has to release
+	 * the ones already recorded.
+	 */
+	if (amdgpu_userq_input_va_validate(queue, args->in.queue_va, args->in.queue_size) ||
+	    amdgpu_userq_input_va_validate(queue, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) ||
+	    amdgpu_userq_input_va_validate(queue, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) {
+		r = -EINVAL;
+		goto err_free_queue;
+	}
+
+	/* Convert relative doorbell offset into absolute doorbell index */
+	index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp);
+	/*
+	 * Errors come back as a negative errno converted to uint64_t. Catch
+	 * all of them, not just -EINVAL, so a failed reserve or pin is never
+	 * mistaken for a doorbell index.
+	 */
+	if ((s64)index < 0) {
+		drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n");
+		r = (int)(s64)index;
+		goto err_free_queue;
+	}
+
+	queue->doorbell_index = index;
+	xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
+	r = amdgpu_userq_fence_driver_alloc(adev, queue);
+	if (r) {
+		drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n");
+		goto err_release_doorbell;
+	}
+
+	r = uq_funcs->mqd_create(uq_mgr, &args->in, queue);
+	if (r) {
+		drm_file_err(uq_mgr->file, "Failed to create Queue\n");
+		goto err_free_fence_drv;
+	}
+
+	/* Wait for mode-1 reset to complete */
+	down_read(&adev->reset_domain->sem);
+	r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL));
+	if (r) {
+		up_read(&adev->reset_domain->sem);
+		goto err_destroy_mqd;
+	}
+
+	r = xa_alloc(&uq_mgr->userq_mgr_xa, &qid, queue, XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL);
+	if (r) {
+		drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n");
+		/* Don't leave a doorbell entry pointing at a freed queue */
+		xa_erase_irq(&adev->userq_doorbell_xa, index);
+		up_read(&adev->reset_domain->sem);
+		r = -ENOMEM;
+		goto err_destroy_mqd;
+	}
+	up_read(&adev->reset_domain->sem);
+	queue->userq_mgr = uq_mgr;
+
+	/* don't map the queue if scheduling is halted */
+	skip_map_queue = adev->userq_halt_for_enforce_isolation &&
+			 ((queue->queue_type == AMDGPU_HW_IP_GFX) ||
+			  (queue->queue_type == AMDGPU_HW_IP_COMPUTE));
+	if (!skip_map_queue) {
+		r = amdgpu_userq_map_helper(uq_mgr, queue);
+		if (r) {
+			drm_file_err(uq_mgr->file, "Failed to map Queue\n");
+			goto err_erase_xa;
+		}
+	}
+
+#if defined(CONFIG_DEBUG_FS)
+	{
+		/*
+		 * A stack buffer is large enough for "queue-" plus a 32-bit
+		 * id, so there is no allocation that could fail after the
+		 * queue is already live.
+		 */
+		char queue_name[32];
+
+		snprintf(queue_name, sizeof(queue_name), "queue-%d", qid);
+		/* Queue dentry per client to hold MQD information */
+		queue->debugfs_queue = debugfs_create_dir(queue_name, filp->debugfs_client);
+		debugfs_create_file("mqd_info", 0444, queue->debugfs_queue, queue, &amdgpu_mqd_info_fops);
+	}
+#endif
+
+	args->out.queue_id = qid;
+	atomic_inc(&uq_mgr->userq_count[queue->queue_type]);
+	mutex_unlock(&uq_mgr->userq_mutex);
+
+	/* The runtime PM reference stays with the queue until it is destroyed */
+	return 0;
+
+err_erase_xa:
+	/* Same locking as amdgpu_userq_cleanup(): IRQ-safe erase under the reset sem */
+	down_read(&adev->reset_domain->sem);
+	xa_erase_irq(&uq_mgr->userq_mgr_xa, qid);
+	xa_erase_irq(&adev->userq_doorbell_xa, index);
+	up_read(&adev->reset_domain->sem);
+err_destroy_mqd:
+	uq_funcs->mqd_destroy(uq_mgr, queue);
+err_free_fence_drv:
+	amdgpu_userq_fence_driver_free(queue);
+err_release_doorbell:
+	/* Undo the pin and reference taken by amdgpu_userq_get_doorbell_index() */
+	if (!amdgpu_bo_reserve(queue->db_obj.obj, true)) {
+		amdgpu_bo_unpin(queue->db_obj.obj);
+		amdgpu_bo_unreserve(queue->db_obj.obj);
+	}
+	amdgpu_bo_unref(&queue->db_obj.obj);
+err_free_queue:
+	/* Best effort: release whatever VA cursors were recorded so far */
+	amdgpu_userq_buffer_vas_list_cleanup(adev, queue);
+	kfree(queue);
+err_unlock:
+	mutex_unlock(&uq_mgr->userq_mutex);
+	/* No queue was created, so nothing will drop this reference later */
+	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+	return r;
+}
+
+/*
+ * Sanity-check the userq ioctl arguments before any work is done.
+ *
+ * CREATE: only known flag bits may be set, the IP type must be GFX, DMA or
+ * COMPUTE, and the queue, rptr and wptr addresses and the queue size must be
+ * non-zero. FREE: every input field checked below must be zero. Any other
+ * op is rejected.
+ *
+ * Returns 0 when the arguments are acceptable, -EINVAL otherwise.
+ */
+static int amdgpu_userq_input_args_validate(struct drm_device *dev,
+ union drm_amdgpu_userq *args,
+ struct drm_file *filp)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ switch (args->in.op) {
+ case AMDGPU_USERQ_OP_CREATE:
+ if (args->in.flags & ~(AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK |
+ AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE))
+ return -EINVAL;
+ /* Usermode queues are only supported for GFX, DMA and COMPUTE IPs */
+ if (args->in.ip_type != AMDGPU_HW_IP_GFX &&
+ args->in.ip_type != AMDGPU_HW_IP_DMA &&
+ args->in.ip_type != AMDGPU_HW_IP_COMPUTE) {
+ drm_file_err(filp, "Usermode queue doesn't support IP type %u\n",
+ args->in.ip_type);
+ return -EINVAL;
+ }
+
+ /*
+  * NOTE(review): as written this only rejects a secure queue when the
+  * IP is neither GFX nor COMPUTE *and* TMZ is unavailable. A secure
+  * DMA queue on a TMZ device, or a secure GFX queue without TMZ, is
+  * accepted - confirm this matches the intent of the message below.
+  */
+ if ((args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE) &&
+ (args->in.ip_type != AMDGPU_HW_IP_GFX) &&
+ (args->in.ip_type != AMDGPU_HW_IP_COMPUTE) &&
+ !amdgpu_is_tmz(adev)) {
+ drm_file_err(filp, "Secure only supported on GFX/Compute queues\n");
+ return -EINVAL;
+ }
+
+ if (args->in.queue_va == AMDGPU_BO_INVALID_OFFSET ||
+ args->in.queue_va == 0 ||
+ args->in.queue_size == 0) {
+ drm_file_err(filp, "invalidate userq queue va or size\n");
+ return -EINVAL;
+ }
+ if (!args->in.wptr_va || !args->in.rptr_va) {
+ drm_file_err(filp, "invalidate userq queue rptr or wptr\n");
+ return -EINVAL;
+ }
+ break;
+ case AMDGPU_USERQ_OP_FREE:
+ /* FREE only takes the queue id; every other input must be zero */
+ if (args->in.ip_type ||
+ args->in.doorbell_handle ||
+ args->in.doorbell_offset ||
+ args->in.flags ||
+ args->in.queue_va ||
+ args->in.queue_size ||
+ args->in.rptr_va ||
+ args->in.wptr_va ||
+ args->in.mqd ||
+ args->in.mqd_size)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_userq_ioctl - entry point of the usermode queue ioctl
+ * @dev: DRM device
+ * @data: ioctl payload, a union drm_amdgpu_userq
+ * @filp: DRM file of the calling client
+ *
+ * Validates the arguments and dispatches to queue creation or destruction.
+ *
+ * Return: 0 on success, -EINVAL for invalid arguments or an unknown op,
+ * otherwise the error from the create/destroy path.
+ */
+int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *filp)
+{
+	union drm_amdgpu_userq *args = data;
+	int r;
+
+	r = amdgpu_userq_input_args_validate(dev, args, filp);
+	if (r < 0)
+		return -EINVAL;
+
+	if (args->in.op == AMDGPU_USERQ_OP_CREATE) {
+		r = amdgpu_userq_create(filp, args);
+		if (r)
+			drm_file_err(filp, "Failed to create usermode queue\n");
+	} else if (args->in.op == AMDGPU_USERQ_OP_FREE) {
+		r = amdgpu_userq_destroy(filp, args->in.queue_id);
+		if (r)
+			drm_file_err(filp, "Failed to destroy usermode queue\n");
+	} else {
+		drm_dbg_driver(dev, "Invalid user queue op specified: %d\n", args->in.op);
+		return -EINVAL;
+	}
+
+	return r;
+}
+
+/*
+ * Restore every queue of this manager.
+ *
+ * Queues whose tracked VAs are no longer mapped are flagged INVALID_VA and
+ * skipped. All queues are visited even after a failure; the last restore
+ * error seen is returned (0 when none failed).
+ */
+static int
+amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
+{
+	struct amdgpu_usermode_queue *queue;
+	unsigned long queue_id;
+	int last_err = 0;
+	int r;
+
+	xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) {
+		if (amdgpu_userq_buffer_vas_mapped(queue)) {
+			r = amdgpu_userq_restore_helper(uq_mgr, queue);
+			if (r)
+				last_err = r;
+		} else {
+			drm_file_err(uq_mgr->file,
+				     "trying restore queue without va mapping\n");
+			queue->state = AMDGPU_USERQ_STATE_INVALID_VA;
+		}
+	}
+
+	if (last_err)
+		drm_file_err(uq_mgr->file, "Failed to map all the queues\n");
+
+	return last_err;
+}
+
+/*
+ * Validation callback handed to amdgpu_vm_validate(): place @bo according to
+ * its allowed domains and validate it. @param is unused. Returns the result
+ * of ttm_bo_validate().
+ */
+static int amdgpu_userq_validate_vm(void *param, struct amdgpu_bo *bo)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+
+ amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+ return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+}
+
+/*
+ * Handle all BOs on the invalidated list, validate them and update the PTs.
+ *
+ * Each BO is locked through @exec, validated into its allowed domains and its
+ * mappings are updated. vm->status_lock only protects the list inspection and
+ * is dropped before any of that work. Returns 0 once the list is empty, or
+ * the first error, in which case status_lock is not held on return either.
+ */
+static int
+amdgpu_userq_bo_validate(struct amdgpu_device *adev, struct drm_exec *exec,
+ struct amdgpu_vm *vm)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_bo_va *bo_va;
+ struct amdgpu_bo *bo;
+ int ret;
+
+ spin_lock(&vm->status_lock);
+ while (!list_empty(&vm->invalidated)) {
+ bo_va = list_first_entry(&vm->invalidated,
+ struct amdgpu_bo_va,
+ base.vm_status);
+ /* Drop the spinlock before the calls below */
+ spin_unlock(&vm->status_lock);
+
+ bo = bo_va->base.bo;
+ ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 2);
+ if (unlikely(ret))
+ return ret;
+
+ amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ return ret;
+
+ /* This moves the bo_va to the done list */
+ ret = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (ret)
+ return ret;
+
+ /* Re-take the lock for the next list_empty() check */
+ spin_lock(&vm->status_lock);
+ }
+ spin_unlock(&vm->status_lock);
+
+ return 0;
+}
+
+/**
+ * amdgpu_userq_vm_validate - make sure the whole VM is ready to be used
+ * @uq_mgr: user queue manager whose client VM is validated
+ *
+ * Locks the page directory and the done list, validates PDs/PTs, per-VM BOs
+ * and invalidated BOs, and handles moved BOs. Userptr BOs on the done list
+ * get an HMM range each; when new ranges were added, all locks are dropped,
+ * the user pages are fetched and the locking and validation are redone.
+ * Finally the page directory entries are updated, all page table updates are
+ * waited for and the eviction fence is replaced.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+static int
+amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
+{
+	struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
+	bool invalidated = false, new_addition = false;
+	struct ttm_operation_ctx ctx = { true, false };
+	struct amdgpu_device *adev = uq_mgr->adev;
+	struct amdgpu_hmm_range *range;
+	struct amdgpu_vm *vm = &fpriv->vm;
+	unsigned long key, tmp_key;
+	struct amdgpu_bo_va *bo_va;
+	struct amdgpu_bo *bo;
+	struct drm_exec exec;
+	struct xarray xa;
+	int ret;
+
+	xa_init(&xa);
+
+retry_lock:
+	drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
+	drm_exec_until_all_locked(&exec) {
+		ret = amdgpu_vm_lock_pd(vm, &exec, 1);
+		drm_exec_retry_on_contention(&exec);
+		if (unlikely(ret))
+			goto unlock_all;
+
+		ret = amdgpu_vm_lock_done_list(vm, &exec, 1);
+		drm_exec_retry_on_contention(&exec);
+		if (unlikely(ret))
+			goto unlock_all;
+
+		/* This validates PDs, PTs and per VM BOs */
+		ret = amdgpu_vm_validate(adev, vm, NULL,
+					 amdgpu_userq_validate_vm,
+					 NULL);
+		if (unlikely(ret))
+			goto unlock_all;
+
+		/* This locks and validates the remaining evicted BOs */
+		ret = amdgpu_userq_bo_validate(adev, &exec, vm);
+		drm_exec_retry_on_contention(&exec);
+		if (unlikely(ret))
+			goto unlock_all;
+	}
+
+	if (invalidated) {
+		/* Second pass: bind the freshly fetched user pages */
+		xa_for_each(&xa, tmp_key, range) {
+			bo = range->bo;
+			amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+			if (ret)
+				goto unlock_all;
+
+			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+
+			amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+			if (ret)
+				goto unlock_all;
+		}
+		invalidated = false;
+	}
+
+	ret = amdgpu_vm_handle_moved(adev, vm, NULL);
+	if (ret)
+		goto unlock_all;
+
+	key = 0;
+	/* Validate User Ptr BOs */
+	list_for_each_entry(bo_va, &vm->done, base.vm_status) {
+		bo = bo_va->base.bo;
+		if (!bo)
+			continue;
+
+		if (!amdgpu_ttm_tt_is_userptr(bo->tbo.ttm))
+			continue;
+
+		/* Slot @key may hold a range of a different BO from an earlier pass */
+		range = xa_load(&xa, key);
+		if (range && range->bo != bo) {
+			xa_erase(&xa, key);
+			amdgpu_hmm_range_free(range);
+			range = NULL;
+		}
+
+		if (!range) {
+			range = amdgpu_hmm_range_alloc(bo);
+			if (!range) {
+				ret = -ENOMEM;
+				goto unlock_all;
+			}
+
+			/* A range that could not be stored would never be freed */
+			ret = xa_err(xa_store(&xa, key, range, GFP_KERNEL));
+			if (ret) {
+				amdgpu_hmm_range_free(range);
+				goto unlock_all;
+			}
+			new_addition = true;
+		}
+		key++;
+	}
+
+	if (new_addition) {
+		/* Fetching user pages must happen without the BO locks held */
+		drm_exec_fini(&exec);
+		xa_for_each(&xa, tmp_key, range) {
+			if (!range)
+				continue;
+			bo = range->bo;
+			ret = amdgpu_ttm_tt_get_user_pages(bo, range);
+			/*
+			 * exec was already finalised above, so skip the second
+			 * drm_exec_fini() in the common exit path.
+			 */
+			if (ret)
+				goto free_ranges;
+		}
+
+		invalidated = true;
+		new_addition = false;
+		goto retry_lock;
+	}
+
+	ret = amdgpu_vm_update_pdes(adev, vm, false);
+	if (ret)
+		goto unlock_all;
+
+	/*
+	 * We need to wait for all VM updates to finish before restarting the
+	 * queues. Using the done list like that is now ok since everything is
+	 * locked in place.
+	 */
+	list_for_each_entry(bo_va, &vm->done, base.vm_status)
+		dma_fence_wait(bo_va->last_pt_update, false);
+	dma_fence_wait(vm->last_update, false);
+
+	ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec);
+	if (ret)
+		drm_file_err(uq_mgr->file, "Failed to replace eviction fence\n");
+
+unlock_all:
+	drm_exec_fini(&exec);
+free_ranges:
+	xa_for_each(&xa, tmp_key, range) {
+		if (!range)
+			continue;
+		amdgpu_hmm_range_free(range);
+	}
+	xa_destroy(&xa);
+	return ret;
+}
+
+/*
+ * Delayed-work handler behind uq_mgr->resume_work: after any pending suspend
+ * work has finished, validate the client's VM and restore all of its queues
+ * under userq_mutex. Failures are only logged.
+ */
+static void amdgpu_userq_restore_worker(struct work_struct *work)
+{
+	struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, resume_work.work);
+	struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
+	int ret;
+
+	flush_delayed_work(&fpriv->evf_mgr.suspend_work);
+
+	mutex_lock(&uq_mgr->userq_mutex);
+
+	ret = amdgpu_userq_vm_validate(uq_mgr);
+	if (ret) {
+		drm_file_err(uq_mgr->file, "Failed to validate BOs to restore\n");
+	} else {
+		ret = amdgpu_userq_restore_all(uq_mgr);
+		if (ret)
+			drm_file_err(uq_mgr->file, "Failed to restore all queues\n");
+	}
+
+	mutex_unlock(&uq_mgr->userq_mutex);
+}
+
+/*
+ * Run hang detection, then preempt every queue of this manager. All queues
+ * are visited even after a failure; the last preempt error seen is returned
+ * (0 when none failed).
+ */
+static int
+amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
+{
+	struct amdgpu_usermode_queue *queue;
+	unsigned long queue_id;
+	int last_err = 0;
+	int r;
+
+	amdgpu_userq_detect_and_reset_queues(uq_mgr);
+
+	/* Try to take every queue of this process context off the hardware */
+	xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) {
+		r = amdgpu_userq_preempt_helper(uq_mgr, queue);
+		if (r)
+			last_err = r;
+	}
+
+	if (last_err)
+		drm_file_err(uq_mgr->file, "Couldn't unmap all the queues\n");
+
+	return last_err;
+}
+
+/*
+ * Work handler for adev->userq_reset_work: request a full GPU recovery on
+ * behalf of the user queue code (reset source AMDGPU_RESET_SRC_USERQ).
+ */
+void amdgpu_userq_reset_work(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ userq_reset_work);
+ struct amdgpu_reset_context reset_context;
+
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ reset_context.src = AMDGPU_RESET_SRC_USERQ;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ /* Coredump skipping is intentionally left disabled for now: */
+ /*set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);*/
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+}
+
+/*
+ * Wait up to 100 ms (interruptibly) for the last fence of every queue of this
+ * manager. Returns 0 when all pending fences signaled, -ETIMEDOUT as soon as
+ * one wait times out or fails.
+ */
+static int
+amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
+{
+	struct amdgpu_usermode_queue *queue;
+	unsigned long queue_id;
+	struct dma_fence *f;
+	int ret;
+
+	xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) {
+		f = queue->last_fence;
+
+		if (f && !dma_fence_is_signaled(f)) {
+			ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
+			if (ret <= 0) {
+				drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
+					     f->context, f->seqno);
+				return -ETIMEDOUT;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * amdgpu_userq_evict - evict the user queues of a manager and signal its
+ * eviction fence
+ * @uq_mgr: user queue manager whose queues are evicted
+ * @ev_fence: eviction fence that is signalled after the eviction attempt
+ *
+ * Failures while waiting for outstanding fences or while evicting are only
+ * logged; the eviction fence is signalled in every case. Unless the file is
+ * being closed, the resume work is scheduled afterwards so the queues get
+ * restored.
+ */
+void
+amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_eviction_fence *ev_fence)
+{
+ struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
+ struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr;
+ struct amdgpu_device *adev = uq_mgr->adev;
+ int ret;
+
+ /* Wait for any pending userqueue fence work to finish */
+ ret = amdgpu_userq_wait_for_signal(uq_mgr);
+ /*
+ * NOTE(review): despite the wording of this message, execution does
+ * not stop here; the queues are still evicted below.
+ */
+ if (ret)
+ dev_err(adev->dev, "Not evicting userqueue, timeout waiting for work\n");
+
+ ret = amdgpu_userq_evict_all(uq_mgr);
+ if (ret)
+ dev_err(adev->dev, "Failed to evict userqueue\n");
+
+ /* Signal current eviction fence */
+ amdgpu_eviction_fence_signal(evf_mgr, ev_fence);
+
+ /* File is closing: make sure no resume work runs and do not re-arm it */
+ if (evf_mgr->fd_closing) {
+ cancel_delayed_work_sync(&uq_mgr->resume_work);
+ return;
+ }
+
+ /* Schedule a resume work */
+ schedule_delayed_work(&uq_mgr->resume_work, 0);
+}
+
+/*
+ * amdgpu_userq_mgr_init - set up a user queue manager
+ * @userq_mgr: manager to initialise
+ * @file_priv: DRM file the manager belongs to
+ * @adev: owning amdgpu device
+ *
+ * Records the back pointers and initialises the mutex, the allocating queue
+ * xarray and the delayed resume work. Always returns 0.
+ */
+int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
+ struct amdgpu_device *adev)
+{
+ userq_mgr->file = file_priv;
+ userq_mgr->adev = adev;
+
+ xa_init_flags(&userq_mgr->userq_mgr_xa, XA_FLAGS_ALLOC);
+ mutex_init(&userq_mgr->userq_mutex);
+ INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker);
+
+ return 0;
+}
+
+/*
+ * amdgpu_userq_mgr_fini - tear down a user queue manager
+ * @userq_mgr: manager to tear down
+ *
+ * Cancels a pending resume work first, then, holding the manager mutex, waits
+ * for the last fence of each queue, unmaps it and cleans it up. Finally the
+ * queue xarray and the mutex are destroyed.
+ */
+void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
+{
+ struct amdgpu_usermode_queue *queue;
+ unsigned long queue_id;
+
+ /* Make sure the restore worker is neither running nor queued */
+ cancel_delayed_work_sync(&userq_mgr->resume_work);
+
+ mutex_lock(&userq_mgr->userq_mutex);
+ amdgpu_userq_detect_and_reset_queues(userq_mgr);
+ xa_for_each(&userq_mgr->userq_mgr_xa, queue_id, queue) {
+ amdgpu_userq_wait_for_last_fence(userq_mgr, queue);
+ amdgpu_userq_unmap_helper(userq_mgr, queue);
+ amdgpu_userq_cleanup(userq_mgr, queue, queue_id);
+ }
+
+ xa_destroy(&userq_mgr->userq_mgr_xa);
+ mutex_unlock(&userq_mgr->userq_mutex);
+ mutex_destroy(&userq_mgr->userq_mutex);
+}
+
+/*
+ * amdgpu_userq_suspend - take every user queue of the device off the hardware
+ * @adev: amdgpu device
+ *
+ * Walks adev->userq_doorbell_xa. For each queue the owning manager's resume
+ * work is cancelled and, under the manager mutex, the queue is preempted when
+ * the device is in s0ix and unmapped otherwise.
+ *
+ * Returns 0 on success or when no IP supports user queues. On the first
+ * failing queue its error is returned and the remaining queues are not
+ * visited.
+ */
+int amdgpu_userq_suspend(struct amdgpu_device *adev)
+{
+ u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
+ int r;
+
+ if (!ip_mask)
+ return 0;
+
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
+ cancel_delayed_work_sync(&uqm->resume_work);
+ /*
+ * Scope-based lock: dropped automatically at the end of each
+ * loop iteration and on the early return below.
+ */
+ guard(mutex)(&uqm->userq_mutex);
+ amdgpu_userq_detect_and_reset_queues(uqm);
+ if (adev->in_s0ix)
+ r = amdgpu_userq_preempt_helper(uqm, queue);
+ else
+ r = amdgpu_userq_unmap_helper(uqm, queue);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+/*
+ * amdgpu_userq_resume - bring every user queue of the device back
+ * @adev: amdgpu device
+ *
+ * Counterpart of amdgpu_userq_suspend(): under the owning manager's mutex each
+ * queue in adev->userq_doorbell_xa is restored when the device is in s0ix and
+ * mapped otherwise.
+ *
+ * Returns 0 on success or when no IP supports user queues. On the first
+ * failing queue its error is returned and the remaining queues are not
+ * visited.
+ */
+int amdgpu_userq_resume(struct amdgpu_device *adev)
+{
+ u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
+ int r;
+
+ if (!ip_mask)
+ return 0;
+
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
+ /* Scope-based lock, released at the end of each iteration */
+ guard(mutex)(&uqm->userq_mutex);
+ if (adev->in_s0ix)
+ r = amdgpu_userq_restore_helper(uqm, queue);
+ else
+ r = amdgpu_userq_map_helper(uqm, queue);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/*
+ * amdgpu_userq_stop_sched_for_enforce_isolation - preempt gfx/compute user
+ * queues of one partition
+ * @adev: amdgpu device
+ * @idx: partition id; only queues whose xcp_id equals @idx are preempted
+ *
+ * Sets adev->userq_halt_for_enforce_isolation (warning if it was already set)
+ * and preempts every matching GFX or COMPUTE queue. A failing queue does not
+ * stop the walk.
+ *
+ * Returns 0 when nothing failed or when neither GFX nor COMPUTE user queues
+ * are supported, otherwise the error of the last queue that failed.
+ */
+int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
+ u32 idx)
+{
+ u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
+ int ret = 0, r;
+
+ /* only need to stop gfx/compute */
+ if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
+ return 0;
+
+ if (adev->userq_halt_for_enforce_isolation)
+ dev_warn(adev->dev, "userq scheduling already stopped!\n");
+ adev->userq_halt_for_enforce_isolation = true;
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
+ /* The resume work is cancelled for every queue, matching or not */
+ cancel_delayed_work_sync(&uqm->resume_work);
+ mutex_lock(&uqm->userq_mutex);
+ if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
+ (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
+ (queue->xcp_id == idx)) {
+ amdgpu_userq_detect_and_reset_queues(uqm);
+ r = amdgpu_userq_preempt_helper(uqm, queue);
+ if (r)
+ ret = r;
+ }
+ mutex_unlock(&uqm->userq_mutex);
+ }
+
+ return ret;
+}
+
+/*
+ * amdgpu_userq_start_sched_for_enforce_isolation - restore gfx/compute user
+ * queues of one partition
+ * @adev: amdgpu device
+ * @idx: partition id; only queues whose xcp_id equals @idx are restored
+ *
+ * Clears adev->userq_halt_for_enforce_isolation (warning if it was already
+ * clear) and restores every matching GFX or COMPUTE queue. A failing queue
+ * does not stop the walk.
+ *
+ * Returns 0 when nothing failed or when neither GFX nor COMPUTE user queues
+ * are supported, otherwise the error of the last queue that failed.
+ */
+int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
+ u32 idx)
+{
+ u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
+ int ret = 0, r;
+
+ /* only gfx/compute queues are ever stopped, so only those need a restart */
+ if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
+ return 0;
+
+ if (!adev->userq_halt_for_enforce_isolation)
+ dev_warn(adev->dev, "userq scheduling already started!\n");
+ adev->userq_halt_for_enforce_isolation = false;
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
+ mutex_lock(&uqm->userq_mutex);
+ if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
+ (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
+ (queue->xcp_id == idx)) {
+ r = amdgpu_userq_restore_helper(uqm, queue);
+ if (r)
+ ret = r;
+ }
+ mutex_unlock(&uqm->userq_mutex);
+ }
+
+ return ret;
+}
+
+/*
+ * amdgpu_userq_gem_va_unmap_validate - wait for a userq VA's BO before unmap
+ * @adev: amdgpu device
+ * @mapping: VA mapping that is about to be removed
+ * @saddr: start address of the mapping, only used for the warning message
+ *
+ * Warns once that a user queue VA is being unmapped and then waits
+ * (interruptibly, without timeout) until all fences on the BO's reservation
+ * object, including BOOKKEEP ones, have signalled.
+ *
+ * Returns 0 when the fences signalled or when no IP supports user queues,
+ * -EBUSY when the wait was interrupted or otherwise did not succeed.
+ */
+int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t saddr)
+{
+ u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+ struct amdgpu_bo_va *bo_va = mapping->bo_va;
+ struct dma_resv *resv = bo_va->base.bo->tbo.base.resv;
+ /*
+ * dma_resv_wait_timeout() returns a long. With MAX_SCHEDULE_TIMEOUT a
+ * successful wait can return a value as large as LONG_MAX, which does
+ * not fit into an int and would be misread as an error after
+ * truncation on 64-bit builds.
+ */
+ long timeout;
+
+ if (!ip_mask)
+ return 0;
+
+ dev_warn_once(adev->dev, "now unmapping a vital queue va:%llx\n", saddr);
+ /*
+ * The userq VA mapping reservation should include the eviction fence,
+ * if the eviction fence can't signal successfully during unmapping,
+ * then driver will warn to flag this improper unmap of the userq VA.
+ * Note: The eviction fence may be attached to different BOs, and this
+ * unmap is only for one kind of userq VAs, so at this point suppose
+ * the eviction fence is always unsignaled.
+ */
+ if (!dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP)) {
+ timeout = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP, true,
+ MAX_SCHEDULE_TIMEOUT);
+ if (timeout <= 0)
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+/*
+ * amdgpu_userq_pre_reset - quiesce user queues ahead of a GPU reset
+ * @adev: amdgpu device
+ *
+ * For every queue in adev->userq_doorbell_xa the owning manager's resume work
+ * is cancelled. Queues that are currently MAPPED additionally get their last
+ * fence waited on, are unmapped through the per-IP callback, marked HUNG and
+ * have their fences force-completed.
+ *
+ * NOTE(review): uqm->userq_mutex is not taken here; presumably the reset path
+ * serialises against other queue users, confirm against the caller.
+ */
+void amdgpu_userq_pre_reset(struct amdgpu_device *adev)
+{
+ const struct amdgpu_userq_funcs *userq_funcs;
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
+
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
+ cancel_delayed_work_sync(&uqm->resume_work);
+ if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
+ amdgpu_userq_wait_for_last_fence(uqm, queue);
+ userq_funcs = adev->userq_funcs[queue->queue_type];
+ /* The result of unmap is intentionally not checked, see below */
+ userq_funcs->unmap(uqm, queue);
+ /* just mark all queues as hung at this point.
+ * if unmap succeeds, we could map again
+ * in amdgpu_userq_post_reset() if vram is not lost
+ */
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ amdgpu_userq_fence_driver_force_completion(queue);
+ }
+ }
+}
+
+/*
+ * amdgpu_userq_post_reset - re-map user queues after a GPU reset
+ * @adev: amdgpu device
+ * @vram_lost: true when VRAM content did not survive the reset
+ *
+ * Queues that amdgpu_userq_pre_reset() marked AMDGPU_USERQ_STATE_HUNG are
+ * mapped again and set back to MAPPED, provided VRAM was not lost. A queue
+ * that fails to map stays HUNG and does not stop the walk.
+ *
+ * Returns 0 when every attempted re-map succeeded (or none was attempted),
+ * otherwise the error of the last queue that failed to map.
+ */
+int amdgpu_userq_post_reset(struct amdgpu_device *adev, bool vram_lost)
+{
+ struct amdgpu_userq_mgr *uqm;
+ struct amdgpu_usermode_queue *queue;
+ const struct amdgpu_userq_funcs *userq_funcs;
+ unsigned long queue_id;
+ int ret = 0, r;
+
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
+ if (queue->state == AMDGPU_USERQ_STATE_HUNG && !vram_lost) {
+ userq_funcs = adev->userq_funcs[queue->queue_type];
+ /* Re-map queue */
+ r = userq_funcs->map(uqm, queue);
+ if (r) {
+ dev_err(adev->dev, "Failed to remap queue %lu\n", queue_id);
+ /*
+ * Keep the failure in a separate variable so that a
+ * later successful map cannot overwrite it.
+ */
+ ret = r;
+ continue;
+ }
+ queue->state = AMDGPU_USERQ_STATE_MAPPED;
+ }
+ }
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
new file mode 100644
index 000000000000..c37444427a14
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_USERQ_H_
+#define AMDGPU_USERQ_H_
+#include "amdgpu_eviction_fence.h"
+
+/* Upper bound on user queues; presumably per manager, verify at the users */
+#define AMDGPU_MAX_USERQ_COUNT 512
+
+/* container_of() helpers to get from an embedded member to its owner */
+/* dma_fence embedded as amdgpu_eviction_fence.base -> eviction fence */
+#define to_ev_fence(f) container_of(f, struct amdgpu_eviction_fence, base)
+/* manager embedded as amdgpu_fpriv.userq_mgr -> per-file private data */
+#define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv, userq_mgr)
+/* work item embedded in amdgpu_userq_mgr under member @name -> manager */
+#define work_to_uq_mgr(w, name) container_of(w, struct amdgpu_userq_mgr, name)
+
+/* State of a user mode queue, stored in amdgpu_usermode_queue.state */
+enum amdgpu_userq_state {
+ /* Zero value, i.e. the state of a zero-initialised queue */
+ AMDGPU_USERQ_STATE_UNMAPPED = 0,
+ /* Set after a successful map, e.g. in amdgpu_userq_post_reset() */
+ AMDGPU_USERQ_STATE_MAPPED,
+ /* Presumably set after a successful preempt; verify in the helpers */
+ AMDGPU_USERQ_STATE_PREEMPTED,
+ /* Set by amdgpu_userq_pre_reset() on queues that were mapped */
+ AMDGPU_USERQ_STATE_HUNG,
+ /* Presumably flags a queue whose VA failed validation; TODO confirm */
+ AMDGPU_USERQ_STATE_INVALID_VA,
+};
+
+struct amdgpu_mqd_prop;
+
+/*
+ * A buffer object used by a user queue together with its addresses.
+ * Filled by amdgpu_userq_create_object(); presumably cpu_ptr is the kernel
+ * mapping and gpu_addr the GPU address of @obj, confirm in that function.
+ */
+struct amdgpu_userq_obj {
+ void *cpu_ptr;
+ uint64_t gpu_addr;
+ struct amdgpu_bo *obj;
+};
+
+/*
+ * One GPU virtual address tracked for a queue; presumably linked into
+ * amdgpu_usermode_queue.userq_va_list via @list, confirm at the users.
+ */
+struct amdgpu_userq_va_cursor {
+ u64 gpu_addr;
+ struct list_head list;
+};
+
+/* Kernel side representation of a single user mode queue */
+struct amdgpu_usermode_queue {
+ /* AMDGPU_HW_IP_* value; also indexes adev->userq_funcs[] */
+ int queue_type;
+ enum amdgpu_userq_state state;
+ uint64_t doorbell_handle;
+ /* Key under which the queue's fence driver is stored in adev->userq_xa */
+ uint64_t doorbell_index;
+ uint64_t flags;
+ /* MQD properties; wptr_gpu_addr is read when creating signal fences */
+ struct amdgpu_mqd_prop *userq_prop;
+ /* Manager (per DRM file) that owns this queue */
+ struct amdgpu_userq_mgr *userq_mgr;
+ struct amdgpu_vm *vm;
+ struct amdgpu_userq_obj mqd;
+ struct amdgpu_userq_obj db_obj;
+ struct amdgpu_userq_obj fw_obj;
+ struct amdgpu_userq_obj wptr_obj;
+ /* Fence drivers of other queues, handed over to the next created fence */
+ struct xarray fence_drv_xa;
+ /* This queue's own fence driver */
+ struct amdgpu_userq_fence_driver *fence_drv;
+ /* Most recent fence created by the signal ioctl, holds a reference */
+ struct dma_fence *last_fence;
+ /* Partition id, compared against @idx by the enforce-isolation helpers */
+ u32 xcp_id;
+ int priority;
+ struct dentry *debugfs_queue;
+
+ struct list_head userq_va_list;
+};
+
+/*
+ * Per-IP user queue callbacks, looked up as adev->userq_funcs[queue_type].
+ * The int-returning callbacks are treated as 0 on success and non-zero on
+ * failure by the callers visible in amdgpu_userq.c.
+ */
+struct amdgpu_userq_funcs {
+ /* Presumably builds the MQD for a new queue from the ioctl arguments */
+ int (*mqd_create)(struct amdgpu_userq_mgr *uq_mgr,
+ struct drm_amdgpu_userq_in *args,
+ struct amdgpu_usermode_queue *queue);
+ /* Presumably releases what mqd_create set up */
+ void (*mqd_destroy)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *uq);
+ /* Called for mapped queues by amdgpu_userq_pre_reset() */
+ int (*unmap)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue);
+ /* Called for hung queues by amdgpu_userq_post_reset() */
+ int (*map)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue);
+ int (*preempt)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue);
+ int (*restore)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue);
+ /* Operates on all queues of one type rather than on a single queue */
+ int (*detect_and_reset)(struct amdgpu_device *adev,
+ int queue_type);
+};
+
+/* User mode queue manager, embedded in struct amdgpu_fpriv as userq_mgr */
+struct amdgpu_userq_mgr {
+ /**
+ * @userq_mgr_xa: Per-process user queue map (queue ID → queue)
+ * Key: queue_id (unique ID within the process's userq manager)
+ * Value: struct amdgpu_usermode_queue
+ */
+ struct xarray userq_mgr_xa;
+ /* Held while queues of this manager are mapped, unmapped or restored */
+ struct mutex userq_mutex;
+ struct amdgpu_device *adev;
+ /* Runs amdgpu_userq_restore_worker(); scheduled after an eviction */
+ struct delayed_work resume_work;
+ /* DRM file this manager belongs to, used for drm_file_err() messages */
+ struct drm_file *file;
+ /* Presumably the number of queues per ring type; TODO confirm */
+ atomic_t userq_count[AMDGPU_RING_TYPE_MAX];
+};
+
+/* Doorbell description passed to amdgpu_userq_get_doorbell_index() */
+struct amdgpu_db_info {
+ uint64_t doorbell_handle;
+ uint32_t queue_type;
+ uint32_t doorbell_offset;
+ struct amdgpu_userq_obj *db_obj;
+};
+
+/* DRM ioctl entry point for user queue management */
+int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+
+/* Manager life cycle; init always returns 0 */
+int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
+ struct amdgpu_device *adev);
+
+void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr);
+
+int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_userq_obj *userq_obj,
+ int size);
+
+void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_userq_obj *userq_obj);
+
+/* Evicts all queues of @uq_mgr and signals @ev_fence */
+void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_eviction_fence *ev_fence);
+
+/*
+ * NOTE(review): the signal ioctl unlocks userq_mgr->userq_mutex after calling
+ * this without locking it itself, so it apparently returns with the mutex
+ * held; confirm in the definition.
+ */
+void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr,
+ struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+uint64_t amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_db_info *db_info,
+ struct drm_file *filp);
+
+/* Bitmask of (1 << AMDGPU_HW_IP_*) for IPs with user queue support */
+u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev);
+
+/* Device wide suspend/resume of all user queues */
+int amdgpu_userq_suspend(struct amdgpu_device *adev);
+int amdgpu_userq_resume(struct amdgpu_device *adev);
+
+/* Preempt/restore the gfx and compute queues of partition @idx */
+int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
+ u32 idx);
+int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
+ u32 idx);
+/* GPU reset integration */
+void amdgpu_userq_reset_work(struct work_struct *work);
+void amdgpu_userq_pre_reset(struct amdgpu_device *adev);
+int amdgpu_userq_post_reset(struct amdgpu_device *adev, bool vram_lost);
+
+int amdgpu_userq_input_va_validate(struct amdgpu_usermode_queue *queue,
+ u64 addr, u64 expected_size);
+/* Waits for the BO behind @mapping before a userq VA is unmapped */
+int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t saddr);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
new file mode 100644
index 000000000000..eba9fb359047
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -0,0 +1,1011 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/kref.h>
+#include <linux/slab.h>
+#include <linux/dma-fence-unwrap.h>
+
+#include <drm/drm_exec.h>
+#include <drm/drm_syncobj.h>
+
+#include "amdgpu.h"
+#include "amdgpu_userq_fence.h"
+
+/* Forward declaration; the table is defined after the callbacks below */
+static const struct dma_fence_ops amdgpu_userq_fence_ops;
+/* Slab cache that all struct amdgpu_userq_fence objects are allocated from */
+static struct kmem_cache *amdgpu_userq_fence_slab;
+
+/*
+ * Create the slab cache used for struct amdgpu_userq_fence.
+ *
+ * Returns 0 on success, -ENOMEM when the cache could not be created.
+ */
+int amdgpu_userq_fence_slab_init(void)
+{
+ struct kmem_cache *cache;
+
+ cache = kmem_cache_create("amdgpu_userq_fence",
+ sizeof(struct amdgpu_userq_fence),
+ 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ amdgpu_userq_fence_slab = cache;
+
+ return cache ? 0 : -ENOMEM;
+}
+
+/* Destroy the fence slab cache created by amdgpu_userq_fence_slab_init() */
+void amdgpu_userq_fence_slab_fini(void)
+{
+ /*
+ * Fences are freed from an RCU callback (amdgpu_userq_fence_free), so
+ * wait for all pending callbacks before the cache goes away.
+ */
+ rcu_barrier();
+ kmem_cache_destroy(amdgpu_userq_fence_slab);
+}
+
+/*
+ * Downcast a dma_fence to the userq fence that embeds it.
+ *
+ * Returns NULL when @f is NULL or was not created with
+ * amdgpu_userq_fence_ops.
+ */
+static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f)
+{
+ if (f && f->ops == &amdgpu_userq_fence_ops)
+ return container_of(f, struct amdgpu_userq_fence, base);
+
+ return NULL;
+}
+
+/*
+ * Read the current 64-bit little-endian sequence value from the fence
+ * driver's seq64 slot. Unlike amdgpu_userq_fence_write() this does not check
+ * cpu_addr for NULL.
+ */
+static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv)
+{
+ return le64_to_cpu(*fence_drv->cpu_addr);
+}
+
+/*
+ * Store @seq as little-endian value in the fence driver's seq64 slot.
+ * Does nothing when the driver has no CPU mapping of the slot.
+ */
+static void
+amdgpu_userq_fence_write(struct amdgpu_userq_fence_driver *fence_drv,
+ u64 seq)
+{
+ if (!fence_drv->cpu_addr)
+ return;
+
+ *fence_drv->cpu_addr = cpu_to_le64(seq);
+}
+
+/*
+ * amdgpu_userq_fence_driver_alloc - create the fence driver of a user queue
+ * @adev: amdgpu device
+ * @userq: queue that receives the new driver in userq->fence_drv
+ *
+ * Allocates the driver and a zeroed seq64 slot, initialises refcount, fence
+ * list and lock, allocates a fence context, names the timeline after the
+ * current task and publishes the driver in adev->userq_xa under the queue's
+ * doorbell index.
+ *
+ * Returns 0 on success or a negative error code; on failure everything
+ * allocated here is released again.
+ */
+int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
+ struct amdgpu_usermode_queue *userq)
+{
+ struct amdgpu_userq_fence_driver *fence_drv;
+ unsigned long flags;
+ int r;
+
+ fence_drv = kzalloc(sizeof(*fence_drv), GFP_KERNEL);
+ if (!fence_drv)
+ return -ENOMEM;
+
+ /* Acquire seq64 memory */
+ r = amdgpu_seq64_alloc(adev, &fence_drv->va, &fence_drv->gpu_addr,
+ &fence_drv->cpu_addr);
+ if (r)
+ goto free_fence_drv;
+
+ /* Start the sequence at zero */
+ memset(fence_drv->cpu_addr, 0, sizeof(u64));
+
+ /* The initial reference belongs to the queue (userq->fence_drv) */
+ kref_init(&fence_drv->refcount);
+ INIT_LIST_HEAD(&fence_drv->fences);
+ spin_lock_init(&fence_drv->fence_list_lock);
+
+ fence_drv->adev = adev;
+ fence_drv->context = dma_fence_context_alloc(1);
+ get_task_comm(fence_drv->timeline_name, current);
+
+ xa_lock_irqsave(&adev->userq_xa, flags);
+ r = xa_err(__xa_store(&adev->userq_xa, userq->doorbell_index,
+ fence_drv, GFP_KERNEL));
+ xa_unlock_irqrestore(&adev->userq_xa, flags);
+ if (r)
+ goto free_seq64;
+
+ userq->fence_drv = fence_drv;
+
+ return 0;
+
+free_seq64:
+ amdgpu_seq64_free(adev, fence_drv->va);
+free_fence_drv:
+ kfree(fence_drv);
+
+ return r;
+}
+
+/*
+ * Remove every fence driver stored in @xa and drop the reference that the
+ * xarray entry stood for. The xarray itself is left initialised and empty.
+ */
+static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa)
+{
+ struct amdgpu_userq_fence_driver *fence_drv;
+ unsigned long index;
+
+ if (xa_empty(xa))
+ return;
+
+ xa_lock(xa);
+ xa_for_each(xa, index, fence_drv) {
+ __xa_erase(xa, index);
+ amdgpu_userq_fence_driver_put(fence_drv);
+ }
+
+ xa_unlock(xa);
+}
+
+/*
+ * Release all fence driver references held by @userq: first those parked in
+ * userq->fence_drv_xa, then the queue's own driver.
+ */
+void
+amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq)
+{
+ amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa);
+ xa_destroy(&userq->fence_drv_xa);
+ /* Drop the fence_drv reference held by user queue */
+ amdgpu_userq_fence_driver_put(userq->fence_drv);
+}
+
+/*
+ * amdgpu_userq_fence_driver_process - signal all fences the hardware reached
+ * @fence_drv: fence driver to process, may be NULL
+ *
+ * Reads the current sequence value and, under fence_list_lock, signals each
+ * pending fence whose seqno is not larger than it. For every signalled fence
+ * the references on the fence drivers it carried are dropped, the fence is
+ * unlinked and the list's reference on it is released.
+ */
+void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
+{
+ struct amdgpu_userq_fence *userq_fence, *tmp;
+ struct dma_fence *fence;
+ unsigned long flags;
+ u64 rptr;
+ int i;
+
+ if (!fence_drv)
+ return;
+
+ spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
+ rptr = amdgpu_userq_fence_read(fence_drv);
+
+ list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) {
+ fence = &userq_fence->base;
+
+ /*
+ * Stops at the first fence not reached yet; this presumably
+ * relies on the list being ordered by seqno.
+ */
+ if (rptr < fence->seqno)
+ break;
+
+ dma_fence_signal(fence);
+
+ for (i = 0; i < userq_fence->fence_drv_array_count; i++)
+ amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);
+
+ list_del(&userq_fence->link);
+ /* Drop the reference taken for the list in fence creation */
+ dma_fence_put(fence);
+ }
+ spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
+}
+
+/*
+ * amdgpu_userq_fence_driver_destroy - kref release callback of a fence driver
+ * @ref: the refcount member of the driver being destroyed
+ *
+ * Signals every fence still on the driver's list (with -ECANCELED when it had
+ * not signalled yet) and drops the list's reference, removes all entries of
+ * adev->userq_xa that point at this driver, then frees the seq64 slot and the
+ * driver itself.
+ */
+void amdgpu_userq_fence_driver_destroy(struct kref *ref)
+{
+ struct amdgpu_userq_fence_driver *fence_drv = container_of(ref,
+ struct amdgpu_userq_fence_driver,
+ refcount);
+ struct amdgpu_userq_fence_driver *xa_fence_drv;
+ struct amdgpu_device *adev = fence_drv->adev;
+ struct amdgpu_userq_fence *fence, *tmp;
+ struct xarray *xa = &adev->userq_xa;
+ unsigned long index, flags;
+ struct dma_fence *f;
+
+ spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
+ list_for_each_entry_safe(fence, tmp, &fence_drv->fences, link) {
+ f = &fence->base;
+
+ if (!dma_fence_is_signaled(f)) {
+ dma_fence_set_error(f, -ECANCELED);
+ dma_fence_signal(f);
+ }
+
+ list_del(&fence->link);
+ dma_fence_put(f);
+ }
+ spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
+
+ /* Unpublish the driver from the device wide doorbell lookup */
+ xa_lock_irqsave(xa, flags);
+ xa_for_each(xa, index, xa_fence_drv)
+ if (xa_fence_drv == fence_drv)
+ __xa_erase(xa, index);
+ xa_unlock_irqrestore(xa, flags);
+
+ /* Free seq64 memory */
+ amdgpu_seq64_free(adev, fence_drv->va);
+ kfree(fence_drv);
+}
+
+/* Take an additional reference on @fence_drv */
+void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv)
+{
+ kref_get(&fence_drv->refcount);
+}
+
+/*
+ * Drop a reference on @fence_drv; the last put runs
+ * amdgpu_userq_fence_driver_destroy().
+ */
+void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv)
+{
+ kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy);
+}
+
+/*
+ * Allocate an uninitialised userq fence from the slab cache (GFP_ATOMIC).
+ *
+ * On return *@userq_fence holds the new object or NULL. Returns 0 on success
+ * and -ENOMEM on allocation failure.
+ */
+static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence)
+{
+ struct amdgpu_userq_fence *obj;
+
+ obj = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC);
+ *userq_fence = obj;
+ if (!obj)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/*
+ * amdgpu_userq_fence_create - initialise a fence for a queue's fence driver
+ * @userq: queue the fence belongs to
+ * @userq_fence: pre-allocated fence object to initialise
+ * @seq: sequence number the fence waits for
+ * @f: receives the embedded dma_fence on success
+ *
+ * The fence takes a reference on the queue's fence driver and takes over all
+ * fence drivers currently parked in userq->fence_drv_xa (their references are
+ * released when the fence is processed). Unless the fence is already
+ * signalled it is appended to the driver's pending list, which holds its own
+ * fence reference.
+ *
+ * Returns 0 with one fence reference owned by the caller, or -EINVAL when the
+ * queue has no fence driver; in that case @userq_fence is left untouched and
+ * must be freed by the caller.
+ */
+static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
+ struct amdgpu_userq_fence *userq_fence,
+ u64 seq, struct dma_fence **f)
+{
+ struct amdgpu_userq_fence_driver *fence_drv;
+ struct dma_fence *fence;
+ unsigned long flags;
+
+ fence_drv = userq->fence_drv;
+ if (!fence_drv)
+ return -EINVAL;
+
+ spin_lock_init(&userq_fence->lock);
+ INIT_LIST_HEAD(&userq_fence->link);
+ fence = &userq_fence->base;
+ userq_fence->fence_drv = fence_drv;
+
+ dma_fence_init64(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
+ fence_drv->context, seq);
+
+ /* Driver reference is dropped again in amdgpu_userq_fence_free() */
+ amdgpu_userq_fence_driver_get(fence_drv);
+ /* Extra fence reference for the pending list, see below */
+ dma_fence_get(fence);
+
+ if (!xa_empty(&userq->fence_drv_xa)) {
+ struct amdgpu_userq_fence_driver *stored_fence_drv;
+ unsigned long index, count = 0;
+ int i = 0;
+
+ xa_lock(&userq->fence_drv_xa);
+ xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
+ count++;
+
+ /* Atomic allocation because the xarray spinlock is held */
+ userq_fence->fence_drv_array =
+ kvmalloc_array(count,
+ sizeof(struct amdgpu_userq_fence_driver *),
+ GFP_ATOMIC);
+
+ /*
+ * Move the entries into the array. When the allocation failed
+ * they stay in the xarray and the count below is zero.
+ */
+ if (userq_fence->fence_drv_array) {
+ xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) {
+ userq_fence->fence_drv_array[i] = stored_fence_drv;
+ __xa_erase(&userq->fence_drv_xa, index);
+ i++;
+ }
+ }
+
+ userq_fence->fence_drv_array_count = i;
+ xa_unlock(&userq->fence_drv_xa);
+ } else {
+ userq_fence->fence_drv_array = NULL;
+ userq_fence->fence_drv_array_count = 0;
+ }
+
+ /* Check if hardware has already processed the job */
+ spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
+ if (!dma_fence_is_signaled(fence))
+ list_add_tail(&userq_fence->link, &fence_drv->fences);
+ else
+ dma_fence_put(fence);
+
+ spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
+
+ *f = fence;
+
+ return 0;
+}
+
+/* dma_fence_ops.get_driver_name: constant driver name for userq fences */
+static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f)
+{
+ return "amdgpu_userq_fence";
+}
+
+/*
+ * dma_fence_ops.get_timeline_name: name stored in the fence driver, which is
+ * the comm of the task that allocated the driver.
+ */
+static const char *amdgpu_userq_fence_get_timeline_name(struct dma_fence *f)
+{
+ struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
+
+ return fence->fence_drv->timeline_name;
+}
+
+/*
+ * dma_fence_ops.signaled: a fence counts as signalled once the value in the
+ * driver's seq64 slot has reached the fence's sequence number.
+ */
+static bool amdgpu_userq_fence_signaled(struct dma_fence *f)
+{
+ struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(f);
+ u64 target = userq_fence->base.seqno;
+ u64 current_seq = amdgpu_userq_fence_read(userq_fence->fence_drv);
+
+ return current_seq >= target;
+}
+
+/*
+ * RCU callback queued by amdgpu_userq_fence_release(): drops the fence's
+ * reference on its driver, frees the array of borrowed fence drivers and
+ * returns the fence object to the slab cache.
+ */
+static void amdgpu_userq_fence_free(struct rcu_head *rcu)
+{
+ struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu);
+ struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(fence);
+ struct amdgpu_userq_fence_driver *fence_drv = userq_fence->fence_drv;
+
+ /* Release the fence driver reference */
+ amdgpu_userq_fence_driver_put(fence_drv);
+
+ kvfree(userq_fence->fence_drv_array);
+ kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
+}
+
+/* dma_fence_ops.release: defer the actual free until after an RCU grace period */
+static void amdgpu_userq_fence_release(struct dma_fence *f)
+{
+ call_rcu(&f->rcu, amdgpu_userq_fence_free);
+}
+
+/*
+ * Fence operations of userq fences; the table's address also serves as type
+ * tag in to_amdgpu_userq_fence().
+ */
+static const struct dma_fence_ops amdgpu_userq_fence_ops = {
+ .get_driver_name = amdgpu_userq_fence_get_driver_name,
+ .get_timeline_name = amdgpu_userq_fence_get_timeline_name,
+ .signaled = amdgpu_userq_fence_signaled,
+ .release = amdgpu_userq_fence_release,
+};
+
+/**
+ * amdgpu_userq_fence_read_wptr - Read the userq wptr value
+ *
+ * @queue: user mode queue structure pointer
+ * @wptr: output, receives the write pointer value on success
+ *
+ * Read the wptr value from the BO that backs the queue's wptr GPU address.
+ * The userq signal IOCTL creates a dma_fence for the shared buffers that
+ * expects the RPTR value written to seq64 memory >= WPTR.
+ *
+ * Returns 0 on success with *@wptr set, negative error code on failure.
+ */
+static int amdgpu_userq_fence_read_wptr(struct amdgpu_usermode_queue *queue,
+ u64 *wptr)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ struct amdgpu_bo *bo;
+ u64 addr, *ptr;
+ int r;
+
+ /* The VM root BO is reserved while the mapping is looked up */
+ r = amdgpu_bo_reserve(queue->vm->root.bo, false);
+ if (r)
+ return r;
+
+ addr = queue->userq_prop->wptr_gpu_addr;
+ addr &= AMDGPU_GMC_HOLE_MASK;
+
+ mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT);
+ if (!mapping) {
+ amdgpu_bo_unreserve(queue->vm->root.bo);
+ DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n");
+ return -EINVAL;
+ }
+
+ /* Hold a BO reference so it stays valid after the VM is unreserved */
+ bo = amdgpu_bo_ref(mapping->bo_va->base.bo);
+ amdgpu_bo_unreserve(queue->vm->root.bo);
+ r = amdgpu_bo_reserve(bo, true);
+ if (r) {
+ amdgpu_bo_unref(&bo);
+ DRM_ERROR("Failed to reserve userqueue wptr bo");
+ return r;
+ }
+
+ r = amdgpu_bo_kmap(bo, (void **)&ptr);
+ if (r) {
+ DRM_ERROR("Failed mapping the userqueue wptr bo");
+ goto map_error;
+ }
+
+ /*
+ * NOTE(review): the value is read from the start of the BO mapping; the
+ * offset of the wptr address inside the BO is not applied. Confirm the
+ * wptr is always located at offset 0.
+ */
+ *wptr = le64_to_cpu(*ptr);
+
+ amdgpu_bo_kunmap(bo);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
+
+ return 0;
+
+map_error:
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
+
+ return r;
+}
+
+/* Error path helper of the signal ioctl: drop the caller's fence reference */
+static void amdgpu_userq_fence_cleanup(struct dma_fence *fence)
+{
+ dma_fence_put(fence);
+}
+
+/*
+ * Record @error on @fence unless it has already signalled. Runs under the
+ * fence driver's fence_list_lock.
+ */
+static void
+amdgpu_userq_fence_driver_set_error(struct amdgpu_userq_fence *fence,
+ int error)
+{
+ struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv;
+ unsigned long flags;
+ struct dma_fence *f;
+
+ spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
+
+ /*
+ * NOTE(review): &fence->base is the address of an embedded member, so
+ * f is never NULL here; confirm whether the RCU accessor is needed.
+ */
+ f = rcu_dereference_protected(&fence->base,
+ lockdep_is_held(&fence_drv->fence_list_lock));
+ if (f && !dma_fence_is_signaled_locked(f))
+ dma_fence_set_error(f, error);
+ spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
+}
+
+/*
+ * Force the queue's last fence, and everything queued before it, to complete.
+ *
+ * The last fence is marked with -ECANCELED, its sequence number is written to
+ * the seq64 slot as if the hardware had reached it, and the fence driver is
+ * processed so the pending fences get signalled. Does nothing when the queue
+ * has no last fence.
+ */
+void
+amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq)
+{
+ struct amdgpu_userq_fence_driver *drv;
+ struct amdgpu_userq_fence *last;
+ u64 seq;
+
+ if (!userq->last_fence)
+ return;
+
+ last = to_amdgpu_userq_fence(userq->last_fence);
+ drv = last->fence_drv;
+ seq = last->base.seqno;
+
+ amdgpu_userq_fence_driver_set_error(last, -ECANCELED);
+ amdgpu_userq_fence_write(drv, seq);
+ amdgpu_userq_fence_driver_process(drv);
+}
+
+/*
+ * amdgpu_userq_signal_ioctl - attach a queue's next fence to BOs and syncobjs
+ * @dev: DRM device (unused here)
+ * @data: ioctl payload, a struct drm_amdgpu_userq_signal
+ * @filp: DRM file of the caller
+ *
+ * Copies the syncobj and BO handle arrays from user space, looks up the
+ * objects, reads the queue's current write pointer and creates a fence that
+ * signals once the hardware reached it. The fence becomes the queue's
+ * last_fence, is added to the reservation objects of all read BOs (READ
+ * usage) and write BOs (WRITE usage) and replaces the fence of every given
+ * syncobj.
+ *
+ * Returns 0 on success or a negative error code. All looked-up objects and
+ * temporary arrays are released on every path.
+ */
+int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
+ struct drm_amdgpu_userq_signal *args = data;
+ struct drm_gem_object **gobj_write = NULL;
+ struct drm_gem_object **gobj_read = NULL;
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_fence *userq_fence;
+ struct drm_syncobj **syncobj = NULL;
+ u32 *bo_handles_write, num_write_bo_handles;
+ u32 *syncobj_handles, num_syncobj_handles;
+ u32 *bo_handles_read, num_read_bo_handles;
+ int r, i, entry, rentry, wentry;
+ struct dma_fence *fence;
+ struct drm_exec exec;
+ u64 wptr;
+
+ num_syncobj_handles = args->num_syncobj_handles;
+ syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles),
+ size_mul(sizeof(u32), num_syncobj_handles));
+ if (IS_ERR(syncobj_handles))
+ return PTR_ERR(syncobj_handles);
+
+ /* Array of pointers to the looked up syncobjs */
+ syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL);
+ if (!syncobj) {
+ r = -ENOMEM;
+ goto free_syncobj_handles;
+ }
+
+ /* On failure @entry is the number of syncobjs that need a put */
+ for (entry = 0; entry < num_syncobj_handles; entry++) {
+ syncobj[entry] = drm_syncobj_find(filp, syncobj_handles[entry]);
+ if (!syncobj[entry]) {
+ r = -ENOENT;
+ goto free_syncobj;
+ }
+ }
+
+ /*
+ * size_mul() saturates instead of wrapping, so an oversized user
+ * supplied count makes memdup_user() fail instead of copying a short
+ * buffer. Same treatment as for the syncobj handles above.
+ */
+ num_read_bo_handles = args->num_bo_read_handles;
+ bo_handles_read = memdup_user(u64_to_user_ptr(args->bo_read_handles),
+ size_mul(sizeof(u32), num_read_bo_handles));
+ if (IS_ERR(bo_handles_read)) {
+ r = PTR_ERR(bo_handles_read);
+ goto free_syncobj;
+ }
+
+ /* Array of pointers to the GEM read objects */
+ gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
+ if (!gobj_read) {
+ r = -ENOMEM;
+ goto free_bo_handles_read;
+ }
+
+ /* On failure @rentry is the number of read objects that need a put */
+ for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
+ gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
+ if (!gobj_read[rentry]) {
+ r = -ENOENT;
+ goto put_gobj_read;
+ }
+ }
+
+ num_write_bo_handles = args->num_bo_write_handles;
+ bo_handles_write = memdup_user(u64_to_user_ptr(args->bo_write_handles),
+ size_mul(sizeof(u32), num_write_bo_handles));
+ if (IS_ERR(bo_handles_write)) {
+ r = PTR_ERR(bo_handles_write);
+ goto put_gobj_read;
+ }
+
+ /* Array of pointers to the GEM write objects */
+ gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
+ if (!gobj_write) {
+ r = -ENOMEM;
+ goto free_bo_handles_write;
+ }
+
+ /* On failure @wentry is the number of write objects that need a put */
+ for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
+ gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
+ if (!gobj_write[wentry]) {
+ r = -ENOENT;
+ goto put_gobj_write;
+ }
+ }
+
+ /* Retrieve the user queue */
+ queue = xa_load(&userq_mgr->userq_mgr_xa, args->queue_id);
+ if (!queue) {
+ r = -ENOENT;
+ goto put_gobj_write;
+ }
+
+ r = amdgpu_userq_fence_read_wptr(queue, &wptr);
+ if (r)
+ goto put_gobj_write;
+
+ r = amdgpu_userq_fence_alloc(&userq_fence);
+ if (r)
+ goto put_gobj_write;
+
+ /*
+ * We are here means UQ is active, make sure the eviction fence is valid.
+ * NOTE(review): userq_mutex is unlocked below without being locked in
+ * this function, so this call apparently returns with it held.
+ */
+ amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
+
+ /* Create a new fence */
+ r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
+ if (r) {
+ mutex_unlock(&userq_mgr->userq_mutex);
+ kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
+ goto put_gobj_write;
+ }
+
+ /* The queue keeps its own reference on the most recent fence */
+ dma_fence_put(queue->last_fence);
+ queue->last_fence = dma_fence_get(fence);
+ mutex_unlock(&userq_mgr->userq_mutex);
+
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
+ (num_read_bo_handles + num_write_bo_handles));
+
+ /* Lock all BOs with retry handling */
+ drm_exec_until_all_locked(&exec) {
+ r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (r) {
+ amdgpu_userq_fence_cleanup(fence);
+ goto exec_fini;
+ }
+
+ r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (r) {
+ amdgpu_userq_fence_cleanup(fence);
+ goto exec_fini;
+ }
+ }
+
+ for (i = 0; i < num_read_bo_handles; i++) {
+ if (!gobj_read || !gobj_read[i]->resv)
+ continue;
+
+ dma_resv_add_fence(gobj_read[i]->resv, fence,
+ DMA_RESV_USAGE_READ);
+ }
+
+ for (i = 0; i < num_write_bo_handles; i++) {
+ if (!gobj_write || !gobj_write[i]->resv)
+ continue;
+
+ dma_resv_add_fence(gobj_write[i]->resv, fence,
+ DMA_RESV_USAGE_WRITE);
+ }
+
+ /* Add the created fence to syncobj/BO's */
+ for (i = 0; i < num_syncobj_handles; i++)
+ drm_syncobj_replace_fence(syncobj[i], fence);
+
+ /* drop the reference acquired in fence creation function */
+ dma_fence_put(fence);
+
+ /* Success falls through the unwind labels with r == 0 */
+exec_fini:
+ drm_exec_fini(&exec);
+put_gobj_write:
+ while (wentry-- > 0)
+ drm_gem_object_put(gobj_write[wentry]);
+ kfree(gobj_write);
+free_bo_handles_write:
+ kfree(bo_handles_write);
+put_gobj_read:
+ while (rentry-- > 0)
+ drm_gem_object_put(gobj_read[rentry]);
+ kfree(gobj_read);
+free_bo_handles_read:
+ kfree(bo_handles_read);
+free_syncobj:
+ while (entry-- > 0)
+ if (syncobj[entry])
+ drm_syncobj_put(syncobj[entry]);
+ kfree(syncobj);
+free_syncobj_handles:
+ kfree(syncobj_handles);
+
+ return r;
+}
+
+/**
+ * amdgpu_userq_wait_ioctl - report the fences a user queue must wait on
+ * @dev: DRM device the ioctl was issued on (not used by this function)
+ * @data: ioctl argument, a struct drm_amdgpu_userq_wait
+ * @filp: DRM file used to resolve the GEM and syncobj handles in @data
+ *
+ * Copies the BO and syncobj handle arrays from userspace, looks up and locks
+ * all referenced GEM objects, and then works in one of two modes:
+ *
+ * - num_fences == 0 on entry: only count the fences found on the timeline
+ *   syncobjs, the syncobjs and the BO reservation objects, and hand the
+ *   count back in num_fences.
+ * - otherwise: gather those fences (at most num_fences of them), deduplicate
+ *   them, wait for every fence that is not a user queue fence, and for the
+ *   user queue fences store their fence driver in the wait queue's
+ *   fence_drv_xa and copy the va/seqno pairs to out_fences. num_fences is
+ *   updated to the number of entries written.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *filp)
+{
+	u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles_read, *bo_handles_write;
+	u32 num_syncobj, num_read_bo_handles, num_write_bo_handles;
+	struct drm_amdgpu_userq_fence_info *fence_info = NULL;
+	struct drm_amdgpu_userq_wait *wait_info = data;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
+	struct amdgpu_usermode_queue *waitq;
+	struct drm_gem_object **gobj_write;
+	struct drm_gem_object **gobj_read;
+	struct dma_fence **fences = NULL;
+	u16 num_points, num_fences = 0;
+	int r, i, rentry, wentry, cnt;
+	struct drm_exec exec;
+
+	num_read_bo_handles = wait_info->num_bo_read_handles;
+	bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles),
+				      size_mul(sizeof(u32), num_read_bo_handles));
+	if (IS_ERR(bo_handles_read))
+		return PTR_ERR(bo_handles_read);
+
+	num_write_bo_handles = wait_info->num_bo_write_handles;
+	bo_handles_write = memdup_user(u64_to_user_ptr(wait_info->bo_write_handles),
+				       size_mul(sizeof(u32), num_write_bo_handles));
+	if (IS_ERR(bo_handles_write)) {
+		r = PTR_ERR(bo_handles_write);
+		goto free_bo_handles_read;
+	}
+
+	num_syncobj = wait_info->num_syncobj_handles;
+	syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles),
+				      size_mul(sizeof(u32), num_syncobj));
+	if (IS_ERR(syncobj_handles)) {
+		r = PTR_ERR(syncobj_handles);
+		goto free_bo_handles_write;
+	}
+
+	num_points = wait_info->num_syncobj_timeline_handles;
+	timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles),
+				       sizeof(u32) * num_points);
+	if (IS_ERR(timeline_handles)) {
+		r = PTR_ERR(timeline_handles);
+		goto free_syncobj_handles;
+	}
+
+	timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points),
+				      sizeof(u32) * num_points);
+	if (IS_ERR(timeline_points)) {
+		r = PTR_ERR(timeline_points);
+		goto free_timeline_handles;
+	}
+
+	gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
+	if (!gobj_read) {
+		r = -ENOMEM;
+		goto free_timeline_points;
+	}
+
+	/* rentry counts the looked-up objects so the unwind puts exactly those */
+	for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
+		gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
+		if (!gobj_read[rentry]) {
+			r = -ENOENT;
+			goto put_gobj_read;
+		}
+	}
+
+	gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
+	if (!gobj_write) {
+		r = -ENOMEM;
+		goto put_gobj_read;
+	}
+
+	/* wentry plays the same role as rentry for the write list */
+	for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
+		gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
+		if (!gobj_write[wentry]) {
+			r = -ENOENT;
+			goto put_gobj_write;
+		}
+	}
+
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
+		      (num_read_bo_handles + num_write_bo_handles));
+
+	/* Lock all BOs with retry handling */
+	drm_exec_until_all_locked(&exec) {
+		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
+		drm_exec_retry_on_contention(&exec);
+		if (r) {
+			drm_exec_fini(&exec);
+			goto put_gobj_write;
+		}
+
+		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
+		drm_exec_retry_on_contention(&exec);
+		if (r) {
+			drm_exec_fini(&exec);
+			goto put_gobj_write;
+		}
+	}
+
+	if (!wait_info->num_fences) {
+		if (num_points) {
+			struct dma_fence_unwrap iter;
+			struct dma_fence *fence;
+			struct dma_fence *f;
+
+			for (i = 0; i < num_points; i++) {
+				r = drm_syncobj_find_fence(filp, timeline_handles[i],
+							   timeline_points[i],
+							   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+							   &fence);
+				if (r)
+					goto exec_fini;
+
+				dma_fence_unwrap_for_each(f, &iter, fence)
+					num_fences++;
+
+				dma_fence_put(fence);
+			}
+		}
+
+		/* Count syncobj's fence */
+		for (i = 0; i < num_syncobj; i++) {
+			struct dma_fence *fence;
+
+			r = drm_syncobj_find_fence(filp, syncobj_handles[i],
+						   0,
+						   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+						   &fence);
+			if (r)
+				goto exec_fini;
+
+			num_fences++;
+			dma_fence_put(fence);
+		}
+
+		/* Count GEM objects fence */
+		for (i = 0; i < num_read_bo_handles; i++) {
+			struct dma_resv_iter resv_cursor;
+			struct dma_fence *fence;
+
+			dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
+						DMA_RESV_USAGE_READ, fence)
+				num_fences++;
+		}
+
+		for (i = 0; i < num_write_bo_handles; i++) {
+			struct dma_resv_iter resv_cursor;
+			struct dma_fence *fence;
+
+			dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
+						DMA_RESV_USAGE_WRITE, fence)
+				num_fences++;
+		}
+
+		/*
+		 * Passing num_fences = 0 means that userspace doesn't want to
+		 * retrieve userq_fence_info. If num_fences = 0 we skip filling
+		 * userq_fence_info and return the actual number of fences on
+		 * args->num_fences.
+		 */
+		wait_info->num_fences = num_fences;
+	} else {
+		/* Array of fence info */
+		fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), GFP_KERNEL);
+		if (!fence_info) {
+			r = -ENOMEM;
+			goto exec_fini;
+		}
+
+		/*
+		 * Array of fences. Entries [0, num_fences) each own one fence
+		 * reference, or are NULL once that reference has been dropped,
+		 * so the free_fences unwind can put every entry safely.
+		 */
+		fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), GFP_KERNEL);
+		if (!fences) {
+			r = -ENOMEM;
+			goto free_fence_info;
+		}
+
+		/* Retrieve GEM read objects fence */
+		for (i = 0; i < num_read_bo_handles; i++) {
+			struct dma_resv_iter resv_cursor;
+			struct dma_fence *fence;
+
+			dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
+						DMA_RESV_USAGE_READ, fence) {
+				if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+					r = -EINVAL;
+					goto free_fences;
+				}
+
+				fences[num_fences++] = fence;
+				dma_fence_get(fence);
+			}
+		}
+
+		/* Retrieve GEM write objects fence */
+		for (i = 0; i < num_write_bo_handles; i++) {
+			struct dma_resv_iter resv_cursor;
+			struct dma_fence *fence;
+
+			dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
+						DMA_RESV_USAGE_WRITE, fence) {
+				if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+					r = -EINVAL;
+					goto free_fences;
+				}
+
+				fences[num_fences++] = fence;
+				dma_fence_get(fence);
+			}
+		}
+
+		if (num_points) {
+			struct dma_fence_unwrap iter;
+			struct dma_fence *fence;
+			struct dma_fence *f;
+
+			for (i = 0; i < num_points; i++) {
+				r = drm_syncobj_find_fence(filp, timeline_handles[i],
+							   timeline_points[i],
+							   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+							   &fence);
+				if (r)
+					goto free_fences;
+
+				dma_fence_unwrap_for_each(f, &iter, fence) {
+					if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+						/* the lookup reference is not stored in fences[] */
+						dma_fence_put(fence);
+						r = -EINVAL;
+						goto free_fences;
+					}
+
+					dma_fence_get(f);
+					fences[num_fences++] = f;
+				}
+
+				dma_fence_put(fence);
+			}
+		}
+
+		/* Retrieve syncobj's fence */
+		for (i = 0; i < num_syncobj; i++) {
+			struct dma_fence *fence;
+
+			r = drm_syncobj_find_fence(filp, syncobj_handles[i],
+						   0,
+						   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+						   &fence);
+			if (r)
+				goto free_fences;
+
+			if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+				/* no room to store it, so drop the lookup reference */
+				dma_fence_put(fence);
+				r = -EINVAL;
+				goto free_fences;
+			}
+
+			/* the lookup reference is handed over to fences[] */
+			fences[num_fences++] = fence;
+		}
+
+		/*
+		 * Keep only the latest fences to reduce the number of values
+		 * given back to userspace.
+		 */
+		num_fences = dma_fence_dedup_array(fences, num_fences);
+
+		waitq = xa_load(&userq_mgr->userq_mgr_xa, wait_info->waitq_id);
+		if (!waitq) {
+			r = -EINVAL;
+			goto free_fences;
+		}
+
+		for (i = 0, cnt = 0; i < num_fences; i++) {
+			struct amdgpu_userq_fence_driver *fence_drv;
+			struct amdgpu_userq_fence *userq_fence;
+			u32 index;
+
+			userq_fence = to_amdgpu_userq_fence(fences[i]);
+			if (!userq_fence) {
+				/*
+				 * Just waiting on other driver fences should
+				 * be good for now
+				 */
+				r = dma_fence_wait(fences[i], true);
+				if (r)
+					goto free_fences;
+
+				/*
+				 * Clear the slot after dropping the reference so
+				 * a later error path does not put it twice.
+				 */
+				dma_fence_put(fences[i]);
+				fences[i] = NULL;
+				continue;
+			}
+
+			fence_drv = userq_fence->fence_drv;
+			/*
+			 * We need to make sure the user queue release their reference
+			 * to the fence drivers at some point before queue destruction.
+			 * Otherwise, we would gather those references until we don't
+			 * have any more space left and crash.
+			 */
+			r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv,
+				     xa_limit_32b, GFP_KERNEL);
+			if (r)
+				goto free_fences;
+
+			amdgpu_userq_fence_driver_get(fence_drv);
+
+			/* Store drm syncobj's gpu va address and value */
+			fence_info[cnt].va = fence_drv->va;
+			fence_info[cnt].value = fences[i]->seqno;
+
+			/* same as above: drop the reference and clear the slot */
+			dma_fence_put(fences[i]);
+			fences[i] = NULL;
+			/* Increment the actual userq fence count */
+			cnt++;
+		}
+
+		wait_info->num_fences = cnt;
+		/* Copy userq fence info to user space */
+		if (copy_to_user(u64_to_user_ptr(wait_info->out_fences),
+				 fence_info, wait_info->num_fences * sizeof(*fence_info))) {
+			r = -EFAULT;
+			goto free_fences;
+		}
+
+		kfree(fences);
+		kfree(fence_info);
+	}
+
+	drm_exec_fini(&exec);
+	for (i = 0; i < num_read_bo_handles; i++)
+		drm_gem_object_put(gobj_read[i]);
+	kfree(gobj_read);
+
+	for (i = 0; i < num_write_bo_handles; i++)
+		drm_gem_object_put(gobj_write[i]);
+	kfree(gobj_write);
+
+	kfree(timeline_points);
+	kfree(timeline_handles);
+	kfree(syncobj_handles);
+	kfree(bo_handles_write);
+	kfree(bo_handles_read);
+
+	return 0;
+
+free_fences:
+	/* dma_fence_put() ignores NULL, so already released slots are skipped */
+	while (num_fences-- > 0)
+		dma_fence_put(fences[num_fences]);
+	kfree(fences);
+free_fence_info:
+	kfree(fence_info);
+exec_fini:
+	drm_exec_fini(&exec);
+put_gobj_write:
+	while (wentry-- > 0)
+		drm_gem_object_put(gobj_write[wentry]);
+	kfree(gobj_write);
+put_gobj_read:
+	while (rentry-- > 0)
+		drm_gem_object_put(gobj_read[rentry]);
+	kfree(gobj_read);
+free_timeline_points:
+	kfree(timeline_points);
+free_timeline_handles:
+	kfree(timeline_handles);
+free_syncobj_handles:
+	kfree(syncobj_handles);
+free_bo_handles_write:
+	kfree(bo_handles_write);
+free_bo_handles_read:
+	kfree(bo_handles_read);
+
+	return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
new file mode 100644
index 000000000000..d76add2afc77
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_USERQ_FENCE_H__
+#define __AMDGPU_USERQ_FENCE_H__
+
+#include <linux/types.h>
+
+#include "amdgpu_userq.h"
+
+/*
+ * Fence of a user mode queue, built around an embedded struct dma_fence.
+ * amdgpu_userq_wait_ioctl() reads fence_drv from it to report that driver's
+ * va to userspace.
+ */
+struct amdgpu_userq_fence {
+ struct dma_fence base;
+ /*
+ * This lock is necessary to synchronize the
+ * userqueue dma fence operations.
+ */
+ spinlock_t lock;
+ /* NOTE(review): presumably the node on amdgpu_userq_fence_driver.fences - confirm */
+ struct list_head link;
+ /* NOTE(review): presumably the number of entries in fence_drv_array - confirm */
+ unsigned long fence_drv_array_count;
+ struct amdgpu_userq_fence_driver *fence_drv;
+ struct amdgpu_userq_fence_driver **fence_drv_array;
+};
+
+/*
+ * Fence driver state referenced by user queue fences. It carries a kref;
+ * presumably amdgpu_userq_fence_driver_get()/_put() take and drop references
+ * and amdgpu_userq_fence_driver_destroy() is the release callback - confirm
+ * against the implementation.
+ */
+struct amdgpu_userq_fence_driver {
+ struct kref refcount;
+ /* reported to userspace as fence_info.va by amdgpu_userq_wait_ioctl() */
+ u64 va;
+ u64 gpu_addr;
+ u64 *cpu_addr;
+ u64 context;
+ /*
+ * This lock is necessary to synchronize the access
+ * to the fences list by the fence driver.
+ */
+ spinlock_t fence_list_lock;
+ struct list_head fences;
+ struct amdgpu_device *adev;
+ char timeline_name[TASK_COMM_LEN];
+};
+
+int amdgpu_userq_fence_slab_init(void);
+void amdgpu_userq_fence_slab_fini(void);
+
+void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv);
+void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv);
+int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
+ struct amdgpu_usermode_queue *userq);
+void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq);
+void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv);
+void amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq);
+void amdgpu_userq_fence_driver_destroy(struct kref *ref);
+int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h
new file mode 100644
index 000000000000..1e40ca3b1584
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_UTILS_H_
+#define AMDGPU_UTILS_H_
+
+/* ---------- Generic 2-bit capability attribute encoding ----------
+ * 00 INVALID, 01 RO, 10 WO, 11 RW
+ *
+ * RO occupies bit 0 and WO bit 1; RW is the bitwise OR of the two.
+ */
+enum amdgpu_cap_attr {
+ AMDGPU_CAP_ATTR_INVALID = 0,
+ AMDGPU_CAP_ATTR_RO = 1 << 0,
+ AMDGPU_CAP_ATTR_WO = 1 << 1,
+ AMDGPU_CAP_ATTR_RW = (AMDGPU_CAP_ATTR_RO | AMDGPU_CAP_ATTR_WO),
+};
+
+#define AMDGPU_CAP_ATTR_BITS 2
+#define AMDGPU_CAP_ATTR_MAX ((1U << AMDGPU_CAP_ATTR_BITS) - 1)
+
+/*
+ * Internal helper to build helpers for a given enum NAME.
+ *
+ * Expects enum NAME##_cap_id and its NAME##_COUNT terminator to exist already
+ * and emits:
+ *   NAME##_BITMAP_BITS      - NAME##_COUNT * AMDGPU_CAP_ATTR_BITS
+ *   struct NAME##_caps      - bitmap with AMDGPU_CAP_ATTR_BITS bits per capability
+ *   NAME##_ATTR_START(cap)  - index of the first bit used by @cap
+ *   NAME##_attr_init(c)     - zero the bitmap; a NULL @c is ignored
+ *   NAME##_attr_set(c, cap, attr)
+ *                           - store @attr; -EINVAL if @c is NULL,
+ *                             cap >= NAME##_COUNT or attr > AMDGPU_CAP_ATTR_MAX
+ *   NAME##_attr_get(c, cap, out)
+ *                           - load the attribute into *@out; -EINVAL if @c or
+ *                             @out is NULL or cap >= NAME##_COUNT
+ *   NAME##_cap_is_ro/_wo/_rw(c, id)
+ *                           - true only when the lookup succeeds and the stored
+ *                             attribute is exactly RO / WO / RW
+ */
+#define DECLARE_ATTR_CAP_CLASS_HELPERS(NAME) \
+enum { NAME##_BITMAP_BITS = NAME##_COUNT * AMDGPU_CAP_ATTR_BITS }; \
+struct NAME##_caps { \
+ DECLARE_BITMAP(bmap, NAME##_BITMAP_BITS); \
+}; \
+static inline unsigned int NAME##_ATTR_START(enum NAME##_cap_id cap) \
+{ return (unsigned int)cap * AMDGPU_CAP_ATTR_BITS; } \
+static inline void NAME##_attr_init(struct NAME##_caps *c) \
+{ if (c) bitmap_zero(c->bmap, NAME##_BITMAP_BITS); } \
+static inline int NAME##_attr_set(struct NAME##_caps *c, \
+ enum NAME##_cap_id cap, enum amdgpu_cap_attr attr) \
+{ \
+ if (!c) \
+ return -EINVAL; \
+ if (cap >= NAME##_COUNT) \
+ return -EINVAL; \
+ if ((unsigned int)attr > AMDGPU_CAP_ATTR_MAX) \
+ return -EINVAL; \
+ bitmap_write(c->bmap, (unsigned long)attr, \
+ NAME##_ATTR_START(cap), AMDGPU_CAP_ATTR_BITS); \
+ return 0; \
+} \
+static inline int NAME##_attr_get(const struct NAME##_caps *c, \
+ enum NAME##_cap_id cap, enum amdgpu_cap_attr *out) \
+{ \
+ unsigned long v; \
+ if (!c || !out) \
+ return -EINVAL; \
+ if (cap >= NAME##_COUNT) \
+ return -EINVAL; \
+ v = bitmap_read(c->bmap, NAME##_ATTR_START(cap), AMDGPU_CAP_ATTR_BITS); \
+ *out = (enum amdgpu_cap_attr)v; \
+ return 0; \
+} \
+static inline bool NAME##_cap_is_ro(const struct NAME##_caps *c, enum NAME##_cap_id id) \
+{ enum amdgpu_cap_attr a; return !NAME##_attr_get(c, id, &a) && a == AMDGPU_CAP_ATTR_RO; } \
+static inline bool NAME##_cap_is_wo(const struct NAME##_caps *c, enum NAME##_cap_id id) \
+{ enum amdgpu_cap_attr a; return !NAME##_attr_get(c, id, &a) && a == AMDGPU_CAP_ATTR_WO; } \
+static inline bool NAME##_cap_is_rw(const struct NAME##_caps *c, enum NAME##_cap_id id) \
+{ enum amdgpu_cap_attr a; return !NAME##_attr_get(c, id, &a) && a == AMDGPU_CAP_ATTR_RW; }
+
+/* Element expander for enum creation: turns each list entry into "entry," */
+#define _CAP_ENUM_ELEM(x) x,
+
+/*
+ * Public macro: declare enum + helpers from an X-macro list.
+ *
+ * LIST_MACRO is invoked with _CAP_ENUM_ELEM and must apply it to every
+ * capability identifier; the identifiers become the members of
+ * enum NAME##_cap_id, followed by the NAME##_COUNT terminator. The helpers
+ * are then generated by DECLARE_ATTR_CAP_CLASS_HELPERS(NAME).
+ */
+#define DECLARE_ATTR_CAP_CLASS(NAME, LIST_MACRO) \
+ enum NAME##_cap_id { LIST_MACRO(_CAP_ENUM_ELEM) NAME##_COUNT }; \
+ DECLARE_ATTR_CAP_CLASS_HELPERS(NAME)
+
+#endif /* AMDGPU_UTILS_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 6f8de11a17f1..5c38f0d30c87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -37,6 +37,7 @@
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_uvd.h"
+#include "amdgpu_cs.h"
#include "cikd.h"
#include "uvd/uvd_4_2_d.h"
@@ -95,16 +96,16 @@
*/
struct amdgpu_uvd_cs_ctx {
struct amdgpu_cs_parser *parser;
- unsigned reg, count;
- unsigned data0, data1;
- unsigned idx;
- unsigned ib_idx;
+ unsigned int reg, count;
+ unsigned int data0, data1;
+ unsigned int idx;
+ struct amdgpu_ib *ib;
/* does the IB has a msg command */
bool has_msg_cmd;
/* minimum buffer sizes */
- unsigned *buf_sizes;
+ unsigned int *buf_sizes;
};
#ifdef CONFIG_DRM_AMDGPU_SI
@@ -185,7 +186,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
unsigned long bo_size;
const char *fw_name;
const struct common_firmware_header *hdr;
- unsigned family_id;
+ unsigned int family_id;
int i, j, r;
INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
@@ -259,19 +260,11 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
return -EINVAL;
}
- r = request_firmware(&adev->uvd.fw, fw_name, adev->dev);
- if (r) {
- dev_err(adev->dev, "amdgpu_uvd: Can't load firmware \"%s\"\n",
- fw_name);
- return r;
- }
-
- r = amdgpu_ucode_validate(adev->uvd.fw);
+ r = amdgpu_ucode_request(adev, &adev->uvd.fw, AMDGPU_UCODE_REQUIRED, "%s", fw_name);
if (r) {
dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware \"%s\"\n",
fw_name);
- release_firmware(adev->uvd.fw);
- adev->uvd.fw = NULL;
+ amdgpu_ucode_release(&adev->uvd.fw);
return r;
}
@@ -282,7 +275,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
if (adev->asic_type < CHIP_VEGA20) {
- unsigned version_major, version_minor;
+ unsigned int version_major, version_minor;
version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
@@ -330,8 +323,11 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
if (adev->uvd.harvest_config & (1 << j))
continue;
r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo,
- &adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr);
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->uvd.inst[j].vcpu_bo,
+ &adev->uvd.inst[j].gpu_addr,
+ &adev->uvd.inst[j].cpu_addr);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
return r;
@@ -393,7 +389,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
}
amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, &addr);
- release_firmware(adev->uvd.fw);
+ amdgpu_ucode_release(&adev->uvd.fw);
return 0;
}
@@ -402,32 +398,32 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
* amdgpu_uvd_entity_init - init entity
*
* @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring pointer to check
*
+ * Initialize the entity used for handle management in the kernel driver.
*/
-int amdgpu_uvd_entity_init(struct amdgpu_device *adev)
+int amdgpu_uvd_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
- struct amdgpu_ring *ring;
- struct drm_gpu_scheduler *sched;
- int r;
+ if (ring == &adev->uvd.inst[0].ring) {
+ struct drm_gpu_scheduler *sched = &ring->sched;
+ int r;
- ring = &adev->uvd.inst[0].ring;
- sched = &ring->sched;
- r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL,
- &sched, 1, NULL);
- if (r) {
- DRM_ERROR("Failed setting up UVD kernel entity.\n");
- return r;
+ r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
+ if (r) {
+ DRM_ERROR("Failed setting up UVD kernel entity.\n");
+ return r;
+ }
}
return 0;
}
-int amdgpu_uvd_suspend(struct amdgpu_device *adev)
+int amdgpu_uvd_prepare_suspend(struct amdgpu_device *adev)
{
- unsigned size;
+ unsigned int size;
void *ptr;
int i, j, idx;
- bool in_ras_intr = amdgpu_ras_intr_triggered();
cancel_delayed_work_sync(&adev->uvd.idle_work);
@@ -456,7 +452,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
/* re-write 0 since err_event_athub will corrupt VCPU buffer */
- if (in_ras_intr)
+ if (amdgpu_ras_intr_triggered())
memset(adev->uvd.inst[j].saved_bo, 0, size);
else
memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
@@ -465,7 +461,12 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
}
}
- if (in_ras_intr)
+ return 0;
+}
+
+int amdgpu_uvd_suspend(struct amdgpu_device *adev)
+{
+ if (amdgpu_ras_intr_triggered())
DRM_WARN("UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n");
return 0;
@@ -473,7 +474,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
int amdgpu_uvd_resume(struct amdgpu_device *adev)
{
- unsigned size;
+ unsigned int size;
void *ptr;
int i, idx;
@@ -495,7 +496,7 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
adev->uvd.inst[i].saved_bo = NULL;
} else {
const struct common_firmware_header *hdr;
- unsigned offset;
+ unsigned int offset;
hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
@@ -546,9 +547,12 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo)
{
int i;
+
for (i = 0; i < abo->placement.num_placement; ++i) {
abo->placements[i].fpfn = 0 >> PAGE_SHIFT;
abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
+ if (abo->placements[i].mem_type == TTM_PL_VRAM)
+ abo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
}
}
@@ -557,8 +561,8 @@ static u64 amdgpu_uvd_get_addr_from_ctx(struct amdgpu_uvd_cs_ctx *ctx)
uint32_t lo, hi;
uint64_t addr;
- lo = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data0);
- hi = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data1);
+ lo = amdgpu_ib_get_value(ctx->ib, ctx->data0);
+ hi = amdgpu_ib_get_value(ctx->ib, ctx->data1);
addr = ((uint64_t)lo) | (((uint64_t)hi) << 32);
return addr;
@@ -583,16 +587,17 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
if (r) {
- DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
+ DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
return r;
}
if (!ctx->parser->adev->uvd.address_64_bit) {
/* check if it's a message or feedback command */
- cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
+ cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx) >> 1;
if (cmd == 0x0 || cmd == 0x3) {
/* yes, force it into VRAM */
uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
+
amdgpu_bo_placement_from_domain(bo, domain);
}
amdgpu_uvd_force_into_uvd_segment(bo);
@@ -613,21 +618,21 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
* Peek into the decode message and calculate the necessary buffer sizes.
*/
static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
- unsigned buf_sizes[])
+ unsigned int buf_sizes[])
{
- unsigned stream_type = msg[4];
- unsigned width = msg[6];
- unsigned height = msg[7];
- unsigned dpb_size = msg[9];
- unsigned pitch = msg[28];
- unsigned level = msg[57];
+ unsigned int stream_type = msg[4];
+ unsigned int width = msg[6];
+ unsigned int height = msg[7];
+ unsigned int dpb_size = msg[9];
+ unsigned int pitch = msg[28];
+ unsigned int level = msg[57];
- unsigned width_in_mb = width / 16;
- unsigned height_in_mb = ALIGN(height / 16, 2);
- unsigned fs_in_mb = width_in_mb * height_in_mb;
+ unsigned int width_in_mb = width / 16;
+ unsigned int height_in_mb = ALIGN(height / 16, 2);
+ unsigned int fs_in_mb = width_in_mb * height_in_mb;
- unsigned image_size, tmp, min_dpb_size, num_dpb_buffer;
- unsigned min_ctx_size = ~0;
+ unsigned int image_size, tmp, min_dpb_size, num_dpb_buffer;
+ unsigned int min_ctx_size = ~0;
image_size = width * height;
image_size += image_size / 2;
@@ -635,7 +640,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
switch (stream_type) {
case 0: /* H264 */
- switch(level) {
+ switch (level) {
case 30:
num_dpb_buffer = 8100 / fs_in_mb;
break;
@@ -713,7 +718,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
break;
case 7: /* H264 Perf */
- switch(level) {
+ switch (level) {
case 30:
num_dpb_buffer = 8100 / fs_in_mb;
break;
@@ -746,7 +751,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
/* reference picture buffer */
min_dpb_size = image_size * num_dpb_buffer;
- if (!adev->uvd.use_ctx_buf){
+ if (!adev->uvd.use_ctx_buf) {
/* macroblock context buffer */
min_dpb_size +=
width_in_mb * height_in_mb * num_dpb_buffer * 192;
@@ -809,7 +814,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
* Make sure that we don't open up to many sessions.
*/
static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
- struct amdgpu_bo *bo, unsigned offset)
+ struct amdgpu_bo *bo, unsigned int offset)
{
struct amdgpu_device *adev = ctx->parser->adev;
int32_t *msg, msg_type, handle;
@@ -834,6 +839,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
handle = msg[2];
if (handle == 0) {
+ amdgpu_bo_kunmap(bo);
DRM_ERROR("Invalid UVD handle!\n");
return -EINVAL;
}
@@ -892,6 +898,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
}
+ amdgpu_bo_kunmap(bo);
return -EINVAL;
}
@@ -913,7 +920,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
if (r) {
- DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
+ DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
return r;
}
@@ -925,16 +932,14 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
start += addr;
- amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data0,
- lower_32_bits(start));
- amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data1,
- upper_32_bits(start));
+ amdgpu_ib_set_value(ctx->ib, ctx->data0, lower_32_bits(start));
+ amdgpu_ib_set_value(ctx->ib, ctx->data1, upper_32_bits(start));
- cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
+ cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx) >> 1;
if (cmd < 0x4) {
if ((end - start) < ctx->buf_sizes[cmd]) {
DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd,
- (unsigned)(end - start),
+ (unsigned int)(end - start),
ctx->buf_sizes[cmd]);
return -EINVAL;
}
@@ -942,7 +947,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
} else if (cmd == 0x206) {
if ((end - start) < ctx->buf_sizes[4]) {
DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd,
- (unsigned)(end - start),
+ (unsigned int)(end - start),
ctx->buf_sizes[4]);
return -EINVAL;
}
@@ -953,14 +958,14 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
if (!ctx->parser->adev->uvd.address_64_bit) {
if ((start >> 28) != ((end - 1) >> 28)) {
- DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n",
+ DRM_ERROR("reloc %llx-%llx crossing 256MB boundary!\n",
start, end);
return -EINVAL;
}
if ((cmd == 0 || cmd == 0x3) &&
(start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) {
- DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
+ DRM_ERROR("msg/fb buffer %llx-%llx out of 256MB segment!\n",
start, end);
return -EINVAL;
}
@@ -990,14 +995,13 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
{
- struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
int i, r;
ctx->idx++;
for (i = 0; i <= ctx->count; ++i) {
- unsigned reg = ctx->reg + i;
+ unsigned int reg = ctx->reg + i;
- if (ctx->idx >= ib->length_dw) {
+ if (ctx->idx >= ctx->ib->length_dw) {
DRM_ERROR("Register command after end of CS!\n");
return -EINVAL;
}
@@ -1037,12 +1041,12 @@ static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
{
- struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
int r;
- for (ctx->idx = 0 ; ctx->idx < ib->length_dw; ) {
- uint32_t cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx);
- unsigned type = CP_PACKET_GET_TYPE(cmd);
+ for (ctx->idx = 0 ; ctx->idx < ctx->ib->length_dw; ) {
+ uint32_t cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx);
+ unsigned int type = CP_PACKET_GET_TYPE(cmd);
+
switch (type) {
case PACKET_TYPE0:
ctx->reg = CP_PACKET0_GET_REG(cmd);
@@ -1066,25 +1070,26 @@ static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
* amdgpu_uvd_ring_parse_cs - UVD command submission parser
*
* @parser: Command submission parser context
- * @ib_idx: Which indirect buffer to use
+ * @job: the job to parse
+ * @ib: the IB to patch
*
* Parse the command stream, patch in addresses as necessary.
*/
-int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
+int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
{
struct amdgpu_uvd_cs_ctx ctx = {};
- unsigned buf_sizes[] = {
+ unsigned int buf_sizes[] = {
[0x00000000] = 2048,
[0x00000001] = 0xFFFFFFFF,
[0x00000002] = 0xFFFFFFFF,
[0x00000003] = 2048,
[0x00000004] = 0xFFFFFFFF,
};
- struct amdgpu_ib *ib = &parser->job->ibs[ib_idx];
int r;
- parser->job->vm = NULL;
- ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
+ job->vm = NULL;
if (ib->length_dw % 16) {
DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
@@ -1094,7 +1099,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
ctx.parser = parser;
ctx.buf_sizes = buf_sizes;
- ctx.ib_idx = ib_idx;
+ ctx.ib = ib;
/* first round only required on chips without UVD 64 bit address support */
if (!parser->adev->uvd.address_64_bit) {
@@ -1122,30 +1127,29 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
{
struct amdgpu_device *adev = ring->adev;
struct dma_fence *f = NULL;
+ uint32_t offset, data[4];
struct amdgpu_job *job;
struct amdgpu_ib *ib;
- uint32_t data[4];
uint64_t addr;
- long r;
- int i;
- unsigned offset_idx = 0;
- unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
+ int i, r;
- r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
- AMDGPU_IB_POOL_DELAYED, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, &adev->uvd.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ 64, direct ? AMDGPU_IB_POOL_DIRECT :
+ AMDGPU_IB_POOL_DELAYED, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
- if (adev->asic_type >= CHIP_VEGA10) {
- offset_idx = 1 + ring->me;
- offset[1] = adev->reg_offset[UVD_HWIP][0][1];
- offset[2] = adev->reg_offset[UVD_HWIP][1][1];
- }
+ if (adev->asic_type >= CHIP_VEGA10)
+ offset = adev->reg_offset[UVD_HWIP][ring->me][1];
+ else
+ offset = UVD_BASE_SI;
- data[0] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA0, 0);
- data[1] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA1, 0);
- data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0);
- data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);
+ data[0] = PACKET0(offset + UVD_GPCOM_VCPU_DATA0, 0);
+ data[1] = PACKET0(offset + UVD_GPCOM_VCPU_DATA1, 0);
+ data[2] = PACKET0(offset + UVD_GPCOM_VCPU_CMD, 0);
+ data[3] = PACKET0(offset + UVD_NO_OP, 0);
ib = &job->ibs[0];
addr = amdgpu_bo_gpu_offset(bo);
@@ -1162,27 +1166,17 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
ib->length_dw = 16;
if (direct) {
- r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
- msecs_to_jiffies(10));
- if (r == 0)
- r = -ETIMEDOUT;
- if (r < 0)
- goto err_free;
-
r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err_free;
} else {
- r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv,
- AMDGPU_SYNC_ALWAYS,
- AMDGPU_FENCE_OWNER_UNDEFINED);
+ r = drm_sched_job_add_resv_dependencies(&job->base,
+ bo->tbo.base.resv,
+ DMA_RESV_USAGE_KERNEL);
if (r)
goto err_free;
- r = amdgpu_job_submit(job, &adev->uvd.entity,
- AMDGPU_FENCE_OWNER_UNDEFINED, &f);
- if (r)
- goto err_free;
+ f = amdgpu_job_submit(job);
}
amdgpu_bo_reserve(bo, true);
@@ -1201,8 +1195,9 @@ err_free:
}
/* multiple fence commands without any stream commands in between can
- crash the vcpu so just try to emmit a dummy create/destroy msg to
- avoid this */
+ * crash the vcpu so just try to emmit a dummy create/destroy msg to
+ * avoid this
+ */
int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
struct dma_fence **fence)
{
@@ -1268,15 +1263,14 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, uvd.idle_work.work);
- unsigned fences = 0, i, j;
+ unsigned int fences = 0, i, j;
for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
if (adev->uvd.harvest_config & (1 << i))
continue;
fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring);
- for (j = 0; j < adev->uvd.num_enc_rings; ++j) {
+ for (j = 0; j < adev->uvd.num_enc_rings; ++j)
fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]);
- }
}
if (fences == 0) {
@@ -1372,7 +1366,7 @@ error:
*/
uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev)
{
- unsigned i;
+ unsigned int i;
uint32_t used_handles = 0;
for (i = 0; i < adev->uvd.max_handles; ++i) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
index 76ac9699885d..9dfad2f48ef4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -73,7 +73,8 @@ struct amdgpu_uvd {
int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
int amdgpu_uvd_sw_fini(struct amdgpu_device *adev);
-int amdgpu_uvd_entity_init(struct amdgpu_device *adev);
+int amdgpu_uvd_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+int amdgpu_uvd_prepare_suspend(struct amdgpu_device *adev);
int amdgpu_uvd_suspend(struct amdgpu_device *adev);
int amdgpu_uvd_resume(struct amdgpu_device *adev);
int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
@@ -82,7 +83,9 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
bool direct, struct dma_fence **fence);
void amdgpu_uvd_free_handles(struct amdgpu_device *adev,
struct drm_file *filp);
-int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx);
+int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring);
void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring);
int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 688bef1649b5..a7d8f1ce6ac2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -34,12 +34,16 @@
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vce.h"
+#include "amdgpu_cs.h"
#include "cikd.h"
/* 1 second timeout */
#define VCE_IDLE_TIMEOUT msecs_to_jiffies(1000)
/* Firmware Names */
+#ifdef CONFIG_DRM_AMDGPU_SI
+#define FIRMWARE_VCE_V1_0 "amdgpu/vce_1_0_0.bin"
+#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#define FIRMWARE_BONAIRE "amdgpu/bonaire_vce.bin"
#define FIRMWARE_KABINI "amdgpu/kabini_vce.bin"
@@ -60,6 +64,9 @@
#define FIRMWARE_VEGA12 "amdgpu/vega12_vce.bin"
#define FIRMWARE_VEGA20 "amdgpu/vega20_vce.bin"
+#ifdef CONFIG_DRM_AMDGPU_SI
+MODULE_FIRMWARE(FIRMWARE_VCE_V1_0);
+#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
MODULE_FIRMWARE(FIRMWARE_KABINI);
@@ -87,90 +94,93 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
bool direct, struct dma_fence **fence);
/**
- * amdgpu_vce_sw_init - allocate memory, load vce firmware
+ * amdgpu_vce_firmware_name() - determine the firmware file name for VCE
*
* @adev: amdgpu_device pointer
- * @size: size for the new BO
*
- * First step to get VCE online, allocate memory and load the firmware
+ * Each chip that has VCE IP may need a different firmware.
+ * This function returns the name of the VCE firmware file
+ * appropriate for the current chip.
*/
-int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
+static const char *amdgpu_vce_firmware_name(struct amdgpu_device *adev)
{
- const char *fw_name;
- const struct common_firmware_header *hdr;
- unsigned ucode_version, version_major, version_minor, binary_id;
- int i, r;
-
switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_PITCAIRN:
+ case CHIP_TAHITI:
+ case CHIP_VERDE:
+ return FIRMWARE_VCE_V1_0;
+#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_BONAIRE:
- fw_name = FIRMWARE_BONAIRE;
- break;
+ return FIRMWARE_BONAIRE;
case CHIP_KAVERI:
- fw_name = FIRMWARE_KAVERI;
- break;
+ return FIRMWARE_KAVERI;
case CHIP_KABINI:
- fw_name = FIRMWARE_KABINI;
- break;
+ return FIRMWARE_KABINI;
case CHIP_HAWAII:
- fw_name = FIRMWARE_HAWAII;
- break;
+ return FIRMWARE_HAWAII;
case CHIP_MULLINS:
- fw_name = FIRMWARE_MULLINS;
- break;
+ return FIRMWARE_MULLINS;
#endif
case CHIP_TONGA:
- fw_name = FIRMWARE_TONGA;
- break;
+ return FIRMWARE_TONGA;
case CHIP_CARRIZO:
- fw_name = FIRMWARE_CARRIZO;
- break;
+ return FIRMWARE_CARRIZO;
case CHIP_FIJI:
- fw_name = FIRMWARE_FIJI;
- break;
+ return FIRMWARE_FIJI;
case CHIP_STONEY:
- fw_name = FIRMWARE_STONEY;
- break;
+ return FIRMWARE_STONEY;
case CHIP_POLARIS10:
- fw_name = FIRMWARE_POLARIS10;
- break;
+ return FIRMWARE_POLARIS10;
case CHIP_POLARIS11:
- fw_name = FIRMWARE_POLARIS11;
- break;
+ return FIRMWARE_POLARIS11;
case CHIP_POLARIS12:
- fw_name = FIRMWARE_POLARIS12;
- break;
+ return FIRMWARE_POLARIS12;
case CHIP_VEGAM:
- fw_name = FIRMWARE_VEGAM;
- break;
+ return FIRMWARE_VEGAM;
case CHIP_VEGA10:
- fw_name = FIRMWARE_VEGA10;
- break;
+ return FIRMWARE_VEGA10;
case CHIP_VEGA12:
- fw_name = FIRMWARE_VEGA12;
- break;
+ return FIRMWARE_VEGA12;
case CHIP_VEGA20:
- fw_name = FIRMWARE_VEGA20;
- break;
+ return FIRMWARE_VEGA20;
default:
- return -EINVAL;
+ return NULL;
}
+}
- r = request_firmware(&adev->vce.fw, fw_name, adev->dev);
- if (r) {
- dev_err(adev->dev, "amdgpu_vce: Can't load firmware \"%s\"\n",
- fw_name);
- return r;
- }
+/**
+ * amdgpu_vce_early_init() - try to load VCE firmware
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tries to load the VCE firmware.
+ *
+ * When not found, returns ENOENT so that the driver can
+ * still load and initialize the rest of the IP blocks.
+ * The GPU can function just fine without VCE, they will just
+ * not support video encoding.
+ */
+int amdgpu_vce_early_init(struct amdgpu_device *adev)
+{
+ const char *fw_name = amdgpu_vce_firmware_name(adev);
+ const struct common_firmware_header *hdr;
+ unsigned int ucode_version, version_major, version_minor, binary_id;
+ int r;
- r = amdgpu_ucode_validate(adev->vce.fw);
+ if (!fw_name)
+ return -ENOENT;
+
+ r = amdgpu_ucode_request(adev, &adev->vce.fw, AMDGPU_UCODE_REQUIRED, "%s", fw_name);
if (r) {
- dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n",
- fw_name);
- release_firmware(adev->vce.fw);
- adev->vce.fw = NULL;
- return r;
+ dev_err(adev->dev,
+ "amdgpu_vce: Firmware \"%s\" not found or failed to validate (%d)\n",
+ fw_name, r);
+
+ amdgpu_ucode_release(&adev->vce.fw);
+ return -ENOENT;
}
hdr = (const struct common_firmware_header *)adev->vce.fw->data;
@@ -179,13 +189,39 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
version_major = (ucode_version >> 20) & 0xfff;
version_minor = (ucode_version >> 8) & 0xfff;
binary_id = ucode_version & 0xff;
- DRM_INFO("Found VCE firmware Version: %d.%d Binary ID: %d\n",
+ dev_info(adev->dev, "Found VCE firmware Version: %d.%d Binary ID: %d\n",
version_major, version_minor, binary_id);
adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) |
(binary_id << 8));
+ return 0;
+}
+
+/**
+ * amdgpu_vce_sw_init() - allocate memory for VCE BO
+ *
+ * @adev: amdgpu_device pointer
+ * @size: size for the new BO
+ *
+ * First step to get VCE online: allocate memory for VCE BO.
+ * The VCE firmware binary is copied into the VCE BO later,
+ * in amdgpu_vce_resume. The VCE executes its code from the
+ * VCE BO and also uses the space in this BO for its stack and data.
+ *
+ * Ideally this BO should be placed in VRAM for optimal performance,
+ * although technically it also runs from system RAM (albeit slowly).
+ */
+int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
+{
+ int i, r;
+
+ if (!adev->vce.fw)
+ return -ENOENT;
+
r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM, &adev->vce.vcpu_bo,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->vce.vcpu_bo,
&adev->vce.gpu_addr, &adev->vce.cpu_addr);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
@@ -212,22 +248,22 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
*/
int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
{
- unsigned i;
+ unsigned int i;
if (adev->vce.vcpu_bo == NULL)
return 0;
drm_sched_entity_destroy(&adev->vce.entity);
- amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
- (void **)&adev->vce.cpu_addr);
-
for (i = 0; i < adev->vce.num_rings; i++)
amdgpu_ring_fini(&adev->vce.ring[i]);
- release_firmware(adev->vce.fw);
+ amdgpu_ucode_release(&adev->vce.fw);
mutex_destroy(&adev->vce.idle_mutex);
+ amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
+ (void **)&adev->vce.cpu_addr);
+
return 0;
}
@@ -235,21 +271,22 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
* amdgpu_vce_entity_init - init entity
*
* @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring pointer to check
*
+ * Initialize the entity used for handle management in the kernel driver.
*/
-int amdgpu_vce_entity_init(struct amdgpu_device *adev)
+int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
- struct amdgpu_ring *ring;
- struct drm_gpu_scheduler *sched;
- int r;
-
- ring = &adev->vce.ring[0];
- sched = &ring->sched;
- r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL,
- &sched, 1, NULL);
- if (r != 0) {
- DRM_ERROR("Failed setting up VCE run queue.\n");
- return r;
+ if (ring == &adev->vce.ring[0]) {
+ struct drm_gpu_scheduler *sched = &ring->sched;
+ int r;
+
+ r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
+ if (r != 0) {
+ DRM_ERROR("Failed setting up VCE run queue.\n");
+ return r;
+ }
}
return 0;
@@ -289,40 +326,23 @@ int amdgpu_vce_suspend(struct amdgpu_device *adev)
*/
int amdgpu_vce_resume(struct amdgpu_device *adev)
{
- void *cpu_addr;
const struct common_firmware_header *hdr;
- unsigned offset;
- int r, idx;
+ unsigned int offset;
+ int idx;
if (adev->vce.vcpu_bo == NULL)
return -EINVAL;
- r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
- if (r) {
- dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r);
- return r;
- }
-
- r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr);
- if (r) {
- amdgpu_bo_unreserve(adev->vce.vcpu_bo);
- dev_err(adev->dev, "(%d) VCE map failed\n", r);
- return r;
- }
-
hdr = (const struct common_firmware_header *)adev->vce.fw->data;
offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
- memcpy_toio(cpu_addr, adev->vce.fw->data + offset,
+ memset_io(adev->vce.cpu_addr, 0, amdgpu_bo_size(adev->vce.vcpu_bo));
+ memcpy_toio(adev->vce.cpu_addr, adev->vce.fw->data + offset,
adev->vce.fw->size - offset);
drm_dev_exit(idx);
}
- amdgpu_bo_kunmap(adev->vce.vcpu_bo);
-
- amdgpu_bo_unreserve(adev->vce.vcpu_bo);
-
return 0;
}
@@ -337,7 +357,7 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, vce.idle_work.work);
- unsigned i, count = 0;
+ unsigned int i, count = 0;
for (i = 0; i < adev->vce.num_rings; i++)
count += amdgpu_fence_count_emitted(&adev->vce.ring[i]);
@@ -414,6 +434,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
{
struct amdgpu_ring *ring = &adev->vce.ring[0];
int i, r;
+
for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
uint32_t handle = atomic_read(&adev->vce.handles[i]);
@@ -430,11 +451,28 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
}
/**
+ * amdgpu_vce_required_gart_pages() - gets number of GART pages required by VCE
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns how many GART pages we need before GTT for the VCE IP block.
+ * For VCE1, see vce_v1_0_ensure_vcpu_bo_32bit_addr for details.
+ * For VCE2+, this is not needed so return zero.
+ */
+u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev)
+{
+ /* VCE IP block not added yet, so can't use amdgpu_ip_version */
+ if (adev->family == AMDGPU_FAMILY_SI)
+ return 512;
+
+ return 0;
+}
+
+/**
* amdgpu_vce_get_create_msg - generate a VCE create msg
*
* @ring: ring we should submit the msg to
* @handle: VCE session handle to use
- * @bo: amdgpu object for which we query the offset
* @fence: optional fence to return
*
* Open up a stream for HW test
@@ -442,7 +480,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
struct dma_fence **fence)
{
- const unsigned ib_size_dw = 1024;
+ const unsigned int ib_size_dw = 1024;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct amdgpu_ib ib_msg;
@@ -450,8 +488,10 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
+ &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
@@ -505,7 +545,7 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
ib->ptr[i] = 0x0;
r = amdgpu_job_submit_direct(job, ring, &f);
- amdgpu_ib_free(ring->adev, &ib_msg, f);
+ amdgpu_ib_free(&ib_msg, f);
if (r)
goto err;
@@ -532,15 +572,18 @@ err:
static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
bool direct, struct dma_fence **fence)
{
- const unsigned ib_size_dw = 1024;
+ const unsigned int ib_size_dw = 1024;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ ib_size_dw * 4,
direct ? AMDGPU_IB_POOL_DIRECT :
- AMDGPU_IB_POOL_DELAYED, &job);
+ AMDGPU_IB_POOL_DELAYED, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
@@ -570,8 +613,7 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
if (direct)
r = amdgpu_job_submit_direct(job, ring, &f);
else
- r = amdgpu_job_submit(job, &ring->adev->vce.entity,
- AMDGPU_FENCE_OWNER_UNDEFINED, &f);
+ f = amdgpu_job_submit(job);
if (r)
goto err;
@@ -588,8 +630,8 @@ err:
/**
* amdgpu_vce_validate_bo - make sure not to cross 4GB boundary
*
- * @p: parser context
- * @ib_idx: indirect buffer to use
+ * @p: cs parser
+ * @ib: indirect buffer to use
* @lo: address of lower dword
* @hi: address of higher dword
* @size: minimum size
@@ -597,19 +639,20 @@ err:
*
* Make sure that no BO cross a 4GB boundary.
*/
-static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
- int lo, int hi, unsigned size, int32_t index)
+static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p,
+ struct amdgpu_ib *ib, int lo, int hi,
+ unsigned int size, int32_t index)
{
int64_t offset = ((uint64_t)size) * ((int64_t)index);
struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_bo_va_mapping *mapping;
- unsigned i, fpfn, lpfn;
+ unsigned int i, fpfn, lpfn;
struct amdgpu_bo *bo;
uint64_t addr;
int r;
- addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
- ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
+ addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) |
+ ((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32;
if (index >= 0) {
addr += offset;
fpfn = PAGE_ALIGN(offset) >> PAGE_SHIFT;
@@ -621,7 +664,7 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
if (r) {
- DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
+ DRM_ERROR("Can't find BO for addr 0x%010llx %d %d %d %d\n",
addr, lo, hi, size, index);
return r;
}
@@ -639,7 +682,7 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
* amdgpu_vce_cs_reloc - command submission relocation
*
* @p: parser context
- * @ib_idx: indirect buffer to use
+ * @ib: indirect buffer to use
* @lo: address of lower dword
* @hi: address of higher dword
* @size: minimum size
@@ -647,8 +690,8 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
*
* Patch relocation inside command stream with real buffer address
*/
-static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
- int lo, int hi, unsigned size, uint32_t index)
+static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib,
+ int lo, int hi, unsigned int size, uint32_t index)
{
struct amdgpu_bo_va_mapping *mapping;
struct amdgpu_bo *bo;
@@ -658,20 +701,20 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
if (index == 0xffffffff)
index = 0;
- addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
- ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
+ addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) |
+ ((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32;
addr += ((uint64_t)size) * ((uint64_t)index);
r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
if (r) {
- DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
+ DRM_ERROR("Can't find BO for addr 0x%010llx %d %d %d %d\n",
addr, lo, hi, size, index);
return r;
}
if ((addr + (uint64_t)size) >
(mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
- DRM_ERROR("BO too small for addr 0x%010Lx %d %d\n",
+ DRM_ERROR("BO too small for addr 0x%010llx %d %d\n",
addr, lo, hi);
return -EINVAL;
}
@@ -680,8 +723,8 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
addr += amdgpu_bo_gpu_offset(bo);
addr -= ((uint64_t)size) * ((uint64_t)index);
- amdgpu_set_ib_value(p, ib_idx, lo, lower_32_bits(addr));
- amdgpu_set_ib_value(p, ib_idx, hi, upper_32_bits(addr));
+ amdgpu_ib_set_value(ib, lo, lower_32_bits(addr));
+ amdgpu_ib_set_value(ib, hi, upper_32_bits(addr));
return 0;
}
@@ -694,12 +737,12 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
* @allocated: allocated a new handle?
*
* Validates the handle and return the found session index or -EINVAL
- * we we don't have another free session index.
+ * we don't have another free session index.
*/
static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
uint32_t handle, uint32_t *allocated)
{
- unsigned i;
+ unsigned int i;
/* validate the handle */
for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
@@ -730,27 +773,29 @@ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
* amdgpu_vce_ring_parse_cs - parse and validate the command stream
*
* @p: parser context
- * @ib_idx: indirect buffer to use
+ * @job: the job to parse
+ * @ib: the IB to patch
*/
-int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
+int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
{
- struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
- unsigned fb_idx = 0, bs_idx = 0;
+ unsigned int fb_idx = 0, bs_idx = 0;
int session_idx = -1;
uint32_t destroyed = 0;
uint32_t created = 0;
uint32_t allocated = 0;
uint32_t tmp, handle = 0;
- uint32_t *size = &tmp;
- unsigned idx;
+ uint32_t dummy = 0xffffffff;
+ uint32_t *size = &dummy;
+ unsigned int idx;
int i, r = 0;
- p->job->vm = NULL;
- ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
+ job->vm = NULL;
for (idx = 0; idx < ib->length_dw;) {
- uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
- uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
+ uint32_t len = amdgpu_ib_get_value(ib, idx);
+ uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);
if ((len < 8) || (len & 3)) {
DRM_ERROR("invalid VCE command length (%d)!\n", len);
@@ -760,52 +805,52 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
switch (cmd) {
case 0x00000002: /* task info */
- fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
- bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
+ fb_idx = amdgpu_ib_get_value(ib, idx + 6);
+ bs_idx = amdgpu_ib_get_value(ib, idx + 7);
break;
case 0x03000001: /* encode */
- r = amdgpu_vce_validate_bo(p, ib_idx, idx + 10,
- idx + 9, 0, 0);
+ r = amdgpu_vce_validate_bo(p, ib, idx + 10, idx + 9,
+ 0, 0);
if (r)
goto out;
- r = amdgpu_vce_validate_bo(p, ib_idx, idx + 12,
- idx + 11, 0, 0);
+ r = amdgpu_vce_validate_bo(p, ib, idx + 12, idx + 11,
+ 0, 0);
if (r)
goto out;
break;
case 0x05000001: /* context buffer */
- r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
- idx + 2, 0, 0);
+ r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
+ 0, 0);
if (r)
goto out;
break;
case 0x05000004: /* video bitstream buffer */
- tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
- r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2,
+ tmp = amdgpu_ib_get_value(ib, idx + 4);
+ r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
tmp, bs_idx);
if (r)
goto out;
break;
case 0x05000005: /* feedback buffer */
- r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2,
+ r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
4096, fb_idx);
if (r)
goto out;
break;
case 0x0500000d: /* MV buffer */
- r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
- idx + 2, 0, 0);
+ r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
+ 0, 0);
if (r)
goto out;
- r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8,
- idx + 7, 0, 0);
+ r = amdgpu_vce_validate_bo(p, ib, idx + 8, idx + 7,
+ 0, 0);
if (r)
goto out;
break;
@@ -815,12 +860,12 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
}
for (idx = 0; idx < ib->length_dw;) {
- uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
- uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
+ uint32_t len = amdgpu_ib_get_value(ib, idx);
+ uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);
switch (cmd) {
case 0x00000001: /* session */
- handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
+ handle = amdgpu_ib_get_value(ib, idx + 2);
session_idx = amdgpu_vce_validate_handle(p, handle,
&allocated);
if (session_idx < 0) {
@@ -831,8 +876,8 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
break;
case 0x00000002: /* task info */
- fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
- bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
+ fb_idx = amdgpu_ib_get_value(ib, idx + 6);
+ bs_idx = amdgpu_ib_get_value(ib, idx + 7);
break;
case 0x01000001: /* create */
@@ -847,8 +892,8 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
goto out;
}
- *size = amdgpu_get_ib_value(p, ib_idx, idx + 8) *
- amdgpu_get_ib_value(p, ib_idx, idx + 10) *
+ *size = amdgpu_ib_get_value(ib, idx + 8) *
+ amdgpu_ib_get_value(ib, idx + 10) *
8 * 3 / 2;
break;
@@ -877,12 +922,12 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
break;
case 0x03000001: /* encode */
- r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9,
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 10, idx + 9,
*size, 0);
if (r)
goto out;
- r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11,
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 12, idx + 11,
*size / 3, 0);
if (r)
goto out;
@@ -893,35 +938,35 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
break;
case 0x05000001: /* context buffer */
- r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
*size * 2, 0);
if (r)
goto out;
break;
case 0x05000004: /* video bitstream buffer */
- tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
- r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
+ tmp = amdgpu_ib_get_value(ib, idx + 4);
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
tmp, bs_idx);
if (r)
goto out;
break;
case 0x05000005: /* feedback buffer */
- r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
4096, fb_idx);
if (r)
goto out;
break;
case 0x0500000d: /* MV buffer */
- r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3,
- idx + 2, *size, 0);
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 3,
+ idx + 2, *size, 0);
if (r)
goto out;
- r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8,
- idx + 7, *size / 12, 0);
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 8,
+ idx + 7, *size / 12, 0);
if (r)
goto out;
break;
@@ -966,11 +1011,13 @@ out:
* amdgpu_vce_ring_parse_cs_vm - parse the command stream in VM mode
*
* @p: parser context
- * @ib_idx: indirect buffer to use
+ * @job: the job to parse
+ * @ib: the IB to patch
*/
-int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
+int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
{
- struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
int session_idx = -1;
uint32_t destroyed = 0;
uint32_t created = 0;
@@ -979,8 +1026,8 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
int i, r = 0, idx = 0;
while (idx < ib->length_dw) {
- uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
- uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
+ uint32_t len = amdgpu_ib_get_value(ib, idx);
+ uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);
if ((len < 8) || (len & 3)) {
DRM_ERROR("invalid VCE command length (%d)!\n", len);
@@ -990,7 +1037,7 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
switch (cmd) {
case 0x00000001: /* session */
- handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
+ handle = amdgpu_ib_get_value(ib, idx + 2);
session_idx = amdgpu_vce_validate_handle(p, handle,
&allocated);
if (session_idx < 0) {
@@ -1039,7 +1086,6 @@ out:
if (!r) {
/* No error, free all destroyed handle slots */
tmp = destroyed;
- amdgpu_ib_free(p->adev, ib, NULL);
} else {
/* Error during parsing, free all allocated handle slots */
tmp = allocated;
@@ -1082,7 +1128,7 @@ void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
*
*/
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
- unsigned flags)
+ unsigned int flags)
{
WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
@@ -1104,7 +1150,7 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t rptr;
- unsigned i;
+ unsigned int i;
int r, timeout = adev->usec_timeout;
/* skip ring test for sriov*/
@@ -1169,7 +1215,7 @@ error:
enum amdgpu_ring_priority_level amdgpu_vce_get_ring_prio(int ring)
{
- switch(ring) {
+ switch (ring) {
case 0:
return AMDGPU_RING_PRIO_0;
case 1:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index be4a6e773c5b..1c3464ce5037 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -51,16 +51,22 @@ struct amdgpu_vce {
struct drm_sched_entity entity;
uint32_t srbm_soft_reset;
unsigned num_rings;
+ uint32_t keyselect;
};
+int amdgpu_vce_early_init(struct amdgpu_device *adev);
int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size);
int amdgpu_vce_sw_fini(struct amdgpu_device *adev);
-int amdgpu_vce_entity_init(struct amdgpu_device *adev);
+int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring);
int amdgpu_vce_suspend(struct amdgpu_device *adev);
int amdgpu_vce_resume(struct amdgpu_device *adev);
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
-int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
-int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx);
+u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev);
+int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
+int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
struct amdgpu_ib *ib, uint32_t flags);
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 585961c2f5f2..5e0786ea911b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2016 Advanced Micro Devices, Inc.
+ * Copyright 2016-2024 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -26,7 +26,9 @@
#include <linux/firmware.h>
#include <linux/module.h>
+#include <linux/dmi.h>
#include <linux/pci.h>
+#include <linux/debugfs.h>
#include <drm/drm_drv.h>
#include "amdgpu.h"
@@ -35,22 +37,32 @@
#include "soc15d.h"
/* Firmware Names */
-#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
-#define FIRMWARE_PICASSO "amdgpu/picasso_vcn.bin"
-#define FIRMWARE_RAVEN2 "amdgpu/raven2_vcn.bin"
-#define FIRMWARE_ARCTURUS "amdgpu/arcturus_vcn.bin"
-#define FIRMWARE_RENOIR "amdgpu/renoir_vcn.bin"
-#define FIRMWARE_GREEN_SARDINE "amdgpu/green_sardine_vcn.bin"
-#define FIRMWARE_NAVI10 "amdgpu/navi10_vcn.bin"
-#define FIRMWARE_NAVI14 "amdgpu/navi14_vcn.bin"
-#define FIRMWARE_NAVI12 "amdgpu/navi12_vcn.bin"
-#define FIRMWARE_SIENNA_CICHLID "amdgpu/sienna_cichlid_vcn.bin"
-#define FIRMWARE_NAVY_FLOUNDER "amdgpu/navy_flounder_vcn.bin"
-#define FIRMWARE_VANGOGH "amdgpu/vangogh_vcn.bin"
+#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
+#define FIRMWARE_PICASSO "amdgpu/picasso_vcn.bin"
+#define FIRMWARE_RAVEN2 "amdgpu/raven2_vcn.bin"
+#define FIRMWARE_ARCTURUS "amdgpu/arcturus_vcn.bin"
+#define FIRMWARE_RENOIR "amdgpu/renoir_vcn.bin"
+#define FIRMWARE_GREEN_SARDINE "amdgpu/green_sardine_vcn.bin"
+#define FIRMWARE_NAVI10 "amdgpu/navi10_vcn.bin"
+#define FIRMWARE_NAVI14 "amdgpu/navi14_vcn.bin"
+#define FIRMWARE_NAVI12 "amdgpu/navi12_vcn.bin"
+#define FIRMWARE_SIENNA_CICHLID "amdgpu/sienna_cichlid_vcn.bin"
+#define FIRMWARE_NAVY_FLOUNDER "amdgpu/navy_flounder_vcn.bin"
+#define FIRMWARE_VANGOGH "amdgpu/vangogh_vcn.bin"
#define FIRMWARE_DIMGREY_CAVEFISH "amdgpu/dimgrey_cavefish_vcn.bin"
-#define FIRMWARE_ALDEBARAN "amdgpu/aldebaran_vcn.bin"
-#define FIRMWARE_BEIGE_GOBY "amdgpu/beige_goby_vcn.bin"
-#define FIRMWARE_YELLOW_CARP "amdgpu/yellow_carp_vcn.bin"
+#define FIRMWARE_ALDEBARAN "amdgpu/aldebaran_vcn.bin"
+#define FIRMWARE_BEIGE_GOBY "amdgpu/beige_goby_vcn.bin"
+#define FIRMWARE_YELLOW_CARP "amdgpu/yellow_carp_vcn.bin"
+#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin"
+#define FIRMWARE_VCN4_0_0 "amdgpu/vcn_4_0_0.bin"
+#define FIRMWARE_VCN4_0_2 "amdgpu/vcn_4_0_2.bin"
+#define FIRMWARE_VCN4_0_3 "amdgpu/vcn_4_0_3.bin"
+#define FIRMWARE_VCN4_0_4 "amdgpu/vcn_4_0_4.bin"
+#define FIRMWARE_VCN4_0_5 "amdgpu/vcn_4_0_5.bin"
+#define FIRMWARE_VCN4_0_6 "amdgpu/vcn_4_0_6.bin"
+#define FIRMWARE_VCN4_0_6_1 "amdgpu/vcn_4_0_6_1.bin"
+#define FIRMWARE_VCN5_0_0 "amdgpu/vcn_5_0_0.bin"
+#define FIRMWARE_VCN5_0_1 "amdgpu/vcn_5_0_1.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
@@ -68,124 +80,93 @@ MODULE_FIRMWARE(FIRMWARE_VANGOGH);
MODULE_FIRMWARE(FIRMWARE_DIMGREY_CAVEFISH);
MODULE_FIRMWARE(FIRMWARE_BEIGE_GOBY);
MODULE_FIRMWARE(FIRMWARE_YELLOW_CARP);
+MODULE_FIRMWARE(FIRMWARE_VCN_3_1_2);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_0);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_2);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_3);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_4);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_5);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_6);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_6_1);
+MODULE_FIRMWARE(FIRMWARE_VCN5_0_0);
+MODULE_FIRMWARE(FIRMWARE_VCN5_0_1);
static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
+static void amdgpu_vcn_reg_dump_fini(struct amdgpu_device *adev);
-int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
+int amdgpu_vcn_early_init(struct amdgpu_device *adev, int i)
+{
+ char ucode_prefix[25];
+ int r;
+
+ adev->vcn.inst[i].adev = adev;
+ adev->vcn.inst[i].inst = i;
+ amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ if (i != 0 && adev->vcn.per_inst_fw) {
+ r = amdgpu_ucode_request(adev, &adev->vcn.inst[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_%d.bin", ucode_prefix, i);
+ if (r)
+ amdgpu_ucode_release(&adev->vcn.inst[i].fw);
+ } else {
+ if (!adev->vcn.inst[0].fw) {
+ r = amdgpu_ucode_request(adev, &adev->vcn.inst[0].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s.bin", ucode_prefix);
+ if (r)
+ amdgpu_ucode_release(&adev->vcn.inst[0].fw);
+ } else {
+ r = 0;
+ }
+ adev->vcn.inst[i].fw = adev->vcn.inst[0].fw;
+ }
+
+ return r;
+}
+
+int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i)
{
unsigned long bo_size;
- const char *fw_name;
const struct common_firmware_header *hdr;
unsigned char fw_check;
- int i, r;
-
- INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
- mutex_init(&adev->vcn.vcn_pg_lock);
- mutex_init(&adev->vcn.vcn1_jpeg1_workaround);
- atomic_set(&adev->vcn.total_submission_cnt, 0);
- for (i = 0; i < adev->vcn.num_vcn_inst; i++)
- atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
-
- switch (adev->ip_versions[UVD_HWIP][0]) {
- case IP_VERSION(1, 0, 0):
- case IP_VERSION(1, 0, 1):
- if (adev->apu_flags & AMD_APU_IS_RAVEN2)
- fw_name = FIRMWARE_RAVEN2;
- else if (adev->apu_flags & AMD_APU_IS_PICASSO)
- fw_name = FIRMWARE_PICASSO;
- else
- fw_name = FIRMWARE_RAVEN;
- break;
- case IP_VERSION(2, 5, 0):
- fw_name = FIRMWARE_ARCTURUS;
- if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
- adev->vcn.indirect_sram = true;
- break;
- case IP_VERSION(2, 2, 0):
- if (adev->apu_flags & AMD_APU_IS_RENOIR)
- fw_name = FIRMWARE_RENOIR;
- else
- fw_name = FIRMWARE_GREEN_SARDINE;
-
- if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
- adev->vcn.indirect_sram = true;
- break;
- case IP_VERSION(2, 6, 0):
- fw_name = FIRMWARE_ALDEBARAN;
- if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
- adev->vcn.indirect_sram = true;
- break;
- case IP_VERSION(2, 0, 0):
- fw_name = FIRMWARE_NAVI10;
- if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
- adev->vcn.indirect_sram = true;
- break;
- case IP_VERSION(2, 0, 2):
- if (adev->asic_type == CHIP_NAVI12)
- fw_name = FIRMWARE_NAVI12;
- else
- fw_name = FIRMWARE_NAVI14;
- if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
- adev->vcn.indirect_sram = true;
- break;
- case IP_VERSION(3, 0, 0):
- case IP_VERSION(3, 0, 64):
- case IP_VERSION(3, 0, 192):
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0))
- fw_name = FIRMWARE_SIENNA_CICHLID;
- else
- fw_name = FIRMWARE_NAVY_FLOUNDER;
- if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
- adev->vcn.indirect_sram = true;
- break;
- case IP_VERSION(3, 0, 2):
- fw_name = FIRMWARE_VANGOGH;
- break;
- case IP_VERSION(3, 0, 16):
- fw_name = FIRMWARE_DIMGREY_CAVEFISH;
- if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
- adev->vcn.indirect_sram = true;
- break;
- case IP_VERSION(3, 0, 33):
- fw_name = FIRMWARE_BEIGE_GOBY;
- if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
- adev->vcn.indirect_sram = true;
- break;
- case IP_VERSION(3, 1, 1):
- fw_name = FIRMWARE_YELLOW_CARP;
- if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
- adev->vcn.indirect_sram = true;
- break;
- default:
- return -EINVAL;
- }
+ unsigned int fw_shared_size, log_offset;
+ int r;
- r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
- if (r) {
- dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
- fw_name);
- return r;
+ mutex_init(&adev->vcn.inst[i].vcn1_jpeg1_workaround);
+ mutex_init(&adev->vcn.inst[i].vcn_pg_lock);
+ mutex_init(&adev->vcn.inst[i].engine_reset_mutex);
+ atomic_set(&adev->vcn.inst[i].total_submission_cnt, 0);
+ INIT_DELAYED_WORK(&adev->vcn.inst[i].idle_work, amdgpu_vcn_idle_work_handler);
+ atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
+ adev->vcn.inst[i].indirect_sram = true;
+
+ /*
+ * Some Steam Deck's BIOS versions are incompatible with the
+ * indirect SRAM mode, leading to amdgpu being unable to get
+ * properly probed (and even potentially crashing the kernel).
+ * Hence, check for these versions here - notice this is
+ * restricted to Vangogh (Deck's APU).
+ */
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(3, 0, 2)) {
+ const char *bios_ver = dmi_get_system_info(DMI_BIOS_VERSION);
+
+ if (bios_ver && (!strncmp("F7A0113", bios_ver, 7) ||
+ !strncmp("F7A0114", bios_ver, 7))) {
+ adev->vcn.inst[i].indirect_sram = false;
+ dev_info(adev->dev,
+ "Steam Deck quirk: indirect SRAM disabled on BIOS %s\n", bios_ver);
+ }
}
- r = amdgpu_ucode_validate(adev->vcn.fw);
- if (r) {
- dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
- fw_name);
- release_firmware(adev->vcn.fw);
- adev->vcn.fw = NULL;
- return r;
- }
+ /* from vcn4 and above, only unified queue is used */
+ adev->vcn.inst[i].using_unified_queue =
+ amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0);
- hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
+ adev->vcn.inst[i].fw_version = le32_to_cpu(hdr->ucode_version);
adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);
/* Bit 20-23, it is encode major and non-zero for new naming convention.
@@ -203,283 +184,378 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
enc_major = fw_check;
dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
- DRM_INFO("Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
- enc_major, enc_minor, dec_ver, vep, fw_rev);
+ dev_info(adev->dev,
+ "[VCN instance %d] Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
+ i, enc_major, enc_minor, dec_ver, vep, fw_rev);
} else {
unsigned int version_major, version_minor, family_id;
family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
- DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",
- version_major, version_minor, family_id);
+ dev_info(adev->dev, "[VCN instance %d] Found VCN firmware Version: %u.%u Family ID: %u\n",
+ i, version_major, version_minor, family_id);
}
bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
- bo_size += AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
- for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(5, 0, 0)) {
+ fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared));
+ log_offset = offsetof(struct amdgpu_vcn5_fw_shared, fw_log);
+ } else if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0)) {
+ fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared));
+ log_offset = offsetof(struct amdgpu_vcn4_fw_shared, fw_log);
+ } else {
+ fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
+ log_offset = offsetof(struct amdgpu_fw_shared, fw_log);
+ }
- r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].vcpu_bo,
- &adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr);
+ bo_size += fw_shared_size;
+
+ if (amdgpu_vcnfw_log)
+ bo_size += AMDGPU_VCNFW_LOG_SIZE;
+
+ r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->vcn.inst[i].vcpu_bo,
+ &adev->vcn.inst[i].gpu_addr,
+ &adev->vcn.inst[i].cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
+ return r;
+ }
+
+ adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr +
+ bo_size - fw_shared_size;
+ adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr +
+ bo_size - fw_shared_size;
+
+ adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size;
+
+ if (amdgpu_vcnfw_log) {
+ adev->vcn.inst[i].fw_shared.cpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
+ adev->vcn.inst[i].fw_shared.gpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
+ adev->vcn.inst[i].fw_shared.log_offset = log_offset;
+ }
+
+ if (adev->vcn.inst[i].indirect_sram) {
+ r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->vcn.inst[i].dpg_sram_bo,
+ &adev->vcn.inst[i].dpg_sram_gpu_addr,
+ &adev->vcn.inst[i].dpg_sram_cpu_addr);
if (r) {
- dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
+ dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
return r;
}
-
- adev->vcn.inst[i].fw_shared_cpu_addr = adev->vcn.inst[i].cpu_addr +
- bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
- adev->vcn.inst[i].fw_shared_gpu_addr = adev->vcn.inst[i].gpu_addr +
- bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
-
- if (adev->vcn.indirect_sram) {
- r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].dpg_sram_bo,
- &adev->vcn.inst[i].dpg_sram_gpu_addr, &adev->vcn.inst[i].dpg_sram_cpu_addr);
- if (r) {
- dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
- return r;
- }
- }
}
return 0;
}
-int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
+void amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i)
{
- int i, j;
+ int j;
- for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
- if (adev->vcn.harvest_config & (1 << j))
- continue;
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
- if (adev->vcn.indirect_sram) {
- amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo,
- &adev->vcn.inst[j].dpg_sram_gpu_addr,
- (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
- }
- kvfree(adev->vcn.inst[j].saved_bo);
+ amdgpu_bo_free_kernel(
+ &adev->vcn.inst[i].dpg_sram_bo,
+ &adev->vcn.inst[i].dpg_sram_gpu_addr,
+ (void **)&adev->vcn.inst[i].dpg_sram_cpu_addr);
+
+ kvfree(adev->vcn.inst[i].saved_bo);
- amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
- &adev->vcn.inst[j].gpu_addr,
- (void **)&adev->vcn.inst[j].cpu_addr);
+ amdgpu_bo_free_kernel(&adev->vcn.inst[i].vcpu_bo,
+ &adev->vcn.inst[i].gpu_addr,
+ (void **)&adev->vcn.inst[i].cpu_addr);
- amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec);
+ amdgpu_ring_fini(&adev->vcn.inst[i].ring_dec);
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
- amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j)
+ amdgpu_ring_fini(&adev->vcn.inst[i].ring_enc[j]);
+
+ if (adev->vcn.per_inst_fw) {
+ amdgpu_ucode_release(&adev->vcn.inst[i].fw);
+ } else {
+ amdgpu_ucode_release(&adev->vcn.inst[0].fw);
+ adev->vcn.inst[i].fw = NULL;
}
- release_firmware(adev->vcn.fw);
- mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround);
- mutex_destroy(&adev->vcn.vcn_pg_lock);
+ if (adev->vcn.reg_list)
+ amdgpu_vcn_reg_dump_fini(adev);
- return 0;
+ mutex_destroy(&adev->vcn.inst[i].vcn_pg_lock);
+ mutex_destroy(&adev->vcn.inst[i].vcn1_jpeg1_workaround);
}
bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance)
{
bool ret = false;
+ int vcn_config = adev->vcn.inst[vcn_instance].vcn_config;
- int major;
- int minor;
- int revision;
-
- /* if cannot find IP data, then this VCN does not exist */
- if (amdgpu_discovery_get_vcn_version(adev, vcn_instance, &major, &minor, &revision) != 0)
- return true;
-
- if ((type == VCN_ENCODE_RING) && (revision & VCN_BLOCK_ENCODE_DISABLE_MASK)) {
+ if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK))
ret = true;
- } else if ((type == VCN_DECODE_RING) && (revision & VCN_BLOCK_DECODE_DISABLE_MASK)) {
+ else if ((type == VCN_DECODE_RING) && (vcn_config & VCN_BLOCK_DECODE_DISABLE_MASK))
ret = true;
- } else if ((type == VCN_UNIFIED_RING) && (revision & VCN_BLOCK_QUEUE_DISABLE_MASK)) {
+ else if ((type == VCN_UNIFIED_RING) && (vcn_config & VCN_BLOCK_QUEUE_DISABLE_MASK))
ret = true;
- }
return ret;
}
-int amdgpu_vcn_suspend(struct amdgpu_device *adev)
+static int amdgpu_vcn_save_vcpu_bo_inst(struct amdgpu_device *adev, int i)
{
- unsigned size;
+ unsigned int size;
void *ptr;
- int i, idx;
+ int idx;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+ if (adev->vcn.inst[i].vcpu_bo == NULL)
+ return 0;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- if (adev->vcn.inst[i].vcpu_bo == NULL)
- return 0;
+ size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
+ ptr = adev->vcn.inst[i].cpu_addr;
- size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
- ptr = adev->vcn.inst[i].cpu_addr;
+ adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
+ if (!adev->vcn.inst[i].saved_bo)
+ return -ENOMEM;
- adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
- if (!adev->vcn.inst[i].saved_bo)
- return -ENOMEM;
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
+ drm_dev_exit(idx);
+ }
- if (drm_dev_enter(adev_to_drm(adev), &idx)) {
- memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
- drm_dev_exit(idx);
- }
+ return 0;
+}
+
+int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev)
+{
+ int ret, i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ret = amdgpu_vcn_save_vcpu_bo_inst(adev, i);
+ if (ret)
+ return ret;
}
+
return 0;
}
-int amdgpu_vcn_resume(struct amdgpu_device *adev)
+int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i)
+{
+ bool in_ras_intr = amdgpu_ras_intr_triggered();
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+
+ /* err_event_athub and dpc recovery will corrupt VCPU buffer, so we need to
+ * restore fw data and clear buffer in amdgpu_vcn_resume() */
+ if (in_ras_intr || adev->pcie_reset_ctx.in_link_reset)
+ return 0;
+
+ return amdgpu_vcn_save_vcpu_bo_inst(adev, i);
+}
+
+int amdgpu_vcn_resume(struct amdgpu_device *adev, int i)
{
- unsigned size;
+ unsigned int size;
void *ptr;
- int i, idx;
+ int idx;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- if (adev->vcn.inst[i].vcpu_bo == NULL)
- return -EINVAL;
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+ if (adev->vcn.inst[i].vcpu_bo == NULL)
+ return -EINVAL;
+
+ size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
+ ptr = adev->vcn.inst[i].cpu_addr;
- size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
- ptr = adev->vcn.inst[i].cpu_addr;
+ if (adev->vcn.inst[i].saved_bo != NULL) {
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
+ drm_dev_exit(idx);
+ }
+ kvfree(adev->vcn.inst[i].saved_bo);
+ adev->vcn.inst[i].saved_bo = NULL;
+ } else {
+ const struct common_firmware_header *hdr;
+ unsigned int offset;
- if (adev->vcn.inst[i].saved_bo != NULL) {
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
- memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
+ memcpy_toio(adev->vcn.inst[i].cpu_addr,
+ adev->vcn.inst[i].fw->data + offset,
+ le32_to_cpu(hdr->ucode_size_bytes));
drm_dev_exit(idx);
}
- kvfree(adev->vcn.inst[i].saved_bo);
- adev->vcn.inst[i].saved_bo = NULL;
- } else {
- const struct common_firmware_header *hdr;
- unsigned offset;
-
- hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
- offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
- if (drm_dev_enter(adev_to_drm(adev), &idx)) {
- memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset,
- le32_to_cpu(hdr->ucode_size_bytes));
- drm_dev_exit(idx);
- }
- size -= le32_to_cpu(hdr->ucode_size_bytes);
- ptr += le32_to_cpu(hdr->ucode_size_bytes);
- }
- memset_io(ptr, 0, size);
+ size -= le32_to_cpu(hdr->ucode_size_bytes);
+ ptr += le32_to_cpu(hdr->ucode_size_bytes);
}
+ memset_io(ptr, 0, size);
}
+
return 0;
}
-static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
+void amdgpu_vcn_get_profile(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev =
- container_of(work, struct amdgpu_device, vcn.idle_work.work);
- unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
- unsigned int i, j;
- int r = 0;
+ int r;
- for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
- if (adev->vcn.harvest_config & (1 << j))
- continue;
+ mutex_lock(&adev->vcn.workload_profile_mutex);
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
- fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
+ if (adev->vcn.workload_profile_active) {
+ mutex_unlock(&adev->vcn.workload_profile_mutex);
+ return;
+ }
+ r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
+ true);
+ if (r)
+ dev_warn(adev->dev,
+ "(%d) failed to enable video power profile mode\n", r);
+ else
+ adev->vcn.workload_profile_active = true;
+ mutex_unlock(&adev->vcn.workload_profile_mutex);
+}
+
+void amdgpu_vcn_put_profile(struct amdgpu_device *adev)
+{
+ bool pg = true;
+ int r, i;
+
+ mutex_lock(&adev->vcn.workload_profile_mutex);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.inst[i].cur_state != AMD_PG_STATE_GATE) {
+ pg = false;
+ break;
}
+ }
+
+ if (pg) {
+ r = amdgpu_dpm_switch_power_profile(
+ adev, PP_SMC_POWER_PROFILE_VIDEO, false);
+ if (r)
+ dev_warn(
+ adev->dev,
+ "(%d) failed to disable video power profile mode\n",
+ r);
+ else
+ adev->vcn.workload_profile_active = false;
+ }
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- struct dpg_pause_state new_state;
+ mutex_unlock(&adev->vcn.workload_profile_mutex);
+}
- if (fence[j] ||
- unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt)))
- new_state.fw_based = VCN_DPG_STATE__PAUSE;
- else
- new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_vcn_inst *vcn_inst =
+ container_of(work, struct amdgpu_vcn_inst, idle_work.work);
+ struct amdgpu_device *adev = vcn_inst->adev;
+ unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
+ unsigned int i = vcn_inst->inst, j;
- adev->vcn.pause_dpg_mode(adev, j, &new_state);
- }
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
+
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j)
+ fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[j]);
+
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ !adev->vcn.inst[i].using_unified_queue) {
+ struct dpg_pause_state new_state;
- fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
- fences += fence[j];
+ if (fence[i] ||
+ unlikely(atomic_read(&vcn_inst->dpg_enc_submission_cnt)))
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+
+ adev->vcn.inst[i].pause_dpg_mode(vcn_inst, &new_state);
}
- if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) {
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
- AMD_PG_STATE_GATE);
- r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
- false);
- if (r)
- dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
+ fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_dec);
+ fences += fence[i];
+
+ if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) {
+ mutex_lock(&vcn_inst->vcn_pg_lock);
+ vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE);
+ mutex_unlock(&vcn_inst->vcn_pg_lock);
+ amdgpu_vcn_put_profile(adev);
+
} else {
- schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+ schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT);
}
}
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- int r = 0;
+ struct amdgpu_vcn_inst *vcn_inst = &adev->vcn.inst[ring->me];
- atomic_inc(&adev->vcn.total_submission_cnt);
+ atomic_inc(&vcn_inst->total_submission_cnt);
- if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) {
- r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
- true);
- if (r)
- dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
- }
+ cancel_delayed_work_sync(&vcn_inst->idle_work);
- mutex_lock(&adev->vcn.vcn_pg_lock);
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
- AMD_PG_STATE_UNGATE);
+ mutex_lock(&vcn_inst->vcn_pg_lock);
+ vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE);
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ !vcn_inst->using_unified_queue) {
struct dpg_pause_state new_state;
if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
- atomic_inc(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
+ atomic_inc(&vcn_inst->dpg_enc_submission_cnt);
new_state.fw_based = VCN_DPG_STATE__PAUSE;
} else {
unsigned int fences = 0;
unsigned int i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
- fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);
+ for (i = 0; i < vcn_inst->num_enc_rings; ++i)
+ fences += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[i]);
- if (fences || atomic_read(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt))
+ if (fences || atomic_read(&vcn_inst->dpg_enc_submission_cnt))
new_state.fw_based = VCN_DPG_STATE__PAUSE;
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
}
- adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
+ vcn_inst->pause_dpg_mode(vcn_inst, &new_state);
}
- mutex_unlock(&adev->vcn.vcn_pg_lock);
+ mutex_unlock(&vcn_inst->vcn_pg_lock);
+ amdgpu_vcn_get_profile(adev);
}
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
- ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC &&
+ !adev->vcn.inst[ring->me].using_unified_queue)
atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
- atomic_dec(&ring->adev->vcn.total_submission_cnt);
+ atomic_dec(&ring->adev->vcn.inst[ring->me].total_submission_cnt);
- schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+ schedule_delayed_work(&ring->adev->vcn.inst[ring->me].idle_work,
+ VCN_IDLE_TIMEOUT);
}
int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t tmp = 0;
- unsigned i;
+ unsigned int i;
int r;
/* VCN in SRIOV does not support direct register read/write */
@@ -490,7 +566,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
r = amdgpu_ring_alloc(ring, 3);
if (r)
return r;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.scratch9, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
@@ -541,27 +617,28 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
struct amdgpu_ib *ib_msg,
struct dma_fence **fence)
{
+ u64 addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
struct amdgpu_device *adev = ring->adev;
struct dma_fence *f = NULL;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
- uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
int i, r;
- r = amdgpu_job_alloc_with_ib(adev, 64,
- AMDGPU_IB_POOL_DIRECT, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
+ 64, AMDGPU_IB_POOL_DIRECT,
+ &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
goto err;
ib = &job->ibs[0];
- ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
+ ib->ptr[0] = PACKET0(adev->vcn.inst[ring->me].internal.data0, 0);
ib->ptr[1] = addr;
- ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
+ ib->ptr[2] = PACKET0(adev->vcn.inst[ring->me].internal.data1, 0);
ib->ptr[3] = addr >> 32;
- ib->ptr[4] = PACKET0(adev->vcn.internal.cmd, 0);
+ ib->ptr[4] = PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0);
ib->ptr[5] = 0;
for (i = 6; i < 16; i += 2) {
- ib->ptr[i] = PACKET0(adev->vcn.internal.nop, 0);
+ ib->ptr[i] = PACKET0(adev->vcn.inst[ring->me].internal.nop, 0);
ib->ptr[i+1] = 0;
}
ib->length_dw = 16;
@@ -570,7 +647,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
if (r)
goto err_free;
- amdgpu_ib_free(adev, ib_msg, f);
+ amdgpu_ib_free(ib_msg, f);
if (fence)
*fence = dma_fence_get(f);
@@ -581,7 +658,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
err_free:
amdgpu_job_free(job);
err:
- amdgpu_ib_free(adev, ib_msg, f);
+ amdgpu_ib_free(ib_msg, f);
return r;
}
@@ -679,27 +756,70 @@ error:
return r;
}
+static uint32_t *amdgpu_vcn_unified_ring_ib_header(struct amdgpu_ib *ib,
+ uint32_t ib_pack_in_dw, bool enc)
+{
+ uint32_t *ib_checksum;
+
+ ib->ptr[ib->length_dw++] = 0x00000010; /* single queue checksum */
+ ib->ptr[ib->length_dw++] = 0x30000002;
+ ib_checksum = &ib->ptr[ib->length_dw++];
+ ib->ptr[ib->length_dw++] = ib_pack_in_dw;
+
+ ib->ptr[ib->length_dw++] = 0x00000010; /* engine info */
+ ib->ptr[ib->length_dw++] = 0x30000001;
+ ib->ptr[ib->length_dw++] = enc ? 0x2 : 0x3;
+ ib->ptr[ib->length_dw++] = ib_pack_in_dw * sizeof(uint32_t);
+
+ return ib_checksum;
+}
+
+static void amdgpu_vcn_unified_ring_ib_checksum(uint32_t **ib_checksum,
+ uint32_t ib_pack_in_dw)
+{
+ uint32_t i;
+ uint32_t checksum = 0;
+
+ for (i = 0; i < ib_pack_in_dw; i++)
+ checksum += *(*ib_checksum + 2 + i);
+
+ **ib_checksum = checksum;
+}
+
static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
struct amdgpu_ib *ib_msg,
struct dma_fence **fence)
{
struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
- const unsigned int ib_size_dw = 64;
+ unsigned int ib_size_dw = 64;
struct amdgpu_device *adev = ring->adev;
struct dma_fence *f = NULL;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
+ uint32_t *ib_checksum;
+ uint32_t ib_pack_in_dw;
int i, r;
- r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ ib_size_dw += 8;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
+ ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
+ &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
goto err;
ib = &job->ibs[0];
ib->length_dw = 0;
+ /* single queue headers */
+ if (adev->vcn.inst[ring->me].using_unified_queue) {
+ ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t)
+ + 4 + 2; /* engine info + decoding ib in dw */
+ ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false);
+ }
+
ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8;
ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER);
decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]);
@@ -713,11 +833,14 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw);
+
r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err_free;
- amdgpu_ib_free(adev, ib_msg, f);
+ amdgpu_ib_free(ib_msg, f);
if (fence)
*fence = dma_fence_get(f);
@@ -728,7 +851,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
err_free:
amdgpu_job_free(job);
err:
- amdgpu_ib_free(adev, ib_msg, f);
+ amdgpu_ib_free(ib_msg, f);
return r;
}
@@ -768,7 +891,7 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t rptr;
- unsigned i;
+ unsigned int i;
int r;
if (amdgpu_sriov_vf(adev))
@@ -799,15 +922,21 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
struct amdgpu_ib *ib_msg,
struct dma_fence **fence)
{
- const unsigned ib_size_dw = 16;
+ unsigned int ib_size_dw = 16;
+ struct amdgpu_device *adev = ring->adev;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
+ uint32_t *ib_checksum = NULL;
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ ib_size_dw += 8;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
+ ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
+ &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
@@ -815,12 +944,16 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
ib->length_dw = 0;
+
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);
+
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = upper_32_bits(addr);
ib->ptr[ib->length_dw++] = addr;
- ib->ptr[ib->length_dw++] = 0x0000000b;
+ ib->ptr[ib->length_dw++] = 0x00000000;
ib->ptr[ib->length_dw++] = 0x00000014;
ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
@@ -834,6 +967,9 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);
+
r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err;
@@ -853,15 +989,21 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
struct amdgpu_ib *ib_msg,
struct dma_fence **fence)
{
- const unsigned ib_size_dw = 16;
+ unsigned int ib_size_dw = 16;
+ struct amdgpu_device *adev = ring->adev;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
+ uint32_t *ib_checksum = NULL;
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ ib_size_dw += 8;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
+ ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
+ &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
@@ -869,12 +1011,16 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
ib->length_dw = 0;
+
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);
+
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001;
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = upper_32_bits(addr);
ib->ptr[ib->length_dw++] = addr;
- ib->ptr[ib->length_dw++] = 0x0000000b;
+ ib->ptr[ib->length_dw++] = 0x00000000;
ib->ptr[ib->length_dw++] = 0x00000014;
ib->ptr[ib->length_dw++] = 0x00000002;
@@ -888,6 +1034,9 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);
+
r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err;
@@ -932,15 +1081,33 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = 0;
error:
- amdgpu_ib_free(adev, &ib, fence);
+ amdgpu_ib_free(&ib, fence);
dma_fence_put(fence);
return r;
}
+/*
+ * IB test for a unified VCN ring.
+ *
+ * Runs the encode IB test first, except on VCN IP versions 4.0.3 and 5.0.1
+ * where it is skipped, and then the software-decode IB test. A non-zero
+ * result from the encode test is returned immediately; otherwise the result
+ * of the decode test is returned.
+ */
+int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+	long ret;
+
+	switch (amdgpu_ip_version(ring->adev, UVD_HWIP, 0)) {
+	case IP_VERSION(4, 0, 3):
+	case IP_VERSION(5, 0, 1):
+		/* the encode IB test is not run on these IP versions */
+		break;
+	default:
+		ret = amdgpu_vcn_enc_ring_test_ib(ring, timeout);
+		if (ret)
+			return ret;
+		break;
+	}
+
+	return amdgpu_vcn_dec_sw_ring_test_ib(ring, timeout);
+}
+
enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring)
{
- switch(ring) {
+ switch (ring) {
case 0:
return AMDGPU_RING_PRIO_0;
case 1:
@@ -952,29 +1119,519 @@ enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring)
}
}
-void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev)
+void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev, int i)
{
- int i;
unsigned int idx;
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
+
+ if ((amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 3) ||
+ amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(5, 0, 1))
+ && (i > 0))
+ return;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
+ /* currently only support 2 FW instances */
+ if (i >= 2) {
+ dev_info(adev->dev, "More then 2 VCN FW instances!\n");
+ return;
+ }
+ idx = AMDGPU_UCODE_ID_VCN + i;
+ adev->firmware.ucode[idx].ucode_id = idx;
+ adev->firmware.ucode[idx].fw = adev->vcn.inst[i].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
+ }
+}
+
+/*
+ * debugfs for mapping vcn firmware log buffer.
+ */
+#if defined(CONFIG_DEBUG_FS)
+/*
+ * debugfs read handler for a VCN firmware log buffer.
+ *
+ * The log lives directly after the fw_shared region (cpu_addr + mem_size) and
+ * starts with a struct amdgpu_vcn_fwlog header. rptr/wptr are byte offsets
+ * from the start of that header; log data occupies
+ * [header_size, AMDGPU_VCNFW_LOG_SIZE) and is treated as a ring, so a read may
+ * be split into two segments when the writer has wrapped.
+ *
+ * @f:    debugfs file; its inode private data is the struct amdgpu_vcn_inst
+ * @buf:  user buffer to fill
+ * @size: capacity of @buf in bytes
+ * @pos:  file position, advanced by the number of bytes returned
+ *
+ * Returns the number of bytes copied (0 when the log is empty or @size is 0),
+ * -ENODEV when no instance is attached to the file, or -EFAULT when logging
+ * is not set up, the ring offsets are out of range, or the copy to user space
+ * fails.
+ */
+static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf,
+					     size_t size, loff_t *pos)
+{
+	struct amdgpu_vcn_inst *vcn;
+	void *log_buf;
+	struct amdgpu_vcn_fwlog *plog;
+	unsigned int read_pos, write_pos, header_size, available, i, read_bytes = 0;
+	unsigned int read_num[2] = {0};
+
+	vcn = file_inode(f)->i_private;
+	if (!vcn)
+		return -ENODEV;
+
+	if (!vcn->fw_shared.cpu_addr || !amdgpu_vcnfw_log)
+		return -EFAULT;
+
+	log_buf = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size;
+
+	plog = (struct amdgpu_vcn_fwlog *)log_buf;
+	/*
+	 * Snapshot the header fields once: the header sits in the same buffer
+	 * the log is read from, so every later computation must use the same
+	 * values that were validated here.
+	 */
+	read_pos = plog->rptr;
+	write_pos = plog->wptr;
+	header_size = plog->header_size;
+
+	if (read_pos > AMDGPU_VCNFW_LOG_SIZE || write_pos > AMDGPU_VCNFW_LOG_SIZE)
+		return -EFAULT;
+
+	/*
+	 * Offsets inside the header are never valid data positions; rejecting
+	 * them also keeps the unsigned "write_pos - header_size" arithmetic
+	 * below from underflowing into a huge copy length.
+	 */
+	if (read_pos < header_size || write_pos < header_size)
+		return -EFAULT;
+
+	if (!size || (read_pos == write_pos))
+		return 0;
+
+	if (write_pos > read_pos) {
+		/* contiguous data: [read_pos, write_pos) */
+		available = write_pos - read_pos;
+		read_num[0] = min_t(size_t, size, available);
+	} else {
+		/* wrapped: [read_pos, end) followed by [header_size, write_pos) */
+		read_num[0] = AMDGPU_VCNFW_LOG_SIZE - read_pos;
+		available = read_num[0] + write_pos - header_size;
+		if (size > available)
+			read_num[1] = write_pos - header_size;
+		else if (size > read_num[0])
+			read_num[1] = size - read_num[0];
+		else
+			read_num[0] = size;
+	}
+
+	for (i = 0; i < 2; i++) {
+		if (read_num[i]) {
+			/* wrap to the first data byte after the header */
+			if (read_pos == AMDGPU_VCNFW_LOG_SIZE)
+				read_pos = header_size;
+			/*
+			 * copy_to_user() returns the number of bytes it could
+			 * NOT copy, so any non-zero result is a fault, not
+			 * only a result equal to the full length.
+			 */
+			if (copy_to_user((buf + read_bytes),
+					 (log_buf + read_pos), read_num[i]))
+				return -EFAULT;
+
+			read_bytes += read_num[i];
+			read_pos += read_num[i];
+		}
+	}
+
+	/* consume the bytes that were handed to user space */
+	plog->rptr = read_pos;
+	*pos += read_bytes;
+	return read_bytes;
+}
+
+static const struct file_operations amdgpu_debugfs_vcnfwlog_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_vcn_fwlog_read,
+ .llseek = default_llseek
+};
+#endif
+
+/*
+ * Create the read-only debugfs file "amdgpu_vcn_<i>_fwlog" under the DRM
+ * primary minor's debugfs root. The file is AMDGPU_VCNFW_LOG_SIZE bytes and
+ * carries @vcn as its private data. Compiles to a no-op without
+ * CONFIG_DEBUG_FS.
+ */
+void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i,
+				   struct amdgpu_vcn_inst *vcn)
+{
+#if defined(CONFIG_DEBUG_FS)
+	struct dentry *debugfs_dir = adev_to_drm(adev)->primary->debugfs_root;
+	char fname[32];
+
+	sprintf(fname, "amdgpu_vcn_%d_fwlog", i);
+	debugfs_create_file_size(fname, S_IFREG | 0444, debugfs_dir, vcn,
+				 &amdgpu_debugfs_vcnfwlog_fops,
+				 AMDGPU_VCNFW_LOG_SIZE);
+#endif
+}
+
+/*
+ * Enable firmware logging for one VCN instance.
+ *
+ * Sets AMDGPU_VCN_FW_LOGGING_FLAG in the first 32-bit word of the fw_shared
+ * region, fills the logging descriptor found at fw_shared.log_offset with the
+ * GPU address and size of the log buffer, and initialises the log header that
+ * sits directly after the fw_shared region (at offset fw_shared.mem_size).
+ * Compiles to a no-op without CONFIG_DEBUG_FS.
+ */
+void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn)
+{
+#if defined(CONFIG_DEBUG_FS)
+ /* first word of the shared region; presumably present_flag_0 - TODO confirm */
+ uint32_t *flag = vcn->fw_shared.cpu_addr;
+ /* the log buffer starts right after the fw_shared region (CPU and GPU views) */
+ void *fw_log_cpu_addr = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size;
+ uint64_t fw_log_gpu_addr = vcn->fw_shared.gpu_addr + vcn->fw_shared.mem_size;
+ struct amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr;
+ struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr
+ + vcn->fw_shared.log_offset;
+ *flag |= cpu_to_le32(AMDGPU_VCN_FW_LOGGING_FLAG);
+ fw_log->is_enabled = 1;
+ /* 64-bit GPU address of the log buffer, split into two 32-bit halves */
+ fw_log->addr_lo = cpu_to_le32(fw_log_gpu_addr & 0xFFFFFFFF);
+ fw_log->addr_hi = cpu_to_le32(fw_log_gpu_addr >> 32);
+ fw_log->size = cpu_to_le32(AMDGPU_VCNFW_LOG_SIZE);
+
+ /* empty log: read and write offsets both point just past the header */
+ log_buf->header_size = sizeof(struct amdgpu_vcn_fwlog);
+ log_buf->buffer_size = AMDGPU_VCNFW_LOG_SIZE;
+ log_buf->rptr = log_buf->header_size;
+ log_buf->wptr = log_buf->header_size;
+ log_buf->wrapped = 0;
+#endif
+}
+
+/*
+ * Handle a VCN RAS poison interrupt.
+ *
+ * Does nothing when no VCN RAS interface is registered. On an SR-IOV VF the
+ * event is passed to the virt ops' ras_poison_handler (a warning is logged if
+ * the hook is missing); otherwise it is dispatched through
+ * amdgpu_ras_interrupt_dispatch(). Always returns 0.
+ */
+int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
+				  struct amdgpu_irq_src *source,
+				  struct amdgpu_iv_entry *entry)
+{
+	struct ras_common_if *ras_if = adev->vcn.ras_if;
+	struct ras_dispatch_if ih_data = {
+		.entry = entry,
+	};
+
+	if (!ras_if)
+		return 0;
+
+	if (amdgpu_sriov_vf(adev)) {
+		if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
+			adev->virt.ops->ras_poison_handler(adev, ras_if->block);
+		else
+			dev_warn(adev->dev,
+				 "No ras_poison_handler interface in SRIOV for VCN!\n");
+		return 0;
+	}
+
+	ih_data.head = *ras_if;
+	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+
+	return 0;
+}
+
+int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r, i;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- if (adev->vcn.harvest_config & (1 << i))
+ if (adev->vcn.harvest_config & (1 << i) ||
+ !adev->vcn.inst[i].ras_poison_irq.funcs)
continue;
- /* currently only support 2 FW instances */
- if (i >= 2) {
- dev_info(adev->dev, "More then 2 VCN FW instances!\n");
- break;
- }
- idx = AMDGPU_UCODE_ID_VCN + i;
- adev->firmware.ucode[idx].ucode_id = idx;
- adev->firmware.ucode[idx].fw = adev->vcn.fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
+
+ r = amdgpu_irq_get(adev, &adev->vcn.inst[i].ras_poison_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+ }
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+ return r;
+}
+
+/*
+ * Register the VCN RAS block, if the device has one.
+ *
+ * After a successful registration the common RAS fields are filled in (name
+ * "vcn", block VCN, error type POISON), adev->vcn.ras_if is pointed at them
+ * and amdgpu_vcn_ras_late_init is installed as the late-init hook unless one
+ * is already set. Returns 0 on success or when there is no VCN RAS object,
+ * otherwise the error from amdgpu_ras_register_ras_block().
+ */
+int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_vcn_ras *vcn_ras = adev->vcn.ras;
+	struct amdgpu_ras_block_object *blk;
+	int ret;
+
+	if (!vcn_ras)
+		return 0;
+
+	blk = &vcn_ras->ras_block;
+	ret = amdgpu_ras_register_ras_block(adev, blk);
+	if (ret) {
+		dev_err(adev->dev, "Failed to register vcn ras block!\n");
+		return ret;
+	}
+
+	strcpy(blk->ras_comm.name, "vcn");
+	blk->ras_comm.block = AMDGPU_RAS_BLOCK__VCN;
+	blk->ras_comm.type = AMDGPU_RAS_ERROR__POISON;
+	adev->vcn.ras_if = &blk->ras_comm;
+
+	/* keep an IP-specific late-init hook if one was already provided */
+	if (!blk->ras_late_init)
+		blk->ras_late_init = amdgpu_vcn_ras_late_init;
+
+	return 0;
+}
+
+/*
+ * Ask the PSP to load the DPG SRAM contents staged for one VCN instance.
+ *
+ * @ucode_id selects the firmware id; when it is 0 the id falls back to
+ * AMDGPU_UCODE_ID_VCN1_RAM for a non-zero @inst_idx and to
+ * AMDGPU_UCODE_ID_VCN0_RAM otherwise. The size handed to the PSP is the
+ * distance between the instance's current and base DPG SRAM CPU pointers.
+ * Returns the result of psp_execute_ip_fw_load().
+ */
+int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
+			       enum AMDGPU_UCODE_ID ucode_id)
+{
+	struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[inst_idx];
+	struct amdgpu_firmware_info ucode = {};
+
+	if (ucode_id)
+		ucode.ucode_id = ucode_id;
+	else if (inst_idx)
+		ucode.ucode_id = AMDGPU_UCODE_ID_VCN1_RAM;
+	else
+		ucode.ucode_id = AMDGPU_UCODE_ID_VCN0_RAM;
+
+	ucode.mc_addr = vinst->dpg_sram_gpu_addr;
+	ucode.ucode_size = (uintptr_t)vinst->dpg_sram_curr_addr -
+			   (uintptr_t)vinst->dpg_sram_cpu_addr;
+
+	return psp_execute_ip_fw_load(&adev->psp, &ucode);
+}
+
+/*
+ * sysfs show callback for vcn_reset_mask: formats adev->vcn.supported_reset
+ * into @buf via amdgpu_show_reset_mask(). Returns -ENODEV when no amdgpu
+ * device can be derived from @dev.
+ */
+static ssize_t amdgpu_get_vcn_reset_mask(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(drm);
+
+	if (adev)
+		return amdgpu_show_reset_mask(buf, adev->vcn.supported_reset);
+
+	return -ENODEV;
+}
+
+static DEVICE_ATTR(vcn_reset_mask, 0444,
+ amdgpu_get_vcn_reset_mask, NULL);
+
+/*
+ * Create the vcn_reset_mask sysfs attribute when the device has at least one
+ * VCN instance. Returns 0 when nothing needs to be created, otherwise the
+ * result of device_create_file().
+ */
+int amdgpu_vcn_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+	if (!adev->vcn.num_vcn_inst)
+		return 0;
+
+	return device_create_file(adev->dev, &dev_attr_vcn_reset_mask);
+}
+
+/*
+ * Remove the vcn_reset_mask sysfs attribute. Nothing is done when the
+ * device's kobject has no sysfs directory entry or when there are no VCN
+ * instances.
+ */
+void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+	if (!adev->dev->kobj.sd || !adev->vcn.num_vcn_inst)
+		return;
+
+	device_remove_file(adev->dev, &dev_attr_vcn_reset_mask);
+}
+
+/*
+ * debugfs to enable/disable vcn job submission to specific core or
+ * instance. It is created only if the queue type is unified.
+ */
+#if defined(CONFIG_DEBUG_FS)
+/*
+ * debugfs setter for the VCN scheduling mask.
+ *
+ * Bit i of @val decides whether the scheduler of encode ring 0 of VCN
+ * instance i is marked ready. A value that selects none of the existing
+ * instances is rejected with -EINVAL; -ENODEV is returned when no device is
+ * attached to the file.
+ */
+static int amdgpu_debugfs_vcn_sched_mask_set(void *data, u64 val)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)data;
+	u32 i;
+	u64 mask;
+	struct amdgpu_ring *ring;
+
+	if (!adev)
+		return -ENODEV;
+
+	/* at least one existing instance has to stay enabled */
+	mask = (1ULL << adev->vcn.num_vcn_inst) - 1;
+	if ((val & mask) == 0)
+		return -EINVAL;
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		ring = &adev->vcn.inst[i].ring_enc[0];
+		ring->sched.ready = !!(val & (1ULL << i));
+	}
+	/*
+	 * publish sched.ready flag update effective immediately across smp:
+	 * the flags were just stored, so this has to be a write barrier (a
+	 * read barrier only orders loads).
+	 */
+	smp_wmb();
+	return 0;
+}
+
+/*
+ * debugfs getter for the VCN scheduling mask: bit i of *@val is set when the
+ * scheduler of encode ring 0 of VCN instance i is marked ready. Returns
+ * -ENODEV when no device is attached to the file, 0 otherwise.
+ */
+static int amdgpu_debugfs_vcn_sched_mask_get(void *data, u64 *val)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)data;
+	u64 ready_bits = 0;
+	u32 idx;
+
+	if (!adev)
+		return -ENODEV;
+
+	for (idx = 0; idx < adev->vcn.num_vcn_inst; ++idx) {
+		if (adev->vcn.inst[idx].ring_enc[0].sched.ready)
+			ready_bits |= 1ULL << idx;
+	}
+
+	*val = ready_bits;
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_vcn_sched_mask_fops,
+ amdgpu_debugfs_vcn_sched_mask_get,
+ amdgpu_debugfs_vcn_sched_mask_set, "%llx\n");
+#endif
+
+/*
+ * Create the "amdgpu_vcn_sched_mask" debugfs file (mode 0600) under the DRM
+ * primary minor's debugfs root. The file is only created when the device has
+ * more than one VCN instance and instance 0 uses the unified queue. Compiles
+ * to a no-op without CONFIG_DEBUG_FS.
+ */
+void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	struct dentry *debugfs_dir = adev_to_drm(adev)->primary->debugfs_root;
+
+	if (adev->vcn.num_vcn_inst <= 1 || !adev->vcn.inst[0].using_unified_queue)
+		return;
+
+	debugfs_create_file("amdgpu_vcn_sched_mask", 0600, debugfs_dir, adev,
+			    &amdgpu_debugfs_vcn_sched_mask_fops);
+#endif
+}
+
+/**
+ * vcn_set_powergating_state - set VCN block powergating state
+ *
+ * @ip_block: amdgpu_ip_block pointer
+ * @state: power gating state
+ *
+ * Set VCN block powergating state by calling the set_pg_state hook of every
+ * VCN instance. All instances are visited even if one of them fails.
+ *
+ * Returns: 0 when every instance succeeded, otherwise the error code of the
+ * first instance that failed.
+ */
+int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block,
+			      enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int ret = 0, i;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+		int r = vinst->set_pg_state(vinst, state);
+
+		/*
+		 * Remember the first failure instead of OR-ing results:
+		 * combining two different negative errnos bitwise yields a
+		 * value that is not a meaningful error code.
+		 */
+		if (r && !ret)
+			ret = r;
+	}
+
+	return ret;
+}
+
+/**
+ * amdgpu_vcn_reset_engine - Reset a specific VCN engine
+ * @adev: Pointer to the AMDGPU device
+ * @instance_id: VCN engine instance to reset
+ *
+ * Runs under the instance's engine_reset_mutex. The scheduler work queues of
+ * the decode ring and of all encode rings are stopped, the instance's reset
+ * hook is called, every ring is tested, outstanding fences are force
+ * completed and the work queues are started again.
+ *
+ * NOTE(review): when the reset hook or a ring test fails, the function jumps
+ * straight to the unlock label, so the work queues stopped at the top are not
+ * restarted and fences are not force completed on that path - confirm that
+ * callers recover from this (for example through a wider reset).
+ *
+ * Returns: 0 on success, or a negative error code on failure.
+ */
+static int amdgpu_vcn_reset_engine(struct amdgpu_device *adev,
+ uint32_t instance_id)
+{
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[instance_id];
+ int r, i;
+
+ mutex_lock(&vinst->engine_reset_mutex);
+ /* Stop the scheduler's work queue for the dec and enc rings.
+ * This ensures that no new tasks are submitted to the queues while
+ * the reset is in progress.
+ */
+ drm_sched_wqueue_stop(&vinst->ring_dec.sched);
+ for (i = 0; i < vinst->num_enc_rings; i++)
+ drm_sched_wqueue_stop(&vinst->ring_enc[i].sched);
+
+ /* Perform the VCN reset for the specified instance */
+ r = vinst->reset(vinst);
+ if (r)
+ goto unlock;
+ /* Check that every ring of the instance responds again after the reset */
+ r = amdgpu_ring_test_ring(&vinst->ring_dec);
+ if (r)
+ goto unlock;
+ for (i = 0; i < vinst->num_enc_rings; i++) {
+ r = amdgpu_ring_test_ring(&vinst->ring_enc[i]);
+ if (r)
+ goto unlock;
+ }
+ /* Force completion of the fences on all rings of the instance */
+ amdgpu_fence_driver_force_completion(&vinst->ring_dec);
+ for (i = 0; i < vinst->num_enc_rings; i++)
+ amdgpu_fence_driver_force_completion(&vinst->ring_enc[i]);
+
+ /* Restart the scheduler's work queue for the dec and enc rings
+ * stopped above. This allows new tasks to be submitted to the
+ * queues after the reset is complete.
+ */
+ drm_sched_wqueue_start(&vinst->ring_dec.sched);
+ for (i = 0; i < vinst->num_enc_rings; i++)
+ drm_sched_wqueue_start(&vinst->ring_enc[i].sched);
+
+unlock:
+ mutex_unlock(&vinst->engine_reset_mutex);
+
+ return r;
+}
+
+/**
+ * amdgpu_vcn_ring_reset - Reset a VCN ring
+ * @ring: ring to reset
+ * @vmid: vmid of guilty job
+ * @timedout_fence: fence of timed out job
+ *
+ * Helper for VCN blocks without unified queues: resetting the engine resets
+ * every queue of that engine, so the whole instance owning @ring is reset.
+ * Instances that use a unified queue (one queue per engine) are rejected
+ * with -EINVAL.
+ *
+ * Returns: 0 on success, or a negative error code on failure.
+ */
+int amdgpu_vcn_ring_reset(struct amdgpu_ring *ring,
+			  unsigned int vmid,
+			  struct amdgpu_fence *timedout_fence)
+{
+	struct amdgpu_device *adev = ring->adev;
+	bool unified = adev->vcn.inst[ring->me].using_unified_queue;
+
+	if (!unified)
+		return amdgpu_vcn_reset_engine(adev, ring->me);
+
+	return -EINVAL;
+}
+
+/*
+ * Allocate the zeroed IP register dump buffer (@count 32-bit slots for each
+ * VCN instance) and record the register list used to fill it.
+ * Returns 0 on success or -ENOMEM when the allocation fails.
+ */
+int amdgpu_vcn_reg_dump_init(struct amdgpu_device *adev,
+			     const struct amdgpu_hwip_reg_entry *reg, u32 count)
+{
+	uint32_t *dump;
+
+	dump = kcalloc(adev->vcn.num_vcn_inst * count, sizeof(uint32_t),
+		       GFP_KERNEL);
+	adev->vcn.ip_dump = dump;
+	if (!dump)
+		return -ENOMEM;
+
+	adev->vcn.reg_count = count;
+	adev->vcn.reg_list = reg;
+
+	return 0;
+}
+
+/*
+ * Free the IP register dump buffer and clear the bookkeeping that was set up
+ * by amdgpu_vcn_reg_dump_init().
+ */
+static void amdgpu_vcn_reg_dump_fini(struct amdgpu_device *adev)
+{
+	uint32_t *dump = adev->vcn.ip_dump;
+
+	adev->vcn.reg_count = 0;
+	adev->vcn.reg_list = NULL;
+	adev->vcn.ip_dump = NULL;
+	kfree(dump);
+}
+
+/*
+ * Capture the registers listed in adev->vcn.reg_list into adev->vcn.ip_dump.
+ *
+ * Each non-harvested instance owns reg_count consecutive slots. Slot 0 (the
+ * power status register) is always read; the remaining registers are only
+ * read when that status does not report the tiles as off. Nothing is done
+ * when no dump buffer was allocated.
+ */
+void amdgpu_vcn_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	const u32 tiles_off = UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
+	u32 *slot;
+	int vcn_idx, reg_idx;
+
+	if (!adev->vcn.ip_dump)
+		return;
+
+	for (vcn_idx = 0; vcn_idx < adev->vcn.num_vcn_inst; vcn_idx++) {
+		if (adev->vcn.harvest_config & (1 << vcn_idx))
+			continue;
+
+		slot = &adev->vcn.ip_dump[vcn_idx * adev->vcn.reg_count];
+		/* mmUVD_POWER_STATUS is always readable and is the first in reg_list */
+		slot[0] = RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->vcn.reg_list[0], vcn_idx));
+
+		/* skip the remaining registers while the tiles are powered off */
+		if ((slot[0] & tiles_off) == tiles_off)
+			continue;
+
+		for (reg_idx = 1; reg_idx < adev->vcn.reg_count; reg_idx++)
+			slot[reg_idx] =
+				RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->vcn.reg_list[reg_idx], vcn_idx));
+	}
+}
+
+void amdgpu_vcn_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ bool is_powered;
+ u32 inst_off;
+
+ if (!adev->vcn.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i)) {
+ drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i);
+ continue;
+ }
+
+ inst_off = i * adev->vcn.reg_count;
+ is_powered = (adev->vcn.ip_dump[inst_off] &
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF) !=
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
+
+ if (is_powered) {
+ drm_printf(p, "\nActive Instance:VCN%d\n", i);
+ for (j = 0; j < adev->vcn.reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", adev->vcn.reg_list[j].reg_name,
+ adev->vcn.ip_dump[inst_off + j]);
+ } else {
+ drm_printf(p, "\nInactive Instance:VCN%d\n", i);
}
- dev_info(adev->dev, "Will use PSP to load VCN firmware\n");
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index bfa27ea94804..82624b44e661 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2016 Advanced Micro Devices, Inc.
+ * Copyright 2016-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -24,14 +24,16 @@
#ifndef __AMDGPU_VCN_H__
#define __AMDGPU_VCN_H__
+#include "amdgpu_ras.h"
+
#define AMDGPU_VCN_STACK_SIZE (128*1024)
#define AMDGPU_VCN_CONTEXT_SIZE (512*1024)
#define AMDGPU_VCN_FIRMWARE_OFFSET 256
#define AMDGPU_VCN_MAX_ENC_RINGS 3
-#define AMDGPU_MAX_VCN_INSTANCES 2
-#define AMDGPU_MAX_VCN_ENC_RINGS AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES
+#define AMDGPU_MAX_VCN_INSTANCES 4
+#define AMDGPU_MAX_VCN_ENC_RINGS (AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES)
#define AMDGPU_VCN_HARVEST_VCN0 (1 << 0)
#define AMDGPU_VCN_HARVEST_VCN1 (1 << 1)
@@ -63,10 +65,7 @@
#define VCN_ENC_CMD_REG_WRITE 0x0000000b
#define VCN_ENC_CMD_REG_WAIT 0x0000000c
-#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00
-#define VCN1_VID_SOC_ADDRESS_3_0 0x48200
#define VCN_AON_SOC_ADDRESS_2_0 0x1f800
-#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
#define VCN_VID_IP_ADDRESS_2_0 0x0
#define VCN_AON_IP_ADDRESS_2_0 0x30000
@@ -101,7 +100,8 @@
#define SOC15_DPG_MODE_OFFSET(ip, inst_idx, reg) \
({ \
- uint32_t internal_reg_offset, addr; \
+ /* To avoid a -Wunused-but-set-variable warning. */ \
+ uint32_t internal_reg_offset __maybe_unused, addr; \
bool video_range, video1_range, aon_range, aon1_range; \
\
addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \
@@ -141,27 +141,112 @@
RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA); \
})
-#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \
- do { \
- if (!indirect) { \
- WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value); \
- WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \
- (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
- mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
- offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
- } else { \
- *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = offset; \
- *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = value; \
- } \
+#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \
+ do { \
+ if (!indirect) { \
+ WREG32_SOC15(VCN, GET_INST(VCN, inst_idx), \
+ mmUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15( \
+ VCN, GET_INST(VCN, inst_idx), \
+ mmUVD_DPG_LMA_CTL, \
+ (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
+ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ } else { \
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \
+ offset; \
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \
+ value; \
+ } \
} while (0)
+#define SOC24_DPG_MODE_OFFSET(ip, inst_idx, reg) \
+ ({ \
+ /* To avoid a -Wunused-but-set-variable warning. */ \
+ uint32_t internal_reg_offset __maybe_unused, addr; \
+ bool video_range, video1_range, aon_range, aon1_range; \
+ \
+ addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \
+ addr <<= 2; \
+ video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS)) && \
+ ((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS + 0x2600))))); \
+ video1_range = ((((0xFFFFF & addr) >= (VCN1_VID_SOC_ADDRESS)) && \
+ ((0xFFFFF & addr) < ((VCN1_VID_SOC_ADDRESS + 0x2600))))); \
+ aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS)) && \
+ ((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS + 0x600))))); \
+ aon1_range = ((((0xFFFFF & addr) >= (VCN1_AON_SOC_ADDRESS)) && \
+ ((0xFFFFF & addr) < ((VCN1_AON_SOC_ADDRESS + 0x600))))); \
+ if (video_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS) + \
+ (VCN_VID_IP_ADDRESS)); \
+ else if (aon_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS) + \
+ (VCN_AON_IP_ADDRESS)); \
+ else if (video1_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN1_VID_SOC_ADDRESS) + \
+ (VCN_VID_IP_ADDRESS)); \
+ else if (aon1_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN1_AON_SOC_ADDRESS) + \
+ (VCN_AON_IP_ADDRESS)); \
+ else \
+ internal_reg_offset = (0xFFFFF & addr); \
+ \
+ internal_reg_offset >>= 2; \
+ })
+
+#define WREG32_SOC24_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \
+ do { \
+ if (!indirect) { \
+ WREG32_SOC15(VCN, GET_INST(VCN, inst_idx), \
+ regUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15( \
+ VCN, GET_INST(VCN, inst_idx), \
+ regUVD_DPG_LMA_CTL, \
+ (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
+ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ } else { \
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \
+ offset; \
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \
+ value; \
+ } \
+ } while (0)
+
+#define AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE (1 << 2)
+#define AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT (1 << 4)
#define AMDGPU_VCN_FW_SHARED_FLAG_0_RB (1 << 6)
#define AMDGPU_VCN_MULTI_QUEUE_FLAG (1 << 8)
#define AMDGPU_VCN_SW_RING_FLAG (1 << 9)
+#define AMDGPU_VCN_FW_LOGGING_FLAG (1 << 10)
+#define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11)
+#define AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG (1 << 11)
+#define AMDGPU_VCN_VF_RB_SETUP_FLAG (1 << 14)
+#define AMDGPU_VCN_VF_RB_DECOUPLE_FLAG (1 << 15)
+
+#define MAX_NUM_VCN_RB_SETUP 4
#define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001
#define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001
+#define VCN_CODEC_DISABLE_MASK_AV1 (1 << 0)
+#define VCN_CODEC_DISABLE_MASK_VP9 (1 << 1)
+#define VCN_CODEC_DISABLE_MASK_HEVC (1 << 2)
+#define VCN_CODEC_DISABLE_MASK_H264 (1 << 3)
+
+#define AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU (0)
+#define AMDGPU_VCN_SMU_DPM_INTERFACE_APU (1)
+
+#define AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING 2
+
+struct amdgpu_hwip_reg_entry;
+
+enum amdgpu_vcn_caps {
+ AMDGPU_VCN_RRMT_ENABLED,
+};
+
+#define AMDGPU_VCN_CAPS(caps) BIT(AMDGPU_VCN_##caps)
+
enum fw_queue_mode {
FW_QUEUE_RING_RESET = 1,
FW_QUEUE_DPG_HOLD_OFF = 2,
@@ -205,7 +290,16 @@ struct amdgpu_vcn_reg{
unsigned scratch9;
};
+/* Per-instance description of the memory region shared with the VCN firmware. */
+struct amdgpu_vcn_fw_shared {
+ /* CPU mapping of the start of the shared region */
+ void *cpu_addr;
+ /* GPU address of the start of the shared region */
+ uint64_t gpu_addr;
+ /* size of the shared region; the firmware log buffer is placed at this offset */
+ uint32_t mem_size;
+ /* offset of the struct amdgpu_fw_shared_fw_logging descriptor inside the region */
+ uint32_t log_offset;
+};
+
struct amdgpu_vcn_inst {
+ struct amdgpu_device *adev;
+ int inst;
struct amdgpu_bo *vcpu_bo;
void *cpu_addr;
uint64_t gpu_addr;
@@ -214,6 +308,7 @@ struct amdgpu_vcn_inst {
struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
atomic_t sched_score;
struct amdgpu_irq_src irq;
+ struct amdgpu_irq_src ras_poison_irq;
struct amdgpu_vcn_reg external;
struct amdgpu_bo *dpg_sram_bo;
struct dpg_pause_state pause_state;
@@ -221,28 +316,58 @@ struct amdgpu_vcn_inst {
uint64_t dpg_sram_gpu_addr;
uint32_t *dpg_sram_curr_addr;
atomic_t dpg_enc_submission_cnt;
- void *fw_shared_cpu_addr;
- uint64_t fw_shared_gpu_addr;
-};
-
-struct amdgpu_vcn {
- unsigned fw_version;
+ struct amdgpu_vcn_fw_shared fw_shared;
+ uint8_t aid_id;
+ const struct firmware *fw; /* VCN firmware */
+ uint8_t vcn_config;
+ uint32_t vcn_codec_disable_mask;
+ atomic_t total_submission_cnt;
+ struct mutex vcn_pg_lock;
+ enum amd_powergating_state cur_state;
struct delayed_work idle_work;
- const struct firmware *fw; /* VCN firmware */
+ unsigned fw_version;
unsigned num_enc_rings;
- enum amd_powergating_state cur_state;
bool indirect_sram;
+ struct amdgpu_vcn_reg internal;
+ struct mutex vcn1_jpeg1_workaround;
+ int (*pause_dpg_mode)(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
+ int (*set_pg_state)(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+ int (*reset)(struct amdgpu_vcn_inst *vinst);
+ bool using_unified_queue;
+ struct mutex engine_reset_mutex;
+};
+
+struct amdgpu_vcn_ras {
+ struct amdgpu_ras_block_object ras_block;
+};
+struct amdgpu_vcn {
uint8_t num_vcn_inst;
struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES];
- struct amdgpu_vcn_reg internal;
- struct mutex vcn_pg_lock;
- struct mutex vcn1_jpeg1_workaround;
- atomic_t total_submission_cnt;
unsigned harvest_config;
- int (*pause_dpg_mode)(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
+
+ struct ras_common_if *ras_if;
+ struct amdgpu_vcn_ras *ras;
+
+ uint16_t inst_mask;
+ uint8_t num_inst_per_aid;
+
+ /* IP reg dump */
+ uint32_t *ip_dump;
+
+ uint32_t supported_reset;
+ uint32_t caps;
+
+ bool per_inst_fw;
+ unsigned fw_version;
+
+ bool workload_profile_active;
+ struct mutex workload_profile_mutex;
+ u32 reg_count;
+ const struct amdgpu_hwip_reg_entry *reg_list;
};
struct amdgpu_fw_shared_rb_ptrs_struct {
@@ -264,6 +389,25 @@ struct amdgpu_fw_shared_sw_ring {
uint8_t padding[3];
};
+struct amdgpu_fw_shared_unified_queue_struct {
+ uint8_t is_enabled;
+ uint8_t queue_mode;
+ uint8_t queue_status;
+ uint8_t padding[5];
+};
+
+struct amdgpu_fw_shared_fw_logging {
+ uint8_t is_enabled;
+ uint32_t addr_lo;
+ uint32_t addr_hi;
+ uint32_t size;
+};
+
+struct amdgpu_fw_shared_smu_interface_info {
+ uint8_t smu_interface_type;
+ uint8_t padding[3];
+};
+
struct amdgpu_fw_shared {
uint32_t present_flag_0;
uint8_t pad[44];
@@ -271,6 +415,66 @@ struct amdgpu_fw_shared {
uint8_t pad1[1];
struct amdgpu_fw_shared_multi_queue multi_queue;
struct amdgpu_fw_shared_sw_ring sw_ring;
+ struct amdgpu_fw_shared_fw_logging fw_log;
+ struct amdgpu_fw_shared_smu_interface_info smu_interface_info;
+};
+
+struct amdgpu_vcn_rb_setup_info {
+ uint32_t rb_addr_lo;
+ uint32_t rb_addr_hi;
+ uint32_t rb_size;
+};
+
+struct amdgpu_fw_shared_rb_setup {
+ uint32_t is_rb_enabled_flags;
+
+ union {
+ struct {
+ uint32_t rb_addr_lo;
+ uint32_t rb_addr_hi;
+ uint32_t rb_size;
+ uint32_t rb4_addr_lo;
+ uint32_t rb4_addr_hi;
+ uint32_t rb4_size;
+ uint32_t reserved[6];
+ };
+
+ struct {
+ struct amdgpu_vcn_rb_setup_info rb_info[MAX_NUM_VCN_RB_SETUP];
+ };
+ };
+};
+
+struct amdgpu_fw_shared_drm_key_wa {
+ uint8_t method;
+ uint8_t reserved[3];
+};
+
+struct amdgpu_fw_shared_queue_decouple {
+ uint8_t is_enabled;
+ uint8_t reserved[7];
+};
+
+struct amdgpu_vcn4_fw_shared {
+ uint32_t present_flag_0;
+ uint8_t pad[12];
+ struct amdgpu_fw_shared_unified_queue_struct sq;
+ uint8_t pad1[8];
+ struct amdgpu_fw_shared_fw_logging fw_log;
+ uint8_t pad2[20];
+ struct amdgpu_fw_shared_rb_setup rb_setup;
+ struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface;
+ struct amdgpu_fw_shared_drm_key_wa drm_key_wa;
+ uint8_t pad3[9];
+ struct amdgpu_fw_shared_queue_decouple decouple;
+};
+
+/* Header at the start of the VCN firmware log buffer. */
+struct amdgpu_vcn_fwlog {
+ /* read offset in bytes from the start of this header; advanced by the debugfs reader */
+ uint32_t rptr;
+ /* write offset in bytes from the start of this header; presumably advanced by firmware */
+ uint32_t wptr;
+ /* total buffer size; initialised to AMDGPU_VCNFW_LOG_SIZE */
+ uint32_t buffer_size;
+ /* size of this header; log data starts at this offset */
+ uint32_t header_size;
+ /* initialised to 0 by the driver; presumably set once the writer wraps - TODO confirm */
+ uint8_t wrapped;
+};
struct amdgpu_vcn_decode_buffer {
@@ -280,6 +484,28 @@ struct amdgpu_vcn_decode_buffer {
uint32_t pad[30];
};
+struct amdgpu_vcn_rb_metadata {
+ uint32_t size;
+ uint32_t present_flag_0;
+
+ uint8_t version;
+ uint8_t ring_id;
+ uint8_t pad[26];
+};
+
+struct amdgpu_vcn5_fw_shared {
+ uint32_t present_flag_0;
+ uint8_t pad[12];
+ struct amdgpu_fw_shared_unified_queue_struct sq;
+ uint8_t pad1[8];
+ struct amdgpu_fw_shared_fw_logging fw_log;
+ uint8_t pad2[20];
+ struct amdgpu_fw_shared_rb_setup rb_setup;
+ struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface;
+ struct amdgpu_fw_shared_drm_key_wa drm_key_wa;
+ uint8_t pad3[404];
+};
+
#define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80
#define VCN_BLOCK_DECODE_DISABLE_MASK 0x40
#define VCN_BLOCK_QUEUE_DISABLE_MASK 0xC0
@@ -290,10 +516,11 @@ enum vcn_ring_type {
VCN_UNIFIED_RING,
};
-int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
-int amdgpu_vcn_sw_fini(struct amdgpu_device *adev);
-int amdgpu_vcn_suspend(struct amdgpu_device *adev);
-int amdgpu_vcn_resume(struct amdgpu_device *adev);
+int amdgpu_vcn_early_init(struct amdgpu_device *adev, int i);
+int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i);
+void amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i);
+int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i);
+int amdgpu_vcn_resume(struct amdgpu_device *adev, int i);
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring);
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring);
@@ -304,12 +531,43 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout);
int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout);
enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring);
-void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev);
+void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev, int i);
+
+void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn);
+void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev,
+ uint8_t i, struct amdgpu_vcn_inst *vcn);
+
+int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block);
+int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev);
+
+int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
+ enum AMDGPU_UCODE_ID ucode_id);
+int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev);
+int amdgpu_vcn_sysfs_reset_mask_init(struct amdgpu_device *adev);
+void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev);
+
+int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
+int amdgpu_vcn_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *guilty_fence);
+int amdgpu_vcn_reg_dump_init(struct amdgpu_device *adev,
+ const struct amdgpu_hwip_reg_entry *reg, u32 count);
+void amdgpu_vcn_dump_ip_state(struct amdgpu_ip_block *ip_block);
+void amdgpu_vcn_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p);
+void amdgpu_vcn_get_profile(struct amdgpu_device *adev);
+void amdgpu_vcn_put_profile(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 04cf9b207e62..47a6ce4fdc74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -23,10 +23,17 @@
#include <linux/module.h>
+#ifdef CONFIG_X86
+#include <asm/hypervisor.h>
+#endif
+
#include <drm/drm_drv.h>
+#include <xen/xen.h>
#include "amdgpu.h"
#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_dpm.h"
#include "vi.h"
#include "soc15.h"
#include "nv.h"
@@ -37,6 +44,18 @@
vf2pf_info->ucode_info[ucode].version = ver; \
} while (0)
+#define mmRCC_CONFIG_MEMSIZE 0xde3
+
+const char *amdgpu_virt_dynamic_crit_table_name[] = {
+ "IP DISCOVERY",
+ "VBIOS IMG",
+ "RAS TELEMETRY",
+ "DATA EXCHANGE",
+ "BAD PAGE INFO",
+ "INIT HEADER",
+ "LAST",
+};
+
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
{
/* By now all MMIO pages except mailbox are blocked */
@@ -51,7 +70,8 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
/* enable virtual display */
if (adev->asic_type != CHIP_ALDEBARAN &&
- adev->asic_type != CHIP_ARCTURUS) {
+ adev->asic_type != CHIP_ARCTURUS &&
+ ((adev->pdev->class >> 8) != PCI_CLASS_ACCELERATOR_PROCESSING)) {
if (adev->mode_info.num_crtc == 0)
adev->mode_info.num_crtc = 1;
adev->enable_virtual_display = true;
@@ -59,52 +79,10 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
ddev->driver_features &= ~DRIVER_ATOMIC;
adev->cg_flags = 0;
adev->pg_flags = 0;
-}
-
-void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
- uint32_t reg0, uint32_t reg1,
- uint32_t ref, uint32_t mask)
-{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *ring = &kiq->ring;
- signed long r, cnt = 0;
- unsigned long flags;
- uint32_t seq;
-
- spin_lock_irqsave(&kiq->ring_lock, flags);
- amdgpu_ring_alloc(ring, 32);
- amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
- ref, mask);
- r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
- if (r)
- goto failed_undo;
-
- amdgpu_ring_commit(ring);
- spin_unlock_irqrestore(&kiq->ring_lock, flags);
-
- r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
-
- /* don't wait anymore for IRQ context */
- if (r < 1 && in_interrupt())
- goto failed_kiq;
-
- might_sleep();
- while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
- msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
- r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
- }
-
- if (cnt > MAX_KIQ_REG_TRY)
- goto failed_kiq;
-
- return;
-
-failed_undo:
- amdgpu_ring_undo(ring);
- spin_unlock_irqrestore(&kiq->ring_lock, flags);
-failed_kiq:
- dev_err(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1);
+ /* Reduce kcq number to 2 to reduce latency */
+ if (amdgpu_num_kcq == -1)
+ amdgpu_num_kcq = 2;
}
/**
@@ -121,8 +99,10 @@ int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init)
if (virt->ops && virt->ops->req_full_gpu) {
r = virt->ops->req_full_gpu(adev, init);
- if (r)
+ if (r) {
+ adev->no_hw_access = true;
return r;
+ }
adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
}
@@ -182,9 +162,24 @@ void amdgpu_virt_request_init_data(struct amdgpu_device *adev)
virt->ops->req_init_data(adev);
if (adev->virt.req_init_data_ver > 0)
- DRM_INFO("host supports REQ_INIT_DATA handshake\n");
+ dev_info(adev->dev, "host supports REQ_INIT_DATA handshake of critical_region_version %d\n",
+ adev->virt.req_init_data_ver);
else
- DRM_WARN("host doesn't support REQ_INIT_DATA handshake\n");
+ dev_warn(adev->dev, "host doesn't support REQ_INIT_DATA handshake\n");
+}
+
+/**
+ * amdgpu_virt_ready_to_reset() - send ready to reset to host
+ * @adev: amdgpu device.
+ *
+ * Send the ready-to-reset message to the GPU hypervisor to signal that we
+ * have stopped GPU activity and are ready for host FLR. Does nothing when
+ * the virt ops table or its ready_to_reset callback is not provided.
+ */
+void amdgpu_virt_ready_to_reset(struct amdgpu_device *adev)
+{
+	struct amdgpu_virt *virt = &adev->virt;
+
+	/* Guard the callback that is actually invoked, not reset_gpu. */
+	if (virt->ops && virt->ops->ready_to_reset)
+		virt->ops->ready_to_reset(adev);
}
/**
@@ -217,17 +212,18 @@ int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
return 0;
r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
&adev->virt.mm_table.bo,
&adev->virt.mm_table.gpu_addr,
(void *)&adev->virt.mm_table.cpu_addr);
if (r) {
- DRM_ERROR("failed to alloc mm table and error = %d.\n", r);
+ dev_err(adev->dev, "failed to alloc mm table and error = %d.\n", r);
return r;
}
memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
- DRM_INFO("MM table gpu addr = 0x%llx, cpu addr = %p.\n",
+ dev_info(adev->dev, "MM table gpu addr = 0x%llx, cpu addr = %p.\n",
adev->virt.mm_table.gpu_addr,
adev->virt.mm_table.cpu_addr);
return 0;
@@ -249,6 +245,22 @@ void amdgpu_virt_free_mm_table(struct amdgpu_device *adev)
adev->virt.mm_table.gpu_addr = 0;
}
+/**
+ * amdgpu_virt_rcvd_ras_interrupt() - query the host for a RAS interrupt
+ * @adev: amdgpu device.
+ *
+ * Forward the query to the rcvd_ras_intr virt op when one is installed.
+ *
+ * Return: the callback's result, or false when the ops table or the
+ * rcvd_ras_intr callback is missing.
+ */
+bool amdgpu_virt_rcvd_ras_interrupt(struct amdgpu_device *adev)
+{
+	struct amdgpu_virt *virt = &adev->virt;
+
+	if (virt->ops && virt->ops->rcvd_ras_intr)
+		return virt->ops->rcvd_ras_intr(adev);
+
+	return false;
+}
+
+
unsigned int amd_sriov_msg_checksum(void *obj,
unsigned long obj_size,
@@ -283,17 +295,15 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)
*data = kmalloc(sizeof(struct amdgpu_virt_ras_err_handler_data), GFP_KERNEL);
if (!*data)
- return -ENOMEM;
+ goto data_failure;
- bps = kmalloc_array(align_space, sizeof((*data)->bps), GFP_KERNEL);
- bps_bo = kmalloc_array(align_space, sizeof((*data)->bps_bo), GFP_KERNEL);
+ bps = kmalloc_array(align_space, sizeof(*(*data)->bps), GFP_KERNEL);
+ if (!bps)
+ goto bps_failure;
- if (!bps || !bps_bo) {
- kfree(bps);
- kfree(bps_bo);
- kfree(*data);
- return -ENOMEM;
- }
+ bps_bo = kmalloc_array(align_space, sizeof(*(*data)->bps_bo), GFP_KERNEL);
+ if (!bps_bo)
+ goto bps_bo_failure;
(*data)->bps = bps;
(*data)->bps_bo = bps_bo;
@@ -303,6 +313,13 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)
virt->ras_init_done = true;
return 0;
+
+bps_bo_failure:
+ kfree(bps);
+bps_failure:
+ kfree(*data);
+data_failure:
+ return -ENOMEM;
}
static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev)
@@ -317,8 +334,10 @@ static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev)
for (i = data->last_reserved - 1; i >= 0; i--) {
bo = data->bps_bo[i];
- amdgpu_bo_free_kernel(&bo, NULL, NULL);
- data->bps_bo[i] = bo;
+ if (bo) {
+ amdgpu_bo_free_kernel(&bo, NULL, NULL);
+ data->bps_bo[i] = bo;
+ }
data->last_reserved = i;
}
}
@@ -358,6 +377,8 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
{
struct amdgpu_virt *virt = &adev->virt;
struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+ struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+ struct ttm_resource_manager *man = &mgr->manager;
struct amdgpu_bo *bo = NULL;
uint64_t bp;
int i;
@@ -373,13 +394,20 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
* 2) a ras bad page has been reserved (duplicate error injection
* for one page);
*/
- if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
- AMDGPU_GPU_PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &bo, NULL))
- DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
-
- data->bps_bo[i] = bo;
+ if (ttm_resource_manager_used(man)) {
+ amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr,
+ bp << AMDGPU_GPU_PAGE_SHIFT,
+ AMDGPU_GPU_PAGE_SIZE);
+ data->bps_bo[i] = NULL;
+ } else {
+ if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
+ AMDGPU_GPU_PAGE_SIZE,
+ &bo, NULL))
+ dev_dbg(adev->dev,
+ "RAS WARN: reserve vram for retired page %llx fail\n",
+ bp);
+ data->bps_bo[i] = bo;
+ }
data->last_reserved = i + 1;
bo = NULL;
}
@@ -408,11 +436,19 @@ static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev,
struct eeprom_table_record bp;
uint64_t retired_page;
uint32_t bp_idx, bp_cnt;
+ void *vram_usage_va = NULL;
+
+ if (adev->mman.fw_vram_usage_va)
+ vram_usage_va = adev->mman.fw_vram_usage_va;
+ else
+ vram_usage_va = adev->mman.drv_vram_usage_va;
+
+ memset(&bp, 0, sizeof(bp));
if (bp_block_size) {
bp_cnt = bp_block_size / sizeof(uint64_t);
for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) {
- retired_page = *(uint64_t *)(adev->mman.fw_vram_usage_va +
+ retired_page = *(uint64_t *)(vram_usage_va +
bp_block_offset + bp_idx * sizeof(uint64_t));
bp.retired_page = retired_page;
@@ -439,7 +475,7 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
return -EINVAL;
if (pf2vf_info->size > 1024) {
- DRM_ERROR("invalid pf2vf message size\n");
+ dev_err(adev->dev, "invalid pf2vf message size: 0x%x\n", pf2vf_info->size);
return -EINVAL;
}
@@ -450,7 +486,9 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size,
adev->virt.fw_reserve.checksum_key, checksum);
if (checksum != checkval) {
- DRM_ERROR("invalid pf2vf message\n");
+ dev_err(adev->dev,
+ "invalid pf2vf message: header checksum=0x%x calculated checksum=0x%x\n",
+ checksum, checkval);
return -EINVAL;
}
@@ -464,7 +502,9 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size,
0, checksum);
if (checksum != checkval) {
- DRM_ERROR("invalid pf2vf message\n");
+ dev_err(adev->dev,
+ "invalid pf2vf message: header checksum=0x%x calculated checksum=0x%x\n",
+ checksum, checkval);
return -EINVAL;
}
@@ -493,14 +533,17 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].encode_max_frame_pixels;
adev->virt.encode_max_frame_pixels = max(tmp, adev->virt.encode_max_frame_pixels);
}
- if((adev->virt.decode_max_dimension_pixels > 0) || (adev->virt.encode_max_dimension_pixels > 0))
+ if ((adev->virt.decode_max_dimension_pixels > 0) || (adev->virt.encode_max_dimension_pixels > 0))
adev->virt.is_mm_bw_enabled = true;
adev->unique_id =
((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->uuid;
+ adev->virt.ras_en_caps.all = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->ras_en_caps.all;
+ adev->virt.ras_telemetry_en_caps.all =
+ ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->ras_telemetry_en_caps.all;
break;
default:
- DRM_ERROR("invalid pf2vf version\n");
+ dev_err(adev->dev, "invalid pf2vf version: 0x%x\n", pf2vf_info->version);
return -EINVAL;
}
@@ -548,7 +591,6 @@ static void amdgpu_virt_populate_vf2pf_ucode_info(struct amdgpu_device *adev)
static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
{
struct amd_sriov_msg_vf2pf_info *vf2pf_info;
- struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
vf2pf_info = (struct amd_sriov_msg_vf2pf_info *) adev->virt.fw_reserve.p_vf2pf;
@@ -571,8 +613,10 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
vf2pf_info->driver_cert = 0;
vf2pf_info->os_info.all = 0;
- vf2pf_info->fb_usage = amdgpu_vram_mgr_usage(vram_man) >> 20;
- vf2pf_info->fb_vis_usage = amdgpu_vram_mgr_vis_usage(vram_man) >> 20;
+ vf2pf_info->fb_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ?
+ ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20 : 0;
+ vf2pf_info->fb_vis_usage =
+ amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr) >> 20;
vf2pf_info->fb_size = adev->gmc.real_vram_size >> 20;
vf2pf_info->fb_vis_size = adev->gmc.visible_vram_size >> 20;
@@ -585,9 +629,15 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
vf2pf_info->decode_usage = 0;
vf2pf_info->dummy_page_addr = (uint64_t)adev->dummy_page_addr;
+ if (amdgpu_sriov_is_mes_info_enable(adev)) {
+ vf2pf_info->mes_info_addr =
+ (uint64_t)(adev->mes.resource_1_gpu_addr[0] + AMDGPU_GPU_PAGE_SIZE);
+ vf2pf_info->mes_info_size =
+ adev->mes.resource_1[0]->tbo.base.size - AMDGPU_GPU_PAGE_SIZE;
+ }
vf2pf_info->checksum =
amd_sriov_msg_checksum(
- vf2pf_info, vf2pf_info->header.size, 0, 0);
+ vf2pf_info, sizeof(*vf2pf_info), 0, 0);
return 0;
}
@@ -598,18 +648,59 @@ static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work)
int ret;
ret = amdgpu_virt_read_pf2vf_data(adev);
- if (ret)
+ if (ret) {
+ adev->virt.vf2pf_update_retry_cnt++;
+
+ if ((amdgpu_virt_rcvd_ras_interrupt(adev) ||
+ adev->virt.vf2pf_update_retry_cnt >= AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT) &&
+ amdgpu_sriov_runtime(adev)) {
+
+ amdgpu_ras_set_fed(adev, true);
+ if (amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->kfd.reset_work))
+ return;
+ else
+ dev_err(adev->dev, "Failed to queue work! at %s", __func__);
+ }
+
goto out;
+ }
+
+ adev->virt.vf2pf_update_retry_cnt = 0;
amdgpu_virt_write_vf2pf_data(adev);
out:
schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms);
}
+/*
+ * Copy the data exchange region described by the dynamic critical region
+ * table out of VRAM into @pfvf_data.
+ *
+ * The region offset and size (size_kb converted to bytes) are taken from the
+ * AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID entry; both must be 4-byte aligned.
+ * The caller must supply a buffer large enough for the whole region.
+ *
+ * Returns 0 on success, -EINVAL when offset or size is not 4-byte aligned.
+ */
+static int amdgpu_virt_read_exchange_data_from_mem(struct amdgpu_device *adev, uint32_t *pfvf_data)
+{
+	uint32_t offset, size;
+
+	offset = adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset;
+	size = adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10;
+
+	dev_info(adev->dev,
+		 "Got data exchange info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
+		 offset, size);
+
+	if (IS_ALIGNED(offset, 4) && IS_ALIGNED(size, 4)) {
+		/* Read (not write) the region from VRAM into the buffer. */
+		amdgpu_device_vram_access(adev, (uint64_t)offset, pfvf_data,
+					  size, false);
+		return 0;
+	}
+
+	dev_err(adev->dev, "Data exchange data not aligned to 4 bytes\n");
+	return -EINVAL;
+}
+
+
void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
{
if (adev->virt.vf2pf_update_interval_ms != 0) {
- DRM_INFO("clean up the vf2pf work item\n");
+ dev_info(adev->dev, "clean up the vf2pf work item\n");
cancel_delayed_work_sync(&adev->virt.vf2pf_work);
adev->virt.vf2pf_update_interval_ms = 0;
}
@@ -617,58 +708,118 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
{
- uint64_t bp_block_offset = 0;
- uint32_t bp_block_size = 0;
- struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL;
+ uint32_t *pfvf_data = NULL;
adev->virt.fw_reserve.p_pf2vf = NULL;
adev->virt.fw_reserve.p_vf2pf = NULL;
adev->virt.vf2pf_update_interval_ms = 0;
+ adev->virt.vf2pf_update_retry_cnt = 0;
- if (adev->mman.fw_vram_usage_va != NULL) {
- adev->virt.vf2pf_update_interval_ms = 2000;
+ if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) {
+ dev_warn(adev->dev, "Currently fw_vram and drv_vram should not have values at the same time!");
+ } else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
+ /* go through this logic in ip_init and reset to init workqueue*/
+ amdgpu_virt_exchange_data(adev);
- adev->virt.fw_reserve.p_pf2vf =
- (struct amd_sriov_msg_pf2vf_info_header *)
- (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
- adev->virt.fw_reserve.p_vf2pf =
- (struct amd_sriov_msg_vf2pf_info_header *)
- (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
+ INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item);
+ schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
+ } else if (adev->bios != NULL) {
+ /* go through this logic in early init stage to get necessary flags, e.g. rlcg_acc related */
+ if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
+ pfvf_data =
+ kzalloc(adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10,
+ GFP_KERNEL);
+ if (!pfvf_data) {
+ dev_err(adev->dev, "Failed to allocate memory for pfvf_data\n");
+ return;
+ }
- amdgpu_virt_read_pf2vf_data(adev);
- amdgpu_virt_write_vf2pf_data(adev);
+ if (amdgpu_virt_read_exchange_data_from_mem(adev, pfvf_data))
+ goto free_pfvf_data;
- /* bad page handling for version 2 */
- if (adev->virt.fw_reserve.p_pf2vf->version == 2) {
- pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf;
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)pfvf_data;
- bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) |
- ((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000);
- bp_block_size = pf2vf_v2->bp_block_size;
+ amdgpu_virt_read_pf2vf_data(adev);
- if (bp_block_size && !adev->virt.ras_init_done)
- amdgpu_virt_init_ras_err_handler_data(adev);
+free_pfvf_data:
+ kfree(pfvf_data);
+ pfvf_data = NULL;
+ adev->virt.fw_reserve.p_pf2vf = NULL;
+ } else {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
- if (adev->virt.ras_init_done)
- amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
+ amdgpu_virt_read_pf2vf_data(adev);
+ }
+ }
+}
+
+
+void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
+{
+ uint64_t bp_block_offset = 0;
+ uint32_t bp_block_size = 0;
+ struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL;
+
+ if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
+ if (adev->mman.fw_vram_usage_va) {
+ if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->mman.fw_vram_usage_va +
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset);
+ adev->virt.fw_reserve.p_vf2pf =
+ (struct amd_sriov_msg_vf2pf_info_header *)
+ (adev->mman.fw_vram_usage_va +
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset +
+ (AMD_SRIOV_MSG_SIZE_KB << 10));
+ adev->virt.fw_reserve.ras_telemetry =
+ (adev->mman.fw_vram_usage_va +
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset);
+ } else {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+ adev->virt.fw_reserve.p_vf2pf =
+ (struct amd_sriov_msg_vf2pf_info_header *)
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
+ adev->virt.fw_reserve.ras_telemetry =
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
}
- } else if (adev->bios != NULL) {
- adev->virt.fw_reserve.p_pf2vf =
- (struct amd_sriov_msg_pf2vf_info_header *)
- (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+ } else if (adev->mman.drv_vram_usage_va) {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+ adev->virt.fw_reserve.p_vf2pf =
+ (struct amd_sriov_msg_vf2pf_info_header *)
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
+ adev->virt.fw_reserve.ras_telemetry =
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
+ }
amdgpu_virt_read_pf2vf_data(adev);
+ amdgpu_virt_write_vf2pf_data(adev);
- return;
- }
+ /* bad page handling for version 2 */
+ if (adev->virt.fw_reserve.p_pf2vf->version == 2) {
+ pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf;
- if (adev->virt.vf2pf_update_interval_ms != 0) {
- INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item);
- schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms);
+ bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) |
+ ((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000);
+ bp_block_size = pf2vf_v2->bp_block_size;
+
+ if (bp_block_size && !adev->virt.ras_init_done)
+ amdgpu_virt_init_ras_err_handler_data(adev);
+
+ if (adev->virt.ras_init_done)
+ amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
+ }
}
}
-void amdgpu_detect_virtualization(struct amdgpu_device *adev)
+static u32 amdgpu_virt_init_detect_asic(struct amdgpu_device *adev)
{
uint32_t reg;
@@ -684,6 +835,7 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
case CHIP_SIENNA_CICHLID:
case CHIP_ARCTURUS:
case CHIP_ALDEBARAN:
+ case CHIP_IP_DISCOVERY:
reg = RREG32(mmRCC_IOV_FUNC_IDENTIFIER);
break;
default: /* other chip doesn't support SRIOV */
@@ -698,18 +850,36 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
if (!reg) {
- if (is_virtual_machine()) /* passthrough mode exclus sriov mod */
+ /* passthrough mode excludes SR-IOV mode */
+ if (is_virtual_machine() && !xen_initial_domain())
adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
}
+ return reg;
+}
+
+static bool amdgpu_virt_init_req_data(struct amdgpu_device *adev, u32 reg)
+{
+ bool is_sriov = false;
+
/* we have the ability to check now */
if (amdgpu_sriov_vf(adev)) {
+ is_sriov = true;
+
switch (adev->asic_type) {
case CHIP_TONGA:
case CHIP_FIJI:
vi_set_virt_ops(adev);
break;
case CHIP_VEGA10:
+ soc15_set_virt_ops(adev);
+#ifdef CONFIG_X86
+ /* not send GPU_INIT_DATA with MS_HYPERV*/
+ if (!hypervisor_is_type(X86_HYPER_MS_HYPERV))
+#endif
+ /* send a dummy GPU_INIT_DATA request to host on vega10 */
+ amdgpu_virt_request_init_data(adev);
+ break;
case CHIP_VEGA20:
case CHIP_ARCTURUS:
case CHIP_ALDEBARAN:
@@ -718,15 +888,258 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
+ case CHIP_IP_DISCOVERY:
nv_set_virt_ops(adev);
/* try send GPU_INIT_DATA request to host */
amdgpu_virt_request_init_data(adev);
break;
default: /* other chip doesn't support SRIOV */
- DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type);
+ is_sriov = false;
+ dev_err(adev->dev, "Unknown asic type: %d!\n", adev->asic_type);
break;
}
}
+
+ return is_sriov;
+}
+
+/*
+ * Initialise the virt RAS bookkeeping: the three RAS rate limiters
+ * (interval 5 * HZ, burst 1, RATELIMIT_MSG_ON_RELEASE), the RAS telemetry
+ * and access request mutexes, and the CPER read pointer.
+ */
+static void amdgpu_virt_init_ras(struct amdgpu_device *adev)
+{
+	struct ratelimit_state *limits[] = {
+		&adev->virt.ras.ras_error_cnt_rs,
+		&adev->virt.ras.ras_cper_dump_rs,
+		&adev->virt.ras.ras_chk_criti_rs,
+	};
+	unsigned int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(limits); idx++) {
+		ratelimit_state_init(limits[idx], 5 * HZ, 1);
+		ratelimit_set_flags(limits[idx], RATELIMIT_MSG_ON_RELEASE);
+	}
+
+	mutex_init(&adev->virt.ras.ras_telemetry_mutex);
+	mutex_init(&adev->virt.access_req_mutex);
+
+	adev->virt.ras.cper_rptr = 0;
+}
+
+/*
+ * Compute the 8-bit checksum of the bytes in [buf_start, buf_end).
+ *
+ * The bytes are summed into a 32-bit accumulator and the result is the low
+ * byte of (0xffffffff - sum). An empty or inverted range yields 0.
+ */
+static uint8_t amdgpu_virt_crit_region_calc_checksum(uint8_t *buf_start, uint8_t *buf_end)
+{
+	const uint8_t *cur = buf_start;
+	uint32_t total = 0;
+
+	if (cur >= buf_end)
+		return 0;
+
+	while (cur < buf_end)
+		total += *cur++;
+
+	/* Truncation to uint8_t keeps only the low byte, as before. */
+	return (uint8_t)(0xffffffff - total);
+}
+
+/**
+ * amdgpu_virt_init_critical_region() - parse the v2 critical region header
+ * @adev: amdgpu device.
+ *
+ * Reads the init data header from VRAM at adev->virt.init_data_header.offset,
+ * checks its signature and checksum, and records the offset/size of each
+ * valid table entry (IP discovery, VBIOS image, RAS telemetry, data exchange,
+ * bad page info) in adev->virt.crit_regn_tbl. On success the critical region
+ * is also recorded as the firmware reserved VRAM range and
+ * adev->virt.is_dynamic_crit_regn_enabled is set.
+ *
+ * Return: 0 on success or when the critical region version is not
+ * GPU_CRIT_REGION_V2, -EINVAL when a validation step fails, -ENOMEM when the
+ * temporary header buffer cannot be allocated.
+ */
+int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
+{
+ struct amd_sriov_msg_init_data_header *init_data_hdr = NULL;
+ u64 init_hdr_offset = adev->virt.init_data_header.offset;
+ u64 init_hdr_size = (u64)adev->virt.init_data_header.size_kb << 10; /* KB → bytes */
+ u64 vram_size;
+ u64 end;
+ int r = 0;
+ uint8_t checksum = 0;
+
+ /* Skip below init if critical region version != v2 */
+ if (adev->virt.req_init_data_ver != GPU_CRIT_REGION_V2)
+ return 0;
+
+ /* NOTE(review): init_hdr_offset is a u64, so this comparison can never be true. */
+ if (init_hdr_offset < 0) {
+ dev_err(adev->dev, "Invalid init header offset\n");
+ return -EINVAL;
+ }
+
+ /* Register value is scaled by 2^20 below; presumably it reports MB - confirm. */
+ vram_size = RREG32(mmRCC_CONFIG_MEMSIZE);
+ if (!vram_size || vram_size == U32_MAX)
+ return -EINVAL;
+ vram_size <<= 20;
+
+ /* Reject a header range that wraps around or ends beyond VRAM. */
+ if (check_add_overflow(init_hdr_offset, init_hdr_size, &end) || end > vram_size) {
+ dev_err(adev->dev, "init_data_header exceeds VRAM size, exiting\n");
+ return -EINVAL;
+ }
+
+ /* Allocate for init_data_hdr */
+ init_data_hdr = kzalloc(sizeof(struct amd_sriov_msg_init_data_header), GFP_KERNEL);
+ if (!init_data_hdr)
+ return -ENOMEM;
+
+ /* Copy the header out of VRAM into the temporary buffer (read access). */
+ amdgpu_device_vram_access(adev, (uint64_t)init_hdr_offset, (uint32_t *)init_data_hdr,
+ sizeof(struct amd_sriov_msg_init_data_header), false);
+
+ /* Table validation */
+ if (strncmp(init_data_hdr->signature,
+ AMDGPU_SRIOV_CRIT_DATA_SIGNATURE,
+ AMDGPU_SRIOV_CRIT_DATA_SIG_LEN) != 0) {
+ dev_err(adev->dev, "Invalid init data signature: %.4s\n",
+ init_data_hdr->signature);
+ r = -EINVAL;
+ goto out;
+ }
+
+ /* Checksum covers the bytes from the initdata_offset field to the end of the header. */
+ checksum = amdgpu_virt_crit_region_calc_checksum(
+ (uint8_t *)&init_data_hdr->initdata_offset,
+ (uint8_t *)init_data_hdr +
+ sizeof(struct amd_sriov_msg_init_data_header));
+ if (checksum != init_data_hdr->checksum) {
+ dev_err(adev->dev, "Found unmatching checksum from calculation 0x%x and init_data 0x%x\n",
+ checksum, init_data_hdr->checksum);
+ r = -EINVAL;
+ goto out;
+ }
+
+ /* Start from a clean slate so entries not marked valid stay zeroed. */
+ memset(&adev->virt.crit_regn, 0, sizeof(adev->virt.crit_regn));
+ memset(adev->virt.crit_regn_tbl, 0, sizeof(adev->virt.crit_regn_tbl));
+
+ adev->virt.crit_regn.offset = init_data_hdr->initdata_offset;
+ adev->virt.crit_regn.size_kb = init_data_hdr->initdata_size_in_kb;
+
+ /* Validation and initialization for each table entry */
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_IPD_TABLE_ID)) {
+ if (!init_data_hdr->ip_discovery_size_in_kb ||
+ init_data_hdr->ip_discovery_size_in_kb > DISCOVERY_TMR_SIZE) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_IPD_TABLE_ID],
+ init_data_hdr->ip_discovery_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset =
+ init_data_hdr->ip_discovery_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb =
+ init_data_hdr->ip_discovery_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID)) {
+ if (!init_data_hdr->vbios_img_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID],
+ init_data_hdr->vbios_img_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].offset =
+ init_data_hdr->vbios_img_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].size_kb =
+ init_data_hdr->vbios_img_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID)) {
+ if (!init_data_hdr->ras_tele_info_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID],
+ init_data_hdr->ras_tele_info_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset =
+ init_data_hdr->ras_tele_info_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].size_kb =
+ init_data_hdr->ras_tele_info_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID)) {
+ if (!init_data_hdr->dataexchange_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID],
+ init_data_hdr->dataexchange_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset =
+ init_data_hdr->dataexchange_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb =
+ init_data_hdr->dataexchange_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID)) {
+ if (!init_data_hdr->bad_page_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID],
+ init_data_hdr->bad_page_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].offset =
+ init_data_hdr->bad_page_info_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb =
+ init_data_hdr->bad_page_size_in_kb;
+ }
+
+ /* Validation for critical region info */
+ /*
+ * NOTE(review): this looks redundant - the IPD entry is either still zero
+ * from the memset above or already passed the same bound - confirm.
+ */
+ if (adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb > DISCOVERY_TMR_SIZE) {
+ dev_err(adev->dev, "Invalid IP discovery size: 0x%x\n",
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ /* reserved memory starts from crit region base offset with the size of 5MB */
+ adev->mman.fw_vram_usage_start_offset = adev->virt.crit_regn.offset;
+ adev->mman.fw_vram_usage_size = adev->virt.crit_regn.size_kb << 10;
+ dev_info(adev->dev,
+ "critical region v%d requested to reserve memory start at %08llx with %llu KB.\n",
+ init_data_hdr->version,
+ adev->mman.fw_vram_usage_start_offset,
+ adev->mman.fw_vram_usage_size >> 10);
+
+ adev->virt.is_dynamic_crit_regn_enabled = true;
+
+ /* Common exit: the temporary header copy is freed on every path that allocated it. */
+out:
+ kfree(init_data_hdr);
+ init_data_hdr = NULL;
+
+ return r;
+}
+
+/**
+ * amdgpu_virt_get_dynamic_data_info() - copy a dynamic critical region table
+ * @adev: amdgpu device.
+ * @data_id: table id, must be below AMD_SRIOV_MSG_MAX_TABLE_ID.
+ * @binary: destination buffer.
+ * @size: in: capacity of @binary in bytes; out: number of bytes copied.
+ *
+ * Looks up the table's offset and size (size_kb converted to bytes) in
+ * adev->virt.crit_regn_tbl and reads that range from VRAM into @binary.
+ *
+ * Return: 0 on success, -EINVAL when @data_id is out of range, @binary or
+ * @size is NULL, or *@size is smaller than the table.
+ */
+int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
+				      int data_id, uint8_t *binary, u32 *size)
+{
+	enum amd_sriov_msg_table_id_enum table_id = data_id;
+	uint32_t offset, len;
+
+	if (table_id >= AMD_SRIOV_MSG_MAX_TABLE_ID)
+		return -EINVAL;
+
+	offset = adev->virt.crit_regn_tbl[table_id].offset;
+	len = adev->virt.crit_regn_tbl[table_id].size_kb << 10;
+
+	/* Validate on input params */
+	if (!binary || !size)
+		return -EINVAL;
+	if (*size < len)
+		return -EINVAL;
+
+	/* Proceed to copy the dynamic content */
+	amdgpu_device_vram_access(adev, (uint64_t)offset, (uint32_t *)binary,
+				  len, false);
+	*size = len;
+
+	dev_dbg(adev->dev,
+		"Got %s info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
+		amdgpu_virt_dynamic_crit_table_name[data_id], offset, len);
+
+	return 0;
+}
+
+/**
+ * amdgpu_virt_init() - detect virtualization and set up virt state
+ * @adev: amdgpu device.
+ *
+ * Runs ASIC detection, then the SR-IOV init-data request step, and
+ * initialises the virt RAS state only when that step reports SR-IOV.
+ */
+void amdgpu_virt_init(struct amdgpu_device *adev)
+{
+	uint32_t reg;
+
+	reg = amdgpu_virt_init_detect_asic(adev);
+
+	if (amdgpu_virt_init_req_data(adev, reg))
+		amdgpu_virt_init_ras(adev);
}
static bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev)
@@ -775,6 +1188,90 @@ enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *ad
return mode;
}
+/**
+ * amdgpu_virt_pre_reset() - prepare the VF for a reset
+ * @adev: amdgpu device.
+ *
+ * Tears down the data exchange with the host and then sets the MP1 state to
+ * PP_MP1_STATE_FLR.
+ */
+void amdgpu_virt_pre_reset(struct amdgpu_device *adev)
+{
+ /* stop the data exchange thread */
+ amdgpu_virt_fini_data_exchange(adev);
+ /* presumably tells power management that a function level reset follows - confirm */
+ amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_FLR);
+}
+
+/**
+ * amdgpu_virt_post_reset() - fix up driver state after a reset
+ * @adev: amdgpu device.
+ *
+ * Marks GFX as powered off on GC 11.0.3 and marks the first MES ring's
+ * scheduler as not ready.
+ */
+void amdgpu_virt_post_reset(struct amdgpu_device *adev)
+{
+	const bool gc_11_0_3 =
+		amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3);
+
+	/*
+	 * Force the GFXOFF state after reset to avoid some invalid
+	 * operation before GC is enabled.
+	 */
+	if (gc_11_0_3)
+		adev->gfx.is_poweron = false;
+
+	adev->mes.ring[0].sched.ready = false;
+}
+
+/**
+ * amdgpu_virt_fw_load_skip_check() - decide whether a ucode load is skipped
+ * @adev: amdgpu device.
+ * @ucode_id: AMDGPU_UCODE_ID_* of the firmware in question.
+ *
+ * The decision depends on the MP0 IP version: some versions use a white
+ * list (only the listed ucodes are not skipped), the others use a black
+ * list (only the listed ucodes are skipped).
+ *
+ * Return: true when loading @ucode_id should be skipped, false otherwise.
+ */
+bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id)
+{
+	switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+	case IP_VERSION(13, 0, 0):
+		/* no vf autoload, white list */
+		switch (ucode_id) {
+		case AMDGPU_UCODE_ID_VCN1:
+		case AMDGPU_UCODE_ID_VCN:
+			return false;
+		default:
+			return true;
+		}
+	case IP_VERSION(11, 0, 9):
+	case IP_VERSION(11, 0, 7):
+		/* black list for CHIP_NAVI12 and CHIP_SIENNA_CICHLID */
+		switch (ucode_id) {
+		case AMDGPU_UCODE_ID_RLC_G:
+		case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
+		case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
+		case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
+		case AMDGPU_UCODE_ID_SMC:
+			return true;
+		default:
+			return false;
+		}
+	case IP_VERSION(13, 0, 10):
+		/* white list */
+		switch (ucode_id) {
+		case AMDGPU_UCODE_ID_CAP:
+		case AMDGPU_UCODE_ID_CP_RS64_PFP:
+		case AMDGPU_UCODE_ID_CP_RS64_ME:
+		case AMDGPU_UCODE_ID_CP_RS64_MEC:
+		case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
+		case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
+		case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
+		case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
+		case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
+		case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
+		case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
+		case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
+		case AMDGPU_UCODE_ID_CP_MES:
+		case AMDGPU_UCODE_ID_CP_MES_DATA:
+		case AMDGPU_UCODE_ID_CP_MES1:
+		case AMDGPU_UCODE_ID_CP_MES1_DATA:
+		case AMDGPU_UCODE_ID_VCN1:
+		case AMDGPU_UCODE_ID_VCN:
+			return false;
+		default:
+			return true;
+		}
+	default:
+		/* legacy black list */
+		switch (ucode_id) {
+		case AMDGPU_UCODE_ID_SDMA0:
+		case AMDGPU_UCODE_ID_SDMA1:
+		case AMDGPU_UCODE_ID_SDMA2:
+		case AMDGPU_UCODE_ID_SDMA3:
+		case AMDGPU_UCODE_ID_SDMA4:
+		case AMDGPU_UCODE_ID_SDMA5:
+		case AMDGPU_UCODE_ID_SDMA6:
+		case AMDGPU_UCODE_ID_SDMA7:
+		case AMDGPU_UCODE_ID_RLC_G:
+		case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
+		case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
+		case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
+		case AMDGPU_UCODE_ID_SMC:
+			return true;
+		default:
+			return false;
+		}
+	}
+}
+
void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev,
struct amdgpu_video_codec_info *encode, uint32_t encode_array_size,
struct amdgpu_video_codec_info *decode, uint32_t decode_array_size)
@@ -806,3 +1303,545 @@ void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev,
}
}
}
+
+/**
+ * amdgpu_virt_get_rlcg_reg_access_flag() - pick the RLCG command for a register access
+ * @adev: amdgpu device
+ * @acc_flags: AMDGPU_REGS_* access flags supplied by the caller
+ * @hwip: hardware IP the register belongs to; only GC_HWIP and MMHUB_HWIP
+ *        can select an RLCG command
+ * @write: true for a write access, false for a read
+ * @rlcg_flag: receives the AMDGPU_RLCG_* command; only written when true is
+ *             returned
+ *
+ * Return: true when an RLCG command was selected for this access.
+ */
+bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
+					  u32 acc_flags, u32 hwip,
+					  bool write, u32 *rlcg_flag)
+{
+	const bool want_rlc = !!(acc_flags & AMDGPU_REGS_RLC);
+	const bool skip_kiq = !!(acc_flags & AMDGPU_REGS_NO_KIQ);
+
+	if (hwip == GC_HWIP) {
+		if (amdgpu_sriov_reg_indirect_gc(adev)) {
+			if (write)
+				*rlcg_flag = AMDGPU_RLCG_GC_WRITE;
+			else
+				*rlcg_flag = AMDGPU_RLCG_GC_READ;
+			return true;
+		}
+
+		/* only in new version, AMDGPU_REGS_NO_KIQ and
+		 * AMDGPU_REGS_RLC are enabled simultaneously
+		 */
+		if (want_rlc && !skip_kiq && write) {
+			*rlcg_flag = AMDGPU_RLCG_GC_WRITE_LEGACY;
+			return true;
+		}
+
+		return false;
+	}
+
+	if (hwip == MMHUB_HWIP &&
+	    amdgpu_sriov_reg_indirect_mmhub(adev) && want_rlc && write) {
+		*rlcg_flag = AMDGPU_RLCG_MMHUB_WRITE;
+		return true;
+	}
+
+	/* every other IP block never uses RLCG here */
+	return false;
+}
+
+/**
+ * amdgpu_virt_rlcg_reg_rw() - access a register through the RLCG scratch registers
+ * @adev: amdgpu device
+ * @offset: register offset in dwords
+ * @v: value to write (the read helper in this file passes 0)
+ * @flag: AMDGPU_RLCG_* command, OR'ed into SCRATCH_REG1 together with @offset
+ * @xcc_id: index into adev->gfx.rlc.reg_access_ctrl[]
+ *
+ * The whole exchange runs under adev->virt.rlcg_reg_lock with interrupts
+ * disabled.
+ *
+ * Return: the value read back from SCRATCH_REG0 after the request was issued,
+ * or 0 when the access is rejected up front (RLCG access not supported,
+ * @xcc_id not in the xcc mask, or hardware access currently skipped).
+ */
+u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id)
+{
+	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
+	uint32_t timeout = 50000;
+	uint32_t i, tmp;
+	uint32_t ret = 0;
+	/* MMIO cookies: keep the __iomem address space so sparse can check them */
+	void __iomem *scratch_reg0;
+	void __iomem *scratch_reg1;
+	void __iomem *scratch_reg2;
+	void __iomem *scratch_reg3;
+	/* only assigned (and only used) when reg_access_ctrl->spare_int is set */
+	void __iomem *spare_int = NULL;
+	unsigned long flags;
+
+	if (!adev->gfx.rlc.rlcg_reg_access_supported) {
+		dev_err(adev->dev,
+			"indirect registers access through rlcg is not available\n");
+		return 0;
+	}
+
+	if (adev->gfx.xcc_mask && (((1 << xcc_id) & adev->gfx.xcc_mask) == 0)) {
+		dev_err(adev->dev, "invalid xcc\n");
+		return 0;
+	}
+
+	if (amdgpu_device_skip_hw_access(adev))
+		return 0;
+
+	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[xcc_id];
+	scratch_reg0 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg0;
+	scratch_reg1 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg1;
+	scratch_reg2 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg2;
+	scratch_reg3 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg3;
+
+	spin_lock_irqsave(&adev->virt.rlcg_reg_lock, flags);
+
+	if (reg_access_ctrl->spare_int)
+		spare_int = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->spare_int;
+
+	if (offset == reg_access_ctrl->grbm_cntl) {
+		/* if the target reg offset is grbm_cntl, write to scratch_reg2 */
+		writel(v, scratch_reg2);
+		if (flag == AMDGPU_RLCG_GC_WRITE_LEGACY)
+			writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
+	} else if (offset == reg_access_ctrl->grbm_idx) {
+		/* if the target reg offset is grbm_idx, write to scratch_reg3 */
+		writel(v, scratch_reg3);
+		if (flag == AMDGPU_RLCG_GC_WRITE_LEGACY)
+			writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
+	} else {
+		/*
+		 * SCRATCH_REG0 = read/write value
+		 * SCRATCH_REG1[30:28] = command
+		 * SCRATCH_REG1[19:0] = address in dword
+		 * SCRATCH_REG1[27:24] = Error reporting
+		 */
+		writel(v, scratch_reg0);
+		writel((offset | flag), scratch_reg1);
+		if (reg_access_ctrl->spare_int)
+			writel(1, spare_int);
+
+		/*
+		 * Wait for the address field of SCRATCH_REG1 to read back as
+		 * zero, polling every 10us for at most @timeout iterations.
+		 */
+		for (i = 0; i < timeout; i++) {
+			tmp = readl(scratch_reg1);
+			if (!(tmp & AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK))
+				break;
+			udelay(10);
+		}
+
+		/* re-read so the error bits are taken from the latest state */
+		tmp = readl(scratch_reg1);
+		if (i >= timeout || (tmp & AMDGPU_RLCG_SCRATCH1_ERROR_MASK) != 0) {
+			if (amdgpu_sriov_rlcg_error_report_enabled(adev)) {
+				if (tmp & AMDGPU_RLCG_VFGATE_DISABLED) {
+					dev_err(adev->dev,
+						"vfgate is disabled, rlcg failed to program reg: 0x%05x\n", offset);
+				} else if (tmp & AMDGPU_RLCG_WRONG_OPERATION_TYPE) {
+					dev_err(adev->dev,
+						"wrong operation type, rlcg failed to program reg: 0x%05x\n", offset);
+				} else if (tmp & AMDGPU_RLCG_REG_NOT_IN_RANGE) {
+					dev_err(adev->dev,
+						"register is not in range, rlcg failed to program reg: 0x%05x\n", offset);
+				} else {
+					dev_err(adev->dev,
+						"unknown error type, rlcg failed to program reg: 0x%05x\n", offset);
+				}
+			} else {
+				dev_err(adev->dev,
+					"timeout: rlcg faled to program reg: 0x%05x\n", offset);
+			}
+		}
+	}
+
+	/* SCRATCH_REG0 is returned on every path, including failed requests */
+	ret = readl(scratch_reg0);
+
+	spin_unlock_irqrestore(&adev->virt.rlcg_reg_lock, flags);
+
+	return ret;
+}
+
+/**
+ * amdgpu_sriov_wreg() - write a register on an SR-IOV function
+ * @adev: amdgpu device
+ * @offset: register offset
+ * @value: value to write
+ * @acc_flags: AMDGPU_REGS_* access flags
+ * @hwip: hardware IP the register belongs to
+ * @xcc_id: XCC instance forwarded to the RLCG path
+ *
+ * Outside SR-IOV runtime mode the write goes through RLCG whenever
+ * amdgpu_virt_get_rlcg_reg_access_flag() selects a command. Otherwise it
+ * falls back to WREG32_NO_KIQ() or WREG32(), depending on
+ * AMDGPU_REGS_NO_KIQ. Nothing is written while hardware access is skipped.
+ */
+void amdgpu_sriov_wreg(struct amdgpu_device *adev,
+		       u32 offset, u32 value,
+		       u32 acc_flags, u32 hwip, u32 xcc_id)
+{
+	u32 rlcg_cmd;
+	bool via_rlcg;
+
+	if (amdgpu_device_skip_hw_access(adev))
+		return;
+
+	via_rlcg = !amdgpu_sriov_runtime(adev) &&
+		   amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip,
+							true, &rlcg_cmd);
+
+	if (via_rlcg) {
+		amdgpu_virt_rlcg_reg_rw(adev, offset, value, rlcg_cmd, xcc_id);
+	} else if (acc_flags & AMDGPU_REGS_NO_KIQ) {
+		WREG32_NO_KIQ(offset, value);
+	} else {
+		WREG32(offset, value);
+	}
+}
+
+/**
+ * amdgpu_sriov_rreg() - read a register on an SR-IOV function
+ * @adev: amdgpu device
+ * @offset: register offset
+ * @acc_flags: AMDGPU_REGS_* access flags
+ * @hwip: hardware IP the register belongs to
+ * @xcc_id: XCC instance forwarded to the RLCG path
+ *
+ * Return: the register value, or 0 while hardware access is skipped. Outside
+ * SR-IOV runtime mode the read goes through RLCG whenever
+ * amdgpu_virt_get_rlcg_reg_access_flag() selects a command; otherwise
+ * RREG32_NO_KIQ() or RREG32() is used, depending on AMDGPU_REGS_NO_KIQ.
+ */
+u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
+		      u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id)
+{
+	u32 rlcg_cmd;
+	bool via_rlcg;
+
+	if (amdgpu_device_skip_hw_access(adev))
+		return 0;
+
+	via_rlcg = !amdgpu_sriov_runtime(adev) &&
+		   amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip,
+							false, &rlcg_cmd);
+	if (via_rlcg)
+		return amdgpu_virt_rlcg_reg_rw(adev, offset, 0, rlcg_cmd, xcc_id);
+
+	if (!(acc_flags & AMDGPU_REGS_NO_KIQ))
+		return RREG32(offset);
+
+	return RREG32_NO_KIQ(offset);
+}
+
+/**
+ * amdgpu_sriov_xnack_support() - report whether XNACK may be used
+ * @adev: amdgpu device
+ *
+ * Return: false for an SR-IOV VF whose GC IP version is 9.4.2, true in every
+ * other case.
+ */
+bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev)
+{
+	if (!amdgpu_sriov_vf(adev))
+		return true;
+
+	return amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2);
+}
+
+/**
+ * amdgpu_virt_get_ras_capability() - import the RAS capabilities granted by the host
+ * @adev: amdgpu device
+ *
+ * Each block bit set in adev->virt.ras_en_caps is OR'ed into
+ * adev->ras_hw_enabled as the matching AMDGPU_RAS_BLOCK__* bit, and poison
+ * support is flagged on the RAS context when the host reports the poison
+ * propagation mode bit.
+ *
+ * Return: false when AMDGIM_FEATURE_RAS_CAPS is not set in the GIM feature
+ * mask (nothing is modified), true otherwise.
+ */
+bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev)
+{
+	/* NOTE(review): assumes the RAS context is already allocated - confirm */
+	struct amdgpu_ras *ras_ctx = amdgpu_ras_get_context(adev);
+	const union amd_sriov_ras_caps *caps = &adev->virt.ras_en_caps;
+
+	if (!amdgpu_sriov_ras_caps_en(adev))
+		return false;
+
+	if (caps->bits.block_umc)
+		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__UMC);
+	if (caps->bits.block_sdma)
+		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SDMA);
+	if (caps->bits.block_gfx)
+		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__GFX);
+	if (caps->bits.block_mmhub)
+		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MMHUB);
+	if (caps->bits.block_athub)
+		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__ATHUB);
+	if (caps->bits.block_pcie_bif)
+		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__PCIE_BIF);
+	if (caps->bits.block_hdp)
+		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__HDP);
+	if (caps->bits.block_xgmi_wafl)
+		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__XGMI_WAFL);
+	if (caps->bits.block_df)
+		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__DF);
+	if (caps->bits.block_smn)
+		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SMN);
+	if (caps->bits.block_sem)
+		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SEM);
+	if (caps->bits.block_mp0)
+		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MP0);
+	if (caps->bits.block_mp1)
+		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MP1);
+	if (caps->bits.block_fuse)
+		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__FUSE);
+	if (caps->bits.block_mca)
+		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MCA);
+	if (caps->bits.block_vcn)
+		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__VCN);
+	if (caps->bits.block_jpeg)
+		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__JPEG);
+	if (caps->bits.block_ih)
+		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__IH);
+	if (caps->bits.block_mpio)
+		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MPIO);
+
+	/* Poison is handled by host */
+	if (caps->bits.poison_propogation_mode)
+		ras_ctx->poison_supported = true;
+
+	return true;
+}
+
+/*
+ * Translate an AMDGPU_RAS_BLOCK__* id into the RAS_TELEMETRY_GPU_BLOCK_* id
+ * used for SR-IOV RAS telemetry. Blocks without a mapping are reported with
+ * dev_warn() and yield RAS_TELEMETRY_GPU_BLOCK_COUNT.
+ */
+static inline enum amd_sriov_ras_telemetry_gpu_block
+amdgpu_ras_block_to_sriov(struct amdgpu_device *adev, enum amdgpu_ras_block block)
+{
+	enum amd_sriov_ras_telemetry_gpu_block sriov_block =
+		RAS_TELEMETRY_GPU_BLOCK_COUNT;
+
+	switch (block) {
+	case AMDGPU_RAS_BLOCK__UMC:
+		sriov_block = RAS_TELEMETRY_GPU_BLOCK_UMC;
+		break;
+	case AMDGPU_RAS_BLOCK__SDMA:
+		sriov_block = RAS_TELEMETRY_GPU_BLOCK_SDMA;
+		break;
+	case AMDGPU_RAS_BLOCK__GFX:
+		sriov_block = RAS_TELEMETRY_GPU_BLOCK_GFX;
+		break;
+	case AMDGPU_RAS_BLOCK__MMHUB:
+		sriov_block = RAS_TELEMETRY_GPU_BLOCK_MMHUB;
+		break;
+	case AMDGPU_RAS_BLOCK__ATHUB:
+		sriov_block = RAS_TELEMETRY_GPU_BLOCK_ATHUB;
+		break;
+	case AMDGPU_RAS_BLOCK__PCIE_BIF:
+		sriov_block = RAS_TELEMETRY_GPU_BLOCK_PCIE_BIF;
+		break;
+	case AMDGPU_RAS_BLOCK__HDP:
+		sriov_block = RAS_TELEMETRY_GPU_BLOCK_HDP;
+		break;
+	case AMDGPU_RAS_BLOCK__XGMI_WAFL:
+		sriov_block = RAS_TELEMETRY_GPU_BLOCK_XGMI_WAFL;
+		break;
+	case AMDGPU_RAS_BLOCK__DF:
+		sriov_block = RAS_TELEMETRY_GPU_BLOCK_DF;
+		break;
+	case AMDGPU_RAS_BLOCK__SMN:
+		sriov_block = RAS_TELEMETRY_GPU_BLOCK_SMN;
+		break;
+	case AMDGPU_RAS_BLOCK__SEM:
+		sriov_block = RAS_TELEMETRY_GPU_BLOCK_SEM;
+		break;
+	case AMDGPU_RAS_BLOCK__MP0:
+		sriov_block = RAS_TELEMETRY_GPU_BLOCK_MP0;
+		break;
+	case AMDGPU_RAS_BLOCK__MP1:
+		sriov_block = RAS_TELEMETRY_GPU_BLOCK_MP1;
+		break;
+	case AMDGPU_RAS_BLOCK__FUSE:
+		sriov_block = RAS_TELEMETRY_GPU_BLOCK_FUSE;
+		break;
+	case AMDGPU_RAS_BLOCK__MCA:
+		sriov_block = RAS_TELEMETRY_GPU_BLOCK_MCA;
+		break;
+	case AMDGPU_RAS_BLOCK__VCN:
+		sriov_block = RAS_TELEMETRY_GPU_BLOCK_VCN;
+		break;
+	case AMDGPU_RAS_BLOCK__JPEG:
+		sriov_block = RAS_TELEMETRY_GPU_BLOCK_JPEG;
+		break;
+	case AMDGPU_RAS_BLOCK__IH:
+		sriov_block = RAS_TELEMETRY_GPU_BLOCK_IH;
+		break;
+	case AMDGPU_RAS_BLOCK__MPIO:
+		sriov_block = RAS_TELEMETRY_GPU_BLOCK_MPIO;
+		break;
+	default:
+		dev_warn(adev->dev, "Unsupported SRIOV RAS telemetry block 0x%x\n",
+			 block);
+		break;
+	}
+
+	return sriov_block;
+}
+
+/*
+ * Refresh adev->virt.count_cache from the host telemetry buffer.
+ *
+ * The error-count payload is snapshotted with kmemdup() first, so the
+ * checksum is verified on the same bytes that are then copied into the
+ * cache. An oversized payload or a checksum mismatch leaves the cache
+ * untouched and still returns 0; only an allocation failure is reported
+ * (-ENOMEM).
+ */
+static int amdgpu_virt_cache_host_error_counts(struct amdgpu_device *adev,
+					       struct amdsriov_ras_telemetry *host_telemetry)
+{
+	struct amd_sriov_ras_telemetry_error_count *snapshot;
+	uint32_t expected_sum, payload_len;
+
+	expected_sum = host_telemetry->header.checksum;
+	payload_len = host_telemetry->header.used_size;
+
+	if (payload_len > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
+		return 0;
+
+	snapshot = kmemdup(&host_telemetry->body.error_count, payload_len, GFP_KERNEL);
+	if (!snapshot)
+		return -ENOMEM;
+
+	/* never copy more than the cache can hold */
+	if (amd_sriov_msg_checksum(snapshot, payload_len, 0, 0) == expected_sum)
+		memcpy(&adev->virt.count_cache, snapshot,
+		       min(payload_len, sizeof(adev->virt.count_cache)));
+
+	kfree(snapshot);
+
+	return 0;
+}
+
+/*
+ * Ask the host for fresh RAS error counts and cache them.
+ *
+ * Returns -EOPNOTSUPP when the virt ops do not provide req_ras_err_count,
+ * 0 otherwise - including when the request was rate limited or failed.
+ * @force_update lets the request through even when the rate limit says no.
+ */
+static int amdgpu_virt_req_ras_err_count_internal(struct amdgpu_device *adev, bool force_update)
+{
+	struct amdgpu_virt *virt = &adev->virt;
+
+	if (!virt->ops || !virt->ops->req_ras_err_count)
+		return -EOPNOTSUPP;
+
+	/* Host allows 15 ras telemetry requests per 60 seconds. After which, the
+	 * Host will ignore incoming guest messages. Ratelimit the guest messages
+	 * to prevent guest self DOS.
+	 */
+	if (!__ratelimit(&virt->ras.ras_error_cnt_rs) && !force_update)
+		return 0;
+
+	mutex_lock(&virt->ras.ras_telemetry_mutex);
+	/* the cache is only refreshed when the request itself succeeded */
+	if (virt->ops->req_ras_err_count(adev) == 0)
+		amdgpu_virt_cache_host_error_counts(adev,
+						    virt->fw_reserve.ras_telemetry);
+	mutex_unlock(&virt->ras.ras_telemetry_mutex);
+
+	return 0;
+}
+
+/*
+ * Bypass ACA interface and query ECC counts directly from host.
+ *
+ * Fills @err_data with the cached ue/ce/de counts of @block. Returns
+ * -EOPNOTSUPP when @block has no SR-IOV telemetry mapping or telemetry is
+ * not enabled for it, 0 otherwise.
+ */
+int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block,
+				  struct ras_err_data *err_data)
+{
+	enum amd_sriov_ras_telemetry_gpu_block blk_id =
+		amdgpu_ras_block_to_sriov(adev, block);
+
+	if (blk_id >= RAS_TELEMETRY_GPU_BLOCK_COUNT)
+		return -EOPNOTSUPP;
+
+	if (!amdgpu_sriov_ras_telemetry_block_en(adev, blk_id))
+		return -EOPNOTSUPP;
+
+	/* Host Access may be lost during reset, just return last cached data. */
+	if (down_read_trylock(&adev->reset_domain->sem)) {
+		amdgpu_virt_req_ras_err_count_internal(adev, false);
+		up_read(&adev->reset_domain->sem);
+	}
+
+	err_data->ue_count = adev->virt.count_cache.block[blk_id].ue_count;
+	err_data->ce_count = adev->virt.count_cache.block[blk_id].ce_count;
+	err_data->de_count = adev->virt.count_cache.block[blk_id].de_count;
+
+	return 0;
+}
+
+/*
+ * Copy the CPER records of a host dump into the guest CPER ring.
+ *
+ * @adev: amdgpu device
+ * @host_telemetry: RAS telemetry buffer holding the CPER dump
+ * @more: set to the dump's "more" field once the checksum has been verified
+ *
+ * The dump is snapshotted with kmemdup() before it is validated, so all
+ * checks and the copy operate on the same bytes. While walking the records,
+ * every header and every record_length is checked against the snapshot
+ * size, so a bad count or length from the host cannot make the walk read
+ * past the allocation.
+ *
+ * Returns 0 on success, -ENOMEM when the snapshot cannot be allocated and
+ * -EINVAL when the payload size, the checksum or a record is invalid.
+ */
+static int
+amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev,
+				struct amdsriov_ras_telemetry *host_telemetry,
+				u32 *more)
+{
+	struct amd_sriov_ras_cper_dump *cper_dump = NULL;
+	struct cper_hdr *entry = NULL;
+	struct amdgpu_ring *ring = &adev->cper.ring_buf;
+	uint32_t checksum, used_size, i;
+	const char *end;
+	int ret = 0;
+
+	checksum = host_telemetry->header.checksum;
+	used_size = host_telemetry->header.used_size;
+
+	if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
+		return -EINVAL;
+
+	/* the payload must at least reach the start of the record area */
+	if (used_size < offsetof(struct amd_sriov_ras_cper_dump, buf))
+		return -EINVAL;
+
+	cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL);
+	if (!cper_dump)
+		return -ENOMEM;
+
+	if (checksum != amd_sriov_msg_checksum(cper_dump, used_size, 0, 0)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	*more = cper_dump->more;
+
+	if (cper_dump->wptr < adev->virt.ras.cper_rptr) {
+		dev_warn(
+			adev->dev,
+			"guest specified rptr that was too high! guest rptr: 0x%llx, host rptr: 0x%llx\n",
+			adev->virt.ras.cper_rptr, cper_dump->wptr);
+
+		/* resynchronise with the host and drop this dump */
+		adev->virt.ras.cper_rptr = cper_dump->wptr;
+		goto out;
+	}
+
+	end = (const char *)cper_dump + used_size;
+	entry = (struct cper_hdr *)&cper_dump->buf[0];
+
+	for (i = 0; i < cper_dump->count; i++) {
+		size_t avail = end - (const char *)entry;
+
+		/* header and full record must lie inside the snapshot */
+		if (avail < sizeof(*entry) || !entry->record_length ||
+		    entry->record_length > avail) {
+			dev_warn(adev->dev,
+				 "malformed CPER record %u in host dump, dropping the rest\n",
+				 i);
+			ret = -EINVAL;
+			break;
+		}
+
+		amdgpu_cper_ring_write(ring, entry, entry->record_length);
+		entry = (struct cper_hdr *)((char *)entry +
+					    entry->record_length);
+	}
+
+	if (cper_dump->overflow_count)
+		dev_warn(adev->dev,
+			 "host reported CPER overflow of 0x%llx entries!\n",
+			 cper_dump->overflow_count);
+
+	/*
+	 * Advance even after a malformed record: the records before it are
+	 * already in the ring and must not be fetched a second time.
+	 */
+	adev->virt.ras.cper_rptr = cper_dump->wptr;
+out:
+	kfree(cper_dump);
+
+	return ret;
+}
+
+/*
+ * Fetch CPER dumps from the host until it reports that nothing more is
+ * pending.
+ *
+ * Returns -EOPNOTSUPP when the virt ops do not provide req_ras_cper_dump,
+ * the error of amdgpu_virt_write_cpers_to_ring() when a dump could not be
+ * consumed, and 0 otherwise (including when a request to the host fails).
+ */
+static int amdgpu_virt_req_ras_cper_dump_internal(struct amdgpu_device *adev)
+{
+	struct amdgpu_virt *virt = &adev->virt;
+	int ret = 0;
+	uint32_t more = 0;
+
+	if (!virt->ops || !virt->ops->req_ras_cper_dump)
+		return -EOPNOTSUPP;
+
+	do {
+		/*
+		 * A failed request leaves 'more' at the value of the previous
+		 * dump. Stop here instead of looping on that stale value,
+		 * which would spin for as long as the host keeps rejecting
+		 * requests. ret is 0 at this point, as before.
+		 */
+		if (virt->ops->req_ras_cper_dump(adev, virt->ras.cper_rptr))
+			break;
+
+		ret = amdgpu_virt_write_cpers_to_ring(
+			adev, virt->fw_reserve.ras_telemetry, &more);
+	} while (more && !ret);
+
+	return ret;
+}
+
+/**
+ * amdgpu_virt_req_ras_cper_dump() - pull pending CPER records from the host
+ * @adev: amdgpu device
+ * @force_update: issue the request even when the rate limit says no
+ *
+ * The request is skipped (returning 0) when it is rate limited and not
+ * forced, or when the reset domain semaphore cannot be taken for reading.
+ *
+ * Return: -EOPNOTSUPP when AMDGIM_FEATURE_RAS_CPER is not set, otherwise the
+ * result of the dump request, or 0 when it was skipped.
+ */
+int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update)
+{
+	struct amdgpu_virt *virt = &adev->virt;
+	int ret;
+
+	if (!amdgpu_sriov_ras_cper_en(adev))
+		return -EOPNOTSUPP;
+
+	if (!__ratelimit(&virt->ras.ras_cper_dump_rs) && !force_update)
+		return 0;
+
+	if (!down_read_trylock(&adev->reset_domain->sem))
+		return 0;
+
+	mutex_lock(&virt->ras.ras_telemetry_mutex);
+	ret = amdgpu_virt_req_ras_cper_dump_internal(adev);
+	mutex_unlock(&virt->ras.ras_telemetry_mutex);
+
+	up_read(&adev->reset_domain->sem);
+
+	return ret;
+}
+
+/**
+ * amdgpu_virt_ras_telemetry_post_reset() - refresh RAS error counts after a reset
+ * @adev: amdgpu device
+ *
+ * When RAS telemetry is enabled, forces a fresh error-count request to the
+ * host (bypassing the rate limit) and then runs an error-count query whose
+ * results are discarded.
+ *
+ * Return: always 0.
+ */
+int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev)
+{
+	unsigned long ue_count, ce_count;
+
+	if (!amdgpu_sriov_ras_telemetry_en(adev))
+		return 0;
+
+	amdgpu_virt_req_ras_err_count_internal(adev, true);
+	amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, NULL);
+
+	return 0;
+}
+
+/**
+ * amdgpu_virt_ras_telemetry_block_en() - check whether telemetry covers a RAS block
+ * @adev: amdgpu device
+ * @block: RAS block to look up
+ *
+ * Return: true when @block maps to an SR-IOV telemetry block and telemetry
+ * is enabled for that block, false otherwise.
+ */
+bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
+					enum amdgpu_ras_block block)
+{
+	enum amd_sriov_ras_telemetry_gpu_block blk_id =
+		amdgpu_ras_block_to_sriov(adev, block);
+
+	if (blk_id >= RAS_TELEMETRY_GPU_BLOCK_COUNT)
+		return false;
+
+	if (amdgpu_sriov_ras_telemetry_block_en(adev, blk_id))
+		return true;
+
+	return false;
+}
+
+/*
+ * amdgpu_virt_request_bad_pages() - request bad pages
+ * @adev: amdgpu device.
+ * Send command to GPU hypervisor to write new bad pages into the shared PF2VF region.
+ * Does nothing when the virt ops do not provide req_bad_pages.
+ */
+void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev)
+{
+	const struct amdgpu_virt_ops *ops = adev->virt.ops;
+
+	if (!ops || !ops->req_bad_pages)
+		return;
+
+	ops->req_bad_pages(adev);
+}
+
+/*
+ * Read the host's answer to a critical-region check from the telemetry
+ * buffer.
+ *
+ * The payload is snapshotted with kmemdup() and its checksum verified before
+ * @hit (if non-NULL) is updated. An oversized payload or a checksum mismatch
+ * returns 0 without touching @hit; -ENOMEM is returned when the snapshot
+ * cannot be allocated.
+ *
+ * NOTE(review): callers cannot tell "checksum mismatch" from "valid answer"
+ * by the return value - confirm they initialise *hit beforehand.
+ * NOTE(review): used_size is not checked to cover the hit field - confirm
+ * the host always sends a full structure.
+ */
+static int amdgpu_virt_cache_chk_criti_hit(struct amdgpu_device *adev,
+					   struct amdsriov_ras_telemetry *host_telemetry,
+					   bool *hit)
+{
+	struct amd_sriov_ras_chk_criti *snapshot;
+	uint32_t expected_sum, payload_len;
+
+	expected_sum = host_telemetry->header.checksum;
+	payload_len = host_telemetry->header.used_size;
+
+	if (payload_len > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
+		return 0;
+
+	snapshot = kmemdup(&host_telemetry->body.chk_criti, payload_len, GFP_KERNEL);
+	if (!snapshot)
+		return -ENOMEM;
+
+	if (amd_sriov_msg_checksum(snapshot, payload_len, 0, 0) == expected_sum &&
+	    hit)
+		*hit = !!snapshot->hit;
+
+	kfree(snapshot);
+
+	return 0;
+}
+
+/**
+ * amdgpu_virt_check_vf_critical_region() - ask the host whether an address hits a critical region
+ * @adev: amdgpu device
+ * @addr: address passed to the host
+ * @hit: receives the host's answer when a validated reply was obtained
+ *
+ * Return: -EOPNOTSUPP when the virt ops do not provide req_ras_chk_criti,
+ * -EPERM when the request was rate limited or the request to the host
+ * failed, otherwise the result of parsing the host reply.
+ */
+int amdgpu_virt_check_vf_critical_region(struct amdgpu_device *adev, u64 addr, bool *hit)
+{
+	struct amdgpu_virt *virt = &adev->virt;
+	int r = -EPERM;
+
+	if (!virt->ops || !virt->ops->req_ras_chk_criti)
+		return -EOPNOTSUPP;
+
+	/* Host allows 15 ras telemetry requests per 60 seconds. After which, the
+	 * Host will ignore incoming guest messages. Ratelimit the guest messages
+	 * to prevent guest self DOS.
+	 */
+	if (!__ratelimit(&virt->ras.ras_chk_criti_rs))
+		return r;
+
+	mutex_lock(&virt->ras.ras_telemetry_mutex);
+	if (virt->ops->req_ras_chk_criti(adev, addr) == 0)
+		r = amdgpu_virt_cache_chk_criti_hit(adev,
+						    virt->fw_reserve.ras_telemetry,
+						    hit);
+	mutex_unlock(&virt->ras.ras_telemetry_mutex);
+
+	return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 8d4c20bb71c5..01d5bca2dee1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -31,12 +31,35 @@
#define AMDGPU_SRIOV_CAPS_IS_VF (1 << 2) /* this GPU is a virtual function */
#define AMDGPU_PASSTHROUGH_MODE (1 << 3) /* thw whole GPU is pass through for VM */
#define AMDGPU_SRIOV_CAPS_RUNTIME (1 << 4) /* is out of full access mode */
+#define AMDGPU_VF_MMIO_ACCESS_PROTECT (1 << 5) /* MMIO write access is not allowed in sriov runtime */
+
+/* flags for indirect register access path supported by rlcg for sriov */
+#define AMDGPU_RLCG_GC_WRITE_LEGACY (0x8 << 28)
+#define AMDGPU_RLCG_GC_WRITE (0x0 << 28)
+#define AMDGPU_RLCG_GC_READ (0x1 << 28)
+#define AMDGPU_RLCG_MMHUB_WRITE (0x2 << 28)
+
+/* error code for indirect register access path supported by rlcg for sriov */
+#define AMDGPU_RLCG_VFGATE_DISABLED 0x4000000
+#define AMDGPU_RLCG_WRONG_OPERATION_TYPE 0x2000000
+#define AMDGPU_RLCG_REG_NOT_IN_RANGE 0x1000000
+
+#define AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK 0xFFFFF
+#define AMDGPU_RLCG_SCRATCH1_ERROR_MASK 0xF000000
/* all asic after AI use this offset */
#define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5
/* tonga/fiji use this offset */
#define mmBIF_IOV_FUNC_IDENTIFIER 0x1503
+#define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 2
+
+/* Signature used to validate the SR-IOV dynamic critical region init data header ("INDA") */
+#define AMDGPU_SRIOV_CRIT_DATA_SIGNATURE "INDA"
+#define AMDGPU_SRIOV_CRIT_DATA_SIG_LEN 4
+
+#define IS_SRIOV_CRIT_REGN_ENTRY_VALID(hdr, id) ((hdr)->valid_tables & (1 << (id)))
+
enum amdgpu_sriov_vf_mode {
SRIOV_VF_MODE_BARE_METAL = 0,
SRIOV_VF_MODE_ONE_VF,
@@ -61,6 +84,8 @@ struct amdgpu_vf_error_buffer {
uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
};
+enum idh_request;
+
/**
* struct amdgpu_virt_ops - amdgpu device virt operations
*/
@@ -69,8 +94,17 @@ struct amdgpu_virt_ops {
int (*rel_full_gpu)(struct amdgpu_device *adev, bool init);
int (*req_init_data)(struct amdgpu_device *adev);
int (*reset_gpu)(struct amdgpu_device *adev);
+ void (*ready_to_reset)(struct amdgpu_device *adev);
int (*wait_reset)(struct amdgpu_device *adev);
- void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);
+ void (*trans_msg)(struct amdgpu_device *adev, enum idh_request req,
+ u32 data1, u32 data2, u32 data3);
+ void (*ras_poison_handler)(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
+ bool (*rcvd_ras_intr)(struct amdgpu_device *adev);
+ int (*req_ras_err_count)(struct amdgpu_device *adev);
+ int (*req_ras_cper_dump)(struct amdgpu_device *adev, u64 vf_rptr);
+ int (*req_bad_pages)(struct amdgpu_device *adev);
+ int (*req_ras_chk_criti)(struct amdgpu_device *adev, u64 addr);
};
/*
@@ -79,6 +113,7 @@ struct amdgpu_virt_ops {
struct amdgpu_virt_fw_reserve {
struct amd_sriov_msg_pf2vf_info_header *p_pf2vf;
struct amd_sriov_msg_vf2pf_info_header *p_vf2pf;
+ void *ras_telemetry;
unsigned int checksum_key;
};
@@ -106,15 +141,29 @@ enum AMDGIM_FEATURE_FLAG {
AMDGIM_FEATURE_PP_ONE_VF = (1 << 4),
/* Indirect Reg Access enabled */
AMDGIM_FEATURE_INDIRECT_REG_ACCESS = (1 << 5),
+ /* AV1 Support MODE*/
+ AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6),
+ /* VCN RB decouple */
+ AMDGIM_FEATURE_VCN_RB_DECOUPLE = (1 << 7),
+ /* MES info */
+ AMDGIM_FEATURE_MES_INFO_ENABLE = (1 << 8),
+ AMDGIM_FEATURE_RAS_CAPS = (1 << 9),
+ AMDGIM_FEATURE_RAS_TELEMETRY = (1 << 10),
+ AMDGIM_FEATURE_RAS_CPER = (1 << 11),
+ AMDGIM_FEATURE_XGMI_TA_EXT_PEER_LINK = (1 << 12),
};
enum AMDGIM_REG_ACCESS_FLAG {
/* Use PSP to program IH_RB_CNTL */
- AMDGIM_FEATURE_IH_REG_PSP_EN = (1 << 0),
+ AMDGIM_FEATURE_IH_REG_PSP_EN = (1 << 0),
/* Use RLC to program MMHUB regs */
- AMDGIM_FEATURE_MMHUB_REG_RLC_EN = (1 << 1),
+ AMDGIM_FEATURE_MMHUB_REG_RLC_EN = (1 << 1),
/* Use RLC to program GC regs */
- AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2),
+ AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2),
+ /* Use PSP to program L1_TLB_CNTL */
+ AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN = (1 << 3),
+ /* Use RLCG to program SQ_CONFIG1 */
+ AMDGIM_FEATURE_REG_ACCESS_SQ_CONFIG = (1 << 4),
};
struct amdgim_pf2vf_info_v1 {
@@ -208,6 +257,23 @@ struct amdgpu_virt_ras_err_handler_data {
int last_reserved;
};
+struct amdgpu_virt_ras {
+ struct ratelimit_state ras_error_cnt_rs;
+ struct ratelimit_state ras_cper_dump_rs;
+ struct ratelimit_state ras_chk_criti_rs;
+ struct mutex ras_telemetry_mutex;
+ uint64_t cper_rptr;
+};
+
+#define AMDGPU_VIRT_CAPS_LIST(X) X(AMDGPU_VIRT_CAP_POWER_LIMIT)
+
+DECLARE_ATTR_CAP_CLASS(amdgpu_virt, AMDGPU_VIRT_CAPS_LIST);
+
+struct amdgpu_virt_region {
+ uint32_t offset;
+ uint32_t size_kb;
+};
+
/* GPU virtualization */
struct amdgpu_virt {
uint32_t caps;
@@ -217,11 +283,16 @@ struct amdgpu_virt {
uint32_t reg_val_offs;
struct amdgpu_irq_src ack_irq;
struct amdgpu_irq_src rcv_irq;
+
struct work_struct flr_work;
+ struct work_struct req_bad_pages_work;
+ struct work_struct handle_bad_pages_work;
+
struct amdgpu_mm_table mm_table;
const struct amdgpu_virt_ops *ops;
struct amdgpu_vf_error_buffer vf_errors;
struct amdgpu_virt_fw_reserve fw_reserve;
+ struct amdgpu_virt_caps virt_caps;
uint32_t gim_feature;
uint32_t reg_access_mode;
int req_init_data_ver;
@@ -230,9 +301,16 @@ struct amdgpu_virt {
bool ras_init_done;
uint32_t reg_access;
+ /* dynamic(v2) critical regions */
+ struct amdgpu_virt_region init_data_header;
+ struct amdgpu_virt_region crit_regn;
+ struct amdgpu_virt_region crit_regn_tbl[AMD_SRIOV_MSG_MAX_TABLE_ID];
+ bool is_dynamic_crit_regn_enabled;
+
/* vf2pf message */
struct delayed_work vf2pf_work;
uint32_t vf2pf_update_interval_ms;
+ int vf2pf_update_retry_cnt;
/* multimedia bandwidth config */
bool is_mm_bw_enabled;
@@ -240,6 +318,22 @@ struct amdgpu_virt {
uint32_t decode_max_frame_pixels;
uint32_t encode_max_dimension_pixels;
uint32_t encode_max_frame_pixels;
+
+ /* the ucode id to signal the autoload */
+ uint32_t autoload_ucode_id;
+
+ /* Spinlock to protect access to the RLCG register interface */
+ spinlock_t rlcg_reg_lock;
+
+ struct mutex access_req_mutex;
+
+ union amd_sriov_ras_caps ras_en_caps;
+ union amd_sriov_ras_caps ras_telemetry_en_caps;
+ struct amdgpu_virt_ras ras;
+ struct amd_sriov_ras_telemetry_error_count count_cache;
+
+ /* hibernate and resume with different VF feature for xgmi enabled system */
+ bool is_xgmi_node_migrate_enabled;
};
struct amdgpu_video_codec_info;
@@ -275,13 +369,44 @@ struct amdgpu_video_codec_info;
(amdgpu_sriov_vf((adev)) && \
((adev)->virt.reg_access & (AMDGIM_FEATURE_GC_REG_RLC_EN)))
+#define amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev) \
+(amdgpu_sriov_vf((adev)) && \
+ ((adev)->virt.reg_access & (AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN)))
+
+#define amdgpu_sriov_rlcg_error_report_enabled(adev) \
+ (amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev))
+
+#define amdgpu_sriov_reg_access_sq_config(adev) \
+(amdgpu_sriov_vf((adev)) && \
+ ((adev)->virt.reg_access & (AMDGIM_FEATURE_REG_ACCESS_SQ_CONFIG)))
+
#define amdgpu_passthrough(adev) \
((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE)
+#define amdgpu_sriov_vf_mmio_access_protection(adev) \
+((adev)->virt.caps & AMDGPU_VF_MMIO_ACCESS_PROTECT)
+
+#define amdgpu_sriov_ras_caps_en(adev) \
+((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CAPS)
+
+#define amdgpu_sriov_ras_telemetry_en(adev) \
+(((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_TELEMETRY) && (adev)->virt.fw_reserve.ras_telemetry)
+
+#define amdgpu_sriov_ras_telemetry_block_en(adev, sriov_blk) \
+(amdgpu_sriov_ras_telemetry_en((adev)) && (adev)->virt.ras_telemetry_en_caps.all & BIT(sriov_blk))
+
+#define amdgpu_sriov_ras_cper_en(adev) \
+((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CPER)
+
+#define amdgpu_sriov_xgmi_ta_ext_peer_link_en(adev) \
+((adev)->virt.gim_feature & AMDGIM_FEATURE_XGMI_TA_EXT_PEER_LINK)
+
static inline bool is_virtual_machine(void)
{
-#ifdef CONFIG_X86
+#if defined(CONFIG_X86)
return boot_cpu_has(X86_FEATURE_HYPERVISOR);
+#elif defined(CONFIG_ARM64)
+ return !is_kernel_in_hyp_mode();
#else
return false;
#endif
@@ -289,27 +414,42 @@ static inline bool is_virtual_machine(void)
#define amdgpu_sriov_is_pp_one_vf(adev) \
((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
+#define amdgpu_sriov_multi_vf_mode(adev) \
+ (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
#define amdgpu_sriov_is_debug(adev) \
((!amdgpu_in_reset(adev)) && adev->virt.tdr_debug)
#define amdgpu_sriov_is_normal(adev) \
((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug))
+#define amdgpu_sriov_is_av1_support(adev) \
+ ((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT)
+#define amdgpu_sriov_is_vcn_rb_decouple(adev) \
+ ((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE)
+#define amdgpu_sriov_is_mes_info_enable(adev) \
+ ((adev)->virt.gim_feature & AMDGIM_FEATURE_MES_INFO_ENABLE)
+
+#define amdgpu_virt_xgmi_migrate_enabled(adev) \
+ ((adev)->virt.is_xgmi_node_migrate_enabled && (adev)->gmc.xgmi.node_segment_size != 0)
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
void amdgpu_virt_init_setting(struct amdgpu_device *adev);
-void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
- uint32_t reg0, uint32_t rreg1,
- uint32_t ref, uint32_t mask);
int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
void amdgpu_virt_request_init_data(struct amdgpu_device *adev);
+void amdgpu_virt_ready_to_reset(struct amdgpu_device *adev);
int amdgpu_virt_wait_reset(struct amdgpu_device *adev);
int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
+bool amdgpu_virt_rcvd_ras_interrupt(struct amdgpu_device *adev);
void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev);
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
+void amdgpu_virt_exchange_data(struct amdgpu_device *adev);
void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
-void amdgpu_detect_virtualization(struct amdgpu_device *adev);
+void amdgpu_virt_init(struct amdgpu_device *adev);
+
+int amdgpu_virt_init_critical_region(struct amdgpu_device *adev);
+int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
+ int data_id, uint8_t *binary, u32 *size);
bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
@@ -320,4 +460,27 @@ enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *ad
void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev,
struct amdgpu_video_codec_info *encode, uint32_t encode_array_size,
struct amdgpu_video_codec_info *decode, uint32_t decode_array_size);
+void amdgpu_sriov_wreg(struct amdgpu_device *adev,
+ u32 offset, u32 value,
+ u32 acc_flags, u32 hwip, u32 xcc_id);
+u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
+ u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id);
+bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev,
+ uint32_t ucode_id);
+void amdgpu_virt_pre_reset(struct amdgpu_device *adev);
+void amdgpu_virt_post_reset(struct amdgpu_device *adev);
+bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev);
+bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
+ u32 acc_flags, u32 hwip,
+ bool write, u32 *rlcg_flag);
+u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id);
+bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev);
+int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block,
+ struct ras_err_data *err_data);
+int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update);
+int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev);
+bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
+void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev);
+int amdgpu_virt_check_vf_critical_region(struct amdgpu_device *adev, u64 addr, bool *hit);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index ac9a8cd21c4b..79bad9cbe2ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0+
#include <drm/drm_atomic_helper.h>
+#include <drm/drm_edid.h>
#include <drm/drm_simple_kms_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_vblank.h>
#include "amdgpu.h"
@@ -12,10 +14,11 @@
#include "dce_v8_0.h"
#endif
#include "dce_v10_0.h"
-#include "dce_v11_0.h"
#include "ivsrcid/ivsrcid_vislands30.h"
#include "amdgpu_vkms.h"
#include "amdgpu_display.h"
+#include "atom.h"
+#include "amdgpu_irq.h"
/**
* DOC: amdgpu_vkms
@@ -41,46 +44,44 @@ static const u32 amdgpu_vkms_formats[] = {
static enum hrtimer_restart amdgpu_vkms_vblank_simulate(struct hrtimer *timer)
{
- struct amdgpu_vkms_output *output = container_of(timer,
- struct amdgpu_vkms_output,
- vblank_hrtimer);
- struct drm_crtc *crtc = &output->crtc;
+ struct amdgpu_crtc *amdgpu_crtc = container_of(timer, struct amdgpu_crtc, vblank_timer);
+ struct drm_crtc *crtc = &amdgpu_crtc->base;
+ struct amdgpu_vkms_output *output = drm_crtc_to_amdgpu_vkms_output(crtc);
u64 ret_overrun;
bool ret;
- ret_overrun = hrtimer_forward_now(&output->vblank_hrtimer,
+ ret_overrun = hrtimer_forward_now(&amdgpu_crtc->vblank_timer,
output->period_ns);
- WARN_ON(ret_overrun != 1);
+ if (ret_overrun != 1)
+ DRM_WARN("%s: vblank timer overrun\n", __func__);
ret = drm_crtc_handle_vblank(crtc);
+ /* Don't queue timer again when vblank is disabled. */
if (!ret)
- DRM_ERROR("amdgpu_vkms failure on handling vblank");
+ return HRTIMER_NORESTART;
return HRTIMER_RESTART;
}
static int amdgpu_vkms_enable_vblank(struct drm_crtc *crtc)
{
- struct drm_device *dev = crtc->dev;
- unsigned int pipe = drm_crtc_index(crtc);
- struct drm_vblank_crtc *vblank = &dev->vblank[pipe];
+ struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc);
struct amdgpu_vkms_output *out = drm_crtc_to_amdgpu_vkms_output(crtc);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
drm_calc_timestamping_constants(crtc, &crtc->mode);
- hrtimer_init(&out->vblank_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- out->vblank_hrtimer.function = &amdgpu_vkms_vblank_simulate;
out->period_ns = ktime_set(0, vblank->framedur_ns);
- hrtimer_start(&out->vblank_hrtimer, out->period_ns, HRTIMER_MODE_REL);
+ hrtimer_start(&amdgpu_crtc->vblank_timer, out->period_ns, HRTIMER_MODE_REL);
return 0;
}
static void amdgpu_vkms_disable_vblank(struct drm_crtc *crtc)
{
- struct amdgpu_vkms_output *out = drm_crtc_to_amdgpu_vkms_output(crtc);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- hrtimer_cancel(&out->vblank_hrtimer);
+ hrtimer_try_to_cancel(&amdgpu_crtc->vblank_timer);
}
static bool amdgpu_vkms_get_vblank_timestamp(struct drm_crtc *crtc,
@@ -88,17 +89,16 @@ static bool amdgpu_vkms_get_vblank_timestamp(struct drm_crtc *crtc,
ktime_t *vblank_time,
bool in_vblank_irq)
{
- struct drm_device *dev = crtc->dev;
- unsigned int pipe = crtc->index;
struct amdgpu_vkms_output *output = drm_crtc_to_amdgpu_vkms_output(crtc);
- struct drm_vblank_crtc *vblank = &dev->vblank[pipe];
+ struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
if (!READ_ONCE(vblank->enabled)) {
*vblank_time = ktime_get();
return true;
}
- *vblank_time = READ_ONCE(output->vblank_hrtimer.node.expires);
+ *vblank_time = READ_ONCE(amdgpu_crtc->vblank_timer.node.expires);
if (WARN_ON(*vblank_time == vblank->time))
return true;
@@ -142,15 +142,16 @@ static void amdgpu_vkms_crtc_atomic_disable(struct drm_crtc *crtc,
static void amdgpu_vkms_crtc_atomic_flush(struct drm_crtc *crtc,
struct drm_atomic_state *state)
{
+ unsigned long flags;
if (crtc->state->event) {
- spin_lock(&crtc->dev->event_lock);
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
if (drm_crtc_vblank_get(crtc) != 0)
drm_crtc_send_vblank_event(crtc, crtc->state->event);
else
drm_crtc_arm_vblank_event(crtc, crtc->state->event);
- spin_unlock(&crtc->dev->event_lock);
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
crtc->state->event = NULL;
}
@@ -165,6 +166,8 @@ static const struct drm_crtc_helper_funcs amdgpu_vkms_crtc_helper_funcs = {
static int amdgpu_vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc,
struct drm_plane *primary, struct drm_plane *cursor)
{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
int ret;
ret = drm_crtc_init_with_planes(dev, crtc, primary, cursor,
@@ -176,6 +179,17 @@ static int amdgpu_vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc,
drm_crtc_helper_add(crtc, &amdgpu_vkms_crtc_helper_funcs);
+ amdgpu_crtc->crtc_id = drm_crtc_index(crtc);
+ adev->mode_info.crtcs[drm_crtc_index(crtc)] = amdgpu_crtc;
+
+ amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+ amdgpu_crtc->vsync_timer_enabled = AMDGPU_IRQ_STATE_DISABLE;
+
+ hrtimer_setup(&amdgpu_crtc->vblank_timer, &amdgpu_vkms_vblank_simulate, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+
return ret;
}
@@ -222,6 +236,8 @@ static int amdgpu_vkms_conn_get_modes(struct drm_connector *connector)
for (i = 0; i < ARRAY_SIZE(common_modes); i++) {
mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false);
+ if (!mode)
+ continue;
drm_mode_probed_add(connector, mode);
}
@@ -266,8 +282,8 @@ static int amdgpu_vkms_plane_atomic_check(struct drm_plane *plane,
return PTR_ERR(crtc_state);
ret = drm_atomic_helper_check_plane_state(new_plane_state, crtc_state,
- DRM_PLANE_HELPER_NO_SCALING,
- DRM_PLANE_HELPER_NO_SCALING,
+ DRM_PLANE_NO_SCALING,
+ DRM_PLANE_NO_SCALING,
false, true);
if (ret != 0)
return ret;
@@ -286,9 +302,6 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane *plane,
struct drm_gem_object *obj;
struct amdgpu_device *adev;
struct amdgpu_bo *rbo;
- struct list_head list;
- struct ttm_validate_buffer tv;
- struct ww_acquire_ctx ticket;
uint32_t domain;
int r;
@@ -297,61 +310,80 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane *plane,
return 0;
}
afb = to_amdgpu_framebuffer(new_state->fb);
- obj = new_state->fb->obj[0];
+
+ obj = drm_gem_fb_get_obj(new_state->fb, 0);
+ if (!obj) {
+ DRM_ERROR("Failed to get obj from framebuffer\n");
+ return -EINVAL;
+ }
+
rbo = gem_to_amdgpu_bo(obj);
adev = amdgpu_ttm_adev(rbo->tbo.bdev);
- INIT_LIST_HEAD(&list);
-
- tv.bo = &rbo->tbo;
- tv.num_shared = 1;
- list_add(&tv.head, &list);
- r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
+ r = amdgpu_bo_reserve(rbo, true);
if (r) {
dev_err(adev->dev, "fail to reserve bo (%d)\n", r);
return r;
}
+ r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1);
+ if (r) {
+ dev_err(adev->dev, "allocating fence slot failed (%d)\n", r);
+ goto error_unlock;
+ }
+
if (plane->type != DRM_PLANE_TYPE_CURSOR)
domain = amdgpu_display_supported_domains(adev, rbo->flags);
else
domain = AMDGPU_GEM_DOMAIN_VRAM;
+ rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(rbo, domain);
if (unlikely(r != 0)) {
if (r != -ERESTARTSYS)
DRM_ERROR("Failed to pin framebuffer with error %d\n", r);
- ttm_eu_backoff_reservation(&ticket, &list);
- return r;
+ goto error_unlock;
}
r = amdgpu_ttm_alloc_gart(&rbo->tbo);
if (unlikely(r != 0)) {
- amdgpu_bo_unpin(rbo);
- ttm_eu_backoff_reservation(&ticket, &list);
DRM_ERROR("%p bind failed\n", rbo);
- return r;
+ goto error_unpin;
}
- ttm_eu_backoff_reservation(&ticket, &list);
+ amdgpu_bo_unreserve(rbo);
afb->address = amdgpu_bo_gpu_offset(rbo);
amdgpu_bo_ref(rbo);
return 0;
+
+error_unpin:
+ amdgpu_bo_unpin(rbo);
+
+error_unlock:
+ amdgpu_bo_unreserve(rbo);
+ return r;
}
static void amdgpu_vkms_cleanup_fb(struct drm_plane *plane,
struct drm_plane_state *old_state)
{
struct amdgpu_bo *rbo;
+ struct drm_gem_object *obj;
int r;
if (!old_state->fb)
return;
- rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]);
+ obj = drm_gem_fb_get_obj(old_state->fb, 0);
+ if (!obj) {
+ DRM_ERROR("Failed to get obj from framebuffer\n");
+ return;
+ }
+
+ rbo = gem_to_amdgpu_bo(obj);
r = amdgpu_bo_reserve(rbo, false);
if (unlikely(r)) {
DRM_ERROR("failed to reserve rbo before unpin\n");
@@ -396,12 +428,12 @@ static struct drm_plane *amdgpu_vkms_plane_init(struct drm_device *dev,
return plane;
}
-int amdgpu_vkms_output_init(struct drm_device *dev,
- struct amdgpu_vkms_output *output, int index)
+static int amdgpu_vkms_output_init(struct drm_device *dev, struct
+ amdgpu_vkms_output *output, int index)
{
struct drm_connector *connector = &output->connector;
struct drm_encoder *encoder = &output->encoder;
- struct drm_crtc *crtc = &output->crtc;
+ struct drm_crtc *crtc = &output->crtc.base;
struct drm_plane *primary, *cursor = NULL;
int ret;
@@ -460,10 +492,15 @@ const struct drm_mode_config_funcs amdgpu_vkms_mode_funcs = {
.atomic_commit = drm_atomic_helper_commit,
};
-static int amdgpu_vkms_sw_init(void *handle)
+static int amdgpu_vkms_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->amdgpu_vkms_output = kcalloc(adev->mode_info.num_crtc,
+ sizeof(struct amdgpu_vkms_output), GFP_KERNEL);
+ if (!adev->amdgpu_vkms_output)
+ return -ENOMEM;
adev_to_drm(adev)->max_vblank_count = 0;
@@ -475,16 +512,12 @@ static int amdgpu_vkms_sw_init(void *handle)
adev_to_drm(adev)->mode_config.preferred_depth = 24;
adev_to_drm(adev)->mode_config.prefer_shadow = 1;
- adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
+ adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
r = amdgpu_display_modeset_create_props(adev);
if (r)
return r;
- adev->amdgpu_vkms_output = kcalloc(adev->mode_info.num_crtc, sizeof(struct amdgpu_vkms_output), GFP_KERNEL);
- if (!adev->amdgpu_vkms_output)
- return -ENOMEM;
-
/* allocate crtcs, encoders, connectors */
for (i = 0; i < adev->mode_info.num_crtc; i++) {
r = amdgpu_vkms_output_init(adev_to_drm(adev), &adev->amdgpu_vkms_output[i], i);
@@ -492,33 +525,38 @@ static int amdgpu_vkms_sw_init(void *handle)
return r;
}
+ r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
+ if (r)
+ return r;
+
drm_kms_helper_poll_init(adev_to_drm(adev));
adev->mode_info.mode_config_initialized = true;
return 0;
}
-static int amdgpu_vkms_sw_fini(void *handle)
+static int amdgpu_vkms_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i = 0;
for (i = 0; i < adev->mode_info.num_crtc; i++)
- if (adev->amdgpu_vkms_output[i].vblank_hrtimer.function)
- hrtimer_cancel(&adev->amdgpu_vkms_output[i].vblank_hrtimer);
-
- kfree(adev->mode_info.bios_hardcoded_edid);
- kfree(adev->amdgpu_vkms_output);
+ if (adev->mode_info.crtcs[i])
+ hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer);
drm_kms_helper_poll_fini(adev_to_drm(adev));
+ drm_mode_config_cleanup(adev_to_drm(adev));
adev->mode_info.mode_config_initialized = false;
+
+ drm_edid_free(adev->mode_info.bios_hardcoded_edid);
+ kfree(adev->amdgpu_vkms_output);
return 0;
}
-static int amdgpu_vkms_hw_init(void *handle)
+static int amdgpu_vkms_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
@@ -542,13 +580,6 @@ static int amdgpu_vkms_hw_init(void *handle)
case CHIP_TONGA:
dce_v10_0_disable_dce(adev);
break;
- case CHIP_CARRIZO:
- case CHIP_STONEY:
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_VEGAM:
- dce_v11_0_disable_dce(adev);
- break;
case CHIP_TOPAZ:
#ifdef CONFIG_DRM_AMDGPU_SI
case CHIP_HAINAN:
@@ -561,55 +592,45 @@ static int amdgpu_vkms_hw_init(void *handle)
return 0;
}
-static int amdgpu_vkms_hw_fini(void *handle)
+static int amdgpu_vkms_hw_fini(struct amdgpu_ip_block *ip_block)
{
return 0;
}
-static int amdgpu_vkms_suspend(void *handle)
+static int amdgpu_vkms_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = drm_mode_config_helper_suspend(adev_to_drm(adev));
if (r)
return r;
- return amdgpu_vkms_hw_fini(handle);
+
+ return 0;
}
-static int amdgpu_vkms_resume(void *handle)
+static int amdgpu_vkms_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = amdgpu_vkms_hw_init(handle);
+ r = amdgpu_vkms_hw_init(ip_block);
if (r)
return r;
- return drm_mode_config_helper_resume(adev_to_drm(adev));
+ return drm_mode_config_helper_resume(adev_to_drm(ip_block->adev));
}
-static bool amdgpu_vkms_is_idle(void *handle)
+static bool amdgpu_vkms_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int amdgpu_vkms_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int amdgpu_vkms_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int amdgpu_vkms_set_clockgating_state(void *handle,
+static int amdgpu_vkms_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int amdgpu_vkms_set_powergating_state(void *handle,
+static int amdgpu_vkms_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -617,8 +638,6 @@ static int amdgpu_vkms_set_powergating_state(void *handle,
static const struct amd_ip_funcs amdgpu_vkms_ip_funcs = {
.name = "amdgpu_vkms",
- .early_init = NULL,
- .late_init = NULL,
.sw_init = amdgpu_vkms_sw_init,
.sw_fini = amdgpu_vkms_sw_fini,
.hw_init = amdgpu_vkms_hw_init,
@@ -626,14 +645,11 @@ static const struct amd_ip_funcs amdgpu_vkms_ip_funcs = {
.suspend = amdgpu_vkms_suspend,
.resume = amdgpu_vkms_resume,
.is_idle = amdgpu_vkms_is_idle,
- .wait_for_idle = amdgpu_vkms_wait_for_idle,
- .soft_reset = amdgpu_vkms_soft_reset,
.set_clockgating_state = amdgpu_vkms_set_clockgating_state,
.set_powergating_state = amdgpu_vkms_set_powergating_state,
};
-const struct amdgpu_ip_block_version amdgpu_vkms_ip_block =
-{
+const struct amdgpu_ip_block_version amdgpu_vkms_ip_block = {
.type = AMD_IP_BLOCK_TYPE_DCE,
.major = 1,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h
index 97f1b79c0724..4f8722ff37c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h
@@ -10,15 +10,14 @@
#define YRES_MAX 16384
#define drm_crtc_to_amdgpu_vkms_output(target) \
- container_of(target, struct amdgpu_vkms_output, crtc)
+ container_of(target, struct amdgpu_vkms_output, crtc.base)
extern const struct amdgpu_ip_block_version amdgpu_vkms_ip_block;
struct amdgpu_vkms_output {
- struct drm_crtc crtc;
+ struct amdgpu_crtc crtc;
struct drm_encoder encoder;
struct drm_connector connector;
- struct hrtimer vblank_hrtimer;
ktime_t period_ns;
struct drm_pending_vblank_event *event;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0e7dc23f78e7..a67285118c37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -33,7 +33,10 @@
#include <drm/amdgpu_drm.h>
#include <drm/drm_drv.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/drm_exec.h>
#include "amdgpu.h"
+#include "amdgpu_vm.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_gmc.h"
@@ -45,22 +48,43 @@
/**
* DOC: GPUVM
*
- * GPUVM is similar to the legacy gart on older asics, however
- * rather than there being a single global gart table
- * for the entire GPU, there are multiple VM page tables active
- * at any given time. The VM page tables can contain a mix
- * vram pages and system memory pages and system memory pages
+ * GPUVM is the MMU functionality provided on the GPU.
+ * GPUVM is similar to the legacy GART on older asics, however
+ * rather than there being a single global GART table
+ * for the entire GPU, there can be multiple GPUVM page tables active
+ * at any given time. The GPUVM page tables can contain a mix
+ * of VRAM pages and system pages (both memory and MMIO) and system pages
* can be mapped as snooped (cached system pages) or unsnooped
* (uncached system pages).
- * Each VM has an ID associated with it and there is a page table
- * associated with each VMID. When execting a command buffer,
- * the kernel tells the the ring what VMID to use for that command
+ *
+ * Each active GPUVM has an ID associated with it and there is a page table
+ * linked with each VMID. When executing a command buffer,
+ * the kernel tells the engine what VMID to use for that command
* buffer. VMIDs are allocated dynamically as commands are submitted.
* The userspace drivers maintain their own address space and the kernel
* sets up their pages tables accordingly when they submit their
* command buffers and a VMID is assigned.
- * Cayman/Trinity support up to 8 active VMs at any given time;
- * SI supports 16.
+ * The hardware supports up to 16 active GPUVMs at any given time.
+ *
+ * Each GPUVM is represented by a 1-2 or 1-5 level page table, depending
+ * on the ASIC family. GPUVM supports RWX attributes on each page as well
+ * as other features such as encryption and caching attributes.
+ *
+ * VMID 0 is special. It is the GPUVM used for the kernel driver. In
+ * addition to an aperture managed by a page table, VMID 0 also has
+ * several other apertures. There is an aperture for direct access to VRAM
+ * and there is a legacy AGP aperture which just forwards accesses directly
+ * to the matching system physical addresses (or IOVAs when an IOMMU is
+ * present). These apertures provide direct access to these memories without
+ * incurring the overhead of a page table. VMID 0 is used by the kernel
+ * driver for tasks like memory management.
+ *
+ * GPU clients (i.e., engines on the GPU) use GPUVM VMIDs to access memory.
+ * For user applications, each application can have their own unique GPUVM
+ * address space. The application manages the address space and the kernel
+ * driver manages the GPUVM page tables for each process. If a GPU client
+ * accesses an invalid page, it will generate a GPU page fault, similar to
+ * accessing an invalid page on a CPU.
*/
#define START(node) ((node)->start)
@@ -89,171 +113,29 @@ struct amdgpu_prt_cb {
};
/**
- * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping
- *
- * @adev: amdgpu_device pointer
- * @vm: amdgpu_vm pointer
- * @pasid: the pasid the VM is using on this GPU
- *
- * Set the pasid this VM is using on this GPU, can also be used to remove the
- * pasid by passing in zero.
- *
- */
-int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- u32 pasid)
-{
- int r;
-
- if (vm->pasid == pasid)
- return 0;
-
- if (vm->pasid) {
- r = xa_err(xa_erase_irq(&adev->vm_manager.pasids, vm->pasid));
- if (r < 0)
- return r;
-
- vm->pasid = 0;
- }
-
- if (pasid) {
- r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm,
- GFP_KERNEL));
- if (r < 0)
- return r;
-
- vm->pasid = pasid;
- }
-
-
- return 0;
-}
-
-/*
- * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
- * happens while holding this lock anywhere to prevent deadlocks when
- * an MMU notifier runs in reclaim-FS context.
- */
-static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
-{
- mutex_lock(&vm->eviction_lock);
- vm->saved_flags = memalloc_noreclaim_save();
-}
-
-static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
-{
- if (mutex_trylock(&vm->eviction_lock)) {
- vm->saved_flags = memalloc_noreclaim_save();
- return 1;
- }
- return 0;
-}
-
-static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
-{
- memalloc_noreclaim_restore(vm->saved_flags);
- mutex_unlock(&vm->eviction_lock);
-}
-
-/**
- * amdgpu_vm_level_shift - return the addr shift for each level
- *
- * @adev: amdgpu_device pointer
- * @level: VMPT level
- *
- * Returns:
- * The number of bits the pfn needs to be right shifted for a level.
- */
-static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
- unsigned level)
-{
- switch (level) {
- case AMDGPU_VM_PDB2:
- case AMDGPU_VM_PDB1:
- case AMDGPU_VM_PDB0:
- return 9 * (AMDGPU_VM_PDB0 - level) +
- adev->vm_manager.block_size;
- case AMDGPU_VM_PTB:
- return 0;
- default:
- return ~0;
- }
-}
-
-/**
- * amdgpu_vm_num_entries - return the number of entries in a PD/PT
- *
- * @adev: amdgpu_device pointer
- * @level: VMPT level
- *
- * Returns:
- * The number of entries in a page directory or page table.
- */
-static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
- unsigned level)
-{
- unsigned shift = amdgpu_vm_level_shift(adev,
- adev->vm_manager.root_level);
-
- if (level == adev->vm_manager.root_level)
- /* For the root directory */
- return round_up(adev->vm_manager.max_pfn, 1ULL << shift)
- >> shift;
- else if (level != AMDGPU_VM_PTB)
- /* Everything in between */
- return 512;
- else
- /* For the page tables on the leaves */
- return AMDGPU_VM_PTE_COUNT(adev);
-}
-
-/**
- * amdgpu_vm_num_ats_entries - return the number of ATS entries in the root PD
- *
- * @adev: amdgpu_device pointer
- *
- * Returns:
- * The number of entries in the root page directory which needs the ATS setting.
+ * struct amdgpu_vm_tlb_seq_struct - Helper to increment the TLB flush sequence
*/
-static unsigned amdgpu_vm_num_ats_entries(struct amdgpu_device *adev)
-{
- unsigned shift;
-
- shift = amdgpu_vm_level_shift(adev, adev->vm_manager.root_level);
- return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT);
-}
+struct amdgpu_vm_tlb_seq_struct {
+ /**
+ * @vm: pointer to the amdgpu_vm structure to set the fence sequence on
+ */
+ struct amdgpu_vm *vm;
-/**
- * amdgpu_vm_entries_mask - the mask to get the entry number of a PD/PT
- *
- * @adev: amdgpu_device pointer
- * @level: VMPT level
- *
- * Returns:
- * The mask to extract the entry number of a PD/PT from an address.
- */
-static uint32_t amdgpu_vm_entries_mask(struct amdgpu_device *adev,
- unsigned int level)
-{
- if (level <= adev->vm_manager.root_level)
- return 0xffffffff;
- else if (level != AMDGPU_VM_PTB)
- return 0x1ff;
- else
- return AMDGPU_VM_PTE_COUNT(adev) - 1;
-}
+ /**
+ * @cb: callback
+ */
+ struct dma_fence_cb cb;
+};
/**
- * amdgpu_vm_bo_size - returns the size of the BOs in bytes
- *
- * @adev: amdgpu_device pointer
- * @level: VMPT level
+ * amdgpu_vm_assert_locked - check if VM is correctly locked
+ * @vm: the VM which should be tested
*
- * Returns:
- * The size of the BO for a page directory or page table in bytes.
+ * Asserts that the VM root PD is locked.
*/
-static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
+static void amdgpu_vm_assert_locked(struct amdgpu_vm *vm)
{
- return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
+ dma_resv_assert_held(vm->root.bo->tbo.base.resv);
}
/**
@@ -270,10 +152,13 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
struct amdgpu_bo *bo = vm_bo->bo;
vm_bo->moved = true;
+ amdgpu_vm_assert_locked(vm);
+ spin_lock(&vm_bo->vm->status_lock);
if (bo->tbo.type == ttm_bo_type_kernel)
list_move(&vm_bo->vm_status, &vm->evicted);
else
list_move_tail(&vm_bo->vm_status, &vm->evicted);
+ spin_unlock(&vm_bo->vm->status_lock);
}
/**
* amdgpu_vm_bo_moved - vm_bo is moved
@@ -285,7 +170,10 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
{
+ amdgpu_vm_assert_locked(vm_bo->vm);
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
+ spin_unlock(&vm_bo->vm->status_lock);
}
/**
@@ -298,7 +186,10 @@ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
{
+ amdgpu_vm_assert_locked(vm_bo->vm);
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
+ spin_unlock(&vm_bo->vm->status_lock);
vm_bo->moved = false;
}
@@ -312,9 +203,25 @@ static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
{
- spin_lock(&vm_bo->vm->invalidated_lock);
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated);
- spin_unlock(&vm_bo->vm->invalidated_lock);
+ spin_unlock(&vm_bo->vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_bo_evicted_user - vm_bo is evicted
+ *
+ * @vm_bo: vm_bo which is evicted
+ *
+ * State for BOs used by user mode queues which are not at the location they
+ * should be.
+ */
+static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo)
+{
+ vm_bo->moved = true;
+ spin_lock(&vm_bo->vm->status_lock);
+ list_move(&vm_bo->vm_status, &vm_bo->vm->evicted_user);
+ spin_unlock(&vm_bo->vm->status_lock);
}
/**
@@ -327,10 +234,14 @@ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
{
- if (vm_bo->bo->parent)
+ amdgpu_vm_assert_locked(vm_bo->vm);
+ if (vm_bo->bo->parent) {
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
- else
+ spin_unlock(&vm_bo->vm->status_lock);
+ } else {
amdgpu_vm_bo_idle(vm_bo);
+ }
}
/**
@@ -343,330 +254,256 @@ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
{
- spin_lock(&vm_bo->vm->invalidated_lock);
+ amdgpu_vm_assert_locked(vm_bo->vm);
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->done);
- spin_unlock(&vm_bo->vm->invalidated_lock);
+ spin_unlock(&vm_bo->vm->status_lock);
}
/**
- * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
- *
- * @base: base structure for tracking BO usage in a VM
- * @vm: vm to which bo is to be added
- * @bo: amdgpu buffer object
- *
- * Initialize a bo_va_base structure and add it to the appropriate lists
+ * amdgpu_vm_bo_reset_state_machine - reset the vm_bo state machine
+ * @vm: the VM whose state machine to reset
*
+ * Move all vm_bo objects in the VM into a state where they will be updated
+ * again during validation.
*/
-static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
- struct amdgpu_vm *vm,
- struct amdgpu_bo *bo)
+static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
{
- base->vm = vm;
- base->bo = bo;
- base->next = NULL;
- INIT_LIST_HEAD(&base->vm_status);
-
- if (!bo)
- return;
- base->next = bo->vm_bo;
- bo->vm_bo = base;
+ struct amdgpu_vm_bo_base *vm_bo, *tmp;
- if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv)
- return;
+ amdgpu_vm_assert_locked(vm);
- vm->bulk_moveable = false;
- if (bo->tbo.type == ttm_bo_type_kernel && bo->parent)
- amdgpu_vm_bo_relocated(base);
- else
- amdgpu_vm_bo_idle(base);
+ spin_lock(&vm->status_lock);
+ list_splice_init(&vm->done, &vm->invalidated);
+ list_for_each_entry(vm_bo, &vm->invalidated, vm_status)
+ vm_bo->moved = true;
- if (bo->preferred_domains &
- amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type))
- return;
+ list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) {
+ struct amdgpu_bo *bo = vm_bo->bo;
- /*
- * we checked all the prerequisites, but it looks like this per vm bo
- * is currently evicted. add the bo to the evicted list to make sure it
- * is validated on next vm use to avoid fault.
- * */
- amdgpu_vm_bo_evicted(base);
+ vm_bo->moved = true;
+ if (!bo || bo->tbo.type != ttm_bo_type_kernel)
+ list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
+ else if (bo->parent)
+ list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
+ }
+ spin_unlock(&vm->status_lock);
}
/**
- * amdgpu_vm_pt_parent - get the parent page directory
- *
- * @pt: child page table
+ * amdgpu_vm_update_shared - helper to update shared memory stat
+ * @base: base structure for tracking BO usage in a VM
*
- * Helper to get the parent entry for the child page table. NULL if we are at
- * the root page directory.
- */
-static struct amdgpu_vm_bo_base *amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt)
-{
- struct amdgpu_bo *parent = pt->bo->parent;
-
- if (!parent)
- return NULL;
-
- return parent->vm_bo;
+ * Takes the vm status_lock and updates the shared memory stat. If the basic
+ * stat changed (e.g. buffer was moved) amdgpu_vm_update_stats needs to be called
+ * as well.
+ */
+static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base)
+{
+ struct amdgpu_vm *vm = base->vm;
+ struct amdgpu_bo *bo = base->bo;
+ uint64_t size = amdgpu_bo_size(bo);
+ uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
+ bool shared;
+
+ dma_resv_assert_held(bo->tbo.base.resv);
+ spin_lock(&vm->status_lock);
+ shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
+ if (base->shared != shared) {
+ base->shared = shared;
+ if (shared) {
+ vm->stats[bo_memtype].drm.shared += size;
+ vm->stats[bo_memtype].drm.private -= size;
+ } else {
+ vm->stats[bo_memtype].drm.shared -= size;
+ vm->stats[bo_memtype].drm.private += size;
+ }
+ }
+ spin_unlock(&vm->status_lock);
}
-/*
- * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt
- */
-struct amdgpu_vm_pt_cursor {
- uint64_t pfn;
- struct amdgpu_vm_bo_base *parent;
- struct amdgpu_vm_bo_base *entry;
- unsigned level;
-};
-
/**
- * amdgpu_vm_pt_start - start PD/PT walk
- *
- * @adev: amdgpu_device pointer
- * @vm: amdgpu_vm structure
- * @start: start address of the walk
- * @cursor: state to initialize
+ * amdgpu_vm_bo_update_shared - callback when bo gets shared/unshared
+ * @bo: amdgpu buffer object
*
- * Initialize a amdgpu_vm_pt_cursor to start a walk.
+ * Update the per VM stats for all the vm if needed from private to shared or
+ * vice versa.
*/
-static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
- struct amdgpu_vm *vm, uint64_t start,
- struct amdgpu_vm_pt_cursor *cursor)
+void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo)
{
- cursor->pfn = start;
- cursor->parent = NULL;
- cursor->entry = &vm->root;
- cursor->level = adev->vm_manager.root_level;
+ struct amdgpu_vm_bo_base *base;
+
+ for (base = bo->vm_bo; base; base = base->next)
+ amdgpu_vm_update_shared(base);
}
/**
- * amdgpu_vm_pt_descendant - go to child node
- *
- * @adev: amdgpu_device pointer
- * @cursor: current state
+ * amdgpu_vm_update_stats_locked - helper to update normal memory stat
+ * @base: base structure for tracking BO usage in a VM
+ * @res: the ttm_resource to use for the purpose of accounting, may or may not
+ * be bo->tbo.resource
+ * @sign: if we should add (+1) or subtract (-1) from the stat
*
- * Walk to the child node of the current node.
- * Returns:
- * True if the walk was possible, false otherwise.
+ * Caller needs to have the vm status_lock held. Useful for when multiple updates
+ * need to happen at the same time.
*/
-static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
- struct amdgpu_vm_pt_cursor *cursor)
+static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base,
+ struct ttm_resource *res, int sign)
{
- unsigned mask, shift, idx;
+ struct amdgpu_vm *vm = base->vm;
+ struct amdgpu_bo *bo = base->bo;
+ int64_t size = sign * amdgpu_bo_size(bo);
+ uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
- if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry ||
- !cursor->entry->bo)
- return false;
+ /* For drm-total- and drm-shared-, BO are accounted by their preferred
+ * placement, see also amdgpu_bo_mem_stats_placement.
+ */
+ if (base->shared)
+ vm->stats[bo_memtype].drm.shared += size;
+ else
+ vm->stats[bo_memtype].drm.private += size;
- mask = amdgpu_vm_entries_mask(adev, cursor->level);
- shift = amdgpu_vm_level_shift(adev, cursor->level);
+ if (res && res->mem_type < __AMDGPU_PL_NUM) {
+ uint32_t res_memtype = res->mem_type;
- ++cursor->level;
- idx = (cursor->pfn >> shift) & mask;
- cursor->parent = cursor->entry;
- cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx];
- return true;
+ vm->stats[res_memtype].drm.resident += size;
+ /* A BO only counts as purgeable if it is resident,
+ * since otherwise there's nothing to purge.
+ */
+ if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
+ vm->stats[res_memtype].drm.purgeable += size;
+ if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(res_memtype)))
+ vm->stats[bo_memtype].evicted += size;
+ }
}
/**
- * amdgpu_vm_pt_sibling - go to sibling node
- *
- * @adev: amdgpu_device pointer
- * @cursor: current state
+ * amdgpu_vm_update_stats - helper to update normal memory stat
+ * @base: base structure for tracking BO usage in a VM
+ * @res: the ttm_resource to use for the purpose of accounting, may or may not
+ * be bo->tbo.resource
+ * @sign: if we should add (+1) or subtract (-1) from the stat
*
- * Walk to the sibling node of the current node.
- * Returns:
- * True if the walk was possible, false otherwise.
+ * Updates the basic memory stat when bo is added/deleted/moved.
*/
-static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
- struct amdgpu_vm_pt_cursor *cursor)
+void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
+ struct ttm_resource *res, int sign)
{
- unsigned shift, num_entries;
-
- /* Root doesn't have a sibling */
- if (!cursor->parent)
- return false;
-
- /* Go to our parents and see if we got a sibling */
- shift = amdgpu_vm_level_shift(adev, cursor->level - 1);
- num_entries = amdgpu_vm_num_entries(adev, cursor->level - 1);
+ struct amdgpu_vm *vm = base->vm;
- if (cursor->entry == &to_amdgpu_bo_vm(cursor->parent->bo)->entries[num_entries - 1])
- return false;
-
- cursor->pfn += 1ULL << shift;
- cursor->pfn &= ~((1ULL << shift) - 1);
- ++cursor->entry;
- return true;
+ spin_lock(&vm->status_lock);
+ amdgpu_vm_update_stats_locked(base, res, sign);
+ spin_unlock(&vm->status_lock);
}
/**
- * amdgpu_vm_pt_ancestor - go to parent node
+ * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
+ *
+ * @base: base structure for tracking BO usage in a VM
+ * @vm: vm to which bo is to be added
+ * @bo: amdgpu buffer object
*
- * @cursor: current state
+ * Initialize a bo_va_base structure and add it to the appropriate lists
*
- * Walk to the parent node of the current node.
- * Returns:
- * True if the walk was possible, false otherwise.
*/
-static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
+void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
+ struct amdgpu_vm *vm, struct amdgpu_bo *bo)
{
- if (!cursor->parent)
- return false;
+ base->vm = vm;
+ base->bo = bo;
+ base->next = NULL;
+ INIT_LIST_HEAD(&base->vm_status);
- --cursor->level;
- cursor->entry = cursor->parent;
- cursor->parent = amdgpu_vm_pt_parent(cursor->parent);
- return true;
-}
+ if (!bo)
+ return;
+ base->next = bo->vm_bo;
+ bo->vm_bo = base;
-/**
- * amdgpu_vm_pt_next - get next PD/PT in hieratchy
- *
- * @adev: amdgpu_device pointer
- * @cursor: current state
- *
- * Walk the PD/PT tree to the next node.
- */
-static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
- struct amdgpu_vm_pt_cursor *cursor)
-{
- /* First try a newborn child */
- if (amdgpu_vm_pt_descendant(adev, cursor))
+ spin_lock(&vm->status_lock);
+ base->shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
+ amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1);
+ spin_unlock(&vm->status_lock);
+
+ if (!amdgpu_vm_is_bo_always_valid(vm, bo))
return;
- /* If that didn't worked try to find a sibling */
- while (!amdgpu_vm_pt_sibling(adev, cursor)) {
- /* No sibling, go to our parents and grandparents */
- if (!amdgpu_vm_pt_ancestor(cursor)) {
- cursor->pfn = ~0ll;
- return;
- }
- }
-}
+ dma_resv_assert_held(vm->root.bo->tbo.base.resv);
-/**
- * amdgpu_vm_pt_first_dfs - start a deep first search
- *
- * @adev: amdgpu_device structure
- * @vm: amdgpu_vm structure
- * @start: optional cursor to start with
- * @cursor: state to initialize
- *
- * Starts a deep first traversal of the PD/PT tree.
- */
-static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_vm_pt_cursor *start,
- struct amdgpu_vm_pt_cursor *cursor)
-{
- if (start)
- *cursor = *start;
+ ttm_bo_set_bulk_move(&bo->tbo, &vm->lru_bulk_move);
+ if (bo->tbo.type == ttm_bo_type_kernel && bo->parent)
+ amdgpu_vm_bo_relocated(base);
else
- amdgpu_vm_pt_start(adev, vm, 0, cursor);
- while (amdgpu_vm_pt_descendant(adev, cursor));
-}
-
-/**
- * amdgpu_vm_pt_continue_dfs - check if the deep first search should continue
- *
- * @start: starting point for the search
- * @entry: current entry
- *
- * Returns:
- * True when the search should continue, false otherwise.
- */
-static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start,
- struct amdgpu_vm_bo_base *entry)
-{
- return entry && (!start || entry != start->entry);
-}
+ amdgpu_vm_bo_idle(base);
-/**
- * amdgpu_vm_pt_next_dfs - get the next node for a deep first search
- *
- * @adev: amdgpu_device structure
- * @cursor: current state
- *
- * Move the cursor to the next node in a deep first search.
- */
-static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
- struct amdgpu_vm_pt_cursor *cursor)
-{
- if (!cursor->entry)
+ if (bo->preferred_domains &
+ amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type))
return;
- if (!cursor->parent)
- cursor->entry = NULL;
- else if (amdgpu_vm_pt_sibling(adev, cursor))
- while (amdgpu_vm_pt_descendant(adev, cursor));
- else
- amdgpu_vm_pt_ancestor(cursor);
+ /*
+ * we checked all the prerequisites, but it looks like this per vm bo
+ * is currently evicted. add the bo to the evicted list to make sure it
+ * is validated on next vm use to avoid fault.
+ * */
+ amdgpu_vm_bo_evicted(base);
}
-/*
- * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs
- */
-#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \
- for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)), \
- (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
- amdgpu_vm_pt_continue_dfs((start), (entry)); \
- (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor)))
-
/**
- * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
+ * amdgpu_vm_lock_pd - lock PD in drm_exec
*
* @vm: vm providing the BOs
- * @validated: head of validation list
- * @entry: entry to add
+ * @exec: drm execution context
+ * @num_fences: number of extra fences to reserve
*
- * Add the page directory to the list of BOs to
- * validate for command submission.
+ * Lock the VM root PD in the DRM execution context.
*/
-void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
- struct list_head *validated,
- struct amdgpu_bo_list_entry *entry)
+int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences)
{
- entry->priority = 0;
- entry->tv.bo = &vm->root.bo->tbo;
- /* Two for VM updates, one for TTM and one for the CS job */
- entry->tv.num_shared = 4;
- entry->user_pages = NULL;
- list_add(&entry->tv.head, validated);
+ /* We need at least two fences for the VM PD/PT updates */
+ return drm_exec_prepare_obj(exec, &vm->root.bo->tbo.base,
+ 2 + num_fences);
}
/**
- * amdgpu_vm_del_from_lru_notify - update bulk_moveable flag
- *
- * @bo: BO which was removed from the LRU
+ * amdgpu_vm_lock_done_list - lock all BOs on the done list
+ * @vm: vm providing the BOs
+ * @exec: drm execution context
+ * @num_fences: number of extra fences to reserve
*
- * Make sure the bulk_moveable flag is updated when a BO is removed from the
- * LRU.
+ * Lock the BOs on the done list in the DRM execution context.
*/
-void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
+int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences)
{
- struct amdgpu_bo *abo;
- struct amdgpu_vm_bo_base *bo_base;
+ struct list_head *prev = &vm->done;
+ struct amdgpu_bo_va *bo_va;
+ struct amdgpu_bo *bo;
+ int ret;
- if (!amdgpu_bo_is_amdgpu_bo(bo))
- return;
+ /* We can only trust prev->next while holding the lock */
+ spin_lock(&vm->status_lock);
+ while (!list_is_head(prev->next, &vm->done)) {
+ bo_va = list_entry(prev->next, typeof(*bo_va), base.vm_status);
- if (bo->pin_count)
- return;
+ bo = bo_va->base.bo;
+ if (bo) {
+ amdgpu_bo_ref(bo);
+ spin_unlock(&vm->status_lock);
- abo = ttm_to_amdgpu_bo(bo);
- if (!abo->parent)
- return;
- for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) {
- struct amdgpu_vm *vm = bo_base->vm;
+ ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 1);
+ amdgpu_bo_unref(&bo);
+ if (unlikely(ret))
+ return ret;
- if (abo->tbo.base.resv == vm->root.bo->tbo.base.resv)
- vm->bulk_moveable = false;
+ spin_lock(&vm->status_lock);
+ }
+ prev = prev->next;
}
+ spin_unlock(&vm->status_lock);
+ return 0;
}
+
/**
* amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
*
@@ -679,71 +516,110 @@ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
{
- struct amdgpu_vm_bo_base *bo_base;
+ spin_lock(&adev->mman.bdev.lru_lock);
+ ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
+ spin_unlock(&adev->mman.bdev.lru_lock);
+}
- if (vm->bulk_moveable) {
- spin_lock(&adev->mman.bdev.lru_lock);
- ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
- spin_unlock(&adev->mman.bdev.lru_lock);
- return;
- }
+/* Create scheduler entities for page table updates */
+static int amdgpu_vm_init_entities(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+ int r;
- memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
+ r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
+ adev->vm_manager.vm_pte_scheds,
+ adev->vm_manager.vm_pte_num_scheds, NULL);
+ if (r)
+ goto error;
- spin_lock(&adev->mman.bdev.lru_lock);
- list_for_each_entry(bo_base, &vm->idle, vm_status) {
- struct amdgpu_bo *bo = bo_base->bo;
- struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);
+ return drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL,
+ adev->vm_manager.vm_pte_scheds,
+ adev->vm_manager.vm_pte_num_scheds, NULL);
- if (!bo->parent)
- continue;
+error:
+ drm_sched_entity_destroy(&vm->immediate);
+ return r;
+}
- ttm_bo_move_to_lru_tail(&bo->tbo, bo->tbo.resource,
- &vm->lru_bulk_move);
- if (shadow)
- ttm_bo_move_to_lru_tail(&shadow->tbo,
- shadow->tbo.resource,
- &vm->lru_bulk_move);
- }
- spin_unlock(&adev->mman.bdev.lru_lock);
+/* Destroy the entities for page table updates again */
+static void amdgpu_vm_fini_entities(struct amdgpu_vm *vm)
+{
+ drm_sched_entity_destroy(&vm->immediate);
+ drm_sched_entity_destroy(&vm->delayed);
+}
- vm->bulk_moveable = true;
+/**
+ * amdgpu_vm_generation - return the page table re-generation counter
+ * @adev: the amdgpu_device
+ * @vm: optional VM to check, might be NULL
+ *
+ * Returns a page table re-generation token to allow checking if submissions
+ * are still valid to use this VM. The VM parameter might be NULL in which case
+ * just the VRAM lost counter will be used.
+ */
+uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ uint64_t result = (u64)atomic_read(&adev->vram_lost_counter) << 32;
+
+ if (!vm)
+ return result;
+
+ result += lower_32_bits(vm->generation);
+ /* Add one if the page tables will be re-generated on next CS */
+ if (drm_sched_entity_error(&vm->delayed))
+ ++result;
+
+ return result;
}
/**
- * amdgpu_vm_validate_pt_bos - validate the page table BOs
+ * amdgpu_vm_validate - validate evicted BOs tracked in the VM
*
* @adev: amdgpu device pointer
* @vm: vm providing the BOs
+ * @ticket: optional reservation ticket used to reserve the VM
* @validate: callback to do the validation
* @param: parameter for the validation callback
*
- * Validate the page table BOs on command submission if neccessary.
+ * Validate the page table BOs and per-VM BOs on command submission if
+ * necessary. If a ticket is given, also try to validate evicted user queue
+ * BOs. They must already be reserved with the given ticket.
*
* Returns:
* Validation result.
*/
-int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- int (*validate)(void *p, struct amdgpu_bo *bo),
- void *param)
+int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket,
+ int (*validate)(void *p, struct amdgpu_bo *bo),
+ void *param)
{
- struct amdgpu_vm_bo_base *bo_base, *tmp;
+ uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm);
+ struct amdgpu_vm_bo_base *bo_base;
+ struct amdgpu_bo *bo;
int r;
- vm->bulk_moveable &= list_empty(&vm->evicted);
+ if (vm->generation != new_vm_generation) {
+ vm->generation = new_vm_generation;
+ amdgpu_vm_bo_reset_state_machine(vm);
+ amdgpu_vm_fini_entities(vm);
+ r = amdgpu_vm_init_entities(adev, vm);
+ if (r)
+ return r;
+ }
+
+ spin_lock(&vm->status_lock);
+ while (!list_empty(&vm->evicted)) {
+ bo_base = list_first_entry(&vm->evicted,
+ struct amdgpu_vm_bo_base,
+ vm_status);
+ spin_unlock(&vm->status_lock);
- list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
- struct amdgpu_bo *bo = bo_base->bo;
- struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);
+ bo = bo_base->bo;
r = validate(param, bo);
if (r)
return r;
- if (shadow) {
- r = validate(param, shadow);
- if (r)
- return r;
- }
if (bo->tbo.type != ttm_bo_type_kernel) {
amdgpu_vm_bo_moved(bo_base);
@@ -751,334 +627,67 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
vm->update_funcs->map_table(to_amdgpu_bo_vm(bo));
amdgpu_vm_bo_relocated(bo_base);
}
+ spin_lock(&vm->status_lock);
}
+ while (ticket && !list_empty(&vm->evicted_user)) {
+ bo_base = list_first_entry(&vm->evicted_user,
+ struct amdgpu_vm_bo_base,
+ vm_status);
+ spin_unlock(&vm->status_lock);
- amdgpu_vm_eviction_lock(vm);
- vm->evicting = false;
- amdgpu_vm_eviction_unlock(vm);
-
- return 0;
-}
-
-/**
- * amdgpu_vm_ready - check VM is ready for updates
- *
- * @vm: VM to check
- *
- * Check if all VM PDs/PTs are ready for updates
- *
- * Returns:
- * True if eviction list is empty.
- */
-bool amdgpu_vm_ready(struct amdgpu_vm *vm)
-{
- return list_empty(&vm->evicted);
-}
-
-/**
- * amdgpu_vm_clear_bo - initially clear the PDs/PTs
- *
- * @adev: amdgpu_device pointer
- * @vm: VM to clear BO from
- * @vmbo: BO to clear
- * @immediate: use an immediate update
- *
- * Root PD needs to be reserved when calling this.
- *
- * Returns:
- * 0 on success, errno otherwise.
- */
-static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_bo_vm *vmbo,
- bool immediate)
-{
- struct ttm_operation_ctx ctx = { true, false };
- unsigned level = adev->vm_manager.root_level;
- struct amdgpu_vm_update_params params;
- struct amdgpu_bo *ancestor = &vmbo->bo;
- struct amdgpu_bo *bo = &vmbo->bo;
- unsigned entries, ats_entries;
- uint64_t addr;
- int r, idx;
-
- /* Figure out our place in the hierarchy */
- if (ancestor->parent) {
- ++level;
- while (ancestor->parent->parent) {
- ++level;
- ancestor = ancestor->parent;
- }
- }
-
- entries = amdgpu_bo_size(bo) / 8;
- if (!vm->pte_support_ats) {
- ats_entries = 0;
-
- } else if (!bo->parent) {
- ats_entries = amdgpu_vm_num_ats_entries(adev);
- ats_entries = min(ats_entries, entries);
- entries -= ats_entries;
-
- } else {
- struct amdgpu_vm_bo_base *pt;
-
- pt = ancestor->vm_bo;
- ats_entries = amdgpu_vm_num_ats_entries(adev);
- if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >= ats_entries) {
- ats_entries = 0;
- } else {
- ats_entries = entries;
- entries = 0;
- }
- }
-
- r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- if (r)
- return r;
-
- if (vmbo->shadow) {
- struct amdgpu_bo *shadow = vmbo->shadow;
+ bo = bo_base->bo;
+ dma_resv_assert_held(bo->tbo.base.resv);
- r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx);
+ r = validate(param, bo);
if (r)
return r;
- }
-
- if (!drm_dev_enter(adev_to_drm(adev), &idx))
- return -ENODEV;
-
- r = vm->update_funcs->map_table(vmbo);
- if (r)
- goto exit;
-
- memset(&params, 0, sizeof(params));
- params.adev = adev;
- params.vm = vm;
- params.immediate = immediate;
-
- r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
- if (r)
- goto exit;
- addr = 0;
- if (ats_entries) {
- uint64_t value = 0, flags;
+ amdgpu_vm_bo_invalidated(bo_base);
- flags = AMDGPU_PTE_DEFAULT_ATC;
- if (level != AMDGPU_VM_PTB) {
- /* Handle leaf PDEs as PTEs */
- flags |= AMDGPU_PDE_PTE;
- amdgpu_gmc_get_vm_pde(adev, level, &value, &flags);
- }
-
- r = vm->update_funcs->update(&params, vmbo, addr, 0, ats_entries,
- value, flags);
- if (r)
- goto exit;
-
- addr += ats_entries * 8;
+ spin_lock(&vm->status_lock);
}
+ spin_unlock(&vm->status_lock);
- if (entries) {
- uint64_t value = 0, flags = 0;
-
- if (adev->asic_type >= CHIP_VEGA10) {
- if (level != AMDGPU_VM_PTB) {
- /* Handle leaf PDEs as PTEs */
- flags |= AMDGPU_PDE_PTE;
- amdgpu_gmc_get_vm_pde(adev, level,
- &value, &flags);
- } else {
- /* Workaround for fault priority problem on GMC9 */
- flags = AMDGPU_PTE_EXECUTABLE;
- }
- }
-
- r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
- value, flags);
- if (r)
- goto exit;
- }
-
- r = vm->update_funcs->commit(&params, NULL);
-exit:
- drm_dev_exit(idx);
- return r;
-}
-
-/**
- * amdgpu_vm_pt_create - create bo for PD/PT
- *
- * @adev: amdgpu_device pointer
- * @vm: requesting vm
- * @level: the page table level
- * @immediate: use a immediate update
- * @vmbo: pointer to the buffer object pointer
- */
-static int amdgpu_vm_pt_create(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- int level, bool immediate,
- struct amdgpu_bo_vm **vmbo)
-{
- struct amdgpu_bo_param bp;
- struct amdgpu_bo *bo;
- struct dma_resv *resv;
- unsigned int num_entries;
- int r;
-
- memset(&bp, 0, sizeof(bp));
-
- bp.size = amdgpu_vm_bo_size(adev, level);
- bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
- bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
- bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
- bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
- AMDGPU_GEM_CREATE_CPU_GTT_USWC;
-
- if (level < AMDGPU_VM_PTB)
- num_entries = amdgpu_vm_num_entries(adev, level);
- else
- num_entries = 0;
-
- bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries);
-
- if (vm->use_cpu_for_update)
- bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
-
- bp.type = ttm_bo_type_kernel;
- bp.no_wait_gpu = immediate;
- if (vm->root.bo)
- bp.resv = vm->root.bo->tbo.base.resv;
-
- r = amdgpu_bo_create_vm(adev, &bp, vmbo);
- if (r)
- return r;
-
- bo = &(*vmbo)->bo;
- if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) {
- (*vmbo)->shadow = NULL;
- return 0;
- }
-
- if (!bp.resv)
- WARN_ON(dma_resv_lock(bo->tbo.base.resv,
- NULL));
- resv = bp.resv;
- memset(&bp, 0, sizeof(bp));
- bp.size = amdgpu_vm_bo_size(adev, level);
- bp.domain = AMDGPU_GEM_DOMAIN_GTT;
- bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
- bp.type = ttm_bo_type_kernel;
- bp.resv = bo->tbo.base.resv;
- bp.bo_ptr_size = sizeof(struct amdgpu_bo);
-
- r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
-
- if (!resv)
- dma_resv_unlock(bo->tbo.base.resv);
-
- if (r) {
- amdgpu_bo_unref(&bo);
- return r;
- }
-
- (*vmbo)->shadow->parent = amdgpu_bo_ref(bo);
- amdgpu_bo_add_to_shadow_list(*vmbo);
+ amdgpu_vm_eviction_lock(vm);
+ vm->evicting = false;
+ amdgpu_vm_eviction_unlock(vm);
return 0;
}
/**
- * amdgpu_vm_alloc_pts - Allocate a specific page table
+ * amdgpu_vm_ready - check VM is ready for updates
*
- * @adev: amdgpu_device pointer
- * @vm: VM to allocate page tables for
- * @cursor: Which page table to allocate
- * @immediate: use an immediate update
+ * @vm: VM to check
*
- * Make sure a specific page table or directory is allocated.
+ * Check if all VM PDs/PTs are ready for updates
*
* Returns:
- * 1 if page table needed to be allocated, 0 if page table was already
- * allocated, negative errno if an error occurred.
+ * True if VM is not evicting and all VM entities are not stopped
*/
-static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_vm_pt_cursor *cursor,
- bool immediate)
+bool amdgpu_vm_ready(struct amdgpu_vm *vm)
{
- struct amdgpu_vm_bo_base *entry = cursor->entry;
- struct amdgpu_bo *pt_bo;
- struct amdgpu_bo_vm *pt;
- int r;
-
- if (entry->bo)
- return 0;
-
- r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
- if (r)
- return r;
-
- /* Keep a reference to the root directory to avoid
- * freeing them up in the wrong order.
- */
- pt_bo = &pt->bo;
- pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo);
- amdgpu_vm_bo_base_init(entry, vm, pt_bo);
- r = amdgpu_vm_clear_bo(adev, vm, pt, immediate);
- if (r)
- goto error_free_pt;
+ bool ret;
- return 0;
-
-error_free_pt:
- amdgpu_bo_unref(&pt->shadow);
- amdgpu_bo_unref(&pt_bo);
- return r;
-}
+ amdgpu_vm_assert_locked(vm);
-/**
- * amdgpu_vm_free_table - fre one PD/PT
- *
- * @entry: PDE to free
- */
-static void amdgpu_vm_free_table(struct amdgpu_vm_bo_base *entry)
-{
- struct amdgpu_bo *shadow;
-
- if (!entry->bo)
- return;
- shadow = amdgpu_bo_shadowed(entry->bo);
- entry->bo->vm_bo = NULL;
- list_del(&entry->vm_status);
- amdgpu_bo_unref(&shadow);
- amdgpu_bo_unref(&entry->bo);
-}
+ amdgpu_vm_eviction_lock(vm);
+ ret = !vm->evicting;
+ amdgpu_vm_eviction_unlock(vm);
-/**
- * amdgpu_vm_free_pts - free PD/PT levels
- *
- * @adev: amdgpu device structure
- * @vm: amdgpu vm structure
- * @start: optional cursor where to start freeing PDs/PTs
- *
- * Free the page directory or page table level and all sub levels.
- */
-static void amdgpu_vm_free_pts(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_vm_pt_cursor *start)
-{
- struct amdgpu_vm_pt_cursor cursor;
- struct amdgpu_vm_bo_base *entry;
+ spin_lock(&vm->status_lock);
+ ret &= list_empty(&vm->evicted);
+ spin_unlock(&vm->status_lock);
- vm->bulk_moveable = false;
+ spin_lock(&vm->immediate.lock);
+ ret &= !vm->immediate.stopped;
+ spin_unlock(&vm->immediate.lock);
- for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
- amdgpu_vm_free_table(entry);
+ spin_lock(&vm->delayed.lock);
+ ret &= !vm->delayed.stopped;
+ spin_unlock(&vm->delayed.lock);
- if (start)
- amdgpu_vm_free_table(start->entry);
+ return ret;
}
/**
@@ -1129,27 +738,22 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
struct amdgpu_job *job)
{
struct amdgpu_device *adev = ring->adev;
- unsigned vmhub = ring->funcs->vmhub;
+ unsigned vmhub = ring->vm_hub;
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
- struct amdgpu_vmid *id;
- bool gds_switch_needed;
- bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
if (job->vmid == 0)
return false;
- id = &id_mgr->ids[job->vmid];
- gds_switch_needed = ring->funcs->emit_gds_switch && (
- id->gds_base != job->gds_base ||
- id->gds_size != job->gds_size ||
- id->gws_base != job->gws_base ||
- id->gws_size != job->gws_size ||
- id->oa_base != job->oa_base ||
- id->oa_size != job->oa_size);
-
- if (amdgpu_vmid_had_gpu_reset(adev, id))
+
+ if (job->vm_needs_flush || ring->has_compute_vm_bug)
+ return true;
+
+ if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
return true;
- return vm_flush_needed || gds_switch_needed;
+ if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
+ return true;
+
+ return false;
}
/**
@@ -1168,30 +772,25 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
bool need_pipe_sync)
{
struct amdgpu_device *adev = ring->adev;
- unsigned vmhub = ring->funcs->vmhub;
+ struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
+ unsigned vmhub = ring->vm_hub;
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
- bool gds_switch_needed = ring->funcs->emit_gds_switch && (
- id->gds_base != job->gds_base ||
- id->gds_size != job->gds_size ||
- id->gws_base != job->gws_base ||
- id->gws_size != job->gws_size ||
- id->oa_base != job->oa_base ||
- id->oa_size != job->oa_size);
+ bool spm_update_needed = job->spm_update_needed;
+ bool gds_switch_needed = ring->funcs->emit_gds_switch &&
+ job->gds_switch_needed;
bool vm_flush_needed = job->vm_needs_flush;
- struct dma_fence *fence = NULL;
+ bool cleaner_shader_needed = false;
bool pasid_mapping_needed = false;
- unsigned patch_offset = 0;
- bool update_spm_vmid_needed = (job->vm && (job->vm->reserved_vmid[vmhub] != NULL));
+ struct dma_fence *fence = NULL;
+ unsigned int patch;
int r;
- if (update_spm_vmid_needed && adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid);
-
if (amdgpu_vmid_had_gpu_reset(adev, id)) {
gds_switch_needed = true;
vm_flush_needed = true;
pasid_mapping_needed = true;
+ spm_update_needed = true;
}
mutex_lock(&id_mgr->lock);
@@ -1206,15 +805,26 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
ring->funcs->emit_wreg;
- if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
+ cleaner_shader_needed = job->run_cleaner_shader &&
+ adev->gfx.enable_cleaner_shader &&
+ ring->funcs->emit_cleaner_shader && job->base.s_fence &&
+ &job->base.s_fence->scheduled == isolation->spearhead;
+
+ if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
+ !cleaner_shader_needed)
return 0;
+ amdgpu_ring_ib_begin(ring);
if (ring->funcs->init_cond_exec)
- patch_offset = amdgpu_ring_init_cond_exec(ring);
+ patch = amdgpu_ring_init_cond_exec(ring,
+ ring->cond_exe_gpu_addr);
if (need_pipe_sync)
amdgpu_ring_emit_pipeline_sync(ring);
+ if (cleaner_shader_needed)
+ ring->funcs->emit_cleaner_shader(ring);
+
if (vm_flush_needed) {
trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
@@ -1223,10 +833,24 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
if (pasid_mapping_needed)
amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
- if (vm_flush_needed || pasid_mapping_needed) {
- r = amdgpu_fence_emit(ring, &fence, NULL, 0);
+ if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid);
+
+ if (ring->funcs->emit_gds_switch &&
+ gds_switch_needed) {
+ amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
+ job->gds_size, job->gws_base,
+ job->gws_size, job->oa_base,
+ job->oa_size);
+ }
+
+ if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
+ r = amdgpu_fence_emit(ring, job->hw_vm_fence, 0);
if (r)
return r;
+ fence = &job->hw_vm_fence->base;
+ /* get a ref for the job */
+ dma_fence_get(fence);
}
if (vm_flush_needed) {
@@ -1245,29 +869,29 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
id->pasid_mapping = dma_fence_get(fence);
mutex_unlock(&id_mgr->lock);
}
- dma_fence_put(fence);
- if (ring->funcs->emit_gds_switch && gds_switch_needed) {
- id->gds_base = job->gds_base;
- id->gds_size = job->gds_size;
- id->gws_base = job->gws_base;
- id->gws_size = job->gws_size;
- id->oa_base = job->oa_base;
- id->oa_size = job->oa_size;
- amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
- job->gds_size, job->gws_base,
- job->gws_size, job->oa_base,
- job->oa_size);
+ /*
+ * Make sure that all other submissions wait for the cleaner shader to
+ * finish before we push them to the HW.
+ */
+ if (cleaner_shader_needed) {
+ trace_amdgpu_cleaner_shader(ring, fence);
+ mutex_lock(&adev->enforce_isolation_mutex);
+ dma_fence_put(isolation->spearhead);
+ isolation->spearhead = dma_fence_get(fence);
+ mutex_unlock(&adev->enforce_isolation_mutex);
}
+ dma_fence_put(fence);
- if (ring->funcs->patch_cond_exec)
- amdgpu_ring_patch_cond_exec(ring, patch_offset);
+ amdgpu_ring_patch_cond_exec(ring, patch);
/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
if (ring->funcs->emit_switch_buffer) {
amdgpu_ring_emit_switch_buffer(ring);
amdgpu_ring_emit_switch_buffer(ring);
}
+
+ amdgpu_ring_ib_end(ring);
return 0;
}
@@ -1328,53 +952,6 @@ uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
}
/**
- * amdgpu_vm_update_pde - update a single level in the hierarchy
- *
- * @params: parameters for the update
- * @vm: requested vm
- * @entry: entry to update
- *
- * Makes sure the requested entry in parent is up to date.
- */
-static int amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params,
- struct amdgpu_vm *vm,
- struct amdgpu_vm_bo_base *entry)
-{
- struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry);
- struct amdgpu_bo *bo = parent->bo, *pbo;
- uint64_t pde, pt, flags;
- unsigned level;
-
- for (level = 0, pbo = bo->parent; pbo; ++level)
- pbo = pbo->parent;
-
- level += params->adev->vm_manager.root_level;
- amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags);
- pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8;
- return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt,
- 1, 0, flags);
-}
-
-/**
- * amdgpu_vm_invalidate_pds - mark all PDs as invalid
- *
- * @adev: amdgpu_device pointer
- * @vm: related vm
- *
- * Mark all PD level as invalid after an error.
- */
-static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev,
- struct amdgpu_vm *vm)
-{
- struct amdgpu_vm_pt_cursor cursor;
- struct amdgpu_vm_bo_base *entry;
-
- for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry)
- if (entry->bo && !entry->moved)
- amdgpu_vm_bo_relocated(entry);
-}
-
-/**
* amdgpu_vm_update_pdes - make sure that all directories are valid
*
* @adev: amdgpu_device pointer
@@ -1390,9 +967,18 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
struct amdgpu_vm *vm, bool immediate)
{
struct amdgpu_vm_update_params params;
+ struct amdgpu_vm_bo_base *entry;
+ bool flush_tlb_needed = false;
+ LIST_HEAD(relocated);
int r, idx;
- if (list_empty(&vm->relocated))
+ amdgpu_vm_assert_locked(vm);
+
+ spin_lock(&vm->status_lock);
+ list_splice_init(&vm->relocated, &relocated);
+ spin_unlock(&vm->status_lock);
+
+ if (list_empty(&relocated))
return 0;
if (!drm_dev_enter(adev_to_drm(adev), &idx))
@@ -1403,19 +989,16 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
params.vm = vm;
params.immediate = immediate;
- r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
+ r = vm->update_funcs->prepare(&params, NULL,
+ AMDGPU_KERNEL_JOB_ID_VM_UPDATE_PDES);
if (r)
- goto exit;
-
- while (!list_empty(&vm->relocated)) {
- struct amdgpu_vm_bo_base *entry;
+ goto error;
- entry = list_first_entry(&vm->relocated,
- struct amdgpu_vm_bo_base,
- vm_status);
- amdgpu_vm_bo_idle(entry);
+ list_for_each_entry(entry, &relocated, vm_status) {
+ /* vm_flush_needed after updating moved PDEs */
+ flush_tlb_needed |= entry->moved;
- r = amdgpu_vm_update_pde(&params, vm, entry);
+ r = amdgpu_vm_pde_update(&params, entry);
if (r)
goto error;
}
@@ -1423,323 +1006,149 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
r = vm->update_funcs->commit(&params, &vm->last_update);
if (r)
goto error;
- drm_dev_exit(idx);
- return 0;
+
+ if (flush_tlb_needed)
+ atomic64_inc(&vm->tlb_seq);
+
+ while (!list_empty(&relocated)) {
+ entry = list_first_entry(&relocated, struct amdgpu_vm_bo_base,
+ vm_status);
+ amdgpu_vm_bo_idle(entry);
+ }
error:
- amdgpu_vm_invalidate_pds(adev, vm);
-exit:
drm_dev_exit(idx);
return r;
}
-/*
- * amdgpu_vm_update_flags - figure out flags for PTE updates
+/**
+ * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence
+ * @fence: unused
+ * @cb: the callback structure
*
- * Make sure to set the right flags for the PTEs at the desired level.
+ * Increments the tlb sequence to make sure that future CS execute a VM flush.
*/
-static void amdgpu_vm_update_flags(struct amdgpu_vm_update_params *params,
- struct amdgpu_bo_vm *pt, unsigned int level,
- uint64_t pe, uint64_t addr,
- unsigned int count, uint32_t incr,
- uint64_t flags)
-
+static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
+ struct dma_fence_cb *cb)
{
- if (level != AMDGPU_VM_PTB) {
- flags |= AMDGPU_PDE_PTE;
- amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
-
- } else if (params->adev->asic_type >= CHIP_VEGA10 &&
- !(flags & AMDGPU_PTE_VALID) &&
- !(flags & AMDGPU_PTE_PRT)) {
+ struct amdgpu_vm_tlb_seq_struct *tlb_cb;
- /* Workaround for fault priority problem on GMC9 */
- flags |= AMDGPU_PTE_EXECUTABLE;
- }
-
- params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
- flags);
+ tlb_cb = container_of(cb, typeof(*tlb_cb), cb);
+ atomic64_inc(&tlb_cb->vm->tlb_seq);
+ kfree(tlb_cb);
}
/**
- * amdgpu_vm_fragment - get fragment for PTEs
+ * amdgpu_vm_tlb_flush - prepare TLB flush
*
- * @params: see amdgpu_vm_update_params definition
- * @start: first PTE to handle
- * @end: last PTE to handle
- * @flags: hw mapping flags
- * @frag: resulting fragment size
- * @frag_end: end of this fragment
+ * @params: parameters for update
+ * @fence: input fence to sync TLB flush with
+ * @tlb_cb: the callback structure
*
- * Returns the first possible fragment for the start and end address.
+ * Increments the tlb sequence to make sure that future CS execute a VM flush.
*/
-static void amdgpu_vm_fragment(struct amdgpu_vm_update_params *params,
- uint64_t start, uint64_t end, uint64_t flags,
- unsigned int *frag, uint64_t *frag_end)
+static void
+amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
+ struct dma_fence **fence,
+ struct amdgpu_vm_tlb_seq_struct *tlb_cb)
{
- /**
- * The MC L1 TLB supports variable sized pages, based on a fragment
- * field in the PTE. When this field is set to a non-zero value, page
- * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
- * flags are considered valid for all PTEs within the fragment range
- * and corresponding mappings are assumed to be physically contiguous.
- *
- * The L1 TLB can store a single PTE for the whole fragment,
- * significantly increasing the space available for translation
- * caching. This leads to large improvements in throughput when the
- * TLB is under pressure.
- *
- * The L2 TLB distributes small and large fragments into two
- * asymmetric partitions. The large fragment cache is significantly
- * larger. Thus, we try to use large fragments wherever possible.
- * Userspace can support this by aligning virtual base address and
- * allocation size to the fragment size.
- *
- * Starting with Vega10 the fragment size only controls the L1. The L2
- * is now directly feed with small/huge/giant pages from the walker.
- */
- unsigned max_frag;
+ struct amdgpu_vm *vm = params->vm;
- if (params->adev->asic_type < CHIP_VEGA10)
- max_frag = params->adev->vm_manager.fragment_size;
- else
- max_frag = 31;
-
- /* system pages are non continuously */
- if (params->pages_addr) {
- *frag = 0;
- *frag_end = end;
+ tlb_cb->vm = vm;
+ if (!fence || !*fence) {
+ amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
return;
}
- /* This intentionally wraps around if no bit is set */
- *frag = min((unsigned)ffs(start) - 1, (unsigned)fls64(end - start) - 1);
- if (*frag >= max_frag) {
- *frag = max_frag;
- *frag_end = end & ~((1ULL << max_frag) - 1);
+ if (!dma_fence_add_callback(*fence, &tlb_cb->cb,
+ amdgpu_vm_tlb_seq_cb)) {
+ dma_fence_put(vm->last_tlb_flush);
+ vm->last_tlb_flush = dma_fence_get(*fence);
} else {
- *frag_end = start + (1 << *frag);
+ amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
}
-}
-
-/**
- * amdgpu_vm_update_ptes - make sure that page tables are valid
- *
- * @params: see amdgpu_vm_update_params definition
- * @start: start of GPU address range
- * @end: end of GPU address range
- * @dst: destination address to map to, the next dst inside the function
- * @flags: mapping flags
- *
- * Update the page tables in the range @start - @end.
- *
- * Returns:
- * 0 for success, -EINVAL for failure.
- */
-static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
- uint64_t start, uint64_t end,
- uint64_t dst, uint64_t flags)
-{
- struct amdgpu_device *adev = params->adev;
- struct amdgpu_vm_pt_cursor cursor;
- uint64_t frag_start = start, frag_end;
- unsigned int frag;
- int r;
-
- /* figure out the initial fragment */
- amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);
-
- /* walk over the address space and update the PTs */
- amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
- while (cursor.pfn < end) {
- unsigned shift, parent_shift, mask;
- uint64_t incr, entry_end, pe_start;
- struct amdgpu_bo *pt;
-
- if (!params->unlocked) {
- /* make sure that the page tables covering the
- * address range are actually allocated
- */
- r = amdgpu_vm_alloc_pts(params->adev, params->vm,
- &cursor, params->immediate);
- if (r)
- return r;
- }
-
- shift = amdgpu_vm_level_shift(adev, cursor.level);
- parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
- if (params->unlocked) {
- /* Unlocked updates are only allowed on the leaves */
- if (amdgpu_vm_pt_descendant(adev, &cursor))
- continue;
- } else if (adev->asic_type < CHIP_VEGA10 &&
- (flags & AMDGPU_PTE_VALID)) {
- /* No huge page support before GMC v9 */
- if (cursor.level != AMDGPU_VM_PTB) {
- if (!amdgpu_vm_pt_descendant(adev, &cursor))
- return -ENOENT;
- continue;
- }
- } else if (frag < shift) {
- /* We can't use this level when the fragment size is
- * smaller than the address shift. Go to the next
- * child entry and try again.
- */
- if (amdgpu_vm_pt_descendant(adev, &cursor))
- continue;
- } else if (frag >= parent_shift) {
- /* If the fragment size is even larger than the parent
- * shift we should go up one level and check it again.
- */
- if (!amdgpu_vm_pt_ancestor(&cursor))
- return -EINVAL;
- continue;
- }
-
- pt = cursor.entry->bo;
- if (!pt) {
- /* We need all PDs and PTs for mapping something, */
- if (flags & AMDGPU_PTE_VALID)
- return -ENOENT;
-
- /* but unmapping something can happen at a higher
- * level.
- */
- if (!amdgpu_vm_pt_ancestor(&cursor))
- return -EINVAL;
-
- pt = cursor.entry->bo;
- shift = parent_shift;
- frag_end = max(frag_end, ALIGN(frag_start + 1,
- 1ULL << shift));
- }
- /* Looks good so far, calculate parameters for the update */
- incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
- mask = amdgpu_vm_entries_mask(adev, cursor.level);
- pe_start = ((cursor.pfn >> shift) & mask) * 8;
- entry_end = ((uint64_t)mask + 1) << shift;
- entry_end += cursor.pfn & ~(entry_end - 1);
- entry_end = min(entry_end, end);
-
- do {
- struct amdgpu_vm *vm = params->vm;
- uint64_t upd_end = min(entry_end, frag_end);
- unsigned nptes = (upd_end - frag_start) >> shift;
- uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);
-
- /* This can happen when we set higher level PDs to
- * silent to stop fault floods.
- */
- nptes = max(nptes, 1u);
-
- trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
- nptes, dst, incr, upd_flags,
- vm->task_info.pid,
- vm->immediate.fence_context);
- amdgpu_vm_update_flags(params, to_amdgpu_bo_vm(pt),
- cursor.level, pe_start, dst,
- nptes, incr, upd_flags);
-
- pe_start += nptes * 8;
- dst += nptes * incr;
-
- frag_start = upd_end;
- if (frag_start >= frag_end) {
- /* figure out the next fragment */
- amdgpu_vm_fragment(params, frag_start, end,
- flags, &frag, &frag_end);
- if (frag < shift)
- break;
- }
- } while (frag_start < entry_end);
-
- if (amdgpu_vm_pt_descendant(adev, &cursor)) {
- /* Free all child entries.
- * Update the tables with the flags and addresses and free up subsequent
- * tables in the case of huge pages or freed up areas.
- * This is the maximum you can free, because all other page tables are not
- * completely covered by the range and so potentially still in use.
- */
- while (cursor.pfn < frag_start) {
- /* Make sure previous mapping is freed */
- if (cursor.entry->bo) {
- params->table_freed = true;
- amdgpu_vm_free_pts(adev, params->vm, &cursor);
- }
- amdgpu_vm_pt_next(adev, &cursor);
- }
+ /* Prepare a TLB flush fence to be attached to PTs */
+ if (!params->unlocked) {
+ amdgpu_vm_tlb_fence_create(params->adev, vm, fence);
- } else if (frag >= shift) {
- /* or just move on to the next on the same level. */
- amdgpu_vm_pt_next(adev, &cursor);
- }
+ /* Makes sure no PD/PT is freed before the flush */
+ dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
+ DMA_RESV_USAGE_BOOKKEEP);
}
-
- return 0;
}
/**
- * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
+ * amdgpu_vm_update_range - update a range in the vm page table
*
- * @adev: amdgpu_device pointer of the VM
- * @bo_adev: amdgpu_device pointer of the mapped BO
- * @vm: requested vm
+ * @adev: amdgpu_device pointer to use for commands
+ * @vm: the VM to update the range
* @immediate: immediate submission in a page fault
* @unlocked: unlocked invalidation during MM callback
- * @resv: fences we need to sync to
+ * @flush_tlb: trigger tlb invalidation after update completed
+ * @allow_override: change MTYPE for local NUMA nodes
+ * @sync: fences we need to sync to
* @start: start of mapped range
* @last: last mapped entry
* @flags: flags for the entries
* @offset: offset into nodes and pages_addr
+ * @vram_base: base for vram mappings
* @res: ttm_resource to map
* @pages_addr: DMA addresses to use for mapping
* @fence: optional resulting fence
- * @table_freed: return true if page table is freed
*
* Fill in the page table entries between @start and @last.
*
* Returns:
- * 0 for success, -EINVAL for failure.
+ * 0 for success, negative error code for failure.
*/
-int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
- struct amdgpu_device *bo_adev,
- struct amdgpu_vm *vm, bool immediate,
- bool unlocked, struct dma_resv *resv,
- uint64_t start, uint64_t last,
- uint64_t flags, uint64_t offset,
- struct ttm_resource *res,
- dma_addr_t *pages_addr,
- struct dma_fence **fence,
- bool *table_freed)
+int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ bool immediate, bool unlocked, bool flush_tlb,
+ bool allow_override, struct amdgpu_sync *sync,
+ uint64_t start, uint64_t last, uint64_t flags,
+ uint64_t offset, uint64_t vram_base,
+ struct ttm_resource *res, dma_addr_t *pages_addr,
+ struct dma_fence **fence)
{
+ struct amdgpu_vm_tlb_seq_struct *tlb_cb;
struct amdgpu_vm_update_params params;
struct amdgpu_res_cursor cursor;
- enum amdgpu_sync_mode sync_mode;
int r, idx;
if (!drm_dev_enter(adev_to_drm(adev), &idx))
return -ENODEV;
+ tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL);
+ if (!tlb_cb) {
+ drm_dev_exit(idx);
+ return -ENOMEM;
+ }
+
+ /* On Vega20 with XGMI, PTEs can get inadvertently cached in the L2 texture
+ * cache, so unconditionally do a heavy-weight TLB flush.
+ */
+ flush_tlb |= adev->gmc.xgmi.num_physical_nodes &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0);
+
+ /*
+ * On GFX8 and older any 8 PTE block with a valid bit set enters the TLB
+ */
+ flush_tlb |= amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 0, 0);
+
memset(&params, 0, sizeof(params));
params.adev = adev;
params.vm = vm;
params.immediate = immediate;
params.pages_addr = pages_addr;
params.unlocked = unlocked;
-
- /* Implicitly sync to command submissions in the same VM before
- * unmapping. Sync to moving fences before mapping.
- */
- if (!(flags & AMDGPU_PTE_VALID))
- sync_mode = AMDGPU_SYNC_EQ_OWNER;
- else
- sync_mode = AMDGPU_SYNC_EXPLICIT;
+ params.needs_flush = flush_tlb;
+ params.allow_override = allow_override;
+ INIT_LIST_HEAD(&params.tlb_flush_waitlist);
amdgpu_vm_eviction_lock(vm);
if (vm->evicting) {
r = -EBUSY;
- goto error_unlock;
+ goto error_free;
}
if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
@@ -1750,9 +1159,10 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
dma_fence_put(tmp);
}
- r = vm->update_funcs->prepare(&params, resv, sync_mode);
+ r = vm->update_funcs->prepare(&params, sync,
+ AMDGPU_KERNEL_JOB_ID_VM_UPDATE_RANGE);
if (r)
- goto error_unlock;
+ goto error_free;
amdgpu_res_first(pages_addr ? NULL : res, offset,
(last - start + 1) * AMDGPU_GPU_PAGE_SIZE, &cursor);
@@ -1779,6 +1189,8 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
pages_addr[idx - 1] + PAGE_SIZE))
break;
}
+ if (!contiguous)
+ count--;
num_entries = count *
AMDGPU_GPU_PAGES_IN_CPU_PAGE;
}
@@ -1791,84 +1203,53 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
params.pages_addr = NULL;
}
- } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
- addr = bo_adev->vm_manager.vram_base_offset +
- cursor.start;
+ } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT_FLAG(adev))) {
+ addr = vram_base + cursor.start;
} else {
addr = 0;
}
tmp = start + num_entries;
- r = amdgpu_vm_update_ptes(&params, start, tmp, addr, flags);
+ r = amdgpu_vm_ptes_update(&params, start, tmp, addr, flags);
if (r)
- goto error_unlock;
+ goto error_free;
amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE);
start = tmp;
}
r = vm->update_funcs->commit(&params, fence);
+ if (r)
+ goto error_free;
- if (table_freed)
- *table_freed = *table_freed || params.table_freed;
+ if (params.needs_flush) {
+ amdgpu_vm_tlb_flush(&params, fence, tlb_cb);
+ tlb_cb = NULL;
+ }
-error_unlock:
+ amdgpu_vm_pt_free_list(adev, &params);
+
+error_free:
+ kfree(tlb_cb);
amdgpu_vm_eviction_unlock(vm);
drm_dev_exit(idx);
return r;
}
-void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
- uint64_t *gtt_mem, uint64_t *cpu_mem)
+void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM])
{
- struct amdgpu_bo_va *bo_va, *tmp;
-
- list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
- if (!bo_va->base.bo)
- continue;
- amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
- gtt_mem, cpu_mem);
- }
- list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status) {
- if (!bo_va->base.bo)
- continue;
- amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
- gtt_mem, cpu_mem);
- }
- list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) {
- if (!bo_va->base.bo)
- continue;
- amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
- gtt_mem, cpu_mem);
- }
- list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
- if (!bo_va->base.bo)
- continue;
- amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
- gtt_mem, cpu_mem);
- }
- spin_lock(&vm->invalidated_lock);
- list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
- if (!bo_va->base.bo)
- continue;
- amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
- gtt_mem, cpu_mem);
- }
- list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status) {
- if (!bo_va->base.bo)
- continue;
- amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
- gtt_mem, cpu_mem);
- }
- spin_unlock(&vm->invalidated_lock);
+ spin_lock(&vm->status_lock);
+ memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM);
+ spin_unlock(&vm->status_lock);
}
+
/**
* amdgpu_vm_bo_update - update all BO mappings in the vm page table
*
* @adev: amdgpu_device pointer
* @bo_va: requested BO and VM object
* @clear: if true clear the entries
- * @table_freed: return true if page table is freed
*
* Fill in the page table entries for @bo_va.
*
@@ -1876,59 +1257,90 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
* 0 for success, -EINVAL for failure.
*/
int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
- bool clear, bool *table_freed)
+ bool clear)
{
struct amdgpu_bo *bo = bo_va->base.bo;
struct amdgpu_vm *vm = bo_va->base.vm;
struct amdgpu_bo_va_mapping *mapping;
+ struct dma_fence **last_update;
dma_addr_t *pages_addr = NULL;
struct ttm_resource *mem;
- struct dma_fence **last_update;
- struct dma_resv *resv;
+ struct amdgpu_sync sync;
+ bool flush_tlb = clear;
+ uint64_t vram_base;
uint64_t flags;
- struct amdgpu_device *bo_adev = adev;
+ bool uncached;
int r;
- if (clear || !bo) {
+ amdgpu_sync_create(&sync);
+ if (clear) {
+ mem = NULL;
+
+ /* Implicitly sync to command submissions in the same VM before
+ * unmapping.
+ */
+ r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv,
+ AMDGPU_SYNC_EQ_OWNER, vm);
+ if (r)
+ goto error_free;
+ if (bo) {
+ r = amdgpu_sync_kfd(&sync, bo->tbo.base.resv);
+ if (r)
+ goto error_free;
+ }
+ } else if (!bo) {
mem = NULL;
- resv = vm->root.bo->tbo.base.resv;
+
+ /* PRT map operations don't need to sync to anything. */
+
} else {
struct drm_gem_object *obj = &bo->tbo.base;
- resv = bo->tbo.base.resv;
- if (obj->import_attach && bo_va->is_xgmi) {
+ if (drm_gem_is_imported(obj) && bo_va->is_xgmi) {
struct dma_buf *dma_buf = obj->import_attach->dmabuf;
struct drm_gem_object *gobj = dma_buf->priv;
struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
- if (abo->tbo.resource->mem_type == TTM_PL_VRAM)
+ if (abo->tbo.resource &&
+ abo->tbo.resource->mem_type == TTM_PL_VRAM)
bo = gem_to_amdgpu_bo(gobj);
}
mem = bo->tbo.resource;
- if (mem->mem_type == TTM_PL_TT ||
- mem->mem_type == AMDGPU_PL_PREEMPT)
+ if (mem && (mem->mem_type == TTM_PL_TT ||
+ mem->mem_type == AMDGPU_PL_PREEMPT))
pages_addr = bo->tbo.ttm->dma_address;
+
+ /* Implicitly sync to moving fences before mapping anything */
+ r = amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv,
+ AMDGPU_SYNC_EXPLICIT, vm);
+ if (r)
+ goto error_free;
}
if (bo) {
+ struct amdgpu_device *bo_adev;
+
flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
if (amdgpu_bo_encrypted(bo))
flags |= AMDGPU_PTE_TMZ;
bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ vram_base = bo_adev->vm_manager.vram_base_offset;
+ uncached = (bo->flags & AMDGPU_GEM_CREATE_UNCACHED) != 0;
} else {
flags = 0x0;
+ vram_base = 0;
+ uncached = false;
}
- if (clear || (bo && bo->tbo.base.resv ==
- vm->root.bo->tbo.base.resv))
+ if (clear || amdgpu_vm_is_bo_always_valid(vm, bo))
last_update = &vm->last_update;
else
last_update = &bo_va->last_pt_update;
if (!clear && bo_va->base.moved) {
- bo_va->base.moved = false;
+ flush_tlb = true;
list_splice_init(&bo_va->valids, &bo_va->invalids);
} else if (bo_va->cleared != clear) {
@@ -1941,34 +1353,34 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
* but in case of something, we filter the flags in first place
*/
- if (!(mapping->flags & AMDGPU_PTE_READABLE))
+ if (!(mapping->flags & AMDGPU_VM_PAGE_READABLE))
update_flags &= ~AMDGPU_PTE_READABLE;
- if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
+ if (!(mapping->flags & AMDGPU_VM_PAGE_WRITEABLE))
update_flags &= ~AMDGPU_PTE_WRITEABLE;
/* Apply ASIC specific mapping flags */
- amdgpu_gmc_get_vm_pte(adev, mapping, &update_flags);
+ amdgpu_gmc_get_vm_pte(adev, vm, bo, mapping->flags,
+ &update_flags);
trace_amdgpu_vm_bo_update(mapping);
- r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
- resv, mapping->start,
- mapping->last, update_flags,
- mapping->offset, mem,
- pages_addr, last_update, table_freed);
+ r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
+ !uncached, &sync, mapping->start,
+ mapping->last, update_flags,
+ mapping->offset, vram_base, mem,
+ pages_addr, last_update);
if (r)
- return r;
+ goto error_free;
}
/* If the BO is not in its preferred location add it back to
* the evicted list so that it gets validated again on the
* next command submission.
*/
- if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
- uint32_t mem_type = bo->tbo.resource->mem_type;
-
- if (!(bo->preferred_domains &
- amdgpu_mem_type_to_domain(mem_type)))
+ if (amdgpu_vm_is_bo_always_valid(vm, bo)) {
+ if (bo->tbo.resource &&
+ !(bo->preferred_domains &
+ amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type)))
amdgpu_vm_bo_evicted(&bo_va->base);
else
amdgpu_vm_bo_idle(&bo_va->base);
@@ -1978,13 +1390,16 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
list_splice_init(&bo_va->invalids, &bo_va->valids);
bo_va->cleared = clear;
+ bo_va->base.moved = false;
if (trace_amdgpu_vm_bo_mapping_enabled()) {
list_for_each_entry(mapping, &bo_va->valids, list)
trace_amdgpu_vm_bo_mapping(mapping);
}
- return 0;
+error_free:
+ amdgpu_sync_free(&sync);
+ return r;
}
/**
@@ -2086,7 +1501,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping,
struct dma_fence *fence)
{
- if (mapping->flags & AMDGPU_PTE_PRT)
+ if (mapping->flags & AMDGPU_VM_PAGE_PRT)
amdgpu_vm_add_prt_cb(adev, fence);
kfree(mapping);
}
@@ -2102,30 +1517,14 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
struct dma_resv *resv = vm->root.bo->tbo.base.resv;
- struct dma_fence *excl, **shared;
- unsigned i, shared_count;
- int r;
-
- r = dma_resv_get_fences(resv, &excl, &shared_count, &shared);
- if (r) {
- /* Not enough memory to grab the fence list, as last resort
- * block for all the fences to complete.
- */
- dma_resv_wait_timeout(resv, true, false,
- MAX_SCHEDULE_TIMEOUT);
- return;
- }
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
- /* Add a callback for each fence in the reservation object */
- amdgpu_vm_prt_get(adev);
- amdgpu_vm_add_prt_cb(adev, excl);
-
- for (i = 0; i < shared_count; ++i) {
+ dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) {
+ /* Add a callback for each fence in the reservation object */
amdgpu_vm_prt_get(adev);
- amdgpu_vm_add_prt_cb(adev, shared[i]);
+ amdgpu_vm_add_prt_cb(adev, fence);
}
-
- kfree(shared);
}
/**
@@ -2147,29 +1546,34 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct dma_fence **fence)
{
- struct dma_resv *resv = vm->root.bo->tbo.base.resv;
struct amdgpu_bo_va_mapping *mapping;
- uint64_t init_pte_value = 0;
struct dma_fence *f = NULL;
+ struct amdgpu_sync sync;
int r;
+
+ /*
+ * Implicitly sync to command submissions in the same VM before
+ * unmapping.
+ */
+ amdgpu_sync_create(&sync);
+ r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv,
+ AMDGPU_SYNC_EQ_OWNER, vm);
+ if (r)
+ goto error_free;
+
while (!list_empty(&vm->freed)) {
mapping = list_first_entry(&vm->freed,
struct amdgpu_bo_va_mapping, list);
list_del(&mapping->list);
- if (vm->pte_support_ats &&
- mapping->start < AMDGPU_GMC_HOLE_START)
- init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
-
- r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false,
- resv, mapping->start,
- mapping->last, init_pte_value,
- 0, NULL, NULL, &f, NULL);
+ r = amdgpu_vm_update_range(adev, vm, false, false, true, false,
+ &sync, mapping->start, mapping->last,
+ 0, 0, 0, NULL, NULL, &f);
amdgpu_vm_free_mapping(adev, vm, mapping, f);
if (r) {
dma_fence_put(f);
- return r;
+ goto error_free;
}
}
@@ -2180,7 +1584,9 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
dma_fence_put(f);
}
- return 0;
+error_free:
+ amdgpu_sync_free(&sync);
+ return r;
}
@@ -2189,6 +1595,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
*
* @adev: amdgpu_device pointer
* @vm: requested vm
+ * @ticket: optional reservation ticket used to reserve the VM
*
* Make sure all BOs which are moved are updated in the PTs.
*
@@ -2198,48 +1605,116 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
* PTs have to be reserved!
*/
int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm)
+ struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket)
{
- struct amdgpu_bo_va *bo_va, *tmp;
+ struct amdgpu_bo_va *bo_va;
struct dma_resv *resv;
- bool clear;
+ bool clear, unlock;
int r;
- list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
+ spin_lock(&vm->status_lock);
+ while (!list_empty(&vm->moved)) {
+ bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va,
+ base.vm_status);
+ spin_unlock(&vm->status_lock);
+
/* Per VM BOs never need to bo cleared in the page tables */
- r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
return r;
+ spin_lock(&vm->status_lock);
}
- spin_lock(&vm->invalidated_lock);
while (!list_empty(&vm->invalidated)) {
bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
base.vm_status);
resv = bo_va->base.bo->tbo.base.resv;
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
/* Try to reserve the BO to avoid clearing its ptes */
- if (!amdgpu_vm_debug && dma_resv_trylock(resv))
+ if (!adev->debug_vm && dma_resv_trylock(resv)) {
+ clear = false;
+ unlock = true;
+ /* The caller is already holding the reservation lock */
+ } else if (ticket && dma_resv_locking_ctx(resv) == ticket) {
clear = false;
+ unlock = false;
/* Somebody else is using the BO right now */
- else
+ } else {
clear = true;
+ unlock = false;
+ }
+
+ r = amdgpu_vm_bo_update(adev, bo_va, clear);
- r = amdgpu_vm_bo_update(adev, bo_va, clear, NULL);
+ if (unlock)
+ dma_resv_unlock(resv);
if (r)
return r;
- if (!clear)
- dma_resv_unlock(resv);
- spin_lock(&vm->invalidated_lock);
+ /* Remember evicted DMABuf imports in compute VMs for later
+ * validation
+ */
+ if (vm->is_compute_context &&
+ drm_gem_is_imported(&bo_va->base.bo->tbo.base) &&
+ (!bo_va->base.bo->tbo.resource ||
+ bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM))
+ amdgpu_vm_bo_evicted_user(&bo_va->base);
+
+ spin_lock(&vm->status_lock);
}
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
return 0;
}
/**
+ * amdgpu_vm_flush_compute_tlb - Flush TLB on compute VM
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @flush_type: flush type
+ * @xcc_mask: mask of XCCs that belong to the compute partition in need of a TLB flush.
+ *
+ * Flush TLB if needed for a compute VM.
+ *
+ * Returns:
+ * 0 for success.
+ */
+int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ uint32_t flush_type,
+ uint32_t xcc_mask)
+{
+ uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
+ bool all_hub = false;
+ int xcc = 0, r = 0;
+
+ WARN_ON_ONCE(!vm->is_compute_context);
+
+ /*
+ * It can be that we race and lose here, but that is extremely unlikely
+ * and the worst thing which could happen is that we flush the changes
+ * into the TLB once more which is harmless.
+ */
+ if (atomic64_xchg(&vm->kfd_last_flushed_seq, tlb_seq) == tlb_seq)
+ return 0;
+
+ if (adev->family == AMDGPU_FAMILY_AI ||
+ adev->family == AMDGPU_FAMILY_RV)
+ all_hub = true;
+
+ for_each_inst(xcc, xcc_mask) {
+ r = amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, flush_type,
+ all_hub, xcc);
+ if (r)
+ break;
+ }
+ return r;
+}
+
+/**
* amdgpu_vm_bo_add - add a bo to a specific vm
*
* @adev: amdgpu_device pointer
@@ -2267,12 +1742,14 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
bo_va->ref_count = 1;
+ bo_va->last_pt_update = dma_fence_get_stub();
INIT_LIST_HEAD(&bo_va->valids);
INIT_LIST_HEAD(&bo_va->invalids);
if (!bo)
return bo_va;
+ dma_resv_assert_held(bo->tbo.base.resv);
if (amdgpu_dmabuf_is_xgmi_accessible(adev, bo)) {
bo_va->is_xgmi = true;
/* Power up XGMI if it can be potentially used */
@@ -2303,16 +1780,46 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
list_add(&mapping->list, &bo_va->invalids);
amdgpu_vm_it_insert(mapping, &vm->va);
- if (mapping->flags & AMDGPU_PTE_PRT)
+ if (mapping->flags & AMDGPU_VM_PAGE_PRT)
amdgpu_vm_prt_get(adev);
- if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
- !bo_va->base.moved) {
- list_move(&bo_va->base.vm_status, &vm->moved);
- }
+ if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved)
+ amdgpu_vm_bo_moved(&bo_va->base);
+
trace_amdgpu_vm_bo_map(bo_va, mapping);
}
+/* Validate operation parameters to prevent potential abuse */
+static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo,
+ uint64_t saddr,
+ uint64_t offset,
+ uint64_t size)
+{
+ uint64_t tmp, lpfn;
+
+ if (saddr & AMDGPU_GPU_PAGE_MASK
+ || offset & AMDGPU_GPU_PAGE_MASK
+ || size & AMDGPU_GPU_PAGE_MASK)
+ return -EINVAL;
+
+ if (check_add_overflow(saddr, size, &tmp)
+ || check_add_overflow(offset, size, &tmp)
+ || size == 0 /* which also leads to end < begin */)
+ return -EINVAL;
+
+ /* make sure the object fits at this offset */
+ if (bo && offset + size > amdgpu_bo_size(bo))
+ return -EINVAL;
+
+ /* Ensure the last pfn does not exceed max_pfn */
+ lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT;
+ if (lpfn >= adev->vm_manager.max_pfn)
+ return -EINVAL;
+
+ return 0;
+}
+
/**
* amdgpu_vm_bo_map - map bo inside a vm
*
@@ -2333,27 +1840,20 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
int amdgpu_vm_bo_map(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t saddr, uint64_t offset,
- uint64_t size, uint64_t flags)
+ uint64_t size, uint32_t flags)
{
struct amdgpu_bo_va_mapping *mapping, *tmp;
struct amdgpu_bo *bo = bo_va->base.bo;
struct amdgpu_vm *vm = bo_va->base.vm;
uint64_t eaddr;
+ int r;
- /* validate the parameters */
- if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK ||
- size == 0 || size & ~PAGE_MASK)
- return -EINVAL;
-
- /* make sure object fit at this offset */
- eaddr = saddr + size - 1;
- if (saddr >= eaddr ||
- (bo && offset + size > amdgpu_bo_size(bo)) ||
- (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
- return -EINVAL;
+ r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
+ if (r)
+ return r;
saddr /= AMDGPU_GPU_PAGE_SIZE;
- eaddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
if (tmp) {
@@ -2399,24 +1899,16 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t saddr, uint64_t offset,
- uint64_t size, uint64_t flags)
+ uint64_t size, uint32_t flags)
{
struct amdgpu_bo_va_mapping *mapping;
struct amdgpu_bo *bo = bo_va->base.bo;
uint64_t eaddr;
int r;
- /* validate the parameters */
- if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK ||
- size == 0 || size & ~PAGE_MASK)
- return -EINVAL;
-
- /* make sure object fit at this offset */
- eaddr = saddr + size - 1;
- if (saddr >= eaddr ||
- (bo && offset + size > amdgpu_bo_size(bo)) ||
- (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
- return -EINVAL;
+ r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
+ if (r)
+ return r;
/* Allocate all the needed memory */
mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
@@ -2430,7 +1922,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
}
saddr /= AMDGPU_GPU_PAGE_SIZE;
- eaddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
mapping->start = saddr;
mapping->last = eaddr;
@@ -2463,6 +1955,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping;
struct amdgpu_vm *vm = bo_va->base.vm;
bool valid = true;
+ int r;
saddr /= AMDGPU_GPU_PAGE_SIZE;
@@ -2483,6 +1976,17 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
return -ENOENT;
}
+ /* It is unlikely that the mapped userq has not been idled by the time
+ * user space requests the GEM unmap IOCTL, unless the unmap is being
+ * forced from user space.
+ */
+ if (unlikely(atomic_read(&bo_va->userq_va_mapped) > 0)) {
+ r = amdgpu_userq_gem_va_unmap_validate(adev, mapping, saddr);
+ if (unlikely(r == -EBUSY))
+ dev_warn_once(adev->dev,
+ "Attempt to unmap an active userq buffer\n");
+ }
+
list_del(&mapping->list);
amdgpu_vm_it_remove(mapping, &vm->va);
mapping->bo_va = NULL;
@@ -2517,10 +2021,14 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
LIST_HEAD(removed);
uint64_t eaddr;
+ int r;
+
+ r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size);
+ if (r)
+ return r;
- eaddr = saddr + size - 1;
saddr /= AMDGPU_GPU_PAGE_SIZE;
- eaddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
/* Allocate all the needed memory */
before = kzalloc(sizeof(*before), GFP_KERNEL);
@@ -2582,18 +2090,30 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
/* Insert partial mapping before the range */
if (!list_empty(&before->list)) {
+ struct amdgpu_bo *bo = before->bo_va->base.bo;
+
amdgpu_vm_it_insert(before, &vm->va);
- if (before->flags & AMDGPU_PTE_PRT)
+ if (before->flags & AMDGPU_VM_PAGE_PRT)
amdgpu_vm_prt_get(adev);
+
+ if (amdgpu_vm_is_bo_always_valid(vm, bo) &&
+ !before->bo_va->base.moved)
+ amdgpu_vm_bo_moved(&before->bo_va->base);
} else {
kfree(before);
}
/* Insert partial mapping after the range */
if (!list_empty(&after->list)) {
+ struct amdgpu_bo *bo = after->bo_va->base.bo;
+
amdgpu_vm_it_insert(after, &vm->va);
- if (after->flags & AMDGPU_PTE_PRT)
+ if (after->flags & AMDGPU_VM_PAGE_PRT)
amdgpu_vm_prt_get(adev);
+
+ if (amdgpu_vm_is_bo_always_valid(vm, bo) &&
+ !after->bo_va->base.moved)
+ amdgpu_vm_bo_moved(&after->bo_va->base);
} else {
kfree(after);
}
@@ -2650,7 +2170,7 @@ void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket)
}
/**
- * amdgpu_vm_bo_rmv - remove a bo to a specific vm
+ * amdgpu_vm_bo_del - remove a bo from a specific vm
*
* @adev: amdgpu_device pointer
* @bo_va: requested bo_va
@@ -2659,7 +2179,7 @@ void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket)
*
* Object have to be reserved!
*/
-void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
+void amdgpu_vm_bo_del(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va)
{
struct amdgpu_bo_va_mapping *mapping, *next;
@@ -2667,23 +2187,27 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
struct amdgpu_vm *vm = bo_va->base.vm;
struct amdgpu_vm_bo_base **base;
+ dma_resv_assert_held(vm->root.bo->tbo.base.resv);
+
if (bo) {
- if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)
- vm->bulk_moveable = false;
+ dma_resv_assert_held(bo->tbo.base.resv);
+ if (amdgpu_vm_is_bo_always_valid(vm, bo))
+ ttm_bo_set_bulk_move(&bo->tbo, NULL);
for (base = &bo_va->base.bo->vm_bo; *base;
base = &(*base)->next) {
if (*base != &bo_va->base)
continue;
+ amdgpu_vm_update_stats(*base, bo->tbo.resource, -1);
*base = bo_va->base.next;
break;
}
}
- spin_lock(&vm->invalidated_lock);
+ spin_lock(&vm->status_lock);
list_del(&bo_va->base.vm_status);
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
list_del(&mapping->list);
@@ -2723,7 +2247,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
return true;
/* Don't evict VM page tables while they are busy */
- if (!dma_resv_test_signaled(bo->tbo.base.resv, true))
+ if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP))
return false;
/* Try to block ongoing updates */
@@ -2744,25 +2268,19 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
/**
* amdgpu_vm_bo_invalidate - mark the bo as invalid
*
- * @adev: amdgpu_device pointer
* @bo: amdgpu buffer object
* @evicted: is the BO evicted
*
* Mark @bo as invalid.
*/
-void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
- struct amdgpu_bo *bo, bool evicted)
+void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted)
{
struct amdgpu_vm_bo_base *bo_base;
- /* shadow bo doesn't have bo base, its validation needs its parent */
- if (bo->parent && (amdgpu_bo_shadowed(bo->parent) == bo))
- bo = bo->parent;
-
for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
struct amdgpu_vm *vm = bo_base->vm;
- if (evicted && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
+ if (evicted && amdgpu_vm_is_bo_always_valid(vm, bo)) {
amdgpu_vm_bo_evicted(bo_base);
continue;
}
@@ -2773,7 +2291,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
if (bo->tbo.type == ttm_bo_type_kernel)
amdgpu_vm_bo_relocated(bo_base);
- else if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)
+ else if (amdgpu_vm_is_bo_always_valid(vm, bo))
amdgpu_vm_bo_moved(bo_base);
else
amdgpu_vm_bo_invalidated(bo_base);
@@ -2781,6 +2299,32 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
}
/**
+ * amdgpu_vm_bo_move - account for a BO placement change and invalidate it
+ *
+ * @bo: amdgpu buffer object
+ * @new_mem: the new placement of the BO move
+ * @evicted: is the BO evicted
+ *
+ * For every VM the BO is attached to, move the BO's memory statistics from
+ * its current resource to @new_mem while holding that VM's status lock, then
+ * mark @bo as invalid.
+ */
+void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
+		       bool evicted)
+{
+	struct amdgpu_vm_bo_base *base = bo->vm_bo;
+
+	while (base) {
+		struct amdgpu_vm *vm = base->vm;
+
+		/* Remove the old placement and add the new one in one lock hold */
+		spin_lock(&vm->status_lock);
+		amdgpu_vm_update_stats_locked(base, bo->tbo.resource, -1);
+		amdgpu_vm_update_stats_locked(base, new_mem, +1);
+		spin_unlock(&vm->status_lock);
+
+		base = base->next;
+	}
+
+	amdgpu_vm_bo_invalidate(bo, evicted);
+}
+
+/**
* amdgpu_vm_get_block_size - calculate VM page table size as power of two
*
* @vm_size: VM size
@@ -2850,7 +2394,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit +
(1 << 30) - 1) >> 30;
vm_size = roundup_pow_of_two(
- min(max(phys_ram_gb * 3, min_vm_size), max_size));
+ clamp(phys_ram_gb * 3, min_vm_size, max_size));
}
adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;
@@ -2859,7 +2403,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
if (amdgpu_vm_block_size != -1)
tmp >>= amdgpu_vm_block_size - 9;
tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
- adev->vm_manager.num_level = min(max_level, (unsigned)tmp);
+ adev->vm_manager.num_level = min_t(unsigned int, max_level, tmp);
switch (adev->vm_manager.num_level) {
case 3:
adev->vm_manager.root_level = AMDGPU_VM_PDB2;
@@ -2889,10 +2433,11 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
else
adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
- DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
- vm_size, adev->vm_manager.num_level + 1,
- adev->vm_manager.block_size,
- adev->vm_manager.fragment_size);
+ dev_info(
+ adev->dev,
+ "vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
+ vm_size, adev->vm_manager.num_level + 1,
+ adev->vm_manager.block_size, adev->vm_manager.fragment_size);
}
/**
@@ -2903,12 +2448,114 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
*/
long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
{
- timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, true,
- true, timeout);
+ timeout = drm_sched_entity_flush(&vm->immediate, timeout);
if (timeout <= 0)
return timeout;
- return dma_fence_wait_timeout(vm->last_unlocked, true, timeout);
+ return drm_sched_entity_flush(&vm->delayed, timeout);
+}
+
+/* kref release callback: free the amdgpu_task_info that embeds @kref. */
+static void amdgpu_vm_destroy_task_info(struct kref *kref)
+{
+	kfree(container_of(kref, struct amdgpu_task_info, refcount));
+}
+
+/*
+ * Look up @pasid in adev->vm_manager.pasids and return the stored VM pointer.
+ * The load is done with the XArray lock held and interrupts saved; the lock
+ * is dropped again before returning.
+ */
+static inline struct amdgpu_vm *
+amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid)
+{
+	unsigned long irq_flags;
+	struct amdgpu_vm *found;
+
+	xa_lock_irqsave(&adev->vm_manager.pasids, irq_flags);
+	found = xa_load(&adev->vm_manager.pasids, pasid);
+	xa_unlock_irqrestore(&adev->vm_manager.pasids, irq_flags);
+
+	return found;
+}
+
+/**
+ * amdgpu_vm_put_task_info - drop a reference on a VM task_info
+ *
+ * @task_info: task_info to release; a NULL pointer is ignored.
+ *
+ * The task_info is freed when the last reference is dropped.
+ */
+void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info)
+{
+	if (!task_info)
+		return;
+
+	kref_put(&task_info->refcount, amdgpu_vm_destroy_task_info);
+}
+
+/**
+ * amdgpu_vm_get_task_info_vm - Extracts task info for a vm.
+ *
+ * @vm: VM to get info from, may be NULL
+ *
+ * Returns the reference counted task_info structure, which must be
+ * referenced down with amdgpu_vm_put_task_info. Returns NULL if @vm is NULL
+ * or if the VM has no task_info.
+ */
+struct amdgpu_task_info *
+amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm)
+{
+	struct amdgpu_task_info *ti;
+
+	/*
+	 * vm->task_info can be NULL: amdgpu_vm_init() keeps going when the
+	 * task_info allocation fails, so do not take a reference through a
+	 * NULL pointer.
+	 */
+	if (!vm || !vm->task_info)
+		return NULL;
+
+	ti = vm->task_info;
+	kref_get(&ti->refcount);
+
+	return ti;
+}
+
+/**
+ * amdgpu_vm_get_task_info_pasid - Extracts task info for a PASID.
+ *
+ * @adev: drm device pointer
+ * @pasid: PASID identifier for VM
+ *
+ * Looks up the VM stored for @pasid and hands it to
+ * amdgpu_vm_get_task_info_vm().
+ *
+ * Returns the reference counted task_info structure, which must be
+ * referenced down with amdgpu_vm_put_task_info, or NULL when the lookup
+ * yields no VM.
+ */
+struct amdgpu_task_info *
+amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid)
+{
+	struct amdgpu_vm *vm = amdgpu_vm_get_vm_from_pasid(adev, pasid);
+
+	return amdgpu_vm_get_task_info_vm(vm);
+}
+
+/*
+ * Allocate a zeroed task_info for @vm and initialise its reference count.
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+static int amdgpu_vm_create_task_info(struct amdgpu_vm *vm)
+{
+	struct amdgpu_task_info *ti;
+
+	ti = kzalloc(sizeof(struct amdgpu_task_info), GFP_KERNEL);
+
+	/* Stored unconditionally, so vm->task_info is NULL on failure */
+	vm->task_info = ti;
+	if (!ti)
+		return -ENOMEM;
+
+	kref_init(&ti->refcount);
+	return 0;
+}
+
+/**
+ * amdgpu_vm_set_task_info - Sets VMs task info.
+ *
+ * @vm: vm for which to set the info
+ *
+ * Records the pid and comm of the current task in the VM's task_info. The
+ * tgid and process name are only recorded when the group leader shares the
+ * current task's mm. Does nothing if the VM has no task_info or if the
+ * recorded pid already matches the current task.
+ */
+void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
+{
+	struct amdgpu_task_info *ti = vm->task_info;
+
+	if (!ti || ti->task.pid == current->pid)
+		return;
+
+	ti->task.pid = current->pid;
+	get_task_comm(ti->task.comm, current);
+
+	if (current->group_leader->mm == current->mm) {
+		ti->tgid = current->group_leader->pid;
+		get_task_comm(ti->process_name, current->group_leader);
+	}
+}
/**
@@ -2916,13 +2563,16 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
*
* @adev: amdgpu_device pointer
* @vm: requested vm
+ * @xcp_id: GPU partition selection id
+ * @pasid: the pasid the VM is using on this GPU
*
* Init @vm fields.
*
* Returns:
* 0 for success, error for failure.
*/
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ int32_t xcp_id, uint32_t pasid)
{
struct amdgpu_bo *root_bo;
struct amdgpu_bo_vm *root;
@@ -2932,35 +2582,29 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
vm->reserved_vmid[i] = NULL;
INIT_LIST_HEAD(&vm->evicted);
+ INIT_LIST_HEAD(&vm->evicted_user);
INIT_LIST_HEAD(&vm->relocated);
INIT_LIST_HEAD(&vm->moved);
INIT_LIST_HEAD(&vm->idle);
INIT_LIST_HEAD(&vm->invalidated);
- spin_lock_init(&vm->invalidated_lock);
+ spin_lock_init(&vm->status_lock);
INIT_LIST_HEAD(&vm->freed);
INIT_LIST_HEAD(&vm->done);
+ INIT_KFIFO(vm->faults);
- /* create scheduler entities for page table updates */
- r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
- adev->vm_manager.vm_pte_scheds,
- adev->vm_manager.vm_pte_num_scheds, NULL);
+ r = amdgpu_vm_init_entities(adev, vm);
if (r)
return r;
- r = drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL,
- adev->vm_manager.vm_pte_scheds,
- adev->vm_manager.vm_pte_num_scheds, NULL);
- if (r)
- goto error_free_immediate;
+ ttm_lru_bulk_move_init(&vm->lru_bulk_move);
- vm->pte_support_ats = false;
vm->is_compute_context = false;
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
AMDGPU_VM_USE_CPU_FOR_GFX);
- DRM_DEBUG_DRIVER("VM update mode is %s\n",
- vm->use_cpu_for_update ? "CPU" : "SDMA");
+ dev_dbg(adev->dev, "VM update mode is %s\n",
+ vm->use_cpu_for_update ? "CPU" : "SDMA");
WARN_ONCE((vm->use_cpu_for_update &&
!amdgpu_gmc_vram_full_visible(&adev->gmc)),
"CPU update of VM recommended only for large BAR system\n");
@@ -2969,84 +2613,75 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->update_funcs = &amdgpu_vm_cpu_funcs;
else
vm->update_funcs = &amdgpu_vm_sdma_funcs;
- vm->last_update = NULL;
+
+ vm->last_update = dma_fence_get_stub();
vm->last_unlocked = dma_fence_get_stub();
+ vm->last_tlb_flush = dma_fence_get_stub();
+ vm->generation = amdgpu_vm_generation(adev, NULL);
mutex_init(&vm->eviction_lock);
vm->evicting = false;
+ vm->tlb_fence_context = dma_fence_context_alloc(1);
r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
- false, &root);
+ false, &root, xcp_id);
if (r)
goto error_free_delayed;
- root_bo = &root->bo;
+
+ root_bo = amdgpu_bo_ref(&root->bo);
r = amdgpu_bo_reserve(root_bo, true);
+ if (r) {
+ amdgpu_bo_unref(&root_bo);
+ goto error_free_delayed;
+ }
+
+ amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
+ r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1);
if (r)
goto error_free_root;
- r = dma_resv_reserve_shared(root_bo->tbo.base.resv, 1);
+ r = amdgpu_vm_pt_clear(adev, vm, root, false);
if (r)
- goto error_unreserve;
-
- amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
+ goto error_free_root;
- r = amdgpu_vm_clear_bo(adev, vm, root, false);
+ r = amdgpu_vm_create_task_info(vm);
if (r)
- goto error_unreserve;
-
- amdgpu_bo_unreserve(vm->root.bo);
+ dev_dbg(adev->dev, "Failed to create task info for VM\n");
- INIT_KFIFO(vm->faults);
+ /* Store new PASID in XArray (if non-zero) */
+ if (pasid != 0) {
+ r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm, GFP_KERNEL));
+ if (r < 0)
+ goto error_free_root;
- return 0;
+ vm->pasid = pasid;
+ }
-error_unreserve:
amdgpu_bo_unreserve(vm->root.bo);
+ amdgpu_bo_unref(&root_bo);
+
+ return 0;
error_free_root:
- amdgpu_bo_unref(&root->shadow);
+ /* If PASID was partially set, erase it from XArray before failing */
+ if (vm->pasid != 0) {
+ xa_erase_irq(&adev->vm_manager.pasids, vm->pasid);
+ vm->pasid = 0;
+ }
+ amdgpu_vm_pt_free_root(adev, vm);
+ amdgpu_bo_unreserve(vm->root.bo);
amdgpu_bo_unref(&root_bo);
- vm->root.bo = NULL;
error_free_delayed:
+ dma_fence_put(vm->last_tlb_flush);
dma_fence_put(vm->last_unlocked);
- drm_sched_entity_destroy(&vm->delayed);
-
-error_free_immediate:
- drm_sched_entity_destroy(&vm->immediate);
+ ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
+ amdgpu_vm_fini_entities(vm);
return r;
}
/**
- * amdgpu_vm_check_clean_reserved - check if a VM is clean
- *
- * @adev: amdgpu_device pointer
- * @vm: the VM to check
- *
- * check all entries of the root PD, if any subsequent PDs are allocated,
- * it means there are page table creating and filling, and is no a clean
- * VM
- *
- * Returns:
- * 0 if this VM is clean
- */
-static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm)
-{
- enum amdgpu_vm_level root = adev->vm_manager.root_level;
- unsigned int entries = amdgpu_vm_num_entries(adev, root);
- unsigned int i = 0;
-
- for (i = 0; i < entries; i++) {
- if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo)
- return -EINVAL;
- }
-
- return 0;
-}
-
-/**
* amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
*
* @adev: amdgpu_device pointer
@@ -3067,35 +2702,17 @@ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev,
*/
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
- bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
int r;
r = amdgpu_bo_reserve(vm->root.bo, true);
if (r)
return r;
- /* Sanity checks */
- r = amdgpu_vm_check_clean_reserved(adev, vm);
- if (r)
- goto unreserve_bo;
-
- /* Check if PD needs to be reinitialized and do it before
- * changing any other state, in case it fails.
- */
- if (pte_support_ats != vm->pte_support_ats) {
- vm->pte_support_ats = pte_support_ats;
- r = amdgpu_vm_clear_bo(adev, vm,
- to_amdgpu_bo_vm(vm->root.bo),
- false);
- if (r)
- goto unreserve_bo;
- }
-
/* Update VM state */
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
AMDGPU_VM_USE_CPU_FOR_COMPUTE);
- DRM_DEBUG_DRIVER("VM update mode is %s\n",
- vm->use_cpu_for_update ? "CPU" : "SDMA");
+ dev_dbg(adev->dev, "VM update mode is %s\n",
+ vm->use_cpu_for_update ? "CPU" : "SDMA");
WARN_ONCE((vm->use_cpu_for_update &&
!amdgpu_gmc_vram_full_visible(&adev->gmc)),
"CPU update of VM recommended only for large BAR system\n");
@@ -3108,35 +2725,31 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
goto unreserve_bo;
vm->update_funcs = &amdgpu_vm_cpu_funcs;
+ r = amdgpu_vm_pt_map_tables(adev, vm);
+ if (r)
+ goto unreserve_bo;
+
} else {
vm->update_funcs = &amdgpu_vm_sdma_funcs;
}
+
dma_fence_put(vm->last_update);
- vm->last_update = NULL;
+ vm->last_update = dma_fence_get_stub();
vm->is_compute_context = true;
- /* Free the shadow bo for compute VM */
- amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow);
-
- goto unreserve_bo;
-
unreserve_bo:
amdgpu_bo_unreserve(vm->root.bo);
return r;
}
-/**
- * amdgpu_vm_release_compute - release a compute vm
- * @adev: amdgpu_device pointer
- * @vm: a vm turned into compute vm by calling amdgpu_vm_make_compute
- *
- * This is a correspondant of amdgpu_vm_make_compute. It decouples compute
- * pasid from vm. Compute should stop use of vm after this call.
- */
-void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+static int amdgpu_vm_stats_is_zero(struct amdgpu_vm *vm)
{
- amdgpu_vm_set_pasid(adev, vm, 0);
- vm->is_compute_context = false;
+ for (int i = 0; i < __AMDGPU_PL_NUM; ++i) {
+ if (!(drm_memory_stats_is_zero(&vm->stats[i].drm) &&
+ vm->stats[i].evicted == 0))
+ return false;
+ }
+ return true;
}
/**
@@ -3153,18 +2766,28 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
struct amdgpu_bo_va_mapping *mapping, *tmp;
bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
struct amdgpu_bo *root;
+ unsigned long flags;
int i;
amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
root = amdgpu_bo_ref(vm->root.bo);
amdgpu_bo_reserve(root, true);
- amdgpu_vm_set_pasid(adev, vm, 0);
+ /* Remove PASID mapping before destroying VM */
+ if (vm->pasid != 0) {
+ xa_erase_irq(&adev->vm_manager.pasids, vm->pasid);
+ vm->pasid = 0;
+ }
dma_fence_wait(vm->last_unlocked, false);
dma_fence_put(vm->last_unlocked);
+ dma_fence_wait(vm->last_tlb_flush, false);
+ /* Make sure that all fence callbacks have completed */
+ spin_lock_irqsave(vm->last_tlb_flush->lock, flags);
+ spin_unlock_irqrestore(vm->last_tlb_flush->lock, flags);
+ dma_fence_put(vm->last_tlb_flush);
list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
- if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
+ if (mapping->flags & AMDGPU_VM_PAGE_PRT && prt_fini_needed) {
amdgpu_vm_prt_fini(adev, vm);
prt_fini_needed = false;
}
@@ -3173,13 +2796,12 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
}
- amdgpu_vm_free_pts(adev, vm, NULL);
+ amdgpu_vm_pt_free_root(adev, vm);
amdgpu_bo_unreserve(root);
amdgpu_bo_unref(&root);
WARN_ON(vm->root.bo);
- drm_sched_entity_destroy(&vm->immediate);
- drm_sched_entity_destroy(&vm->delayed);
+ amdgpu_vm_fini_entities(vm);
if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
dev_err(adev->dev, "still active bo inside vm\n");
@@ -3194,8 +2816,22 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
}
dma_fence_put(vm->last_update);
- for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
+
+ for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
amdgpu_vmid_free_reserved(adev, vm, i);
+ }
+
+ ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
+
+ if (!amdgpu_vm_stats_is_zero(vm)) {
+ struct amdgpu_task_info *ti = vm->task_info;
+
+ dev_warn(adev->dev,
+ "VM memory stats for proc %s(%d) task %s(%d) is non-zero when fini\n",
+ ti->process_name, ti->task.pid, ti->task.comm, ti->tgid);
+ }
+
+ amdgpu_vm_put_task_info(vm->task_info);
}
/**
@@ -3207,8 +2843,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
*/
void amdgpu_vm_manager_init(struct amdgpu_device *adev)
{
- unsigned i;
-
/* Concurrent flushes are only possible starting with Vega10 and
* are broken on Navi10 and Navi14.
*/
@@ -3217,11 +2851,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
adev->asic_type == CHIP_NAVI14);
amdgpu_vmid_mgr_init(adev);
- adev->vm_manager.fence_context =
- dma_fence_context_alloc(AMDGPU_MAX_RINGS);
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
- adev->vm_manager.seqno[i] = 0;
-
spin_lock_init(&adev->vm_manager.prt_lock);
atomic_set(&adev->vm_manager.num_prt_users, 0);
@@ -3230,7 +2859,11 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
*/
#ifdef CONFIG_X86_64
if (amdgpu_vm_update_mode == -1) {
- if (amdgpu_gmc_vram_full_visible(&adev->gmc))
+ /* For asic with VF MMIO access protection
+ * avoid using CPU for VM table updates
+ */
+ if (amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+ !amdgpu_sriov_vf_mmio_access_protection(adev))
adev->vm_manager.vm_update_mode =
AMDGPU_VM_USE_CPU_FOR_COMPUTE;
else
@@ -3274,34 +2907,18 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
union drm_amdgpu_vm *args = data;
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_fpriv *fpriv = filp->driver_priv;
- long timeout = msecs_to_jiffies(2000);
- int r;
+ struct amdgpu_vm *vm = &fpriv->vm;
+
+ /* No valid flags defined yet */
+ if (args->in.flags)
+ return -EINVAL;
switch (args->in.op) {
case AMDGPU_VM_OP_RESERVE_VMID:
/* We only have requirement to reserve vmid from gfxhub */
- r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm,
- AMDGPU_GFXHUB_0);
- if (r)
- return r;
- break;
+ return amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0));
case AMDGPU_VM_OP_UNRESERVE_VMID:
- if (amdgpu_sriov_runtime(adev))
- timeout = 8 * timeout;
-
- /* Wait vm idle to make sure the vmid set in SPM_VMID is
- * not referenced anymore.
- */
- r = amdgpu_bo_reserve(fpriv->vm.root.bo, true);
- if (r)
- return r;
-
- r = amdgpu_vm_wait_idle(&fpriv->vm, timeout);
- if (r < 0)
- return r;
-
- amdgpu_bo_unreserve(fpriv->vm.root.bo);
- amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0);
+ amdgpu_vmid_free_reserved(adev, vm, AMDGPU_GFXHUB(0));
break;
default:
return -EINVAL;
@@ -3311,51 +2928,13 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
}
/**
- * amdgpu_vm_get_task_info - Extracts task info for a PASID.
- *
- * @adev: drm device pointer
- * @pasid: PASID identifier for VM
- * @task_info: task_info to fill.
- */
-void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
- struct amdgpu_task_info *task_info)
-{
- struct amdgpu_vm *vm;
- unsigned long flags;
-
- xa_lock_irqsave(&adev->vm_manager.pasids, flags);
-
- vm = xa_load(&adev->vm_manager.pasids, pasid);
- if (vm)
- *task_info = vm->task_info;
-
- xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
-}
-
-/**
- * amdgpu_vm_set_task_info - Sets VMs task info.
- *
- * @vm: vm for which to set the info
- */
-void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
-{
- if (vm->task_info.pid)
- return;
-
- vm->task_info.pid = current->pid;
- get_task_comm(vm->task_info.task_name, current);
-
- if (current->group_leader->mm != current->mm)
- return;
-
- vm->task_info.tgid = current->group_leader->pid;
- get_task_comm(vm->task_info.process_name, current->group_leader);
-}
-
-/**
* amdgpu_vm_handle_fault - graceful handling of VM faults.
* @adev: amdgpu device pointer
* @pasid: PASID of the VM
+ * @ts: Timestamp of the fault
+ * @vmid: VMID, only used for GFX 9.4.3.
+ * @node_id: Node_id received in IH cookie. Only applicable for
+ * GFX 9.4.3.
* @addr: Address of the fault
* @write_fault: true is write fault, false is read fault
*
@@ -3363,7 +2942,8 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
* shouldn't be reported any more.
*/
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
- uint64_t addr, bool write_fault)
+ u32 vmid, u32 node_id, uint64_t addr, uint64_t ts,
+ bool write_fault)
{
bool is_compute_context = false;
struct amdgpu_bo *root;
@@ -3387,8 +2967,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
addr /= AMDGPU_GPU_PAGE_SIZE;
- if (is_compute_context &&
- !svm_range_restore_pages(adev, pasid, addr, write_fault)) {
+ if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
+ node_id, addr, ts, write_fault)) {
amdgpu_bo_unref(&root);
return true;
}
@@ -3413,8 +2993,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
/* Intentionally setting invalid PTE flag
* combination to force a no-retry-fault
*/
- flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE |
- AMDGPU_PTE_TF;
+ flags = AMDGPU_VM_NORETRY_FLAGS;
value = 0;
} else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
/* Redirect the access to the dummy page */
@@ -3427,15 +3006,14 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
value = 0;
}
- r = dma_resv_reserve_shared(root->tbo.base.resv, 1);
+ r = dma_resv_reserve_fences(root->tbo.base.resv, 1);
if (r) {
pr_debug("failed %d to reserve fence slot\n", r);
goto error_unlock;
}
- r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr,
- addr, flags, value, NULL, NULL, NULL,
- NULL);
+ r = amdgpu_vm_update_range(adev, vm, true, false, false, false,
+ NULL, addr, addr, flags, value, 0, NULL, NULL, NULL);
if (r)
goto error_unlock;
@@ -3444,7 +3022,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
error_unlock:
amdgpu_bo_unreserve(root);
if (r < 0)
- DRM_ERROR("Can't handle page fault (%d)\n", r);
+ dev_err(adev->dev, "Can't handle page fault (%d)\n", r);
error_unref:
amdgpu_bo_unref(&root);
@@ -3478,6 +3056,9 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
unsigned int total_done_objs = 0;
unsigned int id = 0;
+ amdgpu_vm_assert_locked(vm);
+
+ spin_lock(&vm->status_lock);
seq_puts(m, "\tIdle BOs:\n");
list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
if (!bo_va->base.bo)
@@ -3515,7 +3096,6 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
id = 0;
seq_puts(m, "\tInvalidated BOs:\n");
- spin_lock(&vm->invalidated_lock);
list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
if (!bo_va->base.bo)
continue;
@@ -3530,7 +3110,7 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
continue;
total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
}
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
total_done_objs = id;
seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n", total_idle,
@@ -3547,3 +3127,83 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
total_done_objs);
}
#endif
+
+/**
+ * amdgpu_vm_update_fault_cache - update cached fault info.
+ * @adev: amdgpu device pointer
+ * @pasid: PASID of the VM
+ * @addr: Address of the fault
+ * @status: GPUVM fault status register
+ * @vmhub: which vmhub got the fault
+ *
+ * Cache the fault info for later use by userspace in debugging. Nothing is
+ * updated when no VM is found for @pasid or when @status is 0.
+ */
+void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
+				  unsigned int pasid,
+				  uint64_t addr,
+				  uint32_t status,
+				  unsigned int vmhub)
+{
+	struct amdgpu_vm *vm;
+	unsigned long flags;
+
+	xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+
+	vm = xa_load(&adev->vm_manager.pasids, pasid);
+	/* Don't update the fault cache if status is 0. In the multiple
+	 * fault case, subsequent faults will return a 0 status which is
+	 * useless for userspace and replaces the useful fault status, so
+	 * only update if status is non-0.
+	 */
+	if (!vm || !status)
+		goto out_unlock;
+
+	vm->fault_info.addr = addr;
+	vm->fault_info.status = status;
+
+	/*
+	 * Update the fault information globally for later usage
+	 * when vm could be stale or freed.
+	 */
+	adev->vm_manager.fault_info.addr = addr;
+	adev->vm_manager.fault_info.vmhub = vmhub;
+	adev->vm_manager.fault_info.status = status;
+
+	/* Encode the hub type together with its index within that type */
+	if (AMDGPU_IS_GFXHUB(vmhub))
+		vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_GFX |
+			((vmhub - AMDGPU_GFXHUB_START) << AMDGPU_VMHUB_IDX_SHIFT);
+	else if (AMDGPU_IS_MMHUB0(vmhub))
+		vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM0 |
+			((vmhub - AMDGPU_MMHUB0_START) << AMDGPU_VMHUB_IDX_SHIFT);
+	else if (AMDGPU_IS_MMHUB1(vmhub))
+		vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM1 |
+			((vmhub - AMDGPU_MMHUB1_START) << AMDGPU_VMHUB_IDX_SHIFT);
+	else
+		WARN_ONCE(1, "Invalid vmhub %u\n", vmhub);
+
+out_unlock:
+	xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+}
+
+/**
+ * amdgpu_vm_is_bo_always_valid - check if the BO is VM always valid
+ *
+ * @vm: VM to test against.
+ * @bo: BO to be tested, may be NULL.
+ *
+ * Returns true if @bo is non-NULL and shares the dma_resv object with the
+ * root PD of @vm, in which case it is always guaranteed to be valid inside
+ * the VM. Returns false otherwise.
+ */
+bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo)
+{
+	if (!bo)
+		return false;
+
+	return bo->tbo.base.resv == vm->root.bo->tbo.base.resv;
+}
+
+/**
+ * amdgpu_vm_print_task_info - log the process and thread of a task_info
+ *
+ * @adev: amdgpu device pointer; its struct device is passed to dev_err()
+ * @task_info: task info to print
+ *
+ * Emits a single dev_err() line with the process name and tgid followed by
+ * the thread name and pid.
+ */
+void amdgpu_vm_print_task_info(struct amdgpu_device *adev,
+			       struct amdgpu_task_info *task_info)
+{
+	const char *process = task_info->process_name;
+	const char *thread = task_info->task.comm;
+
+	dev_err(adev->dev,
+		" Process %s pid %d thread %s pid %d\n",
+		process, task_info->tgid,
+		thread, task_info->task.pid);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 85fcfb8c5efd..15d757c016cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -29,12 +29,15 @@
#include <linux/rbtree.h>
#include <drm/gpu_scheduler.h>
#include <drm/drm_file.h>
-#include <drm/ttm/ttm_bo_driver.h>
+#include <drm/ttm/ttm_bo.h>
#include <linux/sched/mm.h>
#include "amdgpu_sync.h"
#include "amdgpu_ring.h"
#include "amdgpu_ids.h"
+#include "amdgpu_ttm.h"
+
+struct drm_exec;
struct amdgpu_bo_va;
struct amdgpu_job;
@@ -83,10 +86,19 @@ struct amdgpu_bo_vm;
/* PDE Block Fragment Size for VEGA10 */
#define AMDGPU_PDE_BFS(a) ((uint64_t)a << 59)
+/* Flag combination to set no-retry with TF disabled */
+#define AMDGPU_VM_NORETRY_FLAGS (AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE | \
+ AMDGPU_PTE_TF)
+/* Flag combination to set no-retry with TF enabled */
+#define AMDGPU_VM_NORETRY_FLAGS_TF (AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM | \
+ AMDGPU_PTE_PRT)
/* For GFX9 */
-#define AMDGPU_PTE_MTYPE_VG10(a) ((uint64_t)(a) << 57)
-#define AMDGPU_PTE_MTYPE_VG10_MASK AMDGPU_PTE_MTYPE_VG10(3ULL)
+#define AMDGPU_PTE_MTYPE_VG10_SHIFT(mtype) ((uint64_t)(mtype) << 57)
+#define AMDGPU_PTE_MTYPE_VG10_MASK AMDGPU_PTE_MTYPE_VG10_SHIFT(3ULL)
+#define AMDGPU_PTE_MTYPE_VG10(flags, mtype) \
+ (((uint64_t)(flags) & (~AMDGPU_PTE_MTYPE_VG10_MASK)) | \
+ AMDGPU_PTE_MTYPE_VG10_SHIFT(mtype))
#define AMDGPU_MTYPE_NC 0
#define AMDGPU_MTYPE_CC 2
@@ -99,28 +111,74 @@ struct amdgpu_bo_vm;
| AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_CC))
/* gfx10 */
-#define AMDGPU_PTE_MTYPE_NV10(a) ((uint64_t)(a) << 48)
-#define AMDGPU_PTE_MTYPE_NV10_MASK AMDGPU_PTE_MTYPE_NV10(7ULL)
+#define AMDGPU_PTE_MTYPE_NV10_SHIFT(mtype) ((uint64_t)(mtype) << 48)
+#define AMDGPU_PTE_MTYPE_NV10_MASK AMDGPU_PTE_MTYPE_NV10_SHIFT(7ULL)
+#define AMDGPU_PTE_MTYPE_NV10(flags, mtype) \
+ (((uint64_t)(flags) & (~AMDGPU_PTE_MTYPE_NV10_MASK)) | \
+ AMDGPU_PTE_MTYPE_NV10_SHIFT(mtype))
+
+/* gfx12 */
+#define AMDGPU_PTE_PRT_GFX12 (1ULL << 56)
+#define AMDGPU_PTE_PRT_FLAG(adev) \
+ ((amdgpu_ip_version((adev), GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) ? AMDGPU_PTE_PRT_GFX12 : AMDGPU_PTE_PRT)
+
+#define AMDGPU_PTE_MTYPE_GFX12_SHIFT(mtype) ((uint64_t)(mtype) << 54)
+#define AMDGPU_PTE_MTYPE_GFX12_MASK AMDGPU_PTE_MTYPE_GFX12_SHIFT(3ULL)
+#define AMDGPU_PTE_MTYPE_GFX12(flags, mtype) \
+ (((uint64_t)(flags) & (~AMDGPU_PTE_MTYPE_GFX12_MASK)) | \
+ AMDGPU_PTE_MTYPE_GFX12_SHIFT(mtype))
+
+#define AMDGPU_PTE_DCC (1ULL << 58)
+#define AMDGPU_PTE_IS_PTE (1ULL << 63)
+
+/* PDE Block Fragment Size for gfx v12 */
+#define AMDGPU_PDE_BFS_GFX12(a) ((uint64_t)((a) & 0x1fULL) << 58)
+#define AMDGPU_PDE_BFS_FLAG(adev, a) \
+ ((amdgpu_ip_version((adev), GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) ? AMDGPU_PDE_BFS_GFX12(a) : AMDGPU_PDE_BFS(a))
+/* PDE is handled as PTE for gfx v12 */
+#define AMDGPU_PDE_PTE_GFX12 (1ULL << 63)
+#define AMDGPU_PDE_PTE_FLAG(adev) \
+ ((amdgpu_ip_version((adev), GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) ? AMDGPU_PDE_PTE_GFX12 : AMDGPU_PDE_PTE)
/* How to program VM fault handling */
#define AMDGPU_VM_FAULT_STOP_NEVER 0
#define AMDGPU_VM_FAULT_STOP_FIRST 1
#define AMDGPU_VM_FAULT_STOP_ALWAYS 2
-/* Reserve 4MB VRAM for page tables */
+/* How much VRAM is reserved for page tables */
#define AMDGPU_VM_RESERVED_VRAM (8ULL << 20)
-/* max number of VMHUB */
-#define AMDGPU_MAX_VMHUBS 3
-#define AMDGPU_GFXHUB_0 0
-#define AMDGPU_MMHUB_0 1
-#define AMDGPU_MMHUB_1 2
-
-/* Reserve 2MB at top/bottom of address space for kernel use */
-#define AMDGPU_VA_RESERVED_SIZE (2ULL << 20)
-
-/* max vmids dedicated for process */
-#define AMDGPU_VM_MAX_RESERVED_VMID 1
+/*
+ * max number of VMHUB
+ * layout: max 8 GFXHUB + 4 MMHUB0 + 1 MMHUB1
+ */
+#define AMDGPU_MAX_VMHUBS 13
+#define AMDGPU_GFXHUB_START 0
+#define AMDGPU_MMHUB0_START 8
+#define AMDGPU_MMHUB1_START 12
+#define AMDGPU_GFXHUB(x) (AMDGPU_GFXHUB_START + (x))
+#define AMDGPU_MMHUB0(x) (AMDGPU_MMHUB0_START + (x))
+#define AMDGPU_MMHUB1(x) (AMDGPU_MMHUB1_START + (x))
+
+#define AMDGPU_IS_GFXHUB(x) ((x) >= AMDGPU_GFXHUB_START && (x) < AMDGPU_MMHUB0_START)
+#define AMDGPU_IS_MMHUB0(x) ((x) >= AMDGPU_MMHUB0_START && (x) < AMDGPU_MMHUB1_START)
+#define AMDGPU_IS_MMHUB1(x) ((x) >= AMDGPU_MMHUB1_START && (x) < AMDGPU_MAX_VMHUBS)
+
+/* Reserve space at top/bottom of address space for kernel use */
+#define AMDGPU_VA_RESERVED_CSA_SIZE (2ULL << 20)
+#define AMDGPU_VA_RESERVED_CSA_START(adev) (((adev)->vm_manager.max_pfn \
+ << AMDGPU_GPU_PAGE_SHIFT) \
+ - AMDGPU_VA_RESERVED_CSA_SIZE)
+#define AMDGPU_VA_RESERVED_SEQ64_SIZE (2ULL << 20)
+#define AMDGPU_VA_RESERVED_SEQ64_START(adev) (AMDGPU_VA_RESERVED_CSA_START(adev) \
+ - AMDGPU_VA_RESERVED_SEQ64_SIZE)
+#define AMDGPU_VA_RESERVED_TRAP_SIZE (2ULL << 12)
+#define AMDGPU_VA_RESERVED_TRAP_START(adev) (AMDGPU_VA_RESERVED_SEQ64_START(adev) \
+ - AMDGPU_VA_RESERVED_TRAP_SIZE)
+#define AMDGPU_VA_RESERVED_BOTTOM (1ULL << 16)
+#define AMDGPU_VA_RESERVED_TOP (AMDGPU_VA_RESERVED_TRAP_SIZE + \
+ AMDGPU_VA_RESERVED_SEQ64_SIZE + \
+ AMDGPU_VA_RESERVED_CSA_SIZE)
/* See vm_update_mode */
#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
@@ -145,9 +203,13 @@ struct amdgpu_vm_bo_base {
/* protected by bo being reserved */
struct amdgpu_vm_bo_base *next;
- /* protected by spinlock */
+ /* protected by vm status_lock */
struct list_head vm_status;
+ /* if the bo is counted as shared in mem stats
+ * protected by vm status_lock */
+ bool shared;
+
/* protected by the BO being reserved */
bool moved;
};
@@ -174,10 +236,10 @@ struct amdgpu_vm_pte_funcs {
};
struct amdgpu_task_info {
- char process_name[TASK_COMM_LEN];
- char task_name[TASK_COMM_LEN];
- pid_t pid;
- pid_t tgid;
+ struct drm_wedge_task_info task;
+ char process_name[TASK_COMM_LEN];
+ pid_t tgid;
+ struct kref refcount;
};
/**
@@ -227,15 +289,26 @@ struct amdgpu_vm_update_params {
unsigned int num_dw_left;
/**
- * @table_freed: return true if page table is freed when updating
+ * @needs_flush: true whenever we need to invalidate the TLB
+ */
+ bool needs_flush;
+
+ /**
+ * @allow_override: true for memory that is not uncached: allows MTYPE
+ * to be overridden for NUMA local memory.
*/
- bool table_freed;
+ bool allow_override;
+
+ /**
+ * @tlb_flush_waitlist: temporary storage for BOs until tlb_flush
+ */
+ struct list_head tlb_flush_waitlist;
};
struct amdgpu_vm_update_funcs {
int (*map_table)(struct amdgpu_bo_vm *bo);
- int (*prepare)(struct amdgpu_vm_update_params *p, struct dma_resv *resv,
- enum amdgpu_sync_mode sync_mode);
+ int (*prepare)(struct amdgpu_vm_update_params *p,
+ struct amdgpu_sync *sync, u64 k_job_id);
int (*update)(struct amdgpu_vm_update_params *p,
struct amdgpu_bo_vm *bo, uint64_t pe, uint64_t addr,
unsigned count, uint32_t incr, uint64_t flags);
@@ -243,6 +316,22 @@ struct amdgpu_vm_update_funcs {
struct dma_fence **fence);
};
+/* Record of one VM page fault: where it hit, the raw status and the hub. */
+struct amdgpu_vm_fault_info {
+ /* fault address */
+ uint64_t addr;
+ /* fault status register */
+ uint32_t status;
+ /* which vmhub? gfxhub, mmhub, etc. */
+ unsigned int vmhub;
+};
+
+/*
+ * Memory statistics for one placement; struct amdgpu_vm keeps an array of
+ * these indexed up to __AMDGPU_PL_NUM.
+ */
+struct amdgpu_mem_stats {
+ /* generic DRM memory statistics */
+ struct drm_memory_stats drm;
+
+ /* buffers that requested this placement but are currently evicted */
+ uint64_t evicted;
+};
+
struct amdgpu_vm {
/* tree of virtual addresses mapped */
struct rb_root_cached va;
@@ -254,7 +343,20 @@ struct amdgpu_vm {
bool evicting;
unsigned int saved_flags;
- /* BOs who needs a validation */
+ /* Lock to protect vm_bo add/del/move on all lists of vm */
+ spinlock_t status_lock;
+
+ /* Memory statistics for this vm, protected by status_lock */
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM];
+
+ /*
+ * The following lists contain amdgpu_vm_bo_base objects for either
+ * PDs, PTs or per VM BOs. The state transits are:
+ *
+ * evicted -> relocated (PDs, PTs) or moved (per VM BOs) -> idle
+ */
+
+ /* Per-VM and PT BOs which need validation */
struct list_head evicted;
/* PT BOs which relocated and their parent need an update */
@@ -266,16 +368,29 @@ struct amdgpu_vm {
/* All BOs of this VM not currently in the state machine */
struct list_head idle;
+ /*
+ * The following lists contain amdgpu_vm_bo_base objects for BOs which
+ * have their own dma_resv object and do not depend on the root PD. Their
+ * state transits are:
+ *
+ * evicted_user or invalidated -> done
+ */
+
+ /* BOs for user mode queues that need a validation */
+ struct list_head evicted_user;
+
/* regular invalidated BOs, but not yet updated in the PT */
struct list_head invalidated;
- spinlock_t invalidated_lock;
-
- /* BO mappings freed, but not yet updated in the PT */
- struct list_head freed;
/* BOs which are invalidated, has been updated in the PTs */
struct list_head done;
+ /*
+ * This list contains amdgpu_bo_va_mapping objects which have been freed
+ * but not updated in the PTs
+ */
+ struct list_head freed;
+
/* contains the page directory */
struct amdgpu_vm_bo_base root;
struct dma_fence *last_update;
@@ -284,11 +399,19 @@ struct amdgpu_vm {
struct drm_sched_entity immediate;
struct drm_sched_entity delayed;
+ /* Last finished delayed update */
+ atomic64_t tlb_seq;
+ struct dma_fence *last_tlb_flush;
+ atomic64_t kfd_last_flushed_seq;
+ uint64_t tlb_fence_context;
+
+ /* How many times we had to re-generate the page tables */
+ uint64_t generation;
+
/* Last unlocked submission to the scheduler entities */
struct dma_fence *last_unlocked;
unsigned int pasid;
- /* dedicated to vm */
struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS];
/* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
@@ -297,9 +420,6 @@ struct amdgpu_vm {
/* Functions to use for VM table updates */
const struct amdgpu_vm_update_funcs *update_funcs;
- /* Flag to indicate ATS support from PTE for GFX9 */
- bool pte_support_ats;
-
/* Up to 128 pending retry page faults */
DECLARE_KFIFO(faults, u64, 128);
@@ -313,14 +433,18 @@ struct amdgpu_vm {
uint64_t pd_phys_addr;
/* Some basic info about the task */
- struct amdgpu_task_info task_info;
+ struct amdgpu_task_info *task_info;
/* Store positions of group of BOs */
struct ttm_lru_bulk_move lru_bulk_move;
- /* mark whether can do the bulk move */
- bool bulk_moveable;
/* Flag to indicate if VM is used for compute */
bool is_compute_context;
+
+ /* Memory partition number, -1 means any partition */
+ int8_t mem_id;
+
+ /* cached fault info */
+ struct amdgpu_vm_fault_info fault_info;
};
struct amdgpu_vm_manager {
@@ -329,10 +453,6 @@ struct amdgpu_vm_manager {
unsigned int first_kfd_vmid;
bool concurrent_flush;
- /* Handling of VM fences */
- u64 fence_context;
- unsigned seqno[AMDGPU_MAX_RINGS];
-
uint64_t max_pfn;
uint32_t num_level;
uint32_t block_size;
@@ -360,6 +480,8 @@ struct amdgpu_vm_manager {
* look up VM of a page fault
*/
struct xarray pasids;
+ /* Global registration of recent page fault information */
+ struct amdgpu_vm_fault_info fault_info;
};
struct amdgpu_bo_va_mapping;
@@ -374,21 +496,20 @@ extern const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs;
void amdgpu_vm_manager_init(struct amdgpu_device *adev);
void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
-int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- u32 pasid);
-
long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id, uint32_t pasid);
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
-void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
-void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
- struct list_head *validated,
- struct amdgpu_bo_list_entry *entry);
+int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences);
+int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences);
bool amdgpu_vm_ready(struct amdgpu_vm *vm);
-int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- int (*callback)(void *p, struct amdgpu_bo *bo),
- void *param);
+uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket,
+ int (*callback)(void *p, struct amdgpu_bo *bo),
+ void *param);
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
struct amdgpu_vm *vm, bool immediate);
@@ -396,22 +517,31 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct dma_fence **fence);
int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm);
-int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
- struct amdgpu_device *bo_adev,
- struct amdgpu_vm *vm, bool immediate,
- bool unlocked, struct dma_resv *resv,
- uint64_t start, uint64_t last,
- uint64_t flags, uint64_t offset,
- struct ttm_resource *res,
- dma_addr_t *pages_addr,
- struct dma_fence **fence, bool *free_table);
+ struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket);
+int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ uint32_t flush_type,
+ uint32_t xcc_mask);
+void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
+ struct amdgpu_vm *vm, struct amdgpu_bo *bo);
+int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ bool immediate, bool unlocked, bool flush_tlb,
+ bool allow_override, struct amdgpu_sync *sync,
+ uint64_t start, uint64_t last, uint64_t flags,
+ uint64_t offset, uint64_t vram_base,
+ struct ttm_resource *res, dma_addr_t *pages_addr,
+ struct dma_fence **fence);
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
- bool clear, bool *table_freed);
+ bool clear);
bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
-void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
- struct amdgpu_bo *bo, bool evicted);
+void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted);
+void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
+ struct ttm_resource *new_res, int sign);
+void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo);
+void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
+ bool evicted);
uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
struct amdgpu_bo *bo);
@@ -421,11 +551,11 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
int amdgpu_vm_bo_map(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t addr, uint64_t offset,
- uint64_t size, uint64_t flags);
+ uint64_t size, uint32_t flags);
int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t addr, uint64_t offset,
- uint64_t size, uint64_t flags);
+ uint64_t size, uint32_t flags);
int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t addr);
@@ -435,7 +565,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
uint64_t addr);
void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket);
-void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
+void amdgpu_vm_bo_del(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va);
void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
uint32_t fragment_size_default, unsigned max_level,
@@ -445,21 +575,117 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
struct amdgpu_job *job);
void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
-void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
- struct amdgpu_task_info *task_info);
+struct amdgpu_task_info *
+amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid);
+
+struct amdgpu_task_info *
+amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm);
+
+void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info);
+
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
- uint64_t addr, bool write_fault);
+ u32 vmid, u32 node_id, uint64_t addr, uint64_t ts,
+ bool write_fault);
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
-void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo);
-void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
- uint64_t *gtt_mem, uint64_t *cpu_mem);
+void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]);
+
+int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo_vm *vmbo, bool immediate);
+int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ int level, bool immediate, struct amdgpu_bo_vm **vmbo,
+ int32_t xcp_id);
+void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+
+int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
+ struct amdgpu_vm_bo_base *entry);
+int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
+ uint64_t start, uint64_t end,
+ uint64_t dst, uint64_t flags);
+void amdgpu_vm_pt_free_work(struct work_struct *work);
+void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
+ struct amdgpu_vm_update_params *params);
#if defined(CONFIG_DEBUG_FS)
void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
#endif
+int amdgpu_vm_pt_map_tables(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+
+bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo);
+
+/**
+ * amdgpu_vm_tlb_seq - return tlb flush sequence number
+ * @vm: the amdgpu_vm structure to query
+ *
+ * Returns the tlb flush sequence number which indicates that the VM TLBs need
+ * to be invalidated whenever the sequence number changes.
+ */
+static inline uint64_t amdgpu_vm_tlb_seq(struct amdgpu_vm *vm)
+{
+ unsigned long flags;
+ spinlock_t *lock;
+
+ /*
+ * Workaround to stop racing between the fence signaling and handling
+ * the cb. The lock is static after initially setting it up, just make
+ * sure that the dma_fence structure isn't freed up.
+ */
+ rcu_read_lock();
+ lock = vm->last_tlb_flush->lock;
+ rcu_read_unlock();
+
+ /*
+ * Take and immediately release the fence lock before sampling tlb_seq
+ * (presumably so that a signaling path currently holding the lock has
+ * finished first -- confirm against the fence callback).
+ */
+ spin_lock_irqsave(lock, flags);
+ spin_unlock_irqrestore(lock, flags);
+
+ return atomic64_read(&vm->tlb_seq);
+}
+
+/*
+ * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
+ * happens while holding this lock anywhere to prevent deadlocks when
+ * an MMU notifier runs in reclaim-FS context.
+ */
+static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
+{
+ mutex_lock(&vm->eviction_lock);
+ /* Stash the previous flags in the VM; amdgpu_vm_eviction_unlock() restores them */
+ vm->saved_flags = memalloc_noreclaim_save();
+}
+
+/*
+ * Try to take the eviction lock. On success the noreclaim flags are saved in
+ * the VM exactly as amdgpu_vm_eviction_lock() does and true is returned;
+ * otherwise nothing is changed and false is returned.
+ */
+static inline bool amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
+{
+	if (!mutex_trylock(&vm->eviction_lock))
+		return false;
+
+	vm->saved_flags = memalloc_noreclaim_save();
+	return true;
+}
+
+/*
+ * Counterpart of amdgpu_vm_eviction_lock(): restore the flags saved when the
+ * lock was taken, then release the mutex.
+ */
+static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
+{
+ memalloc_noreclaim_restore(vm->saved_flags);
+ mutex_unlock(&vm->eviction_lock);
+}
+
+void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
+ unsigned int pasid,
+ uint64_t addr,
+ uint32_t status,
+ unsigned int vmhub);
+void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct dma_fence **fence);
+
+void amdgpu_vm_print_task_info(struct amdgpu_device *adev,
+ struct amdgpu_task_info *task_info);
+
+#define amdgpu_vm_bo_va_for_each_valid_mapping(bo_va, mapping) \
+ list_for_each_entry(mapping, &(bo_va)->valids, list)
+#define amdgpu_vm_bo_va_for_each_invalid_mapping(bo_va, mapping) \
+ list_for_each_entry(mapping, &(bo_va)->invalids, list)
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
index e3fbf0f10add..22e2e5b47341 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -31,6 +31,7 @@
*/
static int amdgpu_vm_cpu_map_table(struct amdgpu_bo_vm *table)
{
+ table->bo.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
return amdgpu_bo_kmap(&table->bo, NULL);
}
@@ -38,20 +39,20 @@ static int amdgpu_vm_cpu_map_table(struct amdgpu_bo_vm *table)
* amdgpu_vm_cpu_prepare - prepare page table update with the CPU
*
* @p: see amdgpu_vm_update_params definition
- * @resv: reservation object with embedded fence
- * @sync_mode: synchronization mode
+ * @sync: sync obj with fences to wait on
+ * @k_job_id: the id for tracing/debug purposes
*
* Returns:
* Negative errno, 0 for success.
*/
static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p,
- struct dma_resv *resv,
- enum amdgpu_sync_mode sync_mode)
+ struct amdgpu_sync *sync,
+ u64 k_job_id)
{
- if (!resv)
+ if (!sync)
return 0;
- return amdgpu_bo_sync_wait_resv(p->adev, resv, sync_mode, p->vm, true);
+ return amdgpu_sync_wait(sync, true);
}
/**
@@ -74,13 +75,12 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p,
{
unsigned int i;
uint64_t value;
- int r;
+ long r;
- if (vmbo->bo.tbo.moving) {
- r = dma_fence_wait(vmbo->bo.tbo.moving, true);
- if (r)
- return r;
- }
+ r = dma_resv_wait_timeout(vmbo->bo.tbo.base.resv, DMA_RESV_USAGE_KERNEL,
+ true, MAX_SCHEDULE_TIMEOUT);
+ if (r < 0)
+ return r;
pe += (unsigned long)amdgpu_bo_kptr(&vmbo->bo);
@@ -108,7 +108,9 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p,
static int amdgpu_vm_cpu_commit(struct amdgpu_vm_update_params *p,
struct dma_fence **fence)
{
- /* Flush HDP */
+ if (p->needs_flush)
+ atomic64_inc(&p->vm->tlb_seq);
+
mb();
amdgpu_device_flush_hdp(p->adev, NULL);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
new file mode 100644
index 000000000000..f794fb1cc06e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -0,0 +1,976 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_job.h"
+
+/*
+ * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt
+ *
+ * Position of a walk through the PD/PT tree. Initialised by
+ * amdgpu_vm_pt_start() and moved by the descendant/sibling/ancestor helpers.
+ */
+struct amdgpu_vm_pt_cursor {
+ /* page frame number of the current position; set to ~0 when a walk ends */
+ uint64_t pfn;
+ /* entry one level above @entry, NULL while at the root */
+ struct amdgpu_vm_bo_base *parent;
+ /* PD/PT entry the cursor currently points to */
+ struct amdgpu_vm_bo_base *entry;
+ /* page table level of @entry */
+ unsigned int level;
+};
+
+/**
+ * amdgpu_vm_pt_level_shift - address shift belonging to a page table level
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns:
+ * How many bits a pfn has to be shifted to the right to index @level: 0 for
+ * the page table block, block_size plus 9 bits per directory level above
+ * PDB0 for the page directories, and ~0 for an unknown level.
+ */
+static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev,
+					     unsigned int level)
+{
+	if (level == AMDGPU_VM_PTB)
+		return 0;
+
+	if (level == AMDGPU_VM_PDB2 || level == AMDGPU_VM_PDB1 ||
+	    level == AMDGPU_VM_PDB0)
+		return 9 * (AMDGPU_VM_PDB0 - level) +
+			adev->vm_manager.block_size;
+
+	/* Not a level we know about */
+	return ~0;
+}
+
+/**
+ * amdgpu_vm_pt_num_entries - number of entries of a PD/PT
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns:
+ * How many entries a page directory or page table at @level holds.
+ */
+static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev,
+					     unsigned int level)
+{
+	unsigned int root_shift;
+
+	if (level == adev->vm_manager.root_level) {
+		/* The root directory has to cover the whole address space */
+		root_shift = amdgpu_vm_pt_level_shift(adev,
+						      adev->vm_manager.root_level);
+		return round_up(adev->vm_manager.max_pfn, 1ULL << root_shift)
+			>> root_shift;
+	}
+
+	/* Leaf page tables */
+	if (level == AMDGPU_VM_PTB)
+		return AMDGPU_VM_PTE_COUNT(adev);
+
+	/* Every directory between the root and the leaves */
+	return 512;
+}
+
+/**
+ * amdgpu_vm_pt_entries_mask - mask selecting the entry number of a PD/PT
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns:
+ * The mask which extracts the entry index of a PD/PT from an already shifted
+ * address.
+ */
+static uint32_t amdgpu_vm_pt_entries_mask(struct amdgpu_device *adev,
+					  unsigned int level)
+{
+	/* The root level (and anything above it) is not masked at all */
+	if (level <= adev->vm_manager.root_level)
+		return 0xffffffff;
+
+	return (level == AMDGPU_VM_PTB) ? AMDGPU_VM_PTE_COUNT(adev) - 1 : 0x1ff;
+}
+
+/**
+ * amdgpu_vm_pt_size - size of a page directory/table in bytes
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns:
+ * The GPU page aligned size of the BO backing a PD/PT at @level, with eight
+ * bytes accounted per entry.
+ */
+static unsigned int amdgpu_vm_pt_size(struct amdgpu_device *adev,
+				      unsigned int level)
+{
+	unsigned int bytes = amdgpu_vm_pt_num_entries(adev, level) * 8;
+
+	return AMDGPU_GPU_PAGE_ALIGN(bytes);
+}
+
+/**
+ * amdgpu_vm_pt_parent - look up the parent page directory
+ *
+ * @pt: child page table
+ *
+ * Returns:
+ * The vm_bo of the BO recorded as parent of @pt's BO, or NULL when @pt has no
+ * parent, i.e. it is the root page directory.
+ */
+static struct amdgpu_vm_bo_base *
+amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt)
+{
+	struct amdgpu_bo *parent_bo = pt->bo->parent;
+
+	return parent_bo ? parent_bo->vm_bo : NULL;
+}
+
+/**
+ * amdgpu_vm_pt_start - begin a PD/PT walk
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: amdgpu_vm structure
+ * @start: start address of the walk
+ * @cursor: state to initialize
+ *
+ * Point @cursor at the root directory of @vm, positioned at @start.
+ */
+static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
+			       struct amdgpu_vm *vm, uint64_t start,
+			       struct amdgpu_vm_pt_cursor *cursor)
+{
+	*cursor = (struct amdgpu_vm_pt_cursor) {
+		.pfn = start,
+		.parent = NULL,
+		.entry = &vm->root,
+		.level = adev->vm_manager.root_level,
+	};
+}
+
+/**
+ * amdgpu_vm_pt_descendant - step down to the child node
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Move @cursor to the child of the current node that covers the cursor's pfn.
+ *
+ * Returns:
+ * True if the step was possible; false when the cursor is already at a page
+ * table block or the current entry has no BO.
+ */
+static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
+				    struct amdgpu_vm_pt_cursor *cursor)
+{
+	struct amdgpu_vm_bo_base *cur = cursor->entry;
+	unsigned int level = cursor->level;
+	unsigned int slot;
+
+	if (level == AMDGPU_VM_PTB)
+		return false;
+
+	if (!cur || !cur->bo)
+		return false;
+
+	/* Index of the child inside the directory we are leaving */
+	slot = (cursor->pfn >> amdgpu_vm_pt_level_shift(adev, level)) &
+	       amdgpu_vm_pt_entries_mask(adev, level);
+
+	cursor->level = level + 1;
+	cursor->parent = cur;
+	cursor->entry = &to_amdgpu_bo_vm(cur->bo)->entries[slot];
+	return true;
+}
+
+/**
+ * amdgpu_vm_pt_sibling - step to the next sibling node
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Move @cursor to the entry following the current one in the same parent and
+ * advance the pfn to the start of the range that entry covers.
+ *
+ * Returns:
+ * True if the step was possible; false for the root or when the current
+ * entry is the last one of its parent.
+ */
+static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
+				 struct amdgpu_vm_pt_cursor *cursor)
+{
+	unsigned int parent_level, shift;
+	struct amdgpu_vm_bo_base *last;
+
+	/* The root has no siblings */
+	if (!cursor->parent)
+		return false;
+
+	/* Are we already the last entry of our parent? */
+	parent_level = cursor->level - 1;
+	last = &to_amdgpu_bo_vm(cursor->parent->bo)->entries[
+			amdgpu_vm_pt_num_entries(adev, parent_level) - 1];
+	if (cursor->entry == last)
+		return false;
+
+	/* Jump to the first pfn covered by the next entry */
+	shift = amdgpu_vm_pt_level_shift(adev, parent_level);
+	cursor->pfn = (cursor->pfn + (1ULL << shift)) & ~((1ULL << shift) - 1);
+	cursor->entry++;
+	return true;
+}
+
+/**
+ * amdgpu_vm_pt_ancestor - step up to the parent node
+ *
+ * @cursor: current state
+ *
+ * Move @cursor one level up; the pfn is left untouched.
+ *
+ * Returns:
+ * True if the step was possible, false when the cursor has no parent.
+ */
+static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
+{
+	struct amdgpu_vm_bo_base *up = cursor->parent;
+
+	if (!up)
+		return false;
+
+	cursor->level--;
+	cursor->entry = up;
+	cursor->parent = amdgpu_vm_pt_parent(up);
+	return true;
+}
+
+/**
+ * amdgpu_vm_pt_next - get next PD/PT in hierarchy
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Advance @cursor to the next node of the PD/PT tree: the child if there is
+ * one, otherwise the nearest sibling of the node or of one of its ancestors.
+ * When nothing is left the cursor's pfn is set to ~0.
+ */
+static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
+			      struct amdgpu_vm_pt_cursor *cursor)
+{
+	/* Prefer going down */
+	if (amdgpu_vm_pt_descendant(adev, cursor))
+		return;
+
+	for (;;) {
+		/* Then sideways ... */
+		if (amdgpu_vm_pt_sibling(adev, cursor))
+			return;
+
+		/* ... and finally up, until we run out of ancestors */
+		if (!amdgpu_vm_pt_ancestor(cursor)) {
+			cursor->pfn = ~0ll;
+			return;
+		}
+	}
+}
+
+/**
+ * amdgpu_vm_pt_first_dfs - start a depth-first search
+ *
+ * @adev: amdgpu_device structure
+ * @vm: amdgpu_vm structure
+ * @start: optional cursor to start with
+ * @cursor: state to initialize
+ *
+ * Copy @start into @cursor, or start at the root of @vm at address 0 when
+ * @start is NULL, then descend as far down as possible.
+ */
+static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
+				   struct amdgpu_vm *vm,
+				   struct amdgpu_vm_pt_cursor *start,
+				   struct amdgpu_vm_pt_cursor *cursor)
+{
+	if (!start)
+		amdgpu_vm_pt_start(adev, vm, 0, cursor);
+	else
+		*cursor = *start;
+
+	/* Walk down to the deepest reachable node */
+	for (;;) {
+		if (!amdgpu_vm_pt_descendant(adev, cursor))
+			break;
+	}
+}
+
+/**
+ * amdgpu_vm_pt_continue_dfs - check if the depth-first search should continue
+ *
+ * @start: starting point for the search
+ * @entry: current entry
+ *
+ * Returns:
+ * False once @entry is NULL or, when a @start cursor was given, once @entry
+ * is the entry @start points to; true otherwise.
+ */
+static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start,
+				      struct amdgpu_vm_bo_base *entry)
+{
+	if (!entry)
+		return false;
+
+	return !start || entry != start->entry;
+}
+
+/**
+ * amdgpu_vm_pt_next_dfs - get the next node for a depth-first search
+ *
+ * @adev: amdgpu_device structure
+ * @cursor: current state
+ *
+ * Move @cursor to the next node of a depth-first search. A cursor without an
+ * entry is left alone; leaving the root clears the entry.
+ */
+static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
+				  struct amdgpu_vm_pt_cursor *cursor)
+{
+	if (!cursor->entry)
+		return;
+
+	/* The root is the last node of the search */
+	if (!cursor->parent) {
+		cursor->entry = NULL;
+		return;
+	}
+
+	/* No sibling left, the parent is next */
+	if (!amdgpu_vm_pt_sibling(adev, cursor)) {
+		amdgpu_vm_pt_ancestor(cursor);
+		return;
+	}
+
+	/* Otherwise dive into the sibling as deep as possible */
+	while (amdgpu_vm_pt_descendant(adev, cursor))
+		;
+}
+
+/*
+ * for_each_amdgpu_vm_pt_dfs_safe - safe depth-first search of all PDs/PTs
+ *
+ * @entry is taken from the cursor and the cursor is then advanced with
+ * amdgpu_vm_pt_next_dfs() before the loop body runs, so the cursor has
+ * already moved on while the body works on @entry (presumably this is what
+ * makes it safe for the body to free @entry). With a non-NULL @start the
+ * loop ends once @entry is the entry @start points to; otherwise it ends
+ * when @entry becomes NULL.
+ */
+#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \
+ for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)), \
+ (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
+ amdgpu_vm_pt_continue_dfs((start), (entry)); \
+ (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor)))
+
+/**
+ * amdgpu_vm_pt_clear - initially clear the PDs/PTs
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: VM to clear BO from
+ * @vmbo: BO to clear
+ * @immediate: use an immediate update
+ *
+ * Validates the BO into its placement, maps it through the VM's update
+ * backend and writes the same initial value/flags into every entry.
+ *
+ * Root PD needs to be reserved when calling this.
+ *
+ * Returns:
+ * 0 on success, -ENODEV if the device is already gone, errno otherwise.
+ */
+int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+		       struct amdgpu_bo_vm *vmbo, bool immediate)
+{
+	unsigned int level = adev->vm_manager.root_level;
+	struct ttm_operation_ctx ctx = { true, false };
+	struct amdgpu_vm_update_params params;
+	struct amdgpu_bo *ancestor = &vmbo->bo;
+	struct amdgpu_bo *bo = &vmbo->bo;
+	/*
+	 * Declared up front with the other locals: the "goto exit" error
+	 * paths below must not jump across an initialised declaration.
+	 */
+	uint64_t value = 0, flags = 0;
+	unsigned int entries;
+	uint64_t addr;
+	int r, idx;
+
+	/* Figure out our place in the hierarchy */
+	if (ancestor->parent) {
+		++level;
+		while (ancestor->parent->parent) {
+			++level;
+			ancestor = ancestor->parent;
+		}
+	}
+
+	/* Eight bytes per entry */
+	entries = amdgpu_bo_size(bo) / 8;
+
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	if (r)
+		return r;
+
+	if (!drm_dev_enter(adev_to_drm(adev), &idx))
+		return -ENODEV;
+
+	r = vm->update_funcs->map_table(vmbo);
+	if (r)
+		goto exit;
+
+	memset(&params, 0, sizeof(params));
+	params.adev = adev;
+	params.vm = vm;
+	params.immediate = immediate;
+
+	r = vm->update_funcs->prepare(&params, NULL,
+				      AMDGPU_KERNEL_JOB_ID_VM_PT_CLEAR);
+	if (r)
+		goto exit;
+
+	addr = 0;
+
+	if (adev->asic_type >= CHIP_VEGA10) {
+		if (level != AMDGPU_VM_PTB) {
+			/* Handle leaf PDEs as PTEs */
+			flags |= AMDGPU_PDE_PTE_FLAG(adev);
+			amdgpu_gmc_get_vm_pde(adev, level,
+					      &value, &flags);
+		} else {
+			/* Workaround for fault priority problem on GMC9 */
+			flags = AMDGPU_PTE_EXECUTABLE;
+		}
+	}
+
+	/* Fill all entries starting at offset 0 of the table */
+	r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
+				     value, flags);
+	if (r)
+		goto exit;
+
+	r = vm->update_funcs->commit(&params, NULL);
+exit:
+	drm_dev_exit(idx);
+	return r;
+}
+
+/**
+ * amdgpu_vm_pt_create - create bo for PD/PT
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requesting vm
+ * @level: the page table level
+ * @immediate: use an immediate update
+ * @vmbo: pointer to the buffer object pointer
+ * @xcp_id: GPU partition id
+ *
+ * Fills in the BO parameters for a page directory/table at @level and
+ * creates the BO. When the VM already has a root BO, the new BO shares the
+ * root's reservation object.
+ *
+ * Returns:
+ * The result of amdgpu_bo_create_vm().
+ */
+int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			int level, bool immediate, struct amdgpu_bo_vm **vmbo,
+			int32_t xcp_id)
+{
+	unsigned int num_entries = 0;
+	struct amdgpu_bo_param bp;
+
+	memset(&bp, 0, sizeof(bp));
+
+	bp.size = amdgpu_vm_pt_size(adev, level);
+	bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
+
+	/* GTT on app APUs, VRAM everywhere else */
+	bp.domain = amdgpu_bo_get_preferred_domain(adev,
+			adev->gmc.is_app_apu ? AMDGPU_GEM_DOMAIN_GTT :
+					       AMDGPU_GEM_DOMAIN_VRAM);
+
+	bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+		   AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+	if (vm->use_cpu_for_update)
+		bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
+	/* Only directories carry an array of child entries */
+	if (level < AMDGPU_VM_PTB)
+		num_entries = amdgpu_vm_pt_num_entries(adev, level);
+	bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries);
+
+	bp.type = ttm_bo_type_kernel;
+	bp.no_wait_gpu = immediate;
+	bp.xcp_id_plus1 = xcp_id + 1;
+
+	if (vm->root.bo)
+		bp.resv = vm->root.bo->tbo.base.resv;
+
+	return amdgpu_bo_create_vm(adev, &bp, vmbo);
+}
+
+/**
+ * amdgpu_vm_pt_alloc - Allocate a specific page table
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: VM to allocate page tables for
+ * @cursor: Which page table to allocate
+ * @immediate: use an immediate update
+ *
+ * Make sure a specific page table or directory is allocated.
+ *
+ * Returns:
+ * 0 if the page table was already allocated or has been allocated and
+ * cleared successfully, negative errno if an error occurred.
+ */
+static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_vm_pt_cursor *cursor,
+ bool immediate)
+{
+ struct amdgpu_vm_bo_base *entry = cursor->entry;
+ struct amdgpu_bo *pt_bo;
+ struct amdgpu_bo_vm *pt;
+ int r;
+
+ if (entry->bo)
+ return 0;
+
+ /* The eviction lock is dropped while the BO is created and re-taken after */
+ amdgpu_vm_eviction_unlock(vm);
+ r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt,
+ vm->root.bo->xcp_id);
+ amdgpu_vm_eviction_lock(vm);
+ if (r)
+ return r;
+
+ /* Keep a reference to the parent directory to avoid
+ * freeing them up in the wrong order.
+ */
+ pt_bo = &pt->bo;
+ pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo);
+ amdgpu_vm_bo_base_init(entry, vm, pt_bo);
+ r = amdgpu_vm_pt_clear(adev, vm, pt, immediate);
+ if (r)
+ goto error_free_pt;
+
+ return 0;
+
+error_free_pt:
+ /*
+ * NOTE(review): entry was initialised with pt_bo above; confirm nothing
+ * keeps using entry->bo after this reference is dropped.
+ */
+ amdgpu_bo_unref(&pt_bo);
+ return r;
+}
+
+/**
+ * amdgpu_vm_pt_free - free one PD/PT
+ *
+ * @entry: PDE to free
+ *
+ * Does nothing when @entry has no BO. Otherwise the BO is removed from the
+ * VM statistics, detached from the VM and from its bulk move, taken off the
+ * VM status list and finally unreferenced.
+ */
+static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
+{
+ if (!entry->bo)
+ return;
+
+ /* Account for the removal (-1) before the BO is detached */
+ amdgpu_vm_update_stats(entry, entry->bo->tbo.resource, -1);
+ entry->bo->vm_bo = NULL;
+ ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
+
+ /* The status list is only modified under the VM's status_lock */
+ spin_lock(&entry->vm->status_lock);
+ list_del(&entry->vm_status);
+ spin_unlock(&entry->vm->status_lock);
+ amdgpu_bo_unref(&entry->bo);
+}
+
+/**
+ * amdgpu_vm_pt_free_list - free PD/PT levels
+ *
+ * @adev: amdgpu device structure (not referenced by this function)
+ * @params: see amdgpu_vm_update_params definition
+ *
+ * Free the page directory objects saved in the flush list
+ * (@params->tlb_flush_waitlist). Returns early when that list is empty.
+ */
+void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
+ struct amdgpu_vm_update_params *params)
+{
+ struct amdgpu_vm_bo_base *entry, *next;
+ bool unlocked = params->unlocked;
+
+ if (list_empty(&params->tlb_flush_waitlist))
+ return;
+
+ /*
+ * Unlocked unmaps only clear page table leaves; warn if such an update
+ * nevertheless has page tables queued for freeing.
+ */
+ WARN_ON(unlocked);
+
+ /* _safe variant: amdgpu_vm_pt_free() unlinks the entry being visited */
+ list_for_each_entry_safe(entry, next, &params->tlb_flush_waitlist, vm_status)
+ amdgpu_vm_pt_free(entry);
+}
+
+/**
+ * amdgpu_vm_pt_add_list - add PD/PT level to the flush list
+ *
+ * @params: parameters for the update
+ * @cursor: first PT entry to start DF search from, non NULL
+ *
+ * Moves every entry below @cursor that has a BO, and then @cursor's own
+ * entry, onto @params->tlb_flush_waitlist while holding the VM status_lock.
+ * This list will be freed after TLB flush.
+ */
+static void amdgpu_vm_pt_add_list(struct amdgpu_vm_update_params *params,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ struct amdgpu_vm_pt_cursor seek;
+ struct amdgpu_vm_bo_base *entry;
+
+ spin_lock(&params->vm->status_lock);
+ /* Walk the sub-tree depth first and queue everything that has a BO */
+ for_each_amdgpu_vm_pt_dfs_safe(params->adev, params->vm, cursor, seek, entry) {
+ if (entry && entry->bo)
+ list_move(&entry->vm_status, &params->tlb_flush_waitlist);
+ }
+
+ /* enter start node now */
+ list_move(&cursor->entry->vm_status, &params->tlb_flush_waitlist);
+ spin_unlock(&params->vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_pt_free_root - free root PD
+ * @adev: amdgpu device structure
+ * @vm: amdgpu vm structure
+ *
+ * Free the root page directory and everything below it by walking the whole
+ * hierarchy depth first and calling amdgpu_vm_pt_free() on each entry.
+ */
+void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ struct amdgpu_vm_pt_cursor cursor;
+ struct amdgpu_vm_bo_base *entry;
+
+ /* A NULL start cursor makes the walk begin at the root */
+ for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) {
+ if (entry)
+ amdgpu_vm_pt_free(entry);
+ }
+}
+
+/**
+ * amdgpu_vm_pde_update - update a single level in the hierarchy
+ *
+ * @params: parameters for the update
+ * @entry: entry to update
+ *
+ * Makes sure the requested entry in parent is up to date.
+ *
+ * Returns:
+ * -EINVAL (with a warning) if @entry has no parent, otherwise the result of
+ * the VM's update callback.
+ */
+int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
+ struct amdgpu_vm_bo_base *entry)
+{
+ struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry);
+ struct amdgpu_bo *bo, *pbo;
+ struct amdgpu_vm *vm = params->vm;
+ uint64_t pde, pt, flags;
+ unsigned int level;
+
+ if (WARN_ON(!parent))
+ return -EINVAL;
+
+ bo = parent->bo;
+ /* Count how many ancestors the parent BO has to find its depth */
+ for (level = 0, pbo = bo->parent; pbo; ++level)
+ pbo = pbo->parent;
+
+ /* Convert the depth into an absolute page table level */
+ level += params->adev->vm_manager.root_level;
+ amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags);
+ /* Byte offset of the PDE: index of entry in the parent times 8 */
+ pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8;
+ return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt,
+ 1, 0, flags);
+}
+
+/**
+ * amdgpu_vm_pte_update_noretry_flags - Update PTE no-retry flags
+ *
+ * @adev: amdgpu_device pointer
+ * @flags: pointer to PTE flags
+ *
+ * Update PTE no-retry flags when TF is enabled. @flags is modified in place
+ * and only when all AMDGPU_VM_NORETRY_FLAGS bits are set.
+ */
+static void amdgpu_vm_pte_update_noretry_flags(struct amdgpu_device *adev,
+ uint64_t *flags)
+{
+ /*
+ * Update no-retry flags with the corresponding TF
+ * no-retry combination: clear the generic bits and substitute the
+ * device specific adev->gmc.noretry_flags.
+ */
+ if ((*flags & AMDGPU_VM_NORETRY_FLAGS) == AMDGPU_VM_NORETRY_FLAGS) {
+ *flags &= ~AMDGPU_VM_NORETRY_FLAGS;
+ *flags |= adev->gmc.noretry_flags;
+ }
+}
+
+/*
+ * amdgpu_vm_pte_update_flags - figure out flags for PTEs updates
+ *
+ * @params: parameters for the update
+ * @pt: page table/directory to write into
+ * @level: level of @pt in the hierarchy
+ * @pe: byte offset of the first entry to write
+ * @addr: destination address for the first entry
+ * @count: number of entries to write
+ * @incr: address increment between consecutive entries
+ * @flags: mapping flags before the per-level adjustments made here
+ *
+ * Make sure to set the right flags for the PTEs at the desired level and
+ * hand the update to the VM's update callback. The callback's return value
+ * is not checked.
+ */
+static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
+ struct amdgpu_bo_vm *pt,
+ unsigned int level,
+ uint64_t pe, uint64_t addr,
+ unsigned int count, uint32_t incr,
+ uint64_t flags)
+{
+ struct amdgpu_device *adev = params->adev;
+
+ if (level != AMDGPU_VM_PTB) {
+ /* Entry written above the leaf level: mark it as a PTE in a PD */
+ flags |= AMDGPU_PDE_PTE_FLAG(params->adev);
+ amdgpu_gmc_get_vm_pde(adev, level, &addr, &flags);
+
+ } else if (adev->asic_type >= CHIP_VEGA10 &&
+ !(flags & AMDGPU_PTE_VALID) &&
+ !(flags & AMDGPU_PTE_PRT_FLAG(params->adev))) {
+
+ /* Workaround for fault priority problem on GMC9 */
+ flags |= AMDGPU_PTE_EXECUTABLE;
+ }
+
+ /*
+ * Update no-retry flags to use the no-retry flag combination
+ * with TF enabled. The AMDGPU_VM_NORETRY_FLAGS flag combination
+ * does not work when TF is enabled. So, replace them with
+ * AMDGPU_VM_NORETRY_FLAGS_TF flag combination which works for
+ * all cases.
+ */
+ if (level == AMDGPU_VM_PTB)
+ amdgpu_vm_pte_update_noretry_flags(adev, &flags);
+
+ /* APUs mapping system memory may need different MTYPEs on different
+ * NUMA nodes. Only do this for contiguous ranges that can be assumed
+ * to be on the same NUMA node.
+ */
+ if ((flags & AMDGPU_PTE_SYSTEM) && (adev->flags & AMD_IS_APU) &&
+ adev->gmc.gmc_funcs->override_vm_pte_flags &&
+ num_possible_nodes() > 1 && !params->pages_addr && params->allow_override)
+ amdgpu_gmc_override_vm_pte_flags(adev, params->vm, addr, &flags);
+
+ params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
+ flags);
+}
+
+/**
+ * amdgpu_vm_pte_fragment - get fragment for PTEs
+ *
+ * @params: see amdgpu_vm_update_params definition
+ * @start: first PTE to handle
+ * @end: last PTE to handle
+ * @flags: hw mapping flags (not referenced by this function)
+ * @frag: resulting fragment size
+ * @frag_end: end of this fragment
+ *
+ * Returns the first possible fragment for the start and end address through
+ * @frag and @frag_end.
+ */
+static void amdgpu_vm_pte_fragment(struct amdgpu_vm_update_params *params,
+ uint64_t start, uint64_t end, uint64_t flags,
+ unsigned int *frag, uint64_t *frag_end)
+{
+ /**
+ * The MC L1 TLB supports variable sized pages, based on a fragment
+ * field in the PTE. When this field is set to a non-zero value, page
+ * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
+ * flags are considered valid for all PTEs within the fragment range
+ * and corresponding mappings are assumed to be physically contiguous.
+ *
+ * The L1 TLB can store a single PTE for the whole fragment,
+ * significantly increasing the space available for translation
+ * caching. This leads to large improvements in throughput when the
+ * TLB is under pressure.
+ *
+ * The L2 TLB distributes small and large fragments into two
+ * asymmetric partitions. The large fragment cache is significantly
+ * larger. Thus, we try to use large fragments wherever possible.
+ * Userspace can support this by aligning virtual base address and
+ * allocation size to the fragment size.
+ *
+ * Starting with Vega10 the fragment size only controls the L1. The L2
+ * is now directly fed with small/huge/giant pages from the walker.
+ */
+ unsigned int max_frag;
+
+ if (params->adev->asic_type < CHIP_VEGA10)
+ max_frag = params->adev->vm_manager.fragment_size;
+ else
+ max_frag = 31;
+
+ /* system pages are not contiguous, so no fragment can be used */
+ if (params->pages_addr) {
+ *frag = 0;
+ *frag_end = end;
+ return;
+ }
+
+ /*
+ * This intentionally wraps around if no bit is set.
+ * NOTE(review): start is 64 bit while ffs() presumably takes an int,
+ * so only the low 32 bits would be examined - confirm this is intended
+ * together with the clamp to max_frag below.
+ */
+ *frag = min_t(unsigned int, ffs(start) - 1, fls64(end - start) - 1);
+ if (*frag >= max_frag) {
+ /* Clamp to max_frag and end on a max_frag aligned boundary */
+ *frag = max_frag;
+ *frag_end = end & ~((1ULL << max_frag) - 1);
+ } else {
+ /* Exactly one fragment of size 1 << *frag starting at start */
+ *frag_end = start + (1 << *frag);
+ }
+}
+
+/**
+ * amdgpu_vm_ptes_update - make sure that page tables are valid
+ *
+ * @params: see amdgpu_vm_update_params definition
+ * @start: start of GPU address range
+ * @end: end of GPU address range
+ * @dst: destination address to map to, the next dst inside the function
+ * @flags: mapping flags
+ *
+ * Update the page tables in the range @start - @end.
+ *
+ * Returns:
+ * 0 for success, the error from amdgpu_vm_pt_alloc() if a page table could
+ * not be allocated, -ENOENT if a page table needed for the mapping is
+ * missing, -EINVAL if the cursor cannot move up to an ancestor.
+ */
+int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
+ uint64_t start, uint64_t end,
+ uint64_t dst, uint64_t flags)
+{
+ struct amdgpu_device *adev = params->adev;
+ struct amdgpu_vm_pt_cursor cursor;
+ uint64_t frag_start = start, frag_end;
+ unsigned int frag;
+ int r;
+
+ /* figure out the initial fragment */
+ amdgpu_vm_pte_fragment(params, frag_start, end, flags, &frag,
+ &frag_end);
+
+ /* walk over the address space and update the PTs */
+ amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
+ while (cursor.pfn < end) {
+ unsigned int shift, parent_shift, mask;
+ uint64_t incr, entry_end, pe_start;
+ struct amdgpu_bo *pt;
+
+ if (!params->unlocked) {
+ /* make sure that the page tables covering the
+ * address range are actually allocated
+ */
+ r = amdgpu_vm_pt_alloc(params->adev, params->vm,
+ &cursor, params->immediate);
+ if (r)
+ return r;
+ }
+
+ shift = amdgpu_vm_pt_level_shift(adev, cursor.level);
+ parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1);
+ if (params->unlocked) {
+ /* Unlocked updates are only allowed on the leaves */
+ if (amdgpu_vm_pt_descendant(adev, &cursor))
+ continue;
+ } else if (adev->asic_type < CHIP_VEGA10 &&
+ (flags & AMDGPU_PTE_VALID)) {
+ /* No huge page support before GMC v9 */
+ if (cursor.level != AMDGPU_VM_PTB) {
+ if (!amdgpu_vm_pt_descendant(adev, &cursor))
+ return -ENOENT;
+ continue;
+ }
+ } else if (frag < shift) {
+ /* We can't use this level when the fragment size is
+ * smaller than the address shift. Go to the next
+ * child entry and try again.
+ */
+ if (amdgpu_vm_pt_descendant(adev, &cursor))
+ continue;
+ } else if (frag >= parent_shift) {
+ /* If the fragment size is even larger than the parent
+ * shift we should go up one level and check it again.
+ */
+ if (!amdgpu_vm_pt_ancestor(&cursor))
+ return -EINVAL;
+ continue;
+ }
+
+ pt = cursor.entry->bo;
+ if (!pt) {
+ /* We need all PDs and PTs for mapping something, */
+ if (flags & AMDGPU_PTE_VALID)
+ return -ENOENT;
+
+ /* but unmapping something can happen at a higher
+ * level.
+ */
+ if (!amdgpu_vm_pt_ancestor(&cursor))
+ return -EINVAL;
+
+ pt = cursor.entry->bo;
+ shift = parent_shift;
+ frag_end = max(frag_end, ALIGN(frag_start + 1,
+ 1ULL << shift));
+ }
+
+ /* Looks good so far, calculate parameters for the update */
+ incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
+ mask = amdgpu_vm_pt_entries_mask(adev, cursor.level);
+ /* Byte offset of the first entry: index in the table times 8 */
+ pe_start = ((cursor.pfn >> shift) & mask) * 8;
+
+ if (cursor.level < AMDGPU_VM_PTB && params->unlocked)
+ /*
+ * MMU notifier callback unlocked unmap of a huge page: the leaf is a
+ * PDE entry, so only clear one entry. The next iteration searches
+ * again for a PDE or PTE leaf.
+ */
+ entry_end = 1ULL << shift;
+ else
+ entry_end = ((uint64_t)mask + 1) << shift;
+ /* Turn the span into an absolute end and never run past @end */
+ entry_end += cursor.pfn & ~(entry_end - 1);
+ entry_end = min(entry_end, end);
+
+ do {
+ struct amdgpu_vm *vm = params->vm;
+ uint64_t upd_end = min(entry_end, frag_end);
+ unsigned int nptes = (upd_end - frag_start) >> shift;
+ uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);
+
+ /* This can happen when we set higher level PDs to
+ * silent to stop fault floods.
+ */
+ nptes = max(nptes, 1u);
+
+ trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
+ min(nptes, 32u), dst, incr,
+ upd_flags,
+ vm->task_info ? vm->task_info->tgid : 0,
+ vm->immediate.fence_context);
+ amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt),
+ cursor.level, pe_start, dst,
+ nptes, incr, upd_flags);
+
+ /* Advance past the entries that were just written */
+ pe_start += nptes * 8;
+ dst += nptes * incr;
+
+ frag_start = upd_end;
+ if (frag_start >= frag_end) {
+ /* figure out the next fragment */
+ amdgpu_vm_pte_fragment(params, frag_start, end,
+ flags, &frag, &frag_end);
+ if (frag < shift)
+ break;
+ }
+ } while (frag_start < entry_end);
+
+ if (amdgpu_vm_pt_descendant(adev, &cursor)) {
+ /* Free all child entries.
+ * Update the tables with the flags and addresses and free up subsequent
+ * tables in the case of huge pages or freed up areas.
+ * This is the maximum you can free, because all other page tables are not
+ * completely covered by the range and so potentially still in use.
+ */
+ while (cursor.pfn < frag_start) {
+ /* Make sure previous mapping is freed */
+ if (cursor.entry->bo) {
+ params->needs_flush = true;
+ amdgpu_vm_pt_add_list(params, &cursor);
+ }
+ amdgpu_vm_pt_next(adev, &cursor);
+ }
+
+ } else if (frag >= shift) {
+ /* or just move on to the next on the same level. */
+ amdgpu_vm_pt_next(adev, &cursor);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_pt_map_tables - have bo of root PD cpu accessible
+ * @adev: amdgpu device structure
+ * @vm: amdgpu vm structure
+ *
+ * make root page directory and everything below it cpu accessible.
+ *
+ * Returns:
+ * 0 on success, otherwise the first error returned by the VM's map_table
+ * callback; the walk stops at that point.
+ */
+int amdgpu_vm_pt_map_tables(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ struct amdgpu_vm_pt_cursor cursor;
+ struct amdgpu_vm_bo_base *entry;
+
+ /* A NULL start cursor makes the depth first walk begin at the root */
+ for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) {
+
+ struct amdgpu_bo_vm *bo;
+ int r;
+
+ /* Entries without a backing BO have nothing to map */
+ if (entry->bo) {
+ bo = to_amdgpu_bo_vm(entry->bo);
+ r = vm->update_funcs->map_table(bo);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index dbb551762805..36805dcfa159 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -35,47 +35,63 @@
*/
static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table)
{
+ return amdgpu_ttm_alloc_gart(&table->bo.tbo);
+}
+
+/* Allocate a new job for @count PTE updates */
+static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p,
+ unsigned int count, u64 k_job_id)
+{
+ enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
+ : AMDGPU_IB_POOL_DELAYED;
+ struct drm_sched_entity *entity = p->immediate ? &p->vm->immediate
+ : &p->vm->delayed;
+ unsigned int ndw;
int r;
- r = amdgpu_ttm_alloc_gart(&table->bo.tbo);
+ /* estimate how many dw we need */
+ ndw = AMDGPU_VM_SDMA_MIN_NUM_DW;
+ if (p->pages_addr)
+ ndw += count * 2;
+ ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW);
+
+ r = amdgpu_job_alloc_with_ib(p->adev, entity, AMDGPU_FENCE_OWNER_VM,
+ ndw * 4, pool, &p->job, k_job_id);
if (r)
return r;
- if (table->shadow)
- r = amdgpu_ttm_alloc_gart(&table->shadow->tbo);
-
- return r;
+ p->num_dw_left = ndw;
+ return 0;
}
/**
* amdgpu_vm_sdma_prepare - prepare SDMA command submission
*
* @p: see amdgpu_vm_update_params definition
- * @resv: reservation object with embedded fence
- * @sync_mode: synchronization mode
+ * @sync: amdgpu_sync object with fences to wait for
+ * @k_job_id: identifier of the job, for tracing purpose
*
* Returns:
* Negativ errno, 0 for success.
*/
static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
- struct dma_resv *resv,
- enum amdgpu_sync_mode sync_mode)
+ struct amdgpu_sync *sync, u64 k_job_id)
{
- enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
- : AMDGPU_IB_POOL_DELAYED;
- unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW;
int r;
- r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool, &p->job);
+ r = amdgpu_vm_sdma_alloc_job(p, 0, k_job_id);
if (r)
return r;
- p->num_dw_left = ndw;
-
- if (!resv)
+ if (!sync)
return 0;
- return amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode, p->vm);
+ r = amdgpu_sync_push_to_job(sync, p->job);
+ if (r) {
+ p->num_dw_left = 0;
+ amdgpu_job_free(p->job);
+ }
+ return r;
}
/**
@@ -91,38 +107,42 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
struct dma_fence **fence)
{
struct amdgpu_ib *ib = p->job->ibs;
- struct drm_sched_entity *entity;
struct amdgpu_ring *ring;
struct dma_fence *f;
- int r;
- entity = p->immediate ? &p->vm->immediate : &p->vm->delayed;
- ring = container_of(entity->rq->sched, struct amdgpu_ring, sched);
+ ring = container_of(p->vm->delayed.rq->sched, struct amdgpu_ring,
+ sched);
WARN_ON(ib->length_dw == 0);
amdgpu_ring_pad_ib(ring, ib);
+
+ if (p->needs_flush)
+ atomic64_inc(&p->vm->tlb_seq);
+
WARN_ON(ib->length_dw > p->num_dw_left);
- r = amdgpu_job_submit(p->job, entity, AMDGPU_FENCE_OWNER_VM, &f);
- if (r)
- goto error;
+ f = amdgpu_job_submit(p->job);
if (p->unlocked) {
struct dma_fence *tmp = dma_fence_get(f);
- swap(p->vm->last_unlocked, f);
+ swap(p->vm->last_unlocked, tmp);
dma_fence_put(tmp);
} else {
- amdgpu_bo_fence(p->vm->root.bo, f, true);
+ dma_resv_add_fence(p->vm->root.bo->tbo.base.resv, f,
+ DMA_RESV_USAGE_BOOKKEEP);
}
- if (fence && !p->immediate)
+ if (fence && !p->immediate) {
+ /*
+ * Most hw generations now have a separate queue for page table
+ * updates, but when the queue is shared with userspace we need
+ * the extra CPU round trip to correctly flush the TLB.
+ */
+ set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &f->flags);
swap(*fence, f);
+ }
dma_fence_put(f);
return 0;
-
-error:
- amdgpu_job_free(p->job);
- return r;
}
/**
@@ -144,7 +164,7 @@ static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p,
src += p->num_dw_left * 4;
- pe += amdgpu_gmc_sign_extend(amdgpu_bo_gpu_offset_no_check(bo));
+ pe += amdgpu_bo_gpu_offset_no_check(bo);
trace_amdgpu_vm_copy_ptes(pe, src, count, p->immediate);
amdgpu_vm_copy_pte(p->adev, ib, pe, src, count);
@@ -171,7 +191,7 @@ static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p,
{
struct amdgpu_ib *ib = p->job->ibs;
- pe += amdgpu_gmc_sign_extend(amdgpu_bo_gpu_offset_no_check(bo));
+ pe += amdgpu_bo_gpu_offset_no_check(bo);
trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->immediate);
if (count < 3) {
amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags,
@@ -202,16 +222,24 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
uint64_t flags)
{
struct amdgpu_bo *bo = &vmbo->bo;
- enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
- : AMDGPU_IB_POOL_DELAYED;
+ struct dma_resv_iter cursor;
unsigned int i, ndw, nptes;
+ struct dma_fence *fence;
uint64_t *pte;
int r;
/* Wait for PD/PT moves to be completed */
- r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving);
- if (r)
- return r;
+ dma_resv_iter_begin(&cursor, bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL);
+ dma_resv_for_each_fence_unlocked(&cursor, fence) {
+ dma_fence_get(fence);
+ r = drm_sched_job_add_dependency(&p->job->base, fence);
+ if (r) {
+ dma_fence_put(fence);
+ dma_resv_iter_end(&cursor);
+ return r;
+ }
+ }
+ dma_resv_iter_end(&cursor);
do {
ndw = p->num_dw_left;
@@ -222,34 +250,21 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
if (r)
return r;
- /* estimate how many dw we need */
- ndw = 32;
- if (p->pages_addr)
- ndw += count * 2;
- ndw = max(ndw, AMDGPU_VM_SDMA_MIN_NUM_DW);
- ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW);
-
- r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool,
- &p->job);
+ r = amdgpu_vm_sdma_alloc_job(p, count,
+ AMDGPU_KERNEL_JOB_ID_VM_UPDATE);
if (r)
return r;
-
- p->num_dw_left = ndw;
}
if (!p->pages_addr) {
/* set page commands needed */
- if (vmbo->shadow)
- amdgpu_vm_sdma_set_ptes(p, vmbo->shadow, pe, addr,
- count, incr, flags);
amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count,
incr, flags);
return 0;
}
/* copy commands needed */
- ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw *
- (vmbo->shadow ? 2 : 1);
+ ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
/* for padding */
ndw -= 7;
@@ -264,8 +279,6 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
pte[i] |= flags;
}
- if (vmbo->shadow)
- amdgpu_vm_sdma_copy_ptes(p, vmbo->shadow, pe, nptes);
amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes);
pe += nptes * 8;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
new file mode 100644
index 000000000000..5d26797356a3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/workqueue.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_gmc.h"
+
+/* Fence that signals once the TLB flush for one PASID has been carried out */
+struct amdgpu_tlb_fence {
+ struct dma_fence base; /* embedded fence handed back to the caller */
+ struct amdgpu_device *adev; /* device the flush is issued on */
+ struct dma_fence *dependency; /* waited for before flushing, may be NULL */
+ struct work_struct work; /* runs amdgpu_tlb_fence_work() */
+ spinlock_t lock; /* lock passed to dma_fence_init64() for base */
+ uint16_t pasid; /* PASID whose TLB entries get flushed */
+
+};
+
+/* dma_fence_ops callback: constant driver name, @fence is not inspected */
+static const char *amdgpu_tlb_fence_get_driver_name(struct dma_fence *fence)
+{
+ return "amdgpu tlb fence";
+}
+
+/* dma_fence_ops callback: constant timeline name, @f is not inspected */
+static const char *amdgpu_tlb_fence_get_timeline_name(struct dma_fence *f)
+{
+ return "amdgpu tlb timeline";
+}
+
+/*
+ * Work item behind an amdgpu_tlb_fence: wait for the dependency (if any),
+ * flush the TLB for the stored PASID, then signal the fence. A failed flush
+ * is logged and recorded as the fence error; the fence is signalled either
+ * way.
+ */
+static void amdgpu_tlb_fence_work(struct work_struct *work)
+{
+ struct amdgpu_tlb_fence *f = container_of(work, typeof(*f), work);
+ int r;
+
+ if (f->dependency) {
+ /* Block until the dependency signals, then drop our reference */
+ dma_fence_wait(f->dependency, false);
+ dma_fence_put(f->dependency);
+ f->dependency = NULL;
+ }
+
+ r = amdgpu_gmc_flush_gpu_tlb_pasid(f->adev, f->pasid, 2, true, 0);
+ if (r) {
+ dev_err(f->adev->dev, "TLB flush failed for PASID %d.\n",
+ f->pasid);
+ dma_fence_set_error(&f->base, r);
+ }
+
+ dma_fence_signal(&f->base);
+ /* Drop the reference amdgpu_vm_tlb_fence_create() took for the work */
+ dma_fence_put(&f->base);
+}
+
+/* Fence operations for TLB fences; only the two name callbacks are set */
+static const struct dma_fence_ops amdgpu_tlb_fence_ops = {
+ .get_driver_name = amdgpu_tlb_fence_get_driver_name,
+ .get_timeline_name = amdgpu_tlb_fence_get_timeline_name
+};
+
+/**
+ * amdgpu_vm_tlb_fence_create - replace a fence with one that also flushes the TLB
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: VM whose PASID is flushed
+ * @fence: in: dependency to wait for before flushing (may point to NULL);
+ * out: fence that signals after the TLB flush
+ *
+ * The reference held in @fence on entry is consumed: it is either handed to
+ * the new TLB fence as its dependency or dropped on the fallback path. This
+ * function cannot fail; when the fence cannot be allocated the wait and the
+ * flush are done synchronously and the stub fence is returned in @fence.
+ */
+void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct dma_fence **fence)
+{
+ struct amdgpu_tlb_fence *f;
+
+ f = kmalloc(sizeof(*f), GFP_KERNEL);
+ if (!f) {
+ /*
+ * We can't fail since the PDEs and PTEs are already updated, so
+ * just block for the dependency and execute the TLB flush
+ */
+ if (*fence)
+ dma_fence_wait(*fence, false);
+
+ amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, 2, true, 0);
+ /*
+ * Drop the dependency reference before overwriting the pointer,
+ * as the work item does on the regular path; dma_fence_put()
+ * accepts NULL.
+ */
+ dma_fence_put(*fence);
+ *fence = dma_fence_get_stub();
+ return;
+ }
+
+ f->adev = adev;
+ /* The caller's reference moves to the fence; the work item puts it */
+ f->dependency = *fence;
+ f->pasid = vm->pasid;
+ INIT_WORK(&f->work, amdgpu_tlb_fence_work);
+ spin_lock_init(&f->lock);
+
+ dma_fence_init64(&f->base, &amdgpu_tlb_fence_ops, &f->lock,
+ vm->tlb_fence_context, atomic64_read(&vm->tlb_seq));
+
+ /* TODO: We probably need a separate wq here */
+ /* Extra reference for the work item, dropped in amdgpu_tlb_fence_work() */
+ dma_fence_get(&f->base);
+ schedule_work(&f->work);
+
+ *fence = &f->base;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
new file mode 100644
index 000000000000..aa78c2ee9e21
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
@@ -0,0 +1,1018 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/firmware.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_vpe.h"
+#include "amdgpu_smu.h"
+#include "soc15_common.h"
+#include "vpe_v6_1.h"
+
+#define AMDGPU_CSA_VPE_SIZE 64
+/* VPE CSA resides in the 4th page of CSA */
+#define AMDGPU_CSA_VPE_OFFSET (4096 * 3)
+
+/* 1 second timeout */
+#define VPE_IDLE_TIMEOUT msecs_to_jiffies(1000)
+
+#define VPE_MAX_DPM_LEVEL 4
+#define FIXED1_8_BITS_PER_FRACTIONAL_PART 8
+#define GET_PRATIO_INTEGER_PART(x) ((x) >> FIXED1_8_BITS_PER_FRACTIONAL_PART)
+
+static void vpe_set_ring_funcs(struct amdgpu_device *adev);
+
+/*
+ * 16-bit unsigned division: returns dividend / divisor and stores
+ * dividend % divisor in *remainder. divisor is not checked for zero.
+ */
+static inline uint16_t div16_u16_rem(uint16_t dividend, uint16_t divisor, uint16_t *remainder)
+{
+ *remainder = dividend % divisor;
+ return dividend / divisor;
+}
+
+/* Thin wrapper around div16_u16_rem(): quotient returned, remainder stored */
+static inline uint16_t complete_integer_division_u16(
+ uint16_t dividend,
+ uint16_t divisor,
+ uint16_t *remainder)
+{
+ return div16_u16_rem(dividend, divisor, (uint16_t *)remainder);
+}
+
+/*
+ * Convert numerator / denominator into a fixed point value with
+ * FIXED1_8_BITS_PER_FRACTIONAL_PART (8) fractional bits, rounding the last
+ * bit. Returns 0 when the integer part exceeds 127 or the rounded result
+ * would exceed 32767. denominator is not checked for zero here, so callers
+ * have to pass a non-zero value.
+ */
+static uint16_t vpe_u1_8_from_fraction(uint16_t numerator, uint16_t denominator)
+{
+ u16 arg1_value = numerator;
+ u16 arg2_value = denominator;
+
+ uint16_t remainder;
+
+ /* determine integer part */
+ uint16_t res_value = complete_integer_division_u16(
+ arg1_value, arg2_value, &remainder);
+
+ if (res_value > 127 /* CHAR_MAX */)
+ return 0;
+
+ /* determine fractional part */
+ {
+ unsigned int i = FIXED1_8_BITS_PER_FRACTIONAL_PART;
+
+ /* Binary long division: one fractional bit per iteration */
+ do {
+ remainder <<= 1;
+
+ res_value <<= 1;
+
+ if (remainder >= arg2_value) {
+ res_value |= 1;
+ remainder -= arg2_value;
+ }
+ } while (--i != 0);
+ }
+
+ /* round up LSB */
+ {
+ /* 1 when the leftover remainder is at least half the divisor */
+ uint16_t summand = (remainder << 1) >= arg2_value;
+
+ if ((res_value + summand) > 32767 /* SHRT_MAX */)
+ return 0;
+
+ res_value += summand;
+ }
+
+ return res_value;
+}
+
+/*
+ * Ratio from_frequency / to_frequency as a fixed point value with 8
+ * fractional bits. Returns 0 when the integer part of the ratio is larger
+ * than 1 (or when vpe_u1_8_from_fraction() itself returned 0).
+ */
+static uint16_t vpe_internal_get_pratio(uint16_t from_frequency, uint16_t to_frequency)
+{
+ uint16_t pratio = vpe_u1_8_from_fraction(from_frequency, to_frequency);
+
+ if (GET_PRATIO_INTEGER_PART(pratio) > 1)
+ pratio = 0;
+
+ return pratio;
+}
+
+/*
+ * VPE has 4 DPM levels from level 0 (lowest) to 3 (highest),
+ * VPE FW will dynamically decide which level should be used according to current loading.
+ *
+ * Get VPE and SOC clocks from PM, and select the appropriate four clock values,
+ * calculate the ratios of adjusting from one clock to another.
+ * The VPE FW can then request the appropriate frequency from the PMFW.
+ *
+ * Returns 0 when DPM was configured or when adev->pm.dpm_enabled is not set
+ * (nothing is touched in that case). Returns -EINVAL after clearing the DPM
+ * enable bit again when the clock table cannot be read or one of the four
+ * selected frequencies is zero.
+ */
+int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe)
+{
+ struct amdgpu_device *adev = vpe->ring.adev;
+ uint32_t dpm_ctl;
+
+ if (adev->pm.dpm_enabled) {
+ struct dpm_clocks clock_table = { 0 };
+ struct dpm_clock *VPEClks;
+ struct dpm_clock *SOCClks;
+ uint32_t idx;
+ uint32_t vpeclk_enalbled_num = 0;
+ uint32_t pratio_vmax_vnorm = 0, pratio_vnorm_vmid = 0, pratio_vmid_vmin = 0;
+ uint16_t pratio_vmin_freq = 0, pratio_vmid_freq = 0, pratio_vnorm_freq = 0, pratio_vmax_freq = 0;
+
+ dpm_ctl = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable));
+ dpm_ctl |= 1; /* DPM enablement */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl);
+
+ /* Get VPECLK and SOCCLK */
+ if (amdgpu_dpm_get_dpm_clock_table(adev, &clock_table)) {
+ dev_dbg(adev->dev, "%s: get clock failed!\n", __func__);
+ goto disable_dpm;
+ }
+
+ SOCClks = clock_table.SocClocks;
+ VPEClks = clock_table.VPEClocks;
+
+ /* Confirm enabled vpe clk num
+ * Enabled VPE clocks are ordered from low to high in VPEClks
+ * The highest valid clock index+1 is the number of VPEClks
+ */
+ for (idx = PP_SMU_NUM_VPECLK_DPM_LEVELS; idx && !vpeclk_enalbled_num; idx--)
+ if (VPEClks[idx-1].Freq)
+ vpeclk_enalbled_num = idx;
+
+ /* vpe dpm only cares 4 levels. */
+ for (idx = 0; idx < VPE_MAX_DPM_LEVEL; idx++) {
+ uint32_t soc_dpm_level;
+ uint32_t min_freq;
+
+ /* VPE levels 0..3 sample clock table entries 0, 3, 5 and 7 */
+ if (idx == 0)
+ soc_dpm_level = 0;
+ else
+ soc_dpm_level = (idx * 2) + 1;
+
+ /* clamp the max level */
+ if (soc_dpm_level > vpeclk_enalbled_num - 1)
+ soc_dpm_level = vpeclk_enalbled_num - 1;
+
+ /* Use the lower of the SOC and VPE clock at that entry */
+ min_freq = (SOCClks[soc_dpm_level].Freq < VPEClks[soc_dpm_level].Freq) ?
+ SOCClks[soc_dpm_level].Freq : VPEClks[soc_dpm_level].Freq;
+
+ switch (idx) {
+ case 0:
+ pratio_vmin_freq = min_freq;
+ break;
+ case 1:
+ pratio_vmid_freq = min_freq;
+ break;
+ case 2:
+ pratio_vnorm_freq = min_freq;
+ break;
+ case 3:
+ pratio_vmax_freq = min_freq;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* All four frequencies must be non-zero: they are used as divisors */
+ if (pratio_vmin_freq && pratio_vmid_freq && pratio_vnorm_freq && pratio_vmax_freq) {
+ uint32_t pratio_ctl;
+
+ pratio_vmax_vnorm = (uint32_t)vpe_internal_get_pratio(pratio_vmax_freq, pratio_vnorm_freq);
+ pratio_vnorm_vmid = (uint32_t)vpe_internal_get_pratio(pratio_vnorm_freq, pratio_vmid_freq);
+ pratio_vmid_vmin = (uint32_t)vpe_internal_get_pratio(pratio_vmid_freq, pratio_vmin_freq);
+
+ /* Pack the three ratios at bit offsets 0, 9 and 18 */
+ pratio_ctl = pratio_vmax_vnorm | (pratio_vnorm_vmid << 9) | (pratio_vmid_vmin << 18);
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_pratio), pratio_ctl); /* PRatio */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_request_interval), 24000); /* 1ms, unit=1/24MHz */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_decision_threshold), 1200000); /* 50ms */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_busy_clamp_threshold), 1200000);/* 50ms */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_idle_clamp_threshold), 1200000);/* 50ms */
+ dev_dbg(adev->dev, "%s: configure vpe dpm pratio done!\n", __func__);
+ } else {
+ dev_dbg(adev->dev, "%s: invalid pratio parameters!\n", __func__);
+ goto disable_dpm;
+ }
+ }
+ return 0;
+
+disable_dpm:
+ /* Undo the enable bit set above and report the failure */
+ dpm_ctl = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable));
+ dpm_ctl &= 0xfffffffe; /* Disable DPM */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl);
+ dev_dbg(adev->dev, "%s: disable vpe dpm\n", __func__);
+ return -EINVAL;
+}
+
+/*
+ * Ask the PSP to load the AMDGPU_UCODE_ID_VPE entry, pointing it at the VPE
+ * command buffer address. Returns the result of psp_execute_ip_fw_load().
+ */
+int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev)
+{
+ struct amdgpu_firmware_info ucode = {
+ .ucode_id = AMDGPU_UCODE_ID_VPE,
+ .mc_addr = adev->vpe.cmdbuf_gpu_addr,
+ .ucode_size = 8, /* NOTE(review): fixed size of 8, meaning not visible here */
+ };
+
+ return psp_execute_ip_fw_load(&adev->psp, &ucode);
+}
+
+/*
+ * Request the VPE firmware image ("amdgpu/<ip version prefix>.bin"), record
+ * its version and feature version, and - when firmware is loaded through the
+ * PSP - register the CTX and CTL ucode entries and add their page aligned
+ * sizes to the total firmware size.
+ *
+ * Returns 0 on success. If the firmware request fails, the error is logged,
+ * the firmware handle is released and cleared, and the error is returned.
+ */
+int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe)
+{
+ struct amdgpu_device *adev = vpe->ring.adev;
+ const struct vpe_firmware_header_v1_0 *vpe_hdr;
+ char fw_prefix[32];
+ int ret;
+
+ amdgpu_ucode_ip_version_decode(adev, VPE_HWIP, fw_prefix, sizeof(fw_prefix));
+ ret = amdgpu_ucode_request(adev, &adev->vpe.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s.bin", fw_prefix);
+ if (ret)
+ goto out;
+
+ /* Header fields are little endian in the image */
+ vpe_hdr = (const struct vpe_firmware_header_v1_0 *)adev->vpe.fw->data;
+ adev->vpe.fw_version = le32_to_cpu(vpe_hdr->header.ucode_version);
+ adev->vpe.feature_version = le32_to_cpu(vpe_hdr->ucode_feature_version);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ struct amdgpu_firmware_info *info;
+
+ /* Both entries reference the same firmware image */
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_VPE_CTX];
+ info->ucode_id = AMDGPU_UCODE_ID_VPE_CTX;
+ info->fw = adev->vpe.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(vpe_hdr->ctx_ucode_size_bytes), PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_VPE_CTL];
+ info->ucode_id = AMDGPU_UCODE_ID_VPE_CTL;
+ info->fw = adev->vpe.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(vpe_hdr->ctl_ucode_size_bytes), PAGE_SIZE);
+ }
+
+ return 0;
+out:
+ dev_err(adev->dev, "fail to initialize vpe microcode\n");
+ release_firmware(adev->vpe.fw);
+ adev->vpe.fw = NULL;
+ return ret;
+}
+
+/**
+ * amdgpu_vpe_ring_init - configure and create the VPE ring
+ * @vpe: VPE instance
+ *
+ * Sets up the ring as a doorbell-driven ring named "vpe" on MMHUB0 and
+ * creates it with 1024 entries, using the VPE trap interrupt source.
+ *
+ * Return: result of amdgpu_ring_init().
+ */
+int amdgpu_vpe_ring_init(struct amdgpu_vpe *vpe)
+{
+	struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe);
+	struct amdgpu_ring *vpe_ring = &vpe->ring;
+
+	vpe_ring->ring_obj = NULL;
+	vpe_ring->use_doorbell = true;
+	vpe_ring->vm_hub = AMDGPU_MMHUB0(0);
+	vpe_ring->doorbell_index = (adev->doorbell_index.vpe_ring << 1);
+	snprintf(vpe_ring->name, 4, "vpe");
+
+	return amdgpu_ring_init(adev, vpe_ring, 1024, &vpe->trap_irq, 0,
+				AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+/**
+ * amdgpu_vpe_ring_fini - tear down the VPE ring
+ * @vpe: VPE instance whose embedded ring is finalized
+ *
+ * Calls amdgpu_ring_fini() on &vpe->ring.
+ *
+ * Return: always 0.
+ */
+int amdgpu_vpe_ring_fini(struct amdgpu_vpe *vpe)
+{
+ amdgpu_ring_fini(&vpe->ring);
+
+ return 0;
+}
+
+/**
+ * vpe_early_init - select the VPE implementation for the detected IP version
+ * @ip_block: amdgpu IP block being initialized
+ *
+ * Installs the v6.1 function table for VPE IP versions 6.1.0, 6.1.1 and
+ * 6.1.3 (6.1.1 additionally enables collaborate mode), then installs the
+ * ring callbacks and the register offsets.
+ *
+ * Return: 0 on success, -EINVAL for an unsupported VPE IP version.
+ */
+static int vpe_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_vpe *vpe = &adev->vpe;
+
+	switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) {
+	case IP_VERSION(6, 1, 0):
+	case IP_VERSION(6, 1, 3):
+		vpe_v6_1_set_funcs(vpe);
+		break;
+	case IP_VERSION(6, 1, 1):
+		vpe_v6_1_set_funcs(vpe);
+		vpe->collaborate_mode = true;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	vpe_set_ring_funcs(adev);
+	vpe_set_regs(vpe);
+
+	/* Terminate the message with '\n' so it is emitted as a complete log line. */
+	dev_info(adev->dev, "VPE: collaborate mode %s\n",
+		 vpe->collaborate_mode ? "true" : "false");
+
+	return 0;
+}
+
+/*
+ * Tell whether VPE must be at DPM level 0 before it is power gated.
+ * Only VPE 6.1.1 with PM firmware older than 0x0a640500 needs this.
+ */
+static bool vpe_need_dpm0_at_power_down(struct amdgpu_device *adev)
+{
+	if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1))
+		return adev->pm.fw_version < 0x0a640500;
+
+	return false;
+}
+
+/*
+ * Read the current VPE DPM level from the dpm_request_lv register of
+ * instance 0. Reports level 0 without touching hardware when DPM is
+ * disabled.
+ */
+static int vpe_get_dpm_level(struct amdgpu_device *adev)
+{
+	struct amdgpu_vpe *vpe = &adev->vpe;
+
+	if (adev->pm.dpm_enabled)
+		return RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_request_lv));
+
+	return 0;
+}
+
+/*
+ * Delayed-work handler that power gates VPE once it is idle.
+ *
+ * VPE counts as busy while fences are still outstanding on its ring, or
+ * (on parts that require DPM level 0 at power down) while the DPM level is
+ * non-zero. When busy, the work re-arms itself after VPE_IDLE_TIMEOUT.
+ */
+static void vpe_idle_work_handler(struct work_struct *work)
+{
+	struct amdgpu_device *adev =
+		container_of(work, struct amdgpu_device, vpe.idle_work.work);
+	bool busy;
+
+	busy = amdgpu_fence_count_emitted(&adev->vpe.ring) != 0;
+	/* The DPM level is only consulted when no fences are pending. */
+	if (!busy && vpe_need_dpm0_at_power_down(adev))
+		busy = vpe_get_dpm_level(adev) != 0;
+
+	if (busy) {
+		schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT);
+		return;
+	}
+
+	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE);
+}
+
+/*
+ * Software state shared by all VPE versions: allocate the one-page command
+ * buffer in GTT, clear the context-started flag and set up the idle work.
+ *
+ * Returns 0 on success or the amdgpu_bo_create_kernel() error.
+ */
+static int vpe_common_init(struct amdgpu_vpe *vpe)
+{
+	struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe);
+	int err;
+
+	err = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+				      AMDGPU_GEM_DOMAIN_GTT,
+				      &adev->vpe.cmdbuf_obj,
+				      &adev->vpe.cmdbuf_gpu_addr,
+				      (void **)&adev->vpe.cmdbuf_cpu_addr);
+	if (!err) {
+		vpe->context_started = false;
+		INIT_DELAYED_WORK(&adev->vpe.idle_work, vpe_idle_work_handler);
+		return 0;
+	}
+
+	dev_err(adev->dev, "VPE: failed to allocate cmdbuf bo %d\n", err);
+	return err;
+}
+
+/*
+ * sw_init callback: common state, interrupt source, ring and microcode are
+ * set up in that order, then the supported reset types are computed and
+ * exposed through sysfs. The first failing step's error is returned as is.
+ */
+static int vpe_sw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_vpe *vpe = &adev->vpe;
+	int err;
+
+	err = vpe_common_init(vpe);
+	if (err)
+		return err;
+
+	err = vpe_irq_init(vpe);
+	if (err)
+		return err;
+
+	err = vpe_ring_init(vpe);
+	if (err)
+		return err;
+
+	err = vpe_init_microcode(vpe);
+	if (err)
+		return err;
+
+	adev->vpe.supported_reset =
+		amdgpu_get_soft_full_reset_mask(&adev->vpe.ring);
+	/* Per-queue reset is only advertised on bare metal, not for SR-IOV VFs. */
+	if (!amdgpu_sriov_vf(adev))
+		adev->vpe.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+	return amdgpu_vpe_sysfs_reset_mask_init(adev);
+}
+
+/*
+ * sw_fini callback: releases the firmware image, removes the reset-mask
+ * sysfs file, tears down the ring and frees the command buffer BO.
+ * Always returns 0.
+ */
+static int vpe_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+
+ release_firmware(vpe->fw);
+ vpe->fw = NULL;
+
+ amdgpu_vpe_sysfs_reset_mask_fini(adev);
+ vpe_ring_fini(vpe);
+
+ amdgpu_bo_free_kernel(&adev->vpe.cmdbuf_obj,
+ &adev->vpe.cmdbuf_gpu_addr,
+ (void **)&adev->vpe.cmdbuf_cpu_addr);
+
+ return 0;
+}
+
+/*
+ * hw_init callback: ungate VPE power, load the microcode and start the
+ * ring. Returns 0 on success or the error of the first step that failed.
+ */
+static int vpe_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_vpe *vpe = &adev->vpe;
+	int err;
+
+	/* Power on VPE */
+	err = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE,
+						     AMD_PG_STATE_UNGATE);
+	if (err)
+		return err;
+
+	err = vpe_load_microcode(vpe);
+	if (err)
+		return err;
+
+	return vpe_ring_start(vpe);
+}
+
+/*
+ * hw_fini callback: cancels the idle work (waiting for a running instance
+ * to finish), stops the ring and power gates VPE. Always returns 0; the
+ * result of the power-gating request is not checked.
+ */
+static int vpe_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+
+ cancel_delayed_work_sync(&adev->vpe.idle_work);
+
+ vpe_ring_stop(vpe);
+
+ /* Power off VPE */
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE);
+
+ return 0;
+}
+
+/* suspend callback: identical to hw_fini (stop the ring and power gate VPE). */
+static int vpe_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return vpe_hw_fini(ip_block);
+}
+
+/* resume callback: identical to hw_init (power on, load microcode, start ring). */
+static int vpe_resume(struct amdgpu_ip_block *ip_block)
+{
+ return vpe_hw_init(ip_block);
+}
+
+/*
+ * Pad the ring with @count NOP dwords. The first dword is a NOP header
+ * carrying (count - 1) in its count field; the remaining dwords are plain
+ * NOPs. Nothing is written when @count is 0.
+ */
+static void vpe_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+	uint32_t n;
+
+	if (!count)
+		return;
+
+	amdgpu_ring_write(ring, ring->funcs->nop |
+			  VPE_CMD_NOP_HEADER_COUNT(count - 1));
+
+	for (n = 1; n < count; n++)
+		amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
+/*
+ * Compute the address of the VPE slot inside the CSA for @vmid.
+ *
+ * Returns 0 for SR-IOV VFs, for vmid 0, or when adev->gfx.mcbp is not set;
+ * otherwise the CSA base plus the VPE offset (slot index 0).
+ */
+static uint64_t vpe_get_csa_mc_addr(struct amdgpu_ring *ring, uint32_t vmid)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const uint32_t slot = 0;
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+	if (vmid == 0)
+		return 0;
+	if (!adev->gfx.mcbp)
+		return 0;
+
+	return amdgpu_csa_vaddr(adev) + AMDGPU_CSA_VPE_OFFSET +
+	       slot * AMDGPU_CSA_VPE_SIZE;
+}
+
+/*
+ * Emit a PRED_EXE packet (2 dwords): header with @device_select in bits
+ * 16 and up, followed by @exec_count masked to 13 bits.
+ *
+ * Nothing is emitted unless the VPE is in collaborate mode.
+ */
+static void vpe_ring_emit_pred_exec(struct amdgpu_ring *ring,
+ uint32_t device_select,
+ uint32_t exec_count)
+{
+ if (!ring->adev->vpe.collaborate_mode)
+ return;
+
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_PRED_EXE, 0) |
+ (device_select << 16));
+ amdgpu_ring_write(ring, exec_count & 0x1fff);
+}
+
+/*
+ * Emit an INDIRECT packet (6 dwords) that schedules @ib on the ring:
+ * header with the low 4 bits of the job's VMID, IB address low/high,
+ * IB length in dwords, then the CSA address low/high as returned by
+ * vpe_get_csa_mc_addr() (0 when no CSA applies).
+ *
+ * @flags is not used.
+ */
+static void vpe_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ uint32_t vmid = AMDGPU_JOB_GET_VMID(job);
+ uint64_t csa_mc_addr = vpe_get_csa_mc_addr(ring, vmid);
+
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_INDIRECT, 0) |
+ VPE_CMD_INDIRECT_HEADER_VMID(vmid & 0xf));
+
+ /* base must be 32 byte aligned */
+ amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0);
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+ amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
+ amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
+}
+
+/*
+ * Emit a fence write of @seq to @addr.
+ *
+ * One FENCE packet (4 dwords) writes the low 32 bits of @seq to @addr.
+ * With AMDGPU_FENCE_FLAG_64BIT a second FENCE packet writes the high
+ * 32 bits to @addr + 4. With AMDGPU_FENCE_FLAG_INT a TRAP packet
+ * (2 dwords) follows to raise an interrupt.
+ *
+ * Warns once if @addr is not 4-byte aligned.
+ */
+static void vpe_ring_emit_fence(struct amdgpu_ring *ring, uint64_t addr,
+ uint64_t seq, unsigned int flags)
+{
+ int i = 0;
+
+ do {
+ /* write the fence */
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0));
+ /* zero in first two bits */
+ WARN_ON_ONCE(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* first pass (i == 0) carries the low half, second pass the high half */
+ amdgpu_ring_write(ring, i == 0 ? lower_32_bits(seq) : upper_32_bits(seq));
+ addr += 4;
+ /* i++ is only evaluated for 64-bit fences, giving exactly two passes */
+ } while ((flags & AMDGPU_FENCE_FLAG_64BIT) && (i++ < 1));
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* generate an interrupt */
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_TRAP, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+
+}
+
+/*
+ * Emit a POLL_REGMEM packet (6 dwords) that polls the ring's fence memory
+ * until it equals the last synced sequence number (sync_seq), using the
+ * full 32-bit mask, retry count 0xfff and interval 4.
+ *
+ * In collaborate mode the packet is preceded by a PRED_EXE packet with
+ * device_select 0 covering these 6 dwords.
+ */
+static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ vpe_ring_emit_pred_exec(ring, 0, 6);
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM,
+ VPE_POLL_REGMEM_SUBOP_REGMEM) |
+ VPE_CMD_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+ VPE_CMD_POLL_REGMEM_HEADER_MEM(1));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xffffffff); /* mask */
+ amdgpu_ring_write(ring, VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ VPE_CMD_POLL_REGMEM_DW5_INTERVAL(4));
+}
+
+/*
+ * Emit a REG_WRITE packet (3 dwords) that writes @val to register @reg.
+ * @reg is a dword register index; it is emitted as a byte offset
+ * (reg << 2). In collaborate mode a PRED_EXE packet precedes it.
+ */
+static void vpe_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
+{
+ vpe_ring_emit_pred_exec(ring, 0, 3);
+
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_REG_WRITE, 0));
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, val);
+}
+
+/*
+ * Emit a POLL_REGMEM packet (6 dwords) that polls register @reg (emitted
+ * as byte offset reg << 2) until (value & @mask) equals @val, with retry
+ * count 0xfff and interval 10. In collaborate mode a PRED_EXE packet
+ * precedes it.
+ */
+static void vpe_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ vpe_ring_emit_pred_exec(ring, 0, 6);
+
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM,
+ VPE_POLL_REGMEM_SUBOP_REGMEM) |
+ VPE_CMD_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+ VPE_CMD_POLL_REGMEM_HEADER_MEM(0));
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val); /* reference */
+ amdgpu_ring_write(ring, mask); /* mask */
+ amdgpu_ring_write(ring, VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ VPE_CMD_POLL_REGMEM_DW5_INTERVAL(10));
+}
+
+/* Emit a GPU TLB flush for @vmid / @pd_addr via the common GMC helper. */
+static void vpe_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid,
+ uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+}
+
+/*
+ * Emit a COND_EXE packet (5 dwords): header, @addr low/high, the constant
+ * 1 and a final dword written as 0.
+ *
+ * Returns the ring offset (wptr masked with buf_mask) of that final dword,
+ * captured just before it is written.
+ * NOTE(review): presumably the caller patches that dword later with the
+ * number of dwords to skip - confirm against the common ring code.
+ */
+static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
+{
+ unsigned int ret;
+
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COND_EXE, 0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, 1);
+ ret = ring->wptr & ring->buf_mask;
+ amdgpu_ring_write(ring, 0);
+
+ return ret;
+}
+
+/**
+ * vpe_ring_preempt_ib - preempt the IB currently executing on the ring
+ * @ring: VPE ring
+ *
+ * Clears the preemption cond-exec flag, emits a trailing fence, asserts the
+ * queue0 preempt register and polls the trailing fence for up to
+ * adev->usec_timeout microseconds. The preempt register and the cond-exec
+ * flag are restored before returning.
+ *
+ * Return: 0 on success, the amdgpu_ring_alloc() error if no ring space
+ * could be reserved for the trailing fence, or -EINVAL if the fence did
+ * not signal in time.
+ */
+static int vpe_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_vpe *vpe = &adev->vpe;
+	uint32_t preempt_reg = vpe->regs.queue0_preempt;
+	int i, r;
+
+	/* assert preemption condition */
+	amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+	/*
+	 * Reserve space for the trailing fence first; writing to the ring
+	 * without a successful allocation must not happen.
+	 */
+	r = amdgpu_ring_alloc(ring, 10);
+	if (r) {
+		dev_err(adev->dev, "ring %d failed to allocate preemption fence (%d)\n",
+			ring->idx, r);
+		/* undo the cond-exec change before bailing out */
+		amdgpu_ring_set_preempt_cond_exec(ring, true);
+		return r;
+	}
+
+	/* emit the trailing fence */
+	ring->trail_seq += 1;
+	vpe_ring_emit_fence(ring, ring->trail_fence_gpu_addr, ring->trail_seq, 0);
+	amdgpu_ring_commit(ring);
+
+	/* assert IB preemption */
+	WREG32(vpe_get_reg_offset(vpe, ring->me, preempt_reg), 1);
+
+	/* poll the trailing fence */
+	for (i = 0; i < adev->usec_timeout; i++) {
+		if (ring->trail_seq ==
+		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+			break;
+		udelay(1);
+	}
+
+	if (i >= adev->usec_timeout) {
+		r = -EINVAL;
+		dev_err(adev->dev, "ring %d failed to be preempted\n", ring->idx);
+	}
+
+	/* deassert IB preemption */
+	WREG32(vpe_get_reg_offset(vpe, ring->me, preempt_reg), 0);
+
+	/* deassert the preemption condition */
+	amdgpu_ring_set_preempt_cond_exec(ring, true);
+
+	return r;
+}
+
+/* Clock gating callback: no-op for VPE, always returns 0. */
+static int vpe_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+/*
+ * Power gating callback: asks the DPM layer to disable VPE when gating and
+ * to enable it when ungating. Gating also clears context_started so that
+ * the next ring use re-announces a new context.
+ *
+ * An error is logged when DPM is not enabled, but the request is still
+ * forwarded. Always returns 0.
+ */
+static int vpe_set_powergating_state(struct amdgpu_ip_block *ip_block,
+				     enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_vpe *vpe = &adev->vpe;
+	const bool gate = (state == AMD_PG_STATE_GATE);
+
+	if (!adev->pm.dpm_enabled)
+		dev_err(adev->dev, "Without PM, cannot support powergating\n");
+
+	dev_dbg(adev->dev, "%s: %s!\n", __func__, gate ? "GATE":"UNGATE");
+
+	amdgpu_dpm_enable_vpe(adev, !gate);
+	if (gate)
+		vpe->context_started = false;
+
+	return 0;
+}
+
+/*
+ * Return the ring read pointer in dwords.
+ *
+ * With doorbells the value comes from the rptr writeback slot; otherwise
+ * it is assembled from the queue0 RB_RPTR hi/lo registers of instance
+ * ring->me. The raw value is shifted right by 2 before it is returned.
+ */
+static uint64_t vpe_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_vpe *vpe = &adev->vpe;
+	uint64_t raw;
+
+	if (ring->use_doorbell) {
+		raw = atomic64_read((atomic64_t *)ring->rptr_cpu_addr);
+		dev_dbg(adev->dev, "rptr/doorbell before shift == 0x%016llx\n", raw);
+		return (raw >> 2);
+	}
+
+	/* widen to 64 bit before shifting the high half into place */
+	raw = RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_rptr_hi));
+	raw <<= 32;
+	raw |= RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_rptr_lo));
+	dev_dbg(adev->dev, "rptr before shift [%i] == 0x%016llx\n", ring->me, raw);
+
+	return (raw >> 2);
+}
+
+/*
+ * Return the ring write pointer in dwords.
+ *
+ * With doorbells the value comes from the wptr shadow slot; otherwise it
+ * is assembled from the queue0 RB_WPTR hi/lo registers of instance
+ * ring->me. The raw value is shifted right by 2 before it is returned.
+ */
+static uint64_t vpe_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_vpe *vpe = &adev->vpe;
+	uint64_t raw;
+
+	if (ring->use_doorbell) {
+		raw = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+		dev_dbg(adev->dev, "wptr/doorbell before shift == 0x%016llx\n", raw);
+		return (raw >> 2);
+	}
+
+	/* widen to 64 bit before shifting the high half into place */
+	raw = RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_hi));
+	raw <<= 32;
+	raw |= RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_lo));
+	dev_dbg(adev->dev, "wptr before shift [%i] == 0x%016llx\n", ring->me, raw);
+
+	return (raw >> 2);
+}
+
+/**
+ * vpe_ring_set_wptr - publish the ring write pointer to the hardware
+ * @ring: VPE ring
+ *
+ * The write pointer is published as a byte offset (ring->wptr << 2).
+ * With doorbells it is stored in the wptr shadow slot and rung on the
+ * ring's doorbell (and on doorbell_index + 4 in collaborate mode).
+ * Without doorbells it is written to the queue0 RB_WPTR lo/hi registers
+ * of every VPE instance.
+ */
+static void vpe_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_vpe *vpe = &adev->vpe;
+
+	if (ring->use_doorbell) {
+		/*
+		 * Adjacent string literals are used instead of
+		 * backslash-newline continuation, which would embed the
+		 * source indentation in the log message.
+		 */
+		dev_dbg(adev->dev,
+			"Using doorbell, wptr_offs == 0x%08x, "
+			"lower_32_bits(ring->wptr) << 2 == 0x%08x, "
+			"upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+			ring->wptr_offs,
+			lower_32_bits(ring->wptr << 2),
+			upper_32_bits(ring->wptr << 2));
+		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2);
+		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+		if (vpe->collaborate_mode)
+			WDOORBELL64(ring->doorbell_index + 4, ring->wptr << 2);
+	} else {
+		int i;
+
+		for (i = 0; i < vpe->num_instances; i++) {
+			dev_dbg(adev->dev,
+				"Not using doorbell, "
+				"regVPEC_QUEUE0_RB_WPTR == 0x%08x, "
+				"regVPEC_QUEUE0_RB_WPTR_HI == 0x%08x\n",
+				lower_32_bits(ring->wptr << 2),
+				upper_32_bits(ring->wptr << 2));
+			WREG32(vpe_get_reg_offset(vpe, i, vpe->regs.queue0_rb_wptr_lo),
+			       lower_32_bits(ring->wptr << 2));
+			WREG32(vpe_get_reg_offset(vpe, i, vpe->regs.queue0_rb_wptr_hi),
+			       upper_32_bits(ring->wptr << 2));
+		}
+	}
+}
+
+/*
+ * Basic ring test: submit a FENCE packet that writes 0xdeadbeef into a
+ * writeback slot and poll that slot for up to adev->usec_timeout
+ * microseconds.
+ *
+ * Returns 0 on success, -ETIMEDOUT if the pattern never shows up, or the
+ * error from the writeback-slot / ring allocation.
+ */
+static int vpe_ring_test_ring(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const uint32_t test_pattern = 0xdeadbeef;
+	uint32_t slot, waited;
+	uint64_t slot_gpu_addr;
+	int err;
+
+	err = amdgpu_device_wb_get(adev, &slot);
+	if (err) {
+		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", err);
+		return err;
+	}
+
+	adev->wb.wb[slot] = 0;
+	slot_gpu_addr = adev->wb.gpu_addr + (slot * 4);
+
+	err = amdgpu_ring_alloc(ring, 4);
+	if (err) {
+		dev_err(adev->dev, "amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, err);
+		goto free_slot;
+	}
+
+	amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0));
+	amdgpu_ring_write(ring, lower_32_bits(slot_gpu_addr));
+	amdgpu_ring_write(ring, upper_32_bits(slot_gpu_addr));
+	amdgpu_ring_write(ring, test_pattern);
+	amdgpu_ring_commit(ring);
+
+	/* assume a timeout until the pattern is observed */
+	err = -ETIMEDOUT;
+	for (waited = 0; waited < adev->usec_timeout; waited++) {
+		if (le32_to_cpu(adev->wb.wb[slot]) == test_pattern) {
+			err = 0;
+			break;
+		}
+		udelay(1);
+	}
+
+free_slot:
+	amdgpu_device_wb_free(adev, slot);
+
+	return err;
+}
+
+/**
+ * vpe_ring_test_ib - test that an indirect buffer executes on the ring
+ * @ring: VPE ring
+ * @timeout: maximum time to wait for the IB's fence, passed to
+ *           dma_fence_wait_timeout()
+ *
+ * Schedules an 8-dword IB (a FENCE packet writing 0xdeadbeef to a writeback
+ * slot followed by four NOPs), waits for its fence and checks the slot.
+ *
+ * Return: 0 on success, -ETIMEDOUT if the fence did not signal in time,
+ * -EINVAL if the slot does not hold the pattern, or the error from the
+ * allocation / scheduling / wait step that failed.
+ */
+static int vpe_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const uint32_t test_pattern = 0xdeadbeef;
+	struct amdgpu_ib ib = {};
+	struct dma_fence *f = NULL;
+	uint32_t index;
+	uint64_t wb_addr;
+	long remaining;
+	int ret;
+
+	ret = amdgpu_device_wb_get(adev, &index);
+	if (ret) {
+		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", ret);
+		return ret;
+	}
+
+	adev->wb.wb[index] = 0;
+	wb_addr = adev->wb.gpu_addr + (index * 4);
+
+	ret = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+	if (ret)
+		goto err0;
+
+	ib.ptr[0] = VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0);
+	ib.ptr[1] = lower_32_bits(wb_addr);
+	ib.ptr[2] = upper_32_bits(wb_addr);
+	ib.ptr[3] = test_pattern;
+	ib.ptr[4] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
+	ib.ptr[5] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
+	ib.ptr[6] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
+	ib.ptr[7] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
+	ib.length_dw = 8;
+
+	ret = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+	if (ret)
+		goto err1;
+
+	/*
+	 * dma_fence_wait_timeout() returns a signed long; keep it in a long
+	 * so a large remaining timeout is not truncated into an int.
+	 */
+	remaining = dma_fence_wait_timeout(f, false, timeout);
+	if (remaining == 0) {
+		ret = -ETIMEDOUT;
+		goto err1;
+	} else if (remaining < 0) {
+		ret = remaining;
+		goto err1;
+	}
+
+	ret = (le32_to_cpu(adev->wb.wb[index]) == test_pattern) ? 0 : -EINVAL;
+
+err1:
+	amdgpu_ib_free(&ib, NULL);
+	dma_fence_put(f);
+err0:
+	amdgpu_device_wb_free(adev, index);
+
+	return ret;
+}
+
+/*
+ * begin_use callback: stop the pending idle work and, if no context has
+ * been started since the last power gate, power VPE on and flip bit 0 of
+ * the context indicator register to announce a new context.
+ */
+static void vpe_ring_begin_use(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_vpe *vpe = &adev->vpe;
+	uint32_t indicator;
+
+	cancel_delayed_work_sync(&adev->vpe.idle_work);
+
+	if (vpe->context_started)
+		return;
+
+	/* Power on VPE */
+	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_UNGATE);
+
+	/* Indicates that a job from a new context has been submitted. */
+	indicator = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.context_indicator));
+	indicator ^= 0x1; /* toggle bit 0, all other bits unchanged */
+	WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.context_indicator), indicator);
+	vpe->context_started = true;
+}
+
+/*
+ * end_use callback: arm the idle work so that VPE is power gated by
+ * vpe_idle_work_handler() once VPE_IDLE_TIMEOUT has elapsed and it is idle.
+ */
+static void vpe_ring_end_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT);
+}
+
+/*
+ * Ring reset callback: resets VPE by power gating and then ungating it,
+ * bracketed by the common ring reset begin/end helpers.
+ *
+ * @vmid is not used. Returns the first power-gating error, otherwise the
+ * result of amdgpu_ring_reset_helper_end(). Note that on a power-gating
+ * error the function returns without calling the end helper.
+ */
+static int vpe_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE,
+ AMD_PG_STATE_GATE);
+ if (r)
+ return r;
+ r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE,
+ AMD_PG_STATE_UNGATE);
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+/*
+ * sysfs show handler for "vpe_reset_mask": prints the reset types recorded
+ * in adev->vpe.supported_reset via amdgpu_show_reset_mask().
+ * Returns -ENODEV if no amdgpu device is found.
+ */
+static ssize_t amdgpu_get_vpe_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->vpe.supported_reset);
+}
+
+/* Read-only (0444) device attribute backed by the show handler above. */
+static DEVICE_ATTR(vpe_reset_mask, 0444,
+ amdgpu_get_vpe_reset_mask, NULL);
+
+/**
+ * amdgpu_vpe_sysfs_reset_mask_init - create the vpe_reset_mask sysfs file
+ * @adev: amdgpu device
+ *
+ * The file is only created when at least one VPE instance exists.
+ *
+ * Return: 0 when there is nothing to create, otherwise the result of
+ * device_create_file().
+ */
+int amdgpu_vpe_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+	if (!adev->vpe.num_instances)
+		return 0;
+
+	return device_create_file(adev->dev, &dev_attr_vpe_reset_mask);
+}
+
+/**
+ * amdgpu_vpe_sysfs_reset_mask_fini - remove the vpe_reset_mask sysfs file
+ * @adev: amdgpu device
+ *
+ * Does nothing when the device has no sysfs directory (kobj.sd is NULL) or
+ * when there are no VPE instances.
+ */
+void amdgpu_vpe_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+	if (adev->dev->kobj.sd && adev->vpe.num_instances)
+		device_remove_file(adev->dev, &dev_attr_vpe_reset_mask);
+}
+
+/*
+ * Ring callback table for the VPE ring. emit_frame_size / emit_ib_size
+ * budget the dwords written by the emit helpers in this file.
+ *
+ * NOTE(review): in collaborate mode vpe_ring_emit_pipeline_sync(),
+ * vpe_ring_emit_wreg() and vpe_ring_emit_reg_wait() each emit two extra
+ * PRED_EXE dwords that are not listed in the per-helper counts below;
+ * confirm that the three 10-dword fence budgets are meant to cover them.
+ */
+static const struct amdgpu_ring_funcs vpe_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_VPE,
+ .align_mask = 0xf,
+ .nop = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0),
+ .support_64bit_ptrs = true,
+ .get_rptr = vpe_ring_get_rptr,
+ .get_wptr = vpe_ring_get_wptr,
+ .set_wptr = vpe_ring_set_wptr,
+ .emit_frame_size =
+ 5 + /* vpe_ring_init_cond_exec */
+ 6 + /* vpe_ring_emit_pipeline_sync */
+ 10 + 10 + 10 + /* vpe_ring_emit_fence */
+ /* vpe_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6,
+ .emit_ib_size = 7 + 6,
+ .emit_ib = vpe_ring_emit_ib,
+ .emit_pipeline_sync = vpe_ring_emit_pipeline_sync,
+ .emit_fence = vpe_ring_emit_fence,
+ .emit_vm_flush = vpe_ring_emit_vm_flush,
+ .emit_wreg = vpe_ring_emit_wreg,
+ .emit_reg_wait = vpe_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .insert_nop = vpe_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .test_ring = vpe_ring_test_ring,
+ .test_ib = vpe_ring_test_ib,
+ .init_cond_exec = vpe_ring_init_cond_exec,
+ .preempt_ib = vpe_ring_preempt_ib,
+ .begin_use = vpe_ring_begin_use,
+ .end_use = vpe_ring_end_use,
+ .reset = vpe_ring_reset,
+};
+
+/* Attach the VPE ring callback table to the device's VPE ring. */
+static void vpe_set_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->vpe.ring.funcs = &vpe_ring_funcs;
+}
+
+/* IP-level callbacks for the VPE block; suspend/resume reuse hw_fini/hw_init. */
+const struct amd_ip_funcs vpe_ip_funcs = {
+ .name = "vpe_v6_1",
+ .early_init = vpe_early_init,
+ .sw_init = vpe_sw_init,
+ .sw_fini = vpe_sw_fini,
+ .hw_init = vpe_hw_init,
+ .hw_fini = vpe_hw_fini,
+ .suspend = vpe_suspend,
+ .resume = vpe_resume,
+ .set_clockgating_state = vpe_set_clockgating_state,
+ .set_powergating_state = vpe_set_powergating_state,
+};
+
+/* IP block descriptor that registers VPE version 6.1.0 with vpe_ip_funcs. */
+const struct amdgpu_ip_block_version vpe_v6_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VPE,
+ .major = 6,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &vpe_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h
new file mode 100644
index 000000000000..695da740a97e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __AMDGPU_VPE_H__
+#define __AMDGPU_VPE_H__
+
+#include "amdgpu_ring.h"
+#include "amdgpu_irq.h"
+#include "vpe_6_1_fw_if.h"
+
+#define AMDGPU_MAX_VPE_INSTANCES 2
+
+struct amdgpu_vpe;
+
+/*
+ * Per-IP-version VPE hooks. Every hook is optional: the vpe_*() dispatch
+ * macros in this header evaluate to 0 when a hook is NULL.
+ */
+struct vpe_funcs {
+ /* translate a register @offset for VPE instance @inst */
+ uint32_t (*get_reg_offset)(struct amdgpu_vpe *vpe, uint32_t inst, uint32_t offset);
+ /* fill in vpe->regs */
+ int (*set_regs)(struct amdgpu_vpe *vpe);
+ int (*irq_init)(struct amdgpu_vpe *vpe);
+ int (*init_microcode)(struct amdgpu_vpe *vpe);
+ int (*load_microcode)(struct amdgpu_vpe *vpe);
+ int (*ring_init)(struct amdgpu_vpe *vpe);
+ int (*ring_start)(struct amdgpu_vpe *vpe);
+ int (*ring_stop)(struct amdgpu_vpe *vpe);
+ int (*ring_fini)(struct amdgpu_vpe *vpe);
+};
+
+/*
+ * Register offsets used by the common VPE code. They are passed through
+ * vpe_get_reg_offset() together with an instance number before being
+ * handed to RREG32()/WREG32().
+ */
+struct vpe_regs {
+ /* queue 0 ring buffer read/write pointers and preemption control */
+ uint32_t queue0_rb_rptr_lo;
+ uint32_t queue0_rb_rptr_hi;
+ uint32_t queue0_rb_wptr_lo;
+ uint32_t queue0_rb_wptr_hi;
+ uint32_t queue0_preempt;
+
+ /* DPM control, thresholds and current request level */
+ uint32_t dpm_enable;
+ uint32_t dpm_pratio;
+ uint32_t dpm_request_interval;
+ uint32_t dpm_decision_threshold;
+ uint32_t dpm_busy_clamp_threshold;
+ uint32_t dpm_idle_clamp_threshold;
+ uint32_t dpm_request_lv;
+ /* bit 0 is toggled to signal a job from a new context */
+ uint32_t context_indicator;
+};
+
+/* Per-device VPE state, embedded in struct amdgpu_device as the vpe member. */
+struct amdgpu_vpe {
+ struct amdgpu_ring ring;
+ struct amdgpu_irq_src trap_irq; /* interrupt source handed to amdgpu_ring_init() */
+
+ const struct vpe_funcs *funcs; /* per-IP-version hooks */
+ struct vpe_regs regs;
+
+ /* firmware image and the versions read from its header */
+ const struct firmware *fw;
+ uint32_t fw_version;
+ uint32_t feature_version;
+
+ /* command buffer BO and its GPU / CPU addresses */
+ struct amdgpu_bo *cmdbuf_obj;
+ uint64_t cmdbuf_gpu_addr;
+ uint32_t *cmdbuf_cpu_addr;
+ struct delayed_work idle_work; /* power gates VPE once idle */
+ bool context_started; /* cleared on power gate, set on first ring use */
+
+ uint32_t num_instances;
+ bool collaborate_mode; /* enables PRED_EXE packets and the second doorbell */
+ uint32_t supported_reset; /* reset type mask shown in sysfs */
+};
+
+int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev);
+int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe);
+int amdgpu_vpe_ring_init(struct amdgpu_vpe *vpe);
+int amdgpu_vpe_ring_fini(struct amdgpu_vpe *vpe);
+int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe);
+void amdgpu_vpe_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+int amdgpu_vpe_sysfs_reset_mask_init(struct amdgpu_device *adev);
+
+/*
+ * Dispatch helpers for struct vpe_funcs: each macro calls the
+ * corresponding hook when it is set and evaluates to 0 otherwise.
+ * Note that @vpe is evaluated more than once.
+ */
+#define vpe_ring_init(vpe) ((vpe)->funcs->ring_init ? (vpe)->funcs->ring_init((vpe)) : 0)
+#define vpe_ring_start(vpe) ((vpe)->funcs->ring_start ? (vpe)->funcs->ring_start((vpe)) : 0)
+#define vpe_ring_stop(vpe) ((vpe)->funcs->ring_stop ? (vpe)->funcs->ring_stop((vpe)) : 0)
+#define vpe_ring_fini(vpe) ((vpe)->funcs->ring_fini ? (vpe)->funcs->ring_fini((vpe)) : 0)
+
+#define vpe_get_reg_offset(vpe, inst, offset) \
+ ((vpe)->funcs->get_reg_offset ? (vpe)->funcs->get_reg_offset((vpe), (inst), (offset)) : 0)
+#define vpe_set_regs(vpe) \
+ ((vpe)->funcs->set_regs ? (vpe)->funcs->set_regs((vpe)) : 0)
+#define vpe_irq_init(vpe) \
+ ((vpe)->funcs->irq_init ? (vpe)->funcs->irq_init((vpe)) : 0)
+#define vpe_init_microcode(vpe) \
+ ((vpe)->funcs->init_microcode ? (vpe)->funcs->init_microcode((vpe)) : 0)
+#define vpe_load_microcode(vpe) \
+ ((vpe)->funcs->load_microcode ? (vpe)->funcs->load_microcode((vpe)) : 0)
+
+extern const struct amdgpu_ip_block_version vpe_v6_1_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 7b2b0980ec41..9d934c07fa6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -24,16 +24,20 @@
#include <linux/dma-mapping.h>
#include <drm/ttm/ttm_range_manager.h>
+#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_vm.h"
#include "amdgpu_res_cursor.h"
-#include "amdgpu_atomfirmware.h"
#include "atom.h"
+#define AMDGPU_MAX_SG_SEGMENT_SIZE (2UL << 30)
+
struct amdgpu_vram_reservation {
- struct list_head node;
- struct drm_mm_node mm_node;
+ u64 start;
+ u64 size;
+ struct list_head allocated;
+ struct list_head blocks;
};
static inline struct amdgpu_vram_mgr *
@@ -48,6 +52,44 @@ to_amdgpu_device(struct amdgpu_vram_mgr *mgr)
return container_of(mgr, struct amdgpu_device, mman.vram_mgr);
}
+/* Return the first buddy block on @list, or NULL when the list is empty. */
+static inline struct drm_buddy_block *
+amdgpu_vram_mgr_first_block(struct list_head *list)
+{
+ return list_first_entry_or_null(list, struct drm_buddy_block, link);
+}
+
+/*
+ * Check whether the buddy blocks on @head form one contiguous range in
+ * list order, i.e. every block ends exactly where the next one starts.
+ *
+ * Returns false for an empty list and true for a single block.
+ */
+static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head)
+{
+ struct drm_buddy_block *block;
+ u64 start, size;
+
+ block = amdgpu_vram_mgr_first_block(head);
+ if (!block)
+ return false;
+
+ /* stop once the current block is the last entry of the list */
+ while (head != block->link.next) {
+ start = amdgpu_vram_mgr_block_start(block);
+ size = amdgpu_vram_mgr_block_size(block);
+
+ /* advance, then compare the previous end with the new start */
+ block = list_entry(block->link.next, struct drm_buddy_block, link);
+ if (start + size != amdgpu_vram_mgr_block_start(block))
+ return false;
+ }
+
+ return true;
+}
+
+/* Return the summed size of all buddy blocks on @head (0 for an empty list). */
+static inline u64 amdgpu_vram_mgr_blocks_size(struct list_head *head)
+{
+ struct drm_buddy_block *block;
+ u64 size = 0;
+
+ list_for_each_entry(block, head, link)
+ size += amdgpu_vram_mgr_block_size(block);
+
+ return size;
+}
+
/**
* DOC: mem_info_vram_total
*
@@ -96,10 +138,9 @@ static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- struct ttm_resource_manager *man;
+ struct ttm_resource_manager *man = &adev->mman.vram_mgr.manager;
- man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
- return sysfs_emit(buf, "%llu\n", amdgpu_vram_mgr_usage(man));
+ return sysfs_emit(buf, "%llu\n", ttm_resource_manager_usage(man));
}
/**
@@ -116,10 +157,9 @@ static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- struct ttm_resource_manager *man;
- man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
- return sysfs_emit(buf, "%llu\n", amdgpu_vram_mgr_vis_usage(man));
+ return sysfs_emit(buf, "%llu\n",
+ amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr));
}
/**
@@ -183,23 +223,41 @@ static struct attribute *amdgpu_vram_mgr_attributes[] = {
NULL
};
+/*
+ * is_visible callback for the VRAM manager attribute group.
+ *
+ * Hides mem_info_vram_vendor when no VRAM vendor is known, and hides every
+ * attribute of the group when the VRAM resource manager is not in use.
+ * Otherwise the attribute keeps its declared mode.
+ */
+static umode_t amdgpu_vram_attrs_is_visible(struct kobject *kobj,
+ struct attribute *attr, int i)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (attr == &dev_attr_mem_info_vram_vendor.attr &&
+ !adev->gmc.vram_vendor)
+ return 0;
+
+ if (!ttm_resource_manager_used(&adev->mman.vram_mgr.manager))
+ return 0;
+
+ return attr->mode;
+}
+
const struct attribute_group amdgpu_vram_mgr_attr_group = {
- .attrs = amdgpu_vram_mgr_attributes
+ .attrs = amdgpu_vram_mgr_attributes,
+ .is_visible = amdgpu_vram_attrs_is_visible
};
/**
- * amdgpu_vram_mgr_vis_size - Calculate visible node size
+ * amdgpu_vram_mgr_vis_size - Calculate visible block size
*
* @adev: amdgpu_device pointer
- * @node: MM node structure
+ * @block: DRM BUDDY block structure
*
- * Calculate how many bytes of the MM node are inside visible VRAM
+ * Calculate how many bytes of the DRM BUDDY block are inside visible VRAM
*/
static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
- struct drm_mm_node *node)
+ struct drm_buddy_block *block)
{
- uint64_t start = node->start << PAGE_SHIFT;
- uint64_t end = (node->size + node->start) << PAGE_SHIFT;
+ u64 start = amdgpu_vram_mgr_block_start(block);
+ u64 end = start + amdgpu_vram_mgr_block_size(block);
if (start >= adev->gmc.visible_vram_size)
return 0;
@@ -220,9 +278,9 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_resource *res = bo->tbo.resource;
- unsigned pages = res->num_pages;
- struct drm_mm_node *mm;
- u64 usage;
+ struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
+ struct drm_buddy_block *block;
+ u64 usage = 0;
if (amdgpu_gmc_vram_full_visible(&adev->gmc))
return amdgpu_bo_size(bo);
@@ -230,9 +288,8 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
if (res->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT)
return 0;
- mm = &container_of(res, struct ttm_range_mgr_node, base)->mm_nodes[0];
- for (usage = 0; pages; pages -= mm->size, mm++)
- usage += amdgpu_vram_mgr_vis_size(adev, mm);
+ list_for_each_entry(block, &vres->blocks, link)
+ usage += amdgpu_vram_mgr_vis_size(adev, block);
return usage;
}
@@ -242,51 +299,61 @@ static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
- struct drm_mm *mm = &mgr->mm;
+ struct drm_buddy *mm = &mgr->mm;
struct amdgpu_vram_reservation *rsv, *temp;
+ struct drm_buddy_block *block;
uint64_t vis_usage;
- list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) {
- if (drm_mm_reserve_node(mm, &rsv->mm_node))
+ list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) {
+ if (drm_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size,
+ rsv->size, mm->chunk_size, &rsv->allocated,
+ DRM_BUDDY_RANGE_ALLOCATION))
+ continue;
+
+ block = amdgpu_vram_mgr_first_block(&rsv->allocated);
+ if (!block)
continue;
dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n",
- rsv->mm_node.start, rsv->mm_node.size);
+ rsv->start, rsv->size);
- vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node);
+ vis_usage = amdgpu_vram_mgr_vis_size(adev, block);
atomic64_add(vis_usage, &mgr->vis_usage);
- atomic64_add(rsv->mm_node.size << PAGE_SHIFT, &mgr->usage);
- list_move(&rsv->node, &mgr->reserved_pages);
+ spin_lock(&man->bdev->lru_lock);
+ man->usage += rsv->size;
+ spin_unlock(&man->bdev->lru_lock);
+ list_move(&rsv->blocks, &mgr->reserved_pages);
}
}
/**
* amdgpu_vram_mgr_reserve_range - Reserve a range from VRAM
*
- * @man: TTM memory type manager
+ * @mgr: amdgpu_vram_mgr pointer
* @start: start address of the range in VRAM
* @size: size of the range
*
- * Reserve memory from start addess with the specified size in VRAM
+ * Reserve memory from start address with the specified size in VRAM
*/
-int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man,
+int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
uint64_t start, uint64_t size)
{
- struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_vram_reservation *rsv;
rsv = kzalloc(sizeof(*rsv), GFP_KERNEL);
if (!rsv)
return -ENOMEM;
- INIT_LIST_HEAD(&rsv->node);
- rsv->mm_node.start = start >> PAGE_SHIFT;
- rsv->mm_node.size = size >> PAGE_SHIFT;
+ INIT_LIST_HEAD(&rsv->allocated);
+ INIT_LIST_HEAD(&rsv->blocks);
- spin_lock(&mgr->lock);
- list_add_tail(&mgr->reservations_pending, &rsv->node);
- amdgpu_vram_mgr_do_reserve(man);
- spin_unlock(&mgr->lock);
+ rsv->start = start;
+ rsv->size = size;
+
+ mutex_lock(&mgr->lock);
+ list_add_tail(&rsv->blocks, &mgr->reservations_pending);
+ amdgpu_vram_mgr_do_reserve(&mgr->manager);
+ mutex_unlock(&mgr->lock);
return 0;
}
@@ -294,7 +361,7 @@ int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man,
/**
* amdgpu_vram_mgr_query_page_status - query the reservation status
*
- * @man: TTM memory type manager
+ * @mgr: amdgpu_vram_mgr pointer
* @start: start address of a page in VRAM
*
* Returns:
@@ -302,26 +369,25 @@ int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man,
* 0: the page has been reserved
* -ENOENT: the input page is not a reservation
*/
-int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man,
+int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
uint64_t start)
{
- struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_vram_reservation *rsv;
int ret;
- spin_lock(&mgr->lock);
+ mutex_lock(&mgr->lock);
- list_for_each_entry(rsv, &mgr->reservations_pending, node) {
- if ((rsv->mm_node.start <= start) &&
- (start < (rsv->mm_node.start + rsv->mm_node.size))) {
+ list_for_each_entry(rsv, &mgr->reservations_pending, blocks) {
+ if (rsv->start <= start &&
+ (start < (rsv->start + rsv->size))) {
ret = -EBUSY;
goto out;
}
}
- list_for_each_entry(rsv, &mgr->reserved_pages, node) {
- if ((rsv->mm_node.start <= start) &&
- (start < (rsv->mm_node.start + rsv->mm_node.size))) {
+ list_for_each_entry(rsv, &mgr->reserved_pages, blocks) {
+ if (rsv->start <= start &&
+ (start < (rsv->start + rsv->size))) {
ret = 0;
goto out;
}
@@ -329,30 +395,37 @@ int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man,
ret = -ENOENT;
out:
- spin_unlock(&mgr->lock);
+ mutex_unlock(&mgr->lock);
return ret;
}
-/**
- * amdgpu_vram_mgr_virt_start - update virtual start address
- *
- * @mem: ttm_resource to update
- * @node: just allocated node
- *
- * Calculate a virtual BO start address to easily check if everything is CPU
- * accessible.
- */
-static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
- struct drm_mm_node *node)
+int amdgpu_vram_mgr_query_address_block_info(struct amdgpu_vram_mgr *mgr,
+ uint64_t address, struct amdgpu_vram_block_info *info)
{
- unsigned long start;
+ struct amdgpu_vram_mgr_resource *vres;
+ struct drm_buddy_block *block;
+ u64 start, size;
+ int ret = -ENOENT;
+
+ mutex_lock(&mgr->lock);
+ list_for_each_entry(vres, &mgr->allocated_vres_list, vres_node) {
+ list_for_each_entry(block, &vres->blocks, link) {
+ start = amdgpu_vram_mgr_block_start(block);
+ size = amdgpu_vram_mgr_block_size(block);
+ if ((start <= address) && (address < (start + size))) {
+ info->start = start;
+ info->size = size;
+ memcpy(&info->task, &vres->task, sizeof(vres->task));
+ ret = 0;
+ goto out;
+ }
+ }
+ }
- start = node->start + node->size;
- if (start > mem->num_pages)
- start -= mem->num_pages;
- else
- start = 0;
- mem->start = max(mem->start, start);
+out:
+ mutex_unlock(&mgr->lock);
+
+ return ret;
}
/**
@@ -370,116 +443,183 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
const struct ttm_place *place,
struct ttm_resource **res)
{
- unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
- uint64_t vis_usage = 0, mem_bytes, max_bytes;
- struct ttm_range_mgr_node *node;
- struct drm_mm *mm = &mgr->mm;
- enum drm_mm_insert_mode mode;
- unsigned i;
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
+ u64 vis_usage = 0, max_bytes, min_block_size;
+ struct amdgpu_vram_mgr_resource *vres;
+ u64 size, remaining_size, lpfn, fpfn;
+ unsigned int adjust_dcc_size = 0;
+ struct drm_buddy *mm = &mgr->mm;
+ struct drm_buddy_block *block;
+ unsigned long pages_per_block;
int r;
- lpfn = place->lpfn;
- if (!lpfn)
+ lpfn = (u64)place->lpfn << PAGE_SHIFT;
+ if (!lpfn || lpfn > man->size)
lpfn = man->size;
+ fpfn = (u64)place->fpfn << PAGE_SHIFT;
+
max_bytes = adev->gmc.mc_vram_size;
if (tbo->type != ttm_bo_type_kernel)
max_bytes -= AMDGPU_VM_RESERVED_VRAM;
- /* bail out quickly if there's likely not enough VRAM for this BO */
- mem_bytes = tbo->base.size;
- if (atomic64_add_return(mem_bytes, &mgr->usage) > max_bytes) {
- r = -ENOSPC;
- goto error_sub;
- }
-
- if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
- pages_per_node = ~0ul;
- num_nodes = 1;
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) {
+ pages_per_block = ~0ul;
} else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- pages_per_node = HPAGE_PMD_NR;
+ pages_per_block = HPAGE_PMD_NR;
#else
/* default to 2MB */
- pages_per_node = 2UL << (20UL - PAGE_SHIFT);
+ pages_per_block = 2UL << (20UL - PAGE_SHIFT);
#endif
- pages_per_node = max_t(uint32_t, pages_per_node,
- tbo->page_alignment);
- num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
+ pages_per_block = max_t(u32, pages_per_block,
+ tbo->page_alignment);
}
- node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
- GFP_KERNEL | __GFP_ZERO);
- if (!node) {
- r = -ENOMEM;
- goto error_sub;
+ vres = kzalloc(sizeof(*vres), GFP_KERNEL);
+ if (!vres)
+ return -ENOMEM;
+
+ ttm_resource_init(tbo, place, &vres->base);
+
+ /* bail out quickly if there's likely not enough VRAM for this BO */
+ if (ttm_resource_manager_usage(man) > max_bytes) {
+ r = -ENOSPC;
+ goto error_fini;
}
- ttm_resource_init(tbo, place, &node->base);
+ INIT_LIST_HEAD(&vres->blocks);
- mode = DRM_MM_INSERT_BEST;
if (place->flags & TTM_PL_FLAG_TOPDOWN)
- mode = DRM_MM_INSERT_HIGH;
-
- pages_left = node->base.num_pages;
-
- /* Limit maximum size to 2GB due to SG table limitations */
- pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
-
- i = 0;
- spin_lock(&mgr->lock);
- while (pages_left) {
- uint32_t alignment = tbo->page_alignment;
-
- if (pages >= pages_per_node)
- alignment = pages_per_node;
-
- r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
- alignment, 0, place->fpfn,
- lpfn, mode);
- if (unlikely(r)) {
- if (pages > pages_per_node) {
- if (is_power_of_2(pages))
- pages = pages / 2;
- else
- pages = rounddown_pow_of_two(pages);
- continue;
- }
- goto error_free;
+ vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+ vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
+ vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
+
+ if (fpfn || lpfn != mgr->mm.size)
+ /* Allocate blocks in desired range */
+ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+
+ if (bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC &&
+ adev->gmc.gmc_funcs->get_dcc_alignment)
+ adjust_dcc_size = amdgpu_gmc_get_dcc_alignment(adev);
+
+ remaining_size = (u64)vres->base.size;
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) {
+ unsigned int dcc_size;
+
+ dcc_size = roundup_pow_of_two(vres->base.size + adjust_dcc_size);
+ remaining_size = (u64)dcc_size;
+
+ vres->flags |= DRM_BUDDY_TRIM_DISABLE;
+ }
+
+ mutex_lock(&mgr->lock);
+ while (remaining_size) {
+ if (tbo->page_alignment)
+ min_block_size = (u64)tbo->page_alignment << PAGE_SHIFT;
+ else
+ min_block_size = mgr->default_page_size;
+
+ size = remaining_size;
+
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size)
+ min_block_size = size;
+ else if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
+ !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
+ min_block_size = (u64)pages_per_block << PAGE_SHIFT;
+
+ BUG_ON(min_block_size < mm->chunk_size);
+
+ r = drm_buddy_alloc_blocks(mm, fpfn,
+ lpfn,
+ size,
+ min_block_size,
+ &vres->blocks,
+ vres->flags);
+
+ if (unlikely(r == -ENOSPC) && pages_per_block == ~0ul &&
+ !(place->flags & TTM_PL_FLAG_CONTIGUOUS)) {
+ vres->flags &= ~DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+ pages_per_block = max_t(u32, 2UL << (20UL - PAGE_SHIFT),
+ tbo->page_alignment);
+
+ continue;
}
- vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]);
- amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]);
- pages_left -= pages;
- ++i;
+ if (unlikely(r))
+ goto error_free_blocks;
+
+ if (size > remaining_size)
+ remaining_size = 0;
+ else
+ remaining_size -= size;
+ }
+
+ vres->task.pid = task_pid_nr(current);
+ get_task_comm(vres->task.comm, current);
+ list_add_tail(&vres->vres_node, &mgr->allocated_vres_list);
+
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) {
+ struct drm_buddy_block *dcc_block;
+ unsigned long dcc_start;
+ u64 trim_start;
+
+ dcc_block = amdgpu_vram_mgr_first_block(&vres->blocks);
+ /* Adjust the start address for DCC buffers only */
+ dcc_start =
+ roundup((unsigned long)amdgpu_vram_mgr_block_start(dcc_block),
+ adjust_dcc_size);
+ trim_start = (u64)dcc_start;
+ drm_buddy_block_trim(mm, &trim_start,
+ (u64)vres->base.size,
+ &vres->blocks);
+ }
+ mutex_unlock(&mgr->lock);
+
+ vres->base.start = 0;
+ size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),
+ vres->base.size);
+ list_for_each_entry(block, &vres->blocks, link) {
+ unsigned long start;
- if (pages > pages_left)
- pages = pages_left;
+ start = amdgpu_vram_mgr_block_start(block) +
+ amdgpu_vram_mgr_block_size(block);
+ start >>= PAGE_SHIFT;
+
+ if (start > PFN_UP(size))
+ start -= PFN_UP(size);
+ else
+ start = 0;
+ vres->base.start = max(vres->base.start, start);
+
+ vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
}
- spin_unlock(&mgr->lock);
- if (i == 1)
- node->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
+ if (amdgpu_is_vram_mgr_blocks_contiguous(&vres->blocks))
+ vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
if (adev->gmc.xgmi.connected_to_cpu)
- node->base.bus.caching = ttm_cached;
+ vres->base.bus.caching = ttm_cached;
else
- node->base.bus.caching = ttm_write_combined;
+ vres->base.bus.caching = ttm_write_combined;
atomic64_add(vis_usage, &mgr->vis_usage);
- *res = &node->base;
+ *res = &vres->base;
return 0;
-error_free:
- while (i--)
- drm_mm_remove_node(&node->mm_nodes[i]);
- spin_unlock(&mgr->lock);
- kvfree(node);
+error_free_blocks:
+ drm_buddy_free_list(mm, &vres->blocks, 0);
+ mutex_unlock(&mgr->lock);
+error_fini:
+ ttm_resource_fini(man, &vres->base);
+ kfree(vres);
-error_sub:
- atomic64_sub(mem_bytes, &mgr->usage);
return r;
}
@@ -494,28 +634,29 @@ error_sub:
static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
struct ttm_resource *res)
{
- struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
+ struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
- uint64_t usage = 0, vis_usage = 0;
- unsigned i, pages;
+ struct drm_buddy *mm = &mgr->mm;
+ struct drm_buddy_block *block;
+ uint64_t vis_usage = 0;
- spin_lock(&mgr->lock);
- for (i = 0, pages = res->num_pages; pages;
- pages -= node->mm_nodes[i].size, ++i) {
- struct drm_mm_node *mm = &node->mm_nodes[i];
+ mutex_lock(&mgr->lock);
- drm_mm_remove_node(mm);
- usage += mm->size << PAGE_SHIFT;
- vis_usage += amdgpu_vram_mgr_vis_size(adev, mm);
- }
+ list_del(&vres->vres_node);
+ memset(&vres->task, 0, sizeof(vres->task));
+
+ list_for_each_entry(block, &vres->blocks, link)
+ vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
+
+ drm_buddy_free_list(mm, &vres->blocks, vres->flags);
amdgpu_vram_mgr_do_reserve(man);
- spin_unlock(&mgr->lock);
+ mutex_unlock(&mgr->lock);
- atomic64_sub(usage, &mgr->usage);
atomic64_sub(vis_usage, &mgr->vis_usage);
- kvfree(node);
+ ttm_resource_fini(man, res);
+ kfree(vres);
}
/**
@@ -547,11 +688,11 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
if (!*sgt)
return -ENOMEM;
- /* Determine the number of DRM_MM nodes to export */
+ /* Determine the number of DRM_BUDDY blocks to export */
amdgpu_res_first(res, offset, length, &cursor);
while (cursor.remaining) {
num_entries++;
- amdgpu_res_next(&cursor, cursor.size);
+ amdgpu_res_next(&cursor, min(cursor.size, AMDGPU_MAX_SG_SEGMENT_SIZE));
}
r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL);
@@ -563,15 +704,15 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
sg->length = 0;
/*
- * Walk down DRM_MM nodes to populate scatterlist nodes
- * @note: Use iterator api to get first the DRM_MM node
+ * Walk down DRM_BUDDY blocks to populate scatterlist nodes
+ * @note: Use iterator api to get first the DRM_BUDDY block
* and the number of bytes from it. Access the following
- * DRM_MM node(s) if more buffer needs to exported
+ * DRM_BUDDY block(s) if more buffer needs to exported
*/
amdgpu_res_first(res, offset, length, &cursor);
for_each_sgtable_sg((*sgt), sg, i) {
phys_addr_t phys = cursor.start + adev->gmc.aper_base;
- size_t size = cursor.size;
+ unsigned long size = min(cursor.size, AMDGPU_MAX_SG_SEGMENT_SIZE);
dma_addr_t addr;
addr = dma_map_resource(dev, phys, size, dir,
@@ -584,7 +725,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
sg_dma_address(sg) = addr;
sg_dma_len(sg) = size;
- amdgpu_res_next(&cursor, cursor.size);
+ amdgpu_res_next(&cursor, size);
}
return 0;
@@ -630,31 +771,98 @@ void amdgpu_vram_mgr_free_sgt(struct device *dev,
}
/**
- * amdgpu_vram_mgr_usage - how many bytes are used in this domain
+ * amdgpu_vram_mgr_vis_usage - how many bytes are used in the visible part
+ *
+ * @mgr: amdgpu_vram_mgr pointer
+ *
+ * Returns how many bytes are used in the visible part of VRAM
+ */
+uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr)
+{
+ return atomic64_read(&mgr->vis_usage);
+}
+
+/**
+ * amdgpu_vram_mgr_clear_reset_blocks - reset clear blocks
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Reset the cleared drm buddy blocks.
+ */
+void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev)
+{
+ struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+ struct drm_buddy *mm = &mgr->mm;
+
+ mutex_lock(&mgr->lock);
+ drm_buddy_reset_clear(mm, false);
+ mutex_unlock(&mgr->lock);
+}
+
+/**
+ * amdgpu_vram_mgr_intersects - test each drm buddy block for intersection
*
* @man: TTM memory type manager
+ * @res: The resource to test
+ * @place: The place to test against
+ * @size: Size of the new allocation
*
- * Returns how many bytes are used in this domain.
+ * Test each drm buddy block for intersection for eviction decision.
*/
-uint64_t amdgpu_vram_mgr_usage(struct ttm_resource_manager *man)
+static bool amdgpu_vram_mgr_intersects(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
{
- struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+ struct amdgpu_vram_mgr_resource *mgr = to_amdgpu_vram_mgr_resource(res);
+ struct drm_buddy_block *block;
+
+ /* Check each drm buddy block individually */
+ list_for_each_entry(block, &mgr->blocks, link) {
+ unsigned long fpfn =
+ amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT;
+ unsigned long lpfn = fpfn +
+ (amdgpu_vram_mgr_block_size(block) >> PAGE_SHIFT);
+
+ if (place->fpfn < lpfn &&
+ (!place->lpfn || place->lpfn > fpfn))
+ return true;
+ }
- return atomic64_read(&mgr->usage);
+ return false;
}
/**
- * amdgpu_vram_mgr_vis_usage - how many bytes are used in the visible part
+ * amdgpu_vram_mgr_compatible - test each drm buddy block for compatibility
*
* @man: TTM memory type manager
+ * @res: The resource to test
+ * @place: The place to test against
+ * @size: Size of the new allocation
*
- * Returns how many bytes are used in the visible part of VRAM
+ * Test each drm buddy block for placement compatibility.
*/
-uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_resource_manager *man)
+static bool amdgpu_vram_mgr_compatible(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
{
- struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+ struct amdgpu_vram_mgr_resource *mgr = to_amdgpu_vram_mgr_resource(res);
+ struct drm_buddy_block *block;
+
+ /* Check each drm buddy block individually */
+ list_for_each_entry(block, &mgr->blocks, link) {
+ unsigned long fpfn =
+ amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT;
+ unsigned long lpfn = fpfn +
+ (amdgpu_vram_mgr_block_size(block) >> PAGE_SHIFT);
+
+ if (fpfn < place->fpfn ||
+ (place->lpfn && lpfn > place->lpfn))
+ return false;
+ }
- return atomic64_read(&mgr->vis_usage);
+ return true;
}
/**
@@ -669,19 +877,30 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
struct drm_printer *printer)
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+ struct drm_buddy *mm = &mgr->mm;
+ struct amdgpu_vram_reservation *rsv;
+
+ drm_printf(printer, " vis usage:%llu\n",
+ amdgpu_vram_mgr_vis_usage(mgr));
- spin_lock(&mgr->lock);
- drm_mm_print(&mgr->mm, printer);
- spin_unlock(&mgr->lock);
+ mutex_lock(&mgr->lock);
+ drm_printf(printer, "default_page_size: %lluKiB\n",
+ mgr->default_page_size >> 10);
- drm_printf(printer, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
- man->size, amdgpu_vram_mgr_usage(man) >> 20,
- amdgpu_vram_mgr_vis_usage(man) >> 20);
+ drm_buddy_print(mm, printer);
+
+ drm_printf(printer, "reserved:\n");
+ list_for_each_entry(rsv, &mgr->reserved_pages, blocks)
+ drm_printf(printer, "%#018llx-%#018llx: %llu\n",
+ rsv->start, rsv->start + rsv->size, rsv->size);
+ mutex_unlock(&mgr->lock);
}
static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
.alloc = amdgpu_vram_mgr_new,
.free = amdgpu_vram_mgr_del,
+ .intersects = amdgpu_vram_mgr_intersects,
+ .compatible = amdgpu_vram_mgr_compatible,
.debug = amdgpu_vram_mgr_debug
};
@@ -696,15 +915,24 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
{
struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
struct ttm_resource_manager *man = &mgr->manager;
+ int err;
- ttm_resource_manager_init(man, adev->gmc.real_vram_size >> PAGE_SHIFT);
+ man->cg = drmm_cgroup_register_region(adev_to_drm(adev), "vram", adev->gmc.real_vram_size);
+ if (IS_ERR(man->cg))
+ return PTR_ERR(man->cg);
+ ttm_resource_manager_init(man, &adev->mman.bdev,
+ adev->gmc.real_vram_size);
- man->func = &amdgpu_vram_mgr_func;
-
- drm_mm_init(&mgr->mm, 0, man->size);
- spin_lock_init(&mgr->lock);
+ mutex_init(&mgr->lock);
INIT_LIST_HEAD(&mgr->reservations_pending);
INIT_LIST_HEAD(&mgr->reserved_pages);
+ INIT_LIST_HEAD(&mgr->allocated_vres_list);
+ mgr->default_page_size = PAGE_SIZE;
+
+ man->func = &amdgpu_vram_mgr_func;
+ err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
+ if (err)
+ return err;
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
ttm_resource_manager_set_used(man, true);
@@ -732,16 +960,17 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
if (ret)
return;
- spin_lock(&mgr->lock);
- list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
+ mutex_lock(&mgr->lock);
+ list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks)
kfree(rsv);
- list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
- drm_mm_remove_node(&rsv->mm_node);
+ list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
+ drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
kfree(rsv);
}
- drm_mm_takedown(&mgr->mm);
- spin_unlock(&mgr->lock);
+ if (!adev->gmc.is_app_apu)
+ drm_buddy_fini(&mgr->mm);
+ mutex_unlock(&mgr->lock);
ttm_resource_manager_cleanup(man);
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
new file mode 100644
index 000000000000..5f5fd9a911c2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_VRAM_MGR_H__
+#define __AMDGPU_VRAM_MGR_H__
+
+#include <drm/drm_buddy.h>
+
+struct amdgpu_vram_mgr {
+ struct ttm_resource_manager manager;
+ struct drm_buddy mm;
+ /* protects access to buffer objects */
+ struct mutex lock;
+ struct list_head reservations_pending;
+ struct list_head reserved_pages;
+ atomic64_t vis_usage;
+ u64 default_page_size;
+ struct list_head allocated_vres_list;
+};
+
+struct amdgpu_vres_task {
+ pid_t pid;
+ char comm[TASK_COMM_LEN];
+};
+
+struct amdgpu_vram_block_info {
+ u64 start;
+ u64 size;
+ struct amdgpu_vres_task task;
+};
+
+struct amdgpu_vram_mgr_resource {
+ struct ttm_resource base;
+ struct list_head blocks;
+ unsigned long flags;
+ struct list_head vres_node;
+ struct amdgpu_vres_task task;
+};
+
+static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block)
+{
+ return drm_buddy_block_offset(block);
+}
+
+static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
+{
+ return (u64)PAGE_SIZE << drm_buddy_block_order(block);
+}
+
+static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
+{
+ return drm_buddy_block_is_clear(block);
+}
+
+static inline struct amdgpu_vram_mgr_resource *
+to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
+{
+ return container_of(res, struct amdgpu_vram_mgr_resource, base);
+}
+
+static inline void amdgpu_vram_mgr_set_cleared(struct ttm_resource *res)
+{
+ struct amdgpu_vram_mgr_resource *ares = to_amdgpu_vram_mgr_resource(res);
+
+ WARN_ON(ares->flags & DRM_BUDDY_CLEARED);
+ ares->flags |= DRM_BUDDY_CLEARED;
+}
+
+int amdgpu_vram_mgr_query_address_block_info(struct amdgpu_vram_mgr *mgr,
+ uint64_t address, struct amdgpu_vram_block_info *info);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
new file mode 100644
index 000000000000..1083db8cea2e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
@@ -0,0 +1,1107 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_xcp.h"
+#include "amdgpu_drv.h"
+
+#include <drm/drm_drv.h>
+#include "../amdxcp/amdgpu_xcp_drv.h"
+
+static void amdgpu_xcp_sysfs_entries_init(struct amdgpu_xcp_mgr *xcp_mgr);
+static void amdgpu_xcp_sysfs_entries_update(struct amdgpu_xcp_mgr *xcp_mgr);
+
+static int __amdgpu_xcp_run(struct amdgpu_xcp_mgr *xcp_mgr,
+ struct amdgpu_xcp_ip *xcp_ip, int xcp_state)
+{
+ int (*run_func)(void *handle, uint32_t inst_mask);
+ int ret = 0;
+
+ if (!xcp_ip || !xcp_ip->valid || !xcp_ip->ip_funcs)
+ return 0;
+
+ run_func = NULL;
+
+ switch (xcp_state) {
+ case AMDGPU_XCP_PREPARE_SUSPEND:
+ run_func = xcp_ip->ip_funcs->prepare_suspend;
+ break;
+ case AMDGPU_XCP_SUSPEND:
+ run_func = xcp_ip->ip_funcs->suspend;
+ break;
+ case AMDGPU_XCP_PREPARE_RESUME:
+ run_func = xcp_ip->ip_funcs->prepare_resume;
+ break;
+ case AMDGPU_XCP_RESUME:
+ run_func = xcp_ip->ip_funcs->resume;
+ break;
+ }
+
+ if (run_func)
+ ret = run_func(xcp_mgr->adev, xcp_ip->inst_mask);
+
+ return ret;
+}
+
+static int amdgpu_xcp_run_transition(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ int state)
+{
+ struct amdgpu_xcp_ip *xcp_ip;
+ struct amdgpu_xcp *xcp;
+ int i, ret;
+
+ if (xcp_id >= MAX_XCP || !xcp_mgr->xcp[xcp_id].valid)
+ return -EINVAL;
+
+ xcp = &xcp_mgr->xcp[xcp_id];
+ for (i = 0; i < AMDGPU_XCP_MAX_BLOCKS; ++i) {
+ xcp_ip = &xcp->ip[i];
+ ret = __amdgpu_xcp_run(xcp_mgr, xcp_ip, state);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+int amdgpu_xcp_prepare_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
+{
+ return amdgpu_xcp_run_transition(xcp_mgr, xcp_id,
+ AMDGPU_XCP_PREPARE_SUSPEND);
+}
+
+int amdgpu_xcp_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
+{
+ return amdgpu_xcp_run_transition(xcp_mgr, xcp_id, AMDGPU_XCP_SUSPEND);
+}
+
+int amdgpu_xcp_prepare_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
+{
+ return amdgpu_xcp_run_transition(xcp_mgr, xcp_id,
+ AMDGPU_XCP_PREPARE_RESUME);
+}
+
+int amdgpu_xcp_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
+{
+ return amdgpu_xcp_run_transition(xcp_mgr, xcp_id, AMDGPU_XCP_RESUME);
+}
+
+static void __amdgpu_xcp_add_block(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ struct amdgpu_xcp_ip *ip)
+{
+ struct amdgpu_xcp *xcp;
+
+ if (!ip)
+ return;
+
+ xcp = &xcp_mgr->xcp[xcp_id];
+ xcp->ip[ip->ip_id] = *ip;
+ xcp->ip[ip->ip_id].valid = true;
+
+ xcp->valid = true;
+}
+
+static void __amdgpu_xcp_set_unique_id(struct amdgpu_xcp_mgr *xcp_mgr,
+ int xcp_id)
+{
+ struct amdgpu_xcp *xcp = &xcp_mgr->xcp[xcp_id];
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ uint32_t inst_mask;
+ uint64_t uid;
+ int i;
+
+ if (!amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &inst_mask) &&
+ inst_mask) {
+ i = GET_INST(GC, (ffs(inst_mask) - 1));
+ uid = amdgpu_device_get_uid(xcp_mgr->adev->uid_info,
+ AMDGPU_UID_TYPE_XCD, i);
+ if (uid)
+ xcp->unique_id = uid;
+ }
+}
+
+int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ struct amdgpu_xcp_ip ip;
+ uint8_t mem_id;
+ int i, j, ret;
+
+ if (!num_xcps || num_xcps > MAX_XCP)
+ return -EINVAL;
+
+ xcp_mgr->mode = mode;
+
+ for (i = 0; i < MAX_XCP; ++i)
+ xcp_mgr->xcp[i].valid = false;
+
+ /* This is needed for figuring out memory id of xcp */
+ xcp_mgr->num_xcp_per_mem_partition = num_xcps / xcp_mgr->adev->gmc.num_mem_partitions;
+
+ for (i = 0; i < num_xcps; ++i) {
+ for (j = AMDGPU_XCP_GFXHUB; j < AMDGPU_XCP_MAX_BLOCKS; ++j) {
+ ret = xcp_mgr->funcs->get_ip_details(xcp_mgr, i, j,
+ &ip);
+ if (ret)
+ continue;
+
+ __amdgpu_xcp_add_block(xcp_mgr, i, &ip);
+ }
+
+ xcp_mgr->xcp[i].id = i;
+
+ if (xcp_mgr->funcs->get_xcp_mem_id) {
+ ret = xcp_mgr->funcs->get_xcp_mem_id(
+ xcp_mgr, &xcp_mgr->xcp[i], &mem_id);
+ if (ret)
+ continue;
+ else
+ xcp_mgr->xcp[i].mem_id = mem_id;
+ }
+ __amdgpu_xcp_set_unique_id(xcp_mgr, i);
+ }
+
+ xcp_mgr->num_xcps = num_xcps;
+ amdgpu_xcp_update_partition_sched_list(adev);
+
+ return 0;
+}
+
+static int __amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
+ int mode)
+{
+ int ret, curr_mode, num_xcps = 0;
+
+ if (!xcp_mgr->funcs || !xcp_mgr->funcs->switch_partition_mode)
+ return 0;
+
+ mutex_lock(&xcp_mgr->xcp_lock);
+
+ curr_mode = xcp_mgr->mode;
+ /* State set to transient mode */
+ xcp_mgr->mode = AMDGPU_XCP_MODE_TRANS;
+
+ ret = xcp_mgr->funcs->switch_partition_mode(xcp_mgr, mode, &num_xcps);
+
+ if (ret) {
+ /* Failed, get whatever mode it's at now */
+ if (xcp_mgr->funcs->query_partition_mode)
+ xcp_mgr->mode = amdgpu_xcp_query_partition_mode(
+ xcp_mgr, AMDGPU_XCP_FL_LOCKED);
+ else
+ xcp_mgr->mode = curr_mode;
+
+ goto out;
+ }
+ amdgpu_xcp_sysfs_entries_update(xcp_mgr);
+out:
+ mutex_unlock(&xcp_mgr->xcp_lock);
+
+ return ret;
+}
+
+int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode)
+{
+ if (!xcp_mgr || mode == AMDGPU_XCP_MODE_NONE)
+ return -EINVAL;
+
+ if (xcp_mgr->mode == mode)
+ return 0;
+
+ return __amdgpu_xcp_switch_partition_mode(xcp_mgr, mode);
+}
+
+int amdgpu_xcp_restore_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ if (!xcp_mgr || xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
+ return 0;
+
+ return __amdgpu_xcp_switch_partition_mode(xcp_mgr, xcp_mgr->mode);
+}
+
+static bool __amdgpu_xcp_is_cached_mode_valid(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ if (!xcp_mgr->funcs || !xcp_mgr->funcs->query_partition_mode)
+ return true;
+
+ if (!amdgpu_sriov_vf(xcp_mgr->adev) &&
+ xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
+ return true;
+
+ if (xcp_mgr->mode != AMDGPU_XCP_MODE_NONE &&
+ xcp_mgr->mode != AMDGPU_XCP_MODE_TRANS)
+ return true;
+
+ return false;
+}
+
+int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
+{
+ int mode;
+
+ if (__amdgpu_xcp_is_cached_mode_valid(xcp_mgr))
+ return xcp_mgr->mode;
+
+ if (!(flags & AMDGPU_XCP_FL_LOCKED))
+ mutex_lock(&xcp_mgr->xcp_lock);
+ mode = xcp_mgr->funcs->query_partition_mode(xcp_mgr);
+
+ /* First time query for VF, set the mode here */
+ if (amdgpu_sriov_vf(xcp_mgr->adev) &&
+ xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
+ xcp_mgr->mode = mode;
+
+ if (xcp_mgr->mode != AMDGPU_XCP_MODE_TRANS && mode != xcp_mgr->mode)
+ dev_WARN(
+ xcp_mgr->adev->dev,
+ "Cached partition mode %d not matching with device mode %d",
+ xcp_mgr->mode, mode);
+
+ if (!(flags & AMDGPU_XCP_FL_LOCKED))
+ mutex_unlock(&xcp_mgr->xcp_lock);
+
+ return mode;
+}
+
+static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev)
+{
+ struct drm_device *p_ddev;
+ struct drm_device *ddev;
+ int i, ret;
+
+ ddev = adev_to_drm(adev);
+
+ /* xcp #0 shares drm device setting with adev */
+ adev->xcp_mgr->xcp->ddev = ddev;
+
+ for (i = 1; i < MAX_XCP; i++) {
+ ret = amdgpu_xcp_drm_dev_alloc(&p_ddev);
+ if (ret == -ENOSPC) {
+ dev_warn(adev->dev,
+ "Skip xcp node #%d when out of drm node resource.", i);
+ ret = 0;
+ goto out;
+ } else if (ret) {
+ goto out;
+ }
+
+ /* Redirect all IOCTLs to the primary device */
+ adev->xcp_mgr->xcp[i].rdev = p_ddev->render->dev;
+ adev->xcp_mgr->xcp[i].pdev = p_ddev->primary->dev;
+ adev->xcp_mgr->xcp[i].driver = (struct drm_driver *)p_ddev->driver;
+ adev->xcp_mgr->xcp[i].vma_offset_manager = p_ddev->vma_offset_manager;
+ p_ddev->render->dev = ddev;
+ p_ddev->primary->dev = ddev;
+ p_ddev->vma_offset_manager = ddev->vma_offset_manager;
+ p_ddev->driver = &amdgpu_partition_driver;
+ adev->xcp_mgr->xcp[i].ddev = p_ddev;
+
+ dev_set_drvdata(p_ddev->dev, &adev->xcp_mgr->xcp[i]);
+ }
+ ret = 0;
+out:
+ amdgpu_xcp_sysfs_entries_init(adev->xcp_mgr);
+
+ return ret;
+}
+
+/**
+ * amdgpu_xcp_mgr_init() - allocate and attach the partition manager
+ * @adev: amdgpu device that will own the manager
+ * @init_mode: initial partition mode, or AMDGPU_XCP_MODE_NONE
+ * @init_num_xcps: number of partitions handed to amdgpu_xcp_init()
+ * @xcp_funcs: callback table; must provide get_ip_details
+ *
+ * The result of amdgpu_xcp_init() is not checked here.
+ *
+ * Return: -EINVAL for a missing callback table, -ENOMEM when allocation
+ * fails, otherwise the result of amdgpu_xcp_dev_alloc().
+ */
+int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
+			int init_num_xcps,
+			struct amdgpu_xcp_mgr_funcs *xcp_funcs)
+{
+	struct amdgpu_xcp_mgr *mgr;
+	int idx = 0;
+
+	if (!(xcp_funcs && xcp_funcs->get_ip_details))
+		return -EINVAL;
+
+	mgr = kzalloc(sizeof(*mgr), GFP_KERNEL);
+	if (mgr == NULL)
+		return -ENOMEM;
+
+	mgr->adev = adev;
+	mgr->funcs = xcp_funcs;
+	mgr->mode = init_mode;
+	mutex_init(&mgr->xcp_lock);
+
+	if (init_mode != AMDGPU_XCP_MODE_NONE)
+		amdgpu_xcp_init(mgr, init_num_xcps, init_mode);
+
+	/* Publish the manager, then give every slot a back pointer to it */
+	adev->xcp_mgr = mgr;
+	while (idx < MAX_XCP) {
+		mgr->xcp[idx].xcp_mgr = mgr;
+		idx++;
+	}
+
+	return amdgpu_xcp_dev_alloc(adev);
+}
+
+/**
+ * amdgpu_xcp_get_partition() - find the partitions that own an IP instance
+ * @xcp_mgr: partition manager
+ * @ip: IP block type to look up
+ * @instance: instance number within that IP block
+ *
+ * Return: bitmask of partition indices whose valid @ip entry contains
+ * @instance, -EINVAL for an out-of-range @ip, or -ENXIO when no partition
+ * matches.
+ */
+int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr,
+			     enum AMDGPU_XCP_IP_BLOCK ip, int instance)
+{
+	int idx, mask = 0;
+
+	if (ip >= AMDGPU_XCP_MAX_BLOCKS)
+		return -EINVAL;
+
+	for (idx = 0; idx < xcp_mgr->num_xcps; idx++) {
+		struct amdgpu_xcp *cur = &xcp_mgr->xcp[idx];
+
+		if (!cur->valid || !cur->ip[ip].valid)
+			continue;
+
+		if (cur->ip[ip].inst_mask & BIT(instance))
+			mask |= BIT(idx);
+	}
+
+	return mask ? mask : -ENXIO;
+}
+
+/**
+ * amdgpu_xcp_get_inst_details() - read the instance mask of one IP block
+ * @xcp: partition to query
+ * @ip: IP block type
+ * @inst_mask: output; receives the instance mask of @ip in @xcp
+ *
+ * @ip is range-checked before it is used as an index into @xcp->ip[], the
+ * same way amdgpu_xcp_get_partition() does, so an out-of-range value cannot
+ * read past the array.
+ *
+ * Return: 0 on success, -EINVAL when an argument is NULL or out of range, or
+ * when the partition or the requested IP entry is not valid.
+ */
+int amdgpu_xcp_get_inst_details(struct amdgpu_xcp *xcp,
+				enum AMDGPU_XCP_IP_BLOCK ip,
+				uint32_t *inst_mask)
+{
+	if (!xcp || !inst_mask || ip >= AMDGPU_XCP_MAX_BLOCKS)
+		return -EINVAL;
+
+	if (!xcp->valid || !xcp->ip[ip].valid)
+		return -EINVAL;
+
+	*inst_mask = xcp->ip[ip].inst_mask;
+
+	return 0;
+}
+
+/**
+ * amdgpu_xcp_dev_register() - register the extra partition DRM devices
+ * @adev: amdgpu device
+ * @ent: PCI id entry; its driver_data is passed to drm_dev_register()
+ *
+ * Slot 0 is skipped because it uses the primary DRM device. Registration
+ * stops at the first slot without a DRM device.
+ *
+ * Return: 0 on success (or when there is no partition manager), otherwise
+ * the first error from drm_dev_register().
+ */
+int amdgpu_xcp_dev_register(struct amdgpu_device *adev,
+			    const struct pci_device_id *ent)
+{
+	struct drm_device *part_ddev;
+	int idx, err;
+
+	if (!adev->xcp_mgr)
+		return 0;
+
+	for (idx = 1; idx < MAX_XCP; idx++) {
+		part_ddev = adev->xcp_mgr->xcp[idx].ddev;
+		if (!part_ddev)
+			break;
+
+		err = drm_dev_register(part_ddev, ent->driver_data);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * amdgpu_xcp_dev_unplug - unplug and release the extra partition DRM devices
+ * @adev: amdgpu device
+ *
+ * Walks slots 1..MAX_XCP-1 until the first slot without a DRM device. Each
+ * device is unplugged, gets back the minor owners, driver and VMA offset
+ * manager that amdgpu_xcp_dev_alloc() saved, and is then handed to
+ * amdgpu_xcp_drm_dev_free(). Does nothing without a partition manager.
+ */
+void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev)
+{
+ struct drm_device *p_ddev;
+ int i;
+
+ if (!adev->xcp_mgr)
+ return;
+
+ for (i = 1; i < MAX_XCP; i++) {
+ if (!adev->xcp_mgr->xcp[i].ddev)
+ break;
+
+ p_ddev = adev->xcp_mgr->xcp[i].ddev;
+ drm_dev_unplug(p_ddev);
+ /* Undo the redirection to the primary device before freeing */
+ p_ddev->render->dev = adev->xcp_mgr->xcp[i].rdev;
+ p_ddev->primary->dev = adev->xcp_mgr->xcp[i].pdev;
+ p_ddev->driver = adev->xcp_mgr->xcp[i].driver;
+ p_ddev->vma_offset_manager = adev->xcp_mgr->xcp[i].vma_offset_manager;
+ amdgpu_xcp_drm_dev_free(p_ddev);
+ }
+}
+
+/**
+ * amdgpu_xcp_open_device() - bind an opened file to its partition
+ * @adev: amdgpu device
+ * @fpriv: per-file driver data; xcp_id and vm.mem_id are filled in
+ * @file_priv: DRM file being opened
+ *
+ * The partition is the one whose DRM device render minor equals the minor
+ * of @file_priv. When none matches, xcp_id stays AMDGPU_XCP_NO_PARTITION
+ * and vm.mem_id is set to -1.
+ *
+ * Return: 0 on success (or without a partition manager), -ENOENT when the
+ * matching partition is not valid.
+ */
+int amdgpu_xcp_open_device(struct amdgpu_device *adev,
+			   struct amdgpu_fpriv *fpriv,
+			   struct drm_file *file_priv)
+{
+	struct amdgpu_xcp *xcp;
+	int idx;
+
+	if (!adev->xcp_mgr)
+		return 0;
+
+	fpriv->xcp_id = AMDGPU_XCP_NO_PARTITION;
+	for (idx = 0; idx < MAX_XCP; idx++) {
+		xcp = &adev->xcp_mgr->xcp[idx];
+		if (!xcp->ddev)
+			break;
+
+		if (file_priv->minor != xcp->ddev->render)
+			continue;
+
+		if (!xcp->valid) {
+			dev_err(adev->dev, "renderD%d partition %d not valid!",
+				file_priv->minor->index, idx);
+			return -ENOENT;
+		}
+
+		dev_dbg(adev->dev, "renderD%d partition %d opened!",
+			file_priv->minor->index, idx);
+		fpriv->xcp_id = idx;
+		break;
+	}
+
+	if (fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION)
+		fpriv->vm.mem_id = -1;
+	else
+		fpriv->vm.mem_id = adev->xcp_mgr->xcp[fpriv->xcp_id].mem_id;
+
+	return 0;
+}
+
+/**
+ * amdgpu_xcp_release_sched() - drop a partition reference held by an entity
+ * @adev: amdgpu device
+ * @entity: context entity being released
+ *
+ * Decrements ref_cnt of the partition that owns the ring behind the
+ * entity's scheduler, provided the scheduler work queue is ready.
+ */
+void amdgpu_xcp_release_sched(struct amdgpu_device *adev,
+			      struct amdgpu_ctx_entity *entity)
+{
+	struct drm_gpu_scheduler *sched;
+	struct amdgpu_ring *ring;
+
+	if (!adev->xcp_mgr)
+		return;
+
+	sched = entity->entity.rq->sched;
+	if (!drm_sched_wqueue_ready(sched))
+		return;
+
+	ring = to_amdgpu_ring(sched);
+	atomic_dec(&adev->xcp_mgr->xcp[ring->xcp_id].ref_cnt);
+}
+
+/**
+ * amdgpu_xcp_select_scheds() - pick the scheduler list for a file
+ * @adev: amdgpu device
+ * @hw_ip: hardware IP index into the partition's gpu_sched table
+ * @hw_prio: hardware priority index into the partition's gpu_sched table
+ * @fpriv: per-file driver data; xcp_id is assigned if still unset
+ * @num_scheds: output; number of schedulers in the list
+ * @scheds: output; the scheduler list
+ *
+ * A file without a partition is assigned the partition with the lowest
+ * ref_cnt (the first one wins on ties). On success the chosen partition's
+ * ref_cnt is incremented.
+ *
+ * Return: 0 on success, -ENOENT when the partition has no scheduler for
+ * @hw_ip / @hw_prio.
+ */
+int amdgpu_xcp_select_scheds(struct amdgpu_device *adev,
+			     u32 hw_ip, u32 hw_prio,
+			     struct amdgpu_fpriv *fpriv,
+			     unsigned int *num_scheds,
+			     struct drm_gpu_scheduler ***scheds)
+{
+	struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
+	struct amdgpu_sched *entry;
+	u32 sel_xcp_id;
+	int idx;
+
+	if (fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION) {
+		u32 lowest = ~0;
+		u32 cur;
+
+		fpriv->xcp_id = 0;
+		for (idx = 0; idx < xcp_mgr->num_xcps; idx++) {
+			cur = atomic_read(&xcp_mgr->xcp[idx].ref_cnt);
+			if (cur >= lowest)
+				continue;
+
+			fpriv->xcp_id = idx;
+			lowest = cur;
+		}
+	}
+	sel_xcp_id = fpriv->xcp_id;
+
+	entry = &xcp_mgr->xcp[sel_xcp_id].gpu_sched[hw_ip][hw_prio];
+	if (!entry->num_scheds) {
+		dev_err(adev->dev, "Failed to schedule partition #%d.", sel_xcp_id);
+		return -ENOENT;
+	}
+
+	*num_scheds = entry->num_scheds;
+	*scheds = entry->sched;
+	atomic_inc(&xcp_mgr->xcp[sel_xcp_id].ref_cnt);
+	dev_dbg(adev->dev, "Selected partition #%d", sel_xcp_id);
+
+	return 0;
+}
+
+/**
+ * amdgpu_set_xcp_id() - work out which partition a ring belongs to
+ * @adev: amdgpu device
+ * @inst_idx: instance index of the ring within its IP block
+ * @ring: ring whose xcp_id is updated
+ *
+ * The ring starts as AMDGPU_XCP_NO_PARTITION and stays that way when
+ * partitioning is off, for CPER rings, for unsupported ring types, and when
+ * no partition lists the instance. Compute rings additionally mirror the id
+ * into adev->gfx.enforce_isolation[].
+ */
+static void amdgpu_set_xcp_id(struct amdgpu_device *adev,
+			      uint32_t inst_idx,
+			      struct amdgpu_ring *ring)
+{
+	struct amdgpu_xcp_mgr *mgr = adev->xcp_mgr;
+	bool is_compute = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE;
+	enum AMDGPU_XCP_IP_BLOCK blk;
+	uint32_t wanted;
+	int id;
+
+	ring->xcp_id = AMDGPU_XCP_NO_PARTITION;
+	if (is_compute)
+		adev->gfx.enforce_isolation[0].xcp_id = ring->xcp_id;
+
+	if (mgr->mode == AMDGPU_XCP_MODE_NONE)
+		return;
+	if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
+		return;
+
+	wanted = 1 << inst_idx;
+
+	/* Map the ring type onto the IP block whose instance mask is searched */
+	switch (ring->funcs->type) {
+	case AMDGPU_HW_IP_GFX:
+	case AMDGPU_RING_TYPE_COMPUTE:
+	case AMDGPU_RING_TYPE_KIQ:
+		blk = AMDGPU_XCP_GFX;
+		break;
+	case AMDGPU_RING_TYPE_SDMA:
+		blk = AMDGPU_XCP_SDMA;
+		break;
+	case AMDGPU_RING_TYPE_VCN_ENC:
+	case AMDGPU_RING_TYPE_VCN_JPEG:
+		blk = AMDGPU_XCP_VCN;
+		break;
+	default:
+		dev_err(adev->dev, "Not support ring type %d!", ring->funcs->type);
+		return;
+	}
+
+	/* First partition listing the instance wins */
+	for (id = 0; id < mgr->num_xcps; id++) {
+		if (!(mgr->xcp[id].ip[blk].inst_mask & wanted))
+			continue;
+
+		ring->xcp_id = id;
+		dev_dbg(adev->dev, "ring:%s xcp_id :%u", ring->name,
+			ring->xcp_id);
+		if (is_compute)
+			adev->gfx.enforce_isolation[id].xcp_id = id;
+		break;
+	}
+}
+
+/**
+ * amdgpu_xcp_gpu_sched_update() - append a ring's scheduler to a partition
+ * @adev: amdgpu device
+ * @ring: ring whose scheduler is added
+ * @sel_xcp_id: index of the partition receiving the scheduler
+ *
+ * The scheduler goes into the list selected by the ring's type and
+ * hardware priority; the list length is incremented.
+ */
+static void amdgpu_xcp_gpu_sched_update(struct amdgpu_device *adev,
+					struct amdgpu_ring *ring,
+					unsigned int sel_xcp_id)
+{
+	struct amdgpu_sched *entry;
+
+	entry = &adev->xcp_mgr->xcp[sel_xcp_id]
+			.gpu_sched[ring->funcs->type][ring->hw_prio];
+
+	entry->sched[entry->num_scheds] = &ring->sched;
+	entry->num_scheds++;
+
+	dev_dbg(adev->dev, "%s :[%d] gpu_sched[%d][%d] = %d",
+		ring->name, sel_xcp_id, ring->funcs->type,
+		ring->hw_prio, entry->num_scheds);
+}
+
+/**
+ * amdgpu_xcp_sched_list_update() - rebuild the per-partition scheduler lists
+ * @adev: amdgpu device
+ *
+ * Resets ref_cnt and the gpu_sched table of every slot, then (unless
+ * partitioning is off) adds each ready ring that has a scheduler to the
+ * partition recorded in ring->xcp_id.
+ *
+ * Return: always 0.
+ */
+static int amdgpu_xcp_sched_list_update(struct amdgpu_device *adev)
+{
+	struct amdgpu_xcp_mgr *mgr = adev->xcp_mgr;
+	struct amdgpu_ring *ring;
+	bool vcn_ring;
+	int idx;
+
+	for (idx = 0; idx < MAX_XCP; idx++) {
+		atomic_set(&mgr->xcp[idx].ref_cnt, 0);
+		memset(mgr->xcp[idx].gpu_sched, 0,
+		       sizeof(mgr->xcp[idx].gpu_sched));
+	}
+
+	if (mgr->mode == AMDGPU_XCP_MODE_NONE)
+		return 0;
+
+	for (idx = 0; idx < AMDGPU_MAX_RINGS; idx++) {
+		ring = adev->rings[idx];
+		if (!ring || !ring->sched.ready || ring->no_scheduler)
+			continue;
+
+		amdgpu_xcp_gpu_sched_update(adev, ring, ring->xcp_id);
+
+		/* VCN may be shared by two partitions under CPX MODE in certain
+		 * configs.
+		 */
+		vcn_ring = ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC ||
+			   ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG;
+		if (vcn_ring && mgr->num_xcps > adev->vcn.num_vcn_inst)
+			amdgpu_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1);
+	}
+
+	return 0;
+}
+
+/**
+ * amdgpu_xcp_update_partition_sched_list() - refresh ring/partition mapping
+ * @adev: amdgpu device
+ *
+ * Recomputes xcp_id for every ring, using ring->xcc_id as the instance
+ * index for compute and KIQ rings and ring->me for all others, then
+ * rebuilds the per-partition scheduler lists.
+ *
+ * Return: result of amdgpu_xcp_sched_list_update().
+ */
+int amdgpu_xcp_update_partition_sched_list(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	bool per_xcc;
+	int idx;
+
+	for (idx = 0; idx < adev->num_rings; idx++) {
+		ring = adev->rings[idx];
+		per_xcc = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE ||
+			  ring->funcs->type == AMDGPU_RING_TYPE_KIQ;
+
+		amdgpu_set_xcp_id(adev, per_xcc ? ring->xcc_id : ring->me, ring);
+	}
+
+	return amdgpu_xcp_sched_list_update(adev);
+}
+
+/**
+ * amdgpu_xcp_update_supported_modes() - derive supported partition modes
+ * @xcp_mgr: partition manager whose supp_xcp_modes is rewritten
+ *
+ * The mask depends only on the number of XCCs in adev->gfx.xcc_mask.
+ * Counts other than 8, 6, 4, 2 or 1 leave the mask empty.
+ */
+void amdgpu_xcp_update_supported_modes(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+	struct amdgpu_device *adev = xcp_mgr->adev;
+	uint32_t modes = 0;
+
+	xcp_mgr->supp_xcp_modes = 0;
+
+	switch (NUM_XCC(adev->gfx.xcc_mask)) {
+	case 8:
+		modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+			BIT(AMDGPU_DPX_PARTITION_MODE) |
+			BIT(AMDGPU_QPX_PARTITION_MODE) |
+			BIT(AMDGPU_CPX_PARTITION_MODE);
+		break;
+	case 6:
+		modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+			BIT(AMDGPU_TPX_PARTITION_MODE) |
+			BIT(AMDGPU_CPX_PARTITION_MODE);
+		break;
+	case 4:
+		modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+			BIT(AMDGPU_DPX_PARTITION_MODE) |
+			BIT(AMDGPU_CPX_PARTITION_MODE);
+		break;
+	case 2:
+	case 1:
+		/* Two XCCs and one XCC support the same pair of modes */
+		modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+			BIT(AMDGPU_CPX_PARTITION_MODE);
+		break;
+	default:
+		break;
+	}
+
+	xcp_mgr->supp_xcp_modes = modes;
+}
+
+/**
+ * amdgpu_xcp_pre_partition_switch() - prepare for a partition mode switch
+ * @xcp_mgr: partition manager
+ * @flags: AMDGPU_XCP_OPS_* bits selecting what to tear down
+ *
+ * Return: always 0.
+ */
+int amdgpu_xcp_pre_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
+{
+	bool with_kfd = flags & AMDGPU_XCP_OPS_KFD;
+
+	/* TODO:
+	 * Stop user queues and threads, and make sure GPU is empty of work.
+	 */
+
+	if (with_kfd)
+		amdgpu_amdkfd_device_fini_sw(xcp_mgr->adev);
+
+	return 0;
+}
+
+/**
+ * amdgpu_xcp_post_partition_switch() - finish a partition mode switch
+ * @xcp_mgr: partition manager
+ * @flags: AMDGPU_XCP_OPS_* bits selecting what to bring back up
+ *
+ * With AMDGPU_XCP_OPS_KFD set, KFD is probed and initialised again.
+ *
+ * Return: 0 on success, -EIO when KFD did not complete initialisation.
+ */
+int amdgpu_xcp_post_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
+{
+	if (!(flags & AMDGPU_XCP_OPS_KFD))
+		return 0;
+
+	amdgpu_amdkfd_device_probe(xcp_mgr->adev);
+	amdgpu_amdkfd_device_init(xcp_mgr->adev);
+
+	/* If KFD init failed, return failure */
+	return xcp_mgr->adev->kfd.init_complete ? 0 : -EIO;
+}
+
+/*====================== xcp sysfs - configuration ======================*/
+/*
+ * XCP_CFG_SYSFS_RES_ATTR_SHOW(_name) generates a show callback that prints
+ * the field _name of a struct amdgpu_xcp_res_details as a decimal number
+ * followed by a newline.
+ */
+#define XCP_CFG_SYSFS_RES_ATTR_SHOW(_name) \
+ static ssize_t amdgpu_xcp_res_sysfs_##_name##_show( \
+ struct amdgpu_xcp_res_details *xcp_res, char *buf) \
+ { \
+ return sysfs_emit(buf, "%d\n", xcp_res->_name); \
+ }
+
+/* Attribute wrapper pairing a sysfs attribute with its typed show callback */
+struct amdgpu_xcp_res_sysfs_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct amdgpu_xcp_res_details *xcp_res, char *buf);
+};
+
+/*
+ * XCP_CFG_SYSFS_RES_ATTR(_name) defines the attribute object for _name with
+ * file mode 0400 and the show callback generated by the macro above.
+ */
+#define XCP_CFG_SYSFS_RES_ATTR(_name) \
+ struct amdgpu_xcp_res_sysfs_attribute xcp_res_sysfs_attr_##_name = { \
+ .attr = { .name = __stringify(_name), .mode = 0400 }, \
+ .show = amdgpu_xcp_res_sysfs_##_name##_show, \
+ }
+
+/* Per-resource attributes: num_inst and num_shared */
+XCP_CFG_SYSFS_RES_ATTR_SHOW(num_inst)
+XCP_CFG_SYSFS_RES_ATTR(num_inst);
+XCP_CFG_SYSFS_RES_ATTR_SHOW(num_shared)
+XCP_CFG_SYSFS_RES_ATTR(num_shared);
+
+/* Expands to the embedded struct attribute of the named resource attribute */
+#define XCP_CFG_SYSFS_RES_ATTR_PTR(_name) xcp_res_sysfs_attr_##_name.attr
+
+/* NULL-terminated list consumed by ATTRIBUTE_GROUPS(xcp_cfg_res_sysfs) */
+static struct attribute *xcp_cfg_res_sysfs_attrs[] = {
+ &XCP_CFG_SYSFS_RES_ATTR_PTR(num_inst),
+ &XCP_CFG_SYSFS_RES_ATTR_PTR(num_shared), NULL
+};
+
+/*
+ * Names of the compute partition modes, indexed by mode value. Used by
+ * supported_xcp_configs_show() to print the supported modes.
+ */
+static const char *xcp_desc[] = {
+ [AMDGPU_SPX_PARTITION_MODE] = "SPX",
+ [AMDGPU_DPX_PARTITION_MODE] = "DPX",
+ [AMDGPU_TPX_PARTITION_MODE] = "TPX",
+ [AMDGPU_QPX_PARTITION_MODE] = "QPX",
+ [AMDGPU_CPX_PARTITION_MODE] = "CPX",
+};
+
+/*
+ * Names of the memory partition (NPS) modes, indexed by mode value. Used by
+ * supported_nps_configs_show().
+ */
+static const char *nps_desc[] = {
+ [UNKNOWN_MEMORY_PARTITION_MODE] = "UNKNOWN",
+ [AMDGPU_NPS1_PARTITION_MODE] = "NPS1",
+ [AMDGPU_NPS2_PARTITION_MODE] = "NPS2",
+ [AMDGPU_NPS3_PARTITION_MODE] = "NPS3",
+ [AMDGPU_NPS4_PARTITION_MODE] = "NPS4",
+ [AMDGPU_NPS6_PARTITION_MODE] = "NPS6",
+ [AMDGPU_NPS8_PARTITION_MODE] = "NPS8",
+};
+
+/* Generates xcp_cfg_res_sysfs_groups from xcp_cfg_res_sysfs_attrs */
+ATTRIBUTE_GROUPS(xcp_cfg_res_sysfs);
+
+/* Recover the wrapping objects from an embedded attribute / kobject */
+#define to_xcp_attr(x) \
+ container_of(x, struct amdgpu_xcp_res_sysfs_attribute, attr)
+#define to_xcp_res(x) container_of(x, struct amdgpu_xcp_res_details, kobj)
+
+/**
+ * xcp_cfg_res_sysfs_attr_show() - sysfs show dispatcher for resource entries
+ * @kobj: kobject embedded in a struct amdgpu_xcp_res_details
+ * @attr: attribute embedded in a struct amdgpu_xcp_res_sysfs_attribute
+ * @buf: sysfs output buffer
+ *
+ * Return: result of the attribute's show callback, or -EIO if it has none.
+ */
+static ssize_t xcp_cfg_res_sysfs_attr_show(struct kobject *kobj,
+					   struct attribute *attr, char *buf)
+{
+	struct amdgpu_xcp_res_sysfs_attribute *res_attr = to_xcp_attr(attr);
+	struct amdgpu_xcp_res_details *res = to_xcp_res(kobj);
+
+	if (res_attr->show)
+		return res_attr->show(res, buf);
+
+	return -EIO;
+}
+
+/* Resource entries are read-only: only a show operation is provided */
+static const struct sysfs_ops xcp_cfg_res_sysfs_ops = {
+ .show = xcp_cfg_res_sysfs_attr_show,
+};
+
+/*
+ * kobject type of the per-resource directories. No release callback is set;
+ * the kobjects are embedded in struct amdgpu_xcp_cfg (xcp_res[]).
+ */
+static const struct kobj_type xcp_cfg_res_sysfs_ktype = {
+ .sysfs_ops = &xcp_cfg_res_sysfs_ops,
+ .default_groups = xcp_cfg_res_sysfs_groups,
+};
+
+/* sysfs directory name of each resource, indexed by enum amdgpu_xcp_res_id */
+const char *xcp_res_names[] = {
+ [AMDGPU_XCP_RES_XCC] = "xcc",
+ [AMDGPU_XCP_RES_DMA] = "dma",
+ [AMDGPU_XCP_RES_DEC] = "dec",
+ [AMDGPU_XCP_RES_JPEG] = "jpeg",
+};
+
+/**
+ * amdgpu_xcp_get_res_info() - query resource details for a partition mode
+ * @xcp_mgr: partition manager
+ * @mode: partition mode to describe
+ * @xcp_cfg: configuration object handed to the callback
+ *
+ * Return: result of the get_xcp_res_info callback, or -EOPNOTSUPP when the
+ * manager has no callback table or no such callback.
+ */
+static int amdgpu_xcp_get_res_info(struct amdgpu_xcp_mgr *xcp_mgr,
+				   int mode,
+				   struct amdgpu_xcp_cfg *xcp_cfg)
+{
+	if (!xcp_mgr->funcs || !xcp_mgr->funcs->get_xcp_res_info)
+		return -EOPNOTSUPP;
+
+	return xcp_mgr->funcs->get_xcp_res_info(xcp_mgr, mode, xcp_cfg);
+}
+
+/* Recover the configuration object from its embedded kobject */
+#define to_xcp_cfg(x) container_of(x, struct amdgpu_xcp_cfg, kobj)
+
+/**
+ * supported_xcp_configs_show() - list the supported compute partition modes
+ * @kobj: kobject embedded in a struct amdgpu_xcp_cfg
+ * @attr: attribute being read (unused)
+ * @buf: sysfs output buffer
+ *
+ * Prints the mode names as a ", " separated line, or "Not supported" when
+ * there is no manager or no supported mode.
+ *
+ * Return: number of bytes written to @buf.
+ */
+static ssize_t supported_xcp_configs_show(struct kobject *kobj,
+					  struct kobj_attribute *attr, char *buf)
+{
+	struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+	struct amdgpu_xcp_mgr *xcp_mgr = xcp_cfg->xcp_mgr;
+	char *delim = "";
+	int len = 0;
+	int mode;
+
+	if (!xcp_mgr || !xcp_mgr->supp_xcp_modes)
+		return sysfs_emit(buf, "Not supported\n");
+
+	for_each_inst(mode, xcp_mgr->supp_xcp_modes) {
+		len += sysfs_emit_at(buf, len, "%s%s", delim, xcp_desc[mode]);
+		delim = ", ";
+	}
+
+	len += sysfs_emit_at(buf, len, "\n");
+
+	return len;
+}
+
+/**
+ * supported_nps_configs_show() - list the compatible memory partition modes
+ * @kobj: kobject embedded in a struct amdgpu_xcp_cfg
+ * @attr: attribute being read (unused)
+ * @buf: sysfs output buffer
+ *
+ * Prints the names of the modes set in compatible_nps_modes as a ", "
+ * separated line, or "Not supported" when the mask is empty.
+ *
+ * Return: number of bytes written to @buf.
+ */
+static ssize_t supported_nps_configs_show(struct kobject *kobj,
+					  struct kobj_attribute *attr, char *buf)
+{
+	struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+	char *delim = "";
+	int len = 0;
+	int mode;
+
+	if (!xcp_cfg || !xcp_cfg->compatible_nps_modes)
+		return sysfs_emit(buf, "Not supported\n");
+
+	for_each_inst(mode, xcp_cfg->compatible_nps_modes) {
+		len += sysfs_emit_at(buf, len, "%s%s", delim, nps_desc[mode]);
+		delim = ", ";
+	}
+
+	len += sysfs_emit_at(buf, len, "\n");
+
+	return len;
+}
+
+/**
+ * xcp_config_show() - print the mode currently selected for inspection
+ * @kobj: kobject embedded in a struct amdgpu_xcp_cfg
+ * @attr: attribute being read (unused)
+ * @buf: sysfs output buffer
+ *
+ * Return: number of bytes written to @buf.
+ */
+static ssize_t xcp_config_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *buf)
+{
+	const char *desc;
+
+	desc = amdgpu_gfx_compute_mode_desc(to_xcp_cfg(kobj)->mode);
+
+	return sysfs_emit(buf, "%s\n", desc);
+}
+
+/**
+ * xcp_config_store() - select the partition mode whose resources are shown
+ * @kobj: kobject embedded in a struct amdgpu_xcp_cfg
+ * @attr: attribute being written (unused)
+ * @buf: user input; must start with a mode name (case-insensitive)
+ * @size: length of @buf
+ *
+ * Only the leading three characters are compared. The resource details are
+ * refreshed through amdgpu_xcp_get_res_info() and, on success, the mode is
+ * recorded in the configuration object.
+ *
+ * Return: @size on success, -EINVAL for an unknown name, or the error from
+ * amdgpu_xcp_get_res_info().
+ */
+static ssize_t xcp_config_store(struct kobject *kobj,
+				struct kobj_attribute *attr,
+				const char *buf, size_t size)
+{
+	static const struct {
+		const char *name;
+		int mode;
+	} known[] = {
+		{ "SPX", AMDGPU_SPX_PARTITION_MODE },
+		{ "DPX", AMDGPU_DPX_PARTITION_MODE },
+		{ "TPX", AMDGPU_TPX_PARTITION_MODE },
+		{ "QPX", AMDGPU_QPX_PARTITION_MODE },
+		{ "CPX", AMDGPU_CPX_PARTITION_MODE },
+	};
+	struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+	int mode, err;
+	size_t k;
+
+	for (k = 0; k < ARRAY_SIZE(known); k++) {
+		if (!strncasecmp(known[k].name, buf, strlen(known[k].name)))
+			break;
+	}
+	if (k == ARRAY_SIZE(known))
+		return -EINVAL;
+	mode = known[k].mode;
+
+	err = amdgpu_xcp_get_res_info(xcp_cfg->xcp_mgr, mode, xcp_cfg);
+	if (err)
+		return err;
+
+	xcp_cfg->mode = mode;
+	return size;
+}
+
+/* "xcp_config" file, mode 0644, backed by xcp_config_show()/xcp_config_store() */
+static struct kobj_attribute xcp_cfg_sysfs_mode =
+ __ATTR_RW_MODE(xcp_config, 0644);
+
+/**
+ * xcp_cfg_sysfs_release() - kobject release callback of the config object
+ * @kobj: kobject embedded in the struct amdgpu_xcp_cfg to free
+ */
+static void xcp_cfg_sysfs_release(struct kobject *kobj)
+{
+	kfree(to_xcp_cfg(kobj));
+}
+
+/* kobject type of the configuration directory; release frees the object */
+static const struct kobj_type xcp_cfg_sysfs_ktype = {
+ .release = xcp_cfg_sysfs_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+/* Read-only "supported_xcp_configs" file */
+static struct kobj_attribute supp_part_sysfs_mode =
+ __ATTR_RO(supported_xcp_configs);
+
+/*
+ * Read-only "supported_nps_configs" file. It is not part of xcp_attrs;
+ * amdgpu_xcp_cfg_sysfs_init() creates it separately.
+ */
+static struct kobj_attribute supp_nps_sysfs_mode =
+ __ATTR_RO(supported_nps_configs);
+
+/* Files always created in the configuration directory (NULL-terminated) */
+static const struct attribute *xcp_attrs[] = {
+ &supp_part_sysfs_mode.attr,
+ &xcp_cfg_sysfs_mode.attr,
+ NULL,
+};
+
+/**
+ * amdgpu_xcp_cfg_sysfs_init() - create the compute_partition_config tree
+ * @adev: amdgpu device
+ *
+ * Allocates the configuration object, adds the "compute_partition_config"
+ * directory under the device, creates its files (plus the NPS file when the
+ * device reports supported NPS modes) and one sub-directory per resource
+ * reported by amdgpu_xcp_get_res_info(). An unknown current mode is treated
+ * as SPX. On success the object is stored in adev->xcp_mgr->xcp_cfg; on any
+ * failure everything created so far is torn down and nothing is stored.
+ */
+static void amdgpu_xcp_cfg_sysfs_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_xcp_res_details *xcp_res;
+	struct amdgpu_xcp_cfg *xcp_cfg;
+	int i, r, j, mode;
+
+	if (!adev->xcp_mgr)
+		return;
+
+	xcp_cfg = kzalloc(sizeof(*xcp_cfg), GFP_KERNEL);
+	if (!xcp_cfg)
+		return;
+	xcp_cfg->xcp_mgr = adev->xcp_mgr;
+
+	/* From here on xcp_cfg is freed by the ktype release via kobject_put() */
+	r = kobject_init_and_add(&xcp_cfg->kobj, &xcp_cfg_sysfs_ktype,
+				 &adev->dev->kobj, "compute_partition_config");
+	if (r)
+		goto err_put_cfg;
+
+	r = sysfs_create_files(&xcp_cfg->kobj, xcp_attrs);
+	if (r)
+		goto err_put_cfg;
+
+	if (adev->gmc.supported_nps_modes != 0) {
+		r = sysfs_create_file(&xcp_cfg->kobj, &supp_nps_sysfs_mode.attr);
+		if (r)
+			goto err_remove_attrs;
+	}
+
+	mode = (xcp_cfg->xcp_mgr->mode ==
+		AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) ?
+		       AMDGPU_SPX_PARTITION_MODE :
+		       xcp_cfg->xcp_mgr->mode;
+	r = amdgpu_xcp_get_res_info(xcp_cfg->xcp_mgr, mode, xcp_cfg);
+	if (r)
+		goto err_remove_nps;
+
+	xcp_cfg->mode = mode;
+	for (i = 0; i < xcp_cfg->num_res; i++) {
+		xcp_res = &xcp_cfg->xcp_res[i];
+		r = kobject_init_and_add(&xcp_res->kobj,
+					 &xcp_cfg_res_sysfs_ktype,
+					 &xcp_cfg->kobj, "%s",
+					 xcp_res_names[xcp_res->id]);
+		if (r)
+			goto err_put_res;
+	}
+
+	adev->xcp_mgr->xcp_cfg = xcp_cfg;
+	return;
+
+err_put_res:
+	/*
+	 * Release each resource kobject exactly once, indexing by j. The
+	 * entry at index i failed in kobject_init_and_add(), which still
+	 * requires a kobject_put(), so it is included in the range.
+	 */
+	for (j = 0; j <= i; j++)
+		kobject_put(&xcp_cfg->xcp_res[j].kobj);
+err_remove_nps:
+	sysfs_remove_file(&xcp_cfg->kobj, &supp_nps_sysfs_mode.attr);
+err_remove_attrs:
+	sysfs_remove_files(&xcp_cfg->kobj, xcp_attrs);
+err_put_cfg:
+	kobject_put(&xcp_cfg->kobj);
+}
+
+/**
+ * amdgpu_xcp_cfg_sysfs_fini() - remove the compute_partition_config tree
+ * @adev: amdgpu device
+ *
+ * Releases the per-resource kobjects, removes the files and drops the
+ * reference on the configuration kobject. Does nothing when the tree was
+ * never created.
+ */
+static void amdgpu_xcp_cfg_sysfs_fini(struct amdgpu_device *adev)
+{
+	struct amdgpu_xcp_cfg *cfg;
+	int idx;
+
+	if (!adev->xcp_mgr)
+		return;
+
+	cfg = adev->xcp_mgr->xcp_cfg;
+	if (!cfg)
+		return;
+
+	for (idx = 0; idx < cfg->num_res; idx++)
+		kobject_put(&cfg->xcp_res[idx].kobj);
+
+	sysfs_remove_file(&cfg->kobj, &supp_nps_sysfs_mode.attr);
+	sysfs_remove_files(&cfg->kobj, xcp_attrs);
+	kobject_put(&cfg->kobj);
+}
+
+/*====================== xcp sysfs - data entries ======================*/
+
+#define to_xcp(x) container_of(x, struct amdgpu_xcp, kobj)
+
+/**
+ * xcp_metrics_show() - dump the metrics of one partition
+ * @kobj: kobject embedded in a struct amdgpu_xcp
+ * @attr: attribute being read (unused)
+ * @buf: sysfs output buffer
+ *
+ * amdgpu_dpm_get_xcp_metrics() is first called with a NULL buffer and its
+ * result checked against PAGE_SIZE before the call that fills @buf.
+ *
+ * Return: result of the second amdgpu_dpm_get_xcp_metrics() call, the
+ * non-positive result of the first call, or -ENOSPC when the first result
+ * exceeds PAGE_SIZE.
+ */
+static ssize_t xcp_metrics_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *buf)
+{
+	struct amdgpu_xcp *xcp = to_xcp(kobj);
+	struct amdgpu_device *adev = xcp->xcp_mgr->adev;
+	ssize_t needed;
+
+	needed = amdgpu_dpm_get_xcp_metrics(adev, xcp->id, NULL);
+	if (needed <= 0)
+		return needed;
+
+	if (needed > PAGE_SIZE)
+		return -ENOSPC;
+
+	return amdgpu_dpm_get_xcp_metrics(adev, xcp->id, buf);
+}
+
+/**
+ * amdgpu_xcp_attrs_is_visible() - hide attributes of invalid partitions
+ * @kobj: kobject embedded in a struct amdgpu_xcp
+ * @attr: attribute being evaluated
+ * @n: index of @attr within the group (unused)
+ *
+ * Return: the attribute's own mode for a valid partition, 0 otherwise.
+ */
+static umode_t amdgpu_xcp_attrs_is_visible(struct kobject *kobj,
+					   struct attribute *attr, int n)
+{
+	struct amdgpu_xcp *xcp = to_xcp(kobj);
+
+	return (xcp && xcp->valid) ? attr->mode : 0;
+}
+
+/* Read-only "xcp_metrics" file backed by xcp_metrics_show() */
+static struct kobj_attribute xcp_sysfs_metrics = __ATTR_RO(xcp_metrics);
+
+/* NULL-terminated attribute list of the per-partition "xcp" directory */
+static struct attribute *amdgpu_xcp_attrs[] = {
+ &xcp_sysfs_metrics.attr,
+ NULL,
+};
+
+/* Group whose visibility follows the partition's valid flag */
+static const struct attribute_group amdgpu_xcp_attrs_group = {
+ .attrs = amdgpu_xcp_attrs,
+ .is_visible = amdgpu_xcp_attrs_is_visible
+};
+
+/* kobject type of the "xcp" directory; no release callback is set */
+static const struct kobj_type xcp_sysfs_ktype = {
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+/**
+ * amdgpu_xcp_sysfs_entries_fini() - remove the per-partition "xcp" entries
+ * @xcp_mgr: partition manager
+ * @n: number of leading slots to process
+ *
+ * Slots are visited from n - 1 down to 0. Slots without a DRM device or
+ * whose partition is not valid are skipped.
+ */
+static void amdgpu_xcp_sysfs_entries_fini(struct amdgpu_xcp_mgr *xcp_mgr, int n)
+{
+	struct amdgpu_xcp *cur;
+
+	while (--n >= 0) {
+		cur = &xcp_mgr->xcp[n];
+		if (cur->ddev && cur->valid) {
+			sysfs_remove_group(&cur->kobj, &amdgpu_xcp_attrs_group);
+			kobject_put(&cur->kobj);
+		}
+	}
+}
+
+/**
+ * amdgpu_xcp_sysfs_entries_init() - create the per-partition "xcp" entries
+ * @xcp_mgr: partition manager
+ *
+ * For each slot, up to the first one without a DRM device, an "xcp"
+ * kobject is added under that DRM device and the attribute group is
+ * created in it. On the first failure the kobject of the failing slot is
+ * released and the function returns; earlier slots are left in place.
+ */
+static void amdgpu_xcp_sysfs_entries_init(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+	struct amdgpu_xcp *cur;
+	int idx, err;
+
+	for (idx = 0; idx < MAX_XCP; idx++) {
+		cur = &xcp_mgr->xcp[idx];
+		if (!cur->ddev)
+			break;
+
+		err = kobject_init_and_add(&cur->kobj, &xcp_sysfs_ktype,
+					   &cur->ddev->dev->kobj, "xcp");
+		if (!err)
+			err = sysfs_create_group(&cur->kobj,
+						 &amdgpu_xcp_attrs_group);
+		if (err) {
+			kobject_put(&cur->kobj);
+			return;
+		}
+	}
+}
+
+/**
+ * amdgpu_xcp_sysfs_entries_update() - re-evaluate attribute visibility
+ * @xcp_mgr: partition manager
+ *
+ * Calls sysfs_update_group() on the "xcp" kobject of every slot that has a
+ * DRM device.
+ */
+static void amdgpu_xcp_sysfs_entries_update(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+	struct amdgpu_xcp *cur;
+	int idx;
+
+	for (idx = 0; idx < MAX_XCP; idx++) {
+		cur = &xcp_mgr->xcp[idx];
+		if (cur->ddev)
+			sysfs_update_group(&cur->kobj, &amdgpu_xcp_attrs_group);
+	}
+}
+
+/**
+ * amdgpu_xcp_sysfs_init() - create the partition configuration sysfs tree
+ * @adev: amdgpu device
+ *
+ * Does nothing when the device has no partition manager.
+ */
+void amdgpu_xcp_sysfs_init(struct amdgpu_device *adev)
+{
+	if (adev->xcp_mgr)
+		amdgpu_xcp_cfg_sysfs_init(adev);
+}
+
+/**
+ * amdgpu_xcp_sysfs_fini() - remove all partition sysfs entries
+ * @adev: amdgpu device
+ *
+ * Removes the per-partition "xcp" entries of all MAX_XCP slots, then the
+ * configuration tree. Does nothing without a partition manager.
+ */
+void amdgpu_xcp_sysfs_fini(struct amdgpu_device *adev)
+{
+	struct amdgpu_xcp_mgr *mgr = adev->xcp_mgr;
+
+	if (mgr == NULL)
+		return;
+
+	amdgpu_xcp_sysfs_entries_fini(mgr, MAX_XCP);
+	amdgpu_xcp_cfg_sysfs_fini(adev);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
new file mode 100644
index 000000000000..1928d9e224fc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
@@ -0,0 +1,217 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_XCP_H
+#define AMDGPU_XCP_H
+
+#include <linux/pci.h>
+#include <linux/xarray.h>
+
+#include "amdgpu_ctx.h"
+
+/* Number of partition slots in struct amdgpu_xcp_mgr (size of xcp[]) */
+#define MAX_XCP 8
+
+/* Mode value under which the manager skips partition setup and sched lists */
+#define AMDGPU_XCP_MODE_NONE -1
+/*
+ * Cached mode value for which amdgpu_xcp_query_partition_mode() does not
+ * warn about a mismatch with the device mode.
+ * NOTE(review): presumably marks a mode switch in progress - confirm.
+ */
+#define AMDGPU_XCP_MODE_TRANS -2
+
+/* Flags for amdgpu_xcp_query_partition_mode() */
+#define AMDGPU_XCP_FL_NONE 0
+/* When set, the query does not unlock xcp_lock on its way out */
+#define AMDGPU_XCP_FL_LOCKED (1 << 0)
+
+/* xcp_id value of a file or ring that is not bound to any partition */
+#define AMDGPU_XCP_NO_PARTITION (~0)
+
+/* Flag for the pre/post partition switch helpers: also tear down/re-init KFD */
+#define AMDGPU_XCP_OPS_KFD (1 << 0)
+
+struct amdgpu_fpriv;
+
+/* IP block types tracked per partition; index into amdgpu_xcp.ip[] */
+enum AMDGPU_XCP_IP_BLOCK {
+ AMDGPU_XCP_GFXHUB,
+ AMDGPU_XCP_GFX,
+ AMDGPU_XCP_SDMA,
+ AMDGPU_XCP_VCN,
+ AMDGPU_XCP_MAX_BLOCKS
+};
+
+/* Partition power-transition steps; names match the amdgpu_xcp_ip_funcs hooks */
+enum AMDGPU_XCP_STATE {
+ AMDGPU_XCP_PREPARE_SUSPEND,
+ AMDGPU_XCP_SUSPEND,
+ AMDGPU_XCP_PREPARE_RESUME,
+ AMDGPU_XCP_RESUME,
+};
+
+/* Resource kinds exposed through sysfs; index into xcp_res_names[] */
+enum amdgpu_xcp_res_id {
+ AMDGPU_XCP_RES_XCC,
+ AMDGPU_XCP_RES_DMA,
+ AMDGPU_XCP_RES_DEC,
+ AMDGPU_XCP_RES_JPEG,
+ AMDGPU_XCP_RES_MAX,
+};
+
+/* One resource entry of a partition configuration, shown as a sysfs directory */
+struct amdgpu_xcp_res_details {
+ enum amdgpu_xcp_res_id id; /* selects the directory name */
+ u8 num_inst; /* exported as the "num_inst" file */
+ u8 num_shared; /* exported as the "num_shared" file */
+ struct kobject kobj; /* child of the owning amdgpu_xcp_cfg kobject */
+};
+
+/* State behind the "compute_partition_config" sysfs directory */
+struct amdgpu_xcp_cfg {
+ u8 mode; /* mode whose resources are currently described */
+ struct amdgpu_xcp_res_details xcp_res[AMDGPU_XCP_RES_MAX];
+ u8 num_res; /* number of used entries in xcp_res[] */
+ struct amdgpu_xcp_mgr *xcp_mgr; /* owning partition manager */
+ struct kobject kobj; /* its release callback frees this structure */
+ u16 compatible_nps_modes; /* bitmask printed by supported_nps_configs */
+};
+
+/*
+ * Per-IP power-transition hooks. Each takes an opaque handle and the mask
+ * of instances to act on.
+ */
+struct amdgpu_xcp_ip_funcs {
+ int (*prepare_suspend)(void *handle, uint32_t inst_mask);
+ int (*suspend)(void *handle, uint32_t inst_mask);
+ int (*prepare_resume)(void *handle, uint32_t inst_mask);
+ int (*resume)(void *handle, uint32_t inst_mask);
+};
+
+/* One IP block as assigned to a partition */
+struct amdgpu_xcp_ip {
+ struct amdgpu_xcp_ip_funcs *ip_funcs;
+ uint32_t inst_mask; /* bit n set: instance n belongs to the partition */
+
+ enum AMDGPU_XCP_IP_BLOCK ip_id;
+ bool valid; /* entry is ignored by lookups while false */
+};
+
+/* One partition slot */
+struct amdgpu_xcp {
+ struct amdgpu_xcp_ip ip[AMDGPU_XCP_MAX_BLOCKS];
+
+ uint8_t id;
+ uint8_t mem_id; /* copied to fpriv->vm.mem_id when a file is opened */
+ bool valid;
+ atomic_t ref_cnt; /* raised in select_scheds, dropped in release_sched */
+ struct drm_device *ddev; /* DRM device of this slot; slot 0 uses the primary */
+ /* Original values saved by dev_alloc and restored by dev_unplug */
+ struct drm_device *rdev;
+ struct drm_device *pdev;
+ struct drm_driver *driver;
+ struct drm_vma_offset_manager *vma_offset_manager;
+ /* Scheduler lists indexed by hardware IP and hardware priority */
+ struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX];
+ struct amdgpu_xcp_mgr *xcp_mgr; /* back pointer to the owning manager */
+ struct kobject kobj; /* "xcp" sysfs directory under ddev */
+ uint64_t unique_id;
+};
+
+/* Partition manager; one per amdgpu device (adev->xcp_mgr) */
+struct amdgpu_xcp_mgr {
+ struct amdgpu_device *adev;
+ struct mutex xcp_lock;
+ struct amdgpu_xcp_mgr_funcs *funcs;
+
+ struct amdgpu_xcp xcp[MAX_XCP];
+ uint8_t num_xcps; /* number of slots searched by the lookup helpers */
+ int8_t mode; /* cached partition mode, or AMDGPU_XCP_MODE_NONE/TRANS */
+
+ /* Used to determine KFD memory size limits per XCP */
+ unsigned int num_xcp_per_mem_partition;
+ struct amdgpu_xcp_cfg *xcp_cfg; /* sysfs configuration object, may be NULL */
+ uint32_t supp_xcp_modes; /* bitmask set by amdgpu_xcp_update_supported_modes() */
+ uint32_t avail_xcp_modes;
+};
+
+/*
+ * Callbacks supplied to amdgpu_xcp_mgr_init(). get_ip_details is mandatory
+ * (init rejects a table without it); get_xcp_res_info is optional and its
+ * absence is reported as -EOPNOTSUPP.
+ */
+struct amdgpu_xcp_mgr_funcs {
+ int (*switch_partition_mode)(struct amdgpu_xcp_mgr *xcp_mgr, int mode,
+ int *num_xcps);
+ int (*query_partition_mode)(struct amdgpu_xcp_mgr *xcp_mgr);
+ int (*get_ip_details)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ enum AMDGPU_XCP_IP_BLOCK ip_id,
+ struct amdgpu_xcp_ip *ip);
+ int (*get_xcp_mem_id)(struct amdgpu_xcp_mgr *xcp_mgr,
+ struct amdgpu_xcp *xcp, uint8_t *mem_id);
+ int (*get_xcp_res_info)(struct amdgpu_xcp_mgr *xcp_mgr,
+ int mode,
+ struct amdgpu_xcp_cfg *xcp_cfg);
+ int (*prepare_suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+ int (*suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+ int (*prepare_resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+ int (*resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+};
+
+int amdgpu_xcp_prepare_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+int amdgpu_xcp_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+int amdgpu_xcp_prepare_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+int amdgpu_xcp_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+
+int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
+ int init_xcps, struct amdgpu_xcp_mgr_funcs *xcp_funcs);
+int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode);
+int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags);
+int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode);
+int amdgpu_xcp_restore_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr);
+int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr,
+ enum AMDGPU_XCP_IP_BLOCK ip, int instance);
+
+int amdgpu_xcp_get_inst_details(struct amdgpu_xcp *xcp,
+ enum AMDGPU_XCP_IP_BLOCK ip,
+ uint32_t *inst_mask);
+
+int amdgpu_xcp_dev_register(struct amdgpu_device *adev,
+ const struct pci_device_id *ent);
+void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev);
+int amdgpu_xcp_open_device(struct amdgpu_device *adev,
+ struct amdgpu_fpriv *fpriv,
+ struct drm_file *file_priv);
+void amdgpu_xcp_release_sched(struct amdgpu_device *adev,
+ struct amdgpu_ctx_entity *entity);
+int amdgpu_xcp_select_scheds(struct amdgpu_device *adev,
+ u32 hw_ip, u32 hw_prio,
+ struct amdgpu_fpriv *fpriv,
+ unsigned int *num_scheds,
+ struct drm_gpu_scheduler ***scheds);
+void amdgpu_xcp_update_supported_modes(struct amdgpu_xcp_mgr *xcp_mgr);
+int amdgpu_xcp_update_partition_sched_list(struct amdgpu_device *adev);
+int amdgpu_xcp_pre_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags);
+int amdgpu_xcp_post_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags);
+void amdgpu_xcp_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_xcp_sysfs_fini(struct amdgpu_device *adev);
+
+/**
+ * amdgpu_xcp_get_num_xcp() - number of partitions of a device
+ * @xcp_mgr: partition manager, may be NULL
+ *
+ * Return: num_xcps of the manager, or 1 when there is no manager.
+ */
+static inline int amdgpu_xcp_get_num_xcp(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+	return xcp_mgr ? xcp_mgr->num_xcps : 1;
+}
+
+/**
+ * amdgpu_get_next_xcp() - find the next valid partition
+ * @xcp_mgr: partition manager, may be NULL
+ * @from: in: slot to start at; out: slot of the returned partition, or
+ *        MAX_XCP when the search ran off the end
+ *
+ * Return: the first valid partition at or after *@from, or NULL.
+ */
+static inline struct amdgpu_xcp *
+amdgpu_get_next_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int *from)
+{
+	if (!xcp_mgr)
+		return NULL;
+
+	for (; *from < MAX_XCP; ++(*from)) {
+		if (xcp_mgr->xcp[*from].valid)
+			return &xcp_mgr->xcp[*from];
+	}
+
+	return NULL;
+}
+
+/*
+ * Iterate over all valid partitions of xcp_mgr. xcp receives the partition
+ * and i its slot index; the loop body is not entered when xcp_mgr is NULL.
+ */
+#define for_each_xcp(xcp_mgr, xcp, i) \
+ for (i = 0, xcp = amdgpu_get_next_xcp(xcp_mgr, &i); xcp; \
+ ++i, xcp = amdgpu_get_next_xcp(xcp_mgr, &i))
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 567df2db23ac..aad530c46a9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -29,12 +29,21 @@
#include "df/df_3_6_offset.h"
#include "xgmi/xgmi_4_0_0_smn.h"
#include "xgmi/xgmi_4_0_0_sh_mask.h"
+#include "xgmi/xgmi_6_1_0_sh_mask.h"
#include "wafl/wafl2_4_0_0_smn.h"
#include "wafl/wafl2_4_0_0_sh_mask.h"
-#define smnPCS_XGMI23_PCS_ERROR_STATUS 0x11a01210
+#include "amdgpu_reset.h"
+
#define smnPCS_XGMI3X16_PCS_ERROR_STATUS 0x11a0020c
+#define smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK 0x11a00218
#define smnPCS_GOPX1_PCS_ERROR_STATUS 0x12200210
+#define smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK 0x12200218
+
+#define XGMI_STATE_DISABLE 0xD1
+#define XGMI_STATE_LS0 0x81
+#define XGMI_LINK_ACTIVE 1
+#define XGMI_LINK_INACTIVE 0
static DEFINE_MUTEX(xgmi_mutex);
@@ -67,17 +76,6 @@ static const int wafl_pcs_err_status_reg_arct[] = {
smnPCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS + 0x100000,
};
-static const int xgmi23_pcs_err_status_reg_aldebaran[] = {
- smnPCS_XGMI23_PCS_ERROR_STATUS,
- smnPCS_XGMI23_PCS_ERROR_STATUS + 0x100000,
- smnPCS_XGMI23_PCS_ERROR_STATUS + 0x200000,
- smnPCS_XGMI23_PCS_ERROR_STATUS + 0x300000,
- smnPCS_XGMI23_PCS_ERROR_STATUS + 0x400000,
- smnPCS_XGMI23_PCS_ERROR_STATUS + 0x500000,
- smnPCS_XGMI23_PCS_ERROR_STATUS + 0x600000,
- smnPCS_XGMI23_PCS_ERROR_STATUS + 0x700000
-};
-
static const int xgmi3x16_pcs_err_status_reg_aldebaran[] = {
smnPCS_XGMI3X16_PCS_ERROR_STATUS,
smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x100000,
@@ -89,11 +87,74 @@ static const int xgmi3x16_pcs_err_status_reg_aldebaran[] = {
smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x700000
};
+/*
+ * Eight addresses of smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK, spaced
+ * 0x100000 apart, in the same order as xgmi3x16_pcs_err_status_reg_aldebaran.
+ * NOTE(review): presumably one entry per XGMI link instance - confirm.
+ */
+static const int xgmi3x16_pcs_err_noncorrectable_mask_reg_aldebaran[] = {
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x200000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x300000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x400000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x500000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x600000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x700000
+};
+
static const int walf_pcs_err_status_reg_aldebaran[] = {
smnPCS_GOPX1_PCS_ERROR_STATUS,
smnPCS_GOPX1_PCS_ERROR_STATUS + 0x100000
};
+/*
+ * Two addresses of smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK, 0x100000
+ * apart, matching the entries of walf_pcs_err_status_reg_aldebaran.
+ */
+static const int walf_pcs_err_noncorrectable_mask_reg_aldebaran[] = {
+ smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK,
+ smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000
+};
+
+/* v6_4 variant: two XGMI3X16 PCS error status addresses, 0x100000 apart */
+static const int xgmi3x16_pcs_err_status_reg_v6_4[] = {
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x100000
+};
+
+/* v6_4 variant: the matching non-correctable mask addresses */
+static const int xgmi3x16_pcs_err_noncorrectable_mask_reg_v6_4[] = {
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000
+};
+
+/*
+ * Two base addresses, 0x100000 apart.
+ * NOTE(review): presumably the MCA register bank bases of XGMI v6.4.0, going
+ * by the name - confirm.
+ */
+static const u64 xgmi_v6_4_0_mca_base_array[] = {
+ 0x11a09200,
+ 0x11b09200,
+};
+
+/*
+ * Error name for each extended error code, indexed by the code value.
+ * Only codes 0x00-0x1c have a name; the remaining slots of the 32-entry
+ * table are NULL, so a lookup must check for NULL.
+ */
+static const char *xgmi_v6_4_0_ras_error_code_ext[32] = {
+ [0x00] = "XGMI PCS DataLossErr",
+ [0x01] = "XGMI PCS TrainingErr",
+ [0x02] = "XGMI PCS FlowCtrlAckErr",
+ [0x03] = "XGMI PCS RxFifoUnderflowErr",
+ [0x04] = "XGMI PCS RxFifoOverflowErr",
+ [0x05] = "XGMI PCS CRCErr",
+ [0x06] = "XGMI PCS BERExceededErr",
+ [0x07] = "XGMI PCS TxMetaDataErr",
+ [0x08] = "XGMI PCS ReplayBufParityErr",
+ [0x09] = "XGMI PCS DataParityErr",
+ [0x0a] = "XGMI PCS ReplayFifoOverflowErr",
+ [0x0b] = "XGMI PCS ReplayFifoUnderflowErr",
+ [0x0c] = "XGMI PCS ElasticFifoOverflowErr",
+ [0x0d] = "XGMI PCS DeskewErr",
+ [0x0e] = "XGMI PCS FlowCtrlCRCErr",
+ [0x0f] = "XGMI PCS DataStartupLimitErr",
+ [0x10] = "XGMI PCS FCInitTimeoutErr",
+ [0x11] = "XGMI PCS RecoveryTimeoutErr",
+ [0x12] = "XGMI PCS ReadySerialTimeoutErr",
+ [0x13] = "XGMI PCS ReadySerialAttemptErr",
+ [0x14] = "XGMI PCS RecoveryAttemptErr",
+ [0x15] = "XGMI PCS RecoveryRelockAttemptErr",
+ [0x16] = "XGMI PCS ReplayAttemptErr",
+ [0x17] = "XGMI PCS SyncHdrErr",
+ [0x18] = "XGMI PCS TxReplayTimeoutErr",
+ [0x19] = "XGMI PCS RxReplayTimeoutErr",
+ [0x1a] = "XGMI PCS LinkSubTxTimeoutErr",
+ [0x1b] = "XGMI PCS LinkSubRxTimeoutErr",
+ [0x1c] = "XGMI PCS RxCMDPktErr",
+};
+
static const struct amdgpu_pcs_ras_field xgmi_pcs_ras_fields[] = {
{"XGMI PCS DataLossErr",
SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataLossErr)},
@@ -172,6 +233,143 @@ static const struct amdgpu_pcs_ras_field wafl_pcs_ras_fields[] = {
SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, RecoveryRelockAttemptErr)},
};
+/*
+ * Name and SOC15_REG_FIELD() descriptor for each error field of
+ * PCS_XGMI3X16_PCS_ERROR_STATUS. amdgpu_xgmi_query_pcs_error_status() walks
+ * this table instead of xgmi_pcs_ras_fields[] when the XGMI IP version is
+ * 6.1.0, 6.4.0 or 6.4.1.
+ */
+static const struct amdgpu_pcs_ras_field xgmi3x16_pcs_ras_fields[] = {
+ {"XGMI3X16 PCS DataLossErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DataLossErr)},
+ {"XGMI3X16 PCS TrainingErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, TrainingErr)},
+ {"XGMI3X16 PCS FlowCtrlAckErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, FlowCtrlAckErr)},
+ {"XGMI3X16 PCS RxFifoUnderflowErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxFifoUnderflowErr)},
+ {"XGMI3X16 PCS RxFifoOverflowErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxFifoOverflowErr)},
+ {"XGMI3X16 PCS CRCErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, CRCErr)},
+ {"XGMI3X16 PCS BERExceededErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, BERExceededErr)},
+ {"XGMI3X16 PCS TxVcidDataErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, TxVcidDataErr)},
+ {"XGMI3X16 PCS ReplayBufParityErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayBufParityErr)},
+ {"XGMI3X16 PCS DataParityErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DataParityErr)},
+ {"XGMI3X16 PCS ReplayFifoOverflowErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayFifoOverflowErr)},
+ {"XGMI3X16 PCS ReplayFifoUnderflowErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayFifoUnderflowErr)},
+ {"XGMI3X16 PCS ElasticFifoOverflowErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ElasticFifoOverflowErr)},
+ {"XGMI3X16 PCS DeskewErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DeskewErr)},
+ {"XGMI3X16 PCS FlowCtrlCRCErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, FlowCtrlCRCErr)},
+ {"XGMI3X16 PCS DataStartupLimitErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DataStartupLimitErr)},
+ {"XGMI3X16 PCS FCInitTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, FCInitTimeoutErr)},
+ {"XGMI3X16 PCS RecoveryTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RecoveryTimeoutErr)},
+ {"XGMI3X16 PCS ReadySerialTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReadySerialTimeoutErr)},
+ {"XGMI3X16 PCS ReadySerialAttemptErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReadySerialAttemptErr)},
+ {"XGMI3X16 PCS RecoveryAttemptErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RecoveryAttemptErr)},
+ {"XGMI3X16 PCS RecoveryRelockAttemptErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RecoveryRelockAttemptErr)},
+ {"XGMI3X16 PCS ReplayAttemptErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayAttemptErr)},
+ {"XGMI3X16 PCS SyncHdrErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, SyncHdrErr)},
+ {"XGMI3X16 PCS TxReplayTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, TxReplayTimeoutErr)},
+ {"XGMI3X16 PCS RxReplayTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxReplayTimeoutErr)},
+ {"XGMI3X16 PCS LinkSubTxTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, LinkSubTxTimeoutErr)},
+ {"XGMI3X16 PCS LinkSubRxTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, LinkSubRxTimeoutErr)},
+ {"XGMI3X16 PCS RxCMDPktErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxCMDPktErr)},
+};
+
+/**
+ * amdgpu_xgmi_get_ext_link - translate a link number through the v6.4.x map
+ * @adev: amdgpu device
+ * @link_num: index into the link map
+ *
+ * Return: the mapped link number when the XGMI IP version is 6.4.0 or 6.4.1
+ * and @link_num is inside the map; -EINVAL when the device reports at most
+ * one physical node, the IP version has no map, or @link_num is out of range.
+ */
+int amdgpu_xgmi_get_ext_link(struct amdgpu_device *adev, int link_num)
+{
+	/* Read-only lookup table: keep one copy instead of rebuilding it on the stack per call. */
+	static const int link_map_6_4_x[8] = { 0, 3, 1, 2, 7, 6, 4, 5 };
+
+	if (adev->gmc.xgmi.num_physical_nodes <= 1)
+		return -EINVAL;
+
+	switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+	case IP_VERSION(6, 4, 0):
+	case IP_VERSION(6, 4, 1):
+		/*
+		 * Reject negative indices explicitly rather than relying on
+		 * the int -> size_t conversion in the comparison.
+		 */
+		if (link_num >= 0 && link_num < (int)ARRAY_SIZE(link_map_6_4_x))
+			return link_map_6_4_x[link_num];
+		break;
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Read the PCS state-history register for one XGMI v6.4.x link.
+ *
+ * @global_link_num is split into a register selector (link % table size) and
+ * an AID index (link / table size). The AID index must be set in
+ * adev->aid_mask; its extended SMN encoding is added to the register address
+ * before the read.
+ *
+ * Returns the raw register value, or U32_MAX when the IP version is not
+ * 6.4.0/6.4.1, the link number is negative, or the AID is not present.
+ */
+static u32 xgmi_v6_4_get_link_status(struct amdgpu_device *adev, int global_link_num)
+{
+	static const u32 smn_xgmi_6_4_pcs_state_hist1[2] = { 0x11a00070, 0x11b00070 };
+	static const u32 smn_xgmi_6_4_1_pcs_state_hist1[2] = { 0x12100070,
+								 0x11b00070 };
+	u32 i, n;
+	u64 addr;
+
+	/*
+	 * A negative link number would wrap to a huge u32 in the arithmetic
+	 * below and hand BIT() an out-of-range shift count.
+	 */
+	if (global_link_num < 0)
+		return U32_MAX;
+
+	switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+	case IP_VERSION(6, 4, 0):
+		n = ARRAY_SIZE(smn_xgmi_6_4_pcs_state_hist1);
+		addr = smn_xgmi_6_4_pcs_state_hist1[global_link_num % n];
+		break;
+	case IP_VERSION(6, 4, 1):
+		n = ARRAY_SIZE(smn_xgmi_6_4_1_pcs_state_hist1);
+		addr = smn_xgmi_6_4_1_pcs_state_hist1[global_link_num % n];
+		break;
+	default:
+		return U32_MAX;
+	}
+
+	i = global_link_num / n;
+
+	/* Never shift past the width of aid_mask; such an AID cannot be present. */
+	if (i >= sizeof(adev->aid_mask) * BITS_PER_BYTE ||
+	    !(adev->aid_mask & BIT(i)))
+		return U32_MAX;
+
+	addr += adev->asic_funcs->encode_ext_smn_addressing(i);
+
+	return RREG32_PCIE_EXT(addr);
+}
+
+/**
+ * amdgpu_get_xgmi_link_status - report whether an XGMI link is active
+ * @adev: amdgpu device
+ * @global_link_num: link number passed through to the per-IP status reader
+ *
+ * Return: XGMI_LINK_ACTIVE when the low byte of the state register equals
+ * XGMI_STATE_LS0, XGMI_LINK_INACTIVE for any other state, -ENOLINK when it
+ * equals XGMI_STATE_DISABLE, -EINVAL when the device reports at most one
+ * physical node, and -EOPNOTSUPP for IP versions other than 6.4.0/6.4.1.
+ */
+int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev, int global_link_num)
+{
+	u32 ip_ver, link_state;
+
+	if (adev->gmc.xgmi.num_physical_nodes <= 1)
+		return -EINVAL;
+
+	ip_ver = amdgpu_ip_version(adev, XGMI_HWIP, 0);
+	if (ip_ver != IP_VERSION(6, 4, 0) && ip_ver != IP_VERSION(6, 4, 1))
+		return -EOPNOTSUPP;
+
+	/* Only the low byte of the register is compared against the states. */
+	link_state = xgmi_v6_4_get_link_status(adev, global_link_num) & 0xFF;
+
+	if (link_state == XGMI_STATE_DISABLE)
+		return -ENOLINK;
+
+	return link_state == XGMI_STATE_LS0 ? XGMI_LINK_ACTIVE : XGMI_LINK_INACTIVE;
+}
+
/**
* DOC: AMDGPU XGMI Support
*
@@ -208,6 +406,7 @@ static struct attribute *amdgpu_xgmi_hive_attrs[] = {
&amdgpu_xgmi_hive_id,
NULL
};
+ATTRIBUTE_GROUPS(amdgpu_xgmi_hive);
static ssize_t amdgpu_xgmi_show_attrs(struct kobject *kobj,
struct attribute *attr, char *buf)
@@ -226,6 +425,9 @@ static void amdgpu_xgmi_hive_release(struct kobject *kobj)
struct amdgpu_hive_info *hive = container_of(
kobj, struct amdgpu_hive_info, kobj);
+ amdgpu_reset_put_reset_domain(hive->reset_domain);
+ hive->reset_domain = NULL;
+
mutex_destroy(&hive->hive_lock);
kfree(hive);
}
@@ -234,10 +436,10 @@ static const struct sysfs_ops amdgpu_xgmi_hive_ops = {
.show = amdgpu_xgmi_show_attrs,
};
-struct kobj_type amdgpu_xgmi_hive_type = {
+static const struct kobj_type amdgpu_xgmi_hive_type = {
.release = amdgpu_xgmi_hive_release,
.sysfs_ops = &amdgpu_xgmi_hive_ops,
- .default_attrs = amdgpu_xgmi_hive_attrs,
+ .default_groups = amdgpu_xgmi_hive_groups,
};
static ssize_t amdgpu_xgmi_show_device_id(struct device *dev,
@@ -251,6 +453,82 @@ static ssize_t amdgpu_xgmi_show_device_id(struct device *dev,
}
+/* sysfs show callback: print this device's XGMI physical node id as "%u\n". */
+static ssize_t amdgpu_xgmi_show_physical_id(struct device *dev,
+					    struct device_attribute *attr,
+					    char *buf)
+{
+	struct amdgpu_device *adev = drm_to_adev(dev_get_drvdata(dev));
+
+	return sysfs_emit(buf, "%u\n", adev->gmc.xgmi.physical_node_id);
+}
+
+/*
+ * sysfs show callback: print num_hops of every node in this device's XGMI
+ * topology as space-separated two-digit hex values followed by a newline.
+ *
+ * The output is assembled with sysfs_emit_at() so that every write is bounded
+ * to the sysfs page and the buffer is never formatted into itself.
+ */
+static ssize_t amdgpu_xgmi_show_num_hops(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+	struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+	int i, size = 0;
+
+	for (i = 0; i < top->num_nodes; i++)
+		size += sysfs_emit_at(buf, size, "%02x ", top->nodes[i].num_hops);
+
+	size += sysfs_emit_at(buf, size, "\n");
+
+	return size;
+}
+
+/*
+ * sysfs show callback: print num_links of every node in this device's XGMI
+ * topology as space-separated two-digit hex values followed by a newline.
+ *
+ * The output is assembled with sysfs_emit_at() so that every write is bounded
+ * to the sysfs page and the buffer is never formatted into itself.
+ */
+static ssize_t amdgpu_xgmi_show_num_links(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+	struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+	int i, size = 0;
+
+	for (i = 0; i < top->num_nodes; i++)
+		size += sysfs_emit_at(buf, size, "%02x ", top->nodes[i].num_links);
+
+	size += sysfs_emit_at(buf, size, "\n");
+
+	return size;
+}
+
+/*
+ * sysfs show callback: list every link of every topology node as
+ * "<this node>:<src port> -> <peer node>:<dst port>", one per line.
+ *
+ * Node numbers are the 1-based positions in the topology table, not the node
+ * ids retrieved from the xgmi TA. Returns -EINVAL when this device's node id
+ * is not present in its own topology table.
+ */
+static ssize_t amdgpu_xgmi_show_connected_port_num(struct device *dev,
+						   struct device_attribute *attr,
+						   char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+	struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+	int self_idx, node, link;
+	int len = 0;
+
+	/* Find the table slot that belongs to this device. */
+	for (self_idx = 0; self_idx < top->num_nodes; self_idx++)
+		if (top->nodes[self_idx].node_id == adev->gmc.xgmi.node_id)
+			break;
+
+	if (self_idx == top->num_nodes)
+		return -EINVAL;
+
+	for (node = 0; node < top->num_nodes; node++) {
+		for (link = 0; link < top->nodes[node].num_links; link++) {
+			/* sysfs node numbering starts at 1, hence the +1 */
+			len += sysfs_emit_at(buf, len, "%02x:%02x -> %02x:%02x\n",
+					     self_idx + 1,
+					     top->nodes[node].port_num[link].src_xgmi_port_num,
+					     node + 1,
+					     top->nodes[node].port_num[link].dst_xgmi_port_num);
+		}
+	}
+
+	return len;
+}
+
#define AMDGPU_XGMI_SET_FICAA(o) ((o) | 0x456801)
static ssize_t amdgpu_xgmi_show_error(struct device *dev,
struct device_attribute *attr,
@@ -265,6 +543,11 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev,
ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200);
ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208);
+ if ((!adev->df.funcs) ||
+ (!adev->df.funcs->get_fica) ||
+ (!adev->df.funcs->set_fica))
+ return -EINVAL;
+
fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_ctl_in);
if (fica_out != 0x1f)
pr_err("xGMI error counters not enabled!\n");
@@ -281,7 +564,11 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev,
static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL);
+static DEVICE_ATTR(xgmi_physical_id, 0444, amdgpu_xgmi_show_physical_id, NULL);
static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL);
+static DEVICE_ATTR(xgmi_num_hops, S_IRUGO, amdgpu_xgmi_show_num_hops, NULL);
+static DEVICE_ATTR(xgmi_num_links, S_IRUGO, amdgpu_xgmi_show_num_links, NULL);
+static DEVICE_ATTR(xgmi_port_num, S_IRUGO, amdgpu_xgmi_show_connected_port_num, NULL);
static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
struct amdgpu_hive_info *hive)
@@ -296,11 +583,33 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
return ret;
}
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_physical_id);
+ if (ret) {
+ dev_err(adev->dev, "XGMI: Failed to create device file xgmi_physical_id\n");
+ return ret;
+ }
+
/* Create xgmi error file */
ret = device_create_file(adev->dev, &dev_attr_xgmi_error);
if (ret)
pr_err("failed to create xgmi_error\n");
+ /* Create xgmi num hops file */
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_num_hops);
+ if (ret)
+ pr_err("failed to create xgmi_num_hops\n");
+
+ /* Create xgmi num links file */
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_num_links);
+ if (ret)
+ pr_err("failed to create xgmi_num_links\n");
+
+ /* Create xgmi port num file if supported */
+ if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) {
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_port_num);
+ if (ret)
+ dev_err(adev->dev, "failed to create xgmi_port_num\n");
+ }
/* Create sysfs link to hive info folder on the first device */
if (hive->kobj.parent != (&adev->dev->kobj)) {
@@ -328,6 +637,12 @@ remove_link:
remove_file:
device_remove_file(adev->dev, &dev_attr_xgmi_device_id);
+ device_remove_file(adev->dev, &dev_attr_xgmi_physical_id);
+ device_remove_file(adev->dev, &dev_attr_xgmi_error);
+ device_remove_file(adev->dev, &dev_attr_xgmi_num_hops);
+ device_remove_file(adev->dev, &dev_attr_xgmi_num_links);
+ if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG)
+ device_remove_file(adev->dev, &dev_attr_xgmi_port_num);
success:
return ret;
@@ -340,7 +655,12 @@ static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev,
memset(node, 0, sizeof(node));
device_remove_file(adev->dev, &dev_attr_xgmi_device_id);
+ device_remove_file(adev->dev, &dev_attr_xgmi_physical_id);
device_remove_file(adev->dev, &dev_attr_xgmi_error);
+ device_remove_file(adev->dev, &dev_attr_xgmi_num_hops);
+ device_remove_file(adev->dev, &dev_attr_xgmi_num_links);
+ if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG)
+ device_remove_file(adev->dev, &dev_attr_xgmi_port_num);
if (hive->kobj.parent != (&adev->dev->kobj))
sysfs_remove_link(&adev->dev->kobj,"xgmi_hive_info");
@@ -375,6 +695,7 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
hive = kzalloc(sizeof(*hive), GFP_KERNEL);
if (!hive) {
dev_err(adev->dev, "XGMI: allocation failed\n");
+ ret = -ENOMEM;
hive = NULL;
goto pro_end;
}
@@ -387,20 +708,48 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
if (ret) {
dev_err(adev->dev, "XGMI: failed initializing kobject for xgmi hive\n");
kobject_put(&hive->kobj);
- kfree(hive);
hive = NULL;
goto pro_end;
}
+ /**
+ * Only init hive->reset_domain for non-SRIOV configurations. For SRIOV,
+ * the host driver decides how to reset the GPU, either through FLR or chain
+ * reset. The guest side will get individual notifications from the host for
+ * the FLR if necessary.
+ */
+ if (!amdgpu_sriov_vf(adev)) {
+ /**
+ * Avoid recreating the reset domain when the hive is reconstructed, e.g.
+ * when the devices in the XGMI hive are reset during probe for a
+ * passthrough GPU.
+ * See https://www.spinics.net/lists/amd-gfx/msg58836.html
+ */
+ if (adev->reset_domain->type != XGMI_HIVE) {
+ hive->reset_domain =
+ amdgpu_reset_create_reset_domain(XGMI_HIVE, "amdgpu-reset-hive");
+ if (!hive->reset_domain) {
+ dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n");
+ ret = -ENOMEM;
+ kobject_put(&hive->kobj);
+ hive = NULL;
+ goto pro_end;
+ }
+ } else {
+ amdgpu_reset_get_reset_domain(adev->reset_domain);
+ hive->reset_domain = adev->reset_domain;
+ }
+ }
+
hive->hive_id = adev->gmc.xgmi.hive_id;
INIT_LIST_HEAD(&hive->device_list);
INIT_LIST_HEAD(&hive->node);
mutex_init(&hive->hive_lock);
- atomic_set(&hive->in_reset, 0);
atomic_set(&hive->number_devices, 0);
task_barrier_init(&hive->tb);
hive->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN;
hive->hi_req_gpu = NULL;
+ atomic_set(&hive->requested_nps_mode, UNKNOWN_MEMORY_PARTITION_MODE);
+
/*
* hive pstate on boot is high in vega20 so we have to go to low
* pstate on after boot.
@@ -485,6 +834,9 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
{
int ret;
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
/* Each psp need to set the latest topology */
ret = psp_xgmi_set_topology_info(&adev->psp,
atomic_read(&hive->number_devices),
@@ -506,28 +858,88 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
* num_hops[2:0] = number of hops
*/
int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
- struct amdgpu_device *peer_adev)
+ struct amdgpu_device *peer_adev)
{
struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
uint8_t num_hops_mask = 0x7;
int i;
+ if (!adev->gmc.xgmi.supported)
+ return 0;
+
for (i = 0 ; i < top->num_nodes; ++i)
if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
return top->nodes[i].num_hops & num_hops_mask;
- return -EINVAL;
+
+ dev_err(adev->dev, "Failed to get xgmi hops count for peer %d.\n",
+ peer_adev->gmc.xgmi.physical_node_id);
+
+ return 0;
+}
+
+/**
+ * amdgpu_xgmi_get_bandwidth - compute the min/max XGMI bandwidth
+ * @adev: amdgpu device
+ * @peer_adev: peer device, required when @bw_mode is AMDGPU_XGMI_BW_MODE_PER_PEER
+ * @bw_mode: per-peer mode uses the link count to @peer_adev; any other mode
+ *           uses a single link
+ * @bw_unit: AMDGPU_XGMI_BW_UNIT_MBYTES scales the result by 1000
+ * @min_bw: out, bandwidth of one link
+ * @max_bw: out, bandwidth of one link times the number of links
+ *
+ * Both outputs are zeroed first and stay zero when the peer is not found in
+ * the topology (an error is logged) or has no links.
+ *
+ * Return: 0 on success, -EINVAL when an output pointer is NULL or per-peer
+ * mode is requested without a peer, -ENODATA when XGMI is not supported.
+ */
+int amdgpu_xgmi_get_bandwidth(struct amdgpu_device *adev, struct amdgpu_device *peer_adev,
+			      enum amdgpu_xgmi_bw_mode bw_mode, enum amdgpu_xgmi_bw_unit bw_unit,
+			      uint32_t *min_bw, uint32_t *max_bw)
+{
+	const bool per_peer = (bw_mode == AMDGPU_XGMI_BW_MODE_PER_PEER);
+	const int scale = (bw_unit == AMDGPU_XGMI_BW_UNIT_MBYTES) ? 1000 : 1;
+	const int lanes = adev->gmc.xgmi.max_width;
+	const int rate = adev->gmc.xgmi.max_speed;
+	/* -1 marks "peer not found yet"; a fixed single link otherwise */
+	int links = per_peer ? -1 : 1;
+	int link_bw;
+
+	if (!min_bw || !max_bw)
+		return -EINVAL;
+
+	*min_bw = 0;
+	*max_bw = 0;
+
+	if (!adev->gmc.xgmi.supported)
+		return -ENODATA;
+
+	if (per_peer && !peer_adev)
+		return -EINVAL;
+
+	if (per_peer) {
+		struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+		int n;
+
+		for (n = 0; n < top->num_nodes; n++) {
+			if (top->nodes[n].node_id == peer_adev->gmc.xgmi.node_id) {
+				links = top->nodes[n].num_links;
+				break;
+			}
+		}
+	}
+
+	if (links == -1) {
+		dev_err(adev->dev, "Failed to get number of xgmi links for peer %d.\n",
+			peer_adev->gmc.xgmi.physical_node_id);
+		return 0;
+	}
+
+	if (!links)
+		return 0;
+
+	link_bw = (rate * lanes * scale) / BITS_PER_BYTE;
+	*min_bw = link_bw;
+	*max_bw = links * link_bw;
+
+	return 0;
+}
-int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev,
- struct amdgpu_device *peer_adev)
+bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev)
{
struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
int i;
+ /* Sharing should always be enabled for non-SRIOV. */
+ if (!amdgpu_sriov_vf(adev))
+ return true;
+
for (i = 0 ; i < top->num_nodes; ++i)
if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
- return top->nodes[i].num_links;
- return -EINVAL;
+ return !!top->nodes[i].is_sharing_enabled;
+
+ return false;
}
/*
@@ -568,8 +980,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
if (!adev->gmc.xgmi.supported)
return 0;
- if (!adev->gmc.xgmi.pending_reset &&
- amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
+ if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
ret = psp_xgmi_initialize(&adev->psp, false, true);
if (ret) {
dev_err(adev->dev,
@@ -615,8 +1026,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
task_barrier_add_task(&hive->tb);
- if (!adev->gmc.xgmi.pending_reset &&
- amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
+ if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
/* update node list for other device in the hive */
if (tmp_adev != adev) {
@@ -630,18 +1040,33 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
goto exit_unlock;
}
- /* get latest topology info for each device from psp */
- list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
- ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
- &tmp_adev->psp.xgmi_context.top_info, false);
+ if (amdgpu_sriov_vf(adev) &&
+ adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) {
+ /* only get topology for VF being init if it can support full duplex */
+ ret = psp_xgmi_get_topology_info(&adev->psp, count,
+ &adev->psp.xgmi_context.top_info, false);
if (ret) {
- dev_err(tmp_adev->dev,
+ dev_err(adev->dev,
"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
- tmp_adev->gmc.xgmi.node_id,
- tmp_adev->gmc.xgmi.hive_id, ret);
- /* To do : continue with some node failed or disable the whole hive */
+ adev->gmc.xgmi.node_id,
+ adev->gmc.xgmi.hive_id, ret);
+ /* To do: continue with some node failed or disable the whole hive*/
goto exit_unlock;
}
+ } else {
+ /* get latest topology info for each device from psp */
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
+ &tmp_adev->psp.xgmi_context.top_info, false);
+ if (ret) {
+ dev_err(tmp_adev->dev,
+ "XGMI: Get topology failure on device %llx, hive %llx, ret %d",
+ tmp_adev->gmc.xgmi.node_id,
+ tmp_adev->gmc.xgmi.hive_id, ret);
+ /* To do : continue with some node failed or disable the whole hive */
+ goto exit_unlock;
+ }
+ }
}
/* get topology again for hives that support extended data */
@@ -673,7 +1098,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
}
}
- if (!ret && !adev->gmc.xgmi.pending_reset)
+ if (!ret)
ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive);
exit_unlock:
@@ -723,56 +1148,94 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
amdgpu_put_xgmi_hive(hive);
}
- return psp_xgmi_terminate(&adev->psp);
+ return 0;
+}
+
+/*
+ * ACA bank parser for XGMI v6.4.x.
+ *
+ * Logs the name of the extended error code found in the bank's STATUS
+ * register (when the code has a name) and records the MISC0 error count in
+ * the ACA error cache. The count is forced to zero unless the code is one
+ * that is counted for the bank type:
+ *   UE: 0x00 (DataLossErr) or 0x09 (DataParityErr)
+ *   CE: 0x06 (BERExceededErr)
+ *
+ * Returns the decode/log result, or -EINVAL for any other @type.
+ */
+static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+				       enum aca_smu_type type, void *data)
+{
+	struct amdgpu_device *adev = handle->adev;
+	struct aca_bank_info info;
+	const char *name = NULL;
+	int code, err;
+	u64 errcnt;
+
+	err = aca_bank_info_decode(bank, &info);
+	if (err)
+		return err;
+
+	code = ACA_REG__STATUS__ERRORCODEEXT(bank->regs[ACA_REG_IDX_STATUS]);
+
+	if (code < ARRAY_SIZE(xgmi_v6_4_0_ras_error_code_ext))
+		name = xgmi_v6_4_0_ras_error_code_ext[code];
+	if (name)
+		dev_info(adev->dev, "%s detected\n", name);
+
+	errcnt = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]);
+
+	if (type == ACA_SMU_TYPE_UE) {
+		if (code != 0 && code != 9)
+			errcnt = 0ULL;
+		bank->aca_err_type = ACA_ERROR_TYPE_UE;
+	} else if (type == ACA_SMU_TYPE_CE) {
+		if (code != 6)
+			errcnt = 0ULL;
+		bank->aca_err_type = ACA_ERROR_TYPE_CE;
+	} else {
+		return -EINVAL;
+	}
+
+	return aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, errcnt);
+}
-static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
+/* Bank callbacks for XGMI v6.4.x: banks are parsed by xgmi_v6_4_0_aca_bank_parser(). */
+static const struct aca_bank_ops xgmi_v6_4_0_aca_bank_ops = {
+ .aca_bank_parser = xgmi_v6_4_0_aca_bank_parser,
+};
+
+/*
+ * ACA description that amdgpu_xgmi_ras_late_init() binds to
+ * AMDGPU_RAS_BLOCK__XGMI_WAFL when the XGMI IP version is 6.4.0 or 6.4.1.
+ * The mask selects both the UE and CE error masks.
+ */
+static const struct aca_info xgmi_v6_4_0_aca_info = {
+ .hwip = ACA_HWIP_TYPE_PCS_XGMI,
+ .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK,
+ .bank_ops = &xgmi_v6_4_0_aca_bank_ops,
+};
+
+static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
int r;
- struct ras_ih_if ih_info = {
- .cb = NULL,
- };
- struct ras_fs_if fs_info = {
- .sysfs_name = "xgmi_wafl_err_count",
- };
if (!adev->gmc.xgmi.supported ||
adev->gmc.xgmi.num_physical_nodes == 0)
return 0;
- adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev);
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL);
- if (!adev->gmc.xgmi.ras_if) {
- adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
- if (!adev->gmc.xgmi.ras_if)
- return -ENOMEM;
- adev->gmc.xgmi.ras_if->block = AMDGPU_RAS_BLOCK__XGMI_WAFL;
- adev->gmc.xgmi.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
- adev->gmc.xgmi.ras_if->sub_block_index = 0;
- }
- ih_info.head = fs_info.head = *adev->gmc.xgmi.ras_if;
- r = amdgpu_ras_late_init(adev, adev->gmc.xgmi.ras_if,
- &fs_info, &ih_info);
- if (r || !amdgpu_ras_is_supported(adev, adev->gmc.xgmi.ras_if->block)) {
- kfree(adev->gmc.xgmi.ras_if);
- adev->gmc.xgmi.ras_if = NULL;
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL,
+ &xgmi_v6_4_0_aca_info, NULL);
+ if (r)
+ goto late_fini;
+ break;
+ default:
+ break;
}
- return r;
-}
+ return 0;
-static void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev)
-{
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL) &&
- adev->gmc.xgmi.ras_if) {
- struct ras_common_if *ras_if = adev->gmc.xgmi.ras_if;
- struct ras_ih_if ih_info = {
- .cb = NULL,
- };
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
- amdgpu_ras_late_fini(adev, ras_if, &ih_info);
- kfree(ras_if);
- }
+ return r;
}
uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
@@ -788,7 +1251,7 @@ static void pcs_clear_status(struct amdgpu_device *adev, uint32_t pcs_status_reg
WREG32_PCIE(pcs_status_reg, 0);
}
-static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev)
+static void amdgpu_xgmi_legacy_reset_ras_error_count(struct amdgpu_device *adev)
{
uint32_t i;
@@ -804,9 +1267,6 @@ static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev)
xgmi_pcs_err_status_reg_vg20[i]);
break;
case CHIP_ALDEBARAN:
- for (i = 0; i < ARRAY_SIZE(xgmi23_pcs_err_status_reg_aldebaran); i++)
- pcs_clear_status(adev,
- xgmi23_pcs_err_status_reg_aldebaran[i]);
for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_aldebaran); i++)
pcs_clear_status(adev,
xgmi3x16_pcs_err_status_reg_aldebaran[i]);
@@ -817,58 +1277,116 @@ static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev)
default:
break;
}
+
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++)
+ pcs_clear_status(adev,
+ xgmi3x16_pcs_err_status_reg_v6_4[i]);
+ break;
+ default:
+ break;
+ }
+}
+
+/* Write 0 to the ACA status register at @mca_base for XGMI instance @xgmi_inst. */
+static void __xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst, u64 mca_base)
+{
+ WREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS, 0ULL);
+}
+
+/* Clear the ACA status register at every base in xgmi_v6_4_0_mca_base_array[] for @xgmi_inst. */
+static void xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(xgmi_v6_4_0_mca_base_array); i++)
+ __xgmi_v6_4_0_reset_error_count(adev, xgmi_inst, xgmi_v6_4_0_mca_base_array[i]);
+}
+
+/* Reset the XGMI v6.4.x error status for each instance selected by adev->aid_mask. */
+static void xgmi_v6_4_0_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ int i;
+
+ for_each_inst(i, adev->aid_mask)
+ xgmi_v6_4_0_reset_error_count(adev, i);
+}
+
+/*
+ * Reset the XGMI RAS error counters: XGMI IP 6.4.0/6.4.1 clears the ACA
+ * status registers, every other version falls back to the legacy PCS
+ * status-register path.
+ */
+static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev)
+{
+	u32 ip_ver = amdgpu_ip_version(adev, XGMI_HWIP, 0);
+
+	if (ip_ver == IP_VERSION(6, 4, 0) || ip_ver == IP_VERSION(6, 4, 1))
+		xgmi_v6_4_0_reset_ras_error_count(adev);
+	else
+		amdgpu_xgmi_legacy_reset_ras_error_count(adev);
+}
static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,
uint32_t value,
+ uint32_t mask_value,
uint32_t *ue_count,
uint32_t *ce_count,
- bool is_xgmi_pcs)
+ bool is_xgmi_pcs,
+ bool check_mask)
{
int i;
- int ue_cnt;
+ int ue_cnt = 0;
+ const struct amdgpu_pcs_ras_field *pcs_ras_fields = NULL;
+ uint32_t field_array_size = 0;
if (is_xgmi_pcs) {
- /* query xgmi pcs error status,
- * only ue is supported */
- for (i = 0; i < ARRAY_SIZE(xgmi_pcs_ras_fields); i ++) {
- ue_cnt = (value &
- xgmi_pcs_ras_fields[i].pcs_err_mask) >>
- xgmi_pcs_ras_fields[i].pcs_err_shift;
- if (ue_cnt) {
- dev_info(adev->dev, "%s detected\n",
- xgmi_pcs_ras_fields[i].err_name);
- *ue_count += ue_cnt;
- }
+ if (amdgpu_ip_version(adev, XGMI_HWIP, 0) ==
+ IP_VERSION(6, 1, 0) ||
+ amdgpu_ip_version(adev, XGMI_HWIP, 0) ==
+ IP_VERSION(6, 4, 0) ||
+ amdgpu_ip_version(adev, XGMI_HWIP, 0) ==
+ IP_VERSION(6, 4, 1)) {
+ pcs_ras_fields = &xgmi3x16_pcs_ras_fields[0];
+ field_array_size = ARRAY_SIZE(xgmi3x16_pcs_ras_fields);
+ } else {
+ pcs_ras_fields = &xgmi_pcs_ras_fields[0];
+ field_array_size = ARRAY_SIZE(xgmi_pcs_ras_fields);
}
} else {
- /* query wafl pcs error status,
- * only ue is supported */
- for (i = 0; i < ARRAY_SIZE(wafl_pcs_ras_fields); i++) {
- ue_cnt = (value &
- wafl_pcs_ras_fields[i].pcs_err_mask) >>
- wafl_pcs_ras_fields[i].pcs_err_shift;
- if (ue_cnt) {
- dev_info(adev->dev, "%s detected\n",
- wafl_pcs_ras_fields[i].err_name);
- *ue_count += ue_cnt;
- }
+ pcs_ras_fields = &wafl_pcs_ras_fields[0];
+ field_array_size = ARRAY_SIZE(wafl_pcs_ras_fields);
+ }
+
+ if (check_mask)
+ value = value & ~mask_value;
+
+ /* query xgmi/wafl pcs error status,
+ * only ue is supported */
+ for (i = 0; value && i < field_array_size; i++) {
+ ue_cnt = (value &
+ pcs_ras_fields[i].pcs_err_mask) >>
+ pcs_ras_fields[i].pcs_err_shift;
+ if (ue_cnt) {
+ dev_info(adev->dev, "%s detected\n",
+ pcs_ras_fields[i].err_name);
+ *ue_count += ue_cnt;
}
+
+ /* reset bit value if the bit is checked */
+ value &= ~(pcs_ras_fields[i].pcs_err_mask);
}
return 0;
}
-static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
- void *ras_error_status)
+static void amdgpu_xgmi_legacy_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
- int i;
- uint32_t data;
+ int i, supported = 1;
+ uint32_t data, mask_data = 0;
uint32_t ue_cnt = 0, ce_cnt = 0;
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL))
- return -EINVAL;
+ return ;
err_data->ue_count = 0;
err_data->ce_count = 0;
@@ -879,15 +1397,15 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_arct); i++) {
data = RREG32_PCIE(xgmi_pcs_err_status_reg_arct[i]);
if (data)
- amdgpu_xgmi_query_pcs_error_status(adev,
- data, &ue_cnt, &ce_cnt, true);
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, true, false);
}
/* check wafl pcs error */
for (i = 0; i < ARRAY_SIZE(wafl_pcs_err_status_reg_arct); i++) {
data = RREG32_PCIE(wafl_pcs_err_status_reg_arct[i]);
if (data)
- amdgpu_xgmi_query_pcs_error_status(adev,
- data, &ue_cnt, &ce_cnt, false);
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, false, false);
}
break;
case CHIP_VEGA20:
@@ -895,56 +1413,366 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_vg20); i++) {
data = RREG32_PCIE(xgmi_pcs_err_status_reg_vg20[i]);
if (data)
- amdgpu_xgmi_query_pcs_error_status(adev,
- data, &ue_cnt, &ce_cnt, true);
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, true, false);
}
/* check wafl pcs error */
for (i = 0; i < ARRAY_SIZE(wafl_pcs_err_status_reg_vg20); i++) {
data = RREG32_PCIE(wafl_pcs_err_status_reg_vg20[i]);
if (data)
- amdgpu_xgmi_query_pcs_error_status(adev,
- data, &ue_cnt, &ce_cnt, false);
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, false, false);
}
break;
case CHIP_ALDEBARAN:
- /* check xgmi23 pcs error */
- for (i = 0; i < ARRAY_SIZE(xgmi23_pcs_err_status_reg_aldebaran); i++) {
- data = RREG32_PCIE(xgmi23_pcs_err_status_reg_aldebaran[i]);
- if (data)
- amdgpu_xgmi_query_pcs_error_status(adev,
- data, &ue_cnt, &ce_cnt, true);
- }
/* check xgmi3x16 pcs error */
for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_aldebaran); i++) {
data = RREG32_PCIE(xgmi3x16_pcs_err_status_reg_aldebaran[i]);
+ mask_data =
+ RREG32_PCIE(xgmi3x16_pcs_err_noncorrectable_mask_reg_aldebaran[i]);
if (data)
- amdgpu_xgmi_query_pcs_error_status(adev,
- data, &ue_cnt, &ce_cnt, true);
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, true, true);
}
/* check wafl pcs error */
for (i = 0; i < ARRAY_SIZE(walf_pcs_err_status_reg_aldebaran); i++) {
data = RREG32_PCIE(walf_pcs_err_status_reg_aldebaran[i]);
+ mask_data =
+ RREG32_PCIE(walf_pcs_err_noncorrectable_mask_reg_aldebaran[i]);
+ if (data)
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, false, true);
+ }
+ break;
+ default:
+ supported = 0;
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ /* check xgmi3x16 pcs error */
+ for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++) {
+ data = RREG32_PCIE(xgmi3x16_pcs_err_status_reg_v6_4[i]);
+ mask_data =
+ RREG32_PCIE(xgmi3x16_pcs_err_noncorrectable_mask_reg_v6_4[i]);
if (data)
- amdgpu_xgmi_query_pcs_error_status(adev,
- data, &ue_cnt, &ce_cnt, false);
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, true, true);
}
break;
default:
- dev_warn(adev->dev, "XGMI RAS error query not supported");
+ if (!supported)
+ dev_warn(adev->dev, "XGMI RAS error query not supported");
break;
}
- adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev);
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL);
err_data->ue_count += ue_cnt;
err_data->ce_count += ce_cnt;
+}
- return 0;
+/*
+ * Classify an XGMI v6.4.0 PCS MCA status value by its extended error code.
+ *
+ * When the code indexes a non-NULL entry of xgmi_v6_4_0_ras_error_code_ext,
+ * that description is logged. Extended code 0 maps to ACA_ERROR_TYPE_UE,
+ * code 6 maps to ACA_ERROR_TYPE_CE, and every other code yields -EINVAL.
+ */
+static enum aca_error_type xgmi_v6_4_0_pcs_mca_get_error_type(struct amdgpu_device *adev, u64 status)
+{
+	int code = ACA_REG__STATUS__ERRORCODEEXT(status);
+	const char *name = NULL;
+
+	/* Only consult the description table when the code is a valid index. */
+	if (code < ARRAY_SIZE(xgmi_v6_4_0_ras_error_code_ext))
+		name = xgmi_v6_4_0_ras_error_code_ext[code];
+
+	if (name)
+		dev_info(adev->dev, "%s detected\n", name);
+
+	if (code == 0)
+		return ACA_ERROR_TYPE_UE;
+	if (code == 6)
+		return ACA_ERROR_TYPE_CE;
+
+	/* Any other extended error code is reported as invalid. */
+	return -EINVAL;
+}
+
+/*
+ * Read one MCA status register for the XGMI instance given by
+ * mcm_info->die_id and account the error it reports.
+ *
+ * If the status is not flagged valid nothing is counted and the register is
+ * left untouched. Otherwise a UE or CE is added to err_data (one count per
+ * valid status), and the status register is written back to zero.
+ */
+static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct amdgpu_smuio_mcm_config_info *mcm_info,
+ u64 mca_base, struct ras_err_data *err_data)
+{
+ int xgmi_inst = mcm_info->die_id;
+ u64 status = 0;
+
+ status = RREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS);
+ /* Nothing latched in this bank: skip both accounting and the clear. */
+ if (!ACA_REG__STATUS__VAL(status))
+ return;
+
+ switch (xgmi_v6_4_0_pcs_mca_get_error_type(adev, status)) {
+ case ACA_ERROR_TYPE_UE:
+ amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, 1ULL);
+ break;
+ case ACA_ERROR_TYPE_CE:
+ amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, 1ULL);
+ break;
+ default:
+ /* Unrecognized error types are not counted but are still cleared below. */
+ break;
+ }
+
+ /* Clear the status register after it has been processed. */
+ WREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS, 0ULL);
}
-const struct amdgpu_xgmi_ras_funcs xgmi_ras_funcs = {
- .ras_late_init = amdgpu_xgmi_ras_late_init,
- .ras_fini = amdgpu_xgmi_ras_fini,
+/*
+ * Query every MCA base listed in xgmi_v6_4_0_mca_base_array for a single
+ * XGMI instance, attributing results to this device's socket id and to
+ * xgmi_inst as the die id.
+ */
+static void xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, int xgmi_inst, struct ras_err_data *err_data)
+{
+	struct amdgpu_smuio_mcm_config_info info = {
+		.socket_id = adev->smuio.funcs->get_socket_id(adev),
+		.die_id = xgmi_inst,
+	};
+	int bank = 0;
+
+	while (bank < ARRAY_SIZE(xgmi_v6_4_0_mca_base_array)) {
+		__xgmi_v6_4_0_query_error_count(adev, &info,
+						xgmi_v6_4_0_mca_base_array[bank],
+						err_data);
+		bank++;
+	}
+}
+
+/*
+ * Collect XGMI v6.4.0 RAS error counts for every instance set in
+ * adev->aid_mask. ras_error_status is interpreted as a struct ras_err_data.
+ */
+static void xgmi_v6_4_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status)
+{
+	struct ras_err_data *stats = ras_error_status;
+	int inst;
+
+	for_each_inst(inst, adev->aid_mask) {
+		xgmi_v6_4_0_query_error_count(adev, inst, stats);
+	}
+}
+
+/*
+ * Dispatch the XGMI RAS error-count query by XGMI IP version: 6.4.0 and
+ * 6.4.1 use the MCA based v6.4.0 path, every other version uses the legacy
+ * query path.
+ */
+static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
+					      void *ras_error_status)
+{
+	uint32_t xgmi_ver = amdgpu_ip_version(adev, XGMI_HWIP, 0);
+
+	if (xgmi_ver == IP_VERSION(6, 4, 0) ||
+	    xgmi_ver == IP_VERSION(6, 4, 1)) {
+		xgmi_v6_4_0_query_ras_error_count(adev, ras_error_status);
+		return;
+	}
+
+	amdgpu_xgmi_legacy_query_ras_error_count(adev, ras_error_status);
+}
+
+/*
+ * Trigger XGMI/WAFL error
+ *
+ * Disallows DF C-state and XGMI power down, asks the PSP to trigger the
+ * error described by inject_if for instance_mask, and then restores the
+ * default XGMI power-down policy and allows DF C-state again. The restore
+ * step is skipped when a RAS interrupt has been triggered.
+ *
+ * Failures of the power-management calls are only logged as warnings
+ * (-EOPNOTSUPP from the PLPD policy call is not logged); the return value
+ * is always the result of psp_ras_trigger_error().
+ */
+static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
+ void *inject_if, uint32_t instance_mask)
+{
+ int ret1, ret2;
+ struct ta_ras_trigger_error_input *block_info =
+ (struct ta_ras_trigger_error_input *)inject_if;
+
+ if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
+ dev_warn(adev->dev, "Failed to disallow df cstate");
+
+ ret1 = amdgpu_dpm_set_pm_policy(adev, PP_PM_POLICY_XGMI_PLPD, XGMI_PLPD_DISALLOW);
+ if (ret1 && ret1 != -EOPNOTSUPP)
+ dev_warn(adev->dev, "Failed to disallow XGMI power down");
+
+ ret2 = psp_ras_trigger_error(&adev->psp, block_info, instance_mask);
+
+ /* A RAS interrupt is already pending: return without restoring the PM settings. */
+ if (amdgpu_ras_intr_triggered())
+ return ret2;
+
+ ret1 = amdgpu_dpm_set_pm_policy(adev, PP_PM_POLICY_XGMI_PLPD, XGMI_PLPD_DEFAULT);
+ if (ret1 && ret1 != -EOPNOTSUPP)
+ dev_warn(adev->dev, "Failed to allow XGMI power down");
+
+ if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
+ dev_warn(adev->dev, "Failed to allow df cstate");
+
+ return ret2;
+}
+
+struct amdgpu_ras_block_hw_ops xgmi_ras_hw_ops = {
.query_ras_error_count = amdgpu_xgmi_query_ras_error_count,
.reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count,
+ .ras_error_inject = amdgpu_ras_error_inject_xgmi,
+};
+
+struct amdgpu_xgmi_ras xgmi_ras = {
+ .ras_block = {
+ .hw_ops = &xgmi_ras_hw_ops,
+ .ras_late_init = amdgpu_xgmi_ras_late_init,
+ },
};
+
+/*
+ * Register the XGMI/WAFL RAS block for this device, if one is attached.
+ *
+ * Returns 0 when no XGMI RAS object is set or when registration succeeds,
+ * otherwise the error returned by amdgpu_ras_register_ras_block(). After a
+ * successful registration the common RAS info (name, block, type) is filled
+ * in and published through adev->gmc.xgmi.ras_if.
+ */
+int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_xgmi_ras *ras_obj = adev->gmc.xgmi.ras;
+	struct ras_common_if *comm;
+	int ret;
+
+	if (!ras_obj)
+		return 0;
+
+	ret = amdgpu_ras_register_ras_block(adev, &ras_obj->ras_block);
+	if (ret) {
+		dev_err(adev->dev, "Failed to register xgmi_wafl_pcs ras block!\n");
+		return ret;
+	}
+
+	comm = &ras_obj->ras_block.ras_comm;
+	strcpy(comm->name, "xgmi_wafl");
+	comm->block = AMDGPU_RAS_BLOCK__XGMI_WAFL;
+	comm->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+	adev->gmc.xgmi.ras_if = comm;
+
+	return 0;
+}
+
+/*
+ * Work handler that performs the deferred "reset on init" for a whole hive.
+ *
+ * Under hive_lock it gathers every device on hive->device_list into a local
+ * reset list, locks the reset domain of the first device, and runs
+ * amdgpu_reset_do_xgmi_reset_on_init() with a context requesting a full
+ * reset without coredump. Afterwards bad page info is initialized for each
+ * device of the hive; failures other than -EHWPOISON are logged.
+ */
+static void amdgpu_xgmi_reset_on_init_work(struct work_struct *work)
+{
+	struct amdgpu_hive_info *hive =
+		container_of(work, struct amdgpu_hive_info, reset_on_init_work);
+	struct amdgpu_reset_context reset_context;
+	struct amdgpu_device *tmp_adev;
+	struct list_head device_list;
+	int r;
+
+	mutex_lock(&hive->hive_lock);
+
+	INIT_LIST_HEAD(&device_list);
+	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
+		list_add_tail(&tmp_adev->reset_list, &device_list);
+
+	tmp_adev = list_first_entry(&device_list, struct amdgpu_device,
+				    reset_list);
+	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
+
+	/*
+	 * Start from a zeroed context: set_bit() only ORs bits into
+	 * reset_context.flags, so without this the flag word (and any field
+	 * not assigned below) would carry stale stack contents.
+	 */
+	memset(&reset_context, 0, sizeof(reset_context));
+	reset_context.method = AMD_RESET_METHOD_ON_INIT;
+	reset_context.reset_req_dev = tmp_adev;
+	reset_context.hive = hive;
+	reset_context.reset_device_list = &device_list;
+	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+	set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
+
+	amdgpu_reset_do_xgmi_reset_on_init(&reset_context);
+	mutex_unlock(&hive->hive_lock);
+	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
+
+	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+		r = amdgpu_ras_init_badpage_info(tmp_adev);
+		/* -EHWPOISON is deliberately not reported as an error here. */
+		if (r && r != -EHWPOISON)
+			dev_err(tmp_adev->dev,
+				"error during bad page data initialization");
+	}
+}
+
+/*
+ * Initialize the hive's reset_on_init_work with
+ * amdgpu_xgmi_reset_on_init_work() as its handler and schedule it on the
+ * hive's reset domain.
+ */
+static void amdgpu_xgmi_schedule_reset_on_init(struct amdgpu_hive_info *hive)
+{
+ INIT_WORK(&hive->reset_on_init_work, amdgpu_xgmi_reset_on_init_work);
+ amdgpu_reset_domain_schedule(hive->reset_domain,
+ &hive->reset_on_init_work);
+}
+
+/*
+ * Schedule the hive-wide reset-on-init once every device has joined.
+ *
+ * The reset work is scheduled only when the hive's device count equals
+ * adev->gmc.xgmi.num_physical_nodes; in that case this function also waits
+ * for the work to finish before returning.
+ *
+ * Returns 0 on success (including when nothing was scheduled because the
+ * hive is not complete yet) or -EINVAL when no hive can be obtained for
+ * adev.
+ */
+int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_hive_info *hive;
+	bool reset_scheduled = false;
+	int num_devs;
+
+	hive = amdgpu_get_xgmi_hive(adev);
+	if (!hive)
+		return -EINVAL;
+
+	mutex_lock(&hive->hive_lock);
+	num_devs = atomic_read(&hive->number_devices);
+	if (num_devs == adev->gmc.xgmi.num_physical_nodes) {
+		amdgpu_xgmi_schedule_reset_on_init(hive);
+		reset_scheduled = true;
+	}
+	mutex_unlock(&hive->hive_lock);
+
+	/*
+	 * Wait outside hive_lock: the work handler acquires hive_lock itself.
+	 */
+	if (reset_scheduled)
+		flush_work(&hive->reset_on_init_work);
+
+	/*
+	 * Drop the reference taken above only after the last access to the
+	 * hive, so the flush never touches a hive we no longer hold.
+	 */
+	amdgpu_put_xgmi_hive(hive);
+
+	return 0;
+}
+
+/*
+ * Place an NPS (memory partition) mode change request for all devices in
+ * the hive.
+ *
+ * Returns 0 when every request succeeded or when the hive's requested mode
+ * is already UNKNOWN_MEMORY_PARTITION_MODE (nothing is requested then);
+ * otherwise the error from the first failing request_mem_partition_mode()
+ * call. On failure the devices that had already accepted the request are
+ * asked to go back to the mode queried from the failing device.
+ */
+int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev,
+				   struct amdgpu_hive_info *hive,
+				   int req_nps_mode)
+{
+	struct amdgpu_device *tmp_adev;
+	/* r must start at 0: the loop below may run zero times. */
+	int cur_nps_mode, r = 0;
+
+	/* This is expected to be called only during unload of driver. The
+	 * request needs to be placed only once for all devices in the hive. If
+	 * one of them fail, revert the request for previous successful devices.
+	 * After placing the request, make hive mode as UNKNOWN so that other
+	 * devices don't request anymore.
+	 */
+	mutex_lock(&hive->hive_lock);
+	if (atomic_read(&hive->requested_nps_mode) ==
+	    UNKNOWN_MEMORY_PARTITION_MODE) {
+		dev_dbg(adev->dev, "Unexpected entry for hive NPS change");
+		mutex_unlock(&hive->hive_lock);
+		return 0;
+	}
+	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+		r = adev->gmc.gmc_funcs->request_mem_partition_mode(
+			tmp_adev, req_nps_mode);
+		if (r)
+			break;
+	}
+	if (r) {
+		/* Request back current mode if one of the requests failed */
+		cur_nps_mode =
+			adev->gmc.gmc_funcs->query_mem_partition_mode(tmp_adev);
+		/* Walk backwards over the devices before the failing one. */
+		list_for_each_entry_continue_reverse(
+			tmp_adev, &hive->device_list, gmc.xgmi.head)
+			adev->gmc.gmc_funcs->request_mem_partition_mode(
+				tmp_adev, cur_nps_mode);
+	}
+	/* Set to UNKNOWN so that other devices don't request anymore */
+	atomic_set(&hive->requested_nps_mode, UNKNOWN_MEMORY_PARTITION_MODE);
+	mutex_unlock(&hive->hive_lock);
+
+	return r;
+}
+
+/*
+ * Report whether adev and bo_adev are two distinct devices of the same XGMI
+ * hive. Always false when amdgpu_use_xgmi_p2p is off, when both arguments
+ * are the same device, or when adev has no hive id.
+ */
+bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
+			   struct amdgpu_device *bo_adev)
+{
+	if (!amdgpu_use_xgmi_p2p || adev == bo_adev)
+		return false;
+
+	if (!adev->gmc.xgmi.hive_id)
+		return false;
+
+	return adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id;
+}
+
+/*
+ * Set the default XGMI max link speed and width from the GC IP version.
+ *
+ * Does nothing when XGMI is not supported on the device. GC 9.4.0, 9.4.1
+ * and 9.4.2 get speed 25 (GT/s); GC 9.4.3, 9.4.4 and 9.5.0 get speed 32
+ * (GT/s); both groups get width 16. Other versions are left unchanged.
+ */
+void amdgpu_xgmi_early_init(struct amdgpu_device *adev)
+{
+	uint32_t gc_ver;
+
+	if (!adev->gmc.xgmi.supported)
+		return;
+
+	gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+
+	if (gc_ver == IP_VERSION(9, 4, 0) ||
+	    gc_ver == IP_VERSION(9, 4, 1) ||
+	    gc_ver == IP_VERSION(9, 4, 2)) {
+		/* 25 GT/s */
+		adev->gmc.xgmi.max_speed = 25;
+		adev->gmc.xgmi.max_width = 16;
+	} else if (gc_ver == IP_VERSION(9, 4, 3) ||
+		   gc_ver == IP_VERSION(9, 4, 4) ||
+		   gc_ver == IP_VERSION(9, 5, 0)) {
+		/* 32 GT/s */
+		adev->gmc.xgmi.max_speed = 32;
+		adev->gmc.xgmi.max_width = 16;
+	}
+}
+
+/*
+ * Override the XGMI max link speed and width stored for this device with
+ * the caller-supplied values.
+ */
+void amgpu_xgmi_set_max_speed_width(struct amdgpu_device *adev,
+				    uint16_t max_speed, uint8_t max_width)
+{
+	/* The two stores are independent of each other. */
+	adev->gmc.xgmi.max_width = max_width;
+	adev->gmc.xgmi.max_speed = max_speed;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index d2189bf7d428..5f36aff17e79 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -23,8 +23,7 @@
#define __AMDGPU_XGMI_H__
#include <drm/task_barrier.h>
-#include "amdgpu_psp.h"
-
+#include "amdgpu_ras.h"
struct amdgpu_hive_info {
struct kobject kobj;
@@ -33,7 +32,6 @@ struct amdgpu_hive_info {
struct list_head node;
atomic_t number_devices;
struct mutex hive_lock;
- atomic_t in_reset;
int hi_req_count;
struct amdgpu_device *hi_req_gpu;
struct task_barrier tb;
@@ -42,6 +40,12 @@ struct amdgpu_hive_info {
AMDGPU_XGMI_PSTATE_MAX_VEGA20,
AMDGPU_XGMI_PSTATE_UNKNOWN
} pstate;
+
+ struct amdgpu_reset_domain *reset_domain;
+ atomic_t ras_recovery;
+ struct ras_event_manager event_mgr;
+ struct work_struct reset_on_init_work;
+ atomic_t requested_nps_mode;
};
struct amdgpu_pcs_ras_field {
@@ -50,25 +54,80 @@ struct amdgpu_pcs_ras_field {
uint32_t pcs_err_shift;
};
-extern const struct amdgpu_xgmi_ras_funcs xgmi_ras_funcs;
+/**
+ * Bandwidth range reporting comes in two modes.
+ *
+ * PER_LINK - range for any xgmi link
+ * PER_PEER - range of max of single xgmi link to max of multiple links based on source peer
+ */
+enum amdgpu_xgmi_bw_mode {
+ AMDGPU_XGMI_BW_MODE_PER_LINK = 0,
+ AMDGPU_XGMI_BW_MODE_PER_PEER
+};
+
+enum amdgpu_xgmi_bw_unit {
+ AMDGPU_XGMI_BW_UNIT_GBYTES = 0,
+ AMDGPU_XGMI_BW_UNIT_MBYTES
+};
+
+struct amdgpu_xgmi_ras {
+ struct amdgpu_ras_block_object ras_block;
+};
+extern struct amdgpu_xgmi_ras xgmi_ras;
+
+struct amdgpu_xgmi {
+ /* from psp */
+ u64 node_id;
+ u64 hive_id;
+ /* fixed per family */
+ u64 node_segment_size;
+ /* physical node (0-3) */
+ unsigned physical_node_id;
+ /* number of nodes (0-4) */
+ unsigned num_physical_nodes;
+ /* gpu list in the same hive */
+ struct list_head head;
+ bool supported;
+ struct ras_common_if *ras_if;
+ bool connected_to_cpu;
+ struct amdgpu_xgmi_ras *ras;
+ uint16_t max_speed;
+ uint8_t max_width;
+};
+
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive);
int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
int amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
-int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
- struct amdgpu_device *peer_adev);
-int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev,
- struct amdgpu_device *peer_adev);
+int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, struct amdgpu_device *peer_adev);
+int amdgpu_xgmi_get_bandwidth(struct amdgpu_device *adev, struct amdgpu_device *peer_adev,
+ enum amdgpu_xgmi_bw_mode bw_mode, enum amdgpu_xgmi_bw_unit bw_unit,
+ uint32_t *min_bw, uint32_t *max_bw);
+bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev);
uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
uint64_t addr);
-static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
- struct amdgpu_device *bo_adev)
-{
- return (adev != bo_adev &&
- adev->gmc.xgmi.hive_id &&
- adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id);
-}
+bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
+ struct amdgpu_device *bo_adev);
+int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev);
+
+int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev,
+ struct amdgpu_hive_info *hive,
+ int req_nps_mode);
+int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev,
+ int global_link_num);
+int amdgpu_xgmi_get_ext_link(struct amdgpu_device *adev, int link_num);
+
+void amdgpu_xgmi_early_init(struct amdgpu_device *adev);
+uint32_t amdgpu_xgmi_get_max_bandwidth(struct amdgpu_device *adev);
+
+void amgpu_xgmi_set_max_speed_width(struct amdgpu_device *adev,
+ uint16_t max_speed, uint8_t max_width);
+
+/* Cleanup macro for use with __free(xgmi_put_hive) */
+DEFINE_FREE(xgmi_put_hive, struct amdgpu_hive_info *, if (_T) amdgpu_put_xgmi_hive(_T))
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index 7326b6c1b71c..3cdb1e0eca37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -1,45 +1,106 @@
/*
- * Copyright 2018-2019 Advanced Micro Devices, Inc.
+ * Copyright (c) 2018-2021 Advanced Micro Devices, Inc. All rights reserved.
*
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
*/
#ifndef AMDGV_SRIOV_MSG__H_
#define AMDGV_SRIOV_MSG__H_
-/* unit in kilobytes */
-#define AMD_SRIOV_MSG_VBIOS_OFFSET 0
-#define AMD_SRIOV_MSG_VBIOS_SIZE_KB 64
-#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB
-#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB 4
+#define AMD_SRIOV_MSG_SIZE_KB 1
/*
- * layout
- * 0 64KB 65KB 66KB
- * | VBIOS | PF2VF | VF2PF | Bad Page | ...
- * | 64KB | 1KB | 1KB |
+ * layout v1
+ * 0 64KB 65KB 66KB 68KB 132KB
+ * | VBIOS | PF2VF | VF2PF | Bad Page | RAS Telemetry Region | ...
+ * | 64KB | 1KB | 1KB | 2KB | 64KB | ...
*/
-#define AMD_SRIOV_MSG_SIZE_KB 1
-#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB
-#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB (AMD_SRIOV_MSG_PF2VF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
-#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB (AMD_SRIOV_MSG_VF2PF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
+
+/*
+ * layout v2 (offsets are dynamically allocated and the offsets below are examples)
+ * 0 1KB 64KB 65KB 66KB 68KB 132KB
+ * | INITD_H | VBIOS | PF2VF | VF2PF | Bad Page | RAS Telemetry Region | ...
+ * | 1KB | 64KB | 1KB | 1KB | 2KB | 64KB | ...
+ *
+ * Note: PF2VF + VF2PF + Bad Page = DataExchange region (allocated contiguously)
+ */
+
+/* v1 layout sizes */
+#define AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1 64
+#define AMD_SRIOV_MSG_PF2VF_SIZE_KB_V1 1
+#define AMD_SRIOV_MSG_VF2PF_SIZE_KB_V1 1
+#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1 2
+#define AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 64
+#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1 \
+ (AMD_SRIOV_MSG_PF2VF_SIZE_KB_V1 + AMD_SRIOV_MSG_VF2PF_SIZE_KB_V1 + \
+ AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1)
+
+/* v1 offsets */
+#define AMD_SRIOV_MSG_VBIOS_OFFSET_V1 0
+#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB_V1 AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1
+#define AMD_SRIOV_MSG_TMR_OFFSET_KB 2048
+#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB_V1
+#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 \
+ (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 + AMD_SRIOV_MSG_SIZE_KB)
+#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB_V1 \
+ (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 + AMD_SRIOV_MSG_SIZE_KB)
+#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 \
+ (AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB_V1 + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1)
+#define AMD_SRIOV_MSG_INIT_DATA_TOT_SIZE_KB_V1 \
+ (AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1 + AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1 + \
+ AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1)
+
+enum amd_sriov_crit_region_version {
+ GPU_CRIT_REGION_V1 = 1,
+ GPU_CRIT_REGION_V2 = 2,
+};
+
+/* v2 layout offset enum (in order of allocation) */
+enum amd_sriov_msg_table_id_enum {
+ AMD_SRIOV_MSG_IPD_TABLE_ID = 0,
+ AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID,
+ AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID,
+ AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID,
+ AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID,
+ AMD_SRIOV_MSG_INITD_H_TABLE_ID,
+ AMD_SRIOV_MSG_MAX_TABLE_ID,
+};
+
+struct amd_sriov_msg_init_data_header {
+ char signature[4]; /* "INDA" */
+ uint32_t version;
+ uint32_t checksum;
+ uint32_t initdata_offset; /* 0 */
+ uint32_t initdata_size_in_kb; /* 5MB */
+ uint32_t valid_tables;
+ uint32_t vbios_img_offset;
+ uint32_t vbios_img_size_in_kb;
+ uint32_t dataexchange_offset;
+ uint32_t dataexchange_size_in_kb;
+ uint32_t ras_tele_info_offset;
+ uint32_t ras_tele_info_size_in_kb;
+ uint32_t ip_discovery_offset;
+ uint32_t ip_discovery_size_in_kb;
+ uint32_t bad_page_info_offset;
+ uint32_t bad_page_size_in_kb;
+ uint32_t reserved[8];
+};
/*
* PF2VF history log:
@@ -51,10 +112,10 @@
* v2 defined in amdgim
* v3 current
*/
-#define AMD_SRIOV_MSG_FW_VRAM_PF2VF_VER 2
-#define AMD_SRIOV_MSG_FW_VRAM_VF2PF_VER 3
+#define AMD_SRIOV_MSG_FW_VRAM_PF2VF_VER 2
+#define AMD_SRIOV_MSG_FW_VRAM_VF2PF_VER 3
-#define AMD_SRIOV_MSG_RESERVE_UCODE 24
+#define AMD_SRIOV_MSG_RESERVE_UCODE 24
#define AMD_SRIOV_MSG_RESERVE_VCN_INST 4
@@ -83,37 +144,74 @@ enum amd_sriov_ucode_engine_id {
AMD_SRIOV_UCODE_ID__MAX
};
-#pragma pack(push, 1) // PF2VF / VF2PF data areas are byte packed
+#pragma pack(push, 1) // PF2VF / VF2PF data areas are byte packed
union amd_sriov_msg_feature_flags {
struct {
- uint32_t error_log_collect : 1;
- uint32_t host_load_ucodes : 1;
- uint32_t host_flr_vramlost : 1;
- uint32_t mm_bw_management : 1;
- uint32_t pp_one_vf_mode : 1;
- uint32_t reg_indirect_acc : 1;
- uint32_t reserved : 26;
+ uint32_t error_log_collect : 1;
+ uint32_t host_load_ucodes : 1;
+ uint32_t host_flr_vramlost : 1;
+ uint32_t mm_bw_management : 1;
+ uint32_t pp_one_vf_mode : 1;
+ uint32_t reg_indirect_acc : 1;
+ uint32_t av1_support : 1;
+ uint32_t vcn_rb_decouple : 1;
+ uint32_t mes_info_dump_enable : 1;
+ uint32_t ras_caps : 1;
+ uint32_t ras_telemetry : 1;
+ uint32_t ras_cper : 1;
+ uint32_t xgmi_ta_ext_peer_link : 1;
+ uint32_t reserved : 19;
} flags;
- uint32_t all;
+ uint32_t all;
};
union amd_sriov_reg_access_flags {
struct {
- uint32_t vf_reg_access_ih : 1;
- uint32_t vf_reg_access_mmhub : 1;
- uint32_t vf_reg_access_gc : 1;
- uint32_t reserved : 29;
+ uint32_t vf_reg_access_ih : 1;
+ uint32_t vf_reg_access_mmhub : 1;
+ uint32_t vf_reg_access_gc : 1;
+ uint32_t vf_reg_access_l1_tlb_cntl : 1;
+ uint32_t vf_reg_access_sq_config : 1;
+ uint32_t reserved : 27;
} flags;
uint32_t all;
};
+union amd_sriov_ras_caps {
+ struct {
+ uint64_t block_umc : 1;
+ uint64_t block_sdma : 1;
+ uint64_t block_gfx : 1;
+ uint64_t block_mmhub : 1;
+ uint64_t block_athub : 1;
+ uint64_t block_pcie_bif : 1;
+ uint64_t block_hdp : 1;
+ uint64_t block_xgmi_wafl : 1;
+ uint64_t block_df : 1;
+ uint64_t block_smn : 1;
+ uint64_t block_sem : 1;
+ uint64_t block_mp0 : 1;
+ uint64_t block_mp1 : 1;
+ uint64_t block_fuse : 1;
+ uint64_t block_mca : 1;
+ uint64_t block_vcn : 1;
+ uint64_t block_jpeg : 1;
+ uint64_t block_ih : 1;
+ uint64_t block_mpio : 1;
+ uint64_t block_mmsch : 1;
+ uint64_t poison_propogation_mode : 1;
+ uint64_t reserved : 43;
+ } bits;
+ uint64_t all;
+};
+
union amd_sriov_msg_os_info {
struct {
- uint32_t windows : 1;
- uint32_t reserved : 31;
+ uint32_t windows : 1;
+ uint32_t reserved : 31;
} info;
- uint32_t all;
+ uint32_t all;
};
struct amd_sriov_msg_uuid_info {
@@ -156,6 +254,7 @@ struct amd_sriov_msg_pf2vf_info_header {
uint32_t reserved[2];
};
+#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (55)
struct amd_sriov_msg_pf2vf_info {
/* header contains size and version */
struct amd_sriov_msg_pf2vf_info_header header;
@@ -204,11 +303,19 @@ struct amd_sriov_msg_pf2vf_info {
} mm_bw_management[AMD_SRIOV_MSG_RESERVE_VCN_INST];
/* UUID info */
struct amd_sriov_msg_uuid_info uuid_info;
- /* pcie atomic Ops info */
- uint32_t pcie_atomic_ops_enabled_flags;
+ /* PCIE atomic ops support flag */
+ uint32_t pcie_atomic_ops_support_flags;
+ /* Portion of GPU memory occupied by VF. MAX value is 65535, but set to uint32_t to maintain alignment with reserved size */
+ uint32_t gpu_capacity;
+ /* vf bdf on host pci tree for debug only */
+ uint32_t bdf_on_host;
+ uint32_t more_bp; //Reserved for future use.
+ union amd_sriov_ras_caps ras_en_caps;
+ union amd_sriov_ras_caps ras_telemetry_en_caps;
+
/* reserved */
- uint32_t reserved[256 - 48];
-};
+ uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE];
+} __packed;
struct amd_sriov_msg_vf2pf_info_header {
/* the total structure size in byte */
@@ -219,12 +326,13 @@ struct amd_sriov_msg_vf2pf_info_header {
uint32_t reserved[2];
};
+#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (73)
struct amd_sriov_msg_vf2pf_info {
/* header contains size and version */
struct amd_sriov_msg_vf2pf_info_header header;
uint32_t checksum;
/* driver version */
- uint8_t driver_version[64];
+ uint8_t driver_version[64];
/* driver certification, 1=WHQL, 0=None */
uint32_t driver_cert;
/* guest OS type and version */
@@ -258,14 +366,16 @@ struct amd_sriov_msg_vf2pf_info {
uint32_t fb_size;
/* guest ucode data, each one is 1.25 Dword */
struct {
- uint8_t id;
+ uint8_t id;
uint32_t version;
} ucode_info[AMD_SRIOV_MSG_RESERVE_UCODE];
uint64_t dummy_page_addr;
-
+ /* FB allocated for guest MES to record UQ info */
+ uint64_t mes_info_addr;
+ uint32_t mes_info_size;
/* reserved */
- uint32_t reserved[256-70];
-};
+ uint32_t reserved[256 - AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE];
+} __packed;
/* mailbox message send from guest to host */
enum amd_sriov_mailbox_request_message {
@@ -275,22 +385,98 @@ enum amd_sriov_mailbox_request_message {
MB_REQ_MSG_REL_GPU_FINI_ACCESS,
MB_REQ_MSG_REQ_GPU_RESET_ACCESS,
MB_REQ_MSG_REQ_GPU_INIT_DATA,
-
- MB_REQ_MSG_LOG_VF_ERROR = 200,
+ MB_REQ_MSG_PSP_VF_CMD_RELAY,
+
+ MB_REQ_MSG_LOG_VF_ERROR = 200,
+ MB_REQ_MSG_READY_TO_RESET = 201,
+ MB_REQ_MSG_RAS_POISON = 202,
+ MB_REQ_RAS_ERROR_COUNT = 203,
+ MB_REQ_RAS_CPER_DUMP = 204,
+ MB_REQ_RAS_BAD_PAGES = 205,
};
/* mailbox message send from host to guest */
enum amd_sriov_mailbox_response_message {
- MB_RES_MSG_CLR_MSG_BUF = 0,
- MB_RES_MSG_READY_TO_ACCESS_GPU = 1,
- MB_RES_MSG_FLR_NOTIFICATION,
- MB_RES_MSG_FLR_NOTIFICATION_COMPLETION,
- MB_RES_MSG_SUCCESS,
- MB_RES_MSG_FAIL,
- MB_RES_MSG_QUERY_ALIVE,
- MB_RES_MSG_GPU_INIT_DATA_READY,
-
- MB_RES_MSG_TEXT_MESSAGE = 255
+ MB_RES_MSG_CLR_MSG_BUF = 0,
+ MB_RES_MSG_READY_TO_ACCESS_GPU = 1,
+ MB_RES_MSG_FLR_NOTIFICATION = 2,
+ MB_RES_MSG_FLR_NOTIFICATION_COMPLETION = 3,
+ MB_RES_MSG_SUCCESS = 4,
+ MB_RES_MSG_FAIL = 5,
+ MB_RES_MSG_QUERY_ALIVE = 6,
+ MB_RES_MSG_GPU_INIT_DATA_READY = 7,
+ MB_RES_MSG_RAS_POISON_READY = 8,
+ MB_RES_MSG_PF_SOFT_FLR_NOTIFICATION = 9,
+ MB_RES_MSG_GPU_RMA = 10,
+ MB_RES_MSG_RAS_ERROR_COUNT_READY = 11,
+ MB_REQ_RAS_CPER_DUMP_READY = 14,
+ MB_RES_MSG_RAS_BAD_PAGES_READY = 15,
+ MB_RES_MSG_RAS_BAD_PAGES_NOTIFICATION = 16,
+ MB_RES_MSG_UNRECOV_ERR_NOTIFICATION = 17,
+ MB_RES_MSG_TEXT_MESSAGE = 255
+};
+
+enum amd_sriov_ras_telemetry_gpu_block {
+ RAS_TELEMETRY_GPU_BLOCK_UMC = 0,
+ RAS_TELEMETRY_GPU_BLOCK_SDMA = 1,
+ RAS_TELEMETRY_GPU_BLOCK_GFX = 2,
+ RAS_TELEMETRY_GPU_BLOCK_MMHUB = 3,
+ RAS_TELEMETRY_GPU_BLOCK_ATHUB = 4,
+ RAS_TELEMETRY_GPU_BLOCK_PCIE_BIF = 5,
+ RAS_TELEMETRY_GPU_BLOCK_HDP = 6,
+ RAS_TELEMETRY_GPU_BLOCK_XGMI_WAFL = 7,
+ RAS_TELEMETRY_GPU_BLOCK_DF = 8,
+ RAS_TELEMETRY_GPU_BLOCK_SMN = 9,
+ RAS_TELEMETRY_GPU_BLOCK_SEM = 10,
+ RAS_TELEMETRY_GPU_BLOCK_MP0 = 11,
+ RAS_TELEMETRY_GPU_BLOCK_MP1 = 12,
+ RAS_TELEMETRY_GPU_BLOCK_FUSE = 13,
+ RAS_TELEMETRY_GPU_BLOCK_MCA = 14,
+ RAS_TELEMETRY_GPU_BLOCK_VCN = 15,
+ RAS_TELEMETRY_GPU_BLOCK_JPEG = 16,
+ RAS_TELEMETRY_GPU_BLOCK_IH = 17,
+ RAS_TELEMETRY_GPU_BLOCK_MPIO = 18,
+ RAS_TELEMETRY_GPU_BLOCK_COUNT = 19,
+};
+
+struct amd_sriov_ras_telemetry_header {
+ uint32_t checksum;
+ uint32_t used_size;
+ uint32_t reserved[2];
+};
+
+struct amd_sriov_ras_telemetry_error_count {
+ struct {
+ uint32_t ce_count;
+ uint32_t ue_count;
+ uint32_t de_count;
+ uint32_t ce_overflow_count;
+ uint32_t ue_overflow_count;
+ uint32_t de_overflow_count;
+ uint32_t reserved[6];
+ } block[RAS_TELEMETRY_GPU_BLOCK_COUNT];
+};
+
+struct amd_sriov_ras_cper_dump {
+ uint32_t more;
+ uint64_t overflow_count;
+ uint64_t count;
+ uint64_t wptr;
+ uint32_t buf[];
+};
+
+struct amd_sriov_ras_chk_criti {
+ uint32_t hit;
+};
+
+struct amdsriov_ras_telemetry {
+ struct amd_sriov_ras_telemetry_header header;
+
+ union {
+ struct amd_sriov_ras_telemetry_error_count error_count;
+ struct amd_sriov_ras_cper_dump cper_dump;
+ struct amd_sriov_ras_chk_criti chk_criti;
+ } body;
};
/* version data stored in MAILBOX_MSGBUF_RCV_DW1 for future expansion */
@@ -298,17 +484,15 @@ enum amd_sriov_gpu_init_data_version {
GPU_INIT_DATA_READY_V1 = 1,
};
-#pragma pack(pop) // Restore previous packing option
+#pragma pack(pop) // Restore previous packing option
/* checksum function between host and guest */
-unsigned int amd_sriov_msg_checksum(void *obj,
- unsigned long obj_size,
- unsigned int key,
- unsigned int checksum);
+unsigned int amd_sriov_msg_checksum(void *obj, unsigned long obj_size, unsigned int key,
+ unsigned int checksum);
/* assertion at compile time */
#ifdef __linux__
-#define stringification(s) _stringification(s)
+#define stringification(s) _stringification(s)
#define _stringification(s) #s
_Static_assert(
@@ -319,13 +503,11 @@ _Static_assert(
sizeof(struct amd_sriov_msg_pf2vf_info) == AMD_SRIOV_MSG_SIZE_KB << 10,
"amd_sriov_msg_pf2vf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB) " KB");
-_Static_assert(
- AMD_SRIOV_MSG_RESERVE_UCODE % 4 == 0,
- "AMD_SRIOV_MSG_RESERVE_UCODE must be multiple of 4");
+_Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE % 4 == 0,
+ "AMD_SRIOV_MSG_RESERVE_UCODE must be multiple of 4");
-_Static_assert(
- AMD_SRIOV_MSG_RESERVE_UCODE > AMD_SRIOV_UCODE_ID__MAX,
- "AMD_SRIOV_MSG_RESERVE_UCODE must be bigger than AMD_SRIOV_UCODE_ID__MAX");
+_Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE > AMD_SRIOV_UCODE_ID__MAX,
+ "AMD_SRIOV_MSG_RESERVE_UCODE must be bigger than AMD_SRIOV_UCODE_ID__MAX");
#undef _stringification
#undef stringification
diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
new file mode 100644
index 000000000000..f9e2edf5260b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
@@ -0,0 +1,986 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "soc15.h"
+
+#include "soc15_common.h"
+#include "amdgpu_reg_state.h"
+#include "amdgpu_xcp.h"
+#include "gfx_v9_4_3.h"
+#include "gfxhub_v1_2.h"
+#include "sdma_v4_4_2.h"
+#include "amdgpu_ip.h"
+
+/*
+ * Bitmask of the @num_inst consecutive instances owned by partition @xcp_id,
+ * or 0 when @num_inst is 0. Both arguments are fully parenthesized so that
+ * expression arguments (e.g. "xcp_id / num_shared_vcn") expand safely
+ * regardless of operator precedence.
+ */
+#define XCP_INST_MASK(num_inst, xcp_id)                                      \
+	((num_inst) ? GENMASK((num_inst) - 1, 0) << ((xcp_id) * (num_inst)) : 0)
+
+/* Fill adev->doorbell_index with the LAYOUT1 doorbell assignments. */
+void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev)
+{
+	int inst;
+
+	/* KIQ, MEC rings, user queues and the per-XCC doorbell range */
+	adev->doorbell_index.kiq = AMDGPU_DOORBELL_LAYOUT1_KIQ_START;
+	adev->doorbell_index.mec_ring0 = AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START;
+	adev->doorbell_index.userqueue_start = AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START;
+	adev->doorbell_index.userqueue_end = AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END;
+	adev->doorbell_index.xcc_doorbell_range = AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE;
+
+	/* IH, VCN and the non-CP window bounds */
+	adev->doorbell_index.ih = AMDGPU_DOORBELL_LAYOUT1_IH;
+	adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_DOORBELL_LAYOUT1_VCN_START;
+	adev->doorbell_index.first_non_cp = AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP;
+	adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP;
+	adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1;
+
+	/* SDMA engines are spaced half a doorbell range apart */
+	adev->doorbell_index.sdma_doorbell_range = 20;
+	inst = 0;
+	while (inst < adev->sdma.num_instances) {
+		adev->doorbell_index.sdma_engine[inst] =
+			AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START +
+			inst * (adev->doorbell_index.sdma_doorbell_range >> 1);
+		inst++;
+	}
+}
+
+/*
+ * SMN addressing across AIDs uses a fixed pattern in the upper address bits:
+ *   bit[34]    - set for a cross-AID access
+ *   bit[33:32] - id of the target AID (0..3, at most 4 AIDs)
+ *
+ * Returns the offset to add to an SMN address in order to reach AID @ext_id.
+ */
+u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id)
+{
+	const u64 cross_aid_flag = 1ULL << 34;
+
+	/* AID 0 is routed locally: bits [34:32] stay zero */
+	if (!ext_id)
+		return 0;
+
+	/* Host-initiated accesses to any non-zero AID are cross-AID traffic */
+	return ((u64)(ext_id & 0x3) << 32) | cross_aid_flag;
+}
+
+/*
+ * Derive the compute partition mode from the number of XCCs and the number
+ * of XCCs per partition reported by the gfx get_xccs_per_xcp() callback.
+ * Returns AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE when no mode can be derived.
+ */
+static enum amdgpu_gfx_partition
+__aqua_vanjaram_calc_xcp_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+	struct amdgpu_device *adev = xcp_mgr->adev;
+	int total_xcc = NUM_XCC(adev->gfx.xcc_mask);
+	int xcc_per_xcp = 0;
+	int num_parts = 0;
+
+	if (adev->gfx.funcs->get_xccs_per_xcp)
+		xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev);
+
+	/* One XCC per partition is CPX regardless of the partition count */
+	if (xcc_per_xcp == 1)
+		return AMDGPU_CPX_PARTITION_MODE;
+
+	/* Only an even split of the XCCs gives a usable partition count */
+	if (xcc_per_xcp && !(total_xcc % xcc_per_xcp))
+		num_parts = total_xcc / xcc_per_xcp;
+
+	switch (num_parts) {
+	case 1:
+		return AMDGPU_SPX_PARTITION_MODE;
+	case 2:
+		return AMDGPU_DPX_PARTITION_MODE;
+	case 3:
+		return AMDGPU_TPX_PARTITION_MODE;
+	case 4:
+		return AMDGPU_QPX_PARTITION_MODE;
+	default:
+		return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+	}
+}
+
+/*
+ * Report the current compute partition mode.
+ *
+ * A mode is first derived from the XCC layout. SR-IOV virtual functions
+ * return that derived value directly. Otherwise the value comes from the
+ * NBIO get_compute_partition_mode() callback when one is provided; a
+ * mismatch with the derived mode is logged but the reported value is still
+ * returned. When the callback is absent the function returns
+ * AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE rather than the derived mode.
+ */
+static int aqua_vanjaram_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ enum amdgpu_gfx_partition derv_mode,
+ mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+ struct amdgpu_device *adev = xcp_mgr->adev;
+
+ derv_mode = __aqua_vanjaram_calc_xcp_mode(xcp_mgr);
+
+ if (amdgpu_sriov_vf(adev))
+ return derv_mode;
+
+ if (adev->nbio.funcs->get_compute_partition_mode) {
+ mode = adev->nbio.funcs->get_compute_partition_mode(adev);
+ if (mode != derv_mode) {
+ dev_warn(
+ adev->dev,
+ "Mismatch in compute partition mode - reported : %d derived : %d",
+ mode, derv_mode);
+ /* An underivable mode additionally triggers a bus status check */
+ if (derv_mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
+ amdgpu_device_bus_status_check(adev);
+ }
+ }
+
+ return mode;
+}
+
+/*
+ * Number of XCCs each partition receives in partition mode @mode.
+ * Returns 0 for a mode that is not one of the SPX/DPX/TPX/QPX/CPX modes.
+ */
+static int __aqua_vanjaram_get_xcc_per_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int mode)
+{
+	const int total_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask);
+
+	switch (mode) {
+	case AMDGPU_SPX_PARTITION_MODE:
+		return total_xcc;
+	case AMDGPU_DPX_PARTITION_MODE:
+		return total_xcc / 2;
+	case AMDGPU_TPX_PARTITION_MODE:
+		return total_xcc / 3;
+	case AMDGPU_QPX_PARTITION_MODE:
+		return total_xcc / 4;
+	case AMDGPU_CPX_PARTITION_MODE:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Fill @ip with the instance mask and, where assigned, the IP function table
+ * of IP block @ip_id for partition @xcp_id.
+ *
+ * Returns 0 on success, or -EINVAL when xcp_mgr->mode is not a known
+ * partition mode or @ip_id is not a handled IP block.
+ */
+static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ enum AMDGPU_XCP_IP_BLOCK ip_id,
+ struct amdgpu_xcp_ip *ip)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_sdma, num_vcn, num_shared_vcn, num_xcp;
+ int num_xcc_xcp, num_sdma_xcp, num_vcn_xcp;
+
+ num_sdma = adev->sdma.num_instances;
+ num_vcn = adev->vcn.num_vcn_inst;
+ num_shared_vcn = 1;
+
+ /* NOTE(review): divides by gfx.num_xcc_per_xcp - presumably non-zero here; confirm */
+ num_xcc_xcp = adev->gfx.num_xcc_per_xcp;
+ num_xcp = NUM_XCC(adev->gfx.xcc_mask) / num_xcc_xcp;
+
+ /* Every known mode spreads SDMA and VCN instances evenly, rounding up */
+ switch (xcp_mgr->mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ case AMDGPU_DPX_PARTITION_MODE:
+ case AMDGPU_TPX_PARTITION_MODE:
+ case AMDGPU_QPX_PARTITION_MODE:
+ case AMDGPU_CPX_PARTITION_MODE:
+ num_sdma_xcp = DIV_ROUND_UP(num_sdma, num_xcp);
+ num_vcn_xcp = DIV_ROUND_UP(num_vcn, num_xcp);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* With more partitions than VCN instances, several partitions share one */
+ if (num_vcn && num_xcp > num_vcn)
+ num_shared_vcn = num_xcp / num_vcn;
+
+ switch (ip_id) {
+ case AMDGPU_XCP_GFXHUB:
+ ip->inst_mask = XCP_INST_MASK(num_xcc_xcp, xcp_id);
+ ip->ip_funcs = &gfxhub_v1_2_xcp_funcs;
+ break;
+ case AMDGPU_XCP_GFX:
+ ip->inst_mask = XCP_INST_MASK(num_xcc_xcp, xcp_id);
+ ip->ip_funcs = &gfx_v9_4_3_xcp_funcs;
+ break;
+ case AMDGPU_XCP_SDMA:
+ ip->inst_mask = XCP_INST_MASK(num_sdma_xcp, xcp_id);
+ ip->ip_funcs = &sdma_v4_4_2_xcp_funcs;
+ break;
+ case AMDGPU_XCP_VCN:
+ /* Partitions sharing a VCN instance map to the same mask */
+ ip->inst_mask =
+ XCP_INST_MASK(num_vcn_xcp, xcp_id / num_shared_vcn);
+ /* TODO : Assign IP funcs */
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ip->ip_id = ip_id;
+
+ return 0;
+}
+
+/*
+ * Look up, for compute partition mode @px_mode, the number of partitions
+ * (*@num_xcp) and the bitmask of compatible NPS memory modes (*@nps_modes).
+ *
+ * Returns 0 on success. Returns -EINVAL, leaving both outputs untouched,
+ * when an output pointer is NULL, the mode is not in supp_xcp_modes, or the
+ * mode is not recognised.
+ */
+static int __aqua_vanjaram_get_px_mode_info(struct amdgpu_xcp_mgr *xcp_mgr,
+					    int px_mode, int *num_xcp,
+					    uint16_t *nps_modes)
+{
+	struct amdgpu_device *adev = xcp_mgr->adev;
+	uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+	uint16_t compat;
+	int parts;
+
+	if (!num_xcp || !nps_modes)
+		return -EINVAL;
+	if (!(xcp_mgr->supp_xcp_modes & BIT(px_mode)))
+		return -EINVAL;
+
+	switch (px_mode) {
+	case AMDGPU_SPX_PARTITION_MODE:
+		parts = 1;
+		compat = BIT(AMDGPU_NPS1_PARTITION_MODE);
+		break;
+	case AMDGPU_DPX_PARTITION_MODE:
+		parts = 2;
+		compat = BIT(AMDGPU_NPS1_PARTITION_MODE) |
+			 BIT(AMDGPU_NPS2_PARTITION_MODE);
+		break;
+	case AMDGPU_TPX_PARTITION_MODE:
+		parts = 3;
+		compat = BIT(AMDGPU_NPS1_PARTITION_MODE) |
+			 BIT(AMDGPU_NPS4_PARTITION_MODE);
+		break;
+	case AMDGPU_QPX_PARTITION_MODE:
+	case AMDGPU_CPX_PARTITION_MODE:
+		/* QPX has four partitions; CPX has one partition per XCC */
+		if (px_mode == AMDGPU_QPX_PARTITION_MODE)
+			parts = 4;
+		else
+			parts = NUM_XCC(adev->gfx.xcc_mask);
+		compat = BIT(AMDGPU_NPS1_PARTITION_MODE) |
+			 BIT(AMDGPU_NPS4_PARTITION_MODE);
+		/* GC 9.5.0 additionally allows NPS2 in these modes */
+		if (gc_ver == IP_VERSION(9, 5, 0))
+			compat |= BIT(AMDGPU_NPS2_PARTITION_MODE);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	*num_xcp = parts;
+	*nps_modes = compat;
+
+	return 0;
+}
+
+/*
+ * Describe, for partition mode @mode, how each resource type (XCC, DMA, DEC,
+ * JPEG) is divided among partitions, and which NPS memory modes remain
+ * compatible. Results are written to @xcp_cfg.
+ *
+ * For every resource, num_inst is the number of instances one partition
+ * gets and num_shared is the number of partitions sharing an instance when
+ * there are fewer instances than partitions. JPEG instance counts are scaled
+ * by the number of JPEG rings.
+ *
+ * Returns 0 on success, -EINVAL if @mode is not supported, or the error from
+ * __aqua_vanjaram_get_px_mode_info().
+ */
+static int aqua_vanjaram_get_xcp_res_info(struct amdgpu_xcp_mgr *xcp_mgr,
+					  int mode,
+					  struct amdgpu_xcp_cfg *xcp_cfg)
+{
+	struct amdgpu_device *adev = xcp_mgr->adev;
+	int max_res[AMDGPU_XCP_RES_MAX] = {};
+	bool res_lt_xcp;
+	int num_xcp, i, r;
+	u16 nps_modes;
+
+	if (!(xcp_mgr->supp_xcp_modes & BIT(mode)))
+		return -EINVAL;
+
+	max_res[AMDGPU_XCP_RES_XCC] = NUM_XCC(adev->gfx.xcc_mask);
+	max_res[AMDGPU_XCP_RES_DMA] = adev->sdma.num_instances;
+	max_res[AMDGPU_XCP_RES_DEC] = adev->vcn.num_vcn_inst;
+	max_res[AMDGPU_XCP_RES_JPEG] = adev->jpeg.num_jpeg_inst;
+
+	r = __aqua_vanjaram_get_px_mode_info(xcp_mgr, mode, &num_xcp, &nps_modes);
+	if (r)
+		return r;
+
+	xcp_cfg->compatible_nps_modes =
+		(adev->gmc.supported_nps_modes & nps_modes);
+	xcp_cfg->num_res = ARRAY_SIZE(max_res);
+
+	for (i = 0; i < xcp_cfg->num_res; i++) {
+		xcp_cfg->xcp_res[i].id = i;
+
+		/*
+		 * A resource with no instances at all cannot be shared;
+		 * report it as absent instead of dividing by zero below.
+		 */
+		if (!max_res[i]) {
+			xcp_cfg->xcp_res[i].num_inst = 0;
+			xcp_cfg->xcp_res[i].num_shared = 1;
+			continue;
+		}
+
+		res_lt_xcp = max_res[i] < num_xcp;
+		xcp_cfg->xcp_res[i].num_inst =
+			res_lt_xcp ? 1 : max_res[i] / num_xcp;
+		/* JPEG is accounted per ring rather than per instance */
+		if (i == AMDGPU_XCP_RES_JPEG)
+			xcp_cfg->xcp_res[i].num_inst *=
+				adev->jpeg.num_jpeg_rings;
+		xcp_cfg->xcp_res[i].num_shared =
+			res_lt_xcp ? num_xcp / max_res[i] : 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Choose a compute partition mode from the number of memory partitions.
+ * Returns AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE when none of the known
+ * memory-partition/XCC combinations matches.
+ */
+static enum amdgpu_gfx_partition
+__aqua_vanjaram_get_auto_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+	struct amdgpu_device *adev = xcp_mgr->adev;
+	const int total_xcc = NUM_XCC(adev->gfx.xcc_mask);
+	const bool is_apu = !!(adev->flags & AMD_IS_APU);
+
+	if (adev->gmc.num_mem_partitions == 1)
+		return AMDGPU_SPX_PARTITION_MODE;
+
+	if (adev->gmc.num_mem_partitions == total_xcc)
+		return AMDGPU_CPX_PARTITION_MODE;
+
+	/* Half as many memory partitions as XCCs: TPX on APUs, CPX otherwise */
+	if (adev->gmc.num_mem_partitions == total_xcc / 2) {
+		if (is_apu)
+			return AMDGPU_TPX_PARTITION_MODE;
+		return AMDGPU_CPX_PARTITION_MODE;
+	}
+
+	if (!is_apu && adev->gmc.num_mem_partitions == 2)
+		return AMDGPU_DPX_PARTITION_MODE;
+
+	return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+}
+
+/*
+ * Check whether compute partition mode @mode can be used right now.
+ *
+ * A mode is valid when the current memory partition (NPS) mode is among the
+ * NPS modes compatible with it and the XCC count satisfies the per-mode
+ * constraint tested in the switch below. Returns false for unknown or
+ * unsupported modes.
+ */
+static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr,
+ enum amdgpu_gfx_partition mode)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_xcc, num_xccs_per_xcp, r;
+ int num_xcp, nps_mode;
+ u16 supp_nps_modes;
+ bool comp_mode;
+
+ /* NOTE(review): callback is invoked without a NULL check - confirm it is always set */
+ nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ r = __aqua_vanjaram_get_px_mode_info(xcp_mgr, mode, &num_xcp,
+ &supp_nps_modes);
+ if (r)
+ return false;
+
+ /* true when the current NPS mode is compatible with @mode */
+ comp_mode = !!(BIT(nps_mode) & supp_nps_modes);
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ switch (mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ return comp_mode && num_xcc > 0;
+ case AMDGPU_DPX_PARTITION_MODE:
+ return comp_mode && (num_xcc % 4) == 0;
+ case AMDGPU_TPX_PARTITION_MODE:
+ return comp_mode && ((num_xcc % 3) == 0);
+ case AMDGPU_QPX_PARTITION_MODE:
+ /* QPX needs at least two XCCs in each of its four partitions */
+ num_xccs_per_xcp = num_xcc / 4;
+ return comp_mode && (num_xccs_per_xcp >= 2);
+ case AMDGPU_CPX_PARTITION_MODE:
+ return comp_mode && (num_xcc > 1);
+ default:
+ return false;
+ }
+
+ /* Not reached: every switch branch above returns */
+ return false;
+}
+
+/*
+ * Recompute xcp_mgr->avail_xcp_modes: the subset of supp_xcp_modes that
+ * __aqua_vanjaram_is_valid_mode() currently accepts.
+ */
+static void __aqua_vanjaram_update_available_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ int mode;
+
+ xcp_mgr->avail_xcp_modes = 0;
+
+ /* Visit each mode whose bit is set in the supported-modes mask */
+ for_each_inst(mode, xcp_mgr->supp_xcp_modes) {
+ if (__aqua_vanjaram_is_valid_mode(xcp_mgr, mode))
+ xcp_mgr->avail_xcp_modes |= BIT(mode);
+ }
+}
+
+/*
+ * Switch the device to compute partition mode @mode.
+ *
+ * AMDGPU_AUTO_COMPUTE_PARTITION_MODE is resolved to a concrete mode first;
+ * any other mode must pass __aqua_vanjaram_is_valid_mode(). KFD is locked
+ * around the switch when it is initialised and the device is neither in
+ * reset nor suspending. On success *@num_xcps holds the new partition count.
+ *
+ * Returns 0 on success, -EINVAL for an invalid or unusable mode, or the
+ * error reported by the KFD lock or the pre/post switch hooks.
+ */
+static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
+					       int mode, int *num_xcps)
+{
+	int num_xcc_per_xcp, num_xcc, ret;
+	struct amdgpu_device *adev;
+	u32 flags = 0;
+
+	adev = xcp_mgr->adev;
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+	if (mode == AMDGPU_AUTO_COMPUTE_PARTITION_MODE) {
+		mode = __aqua_vanjaram_get_auto_mode(xcp_mgr);
+		if (mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) {
+			dev_err(adev->dev,
+				"Invalid config, no compatible compute partition mode found, available memory partitions: %d",
+				adev->gmc.num_mem_partitions);
+			return -EINVAL;
+		}
+	} else if (!__aqua_vanjaram_is_valid_mode(xcp_mgr, mode)) {
+		dev_err(adev->dev,
+			"Invalid compute partition mode requested, requested: %s, available memory partitions: %d",
+			amdgpu_gfx_compute_mode_desc(mode), adev->gmc.num_mem_partitions);
+		return -EINVAL;
+	}
+
+	/*
+	 * The auto-selected mode is not run through the validity check, so
+	 * the XCC count may be too small for it. Reject that here, before any
+	 * lock is taken or hardware is touched, because num_xcc_per_xcp is
+	 * used as a divisor below.
+	 */
+	num_xcc_per_xcp = __aqua_vanjaram_get_xcc_per_xcp(xcp_mgr, mode);
+	if (!num_xcc_per_xcp) {
+		dev_err(adev->dev,
+			"Invalid config, compute partition mode %s leaves no XCC per partition",
+			amdgpu_gfx_compute_mode_desc(mode));
+		return -EINVAL;
+	}
+
+	if (adev->kfd.init_complete && !amdgpu_in_reset(adev) &&
+	    !adev->in_suspend)
+		flags |= AMDGPU_XCP_OPS_KFD;
+
+	if (flags & AMDGPU_XCP_OPS_KFD) {
+		ret = amdgpu_amdkfd_check_and_lock_kfd(adev);
+		if (ret)
+			goto out;
+	}
+
+	ret = amdgpu_xcp_pre_partition_switch(xcp_mgr, flags);
+	if (ret)
+		goto unlock;
+
+	if (adev->gfx.funcs->switch_partition_mode)
+		adev->gfx.funcs->switch_partition_mode(xcp_mgr->adev,
+						       num_xcc_per_xcp);
+
+	/* Init info about new xcps */
+	*num_xcps = num_xcc / num_xcc_per_xcp;
+	amdgpu_xcp_init(xcp_mgr, *num_xcps, mode);
+
+	ret = amdgpu_xcp_post_partition_switch(xcp_mgr, flags);
+	if (!ret)
+		__aqua_vanjaram_update_available_partition_mode(xcp_mgr);
+unlock:
+	if (flags & AMDGPU_XCP_OPS_KFD)
+		amdgpu_amdkfd_unlock_kfd(adev);
+out:
+	return ret;
+}
+
+/*
+ * Map XCC @xcc_id to a memory partition id, written to *@mem_id: the XCC's
+ * partition index divided by the number of partitions per memory partition.
+ * Always returns 0.
+ */
+static int __aqua_vanjaram_get_xcp_mem_id(struct amdgpu_device *adev,
+					  int xcc_id, uint8_t *mem_id)
+{
+	/* memory/spatial modes validation check is already done */
+	uint8_t xcp_idx = xcc_id / adev->gfx.num_xcc_per_xcp;
+
+	*mem_id = xcp_idx / adev->xcp_mgr->num_xcp_per_mem_partition;
+
+	return 0;
+}
+
+/*
+ * Find the memory partition id for partition @xcp and store it in *@mem_id.
+ *
+ * With a single memory partition the id is always 0. Otherwise the first
+ * XCC of the partition is used: when gmc.is_app_apu is not set the id is
+ * computed arithmetically, else the XCC's NUMA node (from ACPI) is matched
+ * against the nodes of the memory partitions.
+ *
+ * Returns 0 on success, -EINVAL when the partition has no GFX instances or
+ * no memory partition matches, or the error from the ACPI query.
+ */
+static int aqua_vanjaram_get_xcp_mem_id(struct amdgpu_xcp_mgr *xcp_mgr,
+ struct amdgpu_xcp *xcp, uint8_t *mem_id)
+{
+ struct amdgpu_numa_info numa_info;
+ struct amdgpu_device *adev;
+ uint32_t xcc_mask;
+ int r, i, xcc_id;
+
+ adev = xcp_mgr->adev;
+ /* TODO: BIOS is not returning the right info now
+ * Check on this later
+ */
+ /*
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ */
+ if (adev->gmc.num_mem_partitions == 1) {
+ /* Only one range */
+ *mem_id = 0;
+ return 0;
+ }
+
+ r = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &xcc_mask);
+ if (r || !xcc_mask)
+ return -EINVAL;
+
+ /* Lowest set bit in the mask = first XCC of this partition */
+ xcc_id = ffs(xcc_mask) - 1;
+ if (!adev->gmc.is_app_apu)
+ return __aqua_vanjaram_get_xcp_mem_id(adev, xcc_id, mem_id);
+
+ r = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info);
+
+ if (r)
+ return r;
+
+ /* Stay -EINVAL unless a memory partition on the same NUMA node is found */
+ r = -EINVAL;
+ for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
+ if (adev->gmc.mem_partitions[i].numa.node == numa_info.nid) {
+ *mem_id = i;
+ r = 0;
+ break;
+ }
+ }
+
+ return r;
+}
+
+/*
+ * Validate @ip and forward to __aqua_vanjaram_get_xcp_ip_info().
+ * Returns -EINVAL when @ip is NULL, else the helper's result.
+ */
+static int aqua_vanjaram_get_xcp_ip_details(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+					    enum AMDGPU_XCP_IP_BLOCK ip_id,
+					    struct amdgpu_xcp_ip *ip)
+{
+	return ip ? __aqua_vanjaram_get_xcp_ip_info(xcp_mgr, xcp_id, ip_id, ip) :
+		    -EINVAL;
+}
+
+/*
+ * Partition manager callbacks for this SOC. The table is not const:
+ * aqua_vanjaram_xcp_mgr_init() clears .switch_partition_mode for SR-IOV
+ * virtual functions.
+ */
+struct amdgpu_xcp_mgr_funcs aqua_vanjaram_xcp_funcs = {
+ .switch_partition_mode = &aqua_vanjaram_switch_partition_mode,
+ .query_partition_mode = &aqua_vanjaram_query_partition_mode,
+ .get_ip_details = &aqua_vanjaram_get_xcp_ip_details,
+ .get_xcp_res_info = &aqua_vanjaram_get_xcp_res_info,
+ .get_xcp_mem_id = &aqua_vanjaram_get_xcp_mem_id,
+};
+
+/*
+ * Create the partition manager for @adev, starting in the unknown partition
+ * mode with one partition, then refresh its supported modes.
+ * Returns 0 on success or the error from amdgpu_xcp_mgr_init().
+ */
+static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev)
+{
+ int ret;
+
+ /*
+ * SR-IOV virtual functions get no mode-switch callback. This edits the
+ * shared callback table, so it applies to every user of that table.
+ */
+ if (amdgpu_sriov_vf(adev))
+ aqua_vanjaram_xcp_funcs.switch_partition_mode = NULL;
+
+ ret = amdgpu_xcp_mgr_init(adev, AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE, 1,
+ &aqua_vanjaram_xcp_funcs);
+ if (ret)
+ return ret;
+
+ amdgpu_xcp_update_supported_modes(adev->xcp_mgr);
+ /* TODO: Default memory node affinity init */
+
+ return ret;
+}
+
+/*
+ * Derive SOC-level configuration for @adev: SDMA instance count, the mask of
+ * present AIDs, VCN/JPEG instance counts, the partition manager and the IP
+ * map. Returns 0 on success or the partition manager init error.
+ */
+int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev)
+{
+ u32 mask, avail_inst, inst_mask = adev->sdma.sdma_mask;
+ int ret, i;
+
+ /* generally 1 AID supports 4 instances */
+ adev->sdma.num_inst_per_aid = 4;
+ adev->sdma.num_instances = NUM_SDMA(adev->sdma.sdma_mask);
+
+ /* AID 0 is always marked present; scanning starts at AID 1 */
+ adev->aid_mask = i = 1;
+ inst_mask >>= adev->sdma.num_inst_per_aid;
+
+ /*
+ * Walk the SDMA mask one AID-sized group at a time. An AID is marked
+ * present when its group is fully populated, or when exactly the lower
+ * half (0x3) or the upper half (0xc) is populated.
+ */
+ for (mask = (1 << adev->sdma.num_inst_per_aid) - 1; inst_mask;
+ inst_mask >>= adev->sdma.num_inst_per_aid, ++i) {
+ avail_inst = inst_mask & mask;
+ if (avail_inst == mask || avail_inst == 0x3 ||
+ avail_inst == 0xc)
+ adev->aid_mask |= (1 << i);
+ }
+
+ /* Harvest config is not used for aqua vanjaram. VCN and JPEGs will be
+ * addressed based on logical instance ids.
+ */
+ adev->vcn.harvest_config = 0;
+ adev->vcn.num_inst_per_aid = 1;
+ adev->vcn.num_vcn_inst = hweight32(adev->vcn.inst_mask);
+ adev->jpeg.harvest_config = 0;
+ adev->jpeg.num_inst_per_aid = 1;
+ adev->jpeg.num_jpeg_inst = hweight32(adev->jpeg.inst_mask);
+
+ ret = aqua_vanjaram_xcp_mgr_init(adev);
+ if (ret)
+ return ret;
+
+ amdgpu_ip_map_init(adev);
+
+ return 0;
+}
+
+/* Read the SMN register at @smn_addr and record address and value in @regdata. */
+static void aqua_read_smn(struct amdgpu_device *adev,
+			  struct amdgpu_smn_reg_data *regdata,
+			  uint64_t smn_addr)
+{
+	regdata->value = RREG32_PCIE(smn_addr);
+	regdata->addr = smn_addr;
+}
+
+/* One run of SMN registers to dump: @num_regs registers, @incrx bytes apart. */
+struct aqua_reg_list {
+ uint64_t start_addr; /* address of the first register in the run */
+ uint32_t num_regs; /* number of registers to read */
+ uint32_t incrx; /* address step between consecutive registers */
+};
+
+/* Address step between consecutive 32-bit registers */
+#define DW_ADDR_INCR 4
+
+/*
+ * Read SMN register @smn_addr on AID @i. The address recorded in @regdata
+ * includes the cross-AID offset produced by the ASIC's
+ * encode_ext_smn_addressing() callback, and the value is read from that
+ * stored address.
+ */
+static void aqua_read_smn_ext(struct amdgpu_device *adev,
+ struct amdgpu_smn_reg_data *regdata,
+ uint64_t smn_addr, int i)
+{
+ regdata->addr =
+ smn_addr + adev->asic_funcs->encode_ext_smn_addressing(i);
+ regdata->value = RREG32_PCIE_EXT(regdata->addr);
+}
+
+/* SMN registers captured in the PCIe register state dump */
+#define smnreg_0x1A340218 0x1A340218
+#define smnreg_0x1A3402E4 0x1A3402E4
+#define smnreg_0x1A340294 0x1A340294
+#define smnreg_0x1A380088 0x1A380088
+
+/* Must equal the sum of num_regs in pcie_reg_addrs[] (1 + 1 + 6 + 6) */
+#define NUM_PCIE_SMN_REGS 14
+
+static struct aqua_reg_list pcie_reg_addrs[] = {
+	{ smnreg_0x1A340218, 1, 0 },
+	{ smnreg_0x1A3402E4, 1, 0 },
+	{ smnreg_0x1A340294, 6, DW_ADDR_INCR },
+	{ smnreg_0x1A380088, 6, DW_ADDR_INCR },
+};
+
+/*
+ * Dump the PCIe register state into @buf.
+ *
+ * The buffer receives a struct amdgpu_reg_state_pcie_v1_0 header followed by
+ * a single register instance: the SMN registers listed in pcie_reg_addrs[]
+ * plus device/link status, AER status and bus number registers read from the
+ * port two bridges upstream of the GPU. Fields that cannot be read (no such
+ * port, or no AER capability) are reported as 0.
+ *
+ * Returns the number of bytes written, -EINVAL for a NULL or empty buffer,
+ * or -EOVERFLOW when @max_size is too small.
+ */
+static ssize_t aqua_vanjaram_read_pcie_state(struct amdgpu_device *adev,
+					     void *buf, size_t max_size)
+{
+	struct amdgpu_reg_state_pcie_v1_0 *pcie_reg_state;
+	uint32_t start_addr, incrx, num_regs, szbuf;
+	struct amdgpu_regs_pcie_v1_0 *pcie_regs;
+	struct amdgpu_smn_reg_data *reg_data;
+	struct pci_dev *us_pdev = NULL, *ds_pdev;
+	int aer_cap, r, n;
+
+	if (!buf || !max_size)
+		return -EINVAL;
+
+	pcie_reg_state = (struct amdgpu_reg_state_pcie_v1_0 *)buf;
+
+	szbuf = sizeof(*pcie_reg_state) +
+		amdgpu_reginst_size(1, sizeof(*pcie_regs), NUM_PCIE_SMN_REGS);
+	/* Only one instance of pcie regs */
+	if (max_size < szbuf)
+		return -EOVERFLOW;
+
+	pcie_regs = (struct amdgpu_regs_pcie_v1_0 *)((uint8_t *)buf +
+						     sizeof(*pcie_reg_state));
+	pcie_regs->inst_header.instance = 0;
+	pcie_regs->inst_header.state = AMDGPU_INST_S_OK;
+	pcie_regs->inst_header.num_smn_regs = NUM_PCIE_SMN_REGS;
+
+	reg_data = pcie_regs->smn_reg_values;
+
+	for (r = 0; r < ARRAY_SIZE(pcie_reg_addrs); r++) {
+		start_addr = pcie_reg_addrs[r].start_addr;
+		incrx = pcie_reg_addrs[r].incrx;
+		num_regs = pcie_reg_addrs[r].num_regs;
+		for (n = 0; n < num_regs; n++) {
+			aqua_read_smn(adev, reg_data, start_addr + n * incrx);
+			++reg_data;
+		}
+	}
+
+	/*
+	 * Default the bridge-derived fields to 0 so the caller never sees
+	 * stale buffer contents when a read below has to be skipped.
+	 */
+	pcie_regs->device_status = 0;
+	pcie_regs->link_status = 0;
+	pcie_regs->pcie_corr_err_status = 0;
+	pcie_regs->pcie_uncorr_err_status = 0;
+	pcie_regs->sub_bus_number_latency = 0;
+
+	/*
+	 * pci_upstream_bridge() returns NULL for a device on a root bus
+	 * (e.g. a GPU passed through to a guest) and must not be handed a
+	 * NULL device, so walk the two levels with explicit checks.
+	 */
+	ds_pdev = pci_upstream_bridge(adev->pdev);
+	if (ds_pdev)
+		us_pdev = pci_upstream_bridge(ds_pdev);
+
+	if (us_pdev) {
+		pcie_capability_read_word(us_pdev, PCI_EXP_DEVSTA,
+					  &pcie_regs->device_status);
+		pcie_capability_read_word(us_pdev, PCI_EXP_LNKSTA,
+					  &pcie_regs->link_status);
+
+		aer_cap = pci_find_ext_capability(us_pdev, PCI_EXT_CAP_ID_ERR);
+		if (aer_cap) {
+			pci_read_config_dword(us_pdev,
+					      aer_cap + PCI_ERR_COR_STATUS,
+					      &pcie_regs->pcie_corr_err_status);
+			pci_read_config_dword(us_pdev,
+					      aer_cap + PCI_ERR_UNCOR_STATUS,
+					      &pcie_regs->pcie_uncorr_err_status);
+		}
+
+		pci_read_config_dword(us_pdev, PCI_PRIMARY_BUS,
+				      &pcie_regs->sub_bus_number_latency);
+	}
+
+	pcie_reg_state->common_header.structure_size = szbuf;
+	pcie_reg_state->common_header.format_revision = 1;
+	pcie_reg_state->common_header.content_revision = 0;
+	pcie_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_PCIE;
+	pcie_reg_state->common_header.num_instances = 1;
+
+	return pcie_reg_state->common_header.structure_size;
+}
+
+/* SMN registers captured per link in the XGMI register state dump */
+#define smnreg_0x11A00050 0x11A00050
+#define smnreg_0x11A00180 0x11A00180
+#define smnreg_0x11A00070 0x11A00070
+#define smnreg_0x11A00200 0x11A00200
+#define smnreg_0x11A0020C 0x11A0020C
+#define smnreg_0x11A00210 0x11A00210
+#define smnreg_0x11A00108 0x11A00108
+
+/* Address of @smnreg for link @l; the link index is placed at bit 20 */
+#define XGMI_LINK_REG(smnreg, l) ((smnreg) | ((l) << 20))
+
+/* Must equal the sum of num_regs in xgmi_reg_addrs[] (1 + 16 + 4 + 1 + 1 + 1 + 1) */
+#define NUM_XGMI_SMN_REGS 25
+
+static struct aqua_reg_list xgmi_reg_addrs[] = {
+	{ smnreg_0x11A00050, 1, 0 },
+	{ smnreg_0x11A00180, 16, DW_ADDR_INCR },
+	{ smnreg_0x11A00070, 4, DW_ADDR_INCR },
+	{ smnreg_0x11A00200, 1, 0 },
+	{ smnreg_0x11A0020C, 1, 0 },
+	{ smnreg_0x11A00210, 1, 0 },
+	{ smnreg_0x11A00108, 1, 0 },
+};
+
+/*
+ * Dump the XGMI register state into @buf.
+ *
+ * For every AID set in adev->aid_mask, two link instances are written back
+ * to back after the state header, each holding the registers listed in
+ * xgmi_reg_addrs[]. The header always reports max_xgmi_instances instances
+ * and a size covering all of them, regardless of how many were written.
+ *
+ * Returns the number of bytes described by the header, -EINVAL for a NULL or
+ * empty buffer, or -EOVERFLOW when @max_size is too small.
+ */
+static ssize_t aqua_vanjaram_read_xgmi_state(struct amdgpu_device *adev,
+ void *buf, size_t max_size)
+{
+ struct amdgpu_reg_state_xgmi_v1_0 *xgmi_reg_state;
+ uint32_t start_addr, incrx, num_regs, szbuf;
+ struct amdgpu_regs_xgmi_v1_0 *xgmi_regs;
+ struct amdgpu_smn_reg_data *reg_data;
+ const int max_xgmi_instances = 8;
+ int inst = 0, i, j, r, n;
+ const int xgmi_inst = 2;
+ void *p;
+
+ if (!buf || !max_size)
+ return -EINVAL;
+
+ xgmi_reg_state = (struct amdgpu_reg_state_xgmi_v1_0 *)buf;
+
+ szbuf = sizeof(*xgmi_reg_state) +
+ amdgpu_reginst_size(max_xgmi_instances, sizeof(*xgmi_regs),
+ NUM_XGMI_SMN_REGS);
+ /* The buffer must have room for the maximum number of XGMI instances */
+ if (max_size < szbuf)
+ return -EOVERFLOW;
+
+ p = &xgmi_reg_state->xgmi_state_regs[0];
+ for_each_inst(i, adev->aid_mask) {
+ for (j = 0; j < xgmi_inst; ++j) {
+ xgmi_regs = (struct amdgpu_regs_xgmi_v1_0 *)p;
+ xgmi_regs->inst_header.instance = inst++;
+
+ xgmi_regs->inst_header.state = AMDGPU_INST_S_OK;
+ xgmi_regs->inst_header.num_smn_regs = NUM_XGMI_SMN_REGS;
+
+ reg_data = xgmi_regs->smn_reg_values;
+
+ for (r = 0; r < ARRAY_SIZE(xgmi_reg_addrs); r++) {
+ start_addr = xgmi_reg_addrs[r].start_addr;
+ incrx = xgmi_reg_addrs[r].incrx;
+ num_regs = xgmi_reg_addrs[r].num_regs;
+
+ for (n = 0; n < num_regs; n++) {
+ aqua_read_smn_ext(
+ adev, reg_data,
+ XGMI_LINK_REG(start_addr, j) +
+ n * incrx,
+ i);
+ ++reg_data;
+ }
+ }
+ /* The next instance starts right after the last register written */
+ p = reg_data;
+ }
+ }
+
+ xgmi_reg_state->common_header.structure_size = szbuf;
+ xgmi_reg_state->common_header.format_revision = 1;
+ xgmi_reg_state->common_header.content_revision = 0;
+ xgmi_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_XGMI;
+ xgmi_reg_state->common_header.num_instances = max_xgmi_instances;
+
+ return xgmi_reg_state->common_header.structure_size;
+}
+
+/* SMN registers captured per link in the WAFL register state dump */
+#define smnreg_0x11C00070 0x11C00070
+#define smnreg_0x11C00210 0x11C00210
+
+static struct aqua_reg_list wafl_reg_addrs[] = {
+	{ smnreg_0x11C00070, 4, DW_ADDR_INCR },
+	{ smnreg_0x11C00210, 1, 0 },
+};
+
+/* Address of @smnreg for link @l; the link index is placed at bit 20 */
+#define WAFL_LINK_REG(smnreg, l) ((smnreg) | ((l) << 20))
+
+/* Must equal the sum of num_regs in wafl_reg_addrs[] (4 + 1) */
+#define NUM_WAFL_SMN_REGS 5
+
+/*
+ * Dump the WAFL register state into @buf.
+ *
+ * For every AID set in adev->aid_mask, two link instances are written back
+ * to back after the state header, each holding the registers listed in
+ * wafl_reg_addrs[]. The header always reports max_wafl_instances instances
+ * and a size covering all of them, regardless of how many were written.
+ *
+ * Returns the number of bytes described by the header, -EINVAL for a NULL or
+ * empty buffer, or -EOVERFLOW when @max_size is too small.
+ */
+static ssize_t aqua_vanjaram_read_wafl_state(struct amdgpu_device *adev,
+ void *buf, size_t max_size)
+{
+ struct amdgpu_reg_state_wafl_v1_0 *wafl_reg_state;
+ uint32_t start_addr, incrx, num_regs, szbuf;
+ struct amdgpu_regs_wafl_v1_0 *wafl_regs;
+ struct amdgpu_smn_reg_data *reg_data;
+ const int max_wafl_instances = 8;
+ int inst = 0, i, j, r, n;
+ const int wafl_inst = 2;
+ void *p;
+
+ if (!buf || !max_size)
+ return -EINVAL;
+
+ wafl_reg_state = (struct amdgpu_reg_state_wafl_v1_0 *)buf;
+
+ /* The buffer must have room for the maximum number of WAFL instances */
+ szbuf = sizeof(*wafl_reg_state) +
+ amdgpu_reginst_size(max_wafl_instances, sizeof(*wafl_regs),
+ NUM_WAFL_SMN_REGS);
+
+ if (max_size < szbuf)
+ return -EOVERFLOW;
+
+ p = &wafl_reg_state->wafl_state_regs[0];
+ for_each_inst(i, adev->aid_mask) {
+ for (j = 0; j < wafl_inst; ++j) {
+ wafl_regs = (struct amdgpu_regs_wafl_v1_0 *)p;
+ wafl_regs->inst_header.instance = inst++;
+
+ wafl_regs->inst_header.state = AMDGPU_INST_S_OK;
+ wafl_regs->inst_header.num_smn_regs = NUM_WAFL_SMN_REGS;
+
+ reg_data = wafl_regs->smn_reg_values;
+
+ for (r = 0; r < ARRAY_SIZE(wafl_reg_addrs); r++) {
+ start_addr = wafl_reg_addrs[r].start_addr;
+ incrx = wafl_reg_addrs[r].incrx;
+ num_regs = wafl_reg_addrs[r].num_regs;
+ for (n = 0; n < num_regs; n++) {
+ aqua_read_smn_ext(
+ adev, reg_data,
+ WAFL_LINK_REG(start_addr, j) +
+ n * incrx,
+ i);
+ ++reg_data;
+ }
+ }
+ /* The next instance starts right after the last register written */
+ p = reg_data;
+ }
+ }
+
+ wafl_reg_state->common_header.structure_size = szbuf;
+ wafl_reg_state->common_header.format_revision = 1;
+ wafl_reg_state->common_header.content_revision = 0;
+ wafl_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_WAFL;
+ wafl_reg_state->common_header.num_instances = max_wafl_instances;
+
+ return wafl_reg_state->common_header.structure_size;
+}
+
+/* SMN registers captured in the USR and USR_1 register state dumps */
+#define smnreg_0x1B311060 0x1B311060
+#define smnreg_0x1B411060 0x1B411060
+#define smnreg_0x1B511060 0x1B511060
+#define smnreg_0x1B611060 0x1B611060
+
+#define smnreg_0x1C307120 0x1C307120
+#define smnreg_0x1C317120 0x1C317120
+
+#define smnreg_0x1C320830 0x1C320830
+#define smnreg_0x1C380830 0x1C380830
+#define smnreg_0x1C3D0830 0x1C3D0830
+#define smnreg_0x1C420830 0x1C420830
+
+#define smnreg_0x1C320100 0x1C320100
+#define smnreg_0x1C380100 0x1C380100
+#define smnreg_0x1C3D0100 0x1C3D0100
+#define smnreg_0x1C420100 0x1C420100
+
+#define smnreg_0x1B310500 0x1B310500
+#define smnreg_0x1C300400 0x1C300400
+
+/* Address steps used by the usr1_reg_addrs[] runs */
+#define USR_CAKE_INCR 0x11000
+#define USR_LINK_INCR 0x100000
+#define USR_CP_INCR 0x10000
+
+/* Must equal the sum of num_regs in usr_reg_addrs[] (4 * 4 + 2 + 2) */
+#define NUM_USR_SMN_REGS 20
+
+struct aqua_reg_list usr_reg_addrs[] = {
+ { smnreg_0x1B311060, 4, DW_ADDR_INCR },
+ { smnreg_0x1B411060, 4, DW_ADDR_INCR },
+ { smnreg_0x1B511060, 4, DW_ADDR_INCR },
+ { smnreg_0x1B611060, 4, DW_ADDR_INCR },
+ { smnreg_0x1C307120, 2, DW_ADDR_INCR },
+ { smnreg_0x1C317120, 2, DW_ADDR_INCR },
+};
+
+/* Must equal the sum of num_regs in usr1_reg_addrs[] (2 * (6 + 5 + 5 + 4) + 4 + 2) */
+#define NUM_USR1_SMN_REGS 46
+struct aqua_reg_list usr1_reg_addrs[] = {
+ { smnreg_0x1C320830, 6, USR_CAKE_INCR },
+ { smnreg_0x1C380830, 5, USR_CAKE_INCR },
+ { smnreg_0x1C3D0830, 5, USR_CAKE_INCR },
+ { smnreg_0x1C420830, 4, USR_CAKE_INCR },
+ { smnreg_0x1C320100, 6, USR_CAKE_INCR },
+ { smnreg_0x1C380100, 5, USR_CAKE_INCR },
+ { smnreg_0x1C3D0100, 5, USR_CAKE_INCR },
+ { smnreg_0x1C420100, 4, USR_CAKE_INCR },
+ { smnreg_0x1B310500, 4, USR_LINK_INCR },
+ { smnreg_0x1C300400, 2, USR_CP_INCR },
+};
+
+/*
+ * Dump the USR or USR_1 register state into @buf.
+ *
+ * @reg_state selects the register list: AMDGPU_REG_STATE_TYPE_USR uses
+ * usr_reg_addrs[], AMDGPU_REG_STATE_TYPE_USR_1 uses usr1_reg_addrs[]. One
+ * instance is written per AID set in adev->aid_mask; the header always
+ * reports max_usr_instances instances and a size covering all of them.
+ *
+ * Returns the number of bytes described by the header, -EINVAL for a NULL or
+ * empty buffer or an unexpected @reg_state, or -EOVERFLOW when @max_size is
+ * too small.
+ */
+static ssize_t aqua_vanjaram_read_usr_state(struct amdgpu_device *adev,
+ void *buf, size_t max_size,
+ int reg_state)
+{
+ uint32_t start_addr, incrx, num_regs, szbuf, num_smn;
+ struct amdgpu_reg_state_usr_v1_0 *usr_reg_state;
+ struct amdgpu_regs_usr_v1_0 *usr_regs;
+ struct amdgpu_smn_reg_data *reg_data;
+ const int max_usr_instances = 4;
+ struct aqua_reg_list *reg_addrs;
+ int inst = 0, i, n, r, arr_size;
+ void *p;
+
+ if (!buf || !max_size)
+ return -EINVAL;
+
+ /* Pick the register list and its total register count */
+ switch (reg_state) {
+ case AMDGPU_REG_STATE_TYPE_USR:
+ arr_size = ARRAY_SIZE(usr_reg_addrs);
+ reg_addrs = usr_reg_addrs;
+ num_smn = NUM_USR_SMN_REGS;
+ break;
+ case AMDGPU_REG_STATE_TYPE_USR_1:
+ arr_size = ARRAY_SIZE(usr1_reg_addrs);
+ reg_addrs = usr1_reg_addrs;
+ num_smn = NUM_USR1_SMN_REGS;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ usr_reg_state = (struct amdgpu_reg_state_usr_v1_0 *)buf;
+
+ szbuf = sizeof(*usr_reg_state) + amdgpu_reginst_size(max_usr_instances,
+ sizeof(*usr_regs),
+ num_smn);
+ if (max_size < szbuf)
+ return -EOVERFLOW;
+
+ p = &usr_reg_state->usr_state_regs[0];
+ for_each_inst(i, adev->aid_mask) {
+ usr_regs = (struct amdgpu_regs_usr_v1_0 *)p;
+ usr_regs->inst_header.instance = inst++;
+ usr_regs->inst_header.state = AMDGPU_INST_S_OK;
+ usr_regs->inst_header.num_smn_regs = num_smn;
+ reg_data = usr_regs->smn_reg_values;
+
+ for (r = 0; r < arr_size; r++) {
+ start_addr = reg_addrs[r].start_addr;
+ incrx = reg_addrs[r].incrx;
+ num_regs = reg_addrs[r].num_regs;
+ for (n = 0; n < num_regs; n++) {
+ aqua_read_smn_ext(adev, reg_data,
+ start_addr + n * incrx, i);
+ reg_data++;
+ }
+ }
+ /* The next instance starts right after the last register written */
+ p = reg_data;
+ }
+
+ usr_reg_state->common_header.structure_size = szbuf;
+ usr_reg_state->common_header.format_revision = 1;
+ usr_reg_state->common_header.content_revision = 0;
+ /*
+ * NOTE(review): state_type is AMDGPU_REG_STATE_TYPE_USR even when
+ * reg_state is AMDGPU_REG_STATE_TYPE_USR_1 - confirm this is intended.
+ */
+ usr_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_USR;
+ usr_reg_state->common_header.num_instances = max_usr_instances;
+
+ return usr_reg_state->common_header.structure_size;
+}
+
+/*
+ * Dispatch a register state dump request to the reader for @reg_state.
+ * Returns the reader's result (bytes written or a negative error), or
+ * -EINVAL for an unrecognised @reg_state.
+ */
+ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev,
+				    enum amdgpu_reg_state reg_state, void *buf,
+				    size_t max_size)
+{
+	switch (reg_state) {
+	case AMDGPU_REG_STATE_TYPE_PCIE:
+		return aqua_vanjaram_read_pcie_state(adev, buf, max_size);
+	case AMDGPU_REG_STATE_TYPE_XGMI:
+		return aqua_vanjaram_read_xgmi_state(adev, buf, max_size);
+	case AMDGPU_REG_STATE_TYPE_WAFL:
+		return aqua_vanjaram_read_wafl_state(adev, buf, max_size);
+	case AMDGPU_REG_STATE_TYPE_USR:
+		return aqua_vanjaram_read_usr_state(adev, buf, max_size,
+						    AMDGPU_REG_STATE_TYPE_USR);
+	case AMDGPU_REG_STATE_TYPE_USR_1:
+		return aqua_vanjaram_read_usr_state(adev, buf, max_size,
+						    AMDGPU_REG_STATE_TYPE_USR_1);
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
index 3ea557864320..42f4e163e251 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
@@ -68,12 +68,13 @@ int athub_v1_0_set_clockgating(struct amdgpu_device *adev,
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->asic_type) {
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- case CHIP_RENOIR:
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 0):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(1, 5, 0):
athub_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
athub_update_medium_grain_light_sleep(adev,
@@ -86,7 +87,7 @@ int athub_v1_0_set_clockgating(struct amdgpu_device *adev,
return 0;
}
-void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h
index b279af59e34f..6be0a6704ea7 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h
@@ -25,6 +25,6 @@
int athub_v1_0_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state);
-void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
index ab6a07e5e8c4..5a122f50a6e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
@@ -77,7 +77,8 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev,
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[ATHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(1, 3, 1):
case IP_VERSION(2, 0, 0):
case IP_VERSION(2, 0, 2):
athub_v2_0_update_medium_grain_clock_gating(adev,
@@ -92,7 +93,7 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev,
return 0;
}
-void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h
index 02932c1c8bab..8b763f6dfd81 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h
@@ -25,6 +25,6 @@
int athub_v2_0_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state);
-void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c
index 2edefd10e56c..e143fcc46148 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c
@@ -70,10 +70,11 @@ int athub_v2_1_set_clockgating(struct amdgpu_device *adev,
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[ATHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
case IP_VERSION(2, 1, 0):
case IP_VERSION(2, 1, 1):
case IP_VERSION(2, 1, 2):
+ case IP_VERSION(2, 4, 0):
athub_v2_1_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE);
athub_v2_1_update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE);
break;
@@ -84,7 +85,7 @@ int athub_v2_1_set_clockgating(struct amdgpu_device *adev,
return 0;
}
-void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h
index 5e6824c0f591..b799f14bce03 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h
@@ -25,6 +25,6 @@
int athub_v2_1_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state);
-void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u64 *flags);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
new file mode 100644
index 000000000000..d1bba9c64e16
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "athub_v3_0.h"
+#include "athub/athub_3_0_0_offset.h"
+#include "athub/athub_3_0_0_sh_mask.h"
+#include "navi10_enum.h"
+#include "soc15_common.h"
+
+#define regATHUB_MISC_CNTL_V3_0_1 0x00d7
+#define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX 0
+#define regATHUB_MISC_CNTL_V3_3_0 0x00d8
+#define regATHUB_MISC_CNTL_V3_3_0_BASE_IDX 0
+
+
+static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(3, 0, 1):
+ data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1);
+ break;
+ case IP_VERSION(3, 3, 0):
+ data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0);
+ break;
+ default:
+ data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+ break;
+ }
+ return data;
+}
+
+static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data)
+{
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(3, 0, 1):
+ WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data);
+ break;
+ case IP_VERSION(3, 3, 0):
+ WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0, data);
+ break;
+ default:
+ WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
+ break;
+ }
+}
+
+static void
+athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = athub_v3_0_get_cg_cntl(adev);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_MGCG))
+ data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+
+ if (def != data)
+ athub_v3_0_set_cg_cntl(adev, data);
+}
+
+static void
+athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = athub_v3_0_get_cg_cntl(adev);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_LS))
+ data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ athub_v3_0_set_cg_cntl(adev, data);
+}
+
+int athub_v3_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 1):
+ case IP_VERSION(3, 0, 2):
+ case IP_VERSION(3, 3, 0):
+ athub_v3_0_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ athub_v3_0_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+void athub_v3_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_ATHUB_MGCG */
+ data = athub_v3_0_get_cg_cntl(adev);
+ if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
+
+ /* AMD_CG_SUPPORT_ATHUB_LS */
+ if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_LS;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.h
new file mode 100644
index 000000000000..e08a7d564365
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __ATHUB_V3_0_H__
+#define __ATHUB_V3_0_H__
+
+int athub_v3_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+void athub_v3_0_get_clockgating(struct amdgpu_device *adev, u64 *flags);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c
new file mode 100644
index 000000000000..8a0773b80864
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "athub_v4_1_0.h"
+#include "athub/athub_4_1_0_offset.h"
+#include "athub/athub_4_1_0_sh_mask.h"
+#include "soc15_common.h"
+
+static uint32_t athub_v4_1_0_get_cg_cntl(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+ break;
+ default:
+ data = 0;
+ break;
+ }
+ return data;
+}
+
+static void athub_v4_1_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data)
+{
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
+ break;
+ default:
+ break;
+ }
+}
+
+static void
+athub_v4_1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = athub_v4_1_0_get_cg_cntl(adev);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_MGCG))
+ data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+
+ if (def != data)
+ athub_v4_1_0_set_cg_cntl(adev, data);
+}
+
+static void
+athub_v4_1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = athub_v4_1_0_get_cg_cntl(adev);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_LS))
+ data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ athub_v4_1_0_set_cg_cntl(adev, data);
+}
+
+int athub_v4_1_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ athub_v4_1_0_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ athub_v4_1_0_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+void athub_v4_1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_ATHUB_MGCG */
+ data = athub_v4_1_0_get_cg_cntl(adev);
+ if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
+
+ /* AMD_CG_SUPPORT_ATHUB_LS */
+ if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_LS;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h
new file mode 100644
index 000000000000..4d18d0998fa8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __ATHUB_V4_1_0_H__
+#define __ATHUB_V4_1_0_H__
+
+int athub_v4_1_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+void athub_v4_1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
index 6fa2229b7229..7a063e44d429 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -25,7 +25,9 @@
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
-#include <asm/unaligned.h>
+#include <linux/string_helpers.h>
+
+#include <linux/unaligned.h>
#include <drm/drm_util.h>
@@ -60,6 +62,7 @@
typedef struct {
struct atom_context *ctx;
uint32_t *ps, *ws;
+ int ps_size, ws_size;
int ps_shift;
uint16_t start;
unsigned last_jump;
@@ -68,8 +71,8 @@ typedef struct {
} atom_exec_context;
int amdgpu_atom_debug;
-static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params);
-int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params);
+static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params, int params_size);
+int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size);
static uint32_t atom_arg_mask[8] =
{ 0xFFFFFFFF, 0xFFFF, 0xFFFF00, 0xFFFF0000, 0xFF, 0xFF00, 0xFF0000,
@@ -221,7 +224,10 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr,
(*ptr)++;
/* get_unaligned_le32 avoids unaligned accesses from atombios
* tables, noticed on a DEC Alpha. */
- val = get_unaligned_le32((u32 *)&ctx->ps[idx]);
+ if (idx < ctx->ps_size)
+ val = get_unaligned_le32((u32 *)&ctx->ps[idx]);
+ else
+ pr_info("PS index out of range: %i > %i\n", idx, ctx->ps_size);
if (print)
DEBUG("PS[0x%02X,0x%04X]", idx, val);
break;
@@ -259,7 +265,10 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr,
val = gctx->reg_block;
break;
default:
- val = ctx->ws[idx];
+ if (idx < ctx->ws_size)
+ val = ctx->ws[idx];
+ else
+ pr_info("WS index out of range: %i > %i\n", idx, ctx->ws_size);
}
break;
case ATOM_ARG_ID:
@@ -292,7 +301,7 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr,
(*ptr) += 4;
if (print)
DEBUG("IMM 0x%08X\n", val);
- return val;
+ break;
case ATOM_SRC_WORD0:
case ATOM_SRC_WORD8:
case ATOM_SRC_WORD16:
@@ -300,7 +309,7 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr,
(*ptr) += 2;
if (print)
DEBUG("IMM 0x%04X\n", val);
- return val;
+ break;
case ATOM_SRC_BYTE0:
case ATOM_SRC_BYTE8:
case ATOM_SRC_BYTE16:
@@ -309,9 +318,9 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr,
(*ptr)++;
if (print)
DEBUG("IMM 0x%02X\n", val);
- return val;
+ break;
}
- return 0;
+ return val;
case ATOM_ARG_PLL:
idx = U8(*ptr);
(*ptr)++;
@@ -393,7 +402,6 @@ static void atom_skip_src_int(atom_exec_context *ctx, uint8_t attr, int *ptr)
(*ptr)++;
return;
}
- return;
}
}
@@ -494,6 +502,10 @@ static void atom_put_dst(atom_exec_context *ctx, int arg, uint8_t attr,
idx = U8(*ptr);
(*ptr)++;
DEBUG("PS[0x%02X]", idx);
+ if (idx >= ctx->ps_size) {
+ pr_info("PS index out of range: %i > %i\n", idx, ctx->ps_size);
+ return;
+ }
ctx->ps[idx] = cpu_to_le32(val);
break;
case ATOM_ARG_WS:
@@ -526,6 +538,10 @@ static void atom_put_dst(atom_exec_context *ctx, int arg, uint8_t attr,
gctx->reg_block = val;
break;
default:
+ if (idx >= ctx->ws_size) {
+ pr_info("WS index out of range: %i > %i\n", idx, ctx->ws_size);
+ return;
+ }
ctx->ws[idx] = val;
}
break;
@@ -623,7 +639,7 @@ static void atom_op_calltable(atom_exec_context *ctx, int *ptr, int arg)
else
SDEBUG(" table: %d\n", idx);
if (U16(ctx->ctx->cmd_table + 4 + 2 * idx))
- r = amdgpu_atom_execute_table_locked(ctx->ctx, idx, ctx->ps + ctx->ps_shift);
+ r = amdgpu_atom_execute_table_locked(ctx->ctx, idx, ctx->ps + ctx->ps_shift, ctx->ps_size - ctx->ps_shift);
if (r) {
ctx->abort = true;
}
@@ -740,7 +756,7 @@ static void atom_op_jump(atom_exec_context *ctx, int *ptr, int arg)
break;
}
if (arg != ATOM_COND_ALWAYS)
- SDEBUG(" taken: %s\n", execute ? "yes" : "no");
+ SDEBUG(" taken: %s\n", str_yes_no(execute));
SDEBUG(" target: 0x%04X\n", target);
if (execute) {
if (ctx->last_jump == (ctx->start + target)) {
@@ -1202,7 +1218,7 @@ static struct {
atom_op_div32, ATOM_ARG_WS},
};
-static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params)
+static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params, int params_size)
{
int base = CU16(ctx->cmd_table + 4 + 2 * index);
int len, ws, ps, ptr;
@@ -1224,12 +1240,21 @@ static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index,
ectx.ps_shift = ps / 4;
ectx.start = base;
ectx.ps = params;
+ ectx.ps_size = params_size;
ectx.abort = false;
ectx.last_jump = 0;
- if (ws)
+ ectx.last_jump_jiffies = 0;
+ if (ws) {
ectx.ws = kcalloc(4, ws, GFP_KERNEL);
- else
+ if (!ectx.ws) {
+ ret = -ENOMEM;
+ goto free;
+ }
+ ectx.ws_size = ws;
+ } else {
ectx.ws = NULL;
+ ectx.ws_size = 0;
+ }
debug_depth++;
while (1) {
@@ -1263,7 +1288,7 @@ free:
return ret;
}
-int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params)
+int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size)
{
int r;
@@ -1279,7 +1304,7 @@ int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *par
/* reset divmul */
ctx->divmul[0] = 0;
ctx->divmul[1] = 0;
- r = amdgpu_atom_execute_table_locked(ctx, index, params);
+ r = amdgpu_atom_execute_table_locked(ctx, index, params, params_size);
mutex_unlock(&ctx->mutex);
return r;
}
@@ -1423,6 +1448,7 @@ static void atom_get_vbios_pn(struct atom_context *ctx)
if (vbios_str == NULL)
vbios_str += sizeof(BIOS_ATOM_PREFIX) - 1;
}
+ OPTIMIZER_HIDE_VAR(vbios_str);
if (vbios_str != NULL && *vbios_str == 0)
vbios_str++;
@@ -1436,14 +1462,33 @@ static void atom_get_vbios_pn(struct atom_context *ctx)
ctx->vbios_pn[count] = 0;
}
+
+ pr_info("ATOM BIOS: %s\n", ctx->vbios_pn);
}
static void atom_get_vbios_version(struct atom_context *ctx)
{
+ unsigned short start = 3, end;
unsigned char *vbios_ver;
+ unsigned char *p_rom;
+
+ p_rom = ctx->bios;
+ /* Search from strings offset if it's present */
+ start = *(unsigned short *)(p_rom +
+ OFFSET_TO_GET_ATOMBIOS_STRING_START);
+
+ /* Search till atom rom header start point */
+ end = *(unsigned short *)(p_rom + OFFSET_TO_ATOM_ROM_HEADER_POINTER);
+
+ /* Use hardcoded offsets, if the offsets are not populated */
+ if (end <= start) {
+ start = 3;
+ end = 1024;
+ }
/* find anchor ATOMBIOSBK-AMD */
- vbios_ver = atom_find_str_in_rom(ctx, BIOS_VERSION_PREFIX, 3, 1024, 64);
+ vbios_ver =
+ atom_find_str_in_rom(ctx, BIOS_VERSION_PREFIX, start, end, 64);
if (vbios_ver != NULL) {
/* skip ATOMBIOSBK-AMD VER */
vbios_ver += 18;
@@ -1453,16 +1498,36 @@ static void atom_get_vbios_version(struct atom_context *ctx)
}
}
+static void atom_get_vbios_build(struct atom_context *ctx)
+{
+ unsigned char *atom_rom_hdr;
+ unsigned char *str;
+ uint16_t base, len;
+
+ base = CU16(ATOM_ROM_TABLE_PTR);
+ atom_rom_hdr = CSTR(base);
+
+ str = CSTR(CU16(base + ATOM_ROM_CFG_PTR));
+ /* Skip config string */
+ while (str < atom_rom_hdr && *str++)
+ ;
+ /* Skip change list string */
+ while (str < atom_rom_hdr && *str++)
+ ;
+
+ len = min(atom_rom_hdr - str, STRLEN_NORMAL);
+ if (len)
+ strscpy(ctx->build_num, str, len);
+}
+
struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios)
{
int base;
struct atom_context *ctx =
kzalloc(sizeof(struct atom_context), GFP_KERNEL);
- char *str;
struct _ATOM_ROM_HEADER *atom_rom_header;
struct _ATOM_MASTER_DATA_TABLE *master_table;
struct _ATOM_FIRMWARE_INFO *atom_fw_info;
- u16 idx;
if (!ctx)
return NULL;
@@ -1500,16 +1565,6 @@ struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios)
return NULL;
}
- idx = CU16(ATOM_ROM_PART_NUMBER_PTR);
- if (idx == 0)
- idx = 0x80;
-
- str = CSTR(idx);
- if (*str != '\0') {
- pr_info("ATOM BIOS: %s\n", str);
- strlcpy(ctx->vbios_version, str, sizeof(ctx->vbios_version));
- }
-
atom_rom_header = (struct _ATOM_ROM_HEADER *)CSTR(base);
if (atom_rom_header->usMasterDataTableOffset != 0) {
master_table = (struct _ATOM_MASTER_DATA_TABLE *)
@@ -1525,6 +1580,7 @@ struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios)
atom_get_vbios_pn(ctx);
atom_get_vbios_date(ctx);
atom_get_vbios_version(ctx);
+ atom_get_vbios_build(ctx);
return ctx;
}
@@ -1544,7 +1600,7 @@ int amdgpu_atom_asic_init(struct atom_context *ctx)
if (!CU16(ctx->cmd_table + 4 + 2 * ATOM_CMD_INIT))
return 1;
- ret = amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, ps);
+ ret = amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, ps, 16);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h
index 0c1839824520..825ff28731f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.h
+++ b/drivers/gpu/drm/amd/amdgpu/atom.h
@@ -33,11 +33,11 @@ struct drm_device;
#define ATOM_ATI_MAGIC_PTR 0x30
#define ATOM_ATI_MAGIC " 761295520"
#define ATOM_ROM_TABLE_PTR 0x48
-#define ATOM_ROM_PART_NUMBER_PTR 0x6E
#define ATOM_ROM_MAGIC "ATOM"
#define ATOM_ROM_MAGIC_PTR 4
+#define ATOM_ROM_CFG_PTR 0xC
#define ATOM_ROM_MSG_PTR 0x10
#define ATOM_ROM_CMD_PTR 0x1E
#define ATOM_ROM_DATA_PTR 0x20
@@ -118,12 +118,15 @@ struct drm_device;
struct card_info {
struct drm_device *dev;
- void (* reg_write)(struct card_info *, uint32_t, uint32_t); /* filled by driver */
- uint32_t (* reg_read)(struct card_info *, uint32_t); /* filled by driver */
- void (* mc_write)(struct card_info *, uint32_t, uint32_t); /* filled by driver */
- uint32_t (* mc_read)(struct card_info *, uint32_t); /* filled by driver */
- void (* pll_write)(struct card_info *, uint32_t, uint32_t); /* filled by driver */
- uint32_t (* pll_read)(struct card_info *, uint32_t); /* filled by driver */
+ void (*reg_write)(struct card_info *info,
+ u32 reg, uint32_t val); /* filled by driver */
+ uint32_t (*reg_read)(struct card_info *info, uint32_t reg); /* filled by driver */
+ void (*mc_write)(struct card_info *info,
+ u32 reg, uint32_t val); /* filled by driver */
+ uint32_t (*mc_read)(struct card_info *info, uint32_t reg); /* filled by driver */
+ void (*pll_write)(struct card_info *info,
+ u32 reg, uint32_t val); /* filled by driver */
+ uint32_t (*pll_read)(struct card_info *info, uint32_t reg); /* filled by driver */
};
struct atom_context {
@@ -143,21 +146,21 @@ struct atom_context {
int io_mode;
uint32_t *scratch;
int scratch_size_bytes;
- char vbios_version[20];
uint8_t name[STRLEN_LONG];
uint8_t vbios_pn[STRLEN_LONG];
uint32_t version;
uint8_t vbios_ver_str[STRLEN_NORMAL];
uint8_t date[STRLEN_NORMAL];
+ uint8_t build_num[STRLEN_NORMAL];
};
extern int amdgpu_atom_debug;
-struct atom_context *amdgpu_atom_parse(struct card_info *, void *);
-int amdgpu_atom_execute_table(struct atom_context *, int, uint32_t *);
-int amdgpu_atom_asic_init(struct atom_context *);
-void amdgpu_atom_destroy(struct atom_context *);
+struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios);
+int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size);
+int amdgpu_atom_asic_init(struct atom_context *ctx);
+void amdgpu_atom_destroy(struct atom_context *ctx);
bool amdgpu_atom_parse_data_header(struct atom_context *ctx, int index, uint16_t *size,
uint8_t *frev, uint8_t *crev, uint16_t *data_start);
bool amdgpu_atom_parse_cmd_header(struct atom_context *ctx, int index,
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
index afad094f84c2..3dfc28840a7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
@@ -24,7 +24,6 @@
* Alex Deucher
*/
-#include <drm/drm_crtc_helper.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_fixed.h>
#include "amdgpu.h"
@@ -78,7 +77,7 @@ void amdgpu_atombios_crtc_overscan_setup(struct drm_crtc *crtc,
args.usOverscanTop = cpu_to_le16(amdgpu_crtc->v_border);
break;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_crtc_scaler_setup(struct drm_crtc *crtc)
@@ -107,7 +106,7 @@ void amdgpu_atombios_crtc_scaler_setup(struct drm_crtc *crtc)
args.ucEnable = ATOM_SCALER_DISABLE;
break;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_crtc_lock(struct drm_crtc *crtc, int lock)
@@ -124,7 +123,7 @@ void amdgpu_atombios_crtc_lock(struct drm_crtc *crtc, int lock)
args.ucCRTC = amdgpu_crtc->crtc_id;
args.ucEnable = lock;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_crtc_enable(struct drm_crtc *crtc, int state)
@@ -140,7 +139,7 @@ void amdgpu_atombios_crtc_enable(struct drm_crtc *crtc, int state)
args.ucCRTC = amdgpu_crtc->crtc_id;
args.ucEnable = state;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_crtc_blank(struct drm_crtc *crtc, int state)
@@ -156,7 +155,7 @@ void amdgpu_atombios_crtc_blank(struct drm_crtc *crtc, int state)
args.ucCRTC = amdgpu_crtc->crtc_id;
args.ucBlanking = state;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
@@ -172,7 +171,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
args.ucDispPipeId = amdgpu_crtc->crtc_id;
args.ucEnable = state;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev)
@@ -184,7 +183,7 @@ void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev)
args.ucEnable = ATOM_INIT;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_crtc_set_dtd_timing(struct drm_crtc *crtc,
@@ -229,7 +228,7 @@ void amdgpu_atombios_crtc_set_dtd_timing(struct drm_crtc *crtc,
args.susModeMiscInfo.usAccess = cpu_to_le16(misc);
args.ucCRTC = amdgpu_crtc->crtc_id;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
union atom_enable_ss {
@@ -294,7 +293,7 @@ static void amdgpu_atombios_crtc_program_ss(struct amdgpu_device *adev,
args.v3.usSpreadSpectrumStep = cpu_to_le16(ss->step);
args.v3.ucEnable = enable;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
union adjust_pixel_clock {
@@ -396,7 +395,7 @@ static u32 amdgpu_atombios_crtc_adjust_pll(struct drm_crtc *crtc,
ADJUST_DISPLAY_CONFIG_SS_ENABLE;
amdgpu_atom_execute_table(adev->mode_info.atom_context,
- index, (uint32_t *)&args);
+ index, (uint32_t *)&args, sizeof(args));
adjusted_clock = le16_to_cpu(args.v1.usPixelClock) * 10;
break;
case 3:
@@ -429,7 +428,7 @@ static u32 amdgpu_atombios_crtc_adjust_pll(struct drm_crtc *crtc,
args.v3.sInput.ucExtTransmitterID = 0;
amdgpu_atom_execute_table(adev->mode_info.atom_context,
- index, (uint32_t *)&args);
+ index, (uint32_t *)&args, sizeof(args));
adjusted_clock = le32_to_cpu(args.v3.sOutput.ulDispPllFreq) * 10;
if (args.v3.sOutput.ucRefDiv) {
amdgpu_crtc->pll_flags |= AMDGPU_PLL_USE_FRAC_FB_DIV;
@@ -515,7 +514,7 @@ void amdgpu_atombios_crtc_set_disp_eng_pll(struct amdgpu_device *adev,
DRM_ERROR("Unknown table version %d %d\n", frev, crev);
return;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
union set_dce_clock {
@@ -545,7 +544,7 @@ u32 amdgpu_atombios_crtc_set_dce_clock(struct amdgpu_device *adev,
args.v2_1.asParam.ulDCEClkFreq = cpu_to_le32(freq); /* 10kHz units */
args.v2_1.asParam.ucDCEClkType = clk_type;
args.v2_1.asParam.ucDCEClkSrc = clk_src;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
ret_freq = le32_to_cpu(args.v2_1.asParam.ulDCEClkFreq) * 10;
break;
default:
@@ -741,7 +740,7 @@ void amdgpu_atombios_crtc_program_pll(struct drm_crtc *crtc,
return;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
int amdgpu_atombios_crtc_prepare_pll(struct drm_crtc *crtc,
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
index f327becb022f..492813ab1b54 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
@@ -26,6 +26,8 @@
*/
#include <drm/amdgpu_drm.h>
+#include <drm/display/drm_dp_helper.h>
+
#include "amdgpu.h"
#include "atom.h"
@@ -34,7 +36,6 @@
#include "atombios_dp.h"
#include "amdgpu_connectors.h"
#include "amdgpu_atombios.h"
-#include <drm/drm_dp_helper.h>
/* move these to drm_dp_helper.c/h */
#define DP_LINK_CONFIGURATION_SIZE 9
@@ -82,7 +83,7 @@ static int amdgpu_atombios_dp_process_aux_ch(struct amdgpu_i2c_chan *chan,
args.v2.ucDelay = delay / 10;
args.v2.ucHPD_ID = chan->rec.hpd;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
*ack = args.v2.ucReplyStatus;
@@ -300,7 +301,7 @@ static u8 amdgpu_atombios_dp_encoder_service(struct amdgpu_device *adev,
args.ucLaneNum = lane_num;
args.ucStatus = 0;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
return args.ucStatus;
}
@@ -429,7 +430,7 @@ void amdgpu_atombios_dp_set_link_config(struct drm_connector *connector,
}
int amdgpu_atombios_dp_mode_valid_helper(struct drm_connector *connector,
- struct drm_display_mode *mode)
+ const struct drm_display_mode *mode)
{
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
struct amdgpu_connector_atom_dig *dig_connector;
@@ -457,8 +458,8 @@ bool amdgpu_atombios_dp_needs_link_train(struct amdgpu_connector *amdgpu_connect
u8 link_status[DP_LINK_STATUS_SIZE];
struct amdgpu_connector_atom_dig *dig = amdgpu_connector->con_priv;
- if (drm_dp_dpcd_read_link_status(&amdgpu_connector->ddc_bus->aux, link_status)
- <= 0)
+ if (drm_dp_dpcd_read_link_status(&amdgpu_connector->ddc_bus->aux,
+ link_status) < 0)
return false;
if (drm_dp_channel_eq_ok(link_status, dig->dp_lane_count))
return false;
@@ -615,7 +616,7 @@ amdgpu_atombios_dp_link_train_cr(struct amdgpu_atombios_dp_link_train_info *dp_i
drm_dp_link_train_clock_recovery_delay(dp_info->aux, dp_info->dpcd);
if (drm_dp_dpcd_read_link_status(dp_info->aux,
- dp_info->link_status) <= 0) {
+ dp_info->link_status) < 0) {
DRM_ERROR("displayport link status failed\n");
break;
}
@@ -680,7 +681,7 @@ amdgpu_atombios_dp_link_train_ce(struct amdgpu_atombios_dp_link_train_info *dp_i
drm_dp_link_train_channel_eq_delay(dp_info->aux, dp_info->dpcd);
if (drm_dp_dpcd_read_link_status(dp_info->aux,
- dp_info->link_status) <= 0) {
+ dp_info->link_status) < 0) {
DRM_ERROR("displayport link status failed\n");
break;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.h b/drivers/gpu/drm/amd/amdgpu/atombios_dp.h
index f59d85eaddf0..3e24acf8133f 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.h
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.h
@@ -32,7 +32,7 @@ int amdgpu_atombios_dp_get_panel_mode(struct drm_encoder *encoder,
void amdgpu_atombios_dp_set_link_config(struct drm_connector *connector,
const struct drm_display_mode *mode);
int amdgpu_atombios_dp_mode_valid_helper(struct drm_connector *connector,
- struct drm_display_mode *mode);
+ const struct drm_display_mode *mode);
bool amdgpu_atombios_dp_needs_link_train(struct amdgpu_connector *amdgpu_connector);
void amdgpu_atombios_dp_set_rx_power_state(struct drm_connector *connector,
u8 power_state);
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
index 6134ed964027..a51f3414b65d 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -26,7 +26,9 @@
#include <linux/pci.h>
-#include <drm/drm_crtc_helper.h>
+#include <acpi/video.h>
+
+#include <drm/drm_edid.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_connectors.h"
@@ -118,8 +120,6 @@ amdgpu_atombios_encoder_set_backlight_level(struct amdgpu_encoder *amdgpu_encode
}
}
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
-
static u8 amdgpu_atombios_encoder_backlight_level(struct backlight_device *bd)
{
u8 level;
@@ -184,7 +184,12 @@ void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encode
return;
if (!(adev->mode_info.firmware_flags & ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU))
- return;
+ goto register_acpi_backlight;
+
+ if (!acpi_video_backlight_use_native()) {
+ drm_info(dev, "Skipping amdgpu atom DIG backlight registration\n");
+ goto register_acpi_backlight;
+ }
pdata = kmalloc(sizeof(struct amdgpu_backlight_privdata), GFP_KERNEL);
if (!pdata) {
@@ -210,7 +215,7 @@ void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encode
dig->bl_dev = bd;
bd->props.brightness = amdgpu_atombios_encoder_get_backlight_brightness(bd);
- bd->props.power = FB_BLANK_UNBLANK;
+ bd->props.power = BACKLIGHT_POWER_ON;
backlight_update_status(bd);
DRM_INFO("amdgpu atom DIG backlight initialized\n");
@@ -220,6 +225,10 @@ void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encode
error:
kfree(pdata);
return;
+
+register_acpi_backlight:
+ /* Try registering an ACPI video backlight device instead. */
+ acpi_video_register_backlight();
}
void
@@ -251,18 +260,6 @@ amdgpu_atombios_encoder_fini_backlight(struct amdgpu_encoder *amdgpu_encoder)
}
}
-#else /* !CONFIG_BACKLIGHT_CLASS_DEVICE */
-
-void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *encoder)
-{
-}
-
-void amdgpu_atombios_encoder_fini_backlight(struct amdgpu_encoder *encoder)
-{
-}
-
-#endif
-
bool amdgpu_atombios_encoder_is_digital(struct drm_encoder *encoder)
{
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
@@ -338,7 +335,7 @@ amdgpu_atombios_encoder_setup_dac(struct drm_encoder *encoder, int action)
args.ucDacStandard = ATOM_DAC1_PS2;
args.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
@@ -435,7 +432,7 @@ amdgpu_atombios_encoder_setup_dvo(struct drm_encoder *encoder, int action)
break;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder)
@@ -469,7 +466,7 @@ int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder)
if (amdgpu_connector->use_digital &&
(amdgpu_connector->audio == AMDGPU_AUDIO_ENABLE))
return ATOM_ENCODER_MODE_HDMI;
- else if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) &&
+ else if (connector->display_info.is_hdmi &&
(amdgpu_connector->audio == AMDGPU_AUDIO_AUTO))
return ATOM_ENCODER_MODE_HDMI;
else if (amdgpu_connector->use_digital)
@@ -488,7 +485,7 @@ int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder)
if (amdgpu_audio != 0) {
if (amdgpu_connector->audio == AMDGPU_AUDIO_ENABLE)
return ATOM_ENCODER_MODE_HDMI;
- else if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) &&
+ else if (connector->display_info.is_hdmi &&
(amdgpu_connector->audio == AMDGPU_AUDIO_AUTO))
return ATOM_ENCODER_MODE_HDMI;
else
@@ -506,7 +503,7 @@ int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder)
} else if (amdgpu_audio != 0) {
if (amdgpu_connector->audio == AMDGPU_AUDIO_ENABLE)
return ATOM_ENCODER_MODE_HDMI;
- else if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) &&
+ else if (connector->display_info.is_hdmi &&
(amdgpu_connector->audio == AMDGPU_AUDIO_AUTO))
return ATOM_ENCODER_MODE_HDMI;
else
@@ -735,7 +732,7 @@ amdgpu_atombios_encoder_setup_dig_encoder(struct drm_encoder *encoder,
break;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
@@ -765,7 +762,6 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a
int dp_clock = 0;
int dp_lane_count = 0;
int connector_object_id = 0;
- int igp_lane_info = 0;
int dig_encoder = dig->dig_encoder;
int hpd_id = AMDGPU_HPD_NONE;
@@ -848,26 +844,6 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a
else
args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_DIG1_ENCODER;
- if ((adev->flags & AMD_IS_APU) &&
- (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_UNIPHY)) {
- if (is_dp ||
- !amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock)) {
- if (igp_lane_info & 0x1)
- args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_0_3;
- else if (igp_lane_info & 0x2)
- args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_4_7;
- else if (igp_lane_info & 0x4)
- args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_8_11;
- else if (igp_lane_info & 0x8)
- args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_12_15;
- } else {
- if (igp_lane_info & 0x3)
- args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_0_7;
- else if (igp_lane_info & 0xc)
- args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_8_15;
- }
- }
-
if (dig->linkb)
args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LINKB;
else
@@ -1160,7 +1136,7 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a
break;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
bool
@@ -1188,7 +1164,7 @@ amdgpu_atombios_encoder_set_edp_panel_power(struct drm_connector *connector,
args.v1.ucAction = action;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
/* wait for the panel to power up */
if (action == ATOM_TRANSMITTER_ACTION_POWER_ON) {
@@ -1312,7 +1288,7 @@ amdgpu_atombios_encoder_setup_external_encoder(struct drm_encoder *encoder,
DRM_ERROR("Unknown table version: %d, %d\n", frev, crev);
return;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
static void
@@ -1657,7 +1633,7 @@ amdgpu_atombios_encoder_set_crtc_source(struct drm_encoder *encoder)
return;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
/* This only needs to be called once at startup */
@@ -1730,7 +1706,7 @@ amdgpu_atombios_encoder_dac_load_detect(struct drm_encoder *encoder,
args.sDacload.ucMisc = DAC_LOAD_MISC_YPrPb;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
return true;
} else
@@ -2088,24 +2064,25 @@ amdgpu_atombios_encoder_get_lcd_info(struct amdgpu_encoder *encoder)
case LCD_FAKE_EDID_PATCH_RECORD_TYPE:
fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record;
if (fake_edid_record->ucFakeEDIDLength) {
- struct edid *edid;
- int edid_size =
- max((int)EDID_LENGTH, (int)fake_edid_record->ucFakeEDIDLength);
- edid = kmalloc(edid_size, GFP_KERNEL);
- if (edid) {
- memcpy((u8 *)edid, (u8 *)&fake_edid_record->ucFakeEDIDString[0],
- fake_edid_record->ucFakeEDIDLength);
-
- if (drm_edid_is_valid(edid)) {
- adev->mode_info.bios_hardcoded_edid = edid;
- adev->mode_info.bios_hardcoded_edid_size = edid_size;
- } else
- kfree(edid);
- }
+ const struct drm_edid *edid;
+ int edid_size;
+
+ if (fake_edid_record->ucFakeEDIDLength == 128)
+ edid_size = fake_edid_record->ucFakeEDIDLength;
+ else
+ edid_size = fake_edid_record->ucFakeEDIDLength * 128;
+ edid = drm_edid_alloc(fake_edid_record->ucFakeEDIDString, edid_size);
+ if (drm_edid_valid(edid))
+ adev->mode_info.bios_hardcoded_edid = edid;
+ else
+ drm_edid_free(edid);
+ record += struct_size(fake_edid_record,
+ ucFakeEDIDString,
+ edid_size);
+ } else {
+ /* empty fake edid record must be 3 bytes long */
+ record += sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1;
}
- record += fake_edid_record->ucFakeEDIDLength ?
- fake_edid_record->ucFakeEDIDLength + 2 :
- sizeof(ATOM_FAKE_EDID_PATCH_RECORD);
break;
case LCD_PANEL_RESOLUTION_RECORD_TYPE:
panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record;
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
index af0335535f82..a6501114322f 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
@@ -86,7 +86,7 @@ static int amdgpu_atombios_i2c_process_i2c_ch(struct amdgpu_i2c_chan *chan,
args.ucSlaveAddr = slave_addr << 1;
args.ucLineNumber = chan->rec.i2c_id;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
/* error */
if (args.ucStatus != HW_ASSISTED_I2C_STATUS_SUCCESS) {
@@ -172,5 +172,5 @@ void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device *adev, u8 slave_addr
args.ucSlaveAddr = slave_addr;
args.ucLineNumber = line_number;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 54f28c075f21..9cd63b4177bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1141,12 +1141,12 @@ static uint32_t cik_get_register_value(struct amdgpu_device *adev,
mutex_lock(&adev->grbm_idx_mutex);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
val = RREG32(reg_offset);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
return val;
} else {
@@ -1375,14 +1375,14 @@ static int cik_asic_pci_config_reset(struct amdgpu_device *adev)
return r;
}
-static bool cik_asic_supports_baco(struct amdgpu_device *adev)
+static int cik_asic_supports_baco(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_BONAIRE:
case CHIP_HAWAII:
return amdgpu_dpm_is_baco_supported(adev);
default:
- return false;
+ return 0;
}
}
@@ -1428,6 +1428,10 @@ static int cik_asic_reset(struct amdgpu_device *adev)
{
int r;
+ /* APUs don't have full asic reset */
+ if (adev->flags & AMD_IS_APU)
+ return 0;
+
if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
dev_info(adev->dev, "BACO reset\n");
r = amdgpu_dpm_baco_reset(adev);
@@ -1570,17 +1574,8 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
u16 bridge_cfg2, gpu_cfg2;
u32 max_lw, current_lw, tmp;
- pcie_capability_read_word(root, PCI_EXP_LNKCTL,
- &bridge_cfg);
- pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL,
- &gpu_cfg);
-
- tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
- pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
-
- tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
- pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL,
- tmp16);
+ pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
+ pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
tmp = RREG32_PCIE(ixPCIE_LC_STATUS1);
max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >>
@@ -1633,45 +1628,28 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
msleep(100);
/* linkctl */
- pcie_capability_read_word(root, PCI_EXP_LNKCTL,
- &tmp16);
- tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
- tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
- pcie_capability_write_word(root, PCI_EXP_LNKCTL,
- tmp16);
-
- pcie_capability_read_word(adev->pdev,
- PCI_EXP_LNKCTL,
- &tmp16);
- tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
- tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
- pcie_capability_write_word(adev->pdev,
- PCI_EXP_LNKCTL,
- tmp16);
+ pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_HAWD,
+ bridge_cfg &
+ PCI_EXP_LNKCTL_HAWD);
+ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_HAWD,
+ gpu_cfg &
+ PCI_EXP_LNKCTL_HAWD);
/* linkctl2 */
- pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
- &tmp16);
- tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN);
- tmp16 |= (bridge_cfg2 &
- (PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN));
- pcie_capability_write_word(root,
- PCI_EXP_LNKCTL2,
- tmp16);
-
- pcie_capability_read_word(adev->pdev,
- PCI_EXP_LNKCTL2,
- &tmp16);
- tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN);
- tmp16 |= (gpu_cfg2 &
- (PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN));
- pcie_capability_write_word(adev->pdev,
- PCI_EXP_LNKCTL2,
- tmp16);
+ pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN,
+ bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN,
+ gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
tmp = RREG32_PCIE(ixPCIE_LC_CNTL4);
tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
@@ -1686,16 +1664,15 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK;
WREG32_PCIE(ixPCIE_LC_SPEED_CNTL, speed_cntl);
- pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
-
+ tmp16 = 0;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
else
tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
- pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16);
+ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_TLS, tmp16);
speed_cntl = RREG32_PCIE(ixPCIE_LC_SPEED_CNTL);
speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK;
@@ -1715,16 +1692,12 @@ static void cik_program_aspm(struct amdgpu_device *adev)
bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
bool disable_clkreq = false;
- if (amdgpu_aspm == 0)
+ if (!amdgpu_device_should_use_aspm(adev))
return;
if (pci_is_root_bus(adev->pdev->bus))
return;
- /* XXX double check APUs */
- if (adev->flags & AMD_IS_APU)
- return;
-
orig = data = RREG32_PCIE(ixPCIE_LC_N_FTS_CNTL);
data &= ~PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_MASK;
data |= (0x24 << PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS__SHIFT) |
@@ -2012,9 +1985,9 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
.query_video_codecs = &cik_query_video_codecs,
};
-static int cik_common_early_init(void *handle)
+static int cik_common_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->smc_rreg = &cik_smc_rreg;
adev->smc_wreg = &cik_smc_wreg;
@@ -2151,19 +2124,9 @@ static int cik_common_early_init(void *handle)
return 0;
}
-static int cik_common_sw_init(void *handle)
-{
- return 0;
-}
-
-static int cik_common_sw_fini(void *handle)
-{
- return 0;
-}
-
-static int cik_common_hw_init(void *handle)
+static int cik_common_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* move the golden regs per IP block */
cik_init_golden_registers(adev);
@@ -2175,48 +2138,36 @@ static int cik_common_hw_init(void *handle)
return 0;
}
-static int cik_common_hw_fini(void *handle)
+static int cik_common_hw_fini(struct amdgpu_ip_block *ip_block)
{
return 0;
}
-static int cik_common_suspend(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cik_common_hw_fini(adev);
-}
-
-static int cik_common_resume(void *handle)
+static int cik_common_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cik_common_hw_init(adev);
+ return cik_common_hw_init(ip_block);
}
-static bool cik_common_is_idle(void *handle)
+static bool cik_common_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int cik_common_wait_for_idle(void *handle)
-{
- return 0;
-}
-static int cik_common_soft_reset(void *handle)
+
+static int cik_common_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* XXX hard reset?? */
return 0;
}
-static int cik_common_set_clockgating_state(void *handle,
+static int cik_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int cik_common_set_powergating_state(void *handle,
+static int cik_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -2225,15 +2176,10 @@ static int cik_common_set_powergating_state(void *handle,
static const struct amd_ip_funcs cik_common_ip_funcs = {
.name = "cik_common",
.early_init = cik_common_early_init,
- .late_init = NULL,
- .sw_init = cik_common_sw_init,
- .sw_fini = cik_common_sw_fini,
.hw_init = cik_common_hw_init,
.hw_fini = cik_common_hw_fini,
- .suspend = cik_common_suspend,
.resume = cik_common_resume,
.is_idle = cik_common_is_idle,
- .wait_for_idle = cik_common_wait_for_idle,
.soft_reset = cik_common_soft_reset,
.set_clockgating_state = cik_common_set_clockgating_state,
.set_powergating_state = cik_common_set_powergating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index df385ffc9768..876a3256dba4 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -156,6 +156,9 @@ static int cik_ih_irq_init(struct amdgpu_device *adev)
/* enable irqs */
cik_ih_enable_interrupts(adev);
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
return 0;
}
@@ -192,6 +195,9 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
wptr = le32_to_cpu(*ih->wptr_cpu);
+ if (ih == &adev->irq.ih_soft)
+ goto out;
+
if (wptr & IH_RB_WPTR__RB_OVERFLOW_MASK) {
wptr &= ~IH_RB_WPTR__RB_OVERFLOW_MASK;
/* When a ring buffer overflow happen start parsing interrupt
@@ -204,7 +210,15 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
tmp = RREG32(mmIH_RB_CNTL);
tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
WREG32(mmIH_RB_CNTL, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
+ WREG32(mmIH_RB_CNTL, tmp);
}
+
+out:
return (wptr & ih->ptr_mask);
}
@@ -277,9 +291,9 @@ static void cik_ih_set_rptr(struct amdgpu_device *adev,
WREG32(mmIH_RB_RPTR, ih->rptr);
}
-static int cik_ih_early_init(void *handle)
+static int cik_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = amdgpu_irq_add_domain(adev);
@@ -291,23 +305,27 @@ static int cik_ih_early_init(void *handle)
return 0;
}
-static int cik_ih_sw_init(void *handle)
+static int cik_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
if (r)
return r;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
r = amdgpu_irq_init(adev);
return r;
}
-static int cik_ih_sw_fini(void *handle)
+static int cik_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
amdgpu_irq_remove_domain(adev);
@@ -315,39 +333,33 @@ static int cik_ih_sw_fini(void *handle)
return 0;
}
-static int cik_ih_hw_init(void *handle)
+static int cik_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return cik_ih_irq_init(adev);
}
-static int cik_ih_hw_fini(void *handle)
+static int cik_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- cik_ih_irq_disable(adev);
+ cik_ih_irq_disable(ip_block->adev);
return 0;
}
-static int cik_ih_suspend(void *handle)
+static int cik_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cik_ih_hw_fini(adev);
+ return cik_ih_hw_fini(ip_block);
}
-static int cik_ih_resume(void *handle)
+static int cik_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cik_ih_hw_init(adev);
+ return cik_ih_hw_init(ip_block);
}
-static bool cik_ih_is_idle(void *handle)
+static bool cik_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__IH_BUSY_MASK)
@@ -356,11 +368,11 @@ static bool cik_ih_is_idle(void *handle)
return true;
}
-static int cik_ih_wait_for_idle(void *handle)
+static int cik_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -372,9 +384,9 @@ static int cik_ih_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int cik_ih_soft_reset(void *handle)
+static int cik_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS);
@@ -402,13 +414,13 @@ static int cik_ih_soft_reset(void *handle)
return 0;
}
-static int cik_ih_set_clockgating_state(void *handle,
+static int cik_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int cik_ih_set_powergating_state(void *handle,
+static int cik_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -417,7 +429,6 @@ static int cik_ih_set_powergating_state(void *handle,
static const struct amd_ip_funcs cik_ih_ip_funcs = {
.name = "cik_ih",
.early_init = cik_ih_early_init,
- .late_init = NULL,
.sw_init = cik_ih_sw_init,
.sw_fini = cik_ih_sw_fini,
.hw_init = cik_ih_hw_init,
@@ -442,8 +453,7 @@ static void cik_ih_set_interrupt_funcs(struct amdgpu_device *adev)
adev->irq.ih_funcs = &cik_ih_funcs;
}
-const struct amdgpu_ip_block_version cik_ih_ip_block =
-{
+const struct amdgpu_ip_block_version cik_ih_ip_block = {
.type = AMD_IP_BLOCK_TYPE_IH,
.major = 2,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index c8ebd108548d..9e8715b4739d 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -54,7 +54,9 @@ static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev);
static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev);
static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev);
static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev);
-static int cik_sdma_soft_reset(void *handle);
+static int cik_sdma_soft_reset(struct amdgpu_ip_block *ip_block);
+
+u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev);
MODULE_FIRMWARE("amdgpu/bonaire_sdma.bin");
MODULE_FIRMWARE("amdgpu/bonaire_sdma1.bin");
@@ -67,16 +69,12 @@ MODULE_FIRMWARE("amdgpu/kabini_sdma1.bin");
MODULE_FIRMWARE("amdgpu/mullins_sdma.bin");
MODULE_FIRMWARE("amdgpu/mullins_sdma1.bin");
-u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev);
-
-
static void cik_sdma_free_microcode(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
- }
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
}
/*
@@ -108,7 +106,6 @@ static void cik_sdma_free_microcode(struct amdgpu_device *adev)
static int cik_sdma_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err = 0, i;
DRM_DEBUG("\n");
@@ -134,21 +131,22 @@ static int cik_sdma_init_microcode(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
if (i == 0)
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sdma.bin", chip_name);
else
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
- err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sdma1.bin", chip_name);
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
}
out:
if (err) {
- pr_err("cik_sdma: Failed to load firmware \"%s\"\n", fw_name);
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
- }
+ pr_err("cik_sdma: Failed to load firmware \"%s_sdma%s.bin\"\n",
+ chip_name, i == 0 ? "" : "1");
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
}
return err;
}
@@ -164,7 +162,7 @@ static uint64_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring)
{
u32 rptr;
- rptr = ring->adev->wb.wb[ring->rptr_offs];
+ rptr = *ring->rptr_cpu_addr;
return (rptr & 0x3fffc) >> 2;
}
@@ -195,7 +193,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
- (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
+ (ring->wptr << 2) & 0x3fffc);
}
static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
@@ -309,15 +307,9 @@ static void cik_sdma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
*/
static void cik_sdma_gfx_stop(struct amdgpu_device *adev)
{
- struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
- struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
u32 rb_cntl;
int i;
- if ((adev->mman.buffer_funcs_ring == sdma0) ||
- (adev->mman.buffer_funcs_ring == sdma1))
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
rb_cntl &= ~SDMA0_GFX_RB_CNTL__RB_ENABLE_MASK;
@@ -436,12 +428,10 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl;
u32 rb_bufsz;
- u32 wb_offset;
int i, j, r;
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
- wb_offset = (ring->rptr_offs * 4);
mutex_lock(&adev->srbm_mutex);
for (j = 0; j < 16; j++) {
@@ -477,9 +467,9 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
/* set the wb address whether it's enabled or not */
WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
- upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i],
- ((adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
+ ((ring->rptr_gpu_addr) & 0xFFFFFFFC));
rb_cntl |= SDMA0_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE_MASK;
@@ -487,7 +477,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
ring->wptr = 0;
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
/* enable DMA RB */
WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i],
@@ -499,8 +489,6 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
#endif
/* enable DMA IBs */
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
-
- ring->sched.ready = true;
}
cik_sdma_enable(adev, true);
@@ -510,9 +498,6 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -712,7 +697,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -934,12 +919,17 @@ static void cik_enable_sdma_mgls(struct amdgpu_device *adev,
}
}
-static int cik_sdma_early_init(void *handle)
+static int cik_sdma_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
adev->sdma.num_instances = SDMA_MAX_INSTANCE;
+ r = cik_sdma_init_microcode(adev);
+ if (r)
+ return r;
+
cik_sdma_set_ring_funcs(adev);
cik_sdma_set_irq_funcs(adev);
cik_sdma_set_buffer_funcs(adev);
@@ -948,18 +938,12 @@ static int cik_sdma_early_init(void *handle)
return 0;
}
-static int cik_sdma_sw_init(void *handle)
+static int cik_sdma_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r, i;
- r = cik_sdma_init_microcode(adev);
- if (r) {
- DRM_ERROR("Failed to load sdma firmware!\n");
- return r;
- }
-
/* SDMA trap event */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
&adev->sdma.trap_irq);
@@ -994,9 +978,9 @@ static int cik_sdma_sw_init(void *handle)
return r;
}
-static int cik_sdma_sw_fini(void *handle)
+static int cik_sdma_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
@@ -1006,21 +990,16 @@ static int cik_sdma_sw_fini(void *handle)
return 0;
}
-static int cik_sdma_hw_init(void *handle)
+static int cik_sdma_hw_init(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- r = cik_sdma_start(adev);
- if (r)
- return r;
-
- return r;
+ return cik_sdma_start(adev);
}
-static int cik_sdma_hw_fini(void *handle)
+static int cik_sdma_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cik_ctx_switch_enable(adev, false);
cik_sdma_enable(adev, false);
@@ -1028,25 +1007,21 @@ static int cik_sdma_hw_fini(void *handle)
return 0;
}
-static int cik_sdma_suspend(void *handle)
+static int cik_sdma_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cik_sdma_hw_fini(adev);
+ return cik_sdma_hw_fini(ip_block);
}
-static int cik_sdma_resume(void *handle)
+static int cik_sdma_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ cik_sdma_soft_reset(ip_block);
- cik_sdma_soft_reset(handle);
-
- return cik_sdma_hw_init(adev);
+ return cik_sdma_hw_init(ip_block);
}
-static bool cik_sdma_is_idle(void *handle)
+static bool cik_sdma_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS2);
if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
@@ -1056,27 +1031,23 @@ static bool cik_sdma_is_idle(void *handle)
return true;
}
-static int cik_sdma_wait_for_idle(void *handle)
+static int cik_sdma_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
- SRBM_STATUS2__SDMA1_BUSY_MASK);
-
- if (!tmp)
+ if (cik_sdma_is_idle(ip_block))
return 0;
udelay(1);
}
return -ETIMEDOUT;
}
-static int cik_sdma_soft_reset(void *handle)
+static int cik_sdma_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp;
/* sdma0 */
@@ -1210,11 +1181,11 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int cik_sdma_set_clockgating_state(void *handle,
+static int cik_sdma_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE)
gate = true;
@@ -1225,7 +1196,7 @@ static int cik_sdma_set_clockgating_state(void *handle,
return 0;
}
-static int cik_sdma_set_powergating_state(void *handle,
+static int cik_sdma_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1234,7 +1205,6 @@ static int cik_sdma_set_powergating_state(void *handle,
static const struct amd_ip_funcs cik_sdma_ip_funcs = {
.name = "cik_sdma",
.early_init = cik_sdma_early_init,
- .late_init = NULL,
.sw_init = cik_sdma_sw_init,
.sw_fini = cik_sdma_sw_fini,
.hw_init = cik_sdma_hw_init,
@@ -1308,7 +1278,7 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: is this a secure operation
+ * @copy_flags: unused
*
* Copy GPU buffers using the DMA engine (CIK).
* Used by the amdgpu ttm implementation to move pages if
@@ -1318,7 +1288,7 @@ static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
ib->ptr[ib->length_dw++] = byte_count;
diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h
index 55982c0064b5..8aca4f2734f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/cikd.h
+++ b/drivers/gpu/drm/amd/amdgpu/cikd.h
@@ -51,8 +51,14 @@
#define HPD4_REGISTER_OFFSET (0x1813 - 0x1807)
#define HPD5_REGISTER_OFFSET (0x1816 - 0x1807)
-#define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001
-#define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003
+/* audio endpt instance offsets */
+#define AUD0_REGISTER_OFFSET (0x1780 - 0x1780)
+#define AUD1_REGISTER_OFFSET (0x1786 - 0x1780)
+#define AUD2_REGISTER_OFFSET (0x178c - 0x1780)
+#define AUD3_REGISTER_OFFSET (0x1792 - 0x1780)
+#define AUD4_REGISTER_OFFSET (0x1798 - 0x1780)
+#define AUD5_REGISTER_OFFSET (0x179d - 0x1780)
+#define AUD6_REGISTER_OFFSET (0x17a4 - 0x1780)
#define PIPEID(x) ((x) << 0)
#define MEID(x) ((x) << 2)
@@ -364,6 +370,7 @@
* 1 - Stream
* 2 - Bypass
*/
+#define EOP_EXEC (1 << 28) /* For Trailing Fence */
#define DATA_SEL(x) ((x) << 29)
/* 0 - discard
* 1 - send low 32bit data
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h
new file mode 100644
index 000000000000..a8b29d33c464
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h
@@ -0,0 +1,997 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __CLEARSTATE_GFX11_H_
+#define __CLEARSTATE_GFX11_H_
+
+static const unsigned int gfx11_SECT_CONTEXT_def_1[] =
+{
+ 0x00000000, // DB_RENDER_CONTROL
+ 0x00000000, // DB_COUNT_CONTROL
+ 0x00000000, // DB_DEPTH_VIEW
+ 0x00000000, // DB_RENDER_OVERRIDE
+ 0x00000000, // DB_RENDER_OVERRIDE2
+ 0x00000000, // DB_HTILE_DATA_BASE
+ 0, // HOLE
+ 0x00000000, // DB_DEPTH_SIZE_XY
+ 0x00000000, // DB_DEPTH_BOUNDS_MIN
+ 0x00000000, // DB_DEPTH_BOUNDS_MAX
+ 0x00000000, // DB_STENCIL_CLEAR
+ 0x00000000, // DB_DEPTH_CLEAR
+ 0x00000000, // PA_SC_SCREEN_SCISSOR_TL
+ 0x40004000, // PA_SC_SCREEN_SCISSOR_BR
+ 0, // HOLE
+ 0x00000000, // DB_RESERVED_REG_2
+ 0x00000000, // DB_Z_INFO
+ 0x00000000, // DB_STENCIL_INFO
+ 0x00000000, // DB_Z_READ_BASE
+ 0x00000000, // DB_STENCIL_READ_BASE
+ 0x00000000, // DB_Z_WRITE_BASE
+ 0x00000000, // DB_STENCIL_WRITE_BASE
+ 0x00000000, // DB_RESERVED_REG_1
+ 0x00000000, // DB_RESERVED_REG_3
+ 0x00000000, // DB_SPI_VRS_CENTER_LOCATION
+ 0, // HOLE
+ 0x00000000, // DB_Z_READ_BASE_HI
+ 0x00000000, // DB_STENCIL_READ_BASE_HI
+ 0x00000000, // DB_Z_WRITE_BASE_HI
+ 0x00000000, // DB_STENCIL_WRITE_BASE_HI
+ 0x00000000, // DB_HTILE_DATA_BASE_HI
+ 0x00150055, // DB_RMI_L2_CACHE_CONTROL
+ 0x00000000, // TA_BC_BASE_ADDR
+ 0x00000000, // TA_BC_BASE_ADDR_HI
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // COHER_DEST_BASE_HI_0
+ 0x00000000, // COHER_DEST_BASE_HI_1
+ 0x00000000, // COHER_DEST_BASE_HI_2
+ 0x00000000, // COHER_DEST_BASE_HI_3
+ 0x00000000, // COHER_DEST_BASE_2
+ 0x00000000, // COHER_DEST_BASE_3
+ 0x00000000, // PA_SC_WINDOW_OFFSET
+ 0x80000000, // PA_SC_WINDOW_SCISSOR_TL
+ 0x40004000, // PA_SC_WINDOW_SCISSOR_BR
+ 0x0000ffff, // PA_SC_CLIPRECT_RULE
+ 0x00000000, // PA_SC_CLIPRECT_0_TL
+ 0x40004000, // PA_SC_CLIPRECT_0_BR
+ 0x00000000, // PA_SC_CLIPRECT_1_TL
+ 0x40004000, // PA_SC_CLIPRECT_1_BR
+ 0x00000000, // PA_SC_CLIPRECT_2_TL
+ 0x40004000, // PA_SC_CLIPRECT_2_BR
+ 0x00000000, // PA_SC_CLIPRECT_3_TL
+ 0x40004000, // PA_SC_CLIPRECT_3_BR
+ 0xaa99aaaa, // PA_SC_EDGERULE
+ 0x00000000, // PA_SU_HARDWARE_SCREEN_OFFSET
+ 0xffffffff, // CB_TARGET_MASK
+ 0xffffffff, // CB_SHADER_MASK
+ 0x80000000, // PA_SC_GENERIC_SCISSOR_TL
+ 0x40004000, // PA_SC_GENERIC_SCISSOR_BR
+ 0x00000000, // COHER_DEST_BASE_0
+ 0x00000000, // COHER_DEST_BASE_1
+ 0x80000000, // PA_SC_VPORT_SCISSOR_0_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_0_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_1_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_1_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_2_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_2_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_3_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_3_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_4_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_4_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_5_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_5_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_6_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_6_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_7_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_7_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_8_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_8_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_9_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_9_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_10_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_10_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_11_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_11_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_12_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_12_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_13_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_13_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_14_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_14_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_15_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_15_BR
+ 0x00000000, // PA_SC_VPORT_ZMIN_0
+ 0x3f800000, // PA_SC_VPORT_ZMAX_0
+ 0x00000000, // PA_SC_VPORT_ZMIN_1
+ 0x3f800000, // PA_SC_VPORT_ZMAX_1
+ 0x00000000, // PA_SC_VPORT_ZMIN_2
+ 0x3f800000, // PA_SC_VPORT_ZMAX_2
+ 0x00000000, // PA_SC_VPORT_ZMIN_3
+ 0x3f800000, // PA_SC_VPORT_ZMAX_3
+ 0x00000000, // PA_SC_VPORT_ZMIN_4
+ 0x3f800000, // PA_SC_VPORT_ZMAX_4
+ 0x00000000, // PA_SC_VPORT_ZMIN_5
+ 0x3f800000, // PA_SC_VPORT_ZMAX_5
+ 0x00000000, // PA_SC_VPORT_ZMIN_6
+ 0x3f800000, // PA_SC_VPORT_ZMAX_6
+ 0x00000000, // PA_SC_VPORT_ZMIN_7
+ 0x3f800000, // PA_SC_VPORT_ZMAX_7
+ 0x00000000, // PA_SC_VPORT_ZMIN_8
+ 0x3f800000, // PA_SC_VPORT_ZMAX_8
+ 0x00000000, // PA_SC_VPORT_ZMIN_9
+ 0x3f800000, // PA_SC_VPORT_ZMAX_9
+ 0x00000000, // PA_SC_VPORT_ZMIN_10
+ 0x3f800000, // PA_SC_VPORT_ZMAX_10
+ 0x00000000, // PA_SC_VPORT_ZMIN_11
+ 0x3f800000, // PA_SC_VPORT_ZMAX_11
+ 0x00000000, // PA_SC_VPORT_ZMIN_12
+ 0x3f800000, // PA_SC_VPORT_ZMAX_12
+ 0x00000000, // PA_SC_VPORT_ZMIN_13
+ 0x3f800000, // PA_SC_VPORT_ZMAX_13
+ 0x00000000, // PA_SC_VPORT_ZMIN_14
+ 0x3f800000, // PA_SC_VPORT_ZMAX_14
+ 0x00000000, // PA_SC_VPORT_ZMIN_15
+ 0x3f800000, // PA_SC_VPORT_ZMAX_15
+ 0x00000000, // PA_SC_RASTER_CONFIG
+ 0x00000000, // PA_SC_RASTER_CONFIG_1
+ 0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_2[] =
+{
+ 0x00000000, // CP_PERFMON_CNTX_CNTL
+ 0x00000000, // CP_PIPEID
+ 0x00000000, // CP_VMID
+ 0x00000000, // CONTEXT_RESERVED_REG0
+ 0x00000000, // CONTEXT_RESERVED_REG1
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_FSR_EN
+ 0x00000000, // PA_SC_FSR_FBW_RECURSIONS_X
+ 0x00000000, // PA_SC_FSR_FBW_RECURSIONS_Y
+ 0, // HOLE
+ 0x00000000, // PA_SC_VRS_OVERRIDE_CNTL
+ 0x00000000, // PA_SC_VRS_RATE_FEEDBACK_BASE
+ 0x00000000, // PA_SC_VRS_RATE_FEEDBACK_BASE_EXT
+ 0x00000000, // PA_SC_VRS_RATE_FEEDBACK_SIZE_XY
+ 0x00000000, // PA_SC_BINNER_OUTPUT_TIMEOUT_CNTL
+ 0x00000000, // PA_SC_VRS_RATE_CACHE_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_VRS_RATE_BASE
+ 0x00000000, // PA_SC_VRS_RATE_BASE_EXT
+ 0x00000000, // PA_SC_VRS_RATE_SIZE_XY
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX
+ 0x00550055, // CB_RMI_GL2_CACHE_CONTROL
+ 0x00000000, // CB_BLEND_RED
+ 0x00000000, // CB_BLEND_GREEN
+ 0x00000000, // CB_BLEND_BLUE
+ 0x00000000, // CB_BLEND_ALPHA
+ 0x00000000, // CB_FDCC_CONTROL
+ 0x00000000, // CB_COVERAGE_OUT_CONTROL
+ 0x00000000, // DB_STENCIL_CONTROL
+ 0x01000000, // DB_STENCILREFMASK
+ 0x01000000, // DB_STENCILREFMASK_BF
+ 0, // HOLE
+ 0x00000000, // PA_CL_VPORT_XSCALE
+ 0x00000000, // PA_CL_VPORT_XOFFSET
+ 0x00000000, // PA_CL_VPORT_YSCALE
+ 0x00000000, // PA_CL_VPORT_YOFFSET
+ 0x00000000, // PA_CL_VPORT_ZSCALE
+ 0x00000000, // PA_CL_VPORT_ZOFFSET
+ 0x00000000, // PA_CL_VPORT_XSCALE_1
+ 0x00000000, // PA_CL_VPORT_XOFFSET_1
+ 0x00000000, // PA_CL_VPORT_YSCALE_1
+ 0x00000000, // PA_CL_VPORT_YOFFSET_1
+ 0x00000000, // PA_CL_VPORT_ZSCALE_1
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_1
+ 0x00000000, // PA_CL_VPORT_XSCALE_2
+ 0x00000000, // PA_CL_VPORT_XOFFSET_2
+ 0x00000000, // PA_CL_VPORT_YSCALE_2
+ 0x00000000, // PA_CL_VPORT_YOFFSET_2
+ 0x00000000, // PA_CL_VPORT_ZSCALE_2
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_2
+ 0x00000000, // PA_CL_VPORT_XSCALE_3
+ 0x00000000, // PA_CL_VPORT_XOFFSET_3
+ 0x00000000, // PA_CL_VPORT_YSCALE_3
+ 0x00000000, // PA_CL_VPORT_YOFFSET_3
+ 0x00000000, // PA_CL_VPORT_ZSCALE_3
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_3
+ 0x00000000, // PA_CL_VPORT_XSCALE_4
+ 0x00000000, // PA_CL_VPORT_XOFFSET_4
+ 0x00000000, // PA_CL_VPORT_YSCALE_4
+ 0x00000000, // PA_CL_VPORT_YOFFSET_4
+ 0x00000000, // PA_CL_VPORT_ZSCALE_4
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_4
+ 0x00000000, // PA_CL_VPORT_XSCALE_5
+ 0x00000000, // PA_CL_VPORT_XOFFSET_5
+ 0x00000000, // PA_CL_VPORT_YSCALE_5
+ 0x00000000, // PA_CL_VPORT_YOFFSET_5
+ 0x00000000, // PA_CL_VPORT_ZSCALE_5
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_5
+ 0x00000000, // PA_CL_VPORT_XSCALE_6
+ 0x00000000, // PA_CL_VPORT_XOFFSET_6
+ 0x00000000, // PA_CL_VPORT_YSCALE_6
+ 0x00000000, // PA_CL_VPORT_YOFFSET_6
+ 0x00000000, // PA_CL_VPORT_ZSCALE_6
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_6
+ 0x00000000, // PA_CL_VPORT_XSCALE_7
+ 0x00000000, // PA_CL_VPORT_XOFFSET_7
+ 0x00000000, // PA_CL_VPORT_YSCALE_7
+ 0x00000000, // PA_CL_VPORT_YOFFSET_7
+ 0x00000000, // PA_CL_VPORT_ZSCALE_7
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_7
+ 0x00000000, // PA_CL_VPORT_XSCALE_8
+ 0x00000000, // PA_CL_VPORT_XOFFSET_8
+ 0x00000000, // PA_CL_VPORT_YSCALE_8
+ 0x00000000, // PA_CL_VPORT_YOFFSET_8
+ 0x00000000, // PA_CL_VPORT_ZSCALE_8
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_8
+ 0x00000000, // PA_CL_VPORT_XSCALE_9
+ 0x00000000, // PA_CL_VPORT_XOFFSET_9
+ 0x00000000, // PA_CL_VPORT_YSCALE_9
+ 0x00000000, // PA_CL_VPORT_YOFFSET_9
+ 0x00000000, // PA_CL_VPORT_ZSCALE_9
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_9
+ 0x00000000, // PA_CL_VPORT_XSCALE_10
+ 0x00000000, // PA_CL_VPORT_XOFFSET_10
+ 0x00000000, // PA_CL_VPORT_YSCALE_10
+ 0x00000000, // PA_CL_VPORT_YOFFSET_10
+ 0x00000000, // PA_CL_VPORT_ZSCALE_10
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_10
+ 0x00000000, // PA_CL_VPORT_XSCALE_11
+ 0x00000000, // PA_CL_VPORT_XOFFSET_11
+ 0x00000000, // PA_CL_VPORT_YSCALE_11
+ 0x00000000, // PA_CL_VPORT_YOFFSET_11
+ 0x00000000, // PA_CL_VPORT_ZSCALE_11
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_11
+ 0x00000000, // PA_CL_VPORT_XSCALE_12
+ 0x00000000, // PA_CL_VPORT_XOFFSET_12
+ 0x00000000, // PA_CL_VPORT_YSCALE_12
+ 0x00000000, // PA_CL_VPORT_YOFFSET_12
+ 0x00000000, // PA_CL_VPORT_ZSCALE_12
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_12
+ 0x00000000, // PA_CL_VPORT_XSCALE_13
+ 0x00000000, // PA_CL_VPORT_XOFFSET_13
+ 0x00000000, // PA_CL_VPORT_YSCALE_13
+ 0x00000000, // PA_CL_VPORT_YOFFSET_13
+ 0x00000000, // PA_CL_VPORT_ZSCALE_13
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_13
+ 0x00000000, // PA_CL_VPORT_XSCALE_14
+ 0x00000000, // PA_CL_VPORT_XOFFSET_14
+ 0x00000000, // PA_CL_VPORT_YSCALE_14
+ 0x00000000, // PA_CL_VPORT_YOFFSET_14
+ 0x00000000, // PA_CL_VPORT_ZSCALE_14
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_14
+ 0x00000000, // PA_CL_VPORT_XSCALE_15
+ 0x00000000, // PA_CL_VPORT_XOFFSET_15
+ 0x00000000, // PA_CL_VPORT_YSCALE_15
+ 0x00000000, // PA_CL_VPORT_YOFFSET_15
+ 0x00000000, // PA_CL_VPORT_ZSCALE_15
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_15
+ 0x00000000, // PA_CL_UCP_0_X
+ 0x00000000, // PA_CL_UCP_0_Y
+ 0x00000000, // PA_CL_UCP_0_Z
+ 0x00000000, // PA_CL_UCP_0_W
+ 0x00000000, // PA_CL_UCP_1_X
+ 0x00000000, // PA_CL_UCP_1_Y
+ 0x00000000, // PA_CL_UCP_1_Z
+ 0x00000000, // PA_CL_UCP_1_W
+ 0x00000000, // PA_CL_UCP_2_X
+ 0x00000000, // PA_CL_UCP_2_Y
+ 0x00000000, // PA_CL_UCP_2_Z
+ 0x00000000, // PA_CL_UCP_2_W
+ 0x00000000, // PA_CL_UCP_3_X
+ 0x00000000, // PA_CL_UCP_3_Y
+ 0x00000000, // PA_CL_UCP_3_Z
+ 0x00000000, // PA_CL_UCP_3_W
+ 0x00000000, // PA_CL_UCP_4_X
+ 0x00000000, // PA_CL_UCP_4_Y
+ 0x00000000, // PA_CL_UCP_4_Z
+ 0x00000000, // PA_CL_UCP_4_W
+ 0x00000000, // PA_CL_UCP_5_X
+ 0x00000000, // PA_CL_UCP_5_Y
+ 0x00000000, // PA_CL_UCP_5_Z
+ 0x00000000, // PA_CL_UCP_5_W
+ 0x00000000, // PA_CL_PROG_NEAR_CLIP_Z
+ 0x00000000, // PA_RATE_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_CNTL_0
+ 0x00000000, // SPI_PS_INPUT_CNTL_1
+ 0x00000000, // SPI_PS_INPUT_CNTL_2
+ 0x00000000, // SPI_PS_INPUT_CNTL_3
+ 0x00000000, // SPI_PS_INPUT_CNTL_4
+ 0x00000000, // SPI_PS_INPUT_CNTL_5
+ 0x00000000, // SPI_PS_INPUT_CNTL_6
+ 0x00000000, // SPI_PS_INPUT_CNTL_7
+ 0x00000000, // SPI_PS_INPUT_CNTL_8
+ 0x00000000, // SPI_PS_INPUT_CNTL_9
+ 0x00000000, // SPI_PS_INPUT_CNTL_10
+ 0x00000000, // SPI_PS_INPUT_CNTL_11
+ 0x00000000, // SPI_PS_INPUT_CNTL_12
+ 0x00000000, // SPI_PS_INPUT_CNTL_13
+ 0x00000000, // SPI_PS_INPUT_CNTL_14
+ 0x00000000, // SPI_PS_INPUT_CNTL_15
+ 0x00000000, // SPI_PS_INPUT_CNTL_16
+ 0x00000000, // SPI_PS_INPUT_CNTL_17
+ 0x00000000, // SPI_PS_INPUT_CNTL_18
+ 0x00000000, // SPI_PS_INPUT_CNTL_19
+ 0x00000000, // SPI_PS_INPUT_CNTL_20
+ 0x00000000, // SPI_PS_INPUT_CNTL_21
+ 0x00000000, // SPI_PS_INPUT_CNTL_22
+ 0x00000000, // SPI_PS_INPUT_CNTL_23
+ 0x00000000, // SPI_PS_INPUT_CNTL_24
+ 0x00000000, // SPI_PS_INPUT_CNTL_25
+ 0x00000000, // SPI_PS_INPUT_CNTL_26
+ 0x00000000, // SPI_PS_INPUT_CNTL_27
+ 0x00000000, // SPI_PS_INPUT_CNTL_28
+ 0x00000000, // SPI_PS_INPUT_CNTL_29
+ 0x00000000, // SPI_PS_INPUT_CNTL_30
+ 0x00000000, // SPI_PS_INPUT_CNTL_31
+ 0x00000000, // SPI_VS_OUT_CONFIG
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_ENA
+ 0x00000000, // SPI_PS_INPUT_ADDR
+ 0x00000000, // SPI_INTERP_CONTROL_0
+ 0x00000002, // SPI_PS_IN_CONTROL
+ 0x00000000, // SPI_BARYC_SSAA_CNTL
+ 0x00000000, // SPI_BARYC_CNTL
+ 0, // HOLE
+ 0x00000000, // SPI_TMPRING_SIZE
+ 0x00000000, // SPI_GFX_SCRATCH_BASE_LO
+ 0x00000000, // SPI_GFX_SCRATCH_BASE_HI
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_SHADER_IDX_FORMAT
+ 0x00000000, // SPI_SHADER_POS_FORMAT
+ 0x00000000, // SPI_SHADER_Z_FORMAT
+ 0x00000000, // SPI_SHADER_COL_FORMAT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SX_PS_DOWNCONVERT_CONTROL
+ 0x00000000, // SX_PS_DOWNCONVERT
+ 0x00000000, // SX_BLEND_OPT_EPSILON
+ 0x00000000, // SX_BLEND_OPT_CONTROL
+ 0x00000000, // SX_MRT0_BLEND_OPT
+ 0x00000000, // SX_MRT1_BLEND_OPT
+ 0x00000000, // SX_MRT2_BLEND_OPT
+ 0x00000000, // SX_MRT3_BLEND_OPT
+ 0x00000000, // SX_MRT4_BLEND_OPT
+ 0x00000000, // SX_MRT5_BLEND_OPT
+ 0x00000000, // SX_MRT6_BLEND_OPT
+ 0x00000000, // SX_MRT7_BLEND_OPT
+ 0x00000000, // CB_BLEND0_CONTROL
+ 0x00000000, // CB_BLEND1_CONTROL
+ 0x00000000, // CB_BLEND2_CONTROL
+ 0x00000000, // CB_BLEND3_CONTROL
+ 0x00000000, // CB_BLEND4_CONTROL
+ 0x00000000, // CB_BLEND5_CONTROL
+ 0x00000000, // CB_BLEND6_CONTROL
+ 0x00000000, // CB_BLEND7_CONTROL
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_3[] =
+{
+ 0x00000000, // PA_CL_POINT_X_RAD
+ 0x00000000, // PA_CL_POINT_Y_RAD
+ 0x00000000, // PA_CL_POINT_SIZE
+ 0x00000000, // PA_CL_POINT_CULL_RAD
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_4[] =
+{
+ 0x00000000, // GE_MAX_OUTPUT_PER_SUBGROUP
+ 0x00000000, // DB_DEPTH_CONTROL
+ 0x00000000, // DB_EQAA
+ 0x00000000, // CB_COLOR_CONTROL
+ 0x00000000, // DB_SHADER_CONTROL
+ 0x00090000, // PA_CL_CLIP_CNTL
+ 0x00000004, // PA_SU_SC_MODE_CNTL
+ 0x00000000, // PA_CL_VTE_CNTL
+ 0x00000000, // PA_CL_VS_OUT_CNTL
+ 0x00000000, // PA_CL_NANINF_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_SCALE
+ 0x00000000, // PA_SU_PRIM_FILTER_CNTL
+ 0x00000000, // PA_SU_SMALL_PRIM_FILTER_CNTL
+ 0, // HOLE
+ 0x00000000, // PA_CL_NGG_CNTL
+ 0x00000000, // PA_SU_OVER_RASTERIZATION_CNTL
+ 0x00000000, // PA_STEREO_CNTL
+ 0x00000000, // PA_STATE_STEREO_X
+ 0x00000000, // PA_CL_VRS_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SU_POINT_SIZE
+ 0x00000000, // PA_SU_POINT_MINMAX
+ 0x00000000, // PA_SU_LINE_CNTL
+ 0x00000000, // PA_SC_LINE_STIPPLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_HOS_MAX_TESS_LEVEL
+ 0x00000000, // VGT_HOS_MIN_TESS_LEVEL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_GS_ONCHIP_CNTL
+ 0x00000000, // PA_SC_MODE_CNTL_0
+ 0x00000000, // PA_SC_MODE_CNTL_1
+ 0x00000000, // VGT_ENHANCE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // IA_ENHANCE
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_5[] =
+{
+ 0x00000000, // WD_ENHANCE
+ 0x00000000, // VGT_PRIMITIVEID_EN
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_6[] =
+{
+ 0x00000000, // VGT_PRIMITIVEID_RESET
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_7[] =
+{
+ 0x00000000, // VGT_DRAW_PAYLOAD_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_ESGS_RING_ITEMSIZE
+ 0, // HOLE
+ 0x00000000, // VGT_REUSE_OFF
+ 0, // HOLE
+ 0x00000000, // DB_HTILE_SURFACE
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE0
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE1
+ 0x00000000, // DB_PRELOAD_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
+ 0, // HOLE
+ 0x00000000, // VGT_GS_MAX_VERT_OUT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // GE_NGG_SUBGRP_CNTL
+ 0x00000000, // VGT_TESS_DISTRIBUTION
+ 0x00000000, // VGT_SHADER_STAGES_EN
+ 0x00000000, // VGT_LS_HS_CONFIG
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_TF_PARAM
+ 0x00000000, // DB_ALPHA_TO_MASK
+ 0, // HOLE
+ 0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL
+ 0x00000000, // PA_SU_POLY_OFFSET_CLAMP
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_OFFSET
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET
+ 0x00000000, // VGT_GS_INSTANCE_CNT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_0
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_1
+ 0x00001000, // PA_SC_LINE_CNTL
+ 0x00000000, // PA_SC_AA_CONFIG
+ 0x00000005, // PA_SU_VTX_CNTL
+ 0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_VERT_DISC_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
+ 0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0
+ 0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1
+ 0x00000000, // PA_SC_SHADER_CONTROL
+ 0x00000003, // PA_SC_BINNER_CNTL_0
+ 0x00000000, // PA_SC_BINNER_CNTL_1
+ 0x00100000, // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL
+ 0x00000000, // PA_SC_NGG_MODE_CNTL
+ 0x00000000, // PA_SC_BINNER_CNTL_2
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_VIEW
+ 0x00000000, // CB_COLOR0_INFO
+ 0x00000000, // CB_COLOR0_ATTRIB
+ 0x00000000, // CB_COLOR0_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_VIEW
+ 0x00000000, // CB_COLOR1_INFO
+ 0x00000000, // CB_COLOR1_ATTRIB
+ 0x00000000, // CB_COLOR1_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_VIEW
+ 0x00000000, // CB_COLOR2_INFO
+ 0x00000000, // CB_COLOR2_ATTRIB
+ 0x00000000, // CB_COLOR2_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_VIEW
+ 0x00000000, // CB_COLOR3_INFO
+ 0x00000000, // CB_COLOR3_ATTRIB
+ 0x00000000, // CB_COLOR3_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_VIEW
+ 0x00000000, // CB_COLOR4_INFO
+ 0x00000000, // CB_COLOR4_ATTRIB
+ 0x00000000, // CB_COLOR4_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_VIEW
+ 0x00000000, // CB_COLOR5_INFO
+ 0x00000000, // CB_COLOR5_ATTRIB
+ 0x00000000, // CB_COLOR5_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_VIEW
+ 0x00000000, // CB_COLOR6_INFO
+ 0x00000000, // CB_COLOR6_ATTRIB
+ 0x00000000, // CB_COLOR6_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_VIEW
+ 0x00000000, // CB_COLOR7_INFO
+ 0x00000000, // CB_COLOR7_ATTRIB
+ 0x00000000, // CB_COLOR7_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_BASE_EXT
+ 0x00000000, // CB_COLOR1_BASE_EXT
+ 0x00000000, // CB_COLOR2_BASE_EXT
+ 0x00000000, // CB_COLOR3_BASE_EXT
+ 0x00000000, // CB_COLOR4_BASE_EXT
+ 0x00000000, // CB_COLOR5_BASE_EXT
+ 0x00000000, // CB_COLOR6_BASE_EXT
+ 0x00000000, // CB_COLOR7_BASE_EXT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR1_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR2_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR3_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR4_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR5_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR6_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR7_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR0_ATTRIB2
+ 0x00000000, // CB_COLOR1_ATTRIB2
+ 0x00000000, // CB_COLOR2_ATTRIB2
+ 0x00000000, // CB_COLOR3_ATTRIB2
+ 0x00000000, // CB_COLOR4_ATTRIB2
+ 0x00000000, // CB_COLOR5_ATTRIB2
+ 0x00000000, // CB_COLOR6_ATTRIB2
+ 0x00000000, // CB_COLOR7_ATTRIB2
+ 0x00000000, // CB_COLOR0_ATTRIB3
+ 0x00000000, // CB_COLOR1_ATTRIB3
+ 0x00000000, // CB_COLOR2_ATTRIB3
+ 0x00000000, // CB_COLOR3_ATTRIB3
+ 0x00000000, // CB_COLOR4_ATTRIB3
+ 0x00000000, // CB_COLOR5_ATTRIB3
+ 0x00000000, // CB_COLOR6_ATTRIB3
+ 0x00000000, // CB_COLOR7_ATTRIB3
+};
+static const struct cs_extent_def gfx11_SECT_CONTEXT_defs[] =
+{
+ {gfx11_SECT_CONTEXT_def_1, 0x0000a000, 215 },
+ {gfx11_SECT_CONTEXT_def_2, 0x0000a0d8, 272 },
+ {gfx11_SECT_CONTEXT_def_3, 0x0000a1f5, 4 },
+ {gfx11_SECT_CONTEXT_def_4, 0x0000a1ff, 158 },
+ {gfx11_SECT_CONTEXT_def_5, 0x0000a2a0, 2 },
+ {gfx11_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
+ {gfx11_SECT_CONTEXT_def_7, 0x0000a2a6, 282 },
+ { 0, 0, 0 }
+};
+static const struct cs_section_def gfx11_cs_data[] = {
+ { gfx11_SECT_CONTEXT_defs, SECT_CONTEXT },
+ { 0, SECT_NONE }
+};
+
+#endif /* __CLEARSTATE_GFX11_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx12.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx12.h
new file mode 100644
index 000000000000..2f6c9d11d5ae
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx12.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __CLEARSTATE_GFX12_H_
+#define __CLEARSTATE_GFX12_H_
+
+static const unsigned int gfx12_SECT_CONTEXT_def_1[] = { /* 34 entries, all zero; each line's comment names the register it stands for */
+0x00000000, //mmSC_MEM_TEMPORAL
+0x00000000, //mmSC_MEM_SPEC_READ
+0x00000000, //mmPA_SC_VPORT_0_TL
+0x00000000, //mmPA_SC_VPORT_0_BR
+0x00000000, //mmPA_SC_VPORT_1_TL
+0x00000000, //mmPA_SC_VPORT_1_BR
+0x00000000, //mmPA_SC_VPORT_2_TL
+0x00000000, //mmPA_SC_VPORT_2_BR
+0x00000000, //mmPA_SC_VPORT_3_TL
+0x00000000, //mmPA_SC_VPORT_3_BR
+0x00000000, //mmPA_SC_VPORT_4_TL
+0x00000000, //mmPA_SC_VPORT_4_BR
+0x00000000, //mmPA_SC_VPORT_5_TL
+0x00000000, //mmPA_SC_VPORT_5_BR
+0x00000000, //mmPA_SC_VPORT_6_TL
+0x00000000, //mmPA_SC_VPORT_6_BR
+0x00000000, //mmPA_SC_VPORT_7_TL
+0x00000000, //mmPA_SC_VPORT_7_BR
+0x00000000, //mmPA_SC_VPORT_8_TL
+0x00000000, //mmPA_SC_VPORT_8_BR
+0x00000000, //mmPA_SC_VPORT_9_TL
+0x00000000, //mmPA_SC_VPORT_9_BR
+0x00000000, //mmPA_SC_VPORT_10_TL
+0x00000000, //mmPA_SC_VPORT_10_BR
+0x00000000, //mmPA_SC_VPORT_11_TL
+0x00000000, //mmPA_SC_VPORT_11_BR
+0x00000000, //mmPA_SC_VPORT_12_TL
+0x00000000, //mmPA_SC_VPORT_12_BR
+0x00000000, //mmPA_SC_VPORT_13_TL
+0x00000000, //mmPA_SC_VPORT_13_BR
+0x00000000, //mmPA_SC_VPORT_14_TL
+0x00000000, //mmPA_SC_VPORT_14_BR
+0x00000000, //mmPA_SC_VPORT_15_TL
+0x00000000, //mmPA_SC_VPORT_15_BR
+};
+
+static const unsigned int gfx12_SECT_CONTEXT_def_2[] = { /* 2 entries, both zero */
+0x00000000, //mmPA_CL_PROG_NEAR_CLIP_Z
+0x00000000, //mmPA_RATE_CNTL
+};
+
+static const unsigned int gfx12_SECT_CONTEXT_def_3[] = { /* single zero entry */
+0x00000000, //mmCP_PERFMON_CNTX_CNTL
+};
+
+static const unsigned int gfx12_SECT_CONTEXT_def_4[] = { /* 6 entries, all zero */
+0x00000000, //mmCONTEXT_RESERVED_REG0
+0x00000000, //mmCONTEXT_RESERVED_REG1
+0x00000000, //mmPA_SC_CLIPRECT_0_EXT
+0x00000000, //mmPA_SC_CLIPRECT_1_EXT
+0x00000000, //mmPA_SC_CLIPRECT_2_EXT
+0x00000000, //mmPA_SC_CLIPRECT_3_EXT
+};
+
+static const unsigned int gfx12_SECT_CONTEXT_def_5[] = { /* 11 entries, all zero */
+0x00000000, //mmPA_SC_HIZ_INFO
+0x00000000, //mmPA_SC_HIS_INFO
+0x00000000, //mmPA_SC_HIZ_BASE
+0x00000000, //mmPA_SC_HIZ_BASE_EXT
+0x00000000, //mmPA_SC_HIZ_SIZE_XY
+0x00000000, //mmPA_SC_HIS_BASE
+0x00000000, //mmPA_SC_HIS_BASE_EXT
+0x00000000, //mmPA_SC_HIS_SIZE_XY
+0x00000000, //mmPA_SC_BINNER_OUTPUT_TIMEOUT_CNTL
+0x00000000, //mmPA_SC_BINNER_DYNAMIC_BATCH_LIMIT
+0x00000000, //mmPA_SC_HISZ_CONTROL
+};
+
+static const unsigned int gfx12_SECT_CONTEXT_def_6[] = { /* 8 entries, all zero: CB_MEM0..7_INFO */
+0x00000000, //mmCB_MEM0_INFO
+0x00000000, //mmCB_MEM1_INFO
+0x00000000, //mmCB_MEM2_INFO
+0x00000000, //mmCB_MEM3_INFO
+0x00000000, //mmCB_MEM4_INFO
+0x00000000, //mmCB_MEM5_INFO
+0x00000000, //mmCB_MEM6_INFO
+0x00000000, //mmCB_MEM7_INFO
+};
+
+static const struct cs_extent_def gfx12_SECT_CONTEXT_defs[] = { /* one extent per gfx12_SECT_CONTEXT_def_N table; the last field equals that table's element count */
+ {gfx12_SECT_CONTEXT_def_1, 0x0000a03e, 34 }, /* middle field is presumably the register offset of the table's first entry -- confirm against struct cs_extent_def */
+ {gfx12_SECT_CONTEXT_def_2, 0x0000a0cc, 2 },
+ {gfx12_SECT_CONTEXT_def_3, 0x0000a0d8, 1 },
+ {gfx12_SECT_CONTEXT_def_4, 0x0000a0db, 6 },
+ {gfx12_SECT_CONTEXT_def_5, 0x0000a2e5, 11 },
+ {gfx12_SECT_CONTEXT_def_6, 0x0000a3c0, 8 },
+ { 0, 0, 0 } /* all-zero final entry; presumably the end-of-list marker -- confirm against the consumer */
+};
+
+static const struct cs_section_def gfx12_cs_data[] = { /* section list for gfx12: one SECT_CONTEXT section */
+ { gfx12_SECT_CONTEXT_defs, SECT_CONTEXT }, /* pairs the extent list above with its section id */
+ { 0, SECT_NONE } /* final entry with a null extent list and SECT_NONE; presumably the terminator -- confirm */
+};
+
+#endif /* __CLEARSTATE_GFX12_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h
index 567a904804bc..9c85ca6358c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h
@@ -21,8 +21,7 @@
*
*/
-static const unsigned int gfx9_SECT_CONTEXT_def_1[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_1[] = {
0x00000000, // DB_RENDER_CONTROL
0x00000000, // DB_COUNT_CONTROL
0x00000000, // DB_DEPTH_VIEW
@@ -236,8 +235,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_1[] =
0x00000000, // PA_SC_VPORT_ZMIN_15
0x3f800000, // PA_SC_VPORT_ZMAX_15
};
-static const unsigned int gfx9_SECT_CONTEXT_def_2[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_2[] = {
0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL
0x00000000, // PA_SC_TILE_STEERING_OVERRIDE
0x00000000, // CP_PERFMON_CNTX_CNTL
@@ -521,15 +519,13 @@ static const unsigned int gfx9_SECT_CONTEXT_def_2[] =
0x00000000, // CB_MRT6_EPITCH
0x00000000, // CB_MRT7_EPITCH
};
-static const unsigned int gfx9_SECT_CONTEXT_def_3[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_3[] = {
0x00000000, // PA_CL_POINT_X_RAD
0x00000000, // PA_CL_POINT_Y_RAD
0x00000000, // PA_CL_POINT_SIZE
0x00000000, // PA_CL_POINT_CULL_RAD
};
-static const unsigned int gfx9_SECT_CONTEXT_def_4[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_4[] = {
0x00000000, // DB_DEPTH_CONTROL
0x00000000, // DB_EQAA
0x00000000, // CB_COLOR_CONTROL
@@ -688,17 +684,14 @@ static const unsigned int gfx9_SECT_CONTEXT_def_4[] =
0x00000000, // VGT_GS_OUT_PRIM_TYPE
0x00000000, // IA_ENHANCE
};
-static const unsigned int gfx9_SECT_CONTEXT_def_5[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_5[] = {
0x00000000, // WD_ENHANCE
0x00000000, // VGT_PRIMITIVEID_EN
};
-static const unsigned int gfx9_SECT_CONTEXT_def_6[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_6[] = {
0x00000000, // VGT_PRIMITIVEID_RESET
};
-static const unsigned int gfx9_SECT_CONTEXT_def_7[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_7[] = {
0x00000000, // VGT_GS_MAX_PRIMS_PER_SUBGROUP
0x00000000, // VGT_DRAW_PAYLOAD_CNTL
0, // HOLE
@@ -766,8 +759,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_7[] =
0x00000000, // VGT_STRMOUT_CONFIG
0x00000000, // VGT_STRMOUT_BUFFER_CONFIG
};
-static const unsigned int gfx9_SECT_CONTEXT_def_8[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_8[] = {
0x00000000, // PA_SC_CENTROID_PRIORITY_0
0x00000000, // PA_SC_CENTROID_PRIORITY_1
0x00001000, // PA_SC_LINE_CNTL
@@ -924,8 +916,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_8[] =
0x00000000, // CB_COLOR7_DCC_BASE
0x00000000, // CB_COLOR7_DCC_BASE_EXT
};
-static const struct cs_extent_def gfx9_SECT_CONTEXT_defs[] =
-{
+static const struct cs_extent_def gfx9_SECT_CONTEXT_defs[] = {
{gfx9_SECT_CONTEXT_def_1, 0x0000a000, 212 },
{gfx9_SECT_CONTEXT_def_2, 0x0000a0d6, 282 },
{gfx9_SECT_CONTEXT_def_3, 0x0000a1f5, 4 },
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_si.h b/drivers/gpu/drm/amd/amdgpu/clearstate_si.h
index 66e39cdb5cb0..5fd96ddd7f0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/clearstate_si.h
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_si.h
@@ -21,8 +21,7 @@
*
*/
-static const u32 si_SECT_CONTEXT_def_1[] =
-{
+static const u32 si_SECT_CONTEXT_def_1[] = {
0x00000000, // DB_RENDER_CONTROL
0x00000000, // DB_COUNT_CONTROL
0x00000000, // DB_DEPTH_VIEW
@@ -236,8 +235,7 @@ static const u32 si_SECT_CONTEXT_def_1[] =
0x00000000, // PA_SC_VPORT_ZMIN_15
0x3f800000, // PA_SC_VPORT_ZMAX_15
};
-static const u32 si_SECT_CONTEXT_def_2[] =
-{
+static const u32 si_SECT_CONTEXT_def_2[] = {
0x00000000, // CP_PERFMON_CNTX_CNTL
0x00000000, // CP_RINGID
0x00000000, // CP_VMID
@@ -511,8 +509,7 @@ static const u32 si_SECT_CONTEXT_def_2[] =
0x00000000, // CB_BLEND6_CONTROL
0x00000000, // CB_BLEND7_CONTROL
};
-static const u32 si_SECT_CONTEXT_def_3[] =
-{
+static const u32 si_SECT_CONTEXT_def_3[] = {
0x00000000, // PA_CL_POINT_X_RAD
0x00000000, // PA_CL_POINT_Y_RAD
0x00000000, // PA_CL_POINT_SIZE
@@ -520,8 +517,7 @@ static const u32 si_SECT_CONTEXT_def_3[] =
0x00000000, // VGT_DMA_BASE_HI
0x00000000, // VGT_DMA_BASE
};
-static const u32 si_SECT_CONTEXT_def_4[] =
-{
+static const u32 si_SECT_CONTEXT_def_4[] = {
0x00000000, // DB_DEPTH_CONTROL
0x00000000, // DB_EQAA
0x00000000, // CB_COLOR_CONTROL
@@ -680,16 +676,13 @@ static const u32 si_SECT_CONTEXT_def_4[] =
0x00000000, // VGT_GS_OUT_PRIM_TYPE
0x00000000, // IA_ENHANCE
};
-static const u32 si_SECT_CONTEXT_def_5[] =
-{
+static const u32 si_SECT_CONTEXT_def_5[] = {
0x00000000, // VGT_PRIMITIVEID_EN
};
-static const u32 si_SECT_CONTEXT_def_6[] =
-{
+static const u32 si_SECT_CONTEXT_def_6[] = {
0x00000000, // VGT_PRIMITIVEID_RESET
};
-static const u32 si_SECT_CONTEXT_def_7[] =
-{
+static const u32 si_SECT_CONTEXT_def_7[] = {
0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN
0, // HOLE
0, // HOLE
@@ -924,8 +917,7 @@ static const u32 si_SECT_CONTEXT_def_7[] =
0x00000000, // CB_COLOR7_CLEAR_WORD0
0x00000000, // CB_COLOR7_CLEAR_WORD1
};
-static const struct cs_extent_def si_SECT_CONTEXT_defs[] =
-{
+static const struct cs_extent_def si_SECT_CONTEXT_defs[] = {
{si_SECT_CONTEXT_def_1, 0x0000a000, 212 },
{si_SECT_CONTEXT_def_2, 0x0000a0d8, 272 },
{si_SECT_CONTEXT_def_3, 0x0000a1f5, 6 },
diff --git a/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c
new file mode 100644
index 000000000000..ed1e25661706
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nv.h"
+
+#include "soc15_common.h"
+#include "soc15_hw_ip.h"
+#include "cyan_skillfish_ip_offset.h"
+
+int cyan_skillfish_reg_base_init(struct amdgpu_device *adev)
+{
+ /* HW has more IP blocks; only the blocks needed by the driver are initialized here */
+ uint32_t i;
+
+ adev->gfx.xcc_mask = 1; /* mask value 1: only bit 0 is set */
+ for (i = 0 ; i < MAX_INSTANCE ; ++i) { /* fill one reg_offset slot per (IP, instance) pair */
+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+ adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
+ adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i])); /* VCN slot points at UVD0_BASE */
+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+ adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i])); /* DCE slot points at DMU_BASE */
+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); /* SDMA0 reuses GC_BASE */
+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); /* SDMA1 reuses GC_BASE as well */
+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+ adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+ adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
+ }
+ return 0; /* no failure path: always returns 0 */
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index b8c47e0cf37a..bc7a2e06ab5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -157,6 +157,9 @@ static int cz_ih_irq_init(struct amdgpu_device *adev)
/* enable interrupts */
cz_ih_enable_interrupts(adev);
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
return 0;
}
@@ -194,6 +197,9 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev,
wptr = le32_to_cpu(*ih->wptr_cpu);
+ if (ih == &adev->irq.ih_soft)
+ goto out;
+
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;
@@ -216,6 +222,11 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32(mmIH_RB_CNTL, tmp);
out:
return (wptr & ih->ptr_mask);
@@ -269,9 +280,9 @@ static void cz_ih_set_rptr(struct amdgpu_device *adev,
WREG32(mmIH_RB_RPTR, ih->rptr);
}
-static int cz_ih_early_init(void *handle)
+static int cz_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = amdgpu_irq_add_domain(adev);
@@ -283,23 +294,27 @@ static int cz_ih_early_init(void *handle)
return 0;
}
-static int cz_ih_sw_init(void *handle)
+static int cz_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
if (r)
return r;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
r = amdgpu_irq_init(adev);
return r;
}
-static int cz_ih_sw_fini(void *handle)
+static int cz_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
amdgpu_irq_remove_domain(adev);
@@ -307,10 +322,10 @@ static int cz_ih_sw_fini(void *handle)
return 0;
}
-static int cz_ih_hw_init(void *handle)
+static int cz_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = cz_ih_irq_init(adev);
if (r)
@@ -319,32 +334,26 @@ static int cz_ih_hw_init(void *handle)
return 0;
}
-static int cz_ih_hw_fini(void *handle)
+static int cz_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- cz_ih_irq_disable(adev);
+ cz_ih_irq_disable(ip_block->adev);
return 0;
}
-static int cz_ih_suspend(void *handle)
+static int cz_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cz_ih_hw_fini(adev);
+ return cz_ih_hw_fini(ip_block);
}
-static int cz_ih_resume(void *handle)
+static int cz_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cz_ih_hw_init(adev);
+ return cz_ih_hw_init(ip_block);
}
-static bool cz_ih_is_idle(void *handle)
+static bool cz_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY))
@@ -353,11 +362,11 @@ static bool cz_ih_is_idle(void *handle)
return true;
}
-static int cz_ih_wait_for_idle(void *handle)
+static int cz_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -369,10 +378,10 @@ static int cz_ih_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int cz_ih_soft_reset(void *handle)
+static int cz_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__IH_BUSY_MASK)
@@ -399,14 +408,14 @@ static int cz_ih_soft_reset(void *handle)
return 0;
}
-static int cz_ih_set_clockgating_state(void *handle,
+static int cz_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
// TODO
return 0;
}
-static int cz_ih_set_powergating_state(void *handle,
+static int cz_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
// TODO
@@ -416,7 +425,6 @@ static int cz_ih_set_powergating_state(void *handle,
static const struct amd_ip_funcs cz_ih_ip_funcs = {
.name = "cz_ih",
.early_init = cz_ih_early_init,
- .late_init = NULL,
.sw_init = cz_ih_sw_init,
.sw_fini = cz_ih_sw_fini,
.hw_init = cz_ih_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index d1570a462a51..72ca6538b2e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -21,7 +21,10 @@
*
*/
+#include <drm/drm_edid.h>
#include <drm/drm_fourcc.h>
+#include <drm/drm_modeset_helper.h>
+#include <drm/drm_modeset_helper_vtables.h>
#include <drm/drm_vblank.h>
#include "amdgpu.h"
@@ -49,9 +52,9 @@
static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev);
static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev);
+static void dce_v10_0_hpd_int_ack(struct amdgpu_device *adev, int hpd);
-static const u32 crtc_offsets[] =
-{
+static const u32 crtc_offsets[] = {
CRTC0_REGISTER_OFFSET,
CRTC1_REGISTER_OFFSET,
CRTC2_REGISTER_OFFSET,
@@ -61,8 +64,7 @@ static const u32 crtc_offsets[] =
CRTC6_REGISTER_OFFSET
};
-static const u32 hpd_offsets[] =
-{
+static const u32 hpd_offsets[] = {
HPD0_REGISTER_OFFSET,
HPD1_REGISTER_OFFSET,
HPD2_REGISTER_OFFSET,
@@ -119,30 +121,26 @@ static const struct {
.hpd = DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK
} };
-static const u32 golden_settings_tonga_a11[] =
-{
+static const u32 golden_settings_tonga_a11[] = {
mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
mmFBC_MISC, 0x1f311fff, 0x12300000,
mmHDMI_CONTROL, 0x31000111, 0x00000011,
};
-static const u32 tonga_mgcg_cgcg_init[] =
-{
+static const u32 tonga_mgcg_cgcg_init[] = {
mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100,
mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000,
};
-static const u32 golden_settings_fiji_a10[] =
-{
+static const u32 golden_settings_fiji_a10[] = {
mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
mmFBC_MISC, 0x1f311fff, 0x12300000,
mmHDMI_CONTROL, 0x31000111, 0x00000011,
};
-static const u32 fiji_mgcg_cgcg_init[] =
-{
+static const u32 fiji_mgcg_cgcg_init[] = {
mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100,
mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000,
};
@@ -367,6 +365,7 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev)
AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS);
WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+ dce_v10_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd);
dce_v10_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
amdgpu_irq_get(adev, &adev->hpd_irq,
amdgpu_connector->hpd.hpd);
@@ -1040,7 +1039,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
(u32)mode->clock);
line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
(u32)mode->clock);
- line_time = min(line_time, (u32)65535);
+ line_time = min_t(u32, line_time, 65535);
/* watermark for high clocks */
if (adev->pm.dpm_enabled) {
@@ -1070,7 +1069,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
wm_high.num_heads = num_heads;
/* set for high clocks */
- latency_watermark_a = min(dce_v10_0_latency_watermark(&wm_high), (u32)65535);
+ latency_watermark_a = min_t(u32, dce_v10_0_latency_watermark(&wm_high), 65535);
/* possibly force display priority to high */
/* should really do this at mode validation time... */
@@ -1109,7 +1108,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
wm_low.num_heads = num_heads;
/* set for low clocks */
- latency_watermark_b = min(dce_v10_0_latency_watermark(&wm_low), (u32)65535);
+ latency_watermark_b = min_t(u32, dce_v10_0_latency_watermark(&wm_low), 65535);
/* possibly force display priority to high */
/* should really do this at mode validation time... */
@@ -1142,8 +1141,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
/* save values for DPM */
amdgpu_crtc->line_time = line_time;
- amdgpu_crtc->wm_high = latency_watermark_a;
- amdgpu_crtc->wm_low = latency_watermark_b;
+
/* Save number of lines the linebuffer leads before the scanout */
amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
}
@@ -1300,7 +1298,7 @@ static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder
return;
}
- sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb);
+ sad_count = drm_edid_to_speaker_allocation(amdgpu_connector->edid, &sadb);
if (sad_count < 0) {
DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
sad_count = 0;
@@ -1370,7 +1368,7 @@ static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder)
return;
}
- sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
+ sad_count = drm_edid_to_sad(amdgpu_connector->edid, &sads);
if (sad_count < 0)
DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
if (sad_count <= 0)
@@ -1423,8 +1421,7 @@ static void dce_v10_0_audio_enable(struct amdgpu_device *adev,
enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0);
}
-static const u32 pin_offsets[] =
-{
+static const u32 pin_offsets[] = {
AUD0_REGISTER_OFFSET,
AUD1_REGISTER_OFFSET,
AUD2_REGISTER_OFFSET,
@@ -1464,17 +1461,12 @@ static int dce_v10_0_audio_init(struct amdgpu_device *adev)
static void dce_v10_0_audio_fini(struct amdgpu_device *adev)
{
- int i;
-
if (!amdgpu_audio)
return;
if (!adev->mode_info.audio.enabled)
return;
- for (i = 0; i < adev->mode_info.audio.num_pins; i++)
- dce_v10_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
-
adev->mode_info.audio.enabled = false;
}
@@ -1809,8 +1801,7 @@ static void dce_v10_0_afmt_fini(struct amdgpu_device *adev)
}
}
-static const u32 vga_control_regs[6] =
-{
+static const u32 vga_control_regs[6] = {
mmD1VGA_CONTROL,
mmD2VGA_CONTROL,
mmD3VGA_CONTROL,
@@ -1884,6 +1875,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
return r;
if (!atomic) {
+ abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
@@ -2404,6 +2396,7 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
+ aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {
@@ -2532,7 +2525,7 @@ static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode)
break;
}
/* adjust pm to dpms */
- amdgpu_pm_compute_clocks(adev);
+ amdgpu_dpm_compute_clocks(adev);
}
static void dce_v10_0_crtc_prepare(struct drm_crtc *crtc)
@@ -2688,6 +2681,32 @@ static const struct drm_crtc_helper_funcs dce_v10_0_crtc_helper_funcs = {
.get_scanout_position = amdgpu_crtc_get_scanout_position,
};
+static void dce_v10_0_panic_flush(struct drm_plane *plane)
+{
+ struct drm_framebuffer *fb;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_device *adev;
+ uint32_t fb_format;
+
+ if (!plane->fb) /* nothing to do when the plane has no framebuffer attached */
+ return;
+
+ fb = plane->fb;
+ amdgpu_crtc = to_amdgpu_crtc(plane->crtc);
+ adev = drm_to_adev(fb->dev); /* not named again below; presumably RREG32/WREG32 use it implicitly -- confirm */
+
+ /* Disable DC tiling: read GRPH_CONTROL for this CRTC, clear the GRPH_ARRAY_MODE field, write it back */
+ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK;
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+
+}
+
+static const struct drm_plane_helper_funcs dce_v10_0_drm_primary_plane_helper_funcs = { /* plane helper callbacks; attached to the CRTC's primary plane via drm_plane_helper_add() in dce_v10_0_crtc_init() */
+ .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+ .panic_flush = dce_v10_0_panic_flush, /* DCE 10 specific hook that clears the GRPH_ARRAY_MODE field */
+};
+
static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index)
{
struct amdgpu_crtc *amdgpu_crtc;
@@ -2735,13 +2754,14 @@ static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index)
amdgpu_crtc->encoder = NULL;
amdgpu_crtc->connector = NULL;
drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v10_0_crtc_helper_funcs);
+ drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v10_0_drm_primary_plane_helper_funcs);
return 0;
}
-static int dce_v10_0_early_init(void *handle)
+static int dce_v10_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->audio_endpt_rreg = &dce_v10_0_audio_endpt_rreg;
adev->audio_endpt_wreg = &dce_v10_0_audio_endpt_wreg;
@@ -2766,10 +2786,10 @@ static int dce_v10_0_early_init(void *handle)
return 0;
}
-static int dce_v10_0_sw_init(void *handle)
+static int dce_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
@@ -2798,7 +2818,7 @@ static int dce_v10_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.preferred_depth = 24;
adev_to_drm(adev)->mode_config.prefer_shadow = 1;
- adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
+ adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
r = amdgpu_display_modeset_create_props(adev);
if (r)
@@ -2828,17 +2848,28 @@ static int dce_v10_0_sw_init(void *handle)
if (r)
return r;
+ /* Disable vblank IRQs aggressively for power-saving */
+ /* XXX: can this be enabled for DC? */
+ adev_to_drm(adev)->vblank_disable_immediate = true;
+
+ r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
+ if (r)
+ return r;
+
+ INIT_DELAYED_WORK(&adev->hotplug_work,
+ amdgpu_display_hotplug_work_func);
+
drm_kms_helper_poll_init(adev_to_drm(adev));
adev->mode_info.mode_config_initialized = true;
return 0;
}
-static int dce_v10_0_sw_fini(void *handle)
+static int dce_v10_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- kfree(adev->mode_info.bios_hardcoded_edid);
+ drm_edid_free(adev->mode_info.bios_hardcoded_edid);
drm_kms_helper_poll_fini(adev_to_drm(adev));
@@ -2852,10 +2883,10 @@ static int dce_v10_0_sw_fini(void *handle)
return 0;
}
-static int dce_v10_0_hw_init(void *handle)
+static int dce_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
dce_v10_0_init_golden_registers(adev);
@@ -2877,10 +2908,10 @@ static int dce_v10_0_hw_init(void *handle)
return 0;
}
-static int dce_v10_0_hw_fini(void *handle)
+static int dce_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
dce_v10_0_hpd_fini(adev);
@@ -2890,12 +2921,14 @@ static int dce_v10_0_hw_fini(void *handle)
dce_v10_0_pageflip_interrupt_fini(adev);
+ flush_delayed_work(&adev->hotplug_work);
+
return 0;
}
-static int dce_v10_0_suspend(void *handle)
+static int dce_v10_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_display_suspend_helper(adev);
@@ -2905,18 +2938,18 @@ static int dce_v10_0_suspend(void *handle)
adev->mode_info.bl_level =
amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
- return dce_v10_0_hw_fini(handle);
+ return dce_v10_0_hw_fini(ip_block);
}
-static int dce_v10_0_resume(void *handle)
+static int dce_v10_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
adev->mode_info.bl_level);
- ret = dce_v10_0_hw_init(handle);
+ ret = dce_v10_0_hw_init(ip_block);
/* turn on the BL */
if (adev->mode_info.bl_encoder) {
@@ -2931,27 +2964,22 @@ static int dce_v10_0_resume(void *handle)
return amdgpu_display_resume_helper(adev);
}
-static bool dce_v10_0_is_idle(void *handle)
+static bool dce_v10_0_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int dce_v10_0_wait_for_idle(void *handle)
+static bool dce_v10_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- return 0;
-}
-
-static bool dce_v10_0_check_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return dce_v10_0_is_display_hung(adev);
}
-static int dce_v10_0_soft_reset(void *handle)
+static int dce_v10_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0, tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (dce_v10_0_is_display_hung(adev))
srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
@@ -3041,7 +3069,7 @@ static int dce_v10_0_set_hpd_irq_state(struct amdgpu_device *adev,
u32 tmp;
if (hpd >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", hpd);
+ DRM_DEBUG("invalid hpd %d\n", hpd);
return 0;
}
@@ -3193,7 +3221,7 @@ static void dce_v10_0_hpd_int_ack(struct amdgpu_device *adev,
u32 tmp;
if (hpd >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", hpd);
+ DRM_DEBUG("invalid hpd %d\n", hpd);
return;
}
@@ -3288,20 +3316,20 @@ static int dce_v10_0_hpd_irq(struct amdgpu_device *adev,
if (disp_int & mask) {
dce_v10_0_hpd_int_ack(adev, hpd);
- schedule_work(&adev->hotplug_work);
+ schedule_delayed_work(&adev->hotplug_work, 0);
DRM_DEBUG("IH: HPD%d\n", hpd + 1);
}
return 0;
}
-static int dce_v10_0_set_clockgating_state(void *handle,
+static int dce_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int dce_v10_0_set_powergating_state(void *handle,
+static int dce_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -3310,7 +3338,6 @@ static int dce_v10_0_set_powergating_state(void *handle,
static const struct amd_ip_funcs dce_v10_0_ip_funcs = {
.name = "dce_v10_0",
.early_init = dce_v10_0_early_init,
- .late_init = NULL,
.sw_init = dce_v10_0_sw_init,
.sw_fini = dce_v10_0_sw_fini,
.hw_init = dce_v10_0_hw_init,
@@ -3318,7 +3345,6 @@ static const struct amd_ip_funcs dce_v10_0_ip_funcs = {
.suspend = dce_v10_0_suspend,
.resume = dce_v10_0_resume,
.is_idle = dce_v10_0_is_idle,
- .wait_for_idle = dce_v10_0_wait_for_idle,
.check_soft_reset = dce_v10_0_check_soft_reset,
.soft_reset = dce_v10_0_soft_reset,
.set_clockgating_state = dce_v10_0_set_clockgating_state,
@@ -3636,8 +3662,7 @@ static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev)
adev->hpd_irq.funcs = &dce_v10_0_hpd_irq_funcs;
}
-const struct amdgpu_ip_block_version dce_v10_0_ip_block =
-{
+const struct amdgpu_ip_block_version dce_v10_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_DCE,
.major = 10,
.minor = 0,
@@ -3645,8 +3670,7 @@ const struct amdgpu_ip_block_version dce_v10_0_ip_block =
.funcs = &dce_v10_0_ip_funcs,
};
-const struct amdgpu_ip_block_version dce_v10_1_ip_block =
-{
+const struct amdgpu_ip_block_version dce_v10_1_ip_block = {
.type = AMD_IP_BLOCK_TYPE_DCE,
.major = 10,
.minor = 1,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
deleted file mode 100644
index 18a7b3bd633b..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ /dev/null
@@ -1,3784 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include <drm/drm_fourcc.h>
-#include <drm/drm_vblank.h>
-
-#include "amdgpu.h"
-#include "amdgpu_pm.h"
-#include "amdgpu_i2c.h"
-#include "vid.h"
-#include "atom.h"
-#include "amdgpu_atombios.h"
-#include "atombios_crtc.h"
-#include "atombios_encoders.h"
-#include "amdgpu_pll.h"
-#include "amdgpu_connectors.h"
-#include "amdgpu_display.h"
-#include "dce_v11_0.h"
-
-#include "dce/dce_11_0_d.h"
-#include "dce/dce_11_0_sh_mask.h"
-#include "dce/dce_11_0_enum.h"
-#include "oss/oss_3_0_d.h"
-#include "oss/oss_3_0_sh_mask.h"
-#include "gmc/gmc_8_1_d.h"
-#include "gmc/gmc_8_1_sh_mask.h"
-
-#include "ivsrcid/ivsrcid_vislands30.h"
-
-static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev);
-static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev);
-
-static const u32 crtc_offsets[] =
-{
- CRTC0_REGISTER_OFFSET,
- CRTC1_REGISTER_OFFSET,
- CRTC2_REGISTER_OFFSET,
- CRTC3_REGISTER_OFFSET,
- CRTC4_REGISTER_OFFSET,
- CRTC5_REGISTER_OFFSET,
- CRTC6_REGISTER_OFFSET
-};
-
-static const u32 hpd_offsets[] =
-{
- HPD0_REGISTER_OFFSET,
- HPD1_REGISTER_OFFSET,
- HPD2_REGISTER_OFFSET,
- HPD3_REGISTER_OFFSET,
- HPD4_REGISTER_OFFSET,
- HPD5_REGISTER_OFFSET
-};
-
-static const uint32_t dig_offsets[] = {
- DIG0_REGISTER_OFFSET,
- DIG1_REGISTER_OFFSET,
- DIG2_REGISTER_OFFSET,
- DIG3_REGISTER_OFFSET,
- DIG4_REGISTER_OFFSET,
- DIG5_REGISTER_OFFSET,
- DIG6_REGISTER_OFFSET,
- DIG7_REGISTER_OFFSET,
- DIG8_REGISTER_OFFSET
-};
-
-static const struct {
- uint32_t reg;
- uint32_t vblank;
- uint32_t vline;
- uint32_t hpd;
-
-} interrupt_status_offsets[] = { {
- .reg = mmDISP_INTERRUPT_STATUS,
- .vblank = DISP_INTERRUPT_STATUS__LB_D1_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS__LB_D1_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS__DC_HPD1_INTERRUPT_MASK
-}, {
- .reg = mmDISP_INTERRUPT_STATUS_CONTINUE,
- .vblank = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS_CONTINUE__DC_HPD2_INTERRUPT_MASK
-}, {
- .reg = mmDISP_INTERRUPT_STATUS_CONTINUE2,
- .vblank = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS_CONTINUE2__DC_HPD3_INTERRUPT_MASK
-}, {
- .reg = mmDISP_INTERRUPT_STATUS_CONTINUE3,
- .vblank = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS_CONTINUE3__DC_HPD4_INTERRUPT_MASK
-}, {
- .reg = mmDISP_INTERRUPT_STATUS_CONTINUE4,
- .vblank = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS_CONTINUE4__DC_HPD5_INTERRUPT_MASK
-}, {
- .reg = mmDISP_INTERRUPT_STATUS_CONTINUE5,
- .vblank = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK
-} };
-
-static const u32 cz_golden_settings_a11[] =
-{
- mmCRTC_DOUBLE_BUFFER_CONTROL, 0x00010101, 0x00010000,
- mmFBC_MISC, 0x1f311fff, 0x14300000,
-};
-
-static const u32 cz_mgcg_cgcg_init[] =
-{
- mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100,
- mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000,
-};
-
-static const u32 stoney_golden_settings_a11[] =
-{
- mmCRTC_DOUBLE_BUFFER_CONTROL, 0x00010101, 0x00010000,
- mmFBC_MISC, 0x1f311fff, 0x14302000,
-};
-
-static const u32 polaris11_golden_settings_a11[] =
-{
- mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
- mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
- mmFBC_DEBUG1, 0xffffffff, 0x00000008,
- mmFBC_MISC, 0x9f313fff, 0x14302008,
- mmHDMI_CONTROL, 0x313f031f, 0x00000011,
-};
-
-static const u32 polaris10_golden_settings_a11[] =
-{
- mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
- mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
- mmFBC_MISC, 0x9f313fff, 0x14302008,
- mmHDMI_CONTROL, 0x313f031f, 0x00000011,
-};
-
-static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev)
-{
- switch (adev->asic_type) {
- case CHIP_CARRIZO:
- amdgpu_device_program_register_sequence(adev,
- cz_mgcg_cgcg_init,
- ARRAY_SIZE(cz_mgcg_cgcg_init));
- amdgpu_device_program_register_sequence(adev,
- cz_golden_settings_a11,
- ARRAY_SIZE(cz_golden_settings_a11));
- break;
- case CHIP_STONEY:
- amdgpu_device_program_register_sequence(adev,
- stoney_golden_settings_a11,
- ARRAY_SIZE(stoney_golden_settings_a11));
- break;
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- amdgpu_device_program_register_sequence(adev,
- polaris11_golden_settings_a11,
- ARRAY_SIZE(polaris11_golden_settings_a11));
- break;
- case CHIP_POLARIS10:
- case CHIP_VEGAM:
- amdgpu_device_program_register_sequence(adev,
- polaris10_golden_settings_a11,
- ARRAY_SIZE(polaris10_golden_settings_a11));
- break;
- default:
- break;
- }
-}
-
-static u32 dce_v11_0_audio_endpt_rreg(struct amdgpu_device *adev,
- u32 block_offset, u32 reg)
-{
- unsigned long flags;
- u32 r;
-
- spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
- WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
- r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset);
- spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
-
- return r;
-}
-
-static void dce_v11_0_audio_endpt_wreg(struct amdgpu_device *adev,
- u32 block_offset, u32 reg, u32 v)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
- WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
- WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v);
- spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
-}
-
-static u32 dce_v11_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
-{
- if (crtc < 0 || crtc >= adev->mode_info.num_crtc)
- return 0;
- else
- return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]);
-}
-
-static void dce_v11_0_pageflip_interrupt_init(struct amdgpu_device *adev)
-{
- unsigned i;
-
- /* Enable pflip interrupts */
- for (i = 0; i < adev->mode_info.num_crtc; i++)
- amdgpu_irq_get(adev, &adev->pageflip_irq, i);
-}
-
-static void dce_v11_0_pageflip_interrupt_fini(struct amdgpu_device *adev)
-{
- unsigned i;
-
- /* Disable pflip interrupts */
- for (i = 0; i < adev->mode_info.num_crtc; i++)
- amdgpu_irq_put(adev, &adev->pageflip_irq, i);
-}
-
-/**
- * dce_v11_0_page_flip - pageflip callback.
- *
- * @adev: amdgpu_device pointer
- * @crtc_id: crtc to cleanup pageflip on
- * @crtc_base: new address of the crtc (GPU MC address)
- * @async: asynchronous flip
- *
- * Triggers the actual pageflip by updating the primary
- * surface base address.
- */
-static void dce_v11_0_page_flip(struct amdgpu_device *adev,
- int crtc_id, u64 crtc_base, bool async)
-{
- struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
- struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb;
- u32 tmp;
-
- /* flip immediate for async, default is vsync */
- tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,
- GRPH_SURFACE_UPDATE_IMMEDIATE_EN, async ? 1 : 0);
- WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
- /* update pitch */
- WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset,
- fb->pitches[0] / fb->format->cpp[0]);
- /* update the scanout addresses */
- WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
- upper_32_bits(crtc_base));
- /* writing to the low address triggers the update */
- WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- lower_32_bits(crtc_base));
- /* post the write */
- RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
-}
-
-static int dce_v11_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
- u32 *vbl, u32 *position)
-{
- if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc))
- return -EINVAL;
-
- *vbl = RREG32(mmCRTC_V_BLANK_START_END + crtc_offsets[crtc]);
- *position = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
-
- return 0;
-}
-
-/**
- * dce_v11_0_hpd_sense - hpd sense callback.
- *
- * @adev: amdgpu_device pointer
- * @hpd: hpd (hotplug detect) pin
- *
- * Checks if a digital monitor is connected (evergreen+).
- * Returns true if connected, false if not connected.
- */
-static bool dce_v11_0_hpd_sense(struct amdgpu_device *adev,
- enum amdgpu_hpd_id hpd)
-{
- bool connected = false;
-
- if (hpd >= adev->mode_info.num_hpd)
- return connected;
-
- if (RREG32(mmDC_HPD_INT_STATUS + hpd_offsets[hpd]) &
- DC_HPD_INT_STATUS__DC_HPD_SENSE_MASK)
- connected = true;
-
- return connected;
-}
-
-/**
- * dce_v11_0_hpd_set_polarity - hpd set polarity callback.
- *
- * @adev: amdgpu_device pointer
- * @hpd: hpd (hotplug detect) pin
- *
- * Set the polarity of the hpd pin (evergreen+).
- */
-static void dce_v11_0_hpd_set_polarity(struct amdgpu_device *adev,
- enum amdgpu_hpd_id hpd)
-{
- u32 tmp;
- bool connected = dce_v11_0_hpd_sense(adev, hpd);
-
- if (hpd >= adev->mode_info.num_hpd)
- return;
-
- tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
- if (connected)
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY, 0);
- else
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY, 1);
- WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
-}
-
-/**
- * dce_v11_0_hpd_init - hpd setup callback.
- *
- * @adev: amdgpu_device pointer
- *
- * Setup the hpd pins used by the card (evergreen+).
- * Enable the pin, set the polarity, and enable the hpd interrupts.
- */
-static void dce_v11_0_hpd_init(struct amdgpu_device *adev)
-{
- struct drm_device *dev = adev_to_drm(adev);
- struct drm_connector *connector;
- struct drm_connector_list_iter iter;
- u32 tmp;
-
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(connector, &iter) {
- struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
-
- if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
- continue;
-
- if (connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
- connector->connector_type == DRM_MODE_CONNECTOR_LVDS) {
- /* don't try to enable hpd on eDP or LVDS avoid breaking the
- * aux dp channel on imac and help (but not completely fix)
- * https://bugzilla.redhat.com/show_bug.cgi?id=726143
- * also avoid interrupt storms during dpms.
- */
- tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 0);
- WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
- continue;
- }
-
- tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_CONTROL, DC_HPD_EN, 1);
- WREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
-
- tmp = RREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_TOGGLE_FILT_CNTL,
- DC_HPD_CONNECT_INT_DELAY,
- AMDGPU_HPD_CONNECT_INT_DELAY_IN_MS);
- tmp = REG_SET_FIELD(tmp, DC_HPD_TOGGLE_FILT_CNTL,
- DC_HPD_DISCONNECT_INT_DELAY,
- AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS);
- WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
-
- dce_v11_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
- amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
- }
- drm_connector_list_iter_end(&iter);
-}
-
-/**
- * dce_v11_0_hpd_fini - hpd tear down callback.
- *
- * @adev: amdgpu_device pointer
- *
- * Tear down the hpd pins used by the card (evergreen+).
- * Disable the hpd interrupts.
- */
-static void dce_v11_0_hpd_fini(struct amdgpu_device *adev)
-{
- struct drm_device *dev = adev_to_drm(adev);
- struct drm_connector *connector;
- struct drm_connector_list_iter iter;
- u32 tmp;
-
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(connector, &iter) {
- struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
-
- if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
- continue;
-
- tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_CONTROL, DC_HPD_EN, 0);
- WREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
-
- amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
- }
- drm_connector_list_iter_end(&iter);
-}
-
-static u32 dce_v11_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
-{
- return mmDC_GPIO_HPD_A;
-}
-
-static bool dce_v11_0_is_display_hung(struct amdgpu_device *adev)
-{
- u32 crtc_hung = 0;
- u32 crtc_status[6];
- u32 i, j, tmp;
-
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
- if (REG_GET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN)) {
- crtc_status[i] = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
- crtc_hung |= (1 << i);
- }
- }
-
- for (j = 0; j < 10; j++) {
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- if (crtc_hung & (1 << i)) {
- tmp = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
- if (tmp != crtc_status[i])
- crtc_hung &= ~(1 << i);
- }
- }
- if (crtc_hung == 0)
- return false;
- udelay(100);
- }
-
- return true;
-}
-
-static void dce_v11_0_set_vga_render_state(struct amdgpu_device *adev,
- bool render)
-{
- u32 tmp;
-
- /* Lockout access through VGA aperture*/
- tmp = RREG32(mmVGA_HDP_CONTROL);
- if (render)
- tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 0);
- else
- tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
- WREG32(mmVGA_HDP_CONTROL, tmp);
-
- /* disable VGA render */
- tmp = RREG32(mmVGA_RENDER_CONTROL);
- if (render)
- tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 1);
- else
- tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
- WREG32(mmVGA_RENDER_CONTROL, tmp);
-}
-
-static int dce_v11_0_get_num_crtc (struct amdgpu_device *adev)
-{
- int num_crtc = 0;
-
- switch (adev->asic_type) {
- case CHIP_CARRIZO:
- num_crtc = 3;
- break;
- case CHIP_STONEY:
- num_crtc = 2;
- break;
- case CHIP_POLARIS10:
- case CHIP_VEGAM:
- num_crtc = 6;
- break;
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- num_crtc = 5;
- break;
- default:
- num_crtc = 0;
- }
- return num_crtc;
-}
-
-void dce_v11_0_disable_dce(struct amdgpu_device *adev)
-{
- /*Disable VGA render and enabled crtc, if has DCE engine*/
- if (amdgpu_atombios_has_dce_engine_info(adev)) {
- u32 tmp;
- int crtc_enabled, i;
-
- dce_v11_0_set_vga_render_state(adev, false);
-
- /*Disable crtc*/
- for (i = 0; i < dce_v11_0_get_num_crtc(adev); i++) {
- crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]),
- CRTC_CONTROL, CRTC_MASTER_EN);
- if (crtc_enabled) {
- WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
- tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
- tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0);
- WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp);
- WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
- }
- }
- }
-}
-
-static void dce_v11_0_program_fmt(struct drm_encoder *encoder)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
- struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
- int bpc = 0;
- u32 tmp = 0;
- enum amdgpu_connector_dither dither = AMDGPU_FMT_DITHER_DISABLE;
-
- if (connector) {
- struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
- bpc = amdgpu_connector_get_monitor_bpc(connector);
- dither = amdgpu_connector->dither;
- }
-
- /* LVDS/eDP FMT is set up by atom */
- if (amdgpu_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
- return;
-
- /* not needed for analog */
- if ((amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
- (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
- return;
-
- if (bpc == 0)
- return;
-
- switch (bpc) {
- case 6:
- if (dither == AMDGPU_FMT_DITHER_ENABLE) {
- /* XXX sort out optimal dither settings */
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 0);
- } else {
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 0);
- }
- break;
- case 8:
- if (dither == AMDGPU_FMT_DITHER_ENABLE) {
- /* XXX sort out optimal dither settings */
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_RGB_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 1);
- } else {
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 1);
- }
- break;
- case 10:
- if (dither == AMDGPU_FMT_DITHER_ENABLE) {
- /* XXX sort out optimal dither settings */
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_RGB_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 2);
- } else {
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 2);
- }
- break;
- default:
- /* not needed */
- break;
- }
-
- WREG32(mmFMT_BIT_DEPTH_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-}
-
-
-/* display watermark setup */
-/**
- * dce_v11_0_line_buffer_adjust - Set up the line buffer
- *
- * @adev: amdgpu_device pointer
- * @amdgpu_crtc: the selected display controller
- * @mode: the current display mode on the selected display
- * controller
- *
- * Setup up the line buffer allocation for
- * the selected display controller (CIK).
- * Returns the line buffer size in pixels.
- */
-static u32 dce_v11_0_line_buffer_adjust(struct amdgpu_device *adev,
- struct amdgpu_crtc *amdgpu_crtc,
- struct drm_display_mode *mode)
-{
- u32 tmp, buffer_alloc, i, mem_cfg;
- u32 pipe_offset = amdgpu_crtc->crtc_id;
- /*
- * Line Buffer Setup
- * There are 6 line buffers, one for each display controllers.
- * There are 3 partitions per LB. Select the number of partitions
- * to enable based on the display width. For display widths larger
- * than 4096, you need use to use 2 display controllers and combine
- * them using the stereo blender.
- */
- if (amdgpu_crtc->base.enabled && mode) {
- if (mode->crtc_hdisplay < 1920) {
- mem_cfg = 1;
- buffer_alloc = 2;
- } else if (mode->crtc_hdisplay < 2560) {
- mem_cfg = 2;
- buffer_alloc = 2;
- } else if (mode->crtc_hdisplay < 4096) {
- mem_cfg = 0;
- buffer_alloc = (adev->flags & AMD_IS_APU) ? 2 : 4;
- } else {
- DRM_DEBUG_KMS("Mode too big for LB!\n");
- mem_cfg = 0;
- buffer_alloc = (adev->flags & AMD_IS_APU) ? 2 : 4;
- }
- } else {
- mem_cfg = 1;
- buffer_alloc = 0;
- }
-
- tmp = RREG32(mmLB_MEMORY_CTRL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, LB_MEMORY_CTRL, LB_MEMORY_CONFIG, mem_cfg);
- WREG32(mmLB_MEMORY_CTRL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset);
- tmp = REG_SET_FIELD(tmp, PIPE0_DMIF_BUFFER_CONTROL, DMIF_BUFFERS_ALLOCATED, buffer_alloc);
- WREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset, tmp);
-
- for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset);
- if (REG_GET_FIELD(tmp, PIPE0_DMIF_BUFFER_CONTROL, DMIF_BUFFERS_ALLOCATION_COMPLETED))
- break;
- udelay(1);
- }
-
- if (amdgpu_crtc->base.enabled && mode) {
- switch (mem_cfg) {
- case 0:
- default:
- return 4096 * 2;
- case 1:
- return 1920 * 2;
- case 2:
- return 2560 * 2;
- }
- }
-
- /* controller not enabled, so no lb used */
- return 0;
-}
-
-/**
- * cik_get_number_of_dram_channels - get the number of dram channels
- *
- * @adev: amdgpu_device pointer
- *
- * Look up the number of video ram channels (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the number of dram channels
- */
-static u32 cik_get_number_of_dram_channels(struct amdgpu_device *adev)
-{
- u32 tmp = RREG32(mmMC_SHARED_CHMAP);
-
- switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) {
- case 0:
- default:
- return 1;
- case 1:
- return 2;
- case 2:
- return 4;
- case 3:
- return 8;
- case 4:
- return 3;
- case 5:
- return 6;
- case 6:
- return 10;
- case 7:
- return 12;
- case 8:
- return 16;
- }
-}
-
-struct dce10_wm_params {
- u32 dram_channels; /* number of dram channels */
- u32 yclk; /* bandwidth per dram data pin in kHz */
- u32 sclk; /* engine clock in kHz */
- u32 disp_clk; /* display clock in kHz */
- u32 src_width; /* viewport width */
- u32 active_time; /* active display time in ns */
- u32 blank_time; /* blank time in ns */
- bool interlaced; /* mode is interlaced */
- fixed20_12 vsc; /* vertical scale ratio */
- u32 num_heads; /* number of active crtcs */
- u32 bytes_per_pixel; /* bytes per pixel display + overlay */
- u32 lb_size; /* line buffer allocated to pipe */
- u32 vtaps; /* vertical scaler taps */
-};
-
-/**
- * dce_v11_0_dram_bandwidth - get the dram bandwidth
- *
- * @wm: watermark calculation data
- *
- * Calculate the raw dram bandwidth (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the dram bandwidth in MBytes/s
- */
-static u32 dce_v11_0_dram_bandwidth(struct dce10_wm_params *wm)
-{
- /* Calculate raw DRAM Bandwidth */
- fixed20_12 dram_efficiency; /* 0.7 */
- fixed20_12 yclk, dram_channels, bandwidth;
- fixed20_12 a;
-
- a.full = dfixed_const(1000);
- yclk.full = dfixed_const(wm->yclk);
- yclk.full = dfixed_div(yclk, a);
- dram_channels.full = dfixed_const(wm->dram_channels * 4);
- a.full = dfixed_const(10);
- dram_efficiency.full = dfixed_const(7);
- dram_efficiency.full = dfixed_div(dram_efficiency, a);
- bandwidth.full = dfixed_mul(dram_channels, yclk);
- bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
-
- return dfixed_trunc(bandwidth);
-}
-
-/**
- * dce_v11_0_dram_bandwidth_for_display - get the dram bandwidth for display
- *
- * @wm: watermark calculation data
- *
- * Calculate the dram bandwidth used for display (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the dram bandwidth for display in MBytes/s
- */
-static u32 dce_v11_0_dram_bandwidth_for_display(struct dce10_wm_params *wm)
-{
- /* Calculate DRAM Bandwidth and the part allocated to display. */
- fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
- fixed20_12 yclk, dram_channels, bandwidth;
- fixed20_12 a;
-
- a.full = dfixed_const(1000);
- yclk.full = dfixed_const(wm->yclk);
- yclk.full = dfixed_div(yclk, a);
- dram_channels.full = dfixed_const(wm->dram_channels * 4);
- a.full = dfixed_const(10);
- disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
- disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
- bandwidth.full = dfixed_mul(dram_channels, yclk);
- bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
-
- return dfixed_trunc(bandwidth);
-}
-
-/**
- * dce_v11_0_data_return_bandwidth - get the data return bandwidth
- *
- * @wm: watermark calculation data
- *
- * Calculate the data return bandwidth used for display (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the data return bandwidth in MBytes/s
- */
-static u32 dce_v11_0_data_return_bandwidth(struct dce10_wm_params *wm)
-{
- /* Calculate the display Data return Bandwidth */
- fixed20_12 return_efficiency; /* 0.8 */
- fixed20_12 sclk, bandwidth;
- fixed20_12 a;
-
- a.full = dfixed_const(1000);
- sclk.full = dfixed_const(wm->sclk);
- sclk.full = dfixed_div(sclk, a);
- a.full = dfixed_const(10);
- return_efficiency.full = dfixed_const(8);
- return_efficiency.full = dfixed_div(return_efficiency, a);
- a.full = dfixed_const(32);
- bandwidth.full = dfixed_mul(a, sclk);
- bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
-
- return dfixed_trunc(bandwidth);
-}
-
-/**
- * dce_v11_0_dmif_request_bandwidth - get the dmif bandwidth
- *
- * @wm: watermark calculation data
- *
- * Calculate the dmif bandwidth used for display (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the dmif bandwidth in MBytes/s
- */
-static u32 dce_v11_0_dmif_request_bandwidth(struct dce10_wm_params *wm)
-{
- /* Calculate the DMIF Request Bandwidth */
- fixed20_12 disp_clk_request_efficiency; /* 0.8 */
- fixed20_12 disp_clk, bandwidth;
- fixed20_12 a, b;
-
- a.full = dfixed_const(1000);
- disp_clk.full = dfixed_const(wm->disp_clk);
- disp_clk.full = dfixed_div(disp_clk, a);
- a.full = dfixed_const(32);
- b.full = dfixed_mul(a, disp_clk);
-
- a.full = dfixed_const(10);
- disp_clk_request_efficiency.full = dfixed_const(8);
- disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
-
- bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
-
- return dfixed_trunc(bandwidth);
-}
-
-/**
- * dce_v11_0_available_bandwidth - get the min available bandwidth
- *
- * @wm: watermark calculation data
- *
- * Calculate the min available bandwidth used for display (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the min available bandwidth in MBytes/s
- */
-static u32 dce_v11_0_available_bandwidth(struct dce10_wm_params *wm)
-{
- /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
- u32 dram_bandwidth = dce_v11_0_dram_bandwidth(wm);
- u32 data_return_bandwidth = dce_v11_0_data_return_bandwidth(wm);
- u32 dmif_req_bandwidth = dce_v11_0_dmif_request_bandwidth(wm);
-
- return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
-}
-
-/**
- * dce_v11_0_average_bandwidth - get the average available bandwidth
- *
- * @wm: watermark calculation data
- *
- * Calculate the average available bandwidth used for display (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the average available bandwidth in MBytes/s
- */
-static u32 dce_v11_0_average_bandwidth(struct dce10_wm_params *wm)
-{
- /* Calculate the display mode Average Bandwidth
- * DisplayMode should contain the source and destination dimensions,
- * timing, etc.
- */
- fixed20_12 bpp;
- fixed20_12 line_time;
- fixed20_12 src_width;
- fixed20_12 bandwidth;
- fixed20_12 a;
-
- a.full = dfixed_const(1000);
- line_time.full = dfixed_const(wm->active_time + wm->blank_time);
- line_time.full = dfixed_div(line_time, a);
- bpp.full = dfixed_const(wm->bytes_per_pixel);
- src_width.full = dfixed_const(wm->src_width);
- bandwidth.full = dfixed_mul(src_width, bpp);
- bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
- bandwidth.full = dfixed_div(bandwidth, line_time);
-
- return dfixed_trunc(bandwidth);
-}
-
-/**
- * dce_v11_0_latency_watermark - get the latency watermark
- *
- * @wm: watermark calculation data
- *
- * Calculate the latency watermark (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the latency watermark in ns
- */
-static u32 dce_v11_0_latency_watermark(struct dce10_wm_params *wm)
-{
- /* First calculate the latency in ns */
- u32 mc_latency = 2000; /* 2000 ns. */
- u32 available_bandwidth = dce_v11_0_available_bandwidth(wm);
- u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
- u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
- u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
- u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
- (wm->num_heads * cursor_line_pair_return_time);
- u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
- u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
- u32 tmp, dmif_size = 12288;
- fixed20_12 a, b, c;
-
- if (wm->num_heads == 0)
- return 0;
-
- a.full = dfixed_const(2);
- b.full = dfixed_const(1);
- if ((wm->vsc.full > a.full) ||
- ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
- (wm->vtaps >= 5) ||
- ((wm->vsc.full >= a.full) && wm->interlaced))
- max_src_lines_per_dst_line = 4;
- else
- max_src_lines_per_dst_line = 2;
-
- a.full = dfixed_const(available_bandwidth);
- b.full = dfixed_const(wm->num_heads);
- a.full = dfixed_div(a, b);
- tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
- tmp = min(dfixed_trunc(a), tmp);
-
- lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
-
- a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
- b.full = dfixed_const(1000);
- c.full = dfixed_const(lb_fill_bw);
- b.full = dfixed_div(c, b);
- a.full = dfixed_div(a, b);
- line_fill_time = dfixed_trunc(a);
-
- if (line_fill_time < wm->active_time)
- return latency;
- else
- return latency + (line_fill_time - wm->active_time);
-
-}
-
-/**
- * dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display - check
- * average and available dram bandwidth
- *
- * @wm: watermark calculation data
- *
- * Check if the display average bandwidth fits in the display
- * dram bandwidth (CIK).
- * Used for display watermark bandwidth calculations
- * Returns true if the display fits, false if not.
- */
-static bool dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display(struct dce10_wm_params *wm)
-{
- if (dce_v11_0_average_bandwidth(wm) <=
- (dce_v11_0_dram_bandwidth_for_display(wm) / wm->num_heads))
- return true;
- else
- return false;
-}
-
-/**
- * dce_v11_0_average_bandwidth_vs_available_bandwidth - compare the
- * average display bandwidth with the per-head available bandwidth
- *
- * @wm: watermark calculation data
- *
- * Compares dce_v11_0_average_bandwidth() against
- * dce_v11_0_available_bandwidth() divided by the number of heads.
- * Used for display watermark bandwidth calculations.
- * Returns true if the average bandwidth does not exceed that share,
- * false otherwise.
- */
-static bool dce_v11_0_average_bandwidth_vs_available_bandwidth(struct dce10_wm_params *wm)
-{
-	return dce_v11_0_average_bandwidth(wm) <=
-	       (dce_v11_0_available_bandwidth(wm) / wm->num_heads);
-}
-
-/**
- * dce_v11_0_check_latency_hiding - check latency hiding
- *
- * @wm: watermark calculation data
- *
- * Derives how many display lines of latency can be tolerated and
- * compares the resulting hiding time with the latency watermark.
- * Used for display watermark bandwidth calculations.
- * Returns true if the latency watermark fits within the hiding time,
- * false otherwise.
- */
-static bool dce_v11_0_check_latency_hiding(struct dce10_wm_params *wm)
-{
-	u32 partitions = wm->lb_size / wm->src_width;
-	u32 full_line_time = wm->active_time + wm->blank_time;
-	u32 tolerant_lines = 1;
-	u32 hiding_time;
-	fixed20_12 one;
-
-	one.full = dfixed_const(1);
-	/*
-	 * Two lines are tolerated only when the vertical scale ratio is
-	 * at most 1 and the line buffer holds more than vtaps + 1
-	 * partitions; every other case tolerates a single line.
-	 */
-	if (wm->vsc.full <= one.full && partitions > (wm->vtaps + 1))
-		tolerant_lines = 2;
-
-	hiding_time = (tolerant_lines * full_line_time + wm->blank_time);
-
-	return dce_v11_0_latency_watermark(wm) <= hiding_time;
-}
-
-/*
- * Fill @wm with the watermark inputs for one clock level.  @low_clocks is
- * forwarded to amdgpu_dpm_get_mclk()/amdgpu_dpm_get_sclk() when DPM is
- * enabled; otherwise the current clocks are used for both levels.
- */
-static void dce_v11_0_fill_wm_params(struct amdgpu_device *adev,
-				     struct amdgpu_crtc *amdgpu_crtc,
-				     struct drm_display_mode *mode,
-				     u32 lb_size, u32 num_heads,
-				     u32 active_time, u32 line_time,
-				     bool low_clocks,
-				     struct dce10_wm_params *wm)
-{
-	if (adev->pm.dpm_enabled) {
-		wm->yclk = amdgpu_dpm_get_mclk(adev, low_clocks) * 10;
-		wm->sclk = amdgpu_dpm_get_sclk(adev, low_clocks) * 10;
-	} else {
-		wm->yclk = adev->pm.current_mclk * 10;
-		wm->sclk = adev->pm.current_sclk * 10;
-	}
-
-	wm->disp_clk = mode->clock;
-	wm->src_width = mode->crtc_hdisplay;
-	wm->active_time = active_time;
-	wm->blank_time = line_time - wm->active_time;
-	wm->interlaced = false;
-	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
-		wm->interlaced = true;
-	wm->vsc = amdgpu_crtc->vsc;
-	wm->vtaps = 1;
-	if (amdgpu_crtc->rmx_type != RMX_OFF)
-		wm->vtaps = 2;
-	wm->bytes_per_pixel = 4; /* XXX: get this from fb config */
-	wm->lb_size = lb_size;
-	wm->dram_channels = cik_get_number_of_dram_channels(adev);
-	wm->num_heads = num_heads;
-}
-
-/*
- * Return true when any bandwidth/latency check fails for @wm or the
- * display priority is explicitly set to 2.
- */
-static bool dce_v11_0_wm_wants_high_priority(struct amdgpu_device *adev,
-					     struct dce10_wm_params *wm)
-{
-	return !dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display(wm) ||
-	       !dce_v11_0_average_bandwidth_vs_available_bandwidth(wm) ||
-	       !dce_v11_0_check_latency_hiding(wm) ||
-	       (adev->mode_info.disp_priority == 2);
-}
-
-/*
- * Select watermark set @wm_set through the mask register (starting from
- * the saved @wm_mask value) and program its low/high urgency watermarks.
- */
-static void dce_v11_0_write_urgency_watermark(struct amdgpu_device *adev,
-					      struct amdgpu_crtc *amdgpu_crtc,
-					      u32 wm_mask, u32 wm_set,
-					      u32 latency_watermark,
-					      u32 line_time)
-{
-	u32 tmp;
-
-	tmp = REG_SET_FIELD(wm_mask, DPG_WATERMARK_MASK_CONTROL, URGENCY_WATERMARK_MASK, wm_set);
-	WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-	tmp = RREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset);
-	tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, latency_watermark);
-	tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, line_time);
-	WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-}
-
-/**
- * dce_v11_0_program_watermarks - program display watermarks
- *
- * @adev: amdgpu_device pointer
- * @amdgpu_crtc: the selected display controller
- * @lb_size: line buffer size
- * @num_heads: number of display controllers in use
- *
- * Calculate and program the display watermarks for the
- * selected display controller.  When the controller is disabled or
- * @num_heads is zero, zeroed watermarks and line time are programmed.
- */
-static void dce_v11_0_program_watermarks(struct amdgpu_device *adev,
-					struct amdgpu_crtc *amdgpu_crtc,
-					u32 lb_size, u32 num_heads)
-{
-	/* points into amdgpu_crtc, so it can never be NULL */
-	struct drm_display_mode *mode = &amdgpu_crtc->base.mode;
-	struct dce10_wm_params wm_low, wm_high;
-	u32 line_time = 0;
-	u32 latency_watermark_a = 0, latency_watermark_b = 0;
-	u32 wm_mask, lb_vblank_lead_lines = 0;
-
-	if (amdgpu_crtc->base.enabled && num_heads) {
-		u32 active_time;
-
-		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
-					    (u32)mode->clock);
-		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
-					  (u32)mode->clock);
-		/* clamp to 65535, the same limit applied to the watermarks */
-		line_time = min(line_time, (u32)65535);
-
-		/* watermark for high clocks */
-		dce_v11_0_fill_wm_params(adev, amdgpu_crtc, mode, lb_size,
-					 num_heads, active_time, line_time,
-					 false, &wm_high);
-		latency_watermark_a = min(dce_v11_0_latency_watermark(&wm_high), (u32)65535);
-
-		/* possibly force display priority to high */
-		/* should really do this at mode validation time... */
-		if (dce_v11_0_wm_wants_high_priority(adev, &wm_high))
-			DRM_DEBUG_KMS("force priority to high\n");
-
-		/* watermark for low clocks */
-		dce_v11_0_fill_wm_params(adev, amdgpu_crtc, mode, lb_size,
-					 num_heads, active_time, line_time,
-					 true, &wm_low);
-		latency_watermark_b = min(dce_v11_0_latency_watermark(&wm_low), (u32)65535);
-
-		/* possibly force display priority to high */
-		/* should really do this at mode validation time... */
-		if (dce_v11_0_wm_wants_high_priority(adev, &wm_low))
-			DRM_DEBUG_KMS("force priority to high\n");
-
-		lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
-	}
-
-	/* remember the current selection so it can be restored afterwards */
-	wm_mask = RREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset);
-	/* select wm A */
-	dce_v11_0_write_urgency_watermark(adev, amdgpu_crtc, wm_mask, 1,
-					  latency_watermark_a, line_time);
-	/* select wm B */
-	dce_v11_0_write_urgency_watermark(adev, amdgpu_crtc, wm_mask, 2,
-					  latency_watermark_b, line_time);
-	/* restore original selection */
-	WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, wm_mask);
-
-	/* save values for DPM */
-	amdgpu_crtc->line_time = line_time;
-	amdgpu_crtc->wm_high = latency_watermark_a;
-	amdgpu_crtc->wm_low = latency_watermark_b;
-	/* Save number of lines the linebuffer leads before the scanout */
-	amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
-}
-
-/**
- * dce_v11_0_bandwidth_update - program display watermarks
- *
- * @adev: amdgpu_device pointer
- *
- * Counts the enabled display controllers, then adjusts the line
- * buffer and programs the watermarks for every controller.
- */
-static void dce_v11_0_bandwidth_update(struct amdgpu_device *adev)
-{
-	u32 active_heads = 0;
-	int idx;
-
-	amdgpu_display_update_priority(adev);
-
-	/* first pass: how many heads are currently enabled */
-	for (idx = 0; idx < adev->mode_info.num_crtc; idx++)
-		if (adev->mode_info.crtcs[idx]->base.enabled)
-			active_heads++;
-
-	/* second pass: every controller is programmed, enabled or not */
-	for (idx = 0; idx < adev->mode_info.num_crtc; idx++) {
-		struct amdgpu_crtc *head = adev->mode_info.crtcs[idx];
-		u32 lb_size = dce_v11_0_line_buffer_adjust(adev, head,
-							   &head->base.mode);
-
-		dce_v11_0_program_watermarks(adev, head, lb_size, active_heads);
-	}
-}
-
-/*
- * Refresh the 'connected' flag of every audio pin from the
- * PORT_CONNECTIVITY field of its default configuration register.
- */
-static void dce_v11_0_audio_get_connected_pins(struct amdgpu_device *adev)
-{
-	int idx;
-
-	for (idx = 0; idx < adev->mode_info.audio.num_pins; idx++) {
-		struct amdgpu_audio_pin *pin = &adev->mode_info.audio.pin[idx];
-		u32 offset = pin->offset;
-		u32 cfg, connectivity;
-
-		cfg = RREG32_AUDIO_ENDPT(offset,
-					 ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT);
-		connectivity = (cfg &
-				AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT__PORT_CONNECTIVITY_MASK) >>
-			       AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT__PORT_CONNECTIVITY__SHIFT;
-
-		/* a field value of 1 marks the pin as not connected */
-		pin->connected = (connectivity != 1);
-	}
-}
-
-/*
- * Re-read the pin connectivity and return the first connected audio
- * pin, or NULL (after logging an error) when none is connected.
- */
-static struct amdgpu_audio_pin *dce_v11_0_audio_get_pin(struct amdgpu_device *adev)
-{
-	struct amdgpu_audio_pin *pin;
-	int idx = 0;
-
-	dce_v11_0_audio_get_connected_pins(adev);
-
-	while (idx < adev->mode_info.audio.num_pins) {
-		pin = &adev->mode_info.audio.pin[idx];
-		if (pin->connected)
-			return pin;
-		idx++;
-	}
-
-	DRM_ERROR("No connected audio pins found!\n");
-	return NULL;
-}
-
-/*
- * Route the audio pin currently assigned to the encoder's AFMT block
- * into that block's audio source select field.
- */
-static void dce_v11_0_afmt_audio_select_pin(struct drm_encoder *encoder)
-{
-	struct amdgpu_device *adev = drm_to_adev(encoder->dev);
-	struct amdgpu_encoder_atom_dig *dig = to_amdgpu_encoder(encoder)->enc_priv;
-	u32 src_ctl;
-
-	/* nothing to select without an AFMT block and an assigned pin */
-	if (!dig || !dig->afmt || !dig->afmt->pin)
-		return;
-
-	src_ctl = RREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset);
-	src_ctl = REG_SET_FIELD(src_ctl, AFMT_AUDIO_SRC_CONTROL, AFMT_AUDIO_SRC_SELECT,
-				dig->afmt->pin->id);
-	WREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset, src_ctl);
-}
-
-/*
- * Program the lipsync register of the encoder's audio pin with the
- * video and audio latencies reported by the attached connector.
- *
- * @encoder: encoder whose AFMT/audio pin is programmed
- * @mode: current display mode; its interlace flag selects which
- *        latency entry of the connector is used
- *
- * Both latencies are written as 0 when the connector reports no latency
- * information for the selected entry.  Does nothing when the encoder
- * has no AFMT block / pin or no connector is attached to it.
- */
-static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder,
-						 struct drm_display_mode *mode)
-{
-	struct drm_device *dev = encoder->dev;
-	struct amdgpu_device *adev = drm_to_adev(dev);
-	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
-	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
-	struct drm_connector *connector;
-	struct drm_connector_list_iter iter;
-	struct amdgpu_connector *amdgpu_connector = NULL;
-	u32 tmp;
-	int interlace = 0;
-
-	if (!dig || !dig->afmt || !dig->afmt->pin)
-		return;
-
-	/* find the connector currently driven by this encoder */
-	drm_connector_list_iter_begin(dev, &iter);
-	drm_for_each_connector_iter(connector, &iter) {
-		if (connector->encoder == encoder) {
-			amdgpu_connector = to_amdgpu_connector(connector);
-			break;
-		}
-	}
-	drm_connector_list_iter_end(&iter);
-
-	if (!amdgpu_connector) {
-		DRM_ERROR("Couldn't find encoder's connector\n");
-		return;
-	}
-
-	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
-		interlace = 1;
-	/*
-	 * Build the register value field by field.  The second
-	 * REG_SET_FIELD() must start from tmp (not 0), otherwise the
-	 * VIDEO_LIPSYNC value set just before is thrown away.
-	 */
-	if (connector->latency_present[interlace]) {
-		tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
-				    VIDEO_LIPSYNC, connector->video_latency[interlace]);
-		tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
-				    AUDIO_LIPSYNC, connector->audio_latency[interlace]);
-	} else {
-		tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
-				    VIDEO_LIPSYNC, 0);
-		tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
-				    AUDIO_LIPSYNC, 0);
-	}
-	WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
-			   ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp);
-}
-
-/*
- * Program the channel/speaker register of the encoder's audio pin from
- * the Speaker Allocation Data Block of the attached connector's EDID.
- * When no block is available the allocation value 5 (stereo) is used.
- */
-static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
-{
-	struct drm_device *dev = encoder->dev;
-	struct amdgpu_device *adev = drm_to_adev(dev);
-	struct amdgpu_encoder_atom_dig *dig = to_amdgpu_encoder(encoder)->enc_priv;
-	struct amdgpu_connector *amdgpu_connector = NULL;
-	struct drm_connector_list_iter iter;
-	struct drm_connector *connector;
-	u8 *sadb = NULL;
-	u32 allocation;
-	u32 reg;
-	int sad_count;
-
-	if (!dig || !dig->afmt || !dig->afmt->pin)
-		return;
-
-	/* locate the connector fed by this encoder */
-	drm_connector_list_iter_begin(dev, &iter);
-	drm_for_each_connector_iter(connector, &iter) {
-		if (connector->encoder != encoder)
-			continue;
-		amdgpu_connector = to_amdgpu_connector(connector);
-		break;
-	}
-	drm_connector_list_iter_end(&iter);
-
-	if (!amdgpu_connector) {
-		DRM_ERROR("Couldn't find encoder's connector\n");
-		return;
-	}
-
-	sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb);
-	if (sad_count < 0) {
-		DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
-		sad_count = 0;
-	}
-
-	/* fall back to stereo when the sink supplied no allocation block */
-	allocation = sad_count ? sadb[0] : 5;
-
-	/* program the speaker allocation */
-	reg = RREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
-				 ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER);
-	reg = REG_SET_FIELD(reg, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
-			    DP_CONNECTION, 0);
-	/* set HDMI mode */
-	reg = REG_SET_FIELD(reg, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
-			    HDMI_CONNECTION, 1);
-	reg = REG_SET_FIELD(reg, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
-			    SPEAKER_ALLOCATION, allocation);
-	WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
-			   ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, reg);
-
-	kfree(sadb);
-}
-
-/*
- * Program the per-format audio descriptor registers of the encoder's
- * audio pin from the Short Audio Descriptors in the connector's EDID.
- * Every register in the table is written; formats without a matching
- * descriptor are written as 0.
- */
-static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder)
-{
-	struct drm_device *dev = encoder->dev;
-	struct amdgpu_device *adev = drm_to_adev(dev);
-	struct amdgpu_encoder_atom_dig *dig = to_amdgpu_encoder(encoder)->enc_priv;
-	struct amdgpu_connector *amdgpu_connector = NULL;
-	struct drm_connector_list_iter iter;
-	struct drm_connector *connector;
-	struct cea_sad *sads;
-	int i, sad_count;
-
-	/* { descriptor register, audio coding type it describes } */
-	static const u16 eld_reg_to_type[][2] = {
-		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM },
-		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 },
-		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 },
-		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 },
-		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 },
-		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC },
-		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS },
-		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC },
-		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 },
-		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD },
-		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP },
-		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
-	};
-
-	if (!dig || !dig->afmt || !dig->afmt->pin)
-		return;
-
-	/* locate the connector fed by this encoder */
-	drm_connector_list_iter_begin(dev, &iter);
-	drm_for_each_connector_iter(connector, &iter) {
-		if (connector->encoder != encoder)
-			continue;
-		amdgpu_connector = to_amdgpu_connector(connector);
-		break;
-	}
-	drm_connector_list_iter_end(&iter);
-
-	if (!amdgpu_connector) {
-		DRM_ERROR("Couldn't find encoder's connector\n");
-		return;
-	}
-
-	sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
-	if (sad_count < 0)
-		DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
-	if (sad_count <= 0)
-		return;
-	BUG_ON(!sads);
-
-	for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
-		u32 descriptor = 0;
-		u8 stereo_freqs = 0;
-		int best_channels = -1;
-		int j;
-
-		for (j = 0; j < sad_count; j++) {
-			struct cea_sad *sad = &sads[j];
-
-			if (sad->format != eld_reg_to_type[i][1])
-				continue;
-
-			/* keep the descriptor with the most channels */
-			if (sad->channels > best_channels) {
-				descriptor = REG_SET_FIELD(descriptor, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
-							   MAX_CHANNELS, sad->channels);
-				descriptor = REG_SET_FIELD(descriptor, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
-							   DESCRIPTOR_BYTE_2, sad->byte2);
-				descriptor = REG_SET_FIELD(descriptor, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
-							   SUPPORTED_FREQUENCIES, sad->freq);
-				best_channels = sad->channels;
-			}
-
-			/*
-			 * PCM accumulates the frequencies of all its
-			 * descriptors; any other format stops at the
-			 * first match.
-			 */
-			if (sad->format != HDMI_AUDIO_CODING_TYPE_PCM)
-				break;
-			stereo_freqs |= sad->freq;
-		}
-
-		descriptor = REG_SET_FIELD(descriptor, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
-					   SUPPORTED_FREQUENCIES_STEREO, stereo_freqs);
-		WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], descriptor);
-	}
-
-	kfree(sads);
-}
-
-/*
- * Write the hot plug control register of @pin: the AUDIO_ENABLED mask
- * when @enable is true, 0 otherwise.  A NULL @pin is ignored.
- */
-static void dce_v11_0_audio_enable(struct amdgpu_device *adev,
-				   struct amdgpu_audio_pin *pin,
-				   bool enable)
-{
-	u32 value = 0;
-
-	if (!pin)
-		return;
-
-	if (enable)
-		value = AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK;
-
-	WREG32_AUDIO_ENDPT(pin->offset, ixAZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL,
-			   value);
-}
-
-/* Register offset of each audio pin, indexed by pin number. */
-static const u32 pin_offsets[] = {
-	AUD0_REGISTER_OFFSET,
-	AUD1_REGISTER_OFFSET,
-	AUD2_REGISTER_OFFSET,
-	AUD3_REGISTER_OFFSET,
-	AUD4_REGISTER_OFFSET,
-	AUD5_REGISTER_OFFSET,
-	AUD6_REGISTER_OFFSET,
-	AUD7_REGISTER_OFFSET,
-};
-
-/*
- * Set up the audio pin bookkeeping for the ASIC and disable every pin.
- * Returns 0 on success (or when audio is disabled via amdgpu_audio),
- * -EINVAL for an ASIC type this function does not know.
- */
-static int dce_v11_0_audio_init(struct amdgpu_device *adev)
-{
-	struct amdgpu_audio_pin *pin;
-	int idx;
-
-	if (!amdgpu_audio)
-		return 0;
-
-	adev->mode_info.audio.enabled = true;
-
-	/* the number of audio pins depends on the ASIC */
-	switch (adev->asic_type) {
-	case CHIP_CARRIZO:
-	case CHIP_STONEY:
-		adev->mode_info.audio.num_pins = 7;
-		break;
-	case CHIP_POLARIS10:
-	case CHIP_VEGAM:
-		adev->mode_info.audio.num_pins = 8;
-		break;
-	case CHIP_POLARIS11:
-	case CHIP_POLARIS12:
-		adev->mode_info.audio.num_pins = 6;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	for (idx = 0; idx < adev->mode_info.audio.num_pins; idx++) {
-		pin = &adev->mode_info.audio.pin[idx];
-
-		pin->channels = -1;
-		pin->rate = -1;
-		pin->bits_per_sample = -1;
-		pin->status_bits = 0;
-		pin->category_code = 0;
-		pin->connected = false;
-		pin->offset = pin_offsets[idx];
-		pin->id = idx;
-		/* disable audio. it will be set up later */
-		/* XXX remove once we switch to ip funcs */
-		dce_v11_0_audio_enable(adev, pin, false);
-	}
-
-	return 0;
-}
-
-/*
- * Disable every audio pin and mark audio as disabled.  Does nothing
- * when audio is turned off via amdgpu_audio or was never enabled.
- */
-static void dce_v11_0_audio_fini(struct amdgpu_device *adev)
-{
-	int idx;
-
-	if (!amdgpu_audio || !adev->mode_info.audio.enabled)
-		return;
-
-	for (idx = 0; idx < adev->mode_info.audio.num_pins; idx++) {
-		struct amdgpu_audio_pin *pin = &adev->mode_info.audio.pin[idx];
-
-		dce_v11_0_audio_enable(adev, pin, false);
-	}
-
-	adev->mode_info.audio.enabled = false;
-}
-
-/*
- * update the N and CTS parameters for a given pixel clock rate
- *
- * The values come from amdgpu_afmt_acr(clock) and are written for the
- * 32 kHz, 44.1 kHz and 48 kHz audio rates of the encoder's AFMT block.
- */
-static void dce_v11_0_afmt_update_ACR(struct drm_encoder *encoder, uint32_t clock)
-{
-	struct drm_device *dev = encoder->dev;
-	struct amdgpu_device *adev = drm_to_adev(dev);
-	struct amdgpu_afmt_acr acr = amdgpu_afmt_acr(clock);
-	struct amdgpu_encoder_atom_dig *dig = to_amdgpu_encoder(encoder)->enc_priv;
-	struct amdgpu_afmt *afmt = dig->afmt;
-	u32 val;
-
-	/* 32 kHz */
-	val = RREG32(mmHDMI_ACR_32_0 + afmt->offset);
-	val = REG_SET_FIELD(val, HDMI_ACR_32_0, HDMI_ACR_CTS_32, acr.cts_32khz);
-	WREG32(mmHDMI_ACR_32_0 + afmt->offset, val);
-	val = RREG32(mmHDMI_ACR_32_1 + afmt->offset);
-	val = REG_SET_FIELD(val, HDMI_ACR_32_1, HDMI_ACR_N_32, acr.n_32khz);
-	WREG32(mmHDMI_ACR_32_1 + afmt->offset, val);
-
-	/* 44.1 kHz */
-	val = RREG32(mmHDMI_ACR_44_0 + afmt->offset);
-	val = REG_SET_FIELD(val, HDMI_ACR_44_0, HDMI_ACR_CTS_44, acr.cts_44_1khz);
-	WREG32(mmHDMI_ACR_44_0 + afmt->offset, val);
-	val = RREG32(mmHDMI_ACR_44_1 + afmt->offset);
-	val = REG_SET_FIELD(val, HDMI_ACR_44_1, HDMI_ACR_N_44, acr.n_44_1khz);
-	WREG32(mmHDMI_ACR_44_1 + afmt->offset, val);
-
-	/* 48 kHz */
-	val = RREG32(mmHDMI_ACR_48_0 + afmt->offset);
-	val = REG_SET_FIELD(val, HDMI_ACR_48_0, HDMI_ACR_CTS_48, acr.cts_48khz);
-	WREG32(mmHDMI_ACR_48_0 + afmt->offset, val);
-	val = RREG32(mmHDMI_ACR_48_1 + afmt->offset);
-	val = REG_SET_FIELD(val, HDMI_ACR_48_1, HDMI_ACR_N_48, acr.n_48khz);
-	WREG32(mmHDMI_ACR_48_1 + afmt->offset, val);
-}
-
-/*
- * build a HDMI Video Info Frame
- *
- * Copies a packed AVI infoframe from @buffer into the four AVI info
- * registers of the encoder's AFMT block.  The payload starts 3 bytes
- * into @buffer; byte 1 of the header goes into the top byte of
- * AFMT_AVI_INFO3.  @size is not used.
- */
-static void dce_v11_0_afmt_update_avi_infoframe(struct drm_encoder *encoder,
-						void *buffer, size_t size)
-{
-	struct drm_device *dev = encoder->dev;
-	struct amdgpu_device *adev = drm_to_adev(dev);
-	struct amdgpu_encoder_atom_dig *dig = to_amdgpu_encoder(encoder)->enc_priv;
-	struct amdgpu_afmt *afmt = dig->afmt;
-	uint8_t *header = buffer;
-	uint8_t *payload = header + 3;
-
-	WREG32(mmAFMT_AVI_INFO0 + afmt->offset,
-	       payload[0x0] | (payload[0x1] << 8) | (payload[0x2] << 16) | (payload[0x3] << 24));
-	WREG32(mmAFMT_AVI_INFO1 + afmt->offset,
-	       payload[0x4] | (payload[0x5] << 8) | (payload[0x6] << 16) | (payload[0x7] << 24));
-	WREG32(mmAFMT_AVI_INFO2 + afmt->offset,
-	       payload[0x8] | (payload[0x9] << 8) | (payload[0xA] << 16) | (payload[0xB] << 24));
-	WREG32(mmAFMT_AVI_INFO3 + afmt->offset,
-	       payload[0xC] | (payload[0xD] << 8) | (header[1] << 24));
-}
-
-/*
- * Program audio DTO0 for the given pixel clock.
- *
- * @encoder: encoder whose CRTC is selected as the DTO0 source
- * @clock: pixel clock, written as the DTO modulo
- *
- * Does nothing when the encoder has no AFMT block or is not bound to a
- * CRTC.
- */
-static void dce_v11_0_audio_set_dto(struct drm_encoder *encoder, u32 clock)
-{
-	struct drm_device *dev = encoder->dev;
-	struct amdgpu_device *adev = drm_to_adev(dev);
-	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
-	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
-	struct amdgpu_crtc *amdgpu_crtc;
-	u32 dto_phase = 24 * 1000;
-	u32 dto_modulo = clock;
-	u32 tmp;
-
-	if (!dig || !dig->afmt)
-		return;
-
-	/*
-	 * Without a bound CRTC there is no crtc_id to select as the DTO
-	 * source; bail out instead of dereferencing a NULL crtc.
-	 */
-	if (!encoder->crtc)
-		return;
-	amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
-
-	/* XXX two dtos; generally use dto0 for hdmi */
-	/* Express [24MHz / target pixel clock] as an exact rational
-	 * number (quotient of two integer numbers): DCCG_AUDIO_DTOx_PHASE
-	 * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator.
-	 */
-	tmp = RREG32(mmDCCG_AUDIO_DTO_SOURCE);
-	tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL,
-			    amdgpu_crtc->crtc_id);
-	WREG32(mmDCCG_AUDIO_DTO_SOURCE, tmp);
-	WREG32(mmDCCG_AUDIO_DTO0_PHASE, dto_phase);
-	WREG32(mmDCCG_AUDIO_DTO0_MODULE, dto_modulo);
-}
-
-/*
- * update the info frames with the data from the current display mode
- *
- * @encoder: encoder whose AFMT block is programmed
- * @mode: display mode supplying the pixel clock and infoframe data
- *
- * Audio is disabled on the selected pin while the HDMI/AFMT registers
- * are programmed and re-enabled at the end.  Does nothing when the
- * encoder has no AFMT block or the block is not enabled.  If building or
- * packing the AVI infoframe fails the function returns early, leaving
- * the pin disabled.
- */
-static void dce_v11_0_afmt_setmode(struct drm_encoder *encoder,
-				  struct drm_display_mode *mode)
-{
-	struct drm_device *dev = encoder->dev;
-	struct amdgpu_device *adev = drm_to_adev(dev);
-	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
-	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
-	struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
-	u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE];
-	struct hdmi_avi_infoframe frame;
-	const char *name;
-	ssize_t err;
-	u32 tmp;
-	int bpc = 8;
-
-	if (!dig || !dig->afmt)
-		return;
-
-	/* Silently skip: the AFMT block has not been enabled */
-	if (!dig->afmt->enabled)
-		return;
-
-	/*
-	 * The connector lookup may come back empty; only use its name
-	 * for the debug messages below when it exists.
-	 */
-	name = connector ? connector->name : "unknown";
-
-	/* hdmi deep color mode general control packets setup, if bpc > 8 */
-	if (encoder->crtc) {
-		struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
-		bpc = amdgpu_crtc->bpc;
-	}
-
-	/* disable audio prior to setting up hw */
-	dig->afmt->pin = dce_v11_0_audio_get_pin(adev);
-	dce_v11_0_audio_enable(adev, dig->afmt->pin, false);
-
-	dce_v11_0_audio_set_dto(encoder, mode->clock);
-
-	tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset);
-	tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1);
-	WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp); /* send null packets when required */
-
-	WREG32(mmAFMT_AUDIO_CRC_CONTROL + dig->afmt->offset, 0x1000);
-
-	tmp = RREG32(mmHDMI_CONTROL + dig->afmt->offset);
-	switch (bpc) {
-	case 0:
-	case 6:
-	case 8:
-	case 16:
-	default:
-		tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 0);
-		tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 0);
-		DRM_DEBUG("%s: Disabling hdmi deep color for %d bpc.\n",
-			  name, bpc);
-		break;
-	case 10:
-		tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 1);
-		tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 1);
-		DRM_DEBUG("%s: Enabling hdmi deep color 30 for 10 bpc.\n",
-			  name);
-		break;
-	case 12:
-		tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 1);
-		tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 2);
-		DRM_DEBUG("%s: Enabling hdmi deep color 36 for 12 bpc.\n",
-			  name);
-		break;
-	}
-	WREG32(mmHDMI_CONTROL + dig->afmt->offset, tmp);
-
-	tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset);
-	tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1); /* send null packets when required */
-	tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, 1); /* send general control packets */
-	tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, 1); /* send general control packets every frame */
-	WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp);
-
-	tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
-	/* enable audio info frames (frames won't be set until audio is enabled) */
-	tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
-	/* required for audio info values to be updated */
-	tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 1);
-	WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
-
-	tmp = RREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset);
-	/* required for audio info values to be updated */
-	tmp = REG_SET_FIELD(tmp, AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1);
-	WREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
-
-	tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
-	/* anything other than 0 */
-	tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, 2);
-	WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
-
-	WREG32(mmHDMI_GC + dig->afmt->offset, 0); /* unset HDMI_GC_AVMUTE */
-
-	tmp = RREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset);
-	/* set the default audio delay */
-	tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, 1);
-	/* should be sufficient for all audio modes and small enough for all hblanks */
-	tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_PACKETS_PER_LINE, 3);
-	WREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
-
-	tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
-	/* allow 60958 channel status fields to be updated */
-	tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1);
-	WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
-
-	tmp = RREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset);
-	if (bpc > 8)
-		/* clear SW CTS value */
-		tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, 0);
-	else
-		/* select SW CTS value */
-		tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, 1);
-	/* allow hw to send ACR packets when required */
-	tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, 1);
-	WREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset, tmp);
-
-	dce_v11_0_afmt_update_ACR(encoder, mode->clock);
-
-	tmp = RREG32(mmAFMT_60958_0 + dig->afmt->offset);
-	tmp = REG_SET_FIELD(tmp, AFMT_60958_0, AFMT_60958_CS_CHANNEL_NUMBER_L, 1);
-	WREG32(mmAFMT_60958_0 + dig->afmt->offset, tmp);
-
-	tmp = RREG32(mmAFMT_60958_1 + dig->afmt->offset);
-	tmp = REG_SET_FIELD(tmp, AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, 2);
-	WREG32(mmAFMT_60958_1 + dig->afmt->offset, tmp);
-
-	tmp = RREG32(mmAFMT_60958_2 + dig->afmt->offset);
-	tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_2, 3);
-	tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_3, 4);
-	tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_4, 5);
-	tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_5, 6);
-	tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, 7);
-	tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, 8);
-	WREG32(mmAFMT_60958_2 + dig->afmt->offset, tmp);
-
-	dce_v11_0_audio_write_speaker_allocation(encoder);
-
-	WREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset,
-	       (0xff << AFMT_AUDIO_PACKET_CONTROL2__AFMT_AUDIO_CHANNEL_ENABLE__SHIFT));
-
-	dce_v11_0_afmt_audio_select_pin(encoder);
-	dce_v11_0_audio_write_sad_regs(encoder);
-	dce_v11_0_audio_write_latency_fields(encoder, mode);
-
-	/* NOTE(review): connector may be NULL here; confirm the helper accepts that */
-	err = drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode);
-	if (err < 0) {
-		DRM_ERROR("failed to setup AVI infoframe: %zd\n", err);
-		return;
-	}
-
-	err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer));
-	if (err < 0) {
-		DRM_ERROR("failed to pack AVI infoframe: %zd\n", err);
-		return;
-	}
-
-	dce_v11_0_afmt_update_avi_infoframe(encoder, buffer, sizeof(buffer));
-
-	tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
-	/* enable AVI info frames */
-	tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 1);
-	/* send the AVI info frame continuously */
-	tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 1);
-	WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
-
-	tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
-	tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AVI_INFO_LINE, 2);
-	WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
-
-	tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
-	/* send audio packets */
-	tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1);
-	WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
-
-	WREG32(mmAFMT_RAMP_CONTROL0 + dig->afmt->offset, 0x00FFFFFF);
-	WREG32(mmAFMT_RAMP_CONTROL1 + dig->afmt->offset, 0x007FFFFF);
-	WREG32(mmAFMT_RAMP_CONTROL2 + dig->afmt->offset, 0x00000001);
-	WREG32(mmAFMT_RAMP_CONTROL3 + dig->afmt->offset, 0x00000001);
-
-	/* enable audio after setting up hw */
-	dce_v11_0_audio_enable(adev, dig->afmt->pin, true);
-}
-
-/*
- * Record the enabled state of the encoder's AFMT block.  On disable the
- * assigned audio pin (if any) is turned off and released.  A request
- * that matches the current state is ignored.
- */
-static void dce_v11_0_afmt_enable(struct drm_encoder *encoder, bool enable)
-{
-	struct drm_device *dev = encoder->dev;
-	struct amdgpu_device *adev = drm_to_adev(dev);
-	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
-	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
-	struct amdgpu_afmt *afmt;
-
-	if (!dig || !dig->afmt)
-		return;
-	afmt = dig->afmt;
-
-	/* already in the requested state: silently do nothing */
-	if (afmt->enabled == enable)
-		return;
-
-	if (!enable && afmt->pin) {
-		dce_v11_0_audio_enable(adev, afmt->pin, false);
-		afmt->pin = NULL;
-	}
-
-	afmt->enabled = enable;
-
-	DRM_DEBUG("%sabling AFMT interface @ 0x%04X for encoder 0x%x\n",
-		  enable ? "En" : "Dis", afmt->offset, amdgpu_encoder->encoder_id);
-}
-
-/*
- * Allocate one AFMT state block per DIG encoder.
- * Returns 0 on success or -ENOMEM, in which case every block allocated
- * so far is freed and all slots are left NULL.
- */
-static int dce_v11_0_afmt_init(struct amdgpu_device *adev)
-{
-	struct amdgpu_afmt *afmt;
-	int i;
-
-	for (i = 0; i < adev->mode_info.num_dig; i++)
-		adev->mode_info.afmt[i] = NULL;
-
-	/* DCE11 has audio blocks tied to DIG encoders */
-	for (i = 0; i < adev->mode_info.num_dig; i++) {
-		afmt = kzalloc(sizeof(struct amdgpu_afmt), GFP_KERNEL);
-		if (!afmt)
-			goto err_free;
-
-		afmt->offset = dig_offsets[i];
-		afmt->id = i;
-		adev->mode_info.afmt[i] = afmt;
-	}
-	return 0;
-
-err_free:
-	/* slot i is still NULL; release slots 0 .. i-1 */
-	while (--i >= 0) {
-		kfree(adev->mode_info.afmt[i]);
-		adev->mode_info.afmt[i] = NULL;
-	}
-	return -ENOMEM;
-}
-
-/* Free every per-DIG AFMT state block and clear its slot. */
-static void dce_v11_0_afmt_fini(struct amdgpu_device *adev)
-{
-	int idx = 0;
-
-	while (idx < adev->mode_info.num_dig) {
-		kfree(adev->mode_info.afmt[idx]);
-		adev->mode_info.afmt[idx] = NULL;
-		idx++;
-	}
-}
-
-/* VGA control register of each display controller, indexed by crtc_id. */
-static const u32 vga_control_regs[6] = {
-	mmD1VGA_CONTROL,
-	mmD2VGA_CONTROL,
-	mmD3VGA_CONTROL,
-	mmD4VGA_CONTROL,
-	mmD5VGA_CONTROL,
-	mmD6VGA_CONTROL,
-};
-
-/*
- * Set or clear bit 0 of the VGA control register belonging to @crtc,
- * leaving all other bits as read.
- */
-static void dce_v11_0_vga_enable(struct drm_crtc *crtc, bool enable)
-{
-	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-	struct amdgpu_device *adev = drm_to_adev(crtc->dev);
-	u32 value;
-
-	value = RREG32(vga_control_regs[amdgpu_crtc->crtc_id]) & ~1;
-	if (enable)
-		value |= 1;
-	WREG32(vga_control_regs[amdgpu_crtc->crtc_id], value);
-}
-
-/* Write 1 (enable) or 0 (disable) to the GRPH_ENABLE register of @crtc. */
-static void dce_v11_0_grph_enable(struct drm_crtc *crtc, bool enable)
-{
-	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-	struct amdgpu_device *adev = drm_to_adev(crtc->dev);
-
-	WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, enable ? 1 : 0);
-}
-
-static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
- struct drm_framebuffer *fb,
- int x, int y, int atomic)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct drm_framebuffer *target_fb;
- struct drm_gem_object *obj;
- struct amdgpu_bo *abo;
- uint64_t fb_location, tiling_flags;
- uint32_t fb_format, fb_pitch_pixels;
- u32 fb_swap = REG_SET_FIELD(0, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, ENDIAN_NONE);
- u32 pipe_config;
- u32 tmp, viewport_w, viewport_h;
- int r;
- bool bypass_lut = false;
-
- /* no fb bound */
- if (!atomic && !crtc->primary->fb) {
- DRM_DEBUG_KMS("No FB bound\n");
- return 0;
- }
-
- if (atomic)
- target_fb = fb;
- else
- target_fb = crtc->primary->fb;
-
- /* If atomic, assume fb object is pinned & idle & fenced and
- * just update base pointers
- */
- obj = target_fb->obj[0];
- abo = gem_to_amdgpu_bo(obj);
- r = amdgpu_bo_reserve(abo, false);
- if (unlikely(r != 0))
- return r;
-
- if (!atomic) {
- r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
- if (unlikely(r != 0)) {
- amdgpu_bo_unreserve(abo);
- return -EINVAL;
- }
- }
- fb_location = amdgpu_bo_gpu_offset(abo);
-
- amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
- amdgpu_bo_unreserve(abo);
-
- pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
-
- switch (target_fb->format->format) {
- case DRM_FORMAT_C8:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 0);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
- break;
- case DRM_FORMAT_XRGB4444:
- case DRM_FORMAT_ARGB4444:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 2);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN16);
-#endif
- break;
- case DRM_FORMAT_XRGB1555:
- case DRM_FORMAT_ARGB1555:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN16);
-#endif
- break;
- case DRM_FORMAT_BGRX5551:
- case DRM_FORMAT_BGRA5551:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 5);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN16);
-#endif
- break;
- case DRM_FORMAT_RGB565:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 1);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN16);
-#endif
- break;
- case DRM_FORMAT_XRGB8888:
- case DRM_FORMAT_ARGB8888:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN32);
-#endif
- break;
- case DRM_FORMAT_XRGB2101010:
- case DRM_FORMAT_ARGB2101010:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 1);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN32);
-#endif
- /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
- bypass_lut = true;
- break;
- case DRM_FORMAT_BGRX1010102:
- case DRM_FORMAT_BGRA1010102:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 4);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN32);
-#endif
- /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
- bypass_lut = true;
- break;
- case DRM_FORMAT_XBGR8888:
- case DRM_FORMAT_ABGR8888:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_RED_CROSSBAR, 2);
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_BLUE_CROSSBAR, 2);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN32);
-#endif
- break;
- default:
- DRM_ERROR("Unsupported screen format %p4cc\n",
- &target_fb->format->format);
- return -EINVAL;
- }
-
- if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_2D_TILED_THIN1) {
- unsigned bankw, bankh, mtaspect, tile_split, num_banks;
-
- bankw = AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
- bankh = AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
- mtaspect = AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
- tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT);
- num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
-
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_NUM_BANKS, num_banks);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_ARRAY_MODE,
- ARRAY_2D_TILED_THIN1);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_TILE_SPLIT,
- tile_split);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_BANK_WIDTH, bankw);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_BANK_HEIGHT, bankh);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_MACRO_TILE_ASPECT,
- mtaspect);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_MICRO_TILE_MODE,
- ADDR_SURF_MICRO_TILING_DISPLAY);
- } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_1D_TILED_THIN1) {
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_ARRAY_MODE,
- ARRAY_1D_TILED_THIN1);
- }
-
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_PIPE_CONFIG,
- pipe_config);
-
- dce_v11_0_vga_enable(crtc, false);
-
- /* Make sure surface address is updated at vertical blank rather than
- * horizontal blank
- */
- tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,
- GRPH_SURFACE_UPDATE_H_RETRACE_EN, 0);
- WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
- upper_32_bits(fb_location));
- WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
- upper_32_bits(fb_location));
- WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- (u32)fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK);
- WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- (u32) fb_location & GRPH_SECONDARY_SURFACE_ADDRESS__GRPH_SECONDARY_SURFACE_ADDRESS_MASK);
- WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
- WREG32(mmGRPH_SWAP_CNTL + amdgpu_crtc->crtc_offset, fb_swap);
-
- /*
- * The LUT only has 256 slots for indexing by a 8 bpc fb. Bypass the LUT
- * for > 8 bpc scanout to avoid truncation of fb indices to 8 msb's, to
- * retain the full precision throughout the pipeline.
- */
- tmp = RREG32(mmGRPH_LUT_10BIT_BYPASS + amdgpu_crtc->crtc_offset);
- if (bypass_lut)
- tmp = REG_SET_FIELD(tmp, GRPH_LUT_10BIT_BYPASS, GRPH_LUT_10BIT_BYPASS_EN, 1);
- else
- tmp = REG_SET_FIELD(tmp, GRPH_LUT_10BIT_BYPASS, GRPH_LUT_10BIT_BYPASS_EN, 0);
- WREG32(mmGRPH_LUT_10BIT_BYPASS + amdgpu_crtc->crtc_offset, tmp);
-
- if (bypass_lut)
- DRM_DEBUG_KMS("Bypassing hardware LUT due to 10 bit fb scanout.\n");
-
- WREG32(mmGRPH_SURFACE_OFFSET_X + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmGRPH_SURFACE_OFFSET_Y + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmGRPH_X_START + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmGRPH_Y_START + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmGRPH_X_END + amdgpu_crtc->crtc_offset, target_fb->width);
- WREG32(mmGRPH_Y_END + amdgpu_crtc->crtc_offset, target_fb->height);
-
- fb_pitch_pixels = target_fb->pitches[0] / target_fb->format->cpp[0];
- WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, fb_pitch_pixels);
-
- dce_v11_0_grph_enable(crtc, true);
-
- WREG32(mmLB_DESKTOP_HEIGHT + amdgpu_crtc->crtc_offset,
- target_fb->height);
-
- x &= ~3;
- y &= ~1;
- WREG32(mmVIEWPORT_START + amdgpu_crtc->crtc_offset,
- (x << 16) | y);
- viewport_w = crtc->mode.hdisplay;
- viewport_h = (crtc->mode.vdisplay + 1) & ~1;
- WREG32(mmVIEWPORT_SIZE + amdgpu_crtc->crtc_offset,
- (viewport_w << 16) | viewport_h);
-
- /* set pageflip to happen anywhere in vblank interval */
- WREG32(mmCRTC_MASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
-
- if (!atomic && fb && fb != crtc->primary->fb) {
- abo = gem_to_amdgpu_bo(fb->obj[0]);
- r = amdgpu_bo_reserve(abo, true);
- if (unlikely(r != 0))
- return r;
- amdgpu_bo_unpin(abo);
- amdgpu_bo_unreserve(abo);
- }
-
- /* Bytes per pixel may have changed */
- dce_v11_0_bandwidth_update(adev);
-
- return 0;
-}
-
-static void dce_v11_0_set_interleave(struct drm_crtc *crtc,
- struct drm_display_mode *mode)
-{
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- u32 tmp;
-
- tmp = RREG32(mmLB_DATA_FORMAT + amdgpu_crtc->crtc_offset);
- if (mode->flags & DRM_MODE_FLAG_INTERLACE)
- tmp = REG_SET_FIELD(tmp, LB_DATA_FORMAT, INTERLEAVE_EN, 1);
- else
- tmp = REG_SET_FIELD(tmp, LB_DATA_FORMAT, INTERLEAVE_EN, 0);
- WREG32(mmLB_DATA_FORMAT + amdgpu_crtc->crtc_offset, tmp);
-}
-
-static void dce_v11_0_crtc_load_lut(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- u16 *r, *g, *b;
- int i;
- u32 tmp;
-
- DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id);
-
- tmp = RREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, INPUT_CSC_CONTROL, INPUT_CSC_GRPH_MODE, 0);
- WREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, PRESCALE_GRPH_CONTROL, GRPH_PRESCALE_BYPASS, 1);
- WREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, INPUT_GAMMA_CONTROL, GRPH_INPUT_GAMMA_MODE, 0);
- WREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- WREG32(mmDC_LUT_CONTROL + amdgpu_crtc->crtc_offset, 0);
-
- WREG32(mmDC_LUT_BLACK_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmDC_LUT_BLACK_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmDC_LUT_BLACK_OFFSET_RED + amdgpu_crtc->crtc_offset, 0);
-
- WREG32(mmDC_LUT_WHITE_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0xffff);
- WREG32(mmDC_LUT_WHITE_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0xffff);
- WREG32(mmDC_LUT_WHITE_OFFSET_RED + amdgpu_crtc->crtc_offset, 0xffff);
-
- WREG32(mmDC_LUT_RW_MODE + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmDC_LUT_WRITE_EN_MASK + amdgpu_crtc->crtc_offset, 0x00000007);
-
- WREG32(mmDC_LUT_RW_INDEX + amdgpu_crtc->crtc_offset, 0);
- r = crtc->gamma_store;
- g = r + crtc->gamma_size;
- b = g + crtc->gamma_size;
- for (i = 0; i < 256; i++) {
- WREG32(mmDC_LUT_30_COLOR + amdgpu_crtc->crtc_offset,
- ((*r++ & 0xffc0) << 14) |
- ((*g++ & 0xffc0) << 4) |
- (*b++ >> 6));
- }
-
- tmp = RREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, GRPH_DEGAMMA_MODE, 0);
- tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, CURSOR_DEGAMMA_MODE, 0);
- tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, CURSOR2_DEGAMMA_MODE, 0);
- WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, GAMUT_REMAP_CONTROL, GRPH_GAMUT_REMAP_MODE, 0);
- WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, REGAMMA_CONTROL, GRPH_REGAMMA_MODE, 0);
- WREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, OUTPUT_CSC_CONTROL, OUTPUT_CSC_GRPH_MODE, 0);
- WREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- /* XXX match this to the depth of the crtc fmt block, move to modeset? */
- WREG32(mmDENORM_CONTROL + amdgpu_crtc->crtc_offset, 0);
- /* XXX this only needs to be programmed once per crtc at startup,
- * not sure where the best place for it is
- */
- tmp = RREG32(mmALPHA_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, ALPHA_CONTROL, CURSOR_ALPHA_BLND_ENA, 1);
- WREG32(mmALPHA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-}
-
-static int dce_v11_0_pick_dig_encoder(struct drm_encoder *encoder)
-{
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
-
- switch (amdgpu_encoder->encoder_id) {
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
- if (dig->linkb)
- return 1;
- else
- return 0;
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
- if (dig->linkb)
- return 3;
- else
- return 2;
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
- if (dig->linkb)
- return 5;
- else
- return 4;
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
- return 6;
- default:
- DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id);
- return 0;
- }
-}
-
-/**
- * dce_v11_0_pick_pll - Allocate a PPLL for use by the crtc.
- *
- * @crtc: drm crtc
- *
- * Returns the PPLL (Pixel PLL) to be used by the crtc. For DP monitors
- * a single PPLL can be used for all DP crtcs/encoders. For non-DP
- * monitors a dedicated PPLL must be used. If a particular board has
- * an external DP PLL, return ATOM_PPLL_INVALID to skip PLL programming
- * as there is no need to program the PLL itself. If we are not able to
- * allocate a PLL, return ATOM_PPLL_INVALID to skip PLL programming to
- * avoid messing up an existing monitor.
- *
- * Asic specific PLL information
- *
- * DCE 10.x
- * Tonga
- * - PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP)
- * CI
- * - PPLL0, PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP) and DAC
- *
- */
-static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- u32 pll_in_use;
- int pll;
-
- if ((adev->asic_type == CHIP_POLARIS10) ||
- (adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12) ||
- (adev->asic_type == CHIP_VEGAM)) {
- struct amdgpu_encoder *amdgpu_encoder =
- to_amdgpu_encoder(amdgpu_crtc->encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
-
- if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder)))
- return ATOM_DP_DTO;
-
- switch (amdgpu_encoder->encoder_id) {
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
- if (dig->linkb)
- return ATOM_COMBOPHY_PLL1;
- else
- return ATOM_COMBOPHY_PLL0;
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
- if (dig->linkb)
- return ATOM_COMBOPHY_PLL3;
- else
- return ATOM_COMBOPHY_PLL2;
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
- if (dig->linkb)
- return ATOM_COMBOPHY_PLL5;
- else
- return ATOM_COMBOPHY_PLL4;
- default:
- DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id);
- return ATOM_PPLL_INVALID;
- }
- }
-
- if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder))) {
- if (adev->clock.dp_extclk)
- /* skip PPLL programming if using ext clock */
- return ATOM_PPLL_INVALID;
- else {
- /* use the same PPLL for all DP monitors */
- pll = amdgpu_pll_get_shared_dp_ppll(crtc);
- if (pll != ATOM_PPLL_INVALID)
- return pll;
- }
- } else {
- /* use the same PPLL for all monitors with the same clock */
- pll = amdgpu_pll_get_shared_nondp_ppll(crtc);
- if (pll != ATOM_PPLL_INVALID)
- return pll;
- }
-
- /* XXX need to determine what plls are available on each DCE11 part */
- pll_in_use = amdgpu_pll_get_use_mask(crtc);
- if (adev->flags & AMD_IS_APU) {
- if (!(pll_in_use & (1 << ATOM_PPLL1)))
- return ATOM_PPLL1;
- if (!(pll_in_use & (1 << ATOM_PPLL0)))
- return ATOM_PPLL0;
- DRM_ERROR("unable to allocate a PPLL\n");
- return ATOM_PPLL_INVALID;
- } else {
- if (!(pll_in_use & (1 << ATOM_PPLL2)))
- return ATOM_PPLL2;
- if (!(pll_in_use & (1 << ATOM_PPLL1)))
- return ATOM_PPLL1;
- if (!(pll_in_use & (1 << ATOM_PPLL0)))
- return ATOM_PPLL0;
- DRM_ERROR("unable to allocate a PPLL\n");
- return ATOM_PPLL_INVALID;
- }
- return ATOM_PPLL_INVALID;
-}
-
-static void dce_v11_0_lock_cursor(struct drm_crtc *crtc, bool lock)
-{
- struct amdgpu_device *adev = drm_to_adev(crtc->dev);
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- uint32_t cur_lock;
-
- cur_lock = RREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset);
- if (lock)
- cur_lock = REG_SET_FIELD(cur_lock, CUR_UPDATE, CURSOR_UPDATE_LOCK, 1);
- else
- cur_lock = REG_SET_FIELD(cur_lock, CUR_UPDATE, CURSOR_UPDATE_LOCK, 0);
- WREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset, cur_lock);
-}
-
-static void dce_v11_0_hide_cursor(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct amdgpu_device *adev = drm_to_adev(crtc->dev);
- u32 tmp;
-
- tmp = RREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 0);
- WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-}
-
-static void dce_v11_0_show_cursor(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct amdgpu_device *adev = drm_to_adev(crtc->dev);
- u32 tmp;
-
- WREG32(mmCUR_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
- upper_32_bits(amdgpu_crtc->cursor_addr));
- WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- lower_32_bits(amdgpu_crtc->cursor_addr));
-
- tmp = RREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 1);
- tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_MODE, 2);
- WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-}
-
-static int dce_v11_0_cursor_move_locked(struct drm_crtc *crtc,
- int x, int y)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct amdgpu_device *adev = drm_to_adev(crtc->dev);
- int xorigin = 0, yorigin = 0;
-
- amdgpu_crtc->cursor_x = x;
- amdgpu_crtc->cursor_y = y;
-
- /* avivo cursor are offset into the total surface */
- x += crtc->x;
- y += crtc->y;
- DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y);
-
- if (x < 0) {
- xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1);
- x = 0;
- }
- if (y < 0) {
- yorigin = min(-y, amdgpu_crtc->max_cursor_height - 1);
- y = 0;
- }
-
- WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y);
- WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin);
- WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
- ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1));
-
- return 0;
-}
-
-static int dce_v11_0_crtc_cursor_move(struct drm_crtc *crtc,
- int x, int y)
-{
- int ret;
-
- dce_v11_0_lock_cursor(crtc, true);
- ret = dce_v11_0_cursor_move_locked(crtc, x, y);
- dce_v11_0_lock_cursor(crtc, false);
-
- return ret;
-}
-
-static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc,
- struct drm_file *file_priv,
- uint32_t handle,
- uint32_t width,
- uint32_t height,
- int32_t hot_x,
- int32_t hot_y)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_gem_object *obj;
- struct amdgpu_bo *aobj;
- int ret;
-
- if (!handle) {
- /* turn off cursor */
- dce_v11_0_hide_cursor(crtc);
- obj = NULL;
- goto unpin;
- }
-
- if ((width > amdgpu_crtc->max_cursor_width) ||
- (height > amdgpu_crtc->max_cursor_height)) {
- DRM_ERROR("bad cursor width or height %d x %d\n", width, height);
- return -EINVAL;
- }
-
- obj = drm_gem_object_lookup(file_priv, handle);
- if (!obj) {
- DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, amdgpu_crtc->crtc_id);
- return -ENOENT;
- }
-
- aobj = gem_to_amdgpu_bo(obj);
- ret = amdgpu_bo_reserve(aobj, false);
- if (ret != 0) {
- drm_gem_object_put(obj);
- return ret;
- }
-
- ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
- amdgpu_bo_unreserve(aobj);
- if (ret) {
- DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
- drm_gem_object_put(obj);
- return ret;
- }
- amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
-
- dce_v11_0_lock_cursor(crtc, true);
-
- if (width != amdgpu_crtc->cursor_width ||
- height != amdgpu_crtc->cursor_height ||
- hot_x != amdgpu_crtc->cursor_hot_x ||
- hot_y != amdgpu_crtc->cursor_hot_y) {
- int x, y;
-
- x = amdgpu_crtc->cursor_x + amdgpu_crtc->cursor_hot_x - hot_x;
- y = amdgpu_crtc->cursor_y + amdgpu_crtc->cursor_hot_y - hot_y;
-
- dce_v11_0_cursor_move_locked(crtc, x, y);
-
- amdgpu_crtc->cursor_width = width;
- amdgpu_crtc->cursor_height = height;
- amdgpu_crtc->cursor_hot_x = hot_x;
- amdgpu_crtc->cursor_hot_y = hot_y;
- }
-
- dce_v11_0_show_cursor(crtc);
- dce_v11_0_lock_cursor(crtc, false);
-
-unpin:
- if (amdgpu_crtc->cursor_bo) {
- struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
- ret = amdgpu_bo_reserve(aobj, true);
- if (likely(ret == 0)) {
- amdgpu_bo_unpin(aobj);
- amdgpu_bo_unreserve(aobj);
- }
- drm_gem_object_put(amdgpu_crtc->cursor_bo);
- }
-
- amdgpu_crtc->cursor_bo = obj;
- return 0;
-}
-
-static void dce_v11_0_cursor_reset(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-
- if (amdgpu_crtc->cursor_bo) {
- dce_v11_0_lock_cursor(crtc, true);
-
- dce_v11_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x,
- amdgpu_crtc->cursor_y);
-
- dce_v11_0_show_cursor(crtc);
-
- dce_v11_0_lock_cursor(crtc, false);
- }
-}
-
-static int dce_v11_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
- u16 *blue, uint32_t size,
- struct drm_modeset_acquire_ctx *ctx)
-{
- dce_v11_0_crtc_load_lut(crtc);
-
- return 0;
-}
-
-static void dce_v11_0_crtc_destroy(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-
- drm_crtc_cleanup(crtc);
- kfree(amdgpu_crtc);
-}
-
-static const struct drm_crtc_funcs dce_v11_0_crtc_funcs = {
- .cursor_set2 = dce_v11_0_crtc_cursor_set2,
- .cursor_move = dce_v11_0_crtc_cursor_move,
- .gamma_set = dce_v11_0_crtc_gamma_set,
- .set_config = amdgpu_display_crtc_set_config,
- .destroy = dce_v11_0_crtc_destroy,
- .page_flip_target = amdgpu_display_crtc_page_flip_target,
- .get_vblank_counter = amdgpu_get_vblank_counter_kms,
- .enable_vblank = amdgpu_enable_vblank_kms,
- .disable_vblank = amdgpu_disable_vblank_kms,
- .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp,
-};
-
-static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode)
-{
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- unsigned type;
-
- switch (mode) {
- case DRM_MODE_DPMS_ON:
- amdgpu_crtc->enabled = true;
- amdgpu_atombios_crtc_enable(crtc, ATOM_ENABLE);
- dce_v11_0_vga_enable(crtc, true);
- amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
- dce_v11_0_vga_enable(crtc, false);
- /* Make sure VBLANK and PFLIP interrupts are still enabled */
- type = amdgpu_display_crtc_idx_to_irq_type(adev,
- amdgpu_crtc->crtc_id);
- amdgpu_irq_update(adev, &adev->crtc_irq, type);
- amdgpu_irq_update(adev, &adev->pageflip_irq, type);
- drm_crtc_vblank_on(crtc);
- dce_v11_0_crtc_load_lut(crtc);
- break;
- case DRM_MODE_DPMS_STANDBY:
- case DRM_MODE_DPMS_SUSPEND:
- case DRM_MODE_DPMS_OFF:
- drm_crtc_vblank_off(crtc);
- if (amdgpu_crtc->enabled) {
- dce_v11_0_vga_enable(crtc, true);
- amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
- dce_v11_0_vga_enable(crtc, false);
- }
- amdgpu_atombios_crtc_enable(crtc, ATOM_DISABLE);
- amdgpu_crtc->enabled = false;
- break;
- }
- /* adjust pm to dpms */
- amdgpu_pm_compute_clocks(adev);
-}
-
-static void dce_v11_0_crtc_prepare(struct drm_crtc *crtc)
-{
- /* disable crtc pair power gating before programming */
- amdgpu_atombios_crtc_powergate(crtc, ATOM_DISABLE);
- amdgpu_atombios_crtc_lock(crtc, ATOM_ENABLE);
- dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
-}
-
-static void dce_v11_0_crtc_commit(struct drm_crtc *crtc)
-{
- dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_ON);
- amdgpu_atombios_crtc_lock(crtc, ATOM_DISABLE);
-}
-
-static void dce_v11_0_crtc_disable(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_atom_ss ss;
- int i;
-
- dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
- if (crtc->primary->fb) {
- int r;
- struct amdgpu_bo *abo;
-
- abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
- r = amdgpu_bo_reserve(abo, true);
- if (unlikely(r))
- DRM_ERROR("failed to reserve abo before unpin\n");
- else {
- amdgpu_bo_unpin(abo);
- amdgpu_bo_unreserve(abo);
- }
- }
- /* disable the GRPH */
- dce_v11_0_grph_enable(crtc, false);
-
- amdgpu_atombios_crtc_powergate(crtc, ATOM_ENABLE);
-
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- if (adev->mode_info.crtcs[i] &&
- adev->mode_info.crtcs[i]->enabled &&
- i != amdgpu_crtc->crtc_id &&
- amdgpu_crtc->pll_id == adev->mode_info.crtcs[i]->pll_id) {
- /* one other crtc is using this pll don't turn
- * off the pll
- */
- goto done;
- }
- }
-
- switch (amdgpu_crtc->pll_id) {
- case ATOM_PPLL0:
- case ATOM_PPLL1:
- case ATOM_PPLL2:
- /* disable the ppll */
- amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, amdgpu_crtc->pll_id,
- 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss);
- break;
- case ATOM_COMBOPHY_PLL0:
- case ATOM_COMBOPHY_PLL1:
- case ATOM_COMBOPHY_PLL2:
- case ATOM_COMBOPHY_PLL3:
- case ATOM_COMBOPHY_PLL4:
- case ATOM_COMBOPHY_PLL5:
- /* disable the ppll */
- amdgpu_atombios_crtc_program_pll(crtc, ATOM_CRTC_INVALID, amdgpu_crtc->pll_id,
- 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss);
- break;
- default:
- break;
- }
-done:
- amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
- amdgpu_crtc->adjusted_clock = 0;
- amdgpu_crtc->encoder = NULL;
- amdgpu_crtc->connector = NULL;
-}
-
-static int dce_v11_0_crtc_mode_set(struct drm_crtc *crtc,
- struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode,
- int x, int y, struct drm_framebuffer *old_fb)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
-
- if (!amdgpu_crtc->adjusted_clock)
- return -EINVAL;
-
- if ((adev->asic_type == CHIP_POLARIS10) ||
- (adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12) ||
- (adev->asic_type == CHIP_VEGAM)) {
- struct amdgpu_encoder *amdgpu_encoder =
- to_amdgpu_encoder(amdgpu_crtc->encoder);
- int encoder_mode =
- amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder);
-
- /* SetPixelClock calculates the plls and ss values now */
- amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id,
- amdgpu_crtc->pll_id,
- encoder_mode, amdgpu_encoder->encoder_id,
- adjusted_mode->clock, 0, 0, 0, 0,
- amdgpu_crtc->bpc, amdgpu_crtc->ss_enabled, &amdgpu_crtc->ss);
- } else {
- amdgpu_atombios_crtc_set_pll(crtc, adjusted_mode);
- }
- amdgpu_atombios_crtc_set_dtd_timing(crtc, adjusted_mode);
- dce_v11_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
- amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode);
- amdgpu_atombios_crtc_scaler_setup(crtc);
- dce_v11_0_cursor_reset(crtc);
- /* update the hw version fpr dpm */
- amdgpu_crtc->hw_mode = *adjusted_mode;
-
- return 0;
-}
-
-static bool dce_v11_0_crtc_mode_fixup(struct drm_crtc *crtc,
- const struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct drm_encoder *encoder;
-
- /* assign the encoder to the amdgpu crtc to avoid repeated lookups later */
- list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
- if (encoder->crtc == crtc) {
- amdgpu_crtc->encoder = encoder;
- amdgpu_crtc->connector = amdgpu_get_connector_for_encoder(encoder);
- break;
- }
- }
- if ((amdgpu_crtc->encoder == NULL) || (amdgpu_crtc->connector == NULL)) {
- amdgpu_crtc->encoder = NULL;
- amdgpu_crtc->connector = NULL;
- return false;
- }
- if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
- return false;
- if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
- return false;
- /* pick pll */
- amdgpu_crtc->pll_id = dce_v11_0_pick_pll(crtc);
- /* if we can't get a PPLL for a non-DP encoder, fail */
- if ((amdgpu_crtc->pll_id == ATOM_PPLL_INVALID) &&
- !ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder)))
- return false;
-
- return true;
-}
-
-static int dce_v11_0_crtc_set_base(struct drm_crtc *crtc, int x, int y,
- struct drm_framebuffer *old_fb)
-{
- return dce_v11_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
-}
-
-static int dce_v11_0_crtc_set_base_atomic(struct drm_crtc *crtc,
- struct drm_framebuffer *fb,
- int x, int y, enum mode_set_atomic state)
-{
- return dce_v11_0_crtc_do_set_base(crtc, fb, x, y, 1);
-}
-
-static const struct drm_crtc_helper_funcs dce_v11_0_crtc_helper_funcs = {
- .dpms = dce_v11_0_crtc_dpms,
- .mode_fixup = dce_v11_0_crtc_mode_fixup,
- .mode_set = dce_v11_0_crtc_mode_set,
- .mode_set_base = dce_v11_0_crtc_set_base,
- .mode_set_base_atomic = dce_v11_0_crtc_set_base_atomic,
- .prepare = dce_v11_0_crtc_prepare,
- .commit = dce_v11_0_crtc_commit,
- .disable = dce_v11_0_crtc_disable,
- .get_scanout_position = amdgpu_crtc_get_scanout_position,
-};
-
-static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index)
-{
- struct amdgpu_crtc *amdgpu_crtc;
-
- amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) +
- (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL);
- if (amdgpu_crtc == NULL)
- return -ENOMEM;
-
- drm_crtc_init(adev_to_drm(adev), &amdgpu_crtc->base, &dce_v11_0_crtc_funcs);
-
- drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
- amdgpu_crtc->crtc_id = index;
- adev->mode_info.crtcs[index] = amdgpu_crtc;
-
- amdgpu_crtc->max_cursor_width = 128;
- amdgpu_crtc->max_cursor_height = 128;
- adev_to_drm(adev)->mode_config.cursor_width = amdgpu_crtc->max_cursor_width;
- adev_to_drm(adev)->mode_config.cursor_height = amdgpu_crtc->max_cursor_height;
-
- switch (amdgpu_crtc->crtc_id) {
- case 0:
- default:
- amdgpu_crtc->crtc_offset = CRTC0_REGISTER_OFFSET;
- break;
- case 1:
- amdgpu_crtc->crtc_offset = CRTC1_REGISTER_OFFSET;
- break;
- case 2:
- amdgpu_crtc->crtc_offset = CRTC2_REGISTER_OFFSET;
- break;
- case 3:
- amdgpu_crtc->crtc_offset = CRTC3_REGISTER_OFFSET;
- break;
- case 4:
- amdgpu_crtc->crtc_offset = CRTC4_REGISTER_OFFSET;
- break;
- case 5:
- amdgpu_crtc->crtc_offset = CRTC5_REGISTER_OFFSET;
- break;
- }
-
- amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
- amdgpu_crtc->adjusted_clock = 0;
- amdgpu_crtc->encoder = NULL;
- amdgpu_crtc->connector = NULL;
- drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v11_0_crtc_helper_funcs);
-
- return 0;
-}
-
-static int dce_v11_0_early_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- adev->audio_endpt_rreg = &dce_v11_0_audio_endpt_rreg;
- adev->audio_endpt_wreg = &dce_v11_0_audio_endpt_wreg;
-
- dce_v11_0_set_display_funcs(adev);
-
- adev->mode_info.num_crtc = dce_v11_0_get_num_crtc(adev);
-
- switch (adev->asic_type) {
- case CHIP_CARRIZO:
- adev->mode_info.num_hpd = 6;
- adev->mode_info.num_dig = 9;
- break;
- case CHIP_STONEY:
- adev->mode_info.num_hpd = 6;
- adev->mode_info.num_dig = 9;
- break;
- case CHIP_POLARIS10:
- case CHIP_VEGAM:
- adev->mode_info.num_hpd = 6;
- adev->mode_info.num_dig = 6;
- break;
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- adev->mode_info.num_hpd = 5;
- adev->mode_info.num_dig = 5;
- break;
- default:
- /* FIXME: not supported yet */
- return -EINVAL;
- }
-
- dce_v11_0_set_irq_funcs(adev);
-
- return 0;
-}
-
-static int dce_v11_0_sw_init(void *handle)
-{
- int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
- if (r)
- return r;
- }
-
- for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) {
- r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq);
- if (r)
- return r;
- }
-
- /* HPD hotplug */
- r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
- if (r)
- return r;
-
- adev_to_drm(adev)->mode_config.funcs = &amdgpu_mode_funcs;
-
- adev_to_drm(adev)->mode_config.async_page_flip = true;
-
- adev_to_drm(adev)->mode_config.max_width = 16384;
- adev_to_drm(adev)->mode_config.max_height = 16384;
-
- adev_to_drm(adev)->mode_config.preferred_depth = 24;
- adev_to_drm(adev)->mode_config.prefer_shadow = 1;
-
- adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
-
- r = amdgpu_display_modeset_create_props(adev);
- if (r)
- return r;
-
- adev_to_drm(adev)->mode_config.max_width = 16384;
- adev_to_drm(adev)->mode_config.max_height = 16384;
-
-
- /* allocate crtcs */
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- r = dce_v11_0_crtc_init(adev, i);
- if (r)
- return r;
- }
-
- if (amdgpu_atombios_get_connector_info_from_object_table(adev))
- amdgpu_display_print_display_setup(adev_to_drm(adev));
- else
- return -EINVAL;
-
- /* setup afmt */
- r = dce_v11_0_afmt_init(adev);
- if (r)
- return r;
-
- r = dce_v11_0_audio_init(adev);
- if (r)
- return r;
-
- drm_kms_helper_poll_init(adev_to_drm(adev));
-
- adev->mode_info.mode_config_initialized = true;
- return 0;
-}
-
-static int dce_v11_0_sw_fini(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- kfree(adev->mode_info.bios_hardcoded_edid);
-
- drm_kms_helper_poll_fini(adev_to_drm(adev));
-
- dce_v11_0_audio_fini(adev);
-
- dce_v11_0_afmt_fini(adev);
-
- drm_mode_config_cleanup(adev_to_drm(adev));
- adev->mode_info.mode_config_initialized = false;
-
- return 0;
-}
-
-static int dce_v11_0_hw_init(void *handle)
-{
- int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- dce_v11_0_init_golden_registers(adev);
-
- /* disable vga render */
- dce_v11_0_set_vga_render_state(adev, false);
- /* init dig PHYs, disp eng pll */
- amdgpu_atombios_crtc_powergate_init(adev);
- amdgpu_atombios_encoder_init_dig(adev);
- if ((adev->asic_type == CHIP_POLARIS10) ||
- (adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12) ||
- (adev->asic_type == CHIP_VEGAM)) {
- amdgpu_atombios_crtc_set_dce_clock(adev, adev->clock.default_dispclk,
- DCE_CLOCK_TYPE_DISPCLK, ATOM_GCK_DFS);
- amdgpu_atombios_crtc_set_dce_clock(adev, 0,
- DCE_CLOCK_TYPE_DPREFCLK, ATOM_GCK_DFS);
- } else {
- amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk);
- }
-
- /* initialize hpd */
- dce_v11_0_hpd_init(adev);
-
- for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
- dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
- }
-
- dce_v11_0_pageflip_interrupt_init(adev);
-
- return 0;
-}
-
-static int dce_v11_0_hw_fini(void *handle)
-{
- int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- dce_v11_0_hpd_fini(adev);
-
- for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
- dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
- }
-
- dce_v11_0_pageflip_interrupt_fini(adev);
-
- return 0;
-}
-
-static int dce_v11_0_suspend(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int r;
-
- r = amdgpu_display_suspend_helper(adev);
- if (r)
- return r;
-
- adev->mode_info.bl_level =
- amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
-
- return dce_v11_0_hw_fini(handle);
-}
-
-static int dce_v11_0_resume(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int ret;
-
- amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
- adev->mode_info.bl_level);
-
- ret = dce_v11_0_hw_init(handle);
-
- /* turn on the BL */
- if (adev->mode_info.bl_encoder) {
- u8 bl_level = amdgpu_display_backlight_get_level(adev,
- adev->mode_info.bl_encoder);
- amdgpu_display_backlight_set_level(adev, adev->mode_info.bl_encoder,
- bl_level);
- }
- if (ret)
- return ret;
-
- return amdgpu_display_resume_helper(adev);
-}
-
-static bool dce_v11_0_is_idle(void *handle)
-{
- return true;
-}
-
-static int dce_v11_0_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int dce_v11_0_soft_reset(void *handle)
-{
- u32 srbm_soft_reset = 0, tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (dce_v11_0_is_display_hung(adev))
- srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
-
- if (srbm_soft_reset) {
- tmp = RREG32(mmSRBM_SOFT_RESET);
- tmp |= srbm_soft_reset;
- dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- udelay(50);
-
- tmp &= ~srbm_soft_reset;
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- /* Wait a little for things to settle down */
- udelay(50);
- }
- return 0;
-}
-
-static void dce_v11_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
- int crtc,
- enum amdgpu_interrupt_state state)
-{
- u32 lb_interrupt_mask;
-
- if (crtc >= adev->mode_info.num_crtc) {
- DRM_DEBUG("invalid crtc %d\n", crtc);
- return;
- }
-
- switch (state) {
- case AMDGPU_IRQ_STATE_DISABLE:
- lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
- lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
- VBLANK_INTERRUPT_MASK, 0);
- WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
- break;
- case AMDGPU_IRQ_STATE_ENABLE:
- lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
- lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
- VBLANK_INTERRUPT_MASK, 1);
- WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
- break;
- default:
- break;
- }
-}
-
-static void dce_v11_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev,
- int crtc,
- enum amdgpu_interrupt_state state)
-{
- u32 lb_interrupt_mask;
-
- if (crtc >= adev->mode_info.num_crtc) {
- DRM_DEBUG("invalid crtc %d\n", crtc);
- return;
- }
-
- switch (state) {
- case AMDGPU_IRQ_STATE_DISABLE:
- lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
- lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
- VLINE_INTERRUPT_MASK, 0);
- WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
- break;
- case AMDGPU_IRQ_STATE_ENABLE:
- lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
- lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
- VLINE_INTERRUPT_MASK, 1);
- WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
- break;
- default:
- break;
- }
-}
-
-static int dce_v11_0_set_hpd_irq_state(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- unsigned hpd,
- enum amdgpu_interrupt_state state)
-{
- u32 tmp;
-
- if (hpd >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", hpd);
- return 0;
- }
-
- switch (state) {
- case AMDGPU_IRQ_STATE_DISABLE:
- tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 0);
- WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
- break;
- case AMDGPU_IRQ_STATE_ENABLE:
- tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 1);
- WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
- break;
- default:
- break;
- }
-
- return 0;
-}
-
-static int dce_v11_0_set_crtc_irq_state(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- unsigned type,
- enum amdgpu_interrupt_state state)
-{
- switch (type) {
- case AMDGPU_CRTC_IRQ_VBLANK1:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 0, state);
- break;
- case AMDGPU_CRTC_IRQ_VBLANK2:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 1, state);
- break;
- case AMDGPU_CRTC_IRQ_VBLANK3:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 2, state);
- break;
- case AMDGPU_CRTC_IRQ_VBLANK4:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 3, state);
- break;
- case AMDGPU_CRTC_IRQ_VBLANK5:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 4, state);
- break;
- case AMDGPU_CRTC_IRQ_VBLANK6:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 5, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE1:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 0, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE2:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 1, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE3:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 2, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE4:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 3, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE5:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 4, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE6:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 5, state);
- break;
- default:
- break;
- }
- return 0;
-}
-
-static int dce_v11_0_set_pageflip_irq_state(struct amdgpu_device *adev,
- struct amdgpu_irq_src *src,
- unsigned type,
- enum amdgpu_interrupt_state state)
-{
- u32 reg;
-
- if (type >= adev->mode_info.num_crtc) {
- DRM_ERROR("invalid pageflip crtc %d\n", type);
- return -EINVAL;
- }
-
- reg = RREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type]);
- if (state == AMDGPU_IRQ_STATE_DISABLE)
- WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
- reg & ~GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
- else
- WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
- reg | GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
-
- return 0;
-}
-
-static int dce_v11_0_pageflip_irq(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- struct amdgpu_iv_entry *entry)
-{
- unsigned long flags;
- unsigned crtc_id;
- struct amdgpu_crtc *amdgpu_crtc;
- struct amdgpu_flip_work *works;
-
- crtc_id = (entry->src_id - 8) >> 1;
- amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
-
- if (crtc_id >= adev->mode_info.num_crtc) {
- DRM_ERROR("invalid pageflip crtc %d\n", crtc_id);
- return -EINVAL;
- }
-
- if (RREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id]) &
- GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK)
- WREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id],
- GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK);
-
- /* IRQ could occur when in initial stage */
- if(amdgpu_crtc == NULL)
- return 0;
-
- spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
- works = amdgpu_crtc->pflip_works;
- if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){
- DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != "
- "AMDGPU_FLIP_SUBMITTED(%d)\n",
- amdgpu_crtc->pflip_status,
- AMDGPU_FLIP_SUBMITTED);
- spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
- return 0;
- }
-
- /* page flip completed. clean up */
- amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE;
- amdgpu_crtc->pflip_works = NULL;
-
- /* wakeup usersapce */
- if(works->event)
- drm_crtc_send_vblank_event(&amdgpu_crtc->base, works->event);
-
- spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
-
- drm_crtc_vblank_put(&amdgpu_crtc->base);
- schedule_work(&works->unpin_work);
-
- return 0;
-}
-
-static void dce_v11_0_hpd_int_ack(struct amdgpu_device *adev,
- int hpd)
-{
- u32 tmp;
-
- if (hpd >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", hpd);
- return;
- }
-
- tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_ACK, 1);
- WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
-}
-
-static void dce_v11_0_crtc_vblank_int_ack(struct amdgpu_device *adev,
- int crtc)
-{
- u32 tmp;
-
- if (crtc < 0 || crtc >= adev->mode_info.num_crtc) {
- DRM_DEBUG("invalid crtc %d\n", crtc);
- return;
- }
-
- tmp = RREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc]);
- tmp = REG_SET_FIELD(tmp, LB_VBLANK_STATUS, VBLANK_ACK, 1);
- WREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc], tmp);
-}
-
-static void dce_v11_0_crtc_vline_int_ack(struct amdgpu_device *adev,
- int crtc)
-{
- u32 tmp;
-
- if (crtc < 0 || crtc >= adev->mode_info.num_crtc) {
- DRM_DEBUG("invalid crtc %d\n", crtc);
- return;
- }
-
- tmp = RREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc]);
- tmp = REG_SET_FIELD(tmp, LB_VLINE_STATUS, VLINE_ACK, 1);
- WREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc], tmp);
-}
-
-static int dce_v11_0_crtc_irq(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- struct amdgpu_iv_entry *entry)
-{
- unsigned crtc = entry->src_id - 1;
- uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
- unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
- crtc);
-
- switch (entry->src_data[0]) {
- case 0: /* vblank */
- if (disp_int & interrupt_status_offsets[crtc].vblank)
- dce_v11_0_crtc_vblank_int_ack(adev, crtc);
- else
- DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
-
- if (amdgpu_irq_enabled(adev, source, irq_type)) {
- drm_handle_vblank(adev_to_drm(adev), crtc);
- }
- DRM_DEBUG("IH: D%d vblank\n", crtc + 1);
-
- break;
- case 1: /* vline */
- if (disp_int & interrupt_status_offsets[crtc].vline)
- dce_v11_0_crtc_vline_int_ack(adev, crtc);
- else
- DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
-
- DRM_DEBUG("IH: D%d vline\n", crtc + 1);
-
- break;
- default:
- DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
- break;
- }
-
- return 0;
-}
-
-static int dce_v11_0_hpd_irq(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- struct amdgpu_iv_entry *entry)
-{
- uint32_t disp_int, mask;
- unsigned hpd;
-
- if (entry->src_data[0] >= adev->mode_info.num_hpd) {
- DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
- return 0;
- }
-
- hpd = entry->src_data[0];
- disp_int = RREG32(interrupt_status_offsets[hpd].reg);
- mask = interrupt_status_offsets[hpd].hpd;
-
- if (disp_int & mask) {
- dce_v11_0_hpd_int_ack(adev, hpd);
- schedule_work(&adev->hotplug_work);
- DRM_DEBUG("IH: HPD%d\n", hpd + 1);
- }
-
- return 0;
-}
-
-static int dce_v11_0_set_clockgating_state(void *handle,
- enum amd_clockgating_state state)
-{
- return 0;
-}
-
-static int dce_v11_0_set_powergating_state(void *handle,
- enum amd_powergating_state state)
-{
- return 0;
-}
-
-static const struct amd_ip_funcs dce_v11_0_ip_funcs = {
- .name = "dce_v11_0",
- .early_init = dce_v11_0_early_init,
- .late_init = NULL,
- .sw_init = dce_v11_0_sw_init,
- .sw_fini = dce_v11_0_sw_fini,
- .hw_init = dce_v11_0_hw_init,
- .hw_fini = dce_v11_0_hw_fini,
- .suspend = dce_v11_0_suspend,
- .resume = dce_v11_0_resume,
- .is_idle = dce_v11_0_is_idle,
- .wait_for_idle = dce_v11_0_wait_for_idle,
- .soft_reset = dce_v11_0_soft_reset,
- .set_clockgating_state = dce_v11_0_set_clockgating_state,
- .set_powergating_state = dce_v11_0_set_powergating_state,
-};
-
-static void
-dce_v11_0_encoder_mode_set(struct drm_encoder *encoder,
- struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
-
- amdgpu_encoder->pixel_clock = adjusted_mode->clock;
-
- /* need to call this here rather than in prepare() since we need some crtc info */
- amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
-
- /* set scaler clears this on some chips */
- dce_v11_0_set_interleave(encoder->crtc, mode);
-
- if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) {
- dce_v11_0_afmt_enable(encoder, true);
- dce_v11_0_afmt_setmode(encoder, adjusted_mode);
- }
-}
-
-static void dce_v11_0_encoder_prepare(struct drm_encoder *encoder)
-{
- struct amdgpu_device *adev = drm_to_adev(encoder->dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
-
- if ((amdgpu_encoder->active_device &
- (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) ||
- (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) !=
- ENCODER_OBJECT_ID_NONE)) {
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- if (dig) {
- dig->dig_encoder = dce_v11_0_pick_dig_encoder(encoder);
- if (amdgpu_encoder->active_device & ATOM_DEVICE_DFP_SUPPORT)
- dig->afmt = adev->mode_info.afmt[dig->dig_encoder];
- }
- }
-
- amdgpu_atombios_scratch_regs_lock(adev, true);
-
- if (connector) {
- struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
-
- /* select the clock/data port if it uses a router */
- if (amdgpu_connector->router.cd_valid)
- amdgpu_i2c_router_select_cd_port(amdgpu_connector);
-
- /* turn eDP panel on for mode set */
- if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
- amdgpu_atombios_encoder_set_edp_panel_power(connector,
- ATOM_TRANSMITTER_ACTION_POWER_ON);
- }
-
- /* this is needed for the pll/ss setup to work correctly in some cases */
- amdgpu_atombios_encoder_set_crtc_source(encoder);
- /* set up the FMT blocks */
- dce_v11_0_program_fmt(encoder);
-}
-
-static void dce_v11_0_encoder_commit(struct drm_encoder *encoder)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
-
- /* need to call this here as we need the crtc set up */
- amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_ON);
- amdgpu_atombios_scratch_regs_lock(adev, false);
-}
-
-static void dce_v11_0_encoder_disable(struct drm_encoder *encoder)
-{
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig;
-
- amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
-
- if (amdgpu_atombios_encoder_is_digital(encoder)) {
- if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI)
- dce_v11_0_afmt_enable(encoder, false);
- dig = amdgpu_encoder->enc_priv;
- dig->dig_encoder = -1;
- }
- amdgpu_encoder->active_device = 0;
-}
-
-/* these are handled by the primary encoders */
-static void dce_v11_0_ext_prepare(struct drm_encoder *encoder)
-{
-
-}
-
-static void dce_v11_0_ext_commit(struct drm_encoder *encoder)
-{
-
-}
-
-static void
-dce_v11_0_ext_mode_set(struct drm_encoder *encoder,
- struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
-
-}
-
-static void dce_v11_0_ext_disable(struct drm_encoder *encoder)
-{
-
-}
-
-static void
-dce_v11_0_ext_dpms(struct drm_encoder *encoder, int mode)
-{
-
-}
-
-static const struct drm_encoder_helper_funcs dce_v11_0_ext_helper_funcs = {
- .dpms = dce_v11_0_ext_dpms,
- .prepare = dce_v11_0_ext_prepare,
- .mode_set = dce_v11_0_ext_mode_set,
- .commit = dce_v11_0_ext_commit,
- .disable = dce_v11_0_ext_disable,
- /* no detect for TMDS/LVDS yet */
-};
-
-static const struct drm_encoder_helper_funcs dce_v11_0_dig_helper_funcs = {
- .dpms = amdgpu_atombios_encoder_dpms,
- .mode_fixup = amdgpu_atombios_encoder_mode_fixup,
- .prepare = dce_v11_0_encoder_prepare,
- .mode_set = dce_v11_0_encoder_mode_set,
- .commit = dce_v11_0_encoder_commit,
- .disable = dce_v11_0_encoder_disable,
- .detect = amdgpu_atombios_encoder_dig_detect,
-};
-
-static const struct drm_encoder_helper_funcs dce_v11_0_dac_helper_funcs = {
- .dpms = amdgpu_atombios_encoder_dpms,
- .mode_fixup = amdgpu_atombios_encoder_mode_fixup,
- .prepare = dce_v11_0_encoder_prepare,
- .mode_set = dce_v11_0_encoder_mode_set,
- .commit = dce_v11_0_encoder_commit,
- .detect = amdgpu_atombios_encoder_dac_detect,
-};
-
-static void dce_v11_0_encoder_destroy(struct drm_encoder *encoder)
-{
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
- amdgpu_atombios_encoder_fini_backlight(amdgpu_encoder);
- kfree(amdgpu_encoder->enc_priv);
- drm_encoder_cleanup(encoder);
- kfree(amdgpu_encoder);
-}
-
-static const struct drm_encoder_funcs dce_v11_0_encoder_funcs = {
- .destroy = dce_v11_0_encoder_destroy,
-};
-
-static void dce_v11_0_encoder_add(struct amdgpu_device *adev,
- uint32_t encoder_enum,
- uint32_t supported_device,
- u16 caps)
-{
- struct drm_device *dev = adev_to_drm(adev);
- struct drm_encoder *encoder;
- struct amdgpu_encoder *amdgpu_encoder;
-
- /* see if we already added it */
- list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
- amdgpu_encoder = to_amdgpu_encoder(encoder);
- if (amdgpu_encoder->encoder_enum == encoder_enum) {
- amdgpu_encoder->devices |= supported_device;
- return;
- }
-
- }
-
- /* add a new one */
- amdgpu_encoder = kzalloc(sizeof(struct amdgpu_encoder), GFP_KERNEL);
- if (!amdgpu_encoder)
- return;
-
- encoder = &amdgpu_encoder->base;
- switch (adev->mode_info.num_crtc) {
- case 1:
- encoder->possible_crtcs = 0x1;
- break;
- case 2:
- default:
- encoder->possible_crtcs = 0x3;
- break;
- case 3:
- encoder->possible_crtcs = 0x7;
- break;
- case 4:
- encoder->possible_crtcs = 0xf;
- break;
- case 5:
- encoder->possible_crtcs = 0x1f;
- break;
- case 6:
- encoder->possible_crtcs = 0x3f;
- break;
- }
-
- amdgpu_encoder->enc_priv = NULL;
-
- amdgpu_encoder->encoder_enum = encoder_enum;
- amdgpu_encoder->encoder_id = (encoder_enum & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
- amdgpu_encoder->devices = supported_device;
- amdgpu_encoder->rmx_type = RMX_OFF;
- amdgpu_encoder->underscan_type = UNDERSCAN_OFF;
- amdgpu_encoder->is_ext_encoder = false;
- amdgpu_encoder->caps = caps;
-
- switch (amdgpu_encoder->encoder_id) {
- case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
- case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_DAC, NULL);
- drm_encoder_helper_add(encoder, &dce_v11_0_dac_helper_funcs);
- break;
- case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
- if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
- amdgpu_encoder->rmx_type = RMX_FULL;
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_LVDS, NULL);
- amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_lcd_info(amdgpu_encoder);
- } else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) {
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_DAC, NULL);
- amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
- } else {
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
- amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
- }
- drm_encoder_helper_add(encoder, &dce_v11_0_dig_helper_funcs);
- break;
- case ENCODER_OBJECT_ID_SI170B:
- case ENCODER_OBJECT_ID_CH7303:
- case ENCODER_OBJECT_ID_EXTERNAL_SDVOA:
- case ENCODER_OBJECT_ID_EXTERNAL_SDVOB:
- case ENCODER_OBJECT_ID_TITFP513:
- case ENCODER_OBJECT_ID_VT1623:
- case ENCODER_OBJECT_ID_HDMI_SI1930:
- case ENCODER_OBJECT_ID_TRAVIS:
- case ENCODER_OBJECT_ID_NUTMEG:
- /* these are handled by the primary encoders */
- amdgpu_encoder->is_ext_encoder = true;
- if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_LVDS, NULL);
- else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT))
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_DAC, NULL);
- else
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
- drm_encoder_helper_add(encoder, &dce_v11_0_ext_helper_funcs);
- break;
- }
-}
-
-static const struct amdgpu_display_funcs dce_v11_0_display_funcs = {
- .bandwidth_update = &dce_v11_0_bandwidth_update,
- .vblank_get_counter = &dce_v11_0_vblank_get_counter,
- .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
- .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
- .hpd_sense = &dce_v11_0_hpd_sense,
- .hpd_set_polarity = &dce_v11_0_hpd_set_polarity,
- .hpd_get_gpio_reg = &dce_v11_0_hpd_get_gpio_reg,
- .page_flip = &dce_v11_0_page_flip,
- .page_flip_get_scanoutpos = &dce_v11_0_crtc_get_scanoutpos,
- .add_encoder = &dce_v11_0_encoder_add,
- .add_connector = &amdgpu_connector_add,
-};
-
-static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev)
-{
- adev->mode_info.funcs = &dce_v11_0_display_funcs;
-}
-
-static const struct amdgpu_irq_src_funcs dce_v11_0_crtc_irq_funcs = {
- .set = dce_v11_0_set_crtc_irq_state,
- .process = dce_v11_0_crtc_irq,
-};
-
-static const struct amdgpu_irq_src_funcs dce_v11_0_pageflip_irq_funcs = {
- .set = dce_v11_0_set_pageflip_irq_state,
- .process = dce_v11_0_pageflip_irq,
-};
-
-static const struct amdgpu_irq_src_funcs dce_v11_0_hpd_irq_funcs = {
- .set = dce_v11_0_set_hpd_irq_state,
- .process = dce_v11_0_hpd_irq,
-};
-
-static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev)
-{
- if (adev->mode_info.num_crtc > 0)
- adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VLINE1 + adev->mode_info.num_crtc;
- else
- adev->crtc_irq.num_types = 0;
- adev->crtc_irq.funcs = &dce_v11_0_crtc_irq_funcs;
-
- adev->pageflip_irq.num_types = adev->mode_info.num_crtc;
- adev->pageflip_irq.funcs = &dce_v11_0_pageflip_irq_funcs;
-
- adev->hpd_irq.num_types = adev->mode_info.num_hpd;
- adev->hpd_irq.funcs = &dce_v11_0_hpd_irq_funcs;
-}
-
-const struct amdgpu_ip_block_version dce_v11_0_ip_block =
-{
- .type = AMD_IP_BLOCK_TYPE_DCE,
- .major = 11,
- .minor = 0,
- .rev = 0,
- .funcs = &dce_v11_0_ip_funcs,
-};
-
-const struct amdgpu_ip_block_version dce_v11_2_ip_block =
-{
- .type = AMD_IP_BLOCK_TYPE_DCE,
- .major = 11,
- .minor = 2,
- .rev = 0,
- .funcs = &dce_v11_0_ip_funcs,
-};
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h
deleted file mode 100644
index 0d878ca3acba..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef __DCE_V11_0_H__
-#define __DCE_V11_0_H__
-
-extern const struct amdgpu_ip_block_version dce_v11_0_ip_block;
-extern const struct amdgpu_ip_block_version dce_v11_2_ip_block;
-
-void dce_v11_0_disable_dce(struct amdgpu_device *adev);
-
-#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index c7803dc2b2d5..acc887a58518 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -23,7 +23,10 @@
#include <linux/pci.h>
+#include <drm/drm_edid.h>
#include <drm/drm_fourcc.h>
+#include <drm/drm_modeset_helper.h>
+#include <drm/drm_modeset_helper_vtables.h>
#include <drm/drm_vblank.h>
#include "amdgpu.h"
@@ -37,18 +40,25 @@
#include "amdgpu_connectors.h"
#include "amdgpu_display.h"
+#include "dce_v6_0.h"
+#include "sid.h"
+
#include "bif/bif_3_0_d.h"
#include "bif/bif_3_0_sh_mask.h"
+
#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"
+
#include "gca/gfx_6_0_d.h"
#include "gca/gfx_6_0_sh_mask.h"
+#include "gca/gfx_7_2_enum.h"
+
#include "gmc/gmc_6_0_d.h"
#include "gmc/gmc_6_0_sh_mask.h"
+
#include "dce/dce_6_0_d.h"
#include "dce/dce_6_0_sh_mask.h"
-#include "gca/gfx_7_2_enum.h"
-#include "dce_v6_0.h"
+
#include "si_enums.h"
static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev);
@@ -56,31 +66,31 @@ static void dce_v6_0_set_irq_funcs(struct amdgpu_device *adev);
static const u32 crtc_offsets[6] =
{
- SI_CRTC0_REGISTER_OFFSET,
- SI_CRTC1_REGISTER_OFFSET,
- SI_CRTC2_REGISTER_OFFSET,
- SI_CRTC3_REGISTER_OFFSET,
- SI_CRTC4_REGISTER_OFFSET,
- SI_CRTC5_REGISTER_OFFSET
+ CRTC0_REGISTER_OFFSET,
+ CRTC1_REGISTER_OFFSET,
+ CRTC2_REGISTER_OFFSET,
+ CRTC3_REGISTER_OFFSET,
+ CRTC4_REGISTER_OFFSET,
+ CRTC5_REGISTER_OFFSET
};
static const u32 hpd_offsets[] =
{
- mmDC_HPD1_INT_STATUS - mmDC_HPD1_INT_STATUS,
- mmDC_HPD2_INT_STATUS - mmDC_HPD1_INT_STATUS,
- mmDC_HPD3_INT_STATUS - mmDC_HPD1_INT_STATUS,
- mmDC_HPD4_INT_STATUS - mmDC_HPD1_INT_STATUS,
- mmDC_HPD5_INT_STATUS - mmDC_HPD1_INT_STATUS,
- mmDC_HPD6_INT_STATUS - mmDC_HPD1_INT_STATUS,
+ HPD0_REGISTER_OFFSET,
+ HPD1_REGISTER_OFFSET,
+ HPD2_REGISTER_OFFSET,
+ HPD3_REGISTER_OFFSET,
+ HPD4_REGISTER_OFFSET,
+ HPD5_REGISTER_OFFSET
};
static const uint32_t dig_offsets[] = {
- SI_CRTC0_REGISTER_OFFSET,
- SI_CRTC1_REGISTER_OFFSET,
- SI_CRTC2_REGISTER_OFFSET,
- SI_CRTC3_REGISTER_OFFSET,
- SI_CRTC4_REGISTER_OFFSET,
- SI_CRTC5_REGISTER_OFFSET,
+ CRTC0_REGISTER_OFFSET,
+ CRTC1_REGISTER_OFFSET,
+ CRTC2_REGISTER_OFFSET,
+ CRTC3_REGISTER_OFFSET,
+ CRTC4_REGISTER_OFFSET,
+ CRTC5_REGISTER_OFFSET,
(0x13830 - 0x7030) >> 2,
};
@@ -203,9 +213,9 @@ static void dce_v6_0_page_flip(struct amdgpu_device *adev,
/* update the scanout addresses */
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
upper_32_bits(crtc_base));
+ /* writing to the low address triggers the update */
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
(u32)crtc_base);
-
/* post the write */
RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
}
@@ -215,11 +225,11 @@ static int dce_v6_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
{
if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc))
return -EINVAL;
+
*vbl = RREG32(mmCRTC_V_BLANK_START_END + crtc_offsets[crtc]);
*position = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
return 0;
-
}
/**
@@ -239,7 +249,8 @@ static bool dce_v6_0_hpd_sense(struct amdgpu_device *adev,
if (hpd >= adev->mode_info.num_hpd)
return connected;
- if (RREG32(mmDC_HPD1_INT_STATUS + hpd_offsets[hpd]) & DC_HPD1_INT_STATUS__DC_HPD1_SENSE_MASK)
+ if (RREG32(mmDC_HPD1_INT_STATUS + hpd_offsets[hpd]) &
+ DC_HPD1_INT_STATUS__DC_HPD1_SENSE_MASK)
connected = true;
return connected;
@@ -270,6 +281,21 @@ static void dce_v6_0_hpd_set_polarity(struct amdgpu_device *adev,
WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
}
+static void dce_v6_0_hpd_int_ack(struct amdgpu_device *adev,
+ int hpd)
+{
+ u32 tmp;
+
+ if (hpd >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("invalid hpd %d\n", hpd);
+ return;
+ }
+
+ tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
+}
+
/**
* dce_v6_0_hpd_init - hpd setup callback.
*
@@ -309,6 +335,7 @@ static void dce_v6_0_hpd_init(struct amdgpu_device *adev)
continue;
}
+ dce_v6_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd);
dce_v6_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
@@ -339,7 +366,7 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev)
tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK;
- WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], 0);
+ WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
@@ -351,13 +378,41 @@ static u32 dce_v6_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
return mmDC_GPIO_HPD_A;
}
+static bool dce_v6_0_is_display_hung(struct amdgpu_device *adev)
+{
+ u32 crtc_hung = 0;
+ u32 crtc_status[6];
+ u32 i, j, tmp;
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (RREG32(mmCRTC_CONTROL + crtc_offsets[i]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK) {
+ crtc_status[i] = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
+ crtc_hung |= (1 << i);
+ }
+ }
+
+ for (j = 0; j < 10; j++) {
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (crtc_hung & (1 << i)) {
+ tmp = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
+ if (tmp != crtc_status[i])
+ crtc_hung &= ~(1 << i);
+ }
+ }
+ if (crtc_hung == 0)
+ return false;
+ udelay(100);
+ }
+
+ return true;
+}
+
static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev,
bool render)
{
if (!render)
WREG32(mmVGA_RENDER_CONTROL,
- RREG32(mmVGA_RENDER_CONTROL) & VGA_VSTATUS_CNTL);
-
+ RREG32(mmVGA_RENDER_CONTROL) & ~VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK);
}
static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev)
@@ -400,7 +455,6 @@ void dce_v6_0_disable_dce(struct amdgpu_device *adev)
static void dce_v6_0_program_fmt(struct drm_encoder *encoder)
{
-
struct drm_device *dev = encoder->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
@@ -843,7 +897,7 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
(u32)mode->clock);
line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
(u32)mode->clock);
- line_time = min(line_time, (u32)65535);
+ line_time = min_t(u32, line_time, 65535);
priority_a_cnt = 0;
priority_b_cnt = 0;
@@ -876,8 +930,8 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
wm_high.dram_channels = dram_channels;
wm_high.num_heads = num_heads;
- if (adev->pm.dpm_enabled) {
/* watermark for low clocks */
+ if (adev->pm.dpm_enabled) {
wm_low.yclk =
amdgpu_dpm_get_mclk(adev, true) * 10;
wm_low.sclk =
@@ -904,9 +958,9 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
wm_low.num_heads = num_heads;
/* set for high clocks */
- latency_watermark_a = min(dce_v6_0_latency_watermark(&wm_high), (u32)65535);
+ latency_watermark_a = min_t(u32, dce_v6_0_latency_watermark(&wm_high), 65535);
/* set for low clocks */
- latency_watermark_b = min(dce_v6_0_latency_watermark(&wm_low), (u32)65535);
+ latency_watermark_b = min_t(u32, dce_v6_0_latency_watermark(&wm_low), 65535);
/* possibly force display priority to high */
/* should really do this at mode validation time... */
@@ -957,16 +1011,16 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
/* select wm A */
arb_control3 = RREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset);
tmp = arb_control3;
- tmp &= ~LATENCY_WATERMARK_MASK(3);
- tmp |= LATENCY_WATERMARK_MASK(1);
+ tmp &= ~(3 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
+ tmp |= (1 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, tmp);
WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset,
((latency_watermark_a << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) |
(line_time << DPG_PIPE_URGENCY_CONTROL__URGENCY_HIGH_WATERMARK__SHIFT)));
/* select wm B */
tmp = RREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset);
- tmp &= ~LATENCY_WATERMARK_MASK(3);
- tmp |= LATENCY_WATERMARK_MASK(2);
+ tmp &= ~(3 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
+ tmp |= (2 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, tmp);
WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset,
((latency_watermark_b << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) |
@@ -980,13 +1034,26 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
/* save values for DPM */
amdgpu_crtc->line_time = line_time;
- amdgpu_crtc->wm_high = latency_watermark_a;
/* Save number of lines the linebuffer leads before the scanout */
amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
}
/* watermark setup */
+/**
+ * dce_v6_0_line_buffer_adjust - Set up the line buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @amdgpu_crtc: the selected display controller
+ * @mode: the current display mode on the selected display
+ * controller
+ * @other_mode: the display mode of another display controller
+ * that may be sharing the line buffer
+ *
+ * Setup up the line buffer allocation for
+ * the selected display controller (CIK).
+ * Returns the line buffer size in pixels.
+ */
static u32 dce_v6_0_line_buffer_adjust(struct amdgpu_device *adev,
struct amdgpu_crtc *amdgpu_crtc,
struct drm_display_mode *mode,
@@ -1021,7 +1088,7 @@ static u32 dce_v6_0_line_buffer_adjust(struct amdgpu_device *adev,
}
WREG32(mmDC_LB_MEMORY_SPLIT + amdgpu_crtc->crtc_offset,
- DC_LB_MEMORY_CONFIG(tmp));
+ (tmp << DC_LB_MEMORY_SPLIT__DC_LB_MEMORY_CONFIG__SHIFT));
WREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
(buffer_alloc << PIPE0_DMIF_BUFFER_CONTROL__DMIF_BUFFERS_ALLOCATED__SHIFT));
@@ -1198,7 +1265,7 @@ static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
return;
}
- sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb);
+ sad_count = drm_edid_to_speaker_allocation(amdgpu_connector->edid, &sadb);
if (sad_count < 0) {
DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
sad_count = 0;
@@ -1238,6 +1305,7 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 offset;
struct drm_connector *connector;
struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
@@ -1259,6 +1327,11 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
};
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ offset = dig->afmt->pin->offset;
+
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
@@ -1273,14 +1346,14 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
return;
}
- sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
+ sad_count = drm_edid_to_sad(amdgpu_connector->edid, &sads);
if (sad_count < 0)
DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
if (sad_count <= 0)
return;
for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
- u32 tmp = 0;
+ u32 value = 0;
u8 stereo_freqs = 0;
int max_channels = -1;
int j;
@@ -1290,12 +1363,12 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
if (sad->format == eld_reg_to_type[i][1]) {
if (sad->channels > max_channels) {
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- MAX_CHANNELS, sad->channels);
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- DESCRIPTOR_BYTE_2, sad->byte2);
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- SUPPORTED_FREQUENCIES, sad->freq);
+ value = (sad->channels <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__MAX_CHANNELS__SHIFT) |
+ (sad->byte2 <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__DESCRIPTOR_BYTE_2__SHIFT) |
+ (sad->freq <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES__SHIFT);
max_channels = sad->channels;
}
@@ -1306,13 +1379,13 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
}
}
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- SUPPORTED_FREQUENCIES_STEREO, stereo_freqs);
- WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp);
+ value |= (stereo_freqs <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES_STEREO__SHIFT);
+
+ WREG32_AUDIO_ENDPT(offset, eld_reg_to_type[i][0], value);
}
kfree(sads);
-
}
static void dce_v6_0_audio_enable(struct amdgpu_device *adev,
@@ -1328,13 +1401,13 @@ static void dce_v6_0_audio_enable(struct amdgpu_device *adev,
static const u32 pin_offsets[7] =
{
- (0x1780 - 0x1780),
- (0x1786 - 0x1780),
- (0x178c - 0x1780),
- (0x1792 - 0x1780),
- (0x1798 - 0x1780),
- (0x179d - 0x1780),
- (0x17a4 - 0x1780),
+ AUD0_REGISTER_OFFSET,
+ AUD1_REGISTER_OFFSET,
+ AUD2_REGISTER_OFFSET,
+ AUD3_REGISTER_OFFSET,
+ AUD4_REGISTER_OFFSET,
+ AUD5_REGISTER_OFFSET,
+ AUD6_REGISTER_OFFSET,
};
static int dce_v6_0_audio_init(struct amdgpu_device *adev)
@@ -1367,6 +1440,8 @@ static int dce_v6_0_audio_init(struct amdgpu_device *adev)
adev->mode_info.audio.pin[i].connected = false;
adev->mode_info.audio.pin[i].offset = pin_offsets[i];
adev->mode_info.audio.pin[i].id = i;
+ /* disable audio. it will be set up later */
+ /* XXX remove once we switch to ip funcs */
dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
}
@@ -1375,17 +1450,12 @@ static int dce_v6_0_audio_init(struct amdgpu_device *adev)
static void dce_v6_0_audio_fini(struct amdgpu_device *adev)
{
- int i;
-
if (!amdgpu_audio)
return;
if (!adev->mode_info.audio.enabled)
return;
- for (i = 0; i < adev->mode_info.audio.num_pins; i++)
- dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
-
adev->mode_info.audio.enabled = false;
}
@@ -1816,7 +1886,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
struct amdgpu_bo *abo;
uint64_t fb_location, tiling_flags;
uint32_t fb_format, fb_pitch_pixels, pipe_config;
- u32 fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_NONE);
+ u32 fb_swap = (GRPH_ENDIAN_NONE << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
u32 viewport_w, viewport_h;
int r;
bool bypass_lut = false;
@@ -1842,6 +1912,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
return r;
if (!atomic) {
+ abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
@@ -1855,76 +1926,76 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
switch (target_fb->format->format) {
case DRM_FORMAT_C8:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_8BPP) |
- GRPH_FORMAT(GRPH_FORMAT_INDEXED));
+ fb_format = ((GRPH_DEPTH_8BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_INDEXED << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
break;
case DRM_FORMAT_XRGB4444:
case DRM_FORMAT_ARGB4444:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB4444));
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB4444 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
case DRM_FORMAT_XRGB1555:
case DRM_FORMAT_ARGB1555:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB1555));
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB1555 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
case DRM_FORMAT_BGRX5551:
case DRM_FORMAT_BGRA5551:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
- GRPH_FORMAT(GRPH_FORMAT_BGRA5551));
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_BGRA5551 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
case DRM_FORMAT_RGB565:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB565));
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB565 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
case DRM_FORMAT_XRGB8888:
case DRM_FORMAT_ARGB8888:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB8888));
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
+ fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
case DRM_FORMAT_XRGB2101010:
case DRM_FORMAT_ARGB2101010:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB2101010));
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB2101010 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
+ fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
/* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
bypass_lut = true;
break;
case DRM_FORMAT_BGRX1010102:
case DRM_FORMAT_BGRA1010102:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
- GRPH_FORMAT(GRPH_FORMAT_BGRA1010102));
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_BGRA1010102 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
+ fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
/* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
bypass_lut = true;
break;
case DRM_FORMAT_XBGR8888:
case DRM_FORMAT_ABGR8888:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB8888));
- fb_swap = (GRPH_RED_CROSSBAR(GRPH_RED_SEL_B) |
- GRPH_BLUE_CROSSBAR(GRPH_BLUE_SEL_R));
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+ fb_swap = ((GRPH_RED_SEL_B << GRPH_SWAP_CNTL__GRPH_RED_CROSSBAR__SHIFT) |
+ (GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap |= GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
+ fb_swap |= (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
default:
@@ -1942,18 +2013,18 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT);
num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
- fb_format |= GRPH_NUM_BANKS(num_banks);
- fb_format |= GRPH_ARRAY_MODE(GRPH_ARRAY_2D_TILED_THIN1);
- fb_format |= GRPH_TILE_SPLIT(tile_split);
- fb_format |= GRPH_BANK_WIDTH(bankw);
- fb_format |= GRPH_BANK_HEIGHT(bankh);
- fb_format |= GRPH_MACRO_TILE_ASPECT(mtaspect);
+ fb_format |= (num_banks << GRPH_CONTROL__GRPH_NUM_BANKS__SHIFT);
+ fb_format |= (GRPH_ARRAY_2D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT);
+ fb_format |= (tile_split << GRPH_CONTROL__GRPH_TILE_SPLIT__SHIFT);
+ fb_format |= (bankw << GRPH_CONTROL__GRPH_BANK_WIDTH__SHIFT);
+ fb_format |= (bankh << GRPH_CONTROL__GRPH_BANK_HEIGHT__SHIFT);
+ fb_format |= (mtaspect << GRPH_CONTROL__GRPH_MACRO_TILE_ASPECT__SHIFT);
} else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_1D_TILED_THIN1) {
- fb_format |= GRPH_ARRAY_MODE(GRPH_ARRAY_1D_TILED_THIN1);
+ fb_format |= (GRPH_ARRAY_1D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT);
}
pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
- fb_format |= GRPH_PIPE_CONFIG(pipe_config);
+ fb_format |= (pipe_config << GRPH_CONTROL__GRPH_PIPE_CONFIG__SHIFT);
dce_v6_0_vga_enable(crtc, false);
@@ -1969,7 +2040,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
(u32)fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK);
WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- (u32) fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK);
+ (u32) fb_location & GRPH_SECONDARY_SURFACE_ADDRESS__GRPH_SECONDARY_SURFACE_ADDRESS_MASK);
WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
WREG32(mmGRPH_SWAP_CNTL + amdgpu_crtc->crtc_offset, fb_swap);
@@ -2037,14 +2108,13 @@ static void dce_v6_0_set_interleave(struct drm_crtc *crtc,
if (mode->flags & DRM_MODE_FLAG_INTERLACE)
WREG32(mmDATA_FORMAT + amdgpu_crtc->crtc_offset,
- INTERLEAVE_EN);
+ DATA_FORMAT__INTERLEAVE_EN_MASK);
else
WREG32(mmDATA_FORMAT + amdgpu_crtc->crtc_offset, 0);
}
static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc)
{
-
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
@@ -2054,15 +2124,15 @@ static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc)
DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id);
WREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << INPUT_CSC_CONTROL__INPUT_CSC_GRPH_MODE__SHIFT) |
- (0 << INPUT_CSC_CONTROL__INPUT_CSC_OVL_MODE__SHIFT)));
+ ((INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_GRPH_MODE__SHIFT) |
+ (INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_OVL_MODE__SHIFT)));
WREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset,
PRESCALE_GRPH_CONTROL__GRPH_PRESCALE_BYPASS_MASK);
WREG32(mmPRESCALE_OVL_CONTROL + amdgpu_crtc->crtc_offset,
PRESCALE_OVL_CONTROL__OVL_PRESCALE_BYPASS_MASK);
WREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << INPUT_GAMMA_CONTROL__GRPH_INPUT_GAMMA_MODE__SHIFT) |
- (0 << INPUT_GAMMA_CONTROL__OVL_INPUT_GAMMA_MODE__SHIFT)));
+ ((INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__GRPH_INPUT_GAMMA_MODE__SHIFT) |
+ (INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__OVL_INPUT_GAMMA_MODE__SHIFT)));
WREG32(mmDC_LUT_CONTROL + amdgpu_crtc->crtc_offset, 0);
@@ -2089,19 +2159,19 @@ static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc)
}
WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) |
- (0 << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) |
- ICON_DEGAMMA_MODE(0) |
- (0 << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT)));
+ ((DEGAMMA_BYPASS << DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) |
+ (DEGAMMA_BYPASS << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) |
+ (DEGAMMA_BYPASS << DEGAMMA_CONTROL__ICON_DEGAMMA_MODE__SHIFT) |
+ (DEGAMMA_BYPASS << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT)));
WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) |
- (0 << GAMUT_REMAP_CONTROL__OVL_GAMUT_REMAP_MODE__SHIFT)));
+ ((GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) |
+ (GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__OVL_GAMUT_REMAP_MODE__SHIFT)));
WREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << REGAMMA_CONTROL__GRPH_REGAMMA_MODE__SHIFT) |
- (0 << REGAMMA_CONTROL__OVL_REGAMMA_MODE__SHIFT)));
+ ((REGAMMA_BYPASS << REGAMMA_CONTROL__GRPH_REGAMMA_MODE__SHIFT) |
+ (REGAMMA_BYPASS << REGAMMA_CONTROL__OVL_REGAMMA_MODE__SHIFT)));
WREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << OUTPUT_CSC_CONTROL__OUTPUT_CSC_GRPH_MODE__SHIFT) |
- (0 << OUTPUT_CSC_CONTROL__OUTPUT_CSC_OVL_MODE__SHIFT)));
+ ((OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_GRPH_MODE__SHIFT) |
+ (OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_OVL_MODE__SHIFT)));
/* XXX match this to the depth of the crtc fmt block, move to modeset? */
WREG32(0x1a50 + amdgpu_crtc->crtc_offset, 0);
@@ -2196,8 +2266,6 @@ static void dce_v6_0_hide_cursor(struct drm_crtc *crtc)
WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset,
(CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
(CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
-
-
}
static void dce_v6_0_show_cursor(struct drm_crtc *crtc)
@@ -2214,7 +2282,6 @@ static void dce_v6_0_show_cursor(struct drm_crtc *crtc)
CUR_CONTROL__CURSOR_EN_MASK |
(CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
(CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
-
}
static int dce_v6_0_cursor_move_locked(struct drm_crtc *crtc,
@@ -2302,6 +2369,7 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
+ aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {
@@ -2424,7 +2492,7 @@ static void dce_v6_0_crtc_dpms(struct drm_crtc *crtc, int mode)
break;
}
/* adjust pm to dpms */
- amdgpu_pm_compute_clocks(adev);
+ amdgpu_dpm_compute_clocks(adev);
}
static void dce_v6_0_crtc_prepare(struct drm_crtc *crtc)
@@ -2524,7 +2592,6 @@ static bool dce_v6_0_crtc_mode_fixup(struct drm_crtc *crtc,
const struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
{
-
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct drm_encoder *encoder;
@@ -2581,6 +2648,32 @@ static const struct drm_crtc_helper_funcs dce_v6_0_crtc_helper_funcs = {
.get_scanout_position = amdgpu_crtc_get_scanout_position,
};
+static void dce_v6_0_panic_flush(struct drm_plane *plane)
+{
+ struct drm_framebuffer *fb;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_device *adev;
+ uint32_t fb_format;
+
+ if (!plane->fb)
+ return;
+
+ fb = plane->fb;
+ amdgpu_crtc = to_amdgpu_crtc(plane->crtc);
+ adev = drm_to_adev(fb->dev);
+
+ /* Disable DC tiling */
+ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK;
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+
+}
+
+static const struct drm_plane_helper_funcs dce_v6_0_drm_primary_plane_helper_funcs = {
+ .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+ .panic_flush = dce_v6_0_panic_flush,
+};
+
static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index)
{
struct amdgpu_crtc *amdgpu_crtc;
@@ -2608,13 +2701,14 @@ static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index)
amdgpu_crtc->encoder = NULL;
amdgpu_crtc->connector = NULL;
drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v6_0_crtc_helper_funcs);
+ drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v6_0_drm_primary_plane_helper_funcs);
return 0;
}
-static int dce_v6_0_early_init(void *handle)
+static int dce_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->audio_endpt_rreg = &dce_v6_0_audio_endpt_rreg;
adev->audio_endpt_wreg = &dce_v6_0_audio_endpt_wreg;
@@ -2643,11 +2737,10 @@ static int dce_v6_0_early_init(void *handle)
return 0;
}
-static int dce_v6_0_sw_init(void *handle)
+static int dce_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- bool ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
@@ -2674,7 +2767,7 @@ static int dce_v6_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
adev_to_drm(adev)->mode_config.prefer_shadow = 1;
- adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
+ adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
r = amdgpu_display_modeset_create_props(adev);
if (r)
@@ -2690,8 +2783,7 @@ static int dce_v6_0_sw_init(void *handle)
return r;
}
- ret = amdgpu_atombios_get_connector_info_from_object_table(adev);
- if (ret)
+ if (amdgpu_atombios_get_connector_info_from_object_table(adev))
amdgpu_display_print_display_setup(adev_to_drm(adev));
else
return -EINVAL;
@@ -2705,16 +2797,28 @@ static int dce_v6_0_sw_init(void *handle)
if (r)
return r;
+ /* Disable vblank IRQs aggressively for power-saving */
+ /* XXX: can this be enabled for DC? */
+ adev_to_drm(adev)->vblank_disable_immediate = true;
+
+ r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
+ if (r)
+ return r;
+
+ /* Pre-DCE11 */
+ INIT_DELAYED_WORK(&adev->hotplug_work,
+ amdgpu_display_hotplug_work_func);
+
drm_kms_helper_poll_init(adev_to_drm(adev));
return r;
}
-static int dce_v6_0_sw_fini(void *handle)
+static int dce_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- kfree(adev->mode_info.bios_hardcoded_edid);
+ drm_edid_free(adev->mode_info.bios_hardcoded_edid);
drm_kms_helper_poll_fini(adev_to_drm(adev));
@@ -2727,10 +2831,10 @@ static int dce_v6_0_sw_fini(void *handle)
return 0;
}
-static int dce_v6_0_hw_init(void *handle)
+static int dce_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* disable vga render */
dce_v6_0_set_vga_render_state(adev, false);
@@ -2750,10 +2854,10 @@ static int dce_v6_0_hw_init(void *handle)
return 0;
}
-static int dce_v6_0_hw_fini(void *handle)
+static int dce_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
dce_v6_0_hpd_fini(adev);
@@ -2763,12 +2867,14 @@ static int dce_v6_0_hw_fini(void *handle)
dce_v6_0_pageflip_interrupt_fini(adev);
+ flush_delayed_work(&adev->hotplug_work);
+
return 0;
}
-static int dce_v6_0_suspend(void *handle)
+static int dce_v6_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_display_suspend_helper(adev);
@@ -2777,18 +2883,18 @@ static int dce_v6_0_suspend(void *handle)
adev->mode_info.bl_level =
amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
- return dce_v6_0_hw_fini(handle);
+ return dce_v6_0_hw_fini(ip_block);
}
-static int dce_v6_0_resume(void *handle)
+static int dce_v6_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
adev->mode_info.bl_level);
- ret = dce_v6_0_hw_init(handle);
+ ret = dce_v6_0_hw_init(ip_block);
/* turn on the BL */
if (adev->mode_info.bl_encoder) {
@@ -2803,19 +2909,35 @@ static int dce_v6_0_resume(void *handle)
return amdgpu_display_resume_helper(adev);
}
-static bool dce_v6_0_is_idle(void *handle)
+static bool dce_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int dce_v6_0_wait_for_idle(void *handle)
+static int dce_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- return 0;
-}
+ u32 srbm_soft_reset = 0, tmp;
+ struct amdgpu_device *adev = ip_block->adev;
-static int dce_v6_0_soft_reset(void *handle)
-{
- DRM_INFO("xxxx: dce_v6_0_soft_reset --- no impl!!\n");
+ if (dce_v6_0_is_display_hung(adev))
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
+
+ if (srbm_soft_reset) {
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
return 0;
}
@@ -2832,22 +2954,22 @@ static void dce_v6_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
switch (crtc) {
case 0:
- reg_block = SI_CRTC0_REGISTER_OFFSET;
+ reg_block = CRTC0_REGISTER_OFFSET;
break;
case 1:
- reg_block = SI_CRTC1_REGISTER_OFFSET;
+ reg_block = CRTC1_REGISTER_OFFSET;
break;
case 2:
- reg_block = SI_CRTC2_REGISTER_OFFSET;
+ reg_block = CRTC2_REGISTER_OFFSET;
break;
case 3:
- reg_block = SI_CRTC3_REGISTER_OFFSET;
+ reg_block = CRTC3_REGISTER_OFFSET;
break;
case 4:
- reg_block = SI_CRTC4_REGISTER_OFFSET;
+ reg_block = CRTC4_REGISTER_OFFSET;
break;
case 5:
- reg_block = SI_CRTC5_REGISTER_OFFSET;
+ reg_block = CRTC5_REGISTER_OFFSET;
break;
default:
DRM_DEBUG("invalid crtc %d\n", crtc);
@@ -2857,12 +2979,12 @@ static void dce_v6_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
interrupt_mask = RREG32(mmINT_MASK + reg_block);
- interrupt_mask &= ~VBLANK_INT_MASK;
+ interrupt_mask &= ~INT_MASK__VBLANK_INT_MASK;
WREG32(mmINT_MASK + reg_block, interrupt_mask);
break;
case AMDGPU_IRQ_STATE_ENABLE:
interrupt_mask = RREG32(mmINT_MASK + reg_block);
- interrupt_mask |= VBLANK_INT_MASK;
+ interrupt_mask |= INT_MASK__VBLANK_INT_MASK;
WREG32(mmINT_MASK + reg_block, interrupt_mask);
break;
default:
@@ -2877,28 +2999,28 @@ static void dce_v6_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev,
}
-static int dce_v6_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
+static int dce_v6_0_set_hpd_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
- unsigned type,
+ unsigned hpd,
enum amdgpu_interrupt_state state)
{
u32 dc_hpd_int_cntl;
- if (type >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", type);
+ if (hpd >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("invalid hpd %d\n", hpd);
return 0;
}
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
- dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]);
- dc_hpd_int_cntl &= ~DC_HPDx_INT_EN;
- WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl);
+ dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ dc_hpd_int_cntl &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], dc_hpd_int_cntl);
break;
case AMDGPU_IRQ_STATE_ENABLE:
- dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]);
- dc_hpd_int_cntl |= DC_HPDx_INT_EN;
- WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl);
+ dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ dc_hpd_int_cntl |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], dc_hpd_int_cntl);
break;
default:
break;
@@ -2907,7 +3029,7 @@ static int dce_v6_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
return 0;
}
-static int dce_v6_0_set_crtc_interrupt_state(struct amdgpu_device *adev,
+static int dce_v6_0_set_crtc_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
@@ -2967,7 +3089,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev,
switch (entry->src_data[0]) {
case 0: /* vblank */
if (disp_int & interrupt_status_offsets[crtc].vblank)
- WREG32(mmVBLANK_STATUS + crtc_offsets[crtc], VBLANK_ACK);
+ WREG32(mmVBLANK_STATUS + crtc_offsets[crtc], VBLANK_STATUS__VBLANK_ACK_MASK);
else
DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
@@ -2978,7 +3100,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev,
break;
case 1: /* vline */
if (disp_int & interrupt_status_offsets[crtc].vline)
- WREG32(mmVLINE_STATUS + crtc_offsets[crtc], VLINE_ACK);
+ WREG32(mmVLINE_STATUS + crtc_offsets[crtc], VLINE_STATUS__VLINE_ACK_MASK);
else
DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
@@ -2992,7 +3114,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev,
return 0;
}
-static int dce_v6_0_set_pageflip_interrupt_state(struct amdgpu_device *adev,
+static int dce_v6_0_set_pageflip_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
@@ -3043,7 +3165,7 @@ static int dce_v6_0_pageflip_irq(struct amdgpu_device *adev,
spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
works = amdgpu_crtc->pflip_works;
- if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){
+ if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != "
"AMDGPU_FLIP_SUBMITTED(%d)\n",
amdgpu_crtc->pflip_status,
@@ -3072,7 +3194,7 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- uint32_t disp_int, mask, tmp;
+ uint32_t disp_int, mask;
unsigned hpd;
if (entry->src_data[0] >= adev->mode_info.num_hpd) {
@@ -3085,24 +3207,21 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev,
mask = interrupt_status_offsets[hpd].hpd;
if (disp_int & mask) {
- tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
- tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
- WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
- schedule_work(&adev->hotplug_work);
+ dce_v6_0_hpd_int_ack(adev, hpd);
+ schedule_delayed_work(&adev->hotplug_work, 0);
DRM_DEBUG("IH: HPD%d\n", hpd + 1);
}
return 0;
-
}
-static int dce_v6_0_set_clockgating_state(void *handle,
+static int dce_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int dce_v6_0_set_powergating_state(void *handle,
+static int dce_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -3111,7 +3230,6 @@ static int dce_v6_0_set_powergating_state(void *handle,
static const struct amd_ip_funcs dce_v6_0_ip_funcs = {
.name = "dce_v6_0",
.early_init = dce_v6_0_early_init,
- .late_init = NULL,
.sw_init = dce_v6_0_sw_init,
.sw_fini = dce_v6_0_sw_fini,
.hw_init = dce_v6_0_hw_init,
@@ -3119,18 +3237,15 @@ static const struct amd_ip_funcs dce_v6_0_ip_funcs = {
.suspend = dce_v6_0_suspend,
.resume = dce_v6_0_resume,
.is_idle = dce_v6_0_is_idle,
- .wait_for_idle = dce_v6_0_wait_for_idle,
.soft_reset = dce_v6_0_soft_reset,
.set_clockgating_state = dce_v6_0_set_clockgating_state,
.set_powergating_state = dce_v6_0_set_powergating_state,
};
-static void
-dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
+static void dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
{
-
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
@@ -3150,7 +3265,6 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
static void dce_v6_0_encoder_prepare(struct drm_encoder *encoder)
{
-
struct amdgpu_device *adev = drm_to_adev(encoder->dev);
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
@@ -3190,7 +3304,6 @@ static void dce_v6_0_encoder_prepare(struct drm_encoder *encoder)
static void dce_v6_0_encoder_commit(struct drm_encoder *encoder)
{
-
struct drm_device *dev = encoder->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
@@ -3201,7 +3314,6 @@ static void dce_v6_0_encoder_commit(struct drm_encoder *encoder)
static void dce_v6_0_encoder_disable(struct drm_encoder *encoder)
{
-
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig;
int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
@@ -3228,8 +3340,7 @@ static void dce_v6_0_ext_commit(struct drm_encoder *encoder)
}
-static void
-dce_v6_0_ext_mode_set(struct drm_encoder *encoder,
+static void dce_v6_0_ext_mode_set(struct drm_encoder *encoder,
struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
{
@@ -3241,8 +3352,7 @@ static void dce_v6_0_ext_disable(struct drm_encoder *encoder)
}
-static void
-dce_v6_0_ext_dpms(struct drm_encoder *encoder, int mode)
+static void dce_v6_0_ext_dpms(struct drm_encoder *encoder, int mode)
{
}
@@ -3313,7 +3423,6 @@ static void dce_v6_0_encoder_add(struct amdgpu_device *adev,
amdgpu_encoder->devices |= supported_device;
return;
}
-
}
/* add a new one */
@@ -3420,17 +3529,17 @@ static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev)
}
static const struct amdgpu_irq_src_funcs dce_v6_0_crtc_irq_funcs = {
- .set = dce_v6_0_set_crtc_interrupt_state,
+ .set = dce_v6_0_set_crtc_irq_state,
.process = dce_v6_0_crtc_irq,
};
static const struct amdgpu_irq_src_funcs dce_v6_0_pageflip_irq_funcs = {
- .set = dce_v6_0_set_pageflip_interrupt_state,
+ .set = dce_v6_0_set_pageflip_irq_state,
.process = dce_v6_0_pageflip_irq,
};
static const struct amdgpu_irq_src_funcs dce_v6_0_hpd_irq_funcs = {
- .set = dce_v6_0_set_hpd_interrupt_state,
+ .set = dce_v6_0_set_hpd_irq_state,
.process = dce_v6_0_hpd_irq,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index b200b9e722d9..2ccd6aad8dd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -21,7 +21,10 @@
*
*/
+#include <drm/drm_edid.h>
#include <drm/drm_fourcc.h>
+#include <drm/drm_modeset_helper.h>
+#include <drm/drm_modeset_helper_vtables.h>
#include <drm/drm_vblank.h>
#include "amdgpu.h"
@@ -51,8 +54,7 @@
static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev);
static void dce_v8_0_set_irq_funcs(struct amdgpu_device *adev);
-static const u32 crtc_offsets[6] =
-{
+static const u32 crtc_offsets[6] = {
CRTC0_REGISTER_OFFSET,
CRTC1_REGISTER_OFFSET,
CRTC2_REGISTER_OFFSET,
@@ -61,8 +63,7 @@ static const u32 crtc_offsets[6] =
CRTC5_REGISTER_OFFSET
};
-static const u32 hpd_offsets[] =
-{
+static const u32 hpd_offsets[] = {
HPD0_REGISTER_OFFSET,
HPD1_REGISTER_OFFSET,
HPD2_REGISTER_OFFSET,
@@ -264,6 +265,21 @@ static void dce_v8_0_hpd_set_polarity(struct amdgpu_device *adev,
WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
}
+static void dce_v8_0_hpd_int_ack(struct amdgpu_device *adev,
+ int hpd)
+{
+ u32 tmp;
+
+ if (hpd >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("invalid hpd %d\n", hpd);
+ return;
+ }
+
+ tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
+}
+
/**
* dce_v8_0_hpd_init - hpd setup callback.
*
@@ -303,6 +319,7 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev)
continue;
}
+ dce_v8_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd);
dce_v8_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
@@ -333,7 +350,7 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev)
tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK;
- WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], 0);
+ WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
@@ -975,7 +992,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
(u32)mode->clock);
line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
(u32)mode->clock);
- line_time = min(line_time, (u32)65535);
+ line_time = min_t(u32, line_time, 65535);
/* watermark for high clocks */
if (adev->pm.dpm_enabled) {
@@ -1005,7 +1022,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
wm_high.num_heads = num_heads;
/* set for high clocks */
- latency_watermark_a = min(dce_v8_0_latency_watermark(&wm_high), (u32)65535);
+ latency_watermark_a = min_t(u32, dce_v8_0_latency_watermark(&wm_high), 65535);
/* possibly force display priority to high */
/* should really do this at mode validation time... */
@@ -1044,7 +1061,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
wm_low.num_heads = num_heads;
/* set for low clocks */
- latency_watermark_b = min(dce_v8_0_latency_watermark(&wm_low), (u32)65535);
+ latency_watermark_b = min_t(u32, dce_v8_0_latency_watermark(&wm_low), 65535);
/* possibly force display priority to high */
/* should really do this at mode validation time... */
@@ -1079,8 +1096,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
/* save values for DPM */
amdgpu_crtc->line_time = line_time;
- amdgpu_crtc->wm_high = latency_watermark_a;
- amdgpu_crtc->wm_low = latency_watermark_b;
+
/* Save number of lines the linebuffer leads before the scanout */
amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
}
@@ -1255,7 +1271,7 @@ static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
return;
}
- sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb);
+ sad_count = drm_edid_to_speaker_allocation(amdgpu_connector->edid, &sadb);
if (sad_count < 0) {
DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
sad_count = 0;
@@ -1323,7 +1339,7 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder)
return;
}
- sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
+ sad_count = drm_edid_to_sad(amdgpu_connector->edid, &sads);
if (sad_count < 0)
DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
if (sad_count <= 0)
@@ -1343,9 +1359,9 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder)
if (sad->channels > max_channels) {
value = (sad->channels <<
AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__MAX_CHANNELS__SHIFT) |
- (sad->byte2 <<
+ (sad->byte2 <<
AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__DESCRIPTOR_BYTE_2__SHIFT) |
- (sad->freq <<
+ (sad->freq <<
AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES__SHIFT);
max_channels = sad->channels;
}
@@ -1377,15 +1393,14 @@ static void dce_v8_0_audio_enable(struct amdgpu_device *adev,
enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0);
}
-static const u32 pin_offsets[7] =
-{
- (0x1780 - 0x1780),
- (0x1786 - 0x1780),
- (0x178c - 0x1780),
- (0x1792 - 0x1780),
- (0x1798 - 0x1780),
- (0x179d - 0x1780),
- (0x17a4 - 0x1780),
+static const u32 pin_offsets[7] = {
+ AUD0_REGISTER_OFFSET,
+ AUD1_REGISTER_OFFSET,
+ AUD2_REGISTER_OFFSET,
+ AUD3_REGISTER_OFFSET,
+ AUD4_REGISTER_OFFSET,
+ AUD5_REGISTER_OFFSET,
+ AUD6_REGISTER_OFFSET,
};
static int dce_v8_0_audio_init(struct amdgpu_device *adev)
@@ -1427,17 +1442,12 @@ static int dce_v8_0_audio_init(struct amdgpu_device *adev)
static void dce_v8_0_audio_fini(struct amdgpu_device *adev)
{
- int i;
-
if (!amdgpu_audio)
return;
if (!adev->mode_info.audio.enabled)
return;
- for (i = 0; i < adev->mode_info.audio.num_pins; i++)
- dce_v8_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
-
adev->mode_info.audio.enabled = false;
}
@@ -1738,8 +1748,7 @@ static void dce_v8_0_afmt_fini(struct amdgpu_device *adev)
}
}
-static const u32 vga_control_regs[6] =
-{
+static const u32 vga_control_regs[6] = {
mmD1VGA_CONTROL,
mmD2VGA_CONTROL,
mmD3VGA_CONTROL,
@@ -1813,6 +1822,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
return r;
if (!atomic) {
+ abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
@@ -1893,9 +1903,9 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
case DRM_FORMAT_XBGR8888:
case DRM_FORMAT_ABGR8888:
fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
- (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+ (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
fb_swap = ((GRPH_RED_SEL_B << GRPH_SWAP_CNTL__GRPH_RED_CROSSBAR__SHIFT) |
- (GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT));
+ (GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT));
#ifdef __BIG_ENDIAN
fb_swap |= (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
@@ -2092,22 +2102,18 @@ static int dce_v8_0_pick_dig_encoder(struct drm_encoder *encoder)
return 1;
else
return 0;
- break;
case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
if (dig->linkb)
return 3;
else
return 2;
- break;
case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
if (dig->linkb)
return 5;
else
return 4;
- break;
case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
return 6;
- break;
default:
DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id);
return 0;
@@ -2309,6 +2315,7 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
+ aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {
@@ -2437,7 +2444,7 @@ static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode)
break;
}
/* adjust pm to dpms */
- amdgpu_pm_compute_clocks(adev);
+ amdgpu_dpm_compute_clocks(adev);
}
static void dce_v8_0_crtc_prepare(struct drm_crtc *crtc)
@@ -2600,6 +2607,31 @@ static const struct drm_crtc_helper_funcs dce_v8_0_crtc_helper_funcs = {
.get_scanout_position = amdgpu_crtc_get_scanout_position,
};
+static void dce_v8_0_panic_flush(struct drm_plane *plane)
+{
+ struct drm_framebuffer *fb;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_device *adev;
+ uint32_t fb_format;
+
+ if (!plane->fb)
+ return;
+
+ fb = plane->fb;
+ amdgpu_crtc = to_amdgpu_crtc(plane->crtc);
+ adev = drm_to_adev(fb->dev);
+
+ /* Disable DC tiling */
+ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK;
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+}
+
+static const struct drm_plane_helper_funcs dce_v8_0_drm_primary_plane_helper_funcs = {
+ .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+ .panic_flush = dce_v8_0_panic_flush,
+};
+
static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index)
{
struct amdgpu_crtc *amdgpu_crtc;
@@ -2627,13 +2659,14 @@ static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index)
amdgpu_crtc->encoder = NULL;
amdgpu_crtc->connector = NULL;
drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v8_0_crtc_helper_funcs);
+ drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v8_0_drm_primary_plane_helper_funcs);
return 0;
}
-static int dce_v8_0_early_init(void *handle)
+static int dce_v8_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->audio_endpt_rreg = &dce_v8_0_audio_endpt_rreg;
adev->audio_endpt_wreg = &dce_v8_0_audio_endpt_wreg;
@@ -2667,10 +2700,10 @@ static int dce_v8_0_early_init(void *handle)
return 0;
}
-static int dce_v8_0_sw_init(void *handle)
+static int dce_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
@@ -2697,9 +2730,13 @@ static int dce_v8_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+ if (adev->asic_type == CHIP_HAWAII)
+ /* disable prefer shadow for now due to hibernation issues */
+ adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+ else
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
- adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
+ adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
r = amdgpu_display_modeset_create_props(adev);
if (r)
@@ -2729,17 +2766,29 @@ static int dce_v8_0_sw_init(void *handle)
if (r)
return r;
+ /* Disable vblank IRQs aggressively for power-saving */
+ /* XXX: can this be enabled for DC? */
+ adev_to_drm(adev)->vblank_disable_immediate = true;
+
+ r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
+ if (r)
+ return r;
+
+ /* Pre-DCE11 */
+ INIT_DELAYED_WORK(&adev->hotplug_work,
+ amdgpu_display_hotplug_work_func);
+
drm_kms_helper_poll_init(adev_to_drm(adev));
adev->mode_info.mode_config_initialized = true;
return 0;
}
-static int dce_v8_0_sw_fini(void *handle)
+static int dce_v8_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- kfree(adev->mode_info.bios_hardcoded_edid);
+ drm_edid_free(adev->mode_info.bios_hardcoded_edid);
drm_kms_helper_poll_fini(adev_to_drm(adev));
@@ -2753,10 +2802,10 @@ static int dce_v8_0_sw_fini(void *handle)
return 0;
}
-static int dce_v8_0_hw_init(void *handle)
+static int dce_v8_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* disable vga render */
dce_v8_0_set_vga_render_state(adev, false);
@@ -2776,10 +2825,10 @@ static int dce_v8_0_hw_init(void *handle)
return 0;
}
-static int dce_v8_0_hw_fini(void *handle)
+static int dce_v8_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
dce_v8_0_hpd_fini(adev);
@@ -2789,12 +2838,14 @@ static int dce_v8_0_hw_fini(void *handle)
dce_v8_0_pageflip_interrupt_fini(adev);
+ flush_delayed_work(&adev->hotplug_work);
+
return 0;
}
-static int dce_v8_0_suspend(void *handle)
+static int dce_v8_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_display_suspend_helper(adev);
@@ -2804,18 +2855,18 @@ static int dce_v8_0_suspend(void *handle)
adev->mode_info.bl_level =
amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
- return dce_v8_0_hw_fini(handle);
+ return dce_v8_0_hw_fini(ip_block);
}
-static int dce_v8_0_resume(void *handle)
+static int dce_v8_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
adev->mode_info.bl_level);
- ret = dce_v8_0_hw_init(handle);
+ ret = dce_v8_0_hw_init(ip_block);
/* turn on the BL */
if (adev->mode_info.bl_encoder) {
@@ -2830,20 +2881,15 @@ static int dce_v8_0_resume(void *handle)
return amdgpu_display_resume_helper(adev);
}
-static bool dce_v8_0_is_idle(void *handle)
+static bool dce_v8_0_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int dce_v8_0_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int dce_v8_0_soft_reset(void *handle)
+static int dce_v8_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0, tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (dce_v8_0_is_display_hung(adev))
srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
@@ -2969,7 +3015,7 @@ static void dce_v8_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev,
}
}
-static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
+static int dce_v8_0_set_hpd_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
@@ -2977,7 +3023,7 @@ static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
u32 dc_hpd_int_cntl;
if (type >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", type);
+ DRM_DEBUG("invalid hpd %d\n", type);
return 0;
}
@@ -2999,7 +3045,7 @@ static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
return 0;
}
-static int dce_v8_0_set_crtc_interrupt_state(struct amdgpu_device *adev,
+static int dce_v8_0_set_crtc_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
@@ -3084,7 +3130,7 @@ static int dce_v8_0_crtc_irq(struct amdgpu_device *adev,
return 0;
}
-static int dce_v8_0_set_pageflip_interrupt_state(struct amdgpu_device *adev,
+static int dce_v8_0_set_pageflip_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
@@ -3135,7 +3181,7 @@ static int dce_v8_0_pageflip_irq(struct amdgpu_device *adev,
spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
works = amdgpu_crtc->pflip_works;
- if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){
+ if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != "
"AMDGPU_FLIP_SUBMITTED(%d)\n",
amdgpu_crtc->pflip_status,
@@ -3164,7 +3210,7 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- uint32_t disp_int, mask, tmp;
+ uint32_t disp_int, mask;
unsigned hpd;
if (entry->src_data[0] >= adev->mode_info.num_hpd) {
@@ -3177,10 +3223,8 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev,
mask = interrupt_status_offsets[hpd].hpd;
if (disp_int & mask) {
- tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
- tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
- WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
- schedule_work(&adev->hotplug_work);
+ dce_v8_0_hpd_int_ack(adev, hpd);
+ schedule_delayed_work(&adev->hotplug_work, 0);
DRM_DEBUG("IH: HPD%d\n", hpd + 1);
}
@@ -3188,13 +3232,13 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev,
}
-static int dce_v8_0_set_clockgating_state(void *handle,
+static int dce_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int dce_v8_0_set_powergating_state(void *handle,
+static int dce_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -3203,7 +3247,6 @@ static int dce_v8_0_set_powergating_state(void *handle,
static const struct amd_ip_funcs dce_v8_0_ip_funcs = {
.name = "dce_v8_0",
.early_init = dce_v8_0_early_init,
- .late_init = NULL,
.sw_init = dce_v8_0_sw_init,
.sw_fini = dce_v8_0_sw_fini,
.hw_init = dce_v8_0_hw_init,
@@ -3211,7 +3254,6 @@ static const struct amd_ip_funcs dce_v8_0_ip_funcs = {
.suspend = dce_v8_0_suspend,
.resume = dce_v8_0_resume,
.is_idle = dce_v8_0_is_idle,
- .wait_for_idle = dce_v8_0_wait_for_idle,
.soft_reset = dce_v8_0_soft_reset,
.set_clockgating_state = dce_v8_0_set_clockgating_state,
.set_powergating_state = dce_v8_0_set_powergating_state,
@@ -3499,17 +3541,17 @@ static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev)
}
static const struct amdgpu_irq_src_funcs dce_v8_0_crtc_irq_funcs = {
- .set = dce_v8_0_set_crtc_interrupt_state,
+ .set = dce_v8_0_set_crtc_irq_state,
.process = dce_v8_0_crtc_irq,
};
static const struct amdgpu_irq_src_funcs dce_v8_0_pageflip_irq_funcs = {
- .set = dce_v8_0_set_pageflip_interrupt_state,
+ .set = dce_v8_0_set_pageflip_irq_state,
.process = dce_v8_0_pageflip_irq,
};
static const struct amdgpu_irq_src_funcs dce_v8_0_hpd_irq_funcs = {
- .set = dce_v8_0_set_hpd_interrupt_state,
+ .set = dce_v8_0_set_hpd_irq_state,
.process = dce_v8_0_hpd_irq,
};
@@ -3528,8 +3570,7 @@ static void dce_v8_0_set_irq_funcs(struct amdgpu_device *adev)
adev->hpd_irq.funcs = &dce_v8_0_hpd_irq_funcs;
}
-const struct amdgpu_ip_block_version dce_v8_0_ip_block =
-{
+const struct amdgpu_ip_block_version dce_v8_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_DCE,
.major = 8,
.minor = 0,
@@ -3537,8 +3578,7 @@ const struct amdgpu_ip_block_version dce_v8_0_ip_block =
.funcs = &dce_v8_0_ip_funcs,
};
-const struct amdgpu_ip_block_version dce_v8_1_ip_block =
-{
+const struct amdgpu_ip_block_version dce_v8_1_ip_block = {
.type = AMD_IP_BLOCK_TYPE_DCE,
.major = 8,
.minor = 1,
@@ -3546,8 +3586,7 @@ const struct amdgpu_ip_block_version dce_v8_1_ip_block =
.funcs = &dce_v8_0_ip_funcs,
};
-const struct amdgpu_ip_block_version dce_v8_2_ip_block =
-{
+const struct amdgpu_ip_block_version dce_v8_2_ip_block = {
.type = AMD_IP_BLOCK_TYPE_DCE,
.major = 8,
.minor = 2,
@@ -3555,8 +3594,7 @@ const struct amdgpu_ip_block_version dce_v8_2_ip_block =
.funcs = &dce_v8_0_ip_funcs,
};
-const struct amdgpu_ip_block_version dce_v8_3_ip_block =
-{
+const struct amdgpu_ip_block_version dce_v8_3_ip_block = {
.type = AMD_IP_BLOCK_TYPE_DCE,
.major = 8,
.minor = 3,
@@ -3564,8 +3602,7 @@ const struct amdgpu_ip_block_version dce_v8_3_ip_block =
.funcs = &dce_v8_0_ip_funcs,
};
-const struct amdgpu_ip_block_version dce_v8_5_ip_block =
-{
+const struct amdgpu_ip_block_version dce_v8_5_ip_block = {
.type = AMD_IP_BLOCK_TYPE_DCE,
.major = 8,
.minor = 5,
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
index 2d01ac0d4c11..cd298556f7a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
@@ -70,6 +70,8 @@ static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev)
int fb_channel_number;
fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
+ if (fb_channel_number >= ARRAY_SIZE(df_v1_7_channel_number))
+ fb_channel_number = 0;
return df_v1_7_channel_number[fb_channel_number];
}
@@ -94,12 +96,12 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
}
- /* Exit boradcast mode */
+ /* Exit broadcast mode */
adev->df.funcs->enable_broadcast_mode(adev, false);
}
static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
u32 tmp;
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index 43c5e3ec9a39..621aeca53880 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -254,8 +254,8 @@ static void df_v3_6_sw_init(struct amdgpu_device *adev)
static void df_v3_6_sw_fini(struct amdgpu_device *adev)
{
-
- device_remove_file(adev->dev, &dev_attr_df_cntr_avail);
+ if (adev->dev->kobj.sd)
+ device_remove_file(adev->dev, &dev_attr_df_cntr_avail);
}
@@ -332,7 +332,7 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev,
}
static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
u32 tmp;
@@ -458,7 +458,7 @@ static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev,
#define DEFERRED_ARM_MASK (1 << 31)
static int df_v3_6_pmc_set_deferred(struct amdgpu_device *adev,
- int counter_idx, uint64_t config,
+ uint64_t config, int counter_idx,
bool is_deferred)
{
@@ -476,8 +476,8 @@ static int df_v3_6_pmc_set_deferred(struct amdgpu_device *adev,
}
static bool df_v3_6_pmc_is_deferred(struct amdgpu_device *adev,
- int counter_idx,
- uint64_t config)
+ uint64_t config,
+ int counter_idx)
{
return (df_v3_6_pmc_has_counter(adev, config, counter_idx) &&
(adev->df_perfmon_config_assign_mask[counter_idx]
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_15.c b/drivers/gpu/drm/amd/amdgpu/df_v4_15.c
new file mode 100644
index 000000000000..2a573e33908b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_15.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v4_15.h"
+
+#include "df/df_4_15_offset.h"
+#include "df/df_4_15_sh_mask.h"
+
+static void df_v4_15_hw_init(struct amdgpu_device *adev)
+{
+ if (adev->have_atomics_support) {
+ uint32_t tmp;
+ uint32_t dis_lcl_proc = (1 << 1 |
+ 1 << 2 |
+ 1 << 13);
+
+ tmp = RREG32_SOC15(DF, 0, regNCSConfigurationRegister1);
+ tmp |= (dis_lcl_proc << NCSConfigurationRegister1__DisIntAtomicsLclProcessing__SHIFT);
+ WREG32_SOC15(DF, 0, regNCSConfigurationRegister1, tmp);
+ }
+}
+
+const struct amdgpu_df_funcs df_v4_15_funcs = {
+ .hw_init = df_v4_15_hw_init
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_15.h b/drivers/gpu/drm/amd/amdgpu/df_v4_15.h
new file mode 100644
index 000000000000..dddf2422112a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_15.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DF_V4_15_H__
+#define __DF_V4_15_H__
+
+extern const struct amdgpu_df_funcs df_v4_15_funcs;
+
+#endif /* __DF_V4_15_H__ */
+
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_3.c b/drivers/gpu/drm/amd/amdgpu/df_v4_3.c
new file mode 100644
index 000000000000..e8b9e19ede2e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_3.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v4_3.h"
+
+#include "df/df_4_3_offset.h"
+#include "df/df_4_3_sh_mask.h"
+
+static bool df_v4_3_query_ras_poison_mode(struct amdgpu_device *adev)
+{
+ uint32_t hw_assert_msklo, hw_assert_mskhi;
+ uint32_t v0, v1, v28, v31;
+
+ hw_assert_msklo = RREG32_SOC15(DF, 0,
+ regDF_CS_UMC_AON0_HardwareAssertMaskLow);
+ hw_assert_mskhi = RREG32_SOC15(DF, 0,
+ regDF_NCS_PG0_HardwareAssertMaskHigh);
+
+ v0 = REG_GET_FIELD(hw_assert_msklo,
+ DF_CS_UMC_AON0_HardwareAssertMaskLow, HWAssertMsk0);
+ v1 = REG_GET_FIELD(hw_assert_msklo,
+ DF_CS_UMC_AON0_HardwareAssertMaskLow, HWAssertMsk1);
+ v28 = REG_GET_FIELD(hw_assert_mskhi,
+ DF_NCS_PG0_HardwareAssertMaskHigh, HWAssertMsk28);
+ v31 = REG_GET_FIELD(hw_assert_mskhi,
+ DF_NCS_PG0_HardwareAssertMaskHigh, HWAssertMsk31);
+
+ if (v0 && v1 && v28 && v31)
+ return true;
+ else if (!v0 && !v1 && !v28 && !v31)
+ return false;
+ else {
+ dev_warn(adev->dev, "DF poison setting is inconsistent(%d:%d:%d:%d)!\n",
+ v0, v1, v28, v31);
+ return false;
+ }
+}
+
+const struct amdgpu_df_funcs df_v4_3_funcs = {
+ .query_ras_poison_mode = df_v4_3_query_ras_poison_mode,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_3.h b/drivers/gpu/drm/amd/amdgpu/df_v4_3.h
new file mode 100644
index 000000000000..06ef0724edd3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_3.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DF_V4_3_H__
+#define __DF_V4_3_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_df_funcs df_v4_3_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c
new file mode 100644
index 000000000000..a47960a0babd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v4_6_2.h"
+
+static bool df_v4_6_2_query_ras_poison_mode(struct amdgpu_device *adev)
+{
+ /* return true since related regs are inaccessible */
+ return true;
+}
+
+const struct amdgpu_df_funcs df_v4_6_2_funcs = {
+ .query_ras_poison_mode = df_v4_6_2_query_ras_poison_mode,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h
new file mode 100644
index 000000000000..3bc3e6d216e2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DF_V4_6_2_H__
+#define __DF_V4_6_2_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_df_funcs df_v4_6_2_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index dbe7442fb25c..d75b9940f248 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -40,11 +40,11 @@
#include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
#include "soc15.h"
-#include "soc15d.h"
#include "soc15_common.h"
#include "clearstate_gfx10.h"
#include "v10_structs.h"
#include "gfx_v10_0.h"
+#include "gfx_v10_0_cleaner_shader.h"
#include "nbio_v2_3.h"
/*
@@ -53,13 +53,9 @@
* 2. Async ring
*/
#define GFX10_NUM_GFX_RINGS_NV1X 1
-#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 1
+#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 2
#define GFX10_MEC_HPD_SIZE 2048
-#define RLCG_VFGATE_DISABLED 0x4000000
-#define RLCG_WRONG_OPERATION_TYPE 0x2000000
-#define RLCG_NOT_IN_RANGE 0x1000000
-
#define F32_CE_PROGRAM_RAM_SIZE 65536
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
@@ -106,10 +102,21 @@
#define mmGCR_GENERAL_CNTL_Sienna_Cichlid 0x1580
#define mmGCR_GENERAL_CNTL_Sienna_Cichlid_BASE_IDX 0
+#define mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish 0x0105
+#define mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish_BASE_IDX 1
+#define mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish 0x0106
+#define mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish_BASE_IDX 1
+
#define mmGOLDEN_TSC_COUNT_UPPER_Vangogh 0x0025
#define mmGOLDEN_TSC_COUNT_UPPER_Vangogh_BASE_IDX 1
#define mmGOLDEN_TSC_COUNT_LOWER_Vangogh 0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Vangogh_BASE_IDX 1
+
+#define mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6 0x002d
+#define mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6_BASE_IDX 1
+#define mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6 0x002e
+#define mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6_BASE_IDX 1
+
#define mmSPI_CONFIG_CNTL_1_Vangogh 0x2441
#define mmSPI_CONFIG_CNTL_1_Vangogh_BASE_IDX 1
#define mmVGT_TF_MEMORY_BASE_HI_Vangogh 0x2261
@@ -180,14 +187,6 @@
#define mmRLC_SPARE_INT_0_Sienna_Cichlid 0x4ca5
#define mmRLC_SPARE_INT_0_Sienna_Cichlid_BASE_IDX 1
-#define GFX_RLCG_GC_WRITE_OLD (0x8 << 28)
-#define GFX_RLCG_GC_WRITE (0x0 << 28)
-#define GFX_RLCG_GC_READ (0x1 << 28)
-#define GFX_RLCG_MMHUB_WRITE (0x2 << 28)
-
-#define RLCG_ERROR_REPORT_ENABLED(adev) \
- (amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev))
-
MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
MODULE_FIRMWARE("amdgpu/navi10_me.bin");
@@ -256,13 +255,6 @@ MODULE_FIRMWARE("amdgpu/yellow_carp_mec.bin");
MODULE_FIRMWARE("amdgpu/yellow_carp_mec2.bin");
MODULE_FIRMWARE("amdgpu/yellow_carp_rlc.bin");
-MODULE_FIRMWARE("amdgpu/cyan_skillfish_ce.bin");
-MODULE_FIRMWARE("amdgpu/cyan_skillfish_pfp.bin");
-MODULE_FIRMWARE("amdgpu/cyan_skillfish_me.bin");
-MODULE_FIRMWARE("amdgpu/cyan_skillfish_mec.bin");
-MODULE_FIRMWARE("amdgpu/cyan_skillfish_mec2.bin");
-MODULE_FIRMWARE("amdgpu/cyan_skillfish_rlc.bin");
-
MODULE_FIRMWARE("amdgpu/cyan_skillfish2_ce.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish2_pfp.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish2_me.bin");
@@ -270,8 +262,225 @@ MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec2.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish2_rlc.bin");
-static const struct soc15_reg_golden golden_settings_gc_10_1[] =
-{
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_ce.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_mec2.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_ce.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec2.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_rlc.bin");
+
+static const struct amdgpu_hwip_reg_entry gc_reg_list_10_1[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS3),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HPD_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQG_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_3),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_4),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_RLCS_GPM_STAT_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SPP_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_RLCS_BOOTLOAD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_A),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_B),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_LX6_CORE_PDEBUG_INST),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_HEADER_DUMP),
+ /* SE status registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
+};
+
+static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_10[] = {
+ /* compute registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+};
+
+static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = {
+ /* gfx queue registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_ACTIVE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CSMD_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_MAPPED),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_QUE_MGR_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_HQ_CONTROL0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_HQ_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_POLL_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_CSMD_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI),
+ /* gfx header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_1[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100),
@@ -314,13 +523,11 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000)
};
-static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] = {
/* Pending on emulation bring up */
};
-static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_0_nv10[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_0_nv10[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000, 0x0),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
@@ -1375,8 +1582,7 @@ static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_0_nv10[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000)
};
-static const struct soc15_reg_golden golden_settings_gc_10_1_1[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
@@ -1417,8 +1623,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000),
};
-static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
@@ -1463,150 +1668,11 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00c00000)
};
-static bool gfx_v10_get_rlcg_flag(struct amdgpu_device *adev, u32 acc_flags, u32 hwip,
- int write, u32 *rlcg_flag)
-{
- switch (hwip) {
- case GC_HWIP:
- if (amdgpu_sriov_reg_indirect_gc(adev)) {
- *rlcg_flag = write ? GFX_RLCG_GC_WRITE : GFX_RLCG_GC_READ;
-
- return true;
- /* only in new version, AMDGPU_REGS_NO_KIQ and AMDGPU_REGS_RLC enabled simultaneously */
- } else if ((acc_flags & AMDGPU_REGS_RLC) && !(acc_flags & AMDGPU_REGS_NO_KIQ)) {
- *rlcg_flag = GFX_RLCG_GC_WRITE_OLD;
-
- return true;
- }
-
- break;
- case MMHUB_HWIP:
- if (amdgpu_sriov_reg_indirect_mmhub(adev) &&
- (acc_flags & AMDGPU_REGS_RLC) && write) {
- *rlcg_flag = GFX_RLCG_MMHUB_WRITE;
- return true;
- }
-
- break;
- default:
- DRM_DEBUG("Not program register by RLCG\n");
- }
-
- return false;
-}
-
-static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32_t flag)
-{
- static void *scratch_reg0;
- static void *scratch_reg1;
- static void *scratch_reg2;
- static void *scratch_reg3;
- static void *spare_int;
- static uint32_t grbm_cntl;
- static uint32_t grbm_idx;
- uint32_t i = 0;
- uint32_t retries = 50000;
- u32 ret = 0;
- u32 tmp;
-
- scratch_reg0 = adev->rmmio +
- (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0) * 4;
- scratch_reg1 = adev->rmmio +
- (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1) * 4;
- scratch_reg2 = adev->rmmio +
- (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG2) * 4;
- scratch_reg3 = adev->rmmio +
- (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3) * 4;
-
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) {
- spare_int = adev->rmmio +
- (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_0_Sienna_Cichlid_BASE_IDX]
- + mmRLC_SPARE_INT_0_Sienna_Cichlid) * 4;
- } else {
- spare_int = adev->rmmio +
- (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT) * 4;
- }
-
- grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
- grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;
-
- if (offset == grbm_cntl || offset == grbm_idx) {
- if (offset == grbm_cntl)
- writel(v, scratch_reg2);
- else if (offset == grbm_idx)
- writel(v, scratch_reg3);
-
- writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
- } else {
- writel(v, scratch_reg0);
- writel(offset | flag, scratch_reg1);
- writel(1, spare_int);
-
- for (i = 0; i < retries; i++) {
- tmp = readl(scratch_reg1);
- if (!(tmp & flag))
- break;
-
- udelay(10);
- }
-
- if (i >= retries) {
- if (RLCG_ERROR_REPORT_ENABLED(adev)) {
- if (tmp & RLCG_VFGATE_DISABLED)
- pr_err("The vfgate is disabled, program reg:0x%05x failed!\n", offset);
- else if (tmp & RLCG_WRONG_OPERATION_TYPE)
- pr_err("Wrong operation type, program reg:0x%05x failed!\n", offset);
- else if (tmp & RLCG_NOT_IN_RANGE)
- pr_err("The register is not in range, program reg:0x%05x failed!\n", offset);
- else
- pr_err("Unknown error type, program reg:0x%05x failed!\n", offset);
- } else
- pr_err("timeout: rlcg program reg:0x%05x failed!\n", offset);
- }
- }
-
- ret = readl(scratch_reg0);
-
- return ret;
-}
-
-static void gfx_v10_sriov_wreg(struct amdgpu_device *adev, u32 offset, u32 value, u32 acc_flags, u32 hwip)
-{
- u32 rlcg_flag;
-
- if (!amdgpu_sriov_runtime(adev) &&
- gfx_v10_get_rlcg_flag(adev, acc_flags, hwip, 1, &rlcg_flag)) {
- gfx_v10_rlcg_rw(adev, offset, value, rlcg_flag);
- return;
- }
-
- if (acc_flags & AMDGPU_REGS_NO_KIQ)
- WREG32_NO_KIQ(offset, value);
- else
- WREG32(offset, value);
-}
-
-static u32 gfx_v10_sriov_rreg(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip)
-{
- u32 rlcg_flag;
-
- if (!amdgpu_sriov_runtime(adev) &&
- gfx_v10_get_rlcg_flag(adev, acc_flags, hwip, 0, &rlcg_flag))
- return gfx_v10_rlcg_rw(adev, offset, 0, rlcg_flag);
-
- if (acc_flags & AMDGPU_REGS_NO_KIQ)
- return RREG32_NO_KIQ(offset);
- else
- return RREG32(offset);
-}
-
-static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] = {
/* Pending on emulation bring up */
};
-static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_nv14[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_nv14[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000L, 0x0),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
@@ -2229,13 +2295,11 @@ static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_nv14[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000)
};
-static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] = {
/* Pending on emulation bring up */
};
-static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_2_nv12[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_2_nv12[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000L, 0x0),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
@@ -3290,8 +3354,7 @@ static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_2_nv12[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000)
};
-static const struct soc15_reg_golden golden_settings_gc_10_3[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_3[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100),
@@ -3300,7 +3363,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000),
- SOC15_REG_GOLDEN_VALUE(GC, 0 ,mmGCEA_SDP_TAG_RESERVE0, 0xffffffff, 0x10100100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_SDP_TAG_RESERVE0, 0xffffffff, 0x10100100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_SDP_TAG_RESERVE1, 0xffffffff, 0x17000088),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xff000000, 0xff008080),
@@ -3337,13 +3400,11 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000)
};
-static const struct soc15_reg_golden golden_settings_gc_10_3_sienna_cichlid[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_3_sienna_cichlid[] = {
/* Pending on emulation bring up */
};
-static const struct soc15_reg_golden golden_settings_gc_10_3_2[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100),
@@ -3390,8 +3451,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_2[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020),
};
-static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4),
@@ -3421,15 +3481,14 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020),
};
-static const struct soc15_reg_golden golden_settings_gc_10_3_3[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_3_3[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000242),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210),
@@ -3445,8 +3504,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_3[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000)
};
-static const struct soc15_reg_golden golden_settings_gc_10_3_4[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_3_4[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0x30000000, 0x30000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0x7e000000, 0x7e000100),
@@ -3516,7 +3574,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_5[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX,0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000)
};
@@ -3557,6 +3615,56 @@ static const struct soc15_reg_golden golden_settings_gc_10_0_cyan_skillfish[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000)
};
+static const struct soc15_reg_golden golden_settings_gc_10_3_6[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0xffffff7f, 0x00010020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3_7[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000041),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf000003f, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0xffffff7f, 0x00010020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000)
+};
+
#define DEFAULT_SH_MEM_CONFIG \
((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
(SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
@@ -3570,11 +3678,12 @@ static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev);
+static void gfx_v10_0_set_mqd_funcs(struct amdgpu_device *adev);
static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
- u32 sh_num, u32 instance);
+ u32 sh_num, u32 instance, int xcc_id);
static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev);
@@ -3587,16 +3696,29 @@ static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
static u32 gfx_v10_3_get_disabled_sa(struct amdgpu_device *adev);
static void gfx_v10_3_program_pbb_mode(struct amdgpu_device *adev);
static void gfx_v10_3_set_power_brake_sequence(struct amdgpu_device *adev);
-
+static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel);
+static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev,
+ unsigned int vmid);
+
+static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ u64 shader_mc_addr;
+
+ /* Cleaner shader MC address: the cleaner shader GPU address shifted right by 8 bits (presumably the packet expects 256-byte units; TODO confirm against the SET_RESOURCES packet definition) */
+ shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
+
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
amdgpu_ring_write(kiq_ring, 0); /* oac mask */
amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}
@@ -3604,10 +3726,23 @@ static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue
static void gfx10_kiq_map_queues(struct amdgpu_ring *kiq_ring,
struct amdgpu_ring *ring)
{
- struct amdgpu_device *adev = kiq_ring->adev;
uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
- uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
- uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+ uint64_t wptr_addr = ring->wptr_gpu_addr;
+ uint32_t eng_sel = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_COMPUTE:
+ eng_sel = 0;
+ break;
+ case AMDGPU_RING_TYPE_GFX:
+ eng_sel = 4;
+ break;
+ case AMDGPU_RING_TYPE_MES:
+ eng_sel = 5;
+ break;
+ default:
+ WARN_ON(1);
+ }
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
@@ -3680,12 +3815,59 @@ static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
uint16_t pasid, uint32_t flush_type,
bool all_hub)
{
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
- amdgpu_ring_write(kiq_ring,
- PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
- PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
- PACKET3_INVALIDATE_TLBS_PASID(pasid) |
- PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+ gfx_v10_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
+}
+
+/*
+ * gfx_v10_0_kiq_reset_hw_queue - reset one hardware queue through registers
+ *
+ * @kiq_ring: KIQ ring; only used here to reach the owning amdgpu_device
+ * @queue_type: AMDGPU_RING_TYPE_COMPUTE or AMDGPU_RING_TYPE_GFX; any other
+ *              value is reported with dev_err() and no reset is attempted
+ * @me_id: micro engine passed to nv_grbm_select()
+ * @pipe_id: pipe passed to nv_grbm_select(); for GFX it also chooses between
+ *           the PIPE0_QUEUES and PIPE1_QUEUES fields of CP_VMID_RESET
+ * @queue_id: queue passed to nv_grbm_select(); for GFX it is the bit set in
+ *            the chosen PIPEx_QUEUES field
+ * @xcc_id: instance handed to the RLC safe-mode enter/exit helpers
+ * @vmid: for GFX, the bit set in the RESET_REQUEST field of CP_VMID_RESET
+ *
+ * The whole sequence runs in RLC safe mode with srbm_mutex held. After the
+ * reset request is written, the matching HQD_ACTIVE register is polled once
+ * per microsecond for at most adev->usec_timeout iterations; a timeout is
+ * only logged, nothing is returned to the caller.
+ */
+static void gfx_v10_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
+					 uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
+					 uint32_t xcc_id, uint32_t vmid)
+{
+	struct amdgpu_device *adev = kiq_ring->adev;
+	unsigned int i;
+	uint32_t tmp;
+
+	/* enter safe mode */
+	amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+	mutex_lock(&adev->srbm_mutex);
+	/* route the per-queue register accesses below to the target queue */
+	nv_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+
+	if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
+		WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
+		/* wait until the dequeue takes effect */
+		for (i = 0; i < adev->usec_timeout; i++) {
+			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+				break;
+			udelay(1);
+		}
+		if (i >= adev->usec_timeout)
+			dev_err(adev->dev, "fail to wait on hqd deactive\n");
+	} else if (queue_type == AMDGPU_RING_TYPE_GFX) {
+		WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX,
+			     (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT));
+		tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+		if (pipe_id == 0)
+			tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id);
+		else
+			tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id);
+		WREG32_SOC15(GC, 0, mmCP_VMID_RESET, tmp);
+
+		/* wait until the dequeue takes effect */
+		for (i = 0; i < adev->usec_timeout; i++) {
+			if (!(RREG32_SOC15(GC, 0, mmCP_GFX_HQD_ACTIVE) & 1))
+				break;
+			udelay(1);
+		}
+		if (i >= adev->usec_timeout)
+			dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n");
+	} else {
+		dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
+	}
+
+	/* restore the default GRBM selection before dropping the lock */
+	nv_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+	/* exit safe mode on the same instance that was entered above */
+	amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
}
static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
@@ -3694,6 +3876,7 @@ static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
.kiq_unmap_queues = gfx10_kiq_unmap_queues,
.kiq_query_status = gfx10_kiq_query_status,
.kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
+ .kiq_reset_hw_queue = gfx_v10_0_kiq_reset_hw_queue,
.set_resources_size = 8,
.map_queues_size = 7,
.unmap_queues_size = 6,
@@ -3703,12 +3886,12 @@ static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
- adev->gfx.kiq.pmf = &gfx_v10_0_kiq_pm4_funcs;
+ adev->gfx.kiq[0].pmf = &gfx_v10_0_kiq_pm4_funcs;
}
static void gfx_v10_0_init_spm_golden_registers(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
soc15_program_register_sequence(adev,
golden_settings_gc_rlc_spm_10_0_nv10,
@@ -3731,7 +3914,10 @@ static void gfx_v10_0_init_spm_golden_registers(struct amdgpu_device *adev)
static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
soc15_program_register_sequence(adev,
golden_settings_gc_10_1,
@@ -3781,8 +3967,8 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
break;
case IP_VERSION(10, 3, 4):
soc15_program_register_sequence(adev,
- golden_settings_gc_10_3_4,
- (const u32)ARRAY_SIZE(golden_settings_gc_10_3_4));
+ golden_settings_gc_10_3_4,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_4));
break;
case IP_VERSION(10, 3, 5):
soc15_program_register_sequence(adev,
@@ -3790,23 +3976,27 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
(const u32)ARRAY_SIZE(golden_settings_gc_10_3_5));
break;
case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
soc15_program_register_sequence(adev,
golden_settings_gc_10_0_cyan_skillfish,
(const u32)ARRAY_SIZE(golden_settings_gc_10_0_cyan_skillfish));
break;
+ case IP_VERSION(10, 3, 6):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_6,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_6));
+ break;
+ case IP_VERSION(10, 3, 7):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_7,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_7));
+ break;
default:
break;
}
gfx_v10_0_init_spm_golden_registers(adev);
}
-static void gfx_v10_0_scratch_init(struct amdgpu_device *adev)
-{
- adev->gfx.scratch.num_reg = 8;
- adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
- adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
-}
-
static void gfx_v10_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
bool wc, uint32_t reg, uint32_t val)
{
@@ -3843,29 +4033,22 @@ static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t scratch;
+ uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
uint32_t tmp = 0;
- unsigned i;
+ unsigned int i;
int r;
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r) {
- DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
- return r;
- }
-
WREG32(scratch, 0xCAFEDEAD);
-
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
- amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
- amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
+ amdgpu_ring_write(ring, scratch -
+ PACKET3_SET_UCONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
@@ -3882,8 +4065,6 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
- amdgpu_gfx_scratch_free(adev, scratch);
-
return r;
}
@@ -3892,22 +4073,26 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib ib;
struct dma_fence *f = NULL;
- unsigned index;
+ unsigned int index;
uint64_t gpu_addr;
- uint32_t tmp;
+ uint32_t *cpu_ptr;
long r;
+ memset(&ib, 0, sizeof(ib));
+
r = amdgpu_device_wb_get(adev, &index);
if (r)
return r;
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
- memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 16,
- AMDGPU_IB_POOL_DIRECT, &ib);
- if (r)
+ cpu_ptr = &adev->wb.wb[index];
+
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
goto err1;
+ }
ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
@@ -3928,13 +4113,12 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
goto err2;
}
- tmp = adev->wb.wb[index];
- if (tmp == 0xDEADBEEF)
+ if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
r = 0;
else
r = -EINVAL;
err2:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
amdgpu_device_wb_free(adev, index);
@@ -3943,18 +4127,12 @@ err1:
static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
{
- release_firmware(adev->gfx.pfp_fw);
- adev->gfx.pfp_fw = NULL;
- release_firmware(adev->gfx.me_fw);
- adev->gfx.me_fw = NULL;
- release_firmware(adev->gfx.ce_fw);
- adev->gfx.ce_fw = NULL;
- release_firmware(adev->gfx.rlc_fw);
- adev->gfx.rlc_fw = NULL;
- release_firmware(adev->gfx.mec_fw);
- adev->gfx.mec_fw = NULL;
- release_firmware(adev->gfx.mec2_fw);
- adev->gfx.mec2_fw = NULL;
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
kfree(adev->gfx.rlc.register_list_format);
}
@@ -3963,11 +4141,12 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
{
adev->gfx.cp_fw_write_wait = false;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 2):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
if ((adev->gfx.me_fw_version >= 0x00000046) &&
(adev->gfx.me_feature_version >= 27) &&
(adev->gfx.pfp_fw_version >= 0x00000068) &&
@@ -3981,7 +4160,9 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
adev->gfx.cp_fw_write_wait = true;
break;
default:
@@ -3992,39 +4173,6 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
DRM_WARN_ONCE("CP firmware version too old, please update!");
}
-
-static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
-{
- const struct rlc_firmware_header_v2_1 *rlc_hdr;
-
- rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
- adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
- adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
- adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
- adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
- adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
- adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
- adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
- adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
- adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
- adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
- adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
- adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
- adev->gfx.rlc.reg_list_format_direct_reg_list_length =
- le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
-}
-
-static void gfx_v10_0_init_rlc_iram_dram_microcode(struct amdgpu_device *adev)
-{
- const struct rlc_firmware_header_v2_2 *rlc_hdr;
-
- rlc_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
- adev->gfx.rlc.rlc_iram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_iram_ucode_size_bytes);
- adev->gfx.rlc.rlc_iram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_iram_ucode_offset_bytes);
- adev->gfx.rlc.rlc_dram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_dram_ucode_size_bytes);
- adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes);
-}
-
static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev)
{
bool ret = false;
@@ -4039,12 +4187,12 @@ static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev)
break;
}
- return ret ;
+ return ret;
}
static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
if (!gfx_v10_0_navi10_gfxoff_should_enable(adev))
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
@@ -4056,300 +4204,87 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
{
- const char *chip_name;
- char fw_name[40];
- char *wks = "";
+ char fw_name[53];
+ char ucode_prefix[30];
+ const char *wks = "";
int err;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
- const struct gfx_firmware_header_v1_0 *cp_hdr;
const struct rlc_firmware_header_v2_0 *rlc_hdr;
- unsigned int *tmp = NULL;
- unsigned int i = 0;
uint16_t version_major;
uint16_t version_minor;
DRM_DEBUG("\n");
- switch (adev->ip_versions[GC_HWIP][0]) {
- case IP_VERSION(10, 1, 10):
- chip_name = "navi10";
- break;
- case IP_VERSION(10, 1, 1):
- chip_name = "navi14";
- if (!(adev->pdev->device == 0x7340 &&
- adev->pdev->revision != 0x00))
- wks = "_wks";
- break;
- case IP_VERSION(10, 1, 2):
- chip_name = "navi12";
- break;
- case IP_VERSION(10, 3, 0):
- chip_name = "sienna_cichlid";
- break;
- case IP_VERSION(10, 3, 2):
- chip_name = "navy_flounder";
- break;
- case IP_VERSION(10, 3, 1):
- chip_name = "vangogh";
- break;
- case IP_VERSION(10, 3, 4):
- chip_name = "dimgrey_cavefish";
- break;
- case IP_VERSION(10, 3, 5):
- chip_name = "beige_goby";
- break;
- case IP_VERSION(10, 3, 3):
- chip_name = "yellow_carp";
- break;
- case IP_VERSION(10, 1, 3):
- if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2)
- chip_name = "cyan_skillfish2";
- else
- chip_name = "cyan_skillfish";
- break;
- default:
- BUG();
- }
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 1) &&
+ (!(adev->pdev->device == 0x7340 && adev->pdev->revision != 0x00)))
+ wks = "_wks";
+ amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", chip_name, wks);
- err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp%s.bin", ucode_prefix, wks);
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
- if (err)
- goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
- adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", chip_name, wks);
- err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.me_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me%s.bin", ucode_prefix, wks);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
- adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", chip_name, wks);
- err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.ce_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ce%s.bin", ucode_prefix, wks);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
- adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
if (!amdgpu_sriov_vf(adev)) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
+
+ /* don't validate this firmware. There are apparently firmwares
+ * in the wild with incorrect size in the header
+ */
rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
-
- adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
- adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
- adev->gfx.rlc.save_and_restore_offset =
- le32_to_cpu(rlc_hdr->save_and_restore_offset);
- adev->gfx.rlc.clear_state_descriptor_offset =
- le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
- adev->gfx.rlc.avail_scratch_ram_locations =
- le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
- adev->gfx.rlc.reg_restore_list_size =
- le32_to_cpu(rlc_hdr->reg_restore_list_size);
- adev->gfx.rlc.reg_list_format_start =
- le32_to_cpu(rlc_hdr->reg_list_format_start);
- adev->gfx.rlc.reg_list_format_separate_start =
- le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
- adev->gfx.rlc.starting_offsets_start =
- le32_to_cpu(rlc_hdr->starting_offsets_start);
- adev->gfx.rlc.reg_list_format_size_bytes =
- le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
- adev->gfx.rlc.reg_list_size_bytes =
- le32_to_cpu(rlc_hdr->reg_list_size_bytes);
- adev->gfx.rlc.register_list_format =
- kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
- adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
- if (!adev->gfx.rlc.register_list_format) {
- err = -ENOMEM;
+ err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
+ if (err)
goto out;
- }
-
- tmp = (unsigned int *)((uintptr_t)rlc_hdr +
- le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
- for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
- adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
-
- adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
-
- tmp = (unsigned int *)((uintptr_t)rlc_hdr +
- le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
- for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
- adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
-
- if (version_major == 2) {
- if (version_minor >= 1)
- gfx_v10_0_init_rlc_ext_microcode(adev);
- if (version_minor == 2)
- gfx_v10_0_init_rlc_iram_dram_microcode(adev);
- }
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks);
- err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.mec_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec%s.bin", ucode_prefix, wks);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
- adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", chip_name, wks);
- err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec2%s.bin", ucode_prefix, wks);
if (!err) {
- err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
- if (err)
- goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)
- adev->gfx.mec2_fw->data;
- adev->gfx.mec2_fw_version =
- le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.mec2_feature_version =
- le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
} else {
err = 0;
adev->gfx.mec2_fw = NULL;
}
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
- info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
- info->fw = adev->gfx.pfp_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
- info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
- info->fw = adev->gfx.me_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
- info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
- info->fw = adev->gfx.ce_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
- info->fw = adev->gfx.rlc_fw;
- if (info->fw) {
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
- }
- if (adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
- adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
- adev->gfx.rlc.save_restore_list_srm_size_bytes) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
-
- if (adev->gfx.rlc.rlc_iram_ucode_size_bytes &&
- adev->gfx.rlc.rlc_dram_ucode_size_bytes) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.rlc_iram_ucode_size_bytes, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE);
- }
- }
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
- info->fw = adev->gfx.mec_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes) -
- le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
- info->fw = adev->gfx.mec_fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
-
- if (adev->gfx.mec2_fw) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
- info->fw = adev->gfx.mec2_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes) -
- le32_to_cpu(cp_hdr->jt_size) * 4,
- PAGE_SIZE);
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
- info->fw = adev->gfx.mec2_fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
- PAGE_SIZE);
- }
- }
-
gfx_v10_0_check_fw_write_wait(adev);
out:
if (err) {
- dev_err(adev->dev,
- "gfx10: Failed to load firmware \"%s\"\n",
- fw_name);
- release_firmware(adev->gfx.pfp_fw);
- adev->gfx.pfp_fw = NULL;
- release_firmware(adev->gfx.me_fw);
- adev->gfx.me_fw = NULL;
- release_firmware(adev->gfx.ce_fw);
- adev->gfx.ce_fw = NULL;
- release_firmware(adev->gfx.rlc_fw);
- adev->gfx.rlc_fw = NULL;
- release_firmware(adev->gfx.mec_fw);
- adev->gfx.mec_fw = NULL;
- release_firmware(adev->gfx.mec2_fw);
- adev->gfx.mec2_fw = NULL;
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
}
gfx_v10_0_check_gfxoff_flag(adev);
@@ -4387,12 +4322,9 @@ static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev)
return count;
}
-static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
int ctx_reg_offset;
if (adev->gfx.rlc.cs_data == NULL)
@@ -4400,39 +4332,15 @@ static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index -
- PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
-
- ctx_reg_offset =
- SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
+ ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
buffer[count++] = cpu_to_le32(ctx_reg_offset);
buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev)
@@ -4448,6 +4356,30 @@ static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev)
(void **)&adev->gfx.rlc.cp_table_ptr);
}
+/*
+ * Record the register offsets used for RLCG register access in
+ * adev->gfx.rlc.reg_access_ctrl[0] and flag RLCG register access as
+ * supported. Only the spare-interrupt register depends on the GC IP version.
+ */
+static void gfx_v10_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
+{
+	struct amdgpu_rlcg_reg_access_ctrl *ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
+
+	ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
+	ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
+	ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
+	ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
+	ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
+	ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
+
+	/* GC 10.3.0 uses the Sienna_Cichlid variant of the spare-int register */
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 0))
+		ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT_0_Sienna_Cichlid);
+	else
+		ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
+
+	adev->gfx.rlc.rlcg_reg_access_supported = true;
+}
+
static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
{
const struct cs_section_def *cs_data;
@@ -4464,10 +4396,6 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
return r;
}
- /* init spm vmid with 0xf */
- if (adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
-
return 0;
}
@@ -4477,19 +4405,11 @@ static void gfx_v10_0_mec_fini(struct amdgpu_device *adev)
amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
}
-static int gfx_v10_0_me_init(struct amdgpu_device *adev)
+static void gfx_v10_0_me_init(struct amdgpu_device *adev)
{
- int r;
-
bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
amdgpu_gfx_graphics_queue_acquire(adev);
-
- r = gfx_v10_0_init_microcode(adev);
- if (r)
- DRM_ERROR("Failed to load gfx firmware!\n");
-
- return r;
}
static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
@@ -4497,13 +4417,13 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
int r;
u32 *hpd;
const __le32 *fw_data = NULL;
- unsigned fw_size;
+ unsigned int fw_size;
u32 *fw = NULL;
size_t mec_hpd_size;
const struct gfx_firmware_header_v1_0 *mec_hdr = NULL;
- bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
/* take ownership of the relevant compute queues */
amdgpu_gfx_compute_queue_acquire(adev);
@@ -4575,11 +4495,12 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}
-static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
/* in gfx10 the SIMD_ID is specified as part of the INSTANCE
* field when performing a select_se_sh so it should be
- * zero here */
+ * zero here
+ */
WARN_ON(simd != 0);
/* type 2 wave data */
@@ -4602,7 +4523,7 @@ static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd,
dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
}
-static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t start,
uint32_t size, uint32_t *dst)
{
@@ -4613,7 +4534,7 @@ static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
dst);
}
-static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t thread,
uint32_t start, uint32_t size,
uint32_t *dst)
@@ -4624,7 +4545,7 @@ static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
}
static void gfx_v10_0_select_me_pipe_q(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 q, u32 vm)
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
nv_grbm_select(adev, me, pipe, q, vm);
}
@@ -4660,9 +4581,7 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
{
u32 gb_addr_config;
- adev->gfx.funcs = &gfx_v10_0_gfx_funcs;
-
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
@@ -4678,7 +4597,9 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -4689,6 +4610,7 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
break;
case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -4727,9 +4649,9 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
int me, int pipe, int queue)
{
- int r;
struct amdgpu_ring *ring;
unsigned int irq_type;
+ unsigned int hw_prio;
ring = &adev->gfx.gfx_ring[ring_id];
@@ -4744,21 +4666,20 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
else
ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
- r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
- AMDGPU_RING_PRIO_DEFAULT, NULL);
- if (r)
- return r;
- return 0;
+ hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
}
static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
int mec, int pipe, int queue)
{
- int r;
- unsigned irq_type;
+ unsigned int irq_type;
struct amdgpu_ring *ring;
unsigned int hw_prio;
@@ -4774,36 +4695,78 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+ (ring_id * GFX10_MEC_HPD_SIZE);
+ ring->vm_hub = AMDGPU_GFXHUB(0);
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ ring->pipe;
hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
- AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
/* type-2 packets are deprecated on MEC, use type-3 instead */
- r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
hw_prio, NULL);
- if (r)
- return r;
+}
- return 0;
+static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev)
+{
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
+ uint32_t *ptr;
+ uint32_t inst;
+
+ ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
+ adev->gfx.ip_dump_core = NULL;
+ } else {
+ adev->gfx.ip_dump_core = ptr;
+ }
+
+ /* Allocate memory for compute queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
+ inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
+ adev->gfx.ip_dump_compute_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_compute_queues = ptr;
+ }
+
+ /* Allocate memory for gfx queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
+ inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
+ adev->gfx.me.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
+ adev->gfx.ip_dump_gfx_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_gfx_queues = ptr;
+ }
}
-static int gfx_v10_0_sw_init(void *handle)
+static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int i, j, k, r, ring_id = 0;
- struct amdgpu_kiq *kiq;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int xcc_id = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
+
+ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
- adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.me.num_queue_per_pipe = 8;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
@@ -4813,10 +4776,12 @@ static int gfx_v10_0_sw_init(void *handle)
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
adev->gfx.me.num_me = 1;
- adev->gfx.me.num_pipe_per_me = 1;
- adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.me.num_pipe_per_me = 2;
+ adev->gfx.me.num_queue_per_pipe = 2;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 4;
@@ -4830,11 +4795,79 @@ static int gfx_v10_0_sw_init(void *handle)
adev->gfx.mec.num_queue_per_pipe = 8;
break;
}
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ adev->gfx.cleaner_shader_ptr = gfx_10_1_10_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_1_10_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 101 &&
+ adev->gfx.pfp_fw_version >= 158 &&
+ adev->gfx.mec_fw_version >= 151) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 64 &&
+ adev->gfx.pfp_fw_version >= 100 &&
+ adev->gfx.mec_fw_version >= 122) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 6):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 14 &&
+ adev->gfx.pfp_fw_version >= 17 &&
+ adev->gfx.mec_fw_version >= 24) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 7):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 4 &&
+ adev->gfx.pfp_fw_version >= 9 &&
+ adev->gfx.mec_fw_version >= 12) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
/* KIQ event */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
GFX_10_1__SRCID__CP_IB2_INTERRUPT_PKT,
- &adev->gfx.kiq.irq);
+ &adev->gfx.kiq[0].irq);
if (r)
return r;
@@ -4845,6 +4878,13 @@ static int gfx_v10_0_sw_init(void *handle)
if (r)
return r;
+ /* Bad opcode Event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
+ GFX_10_1__SRCID__CP_BAD_OPCODE_ERROR,
+ &adev->gfx.bad_op_irq);
+ if (r)
+ return r;
+
/* Privileged reg */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_REG_FAULT,
&adev->gfx.priv_reg_irq);
@@ -4859,16 +4899,16 @@ static int gfx_v10_0_sw_init(void *handle)
adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
- gfx_v10_0_scratch_init(adev);
-
- r = gfx_v10_0_me_init(adev);
- if (r)
- return r;
+ gfx_v10_0_me_init(adev);
- r = gfx_v10_0_rlc_init(adev);
- if (r) {
- DRM_ERROR("Failed to init rlc BOs!\n");
- return r;
+ if (adev->gfx.rlc.funcs) {
+ if (adev->gfx.rlc.funcs->init) {
+ r = adev->gfx.rlc.funcs->init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to init rlc BOs!\n");
+ return r;
+ }
+ }
}
r = gfx_v10_0_mec_init(adev);
@@ -4879,7 +4919,7 @@ static int gfx_v10_0_sw_init(void *handle)
/* set up the gfx ring */
for (i = 0; i < adev->gfx.me.num_me; i++) {
- for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
+ for (j = 0; j < num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
continue;
@@ -4898,8 +4938,8 @@ static int gfx_v10_0_sw_init(void *handle)
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
- if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k,
- j))
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
continue;
r = gfx_v10_0_compute_ring_init(adev, ring_id,
@@ -4912,18 +4952,27 @@ static int gfx_v10_0_sw_init(void *handle)
}
}
- r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE);
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
+
+ r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0);
if (r) {
DRM_ERROR("Failed to init KIQ BOs!\n");
return r;
}
- kiq = &adev->gfx.kiq;
- r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
if (r)
return r;
- r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd), 0);
if (r)
return r;
@@ -4938,6 +4987,12 @@ static int gfx_v10_0_sw_init(void *handle)
gfx_v10_0_gpu_early_init(adev);
+ gfx_v10_0_alloc_ip_dump(adev);
+
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
return 0;
}
@@ -4962,19 +5017,22 @@ static void gfx_v10_0_me_fini(struct amdgpu_device *adev)
(void **)&adev->gfx.me.me_fw_ptr);
}
-static int gfx_v10_0_sw_fini(void *handle)
+static int gfx_v10_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
- amdgpu_gfx_mqd_sw_fini(adev);
- amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
- amdgpu_gfx_kiq_fini(adev);
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
+
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
+
+ amdgpu_gfx_cleaner_shader_sw_fini(adev);
gfx_v10_0_pfp_fini(adev);
gfx_v10_0_ce_fini(adev);
@@ -4986,12 +5044,17 @@ static int gfx_v10_0_sw_fini(void *handle)
gfx_v10_0_rlc_backdoor_autoload_buffer_fini(adev);
gfx_v10_0_free_microcode(adev);
+ amdgpu_gfx_sysfs_fini(adev);
+
+ kfree(adev->gfx.ip_dump_core);
+ kfree(adev->gfx.ip_dump_compute_queues);
+ kfree(adev->gfx.ip_dump_gfx_queues);
return 0;
}
static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
- u32 sh_num, u32 instance)
+ u32 sh_num, u32 instance, int xcc_id)
{
u32 data;
@@ -5046,17 +5109,21 @@ static void gfx_v10_0_setup_rb(struct amdgpu_device *adev)
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
bitmap = i * adev->gfx.config.max_sh_per_se + j;
- if (((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) ||
- (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3))) &&
+ if (((amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 0)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 3)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 6))) &&
((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1))
continue;
- gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0);
data = gfx_v10_0_get_rb_active_bitmap(adev);
active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
rb_bitmap_width_per_sh);
}
}
- gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
adev->gfx.config.backend_enable_mask = active_rbs;
@@ -5073,8 +5140,9 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
uint32_t pa_sc_tile_steering_override;
/* for ASICs that integrates GFX v10.3
- * pa_sc_tile_steering_override should be set to 0 */
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
+ * pa_sc_tile_steering_override should be set to 0
+ */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
return 0;
/* init num_sc */
@@ -5103,6 +5171,29 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
#define DEFAULT_SH_MEM_BASES (0x6000)
+static void gfx_v10_0_debug_trap_config_init(struct amdgpu_device *adev,
+ uint32_t first_vmid,
+ uint32_t last_vmid)
+{
+ uint32_t data;
+ uint32_t trap_config_vmid_mask = 0;
+ int i;
+
+ /* Calculate trap config vmid mask */
+ for (i = first_vmid; i < last_vmid; i++)
+ trap_config_vmid_mask |= (1 << i);
+
+ data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
+ VMID_SEL, trap_config_vmid_mask);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+ TRAP_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+}
+
static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
{
int i;
@@ -5126,14 +5217,19 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- /* Initialize all compute VMIDs to have no GDS, GWS, or OA
- acccess. These should be enabled by FW for target VMIDs. */
+ /*
+ * Initialize all compute VMIDs to have no GDS, GWS, or OA
+ * access. These should be enabled by FW for target VMIDs.
+ */
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
}
+
+ gfx_v10_0_debug_trap_config_init(adev, adev->vm_manager.first_kfd_vmid,
+ AMDGPU_NUM_VMID);
}
static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)
@@ -5185,7 +5281,7 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0);
wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
/*
* Set corresponding TCP bits for the inactive WGPs in
@@ -5218,7 +5314,7 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
}
}
- gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
}
@@ -5227,7 +5323,7 @@ static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev)
/* TCCs are global (not instanced). */
uint32_t tcc_disable;
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0)) {
tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE_gc_10_3) |
RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE_gc_10_3);
} else {
@@ -5245,7 +5341,8 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
u32 tmp;
int i;
- WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
gfx_v10_0_setup_rb(adev);
gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info);
@@ -5256,7 +5353,7 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
mutex_lock(&adev->srbm_mutex);
- for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
nv_grbm_select(adev, 0, 0, 0, i);
/* CP and shaders */
WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
@@ -5277,26 +5374,74 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
}
+static u32 gfx_v10_0_get_cpg_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ if (me != 0)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING1);
+ default:
+ return 0;
+ }
+}
+
+static u32 gfx_v10_0_get_cpc_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+ if (me != 1)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
+ case 2:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
+ case 3:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
+ default:
+ return 0;
+ }
+}
+
static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
bool enable)
{
- u32 tmp;
+ u32 tmp, cp_int_cntl_reg;
+ int i, j;
if (amdgpu_sriov_vf(adev))
return;
- tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
-
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
- enable ? 1 : 0);
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
- enable ? 1 : 0);
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
- enable ? 1 : 0);
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
- enable ? 1 : 0);
-
- WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
+ enable ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp);
+ }
+ }
+ }
}
static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
@@ -5304,7 +5449,7 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
/* csib */
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2)) {
WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_HI,
adev->gfx.rlc.clear_state_gpu_addr >> 32);
WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_LO,
@@ -5360,8 +5505,10 @@ static void gfx_v10_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
static void gfx_v10_0_rlc_start(struct amdgpu_device *adev)
{
- /* TODO: enable rlc & smu handshake until smu
- * and gfxoff feature works as expected */
+ /*
+ * TODO: enable rlc & smu handshake until smu
+ * and gfxoff feature works as expected
+ */
if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
gfx_v10_0_rlc_smu_handshake_cntl(adev, false);
@@ -5384,7 +5531,7 @@ static int gfx_v10_0_rlc_load_microcode(struct amdgpu_device *adev)
{
const struct rlc_firmware_header_v2_0 *hdr;
const __le32 *fw_data;
- unsigned i, fw_size;
+ unsigned int i, fw_size;
if (!adev->gfx.rlc_fw)
return -EINVAL;
@@ -5421,6 +5568,8 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
gfx_v10_0_init_csb(adev);
+ gfx_v10_0_update_spm_vmid_internal(adev, 0xf);
+
if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
gfx_v10_0_rlc_enable_srm(adev);
} else {
@@ -5451,6 +5600,8 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
gfx_v10_0_init_csb(adev);
+ gfx_v10_0_update_spm_vmid_internal(adev, 0xf);
+
adev->gfx.rlc.funcs->start(adev);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
@@ -5459,6 +5610,7 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
return r;
}
}
+
return 0;
}
@@ -5926,11 +6078,13 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2))
WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
- } else {
+ else
WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
- }
+
+ if (amdgpu_in_reset(adev) && !enable)
+ return 0;
for (i = 0; i < adev->usec_timeout; i++) {
if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0)
@@ -5949,7 +6103,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
int r;
const struct gfx_firmware_header_v1_0 *pfp_hdr;
const __le32 *fw_data;
- unsigned i, fw_size;
+ unsigned int i, fw_size;
uint32_t tmp;
uint32_t usec_timeout = 50000; /* wait for 50ms */
@@ -5998,7 +6152,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
@@ -6027,7 +6181,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev)
int r;
const struct gfx_firmware_header_v1_0 *ce_hdr;
const __le32 *fw_data;
- unsigned i, fw_size;
+ unsigned int i, fw_size;
uint32_t tmp;
uint32_t usec_timeout = 50000; /* wait for 50ms */
@@ -6076,7 +6230,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0);
@@ -6104,7 +6258,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
int r;
const struct gfx_firmware_header_v1_0 *me_hdr;
const __le32 *fw_data;
- unsigned i, fw_size;
+ unsigned int i, fw_size;
uint32_t tmp;
uint32_t usec_timeout = 50000; /* wait for 50ms */
@@ -6153,7 +6307,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
@@ -6315,13 +6469,15 @@ static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
}
WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
}
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
DOORBELL_RANGE_LOWER_Sienna_Cichlid, ring->doorbell_index);
WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
@@ -6346,7 +6502,6 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
u32 tmp;
u32 rb_bufsz;
u64 rb_addr, rptr_addr, wptr_gpu_addr;
- u32 i;
/* Set the write pointer delay */
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
@@ -6373,13 +6528,13 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
- /* set the wb address wether it's enabled or not */
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ /* set the wb address whether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
lower_32_bits(wptr_gpu_addr));
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
@@ -6411,12 +6566,12 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
ring->wptr = 0;
WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
- /* Set the wb address wether it's enabled or not */
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ /* Set the wb address whether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
lower_32_bits(wptr_gpu_addr));
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
@@ -6441,24 +6596,21 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
/* start the ring */
gfx_v10_0_cp_gfx_start(adev);
- for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
- ring->sched.ready = true;
- }
-
return 0;
}
static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
if (enable) {
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, 0);
break;
default:
@@ -6466,13 +6618,15 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
break;
}
} else {
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid,
(CP_MEC_CNTL__MEC_ME1_HALT_MASK |
CP_MEC_CNTL__MEC_ME2_HALT_MASK));
@@ -6483,7 +6637,7 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
CP_MEC_CNTL__MEC_ME2_HALT_MASK));
break;
}
- adev->gfx.kiq.ring.sched.ready = false;
+ adev->gfx.kiq[0].ring.sched.ready = false;
}
udelay(50);
}
@@ -6492,7 +6646,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
const struct gfx_firmware_header_v1_0 *mec_hdr;
const __le32 *fw_data;
- unsigned i;
+ unsigned int i;
u32 tmp;
u32 usec_timeout = 50000; /* Wait for 50 ms */
@@ -6528,7 +6682,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
@@ -6564,35 +6718,51 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
/* tell RLC which is KIQ queue */
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp | 0x80);
break;
default:
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80);
break;
}
}
-static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
+static void gfx_v10_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
+ struct v10_gfx_mqd *mqd,
+ struct amdgpu_mqd_prop *prop)
{
- struct amdgpu_device *adev = ring->adev;
- struct v10_gfx_mqd *mqd = ring->mqd_ptr;
+ bool priority = 0;
+ u32 tmp;
+
+ /* set up default queue priority level
+ * 0x0 = low priority, 0x1 = high priority
+ */
+ if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
+ priority = 1;
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
+ mqd->cp_gfx_hqd_queue_priority = tmp;
+}
+
+static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v10_gfx_mqd *mqd = m;
uint64_t hqd_gpu_addr, wb_gpu_addr;
uint32_t tmp;
uint32_t rb_bufsz;
@@ -6602,8 +6772,8 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
mqd->cp_gfx_hqd_wptr_hi = 0;
/* set the pointer to the MQD */
- mqd->cp_mqd_base_addr = ring->mqd_gpu_addr & 0xfffffffc;
- mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+ mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
/* set up mqd control */
tmp = RREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL);
@@ -6617,11 +6787,8 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
mqd->cp_gfx_hqd_vmid = 0;
- /* set up default queue priority level
- * 0x0 = low priority, 0x1 = high priority */
- tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY);
- tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
- mqd->cp_gfx_hqd_queue_priority = tmp;
+ /* set up gfx queue priority */
+ gfx_v10_0_gfx_mqd_set_priority(adev, mqd, prop);
/* set up time quantum */
tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM);
@@ -6629,23 +6796,23 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
mqd->cp_gfx_hqd_quantum = tmp;
/* set up gfx hqd base. this is similar as CP_RB_BASE */
- hqd_gpu_addr = ring->gpu_addr >> 8;
+ hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
mqd->cp_gfx_hqd_base = hqd_gpu_addr;
mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
/* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ wb_gpu_addr = prop->rptr_gpu_addr;
mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
mqd->cp_gfx_hqd_rptr_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
/* set up rb_wptr_poll addr */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wb_gpu_addr = prop->wptr_gpu_addr;
mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
/* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
- rb_bufsz = order_base_2(ring->ring_size / 4) - 1;
+ rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL);
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
@@ -6656,9 +6823,9 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
/* set up cp_doorbell_control */
tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
- if (ring->use_doorbell) {
+ if (prop->use_doorbell) {
tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
- DOORBELL_OFFSET, ring->doorbell_index);
+ DOORBELL_OFFSET, prop->doorbell_index);
tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
DOORBELL_EN, 1);
} else
@@ -6666,13 +6833,7 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
DOORBELL_EN, 0);
mqd->cp_rb_doorbell_control = tmp;
- /*if there are 2 gfx rings, set the lower doorbell range of the first ring,
- *otherwise the range of the second ring will override the first ring */
- if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1)
- gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
-
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
- ring->wptr = 0;
mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR);
/* active the queue */
@@ -6681,175 +6842,71 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
return 0;
}
-#ifdef BRING_UP_DEBUG
-static int gfx_v10_0_gfx_queue_init_register(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
- struct v10_gfx_mqd *mqd = ring->mqd_ptr;
-
- /* set mmCP_GFX_HQD_WPTR/_HI to 0 */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr);
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi);
-
- /* set GFX_MQD_BASE */
- WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr);
- WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
-
- /* set GFX_MQD_CONTROL */
- WREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control);
-
- /* set GFX_HQD_VMID to 0 */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid);
-
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY,
- mqd->cp_gfx_hqd_queue_priority);
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum);
-
- /* set GFX_HQD_BASE, similar as CP_RB_BASE */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base);
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi);
-
- /* set GFX_HQD_RPTR_ADDR, similar as CP_RB_RPTR */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr);
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi);
-
- /* set GFX_HQD_CNTL, similar as CP_RB_CNTL */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl);
-
- /* set RB_WPTR_POLL_ADDR */
- WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi);
-
- /* set RB_DOORBELL_CONTROL */
- WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control);
-
- /* active the queue */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active);
-
- return 0;
-}
-#endif
-
-static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
+static int gfx_v10_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
{
struct amdgpu_device *adev = ring->adev;
struct v10_gfx_mqd *mqd = ring->mqd_ptr;
int mqd_idx = ring - &adev->gfx.gfx_ring[0];
- if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- gfx_v10_0_gfx_mqd_init(ring);
-#ifdef BRING_UP_DEBUG
- gfx_v10_0_gfx_queue_init_register(ring);
-#endif
+ amdgpu_ring_init_mqd(ring);
+
+ /*
+ * if there are 2 gfx rings, set the lower doorbell
+ * range of the first ring, otherwise the range of
+ * the second ring will override the first ring
+ */
+ if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1)
+ gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.me.mqd_backup[mqd_idx])
- memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
- } else if (amdgpu_in_reset(adev)) {
- /* reset mqd with the backup copy */
- if (adev->gfx.me.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
- /* reset the ring */
- ring->wptr = 0;
- adev->wb.wb[ring->wptr_offs] = 0;
- amdgpu_ring_clear_ring(ring);
-#ifdef BRING_UP_DEBUG
+ memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else {
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- gfx_v10_0_gfx_queue_init_register(ring);
+ if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1)
+ gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
-#endif
- } else {
+ /* restore mqd with the backup copy */
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
+ /* reset the ring */
+ ring->wptr = 0;
+ *ring->wptr_cpu_addr = 0;
amdgpu_ring_clear_ring(ring);
}
return 0;
}
-#ifndef BRING_UP_DEBUG
-static int gfx_v10_0_kiq_enable_kgq(struct amdgpu_device *adev)
-{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
- int r, i;
-
- if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
- return -EINVAL;
-
- r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
- adev->gfx.num_gfx_rings);
- if (r) {
- DRM_ERROR("Failed to lock KIQ (%d).\n", r);
- return r;
- }
-
- for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]);
-
- return amdgpu_ring_test_helper(kiq_ring);
-}
-#endif
-
static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
{
int r, i;
- struct amdgpu_ring *ring;
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v10_0_gfx_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v10_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false);
if (r)
- goto done;
+ return r;
}
-#ifndef BRING_UP_DEBUG
- r = gfx_v10_0_kiq_enable_kgq(adev);
- if (r)
- goto done;
-#endif
- r = gfx_v10_0_cp_gfx_start(adev);
+
+ r = amdgpu_gfx_enable_kgq(adev, 0);
if (r)
- goto done;
+ return r;
- for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
- ring->sched.ready = true;
- }
-done:
- return r;
+ return gfx_v10_0_cp_gfx_start(adev);
}
-static void gfx_v10_0_compute_mqd_set_priority(struct amdgpu_ring *ring, struct v10_compute_mqd *mqd)
+static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
+ struct amdgpu_mqd_prop *prop)
{
- struct amdgpu_device *adev = ring->adev;
-
- if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
- if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
- mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
- mqd->cp_hqd_queue_priority =
- AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
- }
- }
-}
-
-static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
- struct v10_compute_mqd *mqd = ring->mqd_ptr;
+ struct v10_compute_mqd *mqd = m;
uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
uint32_t tmp;
@@ -6861,7 +6918,7 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
mqd->compute_misc_reserved = 0x00000003;
- eop_base_addr = ring->eop_gpu_addr >> 8;
+ eop_base_addr = prop->eop_gpu_addr >> 8;
mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
@@ -6875,9 +6932,9 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
/* enable doorbell? */
tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
- if (ring->use_doorbell) {
+ if (prop->use_doorbell) {
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_OFFSET, ring->doorbell_index);
+ DOORBELL_OFFSET, prop->doorbell_index);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_EN, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
@@ -6892,15 +6949,14 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
mqd->cp_hqd_pq_doorbell_control = tmp;
/* disable the queue if it's active */
- ring->wptr = 0;
mqd->cp_hqd_dequeue_request = 0;
mqd->cp_hqd_pq_rptr = 0;
mqd->cp_hqd_pq_wptr_lo = 0;
mqd->cp_hqd_pq_wptr_hi = 0;
/* set the pointer to the MQD */
- mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
- mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+ mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
/* set MQD vmid to 0 */
tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
@@ -6908,55 +6964,38 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
mqd->cp_mqd_control = tmp;
/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
- hqd_gpu_addr = ring->gpu_addr >> 8;
+ hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
/* set up the HQD, this is similar to CP_RB0_CNTL */
tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
- (order_base_2(ring->ring_size / 4) - 1));
+ (order_base_2(prop->queue_size / 4) - 1));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
- ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+ (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
#ifdef __BIG_ENDIAN
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
+ prop->allow_tunneling);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
mqd->cp_hqd_pq_control = tmp;
/* set the wb address whether it's enabled or not */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ wb_gpu_addr = prop->rptr_gpu_addr;
mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_rptr_report_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wb_gpu_addr = prop->wptr_gpu_addr;
mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
- tmp = 0;
- /* enable the doorbell if requested */
- if (ring->use_doorbell) {
- tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_OFFSET, ring->doorbell_index);
-
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_EN, 1);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_SOURCE, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_HIT, 0);
- }
-
- mqd->cp_hqd_pq_doorbell_control = tmp;
-
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
- ring->wptr = 0;
mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
/* set the vmid for the queue */
@@ -6972,13 +7011,10 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
mqd->cp_hqd_ib_control = tmp;
/* set static priority for a compute queue/ring */
- gfx_v10_0_compute_mqd_set_priority(ring, mqd);
+ mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
+ mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
- /* map_queues packet doesn't need activate the queue,
- * so only kiq need set this field.
- */
- if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
- mqd->cp_hqd_active = 1;
+ mqd->cp_hqd_active = prop->hqd_active;
return 0;
}
@@ -6996,20 +7032,6 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring)
/* disable wptr polling */
WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
- /* write the EOP addr */
- WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
- mqd->cp_hqd_eop_base_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
- mqd->cp_hqd_eop_base_addr_hi);
-
- /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
- WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
- mqd->cp_hqd_eop_control);
-
- /* enable doorbell? */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
- mqd->cp_hqd_pq_doorbell_control);
-
/* disable the queue if it's active */
if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
@@ -7028,6 +7050,19 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring)
mqd->cp_hqd_pq_wptr_hi);
}
+ /* disable doorbells */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+ /* write the EOP addr */
+ WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
+ mqd->cp_hqd_eop_base_addr_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
+ mqd->cp_hqd_eop_base_addr_hi);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
+ mqd->cp_hqd_eop_control);
+
/* set the pointer to the MQD */
WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
mqd->cp_mqd_base_addr_lo);
@@ -7097,14 +7132,13 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct v10_compute_mqd *mqd = ring->mqd_ptr;
- int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
gfx_v10_0_kiq_setting(ring);
if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
/* reset MQD to a clean status */
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
/* reset ring buffer */
ring->wptr = 0;
@@ -7117,46 +7151,45 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
mutex_unlock(&adev->srbm_mutex);
} else {
memset((void *)mqd, 0, sizeof(*mqd));
+ if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+ amdgpu_ring_clear_ring(ring);
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- gfx_v10_0_compute_mqd_init(ring);
+ amdgpu_ring_init_mqd(ring);
gfx_v10_0_kiq_init_register(ring);
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
}
return 0;
}
-static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
+static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
{
struct amdgpu_device *adev = ring->adev;
struct v10_compute_mqd *mqd = ring->mqd_ptr;
int mqd_idx = ring - &adev->gfx.compute_ring[0];
- if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ if (!restore && !amdgpu_in_reset(adev) && !adev->in_suspend) {
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- gfx_v10_0_compute_mqd_init(ring);
+ amdgpu_ring_init_mqd(ring);
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
- } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
- /* reset MQD to a clean status */
+ memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else {
+ /* restore MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
-
+ memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
/* reset ring buffer */
ring->wptr = 0;
- atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
- amdgpu_ring_clear_ring(ring);
- } else {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
amdgpu_ring_clear_ring(ring);
}
@@ -7165,54 +7198,24 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
- int r;
-
- ring = &adev->gfx.kiq.ring;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (unlikely(r != 0))
- return r;
-
- gfx_v10_0_kiq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
- ring->sched.ready = true;
+ gfx_v10_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
return 0;
}
static int gfx_v10_0_kcq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
+ int i, r;
gfx_v10_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v10_0_kcq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v10_0_kcq_init_queue(&adev->gfx.compute_ring[i],
+ false);
if (r)
- goto done;
+ return r;
}
- r = amdgpu_gfx_enable_kcq(adev);
-done:
- return r;
+ return amdgpu_gfx_enable_kcq(adev, 0);
}
static int gfx_v10_0_cp_resume(struct amdgpu_device *adev)
@@ -7279,9 +7282,11 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev)
{
uint32_t data, pattern = 0xDEADBEEF;
- /* check if mmVGT_ESGS_RING_SIZE_UMD
- * has been remapped to mmVGT_ESGS_RING_SIZE */
- switch (adev->ip_versions[GC_HWIP][0]) {
+ /*
+ * check if mmVGT_ESGS_RING_SIZE_UMD
+ * has been remapped to mmVGT_ESGS_RING_SIZE
+ */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 4):
@@ -7291,15 +7296,15 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern);
if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid) == pattern) {
- WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD , data);
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data);
return true;
- } else {
- WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, data);
- return false;
}
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, data);
break;
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
return true;
default:
data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE);
@@ -7309,12 +7314,12 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev)
if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE) == pattern) {
WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data);
return true;
- } else {
- WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data);
- return false;
}
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data);
break;
}
+
+ return false;
}
static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev)
@@ -7324,17 +7329,21 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
return;
- /* initialize cam_index to 0
- * index will auto-inc after each data writting */
+ /*
+ * Initialize cam_index to 0
+ * index will auto-inc after each data writing
+ */
WREG32_SOC15(GC, 0, mmGRBM_CAM_INDEX, 0);
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
/* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */
data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
@@ -7453,6 +7462,7 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev)
static void gfx_v10_0_disable_gpa_mode(struct amdgpu_device *adev)
{
uint32_t data;
+
data = RREG32_SOC15(GC, 0, mmCPC_PSP_DEBUG);
data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
WREG32_SOC15(GC, 0, mmCPC_PSP_DEBUG, data);
@@ -7462,25 +7472,26 @@ static void gfx_v10_0_disable_gpa_mode(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCPG_PSP_DEBUG, data);
}
-static int gfx_v10_0_hw_init(void *handle)
+static int gfx_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!amdgpu_emu_mode)
gfx_v10_0_init_golden_registers(adev);
+ amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
+ adev->gfx.cleaner_shader_ptr);
+
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
/**
* For gfx 10, rlc firmware loading relies on smu firmware is
* loaded firstly, so in direct type, it has to load smc ucode
* here before rlc.
*/
- if (!(adev->flags & AMD_IS_APU)) {
- r = amdgpu_pm_load_smu_firmware(adev, NULL);
- if (r)
- return r;
- }
+ r = amdgpu_pm_load_smu_firmware(adev, NULL);
+ if (r)
+ return r;
gfx_v10_0_disable_gpa_mode(adev);
}
@@ -7498,80 +7509,56 @@ static int gfx_v10_0_hw_init(void *handle)
* init golden registers and rlc resume may override some registers,
* reconfig them here
*/
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 10) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 10) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 1) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2))
gfx_v10_0_tcp_harvest(adev);
r = gfx_v10_0_cp_resume(adev);
if (r)
return r;
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 0))
gfx_v10_3_program_pbb_mode(adev);
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0) && !amdgpu_sriov_vf(adev))
gfx_v10_3_set_power_brake_sequence(adev);
return r;
}
-#ifndef BRING_UP_DEBUG
-static int gfx_v10_0_kiq_disable_kgq(struct amdgpu_device *adev)
+static int gfx_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *kiq_ring = &kiq->ring;
- int i;
+ struct amdgpu_device *adev = ip_block->adev;
- if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
- return -EINVAL;
-
- if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
- adev->gfx.num_gfx_rings))
- return -ENOMEM;
-
- for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
- PREEMPT_QUEUES, 0, 0);
-
- return amdgpu_ring_test_helper(kiq_ring);
-}
-#endif
-
-static int gfx_v10_0_hw_fini(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int r;
- uint32_t tmp;
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
+
+ /* WA added for Vangogh asic fixing the SMU suspend failure
+ * It needs to set power gating again during gfxoff control
+ * otherwise the gfxoff disallowing will be failed to set.
+ */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 1))
+ gfx_v10_0_set_powergating_state(ip_block, AMD_PG_STATE_UNGATE);
if (!adev->no_hw_access) {
-#ifndef BRING_UP_DEBUG
if (amdgpu_async_gfx_ring) {
- r = gfx_v10_0_kiq_disable_kgq(adev);
- if (r)
+ if (amdgpu_gfx_disable_kgq(adev, 0))
DRM_ERROR("KGQ disable failed\n");
}
-#endif
- if (amdgpu_gfx_disable_kcq(adev))
+
+ if (amdgpu_gfx_disable_kcq(adev, 0))
DRM_ERROR("KCQ disable failed\n");
}
if (amdgpu_sriov_vf(adev)) {
gfx_v10_0_cp_gfx_enable(adev, false);
- /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) {
- tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
- tmp &= 0xffffff00;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
- } else {
- tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
- tmp &= 0xffffff00;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
- }
-
+ /* Remove the steps of clearing KIQ position.
+ * It causes GFX hang when another Win guest is rendering.
+ */
return 0;
}
gfx_v10_0_cp_enable(adev, false);
@@ -7580,19 +7567,19 @@ static int gfx_v10_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v10_0_suspend(void *handle)
+static int gfx_v10_0_suspend(struct amdgpu_ip_block *ip_block)
{
- return gfx_v10_0_hw_fini(handle);
+ return gfx_v10_0_hw_fini(ip_block);
}
-static int gfx_v10_0_resume(void *handle)
+static int gfx_v10_0_resume(struct amdgpu_ip_block *ip_block)
{
- return gfx_v10_0_hw_init(handle);
+ return gfx_v10_0_hw_init(ip_block);
}
-static bool gfx_v10_0_is_idle(void *handle)
+static bool gfx_v10_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
GRBM_STATUS, GUI_ACTIVE))
@@ -7601,11 +7588,11 @@ static bool gfx_v10_0_is_idle(void *handle)
return true;
}
-static int gfx_v10_0_wait_for_idle(void *handle)
+static int gfx_v10_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- unsigned i;
+ unsigned int i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -7619,11 +7606,11 @@ static int gfx_v10_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int gfx_v10_0_soft_reset(void *handle)
+static int gfx_v10_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 grbm_soft_reset = 0;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* GRBM_STATUS */
tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
@@ -7648,12 +7635,13 @@ static int gfx_v10_0_soft_reset(void *handle)
/* GRBM_STATUS2 */
tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY_Sienna_Cichlid))
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
@@ -7680,19 +7668,17 @@ static int gfx_v10_0_soft_reset(void *handle)
/* Disable MEC parsing/prefetching */
gfx_v10_0_cp_compute_enable(adev, false);
- if (grbm_soft_reset) {
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- tmp |= grbm_soft_reset;
- dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- udelay(50);
+ udelay(50);
- tmp &= ~grbm_soft_reset;
- WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- }
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
/* Wait a little for things to settle down */
udelay(50);
@@ -7704,9 +7690,26 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
uint64_t clock, clock_lo, clock_hi, hi_check;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ preempt_disable();
+ clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish);
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish);
+ hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish);
+ /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
+ * roughly every 42 seconds.
+ */
+ if (hi_check != clock_hi) {
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish);
+ clock_hi = hi_check;
+ }
+ preempt_enable();
+ clock = clock_lo | (clock_hi << 32ULL);
+ break;
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
preempt_disable();
clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh);
clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh);
@@ -7721,6 +7724,21 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
preempt_enable();
clock = clock_lo | (clock_hi << 32ULL);
break;
+ case IP_VERSION(10, 3, 6):
+ preempt_disable();
+ clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6);
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6);
+ hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6);
+ /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
+ * roughly every 42 seconds.
+ */
+ if (hi_check != clock_hi) {
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6);
+ clock_hi = hi_check;
+ }
+ preempt_enable();
+ clock = clock_lo | (clock_hi << 32ULL);
+ break;
default:
preempt_disable();
clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER);
@@ -7769,15 +7787,18 @@ static void gfx_v10_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
(1 << (oa_size + oa_base)) - (1 << oa_base));
}
-static int gfx_v10_0_early_init(void *handle)
+static int gfx_v10_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ adev->gfx.funcs = &gfx_v10_0_gfx_funcs;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X;
break;
case IP_VERSION(10, 3, 0):
@@ -7785,7 +7806,9 @@ static int gfx_v10_0_early_init(void *handle)
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_Sienna_Cichlid;
break;
default:
@@ -7800,13 +7823,17 @@ static int gfx_v10_0_early_init(void *handle)
gfx_v10_0_set_irq_funcs(adev);
gfx_v10_0_set_gds_init(adev);
gfx_v10_0_set_rlc_funcs(adev);
+ gfx_v10_0_set_mqd_funcs(adev);
- return 0;
+ /* init rlcg reg access ctrl */
+ gfx_v10_0_init_rlcg_reg_access_ctrl(adev);
+
+ return gfx_v10_0_init_microcode(adev);
}
-static int gfx_v10_0_late_init(void *handle)
+static int gfx_v10_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -7817,6 +7844,10 @@ static int gfx_v10_0_late_init(void *handle)
if (r)
return r;
+ r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
+ if (r)
+ return r;
+
return 0;
}
@@ -7829,21 +7860,23 @@ static bool gfx_v10_0_is_rlc_enabled(struct amdgpu_device *adev)
return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
}
-static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev)
+static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
uint32_t data;
- unsigned i;
+ unsigned int i;
data = RLC_SAFE_MODE__CMD_MASK;
data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data);
/* wait for RLC_SAFE_MODE */
@@ -7868,18 +7901,20 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev)
}
}
-static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev)
+static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
uint32_t data;
data = RLC_SAFE_MODE__CMD_MASK;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data);
break;
default:
@@ -8182,7 +8217,7 @@ static void gfx_v10_0_apply_medium_grain_clock_gating_workaround(struct amdgpu_d
mmCGTS_SA1_QUAD1_SM_CTRL_REG
};
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2)) {
for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs_nv12); i++) {
reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] +
tcp_ctrl_regs_nv12[i];
@@ -8213,7 +8248,7 @@ static void gfx_v10_0_apply_medium_grain_clock_gating_workaround(struct amdgpu_d
static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
bool enable)
{
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
if (enable) {
/* enable FGCG firstly*/
@@ -8227,9 +8262,12 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
/* === CGCG + CGLS === */
gfx_v10_0_update_coarse_grain_clock_gating(adev, enable);
- if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 10)) ||
- (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1)) ||
- (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2)))
+ if ((amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 1, 10)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 1, 1)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 1, 2)))
gfx_v10_0_apply_medium_grain_clock_gating_workaround(adev);
} else {
/* CGCG/CGLS should be disabled before MGCG/MGLS
@@ -8252,31 +8290,39 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
AMD_CG_SUPPORT_GFX_3D_CGLS))
gfx_v10_0_enable_gui_idle_interrupt(adev, enable);
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return 0;
}
-static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
+static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev,
+ unsigned int vmid)
{
- u32 reg, data;
-
- amdgpu_gfx_off_ctrl(adev, false);
+ u32 reg, pre_data, data;
- /* not for *_SOC15 */
reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
- if (amdgpu_sriov_is_pp_one_vf(adev))
- data = RREG32_NO_KIQ(reg);
+ /* not for *_SOC15 */
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
+ pre_data = RREG32_NO_KIQ(reg);
else
- data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
+ pre_data = RREG32(reg);
- data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
+ data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
- if (amdgpu_sriov_is_pp_one_vf(adev))
- WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
- else
- WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ if (pre_data != data) {
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) {
+ WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ } else
+ WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ }
+}
+
+static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
+{
+ amdgpu_gfx_off_ctrl(adev, false);
+
+ gfx_v10_0_update_spm_vmid_internal(adev, vmid);
amdgpu_gfx_off_ctrl(adev, true);
}
@@ -8330,9 +8376,11 @@ static void gfx_v10_cntl_power_gating(struct amdgpu_device *adev, bool enable)
* Power/performance team will optimize it and might give a new value later.
*/
if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
data = 0x4E20 & RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK_Vangogh;
WREG32_SOC15(GC, 0, mmRLC_PG_DELAY_3, data);
break;
@@ -8344,11 +8392,11 @@ static void gfx_v10_cntl_power_gating(struct amdgpu_device *adev, bool enable)
static void gfx_v10_cntl_pg(struct amdgpu_device *adev, bool enable)
{
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
gfx_v10_cntl_power_gating(adev, enable);
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = {
@@ -8377,21 +8425,19 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs_sriov = {
.reset = gfx_v10_0_rlc_reset,
.start = gfx_v10_0_rlc_start,
.update_spm_vmid = gfx_v10_0_update_spm_vmid,
- .sriov_wreg = gfx_v10_sriov_wreg,
- .sriov_rreg = gfx_v10_sriov_rreg,
.is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range,
};
-static int gfx_v10_0_set_powergating_state(void *handle,
+static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
@@ -8403,8 +8449,16 @@ static int gfx_v10_0_set_powergating_state(void *handle,
break;
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ if (!enable)
+ amdgpu_gfx_off_ctrl(adev, false);
+
gfx_v10_cntl_pg(adev, enable);
- amdgpu_gfx_off_ctrl(adev, enable);
+
+ if (enable)
+ amdgpu_gfx_off_ctrl(adev, true);
+
break;
default:
break;
@@ -8412,15 +8466,15 @@ static int gfx_v10_0_set_powergating_state(void *handle,
return 0;
}
-static int gfx_v10_0_set_clockgating_state(void *handle,
+static int gfx_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
@@ -8429,7 +8483,9 @@ static int gfx_v10_0_set_clockgating_state(void *handle,
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
gfx_v10_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE);
break;
@@ -8439,9 +8495,9 @@ static int gfx_v10_0_set_clockgating_state(void *handle,
return 0;
}
-static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags)
+static void gfx_v10_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
/* AMD_CG_SUPPORT_GFX_FGCG */
@@ -8485,7 +8541,8 @@ static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags)
static u64 gfx_v10_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
{
- return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 is 32bit rptr*/
+ /* gfx10 is 32bit rptr*/
+ return *(uint32_t *)ring->rptr_cpu_addr;
}
static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
@@ -8495,7 +8552,7 @@ static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell) {
- wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
} else {
wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
@@ -8510,17 +8567,21 @@ static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
- WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR,
+ lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI,
+ upper_32_bits(ring->wptr));
}
}
static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
{
- return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 hardware is 32bit rptr */
+ /* gfx10 hardware is 32bit rptr */
+ return *(uint32_t *)ring->rptr_cpu_addr;
}
static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
@@ -8529,7 +8590,7 @@ static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell)
- wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
else
BUG();
return wptr;
@@ -8539,9 +8600,9 @@ static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- /* XXX check if swapping is necessary on BE */
if (ring->use_doorbell) {
- atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
BUG(); /* only DOORBELL method supported on gfx10 now */
@@ -8567,7 +8628,7 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
}
reg_mem_engine = 0;
} else {
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe;
reg_mem_engine = 1; /* pfp */
}
@@ -8582,7 +8643,7 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
uint32_t flags)
{
- unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
u32 header, control = 0;
if (ib->flags & AMDGPU_IB_FLAG_CE)
@@ -8592,7 +8653,7 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
control |= ib->length_dw | (vmid << 24);
- if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+ if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
if (flags & AMDGPU_IB_PREEMPTED)
@@ -8619,7 +8680,7 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
uint32_t flags)
{
- unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
/* Currently, there is a high possibility to get wave ID mismatch
@@ -8650,7 +8711,7 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
}
static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
- u64 seq, unsigned flags)
+ u64 seq, unsigned int flags)
{
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
@@ -8692,8 +8753,20 @@ static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
upper_32_bits(addr), seq, 0xffffffff, 4);
}
+static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
- unsigned vmid, uint64_t pd_addr)
+ unsigned int vmid, uint64_t pd_addr)
{
amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
@@ -8743,7 +8816,7 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
{
uint32_t dw2 = 0;
- if (amdgpu_mcbp || amdgpu_sriov_vf(ring->adev))
+ if (ring->adev->gfx.mcbp)
gfx_v10_0_ring_emit_ce_meta(ring,
(!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
@@ -8772,38 +8845,28 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, 0);
}
-static unsigned gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
- unsigned ret;
+ unsigned int ret;
amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, 0);
ret = ring->wptr & ring->buf_mask;
- amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
-{
- unsigned cur;
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr - 1) & ring->buf_mask;
- if (likely(cur > offset))
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)
{
int i, r = 0;
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
struct amdgpu_ring *kiq_ring = &kiq->ring;
unsigned long flags;
@@ -8850,26 +8913,26 @@ static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
{
struct amdgpu_device *adev = ring->adev;
struct v10_ce_ib_state ce_payload = {0};
- uint64_t csa_addr;
+ uint64_t offset, ce_payload_gpu_addr;
+ void *ce_payload_cpu_addr;
int cnt;
cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
- csa_addr = amdgpu_csa_vaddr(ring->adev);
+
+ offset = offsetof(struct v10_gfx_meta_data, ce_payload);
+ ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
WRITE_DATA_DST_SEL(8) |
WR_CONFIRM) |
WRITE_DATA_CACHE_POLICY(0));
- amdgpu_ring_write(ring, lower_32_bits(csa_addr +
- offsetof(struct v10_gfx_meta_data, ce_payload)));
- amdgpu_ring_write(ring, upper_32_bits(csa_addr +
- offsetof(struct v10_gfx_meta_data, ce_payload)));
+ amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
if (resume)
- amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr +
- offsetof(struct v10_gfx_meta_data,
- ce_payload),
+ amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
sizeof(ce_payload) >> 2);
else
amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
@@ -8880,12 +8943,18 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
{
struct amdgpu_device *adev = ring->adev;
struct v10_de_ib_state de_payload = {0};
- uint64_t csa_addr, gds_addr;
+ uint64_t offset, gds_addr, de_payload_gpu_addr;
+ void *de_payload_cpu_addr;
int cnt;
- csa_addr = amdgpu_csa_vaddr(ring->adev);
- gds_addr = ALIGN(csa_addr + AMDGPU_CSA_SIZE - adev->gds.gds_size,
+ offset = offsetof(struct v10_gfx_meta_data, de_payload);
+ de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+
+ gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+ AMDGPU_CSA_SIZE - adev->gds.gds_size,
PAGE_SIZE);
+
de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
@@ -8895,15 +8964,11 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
WRITE_DATA_DST_SEL(8) |
WR_CONFIRM) |
WRITE_DATA_CACHE_POLICY(0));
- amdgpu_ring_write(ring, lower_32_bits(csa_addr +
- offsetof(struct v10_gfx_meta_data, de_payload)));
- amdgpu_ring_write(ring, upper_32_bits(csa_addr +
- offsetof(struct v10_gfx_meta_data, de_payload)));
+ amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
if (resume)
- amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr +
- offsetof(struct v10_gfx_meta_data,
- de_payload),
+ amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
sizeof(de_payload) >> 2);
else
amdgpu_ring_write_multiple(ring, (void *)&de_payload,
@@ -8983,19 +9048,6 @@ static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
ref, mask);
}
-static void gfx_v10_0_ring_soft_recovery(struct amdgpu_ring *ring,
- unsigned vmid)
-{
- struct amdgpu_device *adev = ring->adev;
- uint32_t value = 0;
-
- value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
- value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
- value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
- value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
- WREG32_SOC15(GC, 0, mmSQ_CMD, value);
-}
-
static void
gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
uint32_t me, uint32_t pipe,
@@ -9093,7 +9145,7 @@ static void gfx_v10_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev
static int gfx_v10_0_set_eop_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
switch (type) {
@@ -9142,6 +9194,7 @@ static int gfx_v10_0_eop_irq(struct amdgpu_device *adev,
struct amdgpu_ring *ring;
DRM_DEBUG("IH: CP EOP\n");
+
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
queue_id = (entry->ring_id & 0x70) >> 4;
@@ -9158,27 +9211,58 @@ static int gfx_v10_0_eop_irq(struct amdgpu_device *adev,
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
/* Per-queue interrupt is supported for MEC starting from VI.
- * The interrupt can only be enabled/disabled per pipe instead of per queue.
- */
- if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
+ * The interrupt can only be enabled/disabled per pipe instead
+ * of per queue.
+ */
+ if ((ring->me == me_id) &&
+ (ring->pipe == pipe_id) &&
+ (ring->queue == queue_id))
amdgpu_fence_process(ring);
}
break;
}
+
return 0;
}
static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
case AMDGPU_IRQ_STATE_ENABLE:
- WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
- PRIV_REG_INT_ENABLE,
- state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v10_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
break;
default:
break;
@@ -9187,17 +9271,75 @@ static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v10_0_set_bad_op_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v10_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
case AMDGPU_IRQ_STATE_ENABLE:
- WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
- PRIV_INSTR_INT_ENABLE,
- state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_INSTR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
break;
default:
break;
@@ -9221,8 +9363,8 @@ static void gfx_v10_0_handle_priv_fault(struct amdgpu_device *adev,
case 0:
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
ring = &adev->gfx.gfx_ring[i];
- /* we only enabled 1 gfx queue per pipe for now */
- if (ring->me == me_id && ring->pipe == pipe_id)
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
drm_sched_fault(&ring->sched);
}
break;
@@ -9249,6 +9391,15 @@ static int gfx_v10_0_priv_reg_irq(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v10_0_bad_op_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal opcode in command stream \n");
+ gfx_v10_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
static int gfx_v10_0_priv_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -9264,7 +9415,7 @@ static int gfx_v10_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
uint32_t tmp, target;
- struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
+ struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
if (ring->me == 1)
target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
@@ -9308,7 +9459,7 @@ static int gfx_v10_0_kiq_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
u8 me_id, pipe_id, queue_id;
- struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
+ struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
@@ -9343,6 +9494,319 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
}
+/**
+ * gfx_v10_ring_insert_nop - pad a ring with NOP dwords
+ * @ring: ring to write to
+ * @num_nop: number of dwords to fill
+ *
+ * Zero dwords is a no-op. A single dword is filled with the ring's own NOP
+ * value. For two or more dwords a PACKET3_NOP header is written first, with
+ * its count field set to num_nop - 2 (capped at 0x3ffe), and the remaining
+ * num_nop - 1 dwords are then filled by amdgpu_ring_insert_nop().
+ */
+static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
+{
+	/*
+	 * Nothing to pad. Returning here also keeps the unsigned
+	 * "num_nop - 2" and "num_nop - 1" expressions below from wrapping.
+	 */
+	if (!num_nop)
+		return;
+
+	/* Header itself is a NOP packet */
+	if (num_nop == 1) {
+		amdgpu_ring_write(ring, ring->funcs->nop);
+		return;
+	}
+
+	/*
+	 * The header count is capped at 0x3ffe (max HW optimization); the
+	 * dwords after the header are written one NOP at a time.
+	 */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
+
+	/* Header is at index 0, followed by num_nop - 1 NOP packets */
+	amdgpu_ring_insert_nop(ring, num_nop - 1);
+}
+
+/**
+ * gfx_v10_0_reset_kgq - reset one kernel gfx queue with the help of the KIQ
+ * @ring: gfx ring to reset
+ * @vmid: VMID whose bit is set in CP_VMID_RESET.RESET_REQUEST
+ * @timedout_fence: fence handed to the ring reset begin/end helpers
+ *
+ * Submits on the KIQ ring a CP_VMID_RESET write for this ring's pipe/queue,
+ * a wait on the cp_gfx_hqd_active dword of the ring's MQD and a wait for
+ * CP_VMID_RESET to read back zero. The queue is then re-initialized and
+ * mapped again through the KIQ.
+ *
+ * Return: 0 on success, -EINVAL if the KIQ packet functions are missing,
+ * -ENOMEM if KIQ ring space cannot be allocated, or the error code of the
+ * step that failed.
+ */
+static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring,
+			       unsigned int vmid,
+			       struct amdgpu_fence *timedout_fence)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+	struct amdgpu_ring *kiq_ring = &kiq->ring;
+	unsigned long flags;
+	u32 tmp;
+	u64 addr;
+	int r;
+
+	/*
+	 * kiq_map_queues is called unconditionally for the remap further
+	 * down, so it has to be present as well.
+	 */
+	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues ||
+	    !kiq->pmf->kiq_map_queues)
+		return -EINVAL;
+
+	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+	spin_lock_irqsave(&kiq->ring_lock, flags);
+
+	/* 5 + 7 + 7 dwords: room for the three packets emitted below */
+	if (amdgpu_ring_alloc(kiq_ring, 5 + 7 + 7)) {
+		spin_unlock_irqrestore(&kiq->ring_lock, flags);
+		return -ENOMEM;
+	}
+
+	/* GPU address of the cp_gfx_hqd_active field inside this ring's MQD */
+	addr = amdgpu_bo_gpu_offset(ring->mqd_obj) +
+		offsetof(struct v10_gfx_mqd, cp_gfx_hqd_active);
+	tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+	/* select the queue bit in the field that belongs to the ring's pipe */
+	if (ring->pipe == 0)
+		tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE0_QUEUES, 1 << ring->queue);
+	else
+		tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE1_QUEUES, 1 << ring->queue);
+
+	gfx_v10_0_ring_emit_wreg(kiq_ring,
+				 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp);
+	gfx_v10_0_wait_reg_mem(kiq_ring, 0, 1, 0,
+			       lower_32_bits(addr), upper_32_bits(addr),
+			       0, 1, 0x20);
+	gfx_v10_0_ring_emit_reg_wait(kiq_ring,
+				     SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffffffff);
+	amdgpu_ring_commit(kiq_ring);
+	r = amdgpu_ring_test_ring(kiq_ring);
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
+	if (r)
+		return r;
+
+	r = gfx_v10_0_kgq_init_queue(ring, true);
+	if (r) {
+		DRM_ERROR("fail to init kgq\n");
+		return r;
+	}
+
+	/* map the re-initialized queue again */
+	spin_lock_irqsave(&kiq->ring_lock, flags);
+
+	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size)) {
+		spin_unlock_irqrestore(&kiq->ring_lock, flags);
+		return -ENOMEM;
+	}
+	kiq->pmf->kiq_map_queues(kiq_ring, ring);
+	amdgpu_ring_commit(kiq_ring);
+	r = amdgpu_ring_test_ring(kiq_ring);
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
+	if (r)
+		return r;
+
+	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+/**
+ * gfx_v10_0_reset_kcq - reset one kernel compute queue with the help of the KIQ
+ * @ring: compute ring to reset
+ * @vmid: not used by this function
+ * @timedout_fence: fence handed to the ring reset begin/end helpers
+ *
+ * Unmaps the queue through the KIQ with the RESET_QUEUES action, polls
+ * CP_HQD_ACTIVE for the queue until bit 0 clears (at most
+ * adev->usec_timeout microseconds), re-initializes the queue and maps it
+ * again through the KIQ.
+ *
+ * Return: 0 on success, -EINVAL if the KIQ packet functions are missing,
+ * -ENOMEM if KIQ ring space cannot be allocated, -ETIMEDOUT if the HQD
+ * stays active, or the error code of the step that failed.
+ */
+static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
+			       unsigned int vmid,
+			       struct amdgpu_fence *timedout_fence)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+	struct amdgpu_ring *kiq_ring = &kiq->ring;
+	unsigned long flags;
+	int i, r;
+
+	/*
+	 * Both packet helpers are used below: kiq_unmap_queues for the reset
+	 * and kiq_map_queues for the remap, so both have to be present.
+	 */
+	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues ||
+	    !kiq->pmf->kiq_map_queues)
+		return -EINVAL;
+
+	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+	spin_lock_irqsave(&kiq->ring_lock, flags);
+
+	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+		spin_unlock_irqrestore(&kiq->ring_lock, flags);
+		return -ENOMEM;
+	}
+
+	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
+				   0, 0);
+	amdgpu_ring_commit(kiq_ring);
+	r = amdgpu_ring_test_ring(kiq_ring);
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
+	if (r)
+		return r;
+
+	/* make sure dequeue is complete */
+	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+	mutex_lock(&adev->srbm_mutex);
+	nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+	for (i = 0; i < adev->usec_timeout; i++) {
+		if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+			break;
+		udelay(1);
+	}
+	/* r is still 0 here; it only changes if the poll loop ran out */
+	if (i >= adev->usec_timeout)
+		r = -ETIMEDOUT;
+	/* restore the default GRBM selection before dropping the mutex */
+	nv_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+	if (r) {
+		dev_err(adev->dev, "fail to wait on hqd deactivate\n");
+		return r;
+	}
+
+	r = gfx_v10_0_kcq_init_queue(ring, true);
+	if (r) {
+		dev_err(adev->dev, "fail to init kcq\n");
+		return r;
+	}
+
+	/* map the re-initialized queue again */
+	spin_lock_irqsave(&kiq->ring_lock, flags);
+	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size)) {
+		spin_unlock_irqrestore(&kiq->ring_lock, flags);
+		return -ENOMEM;
+	}
+	kiq->pmf->kiq_map_queues(kiq_ring, ring);
+	amdgpu_ring_commit(kiq_ring);
+	r = amdgpu_ring_test_ring(kiq_ring);
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
+	if (r)
+		return r;
+
+	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+/*
+ * .print_ip_state callback: print the register snapshot held in
+ * adev->gfx.ip_dump_core, adev->gfx.ip_dump_compute_queues and
+ * adev->gfx.ip_dump_gfx_queues to @p.
+ *
+ * The sections are printed in that order and printing stops at the first
+ * buffer that is NULL. The per-queue buffers are walked in the same
+ * me/mec -> pipe -> queue -> register order in which gfx_v10_ip_dump()
+ * fills them.
+ */
+static void gfx_v10_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ /* core registers: one "name value" line per gc_reg_list_10_1 entry */
+ for (i = 0; i < reg_count; i++)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_reg_list_10_1[i].reg_name,
+ adev->gfx.ip_dump_core[i]);
+
+ /* print compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
+ drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.mec.num_mec,
+ adev->gfx.mec.num_pipe_per_mec,
+ adev->gfx.mec.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ /*
+ * For every MEC but the first, the slot of the ME1 header
+ * dump register is labelled with the ME2 name; the dump
+ * routine reads the ME2 register into that slot.
+ */
+ if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ "mmCP_MEC_ME2_HEADER_DUMP",
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ else
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_10[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ }
+ /* each queue occupies reg_count consecutive entries */
+ index += reg_count;
+ }
+ }
+ }
+
+ /* print gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ /* the gfx queue buffer is indexed from its own start */
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
+ drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.me.num_me,
+ adev->gfx.me.num_pipe_per_me,
+ adev->gfx.me.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_gfx_queue_reg_list_10[reg].reg_name,
+ adev->gfx.ip_dump_gfx_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+}
+
+/*
+ * .dump_ip_state callback: read GC registers into the snapshot buffers
+ * adev->gfx.ip_dump_core, adev->gfx.ip_dump_compute_queues and
+ * adev->gfx.ip_dump_gfx_queues.
+ *
+ * The sections are captured in that order and the function returns at the
+ * first buffer that is NULL. Every section is bracketed by
+ * amdgpu_gfx_off_ctrl(adev, false) / amdgpu_gfx_off_ctrl(adev, true)
+ * (presumably to keep GFXOFF from engaging during the reads - confirm
+ * against amdgpu_gfx_off_ctrl()). Per-queue reads are done under
+ * adev->srbm_mutex with the queue selected through nv_grbm_select().
+ */
+static void gfx_v10_ip_dump(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < reg_count; i++)
+ adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_10_1[i]));
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ /*
+ * ME0 is for GFX so start from 1 for CP; the ME index
+ * actually passed is adev->gfx.me.num_me + i.
+ */
+ nv_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
+ /*
+ * For every MEC but the first, the ME1 header dump slot is
+ * filled from the ME2 header dump register instead.
+ */
+ if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP));
+ else
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_cp_reg_list_10[reg]));
+ }
+ /* each queue occupies reg_count consecutive entries */
+ index += reg_count;
+ }
+ }
+ }
+ /* back to the default selection before releasing the mutex */
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ /* the gfx queue buffer is indexed from its own start */
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ nv_grbm_select(adev, i, j, k, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
+ adev->gfx.ip_dump_gfx_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_gfx_queue_reg_list_10[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+/*
+ * .emit_cleaner_shader callback of the gfx and compute ring funcs.
+ *
+ * Writes a two-dword PACKET3_RUN_CLEANER_SHADER packet to @ring: the header
+ * followed by one zero dword. The two dwords are what the ring funcs
+ * account for in emit_frame_size.
+ */
+static void gfx_v10_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+ /* Emit the cleaner shader */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+}
+
+/*
+ * .begin_use callback of the gfx and compute ring funcs: forwards @ring to
+ * the common profile and enforce-isolation begin_use helpers, in that order.
+ */
+static void gfx_v10_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_begin_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+}
+
+/*
+ * .end_use callback of the gfx and compute ring funcs: forwards @ring to
+ * the common profile and enforce-isolation end_use helpers, in that order.
+ */
+static void gfx_v10_0_ring_end_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_end_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
.name = "gfx_v10_0",
.early_init = gfx_v10_0_early_init,
@@ -9359,6 +9823,8 @@ static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
.set_clockgating_state = gfx_v10_0_set_clockgating_state,
.set_powergating_state = gfx_v10_0_set_powergating_state,
.get_clockgating_state = gfx_v10_0_get_clockgating_state,
+ .dump_ip_state = gfx_v10_ip_dump,
+ .print_ip_state = gfx_v10_ip_print,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
@@ -9366,7 +9832,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB_0,
+ .secure_submission_supported = true,
.get_rptr = gfx_v10_0_ring_get_rptr_gfx,
.get_wptr = gfx_v10_0_ring_get_wptr_gfx,
.set_wptr = gfx_v10_0_ring_set_wptr_gfx,
@@ -9375,7 +9841,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
7 + /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* VM_FLUSH */
+ 4 + /* VM_FLUSH */
8 + /* FENCE for VM_FLUSH */
20 + /* GDS switch */
4 + /* double SWITCH_BUFFER,
@@ -9391,7 +9857,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
5 + /* HDP_INVL */
8 + 8 + /* FENCE x2 */
2 + /* SWITCH_BUFFER */
- 8, /* gfx_v10_0_emit_mem_sync */
+ 8 + /* gfx_v10_0_emit_mem_sync */
+ 2, /* gfx_v10_0_ring_emit_cleaner_shader */
.emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_gfx */
.emit_ib = gfx_v10_0_ring_emit_ib_gfx,
.emit_fence = gfx_v10_0_ring_emit_fence,
@@ -9401,19 +9868,21 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
.test_ring = gfx_v10_0_ring_test_ring,
.test_ib = gfx_v10_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v10_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_switch_buffer = gfx_v10_0_ring_emit_sb,
.emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl,
.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
- .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
.preempt_ib = gfx_v10_0_ring_preempt_ib,
.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
- .soft_recovery = gfx_v10_0_ring_soft_recovery,
.emit_mem_sync = gfx_v10_0_emit_mem_sync,
+ .reset = gfx_v10_0_reset_kgq,
+ .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v10_0_ring_begin_use,
+ .end_use = gfx_v10_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
@@ -9421,7 +9890,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB_0,
.get_rptr = gfx_v10_0_ring_get_rptr_compute,
.get_wptr = gfx_v10_0_ring_get_wptr_compute,
.set_wptr = gfx_v10_0_ring_set_wptr_compute,
@@ -9434,7 +9902,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* gfx_v10_0_ring_emit_vm_flush */
8 + 8 + 8 + /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
- 8, /* gfx_v10_0_emit_mem_sync */
+ 8 + /* gfx_v10_0_emit_mem_sync */
+ 2, /* gfx_v10_0_ring_emit_cleaner_shader */
.emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
.emit_ib = gfx_v10_0_ring_emit_ib_compute,
.emit_fence = gfx_v10_0_ring_emit_fence,
@@ -9444,12 +9913,16 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
.emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
.test_ring = gfx_v10_0_ring_test_ring,
.test_ib = gfx_v10_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v10_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
.emit_mem_sync = gfx_v10_0_emit_mem_sync,
+ .reset = gfx_v10_0_reset_kcq,
+ .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v10_0_ring_begin_use,
+ .end_use = gfx_v10_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
@@ -9457,7 +9930,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB_0,
.get_rptr = gfx_v10_0_ring_get_rptr_compute,
.get_wptr = gfx_v10_0_ring_get_wptr_compute,
.set_wptr = gfx_v10_0_ring_set_wptr_compute,
@@ -9468,7 +9940,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
7 + /* gfx_v10_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* gfx_v10_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
.emit_ib = gfx_v10_0_ring_emit_ib_compute,
@@ -9481,13 +9952,14 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
};
static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- adev->gfx.kiq.ring.funcs = &gfx_v10_0_ring_funcs_kiq;
+ adev->gfx.kiq[0].ring.funcs = &gfx_v10_0_ring_funcs_kiq;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].funcs = &gfx_v10_0_ring_funcs_gfx;
@@ -9506,6 +9978,11 @@ static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_reg_irq_funcs = {
.process = gfx_v10_0_priv_reg_irq,
};
+/*
+ * Callbacks for the bad-opcode interrupt source; installed on
+ * adev->gfx.bad_op_irq by gfx_v10_0_set_irq_funcs().
+ */
+static const struct amdgpu_irq_src_funcs gfx_v10_0_bad_op_irq_funcs = {
+ .set = gfx_v10_0_set_bad_op_fault_state,
+ .process = gfx_v10_0_bad_op_irq,
+};
+
static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_inst_irq_funcs = {
.set = gfx_v10_0_set_priv_inst_fault_state,
.process = gfx_v10_0_priv_inst_irq,
@@ -9521,27 +9998,33 @@ static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
adev->gfx.eop_irq.funcs = &gfx_v10_0_eop_irq_funcs;
- adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
- adev->gfx.kiq.irq.funcs = &gfx_v10_0_kiq_irq_funcs;
+ adev->gfx.kiq[0].irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
+ adev->gfx.kiq[0].irq.funcs = &gfx_v10_0_kiq_irq_funcs;
adev->gfx.priv_reg_irq.num_types = 1;
adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs;
+ adev->gfx.bad_op_irq.num_types = 1;
+ adev->gfx.bad_op_irq.funcs = &gfx_v10_0_bad_op_irq_funcs;
+
adev->gfx.priv_inst_irq.num_types = 1;
adev->gfx.priv_inst_irq.funcs = &gfx_v10_0_priv_inst_irq_funcs;
}
static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs;
break;
case IP_VERSION(10, 1, 2):
@@ -9555,7 +10038,7 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
{
- unsigned total_cu = adev->gfx.config.max_cu_per_sh *
+ unsigned int total_cu = adev->gfx.config.max_cu_per_sh *
adev->gfx.config.max_sh_per_se *
adev->gfx.config.max_shader_engines;
@@ -9565,6 +10048,20 @@ static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
adev->gds.oa_size = 16;
}
+/*
+ * Fill the adev->mqds[] entries for the GFX and COMPUTE hardware IP types
+ * with the size of the matching v10 MQD structure and the function that
+ * initializes it.
+ */
+static void gfx_v10_0_set_mqd_funcs(struct amdgpu_device *adev)
+{
+ /* set gfx eng mqd */
+ adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
+ sizeof(struct v10_gfx_mqd);
+ adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
+ gfx_v10_0_gfx_mqd_init;
+ /* set compute eng mqd */
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
+ sizeof(struct v10_compute_mqd);
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
+ gfx_v10_0_compute_mqd_init;
+}
+
static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
u32 bitmap)
{
@@ -9622,7 +10119,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
{
int i, j, k, counter, active_cu_number = 0;
u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
- unsigned disable_masks[4 * 2];
+ unsigned int disable_masks[4 * 2];
if (!adev || !cu_info)
return -EINVAL;
@@ -9633,19 +10130,25 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
bitmap = i * adev->gfx.config.max_sh_per_se + j;
- if (((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) ||
- (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3))) &&
+ if (((amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 0)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 3)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 6)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 7))) &&
((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1))
continue;
mask = 1;
ao_bitmap = 0;
counter = 0;
- gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0);
if (i < 4 && j < 2)
gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev);
- cu_info->bitmap[i][j] = bitmap;
+ cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask) {
@@ -9661,7 +10164,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
}
}
- gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;
@@ -9737,8 +10240,7 @@ static void gfx_v10_3_set_power_brake_sequence(struct amdgpu_device *adev)
(0x1 << DIDT_SQ_THROTTLE_CTRL__PWRBRK_STALL_EN__SHIFT));
}
-const struct amdgpu_ip_block_version gfx_v10_0_ip_block =
-{
+const struct amdgpu_ip_block_version gfx_v10_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GFX,
.major = 10,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
new file mode 100644
index 000000000000..f67569ccf9f6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Define the cleaner shader gfx_10_1_10 */
+static const u32 gfx_10_1_10_cleaner_shader_hex[] = {
+ 0xb0804004, 0xbf8a0000,
+ 0xbf068100, 0xbf840023,
+ 0xbe8203b8, 0xbefc0380,
+ 0x7e008480, 0x7e028480,
+ 0x7e048480, 0x7e068480,
+ 0x7e088480, 0x7e0a8480,
+ 0x7e0c8480, 0x7e0e8480,
+ 0xbefc0302, 0x80828802,
+ 0xbf84fff5, 0xbe8203ff,
+ 0x80000000, 0x87020102,
+ 0xbf840012, 0xbefe03c1,
+ 0xbeff03c1, 0xd7650001,
+ 0x0001007f, 0xd7660001,
+ 0x0002027e, 0x16020288,
+ 0xbe8203bf, 0xbefc03c1,
+ 0xd9382000, 0x00020201,
+ 0xd9386040, 0x00040401,
+ 0xd70f6a01, 0x000202ff,
+ 0x00000400, 0x80828102,
+ 0xbf84fff7, 0xbefc03ff,
+ 0x00000068, 0xbe803000,
+ 0xbe813000, 0xbe823000,
+ 0xbe833000, 0x80fc847c,
+ 0xbf84fffa, 0xbeea0480,
+ 0xbeec0480, 0xbeee0480,
+ 0xbef00480, 0xbef20480,
+ 0xbef40480, 0xbef60480,
+ 0xbef80480, 0xbefa0480,
+ 0xbf810000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+};
+
+/* Define the cleaner shader gfx_10_3_0 */
+static const u32 gfx_10_3_0_cleaner_shader_hex[] = {
+ 0xb0804004, 0xbf8a0000,
+ 0xbe8203b8, 0xbefc0380,
+ 0x7e008480, 0x7e028480,
+ 0x7e048480, 0x7e068480,
+ 0x7e088480, 0x7e0a8480,
+ 0x7e0c8480, 0x7e0e8480,
+ 0xbefc0302, 0x80828802,
+ 0xbf84fff5, 0xbe8203ff,
+ 0x80000000, 0x87020002,
+ 0xbf840012, 0xbefe03c1,
+ 0xbeff03c1, 0xd7650001,
+ 0x0001007f, 0xd7660001,
+ 0x0002027e, 0x16020288,
+ 0xbe8203bf, 0xbefc03c1,
+ 0xd9382000, 0x00020201,
+ 0xd9386040, 0x00040401,
+ 0xd70f6a01, 0x000202ff,
+ 0x00000400, 0x80828102,
+ 0xbf84fff7, 0xbefc03ff,
+ 0x00000068, 0xbe803080,
+ 0xbe813080, 0xbe823080,
+ 0xbe833080, 0x80fc847c,
+ 0xbf84fffa, 0xbeea0480,
+ 0xbeec0480, 0xbeee0480,
+ 0xbef00480, 0xbef20480,
+ 0xbef40480, 0xbef60480,
+ 0xbef80480, 0xbefa0480,
+ 0xbf810000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
new file mode 100644
index 000000000000..54f7ed9e2801
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader cleans LDS, SGPRs and VGPRs. It is the first 64 dwords (256 bytes) of the 256-dword cleaner shader.
+
+// GFX10.1 : Clear SGPRs, VGPRs and LDS
+// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot
+// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD
+// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS)
+// It takes 2 workgroups to use all of LDS: one on each CU of the WGP
+// Each wave clears SGPRs 0 - 107
+// Each wave clears VGPRs 0 - 63
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+
+
+shader main
+ asic(GFX10.1)
+ type(CS)
+ wave_size(32)
+// Note: original source code from SQ team
+//
+// Create 32 waves in a threadgroup (CS waves)
+// Each allocates 64 VGPRs
+// The workgroup allocates all of LDS (64kbytes)
+//
+// Takes about 2500 clocks to run.
+// (theoretical fastest = 1024clks vgpr + 640lds = 1660 clks)
+//
+ S_BARRIER
+ s_cmp_eq_u32 s0, 1 // s0 holds COMPUTE_USER_DATA_0 as set by FW; VGPRs and LDS are cleared only when it is 1
+ s_cbranch_scc0 label_0023 // scc = (s0 == 1); when scc is 0 skip the VGPR/LDS clearing and go straight to the SGPRs
+
+ s_mov_b32 s2, 0x00000038 // Loop 64/8=8 times (loop unrolled for performance)
+ s_mov_b32 m0, 0
+ //
+ // CLEAR VGPRs
+ //
+label_0005:
+ v_movreld_b32 v0, 0
+ v_movreld_b32 v1, 0
+ v_movreld_b32 v2, 0
+ v_movreld_b32 v3, 0
+ v_movreld_b32 v4, 0
+ v_movreld_b32 v5, 0
+ v_movreld_b32 v6, 0
+ v_movreld_b32 v7, 0
+ s_mov_b32 m0, s2
+ s_sub_u32 s2, s2, 8
+ s_cbranch_scc0 label_0005
+ //
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s1 // sgpr1 has the tg_size (first_wave) term here, since s0 is taken by COMPUTE_USER_DATA_0
+ s_cbranch_scc0 label_0023 // Clean LDS only if this is the first wave of the ThreadGroup/WorkGroup
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x00000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+
+ //
+ // CLEAR SGPRs
+ //
+label_0023:
+ s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance)
+label_sgpr_loop:
+ s_movreld_b32 s0, s0
+ s_movreld_b32 s1, s0
+ s_movreld_b32 s2, s0
+ s_movreld_b32 s3, s0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ //clear vcc and ttmp registers
+ s_mov_b64 vcc, 0 //clear vcc
+ //s_setreg_imm32_b32 hw_reg_shader_flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ //s_setreg_imm32_b32 hw_reg_shader_flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+
+ s_endpgm
+
+end
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm
new file mode 100644
index 000000000000..0e1c246166c0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader cleans LDS, SGPRs and VGPRs. It is the first 64 dwords (256 bytes) of the 192-dword cleaner shader.
+// To turn this shader program on for compilation, change this to main and the lower shader main to main_1
+
+// GFX10.3 : Clear SGPRs, VGPRs and LDS
+// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot
+// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD
+// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS)
+// It takes 2 workgroups to use all of LDS: one on each CU of the WGP
+// Each wave clears SGPRs 0 - 107
+// Each wave clears VGPRs 0 - 63
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+
+
+shader main
+ asic(GFX10)
+ type(CS)
+ wave_size(32)
+// Note: original source code from SQ team
+
+//
+// Create 32 waves in a threadgroup (CS waves)
+// Each allocates 64 VGPRs
+// The workgroup allocates all of LDS (64kbytes)
+//
+// Takes about 2500 clocks to run.
+// (theoretical fastest = 1024clks vgpr + 640lds = 1660 clks)
+//
+ S_BARRIER
+ s_mov_b32 s2, 0x00000038 // Loop 64/8=8 times (loop unrolled for performance)
+ s_mov_b32 m0, 0
+ //
+ // CLEAR VGPRs
+ //
+label_0005:
+ v_movreld_b32 v0, 0
+ v_movreld_b32 v1, 0
+ v_movreld_b32 v2, 0
+ v_movreld_b32 v3, 0
+ v_movreld_b32 v4, 0
+ v_movreld_b32 v5, 0
+ v_movreld_b32 v6, 0
+ v_movreld_b32 v7, 0
+ s_mov_b32 m0, s2
+ s_sub_u32 s2, s2, 8
+ s_cbranch_scc0 label_0005
+ //
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+ s_cbranch_scc0 label_0023 // Clean LDS only if this is the first wave of the ThreadGroup/WorkGroup
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x00000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+
+ //
+ // CLEAR SGPRs
+ //
+label_0023:
+ s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance)
+label_sgpr_loop:
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ //clear flat scratch, vcc and ttmp registers
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 vcc, 0 //clear vcc
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+
+ s_endpgm
+
+end
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
new file mode 100644
index 000000000000..8a2ee2de390f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -0,0 +1,7538 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_smu.h"
+#include "imu_v11_0.h"
+#include "soc21.h"
+#include "nvd.h"
+
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "smuio/smuio_13_0_6_offset.h"
+#include "smuio/smuio_13_0_6_sh_mask.h"
+#include "navi10_enum.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
+
+#include "soc15.h"
+#include "clearstate_gfx11.h"
+#include "v11_structs.h"
+#include "gfx_v11_0.h"
+#include "gfx_v11_0_cleaner_shader.h"
+#include "gfx_v11_0_3.h"
+#include "nbio_v4_3.h"
+#include "mes_v11_0.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
+
+#define GFX11_NUM_GFX_RINGS 1
+#define GFX11_MEC_HPD_SIZE 2048
+
+#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1 0x1388
+
+#define regCGTT_WD_CLK_CTRL 0x5086
+#define regCGTT_WD_CLK_CTRL_BASE_IDX 1
+#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1 0x4e7e
+#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1
+#define regPC_CONFIG_CNTL_1 0x194d
+#define regPC_CONFIG_CNTL_1_BASE_IDX 1
+
+#define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_GFX_HQD_VMID_DEFAULT 0x00000000
+#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000
+#define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01
+#define regCP_GFX_HQD_CNTL_DEFAULT 0x00a00000
+#define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000
+
+/* Default values for the compute HQD/MQD registers (see the _DEFAULT suffix). */
+#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006
+#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509
+#define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000
+#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501
+#define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000
+
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin");
+
+/*
+ * GC (graphics core) status and debug registers for GFX 11, one
+ * SOC15_REG_ENTRY_STR() entry per register, all for GC instance 0.
+ * NOTE(review): presumably consumed by the register-dump code elsewhere in
+ * this file - confirm against the users of this table.
+ */
+static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
+ SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
+ /*
+ * cp header registers
+ * NOTE(review): regCP_MES_HEADER_DUMP is listed eight times; this looks
+ * deliberate (repeated reads of one dump register) - confirm.
+ */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ /* SE status registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5)
+};
+
+/*
+ * Compute queue (CP_HQD_*) registers for GFX 11, followed by the ME header
+ * dump register, all for GC instance 0.
+ * NOTE(review): presumably sampled per compute queue by the register-dump
+ * code elsewhere in this file - confirm against the users of this table.
+ */
+static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = {
+ /* compute registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS),
+ /*
+ * cp header registers
+ * NOTE(review): regCP_ME_HEADER_DUMP is listed eight times; this looks
+ * deliberate (repeated reads of one dump register) - confirm.
+ */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+};
+
+/*
+ * Graphics queue (CP_GFX_HQD_* / CP_GFX_MQD_* / ring buffer / IB1) registers
+ * for GFX 11, followed by the PFP and ME header dump registers, all for GC
+ * instance 0.
+ * NOTE(review): presumably sampled per gfx queue by the register-dump code
+ * elsewhere in this file - confirm against the users of this table.
+ */
+static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
+ /* gfx queue registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
+ /*
+ * cp header registers
+ * NOTE(review): regCP_PFP_HEADER_DUMP and regCP_ME_HEADER_DUMP are each
+ * listed eight times; this looks deliberate (repeated reads of one dump
+ * register) - confirm.
+ */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+};
+
+/*
+ * Golden register settings programmed by gfx_v11_0_init_golden_registers()
+ * for every GC 11 variant (skipped for SR-IOV virtual functions).
+ * NOTE(review): the two trailing values are presumably (mask, value) -
+ * confirm against the SOC15_REG_GOLDEN_VALUE() definition.
+ */
+static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
+};
+
+/*
+ * Additional golden register settings that gfx_v11_0_init_golden_registers()
+ * programs only for GC IP versions 11.0.1 and 11.0.4.
+ * NOTE(review): the two trailing values are presumably (mask, value) -
+ * confirm against the SOC15_REG_GOLDEN_VALUE() definition.
+ */
+static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
+};
+
+/*
+ * Default SH_MEM_CONFIG value: 64-bit address mode, unaligned alignment mode
+ * and an INITIAL_INST_PREFETCH field of 3, each shifted into its field.
+ */
+#define DEFAULT_SH_MEM_CONFIG \
+ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
+ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
+
+static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
+static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
+static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
+static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
+static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
+static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info);
+static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
+static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id);
+static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
+
+static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
+static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
+static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val);
+static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
+static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel);
+static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
+ bool enable);
+
+/*
+ * Emit a PACKET3_SET_RESOURCES packet on the KIQ ring.
+ *
+ * @kiq_ring: KIQ ring the packet is written to
+ * @queue_mask: 64-bit queue mask, emitted as low/high dwords
+ *
+ * Writes eight dwords in total (header plus seven payload dwords), which is
+ * what .set_resources_size in gfx_v11_0_kiq_pm4_funcs accounts for.
+ */
+static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ u64 shader_mc_addr;
+
+ /*
+ * Cleaner shader MC address, shifted right by 8 bits.
+ * NOTE(review): presumably the packet field takes a 256-byte-granular
+ * address - confirm against the packet definition.
+ */
+ shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+ amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */
+ PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
+ amdgpu_ring_write(kiq_ring, 0); /* oac mask */
+ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
+}
+
+/*
+ * Emit a PACKET3_MAP_QUEUES packet on the KIQ ring for one queue.
+ *
+ * @kiq_ring: KIQ ring the packet is written to
+ * @ring: ring being mapped; supplies queue, pipe, doorbell index, MQD
+ * address and write-pointer address
+ *
+ * The ME and engine-select fields depend on the ring type: compute uses
+ * me 1 / eng_sel 0, gfx uses me 0 / eng_sel 4, MES uses me 2 / eng_sel 5.
+ * Any other type triggers WARN_ON(1) and leaves both at 0.
+ * Writes seven dwords in total.
+ */
+static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring)
+{
+ uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ uint64_t wptr_addr = ring->wptr_gpu_addr;
+ uint32_t me = 0, eng_sel = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_COMPUTE:
+ me = 1;
+ eng_sel = 0;
+ break;
+ case AMDGPU_RING_TYPE_GFX:
+ me = 0;
+ eng_sel = 4;
+ break;
+ case AMDGPU_RING_TYPE_MES:
+ me = 2;
+ eng_sel = 5;
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ /* Q_sel: 0, vmid: 0, num_Q: 1; me and engine_sel come from the switch above */
+ amdgpu_ring_write(kiq_ring, /* control dword */
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+ PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+ PACKET3_MAP_QUEUES_ME((me)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
+/*
+ * Emit a PACKET3_UNMAP_QUEUES packet on the KIQ ring for one queue.
+ *
+ * When MES is enabled and the KIQ ring scheduler is not ready, the request is
+ * handed to amdgpu_mes_unmap_legacy_queue() and nothing is written to
+ * @kiq_ring. Otherwise six dwords are written; the last three carry
+ * @gpu_addr and @seq only for PREEMPT_QUEUES_NO_UNMAP and are zero for every
+ * other action.
+ */
+static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+				   struct amdgpu_ring *ring,
+				   enum amdgpu_unmap_queues_action action,
+				   u64 gpu_addr, u64 seq)
+{
+	struct amdgpu_device *adev = kiq_ring->adev;
+	const bool preempt = (action == PREEMPT_QUEUES_NO_UNMAP);
+	uint32_t engine = 0;
+
+	/* gfx rings use engine select 4, everything else 0 */
+	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX)
+		engine = 4;
+
+	if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
+		amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
+		return;
+	}
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+	/* control dword: Q_sel 0, one queue, selected by doorbell offset */
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_UNMAP_QUEUES_ACTION(action) |
+			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(engine) |
+			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+	/* fence address and sequence are only meaningful when preempting */
+	amdgpu_ring_write(kiq_ring, preempt ? lower_32_bits(gpu_addr) : 0);
+	amdgpu_ring_write(kiq_ring, preempt ? upper_32_bits(gpu_addr) : 0);
+	amdgpu_ring_write(kiq_ring, preempt ? seq : 0);
+}
+
+/*
+ * Emit a PACKET3_QUERY_STATUS packet on the KIQ ring for one queue.
+ *
+ * @kiq_ring: KIQ ring the packet is written to
+ * @ring: queried ring; supplies the doorbell index and, via its type, the
+ * engine select (4 for gfx rings, 0 otherwise)
+ * @addr: 64-bit address emitted as low/high dwords
+ * @seq: 64-bit sequence value emitted as low/high dwords
+ *
+ * Writes seven dwords in total.
+ */
+static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ u64 addr,
+ u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+ PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+ PACKET3_QUERY_STATUS_COMMAND(2));
+ amdgpu_ring_write(kiq_ring, /* doorbell offset and engine select of the queried queue */
+ PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+ PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+/*
+ * KIQ TLB-invalidate hook: forwards to gfx_v11_0_ring_invalidate_tlbs() on
+ * the KIQ ring with dst_sel fixed to 1.
+ */
+static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
+}
+
+/*
+ * KIQ PM4 packet emitters for GFX 11 and the size of each packet.
+ * The set_resources/map_queues/unmap_queues/query_status sizes equal the
+ * number of amdgpu_ring_write() calls made by the corresponding emitter
+ * above (8, 7, 6 and 7).
+ * NOTE(review): invalidate_tlbs_size depends on
+ * gfx_v11_0_ring_invalidate_tlbs(), which is defined elsewhere - confirm.
+ */
+static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
+ .kiq_set_resources = gfx11_kiq_set_resources,
+ .kiq_map_queues = gfx11_kiq_map_queues,
+ .kiq_unmap_queues = gfx11_kiq_unmap_queues,
+ .kiq_query_status = gfx11_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
+ .set_resources_size = 8,
+ .map_queues_size = 7,
+ .unmap_queues_size = 6,
+ .query_status_size = 7,
+ .invalidate_tlbs_size = 2,
+};
+
+/* Install the GFX 11 KIQ PM4 function table on the first (index 0) KIQ. */
+static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs;
+}
+
+/*
+ * Program the golden register settings for the detected GC IP version.
+ *
+ * Nothing is programmed for SR-IOV virtual functions. GC 11.0.1 and 11.0.4
+ * first receive golden_settings_gc_11_0_1; every version then receives
+ * golden_settings_gc_11_0.
+ */
+static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
+{
+	u32 gc_ver;
+
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+
+	/* version-specific settings go in before the common ones */
+	if (gc_ver == IP_VERSION(11, 0, 1) || gc_ver == IP_VERSION(11, 0, 4))
+		soc15_program_register_sequence(adev,
+				golden_settings_gc_11_0_1,
+				(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
+
+	soc15_program_register_sequence(adev,
+			golden_settings_gc_11_0,
+			(const u32)ARRAY_SIZE(golden_settings_gc_11_0));
+}
+
+/*
+ * Emit a PACKET3_WRITE_DATA packet that writes @val to register @reg.
+ *
+ * @ring: ring the packet is written to
+ * @eng_sel: value for the WRITE_DATA engine-select field
+ * @wc: when true, WR_CONFIRM is set in the control dword
+ * @reg: destination register, emitted as the low address dword
+ * @val: value to write
+ *
+ * Five dwords are written; DST_SEL is fixed to 0 and the high address dword
+ * is always 0.
+ */
+static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
+ bool wc, uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
+ WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+/*
+ * Emit a PACKET3_WAIT_REG_MEM packet (seven dwords).
+ *
+ * @ring: ring the packet is written to
+ * @eng_sel: value for the WAIT_REG_MEM engine field
+ * @mem_space: 1 to poll memory, 0 to poll a register
+ * @opt: value for the WAIT_REG_MEM operation field
+ * @addr0: first address dword; must be dword aligned when @mem_space is set
+ * @addr1: second address dword
+ * @ref: reference value
+ * @mask: mask value
+ * @inv: emitted last, as the poll interval
+ *
+ * The compare function is fixed to 3 ("equal"). A misaligned @addr0 with
+ * @mem_space set hits BUG_ON().
+ */
+static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
+ int mem_space, int opt, uint32_t addr0,
+ uint32_t addr1, uint32_t ref, uint32_t mask,
+ uint32_t inv)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring,
+ /* memory (1) or register (0) */
+ (WAIT_REG_MEM_MEM_SPACE(mem_space) |
+ WAIT_REG_MEM_OPERATION(opt) | /* wait */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(eng_sel)));
+
+ if (mem_space)
+ BUG_ON(addr0 & 0x3); /* Dword align */
+ amdgpu_ring_write(ring, addr0);
+ amdgpu_ring_write(ring, addr1);
+ amdgpu_ring_write(ring, ref);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, inv); /* poll interval */
+}
+
+/*
+ * Pad @ring with @num_nop dwords of NOP.
+ *
+ * A single dword is emitted as the ring's plain NOP value. For larger counts
+ * a PACKET3_NOP header is written first (its count field capped at 0x3ffe),
+ * and the remaining num_nop - 1 dwords are filled through
+ * amdgpu_ring_insert_nop().
+ */
+static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
+{
+	if (num_nop != 1) {
+		/* header occupies index 0 and covers up to 0x3ffe following dwords */
+		amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
+
+		/* the other num_nop - 1 dwords are ordinary NOPs */
+		amdgpu_ring_insert_nop(ring, num_nop - 1);
+	} else {
+		/* the header alone already is a NOP packet */
+		amdgpu_ring_write(ring, ring->funcs->nop);
+	}
+}
+
+/*
+ * Basic ring test: seed SCRATCH_REG0 with 0xCAFEDEAD, submit a packet that
+ * writes 0xDEADBEEF to it, and poll the register until the new value shows up.
+ *
+ * Returns 0 on success, the amdgpu_ring_alloc() error if the ring cannot be
+ * reserved, or -ETIMEDOUT if the value is not seen within adev->usec_timeout
+ * polls.
+ */
+static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+	unsigned int attempt;
+	int ret;
+
+	WREG32(scratch, 0xCAFEDEAD);
+
+	ret = amdgpu_ring_alloc(ring, 5);
+	if (ret) {
+		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
+			  ring->idx, ret);
+		return ret;
+	}
+
+	/* KIQ goes through the ring's wreg emitter, other rings use SET_UCONFIG_REG */
+	if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) {
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+		amdgpu_ring_write(ring, scratch -
+				  PACKET3_SET_UCONFIG_REG_START);
+		amdgpu_ring_write(ring, 0xDEADBEEF);
+	} else {
+		gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
+	}
+	amdgpu_ring_commit(ring);
+
+	for (attempt = 0; attempt < adev->usec_timeout; attempt++) {
+		if (RREG32(scratch) == 0xDEADBEEF)
+			return 0;
+
+		/* emulation mode polls with a 1 ms sleep instead of a 1 us delay */
+		if (amdgpu_emu_mode == 1)
+			msleep(1);
+		else
+			udelay(1);
+	}
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * Indirect-buffer test: schedule an IB whose WRITE_DATA packet stores
+ * 0xDEADBEEF into a writeback slot seeded with 0xCAFEDEAD, wait for the
+ * fence, then check the slot.
+ *
+ * Returns 0 on success (and immediately for the KIQ ring when MES KIQ is
+ * enabled), -ETIMEDOUT if the fence does not signal within @timeout,
+ * -EINVAL if the slot holds the wrong value, or the error from the failing
+ * helper.
+ */
+static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct dma_fence *fence = NULL;
+	struct amdgpu_ib ib;
+	uint64_t wb_gpu_addr;
+	uint32_t *wb_cpu;
+	unsigned int wb_slot;
+	long ret;
+
+	/* MES KIQ fw hasn't indirect buffer support for now */
+	if (adev->enable_mes_kiq &&
+	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+		return 0;
+
+	memset(&ib, 0, sizeof(ib));
+
+	ret = amdgpu_device_wb_get(adev, &wb_slot);
+	if (ret)
+		return ret;
+
+	/* seed the writeback slot so a stale value cannot pass the test */
+	wb_gpu_addr = adev->wb.gpu_addr + (wb_slot * 4);
+	adev->wb.wb[wb_slot] = cpu_to_le32(0xCAFEDEAD);
+	wb_cpu = &adev->wb.wb[wb_slot];
+
+	ret = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
+	if (ret) {
+		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", ret);
+		goto free_wb;
+	}
+
+	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+	ib.ptr[2] = lower_32_bits(wb_gpu_addr);
+	ib.ptr[3] = upper_32_bits(wb_gpu_addr);
+	ib.ptr[4] = 0xDEADBEEF;
+	ib.length_dw = 5;
+
+	ret = amdgpu_ib_schedule(ring, 1, &ib, NULL, &fence);
+	if (ret)
+		goto free_ib;
+
+	/* 0 means the wait timed out, a negative value is an error */
+	ret = dma_fence_wait_timeout(fence, false, timeout);
+	if (ret == 0)
+		ret = -ETIMEDOUT;
+	if (ret < 0)
+		goto free_ib;
+
+	ret = (le32_to_cpu(*wb_cpu) == 0xDEADBEEF) ? 0 : -EINVAL;
+
+free_ib:
+	amdgpu_ib_free(&ib, NULL);
+	dma_fence_put(fence);
+free_wb:
+	amdgpu_device_wb_free(adev, wb_slot);
+	return ret;
+}
+
+/*
+ * Release the PFP, ME, RLC and MEC firmware images and free the RLC
+ * register list format buffer.
+ */
+static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
+{
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+
+ kfree(adev->gfx.rlc.register_list_format);
+}
+
+/*
+ * Request "amdgpu/<ucode_prefix>_toc.bin" and record its version, feature
+ * version, size and payload start address in adev->psp.toc.
+ *
+ * Returns 0 on success. On a request failure the firmware handle is released
+ * and the error code is returned.
+ */
+static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
+{
+	const struct psp_firmware_header_v1_0 *hdr;
+	int ret;
+
+	ret = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
+				   AMDGPU_UCODE_REQUIRED,
+				   "amdgpu/%s_toc.bin", ucode_prefix);
+	if (ret) {
+		amdgpu_ucode_release(&adev->psp.toc_fw);
+		return ret;
+	}
+
+	hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
+
+	/* header fields are little-endian; the payload starts at the array offset */
+	adev->psp.toc.start_addr = (uint8_t *)hdr +
+		le32_to_cpu(hdr->header.ucode_array_offset_bytes);
+	adev->psp.toc.size_bytes = le32_to_cpu(hdr->header.ucode_size_bytes);
+	adev->psp.toc.feature_version = le32_to_cpu(hdr->sos.fw_version);
+	adev->psp.toc.fw_version = le32_to_cpu(hdr->header.ucode_version);
+
+	return 0;
+}
+
+/*
+ * Decide whether CP gfx shadowing is used, based on GC IP version, firmware
+ * versions and SR-IOV mode.
+ *
+ * For GC 11.0.0/11.0.2/11.0.3 with ME >= 1505, PFP >= 1600 and MEC >= 512 the
+ * flag is set for SR-IOV virtual functions and cleared otherwise; with older
+ * firmware on those parts the flag is left as it was. Every other IP version
+ * clears it.
+ */
+static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
+{
+	bool fw_new_enough;
+
+	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+	case IP_VERSION(11, 0, 0):
+	case IP_VERSION(11, 0, 2):
+	case IP_VERSION(11, 0, 3):
+		fw_new_enough = adev->gfx.me_fw_version >= 1505 &&
+				adev->gfx.pfp_fw_version >= 1600 &&
+				adev->gfx.mec_fw_version >= 512;
+		/* older firmware: keep whatever value is already stored */
+		if (!fw_new_enough)
+			break;
+		adev->gfx.cp_gfx_shadow = !!amdgpu_sriov_vf(adev);
+		break;
+	default:
+		adev->gfx.cp_gfx_shadow = false;
+		break;
+	}
+}
+
+/*
+ * Request and parse all GFX 11 firmware images (PFP, ME, RLC, MEC, and
+ * optionally TOC and IMU).
+ *
+ * The PFP header version decides whether the RS64 variants of the CP
+ * microcode are set up. RLC firmware is only requested when not running as
+ * an SR-IOV virtual function. The TOC image is only requested for the
+ * AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO load type.
+ *
+ * Returns 0 on success or a negative error code. On any failure, including
+ * TOC and IMU initialisation failures, the PFP/ME/RLC/MEC firmware handles
+ * are released before returning.
+ */
+static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
+{
+	char ucode_prefix[25];
+	int err;
+	const struct rlc_firmware_header_v2_0 *rlc_hdr;
+	uint16_t version_major;
+	uint16_t version_minor;
+
+	DRM_DEBUG("\n");
+
+	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
+	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+				   AMDGPU_UCODE_REQUIRED,
+				   "amdgpu/%s_pfp.bin", ucode_prefix);
+	if (err)
+		goto out;
+	/* check pfp fw hdr version to decide if enable rs64 for gfx11.*/
+	adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
+				(union amdgpu_firmware_header *)
+				adev->gfx.pfp_fw->data, 2, 0);
+	if (adev->gfx.rs64_enable) {
+		dev_info(adev->dev, "CP RS64 enable\n");
+		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
+		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
+		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
+	} else {
+		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
+	}
+
+	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+				   AMDGPU_UCODE_REQUIRED,
+				   "amdgpu/%s_me.bin", ucode_prefix);
+	if (err)
+		goto out;
+	if (adev->gfx.rs64_enable) {
+		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
+		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
+		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
+	} else {
+		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
+	}
+
+	if (!amdgpu_sriov_vf(adev)) {
+		/*
+		 * RLC image selection: GC 11.0.0 with PCI revision 0xCE uses
+		 * a dedicated image, "kicker" parts use the _kicker image,
+		 * everything else the default one.
+		 */
+		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) &&
+		    adev->pdev->revision == 0xCE)
+			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+						   AMDGPU_UCODE_REQUIRED,
+						   "amdgpu/gc_11_0_0_rlc_1.bin");
+		else if (amdgpu_is_kicker_fw(adev))
+			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+						   AMDGPU_UCODE_REQUIRED,
+						   "amdgpu/%s_rlc_kicker.bin", ucode_prefix);
+		else
+			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+						   AMDGPU_UCODE_REQUIRED,
+						   "amdgpu/%s_rlc.bin", ucode_prefix);
+		if (err)
+			goto out;
+		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+		err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
+		if (err)
+			goto out;
+	}
+
+	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+				   AMDGPU_UCODE_REQUIRED,
+				   "amdgpu/%s_mec.bin", ucode_prefix);
+	if (err)
+		goto out;
+	if (adev->gfx.rs64_enable) {
+		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
+		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
+		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
+		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
+		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
+	} else {
+		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
+		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
+	}
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+		err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix);
+		/*
+		 * Bail out right away: otherwise the IMU init below would
+		 * overwrite err and hide a TOC load failure.
+		 */
+		if (err)
+			goto out;
+	}
+
+	/* only one MEC for gfx 11.0.0. */
+	adev->gfx.mec2_fw = NULL;
+
+	gfx_v11_0_check_fw_cp_gfx_shadow(adev);
+
+	if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) {
+		err = adev->gfx.imu.funcs->init_microcode(adev);
+		/* fall through to the common cleanup so nothing stays loaded on failure */
+		if (err)
+			DRM_ERROR("Failed to init imu firmware!\n");
+	}
+
+out:
+	if (err) {
+		amdgpu_ucode_release(&adev->gfx.pfp_fw);
+		amdgpu_ucode_release(&adev->gfx.me_fw);
+		amdgpu_ucode_release(&adev->gfx.rlc_fw);
+		amdgpu_ucode_release(&adev->gfx.mec_fw);
+	}
+
+	return err;
+}
+
+/*
+ * Compute the size of the clear-state buffer described by gfx11_cs_data.
+ *
+ * Every extent of a SECT_CONTEXT section contributes 2 + reg_count; fixed
+ * amounts are added for the surrounding packets. Returns 0 as soon as an
+ * extent belonging to a non-SECT_CONTEXT section is found.
+ */
+static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
+{
+	const struct cs_section_def *section;
+	const struct cs_extent_def *extent;
+	/* begin clear state (2) + context control state (3) */
+	u32 total = 2 + 3;
+
+	for (section = gfx11_cs_data; section->section; section++) {
+		for (extent = section->section; extent->extent; extent++) {
+			/* only context sections are supported */
+			if (section->id != SECT_CONTEXT)
+				return 0;
+			total += 2 + extent->reg_count;
+		}
+	}
+
+	/* PA_SC_TILE_STEERING_OVERRIDE (3) + end clear state (2) + clear state (2) */
+	return total + 3 + 2 + 2;
+}
+
+/*
+ * Fill @buffer with the clear-state command stream: preamble start, the
+ * parsed clear-state data, a SET_CONTEXT_REG packet for
+ * PA_SC_TILE_STEERING_OVERRIDE, then the preamble end.
+ *
+ * Does nothing when no clear-state data is configured or @buffer is NULL.
+ */
+static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
+{
+	int tile_steer_reg;
+	u32 pos;
+
+	if (!adev->gfx.rlc.cs_data || !buffer)
+		return;
+
+	pos = amdgpu_gfx_csb_preamble_start(buffer);
+	pos = amdgpu_gfx_csb_data_parser(adev, buffer, pos);
+
+	/* the packet takes the register offset relative to the context-reg base */
+	tile_steer_reg = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) -
+			 PACKET3_SET_CONTEXT_REG_START;
+
+	buffer[pos] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	buffer[pos + 1] = cpu_to_le32(tile_steer_reg);
+	buffer[pos + 2] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
+	pos += 3;
+
+	amdgpu_gfx_csb_preamble_end(buffer, pos);
+}
+
+/*
+ * gfx_v11_0_rlc_fini - free the RLC buffer objects
+ *
+ * Frees the clear state BO and the CP jump table BO, passing the stored GPU
+ * address and CPU pointer of each to amdgpu_bo_free_kernel().
+ */
+static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
+{
+	/* clear state BO */
+	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
+			      &adev->gfx.rlc.clear_state_gpu_addr,
+			      (void **)&adev->gfx.rlc.cs_ptr);
+
+	/* CP jump table BO */
+	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
+			      &adev->gfx.rlc.cp_table_gpu_addr,
+			      (void **)&adev->gfx.rlc.cp_table_ptr);
+}
+
+/*
+ * gfx_v11_0_init_rlcg_reg_access_ctrl - record the RLCG access registers
+ *
+ * Fills entry 0 of adev->gfx.rlc.reg_access_ctrl with the GC register offsets
+ * (scratch registers 0-3, GRBM control/index, spare interrupt) and marks RLCG
+ * register access as supported.
+ */
+static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
+{
+	struct amdgpu_rlcg_reg_access_ctrl *ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
+
+	/* scratch registers */
+	ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+	ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
+	ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
+	ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
+
+	/* GRBM select registers and the spare interrupt register */
+	ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
+	ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
+	ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
+
+	adev->gfx.rlc.rlcg_reg_access_supported = true;
+}
+
+/*
+ * gfx_v11_0_rlc_init - software setup of the RLC
+ *
+ * Registers gfx11_cs_data as the clear state data, creates the clear state
+ * block and, when the RLC callbacks provide update_spm_vmid, sets the SPM
+ * VMID to 0xf.
+ *
+ * Returns 0 on success or the error from amdgpu_gfx_rlc_init_csb().
+ */
+static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
+{
+	int ret;
+
+	adev->gfx.rlc.cs_data = gfx11_cs_data;
+
+	/* init clear state block */
+	if (adev->gfx.rlc.cs_data) {
+		ret = amdgpu_gfx_rlc_init_csb(adev);
+		if (ret)
+			return ret;
+	}
+
+	/* init spm vmid with 0xf */
+	if (adev->gfx.rlc.funcs->update_spm_vmid)
+		adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
+
+	return 0;
+}
+
+/*
+ * gfx_v11_0_mec_fini - free the MEC buffer objects
+ *
+ * Frees the HPD EOP, MEC firmware and MEC firmware data BOs.  NULL is passed
+ * for the GPU address and CPU pointer arguments of amdgpu_bo_free_kernel().
+ */
+static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
+{
+	/* HPD EOP buffer */
+	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
+	/* MEC firmware and firmware data buffers */
+	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
+	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
+}
+
+/*
+ * gfx_v11_0_me_init - reset the ME queue bitmap and acquire graphics queues
+ */
+static void gfx_v11_0_me_init(struct amdgpu_device *adev)
+{
+	/* start from an empty bitmap before queues are acquired */
+	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
+	amdgpu_gfx_graphics_queue_acquire(adev);
+}
+
+/*
+ * gfx_v11_0_mec_init - acquire compute queues and allocate the HPD EOP buffer
+ *
+ * Clears the compute queue bitmap of instance 0, acquires the compute queues
+ * and, when at least one compute ring is configured, creates a zeroed GTT
+ * buffer of GFX11_MEC_HPD_SIZE bytes per compute ring.
+ *
+ * Returns 0 on success (including when no compute rings are configured) or
+ * the error from amdgpu_bo_create_reserved(); on that failure the MEC buffer
+ * objects are released through gfx_v11_0_mec_fini().
+ */
+static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
+{
+	int r;
+	u32 *hpd;
+	size_t mec_hpd_size;
+
+	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+	/* take ownership of the relevant compute queues */
+	amdgpu_gfx_compute_queue_acquire(adev);
+	mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;
+
+	/* nothing to allocate without compute rings */
+	if (!mec_hpd_size)
+		return 0;
+
+	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+				      AMDGPU_GEM_DOMAIN_GTT,
+				      &adev->gfx.mec.hpd_eop_obj,
+				      &adev->gfx.mec.hpd_eop_gpu_addr,
+				      (void **)&hpd);
+	if (r) {
+		/* the buffer is the HPD EOP BO; the old text said "HDP" */
+		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
+		gfx_v11_0_mec_fini(adev);
+		return r;
+	}
+
+	memset(hpd, 0, mec_hpd_size);
+
+	/* the BO comes back mapped and reserved; drop both after zeroing */
+	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+
+	return 0;
+}
+
+/*
+ * wave_read_ind - read one indirect SQ register of a wave
+ *
+ * Writes the wave id and register index to SQ_IND_INDEX and returns the value
+ * then read from SQ_IND_DATA.
+ */
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
+{
+	uint32_t index = (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+			 (address << SQ_IND_INDEX__INDEX__SHIFT);
+
+	WREG32_SOC15(GC, 0, regSQ_IND_INDEX, index);
+
+	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+}
+
+/*
+ * wave_read_regs - read @num consecutive indirect SQ registers into @out
+ *
+ * Programs SQ_IND_INDEX once with the wave id, work-item id, starting
+ * register and the AUTO_INCR bit, then reads SQ_IND_DATA @num times.
+ */
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
+			   uint32_t thread, uint32_t regno,
+			   uint32_t num, uint32_t *out)
+{
+	uint32_t index;
+	uint32_t i;
+
+	index = (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
+		(thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
+		(SQ_IND_INDEX__AUTO_INCR_MASK);
+	WREG32_SOC15(GC, 0, regSQ_IND_INDEX, index);
+
+	for (i = 0; i < num; i++)
+		out[i] = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+}
+
+/*
+ * gfx_v11_0_read_wave_data - dump the state registers of one wave
+ *
+ * Appends the record type (3) followed by fifteen indirect SQ wave registers
+ * to @dst, advancing *@no_fields for every value stored.
+ */
+static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+{
+	/* registers are read and stored in exactly this order */
+	static const uint32_t wave_regs[] = {
+		ixSQ_WAVE_STATUS,
+		ixSQ_WAVE_PC_LO,
+		ixSQ_WAVE_PC_HI,
+		ixSQ_WAVE_EXEC_LO,
+		ixSQ_WAVE_EXEC_HI,
+		ixSQ_WAVE_HW_ID1,
+		ixSQ_WAVE_HW_ID2,
+		ixSQ_WAVE_GPR_ALLOC,
+		ixSQ_WAVE_LDS_ALLOC,
+		ixSQ_WAVE_TRAPSTS,
+		ixSQ_WAVE_IB_STS,
+		ixSQ_WAVE_IB_STS2,
+		ixSQ_WAVE_IB_DBG1,
+		ixSQ_WAVE_M0,
+		ixSQ_WAVE_MODE,
+	};
+	unsigned int i;
+
+	/* in gfx11 the SIMD_ID is specified as part of the INSTANCE
+	 * field when performing a select_se_sh so it should be
+	 * zero here */
+	WARN_ON(simd != 0);
+
+	/* type 3 wave data */
+	dst[(*no_fields)++] = 3;
+
+	for (i = 0; i < ARRAY_SIZE(wave_regs); i++)
+		dst[(*no_fields)++] = wave_read_ind(adev, wave, wave_regs[i]);
+}
+
+/*
+ * gfx_v11_0_read_wave_sgprs - read @size SGPRs of a wave starting at @start
+ *
+ * Warns when @simd is non-zero and reads through wave_read_regs() with
+ * work-item 0 at SQIND_WAVE_SGPRS_OFFSET + @start.
+ */
+static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+				      uint32_t wave, uint32_t start,
+				      uint32_t size, uint32_t *dst)
+{
+	uint32_t first_reg = start + SQIND_WAVE_SGPRS_OFFSET;
+
+	WARN_ON(simd != 0);
+
+	wave_read_regs(adev, wave, 0, first_reg, size, dst);
+}
+
+/*
+ * gfx_v11_0_read_wave_vgprs - read @size VGPRs of one work-item of a wave
+ *
+ * Reads through wave_read_regs() for @thread, starting at
+ * SQIND_WAVE_VGPRS_OFFSET + @start.
+ */
+static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+				      uint32_t wave, uint32_t thread,
+				      uint32_t start, uint32_t size,
+				      uint32_t *dst)
+{
+	uint32_t first_reg = start + SQIND_WAVE_VGPRS_OFFSET;
+
+	wave_read_regs(adev, wave, thread, first_reg, size, dst);
+}
+
+/*
+ * gfx_v11_0_select_me_pipe_q - select an ME/pipe/queue/VMID via GRBM
+ *
+ * Forwards to soc21_grbm_select(); @xcc_id is not used here.
+ */
+static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
+				       u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
+{
+	soc21_grbm_select(adev, me, pipe, q, vm);
+}
+
+/* Shadow buffer and firmware work area (CSA) size/alignment values that
+ * gfx_v11_0_get_gfx_shadow_info_nocheck() copies into struct
+ * amdgpu_gfx_shadow_info; all sizes are in bytes */
+#define MQD_SHADOW_BASE_SIZE 73728
+#define MQD_SHADOW_BASE_ALIGNMENT 256
+#define MQD_FWWORKAREA_SIZE 484
+#define MQD_FWWORKAREA_ALIGNMENT 256
+
+/*
+ * gfx_v11_0_get_gfx_shadow_info_nocheck - fill @shadow_info unconditionally
+ *
+ * Stores the MQD shadow and firmware work area size/alignment constants
+ * without looking at any device state.
+ */
+static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
+						  struct amdgpu_gfx_shadow_info *shadow_info)
+{
+	/* shadow buffer */
+	shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
+	shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
+
+	/* firmware work area, reported through the csa_* fields */
+	shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
+	shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
+}
+
+/*
+ * gfx_v11_0_get_gfx_shadow_info - report the gfx shadow buffer requirements
+ *
+ * When adev->gfx.cp_gfx_shadow is set, or @skip_check is true, @shadow_info
+ * is filled in and 0 is returned.  Otherwise @shadow_info is zeroed and
+ * -ENOTSUPP is returned.
+ */
+static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
+					 struct amdgpu_gfx_shadow_info *shadow_info,
+					 bool skip_check)
+{
+	if (!adev->gfx.cp_gfx_shadow && !skip_check) {
+		memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
+		return -ENOTSUPP;
+	}
+
+	gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info);
+
+	return 0;
+}
+
+/* GFX callback table for gfx v11.0 */
+static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
+	.get_gpu_clock_counter = gfx_v11_0_get_gpu_clock_counter,
+	.select_se_sh = gfx_v11_0_select_se_sh,
+	/* wave state readers */
+	.read_wave_data = gfx_v11_0_read_wave_data,
+	.read_wave_sgprs = gfx_v11_0_read_wave_sgprs,
+	.read_wave_vgprs = gfx_v11_0_read_wave_vgprs,
+	.select_me_pipe_q = gfx_v11_0_select_me_pipe_q,
+	.update_perfmon_mgcg = gfx_v11_0_update_perf_clk,
+	.get_gfx_shadow_info = gfx_v11_0_get_gfx_shadow_info,
+};
+
+/*
+ * gfx_v11_0_gpu_early_init - set the per-IP-version gfx configuration
+ *
+ * Every supported GC version uses 8 hardware contexts and the same
+ * frontend/backend prim FIFO sizes; only the HiZ and EarlyZ tile FIFO sizes
+ * differ.  GC 11.0.3 additionally gets its RAS function table.  An unknown
+ * version hits BUG() and leaves the configuration untouched.
+ *
+ * Always returns 0.
+ */
+static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
+{
+	unsigned int hiz_tile_fifo, earlyz_tile_fifo;
+
+	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+	case IP_VERSION(11, 0, 0):
+	case IP_VERSION(11, 0, 2):
+		hiz_tile_fifo = 0;
+		earlyz_tile_fifo = 0x4C0;
+		break;
+	case IP_VERSION(11, 0, 3):
+		adev->gfx.ras = &gfx_v11_0_3_ras;
+		hiz_tile_fifo = 0;
+		earlyz_tile_fifo = 0x4C0;
+		break;
+	case IP_VERSION(11, 0, 1):
+	case IP_VERSION(11, 0, 4):
+	case IP_VERSION(11, 5, 0):
+	case IP_VERSION(11, 5, 1):
+	case IP_VERSION(11, 5, 2):
+	case IP_VERSION(11, 5, 3):
+		hiz_tile_fifo = 0x80;
+		earlyz_tile_fifo = 0x300;
+		break;
+	default:
+		BUG();
+		/* nothing is configured for an unknown version */
+		return 0;
+	}
+
+	/* values shared by every supported version */
+	adev->gfx.config.max_hw_contexts = 8;
+	adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+	adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+	/* version specific tile FIFO sizes */
+	adev->gfx.config.sc_hiz_tile_fifo_size = hiz_tile_fifo;
+	adev->gfx.config.sc_earlyz_tile_fifo_size = earlyz_tile_fifo;
+
+	return 0;
+}
+
+/*
+ * gfx_v11_0_gfx_ring_init - set up one kernel graphics ring
+ *
+ * Fills in the ME/pipe/queue coordinates, doorbell, VM hub and name of
+ * adev->gfx.gfx_ring[@ring_id] and registers it with amdgpu_ring_init() on
+ * the EOP interrupt of its pipe.
+ *
+ * Returns the result of amdgpu_ring_init().
+ */
+static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
+				   int me, int pipe, int queue)
+{
+	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[ring_id];
+	unsigned int irq_type;
+	unsigned int hw_prio;
+
+	ring->me = me;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	/* with kernel queues disabled the ring takes no scheduler or user work */
+	if (adev->gfx.disable_kq) {
+		ring->no_scheduler = true;
+		ring->no_user_submission = true;
+	}
+
+	/* ring 0 uses the gfx_ring0 doorbell, every other ring gfx_ring1 */
+	ring->doorbell_index = (ring_id ? adev->doorbell_index.gfx_ring1 :
+					  adev->doorbell_index.gfx_ring0) << 1;
+	ring->vm_hub = AMDGPU_GFXHUB(0);
+	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
+	if (amdgpu_gfx_is_high_priority_graphics_queue(adev, ring))
+		hw_prio = AMDGPU_GFX_PIPE_PRIO_HIGH;
+	else
+		hw_prio = AMDGPU_GFX_PIPE_PRIO_NORMAL;
+
+	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+				hw_prio, NULL);
+}
+
+/*
+ * gfx_v11_0_compute_ring_init - set up one kernel compute ring
+ *
+ * Fills in the ME/pipe/queue coordinates, doorbell, EOP buffer address, VM
+ * hub and name of adev->gfx.compute_ring[@ring_id] and registers it with
+ * amdgpu_ring_init() on the EOP interrupt of its MEC pipe.
+ *
+ * Returns 0 on success or the error from amdgpu_ring_init().
+ */
+static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+				       int mec, int pipe, int queue)
+{
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+	unsigned int hw_prio;
+	unsigned irq_type;
+
+	/* mec0 is me1 */
+	ring->me = mec + 1;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
+	/* each ring owns one GFX11_MEC_HPD_SIZE slice of the HPD EOP buffer */
+	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+				+ (ring_id * GFX11_MEC_HPD_SIZE);
+	ring->vm_hub = AMDGPU_GFXHUB(0);
+	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	/* EOP interrupt sources are laid out per MEC, then per pipe */
+	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+		+ ring->pipe;
+	if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring))
+		hw_prio = AMDGPU_GFX_PIPE_PRIO_HIGH;
+	else
+		hw_prio = AMDGPU_GFX_PIPE_PRIO_NORMAL;
+
+	/* type-2 packets are deprecated on MEC, use type-3 instead */
+	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+				hw_prio, NULL);
+}
+
+static struct { /* RLC autoload layout table, indexed by SOC21_FIRMWARE_ID and filled by gfx_v11_0_parse_rlc_toc() */
+ SOC21_FIRMWARE_ID id; /* firmware id copied from the TOC entry */
+ unsigned int offset; /* TOC entry offset multiplied by 4; used as a byte offset into the autoload buffer */
+ unsigned int size; /* TOC entry size multiplied by 4; used as a byte count when copying */
+} rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];
+
+/*
+ * gfx_v11_0_parse_rlc_toc - load rlc_autoload_info[] from the RLC TOC
+ *
+ * Walks the RLC_TABLE_OF_CONTENT entries at @rlc_toc until an entry whose id
+ * lies outside (SOC21_FIRMWARE_ID_INVALID, SOC21_FIRMWARE_ID_MAX) is found.
+ * Offset and size of each entry are stored multiplied by 4.  A NULL @rlc_toc
+ * is ignored.
+ */
+static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
+{
+	RLC_TABLE_OF_CONTENT *entry;
+
+	if (!rlc_toc)
+		return;
+
+	for (entry = rlc_toc;
+	     (entry->id > SOC21_FIRMWARE_ID_INVALID) &&
+	     (entry->id < SOC21_FIRMWARE_ID_MAX);
+	     entry++) {
+		rlc_autoload_info[entry->id].id = entry->id;
+		rlc_autoload_info[entry->id].offset = entry->offset * 4;
+		rlc_autoload_info[entry->id].size = entry->size * 4;
+	}
+}
+
+/*
+ * gfx_v11_0_calc_toc_total_size - size needed for the RLC autoload buffer
+ *
+ * Parses the PSP TOC and sums the sizes of every firmware from
+ * SOC21_FIRMWARE_ID_RLC_G_UCODE up to SOC21_FIRMWARE_ID_MAX.  When that sum
+ * is smaller than the offset of the last firmware entry, the end of that
+ * last entry (offset + size) is returned instead.
+ */
+static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
+{
+	const unsigned int last = SOC21_FIRMWARE_ID_MAX - 1;
+	uint32_t bytes = 0;
+	SOC21_FIRMWARE_ID id;
+
+	gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
+
+	id = SOC21_FIRMWARE_ID_RLC_G_UCODE;
+	while (id < SOC21_FIRMWARE_ID_MAX) {
+		bytes += rlc_autoload_info[id].size;
+		id++;
+	}
+
+	/* In case the offset in rlc toc ucode is aligned */
+	if (bytes < rlc_autoload_info[last].offset)
+		bytes = rlc_autoload_info[last].offset +
+			rlc_autoload_info[last].size;
+
+	return bytes;
+}
+
+/*
+ * gfx_v11_0_rlc_autoload_buffer_init - allocate the RLC autoload buffer
+ *
+ * Creates a reserved BO, 64KiB aligned and placeable in VRAM or GTT, sized
+ * by gfx_v11_0_calc_toc_total_size().
+ *
+ * Returns 0 on success or the error from amdgpu_bo_create_reserved().
+ */
+static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
+{
+	uint32_t bo_size = gfx_v11_0_calc_toc_total_size(adev);
+	int ret;
+
+	ret = amdgpu_bo_create_reserved(adev, bo_size, 64 * 1024,
+					AMDGPU_GEM_DOMAIN_VRAM |
+					AMDGPU_GEM_DOMAIN_GTT,
+					&adev->gfx.rlc.rlc_autoload_bo,
+					&adev->gfx.rlc.rlc_autoload_gpu_addr,
+					(void **)&adev->gfx.rlc.rlc_autoload_ptr);
+	if (ret)
+		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", ret);
+
+	return ret;
+}
+
+/*
+ * gfx_v11_0_rlc_backdoor_autoload_copy_ucode - copy one firmware image into
+ * its slot of the RLC autoload buffer
+ *
+ * The slot offset and size come from rlc_autoload_info[@id].  @fw_size is
+ * clamped to the slot size (0 means "use the slot size"); any remainder of
+ * the slot is zero filled.  Bit @id of the 64-bit mask at @fw_autoload_mask
+ * is set for every id except RS64_PFP and RS64_ME.  Ids outside the valid
+ * range are ignored.
+ */
+static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
+						       SOC21_FIRMWARE_ID id,
+						       const void *fw_data,
+						       uint32_t fw_size,
+						       uint32_t *fw_autoload_mask)
+{
+	char *base = adev->gfx.rlc.rlc_autoload_ptr;
+	uint32_t slot_size;
+	char *slot;
+
+	if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
+		return;
+
+	slot = base + rlc_autoload_info[id].offset;
+	slot_size = rlc_autoload_info[id].size;
+
+	/* never copy more than the slot holds; 0 selects the full slot */
+	if (fw_size == 0 || fw_size > slot_size)
+		fw_size = slot_size;
+
+	memcpy(slot, fw_data, fw_size);
+
+	/* pad the rest of the slot with zeros */
+	if (fw_size < slot_size)
+		memset(slot + fw_size, 0, slot_size - fw_size);
+
+	if ((id == SOC21_FIRMWARE_ID_RS64_PFP) || (id == SOC21_FIRMWARE_ID_RS64_ME))
+		return;
+
+	*(uint64_t *)fw_autoload_mask |= 1ULL << id;
+}
+
+/*
+ * gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode - finalise and copy the TOC
+ *
+ * Sets bit 0 of the 64-bit autoload mask, stores the mask in the last eight
+ * bytes of the PSP TOC image and copies the TOC into its autoload slot.
+ */
+static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
+							   uint32_t *fw_autoload_mask)
+{
+	uint64_t *mask = (uint64_t *)fw_autoload_mask;
+	uint64_t *toc_tail;
+	uint32_t toc_size;
+	void *toc;
+
+	*mask |= 0x1;
+
+	DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *mask);
+
+	toc = adev->psp.toc.start_addr;
+	toc_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;
+
+	/* the mask lives in the final 64-bit word of the TOC image */
+	toc_tail = (uint64_t *)toc + toc_size / 8 - 1;
+	*toc_tail = *mask;
+
+	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
+						   toc, toc_size, fw_autoload_mask);
+}
+
+/*
+ * gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode - copy the CP and RLC
+ * firmware images into the RLC autoload buffer
+ *
+ * With RS64 enabled, the PFP, ME and MEC images use the v2.0 gfx header: the
+ * instruction section goes to the engine's ucode slot and the data section
+ * is copied to each of that engine's per-pipe stack slots.  Otherwise the
+ * v1.0 header is used and a single image per engine is copied; for MEC the
+ * jump table (jt_size dwords) is excluded from the copied size.
+ *
+ * The RLC G ucode is always copied; for RLC header version 2.2 or newer
+ * (major 2) the IRAM and DRAM images are copied as well.
+ *
+ * @fw_autoload_mask is updated by the per-image copy helper.
+ */
+static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
+							   uint32_t *fw_autoload_mask)
+{
+	const __le32 *fw_data;
+	uint32_t fw_size;
+	const struct gfx_firmware_header_v1_0 *cp_hdr;
+	const struct gfx_firmware_header_v2_0 *cpv2_hdr;
+	const struct rlc_firmware_header_v2_0 *rlc_hdr;
+	const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
+	uint16_t version_major, version_minor;
+
+	if (adev->gfx.rs64_enable) {
+		/* pfp ucode */
+		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+			adev->gfx.pfp_fw->data;
+		/* instruction */
+		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
+						fw_data, fw_size, fw_autoload_mask);
+		/* data */
+		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+			le32_to_cpu(cpv2_hdr->data_offset_bytes));
+		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
+						fw_data, fw_size, fw_autoload_mask);
+		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
+						fw_data, fw_size, fw_autoload_mask);
+		/* me ucode */
+		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+			adev->gfx.me_fw->data;
+		/* instruction */
+		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
+						fw_data, fw_size, fw_autoload_mask);
+		/* data */
+		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+			le32_to_cpu(cpv2_hdr->data_offset_bytes));
+		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
+						fw_data, fw_size, fw_autoload_mask);
+		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
+						fw_data, fw_size, fw_autoload_mask);
+		/* mec ucode */
+		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+			adev->gfx.mec_fw->data;
+		/* instruction */
+		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
+						fw_data, fw_size, fw_autoload_mask);
+		/* data */
+		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+			le32_to_cpu(cpv2_hdr->data_offset_bytes));
+		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
+						fw_data, fw_size, fw_autoload_mask);
+		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
+						fw_data, fw_size, fw_autoload_mask);
+		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
+						fw_data, fw_size, fw_autoload_mask);
+		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
+						fw_data, fw_size, fw_autoload_mask);
+	} else {
+		/* pfp ucode */
+		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+			adev->gfx.pfp_fw->data;
+		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
+						fw_data, fw_size, fw_autoload_mask);
+
+		/* me ucode */
+		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+			adev->gfx.me_fw->data;
+		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
+						fw_data, fw_size, fw_autoload_mask);
+
+		/* mec ucode */
+		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+			adev->gfx.mec_fw->data;
+		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+		/*
+		 * jt_size is a firmware header field like the others, so it
+		 * must be converted from little endian before it is used.
+		 */
+		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
+			le32_to_cpu(cp_hdr->jt_size) * 4;
+		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
+						fw_data, fw_size, fw_autoload_mask);
+	}
+
+	/* rlc ucode */
+	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
+		adev->gfx.rlc_fw->data;
+	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+			le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
+	fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
+	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
+					fw_data, fw_size, fw_autoload_mask);
+
+	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+	if (version_major == 2) {
+		if (version_minor >= 2) {
+			/* v2.2+ headers additionally describe IRAM and DRAM images */
+			rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+
+			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+					le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
+			fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
+			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
+					fw_data, fw_size, fw_autoload_mask);
+
+			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+					le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
+			fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
+			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
+					fw_data, fw_size, fw_autoload_mask);
+		}
+	}
+}
+
+/*
+ * gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode - copy the SDMA firmware
+ *
+ * Uses the v2.0 SDMA header of SDMA instance 0: the context ucode goes to
+ * the TH0 slot and the control ucode to the TH1 slot of the autoload buffer.
+ */
+static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
+							    uint32_t *fw_autoload_mask)
+{
+	const struct sdma_firmware_header_v2_0 *hdr =
+		(const struct sdma_firmware_header_v2_0 *)
+		adev->sdma.instance[0].fw->data;
+	const __le32 *image;
+	uint32_t image_size;
+
+	/* context ucode -> TH0 */
+	image = (const __le32 *) (adev->sdma.instance[0].fw->data +
+			le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+	image_size = le32_to_cpu(hdr->ctx_ucode_size_bytes);
+	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
+			SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, image, image_size,
+			fw_autoload_mask);
+
+	/* control ucode -> TH1 */
+	image = (const __le32 *) (adev->sdma.instance[0].fw->data +
+			le32_to_cpu(hdr->ctl_ucode_offset));
+	image_size = le32_to_cpu(hdr->ctl_ucode_size_bytes);
+	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
+			SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, image, image_size,
+			fw_autoload_mask);
+}
+
+/*
+ * gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode - copy the MES firmware
+ *
+ * For each of the two MES pipes, copies the ucode section to the pipe's
+ * RS64_MES slot and the ucode data section to the pipe's stack slot.
+ */
+static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
+							   uint32_t *fw_autoload_mask)
+{
+	/* autoload slots per MES pipe */
+	static const SOC21_FIRMWARE_ID ucode_slot[] = {
+		SOC21_FIRMWARE_ID_RS64_MES_P0,
+		SOC21_FIRMWARE_ID_RS64_MES_P1,
+	};
+	static const SOC21_FIRMWARE_ID stack_slot[] = {
+		SOC21_FIRMWARE_ID_RS64_MES_P0_STACK,
+		SOC21_FIRMWARE_ID_RS64_MES_P1_STACK,
+	};
+	const struct mes_firmware_header_v1_0 *hdr;
+	const __le32 *image;
+	unsigned image_size;
+	int pipe;
+
+	for (pipe = 0; pipe < 2; pipe++) {
+		hdr = (const struct mes_firmware_header_v1_0 *)
+			adev->mes.fw[pipe]->data;
+
+		/* ucode section */
+		image = (const __le32 *)(adev->mes.fw[pipe]->data +
+				le32_to_cpu(hdr->mes_ucode_offset_bytes));
+		image_size = le32_to_cpu(hdr->mes_ucode_size_bytes);
+		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
+				ucode_slot[pipe], image, image_size,
+				fw_autoload_mask);
+
+		/* ucode data section */
+		image = (const __le32 *)(adev->mes.fw[pipe]->data +
+				le32_to_cpu(hdr->mes_ucode_data_offset_bytes));
+		image_size = le32_to_cpu(hdr->mes_ucode_data_size_bytes);
+		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
+				stack_slot[pipe], image, image_size,
+				fw_autoload_mask);
+	}
+}
+
+/*
+ * gfx_v11_0_rlc_backdoor_autoload_enable - run the RLC backdoor autoload
+ *
+ * Copies the SDMA, gfx/RLC, MES and TOC images into the autoload buffer,
+ * programs the IMU RLC bootloader address and size registers with the
+ * location of the RLC G ucode, runs the optional IMU load/setup/start
+ * callbacks and finally disables GPA mode.
+ *
+ * Always returns 0.
+ */
+static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
+{
+	/* two words, used by the copy helpers as one 64-bit firmware mask */
+	uint32_t autoload_fw_id[2] = { 0, 0 };
+	uint32_t rlc_g_offset, rlc_g_size;
+	uint64_t bootloader_addr;
+
+	/* RLC autoload sequence 2: copy ucode */
+	gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
+	gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
+	gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
+	gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);
+
+	/* point the IMU bootloader registers at the RLC G ucode */
+	rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
+	rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
+	bootloader_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
+
+	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI,
+		     upper_32_bits(bootloader_addr));
+	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO,
+		     lower_32_bits(bootloader_addr));
+
+	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
+
+	/* RLC autoload sequence 3: load IMU fw */
+	if (adev->gfx.imu.funcs->load_microcode)
+		adev->gfx.imu.funcs->load_microcode(adev);
+
+	/* RLC autoload sequence 4 init IMU fw */
+	if (adev->gfx.imu.funcs->setup_imu)
+		adev->gfx.imu.funcs->setup_imu(adev);
+	if (adev->gfx.imu.funcs->start_imu)
+		adev->gfx.imu.funcs->start_imu(adev);
+
+	/* RLC autoload sequence 5 disable gpa mode */
+	gfx_v11_0_disable_gpa_mode(adev);
+
+	return 0;
+}
+
+/*
+ * gfx_v11_0_alloc_ip_dump - allocate the register dump buffers
+ *
+ * Allocates three zeroed u32 arrays: one for the core GC register list, one
+ * for the compute queue registers of every MEC/pipe/queue and one for the
+ * gfx queue registers of every ME/pipe/queue.  A failed allocation is logged
+ * and leaves the corresponding adev->gfx.ip_dump_* pointer NULL; it is not
+ * reported to the caller.
+ */
+static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev)
+{
+	uint32_t nregs;
+	uint32_t ninst;
+
+	/* core registers */
+	nregs = ARRAY_SIZE(gc_reg_list_11_0);
+	adev->gfx.ip_dump_core = kcalloc(nregs, sizeof(uint32_t), GFP_KERNEL);
+	if (!adev->gfx.ip_dump_core)
+		DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
+
+	/* Allocate memory for compute queue registers for all the instances */
+	nregs = ARRAY_SIZE(gc_cp_reg_list_11);
+	ninst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+		adev->gfx.mec.num_queue_per_pipe;
+	adev->gfx.ip_dump_compute_queues = kcalloc(nregs * ninst,
+						   sizeof(uint32_t), GFP_KERNEL);
+	if (!adev->gfx.ip_dump_compute_queues)
+		DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
+
+	/* Allocate memory for gfx queue registers for all the instances */
+	nregs = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
+	ninst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
+		adev->gfx.me.num_queue_per_pipe;
+	adev->gfx.ip_dump_gfx_queues = kcalloc(nregs * ninst,
+					       sizeof(uint32_t), GFP_KERNEL);
+	if (!adev->gfx.ip_dump_gfx_queues)
+		DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
+}
+
+static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int i, j, k, r, ring_id;
+ int xcc_id = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
+ /*
+  * Software init of the gfx v11 IP block, in order: ME/MEC topology,
+  * user queue and cleaner shader enablement (both gated on firmware
+  * versions), interrupt sources, RLC/MEC buffers, kernel gfx and compute
+  * rings, reset capabilities, KIQ, MQDs, the RLC autoload buffer, the
+  * early gfx config, RAS, register dump buffers and sysfs.
+  * Returns 0 on success or the first error hit; nothing set up earlier
+  * in this function is released here on an error return.
+  */
+ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
+ /* number of MEs/MECs, pipes and queues for this GC version */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 2;
+ adev->gfx.mec.num_mec = 1;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 4;
+ break;
+ default:
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.mec.num_mec = 1;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
+ break;
+ }
+ /* user queues: install userq_mes_funcs only when not disabled and the ME/PFP/MEC/MES firmware is new enough */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ if (!adev->gfx.disable_uq &&
+ adev->gfx.me_fw_version >= 2420 &&
+ adev->gfx.pfp_fw_version >= 2580 &&
+ adev->gfx.mec_fw_version >= 2650 &&
+ adev->mes.fw_version[0] >= 120) {
+ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
+ adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
+ }
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ /* add firmware version checks here; until then the "0 &&" below keeps user queues off for these versions */
+ if (0 && !adev->gfx.disable_uq) {
+ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
+ adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
+ }
+ break;
+ default:
+ break;
+ }
+ /*
+  * cleaner shader: every listed version uses the gfx 11.0.3 shader binary
+  * and enables it only above per-version firmware thresholds; a failure
+  * of amdgpu_gfx_cleaner_shader_sw_init() just turns the feature off
+  * again and is not propagated
+  */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 2280 &&
+ adev->gfx.pfp_fw_version >= 2370 &&
+ adev->gfx.mec_fw_version >= 2450 &&
+ adev->mes.fw_version[0] >= 99) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.pfp_fw_version >= 102 &&
+ adev->gfx.mec_fw_version >= 66 &&
+ adev->mes.fw_version[0] >= 128) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.mec_fw_version >= 26 &&
+ adev->mes.fw_version[0] >= 114) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 5, 2):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 12 &&
+ adev->gfx.pfp_fw_version >= 15 &&
+ adev->gfx.mec_fw_version >= 15) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 5, 3):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 7 &&
+ adev->gfx.pfp_fw_version >= 8 &&
+ adev->gfx.mec_fw_version >= 8) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
+
+ /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit;
+  * note this assigns cg_flags (it does not OR into the existing value) */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) &&
+ amdgpu_sriov_is_pp_one_vf(adev))
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG;
+
+ /* EOP Event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
+ &adev->gfx.eop_irq);
+ if (r)
+ return r;
+
+ /* Bad opcode Event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR,
+ &adev->gfx.bad_op_irq);
+ if (r)
+ return r;
+
+ /* Privileged reg */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
+ &adev->gfx.priv_reg_irq);
+ if (r)
+ return r;
+
+ /* Privileged inst */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
+ &adev->gfx.priv_inst_irq);
+ if (r)
+ return r;
+
+ /* FED error (registered under the GFX client id, unlike the CP sources above) */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT,
+ &adev->gfx.rlc_gc_fed_irq);
+ if (r)
+ return r;
+
+ adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
+
+ gfx_v11_0_me_init(adev);
+
+ r = gfx_v11_0_rlc_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init rlc BOs!\n");
+ return r;
+ }
+
+ r = gfx_v11_0_mec_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init MEC BOs!\n");
+ return r;
+ }
+
+ if (adev->gfx.num_gfx_rings) {
+ ring_id = 0;
+ /* set up the gfx ring; j walks the local num_queue_per_pipe (1), k the pipes, and ring_id counts enabled queues */
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
+ if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
+ continue;
+
+ r = gfx_v11_0_gfx_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+ ring_id++;
+ }
+ }
+ }
+ }
+
+ if (adev->gfx.num_compute_rings) {
+ ring_id = 0;
+ /* set up the compute queues - allocate horizontally across pipes */
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
+ continue;
+
+ r = gfx_v11_0_compute_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+
+ ring_id++;
+ }
+ }
+ }
+ }
+ /* reset capabilities: start from the soft/full reset mask of ring 0, then add per-queue reset where allowed */
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ if ((adev->gfx.me_fw_version >= 2280) &&
+ (adev->gfx.mec_fw_version >= 2410) &&
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
+ break;
+ default: /* no firmware version gate for the other versions */
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
+ break;
+ }
+ /* the KIQ BOs and ring are only created here when enable_mes_kiq is not set */
+ if (!adev->enable_mes_kiq) {
+ r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0);
+ if (r) {
+ DRM_ERROR("Failed to init KIQ BOs!\n");
+ return r;
+ }
+
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0);
+ if (r)
+ return r;
+
+ /* allocate visible FB for rlc auto-loading fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ r = gfx_v11_0_rlc_autoload_buffer_init(adev);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v11_0_gpu_early_init(adev);
+ if (r)
+ return r;
+ /* any RAS init failure is reported as -EINVAL rather than the callee's own code */
+ if (amdgpu_gfx_ras_sw_init(adev)) {
+ dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
+ return -EINVAL;
+ }
+ /* register dump buffers are best effort: allocation failures are only logged by the helper */
+ gfx_v11_0_alloc_ip_dump(adev);
+
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/*
+ * Free the PFP firmware buffer object and the PFP firmware data buffer
+ * object, passing along their GPU address and CPU pointer fields.
+ */
+static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev)
+{
+ /* PFP firmware BO */
+ amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+
+ /* PFP firmware data BO */
+ amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
+ &adev->gfx.pfp.pfp_fw_data_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
+}
+
+/*
+ * Free the ME firmware buffer object and the ME firmware data buffer
+ * object, passing along their GPU address and CPU pointer fields.
+ */
+static void gfx_v11_0_me_fini(struct amdgpu_device *adev)
+{
+ /* ME firmware BO */
+ amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+
+ /* ME firmware data BO */
+ amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
+ &adev->gfx.me.me_fw_data_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_data_ptr);
+}
+
+/*
+ * Free the RLC autoload buffer object together with its GPU address and
+ * CPU pointer fields. The only caller visible in this part of the file
+ * invokes it when the firmware load type is
+ * AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO.
+ */
+static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
+ &adev->gfx.rlc.rlc_autoload_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_autoload_ptr);
+}
+
+/*
+ * Software teardown for the GFX v11 IP block.
+ *
+ * Finalizes every gfx and compute ring, then releases the MQD, KIQ,
+ * cleaner shader, PFP/ME/RLC/MEC, autoload buffer, microcode, sysfs and
+ * IP dump resources. Always returns 0.
+ */
+static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ int i;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
+
+ /* The KIQ ring is only released here when MES KIQ is not enabled. */
+ if (!adev->enable_mes_kiq) {
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
+ }
+
+ amdgpu_gfx_cleaner_shader_sw_fini(adev);
+
+ gfx_v11_0_pfp_fini(adev);
+ gfx_v11_0_me_fini(adev);
+ gfx_v11_0_rlc_fini(adev);
+ gfx_v11_0_mec_fini(adev);
+
+ /* The autoload buffer is only freed for the RLC backdoor auto load type. */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+ gfx_v11_0_rlc_autoload_buffer_fini(adev);
+
+ gfx_v11_0_free_microcode(adev);
+
+ amdgpu_gfx_sysfs_fini(adev);
+
+ /* Release the IP dump buffers. */
+ kfree(adev->gfx.ip_dump_core);
+ kfree(adev->gfx.ip_dump_compute_queues);
+ kfree(adev->gfx.ip_dump_gfx_queues);
+
+ return 0;
+}
+
+/*
+ * Program GRBM_GFX_INDEX to target a shader engine / shader array /
+ * instance. For each of @se_num, @sh_num and @instance the value
+ * 0xffffffff selects the corresponding *_BROADCAST_WRITES bit instead of
+ * a specific index. @xcc_id is not used by this implementation.
+ */
+static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+				   u32 sh_num, u32 instance, int xcc_id)
+{
+	const u32 broadcast = 0xffffffff;
+	u32 gfx_index;
+
+	/* instance: broadcast or explicit index */
+	gfx_index = (instance == broadcast) ?
+		REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) :
+		REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
+
+	/* shader engine: broadcast or explicit index */
+	gfx_index = (se_num == broadcast) ?
+		REG_SET_FIELD(gfx_index, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1) :
+		REG_SET_FIELD(gfx_index, GRBM_GFX_INDEX, SE_INDEX, se_num);
+
+	/* shader array: broadcast or explicit index */
+	gfx_index = (sh_num == broadcast) ?
+		REG_SET_FIELD(gfx_index, GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1) :
+		REG_SET_FIELD(gfx_index, GRBM_GFX_INDEX, SA_INDEX, sh_num);
+
+	WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, gfx_index);
+}
+
+/*
+ * Return the bitmap of active shader arrays: a full mask of
+ * max_sh_per_se * max_shader_engines bits with every SA cleared that is
+ * marked disabled in CC_GC_SA_UNIT_DISABLE or GC_USER_SA_UNIT_DISABLE.
+ */
+static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev)
+{
+	u32 reg, cc_disabled, user_disabled, all_sa;
+
+	reg = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE);
+	cc_disabled = REG_GET_FIELD(reg, CC_GC_SA_UNIT_DISABLE, SA_DISABLE);
+
+	reg = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE);
+	user_disabled = REG_GET_FIELD(reg, GC_USER_SA_UNIT_DISABLE, SA_DISABLE);
+
+	all_sa = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
+					   adev->gfx.config.max_shader_engines);
+
+	/* keep only the SAs that neither register disables */
+	return all_sa & ~cc_disabled & ~user_disabled;
+}
+
+/*
+ * Return the bitmap of active render backends: a full mask of
+ * max_backends_per_se * max_shader_engines bits with every RB cleared
+ * that is marked disabled in CC_RB_BACKEND_DISABLE or
+ * GC_USER_RB_BACKEND_DISABLE.
+ */
+static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+{
+	u32 reg, cc_disabled, user_disabled, all_rb;
+
+	reg = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
+	cc_disabled = REG_GET_FIELD(reg, CC_RB_BACKEND_DISABLE, BACKEND_DISABLE);
+
+	reg = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
+	user_disabled = REG_GET_FIELD(reg, GC_USER_RB_BACKEND_DISABLE,
+				      BACKEND_DISABLE);
+
+	all_rb = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
+					   adev->gfx.config.max_shader_engines);
+
+	/* keep only the RBs that neither register disables */
+	return all_rb & ~cc_disabled & ~user_disabled;
+}
+
+/*
+ * Compute the active render-backend configuration and store it in
+ * adev->gfx.config (backend_enable_mask and num_rbs).
+ *
+ * For every active shader array a group of
+ * (max_backends_per_se / max_sh_per_se) RB bits is enabled; the result is
+ * then intersected with the RB bitmap read back from the
+ * RB_BACKEND_DISABLE registers.
+ */
+static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
+{
+	u32 rb_bitmap_per_sa;
+	u32 rb_bitmap_width_per_sa;
+	u32 max_sa;
+	u32 active_sa_bitmap;
+	u32 global_active_rb_bitmap;
+	u32 active_rb_bitmap = 0;
+	u32 i;
+
+	/* query sa bitmap from SA_UNIT_DISABLE registers */
+	active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev);
+	/* query rb bitmap from RB_BACKEND_DISABLE registers */
+	global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev);
+
+	/* generate active rb bitmap according to active sa bitmap */
+	max_sa = adev->gfx.config.max_shader_engines *
+		 adev->gfx.config.max_sh_per_se;
+	rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
+				 adev->gfx.config.max_sh_per_se;
+	rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa);
+
+	for (i = 0; i < max_sa; i++) {
+		/*
+		 * Use an unsigned constant so the test stays well defined
+		 * even for bit 31 of the u32 SA bitmap.
+		 */
+		if (active_sa_bitmap & (1U << i))
+			active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa));
+	}
+
+	/* drop RBs that the hardware/user disable registers report as off */
+	active_rb_bitmap &= global_active_rb_bitmap;
+	adev->gfx.config.backend_enable_mask = active_rb_bitmap;
+	adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
+}
+
+/* NOTE(review): DEFAULT_SH_MEM_BASES is not referenced by the function below. */
+#define DEFAULT_SH_MEM_BASES (0x6000)
+/* Values combined into SH_MEM_BASES by gfx_v11_0_init_compute_vmid(). */
+#define LDS_APP_BASE 0x1
+#define SCRATCH_APP_BASE 0x2
+
+/*
+ * Program per-VMID state for the VMIDs in the range
+ * [first_kfd_vmid, AMDGPU_NUM_VMID): SH_MEM_CONFIG, SH_MEM_BASES and the
+ * SPI_GDBG_PER_VMID_CNTL trap enable, then zero the GDS/GWS/OA
+ * registers for the same VMIDs.
+ */
+static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t sh_mem_bases;
+ uint32_t data;
+
+ /*
+ * Configure apertures:
+ * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
+ * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
+ * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ *
+ * NOTE(review): the value programmed below is built from
+ * LDS_APP_BASE/SCRATCH_APP_BASE (0x1/0x2), not from 0x6000; confirm
+ * that the address ranges listed above still apply.
+ */
+ sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
+ SCRATCH_APP_BASE;
+
+ /* The GRBM selection below is done while holding srbm_mutex. */
+ mutex_lock(&adev->srbm_mutex);
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ soc21_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
+
+ /* Enable trap for each kfd vmid. */
+ data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
+ }
+ /* Restore the default selection (all zeros) before dropping the lock. */
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /*
+ * Initialize all compute VMIDs to have no GDS, GWS, or OA
+ * access. These should be enabled by FW for target VMIDs.
+ */
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ /* BASE/SIZE registers are indexed with a stride of 2 per VMID. */
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0);
+ }
+}
+
+/*
+ * Zero the GDS base/size, GWS and OA registers for VMIDs 1 through 15.
+ * VMID 0 is deliberately left untouched.
+ */
+static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev)
+{
+ int vmid;
+
+ /*
+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
+ * access. Compute VMIDs should be enabled by FW for target VMIDs,
+ * the driver can enable them for graphics. VMID0 should maintain
+ * access so that HWS firmware can save/restore entries.
+ */
+ /* NOTE(review): the bound 16 is hard-coded here; presumably it matches AMDGPU_NUM_VMID - confirm. */
+ for (vmid = 1; vmid < 16; vmid++) {
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0);
+ }
+}
+
+/* Placeholder: currently performs no work. */
+static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev)
+{
+ /* TODO: harvest feature to be added later. */
+}
+
+/*
+ * Read CGTS_TCC_DISABLE and CGTS_USER_TCC_DISABLE, OR them together and
+ * store the resulting disable mask in adev->gfx.config.tcc_disabled_mask
+ * (TCC_DISABLE field in the low bits, HI_TCC_DISABLE shifted up by 16).
+ */
+static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev)
+{
+	uint32_t cc_disable, user_disable, combined;
+
+	/* TCCs are global (not instanced). */
+	cc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE);
+	user_disable = RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE);
+	combined = cc_disable | user_disable;
+
+	adev->gfx.config.tcc_disabled_mask =
+		(REG_GET_FIELD(combined, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16) |
+		REG_GET_FIELD(combined, CGTS_TCC_DISABLE, TCC_DISABLE);
+}
+
+/*
+ * One-time GFX constant setup: sets the GRBM read timeout (bare metal
+ * only), gathers RB/CU/TCC information, records the TA_CNTL2 truncate
+ * mode, programs SH_MEM_CONFIG/SH_MEM_BASES for each VMID of the GFX hub
+ * and finally initializes the compute and GDS VMID state.
+ */
+static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ int i;
+
+ /* Skipped when running as an SR-IOV virtual function. */
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+
+ gfx_v11_0_setup_rb(adev);
+ gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
+ gfx_v11_0_get_tcc_info(adev);
+ adev->gfx.config.pa_sc_tile_steering_override = 0;
+
+ /* Set whether texture coordinate truncation is conformant. */
+ tmp = RREG32_SOC15(GC, 0, regTA_CNTL2);
+ adev->gfx.config.ta_cntl2_truncate_coord_mode =
+ REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE);
+
+ /* XXX SH_MEM regs */
+ /* where to put LDS, scratch, GPUVM in FSA64 space */
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
+ soc21_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ /* SH_MEM_BASES is only programmed for non-zero VMIDs. */
+ if (i != 0) {
+ /* Only bits 48 and above of each aperture start are programmed. */
+ tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+ (adev->gmc.private_aperture_start >> 48));
+ tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+ (adev->gmc.shared_aperture_start >> 48));
+ WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
+ }
+ }
+ /* Restore the default selection (all zeros) before dropping the lock. */
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+
+ mutex_unlock(&adev->srbm_mutex);
+
+ gfx_v11_0_init_compute_vmid(adev);
+ gfx_v11_0_init_gds_vmid(adev);
+}
+
+/*
+ * Map a (me, pipe) pair to the register offset of its CP_INT_CNTL_RINGn
+ * register. Only me 0 with pipe 0 or 1 is handled; every other
+ * combination yields 0.
+ */
+static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev,
+				      int me, int pipe)
+{
+	u32 reg_offset = 0;
+
+	if (me == 0) {
+		if (pipe == 0)
+			reg_offset = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
+		else if (pipe == 1)
+			reg_offset = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
+	}
+
+	return reg_offset;
+}
+
+/*
+ * Map a (me, pipe) pair to the register offset of its
+ * CP_ME1_PIPEn_INT_CNTL register. Only me 1 with pipes 0-3 is handled;
+ * every other combination yields 0.
+ */
+static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev,
+				      int me, int pipe)
+{
+	u32 reg_offset = 0;
+
+	/*
+	 * amdgpu controls only the first MEC. That's why this function only
+	 * handles the setting of interrupts for this specific MEC. All other
+	 * pipes' interrupts are set by amdkfd.
+	 */
+	if (me == 1) {
+		switch (pipe) {
+		case 0:
+			reg_offset = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
+			break;
+		case 1:
+			reg_offset = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
+			break;
+		case 2:
+			reg_offset = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
+			break;
+		case 3:
+			reg_offset = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
+			break;
+		default:
+			break;
+		}
+	}
+
+	return reg_offset;
+}
+
+/*
+ * Set or clear the CNTX_BUSY, CNTX_EMPTY, CMP_BUSY and GFX_IDLE interrupt
+ * enable bits in the CP interrupt control register of every ME pipe that
+ * gfx_v11_0_get_cpg_int_cntl() knows about. Does nothing when running as
+ * an SR-IOV virtual function.
+ */
+static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
+						bool enable)
+{
+	const int en = enable ? 1 : 0;
+	u32 val, int_cntl_reg;
+	int me, pipe;
+
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	for (me = 0; me < adev->gfx.me.num_me; me++) {
+		for (pipe = 0; pipe < adev->gfx.me.num_pipe_per_me; pipe++) {
+			int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, me, pipe);
+
+			/* offset 0 means "no register for this me/pipe" */
+			if (!int_cntl_reg)
+				continue;
+
+			val = RREG32_SOC15_IP(GC, int_cntl_reg);
+			val = REG_SET_FIELD(val, CP_INT_CNTL_RING0,
+					    CNTX_BUSY_INT_ENABLE, en);
+			val = REG_SET_FIELD(val, CP_INT_CNTL_RING0,
+					    CNTX_EMPTY_INT_ENABLE, en);
+			val = REG_SET_FIELD(val, CP_INT_CNTL_RING0,
+					    CMP_BUSY_INT_ENABLE, en);
+			val = REG_SET_FIELD(val, CP_INT_CNTL_RING0,
+					    GFX_IDLE_INT_ENABLE, en);
+			WREG32_SOC15_IP(GC, int_cntl_reg, val);
+		}
+	}
+}
+
+/*
+ * Fill the clear-state buffer through the RLC get_csb_buffer callback and
+ * program its GPU address and size into the RLC_CSIB registers.
+ * Always returns 0.
+ */
+static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
+
+ WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
+ adev->gfx.rlc.clear_state_gpu_addr >> 32);
+ /* The two lowest address bits are masked off in the LO register. */
+ WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
+ adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
+ WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
+
+ return 0;
+}
+
+/* Stop the RLC by clearing RLC_ENABLE_F32 in RLC_CNTL (read-modify-write). */
+static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
+ WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
+}
+
+/*
+ * Pulse the SOFT_RESET_RLC bit in GRBM_SOFT_RESET: set it, wait 50us,
+ * clear it, wait another 50us.
+ */
+static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev)
+{
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+ udelay(50);
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
+ udelay(50);
+}
+
+/*
+ * Enable or disable the RLC <-> SMU handshake by clearing or setting
+ * SMU_HANDSHAKE_DISABLE in RLC_PG_CNTL (read-modify-write).
+ */
+static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
+					     bool enable)
+{
+	uint32_t pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
+
+	/* RLC_PG_CNTL[23] = 0 (default)
+	 * RLC will wait for handshake acks with SMU
+	 * GFXOFF will be enabled
+	 * RLC_PG_CNTL[23] = 1
+	 * RLC will not issue any message to SMU
+	 * hence no handshake between SMU & RLC
+	 * GFXOFF will be disabled
+	 */
+	if (enable)
+		pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
+	else
+		pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
+
+	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, pg_cntl);
+}
+
+/*
+ * Start the RLC by setting RLC_ENABLE_F32 in RLC_CNTL, then wait 50us.
+ * When PP_GFXOFF_MASK is not set in amdgpu_pp_feature_mask the RLC/SMU
+ * handshake is disabled first.
+ */
+static void gfx_v11_0_rlc_start(struct amdgpu_device *adev)
+{
+ /* TODO: enable rlc & smu handshake once smu
+ * and gfxoff feature work as expected.
+ * NOTE(review): the original wording said "until"; the code below
+ * only *disables* the handshake - confirm the intended meaning. */
+ if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
+ gfx_v11_0_rlc_smu_handshake_cntl(adev, false);
+
+ WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
+ udelay(50);
+}
+
+/*
+ * Turn on the RLC Save Restore Machine: set AUTO_INCR_ADDR and SRM_ENABLE
+ * in RLC_SRM_CNTL (read-modify-write).
+ */
+static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev)
+{
+	uint32_t srm_cntl;
+
+	/* enable Save Restore Machine */
+	srm_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
+	srm_cntl |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK |
+		    RLC_SRM_CNTL__SRM_ENABLE_MASK;
+	WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), srm_cntl);
+}
+
+/*
+ * Upload the RLCG microcode from adev->gfx.rlc_fw through the
+ * RLC_GPM_UCODE_ADDR/DATA register pair, one 32-bit word at a time.
+ * The ucode location and size come from the v2.0 firmware header.
+ */
+static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_0 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ /* size in 32-bit words */
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
+ RLCG_UCODE_LOADING_START_ADDRESS);
+
+ for (i = 0; i < fw_size; i++)
+ WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
+ le32_to_cpup(fw_data++));
+
+ /* After the upload the firmware version is written to the ADDR register. */
+ WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
+}
+
+/*
+ * Upload the RLC IRAM and DRAM microcode images described by the v2.2
+ * firmware header through the RLC_LX6_IRAM/DRAM ADDR/DATA register
+ * pairs, then set PDEBUG_ENABLE and clear BRESET in RLC_LX6_CNTL.
+ * In emulation mode (amdgpu_emu_mode == 1) the upload sleeps 1ms after
+ * every 100 words.
+ */
+static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_2 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+ u32 tmp;
+
+ hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+
+ /* IRAM image: offset and size (converted to 32-bit words) from the header */
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++) {
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
+
+ /* DRAM image: offset and size (converted to 32-bit words) from the header */
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
+ for (i = 0; i < fw_size; i++) {
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ /*
+ * NOTE(review): this writes the version to RLC_LX6_IRAM_ADDR again
+ * after the DRAM upload, whereas the upload itself used
+ * RLC_LX6_DRAM_ADDR. Possibly a copy/paste slip - confirm against
+ * the hardware programming guide before changing.
+ */
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
+
+ tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
+ tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
+ WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
+}
+
+/*
+ * Upload the RLCP and RLCV microcode images described by the v2.3
+ * firmware header. RLCP goes through RLC_PACE_UCODE_ADDR/DATA, after
+ * which THREAD1_ENABLE is set in RLC_GPM_THREAD_ENABLE; RLCV goes through
+ * RLC_GPU_IOV_UCODE_ADDR/DATA, after which ENABLE is set in
+ * RLC_GPU_IOV_F32_CNTL. In emulation mode (amdgpu_emu_mode == 1) the
+ * upload sleeps 1ms after every 100 words.
+ */
+static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_3 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+ u32 tmp;
+
+ hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
+
+ /* RLCP image: offset and size (converted to 32-bit words) from the header */
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlcp_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++) {
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version);
+
+ tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
+ tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp);
+
+ /* RLCV image: offset and size (converted to 32-bit words) from the header */
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlcv_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++) {
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version);
+
+ tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL);
+ tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1);
+ WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp);
+}
+
+/*
+ * Directly load the RLC microcode images that the firmware header
+ * version allows.
+ *
+ * Returns -EINVAL when no RLC firmware is present or when the header
+ * major version is not 2, otherwise 0. The RLCG image is always loaded;
+ * with amdgpu_dpm == 1 the IRAM/DRAM images are loaded for header minor
+ * version >= 2 and the RLCP/RLCV images for minor version == 3.
+ */
+static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
+{
+	const struct rlc_firmware_header_v2_0 *rlc_hdr;
+	uint16_t major;
+	uint16_t minor;
+
+	if (!adev->gfx.rlc_fw)
+		return -EINVAL;
+
+	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+	amdgpu_ucode_print_rlc_hdr(&rlc_hdr->header);
+
+	major = le16_to_cpu(rlc_hdr->header.header_version_major);
+	minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+
+	/* only v2.x headers are understood */
+	if (major != 2)
+		return -EINVAL;
+
+	gfx_v11_0_load_rlcg_microcode(adev);
+
+	/* the optional images are only loaded when amdgpu_dpm is 1 */
+	if (amdgpu_dpm != 1)
+		return 0;
+
+	if (minor >= 2)
+		gfx_v11_0_load_rlc_iram_dram_microcode(adev);
+	if (minor == 3)
+		gfx_v11_0_load_rlcp_rlcv_microcode(adev);
+
+	return 0;
+}
+
+/*
+ * Bring up the RLC.
+ *
+ * With PSP firmware loading only the clear-state buffer is programmed
+ * and, on bare metal, the Save Restore Machine is enabled. Otherwise (and
+ * not running as an SR-IOV VF) the RLC is stopped, CG/PG are disabled,
+ * the microcode is loaded directly when the load type is
+ * AMDGPU_FW_LOAD_DIRECT, the clear-state buffer is programmed and the RLC
+ * is started again.
+ *
+ * Returns 0 on success or the error from gfx_v11_0_rlc_load_microcode().
+ */
+static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ gfx_v11_0_init_csb(adev);
+
+ if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
+ gfx_v11_0_rlc_enable_srm(adev);
+ } else {
+ /* As an SR-IOV VF only the clear-state buffer is set up. */
+ if (amdgpu_sriov_vf(adev)) {
+ gfx_v11_0_init_csb(adev);
+ return 0;
+ }
+
+ adev->gfx.rlc.funcs->stop(adev);
+
+ /* disable CG */
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
+
+ /* disable PG */
+ WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /* legacy rlc firmware loading */
+ r = gfx_v11_0_rlc_load_microcode(adev);
+ if (r)
+ return r;
+ }
+
+ gfx_v11_0_init_csb(adev);
+
+ adev->gfx.rlc.funcs->start(adev);
+ }
+ return 0;
+}
+
+/*
+ * Invalidate the ME instruction cache and point it at the ucode located
+ * at @addr.
+ *
+ * Returns 0 on success or -EINVAL if the invalidation does not complete
+ * within the polling budget (50000 iterations of udelay(1)).
+ */
+static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* The HDP flush is only issued in emulation mode. */
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
+
+ /*
+ * Program me ucode address into instruction cache address register.
+ * The low 12 bits of the address are masked off.
+ */
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+/*
+ * Invalidate the PFP instruction cache and point it at the ucode located
+ * at @addr.
+ *
+ * Returns 0 on success or -EINVAL if the invalidation does not complete
+ * within the polling budget (50000 iterations of udelay(1)).
+ */
+static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* The HDP flush is only issued in emulation mode. */
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
+
+ /*
+ * Program pfp ucode address into instruction cache address register.
+ * The low 12 bits of the address are masked off.
+ */
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+/*
+ * Invalidate the CPC (MEC) instruction cache and point it at the ucode
+ * located at @addr.
+ *
+ * Returns 0 on success or -EINVAL if the invalidation does not complete
+ * within the polling budget (50000 iterations of udelay(1)).
+ */
+static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* The HDP flush is only issued in emulation mode. */
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ /* NOTE(review): unlike the ME/PFP variants, the VMID field is not written here - confirm this is intended. */
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
+
+ /*
+ * Program mec1 ucode address into instruction cache address register.
+ * The low 12 bits of the address are masked off.
+ */
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+/*
+ * Configure the PFP caches for RS64 firmware.
+ *
+ * @addr is programmed as the PFP instruction cache base and @addr2 as the
+ * RS64 data cache base 0. The function waits for the instruction cache
+ * invalidation, primes the instruction cache, programs the program
+ * counter start from the firmware header for every ME pipe (pulsing the
+ * per-pipe PFP reset), and finally invalidates the RS64 data cache.
+ *
+ * Returns 0 on success or -EINVAL when any of the three polls times out
+ * (50000 iterations of udelay(1) each).
+ */
+static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ unsigned i, pipe_id;
+ const struct gfx_firmware_header_v2_0 *pfp_hdr;
+
+ pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
+ lower_32_bits(addr));
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_PFP_IC_BASE registers
+ * forces invalidation of the L1 I$ (completion is polled
+ * below through CP_PFP_IC_OP_CNTL). Wait for the
+ * invalidation to complete.
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
+ /* Wait for the cache to be primed */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Per-pipe programming is done with the pipe selected under srbm_mutex. */
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ /* Start address from the firmware header, shifted right by 2 across the LO/HI pair. */
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
+ (pfp_hdr->ucode_start_addr_hi << 30) |
+ (pfp_hdr->ucode_start_addr_lo >> 2));
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
+ pfp_hdr->ucode_start_addr_hi >> 2);
+
+ /*
+ * Program CP_ME_CNTL to reset given PIPE to take
+ * effect of CP_PFP_PRGRM_CNTR_START.
+ */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* Clear the PFP reset bit of the selected pipe. */
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* Data cache base 0 points at @addr2. */
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
+ lower_32_bits(addr2));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
+ upper_32_bits(addr2));
+ }
+ /* Restore the default selection (all zeros) before dropping the lock. */
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ /* Wait for the data cache invalidation to complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Configure the ME caches for RS64 firmware.
+ *
+ * @addr is programmed as the ME instruction cache base and @addr2 as the
+ * RS64 data cache base 1. The function waits for the instruction cache
+ * invalidation, primes the instruction cache, programs the program
+ * counter start from the firmware header for every ME pipe (pulsing the
+ * per-pipe ME reset), and finally invalidates the RS64 data cache.
+ *
+ * Returns 0 on success or -EINVAL when any of the three polls times out
+ * (50000 iterations of udelay(1) each).
+ */
+static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ unsigned i, pipe_id;
+ const struct gfx_firmware_header_v2_0 *me_hdr;
+
+ me_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
+ lower_32_bits(addr));
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_ME_IC_BASE registers
+ * forces invalidation of the ME L1 I$. Wait for the
+ * invalidation to complete.
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
+
+ /* Wait for the instruction cache to be primed */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Per-pipe programming is done with the pipe selected under srbm_mutex. */
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ /* Start address from the firmware header, shifted right by 2 across the LO/HI pair. */
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
+ (me_hdr->ucode_start_addr_hi << 30) |
+ (me_hdr->ucode_start_addr_lo >> 2) );
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
+ me_hdr->ucode_start_addr_hi>>2);
+
+ /*
+ * Program CP_ME_CNTL to reset given PIPE to take
+ * effect of CP_ME_PRGRM_CNTR_START.
+ */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE0_RESET, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* Clear the ME reset bit of the selected pipe. */
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE0_RESET, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* Data cache base 1 points at @addr2. */
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
+ lower_32_bits(addr2));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
+ upper_32_bits(addr2));
+ }
+ /* Restore the default selection (all zeros) before dropping the lock. */
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ /* Wait for the data cache invalidation to complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Configure the MEC caches for RS64 firmware.
+ *
+ * @addr is programmed as the CPC instruction cache base and @addr2 as the
+ * MEC data base (CP_MEC_MDBASE) for every pipe of ME 1, together with the
+ * program counter start taken from the firmware header. Afterwards the
+ * MEC data cache and the CPC instruction cache are invalidated.
+ *
+ * Returns 0 on success or -EINVAL when either invalidation does not
+ * complete within the polling budget (50000 iterations of udelay(1)).
+ */
+static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
+{
+	uint32_t usec_timeout = 50000; /* wait for 50ms */
+	uint32_t tmp;
+	unsigned i;
+	const struct gfx_firmware_header_v2_0 *mec_hdr;
+
+	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
+		adev->gfx.mec_fw->data;
+
+	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
+	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
+	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
+
+	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
+	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
+	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
+
+	/* Per-pipe programming is done with the pipe selected under srbm_mutex. */
+	mutex_lock(&adev->srbm_mutex);
+	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+		soc21_grbm_select(adev, 1, i, 0, 0);
+
+		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO,
+			     lower_32_bits(addr2));
+		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
+			     upper_32_bits(addr2));
+
+		/* Start address from the firmware header, shifted right by 2 across the LO/HI pair. */
+		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
+			     mec_hdr->ucode_start_addr_lo >> 2 |
+			     mec_hdr->ucode_start_addr_hi << 30);
+		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
+			     mec_hdr->ucode_start_addr_hi >> 2);
+
+		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
+			     lower_32_bits(addr));
+		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
+			     upper_32_bits(addr));
+	}
+	/*
+	 * Restore the default GRBM selection while still holding the lock,
+	 * as the other users of soc21_grbm_select() in this file do.
+	 */
+	soc21_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+
+	/* Trigger an invalidation of the MEC data cache */
+	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
+
+	/* Wait for the data cache invalidation to complete */
+	for (i = 0; i < usec_timeout; i++) {
+		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
+				       INVALIDATE_DCACHE_COMPLETE))
+			break;
+		udelay(1);
+	}
+
+	if (i >= usec_timeout) {
+		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+		return -EINVAL;
+	}
+
+	/* Trigger an invalidation of the L1 instruction caches */
+	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
+
+	/* Wait for the instruction cache invalidation to complete */
+	for (i = 0; i < usec_timeout; i++) {
+		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+				       INVALIDATE_CACHE_COMPLETE))
+			break;
+		udelay(1);
+	}
+
+	if (i >= usec_timeout) {
+		dev_err(adev->dev, "failed to invalidate instruction cache\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Program the RS64 program counter start of the PFP, ME and MEC pipes from
+ * the respective firmware headers and pulse each engine's pipe reset bits
+ * (set, write, clear, write) after its start addresses have been written.
+ *
+ * The start address is written as (hi << 30) | (lo >> 2) into the low
+ * register and hi >> 2 into the _HI register, i.e. the 64 bit value from the
+ * header shifted right by two.
+ *
+ * NOTE(review): unlike the other functions in this file that call
+ * soc21_grbm_select(), this one does not take adev->srbm_mutex itself and
+ * uses fixed pipe counts (2 for PFP/ME, 4 for MEC) - confirm that callers
+ * provide the required serialization.
+ */
+static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v2_0 *pfp_hdr;
+ const struct gfx_firmware_header_v2_0 *me_hdr;
+ const struct gfx_firmware_header_v2_0 *mec_hdr;
+ uint32_t pipe_id, tmp;
+
+ mec_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ me_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+
+ /* config pfp program start addr */
+ for (pipe_id = 0; pipe_id < 2; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
+ (pfp_hdr->ucode_start_addr_hi << 30) |
+ (pfp_hdr->ucode_start_addr_lo >> 2));
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
+ pfp_hdr->ucode_start_addr_hi >> 2);
+ }
+ /* back to the default selection */
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+
+ /* reset pfp pipe */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* clear pfp pipe reset */
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* config me program start addr */
+ for (pipe_id = 0; pipe_id < 2; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
+ (me_hdr->ucode_start_addr_hi << 30) |
+ (me_hdr->ucode_start_addr_lo >> 2) );
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
+ me_hdr->ucode_start_addr_hi>>2);
+ }
+ /* back to the default selection */
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+
+ /* reset me pipe */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* clear me pipe reset */
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* config mec program start addr */
+ for (pipe_id = 0; pipe_id < 4; pipe_id++) {
+ soc21_grbm_select(adev, 1, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
+ mec_hdr->ucode_start_addr_lo >> 2 |
+ mec_hdr->ucode_start_addr_hi << 30);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
+ mec_hdr->ucode_start_addr_hi >> 2);
+ }
+ /* back to the default selection */
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+
+ /* reset mec pipe */
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
+
+ /* clear mec pipe reset */
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
+}
+
+/*
+ * Wait until the RLC has finished autoloading the GC microcode, then point
+ * the CP engines at the images inside the autoload buffer.
+ *
+ * Polls CP_STAT and the RLC bootload status once per microsecond for at most
+ * adev->usec_timeout iterations. When the firmware load type is
+ * AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO the ME, PFP and MEC caches are configured
+ * (RS64 or legacy variant, depending on adev->gfx.rs64_enable) with addresses
+ * derived from rlc_autoload_gpu_addr plus the per-firmware offsets.
+ *
+ * Returns 0 on success, -ETIMEDOUT when the autoload does not complete, or
+ * the error reported by one of the cache configuration helpers.
+ */
+static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
+{
+	uint32_t cp_status;
+	uint32_t bootload_status;
+	int i, r;
+	uint64_t addr, addr2;
+	bool gc_11_0_1_status_reg;
+
+	/*
+	 * The IP version cannot change while polling, so decide once which
+	 * bootload status register applies instead of re-evaluating the
+	 * version checks on every iteration.
+	 */
+	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+	case IP_VERSION(11, 0, 1):
+	case IP_VERSION(11, 0, 4):
+	case IP_VERSION(11, 5, 0):
+	case IP_VERSION(11, 5, 1):
+	case IP_VERSION(11, 5, 2):
+	case IP_VERSION(11, 5, 3):
+		gc_11_0_1_status_reg = true;
+		break;
+	default:
+		gc_11_0_1_status_reg = false;
+		break;
+	}
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
+
+		if (gc_11_0_1_status_reg)
+			bootload_status = RREG32_SOC15(GC, 0,
+					regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
+		else
+			bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
+
+		if ((cp_status == 0) &&
+		    (REG_GET_FIELD(bootload_status,
+			RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
+			break;
+		}
+		udelay(1);
+	}
+
+	if (i >= adev->usec_timeout) {
+		dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
+		return -ETIMEDOUT;
+	}
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+		if (adev->gfx.rs64_enable) {
+			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset;
+			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
+				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset;
+			r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2);
+			if (r)
+				return r;
+			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset;
+			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
+				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset;
+			r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2);
+			if (r)
+				return r;
+			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset;
+			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
+				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset;
+			r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2);
+			if (r)
+				return r;
+		} else {
+			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset;
+			r = gfx_v11_0_config_me_cache(adev, addr);
+			if (r)
+				return r;
+			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset;
+			r = gfx_v11_0_config_pfp_cache(adev, addr);
+			if (r)
+				return r;
+			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset;
+			r = gfx_v11_0_config_mec_cache(adev, addr);
+			if (r)
+				return r;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Halt or unhalt the gfx ME and PFP through CP_ME_CNTL and then poll CP_STAT
+ * until it reads zero, for at most adev->usec_timeout microseconds.
+ *
+ * A timeout is only logged; the function always returns 0.
+ */
+static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
+{
+	u32 me_cntl;
+	int usec = 0;
+
+	me_cntl = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+	me_cntl = REG_SET_FIELD(me_cntl, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
+	me_cntl = REG_SET_FIELD(me_cntl, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
+	WREG32_SOC15(GC, 0, regCP_ME_CNTL, me_cntl);
+
+	/* one status read per microsecond until the CP reports idle */
+	while (usec < adev->usec_timeout &&
+	       RREG32_SOC15(GC, 0, regCP_STAT) != 0) {
+		udelay(1);
+		usec++;
+	}
+
+	if (usec >= adev->usec_timeout)
+		DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
+
+	return 0;
+}
+
+/*
+ * Load the v1 (non-RS64) PFP microcode.
+ *
+ * Copies the ucode array of adev->gfx.pfp_fw into a freshly created GTT
+ * buffer object, configures the PFP cache to fetch from it and writes the
+ * jt_size dwords found at jt_offset through CP_HYP_PFP_UCODE_DATA.
+ *
+ * Returns 0 on success or a negative error code when the buffer object
+ * cannot be created or the cache configuration fails.
+ */
+static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
+{
+	int r;
+	const struct gfx_firmware_header_v1_0 *pfp_hdr;
+	const __le32 *fw_data;
+	unsigned i, fw_size, jt_offset, jt_size;
+
+	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
+		adev->gfx.pfp_fw->data;
+
+	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+
+	/* firmware header fields are little endian: convert every one once */
+	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+		le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
+	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
+	jt_offset = le32_to_cpu(pfp_hdr->jt_offset);
+	jt_size = le32_to_cpu(pfp_hdr->jt_size);
+
+	/* size the BO with the converted value that memcpy() below uses */
+	r = amdgpu_bo_create_reserved(adev, fw_size,
+				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+				      &adev->gfx.pfp.pfp_fw_obj,
+				      &adev->gfx.pfp.pfp_fw_gpu_addr,
+				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
+	if (r) {
+		dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
+		gfx_v11_0_pfp_fini(adev);
+		return r;
+	}
+
+	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
+
+	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
+	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
+
+	/* do not keep going when the cache setup reports a failure */
+	r = gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr);
+	if (r)
+		return r;
+
+	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0);
+
+	for (i = 0; i < jt_size; i++)
+		WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA,
+			     le32_to_cpup(fw_data + jt_offset + i));
+
+	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
+
+	return 0;
+}
+
+/*
+ * Load the RS64 (v2 header) PFP firmware.
+ *
+ * The instruction and data sections of adev->gfx.pfp_fw are copied into two
+ * separate 64KB aligned buffer objects (VRAM or GTT). The instruction cache
+ * base is programmed and the cache is primed, then for every gfx pipe the
+ * program counter start is written, the pipe's PFP reset bit is pulsed and
+ * the data base (CP_GFX_RS64_DC_BASE0) is set. Finally the RS64 data cache is
+ * invalidated.
+ *
+ * Returns 0 on success, the error from amdgpu_bo_create_reserved(), or
+ * -EINVAL when a cache operation does not complete within 50ms.
+ *
+ * NOTE(review): the timeout paths return without calling
+ * gfx_v11_0_pfp_fini(), unlike the allocation failure paths - confirm the
+ * buffer objects are released elsewhere.
+ */
+static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v2_0 *pfp_hdr;
+ const __le32 *fw_ucode, *fw_data;
+ unsigned i, pipe_id, fw_ucode_size, fw_data_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+
+ /* instruction */
+ fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->ucode_offset_bytes));
+ fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->data_offset_bytes));
+ fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
+
+ /* 64kb align */
+ r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
+ gfx_v11_0_pfp_fini(adev);
+ return r;
+ }
+
+ /*
+ * NOTE(review): if this second allocation fails, the instruction BO
+ * created above has not been unmapped/unreserved yet when
+ * gfx_v11_0_pfp_fini() runs - confirm that helper copes with that.
+ */
+ r = amdgpu_bo_create_reserved(adev, fw_data_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.pfp.pfp_fw_data_obj,
+ &adev->gfx.pfp.pfp_fw_data_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
+ gfx_v11_0_pfp_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
+ memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
+
+ amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
+ amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
+ amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
+
+ /* HDP flush is only issued in emulation mode */
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
+ lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
+ upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_PFP_IC_BASE registers
+ * forces invalidation of the PFP L1 I$. Wait for the
+ * invalidation to complete.
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
+ /* Wait for the cache to report primed */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* per-pipe programming below is done with srbm_mutex held */
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
+ (pfp_hdr->ucode_start_addr_hi << 30) |
+ (pfp_hdr->ucode_start_addr_lo >> 2) );
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
+ pfp_hdr->ucode_start_addr_hi>>2);
+
+ /*
+ * Program CP_ME_CNTL to reset given PIPE to take
+ * effect of CP_PFP_PRGRM_CNTR_START.
+ */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* Clear the PFP reset bit of the pipe that was just reset. */
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
+ lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
+ upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
+ }
+ /* restore the default selection before dropping the lock */
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ /* Wait for the data cache invalidation to complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Load the v1 (non-RS64) ME microcode.
+ *
+ * Copies the ucode array of adev->gfx.me_fw into a freshly created GTT
+ * buffer object, configures the ME cache to fetch from it and writes the
+ * jt_size dwords found at jt_offset through CP_HYP_ME_UCODE_DATA.
+ *
+ * Returns 0 on success or a negative error code when the buffer object
+ * cannot be created or the cache configuration fails.
+ */
+static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
+{
+	int r;
+	const struct gfx_firmware_header_v1_0 *me_hdr;
+	const __le32 *fw_data;
+	unsigned i, fw_size, jt_offset, jt_size;
+
+	me_hdr = (const struct gfx_firmware_header_v1_0 *)
+		adev->gfx.me_fw->data;
+
+	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+
+	/* firmware header fields are little endian: convert every one once */
+	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+		le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
+	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
+	jt_offset = le32_to_cpu(me_hdr->jt_offset);
+	jt_size = le32_to_cpu(me_hdr->jt_size);
+
+	/* size the BO with the converted value that memcpy() below uses */
+	r = amdgpu_bo_create_reserved(adev, fw_size,
+				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+				      &adev->gfx.me.me_fw_obj,
+				      &adev->gfx.me.me_fw_gpu_addr,
+				      (void **)&adev->gfx.me.me_fw_ptr);
+	if (r) {
+		dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
+		gfx_v11_0_me_fini(adev);
+		return r;
+	}
+
+	memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
+
+	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
+	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
+
+	/* do not keep going when the cache setup reports a failure */
+	r = gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);
+	if (r)
+		return r;
+
+	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);
+
+	for (i = 0; i < jt_size; i++)
+		WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
+			     le32_to_cpup(fw_data + jt_offset + i));
+
+	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
+
+	return 0;
+}
+
+/*
+ * Load the RS64 (v2 header) ME firmware.
+ *
+ * The instruction and data sections of adev->gfx.me_fw are copied into two
+ * separate 64KB aligned buffer objects (VRAM or GTT). The instruction cache
+ * base is programmed and the cache is primed, then for every gfx pipe the
+ * program counter start is written, the pipe's ME reset bit is pulsed and
+ * the data base (CP_GFX_RS64_DC_BASE1) is set. Finally the RS64 data cache is
+ * invalidated.
+ *
+ * Returns 0 on success, the error from amdgpu_bo_create_reserved(), or
+ * -EINVAL when a cache operation does not complete within 50ms.
+ */
+static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
+{
+	int r;
+	const struct gfx_firmware_header_v2_0 *me_hdr;
+	const __le32 *fw_ucode, *fw_data;
+	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
+	uint32_t tmp;
+	uint32_t usec_timeout = 50000;  /* wait for 50ms */
+
+	me_hdr = (const struct gfx_firmware_header_v2_0 *)
+		adev->gfx.me_fw->data;
+
+	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+
+	/* instruction */
+	fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
+		le32_to_cpu(me_hdr->ucode_offset_bytes));
+	fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
+	/* data */
+	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+		le32_to_cpu(me_hdr->data_offset_bytes));
+	fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
+
+	/* 64kb align */
+	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+				      64 * 1024,
+				      AMDGPU_GEM_DOMAIN_VRAM |
+				      AMDGPU_GEM_DOMAIN_GTT,
+				      &adev->gfx.me.me_fw_obj,
+				      &adev->gfx.me.me_fw_gpu_addr,
+				      (void **)&adev->gfx.me.me_fw_ptr);
+	if (r) {
+		dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
+		gfx_v11_0_me_fini(adev);
+		return r;
+	}
+
+	r = amdgpu_bo_create_reserved(adev, fw_data_size,
+				      64 * 1024,
+				      AMDGPU_GEM_DOMAIN_VRAM |
+				      AMDGPU_GEM_DOMAIN_GTT,
+				      &adev->gfx.me.me_fw_data_obj,
+				      &adev->gfx.me.me_fw_data_gpu_addr,
+				      (void **)&adev->gfx.me.me_fw_data_ptr);
+	if (r) {
+		dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
+		/* tear down the ME objects, not the PFP ones */
+		gfx_v11_0_me_fini(adev);
+		return r;
+	}
+
+	memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
+	memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
+
+	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
+	amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
+	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
+	amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
+
+	/* HDP flush is only issued in emulation mode */
+	if (amdgpu_emu_mode == 1)
+		amdgpu_device_flush_hdp(adev, NULL);
+
+	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
+		lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
+	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
+		upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
+
+	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
+	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
+	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
+	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
+
+	/*
+	 * Programming any of the CP_ME_IC_BASE registers
+	 * forces invalidation of the ME L1 I$. Wait for the
+	 * invalidation complete
+	 */
+	for (i = 0; i < usec_timeout; i++) {
+		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+			INVALIDATE_CACHE_COMPLETE))
+			break;
+		udelay(1);
+	}
+
+	if (i >= usec_timeout) {
+		dev_err(adev->dev, "failed to invalidate instruction cache\n");
+		return -EINVAL;
+	}
+
+	/* Prime the instruction caches */
+	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
+	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
+
+	/* Waiting for instruction cache primed*/
+	for (i = 0; i < usec_timeout; i++) {
+		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+			ICACHE_PRIMED))
+			break;
+		udelay(1);
+	}
+
+	if (i >= usec_timeout) {
+		dev_err(adev->dev, "failed to prime instruction cache\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&adev->srbm_mutex);
+	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
+			(me_hdr->ucode_start_addr_hi << 30) |
+			(me_hdr->ucode_start_addr_lo >> 2) );
+		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
+			me_hdr->ucode_start_addr_hi>>2);
+
+		/*
+		 * Program CP_ME_CNTL to reset given PIPE to take
+		 * effect of CP_ME_PRGRM_CNTR_START.
+		 */
+		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+		if (pipe_id == 0)
+			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+					ME_PIPE0_RESET, 1);
+		else
+			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+					ME_PIPE1_RESET, 1);
+		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+		/* Clear the ME reset bit of the pipe that was just reset. */
+		if (pipe_id == 0)
+			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+					ME_PIPE0_RESET, 0);
+		else
+			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+					ME_PIPE1_RESET, 0);
+		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
+			lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
+		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
+			upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
+	}
+	/* restore the default selection before dropping the lock */
+	soc21_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+
+	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+	/* Invalidate the data caches */
+	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+	for (i = 0; i < usec_timeout; i++) {
+		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+			INVALIDATE_DCACHE_COMPLETE))
+			break;
+		udelay(1);
+	}
+
+	if (i >= usec_timeout) {
+		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Halt the gfx CP and load the PFP firmware followed by the ME firmware,
+ * using the RS64 loaders when adev->gfx.rs64_enable is set.
+ *
+ * Returns -EINVAL when either firmware image is missing, otherwise the
+ * result of the first loader that fails, or 0.
+ */
+static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
+{
+	int ret;
+
+	if (!(adev->gfx.me_fw && adev->gfx.pfp_fw))
+		return -EINVAL;
+
+	/* the engines stay halted while their firmware is replaced */
+	gfx_v11_0_cp_gfx_enable(adev, false);
+
+	ret = adev->gfx.rs64_enable ?
+		gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev) :
+		gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
+	if (ret) {
+		dev_err(adev->dev, "(%d) failed to load pfp fw\n", ret);
+		return ret;
+	}
+
+	ret = adev->gfx.rs64_enable ?
+		gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev) :
+		gfx_v11_0_cp_gfx_load_me_microcode(adev);
+	if (ret)
+		dev_err(adev->dev, "(%d) failed to load me fw\n", ret);
+
+	return ret;
+}
+
+/*
+ * Initialise the gfx CP and submit the clear-state preamble on gfx ring 0.
+ *
+ * The preamble consists of the SECT_CONTEXT register extents from
+ * gfx11_cs_data plus PA_SC_TILE_STEERING_OVERRIDE, followed by a CLEAR_STATE
+ * packet. When a second gfx ring exists a lone CLEAR_STATE packet is
+ * submitted on it as well.
+ *
+ * Returns 0 on success or the error from amdgpu_ring_alloc().
+ */
+static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
+	const struct cs_section_def *section;
+	const struct cs_extent_def *extent;
+	int steering_reg;
+	int ret, n;
+
+	/* init the CP */
+	WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
+		     adev->gfx.config.max_hw_contexts - 1);
+	WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
+
+	if (!amdgpu_async_gfx_ring)
+		gfx_v11_0_cp_gfx_enable(adev, true);
+
+	ret = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
+	if (ret) {
+		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", ret);
+		return ret;
+	}
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+	amdgpu_ring_write(ring, 0x80000000);
+	amdgpu_ring_write(ring, 0x80000000);
+
+	/* only context register sections are emitted into the ring */
+	for (section = gfx11_cs_data; section->section != NULL; section++) {
+		if (section->id != SECT_CONTEXT)
+			continue;
+
+		for (extent = section->section; extent->extent != NULL; extent++) {
+			amdgpu_ring_write(ring,
+					  PACKET3(PACKET3_SET_CONTEXT_REG,
+						  extent->reg_count));
+			amdgpu_ring_write(ring, extent->reg_index -
+					  PACKET3_SET_CONTEXT_REG_START);
+			for (n = 0; n < extent->reg_count; n++)
+				amdgpu_ring_write(ring, extent->extent[n]);
+		}
+	}
+
+	steering_reg = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) -
+		PACKET3_SET_CONTEXT_REG_START;
+	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	amdgpu_ring_write(ring, steering_reg);
+	amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+	amdgpu_ring_write(ring, 0);
+
+	amdgpu_ring_commit(ring);
+
+	if (adev->gfx.num_gfx_rings <= 1)
+		return 0;
+
+	/*
+	 * submit cs packet to copy state 0 to next available state;
+	 * maximum supported gfx ring is 2
+	 */
+	ring = &adev->gfx.gfx_ring[1];
+	ret = amdgpu_ring_alloc(ring, 2);
+	if (ret) {
+		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", ret);
+		return ret;
+	}
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+	amdgpu_ring_write(ring, 0);
+
+	amdgpu_ring_commit(ring);
+
+	return 0;
+}
+
+/* Write @pipe into the PIPEID field of GRBM_GFX_CNTL (read-modify-write). */
+static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
+					 CP_PIPE_ID pipe)
+{
+	u32 gfx_cntl = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
+
+	gfx_cntl = REG_SET_FIELD(gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe);
+	WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, gfx_cntl);
+}
+
+/*
+ * Program the ring buffer doorbell registers for @ring.
+ *
+ * CP_RB_DOORBELL_CONTROL gets the ring's doorbell offset and is enabled when
+ * the ring uses a doorbell, otherwise the doorbell is disabled. The doorbell
+ * range is set from the ring's doorbell index up to the maximum value of the
+ * upper range field.
+ */
+static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
+					  struct amdgpu_ring *ring)
+{
+	u32 db_ctrl = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
+	u32 range_lower;
+
+	if (!ring->use_doorbell) {
+		db_ctrl = REG_SET_FIELD(db_ctrl, CP_RB_DOORBELL_CONTROL,
+					DOORBELL_EN, 0);
+	} else {
+		db_ctrl = REG_SET_FIELD(db_ctrl, CP_RB_DOORBELL_CONTROL,
+					DOORBELL_OFFSET, ring->doorbell_index);
+		db_ctrl = REG_SET_FIELD(db_ctrl, CP_RB_DOORBELL_CONTROL,
+					DOORBELL_EN, 1);
+	}
+	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, db_ctrl);
+
+	range_lower = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
+				    DOORBELL_RANGE_LOWER, ring->doorbell_index);
+	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, range_lower);
+
+	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
+		     CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
+}
+
+/*
+ * Bring up the gfx ring buffer(s).
+ *
+ * Programs size, write pointer, rptr/wptr writeback addresses, base address
+ * and doorbell of gfx ring 0 on pipe 0 and, when present, gfx ring 1 on
+ * pipe 1, switches back to pipe 0 and finally starts the ring(s) through
+ * gfx_v11_0_cp_gfx_start().
+ *
+ * Returns 0 on success or the error reported by gfx_v11_0_cp_gfx_start().
+ */
+static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	u32 tmp;
+	u32 rb_bufsz;
+	u64 rb_addr, rptr_addr, wptr_gpu_addr;
+
+	/* Set the write pointer delay */
+	WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
+
+	/* set the RB to use vmid 0 */
+	WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
+
+	/* Init gfx ring 0 for pipe 0 */
+	mutex_lock(&adev->srbm_mutex);
+	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
+
+	/* Set ring buffer size */
+	ring = &adev->gfx.gfx_ring[0];
+	rb_bufsz = order_base_2(ring->ring_size / 8);
+	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
+	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
+	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
+
+	/* Initialize the ring buffer's write pointers */
+	ring->wptr = 0;
+	WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
+	WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
+
+	/* set the wb address whether it's enabled or not */
+	rptr_addr = ring->rptr_gpu_addr;
+	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
+	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
+		     CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+
+	wptr_gpu_addr = ring->wptr_gpu_addr;
+	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
+		     lower_32_bits(wptr_gpu_addr));
+	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
+		     upper_32_bits(wptr_gpu_addr));
+
+	mdelay(1);
+	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
+
+	rb_addr = ring->gpu_addr >> 8;
+	WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
+	WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
+
+	WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
+
+	gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
+	mutex_unlock(&adev->srbm_mutex);
+
+	/* Init gfx ring 1 for pipe 1 */
+	if (adev->gfx.num_gfx_rings > 1) {
+		mutex_lock(&adev->srbm_mutex);
+		gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
+		/* maximum supported gfx ring is 2 */
+		ring = &adev->gfx.gfx_ring[1];
+		rb_bufsz = order_base_2(ring->ring_size / 8);
+		tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
+		tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
+		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
+		/* Initialize the ring buffer's write pointers */
+		ring->wptr = 0;
+		WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
+		WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
+		/* Set the wb address whether it's enabled or not */
+		rptr_addr = ring->rptr_gpu_addr;
+		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
+		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
+			     CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+		wptr_gpu_addr = ring->wptr_gpu_addr;
+		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
+			     lower_32_bits(wptr_gpu_addr));
+		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
+			     upper_32_bits(wptr_gpu_addr));
+
+		mdelay(1);
+		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
+
+		rb_addr = ring->gpu_addr >> 8;
+		WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
+		WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
+		WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);
+
+		gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
+		mutex_unlock(&adev->srbm_mutex);
+	}
+	/* Switch to pipe 0 */
+	mutex_lock(&adev->srbm_mutex);
+	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
+	mutex_unlock(&adev->srbm_mutex);
+
+	/*
+	 * start the ring; a ring allocation failure in there must not be
+	 * reported to the caller as success
+	 */
+	return gfx_v11_0_cp_gfx_start(adev);
+}
+
+/*
+ * Enable or disable the compute micro engine(s).
+ *
+ * With RS64 firmware CP_MEC_RS64_CNTL is updated: the icache-invalidate,
+ * pipe reset and halt bits are cleared and the pipe active bits set when
+ * enabling, and the reverse when disabling. Without RS64 the ME1/ME2 halt
+ * bits of CP_MEC_CNTL are used; ME2 is only unhalted when
+ * adev->enable_mes_kiq is not set. A 50us delay follows the register write.
+ */
+static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
+{
+	u32 cntl;
+
+	if (!adev->gfx.rs64_enable) {
+		cntl = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
+
+		if (!enable) {
+			cntl = REG_SET_FIELD(cntl, CP_MEC_CNTL, MEC_ME1_HALT, 1);
+			cntl = REG_SET_FIELD(cntl, CP_MEC_CNTL, MEC_ME2_HALT, 1);
+		} else {
+			cntl = REG_SET_FIELD(cntl, CP_MEC_CNTL, MEC_ME1_HALT, 0);
+			if (!adev->enable_mes_kiq)
+				cntl = REG_SET_FIELD(cntl, CP_MEC_CNTL,
+						     MEC_ME2_HALT, 0);
+		}
+		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, cntl);
+	} else {
+		/* value for the bits that hold the engine back */
+		const u32 hold = enable ? 0 : 1;
+		/* value for the bits that let the pipes run */
+		const u32 run = enable ? 1 : 0;
+
+		cntl = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+		cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL,
+				     MEC_INVALIDATE_ICACHE, hold);
+		cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, hold);
+		cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, hold);
+		cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, hold);
+		cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, hold);
+		cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE, run);
+		cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE, run);
+		cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE, run);
+		cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE, run);
+		cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_HALT, hold);
+		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, cntl);
+	}
+
+	udelay(50);
+}
+
+/*
+ * Load the v1 (non-RS64) MEC microcode.
+ *
+ * Disables the compute engine, copies the ucode array of adev->gfx.mec_fw
+ * into a freshly created GTT buffer object, configures the MEC cache to
+ * fetch from it and writes the jt_size dwords found at jt_offset through
+ * CP_MEC_ME1_UCODE_DATA.
+ *
+ * Returns 0 on success, -EINVAL when no MEC firmware is present, or a
+ * negative error code when the buffer object cannot be created or the cache
+ * configuration fails.
+ */
+static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
+{
+	const struct gfx_firmware_header_v1_0 *mec_hdr;
+	const __le32 *fw_data;
+	unsigned i, fw_size, jt_offset, jt_size;
+	u32 *fw = NULL;
+	int r;
+
+	if (!adev->gfx.mec_fw)
+		return -EINVAL;
+
+	gfx_v11_0_cp_compute_enable(adev, false);
+
+	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+
+	/* firmware header fields are little endian: convert every one once */
+	fw_data = (const __le32 *)
+		(adev->gfx.mec_fw->data +
+		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
+	jt_offset = le32_to_cpu(mec_hdr->jt_offset);
+	jt_size = le32_to_cpu(mec_hdr->jt_size);
+
+	/* size the BO with the converted value that memcpy() below uses */
+	r = amdgpu_bo_create_reserved(adev, fw_size,
+				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+				      &adev->gfx.mec.mec_fw_obj,
+				      &adev->gfx.mec.mec_fw_gpu_addr,
+				      (void **)&fw);
+	if (r) {
+		dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
+		gfx_v11_0_mec_fini(adev);
+		return r;
+	}
+
+	memcpy(fw, fw_data, fw_size);
+
+	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+
+	/* do not keep going when the cache setup reports a failure */
+	r = gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);
+	if (r)
+		return r;
+
+	/* MEC1 */
+	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);
+
+	for (i = 0; i < jt_size; i++)
+		WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
+			     le32_to_cpup(fw_data + jt_offset + i));
+
+	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
+
+	return 0;
+}
+
+/**
+ * gfx_v11_0_cp_compute_load_microcode_rs64 - load the RS64 MEC microcode
+ * @adev: amdgpu device pointer
+ *
+ * Disables the compute CP, copies the ucode and data sections of the v2 MEC
+ * firmware image into two newly created buffer objects (VRAM or GTT),
+ * programs the per-pipe data base, program counter start and instruction
+ * base registers of ME 1, and finally invalidates the MEC data cache and
+ * the CPC instruction cache, polling up to 50 ms for each to complete.
+ *
+ * Returns 0 on success, -EINVAL if no MEC firmware is present or a cache
+ * invalidation times out, or the error from amdgpu_bo_create_reserved().
+ */
+static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
+{
+	const struct gfx_firmware_header_v2_0 *mec_hdr;
+	const __le32 *fw_ucode, *fw_data;
+	u32 tmp, fw_ucode_size, fw_data_size;
+	u32 start_addr_lo, start_addr_hi;
+	u32 i, usec_timeout = 50000; /* Wait for 50 ms */
+	u32 *fw_ucode_ptr, *fw_data_ptr;
+	int r;
+
+	if (!adev->gfx.mec_fw)
+		return -EINVAL;
+
+	gfx_v11_0_cp_compute_enable(adev, false);
+
+	mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
+	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+
+	fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
+				     le32_to_cpu(mec_hdr->ucode_offset_bytes));
+	fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
+
+	fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+				    le32_to_cpu(mec_hdr->data_offset_bytes));
+	fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
+
+	/* header fields are little-endian; convert before building registers */
+	start_addr_lo = le32_to_cpu(mec_hdr->ucode_start_addr_lo);
+	start_addr_hi = le32_to_cpu(mec_hdr->ucode_start_addr_hi);
+
+	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+				      64 * 1024,
+				      AMDGPU_GEM_DOMAIN_VRAM |
+				      AMDGPU_GEM_DOMAIN_GTT,
+				      &adev->gfx.mec.mec_fw_obj,
+				      &adev->gfx.mec.mec_fw_gpu_addr,
+				      (void **)&fw_ucode_ptr);
+	if (r) {
+		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
+		gfx_v11_0_mec_fini(adev);
+		return r;
+	}
+
+	r = amdgpu_bo_create_reserved(adev, fw_data_size,
+				      64 * 1024,
+				      AMDGPU_GEM_DOMAIN_VRAM |
+				      AMDGPU_GEM_DOMAIN_GTT,
+				      &adev->gfx.mec.mec_fw_data_obj,
+				      &adev->gfx.mec.mec_fw_data_gpu_addr,
+				      (void **)&fw_data_ptr);
+	if (r) {
+		/* this is the data BO; say so, so the two failures can be told apart */
+		dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
+		gfx_v11_0_mec_fini(adev);
+		return r;
+	}
+
+	memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
+	memcpy(fw_data_ptr, fw_data, fw_data_size);
+
+	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
+	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
+
+	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
+	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
+	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
+
+	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
+	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
+	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
+
+	mutex_lock(&adev->srbm_mutex);
+	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+		soc21_grbm_select(adev, 1, i, 0, 0);
+
+		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO,
+			     lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
+		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
+			     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
+
+		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
+			     (start_addr_lo >> 2) | (start_addr_hi << 30));
+		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
+			     start_addr_hi >> 2);
+
+		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
+			     lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
+			     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+	}
+	/*
+	 * Restore the default GRBM selection while still holding srbm_mutex,
+	 * so no other selector can interleave with this reset.
+	 */
+	soc21_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+
+	/* Trigger an invalidation of the MEC data cache */
+	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
+
+	/* Wait for invalidation complete */
+	for (i = 0; i < usec_timeout; i++) {
+		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
+				       INVALIDATE_DCACHE_COMPLETE))
+			break;
+		udelay(1);
+	}
+
+	if (i >= usec_timeout) {
+		dev_err(adev->dev, "failed to invalidate data cache\n");
+		return -EINVAL;
+	}
+
+	/* Trigger an invalidation of the CPC instruction cache */
+	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
+
+	/* Wait for invalidation complete */
+	for (i = 0; i < usec_timeout; i++) {
+		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+				       INVALIDATE_CACHE_COMPLETE))
+			break;
+		udelay(1);
+	}
+
+	if (i >= usec_timeout) {
+		dev_err(adev->dev, "failed to invalidate instruction cache\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Record the KIQ ring's me/pipe/queue in the low byte of RLC_CP_SCHEDULERS
+ * and set bit 7 of that byte in the same register write.
+ */
+static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t queue_id, sched;
+
+	/* queue id layout: me in bits 6:5, pipe in bits 4:3, queue in bits 2:0 */
+	queue_id = (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+
+	/* tell RLC which is KIQ queue */
+	sched = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
+	sched = (sched & 0xffffff00) | queue_id;
+	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, sched | 0x80);
+}
+
+/*
+ * Program the doorbell windows of the graphics and compute engines. Each
+ * bound is a doorbell index from adev->doorbell_index, doubled and then
+ * shifted left by two before being written.
+ */
+static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
+{
+	u32 gfx_first = (adev->doorbell_index.gfx_ring0 * 2) << 2;
+	u32 gfx_last = (adev->doorbell_index.gfx_userqueue_end * 2) << 2;
+	u32 mec_first = (adev->doorbell_index.kiq * 2) << 2;
+	u32 mec_last = (adev->doorbell_index.userqueue_end * 2) << 2;
+
+	/* graphics engine: gfx_ring0 .. gfx_userqueue_end */
+	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, gfx_first);
+	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, gfx_last);
+
+	/* compute engine: kiq .. userqueue_end */
+	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, mec_first);
+	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, mec_last);
+}
+
+/*
+ * Fill mqd->cp_gfx_hqd_queue_priority from the register default, with
+ * PRIORITY_LEVEL set to 1 when the queue was requested with
+ * AMDGPU_GFX_PIPE_PRIO_HIGH and to 0 otherwise.
+ */
+static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
+					   struct v11_gfx_mqd *mqd,
+					   struct amdgpu_mqd_prop *prop)
+{
+	/* 0x0 = low priority, 0x1 = high priority */
+	bool high_prio =
+		(prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH);
+	u32 val = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT;
+
+	mqd->cp_gfx_hqd_queue_priority =
+		REG_SET_FIELD(val, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL,
+			      high_prio);
+}
+
+/**
+ * gfx_v11_0_gfx_mqd_init - fill in a gfx memory queue descriptor
+ * @adev: amdgpu device pointer
+ * @m: MQD to initialise, interpreted as struct v11_gfx_mqd
+ * @prop: queue properties (addresses, size, doorbell, priority, flags)
+ *
+ * Only writes fields of @m; no hardware register is touched here.
+ *
+ * Always returns 0.
+ */
+static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
+				  struct amdgpu_mqd_prop *prop)
+{
+	struct v11_gfx_mqd *mqd = m;
+	uint64_t hqd_gpu_addr, wb_gpu_addr;
+	uint32_t tmp;
+	uint32_t rb_bufsz;
+
+	/* set up gfx hqd wptr */
+	mqd->cp_gfx_hqd_wptr = 0;
+	mqd->cp_gfx_hqd_wptr_hi = 0;
+
+	/* set the pointer to the MQD */
+	mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
+	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
+
+	/* set up mqd control */
+	tmp = regCP_GFX_MQD_CONTROL_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
+	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
+	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
+	mqd->cp_gfx_mqd_control = tmp;
+
+	/*
+	 * set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid;
+	 * the field is written as a literal 0, so no register default is
+	 * composed for it.
+	 */
+	mqd->cp_gfx_hqd_vmid = 0;
+
+	/* set up gfx queue priority */
+	gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop);
+
+	/* set up time quantum */
+	tmp = regCP_GFX_HQD_QUANTUM_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
+	mqd->cp_gfx_hqd_quantum = tmp;
+
+	/* set up gfx hqd base. this is similar as CP_RB_BASE */
+	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
+	mqd->cp_gfx_hqd_base = lower_32_bits(hqd_gpu_addr);
+	mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
+
+	/* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
+	wb_gpu_addr = prop->rptr_gpu_addr;
+	mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
+	mqd->cp_gfx_hqd_rptr_addr_hi =
+		upper_32_bits(wb_gpu_addr) & 0xffff;
+
+	/* set up rb_wptr_poll addr */
+	wb_gpu_addr = prop->wptr_gpu_addr;
+	mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+	mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+	/* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
+	rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
+	tmp = regCP_GFX_HQD_CNTL_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
+	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
+#ifdef __BIG_ENDIAN
+	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
+#endif
+	if (prop->tmz_queue)
+		tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1);
+	if (!prop->kernel_queue)
+		tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1);
+	mqd->cp_gfx_hqd_cntl = tmp;
+
+	/* set up cp_doorbell_control */
+	tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT;
+	if (prop->use_doorbell) {
+		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+				    DOORBELL_OFFSET, prop->doorbell_index);
+		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+				    DOORBELL_EN, 1);
+	} else {
+		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+				    DOORBELL_EN, 0);
+	}
+	mqd->cp_rb_doorbell_control = tmp;
+
+	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+	mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT;
+
+	/* active the queue */
+	mqd->cp_gfx_hqd_active = 1;
+
+	/* set gfx UQ items */
+	mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr);
+	mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr);
+	mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr);
+	mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr);
+	mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
+	mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
+	mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+	mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
+	return 0;
+}
+
+/*
+ * Prepare the MQD of a kernel gfx queue.
+ *
+ * When @reset is set, or the device is in reset or suspend, the MQD is
+ * restored from its backup copy (if one exists) and the ring is cleared.
+ * Otherwise the MQD is built from scratch under srbm_mutex and then saved
+ * to the backup slot (if one exists). Always returns 0.
+ */
+static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct v11_gfx_mqd *mqd = ring->mqd_ptr;
+	int mqd_idx = ring - &adev->gfx.gfx_ring[0];
+
+	if (reset || amdgpu_in_reset(adev) || adev->in_suspend) {
+		/* restore mqd with the backup copy */
+		if (adev->gfx.me.mqd_backup[mqd_idx])
+			memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx],
+				    sizeof(*mqd));
+
+		/* reset the ring */
+		ring->wptr = 0;
+		*ring->wptr_cpu_addr = 0;
+		amdgpu_ring_clear_ring(ring);
+
+		return 0;
+	}
+
+	/* first-time init: build a fresh MQD with this queue selected */
+	memset((void *)mqd, 0, sizeof(*mqd));
+
+	mutex_lock(&adev->srbm_mutex);
+	soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+	amdgpu_ring_init_mqd(ring);
+	soc21_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+
+	/* keep a copy for later restores */
+	if (adev->gfx.me.mqd_backup[mqd_idx])
+		memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd,
+			      sizeof(*mqd));
+
+	return 0;
+}
+
+/*
+ * Initialise every kernel gfx queue, enable the KGQs and then start the
+ * gfx CP. Stops at the first failing step and returns its error code.
+ */
+static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	int idx, ret;
+
+	for (idx = 0; idx < adev->gfx.num_gfx_rings; idx++) {
+		ring = &adev->gfx.gfx_ring[idx];
+
+		ret = gfx_v11_0_kgq_init_queue(ring, false);
+		if (ret)
+			return ret;
+	}
+
+	ret = amdgpu_gfx_enable_kgq(adev, 0);
+
+	return ret ? ret : gfx_v11_0_cp_gfx_start(adev);
+}
+
+/**
+ * gfx_v11_0_compute_mqd_init - fill in a compute memory queue descriptor
+ * @adev: amdgpu device pointer
+ * @m: MQD to initialise, interpreted as struct v11_compute_mqd
+ * @prop: queue properties (addresses, size, doorbell, priority, flags)
+ *
+ * Only writes fields of @m; no hardware register is touched here. Each MQD
+ * field is stored exactly once with its final value.
+ *
+ * Always returns 0.
+ */
+static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
+				      struct amdgpu_mqd_prop *prop)
+{
+	struct v11_compute_mqd *mqd = m;
+	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+	uint32_t tmp;
+
+	mqd->header = 0xC0310800;
+	mqd->compute_pipelinestat_enable = 0x00000001;
+	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+	mqd->compute_misc_reserved = 0x00000007;
+
+	eop_base_addr = prop->eop_gpu_addr >> 8;
+	mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
+	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+
+	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+	tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+			    (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
+
+	mqd->cp_hqd_eop_control = tmp;
+
+	/* disable the queue if it's active */
+	mqd->cp_hqd_dequeue_request = 0;
+	mqd->cp_hqd_pq_wptr_lo = 0;
+	mqd->cp_hqd_pq_wptr_hi = 0;
+
+	/* set the pointer to the MQD */
+	mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
+	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
+
+	/* set MQD vmid to 0 */
+	tmp = regCP_MQD_CONTROL_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+	mqd->cp_mqd_control = tmp;
+
+	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
+	mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
+	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+	/* set up the HQD, this is similar to CP_RB0_CNTL */
+	tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+			    (order_base_2(prop->queue_size / 4) - 1));
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
+			    prop->allow_tunneling);
+	if (prop->kernel_queue) {
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+	}
+	if (prop->tmz_queue)
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1);
+	mqd->cp_hqd_pq_control = tmp;
+
+	/* set the wb address whether it's enabled or not */
+	wb_gpu_addr = prop->rptr_gpu_addr;
+	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+	mqd->cp_hqd_pq_rptr_report_addr_hi =
+		upper_32_bits(wb_gpu_addr) & 0xffff;
+
+	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+	wb_gpu_addr = prop->wptr_gpu_addr;
+	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+	/*
+	 * Doorbell control: all zero when the queue has no doorbell,
+	 * otherwise the register default with offset and enable set.
+	 */
+	tmp = 0;
+	if (prop->use_doorbell) {
+		tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+				    DOORBELL_OFFSET, prop->doorbell_index);
+
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+				    DOORBELL_EN, 1);
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+				    DOORBELL_SOURCE, 0);
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+				    DOORBELL_HIT, 0);
+	}
+
+	mqd->cp_hqd_pq_doorbell_control = tmp;
+
+	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+	mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT;
+
+	/* set the vmid for the queue */
+	mqd->cp_hqd_vmid = 0;
+
+	tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
+	mqd->cp_hqd_persistent_state = tmp;
+
+	/* set MIN_IB_AVAIL_SIZE */
+	tmp = regCP_HQD_IB_CONTROL_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+	mqd->cp_hqd_ib_control = tmp;
+
+	/* set static priority for a compute queue/ring */
+	mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
+	mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
+
+	mqd->cp_hqd_active = prop->hqd_active;
+
+	/* set UQ fenceaddress */
+	mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+	mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
+	return 0;
+}
+
+/*
+ * Program the HQD registers of the KIQ from the values already stored in
+ * ring->mqd_ptr. Every register write below takes its value from the MQD,
+ * apart from the fixed 0/1 control writes and the MEC doorbell range.
+ *
+ * Both call sites visible in this file select the ring's me/pipe/queue with
+ * soc21_grbm_select() and hold adev->srbm_mutex around this call.
+ *
+ * Always returns 0.
+ */
+static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v11_compute_mqd *mqd = ring->mqd_ptr;
+ int j;
+
+ /* inactivate the queue */
+ if (amdgpu_sriov_vf(adev))
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
+
+ /* disable wptr polling */
+ WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
+
+ /* write the EOP addr */
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
+ mqd->cp_hqd_eop_base_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
+ mqd->cp_hqd_eop_base_addr_hi);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
+ mqd->cp_hqd_eop_control);
+
+ /* enable doorbell? */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
+ /*
+ * Poll up to adev->usec_timeout microseconds for the ACTIVE bit to
+ * clear. A timeout is not reported; the sequence continues either way.
+ */
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ /* put back the MQD's dequeue request and queue pointers */
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
+ mqd->cp_hqd_dequeue_request);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
+ mqd->cp_hqd_pq_rptr);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+ }
+
+ /* set the pointer to the MQD */
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
+ mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
+ mqd->cp_mqd_base_addr_hi);
+
+ /* set MQD vmid to 0 */
+ WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
+ mqd->cp_mqd_control);
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
+ mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
+ mqd->cp_hqd_pq_base_hi);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
+ mqd->cp_hqd_pq_control);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* enable the doorbell if requested */
+ if (ring->use_doorbell) {
+ /* same kiq .. userqueue_end range as gfx_v11_0_cp_set_doorbell_range() */
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
+ (adev->doorbell_index.kiq * 2) << 2);
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
+ (adev->doorbell_index.userqueue_end * 2) << 2);
+ }
+
+ /* second write of the doorbell control, with the same MQD value as above */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+
+ /* set the vmid for the queue */
+ WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
+ mqd->cp_hqd_persistent_state);
+
+ /* activate the queue */
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
+ mqd->cp_hqd_active);
+
+ if (ring->use_doorbell)
+ WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+
+ return 0;
+}
+
+/*
+ * Bring up the KIQ ring.
+ *
+ * Outside of a GPU reset the MQD is zeroed, rebuilt and programmed into the
+ * hardware, then saved to kiq[0].mqd_backup when that buffer exists. During
+ * a GPU reset the MQD is restored from that backup instead and the ring
+ * buffer is cleared before the registers are programmed. Always returns 0.
+ */
+static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct v11_compute_mqd *mqd = ring->mqd_ptr;
+
+	gfx_v11_0_kiq_setting(ring);
+
+	if (!amdgpu_in_reset(adev)) {
+		/* normal init: start from an all-zero MQD */
+		memset((void *)mqd, 0, sizeof(*mqd));
+		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+			amdgpu_ring_clear_ring(ring);
+
+		mutex_lock(&adev->srbm_mutex);
+		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+		amdgpu_ring_init_mqd(ring);
+		gfx_v11_0_kiq_init_register(ring);
+		soc21_grbm_select(adev, 0, 0, 0, 0);
+		mutex_unlock(&adev->srbm_mutex);
+
+		if (adev->gfx.kiq[0].mqd_backup)
+			memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd,
+				      sizeof(*mqd));
+
+		return 0;
+	}
+
+	/* GPU_RESET case: reset MQD to a clean status */
+	if (adev->gfx.kiq[0].mqd_backup)
+		memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
+
+	/* reset ring buffer */
+	ring->wptr = 0;
+	amdgpu_ring_clear_ring(ring);
+
+	mutex_lock(&adev->srbm_mutex);
+	soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+	gfx_v11_0_kiq_init_register(ring);
+	soc21_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+
+	return 0;
+}
+
+/*
+ * Prepare the MQD of a kernel compute queue.
+ *
+ * When @reset is set, or the device is in reset or suspend, the MQD is
+ * restored from its backup copy (if one exists) and the ring is cleared.
+ * Otherwise the MQD is built from scratch under srbm_mutex and then saved
+ * to the backup slot (if one exists). Always returns 0.
+ */
+static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct v11_compute_mqd *mqd = ring->mqd_ptr;
+	int mqd_idx = ring - &adev->gfx.compute_ring[0];
+
+	if (reset || amdgpu_in_reset(adev) || adev->in_suspend) {
+		/* restore MQD to a clean status */
+		if (adev->gfx.mec.mqd_backup[mqd_idx])
+			memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx],
+				    sizeof(*mqd));
+
+		/* reset ring buffer */
+		ring->wptr = 0;
+		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
+		amdgpu_ring_clear_ring(ring);
+
+		return 0;
+	}
+
+	/* first-time init: build a fresh MQD with this queue selected */
+	memset((void *)mqd, 0, sizeof(*mqd));
+
+	mutex_lock(&adev->srbm_mutex);
+	soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+	amdgpu_ring_init_mqd(ring);
+	soc21_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+
+	/* keep a copy for later restores */
+	if (adev->gfx.mec.mqd_backup[mqd_idx])
+		memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd,
+			      sizeof(*mqd));
+
+	return 0;
+}
+
+/*
+ * Resume the KIQ by (re)initialising the ring of KIQ instance 0.
+ *
+ * The result of gfx_v11_0_kiq_init_queue() is passed to the caller rather
+ * than dropped, so a failure there can never be reported as success.
+ */
+static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
+{
+	return gfx_v11_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
+}
+
+/*
+ * Initialise every kernel compute queue and enable the KCQs.
+ *
+ * The compute CP is enabled here only when amdgpu_async_gfx_ring is off.
+ * Returns the first error hit, or the result of amdgpu_gfx_enable_kcq().
+ */
+static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	int idx, ret;
+
+	if (!amdgpu_async_gfx_ring)
+		gfx_v11_0_cp_compute_enable(adev, true);
+
+	for (idx = 0; idx < adev->gfx.num_compute_rings; idx++) {
+		ring = &adev->gfx.compute_ring[idx];
+
+		ret = gfx_v11_0_kcq_init_queue(ring, false);
+		if (ret)
+			return ret;
+	}
+
+	ret = amdgpu_gfx_enable_kcq(adev, 0);
+
+	return ret;
+}
+
+/*
+ * Bring the command processor back up: load the CP microcode when the
+ * firmware load type is DIRECT, program the doorbell ranges, resume the KIQ
+ * (through MES when enabled), the compute queues and the gfx ring(s), and
+ * finally test every gfx and compute ring.
+ *
+ * Returns 0 on success or the error code of the first failing step.
+ */
+static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	int idx, ret;
+
+	if (!(adev->flags & AMD_IS_APU))
+		gfx_v11_0_enable_gui_idle_interrupt(adev, false);
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+		/* legacy firmware loading */
+		ret = gfx_v11_0_cp_gfx_load_microcode(adev);
+		if (ret)
+			return ret;
+
+		ret = adev->gfx.rs64_enable ?
+			gfx_v11_0_cp_compute_load_microcode_rs64(adev) :
+			gfx_v11_0_cp_compute_load_microcode(adev);
+		if (ret)
+			return ret;
+	}
+
+	gfx_v11_0_cp_set_doorbell_range(adev);
+
+	if (amdgpu_async_gfx_ring) {
+		gfx_v11_0_cp_compute_enable(adev, true);
+		gfx_v11_0_cp_gfx_enable(adev, true);
+	}
+
+	/* KIQ first: via MES when it provides the hook, otherwise directly */
+	ret = (adev->enable_mes_kiq && adev->mes.kiq_hw_init) ?
+		amdgpu_mes_kiq_hw_init(adev) :
+		gfx_v11_0_kiq_resume(adev);
+	if (ret)
+		return ret;
+
+	ret = gfx_v11_0_kcq_resume(adev);
+	if (ret)
+		return ret;
+
+	ret = amdgpu_async_gfx_ring ?
+		gfx_v11_0_cp_async_gfx_ring_resume(adev) :
+		gfx_v11_0_cp_gfx_resume(adev);
+	if (ret)
+		return ret;
+
+	for (idx = 0; idx < adev->gfx.num_gfx_rings; idx++) {
+		ring = &adev->gfx.gfx_ring[idx];
+
+		/* with kernel queues disabled we don't want to set ring->ready */
+		ret = adev->gfx.disable_kq ?
+			amdgpu_ring_test_ring(ring) :
+			amdgpu_ring_test_helper(ring);
+		if (ret)
+			return ret;
+	}
+
+	if (adev->gfx.disable_kq && amdgpu_async_gfx_ring)
+		amdgpu_gfx_disable_kgq(adev, 0);
+
+	for (idx = 0; idx < adev->gfx.num_compute_rings; idx++) {
+		ring = &adev->gfx.compute_ring[idx];
+
+		ret = amdgpu_ring_test_helper(ring);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Enable or disable both halves of the command processor: the gfx CP first,
+ * then the compute CP, each with the same @enable value.
+ */
+static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
+{
+ gfx_v11_0_cp_gfx_enable(adev, enable);
+ gfx_v11_0_cp_compute_enable(adev, enable);
+}
+
+/*
+ * Enable the GART on the gfx hub, flush HDP, set the default fault-enable
+ * state and flush the TLB of GFXHUB 0.
+ *
+ * Returns 0 on success or the error from the hub's gart_enable() callback.
+ */
+static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
+{
+	bool fault_default;
+	int ret;
+
+	ret = adev->gfxhub.funcs->gart_enable(adev);
+	if (ret)
+		return ret;
+
+	amdgpu_device_flush_hdp(adev, NULL);
+
+	/* default is off only when amdgpu_vm_fault_stop is STOP_ALWAYS */
+	fault_default = (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS);
+	adev->gfxhub.funcs->set_fault_enable_default(adev, fault_default);
+
+	/*
+	 * TODO investigate why this and the hdp flush above is needed,
+	 * are we missing a flush somewhere else?
+	 */
+	adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
+
+	return 0;
+}
+
+/*
+ * When RS64 firmware is in use, set CP_GFX_CNTL.ENGINE_SEL and
+ * CP_MEC_ISA_CNTL.ISA_MODE to 1. In emulation mode 1 the function then
+ * sleeps for 100 ms, whether or not RS64 is enabled.
+ */
+static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
+{
+	u32 gfx_cntl, isa_cntl;
+
+	if (adev->gfx.rs64_enable) {
+		/* select RS64 on the gfx side */
+		gfx_cntl = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
+		gfx_cntl = REG_SET_FIELD(gfx_cntl, CP_GFX_CNTL, ENGINE_SEL, 1);
+		WREG32_SOC15(GC, 0, regCP_GFX_CNTL, gfx_cntl);
+
+		/* and on the compute (MEC) side */
+		isa_cntl = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
+		isa_cntl = REG_SET_FIELD(isa_cntl, CP_MEC_ISA_CNTL, ISA_MODE, 1);
+		WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, isa_cntl);
+	}
+
+	if (amdgpu_emu_mode == 1)
+		msleep(100);
+}
+
+/*
+ * Read GB_ADDR_CONFIG and decode it into adev->gfx.config.
+ *
+ * All fields except pipe_interleave_size are stored as 1 << field value;
+ * pipe_interleave_size is stored as 1 << (8 + field value).
+ *
+ * Returns -EINVAL when the register reads back as 0, otherwise 0.
+ */
+static int get_gb_addr_config(struct amdgpu_device *adev)
+{
+	u32 cfg = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
+
+	if (!cfg)
+		return -EINVAL;
+
+	adev->gfx.config.gb_addr_config = cfg;
+
+	adev->gfx.config.gb_addr_config_fields.num_pkrs =
+		1 << REG_GET_FIELD(cfg, GB_ADDR_CONFIG, NUM_PKRS);
+
+	adev->gfx.config.gb_addr_config_fields.num_pipes =
+		1 << REG_GET_FIELD(cfg, GB_ADDR_CONFIG, NUM_PIPES);
+
+	/* max_tile_pipes mirrors the decoded pipe count */
+	adev->gfx.config.max_tile_pipes =
+		adev->gfx.config.gb_addr_config_fields.num_pipes;
+
+	adev->gfx.config.gb_addr_config_fields.max_compress_frags =
+		1 << REG_GET_FIELD(cfg, GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
+
+	adev->gfx.config.gb_addr_config_fields.num_rb_per_se =
+		1 << REG_GET_FIELD(cfg, GB_ADDR_CONFIG, NUM_RB_PER_SE);
+
+	adev->gfx.config.gb_addr_config_fields.num_se =
+		1 << REG_GET_FIELD(cfg, GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
+
+	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size =
+		1 << (8 + REG_GET_FIELD(cfg, GB_ADDR_CONFIG,
+					PIPE_INTERLEAVE_SIZE));
+
+	return 0;
+}
+
+/*
+ * Set the GPA_OVERRIDE bit in CPC_PSP_DEBUG and then in CPG_PSP_DEBUG,
+ * each with a read-modify-write.
+ */
+static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
+{
+	uint32_t cpc_dbg, cpg_dbg;
+
+	cpc_dbg = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
+	WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG,
+		     cpc_dbg | CPC_PSP_DEBUG__GPA_OVERRIDE_MASK);
+
+	cpg_dbg = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
+	WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG,
+		     cpg_dbg | CPG_PSP_DEBUG__GPA_OVERRIDE_MASK);
+}
+
+/*
+ * Hardware init of the GFX v11 IP block.
+ *
+ * Steps, in order: initialise the cleaner shader; start firmware loading
+ * according to adev->firmware.load_type (RLC backdoor autoload, or direct
+ * IMU load); wait for RLC autoload where applicable; read GB_ADDR_CONFIG;
+ * enable the gfx hub; apply golden registers (skipped in emulation mode);
+ * load SMU firmware where required; init constants; then resume the RLC and
+ * the CP.
+ *
+ * Returns 0 on success or the error code of the first failing step. A failed
+ * get_gb_addr_config() only produces a warning.
+ */
+static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
+ adev->gfx.cleaner_shader_ptr);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ if (adev->gfx.imu.funcs) {
+ /* RLC autoload sequence 1: Program rlc ram */
+ if (adev->gfx.imu.funcs->program_rlc_ram)
+ adev->gfx.imu.funcs->program_rlc_ram(adev);
+ /* rlc autoload firmware */
+ r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
+ if (r)
+ return r;
+ }
+ } else {
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /* the IMU callbacks are optional and only used when amdgpu_dpm > 0 */
+ if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
+ if (adev->gfx.imu.funcs->load_microcode)
+ adev->gfx.imu.funcs->load_microcode(adev);
+ if (adev->gfx.imu.funcs->setup_imu)
+ adev->gfx.imu.funcs->setup_imu(adev);
+ if (adev->gfx.imu.funcs->start_imu)
+ adev->gfx.imu.funcs->start_imu(adev);
+ }
+
+ /* disable gpa mode in backdoor loading */
+ gfx_v11_0_disable_gpa_mode(adev);
+ }
+ }
+
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
+ r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
+ return r;
+ }
+ }
+
+ adev->gfx.is_poweron = true;
+
+ /* a zero GB_ADDR_CONFIG is only warned about; init continues */
+ if(get_gb_addr_config(adev))
+ DRM_WARN("Invalid gb_addr_config !\n");
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
+ adev->gfx.rs64_enable)
+ gfx_v11_0_config_gfx_rs64(adev);
+
+ r = gfx_v11_0_gfxhub_enable(adev);
+ if (r)
+ return r;
+
+ if (!amdgpu_emu_mode)
+ gfx_v11_0_init_golden_registers(adev);
+
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
+ /**
+ * For gfx 11, rlc firmware loading relies on smu firmware is
+ * loaded firstly, so in direct type, it has to load smc ucode
+ * here before rlc.
+ */
+ r = amdgpu_pm_load_smu_firmware(adev, NULL);
+ if (r)
+ return r;
+ }
+
+ gfx_v11_0_constants_init(adev);
+
+ /* the driver selects the CP firmware arch itself unless the load type is PSP */
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+ gfx_v11_0_select_cp_fw_arch(adev);
+
+ if (adev->nbio.funcs->gc_doorbell_init)
+ adev->nbio.funcs->gc_doorbell_init(adev);
+
+ r = gfx_v11_0_rlc_resume(adev);
+ if (r)
+ return r;
+
+ /*
+ * init golden registers and rlc resume may override some registers,
+ * reconfig them here
+ */
+ gfx_v11_0_tcp_harvest(adev);
+
+ r = gfx_v11_0_cp_resume(adev);
+ if (r)
+ return r;
+
+ /* get IMU version from HW if it's not set */
+ if (!adev->gfx.imu_fw_version)
+ adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0);
+
+ /* r is 0 here: the cp_resume failure path returned above */
+ return r;
+}
+
+/*
+ * Take (@enable true) or drop (@enable false) a reference on the EOP
+ * interrupt of every gfx pipe and every compute pipe, for each IP that has
+ * user-queue functions registered.
+ *
+ * Returns 0 on success or the first error from amdgpu_irq_get()/_put();
+ * references already taken or dropped before the failure are left as is.
+ */
+static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev,
+					      bool enable)
+{
+	unsigned int irq_type;
+	int me, pipe, ret;
+
+	if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) {
+		for (me = 0; me < adev->gfx.me.num_me; me++) {
+			for (pipe = 0; pipe < adev->gfx.me.num_pipe_per_me; pipe++) {
+				/* the gfx irq type depends on the pipe only */
+				irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + pipe;
+
+				ret = enable ?
+					amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+						       irq_type) :
+					amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+						       irq_type);
+				if (ret)
+					return ret;
+			}
+		}
+	}
+
+	if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) {
+		for (me = 0; me < adev->gfx.mec.num_mec; ++me) {
+			for (pipe = 0; pipe < adev->gfx.mec.num_pipe_per_mec; pipe++) {
+				/* compute irq types are laid out MEC-major */
+				irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+					+ (me * adev->gfx.mec.num_pipe_per_mec)
+					+ pipe;
+
+				ret = enable ?
+					amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+						       irq_type) :
+					amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+						       irq_type);
+				if (ret)
+					return ret;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Hardware teardown of the GFX v11 IP block: cancel the idle work, drop the
+ * fault and user-queue EOP interrupt references, disable the kernel queues
+ * (when hardware access is allowed) and, except on SR-IOV VFs, halt the CP,
+ * mask the GUI idle interrupt and disable the gfx hub GART.
+ *
+ * Always returns 0; queue-disable failures are only logged.
+ */
+static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	cancel_delayed_work_sync(&adev->gfx.idle_work);
+
+	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+	amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
+	gfx_v11_0_set_userq_eop_interrupts(adev, false);
+
+	if (!adev->no_hw_access) {
+		/* KGQs are only torn down here in async mode with kernel queues on */
+		if (amdgpu_async_gfx_ring && !adev->gfx.disable_kq &&
+		    amdgpu_gfx_disable_kgq(adev, 0))
+			DRM_ERROR("KGQ disable failed\n");
+
+		if (amdgpu_gfx_disable_kcq(adev, 0))
+			DRM_ERROR("KCQ disable failed\n");
+
+		amdgpu_mes_kiq_hw_fini(adev);
+	}
+
+	/*
+	 * Remove the steps disabling CPG and clearing KIQ position,
+	 * so that CP could perform IDLE-SAVE during switch. Those
+	 * steps are necessary to avoid a DMAR error in gfx9 but it is
+	 * not reproduced on gfx11.
+	 */
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	gfx_v11_0_cp_enable(adev, false);
+	gfx_v11_0_enable_gui_idle_interrupt(adev, false);
+
+	adev->gfxhub.funcs->gart_disable(adev);
+
+	adev->gfx.is_poweron = false;
+
+	return 0;
+}
+
+/* Suspend is a plain hardware teardown: forwards to gfx_v11_0_hw_fini(). */
+static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return gfx_v11_0_hw_fini(ip_block);
+}
+
+/* Resume is a plain hardware init: forwards to gfx_v11_0_hw_init(). */
+static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ return gfx_v11_0_hw_init(ip_block);
+}
+
+/* Report the block as idle when GRBM_STATUS.GUI_ACTIVE reads back as 0. */
+static bool gfx_v11_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	u32 status = RREG32_SOC15(GC, 0, regGRBM_STATUS);
+
+	return !REG_GET_FIELD(status, GRBM_STATUS, GUI_ACTIVE);
+}
+
+/*
+ * Poll GRBM_STATUS once per microsecond, for at most adev->usec_timeout
+ * iterations, until GUI_ACTIVE clears.
+ *
+ * Returns 0 once the bit is clear, -ETIMEDOUT otherwise.
+ */
+static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	unsigned waited;
+	u32 status;
+
+	for (waited = 0; waited < adev->usec_timeout; waited++) {
+		/* read GRBM_STATUS and keep only the GUI_ACTIVE bit */
+		status = RREG32_SOC15(GC, 0, regGRBM_STATUS);
+		status &= GRBM_STATUS__GUI_ACTIVE_MASK;
+
+		if (!REG_GET_FIELD(status, GRBM_STATUS, GUI_ACTIVE))
+			return 0;
+
+		udelay(1);
+	}
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * Acquire (@req true) or release (@req false) the CP_GFX_INDEX_MUTEX
+ * register as client id 4.
+ *
+ * The request word is rewritten and read back once per microsecond, for at
+ * most adev->usec_timeout iterations. An acquire succeeds when the readback
+ * equals the value written. A release succeeds as soon as the readback no
+ * longer shows this client holding the mutex.
+ *
+ * Returns 0 on success, -EINVAL on timeout.
+ */
+int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+				      bool req)
+{
+	u32 attempt, cmd, held, val;
+
+	/* Request with MeId=2, PipeId=0 */
+	cmd = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
+	cmd = REG_SET_FIELD(cmd, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
+
+	/* what the register reads back as while this client owns the mutex */
+	held = REG_SET_FIELD(cmd, CP_GFX_INDEX_MUTEX, REQUEST, 1);
+
+	for (attempt = 0; attempt < adev->usec_timeout; attempt++) {
+		WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, cmd);
+		val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
+
+		/* release: done once unlocked or locked by firmware */
+		if (req ? (val == cmd) : (val != held))
+			return 0;
+
+		udelay(1);
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Soft-reset the GFX/CP blocks and restart the command processor.
+ *
+ * Sequence, all inside RLC safe mode:
+ *  1. mask the CP busy/empty/idle interrupts;
+ *  2. request dequeue on every compute (MEC) and gfx (ME) queue;
+ *  3. under the gfx index mutex, write CP_VMID_RESET for all VMIDs but 0;
+ *  4. wait for CP_HQD_ACTIVE and CP_GFX_HQD_ACTIVE to read zero;
+ *  5. pulse the CP/GFX/CPF/CPC/CPG bits of GRBM_SOFT_RESET;
+ *  6. reset the compute HQD registers and clear CP_ME_CNTL and
+ *     CP_MEC_RS64_CNTL;
+ *  7. wait for CP_VMID_RESET to read zero, unmask the CP interrupts and
+ *     resume the CP.
+ *
+ * Returns the result of gfx_v11_0_cp_resume() on success, the error from
+ * gfx_v11_0_request_gfx_index_mutex() if the mutex cannot be taken or
+ * released, or -EINVAL if one of the polling loops times out.
+ *
+ * NOTE(review): every early error return below leaves RLC safe mode entered
+ * and the CP interrupts masked - confirm the caller always escalates to a
+ * heavier reset in that case.
+ */
+static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+	u32 grbm_soft_reset = 0;
+	u32 tmp;
+	int r, i, j, k;
+	struct amdgpu_device *adev = ip_block->adev;
+
+	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+	/* mask the CP interrupts while the engine is being reset */
+	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
+	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
+	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
+	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
+	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
+
+	/* GRBM bank selection is shared state, hence the srbm_mutex */
+	mutex_lock(&adev->srbm_mutex);
+	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+				soc21_grbm_select(adev, i, k, j, 0);
+
+				WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
+				WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
+			}
+		}
+	}
+	for (i = 0; i < adev->gfx.me.num_me; ++i) {
+		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
+			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
+				soc21_grbm_select(adev, i, k, j, 0);
+
+				WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
+			}
+		}
+	}
+	soc21_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+
+	/* Try to acquire the gfx mutex before access to CP_VMID_RESET */
+	mutex_lock(&adev->gfx.reset_sem_mutex);
+	r = gfx_v11_0_request_gfx_index_mutex(adev, true);
+	if (r) {
+		mutex_unlock(&adev->gfx.reset_sem_mutex);
+		DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n");
+		return r;
+	}
+
+	WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
+
+	/*
+	 * Read CP_VMID_RESET register three times
+	 * to get sufficient time for GFX_HQD_ACTIVE reach 0
+	 */
+	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
+	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
+	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
+
+	/* release the gfx mutex */
+	r = gfx_v11_0_request_gfx_index_mutex(adev, false);
+	mutex_unlock(&adev->gfx.reset_sem_mutex);
+	if (r) {
+		DRM_ERROR("Failed to release the gfx mutex during soft reset\n");
+		return r;
+	}
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
+		    !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
+			break;
+		udelay(1);
+	}
+	if (i >= adev->usec_timeout) {
+		/* was a bare printk() without a log level */
+		DRM_ERROR("Failed to wait all pipes clean\n");
+		return -EINVAL;
+	}
+
+	/********** trigger soft reset ***********/
+	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+					SOFT_RESET_CP, 1);
+	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+					SOFT_RESET_GFX, 1);
+	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+					SOFT_RESET_CPF, 1);
+	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+					SOFT_RESET_CPC, 1);
+	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+					SOFT_RESET_CPG, 1);
+	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
+	/********** exit soft reset ***********/
+	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+					SOFT_RESET_CP, 0);
+	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+					SOFT_RESET_GFX, 0);
+	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+					SOFT_RESET_CPF, 0);
+	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+					SOFT_RESET_CPC, 0);
+	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+					SOFT_RESET_CPG, 0);
+	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
+
+	tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
+	WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);
+
+	WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
+	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
+			break;
+		udelay(1);
+	}
+	if (i >= adev->usec_timeout) {
+		/* was a bare printk() without a log level */
+		DRM_ERROR("Failed to wait CP_VMID_RESET to 0\n");
+		return -EINVAL;
+	}
+
+	/* unmask the CP interrupts that were masked at the top */
+	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
+	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
+	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
+	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
+	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
+
+	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+	return gfx_v11_0_cp_resume(adev);
+}
+
+/*
+ * Decide whether the GFX block needs a soft reset.
+ *
+ * Runs an IB test with a one second timeout on every kernel gfx ring and
+ * then on every kernel compute ring. Returns true at the first ring whose
+ * test fails, false when all of them pass.
+ */
+static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	long timeout = msecs_to_jiffies(1000);
+	int idx;
+
+	for (idx = 0; idx < adev->gfx.num_gfx_rings; idx++) {
+		if (amdgpu_ring_test_ib(&adev->gfx.gfx_ring[idx], timeout))
+			return true;
+	}
+
+	for (idx = 0; idx < adev->gfx.num_compute_rings; idx++) {
+		if (amdgpu_ring_test_ib(&adev->gfx.compute_ring[idx], timeout))
+			return true;
+	}
+
+	return false;
+}
+/*
+ * Post-soft-reset step for the GFX v11 block.
+ *
+ * Calls amdgpu_mes_resume() and returns its result.
+ */
+static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ /*
+ * A GFX soft reset also impacts MES, so MES has to be resumed afterwards.
+ */
+ return amdgpu_mes_resume(adev);
+}
+/*
+ * Read a 64-bit GPU clock counter.
+ *
+ * For an SR-IOV VF the value comes from CP_MES_MTIME_HI/LO, read with
+ * GFXOFF disallowed and under gfx.gpu_clock_mutex. Otherwise it comes from
+ * the SMUIO GOLDEN_TSC_COUNT_UPPER/LOWER pair, read with preemption
+ * disabled.
+ *
+ * In both cases the high half is read before and after the low half; if
+ * the two high reads differ the low half is read again. The result is
+ * low | (second high read << 32).
+ */
+static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ uint64_t clock;
+ uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after;
+
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->gfx.gpu_clock_mutex);
+ clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
+ clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
+ clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
+ if (clock_counter_hi_pre != clock_counter_hi_after)
+ clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
+ mutex_unlock(&adev->gfx.gpu_clock_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+ } else {
+ preempt_disable();
+ clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
+ clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
+ clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
+ if (clock_counter_hi_pre != clock_counter_hi_after)
+ clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
+ preempt_enable();
+ }
+ clock = clock_counter_lo | (clock_counter_hi_after << 32ULL);
+
+ return clock;
+}
+/*
+ * Emit the register writes that program the GDS, GWS and OA setup of one
+ * VMID on @ring.
+ *
+ * Four writes are emitted through gfx_v11_0_write_data_to_reg():
+ *  - GDS_VMID0_BASE + 2 * vmid  <- gds_base
+ *  - GDS_VMID0_SIZE + 2 * vmid  <- gds_size
+ *  - GDS_GWS_VMID0 + vmid       <- gws_size in the SIZE field | gws_base
+ *  - GDS_OA_VMID0 + vmid        <- bitmask of oa_size bits starting at
+ *                                  bit oa_base
+ *
+ * NOTE(review): the OA mask is built from shifts of a plain int 1, so
+ * oa_base + oa_size is assumed to stay below 31 - confirm against callers.
+ */
+static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
+ uint32_t vmid,
+ uint32_t gds_base, uint32_t gds_size,
+ uint32_t gws_base, uint32_t gws_size,
+ uint32_t oa_base, uint32_t oa_size)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* GDS Base */
+ gfx_v11_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
+ gds_base);
+
+ /* GDS Size */
+ gfx_v11_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
+ gds_size);
+
+ /* GWS */
+ gfx_v11_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
+ gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
+
+ /* OA */
+ gfx_v11_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
+ (1 << (oa_size + oa_base)) - (1 << oa_base));
+}
+
+/*
+ * Early init for the GFX v11 block.
+ *
+ * Translates the amdgpu_user_queue setting into the disable_kq/disable_uq
+ * switches, sizes the kernel gfx and compute ring sets accordingly, installs
+ * the per-IP function tables and finally loads the microcode. Returns the
+ * result of gfx_v11_0_init_microcode().
+ */
+static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	const int uq_mode = amdgpu_user_queue;
+
+	/*
+	 * 1: kernel and user queues, 2: user queues only,
+	 * anything else (including -1 and 0): kernel queues only
+	 */
+	if (uq_mode == 1) {
+		adev->gfx.disable_kq = false;
+		adev->gfx.disable_uq = false;
+	} else if (uq_mode == 2) {
+		adev->gfx.disable_kq = true;
+		adev->gfx.disable_uq = false;
+	} else {
+		adev->gfx.disable_kq = false;
+		adev->gfx.disable_uq = true;
+	}
+
+	adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
+
+	if (!adev->gfx.disable_kq) {
+		adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
+		adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+						  AMDGPU_MAX_COMPUTE_RINGS);
+	} else {
+		/*
+		 * Even without kernel queues one GFX ring is needed
+		 * temporarily to set up the clear state.
+		 */
+		adev->gfx.num_gfx_rings = 1;
+		adev->gfx.num_compute_rings = 0;
+	}
+
+	gfx_v11_0_set_kiq_pm4_funcs(adev);
+	gfx_v11_0_set_ring_funcs(adev);
+	gfx_v11_0_set_irq_funcs(adev);
+	gfx_v11_0_set_gds_init(adev);
+	gfx_v11_0_set_rlc_funcs(adev);
+	gfx_v11_0_set_mqd_funcs(adev);
+	gfx_v11_0_set_imu_funcs(adev);
+
+	gfx_v11_0_init_rlcg_reg_access_ctrl(adev);
+
+	return gfx_v11_0_init_microcode(adev);
+}
+
+/*
+ * Late init for the GFX v11 block.
+ *
+ * Takes a reference on the priv_reg, priv_inst and bad_op interrupt sources
+ * and then enables the user-queue EOP interrupts. Stops at the first step
+ * that fails and returns its error code; returns 0 when all succeed.
+ */
+static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int ret;
+
+	ret = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+	if (!ret)
+		ret = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+	if (!ret)
+		ret = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
+	if (!ret)
+		ret = gfx_v11_0_set_userq_eop_interrupts(adev, true);
+
+	return ret;
+}
+
+/*
+ * Report whether the RLC is enabled, i.e. whether the RLC_ENABLE_F32 field
+ * of RLC_CNTL is non-zero.
+ */
+static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
+{
+	uint32_t cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
+
+	return !!REG_GET_FIELD(cntl, RLC_CNTL, RLC_ENABLE_F32);
+}
+
+/*
+ * Ask the RLC to enter safe mode.
+ *
+ * Writes RLC_SAFE_MODE with the CMD bit set and MESSAGE = 1, then polls
+ * once per microsecond, for at most adev->usec_timeout iterations, until
+ * the CMD field reads back as zero. A timeout is not reported to the
+ * caller. @xcc_id is unused here.
+ */
+static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+	uint32_t request;
+	unsigned int retry = 0;
+
+	request = RLC_SAFE_MODE__CMD_MASK |
+		  (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
+	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, request);
+
+	/* wait for RLC_SAFE_MODE */
+	while (retry < adev->usec_timeout) {
+		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
+				   RLC_SAFE_MODE, CMD))
+			break;
+		udelay(1);
+		retry++;
+	}
+}
+/*
+ * Ask the RLC to leave safe mode.
+ *
+ * Writes RLC_SAFE_MODE with only the CMD bit set (MESSAGE left at zero).
+ * Unlike gfx_v11_0_set_safe_mode() there is no polling afterwards.
+ * @xcc_id is unused here.
+ */
+static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
+}
+
+/*
+ * Update the PERFMON_CLOCK_STATE bit of RLC_CGTT_MGCG_OVERRIDE.
+ *
+ * Does nothing unless AMD_CG_SUPPORT_GFX_PERF_CLK is set in cg_flags.
+ * The bit is cleared when @enable is true and set otherwise; the register
+ * is only written back if its value actually changes.
+ */
+static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
+				      bool enable)
+{
+	uint32_t before, after;
+
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
+		return;
+
+	before = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+	after = before;
+
+	if (!enable)
+		after |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
+	else
+		after &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
+
+	if (after != before)
+		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, after);
+}
+
+/*
+ * Update the GFXIP_FGCG_OVERRIDE bit of RLC_CGTT_MGCG_OVERRIDE.
+ *
+ * Does nothing unless AMD_CG_SUPPORT_GFX_FGCG is set in cg_flags.
+ * The override bit is cleared when @enable is true and set otherwise; the
+ * register is only written back if its value actually changes.
+ */
+static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev,
+				       bool enable)
+{
+	uint32_t before, after;
+
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
+		return;
+
+	before = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+	after = before;
+
+	if (!enable)
+		after |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+	else
+		after &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+
+	if (after != before)
+		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, after);
+}
+
+/*
+ * Update the GFXIP_REPEATER_FGCG_OVERRIDE bit of RLC_CGTT_MGCG_OVERRIDE.
+ *
+ * Does nothing unless AMD_CG_SUPPORT_REPEATER_FGCG is set in cg_flags.
+ * The override bit is cleared when @enable is true and set otherwise; the
+ * register is only written back if its value actually changes.
+ */
+static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev,
+					   bool enable)
+{
+	uint32_t before, after;
+
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
+		return;
+
+	before = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+	after = before;
+
+	if (!enable)
+		after |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
+	else
+		after &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
+
+	if (after != before)
+		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, after);
+}
+
+/*
+ * Update the medium grain clock gating overrides.
+ *
+ * Returns immediately when neither AMD_CG_SUPPORT_GFX_MGCG nor
+ * AMD_CG_SUPPORT_GFX_MGLS is set; the register is only touched when MGCG
+ * is set. The GRBM_CGTT_SCLK, RLC_CGTT_SCLK and GFXIP_MGCG override bits of
+ * RLC_CGTT_MGCG_OVERRIDE are cleared when @enable is true and set
+ * otherwise, and the register is written back only if it changed.
+ */
+static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+							bool enable)
+{
+	uint32_t before, after;
+
+	if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
+		return;
+
+	/* only the MGCG overrides are programmed here */
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG))
+		return;
+
+	before = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+	after = before;
+
+	/* It is disabled by HW by default */
+	if (enable)
+		after &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+			   RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+			   RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
+	else
+		after |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+			  RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
+
+	if (after != before)
+		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, after);
+}
+/*
+ * Enable or disable coarse grain clock gating (CGCG/CGLS and their 3D
+ * variants).
+ *
+ * Returns immediately when none of the CGCG, CGLS, 3D_CGCG and 3D_CGLS
+ * support flags is set in cg_flags.
+ *
+ * Enable path: clears the matching override bits in
+ * RLC_CGTT_MGCG_OVERRIDE, programs idle threshold / delay and the enable
+ * bits in RLC_CGCG_CGLS_CTRL and RLC_CGCG_CGLS_CTRL_3D, programs
+ * CP_RB_WPTR_POLL_CNTL, turns on the CP busy/empty/idle interrupts and sets
+ * CGCG_INT_ENABLE for SDMA0 (and SDMA1 when more than one SDMA instance
+ * exists).
+ *
+ * Disable path: clears the enable bits in RLC_CGCG_CGLS_CTRL and
+ * RLC_CGCG_CGLS_CTRL_3D and clears CGCG_INT_ENABLE for SDMA0 (and SDMA1
+ * when present). The override bits and CP_INT_CNTL are left untouched.
+ */
+static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS)))
+ return;
+
+ if (enable) {
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ /* unset CGCG override */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
+ adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
+
+ /* update CGCG override bits */
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* enable cgcg FSM(0x0000363F) */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
+ data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
+ data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
+ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
+ data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+ }
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
+
+ /* Program RLC_CGCG_CGLS_CTRL_3D */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
+ data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
+ data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+ }
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
+
+ /* set IDLE_POLL_COUNT(0x00900100) */
+ def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
+
+ data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
+ data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+ (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
+
+ data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
+
+ data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
+ data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
+
+ /* Some ASICs only have one SDMA instance, no need to configure SDMA1 */
+ if (adev->sdma.num_instances > 1) {
+ data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
+ data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ }
+ } else {
+ /* Program RLC_CGCG_CGLS_CTRL */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
+
+ /* Program RLC_CGCG_CGLS_CTRL_3D */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
+
+ data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
+ data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
+ WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
+
+ /* Some ASICs only have one SDMA instance, no need to configure SDMA1 */
+ if (adev->sdma.num_instances > 1) {
+ data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
+ data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
+ WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ }
+ }
+}
+/*
+ * Apply the requested clock gating state to all GFX clock gating features.
+ *
+ * Inside RLC safe mode this updates, in order: coarse grain CG, medium
+ * grain CG, repeater FGCG, SRAM FGCG and the perfmon clock. If any of the
+ * MGCG/CGLS/CGCG/3D_CGCG/3D_CGLS support flags is set, the GUI idle
+ * interrupt is switched to the same state as well. Always returns 0.
+ */
+static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);
+
+ gfx_v11_0_update_medium_grain_clock_gating(adev, enable);
+
+ gfx_v11_0_update_repeater_fgcg(adev, enable);
+
+ gfx_v11_0_update_sram_fgcg(adev, enable);
+
+ gfx_v11_0_update_perf_clk(adev, enable);
+
+ if (adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS))
+ gfx_v11_0_enable_gui_idle_interrupt(adev, enable);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ return 0;
+}
+/*
+ * Program the RLC_SPM_VMID field of RLC_SPM_MC_CNTL with @vmid.
+ *
+ * The register is read and, only if the new value differs, written back
+ * with GFXOFF disallowed for the duration. When the device is a
+ * "pp one VF" SR-IOV function that is not in runtime mode, the NO_KIQ
+ * accessors are used for both the read and the write.
+ *
+ * In addition, if @ring is non-NULL, the device is a pp one VF, the value
+ * changed and the ring is a GFX or COMPUTE ring, the same register write
+ * is also emitted on @ring.
+ */
+static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
+{
+ u32 reg, pre_data, data;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
+ pre_data = RREG32_NO_KIQ(reg);
+ else
+ pre_data = RREG32(reg);
+
+ data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
+ data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
+
+ if (pre_data != data) {
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) {
+ WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
+ } else
+ WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
+ }
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ if (ring
+ && amdgpu_sriov_is_pp_one_vf(adev)
+ && (pre_data != data)
+ && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX)
+ || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) {
+ amdgpu_ring_emit_wreg(ring, reg, data);
+ }
+}
+/*
+ * RLC callback table for GFX v11: binds each amdgpu_rlc_funcs hook to its
+ * gfx_v11_0_* implementation.
+ */
+static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
+ .is_rlc_enabled = gfx_v11_0_is_rlc_enabled,
+ .set_safe_mode = gfx_v11_0_set_safe_mode,
+ .unset_safe_mode = gfx_v11_0_unset_safe_mode,
+ .init = gfx_v11_0_rlc_init,
+ .get_csb_size = gfx_v11_0_get_csb_size,
+ .get_csb_buffer = gfx_v11_0_get_csb_buffer,
+ .resume = gfx_v11_0_rlc_resume,
+ .stop = gfx_v11_0_rlc_stop,
+ .reset = gfx_v11_0_rlc_reset,
+ .start = gfx_v11_0_rlc_start,
+ .update_spm_vmid = gfx_v11_0_update_spm_vmid,
+};
+
+/*
+ * Set or clear GFX_POWER_GATING_ENABLE in RLC_PG_CNTL.
+ *
+ * The bit is set only when @enable is true and AMD_PG_SUPPORT_GFX_PG is
+ * present in pg_flags; in every other case it is cleared. When gating is
+ * turned on, RLC_PG_DELAY_3 is additionally programmed on GC 11.0.1,
+ * 11.0.4 and 11.5.0 - 11.5.3.
+ */
+static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
+{
+	u32 pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
+
+	if (!enable || !(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
+		pg_cntl &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+	else
+		pg_cntl |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+
+	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, pg_cntl);
+
+	/* the CGPG hysteresis only matters when gating was switched on */
+	if (!enable || !(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
+		return;
+
+	/* Program RLC_PG_DELAY3 for CGPG hysteresis */
+	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+	case IP_VERSION(11, 0, 1):
+	case IP_VERSION(11, 0, 4):
+	case IP_VERSION(11, 5, 0):
+	case IP_VERSION(11, 5, 1):
+	case IP_VERSION(11, 5, 2):
+	case IP_VERSION(11, 5, 3):
+		WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
+		break;
+	default:
+		break;
+	}
+}
+/*
+ * Run gfx_v11_cntl_power_gating() with the RLC held in safe mode for the
+ * duration of the call.
+ */
+static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ gfx_v11_cntl_power_gating(adev, enable);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+}
+
+/*
+ * Apply a power gating state to the GFX block.
+ *
+ * Nothing is done for an SR-IOV VF. On GC 11.0.0/11.0.2/11.0.3 only the
+ * GFXOFF control is toggled. On GC 11.0.1, 11.0.4 and 11.5.0 - 11.5.3 the
+ * RLC power gating control is programmed as well: GFXOFF is disallowed
+ * before ungating, and allowed again only after gating has been set up.
+ * Other IP versions are left untouched. Always returns 0.
+ */
+static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+					   enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	bool gate = (state == AMD_PG_STATE_GATE);
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+	case IP_VERSION(11, 0, 0):
+	case IP_VERSION(11, 0, 2):
+	case IP_VERSION(11, 0, 3):
+		amdgpu_gfx_off_ctrl(adev, gate);
+		break;
+	case IP_VERSION(11, 0, 1):
+	case IP_VERSION(11, 0, 4):
+	case IP_VERSION(11, 5, 0):
+	case IP_VERSION(11, 5, 1):
+	case IP_VERSION(11, 5, 2):
+	case IP_VERSION(11, 5, 3):
+		if (gate) {
+			gfx_v11_cntl_pg(adev, true);
+			amdgpu_gfx_off_ctrl(adev, true);
+		} else {
+			amdgpu_gfx_off_ctrl(adev, false);
+			gfx_v11_cntl_pg(adev, false);
+		}
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Apply a clock gating state to the GFX block.
+ *
+ * Nothing is done for an SR-IOV VF or for IP versions that are not listed
+ * below. Otherwise gfx_v11_0_update_gfx_clock_gating() is called with
+ * gating enabled iff @state is AMD_CG_STATE_GATE. Always returns 0.
+ */
+static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					   enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	bool gate = (state == AMD_CG_STATE_GATE);
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+	case IP_VERSION(11, 0, 0):
+	case IP_VERSION(11, 0, 1):
+	case IP_VERSION(11, 0, 2):
+	case IP_VERSION(11, 0, 3):
+	case IP_VERSION(11, 0, 4):
+	case IP_VERSION(11, 5, 0):
+	case IP_VERSION(11, 5, 1):
+	case IP_VERSION(11, 5, 2):
+	case IP_VERSION(11, 5, 3):
+		gfx_v11_0_update_gfx_clock_gating(adev, gate);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Report which GFX clock gating features are currently active.
+ *
+ * ORs AMD_CG_SUPPORT_* bits into *flags; bits already set by the caller are
+ * preserved. MGCG, REPEATER_FGCG, GFX_FGCG and PERF_CLK are reported when
+ * their bit in RLC_CGTT_MGCG_OVERRIDE is clear. CGCG/CGLS and their 3D
+ * variants are reported when the matching enable bit is set in
+ * RLC_CGCG_CGLS_CTRL or RLC_CGCG_CGLS_CTRL_3D.
+ */
+static void gfx_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int val;
+
+	/* features controlled through override bits (bit clear == active) */
+	val = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+	if ((val & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK) == 0)
+		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
+	if ((val & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK) == 0)
+		*flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
+	if ((val & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK) == 0)
+		*flags |= AMD_CG_SUPPORT_GFX_FGCG;
+	if ((val & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK) == 0)
+		*flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
+
+	/* coarse grain gating / light sleep enables */
+	val = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+	if (val & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
+		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
+	if (val & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
+		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
+
+	/* the same two enables for the 3D pipe */
+	val = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+	if (val & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
+		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
+	if (val & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
+		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
+}
+/*
+ * Return the read pointer of a gfx ring.
+ *
+ * The value is loaded as a 32-bit word from ring->rptr_cpu_addr and
+ * zero-extended to u64.
+ */
+static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
+{
+ /* gfx11 uses a 32-bit rptr */
+ return *(uint32_t *)ring->rptr_cpu_addr;
+}
+
+/*
+ * Return the 64-bit write pointer of a gfx ring.
+ *
+ * With doorbells the value is read atomically from ring->wptr_cpu_addr;
+ * otherwise it is assembled from CP_RB0_WPTR (low half) and
+ * CP_RB0_WPTR_HI (high half).
+ */
+static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	u64 lo, hi;
+
+	/* XXX check if swapping is necessary on BE */
+	if (ring->use_doorbell)
+		return atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+
+	lo = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
+	hi = RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI);
+
+	return lo + (hi << 32);
+}
+
+/*
+ * Publish ring->wptr for a gfx ring.
+ *
+ * Without doorbells the value is split across CP_RB0_WPTR and
+ * CP_RB0_WPTR_HI. With doorbells it is stored atomically at
+ * ring->wptr_cpu_addr and then written to the ring's doorbell.
+ */
+static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (!ring->use_doorbell) {
+		WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
+			     lower_32_bits(ring->wptr));
+		WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
+			     upper_32_bits(ring->wptr));
+		return;
+	}
+
+	/* XXX check if swapping is necessary on BE */
+	atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
+	WDOORBELL64(ring->doorbell_index, ring->wptr);
+}
+/*
+ * Return the read pointer of a compute ring.
+ *
+ * The value is loaded as a 32-bit word from ring->rptr_cpu_addr and
+ * zero-extended to u64.
+ */
+static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
+{
+ /* gfx11 hardware uses a 32-bit rptr */
+ return *(uint32_t *)ring->rptr_cpu_addr;
+}
+
+/*
+ * Return the 64-bit write pointer of a compute ring, read atomically from
+ * ring->wptr_cpu_addr. Compute rings must use doorbells; anything else
+ * hits BUG().
+ */
+static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
+{
+	if (!ring->use_doorbell)
+		BUG();
+
+	/* XXX check if swapping is necessary on BE */
+	return atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+}
+
+/*
+ * Publish ring->wptr for a compute ring: store it atomically at
+ * ring->wptr_cpu_addr and write it to the ring's doorbell. Compute rings
+ * must use doorbells; anything else hits BUG().
+ */
+static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (!ring->use_doorbell)
+		BUG(); /* only DOORBELL method supported on gfx11 now */
+
+	/* XXX check if swapping is necessary on BE */
+	atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
+	WDOORBELL64(ring->doorbell_index, ring->wptr);
+}
+
+/*
+ * Emit an HDP flush on @ring.
+ *
+ * The reference/mask value is taken from the NBIO HDP flush description and
+ * shifted by the ring's pipe: cp0 for non-compute rings (using engine 1,
+ * the PFP), cp2 for compute rings on ME 1 and cp6 for compute rings on
+ * ME 2 (both using engine 0). Compute rings on any other ME emit nothing.
+ */
+static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const struct nbio_hdp_flush_reg *flush_reg = adev->nbio.hdp_flush_reg;
+	u32 flush_mask, engine;
+
+	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) {
+		flush_mask = flush_reg->ref_and_mask_cp0 << ring->pipe;
+		engine = 1; /* pfp */
+	} else if (ring->me == 1) {
+		flush_mask = flush_reg->ref_and_mask_cp2 << ring->pipe;
+		engine = 0;
+	} else if (ring->me == 2) {
+		flush_mask = flush_reg->ref_and_mask_cp6 << ring->pipe;
+		engine = 0;
+	} else {
+		return;
+	}
+
+	gfx_v11_0_wait_reg_mem(ring, engine, 0, 1,
+			       adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+			       adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+			       flush_mask, flush_mask, 0x20);
+}
+/*
+ * Emit an INDIRECT_BUFFER packet for @ib on a gfx ring.
+ *
+ * The control dword carries the IB length in dwords and the job's VMID in
+ * bits 24 and up. When gfx.mcbp is set and the IB has
+ * AMDGPU_IB_FLAG_PREEMPT, PRE_ENB is set in the control dword, PRE_RESUME
+ * is added if @flags contains AMDGPU_IB_PREEMPTED, and - for a non-zero
+ * VMID with rs64 disabled - DE metadata is emitted ahead of the packet.
+ *
+ * The IB GPU address must be dword aligned (BUG_ON otherwise).
+ */
+static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 header, control = 0;
+
+ header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+
+ control |= ib->length_dw | (vmid << 24);
+
+ if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+ control |= INDIRECT_BUFFER_PRE_ENB(1);
+
+ if (flags & AMDGPU_IB_PREEMPTED)
+ control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
+ if (vmid && !ring->adev->gfx.rs64_enable)
+ gfx_v11_0_ring_emit_de_meta(ring,
+ !amdgpu_sriov_vf(ring->adev) && (flags & AMDGPU_IB_PREEMPTED));
+ }
+
+ amdgpu_ring_write(ring, header);
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+/*
+ * Emit an INDIRECT_BUFFER packet for @ib on a compute ring.
+ *
+ * The control dword is INDIRECT_BUFFER_VALID | IB length in dwords |
+ * (VMID << 24). If the IB carries AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID, a
+ * SET_CONFIG_REG write of GDS_COMPUTE_MAX_WAVE_ID is emitted first.
+ * @flags is not used here.
+ *
+ * The IB GPU address must be dword aligned (BUG_ON otherwise).
+ */
+static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
+ /* Currently, there is a high possibility to get wave ID mismatch
+ * between ME and GDS, leading to a hw deadlock, because ME generates
+ * different wave IDs than the GDS expects. This situation happens
+ * randomly when at least 5 compute pipes use GDS ordered append.
+ * The wave IDs generated by ME are also wrong after suspend/resume.
+ * Those are probably bugs somewhere else in the kernel driver.
+ *
+ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+ * GDS to 0 for this ring (me/pipe).
+ */
+ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
+ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+/*
+ * Emit a RELEASE_MEM packet that flushes caches and writes fence value
+ * @seq to GPU address @addr.
+ *
+ * AMDGPU_FENCE_FLAG_64BIT selects a 64-bit data write (DATA_SEL 2 instead
+ * of 1) and AMDGPU_FENCE_FLAG_INT requests an interrupt (INT_SEL 2 instead
+ * of 0). @addr must be qword aligned for a 64-bit write and dword aligned
+ * otherwise; a misaligned address hits BUG_ON().
+ */
+static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+				      u64 seq, unsigned flags)
+{
+	bool wide = flags & AMDGPU_FENCE_FLAG_64BIT;
+	bool irq = flags & AMDGPU_FENCE_FLAG_INT;
+	u32 event_dw, select_dw;
+
+	event_dw = PACKET3_RELEASE_MEM_GCR_SEQ |
+		   PACKET3_RELEASE_MEM_GCR_GL2_WB |
+		   PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */
+		   PACKET3_RELEASE_MEM_GCR_GLM_WB |
+		   PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
+		   PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+		   PACKET3_RELEASE_MEM_EVENT_INDEX(5);
+	select_dw = PACKET3_RELEASE_MEM_DATA_SEL(wide ? 2 : 1) |
+		    PACKET3_RELEASE_MEM_INT_SEL(irq ? 2 : 0);
+
+	/* RELEASE_MEM - flush caches, send int */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
+	amdgpu_ring_write(ring, event_dw);
+	amdgpu_ring_write(ring, select_dw);
+
+	/*
+	 * the address should be Qword aligned if 64bit write, Dword
+	 * aligned if only send 32bit data low (discard data high)
+	 */
+	BUG_ON(addr & (wide ? 0x7 : 0x3));
+
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	amdgpu_ring_write(ring, lower_32_bits(seq));
+	amdgpu_ring_write(ring, upper_32_bits(seq));
+	amdgpu_ring_write(ring, 0);
+}
+/*
+ * Emit a wait on the ring's own fence.
+ *
+ * Emits a gfx_v11_0_wait_reg_mem() on the fence driver's GPU address with
+ * reference value fence_drv.sync_seq and mask 0xffffffff. The first
+ * argument after the ring is 1 for GFX rings and 0 for every other ring
+ * type.
+ */
+static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
+ upper_32_bits(addr), seq, 0xffffffff, 4);
+}
+/*
+ * Emit an INVALIDATE_TLBS packet on @ring.
+ *
+ * The single payload dword combines the destination select, the all-hub
+ * flag, the PASID and the flush type.
+ */
+static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+/*
+ * Emit a VM flush for @vmid / @pd_addr on @ring.
+ *
+ * Emits the GPU TLB flush through amdgpu_gmc_emit_flush_gpu_tlb(), adds a
+ * PFP_SYNC_ME packet on GFX rings, and clears the ring's cached SET_Q_MODE
+ * bookkeeping (set_q_mode_offs / set_q_mode_ptr).
+ */
+static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* compute doesn't have PFP */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
+ /* sync PFP to ME, otherwise we might get invalid PFP reads */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+ amdgpu_ring_write(ring, 0x0);
+ }
+
+ /* Make sure that we can't skip the SET_Q_MODE packets when the VM
+ * changed in any way.
+ */
+ ring->set_q_mode_offs = 0;
+ ring->set_q_mode_ptr = NULL;
+}
+
+/*
+ * Emit a fence on the KIQ ring.
+ *
+ * Writes the low 32 bits of @seq to @addr with a WRITE_DATA packet; 64-bit
+ * fences are rejected with BUG_ON() because only 32 bits are allocated per
+ * sequence write-back slot. With AMDGPU_FENCE_FLAG_INT a second WRITE_DATA
+ * writes 0x20000000 to CPC_INT_STATUS to raise the interrupt.
+ */
+static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
+					  u64 seq, unsigned int flags)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	/* we only allocate 32bit for each seq wb address */
+	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+	/* write fence seq to the "addr" */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	amdgpu_ring_write(ring, lower_32_bits(seq));
+
+	if (!(flags & AMDGPU_FENCE_FLAG_INT))
+		return;
+
+	/* set register to trigger INT */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+				 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
+	amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
+}
+
+/*
+ * Emit a CONTEXT_CONTROL packet.
+ *
+ * load_enable is always set in the first payload dword. When @flags
+ * contains AMDGPU_HAVE_CTX_SWITCH the global config/uconfig, CS SH
+ * register, per-context state and GFX SH register load bits are set as
+ * well. The second payload dword is always zero.
+ */
+static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
+					 uint32_t flags)
+{
+	/* set load_enable otherwise this package is just NOPs */
+	uint32_t load_ctl = 0x80000000;
+
+	if (flags & AMDGPU_HAVE_CTX_SWITCH)
+		load_ctl |= 0x8001 |	 /* load_global_config & load_global_uconfig */
+			    0x01000000 | /* load_cs_sh_regs */
+			    0x10002;	 /* load_per_context_state & load_gfx_sh_regs for GFX */
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+	amdgpu_ring_write(ring, load_ctl);
+	amdgpu_ring_write(ring, 0);
+}
+/*
+ * Emit a COND_EXEC packet whose condition is read from GPU address @addr.
+ *
+ * The final dword of the packet is written as 0 for now. Its position in
+ * the ring (wptr masked with buf_mask, taken just before that dword is
+ * written) is returned so it can be patched later.
+ */
+static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
+{
+ unsigned ret;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, 0);
+ ret = ring->wptr & ring->buf_mask;
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
+
+ return ret;
+}
+
+/*
+ * Emit a SET_Q_PREEMPTION_MODE packet carrying @shadow_va, @gds_va, @csa_va,
+ * @vmid and @init_shadow, preceded by a NOP whose first payload dword is the
+ * condition value tested by the COND_EXEC emitted on the next call.
+ *
+ * Returns without emitting anything when adev->gfx.cp_gfx_shadow is not set
+ * or the ring has no ring_obj. State carried between calls lives in
+ * ring->set_q_mode_offs, ring->set_q_mode_ptr and ring->set_q_mode_token.
+ */
+static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
+ u64 shadow_va, u64 csa_va,
+ u64 gds_va, bool init_shadow,
+ int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned int offs, end;
+
+ if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj)
+ return;
+
+ /*
+ * The logic here isn't easy to understand because we need to keep state
+ * across multiple executions of the function as well as between the
+ * CPU and GPU. The general idea is that the newly written GPU command
+ * has a condition on the previous one and is only executed if really
+ * necessary.
+ */
+
+ /*
+ * The dw in the NOP controls if the next SET_Q_MODE packet should be
+ * executed or not. Reserve 64bits just to be on the safe side.
+ */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1));
+ /* ring offset of the NOP payload dword that serves as the condition */
+ offs = ring->wptr & ring->buf_mask;
+
+ /*
+ * We start with skipping the prefix SET_Q_MODE and always executing
+ * the postfix SET_Q_MODE packet. This is changed below with a
+ * WRITE_DATA command when the postfix is executed.
+ */
+ amdgpu_ring_write(ring, shadow_va ? 1 : 0);
+ amdgpu_ring_write(ring, 0);
+
+ /* make this packet conditional on the dword recorded by the previous call */
+ if (ring->set_q_mode_offs) {
+ uint64_t addr;
+
+ addr = amdgpu_bo_gpu_offset(ring->ring_obj);
+ addr += ring->set_q_mode_offs << 2;
+ end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr);
+ }
+
+ /*
+ * When the postfix SET_Q_MODE packet executes we need to make sure that the
+ * next prefix SET_Q_MODE packet executes as well.
+ */
+ if (!shadow_va) {
+ uint64_t addr;
+
+ addr = amdgpu_bo_gpu_offset(ring->ring_obj);
+ addr += offs << 2;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, 0x1);
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
+ amdgpu_ring_write(ring, lower_32_bits(shadow_va));
+ amdgpu_ring_write(ring, upper_32_bits(shadow_va));
+ amdgpu_ring_write(ring, lower_32_bits(gds_va));
+ amdgpu_ring_write(ring, upper_32_bits(gds_va));
+ amdgpu_ring_write(ring, lower_32_bits(csa_va));
+ amdgpu_ring_write(ring, upper_32_bits(csa_va));
+ amdgpu_ring_write(ring, shadow_va ?
+ PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
+ amdgpu_ring_write(ring, init_shadow ?
+ PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
+
+ /* "end" is only initialized (and only used) when set_q_mode_offs != 0 */
+ if (ring->set_q_mode_offs)
+ amdgpu_ring_patch_cond_exec(ring, end);
+
+ if (shadow_va) {
+ uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid;
+
+ /*
+ * If the tokens match try to skip the last postfix SET_Q_MODE
+ * packet to avoid saving/restoring the state all the time.
+ */
+ if (ring->set_q_mode_ptr && ring->set_q_mode_token == token)
+ *ring->set_q_mode_ptr = 0;
+
+ ring->set_q_mode_token = token;
+ } else {
+ /* CPU pointer to the condition dword recorded by the previous call */
+ ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs];
+ }
+
+ /* remember this call's condition dword for the next invocation */
+ ring->set_q_mode_offs = offs;
+}
+
+/*
+ * Preempt the IB currently running on @ring through the KIQ.
+ *
+ * Returns -EINVAL when MES is enabled, when the KIQ has no
+ * kiq_unmap_queues callback, or when the trailing fence does not signal
+ * within adev->usec_timeout microseconds; -ENOMEM when the KIQ ring cannot
+ * be allocated; 0 on success.
+ */
+static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+
+ if (adev->enable_mes)
+ return -EINVAL;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ /* kiq->ring_lock is held while the KIQ ring is allocated, filled and committed */
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ /* assert IB preemption, emit the trailing fence */
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
+ ring->trail_fence_gpu_addr,
+ ++ring->trail_seq);
+ amdgpu_ring_commit(kiq_ring);
+
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+ break;
+ udelay(1);
+ }
+
+ /* the loop ran to completion without seeing the new trail_seq */
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
+ }
+
+ /* deassert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+ return r;
+}
+
+/*
+ * Emit a WRITE_DATA packet that fills the de_payload member of the
+ * v10_gfx_meta_data structure located at the CSA virtual address.
+ *
+ * With @resume set the payload dwords are copied from the CPU mapping of the
+ * CSA; otherwise a zeroed payload carrying only the GDS backup address is
+ * written.
+ */
+static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct v10_de_ib_state de_payload = {0};
+	const int payload_dw = sizeof(de_payload) >> 2;
+	uint64_t meta_offs, payload_gpu_addr, gds_backup;
+	void *payload_cpu_addr;
+
+	meta_offs = offsetof(struct v10_gfx_meta_data, de_payload);
+	payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + meta_offs;
+	payload_cpu_addr = adev->virt.csa_cpu_addr + meta_offs;
+
+	gds_backup = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+			   AMDGPU_CSA_SIZE - adev->gds.gds_size,
+			   PAGE_SIZE);
+
+	de_payload.gds_backup_addrlo = lower_32_bits(gds_backup);
+	de_payload.gds_backup_addrhi = upper_32_bits(gds_backup);
+
+	/* count field: payload dwords plus 4 header dwords, minus 2 */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA,
+					payload_dw + 4 - 2));
+	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
+				 WRITE_DATA_DST_SEL(8) |
+				 WR_CONFIRM) |
+				 WRITE_DATA_CACHE_POLICY(0));
+	amdgpu_ring_write(ring, lower_32_bits(payload_gpu_addr));
+	amdgpu_ring_write(ring, upper_32_bits(payload_gpu_addr));
+
+	amdgpu_ring_write_multiple(ring,
+				   resume ? payload_cpu_addr :
+					    (void *)&de_payload,
+				   sizeof(de_payload) >> 2);
+}
+
+/*
+ * Emit a FRAME_CONTROL packet. FRAME_CMD is 0 when @start is true and 1
+ * otherwise; FRAME_TMZ is OR-ed in when @secure is set.
+ */
+static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+					   bool secure)
+{
+	uint32_t tmz = 0;
+
+	if (secure)
+		tmz = FRAME_TMZ;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+	amdgpu_ring_write(ring, tmz | FRAME_CMD(start ? 0 : 1));
+}
+
+/*
+ * Emit a COPY_DATA packet that copies register @reg into the writeback
+ * buffer at dword index @reg_val_offs (adev->wb.gpu_addr + reg_val_offs * 4).
+ */
+static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+				     uint32_t reg_val_offs)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const uint32_t copy_ctl = 0 |		/* src: register*/
+				  (5 << 8) |	/* dst: memory */
+				  (1 << 20);	/* write confirm */
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
+	amdgpu_ring_write(ring, copy_ctl);
+	amdgpu_ring_write(ring, reg);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
+					      reg_val_offs * 4));
+	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
+					      reg_val_offs * 4));
+}
+
+/*
+ * Emit a WRITE_DATA packet that writes @val to register @reg. The control
+ * dword depends on the ring type: GFX, KIQ, or anything else.
+ */
+static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+				     uint32_t val)
+{
+	uint32_t ctl;
+
+	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX)
+		ctl = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
+	else if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+		ctl = (1 << 16); /* no inc addr */
+	else
+		ctl = WR_CONFIRM;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+	amdgpu_ring_write(ring, ctl);
+	amdgpu_ring_write(ring, reg);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, val);
+}
+
+/*
+ * Emit a wait on register @reg for (@val, @mask) by forwarding to
+ * gfx_v11_0_wait_reg_mem() with fixed selector arguments.
+ * NOTE(review): the meaning of the constant arguments (0, 0, 0, ..., 0x20)
+ * is defined by gfx_v11_0_wait_reg_mem(), which is not visible here.
+ */
+static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
+}
+
+/*
+ * Forward a combined register write / register wait on (@reg0, @reg1) with
+ * @ref and @mask to gfx_v11_0_wait_reg_mem(). The second argument is 1 on
+ * GFX rings and 0 on every other ring type.
+ */
+static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+						   uint32_t reg0, uint32_t reg1,
+						   uint32_t ref, uint32_t mask)
+{
+	int usepfp = 0;
+
+	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX)
+		usepfp = 1;
+
+	gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+			       ref, mask, 0x20);
+}
+
+/*
+ * Enable or disable the TIME_STAMP and GENERIC0 interrupts of a gfx pipe.
+ *
+ * Only me 0 with pipe 0 or 1 is accepted; anything else is logged with
+ * DRM_DEBUG and ignored. States other than ENABLE/DISABLE are ignored.
+ */
+static void
+gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
+				      uint32_t me, uint32_t pipe,
+				      enum amdgpu_interrupt_state state)
+{
+	uint32_t cp_int_cntl, cp_int_cntl_reg;
+	int enable;
+
+	if (me) {
+		DRM_DEBUG("invalid me %d\n", me);
+		return;
+	}
+
+	if (pipe == 0) {
+		cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
+	} else if (pipe == 1) {
+		cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
+	} else {
+		DRM_DEBUG("invalid pipe %d\n", pipe);
+		return;
+	}
+
+	if (state == AMDGPU_IRQ_STATE_ENABLE)
+		enable = 1;
+	else if (state == AMDGPU_IRQ_STATE_DISABLE)
+		enable = 0;
+	else
+		return;
+
+	/* read-modify-write of the selected per-pipe control register */
+	cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+	cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+				    TIME_STAMP_INT_ENABLE, enable);
+	cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+				    GENERIC0_INT_ENABLE, enable);
+	WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+}
+
+/*
+ * Enable or disable the TIME_STAMP and GENERIC0 interrupts of a compute pipe.
+ *
+ * amdgpu controls only the first MEC, so only me == 1 with pipe 0..3 is
+ * handled here; all other pipes' interrupts are set by amdkfd. Invalid
+ * me/pipe values are logged with DRM_DEBUG and ignored, as are states other
+ * than ENABLE/DISABLE.
+ */
+static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
+						      int me, int pipe,
+						      enum amdgpu_interrupt_state state)
+{
+	u32 mec_int_cntl, mec_int_cntl_reg;
+	int enable;
+
+	if (me != 1) {
+		DRM_DEBUG("invalid me %d\n", me);
+		return;
+	}
+
+	switch (pipe) {
+	case 0:
+		mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
+		break;
+	case 1:
+		mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
+		break;
+	case 2:
+		mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
+		break;
+	case 3:
+		mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
+		break;
+	default:
+		DRM_DEBUG("invalid pipe %d\n", pipe);
+		return;
+	}
+
+	if (state == AMDGPU_IRQ_STATE_ENABLE)
+		enable = 1;
+	else if (state == AMDGPU_IRQ_STATE_DISABLE)
+		enable = 0;
+	else
+		return;
+
+	/* read-modify-write of the selected per-pipe control register */
+	mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
+	mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+				     TIME_STAMP_INT_ENABLE, enable);
+	mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+				     GENERIC0_INT_ENABLE, enable);
+	WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
+}
+
+/*
+ * Dispatch an EOP interrupt state change to the gfx or compute helper for
+ * the pipe encoded in @type. Unknown types are ignored. Always returns 0.
+ */
+static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
+					     struct amdgpu_irq_src *src,
+					     unsigned type,
+					     enum amdgpu_interrupt_state state)
+{
+	bool is_compute;
+	int pipe;
+
+	/* decode the interrupt type into (engine kind, pipe) */
+	switch (type) {
+	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
+		is_compute = false;
+		pipe = 0;
+		break;
+	case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
+		is_compute = false;
+		pipe = 1;
+		break;
+	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
+		is_compute = true;
+		pipe = 0;
+		break;
+	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
+		is_compute = true;
+		pipe = 1;
+		break;
+	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
+		is_compute = true;
+		pipe = 2;
+		break;
+	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
+		is_compute = true;
+		pipe = 3;
+		break;
+	default:
+		return 0;
+	}
+
+	if (is_compute)
+		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, pipe, state);
+	else
+		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, pipe, state);
+
+	return 0;
+}
+
+/*
+ * CP end-of-pipe interrupt handler.
+ *
+ * With MES enabled and a non-zero doorbell offset in src_data[0], the
+ * matching user-queue fence driver is looked up in adev->userq_xa and
+ * processed under the xarray lock. Otherwise me/pipe/queue are decoded from
+ * entry->ring_id and fences are processed on the matching kernel ring.
+ * Always returns 0.
+ */
+static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
+			     struct amdgpu_irq_src *source,
+			     struct amdgpu_iv_entry *entry)
+{
+	u32 doorbell_offset = entry->src_data[0];
+	u8 me_id, pipe_id, queue_id;
+	struct amdgpu_ring *ring;
+	int i;
+
+	DRM_DEBUG("IH: CP EOP\n");
+
+	if (adev->enable_mes && doorbell_offset) {
+		struct amdgpu_userq_fence_driver *fence_drv = NULL;
+		struct xarray *xa = &adev->userq_xa;
+		unsigned long flags;
+
+		xa_lock_irqsave(xa, flags);
+		fence_drv = xa_load(xa, doorbell_offset);
+		if (fence_drv)
+			amdgpu_userq_fence_driver_process(fence_drv);
+		xa_unlock_irqrestore(xa, flags);
+
+		return 0;
+	}
+
+	pipe_id = (entry->ring_id & 0x03) >> 0;
+	me_id = (entry->ring_id & 0x0c) >> 2;
+	queue_id = (entry->ring_id & 0x70) >> 4;
+
+	if (me_id == 0) {
+		/* pipe 0 maps to gfx ring 0, any other pipe to gfx ring 1 */
+		amdgpu_fence_process(&adev->gfx.gfx_ring[pipe_id == 0 ? 0 : 1]);
+		return 0;
+	}
+
+	if (me_id != 1 && me_id != 2)
+		return 0;
+
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+		ring = &adev->gfx.compute_ring[i];
+		/* Per-queue interrupt is supported for MEC starting from VI.
+		 * The interrupt can only be enabled/disabled per pipe instead
+		 * of per queue.
+		 */
+		if (ring->me != me_id || ring->pipe != pipe_id ||
+		    ring->queue != queue_id)
+			continue;
+
+		amdgpu_fence_process(ring);
+	}
+
+	return 0;
+}
+
+/*
+ * Set PRIV_REG_INT_ENABLE on every ME pipe and every MEC pipe for which a
+ * control register offset is available. States other than ENABLE/DISABLE
+ * are ignored. Always returns 0.
+ */
+static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
+					      struct amdgpu_irq_src *source,
+					      unsigned int type,
+					      enum amdgpu_interrupt_state state)
+{
+	u32 cp_int_cntl_reg, cp_int_cntl;
+	int i, j;
+
+	if (state != AMDGPU_IRQ_STATE_DISABLE &&
+	    state != AMDGPU_IRQ_STATE_ENABLE)
+		return 0;
+
+	for (i = 0; i < adev->gfx.me.num_me; i++) {
+		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+			cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
+			if (!cp_int_cntl_reg)
+				continue;
+
+			cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+			cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+						    PRIV_REG_INT_ENABLE,
+						    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+			WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+		}
+	}
+
+	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+			/* MECs start at 1 */
+			cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j);
+			if (!cp_int_cntl_reg)
+				continue;
+
+			cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+			cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+						    PRIV_REG_INT_ENABLE,
+						    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+			WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Set OPCODE_ERROR_INT_ENABLE on every ME pipe and every MEC pipe for which
+ * a control register offset is available. States other than ENABLE/DISABLE
+ * are ignored. Always returns 0.
+ */
+static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev,
+					    struct amdgpu_irq_src *source,
+					    unsigned type,
+					    enum amdgpu_interrupt_state state)
+{
+	u32 cp_int_cntl_reg, cp_int_cntl;
+	int i, j;
+
+	if (state != AMDGPU_IRQ_STATE_DISABLE &&
+	    state != AMDGPU_IRQ_STATE_ENABLE)
+		return 0;
+
+	for (i = 0; i < adev->gfx.me.num_me; i++) {
+		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+			cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
+			if (!cp_int_cntl_reg)
+				continue;
+
+			cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+			cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+						    OPCODE_ERROR_INT_ENABLE,
+						    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+			WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+		}
+	}
+
+	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+			/* MECs start at 1 */
+			cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j);
+			if (!cp_int_cntl_reg)
+				continue;
+
+			cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+			cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+						    OPCODE_ERROR_INT_ENABLE,
+						    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+			WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Set PRIV_INSTR_INT_ENABLE on every ME pipe for which a control register
+ * offset is available. States other than ENABLE/DISABLE are ignored.
+ * Always returns 0.
+ */
+static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
+					       struct amdgpu_irq_src *source,
+					       unsigned int type,
+					       enum amdgpu_interrupt_state state)
+{
+	u32 cp_int_cntl_reg, cp_int_cntl;
+	int i, j;
+
+	if (state != AMDGPU_IRQ_STATE_DISABLE &&
+	    state != AMDGPU_IRQ_STATE_ENABLE)
+		return 0;
+
+	for (i = 0; i < adev->gfx.me.num_me; i++) {
+		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+			cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
+			if (!cp_int_cntl_reg)
+				continue;
+
+			cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+			cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+						    PRIV_INSTR_INT_ENABLE,
+						    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+			WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Report a privileged fault to the scheduler of the ring that raised it.
+ *
+ * me/pipe/queue are decoded from entry->ring_id. Nothing is done when
+ * adev->gfx.disable_kq is set. An me id other than 0, 1 or 2 hits BUG().
+ */
+static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
+					struct amdgpu_iv_entry *entry)
+{
+	u8 me_id, pipe_id, queue_id;
+	struct amdgpu_ring *ring;
+	int i;
+
+	pipe_id = (entry->ring_id & 0x03) >> 0;
+	me_id = (entry->ring_id & 0x0c) >> 2;
+	queue_id = (entry->ring_id & 0x70) >> 4;
+
+	if (adev->gfx.disable_kq)
+		return;
+
+	switch (me_id) {
+	case 0:
+		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+			ring = &adev->gfx.gfx_ring[i];
+			if (ring->me != me_id || ring->pipe != pipe_id ||
+			    ring->queue != queue_id)
+				continue;
+			drm_sched_fault(&ring->sched);
+		}
+		break;
+	case 1:
+	case 2:
+		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+			ring = &adev->gfx.compute_ring[i];
+			if (ring->me != me_id || ring->pipe != pipe_id ||
+			    ring->queue != queue_id)
+				continue;
+			drm_sched_fault(&ring->sched);
+		}
+		break;
+	default:
+		BUG();
+		break;
+	}
+}
+
+/*
+ * Interrupt handler for privileged register faults: logs the error and
+ * forwards @entry to gfx_v11_0_handle_priv_fault(). Always returns 0.
+ */
+static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal register access in command stream\n");
+ gfx_v11_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+/*
+ * Interrupt handler for illegal opcodes: logs the error and forwards @entry
+ * to gfx_v11_0_handle_priv_fault(). Always returns 0.
+ */
+static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal opcode in command stream \n");
+ gfx_v11_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+/*
+ * Interrupt handler for privileged instruction faults: logs the error and
+ * forwards @entry to gfx_v11_0_handle_priv_fault(). Always returns 0.
+ */
+static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal instruction in command stream\n");
+ gfx_v11_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+/*
+ * Forward the RLC GC FED interrupt to the RAS block's rlc_gc_fed_irq
+ * callback when one is registered. Returns the callback's result, or 0 when
+ * no callback is available.
+ */
+static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev,
+				    struct amdgpu_irq_src *source,
+				    struct amdgpu_iv_entry *entry)
+{
+	if (!adev->gfx.ras || !adev->gfx.ras->rlc_gc_fed_irq)
+		return 0;
+
+	return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry);
+}
+
+/*
+ * NOTE: the function below is compiled out by "#if 0".
+ * It toggles GENERIC2_INT_ENABLE in CPC_INT_CNTL and in the per-pipe
+ * CP_ME1_PIPEx_INT_CNTL register selected by the KIQ ring's pipe index.
+ */
+#if 0
+static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t tmp, target;
+ struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
+
+ /* per-pipe register offset: PIPE0 offset plus the ring's pipe index */
+ target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
+ target += ring->pipe;
+
+ switch (type) {
+ case AMDGPU_CP_KIQ_IRQ_DRIVER0:
+ if (state == AMDGPU_IRQ_STATE_DISABLE) {
+ tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
+ GENERIC2_INT_ENABLE, 0);
+ WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
+
+ tmp = RREG32_SOC15_IP(GC, target);
+ tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
+ GENERIC2_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, target, tmp);
+ } else {
+ tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
+ GENERIC2_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
+
+ tmp = RREG32_SOC15_IP(GC, target);
+ tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
+ GENERIC2_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, target, tmp);
+ }
+ break;
+ default:
+ BUG(); /* kiq only support GENERIC2_INT now */
+ break;
+ }
+ return 0;
+}
+#endif
+
+/*
+ * Emit an ACQUIRE_MEM packet with a GCR_CNTL value that requests GL2/GLM
+ * invalidate + writeback and GL1/GLV/GLK/GLI invalidate.
+ */
+static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+	const unsigned int gcr_cntl =
+			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
+			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
+			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
+			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
+			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
+			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
+			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
+			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
+	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
+	const u32 packet[] = {
+		PACKET3(PACKET3_ACQUIRE_MEM, 6),
+		0,		/* CP_COHER_CNTL */
+		0xffffffff,	/* CP_COHER_SIZE */
+		0xffffff,	/* CP_COHER_SIZE_HI */
+		0,		/* CP_COHER_BASE */
+		0,		/* CP_COHER_BASE_HI */
+		0x0000000A,	/* POLL_INTERVAL */
+		gcr_cntl,	/* GCR_CNTL */
+	};
+	unsigned int dw;
+
+	for (dw = 0; dw < ARRAY_SIZE(packet); dw++)
+		amdgpu_ring_write(ring, packet[dw]);
+}
+
+/*
+ * Report whether pipe reset may be used. Currently always returns false and
+ * emits a one-time warning, so the pipe-reset paths bail out early.
+ */
+static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev)
+{
+ /* Disable the pipe reset until the CPFW fully support it.*/
+ dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n");
+ return false;
+}
+
+
+/*
+ * Reset the gfx pipe that @ring runs on by pulsing the PFP/ME pipe reset
+ * bits in CP_ME_CNTL.
+ *
+ * Returns -EOPNOTSUPP when gfx_v11_pipe_reset_support() reports no support,
+ * otherwise 0. The outcome of the reset is only logged, not returned.
+ */
+static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r;
+
+ if (!gfx_v11_pipe_reset_support(adev))
+ return -EOPNOTSUPP;
+
+ /* register access below happens in safe mode with srbm_mutex held */
+ gfx_v11_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ /*
+ * Both values start from 0 rather than from the current register
+ * content; for pipes other than 0 and 1 they stay 0.
+ * NOTE(review): confirm that writing 0 to the remaining CP_ME_CNTL
+ * fields is intended.
+ */
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 0);
+ break;
+ default:
+ break;
+ }
+
+ /* assert the reset bits, then clear them again */
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);
+
+ /* r == 0 means the instruction pointer equals the firmware start address */
+ r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) -
+ RS64_FW_UC_START_ADDR_LO;
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v11_0_unset_safe_mode(adev, 0);
+
+ dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name,
+ r == 0 ? "successfully" : "failed");
+ /* FIXME: Sometimes driver can't cache the ME firmware start PC correctly,
+ * so the pipe reset status relies on the later gfx ring test result.
+ */
+ return 0;
+}
+
+/*
+ * Reset a kernel gfx queue.
+ *
+ * The queue is first reset through MES; if that fails a pipe reset is
+ * attempted instead. Afterwards the queue is re-initialized and re-mapped.
+ * Returns 0 on success or the first error encountered.
+ */
+static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
+			       unsigned int vmid,
+			       struct amdgpu_fence *timedout_fence)
+{
+	struct amdgpu_device *adev = ring->adev;
+	int err;
+
+	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+	err = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
+	if (err) {
+		/* fall back to resetting the whole pipe */
+		dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", err);
+		err = gfx_v11_reset_gfx_pipe(ring);
+		if (err)
+			return err;
+	}
+
+	err = gfx_v11_0_kgq_init_queue(ring, true);
+	if (err) {
+		dev_err(adev->dev, "failed to init kgq\n");
+		return err;
+	}
+
+	err = amdgpu_mes_map_legacy_queue(adev, ring);
+	if (err) {
+		dev_err(adev->dev, "failed to remap kgq\n");
+		return err;
+	}
+
+	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+/*
+ * Reset the compute pipe that @ring runs on by pulsing the matching pipe
+ * reset bit.
+ *
+ * With RS64 firmware (adev->gfx.rs64_enable) the bit lives in
+ * CP_MEC_RS64_CNTL, otherwise in CP_MEC_CNTL. Each path performs its
+ * read-modify-write on the register it actually writes, so the base value is
+ * never taken from one register and written into the other.
+ *
+ * Returns -EOPNOTSUPP when gfx_v11_pipe_reset_support() reports no support,
+ * otherwise 0. The outcome of the reset is only logged, not returned.
+ */
+static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t reset_pipe = 0, clean_pipe = 0;
+	int r;
+
+	if (!gfx_v11_pipe_reset_support(adev))
+		return -EOPNOTSUPP;
+
+	/* register access below happens in safe mode with srbm_mutex held */
+	gfx_v11_0_set_safe_mode(adev, 0);
+	mutex_lock(&adev->srbm_mutex);
+	soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+	if (adev->gfx.rs64_enable) {
+		/* start from the current CP_MEC_RS64_CNTL content */
+		reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+		clean_pipe = reset_pipe;
+
+		switch (ring->pipe) {
+		case 0:
+			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+						   MEC_PIPE0_RESET, 1);
+			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+						   MEC_PIPE0_RESET, 0);
+			break;
+		case 1:
+			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+						   MEC_PIPE1_RESET, 1);
+			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+						   MEC_PIPE1_RESET, 0);
+			break;
+		case 2:
+			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+						   MEC_PIPE2_RESET, 1);
+			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+						   MEC_PIPE2_RESET, 0);
+			break;
+		case 3:
+			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+						   MEC_PIPE3_RESET, 1);
+			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+						   MEC_PIPE3_RESET, 0);
+			break;
+		default:
+			break;
+		}
+		/* assert the reset bit, then clear it again */
+		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
+		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
+		/* r == 0 means the PC equals the firmware start address */
+		r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
+		    RS64_FW_UC_START_ADDR_LO;
+	} else {
+		/*
+		 * Legacy path: the reset bits live in CP_MEC_CNTL, so the
+		 * read-modify-write must start from CP_MEC_CNTL as well.
+		 */
+		reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
+		clean_pipe = reset_pipe;
+
+		if (ring->me == 1) {
+			switch (ring->pipe) {
+			case 0:
+				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+							   MEC_ME1_PIPE0_RESET, 1);
+				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+							   MEC_ME1_PIPE0_RESET, 0);
+				break;
+			case 1:
+				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+							   MEC_ME1_PIPE1_RESET, 1);
+				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+							   MEC_ME1_PIPE1_RESET, 0);
+				break;
+			case 2:
+				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+							   MEC_ME1_PIPE2_RESET, 1);
+				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+							   MEC_ME1_PIPE2_RESET, 0);
+				break;
+			case 3:
+				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+							   MEC_ME1_PIPE3_RESET, 1);
+				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+							   MEC_ME1_PIPE3_RESET, 0);
+				break;
+			default:
+				break;
+			}
+			/* mec1 fw pc: CP_MEC1_INSTR_PNTR */
+		} else {
+			switch (ring->pipe) {
+			case 0:
+				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+							   MEC_ME2_PIPE0_RESET, 1);
+				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+							   MEC_ME2_PIPE0_RESET, 0);
+				break;
+			case 1:
+				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+							   MEC_ME2_PIPE1_RESET, 1);
+				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+							   MEC_ME2_PIPE1_RESET, 0);
+				break;
+			case 2:
+				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+							   MEC_ME2_PIPE2_RESET, 1);
+				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+							   MEC_ME2_PIPE2_RESET, 0);
+				break;
+			case 3:
+				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+							   MEC_ME2_PIPE3_RESET, 1);
+				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+							   MEC_ME2_PIPE3_RESET, 0);
+				break;
+			default:
+				break;
+			}
+			/* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */
+		}
+		/* assert the reset bit, then clear it again */
+		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
+		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
+		/*
+		 * NOTE(review): CP_MEC1_INSTR_PNTR is read for both MEs even
+		 * though the comment above names CP_MEC2_INSTR_PNTR for ME2;
+		 * the value only feeds the log message below.
+		 */
+		r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR));
+	}
+
+	soc21_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+	gfx_v11_0_unset_safe_mode(adev, 0);
+
+	dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", ring->name,
+		 r == 0 ? "successfully" : "failed");
+	/* FIXME: Sometimes driver can't cache the MEC firmware start PC correctly,
+	 * so the pipe reset status relies on the compute ring test result.
+	 */
+	return 0;
+}
+
+/*
+ * Reset a kernel compute queue.
+ *
+ * The queue is first reset through MES; if that fails a compute pipe reset
+ * is attempted instead. Afterwards the queue is re-initialized and
+ * re-mapped. Returns 0 on success or the first error encountered.
+ */
+static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
+			       unsigned int vmid,
+			       struct amdgpu_fence *timedout_fence)
+{
+	struct amdgpu_device *adev = ring->adev;
+	int err = 0;
+
+	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+	err = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
+	if (err) {
+		/* fall back to resetting the whole pipe */
+		dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", err);
+		err = gfx_v11_0_reset_compute_pipe(ring);
+		if (err)
+			return err;
+	}
+
+	err = gfx_v11_0_kcq_init_queue(ring, true);
+	if (err) {
+		dev_err(adev->dev, "fail to init kcq\n");
+		return err;
+	}
+
+	err = amdgpu_mes_map_legacy_queue(adev, ring);
+	if (err) {
+		dev_err(adev->dev, "failed to remap kcq\n");
+		return err;
+	}
+
+	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+/*
+ * Print the register snapshots stored in adev->gfx.ip_dump_core,
+ * ip_dump_compute_queues and ip_dump_gfx_queues to @p.
+ *
+ * Printing stops at the first of these buffers that is NULL.
+ */
+static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	uint32_t i, j, k, reg, index = 0;
+	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
+
+	if (!adev->gfx.ip_dump_core)
+		return;
+
+	for (i = 0; i < reg_count; i++)
+		drm_printf(p, "%-50s \t 0x%08x\n",
+			   gc_reg_list_11_0[i].reg_name,
+			   adev->gfx.ip_dump_core[i]);
+
+	/* print compute queue registers for all instances */
+	if (!adev->gfx.ip_dump_compute_queues)
+		return;
+
+	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
+	drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
+		   adev->gfx.mec.num_mec,
+		   adev->gfx.mec.num_pipe_per_mec,
+		   adev->gfx.mec.num_queue_per_pipe);
+
+	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+				drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
+				for (reg = 0; reg < reg_count; reg++) {
+					/* on MECs after the first, the ME1 header dump entry is labelled as ME2 */
+					bool me2_hdr = i &&
+						gc_cp_reg_list_11[reg].reg_offset ==
+						regCP_MEC_ME1_HEADER_DUMP;
+
+					drm_printf(p, "%-50s \t 0x%08x\n",
+						   me2_hdr ? "regCP_MEC_ME2_HEADER_DUMP" :
+							     gc_cp_reg_list_11[reg].reg_name,
+						   adev->gfx.ip_dump_compute_queues[index + reg]);
+				}
+				index += reg_count;
+			}
+		}
+	}
+
+	/* print gfx queue registers for all instances */
+	if (!adev->gfx.ip_dump_gfx_queues)
+		return;
+
+	index = 0;
+	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
+	drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
+		   adev->gfx.me.num_me,
+		   adev->gfx.me.num_pipe_per_me,
+		   adev->gfx.me.num_queue_per_pipe);
+
+	for (i = 0; i < adev->gfx.me.num_me; i++) {
+		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+			for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+				drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
+				for (reg = 0; reg < reg_count; reg++)
+					drm_printf(p, "%-50s \t 0x%08x\n",
+						   gc_gfx_queue_reg_list_11[reg].reg_name,
+						   adev->gfx.ip_dump_gfx_queues[index + reg]);
+				index += reg_count;
+			}
+		}
+	}
+}
+
+/*
+ * Snapshot GC registers into adev->gfx.ip_dump_core,
+ * ip_dump_compute_queues and ip_dump_gfx_queues.
+ *
+ * Dumping stops at the first of these buffers that is NULL. Each section is
+ * bracketed by amdgpu_gfx_off_ctrl(false/true); the per-queue sections also
+ * hold srbm_mutex while selecting each me/pipe/queue.
+ */
+static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	uint32_t i, j, k, reg, index = 0;
+	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
+
+	if (!adev->gfx.ip_dump_core)
+		return;
+
+	amdgpu_gfx_off_ctrl(adev, false);
+	for (i = 0; i < reg_count; i++)
+		adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i]));
+	amdgpu_gfx_off_ctrl(adev, true);
+
+	/* dump compute queue registers for all instances */
+	if (!adev->gfx.ip_dump_compute_queues)
+		return;
+
+	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
+	amdgpu_gfx_off_ctrl(adev, false);
+	mutex_lock(&adev->srbm_mutex);
+	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+				/* ME0 is for GFX so start from 1 for CP */
+				soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
+				for (reg = 0; reg < reg_count; reg++) {
+					uint32_t offs;
+
+					/* MECs after the first read the ME2 header dump instead of ME1 */
+					if (i &&
+					    gc_cp_reg_list_11[reg].reg_offset ==
+					    regCP_MEC_ME1_HEADER_DUMP)
+						offs = SOC15_REG_OFFSET(GC, 0,
+								regCP_MEC_ME2_HEADER_DUMP);
+					else
+						offs = SOC15_REG_ENTRY_OFFSET(
+								gc_cp_reg_list_11[reg]);
+
+					adev->gfx.ip_dump_compute_queues[index + reg] =
+						RREG32(offs);
+				}
+				index += reg_count;
+			}
+		}
+	}
+	soc21_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+	amdgpu_gfx_off_ctrl(adev, true);
+
+	/* dump gfx queue registers for all instances */
+	if (!adev->gfx.ip_dump_gfx_queues)
+		return;
+
+	index = 0;
+	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
+	amdgpu_gfx_off_ctrl(adev, false);
+	mutex_lock(&adev->srbm_mutex);
+	for (i = 0; i < adev->gfx.me.num_me; i++) {
+		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+			for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+				soc21_grbm_select(adev, i, j, k, 0);
+
+				for (reg = 0; reg < reg_count; reg++)
+					adev->gfx.ip_dump_gfx_queues[index + reg] =
+						RREG32(SOC15_REG_ENTRY_OFFSET(
+							gc_gfx_queue_reg_list_11[reg]));
+				index += reg_count;
+			}
+		}
+	}
+	soc21_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+	amdgpu_gfx_off_ctrl(adev, true);
+}
+
+/*
+ * Emit a RUN_CLEANER_SHADER packet (two dwords: header plus a reserved
+ * zero dword) on @ring.
+ */
+static void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+ /* Emit the cleaner shader */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+}
+
+/*
+ * Ring begin_use hook: invokes the gfx profile helper followed by the
+ * enforce-isolation helper for @ring.
+ */
+static void gfx_v11_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_begin_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+}
+
+/*
+ * Ring end_use hook: invokes the gfx profile helper followed by the
+ * enforce-isolation helper for @ring.
+ */
+static void gfx_v11_0_ring_end_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_end_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
+/*
+ * IP-level callback table for the GFX v11.0 block. gfx_v11_0_ip_block.funcs
+ * points at this table.
+ */
+static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
+ .name = "gfx_v11_0",
+ .early_init = gfx_v11_0_early_init,
+ .late_init = gfx_v11_0_late_init,
+ .sw_init = gfx_v11_0_sw_init,
+ .sw_fini = gfx_v11_0_sw_fini,
+ .hw_init = gfx_v11_0_hw_init,
+ .hw_fini = gfx_v11_0_hw_fini,
+ .suspend = gfx_v11_0_suspend,
+ .resume = gfx_v11_0_resume,
+ .is_idle = gfx_v11_0_is_idle,
+ .wait_for_idle = gfx_v11_0_wait_for_idle,
+ .soft_reset = gfx_v11_0_soft_reset,
+ .check_soft_reset = gfx_v11_0_check_soft_reset,
+ .post_soft_reset = gfx_v11_0_post_soft_reset,
+ .set_clockgating_state = gfx_v11_0_set_clockgating_state,
+ .set_powergating_state = gfx_v11_0_set_powergating_state,
+ .get_clockgating_state = gfx_v11_0_get_clockgating_state,
+ /* register-state capture and printing */
+ .dump_ip_state = gfx_v11_ip_dump,
+ .print_ip_state = gfx_v11_ip_print,
+};
+
+/*
+ * Ring callback table for GFX rings (AMDGPU_RING_TYPE_GFX).
+ * gfx_v11_0_set_ring_funcs() installs it on every adev->gfx.gfx_ring[] entry.
+ *
+ * emit_frame_size is the sum of the per-step budgets annotated on each term
+ * below; any change to what one of the emit_* callbacks writes to the ring
+ * must be mirrored in the matching term.
+ */
+static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
+ .type = AMDGPU_RING_TYPE_GFX,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = gfx_v11_0_ring_get_rptr_gfx,
+ .get_wptr = gfx_v11_0_ring_get_wptr_gfx,
+ .set_wptr = gfx_v11_0_ring_set_wptr_gfx,
+ .emit_frame_size = /* totally 247 maximum if 16 IBs */
+ 5 + /* update_spm_vmid */
+ 5 + /* COND_EXEC */
+ 22 + /* SET_Q_PREEMPTION_MODE */
+ 7 + /* PIPELINE_SYNC */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 4 + /* VM_FLUSH */
+ 8 + /* FENCE for VM_FLUSH */
+ 20 + /* GDS switch */
+ 5 + /* COND_EXEC */
+ 7 + /* HDP_flush */
+ 4 + /* VGT_flush */
+ 31 + /* DE_META */
+ 3 + /* CNTX_CTRL */
+ 5 + /* HDP_INVL */
+ 22 + /* SET_Q_PREEMPTION_MODE */
+ 8 + 8 + /* FENCE x2 */
+ 8 + /* gfx_v11_0_emit_mem_sync */
+ 2, /* gfx_v11_0_ring_emit_cleaner_shader */
+ .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */
+ .emit_ib = gfx_v11_0_ring_emit_ib_gfx,
+ .emit_fence = gfx_v11_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v11_0_ring_test_ring,
+ .test_ib = gfx_v11_0_ring_test_ib,
+ .insert_nop = gfx_v11_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ /* the callbacks from here to preempt_ib/emit_frame_cntl are GFX-ring only in this file */
+ .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
+ .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
+ .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
+ .preempt_ib = gfx_v11_0_ring_preempt_ib,
+ .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
+ .emit_wreg = gfx_v11_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v11_0_emit_mem_sync,
+ .reset = gfx_v11_0_reset_kgq,
+ .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v11_0_ring_begin_use,
+ .end_use = gfx_v11_0_ring_end_use,
+};
+
+/*
+ * Ring callback table for compute rings (AMDGPU_RING_TYPE_COMPUTE).
+ * gfx_v11_0_set_ring_funcs() installs it on every adev->gfx.compute_ring[]
+ * entry.
+ *
+ * emit_frame_size is the sum of the per-step budgets annotated on each term
+ * below and must track what the listed emit_* callbacks write to the ring.
+ */
+static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
+ .type = AMDGPU_RING_TYPE_COMPUTE,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v11_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v11_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v11_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 5 + /* update_spm_vmid */
+ 20 + /* gfx_v11_0_ring_emit_gds_switch */
+ 7 + /* gfx_v11_0_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v11_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v11_0_ring_emit_vm_flush */
+ 8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
+ 8 + /* gfx_v11_0_emit_mem_sync */
+ 2, /* gfx_v11_0_ring_emit_cleaner_shader */
+ .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v11_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v11_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v11_0_ring_test_ring,
+ .test_ib = gfx_v11_0_ring_test_ib,
+ .insert_nop = gfx_v11_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_wreg = gfx_v11_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v11_0_emit_mem_sync,
+ .reset = gfx_v11_0_reset_kcq,
+ .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v11_0_ring_begin_use,
+ .end_use = gfx_v11_0_ring_end_use,
+};
+
+/*
+ * Ring callback table for the KIQ ring (AMDGPU_RING_TYPE_KIQ).
+ * gfx_v11_0_set_ring_funcs() installs it on adev->gfx.kiq[0].ring.
+ *
+ * Compared with the compute table it reuses the compute rptr/wptr and
+ * emit_ib callbacks, but uses the KIQ fence emitter and the generic
+ * amdgpu_ring_insert_nop, adds .emit_rreg, and sets no pipeline-sync,
+ * vm-flush, gds-switch, mem-sync, reset, cleaner-shader or
+ * begin_use/end_use callback.
+ */
+static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
+ .type = AMDGPU_RING_TYPE_KIQ,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v11_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v11_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v11_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v11_0_ring_emit_gds_switch */
+ 7 + /* gfx_v11_0_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v11_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
+ .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v11_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v11_0_ring_emit_fence_kiq,
+ .test_ring = gfx_v11_0_ring_test_ring,
+ .test_ib = gfx_v11_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_rreg = gfx_v11_0_ring_emit_rreg,
+ .emit_wreg = gfx_v11_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
+};
+
+/*
+ * Attach the ring callback tables: the GFX table to every GFX ring, the
+ * compute table to every compute ring, and the KIQ table to KIQ instance 0.
+ */
+static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+	int ring_id;
+
+	for (ring_id = 0; ring_id < adev->gfx.num_gfx_rings; ring_id++) {
+		adev->gfx.gfx_ring[ring_id].funcs = &gfx_v11_0_ring_funcs_gfx;
+	}
+
+	for (ring_id = 0; ring_id < adev->gfx.num_compute_rings; ring_id++) {
+		adev->gfx.compute_ring[ring_id].funcs =
+			&gfx_v11_0_ring_funcs_compute;
+	}
+
+	adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq;
+}
+
+/* End-of-pipe interrupt source: enable/disable (.set) and handler (.process). */
+static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
+ .set = gfx_v11_0_set_eop_interrupt_state,
+ .process = gfx_v11_0_eop_irq,
+};
+
+/* Privileged-register fault interrupt source. */
+static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
+ .set = gfx_v11_0_set_priv_reg_fault_state,
+ .process = gfx_v11_0_priv_reg_irq,
+};
+
+/* Bad-opcode fault interrupt source. */
+static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = {
+ .set = gfx_v11_0_set_bad_op_fault_state,
+ .process = gfx_v11_0_bad_op_irq,
+};
+
+/* Privileged-instruction fault interrupt source. */
+static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
+ .set = gfx_v11_0_set_priv_inst_fault_state,
+ .process = gfx_v11_0_priv_inst_irq,
+};
+
+/* RLC GC FED interrupt source: handler only, no .set callback is provided. */
+static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = {
+ .process = gfx_v11_0_rlc_gc_fed_irq,
+};
+
+/*
+ * Hook up the GFX interrupt sources on @adev: each source gets its callback
+ * table and the number of interrupt types it exposes.
+ */
+static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+	/* end-of-pipe: AMDGPU_CP_IRQ_LAST types */
+	adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;
+	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
+
+	/* the fault sources below expose a single type each */
+	adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
+	adev->gfx.priv_reg_irq.num_types = 1;
+
+	adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs;
+	adev->gfx.bad_op_irq.num_types = 1;
+
+	adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
+	adev->gfx.priv_inst_irq.num_types = 1;
+
+	/* 0x80 FED error */
+	adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;
+	adev->gfx.rlc_gc_fed_irq.num_types = 1;
+}
+
+/*
+ * Select the IMU mode (MISSION_MODE when AMD_IS_APU is set in adev->flags,
+ * DEBUG_MODE otherwise) and install the v11.0 IMU callback table.
+ */
+static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
+{
+	adev->gfx.imu.mode = (adev->flags & AMD_IS_APU) ? MISSION_MODE :
+							   DEBUG_MODE;
+	adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
+}
+
+/* Install the v11.0 RLC callback table on @adev. */
+static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
+}
+
+/*
+ * Initialise the GDS description in adev->gds: fixed GDS/GWS/OA sizes and a
+ * maximum compute wave id derived from the configured CU count
+ * (max_cu_per_sh * max_sh_per_se * max_shader_engines, times 32, minus one).
+ */
+static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
+{
+	unsigned total_cu;
+
+	total_cu = adev->gfx.config.max_cu_per_sh;
+	total_cu *= adev->gfx.config.max_sh_per_se;
+	total_cu *= adev->gfx.config.max_shader_engines;
+
+	adev->gds.gds_size = 0x1000;
+	adev->gds.gws_size = 64;
+	adev->gds.oa_size = 16;
+	adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
+}
+
+/*
+ * Register the MQD size and MQD init callback for the GFX and compute
+ * hardware IP slots of adev->mqds[].
+ */
+static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
+{
+	/* gfx engine MQD */
+	adev->mqds[AMDGPU_HW_IP_GFX].init_mqd = gfx_v11_0_gfx_mqd_init;
+	adev->mqds[AMDGPU_HW_IP_GFX].mqd_size = sizeof(struct v11_gfx_mqd);
+
+	/* compute engine MQD */
+	adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd = gfx_v11_0_compute_mqd_init;
+	adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
+		sizeof(struct v11_compute_mqd);
+}
+
+/*
+ * Write @bitmap into the INACTIVE_WGPS field of GC_USER_SHADER_ARRAY_CONFIG.
+ * A zero @bitmap is a no-op: the register is left untouched.
+ */
+static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
+							  u32 bitmap)
+{
+	u32 inactive_wgps;
+
+	if (bitmap == 0)
+		return;
+
+	/* position the bitmap in the field and drop anything outside it */
+	inactive_wgps = (bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT) &
+			GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+
+	WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, inactive_wgps);
+}
+
+/*
+ * Return the bitmap of active WGPs: the INACTIVE_WGPS fields of
+ * CC_GC_SHADER_ARRAY_CONFIG and GC_USER_SHADER_ARRAY_CONFIG are OR-ed
+ * together, inverted, and limited to (max_cu_per_sh / 2) bits.
+ */
+static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
+{
+	/* separate statements keep the two register reads in their original order */
+	u32 cc_cfg = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
+	u32 user_cfg = RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
+	u32 inactive_wgps = ((cc_cfg | user_cfg) &
+			     CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK) >>
+			    CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+	u32 valid_wgps =
+		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
+
+	return valid_wgps & ~inactive_wgps;
+}
+
+/*
+ * Expand the active-WGP bitmap into an active-CU bitmap.
+ *
+ * Bit n of the WGP bitmap maps to bits 2n and 2n+1 of the result; up to 16
+ * WGPs are considered, which fills the 32-bit return value.
+ *
+ * The masks are built from unsigned constants: with a plain int operand,
+ * "3 << 30" (the wgp_idx == 15 case) is not representable in int.
+ */
+static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
+{
+	u32 wgp_idx, wgp_active_bitmap;
+	u32 cu_active_bitmap = 0;
+
+	wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
+
+	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
+		/* if there is one WGP enabled, it means 2 CUs will be enabled */
+		if (wgp_active_bitmap & (1U << wgp_idx))
+			cu_active_bitmap |= 3U << (2 * wgp_idx);
+	}
+
+	return cu_active_bitmap;
+}
+
+/*
+ * Fill @cu_info with the active CU bitmap of every active shader array, the
+ * total number of active CUs and the SIMD-per-CU constant.
+ *
+ * For each (SE, SH) pair whose bit is set in the SA active bitmap, the pair
+ * is selected, the matching disable mask (if any) is programmed, and the
+ * resulting CU bitmap is read back and counted. The whole walk runs under
+ * adev->grbm_idx_mutex.
+ *
+ * Returns -EINVAL when @adev or @cu_info is NULL, 0 otherwise.
+ */
+static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info)
+{
+ int i, j, k, counter, active_cu_number = 0;
+ u32 mask, bitmap;
+ unsigned disable_masks[8 * 2];
+
+ if (!adev || !cu_info)
+ return -EINVAL;
+
+ /* presumably fills disable_masks[se * 2 + sh] from a user setting - TODO confirm */
+ amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ /* "bitmap" first holds the flat SA index, later the CU bitmap */
+ bitmap = i * adev->gfx.config.max_sh_per_se + j;
+ if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
+ continue;
+ mask = 1;
+ counter = 0;
+ gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ /* disable_masks only has 8 x 2 entries */
+ if (i < 8 && j < 2)
+ gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
+ adev, disable_masks[i * 2 + j]);
+ bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);
+
+ /*
+ * GFX11 could support more than 4 SEs, while the bitmap
+ * in cu_info struct is 4x4 and ioctl interface struct
+ * drm_amdgpu_info_device should keep stable.
+ * So we use last two columns of bitmap to store cu mask for
+ * SEs 4 to 7, the layout of the bitmap is as below:
+ * SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
+ * SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
+ * SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
+ * SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
+ * SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
+ * SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
+ * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
+ * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
+ *
+ * The layout above is per leading index; only index 0 is
+ * written here.
+ * NOTE(review): the column index assumes j < 2 - confirm
+ * max_sh_per_se never exceeds 2 on GFX11.
+ */
+ cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
+
+ /* count the set bits among the low max_cu_per_sh bits */
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
+ if (bitmap & mask)
+ counter++;
+
+ mask <<= 1;
+ }
+ active_cu_number += counter;
+ }
+ }
+ /* presumably 0xffffffff selects all SEs/SHs/instances again - TODO confirm */
+ gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ cu_info->number = active_cu_number;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+
+ return 0;
+}
+
+/*
+ * IP block descriptor for GFX version 11.0.0, exported through gfx_v11_0.h;
+ * its callbacks live in gfx_v11_0_ip_funcs.
+ */
+const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 11,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gfx_v11_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h
new file mode 100644
index 000000000000..157a5c812259
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V11_0_H__
+#define __GFX_V11_0_H__
+
+extern const struct amdgpu_ip_block_version gfx_v11_0_ip_block;
+
+int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+ bool req);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c
new file mode 100644
index 000000000000..999bb3cc88b7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "soc21.h"
+#include "gc/gc_11_0_3_offset.h"
+#include "gc/gc_11_0_3_sh_mask.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "gfx_v11_0.h"
+
+
+/*
+ * RLC GC FED interrupt handler for GFX 11.0.3.
+ *
+ * Reads RLC_RLCS_FED_STATUS_0/1 and picks the RAS block to report against:
+ * SDMA when either SDMA0_FED_ERR or SDMA1_FED_ERR is set in status 0, GFX
+ * otherwise. The event then goes to the RAS interrupt dispatcher, or, when
+ * running as an SR-IOV VF, to the virt ras_poison_handler if one exists.
+ *
+ * @source is unused.
+ *
+ * Returns 0 when the event was handled or both status registers read zero,
+ * -EINVAL when the selected RAS block is not initialised.
+ */
+static int gfx_v11_0_3_rlc_gc_fed_irq(struct amdgpu_device *adev,
+				      struct amdgpu_irq_src *source,
+				      struct amdgpu_iv_entry *entry)
+{
+	struct ras_dispatch_if ih_data = { .entry = entry };
+	struct ras_common_if *ras_if;
+	uint32_t fed_status0, fed_status1;
+	bool sdma_fed;
+
+	fed_status0 = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_RLCS_FED_STATUS_0));
+	fed_status1 = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_RLCS_FED_STATUS_1));
+
+	if (!(fed_status0 || fed_status1)) {
+		dev_warn(adev->dev, "RLC_GC_FED irq is generated, but rlc_status0 and rlc_status1 are empty!\n");
+		return 0;
+	}
+
+	/* Use RLC_RLCS_FED_STATUS_0/1 to distinguish FED error block. */
+	sdma_fed = REG_GET_FIELD(fed_status0, RLC_RLCS_FED_STATUS_0, SDMA0_FED_ERR) ||
+		   REG_GET_FIELD(fed_status0, RLC_RLCS_FED_STATUS_0, SDMA1_FED_ERR);
+	ras_if = sdma_fed ? adev->sdma.ras_if : adev->gfx.ras_if;
+
+	if (!ras_if) {
+		dev_err(adev->dev, "Gfx or sdma ras block not initialized, rlc_status0:0x%x.\n",
+			fed_status0);
+		return -EINVAL;
+	}
+
+	dev_warn(adev->dev, "RLC %s FED IRQ\n", ras_if->name);
+
+	if (amdgpu_sriov_vf(adev)) {
+		/* VF: hand over to the virt poison handler when one is provided */
+		if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
+			adev->virt.ops->ras_poison_handler(adev, ras_if->block);
+		else
+			dev_warn(adev->dev,
+				 "No ras_poison_handler interface in SRIOV for %s!\n", ras_if->name);
+		return 0;
+	}
+
+	ih_data.head = *ras_if;
+	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+
+	return 0;
+}
+
+/*
+ * Poison consumption handler for GFX 11.0.3.
+ *
+ * Acts only on an RLC GC FED interrupt from the GFX IH client whose vmid and
+ * pasid are both zero. In that case MODE2 reset is requested when status 0
+ * reports an SDMA0/SDMA1 FED error, and a GPU reset is triggered unless the
+ * device is in the RMA state.
+ *
+ * The RAS context is fetched once and checked before any use; previously a
+ * second, unchecked copy was dereferenced to set the reset flag.
+ *
+ * Always returns 0.
+ */
+static int gfx_v11_0_3_poison_consumption_handler(struct amdgpu_device *adev,
+					struct amdgpu_iv_entry *entry)
+{
+	struct amdgpu_ras *con;
+	uint32_t rlc_status0;
+
+	/* Workaround: when vmid and pasid are both zero, trigger gpu reset in KGD. */
+	if (!entry || entry->client_id != SOC21_IH_CLIENTID_GFX ||
+	    entry->src_id != GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT ||
+	    entry->vmid || entry->pasid)
+		return 0;
+
+	con = amdgpu_ras_get_context(adev);
+
+	/* read kept ahead of the context check so register access is unchanged */
+	rlc_status0 = RREG32_SOC15(GC, 0, regRLC_RLCS_FED_STATUS_0);
+
+	/* without a RAS context there is nothing to flag and no reset to issue */
+	if (!con)
+		return 0;
+
+	if (REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA0_FED_ERR) ||
+	    REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA1_FED_ERR))
+		con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+
+	if (!amdgpu_ras_is_rma(adev))
+		amdgpu_ras_reset_gpu(adev);
+
+	return 0;
+}
+
+/*
+ * GFX RAS callbacks for GFX 11.0.3 (declared extern in gfx_v11_0_3.h): the
+ * RLC GC FED interrupt handler and the poison consumption handler above.
+ */
+struct amdgpu_gfx_ras gfx_v11_0_3_ras = {
+ .rlc_gc_fed_irq = gfx_v11_0_3_rlc_gc_fed_irq,
+ .poison_consumption_handler = gfx_v11_0_3_poison_consumption_handler,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.h b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.h
new file mode 100644
index 000000000000..672c7920b3d0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V11_0_3_H__
+#define __GFX_V11_0_3_H__
+
+extern struct amdgpu_gfx_ras gfx_v11_0_3_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm
new file mode 100644
index 000000000000..9b90b66368c7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 192 Dwords cleaner shader.
+//To turn this shader program on for compilation change this to main and lower shader main to main_1
+
+// Navi3 : Clear SGPRs, VGPRs and LDS
+// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot
+// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD
+// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS)
+// It takes 2 workgroups to use all of LDS: one on each CU of the WGP
+// Each wave clears SGPRs 0 - 107
+// Each wave clears VGPRs 0 - 63
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+
+shader main
+ asic(GFX11)
+ type(CS)
+ wave_size(32)
+// Note: original source code from SQ team
+
+// Takes about 2500 clocks to run.
+// (theoretical fastest = 1024clks vgpr + 640lds = 1660 clks)
+//
+ S_BARRIER
+
+ //
+ // CLEAR VGPRs
+ //
+ // NOTE(review): the file header says each wave clears VGPRs 0 - 63, while the
+ // loop below is annotated as covering 96 registers - confirm which is intended.
+ s_mov_b32 m0, 0x00000058 // Loop 96/8=12 times (loop unrolled for performance)
+
+label_0005:
+ v_movreld_b32 v0, 0
+ v_movreld_b32 v1, 0
+ v_movreld_b32 v2, 0
+ v_movreld_b32 v3, 0
+ v_movreld_b32 v4, 0
+ v_movreld_b32 v5, 0
+ v_movreld_b32 v6, 0
+ v_movreld_b32 v7, 0
+ s_sub_u32 m0, m0, 8
+ s_cbranch_scc0 label_0005
+ //
+ //
+
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+ s_cbranch_scc0 label_0023 // Clean LDS only if it is the first wave of ThreadGroup/WorkGroup, otherwise skip to the SGPR clear
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x00000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+ //
+ // CLEAR SGPRs
+ //
+label_0023:
+ s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance)
+label_sgpr_loop:
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ // Clear the remaining special registers: vcc, flat_scratch and ttmp0-ttmp15
+ s_mov_b64 vcc, 0 //clear vcc
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+
+ s_endpgm
+
+end
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h
new file mode 100644
index 000000000000..3218cc04f543
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Define the cleaner shader gfx_11_0_3
+ *
+ * Machine-code words of the cleaner shader, stored as 32-bit values.
+ * Presumably the assembled form of gfx_v11_0_3_cleaner_shader.asm (the
+ * literals 0x00000058, 0x80000000, 0x00000400 and 0x00000068 appear in the
+ * same order as in that source) - TODO confirm against the assembler output
+ * before editing either file by hand.
+ */
+static const u32 gfx_11_0_3_cleaner_shader_hex[] = {
+ 0xb0804006, 0xbe8200ff,
+ 0x00000058, 0xbefd0080,
+ 0x7e008480, 0x7e028480,
+ 0x7e048480, 0x7e068480,
+ 0x7e088480, 0x7e0a8480,
+ 0x7e0c8480, 0x7e0e8480,
+ 0xbefd0002, 0x80828802,
+ 0xbfa1fff5, 0xbe8200ff,
+ 0x80000000, 0x8b020002,
+ 0xbfa10012, 0xbefe00c1,
+ 0xbeff00c1, 0xd71f0001,
+ 0x0001007f, 0xd7200001,
+ 0x0002027e, 0x16020288,
+ 0xbe8200bf, 0xbefd00c1,
+ 0xd9382000, 0x00020201,
+ 0xd9386040, 0x00040401,
+ 0xd7006a01, 0x000202ff,
+ 0x00000400, 0x80828102,
+ 0xbfa1fff7, 0xbefd00ff,
+ 0x00000068, 0xbe804280,
+ 0xbe814280, 0xbe824280,
+ 0xbe834280, 0x80fd847d,
+ 0xbfa1fffa, 0xbeea0180,
+ 0xbeec0180, 0xbeee0180,
+ 0xbef00180, 0xbef20180,
+ 0xbef40180, 0xbef60180,
+ 0xbef80180, 0xbefa0180,
+ 0xbfb00000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
new file mode 100644
index 000000000000..d01d2712cf57
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -0,0 +1,5793 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_smu.h"
+#include "imu_v12_0.h"
+#include "soc24.h"
+#include "nvd.h"
+
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "soc24_enum.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_12_0_0.h"
+
+#include "soc15.h"
+#include "clearstate_gfx12.h"
+#include "v12_structs.h"
+#include "gfx_v12_0.h"
+#include "nbif_v6_3_1.h"
+#include "mes_v12_0.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
+
+#define GFX12_NUM_GFX_RINGS 1
+/* Bytes of HPD EOP buffer reserved per compute ring, see gfx_v12_0_mec_init() */
+#define GFX12_MEC_HPD_SIZE 2048
+
+#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+
+/* Default values for the gfx queue (CP_GFX_* / CP_RB_*) registers */
+#define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_GFX_HQD_VMID_DEFAULT 0x00000000
+#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000
+#define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01
+#define regCP_GFX_HQD_CNTL_DEFAULT 0x00f00000
+#define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000
+
+/* Default values for the compute queue (CP_HQD_* / CP_MQD_*) registers */
+#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006
+#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509
+#define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000
+#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501
+#define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000
+
+
+/*
+ * Firmware images this module may request. The names follow the
+ * "amdgpu/<prefix>_<block>.bin" patterns used by gfx_v12_0_init_microcode()
+ * and gfx_v12_0_init_toc_microcode() (pfp, me, mec, rlc, rlc_kicker, toc).
+ */
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_toc.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc_kicker.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_toc.bin");
+
+/*
+ * GC (instance 0) status and control registers, described with
+ * SOC15_REG_ENTRY_STR.
+ * NOTE(review): presumably the register set captured when dumping GFX IP
+ * state; the consumer of this table is outside this view - confirm.
+ */
+static const struct amdgpu_hwip_reg_entry gc_reg_list_12_0[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
+ SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_LO32),
+ SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_HI32),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_RS64_INSTR_PNTR),
+ /* cp header registers */
+ /*
+ * NOTE(review): the same HEADER_DUMP register is listed eight times;
+ * presumably each read returns the next header dword - confirm.
+ */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ /* SE status registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3)
+};
+
+/*
+ * Compute queue (CP_HQD_*) registers plus the MEC ME1 header dump register,
+ * described with SOC15_REG_ENTRY_STR for GC instance 0.
+ * NOTE(review): presumably read once per compute queue when dumping IP
+ * state; the consumer of this table is outside this view - confirm.
+ */
+static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_12[] = {
+ /* compute registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS),
+ /* cp header registers */
+ /*
+ * NOTE(review): the same HEADER_DUMP register is listed eight times;
+ * presumably each read returns the next header dword - confirm.
+ */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+};
+
+/*
+ * Gfx queue (CP_GFX_HQD_* / CP_GFX_MQD_* / CP_RB_* / CP_IB1_*) registers plus
+ * the PFP and ME header dump registers, described with SOC15_REG_ENTRY_STR
+ * for GC instance 0.
+ * NOTE(review): presumably read once per gfx queue when dumping IP state;
+ * the consumer of this table is outside this view - confirm.
+ */
+static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = {
+ /* gfx queue registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
+ /* cp header registers */
+ /*
+ * NOTE(review): each HEADER_DUMP register is listed eight times;
+ * presumably each read returns the next header dword - confirm.
+ */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+};
+
+/*
+ * Golden register overrides for GC instance 0.
+ * NOTE(review): the two hex arguments of SOC15_REG_GOLDEN_VALUE are
+ * presumably a bit mask followed by the value to program under it, and the
+ * _rev0 table presumably applies to an early revision only; neither the
+ * macro nor the code that selects a table is visible here - confirm.
+ */
+static const struct soc15_reg_golden golden_settings_gc_12_0_rev0[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_12_0[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x00008000, 0x00008000),
+};
+
+/*
+ * Default SH_MEM_CONFIG value: 64-bit address mode, unaligned alignment
+ * mode, and the INITIAL_INST_PREFETCH field set to 3.
+ */
+#define DEFAULT_SH_MEM_CONFIG \
+ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
+ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
+
+/*
+ * Forward declarations of static helpers that are referenced before their
+ * definitions, e.g. gfx_v12_0_ring_emit_wreg() in gfx_v12_0_ring_test_ring()
+ * and gfx_v12_0_ring_invalidate_tlbs() in gfx_v12_0_kiq_invalidate_tlbs().
+ */
+static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev);
+static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v12_0_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v12_0_set_rlc_funcs(struct amdgpu_device *adev);
+static void gfx_v12_0_set_mqd_funcs(struct amdgpu_device *adev);
+static void gfx_v12_0_set_imu_funcs(struct amdgpu_device *adev);
+static int gfx_v12_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info);
+static uint64_t gfx_v12_0_get_gpu_clock_counter(struct amdgpu_device *adev);
+static void gfx_v12_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id);
+static u32 gfx_v12_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
+
+static void gfx_v12_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
+static void gfx_v12_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val);
+static int gfx_v12_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
+static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel);
+static void gfx_v12_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v12_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v12_0_update_perf_clk(struct amdgpu_device *adev,
+ bool enable);
+
+/*
+ * Emit a SET_RESOURCES packet on the KIQ ring.
+ *
+ * @kiq_ring: ring the packet is written to
+ * @queue_mask: 64-bit queue mask, emitted as two dwords (low, then high)
+ *
+ * Eight dwords are written in total (header plus seven payload dwords),
+ * which is the count advertised as .set_resources_size in
+ * gfx_v12_0_kiq_pm4_funcs. All payload fields other than the queue mask
+ * are written as zero.
+ */
+static void gfx_v12_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
+ uint64_t queue_mask)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+ amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* oac mask */
+ /* NOTE(review): meaning of the last payload dword is not stated here */
+ amdgpu_ring_write(kiq_ring, 0);
+}
+
+/*
+ * Emit a MAP_QUEUES packet on the KIQ ring for @ring.
+ *
+ * @kiq_ring: ring the packet is written to
+ * @ring: queue to map; supplies queue/pipe numbers, doorbell index, the MQD
+ * buffer object and the write pointer GPU address
+ *
+ * The ME number and engine select are derived from the ring type
+ * (compute: me 1 / eng 0, gfx: me 0 / eng 4, MES: me 2 / eng 5). Any other
+ * type triggers WARN_ON and is emitted with me 0 / eng 0. Seven dwords are
+ * written, matching .map_queues_size in gfx_v12_0_kiq_pm4_funcs.
+ */
+static void gfx_v12_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring)
+{
+ uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ uint64_t wptr_addr = ring->wptr_gpu_addr;
+ uint32_t me = 0, eng_sel = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_COMPUTE:
+ me = 1;
+ eng_sel = 0;
+ break;
+ case AMDGPU_RING_TYPE_GFX:
+ me = 0;
+ eng_sel = 4;
+ break;
+ case AMDGPU_RING_TYPE_MES:
+ me = 2;
+ eng_sel = 5;
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ /* control dword: Q_sel 0, vmid 0, num_Q 1; me and engine depend on ring type */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+ PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+ PACKET3_MAP_QUEUES_ME((me)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+ /* MQD address, then write pointer address, each as low/high dwords */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
+/*
+ * Emit an UNMAP_QUEUES packet on the KIQ ring for @ring.
+ *
+ * @kiq_ring: ring the packet is written to
+ * @ring: queue to unmap or preempt
+ * @action: unmap action encoded into the packet
+ * @gpu_addr: address emitted only for PREEMPT_QUEUES_NO_UNMAP
+ * @seq: value emitted only for PREEMPT_QUEUES_NO_UNMAP
+ *
+ * When MES is enabled and the KIQ ring scheduler is not ready, the request
+ * is handed to amdgpu_mes_unmap_legacy_queue() and nothing is written to
+ * @kiq_ring. Otherwise six dwords are written, matching .unmap_queues_size
+ * in gfx_v12_0_kiq_pm4_funcs.
+ */
+static void gfx_v12_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ /* engine select 4 for gfx rings, 0 for everything else */
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
+ amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
+ return;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, num_Q: 1, engine from eng_sel */
+ PACKET3_UNMAP_QUEUES_ACTION(action) |
+ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+ PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+ /* the trailing three dwords carry address/seq only for preemption */
+ if (action == PREEMPT_QUEUES_NO_UNMAP) {
+ amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
+ /* NOTE(review): seq is u64 but emitted as a single dword - confirm truncation is intended */
+ amdgpu_ring_write(kiq_ring, seq);
+ } else {
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ }
+}
+
+/*
+ * Emit a QUERY_STATUS packet on the KIQ ring for @ring.
+ *
+ * @kiq_ring: ring the packet is written to
+ * @ring: queue being queried; supplies the doorbell index and ring type
+ * @addr: 64-bit address emitted as low/high dwords
+ * @seq: 64-bit value emitted as low/high dwords
+ *
+ * Seven dwords are written, matching .query_status_size in
+ * gfx_v12_0_kiq_pm4_funcs.
+ */
+static void gfx_v12_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ u64 addr, u64 seq)
+{
+ /* engine select 4 for gfx rings, 0 for everything else */
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+ PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+ PACKET3_QUERY_STATUS_COMMAND(2));
+ amdgpu_ring_write(kiq_ring, /* doorbell offset and engine select */
+ PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+ PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+/*
+ * KIQ wrapper around gfx_v12_0_ring_invalidate_tlbs(): forwards @pasid,
+ * @flush_type and @all_hub unchanged and always passes 1 as dst_sel.
+ */
+static void gfx_v12_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid,
+ uint32_t flush_type,
+ bool all_hub)
+{
+ gfx_v12_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
+}
+
+/*
+ * KIQ PM4 packet emitters for gfx v12 together with the size of each packet.
+ * The *_size values for set_resources, map_queues, unmap_queues and
+ * query_status equal the number of dwords the matching emitter above writes
+ * with amdgpu_ring_write(); keep them in sync when changing an emitter.
+ */
+static const struct kiq_pm4_funcs gfx_v12_0_kiq_pm4_funcs = {
+ .kiq_set_resources = gfx_v12_0_kiq_set_resources,
+ .kiq_map_queues = gfx_v12_0_kiq_map_queues,
+ .kiq_unmap_queues = gfx_v12_0_kiq_unmap_queues,
+ .kiq_query_status = gfx_v12_0_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx_v12_0_kiq_invalidate_tlbs,
+ .set_resources_size = 8,
+ .map_queues_size = 7,
+ .unmap_queues_size = 6,
+ .query_status_size = 7,
+ .invalidate_tlbs_size = 2,
+};
+
+/* Install the gfx v12 KIQ PM4 function table on KIQ instance 0. */
+static void gfx_v12_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.kiq[0].pmf = &gfx_v12_0_kiq_pm4_funcs;
+}
+
+/*
+ * Emit a WAIT_REG_MEM packet (seven dwords) on @ring.
+ *
+ * @ring: ring the packet is written to
+ * @eng_sel: value for the WAIT_REG_MEM engine field
+ * @mem_space: 1 to poll memory, 0 to poll a register
+ * @opt: value for the WAIT_REG_MEM operation field
+ * @addr0: first address dword; must be dword aligned when @mem_space is set
+ * @addr1: second address dword
+ * @ref: reference value to compare against
+ * @mask: mask dword emitted after @ref
+ * @inv: poll interval
+ *
+ * The compare function is fixed to 3 ("equal"). A misaligned @addr0 with
+ * @mem_space set hits BUG_ON.
+ */
+static void gfx_v12_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
+ int mem_space, int opt, uint32_t addr0,
+ uint32_t addr1, uint32_t ref,
+ uint32_t mask, uint32_t inv)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring,
+ /* memory (1) or register (0) */
+ (WAIT_REG_MEM_MEM_SPACE(mem_space) |
+ WAIT_REG_MEM_OPERATION(opt) | /* wait */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(eng_sel)));
+
+ if (mem_space)
+ BUG_ON(addr0 & 0x3); /* Dword align */
+ amdgpu_ring_write(ring, addr0);
+ amdgpu_ring_write(ring, addr1);
+ amdgpu_ring_write(ring, ref);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, inv); /* poll interval */
+}
+
+/*
+ * Basic ring test: have the ring write a known value to SCRATCH_REG0.
+ *
+ * The register is first set to 0xCAFEDEAD by the CPU, then a packet that
+ * writes 0xDEADBEEF is submitted and the register is polled for up to
+ * adev->usec_timeout iterations.
+ *
+ * Returns 0 on success, the amdgpu_ring_alloc() error if ring space could
+ * not be reserved, or -ETIMEDOUT if the value never showed up.
+ */
+static int gfx_v12_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ WREG32(scratch, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 5);
+ if (r) {
+ dev_err(adev->dev,
+ "amdgpu: cp failed to lock ring %d (%d).\n",
+ ring->idx, r);
+ return r;
+ }
+
+ /* KIQ rings use the generic register write helper, others SET_UCONFIG_REG */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
+ gfx_v12_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
+ } else {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+ amdgpu_ring_write(ring, scratch -
+ PACKET3_SET_UCONFIG_REG_START);
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ }
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(scratch);
+ if (tmp == 0xDEADBEEF)
+ break;
+ /* sleep 1 ms per poll in emulation mode, otherwise busy-wait 1 us */
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ else
+ udelay(1);
+ }
+
+ /* r is still 0 here unless the loop ran out */
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ return r;
+}
+
+/*
+ * Indirect buffer test: schedule an IB that writes a known value to a
+ * writeback slot and check that the value arrives.
+ *
+ * @ring: ring to test
+ * @timeout: timeout handed to dma_fence_wait_timeout()
+ *
+ * The slot is seeded with 0xCAFEDEAD and the IB carries a WRITE_DATA packet
+ * that stores 0xDEADBEEF to it. The test is skipped (returns 0) for the KIQ
+ * ring when MES KIQ is enabled.
+ *
+ * Returns 0 on success, -ETIMEDOUT if the fence did not signal in time,
+ * -EINVAL if the slot does not hold the expected value, or the error of the
+ * failing allocation/scheduling/wait call.
+ */
+static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+	/* WRITE_DATA: header, control, address lo, address hi, payload */
+	const unsigned int ib_size_dw = 5;
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ib ib;
+	struct dma_fence *f = NULL;
+	unsigned index;
+	uint64_t gpu_addr;
+	uint32_t *cpu_ptr;
+	long r;
+
+	/* MES KIQ fw hasn't indirect buffer support for now */
+	if (adev->enable_mes_kiq &&
+	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+		return 0;
+
+	memset(&ib, 0, sizeof(ib));
+
+	r = amdgpu_device_wb_get(adev, &index);
+	if (r)
+		return r;
+
+	gpu_addr = adev->wb.gpu_addr + (index * 4);
+	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+	cpu_ptr = &adev->wb.wb[index];
+
+	/* request enough bytes for every dword written below */
+	r = amdgpu_ib_get(adev, NULL, ib_size_dw * sizeof(uint32_t),
+			  AMDGPU_IB_POOL_DIRECT, &ib);
+	if (r) {
+		dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r);
+		goto err1;
+	}
+
+	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+	ib.ptr[2] = lower_32_bits(gpu_addr);
+	ib.ptr[3] = upper_32_bits(gpu_addr);
+	ib.ptr[4] = 0xDEADBEEF;
+	ib.length_dw = ib_size_dw;
+
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+	if (r)
+		goto err2;
+
+	/* 0 means the wait timed out, a negative value is an error */
+	r = dma_fence_wait_timeout(f, false, timeout);
+	if (r == 0) {
+		r = -ETIMEDOUT;
+		goto err2;
+	} else if (r < 0) {
+		goto err2;
+	}
+
+	if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
+		r = 0;
+	else
+		r = -EINVAL;
+err2:
+	amdgpu_ib_free(&ib, NULL);
+	dma_fence_put(f);
+err1:
+	amdgpu_device_wb_free(adev, index);
+	return r;
+}
+
+/*
+ * Release the PFP, ME, RLC and MEC firmware images and free the RLC
+ * register list format buffer.
+ */
+static void gfx_v12_0_free_microcode(struct amdgpu_device *adev)
+{
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+
+ kfree(adev->gfx.rlc.register_list_format);
+}
+
+/*
+ * Request "amdgpu/<ucode_prefix>_toc.bin" and record its header fields in
+ * adev->psp.toc (firmware version, feature version, size in bytes and the
+ * start address of the ucode array inside the image).
+ *
+ * Returns 0 on success. On a request failure the TOC firmware reference is
+ * released and the error is returned.
+ */
+static int gfx_v12_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
+{
+	const struct psp_firmware_header_v1_0 *hdr;
+	int ret;
+
+	ret = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
+				   AMDGPU_UCODE_REQUIRED,
+				   "amdgpu/%s_toc.bin", ucode_prefix);
+	if (ret) {
+		amdgpu_ucode_release(&adev->psp.toc_fw);
+		return ret;
+	}
+
+	hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
+
+	adev->psp.toc.fw_version = le32_to_cpu(hdr->header.ucode_version);
+	adev->psp.toc.feature_version = le32_to_cpu(hdr->sos.fw_version);
+	adev->psp.toc.size_bytes = le32_to_cpu(hdr->header.ucode_size_bytes);
+	/* the ucode array sits at a header-relative byte offset */
+	adev->psp.toc.start_addr = (uint8_t *)hdr +
+		le32_to_cpu(hdr->header.ucode_array_offset_bytes);
+
+	return 0;
+}
+
+/*
+ * Request and register all gfx v12 firmware images.
+ *
+ * The firmware name prefix is decoded from the GC IP version. PFP, ME and
+ * MEC images are always requested; the RLC image (the "_rlc_kicker" variant
+ * when amdgpu_is_kicker_fw() says so) is skipped for SR-IOV virtual
+ * functions; the TOC image is only requested for RLC backdoor auto load.
+ * Finally the IMU init_microcode callback is invoked when one is set.
+ *
+ * Returns 0 on success. On any failure the PFP, ME, RLC and MEC firmware
+ * references are released and the error is returned.
+ */
+static int gfx_v12_0_init_microcode(struct amdgpu_device *adev)
+{
+	char ucode_prefix[30];
+	int err;
+	const struct rlc_firmware_header_v2_0 *rlc_hdr;
+	uint16_t version_major;
+	uint16_t version_minor;
+
+	DRM_DEBUG("\n");
+
+	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+				   AMDGPU_UCODE_REQUIRED,
+				   "amdgpu/%s_pfp.bin", ucode_prefix);
+	if (err)
+		goto out;
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
+
+	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+				   AMDGPU_UCODE_REQUIRED,
+				   "amdgpu/%s_me.bin", ucode_prefix);
+	if (err)
+		goto out;
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
+
+	if (!amdgpu_sriov_vf(adev)) {
+		if (amdgpu_is_kicker_fw(adev))
+			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+						   AMDGPU_UCODE_REQUIRED,
+						   "amdgpu/%s_rlc_kicker.bin", ucode_prefix);
+		else
+			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+						   AMDGPU_UCODE_REQUIRED,
+						   "amdgpu/%s_rlc.bin", ucode_prefix);
+		if (err)
+			goto out;
+		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+		err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
+		if (err)
+			goto out;
+	}
+
+	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+				   AMDGPU_UCODE_REQUIRED,
+				   "amdgpu/%s_mec.bin", ucode_prefix);
+	if (err)
+		goto out;
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
+
+	/* only one MEC for gfx 12 */
+	adev->gfx.mec2_fw = NULL;
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+		err = gfx_v12_0_init_toc_microcode(adev, ucode_prefix);
+		/*
+		 * Stop here on failure: the IMU step below assigns err
+		 * again and would otherwise hide a TOC load error.
+		 */
+		if (err)
+			goto out;
+	}
+
+	if (adev->gfx.imu.funcs) {
+		if (adev->gfx.imu.funcs->init_microcode) {
+			err = adev->gfx.imu.funcs->init_microcode(adev);
+			if (err)
+				dev_err(adev->dev, "Failed to load imu firmware!\n");
+		}
+	}
+
+out:
+	if (err) {
+		amdgpu_ucode_release(&adev->gfx.pfp_fw);
+		amdgpu_ucode_release(&adev->gfx.me_fw);
+		amdgpu_ucode_release(&adev->gfx.rlc_fw);
+		amdgpu_ucode_release(&adev->gfx.mec_fw);
+	}
+
+	return err;
+}
+
+/*
+ * Return the number of dwords needed for the clear state buffer built from
+ * gfx12_cs_data: one leading dword plus, for every extent of every
+ * SECT_CONTEXT section, two dwords and the extent's register values.
+ *
+ * Returns 0 as soon as a section that is not SECT_CONTEXT is found.
+ * @adev is not used.
+ */
+static u32 gfx_v12_0_get_csb_size(struct amdgpu_device *adev)
+{
+	const struct cs_section_def *section;
+	const struct cs_extent_def *extent;
+	/* the first dword is reserved ahead of the per-extent data */
+	u32 dwords = 1;
+
+	for (section = gfx12_cs_data; section->section; section++) {
+		if (section->id != SECT_CONTEXT)
+			return 0;
+
+		for (extent = section->section; extent->extent; extent++)
+			dwords += extent->reg_count + 2;
+	}
+
+	return dwords;
+}
+
+/*
+ * Fill @buffer with the clear state data from adev->gfx.rlc.cs_data.
+ *
+ * Layout: buffer[0] holds the number of extents written; each extent follows
+ * as reg_count, reg_index and then reg_count register values (converted with
+ * cpu_to_le32). This is the same dword count gfx_v12_0_get_csb_size()
+ * computes.
+ *
+ * Nothing is written when cs_data or @buffer is NULL. If a section that is
+ * not SECT_CONTEXT is met, the function returns early and buffer[0] is left
+ * unwritten.
+ */
+static void gfx_v12_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
+{
+ u32 count = 0, clustercount = 0, i;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+
+ if (adev->gfx.rlc.cs_data == NULL)
+ return;
+ if (buffer == NULL)
+ return;
+
+ /* skip buffer[0]; it receives the extent count at the end */
+ count += 1;
+
+ for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
+ if (sect->id == SECT_CONTEXT) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ clustercount++;
+ buffer[count++] = ext->reg_count;
+ buffer[count++] = ext->reg_index;
+
+ for (i = 0; i < ext->reg_count; i++)
+ buffer[count++] = cpu_to_le32(ext->extent[i]);
+ }
+ } else
+ return;
+ }
+
+ buffer[0] = clustercount;
+}
+
+/*
+ * Free the RLC buffer objects: the clear state block and the CP jump table,
+ * passing their GPU address and CPU pointer fields to
+ * amdgpu_bo_free_kernel() along with the object.
+ */
+static void gfx_v12_0_rlc_fini(struct amdgpu_device *adev)
+{
+ /* clear state block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
+ &adev->gfx.rlc.clear_state_gpu_addr,
+ (void **)&adev->gfx.rlc.cs_ptr);
+
+ /* jump table block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
+ &adev->gfx.rlc.cp_table_gpu_addr,
+ (void **)&adev->gfx.rlc.cp_table_ptr);
+}
+
+/*
+ * Fill RLCG register access control entry 0 with the GC instance 0 offsets
+ * of SCRATCH_REG0..3, GRBM_GFX_CNTL, GRBM_GFX_INDEX and RLC_SPARE_INT_0, and
+ * mark RLCG register access as supported.
+ */
+static void gfx_v12_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
+{
+ struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
+
+ reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
+ reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+ reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
+ reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
+ reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
+ reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
+ reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
+ reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
+ adev->gfx.rlc.rlcg_reg_access_supported = true;
+}
+
+/*
+ * Software-side RLC setup: select gfx12_cs_data as the clear state data,
+ * create the clear state block from it and, when the RLC function table
+ * provides update_spm_vmid, set the SPM VMID to 0xf.
+ *
+ * Returns 0 on success or the amdgpu_gfx_rlc_init_csb() error.
+ */
+static int gfx_v12_0_rlc_init(struct amdgpu_device *adev)
+{
+	int ret;
+
+	adev->gfx.rlc.cs_data = gfx12_cs_data;
+
+	/* init clear state block */
+	if (adev->gfx.rlc.cs_data) {
+		ret = amdgpu_gfx_rlc_init_csb(adev);
+		if (ret)
+			return ret;
+	}
+
+	/* init spm vmid with 0xf */
+	if (adev->gfx.rlc.funcs->update_spm_vmid)
+		adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
+
+	return 0;
+}
+
+/*
+ * Free the MEC buffer objects: the HPD EOP buffer, the MEC firmware buffer
+ * and the MEC firmware data buffer.
+ */
+static void gfx_v12_0_mec_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
+}
+
+/*
+ * Clear the gfx (ME) queue bitmap and then let
+ * amdgpu_gfx_graphics_queue_acquire() claim the gfx queues.
+ */
+static void gfx_v12_0_me_init(struct amdgpu_device *adev)
+{
+ bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
+
+ amdgpu_gfx_graphics_queue_acquire(adev);
+}
+
+/*
+ * Compute (MEC) software setup: clear the compute queue bitmap, acquire the
+ * compute queues and allocate a zeroed HPD EOP buffer in GTT with
+ * GFX12_MEC_HPD_SIZE bytes per compute ring.
+ *
+ * No buffer is allocated when there are no compute rings.
+ *
+ * Returns 0 on success. If the buffer cannot be created, the MEC objects are
+ * freed through gfx_v12_0_mec_fini() and the error is returned.
+ */
+static int gfx_v12_0_mec_init(struct amdgpu_device *adev)
+{
+	int r;
+	u32 *hpd;
+	size_t mec_hpd_size;
+
+	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+	/* take ownership of the relevant compute queues */
+	amdgpu_gfx_compute_queue_acquire(adev);
+	mec_hpd_size = adev->gfx.num_compute_rings * GFX12_MEC_HPD_SIZE;
+
+	if (mec_hpd_size) {
+		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+					      AMDGPU_GEM_DOMAIN_GTT,
+					      &adev->gfx.mec.hpd_eop_obj,
+					      &adev->gfx.mec.hpd_eop_gpu_addr,
+					      (void **)&hpd);
+		if (r) {
+			/* the buffer is the HPD EOP bo, not an HDP one */
+			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
+			gfx_v12_0_mec_fini(adev);
+			return r;
+		}
+
+		memset(hpd, 0, mec_hpd_size);
+
+		/* the CPU mapping and reservation are only needed for the clear */
+		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+	}
+
+	return 0;
+}
+
+/*
+ * Read one indirect SQ register of a wave: program SQ_IND_INDEX with the
+ * wave id and register index, then return the value read from SQ_IND_DATA.
+ */
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
+{
+ WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (address << SQ_IND_INDEX__INDEX__SHIFT));
+ return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+}
+
+/*
+ * Read @num consecutive indirect SQ registers of a wave into @out.
+ *
+ * SQ_IND_INDEX is programmed once with the wave id, work-item (@thread) id,
+ * start index @regno and the AUTO_INCR bit; SQ_IND_DATA is then read @num
+ * times. @out must have room for @num dwords.
+ */
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
+ uint32_t thread, uint32_t regno,
+ uint32_t num, uint32_t *out)
+{
+ WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (regno << SQ_IND_INDEX__INDEX__SHIFT) |
+ (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
+ (SQ_IND_INDEX__AUTO_INCR_MASK));
+ while (num--)
+ *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+}
+
+/*
+ * Append the state of one wave to @dst.
+ *
+ * @adev: device to read from
+ * @xcc_id: not used by this implementation
+ * @simd: expected to be 0 (see below); a non-zero value only triggers WARN_ON
+ * @wave: wave id passed to wave_read_ind()
+ * @dst: output array
+ * @no_fields: in/out index into @dst, advanced once per value written
+ *
+ * Writes 24 dwords starting at dst[*no_fields]: the type marker 4 followed
+ * by 23 SQ_WAVE_* registers in the fixed order below.
+ */
+static void gfx_v12_0_read_wave_data(struct amdgpu_device *adev,
+ uint32_t xcc_id,
+ uint32_t simd, uint32_t wave,
+ uint32_t *dst, int *no_fields)
+{
+ /* in gfx12 the SIMD_ID is specified as part of the INSTANCE
+ * field when performing a select_se_sh so it should be
+ * zero here */
+ WARN_ON(simd != 0);
+
+ /* type 4 wave data */
+ dst[(*no_fields)++] = 4;
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATE_PRIV);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXCP_FLAG_PRIV);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXCP_FLAG_USER);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAP_CTRL);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_ACTIVE);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_VALID_AND_IDLE);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_DVGPR_ALLOC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_DVGPR_ALLOC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_SCHED_MODE);
+}
+
+/*
+ * Read wave SGPRs into @dst by forwarding to wave_read_regs() with the
+ * thread argument fixed at 0 and the register index biased by
+ * SQIND_WAVE_SGPRS_OFFSET.  @simd is expected to be 0; @xcc_id is unused.
+ */
+static void gfx_v12_0_read_wave_sgprs(struct amdgpu_device *adev,
+				      uint32_t xcc_id, uint32_t simd,
+				      uint32_t wave, uint32_t start,
+				      uint32_t size, uint32_t *dst)
+{
+	uint32_t first_reg = start + SQIND_WAVE_SGPRS_OFFSET;
+
+	WARN_ON(simd != 0);
+
+	wave_read_regs(adev, wave, 0, first_reg, size, dst);
+}
+
+/*
+ * Read wave VGPRs for @thread into @dst by forwarding to wave_read_regs()
+ * with the register index biased by SQIND_WAVE_VGPRS_OFFSET.  @xcc_id and
+ * @simd are not used here.
+ */
+static void gfx_v12_0_read_wave_vgprs(struct amdgpu_device *adev,
+				      uint32_t xcc_id, uint32_t simd,
+				      uint32_t wave, uint32_t thread,
+				      uint32_t start, uint32_t size,
+				      uint32_t *dst)
+{
+	uint32_t first_reg = start + SQIND_WAVE_VGPRS_OFFSET;
+
+	wave_read_regs(adev, wave, thread, first_reg, size, dst);
+}
+
+/*
+ * gfx_funcs.select_me_pipe_q callback: forwards the me/pipe/queue/vmid
+ * selection to soc24_grbm_select().  @xcc_id is accepted for the callback
+ * signature but not used.
+ */
+static void gfx_v12_0_select_me_pipe_q(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
+{
+ soc24_grbm_select(adev, me, pipe, q, vm);
+}
+
+/* all sizes are in bytes */
+#define MQD_SHADOW_BASE_SIZE 73728
+#define MQD_SHADOW_BASE_ALIGNMENT 256
+#define MQD_FWWORKAREA_SIZE 484
+#define MQD_FWWORKAREA_ALIGNMENT 256
+
+/*
+ * Fill @shadow_info with the fixed shadow and CSA sizes/alignments from
+ * the MQD_* constants above.  No check is made on whether shadowing is
+ * enabled; @adev is not used.
+ */
+static void gfx_v12_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info)
+{
+ shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
+ shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
+ /* the CSA fields are populated from the FW work-area constants */
+ shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
+ shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
+}
+
+/*
+ * Report the gfx shadow buffer requirements.
+ *
+ * Returns 0 with @shadow_info filled in when adev->gfx.cp_gfx_shadow is
+ * set or the caller passes @skip_check; otherwise zeroes @shadow_info and
+ * returns -EINVAL.
+ */
+static int gfx_v12_0_get_gfx_shadow_info(struct amdgpu_device *adev,
+					 struct amdgpu_gfx_shadow_info *shadow_info,
+					 bool skip_check)
+{
+	if (!adev->gfx.cp_gfx_shadow && !skip_check) {
+		memset(shadow_info, 0, sizeof(*shadow_info));
+		return -EINVAL;
+	}
+
+	gfx_v12_0_get_gfx_shadow_info_nocheck(adev, shadow_info);
+	return 0;
+}
+
+/* GFX v12 implementations of the generic amdgpu_gfx_funcs callbacks. */
+static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = {
+ .get_gpu_clock_counter = &gfx_v12_0_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v12_0_select_se_sh,
+ .read_wave_data = &gfx_v12_0_read_wave_data,
+ .read_wave_sgprs = &gfx_v12_0_read_wave_sgprs,
+ .read_wave_vgprs = &gfx_v12_0_read_wave_vgprs,
+ .select_me_pipe_q = &gfx_v12_0_select_me_pipe_q,
+ .update_perfmon_mgcg = &gfx_v12_0_update_perf_clk,
+ .get_gfx_shadow_info = &gfx_v12_0_get_gfx_shadow_info,
+};
+
+/*
+ * Set the per-IP-version gfx configuration defaults (HW context count and
+ * SC FIFO sizes) in adev->gfx.config.
+ *
+ * Returns 0 on success, or -EINVAL when the GC IP version is not one this
+ * file knows how to configure.  The caller already propagates a non-zero
+ * return, so an unknown version fails driver init instead of taking the
+ * whole kernel down with BUG().
+ */
+static int gfx_v12_0_gpu_early_init(struct amdgpu_device *adev)
+{
+	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+	case IP_VERSION(12, 0, 0):
+	case IP_VERSION(12, 0, 1):
+		adev->gfx.config.max_hw_contexts = 8;
+		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
+		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+		break;
+	default:
+		dev_err(adev->dev, "unsupported GC IP version 0x%x\n",
+			amdgpu_ip_version(adev, GC_HWIP, 0));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Initialise gfx ring @ring_id for the given me/pipe/queue: fill in the
+ * ring identity, doorbell index, VM hub and name, then register it with
+ * amdgpu_ring_init() using the per-pipe EOP interrupt type.
+ *
+ * Returns the result of amdgpu_ring_init().
+ */
+static int gfx_v12_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
+				   int me, int pipe, int queue)
+{
+	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[ring_id];
+	unsigned int irq_type;
+
+	ring->me = me;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+
+	/* ring 0 uses the gfx_ring0 doorbell, every other ring gfx_ring1 */
+	ring->doorbell_index = (ring_id ? adev->doorbell_index.gfx_ring1 :
+					  adev->doorbell_index.gfx_ring0) << 1;
+	ring->vm_hub = AMDGPU_GFXHUB(0);
+	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
+
+	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+				AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+/*
+ * Initialise compute ring @ring_id for the given mec/pipe/queue: fill in
+ * the ring identity, doorbell index, EOP buffer address, VM hub and name,
+ * pick the pipe priority, then register it with amdgpu_ring_init().
+ *
+ * Returns the result of amdgpu_ring_init().
+ */
+static int gfx_v12_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+				       int mec, int pipe, int queue)
+{
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+	unsigned int irq_type;
+	unsigned int hw_prio;
+
+	/* mec0 is me1 */
+	ring->me = mec + 1;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
+	/* each ring gets its own GFX12_MEC_HPD_SIZE slice of the EOP buffer */
+	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+				+ (ring_id * GFX12_MEC_HPD_SIZE);
+	ring->vm_hub = AMDGPU_GFXHUB(0);
+	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+		+ ring->pipe;
+
+	if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring))
+		hw_prio = AMDGPU_GFX_PIPE_PRIO_HIGH;
+	else
+		hw_prio = AMDGPU_GFX_PIPE_PRIO_NORMAL;
+
+	/* type-2 packets are deprecated on MEC, use type-3 instead */
+	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+				hw_prio, NULL);
+}
+
+/*
+ * Per-firmware placement table for the RLC autoload buffer, indexed by
+ * SOC24_FIRMWARE_ID.  Entries are filled from the RLC table of contents by
+ * gfx_v12_0_parse_rlc_toc(); offset and size are stored in bytes.
+ */
+static struct {
+ SOC24_FIRMWARE_ID id;
+ unsigned int offset;
+ unsigned int size;
+ unsigned int size_x16;
+} rlc_autoload_info[SOC24_FIRMWARE_ID_MAX];
+
+/* TOC offsets are scaled by this many dwords when converted to bytes */
+#define RLC_TOC_OFFSET_DWUNIT 8
+/* dword multiplier applied to TOC sizes when the entry's size_x16 flag is set */
+#define RLC_SIZE_MULTIPLE 1024
+/* lower bound for the autoload buffer size, in MiB (shifted by 20 at use) */
+#define RLC_TOC_UMF_SIZE_inM 23ULL
+/* value placed in bits 31:24 of the word patched into the TOC image */
+#define RLC_TOC_FORMAT_API 165ULL
+
+/*
+ * Walk the RLC table of contents at @rlc_toc and record each entry's id,
+ * byte offset and byte size in rlc_autoload_info[].
+ *
+ * Offsets in the TOC are in units of RLC_TOC_OFFSET_DWUNIT dwords.  Sizes
+ * are in dwords, or in units of RLC_SIZE_MULTIPLE dwords when the entry's
+ * size_x16 flag is set.
+ *
+ * Parsing stops at the first entry whose id is not strictly between
+ * SOC24_FIRMWARE_ID_INVALID and SOC24_FIRMWARE_ID_MAX.  The upper bound
+ * matters because the id indexes rlc_autoload_info[], and the TOC comes
+ * from a firmware image: an out-of-range id must not write past the table.
+ */
+static void gfx_v12_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
+{
+	RLC_TABLE_OF_CONTENT_V2 *ucode = rlc_toc;
+
+	while (ucode && (ucode->id > SOC24_FIRMWARE_ID_INVALID) &&
+	       (ucode->id < SOC24_FIRMWARE_ID_MAX)) {
+		rlc_autoload_info[ucode->id].id = ucode->id;
+		rlc_autoload_info[ucode->id].offset =
+			ucode->offset * RLC_TOC_OFFSET_DWUNIT * 4;
+		rlc_autoload_info[ucode->id].size =
+			ucode->size_x16 ? ucode->size * RLC_SIZE_MULTIPLE * 4 :
+					  ucode->size * 4;
+		ucode++;
+	}
+}
+
+/*
+ * Parse the PSP-provided RLC TOC and return the number of bytes needed for
+ * the autoload buffer.
+ *
+ * The result is the sum of all recorded firmware sizes, raised to
+ * (offset + size) of the last table entry if the sum falls below that
+ * entry's offset, and never less than RLC_TOC_UMF_SIZE_inM MiB.
+ */
+static uint32_t gfx_v12_0_calc_toc_total_size(struct amdgpu_device *adev)
+{
+	const unsigned int last = SOC24_FIRMWARE_ID_MAX - 1;
+	SOC24_FIRMWARE_ID fw_id;
+	uint32_t bytes = 0;
+
+	gfx_v12_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
+
+	for (fw_id = SOC24_FIRMWARE_ID_RLC_G_UCODE;
+	     fw_id < SOC24_FIRMWARE_ID_MAX; fw_id++)
+		bytes += rlc_autoload_info[fw_id].size;
+
+	/* In case the offset in rlc toc ucode is aligned */
+	if (bytes < rlc_autoload_info[last].offset)
+		bytes = rlc_autoload_info[last].offset +
+			rlc_autoload_info[last].size;
+
+	if (bytes < (RLC_TOC_UMF_SIZE_inM << 20))
+		bytes = RLC_TOC_UMF_SIZE_inM << 20;
+
+	return bytes;
+}
+
+/*
+ * Allocate the reserved VRAM buffer object (64 KiB aligned) that holds the
+ * RLC autoload firmware image, sized from the RLC TOC.
+ *
+ * Returns 0 on success or the error from amdgpu_bo_create_reserved().
+ */
+static int gfx_v12_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
+{
+	uint32_t bo_size = gfx_v12_0_calc_toc_total_size(adev);
+	int ret;
+
+	ret = amdgpu_bo_create_reserved(adev, bo_size, 64 * 1024,
+					AMDGPU_GEM_DOMAIN_VRAM,
+					&adev->gfx.rlc.rlc_autoload_bo,
+					&adev->gfx.rlc.rlc_autoload_gpu_addr,
+					(void **)&adev->gfx.rlc.rlc_autoload_ptr);
+	if (ret)
+		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", ret);
+
+	return ret;
+}
+
+/*
+ * Copy one firmware image into its slot of the RLC autoload buffer.
+ *
+ * The slot (offset and size) comes from rlc_autoload_info[@id].  A
+ * @fw_size of 0, or one larger than the slot, is replaced by the slot
+ * size; when fewer bytes than the slot size are copied the remainder of
+ * the slot is zero-filled.  Ids outside the valid range are ignored.
+ */
+static void gfx_v12_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
+							SOC24_FIRMWARE_ID id,
+							const void *fw_data,
+							uint32_t fw_size)
+{
+	char *buf = adev->gfx.rlc.rlc_autoload_ptr;
+	uint32_t slot_offset, slot_size;
+
+	if (id <= SOC24_FIRMWARE_ID_INVALID || id >= SOC24_FIRMWARE_ID_MAX)
+		return;
+
+	slot_offset = rlc_autoload_info[id].offset;
+	slot_size = rlc_autoload_info[id].size;
+
+	if (fw_size == 0 || fw_size > slot_size)
+		fw_size = slot_size;
+
+	memcpy(buf + slot_offset, fw_data, fw_size);
+
+	/* pad the unused tail of the slot with zeroes */
+	if (fw_size < slot_size)
+		memset(buf + slot_offset + fw_size, 0, slot_size - fw_size);
+}
+
+/*
+ * Patch the PSP-provided TOC image and copy it into the RLC_TOC slot of
+ * the autoload buffer.  Note that the patch is written into the source TOC
+ * data (adev->psp.toc.start_addr) itself before the copy.
+ */
+static void
+gfx_v12_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev)
+{
+ void *data;
+ uint32_t size;
+ uint32_t *toc_ptr;
+
+ data = adev->psp.toc.start_addr;
+ size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_TOC].size;
+
+ /* overwrite the second-to-last dword of the TOC: format id in bits 31:24, bit 0 set */
+ toc_ptr = (uint32_t *)data + size / 4 - 2;
+ *toc_ptr = (RLC_TOC_FORMAT_API << 24) | 0x1;
+
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_TOC,
+ data, size);
+}
+
+/*
+ * Copy the PFP, ME, MEC and RLC firmware sections into their slots of the
+ * RLC autoload buffer.
+ *
+ * For each CP engine the instruction section goes to the engine's ucode
+ * slot and the single data section is copied to every per-pipe stack slot
+ * of that engine.  For RLC, the additional sections copied depend on the
+ * firmware header minor version (>= 1 and >= 2).
+ */
+static void
+gfx_v12_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev)
+{
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ const struct gfx_firmware_header_v2_0 *cpv2_hdr;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ const struct rlc_firmware_header_v2_1 *rlcv21_hdr;
+ const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
+ uint16_t version_major, version_minor;
+
+ /* pfp ucode */
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+ /* instruction */
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP,
+ fw_data, fw_size);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ /* the same data section is copied to both PFP pipe stack slots */
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP_P0_STACK,
+ fw_data, fw_size);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP_P1_STACK,
+ fw_data, fw_size);
+ /* me ucode */
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ /* instruction */
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME,
+ fw_data, fw_size);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ /* the same data section is copied to both ME pipe stack slots */
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME_P0_STACK,
+ fw_data, fw_size);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME_P1_STACK,
+ fw_data, fw_size);
+ /* mec ucode */
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ /* instruction */
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC,
+ fw_data, fw_size);
+ /* data */
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ /* the same data section is copied to all four MEC pipe stack slots */
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P0_STACK,
+ fw_data, fw_size);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P1_STACK,
+ fw_data, fw_size);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P2_STACK,
+ fw_data, fw_size);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P3_STACK,
+ fw_data, fw_size);
+
+ /* rlc ucode */
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
+ adev->gfx.rlc_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_G_UCODE,
+ fw_data, fw_size);
+
+ /* optional RLC sections, present depending on the v2.x header revision */
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ if (version_major == 2) {
+ if (version_minor >= 1) {
+ /* v2.1+: save/restore lists (GPM and SRM) */
+ rlcv21_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_offset_bytes));
+ fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLCG_SCRATCH,
+ fw_data, fw_size);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlcv21_hdr->save_restore_list_srm_offset_bytes));
+ fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_srm_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_SRM_ARAM,
+ fw_data, fw_size);
+ }
+ if (version_minor >= 2) {
+ /* v2.2+: RLC IRAM and DRAM images */
+ rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_UCODE,
+ fw_data, fw_size);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT,
+ fw_data, fw_size);
+ }
+ }
+}
+
+/*
+ * Copy the ucode section of the SDMA instance 0 firmware into the
+ * SDMA_UCODE_TH0 slot of the RLC autoload buffer.
+ */
+static void
+gfx_v12_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev)
+{
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ const struct sdma_firmware_header_v3_0 *sdma_hdr;
+
+ sdma_hdr = (const struct sdma_firmware_header_v3_0 *)
+ adev->sdma.instance[0].fw->data;
+ /* section location and length come from the v3.0 firmware header */
+ fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
+ le32_to_cpu(sdma_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(sdma_hdr->ucode_size_bytes);
+
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_SDMA_UCODE_TH0,
+ fw_data, fw_size);
+}
+
+/*
+ * Copy the MES firmware for pipes 0 and 1 into the RLC autoload buffer:
+ * for each pipe the ucode section goes to the pipe's MES slot and the data
+ * section to the pipe's MES stack slot.
+ */
+static void
+gfx_v12_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev)
+{
+	/* autoload slot ids, indexed by MES pipe */
+	static const int ucode_ids[2] = {
+		SOC24_FIRMWARE_ID_RS64_MES_P0,
+		SOC24_FIRMWARE_ID_RS64_MES_P1,
+	};
+	static const int data_ids[2] = {
+		SOC24_FIRMWARE_ID_RS64_MES_P0_STACK,
+		SOC24_FIRMWARE_ID_RS64_MES_P1_STACK,
+	};
+	const struct mes_firmware_header_v1_0 *mes_hdr;
+	const __le32 *section;
+	unsigned int section_size;
+	int pipe;
+
+	for (pipe = 0; pipe < 2; pipe++) {
+		mes_hdr = (const struct mes_firmware_header_v1_0 *)
+			adev->mes.fw[pipe]->data;
+
+		/* ucode section */
+		section = (const __le32 *)(adev->mes.fw[pipe]->data +
+			le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
+		section_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
+		gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, ucode_ids[pipe],
+							    section, section_size);
+
+		/* data section */
+		section = (const __le32 *)(adev->mes.fw[pipe]->data +
+			le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
+		section_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
+		gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, data_ids[pipe],
+							    section, section_size);
+	}
+}
+
+/*
+ * Populate the RLC autoload buffer with all firmware images, point the
+ * IMU/RLC bootloader registers at the RLC_G ucode inside it, and start the
+ * autoload either through the IMU (when IMU callbacks exist and
+ * amdgpu_dpm > 0) or by enabling the RLC directly.
+ *
+ * Always returns 0.
+ */
+static int gfx_v12_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
+{
+ uint32_t rlc_g_offset, rlc_g_size;
+ uint64_t gpu_addr;
+ uint32_t data;
+
+ /* RLC autoload sequence 2: copy ucode */
+ gfx_v12_0_rlc_backdoor_autoload_copy_sdma_ucode(adev);
+ gfx_v12_0_rlc_backdoor_autoload_copy_gfx_ucode(adev);
+ gfx_v12_0_rlc_backdoor_autoload_copy_mes_ucode(adev);
+ gfx_v12_0_rlc_backdoor_autoload_copy_toc_ucode(adev);
+
+ rlc_g_offset = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].offset;
+ rlc_g_size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].size;
+ /* bootloader address is programmed relative to the start of VRAM */
+ gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset - adev->gmc.vram_start;
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
+
+ if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
+ /* RLC autoload sequence 3: load IMU fw */
+ if (adev->gfx.imu.funcs->load_microcode)
+ adev->gfx.imu.funcs->load_microcode(adev);
+ /* RLC autoload sequence 4 init IMU fw */
+ if (adev->gfx.imu.funcs->setup_imu)
+ adev->gfx.imu.funcs->setup_imu(adev);
+ if (adev->gfx.imu.funcs->start_imu)
+ adev->gfx.imu.funcs->start_imu(adev);
+
+ /* RLC autoload sequence 5 disable gpa mode */
+ gfx_v12_0_disable_gpa_mode(adev);
+ } else {
+ /* unhalt rlc to start autoload without imu */
+ data = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
+ data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD0_ENABLE, 1);
+ data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, data);
+ WREG32_SOC15(GC, 0, regRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);
+ }
+
+ return 0;
+}
+
+/*
+ * Allocate the zero-initialised buffers used for IP register dumps: one
+ * for the core register list, one for the compute queue registers of every
+ * mec/pipe/queue instance and one for the gfx queue registers of every
+ * me/pipe/queue instance.
+ *
+ * Allocation failure is not fatal: the corresponding pointer is left NULL
+ * and an error is logged.
+ */
+static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev)
+{
+	uint32_t num_regs, num_inst;
+
+	num_regs = ARRAY_SIZE(gc_reg_list_12_0);
+	adev->gfx.ip_dump_core = kcalloc(num_regs, sizeof(uint32_t), GFP_KERNEL);
+	if (!adev->gfx.ip_dump_core)
+		DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
+
+	/* Allocate memory for compute queue registers for all the instances */
+	num_regs = ARRAY_SIZE(gc_cp_reg_list_12);
+	num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+		adev->gfx.mec.num_queue_per_pipe;
+	adev->gfx.ip_dump_compute_queues =
+		kcalloc(num_regs * num_inst, sizeof(uint32_t), GFP_KERNEL);
+	if (!adev->gfx.ip_dump_compute_queues)
+		DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
+
+	/* Allocate memory for gfx queue registers for all the instances */
+	num_regs = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
+	num_inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
+		adev->gfx.me.num_queue_per_pipe;
+	adev->gfx.ip_dump_gfx_queues =
+		kcalloc(num_regs * num_inst, sizeof(uint32_t), GFP_KERNEL);
+	if (!adev->gfx.ip_dump_gfx_queues)
+		DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
+}
+
+/*
+ * Software-side initialisation of the GFX v12 IP block.
+ *
+ * Sets the ME/MEC topology for the detected GC IP version, decides which
+ * optional features (user queues, cleaner shader, per-queue reset) the
+ * loaded firmware versions allow, registers the CP interrupt sources,
+ * initialises the ME/RLC/MEC state, creates the gfx and compute rings, the
+ * KIQ (unless MES handles it), the MQDs, the RLC autoload buffer (for
+ * backdoor-auto firmware loading), the IP dump buffers and sysfs entries.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int i, j, k, r, ring_id = 0;
+ unsigned num_compute_rings;
+ int xcc_id = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
+
+ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
+
+ /* ME/MEC topology per GC IP version */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 8;
+ adev->gfx.mec.num_mec = 1;
+ adev->gfx.mec.num_pipe_per_mec = 2;
+ adev->gfx.mec.num_queue_per_pipe = 4;
+ break;
+ default:
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.mec.num_mec = 1;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
+ break;
+ }
+
+ /* user queues: only when not disabled and all firmware meets the minimum versions */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ if (!adev->gfx.disable_uq &&
+ adev->gfx.me_fw_version >= 2780 &&
+ adev->gfx.pfp_fw_version >= 2840 &&
+ adev->gfx.mec_fw_version >= 3050 &&
+ adev->mes.fw_version[0] >= 123) {
+ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
+ adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* cleaner shader: only when all firmware meets the minimum versions */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ if (adev->gfx.me_fw_version >= 2480 &&
+ adev->gfx.pfp_fw_version >= 2530 &&
+ adev->gfx.mec_fw_version >= 2680 &&
+ adev->mes.fw_version[0] >= 100)
+ adev->gfx.enable_cleaner_shader = true;
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
+
+ if (adev->gfx.num_compute_rings) {
+ /* recalculate compute rings to use based on hardware configuration */
+ num_compute_rings = (adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe) / 2;
+ adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings,
+ num_compute_rings);
+ }
+
+ /* EOP Event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_12_0_0__SRCID__CP_EOP_INTERRUPT,
+ &adev->gfx.eop_irq);
+ if (r)
+ return r;
+
+ /* Bad opcode Event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_12_0_0__SRCID__CP_BAD_OPCODE_ERROR,
+ &adev->gfx.bad_op_irq);
+ if (r)
+ return r;
+
+ /* Privileged reg */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_12_0_0__SRCID__CP_PRIV_REG_FAULT,
+ &adev->gfx.priv_reg_irq);
+ if (r)
+ return r;
+
+ /* Privileged inst */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_12_0_0__SRCID__CP_PRIV_INSTR_FAULT,
+ &adev->gfx.priv_inst_irq);
+ if (r)
+ return r;
+
+ adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
+
+ gfx_v12_0_me_init(adev);
+
+ r = gfx_v12_0_rlc_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to init rlc BOs!\n");
+ return r;
+ }
+
+ r = gfx_v12_0_mec_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to init MEC BOs!\n");
+ return r;
+ }
+
+ if (adev->gfx.num_gfx_rings) {
+ /* set up the gfx ring */
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
+ if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
+ continue;
+
+ r = gfx_v12_0_gfx_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+ ring_id++;
+ }
+ }
+ }
+ }
+
+ if (adev->gfx.num_compute_rings) {
+ ring_id = 0;
+ /* set up the compute queues - allocate horizontally across pipes */
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev,
+ 0, i, k, j))
+ continue;
+
+ r = gfx_v12_0_compute_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+
+ ring_id++;
+ }
+ }
+ }
+ }
+
+ /* baseline reset capabilities, extended below when firmware allows per-queue reset */
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ if ((adev->gfx.me_fw_version >= 2660) &&
+ (adev->gfx.mec_fw_version >= 2920) &&
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* the driver-side KIQ is only set up when MES does not provide it */
+ if (!adev->enable_mes_kiq) {
+ r = amdgpu_gfx_kiq_init(adev, GFX12_MEC_HPD_SIZE, 0);
+ if (r) {
+ dev_err(adev->dev, "Failed to init KIQ BOs!\n");
+ return r;
+ }
+
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v12_compute_mqd), 0);
+ if (r)
+ return r;
+
+ /* allocate visible FB for rlc auto-loading fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ r = gfx_v12_0_rlc_autoload_buffer_init(adev);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v12_0_gpu_early_init(adev);
+ if (r)
+ return r;
+
+ /* IP dump buffers are optional; allocation failure is only logged */
+ gfx_v12_0_alloc_ip_dump(adev);
+
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/*
+ * Free the PFP firmware buffer objects (pfp_fw_obj and pfp_fw_data_obj)
+ * together with their GPU address and CPU pointer bookkeeping.
+ */
+static void gfx_v12_0_pfp_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+
+ amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
+ &adev->gfx.pfp.pfp_fw_data_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
+}
+
+/*
+ * Free the ME firmware buffer objects (me_fw_obj and me_fw_data_obj)
+ * together with their GPU address and CPU pointer bookkeeping.
+ */
+static void gfx_v12_0_me_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+
+ amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
+ &adev->gfx.me.me_fw_data_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_data_ptr);
+}
+
+/* Free the RLC autoload buffer object created by gfx_v12_0_rlc_autoload_buffer_init(). */
+static void gfx_v12_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
+ &adev->gfx.rlc.rlc_autoload_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_autoload_ptr);
+}
+
+/*
+ * Software-side teardown of the GFX v12 IP block: tears down the gfx and
+ * compute rings, MQDs, KIQ (when the driver owns it), firmware buffers,
+ * RLC/MEC state, the autoload buffer, loaded microcode, sysfs entries and
+ * the IP dump buffers.
+ *
+ * Always returns 0.
+ */
+static int gfx_v12_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ int i;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
+
+ /* mirrors sw_init: the KIQ was only created when MES does not provide it */
+ if (!adev->enable_mes_kiq) {
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
+ }
+
+ gfx_v12_0_pfp_fini(adev);
+ gfx_v12_0_me_fini(adev);
+ gfx_v12_0_rlc_fini(adev);
+ gfx_v12_0_mec_fini(adev);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+ gfx_v12_0_rlc_autoload_buffer_fini(adev);
+
+ gfx_v12_0_free_microcode(adev);
+
+ amdgpu_gfx_sysfs_fini(adev);
+
+ /* kfree(NULL) is a no-op, so buffers that failed to allocate are fine here */
+ kfree(adev->gfx.ip_dump_core);
+ kfree(adev->gfx.ip_dump_compute_queues);
+ kfree(adev->gfx.ip_dump_gfx_queues);
+
+ return 0;
+}
+
+/*
+ * Program GRBM_GFX_INDEX to select a shader engine, shader array and
+ * instance.  Passing 0xffffffff for any of @se_num, @sh_num or @instance
+ * sets the matching broadcast-writes bit instead of an index.  @xcc_id is
+ * not used.
+ */
+static void gfx_v12_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+				   u32 sh_num, u32 instance, int xcc_id)
+{
+	u32 index = 0;
+
+	if (instance != 0xffffffff)
+		index = REG_SET_FIELD(index, GRBM_GFX_INDEX, INSTANCE_INDEX,
+				      instance);
+	else
+		index = REG_SET_FIELD(index, GRBM_GFX_INDEX,
+				      INSTANCE_BROADCAST_WRITES, 1);
+
+	if (se_num != 0xffffffff)
+		index = REG_SET_FIELD(index, GRBM_GFX_INDEX, SE_INDEX, se_num);
+	else
+		index = REG_SET_FIELD(index, GRBM_GFX_INDEX,
+				      SE_BROADCAST_WRITES, 1);
+
+	if (sh_num != 0xffffffff)
+		index = REG_SET_FIELD(index, GRBM_GFX_INDEX, SA_INDEX, sh_num);
+	else
+		index = REG_SET_FIELD(index, GRBM_GFX_INDEX,
+				      SA_BROADCAST_WRITES, 1);
+
+	WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, index);
+}
+
+/*
+ * Return a bitmap of shader arrays that are not disabled.
+ *
+ * Starts from a mask covering max_sh_per_se * max_shader_engines bits and
+ * clears every bit set in the SA_DISABLE field of either
+ * GRBM_CC_GC_SA_UNIT_DISABLE or GRBM_GC_USER_SA_UNIT_DISABLE.
+ */
+static u32 gfx_v12_0_get_sa_active_bitmap(struct amdgpu_device *adev)
+{
+	u32 cc_disabled, user_disabled, all_sa;
+
+	cc_disabled = RREG32_SOC15(GC, 0, regGRBM_CC_GC_SA_UNIT_DISABLE);
+	cc_disabled = REG_GET_FIELD(cc_disabled, GRBM_CC_GC_SA_UNIT_DISABLE,
+				    SA_DISABLE);
+
+	user_disabled = RREG32_SOC15(GC, 0, regGRBM_GC_USER_SA_UNIT_DISABLE);
+	user_disabled = REG_GET_FIELD(user_disabled,
+				      GRBM_GC_USER_SA_UNIT_DISABLE, SA_DISABLE);
+
+	all_sa = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
+					   adev->gfx.config.max_shader_engines);
+
+	return all_sa & ~(cc_disabled | user_disabled);
+}
+
+/*
+ * Return a bitmap of render backends that are not disabled.
+ *
+ * Starts from a mask covering max_backends_per_se * max_shader_engines
+ * bits and clears every bit set in the BACKEND_DISABLE field of either
+ * CC_RB_BACKEND_DISABLE or GC_USER_RB_BACKEND_DISABLE.
+ */
+static u32 gfx_v12_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+{
+	u32 cc_disabled, user_disabled, all_rb;
+
+	cc_disabled = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
+	cc_disabled = REG_GET_FIELD(cc_disabled, CC_RB_BACKEND_DISABLE,
+				    BACKEND_DISABLE);
+
+	user_disabled = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
+	user_disabled = REG_GET_FIELD(user_disabled,
+				      GC_USER_RB_BACKEND_DISABLE,
+				      BACKEND_DISABLE);
+
+	all_rb = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
+					   adev->gfx.config.max_shader_engines);
+
+	return all_rb & ~(cc_disabled | user_disabled);
+}
+
+/*
+ * Compute the active render backend mask and count and store them in
+ * adev->gfx.config.backend_enable_mask / num_rbs.
+ *
+ * Each active shader array contributes a run of
+ * (max_backends_per_se / max_sh_per_se) RB bits at its position; the
+ * result is then intersected with the RBs the hardware reports as enabled.
+ */
+static void gfx_v12_0_setup_rb(struct amdgpu_device *adev)
+{
+	u32 rb_bitmap_per_sa;
+	u32 rb_bitmap_width_per_sa;
+	u32 max_sa;
+	u32 active_sa_bitmap;
+	u32 global_active_rb_bitmap;
+	u32 active_rb_bitmap = 0;
+	u32 i;
+
+	/* query sa bitmap from SA_UNIT_DISABLE registers */
+	active_sa_bitmap = gfx_v12_0_get_sa_active_bitmap(adev);
+	/* query rb bitmap from RB_BACKEND_DISABLE registers */
+	global_active_rb_bitmap = gfx_v12_0_get_rb_active_bitmap(adev);
+
+	/* generate active rb bitmap according to active sa bitmap */
+	max_sa = adev->gfx.config.max_shader_engines *
+		 adev->gfx.config.max_sh_per_se;
+	rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
+				 adev->gfx.config.max_sh_per_se;
+	rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa);
+
+	for (i = 0; i < max_sa; i++) {
+		/* unsigned shift: a signed 1 << 31 would be undefined */
+		if (active_sa_bitmap & (1U << i))
+			active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa));
+	}
+
+	active_rb_bitmap &= global_active_rb_bitmap;
+	adev->gfx.config.backend_enable_mask = active_rb_bitmap;
+	adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
+}
+
+/* aperture base selectors packed into SH_MEM_BASES by gfx_v12_0_init_compute_vmid() */
+#define LDS_APP_BASE 0x1
+#define SCRATCH_APP_BASE 0x2
+
+/*
+ * Program SH_MEM_CONFIG / SH_MEM_BASES and enable the debug trap for every
+ * VMID from first_kfd_vmid up to AMDGPU_NUM_VMID.  Runs under srbm_mutex
+ * and restores the GRBM selection to VMID 0 when done.
+ */
+static void gfx_v12_0_init_compute_vmid(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t sh_mem_bases;
+ uint32_t data;
+
+ /*
+ * Configure apertures:
+ * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
+ * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
+ * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ */
+ sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
+ SCRATCH_APP_BASE;
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ /* select VMID i so the writes below apply to it */
+ soc24_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
+
+ /* Enable trap for each kfd vmid. */
+ data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+/* Placeholder for TCP harvesting; currently does nothing. */
+static void gfx_v12_0_tcp_harvest(struct amdgpu_device *adev)
+{
+ /* TODO: harvest feature to be added later. */
+}
+
+/* Intentionally empty: no TCC information is collected here for gfx v12. */
+static void gfx_v12_0_get_tcc_info(struct amdgpu_device *adev)
+{
+}
+
+/*
+ * Program the static gfx configuration: GRBM read timeout (bare metal
+ * only), RB/CU/TCC information, and the SH_MEM_CONFIG / SH_MEM_BASES
+ * aperture setup for every GFXHUB VMID, followed by the compute (KFD)
+ * VMID setup.
+ */
+static void gfx_v12_0_constants_init(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ int i;
+
+ /* skipped when running as an SR-IOV virtual function */
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+
+ gfx_v12_0_setup_rb(adev);
+ gfx_v12_0_get_cu_info(adev, &adev->gfx.cu_info);
+ gfx_v12_0_get_tcc_info(adev);
+ adev->gfx.config.pa_sc_tile_steering_override = 0;
+
+ /* XXX SH_MEM regs */
+ /* where to put LDS, scratch, GPUVM in FSA64 space */
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
+ soc24_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ /* VMID 0 keeps its existing SH_MEM_BASES value */
+ if (i != 0) {
+ tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+ (adev->gmc.private_aperture_start >> 48));
+ tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+ (adev->gmc.shared_aperture_start >> 48));
+ WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
+ }
+ }
+ /* restore the default GRBM selection before dropping the lock */
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+
+ mutex_unlock(&adev->srbm_mutex);
+
+ gfx_v12_0_init_compute_vmid(adev);
+}
+
+/*
+ * Return the register offset of the CP gfx interrupt control register for
+ * the given me/pipe, or 0 when there is none.  Only me 0 / pipe 0 is
+ * handled.
+ */
+static u32 gfx_v12_0_get_cpg_int_cntl(struct amdgpu_device *adev,
+				      int me, int pipe)
+{
+	if (me == 0 && pipe == 0)
+		return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
+
+	return 0;
+}
+
+/*
+ * Return the register offset of the CP compute interrupt control register
+ * for the given me/pipe, or 0 when there is none.
+ */
+static u32 gfx_v12_0_get_cpc_int_cntl(struct amdgpu_device *adev,
+				      int me, int pipe)
+{
+	/*
+	 * amdgpu controls only the first MEC. That's why this function only
+	 * handles the setting of interrupts for this specific MEC. All other
+	 * pipes' interrupts are set by amdkfd.
+	 */
+	if (me != 1)
+		return 0;
+
+	if (pipe == 0)
+		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
+	if (pipe == 1)
+		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
+
+	return 0;
+}
+
+/*
+ * Enable or disable the context busy/empty, compute busy and gfx idle
+ * interrupt bits in the CP interrupt control register of every me/pipe
+ * that has one.  Does nothing when running as an SR-IOV virtual function.
+ */
+static void gfx_v12_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
+						bool enable)
+{
+	u32 val, reg;
+	int me, pipe;
+
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	for (me = 0; me < adev->gfx.me.num_me; me++) {
+		for (pipe = 0; pipe < adev->gfx.me.num_pipe_per_me; pipe++) {
+			reg = gfx_v12_0_get_cpg_int_cntl(adev, me, pipe);
+			/* 0 means this me/pipe has no interrupt control register */
+			if (!reg)
+				continue;
+
+			val = RREG32_SOC15_IP(GC, reg);
+			val = REG_SET_FIELD(val, CP_INT_CNTL_RING0,
+					    CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
+			val = REG_SET_FIELD(val, CP_INT_CNTL_RING0,
+					    CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
+			val = REG_SET_FIELD(val, CP_INT_CNTL_RING0,
+					    CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
+			val = REG_SET_FIELD(val, CP_INT_CNTL_RING0,
+					    GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
+			WREG32_SOC15_IP(GC, reg, val);
+		}
+	}
+}
+
+/*
+ * Fill the clear state buffer through the RLC get_csb_buffer callback and
+ * program its GPU address and length into the RLC_CSIB registers.
+ *
+ * Always returns 0.
+ */
+static int gfx_v12_0_init_csb(struct amdgpu_device *adev)
+{
+	struct amdgpu_rlc *rlc = &adev->gfx.rlc;
+
+	rlc->funcs->get_csb_buffer(adev, rlc->cs_ptr);
+
+	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
+		     rlc->clear_state_gpu_addr >> 32);
+	/* the low two address bits are masked off */
+	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
+		     rlc->clear_state_gpu_addr & 0xfffffffc);
+	WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, rlc->clear_state_size);
+
+	return 0;
+}
+
+/* Stop the RLC by clearing RLC_ENABLE_F32 in RLC_CNTL (read-modify-write). */
+static void gfx_v12_0_rlc_stop(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
+ WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
+}
+
+/*
+ * Pulse the RLC soft reset: set SOFT_RESET_RLC in GRBM_SOFT_RESET, wait
+ * 50us, clear it, then wait another 50us.
+ */
+static void gfx_v12_0_rlc_reset(struct amdgpu_device *adev)
+{
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+ udelay(50);
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
+ udelay(50);
+}
+
+/*
+ * Enable or disable the RLC <-> SMU handshake by clearing or setting the
+ * SMU_HANDSHAKE_DISABLE bit in RLC_PG_CNTL.
+ */
+static void gfx_v12_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
+					     bool enable)
+{
+	uint32_t pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
+
+	/* RLC_PG_CNTL[23] = 0 (default)
+	 * RLC will wait for handshake acks with SMU
+	 * GFXOFF will be enabled
+	 * RLC_PG_CNTL[23] = 1
+	 * RLC will not issue any message to SMU
+	 * hence no handshake between SMU & RLC
+	 * GFXOFF will be disabled
+	 */
+	if (enable)
+		pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
+	else
+		pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
+
+	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, pg_cntl);
+}
+
+/*
+ * Set RLC_CNTL.RLC_ENABLE_F32 and wait 50us.  When PP_GFXOFF_MASK is not
+ * set in amdgpu_pp_feature_mask, the RLC/SMU handshake is disabled first.
+ */
+static void gfx_v12_0_rlc_start(struct amdgpu_device *adev)
+{
+	/*
+	 * TODO: enable rlc & smu handshake until smu
+	 * and gfxoff feature works as expected
+	 */
+	if ((amdgpu_pp_feature_mask & PP_GFXOFF_MASK) == 0)
+		gfx_v12_0_rlc_smu_handshake_cntl(adev, false);
+
+	WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
+	udelay(50);
+}
+
+/*
+ * Enable the Save Restore Machine: set AUTO_INCR_ADDR and SRM_ENABLE in
+ * RLC_SRM_CNTL with a single read-modify-write.
+ */
+static void gfx_v12_0_rlc_enable_srm(struct amdgpu_device *adev)
+{
+	const uint32_t srm_cntl = SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL);
+	uint32_t val;
+
+	val = RREG32(srm_cntl);
+	val |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK |
+	       RLC_SRM_CNTL__SRM_ENABLE_MASK;
+	WREG32(srm_cntl, val);
+}
+
+/*
+ * Stream the RLCG ucode array from adev->gfx.rlc_fw into the RLC through
+ * RLC_GPM_UCODE_DATA, one 32-bit word at a time.  RLC_GPM_UCODE_ADDR is set
+ * to RLCG_UCODE_LOADING_START_ADDRESS before the upload and to the RLC
+ * firmware version afterwards.
+ */
+static void gfx_v12_0_load_rlcg_microcode(struct amdgpu_device *adev)
+{
+	const struct rlc_firmware_header_v2_0 *hdr =
+		(const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+	const __le32 *ucode;
+	unsigned int idx, ndw;
+
+	/* offset and size in the header are in bytes */
+	ucode = (const __le32 *)(adev->gfx.rlc_fw->data +
+			le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+	ndw = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
+		     RLCG_UCODE_LOADING_START_ADDRESS);
+
+	for (idx = 0; idx < ndw; idx++)
+		WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
+			     le32_to_cpup(&ucode[idx]));
+
+	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
+}
+
+/*
+ * Upload the RLC IRAM and DRAM images described by the v2.2 RLC firmware
+ * header through the RLC_LX6_{IRAM,DRAM}_ADDR/DATA register pairs, then set
+ * PDEBUG_ENABLE and clear BRESET in RLC_LX6_CNTL.
+ *
+ * When amdgpu_emu_mode == 1 the upload sleeps 1ms before every 100th word.
+ */
+static void gfx_v12_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
+{
+	const struct rlc_firmware_header_v2_2 *hdr =
+		(const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+	const __le32 *words;
+	unsigned int idx, nwords;
+	u32 lx6_cntl;
+
+	/* IRAM image */
+	words = (const __le32 *)(adev->gfx.rlc_fw->data +
+			le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
+	nwords = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
+
+	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
+	for (idx = 0; idx < nwords; idx++) {
+		if (amdgpu_emu_mode == 1 && idx % 100 == 99)
+			msleep(1);
+		WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
+			     le32_to_cpup(&words[idx]));
+	}
+	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
+
+	/* DRAM image */
+	words = (const __le32 *)(adev->gfx.rlc_fw->data +
+			le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
+	nwords = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
+
+	WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
+	for (idx = 0; idx < nwords; idx++) {
+		if (amdgpu_emu_mode == 1 && idx % 100 == 99)
+			msleep(1);
+		WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
+			     le32_to_cpup(&words[idx]));
+	}
+	/*
+	 * NOTE(review): the version is written to the IRAM address register
+	 * again here, not to RLC_LX6_DRAM_ADDR.  Kept exactly as found;
+	 * confirm against the hardware programming sequence.
+	 */
+	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
+
+	lx6_cntl = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
+	lx6_cntl = REG_SET_FIELD(lx6_cntl, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
+	lx6_cntl = REG_SET_FIELD(lx6_cntl, RLC_LX6_CNTL, BRESET, 0);
+	WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, lx6_cntl);
+}
+
+/*
+ * Directly load the RLC microcode from adev->gfx.rlc_fw.
+ *
+ * Only firmware with header major version 2 is accepted.  The RLCG ucode is
+ * always loaded; the IRAM/DRAM images are loaded as well when
+ * amdgpu_dpm == 1 and the header minor version is at least 2.
+ *
+ * Returns 0 on success, -EINVAL when no RLC firmware is present or the
+ * header major version is not 2.
+ */
+static int gfx_v12_0_rlc_load_microcode(struct amdgpu_device *adev)
+{
+	const struct rlc_firmware_header_v2_0 *hdr;
+	uint16_t major, minor;
+
+	if (!adev->gfx.rlc_fw)
+		return -EINVAL;
+
+	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+	amdgpu_ucode_print_rlc_hdr(&hdr->header);
+
+	major = le16_to_cpu(hdr->header.header_version_major);
+	minor = le16_to_cpu(hdr->header.header_version_minor);
+
+	if (major != 2)
+		return -EINVAL;
+
+	gfx_v12_0_load_rlcg_microcode(adev);
+	if (amdgpu_dpm == 1 && minor >= 2)
+		gfx_v12_0_load_rlc_iram_dram_microcode(adev);
+
+	return 0;
+}
+
+/*
+ * Bring the RLC up according to the firmware load type.
+ *
+ * - AMDGPU_FW_LOAD_PSP: program the clear-state buffer and, unless
+ *   amdgpu_sriov_vf() is true, enable the Save Restore Machine.
+ * - otherwise, when amdgpu_sriov_vf() is true: only program the
+ *   clear-state buffer.
+ * - otherwise: stop the RLC, zero RLC_CGCG_CGLS_CTRL and RLC_PG_CNTL, load
+ *   the microcode directly when the load type is AMDGPU_FW_LOAD_DIRECT,
+ *   program the clear-state buffer and start the RLC.
+ *
+ * Returns 0 on success or the error from gfx_v12_0_rlc_load_microcode().
+ */
+static int gfx_v12_0_rlc_resume(struct amdgpu_device *adev)
+{
+	int ret;
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+		gfx_v12_0_init_csb(adev);
+
+		/* enable RLC SRM */
+		if (!amdgpu_sriov_vf(adev))
+			gfx_v12_0_rlc_enable_srm(adev);
+
+		return 0;
+	}
+
+	if (amdgpu_sriov_vf(adev)) {
+		gfx_v12_0_init_csb(adev);
+		return 0;
+	}
+
+	adev->gfx.rlc.funcs->stop(adev);
+
+	/* disable CG */
+	WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
+
+	/* disable PG */
+	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
+
+	/* legacy rlc firmware loading */
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+		ret = gfx_v12_0_rlc_load_microcode(adev);
+		if (ret)
+			return ret;
+	}
+
+	gfx_v12_0_init_csb(adev);
+
+	adev->gfx.rlc.funcs->start(adev);
+
+	return 0;
+}
+
+/*
+ * Program the RS64 program-counter start address of the PFP, ME and MEC
+ * pipes from the respective firmware headers. After each group of pipes is
+ * programmed, the matching per-pipe reset bits are set and then cleared
+ * again (CP_ME_CNTL for PFP/ME, CP_MEC_RS64_CNTL for MEC).
+ *
+ * The pipe counts are fixed here: 2 for PFP and ME, 4 for MEC.
+ *
+ * NOTE(review): ucode_start_addr_{lo,hi} are used without le32_to_cpu(),
+ * unlike the offset/size header fields elsewhere in this file - confirm.
+ * NOTE(review): srbm_mutex is not taken around soc24_grbm_select() here,
+ * unlike gfx_v12_0_set_{pfp,me,mec}_ucode_start_addr() - presumably the
+ * caller is responsible; verify.
+ */
+static void gfx_v12_0_config_gfx_rs64(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v2_0 *pfp_hdr;
+ const struct gfx_firmware_header_v2_0 *me_hdr;
+ const struct gfx_firmware_header_v2_0 *mec_hdr;
+ uint32_t pipe_id, tmp;
+
+ mec_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ me_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+
+ /* config pfp program start addr */
+ for (pipe_id = 0; pipe_id < 2; pipe_id++) {
+ soc24_grbm_select(adev, 0, pipe_id, 0, 0);
+ /* low register: addr_lo >> 2, with the low bits of addr_hi shifted in at bit 30 */
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
+ (pfp_hdr->ucode_start_addr_hi << 30) |
+ (pfp_hdr->ucode_start_addr_lo >> 2));
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
+ pfp_hdr->ucode_start_addr_hi >> 2);
+ }
+ /* back to selection (0, 0, 0, 0) */
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+
+ /* reset pfp pipe */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* clear pfp pipe reset */
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* config me program start addr */
+ for (pipe_id = 0; pipe_id < 2; pipe_id++) {
+ soc24_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
+ (me_hdr->ucode_start_addr_hi << 30) |
+ (me_hdr->ucode_start_addr_lo >> 2));
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
+ me_hdr->ucode_start_addr_hi>>2);
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+
+ /* reset me pipe */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* clear me pipe reset */
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* config mec program start addr */
+ for (pipe_id = 0; pipe_id < 4; pipe_id++) {
+ /* first selector argument is 1 for the MEC pipes, 0 for PFP/ME above */
+ soc24_grbm_select(adev, 1, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
+ mec_hdr->ucode_start_addr_lo >> 2 |
+ mec_hdr->ucode_start_addr_hi << 30);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
+ mec_hdr->ucode_start_addr_hi >> 2);
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+
+ /* reset mec pipe */
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
+
+ /* clear mec pipe reset */
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
+}
+
+/*
+ * Program the PFP program-counter start address on every gfx pipe from the
+ * PFP firmware header, then set and clear that pipe's PFP reset bit in
+ * CP_ME_CNTL so the new start address takes effect.
+ *
+ * Runs under srbm_mutex and re-selects (0, 0, 0, 0) before unlocking.
+ */
+static void gfx_v12_0_set_pfp_ucode_start_addr(struct amdgpu_device *adev)
+{
+	const struct gfx_firmware_header_v2_0 *hdr =
+		(const struct gfx_firmware_header_v2_0 *)adev->gfx.pfp_fw->data;
+	unsigned int pipe, me_cntl;
+
+	mutex_lock(&adev->srbm_mutex);
+	for (pipe = 0; pipe < adev->gfx.me.num_pipe_per_me; pipe++) {
+		soc24_grbm_select(adev, 0, pipe, 0, 0);
+
+		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
+			     (hdr->ucode_start_addr_hi << 30) |
+			     (hdr->ucode_start_addr_lo >> 2));
+		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
+			     hdr->ucode_start_addr_hi >> 2);
+
+		/*
+		 * Program CP_ME_CNTL to reset the given pipe so that
+		 * CP_PFP_PRGRM_CNTR_START takes effect: write the reset bit
+		 * as 1, then write it back as 0.  Pipe 0 uses
+		 * PFP_PIPE0_RESET, every other pipe PFP_PIPE1_RESET.
+		 */
+		me_cntl = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+		if (pipe == 0) {
+			me_cntl = REG_SET_FIELD(me_cntl, CP_ME_CNTL,
+						PFP_PIPE0_RESET, 1);
+			WREG32_SOC15(GC, 0, regCP_ME_CNTL, me_cntl);
+			me_cntl = REG_SET_FIELD(me_cntl, CP_ME_CNTL,
+						PFP_PIPE0_RESET, 0);
+		} else {
+			me_cntl = REG_SET_FIELD(me_cntl, CP_ME_CNTL,
+						PFP_PIPE1_RESET, 1);
+			WREG32_SOC15(GC, 0, regCP_ME_CNTL, me_cntl);
+			me_cntl = REG_SET_FIELD(me_cntl, CP_ME_CNTL,
+						PFP_PIPE1_RESET, 0);
+		}
+		WREG32_SOC15(GC, 0, regCP_ME_CNTL, me_cntl);
+	}
+	soc24_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+}
+
+/*
+ * Program the ME program-counter start address on every gfx pipe from the
+ * ME firmware header, then set and clear that pipe's ME reset bit in
+ * CP_ME_CNTL so the new start address takes effect.
+ *
+ * Runs under srbm_mutex and re-selects (0, 0, 0, 0) before unlocking.
+ */
+static void gfx_v12_0_set_me_ucode_start_addr(struct amdgpu_device *adev)
+{
+	const struct gfx_firmware_header_v2_0 *hdr =
+		(const struct gfx_firmware_header_v2_0 *)adev->gfx.me_fw->data;
+	unsigned int pipe, me_cntl;
+
+	mutex_lock(&adev->srbm_mutex);
+	for (pipe = 0; pipe < adev->gfx.me.num_pipe_per_me; pipe++) {
+		soc24_grbm_select(adev, 0, pipe, 0, 0);
+
+		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
+			     (hdr->ucode_start_addr_hi << 30) |
+			     (hdr->ucode_start_addr_lo >> 2));
+		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
+			     hdr->ucode_start_addr_hi >> 2);
+
+		/*
+		 * Program CP_ME_CNTL to reset the given pipe so that
+		 * CP_ME_PRGRM_CNTR_START takes effect: write the reset bit
+		 * as 1, then write it back as 0.  Pipe 0 uses
+		 * ME_PIPE0_RESET, every other pipe ME_PIPE1_RESET.
+		 */
+		me_cntl = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+		if (pipe == 0) {
+			me_cntl = REG_SET_FIELD(me_cntl, CP_ME_CNTL,
+						ME_PIPE0_RESET, 1);
+			WREG32_SOC15(GC, 0, regCP_ME_CNTL, me_cntl);
+			me_cntl = REG_SET_FIELD(me_cntl, CP_ME_CNTL,
+						ME_PIPE0_RESET, 0);
+		} else {
+			me_cntl = REG_SET_FIELD(me_cntl, CP_ME_CNTL,
+						ME_PIPE1_RESET, 1);
+			WREG32_SOC15(GC, 0, regCP_ME_CNTL, me_cntl);
+			me_cntl = REG_SET_FIELD(me_cntl, CP_ME_CNTL,
+						ME_PIPE1_RESET, 0);
+		}
+		WREG32_SOC15(GC, 0, regCP_ME_CNTL, me_cntl);
+	}
+	soc24_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+}
+
+/*
+ * Program the MEC RS64 program-counter start address on every compute pipe
+ * (num_pipe_per_mec of them) from the MEC firmware header.
+ *
+ * Runs under srbm_mutex and re-selects (0, 0, 0, 0) before unlocking.
+ */
+static void gfx_v12_0_set_mec_ucode_start_addr(struct amdgpu_device *adev)
+{
+	const struct gfx_firmware_header_v2_0 *hdr =
+		(const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
+	unsigned int pipe;
+
+	mutex_lock(&adev->srbm_mutex);
+	for (pipe = 0; pipe < adev->gfx.mec.num_pipe_per_mec; pipe++) {
+		soc24_grbm_select(adev, 1, pipe, 0, 0);
+
+		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
+			     (hdr->ucode_start_addr_lo >> 2) |
+			     (hdr->ucode_start_addr_hi << 30));
+		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
+			     hdr->ucode_start_addr_hi >> 2);
+	}
+	soc24_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+}
+
+/*
+ * Poll until CP_STAT reads 0 and RLC_RLCS_BOOTLOAD_STATUS reports
+ * BOOTLOAD_COMPLETE == 1, for at most adev->usec_timeout iterations of
+ * udelay(1) (plus msleep(10) per iteration when amdgpu_emu_mode is set).
+ *
+ * On success with load type AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO, the PFP, ME
+ * and MEC ucode start addresses are programmed afterwards.
+ *
+ * Returns 0 on success or -ETIMEDOUT when the poll runs out.
+ */
+static int gfx_v12_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
+{
+	uint32_t stat, bootload;
+	int tries;
+
+	for (tries = 0; tries < adev->usec_timeout; tries++) {
+		stat = RREG32_SOC15(GC, 0, regCP_STAT);
+		bootload = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
+
+		if (!stat &&
+		    REG_GET_FIELD(bootload, RLC_RLCS_BOOTLOAD_STATUS,
+				  BOOTLOAD_COMPLETE) == 1)
+			break;
+
+		udelay(1);
+		if (amdgpu_emu_mode)
+			msleep(10);
+	}
+
+	if (tries >= adev->usec_timeout) {
+		dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
+		return -ETIMEDOUT;
+	}
+
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+		return 0;
+
+	gfx_v12_0_set_pfp_ucode_start_addr(adev);
+	gfx_v12_0_set_me_ucode_start_addr(adev);
+	gfx_v12_0_set_mec_ucode_start_addr(adev);
+
+	return 0;
+}
+
+/*
+ * Halt (@enable == false) or un-halt (@enable == true) the gfx ME and PFP
+ * through the ME_HALT/PFP_HALT fields of CP_ME_CNTL, then poll CP_STAT for
+ * up to adev->usec_timeout microseconds until it reads 0.
+ *
+ * A timeout is only logged; the function always returns 0.
+ */
+static int gfx_v12_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
+{
+	const u32 halt = enable ? 0 : 1;
+	u32 me_cntl;
+	int usec;
+
+	me_cntl = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+	me_cntl = REG_SET_FIELD(me_cntl, CP_ME_CNTL, ME_HALT, halt);
+	me_cntl = REG_SET_FIELD(me_cntl, CP_ME_CNTL, PFP_HALT, halt);
+	WREG32_SOC15(GC, 0, regCP_ME_CNTL, me_cntl);
+
+	for (usec = 0; usec < adev->usec_timeout; usec++) {
+		if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
+			return 0;
+		udelay(1);
+	}
+
+	/* only reached when CP_STAT never read 0 within the timeout */
+	DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
+
+	return 0;
+}
+
+/*
+ * Load the RS64 PFP firmware from adev->gfx.pfp_fw.
+ *
+ * The instruction and data segments are copied into two 64KB-aligned VRAM
+ * BOs. The PFP instruction cache base registers are pointed at the
+ * instruction BO and the RS64 data cache base 0 registers (per gfx pipe) at
+ * the data BO. The instruction cache is invalidated and primed, the data
+ * cache invalidated, and finally the PFP start address is programmed.
+ *
+ * Returns 0 on success, the amdgpu_bo_create_reserved() error when a BO
+ * cannot be created, or -EINVAL when one of the cache operations does not
+ * complete within the 50000-iteration poll.
+ *
+ * NOTE(review): the -EINVAL timeout paths return without releasing the BOs
+ * created here - verify that the caller's teardown covers this.
+ */
+static int gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v2_0 *pfp_hdr;
+ const __le32 *fw_ucode, *fw_data;
+ unsigned i, pipe_id, fw_ucode_size, fw_data_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+
+ /* instruction */
+ fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->ucode_offset_bytes));
+ fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->data_offset_bytes));
+ fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
+
+ /* 64kb align */
+ r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
+ gfx_v12_0_pfp_fini(adev);
+ return r;
+ }
+
+ /* second BO, same alignment and domain, for the data segment */
+ r = amdgpu_bo_create_reserved(adev, fw_data_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.pfp.pfp_fw_data_obj,
+ &adev->gfx.pfp.pfp_fw_data_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
+ gfx_v12_0_pfp_fini(adev);
+ return r;
+ }
+
+ /* copy both segments through the CPU mappings, then unmap and unreserve */
+ memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
+ memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
+
+ amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
+ amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
+ amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
+
+ /* HDP flush is only issued in emulation mode */
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
+ lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
+ upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_PFP_IC_BASE registers
+ * forces invalidation of the L1 I$ (the original comment
+ * said "ME"; presumably the PFP one is meant). Wait for the
+ * invalidation complete
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
+ /* Waiting for cache primed*/
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* point data cache base 0 of every gfx pipe at the data BO */
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc24_grbm_select(adev, 0, pipe_id, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
+ lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
+ upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ /* wait for the data cache invalidation to complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ /* program the start address and pulse the PFP pipe resets */
+ gfx_v12_0_set_pfp_ucode_start_addr(adev);
+
+ return 0;
+}
+
+/*
+ * Load the RS64 ME firmware from adev->gfx.me_fw.
+ *
+ * The instruction and data segments are copied into two 64KB-aligned VRAM
+ * BOs. The ME instruction cache base registers are pointed at the
+ * instruction BO and the RS64 data cache base 1 registers (per gfx pipe) at
+ * the data BO. The instruction cache is invalidated and primed, the data
+ * cache invalidated, and finally the ME start address is programmed.
+ *
+ * Returns 0 on success, the amdgpu_bo_create_reserved() error when a BO
+ * cannot be created, or -EINVAL when one of the cache operations does not
+ * complete within the 50000-iteration poll.
+ *
+ * NOTE(review): the -EINVAL timeout paths return without releasing the BOs
+ * created here - verify that the caller's teardown covers this.
+ */
+static int gfx_v12_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v2_0 *me_hdr;
+ const __le32 *fw_ucode, *fw_data;
+ unsigned i, pipe_id, fw_ucode_size, fw_data_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ me_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+
+ /* instruction */
+ fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->ucode_offset_bytes));
+ fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->data_offset_bytes));
+ fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
+
+ /* 64kb align*/
+ r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
+ gfx_v12_0_me_fini(adev);
+ return r;
+ }
+
+ /* second BO, same alignment and domain, for the data segment */
+ r = amdgpu_bo_create_reserved(adev, fw_data_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.me.me_fw_data_obj,
+ &adev->gfx.me.me_fw_data_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_data_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
+ gfx_v12_0_me_fini(adev);
+ return r;
+ }
+
+ /* copy both segments through the CPU mappings, then unmap and unreserve */
+ memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
+ memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
+
+ amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
+ amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
+ amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
+
+ /* HDP flush is only issued in emulation mode */
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
+ lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
+ upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_ME_IC_BASE registers
+ * forces invalidation of the ME L1 I$. Wait for the
+ * invalidation complete
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
+
+ /* Waiting for instruction cache primed*/
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* point data cache base 1 of every gfx pipe at the data BO */
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc24_grbm_select(adev, 0, pipe_id, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
+ lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
+ upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ /* wait for the data cache invalidation to complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ /* program the start address and pulse the ME pipe resets */
+ gfx_v12_0_set_me_ucode_start_addr(adev);
+
+ return 0;
+}
+
+/*
+ * Halt the gfx CP and load the RS64 PFP firmware followed by the RS64 ME
+ * firmware.
+ *
+ * Returns 0 on success, -EINVAL when either firmware image is missing, or
+ * the error of the loader that failed.
+ */
+static int gfx_v12_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
+{
+	int ret;
+
+	if (!(adev->gfx.me_fw && adev->gfx.pfp_fw))
+		return -EINVAL;
+
+	gfx_v12_0_cp_gfx_enable(adev, false);
+
+	ret = gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(adev);
+	if (ret) {
+		dev_err(adev->dev, "(%d) failed to load pfp fw\n", ret);
+		return ret;
+	}
+
+	ret = gfx_v12_0_cp_gfx_load_me_microcode_rs64(adev);
+	if (ret)
+		dev_err(adev->dev, "(%d) failed to load me fw\n", ret);
+
+	return ret;
+}
+
+/*
+ * Initialise the CP: program CP_MAX_CONTEXT with max_hw_contexts - 1 and
+ * CP_DEVICE_ID with 1, then un-halt the gfx CP unless amdgpu_async_gfx_ring
+ * is set.  Always returns 0.
+ */
+static int gfx_v12_0_cp_gfx_start(struct amdgpu_device *adev)
+{
+	WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
+		     adev->gfx.config.max_hw_contexts - 1);
+	WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
+
+	if (amdgpu_async_gfx_ring)
+		return 0;
+
+	gfx_v12_0_cp_gfx_enable(adev, true);
+
+	return 0;
+}
+
+/* Set the PIPEID field of GRBM_GFX_CNTL to @pipe (read-modify-write). */
+static void gfx_v12_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
+					 CP_PIPE_ID pipe)
+{
+	u32 gfx_cntl = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
+
+	gfx_cntl = REG_SET_FIELD(gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe);
+	WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, gfx_cntl);
+}
+
+/*
+ * Program the gfx ring doorbell registers for @ring.
+ *
+ * CP_RB_DOORBELL_CONTROL gets DOORBELL_EN set according to
+ * ring->use_doorbell, and the ring's doorbell index as DOORBELL_OFFSET when
+ * the doorbell is in use.  The doorbell range is then set from the ring's
+ * doorbell index (lower) to the full upper-range mask (upper).
+ */
+static void gfx_v12_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
+					  struct amdgpu_ring *ring)
+{
+	u32 val;
+
+	val = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
+	/* the offset field is only touched when the doorbell is used */
+	if (ring->use_doorbell)
+		val = REG_SET_FIELD(val, CP_RB_DOORBELL_CONTROL,
+				    DOORBELL_OFFSET, ring->doorbell_index);
+	val = REG_SET_FIELD(val, CP_RB_DOORBELL_CONTROL,
+			    DOORBELL_EN, ring->use_doorbell ? 1 : 0);
+	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, val);
+
+	val = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
+			    DOORBELL_RANGE_LOWER, ring->doorbell_index);
+	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, val);
+
+	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
+		     CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
+}
+
+/*
+ * Bring up gfx ring 0 on pipe 0: program the ring buffer size, reset the
+ * write pointer, set the rptr write-back and wptr poll addresses, program
+ * the ring base, mark the ring active, set up its doorbell and finally
+ * start the CP via gfx_v12_0_cp_gfx_start().
+ *
+ * Always returns 0.
+ */
+static int gfx_v12_0_cp_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 tmp;
+ u32 rb_bufsz;
+ u64 rb_addr, rptr_addr, wptr_gpu_addr;
+
+ /* Set the write pointer delay */
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
+
+ /* set the RB to use vmid 0 */
+ WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
+
+ /* Init gfx ring 0 for pipe 0 */
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v12_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
+
+ /* Set ring buffer size */
+ ring = &adev->gfx.gfx_ring[0];
+ /* RB_BUFSZ is log2 of ring_size / 8; RB_BLKSZ is that value minus 2 */
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
+ WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
+
+ /* Initialize the ring buffer's write pointers */
+ ring->wptr = 0;
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
+
+ /* set the wb address whether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
+ WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
+ CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+
+ /* after a 1ms delay, CP_RB0_CNTL is written again with the same value */
+ mdelay(1);
+ WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
+
+ /* ring base is programmed as gpu_addr >> 8; the u64 is truncated to 32 bits for the low register */
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
+ WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
+
+ WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
+
+ gfx_v12_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* Switch to pipe 0 */
+ /* NOTE(review): pipe 0 is already selected above and nothing switches away in between; this second select looks redundant - confirm before removing */
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v12_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* start the ring */
+ gfx_v12_0_cp_gfx_start(adev);
+ return 0;
+}
+
+/*
+ * Enable or disable the compute micro engine through CP_MEC_RS64_CNTL.
+ *
+ * When enabling, the icache-invalidate, per-pipe reset and halt fields are
+ * cleared and the per-pipe active fields are set; when disabling, the
+ * opposite values are written.  The KIQ ring's scheduler ready flag is set
+ * to @enable, followed by a 50us delay.
+ */
+static void gfx_v12_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
+{
+	const u32 active = enable ? 1 : 0;
+	const u32 inactive = enable ? 0 : 1;
+	u32 cntl;
+
+	cntl = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
+			     inactive);
+
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, inactive);
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, inactive);
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, inactive);
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, inactive);
+
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE, active);
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE, active);
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE, active);
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE, active);
+
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_HALT, inactive);
+
+	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, cntl);
+
+	adev->gfx.kiq[0].ring.sched.ready = enable;
+
+	udelay(50);
+}
+
+/*
+ * Load the RS64 MEC (compute) firmware from adev->gfx.mec_fw.
+ *
+ * The compute engine is disabled first.  The instruction segment is copied
+ * into one 64KB-aligned VRAM BO; the data segment is copied once per
+ * compute pipe into a second BO, each copy starting on a 64KB-aligned
+ * stride.  Every pipe then gets its own data base (CP_MEC_MDBASE) and the
+ * shared instruction base (CP_CPC_IC_BASE), the data and instruction caches
+ * are invalidated, and the MEC start address is programmed.
+ *
+ * Returns 0 on success, -EINVAL when no MEC firmware is present or a cache
+ * invalidation does not complete within the 50000-iteration poll, or the
+ * amdgpu_bo_create_reserved() error when a BO cannot be created.
+ *
+ * NOTE(review): the -EINVAL timeout paths return without releasing the BOs
+ * created here - verify that the caller's teardown covers this.
+ */
+static int gfx_v12_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
+{
+	const struct gfx_firmware_header_v2_0 *mec_hdr;
+	const __le32 *fw_ucode, *fw_data;
+	u32 tmp, fw_ucode_size, fw_data_size, fw_data_stride;
+	u32 i, usec_timeout = 50000; /* Wait for 50 ms */
+	u32 *fw_ucode_ptr, *fw_data_ptr;
+	int r;
+
+	if (!adev->gfx.mec_fw)
+		return -EINVAL;
+
+	gfx_v12_0_cp_compute_enable(adev, false);
+
+	mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
+	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+
+	fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
+				le32_to_cpu(mec_hdr->ucode_offset_bytes));
+	fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
+
+	fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+				le32_to_cpu(mec_hdr->data_offset_bytes));
+	fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
+
+	/* per-pipe byte stride of the data copies inside the data BO */
+	fw_data_stride = ALIGN(fw_data_size, 64 * 1024);
+
+	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+				      &adev->gfx.mec.mec_fw_obj,
+				      &adev->gfx.mec.mec_fw_gpu_addr,
+				      (void **)&fw_ucode_ptr);
+	if (r) {
+		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
+		gfx_v12_0_mec_fini(adev);
+		return r;
+	}
+
+	r = amdgpu_bo_create_reserved(adev,
+				      fw_data_stride *
+				      adev->gfx.mec.num_pipe_per_mec,
+				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+				      &adev->gfx.mec.mec_fw_data_obj,
+				      &adev->gfx.mec.mec_fw_data_gpu_addr,
+				      (void **)&fw_data_ptr);
+	if (r) {
+		/* this is the data BO; the message used to say "ucode bo" */
+		dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
+		gfx_v12_0_mec_fini(adev);
+		return r;
+	}
+
+	memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
+	/* fw_data_ptr is a u32 pointer, hence the byte stride divided by 4 */
+	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++)
+		memcpy(fw_data_ptr + i * fw_data_stride / 4, fw_data,
+		       fw_data_size);
+
+	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
+	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
+
+	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
+	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
+	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
+
+	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
+	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
+	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
+
+	mutex_lock(&adev->srbm_mutex);
+	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+		soc24_grbm_select(adev, 1, i, 0, 0);
+
+		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO,
+			     lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
+					   i * fw_data_stride));
+		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
+			     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
+					   i * fw_data_stride));
+
+		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
+			     lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
+			     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+	}
+	/*
+	 * Restore the (0, 0, 0, 0) selection while srbm_mutex is still
+	 * held, as every other GRBM select sequence in this file does.
+	 */
+	soc24_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+
+	/* Trigger an invalidation of the MEC data cache */
+	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
+
+	/* Wait for invalidation complete */
+	for (i = 0; i < usec_timeout; i++) {
+		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
+				       INVALIDATE_DCACHE_COMPLETE))
+			break;
+		udelay(1);
+	}
+
+	if (i >= usec_timeout) {
+		/* same wording as the PFP/ME loaders use for this failure */
+		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+		return -EINVAL;
+	}
+
+	/* Trigger an invalidation of the L1 instruction caches */
+	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
+
+	/* Wait for invalidation complete */
+	for (i = 0; i < usec_timeout; i++) {
+		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+				       INVALIDATE_CACHE_COMPLETE))
+			break;
+		udelay(1);
+	}
+
+	if (i >= usec_timeout) {
+		dev_err(adev->dev, "failed to invalidate instruction cache\n");
+		return -EINVAL;
+	}
+
+	gfx_v12_0_set_mec_ucode_start_addr(adev);
+
+	return 0;
+}
+
+/*
+ * Tell the RLC which queue is the KIQ: the low byte of RLC_CP_SCHEDULERS is
+ * replaced with the ring's me (<< 5), pipe (<< 3) and queue, and the value
+ * is written back with bit 7 (0x80) set.
+ */
+static void gfx_v12_0_kiq_setting(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t sched;
+
+	sched = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
+	sched = (sched & 0xffffff00) |
+		(ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, sched | 0x80);
+}
+
+/*
+ * Program the doorbell ranges of the graphics engine (gfx_ring0 ..
+ * gfx_userqueue_end) and of the compute engine (kiq .. userqueue_end).
+ * Each register value is the doorbell index times 2, shifted left by 2.
+ */
+static void gfx_v12_0_cp_set_doorbell_range(struct amdgpu_device *adev)
+{
+	const u32 gfx_lower = (adev->doorbell_index.gfx_ring0 * 2) << 2;
+	const u32 gfx_upper = (adev->doorbell_index.gfx_userqueue_end * 2) << 2;
+	const u32 mec_lower = (adev->doorbell_index.kiq * 2) << 2;
+	const u32 mec_upper = (adev->doorbell_index.userqueue_end * 2) << 2;
+
+	/* set graphics engine doorbell range */
+	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, gfx_lower);
+	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, gfx_upper);
+
+	/* set compute engine doorbell range */
+	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, mec_lower);
+	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, mec_upper);
+}
+
+static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
+ struct amdgpu_mqd_prop *prop)
+{ /*
+   * Fill the gfx MQD image at @m (a struct v12_gfx_mqd) from the queue
+   * properties in @prop.  Only the in-memory MQD is written here; no
+   * hardware register is read or written.  Always returns 0.
+   */
+ struct v12_gfx_mqd *mqd = m;
+ uint64_t hqd_gpu_addr, wb_gpu_addr;
+ uint32_t tmp;
+ uint32_t rb_bufsz;
+
+ /* set up gfx hqd wptr */
+ mqd->cp_gfx_hqd_wptr = 0;
+ mqd->cp_gfx_hqd_wptr_hi = 0;
+
+ /* set the pointer to the MQD (low two address bits are masked off) */
+ mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
+
+ /* set up mqd control */
+ tmp = regCP_GFX_MQD_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
+ mqd->cp_gfx_mqd_control = tmp;
+
+ /* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
+ tmp = regCP_GFX_HQD_VMID_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
+ mqd->cp_gfx_hqd_vmid = 0; /* NOTE(review): the tmp value built above is not stored; the field is hard-coded to 0 */
+
+ /* set up default queue priority level
+ * 0x0 = low priority, 0x1 = high priority */
+ tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
+ mqd->cp_gfx_hqd_queue_priority = tmp;
+
+ /* set up time quantum */
+ tmp = regCP_GFX_HQD_QUANTUM_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
+ mqd->cp_gfx_hqd_quantum = tmp;
+
+ /* set up gfx hqd base. this is similar as CP_RB_BASE */
+ hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; /* base is stored shifted right by 8 (256-byte units) */
+ mqd->cp_gfx_hqd_base = hqd_gpu_addr; /* presumably truncates to the low 32 bits - confirm field width */
+ mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_gfx_hqd_rptr_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff; /* only 16 upper address bits are kept */
+
+ /* set up rb_wptr_poll addr */
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
+ rb_bufsz = order_base_2(prop->queue_size / 4) - 1; /* log2 of the queue size in dwords, minus one */
+ tmp = regCP_GFX_HQD_CNTL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
+#endif
+ if (prop->tmz_queue)
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1);
+ if (!prop->kernel_queue)
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1); /* only non-kernel queues get RB_NON_PRIV */
+ mqd->cp_gfx_hqd_cntl = tmp;
+
+ /* set up cp_doorbell_control */
+ tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT;
+ if (prop->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, prop->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ } else
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ mqd->cp_rb_doorbell_control = tmp;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT;
+
+ /* active the queue */
+ mqd->cp_gfx_hqd_active = 1;
+
+ /* set gfx UQ items: shadow, firmware work area (csa_addr) and fence addresses */
+ mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr);
+ mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr);
+ mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
+ mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
+ mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+ mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
+ return 0;
+}
+
+static int gfx_v12_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
+{ /*
+   * Prepare the MQD of a kernel gfx queue.  On a first-time init (@reset
+   * is false, no GPU reset in progress, not resuming from suspend) the
+   * MQD is zeroed, rebuilt through amdgpu_ring_init_mqd() and copied to
+   * the backup slot.  In every other case the MQD is restored from the
+   * backup and the ring is rewound.  Always returns 0.
+   */
+ struct amdgpu_device *adev = ring->adev;
+ struct v12_gfx_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.gfx_ring[0]; /* index of this ring inside adev->gfx.gfx_ring[] */
+
+ if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ mutex_lock(&adev->srbm_mutex); /* held while this ring's me/pipe/queue is selected */
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+ soc24_grbm_select(adev, 0, 0, 0, 0); /* back to the 0/0/0 selection before unlocking */
+ mutex_unlock(&adev->srbm_mutex);
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else {
+ /* restore mqd with the backup copy */
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
+ /* reset the ring */
+ ring->wptr = 0;
+ *ring->wptr_cpu_addr = 0;
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ return 0;
+}
+
+/*
+ * Resume the kernel gfx queues: initialise the MQD of every gfx ring,
+ * enable the queues through amdgpu_gfx_enable_kgq() and finally start the
+ * gfx CP.  Returns 0 on success or the first error encountered.
+ */
+static int gfx_v12_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
+{
+	int idx, err;
+
+	for (idx = 0; idx < adev->gfx.num_gfx_rings; idx++) {
+		err = gfx_v12_0_kgq_init_queue(&adev->gfx.gfx_ring[idx], false);
+		if (err)
+			return err;
+	}
+
+	err = amdgpu_gfx_enable_kgq(adev, 0);
+
+	return err ? err : gfx_v12_0_cp_gfx_start(adev);
+}
+
+static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
+ struct amdgpu_mqd_prop *prop)
+{ /*
+   * Fill the compute MQD image at @m (a struct v12_compute_mqd) from the
+   * queue properties in @prop.  Only the in-memory MQD is written; no
+   * hardware register is accessed.  Always returns 0.
+   */
+ struct v12_compute_mqd *mqd = m;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000007;
+
+ eop_base_addr = prop->eop_gpu_addr >> 8; /* EOP base is stored shifted right by 8 */
+ mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(GFX12_MEC_HPD_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* enable doorbell?
+ * NOTE(review): cp_hqd_pq_doorbell_control is assigned again near the
+ * end of this function, so the value stored here is overwritten. */
+ tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
+
+ if (prop->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, prop->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* disable the queue if it's active */
+ mqd->cp_hqd_dequeue_request = 0;
+ mqd->cp_hqd_pq_rptr = 0; /* assigned again further down from regCP_HQD_PQ_RPTR_DEFAULT */
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = regCP_MQD_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(prop->queue_size / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+ if (prop->kernel_queue) { /* PRIV_STATE and KMD_QUEUE are set for kernel queues only */
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ }
+ if (prop->tmz_queue)
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ tmp = 0; /* without a doorbell the final doorbell control value is plain 0 */
+ /* enable the doorbell if requested */
+ if (prop->use_doorbell) {
+ tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, prop->doorbell_index);
+
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp; /* this is the value that ends up in the MQD */
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT;
+
+ /* set the vmid for the queue */
+ mqd->cp_hqd_vmid = 0;
+
+ tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ /* set MIN_IB_AVAIL_SIZE */
+ tmp = regCP_HQD_IB_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+ mqd->cp_hqd_ib_control = tmp;
+
+ /* set static priority for a compute queue/ring */
+ mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
+ mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
+
+ mqd->cp_hqd_active = prop->hqd_active; /* activation state is taken from the caller's properties */
+
+ /* set UQ fenceaddress */
+ mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+ mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
+ return 0;
+}
+
+static int gfx_v12_0_kiq_init_register(struct amdgpu_ring *ring)
+{ /*
+   * Program the CP_HQD_* and CP_MQD_* registers from the values held in
+   * the compute MQD at ring->mqd_ptr.  The callers in this file invoke it
+   * with srbm_mutex held and with the ring's me/pipe/queue selected
+   * through soc24_grbm_select().  Always returns 0.
+   */
+ struct amdgpu_device *adev = ring->adev;
+ struct v12_compute_mqd *mqd = ring->mqd_ptr;
+ int j;
+
+ /* inactivate the queue (done only when running as an SR-IOV VF) */
+ if (amdgpu_sriov_vf(adev))
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
+
+ /* disable wptr polling */
+ WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
+
+ /* write the EOP addr */
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
+ mqd->cp_hqd_eop_base_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
+ mqd->cp_hqd_eop_base_addr_hi);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
+ mqd->cp_hqd_eop_control);
+
+ /* enable doorbell? */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
+ for (j = 0; j < adev->usec_timeout; j++) { /* poll up to usec_timeout times, 1us apart; a timeout is not reported */
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
+ mqd->cp_hqd_dequeue_request);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
+ mqd->cp_hqd_pq_rptr);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+ }
+
+ /* set the pointer to the MQD */
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
+ mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
+ mqd->cp_mqd_base_addr_hi);
+
+ /* set MQD vmid to 0 */
+ WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
+ mqd->cp_mqd_control);
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
+ mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
+ mqd->cp_hqd_pq_base_hi);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
+ mqd->cp_hqd_pq_control);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* enable the doorbell if requested: program the MEC doorbell window first */
+ if (ring->use_doorbell) {
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
+ (adev->doorbell_index.kiq * 2) << 2);
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
+ (adev->doorbell_index.userqueue_end * 2) << 2);
+ }
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control); /* second write of the same MQD value as above */
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+
+ /* set the vmid for the queue */
+ WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
+ mqd->cp_hqd_persistent_state);
+
+ /* activate the queue */
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
+ mqd->cp_hqd_active);
+
+ if (ring->use_doorbell)
+ WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+
+ return 0;
+}
+
+static int gfx_v12_0_kiq_init_queue(struct amdgpu_ring *ring)
+{ /*
+   * Initialise the KIQ: publish its location to the RLC, then either
+   * restore the MQD from its backup (GPU reset) or rebuild it from
+   * scratch and save a backup (all other cases).  In both paths the HQD
+   * registers are programmed under srbm_mutex.  Always returns 0; the
+   * return value of gfx_v12_0_kiq_init_register() is not checked.
+   */
+ struct amdgpu_device *adev = ring->adev;
+ struct v12_compute_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; /* the KIQ backup uses the slot at index AMDGPU_MAX_COMPUTE_RINGS */
+
+ gfx_v12_0_kiq_setting(ring);
+
+ if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
+ /* reset MQD to a clean status */
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+
+ /* reset ring buffer */
+ ring->wptr = 0;
+ amdgpu_ring_clear_ring(ring);
+
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ gfx_v12_0_kiq_init_register(ring);
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ if (amdgpu_sriov_vf(adev) && adev->in_suspend) /* ring contents are cleared only for an SR-IOV VF in suspend */
+ amdgpu_ring_clear_ring(ring);
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+ gfx_v12_0_kiq_init_register(ring);
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.mec.mqd_backup[mqd_idx]) /* keep a copy for the reset path above */
+ memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ }
+
+ return 0;
+}
+
+static int gfx_v12_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset)
+{ /*
+   * Prepare the MQD of a kernel compute queue.  On a first-time init
+   * (@reset is false, no GPU reset in progress, not resuming from
+   * suspend) the MQD is zeroed, rebuilt through amdgpu_ring_init_mqd()
+   * and copied to the backup slot.  Otherwise the MQD is restored from
+   * the backup and the ring is rewound.  Always returns 0.
+   */
+ struct amdgpu_device *adev = ring->adev;
+ struct v12_compute_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.compute_ring[0]; /* index of this ring inside adev->gfx.compute_ring[] */
+
+ if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else {
+ /* restore MQD to a clean status */
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+ /* reset ring buffer */
+ ring->wptr = 0;
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); /* the CPU-side wptr slot is cleared with a 64-bit atomic store */
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ return 0;
+}
+
+/*
+ * Bring up the KIQ ring of the first KIQ instance and mark its scheduler
+ * ready.  The ring is only flagged ready when queue initialisation
+ * succeeded; any error from gfx_v12_0_kiq_init_queue() is returned to the
+ * caller instead of being dropped.
+ */
+static int gfx_v12_0_kiq_resume(struct amdgpu_device *adev)
+{
+	int r;
+
+	r = gfx_v12_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
+	if (r)
+		return r;
+
+	adev->gfx.kiq[0].ring.sched.ready = true;
+	return 0;
+}
+
+/*
+ * Resume the kernel compute queues.  When async gfx rings are not in use
+ * the compute CP is enabled here first.  Every compute ring then gets its
+ * MQD initialised, and the queues are finally enabled through
+ * amdgpu_gfx_enable_kcq().  Returns 0 or the first error encountered.
+ */
+static int gfx_v12_0_kcq_resume(struct amdgpu_device *adev)
+{
+	int idx, err = 0;
+
+	if (!amdgpu_async_gfx_ring)
+		gfx_v12_0_cp_compute_enable(adev, true);
+
+	/* stop at the first ring whose MQD setup fails */
+	for (idx = 0; idx < adev->gfx.num_compute_rings && !err; idx++)
+		err = gfx_v12_0_kcq_init_queue(&adev->gfx.compute_ring[idx],
+					       false);
+	if (err)
+		return err;
+
+	return amdgpu_gfx_enable_kcq(adev, 0);
+}
+
+static int gfx_v12_0_cp_resume(struct amdgpu_device *adev)
+{ /*
+   * Bring up the command processor: load CP microcode (direct firmware
+   * loading only), program the doorbell ranges, resume the KIQ, the
+   * kernel compute queues and the gfx rings, then run a ring test on
+   * every gfx and compute ring.  Returns 0 or the first error.
+   */
+ int r, i;
+ struct amdgpu_ring *ring;
+
+ if (!(adev->flags & AMD_IS_APU)) /* skipped on APUs */
+ gfx_v12_0_enable_gui_idle_interrupt(adev, false);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /* legacy firmware loading */
+ r = gfx_v12_0_cp_gfx_load_microcode(adev);
+ if (r)
+ return r;
+
+ r = gfx_v12_0_cp_compute_load_microcode_rs64(adev);
+ if (r)
+ return r;
+ }
+
+ gfx_v12_0_cp_set_doorbell_range(adev);
+
+ if (amdgpu_async_gfx_ring) { /* with async gfx rings both engines are enabled up front */
+ gfx_v12_0_cp_compute_enable(adev, true);
+ gfx_v12_0_cp_gfx_enable(adev, true);
+ }
+
+ if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) /* KIQ is brought up by MES when that path is available */
+ r = amdgpu_mes_kiq_hw_init(adev);
+ else
+ r = gfx_v12_0_kiq_resume(adev);
+ if (r)
+ return r;
+
+ r = gfx_v12_0_kcq_resume(adev);
+ if (r)
+ return r;
+
+ if (!amdgpu_async_gfx_ring) {
+ r = gfx_v12_0_cp_gfx_resume(adev);
+ if (r)
+ return r;
+ } else {
+ r = gfx_v12_0_cp_async_gfx_ring_resume(adev);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) { /* ring-test every gfx ring */
+ ring = &adev->gfx.gfx_ring[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) { /* ring-test every compute ring */
+ ring = &adev->gfx.compute_ring[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v12_0_cp_enable(struct amdgpu_device *adev, bool enable)
+{ /* Apply @enable to the gfx CP first, then to the compute CP. */
+ gfx_v12_0_cp_gfx_enable(adev, enable);
+ gfx_v12_0_cp_compute_enable(adev, enable);
+}
+
+static int gfx_v12_0_gfxhub_enable(struct amdgpu_device *adev)
+{ /*
+   * Enable the GART on the gfx hub, flush HDP, set the hub's default
+   * fault handling and flush the GPU TLB on AMDGPU_GFXHUB(0).  Returns
+   * the error from gart_enable() or 0.
+   */
+ int r;
+ bool value;
+
+ r = adev->gfxhub.funcs->gart_enable(adev);
+ if (r)
+ return r;
+
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS; /* false only when amdgpu_vm_fault_stop is AMDGPU_VM_FAULT_STOP_ALWAYS */
+
+ adev->gfxhub.funcs->set_fault_enable_default(adev, value);
+ /* TODO investigate why this and the hdp flush above is needed,
+ * are we missing a flush somewhere else? */
+ adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
+
+ return 0;
+}
+
+static int get_gb_addr_config(struct amdgpu_device *adev)
+{ /*
+   * Read GB_ADDR_CONFIG, cache the raw value in adev->gfx.config and
+   * decode its fields into adev->gfx.config.gb_addr_config_fields.  The
+   * count fields are stored as 1 << field.  Returns -EINVAL when the
+   * register reads back as 0, otherwise 0.
+   */
+ u32 gb_addr_config;
+
+ gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
+ if (gb_addr_config == 0)
+ return -EINVAL;
+
+ adev->gfx.config.gb_addr_config_fields.num_pkrs =
+ 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
+
+ adev->gfx.config.gb_addr_config = gb_addr_config; /* the remaining fields are decoded from this cached copy */
+
+ adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_PIPES);
+
+ adev->gfx.config.max_tile_pipes =
+ adev->gfx.config.gb_addr_config_fields.num_pipes;
+
+ adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
+ adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_RB_PER_SE);
+ adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
+ adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE)); /* stored as 1 << (8 + field), i.e. a field value of 0 gives 256 */
+
+ return 0;
+}
+
+static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev)
+{ /*
+   * Set the GPA_OVERRIDE bit in CPC_PSP_DEBUG and in CPG_PSP_DEBUG with
+   * a read-modify-write of each register.
+   */
+ uint32_t data;
+
+ data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
+ data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
+ WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
+
+ data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
+ data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
+ WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
+}
+
+static void gfx_v12_0_init_golden_registers(struct amdgpu_device *adev)
+{ /*
+   * Program the golden register settings for GC 12.0.0 and 12.0.1.
+   * Nothing is programmed for an SR-IOV VF or for any other GC version.
+   */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_12_0,
+ (const u32)ARRAY_SIZE(golden_settings_gc_12_0));
+
+ if (adev->rev_id == 0) /* revision 0 gets an additional settings table */
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_12_0_rev0,
+ (const u32)ARRAY_SIZE(golden_settings_gc_12_0_rev0));
+ break;
+ default:
+ break;
+ }
+}
+
+static int gfx_v12_0_hw_init(struct amdgpu_ip_block *ip_block)
+{ /*
+   * Hardware bring-up of the GFX block.  Depending on
+   * adev->firmware.load_type it either starts the RLC backdoor autoload
+   * or (direct load) loads and starts the IMU; it then waits for
+   * autoload completion where applicable, applies golden registers,
+   * enables the gfx hub, loads SMU firmware if needed, and resumes RLC
+   * and CP.  Returns 0 or the first error from a failing step.
+   */
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
+ /* RLC autoload sequence 1: Program rlc ram */
+ if (adev->gfx.imu.funcs->program_rlc_ram)
+ adev->gfx.imu.funcs->program_rlc_ram(adev);
+ }
+ /* rlc autoload firmware */
+ r = gfx_v12_0_rlc_backdoor_autoload_enable(adev);
+ if (r)
+ return r;
+ } else {
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) { /* each IMU hook is optional and called only when provided */
+ if (adev->gfx.imu.funcs->load_microcode)
+ adev->gfx.imu.funcs->load_microcode(adev);
+ if (adev->gfx.imu.funcs->setup_imu)
+ adev->gfx.imu.funcs->setup_imu(adev);
+ if (adev->gfx.imu.funcs->start_imu)
+ adev->gfx.imu.funcs->start_imu(adev);
+ }
+
+ /* disable gpa mode in backdoor loading */
+ gfx_v12_0_disable_gpa_mode(adev);
+ }
+ }
+
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
+ r = gfx_v12_0_wait_for_rlc_autoload_complete(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
+ return r;
+ }
+ }
+
+ if (!amdgpu_emu_mode) /* golden registers are skipped in emulation mode */
+ gfx_v12_0_init_golden_registers(adev);
+
+ adev->gfx.is_poweron = true;
+
+ if (get_gb_addr_config(adev)) /* a failure here only produces a warning */
+ DRM_WARN("Invalid gb_addr_config !\n");
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
+ gfx_v12_0_config_gfx_rs64(adev);
+
+ r = gfx_v12_0_gfxhub_enable(adev);
+ if (r)
+ return r;
+
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT ||
+ adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) &&
+ (amdgpu_dpm == 1)) {
+ /**
+ * For gfx 12, rlc firmware loading relies on smu firmware is
+ * loaded firstly, so in direct type, it has to load smc ucode
+ * here before rlc.
+ */
+ r = amdgpu_pm_load_smu_firmware(adev, NULL);
+ if (r)
+ return r;
+ }
+
+ gfx_v12_0_constants_init(adev);
+
+ if (adev->nbio.funcs->gc_doorbell_init)
+ adev->nbio.funcs->gc_doorbell_init(adev);
+
+ r = gfx_v12_0_rlc_resume(adev);
+ if (r)
+ return r;
+
+ /*
+ * init golden registers and rlc resume may override some registers,
+ * reconfig them here
+ */
+ gfx_v12_0_tcp_harvest(adev);
+
+ r = gfx_v12_0_cp_resume(adev);
+ if (r)
+ return r;
+
+ return r; /* r is 0 here; the check above already returned any error */
+}
+
+static int gfx_v12_0_set_userq_eop_interrupts(struct amdgpu_device *adev,
+ bool enable)
+{ /*
+   * Take (@enable true) or drop (@enable false) a reference on the EOP
+   * interrupt of every gfx and compute pipe, but only for the IP types
+   * that have user-queue functions registered in adev->userq_funcs[].
+   * Returns 0 or the first error from amdgpu_irq_get()/amdgpu_irq_put();
+   * references handled before the failure are not rolled back.
+   */
+ unsigned int irq_type;
+ int m, p, r;
+
+ if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) {
+ for (m = 0; m < adev->gfx.me.num_me; m++) {
+ for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) {
+ irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p; /* NOTE(review): m is not part of the index, so every ME maps to the ME0 sources */
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) {
+ for (m = 0; m < adev->gfx.mec.num_mec; ++m) {
+ for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) {
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + (m * adev->gfx.mec.num_pipe_per_mec)
+ + p; /* linear index: MEC number times pipes per MEC, plus pipe */
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v12_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{ /*
+   * Hardware teardown of the GFX block: cancel the idle work, drop the
+   * interrupt references, disable the kernel queues (when hardware access
+   * is allowed), then stop the CP and disable the GART.  An SR-IOV VF
+   * takes a shorter path that only disables the gfx CP and clears the
+   * low byte of RLC_CP_SCHEDULERS.  Always returns 0.
+   */
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t tmp;
+
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
+
+ amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); /* return values of these puts are ignored */
+ amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
+ gfx_v12_0_set_userq_eop_interrupts(adev, false);
+
+ if (!adev->no_hw_access) {
+ if (amdgpu_async_gfx_ring) {
+ if (amdgpu_gfx_disable_kgq(adev, 0))
+ DRM_ERROR("KGQ disable failed\n");
+ }
+
+ if (amdgpu_gfx_disable_kcq(adev, 0)) /* failures are logged, teardown continues */
+ DRM_ERROR("KCQ disable failed\n");
+
+ amdgpu_mes_kiq_hw_fini(adev);
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ gfx_v12_0_cp_gfx_enable(adev, false);
+ /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
+ tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00; /* clears the byte written by gfx_v12_0_kiq_setting() */
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
+
+ return 0; /* the VF path leaves is_poweron and the GART untouched */
+ }
+ gfx_v12_0_cp_enable(adev, false);
+ gfx_v12_0_enable_gui_idle_interrupt(adev, false);
+
+ adev->gfxhub.funcs->gart_disable(adev);
+
+ adev->gfx.is_poweron = false;
+
+ return 0;
+}
+
+static int gfx_v12_0_suspend(struct amdgpu_ip_block *ip_block)
+{ /* Suspend is implemented as a plain hardware teardown; returns its result. */
+ return gfx_v12_0_hw_fini(ip_block);
+}
+
+static int gfx_v12_0_resume(struct amdgpu_ip_block *ip_block)
+{ /* Resume is implemented as a plain hardware init; returns its result. */
+ return gfx_v12_0_hw_init(ip_block);
+}
+
+/*
+ * Report whether the GFX block is idle, i.e. whether the GUI_ACTIVE field
+ * of GRBM_STATUS currently reads as zero.
+ */
+static bool gfx_v12_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	u32 grbm_status = RREG32_SOC15(GC, 0, regGRBM_STATUS);
+
+	return !REG_GET_FIELD(grbm_status, GRBM_STATUS, GUI_ACTIVE);
+}
+
+/*
+ * Poll GRBM_STATUS until GUI_ACTIVE clears, waiting 1us between reads for
+ * at most adev->usec_timeout iterations.  Returns 0 once the block is
+ * idle, or -ETIMEDOUT if it never became idle.
+ */
+static int gfx_v12_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	unsigned int usec;
+
+	for (usec = 0; usec < adev->usec_timeout; usec++) {
+		/* idle as soon as the GUI_ACTIVE bit of GRBM_STATUS is clear */
+		if (!(RREG32_SOC15(GC, 0, regGRBM_STATUS) &
+		      GRBM_STATUS__GUI_ACTIVE_MASK))
+			return 0;
+
+		udelay(1);
+	}
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * Return the GPU clock counter as reported by the SMUIO callback.  When
+ * no such callback is installed a warning is logged and 0 is returned.
+ */
+static uint64_t gfx_v12_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+	if (!adev->smuio.funcs || !adev->smuio.funcs->get_gpu_clock_counter) {
+		dev_warn(adev->dev, "query gpu clock counter is not supported\n");
+		return 0;
+	}
+
+	return adev->smuio.funcs->get_gpu_clock_counter(adev);
+}
+
+static int gfx_v12_0_early_init(struct amdgpu_ip_block *ip_block)
+{ /*
+   * Early software setup of the GFX block: decide from the
+   * amdgpu_user_queue setting whether kernel queues and/or user queues
+   * are used, size the ring counts accordingly, install the function
+   * tables and load the microcode descriptors.  Returns the result of
+   * gfx_v12_0_init_microcode().
+   */
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_user_queue) {
+ case -1:
+ case 0:
+ default: /* -1, 0 and any unlisted value: kernel queues only */
+ adev->gfx.disable_kq = false;
+ adev->gfx.disable_uq = true;
+ break;
+ case 1: /* kernel queues and user queues */
+ adev->gfx.disable_kq = false;
+ adev->gfx.disable_uq = false;
+ break;
+ case 2: /* user queues only */
+ adev->gfx.disable_kq = true;
+ adev->gfx.disable_uq = false;
+ break;
+ }
+
+ adev->gfx.funcs = &gfx_v12_0_gfx_funcs;
+
+ if (adev->gfx.disable_kq) { /* no kernel rings at all when kernel queues are disabled */
+ adev->gfx.num_gfx_rings = 0;
+ adev->gfx.num_compute_rings = 0;
+ } else {
+ adev->gfx.num_gfx_rings = GFX12_NUM_GFX_RINGS;
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ AMDGPU_MAX_COMPUTE_RINGS); /* capped at AMDGPU_MAX_COMPUTE_RINGS */
+ }
+
+ gfx_v12_0_set_kiq_pm4_funcs(adev);
+ gfx_v12_0_set_ring_funcs(adev);
+ gfx_v12_0_set_irq_funcs(adev);
+ gfx_v12_0_set_rlc_funcs(adev);
+ gfx_v12_0_set_mqd_funcs(adev);
+ gfx_v12_0_set_imu_funcs(adev);
+
+ gfx_v12_0_init_rlcg_reg_access_ctrl(adev);
+
+ return gfx_v12_0_init_microcode(adev);
+}
+
+/*
+ * Late init: take a reference on the privileged-register, privileged-
+ * instruction and bad-opcode interrupts, then enable the user-queue EOP
+ * interrupts.  The steps run in that order and stop at the first failure,
+ * whose error code is returned; 0 means every step succeeded.
+ */
+static int gfx_v12_0_late_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int err;
+
+	err = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+	if (!err)
+		err = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+	if (!err)
+		err = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
+	if (!err)
+		err = gfx_v12_0_set_userq_eop_interrupts(adev, true);
+
+	return err;
+}
+
+/*
+ * Report whether the RLC is enabled, i.e. whether the RLC_ENABLE_F32
+ * field of RLC_CNTL reads as non-zero.
+ */
+static bool gfx_v12_0_is_rlc_enabled(struct amdgpu_device *adev)
+{
+	return REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_CNTL),
+			     RLC_CNTL, RLC_ENABLE_F32) != 0;
+}
+
+static void gfx_v12_0_set_safe_mode(struct amdgpu_device *adev,
+ int xcc_id)
+{ /*
+   * Request RLC safe mode: write RLC_SAFE_MODE with the CMD bit set and
+   * MESSAGE = 1, then poll until the CMD field reads back as 0.  @xcc_id
+   * is not used.  A poll timeout is not reported to the caller.
+   */
+ uint32_t data;
+ unsigned i;
+
+ data = RLC_SAFE_MODE__CMD_MASK;
+ data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
+
+ WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
+
+ /* wait for RLC_SAFE_MODE */
+ for (i = 0; i < adev->usec_timeout; i++) { /* up to usec_timeout polls, 1us apart */
+ if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
+ RLC_SAFE_MODE, CMD))
+ break;
+ udelay(1);
+ }
+}
+
+static void gfx_v12_0_unset_safe_mode(struct amdgpu_device *adev,
+ int xcc_id)
+{ /*
+   * Write RLC_SAFE_MODE with only the CMD bit set (MESSAGE field 0).
+   * There is no wait for completion and @xcc_id is not used.
+   */
+ WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
+}
+
+static void gfx_v12_0_update_perf_clk(struct amdgpu_device *adev,
+ bool enable)
+{ /*
+   * Clear (@enable true) or set (@enable false) the PERFMON_CLOCK_STATE
+   * bit in RLC_CGTT_MGCG_OVERRIDE.  Does nothing unless
+   * AMD_CG_SUPPORT_GFX_PERF_CLK is present in adev->cg_flags.
+   */
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
+ return;
+
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
+
+ if (def != data) /* skip the register write when nothing changed */
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+}
+
+/*
+ * Update the RLC_SPM_VMID field of RLC_SPM_MC_CNTL to @vmid.
+ *
+ * In SR-IOV "pp one VF" mode the register is accessed through the NO_KIQ
+ * accessors, and when @ring is a gfx or compute ring the same value is
+ * additionally emitted as a register write on that ring.  @ring may be
+ * NULL, in which case only the direct register write is performed.
+ */
+static void gfx_v12_0_update_spm_vmid(struct amdgpu_device *adev,
+				      struct amdgpu_ring *ring,
+				      unsigned vmid)
+{
+	u32 reg, data;
+
+	reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
+	if (amdgpu_sriov_is_pp_one_vf(adev))
+		data = RREG32_NO_KIQ(reg);
+	else
+		data = RREG32(reg);
+
+	/*
+	 * NOTE(review): vmid is masked with the in-register field mask before
+	 * being shifted, which is only correct while the field shift is 0 -
+	 * confirm against the register header.
+	 */
+	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
+	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
+
+	if (amdgpu_sriov_is_pp_one_vf(adev))
+		WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
+	else
+		WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
+
+	/* reuse the offset computed above instead of a shadowing local */
+	if (ring
+	    && amdgpu_sriov_is_pp_one_vf(adev)
+	    && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX)
+		|| (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)))
+		amdgpu_ring_emit_wreg(ring, reg, data);
+}
+
+static const struct amdgpu_rlc_funcs gfx_v12_0_rlc_funcs = { /* RLC callback table for GFX v12: binds each hook to its gfx_v12_0_* implementation */
+ .is_rlc_enabled = gfx_v12_0_is_rlc_enabled,
+ .set_safe_mode = gfx_v12_0_set_safe_mode,
+ .unset_safe_mode = gfx_v12_0_unset_safe_mode,
+ .init = gfx_v12_0_rlc_init,
+ .get_csb_size = gfx_v12_0_get_csb_size,
+ .get_csb_buffer = gfx_v12_0_get_csb_buffer,
+ .resume = gfx_v12_0_rlc_resume,
+ .stop = gfx_v12_0_rlc_stop,
+ .reset = gfx_v12_0_rlc_reset,
+ .start = gfx_v12_0_rlc_start,
+ .update_spm_vmid = gfx_v12_0_update_spm_vmid,
+};
+
+#if 0 /* placeholder power-gating helpers; compiled out until implemented */
+static void gfx_v12_cntl_power_gating(struct amdgpu_device *adev, bool enable)
+{
+ /* TODO: not implemented, empty stub */
+}
+
+static void gfx_v12_cntl_pg(struct amdgpu_device *adev, bool enable)
+{
+ /* TODO: not implemented, empty stub */
+}
+#endif
+
+static int gfx_v12_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{ /*
+   * Power-gating callback.  For GC 12.0.0 and 12.0.1 it forwards the
+   * request to amdgpu_gfx_off_ctrl(), passing true when @state is
+   * AMD_PG_STATE_GATE.  It is a no-op for an SR-IOV VF and for other GC
+   * versions.  Always returns 0.
+   */
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = (state == AMD_PG_STATE_GATE);
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_gfx_off_ctrl(adev, enable);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * Enable or disable GFX coarse-grain clock gating (CGCG) and coarse-grain
+ * light sleep (CGLS), including their 3D variants, for the features that are
+ * advertised in adev->cg_flags. Does nothing when none of the four CGCG/CGLS
+ * flags is set.
+ *
+ * Most registers are read, modified and only written back when the value
+ * changed (def != data). The CP_INT_CNTL and SDMAx_RLC_CGCG_CTRL updates in
+ * the enable path are written unconditionally.
+ */ static void gfx_v12_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS)))
+ return;
+
+ if (enable) {
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ /* unset CGCG override */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
+ adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) /* one shared override bit covers both 3D features */
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
+
+ /* update CGCG override bits */
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* enable cgcg FSM(0x0000363F) */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
+ data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
+ data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
+ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
+ data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+ }
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
+
+ /* Program RLC_CGCG_CGLS_CTRL_3D */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
+ data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
+ data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+ }
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
+
+ /* set IDLE_POLL_COUNT(0x00900100) */
+ def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
+
+ data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
+ data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+ (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
+
+ data = RREG32_SOC15(GC, 0, regCP_INT_CNTL); /* turn on the CP busy/empty/idle interrupt enables */
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
+
+ data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); /* set CGCG_INT_ENABLE for SDMA0 */
+ data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
+
+ /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */
+ if (adev->sdma.num_instances > 1) {
+ data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
+ data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ }
+ } else { /* disable path: only the *_EN bits are cleared; overrides, thresholds and interrupt enables are left untouched */
+ /* Program RLC_CGCG_CGLS_CTRL */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
+
+ /* Program RLC_CGCG_CGLS_CTRL_3D */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
+ }
+}
+
+/*
+ * Enable or disable GFX medium-grain clock gating (MGCG) by clearing
+ * (enable) or setting (disable) the GRBM, RLC and GFXIP MGCG override bits
+ * in RLC_CGTT_MGCG_OVERRIDE. The register is only written when the value
+ * changed.
+ *
+ * AMD_CG_SUPPORT_GFX_MGLS is accepted by the entry guard, but only the MGCG
+ * flag triggers any register access in this function.
+ */ static void gfx_v12_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
+ return;
+
+ /* It is disabled by HW by default */
+ if (enable) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
+ /* 1 - RLC_CGTT_MGCG_OVERRIDE */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+ }
+ } else {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | /* same three override bits as the enable path, set instead of cleared */
+ RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+ }
+ }
+}
+
+/*
+ * Enable or disable repeater fine-grain clock gating by clearing (enable)
+ * or setting (disable) the GFXIP and RLC repeater FGCG override bits in
+ * RLC_CGTT_MGCG_OVERRIDE. No-op unless AMD_CG_SUPPORT_REPEATER_FGCG is set
+ * in adev->cg_flags; the register is only written when the value changed.
+ */ static void gfx_v12_0_update_repeater_fgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
+ return;
+
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~(RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK);
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+}
+
+/*
+ * Enable or disable GFX fine-grain clock gating by clearing (enable) or
+ * setting (disable) the GFXIP FGCG override bit in RLC_CGTT_MGCG_OVERRIDE.
+ * No-op unless AMD_CG_SUPPORT_GFX_FGCG is set in adev->cg_flags; the
+ * register is only written when the value changed.
+ */ static void gfx_v12_0_update_sram_fgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
+ return;
+
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+}
+
+/*
+ * Apply the requested gating state to every GFX clock-gating feature.
+ * All helper calls are bracketed by amdgpu_gfx_rlc_enter_safe_mode() and
+ * amdgpu_gfx_rlc_exit_safe_mode(). The GUI idle interrupt is updated only
+ * when at least one MGCG/CGCG/CGLS (incl. 3D) feature is advertised in
+ * adev->cg_flags. Always returns 0.
+ */ static int gfx_v12_0_update_gfx_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ gfx_v12_0_update_coarse_grain_clock_gating(adev, enable);
+
+ gfx_v12_0_update_medium_grain_clock_gating(adev, enable);
+
+ gfx_v12_0_update_repeater_fgcg(adev, enable);
+
+ gfx_v12_0_update_sram_fgcg(adev, enable);
+
+ gfx_v12_0_update_perf_clk(adev, enable);
+
+ if (adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS))
+ gfx_v12_0_enable_gui_idle_interrupt(adev, enable);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ return 0;
+}
+
+/*
+ * IP-block callback that switches GFX clock gating on or off.
+ *
+ * Nothing is programmed when running as an SR-IOV virtual function or when
+ * the GC IP version is not 12.0.0 / 12.0.1. The result of the gating update
+ * is not propagated; the callback always returns 0.
+ */
+static int gfx_v12_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					   enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	const bool gate = (state == AMD_CG_STATE_GATE);
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+	case IP_VERSION(12, 0, 0):
+	case IP_VERSION(12, 0, 1):
+		gfx_v12_0_update_gfx_clock_gating(adev, gate);
+		return 0;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Report which GFX clock-gating features are currently active by decoding
+ * the hardware registers, OR-ing the matching AMD_CG_SUPPORT_* bits into
+ * *flags (bits already set in *flags are preserved).
+ *
+ * Features controlled through RLC_CGTT_MGCG_OVERRIDE are reported when their
+ * override/state bit is clear; CGCG/CGLS (and the 3D variants) are reported
+ * when their enable bit is set.
+ */
+static void gfx_v12_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	/* register contents are 32-bit unsigned bitfields, never signed values */
+	u32 data;
+
+	/* AMD_CG_SUPPORT_GFX_MGCG */
+	data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
+		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
+
+	/* AMD_CG_SUPPORT_REPEATER_FGCG */
+	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
+		*flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
+
+	/* AMD_CG_SUPPORT_GFX_FGCG */
+	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
+		*flags |= AMD_CG_SUPPORT_GFX_FGCG;
+
+	/* AMD_CG_SUPPORT_GFX_PERF_CLK */
+	if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
+		*flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
+
+	/* AMD_CG_SUPPORT_GFX_CGCG */
+	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
+		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
+
+	/* AMD_CG_SUPPORT_GFX_CGLS */
+	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
+		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
+
+	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
+	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
+		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
+
+	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
+	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
+		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
+}
+
+/*
+ * Return the GFX ring read pointer, read as a 32-bit value from the CPU
+ * address stored in ring->rptr_cpu_addr and widened to u64.
+ */
+static u64 gfx_v12_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
+{
+	/* gfx12 is 32bit rptr */
+	const uint32_t *rptr_slot = (uint32_t *)ring->rptr_cpu_addr;
+
+	return *rptr_slot;
+}
+
+/*
+ * Return the 64-bit GFX ring write pointer. With doorbells it is read
+ * atomically from ring->wptr_cpu_addr; otherwise it is assembled from the
+ * CP_RB0_WPTR (low 32 bits) and CP_RB0_WPTR_HI (high 32 bits) registers.
+ */ static u64 gfx_v12_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ } else {
+ wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
+ wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
+ }
+
+ return wptr;
+}
+
+/*
+ * Publish ring->wptr for a GFX ring. With doorbells the value is stored
+ * atomically at ring->wptr_cpu_addr and then written to the ring's 64-bit
+ * doorbell; otherwise it is split across the CP_RB0_WPTR and
+ * CP_RB0_WPTR_HI registers.
+ */ static void gfx_v12_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
+ lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
+ upper_32_bits(ring->wptr));
+ }
+}
+
+/*
+ * Return the compute ring read pointer, read as a 32-bit value from the CPU
+ * address stored in ring->rptr_cpu_addr and widened to u64.
+ */
+static u64 gfx_v12_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
+{
+	/* gfx12 hardware is 32bit rptr */
+	const uint32_t *rptr_slot = (uint32_t *)ring->rptr_cpu_addr;
+
+	return *rptr_slot;
+}
+
+/*
+ * Return the 64-bit compute ring write pointer, read atomically from
+ * ring->wptr_cpu_addr. Only doorbell-driven rings are handled; a ring
+ * without use_doorbell hits BUG().
+ */ static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
+{
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell)
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ else
+ BUG();
+ return wptr;
+}
+
+/*
+ * Publish ring->wptr for a compute ring: store it atomically at
+ * ring->wptr_cpu_addr, then write it to the ring's 64-bit doorbell. A ring
+ * without use_doorbell hits BUG().
+ */ static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ BUG(); /* only DOORBELL method supported on gfx12 now */
+ }
+}
+
+/*
+ * Emit an HDP flush on the ring: a write-then-wait packet on the NBIO HDP
+ * flush request/done register pair, using a ref/mask selected by the ring.
+ *
+ * Compute rings use ref_and_mask_cp2 (me 1) or ref_and_mask_cp6 (me 2),
+ * shifted left by the ring's pipe index, with engine 0. All other ring
+ * types use ref_and_mask_cp0 with engine 1 (PFP). A compute ring whose me
+ * is neither 1 nor 2 emits nothing.
+ */ static void gfx_v12_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask, reg_mem_engine;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+ switch (ring->me) {
+ case 1:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
+ break;
+ case 2:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
+ break;
+ default:
+ return;
+ }
+ reg_mem_engine = 0;
+ } else {
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
+ reg_mem_engine = 1; /* pfp */
+ }
+
+ gfx_v12_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+ ref_and_mask, ref_and_mask, 0x20);
+}
+
+/*
+ * Emit an INDIRECT_BUFFER packet (header + 3 dwords) that makes a GFX ring
+ * execute the given IB. The control dword carries the IB length in dwords
+ * and the job's VMID in bits 24 and up. The IB GPU address must be dword
+ * aligned (BUG_ON otherwise). The flags argument is not used here.
+ */ static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 header, control = 0;
+
+ header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+
+ control |= ib->length_dw | (vmid << 24);
+
+ amdgpu_ring_write(ring, header);
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+/*
+ * Emit an INDIRECT_BUFFER packet (header + 3 dwords) that makes a compute
+ * ring execute the given IB. Unlike the GFX variant, the control dword also
+ * has INDIRECT_BUFFER_VALID set, alongside the IB length in dwords and the
+ * job's VMID in bits 24 and up. The IB GPU address must be dword aligned
+ * (BUG_ON otherwise). The flags argument is not used here.
+ */ static void gfx_v12_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+/*
+ * Emit a RELEASE_MEM packet (header + 7 dwords) that writes the fence
+ * sequence number seq to GPU address addr and optionally raises an
+ * interrupt.
+ *
+ * flags: AMDGPU_FENCE_FLAG_64BIT selects DATA_SEL 2 instead of 1 and
+ * requires addr to be 8-byte aligned (4-byte otherwise, enforced with
+ * BUG_ON); AMDGPU_FENCE_FLAG_INT selects INT_SEL 2 instead of 0.
+ */ static void gfx_v12_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+
+ /* RELEASE_MEM - flush caches, send int */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
+ amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
+ PACKET3_RELEASE_MEM_GCR_GL2_WB |
+ PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
+ PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
+ PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
+
+ /*
+ * the address should be Qword aligned if 64bit write, Dword
+ * aligned if only send 32bit data low (discard data high)
+ */
+ if (write64bit)
+ BUG_ON(addr & 0x7);
+ else
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ amdgpu_ring_write(ring, 0);
+}
+
+/*
+ * Emit a wait on the ring's own fence location: a wait-reg-mem packet in
+ * memory mode against ring->fence_drv.gpu_addr with reference value
+ * ring->fence_drv.sync_seq (truncated to 32 bits) and a full 32-bit mask.
+ * GFX rings pass engine 1, all other ring types engine 0.
+ */ static void gfx_v12_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ gfx_v12_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
+ upper_32_bits(addr), seq, 0xffffffff, 4);
+}
+
+/*
+ * Emit an INVALIDATE_TLBS packet (header + 1 dword) whose payload packs the
+ * destination select, the all-hub flag, the PASID and the flush type.
+ */ static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
+/*
+ * Emit a GPU TLB flush for vmid / pd_addr via the GMC helper and, on GFX
+ * rings only, follow it with a PFP_SYNC_ME packet.
+ */ static void gfx_v12_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* compute doesn't have PFP */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
+ /* sync PFP to ME, otherwise we might get invalid PFP reads */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+ amdgpu_ring_write(ring, 0x0);
+ }
+}
+
+/*
+ * Emit a fence on the KIQ ring: a WRITE_DATA packet stores the low 32 bits
+ * of seq at addr. When AMDGPU_FENCE_FLAG_INT is set, a second WRITE_DATA
+ * packet writes 0x20000000 to the CPC_INT_STATUS register to trigger an
+ * interrupt. 64-bit fences are rejected with BUG_ON.
+ */ static void gfx_v12_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned int flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* we only allocate 32bit for each seq wb address */
+ BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ /* write fence seq to the "addr" */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* set register to trigger INT */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
+ amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
+ }
+}
+
+/*
+ * Emit a CONTEXT_CONTROL packet (header + 2 dwords). The first payload
+ * dword always has load_enable set; when flags contains
+ * AMDGPU_HAVE_CTX_SWITCH the global config/uconfig, CS SH register,
+ * per-context state and GFX SH register load bits are added. The second
+ * payload dword is zero.
+ */
+static void gfx_v12_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
+					 uint32_t flags)
+{
+	/* set load_enable otherwise this package is just NOPs */
+	uint32_t load_bits = 0x80000000;
+
+	if (flags & AMDGPU_HAVE_CTX_SWITCH)
+		load_bits |= 0x8001 |		/* load_global_config & load_global_uconfig */
+			     0x01000000 |	/* load_cs_sh_regs */
+			     0x10002;		/* load_per_context_state & load_gfx_sh_regs for GFX */
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+	amdgpu_ring_write(ring, load_bits);
+	amdgpu_ring_write(ring, 0);
+}
+
+/*
+ * Emit a COND_EXEC packet that tests the value at GPU address addr and
+ * leave a placeholder dword for the number of dwords to skip.
+ *
+ * Returns the ring offset (wptr masked with buf_mask) of that placeholder
+ * so the caller can patch in the real count later.
+ */ static unsigned gfx_v12_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
+{
+ unsigned ret;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, 0);
+ ret = ring->wptr & ring->buf_mask; /* remember where the placeholder goes before writing it */
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
+
+ return ret;
+}
+
+/*
+ * Preempt the IB currently running on ring by submitting an unmap-queues
+ * request with PREEMPT_QUEUES_NO_UNMAP on the KIQ ring, then busy-wait (up
+ * to adev->usec_timeout iterations of 1 us) for the trailing fence to reach
+ * the new ring->trail_seq.
+ *
+ * Returns 0 on success, -EINVAL when MES is enabled, when the KIQ has no
+ * kiq_unmap_queues callback, or when the trailing fence times out, and
+ * -ENOMEM when KIQ ring space cannot be allocated.
+ *
+ * The KIQ ring lock is held (IRQs disabled) only while the request is
+ * queued and committed, not while polling.
+ */ static int gfx_v12_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+
+ if (adev->enable_mes)
+ return -EINVAL;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ /* assert IB preemption, emit the trailing fence */
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
+ ring->trail_fence_gpu_addr,
+ ++ring->trail_seq);
+ amdgpu_ring_commit(kiq_ring);
+
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
+ }
+
+ /* deassert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true); /* done on timeout as well as on success */
+ return r;
+}
+
+/*
+ * Emit a FRAME_CONTROL packet (header + 1 dword). The payload carries
+ * FRAME_CMD(0) when start is true and FRAME_CMD(1) otherwise, with
+ * FRAME_TMZ OR-ed in when secure is true.
+ */
+static void gfx_v12_0_ring_emit_frame_cntl(struct amdgpu_ring *ring,
+					   bool start,
+					   bool secure)
+{
+	uint32_t frame_dw = FRAME_CMD(start ? 0 : 1);
+
+	if (secure)
+		frame_dw |= FRAME_TMZ;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+	amdgpu_ring_write(ring, frame_dw);
+}
+
+/*
+ * Emit a COPY_DATA packet (header + 5 dwords) that copies register reg
+ * into memory at adev->wb.gpu_addr + reg_val_offs * 4 (reg_val_offs is
+ * therefore a dword index), with write confirmation requested.
+ */ static void gfx_v12_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t reg_val_offs)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
+ amdgpu_ring_write(ring, 0 | /* src: register*/
+ (5 << 8) | /* dst: memory */
+ (1 << 20)); /* write confirm */
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+ amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+}
+
+/*
+ * Emit a WRITE_DATA packet (header + 4 dwords) that writes val to register
+ * reg. The control dword depends on the ring type: GFX rings use engine
+ * select 1 with write confirm, KIQ rings set bit 16 (no address increment)
+ * without write confirm, and every other ring type uses write confirm only.
+ */ static void gfx_v12_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg,
+ uint32_t val)
+{
+ uint32_t cmd = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
+ break;
+ case AMDGPU_RING_TYPE_KIQ:
+ cmd = (1 << 16); /* no inc addr */
+ break;
+ default:
+ cmd = WR_CONFIRM;
+ break;
+ }
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, cmd);
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+/*
+ * Emit a wait on register reg with reference val and mask. Thin wrapper
+ * around gfx_v12_0_wait_reg_mem() with engine 0, register (not memory)
+ * mode, no paired write and 0x20 as the final argument.
+ */ static void gfx_v12_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ gfx_v12_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
+}
+
+/*
+ * Emit a combined register-write / register-wait operation on the pair
+ * reg0 / reg1 with reference ref and mask. Thin wrapper around
+ * gfx_v12_0_wait_reg_mem() in register mode with the write option set;
+ * GFX rings pass engine 1, all other ring types engine 0.
+ */ static void gfx_v12_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+
+ gfx_v12_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+ ref, mask, 0x20);
+}
+
+/*
+ * Enable or disable the GFX end-of-pipe interrupt sources
+ * (TIME_STAMP_INT_ENABLE and GENERIC0_INT_ENABLE) in CP_INT_CNTL_RING0.
+ *
+ * Only me 0 / pipe 0 is handled; any other me or pipe is reported with
+ * DRM_DEBUG and ignored. States other than ENABLE/DISABLE are ignored.
+ */ static void
+gfx_v12_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
+ uint32_t me, uint32_t pipe,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t cp_int_cntl, cp_int_cntl_reg;
+
+ if (!me) {
+ switch (pipe) {
+ case 0:
+ cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ TIME_STAMP_INT_ENABLE, 0);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ GENERIC0_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ TIME_STAMP_INT_ENABLE, 1);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ GENERIC0_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+/*
+ * Enable or disable the compute end-of-pipe interrupt sources
+ * (TIME_STAMP_INT_ENABLE and GENERIC0_INT_ENABLE) for one MEC pipe.
+ *
+ * Only me 1 with pipe 0 or 1 is handled; any other me or pipe is reported
+ * with DRM_DEBUG and ignored. The CP_ME1_PIPE0_INT_CNTL field layout is
+ * used for both pipes' registers. States other than ENABLE/DISABLE are
+ * ignored.
+ */ static void gfx_v12_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
+ int me, int pipe,
+ enum amdgpu_interrupt_state state)
+{
+ u32 mec_int_cntl, mec_int_cntl_reg;
+
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+
+ if (me == 1) {
+ switch (pipe) {
+ case 0:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
+ break;
+ case 1:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 0);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ GENERIC0_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 1);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ GENERIC0_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+/*
+ * IRQ-source callback: translate an AMDGPU_CP_IRQ_* type into an (me, pipe)
+ * pair and forward the requested state to the GFX or compute end-of-pipe
+ * helper. Unknown types are ignored. Always returns 0; src is not used.
+ */ static int gfx_v12_0_set_eop_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (type) {
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
+ gfx_v12_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
+ gfx_v12_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
+ gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
+ gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
+ gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
+ gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+/*
+ * End-of-pipe interrupt handler. Always returns 0; source is not used.
+ *
+ * When MES is enabled and entry->src_data[0] is non-zero, that value is
+ * used as a key into adev->userq_xa and the user-queue fence driver found
+ * there (if any) is processed under the xarray lock.
+ *
+ * Otherwise me/pipe/queue are decoded from entry->ring_id (bits 3:2, 1:0
+ * and 6:4) and fences are processed on the matching kernel ring: gfx ring 0
+ * or 1 for me 0, or every compute ring whose me/pipe/queue match for me 1
+ * and 2. Any other me value is ignored.
+ */ static int gfx_v12_0_eop_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u32 doorbell_offset = entry->src_data[0];
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+ int i;
+
+ DRM_DEBUG("IH: CP EOP\n");
+
+ if (adev->enable_mes && doorbell_offset) {
+ struct amdgpu_userq_fence_driver *fence_drv = NULL;
+ struct xarray *xa = &adev->userq_xa;
+ unsigned long flags;
+
+ xa_lock_irqsave(xa, flags);
+ fence_drv = xa_load(xa, doorbell_offset);
+ if (fence_drv)
+ amdgpu_userq_fence_driver_process(fence_drv);
+ xa_unlock_irqrestore(xa, flags);
+ } else {
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ switch (me_id) {
+ case 0:
+ if (pipe_id == 0)
+ amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ else /* NOTE(review): any non-zero pipe maps to gfx_ring[1] without checking num_gfx_rings - confirm */
+ amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ /* Per-queue interrupt is supported for MEC starting from VI.
+ * The interrupt can only be enabled/disabled per pipe instead
+ * of per queue.
+ */
+ if ((ring->me == me_id) &&
+ (ring->pipe == pipe_id) &&
+ (ring->queue == queue_id))
+ amdgpu_fence_process(ring);
+ }
+ break;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * IRQ-source callback: set PRIV_REG_INT_ENABLE to 1 (ENABLE) or 0 (DISABLE)
+ * in the interrupt control register of every ME pipe and every MEC pipe
+ * for which the lookup helper returns a non-zero register offset. Other
+ * states are ignored. Always returns 0; source and type are not used.
+ */ static int gfx_v12_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) { /* an offset of 0 means no register for this me/pipe: skip */
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v12_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * IRQ-source callback: set OPCODE_ERROR_INT_ENABLE to 1 (ENABLE) or 0
+ * (DISABLE) in the interrupt control register of every ME pipe and every
+ * MEC pipe for which the lookup helper returns a non-zero register offset.
+ * Other states are ignored. Always returns 0; source and type are not used.
+ */ static int gfx_v12_0_set_bad_op_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) { /* an offset of 0 means no register for this me/pipe: skip */
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v12_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+/*
+ * IRQ-source callback: set PRIV_INSTR_INT_ENABLE to 1 (ENABLE) or 0
+ * (DISABLE) in the interrupt control register of every ME pipe for which
+ * the lookup helper returns a non-zero register offset. Unlike the priv-reg
+ * and bad-opcode variants, the MEC pipes are not touched here. Other states
+ * are ignored. Always returns 0; source and type are not used.
+ */ static int gfx_v12_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) { /* an offset of 0 means no register for this me/pipe: skip */
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_INSTR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * Common tail of the privileged-register, bad-opcode and
+ * privileged-instruction interrupt handlers: decode me/pipe/queue from
+ * entry->ring_id (bits 3:2, 1:0 and 6:4) and report a scheduler fault on
+ * every kernel GFX or compute ring that matches. Nothing is done when
+ * kernel queues are disabled (adev->gfx.disable_kq).
+ */
+static void gfx_v12_0_handle_priv_fault(struct amdgpu_device *adev,
+					struct amdgpu_iv_entry *entry)
+{
+	u8 me_id, pipe_id, queue_id;
+	struct amdgpu_ring *ring;
+	int i;
+
+	me_id = (entry->ring_id & 0x0c) >> 2;
+	pipe_id = (entry->ring_id & 0x03) >> 0;
+	queue_id = (entry->ring_id & 0x70) >> 4;
+
+	if (!adev->gfx.disable_kq) {
+		switch (me_id) {
+		case 0:
+			for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+				ring = &adev->gfx.gfx_ring[i];
+				if (ring->me == me_id && ring->pipe == pipe_id &&
+				    ring->queue == queue_id)
+					drm_sched_fault(&ring->sched);
+			}
+			break;
+		case 1:
+		case 2:
+			for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+				ring = &adev->gfx.compute_ring[i];
+				if (ring->me == me_id && ring->pipe == pipe_id &&
+				    ring->queue == queue_id)
+					drm_sched_fault(&ring->sched);
+			}
+			break;
+		default:
+			/*
+			 * me_id is a 2-bit field taken from the interrupt
+			 * vector, so the value 3 can reach this point. Log it
+			 * and carry on instead of taking the whole machine
+			 * down with BUG() from interrupt handling.
+			 */
+			DRM_ERROR("priv fault from unexpected me %d\n", me_id);
+			break;
+		}
+	}
+}
+
+/*
+ * Interrupt handler for privileged register faults: log an error and hand
+ * the interrupt vector entry to the common priv-fault handler. Always
+ * returns 0; source is not used.
+ */ static int gfx_v12_0_priv_reg_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal register access in command stream\n");
+ gfx_v12_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+/*
+ * Interrupt handler for illegal opcode faults: log an error and hand the
+ * interrupt vector entry to the common priv-fault handler. Always returns
+ * 0; source is not used.
+ */ static int gfx_v12_0_bad_op_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal opcode in command stream \n");
+ gfx_v12_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+/*
+ * Interrupt handler for privileged instruction faults: log an error and
+ * hand the interrupt vector entry to the common priv-fault handler. Always
+ * returns 0; source is not used.
+ */ static int gfx_v12_0_priv_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal instruction in command stream\n");
+ gfx_v12_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+/*
+ * Emit an ACQUIRE_MEM packet (header + 7 dwords) whose GCR_CNTL dword
+ * requests invalidate and write-back of GL2 and GLM plus invalidate of
+ * GL1, GLV, GLK and GLI. The coherency base is 0, the size dwords are
+ * 0xffffffff / 0xffffff, and the poll interval is 0xA.
+ */ static void gfx_v12_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ const unsigned int gcr_cntl =
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
+
+ /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
+ amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
+ amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
+}
+
+/*
+ * Pad the ring with num_nop dwords of NOP. A single dword is written as
+ * ring->funcs->nop. For more, one NOP packet header is written with a
+ * count of min(num_nop - 2, 0x3ffe), and the remaining num_nop - 1 dwords
+ * are filled by amdgpu_ring_insert_nop().
+ *
+ * NOTE(review): num_nop == 0 is not special-cased and would make the
+ * unsigned num_nop - 2 wrap - confirm callers never pass 0.
+ */ static void gfx_v12_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
+{
+ /* Header itself is a NOP packet */
+ if (num_nop == 1) {
+ amdgpu_ring_write(ring, ring->funcs->nop);
+ return;
+ }
+
+ /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
+
+ /* Header is at index 0, followed by num_nops - 1 NOP packet's */
+ amdgpu_ring_insert_nop(ring, num_nop - 1);
+}
+
+/*
+ * Emit a RUN_CLEANER_SHADER packet (header + 1 reserved dword set to 0)
+ * on the ring.
+ */ static void gfx_v12_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+ /* Emit the cleaner shader */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+}
+
+/*
+ * Print the register snapshots stored in adev->gfx.ip_dump_* through the
+ * DRM printer p, one "name value" line per register.
+ *
+ * Three sections are printed in order - core registers, compute queue
+ * registers per mec/pipe/queue, gfx queue registers per me/pipe/queue - and
+ * printing stops at the first section whose snapshot buffer is NULL. The
+ * per-queue buffers are indexed as consecutive groups of reg_count values
+ * in mec/me -> pipe -> queue order.
+ */ static void gfx_v12_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ for (i = 0; i < reg_count; i++)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_reg_list_12_0[i].reg_name,
+ adev->gfx.ip_dump_core[i]);
+
+ /* print compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_12);
+ drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.mec.num_mec,
+ adev->gfx.mec.num_pipe_per_mec,
+ adev->gfx.mec.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_12[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ }
+ index += reg_count; /* advance to the next queue's group of values */
+ }
+ }
+ }
+
+ /* print gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
+ drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.me.num_me,
+ adev->gfx.me.num_pipe_per_me,
+ adev->gfx.me.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_gfx_queue_reg_list_12[reg].reg_name,
+ adev->gfx.ip_dump_gfx_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+}
+
+/*
+ * Capture register snapshots into the adev->gfx.ip_dump_* buffers.
+ *
+ * Three sections are captured in order - core registers, compute queue
+ * registers per mec/pipe/queue, gfx queue registers per me/pipe/queue - and
+ * capture stops at the first section whose buffer is NULL. Each section
+ * brackets its register reads with amdgpu_gfx_off_ctrl(false/true). The
+ * per-queue sections additionally hold adev->srbm_mutex, select each queue
+ * with soc24_grbm_select() before reading, and restore the selection to
+ * 0/0/0/0 afterwards. Values are stored as consecutive groups of reg_count
+ * in mec/me -> pipe -> queue order.
+ */ static void gfx_v12_ip_dump(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < reg_count; i++)
+ adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_12_0[i]));
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_12);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ /* ME0 is for GFX so start from 1 for CP */
+ soc24_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
+ for (reg = 0; reg < reg_count; reg++) {
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_cp_reg_list_12[reg]));
+ }
+ index += reg_count; /* advance to the next queue's group of values */
+ }
+ }
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ soc24_grbm_select(adev, i, j, k, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
+ adev->gfx.ip_dump_gfx_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_gfx_queue_reg_list_12[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static bool gfx_v12_pipe_reset_support(struct amdgpu_device *adev)
+{
+ /* Disable pipe reset until the CPFW fully supports it. */
+ dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n");
+ return false;
+}
+
+static int gfx_v12_reset_gfx_pipe(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r;
+
+ if (!gfx_v12_pipe_reset_support(adev))
+ return -EOPNOTSUPP;
+
+ gfx_v12_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 0);
+ break;
+ default:
+ break;
+ }
+
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);
+
+ r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) -
+ RS64_FW_UC_START_ADDR_LO;
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v12_0_unset_safe_mode(adev, 0);
+
+ dev_info(adev->dev, "The ring %s pipe reset: %s\n", ring->name,
+ r == 0 ? "successfully" : "failed");
+ /* Sometimes the ME start PC counter is not cached correctly, so the
+ * PC check serves only as a reference; the pipe reset result relies
+ * on the later ring test.
+ */
+ return 0;
+}
+
+static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
+ if (r) {
+ dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r);
+ r = gfx_v12_reset_gfx_pipe(ring);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v12_0_kgq_init_queue(ring, true);
+ if (r) {
+ dev_err(adev->dev, "failed to init kgq\n");
+ return r;
+ }
+
+ r = amdgpu_mes_map_legacy_queue(adev, ring);
+ if (r) {
+ dev_err(adev->dev, "failed to remap kgq\n");
+ return r;
+ }
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r = 0;
+
+ if (!gfx_v12_pipe_reset_support(adev))
+ return -EOPNOTSUPP;
+
+ gfx_v12_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ clean_pipe = reset_pipe;
+
+ if (adev->gfx.rs64_enable) {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 0);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 0);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
+ r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
+ RS64_FW_UC_START_ADDR_LO;
+ } else {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
+ /* No F32 MEC instruction pointer register was found; it is assumed
+ * that the driver won't run in F32 mode.
+ */
+ }
+
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v12_0_unset_safe_mode(adev, 0);
+
+ dev_info(adev->dev, "The ring %s pipe resets: %s\n", ring->name,
+ r == 0 ? "successfully" : "failed");
+ /* The ring test is needed to verify the pipe reset result. */
+ return 0;
+}
+
+static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
+ if (r) {
+ dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
+ r = gfx_v12_0_reset_compute_pipe(ring);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v12_0_kcq_init_queue(ring, true);
+ if (r) {
+ dev_err(adev->dev, "failed to init kcq\n");
+ return r;
+ }
+ r = amdgpu_mes_map_legacy_queue(adev, ring);
+ if (r) {
+ dev_err(adev->dev, "failed to remap kcq\n");
+ return r;
+ }
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static void gfx_v12_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_begin_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+}
+
+static void gfx_v12_0_ring_end_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_end_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
+static const struct amd_ip_funcs gfx_v12_0_ip_funcs = {
+ .name = "gfx_v12_0",
+ .early_init = gfx_v12_0_early_init,
+ .late_init = gfx_v12_0_late_init,
+ .sw_init = gfx_v12_0_sw_init,
+ .sw_fini = gfx_v12_0_sw_fini,
+ .hw_init = gfx_v12_0_hw_init,
+ .hw_fini = gfx_v12_0_hw_fini,
+ .suspend = gfx_v12_0_suspend,
+ .resume = gfx_v12_0_resume,
+ .is_idle = gfx_v12_0_is_idle,
+ .wait_for_idle = gfx_v12_0_wait_for_idle,
+ .set_clockgating_state = gfx_v12_0_set_clockgating_state,
+ .set_powergating_state = gfx_v12_0_set_powergating_state,
+ .get_clockgating_state = gfx_v12_0_get_clockgating_state,
+ .dump_ip_state = gfx_v12_ip_dump,
+ .print_ip_state = gfx_v12_ip_print,
+};
+
+static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = {
+ .type = AMDGPU_RING_TYPE_GFX,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = gfx_v12_0_ring_get_rptr_gfx,
+ .get_wptr = gfx_v12_0_ring_get_wptr_gfx,
+ .set_wptr = gfx_v12_0_ring_set_wptr_gfx,
+ .emit_frame_size = /* totally 242 maximum if 16 IBs */
+ 5 + /* COND_EXEC */
+ 7 + /* PIPELINE_SYNC */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* VM_FLUSH */
+ 8 + /* FENCE for VM_FLUSH */
+ 5 + /* COND_EXEC */
+ 7 + /* HDP_flush */
+ 4 + /* VGT_flush */
+ 31 + /* DE_META */
+ 3 + /* CNTX_CTRL */
+ 5 + /* HDP_INVL */
+ 8 + 8 + /* FENCE x2 */
+ 8 + /* gfx_v12_0_emit_mem_sync */
+ 2, /* gfx_v12_0_ring_emit_cleaner_shader */
+ .emit_ib_size = 4, /* gfx_v12_0_ring_emit_ib_gfx */
+ .emit_ib = gfx_v12_0_ring_emit_ib_gfx,
+ .emit_fence = gfx_v12_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v12_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v12_0_ring_emit_vm_flush,
+ .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v12_0_ring_test_ring,
+ .test_ib = gfx_v12_0_ring_test_ib,
+ .insert_nop = gfx_v12_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_cntxcntl = gfx_v12_0_ring_emit_cntxcntl,
+ .init_cond_exec = gfx_v12_0_ring_emit_init_cond_exec,
+ .preempt_ib = gfx_v12_0_ring_preempt_ib,
+ .emit_frame_cntl = gfx_v12_0_ring_emit_frame_cntl,
+ .emit_wreg = gfx_v12_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v12_0_emit_mem_sync,
+ .reset = gfx_v12_0_reset_kgq,
+ .emit_cleaner_shader = gfx_v12_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v12_0_ring_begin_use,
+ .end_use = gfx_v12_0_ring_end_use,
+};
+
+static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = {
+ .type = AMDGPU_RING_TYPE_COMPUTE,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v12_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v12_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v12_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 7 + /* gfx_v12_0_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v12_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v12_0_ring_emit_vm_flush */
+ 8 + 8 + 8 + /* gfx_v12_0_ring_emit_fence x3 for user fence, vm fence */
+ 8 + /* gfx_v12_0_emit_mem_sync */
+ 2, /* gfx_v12_0_ring_emit_cleaner_shader */
+ .emit_ib_size = 7, /* gfx_v12_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v12_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v12_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v12_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v12_0_ring_emit_vm_flush,
+ .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v12_0_ring_test_ring,
+ .test_ib = gfx_v12_0_ring_test_ib,
+ .insert_nop = gfx_v12_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_wreg = gfx_v12_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v12_0_emit_mem_sync,
+ .reset = gfx_v12_0_reset_kcq,
+ .emit_cleaner_shader = gfx_v12_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v12_0_ring_begin_use,
+ .end_use = gfx_v12_0_ring_end_use,
+};
+
+static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = {
+ .type = AMDGPU_RING_TYPE_KIQ,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v12_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v12_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v12_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 7 + /* gfx_v12_0_ring_emit_hdp_flush */
+ 5 + /*hdp invalidate */
+ 7 + /* gfx_v12_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v12_0_ring_emit_vm_flush */
+ 8 + 8 + 8, /* gfx_v12_0_ring_emit_fence_kiq x3 for user fence, vm fence */
+ .emit_ib_size = 7, /* gfx_v12_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v12_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v12_0_ring_emit_fence_kiq,
+ .test_ring = gfx_v12_0_ring_test_ring,
+ .test_ib = gfx_v12_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_rreg = gfx_v12_0_ring_emit_rreg,
+ .emit_wreg = gfx_v12_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
+};
+
+static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ adev->gfx.kiq[0].ring.funcs = &gfx_v12_0_ring_funcs_kiq;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ adev->gfx.gfx_ring[i].funcs = &gfx_v12_0_ring_funcs_gfx;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ adev->gfx.compute_ring[i].funcs = &gfx_v12_0_ring_funcs_compute;
+}
+
+static const struct amdgpu_irq_src_funcs gfx_v12_0_eop_irq_funcs = {
+ .set = gfx_v12_0_set_eop_interrupt_state,
+ .process = gfx_v12_0_eop_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_reg_irq_funcs = {
+ .set = gfx_v12_0_set_priv_reg_fault_state,
+ .process = gfx_v12_0_priv_reg_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v12_0_bad_op_irq_funcs = {
+ .set = gfx_v12_0_set_bad_op_fault_state,
+ .process = gfx_v12_0_bad_op_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_inst_irq_funcs = {
+ .set = gfx_v12_0_set_priv_inst_fault_state,
+ .process = gfx_v12_0_priv_inst_irq,
+};
+
+static void gfx_v12_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
+ adev->gfx.eop_irq.funcs = &gfx_v12_0_eop_irq_funcs;
+
+ adev->gfx.priv_reg_irq.num_types = 1;
+ adev->gfx.priv_reg_irq.funcs = &gfx_v12_0_priv_reg_irq_funcs;
+
+ adev->gfx.bad_op_irq.num_types = 1;
+ adev->gfx.bad_op_irq.funcs = &gfx_v12_0_bad_op_irq_funcs;
+
+ adev->gfx.priv_inst_irq.num_types = 1;
+ adev->gfx.priv_inst_irq.funcs = &gfx_v12_0_priv_inst_irq_funcs;
+}
+
+static void gfx_v12_0_set_imu_funcs(struct amdgpu_device *adev)
+{
+ if (adev->flags & AMD_IS_APU)
+ adev->gfx.imu.mode = MISSION_MODE;
+ else
+ adev->gfx.imu.mode = DEBUG_MODE;
+
+ adev->gfx.imu.funcs = &gfx_v12_0_imu_funcs;
+}
+
+static void gfx_v12_0_set_rlc_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs = &gfx_v12_0_rlc_funcs;
+}
+
+static void gfx_v12_0_set_mqd_funcs(struct amdgpu_device *adev)
+{
+ /* set gfx eng mqd */
+ adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
+ sizeof(struct v12_gfx_mqd);
+ adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
+ gfx_v12_0_gfx_mqd_init;
+ /* set compute eng mqd */
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
+ sizeof(struct v12_compute_mqd);
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
+ gfx_v12_0_compute_mqd_init;
+}
+
+static void gfx_v12_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
+ u32 bitmap)
+{
+ u32 data;
+
+ if (!bitmap)
+ return;
+
+ data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+ data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+
+ WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
+static u32 gfx_v12_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
+{
+ u32 data, wgp_bitmask;
+ data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
+ data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
+
+ data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+ data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+
+ wgp_bitmask =
+ amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
+
+ return (~data) & wgp_bitmask;
+}
+
+static u32 gfx_v12_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
+{
+ u32 wgp_idx, wgp_active_bitmap;
+ u32 cu_bitmap_per_wgp, cu_active_bitmap;
+
+ wgp_active_bitmap = gfx_v12_0_get_wgp_active_bitmap_per_sh(adev);
+ cu_active_bitmap = 0;
+
+ for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
+ /* if there is one WGP enabled, it means 2 CUs will be enabled */
+ cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
+ if (wgp_active_bitmap & (1 << wgp_idx))
+ cu_active_bitmap |= cu_bitmap_per_wgp;
+ }
+
+ return cu_active_bitmap;
+}
+
+static int gfx_v12_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info)
+{
+ int i, j, k, counter, active_cu_number = 0;
+ u32 mask, bitmap;
+ unsigned disable_masks[8 * 2];
+
+ if (!adev || !cu_info)
+ return -EINVAL;
+
+ amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ bitmap = i * adev->gfx.config.max_sh_per_se + j;
+ if (!((gfx_v12_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
+ continue;
+ mask = 1;
+ counter = 0;
+ gfx_v12_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ if (i < 8 && j < 2)
+ gfx_v12_0_set_user_wgp_inactive_bitmap_per_sh(
+ adev, disable_masks[i * 2 + j]);
+ bitmap = gfx_v12_0_get_cu_active_bitmap_per_sh(adev);
+
+ /**
+ * GFX12 could support more than 4 SEs, while the bitmap
+ * in cu_info struct is 4x4 and ioctl interface struct
+ * drm_amdgpu_info_device should keep stable.
+ * So we use last two columns of bitmap to store cu mask for
+ * SEs 4 to 7, the layout of the bitmap is as below:
+ * SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
+ * SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
+ * SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
+ * SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
+ * SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
+ * SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
+ * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
+ * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
+ */
+ cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
+
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
+ if (bitmap & mask)
+ counter++;
+
+ mask <<= 1;
+ }
+ active_cu_number += counter;
+ }
+ }
+ gfx_v12_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ cu_info->number = active_cu_number;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+
+ return 0;
+}
+
+const struct amdgpu_ip_block_version gfx_v12_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 12,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gfx_v12_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h
new file mode 100644
index 000000000000..f7184b2dc4e8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V12_0_H__
+#define __GFX_V12_0_H__
+
+extern const struct amdgpu_ip_block_version gfx_v12_0_ip_block;
+
+int gfx_v12_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+ bool req);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 6a8dadea40f9..80565392313f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -28,19 +28,33 @@
#include "amdgpu_gfx.h"
#include "amdgpu_ucode.h"
#include "clearstate_si.h"
+#include "si.h"
+#include "sid.h"
+
#include "bif/bif_3_0_d.h"
#include "bif/bif_3_0_sh_mask.h"
+
#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"
+
#include "gca/gfx_6_0_d.h"
#include "gca/gfx_6_0_sh_mask.h"
+#include "gca/gfx_7_2_enum.h"
+
#include "gmc/gmc_6_0_d.h"
#include "gmc/gmc_6_0_sh_mask.h"
+
#include "dce/dce_6_0_d.h"
#include "dce/dce_6_0_sh_mask.h"
-#include "gca/gfx_7_2_enum.h"
+
#include "si_enums.h"
-#include "si.h"
+
+#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003
+#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002
+#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02010001
+
+#define GFX6_NUM_GFX_RINGS 1
+#define GFX6_NUM_COMPUTE_RINGS 2
static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -72,7 +86,7 @@ MODULE_FIRMWARE("amdgpu/hainan_ce.bin");
MODULE_FIRMWARE("amdgpu/hainan_rlc.bin");
static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev);
-static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
+static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer);
//static void gfx_v6_0_init_cp_pg_table(struct amdgpu_device *adev);
static void gfx_v6_0_init_pg(struct amdgpu_device *adev);
@@ -311,7 +325,6 @@ static const u32 verde_rlc_save_restore_register_list[] =
static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err;
const struct gfx_firmware_header_v1_0 *cp_hdr;
const struct rlc_firmware_header_v1_0 *rlc_hdr;
@@ -337,59 +350,49 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
default: BUG();
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
- err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp.bin", chip_name);
if (err)
goto out;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
- err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.me_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me.bin", chip_name);
if (err)
goto out;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
- err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.ce_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ce.bin", chip_name);
if (err)
goto out;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
- err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc.bin", chip_name);
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
rlc_hdr = (const struct rlc_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
out:
if (err) {
- pr_err("gfx6: Failed to load firmware \"%s\"\n", fw_name);
- release_firmware(adev->gfx.pfp_fw);
- adev->gfx.pfp_fw = NULL;
- release_firmware(adev->gfx.me_fw);
- adev->gfx.me_fw = NULL;
- release_firmware(adev->gfx.ce_fw);
- adev->gfx.ce_fw = NULL;
- release_firmware(adev->gfx.rlc_fw);
- adev->gfx.rlc_fw = NULL;
+ pr_err("gfx6: Failed to load firmware %s gfx firmware\n", chip_name);
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
}
return err;
}
@@ -1299,7 +1302,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev)
}
static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
- u32 sh_num, u32 instance)
+ u32 sh_num, u32 instance, int xcc_id)
{
u32 data;
@@ -1452,12 +1455,12 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev,
}
/* GRBM_GFX_INDEX has a different offset on SI */
- gfx_v6_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
}
/* GRBM_GFX_INDEX has a different offset on SI */
- gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
}
static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
@@ -1473,14 +1476,14 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
data = gfx_v6_0_get_rb_active_bitmap(adev);
active_rbs |= data <<
((i * adev->gfx.config.max_sh_per_se + j) *
rb_bitmap_width_per_sh);
}
}
- gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
adev->gfx.config.backend_enable_mask = active_rbs;
adev->gfx.config.num_rbs = hweight32(active_rbs);
@@ -1501,7 +1504,7 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
/* cache the values for userspace */
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
adev->gfx.config.rb_config[i][j].rb_backend_disable =
RREG32(mmCC_RB_BACKEND_DISABLE);
adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
@@ -1510,7 +1513,7 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
RREG32(mmPA_SC_RASTER_CONFIG);
}
}
- gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
}
@@ -1549,7 +1552,7 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
data = RREG32(mmSPI_STATIC_THREAD_MGMT_3);
active_cu = gfx_v6_0_get_cu_enabled(adev);
@@ -1564,7 +1567,7 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev)
}
}
}
- gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
}
@@ -1732,10 +1735,14 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev)
gfx_v6_0_get_cu_info(adev);
gfx_v6_0_config_init(adev);
- WREG32(mmCP_QUEUE_THRESHOLDS, ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) |
- (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT)));
- WREG32(mmCP_MEQ_THRESHOLDS, (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
- (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
+ WREG32(mmCP_QUEUE_THRESHOLDS,
+ ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) |
+ (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT)));
+
+ /* set HW defaults for 3D engine */
+ WREG32(mmCP_MEQ_THRESHOLDS,
+ (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
+ (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
sx_debug_1 = RREG32(mmSX_DEBUG_1);
WREG32(mmSX_DEBUG_1, sx_debug_1);
@@ -1778,39 +1785,26 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev)
udelay(50);
}
-
-static void gfx_v6_0_scratch_init(struct amdgpu_device *adev)
-{
- adev->gfx.scratch.num_reg = 8;
- adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
- adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
-}
-
static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t scratch;
uint32_t tmp = 0;
unsigned i;
int r;
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r)
- return r;
-
- WREG32(scratch, 0xCAFEDEAD);
+ WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
- goto error_free_scratch;
+ return r;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
- amdgpu_ring_write(ring, (scratch - PACKET3_SET_CONFIG_REG_START));
+ amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_CONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(scratch);
+ tmp = RREG32(mmSCRATCH_REG0);
if (tmp == 0xDEADBEEF)
break;
udelay(1);
@@ -1818,9 +1812,6 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
-
-error_free_scratch:
- amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@@ -1903,50 +1894,42 @@ static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_ib ib;
struct dma_fence *f = NULL;
- uint32_t scratch;
+ struct amdgpu_ib ib;
uint32_t tmp = 0;
long r;
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r)
- return r;
-
- WREG32(scratch, 0xCAFEDEAD);
+ WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256,
- AMDGPU_IB_POOL_DIRECT, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
- goto err1;
+ return r;
ib.ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1);
- ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_START));
+ ib.ptr[1] = mmSCRATCH_REG0 - PACKET3_SET_CONFIG_REG_START;
ib.ptr[2] = 0xDEADBEEF;
ib.length_dw = 3;
r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r)
- goto err2;
+ goto error;
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
r = -ETIMEDOUT;
- goto err2;
+ goto error;
} else if (r < 0) {
- goto err2;
+ goto error;
}
- tmp = RREG32(scratch);
+ tmp = RREG32(mmSCRATCH_REG0);
if (tmp == 0xDEADBEEF)
r = 0;
else
r = -EINVAL;
-err2:
- amdgpu_ib_free(adev, &ib, NULL);
+error:
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
-err1:
- amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@@ -2117,7 +2100,7 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32(mmCP_RB0_WPTR, ring->wptr);
/* set the wb address whether it's enabled or not */
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ rptr_addr = ring->rptr_gpu_addr;
WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
@@ -2139,7 +2122,7 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev)
static u64 gfx_v6_0_ring_get_rptr(struct amdgpu_ring *ring)
{
- return ring->adev->wb.wb[ring->rptr_offs];
+ return *ring->rptr_cpu_addr;
}
static u64 gfx_v6_0_ring_get_wptr(struct amdgpu_ring *ring)
@@ -2203,7 +2186,7 @@ static int gfx_v6_0_cp_compute_resume(struct amdgpu_device *adev)
ring->wptr = 0;
WREG32(mmCP_RB1_WPTR, ring->wptr);
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ rptr_addr = ring->rptr_gpu_addr;
WREG32(mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32(mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
@@ -2222,7 +2205,7 @@ static int gfx_v6_0_cp_compute_resume(struct amdgpu_device *adev)
WREG32(mmCP_RB2_CNTL, tmp | CP_RB2_CNTL__RB_RPTR_WR_ENA_MASK);
ring->wptr = 0;
WREG32(mmCP_RB2_WPTR, ring->wptr);
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ rptr_addr = ring->rptr_gpu_addr;
WREG32(mmCP_RB2_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32(mmCP_RB2_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
@@ -2371,7 +2354,7 @@ static void gfx_v6_0_ring_emit_wreg(struct amdgpu_ring *ring,
static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
{
const u32 *src_ptr;
- volatile u32 *dst_ptr;
+ u32 *dst_ptr;
u32 dws;
u64 reg_list_mc_addr;
const struct cs_section_def *cs_data;
@@ -2399,7 +2382,8 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
dws = adev->gfx.rlc.clear_state_size + (256 / 4);
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
(void **)&adev->gfx.rlc.cs_ptr);
@@ -2428,7 +2412,7 @@ static void gfx_v6_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
WREG32_FIELD(RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
if (!enable) {
- gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32(mmSPI_LB_CU_MASK, 0x00ff);
}
}
@@ -2871,47 +2855,23 @@ static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev)
return count;
}
-static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
if (adev->gfx.rlc.cs_data == NULL)
return;
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static void gfx_v6_0_init_pg(struct amdgpu_device *adev)
@@ -3005,7 +2965,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
*(out++) = RREG32(mmSQ_IND_DATA);
}
-static void gfx_v6_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+static void gfx_v6_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
/* type 0 wave data */
dst[(*no_fields)++] = 0;
@@ -3030,7 +2990,7 @@ static void gfx_v6_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
}
-static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t start,
uint32_t size, uint32_t *dst)
{
@@ -3040,7 +3000,7 @@ static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
}
static void gfx_v6_0_select_me_pipe_q(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 q, u32 vm)
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
DRM_INFO("Not implemented\n");
}
@@ -3061,10 +3021,11 @@ static const struct amdgpu_rlc_funcs gfx_v6_0_rlc_funcs = {
.start = gfx_v6_0_rlc_start
};
-static int gfx_v6_0_early_init(void *handle)
+static int gfx_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ adev->gfx.xcc_mask = 1;
adev->gfx.num_gfx_rings = GFX6_NUM_GFX_RINGS;
adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
GFX6_NUM_COMPUTE_RINGS);
@@ -3076,10 +3037,10 @@ static int gfx_v6_0_early_init(void *handle)
return 0;
}
-static int gfx_v6_0_sw_init(void *handle)
+static int gfx_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, r;
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
@@ -3094,8 +3055,6 @@ static int gfx_v6_0_sw_init(void *handle)
if (r)
return r;
- gfx_v6_0_scratch_init(adev);
-
r = gfx_v6_0_init_microcode(adev);
if (r) {
DRM_ERROR("Failed to load gfx firmware!\n");
@@ -3112,7 +3071,7 @@ static int gfx_v6_0_sw_init(void *handle)
ring = &adev->gfx.gfx_ring[i];
ring->ring_obj = NULL;
sprintf(ring->name, "gfx");
- r = amdgpu_ring_init(adev, ring, 1024,
+ r = amdgpu_ring_init(adev, ring, 2048,
&adev->gfx.eop_irq,
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
AMDGPU_RING_PRIO_DEFAULT, NULL);
@@ -3143,13 +3102,18 @@ static int gfx_v6_0_sw_init(void *handle)
return r;
}
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+
return r;
}
-static int gfx_v6_0_sw_fini(void *handle)
+static int gfx_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -3161,10 +3125,10 @@ static int gfx_v6_0_sw_fini(void *handle)
return 0;
}
-static int gfx_v6_0_hw_init(void *handle)
+static int gfx_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gfx_v6_0_constants_init(adev);
@@ -3181,9 +3145,9 @@ static int gfx_v6_0_hw_init(void *handle)
return r;
}
-static int gfx_v6_0_hw_fini(void *handle)
+static int gfx_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gfx_v6_0_cp_enable(adev, false);
adev->gfx.rlc.funcs->stop(adev);
@@ -3192,23 +3156,19 @@ static int gfx_v6_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v6_0_suspend(void *handle)
+static int gfx_v6_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return gfx_v6_0_hw_fini(adev);
+ return gfx_v6_0_hw_fini(ip_block);
}
-static int gfx_v6_0_resume(void *handle)
+static int gfx_v6_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return gfx_v6_0_hw_init(adev);
+ return gfx_v6_0_hw_init(ip_block);
}
-static bool gfx_v6_0_is_idle(void *handle)
+static bool gfx_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK)
return false;
@@ -3216,24 +3176,19 @@ static bool gfx_v6_0_is_idle(void *handle)
return true;
}
-static int gfx_v6_0_wait_for_idle(void *handle)
+static int gfx_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (gfx_v6_0_is_idle(handle))
+ if (gfx_v6_0_is_idle(ip_block))
return 0;
udelay(1);
}
return -ETIMEDOUT;
}
-static int gfx_v6_0_soft_reset(void *handle)
-{
- return 0;
-}
-
static void gfx_v6_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
@@ -3421,11 +3376,11 @@ static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v6_0_set_clockgating_state(void *handle,
+static int gfx_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE)
gate = true;
@@ -3443,11 +3398,11 @@ static int gfx_v6_0_set_clockgating_state(void *handle,
return 0;
}
-static int gfx_v6_0_set_powergating_state(void *handle,
+static int gfx_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE)
gate = true;
@@ -3483,7 +3438,6 @@ static void gfx_v6_0_emit_mem_sync(struct amdgpu_ring *ring)
static const struct amd_ip_funcs gfx_v6_0_ip_funcs = {
.name = "gfx_v6_0",
.early_init = gfx_v6_0_early_init,
- .late_init = NULL,
.sw_init = gfx_v6_0_sw_init,
.sw_fini = gfx_v6_0_sw_fini,
.hw_init = gfx_v6_0_hw_init,
@@ -3492,7 +3446,6 @@ static const struct amd_ip_funcs gfx_v6_0_ip_funcs = {
.resume = gfx_v6_0_resume,
.is_idle = gfx_v6_0_is_idle,
.wait_for_idle = gfx_v6_0_wait_for_idle,
- .soft_reset = gfx_v6_0_soft_reset,
.set_clockgating_state = gfx_v6_0_set_clockgating_state,
.set_powergating_state = gfx_v6_0_set_powergating_state,
};
@@ -3610,12 +3563,12 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev)
mask = 1;
ao_bitmap = 0;
counter = 0;
- gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
if (i < 4 && j < 2)
gfx_v6_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v6_0_get_cu_enabled(adev);
- cu_info->bitmap[i][j] = bitmap;
+ cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask) {
@@ -3632,7 +3585,7 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev)
}
}
- gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index d17a6f399347..2b7aba22ecc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -55,6 +55,9 @@
#define GFX7_NUM_GFX_RINGS 1
#define GFX7_MEC_HPD_SIZE 2048
+#define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001
+#define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003
+
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
@@ -90,8 +93,7 @@ MODULE_FIRMWARE("amdgpu/mullins_ce.bin");
MODULE_FIRMWARE("amdgpu/mullins_rlc.bin");
MODULE_FIRMWARE("amdgpu/mullins_mec.bin");
-static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
-{
+static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = {
{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
@@ -110,8 +112,7 @@ static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
-static const u32 spectre_rlc_save_restore_register_list[] =
-{
+static const u32 spectre_rlc_save_restore_register_list[] = {
(0x0e00 << 16) | (0xc12c >> 2),
0x00000000,
(0x0e00 << 16) | (0xc140 >> 2),
@@ -557,8 +558,7 @@ static const u32 spectre_rlc_save_restore_register_list[] =
(0x0e00 << 16) | (0x9600 >> 2),
};
-static const u32 kalindi_rlc_save_restore_register_list[] =
-{
+static const u32 kalindi_rlc_save_restore_register_list[] = {
(0x0e00 << 16) | (0xc12c >> 2),
0x00000000,
(0x0e00 << 16) | (0xc140 >> 2),
@@ -883,10 +883,20 @@ static const u32 kalindi_rlc_save_restore_register_list[] =
};
static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev);
-static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
+static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer);
static void gfx_v7_0_init_pg(struct amdgpu_device *adev);
static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev);
+static void gfx_v7_0_free_microcode(struct amdgpu_device *adev)
+{
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+}
+
/*
* Core functions
*/
@@ -902,7 +912,6 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev);
static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err;
DRM_DEBUG("\n");
@@ -923,92 +932,53 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
case CHIP_MULLINS:
chip_name = "mullins";
break;
- default: BUG();
+ default:
+ BUG();
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
- err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp.bin", chip_name);
if (err)
goto out;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
- err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.me_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me.bin", chip_name);
if (err)
goto out;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
- err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.ce_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ce.bin", chip_name);
if (err)
goto out;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
- err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.mec_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", chip_name);
if (err)
goto out;
if (adev->asic_type == CHIP_KAVERI) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
- err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec2.bin", chip_name);
if (err)
goto out;
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
- err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
-
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc.bin", chip_name);
out:
if (err) {
- pr_err("gfx7: Failed to load firmware \"%s\"\n", fw_name);
- release_firmware(adev->gfx.pfp_fw);
- adev->gfx.pfp_fw = NULL;
- release_firmware(adev->gfx.me_fw);
- adev->gfx.me_fw = NULL;
- release_firmware(adev->gfx.ce_fw);
- adev->gfx.ce_fw = NULL;
- release_firmware(adev->gfx.mec_fw);
- adev->gfx.mec_fw = NULL;
- release_firmware(adev->gfx.mec2_fw);
- adev->gfx.mec2_fw = NULL;
- release_firmware(adev->gfx.rlc_fw);
- adev->gfx.rlc_fw = NULL;
+ pr_err("gfx7: Failed to load firmware %s gfx firmware\n", chip_name);
+ gfx_v7_0_free_microcode(adev);
}
return err;
}
-static void gfx_v7_0_free_microcode(struct amdgpu_device *adev)
-{
- release_firmware(adev->gfx.pfp_fw);
- adev->gfx.pfp_fw = NULL;
- release_firmware(adev->gfx.me_fw);
- adev->gfx.me_fw = NULL;
- release_firmware(adev->gfx.ce_fw);
- adev->gfx.ce_fw = NULL;
- release_firmware(adev->gfx.mec_fw);
- adev->gfx.mec_fw = NULL;
- release_firmware(adev->gfx.mec2_fw);
- adev->gfx.mec2_fw = NULL;
- release_firmware(adev->gfx.rlc_fw);
- adev->gfx.rlc_fw = NULL;
-}
-
/**
* gfx_v7_0_tiling_mode_table_init - init the hw tiling table
*
@@ -1582,11 +1552,12 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
* @sh_num: sh block to address
* @instance: Certain registers are instanced per SE or SH.
* 0xffffffff means broadcast to all SEs or SHs (CIK).
- *
+ * @xcc_id: xcc accelerated compute core id
* Select which SE, SH combinations to address.
*/
static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
- u32 se_num, u32 sh_num, u32 instance)
+ u32 se_num, u32 sh_num, u32 instance,
+ int xcc_id)
{
u32 data;
@@ -1766,13 +1737,13 @@ gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev,
}
/* GRBM_GFX_INDEX has a different offset on CI+ */
- gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
}
/* GRBM_GFX_INDEX has a different offset on CI+ */
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
}
/**
@@ -1795,13 +1766,13 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0);
data = gfx_v7_0_get_rb_active_bitmap(adev);
active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
rb_bitmap_width_per_sh);
}
}
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
adev->gfx.config.backend_enable_mask = active_rbs;
adev->gfx.config.num_rbs = hweight32(active_rbs);
@@ -1824,7 +1795,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
/* cache the values for userspace */
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0);
adev->gfx.config.rb_config[i][j].rb_backend_disable =
RREG32(mmCC_RB_BACKEND_DISABLE);
adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
@@ -1835,7 +1806,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
RREG32(mmPA_SC_RASTER_CONFIG_1);
}
}
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
}
@@ -1945,7 +1916,7 @@ static void gfx_v7_0_constants_init(struct amdgpu_device *adev)
* making sure that the following register writes will be broadcasted
* to all the shaders
*/
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
@@ -2049,26 +2020,6 @@ static void gfx_v7_0_constants_init(struct amdgpu_device *adev)
udelay(50);
}
-/*
- * GPU scratch registers helpers function.
- */
-/**
- * gfx_v7_0_scratch_init - setup driver info for CP scratch regs
- *
- * @adev: amdgpu_device pointer
- *
- * Set up the number and offset of the CP scratch registers.
- * NOTE: use of CP scratch registers is a legacy interface and
- * is not used by default on newer asics (r6xx+). On newer asics,
- * memory buffers are used for fences rather than scratch regs.
- */
-static void gfx_v7_0_scratch_init(struct amdgpu_device *adev)
-{
- adev->gfx.scratch.num_reg = 8;
- adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
- adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
-}
-
/**
* gfx_v7_0_ring_test_ring - basic gfx ring test
*
@@ -2082,36 +2033,28 @@ static void gfx_v7_0_scratch_init(struct amdgpu_device *adev)
static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t scratch;
uint32_t tmp = 0;
unsigned i;
int r;
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r)
- return r;
-
- WREG32(scratch, 0xCAFEDEAD);
+ WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
- goto error_free_scratch;
+ return r;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
- amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
+ amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(scratch);
+ tmp = RREG32(mmSCRATCH_REG0);
if (tmp == 0xDEADBEEF)
break;
udelay(1);
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
-
-error_free_scratch:
- amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@@ -2180,6 +2123,8 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
{
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+ bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
+
/* Workaround for cache flush problems. First send a dummy EOP
* event down the pipe with seq one below.
*/
@@ -2199,7 +2144,8 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
EOP_TC_ACTION_EN |
EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
- EVENT_INDEX(5)));
+ EVENT_INDEX(5) |
+ (exec ? EOP_EXEC : 0)));
amdgpu_ring_write(ring, addr & 0xfffffffc);
amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
@@ -2355,48 +2301,40 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib ib;
struct dma_fence *f = NULL;
- uint32_t scratch;
uint32_t tmp = 0;
long r;
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r)
- return r;
-
- WREG32(scratch, 0xCAFEDEAD);
+ WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256,
- AMDGPU_IB_POOL_DIRECT, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
- goto err1;
+ return r;
ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
- ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
+ ib.ptr[1] = mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START;
ib.ptr[2] = 0xDEADBEEF;
ib.length_dw = 3;
r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r)
- goto err2;
+ goto error;
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
r = -ETIMEDOUT;
- goto err2;
+ goto error;
} else if (r < 0) {
- goto err2;
+ goto error;
}
- tmp = RREG32(scratch);
+ tmp = RREG32(mmSCRATCH_REG0);
if (tmp == 0xDEADBEEF)
r = 0;
else
r = -EINVAL;
-err2:
- amdgpu_ib_free(adev, &ib, NULL);
+error:
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
-err1:
- amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@@ -2631,7 +2569,7 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
/* set the wb address whether it's enabled or not */
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ rptr_addr = ring->rptr_gpu_addr;
WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
@@ -2656,7 +2594,7 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
static u64 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
{
- return ring->adev->wb.wb[ring->rptr_offs];
+ return *ring->rptr_cpu_addr;
}
static u64 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
@@ -2677,7 +2615,7 @@ static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
static u64 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
/* XXX check if swapping is necessary on BE */
- return ring->adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
}
static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
@@ -2685,7 +2623,7 @@ static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
/* XXX check if swapping is necessary on BE */
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
@@ -2798,7 +2736,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
u32 *hpd;
size_t mec_hpd_size;
- bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
/* take ownership of the relevant compute queues */
amdgpu_gfx_compute_queue_acquire(adev);
@@ -2808,7 +2746,8 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
* GFX7_MEC_HPD_SIZE * 2;
r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
&adev->gfx.mec.hpd_eop_obj,
&adev->gfx.mec.hpd_eop_gpu_addr,
(void **)&hpd);
@@ -2827,45 +2766,6 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
return 0;
}
-struct hqd_registers
-{
- u32 cp_mqd_base_addr;
- u32 cp_mqd_base_addr_hi;
- u32 cp_hqd_active;
- u32 cp_hqd_vmid;
- u32 cp_hqd_persistent_state;
- u32 cp_hqd_pipe_priority;
- u32 cp_hqd_queue_priority;
- u32 cp_hqd_quantum;
- u32 cp_hqd_pq_base;
- u32 cp_hqd_pq_base_hi;
- u32 cp_hqd_pq_rptr;
- u32 cp_hqd_pq_rptr_report_addr;
- u32 cp_hqd_pq_rptr_report_addr_hi;
- u32 cp_hqd_pq_wptr_poll_addr;
- u32 cp_hqd_pq_wptr_poll_addr_hi;
- u32 cp_hqd_pq_doorbell_control;
- u32 cp_hqd_pq_wptr;
- u32 cp_hqd_pq_control;
- u32 cp_hqd_ib_base_addr;
- u32 cp_hqd_ib_base_addr_hi;
- u32 cp_hqd_ib_rptr;
- u32 cp_hqd_ib_control;
- u32 cp_hqd_iq_timer;
- u32 cp_hqd_iq_rptr;
- u32 cp_hqd_dequeue_request;
- u32 cp_hqd_dma_offload;
- u32 cp_hqd_sema_cmd;
- u32 cp_hqd_msg_type;
- u32 cp_hqd_atomic0_preop_lo;
- u32 cp_hqd_atomic0_preop_hi;
- u32 cp_hqd_atomic1_preop_lo;
- u32 cp_hqd_atomic1_preop_hi;
- u32 cp_hqd_hq_scheduler0;
- u32 cp_hqd_hq_scheduler1;
- u32 cp_mqd_control;
-};
-
static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev,
int mec, int pipe)
{
@@ -2981,12 +2881,12 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wb_gpu_addr = ring->wptr_gpu_addr;
mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
/* set the wb address whether it's enabled or not */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ wb_gpu_addr = ring->rptr_gpu_addr;
mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_rptr_report_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
@@ -3345,7 +3245,7 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
/* init spm vmid with 0xf */
if (adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
return 0;
}
@@ -3370,7 +3270,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0);
for (k = 0; k < adev->usec_timeout; k++) {
if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
break;
@@ -3378,7 +3278,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
}
}
}
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
@@ -3430,7 +3330,7 @@ static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev)
return true;
}
-static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev)
+static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
u32 tmp, i, mask;
@@ -3452,7 +3352,7 @@ static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev)
}
}
-static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev)
+static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
u32 tmp;
@@ -3543,7 +3443,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
WREG32(mmRLC_LB_CNTR_MAX, 0x00008000);
mutex_lock(&adev->grbm_idx_mutex);
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);
WREG32(mmRLC_LB_PARAMS, 0x00600408);
WREG32(mmRLC_LB_CNTL, 0x80000004);
@@ -3571,7 +3471,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
return 0;
}
-static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
+static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
{
u32 data;
@@ -3599,7 +3499,7 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
tmp = gfx_v7_0_halt_rlc(adev);
mutex_lock(&adev->grbm_idx_mutex);
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
@@ -3653,7 +3553,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
tmp = gfx_v7_0_halt_rlc(adev);
mutex_lock(&adev->grbm_idx_mutex);
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
@@ -3704,7 +3604,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
tmp = gfx_v7_0_halt_rlc(adev);
mutex_lock(&adev->grbm_idx_mutex);
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK;
@@ -3982,70 +3882,24 @@ static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev)
return count;
}
-static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
if (adev->gfx.rlc.cs_data == NULL)
return;
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
- switch (adev->asic_type) {
- case CHIP_BONAIRE:
- buffer[count++] = cpu_to_le32(0x16000012);
- buffer[count++] = cpu_to_le32(0x00000000);
- break;
- case CHIP_KAVERI:
- buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
- buffer[count++] = cpu_to_le32(0x00000000);
- break;
- case CHIP_KABINI:
- case CHIP_MULLINS:
- buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
- buffer[count++] = cpu_to_le32(0x00000000);
- break;
- case CHIP_HAWAII:
- buffer[count++] = cpu_to_le32(0x3a00161a);
- buffer[count++] = cpu_to_le32(0x0000002e);
- break;
- default:
- buffer[count++] = cpu_to_le32(0x00000000);
- buffer[count++] = cpu_to_le32(0x00000000);
- break;
- }
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
+ buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
+ buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static void gfx_v7_0_init_pg(struct amdgpu_device *adev)
@@ -4180,7 +4034,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
*(out++) = RREG32(mmSQ_IND_DATA);
}
-static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
/* type 0 wave data */
dst[(*no_fields)++] = 0;
@@ -4205,7 +4059,7 @@ static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
}
-static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t start,
uint32_t size, uint32_t *dst)
{
@@ -4215,7 +4069,7 @@ static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
}
static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 q, u32 vm)
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
cik_srbm_select(adev, me, pipe, q, vm);
}
@@ -4243,10 +4097,11 @@ static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
.update_spm_vmid = gfx_v7_0_update_spm_vmid
};
-static int gfx_v7_0_early_init(void *handle)
+static int gfx_v7_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ adev->gfx.xcc_mask = 1;
adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
AMDGPU_MAX_COMPUTE_RINGS);
@@ -4259,9 +4114,9 @@ static int gfx_v7_0_early_init(void *handle)
return 0;
}
-static int gfx_v7_0_late_init(void *handle)
+static int gfx_v7_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -4451,10 +4306,10 @@ static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
return 0;
}
-static int gfx_v7_0_sw_init(void *handle)
+static int gfx_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j, k, r, ring_id;
switch (adev->asic_type) {
@@ -4489,8 +4344,6 @@ static int gfx_v7_0_sw_init(void *handle)
if (r)
return r;
- gfx_v7_0_scratch_init(adev);
-
r = gfx_v7_0_init_microcode(adev);
if (r) {
DRM_ERROR("Failed to load gfx firmware!\n");
@@ -4527,7 +4380,8 @@ static int gfx_v7_0_sw_init(void *handle)
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
- if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
continue;
r = gfx_v7_0_compute_ring_init(adev,
@@ -4545,12 +4399,17 @@ static int gfx_v7_0_sw_init(void *handle)
gfx_v7_0_gpu_early_init(adev);
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+
return r;
}
-static int gfx_v7_0_sw_fini(void *handle)
+static int gfx_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
@@ -4574,10 +4433,10 @@ static int gfx_v7_0_sw_fini(void *handle)
return 0;
}
-static int gfx_v7_0_hw_init(void *handle)
+static int gfx_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gfx_v7_0_constants_init(adev);
@@ -4595,9 +4454,9 @@ static int gfx_v7_0_hw_init(void *handle)
return r;
}
-static int gfx_v7_0_hw_fini(void *handle)
+static int gfx_v7_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
@@ -4608,23 +4467,19 @@ static int gfx_v7_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v7_0_suspend(void *handle)
+static int gfx_v7_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return gfx_v7_0_hw_fini(adev);
+ return gfx_v7_0_hw_fini(ip_block);
}
-static int gfx_v7_0_resume(void *handle)
+static int gfx_v7_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return gfx_v7_0_hw_init(adev);
+ return gfx_v7_0_hw_init(ip_block);
}
-static bool gfx_v7_0_is_idle(void *handle)
+static bool gfx_v7_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK)
return false;
@@ -4632,11 +4487,11 @@ static bool gfx_v7_0_is_idle(void *handle)
return true;
}
-static int gfx_v7_0_wait_for_idle(void *handle)
+static int gfx_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -4649,11 +4504,11 @@ static int gfx_v7_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int gfx_v7_0_soft_reset(void *handle)
+static int gfx_v7_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* GRBM_STATUS */
tmp = RREG32(mmGRBM_STATUS);
@@ -4959,11 +4814,11 @@ static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v7_0_set_clockgating_state(void *handle,
+static int gfx_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE)
gate = true;
@@ -4982,11 +4837,11 @@ static int gfx_v7_0_set_clockgating_state(void *handle,
return 0;
}
-static int gfx_v7_0_set_powergating_state(void *handle,
+static int gfx_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE)
gate = true;
@@ -5112,6 +4967,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v7_0_ring_emit_wreg,
+ .soft_recovery = gfx_v7_0_ring_soft_recovery,
.emit_mem_sync = gfx_v7_0_emit_mem_sync_compute,
};
@@ -5185,18 +5041,18 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
mask = 1;
ao_bitmap = 0;
counter = 0;
- gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0);
if (i < 4 && j < 2)
gfx_v7_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
- cu_info->bitmap[i][j] = bitmap;
+ cu_info->bitmap[0][i][j] = bitmap;
- for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask) {
if (counter < ao_cu_num)
ao_bitmap |= mask;
- counter ++;
+ counter++;
}
mask <<= 1;
}
@@ -5206,7 +5062,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
}
}
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;
@@ -5218,8 +5074,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
cu_info->lds_size = 64;
}
-const struct amdgpu_ip_block_version gfx_v7_1_ip_block =
-{
+const struct amdgpu_ip_block_version gfx_v7_1_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GFX,
.major = 7,
.minor = 1,
@@ -5227,8 +5082,7 @@ const struct amdgpu_ip_block_version gfx_v7_1_ip_block =
.funcs = &gfx_v7_0_ip_funcs,
};
-const struct amdgpu_ip_block_version gfx_v7_2_ip_block =
-{
+const struct amdgpu_ip_block_version gfx_v7_2_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GFX,
.major = 7,
.minor = 2,
@@ -5236,8 +5090,7 @@ const struct amdgpu_ip_block_version gfx_v7_2_ip_block =
.funcs = &gfx_v7_0_ip_funcs,
};
-const struct amdgpu_ip_block_version gfx_v7_3_ip_block =
-{
+const struct amdgpu_ip_block_version gfx_v7_3_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GFX,
.major = 7,
.minor = 3,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 5f112efda634..1c87375e1dd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -835,37 +835,25 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
}
}
-static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
-{
- adev->gfx.scratch.num_reg = 8;
- adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
- adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
-}
-
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t scratch;
uint32_t tmp = 0;
unsigned i;
int r;
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r)
- return r;
-
- WREG32(scratch, 0xCAFEDEAD);
+ WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
- goto error_free_scratch;
+ return r;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
- amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
+ amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(scratch);
+ tmp = RREG32(mmSCRATCH_REG0);
if (tmp == 0xDEADBEEF)
break;
udelay(1);
@@ -874,8 +862,6 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
-error_free_scratch:
- amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@@ -897,8 +883,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 16,
- AMDGPU_IB_POOL_DIRECT, &ib);
+
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err1;
@@ -928,7 +914,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err2:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
amdgpu_device_wb_free(adev, index);
@@ -938,20 +924,14 @@ err1:
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
- release_firmware(adev->gfx.pfp_fw);
- adev->gfx.pfp_fw = NULL;
- release_firmware(adev->gfx.me_fw);
- adev->gfx.me_fw = NULL;
- release_firmware(adev->gfx.ce_fw);
- adev->gfx.ce_fw = NULL;
- release_firmware(adev->gfx.rlc_fw);
- adev->gfx.rlc_fw = NULL;
- release_firmware(adev->gfx.mec_fw);
- adev->gfx.mec_fw = NULL;
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
if ((adev->asic_type != CHIP_STONEY) &&
(adev->asic_type != CHIP_TOPAZ))
- release_firmware(adev->gfx.mec2_fw);
- adev->gfx.mec2_fw = NULL;
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
kfree(adev->gfx.rlc.register_list_format);
}
@@ -959,7 +939,6 @@ static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err;
struct amdgpu_firmware_info *info = NULL;
const struct common_firmware_header *header = NULL;
@@ -1002,62 +981,62 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
}
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
- err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
- if (err == -ENOENT) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
- err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_pfp_2.bin", chip_name);
+ if (err == -ENODEV) {
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp.bin", chip_name);
}
} else {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
- err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp.bin", chip_name);
}
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
- if (err)
- goto out;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
- err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
- if (err == -ENOENT) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
- err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_me_2.bin", chip_name);
+ if (err == -ENODEV) {
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me.bin", chip_name);
}
} else {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
- err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me.bin", chip_name);
}
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.me_fw);
- if (err)
- goto out;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
- err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
- if (err == -ENOENT) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
- err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_ce_2.bin", chip_name);
+ if (err == -ENODEV) {
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ce.bin", chip_name);
}
} else {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
- err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ce.bin", chip_name);
}
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.ce_fw);
- if (err)
- goto out;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
@@ -1073,11 +1052,11 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
} else
adev->virt.chained_ib_support = false;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
- err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc.bin", chip_name);
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
@@ -1123,21 +1102,21 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
- err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
- if (err == -ENOENT) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
- err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_mec_2.bin", chip_name);
+ if (err == -ENODEV) {
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", chip_name);
}
} else {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
- err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", chip_name);
}
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.mec_fw);
- if (err)
- goto out;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
@@ -1145,20 +1124,20 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
if ((adev->asic_type != CHIP_STONEY) &&
(adev->asic_type != CHIP_TOPAZ)) {
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
- err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
- if (err == -ENOENT) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
- err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_mec2_2.bin", chip_name);
+ if (err == -ENODEV) {
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec2.bin", chip_name);
}
} else {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
- err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec2.bin", chip_name);
}
if (!err) {
- err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
- if (err)
- goto out;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)
adev->gfx.mec2_fw->data;
adev->gfx.mec2_fw_version =
@@ -1230,70 +1209,35 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
out:
if (err) {
- dev_err(adev->dev,
- "gfx8: Failed to load firmware \"%s\"\n",
- fw_name);
- release_firmware(adev->gfx.pfp_fw);
- adev->gfx.pfp_fw = NULL;
- release_firmware(adev->gfx.me_fw);
- adev->gfx.me_fw = NULL;
- release_firmware(adev->gfx.ce_fw);
- adev->gfx.ce_fw = NULL;
- release_firmware(adev->gfx.rlc_fw);
- adev->gfx.rlc_fw = NULL;
- release_firmware(adev->gfx.mec_fw);
- adev->gfx.mec_fw = NULL;
- release_firmware(adev->gfx.mec2_fw);
- adev->gfx.mec2_fw = NULL;
+ dev_err(adev->dev, "gfx8: Failed to load firmware %s gfx firmware\n", chip_name);
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
}
return err;
}
-static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
if (adev->gfx.rlc.cs_data == NULL)
return;
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index -
- PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
- buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
- PACKET3_SET_CONTEXT_REG_START);
+ buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
@@ -1330,7 +1274,7 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
/* init spm vmid with 0xf */
if (adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
return 0;
}
@@ -1346,7 +1290,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
u32 *hpd;
size_t mec_hpd_size;
- bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
/* take ownership of the relevant compute queues */
amdgpu_gfx_compute_queue_acquire(adev);
@@ -1354,7 +1298,8 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
if (mec_hpd_size) {
r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
&adev->gfx.mec.hpd_eop_obj,
&adev->gfx.mec.hpd_eop_gpu_addr,
(void **)&hpd);
@@ -1684,7 +1629,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
RREG32(sec_ded_counter_registers[i]);
fail:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
return r;
@@ -1925,7 +1870,7 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ ring->pipe;
hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
- AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT;
+ AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
/* type-2 packets are deprecated on MEC, use type-3 instead */
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
hw_prio, NULL);
@@ -1938,12 +1883,12 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
-static int gfx_v8_0_sw_init(void *handle)
+static int gfx_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int i, j, k, r, ring_id;
+ int xcc_id = 0;
struct amdgpu_ring *ring;
- struct amdgpu_kiq *kiq;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
switch (adev->asic_type) {
case CHIP_TONGA:
@@ -2000,8 +1945,6 @@ static int gfx_v8_0_sw_init(void *handle)
adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
- gfx_v8_0_scratch_init(adev);
-
r = gfx_v8_0_init_microcode(adev);
if (r) {
DRM_ERROR("Failed to load gfx firmware!\n");
@@ -2044,7 +1987,8 @@ static int gfx_v8_0_sw_init(void *handle)
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
- if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
continue;
r = gfx_v8_0_compute_ring_init(adev,
@@ -2058,19 +2002,18 @@ static int gfx_v8_0_sw_init(void *handle)
}
}
- r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
+ r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE, 0);
if (r) {
DRM_ERROR("Failed to init KIQ BOs!\n");
return r;
}
- kiq = &adev->gfx.kiq;
- r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
if (r)
return r;
/* create MQD for all compute queues as well as KIQ for SRIOV case */
- r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation), 0);
if (r)
return r;
@@ -2080,12 +2023,17 @@ static int gfx_v8_0_sw_init(void *handle)
if (r)
return r;
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+
return 0;
}
-static int gfx_v8_0_sw_fini(void *handle)
+static int gfx_v8_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
@@ -2093,9 +2041,9 @@ static int gfx_v8_0_sw_fini(void *handle)
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
- amdgpu_gfx_mqd_sw_fini(adev);
- amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
- amdgpu_gfx_kiq_fini(adev);
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
gfx_v8_0_mec_fini(adev);
amdgpu_gfx_rlc_fini(adev);
@@ -3437,7 +3385,8 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
}
static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
- u32 se_num, u32 sh_num, u32 instance)
+ u32 se_num, u32 sh_num, u32 instance,
+ int xcc_id)
{
u32 data;
@@ -3460,7 +3409,7 @@ static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
}
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 q, u32 vm)
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
vi_srbm_select(adev, me, pipe, q, vm);
}
@@ -3621,13 +3570,13 @@ gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
}
/* GRBM_GFX_INDEX has a different offset on VI */
- gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
}
/* GRBM_GFX_INDEX has a different offset on VI */
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
}
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
@@ -3643,13 +3592,13 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
data = gfx_v8_0_get_rb_active_bitmap(adev);
active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
rb_bitmap_width_per_sh);
}
}
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
adev->gfx.config.backend_enable_mask = active_rbs;
adev->gfx.config.num_rbs = hweight32(active_rbs);
@@ -3672,7 +3621,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
/* cache the values for userspace */
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
adev->gfx.config.rb_config[i][j].rb_backend_disable =
RREG32(mmCC_RB_BACKEND_DISABLE);
adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
@@ -3683,7 +3632,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
RREG32(mmPA_SC_RASTER_CONFIG_1);
}
}
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
}
@@ -3730,7 +3679,7 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
mutex_unlock(&adev->srbm_mutex);
/* Initialize all compute VMIDs to have no GDS, GWS, or OA
- acccess. These should be enabled by FW for target VMIDs. */
+ access. These should be enabled by FW for target VMIDs. */
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
@@ -3830,7 +3779,7 @@ static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
* making sure that the following register writes will be broadcasted
* to all the shaders
*/
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32(mmPA_SC_FIFO_SIZE,
(adev->gfx.config.sc_prim_fifo_size_frontend <<
@@ -3861,7 +3810,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
for (k = 0; k < adev->usec_timeout; k++) {
if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
break;
@@ -3869,7 +3818,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
}
if (k == adev->usec_timeout) {
gfx_v8_0_select_se_sh(adev, 0xffffffff,
- 0xffffffff, 0xffffffff);
+ 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
i, j);
@@ -3877,7 +3826,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
}
}
}
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
@@ -4305,12 +4254,12 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
ring->wptr = 0;
WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
- /* set the wb address wether it's enabled or not */
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ /* set the wb address whether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
mdelay(1);
@@ -4324,7 +4273,6 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
/* start the ring */
amdgpu_ring_clear_ring(ring);
gfx_v8_0_cp_gfx_start(adev);
- ring->sched.ready = true;
return 0;
}
@@ -4335,7 +4283,7 @@ static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
WREG32(mmCP_MEC_CNTL, 0);
} else {
WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
- adev->gfx.kiq.ring.sched.ready = false;
+ adev->gfx.kiq[0].ring.sched.ready = false;
}
udelay(50);
}
@@ -4350,19 +4298,17 @@ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32(mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32(mmRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32(mmRLC_CP_SCHEDULERS, tmp);
+ WREG32(mmRLC_CP_SCHEDULERS, tmp | 0x80);
}
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
uint64_t queue_mask = 0;
int r, i;
for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
- if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+ if (!test_bit(i, adev->gfx.mec_bitmap[0].queue_bitmap))
continue;
/* This situation may be hit in the future if a new HW
@@ -4393,7 +4339,7 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
- uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ uint64_t wptr_addr = ring->wptr_gpu_addr;
/* map queues */
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
@@ -4506,7 +4452,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
(order_base_2(ring->ring_size / 4) - 1));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
- ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+ (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
#ifdef __BIG_ENDIAN
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
@@ -4517,13 +4463,13 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
mqd->cp_hqd_pq_control = tmp;
/* set the wb address whether it's enabled or not */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ wb_gpu_addr = ring->rptr_gpu_addr;
mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_rptr_report_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wb_gpu_addr = ring->wptr_gpu_addr;
mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
@@ -4638,14 +4584,13 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct vi_mqd *mqd = ring->mqd_ptr;
- int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
gfx_v8_0_kiq_setting(ring);
if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
/* reset MQD to a clean status */
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct vi_mqd_allocation));
/* reset ring buffer */
ring->wptr = 0;
@@ -4659,6 +4604,8 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
+ if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+ amdgpu_ring_clear_ring(ring);
mutex_lock(&adev->srbm_mutex);
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
gfx_v8_0_mqd_init(ring);
@@ -4666,8 +4613,8 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct vi_mqd_allocation));
}
return 0;
@@ -4691,14 +4638,13 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
if (adev->gfx.mec.mqd_backup[mqd_idx])
memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
- } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
- /* reset MQD to a clean status */
+ } else {
+ /* restore MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx])
memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
/* reset ring buffer */
ring->wptr = 0;
- amdgpu_ring_clear_ring(ring);
- } else {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
amdgpu_ring_clear_ring(ring);
}
return 0;
@@ -4716,59 +4662,25 @@ static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
- int r;
-
- ring = &adev->gfx.kiq.ring;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
- if (unlikely(r != 0))
- return r;
-
- gfx_v8_0_kiq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
- ring->sched.ready = true;
+ gfx_v8_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
return 0;
}
static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
+ int i, r;
gfx_v8_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
- r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
- if (!r) {
- r = gfx_v8_0_kcq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v8_0_kcq_init_queue(&adev->gfx.compute_ring[i]);
if (r)
- goto done;
+ return r;
}
gfx_v8_0_set_mec_doorbell_range(adev);
- r = gfx_v8_0_kiq_kcq_enable(adev);
- if (r)
- goto done;
-
-done:
- return r;
+ return gfx_v8_0_kiq_kcq_enable(adev);
}
static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
@@ -4782,7 +4694,7 @@ static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
if (r)
return r;
- ring = &adev->gfx.kiq.ring;
+ ring = &adev->gfx.kiq[0].ring;
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
@@ -4829,10 +4741,10 @@ static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
gfx_v8_0_cp_compute_enable(adev, enable);
}
-static int gfx_v8_0_hw_init(void *handle)
+static int gfx_v8_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gfx_v8_0_init_golden_registers(adev);
gfx_v8_0_constants_init(adev);
@@ -4849,7 +4761,7 @@ static int gfx_v8_0_hw_init(void *handle)
static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
{
int r, i;
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
if (r)
@@ -4869,6 +4781,13 @@ static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
}
+ /* Submit unmap queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that unmap queues that is submitted before got
+ * processed successfully before returning.
+ */
r = amdgpu_ring_test_helper(kiq_ring);
if (r)
DRM_ERROR("KCQ disable failed\n");
@@ -4876,9 +4795,9 @@ static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
return r;
}
-static bool gfx_v8_0_is_idle(void *handle)
+static bool gfx_v8_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
|| RREG32(mmGRBM_STATUS2) != 0x8)
@@ -4911,13 +4830,13 @@ static int gfx_v8_0_wait_for_rlc_idle(void *handle)
return -ETIMEDOUT;
}
-static int gfx_v8_0_wait_for_idle(void *handle)
+static int gfx_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (gfx_v8_0_is_idle(handle))
+ if (gfx_v8_0_is_idle(ip_block))
return 0;
udelay(1);
@@ -4925,9 +4844,9 @@ static int gfx_v8_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int gfx_v8_0_hw_fini(void *handle)
+static int gfx_v8_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
@@ -4943,8 +4862,9 @@ static int gfx_v8_0_hw_fini(void *handle)
pr_debug("For SRIOV client, shouldn't do anything.\n");
return 0;
}
- amdgpu_gfx_rlc_enter_safe_mode(adev);
- if (!gfx_v8_0_wait_for_idle(adev))
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ if (!gfx_v8_0_wait_for_idle(ip_block))
gfx_v8_0_cp_enable(adev, false);
else
pr_err("cp is busy, skip halt cp\n");
@@ -4952,24 +4872,24 @@ static int gfx_v8_0_hw_fini(void *handle)
adev->gfx.rlc.funcs->stop(adev);
else
pr_err("rlc is busy, skip halt rlc\n");
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return 0;
}
-static int gfx_v8_0_suspend(void *handle)
+static int gfx_v8_0_suspend(struct amdgpu_ip_block *ip_block)
{
- return gfx_v8_0_hw_fini(handle);
+ return gfx_v8_0_hw_fini(ip_block);
}
-static int gfx_v8_0_resume(void *handle)
+static int gfx_v8_0_resume(struct amdgpu_ip_block *ip_block)
{
- return gfx_v8_0_hw_init(handle);
+ return gfx_v8_0_hw_init(ip_block);
}
-static bool gfx_v8_0_check_soft_reset(void *handle)
+static bool gfx_v8_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
u32 tmp;
@@ -5029,9 +4949,9 @@ static bool gfx_v8_0_check_soft_reset(void *handle)
}
}
-static int gfx_v8_0_pre_soft_reset(void *handle)
+static int gfx_v8_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 grbm_soft_reset = 0;
if ((!adev->gfx.grbm_soft_reset) &&
@@ -5070,9 +4990,9 @@ static int gfx_v8_0_pre_soft_reset(void *handle)
return 0;
}
-static int gfx_v8_0_soft_reset(void *handle)
+static int gfx_v8_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
u32 tmp;
@@ -5132,9 +5052,9 @@ static int gfx_v8_0_soft_reset(void *handle)
return 0;
}
-static int gfx_v8_0_post_soft_reset(void *handle)
+static int gfx_v8_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 grbm_soft_reset = 0;
if ((!adev->gfx.grbm_soft_reset) &&
@@ -5257,7 +5177,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
*(out++) = RREG32(mmSQ_IND_DATA);
}
-static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
/* type 0 wave data */
dst[(*no_fields)++] = 0;
@@ -5282,7 +5202,7 @@ static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
}
-static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t start,
uint32_t size, uint32_t *dst)
{
@@ -5300,10 +5220,11 @@ static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
-static int gfx_v8_0_early_init(void *handle)
+static int gfx_v8_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ adev->gfx.xcc_mask = 1;
adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
AMDGPU_MAX_COMPUTE_RINGS);
@@ -5316,9 +5237,9 @@ static int gfx_v8_0_early_init(void *handle)
return 0;
}
-static int gfx_v8_0_late_init(void *handle)
+static int gfx_v8_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -5358,7 +5279,7 @@ static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *ade
(adev->asic_type == CHIP_POLARIS12) ||
(adev->asic_type == CHIP_VEGAM))
/* Send msg to SMU via Powerplay */
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable, 0);
WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
@@ -5404,10 +5325,10 @@ static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
}
}
-static int gfx_v8_0_set_powergating_state(void *handle,
+static int gfx_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
@@ -5417,7 +5338,7 @@ static int gfx_v8_0_set_powergating_state(void *handle,
AMD_PG_SUPPORT_RLC_SMU_HS |
AMD_PG_SUPPORT_CP |
AMD_PG_SUPPORT_GFX_DMG))
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
switch (adev->asic_type) {
case CHIP_CARRIZO:
case CHIP_STONEY:
@@ -5471,13 +5392,13 @@ static int gfx_v8_0_set_powergating_state(void *handle,
AMD_PG_SUPPORT_RLC_SMU_HS |
AMD_PG_SUPPORT_CP |
AMD_PG_SUPPORT_GFX_DMG))
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return 0;
}
-static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
+static void gfx_v8_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -5522,7 +5443,7 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
{
uint32_t data;
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
@@ -5576,7 +5497,7 @@ static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
return true;
}
-static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
+static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
uint32_t data;
unsigned i;
@@ -5603,7 +5524,7 @@ static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
}
}
-static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
+static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
uint32_t data;
unsigned i;
@@ -5620,7 +5541,7 @@ static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
}
}
-static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
+static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
{
u32 data;
@@ -5662,8 +5583,6 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
{
uint32_t temp, data;
- amdgpu_gfx_rlc_enter_safe_mode(adev);
-
/* It is disabled by HW by default */
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
@@ -5757,8 +5676,6 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
gfx_v8_0_wait_for_rlc_serdes(adev);
}
-
- amdgpu_gfx_rlc_exit_safe_mode(adev);
}
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
@@ -5768,8 +5685,6 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
- amdgpu_gfx_rlc_enter_safe_mode(adev);
-
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
@@ -5831,7 +5746,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
gfx_v8_0_wait_for_rlc_serdes(adev);
- /* write cmd to Set CGCG Overrride */
+ /* write cmd to Set CGCG Override */
gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
@@ -5850,12 +5765,12 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
}
gfx_v8_0_wait_for_rlc_serdes(adev);
-
- amdgpu_gfx_rlc_exit_safe_mode(adev);
}
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
bool enable)
{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
if (enable) {
/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
* === MGCG + MGLS + TS(CG/LS) ===
@@ -5869,6 +5784,8 @@ static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
}
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return 0;
}
@@ -6019,10 +5936,10 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v8_0_set_clockgating_state(void *handle,
+static int gfx_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -6051,7 +5968,7 @@ static int gfx_v8_0_set_clockgating_state(void *handle,
static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
- return ring->adev->wb.wb[ring->rptr_offs];
+ return *ring->rptr_cpu_addr;
}
static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
@@ -6060,7 +5977,7 @@ static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
if (ring->use_doorbell)
/* XXX check if swapping is necessary on BE */
- return ring->adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
else
return RREG32(mmCP_RB0_WPTR);
}
@@ -6071,7 +5988,7 @@ static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
@@ -6194,6 +6111,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
{
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+ bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
/* Workaround for cache flush problems. First send a dummy EOP
* event down the pipe with seq one below.
@@ -6217,7 +6135,8 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
EOP_TC_ACTION_EN |
EOP_TC_WB_ACTION_EN |
EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
- EVENT_INDEX(5)));
+ EVENT_INDEX(5) |
+ (exec ? EOP_EXEC : 0)));
amdgpu_ring_write(ring, addr & 0xfffffffc);
amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
@@ -6271,7 +6190,7 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
- return ring->adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
}
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
@@ -6279,7 +6198,7 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
/* XXX check if swapping is necessary on BE */
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
@@ -6368,33 +6287,22 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
amdgpu_ring_write(ring, 0);
}
-static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
unsigned ret;
amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, 0);
ret = ring->wptr & ring->buf_mask;
- amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
-{
- unsigned cur;
-
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr & ring->buf_mask) - 1;
- if (likely(cur > offset))
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
-}
-
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
uint32_t reg_val_offs)
{
@@ -6764,11 +6672,11 @@ static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
*/
if (from_wq) {
mutex_lock(&adev->grbm_idx_mutex);
- gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
+ gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id, 0);
sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
}
@@ -6974,7 +6882,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
.emit_switch_buffer = gfx_v8_ring_emit_sb,
.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
- .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
.emit_wreg = gfx_v8_0_ring_emit_wreg,
.soft_recovery = gfx_v8_0_ring_soft_recovery,
.emit_mem_sync = gfx_v8_0_emit_mem_sync,
@@ -7010,6 +6917,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v8_0_ring_emit_wreg,
+ .soft_recovery = gfx_v8_0_ring_soft_recovery,
.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
.emit_wave_limit = gfx_v8_0_emit_wave_limit,
};
@@ -7036,13 +6944,14 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_rreg = gfx_v8_0_ring_emit_rreg,
.emit_wreg = gfx_v8_0_ring_emit_wreg,
+ .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
};
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
+ adev->gfx.kiq[0].ring.funcs = &gfx_v8_0_ring_funcs_kiq;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
@@ -7157,12 +7066,12 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
mask = 1;
ao_bitmap = 0;
counter = 0;
- gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
if (i < 4 && j < 2)
gfx_v8_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
- cu_info->bitmap[i][j] = bitmap;
+ cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
if (bitmap & mask) {
@@ -7178,7 +7087,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
}
}
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index edb3e3b08eed..0148d7ff34d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -47,8 +47,10 @@
#include "amdgpu_ras.h"
+#include "amdgpu_ring_mux.h"
#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
+#include "gfx_v9_0_cleaner_shader.h"
#include "gfx_v9_4_2.h"
#include "asic_reg/pwr/pwr_10_0_offset.h"
@@ -56,6 +58,7 @@
#include "asic_reg/gc/gc_9_0_default.h"
#define GFX9_NUM_GFX_RINGS 1
+#define GFX9_NUM_SW_GFX_RINGS 2
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
@@ -126,6 +129,8 @@ MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
#define mmTCP_CHAN_STEER_0_ARCT 0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0
@@ -145,6 +150,162 @@ MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1
+static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
+ /* SE status registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3),
+ /* packet headers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP)
+};
+
+static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
+ /* compute queue registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP)
+};
+
enum ta_ras_gfx_subblock {
/*CPC*/
TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
@@ -739,75 +900,6 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};
-static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag)
-{
- static void *scratch_reg0;
- static void *scratch_reg1;
- static void *scratch_reg2;
- static void *scratch_reg3;
- static void *spare_int;
- static uint32_t grbm_cntl;
- static uint32_t grbm_idx;
-
- scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
- scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
- scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
- scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
- spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
-
- grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
- grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;
-
- if (amdgpu_sriov_runtime(adev)) {
- pr_err("shouldn't call rlcg write register during runtime\n");
- return;
- }
-
- if (offset == grbm_cntl || offset == grbm_idx) {
- if (offset == grbm_cntl)
- writel(v, scratch_reg2);
- else if (offset == grbm_idx)
- writel(v, scratch_reg3);
-
- writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
- } else {
- uint32_t i = 0;
- uint32_t retries = 50000;
-
- writel(v, scratch_reg0);
- writel(offset | 0x80000000, scratch_reg1);
- writel(1, spare_int);
- for (i = 0; i < retries; i++) {
- u32 tmp;
-
- tmp = readl(scratch_reg1);
- if (!(tmp & 0x80000000))
- break;
-
- udelay(10);
- }
- if (i >= retries)
- pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
- }
-
-}
-
-static void gfx_v9_0_sriov_wreg(struct amdgpu_device *adev, u32 offset,
- u32 v, u32 acc_flags, u32 hwip)
-{
- if ((acc_flags & AMDGPU_REGS_RLC) &&
- amdgpu_sriov_fullaccess(adev)) {
- gfx_v9_0_rlcg_w(adev, offset, v, acc_flags);
-
- return;
- }
-
- if (acc_flags & AMDGPU_REGS_NO_KIQ)
- WREG32_NO_KIQ(offset, v);
- else
- WREG32(offset, v);
-}
-
#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
@@ -820,17 +912,27 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
-static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
+static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
- void *inject_if);
+ void *inject_if, uint32_t instance_mask);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
+static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
+ unsigned int vmid);
+static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
uint64_t queue_mask)
{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ u64 shader_mc_addr;
+
+ /* Cleaner shader MC address */
+ shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
+
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
amdgpu_ring_write(kiq_ring,
PACKET3_SET_RESOURCES_VMID_MASK(0) |
@@ -840,8 +942,8 @@ static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
lower_32_bits(queue_mask)); /* queue mask lo */
amdgpu_ring_write(kiq_ring,
upper_32_bits(queue_mask)); /* queue mask hi */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
amdgpu_ring_write(kiq_ring, 0); /* oac mask */
amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}
@@ -849,9 +951,8 @@ static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
struct amdgpu_ring *ring)
{
- struct amdgpu_device *adev = kiq_ring->adev;
uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
- uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ uint64_t wptr_addr = ring->wptr_gpu_addr;
uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
@@ -894,9 +995,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
if (action == PREEMPT_QUEUES_NO_UNMAP) {
- amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
- amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
- amdgpu_ring_write(kiq_ring, seq);
+ amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+
} else {
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
@@ -938,12 +1040,47 @@ static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}
+
+static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
+ uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
+ uint32_t xcc_id, uint32_t vmid)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ unsigned i;
+
+ /* enter safe mode */
+ amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0);
+
+ if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
+ /* wait till dequeue takes effect */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "fail to wait on hqd deactive\n");
+ } else {
+ dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
+ }
+
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ /* exit safe mode */
+ amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
+}
+
static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
.kiq_set_resources = gfx_v9_0_kiq_set_resources,
.kiq_map_queues = gfx_v9_0_kiq_map_queues,
.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
.kiq_query_status = gfx_v9_0_kiq_query_status,
.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
+ .kiq_reset_hw_queue = gfx_v9_0_kiq_reset_hw_queue,
.set_resources_size = 8,
.map_queues_size = 7,
.unmap_queues_size = 6,
@@ -953,12 +1090,12 @@ static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
- adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
+ adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
}
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
soc15_program_register_sequence(adev,
golden_settings_gc_9_0,
@@ -1014,19 +1151,12 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
break;
}
- if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
- (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
+ if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}
-static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
-{
- adev->gfx.scratch.num_reg = 8;
- adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
- adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
-}
-
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
bool wc, uint32_t reg, uint32_t val)
{
@@ -1064,22 +1194,18 @@ static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t scratch;
+ uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
uint32_t tmp = 0;
unsigned i;
int r;
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r)
- return r;
-
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
- goto error_free_scratch;
+ return r;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
- amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
+ amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
@@ -1092,9 +1218,6 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
-
-error_free_scratch:
- amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@@ -1116,8 +1239,8 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 16,
- AMDGPU_IB_POOL_DIRECT, &ib);
+
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err1;
@@ -1147,7 +1270,7 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err2:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
amdgpu_device_wb_free(adev, index);
@@ -1157,56 +1280,30 @@ err1:
static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
- release_firmware(adev->gfx.pfp_fw);
- adev->gfx.pfp_fw = NULL;
- release_firmware(adev->gfx.me_fw);
- adev->gfx.me_fw = NULL;
- release_firmware(adev->gfx.ce_fw);
- adev->gfx.ce_fw = NULL;
- release_firmware(adev->gfx.rlc_fw);
- adev->gfx.rlc_fw = NULL;
- release_firmware(adev->gfx.mec_fw);
- adev->gfx.mec_fw = NULL;
- release_firmware(adev->gfx.mec2_fw);
- adev->gfx.mec2_fw = NULL;
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
kfree(adev->gfx.rlc.register_list_format);
}
-static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
-{
- const struct rlc_firmware_header_v2_1 *rlc_hdr;
-
- rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
- adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
- adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
- adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
- adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
- adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
- adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
- adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
- adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
- adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
- adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
- adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
- adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
- adev->gfx.rlc.reg_list_format_direct_reg_list_length =
- le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
-}
-
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
adev->gfx.me_fw_write_wait = false;
adev->gfx.mec_fw_write_wait = false;
- if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
+ if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) &&
((adev->gfx.mec_fw_version < 0x000001a5) ||
- (adev->gfx.mec_feature_version < 46) ||
- (adev->gfx.pfp_fw_version < 0x000000b7) ||
- (adev->gfx.pfp_feature_version < 46)))
+ (adev->gfx.mec_feature_version < 46) ||
+ (adev->gfx.pfp_fw_version < 0x000000b7) ||
+ (adev->gfx.pfp_feature_version < 46)))
DRM_WARN_ONCE("CP firmware version too old, please update!");
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
if ((adev->gfx.me_fw_version >= 0x0000009c) &&
(adev->gfx.me_feature_version >= 42) &&
@@ -1274,6 +1371,12 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
+ /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
+ { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
+ /* https://bbs.openkylin.top/t/topic/171497 */
+ { 0x1002, 0x15d8, 0x19e5, 0x3e14, 0xc2 },
+ /* HP 705G4 DM with R5 2400G */
+ { 0x1002, 0x15dd, 0x103c, 0x8464, 0xd6 },
{ 0, 0, 0, 0, 0 },
};
@@ -1304,7 +1407,7 @@ static bool is_raven_kicker(struct amdgpu_device *adev)
static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
{
- if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
+ if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) &&
(adev->gfx.me_fw_version >= 0x000000a5) &&
(adev->gfx.me_feature_version >= 52))
return true;
@@ -1317,7 +1420,7 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
case IP_VERSION(9, 2, 1):
case IP_VERSION(9, 4, 0):
@@ -1349,95 +1452,45 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
}
static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
- const char *chip_name)
+ char *chip_name)
{
- char fw_name[30];
int err;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
- const struct gfx_firmware_header_v1_0 *cp_hdr;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
- err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp.bin", chip_name);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
- adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
- err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.me_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me.bin", chip_name);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
- adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
- err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ce.bin", chip_name);
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.ce_fw);
- if (err)
- goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
- adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
- info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
- info->fw = adev->gfx.pfp_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
- info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
- info->fw = adev->gfx.me_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
- info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
- info->fw = adev->gfx.ce_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
- }
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
out:
if (err) {
- dev_err(adev->dev,
- "gfx9: Failed to load firmware \"%s\"\n",
- fw_name);
- release_firmware(adev->gfx.pfp_fw);
- adev->gfx.pfp_fw = NULL;
- release_firmware(adev->gfx.me_fw);
- adev->gfx.me_fw = NULL;
- release_firmware(adev->gfx.ce_fw);
- adev->gfx.ce_fw = NULL;
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
}
return err;
}
static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
- const char *chip_name)
+ char *chip_name)
{
- char fw_name[30];
int err;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
const struct rlc_firmware_header_v2_0 *rlc_hdr;
- unsigned int *tmp = NULL;
- unsigned int i = 0;
uint16_t version_major;
uint16_t version_minor;
uint32_t smu_version;
@@ -1453,271 +1506,114 @@ static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
if (!strcmp(chip_name, "picasso") &&
(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc_am4.bin", chip_name);
else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
(smu_version >= 0x41e2b))
/**
*SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
*/
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_kicker_rlc.bin", chip_name);
else
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
- err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc.bin", chip_name);
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
- rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
- if (version_major == 2 && version_minor == 1)
- adev->gfx.rlc.is_rlc_v2_1 = true;
-
- adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
- adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
- adev->gfx.rlc.save_and_restore_offset =
- le32_to_cpu(rlc_hdr->save_and_restore_offset);
- adev->gfx.rlc.clear_state_descriptor_offset =
- le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
- adev->gfx.rlc.avail_scratch_ram_locations =
- le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
- adev->gfx.rlc.reg_restore_list_size =
- le32_to_cpu(rlc_hdr->reg_restore_list_size);
- adev->gfx.rlc.reg_list_format_start =
- le32_to_cpu(rlc_hdr->reg_list_format_start);
- adev->gfx.rlc.reg_list_format_separate_start =
- le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
- adev->gfx.rlc.starting_offsets_start =
- le32_to_cpu(rlc_hdr->starting_offsets_start);
- adev->gfx.rlc.reg_list_format_size_bytes =
- le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
- adev->gfx.rlc.reg_list_size_bytes =
- le32_to_cpu(rlc_hdr->reg_list_size_bytes);
- adev->gfx.rlc.register_list_format =
- kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
- adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
- if (!adev->gfx.rlc.register_list_format) {
- err = -ENOMEM;
- goto out;
- }
-
- tmp = (unsigned int *)((uintptr_t)rlc_hdr +
- le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
- for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
- adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
-
- adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
-
- tmp = (unsigned int *)((uintptr_t)rlc_hdr +
- le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
- for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
- adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
-
- if (adev->gfx.rlc.is_rlc_v2_1)
- gfx_v9_0_init_rlc_ext_microcode(adev);
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
- info->fw = adev->gfx.rlc_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- if (adev->gfx.rlc.is_rlc_v2_1 &&
- adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
- adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
- adev->gfx.rlc.save_restore_list_srm_size_bytes) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
- }
- }
-
+ err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
out:
- if (err) {
- dev_err(adev->dev,
- "gfx9: Failed to load firmware \"%s\"\n",
- fw_name);
- release_firmware(adev->gfx.rlc_fw);
- adev->gfx.rlc_fw = NULL;
- }
+ if (err)
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+
return err;
}
static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
{
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0))
return false;
return true;
}
static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
- const char *chip_name)
+ char *chip_name)
{
- char fw_name[30];
int err;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
- const struct gfx_firmware_header_v1_0 *cp_hdr;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
- err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.mec_fw);
+ if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sjt_mec.bin", chip_name);
+ else
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", chip_name);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
- adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
- err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
+ if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sjt_mec2.bin", chip_name);
+ else
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec2.bin", chip_name);
if (!err) {
- err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
- if (err)
- goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)
- adev->gfx.mec2_fw->data;
- adev->gfx.mec2_fw_version =
- le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.mec2_feature_version =
- le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
} else {
err = 0;
- adev->gfx.mec2_fw = NULL;
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
}
} else {
adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
}
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
- info->fw = adev->gfx.mec_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
- info->fw = adev->gfx.mec_fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
-
- if (adev->gfx.mec2_fw) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
- info->fw = adev->gfx.mec2_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
-
- /* TODO: Determine if MEC2 JT FW loading can be removed
- for all GFX V9 asic and above */
- if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
- info->fw = adev->gfx.mec2_fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
- PAGE_SIZE);
- }
- }
- }
-
-out:
gfx_v9_0_check_if_need_gfxoff(adev);
gfx_v9_0_check_fw_write_wait(adev);
- if (err) {
- dev_err(adev->dev,
- "gfx9: Failed to load firmware \"%s\"\n",
- fw_name);
- release_firmware(adev->gfx.mec_fw);
- adev->gfx.mec_fw = NULL;
- release_firmware(adev->gfx.mec2_fw);
- adev->gfx.mec2_fw = NULL;
- }
+
+out:
+ if (err)
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
return err;
}
static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
- const char *chip_name;
+ char ucode_prefix[30];
int r;
DRM_DEBUG("\n");
-
- switch (adev->ip_versions[GC_HWIP][0]) {
- case IP_VERSION(9, 0, 1):
- chip_name = "vega10";
- break;
- case IP_VERSION(9, 2, 1):
- chip_name = "vega12";
- break;
- case IP_VERSION(9, 4, 0):
- chip_name = "vega20";
- break;
- case IP_VERSION(9, 2, 2):
- case IP_VERSION(9, 1, 0):
- if (adev->apu_flags & AMD_APU_IS_RAVEN2)
- chip_name = "raven2";
- else if (adev->apu_flags & AMD_APU_IS_PICASSO)
- chip_name = "picasso";
- else
- chip_name = "raven";
- break;
- case IP_VERSION(9, 4, 1):
- chip_name = "arcturus";
- break;
- case IP_VERSION(9, 3, 0):
- if (adev->apu_flags & AMD_APU_IS_RENOIR)
- chip_name = "renoir";
- else
- chip_name = "green_sardine";
- break;
- case IP_VERSION(9, 4, 2):
- chip_name = "aldebaran";
- break;
- default:
- BUG();
- }
+ amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
/* No CPG in Arcturus */
if (adev->gfx.num_gfx_rings) {
- r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
+ r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
if (r)
return r;
}
- r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
+ r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
if (r)
return r;
- r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
+ r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
if (r)
return r;
@@ -1752,45 +1648,18 @@ static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
return count;
}
-static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
if (adev->gfx.rlc.cs_data == NULL)
return;
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index -
- PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
@@ -1803,7 +1672,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
if (adev->flags & AMD_IS_APU)
always_on_cu_num = 4;
- else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
+ else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1))
always_on_cu_num = 8;
else
always_on_cu_num = 12;
@@ -1814,10 +1683,10 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
mask = 1;
cu_bitmap = 0;
counter = 0;
- gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
- if (cu_info->bitmap[i][j] & mask) {
+ if (cu_info->bitmap[0][i][j] & mask) {
if (counter == pg_always_on_cu_num)
WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
if (counter < always_on_cu_num)
@@ -1833,7 +1702,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
}
}
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
}
@@ -1855,7 +1724,7 @@ static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
/* set mmRLC_LB_PARAMS = 0x003F_1006 */
@@ -1904,7 +1773,7 @@ static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
/* set mmRLC_LB_PARAMS = 0x003F_1006 */
@@ -1948,6 +1817,21 @@ static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
return 4;
}
+static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
+{
+ struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
+
+ reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
+ reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
+ reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
+ reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
+ reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
+ reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
+ reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
+ reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
+ adev->gfx.rlc.rlcg_reg_access_supported = true;
+}
+
static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
{
const struct cs_section_def *cs_data;
@@ -1972,22 +1856,6 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
return r;
}
- switch (adev->ip_versions[GC_HWIP][0]) {
- case IP_VERSION(9, 2, 2):
- case IP_VERSION(9, 1, 0):
- gfx_v9_0_init_lbpw(adev);
- break;
- case IP_VERSION(9, 4, 0):
- gfx_v9_4_init_lbpw(adev);
- break;
- default:
- break;
- }
-
- /* init spm vmid with 0xf */
- if (adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
-
return 0;
}
@@ -2008,14 +1876,15 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
const struct gfx_firmware_header_v1_0 *mec_hdr;
- bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
/* take ownership of the relevant compute queues */
amdgpu_gfx_compute_queue_acquire(adev);
mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
if (mec_hpd_size) {
r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
&adev->gfx.mec.hpd_eop_obj,
&adev->gfx.mec.hpd_eop_gpu_addr,
(void **)&hpd);
@@ -2082,7 +1951,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}
-static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
/* type 1 wave data */
dst[(*no_fields)++] = 1;
@@ -2103,7 +1972,7 @@ static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
}
-static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t start,
uint32_t size, uint32_t *dst)
{
@@ -2112,7 +1981,7 @@ static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
-static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t thread,
uint32_t start, uint32_t size,
uint32_t *dst)
@@ -2123,9 +1992,9 @@ static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
}
static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 q, u32 vm)
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
- soc15_grbm_select(adev, me, pipe, q, vm);
+ soc15_grbm_select(adev, me, pipe, q, vm, 0);
}
static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
@@ -2137,12 +2006,16 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
};
-static const struct amdgpu_gfx_ras_funcs gfx_v9_0_ras_funcs = {
- .ras_late_init = amdgpu_gfx_ras_late_init,
- .ras_fini = amdgpu_gfx_ras_fini,
- .ras_error_inject = &gfx_v9_0_ras_error_inject,
- .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
- .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
+const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = {
+ .ras_error_inject = &gfx_v9_0_ras_error_inject,
+ .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
+ .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
+};
+
+static struct amdgpu_gfx_ras gfx_v9_0_ras = {
+ .ras_block = {
+ .hw_ops = &gfx_v9_0_ras_ops,
+ },
};
static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
@@ -2150,9 +2023,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
u32 gb_addr_config;
int err;
- adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
-
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
@@ -2171,7 +2042,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
DRM_INFO("fix gfx.config for vega12\n");
break;
case IP_VERSION(9, 4, 0):
- adev->gfx.ras_funcs = &gfx_v9_0_ras_funcs;
+ adev->gfx.ras = &gfx_v9_0_ras;
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -2198,7 +2069,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
break;
case IP_VERSION(9, 4, 1):
- adev->gfx.ras_funcs = &gfx_v9_4_ras_funcs;
+ adev->gfx.ras = &gfx_v9_4_ras;
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -2219,7 +2090,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
gb_addr_config |= 0x22010042;
break;
case IP_VERSION(9, 4, 2):
- adev->gfx.ras_funcs = &gfx_v9_4_2_ras_funcs;
+ adev->gfx.ras = &gfx_v9_4_2_ras;
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -2297,26 +2168,56 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+ (ring_id * GFX9_MEC_HPD_SIZE);
+ ring->vm_hub = AMDGPU_GFXHUB(0);
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ ring->pipe;
hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
- AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
/* type-2 packets are deprecated on MEC, use type-3 instead */
return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
hw_prio, NULL);
}
-static int gfx_v9_0_sw_init(void *handle)
+static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev)
+{
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
+ uint32_t *ptr;
+ uint32_t inst;
+
+ ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
+ adev->gfx.ip_dump_core = NULL;
+ } else {
+ adev->gfx.ip_dump_core = ptr;
+ }
+
+ /* Allocate memory for compute queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
+ inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
+ adev->gfx.ip_dump_compute_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_compute_queues = ptr;
+ }
+}
+
+static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int i, j, k, r, ring_id;
+ int xcc_id = 0;
struct amdgpu_ring *ring;
- struct amdgpu_kiq *kiq;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ unsigned int hw_prio;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
case IP_VERSION(9, 2, 1):
case IP_VERSION(9, 4, 0):
@@ -2332,6 +2233,43 @@ static int gfx_v9_0_sw_init(void *handle)
break;
}
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 3, 0):
+ adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 167 &&
+ adev->gfx.pfp_fw_version >= 196 &&
+ adev->gfx.mec_fw_version >= 474) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(9, 4, 2):
+ adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex);
+ if (adev->gfx.mec_fw_version >= 88) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
+
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
@@ -2340,6 +2278,13 @@ static int gfx_v9_0_sw_init(void *handle)
if (r)
return r;
+ /* Bad opcode Event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
+ GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR,
+ &adev->gfx.bad_op_irq);
+ if (r)
+ return r;
+
/* Privileged reg */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
&adev->gfx.priv_reg_irq);
@@ -2366,18 +2311,14 @@ static int gfx_v9_0_sw_init(void *handle)
adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
- gfx_v9_0_scratch_init(adev);
-
- r = gfx_v9_0_init_microcode(adev);
- if (r) {
- DRM_ERROR("Failed to load gfx firmware!\n");
- return r;
- }
-
- r = adev->gfx.rlc.funcs->init(adev);
- if (r) {
- DRM_ERROR("Failed to init rlc BOs!\n");
- return r;
+ if (adev->gfx.rlc.funcs) {
+ if (adev->gfx.rlc.funcs->init) {
+ r = adev->gfx.rlc.funcs->init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to init rlc BOs!\n");
+ return r;
+ }
+ }
}
r = gfx_v9_0_mec_init(adev);
@@ -2396,6 +2337,10 @@ static int gfx_v9_0_sw_init(void *handle)
sprintf(ring->name, "gfx_%d", i);
ring->use_doorbell = true;
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+
+ /* disable scheduler on the real ring */
+ ring->no_scheduler = adev->gfx.mcbp;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
AMDGPU_RING_PRIO_DEFAULT, NULL);
@@ -2403,12 +2348,49 @@ static int gfx_v9_0_sw_init(void *handle)
return r;
}
+ /* set up the software rings */
+ if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
+ for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
+ ring = &adev->gfx.sw_gfx_ring[i];
+ ring->ring_obj = NULL;
+ sprintf(ring->name, amdgpu_sw_ring_name(i));
+ ring->use_doorbell = true;
+ ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+ ring->is_sw_ring = true;
+ hw_prio = amdgpu_sw_ring_priority(i);
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
+ AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
+ NULL);
+ if (r)
+ return r;
+ ring->wptr = 0;
+ }
+
+ /* init the muxer and add software rings */
+ r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
+ GFX9_NUM_SW_GFX_RINGS);
+ if (r) {
+ DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
+ return r;
+ }
+ for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
+ r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
+ &adev->gfx.sw_gfx_ring[i]);
+ if (r) {
+ DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
+ return r;
+ }
+ }
+ }
+
/* set up the compute queues - allocate horizontally across pipes */
ring_id = 0;
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
- if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
continue;
r = gfx_v9_0_compute_ring_init(adev,
@@ -2422,19 +2404,26 @@ static int gfx_v9_0_sw_init(void *handle)
}
}
- r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
+ /* TODO: Add queue reset mask when FW fully supports it */
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ if (!amdgpu_sriov_vf(adev) && !adev->debug_disable_gpu_ring_reset)
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+ r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
if (r) {
DRM_ERROR("Failed to init KIQ BOs!\n");
return r;
}
- kiq = &adev->gfx.kiq;
- r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
if (r)
return r;
/* create MQD for all compute queues as wel as KIQ for SRIOV case */
- r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0);
if (r)
return r;
@@ -2444,27 +2433,42 @@ static int gfx_v9_0_sw_init(void *handle)
if (r)
return r;
+ if (amdgpu_gfx_ras_sw_init(adev)) {
+ dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
+ return -EINVAL;
+ }
+
+ gfx_v9_0_alloc_ip_dump(adev);
+
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
return 0;
}
-static int gfx_v9_0_sw_fini(void *handle)
+static int gfx_v9_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- if (adev->gfx.ras_funcs &&
- adev->gfx.ras_funcs->ras_fini)
- adev->gfx.ras_funcs->ras_fini(adev);
+ if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
+ for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
+ amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
+ amdgpu_ring_mux_fini(&adev->gfx.muxer);
+ }
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
- amdgpu_gfx_mqd_sw_fini(adev);
- amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
- amdgpu_gfx_kiq_fini(adev);
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
+
+ amdgpu_gfx_cleaner_shader_sw_fini(adev);
gfx_v9_0_mec_fini(adev);
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
@@ -2477,6 +2481,11 @@ static int gfx_v9_0_sw_fini(void *handle)
}
gfx_v9_0_free_microcode(adev);
+ amdgpu_gfx_sysfs_fini(adev);
+
+ kfree(adev->gfx.ip_dump_core);
+ kfree(adev->gfx.ip_dump_compute_queues);
+
return 0;
}
@@ -2487,7 +2496,7 @@ static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
}
void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
- u32 instance)
+ u32 instance, int xcc_id)
{
u32 data;
@@ -2536,19 +2545,42 @@ static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
data = gfx_v9_0_get_rb_active_bitmap(adev);
active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
rb_bitmap_width_per_sh);
}
}
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
adev->gfx.config.backend_enable_mask = active_rbs;
adev->gfx.config.num_rbs = hweight32(active_rbs);
}
+static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
+ uint32_t first_vmid,
+ uint32_t last_vmid)
+{
+ uint32_t data;
+ uint32_t trap_config_vmid_mask = 0;
+ int i;
+
+ /* Calculate trap config vmid mask */
+ for (i = first_vmid; i < last_vmid; i++)
+ trap_config_vmid_mask |= (1 << i);
+
+ data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
+ VMID_SEL, trap_config_vmid_mask);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+ TRAP_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+}
+
#define DEFAULT_SH_MEM_BASES (0x6000)
static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
{
@@ -2570,16 +2602,16 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
mutex_lock(&adev->srbm_mutex);
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
- soc15_grbm_select(adev, 0, 0, 0, i);
+ soc15_grbm_select(adev, 0, 0, 0, i, 0);
/* CP and shaders */
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
}
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
/* Initialize all compute VMIDs to have no GDS, GWS, or OA
- acccess. These should be enabled by FW for target VMIDs. */
+ access. These should be enabled by FW for target VMIDs. */
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
@@ -2610,13 +2642,16 @@ static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
{
uint32_t tmp;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 1):
tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
- tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
- DISABLE_BARRIER_WAITCNT, 1);
+ tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
+ !READ_ONCE(adev->barrier_has_auto_waitcnt));
WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
break;
+ case IP_VERSION(9, 4, 2):
+ gfx_v9_4_2_init_sq(adev);
+ break;
default:
break;
}
@@ -2627,19 +2662,23 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
u32 tmp;
int i;
- WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+ if (!amdgpu_sriov_vf(adev) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) {
+ WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+ }
gfx_v9_0_tiling_mode_table_init(adev);
- gfx_v9_0_setup_rb(adev);
+ if (adev->gfx.num_gfx_rings)
+ gfx_v9_0_setup_rb(adev);
gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
mutex_lock(&adev->srbm_mutex);
- for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
- soc15_grbm_select(adev, 0, 0, 0, i);
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
+ soc15_grbm_select(adev, 0, 0, 0, i, 0);
/* CP and shaders */
if (i == 0) {
tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
@@ -2661,7 +2700,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
}
}
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
@@ -2678,15 +2717,15 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
for (k = 0; k < adev->usec_timeout; k++) {
if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
break;
udelay(1);
}
if (k == adev->usec_timeout) {
- gfx_v9_0_select_se_sh(adev, 0xffffffff,
- 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff,
+ 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
i, j);
@@ -2694,7 +2733,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
}
}
}
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
@@ -2720,7 +2759,7 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
- if(adev->gfx.num_gfx_rings)
+ if (adev->gfx.num_gfx_rings)
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
@@ -2946,7 +2985,7 @@ static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
- if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0))
pwr_10_0_gfxip_control_over_cgpg(adev, true);
}
}
@@ -3058,7 +3097,8 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
* And it's needed by gfxoff feature.
*/
if (adev->gfx.rlc.is_rlc_v2_1) {
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(9, 2, 1) ||
(adev->apu_flags & AMD_APU_IS_RAVEN2))
gfx_v9_1_init_rlc_save_restore_list(adev);
gfx_v9_0_enable_save_restore_machine(adev);
@@ -3171,15 +3211,17 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
return r;
}
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 2, 2):
case IP_VERSION(9, 1, 0):
+ gfx_v9_0_init_lbpw(adev);
if (amdgpu_lbpw == 0)
gfx_v9_0_enable_lbpw(adev, false);
else
gfx_v9_0_enable_lbpw(adev, true);
break;
case IP_VERSION(9, 4, 0):
+ gfx_v9_4_init_lbpw(adev);
if (amdgpu_lbpw > 0)
gfx_v9_0_enable_lbpw(adev, true);
else
@@ -3189,6 +3231,8 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
break;
}
+ gfx_v9_0_update_spm_vmid_internal(adev, 0xf);
+
adev->gfx.rlc.funcs->start(adev);
return 0;
@@ -3198,6 +3242,15 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_INVALIDATE_ICACHE, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_INVALIDATE_ICACHE, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_INVALIDATE_ICACHE, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE0_RESET, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE1_RESET, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
@@ -3275,6 +3328,14 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
gfx_v9_0_cp_gfx_enable(adev, true);
+ /* Now only limit the quirk on the APU gfx9 series and already
+ * confirmed that the APU gfx10/gfx11 needn't such update.
+ */
+ if (adev->flags & AMD_IS_APU &&
+ adev->in_s3 && !pm_resume_via_firmware()) {
+ DRM_INFO("Will skip the CSB packet resubmit\n");
+ return 0;
+ }
r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
@@ -3352,12 +3413,12 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
- /* set the wb address wether it's enabled or not */
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ /* set the wb address whether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
@@ -3389,7 +3450,6 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
/* start the ring */
gfx_v9_0_cp_gfx_start(adev);
- ring->sched.ready = true;
return 0;
}
@@ -3400,8 +3460,16 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
} else {
WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
- (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
- adev->gfx.kiq.ring.sched.ready = false;
+ (CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_HALT_MASK |
+ CP_MEC_CNTL__MEC_ME2_HALT_MASK));
+ adev->gfx.kiq[0].ring.sched.ready = false;
}
udelay(50);
}
@@ -3458,9 +3526,7 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80);
}
static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
@@ -3558,7 +3624,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
(order_base_2(ring->ring_size / 4) - 1));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
- ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+ (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
#ifdef __BIG_ENDIAN
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
@@ -3569,33 +3635,16 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
mqd->cp_hqd_pq_control = tmp;
/* set the wb address whether it's enabled or not */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ wb_gpu_addr = ring->rptr_gpu_addr;
mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_rptr_report_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wb_gpu_addr = ring->wptr_gpu_addr;
mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
- tmp = 0;
- /* enable the doorbell if requested */
- if (ring->use_doorbell) {
- tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_OFFSET, ring->doorbell_index);
-
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_EN, 1);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_SOURCE, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_HIT, 0);
- }
-
- mqd->cp_hqd_pq_doorbell_control = tmp;
-
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
ring->wptr = 0;
mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
@@ -3782,7 +3831,6 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct v9_mqd *mqd = ring->mqd_ptr;
- int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
struct v9_mqd *tmp_mqd;
gfx_v9_0_kiq_setting(ring);
@@ -3792,40 +3840,42 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
* driver need to re-init the mqd.
* check mqd->cp_hqd_pq_control since this value should not be 0
*/
- tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
+ tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){
/* for GPU_RESET case , reset MQD to a clean status */
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
/* reset ring buffer */
ring->wptr = 0;
amdgpu_ring_clear_ring(ring);
mutex_lock(&adev->srbm_mutex);
- soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
gfx_v9_0_kiq_init_register(ring);
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
} else {
memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
+ if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+ amdgpu_ring_clear_ring(ring);
mutex_lock(&adev->srbm_mutex);
- soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
gfx_v9_0_mqd_init(ring);
gfx_v9_0_kiq_init_register(ring);
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
}
return 0;
}
-static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
+static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
{
struct amdgpu_device *adev = ring->adev;
struct v9_mqd *mqd = ring->mqd_ptr;
@@ -3837,29 +3887,26 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
*/
tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
- if (!tmp_mqd->cp_hqd_pq_control ||
- (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
+ if (!restore && (!tmp_mqd->cp_hqd_pq_control ||
+ (!amdgpu_in_reset(adev) && !adev->in_suspend))) {
memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
mutex_lock(&adev->srbm_mutex);
- soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
gfx_v9_0_mqd_init(ring);
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.mec.mqd_backup[mqd_idx])
memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
- } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
- /* reset MQD to a clean status */
+ } else {
+ /* restore MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx])
memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
-
/* reset ring buffer */
ring->wptr = 0;
- atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
- amdgpu_ring_clear_ring(ring);
- } else {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
amdgpu_ring_clear_ring(ring);
}
@@ -3868,54 +3915,23 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
- int r;
-
- ring = &adev->gfx.kiq.ring;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (unlikely(r != 0))
- return r;
-
- gfx_v9_0_kiq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
- ring->sched.ready = true;
+ gfx_v9_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
return 0;
}
static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
+ int i, r;
gfx_v9_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v9_0_kcq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v9_0_kcq_init_queue(&adev->gfx.compute_ring[i], false);
if (r)
- goto done;
+ return r;
}
- r = amdgpu_gfx_enable_kcq(adev);
-done:
- return r;
+ return amdgpu_gfx_enable_kcq(adev, 0);
}
static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
@@ -3939,6 +3955,10 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
return r;
}
+ if (adev->gfx.num_gfx_rings)
+ gfx_v9_0_cp_gfx_enable(adev, false);
+ gfx_v9_0_cp_compute_enable(adev, false);
+
r = gfx_v9_0_kiq_resume(adev);
if (r)
return r;
@@ -3974,8 +3994,8 @@ static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
{
u32 tmp;
- if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
- adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2))
return;
tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
@@ -3995,10 +4015,13 @@ static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
gfx_v9_0_cp_compute_enable(adev, enable);
}
-static int gfx_v9_0_hw_init(void *handle)
+static int gfx_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
+ adev->gfx.cleaner_shader_ptr);
if (!amdgpu_sriov_vf(adev))
gfx_v9_0_init_golden_registers(adev);
@@ -4015,24 +4038,27 @@ static int gfx_v9_0_hw_init(void *handle)
if (r)
return r;
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) &&
+ !amdgpu_sriov_vf(adev))
gfx_v9_4_2_set_power_brake_sequence(adev);
return r;
}
-static int gfx_v9_0_hw_fini(void *handle)
+static int gfx_v9_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
/* DF freeze and kcq disable will fail */
if (!amdgpu_ras_intr_triggered())
/* disable KCQ to avoid CPC touch memory not valid anymore */
- amdgpu_gfx_disable_kcq(adev);
+ amdgpu_gfx_disable_kcq(adev, 0);
if (amdgpu_sriov_vf(adev)) {
gfx_v9_0_cp_gfx_enable(adev, false);
@@ -4050,11 +4076,11 @@ static int gfx_v9_0_hw_fini(void *handle)
*/
if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
mutex_lock(&adev->srbm_mutex);
- soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
- adev->gfx.kiq.ring.pipe,
- adev->gfx.kiq.ring.queue, 0);
- gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me,
+ adev->gfx.kiq[0].ring.pipe,
+ adev->gfx.kiq[0].ring.queue, 0, 0);
+ gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
@@ -4062,7 +4088,7 @@ static int gfx_v9_0_hw_fini(void *handle)
/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
- (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
+ (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) {
dev_dbg(adev->dev, "Skipping RLC halt\n");
return 0;
}
@@ -4071,19 +4097,19 @@ static int gfx_v9_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v9_0_suspend(void *handle)
+static int gfx_v9_0_suspend(struct amdgpu_ip_block *ip_block)
{
- return gfx_v9_0_hw_fini(handle);
+ return gfx_v9_0_hw_fini(ip_block);
}
-static int gfx_v9_0_resume(void *handle)
+static int gfx_v9_0_resume(struct amdgpu_ip_block *ip_block)
{
- return gfx_v9_0_hw_init(handle);
+ return gfx_v9_0_hw_init(ip_block);
}
-static bool gfx_v9_0_is_idle(void *handle)
+static bool gfx_v9_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
GRBM_STATUS, GUI_ACTIVE))
@@ -4092,24 +4118,24 @@ static bool gfx_v9_0_is_idle(void *handle)
return true;
}
-static int gfx_v9_0_wait_for_idle(void *handle)
+static int gfx_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (gfx_v9_0_is_idle(handle))
+ if (gfx_v9_0_is_idle(ip_block))
return 0;
udelay(1);
}
return -ETIMEDOUT;
}
-static int gfx_v9_0_soft_reset(void *handle)
+static int gfx_v9_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 grbm_soft_reset = 0;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* GRBM_STATUS */
tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
@@ -4148,19 +4174,17 @@ static int gfx_v9_0_soft_reset(void *handle)
/* Disable MEC parsing/prefetching */
gfx_v9_0_cp_compute_enable(adev, false);
- if (grbm_soft_reset) {
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- tmp |= grbm_soft_reset;
- dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- udelay(50);
+ udelay(50);
- tmp &= ~grbm_soft_reset;
- WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- }
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
/* Wait a little for things to settle down */
udelay(50);
@@ -4174,7 +4198,7 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
unsigned long flags;
uint32_t seq, reg_val_offs = 0;
uint64_t value = 0;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
struct amdgpu_ring *ring = &kiq->ring;
BUG_ON(!ring->funcs->emit_rreg);
@@ -4246,7 +4270,7 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
uint64_t clock, clock_lo, clock_hi, hi_check;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 3, 0):
preempt_disable();
clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
@@ -4265,7 +4289,9 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
default:
amdgpu_gfx_off_ctrl(adev, false);
mutex_lock(&adev->gfx.gpu_clock_mutex);
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(9, 0, 1) &&
+ amdgpu_sriov_runtime(adev)) {
clock = gfx_v9_0_kiq_read_clock(adev);
} else {
WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
@@ -4617,7 +4643,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
if (!ring->sched.ready)
return 0;
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
@@ -4757,21 +4783,24 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
}
fail:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
return r;
}
-static int gfx_v9_0_early_init(void *handle)
+static int gfx_v9_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
adev->gfx.num_gfx_rings = 0;
else
adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
+ adev->gfx.xcc_mask = 1;
adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
AMDGPU_MAX_COMPUTE_RINGS);
gfx_v9_0_set_kiq_pm4_funcs(adev);
@@ -4780,12 +4809,15 @@ static int gfx_v9_0_early_init(void *handle)
gfx_v9_0_set_gds_init(adev);
gfx_v9_0_set_rlc_funcs(adev);
- return 0;
+ /* init rlcg reg access ctrl */
+ gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
+
+ return gfx_v9_0_init_microcode(adev);
}
-static int gfx_v9_0_ecc_late_init(void *handle)
+static int gfx_v9_0_ecc_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
/*
@@ -4802,7 +4834,7 @@ static int gfx_v9_0_ecc_late_init(void *handle)
}
/* requires IBs so do in late init after IB pool is initialized */
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
else
r = gfx_v9_0_do_edc_gpr_workarounds(adev);
@@ -4810,23 +4842,16 @@ static int gfx_v9_0_ecc_late_init(void *handle)
if (r)
return r;
- if (adev->gfx.ras_funcs &&
- adev->gfx.ras_funcs->ras_late_init) {
- r = adev->gfx.ras_funcs->ras_late_init(adev);
- if (r)
- return r;
- }
-
- if (adev->gfx.ras_funcs &&
- adev->gfx.ras_funcs->enable_watchdog_timer)
- adev->gfx.ras_funcs->enable_watchdog_timer(adev);
+ if (adev->gfx.ras &&
+ adev->gfx.ras->enable_watchdog_timer)
+ adev->gfx.ras->enable_watchdog_timer(adev);
return 0;
}
-static int gfx_v9_0_late_init(void *handle)
+static int gfx_v9_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -4837,10 +4862,21 @@ static int gfx_v9_0_late_init(void *handle)
if (r)
return r;
- r = gfx_v9_0_ecc_late_init(handle);
+ r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
if (r)
return r;
+ r = gfx_v9_0_ecc_late_init(ip_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
+ gfx_v9_4_2_debug_trap_config_init(adev,
+ adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
+ else
+ gfx_v9_0_debug_trap_config_init(adev,
+ adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
+
return 0;
}
@@ -4856,7 +4892,7 @@ static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
return true;
}
-static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
+static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
uint32_t data;
unsigned i;
@@ -4873,7 +4909,7 @@ static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
}
}
-static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
+static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
uint32_t data;
@@ -4884,7 +4920,7 @@ static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
bool enable)
{
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
@@ -4896,7 +4932,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
}
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
@@ -4923,14 +4959,12 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
{
uint32_t data, def;
- amdgpu_gfx_rlc_enter_safe_mode(adev);
-
/* It is disabled by HW by default */
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
/* 1 - RLC_CGTT_MGCG_OVERRIDE */
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
- if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
@@ -4964,7 +4998,7 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
/* 1 - MGCG_OVERRIDE */
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
- if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
@@ -4989,8 +5023,6 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
}
}
-
- amdgpu_gfx_rlc_exit_safe_mode(adev);
}
static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
@@ -5001,8 +5033,6 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
if (!adev->gfx.num_gfx_rings)
return;
- amdgpu_gfx_rlc_enter_safe_mode(adev);
-
/* Enable 3D CGCG/CGLS */
if (enable) {
/* write cmd to clear cgcg/cgls ov */
@@ -5044,8 +5074,6 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
}
-
- amdgpu_gfx_rlc_exit_safe_mode(adev);
}
static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
@@ -5053,8 +5081,6 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
{
uint32_t def, data;
- amdgpu_gfx_rlc_enter_safe_mode(adev);
-
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
/* unset CGCG override */
@@ -5070,7 +5096,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
/* enable cgcg FSM(0x0000363F) */
def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
else
@@ -5096,13 +5122,12 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
}
-
- amdgpu_gfx_rlc_exit_safe_mode(adev);
}
static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
bool enable)
{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
if (enable) {
/* CGCG/CGLS should be enabled after MGCG/MGLS
* === MGCG + MGLS ===
@@ -5122,20 +5147,20 @@ static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
/* === MGCG + MGLS === */
gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
}
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return 0;
}
-static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
+static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
+ unsigned int vmid)
{
u32 reg, data;
- amdgpu_gfx_off_ctrl(adev, false);
-
reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
if (amdgpu_sriov_is_pp_one_vf(adev))
data = RREG32_NO_KIQ(reg);
else
- data = RREG32(reg);
+ data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
@@ -5144,6 +5169,13 @@ static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
else
WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
+}
+
+static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
+{
+ amdgpu_gfx_off_ctrl(adev, false);
+
+ gfx_v9_0_update_spm_vmid_internal(adev, vmid);
amdgpu_gfx_off_ctrl(adev, true);
}
@@ -5190,22 +5222,21 @@ static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
.reset = gfx_v9_0_rlc_reset,
.start = gfx_v9_0_rlc_start,
.update_spm_vmid = gfx_v9_0_update_spm_vmid,
- .sriov_wreg = gfx_v9_0_sriov_wreg,
.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
};
-static int gfx_v9_0_set_powergating_state(void *handle,
+static int gfx_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 2, 2):
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 3, 0):
if (!enable)
- amdgpu_gfx_off_ctrl(adev, false);
+ amdgpu_gfx_off_ctrl_immediate(adev, false);
if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
@@ -5227,10 +5258,10 @@ static int gfx_v9_0_set_powergating_state(void *handle,
gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
if (enable)
- amdgpu_gfx_off_ctrl(adev, true);
+ amdgpu_gfx_off_ctrl_immediate(adev, true);
break;
case IP_VERSION(9, 2, 1):
- amdgpu_gfx_off_ctrl(adev, enable);
+ amdgpu_gfx_off_ctrl_immediate(adev, enable);
break;
default:
break;
@@ -5239,15 +5270,15 @@ static int gfx_v9_0_set_powergating_state(void *handle,
return 0;
}
-static int gfx_v9_0_set_clockgating_state(void *handle,
+static int gfx_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
case IP_VERSION(9, 2, 1):
case IP_VERSION(9, 4, 0):
@@ -5265,9 +5296,9 @@ static int gfx_v9_0_set_clockgating_state(void *handle,
return 0;
}
-static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
+static void gfx_v9_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -5297,7 +5328,7 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
- if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) {
/* AMD_CG_SUPPORT_GFX_3D_CGCG */
data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
@@ -5311,7 +5342,7 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
{
- return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
+ return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr*/
}
static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
@@ -5321,7 +5352,7 @@ static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell) {
- wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
} else {
wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
@@ -5336,7 +5367,7 @@ static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
@@ -5388,11 +5419,18 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
control |= ib->length_dw | (vmid << 24);
- if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+ if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
+ if (flags & AMDGPU_IB_PREEMPTED)
+ control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
- gfx_v9_0_ring_emit_de_meta(ring);
+ gfx_v9_0_ring_emit_de_meta(ring,
+ (!amdgpu_sriov_vf(ring->adev) &&
+ flags & AMDGPU_IB_PREEMPTED) ?
+ true : false,
+ job->gds_size > 0 && job->gds_base != 0);
}
amdgpu_ring_write(ring, header);
@@ -5403,9 +5441,70 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
#endif
lower_32_bits(ib->gpu_addr));
amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_ib_on_emit_cntl(ring);
amdgpu_ring_write(ring, control);
}
+static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
+ unsigned offset)
+{
+ u32 control = ring->ring[offset];
+
+ control |= INDIRECT_BUFFER_PRE_RESUME(1);
+ ring->ring[offset] = control;
+}
+
+static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
+ unsigned offset)
+{
+ struct amdgpu_device *adev = ring->adev;
+ void *ce_payload_cpu_addr;
+ uint64_t payload_offset, payload_size;
+
+ payload_size = sizeof(struct v9_ce_ib_state);
+
+ payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
+ ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
+
+ if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
+ memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
+ } else {
+ memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
+ (ring->buf_mask + 1 - offset) << 2);
+ payload_size -= (ring->buf_mask + 1 - offset) << 2;
+ memcpy((void *)&ring->ring[0],
+ ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
+ payload_size);
+ }
+}
+
+static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
+ unsigned offset)
+{
+ struct amdgpu_device *adev = ring->adev;
+ void *de_payload_cpu_addr;
+ uint64_t payload_offset, payload_size;
+
+ payload_size = sizeof(struct v9_de_ib_state);
+
+ payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
+
+ ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
+ IB_COMPLETION_STATUS_PREEMPTED;
+
+ if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
+ memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
+ } else {
+ memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
+ (ring->buf_mask + 1 - offset) << 2);
+ payload_size -= (ring->buf_mask + 1 - offset) << 2;
+ memcpy((void *)&ring->ring[0],
+ de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
+ payload_size);
+ }
+}
+
static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
@@ -5447,17 +5546,24 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+ bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
+ uint32_t dw2 = 0;
/* RELEASE_MEM - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
- amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
- EOP_TC_NC_ACTION_EN) :
- (EOP_TCL1_ACTION_EN |
- EOP_TC_ACTION_EN |
- EOP_TC_WB_ACTION_EN |
- EOP_TC_MD_ACTION_EN)) |
- EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
- EVENT_INDEX(5)));
+
+ if (writeback) {
+ dw2 = EOP_TC_NC_ACTION_EN;
+ } else {
+ dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
+ EOP_TC_MD_ACTION_EN;
+ }
+ dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ EVENT_INDEX(5);
+ if (exec)
+ dw2 |= EOP_EXEC;
+
+ amdgpu_ring_write(ring, dw2);
amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
/*
@@ -5501,7 +5607,7 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
{
- return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
+ return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
}
static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
@@ -5510,7 +5616,7 @@ static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell)
- wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
else
BUG();
return wptr;
@@ -5522,7 +5628,7 @@ static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell) {
- atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else{
BUG(); /* only DOORBELL method supported on gfx9 now */
@@ -5562,35 +5668,116 @@ static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, 0);
}
-static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
{
+ struct amdgpu_device *adev = ring->adev;
struct v9_ce_ib_state ce_payload = {0};
- uint64_t csa_addr;
+ uint64_t offset, ce_payload_gpu_addr;
+ void *ce_payload_cpu_addr;
int cnt;
cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
- csa_addr = amdgpu_csa_vaddr(ring->adev);
+
+ offset = offsetof(struct v9_gfx_meta_data, ce_payload);
+ ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
WRITE_DATA_DST_SEL(8) |
WR_CONFIRM) |
WRITE_DATA_CACHE_POLICY(0));
- amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
- amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
- amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
+ amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
+
+ amdgpu_ring_ib_on_emit_ce(ring);
+
+ if (resume)
+ amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
+ sizeof(ce_payload) >> 2);
+ else
+ amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
+ sizeof(ce_payload) >> 2);
+}
+
+static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ ring->trail_seq += 1;
+ amdgpu_ring_alloc(ring, 13);
+ gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+ ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
+
+ /* assert IB preemption, emit the trailing fence */
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
+ ring->trail_fence_gpu_addr,
+ ring->trail_seq);
+
+ amdgpu_ring_commit(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*ring->trail_fence_cpu_addr))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
+ }
+
+ /*reset the CP_VMID_PREEMPT after trailing fence*/
+ amdgpu_ring_emit_wreg(ring,
+ SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
+ 0x0);
+ amdgpu_ring_commit(ring);
+
+ /* deassert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+ return r;
}
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
{
+ struct amdgpu_device *adev = ring->adev;
struct v9_de_ib_state de_payload = {0};
- uint64_t csa_addr, gds_addr;
+ uint64_t offset, gds_addr, de_payload_gpu_addr;
+ void *de_payload_cpu_addr;
int cnt;
- csa_addr = amdgpu_csa_vaddr(ring->adev);
- gds_addr = csa_addr + 4096;
- de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
- de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
+ offset = offsetof(struct v9_gfx_meta_data, de_payload);
+ de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+
+ gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+ AMDGPU_CSA_SIZE - adev->gds.gds_size,
+ PAGE_SIZE);
+
+ if (usegds) {
+ de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
+ de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
+ }
cnt = (sizeof(de_payload) >> 2) + 4 - 2;
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
@@ -5598,9 +5785,16 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
WRITE_DATA_DST_SEL(8) |
WR_CONFIRM) |
WRITE_DATA_CACHE_POLICY(0));
- amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
- amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
- amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
+ amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
+
+ amdgpu_ring_ib_on_emit_de(ring);
+ if (resume)
+ amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
+ sizeof(de_payload) >> 2);
+ else
+ amdgpu_ring_write_multiple(ring, (void *)&de_payload,
+ sizeof(de_payload) >> 2);
}
static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
@@ -5616,8 +5810,9 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
uint32_t dw2 = 0;
- if (amdgpu_sriov_vf(ring->adev))
- gfx_v9_0_ring_emit_ce_meta(ring);
+ gfx_v9_0_ring_emit_ce_meta(ring,
+ (!amdgpu_sriov_vf(ring->adev) &&
+ flags & AMDGPU_IB_PREEMPTED) ? true : false);
dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
if (flags & AMDGPU_HAVE_CTX_SWITCH) {
@@ -5644,31 +5839,21 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
amdgpu_ring_write(ring, 0);
}
-static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
unsigned ret;
amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, 0);
ret = ring->wptr & ring->buf_mask;
- amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
-{
- unsigned cur;
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr & ring->buf_mask) - 1;
- if (likely(cur > offset))
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
-}
-
static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
uint32_t reg_val_offs)
{
@@ -5741,7 +5926,9 @@ static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
WREG32_SOC15(GC, 0, mmSQ_CMD, value);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
@@ -5796,33 +5983,111 @@ static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
- mec_int_cntl = RREG32(mec_int_cntl_reg);
+ mec_int_cntl = RREG32_SOC15_IP(GC,mec_int_cntl_reg);
mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
TIME_STAMP_INT_ENABLE, 0);
- WREG32(mec_int_cntl_reg, mec_int_cntl);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
break;
case AMDGPU_IRQ_STATE_ENABLE:
- mec_int_cntl = RREG32(mec_int_cntl_reg);
+ mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
TIME_STAMP_INT_ENABLE, 1);
- WREG32(mec_int_cntl_reg, mec_int_cntl);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
break;
default:
break;
}
}
+static u32 gfx_v9_0_get_cpc_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+ if (me != 1)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
+ case 2:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
+ case 3:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
+ default:
+ return 0;
+ }
+}
+
static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
enum amdgpu_interrupt_state state)
{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
case AMDGPU_IRQ_STATE_ENABLE:
WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
PRIV_REG_INT_ENABLE,
state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v9_0_set_bad_op_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
break;
default:
break;
@@ -5943,7 +6208,15 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
switch (me_id) {
case 0:
- amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ if (adev->gfx.num_gfx_rings) {
+ if (!adev->gfx.mcbp) {
+ amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ } else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
+ /* Fence signals are handled on the software rings*/
+ for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
+ amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
+ }
+ }
break;
case 1:
case 2:
@@ -5996,6 +6269,15 @@ static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v9_0_bad_op_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal opcode in command stream\n");
+ gfx_v9_0_fault(adev, entry);
+ return 0;
+}
+
static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -6448,7 +6730,7 @@ static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
};
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
- void *inject_if)
+ void *inject_if, uint32_t instance_mask)
{
struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
int ret;
@@ -6487,13 +6769,13 @@ static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
block_info.value = info->value;
mutex_lock(&adev->grbm_idx_mutex);
- ret = psp_ras_trigger_error(&adev->psp, &block_info);
+ ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
mutex_unlock(&adev->grbm_idx_mutex);
return ret;
}
-static const char *vml2_mems[] = {
+static const char * const vml2_mems[] = {
"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
"UTC_VML2_BANK_CACHE_0_4K_MEM0",
@@ -6512,7 +6794,7 @@ static const char *vml2_mems[] = {
"UTC_VML2_BANK_CACHE_3_4K_MEM1",
};
-static const char *vml2_walker_mems[] = {
+static const char * const vml2_walker_mems[] = {
"UTC_VML2_CACHE_PDE0_MEM0",
"UTC_VML2_CACHE_PDE0_MEM1",
"UTC_VML2_CACHE_PDE1_MEM0",
@@ -6522,7 +6804,7 @@ static const char *vml2_walker_mems[] = {
"UTC_VML2_RDIF_LOG_FIFO",
};
-static const char *atc_l2_cache_2m_mems[] = {
+static const char * const atc_l2_cache_2m_mems[] = {
"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
@@ -6715,7 +6997,7 @@ static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
- gfx_v9_0_select_se_sh(adev, j, 0x0, k);
+ amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
}
}
@@ -6758,7 +7040,7 @@ static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
}
-static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
+static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
@@ -6767,7 +7049,7 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
uint32_t reg_value;
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
- return -EINVAL;
+ return;
err_data->ue_count = 0;
err_data->ce_count = 0;
@@ -6777,7 +7059,7 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
- gfx_v9_0_select_se_sh(adev, j, 0, k);
+ amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
reg_value =
RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
if (reg_value)
@@ -6792,12 +7074,10 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
err_data->ce_count += sec_count;
err_data->ue_count += ded_count;
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
gfx_v9_0_query_utc_edc_status(adev, err_data);
-
- return 0;
}
static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
@@ -6878,6 +7158,230 @@ static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
}
}
+static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
+{
+ /* Header itself is a NOP packet */
+ if (num_nop == 1) {
+ amdgpu_ring_write(ring, ring->funcs->nop);
+ return;
+ }
+
+ /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
+
+ /* Header is at index 0, followed by num_nops - 1 NOP packet's */
+ amdgpu_ring_insert_nop(ring, num_nop - 1);
+}
+
+static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+ int i, r;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
+ 0, 0);
+ amdgpu_ring_commit(kiq_ring);
+
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_ring_test_ring(kiq_ring);
+ if (r)
+ return r;
+
+ /* make sure dequeue is complete*/
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ if (r) {
+ dev_err(adev->dev, "fail to wait on hqd deactive\n");
+ return r;
+ }
+
+ r = gfx_v9_0_kcq_init_queue(ring, true);
+ if (r) {
+ dev_err(adev->dev, "fail to init kcq\n");
+ return r;
+ }
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
+ if (r) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+ kiq->pmf->kiq_map_queues(kiq_ring, ring);
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r) {
+ DRM_ERROR("fail to remap queue\n");
+ return r;
+ }
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ for (i = 0; i < reg_count; i++)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_reg_list_9[i].reg_name,
+ adev->gfx.ip_dump_core[i]);
+
+ /* print compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
+ drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.mec.num_mec,
+ adev->gfx.mec.num_pipe_per_mec,
+ adev->gfx.mec.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i && gc_cp_reg_list_9[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ "mmCP_MEC_ME2_HEADER_DUMP",
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ else
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_9[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+
+}
+
+static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
+
+ if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings)
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < reg_count; i++)
+ adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i]));
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ /* ME0 is for GFX so start from 1 for CP */
+ soc15_grbm_select(adev, 1 + i, j, k, 0, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i && gc_cp_reg_list_9[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP));
+ else
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_cp_reg_list_9[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+
+}
+
+static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* Emit the cleaner shader */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ else
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER_9_0, 0));
+
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+}
+
+static void gfx_v9_0_ring_begin_use_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ip_block *gfx_block =
+ amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+
+ /* Raven and PCO APUs seem to have stability issues
+ * with compute and gfxoff and gfx pg. Disable gfx pg during
+ * submission and allow again afterwards.
+ */
+ if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0))
+ gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_UNGATE);
+}
+
+static void gfx_v9_0_ring_end_use_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ip_block *gfx_block =
+ amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+
+ /* Raven and PCO APUs seem to have stability issues
+ * with compute and gfxoff and gfx pg. Disable gfx pg during
+ * submission and allow again afterwards.
+ */
+ if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0))
+ gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_GATE);
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
.name = "gfx_v9_0",
.early_init = gfx_v9_0_early_init,
@@ -6894,6 +7398,8 @@ static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
.set_clockgating_state = gfx_v9_0_set_clockgating_state,
.set_powergating_state = gfx_v9_0_set_powergating_state,
.get_clockgating_state = gfx_v9_0_get_clockgating_state,
+ .dump_ip_state = gfx_v9_ip_dump,
+ .print_ip_state = gfx_v9_ip_print,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
@@ -6901,7 +7407,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB_0,
+ .secure_submission_supported = true,
.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
@@ -6925,7 +7431,65 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
5 + /* HDP_INVL */
8 + 8 + /* FENCE x2 */
2 + /* SWITCH_BUFFER */
- 7, /* gfx_v9_0_emit_mem_sync */
+ 7 + /* gfx_v9_0_emit_mem_sync */
+ 2, /* gfx_v9_0_ring_emit_cleaner_shader */
+ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
+ .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
+ .emit_fence = gfx_v9_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v9_0_ring_test_ring,
+ .insert_nop = gfx_v9_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_switch_buffer = gfx_v9_ring_emit_sb,
+ .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
+ .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
+ .preempt_ib = gfx_v9_0_ring_preempt_ib,
+ .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
+ .emit_wreg = gfx_v9_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
+ .soft_recovery = gfx_v9_0_ring_soft_recovery,
+ .emit_mem_sync = gfx_v9_0_emit_mem_sync,
+ .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
+ .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
+ .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
+};
+
+static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
+ .type = AMDGPU_RING_TYPE_GFX,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = amdgpu_sw_ring_get_rptr_gfx,
+ .get_wptr = amdgpu_sw_ring_get_wptr_gfx,
+ .set_wptr = amdgpu_sw_ring_set_wptr_gfx,
+ .emit_frame_size = /* totally 242 maximum if 16 IBs */
+ 5 + /* COND_EXEC */
+ 7 + /* PIPELINE_SYNC */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* VM_FLUSH */
+ 8 + /* FENCE for VM_FLUSH */
+ 20 + /* GDS switch */
+ 4 + /* double SWITCH_BUFFER,
+ * the first COND_EXEC jump to the place just
+ * prior to this double SWITCH_BUFFER
+ */
+ 5 + /* COND_EXEC */
+ 7 + /* HDP_flush */
+ 4 + /* VGT_flush */
+ 14 + /* CE_META */
+ 31 + /* DE_META */
+ 3 + /* CNTX_CTRL */
+ 5 + /* HDP_INVL */
+ 8 + 8 + /* FENCE x2 */
+ 2 + /* SWITCH_BUFFER */
+ 7 + /* gfx_v9_0_emit_mem_sync */
+ 2, /* gfx_v9_0_ring_emit_cleaner_shader */
.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
.emit_fence = gfx_v9_0_ring_emit_fence,
@@ -6935,18 +7499,23 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
.test_ring = gfx_v9_0_ring_test_ring,
.test_ib = gfx_v9_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v9_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_switch_buffer = gfx_v9_ring_emit_sb,
.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
- .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
.soft_recovery = gfx_v9_0_ring_soft_recovery,
.emit_mem_sync = gfx_v9_0_emit_mem_sync,
+ .patch_cntl = gfx_v9_0_ring_patch_cntl,
+ .patch_de = gfx_v9_0_ring_patch_de_meta,
+ .patch_ce = gfx_v9_0_ring_patch_ce_meta,
+ .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
+ .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
+ .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
@@ -6954,7 +7523,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB_0,
.get_rptr = gfx_v9_0_ring_get_rptr_compute,
.get_wptr = gfx_v9_0_ring_get_wptr_compute,
.set_wptr = gfx_v9_0_ring_set_wptr_compute,
@@ -6965,11 +7533,11 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
7 + /* gfx_v9_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* gfx_v9_0_ring_emit_vm_flush */
8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
7 + /* gfx_v9_0_emit_mem_sync */
5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
- 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
+ 15 + /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
+ 2, /* gfx_v9_0_ring_emit_cleaner_shader */
.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
.emit_ib = gfx_v9_0_ring_emit_ib_compute,
.emit_fence = gfx_v9_0_ring_emit_fence,
@@ -6979,13 +7547,18 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
.test_ring = gfx_v9_0_ring_test_ring,
.test_ib = gfx_v9_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v9_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
+ .soft_recovery = gfx_v9_0_ring_soft_recovery,
.emit_mem_sync = gfx_v9_0_emit_mem_sync,
.emit_wave_limit = gfx_v9_0_emit_wave_limit,
+ .reset = gfx_v9_0_reset_kcq,
+ .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v9_0_ring_begin_use_compute,
+ .end_use = gfx_v9_0_ring_end_use_compute,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
@@ -6993,7 +7566,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB_0,
.get_rptr = gfx_v9_0_ring_get_rptr_compute,
.get_wptr = gfx_v9_0_ring_get_wptr_compute,
.set_wptr = gfx_v9_0_ring_set_wptr_compute,
@@ -7004,7 +7576,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
7 + /* gfx_v9_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* gfx_v9_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
@@ -7015,17 +7586,23 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
};
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
+ adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
+ if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
+ for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
+ adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
+ }
+
for (i = 0; i < adev->gfx.num_compute_rings; i++)
adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}
@@ -7040,6 +7617,11 @@ static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
.process = gfx_v9_0_priv_reg_irq,
};
+static const struct amdgpu_irq_src_funcs gfx_v9_0_bad_op_irq_funcs = {
+ .set = gfx_v9_0_set_bad_op_fault_state,
+ .process = gfx_v9_0_bad_op_irq,
+};
+
static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
.set = gfx_v9_0_set_priv_inst_fault_state,
.process = gfx_v9_0_priv_inst_irq,
@@ -7059,6 +7641,9 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gfx.priv_reg_irq.num_types = 1;
adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
+ adev->gfx.bad_op_irq.num_types = 1;
+ adev->gfx.bad_op_irq.funcs = &gfx_v9_0_bad_op_irq_funcs;
+
adev->gfx.priv_inst_irq.num_types = 1;
adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
@@ -7068,7 +7653,7 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
case IP_VERSION(9, 2, 1):
case IP_VERSION(9, 4, 0):
@@ -7087,7 +7672,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
/* init asci gds info */
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
case IP_VERSION(9, 2, 1):
case IP_VERSION(9, 4, 0):
@@ -7109,7 +7694,7 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
break;
}
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
case IP_VERSION(9, 4, 0):
adev->gds.gds_compute_max_wave_id = 0x7ff;
@@ -7197,7 +7782,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
mask = 1;
ao_bitmap = 0;
counter = 0;
- gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
gfx_v9_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
@@ -7214,7 +7799,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
* SE6,SH0 --> bitmap[2][1]
* SE7,SH0 --> bitmap[3][1]
*/
- cu_info->bitmap[i % 4][j + i / 4] = bitmap;
+ cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
if (bitmap & mask) {
@@ -7230,7 +7815,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
}
}
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h
index dfe8d4841f58..f9f6edc5e558 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h
@@ -27,6 +27,6 @@
extern const struct amdgpu_ip_block_version gfx_v9_0_ip_block;
void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
- u32 instance);
+ u32 instance, int xcc_id);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h
new file mode 100644
index 000000000000..0b6bd09b7529
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/* Define the cleaner shader gfx_9_0 */
+static const u32 __maybe_unused gfx_9_0_cleaner_shader_hex[] = {
+ /* Add the cleaner shader code here */
+};
+
+/* Define the cleaner shader gfx_9_4_2 */
+static const u32 gfx_9_4_2_cleaner_shader_hex[] = {
+ 0xbf068100, 0xbf84003b,
+ 0xbf8a0000, 0xb07c0000,
+ 0xbe8200ff, 0x00000078,
+ 0xbf110802, 0x7e000280,
+ 0x7e020280, 0x7e040280,
+ 0x7e060280, 0x7e080280,
+ 0x7e0a0280, 0x7e0c0280,
+ 0x7e0e0280, 0x80828802,
+ 0xbe803202, 0xbf84fff5,
+ 0xbf9c0000, 0xbe8200ff,
+ 0x80000000, 0x86020102,
+ 0xbf840011, 0xbefe00c1,
+ 0xbeff00c1, 0xd28c0001,
+ 0x0001007f, 0xd28d0001,
+ 0x0002027e, 0x10020288,
+ 0xbe8200bf, 0xbefc00c1,
+ 0xd89c2000, 0x00020201,
+ 0xd89c6040, 0x00040401,
+ 0x320202ff, 0x00000400,
+ 0x80828102, 0xbf84fff8,
+ 0xbefc00ff, 0x0000005c,
+ 0xbf800000, 0xbe802c80,
+ 0xbe812c80, 0xbe822c80,
+ 0xbe832c80, 0x80fc847c,
+ 0xbf84fffa, 0xbee60080,
+ 0xbee70080, 0xbeea0180,
+ 0xbeec0180, 0xbeee0180,
+ 0xbef00180, 0xbef20180,
+ 0xbef40180, 0xbef60180,
+ 0xbef80180, 0xbefa0180,
+ 0xbf810000, 0xbf8d0001,
+ 0xbefc00ff, 0x0000005c,
+ 0xbf800000, 0xbe802c80,
+ 0xbe812c80, 0xbe822c80,
+ 0xbe832c80, 0x80fc847c,
+ 0xbf84fffa, 0xbee60080,
+ 0xbee70080, 0xbeea01ff,
+ 0x000000ee, 0xbf810000,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
index b4789dfc2bb9..6028afd81690 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
@@ -27,7 +27,6 @@
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
-#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"
#include "gc/gc_9_4_1_offset.h"
@@ -863,7 +862,7 @@ static int gfx_v9_4_ras_error_count(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
+static void gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
@@ -872,7 +871,7 @@ static int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
uint32_t reg_value;
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
- return -EINVAL;
+ return;
err_data->ue_count = 0;
err_data->ce_count = 0;
@@ -903,7 +902,6 @@ static int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
gfx_v9_4_query_utc_edc_status(adev, err_data);
- return 0;
}
static void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
@@ -971,31 +969,9 @@ static void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255);
}
-static int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev,
- void *inject_if)
-{
- struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
- int ret;
- struct ta_ras_trigger_error_input block_info = { 0 };
-
- if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
- return -EINVAL;
-
- block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
- block_info.sub_block_index = info->head.sub_block_index;
- block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
- block_info.address = info->address;
- block_info.value = info->value;
-
- mutex_lock(&adev->grbm_idx_mutex);
- ret = psp_ras_trigger_error(&adev->psp, &block_info);
- mutex_unlock(&adev->grbm_idx_mutex);
-
- return ret;
-}
-
-static const struct soc15_reg_entry gfx_v9_4_ea_err_status_regs =
- { SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32 };
+static const struct soc15_reg_entry gfx_v9_4_ea_err_status_regs = {
+ SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32
+};
static void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev)
{
@@ -1029,11 +1005,15 @@ static void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev)
mutex_unlock(&adev->grbm_idx_mutex);
}
-const struct amdgpu_gfx_ras_funcs gfx_v9_4_ras_funcs = {
- .ras_late_init = amdgpu_gfx_ras_late_init,
- .ras_fini = amdgpu_gfx_ras_fini,
- .ras_error_inject = &gfx_v9_4_ras_error_inject,
- .query_ras_error_count = &gfx_v9_4_query_ras_error_count,
- .reset_ras_error_count = &gfx_v9_4_reset_ras_error_count,
- .query_ras_error_status = &gfx_v9_4_query_ras_error_status,
+
+const struct amdgpu_ras_block_hw_ops gfx_v9_4_ras_ops = {
+ .query_ras_error_count = &gfx_v9_4_query_ras_error_count,
+ .reset_ras_error_count = &gfx_v9_4_reset_ras_error_count,
+ .query_ras_error_status = &gfx_v9_4_query_ras_error_status,
+};
+
+struct amdgpu_gfx_ras gfx_v9_4_ras = {
+ .ras_block = {
+ .hw_ops = &gfx_v9_4_ras_ops,
+ },
};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h
index bdd16b568021..ca520a767267 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h
@@ -24,6 +24,6 @@
#ifndef __GFX_V9_4_H__
#define __GFX_V9_4_H__
-extern const struct amdgpu_gfx_ras_funcs gfx_v9_4_ras_funcs;
+extern struct amdgpu_gfx_ras gfx_v9_4_ras;
#endif /* __GFX_V9_4_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
index c4f37a161875..8058ea91ecaf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -412,7 +412,7 @@ static int gfx_v9_4_2_run_shader(struct amdgpu_device *adev,
r = amdgpu_ib_schedule(ring, 1, ib, NULL, fence_ptr);
if (r) {
dev_err(adev->dev, "ib submit failed (%d).\n", r);
- amdgpu_ib_free(adev, ib, NULL);
+ amdgpu_ib_free(ib, NULL);
}
return r;
}
@@ -611,16 +611,16 @@ static int gfx_v9_4_2_do_sgprs_init(struct amdgpu_device *adev)
}
disp2_failed:
- amdgpu_ib_free(adev, &disp_ibs[2], NULL);
+ amdgpu_ib_free(&disp_ibs[2], NULL);
dma_fence_put(fences[2]);
disp1_failed:
- amdgpu_ib_free(adev, &disp_ibs[1], NULL);
+ amdgpu_ib_free(&disp_ibs[1], NULL);
dma_fence_put(fences[1]);
disp0_failed:
- amdgpu_ib_free(adev, &disp_ibs[0], NULL);
+ amdgpu_ib_free(&disp_ibs[0], NULL);
dma_fence_put(fences[0]);
pro_end:
- amdgpu_ib_free(adev, &wb_ib, NULL);
+ amdgpu_ib_free(&wb_ib, NULL);
if (r)
dev_info(adev->dev, "Init SGPRS Failed\n");
@@ -687,10 +687,10 @@ static int gfx_v9_4_2_do_vgprs_init(struct amdgpu_device *adev)
}
disp_failed:
- amdgpu_ib_free(adev, &disp_ib, NULL);
+ amdgpu_ib_free(&disp_ib, NULL);
dma_fence_put(fence);
pro_end:
- amdgpu_ib_free(adev, &wb_ib, NULL);
+ amdgpu_ib_free(&wb_ib, NULL);
if (r)
dev_info(adev->dev, "Init VGPRS Failed\n");
@@ -746,8 +746,18 @@ void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev,
die_id);
break;
}
+}
+
+void gfx_v9_4_2_init_sq(struct amdgpu_device *adev)
+{
+ uint32_t data;
- return;
+ if (adev->gfx.mec_fw_version >= 98) {
+ adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN;
+ data = RREG32_SOC15(GC, 0, regSQ_CONFIG1);
+ data = REG_SET_FIELD(data, SQ_CONFIG1, DISABLE_XNACK_CHECK_IN_RETRY_DISABLE, 1);
+ WREG32_SOC15(GC, 0, regSQ_CONFIG1, data);
+ }
}
void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev,
@@ -761,7 +771,7 @@ void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev,
for (i = first_vmid; i < last_vmid; i++) {
data = 0;
- soc15_grbm_select(adev, 0, 0, 0, i);
+ soc15_grbm_select(adev, 0, 0, 0, i, 0);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE,
@@ -769,15 +779,18 @@ void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev,
WREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_PER_VMID_CNTL), data);
}
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_TRAP_DATA0), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_TRAP_DATA1), 0);
}
void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev)
{
u32 tmp;
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
tmp = 0;
tmp = REG_SET_FIELD(tmp, GC_THROTTLE_CTRL, PATTERN_MODE, 1);
@@ -1545,8 +1558,8 @@ static void gfx_v9_4_2_log_utc_edc_count(struct amdgpu_device *adev,
uint32_t ded_cnt)
{
uint32_t bank, way, mem;
- static const char *vml2_way_str[] = { "BIGK", "4K" };
- static const char *utcl2_rounter_str[] = { "VMC", "APT" };
+ static const char * const vml2_way_str[] = { "BIGK", "4K" };
+ static const char * const utcl2_router_str[] = { "VMC", "APT" };
mem = instance % blk->num_mem_blocks;
way = (instance / blk->num_mem_blocks) % blk->num_ways;
@@ -1567,7 +1580,7 @@ static void gfx_v9_4_2_log_utc_edc_count(struct amdgpu_device *adev,
dev_info(
adev->dev,
"GFX SubBlock UTCL2_ROUTER_IFIF%d_GROUP0_%s, SED %d, DED %d\n",
- bank, utcl2_rounter_str[mem], sec_cnt, ded_cnt);
+ bank, utcl2_router_str[mem], sec_cnt, ded_cnt);
break;
case ATC_L2_CACHE_2M:
dev_info(
@@ -1641,14 +1654,14 @@ static int gfx_v9_4_2_query_utc_edc_count(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v9_4_2_query_ras_error_count(struct amdgpu_device *adev,
+static void gfx_v9_4_2_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
uint32_t sec_count = 0, ded_count = 0;
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
- return -EINVAL;
+ return;
err_data->ue_count = 0;
err_data->ce_count = 0;
@@ -1661,7 +1674,6 @@ static int gfx_v9_4_2_query_ras_error_count(struct amdgpu_device *adev,
err_data->ce_count += sec_count;
err_data->ue_count += ded_count;
- return 0;
}
static void gfx_v9_4_2_reset_utc_err_status(struct amdgpu_device *adev)
@@ -1700,28 +1712,6 @@ static void gfx_v9_4_2_reset_ras_error_count(struct amdgpu_device *adev)
gfx_v9_4_2_query_utc_edc_count(adev, NULL, NULL);
}
-static int gfx_v9_4_2_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
-{
- struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
- int ret;
- struct ta_ras_trigger_error_input block_info = { 0 };
-
- if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
- return -EINVAL;
-
- block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
- block_info.sub_block_index = info->head.sub_block_index;
- block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
- block_info.address = info->address;
- block_info.value = info->value;
-
- mutex_lock(&adev->grbm_idx_mutex);
- ret = psp_ras_trigger_error(&adev->psp, &block_info);
- mutex_unlock(&adev->grbm_idx_mutex);
-
- return ret;
-}
-
static void gfx_v9_4_2_query_ea_err_status(struct amdgpu_device *adev)
{
uint32_t i, j;
@@ -1931,13 +1921,18 @@ static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev)
mutex_unlock(&adev->grbm_idx_mutex);
}
-const struct amdgpu_gfx_ras_funcs gfx_v9_4_2_ras_funcs = {
- .ras_late_init = amdgpu_gfx_ras_late_init,
- .ras_fini = amdgpu_gfx_ras_fini,
- .ras_error_inject = &gfx_v9_4_2_ras_error_inject,
- .query_ras_error_count = &gfx_v9_4_2_query_ras_error_count,
- .reset_ras_error_count = &gfx_v9_4_2_reset_ras_error_count,
- .query_ras_error_status = &gfx_v9_4_2_query_ras_error_status,
- .reset_ras_error_status = &gfx_v9_4_2_reset_ras_error_status,
+
+/* RAS hardware callbacks for GFX v9.4.2; read-only table referenced by gfx_v9_4_2_ras.ras_block.hw_ops. */
+const struct amdgpu_ras_block_hw_ops gfx_v9_4_2_ras_ops = {
+ .query_ras_error_count = &gfx_v9_4_2_query_ras_error_count,
+ .reset_ras_error_count = &gfx_v9_4_2_reset_ras_error_count,
+ .query_ras_error_status = &gfx_v9_4_2_query_ras_error_status,
+ .reset_ras_error_status = &gfx_v9_4_2_reset_ras_error_status,
+};
+
+struct amdgpu_gfx_ras gfx_v9_4_2_ras = {
+ .ras_block = {
+ .hw_ops = &gfx_v9_4_2_ras_ops,
+ },
.enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h
index 6db1f88509af..a603724c1dfc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h
@@ -28,9 +28,10 @@ void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev,
uint32_t first_vmid, uint32_t last_vmid);
void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev,
uint32_t die_id);
+void gfx_v9_4_2_init_sq(struct amdgpu_device *adev);
void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev);
int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev);
-extern const struct amdgpu_gfx_ras_funcs gfx_v9_4_2_ras_funcs;
+extern struct amdgpu_gfx_ras gfx_v9_4_2_ras;
#endif /* __GFX_V9_4_2_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2_cleaner_shader.asm
new file mode 100644
index 000000000000..35b8cf9070bd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2_cleaner_shader.asm
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader cleans LDS, SGPRs and VGPRs. It is the first 64 dwords (256 bytes) of the 192-dword cleaner shader.
+// To turn this shader program on for compilation, change this to main and the lower shader main to main_1.
+
+// MI200 : Clear SGPRs, VGPRs and LDS
+// Uses two kernels launched separately:
+// 1. Clean VGPRs, LDS, and lower SGPRs
+// Launches one workgroup per CU, each workgroup with 4x wave64 per SIMD in the CU
+// Waves are "wave64" and have 128 VGPRs each, which uses all 512 VGPRs per SIMD
+// Waves in the workgroup share the 64KB of LDS
+// Each wave clears SGPRs 0 - 95. Because there are 4 waves/SIMD, this is physical SGPRs 0-383
+// Each wave clears 128 VGPRs, so all 512 in the SIMD
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+// 2. Clean remaining SGPRs
+// Launches a workgroup with 24 waves per workgroup, yielding 6 waves per SIMD in each CU
+// Waves are allocating 96 SGPRs
+// CP sets up SPI_RESOURCE_RESERVE_* registers to prevent these waves from allocating SGPRs 0-223.
+// As such, these 6 waves per SIMD are allocated physical SGPRs 224-799
+// Barriers do not work for >16 waves per workgroup, so we cannot start with S_BARRIER
+// Instead, the shader starts with an S_SETHALT 1. Once all waves are launched CP will send unhalt command
+// The shader then clears all SGPRs allocated to it, cleaning out physical SGPRs 224-799
+
+shader main
+ asic(MI200)
+ type(CS)
+ wave_size(64)
+// Note: original source code from SQ team
+
+// (theoretical fastest = ~512clks vgpr + 1536 lds + ~128 sgpr = 2176 clks)
+
+ s_cmp_eq_u32 s0, 1 // Bit0 is set, sgpr0 is set then clear VGPRS and LDS as FW set COMPUTE_USER_DATA_3
+ s_cbranch_scc0 label_0023 // Clean VGPRs and LDS if sgpr0 of wave is set, scc = (s0 == 1)
+ S_BARRIER
+
+ s_movk_i32 m0, 0x0000
+ s_mov_b32 s2, 0x00000078 // Loop 128/8=16 times (loop unrolled for performance)
+ //
+ // CLEAR VGPRs
+ //
+ s_set_gpr_idx_on s2, 0x8 // enable Dest VGPR indexing
+label_0005:
+ v_mov_b32 v0, 0
+ v_mov_b32 v1, 0
+ v_mov_b32 v2, 0
+ v_mov_b32 v3, 0
+ v_mov_b32 v4, 0
+ v_mov_b32 v5, 0
+ v_mov_b32 v6, 0
+ v_mov_b32 v7, 0
+ s_sub_u32 s2, s2, 8
+ s_set_gpr_idx_idx s2
+ s_cbranch_scc0 label_0005
+ s_set_gpr_idx_off
+
+ //
+ //
+
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s1 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+ s_cbranch_scc0 label_clean_sgpr_1 // Clean LDS if its first wave of ThreadGroup/WorkGroup
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x00000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+ //
+ // CLEAR SGPRs
+ //
+label_clean_sgpr_1:
+ s_mov_b32 m0, 0x0000005c // Loop 96/4=24 times (loop unrolled for performance)
+ s_nop 0
+label_sgpr_loop:
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ //clear vcc, flat scratch
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 vcc, 0 //clear vcc
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+s_endpgm
+
+label_0023:
+
+ s_sethalt 1
+
+ s_mov_b32 m0, 0x0000005c // Loop 96/4=24 times (loop unrolled for performance)
+ s_nop 0
+label_sgpr_loop1:
+
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop1
+
+ //clear vcc, flat scratch
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 vcc, 0xee //overwrite vcc with 0xee (note: not zeroed, unlike the first kernel path)
+
+s_endpgm
+end
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
new file mode 100644
index 000000000000..cbb74ffc4792
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -0,0 +1,5062 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+#include "vega10_enum.h"
+
+#include "v9_structs.h"
+
+#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
+
+#include "gc/gc_9_4_3_offset.h"
+#include "gc/gc_9_4_3_sh_mask.h"
+
+#include "gfx_v9_4_3.h"
+#include "gfx_v9_4_3_cleaner_shader.h"
+#include "amdgpu_xcp.h"
+#include "amdgpu_aca.h"
+
+MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_4_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_5_0_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_5_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_3_sjt_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_4_sjt_mec.bin");
+
+/* Bytes reserved per compute ring inside the HPD EOP buffer */
+#define GFX9_MEC_HPD_SIZE 4096
+#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+
+/* Value written to regGB_ADDR_CONFIG and cached in adev->gfx.config */
+#define GOLDEN_GB_ADDR_CONFIG 0x2a114042
+#define CP_HQD_PERSISTENT_STATE_DEFAULT 0xbe05301
+
+/* Half-open [LOW, HIGH) ranges of 16-bit offsets treated as XCC-local registers */
+#define XCC_REG_RANGE_0_LOW 0x2000 /* XCC gfxdec0 lower Bound */
+#define XCC_REG_RANGE_0_HIGH 0x3400 /* XCC gfxdec0 upper Bound */
+#define XCC_REG_RANGE_1_LOW 0xA000 /* XCC gfxdec1 lower Bound */
+#define XCC_REG_RANGE_1_HIGH 0x10000 /* XCC gfxdec1 upper Bound */
+
+/*
+ * Keep only the lower 16 bits of a register offset.
+ *
+ * The argument is parenthesized so that an expression argument such as
+ * "a | b" is masked as a whole instead of being split by operator
+ * precedence ('&' binds tighter than '|').
+ */
+#define NORMALIZE_XCC_REG_OFFSET(offset) \
+	((offset) & 0xFFFF)
+
+/*
+ * GC status registers for gfx 9.4.3, one SOC15_REG_ENTRY_STR(GC, 0, ...)
+ * entry per register.
+ * NOTE(review): presumably the register set captured by the IP state dump
+ * (the array size is consumed by gfx_v9_4_3_alloc_ip_dump()) - confirm.
+ */
+static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQC_DCACHE_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQC_ICACHE_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQ_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regTCP_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regVM_L2_PROTECTION_FAULT_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regVM_L2_PROTECTION_FAULT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC2_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_COMMAND),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_MESSAGE),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_ARGUMENT_1),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_ARGUMENT_2),
+ SOC15_REG_ENTRY_STR(GC, 0, regSMU_RLC_RESPONSE),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_INT_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_GPM_GENERAL_6),
+ /* SE status registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3)
+};
+
+/*
+ * Compute queue (CP_HQD_*) registers for gfx 9.4.3, one
+ * SOC15_REG_ENTRY_STR(GC, 0, ...) entry per register.
+ */
+static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9_4_3[] = {
+ /* compute queue registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ACTIVE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GFX_STATUS),
+ /*
+ * The same register is listed eight times.
+ * NOTE(review): presumably intentional so that it is sampled eight
+ * times in a row - confirm before de-duplicating.
+ */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+};
+
+struct amdgpu_gfx_ras gfx_v9_4_3_ras;
+
+static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev);
+static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev);
+static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info);
+static void gfx_v9_4_3_xcc_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v9_4_3_xcc_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
+
+/*
+ * Emit a PACKET3_SET_RESOURCES packet (8 dwords in total) on the KIQ ring.
+ *
+ * @kiq_ring: KIQ ring the packet is written to
+ * @queue_mask: 64-bit queue mask, emitted as two dwords (low, then high)
+ *
+ * The cleaner shader address comes from adev->gfx.cleaner_shader_gpu_addr
+ * and is emitted shifted right by 8 bits.
+ */
+static void gfx_v9_4_3_kiq_set_resources(struct amdgpu_ring *kiq_ring,
+ uint64_t queue_mask)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ u64 shader_mc_addr;
+
+ /* Cleaner shader MC address */
+ shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ /* vmid_mask:0* queue_type:0 (KIQ) */
+ PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
+ amdgpu_ring_write(kiq_ring,
+ lower_32_bits(queue_mask)); /* queue mask lo */
+ amdgpu_ring_write(kiq_ring,
+ upper_32_bits(queue_mask)); /* queue mask hi */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
+ amdgpu_ring_write(kiq_ring, 0); /* oac mask */
+ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
+}
+
+/*
+ * Emit a PACKET3_MAP_QUEUES packet (7 dwords in total) on the KIQ ring to
+ * map a single queue.
+ *
+ * @kiq_ring: KIQ ring the packet is written to
+ * @ring: ring to map; supplies queue/pipe/me, doorbell index, MQD buffer
+ * and write-pointer writeback offset
+ */
+static void gfx_v9_4_3_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ /* wptr lives in the writeback buffer; wptr_offs is scaled by 4 bytes */
+ uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ /* engine select is 4 for GFX rings and 0 for every other ring type */
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+ PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+ /* packet ME field is 0 when ring->me == 1, otherwise 1 */
+ PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
+ /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
+ /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
+ PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+ /* num_queues: must be 1 */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
+/*
+ * Emit a PACKET3_UNMAP_QUEUES packet (6 dwords in total) on the KIQ ring.
+ *
+ * @kiq_ring: KIQ ring the packet is written to
+ * @ring: ring whose queue is addressed through its doorbell index
+ * @action: unmap action placed in the packet
+ * @gpu_addr: address emitted only for PREEMPT_QUEUES_NO_UNMAP
+ * @seq: value emitted only for PREEMPT_QUEUES_NO_UNMAP
+ *
+ * For every other action the three trailing dwords are written as zero.
+ */
+static void gfx_v9_4_3_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ /* engine select is 4 for GFX rings and 0 for every other ring type */
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_UNMAP_QUEUES_ACTION(action) |
+ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+ PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+ if (action == PREEMPT_QUEUES_NO_UNMAP) {
+ amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
+ /* seq is a u64 but is emitted as a single ring dword here */
+ amdgpu_ring_write(kiq_ring, seq);
+ } else {
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ }
+}
+
+/*
+ * Emit a PACKET3_QUERY_STATUS packet (7 dwords in total) on the KIQ ring.
+ *
+ * @kiq_ring: KIQ ring the packet is written to
+ * @ring: ring whose queue is addressed through its doorbell index
+ * @addr: 64-bit address emitted as low/high dwords
+ * @seq: 64-bit sequence value emitted as low/high dwords
+ */
+static void gfx_v9_4_3_kiq_query_status(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ u64 addr,
+ u64 seq)
+{
+ /* engine select is 4 for GFX rings and 0 for every other ring type */
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+ PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+ PACKET3_QUERY_STATUS_COMMAND(2));
+ /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+ PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+/*
+ * Emit a PACKET3_INVALIDATE_TLBS packet (2 dwords in total) on the KIQ ring.
+ *
+ * @kiq_ring: KIQ ring the packet is written to
+ * @pasid: PASID placed in the packet
+ * @flush_type: flush type placed in the packet
+ * @all_hub: value of the ALL_HUB field
+ */
+static void gfx_v9_4_3_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
+/*
+ * Reset one hardware queue through direct register writes.
+ *
+ * @kiq_ring: KIQ ring; only used to reach the owning device
+ * @queue_type: AMDGPU_RING_TYPE_* of the queue. Only
+ *              AMDGPU_RING_TYPE_COMPUTE is handled; any other type is
+ *              reported with dev_err() and left untouched
+ * @me_id: ME of the queue, passed to soc15_grbm_select()
+ * @pipe_id: pipe of the queue, passed to soc15_grbm_select()
+ * @queue_id: queue index, passed to soc15_grbm_select()
+ * @xcc_id: XCC the queue belongs to
+ * @vmid: not used by this implementation
+ *
+ * The register sequence runs with RLC safe mode entered and
+ * adev->srbm_mutex held. After requesting the dequeue and the SPI compute
+ * queue reset, CP_HQD_ACTIVE bit 0 is polled for up to adev->usec_timeout
+ * microseconds; a timeout is only logged.
+ */
+static void gfx_v9_4_3_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
+					  uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
+					  uint32_t xcc_id, uint32_t vmid)
+{
+	struct amdgpu_device *adev = kiq_ring->adev;
+	unsigned int i;
+
+	/* enter safe mode */
+	amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+	mutex_lock(&adev->srbm_mutex);
+	soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, xcc_id);
+
+	if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 0x2);
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSPI_COMPUTE_QUEUE_RESET, 0x1);
+		/* wait until the dequeue takes effect */
+		for (i = 0; i < adev->usec_timeout; i++) {
+			if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
+				break;
+			udelay(1);
+		}
+		if (i >= adev->usec_timeout)
+			dev_err(adev->dev, "fail to wait on hqd deactive\n");
+	} else {
+		/* queue_type is unsigned: use %u and a single trailing newline */
+		dev_err(adev->dev, "reset queue_type(%u) not supported\n", queue_type);
+	}
+
+	/*
+	 * NOTE(review): the select above passes xcc_id and this restore passes
+	 * instance 0, while gfx_v9_4_3_select_me_pipe_q() passes
+	 * GET_INST(GC, xcc_id) - confirm which instance index is intended.
+	 */
+	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+	/* exit safe mode */
+	amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
+}
+
+/*
+ * KIQ PM4 packet emitters for gfx 9.4.3.
+ *
+ * Each *_size value equals the number of dwords the matching emitter above
+ * writes with amdgpu_ring_write().
+ */
+static const struct kiq_pm4_funcs gfx_v9_4_3_kiq_pm4_funcs = {
+ .kiq_set_resources = gfx_v9_4_3_kiq_set_resources,
+ .kiq_map_queues = gfx_v9_4_3_kiq_map_queues,
+ .kiq_unmap_queues = gfx_v9_4_3_kiq_unmap_queues,
+ .kiq_query_status = gfx_v9_4_3_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx_v9_4_3_kiq_invalidate_tlbs,
+ .kiq_reset_hw_queue = gfx_v9_4_3_kiq_reset_hw_queue,
+ .set_resources_size = 8,
+ .map_queues_size = 7,
+ .unmap_queues_size = 6,
+ .query_status_size = 7,
+ .invalidate_tlbs_size = 2,
+};
+
+/* Point the KIQ of every XCC in adev->gfx.xcc_mask at the gfx 9.4.3 PM4 emitters. */
+static void gfx_v9_4_3_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+	int xcc;
+	int xcc_count = NUM_XCC(adev->gfx.xcc_mask);
+
+	for (xcc = 0; xcc < xcc_count; xcc++)
+		adev->gfx.kiq[xcc].pmf = &gfx_v9_4_3_kiq_pm4_funcs;
+}
+
+/*
+ * Program the golden register values on every XCC: write
+ * GOLDEN_GB_ADDR_CONFIG to regGB_ADDR_CONFIG and set the SPARE field of
+ * TCP_UTCL1_CNTL2 to 1.
+ */
+static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev)
+{
+ int i, num_xcc, dev_inst;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ /* translate the loop index into the instance used for register access */
+ dev_inst = GET_INST(GC, i);
+
+ WREG32_SOC15(GC, dev_inst, regGB_ADDR_CONFIG,
+ GOLDEN_GB_ADDR_CONFIG);
+ WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL2, SPARE, 0x1);
+ }
+}
+
+/*
+ * Reduce a register offset to its XCC-local form.
+ *
+ * If the lower 16 bits of @reg fall inside one of the two XCC register
+ * ranges (gfxdec0 or gfxdec1), those 16 bits are returned; otherwise @reg
+ * is returned unchanged.
+ */
+static uint32_t gfx_v9_4_3_normalize_xcc_reg_offset(uint32_t reg)
+{
+	const uint32_t local = NORMALIZE_XCC_REG_OFFSET(reg);
+	bool in_gfxdec0, in_gfxdec1;
+
+	in_gfxdec0 = (local >= XCC_REG_RANGE_0_LOW) && (local < XCC_REG_RANGE_0_HIGH);
+	in_gfxdec1 = (local >= XCC_REG_RANGE_1_LOW) && (local < XCC_REG_RANGE_1_HIGH);
+
+	return (in_gfxdec0 || in_gfxdec1) ? local : reg;
+}
+
+/*
+ * Emit a PACKET3_WRITE_DATA packet (5 dwords) that writes @val to register
+ * @reg.
+ *
+ * @ring: ring the packet is written to
+ * @eng_sel: engine select field of the packet
+ * @wc: when true, WR_CONFIRM is set in the packet
+ * @reg: register offset; normalized to its XCC-local form before emission
+ * @val: value to write
+ */
+static void gfx_v9_4_3_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
+ bool wc, uint32_t reg, uint32_t val)
+{
+ reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
+ WRITE_DATA_DST_SEL(0) |
+ (wc ? WR_CONFIRM : 0));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+/*
+ * Emit a PACKET3_WAIT_REG_MEM packet (7 dwords) on @ring.
+ *
+ * @ring: ring the packet is written to
+ * @eng_sel: engine field of the packet
+ * @mem_space: 0 for register space, non-zero for memory
+ * @opt: operation field of the packet
+ * @addr0: first address dword; must be dword aligned when @mem_space is set
+ * @addr1: second address dword
+ * @ref: reference value
+ * @mask: mask value
+ * @inv: poll interval
+ *
+ * For register space both addresses are normalized to XCC-local offsets.
+ * The compare function is fixed to 3 (equal).
+ */
+static void gfx_v9_4_3_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
+ int mem_space, int opt, uint32_t addr0,
+ uint32_t addr1, uint32_t ref, uint32_t mask,
+ uint32_t inv)
+{
+ /* Only do the normalization on regspace */
+ if (mem_space == 0) {
+ addr0 = gfx_v9_4_3_normalize_xcc_reg_offset(addr0);
+ addr1 = gfx_v9_4_3_normalize_xcc_reg_offset(addr1);
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring,
+ /* memory (1) or register (0) */
+ (WAIT_REG_MEM_MEM_SPACE(mem_space) |
+ WAIT_REG_MEM_OPERATION(opt) | /* wait */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(eng_sel)));
+
+ if (mem_space)
+ BUG_ON(addr0 & 0x3); /* Dword align */
+ amdgpu_ring_write(ring, addr0);
+ amdgpu_ring_write(ring, addr1);
+ amdgpu_ring_write(ring, ref);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, inv); /* poll interval */
+}
+
+/*
+ * Basic ring test: have the ring write a magic value to SCRATCH_REG0 and
+ * poll for it from the CPU.
+ *
+ * The scratch register is first set to 0xCAFEDEAD by MMIO, then a
+ * SET_UCONFIG_REG packet writing 0xDEADBEEF is submitted. The register is
+ * polled once per microsecond for up to adev->usec_timeout iterations.
+ *
+ * Returns 0 on success, the amdgpu_ring_alloc() error if ring space cannot
+ * be allocated, or -ETIMEDOUT if the value never shows up.
+ */
+static int gfx_v9_4_3_ring_test_ring(struct amdgpu_ring *ring)
+{
+ uint32_t scratch_reg0_offset, xcc_offset;
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ /* Use register offset which is local to XCC in the packet */
+ xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+ /* CPU-side accesses use the offset of the ring's own XCC instance */
+ scratch_reg0_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, ring->xcc_id), regSCRATCH_REG0);
+ WREG32(scratch_reg0_offset, 0xCAFEDEAD);
+ tmp = RREG32(scratch_reg0_offset);
+
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r)
+ return r;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+ amdgpu_ring_write(ring, xcc_offset - PACKET3_SET_UCONFIG_REG_START);
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(scratch_reg0_offset);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ return r;
+}
+
+/*
+ * Indirect buffer test: submit an IB that writes a magic value to a
+ * writeback slot and verify it after the fence signals.
+ *
+ * @ring: ring to submit the IB on
+ * @timeout: timeout handed to dma_fence_wait_timeout()
+ *
+ * Returns 0 on success, -ETIMEDOUT if the fence wait returns 0, -EINVAL if
+ * the slot does not hold 0xDEADBEEF afterwards, or the error returned by
+ * the writeback/IB/schedule/wait helpers.
+ */
+static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+
+ unsigned index;
+ uint64_t gpu_addr;
+ uint32_t tmp;
+ long r;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ /* seed the writeback slot so a stale 0xDEADBEEF cannot pass the test */
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+ memset(&ib, 0, sizeof(ib));
+
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r)
+ goto err1;
+
+ /* 5-dword WRITE_DATA packet storing 0xDEADBEEF at gpu_addr */
+ ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+ ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+ ib.ptr[2] = lower_32_bits(gpu_addr);
+ ib.ptr[3] = upper_32_bits(gpu_addr);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.length_dw = 5;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err2;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto err2;
+ } else if (r < 0) {
+ goto err2;
+ }
+
+ /*
+ * NOTE(review): the slot was seeded through cpu_to_le32() but is read
+ * back without le32_to_cpu() - confirm this is fine on big-endian hosts.
+ */
+ tmp = adev->wb.wb[index];
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+
+err2:
+ amdgpu_ib_free(&ib, NULL);
+ dma_fence_put(f);
+err1:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+
+/*
+ * Read the 64-bit GPU clock counter from GC instance GET_INST(GC, 0).
+ *
+ * Writes 1 to RLC_CAPTURE_GPU_CLOCK_COUNT, then combines the LSB and MSB
+ * count registers. The sequence is serialized by adev->gfx.gpu_clock_mutex.
+ *
+ * This value might differ per partition.
+ */
+static uint64_t gfx_v9_4_3_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ uint64_t clock;
+
+ mutex_lock(&adev->gfx.gpu_clock_mutex);
+ WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+ clock = (uint64_t)RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_GPU_CLOCK_COUNT_LSB) |
+ ((uint64_t)RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+ mutex_unlock(&adev->gfx.gpu_clock_mutex);
+
+ return clock;
+}
+
+/*
+ * Release all GFX firmware handles held in adev->gfx (pfp, me, ce, rlc,
+ * mec, mec2) and free the RLC register list format buffer.
+ */
+static void gfx_v9_4_3_free_microcode(struct amdgpu_device *adev)
+{
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
+
+ kfree(adev->gfx.rlc.register_list_format);
+}
+
+/*
+ * Request "amdgpu/<chip_name>_rlc.bin" and initialise the RLC microcode
+ * state from its header version.
+ *
+ * Returns 0 on success or a negative error code; on any failure the RLC
+ * firmware handle is released again.
+ */
+static int gfx_v9_4_3_init_rlc_microcode(struct amdgpu_device *adev,
+					 const char *chip_name)
+{
+	const struct rlc_firmware_header_v2_0 *hdr;
+	uint16_t major, minor;
+	int ret;
+
+	ret = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+				   AMDGPU_UCODE_REQUIRED,
+				   "amdgpu/%s_rlc.bin", chip_name);
+	if (!ret) {
+		hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+		major = le16_to_cpu(hdr->header.header_version_major);
+		minor = le16_to_cpu(hdr->header.header_version_minor);
+		ret = amdgpu_gfx_rlc_init_microcode(adev, major, minor);
+	}
+
+	/* covers both a failed request and a failed init */
+	if (ret)
+		amdgpu_ucode_release(&adev->gfx.rlc_fw);
+
+	return ret;
+}
+
+/*
+ * Request the MEC firmware and initialise the CP compute microcode state.
+ *
+ * On an SR-IOV VF "amdgpu/<chip_name>_sjt_mec.bin" is tried first, with
+ * "amdgpu/<chip_name>_mec.bin" as fallback; otherwise only the latter is
+ * requested. The MEC2 version fields are copied from the MEC ones.
+ *
+ * Returns 0 on success or the request error, in which case the MEC
+ * firmware handle is released.
+ */
+static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev,
+						const char *chip_name)
+{
+	bool have_fw = false;
+	int ret = 0;
+
+	if (amdgpu_sriov_vf(adev))
+		have_fw = !amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+						AMDGPU_UCODE_REQUIRED,
+						"amdgpu/%s_sjt_mec.bin", chip_name);
+
+	if (!have_fw)
+		ret = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+					   AMDGPU_UCODE_REQUIRED,
+					   "amdgpu/%s_mec.bin", chip_name);
+
+	if (ret) {
+		amdgpu_ucode_release(&adev->gfx.mec_fw);
+		return ret;
+	}
+
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
+
+	adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
+	adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
+
+	return 0;
+}
+
+/*
+ * Load the RLC and then the CP compute microcode, using the firmware name
+ * prefix decoded from the GC IP version.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int gfx_v9_4_3_init_microcode(struct amdgpu_device *adev)
+{
+	char ucode_prefix[15];
+	int ret;
+
+	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+	ret = gfx_v9_4_3_init_rlc_microcode(adev, ucode_prefix);
+	if (ret)
+		return ret;
+
+	return gfx_v9_4_3_init_cp_compute_microcode(adev, ucode_prefix);
+}
+
+/* Free the HPD EOP buffer and the MEC firmware buffer created by gfx_v9_4_3_mec_init(). */
+static void gfx_v9_4_3_mec_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
+}
+
+/*
+ * Set up the MEC buffers.
+ *
+ * Clears the per-XCC compute queue bitmaps, acquires the compute queues,
+ * allocates and zeroes the HPD EOP buffer (GFX9_MEC_HPD_SIZE bytes per
+ * compute ring per XCC) and copies the MEC ucode into a GTT buffer.
+ *
+ * Returns 0 on success or the buffer-creation error; on failure the
+ * buffers allocated so far are freed through gfx_v9_4_3_mec_fini().
+ */
+static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev)
+{
+	int r, i, num_xcc;
+	u32 *hpd;
+	const __le32 *fw_data;
+	unsigned int fw_size;
+	u32 *fw;
+	size_t mec_hpd_size;
+	size_t dw;
+
+	const struct gfx_firmware_header_v1_0 *mec_hdr;
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+	for (i = 0; i < num_xcc; i++)
+		bitmap_zero(adev->gfx.mec_bitmap[i].queue_bitmap,
+			    AMDGPU_MAX_COMPUTE_QUEUES);
+
+	/* take ownership of the relevant compute queues */
+	amdgpu_gfx_compute_queue_acquire(adev);
+	mec_hpd_size =
+		adev->gfx.num_compute_rings * num_xcc * GFX9_MEC_HPD_SIZE;
+	if (mec_hpd_size) {
+		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+					      AMDGPU_GEM_DOMAIN_VRAM |
+					      AMDGPU_GEM_DOMAIN_GTT,
+					      &adev->gfx.mec.hpd_eop_obj,
+					      &adev->gfx.mec.hpd_eop_gpu_addr,
+					      (void **)&hpd);
+		if (r) {
+			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+			gfx_v9_4_3_mec_fini(adev);
+			return r;
+		}
+
+		if (amdgpu_emu_mode == 1) {
+			/* emulation: clear one dword at a time, sleeping every 50 dwords */
+			for (dw = 0; dw < mec_hpd_size / 4; dw++) {
+				memset((void *)(hpd + dw), 0, 4);
+				if (dw % 50 == 0)
+					msleep(1);
+			}
+		} else {
+			memset(hpd, 0, mec_hpd_size);
+		}
+
+		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+	}
+
+	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+
+	fw_data = (const __le32 *)
+		(adev->gfx.mec_fw->data +
+		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
+
+	/*
+	 * Size the buffer with the CPU-endian fw_size (the same value used
+	 * for the copy below) rather than the raw little-endian header field.
+	 */
+	r = amdgpu_bo_create_reserved(adev, fw_size,
+				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+				      &adev->gfx.mec.mec_fw_obj,
+				      &adev->gfx.mec.mec_fw_gpu_addr,
+				      (void **)&fw);
+	if (r) {
+		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
+		gfx_v9_4_3_mec_fini(adev);
+		return r;
+	}
+
+	memcpy(fw, fw_data, fw_size);
+
+	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+
+	return 0;
+}
+
+/*
+ * Program GRBM_GFX_INDEX on one XCC to select a shader engine, shader
+ * array and instance.
+ *
+ * @se_num: SE index, or 0xffffffff to broadcast to all SEs
+ * @sh_num: SH index, or 0xffffffff to broadcast to all SHs
+ * @instance: instance index, or 0xffffffff to broadcast to all instances
+ * @xcc_id: XCC whose GRBM_GFX_INDEX register is written
+ */
+static void gfx_v9_4_3_xcc_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id)
+{
+ u32 data;
+
+ if (instance == 0xffffffff)
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
+ INSTANCE_INDEX, instance);
+
+ if (se_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
+
+ if (sh_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SH_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
+
+ WREG32_SOC15_RLC_SHADOW_EX(reg, GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX, data);
+}
+
+/*
+ * Read one indirect SQ register of a wave: program SQ_IND_INDEX with the
+ * wave, SIMD and register address (FORCE_READ set) and return SQ_IND_DATA.
+ */
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t address)
+{
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+ (address << SQ_IND_INDEX__INDEX__SHIFT) |
+ (SQ_IND_INDEX__FORCE_READ_MASK));
+ return RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA);
+}
+
+/*
+ * Read @num consecutive indirect SQ registers of a wave into @out.
+ *
+ * SQ_IND_INDEX is programmed once with the starting register @regno and
+ * AUTO_INCR set; SQ_IND_DATA is then read @num times.
+ */
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t thread,
+ uint32_t regno, uint32_t num, uint32_t *out)
+{
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+ (regno << SQ_IND_INDEX__INDEX__SHIFT) |
+ (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
+ (SQ_IND_INDEX__FORCE_READ_MASK) |
+ (SQ_IND_INDEX__AUTO_INCR_MASK));
+ while (num--)
+ *(out++) = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA);
+}
+
+/*
+ * Append a "type 1" wave state record for one wave to @dst.
+ *
+ * @dst: output array; entries are appended starting at index *@no_fields
+ * @no_fields: in/out count of used entries in @dst; advanced by 16 (the
+ * type marker followed by 15 wave registers)
+ */
+static void gfx_v9_4_3_read_wave_data(struct amdgpu_device *adev,
+ uint32_t xcc_id, uint32_t simd, uint32_t wave,
+ uint32_t *dst, int *no_fields)
+{
+ /* type 1 wave data */
+ dst[(*no_fields)++] = 1;
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_STATUS);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_HW_ID);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW0);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW1);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_GPR_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_LDS_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_TRAPSTS);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_STS);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_DBG0);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_MODE);
+}
+
+/*
+ * Read @size SGPRs of a wave, starting at SGPR @start, into @dst
+ * (thread 0, register index offset by SQIND_WAVE_SGPRS_OFFSET).
+ */
+static void gfx_v9_4_3_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t start,
+ uint32_t size, uint32_t *dst)
+{
+ wave_read_regs(adev, xcc_id, simd, wave, 0,
+ start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
+}
+
+/*
+ * Read @size VGPRs of one @thread of a wave, starting at VGPR @start, into
+ * @dst (register index offset by SQIND_WAVE_VGPRS_OFFSET).
+ */
+static void gfx_v9_4_3_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t thread,
+ uint32_t start, uint32_t size,
+ uint32_t *dst)
+{
+ wave_read_regs(adev, xcc_id, simd, wave, thread,
+ start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
+}
+
+/*
+ * Select ME/pipe/queue/VMID via soc15_grbm_select() on the instance
+ * GET_INST(GC, xcc_id).
+ */
+static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
+{
+ soc15_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, xcc_id));
+}
+
+/*
+ * Return the NUM_XCC_IN_XCP field of CP_HYP_XCP_CTL, read from the first
+ * GC instance.
+ */
+static int gfx_v9_4_3_get_xccs_per_xcp(struct amdgpu_device *adev)
+{
+ u32 xcp_ctl;
+
+ /* Value is expected to be the same on all, fetch from first instance */
+ xcp_ctl = RREG32_SOC15(GC, GET_INST(GC, 0), regCP_HYP_XCP_CTL);
+
+ return REG_GET_FIELD(xcp_ctl, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP);
+}
+
+/*
+ * Switch the compute partition layout to @num_xccs_per_xcp XCCs per
+ * partition.
+ *
+ * When PSP functions are available the switch is requested through
+ * psp_spatial_partition() with the resulting partition count; otherwise
+ * CP_HYP_XCP_CTL is programmed directly on every XCC. On success
+ * adev->gfx.num_xcc_per_xcp is updated.
+ *
+ * Returns 0 on success or the psp_spatial_partition() error.
+ *
+ * NOTE(review): @num_xccs_per_xcp is used as a divisor and modulus without
+ * a zero check - presumably validated by the caller; confirm.
+ */
+static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev,
+ int num_xccs_per_xcp)
+{
+ int ret, i, num_xcc;
+ u32 tmp = 0;
+
+ if (adev->psp.funcs) {
+ ret = psp_spatial_partition(&adev->psp,
+ NUM_XCC(adev->gfx.xcc_mask) /
+ num_xccs_per_xcp);
+ if (ret)
+ return ret;
+ } else {
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+ for (i = 0; i < num_xcc; i++) {
+ tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP,
+ num_xccs_per_xcp);
+ /* virtual XCC id is the XCC's position inside its partition */
+ tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, VIRTUAL_XCC_ID,
+ i % num_xccs_per_xcp);
+ WREG32_SOC15(GC, GET_INST(GC, i), regCP_HYP_XCP_CTL,
+ tmp);
+ }
+ ret = 0;
+ }
+
+ adev->gfx.num_xcc_per_xcp = num_xccs_per_xcp;
+
+ return ret;
+}
+
+/*
+ * Translate an IH node id into a logical XCC index.
+ *
+ * Counts the bits set in adev->gfx.xcc_mask within bit positions
+ * [ih_node / 2 : 0]; the logical index is that count minus one.
+ *
+ * Returns the logical XCC index, or -EINVAL when no XCC bit is set in that
+ * range.
+ */
+static int gfx_v9_4_3_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node)
+{
+	int xcc;
+
+	xcc = hweight8(adev->gfx.xcc_mask & GENMASK(ih_node / 2, 0));
+	if (!xcc) {
+		/* terminate the message so it is not merged with the next log line */
+		dev_err(adev->dev, "Couldn't find xcc mapping from IH node\n");
+		return -EINVAL;
+	}
+
+	return xcc - 1;
+}
+
+/* GFX callback table for gfx 9.4.3; installed by gfx_v9_4_3_gpu_early_init(). */
+static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = {
+ .get_gpu_clock_counter = &gfx_v9_4_3_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v9_4_3_xcc_select_se_sh,
+ .read_wave_data = &gfx_v9_4_3_read_wave_data,
+ .read_wave_sgprs = &gfx_v9_4_3_read_wave_sgprs,
+ .read_wave_vgprs = &gfx_v9_4_3_read_wave_vgprs,
+ .select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q,
+ .switch_partition_mode = &gfx_v9_4_3_switch_compute_partition,
+ .ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst,
+ .get_xccs_per_xcp = &gfx_v9_4_3_get_xccs_per_xcp,
+};
+
+/*
+ * Decode one ACA bank and log it into the ACA error cache.
+ *
+ * @handle: ACA handle the error is logged against
+ * @bank: bank to parse; its aca_err_type is set according to @type
+ * @type: ACA_SMU_TYPE_UE logs a count of 1, ACA_SMU_TYPE_CE logs the
+ * error count taken from the MISC0 register
+ * @data: unused
+ *
+ * Returns 0 on success, the decode/log error, or -EINVAL for any other
+ * @type.
+ */
+static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle,
+ struct aca_bank *bank, enum aca_smu_type type,
+ void *data)
+{
+ struct aca_bank_info info;
+ u64 misc0;
+ u32 instlo;
+ int ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ /* NOTE: overwrite info.die_id with xcd id for gfx */
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ /* bit 0 of the instance id is ignored for the comparison */
+ instlo &= GENMASK(31, 1);
+ /*
+ * NOTE(review): every id other than mmSMNAID_XCD0_MCA_SMU maps to die 1,
+ * including mmSMNXCD_XCD0_MCA_SMU which gfx_v9_4_3_aca_bank_is_valid()
+ * also accepts - confirm this is intended.
+ */
+ info.die_id = instlo == mmSMNAID_XCD0_MCA_SMU ? 0 : 1;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
+
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, 1ULL);
+ break;
+ case ACA_SMU_TYPE_CE:
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+ ACA_REG__MISC0__ERRCNT(misc0));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+/*
+ * Tell whether an ACA bank belongs to GFX.
+ *
+ * The bank is accepted when its IPID InstanceIdLo (bit 0 ignored) equals
+ * one of mmSMNAID_XCD0_MCA_SMU, mmSMNAID_XCD1_MCA_SMU or
+ * mmSMNXCD_XCD0_MCA_SMU. @handle, @type and @data are unused.
+ */
+static bool gfx_v9_4_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+					 enum aca_smu_type type, void *data)
+{
+	u32 inst_id = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+
+	inst_id &= GENMASK(31, 1);
+
+	return inst_id == mmSMNAID_XCD0_MCA_SMU ||
+	       inst_id == mmSMNAID_XCD1_MCA_SMU ||
+	       inst_id == mmSMNXCD_XCD0_MCA_SMU;
+}
+
+/* ACA bank callbacks for GFX: bank filter and bank parser. */
+static const struct aca_bank_ops gfx_v9_4_3_aca_bank_ops = {
+ .aca_bank_parser = gfx_v9_4_3_aca_bank_parser,
+ .aca_bank_is_valid = gfx_v9_4_3_aca_bank_is_valid,
+};
+
+/* ACA registration info for GFX: SMU hwip, UE and CE errors, GFX bank callbacks. */
+static const struct aca_info gfx_v9_4_3_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK,
+ .bank_ops = &gfx_v9_4_3_aca_bank_ops,
+};
+
+/*
+ * Fill in the static GFX configuration for gfx 9.4.3.
+ *
+ * Installs the gfx function table and RAS block, sets the fixed FIFO sizes
+ * and decodes the fields of GOLDEN_GB_ADDR_CONFIG into
+ * adev->gfx.config.gb_addr_config_fields. Always returns 0.
+ */
+static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev)
+{
+	u32 gb_addr_config;
+
+	adev->gfx.funcs = &gfx_v9_4_3_gfx_funcs;
+	adev->gfx.ras = &gfx_v9_4_3_ras;
+
+	adev->gfx.config.max_hw_contexts = 8;
+	adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+	adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+	adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+	adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+	adev->gfx.config.gb_addr_config = GOLDEN_GB_ADDR_CONFIG;
+
+	/* every field below is decoded from the value stored just above */
+	gb_addr_config = adev->gfx.config.gb_addr_config;
+
+	adev->gfx.config.gb_addr_config_fields.num_pipes =
+		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PIPES);
+
+	adev->gfx.config.max_tile_pipes =
+		adev->gfx.config.gb_addr_config_fields.num_pipes;
+
+	adev->gfx.config.gb_addr_config_fields.num_banks =
+		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_BANKS);
+	adev->gfx.config.gb_addr_config_fields.max_compress_frags =
+		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG,
+				   MAX_COMPRESSED_FRAGS);
+	adev->gfx.config.gb_addr_config_fields.num_rb_per_se =
+		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_RB_PER_SE);
+	adev->gfx.config.gb_addr_config_fields.num_se =
+		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG,
+				   NUM_SHADER_ENGINES);
+	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size =
+		1 << (8 + REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG,
+					PIPE_INTERLEAVE_SIZE));
+
+	return 0;
+}
+
+ /*
+  * Set up one compute ring for the given XCC and MEC/pipe/queue slot.
+  *
+  * @ring_id is the ring's index within its XCC; the ring used is
+  * adev->gfx.compute_ring[xcc_id * adev->gfx.num_compute_rings + ring_id].
+  * @mec is zero-based and is stored as ring->me = mec + 1.
+  *
+  * Returns the result of amdgpu_ring_init().
+  */
+ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ 					int xcc_id, int mec, int pipe, int queue)
+ {
+ 	unsigned int irq_type;
+ 	struct amdgpu_ring *ring;
+ 	unsigned int hw_prio;
+ 	uint32_t xcc_doorbell_start;
+
+ 	ring = &adev->gfx.compute_ring[xcc_id * adev->gfx.num_compute_rings +
+ 				       ring_id];
+
+ 	/* mec0 is me1 */
+ 	ring->xcc_id = xcc_id;
+ 	ring->me = mec + 1;
+ 	ring->pipe = pipe;
+ 	ring->queue = queue;
+
+ 	ring->ring_obj = NULL;
+ 	ring->use_doorbell = true;
+ 	/* Each XCC owns its own doorbell range starting at mec_ring0. */
+ 	xcc_doorbell_start = adev->doorbell_index.mec_ring0 +
+ 			     xcc_id * adev->doorbell_index.xcc_doorbell_range;
+ 	ring->doorbell_index = (xcc_doorbell_start + ring_id) << 1;
+ 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
+ 			     (ring_id + xcc_id * adev->gfx.num_compute_rings) *
+ 			     GFX9_MEC_HPD_SIZE;
+ 	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
+ 	/* Bounded write so an unexpected id can never overrun ring->name. */
+ 	snprintf(ring->name, sizeof(ring->name), "comp_%d.%d.%d.%d",
+ 		 ring->xcc_id, ring->me, ring->pipe, ring->queue);
+
+ 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ 		+ ring->pipe;
+ 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
+ 			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ 	/* type-2 packets are deprecated on MEC, use type-3 instead */
+ 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ 				hw_prio, NULL);
+ }
+
+ /*
+  * Allocate the zeroed buffers used for GFX IP register dumps.
+  *
+  * One buffer holds gc_reg_list_9_4_3 for every XCC, the other holds
+  * gc_cp_reg_list_9_4_3 for every MEC/pipe/queue of every XCC. An allocation
+  * failure is logged and leaves the corresponding pointer NULL; nothing is
+  * returned to the caller.
+  */
+ static void gfx_v9_4_3_alloc_ip_dump(struct amdgpu_device *adev)
+ {
+ 	uint32_t xcc_count = NUM_XCC(adev->gfx.xcc_mask);
+ 	uint32_t nregs, queue_count;
+
+ 	nregs = ARRAY_SIZE(gc_reg_list_9_4_3);
+ 	adev->gfx.ip_dump_core = kcalloc(nregs * xcc_count, sizeof(uint32_t),
+ 					 GFP_KERNEL);
+ 	if (!adev->gfx.ip_dump_core)
+ 		DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
+
+ 	/* Allocate memory for compute queue registers for all the instances */
+ 	nregs = ARRAY_SIZE(gc_cp_reg_list_9_4_3);
+ 	queue_count = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+ 		      adev->gfx.mec.num_queue_per_pipe;
+
+ 	adev->gfx.ip_dump_compute_queues =
+ 		kcalloc(nregs * queue_count * xcc_count, sizeof(uint32_t),
+ 			GFP_KERNEL);
+ 	if (!adev->gfx.ip_dump_compute_queues)
+ 		DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
+ }
+
+ /*
+  * Software-side initialisation of the GFX 9.4.3 IP block.
+  *
+  * In order: optional cleaner shader setup, MEC topology constants, CP
+  * interrupt sources, RLC and MEC init, per-XCC compute rings plus KIQ and
+  * MQD, the compute reset capability mask, early GPU config, RAS, sysfs and
+  * the IP dump buffers.
+  *
+  * Returns 0 on success, or the error code of the first failing step. A
+  * cleaner shader init failure is only logged and disables the feature.
+  */
+ static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block *ip_block)
+ {
+ int i, j, k, r, ring_id, xcc_id, num_xcc;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* The cleaner shader is only enabled when the MEC firmware is >= 153. */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ adev->gfx.cleaner_shader_ptr = gfx_9_4_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_3_cleaner_shader_hex);
+ if (adev->gfx.mec_fw_version >= 153) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
+
+ adev->gfx.mec.num_mec = 2;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+ /* EOP Event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
+ if (r)
+ return r;
+
+ /* Bad opcode Event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
+ GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR,
+ &adev->gfx.bad_op_irq);
+ if (r)
+ return r;
+
+ /* Privileged reg */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
+ &adev->gfx.priv_reg_irq);
+ if (r)
+ return r;
+
+ /* Privileged inst */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
+ &adev->gfx.priv_inst_irq);
+ if (r)
+ return r;
+
+ adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
+
+ r = adev->gfx.rlc.funcs->init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init rlc BOs!\n");
+ return r;
+ }
+
+ r = gfx_v9_4_3_mec_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init MEC BOs!\n");
+ return r;
+ }
+
+ /* set up the compute queues - allocate horizontally across pipes */
+ for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+ /* ring_id restarts at 0 for every XCC and counts enabled queues only */
+ ring_id = 0;
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec;
+ k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(
+ adev, xcc_id, i, k, j))
+ continue;
+
+ r = gfx_v9_4_3_compute_ring_init(adev,
+ ring_id,
+ xcc_id,
+ i, k, j);
+ if (r)
+ return r;
+
+ ring_id++;
+ }
+ }
+ }
+
+ r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, xcc_id);
+ if (r) {
+ DRM_ERROR("Failed to init KIQ BOs!\n");
+ return r;
+ }
+
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
+ if (r)
+ return r;
+
+ /* create MQD for all compute queues as well as KIQ for SRIOV case */
+ r = amdgpu_gfx_mqd_sw_init(adev,
+ sizeof(struct v9_mqd_allocation), xcc_id);
+ if (r)
+ return r;
+ }
+
+ /*
+ * Start from the soft/full reset mask of the first compute ring, then add
+ * per-queue and per-pipe reset when the MEC firmware is new enough, the
+ * device is not an SR-IOV VF and ring reset is not disabled for debug.
+ */
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ if ((adev->gfx.mec_fw_version >= 155) &&
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
+ }
+ break;
+ case IP_VERSION(9, 5, 0):
+ if ((adev->gfx.mec_fw_version >= 21) &&
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
+ }
+ break;
+ default:
+ break;
+ }
+ r = gfx_v9_4_3_gpu_early_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_gfx_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
+ /* Returns nothing; allocation failures there are logged, not propagated. */
+ gfx_v9_4_3_alloc_ip_dump(adev);
+
+ return 0;
+ }
+
+ /*
+  * Software-side teardown of the GFX 9.4.3 IP block.
+  *
+  * Finalises every compute ring of every XCC, the per-XCC MQD and KIQ state,
+  * the cleaner shader, MEC objects, the RLC clear state BO, microcode, sysfs
+  * entries and the IP dump buffers. Always returns 0.
+  */
+ static int gfx_v9_4_3_sw_fini(struct amdgpu_ip_block *ip_block)
+ {
+ int i, num_xcc;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ /* compute_ring[] is indexed as one flat array across all XCCs */
+ for (i = 0; i < adev->gfx.num_compute_rings * num_xcc; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+ for (i = 0; i < num_xcc; i++) {
+ amdgpu_gfx_mqd_sw_fini(adev, i);
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[i].ring);
+ amdgpu_gfx_kiq_fini(adev, i);
+ }
+
+ amdgpu_gfx_cleaner_shader_sw_fini(adev);
+
+ gfx_v9_4_3_mec_fini(adev);
+ amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
+ gfx_v9_4_3_free_microcode(adev);
+ amdgpu_gfx_sysfs_fini(adev);
+
+ kfree(adev->gfx.ip_dump_core);
+ kfree(adev->gfx.ip_dump_compute_queues);
+
+ return 0;
+ }
+
+ #define DEFAULT_SH_MEM_BASES	(0x6000)
+ /*
+  * Program the per-VMID compute state of one XCC for the KFD VMID range
+  * [adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID).
+  *
+  * Under srbm_mutex each VMID is selected in turn and gets SH_MEM_CONFIG,
+  * SH_MEM_BASES and the TRAP_EN bit of SPI_GDBG_PER_VMID_CNTL; afterwards the
+  * GDS, GWS and OA registers of the same VMIDs are cleared.
+  */
+ static void gfx_v9_4_3_xcc_init_compute_vmid(struct amdgpu_device *adev,
+ 					     int xcc_id)
+ {
+ 	int gc_inst = GET_INST(GC, xcc_id);
+ 	uint32_t mem_config, mem_bases, trap_cntl;
+ 	int vmid;
+
+ 	/*
+ 	 * Configure apertures:
+ 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
+ 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
+ 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ 	 */
+ 	mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
+
+ 	mem_config = SH_MEM_ADDRESS_MODE_64 |
+ 		     (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ 		      SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT);
+
+ 	mutex_lock(&adev->srbm_mutex);
+ 	for (vmid = adev->vm_manager.first_kfd_vmid; vmid < AMDGPU_NUM_VMID; vmid++) {
+ 		soc15_grbm_select(adev, 0, 0, 0, vmid, gc_inst);
+ 		/* CP and shaders */
+ 		WREG32_SOC15_RLC(GC, gc_inst, regSH_MEM_CONFIG, mem_config);
+ 		WREG32_SOC15_RLC(GC, gc_inst, regSH_MEM_BASES, mem_bases);
+
+ 		/* Enable trap for each kfd vmid. */
+ 		trap_cntl = RREG32_SOC15(GC, gc_inst, regSPI_GDBG_PER_VMID_CNTL);
+ 		trap_cntl = REG_SET_FIELD(trap_cntl, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ 		WREG32_SOC15_RLC(GC, gc_inst, regSPI_GDBG_PER_VMID_CNTL, trap_cntl);
+ 	}
+ 	/* Put the selection back to VMID 0 before dropping the lock. */
+ 	soc15_grbm_select(adev, 0, 0, 0, 0, gc_inst);
+ 	mutex_unlock(&adev->srbm_mutex);
+
+ 	/*
+ 	 * Initialize all compute VMIDs to have no GDS, GWS, or OA
+ 	 * access. These should be enabled by FW for target VMIDs.
+ 	 */
+ 	for (vmid = adev->vm_manager.first_kfd_vmid; vmid < AMDGPU_NUM_VMID; vmid++) {
+ 		WREG32_SOC15_OFFSET(GC, gc_inst, regGDS_VMID0_BASE, 2 * vmid, 0);
+ 		WREG32_SOC15_OFFSET(GC, gc_inst, regGDS_VMID0_SIZE, 2 * vmid, 0);
+ 		WREG32_SOC15_OFFSET(GC, gc_inst, regGDS_GWS_VMID0, vmid, 0);
+ 		WREG32_SOC15_OFFSET(GC, gc_inst, regGDS_OA_VMID0, vmid, 0);
+ 	}
+ }
+
+ /*
+  * Clear the GDS base/size, GWS and OA registers of VMIDs 1 to
+  * AMDGPU_NUM_VMID - 1 on one XCC. VMID 0 is left untouched.
+  */
+ static void gfx_v9_4_3_xcc_init_gds_vmid(struct amdgpu_device *adev, int xcc_id)
+ {
+ 	int gc_inst = GET_INST(GC, xcc_id);
+ 	int id;
+
+ 	/*
+ 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
+ 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
+ 	 * the driver can enable them for graphics. VMID0 should maintain
+ 	 * access so that HWS firmware can save/restore entries.
+ 	 */
+ 	for (id = 1; id < AMDGPU_NUM_VMID; id++) {
+ 		WREG32_SOC15_OFFSET(GC, gc_inst, regGDS_VMID0_BASE, 2 * id, 0);
+ 		WREG32_SOC15_OFFSET(GC, gc_inst, regGDS_VMID0_SIZE, 2 * id, 0);
+ 		WREG32_SOC15_OFFSET(GC, gc_inst, regGDS_GWS_VMID0, id, 0);
+ 		WREG32_SOC15_OFFSET(GC, gc_inst, regGDS_OA_VMID0, id, 0);
+ 	}
+ }
+
+ /*
+  * For ASICs that need the xnack chain and whose MEC firmware supports it,
+  * set the DISABLE_XNACK_CHECK_IN_RETRY_DISABLE bit in SQ_CONFIG1; KFD is
+  * informed separately to set the xnack_chain bit in SET_RESOURCES.
+  *
+  * Does nothing unless AMDGPU_GMC_XNACK_FLAG_CHAIN is set in
+  * adev->gmc.xnack_flags.
+  */
+ static void gfx_v9_4_3_xcc_init_sq(struct amdgpu_device *adev, int xcc_id)
+ {
+ 	uint32_t data;
+
+ 	if (!(adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN))
+ 		return;
+
+ 	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_CONFIG1);
+ 	data = REG_SET_FIELD(data, SQ_CONFIG1, DISABLE_XNACK_CHECK_IN_RETRY_DISABLE, 1);
+ 	/* Write back to the same instance the value was read from. */
+ 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_CONFIG1, data);
+ }
+
+ /*
+  * Program the per-VMID SH_MEM registers of one XCC, then run the compute
+  * VMID, GDS VMID and SQ init steps for it.
+  *
+  * SH_MEM_CONFIG is the same for every VMID. SH_MEM_BASES is 0 for VMID 0
+  * and is built from the private and shared aperture start addresses for all
+  * other VMIDs.
+  */
+ static void gfx_v9_4_3_xcc_constants_init(struct amdgpu_device *adev,
+ 					  int xcc_id)
+ {
+ 	u32 sh_mem_config, sh_mem_bases;
+ 	int i;
+
+ 	/* Both values are loop-invariant, so build them once up front. */
+ 	sh_mem_config = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
+ 				      SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ 	sh_mem_config = REG_SET_FIELD(sh_mem_config, SH_MEM_CONFIG, RETRY_DISABLE,
+ 				      !!adev->gmc.noretry);
+
+ 	sh_mem_bases = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+ 				     (adev->gmc.private_aperture_start >> 48));
+ 	sh_mem_bases = REG_SET_FIELD(sh_mem_bases, SH_MEM_BASES, SHARED_BASE,
+ 				     (adev->gmc.shared_aperture_start >> 48));
+
+ 	/* XXX SH_MEM regs */
+ 	/* where to put LDS, scratch, GPUVM in FSA64 space */
+ 	mutex_lock(&adev->srbm_mutex);
+ 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
+ 		soc15_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id));
+ 		/* CP and shaders */
+ 		WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id),
+ 				 regSH_MEM_CONFIG, sh_mem_config);
+ 		WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id),
+ 				 regSH_MEM_BASES, i ? sh_mem_bases : 0);
+ 	}
+ 	/* Restore VMID 0 on the instance that was selected in the loop. */
+ 	soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+
+ 	mutex_unlock(&adev->srbm_mutex);
+
+ 	gfx_v9_4_3_xcc_init_compute_vmid(adev, xcc_id);
+ 	gfx_v9_4_3_xcc_init_gds_vmid(adev, xcc_id);
+ 	gfx_v9_4_3_xcc_init_sq(adev, xcc_id);
+ }
+
+ /*
+  * Device-wide constants init: fetch CU info, cache DB_DEBUG2 from GC
+  * instance 0, decide whether the xnack chain flag applies, then run the
+  * per-XCC constants init for every XCC in adev->gfx.xcc_mask.
+  */
+ static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev)
+ {
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+ gfx_v9_4_3_get_cu_info(adev, &adev->gfx.cu_info);
+ adev->gfx.config.db_debug2 =
+ RREG32_SOC15(GC, GET_INST(GC, 0), regDB_DEBUG2);
+
+ /*
+ * AMDGPU_GMC_XNACK_FLAG_CHAIN is gated on a minimum MEC firmware version
+ * per IP version; on 9.4.3 an SR-IOV VF additionally needs
+ * amdgpu_sriov_reg_access_sq_config() to be true.
+ */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ /* ToDo: GC 9.4.4 */
+ case IP_VERSION(9, 4, 3):
+ if (adev->gfx.mec_fw_version >= 184 &&
+ (amdgpu_sriov_reg_access_sq_config(adev) ||
+ !amdgpu_sriov_vf(adev)))
+ adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN;
+ break;
+ case IP_VERSION(9, 5, 0):
+ if (adev->gfx.mec_fw_version >= 23)
+ adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN;
+ break;
+ default:
+ break;
+ }
+
+ for (i = 0; i < num_xcc; i++)
+ gfx_v9_4_3_xcc_constants_init(adev, i);
+ }
+
+ /* Set the SRM_ENABLE field of RLC_SRM_CNTL on the given XCC. */
+ static void
+ gfx_v9_4_3_xcc_enable_save_restore_machine(struct amdgpu_device *adev,
+ int xcc_id)
+ {
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_SRM_CNTL, SRM_ENABLE, 1);
+ }
+
+ /* Power-gating init for one XCC; currently this only enables the RLC save/restore machine. */
+ static void gfx_v9_4_3_xcc_init_pg(struct amdgpu_device *adev, int xcc_id)
+ {
+ /*
+ * Rlc save restore list is workable since v2_1.
+ */
+ gfx_v9_4_3_xcc_enable_save_restore_machine(adev, xcc_id);
+ }
+
+ /* Read-modify-write CPC_PSP_DEBUG on one XCC to set the UTCL2IUGPAOVERRIDE bit. */
+ static void gfx_v9_4_3_xcc_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id)
+ {
+ uint32_t data;
+
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG);
+ data |= CPC_PSP_DEBUG__UTCL2IUGPAOVERRIDE_MASK;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG, data);
+ }
+
+ /*
+  * Report whether the RLC is running, judged by the RLC_ENABLE_F32 bit of
+  * RLC_CNTL on GC instance 0 only.
+  */
+ static bool gfx_v9_4_3_is_rlc_enabled(struct amdgpu_device *adev)
+ {
+ 	uint32_t rlc_cntl = RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CNTL);
+
+ 	return (rlc_cntl & RLC_CNTL__RLC_ENABLE_F32_MASK) != 0;
+ }
+
+ /*
+  * Request RLC safe mode on one XCC: write CMD with MESSAGE = 1 to
+  * RLC_SAFE_MODE, then poll in 1 us steps, at most adev->usec_timeout times,
+  * until the CMD field reads back as 0. A timeout is not reported.
+  */
+ static void gfx_v9_4_3_xcc_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
+ {
+ 	uint32_t request = RLC_SAFE_MODE__CMD_MASK |
+ 			   (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
+ 	unsigned int waited = 0;
+
+ 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE, request);
+
+ 	/* wait for RLC_SAFE_MODE */
+ 	while (waited < adev->usec_timeout) {
+ 		if (!REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE),
+ 				   RLC_SAFE_MODE, CMD))
+ 			break;
+ 		udelay(1);
+ 		waited++;
+ 	}
+ }
+
+ /*
+  * Leave RLC safe mode on one XCC by writing CMD with the MESSAGE field left
+  * at 0 to RLC_SAFE_MODE. Unlike the enter path, no completion poll is done.
+  */
+ static void gfx_v9_4_3_xcc_unset_safe_mode(struct amdgpu_device *adev,
+ int xcc_id)
+ {
+ uint32_t data;
+
+ data = RLC_SAFE_MODE__CMD_MASK;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE, data);
+ }
+
+ /*
+  * Record, for every XCC, the register offsets used for RLCG register access
+  * (scratch registers 0-3, GRBM_GFX_CNTL, GRBM_GFX_INDEX and RLC_SPARE_INT),
+  * then mark RLCG register access as supported. The table is indexed by the
+  * value of GET_INST(GC, xcc_id).
+  */
+ static void gfx_v9_4_3_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
+ {
+ 	int xcc_count = NUM_XCC(adev->gfx.xcc_mask);
+ 	int xcc;
+
+ 	for (xcc = 0; xcc < xcc_count; xcc++) {
+ 		int gc_inst = GET_INST(GC, xcc);
+ 		struct amdgpu_rlcg_reg_access_ctrl *ctrl =
+ 			&adev->gfx.rlc.reg_access_ctrl[gc_inst];
+
+ 		ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, gc_inst, regSCRATCH_REG0);
+ 		ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, gc_inst, regSCRATCH_REG1);
+ 		ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, gc_inst, regSCRATCH_REG2);
+ 		ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, gc_inst, regSCRATCH_REG3);
+ 		ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, gc_inst, regGRBM_GFX_CNTL);
+ 		ctrl->grbm_idx = SOC15_REG_OFFSET(GC, gc_inst, regGRBM_GFX_INDEX);
+ 		ctrl->spare_int = SOC15_REG_OFFSET(GC, gc_inst, regRLC_SPARE_INT);
+ 	}
+
+ 	adev->gfx.rlc.rlcg_reg_access_supported = true;
+ }
+
+ /*
+  * RLC init hook: if the RLC function table provides update_spm_vmid, call
+  * it with a NULL ring and VMID 0xf. Always returns 0.
+  */
+ static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev)
+ {
+ /* init spm vmid with 0xf */
+ if (adev->gfx.rlc.funcs->update_spm_vmid)
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
+
+ return 0;
+ }
+
+ /*
+  * Wait for the RLC serdes masters of one XCC to go idle.
+  *
+  * Phase 1 (under grbm_idx_mutex): for every SE/SH pair, select it and poll
+  * RLC_SERDES_CU_MASTER_BUSY in 1 us steps, at most adev->usec_timeout times.
+  * On timeout the selection is reset, the mutex is dropped, a message is
+  * logged and the function returns without running phase 2.
+  *
+  * Phase 2: poll the SE/GC/TC0/TC1 bits of RLC_SERDES_NONCU_MASTER_BUSY with
+  * the same bound; a timeout here is not reported.
+  */
+ static void gfx_v9_4_3_xcc_wait_for_rlc_serdes(struct amdgpu_device *adev,
+ int xcc_id)
+ {
+ u32 i, j, k;
+ u32 mask;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff,
+ xcc_id);
+ for (k = 0; k < adev->usec_timeout; k++) {
+ if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SERDES_CU_MASTER_BUSY) == 0)
+ break;
+ udelay(1);
+ }
+ /* k only reaches usec_timeout when the poll never saw idle */
+ if (k == adev->usec_timeout) {
+ /* all-ones arguments: presumably broadcast selection; confirm in select_se_sh */
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff,
+ 0xffffffff,
+ 0xffffffff, xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
+ i, j);
+ return;
+ }
+ }
+ }
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
+ RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
+ RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
+ RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
+ for (k = 0; k < adev->usec_timeout; k++) {
+ if ((RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
+ break;
+ udelay(1);
+ }
+ }
+
+ /*
+  * Set or clear, according to @enable, the CNTX_BUSY, CNTX_EMPTY and
+  * CMP_BUSY interrupt enable bits of CP_INT_CNTL_RING0 on one XCC.
+  */
+ static void gfx_v9_4_3_xcc_enable_gui_idle_interrupt(struct amdgpu_device *adev,
+ 						     bool enable, int xcc_id)
+ {
+ 	u32 bit = enable ? 1 : 0;
+ 	u32 int_cntl;
+
+ 	/* These interrupts should be enabled to drive DS clock */
+ 	int_cntl = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0);
+
+ 	int_cntl = REG_SET_FIELD(int_cntl, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, bit);
+ 	int_cntl = REG_SET_FIELD(int_cntl, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, bit);
+ 	int_cntl = REG_SET_FIELD(int_cntl, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, bit);
+
+ 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0, int_cntl);
+ }
+
+ /*
+  * Stop the RLC on one XCC: clear RLC_ENABLE_F32 in RLC_CNTL, disable the
+  * GUI idle interrupts, then wait for the RLC serdes to go idle.
+  */
+ static void gfx_v9_4_3_xcc_rlc_stop(struct amdgpu_device *adev, int xcc_id)
+ {
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_CNTL,
+ RLC_ENABLE_F32, 0);
+ gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, false, xcc_id);
+ gfx_v9_4_3_xcc_wait_for_rlc_serdes(adev, xcc_id);
+ }
+
+ /* Stop the RLC on every XCC present in adev->gfx.xcc_mask. */
+ static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev)
+ {
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++)
+ gfx_v9_4_3_xcc_rlc_stop(adev, i);
+ }
+
+ /*
+  * Pulse the SOFT_RESET_RLC bit of GRBM_SOFT_RESET on one XCC: assert it,
+  * wait 50 us, deassert it, wait another 50 us.
+  */
+ static void gfx_v9_4_3_xcc_rlc_reset(struct amdgpu_device *adev, int xcc_id)
+ {
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), GRBM_SOFT_RESET,
+ SOFT_RESET_RLC, 1);
+ udelay(50);
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), GRBM_SOFT_RESET,
+ SOFT_RESET_RLC, 0);
+ udelay(50);
+ }
+
+ /* Soft-reset the RLC on every XCC present in adev->gfx.xcc_mask. */
+ static void gfx_v9_4_3_rlc_reset(struct amdgpu_device *adev)
+ {
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++)
+ gfx_v9_4_3_xcc_rlc_reset(adev, i);
+ }
+
+ /*
+  * Start the RLC on one XCC: set RLC_ENABLE_F32 in RLC_CNTL and wait 50 us.
+  * On devices without the AMD_IS_APU flag the GUI idle interrupts are then
+  * enabled as well, followed by another 50 us delay.
+  */
+ static void gfx_v9_4_3_xcc_rlc_start(struct amdgpu_device *adev, int xcc_id)
+ {
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_CNTL,
+ RLC_ENABLE_F32, 1);
+ udelay(50);
+
+ /* carrizo do enable cp interrupt after cp inited */
+ if (!(adev->flags & AMD_IS_APU)) {
+ gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, true, xcc_id);
+ udelay(50);
+ }
+ }
+
+ /*
+  * Start the RLC on every XCC present in adev->gfx.xcc_mask.
+  *
+  * When built with AMDGPU_RLC_DEBUG_RETRY, each XCC's RLC ucode version is
+  * read back from RLC_GPM_GENERAL_6; if it is 0x108 (the debug ucode) the
+  * retry timer interval and wptr/rptr gap registers are programmed.
+  */
+ static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev)
+ {
+ #ifdef AMDGPU_RLC_DEBUG_RETRY
+ 	u32 rlc_ucode_ver;
+ #endif
+ 	int i, num_xcc;
+
+ 	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ 	for (i = 0; i < num_xcc; i++) {
+ 		gfx_v9_4_3_xcc_rlc_start(adev, i);
+ #ifdef AMDGPU_RLC_DEBUG_RETRY
+ 		/* RLC_GPM_GENERAL_6 : RLC Ucode version */
+ 		rlc_ucode_ver = RREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_GENERAL_6);
+ 		if (rlc_ucode_ver == 0x108) {
+ 			/* "0x%08x": zero-padded hex, not a literal "0x08" prefix */
+ 			dev_info(adev->dev,
+ 				 "Using rlc debug ucode. regRLC_GPM_GENERAL_6 ==0x%08x / fw_ver == %i \n",
+ 				 rlc_ucode_ver, adev->gfx.rlc_fw_version);
+ 			/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
+ 			 * default is 0x9C4 to create a 100us interval */
+ 			WREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_TIMER_INT_3, 0x9C4);
+ 			/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
+ 			 * to disable the page fault retry interrupts, default is
+ 			 * 0x100 (256) */
+ 			WREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_GENERAL_12, 0x100);
+ 		}
+ #endif
+ 	}
+ }
+
+ /*
+  * Write the RLC microcode image to one XCC through the RLC_GPM_UCODE_ADDR /
+  * RLC_GPM_UCODE_DATA register pair.
+  *
+  * The ucode array location and size come from the firmware header. After
+  * all dwords are written, the firmware version is written to the ADDR
+  * register.
+  *
+  * Returns 0 on success, -EINVAL if no RLC firmware is loaded.
+  */
+ static int gfx_v9_4_3_xcc_rlc_load_microcode(struct amdgpu_device *adev,
+ int xcc_id)
+ {
+ const struct rlc_firmware_header_v2_0 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ if (!adev->gfx.rlc_fw)
+ return -EINVAL;
+
+ hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ amdgpu_ucode_print_rlc_hdr(&hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ /* size is in bytes in the header; the data register takes dwords */
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_ADDR,
+ RLCG_UCODE_LOADING_START_ADDRESS);
+ for (i = 0; i < fw_size; i++) {
+ /* in emulation mode, log progress and sleep 1 ms every 100 dwords */
+ if (amdgpu_emu_mode == 1 && i % 100 == 0) {
+ dev_info(adev->dev, "Write RLC ucode data %u DWs\n", i);
+ msleep(1);
+ }
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
+ }
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
+
+ return 0;
+ }
+
+ /*
+  * Resume the RLC on one XCC.
+  *
+  * When firmware is not loaded through PSP, the RLC is stopped, the
+  * microcode is written by the driver and the RLC is started again. In all
+  * cases RLC_CGCG_CGLS_CTRL is then zeroed and PG init is run inside an RLC
+  * safe-mode section.
+  *
+  * Returns 0 on success or the error from the microcode load.
+  */
+ static int gfx_v9_4_3_xcc_rlc_resume(struct amdgpu_device *adev, int xcc_id)
+ {
+ int r;
+
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ gfx_v9_4_3_xcc_rlc_stop(adev, xcc_id);
+ /* legacy rlc firmware loading */
+ r = gfx_v9_4_3_xcc_rlc_load_microcode(adev, xcc_id);
+ if (r)
+ return r;
+ gfx_v9_4_3_xcc_rlc_start(adev, xcc_id);
+ }
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+ /* disable CG */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, 0);
+ gfx_v9_4_3_xcc_init_pg(adev, xcc_id);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
+
+ return 0;
+ }
+
+ /*
+  * Resume the RLC on every XCC. Skipped entirely (returns 0) on an SR-IOV
+  * VF. Stops at, and returns, the first per-XCC error.
+  */
+ static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev)
+ {
+ int r, i, num_xcc;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ r = gfx_v9_4_3_xcc_rlc_resume(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
+ }
+
+ /*
+  * Update the RLC_SPM_VMID field of RLC_SPM_MC_CNTL on GC instance 0.
+  *
+  * The register is only written when the new value differs from the current
+  * one. In pp-one-VF mode outside SR-IOV runtime, the non-KIQ accessors are
+  * used for both the read and the write. @ring is not used.
+  */
+ static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring,
+ 				       unsigned vmid)
+ {
+ 	u32 offset, old_val, new_val;
+
+ 	offset = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL);
+ 	if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
+ 		old_val = RREG32_NO_KIQ(offset);
+ 	else
+ 		old_val = RREG32(offset);
+
+ 	new_val = old_val & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
+ 	new_val |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) <<
+ 		   RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
+
+ 	/* Nothing to do when the field already holds the requested VMID. */
+ 	if (new_val == old_val)
+ 		return;
+
+ 	if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) {
+ 		WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, new_val);
+ 	} else {
+ 		WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, new_val);
+ 	}
+ }
+
+ /* Registers matched by gfx_v9_4_3_is_rlcg_access_range(). */
+ static const struct soc15_reg_rlcg rlcg_access_gc_9_4_3[] = {
+ {SOC15_REG_ENTRY(GC, 0, regGRBM_GFX_INDEX)},
+ {SOC15_REG_ENTRY(GC, 0, regSQ_IND_INDEX)},
+ };
+
+ /*
+  * Check whether @offset equals the absolute register offset of any of the
+  * @arr_size entries in @entries.
+  *
+  * Each entry's instance is translated through
+  * adev->ip_map.logical_to_dev_inst when that callback is set. Returns false
+  * when @entries is NULL or nothing matches.
+  */
+ static bool gfx_v9_4_3_check_rlcg_range(struct amdgpu_device *adev,
+ 					uint32_t offset,
+ 					struct soc15_reg_rlcg *entries, int arr_size)
+ {
+ 	int idx;
+
+ 	if (!entries)
+ 		return false;
+
+ 	for (idx = 0; idx < arr_size; idx++) {
+ 		const struct soc15_reg_rlcg *e = &entries[idx];
+ 		int dev_inst = e->instance;
+
+ 		if (adev->ip_map.logical_to_dev_inst)
+ 			dev_inst = adev->ip_map.logical_to_dev_inst(adev, e->hwip,
+ 								    e->instance);
+
+ 		if (offset == adev->reg_offset[e->hwip][dev_inst][e->segment] + e->reg)
+ 			return true;
+ 	}
+
+ 	return false;
+ }
+
+ /* Return true when @offset matches one of the registers in rlcg_access_gc_9_4_3[]. */
+ static bool gfx_v9_4_3_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
+ {
+ /* the (void *) cast drops the table's const qualifier for the helper */
+ return gfx_v9_4_3_check_rlcg_range(adev, offset,
+ (void *)rlcg_access_gc_9_4_3,
+ ARRAY_SIZE(rlcg_access_gc_9_4_3));
+ }
+
+ /*
+  * Enable or halt the compute micro engines of one XCC through CP_MEC_CNTL.
+  *
+  * Enabling writes 0. Disabling writes the icache-invalidate bit, the pipe
+  * reset bits listed below and both ME halt bits, and marks the XCC's KIQ
+  * ring scheduler as not ready. Either way a 50 us delay follows.
+  */
+ static void gfx_v9_4_3_xcc_cp_compute_enable(struct amdgpu_device *adev,
+ 					     bool enable, int xcc_id)
+ {
+ 	u32 mec_cntl = 0;
+
+ 	if (!enable)
+ 		mec_cntl = (CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK |
+ 			    CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK |
+ 			    CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK |
+ 			    CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK |
+ 			    CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK |
+ 			    CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK |
+ 			    CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK |
+ 			    CP_MEC_CNTL__MEC_ME1_HALT_MASK |
+ 			    CP_MEC_CNTL__MEC_ME2_HALT_MASK);
+
+ 	WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL, mec_cntl);
+
+ 	if (!enable)
+ 		adev->gfx.kiq[xcc_id].ring.sched.ready = false;
+
+ 	udelay(50);
+ }
+
+ /*
+  * Load the MEC microcode on one XCC.
+  *
+  * The compute engines are halted first. The instruction cache base is
+  * pointed at adev->gfx.mec.mec_fw_gpu_addr, then the jump table (located by
+  * jt_offset/jt_size in the firmware header) is written through the
+  * CP_MEC_ME1_UCODE_ADDR/DATA register pair. Finally the firmware version is
+  * written to the ADDR register.
+  *
+  * Returns 0 on success, -EINVAL if no MEC firmware is loaded.
+  */
+ static int gfx_v9_4_3_xcc_cp_compute_load_microcode(struct amdgpu_device *adev,
+ 						    int xcc_id)
+ {
+ 	const struct gfx_firmware_header_v1_0 *mec_hdr;
+ 	const __le32 *fw_data;
+ 	unsigned int i;
+ 	u32 tmp;
+ 	u32 jt_offset, jt_size;
+ 	u32 mec_ucode_addr_offset;
+ 	u32 mec_ucode_data_offset;
+
+ 	if (!adev->gfx.mec_fw)
+ 		return -EINVAL;
+
+ 	gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id);
+
+ 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+
+ 	fw_data = (const __le32 *)
+ 		(adev->gfx.mec_fw->data +
+ 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+ 	/*
+ 	 * Header fields are stored little-endian in the firmware image, so
+ 	 * convert them like the other header reads in this function.
+ 	 */
+ 	jt_offset = le32_to_cpu(mec_hdr->jt_offset);
+ 	jt_size = le32_to_cpu(mec_hdr->jt_size);
+
+ 	tmp = REG_SET_FIELD(0, CP_CPC_IC_BASE_CNTL, VMID, 0);
+ 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+ 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL, tmp);
+
+ 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_LO,
+ 		     adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
+ 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_HI,
+ 		     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+
+ 	mec_ucode_addr_offset =
+ 		SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_MEC_ME1_UCODE_ADDR);
+ 	mec_ucode_data_offset =
+ 		SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_MEC_ME1_UCODE_DATA);
+
+ 	/* MEC1 */
+ 	WREG32(mec_ucode_addr_offset, jt_offset);
+ 	for (i = 0; i < jt_size; i++)
+ 		WREG32(mec_ucode_data_offset,
+ 		       le32_to_cpup(fw_data + jt_offset + i));
+
+ 	WREG32(mec_ucode_addr_offset, adev->gfx.mec_fw_version);
+ 	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
+
+ 	return 0;
+ }
+
+ /* KIQ functions */
+ /*
+  * Tell the RLC which queue is the KIQ on one XCC: the low byte of
+  * RLC_CP_SCHEDULERS is replaced with the ring's me/pipe/queue packed as
+  * (me << 5) | (pipe << 3) | queue, and bit 7 (0x80) is set in the value
+  * written back.
+  */
+ static void gfx_v9_4_3_xcc_kiq_setting(struct amdgpu_ring *ring, int xcc_id)
+ {
+ uint32_t tmp;
+ struct amdgpu_device *adev = ring->adev;
+
+ /* tell RLC which is KIQ queue */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp | 0x80);
+ }
+
+ /*
+  * Give a high-priority compute ring the high pipe priority and the maximum
+  * queue priority in its MQD. Other rings leave the MQD untouched.
+  */
+ static void gfx_v9_4_3_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
+ {
+ 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
+ 		return;
+
+ 	if (!amdgpu_gfx_is_high_priority_compute_queue(ring->adev, ring))
+ 		return;
+
+ 	mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
+ 	mqd->cp_hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
+ }
+
+ /*
+  * Fill in the memory queue descriptor (ring->mqd_ptr) for a ring on one
+  * XCC.
+  *
+  * Only the MQD in memory and ring->wptr are written; several fields start
+  * from the current value of the matching CP_HQD_* / CP_MQD_* register, read
+  * from the given XCC, and then have individual fields overridden.
+  * cp_hqd_active is set only for a KIQ ring. Always returns 0.
+  */
+ static int gfx_v9_4_3_xcc_mqd_init(struct amdgpu_ring *ring, int xcc_id)
+ {
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000003;
+
+ /* GPU address of the dynamic_cu_mask member of this ring's MQD allocation */
+ mqd->dynamic_cu_mask_addr_lo =
+ lower_32_bits(ring->mqd_gpu_addr
+ + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
+ mqd->dynamic_cu_mask_addr_hi =
+ upper_32_bits(ring->mqd_gpu_addr
+ + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
+
+ /* the EOP base is stored shifted right by 8 bits */
+ eop_base_addr = ring->eop_gpu_addr >> 8;
+ mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* enable doorbell? */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL);
+
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ /* DOORBELL_MODE is only set in SR-IOV multi-VF mode */
+ if (amdgpu_sriov_multi_vf_mode(adev))
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_MODE, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* disable the queue if it's active */
+ ring->wptr = 0;
+ mqd->cp_hqd_dequeue_request = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ hqd_gpu_addr = ring->gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(ring->ring_size / 4) - 1));
+ /*
+ * NOTE(review): this value is shifted left by 8 before being handed to
+ * REG_SET_FIELD, while every other REG_SET_FIELD call in this function
+ * passes an unshifted field value. Confirm the extra shift is intended.
+ */
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+ #ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
+ #endif
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ ring->wptr = 0;
+ /* overrides the 0 stored in cp_hqd_pq_rptr earlier with the register value */
+ mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR);
+
+ /* set the vmid for the queue */
+ mqd->cp_hqd_vmid = 0;
+
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ /* set MIN_IB_AVAIL_SIZE */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_IB_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+ mqd->cp_hqd_ib_control = tmp;
+
+ /* set static priority for a queue/ring */
+ gfx_v9_4_3_mqd_set_priority(ring, mqd);
+ mqd->cp_hqd_quantum = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_QUANTUM);
+
+ /* map_queues packet doesn't need activate the queue,
+ * so only kiq need set this field.
+ */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ mqd->cp_hqd_active = 1;
+
+ return 0;
+ }
+
+/*
+ * Program the hardware queue descriptor (HQD) registers of one XCC from the
+ * values cached in the ring's MQD.
+ *
+ * @ring: ring whose mqd_ptr supplies the register values
+ * @xcc_id: logical XCC index; mapped to an instance with GET_INST(GC, xcc_id)
+ *
+ * The callers visible in this file hold adev->srbm_mutex and select the
+ * ring's me/pipe/queue with soc15_grbm_select() around this call.
+ *
+ * Always returns 0.
+ */
+static int gfx_v9_4_3_xcc_kiq_init_register(struct amdgpu_ring *ring,
+ int xcc_id)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ int j;
+
+ /* disable wptr polling */
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_WPTR_POLL_CNTL, EN, 0);
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR,
+ mqd->cp_hqd_eop_base_addr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR_HI,
+ mqd->cp_hqd_eop_base_addr_hi);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_CONTROL,
+ mqd->cp_hqd_eop_control);
+
+ /* enable doorbell? */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1);
+ /*
+ * Poll in 1us steps, at most adev->usec_timeout iterations. A
+ * timeout is not reported here; programming continues regardless.
+ */
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ /* restore the dequeue request and ring pointers from the MQD */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST,
+ mqd->cp_hqd_dequeue_request);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR,
+ mqd->cp_hqd_pq_rptr);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+ }
+
+ /* set the pointer to the MQD */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR,
+ mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI,
+ mqd->cp_mqd_base_addr_hi);
+
+ /* set MQD vmid to 0 */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL,
+ mqd->cp_mqd_control);
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE,
+ mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI,
+ mqd->cp_hqd_pq_base_hi);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL,
+ mqd->cp_hqd_pq_control);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* enable the doorbell if requested */
+ if (ring->use_doorbell) {
+ /*
+ * The MEC doorbell range runs from the KIQ index to userqueue_end,
+ * each offset by xcc_id * xcc_doorbell_range for this XCC.
+ */
+ WREG32_SOC15(
+ GC, GET_INST(GC, xcc_id),
+ regCP_MEC_DOORBELL_RANGE_LOWER,
+ ((adev->doorbell_index.kiq +
+ xcc_id * adev->doorbell_index.xcc_doorbell_range) *
+ 2) << 2);
+ WREG32_SOC15(
+ GC, GET_INST(GC, xcc_id),
+ regCP_MEC_DOORBELL_RANGE_UPPER,
+ ((adev->doorbell_index.userqueue_end +
+ xcc_id * adev->doorbell_index.xcc_doorbell_range) *
+ 2) << 2);
+ }
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+
+ /* set the vmid for the queue */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, mqd->cp_hqd_vmid);
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE,
+ mqd->cp_hqd_persistent_state);
+
+ /* activate the queue */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE,
+ mqd->cp_hqd_active);
+
+ if (ring->use_doorbell)
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+
+ return 0;
+}
+
+/*
+ * Deactivate the currently selected hardware queue on one XCC and reset
+ * its HQD registers.
+ *
+ * @ring: ring being torn down (used for adev and for the debug message)
+ * @xcc_id: logical XCC index; mapped to an instance with GET_INST(GC, xcc_id)
+ *
+ * The callers visible in this file hold adev->srbm_mutex and select the
+ * ring's me/pipe/queue with soc15_grbm_select() around this call.
+ *
+ * Always returns 0.
+ */
+static int gfx_v9_4_3_xcc_q_fini_register(struct amdgpu_ring *ring,
+ int xcc_id)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int j;
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) {
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1);
+
+ /* poll in 1us steps, at most adev->usec_timeout iterations */
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+
+ /*
+ * The loop is bounded by adev->usec_timeout, so test against that
+ * same bound. Comparing with AMDGPU_MAX_USEC_TIMEOUT would miss the
+ * timeout whenever the two values differ.
+ */
+ if (j >= adev->usec_timeout) {
+ DRM_DEBUG("%s dequeue request failed.\n", ring->name);
+
+ /* Manual disable if dequeue request times out */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, 0);
+ }
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST,
+ 0);
+ }
+
+ /* return the remaining HQD state to fixed values */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_IQ_TIMER, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_IB_CONTROL, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, CP_HQD_PERSISTENT_STATE_DEFAULT);
+ /* doorbell control is written twice on purpose: 0x40000000, then 0 */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, 0);
+
+ return 0;
+}
+
+/*
+ * Initialize (or, during a GPU reset, restore) the KIQ of one XCC and
+ * program its HQD registers.
+ *
+ * @ring: the KIQ ring
+ * @xcc_id: logical XCC index
+ *
+ * During a reset with a valid MQD backup the backup is copied back and the
+ * ring buffer cleared. Otherwise the MQD is rebuilt from scratch and, if a
+ * backup buffer exists, saved into it.
+ *
+ * Always returns 0.
+ */
+static int gfx_v9_4_3_xcc_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ struct v9_mqd *tmp_mqd;
+
+ gfx_v9_4_3_xcc_kiq_setting(ring, xcc_id);
+
+ /* The GPU could be in a bad state during probe, in which case the
+ * driver triggers a reset after loading the SMU and the MQD has not
+ * been initialized yet, so it must be re-initialized.
+ * Check mqd->cp_hqd_pq_control since this value should not be 0.
+ */
+ tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[xcc_id].mqd_backup;
+ /*
+ * mqd_backup may be NULL (it is NULL-checked further down), so test
+ * the pointer before reading through it. A missing backup takes the
+ * full re-init path.
+ */
+ if (amdgpu_in_reset(adev) && tmp_mqd && tmp_mqd->cp_hqd_pq_control) {
+ /* for GPU_RESET case , reset MQD to a clean status */
+ memcpy(mqd, tmp_mqd, sizeof(struct v9_mqd_allocation));
+
+ /* reset ring buffer */
+ ring->wptr = 0;
+ amdgpu_ring_clear_ring(ring);
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id));
+ gfx_v9_4_3_xcc_kiq_init_register(ring, xcc_id);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+ } else {
+ memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
+ ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
+ ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
+ mutex_lock(&adev->srbm_mutex);
+ if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+ amdgpu_ring_clear_ring(ring);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id));
+ gfx_v9_4_3_xcc_mqd_init(ring, xcc_id);
+ gfx_v9_4_3_xcc_kiq_init_register(ring, xcc_id);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* keep a copy of the fresh MQD for later resets */
+ if (adev->gfx.kiq[xcc_id].mqd_backup)
+ memcpy(adev->gfx.kiq[xcc_id].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
+ }
+
+ return 0;
+}
+
+/*
+ * Initialize or restore the MQD of one compute (KCQ) ring.
+ *
+ * @ring: compute ring; its index in adev->gfx.compute_ring[] selects the
+ * matching adev->gfx.mec.mqd_backup[] slot
+ * @xcc_id: logical XCC index
+ * @restore: when true, always take the restore path
+ *
+ * A fresh MQD is built when @restore is false and either no usable backup
+ * exists or the device is neither in reset nor in suspend. Otherwise the
+ * MQD is copied back from the backup (when present) and the ring buffer and
+ * write pointer are cleared.
+ */
+static void gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id,
+ bool restore)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.compute_ring[0];
+ struct v9_mqd *tmp_mqd;
+
+ /* Same as the KIQ init above: the driver needs to re-init the MQD if
+ * mqd->cp_hqd_pq_control has not been initialized before.
+ */
+ tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
+
+ /*
+ * The backup pointer may be NULL (it is NULL-checked below), so treat a
+ * missing backup like an uninitialized one instead of dereferencing it.
+ */
+ if (!restore && (!tmp_mqd || !tmp_mqd->cp_hqd_pq_control ||
+ (!amdgpu_in_reset(adev) && !adev->in_suspend))) {
+ memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
+ ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
+ ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id));
+ gfx_v9_4_3_xcc_mqd_init(ring, xcc_id);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* keep a copy of the fresh MQD for later restores */
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
+ } else {
+ /* restore MQD to a clean status */
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
+ /* reset ring buffer */
+ ring->wptr = 0;
+ atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
+ amdgpu_ring_clear_ring(ring);
+ }
+}
+
+/*
+ * Run the HQD teardown sequence on every compute ring that belongs to one
+ * XCC. Rings are skipped while the device is in reset or in suspend.
+ *
+ * Always returns 0.
+ */
+static int gfx_v9_4_3_xcc_kcq_fini_register(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_ring *kcq;
+ int idx;
+
+ for (idx = 0; idx < adev->gfx.num_compute_rings; idx++) {
+ /* rings of XCC n occupy slots [n * num_compute_rings, ...) */
+ kcq = &adev->gfx.compute_ring[xcc_id * adev->gfx.num_compute_rings + idx];
+
+ if (amdgpu_in_reset(adev) || adev->in_suspend)
+ continue;
+
+ /* select this queue, tear it down, then deselect */
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, kcq->me, kcq->pipe, kcq->queue, 0,
+ GET_INST(GC, xcc_id));
+ gfx_v9_4_3_xcc_q_fini_register(kcq, xcc_id);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+ }
+
+ return 0;
+}
+
+/*
+ * Bring up the KIQ of one XCC.
+ *
+ * gfx_v9_4_3_xcc_kiq_init_queue() returns 0 on every path, so forwarding
+ * its result is equivalent to returning 0.
+ */
+static int gfx_v9_4_3_xcc_kiq_resume(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[xcc_id].ring;
+
+ return gfx_v9_4_3_xcc_kiq_init_queue(kiq_ring, xcc_id);
+}
+
+/*
+ * Enable the compute engine of one XCC, initialize the MQD of each of its
+ * compute rings, then enable the rings with amdgpu_gfx_enable_kcq().
+ *
+ * Returns the result of amdgpu_gfx_enable_kcq().
+ */
+static int gfx_v9_4_3_xcc_kcq_resume(struct amdgpu_device *adev, int xcc_id)
+{
+ int idx = 0;
+
+ gfx_v9_4_3_xcc_cp_compute_enable(adev, true, xcc_id);
+
+ while (idx < adev->gfx.num_compute_rings) {
+ /* rings of XCC n occupy slots [n * num_compute_rings, ...) */
+ int slot = xcc_id * adev->gfx.num_compute_rings + idx;
+
+ gfx_v9_4_3_xcc_kcq_init_queue(&adev->gfx.compute_ring[slot],
+ xcc_id, false);
+ idx++;
+ }
+
+ return amdgpu_gfx_enable_kcq(adev, xcc_id);
+}
+
+/*
+ * Bring up the compute command processor of one XCC: load microcode when
+ * firmware is not loaded through PSP, resume the KIQ and the KCQs, then
+ * run a ring test on every compute ring of this XCC.
+ *
+ * The GUI idle interrupt is disabled for the duration and re-enabled only
+ * on success.
+ *
+ * Returns 0 on success or the first error from a sub-step.
+ */
+static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_ring *ring;
+ int r, j;
+
+ gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, false, xcc_id);
+
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ /* driver-side firmware load path */
+ gfx_v9_4_3_xcc_disable_gpa_mode(adev, xcc_id);
+
+ r = gfx_v9_4_3_xcc_cp_compute_load_microcode(adev, xcc_id);
+ if (r)
+ return r;
+ } else {
+ /* PSP load path: only disable the compute engine here */
+ gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id);
+ }
+
+ r = gfx_v9_4_3_xcc_kiq_resume(adev, xcc_id);
+ if (r)
+ return r;
+
+ r = gfx_v9_4_3_xcc_kcq_resume(adev, xcc_id);
+ if (r)
+ return r;
+
+ /* test every compute ring that belongs to this XCC */
+ for (j = 0; j < adev->gfx.num_compute_rings; j++) {
+ ring = &adev->gfx.compute_ring
+ [j + xcc_id * adev->gfx.num_compute_rings];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, true, xcc_id);
+
+ return 0;
+}
+
+/*
+ * Set up the compute partition (XCP) configuration, then resume the
+ * command processor on every XCC in adev->gfx.xcc_mask.
+ *
+ * Returns 0 on success, -EINVAL when an SR-IOV VF reports an unknown
+ * partition mode, or the first error from a sub-step.
+ */
+static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev)
+{
+ int r = 0, i, num_xcc, num_xcp, num_xcc_per_xcp;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ if (amdgpu_sriov_vf(adev)) {
+ enum amdgpu_gfx_partition mode;
+
+ /* VF: query the partition mode and initialize the XCPs from it */
+ mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+ AMDGPU_XCP_FL_NONE);
+ if (mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
+ return -EINVAL;
+ num_xcc_per_xcp = gfx_v9_4_3_get_xccs_per_xcp(adev);
+ adev->gfx.num_xcc_per_xcp = num_xcc_per_xcp;
+ /* NOTE(review): assumes num_xcc_per_xcp is non-zero - confirm */
+ num_xcp = num_xcc / num_xcc_per_xcp;
+ r = amdgpu_xcp_init(adev->xcp_mgr, num_xcp, mode);
+
+ } else {
+ /*
+ * Non-VF: restore the partition mode when resuming from suspend;
+ * otherwise switch to amdgpu_user_partt_mode only if the current
+ * mode is unknown.
+ */
+ if (adev->in_suspend)
+ amdgpu_xcp_restore_partition_mode(adev->xcp_mgr);
+ else if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+ AMDGPU_XCP_FL_NONE) ==
+ AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
+ r = amdgpu_xcp_switch_partition_mode(
+ adev->xcp_mgr, amdgpu_user_partt_mode);
+ }
+ if (r)
+ return r;
+
+ for (i = 0; i < num_xcc; i++) {
+ r = gfx_v9_4_3_xcc_cp_resume(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/*
+ * Shut down the compute queues of one XCC.
+ *
+ * The KCQs are disabled first; a failure is logged and otherwise ignored.
+ * An SR-IOV VF then only disables wptr polling and returns. Otherwise the
+ * KIQ HQD is torn down (unless in reset or suspend), followed by the KCQ
+ * HQDs, and the compute engine is disabled.
+ */
+static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id)
+{
+ if (amdgpu_gfx_disable_kcq(adev, xcc_id))
+ DRM_ERROR("XCD %d KCQ disable failed\n", xcc_id);
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* must disable polling for SRIOV when hw finished, otherwise
+ * CPC engine may still keep fetching WB address which is already
+ * invalid after sw finished and trigger DMAR reading error in
+ * hypervisor side.
+ */
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_WPTR_POLL_CNTL, EN, 0);
+ return;
+ }
+
+ /* Use deinitialize sequence from CAIL when unbinding device
+ * from driver, otherwise KIQ is hanging when binding back
+ */
+ if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, adev->gfx.kiq[xcc_id].ring.me,
+ adev->gfx.kiq[xcc_id].ring.pipe,
+ adev->gfx.kiq[xcc_id].ring.queue, 0,
+ GET_INST(GC, xcc_id));
+ gfx_v9_4_3_xcc_q_fini_register(&adev->gfx.kiq[xcc_id].ring,
+ xcc_id);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+ }
+
+ gfx_v9_4_3_xcc_kcq_fini_register(adev, xcc_id);
+ gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id);
+}
+
+/*
+ * Hardware init for the GFX IP block: set up the cleaner shader, apply
+ * golden registers (skipped on an SR-IOV VF), initialize constants, then
+ * resume the RLC and the command processor.
+ *
+ * Returns 0 on success or the error from the RLC or CP resume step.
+ */
+static int gfx_v9_4_3_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret;
+
+ amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
+ adev->gfx.cleaner_shader_ptr);
+
+ if (!amdgpu_sriov_vf(adev))
+ gfx_v9_4_3_init_golden_registers(adev);
+
+ gfx_v9_4_3_constants_init(adev);
+
+ ret = adev->gfx.rlc.funcs->resume(adev);
+ if (ret)
+ return ret;
+
+ /* the CP resume result is the final result */
+ return gfx_v9_4_3_cp_resume(adev);
+}
+
+/*
+ * Hardware teardown for the GFX IP block: drop the three interrupt
+ * references (priv_reg, priv_inst, bad_op), then shut down every XCC.
+ *
+ * Always returns 0.
+ */
+static int gfx_v9_4_3_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int xcc, xcc_count;
+
+ amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
+
+ xcc_count = NUM_XCC(adev->gfx.xcc_mask);
+ for (xcc = 0; xcc < xcc_count; xcc++)
+ gfx_v9_4_3_xcc_fini(adev, xcc);
+
+ return 0;
+}
+
+/* Suspend callback: identical to hardware teardown (hw_fini). */
+static int gfx_v9_4_3_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return gfx_v9_4_3_hw_fini(ip_block);
+}
+
+/* Resume callback: identical to hardware init (hw_init). */
+static int gfx_v9_4_3_resume(struct amdgpu_ip_block *ip_block)
+{
+ return gfx_v9_4_3_hw_init(ip_block);
+}
+
+/*
+ * Report whether the GFX block is idle: true only when GRBM_STATUS of
+ * every XCC has GUI_ACTIVE clear.
+ */
+static bool gfx_v9_4_3_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int xcc_count = NUM_XCC(adev->gfx.xcc_mask);
+ u32 status;
+ int xcc;
+
+ for (xcc = 0; xcc < xcc_count; xcc++) {
+ status = RREG32_SOC15(GC, GET_INST(GC, xcc), regGRBM_STATUS);
+ /* one busy XCC is enough to call the block busy */
+ if (REG_GET_FIELD(status, GRBM_STATUS, GUI_ACTIVE))
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Poll gfx_v9_4_3_is_idle() in 1us steps, at most adev->usec_timeout
+ * times.
+ *
+ * Returns 0 once idle, -ETIMEDOUT otherwise.
+ */
+static int gfx_v9_4_3_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ unsigned int attempt = 0;
+
+ while (attempt < adev->usec_timeout) {
+ if (gfx_v9_4_3_is_idle(ip_block))
+ return 0;
+ udelay(1);
+ attempt++;
+ }
+
+ return -ETIMEDOUT;
+}
+
+/*
+ * Soft-reset the GFX sub-blocks that report busy.
+ *
+ * GRBM_STATUS and GRBM_STATUS2 are read to build a GRBM_SOFT_RESET mask
+ * (CP, GFX, RLC). If the mask is non-zero the RLC is stopped, the compute
+ * engine is disabled, and the reset bits are asserted and then released
+ * with 50us delays. Only instance GET_INST(GC, 0) is accessed here.
+ *
+ * Always returns 0.
+ */
+static int gfx_v9_4_3_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ u32 grbm_soft_reset = 0;
+ u32 tmp;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* GRBM_STATUS */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_STATUS);
+ /* any busy graphics pipeline unit requests a CP + GFX reset */
+ if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
+ GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
+ GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
+ GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
+ GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
+ GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
+ }
+
+ /* a busy CP alone requests only a CP reset */
+ if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
+ }
+
+ /* GRBM_STATUS2 */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_STATUS2);
+ if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+
+
+ if (grbm_soft_reset) {
+ /* stop the rlc */
+ adev->gfx.rlc.funcs->stop(adev);
+
+ /* Disable MEC parsing/prefetching */
+ gfx_v9_4_3_xcc_cp_compute_enable(adev, false, 0);
+
+ /* assert the reset bits; the read-back result is not used */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET);
+
+ udelay(50);
+
+ /* release the reset bits */
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+ return 0;
+}
+
+/*
+ * Emit register writes on @ring that program the GDS base/size, the GWS
+ * range and the OA mask for @vmid.
+ *
+ * The OA mask has @oa_size consecutive bits set, starting at bit @oa_base.
+ * Register offsets are taken relative to instance GET_INST(GC, 0).
+ */
+static void gfx_v9_4_3_ring_emit_gds_switch(struct amdgpu_ring *ring,
+ uint32_t vmid,
+ uint32_t gds_base, uint32_t gds_size,
+ uint32_t gws_base, uint32_t gws_size,
+ uint32_t oa_base, uint32_t oa_size)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* GDS Base */
+ gfx_v9_4_3_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_VMID0_BASE) + 2 * vmid,
+ gds_base);
+
+ /* GDS Size */
+ gfx_v9_4_3_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_VMID0_SIZE) + 2 * vmid,
+ gds_size);
+
+ /* GWS */
+ gfx_v9_4_3_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_GWS_VMID0) + vmid,
+ gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
+
+ /* OA */
+ /*
+ * Build the mask with unsigned shifts: a signed "1 << 31" overflows
+ * int, while 1U keeps the same bit pattern with defined behaviour.
+ */
+ gfx_v9_4_3_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_OA_VMID0) + vmid,
+ (1U << (oa_size + oa_base)) - (1U << oa_base));
+}
+
+/*
+ * Early init for the GFX IP block: cap the number of compute rings at
+ * AMDGPU_MAX_COMPUTE_RINGS, install the KIQ/ring/IRQ/GDS/RLC callbacks,
+ * set up RLCG register access control and load the microcode.
+ *
+ * Returns the result of gfx_v9_4_3_init_microcode().
+ */
+static int gfx_v9_4_3_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ AMDGPU_MAX_COMPUTE_RINGS);
+ gfx_v9_4_3_set_kiq_pm4_funcs(adev);
+ gfx_v9_4_3_set_ring_funcs(adev);
+ gfx_v9_4_3_set_irq_funcs(adev);
+ gfx_v9_4_3_set_gds_init(adev);
+ gfx_v9_4_3_set_rlc_funcs(adev);
+
+ /* init rlcg reg access ctrl */
+ gfx_v9_4_3_init_rlcg_reg_access_ctrl(adev);
+
+ return gfx_v9_4_3_init_microcode(adev);
+}
+
+/*
+ * Late init for the GFX IP block: take references on the priv_reg,
+ * priv_inst and bad_op interrupts, then enable the RAS watchdog timer when
+ * the RAS block provides that hook.
+ *
+ * Returns 0 on success or the first amdgpu_irq_get() error.
+ *
+ * NOTE(review): on a partial failure the references already taken are not
+ * dropped here; presumably hw_fini releases them - confirm.
+ */
+static int gfx_v9_4_3_late_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
+ if (r)
+ return r;
+
+ /* the watchdog hook is optional */
+ if (adev->gfx.ras &&
+ adev->gfx.ras->enable_watchdog_timer)
+ adev->gfx.ras->enable_watchdog_timer(adev);
+
+ return 0;
+}
+
+/*
+ * Enable or disable SRAM fine-grain clock gating on one XCC by clearing
+ * (enable) or setting (disable) the GFXIP_FGCG override bit in
+ * RLC_CGTT_MGCG_OVERRIDE. Does nothing without AMD_CG_SUPPORT_GFX_FGCG.
+ */
+static void gfx_v9_4_3_xcc_update_sram_fgcg(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ uint32_t cur, want;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
+ return;
+
+ cur = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_CGTT_MGCG_OVERRIDE);
+
+ want = enable ?
+ (cur & ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK) :
+ (cur | RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK);
+
+ /* skip the write when the register already holds the value */
+ if (want != cur)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_CGTT_MGCG_OVERRIDE, want);
+}
+
+/*
+ * Enable or disable repeater fine-grain clock gating on one XCC by
+ * clearing (enable) or setting (disable) the GFXIP_REP_FGCG override bit
+ * in RLC_CGTT_MGCG_OVERRIDE. Does nothing without
+ * AMD_CG_SUPPORT_REPEATER_FGCG.
+ */
+static void gfx_v9_4_3_xcc_update_repeater_fgcg(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ uint32_t cur, want;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
+ return;
+
+ cur = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_CGTT_MGCG_OVERRIDE);
+
+ want = enable ?
+ (cur & ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REP_FGCG_OVERRIDE_MASK) :
+ (cur | RLC_CGTT_MGCG_OVERRIDE__GFXIP_REP_FGCG_OVERRIDE_MASK);
+
+ /* skip the write when the register already holds the value */
+ if (want != cur)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_CGTT_MGCG_OVERRIDE, want);
+}
+
+/*
+ * Enable or disable medium-grain clock gating (MGCG) and memory light
+ * sleep (MGLS) on one XCC.
+ *
+ * The enable path runs only when @enable is set and AMD_CG_SUPPORT_GFX_MGCG
+ * is advertised: it clears the MGCG/MGLS override bits and, depending on
+ * cg_flags, turns on RLC and CP memory light sleep. Every other case takes
+ * the disable path, which sets the override bits and turns light sleep off.
+ */
+static void
+gfx_v9_4_3_xcc_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ uint32_t data, def;
+
+ /* It is disabled by HW by default */
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
+ /* 1 - RLC_CGTT_MGCG_OVERRIDE */
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);
+
+ data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
+
+ /* write only when the value actually changes */
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* MGLS is a global flag to control all MGLS in GFX */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
+ /* 2 - RLC memory Light sleep */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL);
+ data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL, data);
+ }
+ /* 3 - CP memory Light sleep */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL);
+ data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL, data);
+ }
+ }
+ } else {
+ /* 1 - MGCG_OVERRIDE */
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);
+
+ data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
+
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* 2 - disable MGLS in RLC */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL);
+ if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
+ data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL, data);
+ }
+
+ /* 3 - disable MGLS in CP */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL);
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
+ data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL, data);
+ }
+ }
+
+}
+
+/*
+ * Enable or disable coarse-grain clock gating (CGCG) and coarse-grain
+ * light sleep (CGLS) on one XCC.
+ *
+ * The enable path runs only when @enable is set and AMD_CG_SUPPORT_GFX_CGCG
+ * is advertised. Every other case clears the CGCG and CGLS enable bits in
+ * RLC_CGCG_CGLS_CTRL.
+ */
+static void
+gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ uint32_t def, data;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
+
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);
+ /* unset CGCG override */
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+ /* the CGLS override follows the CGLS support flag */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+ /* update CGCG and CGLS override bits */
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* CGCG Hysteresis: 400us */
+ def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL);
+
+ /* the value is rebuilt from scratch; def is used only for comparison */
+ data = (0x2710
+ << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data);
+
+ /* set IDLE_POLL_COUNT(0x33450100)*/
+ def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL);
+ data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+ (0x3345 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL, data);
+ } else {
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL);
+ /* reset CGCG/CGLS bits */
+ data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
+ /* disable cgcg and cgls in FSM */
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data);
+ }
+
+}
+
+/*
+ * Apply the full clock-gating configuration (FGCG, MGCG/MGLS, CGCG/CGLS)
+ * to one XCC inside an RLC safe-mode section.
+ *
+ * The order differs by direction: enabling goes FGCG, MGCG, CGCG;
+ * disabling goes CGCG, MGCG, FGCG.
+ *
+ * Always returns 0.
+ */
+static int gfx_v9_4_3_xcc_update_gfx_clock_gating(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+
+ if (enable) {
+ /* FGCG */
+ gfx_v9_4_3_xcc_update_sram_fgcg(adev, enable, xcc_id);
+ gfx_v9_4_3_xcc_update_repeater_fgcg(adev, enable, xcc_id);
+
+ /* CGCG/CGLS should be enabled after MGCG/MGLS
+ * === MGCG + MGLS ===
+ */
+ gfx_v9_4_3_xcc_update_medium_grain_clock_gating(adev, enable,
+ xcc_id);
+ /* === CGCG + CGLS === */
+ gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(adev, enable,
+ xcc_id);
+ } else {
+ /* CGCG/CGLS should be disabled before MGCG/MGLS
+ * === CGCG + CGLS ===
+ */
+ gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(adev, enable,
+ xcc_id);
+ /* === MGCG + MGLS === */
+ gfx_v9_4_3_xcc_update_medium_grain_clock_gating(adev, enable,
+ xcc_id);
+
+ /* FGCG */
+ gfx_v9_4_3_xcc_update_sram_fgcg(adev, enable, xcc_id);
+ gfx_v9_4_3_xcc_update_repeater_fgcg(adev, enable, xcc_id);
+ }
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
+
+ return 0;
+}
+
+/* RLC callback table for this GFX IP version. */
+static const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = {
+ .is_rlc_enabled = gfx_v9_4_3_is_rlc_enabled,
+ .set_safe_mode = gfx_v9_4_3_xcc_set_safe_mode,
+ .unset_safe_mode = gfx_v9_4_3_xcc_unset_safe_mode,
+ .init = gfx_v9_4_3_rlc_init,
+ .resume = gfx_v9_4_3_rlc_resume,
+ .stop = gfx_v9_4_3_rlc_stop,
+ .reset = gfx_v9_4_3_rlc_reset,
+ .start = gfx_v9_4_3_rlc_start,
+ .update_spm_vmid = gfx_v9_4_3_update_spm_vmid,
+ .is_rlcg_access_range = gfx_v9_4_3_is_rlcg_access_range,
+};
+
+/* Power-gating callback: a no-op here, @state is ignored. Always returns 0. */
+static int gfx_v9_4_3_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+/*
+ * Clock-gating callback: apply the requested state to every XCC. Gating is
+ * enabled when @state is AMD_CG_STATE_GATE and disabled otherwise.
+ * Nothing is done on an SR-IOV VF.
+ *
+ * Always returns 0.
+ */
+static int gfx_v9_4_3_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool gate = (state == AMD_CG_STATE_GATE);
+ int xcc, xcc_count;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ xcc_count = NUM_XCC(adev->gfx.xcc_mask);
+ for (xcc = 0; xcc < xcc_count; xcc++)
+ gfx_v9_4_3_xcc_update_gfx_clock_gating(adev, gate, xcc);
+
+ return 0;
+}
+
+/*
+ * Report the clock-gating features currently active in hardware by OR-ing
+ * AMD_CG_SUPPORT_* bits into *@flags.
+ *
+ * Registers are read through the KIQ, and only for instance
+ * GET_INST(GC, 0).
+ *
+ * NOTE(review): on an SR-IOV VF *flags is cleared first, but the register
+ * reads below still run - confirm this is intended.
+ */
+static void gfx_v9_4_3_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ /* AMD_CG_SUPPORT_GFX_MGCG */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_CGTT_MGCG_OVERRIDE));
+ /* MGCG is active when its override bit is clear */
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_MGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGCG */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_CGCG_CGLS_CTRL));
+ if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGLS;
+
+ /* AMD_CG_SUPPORT_GFX_RLC_LS */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_MEM_SLP_CNTL));
+ if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
+
+ /* AMD_CG_SUPPORT_GFX_CP_LS */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regCP_MEM_SLP_CNTL));
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
+}
+
+/*
+ * Emit an HDP flush on @ring: a wait_reg_mem packet on the NBIO HDP flush
+ * request/done registers, using the ref/mask bit that matches this ring.
+ *
+ * For compute rings the bit is ref_and_mask_cp2 (me 1) or ref_and_mask_cp6
+ * (me 2), shifted by the pipe index. Any other me value emits nothing.
+ * Non-compute rings use ref_and_mask_cp0.
+ */
+static void gfx_v9_4_3_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask, reg_mem_engine;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+ switch (ring->me) {
+ case 1:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
+ break;
+ case 2:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
+ break;
+ default:
+ return;
+ }
+ reg_mem_engine = 0;
+ } else {
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
+ reg_mem_engine = 1; /* pfp */
+ }
+
+ gfx_v9_4_3_wait_reg_mem(ring, reg_mem_engine, 0, 1,
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+ ref_and_mask, ref_and_mask, 0x20);
+}
+
+/*
+ * Emit an INDIRECT_BUFFER packet for @ib on a compute ring.
+ *
+ * The control dword combines INDIRECT_BUFFER_VALID, the IB length in
+ * dwords and the job's VMID in bits 24 and up. When the IB carries
+ * AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID, a GDS_COMPUTE_MAX_WAVE_ID write is
+ * emitted first. @flags is unused in this function.
+ */
+static void gfx_v9_4_3_ring_emit_ib_compute(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
+ /* Currently, there is a high possibility to get wave ID mismatch
+ * between ME and GDS, leading to a hw deadlock, because ME generates
+ * different wave IDs than the GDS expects. This situation happens
+ * randomly when at least 5 compute pipes use GDS ordered append.
+ * The wave IDs generated by ME are also wrong after suspend/resume.
+ * Those are probably bugs somewhere else in the kernel driver.
+ *
+ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+ * GDS to 0 for this ring (me/pipe).
+ */
+ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
+ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+/*
+ * Emit a RELEASE_MEM packet that writes the fence value @seq to @addr.
+ *
+ * @flags selects a 64-bit write (AMDGPU_FENCE_FLAG_64BIT), an interrupt
+ * (AMDGPU_FENCE_FLAG_INT) and the reduced TC write-back-only cache action
+ * set (AMDGPU_FENCE_FLAG_TC_WB_ONLY).
+ *
+ * BUG()s if @addr is not aligned to the write size.
+ */
+static void gfx_v9_4_3_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+ bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+
+ /* RELEASE_MEM - flush caches, send int */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
+ amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
+ EOP_TC_NC_ACTION_EN) :
+ (EOP_TCL1_ACTION_EN |
+ EOP_TC_ACTION_EN |
+ EOP_TC_WB_ACTION_EN |
+ EOP_TC_MD_ACTION_EN)) |
+ EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ EVENT_INDEX(5)));
+ /* DATA_SEL 2 for a 64-bit write, 1 for 32-bit; INT_SEL 2 when an interrupt is requested */
+ amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
+
+ /*
+ * the address should be Qword aligned if 64bit write, Dword
+ * aligned if only send 32bit data low (discard data high)
+ */
+ if (write64bit)
+ BUG_ON(addr & 0x7);
+ else
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ amdgpu_ring_write(ring, 0);
+}
+
+/*
+ * Emit a wait_reg_mem packet on the ring's fence address, using the
+ * latest sync sequence number (ring->fence_drv.sync_seq) as the reference
+ * value and a full 32-bit mask.
+ */
+static void gfx_v9_4_3_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ /* 1 only for GFX-type rings; passed as the engine argument */
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ gfx_v9_4_3_wait_reg_mem(ring, usepfp, 1, 0,
+ lower_32_bits(addr), upper_32_bits(addr),
+ seq, 0xffffffff, 4);
+}
+
+/* Emit a GPU TLB flush for @vmid / @pd_addr via the GMC helper. */
+static void gfx_v9_4_3_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+}
+
+/* Return the ring's read pointer from its writeback slot (wb[rptr_offs]). */
+static u64 gfx_v9_4_3_ring_get_rptr_compute(struct amdgpu_ring *ring)
+{
+ return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
+}
+
+/*
+ * Return the ring's write pointer, read atomically as 64 bits from its
+ * writeback slot (wb[wptr_offs]). Rings without a doorbell hit BUG().
+ */
+static u64 gfx_v9_4_3_ring_get_wptr_compute(struct amdgpu_ring *ring)
+{
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell)
+ wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
+ else
+ BUG();
+ return wptr;
+}
+
+/*
+ * Publish ring->wptr: store it in the writeback slot (wb[wptr_offs]) and
+ * then write it to the ring's doorbell. Rings without a doorbell hit
+ * BUG().
+ */
+static void gfx_v9_4_3_ring_set_wptr_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ BUG(); /* only DOORBELL method supported on gfx9 now */
+ }
+}
+
+/*
+ * Emit a KIQ fence: a WRITE_DATA packet storing the low 32 bits of @seq at
+ * @addr, plus a second WRITE_DATA to CPC_INT_STATUS when
+ * AMDGPU_FENCE_FLAG_INT is set.
+ *
+ * BUG()s if AMDGPU_FENCE_FLAG_64BIT is requested.
+ */
+static void gfx_v9_4_3_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned int flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* we only allocate 32bit for each seq wb address */
+ BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ /* write fence seq to the "addr" */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* set register to trigger INT */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
+ amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regCPC_INT_STATUS));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
+ }
+}
+
+/*
+ * Emit a COPY_DATA packet that copies register @reg (after xcc offset
+ * normalization) into the write-back buffer, at dword offset @reg_val_offs
+ * from adev->wb.gpu_addr.
+ */
+static void gfx_v9_4_3_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+				      uint32_t reg_val_offs)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const uint32_t copy_ctrl = 0 |		/* src: register*/
+				   (5 << 8) |	/* dst: memory */
+				   (1 << 20);	/* write confirm */
+	uint64_t dst_addr = adev->wb.gpu_addr + reg_val_offs * 4;
+
+	reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg);
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
+	amdgpu_ring_write(ring, copy_ctrl);
+	amdgpu_ring_write(ring, reg);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, lower_32_bits(dst_addr));
+	amdgpu_ring_write(ring, upper_32_bits(dst_addr));
+}
+
+/*
+ * Emit a WRITE_DATA packet that writes @val to register @reg (after xcc
+ * offset normalization). The control dword depends on the ring type:
+ * GFX rings use engine select 1 with write confirm, KIQ rings set bit 16
+ * ("no inc addr"), every other ring type uses write confirm only.
+ */
+static void gfx_v9_4_3_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+				      uint32_t val)
+{
+	uint32_t ctrl;
+
+	reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg);
+
+	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX)
+		ctrl = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
+	else if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+		ctrl = (1 << 16); /* no inc addr */
+	else
+		ctrl = WR_CONFIRM;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+	amdgpu_ring_write(ring, ctrl);
+	amdgpu_ring_write(ring, reg);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, val);
+}
+
+/*
+ * Emit a wait on register @reg through gfx_v9_4_3_wait_reg_mem(), passing
+ * @val as the reference and @mask as the mask. The remaining helper
+ * arguments are the fixed values 0, 0, 0, (addr1 =) 0 and 0x20.
+ */
+static void gfx_v9_4_3_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ gfx_v9_4_3_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
+}
+
+/*
+ * Thin wrapper: forwards all arguments unchanged to
+ * amdgpu_ring_emit_reg_write_reg_wait_helper().
+ */
+static void gfx_v9_4_3_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+ ref, mask);
+}
+
+/*
+ * Build an SQ_CMD value (CMD = 0x03, MODE = 0x01, CHECK_VMID = 1,
+ * VM_ID = @vmid) and write it to regSQ_CMD of the ring's XCC, with the
+ * write bracketed by RLC safe-mode enter/exit.
+ */
+static void gfx_v9_4_3_ring_soft_recovery(struct amdgpu_ring *ring,
+					  unsigned vmid)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t sq_cmd = 0;
+
+	sq_cmd = REG_SET_FIELD(sq_cmd, SQ_CMD, CMD, 0x03);
+	sq_cmd = REG_SET_FIELD(sq_cmd, SQ_CMD, MODE, 0x01);
+	sq_cmd = REG_SET_FIELD(sq_cmd, SQ_CMD, CHECK_VMID, 1);
+	sq_cmd = REG_SET_FIELD(sq_cmd, SQ_CMD, VM_ID, vmid);
+
+	amdgpu_gfx_rlc_enter_safe_mode(adev, ring->xcc_id);
+	WREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regSQ_CMD, sq_cmd);
+	amdgpu_gfx_rlc_exit_safe_mode(adev, ring->xcc_id);
+}
+
+/*
+ * Enable or disable TIME_STAMP_INT_ENABLE in the CP_ME1_PIPE<pipe>_INT_CNTL
+ * register of XCC @xcc_id.
+ *
+ * Only me == 1 with pipe 0..3 is handled; any other me/pipe logs a debug
+ * message and returns. States other than ENABLE/DISABLE leave the register
+ * untouched.
+ */
+static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+	struct amdgpu_device *adev, int me, int pipe,
+	enum amdgpu_interrupt_state state, int xcc_id)
+{
+	u32 cntl_reg, cntl_val;
+	int ts_enable;
+
+	/*
+	 * amdgpu drives only the first MEC, so only its pipes are handled
+	 * here; interrupts of all other pipes are set up by amdkfd.
+	 */
+	if (me != 1) {
+		DRM_DEBUG("invalid me %d\n", me);
+		return;
+	}
+
+	switch (pipe) {
+	case 0:
+		cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE0_INT_CNTL);
+		break;
+	case 1:
+		cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE1_INT_CNTL);
+		break;
+	case 2:
+		cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE2_INT_CNTL);
+		break;
+	case 3:
+		cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE3_INT_CNTL);
+		break;
+	default:
+		DRM_DEBUG("invalid pipe %d\n", pipe);
+		return;
+	}
+
+	if (state == AMDGPU_IRQ_STATE_ENABLE)
+		ts_enable = 1;
+	else if (state == AMDGPU_IRQ_STATE_DISABLE)
+		ts_enable = 0;
+	else
+		return;
+
+	/* read-modify-write of the per-pipe interrupt control register */
+	cntl_val = RREG32_XCC(cntl_reg, xcc_id);
+	cntl_val = REG_SET_FIELD(cntl_val, CP_ME1_PIPE0_INT_CNTL,
+				 TIME_STAMP_INT_ENABLE, ts_enable);
+	WREG32_XCC(cntl_reg, cntl_val, xcc_id);
+}
+
+/*
+ * Look up the register offset of CP_ME1_PIPE<pipe>_INT_CNTL for XCC @xcc_id.
+ *
+ * Returns 0 when @me is not 1 or @pipe is outside 0..3.
+ */
+static u32 gfx_v9_4_3_get_cpc_int_cntl(struct amdgpu_device *adev,
+				       int xcc_id, int me, int pipe)
+{
+	u32 offset = 0;
+
+	/*
+	 * amdgpu drives only the first MEC, so only its pipes are handled
+	 * here; interrupts of all other pipes are set up by amdkfd.
+	 */
+	if (me != 1)
+		return 0;
+
+	switch (pipe) {
+	case 0:
+		offset = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE0_INT_CNTL);
+		break;
+	case 1:
+		offset = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE1_INT_CNTL);
+		break;
+	case 2:
+		offset = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE2_INT_CNTL);
+		break;
+	case 3:
+		offset = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE3_INT_CNTL);
+		break;
+	default:
+		break;
+	}
+
+	return offset;
+}
+
+/*
+ * Enable or disable PRIV_REG_INT_ENABLE for every XCC: in
+ * CP_INT_CNTL_RING0 and in each per-pipe register that
+ * gfx_v9_4_3_get_cpc_int_cntl() returns a non-zero offset for.
+ *
+ * States other than ENABLE/DISABLE are ignored. Always returns 0.
+ */
+static int gfx_v9_4_3_set_priv_reg_fault_state(struct amdgpu_device *adev,
+					       struct amdgpu_irq_src *source,
+					       unsigned type,
+					       enum amdgpu_interrupt_state state)
+{
+	u32 cntl_reg, cntl_val;
+	int xcc, mec, pipe, num_xcc;
+	int enable;
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+	if (state != AMDGPU_IRQ_STATE_DISABLE &&
+	    state != AMDGPU_IRQ_STATE_ENABLE)
+		return 0;
+
+	enable = state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0;
+
+	for (xcc = 0; xcc < num_xcc; xcc++) {
+		WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc), CP_INT_CNTL_RING0,
+				      PRIV_REG_INT_ENABLE, enable);
+
+		for (mec = 0; mec < adev->gfx.mec.num_mec; mec++) {
+			for (pipe = 0; pipe < adev->gfx.mec.num_pipe_per_mec; pipe++) {
+				/* MECs start at 1 */
+				cntl_reg = gfx_v9_4_3_get_cpc_int_cntl(adev, xcc, mec + 1, pipe);
+				if (!cntl_reg)
+					continue;
+
+				cntl_val = RREG32_XCC(cntl_reg, xcc);
+				cntl_val = REG_SET_FIELD(cntl_val, CP_ME1_PIPE0_INT_CNTL,
+							 PRIV_REG_INT_ENABLE, enable);
+				WREG32_XCC(cntl_reg, cntl_val, xcc);
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Enable or disable OPCODE_ERROR_INT_ENABLE for every XCC: in
+ * CP_INT_CNTL_RING0 and in each per-pipe register that
+ * gfx_v9_4_3_get_cpc_int_cntl() returns a non-zero offset for.
+ *
+ * States other than ENABLE/DISABLE are ignored. Always returns 0.
+ */
+static int gfx_v9_4_3_set_bad_op_fault_state(struct amdgpu_device *adev,
+					     struct amdgpu_irq_src *source,
+					     unsigned type,
+					     enum amdgpu_interrupt_state state)
+{
+	u32 cntl_reg, cntl_val;
+	int xcc, mec, pipe, num_xcc;
+	int enable;
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+	if (state != AMDGPU_IRQ_STATE_DISABLE &&
+	    state != AMDGPU_IRQ_STATE_ENABLE)
+		return 0;
+
+	enable = state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0;
+
+	for (xcc = 0; xcc < num_xcc; xcc++) {
+		WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc), CP_INT_CNTL_RING0,
+				      OPCODE_ERROR_INT_ENABLE, enable);
+
+		for (mec = 0; mec < adev->gfx.mec.num_mec; mec++) {
+			for (pipe = 0; pipe < adev->gfx.mec.num_pipe_per_mec; pipe++) {
+				/* MECs start at 1 */
+				cntl_reg = gfx_v9_4_3_get_cpc_int_cntl(adev, xcc, mec + 1, pipe);
+				if (!cntl_reg)
+					continue;
+
+				cntl_val = RREG32_XCC(cntl_reg, xcc);
+				cntl_val = REG_SET_FIELD(cntl_val, CP_ME1_PIPE0_INT_CNTL,
+							 OPCODE_ERROR_INT_ENABLE, enable);
+				WREG32_XCC(cntl_reg, cntl_val, xcc);
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Enable or disable PRIV_INSTR_INT_ENABLE in CP_INT_CNTL_RING0 of every
+ * XCC. States other than ENABLE/DISABLE are ignored. Always returns 0.
+ */
+static int gfx_v9_4_3_set_priv_inst_fault_state(struct amdgpu_device *adev,
+						struct amdgpu_irq_src *source,
+						unsigned type,
+						enum amdgpu_interrupt_state state)
+{
+	int xcc, num_xcc;
+	int enable;
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+	if (state != AMDGPU_IRQ_STATE_DISABLE &&
+	    state != AMDGPU_IRQ_STATE_ENABLE)
+		return 0;
+
+	enable = state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0;
+
+	for (xcc = 0; xcc < num_xcc; xcc++)
+		WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc), CP_INT_CNTL_RING0,
+				      PRIV_INSTR_INT_ENABLE, enable);
+
+	return 0;
+}
+
+/*
+ * Translate an AMDGPU_CP_IRQ_COMPUTE_MEC<m>_PIPE<p>_EOP interrupt @type into
+ * its (me, pipe) pair and apply @state to that pipe on every XCC through
+ * gfx_v9_4_3_xcc_set_compute_eop_interrupt_state().
+ *
+ * Unrecognized types are ignored. Always returns 0.
+ */
+static int gfx_v9_4_3_set_eop_interrupt_state(struct amdgpu_device *adev,
+					      struct amdgpu_irq_src *src,
+					      unsigned type,
+					      enum amdgpu_interrupt_state state)
+{
+	int me, pipe;
+	int xcc, num_xcc;
+
+	switch (type) {
+	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
+		me = 1;
+		pipe = 0;
+		break;
+	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
+		me = 1;
+		pipe = 1;
+		break;
+	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
+		me = 1;
+		pipe = 2;
+		break;
+	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
+		me = 1;
+		pipe = 3;
+		break;
+	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
+		me = 2;
+		pipe = 0;
+		break;
+	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
+		me = 2;
+		pipe = 1;
+		break;
+	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
+		me = 2;
+		pipe = 2;
+		break;
+	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
+		me = 2;
+		pipe = 3;
+		break;
+	default:
+		return 0;
+	}
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+	for (xcc = 0; xcc < num_xcc; xcc++)
+		gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+			adev, me, pipe, state, xcc);
+
+	return 0;
+}
+
+/*
+ * CP end-of-pipe interrupt handler. Decodes me/pipe/queue from
+ * entry->ring_id, maps entry->node_id to an XCC instance and runs
+ * amdgpu_fence_process() on every compute ring of that XCC whose
+ * me/pipe/queue match.
+ *
+ * Returns -EINVAL when the node id cannot be mapped to an XCC, 0 otherwise.
+ */
+static int gfx_v9_4_3_eop_irq(struct amdgpu_device *adev,
+			      struct amdgpu_irq_src *source,
+			      struct amdgpu_iv_entry *entry)
+{
+	struct amdgpu_ring *ring;
+	u8 me_id, pipe_id, queue_id;
+	int idx, xcc_id;
+
+	DRM_DEBUG("IH: CP EOP\n");
+
+	/* ring_id layout: bits 1:0 pipe, bits 3:2 me, bits 6:4 queue */
+	pipe_id = (entry->ring_id & 0x03) >> 0;
+	me_id = (entry->ring_id & 0x0c) >> 2;
+	queue_id = (entry->ring_id & 0x70) >> 4;
+
+	xcc_id = gfx_v9_4_3_ih_to_xcc_inst(adev, entry->node_id);
+	if (xcc_id == -EINVAL)
+		return -EINVAL;
+
+	/* me_id is a two-bit field; only values 0..2 are handled */
+	if (me_id > 2)
+		return 0;
+
+	for (idx = 0; idx < adev->gfx.num_compute_rings; idx++) {
+		ring = &adev->gfx.compute_ring
+				[idx +
+				 xcc_id * adev->gfx.num_compute_rings];
+		/* Per-queue interrupt is supported for MEC starting from VI.
+		 * The interrupt can only be enabled/disabled per pipe instead of per queue.
+		 */
+		if (ring->me != me_id || ring->pipe != pipe_id ||
+		    ring->queue != queue_id)
+			continue;
+
+		amdgpu_fence_process(ring);
+	}
+
+	return 0;
+}
+
+/*
+ * Common fault path for the privileged-register, bad-opcode and
+ * privileged-instruction interrupts. Decodes me/pipe/queue from
+ * entry->ring_id, maps entry->node_id to an XCC instance and calls
+ * drm_sched_fault() on every compute ring of that XCC whose me/pipe/queue
+ * match. Does nothing if the node id cannot be mapped.
+ */
+static void gfx_v9_4_3_fault(struct amdgpu_device *adev,
+			     struct amdgpu_iv_entry *entry)
+{
+	struct amdgpu_ring *ring;
+	u8 me_id, pipe_id, queue_id;
+	int idx, xcc_id;
+
+	/* ring_id layout: bits 1:0 pipe, bits 3:2 me, bits 6:4 queue */
+	pipe_id = (entry->ring_id & 0x03) >> 0;
+	me_id = (entry->ring_id & 0x0c) >> 2;
+	queue_id = (entry->ring_id & 0x70) >> 4;
+
+	xcc_id = gfx_v9_4_3_ih_to_xcc_inst(adev, entry->node_id);
+	if (xcc_id == -EINVAL)
+		return;
+
+	/* me_id is a two-bit field; only values 0..2 are handled */
+	if (me_id > 2)
+		return;
+
+	for (idx = 0; idx < adev->gfx.num_compute_rings; idx++) {
+		ring = &adev->gfx.compute_ring
+				[idx +
+				 xcc_id * adev->gfx.num_compute_rings];
+		if (ring->me != me_id || ring->pipe != pipe_id ||
+		    ring->queue != queue_id)
+			continue;
+
+		drm_sched_fault(&ring->sched);
+	}
+}
+
+/*
+ * Interrupt handler for illegal register accesses: logs an error and hands
+ * the IV entry to gfx_v9_4_3_fault(). @source is unused. Always returns 0.
+ */
+static int gfx_v9_4_3_priv_reg_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal register access in command stream\n");
+ gfx_v9_4_3_fault(adev, entry);
+ return 0;
+}
+
+/*
+ * Interrupt handler for illegal opcodes: logs an error and hands the IV
+ * entry to gfx_v9_4_3_fault(). @source is unused. Always returns 0.
+ */
+static int gfx_v9_4_3_bad_op_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal opcode in command stream\n");
+ gfx_v9_4_3_fault(adev, entry);
+ return 0;
+}
+
+/*
+ * Interrupt handler for illegal instructions: logs an error and hands the
+ * IV entry to gfx_v9_4_3_fault(). @source is unused. Always returns 0.
+ */
+static int gfx_v9_4_3_priv_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal instruction in command stream\n");
+ gfx_v9_4_3_fault(adev, entry);
+ return 0;
+}
+
+/*
+ * Emit one ACQUIRE_MEM packet on @ring. The coherency control word enables
+ * the SH_ICACHE, SH_KCACHE, TC, TCL1 and TC_WB actions; the size fields are
+ * all ones and the base fields are zero.
+ */
+static void gfx_v9_4_3_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ const unsigned int cp_coher_cntl =
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
+
+ /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
+ amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
+}
+
+/*
+ * Emit a register write that sets regSPI_WCL_PIPE_PERCENT_CS<pipe> to 0x1
+ * when @enable is true and to 0x7f otherwise. Pipes outside 0..3 log a
+ * debug message and emit nothing.
+ */
+static void gfx_v9_4_3_emit_wave_limit_cs(struct amdgpu_ring *ring,
+					  uint32_t pipe, bool enable)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t percent_reg;
+	uint32_t percent;
+
+	/* regSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
+	if (enable)
+		percent = 0x1;
+	else
+		percent = 0x7f;
+
+	switch (pipe) {
+	case 0:
+		percent_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS0);
+		break;
+	case 1:
+		percent_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS1);
+		break;
+	case 2:
+		percent_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS2);
+		break;
+	case 3:
+		percent_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS3);
+		break;
+	default:
+		DRM_DEBUG("invalid pipe %d\n", pipe);
+		return;
+	}
+
+	amdgpu_ring_emit_wreg(ring, percent_reg, percent);
+}
+/*
+ * Emit the wave-limit register writes for @ring: first
+ * regSPI_WCL_PIPE_PERCENT_GFX (0x1f when @enable, 0x07ffffff otherwise),
+ * then the per-pipe compute limit for every pipe of the MEC except the
+ * ring's own pipe.
+ */
+static void gfx_v9_4_3_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t gfx_percent;
+	int pipe;
+
+	/* regSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
+	 * number of gfx waves. Setting 5 bit will make sure gfx only gets
+	 * around 25% of gpu resources.
+	 */
+	gfx_percent = enable ? 0x1f : 0x07ffffff;
+	amdgpu_ring_emit_wreg(ring,
+			      SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_GFX),
+			      gfx_percent);
+
+	/* Restrict waves for normal/low priority compute queues as well
+	 * to get best QoS for high priority compute jobs.
+	 *
+	 * amdgpu controls only 1st ME(0-3 CS pipes).
+	 */
+	for (pipe = 0; pipe < adev->gfx.mec.num_pipe_per_mec; pipe++) {
+		if (pipe == ring->pipe)
+			continue;
+
+		gfx_v9_4_3_emit_wave_limit_cs(ring, pipe, enable);
+	}
+}
+
+/*
+ * Poll regCP_HQD_ACTIVE of the selected me/pipe/queue on XCC @xcc_id until
+ * bit 0 clears, checking once per microsecond for up to adev->usec_timeout
+ * iterations. The poll runs in safe mode with srbm_mutex held and the GRBM
+ * selection is restored to 0/0/0 afterwards.
+ *
+ * Returns 0 once the bit is clear, -ETIMEDOUT if it never clears.
+ */
+static int gfx_v9_4_3_unmap_done(struct amdgpu_device *adev, uint32_t me,
+				 uint32_t pipe, uint32_t queue,
+				 uint32_t xcc_id)
+{
+	int waited;
+	int ret = -ETIMEDOUT;
+
+	/* make sure dequeue is complete*/
+	gfx_v9_4_3_xcc_set_safe_mode(adev, xcc_id);
+	mutex_lock(&adev->srbm_mutex);
+	soc15_grbm_select(adev, me, pipe, queue, 0, GET_INST(GC, xcc_id));
+
+	for (waited = 0; waited < adev->usec_timeout; waited++) {
+		if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1)) {
+			ret = 0;
+			break;
+		}
+		udelay(1);
+	}
+
+	soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+	mutex_unlock(&adev->srbm_mutex);
+	gfx_v9_4_3_xcc_unset_safe_mode(adev, xcc_id);
+
+	return ret;
+}
+
+/*
+ * Report whether AMDGPU_RESET_TYPE_PER_PIPE is set in
+ * adev->gfx.compute_supported_reset. When it is not, a one-time warning is
+ * logged and false is returned.
+ */
+static bool gfx_v9_4_3_pipe_reset_support(struct amdgpu_device *adev)
+{
+	if (adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
+		return true;
+
+	dev_warn_once(adev->dev, "Please use the latest MEC version to see whether support pipe reset\n");
+	return false;
+}
+
+/*
+ * Pulse the reset bit of the ring's MEC pipe in regCP_MEC_CNTL, then wait
+ * for the ring's HQD to go inactive.
+ *
+ * Returns -EINVAL when gfx_v9_4_3_pipe_reset_support() reports no per-pipe
+ * reset support, otherwise the result of gfx_v9_4_3_unmap_done().
+ */
+static int gfx_v9_4_3_reset_hw_pipe(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe, clean_pipe;
+ int r;
+
+ if (!gfx_v9_4_3_pipe_reset_support(adev))
+ return -EINVAL;
+
+ gfx_v9_4_3_xcc_set_safe_mode(adev, ring->xcc_id);
+ mutex_lock(&adev->srbm_mutex);
+
+ /* clean_pipe keeps the value read back so it can be restored below */
+ reset_pipe = RREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regCP_MEC_CNTL);
+ clean_pipe = reset_pipe;
+
+ if (ring->me == 1) {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 1);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 1);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE2_RESET, 1);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE3_RESET, 1);
+ break;
+ default:
+ /* no reset bit is set; both writes below carry the value read back */
+ break;
+ }
+ } else {
+ /* NOTE(review): any me != 1 lands here and every non-zero pipe maps
+ * to the ME2 PIPE1 reset bit - confirm this is intended.
+ */
+ if (ring->pipe)
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE1_RESET, 1);
+ else
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE0_RESET, 1);
+ }
+
+ /* write the value with the reset bit set, then restore the saved value */
+ WREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regCP_MEC_CNTL, reset_pipe);
+ WREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regCP_MEC_CNTL, clean_pipe);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v9_4_3_xcc_unset_safe_mode(adev, ring->xcc_id);
+
+ r = gfx_v9_4_3_unmap_done(adev, ring->me, ring->pipe, ring->queue, ring->xcc_id);
+ return r;
+}
+
+/*
+ * Reset a kernel compute queue through the KIQ of the ring's XCC.
+ *
+ * Sequence: submit an unmap (RESET_QUEUES) request on the KIQ ring, test the
+ * KIQ ring, wait for the HQD to go inactive, re-initialize the queue, submit
+ * a map request and test both rings. If a step of the per-queue attempt
+ * fails, control jumps to pipe_reset, which tries a per-pipe reset once
+ * (reset_mode records that it has been used) and then repeats the
+ * re-init/map steps.
+ *
+ * Returns -EINVAL when the KIQ has no unmap handler, -ENOMEM when KIQ ring
+ * space cannot be allocated, -EOPNOTSUPP when a pipe reset is needed but not
+ * supported, the failing step's error code, or the result of
+ * amdgpu_ring_reset_helper_end() on success. @vmid is unused here.
+ *
+ * NOTE(review): the error returns after amdgpu_ring_reset_helper_begin() do
+ * not call amdgpu_ring_reset_helper_end() - confirm no cleanup is required.
+ */
+static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[ring->xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ int reset_mode = AMDGPU_RESET_TYPE_PER_QUEUE;
+ unsigned long flags;
+ int r;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
+ 0, 0);
+ amdgpu_ring_commit(kiq_ring);
+
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_ring_test_ring(kiq_ring);
+ if (r) {
+ dev_err(adev->dev, "kiq ring test failed after ring: %s queue reset\n",
+ ring->name);
+ goto pipe_reset;
+ }
+
+ r = gfx_v9_4_3_unmap_done(adev, ring->me, ring->pipe, ring->queue, ring->xcc_id);
+ if (r)
+ dev_err(adev->dev, "fail to wait on hqd deactive and will try pipe reset\n");
+
+pipe_reset:
+ /* r != 0 here means the per-queue attempt failed; escalate to a pipe reset */
+ if (r) {
+ if (!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_PIPE))
+ return -EOPNOTSUPP;
+ r = gfx_v9_4_3_reset_hw_pipe(ring);
+ reset_mode = AMDGPU_RESET_TYPE_PER_PIPE;
+ dev_info(adev->dev, "ring: %s pipe reset :%s\n", ring->name,
+ r ? "failed" : "successfully");
+ if (r)
+ return r;
+ }
+
+ gfx_v9_4_3_xcc_kcq_init_queue(ring, ring->xcc_id, true);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
+ if (r) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+ kiq->pmf->kiq_map_queues(kiq_ring, ring);
+ amdgpu_ring_commit(kiq_ring);
+ /* NOTE(review): this KIQ ring test runs with ring_lock held and IRQs
+ * disabled, whereas the test after the unmap request above runs after
+ * the unlock - confirm the difference is intended.
+ */
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r) {
+ /* retry through the pipe reset path only if it has not been used yet */
+ if (reset_mode == AMDGPU_RESET_TYPE_PER_QUEUE)
+ goto pipe_reset;
+
+ dev_err(adev->dev, "fail to remap queue\n");
+ return r;
+ }
+
+ /* after a per-queue reset, also test the compute ring itself */
+ if (reset_mode == AMDGPU_RESET_TYPE_PER_QUEUE) {
+ r = amdgpu_ring_test_ring(ring);
+ if (r)
+ goto pipe_reset;
+ }
+
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+/* CP memory IDs, numbered from 1; named in gfx_v9_4_3_ras_cp_mem_list. */
+enum amdgpu_gfx_cp_ras_mem_id {
+ AMDGPU_GFX_CP_MEM1 = 1,
+ AMDGPU_GFX_CP_MEM2,
+ AMDGPU_GFX_CP_MEM3,
+ AMDGPU_GFX_CP_MEM4,
+ AMDGPU_GFX_CP_MEM5,
+};
+
+/* GCEA memory IDs, numbered from 4; named in gfx_v9_4_3_ras_gcea_mem_list. */
+enum amdgpu_gfx_gcea_ras_mem_id {
+ AMDGPU_GFX_GCEA_IOWR_CMDMEM = 4,
+ AMDGPU_GFX_GCEA_IORD_CMDMEM,
+ AMDGPU_GFX_GCEA_GMIWR_CMDMEM,
+ AMDGPU_GFX_GCEA_GMIRD_CMDMEM,
+ AMDGPU_GFX_GCEA_DRAMWR_CMDMEM,
+ AMDGPU_GFX_GCEA_DRAMRD_CMDMEM,
+ AMDGPU_GFX_GCEA_MAM_DMEM0,
+ AMDGPU_GFX_GCEA_MAM_DMEM1,
+ AMDGPU_GFX_GCEA_MAM_DMEM2,
+ AMDGPU_GFX_GCEA_MAM_DMEM3,
+ AMDGPU_GFX_GCEA_MAM_AMEM0,
+ AMDGPU_GFX_GCEA_MAM_AMEM1,
+ AMDGPU_GFX_GCEA_MAM_AMEM2,
+ AMDGPU_GFX_GCEA_MAM_AMEM3,
+ AMDGPU_GFX_GCEA_MAM_AFLUSH_BUFFER,
+ AMDGPU_GFX_GCEA_WRET_TAGMEM,
+ AMDGPU_GFX_GCEA_RRET_TAGMEM,
+ AMDGPU_GFX_GCEA_IOWR_DATAMEM,
+ AMDGPU_GFX_GCEA_GMIWR_DATAMEM,
+ AMDGPU_GFX_GCEA_DRAM_DATAMEM,
+};
+
+/* GC_CANE memory ID (single entry, 0); named in gfx_v9_4_3_ras_gc_cane_mem_list. */
+enum amdgpu_gfx_gc_cane_ras_mem_id {
+ AMDGPU_GFX_GC_CANE_MEM0 = 0,
+};
+
+/* GCUTCL2 memory ID (single entry, 160); named in gfx_v9_4_3_ras_gcutcl2_mem_list. */
+enum amdgpu_gfx_gcutcl2_ras_mem_id {
+ AMDGPU_GFX_GCUTCL2_MEM2P512X95 = 160,
+};
+
+/* GDS memory ID (single entry, 0); named in gfx_v9_4_3_ras_gds_mem_list. */
+enum amdgpu_gfx_gds_ras_mem_id {
+ AMDGPU_GFX_GDS_MEM0 = 0,
+};
+
+/*
+ * LDS memory IDs: banks 0..31 take values 0..31, followed by the two SP
+ * buffers. Named in gfx_v9_4_3_ras_lds_mem_list.
+ */
+enum amdgpu_gfx_lds_ras_mem_id {
+ AMDGPU_GFX_LDS_BANK0 = 0,
+ AMDGPU_GFX_LDS_BANK1,
+ AMDGPU_GFX_LDS_BANK2,
+ AMDGPU_GFX_LDS_BANK3,
+ AMDGPU_GFX_LDS_BANK4,
+ AMDGPU_GFX_LDS_BANK5,
+ AMDGPU_GFX_LDS_BANK6,
+ AMDGPU_GFX_LDS_BANK7,
+ AMDGPU_GFX_LDS_BANK8,
+ AMDGPU_GFX_LDS_BANK9,
+ AMDGPU_GFX_LDS_BANK10,
+ AMDGPU_GFX_LDS_BANK11,
+ AMDGPU_GFX_LDS_BANK12,
+ AMDGPU_GFX_LDS_BANK13,
+ AMDGPU_GFX_LDS_BANK14,
+ AMDGPU_GFX_LDS_BANK15,
+ AMDGPU_GFX_LDS_BANK16,
+ AMDGPU_GFX_LDS_BANK17,
+ AMDGPU_GFX_LDS_BANK18,
+ AMDGPU_GFX_LDS_BANK19,
+ AMDGPU_GFX_LDS_BANK20,
+ AMDGPU_GFX_LDS_BANK21,
+ AMDGPU_GFX_LDS_BANK22,
+ AMDGPU_GFX_LDS_BANK23,
+ AMDGPU_GFX_LDS_BANK24,
+ AMDGPU_GFX_LDS_BANK25,
+ AMDGPU_GFX_LDS_BANK26,
+ AMDGPU_GFX_LDS_BANK27,
+ AMDGPU_GFX_LDS_BANK28,
+ AMDGPU_GFX_LDS_BANK29,
+ AMDGPU_GFX_LDS_BANK30,
+ AMDGPU_GFX_LDS_BANK31,
+ AMDGPU_GFX_LDS_SP_BUFFER_A,
+ AMDGPU_GFX_LDS_SP_BUFFER_B,
+};
+
+/* RLC memory IDs, numbered from 1; named in gfx_v9_4_3_ras_rlc_mem_list. */
+enum amdgpu_gfx_rlc_ras_mem_id {
+ AMDGPU_GFX_RLC_GPMF32 = 1,
+ AMDGPU_GFX_RLC_RLCVF32,
+ AMDGPU_GFX_RLC_SCRATCH,
+ AMDGPU_GFX_RLC_SRM_ARAM,
+ AMDGPU_GFX_RLC_SRM_DRAM,
+ AMDGPU_GFX_RLC_TCTAG,
+ AMDGPU_GFX_RLC_SPM_SE,
+ AMDGPU_GFX_RLC_SPM_GRBMT,
+};
+
+/* SP memory ID (single entry, 0); named in gfx_v9_4_3_ras_sp_mem_list. */
+enum amdgpu_gfx_sp_ras_mem_id {
+ AMDGPU_GFX_SP_SIMDID0 = 0,
+};
+
+/* SPI memory IDs, numbered from 0; named in gfx_v9_4_3_ras_spi_mem_list. */
+enum amdgpu_gfx_spi_ras_mem_id {
+ AMDGPU_GFX_SPI_MEM0 = 0,
+ AMDGPU_GFX_SPI_MEM1,
+ AMDGPU_GFX_SPI_MEM2,
+ AMDGPU_GFX_SPI_MEM3,
+};
+
+/*
+ * SQC memory IDs with explicit values: instruction-cache entries use
+ * 100..108 and data-cache entries use 200..213. Note that the last
+ * enumerator (value 108) is listed out of numeric order. Named in
+ * gfx_v9_4_3_ras_sqc_mem_list.
+ */
+enum amdgpu_gfx_sqc_ras_mem_id {
+ AMDGPU_GFX_SQC_INST_CACHE_A = 100,
+ AMDGPU_GFX_SQC_INST_CACHE_B = 101,
+ AMDGPU_GFX_SQC_INST_CACHE_TAG_A = 102,
+ AMDGPU_GFX_SQC_INST_CACHE_TAG_B = 103,
+ AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_A = 104,
+ AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_B = 105,
+ AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_A = 106,
+ AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_B = 107,
+ AMDGPU_GFX_SQC_DATA_CACHE_A = 200,
+ AMDGPU_GFX_SQC_DATA_CACHE_B = 201,
+ AMDGPU_GFX_SQC_DATA_CACHE_TAG_A = 202,
+ AMDGPU_GFX_SQC_DATA_CACHE_TAG_B = 203,
+ AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_A = 204,
+ AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_B = 205,
+ AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_A = 206,
+ AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_B = 207,
+ AMDGPU_GFX_SQC_DIRTY_BIT_A = 208,
+ AMDGPU_GFX_SQC_DIRTY_BIT_B = 209,
+ AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU0 = 210,
+ AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU1 = 211,
+ AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_A = 212,
+ AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_B = 213,
+ AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_INST_CACHE = 108,
+};
+
+/* SQ SGPR memory IDs, numbered from 0; named in gfx_v9_4_3_ras_sq_mem_list. */
+enum amdgpu_gfx_sq_ras_mem_id {
+ AMDGPU_GFX_SQ_SGPR_MEM0 = 0,
+ AMDGPU_GFX_SQ_SGPR_MEM1,
+ AMDGPU_GFX_SQ_SGPR_MEM2,
+ AMDGPU_GFX_SQ_SGPR_MEM3,
+};
+
+/* TA memory IDs, numbered from 1; named in gfx_v9_4_3_ras_ta_mem_list. */
+enum amdgpu_gfx_ta_ras_mem_id {
+ AMDGPU_GFX_TA_FS_AFIFO_RAM_LO = 1,
+ AMDGPU_GFX_TA_FS_AFIFO_RAM_HI,
+ AMDGPU_GFX_TA_FS_CFIFO_RAM,
+ AMDGPU_GFX_TA_FSX_LFIFO,
+ AMDGPU_GFX_TA_FS_DFIFO_RAM,
+};
+
+/* TCC memory ID (single entry, 1); named in gfx_v9_4_3_ras_tcc_mem_list. */
+enum amdgpu_gfx_tcc_ras_mem_id {
+ AMDGPU_GFX_TCC_MEM1 = 1,
+};
+
+/* TCA memory ID (single entry, 1); named in gfx_v9_4_3_ras_tca_mem_list. */
+enum amdgpu_gfx_tca_ras_mem_id {
+ AMDGPU_GFX_TCA_MEM1 = 1,
+};
+
+/* TCI memory ID (single entry, 1); named in gfx_v9_4_3_ras_tci_mem_list. */
+enum amdgpu_gfx_tci_ras_mem_id {
+ AMDGPU_GFX_TCIW_MEM = 1,
+};
+
+/* TCP memory IDs, numbered from 1; named in gfx_v9_4_3_ras_tcp_mem_list. */
+enum amdgpu_gfx_tcp_ras_mem_id {
+ AMDGPU_GFX_TCP_LFIFO0 = 1,
+ AMDGPU_GFX_TCP_SET0BANK0_RAM,
+ AMDGPU_GFX_TCP_SET0BANK1_RAM,
+ AMDGPU_GFX_TCP_SET0BANK2_RAM,
+ AMDGPU_GFX_TCP_SET0BANK3_RAM,
+ AMDGPU_GFX_TCP_SET1BANK0_RAM,
+ AMDGPU_GFX_TCP_SET1BANK1_RAM,
+ AMDGPU_GFX_TCP_SET1BANK2_RAM,
+ AMDGPU_GFX_TCP_SET1BANK3_RAM,
+ AMDGPU_GFX_TCP_SET2BANK0_RAM,
+ AMDGPU_GFX_TCP_SET2BANK1_RAM,
+ AMDGPU_GFX_TCP_SET2BANK2_RAM,
+ AMDGPU_GFX_TCP_SET2BANK3_RAM,
+ AMDGPU_GFX_TCP_SET3BANK0_RAM,
+ AMDGPU_GFX_TCP_SET3BANK1_RAM,
+ AMDGPU_GFX_TCP_SET3BANK2_RAM,
+ AMDGPU_GFX_TCP_SET3BANK3_RAM,
+ AMDGPU_GFX_TCP_VM_FIFO,
+ AMDGPU_GFX_TCP_DB_TAGRAM0,
+ AMDGPU_GFX_TCP_DB_TAGRAM1,
+ AMDGPU_GFX_TCP_DB_TAGRAM2,
+ AMDGPU_GFX_TCP_DB_TAGRAM3,
+ AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE0,
+ AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE1,
+ AMDGPU_GFX_TCP_CMD_FIFO,
+};
+
+/*
+ * TD memory IDs, numbered from 1.
+ * NOTE(review): presumably paired with a name table like the other
+ * *_ras_mem_id enums; that table is not visible in this part of the file.
+ */
+enum amdgpu_gfx_td_ras_mem_id {
+ AMDGPU_GFX_TD_UTD_CS_FIFO_MEM = 1,
+ AMDGPU_GFX_TD_UTD_SS_FIFO_LO_MEM,
+ AMDGPU_GFX_TD_UTD_SS_FIFO_HI_MEM,
+};
+
+/*
+ * TCX memory IDs, numbered from 0 in groups of eight (indices 0..7 per
+ * FIFO family): FIFOD, FIFOB, FIFOA, CFIFO, FIFO_ACKB, FIFO_ACKD,
+ * DST_FIFOA, DST_FIFOB, DST_FIFOD, DST_FIFO_ACKB, DST_FIFO_ACKD.
+ * NOTE(review): presumably paired with a name table like the other
+ * *_ras_mem_id enums; that table is not visible in this part of the file.
+ */
+enum amdgpu_gfx_tcx_ras_mem_id {
+ AMDGPU_GFX_TCX_FIFOD0 = 0,
+ AMDGPU_GFX_TCX_FIFOD1,
+ AMDGPU_GFX_TCX_FIFOD2,
+ AMDGPU_GFX_TCX_FIFOD3,
+ AMDGPU_GFX_TCX_FIFOD4,
+ AMDGPU_GFX_TCX_FIFOD5,
+ AMDGPU_GFX_TCX_FIFOD6,
+ AMDGPU_GFX_TCX_FIFOD7,
+ AMDGPU_GFX_TCX_FIFOB0,
+ AMDGPU_GFX_TCX_FIFOB1,
+ AMDGPU_GFX_TCX_FIFOB2,
+ AMDGPU_GFX_TCX_FIFOB3,
+ AMDGPU_GFX_TCX_FIFOB4,
+ AMDGPU_GFX_TCX_FIFOB5,
+ AMDGPU_GFX_TCX_FIFOB6,
+ AMDGPU_GFX_TCX_FIFOB7,
+ AMDGPU_GFX_TCX_FIFOA0,
+ AMDGPU_GFX_TCX_FIFOA1,
+ AMDGPU_GFX_TCX_FIFOA2,
+ AMDGPU_GFX_TCX_FIFOA3,
+ AMDGPU_GFX_TCX_FIFOA4,
+ AMDGPU_GFX_TCX_FIFOA5,
+ AMDGPU_GFX_TCX_FIFOA6,
+ AMDGPU_GFX_TCX_FIFOA7,
+ AMDGPU_GFX_TCX_CFIFO0,
+ AMDGPU_GFX_TCX_CFIFO1,
+ AMDGPU_GFX_TCX_CFIFO2,
+ AMDGPU_GFX_TCX_CFIFO3,
+ AMDGPU_GFX_TCX_CFIFO4,
+ AMDGPU_GFX_TCX_CFIFO5,
+ AMDGPU_GFX_TCX_CFIFO6,
+ AMDGPU_GFX_TCX_CFIFO7,
+ AMDGPU_GFX_TCX_FIFO_ACKB0,
+ AMDGPU_GFX_TCX_FIFO_ACKB1,
+ AMDGPU_GFX_TCX_FIFO_ACKB2,
+ AMDGPU_GFX_TCX_FIFO_ACKB3,
+ AMDGPU_GFX_TCX_FIFO_ACKB4,
+ AMDGPU_GFX_TCX_FIFO_ACKB5,
+ AMDGPU_GFX_TCX_FIFO_ACKB6,
+ AMDGPU_GFX_TCX_FIFO_ACKB7,
+ AMDGPU_GFX_TCX_FIFO_ACKD0,
+ AMDGPU_GFX_TCX_FIFO_ACKD1,
+ AMDGPU_GFX_TCX_FIFO_ACKD2,
+ AMDGPU_GFX_TCX_FIFO_ACKD3,
+ AMDGPU_GFX_TCX_FIFO_ACKD4,
+ AMDGPU_GFX_TCX_FIFO_ACKD5,
+ AMDGPU_GFX_TCX_FIFO_ACKD6,
+ AMDGPU_GFX_TCX_FIFO_ACKD7,
+ AMDGPU_GFX_TCX_DST_FIFOA0,
+ AMDGPU_GFX_TCX_DST_FIFOA1,
+ AMDGPU_GFX_TCX_DST_FIFOA2,
+ AMDGPU_GFX_TCX_DST_FIFOA3,
+ AMDGPU_GFX_TCX_DST_FIFOA4,
+ AMDGPU_GFX_TCX_DST_FIFOA5,
+ AMDGPU_GFX_TCX_DST_FIFOA6,
+ AMDGPU_GFX_TCX_DST_FIFOA7,
+ AMDGPU_GFX_TCX_DST_FIFOB0,
+ AMDGPU_GFX_TCX_DST_FIFOB1,
+ AMDGPU_GFX_TCX_DST_FIFOB2,
+ AMDGPU_GFX_TCX_DST_FIFOB3,
+ AMDGPU_GFX_TCX_DST_FIFOB4,
+ AMDGPU_GFX_TCX_DST_FIFOB5,
+ AMDGPU_GFX_TCX_DST_FIFOB6,
+ AMDGPU_GFX_TCX_DST_FIFOB7,
+ AMDGPU_GFX_TCX_DST_FIFOD0,
+ AMDGPU_GFX_TCX_DST_FIFOD1,
+ AMDGPU_GFX_TCX_DST_FIFOD2,
+ AMDGPU_GFX_TCX_DST_FIFOD3,
+ AMDGPU_GFX_TCX_DST_FIFOD4,
+ AMDGPU_GFX_TCX_DST_FIFOD5,
+ AMDGPU_GFX_TCX_DST_FIFOD6,
+ AMDGPU_GFX_TCX_DST_FIFOD7,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB0,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB1,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB2,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB3,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB4,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB5,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB6,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB7,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD0,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD1,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD2,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD3,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD4,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD5,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD6,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD7,
+};
+
+/*
+ * ATC_L2 memory ID (single entry, 0).
+ * NOTE(review): the matching name table is not visible in this part of
+ * the file.
+ */
+enum amdgpu_gfx_atc_l2_ras_mem_id {
+ AMDGPU_GFX_ATC_L2_MEM0 = 0,
+};
+
+/*
+ * UTCL2 memory ID (single entry, 0).
+ * NOTE(review): the matching name table is not visible in this part of
+ * the file.
+ */
+enum amdgpu_gfx_utcl2_ras_mem_id {
+ AMDGPU_GFX_UTCL2_MEM0 = 0,
+};
+
+/*
+ * VML2 memory ID (single entry, 0).
+ * NOTE(review): the matching name table is not visible in this part of
+ * the file.
+ */
+enum amdgpu_gfx_vml2_ras_mem_id {
+ AMDGPU_GFX_VML2_MEM0 = 0,
+};
+
+/*
+ * VML2 walker memory ID (single entry, 0).
+ * NOTE(review): the matching name table is not visible in this part of
+ * the file.
+ */
+enum amdgpu_gfx_vml2_walker_ras_mem_id {
+ AMDGPU_GFX_VML2_WALKER_MEM0 = 0,
+};
+
+/* Maps each amdgpu_gfx_cp_ras_mem_id value to its printable name. */
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_cp_mem_list[] = {
+ {AMDGPU_GFX_CP_MEM1, "CP_MEM1"},
+ {AMDGPU_GFX_CP_MEM2, "CP_MEM2"},
+ {AMDGPU_GFX_CP_MEM3, "CP_MEM3"},
+ {AMDGPU_GFX_CP_MEM4, "CP_MEM4"},
+ {AMDGPU_GFX_CP_MEM5, "CP_MEM5"},
+};
+
+/* Maps each amdgpu_gfx_gcea_ras_mem_id value to its printable name. */
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gcea_mem_list[] = {
+ {AMDGPU_GFX_GCEA_IOWR_CMDMEM, "GCEA_IOWR_CMDMEM"},
+ {AMDGPU_GFX_GCEA_IORD_CMDMEM, "GCEA_IORD_CMDMEM"},
+ {AMDGPU_GFX_GCEA_GMIWR_CMDMEM, "GCEA_GMIWR_CMDMEM"},
+ {AMDGPU_GFX_GCEA_GMIRD_CMDMEM, "GCEA_GMIRD_CMDMEM"},
+ {AMDGPU_GFX_GCEA_DRAMWR_CMDMEM, "GCEA_DRAMWR_CMDMEM"},
+ {AMDGPU_GFX_GCEA_DRAMRD_CMDMEM, "GCEA_DRAMRD_CMDMEM"},
+ {AMDGPU_GFX_GCEA_MAM_DMEM0, "GCEA_MAM_DMEM0"},
+ {AMDGPU_GFX_GCEA_MAM_DMEM1, "GCEA_MAM_DMEM1"},
+ {AMDGPU_GFX_GCEA_MAM_DMEM2, "GCEA_MAM_DMEM2"},
+ {AMDGPU_GFX_GCEA_MAM_DMEM3, "GCEA_MAM_DMEM3"},
+ {AMDGPU_GFX_GCEA_MAM_AMEM0, "GCEA_MAM_AMEM0"},
+ {AMDGPU_GFX_GCEA_MAM_AMEM1, "GCEA_MAM_AMEM1"},
+ {AMDGPU_GFX_GCEA_MAM_AMEM2, "GCEA_MAM_AMEM2"},
+ {AMDGPU_GFX_GCEA_MAM_AMEM3, "GCEA_MAM_AMEM3"},
+ {AMDGPU_GFX_GCEA_MAM_AFLUSH_BUFFER, "GCEA_MAM_AFLUSH_BUFFER"},
+ {AMDGPU_GFX_GCEA_WRET_TAGMEM, "GCEA_WRET_TAGMEM"},
+ {AMDGPU_GFX_GCEA_RRET_TAGMEM, "GCEA_RRET_TAGMEM"},
+ {AMDGPU_GFX_GCEA_IOWR_DATAMEM, "GCEA_IOWR_DATAMEM"},
+ {AMDGPU_GFX_GCEA_GMIWR_DATAMEM, "GCEA_GMIWR_DATAMEM"},
+ {AMDGPU_GFX_GCEA_DRAM_DATAMEM, "GCEA_DRAM_DATAMEM"},
+};
+
+/* Maps the amdgpu_gfx_gc_cane_ras_mem_id value to its printable name. */
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gc_cane_mem_list[] = {
+ {AMDGPU_GFX_GC_CANE_MEM0, "GC_CANE_MEM0"},
+};
+
+/* Maps the amdgpu_gfx_gcutcl2_ras_mem_id value to its printable name. */
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gcutcl2_mem_list[] = {
+ {AMDGPU_GFX_GCUTCL2_MEM2P512X95, "GCUTCL2_MEM2P512X95"},
+};
+
+/*
+ * Maps the amdgpu_gfx_gds_ras_mem_id value to its printable name.
+ * NOTE(review): the name string is "GDS_MEM" while the enumerator is
+ * AMDGPU_GFX_GDS_MEM0 - confirm the missing "0" is intended.
+ */
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gds_mem_list[] = {
+ {AMDGPU_GFX_GDS_MEM0, "GDS_MEM"},
+};
+
+/* Maps each amdgpu_gfx_lds_ras_mem_id value to its printable name. */
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_lds_mem_list[] = {
+ {AMDGPU_GFX_LDS_BANK0, "LDS_BANK0"},
+ {AMDGPU_GFX_LDS_BANK1, "LDS_BANK1"},
+ {AMDGPU_GFX_LDS_BANK2, "LDS_BANK2"},
+ {AMDGPU_GFX_LDS_BANK3, "LDS_BANK3"},
+ {AMDGPU_GFX_LDS_BANK4, "LDS_BANK4"},
+ {AMDGPU_GFX_LDS_BANK5, "LDS_BANK5"},
+ {AMDGPU_GFX_LDS_BANK6, "LDS_BANK6"},
+ {AMDGPU_GFX_LDS_BANK7, "LDS_BANK7"},
+ {AMDGPU_GFX_LDS_BANK8, "LDS_BANK8"},
+ {AMDGPU_GFX_LDS_BANK9, "LDS_BANK9"},
+ {AMDGPU_GFX_LDS_BANK10, "LDS_BANK10"},
+ {AMDGPU_GFX_LDS_BANK11, "LDS_BANK11"},
+ {AMDGPU_GFX_LDS_BANK12, "LDS_BANK12"},
+ {AMDGPU_GFX_LDS_BANK13, "LDS_BANK13"},
+ {AMDGPU_GFX_LDS_BANK14, "LDS_BANK14"},
+ {AMDGPU_GFX_LDS_BANK15, "LDS_BANK15"},
+ {AMDGPU_GFX_LDS_BANK16, "LDS_BANK16"},
+ {AMDGPU_GFX_LDS_BANK17, "LDS_BANK17"},
+ {AMDGPU_GFX_LDS_BANK18, "LDS_BANK18"},
+ {AMDGPU_GFX_LDS_BANK19, "LDS_BANK19"},
+ {AMDGPU_GFX_LDS_BANK20, "LDS_BANK20"},
+ {AMDGPU_GFX_LDS_BANK21, "LDS_BANK21"},
+ {AMDGPU_GFX_LDS_BANK22, "LDS_BANK22"},
+ {AMDGPU_GFX_LDS_BANK23, "LDS_BANK23"},
+ {AMDGPU_GFX_LDS_BANK24, "LDS_BANK24"},
+ {AMDGPU_GFX_LDS_BANK25, "LDS_BANK25"},
+ {AMDGPU_GFX_LDS_BANK26, "LDS_BANK26"},
+ {AMDGPU_GFX_LDS_BANK27, "LDS_BANK27"},
+ {AMDGPU_GFX_LDS_BANK28, "LDS_BANK28"},
+ {AMDGPU_GFX_LDS_BANK29, "LDS_BANK29"},
+ {AMDGPU_GFX_LDS_BANK30, "LDS_BANK30"},
+ {AMDGPU_GFX_LDS_BANK31, "LDS_BANK31"},
+ {AMDGPU_GFX_LDS_SP_BUFFER_A, "LDS_SP_BUFFER_A"},
+ {AMDGPU_GFX_LDS_SP_BUFFER_B, "LDS_SP_BUFFER_B"},
+};
+
+/* Maps each amdgpu_gfx_rlc_ras_mem_id value to its printable name. */
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_rlc_mem_list[] = {
+ {AMDGPU_GFX_RLC_GPMF32, "RLC_GPMF32"},
+ {AMDGPU_GFX_RLC_RLCVF32, "RLC_RLCVF32"},
+ {AMDGPU_GFX_RLC_SCRATCH, "RLC_SCRATCH"},
+ {AMDGPU_GFX_RLC_SRM_ARAM, "RLC_SRM_ARAM"},
+ {AMDGPU_GFX_RLC_SRM_DRAM, "RLC_SRM_DRAM"},
+ {AMDGPU_GFX_RLC_TCTAG, "RLC_TCTAG"},
+ {AMDGPU_GFX_RLC_SPM_SE, "RLC_SPM_SE"},
+ {AMDGPU_GFX_RLC_SPM_GRBMT, "RLC_SPM_GRBMT"},
+};
+
+/* Maps the amdgpu_gfx_sp_ras_mem_id value to its printable name. */
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_sp_mem_list[] = {
+ {AMDGPU_GFX_SP_SIMDID0, "SP_SIMDID0"},
+};
+
+/* Maps each amdgpu_gfx_spi_ras_mem_id value to its printable name. */
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_spi_mem_list[] = {
+ {AMDGPU_GFX_SPI_MEM0, "SPI_MEM0"},
+ {AMDGPU_GFX_SPI_MEM1, "SPI_MEM1"},
+ {AMDGPU_GFX_SPI_MEM2, "SPI_MEM2"},
+ {AMDGPU_GFX_SPI_MEM3, "SPI_MEM3"},
+};
+
+/*
+ * Maps each amdgpu_gfx_sqc_ras_mem_id value to its printable name. Entries
+ * follow the enum's declaration order, not numeric order (the last entry
+ * has value 108).
+ */
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_sqc_mem_list[] = {
+ {AMDGPU_GFX_SQC_INST_CACHE_A, "SQC_INST_CACHE_A"},
+ {AMDGPU_GFX_SQC_INST_CACHE_B, "SQC_INST_CACHE_B"},
+ {AMDGPU_GFX_SQC_INST_CACHE_TAG_A, "SQC_INST_CACHE_TAG_A"},
+ {AMDGPU_GFX_SQC_INST_CACHE_TAG_B, "SQC_INST_CACHE_TAG_B"},
+ {AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_A, "SQC_INST_CACHE_MISS_FIFO_A"},
+ {AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_B, "SQC_INST_CACHE_MISS_FIFO_B"},
+ {AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_A, "SQC_INST_CACHE_GATCL1_MISS_FIFO_A"},
+ {AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_B, "SQC_INST_CACHE_GATCL1_MISS_FIFO_B"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_A, "SQC_DATA_CACHE_A"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_B, "SQC_DATA_CACHE_B"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_TAG_A, "SQC_DATA_CACHE_TAG_A"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_TAG_B, "SQC_DATA_CACHE_TAG_B"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_A, "SQC_DATA_CACHE_MISS_FIFO_A"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_B, "SQC_DATA_CACHE_MISS_FIFO_B"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_A, "SQC_DATA_CACHE_HIT_FIFO_A"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_B, "SQC_DATA_CACHE_HIT_FIFO_B"},
+ {AMDGPU_GFX_SQC_DIRTY_BIT_A, "SQC_DIRTY_BIT_A"},
+ {AMDGPU_GFX_SQC_DIRTY_BIT_B, "SQC_DIRTY_BIT_B"},
+ {AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU0, "SQC_WRITE_DATA_BUFFER_CU0"},
+ {AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU1, "SQC_WRITE_DATA_BUFFER_CU1"},
+ {AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_A, "SQC_UTCL1_MISS_LFIFO_DATA_CACHE_A"},
+ {AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_B, "SQC_UTCL1_MISS_LFIFO_DATA_CACHE_B"},
+ {AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_INST_CACHE, "SQC_UTCL1_MISS_LFIFO_INST_CACHE"},
+};
+
+/* Maps each amdgpu_gfx_sq_ras_mem_id value to its printable name. */
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_sq_mem_list[] = {
+ {AMDGPU_GFX_SQ_SGPR_MEM0, "SQ_SGPR_MEM0"},
+ {AMDGPU_GFX_SQ_SGPR_MEM1, "SQ_SGPR_MEM1"},
+ {AMDGPU_GFX_SQ_SGPR_MEM2, "SQ_SGPR_MEM2"},
+ {AMDGPU_GFX_SQ_SGPR_MEM3, "SQ_SGPR_MEM3"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_ta_mem_list[] = { /* TA memory-ID/name pairs (TA slot of gfx_v9_4_3_ras_mem_list_array) */
+ {AMDGPU_GFX_TA_FS_AFIFO_RAM_LO, "TA_FS_AFIFO_RAM_LO"},
+ {AMDGPU_GFX_TA_FS_AFIFO_RAM_HI, "TA_FS_AFIFO_RAM_HI"},
+ {AMDGPU_GFX_TA_FS_CFIFO_RAM, "TA_FS_CFIFO_RAM"},
+ {AMDGPU_GFX_TA_FSX_LFIFO, "TA_FSX_LFIFO"},
+ {AMDGPU_GFX_TA_FS_DFIFO_RAM, "TA_FS_DFIFO_RAM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tcc_mem_list[] = { /* TCC: a single memory-ID/name pair */
+ {AMDGPU_GFX_TCC_MEM1, "TCC_MEM1"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tca_mem_list[] = { /* TCA: a single memory-ID/name pair */
+ {AMDGPU_GFX_TCA_MEM1, "TCA_MEM1"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tci_mem_list[] = { /* TCI: a single memory-ID/name pair (note the enumerator is TCIW_MEM) */
+ {AMDGPU_GFX_TCIW_MEM, "TCIW_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tcp_mem_list[] = { /*
+ * Memory-ID/name pairs for the TCP memories (TCP slot of
+ * gfx_v9_4_3_ras_mem_list_array): one LFIFO, the SETxBANKy RAMs for
+ * sets 0-3 and banks 0-3, then the VM FIFO, DB tag RAMs, UTCL1 probe
+ * LFIFOs and the command FIFO.
+ */
+ {AMDGPU_GFX_TCP_LFIFO0, "TCP_LFIFO0"},
+ {AMDGPU_GFX_TCP_SET0BANK0_RAM, "TCP_SET0BANK0_RAM"},
+ {AMDGPU_GFX_TCP_SET0BANK1_RAM, "TCP_SET0BANK1_RAM"},
+ {AMDGPU_GFX_TCP_SET0BANK2_RAM, "TCP_SET0BANK2_RAM"},
+ {AMDGPU_GFX_TCP_SET0BANK3_RAM, "TCP_SET0BANK3_RAM"},
+ {AMDGPU_GFX_TCP_SET1BANK0_RAM, "TCP_SET1BANK0_RAM"},
+ {AMDGPU_GFX_TCP_SET1BANK1_RAM, "TCP_SET1BANK1_RAM"},
+ {AMDGPU_GFX_TCP_SET1BANK2_RAM, "TCP_SET1BANK2_RAM"},
+ {AMDGPU_GFX_TCP_SET1BANK3_RAM, "TCP_SET1BANK3_RAM"},
+ {AMDGPU_GFX_TCP_SET2BANK0_RAM, "TCP_SET2BANK0_RAM"},
+ {AMDGPU_GFX_TCP_SET2BANK1_RAM, "TCP_SET2BANK1_RAM"},
+ {AMDGPU_GFX_TCP_SET2BANK2_RAM, "TCP_SET2BANK2_RAM"},
+ {AMDGPU_GFX_TCP_SET2BANK3_RAM, "TCP_SET2BANK3_RAM"},
+ {AMDGPU_GFX_TCP_SET3BANK0_RAM, "TCP_SET3BANK0_RAM"},
+ {AMDGPU_GFX_TCP_SET3BANK1_RAM, "TCP_SET3BANK1_RAM"},
+ {AMDGPU_GFX_TCP_SET3BANK2_RAM, "TCP_SET3BANK2_RAM"},
+ {AMDGPU_GFX_TCP_SET3BANK3_RAM, "TCP_SET3BANK3_RAM"},
+ {AMDGPU_GFX_TCP_VM_FIFO, "TCP_VM_FIFO"},
+ {AMDGPU_GFX_TCP_DB_TAGRAM0, "TCP_DB_TAGRAM0"},
+ {AMDGPU_GFX_TCP_DB_TAGRAM1, "TCP_DB_TAGRAM1"},
+ {AMDGPU_GFX_TCP_DB_TAGRAM2, "TCP_DB_TAGRAM2"},
+ {AMDGPU_GFX_TCP_DB_TAGRAM3, "TCP_DB_TAGRAM3"},
+ {AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE0, "TCP_UTCL1_LFIFO_PROBE0"},
+ {AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE1, "TCP_UTCL1_LFIFO_PROBE1"},
+ {AMDGPU_GFX_TCP_CMD_FIFO, "TCP_CMD_FIFO"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_td_mem_list[] = { /* TD memory-ID/name pairs (TD slot of gfx_v9_4_3_ras_mem_list_array) */
+ {AMDGPU_GFX_TD_UTD_CS_FIFO_MEM, "TD_UTD_CS_FIFO_MEM"},
+ {AMDGPU_GFX_TD_UTD_SS_FIFO_LO_MEM, "TD_UTD_SS_FIFO_LO_MEM"},
+ {AMDGPU_GFX_TD_UTD_SS_FIFO_HI_MEM, "TD_UTD_SS_FIFO_HI_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tcx_mem_list[] = { /*
+ * Memory-ID/name pairs for the TCX memories (TCX slot of
+ * gfx_v9_4_3_ras_mem_list_array). The table is eleven groups of eight
+ * entries, indices 0-7 each: FIFOD, FIFOB, FIFOA, CFIFO, FIFO_ACKB,
+ * FIFO_ACKD, DST_FIFOA, DST_FIFOB, DST_FIFOD, DST_FIFO_ACKB and
+ * DST_FIFO_ACKD.
+ */
+ {AMDGPU_GFX_TCX_FIFOD0, "TCX_FIFOD0"},
+ {AMDGPU_GFX_TCX_FIFOD1, "TCX_FIFOD1"},
+ {AMDGPU_GFX_TCX_FIFOD2, "TCX_FIFOD2"},
+ {AMDGPU_GFX_TCX_FIFOD3, "TCX_FIFOD3"},
+ {AMDGPU_GFX_TCX_FIFOD4, "TCX_FIFOD4"},
+ {AMDGPU_GFX_TCX_FIFOD5, "TCX_FIFOD5"},
+ {AMDGPU_GFX_TCX_FIFOD6, "TCX_FIFOD6"},
+ {AMDGPU_GFX_TCX_FIFOD7, "TCX_FIFOD7"},
+ {AMDGPU_GFX_TCX_FIFOB0, "TCX_FIFOB0"},
+ {AMDGPU_GFX_TCX_FIFOB1, "TCX_FIFOB1"},
+ {AMDGPU_GFX_TCX_FIFOB2, "TCX_FIFOB2"},
+ {AMDGPU_GFX_TCX_FIFOB3, "TCX_FIFOB3"},
+ {AMDGPU_GFX_TCX_FIFOB4, "TCX_FIFOB4"},
+ {AMDGPU_GFX_TCX_FIFOB5, "TCX_FIFOB5"},
+ {AMDGPU_GFX_TCX_FIFOB6, "TCX_FIFOB6"},
+ {AMDGPU_GFX_TCX_FIFOB7, "TCX_FIFOB7"},
+ {AMDGPU_GFX_TCX_FIFOA0, "TCX_FIFOA0"},
+ {AMDGPU_GFX_TCX_FIFOA1, "TCX_FIFOA1"},
+ {AMDGPU_GFX_TCX_FIFOA2, "TCX_FIFOA2"},
+ {AMDGPU_GFX_TCX_FIFOA3, "TCX_FIFOA3"},
+ {AMDGPU_GFX_TCX_FIFOA4, "TCX_FIFOA4"},
+ {AMDGPU_GFX_TCX_FIFOA5, "TCX_FIFOA5"},
+ {AMDGPU_GFX_TCX_FIFOA6, "TCX_FIFOA6"},
+ {AMDGPU_GFX_TCX_FIFOA7, "TCX_FIFOA7"},
+ {AMDGPU_GFX_TCX_CFIFO0, "TCX_CFIFO0"},
+ {AMDGPU_GFX_TCX_CFIFO1, "TCX_CFIFO1"},
+ {AMDGPU_GFX_TCX_CFIFO2, "TCX_CFIFO2"},
+ {AMDGPU_GFX_TCX_CFIFO3, "TCX_CFIFO3"},
+ {AMDGPU_GFX_TCX_CFIFO4, "TCX_CFIFO4"},
+ {AMDGPU_GFX_TCX_CFIFO5, "TCX_CFIFO5"},
+ {AMDGPU_GFX_TCX_CFIFO6, "TCX_CFIFO6"},
+ {AMDGPU_GFX_TCX_CFIFO7, "TCX_CFIFO7"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB0, "TCX_FIFO_ACKB0"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB1, "TCX_FIFO_ACKB1"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB2, "TCX_FIFO_ACKB2"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB3, "TCX_FIFO_ACKB3"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB4, "TCX_FIFO_ACKB4"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB5, "TCX_FIFO_ACKB5"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB6, "TCX_FIFO_ACKB6"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB7, "TCX_FIFO_ACKB7"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD0, "TCX_FIFO_ACKD0"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD1, "TCX_FIFO_ACKD1"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD2, "TCX_FIFO_ACKD2"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD3, "TCX_FIFO_ACKD3"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD4, "TCX_FIFO_ACKD4"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD5, "TCX_FIFO_ACKD5"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD6, "TCX_FIFO_ACKD6"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD7, "TCX_FIFO_ACKD7"},
+ {AMDGPU_GFX_TCX_DST_FIFOA0, "TCX_DST_FIFOA0"},
+ {AMDGPU_GFX_TCX_DST_FIFOA1, "TCX_DST_FIFOA1"},
+ {AMDGPU_GFX_TCX_DST_FIFOA2, "TCX_DST_FIFOA2"},
+ {AMDGPU_GFX_TCX_DST_FIFOA3, "TCX_DST_FIFOA3"},
+ {AMDGPU_GFX_TCX_DST_FIFOA4, "TCX_DST_FIFOA4"},
+ {AMDGPU_GFX_TCX_DST_FIFOA5, "TCX_DST_FIFOA5"},
+ {AMDGPU_GFX_TCX_DST_FIFOA6, "TCX_DST_FIFOA6"},
+ {AMDGPU_GFX_TCX_DST_FIFOA7, "TCX_DST_FIFOA7"},
+ {AMDGPU_GFX_TCX_DST_FIFOB0, "TCX_DST_FIFOB0"},
+ {AMDGPU_GFX_TCX_DST_FIFOB1, "TCX_DST_FIFOB1"},
+ {AMDGPU_GFX_TCX_DST_FIFOB2, "TCX_DST_FIFOB2"},
+ {AMDGPU_GFX_TCX_DST_FIFOB3, "TCX_DST_FIFOB3"},
+ {AMDGPU_GFX_TCX_DST_FIFOB4, "TCX_DST_FIFOB4"},
+ {AMDGPU_GFX_TCX_DST_FIFOB5, "TCX_DST_FIFOB5"},
+ {AMDGPU_GFX_TCX_DST_FIFOB6, "TCX_DST_FIFOB6"},
+ {AMDGPU_GFX_TCX_DST_FIFOB7, "TCX_DST_FIFOB7"},
+ {AMDGPU_GFX_TCX_DST_FIFOD0, "TCX_DST_FIFOD0"},
+ {AMDGPU_GFX_TCX_DST_FIFOD1, "TCX_DST_FIFOD1"},
+ {AMDGPU_GFX_TCX_DST_FIFOD2, "TCX_DST_FIFOD2"},
+ {AMDGPU_GFX_TCX_DST_FIFOD3, "TCX_DST_FIFOD3"},
+ {AMDGPU_GFX_TCX_DST_FIFOD4, "TCX_DST_FIFOD4"},
+ {AMDGPU_GFX_TCX_DST_FIFOD5, "TCX_DST_FIFOD5"},
+ {AMDGPU_GFX_TCX_DST_FIFOD6, "TCX_DST_FIFOD6"},
+ {AMDGPU_GFX_TCX_DST_FIFOD7, "TCX_DST_FIFOD7"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB0, "TCX_DST_FIFO_ACKB0"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB1, "TCX_DST_FIFO_ACKB1"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB2, "TCX_DST_FIFO_ACKB2"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB3, "TCX_DST_FIFO_ACKB3"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB4, "TCX_DST_FIFO_ACKB4"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB5, "TCX_DST_FIFO_ACKB5"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB6, "TCX_DST_FIFO_ACKB6"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB7, "TCX_DST_FIFO_ACKB7"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD0, "TCX_DST_FIFO_ACKD0"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD1, "TCX_DST_FIFO_ACKD1"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD2, "TCX_DST_FIFO_ACKD2"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD3, "TCX_DST_FIFO_ACKD3"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD4, "TCX_DST_FIFO_ACKD4"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD5, "TCX_DST_FIFO_ACKD5"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD6, "TCX_DST_FIFO_ACKD6"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD7, "TCX_DST_FIFO_ACKD7"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_atc_l2_mem_list[] = { /* ATC_L2: a single memory-ID/name pair */
+ {AMDGPU_GFX_ATC_L2_MEM, "ATC_L2_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_utcl2_mem_list[] = { /* UTCL2: a single memory-ID/name pair */
+ {AMDGPU_GFX_UTCL2_MEM, "UTCL2_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_vml2_mem_list[] = { /* VML2: a single memory-ID/name pair */
+ {AMDGPU_GFX_VML2_MEM, "VML2_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_vml2_walker_mem_list[] = { /* VML2 walker: a single memory-ID/name pair */
+ {AMDGPU_GFX_VML2_WALKER_MEM, "VML2_WALKER_MEM"},
+};
+
+static const struct amdgpu_gfx_ras_mem_id_entry gfx_v9_4_3_ras_mem_list_array[AMDGPU_GFX_MEM_TYPE_NUM] = { /*
+ * Lookup table from a register entry's mem_id_type to the per-block
+ * memory-ID list: the RAS query code reads .mem_id_ent and .size from the
+ * slot selected by mem_id_type. The position of each line is therefore
+ * significant. AMDGPU_GFX_MEMID_ENT is defined outside this chunk;
+ * presumably it expands to one complete initializer including the
+ * trailing comma, since no commas separate the entries here - confirm
+ * against the macro definition.
+ */
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_cp_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gcea_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gc_cane_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gcutcl2_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gds_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_lds_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_rlc_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_sp_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_spi_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_sqc_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_sq_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_ta_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tcc_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tca_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tci_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tcp_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_td_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tcx_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_atc_l2_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_utcl2_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_vml2_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_vml2_walker_mem_list)
+};
+
+static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ce_reg_list[] = { /*
+ * Status registers queried with AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, one
+ * entry per GC sub-block. Inside the inner braces: the LO/HI status
+ * register pair, a count that the query/reset code reads back as
+ * reg_entry.reg_inst, the AMDGPU_RAS_ERR_* validity flags and the block
+ * name. After the inner braces: the memory-list type (mem_id_type) and
+ * se_num. The positional field order is presumed from how those members
+ * are used later in this file - confirm against the struct definition.
+ * Entries with reg_inst == 1 and se_num == 1 are read without an SE/
+ * instance select, and all of them precede the multi-instance entries.
+ */
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regRLC_CE_ERR_STATUS_LOW, regRLC_CE_ERR_STATUS_HIGH),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "RLC"},
+ AMDGPU_GFX_RLC_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPC_CE_ERR_STATUS_LO, regCPC_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPC"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPF_CE_ERR_STATUS_LO, regCPF_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPF"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPG_CE_ERR_STATUS_LO, regCPG_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPG"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGDS_CE_ERR_STATUS_LO, regGDS_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GDS"},
+ AMDGPU_GFX_GDS_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGC_CANE_CE_ERR_STATUS_LO, regGC_CANE_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CANE"},
+ AMDGPU_GFX_GC_CANE_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSPI_CE_ERR_STATUS_LO, regSPI_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SPI"},
+ AMDGPU_GFX_SPI_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP0_CE_ERR_STATUS_LO, regSP0_CE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP0"},
+ AMDGPU_GFX_SP_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP1_CE_ERR_STATUS_LO, regSP1_CE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP1"},
+ AMDGPU_GFX_SP_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQ_CE_ERR_STATUS_LO, regSQ_CE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQ"},
+ AMDGPU_GFX_SQ_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQC_CE_EDC_LO, regSQC_CE_EDC_HI),
+ 5, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQC"},
+ AMDGPU_GFX_SQC_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCX_CE_ERR_STATUS_LO, regTCX_CE_ERR_STATUS_HI),
+ 2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCX"},
+ AMDGPU_GFX_TCX_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCC_CE_ERR_STATUS_LO, regTCC_CE_ERR_STATUS_HI),
+ 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCC"},
+ AMDGPU_GFX_TCC_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTA_CE_EDC_LO, regTA_CE_EDC_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TA"},
+ AMDGPU_GFX_TA_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCI_CE_EDC_LO_REG, regTCI_CE_EDC_HI_REG),
+ 27, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCI"},
+ AMDGPU_GFX_TCI_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCP_CE_EDC_LO_REG, regTCP_CE_EDC_HI_REG),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCP"},
+ AMDGPU_GFX_TCP_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTD_CE_EDC_LO, regTD_CE_EDC_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TD"},
+ AMDGPU_GFX_TD_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGCEA_CE_ERR_STATUS_LO, regGCEA_CE_ERR_STATUS_HI),
+ 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GCEA"},
+ AMDGPU_GFX_GCEA_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regLDS_CE_ERR_STATUS_LO, regLDS_CE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "LDS"},
+ AMDGPU_GFX_LDS_MEM, 4},
+};
+
+static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ue_reg_list[] = { /*
+ * Status registers queried with AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ * laid out like the CE list (register pair, reg_inst, validity flags,
+ * block name, then mem_id_type and se_num).
+ * NOTE(review): this list is NOT index-aligned with
+ * gfx_v9_4_3_ce_reg_list. It carries an extra "TCA" entry between "TD"
+ * and "GCEA", so from that position on the same index names a different
+ * block in the two lists, with different reg_inst/se_num values. Code that
+ * walks both lists with one index must not borrow the CE entry's bounds
+ * for the UE entry.
+ */
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regRLC_UE_ERR_STATUS_LOW, regRLC_UE_ERR_STATUS_HIGH),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "RLC"},
+ AMDGPU_GFX_RLC_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPC_UE_ERR_STATUS_LO, regCPC_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPC"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPF_UE_ERR_STATUS_LO, regCPF_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPF"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPG_UE_ERR_STATUS_LO, regCPG_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPG"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGDS_UE_ERR_STATUS_LO, regGDS_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GDS"},
+ AMDGPU_GFX_GDS_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGC_CANE_UE_ERR_STATUS_LO, regGC_CANE_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CANE"},
+ AMDGPU_GFX_GC_CANE_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSPI_UE_ERR_STATUS_LO, regSPI_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SPI"},
+ AMDGPU_GFX_SPI_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP0_UE_ERR_STATUS_LO, regSP0_UE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP0"},
+ AMDGPU_GFX_SP_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP1_UE_ERR_STATUS_LO, regSP1_UE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP1"},
+ AMDGPU_GFX_SP_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQ_UE_ERR_STATUS_LO, regSQ_UE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQ"},
+ AMDGPU_GFX_SQ_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQC_UE_EDC_LO, regSQC_UE_EDC_HI),
+ 5, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQC"},
+ AMDGPU_GFX_SQC_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCX_UE_ERR_STATUS_LO, regTCX_UE_ERR_STATUS_HI),
+ 2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCX"},
+ AMDGPU_GFX_TCX_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCC_UE_ERR_STATUS_LO, regTCC_UE_ERR_STATUS_HI),
+ 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCC"},
+ AMDGPU_GFX_TCC_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTA_UE_EDC_LO, regTA_UE_EDC_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TA"},
+ AMDGPU_GFX_TA_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCI_UE_EDC_LO_REG, regTCI_UE_EDC_HI_REG),
+ 27, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCI"},
+ AMDGPU_GFX_TCI_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCP_UE_EDC_LO_REG, regTCP_UE_EDC_HI_REG),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCP"},
+ AMDGPU_GFX_TCP_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTD_UE_EDC_LO, regTD_UE_EDC_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TD"},
+ AMDGPU_GFX_TD_MEM, 4},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCA_UE_ERR_STATUS_LO, regTCA_UE_ERR_STATUS_HI),
+ 2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCA"},
+ AMDGPU_GFX_TCA_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGCEA_UE_ERR_STATUS_LO, regGCEA_UE_ERR_STATUS_HI),
+ 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GCEA"},
+ AMDGPU_GFX_GCEA_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regLDS_UE_ERR_STATUS_LO, regLDS_UE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "LDS"},
+ AMDGPU_GFX_LDS_MEM, 4},
+};
+
+/*
+ * gfx_v9_4_3_inst_query_ras_err_count - collect GFX RAS error counts of one XCC
+ * @adev: amdgpu device
+ * @ras_error_status: struct ras_err_data that receives the CE/UE totals
+ * @xcc_id: logical XCC index; register accesses use GET_INST(GC, xcc_id)
+ *
+ * Walks the CE and the UE status register lists under grbm_idx_mutex and
+ * accumulates the counts reported by amdgpu_ras_inst_query_ras_error_count().
+ *
+ * The two lists are deliberately walked separately, each entry with its own
+ * se_num/reg_inst bounds.  They are not index-aligned (the UE list has an
+ * extra TCA entry before GCEA/LDS), so pairing them by index would query
+ * the UE TCA and GCEA registers with another block's instance/SE bounds.
+ */
+static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev,
+					void *ras_error_status, int xcc_id)
+{
+	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+	const struct amdgpu_gfx_ras_reg_entry *entry;
+	unsigned long ce_count = 0, ue_count = 0;
+	uint32_t i, j, k;
+
+	/* NOTE: convert xcc_id to physical XCD ID (XCD0 or XCD1) */
+	struct amdgpu_smuio_mcm_config_info mcm_info = {
+		.socket_id = adev->smuio.funcs->get_socket_id(adev),
+		.die_id = xcc_id & 0x01 ? 1 : 0,
+	};
+
+	mutex_lock(&adev->grbm_idx_mutex);
+
+	/* correctable errors */
+	for (i = 0; i < ARRAY_SIZE(gfx_v9_4_3_ce_reg_list); i++) {
+		entry = &gfx_v9_4_3_ce_reg_list[i];
+		for (j = 0; j < entry->se_num; j++) {
+			for (k = 0; k < entry->reg_entry.reg_inst; k++) {
+				/* no need to select if instance number is 1 */
+				if (entry->se_num > 1 ||
+				    entry->reg_entry.reg_inst > 1)
+					gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id);
+
+				amdgpu_ras_inst_query_ras_error_count(adev,
+					&entry->reg_entry,
+					1,
+					gfx_v9_4_3_ras_mem_list_array[entry->mem_id_type].mem_id_ent,
+					gfx_v9_4_3_ras_mem_list_array[entry->mem_id_type].size,
+					GET_INST(GC, xcc_id),
+					AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+					&ce_count);
+			}
+		}
+	}
+
+	/*
+	 * The UE list starts with single-instance entries that are read
+	 * without a select; go back to broadcast first so they are not read
+	 * through the last SE/instance chosen by the CE pass.
+	 */
+	gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+				    xcc_id);
+
+	/* uncorrectable errors */
+	for (i = 0; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) {
+		entry = &gfx_v9_4_3_ue_reg_list[i];
+		for (j = 0; j < entry->se_num; j++) {
+			for (k = 0; k < entry->reg_entry.reg_inst; k++) {
+				/* no need to select if instance number is 1 */
+				if (entry->se_num > 1 ||
+				    entry->reg_entry.reg_inst > 1)
+					gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id);
+
+				amdgpu_ras_inst_query_ras_error_count(adev,
+					&entry->reg_entry,
+					1,
+					gfx_v9_4_3_ras_mem_list_array[entry->mem_id_type].mem_id_ent,
+					gfx_v9_4_3_ras_mem_list_array[entry->mem_id_type].size,
+					GET_INST(GC, xcc_id),
+					AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+					&ue_count);
+			}
+		}
+	}
+
+	gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+				    xcc_id);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	/* the caller is responsible for initializing
+	 * err_data->ue_count and err_data->ce_count
+	 */
+	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
+	amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count);
+}
+
+/*
+ * gfx_v9_4_3_inst_reset_ras_err_count - reset GFX RAS error status of one XCC
+ * @adev: amdgpu device
+ * @ras_error_status: unused here
+ * @xcc_id: logical XCC index; register accesses use GET_INST(GC, xcc_id)
+ *
+ * Calls amdgpu_ras_inst_reset_ras_error_count() for every SE/instance of
+ * every CE and UE status register entry, under grbm_idx_mutex.
+ *
+ * The two lists are walked separately, each entry with its own
+ * se_num/reg_inst bounds.  They are not index-aligned (the UE list has an
+ * extra TCA entry before GCEA/LDS), so pairing them by index would leave
+ * some UE GCEA instances untouched and reset others repeatedly.
+ */
+static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev,
+					void *ras_error_status, int xcc_id)
+{
+	const struct amdgpu_gfx_ras_reg_entry *entry;
+	uint32_t i, j, k;
+
+	mutex_lock(&adev->grbm_idx_mutex);
+
+	/* correctable error status */
+	for (i = 0; i < ARRAY_SIZE(gfx_v9_4_3_ce_reg_list); i++) {
+		entry = &gfx_v9_4_3_ce_reg_list[i];
+		for (j = 0; j < entry->se_num; j++) {
+			for (k = 0; k < entry->reg_entry.reg_inst; k++) {
+				/* no need to select if instance number is 1 */
+				if (entry->se_num > 1 ||
+				    entry->reg_entry.reg_inst > 1)
+					gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id);
+
+				amdgpu_ras_inst_reset_ras_error_count(adev,
+					&entry->reg_entry,
+					1,
+					GET_INST(GC, xcc_id));
+			}
+		}
+	}
+
+	/*
+	 * The UE list starts with single-instance entries that are accessed
+	 * without a select; go back to broadcast first so they do not go
+	 * through the last SE/instance chosen by the CE pass.
+	 */
+	gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+				    xcc_id);
+
+	/* uncorrectable error status */
+	for (i = 0; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) {
+		entry = &gfx_v9_4_3_ue_reg_list[i];
+		for (j = 0; j < entry->se_num; j++) {
+			for (k = 0; k < entry->reg_entry.reg_inst; k++) {
+				/* no need to select if instance number is 1 */
+				if (entry->se_num > 1 ||
+				    entry->reg_entry.reg_inst > 1)
+					gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id);
+
+				amdgpu_ras_inst_reset_ras_error_count(adev,
+					&entry->reg_entry,
+					1,
+					GET_INST(GC, xcc_id));
+			}
+		}
+	}
+
+	gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+				    xcc_id);
+	mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+/*
+ * gfx_v9_4_3_inst_enable_watchdog_timer - program SQ_TIMEOUT_CONFIG on one XCC
+ * @adev: amdgpu device
+ * @ras_error_status: unused here
+ * @xcc_id: logical XCC index; register accesses use GET_INST(GC, xcc_id)
+ *
+ * Does nothing on an SR-IOV virtual function.  Otherwise updates the
+ * TIMEOUT_FATAL_DISABLE and PERIOD_SEL fields from amdgpu_watchdog_timer and
+ * writes the result once per shader engine.  When fatal timeouts are disabled
+ * and the period is outside 1..0x23, a warning is logged and the period is
+ * forced to 0x23 (this also updates the global amdgpu_watchdog_timer).
+ */
+static void gfx_v9_4_3_inst_enable_watchdog_timer(struct amdgpu_device *adev,
+					void *ras_error_status, int xcc_id)
+{
+	uint32_t i;
+	uint32_t data;
+
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	/*
+	 * Seed the read-modify-write from the XCC that is written below
+	 * rather than always from XCC 0, so the untouched fields keep this
+	 * instance's own values.
+	 */
+	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_TIMEOUT_CONFIG);
+	data = REG_SET_FIELD(data, SQ_TIMEOUT_CONFIG, TIMEOUT_FATAL_DISABLE,
+			     amdgpu_watchdog_timer.timeout_fatal_disable ? 1 : 0);
+
+	if (amdgpu_watchdog_timer.timeout_fatal_disable &&
+	    (amdgpu_watchdog_timer.period < 1 ||
+	     amdgpu_watchdog_timer.period > 0x23)) {
+		dev_warn(adev->dev, "Watchdog period range is 1 to 0x23\n");
+		amdgpu_watchdog_timer.period = 0x23;
+	}
+	data = REG_SET_FIELD(data, SQ_TIMEOUT_CONFIG, PERIOD_SEL,
+			     amdgpu_watchdog_timer.period);
+
+	mutex_lock(&adev->grbm_idx_mutex);
+	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+		gfx_v9_4_3_xcc_select_se_sh(adev, i, 0xffffffff, 0xffffffff, xcc_id);
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_TIMEOUT_CONFIG, data);
+	}
+	/* restore broadcast selection before dropping the lock */
+	gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+				    xcc_id);
+	mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_4_3_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{ /*
+ * Hands gfx_v9_4_3_inst_query_ras_err_count() and ras_error_status to
+ * amdgpu_gfx_ras_error_func(). That helper is defined outside this
+ * chunk; presumably it calls the callback once per XCC - confirm there.
+ */
+ amdgpu_gfx_ras_error_func(adev, ras_error_status,
+ gfx_v9_4_3_inst_query_ras_err_count);
+}
+
+static void gfx_v9_4_3_reset_ras_error_count(struct amdgpu_device *adev)
+{ /* Runs gfx_v9_4_3_inst_reset_ras_err_count() through amdgpu_gfx_ras_error_func(); no status buffer is passed (NULL). */
+ amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_count);
+}
+
+static void gfx_v9_4_3_enable_watchdog_timer(struct amdgpu_device *adev)
+{ /* Runs gfx_v9_4_3_inst_enable_watchdog_timer() through amdgpu_gfx_ras_error_func(); no status buffer is passed (NULL). */
+ amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_enable_watchdog_timer);
+}
+
+/*
+ * gfx_v9_4_3_ring_insert_nop - pad a ring with @num_nop NOP dwords
+ * @ring: ring to write to
+ * @num_nop: number of dwords to fill
+ *
+ * A single dword is written as the ring's plain NOP.  For longer padding one
+ * PACKET3_NOP header is emitted with a count of up to 0x3ffe, and the
+ * remaining num_nop - 1 dwords are filled through amdgpu_ring_insert_nop().
+ */
+static void gfx_v9_4_3_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
+{
+	/*
+	 * Nothing to pad.  Without this guard the unsigned "num_nop - 2" and
+	 * "num_nop - 1" below would wrap around and flood the ring.
+	 */
+	if (!num_nop)
+		return;
+
+	/* Header itself is a NOP packet */
+	if (num_nop == 1) {
+		amdgpu_ring_write(ring, ring->funcs->nop);
+		return;
+	}
+
+	/* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
+
+	/* Header is at index 0, followed by num_nop - 1 NOP packets */
+	amdgpu_ring_insert_nop(ring, num_nop - 1);
+}
+
+/*
+ * gfx_v9_4_3_ip_print - print the register snapshot held in adev->gfx
+ * @ip_block: IP block whose device holds the snapshot buffers
+ * @p: DRM printer receiving the text
+ *
+ * Prints the core registers of every XCC from ip_dump_core, then, if
+ * ip_dump_compute_queues is allocated, the CP registers of every
+ * xcc/mec/pipe/queue combination.  Returns early when a buffer is missing.
+ */
+static void gfx_v9_4_3_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	uint32_t mec, pipe, queue;
+	uint32_t xcc_id, xcc_base, queue_base;
+	uint32_t num_xcc, idx, num_inst;
+	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3);
+	const char *label;
+
+	if (!adev->gfx.ip_dump_core)
+		return;
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+	drm_printf(p, "Number of Instances:%d\n", num_xcc);
+	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+		xcc_base = xcc_id * reg_count;
+		drm_printf(p, "\nInstance id:%d\n", xcc_id);
+		for (idx = 0; idx < reg_count; idx++)
+			drm_printf(p, "%-50s \t 0x%08x\n",
+				   gc_reg_list_9_4_3[idx].reg_name,
+				   adev->gfx.ip_dump_core[xcc_base + idx]);
+	}
+
+	/* print compute queue registers for all instances */
+	if (!adev->gfx.ip_dump_compute_queues)
+		return;
+
+	num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+		   adev->gfx.mec.num_queue_per_pipe;
+
+	reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3);
+	drm_printf(p, "\nnum_xcc: %d num_mec: %d num_pipe: %d num_queue: %d\n",
+		   num_xcc,
+		   adev->gfx.mec.num_mec,
+		   adev->gfx.mec.num_pipe_per_mec,
+		   adev->gfx.mec.num_queue_per_pipe);
+
+	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+		xcc_base = xcc_id * reg_count * num_inst;
+		queue_base = 0;
+		for (mec = 0; mec < adev->gfx.mec.num_mec; mec++) {
+			for (pipe = 0; pipe < adev->gfx.mec.num_pipe_per_mec; pipe++) {
+				for (queue = 0; queue < adev->gfx.mec.num_queue_per_pipe; queue++) {
+					drm_printf(p,
+						   "\nxcc:%d mec:%d, pipe:%d, queue:%d\n",
+						   xcc_id, mec, pipe, queue);
+					for (idx = 0; idx < reg_count; idx++) {
+						/*
+						 * On MECs after the first, the ME1 header
+						 * dump slot is labelled as the ME2 register.
+						 */
+						label = gc_cp_reg_list_9_4_3[idx].reg_name;
+						if (mec && gc_cp_reg_list_9_4_3[idx].reg_offset ==
+							   regCP_MEC_ME1_HEADER_DUMP)
+							label = "regCP_MEC_ME2_HEADER_DUMP";
+
+						drm_printf(p,
+							   "%-50s \t 0x%08x\n",
+							   label,
+							   adev->gfx.ip_dump_compute_queues
+								[xcc_base + queue_base + idx]);
+					}
+					queue_base += reg_count;
+				}
+			}
+		}
+	}
+}
+
+/*
+ * gfx_v9_4_3_ip_dump - snapshot GC registers into the adev->gfx dump buffers
+ * @ip_block: IP block whose device is dumped
+ *
+ * Reads the core register list of every XCC into ip_dump_core, then, if
+ * ip_dump_compute_queues is allocated, selects every mec/pipe/queue of every
+ * XCC under srbm_mutex and reads the CP register list into it.  Returns
+ * early when a buffer is missing.
+ */
+static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	uint32_t i, j, k;
+	uint32_t num_xcc, reg, num_inst;
+	uint32_t xcc_id, xcc_offset, inst_offset;
+	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3);
+
+	if (!adev->gfx.ip_dump_core)
+		return;
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+		xcc_offset = xcc_id * reg_count;
+		for (i = 0; i < reg_count; i++)
+			adev->gfx.ip_dump_core[xcc_offset + i] =
+				RREG32(SOC15_REG_ENTRY_OFFSET_INST(gc_reg_list_9_4_3[i],
+								   GET_INST(GC, xcc_id)));
+	}
+
+	/* dump compute queue registers for all instances */
+	if (!adev->gfx.ip_dump_compute_queues)
+		return;
+
+	num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+		   adev->gfx.mec.num_queue_per_pipe;
+	reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3);
+	mutex_lock(&adev->srbm_mutex);
+	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+		xcc_offset = xcc_id * reg_count * num_inst;
+		inst_offset = 0;
+		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+				for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+					/* ME0 is for GFX so start from 1 for CP */
+					soc15_grbm_select(adev, 1 + i, j, k, 0,
+							  GET_INST(GC, xcc_id));
+
+					for (reg = 0; reg < reg_count; reg++) {
+						/* MECs after the first read the ME2 header dump register */
+						if (i && gc_cp_reg_list_9_4_3[reg].reg_offset ==
+							 regCP_MEC_ME1_HEADER_DUMP)
+							adev->gfx.ip_dump_compute_queues
+								[xcc_offset +
+								 inst_offset + reg] =
+								RREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id),
+											regCP_MEC_ME2_HEADER_DUMP));
+						else
+							adev->gfx.ip_dump_compute_queues
+								[xcc_offset +
+								 inst_offset + reg] =
+								RREG32(SOC15_REG_ENTRY_OFFSET_INST(
+									gc_cp_reg_list_9_4_3[reg],
+									GET_INST(GC, xcc_id)));
+					}
+					inst_offset += reg_count;
+				}
+			}
+		}
+		/*
+		 * Restore the default selection on the instance that was just
+		 * walked.  Resetting only instance 0 after the loop would leave
+		 * every other XCC pointing at its last mec/pipe/queue.
+		 */
+		soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+	}
+	mutex_unlock(&adev->srbm_mutex);
+}
+
+/*
+ * gfx_v9_4_3_ring_emit_cleaner_shader - emit a RUN_CLEANER_SHADER packet
+ * @ring: ring to write the packet to
+ *
+ * The packet is two dwords: the PACKET3 header and one reserved dword.
+ */
+static void gfx_v9_4_3_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+	const uint32_t header = PACKET3(PACKET3_RUN_CLEANER_SHADER, 0);
+
+	amdgpu_ring_write(ring, header);
+	/* RESERVED field, programmed to zero */
+	amdgpu_ring_write(ring, 0);
+}
+
+static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { /*
+ * IP-block callback table for GFX 9.4.3. Of the handlers listed, only
+ * gfx_v9_4_3_ip_dump and gfx_v9_4_3_ip_print are defined in this part of
+ * the file; the rest are defined earlier.
+ */
+ .name = "gfx_v9_4_3",
+ .early_init = gfx_v9_4_3_early_init,
+ .late_init = gfx_v9_4_3_late_init,
+ .sw_init = gfx_v9_4_3_sw_init,
+ .sw_fini = gfx_v9_4_3_sw_fini,
+ .hw_init = gfx_v9_4_3_hw_init,
+ .hw_fini = gfx_v9_4_3_hw_fini,
+ .suspend = gfx_v9_4_3_suspend,
+ .resume = gfx_v9_4_3_resume,
+ .is_idle = gfx_v9_4_3_is_idle,
+ .wait_for_idle = gfx_v9_4_3_wait_for_idle,
+ .soft_reset = gfx_v9_4_3_soft_reset,
+ .set_clockgating_state = gfx_v9_4_3_set_clockgating_state,
+ .set_powergating_state = gfx_v9_4_3_set_powergating_state,
+ .get_clockgating_state = gfx_v9_4_3_get_clockgating_state,
+ .dump_ip_state = gfx_v9_4_3_ip_dump, /* captures registers into adev->gfx.ip_dump_* */
+ .print_ip_state = gfx_v9_4_3_ip_print, /* prints what dump_ip_state captured */
+};
+
+static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { /*
+ * Ring callbacks installed on every compute ring by
+ * gfx_v9_4_3_set_ring_funcs(). emit_frame_size is the sum of the
+ * per-emitter sizes annotated on each term below.
+ */
+ .type = AMDGPU_RING_TYPE_COMPUTE,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v9_4_3_ring_get_rptr_compute,
+ .get_wptr = gfx_v9_4_3_ring_get_wptr_compute,
+ .set_wptr = gfx_v9_4_3_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v9_4_3_ring_emit_gds_switch */
+ 7 + /* gfx_v9_4_3_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v9_4_3_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v9_4_3_ring_emit_vm_flush */
+ 8 + 8 + 8 + /* gfx_v9_4_3_ring_emit_fence x3 for user fence, vm fence */
+ 7 + /* gfx_v9_4_3_emit_mem_sync */
+ 5 + /* gfx_v9_4_3_emit_wave_limit for updating regSPI_WCL_PIPE_PERCENT_GFX register */
+ 15 + /* for updating 3 regSPI_WCL_PIPE_PERCENT_CS registers */
+ 2, /* gfx_v9_4_3_ring_emit_cleaner_shader */
+ .emit_ib_size = 7, /* gfx_v9_4_3_ring_emit_ib_compute */
+ .emit_ib = gfx_v9_4_3_ring_emit_ib_compute,
+ .emit_fence = gfx_v9_4_3_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v9_4_3_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v9_4_3_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v9_4_3_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v9_4_3_ring_emit_hdp_flush,
+ .test_ring = gfx_v9_4_3_ring_test_ring,
+ .test_ib = gfx_v9_4_3_ring_test_ib,
+ .insert_nop = gfx_v9_4_3_ring_insert_nop, /* local variant that emits one counted NOP header */
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_wreg = gfx_v9_4_3_ring_emit_wreg,
+ .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait,
+ .soft_recovery = gfx_v9_4_3_ring_soft_recovery,
+ .emit_mem_sync = gfx_v9_4_3_emit_mem_sync,
+ .emit_wave_limit = gfx_v9_4_3_emit_wave_limit,
+ .reset = gfx_v9_4_3_reset_kcq,
+ .emit_cleaner_shader = gfx_v9_4_3_ring_emit_cleaner_shader,
+ .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
+ .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
+};
+
+static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = { /*
+ * Ring callbacks installed on each XCC's KIQ ring by
+ * gfx_v9_4_3_set_ring_funcs(). Compared with the compute table this one
+ * uses the KIQ fence emitter and the generic amdgpu_ring_insert_nop, adds
+ * emit_rreg, and sets no emit_ib (emit_ib_size is still given).
+ */
+ .type = AMDGPU_RING_TYPE_KIQ,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v9_4_3_ring_get_rptr_compute,
+ .get_wptr = gfx_v9_4_3_ring_get_wptr_compute,
+ .set_wptr = gfx_v9_4_3_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v9_4_3_ring_emit_gds_switch */
+ 7 + /* gfx_v9_4_3_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v9_4_3_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v9_4_3_ring_emit_vm_flush */
+ 8 + 8 + 8, /* gfx_v9_4_3_ring_emit_fence_kiq x3 for user fence, vm fence */
+ .emit_ib_size = 7, /* gfx_v9_4_3_ring_emit_ib_compute */
+ .emit_fence = gfx_v9_4_3_ring_emit_fence_kiq,
+ .test_ring = gfx_v9_4_3_ring_test_ring,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_rreg = gfx_v9_4_3_ring_emit_rreg,
+ .emit_wreg = gfx_v9_4_3_ring_emit_wreg,
+ .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v9_4_3_ring_emit_hdp_flush,
+};
+
+/*
+ * gfx_v9_4_3_set_ring_funcs - install the ring callback tables
+ * @adev: amdgpu device
+ *
+ * For every XCC, the KIQ ring gets the KIQ table and that XCC's slice of
+ * adev->gfx.compute_ring[] (num_compute_rings entries) gets the compute table.
+ */
+static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev)
+{
+	int xcc, ring, num_xcc;
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+	for (xcc = 0; xcc < num_xcc; xcc++) {
+		/* compute rings are stored XCC after XCC in one flat array */
+		for (ring = 0; ring < adev->gfx.num_compute_rings; ring++)
+			adev->gfx.compute_ring[ring + xcc * adev->gfx.num_compute_rings].funcs
+				= &gfx_v9_4_3_ring_funcs_compute;
+
+		adev->gfx.kiq[xcc].ring.funcs = &gfx_v9_4_3_ring_funcs_kiq;
+	}
+}
+
+static const struct amdgpu_irq_src_funcs gfx_v9_4_3_eop_irq_funcs = { /* set/process handlers for adev->gfx.eop_irq */
+ .set = gfx_v9_4_3_set_eop_interrupt_state,
+ .process = gfx_v9_4_3_eop_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_reg_irq_funcs = { /* set/process handlers for adev->gfx.priv_reg_irq */
+ .set = gfx_v9_4_3_set_priv_reg_fault_state,
+ .process = gfx_v9_4_3_priv_reg_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v9_4_3_bad_op_irq_funcs = { /* set/process handlers for adev->gfx.bad_op_irq */
+ .set = gfx_v9_4_3_set_bad_op_fault_state,
+ .process = gfx_v9_4_3_bad_op_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_inst_irq_funcs = { /* set/process handlers for adev->gfx.priv_inst_irq */
+ .set = gfx_v9_4_3_set_priv_inst_fault_state,
+ .process = gfx_v9_4_3_priv_inst_irq,
+};
+
+/*
+ * gfx_v9_4_3_set_irq_funcs - hook up the GFX interrupt sources
+ * @adev: amdgpu device
+ *
+ * Assigns the handler table and the number of interrupt types for the EOP,
+ * privileged-register, bad-opcode and privileged-instruction sources.
+ */
+static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_gfx *gfx = &adev->gfx;
+
+	/* EOP has one type per CP interrupt source */
+	gfx->eop_irq.funcs = &gfx_v9_4_3_eop_irq_funcs;
+	gfx->eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
+
+	/* the fault sources each have a single type */
+	gfx->priv_reg_irq.funcs = &gfx_v9_4_3_priv_reg_irq_funcs;
+	gfx->priv_reg_irq.num_types = 1;
+
+	gfx->bad_op_irq.funcs = &gfx_v9_4_3_bad_op_irq_funcs;
+	gfx->bad_op_irq.num_types = 1;
+
+	gfx->priv_inst_irq.funcs = &gfx_v9_4_3_priv_inst_irq_funcs;
+	gfx->priv_inst_irq.num_types = 1;
+}
+
+static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev)
+{ /* Installs gfx_v9_4_3_rlc_funcs (defined outside this chunk) as the device's RLC callback table. */
+ adev->gfx.rlc.funcs = &gfx_v9_4_3_rlc_funcs;
+}
+
+
+/*
+ * gfx_v9_4_3_set_gds_init - fill in the GDS/GWS/OA sizes for this ASIC
+ * @adev: amdgpu device
+ *
+ * 9.4.3 variants removed all the GDS internal memory; the kernel only
+ * supports the GWS opcodes (barrier, semaphore, etc.), so the GDS size and
+ * the compute max wave id are zero while GWS and OA keep non-zero sizes.
+ */
+static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev)
+{
+	/* no GDS memory on this ASIC */
+	adev->gds.gds_compute_max_wave_id = 0;
+	adev->gds.gds_size = 0;
+
+	/* resources that are still exposed */
+	adev->gds.oa_size = 16;
+	adev->gds.gws_size = 64;
+}
+
+/*
+ * Write @bitmap into the INACTIVE_CUS field of GC_USER_SHADER_ARRAY_CONFIG on
+ * the GC instance mapped from @xcc_id. A zero @bitmap leaves the register
+ * untouched; bits that do not fit the field are dropped by the mask.
+ *
+ * NOTE(review): the register is written without a prior read, so every other
+ * field in it is written as 0 -- confirm that is intended.
+ */
+static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
+ u32 bitmap, int xcc_id)
+{
+ u32 data;
+
+ if (!bitmap)
+ return;
+
+ data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+ data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
+/*
+ * Return the active-CU bitmap seen through the current register selection on
+ * the GC instance mapped from @xcc_id. A CU counts as inactive when its bit is
+ * set in the INACTIVE_CUS field of either CC_GC_SHADER_ARRAY_CONFIG or
+ * GC_USER_SHADER_ARRAY_CONFIG; the result is limited to max_cu_per_sh bits.
+ */
+static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev, int xcc_id)
+{
+ u32 inactive, valid_cus;
+
+ /* Two separate statements keep the register read order fixed. */
+ inactive = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SHADER_ARRAY_CONFIG);
+ inactive |= RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG);
+
+ inactive = (inactive & CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK) >>
+ CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+
+ valid_cus = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
+
+ return valid_cus & ~inactive;
+}
+
+/**
+ * gfx_v9_4_3_get_cu_info - collect the active-CU layout of every XCC
+ *
+ * @adev: amdgpu_device pointer
+ * @cu_info: output; bitmap[][][], ao_cu_bitmap[][], number, ao_cu_mask and
+ * simd_per_cu are filled in
+ *
+ * For each XCC / shader engine / shader array, the disable mask obtained from
+ * amdgpu_gfx_parse_disable_cu() is written to the hardware, the resulting
+ * active-CU bitmap is read back and stored, and the active CUs are counted.
+ *
+ * Returns 0 on success, -EINVAL if an argument is NULL or if
+ * max_shader_engines * max_sh_per_se exceeds the 16-entry disable_masks[].
+ */
+static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info)
+{
+ int i, j, k, prev_counter, counter, xcc_id, active_cu_number = 0;
+ u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0, tmp;
+ unsigned disable_masks[4 * 4];
+ bool is_symmetric_cus;
+
+ if (!adev || !cu_info)
+ return -EINVAL;
+
+ /*
+ * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
+ */
+ if (adev->gfx.config.max_shader_engines *
+ adev->gfx.config.max_sh_per_se > 16)
+ return -EINVAL;
+
+ amdgpu_gfx_parse_disable_cu(disable_masks,
+ adev->gfx.config.max_shader_engines,
+ adev->gfx.config.max_sh_per_se);
+
+ /* grbm_idx_mutex is held across all SE/SH selection done below. */
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
+ is_symmetric_cus = true;
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ mask = 1;
+ ao_bitmap = 0;
+ counter = 0;
+ gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, xcc_id);
+ gfx_v9_4_3_set_user_cu_inactive_bitmap(
+ adev,
+ disable_masks[i * adev->gfx.config.max_sh_per_se + j],
+ xcc_id);
+ bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev, xcc_id);
+
+ cu_info->bitmap[xcc_id][i][j] = bitmap;
+
+ /*
+ * counter is at most k when tested, so it never reaches
+ * max_cu_per_sh here: every active CU also lands in
+ * ao_bitmap.
+ */
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
+ if (bitmap & mask) {
+ if (counter < adev->gfx.config.max_cu_per_sh)
+ ao_bitmap |= mask;
+ counter++;
+ }
+ mask <<= 1;
+ }
+ active_cu_number += counter;
+ /* Only SE 0-1 / SH 0-1 are packed: 16 bits per SE, 8 per SH. */
+ if (i < 2 && j < 2)
+ ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+ /* Not indexed by xcc_id: the last XCC processed wins. */
+ cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
+ }
+ /*
+ * counter holds the count of the last SH of SE i and is compared
+ * with the same value of SE i - 1. prev_counter is only read for
+ * i > 0, after the i == 0 pass has assigned it.
+ */
+ if (i && is_symmetric_cus && prev_counter != counter)
+ is_symmetric_cus = false;
+ prev_counter = counter;
+ }
+ /* Counts matched across SEs: set both CPC harvesting disable bits. */
+ if (is_symmetric_cus) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_DEBUG);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_DEBUG, CPC_HARVESTING_RELAUNCH_DISABLE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_DEBUG, CPC_HARVESTING_DISPATCH_DISABLE, 1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_DEBUG, tmp);
+ }
+ /* Re-select with 0xffffffff everywhere (presumably broadcast -- confirm). */
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
+ }
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ cu_info->number = active_cu_number;
+ cu_info->ao_cu_mask = ao_cu_mask;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+
+ return 0;
+}
+
+/* IP block descriptor: GFX type, version 9.4.3, using gfx_v9_4_3_ip_funcs. */
+const struct amdgpu_ip_block_version gfx_v9_4_3_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 9,
+ .minor = 4,
+ .rev = 3,
+ .funcs = &gfx_v9_4_3_ip_funcs,
+};
+
+/*
+ * Resume the GFX instances selected by @inst_mask. @handle is the
+ * amdgpu_device. Each instance gets constants init, then RLC resume (skipped
+ * when running as an SR-IOV VF), then CP resume. Returns the first non-zero
+ * error from an RLC or CP resume, 0 otherwise.
+ */
+static int gfx_v9_4_3_xcp_resume(void *handle, uint32_t inst_mask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ uint32_t tmp_mask;
+ int i, r;
+
+ /* TODO : Initialize golden regs */
+ /* gfx_v9_4_3_init_golden_registers(adev); */
+
+ /*
+ * inst_mask is copied again before every walk; presumably
+ * for_each_inst() consumes its mask argument -- confirm.
+ */
+ tmp_mask = inst_mask;
+ for_each_inst(i, tmp_mask)
+ gfx_v9_4_3_xcc_constants_init(adev, i);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ tmp_mask = inst_mask;
+ for_each_inst(i, tmp_mask) {
+ r = gfx_v9_4_3_xcc_rlc_resume(adev, i);
+ if (r)
+ return r;
+ }
+ }
+
+ tmp_mask = inst_mask;
+ for_each_inst(i, tmp_mask) {
+ r = gfx_v9_4_3_xcc_cp_resume(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/*
+ * Suspend the GFX instances selected by @inst_mask by calling
+ * gfx_v9_4_3_xcc_fini() on each. @handle is the amdgpu_device. Always
+ * returns 0.
+ */
+static int gfx_v9_4_3_xcp_suspend(void *handle, uint32_t inst_mask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ for_each_inst(i, inst_mask)
+ gfx_v9_4_3_xcc_fini(adev, i);
+
+ return 0;
+}
+
+/* Per-partition suspend/resume entry points for GFX 9.4.3. */
+struct amdgpu_xcp_ip_funcs gfx_v9_4_3_xcp_funcs = {
+ .suspend = &gfx_v9_4_3_xcp_suspend,
+ .resume = &gfx_v9_4_3_xcp_resume
+};
+
+/* RAS hardware ops for GFX 9.4.3: query and reset of the error counts. */
+struct amdgpu_ras_block_hw_ops gfx_v9_4_3_ras_ops = {
+ .query_ras_error_count = &gfx_v9_4_3_query_ras_error_count,
+ .reset_ras_error_count = &gfx_v9_4_3_reset_ras_error_count,
+};
+
+/*
+ * RAS late init for the GFX block: run the common block late init, then bind
+ * gfx_v9_4_3_aca_info for AMDGPU_RAS_BLOCK__GFX. If the bind fails, the common
+ * late init is undone. Returns 0 on success or the failing call's error code.
+ */
+static int gfx_v9_4_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int ret = amdgpu_ras_block_late_init(adev, ras_block);
+
+ if (ret)
+ return ret;
+
+ ret = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__GFX,
+ &gfx_v9_4_3_aca_info,
+ NULL);
+ if (!ret)
+ return 0;
+
+ /* ACA binding failed: unwind the common late init before reporting it. */
+ amdgpu_ras_block_late_fini(adev, ras_block);
+
+ return ret;
+}
+
+/* GFX RAS descriptor: hw ops, late-init hook and watchdog-timer enable hook. */
+struct amdgpu_gfx_ras gfx_v9_4_3_ras = {
+ .ras_block = {
+ .hw_ops = &gfx_v9_4_3_ras_ops,
+ .ras_late_init = &gfx_v9_4_3_ras_late_init,
+ },
+ .enable_watchdog_timer = &gfx_v9_4_3_enable_watchdog_timer,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h
new file mode 100644
index 000000000000..42d67ee0e7ef
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V9_4_3_H__
+#define __GFX_V9_4_3_H__
+
+extern const struct amdgpu_ip_block_version gfx_v9_4_3_ip_block;
+
+extern struct amdgpu_xcp_ip_funcs gfx_v9_4_3_xcp_funcs;
+
+#endif /* __GFX_V9_4_3_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm
new file mode 100644
index 000000000000..d5325ef80ab0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 192 Dwords cleaner shader.
+// To turn this shader program on for compilation, change this to main and rename the lower shader main to main_1
+
+// MI300 : Clear SGPRs, VGPRs and LDS
+// Uses two kernels launched separately:
+// 1. Clean VGPRs, LDS, and lower SGPRs
+// Launches one workgroup per CU, each workgroup with 4x wave64 per SIMD in the CU
+// Waves are "wave64" and have 128 VGPRs each, which uses all 512 VGPRs per SIMD
+// Waves in the workgroup share the 64KB of LDS
+// Each wave clears SGPRs 0 - 95. Because there are 4 waves/SIMD, this is physical SGPRs 0-383
+// Each wave clears 128 VGPRs, so all 512 in the SIMD
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+// 2. Clean remaining SGPRs
+// Launches a workgroup with 24 waves per workgroup, yielding 6 waves per SIMD in each CU
+// Waves are allocating 96 SGPRs
+// CP sets up SPI_RESOURCE_RESERVE_* registers to prevent these waves from allocating SGPRs 0-223.
+// As such, these 6 waves per SIMD are allocated physical SGPRs 224-799
+// Barriers do not work for >16 waves per workgroup, so we cannot start with S_BARRIER
+// Instead, the shader starts with an S_SETHALT 1. Once all waves are launched CP will send unhalt command
+// The shader then clears all SGPRs allocated to it, cleaning out physical SGPRs 224-799
+
+shader main
+ asic(MI300)
+ type(CS)
+ wave_size(64)
+// Note: original source code from SQ team
+
+// (theoretical fastest = ~512clks vgpr + 1536 lds + ~128 sgpr = 2176 clks)
+
+ s_cmp_eq_u32 s0, 1 // Bit0 is set, sgpr0 is set then clear VGPRS and LDS as FW set COMPUTE_USER_DATA_3
+ s_cbranch_scc0 label_0023 // Clean VGPRs and LDS if sgpr0 of wave is set, scc = (s0 == 1)
+ S_BARRIER
+
+ s_movk_i32 m0, 0x0000
+ s_mov_b32 s2, 0x00000078 // Loop 128/8=16 times (loop unrolled for performance)
+ //
+ // CLEAR VGPRs
+ //
+ s_set_gpr_idx_on s2, 0x8 // enable Dest VGPR indexing
+label_0005:
+ v_mov_b32 v0, 0
+ v_mov_b32 v1, 0
+ v_mov_b32 v2, 0
+ v_mov_b32 v3, 0
+ v_mov_b32 v4, 0
+ v_mov_b32 v5, 0
+ v_mov_b32 v6, 0
+ v_mov_b32 v7, 0
+ s_sub_u32 s2, s2, 8
+ s_set_gpr_idx_idx s2
+ s_cbranch_scc0 label_0005
+ s_set_gpr_idx_off
+
+ //
+ //
+
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s1 // tests s1, expected to hold the tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+ s_cbranch_scc0 label_clean_sgpr_1 // Clean LDS if it is the first wave of ThreadGroup/WorkGroup
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x00000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+ //
+ // CLEAR SGPRs
+ //
+label_clean_sgpr_1:
+ s_mov_b32 m0, 0x0000005c // Loop 96/4=24 times (loop unrolled for performance)
+ s_nop 0
+label_sgpr_loop:
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ //clear vcc, flat scratch
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 vcc, 0 //clear vcc
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+s_endpgm
+
+label_0023:
+
+ s_sethalt 1
+
+ s_mov_b32 m0, 0x0000005c // Loop 96/4=24 times (loop unrolled for performance)
+ s_nop 0
+label_sgpr_loop1:
+
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop1
+
+ //clear vcc, flat scratch
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 vcc, 0xee //NOTE(review): overwrites vcc with 0xee rather than 0 as the first kernel does -- confirm intent
+
+s_endpgm
+end
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h
new file mode 100644
index 000000000000..69aa567c6c1d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Define the cleaner shader gfx_9_4_3.
+ *
+ * Raw shader dwords; presumably the assembled form of
+ * gfx_v9_4_3_cleaner_shader.asm -- keep the two in sync (TODO confirm).
+ */
+static const u32 gfx_9_4_3_cleaner_shader_hex[] = {
+ 0xbf068100, 0xbf84003b,
+ 0xbf8a0000, 0xb07c0000,
+ 0xbe8200ff, 0x00000078,
+ 0xbf110802, 0x7e000280,
+ 0x7e020280, 0x7e040280,
+ 0x7e060280, 0x7e080280,
+ 0x7e0a0280, 0x7e0c0280,
+ 0x7e0e0280, 0x80828802,
+ 0xbe803202, 0xbf84fff5,
+ 0xbf9c0000, 0xbe8200ff,
+ 0x80000000, 0x86020102,
+ 0xbf840011, 0xbefe00c1,
+ 0xbeff00c1, 0xd28c0001,
+ 0x0001007f, 0xd28d0001,
+ 0x0002027e, 0x10020288,
+ 0xbe8200bf, 0xbefc00c1,
+ 0xd89c2000, 0x00020201,
+ 0xd89c6040, 0x00040401,
+ 0x320202ff, 0x00000400,
+ 0x80828102, 0xbf84fff8,
+ 0xbefc00ff, 0x0000005c,
+ 0xbf800000, 0xbe802c80,
+ 0xbe812c80, 0xbe822c80,
+ 0xbe832c80, 0x80fc847c,
+ 0xbf84fffa, 0xbee60080,
+ 0xbee70080, 0xbeea0180,
+ 0xbeec0180, 0xbeee0180,
+ 0xbef00180, 0xbef20180,
+ 0xbef40180, 0xbef60180,
+ 0xbef80180, 0xbefa0180,
+ 0xbf810000, 0xbf8d0001,
+ 0xbefc00ff, 0x0000005c,
+ 0xbf800000, 0xbe802c80,
+ 0xbe812c80, 0xbe822c80,
+ 0xbe832c80, 0x80fc847c,
+ 0xbf84fffa, 0xbee60080,
+ 0xbee70080, 0xbeea01ff,
+ 0x000000ee, 0xbf810000,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c
new file mode 100644
index 000000000000..f9949fedfbb9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c
@@ -0,0 +1,516 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "gfxhub_v11_5_0.h"
+
+#include "gc/gc_11_5_0_offset.h"
+#include "gc/gc_11_5_0_sh_mask.h"
+
+#include "navi10_enum.h"
+#include "soc15_common.h"
+
+#define regGCVM_L2_CNTL3_DEFAULT 0x80100007
+#define regGCVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regGCVM_L2_CNTL5_DEFAULT 0x00003fe0
+
+
+/*
+ * Client names indexed by the CID field of GCVM_L2_PROTECTION_FAULT_STATUS;
+ * used when printing a protection fault. Out-of-range CIDs are reported as
+ * "unknown" by the caller.
+ */
+static const char *gfxhub_client_ids[] = {
+ "CB/DB",
+ "Reserved",
+ "GE1",
+ "GE2",
+ "CPF",
+ "CPC",
+ "CPG",
+ "RLC",
+ "TCP",
+ "SQC (inst)",
+ "SQC (data)",
+ "SQG",
+ "Reserved",
+ "SDMA0",
+ "SDMA1",
+ "GCR",
+ "SDMA2",
+ "SDMA3",
+};
+
+/*
+ * Build a request value in GCVM_INVALIDATE_ENG0_REQ layout that invalidates
+ * the L1 PTEs and the L2 PTEs/PDE0/PDE1/PDE2 for @vmid using @flush_type.
+ * CLEAR_PROTECTION_FAULT_STATUS_ADDR is left at 0. Nothing is written to
+ * hardware here; the value is returned to the caller.
+ */
+static uint32_t gfxhub_v11_5_0_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+/*
+ * Decode a GCVM_L2_PROTECTION_FAULT_STATUS value and log, via dev_err(), the
+ * raw value, the CID (with its client name when it indexes
+ * gfxhub_client_ids[]), and the MORE_FAULTS, WALKER_ERROR, PERMISSION_FAULTS,
+ * MAPPING_ERROR and RW fields.
+ */
+static void
+gfxhub_v11_5_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ u32 cid = REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, CID);
+
+ dev_err(adev->dev,
+ "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid],
+ cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, RW));
+}
+
+/*
+ * Return the FB_BASE field of GCMC_VM_FB_LOCATION_BASE shifted left by 24
+ * bits, as a 64-bit value.
+ */
+static u64 gfxhub_v11_5_0_get_fb_location(struct amdgpu_device *adev)
+{
+ /* Widen to 64 bits before shifting so the upper bits are not lost. */
+ u64 fb_base = RREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE);
+
+ fb_base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+
+ return fb_base << 24;
+}
+
+/* Return the GCMC_VM_FB_OFFSET register value shifted left by 24 bits, as u64. */
+static u64 gfxhub_v11_5_0_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ u64 fb_offset = RREG32_SOC15(GC, 0, regGCMC_VM_FB_OFFSET);
+
+ return fb_offset << 24;
+}
+
+/*
+ * Program the 64-bit @page_table_base of context @vmid on GFXHUB 0 as LO32 and
+ * HI32 halves. The registers of context N are addressed as the CONTEXT0
+ * registers plus ctx_addr_distance * N.
+ */
+static void gfxhub_v11_5_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+/*
+ * Point context 0 at the GART page table (from adev->gart.bo) and program its
+ * start/end range from gmc.gart_start / gmc.gart_end. Each address is written
+ * as (addr >> 12), split into its low 32 bits (LO32) and the bits above (HI32).
+ */
+static void gfxhub_v11_5_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ gfxhub_v11_5_0_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+/*
+ * Program the AGP window, the system aperture bounds (the span covering both
+ * the FB and AGP ranges), the default page address (derived from
+ * adev->mem_scratch.gpu_addr) and the protection-fault default address
+ * (adev->dummy_page_addr), then set ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY in
+ * GCVM_L2_PROTECTION_FAULT_CNTL2.
+ */
+static void gfxhub_v11_5_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ /* The u64 cast keeps the 44-bit shift valid if dummy_page_addr is narrower (assumed -- confirm). */
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+}
+
+/*
+ * Read-modify-write GCMC_VM_MX_L1_TLB_CNTL: enable the L1 TLB and the advanced
+ * driver model, set SYSTEM_ACCESS_MODE to 3, clear
+ * SYSTEM_APERTURE_UNMAPPED_ACCESS and ECO_BITS, and select MTYPE_UC.
+ */
+static void gfxhub_v11_5_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+/*
+ * Program the VM L2 cache control registers GCVM_L2_CNTL .. GCVM_L2_CNTL5.
+ * CNTL and CNTL2 are read-modify-write; CNTL3/4/5 start from the
+ * regGCVM_L2_CNTLn_DEFAULT constants defined in this file. Does nothing when
+ * running as an SR-IOV VF.
+ */
+static void gfxhub_v11_5_0_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL, tmp);
+
+ /* Request invalidation of all L1 TLBs and of the L2 cache. */
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL2, tmp);
+
+ /* BANK_SELECT / BIGK fragment size depend on gmc.translate_further. */
+ tmp = regGCVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, tmp);
+
+ tmp = regGCVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL4, tmp);
+
+ tmp = regGCVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL5, tmp);
+}
+
+/*
+ * Read-modify-write GCVM_CONTEXT0_CNTL: enable the context, set
+ * PAGE_TABLE_DEPTH to 0 and clear RETRY_PERMISSION_OR_INVALID_PAGE_FAULT.
+ */
+static void gfxhub_v11_5_0_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL, tmp);
+}
+
+/*
+ * Program the CONTEXT1 identity aperture with LOW = 0xF_FFFFFFFF and HIGH = 0
+ * (low bound above high bound) and zero the identity physical offset. Does
+ * nothing when running as an SR-IOV VF.
+ */
+static void gfxhub_v11_5_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+
+}
+
+/*
+ * Configure the 15 contexts starting at CONTEXT1 (i = 0..14 offsets from the
+ * CONTEXT1 registers): enable each, set page table depth and block size from
+ * adev->vm_manager, enable the listed *_PROTECTION_FAULT_ENABLE_DEFAULT bits,
+ * and program the page table range as 0 .. max_pfn - 1. The last CNTL value
+ * written is cached in hub->vm_cntx_cntl.
+ */
+static void gfxhub_v11_5_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ /* CNTL registers step by ctx_distance, address registers by ctx_addr_distance. */
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ /* The loop always runs, so tmp holds the value written for the last context. */
+ hub->vm_cntx_cntl = tmp;
+}
+
+/*
+ * Set the address range of invalidation engines 0..17 to LO32 = 0xffffffff,
+ * HI32 = 0x1f. Engine N's registers are the ENG0 registers plus
+ * eng_addr_distance * N.
+ */
+static void gfxhub_v11_5_0_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ unsigned i;
+
+ for (i = 0 ; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+/*
+ * Enable GART on GFXHUB 0: for SR-IOV VFs first program the FB location
+ * base/top, then run the aperture, TLB, cache, context and invalidation
+ * setup helpers in order. Always returns 0.
+ */
+static int gfxhub_v11_5_0_gart_enable(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev)) {
+ /*
+ * GCMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because they are
+ * VF copy registers so vbios post doesn't program them, for
+ * SRIOV driver need to program them
+ */
+ WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE,
+ adev->gmc.vram_start >> 24);
+ WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_TOP,
+ adev->gmc.vram_end >> 24);
+ }
+
+ /* GART Enable. */
+ gfxhub_v11_5_0_init_gart_aperture_regs(adev);
+ gfxhub_v11_5_0_init_system_aperture_regs(adev);
+ gfxhub_v11_5_0_init_tlb_regs(adev);
+ gfxhub_v11_5_0_init_cache_regs(adev);
+
+ gfxhub_v11_5_0_enable_system_domain(adev);
+ gfxhub_v11_5_0_disable_identity_aperture(adev);
+ gfxhub_v11_5_0_setup_vmid_config(adev);
+ gfxhub_v11_5_0_program_invalidation(adev);
+
+ return 0;
+}
+
+/*
+ * Disable GART on GFXHUB 0: zero the CNTL register of all 16 contexts, turn
+ * off the L1 TLB and the advanced driver model, disable the L2 cache and
+ * write 0 to GCVM_L2_CNTL3.
+ */
+static void gfxhub_v11_5_0_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, 0);
+}
+
+/**
+ * gfxhub_v11_5_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ *
+ * Always sets CPG_UTCL1_ERROR_HALT_DISABLE in CP_DEBUG. Unless running as an
+ * SR-IOV VF, it then writes @value into every *_PROTECTION_FAULT_ENABLE_DEFAULT
+ * field of GCVM_L2_PROTECTION_FAULT_CNTL and, when @value is false, also sets
+ * CRASH_ON_NO_RETRY_FAULT and CRASH_ON_RETRY_FAULT.
+ */
+static void gfxhub_v11_5_0_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ /* NO halt CP when page fault */
+ tmp = RREG32_SOC15(GC, 0, regCP_DEBUG);
+ tmp = REG_SET_FIELD(tmp, CP_DEBUG, CPG_UTCL1_ERROR_HALT_DISABLE, 1);
+ WREG32_SOC15(GC, 0, regCP_DEBUG, tmp);
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ /* The crash bits are only ever set here, never cleared. */
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+/* Per-hub callbacks; installed on the GFXHUB 0 vmhub by gfxhub_v11_5_0_init(). */
+static const struct amdgpu_vmhub_funcs gfxhub_v11_5_0_vmhub_funcs = {
+ .print_l2_protection_fault_status = gfxhub_v11_5_0_print_l2_protection_fault_status,
+ .get_invalidate_req = gfxhub_v11_5_0_get_invalidate_req,
+};
+
+/*
+ * Fill in the GFXHUB 0 vmhub descriptor: register offsets for context 0 and
+ * invalidation engine 0, the strides between consecutive contexts/engines,
+ * the interrupt-enable mask used for VM faults, and the vmhub callbacks.
+ * Only software state is written; no registers are accessed.
+ */
+static void gfxhub_v11_5_0_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(GC, 0,
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(GC, 0,
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+
+ /* Strides are derived from the distance between the instance 0 and 1 registers. */
+ hub->ctx_distance = regGCVM_CONTEXT1_CNTL - regGCVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regGCVM_INVALIDATE_ENG1_REQ -
+ regGCVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &gfxhub_v11_5_0_vmhub_funcs;
+}
+
+/* GFXHUB v11.5.0 entry points exported to the rest of the driver. */
+const struct amdgpu_gfxhub_funcs gfxhub_v11_5_0_funcs = {
+ .get_fb_location = gfxhub_v11_5_0_get_fb_location,
+ .get_mc_fb_offset = gfxhub_v11_5_0_get_mc_fb_offset,
+ .setup_vm_pt_regs = gfxhub_v11_5_0_setup_vm_pt_regs,
+ .gart_enable = gfxhub_v11_5_0_gart_enable,
+ .gart_disable = gfxhub_v11_5_0_gart_disable,
+ .set_fault_enable_default = gfxhub_v11_5_0_set_fault_enable_default,
+ .init = gfxhub_v11_5_0_init,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.h
new file mode 100644
index 000000000000..265ab631b3d0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V11_5_0_H__
+#define __GFXHUB_V11_5_0_H__
+
+extern const struct amdgpu_gfxhub_funcs gfxhub_v11_5_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.c
new file mode 100644
index 000000000000..7609b9cecae8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.c
@@ -0,0 +1,521 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "gfxhub_v12_0.h"
+
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "soc24_enum.h"
+#include "soc15_common.h"
+
+/*
+ * Driver-side base values for the GCVM_L2_CNTL3/4/5 registers.
+ * gfxhub_v12_0_init_cache_regs() starts from these constants (rather than
+ * reading the register back) before patching individual fields.
+ * regGRBM_GFX_INDEX_DEFAULT is not referenced anywhere else in this file.
+ */
+#define regGCVM_L2_CNTL3_DEFAULT 0x80120007
+#define regGCVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regGCVM_L2_CNTL5_DEFAULT 0x00003fe0
+#define regGRBM_GFX_INDEX_DEFAULT 0xe0000000
+
+/*
+ * Human-readable names of UTCL2 clients, indexed by the CID field of
+ * GCVM_L2_PROTECTION_FAULT_STATUS_LO32. Only used by
+ * gfxhub_v12_0_print_l2_protection_fault_status(), which prints "unknown"
+ * for a CID beyond the end of this table. Entry order is significant.
+ */
+static const char *gfxhub_client_ids[] = {
+ "CB",
+ "DB",
+ "GE1",
+ "GE2",
+ "CPF",
+ "CPC",
+ "CPG",
+ "RLC",
+ "TCP",
+ "SQC (inst)",
+ "SQC (data)",
+ "SQG/PC/SC",
+ "Reserved",
+ "SDMA0",
+ "SDMA1",
+ "GCR",
+ "Reserved",
+ "Reserved",
+ "WGS",
+ "DSM",
+ "PA"
+};
+
+/*
+ * Compose the request word for a GCVM invalidation engine.
+ *
+ * @vmid: VMID whose translations are to be invalidated
+ * @flush_type: value placed in the FLUSH_TYPE field
+ *
+ * Returns the assembled GCVM_INVALIDATE_ENG0_REQ value; nothing is written
+ * to hardware here.
+ */
+static uint32_t gfxhub_v12_0_get_invalidate_req(unsigned int vmid,
+						  uint32_t flush_type)
+{
+	u32 inv_req = 0;
+
+	/* Legacy-mode invalidation restricted to the bit of this VMID. */
+	inv_req = REG_SET_FIELD(inv_req, GCVM_INVALIDATE_ENG0_REQ,
+				PER_VMID_INVALIDATE_REQ, 1 << vmid);
+	inv_req = REG_SET_FIELD(inv_req, GCVM_INVALIDATE_ENG0_REQ,
+				FLUSH_TYPE, flush_type);
+
+	/* Invalidate the L2 PTEs, all three L2 PDE levels and the L1 PTEs. */
+	inv_req = REG_SET_FIELD(inv_req, GCVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L2_PTES, 1);
+	inv_req = REG_SET_FIELD(inv_req, GCVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L2_PDE0, 1);
+	inv_req = REG_SET_FIELD(inv_req, GCVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L2_PDE1, 1);
+	inv_req = REG_SET_FIELD(inv_req, GCVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L2_PDE2, 1);
+	inv_req = REG_SET_FIELD(inv_req, GCVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L1_PTES, 1);
+
+	/* CLEAR_PROTECTION_FAULT_STATUS_ADDR is explicitly left at 0. */
+	inv_req = REG_SET_FIELD(inv_req, GCVM_INVALIDATE_ENG0_REQ,
+				CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+	return inv_req;
+}
+
+/*
+ * Decode a GCVM_L2_PROTECTION_FAULT_STATUS_LO32 value and log it via
+ * dev_err(): the raw word, the faulting UTCL2 client (by name when the CID
+ * is inside gfxhub_client_ids[], "unknown" otherwise) and the individual
+ * status fields.
+ *
+ * @adev: amdgpu_device pointer (supplies the struct device for logging)
+ * @status: raw fault status value to decode
+ */
+static void
+gfxhub_v12_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
+					      uint32_t status)
+{
+	const char *client_name = "unknown";
+	u32 cid;
+
+	cid = REG_GET_FIELD(status, GCVM_L2_PROTECTION_FAULT_STATUS_LO32, CID);
+	if (cid < ARRAY_SIZE(gfxhub_client_ids))
+		client_name = gfxhub_client_ids[cid];
+
+	dev_err(adev->dev, "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status);
+	dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+		client_name, cid);
+	dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+		REG_GET_FIELD(status, GCVM_L2_PROTECTION_FAULT_STATUS_LO32,
+			      MORE_FAULTS));
+	dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+		REG_GET_FIELD(status, GCVM_L2_PROTECTION_FAULT_STATUS_LO32,
+			      WALKER_ERROR));
+	dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+		REG_GET_FIELD(status, GCVM_L2_PROTECTION_FAULT_STATUS_LO32,
+			      PERMISSION_FAULTS));
+	dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+		REG_GET_FIELD(status, GCVM_L2_PROTECTION_FAULT_STATUS_LO32,
+			      MAPPING_ERROR));
+	dev_err(adev->dev, "\t RW: 0x%lx\n",
+		REG_GET_FIELD(status, GCVM_L2_PROTECTION_FAULT_STATUS_LO32,
+			      RW));
+}
+
+/*
+ * Read GCMC_VM_FB_LOCATION_BASE, keep only the FB_BASE field and return it
+ * shifted left by 24 bits as a 64-bit value.
+ */
+static u64 gfxhub_v12_0_get_fb_location(struct amdgpu_device *adev)
+{
+	u64 fb_base = RREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE);
+
+	/* Widened to u64 above so the shift cannot drop high bits. */
+	fb_base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+
+	return fb_base << 24;
+}
+
+/*
+ * Return the raw GCMC_VM_FB_OFFSET register value shifted left by 24 bits,
+ * computed in 64-bit arithmetic.
+ */
+static u64 gfxhub_v12_0_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+	u64 fb_offset = RREG32_SOC15(GC, 0, regGCMC_VM_FB_OFFSET);
+
+	return fb_offset << 24;
+}
+
+/*
+ * Write the page table base address of one VM context.
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: context index; selects the register pair at
+ *        CONTEXT0 + vmid * hub->ctx_addr_distance
+ * @page_table_base: 64-bit value split across the LO32/HI32 registers
+ */
+static void gfxhub_v12_0_setup_vm_pt_regs(struct amdgpu_device *adev,
+					  uint32_t vmid,
+					  uint64_t page_table_base)
+{
+	struct amdgpu_vmhub *gfxhub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+	u32 base_lo = lower_32_bits(page_table_base);
+	u32 base_hi = upper_32_bits(page_table_base);
+
+	WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+			    gfxhub->ctx_addr_distance * vmid, base_lo);
+
+	WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+			    gfxhub->ctx_addr_distance * vmid, base_hi);
+}
+
+/*
+ * Point VM context 0 at the GART page table and program its address range.
+ * The start/end registers take gart_start/gart_end shifted right by 12
+ * (LO32) and by 44 (HI32).
+ */
+static void gfxhub_v12_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+	u32 start_lo = (u32)(adev->gmc.gart_start >> 12);
+	u32 start_hi = (u32)(adev->gmc.gart_start >> 44);
+	u32 end_lo = (u32)(adev->gmc.gart_end >> 12);
+	u32 end_hi = (u32)(adev->gmc.gart_end >> 44);
+
+	/* Page table base of context 0 comes from the GART buffer object. */
+	gfxhub_v12_0_setup_vm_pt_regs(adev, 0,
+				      amdgpu_gmc_pd_addr(adev->gart.bo));
+
+	WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+		     start_lo);
+	WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+		     start_hi);
+
+	WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+		     end_lo);
+	WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+		     end_hi);
+}
+
+/*
+ * Program the AGP window, the system aperture bounds, the system aperture
+ * default address and the protection-fault default address.
+ *
+ * Register write order is kept exactly as authored.
+ */
+static void gfxhub_v12_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /* Program the system aperture low logical page number. */
+ /* The aperture spans both FB and AGP: lowest start to highest end, >> 18. */
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ /* Scratch buffer address rebased from vram_start onto vram_base_offset. */
+ value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start
+ + adev->vm_manager.vram_base_offset;
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ /* Default fault address is the driver's dummy page (>> 12 low, >> 44 high). */
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+}
+
+
+/*
+ * Read-modify-write GCMC_VM_MX_L1_TLB_CNTL: enable the L1 TLB and the
+ * advanced driver model, set SYSTEM_ACCESS_MODE to 3, clear
+ * SYSTEM_APERTURE_UNMAPPED_ACCESS and ECO_BITS, and select MTYPE_UC.
+ */
+static void gfxhub_v12_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+	uint32_t tlb_cntl = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+
+	tlb_cntl = REG_SET_FIELD(tlb_cntl, GCMC_VM_MX_L1_TLB_CNTL,
+				 ENABLE_L1_TLB, 1);
+	tlb_cntl = REG_SET_FIELD(tlb_cntl, GCMC_VM_MX_L1_TLB_CNTL,
+				 SYSTEM_ACCESS_MODE, 3);
+	tlb_cntl = REG_SET_FIELD(tlb_cntl, GCMC_VM_MX_L1_TLB_CNTL,
+				 ENABLE_ADVANCED_DRIVER_MODEL, 1);
+	tlb_cntl = REG_SET_FIELD(tlb_cntl, GCMC_VM_MX_L1_TLB_CNTL,
+				 SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+	tlb_cntl = REG_SET_FIELD(tlb_cntl, GCMC_VM_MX_L1_TLB_CNTL,
+				 ECO_BITS, 0);
+	/* UC, uncached */
+	tlb_cntl = REG_SET_FIELD(tlb_cntl, GCMC_VM_MX_L1_TLB_CNTL,
+				 MTYPE, MTYPE_UC);
+
+	WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tlb_cntl);
+}
+
+/*
+ * Configure the VM L2 cache through GCVM_L2_CNTL .. GCVM_L2_CNTL5.
+ *
+ * Does nothing on an SR-IOV VF. CNTL and CNTL2 are read-modify-write;
+ * CNTL3/4/5 are rebuilt from the regGCVM_L2_CNTLn_DEFAULT constants.
+ */
+static void gfxhub_v12_0_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL, tmp);
+
+ /* Request invalidation of all L1 TLBs and of the L2 cache. */
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL2, tmp);
+
+ /* BANK_SELECT and big-K fragment size depend on gmc.translate_further. */
+ tmp = regGCVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, tmp);
+
+ /* Clear both VMC_TAP_*_REQUEST_PHYSICAL bits on top of the default. */
+ tmp = regGCVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL4, tmp);
+
+ tmp = regGCVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL5, tmp);
+}
+
+/*
+ * Enable VM context 0 with PAGE_TABLE_DEPTH 0 and with
+ * RETRY_PERMISSION_OR_INVALID_PAGE_FAULT cleared (read-modify-write of
+ * GCVM_CONTEXT0_CNTL).
+ */
+static void gfxhub_v12_0_enable_system_domain(struct amdgpu_device *adev)
+{
+	uint32_t ctx0_cntl = RREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL);
+
+	ctx0_cntl = REG_SET_FIELD(ctx0_cntl, GCVM_CONTEXT0_CNTL,
+				  ENABLE_CONTEXT, 1);
+	ctx0_cntl = REG_SET_FIELD(ctx0_cntl, GCVM_CONTEXT0_CNTL,
+				  PAGE_TABLE_DEPTH, 0);
+	ctx0_cntl = REG_SET_FIELD(ctx0_cntl, GCVM_CONTEXT0_CNTL,
+				  RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+
+	WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL, ctx0_cntl);
+}
+
+/*
+ * Program the context-1 identity aperture registers: LOW_ADDR is set to
+ * 0xF_FFFFFFFF, HIGH_ADDR to 0 and the physical offset to 0.
+ * NOTE(review): low > high presumably yields an empty (disabled) aperture,
+ * as the function name suggests - confirm against the register spec.
+ *
+ * Does nothing on an SR-IOV VF.
+ */
+static void gfxhub_v12_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+
+}
+
+/*
+ * Configure the 15 VM contexts that follow context 0 (loop index 0..14,
+ * starting at GCVM_CONTEXT1_CNTL).
+ *
+ * For each context: enable it, set the page table depth and block size from
+ * adev->vm_manager, turn on every *_PROTECTION_FAULT_ENABLE_DEFAULT bit,
+ * select the retry policy from amdgpu_noretry, and program the page table
+ * range as 0 .. max_pfn - 1. The control value written for the last context
+ * is cached in hub->vm_cntx_cntl.
+ */
+static void gfxhub_v12_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+	int i;
+	uint32_t tmp;
+
+	for (i = 0; i <= 14; i++) {
+		/*
+		 * Read back the very register that is written at the end of
+		 * this iteration: contexts are ctx_distance registers apart,
+		 * so the offset must be scaled the same way as the write.
+		 */
+		tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL,
+					  i * hub->ctx_distance);
+		tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+		tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+				    adev->vm_manager.num_level);
+		tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+				    RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+		tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+				    DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+		tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+				    PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+		tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+				    VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+		tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+				    READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+		tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+				    WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+		tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+				    EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+		/* The field holds the block size minus 9. */
+		tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+				    PAGE_TABLE_BLOCK_SIZE,
+				    adev->vm_manager.block_size - 9);
+		/* Send no-retry XNACK on fault to suppress VM fault storm. */
+		tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+				    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+				    !amdgpu_noretry);
+		WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL,
+				    i * hub->ctx_distance, tmp);
+
+		/* Address range covered by this context: 0 .. max_pfn - 1. */
+		WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+				    i * hub->ctx_addr_distance, 0);
+		WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+				    i * hub->ctx_addr_distance, 0);
+		WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+				    i * hub->ctx_addr_distance,
+				    lower_32_bits(adev->vm_manager.max_pfn - 1));
+		WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+				    i * hub->ctx_addr_distance,
+				    upper_32_bits(adev->vm_manager.max_pfn - 1));
+	}
+
+	/* Remember the control value programmed for the last context. */
+	hub->vm_cntx_cntl = tmp;
+}
+
+/*
+ * For each of the 18 invalidation engines, set the address range registers
+ * to 0xffffffff (LO32) and 0x1f (HI32). Engines are hub->eng_addr_distance
+ * registers apart.
+ */
+static void gfxhub_v12_0_program_invalidation(struct amdgpu_device *adev)
+{
+	struct amdgpu_vmhub *gfxhub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+	unsigned int eng;
+
+	for (eng = 0; eng < 18; eng++) {
+		WREG32_SOC15_OFFSET(GC, 0,
+				    regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+				    eng * gfxhub->eng_addr_distance,
+				    0xffffffff);
+		WREG32_SOC15_OFFSET(GC, 0,
+				    regGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+				    eng * gfxhub->eng_addr_distance,
+				    0x1f);
+	}
+}
+
+/*
+ * Bring up the GFX hub: on an SR-IOV VF first program the FB location
+ * registers, then run the aperture, TLB, cache, context and invalidation
+ * setup helpers in order.
+ *
+ * Always returns 0.
+ */
+static int gfxhub_v12_0_gart_enable(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev)) {
+ /*
+ * GCMC_VM_FB_LOCATION_BASE/TOP are NULL for a VF because they are
+ * VF copy registers, so the VBIOS post doesn't program them; the
+ * SR-IOV driver needs to program them itself.
+ */
+ WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE,
+ adev->gmc.vram_start >> 24);
+ WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_TOP,
+ adev->gmc.vram_end >> 24);
+ }
+
+ /* GART Enable. */
+ gfxhub_v12_0_init_gart_aperture_regs(adev);
+ gfxhub_v12_0_init_system_aperture_regs(adev);
+ gfxhub_v12_0_init_tlb_regs(adev);
+ gfxhub_v12_0_init_cache_regs(adev);
+
+ gfxhub_v12_0_enable_system_domain(adev);
+ gfxhub_v12_0_disable_identity_aperture(adev);
+ gfxhub_v12_0_setup_vmid_config(adev);
+ gfxhub_v12_0_program_invalidation(adev);
+
+ return 0;
+}
+
+/*
+ * Shut the GFX hub down: zero the control register of all 16 VM contexts,
+ * clear ENABLE_L1_TLB and ENABLE_ADVANCED_DRIVER_MODEL, clear
+ * ENABLE_L2_CACHE and write 0 to GCVM_L2_CNTL3.
+ */
+static void gfxhub_v12_0_gart_disable(struct amdgpu_device *adev)
+{
+	struct amdgpu_vmhub *gfxhub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+	u32 tlb_cntl;
+	u32 ctx;
+
+	/* Disable all tables */
+	for (ctx = 0; ctx < 16; ctx++) {
+		WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL,
+				    ctx * gfxhub->ctx_distance, 0);
+	}
+
+	/* Setup TLB control */
+	tlb_cntl = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+	tlb_cntl = REG_SET_FIELD(tlb_cntl, GCMC_VM_MX_L1_TLB_CNTL,
+				 ENABLE_L1_TLB, 0);
+	tlb_cntl = REG_SET_FIELD(tlb_cntl, GCMC_VM_MX_L1_TLB_CNTL,
+				 ENABLE_ADVANCED_DRIVER_MODEL, 0);
+	WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tlb_cntl);
+
+	/* Setup L2 cache */
+	WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+	WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, 0);
+}
+
+/**
+ * gfxhub_v12_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ *
+ * Always sets CP_DEBUG.CPG_UTCL1_ERROR_HALT_DISABLE. On bare metal (not an
+ * SR-IOV VF) it then writes @value into every *_PROTECTION_FAULT_ENABLE_DEFAULT
+ * field of GCVM_L2_PROTECTION_FAULT_CNTL and, when @value is false, also
+ * sets the two CRASH_ON_*_FAULT bits.
+ */
+static void gfxhub_v12_0_set_fault_enable_default(struct amdgpu_device *adev,
+						  bool value)
+{
+	u32 cp_debug;
+	u32 fault_cntl;
+
+	/* NO halt CP when page fault */
+	cp_debug = RREG32_SOC15(GC, 0, regCP_DEBUG);
+	cp_debug = REG_SET_FIELD(cp_debug, CP_DEBUG,
+				 CPG_UTCL1_ERROR_HALT_DISABLE, 1);
+	WREG32_SOC15(GC, 0, regCP_DEBUG, cp_debug);
+
+	/*
+	 * The remaining registers are not accessible to VF-SRIOV.
+	 * The PF will program them instead.
+	 */
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	fault_cntl = RREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+	fault_cntl = REG_SET_FIELD(fault_cntl, GCVM_L2_PROTECTION_FAULT_CNTL,
+				   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	fault_cntl = REG_SET_FIELD(fault_cntl, GCVM_L2_PROTECTION_FAULT_CNTL,
+				   PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	fault_cntl = REG_SET_FIELD(fault_cntl, GCVM_L2_PROTECTION_FAULT_CNTL,
+				   PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	fault_cntl = REG_SET_FIELD(fault_cntl, GCVM_L2_PROTECTION_FAULT_CNTL,
+				   PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	fault_cntl = REG_SET_FIELD(fault_cntl, GCVM_L2_PROTECTION_FAULT_CNTL,
+				   TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+				   value);
+	fault_cntl = REG_SET_FIELD(fault_cntl, GCVM_L2_PROTECTION_FAULT_CNTL,
+				   NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	fault_cntl = REG_SET_FIELD(fault_cntl, GCVM_L2_PROTECTION_FAULT_CNTL,
+				   DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	fault_cntl = REG_SET_FIELD(fault_cntl, GCVM_L2_PROTECTION_FAULT_CNTL,
+				   VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	fault_cntl = REG_SET_FIELD(fault_cntl, GCVM_L2_PROTECTION_FAULT_CNTL,
+				   READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	fault_cntl = REG_SET_FIELD(fault_cntl, GCVM_L2_PROTECTION_FAULT_CNTL,
+				   WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	fault_cntl = REG_SET_FIELD(fault_cntl, GCVM_L2_PROTECTION_FAULT_CNTL,
+				   EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+
+	/* With default-page redirection off, also set both crash-on-fault bits. */
+	if (!value) {
+		fault_cntl = REG_SET_FIELD(fault_cntl,
+					   GCVM_L2_PROTECTION_FAULT_CNTL,
+					   CRASH_ON_NO_RETRY_FAULT, 1);
+		fault_cntl = REG_SET_FIELD(fault_cntl,
+					   GCVM_L2_PROTECTION_FAULT_CNTL,
+					   CRASH_ON_RETRY_FAULT, 1);
+	}
+
+	WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, fault_cntl);
+}
+
+/*
+ * Per-hub helper callbacks; installed into hub->vmhub_funcs by
+ * gfxhub_v12_0_init().
+ */
+static const struct amdgpu_vmhub_funcs gfxhub_v12_0_vmhub_funcs = {
+ .print_l2_protection_fault_status = gfxhub_v12_0_print_l2_protection_fault_status,
+ .get_invalidate_req = gfxhub_v12_0_get_invalidate_req,
+};
+
+/*
+ * Fill in the software description of GFX hub 0: absolute register offsets
+ * the common VM code uses, the strides between per-context and per-engine
+ * register copies, the set of fault interrupt enable bits, and the hub
+ * callback table. No hardware register is touched here.
+ */
+static void gfxhub_v12_0_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_vmhub *gfxhub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+	/* Every protection-fault interrupt enable bit of a context register. */
+	const u32 fault_irq_bits =
+		GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+		GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+		GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+		GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+		GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+		GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+		GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+	/* Context 0 page table base and control registers. */
+	gfxhub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0,
+				regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+	gfxhub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(GC, 0,
+				regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+	gfxhub->vm_context0_cntl = SOC15_REG_OFFSET(GC, 0,
+				regGCVM_CONTEXT0_CNTL);
+
+	/* Invalidation engine 0 registers. */
+	gfxhub->vm_inv_eng0_sem = SOC15_REG_OFFSET(GC, 0,
+				regGCVM_INVALIDATE_ENG0_SEM);
+	gfxhub->vm_inv_eng0_req = SOC15_REG_OFFSET(GC, 0,
+				regGCVM_INVALIDATE_ENG0_REQ);
+	gfxhub->vm_inv_eng0_ack = SOC15_REG_OFFSET(GC, 0,
+				regGCVM_INVALIDATE_ENG0_ACK);
+
+	/* L2 protection fault status/control registers. */
+	gfxhub->vm_l2_pro_fault_status = SOC15_REG_OFFSET(GC, 0,
+				regGCVM_L2_PROTECTION_FAULT_STATUS_LO32);
+	gfxhub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(GC, 0,
+				regGCVM_L2_PROTECTION_FAULT_CNTL);
+
+	/* Strides, derived from the spacing of instance 0 and instance 1. */
+	gfxhub->ctx_distance = regGCVM_CONTEXT1_CNTL - regGCVM_CONTEXT0_CNTL;
+	gfxhub->ctx_addr_distance =
+		regGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+		regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+	gfxhub->eng_distance = regGCVM_INVALIDATE_ENG1_REQ -
+			       regGCVM_INVALIDATE_ENG0_REQ;
+	gfxhub->eng_addr_distance =
+		regGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+		regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+	gfxhub->vm_cntx_cntl_vm_fault = fault_irq_bits;
+
+	gfxhub->vmhub_funcs = &gfxhub_v12_0_vmhub_funcs;
+}
+
+/*
+ * GFX hub callback table for the v12.0 hub: binds each amdgpu_gfxhub_funcs
+ * hook to the static implementation in this file. Declared extern in
+ * gfxhub_v12_0.h.
+ */
+const struct amdgpu_gfxhub_funcs gfxhub_v12_0_funcs = {
+ .get_fb_location = gfxhub_v12_0_get_fb_location,
+ .get_mc_fb_offset = gfxhub_v12_0_get_mc_fb_offset,
+ .setup_vm_pt_regs = gfxhub_v12_0_setup_vm_pt_regs,
+ .gart_enable = gfxhub_v12_0_gart_enable,
+ .gart_disable = gfxhub_v12_0_gart_disable,
+ .set_fault_enable_default = gfxhub_v12_0_set_fault_enable_default,
+ .init = gfxhub_v12_0_init,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.h
new file mode 100644
index 000000000000..f1258265f802
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V12_0_H__
+#define __GFXHUB_V12_0_H__
+
+extern const struct amdgpu_gfxhub_funcs gfxhub_v12_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index ec4d5e15b766..a7bfc9f41d0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -40,7 +40,7 @@ static void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev,
uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
@@ -92,18 +92,20 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
{
uint64_t value;
- /* Program the AGP BAR */
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0);
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
-
if (!amdgpu_sriov_vf(adev) || adev->asic_type <= CHIP_VEGA10) {
+ /* Program the AGP BAR */
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0);
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
/* Program the system aperture low logical page number. */
WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
- if (adev->apu_flags & AMD_APU_IS_RAVEN2)
- /*
+ if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+ AMD_APU_IS_RENOIR |
+ AMD_APU_IS_GREEN_SARDINE))
+ /*
* Raven2 has a HW issue that it is unable to use the
* vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
* So here is the workaround that increase system
@@ -120,7 +122,7 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */
- value = amdgpu_gmc_vram_mc2pa(adev, adev->vram_scratch.gpu_addr);
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
@@ -247,8 +249,8 @@ static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
- unsigned num_level, block_size;
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ unsigned int num_level, block_size;
uint32_t tmp;
int i;
@@ -260,7 +262,7 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
block_size -= 9;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
num_level);
@@ -307,8 +309,8 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
- unsigned i;
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ unsigned int i;
for (i = 0 ; i < 18; ++i) {
WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
@@ -338,7 +340,7 @@ static int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
static void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
u32 tmp;
u32 i;
@@ -375,6 +377,7 @@ static void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
bool value)
{
u32 tmp;
+
tmp = RREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
@@ -411,7 +414,7 @@ static void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
static void gfxhub_v1_0_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(GC, 0,
@@ -440,7 +443,6 @@ static void gfxhub_v1_0_init(struct amdgpu_device *adev)
mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
}
-
const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = {
.get_mc_fb_offset = gfxhub_v1_0_get_mc_fb_offset,
.setup_vm_pt_regs = gfxhub_v1_0_setup_vm_pt_regs,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
new file mode 100644
index 000000000000..6c03bf9f1ae8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -0,0 +1,674 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_xcp.h"
+#include "gfxhub_v1_2.h"
+#include "gfxhub_v1_1.h"
+
+#include "gc/gc_9_4_3_offset.h"
+#include "gc/gc_9_4_3_sh_mask.h"
+#include "vega10_enum.h"
+
+#include "soc15_common.h"
+
+#define regVM_L2_CNTL3_DEFAULT 0x80100007
+#define regVM_L2_CNTL4_DEFAULT 0x000000c1
+
+static u64 gfxhub_v1_2_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(GC, GET_INST(GC, 0), regMC_VM_FB_OFFSET) << 24;
+}
+
+static void gfxhub_v1_2_xcc_setup_vm_pt_regs(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint64_t page_table_base,
+ uint32_t xcc_mask)
+{
+ struct amdgpu_vmhub *hub;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(i)];
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+ }
+}
+
+static void gfxhub_v1_2_setup_vm_pt_regs(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint64_t page_table_base)
+{
+ uint32_t xcc_mask;
+
+ xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+ gfxhub_v1_2_xcc_setup_vm_pt_regs(adev, vmid, page_table_base, xcc_mask);
+}
+
+static void gfxhub_v1_2_xcc_init_gart_aperture_regs(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ uint64_t gart_start = amdgpu_virt_xgmi_migrate_enabled(adev) ?
+ adev->gmc.vram_start : adev->gmc.fb_start;
+ uint64_t pt_base;
+ int i;
+
+ if (adev->gmc.pdb0_bo)
+ pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo);
+ else
+ pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ gfxhub_v1_2_xcc_setup_vm_pt_regs(adev, 0, pt_base, xcc_mask);
+
+ /* If GART is used for FB translation, the vmid0 page table covers
+ * both vram and system memory (gart).
+ */
+ for_each_inst(i, xcc_mask) {
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(gart_start >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(gart_start >> 44));
+
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+ } else {
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+ }
+ }
+}
+
+static void
+gfxhub_v1_2_xcc_init_system_aperture_regs(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ uint64_t value;
+ uint32_t tmp;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ /* Program the AGP BAR */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_AGP_BASE, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ if (!amdgpu_sriov_vf(adev) || adev->asic_type <= CHIP_VEGA10) {
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+
+ if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+ AMD_APU_IS_RENOIR |
+ AMD_APU_IS_GREEN_SARDINE))
+ /*
+ * Raven2 has a HW issue: it is unable to use vram
+ * that lies beyond MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
+ * As a workaround, increase the system aperture
+ * high address (add 1) to avoid the VM fault and
+ * hardware hang.
+ */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max((adev->gmc.fb_end >> 18) + 0x1,
+ adev->gmc.agp_end >> 18));
+ else
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+ }
+
+ /* When vram is squeezed into the GART aperture, the FB and AGP
+ * apertures are not used. Disable them.
+ */
+ if (adev->gmc.pdb0_bo && adev->gmc.xgmi.connected_to_cpu) {
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_TOP, 0);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_AGP_TOP, 0);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_AGP_BOT, 0xFFFFFF);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x3FFFFFFF);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0);
+ }
+ }
+}
+
+static void gfxhub_v1_2_xcc_init_tlb_regs(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ uint32_t tmp;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_MX_L1_TLB_CNTL, tmp);
+ }
+}
+
+static void gfxhub_v1_2_xcc_init_cache_regs(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ uint32_t tmp;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1);
+ /* XXX for emulation; refer to closed source code. */
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL2, tmp);
+
+ tmp = regVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL3, tmp);
+
+ tmp = regVM_L2_CNTL4_DEFAULT;
+ /* For AMD APP APUs, set up WC memory */
+ if (adev->gmc.xgmi.connected_to_cpu || adev->gmc.is_app_apu) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ }
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL4, tmp);
+ }
+}
+
+static void gfxhub_v1_2_xcc_enable_system_domain(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ uint32_t tmp;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH,
+ adev->gmc.vmid0_page_table_depth);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_BLOCK_SIZE,
+ adev->gmc.vmid0_page_table_block_size);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_CNTL, tmp);
+ }
+}
+
+static void
+gfxhub_v1_2_xcc_disable_identity_aperture(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0XFFFFFFFF);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+ }
+}
+
+static inline bool
+gfxhub_v1_2_per_process_xnack_support(struct amdgpu_device *adev)
+{
+ /*
+ * TODO: Check if this function is really needed, so far only 9.4.3
+ * variants use GFXHUB 1.2
+ */
+ return !!adev->aid_mask;
+}
+
+static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ struct amdgpu_vmhub *hub;
+ unsigned int num_level, block_size;
+ uint32_t tmp;
+ int i, j;
+
+ num_level = adev->vm_manager.num_level;
+ block_size = adev->vm_manager.block_size;
+ if (adev->gmc.translate_further)
+ num_level -= 1;
+ else
+ block_size -= 9;
+
+ for_each_inst(j, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ num_level);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ block_size);
+ /* Send no-retry XNACK on fault to suppress VM fault storm.
+ * On 9.4.3 variants, XNACK can be enabled in
+ * the SQ per-process.
+ * Retry faults need to be enabled for that to work.
+ */
+ tmp = REG_SET_FIELD(
+ tmp, VM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !adev->gmc.noretry ||
+ gfxhub_v1_2_per_process_xnack_support(
+ adev));
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+ }
+}
+
+static void gfxhub_v1_2_xcc_program_invalidation(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ struct amdgpu_vmhub *hub;
+ unsigned int i, j;
+
+ for_each_inst(j, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
+
+ for (i = 0 ; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+ }
+}
+
+static int gfxhub_v1_2_xcc_gart_enable(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ /* GART Enable. */
+ gfxhub_v1_2_xcc_init_gart_aperture_regs(adev, xcc_mask);
+ gfxhub_v1_2_xcc_init_system_aperture_regs(adev, xcc_mask);
+ gfxhub_v1_2_xcc_init_tlb_regs(adev, xcc_mask);
+ if (!amdgpu_sriov_vf(adev))
+ gfxhub_v1_2_xcc_init_cache_regs(adev, xcc_mask);
+
+ gfxhub_v1_2_xcc_enable_system_domain(adev, xcc_mask);
+ if (!amdgpu_sriov_vf(adev))
+ gfxhub_v1_2_xcc_disable_identity_aperture(adev, xcc_mask);
+ gfxhub_v1_2_xcc_setup_vmid_config(adev, xcc_mask);
+ gfxhub_v1_2_xcc_program_invalidation(adev, xcc_mask);
+
+ return 0;
+}
+
+static int gfxhub_v1_2_gart_enable(struct amdgpu_device *adev)
+{
+ uint32_t xcc_mask;
+
+ xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+ return gfxhub_v1_2_xcc_gart_enable(adev, xcc_mask);
+}
+
+static void gfxhub_v1_2_xcc_gart_disable(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ struct amdgpu_vmhub *hub;
+ u32 tmp;
+ u32 i, j;
+
+ for_each_inst(j, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, j), regMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp,
+ MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL,
+ 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, j), regMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ if (!amdgpu_sriov_vf(adev)) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp);
+ WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0);
+ }
+ }
+}
+
+static void gfxhub_v1_2_gart_disable(struct amdgpu_device *adev)
+{
+ uint32_t xcc_mask;
+
+ xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+ gfxhub_v1_2_xcc_gart_disable(adev, xcc_mask);
+}
+
+static void gfxhub_v1_2_xcc_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value,
+ uint32_t xcc_mask)
+{
+ u32 tmp;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp,
+ VM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL, tmp);
+ }
+}
+
+/**
+ * gfxhub_v1_2_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void gfxhub_v1_2_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ uint32_t xcc_mask;
+
+ xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+ gfxhub_v1_2_xcc_set_fault_enable_default(adev, value, xcc_mask);
+}
+
+static void gfxhub_v1_2_xcc_init(struct amdgpu_device *adev, uint32_t xcc_mask)
+{
+ struct amdgpu_vmhub *hub;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(i)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i),
+ regVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regVM_CONTEXT1_CNTL -
+ regVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance =
+ regVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regVM_INVALIDATE_ENG1_REQ -
+ regVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance =
+ regVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+ }
+}
+
+static void gfxhub_v1_2_init(struct amdgpu_device *adev)
+{
+ uint32_t xcc_mask;
+
+ xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+ gfxhub_v1_2_xcc_init(adev, xcc_mask);
+}
+
+static int gfxhub_v1_2_get_xgmi_info(struct amdgpu_device *adev)
+{
+ u32 max_num_physical_nodes;
+ u32 max_physical_node_id;
+ u32 xgmi_lfb_cntl;
+ u32 max_region;
+ u64 seg_size;
+
+ xgmi_lfb_cntl = RREG32_SOC15(GC, GET_INST(GC, 0), regMC_VM_XGMI_LFB_CNTL);
+ seg_size = REG_GET_FIELD(
+ RREG32_SOC15(GC, GET_INST(GC, 0), regMC_VM_XGMI_LFB_SIZE),
+ MC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
+ max_region =
+ REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
+
+
+
+ max_num_physical_nodes = 8;
+ max_physical_node_id = 7;
+
+ /* PF_MAX_REGION=0 means xgmi is disabled */
+ if (max_region || adev->gmc.xgmi.connected_to_cpu) {
+ adev->gmc.xgmi.num_physical_nodes = max_region + 1;
+
+ if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
+ return -EINVAL;
+
+ adev->gmc.xgmi.physical_node_id =
+ REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL,
+ PF_LFB_REGION);
+
+ if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
+ return -EINVAL;
+
+ adev->gmc.xgmi.node_segment_size = seg_size;
+ }
+
+ return 0;
+}
+
+const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = {
+ .get_mc_fb_offset = gfxhub_v1_2_get_mc_fb_offset,
+ .setup_vm_pt_regs = gfxhub_v1_2_setup_vm_pt_regs,
+ .gart_enable = gfxhub_v1_2_gart_enable,
+ .gart_disable = gfxhub_v1_2_gart_disable,
+ .set_fault_enable_default = gfxhub_v1_2_set_fault_enable_default,
+ .init = gfxhub_v1_2_init,
+ .get_xgmi_info = gfxhub_v1_2_get_xgmi_info,
+};
+
+static int gfxhub_v1_2_xcp_resume(void *handle, uint32_t inst_mask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool value;
+
+ if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
+ value = false;
+ else
+ value = true;
+
+ gfxhub_v1_2_xcc_set_fault_enable_default(adev, value, inst_mask);
+
+ if (!amdgpu_sriov_vf(adev))
+ return gfxhub_v1_2_xcc_gart_enable(adev, inst_mask);
+
+ return 0;
+}
+
+static int gfxhub_v1_2_xcp_suspend(void *handle, uint32_t inst_mask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!amdgpu_sriov_vf(adev))
+ gfxhub_v1_2_xcc_gart_disable(adev, inst_mask);
+
+ return 0;
+}
+
+struct amdgpu_xcp_ip_funcs gfxhub_v1_2_xcp_funcs = {
+ .suspend = &gfxhub_v1_2_xcp_suspend,
+ .resume = &gfxhub_v1_2_xcp_resume
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.h
new file mode 100644
index 000000000000..997e9f90c990
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V1_2_H__
+#define __GFXHUB_V1_2_H__
+
+extern const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs;
+
+extern struct amdgpu_xcp_ip_funcs gfxhub_v1_2_xcp_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
index 6e0ace2fbfab..793faf62cb07 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
@@ -31,7 +31,7 @@
#include "soc15_common.h"
-static const char *gfxhub_client_ids[] = {
+static const char * const gfxhub_client_ids[] = {
"CB/DB",
"Reserved",
"GE1",
@@ -120,7 +120,7 @@ static u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev)
static void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
@@ -165,7 +165,7 @@ static void gfxhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev)
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */
- value = amdgpu_gmc_vram_mc2pa(adev, adev->vram_scratch.gpu_addr);
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
@@ -282,12 +282,12 @@ static void gfxhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev)
static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
int i;
uint32_t tmp;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
@@ -325,12 +325,14 @@ static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
+
+ hub->vm_cntx_cntl = tmp;
}
static void gfxhub_v2_0_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
- unsigned i;
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ unsigned int i;
for (i = 0 ; i < 18; ++i) {
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
@@ -358,7 +360,7 @@ static int gfxhub_v2_0_gart_enable(struct amdgpu_device *adev)
static void gfxhub_v2_0_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
u32 tmp;
u32 i;
@@ -391,6 +393,7 @@ static void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev,
bool value)
{
u32 tmp;
+
tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
@@ -431,7 +434,7 @@ static const struct amdgpu_vmhub_funcs gfxhub_v2_0_vmhub_funcs = {
static void gfxhub_v2_0_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(GC, 0,
@@ -468,6 +471,9 @@ static void gfxhub_v2_0_init(struct amdgpu_device *adev)
GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+ /* TODO: This is only needed on some Navi 1x revisions */
+ hub->sdma_invalidation_workaround = true;
+
hub->vmhub_funcs = &gfxhub_v2_0_vmhub_funcs;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
index b4eddf6e98a6..deb95fab02df 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
@@ -34,7 +34,7 @@
#define mmGCUTCL2_HARVEST_BYPASS_GROUPS_YELLOW_CARP 0x16f8
#define mmGCUTCL2_HARVEST_BYPASS_GROUPS_YELLOW_CARP_BASE_IDX 0
-static const char *gfxhub_client_ids[] = {
+static const char * const gfxhub_client_ids[] = {
"CB/DB",
"Reserved",
"GE1",
@@ -123,7 +123,7 @@ static u64 gfxhub_v2_1_get_mc_fb_offset(struct amdgpu_device *adev)
static void gfxhub_v2_1_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
@@ -155,6 +155,9 @@ static void gfxhub_v2_1_init_system_aperture_regs(struct amdgpu_device *adev)
{
uint64_t value;
+ if (amdgpu_sriov_vf(adev))
+ return;
+
/* Program the AGP BAR */
WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0);
WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
@@ -167,7 +170,7 @@ static void gfxhub_v2_1_init_system_aperture_regs(struct amdgpu_device *adev)
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */
- value = amdgpu_gmc_vram_mc2pa(adev, adev->vram_scratch.gpu_addr);
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
@@ -291,12 +294,12 @@ static void gfxhub_v2_1_disable_identity_aperture(struct amdgpu_device *adev)
static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
int i;
uint32_t tmp;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
@@ -334,12 +337,14 @@ static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev)
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
+
+ hub->vm_cntx_cntl = tmp;
}
static void gfxhub_v2_1_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
- unsigned i;
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ unsigned int i;
for (i = 0 ; i < 18; ++i) {
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
@@ -379,7 +384,7 @@ static int gfxhub_v2_1_gart_enable(struct amdgpu_device *adev)
static void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
u32 tmp;
u32 i;
@@ -395,6 +400,9 @@ static void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev)
ENABLE_ADVANCED_DRIVER_MODEL, 0);
WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp);
+ if (amdgpu_sriov_vf(adev))
+ return;
+
/* Setup L2 cache */
WREG32_FIELD15(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, 0);
@@ -457,7 +465,7 @@ static const struct amdgpu_vmhub_funcs gfxhub_v2_1_vmhub_funcs = {
static void gfxhub_v2_1_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(GC, 0,
@@ -497,42 +505,6 @@ static void gfxhub_v2_1_init(struct amdgpu_device *adev)
hub->vmhub_funcs = &gfxhub_v2_1_vmhub_funcs;
}
-static int gfxhub_v2_1_get_xgmi_info(struct amdgpu_device *adev)
-{
- u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmGCMC_VM_XGMI_LFB_CNTL);
- u32 max_region =
- REG_GET_FIELD(xgmi_lfb_cntl, GCMC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
- u32 max_num_physical_nodes = 0;
- u32 max_physical_node_id = 0;
-
- switch (adev->ip_versions[XGMI_HWIP][0]) {
- case IP_VERSION(4, 8, 0):
- max_num_physical_nodes = 4;
- max_physical_node_id = 3;
- break;
- default:
- return -EINVAL;
- }
-
- /* PF_MAX_REGION=0 means xgmi is disabled */
- if (max_region) {
- adev->gmc.xgmi.num_physical_nodes = max_region + 1;
- if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
- return -EINVAL;
-
- adev->gmc.xgmi.physical_node_id =
- REG_GET_FIELD(xgmi_lfb_cntl, GCMC_VM_XGMI_LFB_CNTL, PF_LFB_REGION);
- if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
- return -EINVAL;
-
- adev->gmc.xgmi.node_segment_size = REG_GET_FIELD(
- RREG32_SOC15(GC, 0, mmGCMC_VM_XGMI_LFB_SIZE),
- GCMC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
- }
-
- return 0;
-}
-
static void gfxhub_v2_1_utcl2_harvest(struct amdgpu_device *adev)
{
int i;
@@ -543,7 +515,9 @@ static void gfxhub_v2_1_utcl2_harvest(struct amdgpu_device *adev)
adev->gfx.config.max_sh_per_se *
adev->gfx.config.max_shader_engines);
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3)) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 3):
/* Get SA disabled bitmap from eFuse setting */
efuse_setting = RREG32_SOC15(GC, 0, mmCC_GC_SA_UNIT_DISABLE);
efuse_setting &= CC_GC_SA_UNIT_DISABLE__SA_DISABLE_MASK;
@@ -566,9 +540,118 @@ static void gfxhub_v2_1_utcl2_harvest(struct amdgpu_device *adev)
disabled_sa = tmp;
WREG32_SOC15(GC, 0, mmGCUTCL2_HARVEST_BYPASS_GROUPS_YELLOW_CARP, disabled_sa);
+ break;
+ default:
+ break;
}
}
+static void gfxhub_v2_1_save_regs(struct amdgpu_device *adev)
+{
+ int i;
+
+ adev->gmc.VM_L2_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL);
+ adev->gmc.VM_L2_CNTL2 = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2);
+ adev->gmc.VM_DUMMY_PAGE_FAULT_CNTL = RREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_CNTL);
+ adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_LO32 = RREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_LO32);
+ adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_HI32 = RREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_HI32);
+ adev->gmc.VM_L2_PROTECTION_FAULT_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL);
+ adev->gmc.VM_L2_PROTECTION_FAULT_CNTL2 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL2);
+ adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL3 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL3);
+ adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL4 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL4);
+ adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_LO32 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_LO32);
+ adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_HI32 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_HI32);
+ adev->gmc.VM_DEBUG = RREG32_SOC15(GC, 0, mmGCVM_DEBUG);
+ adev->gmc.VM_L2_MM_GROUP_RT_CLASSES = RREG32_SOC15(GC, 0, mmGCVM_L2_MM_GROUP_RT_CLASSES);
+ adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID = RREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID);
+ adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID2 = RREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID2);
+ adev->gmc.VM_L2_CACHE_PARITY_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_CACHE_PARITY_CNTL);
+ adev->gmc.VM_L2_IH_LOG_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_IH_LOG_CNTL);
+
+ for (i = 0; i <= 15; i++) {
+ adev->gmc.VM_CONTEXT_CNTL[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i);
+ adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_LO32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, i * 2);
+ adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_HI32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, i * 2);
+ adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_LO32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, i * 2);
+ adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_HI32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, i * 2);
+ adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, i * 2);
+ adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, i * 2);
+ }
+
+ adev->gmc.MC_VM_MX_L1_TLB_CNTL = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL);
+}
+
+static void gfxhub_v2_1_restore_regs(struct amdgpu_device *adev)
+{
+ int i;
+
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL, adev->gmc.VM_L2_CNTL);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, adev->gmc.VM_L2_CNTL2);
+ WREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_CNTL, adev->gmc.VM_DUMMY_PAGE_FAULT_CNTL);
+ WREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_LO32, adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_LO32);
+ WREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_HI32, adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_HI32);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL, adev->gmc.VM_L2_PROTECTION_FAULT_CNTL);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL2, adev->gmc.VM_L2_PROTECTION_FAULT_CNTL2);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL3, adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL3);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL4, adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL4);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_LO32, adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_LO32);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_HI32, adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_HI32);
+ WREG32_SOC15(GC, 0, mmGCVM_DEBUG, adev->gmc.VM_DEBUG);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_MM_GROUP_RT_CLASSES, adev->gmc.VM_L2_MM_GROUP_RT_CLASSES);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID, adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID2, adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID2);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CACHE_PARITY_CNTL, adev->gmc.VM_L2_CACHE_PARITY_CNTL);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_IH_LOG_CNTL, adev->gmc.VM_L2_IH_LOG_CNTL);
+
+ for (i = 0; i <= 15; i++) {
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i, adev->gmc.VM_CONTEXT_CNTL[i]);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_LO32[i]);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_HI32[i]);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_LO32[i]);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_HI32[i]);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[i]);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[i]);
+ }
+
+ WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE, adev->gmc.vram_start >> 24);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_TOP, adev->gmc.vram_end >> 24);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, adev->gmc.MC_VM_MX_L1_TLB_CNTL);
+}
+
+static void gfxhub_v2_1_halt(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ int i;
+ uint32_t tmp;
+ int time = 1000;
+
+ gfxhub_v2_1_set_fault_enable_default(adev, false);
+
+ for (i = 0; i <= 14; i++) {
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, ~0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, ~0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ 0);
+ }
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
+ while ((tmp & (GRBM_STATUS2__EA_BUSY_MASK |
+ GRBM_STATUS2__EA_LINK_BUSY_MASK)) != 0 &&
+ time) {
+ udelay(100);
+ time--;
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
+ }
+
+ if (!time)
+ DRM_WARN("failed to wait for GRBM(EA) idle\n");
+}
+
const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = {
.get_fb_location = gfxhub_v2_1_get_fb_location,
.get_mc_fb_offset = gfxhub_v2_1_get_mc_fb_offset,
@@ -577,6 +660,8 @@ const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = {
.gart_disable = gfxhub_v2_1_gart_disable,
.set_fault_enable_default = gfxhub_v2_1_set_fault_enable_default,
.init = gfxhub_v2_1_init,
- .get_xgmi_info = gfxhub_v2_1_get_xgmi_info,
.utcl2_harvest = gfxhub_v2_1_utcl2_harvest,
+ .mode2_save_regs = gfxhub_v2_1_save_regs,
+ .mode2_restore_regs = gfxhub_v2_1_restore_regs,
+ .halt = gfxhub_v2_1_halt,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c
new file mode 100644
index 000000000000..abe30c8bd2ba
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c
@@ -0,0 +1,513 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "gfxhub_v3_0.h"
+
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "gc/gc_11_0_0_default.h"
+#include "navi10_enum.h"
+#include "soc15_common.h"
+
+static const char * const gfxhub_client_ids[] = {
+ "CB/DB",
+ "Reserved",
+ "GE1",
+ "GE2",
+ "CPF",
+ "CPC",
+ "CPG",
+ "RLC",
+ "TCP",
+ "SQC (inst)",
+ "SQC (data)",
+ "SQG",
+ "Reserved",
+ "SDMA0",
+ "SDMA1",
+ "GCR",
+ "SDMA2",
+ "SDMA3",
+};
+
+static uint32_t gfxhub_v3_0_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid */
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+gfxhub_v3_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ u32 cid = REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, CID);
+
+ dev_err(adev->dev,
+ "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid],
+ cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, RW));
+}
+
+static u64 gfxhub_v3_0_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base = RREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE);
+
+ base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ return base;
+}
+
+static u64 gfxhub_v3_0_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(GC, 0, regGCMC_VM_FB_OFFSET) << 24;
+}
+
+static void gfxhub_v3_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+static void gfxhub_v3_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ gfxhub_v3_0_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void gfxhub_v3_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+}
+
+
+static void gfxhub_v3_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void gfxhub_v3_0_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL2, tmp);
+
+ tmp = regGCVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, tmp);
+
+ tmp = regGCVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL4, tmp);
+
+ tmp = regGCVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL5, tmp);
+}
+
+static void gfxhub_v3_0_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL, tmp);
+}
+
+static void gfxhub_v3_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+
+}
+
+static void gfxhub_v3_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+}
+
+static void gfxhub_v3_0_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ unsigned int i;
+
+ for (i = 0 ; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int gfxhub_v3_0_gart_enable(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev)) {
+ /*
+ * GCMC_VM_FB_LOCATION_BASE/TOP are NULL for a VF because they are
+ * VF copy registers, so VBIOS post doesn't program them; under
+ * SRIOV the driver needs to program them
+ */
+ WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE,
+ adev->gmc.vram_start >> 24);
+ WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_TOP,
+ adev->gmc.vram_end >> 24);
+ }
+
+ /* GART Enable. */
+ gfxhub_v3_0_init_gart_aperture_regs(adev);
+ gfxhub_v3_0_init_system_aperture_regs(adev);
+ gfxhub_v3_0_init_tlb_regs(adev);
+ gfxhub_v3_0_init_cache_regs(adev);
+
+ gfxhub_v3_0_enable_system_domain(adev);
+ gfxhub_v3_0_disable_identity_aperture(adev);
+ gfxhub_v3_0_setup_vmid_config(adev);
+ gfxhub_v3_0_program_invalidation(adev);
+
+ return 0;
+}
+
+static void gfxhub_v3_0_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, 0);
+}
+
+/**
+ * gfxhub_v3_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void gfxhub_v3_0_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ /* Do not halt the CP on a page fault */
+ tmp = RREG32_SOC15(GC, 0, regCP_DEBUG);
+ tmp = REG_SET_FIELD(tmp, CP_DEBUG, CPG_UTCL1_ERROR_HALT_DISABLE, 1);
+ WREG32_SOC15(GC, 0, regCP_DEBUG, tmp);
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs gfxhub_v3_0_vmhub_funcs = {
+ .print_l2_protection_fault_status = gfxhub_v3_0_print_l2_protection_fault_status,
+ .get_invalidate_req = gfxhub_v3_0_get_invalidate_req,
+};
+
+static void gfxhub_v3_0_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(GC, 0,
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(GC, 0,
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regGCVM_CONTEXT1_CNTL - regGCVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regGCVM_INVALIDATE_ENG1_REQ -
+ regGCVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &gfxhub_v3_0_vmhub_funcs;
+}
+
+const struct amdgpu_gfxhub_funcs gfxhub_v3_0_funcs = {
+ .get_fb_location = gfxhub_v3_0_get_fb_location,
+ .get_mc_fb_offset = gfxhub_v3_0_get_mc_fb_offset,
+ .setup_vm_pt_regs = gfxhub_v3_0_setup_vm_pt_regs,
+ .gart_enable = gfxhub_v3_0_gart_enable,
+ .gart_disable = gfxhub_v3_0_gart_disable,
+ .set_fault_enable_default = gfxhub_v3_0_set_fault_enable_default,
+ .init = gfxhub_v3_0_init,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.h
new file mode 100644
index 000000000000..ea345e4e072a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V3_0_H__
+#define __GFXHUB_V3_0_H__
+
+extern const struct amdgpu_gfxhub_funcs gfxhub_v3_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c
new file mode 100644
index 000000000000..b3ef6e71811f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c
@@ -0,0 +1,501 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "gfxhub_v3_0_3.h"
+
+#include "gc/gc_11_0_3_offset.h"
+#include "gc/gc_11_0_3_sh_mask.h"
+#include "navi10_enum.h"
+#include "soc15_common.h"
+
+/*
+ * Base values for GCVM_L2_CNTL3/4/5. gfxhub_v3_0_3_init_cache_regs() starts
+ * from these constants, overrides selected fields and writes the result to
+ * the corresponding register (the registers are not read back first).
+ */
+#define regGCVM_L2_CNTL3_DEFAULT 0x80100007
+#define regGCVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regGCVM_L2_CNTL5_DEFAULT 0x00003fe0
+
+/*
+ * Human-readable names of UTCL2 clients, indexed by the CID field of
+ * GCVM_L2_PROTECTION_FAULT_STATUS. Used only when printing a fault; a CID
+ * at or beyond the end of this table is reported as "unknown" by
+ * gfxhub_v3_0_3_print_l2_protection_fault_status().
+ */
+static const char * const gfxhub_client_ids[] = {
+ "CB/DB",
+ "Reserved",
+ "GE1",
+ "GE2",
+ "CPF",
+ "CPC",
+ "CPG",
+ "RLC",
+ "TCP",
+ "SQC (inst)",
+ "SQC (data)",
+ "SQG",
+ "Reserved",
+ "SDMA0",
+ "SDMA1",
+ "GCR",
+ "SDMA2",
+ "SDMA3",
+};
+
+/*
+ * Compose the GCVM_INVALIDATE_ENG0_REQ value used to flush one VMID.
+ *
+ * @vmid: VMID whose bit (1 << vmid) is placed in PER_VMID_INVALIDATE_REQ
+ * @flush_type: value placed in the FLUSH_TYPE field
+ *
+ * L1 PTEs, L2 PTEs and L2 PDE0..PDE2 are all selected for invalidation and
+ * CLEAR_PROTECTION_FAULT_STATUS_ADDR is left at 0.
+ *
+ * Returns the assembled request word; no register is accessed here.
+ */
+static uint32_t gfxhub_v3_0_3_get_invalidate_req(unsigned int vmid,
+						   uint32_t flush_type)
+{
+	u32 inv = 0;
+
+	/* Legacy-mode invalidation restricted to the requested VMID. */
+	inv = REG_SET_FIELD(inv, GCVM_INVALIDATE_ENG0_REQ,
+			    PER_VMID_INVALIDATE_REQ, 1 << vmid);
+	inv = REG_SET_FIELD(inv, GCVM_INVALIDATE_ENG0_REQ,
+			    FLUSH_TYPE, flush_type);
+
+	/* Select every cached level: L1 PTEs, L2 PTEs and L2 PDE0..PDE2. */
+	inv = REG_SET_FIELD(inv, GCVM_INVALIDATE_ENG0_REQ,
+			    INVALIDATE_L1_PTES, 1);
+	inv = REG_SET_FIELD(inv, GCVM_INVALIDATE_ENG0_REQ,
+			    INVALIDATE_L2_PTES, 1);
+	inv = REG_SET_FIELD(inv, GCVM_INVALIDATE_ENG0_REQ,
+			    INVALIDATE_L2_PDE0, 1);
+	inv = REG_SET_FIELD(inv, GCVM_INVALIDATE_ENG0_REQ,
+			    INVALIDATE_L2_PDE1, 1);
+	inv = REG_SET_FIELD(inv, GCVM_INVALIDATE_ENG0_REQ,
+			    INVALIDATE_L2_PDE2, 1);
+
+	/* Do not request CLEAR_PROTECTION_FAULT_STATUS_ADDR. */
+	inv = REG_SET_FIELD(inv, GCVM_INVALIDATE_ENG0_REQ,
+			    CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+	return inv;
+}
+
+/*
+ * Decode a GCVM_L2_PROTECTION_FAULT_STATUS value and log it with dev_err().
+ *
+ * @adev: amdgpu_device pointer; adev->dev is the device the messages are
+ *        logged against
+ * @status: raw GCVM_L2_PROTECTION_FAULT_STATUS register value
+ *
+ * Prints the raw value followed by the CID (with its client name),
+ * MORE_FAULTS, WALKER_ERROR, PERMISSION_FAULTS, MAPPING_ERROR and RW fields.
+ * The register itself is not read here; the caller supplies @status.
+ */
+static void
+gfxhub_v3_0_3_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ u32 cid = REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, CID);
+
+ dev_err(adev->dev,
+ "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+ /* A CID outside gfxhub_client_ids[] is printed as "unknown". */
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid],
+ cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, RW));
+}
+
+/*
+ * Read the framebuffer base from GCMC_VM_FB_LOCATION_BASE.
+ *
+ * The FB_BASE field is extracted with its mask and shifted left by 24 in
+ * 64-bit arithmetic to form the returned value.
+ */
+static u64 gfxhub_v3_0_3_get_fb_location(struct amdgpu_device *adev)
+{
+	u64 fb_base = RREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE);
+
+	return (fb_base & GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK) << 24;
+}
+
+/*
+ * Read GCMC_VM_FB_OFFSET and return it shifted left by 24.
+ */
+static u64 gfxhub_v3_0_3_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+	u64 fb_offset = RREG32_SOC15(GC, 0, regGCMC_VM_FB_OFFSET);
+
+	/* The value is already 64 bits wide, so the shift keeps the high bits. */
+	return fb_offset << 24;
+}
+
+/*
+ * Program the page table base address of one VM context.
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: context index; selects the register pair at
+ *        CONTEXT0 + vmid * hub->ctx_addr_distance
+ * @page_table_base: 64-bit value split across the LO32/HI32 registers
+ */
+static void gfxhub_v3_0_3_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+					    uint64_t page_table_base)
+{
+	struct amdgpu_vmhub *gfxhub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+	const u32 base_lo = lower_32_bits(page_table_base);
+	const u32 base_hi = upper_32_bits(page_table_base);
+
+	/* Low half first, then high half, as two separate 32-bit writes. */
+	WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+			    gfxhub->ctx_addr_distance * vmid, base_lo);
+	WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+			    gfxhub->ctx_addr_distance * vmid, base_hi);
+}
+
+/*
+ * Point VM context 0 at the GART page table and program its address range.
+ *
+ * The page table base comes from amdgpu_gmc_pd_addr(adev->gart.bo); the
+ * START/END registers receive adev->gmc.gart_start / gart_end shifted right
+ * by 12 (LO32) and by 44 (HI32).
+ */
+static void gfxhub_v3_0_3_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+	/* Context 0 page table base. */
+	gfxhub_v3_0_3_setup_vm_pt_regs(adev, 0,
+				       amdgpu_gmc_pd_addr(adev->gart.bo));
+
+	/* Start of the range covered by context 0. */
+	WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+		     (u32)(adev->gmc.gart_start >> 12));
+	WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+		     (u32)(adev->gmc.gart_start >> 44));
+
+	/* End of the range covered by context 0. */
+	WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+		     (u32)(adev->gmc.gart_end >> 12));
+	WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+		     (u32)(adev->gmc.gart_end >> 44));
+}
+
+/*
+ * Program the AGP window, the system aperture bounds, the system aperture
+ * default address and the protection-fault default address.
+ *
+ * Returns without touching any register when amdgpu_sriov_vf(adev) is true
+ * (presumably for the same reason stated in gfxhub_v3_0_3_init_cache_regs():
+ * the PF programs these registers - confirm).
+ */
+static void gfxhub_v3_0_3_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Disable AGP. */
+ /* NOTE(review): AGP_BASE is zeroed, but BOT/TOP are taken from
+ * adev->gmc.agp_start/agp_end (>> 24); whether that amounts to
+ * "disabled" depends on how those fields are set up elsewhere.
+ */
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /* Program the system aperture low logical page number. */
+ /* The aperture spans both the FB and AGP ranges: lowest start to
+ * highest end, each written shifted right by 18.
+ */
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ /* The scratch buffer's GPU address is converted with
+ * amdgpu_gmc_vram_mc2pa() and split into LSB (>> 12) and MSB (>> 44).
+ */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ /* The HI32 write casts dummy_page_addr to u64 before shifting by 44,
+ * presumably because its type may be narrower than 64 bits - confirm.
+ */
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ /* Set ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY in FAULT_CNTL2. */
+ WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+}
+
+
+/*
+ * Read-modify-write GCMC_VM_MX_L1_TLB_CNTL to enable the L1 TLB.
+ *
+ * Fields written: ENABLE_L1_TLB = 1, SYSTEM_ACCESS_MODE = 3,
+ * ENABLE_ADVANCED_DRIVER_MODEL = 1, SYSTEM_APERTURE_UNMAPPED_ACCESS = 0,
+ * ECO_BITS = 0, MTYPE = MTYPE_UC. All other bits keep the value read back
+ * from the register.
+ */
+static void gfxhub_v3_0_3_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+/*
+ * Program the VM L2 cache control registers GCVM_L2_CNTL .. GCVM_L2_CNTL5.
+ *
+ * GCVM_L2_CNTL and GCVM_L2_CNTL2 are read-modify-written; CNTL3, CNTL4 and
+ * CNTL5 are rebuilt from the regGCVM_L2_CNTLn_DEFAULT constants. Does
+ * nothing on an SR-IOV VF.
+ */
+static void gfxhub_v3_0_3_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL, tmp);
+
+ /* Set INVALIDATE_ALL_L1_TLBS and INVALIDATE_L2_CACHE in CNTL2. */
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL2, tmp);
+
+ /* CNTL3 starts from its default constant; BANK_SELECT and the big-K
+ * fragment size are 12/9 with translate_further and 9/6 without.
+ */
+ tmp = regGCVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, tmp);
+
+ /* CNTL4: default constant with both VMC_TAP_*_REQUEST_PHYSICAL cleared. */
+ tmp = regGCVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL4, tmp);
+
+ /* CNTL5: default constant with the small-K fragment size set to 0. */
+ tmp = regGCVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL5, tmp);
+}
+
+/*
+ * Enable VM context 0 via a read-modify-write of GCVM_CONTEXT0_CNTL:
+ * ENABLE_CONTEXT = 1, PAGE_TABLE_DEPTH = 0 and
+ * RETRY_PERMISSION_OR_INVALID_PAGE_FAULT = 0.
+ */
+static void gfxhub_v3_0_3_enable_system_domain(struct amdgpu_device *adev)
+{
+	uint32_t ctx0_cntl = RREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL);
+
+	ctx0_cntl = REG_SET_FIELD(ctx0_cntl, GCVM_CONTEXT0_CNTL,
+				  ENABLE_CONTEXT, 1);
+	ctx0_cntl = REG_SET_FIELD(ctx0_cntl, GCVM_CONTEXT0_CNTL,
+				  PAGE_TABLE_DEPTH, 0);
+	ctx0_cntl = REG_SET_FIELD(ctx0_cntl, GCVM_CONTEXT0_CNTL,
+				  RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+
+	WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL, ctx0_cntl);
+}
+
+/*
+ * Program the context-1 identity aperture registers with fixed values:
+ * LOW_ADDR = 0xF_FFFFFFFF, HIGH_ADDR = 0 and PHYSICAL_OFFSET = 0.
+ *
+ * NOTE(review): the low bound is above the high bound, which presumably
+ * leaves the aperture empty (hence "disable") - confirm against the
+ * register specification. Does nothing on an SR-IOV VF.
+ */
+static void gfxhub_v3_0_3_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+
+}
+
+/*
+ * Configure the 15 VM contexts that start at GCVM_CONTEXT1_CNTL.
+ *
+ * For each context the control register is read-modify-written to enable
+ * the context, set the page table depth and block size from
+ * adev->vm_manager, and turn on every *_PROTECTION_FAULT_ENABLE_DEFAULT
+ * bit. The page table range is then set to [0, max_pfn - 1].
+ *
+ * The control value computed for the last context is cached in
+ * hub->vm_cntx_cntl.
+ */
+static void gfxhub_v3_0_3_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ int i;
+ uint32_t tmp;
+
+ /* i = 0..14 addresses CONTEXT1..CONTEXT15 (base register is CONTEXT1). */
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ /* The field is written with the VM block size minus 9. */
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ /* Address registers use ctx_addr_distance, not ctx_distance. */
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ /* Remember the control value written to the last context. */
+ hub->vm_cntx_cntl = tmp;
+}
+
+/*
+ * Program the address range of each of the 18 invalidation engines to
+ * 0x1f_ffffffff (LO32 = 0xffffffff, HI32 = 0x1f). Engine n's registers are
+ * located at the ENG0 registers plus n * hub->eng_addr_distance.
+ */
+static void gfxhub_v3_0_3_program_invalidation(struct amdgpu_device *adev)
+{
+	struct amdgpu_vmhub *gfxhub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+	unsigned int eng;
+
+	for (eng = 0; eng < 18; eng++) {
+		WREG32_SOC15_OFFSET(GC, 0,
+				    regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+				    eng * gfxhub->eng_addr_distance,
+				    0xffffffff);
+		WREG32_SOC15_OFFSET(GC, 0,
+				    regGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+				    eng * gfxhub->eng_addr_distance,
+				    0x1f);
+	}
+}
+
+/*
+ * Bring up the GFX hub for GART use by running the init helpers in order:
+ * GART aperture, system aperture, TLB and L2 cache registers, then
+ * context 0 enable, identity aperture disable, per-VMID context setup and
+ * invalidation engine ranges.
+ *
+ * Always returns 0; none of the helpers report errors.
+ */
+static int gfxhub_v3_0_3_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ gfxhub_v3_0_3_init_gart_aperture_regs(adev);
+ gfxhub_v3_0_3_init_system_aperture_regs(adev);
+ gfxhub_v3_0_3_init_tlb_regs(adev);
+ gfxhub_v3_0_3_init_cache_regs(adev);
+
+ gfxhub_v3_0_3_enable_system_domain(adev);
+ gfxhub_v3_0_3_disable_identity_aperture(adev);
+ gfxhub_v3_0_3_setup_vmid_config(adev);
+ gfxhub_v3_0_3_program_invalidation(adev);
+
+ return 0;
+}
+
+/*
+ * Shut down address translation on the GFX hub: zero the control register
+ * of all 16 VM contexts, clear ENABLE_L1_TLB and
+ * ENABLE_ADVANCED_DRIVER_MODEL, clear ENABLE_L2_CACHE and write 0 to
+ * GCVM_L2_CNTL3.
+ */
+static void gfxhub_v3_0_3_gart_disable(struct amdgpu_device *adev)
+{
+	struct amdgpu_vmhub *gfxhub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+	u32 tlb_cntl;
+	u32 ctx;
+
+	/* Disable all tables */
+	for (ctx = 0; ctx < 16; ctx++)
+		WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL,
+				    ctx * gfxhub->ctx_distance, 0);
+
+	/* Turn off the L1 TLB and the advanced driver model. */
+	tlb_cntl = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+	tlb_cntl = REG_SET_FIELD(tlb_cntl, GCMC_VM_MX_L1_TLB_CNTL,
+				 ENABLE_L1_TLB, 0);
+	tlb_cntl = REG_SET_FIELD(tlb_cntl, GCMC_VM_MX_L1_TLB_CNTL,
+				 ENABLE_ADVANCED_DRIVER_MODEL, 0);
+	WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tlb_cntl);
+
+	/* Turn off the L2 cache and clear GCVM_L2_CNTL3. */
+	WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+	WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, 0);
+}
+
+/**
+ * gfxhub_v3_0_3_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ *
+ * Read-modify-writes GCVM_L2_PROTECTION_FAULT_CNTL, storing @value in every
+ * *_PROTECTION_FAULT_ENABLE_DEFAULT field. When @value is false,
+ * CRASH_ON_NO_RETRY_FAULT and CRASH_ON_RETRY_FAULT are additionally set;
+ * when it is true those two fields keep whatever value was read back.
+ * Does nothing on an SR-IOV VF.
+ */
+static void gfxhub_v3_0_3_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ /* Only the "not redirected" case arms the CRASH_ON_* bits; they are
+ * never cleared here.
+ */
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+/*
+ * Per-hub callbacks for this GFX hub version. gfxhub_v3_0_3_init() installs
+ * this table in hub->vmhub_funcs.
+ */
+static const struct amdgpu_vmhub_funcs gfxhub_v3_0_3_vmhub_funcs = {
+ .print_l2_protection_fault_status = gfxhub_v3_0_3_print_l2_protection_fault_status,
+ .get_invalidate_req = gfxhub_v3_0_3_get_invalidate_req,
+};
+
+/*
+ * Fill in the amdgpu_vmhub descriptor for AMDGPU_GFXHUB(0).
+ *
+ * Records the offsets of the context 0 page table base, invalidation engine 0
+ * (SEM/REQ/ACK), context 0 control and L2 protection fault status/control
+ * registers, the strides between consecutive contexts and invalidation
+ * engines, the VM fault interrupt-enable mask, and the vmhub callback table.
+ * No RREG32/WREG32 access is made here.
+ */
+static void gfxhub_v3_0_3_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(GC, 0,
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(GC, 0,
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+
+ /* Strides are the differences between instance 1 and instance 0 of
+ * each register family.
+ */
+ hub->ctx_distance = regGCVM_CONTEXT1_CNTL - regGCVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regGCVM_INVALIDATE_ENG1_REQ -
+ regGCVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ /* OR of all *_PROTECTION_FAULT_ENABLE_INTERRUPT bits of CONTEXT1_CNTL. */
+ hub->vm_cntx_cntl_vm_fault = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &gfxhub_v3_0_3_vmhub_funcs;
+}
+
+/*
+ * GFX hub callback table for this hub version; the only non-static symbol
+ * in this file, declared extern in gfxhub_v3_0_3.h.
+ */
+const struct amdgpu_gfxhub_funcs gfxhub_v3_0_3_funcs = {
+ .get_fb_location = gfxhub_v3_0_3_get_fb_location,
+ .get_mc_fb_offset = gfxhub_v3_0_3_get_mc_fb_offset,
+ .setup_vm_pt_regs = gfxhub_v3_0_3_setup_vm_pt_regs,
+ .gart_enable = gfxhub_v3_0_3_gart_enable,
+ .gart_disable = gfxhub_v3_0_3_gart_disable,
+ .set_fault_enable_default = gfxhub_v3_0_3_set_fault_enable_default,
+ .init = gfxhub_v3_0_3_init,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.h
new file mode 100644
index 000000000000..6153bd5e3083
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V3_0_3_H__
+#define __GFXHUB_V3_0_3_H__
+
+extern const struct amdgpu_gfxhub_funcs gfxhub_v3_0_3_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 3ec5ff5a6dbe..ce6e04242c52 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -22,6 +22,9 @@
*/
#include <linux/firmware.h>
#include <linux/pci.h>
+
+#include <drm/drm_cache.h>
+
#include "amdgpu.h"
#include "amdgpu_atomfirmware.h"
#include "gmc_v10_0.h"
@@ -48,16 +51,9 @@
#include "athub_v2_0.h"
#include "athub_v2_1.h"
-#if 0
-static const struct soc15_reg_golden golden_settings_navi10_hdp[] =
-{
- /* TODO add golden setting for hdp */
-};
-#endif
-
static int gmc_v10_0_ecc_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
return 0;
@@ -65,21 +61,33 @@ static int gmc_v10_0_ecc_interrupt_state(struct amdgpu_device *adev,
static int
gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
- struct amdgpu_irq_src *src, unsigned type,
+ struct amdgpu_irq_src *src, unsigned int type,
enum amdgpu_interrupt_state state)
{
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
/* MM HUB */
- amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, false);
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), false);
/* GFX HUB */
- amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, false);
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (!adev->in_s0ix)
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false);
break;
case AMDGPU_IRQ_STATE_ENABLE:
/* MM HUB */
- amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, true);
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), true);
/* GFX HUB */
- amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, true);
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (!adev->in_s0ix)
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), true);
break;
default:
break;
@@ -92,10 +100,14 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- bool retry_fault = !!(entry->src_data[1] & 0x80);
- bool write_fault = !!(entry->src_data[1] & 0x20);
- struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src];
- struct amdgpu_task_info task_info;
+ uint32_t vmhub_index = entry->client_id == SOC15_IH_CLIENTID_VMC ?
+ AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0);
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub_index];
+ bool retry_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY);
+ bool write_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE);
+ struct amdgpu_task_info *task_info;
uint32_t status = 0;
u64 addr;
@@ -107,7 +119,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
/* Process it onyl if it's the first fault for this address */
if (entry->ih != &adev->irq.ih_soft &&
- amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
+ amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
entry->timestamp))
return 1;
@@ -122,7 +134,8 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
/* Try to handle the recoverable page faults by filling page
* tables
*/
- if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault))
+ if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr,
+ entry->timestamp, write_fault))
return 1;
}
@@ -132,32 +145,39 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
* be updated to avoid reading an incorrect value due to
* the new fast GRBM interface.
*/
- if ((entry->vmid_src == AMDGPU_GFXHUB_0) &&
- (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0)))
+ if ((entry->vmid_src == AMDGPU_GFXHUB(0)) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) <
+ IP_VERSION(10, 3, 0)))
RREG32(hub->vm_l2_pro_fault_status);
status = RREG32(hub->vm_l2_pro_fault_status);
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+ amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
+ entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
}
if (!printk_ratelimit())
return 0;
- memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
-
dev_err(adev->dev,
- "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, "
- "for process %s pid %d thread %s pid %d)\n",
+ "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
entry->vmid_src ? "mmhub" : "gfxhub",
- entry->src_id, entry->ring_id, entry->vmid,
- entry->pasid, task_info.process_name, task_info.tgid,
- task_info.task_name, task_info.pid);
+ entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ amdgpu_vm_print_task_info(adev, task_info);
+ amdgpu_vm_put_task_info(task_info);
+ }
+
dev_err(adev->dev, " in page starting at address 0x%016llx from client 0x%x (%s)\n",
- addr, entry->client_id,
- soc15_ih_clientid_name[entry->client_id]);
+ addr, entry->client_id,
+ soc15_ih_clientid_name[entry->client_id]);
- if (!amdgpu_sriov_vf(adev))
+ /* Only print L2 fault status if the status register could be read and
+ * contains useful information
+ */
+ if (status != 0)
hub->vmhub_funcs->print_l2_protection_fault_status(adev,
status);
@@ -195,8 +215,7 @@ static void gmc_v10_0_set_irq_funcs(struct amdgpu_device *adev)
static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
uint32_t vmhub)
{
- return ((vmhub == AMDGPU_MMHUB_0 ||
- vmhub == AMDGPU_MMHUB_1) &&
+ return ((vmhub == AMDGPU_MMHUB0(0)) &&
(!amdgpu_sriov_vf(adev)));
}
@@ -220,20 +239,48 @@ static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
* by the amdgpu vm/hsa code.
*/
-static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
- unsigned int vmhub, uint32_t flush_type)
+/**
+ * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ * @vmhub: vmhub type
+ * @flush_type: the flush type
+ *
+ * Flush the TLB for the requested page table.
+ */
+static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
{
bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
- u32 tmp;
/* Use register 17 for GART */
- const unsigned eng = 17;
- unsigned int i;
+ const unsigned int eng = 17;
unsigned char hub_ip = 0;
+ u32 sem, req, ack;
+ unsigned int i;
+ u32 tmp;
+
+ sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+ req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+ ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
- hub_ip = (vmhub == AMDGPU_GFXHUB_0) ?
- GC_HWIP : MMHUB_HWIP;
+ /* flush hdp cache */
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ /* This is necessary for SRIOV as well as for GFXOFF to function
+ * properly under bare metal
+ */
+ if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
+ (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
+ amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
+ 1 << vmid, GET_INST(GC, 0));
+ return;
+ }
+
+ /* This path is needed before KIQ/MES/GFXOFF are set up */
+ hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP;
spin_lock(&adev->gmc.invalidate_lock);
/*
@@ -247,9 +294,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
if (use_semaphore) {
for (i = 0; i < adev->usec_timeout; i++) {
/* a read return value of 1 means semaphore acuqire */
- tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
- hub->eng_distance * eng, hub_ip);
-
+ tmp = RREG32_RLC_NO_KIQ(sem, hub_ip);
if (tmp & 0x1)
break;
udelay(1);
@@ -259,24 +304,19 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
}
- WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
- hub->eng_distance * eng,
- inv_req, hub_ip);
+ WREG32_RLC_NO_KIQ(req, inv_req, hub_ip);
/*
* Issue a dummy read to wait for the ACK register to be cleared
* to avoid a false ACK due to the new fast GRBM interface.
*/
- if ((vmhub == AMDGPU_GFXHUB_0) &&
- (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0)))
- RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
- hub->eng_distance * eng, hub_ip);
+ if ((vmhub == AMDGPU_GFXHUB(0)) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 3, 0)))
+ RREG32_RLC_NO_KIQ(req, hub_ip);
/* Wait for ACK with a delay.*/
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
- hub->eng_distance * eng, hub_ip);
-
+ tmp = RREG32_RLC_NO_KIQ(ack, hub_ip);
tmp &= 1 << vmid;
if (tmp)
break;
@@ -286,113 +326,13 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
if (use_semaphore)
- /*
- * add semaphore release after invalidation,
- * write with 0 means semaphore release
- */
- WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
- hub->eng_distance * eng, 0, hub_ip);
+ WREG32_RLC_NO_KIQ(sem, 0, hub_ip);
spin_unlock(&adev->gmc.invalidate_lock);
- if (i < adev->usec_timeout)
- return;
-
- DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub);
-}
-
-/**
- * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
- *
- * @adev: amdgpu_device pointer
- * @vmid: vm instance to flush
- * @vmhub: vmhub type
- * @flush_type: the flush type
- *
- * Flush the TLB for the requested page table.
- */
-static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
- uint32_t vmhub, uint32_t flush_type)
-{
- struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
- struct dma_fence *fence;
- struct amdgpu_job *job;
-
- int r;
-
- /* flush hdp cache */
- adev->hdp.funcs->flush_hdp(adev, NULL);
-
- /* For SRIOV run time, driver shouldn't access the register through MMIO
- * Directly use kiq to do the vm invalidation instead
- */
- if (adev->gfx.kiq.ring.sched.ready &&
- (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
- down_read_trylock(&adev->reset_sem)) {
- struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
- const unsigned eng = 17;
- u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
- u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
- u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
-
- amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
- 1 << vmid);
-
- up_read(&adev->reset_sem);
- return;
- }
-
- mutex_lock(&adev->mman.gtt_window_lock);
-
- if (vmhub == AMDGPU_MMHUB_0) {
- gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0);
- mutex_unlock(&adev->mman.gtt_window_lock);
- return;
- }
-
- BUG_ON(vmhub != AMDGPU_GFXHUB_0);
-
- if (!adev->mman.buffer_funcs_enabled ||
- !adev->ib_pool_ready ||
- amdgpu_in_reset(adev) ||
- ring->sched.ready == false) {
- gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0);
- mutex_unlock(&adev->mman.gtt_window_lock);
- return;
- }
-
- /* The SDMA on Navi has a bug which can theoretically result in memory
- * corruption if an invalidation happens at the same time as an VA
- * translation. Avoid this by doing the invalidation from the SDMA
- * itself.
- */
- r = amdgpu_job_alloc_with_ib(adev, 16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
- &job);
- if (r)
- goto error_alloc;
-
- job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
- job->vm_needs_flush = true;
- job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
- amdgpu_ring_pad_ib(ring, &job->ibs[0]);
- r = amdgpu_job_submit(job, &adev->mman.entity,
- AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
- if (r)
- goto error_submit;
-
- mutex_unlock(&adev->mman.gtt_window_lock);
-
- dma_fence_wait(fence, false);
- dma_fence_put(fence);
-
- return;
-
-error_submit:
- amdgpu_job_free(job);
-
-error_alloc:
- mutex_unlock(&adev->mman.gtt_window_lock);
- DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "Timeout waiting for VM flush hub: %d!\n",
+ vmhub);
}
/**
@@ -402,72 +342,44 @@ error_alloc:
* @pasid: pasid to be flush
* @flush_type: the flush type
* @all_hub: Used with PACKET3_INVALIDATE_TLBS_ALL_HUB()
+ * @inst: is used to select which instance of KIQ to use for the invalidation
*
* Flush the TLB for the requested pasid.
*/
-static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
- uint16_t pasid, uint32_t flush_type,
- bool all_hub)
+static void gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
{
+ uint16_t queried;
int vmid, i;
- signed long r;
- uint32_t seq;
- uint16_t queried_pasid;
- bool ret;
- struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-
- if (amdgpu_emu_mode == 0 && ring->sched.ready) {
- spin_lock(&adev->gfx.kiq.ring_lock);
- /* 2 dwords flush + 8 dwords fence */
- amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
- kiq->pmf->kiq_invalidate_tlbs(ring,
- pasid, flush_type, all_hub);
- r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
- if (r) {
- amdgpu_ring_undo(ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
- return -ETIME;
- }
-
- amdgpu_ring_commit(ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
- r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
- if (r < 1) {
- dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
- return -ETIME;
- }
-
- return 0;
- }
for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
-
- ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
- &queried_pasid);
- if (ret && queried_pasid == pasid) {
- if (all_hub) {
- for (i = 0; i < adev->num_vmhubs; i++)
- gmc_v10_0_flush_gpu_tlb(adev, vmid,
- i, flush_type);
- } else {
- gmc_v10_0_flush_gpu_tlb(adev, vmid,
- AMDGPU_GFXHUB_0, flush_type);
- }
- break;
+ bool valid;
+
+ valid = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+ &queried);
+ if (!valid || queried != pasid)
+ continue;
+
+ if (all_hub) {
+ for_each_set_bit(i, adev->vmhubs_mask,
+ AMDGPU_MAX_VMHUBS)
+ gmc_v10_0_flush_gpu_tlb(adev, vmid, i,
+ flush_type);
+ } else {
+ gmc_v10_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0),
+ flush_type);
}
}
-
- return 0;
}
static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
- unsigned vmid, uint64_t pd_addr)
+ unsigned int vmid, uint64_t pd_addr)
{
- bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(ring->adev, ring->vm_hub);
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
- unsigned eng = ring->vm_inv_eng;
+ unsigned int eng = ring->vm_inv_eng;
/*
* It may lose gpuvm invalidate acknowldege state across power-gating
@@ -509,13 +421,13 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
return pd_addr;
}
-static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
- unsigned pasid)
+static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int vmid,
+ unsigned int pasid)
{
struct amdgpu_device *adev = ring->adev;
uint32_t reg;
- if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
+ if (ring->vm_hub == AMDGPU_GFXHUB(0))
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
else
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
@@ -556,24 +468,6 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid
* 0 valid
*/
-static uint64_t gmc_v10_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
-{
- switch (flags) {
- case AMDGPU_VM_MTYPE_DEFAULT:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
- case AMDGPU_VM_MTYPE_NC:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
- case AMDGPU_VM_MTYPE_WC:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_WC);
- case AMDGPU_VM_MTYPE_CC:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_CC);
- case AMDGPU_VM_MTYPE_UC:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
- default:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
- }
-}
-
static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
uint64_t *addr, uint64_t *flags)
{
@@ -598,28 +492,56 @@ static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
}
static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
- struct amdgpu_bo_va_mapping *mapping,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
uint64_t *flags)
{
- *flags &= ~AMDGPU_PTE_EXECUTABLE;
- *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+ if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+ *flags |= AMDGPU_PTE_EXECUTABLE;
+ else
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
- *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
- *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
+ switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
+ case AMDGPU_VM_MTYPE_DEFAULT:
+ case AMDGPU_VM_MTYPE_NC:
+ default:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_NC);
+ break;
+ case AMDGPU_VM_MTYPE_WC:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_WC);
+ break;
+ case AMDGPU_VM_MTYPE_CC:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_CC);
+ break;
+ case AMDGPU_VM_MTYPE_UC:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
+ break;
+ }
- if (mapping->flags & AMDGPU_PTE_PRT) {
+ if (vm_flags & AMDGPU_VM_PAGE_NOALLOC)
+ *flags |= AMDGPU_PTE_NOALLOC;
+ else
+ *flags &= ~AMDGPU_PTE_NOALLOC;
+
+ if (vm_flags & AMDGPU_VM_PAGE_PRT) {
*flags |= AMDGPU_PTE_PRT;
*flags |= AMDGPU_PTE_SNOOPED;
*flags |= AMDGPU_PTE_LOG;
*flags |= AMDGPU_PTE_SYSTEM;
*flags &= ~AMDGPU_PTE_VALID;
}
+
+ if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_EXT_COHERENT |
+ AMDGPU_GEM_CREATE_UNCACHED))
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
}
-static unsigned gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev)
+static unsigned int gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
- unsigned size;
+ unsigned int size;
if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
size = AMDGPU_VBIOS_VGA_ALLOCATION;
@@ -643,7 +565,6 @@ static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
.flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
- .map_mtype = gmc_v10_0_map_mtype,
.get_vm_pde = gmc_v10_0_get_vm_pde,
.get_vm_pte = gmc_v10_0_get_vm_pte,
.get_vbios_fb_size = gmc_v10_0_get_vbios_fb_size,
@@ -657,26 +578,27 @@ static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev)
static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[UMC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
case IP_VERSION(8, 7, 0):
adev->umc.max_ras_err_cnt_per_query = UMC_V8_7_TOTAL_CHANNEL_NUM;
adev->umc.channel_inst_num = UMC_V8_7_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V8_7_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA;
+ adev->umc.retire_unit = 1;
adev->umc.channel_idx_tbl = &umc_v8_7_channel_idx_tbl[0][0];
- adev->umc.ras_funcs = &umc_v8_7_ras_funcs;
+ adev->umc.ras = &umc_v8_7_ras;
break;
default:
break;
}
}
-
static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(2, 3, 0):
case IP_VERSION(2, 4, 0):
+ case IP_VERSION(2, 4, 1):
adev->mmhub.funcs = &mmhub_v2_3_funcs;
break;
default:
@@ -687,13 +609,15 @@ static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev)
static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
adev->gfxhub.funcs = &gfxhub_v2_1_funcs;
break;
default:
@@ -703,9 +627,9 @@ static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev)
}
-static int gmc_v10_0_early_init(void *handle)
+static int gmc_v10_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v10_0_set_mmhub_funcs(adev);
gmc_v10_0_set_gfxhub_funcs(adev);
@@ -719,13 +643,14 @@ static int gmc_v10_0_early_init(void *handle)
adev->gmc.private_aperture_start = 0x1000000000000000ULL;
adev->gmc.private_aperture_end =
adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;
return 0;
}
-static int gmc_v10_0_late_init(void *handle)
+static int gmc_v10_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_gmc_allocate_vm_inv_eng(adev);
@@ -749,9 +674,11 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
/* add the xgmi offset of the physical node */
base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+ amdgpu_gmc_set_agp_default(adev, mc);
amdgpu_gmc_vram_location(adev, &adev->gmc, base);
- amdgpu_gmc_gart_location(adev, mc);
- amdgpu_gmc_agp_location(adev, mc);
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
+ if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
+ amdgpu_gmc_agp_location(adev, mc);
/* base offset of vram pages */
adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
@@ -788,22 +715,30 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
#ifdef CONFIG_X86_64
- if (adev->flags & AMD_IS_APU) {
+ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) {
adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev);
adev->gmc.aper_size = adev->gmc.real_vram_size;
}
#endif
- /* In case the PCI BAR is larger than the actual amount of vram */
adev->gmc.visible_vram_size = adev->gmc.aper_size;
- if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
- adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
/* set the gart size */
- if (amdgpu_gart_size == -1)
- adev->gmc.gart_size = 512ULL << 20;
- else
+ if (amdgpu_gart_size == -1) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ default:
+ adev->gmc.gart_size = 512ULL << 20;
+ break;
+ case IP_VERSION(10, 3, 1): /* DCE SG support */
+ case IP_VERSION(10, 3, 3): /* DCE SG support */
+ case IP_VERSION(10, 3, 6): /* DCE SG support */
+ case IP_VERSION(10, 3, 7): /* DCE SG support */
+ adev->gmc.gart_size = 1024ULL << 20;
+ break;
+ }
+ } else {
adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
+ }
gmc_v10_0_vram_gtt_location(adev, &adev->gmc);
@@ -825,16 +760,16 @@ static int gmc_v10_0_gart_init(struct amdgpu_device *adev)
return r;
adev->gart.table_size = adev->gart.num_gpu_pages * 8;
- adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(MTYPE_UC) |
+ adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC) |
AMDGPU_PTE_EXECUTABLE;
return amdgpu_gart_table_vram_alloc(adev);
}
-static int gmc_v10_0_sw_init(void *handle)
+static int gmc_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->gfxhub.funcs->init(adev);
@@ -857,18 +792,40 @@ static int gmc_v10_0_sw_init(void *handle)
adev->gmc.vram_vendor = vram_vendor;
}
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ adev->gmc.mall_size = 128 * 1024 * 1024;
+ break;
+ case IP_VERSION(10, 3, 2):
+ adev->gmc.mall_size = 96 * 1024 * 1024;
+ break;
+ case IP_VERSION(10, 3, 4):
+ adev->gmc.mall_size = 32 * 1024 * 1024;
+ break;
+ case IP_VERSION(10, 3, 5):
+ adev->gmc.mall_size = 16 * 1024 * 1024;
+ break;
+ default:
+ adev->gmc.mall_size = 0;
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
- adev->num_vmhubs = 2;
+ case IP_VERSION(10, 3, 7):
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
/*
* To fulfill 4-level page support,
* vm size is 256TB (48bit), maximum size of Navi10/Navi14/Navi12,
@@ -910,22 +867,17 @@ static int gmc_v10_0_sw_init(void *handle)
r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
if (r) {
- printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
+ dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n");
return r;
}
- if (adev->gmc.xgmi.supported) {
- r = adev->gfxhub.funcs->get_xgmi_info(adev);
- if (r)
- return r;
- }
+ adev->need_swiotlb = drm_need_swiotlb(44);
r = gmc_v10_0_mc_init(adev);
if (r)
return r;
amdgpu_gmc_get_vbios_allocations(adev);
- amdgpu_gmc_get_reserved_allocation(adev);
/* Memory manager */
r = amdgpu_bo_init(adev);
@@ -946,6 +898,10 @@ static int gmc_v10_0_sw_init(void *handle)
amdgpu_vm_manager_init(adev);
+ r = amdgpu_gmc_ras_sw_init(adev);
+ if (r)
+ return r;
+
return 0;
}
@@ -961,9 +917,9 @@ static void gmc_v10_0_gart_fini(struct amdgpu_device *adev)
amdgpu_gart_table_vram_free(adev);
}
-static int gmc_v10_0_sw_fini(void *handle)
+static int gmc_v10_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_vm_manager_fini(adev);
gmc_v10_0_gart_fini(adev);
@@ -992,13 +948,13 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
return -EINVAL;
}
- r = amdgpu_gart_table_vram_pin(adev);
- if (r)
- return r;
+ amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
- r = adev->gfxhub.funcs->gart_enable(adev);
- if (r)
- return r;
+ if (!adev->in_s0ix) {
+ r = adev->gfxhub.funcs->gart_enable(adev);
+ if (r)
+ return r;
+ }
r = adev->mmhub.funcs->gart_enable(adev);
if (r)
@@ -1007,29 +963,30 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
adev->hdp.funcs->init_registers(adev);
/* Flush HDP after it is initialized */
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
- value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
- false : true;
+ value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
- adev->gfxhub.funcs->set_fault_enable_default(adev, value);
+ if (!adev->in_s0ix)
+ adev->gfxhub.funcs->set_fault_enable_default(adev, value);
adev->mmhub.funcs->set_fault_enable_default(adev, value);
- gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0);
- gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);
+ gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0);
+ if (!adev->in_s0ix)
+ gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
- (unsigned)(adev->gmc.gart_size >> 20),
+ (unsigned int)(adev->gmc.gart_size >> 20),
(unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
- adev->gart.ready = true;
-
return 0;
}
-static int gmc_v10_0_hw_init(void *handle)
+static int gmc_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
{
+ struct amdgpu_device *adev = ip_block->adev;
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode;
/* The sequence of these two function calls matters.*/
gmc_v10_0_init_golden_registers(adev);
@@ -1038,13 +995,19 @@ static int gmc_v10_0_hw_init(void *handle)
* harvestable groups in gc_utcl2 need to be programmed before any GFX block
* register setup within GMC, or else system hang when harvesting SA.
*/
- if (adev->gfxhub.funcs && adev->gfxhub.funcs->utcl2_harvest)
+ if (!adev->in_s0ix && adev->gfxhub.funcs && adev->gfxhub.funcs->utcl2_harvest)
adev->gfxhub.funcs->utcl2_harvest(adev);
r = gmc_v10_0_gart_enable(adev);
if (r)
return r;
+ if (amdgpu_emu_mode == 1) {
+ r = amdgpu_gmc_vram_checking(adev);
+ if (r)
+ return r;
+ }
+
if (adev->umc.funcs && adev->umc.funcs->init_registers)
adev->umc.funcs->init_registers(adev);
@@ -1060,14 +1023,14 @@ static int gmc_v10_0_hw_init(void *handle)
*/
static void gmc_v10_0_gart_disable(struct amdgpu_device *adev)
{
- adev->gfxhub.funcs->gart_disable(adev);
+ if (!adev->in_s0ix)
+ adev->gfxhub.funcs->gart_disable(adev);
adev->mmhub.funcs->gart_disable(adev);
- amdgpu_gart_table_vram_unpin(adev);
}
-static int gmc_v10_0_hw_fini(void *handle)
+static int gmc_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v10_0_gart_disable(adev);
@@ -1077,81 +1040,91 @@ static int gmc_v10_0_hw_fini(void *handle)
return 0;
}
- amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+ if (adev->gmc.ecc_irq.funcs &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+ amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+
return 0;
}
-static int gmc_v10_0_suspend(void *handle)
+static int gmc_v10_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- gmc_v10_0_hw_fini(adev);
+ gmc_v10_0_hw_fini(ip_block);
return 0;
}
-static int gmc_v10_0_resume(void *handle)
+static int gmc_v10_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = gmc_v10_0_hw_init(adev);
+ r = gmc_v10_0_hw_init(ip_block);
if (r)
return r;
- amdgpu_vmid_reset_all(adev);
+ amdgpu_vmid_reset_all(ip_block->adev);
return 0;
}
-static bool gmc_v10_0_is_idle(void *handle)
+static bool gmc_v10_0_is_idle(struct amdgpu_ip_block *ip_block)
{
/* MC is always ready in GMC v10.*/
return true;
}
-static int gmc_v10_0_wait_for_idle(void *handle)
+static int gmc_v10_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* There is no need to wait for MC idle in GMC v10.*/
return 0;
}
-static int gmc_v10_0_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int gmc_v10_0_set_clockgating_state(void *handle,
+static int gmc_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /*
+ * The issue mmhub can't disconnect from DF with MMHUB clock gating being disabled
+ * is a new problem observed at DF 3.0.3, however with the same suspend sequence not
+ * seen any issue on the DF 3.0.2 series platform.
+ */
+ if (adev->in_s0ix &&
+ amdgpu_ip_version(adev, DF_HWIP, 0) > IP_VERSION(3, 0, 2)) {
+ dev_dbg(adev->dev, "keep mmhub clock gating being enabled for s0ix\n");
+ return 0;
+ }
r = adev->mmhub.funcs->set_clockgating(adev, state);
if (r)
return r;
- if (adev->ip_versions[ATHUB_HWIP][0] >= IP_VERSION(2, 1, 0))
+ if (amdgpu_ip_version(adev, ATHUB_HWIP, 0) >= IP_VERSION(2, 1, 0))
return athub_v2_1_set_clockgating(adev, state);
else
return athub_v2_0_set_clockgating(adev, state);
}
-static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags)
+static void gmc_v10_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 4))
+ return;
adev->mmhub.funcs->get_clockgating(adev, flags);
- if (adev->ip_versions[ATHUB_HWIP][0] >= IP_VERSION(2, 1, 0))
+ if (amdgpu_ip_version(adev, ATHUB_HWIP, 0) >= IP_VERSION(2, 1, 0))
athub_v2_1_get_clockgating(adev, flags);
else
athub_v2_0_get_clockgating(adev, flags);
}
-static int gmc_v10_0_set_powergating_state(void *handle,
+static int gmc_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1169,14 +1142,12 @@ const struct amd_ip_funcs gmc_v10_0_ip_funcs = {
.resume = gmc_v10_0_resume,
.is_idle = gmc_v10_0_is_idle,
.wait_for_idle = gmc_v10_0_wait_for_idle,
- .soft_reset = gmc_v10_0_soft_reset,
.set_clockgating_state = gmc_v10_0_set_clockgating_state,
.set_powergating_state = gmc_v10_0_set_powergating_state,
.get_clockgating_state = gmc_v10_0_get_clockgating_state,
};
-const struct amdgpu_ip_block_version gmc_v10_0_ip_block =
-{
+const struct amdgpu_ip_block_version gmc_v10_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GMC,
.major = 10,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
new file mode 100644
index 000000000000..ba59ee8e398a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -0,0 +1,1085 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/pci.h>
+
+#include <drm/drm_cache.h>
+
+#include "amdgpu.h"
+#include "amdgpu_atomfirmware.h"
+#include "gmc_v11_0.h"
+#include "umc_v8_10.h"
+#include "athub/athub_3_0_0_sh_mask.h"
+#include "athub/athub_3_0_0_offset.h"
+#include "dcn/dcn_3_2_0_offset.h"
+#include "dcn/dcn_3_2_0_sh_mask.h"
+#include "oss/osssys_6_0_0_offset.h"
+#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
+#include "navi10_enum.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+#include "nbio_v4_3.h"
+#include "gfxhub_v3_0.h"
+#include "gfxhub_v3_0_3.h"
+#include "gfxhub_v11_5_0.h"
+#include "mmhub_v3_0.h"
+#include "mmhub_v3_0_1.h"
+#include "mmhub_v3_0_2.h"
+#include "mmhub_v3_3.h"
+#include "athub_v3_0.h"
+
+
+static int gmc_v11_0_ecc_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int
+gmc_v11_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src, unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ /* MM HUB */
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), false);
+ /* GFX HUB */
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (!adev->in_s0ix && (adev->in_runpm || adev->in_suspend ||
+ amdgpu_in_reset(adev)))
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ /* MM HUB */
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), true);
+ /* GFX HUB */
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (!adev->in_s0ix)
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), true);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t vmhub_index = entry->client_id == SOC21_IH_CLIENTID_VMC ?
+ AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0);
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub_index];
+ bool retry_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY);
+ bool write_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE);
+ uint32_t status = 0;
+ u64 addr;
+
+ addr = (u64)entry->src_data[0] << 12;
+ addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+
+ if (retry_fault) {
+ /* Returning 1 here also prevents sending the IV to the KFD */
+
+ /* Process it only if it's the first fault for this address */
+ if (entry->ih != &adev->irq.ih_soft &&
+ amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
+ entry->timestamp))
+ return 1;
+
+ /* Delegate it to a different ring if the hardware hasn't
+ * already done it.
+ */
+ if (entry->ih == &adev->irq.ih) {
+ amdgpu_irq_delegate(adev, entry, 8);
+ return 1;
+ }
+
+ /* Try to handle the recoverable page faults by filling page
+ * tables
+ */
+ if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr,
+ entry->timestamp, write_fault))
+ return 1;
+ }
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /*
+ * Issue a dummy read to wait for the status register to
+ * be updated to avoid reading an incorrect value due to
+ * the new fast GRBM interface.
+ */
+ if (entry->vmid_src == AMDGPU_GFXHUB(0))
+ RREG32(hub->vm_l2_pro_fault_status);
+
+ status = RREG32(hub->vm_l2_pro_fault_status);
+ WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+ amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
+ entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
+ }
+
+ if (printk_ratelimit()) {
+ struct amdgpu_task_info *task_info;
+
+ dev_err(adev->dev,
+ "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
+ entry->vmid_src ? "mmhub" : "gfxhub",
+ entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ amdgpu_vm_print_task_info(adev, task_info);
+ amdgpu_vm_put_task_info(task_info);
+ }
+
+ dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n",
+ addr, entry->client_id);
+
+ /* Only print L2 fault status if the status register could be read and
+ * contains useful information
+ */
+ if (status != 0)
+ hub->vmhub_funcs->print_l2_protection_fault_status(adev, status);
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs gmc_v11_0_irq_funcs = {
+ .set = gmc_v11_0_vm_fault_interrupt_state,
+ .process = gmc_v11_0_process_interrupt,
+};
+
+static const struct amdgpu_irq_src_funcs gmc_v11_0_ecc_funcs = {
+ .set = gmc_v11_0_ecc_interrupt_state,
+ .process = amdgpu_umc_process_ecc_irq,
+};
+
+static void gmc_v11_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.vm_fault.num_types = 1;
+ adev->gmc.vm_fault.funcs = &gmc_v11_0_irq_funcs;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->gmc.ecc_irq.num_types = 1;
+ adev->gmc.ecc_irq.funcs = &gmc_v11_0_ecc_funcs;
+ }
+}
+
+/**
+ * gmc_v11_0_use_invalidate_semaphore - judge whether to use semaphore
+ *
+ * @adev: amdgpu_device pointer
+ * @vmhub: vmhub type
+ *
+ */
+static bool gmc_v11_0_use_invalidate_semaphore(struct amdgpu_device *adev,
+ uint32_t vmhub)
+{
+ return ((vmhub == AMDGPU_MMHUB0(0)) &&
+ (!amdgpu_sriov_vf(adev)));
+}
+
+static bool gmc_v11_0_get_vmid_pasid_mapping_info(
+ struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ *p_pasid = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid) & 0xffff;
+
+ return !!(*p_pasid);
+}
+
+/**
+ * gmc_v11_0_flush_gpu_tlb - gart tlb flush callback
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ * @vmhub: which hub to flush
+ * @flush_type: the flush type
+ *
+ * Flush the TLB for the requested page table.
+ */
+static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
+{
+ bool use_semaphore = gmc_v11_0_use_invalidate_semaphore(adev, vmhub);
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+ u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
+ /* Use register 17 for GART */
+ const unsigned int eng = 17;
+ unsigned char hub_ip;
+ u32 sem, req, ack;
+ unsigned int i;
+ u32 tmp;
+
+ if ((vmhub == AMDGPU_GFXHUB(0)) && !adev->gfx.is_poweron)
+ return;
+
+ sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+ req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+ ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+ /* flush hdp cache */
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ /* This is necessary for SRIOV as well as for GFXOFF to function
+ * properly under bare metal
+ */
+ if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) &&
+ (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
+ amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
+ 1 << vmid, GET_INST(GC, 0));
+ return;
+ }
+
+ /* This path is needed before KIQ/MES/GFXOFF are set up */
+ hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP;
+
+ spin_lock(&adev->gmc.invalidate_lock);
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore) {
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* a read return value of 1 means semaphore acuqire */
+ tmp = RREG32_RLC_NO_KIQ(sem, hub_ip);
+ if (tmp & 0x1)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
+ }
+
+ WREG32_RLC_NO_KIQ(req, inv_req, hub_ip);
+
+ /* Wait for ACK with a delay.*/
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32_RLC_NO_KIQ(ack, hub_ip);
+ tmp &= 1 << vmid;
+ if (tmp)
+ break;
+
+ udelay(1);
+ }
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ WREG32_RLC_NO_KIQ(sem, 0, hub_ip);
+
+ /* Issue additional private vm invalidation to MMHUB */
+ if ((vmhub != AMDGPU_GFXHUB(0)) &&
+ (hub->vm_l2_bank_select_reserved_cid2) &&
+ !amdgpu_sriov_vf(adev)) {
+ inv_req = RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
+ /* bit 25: RSERVED_CACHE_PRIVATE_INVALIDATION */
+ inv_req |= (1 << 25);
+ /* Issue private invalidation */
+ WREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2, inv_req);
+ /* Read back to ensure invalidation is done*/
+ RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
+ }
+
+ spin_unlock(&adev->gmc.invalidate_lock);
+
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "Timeout waiting for VM flush ACK!\n");
+}
+
+/**
+ * gmc_v11_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ * @flush_type: the flush type
+ * @all_hub: flush all hubs
+ * @inst: is used to select which instance of KIQ to use for the invalidation
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static void gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
+{
+ uint16_t queried;
+ int vmid, i;
+
+ for (vmid = 1; vmid < 16; vmid++) {
+ bool valid;
+
+ valid = gmc_v11_0_get_vmid_pasid_mapping_info(adev, vmid,
+ &queried);
+ if (!valid || queried != pasid)
+ continue;
+
+ if (all_hub) {
+ for_each_set_bit(i, adev->vmhubs_mask,
+ AMDGPU_MAX_VMHUBS)
+ gmc_v11_0_flush_gpu_tlb(adev, vmid, i,
+ flush_type);
+ } else {
+ gmc_v11_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0),
+ flush_type);
+ }
+ }
+}
+
+/*
+ * Emit a TLB flush for @vmid on @ring.
+ *
+ * The emitted sequence is: optional semaphore acquire, write of the low and
+ * high 32 bits of @pd_addr into the per-VMID page table base registers,
+ * invalidation request plus wait for the ACK bit of @vmid, optional semaphore
+ * release. The invalidation request is built with flush type 0.
+ *
+ * Returns @pd_addr unchanged.
+ */
+static uint64_t gmc_v11_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
+{
+ bool use_semaphore = gmc_v11_0_use_invalidate_semaphore(ring->adev, ring->vm_hub);
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
+ unsigned int eng = ring->vm_inv_eng;
+
+ /*
+ * The gpuvm invalidate acknowledge state may be lost across a
+ * power-gating off cycle. As a workaround, acquire the semaphore before
+ * the invalidation and release it afterwards to avoid entering the
+ * power gated state in between.
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /* a read return value of 1 means the semaphore was acquired */
+ amdgpu_ring_emit_reg_wait(ring,
+ hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, 0x1, 0x1);
+
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
+ (hub->ctx_addr_distance * vmid),
+ lower_32_bits(pd_addr));
+
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
+ (hub->ctx_addr_distance * vmid),
+ upper_32_bits(pd_addr));
+
+ /* Write the request and wait until the ACK register has bit @vmid set. */
+ amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
+ hub->eng_distance * eng,
+ hub->vm_inv_eng0_ack +
+ hub->eng_distance * eng,
+ req, 1 << vmid);
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, 0);
+
+ return pd_addr;
+}
+
+/*
+ * Emit a ring write that stores @pasid in the IH VMID lookup table entry of
+ * @vmid. Rings on GFXHUB 0 use regIH_VMID_0_LUT, all other rings use
+ * regIH_VMID_0_LUT_MM.
+ */
+static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int vmid,
+ unsigned int pasid)
+{
+ /* adev is referenced by the SOC15_REG_OFFSET() invocations below. */
+ struct amdgpu_device *adev = ring->adev;
+ bool on_gfxhub = (ring->vm_hub == AMDGPU_GFXHUB(0));
+ uint32_t lut_base;
+
+ lut_base = on_gfxhub ? SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) :
+ SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM);
+
+ amdgpu_ring_emit_wreg(ring, lut_base + vmid, pasid);
+}
+
+/*
+ * PTE format:
+ * 63:59 reserved
+ * 58:57 reserved
+ * 56 F
+ * 55 L
+ * 54 reserved
+ * 53:52 SW
+ * 51 T
+ * 50:48 mtype
+ * 47:12 4k physical page base address
+ * 11:7 fragment
+ * 6 write
+ * 5 read
+ * 4 exe
+ * 3 Z
+ * 2 snooped
+ * 1 system
+ * 0 valid
+ *
+ * PDE format:
+ * 63:59 block fragment size
+ * 58:55 reserved
+ * 54 P
+ * 53:48 reserved
+ * 47:6 physical base address of PD or PTE
+ * 5:3 reserved
+ * 2 C
+ * 1 system
+ * 0 valid
+ */
+
+/*
+ * Adjust the address and flags of a page directory entry in place.
+ *
+ * Entries that are neither marked AMDGPU_PDE_PTE nor AMDGPU_PTE_SYSTEM have
+ * *addr converted with amdgpu_gmc_vram_mc2pa(). The resulting address must
+ * have bits 63:48 and 5:0 clear. The per-level flag fixups only apply when
+ * adev->gmc.translate_further is set.
+ */
+static void gmc_v11_0_get_vm_pde(struct amdgpu_device *adev, int level,
+ uint64_t *addr, uint64_t *flags)
+{
+ if (!(*flags & (AMDGPU_PDE_PTE | AMDGPU_PTE_SYSTEM)))
+ *addr = amdgpu_gmc_vram_mc2pa(adev, *addr);
+ BUG_ON(*addr & 0xFFFF00000000003FULL);
+
+ if (!adev->gmc.translate_further)
+ return;
+
+ switch (level) {
+ case AMDGPU_VM_PDB1:
+ /* Set the block fragment size */
+ if (!(*flags & AMDGPU_PDE_PTE))
+ *flags |= AMDGPU_PDE_BFS(0x9);
+ break;
+ case AMDGPU_VM_PDB0:
+ if (*flags & AMDGPU_PDE_PTE)
+ *flags &= ~AMDGPU_PDE_PTE;
+ else
+ *flags |= AMDGPU_PTE_TF;
+ break;
+ default:
+ break;
+ }
+}
+
+/*
+ * Translate the AMDGPU_VM_* mapping flags in @vm_flags into PTE bits in
+ * *flags. @vm is not referenced here; @bo may be NULL.
+ *
+ * The steps run in order, so later ones override earlier ones: the MTYPE
+ * chosen from @vm_flags is replaced by MTYPE_UC when @bo carries one of the
+ * COHERENT / EXT_COHERENT / UNCACHED creation flags.
+ */
+static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
+ uint64_t *flags)
+{
+ if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+ *flags |= AMDGPU_PTE_EXECUTABLE;
+ else
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
+
+ /* Unknown MTYPE requests fall back to MTYPE_NC via the default label. */
+ switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
+ case AMDGPU_VM_MTYPE_DEFAULT:
+ case AMDGPU_VM_MTYPE_NC:
+ default:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_NC);
+ break;
+ case AMDGPU_VM_MTYPE_WC:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_WC);
+ break;
+ case AMDGPU_VM_MTYPE_CC:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_CC);
+ break;
+ case AMDGPU_VM_MTYPE_UC:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
+ break;
+ }
+
+ if (vm_flags & AMDGPU_VM_PAGE_NOALLOC)
+ *flags |= AMDGPU_PTE_NOALLOC;
+ else
+ *flags &= ~AMDGPU_PTE_NOALLOC;
+
+ /* PRT mappings set PRT, SNOOPED, LOG and SYSTEM and clear VALID. */
+ if (vm_flags & AMDGPU_VM_PAGE_PRT) {
+ *flags |= AMDGPU_PTE_PRT;
+ *flags |= AMDGPU_PTE_SNOOPED;
+ *flags |= AMDGPU_PTE_LOG;
+ *flags |= AMDGPU_PTE_SYSTEM;
+ *flags &= ~AMDGPU_PTE_VALID;
+ }
+
+ if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_EXT_COHERENT |
+ AMDGPU_GEM_CREATE_UNCACHED))
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
+}
+
+/*
+ * Return the size of the framebuffer set up by the VBIOS.
+ *
+ * With D1VGA_MODE_ENABLE set this is AMDGPU_VBIOS_VGA_ALLOCATION; otherwise
+ * it is viewport height * surface pitch * 4, read from the HUBP0 registers.
+ */
+static unsigned int gmc_v11_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+ u32 d1vga_control = RREG32_SOC15(DCE, 0, regD1VGA_CONTROL);
+ u32 viewport, pitch;
+
+ if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE))
+ return AMDGPU_VBIOS_VGA_ALLOCATION;
+
+ viewport = RREG32_SOC15(DCE, 0, regHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
+ pitch = RREG32_SOC15(DCE, 0, regHUBPREQ0_DCSURF_SURFACE_PITCH);
+
+ return REG_GET_FIELD(viewport,
+ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(pitch, HUBPREQ0_DCSURF_SURFACE_PITCH, PITCH) *
+ 4;
+}
+
+/* GMC callback table for GMC v11; installed by gmc_v11_0_set_gmc_funcs(). */
+static const struct amdgpu_gmc_funcs gmc_v11_0_gmc_funcs = {
+ .flush_gpu_tlb = gmc_v11_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v11_0_flush_gpu_tlb_pasid,
+ .emit_flush_gpu_tlb = gmc_v11_0_emit_flush_gpu_tlb,
+ .emit_pasid_mapping = gmc_v11_0_emit_pasid_mapping,
+ .get_vm_pde = gmc_v11_0_get_vm_pde,
+ .get_vm_pte = gmc_v11_0_get_vm_pte,
+ .get_vbios_fb_size = gmc_v11_0_get_vbios_fb_size,
+};
+
+/* Point adev->gmc.gmc_funcs at the GMC v11 callback table. */
+static void gmc_v11_0_set_gmc_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.gmc_funcs = &gmc_v11_0_gmc_funcs;
+}
+
+/*
+ * Fill in the adev->umc parameters for the detected UMC IP version.
+ * Only UMC 8.10.0 is configured; 8.11.0 and unknown versions leave
+ * adev->umc untouched.
+ */
+static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
+ case IP_VERSION(8, 10, 0):
+ adev->umc.channel_inst_num = UMC_V8_10_CHANNEL_INSTANCE_NUM;
+ adev->umc.umc_inst_num = UMC_V8_10_UMC_INSTANCE_NUM;
+ adev->umc.max_ras_err_cnt_per_query = UMC_V8_10_TOTAL_CHANNEL_NUM(adev);
+ adev->umc.channel_offs = UMC_V8_10_PER_CHANNEL_OFFSET;
+ adev->umc.retire_unit = UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM;
+ /* A node count of 4 selects the "ext0" channel index table. */
+ if (adev->umc.node_inst_num == 4)
+ adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl_ext0[0][0][0];
+ else
+ adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl[0][0][0];
+ adev->umc.ras = &umc_v8_10_ras;
+ break;
+ case IP_VERSION(8, 11, 0):
+ break;
+ default:
+ break;
+ }
+}
+
+
+/*
+ * Select the MMHUB callback table from the MMHUB IP version.
+ * Versions without an explicit case use mmhub_v3_0_funcs.
+ */
+static void gmc_v11_0_set_mmhub_funcs(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(3, 0, 1):
+ adev->mmhub.funcs = &mmhub_v3_0_1_funcs;
+ break;
+ case IP_VERSION(3, 0, 2):
+ adev->mmhub.funcs = &mmhub_v3_0_2_funcs;
+ break;
+ case IP_VERSION(3, 3, 0):
+ case IP_VERSION(3, 3, 1):
+ case IP_VERSION(3, 3, 2):
+ adev->mmhub.funcs = &mmhub_v3_3_funcs;
+ break;
+ default:
+ adev->mmhub.funcs = &mmhub_v3_0_funcs;
+ break;
+ }
+}
+
+/*
+ * Select the GFXHUB callback table from the GC IP version.
+ * Versions without an explicit case use gfxhub_v3_0_funcs.
+ */
+static void gmc_v11_0_set_gfxhub_funcs(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 3):
+ adev->gfxhub.funcs = &gfxhub_v3_0_3_funcs;
+ break;
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ adev->gfxhub.funcs = &gfxhub_v11_5_0_funcs;
+ break;
+ default:
+ adev->gfxhub.funcs = &gfxhub_v3_0_funcs;
+ break;
+ }
+}
+
+/*
+ * Early init: install the gfxhub, mmhub, gmc, irq and umc callback tables,
+ * then set the shared and private aperture ranges (4 GiB each) and the
+ * noretry flags. Always returns 0.
+ */
+static int gmc_v11_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ gmc_v11_0_set_gfxhub_funcs(adev);
+ gmc_v11_0_set_mmhub_funcs(adev);
+ gmc_v11_0_set_gmc_funcs(adev);
+ gmc_v11_0_set_irq_funcs(adev);
+ gmc_v11_0_set_umc_funcs(adev);
+
+ /* Each aperture ends at start + 4 GiB - 1 (inclusive end address). */
+ adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
+ adev->gmc.shared_aperture_end =
+ adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.private_aperture_start = 0x1000000000000000ULL;
+ adev->gmc.private_aperture_end =
+ adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;
+
+ return 0;
+}
+
+/*
+ * Late init: calls amdgpu_gmc_allocate_vm_inv_eng(), then
+ * amdgpu_gmc_ras_late_init(), and finally amdgpu_irq_get() on the VM fault
+ * interrupt source. Returns the first non-zero result, or the result of
+ * amdgpu_irq_get().
+ */
+static int gmc_v11_0_late_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_gmc_allocate_vm_inv_eng(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_gmc_ras_late_init(adev);
+ if (r)
+ return r;
+
+ return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
+}
+
+/*
+ * Place VRAM, GART and (optionally) AGP in the GPU address space and record
+ * the VRAM base offset used by the VM manager.
+ *
+ * The AGP aperture is only set up when not running as an SR-IOV VF, the GC
+ * IP version is below 11.5.0 and the amdgpu_agp parameter equals 1.
+ */
+static void gmc_v11_0_vram_gtt_location(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc)
+{
+ u64 fb_base = adev->mmhub.funcs->get_fb_location(adev);
+
+ amdgpu_gmc_set_agp_default(adev, mc);
+ amdgpu_gmc_vram_location(adev, &adev->gmc, fb_base);
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_HIGH);
+
+ if (!amdgpu_sriov_vf(adev) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 5, 0)) &&
+ (amdgpu_agp == 1))
+ amdgpu_gmc_agp_location(adev, mc);
+
+ /* base offset of vram pages: 0 for an SR-IOV VF, else the MC FB offset */
+ adev->vm_manager.vram_base_offset = amdgpu_sriov_vf(adev) ?
+ 0 : adev->mmhub.funcs->get_mc_fb_offset(adev);
+}
+
+/**
+ * gmc_v11_0_mc_init - initialize the memory controller driver params
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up the amount of vram, the aperture base/size and the gart size, and
+ * decide how to place vram and gart within the GPU's physical address space.
+ * Returns 0 for success, or the error from amdgpu_device_resize_fb_bar().
+ */
+static int gmc_v11_0_mc_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* get_memsize() is scaled by 1 MiB here, i.e. treated as a size in MB */
+ adev->gmc.mc_vram_size =
+ adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+ adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
+
+ /* The FB BAR resize is only attempted on non-APU parts. */
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_device_resize_fb_bar(adev);
+ if (r)
+ return r;
+ }
+ adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
+ adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
+
+#ifdef CONFIG_X86_64
+ /*
+ * On APUs that are not passed through, the aperture is taken from the
+ * MC FB offset and the real VRAM size instead of PCI BAR 0.
+ */
+ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) {
+ adev->gmc.aper_base = adev->mmhub.funcs->get_mc_fb_offset(adev);
+ adev->gmc.aper_size = adev->gmc.real_vram_size;
+ }
+#endif
+ /* In case the PCI BAR is larger than the actual amount of vram */
+ adev->gmc.visible_vram_size = adev->gmc.aper_size;
+ if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
+ adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
+
+ /* set the gart size: 512 MiB when amdgpu_gart_size is -1, else that many MiB */
+ if (amdgpu_gart_size == -1)
+ adev->gmc.gart_size = 512ULL << 20;
+ else
+ adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
+
+ gmc_v11_0_vram_gtt_location(adev, &adev->gmc);
+
+ return 0;
+}
+
+/*
+ * Set up the GART: initialize the common GART structure, size the table at
+ * 8 bytes per GPU page, set the default GART PTE flags (MTYPE_UC plus
+ * EXECUTABLE) and allocate the table in VRAM.
+ *
+ * Warns and returns 0 if a GART buffer object already exists; otherwise
+ * returns the result of amdgpu_gart_init() / amdgpu_gart_table_vram_alloc().
+ */
+static int gmc_v11_0_gart_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->gart.bo) {
+ WARN(1, "PCIE GART already initialized\n");
+ return 0;
+ }
+
+ /* Initialize common gart structure */
+ r = amdgpu_gart_init(adev);
+ if (r)
+ return r;
+
+ adev->gart.table_size = adev->gart.num_gpu_pages * 8;
+ adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC) |
+ AMDGPU_PTE_EXECUTABLE;
+
+ return amdgpu_gart_table_vram_alloc(adev);
+}
+
+/*
+ * Software init for GMC v11.
+ *
+ * In order: initializes the mmhub and gfxhub, reads the VRAM info from the
+ * firmware tables, sets up the VM hubs and VM size for known GC versions,
+ * registers the VM fault and ECC interrupt sources, sets the DMA mask, runs
+ * mc_init, initializes the BO manager, the GART and the VM manager, and
+ * finishes with the GMC RAS software init.
+ *
+ * Returns 0 on success or the first error encountered. Steps that already
+ * succeeded are not undone here when a later one fails.
+ */
+static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->mmhub.funcs->init(adev);
+
+ adev->gfxhub.funcs->init(adev);
+
+ spin_lock_init(&adev->gmc.invalidate_lock);
+
+ /*
+ * NOTE(review): r is overwritten below before it is ever read, so a
+ * failure here is silently ignored and the zero-initialized width,
+ * type and vendor are stored. Presumably deliberate best-effort --
+ * confirm.
+ */
+ r = amdgpu_atomfirmware_get_vram_info(adev,
+ &vram_width, &vram_type, &vram_vendor);
+ adev->gmc.vram_width = vram_width;
+
+ adev->gmc.vram_type = vram_type;
+ adev->gmc.vram_vendor = vram_vendor;
+
+ /* The mall_size is already calculated as mall_size_per_umc * num_umc.
+ * However, for gfx1151, which features a 2-to-1 UMC mapping,
+ * the result must be multiplied by 2 to determine the actual mall size.
+ */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 5, 1):
+ adev->gmc.mall_size *= 2;
+ break;
+ default:
+ break;
+ }
+
+ /* GC versions not listed here get no VM hubs and no VM size adjustment. */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
+ /*
+ * To fulfill 4-level page support,
+ * vm size is 256TB (48bit), maximum size,
+ * block size 512 (9bit)
+ */
+ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ break;
+ default:
+ break;
+ }
+
+ /* This interrupt is VMC page fault.*/
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_VMC,
+ VMC_1_0__SRCID__VM_FAULT,
+ &adev->gmc.vm_fault);
+
+ if (r)
+ return r;
+
+ /* The GFX client's UTCL2 fault is routed to the same vm_fault source. */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ UTCL2_1_0__SRCID__FAULT,
+ &adev->gmc.vm_fault);
+ if (r)
+ return r;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* interrupt sent to DF. */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_DF, 0,
+ &adev->gmc.ecc_irq);
+ if (r)
+ return r;
+ }
+
+ /*
+ * Set the internal MC address mask. This is the max address of the GPU's
+ * internal address space.
+ */
+ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
+
+ /* DMA addressing is limited to 44 bits. */
+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
+ if (r) {
+ dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n");
+ return r;
+ }
+
+ adev->need_swiotlb = drm_need_swiotlb(44);
+
+ r = gmc_v11_0_mc_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_gmc_get_vbios_allocations(adev);
+
+ /* Memory manager */
+ r = amdgpu_bo_init(adev);
+ if (r)
+ return r;
+
+ r = gmc_v11_0_gart_init(adev);
+ if (r)
+ return r;
+
+ /*
+ * number of VMs
+ * VMID 0 is reserved for System
+ * amdgpu graphics/compute will use VMIDs 1-7
+ * amdkfd will use VMIDs 8-15
+ * (with adev->gfx.disable_kq set, the KFD range starts at VMID 1)
+ */
+ adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 1 : 8;
+
+ amdgpu_vm_manager_init(adev);
+
+ r = amdgpu_gmc_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * gmc_v11_0_gart_fini - free the GART table
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Releases the GART table through amdgpu_gart_table_vram_free().
+ */
+static void gmc_v11_0_gart_fini(struct amdgpu_device *adev)
+{
+ amdgpu_gart_table_vram_free(adev);
+}
+
+/*
+ * Software teardown: shuts down the VM manager, frees the GART table, force
+ * releases GEM objects and finally tears down the BO manager.
+ * Always returns 0.
+ */
+static int gmc_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_vm_manager_fini(adev);
+ gmc_v11_0_gart_fini(adev);
+ amdgpu_gem_force_release(adev);
+ amdgpu_bo_fini(adev);
+
+ return 0;
+}
+
+/*
+ * Program the "golden" register defaults. Only an SR-IOV VF writes anything:
+ * it clears the MMHUB 0 vm_contexts_disable register.
+ */
+static void gmc_v11_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *mmhub;
+
+ if (!amdgpu_sriov_vf(adev))
+ return;
+
+ mmhub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ WREG32(mmhub->vm_contexts_disable, 0);
+}
+
+/**
+ * gmc_v11_0_gart_enable - gart enable
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Recovers the GTT manager, enables the GART through the MMHUB callback,
+ * flushes HDP, sets the default fault enable state and flushes the MMHUB 0
+ * TLB for VMID 0.
+ *
+ * Returns 0 on success, -EINVAL when no GART buffer object exists, or the
+ * error from the MMHUB gart_enable callback.
+ */
+static int gmc_v11_0_gart_enable(struct amdgpu_device *adev)
+{
+ int r;
+ bool value;
+
+ if (adev->gart.bo == NULL) {
+ dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
+ return -EINVAL;
+ }
+
+ amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
+
+ r = adev->mmhub.funcs->gart_enable(adev);
+ if (r)
+ return r;
+
+ /* Flush HDP after it is initialized */
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ /* true unless amdgpu_vm_fault_stop is AMDGPU_VM_FAULT_STOP_ALWAYS */
+ value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
+
+ adev->mmhub.funcs->set_fault_enable_default(adev, value);
+ gmc_v11_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0);
+
+ DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ (unsigned int)(adev->gmc.gart_size >> 20),
+ (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
+
+ return 0;
+}
+
+/*
+ * Hardware init: programs the golden registers, enables the GART and, when
+ * the UMC provides an init_registers callback, invokes it.
+ * Returns 0 on success or the error from gmc_v11_0_gart_enable().
+ */
+static int gmc_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ /* Set unless amdgpu_emu_mode is non-zero. */
+ adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode;
+
+ /* The sequence of these two function calls matters.*/
+ gmc_v11_0_init_golden_registers(adev);
+
+ r = gmc_v11_0_gart_enable(adev);
+ if (r)
+ return r;
+
+ if (adev->umc.funcs && adev->umc.funcs->init_registers)
+ adev->umc.funcs->init_registers(adev);
+
+ return 0;
+}
+
+/**
+ * gmc_v11_0_gart_disable - gart disable
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disables the GART by calling the MMHUB gart_disable callback.
+ */
+static void gmc_v11_0_gart_disable(struct amdgpu_device *adev)
+{
+ adev->mmhub.funcs->gart_disable(adev);
+}
+
+/*
+ * Hardware teardown: drops the VM fault interrupt reference, drops the ECC
+ * interrupt reference when it was set up and UMC RAS is supported, and
+ * disables the GART. An SR-IOV VF returns early without touching anything.
+ * Always returns 0.
+ */
+static int gmc_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* full access mode, so don't touch any GMC register */
+ DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
+ return 0;
+ }
+
+ amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+
+ /* ecc_irq.funcs is only set when the ECC interrupt source was set up. */
+ if (adev->gmc.ecc_irq.funcs &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+ amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+
+ gmc_v11_0_gart_disable(adev);
+
+ return 0;
+}
+
+/*
+ * Suspend callback: tears the hardware state down via gmc_v11_0_hw_fini()
+ * and hands its return code to the caller instead of discarding it.
+ */
+static int gmc_v11_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return gmc_v11_0_hw_fini(ip_block);
+}
+
+/*
+ * Resume callback: re-runs gmc_v11_0_hw_init() and, on success, calls
+ * amdgpu_vmid_reset_all(). Returns 0 or the hw_init error.
+ */
+static int gmc_v11_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ r = gmc_v11_0_hw_init(ip_block);
+ if (r)
+ return r;
+
+ amdgpu_vmid_reset_all(ip_block->adev);
+
+ return 0;
+}
+
+/* Idle query callback; unconditionally reports idle. */
+static bool gmc_v11_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ /* MC is always ready in GMC v11.*/
+ return true;
+}
+
+/* Wait-for-idle callback; returns 0 immediately without waiting. */
+static int gmc_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ /* There is no need to wait for MC idle in GMC v11.*/
+ return 0;
+}
+
+/*
+ * Apply @state to the MMHUB clock gating first and, only if that succeeded,
+ * to the ATHUB. Returns the MMHUB error, or the ATHUB result.
+ */
+static int gmc_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret = adev->mmhub.funcs->set_clockgating(adev, state);
+
+ return ret ? ret : athub_v3_0_set_clockgating(adev, state);
+}
+
+/*
+ * Collect the clock gating state into *flags by calling the MMHUB
+ * get_clockgating callback followed by athub_v3_0_get_clockgating().
+ */
+static void gmc_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->mmhub.funcs->get_clockgating(adev, flags);
+
+ athub_v3_0_get_clockgating(adev, flags);
+}
+
+/* Power gating callback; does nothing and always returns 0. */
+static int gmc_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+/* IP block callbacks for GMC v11; referenced by gmc_v11_0_ip_block. */
+const struct amd_ip_funcs gmc_v11_0_ip_funcs = {
+ .name = "gmc_v11_0",
+ .early_init = gmc_v11_0_early_init,
+ .sw_init = gmc_v11_0_sw_init,
+ .hw_init = gmc_v11_0_hw_init,
+ .late_init = gmc_v11_0_late_init,
+ .sw_fini = gmc_v11_0_sw_fini,
+ .hw_fini = gmc_v11_0_hw_fini,
+ .suspend = gmc_v11_0_suspend,
+ .resume = gmc_v11_0_resume,
+ .is_idle = gmc_v11_0_is_idle,
+ .wait_for_idle = gmc_v11_0_wait_for_idle,
+ .set_clockgating_state = gmc_v11_0_set_clockgating_state,
+ .set_powergating_state = gmc_v11_0_set_powergating_state,
+ .get_clockgating_state = gmc_v11_0_get_clockgating_state,
+};
+
+/* IP block descriptor: GMC block type, version 11.0.0, using gmc_v11_0_ip_funcs. */
+const struct amdgpu_ip_block_version gmc_v11_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GMC,
+ .major = 11,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gmc_v11_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.h
new file mode 100644
index 000000000000..def4d5516f82
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GMC_V11_0_H__
+#define __GMC_V11_0_H__
+
+extern const struct amd_ip_funcs gmc_v11_0_ip_funcs;
+extern const struct amdgpu_ip_block_version gmc_v11_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
new file mode 100644
index 000000000000..7a9d6894e321
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -0,0 +1,1070 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/pci.h>
+
+#include <drm/drm_cache.h>
+
+#include "amdgpu.h"
+#include "amdgpu_atomfirmware.h"
+#include "gmc_v12_0.h"
+#include "athub/athub_4_1_0_sh_mask.h"
+#include "athub/athub_4_1_0_offset.h"
+#include "oss/osssys_7_0_0_offset.h"
+#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
+#include "soc24_enum.h"
+#include "soc24.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+#include "nbif_v6_3_1.h"
+#include "gfxhub_v12_0.h"
+#include "mmhub_v4_1_0.h"
+#include "athub_v4_1_0.h"
+#include "umc_v8_14.h"
+
+/* ECC interrupt .set callback; no state is programmed, always returns 0. */
+static int gmc_v12_0_ecc_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+/*
+ * VM fault interrupt .set callback: enable or disable the VM fault masks of
+ * MMHUB 0 and, unless the device is in s0ix, of GFXHUB 0. States other than
+ * ENABLE/DISABLE change nothing. Always returns 0.
+ */
+static int gmc_v12_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src, unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ bool enable;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ enable = false;
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ enable = true;
+ break;
+ default:
+ return 0;
+ }
+
+ /* MM HUB */
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), enable);
+
+ /* GFX HUB */
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (!adev->in_s0ix)
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), enable);
+
+ return 0;
+}
+
+/*
+ * VM fault interrupt handler.
+ *
+ * The faulting address is rebuilt from src_data[0] (shifted left by 12) and
+ * the low four bits of src_data[1] (placed at bits 47:44). Retry faults are
+ * filtered, delegated to another IH ring or handed to
+ * amdgpu_vm_handle_fault(); each of those paths returns 1. Everything else
+ * falls through to reading/clearing the L2 protection fault status (not on
+ * an SR-IOV VF) and a rate-limited error report, and returns 0.
+ */
+static int gmc_v12_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct amdgpu_vmhub *hub;
+ bool retry_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY);
+ bool write_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE);
+ uint32_t status = 0;
+ u64 addr;
+
+ addr = (u64)entry->src_data[0] << 12;
+ addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+
+ /*
+ * The hub is chosen from the IH client id here, while the reporting
+ * code further down keys on entry->vmid_src.
+ */
+ if (entry->client_id == SOC21_IH_CLIENTID_VMC)
+ hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ else
+ hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ if (retry_fault) {
+ /* Returning 1 here also prevents sending the IV to the KFD */
+
+ /* Process it only if it's the first fault for this address */
+ if (entry->ih != &adev->irq.ih_soft &&
+ amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
+ entry->timestamp))
+ return 1;
+
+ /* Delegate it to a different ring if the hardware hasn't
+ * already done it.
+ */
+ if (entry->ih == &adev->irq.ih) {
+ amdgpu_irq_delegate(adev, entry, 8);
+ return 1;
+ }
+
+ /* Try to handle the recoverable page faults by filling page
+ * tables
+ */
+ if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr,
+ entry->timestamp, write_fault))
+ return 1;
+ }
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /*
+ * Issue a dummy read to wait for the status register to
+ * be updated to avoid reading an incorrect value due to
+ * the new fast GRBM interface.
+ */
+ if (entry->vmid_src == AMDGPU_GFXHUB(0))
+ RREG32(hub->vm_l2_pro_fault_status);
+
+ status = RREG32(hub->vm_l2_pro_fault_status);
+ /* Set bit 0 of the fault control register; other bits are kept. */
+ WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+ amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
+ entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
+ }
+
+ if (printk_ratelimit()) {
+ struct amdgpu_task_info *task_info;
+
+ dev_err(adev->dev,
+ "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
+ entry->vmid_src ? "mmhub" : "gfxhub",
+ entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ amdgpu_vm_print_task_info(adev, task_info);
+ amdgpu_vm_put_task_info(task_info);
+ }
+
+ dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n",
+ addr, entry->client_id);
+
+ /* Only print L2 fault status if the status register could be read and
+ * contains useful information
+ */
+ if (status != 0)
+ hub->vmhub_funcs->print_l2_protection_fault_status(adev, status);
+ }
+
+ return 0;
+}
+
+/* Callbacks of the VM fault interrupt source. */
+static const struct amdgpu_irq_src_funcs gmc_v12_0_irq_funcs = {
+ .set = gmc_v12_0_vm_fault_interrupt_state,
+ .process = gmc_v12_0_process_interrupt,
+};
+
+/* Callbacks of the ECC interrupt source; processing is done by the common UMC code. */
+static const struct amdgpu_irq_src_funcs gmc_v12_0_ecc_funcs = {
+ .set = gmc_v12_0_ecc_interrupt_state,
+ .process = amdgpu_umc_process_ecc_irq,
+};
+
+/*
+ * Install the VM fault interrupt callbacks and, unless running as an SR-IOV
+ * VF, the ECC interrupt callbacks. Each source has a single interrupt type.
+ */
+static void gmc_v12_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.vm_fault.num_types = 1;
+ adev->gmc.vm_fault.funcs = &gmc_v12_0_irq_funcs;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->gmc.ecc_irq.num_types = 1;
+ adev->gmc.ecc_irq.funcs = &gmc_v12_0_ecc_funcs;
+ }
+}
+
+/**
+ * gmc_v12_0_use_invalidate_semaphore - judge whether to use semaphore
+ *
+ * @adev: amdgpu_device pointer
+ * @vmhub: vmhub type
+ *
+ * Returns true only for MMHUB 0 when not running as an SR-IOV VF.
+ */
+static bool gmc_v12_0_use_invalidate_semaphore(struct amdgpu_device *adev,
+ uint32_t vmhub)
+{
+ if (amdgpu_sriov_vf(adev))
+ return false;
+
+ return vmhub == AMDGPU_MMHUB0(0);
+}
+
+/*
+ * Read the pasid mapped to @vmid from the IH VMID lookup table
+ * (regIH_VMID_0_LUT + vmid, low 16 bits) into *p_pasid.
+ * Returns true when the stored pasid is non-zero.
+ */
+static bool gmc_v12_0_get_vmid_pasid_mapping_info(
+ struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ u32 lut_entry = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid);
+
+ *p_pasid = lut_entry & 0xffff;
+
+ return *p_pasid != 0;
+}
+
+/*
+ * GART
+ * VMID 0 is the physical GPU addresses as used by the kernel.
+ * VMIDs 1-15 are used for userspace clients and are handled
+ * by the amdgpu vm/hsa code.
+ */
+
+/*
+ * Flush the TLB of @vmid on @vmhub by direct register access, using
+ * invalidation engine 17.
+ *
+ * Under adev->gmc.invalidate_lock: optionally acquire the invalidation
+ * semaphore, write the invalidation request, poll the ACK register for the
+ * bit of @vmid (up to adev->usec_timeout iterations of 1 us), release the
+ * semaphore and, for non-GFXHUB hubs that provide the register, issue an
+ * additional private invalidation. Timeouts are logged but not returned to
+ * the caller.
+ */
+static void gmc_v12_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
+ unsigned int vmhub, uint32_t flush_type)
+{
+ bool use_semaphore = gmc_v12_0_use_invalidate_semaphore(adev, vmhub);
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+ u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
+ u32 tmp;
+ /* Use register 17 for GART */
+ const unsigned eng = 17;
+ unsigned int i;
+ unsigned char hub_ip = 0;
+
+ hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
+ GC_HWIP : MMHUB_HWIP;
+
+ spin_lock(&adev->gmc.invalidate_lock);
+ /*
+ * The gpuvm invalidate acknowledge state may be lost across a
+ * power-gating off cycle. As a workaround, acquire the semaphore before
+ * the invalidation and release it afterwards to avoid entering the
+ * power gated state in between.
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore) {
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* a read return value of 1 means the semaphore was acquired */
+ tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, hub_ip);
+ if (tmp & 0x1)
+ break;
+ udelay(1);
+ }
+
+ /* A failed acquire is only logged; the flush is still attempted. */
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev,
+ "Timeout waiting for sem acquire in VM flush!\n");
+ }
+
+ WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req, hub_ip);
+
+ /* Wait for ACK with a delay.*/
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
+ hub->eng_distance * eng, hub_ip);
+ tmp &= 1 << vmid;
+ if (tmp)
+ break;
+
+ udelay(1);
+ }
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, 0, hub_ip);
+
+ /* Issue additional private vm invalidation to MMHUB */
+ if ((vmhub != AMDGPU_GFXHUB(0)) &&
+ (hub->vm_l2_bank_select_reserved_cid2) &&
+ !amdgpu_sriov_vf(adev)) {
+ inv_req = RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
+ /* bit 25: RESERVED_CACHE_PRIVATE_INVALIDATION */
+ inv_req |= (1 << 25);
+ /* Issue private invalidation */
+ WREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2, inv_req);
+ /* Read back to ensure invalidation is done*/
+ RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
+ }
+
+ spin_unlock(&adev->gmc.invalidate_lock);
+
+ /* i still holds the iteration count of the ACK polling loop above. */
+ if (i < adev->usec_timeout)
+ return;
+
+ dev_err(adev->dev, "Timeout waiting for VM flush ACK!\n");
+}
+
+/**
+ * gmc_v12_0_flush_gpu_tlb - gart tlb flush callback
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ * @vmhub: which hub to flush
+ * @flush_type: the flush type
+ *
+ * Flush the TLB for the requested page table. Nothing is done for GFXHUB 0
+ * while adev->gfx.is_poweron is false. When the KIQ or MES ring is ready (and
+ * the device is not an SR-IOV VF, or is in SR-IOV runtime) the invalidation
+ * goes through amdgpu_gmc_fw_reg_write_reg_wait(); otherwise it is done by
+ * direct register access in gmc_v12_0_flush_vm_hub().
+ */
+static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
+{
+ if ((vmhub == AMDGPU_GFXHUB(0)) && !adev->gfx.is_poweron)
+ return;
+
+ /* flush hdp cache */
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ /* This is necessary for SRIOV as well as for GFXOFF to function
+ * properly under bare metal
+ */
+ if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) &&
+ (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+ const unsigned eng = 17;
+ u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
+ u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+ u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+ amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
+ 1 << vmid, GET_INST(GC, 0));
+ return;
+ }
+
+ /*
+ * Honor the caller's flush_type on the direct register path as well, so
+ * both paths issue the same kind of invalidation.
+ */
+ gmc_v12_0_flush_vm_hub(adev, vmid, vmhub, flush_type);
+}
+
+/**
+ * gmc_v12_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flushed
+ * @flush_type: the flush type
+ * @all_hub: flush all hubs
+ * @inst: is used to select which instance of KIQ to use for the invalidation;
+ * not referenced by this implementation
+ *
+ * Flush the TLB for the requested pasid. When unified MES is enabled, its
+ * scheduler ring is ready and the MES scheduler version is at least 0x84, the
+ * flush is requested from MES; otherwise VMIDs 1-15 are searched for @pasid
+ * and each match is flushed with gmc_v12_0_flush_gpu_tlb().
+ */
+static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
+{
+ uint16_t queried;
+ int vmid, i;
+
+ if (adev->enable_uni_mes && adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready &&
+ (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x84) {
+ struct mes_inv_tlbs_pasid_input input = {0};
+ input.pasid = pasid;
+ input.flush_type = flush_type;
+ input.hub_id = AMDGPU_GFXHUB(0);
+ /* MES will invalidate all gc_hub for the device from master */
+ adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
+ if (all_hub) {
+ /* Only need to invalidate mm_hub now, gfx12 only support one mmhub */
+ input.hub_id = AMDGPU_MMHUB0(0);
+ adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
+ }
+ return;
+ }
+
+ /* Fallback: look the pasid up in the VMID table and flush each match. */
+ for (vmid = 1; vmid < 16; vmid++) {
+ bool valid;
+
+ valid = gmc_v12_0_get_vmid_pasid_mapping_info(adev, vmid,
+ &queried);
+ if (!valid || queried != pasid)
+ continue;
+
+ if (all_hub) {
+ for_each_set_bit(i, adev->vmhubs_mask,
+ AMDGPU_MAX_VMHUBS)
+ gmc_v12_0_flush_gpu_tlb(adev, vmid, i,
+ flush_type);
+ } else {
+ gmc_v12_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0),
+ flush_type);
+ }
+ }
+}
+
+/*
+ * Emit a TLB flush for @vmid on @ring.
+ *
+ * The emitted sequence is: optional semaphore acquire, write of the low and
+ * high 32 bits of @pd_addr into the per-VMID page table base registers,
+ * invalidation request plus wait for the ACK bit of @vmid, optional semaphore
+ * release. The invalidation request is built with flush type 0.
+ *
+ * Returns @pd_addr unchanged.
+ */
+static uint64_t gmc_v12_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ bool use_semaphore = gmc_v12_0_use_invalidate_semaphore(ring->adev, ring->vm_hub);
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
+ unsigned eng = ring->vm_inv_eng;
+
+ /*
+ * The gpuvm invalidate acknowledge state may be lost across a
+ * power-gating off cycle. As a workaround, acquire the semaphore before
+ * the invalidation and release it afterwards to avoid entering the
+ * power gated state in between.
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /* a read return value of 1 means the semaphore was acquired */
+ amdgpu_ring_emit_reg_wait(ring,
+ hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, 0x1, 0x1);
+
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
+ (hub->ctx_addr_distance * vmid),
+ lower_32_bits(pd_addr));
+
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
+ (hub->ctx_addr_distance * vmid),
+ upper_32_bits(pd_addr));
+
+ /* Write the request and wait until the ACK register has bit @vmid set. */
+ amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
+ hub->eng_distance * eng,
+ hub->vm_inv_eng0_ack +
+ hub->eng_distance * eng,
+ req, 1 << vmid);
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, 0);
+
+ return pd_addr;
+}
+
+/*
+ * Emit a ring register write that stores @pasid in the IH VMID lookup table
+ * entry for @vmid. Rings on GFXHUB 0 use regIH_VMID_0_LUT, all other rings
+ * use regIH_VMID_0_LUT_MM.
+ */
+static void gmc_v12_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
+					 unsigned pasid)
+{
+	/*
+	 * NOTE(review): adev is only kept in scope for SOC15_REG_OFFSET();
+	 * presumably the macro expands to a reference to it - confirm.
+	 */
+	struct amdgpu_device *adev = ring->adev;
+	const bool on_gfxhub = (ring->vm_hub == AMDGPU_GFXHUB(0));
+	uint32_t lut_base;
+
+	lut_base = on_gfxhub ?
+		SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) :
+		SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM);
+
+	amdgpu_ring_emit_wreg(ring, lut_base + vmid, pasid);
+}
+
+/*
+ * PTE format:
+ * 63 P
+ * 62:59 reserved
+ * 58 D
+ * 57 G
+ * 56 T
+ * 55:54 M
+ * 53:52 SW
+ * 51:48 reserved for future
+ * 47:12 4k physical page base address
+ * 11:7 fragment
+ * 6 write
+ * 5 read
+ * 4 exe
+ * 3 Z
+ * 2 snooped
+ * 1 system
+ * 0 valid
+ *
+ * PDE format:
+ * 63 P
+ * 62:58 block fragment size
+ * 57 reserved
+ * 56 A
+ * 55:54 M
+ * 53:52 reserved
+ * 51:48 reserved for future
+ * 47:6 physical base address of PD or PTE
+ * 5:3 reserved
+ * 2 C
+ * 1 system
+ * 0 valid
+ */
+
+/*
+ * Adjust a page directory entry address and its flags for GFX12.
+ *
+ * Entries that are neither flagged AMDGPU_PDE_PTE_GFX12 nor
+ * AMDGPU_PTE_SYSTEM have their address rebased from gmc.vram_start to
+ * vm_manager.vram_base_offset. The resulting address must not have any of
+ * the bits in 0xFFFF00000000003F set. The per-level flag fixups only apply
+ * when gmc.translate_further is set.
+ */
+static void gmc_v12_0_get_vm_pde(struct amdgpu_device *adev, int level,
+				 uint64_t *addr, uint64_t *flags)
+{
+	const uint64_t no_rebase_bits = AMDGPU_PDE_PTE_GFX12 | AMDGPU_PTE_SYSTEM;
+
+	if (!(*flags & no_rebase_bits))
+		*addr += adev->vm_manager.vram_base_offset -
+			 adev->gmc.vram_start;
+	BUG_ON(*addr & 0xFFFF00000000003FULL);
+
+	if (!adev->gmc.translate_further)
+		return;
+
+	switch (level) {
+	case AMDGPU_VM_PDB1:
+		/* Set the block fragment size */
+		if (!(*flags & AMDGPU_PDE_PTE_GFX12))
+			*flags |= AMDGPU_PDE_BFS_GFX12(0x9);
+		break;
+	case AMDGPU_VM_PDB0:
+		/* Clearing an already clear bit is a no-op, so no test is needed. */
+		*flags &= ~AMDGPU_PDE_PTE_GFX12;
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Translate the AMDGPU_VM_* mapping flags in @vm_flags, plus the creation
+ * flags of an optional @bo, into GFX12 PTE bits in *@flags.
+ *
+ * @vm is not referenced. @bo may be NULL, in which case the BO-dependent
+ * bits are left alone.
+ */
+static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev,
+				 struct amdgpu_vm *vm,
+				 struct amdgpu_bo *bo,
+				 uint32_t vm_flags,
+				 uint64_t *flags)
+{
+	uint64_t pte = *flags;
+
+	if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+		pte |= AMDGPU_PTE_EXECUTABLE;
+	else
+		pte &= ~AMDGPU_PTE_EXECUTABLE;
+
+	/* Only an explicit UC request yields MTYPE_UC; everything else is NC. */
+	if ((vm_flags & AMDGPU_VM_MTYPE_MASK) == AMDGPU_VM_MTYPE_UC)
+		pte = AMDGPU_PTE_MTYPE_GFX12(pte, MTYPE_UC);
+	else
+		pte = AMDGPU_PTE_MTYPE_GFX12(pte, MTYPE_NC);
+
+	if (vm_flags & AMDGPU_VM_PAGE_NOALLOC)
+		pte |= AMDGPU_PTE_NOALLOC;
+	else
+		pte &= ~AMDGPU_PTE_NOALLOC;
+
+	/* PRT mappings are marked as snooped system PTEs and made invalid. */
+	if (vm_flags & AMDGPU_VM_PAGE_PRT) {
+		pte |= AMDGPU_PTE_PRT_GFX12;
+		pte |= AMDGPU_PTE_SNOOPED;
+		pte |= AMDGPU_PTE_SYSTEM;
+		pte |= AMDGPU_PTE_IS_PTE;
+		pte &= ~AMDGPU_PTE_VALID;
+	}
+
+	if (bo) {
+		if (bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
+			pte |= AMDGPU_PTE_DCC;
+
+		/* An uncached BO overrides whatever MTYPE was chosen above. */
+		if (bo->flags & AMDGPU_GEM_CREATE_UNCACHED)
+			pte = AMDGPU_PTE_MTYPE_GFX12(pte, MTYPE_UC);
+	}
+
+	*flags = pte;
+}
+
+/* get_vbios_fb_size callback: this implementation always reports a size of 0. */
+static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
+/*
+ * Compute the DCC alignment from gfx.config.max_texture_channel_caches.
+ *
+ * Returns 0 for any GC IP version other than 12.0.0 and 12.0.1. Otherwise
+ * the result is factor * channels * SZ_1K, where factor is channels / SZ_4
+ * when the channel count is a power of two and the channel count rounded up
+ * to the next power of two when it is not.
+ */
+static unsigned int gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev)
+{
+	unsigned int channels, factor;
+
+	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+	case IP_VERSION(12, 0, 0):
+	case IP_VERSION(12, 0, 1):
+		break;
+	default:
+		return 0;
+	}
+
+	channels = adev->gfx.config.max_texture_channel_caches;
+	if (!is_power_of_2(channels))
+		factor = roundup_pow_of_two(channels);
+	else
+		factor = (unsigned int)(channels / SZ_4);
+
+	return (unsigned int)(factor * channels * SZ_1K);
+}
+
+/* GMC callback table for GMC v12.0; installed by gmc_v12_0_set_gmc_funcs(). */
+static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = {
+ .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid,
+ .emit_flush_gpu_tlb = gmc_v12_0_emit_flush_gpu_tlb,
+ .emit_pasid_mapping = gmc_v12_0_emit_pasid_mapping,
+ .get_vm_pde = gmc_v12_0_get_vm_pde,
+ .get_vm_pte = gmc_v12_0_get_vm_pte,
+ .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size,
+ .get_dcc_alignment = gmc_v12_0_get_dcc_alignment,
+};
+
+/* Point adev->gmc.gmc_funcs at the GMC v12.0 callback table. */
+static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.gmc_funcs = &gmc_v12_0_gmc_funcs;
+}
+
+/*
+ * Fill in the UMC instance counts, channel offsets and RAS handler for the
+ * detected UMC IP version. Only UMC 8.14.0 is handled; for any other
+ * version adev->umc is left untouched.
+ */
+static void gmc_v12_0_set_umc_funcs(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
+ case IP_VERSION(8, 14, 0):
+ adev->umc.channel_inst_num = UMC_V8_14_CHANNEL_INSTANCE_NUM;
+ adev->umc.umc_inst_num = UMC_V8_14_UMC_INSTANCE_NUM(adev);
+ adev->umc.node_inst_num = 0;
+ adev->umc.max_ras_err_cnt_per_query = UMC_V8_14_TOTAL_CHANNEL_NUM(adev);
+ adev->umc.channel_offs = UMC_V8_14_PER_CHANNEL_OFFSET;
+ adev->umc.ras = &umc_v8_14_ras;
+ break;
+ default:
+ break;
+ }
+}
+
+
+/*
+ * Select the MMHUB callback table for the detected MMHUB IP version.
+ * Only MMHUB 4.1.0 is handled; otherwise adev->mmhub.funcs is not assigned
+ * here.
+ */
+static void gmc_v12_0_set_mmhub_funcs(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ adev->mmhub.funcs = &mmhub_v4_1_0_funcs;
+ break;
+ default:
+ break;
+ }
+}
+
+/*
+ * Select the GFXHUB callback table for the detected GC IP version.
+ * GC 12.0.0 and 12.0.1 both use gfxhub_v12_0_funcs; otherwise
+ * adev->gfxhub.funcs is not assigned here.
+ */
+static void gmc_v12_0_set_gfxhub_funcs(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ adev->gfxhub.funcs = &gfxhub_v12_0_funcs;
+ break;
+ default:
+ break;
+ }
+}
+
+/*
+ * early_init callback: install the gfxhub, mmhub, gmc, irq and umc callback
+ * tables and set the shared and private aperture ranges. Always returns 0.
+ */
+static int gmc_v12_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ gmc_v12_0_set_gfxhub_funcs(adev);
+ gmc_v12_0_set_mmhub_funcs(adev);
+ gmc_v12_0_set_gmc_funcs(adev);
+ gmc_v12_0_set_irq_funcs(adev);
+ gmc_v12_0_set_umc_funcs(adev);
+
+ /* Both apertures span 4 GiB: end = start + (4ULL << 30) - 1. */
+ adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
+ adev->gmc.shared_aperture_end =
+ adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.private_aperture_start = 0x1000000000000000ULL;
+ adev->gmc.private_aperture_end =
+ adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+
+ return 0;
+}
+
+/*
+ * late_init callback: allocate the VM invalidation engines, run the GMC RAS
+ * late init and take a reference on the VM fault interrupt.
+ *
+ * The steps run in that order and stop at the first failure. Returns 0 on
+ * success or the error code of the failing step.
+ */
+static int gmc_v12_0_late_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int err;
+
+	err = amdgpu_gmc_allocate_vm_inv_eng(adev);
+	if (!err)
+		err = amdgpu_gmc_ras_late_init(adev);
+	if (!err)
+		err = amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
+
+	return err;
+}
+
+/*
+ * Place VRAM, GART and (optionally) AGP in the GPU address space and record
+ * the VRAM base offset used by the VM manager.
+ *
+ * The framebuffer base comes from the MMHUB get_fb_location() callback. AGP
+ * is only placed when the device is not an SR-IOV VF and amdgpu_agp is 1.
+ * The VRAM base offset is 0 for SR-IOV VFs and the MMHUB get_mc_fb_offset()
+ * value otherwise.
+ */
+static void gmc_v12_0_vram_gtt_location(struct amdgpu_device *adev,
+					struct amdgpu_gmc *mc)
+{
+	const u64 fb_base = adev->mmhub.funcs->get_fb_location(adev);
+
+	amdgpu_gmc_set_agp_default(adev, mc);
+	amdgpu_gmc_vram_location(adev, &adev->gmc, fb_base);
+	amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_LOW);
+	if ((amdgpu_agp == 1) && !amdgpu_sriov_vf(adev))
+		amdgpu_gmc_agp_location(adev, mc);
+
+	/* base offset of vram pages */
+	if (!amdgpu_sriov_vf(adev))
+		adev->vm_manager.vram_base_offset =
+			adev->mmhub.funcs->get_mc_fb_offset(adev);
+	else
+		adev->vm_manager.vram_base_offset = 0;
+}
+
+/**
+ * gmc_v12_0_mc_init - initialize the memory controller driver params
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up the amount of vram, size the CPU-visible aperture and the GART,
+ * and decide how to place vram and gart within the GPU's physical address
+ * space.
+ * Returns 0 for success, or the error from resizing the framebuffer BAR.
+ */
+static int gmc_v12_0_mc_init(struct amdgpu_device *adev)
+{
+	/* get_memsize() is scaled by 1024 * 1024, i.e. treated as a MiB count */
+	adev->gmc.mc_vram_size =
+		adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+	adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
+
+	/* The framebuffer BAR is only resized on non-APU devices. */
+	if (!(adev->flags & AMD_IS_APU)) {
+		int ret = amdgpu_device_resize_fb_bar(adev);
+
+		if (ret)
+			return ret;
+	}
+
+	adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
+	adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
+
+#ifdef CONFIG_X86_64
+	/* APUs that are not passed through use the MC framebuffer offset. */
+	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) {
+		adev->gmc.aper_base = adev->mmhub.funcs->get_mc_fb_offset(adev);
+		adev->gmc.aper_size = adev->gmc.real_vram_size;
+	}
+#endif
+	/* In case the PCI BAR is larger than the actual amount of vram */
+	if (adev->gmc.aper_size > adev->gmc.real_vram_size)
+		adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
+	else
+		adev->gmc.visible_vram_size = adev->gmc.aper_size;
+
+	/* set the gart size: 512 MiB unless amdgpu_gart_size overrides it */
+	if (amdgpu_gart_size != -1)
+		adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
+	else
+		adev->gmc.gart_size = 512ULL << 20;
+
+	gmc_v12_0_vram_gtt_location(adev, &adev->gmc);
+
+	return 0;
+}
+
+/*
+ * Initialize the common GART structure, size the GART table and set the
+ * default GART PTE flags, then allocate the table in VRAM.
+ *
+ * Warns and returns 0 without doing anything when adev->gart.bo is already
+ * set. Otherwise returns the result of amdgpu_gart_init() on failure or of
+ * amdgpu_gart_table_vram_alloc().
+ */
+static int gmc_v12_0_gart_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->gart.bo) {
+ WARN(1, "PCIE GART already initialized\n");
+ return 0;
+ }
+
+ /* Initialize common gart structure */
+ r = amdgpu_gart_init(adev);
+ if (r)
+ return r;
+
+ /* 8 bytes of table space per GPU page. */
+ adev->gart.table_size = adev->gart.num_gpu_pages * 8;
+ adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_UC) |
+ AMDGPU_PTE_EXECUTABLE |
+ AMDGPU_PTE_IS_PTE;
+
+ return amdgpu_gart_table_vram_alloc(adev);
+}
+
+/*
+ * sw_init callback: software-side setup of the memory controller.
+ *
+ * Initializes the MMHUB and GFXHUB software state, records the VRAM
+ * width/type/vendor, registers the VM hubs and VM size for GC 12.0.0 and
+ * 12.0.1, hooks up the VM fault and ECC interrupt sources, sets the DMA
+ * mask, then brings up the memory controller parameters, the buffer object
+ * manager, the GART, the VM manager and GMC RAS.
+ *
+ * Returns 0 on success or the first error from the checked steps.
+ */
+static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->mmhub.funcs->init(adev);
+
+ adev->gfxhub.funcs->init(adev);
+
+ spin_lock_init(&adev->gmc.invalidate_lock);
+
+ /*
+ * NOTE(review): the return value stored in r is overwritten below
+ * before it is ever checked, so a failure here leaves the zero
+ * initializers in place - confirm this is intended.
+ */
+ r = amdgpu_atomfirmware_get_vram_info(adev,
+ &vram_width, &vram_type, &vram_vendor);
+ adev->gmc.vram_width = vram_width;
+
+ adev->gmc.vram_type = vram_type;
+ adev->gmc.vram_vendor = vram_vendor;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
+ /*
+ * To fulfill 4-level page support,
+ * vm size is 256TB (48bit), maximum size,
+ * block size 512 (9bit)
+ */
+ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ break;
+ default:
+ break;
+ }
+
+ /* This interrupt is VMC page fault.*/
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_VMC,
+ VMC_1_0__SRCID__VM_FAULT,
+ &adev->gmc.vm_fault);
+
+ if (r)
+ return r;
+
+ /* The GFX client's UTCL2 fault is routed to the same vm_fault source. */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ UTCL2_1_0__SRCID__FAULT,
+ &adev->gmc.vm_fault);
+ if (r)
+ return r;
+
+ /* The ECC interrupt is not registered for SR-IOV VFs. */
+ if (!amdgpu_sriov_vf(adev)) {
+ /* interrupt sent to DF. */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_DF, 0,
+ &adev->gmc.ecc_irq);
+ if (r)
+ return r;
+ }
+
+ /*
+ * Set the internal MC address mask This is the max address of the GPU's
+ * internal address space.
+ */
+ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
+
+ /* DMA addressing is limited to 44 bits; the same width feeds drm_need_swiotlb(). */
+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
+ if (r) {
+ printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
+ return r;
+ }
+
+ adev->need_swiotlb = drm_need_swiotlb(44);
+
+ r = gmc_v12_0_mc_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_gmc_get_vbios_allocations(adev);
+
+ /* Memory manager */
+ r = amdgpu_bo_init(adev);
+ if (r)
+ return r;
+
+ r = gmc_v12_0_gart_init(adev);
+ if (r)
+ return r;
+
+ /*
+ * number of VMs
+ * VMID 0 is reserved for System
+ * amdgpu graphics/compute will use VMIDs 1-7
+ * amdkfd will use VMIDs 8-15
+ *
+ * When adev->gfx.disable_kq is set, the first KFD VMID is 1 instead
+ * of 8.
+ */
+ adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 1 : 8;
+
+ amdgpu_vm_manager_init(adev);
+
+ r = amdgpu_gmc_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * gmc_v12_0_gart_fini - vm fini callback
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tears down the driver GART setup by freeing the GART table in VRAM.
+ */
+static void gmc_v12_0_gart_fini(struct amdgpu_device *adev)
+{
+ amdgpu_gart_table_vram_free(adev);
+}
+
+/*
+ * sw_fini callback: shut down the VM manager, free the GART table, force
+ * release of remaining GEM objects and tear down the buffer object manager,
+ * in that order. Always returns 0.
+ */
+static int gmc_v12_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_vm_manager_fini(adev);
+ gmc_v12_0_gart_fini(adev);
+ amdgpu_gem_force_release(adev);
+ amdgpu_bo_fini(adev);
+
+ return 0;
+}
+
+/* Intentionally empty: no golden register settings are programmed here. */
+static void gmc_v12_0_init_golden_registers(struct amdgpu_device *adev)
+{
+}
+
+/**
+ * gmc_v12_0_gart_enable - gart enable
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Recovers the GTT manager, enables the GART through the MMHUB callbacks,
+ * flushes HDP, programs the default fault handling and flushes the MMHUB
+ * TLB for VMID 0.
+ *
+ * Returns 0 on success, -EINVAL when no GART table object exists, or the
+ * error returned by the MMHUB gart_enable() callback.
+ */
+static int gmc_v12_0_gart_enable(struct amdgpu_device *adev)
+{
+ int r;
+ bool value;
+
+ if (adev->gart.bo == NULL) {
+ dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
+ return -EINVAL;
+ }
+
+ amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
+
+ r = adev->mmhub.funcs->gart_enable(adev);
+ if (r)
+ return r;
+
+ /* Flush HDP after it is initialized */
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ /* value is false only when amdgpu_vm_fault_stop is AMDGPU_VM_FAULT_STOP_ALWAYS. */
+ value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
+
+ adev->mmhub.funcs->set_fault_enable_default(adev, value);
+ gmc_v12_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0);
+
+ dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ (unsigned)(adev->gmc.gart_size >> 20),
+ (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
+
+ return 0;
+}
+
+/*
+ * hw_init callback: apply golden registers, enable the GART and, when the
+ * UMC callback table provides init_registers(), initialize the UMC
+ * registers. Returns 0 on success or the error from enabling the GART.
+ */
+static int gmc_v12_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* The sequence of these two function calls matters.*/
+ gmc_v12_0_init_golden_registers(adev);
+
+ r = gmc_v12_0_gart_enable(adev);
+ if (r)
+ return r;
+
+ /* UMC register init is optional: both the table and the hook may be absent. */
+ if (adev->umc.funcs && adev->umc.funcs->init_registers)
+ adev->umc.funcs->init_registers(adev);
+
+ return 0;
+}
+
+/**
+ * gmc_v12_0_gart_disable - gart disable
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disables the GART by calling the MMHUB gart_disable() callback.
+ */
+static void gmc_v12_0_gart_disable(struct amdgpu_device *adev)
+{
+ adev->mmhub.funcs->gart_disable(adev);
+}
+
+/*
+ * hw_fini callback: drop the VM fault interrupt reference, drop the ECC
+ * interrupt reference when it was set up and UMC RAS is supported, and
+ * disable the GART. Nothing is done for SR-IOV VFs. Always returns 0.
+ */
+static int gmc_v12_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* full access mode, so don't touch any GMC register */
+ DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
+ return 0;
+ }
+
+ amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+
+ /* Only release the ECC interrupt if its source has handlers and UMC RAS is supported. */
+ if (adev->gmc.ecc_irq.funcs &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+ amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+
+ gmc_v12_0_gart_disable(adev);
+
+ return 0;
+}
+
+/*
+ * suspend callback: suspending the GMC is the same as tearing down its
+ * hardware state.
+ *
+ * The result of gmc_v12_0_hw_fini() is returned instead of being discarded,
+ * so a failure added to that path later is reported to the caller. That
+ * function currently always returns 0, so today's behavior is unchanged.
+ */
+static int gmc_v12_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+	return gmc_v12_0_hw_fini(ip_block);
+}
+
+/*
+ * resume callback: re-run the hardware init and, only if that succeeded,
+ * reset all VMIDs. Returns 0 on success or the hw_init error.
+ */
+static int gmc_v12_0_resume(struct amdgpu_ip_block *ip_block)
+{
+	int err = gmc_v12_0_hw_init(ip_block);
+
+	if (!err)
+		amdgpu_vmid_reset_all(ip_block->adev);
+
+	return err;
+}
+
+/* is_idle callback: unconditionally reports the block as idle. */
+static bool gmc_v12_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ /* MC is always ready in GMC v12.*/
+ return true;
+}
+
+/* wait_for_idle callback: returns 0 immediately without polling anything. */
+static int gmc_v12_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ /* There is no need to wait for MC idle in GMC v12.*/
+ return 0;
+}
+
+/*
+ * set_clockgating_state callback: apply @state to the MMHUB first and, if
+ * that succeeded, to the ATHUB. Returns the MMHUB error when it fails,
+ * otherwise the ATHUB result.
+ */
+static int gmc_v12_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					   enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int err = adev->mmhub.funcs->set_clockgating(adev, state);
+
+	if (!err)
+		err = athub_v4_1_0_set_clockgating(adev, state);
+
+	return err;
+}
+
+/*
+ * get_clockgating_state callback: pass @flags to the MMHUB callback and then
+ * to the ATHUB helper so each can report its clockgating state.
+ */
+static void gmc_v12_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->mmhub.funcs->get_clockgating(adev, flags);
+
+ athub_v4_1_0_get_clockgating(adev, flags);
+}
+
+/* set_powergating_state callback: no-op that ignores @state and returns 0. */
+static int gmc_v12_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+/* IP block callback table for GMC v12.0; referenced by gmc_v12_0_ip_block. */
+const struct amd_ip_funcs gmc_v12_0_ip_funcs = {
+ .name = "gmc_v12_0",
+ .early_init = gmc_v12_0_early_init,
+ .sw_init = gmc_v12_0_sw_init,
+ .hw_init = gmc_v12_0_hw_init,
+ .late_init = gmc_v12_0_late_init,
+ .sw_fini = gmc_v12_0_sw_fini,
+ .hw_fini = gmc_v12_0_hw_fini,
+ .suspend = gmc_v12_0_suspend,
+ .resume = gmc_v12_0_resume,
+ .is_idle = gmc_v12_0_is_idle,
+ .wait_for_idle = gmc_v12_0_wait_for_idle,
+ .set_clockgating_state = gmc_v12_0_set_clockgating_state,
+ .set_powergating_state = gmc_v12_0_set_powergating_state,
+ .get_clockgating_state = gmc_v12_0_get_clockgating_state,
+};
+
+/* IP block version descriptor: GMC block type, version 12.0.0. */
+const struct amdgpu_ip_block_version gmc_v12_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GMC,
+ .major = 12,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gmc_v12_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.h
new file mode 100644
index 000000000000..deca93e4a156
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GMC_V12_0_H__
+#define __GMC_V12_0_H__
+
+extern const struct amd_ip_funcs gmc_v12_0_ip_funcs;
+extern const struct amdgpu_ip_block_version gmc_v12_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 0fe714f54cca..a8ec95f42926 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -43,7 +43,7 @@
static void gmc_v6_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev);
-static int gmc_v6_0_wait_for_idle(void *handle);
+static int gmc_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block);
MODULE_FIRMWARE("amdgpu/tahiti_mc.bin");
MODULE_FIRMWARE("amdgpu/pitcairn_mc.bin");
@@ -64,8 +64,13 @@ MODULE_FIRMWARE("amdgpu/si58_mc.bin");
static void gmc_v6_0_mc_stop(struct amdgpu_device *adev)
{
u32 blackout;
+ struct amdgpu_ip_block *ip_block;
- gmc_v6_0_wait_for_idle((void *)adev);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
+ gmc_v6_0_wait_for_idle(ip_block);
blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
@@ -98,9 +103,7 @@ static void gmc_v6_0_mc_resume(struct amdgpu_device *adev)
static int gmc_v6_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err;
- bool is_58_fw = false;
DRM_DEBUG("\n");
@@ -120,30 +123,21 @@ static int gmc_v6_0_init_microcode(struct amdgpu_device *adev)
case CHIP_HAINAN:
chip_name = "hainan";
break;
- default: BUG();
+ default:
+ BUG();
}
/* this memory configuration requires special firmware */
if (((RREG32(mmMC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
- is_58_fw = true;
-
- if (is_58_fw)
- snprintf(fw_name, sizeof(fw_name), "amdgpu/si58_mc.bin");
- else
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
- err = request_firmware(&adev->gmc.fw, fw_name, adev->dev);
- if (err)
- goto out;
-
- err = amdgpu_ucode_validate(adev->gmc.fw);
+ chip_name = "si58";
-out:
+ err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mc.bin", chip_name);
if (err) {
dev_err(adev->dev,
- "si_mc: Failed to load firmware \"%s\"\n",
- fw_name);
- release_firmware(adev->gmc.fw);
- adev->gmc.fw = NULL;
+ "si_mc: Failed to load firmware \"%s_mc.bin\"\n",
+ chip_name);
+ amdgpu_ucode_release(&adev->gmc.fw);
}
return err;
}
@@ -185,9 +179,8 @@ static int gmc_v6_0_mc_load_microcode(struct amdgpu_device *adev)
WREG32(mmMC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
}
/* load the MC ucode */
- for (i = 0; i < ucode_size; i++) {
+ for (i = 0; i < ucode_size; i++)
WREG32(mmMC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
- }
/* put the engine back into the active state */
WREG32(mmMC_SEQ_SUP_CNTL, 0x00000008);
@@ -215,15 +208,19 @@ static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc)
{
u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
+
base <<= 24;
+ amdgpu_gmc_set_agp_default(adev, mc);
amdgpu_gmc_vram_location(adev, mc, base);
- amdgpu_gmc_gart_location(adev, mc);
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_LOW);
}
static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
{
int i, j;
+ struct amdgpu_ip_block *ip_block;
+
/* Initialize HDP */
for (i = 0, j = 0; i < 32; i++, j += 0x6) {
@@ -235,9 +232,12 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
}
WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0);
- if (gmc_v6_0_wait_for_idle((void *)adev)) {
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
+ if (gmc_v6_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
- }
if (adev->mode_info.num_crtc) {
u32 tmp;
@@ -249,7 +249,7 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
/* disable VGA render */
tmp = RREG32(mmVGA_RENDER_CONTROL);
- tmp &= ~VGA_VSTATUS_CNTL;
+ tmp &= VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK;
WREG32(mmVGA_RENDER_CONTROL, tmp);
}
/* Update configuration */
@@ -258,14 +258,13 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
adev->gmc.vram_end >> 12);
WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
- adev->vram_scratch.gpu_addr >> 12);
+ adev->mem_scratch.gpu_addr >> 12);
WREG32(mmMC_VM_AGP_BASE, 0);
- WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
- WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
+ WREG32(mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 22);
+ WREG32(mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 22);
- if (gmc_v6_0_wait_for_idle((void *)adev)) {
+ if (gmc_v6_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
- }
}
static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
@@ -276,13 +275,13 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
int r;
tmp = RREG32(mmMC_ARB_RAMCFG);
- if (tmp & (1 << 11)) {
+ if (tmp & (1 << 11))
chansize = 16;
- } else if (tmp & MC_ARB_RAMCFG__CHANSIZE_MASK) {
+ else if (tmp & MC_ARB_RAMCFG__CHANSIZE_MASK)
chansize = 64;
- } else {
+ else
chansize = 32;
- }
+
tmp = RREG32(mmMC_SHARED_CHMAP);
switch ((tmp & MC_SHARED_CHMAP__NOOFCHAN_MASK) >> MC_SHARED_CHMAP__NOOFCHAN__SHIFT) {
case 0:
@@ -359,7 +358,7 @@ static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
}
static uint64_t gmc_v6_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
- unsigned vmid, uint64_t pd_addr)
+ unsigned int vmid, uint64_t pd_addr)
{
uint32_t reg;
@@ -383,7 +382,9 @@ static void gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, int level,
}
static void gmc_v6_0_get_vm_pte(struct amdgpu_device *adev,
- struct amdgpu_bo_va_mapping *mapping,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
uint64_t *flags)
{
*flags &= ~AMDGPU_PTE_EXECUTABLE;
@@ -412,11 +413,11 @@ static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev,
}
/**
- + * gmc_v8_0_set_prt - set PRT VM fault
- + *
- + * @adev: amdgpu_device pointer
- + * @enable: enable/disable VM fault handling for PRT
- +*/
+ * gmc_v6_0_set_prt() - set PRT VM fault
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable VM fault handling for PRT
+ */
static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable)
{
u32 tmp;
@@ -442,9 +443,10 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable)
WREG32(mmVM_PRT_CNTL, tmp);
if (enable) {
- uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
+ uint32_t low = AMDGPU_VA_RESERVED_BOTTOM >>
+ AMDGPU_GPU_PAGE_SHIFT;
uint32_t high = adev->vm_manager.max_pfn -
- (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
+ (AMDGPU_VA_RESERVED_TOP >> AMDGPU_GPU_PAGE_SHIFT);
WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
@@ -469,16 +471,14 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable)
static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
{
uint64_t table_addr;
- int r, i;
u32 field;
+ int i;
if (adev->gart.bo == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
- r = amdgpu_gart_table_vram_pin(adev);
- if (r)
- return r;
+ amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
table_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
@@ -556,9 +556,8 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
gmc_v6_0_flush_gpu_tlb(adev, 0, 0, 0);
dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
- (unsigned)(adev->gmc.gart_size >> 20),
+ (unsigned int)(adev->gmc.gart_size >> 20),
(unsigned long long)table_addr);
- adev->gart.ready = true;
return 0;
}
@@ -608,40 +607,36 @@ static void gmc_v6_0_gart_disable(struct amdgpu_device *adev)
WREG32(mmVM_L2_CNTL3,
VM_L2_CNTL3__L2_CACHE_BIGK_ASSOCIATIVITY_MASK |
(0UL << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT));
- amdgpu_gart_table_vram_unpin(adev);
}
static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev,
- u32 status, u32 addr, u32 mc_client)
+ u32 status, u32 addr)
{
u32 mc_id;
u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
PROTECTIONS);
- char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
- (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
mc_id = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
MEMORY_CLIENT_ID);
- dev_err(adev->dev, "VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
+ dev_err(adev->dev, "VM fault (0x%02x, vmid %d) at page %u, %s from %d\n",
protections, vmid, addr,
REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
MEMORY_CLIENT_RW) ?
- "write" : "read", block, mc_client, mc_id);
+ "write" : "read", mc_id);
}
-/*
static const u32 mc_cg_registers[] = {
- MC_HUB_MISC_HUB_CG,
- MC_HUB_MISC_SIP_CG,
- MC_HUB_MISC_VM_CG,
- MC_XPB_CLK_GAT,
- ATC_MISC_CG,
- MC_CITF_MISC_WR_CG,
- MC_CITF_MISC_RD_CG,
- MC_CITF_MISC_VM_CG,
- VM_L2_CG,
+ mmMC_HUB_MISC_HUB_CG,
+ mmMC_HUB_MISC_SIP_CG,
+ mmMC_HUB_MISC_VM_CG,
+ mmMC_XPB_CLK_GAT,
+ mmATC_MISC_CG,
+ mmMC_CITF_MISC_WR_CG,
+ mmMC_CITF_MISC_RD_CG,
+ mmMC_CITF_MISC_VM_CG,
+ mmVM_L2_CG,
};
static const u32 mc_cg_ls_en[] = {
@@ -676,7 +671,7 @@ static void gmc_v6_0_enable_mc_ls(struct amdgpu_device *adev,
for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
orig = data = RREG32(mc_cg_registers[i]);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_LS))
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
data |= mc_cg_ls_en[i];
else
data &= ~mc_cg_ls_en[i];
@@ -693,7 +688,7 @@ static void gmc_v6_0_enable_mc_mgcg(struct amdgpu_device *adev,
for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
orig = data = RREG32(mc_cg_registers[i]);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_MGCG))
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
data |= mc_cg_en[i];
else
data &= ~mc_cg_en[i];
@@ -709,7 +704,7 @@ static void gmc_v6_0_enable_bif_mgls(struct amdgpu_device *adev,
orig = data = RREG32_PCIE(ixPCIE_CNTL2);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_BIF_LS)) {
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
data = REG_SET_FIELD(data, PCIE_CNTL2, SLV_MEM_LS_EN, 1);
data = REG_SET_FIELD(data, PCIE_CNTL2, MST_MEM_LS_EN, 1);
data = REG_SET_FIELD(data, PCIE_CNTL2, REPLAY_MEM_LS_EN, 1);
@@ -732,7 +727,7 @@ static void gmc_v6_0_enable_hdp_mgcg(struct amdgpu_device *adev,
orig = data = RREG32(mmHDP_HOST_PATH_CNTL);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_MGCG))
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG))
data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 0);
else
data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 1);
@@ -748,7 +743,7 @@ static void gmc_v6_0_enable_hdp_ls(struct amdgpu_device *adev,
orig = data = RREG32(mmHDP_MEM_POWER_LS);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_LS))
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 1);
else
data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 0);
@@ -756,7 +751,6 @@ static void gmc_v6_0_enable_hdp_ls(struct amdgpu_device *adev,
if (orig != data)
WREG32(mmHDP_MEM_POWER_LS, data);
}
-*/
static int gmc_v6_0_convert_vram_type(int mc_seq_vram_type)
{
@@ -778,9 +772,9 @@ static int gmc_v6_0_convert_vram_type(int mc_seq_vram_type)
}
}
-static int gmc_v6_0_early_init(void *handle)
+static int gmc_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v6_0_set_gmc_funcs(adev);
gmc_v6_0_set_irq_funcs(adev);
@@ -788,9 +782,9 @@ static int gmc_v6_0_early_init(void *handle)
return 0;
}
-static int gmc_v6_0_late_init(void *handle)
+static int gmc_v6_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
@@ -798,15 +792,16 @@ static int gmc_v6_0_late_init(void *handle)
return 0;
}
-static unsigned gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev)
+static unsigned int gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
- unsigned size;
+ unsigned int size;
if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
size = AMDGPU_VBIOS_VGA_ALLOCATION;
} else {
u32 viewport = RREG32(mmVIEWPORT_SIZE);
+
size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
4);
@@ -814,17 +809,18 @@ static unsigned gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev)
return size;
}
-static int gmc_v6_0_sw_init(void *handle)
+static int gmc_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- adev->num_vmhubs = 1;
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
if (adev->flags & AMD_IS_APU) {
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
} else {
u32 tmp = RREG32(mmMC_SEQ_MISC0);
+
tmp &= MC_SEQ_MISC0__MT__MASK;
adev->gmc.vram_type = gmc_v6_0_convert_vram_type(tmp);
}
@@ -890,24 +886,23 @@ static int gmc_v6_0_sw_init(void *handle)
return 0;
}
-static int gmc_v6_0_sw_fini(void *handle)
+static int gmc_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
amdgpu_gart_table_vram_free(adev);
amdgpu_bo_fini(adev);
- release_firmware(adev->gmc.fw);
- adev->gmc.fw = NULL;
+ amdgpu_ucode_release(&adev->gmc.fw);
return 0;
}
-static int gmc_v6_0_hw_init(void *handle)
+static int gmc_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v6_0_mc_program(adev);
@@ -923,12 +918,15 @@ static int gmc_v6_0_hw_init(void *handle)
if (r)
return r;
- return r;
+ if (amdgpu_emu_mode == 1)
+ return amdgpu_gmc_vram_checking(adev);
+
+ return 0;
}
-static int gmc_v6_0_hw_fini(void *handle)
+static int gmc_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
gmc_v6_0_gart_disable(adev);
@@ -936,21 +934,19 @@ static int gmc_v6_0_hw_fini(void *handle)
return 0;
}
-static int gmc_v6_0_suspend(void *handle)
+static int gmc_v6_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- gmc_v6_0_hw_fini(adev);
+ gmc_v6_0_hw_fini(ip_block);
return 0;
}
-static int gmc_v6_0_resume(void *handle)
+static int gmc_v6_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- r = gmc_v6_0_hw_init(adev);
+ r = gmc_v6_0_hw_init(ip_block);
if (r)
return r;
@@ -959,9 +955,10 @@ static int gmc_v6_0_resume(void *handle)
return 0;
}
-static bool gmc_v6_0_is_idle(void *handle)
+static bool gmc_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
@@ -971,13 +968,13 @@ static bool gmc_v6_0_is_idle(void *handle)
return true;
}
-static int gmc_v6_0_wait_for_idle(void *handle)
+static int gmc_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ unsigned int i;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (gmc_v6_0_is_idle(handle))
+ if (gmc_v6_0_is_idle(ip_block))
return 0;
udelay(1);
}
@@ -985,9 +982,10 @@ static int gmc_v6_0_wait_for_idle(void *handle)
}
-static int gmc_v6_0_soft_reset(void *handle)
+static int gmc_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS);
@@ -1004,10 +1002,9 @@ static int gmc_v6_0_soft_reset(void *handle)
if (srbm_soft_reset) {
gmc_v6_0_mc_stop(adev);
- if (gmc_v6_0_wait_for_idle(adev)) {
- dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
- }
+ if (gmc_v6_0_wait_for_idle(ip_block))
+ dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
tmp = RREG32(mmSRBM_SOFT_RESET);
tmp |= srbm_soft_reset;
@@ -1032,7 +1029,7 @@ static int gmc_v6_0_soft_reset(void *handle)
static int gmc_v6_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
u32 tmp;
@@ -1073,6 +1070,12 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev,
{
u32 addr, status;
+ /* Delegate to the soft IRQ handler ring */
+ if (adev->irq.ih_soft.enabled && entry->ih != &adev->irq.ih_soft) {
+ amdgpu_irq_delegate(adev, entry, 4);
+ return 1;
+ }
+
addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
WREG32_P(mmVM_CONTEXT1_CNTL2, 1, ~1);
@@ -1080,6 +1083,10 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev,
if (!addr && !status)
return 0;
+ amdgpu_vm_update_fault_cache(adev, entry->pasid,
+ ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT,
+ status, AMDGPU_GFXHUB(0));
+
if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
gmc_v6_0_set_fault_enable_default(adev, false);
@@ -1090,19 +1097,33 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev,
addr);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
status);
- gmc_v6_0_vm_decode_fault(adev, status, addr, 0);
+ gmc_v6_0_vm_decode_fault(adev, status, addr);
}
return 0;
}
-static int gmc_v6_0_set_clockgating_state(void *handle,
+static int gmc_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool gate = false;
+
+ if (state == AMD_CG_STATE_GATE)
+ gate = true;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ gmc_v6_0_enable_mc_mgcg(adev, gate);
+ gmc_v6_0_enable_mc_ls(adev, gate);
+ }
+ gmc_v6_0_enable_bif_mgls(adev, gate);
+ gmc_v6_0_enable_hdp_mgcg(adev, gate);
+ gmc_v6_0_enable_hdp_ls(adev, gate);
+
return 0;
}
-static int gmc_v6_0_set_powergating_state(void *handle,
+static int gmc_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1150,8 +1171,7 @@ static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gmc.vm_fault.funcs = &gmc_v6_0_irq_funcs;
}
-const struct amdgpu_ip_block_version gmc_v6_0_ip_block =
-{
+const struct amdgpu_ip_block_version gmc_v6_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GMC,
.major = 6,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 0a50fdaced7e..fbd0bf147f50 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -52,22 +52,20 @@
static void gmc_v7_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
-static int gmc_v7_0_wait_for_idle(void *handle);
+static int gmc_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block);
MODULE_FIRMWARE("amdgpu/bonaire_mc.bin");
MODULE_FIRMWARE("amdgpu/hawaii_mc.bin");
MODULE_FIRMWARE("amdgpu/topaz_mc.bin");
-static const u32 golden_settings_iceland_a11[] =
-{
+static const u32 golden_settings_iceland_a11[] = {
mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff
};
-static const u32 iceland_mgcg_cgcg_init[] =
-{
+static const u32 iceland_mgcg_cgcg_init[] = {
mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
};
@@ -89,9 +87,14 @@ static void gmc_v7_0_init_golden_registers(struct amdgpu_device *adev)
static void gmc_v7_0_mc_stop(struct amdgpu_device *adev)
{
+ struct amdgpu_ip_block *ip_block;
u32 blackout;
- gmc_v7_0_wait_for_idle((void *)adev);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
+ gmc_v7_0_wait_for_idle(ip_block);
blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
@@ -132,7 +135,6 @@ static void gmc_v7_0_mc_resume(struct amdgpu_device *adev)
static int gmc_v7_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err;
DRM_DEBUG("\n");
@@ -151,21 +153,15 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev)
case CHIP_KABINI:
case CHIP_MULLINS:
return 0;
- default: BUG();
+ default:
+ return -EINVAL;
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
-
- err = request_firmware(&adev->gmc.fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gmc.fw);
-
-out:
+ err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mc.bin", chip_name);
if (err) {
- pr_err("cik_mc: Failed to load firmware \"%s\"\n", fw_name);
- release_firmware(adev->gmc.fw);
- adev->gmc.fw = NULL;
+ pr_err("cik_mc: Failed to load firmware \"%s_mc.bin\"\n", chip_name);
+ amdgpu_ucode_release(&adev->gmc.fw);
}
return err;
}
@@ -243,10 +239,12 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc)
{
u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
+
base <<= 24;
+ amdgpu_gmc_set_agp_default(adev, mc);
amdgpu_gmc_vram_location(adev, mc, base);
- amdgpu_gmc_gart_location(adev, mc);
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
}
/**
@@ -259,9 +257,14 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev,
*/
static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
{
+ struct amdgpu_ip_block *ip_block;
u32 tmp;
int i, j;
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
/* Initialize HDP */
for (i = 0, j = 0; i < 32; i++, j += 0x6) {
WREG32((0xb05 + j), 0x00000000);
@@ -272,9 +275,9 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
}
WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0);
- if (gmc_v7_0_wait_for_idle((void *)adev)) {
+ if (gmc_v7_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
- }
+
if (adev->mode_info.num_crtc) {
/* Lockout access through VGA aperture*/
tmp = RREG32(mmVGA_HDP_CONTROL);
@@ -292,13 +295,12 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
adev->gmc.vram_end >> 12);
WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
- adev->vram_scratch.gpu_addr >> 12);
+ adev->mem_scratch.gpu_addr >> 12);
WREG32(mmMC_VM_AGP_BASE, 0);
- WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
- WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
- if (gmc_v7_0_wait_for_idle((void *)adev)) {
+ WREG32(mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 22);
+ WREG32(mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 22);
+ if (gmc_v7_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
- }
WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
@@ -330,11 +332,11 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
/* Get VRAM informations */
tmp = RREG32(mmMC_ARB_RAMCFG);
- if (REG_GET_FIELD(tmp, MC_ARB_RAMCFG, CHANSIZE)) {
+ if (REG_GET_FIELD(tmp, MC_ARB_RAMCFG, CHANSIZE))
chansize = 64;
- } else {
+ else
chansize = 32;
- }
+
tmp = RREG32(mmMC_SHARED_CHMAP);
switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) {
case 0:
@@ -381,17 +383,15 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
#ifdef CONFIG_X86_64
- if (adev->flags & AMD_IS_APU &&
- adev->gmc.real_vram_size > adev->gmc.aper_size) {
+ if ((adev->flags & AMD_IS_APU) &&
+ adev->gmc.real_vram_size > adev->gmc.aper_size &&
+ !amdgpu_passthrough(adev)) {
adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
adev->gmc.aper_size = adev->gmc.real_vram_size;
}
#endif
- /* In case the PCI BAR is larger than the actual amount of vram */
adev->gmc.visible_vram_size = adev->gmc.aper_size;
- if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
- adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
/* set the gart size */
if (amdgpu_gart_size == -1) {
@@ -427,31 +427,27 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
* @pasid: pasid to be flush
* @flush_type: type of flush
* @all_hub: flush all hubs
+ * @inst: is used to select which instance of KIQ to use for the invalidation
*
* Flush the TLB for the requested pasid.
*/
-static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
- uint16_t pasid, uint32_t flush_type,
- bool all_hub)
+static void gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
{
+ u32 mask = 0x0;
int vmid;
- unsigned int tmp;
-
- if (amdgpu_in_reset(adev))
- return -EIO;
for (vmid = 1; vmid < 16; vmid++) {
+ u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
- tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
- (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
- WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
- RREG32(mmVM_INVALIDATE_RESPONSE);
- break;
- }
+ (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid)
+ mask |= 1 << vmid;
}
- return 0;
+ WREG32(mmVM_INVALIDATE_REQUEST, mask);
+ RREG32(mmVM_INVALIDATE_RESPONSE);
}
/*
@@ -479,7 +475,7 @@ static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
}
static uint64_t gmc_v7_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
- unsigned vmid, uint64_t pd_addr)
+ unsigned int vmid, uint64_t pd_addr)
{
uint32_t reg;
@@ -495,8 +491,8 @@ static uint64_t gmc_v7_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
return pd_addr;
}
-static void gmc_v7_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
- unsigned pasid)
+static void gmc_v7_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int vmid,
+ unsigned int pasid)
{
amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid);
}
@@ -508,7 +504,9 @@ static void gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, int level,
}
static void gmc_v7_0_get_vm_pte(struct amdgpu_device *adev,
- struct amdgpu_bo_va_mapping *mapping,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
uint64_t *flags)
{
*flags &= ~AMDGPU_PTE_EXECUTABLE;
@@ -575,9 +573,10 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable)
WREG32(mmVM_PRT_CNTL, tmp);
if (enable) {
- uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
+ uint32_t low = AMDGPU_VA_RESERVED_BOTTOM >>
+ AMDGPU_GPU_PAGE_SHIFT;
uint32_t high = adev->vm_manager.max_pfn -
- (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
+ (AMDGPU_VA_RESERVED_TOP >> AMDGPU_GPU_PAGE_SHIFT);
WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
@@ -613,17 +612,14 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable)
static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
{
uint64_t table_addr;
- int r, i;
u32 tmp, field;
+ int i;
if (adev->gart.bo == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
- r = amdgpu_gart_table_vram_pin(adev);
- if (r)
- return r;
-
+ amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
table_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
/* Setup TLB control */
@@ -710,9 +706,8 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
gmc_v7_0_flush_gpu_tlb(adev, 0, 0, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
- (unsigned)(adev->gmc.gart_size >> 20),
+ (unsigned int)(adev->gmc.gart_size >> 20),
(unsigned long long)table_addr);
- adev->gart.ready = true;
return 0;
}
@@ -758,7 +753,6 @@ static void gmc_v7_0_gart_disable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32(mmVM_L2_CNTL, tmp);
WREG32(mmVM_L2_CNTL2, 0);
- amdgpu_gart_table_vram_unpin(adev);
}
/**
@@ -773,7 +767,7 @@ static void gmc_v7_0_gart_disable(struct amdgpu_device *adev)
* Print human readable fault information (CIK).
*/
static void gmc_v7_0_vm_decode_fault(struct amdgpu_device *adev, u32 status,
- u32 addr, u32 mc_client, unsigned pasid)
+ u32 addr, u32 mc_client, unsigned int pasid)
{
u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
@@ -940,9 +934,9 @@ static int gmc_v7_0_convert_vram_type(int mc_seq_vram_type)
}
}
-static int gmc_v7_0_early_init(void *handle)
+static int gmc_v7_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v7_0_set_gmc_funcs(adev);
gmc_v7_0_set_irq_funcs(adev);
@@ -954,13 +948,14 @@ static int gmc_v7_0_early_init(void *handle)
adev->gmc.shared_aperture_end + 1;
adev->gmc.private_aperture_end =
adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;
return 0;
}
-static int gmc_v7_0_late_init(void *handle)
+static int gmc_v7_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
@@ -968,15 +963,16 @@ static int gmc_v7_0_late_init(void *handle)
return 0;
}
-static unsigned gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev)
+static unsigned int gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
- unsigned size;
+ unsigned int size;
if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
size = AMDGPU_VBIOS_VGA_ALLOCATION;
} else {
u32 viewport = RREG32(mmVIEWPORT_SIZE);
+
size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
4);
@@ -985,17 +981,18 @@ static unsigned gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev)
return size;
}
-static int gmc_v7_0_sw_init(void *handle)
+static int gmc_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- adev->num_vmhubs = 1;
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
if (adev->flags & AMD_IS_APU) {
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
} else {
u32 tmp = RREG32(mmMC_SEQ_MISC0);
+
tmp &= MC_SEQ_MISC0__MT__MASK;
adev->gmc.vram_type = gmc_v7_0_convert_vram_type(tmp);
}
@@ -1071,30 +1068,29 @@ static int gmc_v7_0_sw_init(void *handle)
GFP_KERNEL);
if (!adev->gmc.vm_fault_info)
return -ENOMEM;
- atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+ atomic_set_release(&adev->gmc.vm_fault_info_updated, 0);
return 0;
}
-static int gmc_v7_0_sw_fini(void *handle)
+static int gmc_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
kfree(adev->gmc.vm_fault_info);
amdgpu_gart_table_vram_free(adev);
amdgpu_bo_fini(adev);
- release_firmware(adev->gmc.fw);
- adev->gmc.fw = NULL;
+ amdgpu_ucode_release(&adev->gmc.fw);
return 0;
}
-static int gmc_v7_0_hw_init(void *handle)
+static int gmc_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v7_0_init_golden_registers(adev);
@@ -1112,12 +1108,15 @@ static int gmc_v7_0_hw_init(void *handle)
if (r)
return r;
- return r;
+ if (amdgpu_emu_mode == 1)
+ return amdgpu_gmc_vram_checking(adev);
+
+ return 0;
}
-static int gmc_v7_0_hw_fini(void *handle)
+static int gmc_v7_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
gmc_v7_0_gart_disable(adev);
@@ -1125,32 +1124,29 @@ static int gmc_v7_0_hw_fini(void *handle)
return 0;
}
-static int gmc_v7_0_suspend(void *handle)
+static int gmc_v7_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- gmc_v7_0_hw_fini(adev);
+ gmc_v7_0_hw_fini(ip_block);
return 0;
}
-static int gmc_v7_0_resume(void *handle)
+static int gmc_v7_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = gmc_v7_0_hw_init(adev);
+ r = gmc_v7_0_hw_init(ip_block);
if (r)
return r;
- amdgpu_vmid_reset_all(adev);
+ amdgpu_vmid_reset_all(ip_block->adev);
return 0;
}
-static bool gmc_v7_0_is_idle(void *handle)
+static bool gmc_v7_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
@@ -1160,20 +1156,13 @@ static bool gmc_v7_0_is_idle(void *handle)
return true;
}
-static int gmc_v7_0_wait_for_idle(void *handle)
+static int gmc_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- unsigned i;
- u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ unsigned int i;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- /* read MC_STATUS */
- tmp = RREG32(mmSRBM_STATUS) & (SRBM_STATUS__MCB_BUSY_MASK |
- SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
- SRBM_STATUS__MCC_BUSY_MASK |
- SRBM_STATUS__MCD_BUSY_MASK |
- SRBM_STATUS__VMC_BUSY_MASK);
- if (!tmp)
+ if (gmc_v7_0_is_idle(ip_block))
return 0;
udelay(1);
}
@@ -1181,9 +1170,9 @@ static int gmc_v7_0_wait_for_idle(void *handle)
}
-static int gmc_v7_0_soft_reset(void *handle)
+static int gmc_v7_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS);
@@ -1200,10 +1189,8 @@ static int gmc_v7_0_soft_reset(void *handle)
if (srbm_soft_reset) {
gmc_v7_0_mc_stop(adev);
- if (gmc_v7_0_wait_for_idle((void *)adev)) {
+ if (gmc_v7_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
- }
-
tmp = RREG32(mmSRBM_SOFT_RESET);
tmp |= srbm_soft_reset;
@@ -1229,7 +1216,7 @@ static int gmc_v7_0_soft_reset(void *handle)
static int gmc_v7_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
u32 tmp;
@@ -1274,6 +1261,12 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
{
u32 addr, status, mc_client, vmid;
+ /* Delegate to the soft IRQ handler ring */
+ if (adev->irq.ih_soft.enabled && entry->ih != &adev->irq.ih_soft) {
+ amdgpu_irq_delegate(adev, entry, 4);
+ return 1;
+ }
+
addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
mc_client = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
@@ -1283,6 +1276,9 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
if (!addr && !status)
return 0;
+ amdgpu_vm_update_fault_cache(adev, entry->pasid,
+ ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status, AMDGPU_GFXHUB(0));
+
if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
gmc_v7_0_set_fault_enable_default(adev, false);
@@ -1300,7 +1296,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
VMID);
if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
- && !atomic_read(&adev->gmc.vm_fault_info_updated)) {
+ && !atomic_read_acquire(&adev->gmc.vm_fault_info_updated)) {
struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
u32 protections = REG_GET_FIELD(status,
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
@@ -1316,18 +1312,17 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
info->prot_read = protections & 0x8 ? true : false;
info->prot_write = protections & 0x10 ? true : false;
info->prot_exec = protections & 0x20 ? true : false;
- mb();
- atomic_set(&adev->gmc.vm_fault_info_updated, 1);
+ atomic_set_release(&adev->gmc.vm_fault_info_updated, 1);
}
return 0;
}
-static int gmc_v7_0_set_clockgating_state(void *handle,
+static int gmc_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE)
gate = true;
@@ -1343,7 +1338,7 @@ static int gmc_v7_0_set_clockgating_state(void *handle,
return 0;
}
-static int gmc_v7_0_set_powergating_state(void *handle,
+static int gmc_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1393,8 +1388,7 @@ static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gmc.vm_fault.funcs = &gmc_v7_0_irq_funcs;
}
-const struct amdgpu_ip_block_version gmc_v7_0_ip_block =
-{
+const struct amdgpu_ip_block_version gmc_v7_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GMC,
.major = 7,
.minor = 0,
@@ -1402,8 +1396,7 @@ const struct amdgpu_ip_block_version gmc_v7_0_ip_block =
.funcs = &gmc_v7_0_ip_funcs,
};
-const struct amdgpu_ip_block_version gmc_v7_4_ip_block =
-{
+const struct amdgpu_ip_block_version gmc_v7_4_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GMC,
.major = 7,
.minor = 4,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 492ebed2915b..6551b60f2584 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -53,7 +53,7 @@
static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
-static int gmc_v8_0_wait_for_idle(void *handle);
+static int gmc_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block);
MODULE_FIRMWARE("amdgpu/tonga_mc.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mc.bin");
@@ -64,8 +64,7 @@ MODULE_FIRMWARE("amdgpu/polaris11_k_mc.bin");
MODULE_FIRMWARE("amdgpu/polaris10_k_mc.bin");
MODULE_FIRMWARE("amdgpu/polaris12_k_mc.bin");
-static const u32 golden_settings_tonga_a11[] =
-{
+static const u32 golden_settings_tonga_a11[] = {
mmMC_ARB_WTM_GRPWT_RD, 0x00000003, 0x00000000,
mmMC_HUB_RDREQ_DMIF_LIMIT, 0x0000007f, 0x00000028,
mmMC_HUB_WDP_UMC, 0x00007fb6, 0x00000991,
@@ -75,34 +74,29 @@ static const u32 golden_settings_tonga_a11[] =
mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff,
};
-static const u32 tonga_mgcg_cgcg_init[] =
-{
+static const u32 tonga_mgcg_cgcg_init[] = {
mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
};
-static const u32 golden_settings_fiji_a10[] =
-{
+static const u32 golden_settings_fiji_a10[] = {
mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff,
};
-static const u32 fiji_mgcg_cgcg_init[] =
-{
+static const u32 fiji_mgcg_cgcg_init[] = {
mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
};
-static const u32 golden_settings_polaris11_a11[] =
-{
+static const u32 golden_settings_polaris11_a11[] = {
mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff
};
-static const u32 golden_settings_polaris10_a11[] =
-{
+static const u32 golden_settings_polaris10_a11[] = {
mmMC_ARB_WTM_GRPWT_RD, 0x00000003, 0x00000000,
mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
@@ -110,19 +104,16 @@ static const u32 golden_settings_polaris10_a11[] =
mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff
};
-static const u32 cz_mgcg_cgcg_init[] =
-{
+static const u32 cz_mgcg_cgcg_init[] = {
mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
};
-static const u32 stoney_mgcg_cgcg_init[] =
-{
+static const u32 stoney_mgcg_cgcg_init[] = {
mmATC_MISC_CG, 0xffffffff, 0x000c0200,
mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
};
-static const u32 golden_settings_stoney_common[] =
-{
+static const u32 golden_settings_stoney_common[] = {
mmMC_HUB_RDREQ_UVD, MC_HUB_RDREQ_UVD__PRESCALE_MASK, 0x00000004,
mmMC_RD_GRP_OTH, MC_RD_GRP_OTH__UVD_MASK, 0x00600000
};
@@ -179,8 +170,13 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
static void gmc_v8_0_mc_stop(struct amdgpu_device *adev)
{
u32 blackout;
+ struct amdgpu_ip_block *ip_block;
- gmc_v8_0_wait_for_idle(adev);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
+ gmc_v8_0_wait_for_idle(ip_block);
blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
@@ -221,7 +217,6 @@ static void gmc_v8_0_mc_resume(struct amdgpu_device *adev)
static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err;
DRM_DEBUG("\n");
@@ -260,20 +255,15 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
case CHIP_STONEY:
case CHIP_VEGAM:
return 0;
- default: BUG();
+ default:
+ return -EINVAL;
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
- err = request_firmware(&adev->gmc.fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gmc.fw);
-
-out:
+ err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mc.bin", chip_name);
if (err) {
- pr_err("mc: Failed to load firmware \"%s\"\n", fw_name);
- release_firmware(adev->gmc.fw);
- adev->gmc.fw = NULL;
+ pr_err("mc: Failed to load firmware \"%s_mc.bin\"\n", chip_name);
+ amdgpu_ucode_release(&adev->gmc.fw);
}
return err;
}
@@ -427,8 +417,9 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
base <<= 24;
+ amdgpu_gmc_set_agp_default(adev, mc);
amdgpu_gmc_vram_location(adev, mc, base);
- amdgpu_gmc_gart_location(adev, mc);
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
}
/**
@@ -441,6 +432,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
*/
static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
{
+ struct amdgpu_ip_block *ip_block;
u32 tmp;
int i, j;
@@ -454,9 +446,13 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
}
WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0);
- if (gmc_v8_0_wait_for_idle((void *)adev)) {
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
+ if (gmc_v8_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
- }
+
if (adev->mode_info.num_crtc) {
/* Lockout access through VGA aperture*/
tmp = RREG32(mmVGA_HDP_CONTROL);
@@ -474,7 +470,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
adev->gmc.vram_end >> 12);
WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
- adev->vram_scratch.gpu_addr >> 12);
+ adev->mem_scratch.gpu_addr >> 12);
if (amdgpu_sriov_vf(adev)) {
tmp = ((adev->gmc.vram_end >> 24) & 0xFFFF) << 16;
@@ -487,11 +483,10 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
}
WREG32(mmMC_VM_AGP_BASE, 0);
- WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
- WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
- if (gmc_v8_0_wait_for_idle((void *)adev)) {
+ WREG32(mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 22);
+ WREG32(mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 22);
+ if (gmc_v8_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
- }
WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
@@ -515,19 +510,19 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
{
int r;
+ u32 tmp;
adev->gmc.vram_width = amdgpu_atombios_get_vram_width(adev);
if (!adev->gmc.vram_width) {
- u32 tmp;
int chansize, numchan;
/* Get VRAM informations */
tmp = RREG32(mmMC_ARB_RAMCFG);
- if (REG_GET_FIELD(tmp, MC_ARB_RAMCFG, CHANSIZE)) {
+ if (REG_GET_FIELD(tmp, MC_ARB_RAMCFG, CHANSIZE))
chansize = 64;
- } else {
+ else
chansize = 32;
- }
+
tmp = RREG32(mmMC_SHARED_CHMAP);
switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) {
case 0:
@@ -562,8 +557,15 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
adev->gmc.vram_width = numchan * chansize;
}
/* size in MB on si */
- adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
- adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+ tmp = RREG32(mmCONFIG_MEMSIZE);
+ /* some boards may have garbage in the upper 16 bits */
+ if (tmp & 0xffff0000) {
+ DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
+ if (tmp & 0xffff)
+ tmp &= 0xffff;
+ }
+ adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL;
+ adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
if (!(adev->flags & AMD_IS_APU)) {
r = amdgpu_device_resize_fb_bar(adev);
@@ -574,16 +576,13 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
#ifdef CONFIG_X86_64
- if (adev->flags & AMD_IS_APU) {
+ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) {
adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
adev->gmc.aper_size = adev->gmc.real_vram_size;
}
#endif
- /* In case the PCI BAR is larger than the actual amount of vram */
adev->gmc.visible_vram_size = adev->gmc.aper_size;
- if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
- adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
/* set the gart size */
if (amdgpu_gart_size == -1) {
@@ -619,32 +618,27 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
* @pasid: pasid to be flush
* @flush_type: type of flush
* @all_hub: flush all hubs
+ * @inst: is used to select which instance of KIQ to use for the invalidation
*
* Flush the TLB for the requested pasid.
*/
-static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
- uint16_t pasid, uint32_t flush_type,
- bool all_hub)
+static void gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
{
+ u32 mask = 0x0;
int vmid;
- unsigned int tmp;
-
- if (amdgpu_in_reset(adev))
- return -EIO;
for (vmid = 1; vmid < 16; vmid++) {
+ u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
- tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
- (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
- WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
- RREG32(mmVM_INVALIDATE_RESPONSE);
- break;
- }
+ (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid)
+ mask |= 1 << vmid;
}
- return 0;
-
+ WREG32(mmVM_INVALIDATE_REQUEST, mask);
+ RREG32(mmVM_INVALIDATE_RESPONSE);
}
/*
@@ -672,7 +666,7 @@ static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
}
static uint64_t gmc_v8_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
- unsigned vmid, uint64_t pd_addr)
+ unsigned int vmid, uint64_t pd_addr)
{
uint32_t reg;
@@ -688,8 +682,8 @@ static uint64_t gmc_v8_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
return pd_addr;
}
-static void gmc_v8_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
- unsigned pasid)
+static void gmc_v8_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int vmid,
+ unsigned int pasid)
{
amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid);
}
@@ -722,11 +716,15 @@ static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level,
}
static void gmc_v8_0_get_vm_pte(struct amdgpu_device *adev,
- struct amdgpu_bo_va_mapping *mapping,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
uint64_t *flags)
{
- *flags &= ~AMDGPU_PTE_EXECUTABLE;
- *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+ if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+ *flags |= AMDGPU_PTE_EXECUTABLE;
+ else
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
*flags &= ~AMDGPU_PTE_PRT;
}
@@ -760,11 +758,11 @@ static void gmc_v8_0_set_fault_enable_default(struct amdgpu_device *adev,
}
/**
- * gmc_v8_0_set_prt - set PRT VM fault
+ * gmc_v8_0_set_prt() - set PRT VM fault
*
* @adev: amdgpu_device pointer
* @enable: enable/disable VM fault handling for PRT
-*/
+ */
static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
{
u32 tmp;
@@ -792,9 +790,10 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
WREG32(mmVM_PRT_CNTL, tmp);
if (enable) {
- uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
+ uint32_t low = AMDGPU_VA_RESERVED_BOTTOM >>
+ AMDGPU_GPU_PAGE_SHIFT;
uint32_t high = adev->vm_manager.max_pfn -
- (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
+ (AMDGPU_VA_RESERVED_TOP >> AMDGPU_GPU_PAGE_SHIFT);
WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
@@ -830,17 +829,14 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
{
uint64_t table_addr;
- int r, i;
u32 tmp, field;
+ int i;
if (adev->gart.bo == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
- r = amdgpu_gart_table_vram_pin(adev);
- if (r)
- return r;
-
+ amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
table_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
/* Setup TLB control */
@@ -944,9 +940,8 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
gmc_v8_0_flush_gpu_tlb(adev, 0, 0, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
- (unsigned)(adev->gmc.gart_size >> 20),
+ (unsigned int)(adev->gmc.gart_size >> 20),
(unsigned long long)table_addr);
- adev->gart.ready = true;
return 0;
}
@@ -992,7 +987,6 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32(mmVM_L2_CNTL, tmp);
WREG32(mmVM_L2_CNTL2, 0);
- amdgpu_gart_table_vram_unpin(adev);
}
/**
@@ -1007,7 +1001,7 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
* Print human readable fault information (VI).
*/
static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status,
- u32 addr, u32 mc_client, unsigned pasid)
+ u32 addr, u32 mc_client, unsigned int pasid)
{
u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
@@ -1048,9 +1042,9 @@ static int gmc_v8_0_convert_vram_type(int mc_seq_vram_type)
}
}
-static int gmc_v8_0_early_init(void *handle)
+static int gmc_v8_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v8_0_set_gmc_funcs(adev);
gmc_v8_0_set_irq_funcs(adev);
@@ -1062,13 +1056,14 @@ static int gmc_v8_0_early_init(void *handle)
adev->gmc.shared_aperture_end + 1;
adev->gmc.private_aperture_end =
adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;
return 0;
}
-static int gmc_v8_0_late_init(void *handle)
+static int gmc_v8_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
@@ -1076,15 +1071,16 @@ static int gmc_v8_0_late_init(void *handle)
return 0;
}
-static unsigned gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev)
+static unsigned int gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
- unsigned size;
+ unsigned int size;
if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
size = AMDGPU_VBIOS_VGA_ALLOCATION;
} else {
u32 viewport = RREG32(mmVIEWPORT_SIZE);
+
size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
4);
@@ -1095,12 +1091,12 @@ static unsigned gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev)
#define mmMC_SEQ_MISC0_FIJI 0xA71
-static int gmc_v8_0_sw_init(void *handle)
+static int gmc_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- adev->num_vmhubs = 1;
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
if (adev->flags & AMD_IS_APU) {
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
@@ -1187,30 +1183,29 @@ static int gmc_v8_0_sw_init(void *handle)
GFP_KERNEL);
if (!adev->gmc.vm_fault_info)
return -ENOMEM;
- atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+ atomic_set_release(&adev->gmc.vm_fault_info_updated, 0);
return 0;
}
-static int gmc_v8_0_sw_fini(void *handle)
+static int gmc_v8_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
kfree(adev->gmc.vm_fault_info);
amdgpu_gart_table_vram_free(adev);
amdgpu_bo_fini(adev);
- release_firmware(adev->gmc.fw);
- adev->gmc.fw = NULL;
+ amdgpu_ucode_release(&adev->gmc.fw);
return 0;
}
-static int gmc_v8_0_hw_init(void *handle)
+static int gmc_v8_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v8_0_init_golden_registers(adev);
@@ -1236,12 +1231,15 @@ static int gmc_v8_0_hw_init(void *handle)
if (r)
return r;
- return r;
+ if (amdgpu_emu_mode == 1)
+ return amdgpu_gmc_vram_checking(adev);
+
+ return 0;
}
-static int gmc_v8_0_hw_fini(void *handle)
+static int gmc_v8_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
gmc_v8_0_gart_disable(adev);
@@ -1249,32 +1247,29 @@ static int gmc_v8_0_hw_fini(void *handle)
return 0;
}
-static int gmc_v8_0_suspend(void *handle)
+static int gmc_v8_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- gmc_v8_0_hw_fini(adev);
+ gmc_v8_0_hw_fini(ip_block);
return 0;
}
-static int gmc_v8_0_resume(void *handle)
+static int gmc_v8_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = gmc_v8_0_hw_init(adev);
+ r = gmc_v8_0_hw_init(ip_block);
if (r)
return r;
- amdgpu_vmid_reset_all(adev);
+ amdgpu_vmid_reset_all(ip_block->adev);
return 0;
}
-static bool gmc_v8_0_is_idle(void *handle)
+static bool gmc_v8_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
@@ -1284,11 +1279,11 @@ static bool gmc_v8_0_is_idle(void *handle)
return true;
}
-static int gmc_v8_0_wait_for_idle(void *handle)
+static int gmc_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- unsigned i;
+ unsigned int i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -1306,10 +1301,10 @@ static int gmc_v8_0_wait_for_idle(void *handle)
}
-static bool gmc_v8_0_check_soft_reset(void *handle)
+static bool gmc_v8_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__VMC_BUSY_MASK)
@@ -1322,33 +1317,34 @@ static bool gmc_v8_0_check_soft_reset(void *handle)
srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
SRBM_SOFT_RESET, SOFT_RESET_MC, 1);
}
+
if (srbm_soft_reset) {
adev->gmc.srbm_soft_reset = srbm_soft_reset;
return true;
- } else {
- adev->gmc.srbm_soft_reset = 0;
- return false;
}
+
+ adev->gmc.srbm_soft_reset = 0;
+
+ return false;
}
-static int gmc_v8_0_pre_soft_reset(void *handle)
+static int gmc_v8_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->gmc.srbm_soft_reset)
return 0;
gmc_v8_0_mc_stop(adev);
- if (gmc_v8_0_wait_for_idle(adev)) {
+ if (gmc_v8_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
- }
return 0;
}
-static int gmc_v8_0_soft_reset(void *handle)
+static int gmc_v8_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset;
if (!adev->gmc.srbm_soft_reset)
@@ -1377,9 +1373,9 @@ static int gmc_v8_0_soft_reset(void *handle)
return 0;
}
-static int gmc_v8_0_post_soft_reset(void *handle)
+static int gmc_v8_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->gmc.srbm_soft_reset)
return 0;
@@ -1390,7 +1386,7 @@ static int gmc_v8_0_post_soft_reset(void *handle)
static int gmc_v8_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
u32 tmp;
@@ -1443,6 +1439,12 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+ /* Delegate to the soft IRQ handler ring */
+ if (adev->irq.ih_soft.enabled && entry->ih != &adev->irq.ih_soft) {
+ amdgpu_irq_delegate(adev, entry, 4);
+ return 1;
+ }
+
addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
mc_client = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
@@ -1452,22 +1454,29 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
if (!addr && !status)
return 0;
+ amdgpu_vm_update_fault_cache(adev, entry->pasid,
+ ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status, AMDGPU_GFXHUB(0));
+
if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
gmc_v8_0_set_fault_enable_default(adev, false);
if (printk_ratelimit()) {
- struct amdgpu_task_info task_info;
+ struct amdgpu_task_info *task_info;
+
+ dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
+ entry->src_id, entry->src_data[0]);
- memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ amdgpu_vm_print_task_info(adev, task_info);
+ amdgpu_vm_put_task_info(task_info);
+ }
- dev_err(adev->dev, "GPU fault detected: %d 0x%08x for process %s pid %d thread %s pid %d\n",
- entry->src_id, entry->src_data[0], task_info.process_name,
- task_info.tgid, task_info.task_name, task_info.pid);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
- addr);
+ addr);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
status);
+
gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client,
entry->pasid);
}
@@ -1475,7 +1484,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
VMID);
if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
- && !atomic_read(&adev->gmc.vm_fault_info_updated)) {
+ && !atomic_read_acquire(&adev->gmc.vm_fault_info_updated)) {
struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
u32 protections = REG_GET_FIELD(status,
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
@@ -1491,8 +1500,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
info->prot_read = protections & 0x8 ? true : false;
info->prot_write = protections & 0x10 ? true : false;
info->prot_exec = protections & 0x20 ? true : false;
- mb();
- atomic_set(&adev->gmc.vm_fault_info_updated, 1);
+ atomic_set_release(&adev->gmc.vm_fault_info_updated, 1);
}
return 0;
@@ -1658,10 +1666,10 @@ static void fiji_update_mc_light_sleep(struct amdgpu_device *adev,
}
}
-static int gmc_v8_0_set_clockgating_state(void *handle,
+static int gmc_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1679,15 +1687,15 @@ static int gmc_v8_0_set_clockgating_state(void *handle,
return 0;
}
-static int gmc_v8_0_set_powergating_state(void *handle,
+static int gmc_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void gmc_v8_0_get_clockgating_state(void *handle, u32 *flags)
+static void gmc_v8_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -1751,8 +1759,7 @@ static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gmc.vm_fault.funcs = &gmc_v8_0_irq_funcs;
}
-const struct amdgpu_ip_block_version gmc_v8_0_ip_block =
-{
+const struct amdgpu_ip_block_version gmc_v8_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GMC,
.major = 8,
.minor = 0,
@@ -1760,8 +1767,7 @@ const struct amdgpu_ip_block_version gmc_v8_0_ip_block =
.funcs = &gmc_v8_0_ip_funcs,
};
-const struct amdgpu_ip_block_version gmc_v8_1_ip_block =
-{
+const struct amdgpu_ip_block_version gmc_v8_1_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GMC,
.major = 8,
.minor = 1,
@@ -1769,8 +1775,7 @@ const struct amdgpu_ip_block_version gmc_v8_1_ip_block =
.funcs = &gmc_v8_0_ip_funcs,
};
-const struct amdgpu_ip_block_version gmc_v8_5_ip_block =
-{
+const struct amdgpu_ip_block_version gmc_v8_5_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GMC,
.major = 8,
.minor = 5,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index d84523cf5f75..8ad7519f7b58 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -49,11 +49,14 @@
#include "mmhub_v1_0.h"
#include "athub_v1_0.h"
#include "gfxhub_v1_1.h"
+#include "gfxhub_v1_2.h"
#include "mmhub_v9_4.h"
#include "mmhub_v1_7.h"
+#include "mmhub_v1_8.h"
#include "umc_v6_1.h"
#include "umc_v6_0.h"
#include "umc_v6_7.h"
+#include "umc_v12_0.h"
#include "hdp_v4_0.h"
#include "mca_v3_0.h"
@@ -72,8 +75,10 @@
#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0 0x049d
#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX 2
+#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2 0x05ea
+#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2_BASE_IDX 2
-static const char *gfxhub_client_ids[] = {
+static const char * const gfxhub_client_ids[] = {
"CB",
"DB",
"IA",
@@ -324,14 +329,12 @@ static const char *mmhub_client_ids_aldebaran[][2] = {
[384+0][1] = "OSS",
};
-static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] =
-{
+static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] = {
SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmDAGB1_WRCLI2, 0x00000007, 0xfe5fe0fa),
SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0, 0x00000030, 0x55555565)
};
-static const struct soc15_reg_golden golden_settings_athub_1_0_0[] =
-{
+static const struct soc15_reg_golden golden_settings_athub_1_0_0[] = {
SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL, 0x0000ff00, 0x00000800),
SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008)
};
@@ -408,13 +411,14 @@ static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = {
static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
u32 bits, i, tmp, reg;
/* Devices newer then VEGA10/12 shall have these programming
- sequences performed by PSP BL */
+ * sequences performed by PSP BL
+ */
if (adev->asic_type >= CHIP_VEGA20)
return 0;
@@ -458,7 +462,7 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
struct amdgpu_vmhub *hub;
@@ -474,24 +478,58 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
- for (j = 0; j < adev->num_vmhubs; j++) {
+ for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
hub = &adev->vmhub[j];
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
- tmp = RREG32(reg);
+
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0)))
+ continue;
+
+ if (j >= AMDGPU_MMHUB0(0))
+ tmp = RREG32_SOC15_IP(MMHUB, reg);
+ else
+ tmp = RREG32_XCC(reg, j);
+
tmp &= ~bits;
- WREG32(reg, tmp);
+
+ if (j >= AMDGPU_MMHUB0(0))
+ WREG32_SOC15_IP(MMHUB, reg, tmp);
+ else
+ WREG32_XCC(reg, tmp, j);
}
}
break;
case AMDGPU_IRQ_STATE_ENABLE:
- for (j = 0; j < adev->num_vmhubs; j++) {
+ for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
hub = &adev->vmhub[j];
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
- tmp = RREG32(reg);
+
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0)))
+ continue;
+
+ if (j >= AMDGPU_MMHUB0(0))
+ tmp = RREG32_SOC15_IP(MMHUB, reg);
+ else
+ tmp = RREG32_XCC(reg, j);
+
tmp |= bits;
- WREG32(reg, tmp);
+
+ if (j >= AMDGPU_MMHUB0(0))
+ WREG32_SOC15_IP(MMHUB, reg, tmp);
+ else
+ WREG32_XCC(reg, tmp, j);
}
}
break;
@@ -506,70 +544,111 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- bool retry_fault = !!(entry->src_data[1] & 0x80);
- bool write_fault = !!(entry->src_data[1] & 0x20);
- uint32_t status = 0, cid = 0, rw = 0;
- struct amdgpu_task_info task_info;
+ bool retry_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY);
+ bool write_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE);
+ uint32_t status = 0, cid = 0, rw = 0, fed = 0;
+ struct amdgpu_task_info *task_info;
struct amdgpu_vmhub *hub;
const char *mmhub_cid;
const char *hub_name;
+ unsigned int vmhub;
u64 addr;
+ uint32_t cam_index = 0;
+ int ret, xcc_id = 0;
+ uint32_t node_id;
+
+ node_id = entry->node_id;
addr = (u64)entry->src_data[0] << 12;
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+ if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
+ hub_name = "mmhub0";
+ vmhub = AMDGPU_MMHUB0(node_id / 4);
+ } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
+ hub_name = "mmhub1";
+ vmhub = AMDGPU_MMHUB1(0);
+ } else {
+ hub_name = "gfxhub0";
+ if (adev->gfx.funcs->ih_node_to_logical_xcc) {
+ xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev,
+ node_id);
+ if (xcc_id < 0)
+ xcc_id = 0;
+ }
+ vmhub = xcc_id;
+ }
+ hub = &adev->vmhub[vmhub];
+
if (retry_fault) {
- /* Returning 1 here also prevents sending the IV to the KFD */
+ if (adev->irq.retry_cam_enabled) {
+ /* Delegate it to a different ring if the hardware hasn't
+ * already done it.
+ */
+ if (entry->ih == &adev->irq.ih) {
+ amdgpu_irq_delegate(adev, entry, 8);
+ return 1;
+ }
+
+ cam_index = entry->src_data[2] & 0x3ff;
- /* Process it onyl if it's the first fault for this address */
- if (entry->ih != &adev->irq.ih_soft &&
- amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
+ ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
+ addr, entry->timestamp, write_fault);
+ WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);
+ if (ret)
+ return 1;
+ } else {
+ /* Process it onyl if it's the first fault for this address */
+ if (entry->ih != &adev->irq.ih_soft &&
+ amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
entry->timestamp))
- return 1;
+ return 1;
+
+ /* Delegate it to a different ring if the hardware hasn't
+ * already done it.
+ */
+ if (entry->ih == &adev->irq.ih) {
+ amdgpu_irq_delegate(adev, entry, 8);
+ return 1;
+ }
- /* Delegate it to a different ring if the hardware hasn't
- * already done it.
- */
- if (entry->ih == &adev->irq.ih) {
- amdgpu_irq_delegate(adev, entry, 8);
- return 1;
+ /* Try to handle the recoverable page faults by filling page
+ * tables
+ */
+ if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
+ addr, entry->timestamp, write_fault))
+ return 1;
}
-
- /* Try to handle the recoverable page faults by filling page
- * tables
- */
- if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault))
- return 1;
}
+ if (kgd2kfd_vmfault_fast_path(adev, entry, retry_fault))
+ return 1;
+
if (!printk_ratelimit())
return 0;
- if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
- hub_name = "mmhub0";
- hub = &adev->vmhub[AMDGPU_MMHUB_0];
- } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
- hub_name = "mmhub1";
- hub = &adev->vmhub[AMDGPU_MMHUB_1];
- } else {
- hub_name = "gfxhub0";
- hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ dev_err(adev->dev,
+ "[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", hub_name,
+ retry_fault ? "retry" : "no-retry",
+ entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
+
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ amdgpu_vm_print_task_info(adev, task_info);
+ amdgpu_vm_put_task_info(task_info);
}
- memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
-
- dev_err(adev->dev,
- "[%s] %s page fault (src_id:%u ring:%u vmid:%u "
- "pasid:%u, for process %s pid %d thread %s pid %d)\n",
- hub_name, retry_fault ? "retry" : "no-retry",
- entry->src_id, entry->ring_id, entry->vmid,
- entry->pasid, task_info.process_name, task_info.tgid,
- task_info.task_name, task_info.pid);
dev_err(adev->dev, " in page starting at address 0x%016llx from IH client 0x%x (%s)\n",
addr, entry->client_id,
soc15_ih_clientid_name[entry->client_id]);
+ if (amdgpu_is_multi_aid(adev))
+ dev_err(adev->dev, " cookie node_id %d fault from die %s%d%s\n",
+ node_id, node_id % 4 == 3 ? "RSV" : "AID", node_id / 4,
+ node_id % 4 == 1 ? ".XCD0" : node_id % 4 == 2 ? ".XCD1" : "");
+
if (amdgpu_sriov_vf(adev))
return 0;
@@ -578,26 +657,41 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
* be updated to avoid reading an incorrect value due to
* the new fast GRBM interface.
*/
- if ((entry->vmid_src == AMDGPU_GFXHUB_0) &&
- (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
+ if ((entry->vmid_src == AMDGPU_GFXHUB(0)) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)))
RREG32(hub->vm_l2_pro_fault_status);
status = RREG32(hub->vm_l2_pro_fault_status);
cid = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, CID);
rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW);
- WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+ fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED);
+
+ /* for fed error, kfd will handle it, return directly */
+ if (fed && amdgpu_ras_is_poison_mode_supported(adev) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2)))
+ return 0;
+
+ /* Only print L2 fault status if the status register could be read and
+ * contains useful information
+ */
+ if (!status)
+ return 0;
+
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+ amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub);
dev_err(adev->dev,
"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
- if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) {
+ if (entry->vmid_src == AMDGPU_GFXHUB(0)) {
dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" :
gfxhub_client_ids[cid],
cid);
} else {
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(9, 0, 0):
mmhub_cid = mmhub_client_ids_vega10[cid][rw];
break;
@@ -618,6 +712,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
case IP_VERSION(2, 4, 0):
mmhub_cid = mmhub_client_ids_renoir[cid][rw];
break;
+ case IP_VERSION(1, 8, 0):
case IP_VERSION(9, 4, 2):
mmhub_cid = mmhub_client_ids_aldebaran[cid][rw];
break;
@@ -661,7 +756,8 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
if (!amdgpu_sriov_vf(adev) &&
- !adev->gmc.xgmi.connected_to_cpu) {
+ !adev->gmc.xgmi.connected_to_cpu &&
+ !adev->gmc.is_app_apu) {
adev->gmc.ecc_irq.num_types = 1;
adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
}
@@ -696,11 +792,12 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
uint32_t vmhub)
{
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
+ amdgpu_is_multi_aid(adev))
return false;
- return ((vmhub == AMDGPU_MMHUB_0 ||
- vmhub == AMDGPU_MMHUB_1) &&
+ return ((vmhub == AMDGPU_MMHUB0(0) ||
+ vmhub == AMDGPU_MMHUB1(0)) &&
(!amdgpu_sriov_vf(adev)) &&
(!(!(adev->apu_flags & AMD_APU_IS_RAVEN2) &&
(adev->apu_flags & AMD_APU_IS_PICASSO))));
@@ -739,43 +836,37 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
{
bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
- const unsigned eng = 17;
- u32 j, inv_req, inv_req2, tmp;
+ u32 j, inv_req, tmp, sem, req, ack, inst;
+ const unsigned int eng = 17;
struct amdgpu_vmhub *hub;
- BUG_ON(vmhub >= adev->num_vmhubs);
+ BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS);
hub = &adev->vmhub[vmhub];
- if (adev->gmc.xgmi.num_physical_nodes &&
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0)) {
- /* Vega20+XGMI caches PTEs in TC and TLB. Add a
- * heavy-weight TLB flush (type 2), which flushes
- * both. Due to a race condition with concurrent
- * memory accesses using the same TLB cache line, we
- * still need a second TLB flush after this.
- */
- inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
- inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
- } else {
- inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
- inv_req2 = 0;
- }
+ inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+ sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+ req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+ ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
- /* This is necessary for a HW workaround under SRIOV as well
- * as GFXOFF under bare metal
+ if (vmhub >= AMDGPU_MMHUB0(0))
+ inst = 0;
+ else
+ inst = vmhub;
+
+ /* This is necessary for SRIOV as well as for GFXOFF to function
+ * properly under bare metal
*/
- if (adev->gfx.kiq.ring.sched.ready &&
- (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
- down_read_trylock(&adev->reset_sem)) {
+ if (adev->gfx.kiq[inst].ring.sched.ready &&
+ (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
- amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
- 1 << vmid);
- up_read(&adev->reset_sem);
+ amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
+ 1 << vmid, inst);
return;
}
+ /* This path is needed before KIQ/MES/GFXOFF are set up */
spin_lock(&adev->gmc.invalidate_lock);
/*
@@ -788,9 +879,11 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
if (use_semaphore) {
for (j = 0; j < adev->usec_timeout; j++) {
- /* a read return value of 1 means semaphore acuqire */
- tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
- hub->eng_distance * eng);
+ /* a read return value of 1 means semaphore acquire */
+ if (vmhub >= AMDGPU_MMHUB0(0))
+ tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem, GET_INST(GC, inst));
+ else
+ tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem, GET_INST(GC, inst));
if (tmp & 0x1)
break;
udelay(1);
@@ -800,40 +893,41 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
}
- do {
- WREG32_NO_KIQ(hub->vm_inv_eng0_req +
- hub->eng_distance * eng, inv_req);
-
- /*
- * Issue a dummy read to wait for the ACK register to
- * be cleared to avoid a false ACK due to the new fast
- * GRBM interface.
- */
- if ((vmhub == AMDGPU_GFXHUB_0) &&
- (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
- RREG32_NO_KIQ(hub->vm_inv_eng0_req +
- hub->eng_distance * eng);
+ if (vmhub >= AMDGPU_MMHUB0(0))
+ WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req, GET_INST(GC, inst));
+ else
+ WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req, GET_INST(GC, inst));
- for (j = 0; j < adev->usec_timeout; j++) {
- tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
- hub->eng_distance * eng);
- if (tmp & (1 << vmid))
- break;
- udelay(1);
- }
+ /*
+ * Issue a dummy read to wait for the ACK register to
+ * be cleared to avoid a false ACK due to the new fast
+ * GRBM interface.
+ */
+ if ((vmhub == AMDGPU_GFXHUB(0)) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)))
+ RREG32_NO_KIQ(req);
- inv_req = inv_req2;
- inv_req2 = 0;
- } while (inv_req);
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (vmhub >= AMDGPU_MMHUB0(0))
+ tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack, GET_INST(GC, inst));
+ else
+ tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack, GET_INST(GC, inst));
+ if (tmp & (1 << vmid))
+ break;
+ udelay(1);
+ }
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
- if (use_semaphore)
+ if (use_semaphore) {
/*
* add semaphore release after invalidation,
* write with 0 means semaphore release
*/
- WREG32_NO_KIQ(hub->vm_inv_eng0_sem +
- hub->eng_distance * eng, 0);
+ if (vmhub >= AMDGPU_MMHUB0(0))
+ WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0, GET_INST(GC, inst));
+ else
+ WREG32_SOC15_IP_NO_KIQ(GC, sem, 0, GET_INST(GC, inst));
+ }
spin_unlock(&adev->gmc.invalidate_lock);
@@ -850,96 +944,46 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
* @pasid: pasid to be flush
* @flush_type: the flush type
* @all_hub: flush all hubs
+ * @inst: is used to select which instance of KIQ to use for the invalidation
*
* Flush the TLB for the requested pasid.
*/
-static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
- uint16_t pasid, uint32_t flush_type,
- bool all_hub)
+static void gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
{
- int vmid, i;
- signed long r;
- uint32_t seq;
- uint16_t queried_pasid;
- bool ret;
- struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-
- if (amdgpu_in_reset(adev))
- return -EIO;
-
- if (ring->sched.ready && down_read_trylock(&adev->reset_sem)) {
- /* Vega20+XGMI caches PTEs in TC and TLB. Add a
- * heavy-weight TLB flush (type 2), which flushes
- * both. Due to a race condition with concurrent
- * memory accesses using the same TLB cache line, we
- * still need a second TLB flush after this.
- */
- bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0));
- /* 2 dwords flush + 8 dwords fence */
- unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;
-
- if (vega20_xgmi_wa)
- ndw += kiq->pmf->invalidate_tlbs_size;
-
- spin_lock(&adev->gfx.kiq.ring_lock);
- /* 2 dwords flush + 8 dwords fence */
- amdgpu_ring_alloc(ring, ndw);
- if (vega20_xgmi_wa)
- kiq->pmf->kiq_invalidate_tlbs(ring,
- pasid, 2, all_hub);
- kiq->pmf->kiq_invalidate_tlbs(ring,
- pasid, flush_type, all_hub);
- r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
- if (r) {
- amdgpu_ring_undo(ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
- up_read(&adev->reset_sem);
- return -ETIME;
- }
-
- amdgpu_ring_commit(ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
- r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
- if (r < 1) {
- dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
- up_read(&adev->reset_sem);
- return -ETIME;
- }
- up_read(&adev->reset_sem);
- return 0;
- }
+ uint16_t queried;
+ int i, vmid;
for (vmid = 1; vmid < 16; vmid++) {
-
- ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
- &queried_pasid);
- if (ret && queried_pasid == pasid) {
- if (all_hub) {
- for (i = 0; i < adev->num_vmhubs; i++)
- gmc_v9_0_flush_gpu_tlb(adev, vmid,
- i, flush_type);
- } else {
- gmc_v9_0_flush_gpu_tlb(adev, vmid,
- AMDGPU_GFXHUB_0, flush_type);
- }
- break;
+ bool valid;
+
+ valid = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+ &queried);
+ if (!valid || queried != pasid)
+ continue;
+
+ if (all_hub) {
+ for_each_set_bit(i, adev->vmhubs_mask,
+ AMDGPU_MAX_VMHUBS)
+ gmc_v9_0_flush_gpu_tlb(adev, vmid, i,
+ flush_type);
+ } else {
+ gmc_v9_0_flush_gpu_tlb(adev, vmid,
+ AMDGPU_GFXHUB(0),
+ flush_type);
}
}
-
- return 0;
-
}
static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
- unsigned vmid, uint64_t pd_addr)
+ unsigned int vmid, uint64_t pd_addr)
{
- bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
+ bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->vm_hub);
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
+ struct amdgpu_vmhub *hub = &adev->vmhub[ring->vm_hub];
uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
- unsigned eng = ring->vm_inv_eng;
+ unsigned int eng = ring->vm_inv_eng;
/*
* It may lose gpuvm invalidate acknowledge state across power-gating
@@ -981,17 +1025,17 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
return pd_addr;
}
-static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
- unsigned pasid)
+static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int vmid,
+ unsigned int pasid)
{
struct amdgpu_device *adev = ring->adev;
uint32_t reg;
/* Do nothing because there's no lut register for mmhub1. */
- if (ring->funcs->vmhub == AMDGPU_MMHUB_1)
+ if (ring->vm_hub == AMDGPU_MMHUB1(0))
return;
- if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
+ if (ring->vm_hub == AMDGPU_GFXHUB(0))
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
else
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
@@ -1031,27 +1075,6 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
* 0 valid
*/
-static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
-
-{
- switch (flags) {
- case AMDGPU_VM_MTYPE_DEFAULT:
- return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
- case AMDGPU_VM_MTYPE_NC:
- return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
- case AMDGPU_VM_MTYPE_WC:
- return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
- case AMDGPU_VM_MTYPE_RW:
- return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
- case AMDGPU_VM_MTYPE_CC:
- return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
- case AMDGPU_VM_MTYPE_UC:
- return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
- default:
- return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
- }
-}
-
static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
uint64_t *addr, uint64_t *flags)
{
@@ -1068,52 +1091,263 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
*flags |= AMDGPU_PDE_BFS(0x9);
} else if (level == AMDGPU_VM_PDB0) {
- if (*flags & AMDGPU_PDE_PTE)
+ if (*flags & AMDGPU_PDE_PTE) {
*flags &= ~AMDGPU_PDE_PTE;
- else
+ if (!(*flags & AMDGPU_PTE_VALID))
+ *addr |= 1 << PAGE_SHIFT;
+ } else {
*flags |= AMDGPU_PTE_TF;
+ }
+ }
+}
+
+static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
+ uint64_t *flags)
+{
+ struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ bool is_vram = bo->tbo.resource &&
+ bo->tbo.resource->mem_type == TTM_PL_VRAM;
+ bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_EXT_COHERENT);
+ bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT;
+ bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
+ unsigned int mtype_local, mtype;
+ uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0);
+ bool snoop = false;
+ bool is_local;
+
+ dma_resv_assert_held(bo->tbo.base.resv);
+
+ switch (gc_ip_version) {
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 4, 2):
+ if (is_vram) {
+ if (bo_adev == adev) {
+ if (uncached)
+ mtype = MTYPE_UC;
+ else if (coherent)
+ mtype = MTYPE_CC;
+ else
+ mtype = MTYPE_RW;
+ /* FIXME: is this still needed? Or does
+ * amdgpu_ttm_tt_pde_flags already handle this?
+ */
+ if (gc_ip_version == IP_VERSION(9, 4, 2) &&
+ adev->gmc.xgmi.connected_to_cpu)
+ snoop = true;
+ } else {
+ if (uncached || coherent)
+ mtype = MTYPE_UC;
+ else
+ mtype = MTYPE_NC;
+ if (amdgpu_xgmi_same_hive(adev, bo_adev))
+ snoop = true;
+ }
+ } else {
+ if (uncached || coherent)
+ mtype = MTYPE_UC;
+ else
+ mtype = MTYPE_NC;
+ /* FIXME: is this still needed? Or does
+ * amdgpu_ttm_tt_pde_flags already handle this?
+ */
+ snoop = true;
+ }
+ break;
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ /* Only local VRAM BOs or system memory on non-NUMA APUs
+ * can be assumed to be local in their entirety. Choose
+ * MTYPE_NC as safe fallback for all system memory BOs on
+ * NUMA systems. Their MTYPE can be overridden per-page in
+ * gmc_v9_0_override_vm_pte_flags.
+ */
+ mtype_local = MTYPE_RW;
+ if (amdgpu_mtype_local == 1) {
+ DRM_INFO_ONCE("Using MTYPE_NC for local memory\n");
+ mtype_local = MTYPE_NC;
+ } else if (amdgpu_mtype_local == 2) {
+ DRM_INFO_ONCE("Using MTYPE_CC for local memory\n");
+ mtype_local = MTYPE_CC;
+ } else {
+ DRM_INFO_ONCE("Using MTYPE_RW for local memory\n");
+ }
+ is_local = (!is_vram && (adev->flags & AMD_IS_APU) &&
+ num_possible_nodes() <= 1) ||
+ (is_vram && adev == bo_adev &&
+ KFD_XCP_MEM_ID(adev, bo->xcp_id) == vm->mem_id);
+ snoop = true;
+ if (uncached) {
+ mtype = MTYPE_UC;
+ } else if (ext_coherent) {
+ mtype = is_local ? MTYPE_CC : MTYPE_UC;
+ } else if (adev->flags & AMD_IS_APU) {
+ mtype = is_local ? mtype_local : MTYPE_NC;
+ } else {
+ /* dGPU */
+ if (is_local)
+ mtype = mtype_local;
+ else if (gc_ip_version < IP_VERSION(9, 5, 0) && !is_vram)
+ mtype = MTYPE_UC;
+ else
+ mtype = MTYPE_NC;
+ }
+
+ break;
+ default:
+ if (uncached || coherent)
+ mtype = MTYPE_UC;
+ else
+ mtype = MTYPE_NC;
+
+ /* FIXME: is this still needed? Or does
+ * amdgpu_ttm_tt_pde_flags already handle this?
+ */
+ if (!is_vram)
+ snoop = true;
}
+
+ if (mtype != MTYPE_NC)
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, mtype);
+
+ *flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
}
static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
- struct amdgpu_bo_va_mapping *mapping,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
uint64_t *flags)
{
- *flags &= ~AMDGPU_PTE_EXECUTABLE;
- *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+ if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+ *flags |= AMDGPU_PTE_EXECUTABLE;
+ else
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
- *flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
- *flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;
+ switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
+ case AMDGPU_VM_MTYPE_DEFAULT:
+ case AMDGPU_VM_MTYPE_NC:
+ default:
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_NC);
+ break;
+ case AMDGPU_VM_MTYPE_WC:
+ *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_WC);
+ break;
+ case AMDGPU_VM_MTYPE_RW:
+ *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_RW);
+ break;
+ case AMDGPU_VM_MTYPE_CC:
+ *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC);
+ break;
+ case AMDGPU_VM_MTYPE_UC:
+ *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_UC);
+ break;
+ }
- if (mapping->flags & AMDGPU_PTE_PRT) {
+ if (vm_flags & AMDGPU_VM_PAGE_PRT) {
*flags |= AMDGPU_PTE_PRT;
*flags &= ~AMDGPU_PTE_VALID;
}
- if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) &&
- !(*flags & AMDGPU_PTE_SYSTEM) &&
- mapping->bo_va->is_xgmi)
- *flags |= AMDGPU_PTE_SNOOPED;
+ if ((*flags & AMDGPU_PTE_VALID) && bo)
+ gmc_v9_0_get_coherence_flags(adev, vm, bo, vm_flags, flags);
+}
+
+static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ uint64_t addr, uint64_t *flags)
+{
+ int local_node, nid;
+
+ /* Only GFX 9.4.3 APUs associate GPUs with NUMA nodes. Local system
+ * memory can use more efficient MTYPEs.
+ */
+ if (!(adev->flags & AMD_IS_APU) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3))
+ return;
+
+ /* Only direct-mapped memory allows us to determine the NUMA node from
+ * the DMA address.
+ */
+ if (!adev->ram_is_direct_mapped) {
+ dev_dbg_ratelimited(adev->dev, "RAM is not direct mapped\n");
+ return;
+ }
+
+ /* MTYPE_NC is the same default and can be overridden.
+ * MTYPE_UC will be present if the memory is extended-coherent
+ * and can also be overridden.
+ */
+ if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
+ AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC) &&
+ (*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
+ AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_UC)) {
+ dev_dbg_ratelimited(adev->dev, "MTYPE is not NC or UC\n");
+ return;
+ }
+
+ /* FIXME: Only supported on native mode for now. For carve-out, the
+ * NUMA affinity of the GPU/VM needs to come from the PCI info because
+ * memory partitions are not associated with different NUMA nodes.
+ */
+ if (adev->gmc.is_app_apu && vm->mem_id >= 0) {
+ local_node = adev->gmc.mem_partitions[vm->mem_id].numa.node;
+ } else {
+ dev_dbg_ratelimited(adev->dev, "Only native mode APU is supported.\n");
+ return;
+ }
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
- *flags |= mapping->flags & AMDGPU_PTE_SNOOPED;
+ /* Only handle real RAM. Mappings of PCIe resources don't have struct
+ * page or NUMA nodes.
+ */
+ if (!page_is_ram(addr >> PAGE_SHIFT)) {
+ dev_dbg_ratelimited(adev->dev, "Page is not RAM.\n");
+ return;
+ }
+ nid = pfn_to_nid(addr >> PAGE_SHIFT);
+ dev_dbg_ratelimited(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n",
+ vm->mem_id, local_node, nid);
+ if (nid == local_node) {
+ uint64_t old_flags = *flags;
+ if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) ==
+ AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC)) {
+ unsigned int mtype_local = MTYPE_RW;
+
+ if (amdgpu_mtype_local == 1)
+ mtype_local = MTYPE_NC;
+ else if (amdgpu_mtype_local == 2)
+ mtype_local = MTYPE_CC;
+
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, mtype_local);
+ } else {
+ /* MTYPE_UC case */
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC);
+ }
+
+ dev_dbg_ratelimited(adev->dev, "flags updated from %llx to %llx\n",
+ old_flags, *flags);
+ }
}
-static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
+static unsigned int gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
- unsigned size;
+ unsigned int size;
+
+ /* TODO move to DC so GMC doesn't need to hard-code DCN registers */
if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
size = AMDGPU_VBIOS_VGA_ALLOCATION;
} else {
u32 viewport;
- switch (adev->ip_versions[DCE_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(1, 0, 0):
case IP_VERSION(1, 0, 1):
- case IP_VERSION(2, 1, 0):
viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
size = (REG_GET_FIELD(viewport,
HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
@@ -1121,6 +1355,14 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
4);
break;
+ case IP_VERSION(2, 1, 0):
+ viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2);
+ size = (REG_GET_FIELD(viewport,
+ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(viewport,
+ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
+ 4);
+ break;
default:
viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
@@ -1133,15 +1375,29 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
return size;
}
+static bool gmc_v9_0_need_reset_on_init(struct amdgpu_device *adev)
+{
+ if (adev->nbio.funcs && adev->nbio.funcs->is_nps_switch_requested &&
+ adev->nbio.funcs->is_nps_switch_requested(adev)) {
+ adev->gmc.reset_flags |= AMDGPU_GMC_INIT_RESET_NPS;
+ return true;
+ }
+
+ return false;
+}
+
static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
- .map_mtype = gmc_v9_0_map_mtype,
.get_vm_pde = gmc_v9_0_get_vm_pde,
.get_vm_pte = gmc_v9_0_get_vm_pte,
+ .override_vm_pte_flags = gmc_v9_0_override_vm_pte_flags,
.get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size,
+ .query_mem_partition_mode = &amdgpu_gmc_query_memory_partition,
+ .request_mem_partition_mode = &amdgpu_gmc_request_memory_partition,
+ .need_reset_on_init = &gmc_v9_0_need_reset_on_init,
};
static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
@@ -1151,7 +1407,7 @@ static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[UMC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
case IP_VERSION(6, 0, 0):
adev->umc.funcs = &umc_v6_0_funcs;
break;
@@ -1160,29 +1416,44 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
+ adev->umc.retire_unit = 1;
adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
- adev->umc.ras_funcs = &umc_v6_1_ras_funcs;
+ adev->umc.ras = &umc_v6_1_ras;
break;
case IP_VERSION(6, 1, 2):
adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
+ adev->umc.retire_unit = 1;
adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
- adev->umc.ras_funcs = &umc_v6_1_ras_funcs;
+ adev->umc.ras = &umc_v6_1_ras;
break;
case IP_VERSION(6, 7, 0):
- adev->umc.max_ras_err_cnt_per_query = UMC_V6_7_TOTAL_CHANNEL_NUM;
+ adev->umc.max_ras_err_cnt_per_query =
+ UMC_V6_7_TOTAL_CHANNEL_NUM * UMC_V6_7_BAD_PAGE_NUM_PER_CHANNEL;
adev->umc.channel_inst_num = UMC_V6_7_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V6_7_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V6_7_PER_CHANNEL_OFFSET;
+ adev->umc.retire_unit = (UMC_V6_7_NA_MAP_PA_NUM * 2);
if (!adev->gmc.xgmi.connected_to_cpu)
- adev->umc.ras_funcs = &umc_v6_7_ras_funcs;
+ adev->umc.ras = &umc_v6_7_ras;
if (1 & adev->smuio.funcs->get_die_id(adev))
adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_first[0][0];
else
adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_second[0][0];
break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 5, 0):
+ adev->umc.max_ras_err_cnt_per_query =
+ UMC_V12_0_TOTAL_CHANNEL_NUM(adev) * UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL;
+ adev->umc.channel_inst_num = UMC_V12_0_CHANNEL_INSTANCE_NUM;
+ adev->umc.umc_inst_num = UMC_V12_0_UMC_INSTANCE_NUM;
+ adev->umc.node_inst_num /= UMC_V12_0_UMC_INSTANCE_NUM;
+ adev->umc.channel_offs = UMC_V12_0_PER_CHANNEL_OFFSET;
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu)
+ adev->umc.ras = &umc_v12_0_ras;
+ break;
default:
break;
}
@@ -1190,13 +1461,17 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(9, 4, 1):
adev->mmhub.funcs = &mmhub_v9_4_funcs;
break;
case IP_VERSION(9, 4, 2):
adev->mmhub.funcs = &mmhub_v1_7_funcs;
break;
+ case IP_VERSION(1, 8, 0):
+ case IP_VERSION(1, 8, 1):
+ adev->mmhub.funcs = &mmhub_v1_8_funcs;
+ break;
default:
adev->mmhub.funcs = &mmhub_v1_0_funcs;
break;
@@ -1205,15 +1480,19 @@ static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(9, 4, 0):
- adev->mmhub.ras_funcs = &mmhub_v1_0_ras_funcs;
+ adev->mmhub.ras = &mmhub_v1_0_ras;
break;
case IP_VERSION(9, 4, 1):
- adev->mmhub.ras_funcs = &mmhub_v9_4_ras_funcs;
+ adev->mmhub.ras = &mmhub_v9_4_ras;
break;
case IP_VERSION(9, 4, 2):
- adev->mmhub.ras_funcs = &mmhub_v1_7_ras_funcs;
+ adev->mmhub.ras = &mmhub_v1_7_ras;
+ break;
+ case IP_VERSION(1, 8, 0):
+ case IP_VERSION(1, 8, 1):
+ adev->mmhub.ras = &mmhub_v1_8_ras;
break;
default:
/* mmhub ras is not available */
@@ -1223,42 +1502,111 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
{
- adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
+ if (amdgpu_is_multi_aid(adev))
+ adev->gfxhub.funcs = &gfxhub_v1_2_funcs;
+ else
+ adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
}
static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev)
{
- adev->hdp.ras_funcs = &hdp_v4_0_ras_funcs;
+ adev->hdp.ras = &hdp_v4_0_ras;
}
-static void gmc_v9_0_set_mca_funcs(struct amdgpu_device *adev)
+static void gmc_v9_0_set_mca_ras_funcs(struct amdgpu_device *adev)
{
+ struct amdgpu_mca *mca = &adev->mca;
+
/* is UMC the right IP to check for MCA? Maybe DF? */
- switch (adev->ip_versions[UMC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
case IP_VERSION(6, 7, 0):
- if (!adev->gmc.xgmi.connected_to_cpu)
- adev->mca.funcs = &mca_v3_0_funcs;
+ if (!adev->gmc.xgmi.connected_to_cpu) {
+ mca->mp0.ras = &mca_v3_0_mp0_ras;
+ mca->mp1.ras = &mca_v3_0_mp1_ras;
+ mca->mpio.ras = &mca_v3_0_mpio_ras;
+ }
break;
default:
break;
}
}
-static int gmc_v9_0_early_init(void *handle)
+static void gmc_v9_0_set_xgmi_ras_funcs(struct amdgpu_device *adev)
+{
+ if (!adev->gmc.xgmi.connected_to_cpu)
+ adev->gmc.xgmi.ras = &xgmi_ras;
+}
+
+static void gmc_v9_0_init_nps_details(struct amdgpu_device *adev)
+{
+ enum amdgpu_memory_partition mode;
+ uint32_t supp_modes;
+ int i;
+
+ adev->gmc.supported_nps_modes = 0;
+
+ if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
+ return;
+
+ mode = amdgpu_gmc_get_memory_partition(adev, &supp_modes);
+
+ /* Mode detected by hardware and supported modes available */
+ if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) && supp_modes) {
+ while ((i = ffs(supp_modes))) {
+ if (AMDGPU_ALL_NPS_MASK & BIT(i))
+ adev->gmc.supported_nps_modes |= BIT(i);
+ supp_modes &= supp_modes - 1;
+ }
+ } else {
+ /* TODO: Also check that the PSP version supports NPS switch. Otherwise keep
+ * supported modes as 0.
+ */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ adev->gmc.supported_nps_modes =
+ BIT(AMDGPU_NPS1_PARTITION_MODE) |
+ BIT(AMDGPU_NPS4_PARTITION_MODE);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static int gmc_v9_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- /* ARCT and VEGA20 don't have XGMI defined in their IP discovery tables */
- if (adev->asic_type == CHIP_VEGA20 ||
- adev->asic_type == CHIP_ARCTURUS)
+ /*
+ * 9.4.0, 9.4.1 and 9.4.3 don't have XGMI defined
+ * in their IP discovery tables
+ */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
+ amdgpu_is_multi_aid(adev))
adev->gmc.xgmi.supported = true;
- if (adev->ip_versions[XGMI_HWIP][0] == IP_VERSION(6, 1, 0)) {
+ if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(6, 1, 0)) {
adev->gmc.xgmi.supported = true;
adev->gmc.xgmi.connected_to_cpu =
adev->smuio.funcs->is_host_gpu_xgmi_supported(adev);
}
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) {
+ enum amdgpu_pkg_type pkg_type =
+ adev->smuio.funcs->get_pkg_type(adev);
+ /* On a GFXIP 9.4.3 APU, there is no physical VRAM domain present
+ * and the APU can be used in two possible modes:
+ * - carveout mode
+ * - native APU mode
+ * "is_app_apu" can be used to identify the APU in the native
+ * mode.
+ */
+ adev->gmc.is_app_apu = (pkg_type == AMDGPU_PKG_TYPE_APU &&
+ !pci_resource_len(adev->pdev, 0));
+ }
+
gmc_v9_0_set_gmc_funcs(adev);
gmc_v9_0_set_irq_funcs(adev);
gmc_v9_0_set_umc_funcs(adev);
@@ -1266,7 +1614,8 @@ static int gmc_v9_0_early_init(void *handle)
gmc_v9_0_set_mmhub_ras_funcs(adev);
gmc_v9_0_set_gfxhub_funcs(adev);
gmc_v9_0_set_hdp_ras_funcs(adev);
- gmc_v9_0_set_mca_funcs(adev);
+ gmc_v9_0_set_mca_ras_funcs(adev);
+ gmc_v9_0_set_xgmi_ras_funcs(adev);
adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
adev->gmc.shared_aperture_end =
@@ -1274,13 +1623,14 @@ static int gmc_v9_0_early_init(void *handle)
adev->gmc.private_aperture_start = 0x1000000000000000ULL;
adev->gmc.private_aperture_end =
adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;
return 0;
}
-static int gmc_v9_0_late_init(void *handle)
+static int gmc_v9_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_gmc_allocate_vm_inv_eng(adev);
@@ -1292,21 +1642,17 @@ static int gmc_v9_0_late_init(void *handle)
* writes, while disables HBM ECC for vega10.
*/
if (!amdgpu_sriov_vf(adev) &&
- (adev->ip_versions[UMC_HWIP][0] == IP_VERSION(6, 0, 0))) {
+ (amdgpu_ip_version(adev, UMC_HWIP, 0) == IP_VERSION(6, 0, 0))) {
if (!(adev->ras_enabled & (1 << AMDGPU_RAS_BLOCK__UMC))) {
- if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
+ if (adev->df.funcs &&
+ adev->df.funcs->enable_ecc_force_par_wr_rmw)
adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
}
}
if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
- if (adev->mmhub.ras_funcs &&
- adev->mmhub.ras_funcs->reset_ras_error_count)
- adev->mmhub.ras_funcs->reset_ras_error_count(adev);
-
- if (adev->hdp.ras_funcs &&
- adev->hdp.ras_funcs->reset_ras_error_count)
- adev->hdp.ras_funcs->reset_ras_error_count(adev);
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__HDP);
}
r = amdgpu_gmc_ras_late_init(adev);
@@ -1321,14 +1667,17 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
{
u64 base = adev->mmhub.funcs->get_fb_location(adev);
+ amdgpu_gmc_set_agp_default(adev, mc);
+
/* add the xgmi offset of the physical node */
base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
- if (adev->gmc.xgmi.connected_to_cpu) {
+ if (amdgpu_gmc_is_pdb0_enabled(adev)) {
amdgpu_gmc_sysvm_location(adev, mc);
} else {
amdgpu_gmc_vram_location(adev, mc, base);
- amdgpu_gmc_gart_location(adev, mc);
- amdgpu_gmc_agp_location(adev, mc);
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
+ if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
+ amdgpu_gmc_agp_location(adev, mc);
}
/* base offset of vram pages */
adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
@@ -1352,8 +1701,13 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
int r;
/* size in MB on si */
- adev->gmc.mc_vram_size =
- adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+ if (!adev->gmc.is_app_apu) {
+ adev->gmc.mc_vram_size =
+ adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+ } else {
+ DRM_DEBUG("Set mc_vram_size = 0 for APP APU\n");
+ adev->gmc.mc_vram_size = 0;
+ }
adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
if (!(adev->flags & AMD_IS_APU) &&
@@ -1378,7 +1732,8 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
*/
/* check whether both host-gpu and gpu-gpu xgmi links exist */
- if ((adev->flags & AMD_IS_APU) ||
+ if ((!amdgpu_sriov_vf(adev) &&
+ (adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) ||
(adev->gmc.xgmi.supported &&
adev->gmc.xgmi.connected_to_cpu)) {
adev->gmc.aper_base =
@@ -1389,19 +1744,19 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
}
#endif
- /* In case the PCI BAR is larger than the actual amount of vram */
adev->gmc.visible_vram_size = adev->gmc.aper_size;
- if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
- adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
/* set the gart size */
if (amdgpu_gart_size == -1) {
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1): /* all engines support GPUVM */
case IP_VERSION(9, 2, 1): /* all engines support GPUVM */
case IP_VERSION(9, 4, 0):
case IP_VERSION(9, 4, 1):
case IP_VERSION(9, 4, 2):
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
default:
adev->gmc.gart_size = 512ULL << 20;
break;
@@ -1431,7 +1786,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
return 0;
}
- if (adev->gmc.xgmi.connected_to_cpu) {
+ if (amdgpu_gmc_is_pdb0_enabled(adev)) {
adev->gmc.vmid0_page_table_depth = 1;
adev->gmc.vmid0_page_table_block_size = 12;
} else {
@@ -1444,15 +1799,21 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
if (r)
return r;
adev->gart.table_size = adev->gart.num_gpu_pages * 8;
- adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
+ adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_UC) |
AMDGPU_PTE_EXECUTABLE;
- r = amdgpu_gart_table_vram_alloc(adev);
- if (r)
- return r;
+ if (!adev->gmc.real_vram_size) {
+ dev_info(adev->dev, "Put GART in system memory for APU\n");
+ r = amdgpu_gart_table_ram_alloc(adev);
+ if (r)
+ dev_err(adev->dev, "Failed to allocate GART in system memory\n");
+ } else {
+ r = amdgpu_gart_table_vram_alloc(adev);
+ if (r)
+ return r;
- if (adev->gmc.xgmi.connected_to_cpu) {
- r = amdgpu_gmc_pdb0_alloc(adev);
+ if (amdgpu_gmc_is_pdb0_enabled(adev))
+ r = amdgpu_gmc_pdb0_alloc(adev);
}
return r;
@@ -1468,54 +1829,89 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
*/
static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
{
- if ((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) ||
- (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1)))
+ if ((amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 0)) ||
+ (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 1)))
adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0);
}
-static int gmc_v9_0_sw_init(void *handle)
+static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev)
{
- int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ static const u32 regBIF_BIOS_SCRATCH_4 = 0x50;
+ u32 vram_info;
+
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
+ adev->gmc.vram_width = 128 * 64;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) &&
+ adev->rev_id == 0x3)
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E;
+
+ if (!(adev->flags & AMD_IS_APU) && !amdgpu_sriov_vf(adev)) {
+ vram_info = RREG32(regBIF_BIOS_SCRATCH_4);
+ adev->gmc.vram_vendor = vram_info & 0xF;
+ }
+}
+
+static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_addr_bits;
+ struct amdgpu_device *adev = ip_block->adev;
+ unsigned long inst_mask = adev->aid_mask;
adev->gfxhub.funcs->init(adev);
adev->mmhub.funcs->init(adev);
- if (adev->mca.funcs)
- adev->mca.funcs->init(adev);
spin_lock_init(&adev->gmc.invalidate_lock);
- r = amdgpu_atomfirmware_get_vram_info(adev,
- &vram_width, &vram_type, &vram_vendor);
- if (amdgpu_sriov_vf(adev))
- /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN,
- * and DF related registers is not readable, seems hardcord is the
- * only way to set the correct vram_width
- */
- adev->gmc.vram_width = 2048;
- else if (amdgpu_emu_mode != 1)
- adev->gmc.vram_width = vram_width;
-
- if (!adev->gmc.vram_width) {
- int chansize, numchan;
-
- /* hbm memory channel size */
- if (adev->flags & AMD_IS_APU)
- chansize = 64;
- else
- chansize = 128;
+ if (amdgpu_is_multi_aid(adev)) {
+ gmc_v9_4_3_init_vram_info(adev);
+ } else if (!adev->bios) {
+ if (adev->flags & AMD_IS_APU) {
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_DDR4;
+ adev->gmc.vram_width = 64 * 64;
+ } else {
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
+ adev->gmc.vram_width = 128 * 64;
+ }
+ } else {
+ r = amdgpu_atomfirmware_get_vram_info(adev,
+ &vram_width, &vram_type, &vram_vendor);
+ if (amdgpu_sriov_vf(adev))
+ /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN,
+ * and DF related registers is not readable, seems hardcord is the
+ * only way to set the correct vram_width
+ */
+ adev->gmc.vram_width = 2048;
+ else if (amdgpu_emu_mode != 1)
+ adev->gmc.vram_width = vram_width;
+
+ if (!adev->gmc.vram_width) {
+ int chansize, numchan;
+
+ /* hbm memory channel size */
+ if (adev->flags & AMD_IS_APU)
+ chansize = 64;
+ else
+ chansize = 128;
+ if (adev->df.funcs &&
+ adev->df.funcs->get_hbm_channel_number) {
+ numchan = adev->df.funcs->get_hbm_channel_number(adev);
+ adev->gmc.vram_width = numchan * chansize;
+ }
+ }
- numchan = adev->df.funcs->get_hbm_channel_number(adev);
- adev->gmc.vram_width = numchan * chansize;
+ adev->gmc.vram_type = vram_type;
+ adev->gmc.vram_vendor = vram_vendor;
}
-
- adev->gmc.vram_type = vram_type;
- adev->gmc.vram_vendor = vram_vendor;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 2):
- adev->num_vmhubs = 2;
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
@@ -1531,25 +1927,39 @@ static int gmc_v9_0_sw_init(void *handle)
case IP_VERSION(9, 4, 0):
case IP_VERSION(9, 3, 0):
case IP_VERSION(9, 4, 2):
- adev->num_vmhubs = 2;
-
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
/*
* To fulfill 4-level page support,
* vm size is 256TB (48bit), maximum size of Vega10,
* block size 512 (9bit)
*/
- /* sriov restrict max_pfn below AMDGPU_GMC_HOLE */
- if (amdgpu_sriov_vf(adev))
- amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
- else
- amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+
+ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
+ adev->gmc.translate_further = adev->vm_manager.num_level > 1;
break;
case IP_VERSION(9, 4, 1):
- adev->num_vmhubs = 3;
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB1(0), adev->vmhubs_mask);
/* Keep the vm size same with Vega20 */
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ adev->gmc.translate_further = adev->vm_manager.num_level > 1;
+ break;
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0),
+ NUM_XCC(adev->gfx.xcc_mask));
+
+ inst_mask <<= AMDGPU_MMHUB0(0);
+ bitmap_or(adev->vmhubs_mask, adev->vmhubs_mask, &inst_mask, 32);
+
+ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ adev->gmc.translate_further = adev->vm_manager.num_level > 1;
break;
default:
break;
@@ -1561,7 +1971,7 @@ static int gmc_v9_0_sw_init(void *handle)
if (r)
return r;
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT,
&adev->gmc.vm_fault);
if (r)
@@ -1575,7 +1985,8 @@ static int gmc_v9_0_sw_init(void *handle)
return r;
if (!amdgpu_sriov_vf(adev) &&
- !adev->gmc.xgmi.connected_to_cpu) {
+ !adev->gmc.xgmi.connected_to_cpu &&
+ !adev->gmc.is_app_apu) {
/* interrupt sent to DF. */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
&adev->gmc.ecc_irq);
@@ -1589,18 +2000,16 @@ static int gmc_v9_0_sw_init(void *handle)
*/
adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
- r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
+ dma_addr_bits = amdgpu_ip_version(adev, GC_HWIP, 0) >=
+ IP_VERSION(9, 4, 2) ?
+ 48 :
+ 44;
+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_addr_bits));
if (r) {
- printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
+ dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n");
return r;
}
- adev->need_swiotlb = drm_need_swiotlb(44);
-
- if (adev->gmc.xgmi.supported) {
- r = adev->gfxhub.funcs->get_xgmi_info(adev);
- if (r)
- return r;
- }
+ adev->need_swiotlb = drm_need_swiotlb(dma_addr_bits);
r = gmc_v9_0_mc_init(adev);
if (r)
@@ -1608,6 +2017,12 @@ static int gmc_v9_0_sw_init(void *handle)
amdgpu_gmc_get_vbios_allocations(adev);
+ if (amdgpu_is_multi_aid(adev)) {
+ r = amdgpu_gmc_init_mem_ranges(adev);
+ if (r)
+ return r;
+ }
+
/* Memory manager */
r = amdgpu_bo_init(adev);
if (r)
@@ -1617,6 +2032,7 @@ static int gmc_v9_0_sw_init(void *handle)
if (r)
return r;
+ gmc_v9_0_init_nps_details(adev);
/*
* number of VMs
* VMID 0 is reserved for System
@@ -1628,34 +2044,54 @@ static int gmc_v9_0_sw_init(void *handle)
* for video processing.
*/
adev->vm_manager.first_kfd_vmid =
- (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) ? 3 : 8;
+ (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
+ amdgpu_is_multi_aid(adev)) ?
+ 3 :
+ 8;
amdgpu_vm_manager_init(adev);
gmc_v9_0_save_registers(adev);
+ r = amdgpu_gmc_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ if (amdgpu_is_multi_aid(adev))
+ amdgpu_gmc_sysfs_init(adev);
+
return 0;
}
-static int gmc_v9_0_sw_fini(void *handle)
+static int gmc_v9_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_is_multi_aid(adev))
+ amdgpu_gmc_sysfs_fini(adev);
amdgpu_gmc_ras_fini(adev);
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
- amdgpu_gart_table_vram_free(adev);
- amdgpu_bo_unref(&adev->gmc.pdb0_bo);
+ if (!adev->gmc.real_vram_size) {
+ dev_info(adev->dev, "Put GART in system memory for APU free\n");
+ amdgpu_gart_table_ram_free(adev);
+ } else {
+ amdgpu_gart_table_vram_free(adev);
+ }
+ amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0);
amdgpu_bo_fini(adev);
+ adev->gmc.num_mem_partitions = 0;
+ kfree(adev->gmc.mem_partitions);
+
return 0;
}
static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
-
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(9, 0, 0):
if (amdgpu_sriov_vf(adev))
break;
@@ -1689,8 +2125,8 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
*/
void gmc_v9_0_restore_registers(struct amdgpu_device *adev)
{
- if ((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) ||
- (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1))) {
+ if ((amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 0)) ||
+ (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 1))) {
WREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register);
WARN_ON(adev->gmc.sdpif_register !=
RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0));
@@ -1706,7 +2142,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
{
int r;
- if (adev->gmc.xgmi.connected_to_cpu)
+ if (amdgpu_gmc_is_pdb0_enabled(adev))
amdgpu_gmc_init_pdb0(adev);
if (adev->gart.bo == NULL) {
@@ -1714,35 +2150,45 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
return -EINVAL;
}
- r = amdgpu_gart_table_vram_pin(adev);
- if (r)
- return r;
+ amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
- r = adev->gfxhub.funcs->gart_enable(adev);
- if (r)
- return r;
+ if (!adev->in_s0ix) {
+ r = adev->gfxhub.funcs->gart_enable(adev);
+ if (r)
+ return r;
+ }
r = adev->mmhub.funcs->gart_enable(adev);
if (r)
return r;
DRM_INFO("PCIE GART of %uM enabled.\n",
- (unsigned)(adev->gmc.gart_size >> 20));
+ (unsigned int)(adev->gmc.gart_size >> 20));
if (adev->gmc.pdb0_bo)
DRM_INFO("PDB0 located at 0x%016llX\n",
(unsigned long long)amdgpu_bo_gpu_offset(adev->gmc.pdb0_bo));
DRM_INFO("PTB located at 0x%016llX\n",
(unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
- adev->gart.ready = true;
return 0;
}
-static int gmc_v9_0_hw_init(void *handle)
+static int gmc_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool value;
- int r, i;
+ int i, r;
+
+ adev->gmc.flush_pasid_uses_kiq = true;
+
+ /* Vega20+XGMI caches PTEs in TC and TLB. Add a heavy-weight TLB flush
+ * (type 2), which flushes both. Due to a race condition with
+ * concurrent memory accesses using the same TLB cache line, we still
+ * need a second TLB flush after this.
+ */
+ adev->gmc.flush_tlb_needs_extra_type_2 =
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) &&
+ adev->gmc.xgmi.num_physical_nodes;
/* The sequence of these two function calls matters.*/
gmc_v9_0_init_golden_registers(adev);
@@ -1760,7 +2206,7 @@ static int gmc_v9_0_hw_init(void *handle)
adev->hdp.funcs->init_registers(adev);
/* After HDP is initialized, flush HDP.*/
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
value = false;
@@ -1768,18 +2214,27 @@ static int gmc_v9_0_hw_init(void *handle)
value = true;
if (!amdgpu_sriov_vf(adev)) {
- adev->gfxhub.funcs->set_fault_enable_default(adev, value);
+ if (!adev->in_s0ix)
+ adev->gfxhub.funcs->set_fault_enable_default(adev, value);
adev->mmhub.funcs->set_fault_enable_default(adev, value);
}
- for (i = 0; i < adev->num_vmhubs; ++i)
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
+ if (adev->in_s0ix && (i == AMDGPU_GFXHUB(0)))
+ continue;
gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);
+ }
if (adev->umc.funcs && adev->umc.funcs->init_registers)
adev->umc.funcs->init_registers(adev);
r = gmc_v9_0_gart_enable(adev);
+ if (r)
+ return r;
- return r;
+ if (amdgpu_emu_mode == 1)
+ return amdgpu_gmc_vram_checking(adev);
+
+ return 0;
}
/**
@@ -1791,14 +2246,14 @@ static int gmc_v9_0_hw_init(void *handle)
*/
static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
{
- adev->gfxhub.funcs->gart_disable(adev);
+ if (!adev->in_s0ix)
+ adev->gfxhub.funcs->gart_disable(adev);
adev->mmhub.funcs->gart_disable(adev);
- amdgpu_gart_table_vram_unpin(adev);
}
-static int gmc_v9_0_hw_fini(void *handle)
+static int gmc_v9_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v9_0_gart_disable(adev);
@@ -1816,55 +2271,70 @@ static int gmc_v9_0_hw_fini(void *handle)
if (adev->mmhub.funcs->update_power_gating)
adev->mmhub.funcs->update_power_gating(adev, false);
- amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
- amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+ /*
+ * For minimal init, late_init is not called, hence VM fault/RAS irqs
+ * are not enabled.
+ */
+ if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
+ amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+
+ if (adev->gmc.ecc_irq.funcs &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+ amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+ }
return 0;
}
-static int gmc_v9_0_suspend(void *handle)
+static int gmc_v9_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return gmc_v9_0_hw_fini(adev);
+ return gmc_v9_0_hw_fini(ip_block);
}
-static int gmc_v9_0_resume(void *handle)
+static int gmc_v9_0_resume(struct amdgpu_ip_block *ip_block)
{
+ struct amdgpu_device *adev = ip_block->adev;
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = gmc_v9_0_hw_init(adev);
+ /* If a reset is done for NPS mode switch, read the memory range
+ * information again.
+ */
+ if (adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS) {
+ amdgpu_gmc_init_sw_mem_ranges(adev, adev->gmc.mem_partitions);
+ adev->gmc.reset_flags &= ~AMDGPU_GMC_INIT_RESET_NPS;
+ }
+
+ r = gmc_v9_0_hw_init(ip_block);
if (r)
return r;
- amdgpu_vmid_reset_all(adev);
+ amdgpu_vmid_reset_all(ip_block->adev);
return 0;
}
-static bool gmc_v9_0_is_idle(void *handle)
+static bool gmc_v9_0_is_idle(struct amdgpu_ip_block *ip_block)
{
/* MC is always ready in GMC v9.*/
return true;
}
-static int gmc_v9_0_wait_for_idle(void *handle)
+static int gmc_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* There is no need to wait for MC idle in GMC v9.*/
return 0;
}
-static int gmc_v9_0_soft_reset(void *handle)
+static int gmc_v9_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* XXX for emulation.*/
return 0;
}
-static int gmc_v9_0_set_clockgating_state(void *handle,
+static int gmc_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->mmhub.funcs->set_clockgating(adev, state);
@@ -1873,16 +2343,16 @@ static int gmc_v9_0_set_clockgating_state(void *handle,
return 0;
}
-static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags)
+static void gmc_v9_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->mmhub.funcs->get_clockgating(adev, flags);
athub_v1_0_get_clockgating(adev, flags);
}
-static int gmc_v9_0_set_powergating_state(void *handle,
+static int gmc_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1906,8 +2376,7 @@ const struct amd_ip_funcs gmc_v9_0_ip_funcs = {
.get_clockgating_state = gmc_v9_0_get_clockgating_state,
};
-const struct amdgpu_ip_block_version gmc_v9_0_ip_block =
-{
+const struct amdgpu_ip_block_version gmc_v9_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GMC,
.major = 9,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index eecfb1545c1e..e6c0d86d3486 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "hdp_v4_0.h"
#include "amdgpu_ras.h"
@@ -37,26 +36,21 @@
#define HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK 0x00020000L
#define mmHDP_MEM_POWER_CTRL_BASE_IDX 0
-static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
-{
- if (!ring || !ring->funcs->emit_wreg)
- WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
- else
- amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
-}
-
static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
- if (adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 4, 0))
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 0) ||
+ amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2) ||
+ amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 5))
return;
- if (!ring || !ring->funcs->emit_wreg)
+ if (!ring || !ring->funcs->emit_wreg) {
WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
- else
+ RREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE);
+ } else {
amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
+ }
}
static void hdp_v4_0_query_ras_error_count(struct amdgpu_device *adev,
@@ -79,7 +73,7 @@ static void hdp_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP))
return;
- if (adev->ip_versions[HDP_HWIP][0] >= IP_VERSION(4, 4, 0))
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) >= IP_VERSION(4, 4, 0))
WREG32_SOC15(HDP, 0, mmHDP_EDC_CNT, 0);
else
/*read back hdp ras counter to reset it to 0 */
@@ -91,10 +85,10 @@ static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev,
{
uint32_t def, data;
- if (adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 0, 0) ||
- adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 0, 1) ||
- adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 1, 1) ||
- adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 1, 0)) {
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 0, 0) ||
+ amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 0, 1) ||
+ amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 1, 1) ||
+ amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 1, 0)) {
def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS));
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
@@ -124,10 +118,16 @@ static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev,
}
static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
int data;
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2) ||
+ amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 5)) {
+ /* Default enabled */
+ *flags |= AMD_CG_SUPPORT_HDP_MGCG;
+ return;
+ }
/* AMD_CG_SUPPORT_HDP_LS */
data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS));
if (data & HDP_MEM_POWER_LS__LS_ENABLE_MASK)
@@ -136,7 +136,7 @@ static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev,
static void hdp_v4_0_init_registers(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[HDP_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, HDP_HWIP, 0)) {
case IP_VERSION(4, 2, 1):
WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1);
break;
@@ -144,21 +144,32 @@ static void hdp_v4_0_init_registers(struct amdgpu_device *adev)
break;
}
+ /* Do not program registers if VF */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 0))
+ WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, READ_BUFFER_WATERMARK, 2);
+
WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));
}
-const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs = {
- .ras_late_init = amdgpu_hdp_ras_late_init,
- .ras_fini = amdgpu_hdp_ras_fini,
+struct amdgpu_ras_block_hw_ops hdp_v4_0_ras_hw_ops = {
.query_ras_error_count = hdp_v4_0_query_ras_error_count,
.reset_ras_error_count = hdp_v4_0_reset_ras_error_count,
};
+struct amdgpu_hdp_ras hdp_v4_0_ras = {
+ .ras_block = {
+ .hw_ops = &hdp_v4_0_ras_hw_ops,
+ },
+};
+
const struct amdgpu_hdp_funcs hdp_v4_0_funcs = {
- .flush_hdp = hdp_v4_0_flush_hdp,
+ .flush_hdp = amdgpu_hdp_generic_flush,
.invalidate_hdp = hdp_v4_0_invalidate_hdp,
.update_clock_gating = hdp_v4_0_update_clock_gating,
.get_clock_gating_state = hdp_v4_0_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h
index dc3a1b81dd62..c44eee9282ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h
@@ -27,6 +27,6 @@
#include "soc15_common.h"
extern const struct amdgpu_hdp_funcs hdp_v4_0_funcs;
-extern const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs;
+extern struct amdgpu_hdp_ras hdp_v4_0_ras;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
index 5793977953cc..8bc001dc9f63 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
@@ -21,27 +21,18 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "hdp_v5_0.h"
#include "hdp/hdp_5_0_0_offset.h"
#include "hdp/hdp_5_0_0_sh_mask.h"
#include <uapi/linux/kfd_ioctl.h>
-static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
-{
- if (!ring || !ring->funcs->emit_wreg)
- WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
- else
- amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
-}
-
static void hdp_v5_0_invalidate_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
if (!ring || !ring->funcs->emit_wreg) {
WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
+ RREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE);
} else {
amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
@@ -181,7 +172,7 @@ static void hdp_v5_0_update_clock_gating(struct amdgpu_device *adev,
}
static void hdp_v5_0_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
uint32_t tmp;
@@ -215,7 +206,7 @@ static void hdp_v5_0_init_registers(struct amdgpu_device *adev)
}
const struct amdgpu_hdp_funcs hdp_v5_0_funcs = {
- .flush_hdp = hdp_v5_0_flush_hdp,
+ .flush_hdp = amdgpu_hdp_generic_flush,
.invalidate_hdp = hdp_v5_0_invalidate_hdp,
.update_clock_gating = hdp_v5_0_update_clock_gating,
.get_clock_gating_state = hdp_v5_0_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
new file mode 100644
index 000000000000..40940b4ab400
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
@@ -0,0 +1,206 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "hdp_v5_2.h"
+
+#include "hdp/hdp_5_2_1_offset.h"
+#include "hdp/hdp_5_2_1_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2,
+ 0);
+ if (amdgpu_sriov_vf(adev)) {
+ /* this is fine because SR_IOV doesn't remap the register */
+ RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
+ } else {
+ /* We just need to read back a register to post the write.
+ * Reading back the remapped register causes problems on
+ * some platforms so just read back the memory size register.
+ */
+ if (adev->nbio.funcs->get_memsize)
+ adev->nbio.funcs->get_memsize(adev);
+ }
+ } else {
+ amdgpu_ring_emit_wreg(ring,
+ (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2,
+ 0);
+ }
+}
+
+static void hdp_v5_2_update_mem_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t hdp_clk_cntl;
+ uint32_t hdp_mem_pwr_cntl;
+
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)))
+ return;
+
+ hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+ hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+
+ /* Before doing clock/power mode switch, forced on MEM clock */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ ATOMIC_MEM_CLK_SOFT_OVERRIDE, 1);
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 1);
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+
+ /* disable clock and power gating before any changing */
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_SD_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_SD_EN, 0);
+ WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+
+ /* Already disabled above. The actions below are for "enabled" only */
+ if (enable) {
+ /* only one clock gating mode (LS/DS/SD) can be enabled */
+ if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_SD_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_SD_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_LS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_DS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 1);
+ }
+
+ /* confirmed that ATOMIC/RC_MEM_POWER_CTRL_EN have to be set for SRAM LS/DS/SD */
+ if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_CTRL_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 1);
+ WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+ }
+ }
+
+ /* disable MEM clock override after clock/power mode changing */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ ATOMIC_MEM_CLK_SOFT_OVERRIDE, 0);
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 0);
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+}
+
+static void hdp_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t hdp_clk_cntl;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG))
+ return;
+
+ hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+
+ if (enable) {
+ hdp_clk_cntl &=
+ ~(uint32_t)
+ (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK);
+ } else {
+ hdp_clk_cntl |= HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK;
+ }
+
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+}
+
+static void hdp_v5_2_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ uint32_t tmp;
+
+ /* AMD_CG_SUPPORT_HDP_MGCG */
+ tmp = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+ if (!(tmp & (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK)))
+ *flags |= AMD_CG_SUPPORT_HDP_MGCG;
+
+ /* AMD_CG_SUPPORT_HDP_LS/DS/SD */
+ tmp = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+ if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_LS;
+ else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_DS;
+ else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_SD;
+}
+
+static void hdp_v5_2_update_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ hdp_v5_2_update_mem_power_gating(adev, enable);
+ hdp_v5_2_update_medium_grain_clock_gating(adev, enable);
+}
+
+const struct amdgpu_hdp_funcs hdp_v5_2_funcs = {
+ .flush_hdp = hdp_v5_2_flush_hdp,
+ .update_clock_gating = hdp_v5_2_update_clock_gating,
+ .get_clock_gating_state = hdp_v5_2_get_clockgating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.h b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.h
new file mode 100644
index 000000000000..cb2abc0c80ee
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __HDP_V5_2_H__
+#define __HDP_V5_2_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_hdp_funcs hdp_v5_2_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
new file mode 100644
index 000000000000..ec20daf4272c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "hdp_v6_0.h"
+
+#include "hdp/hdp_6_0_0_offset.h"
+#include "hdp/hdp_6_0_0_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+/*
+ * Register offset used in place of regHDP_CLK_CNTL when the HDP IP
+ * version is 6.1.0 (see hdp_v6_0_update_clock_gating()).
+ */
+#define regHDP_CLK_CNTL_V6_1 0xd5
+#define regHDP_CLK_CNTL_V6_1_BASE_IDX 0
+
+/**
+ * hdp_v6_0_update_clock_gating - switch the HDP memory power mode
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: true to turn on the supported mode, false to leave all modes off
+ *
+ * Returns early when adev->cg_flags has none of the HDP LS/DS/SD bits.
+ * Otherwise sets RC_MEM_CLK_SOFT_OVERRIDE, clears all ATOMIC/RC memory power
+ * enable bits, enables at most one mode when @enable is true (SD is checked
+ * first, then LS, then DS) and finally clears the override again.
+ */
+static void hdp_v6_0_update_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t hdp_clk_cntl;
+ uint32_t hdp_mem_pwr_cntl;
+
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)))
+ return;
+
+ /* HDP IP version 6.1.0 uses regHDP_CLK_CNTL_V6_1 for the clock control register */
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(6, 1, 0))
+ hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL_V6_1);
+ else
+ hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+ hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+
+ /* Before doing clock/power mode switch, set the RC memory clock
+ * soft override (only RC_MEM_CLK_SOFT_OVERRIDE is touched here) */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 1);
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(6, 1, 0))
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL_V6_1, hdp_clk_cntl);
+ else
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+
+ /* disable clock and power gating before any changing */
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_SD_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_SD_EN, 0);
+ WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+
+ /* Already disabled above. The actions below are for "enabled" only */
+ if (enable) {
+ /* only one clock gating mode (LS/DS/SD) can be enabled */
+ if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_SD_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_SD_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_LS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_DS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 1);
+ }
+
+ /* confirmed that ATOMIC_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to
+ * be set for SRAM LS/DS/SD */
+ if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_CTRL_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 1);
+ WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+ }
+ }
+
+ /* clear the RC memory clock soft override after clock/power mode changing;
+ * hdp_clk_cntl still holds the value read at function entry */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 0);
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(6, 1, 0))
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL_V6_1, hdp_clk_cntl);
+ else
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+}
+
+/**
+ * hdp_v6_0_get_clockgating_state - report the active HDP memory power mode
+ *
+ * @adev: amdgpu_device pointer
+ * @flags: clock gating flag mask to update
+ *
+ * Reads HDP_MEM_POWER_CTRL and ORs at most one of AMD_CG_SUPPORT_HDP_LS/DS/SD
+ * into @flags, testing the ATOMIC_MEM_POWER_*_EN bits in LS, DS, SD order.
+ */
+static void hdp_v6_0_get_clockgating_state(struct amdgpu_device *adev,
+					   u64 *flags)
+{
+	const uint32_t mem_pwr_ctrl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+
+	/* the first enabled mode found wins; later bits are not inspected */
+	if (mem_pwr_ctrl & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK) {
+		*flags |= AMD_CG_SUPPORT_HDP_LS;
+		return;
+	}
+
+	if (mem_pwr_ctrl & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK) {
+		*flags |= AMD_CG_SUPPORT_HDP_DS;
+		return;
+	}
+
+	if (mem_pwr_ctrl & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK)
+		*flags |= AMD_CG_SUPPORT_HDP_SD;
+}
+
+/* HDP v6.0 callback table; declared extern in hdp_v6_0.h. */
+const struct amdgpu_hdp_funcs hdp_v6_0_funcs = {
+ .flush_hdp = amdgpu_hdp_generic_flush,
+ .update_clock_gating = hdp_v6_0_update_clock_gating,
+ .get_clock_gating_state = hdp_v6_0_get_clockgating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.h b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.h
new file mode 100644
index 000000000000..533ecd8c0800
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __HDP_V6_0_H__
+#define __HDP_V6_0_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_hdp_funcs hdp_v6_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c
new file mode 100644
index 000000000000..ed1debc03507
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "hdp_v7_0.h"
+
+#include "hdp/hdp_7_0_0_offset.h"
+#include "hdp/hdp_7_0_0_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+/**
+ * hdp_v7_0_update_clock_gating - switch the HDP memory power mode
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: true to turn on the supported mode, false to leave all modes off
+ *
+ * Returns early when adev->cg_flags has none of the HDP LS/DS/SD bits.
+ * Otherwise sets RC_MEM_CLK_SOFT_OVERRIDE, clears all ATOMIC/RC memory power
+ * enable bits, enables at most one mode when @enable is true (SD is checked
+ * first, then LS, then DS) and finally clears the override again.
+ */
+static void hdp_v7_0_update_clock_gating(struct amdgpu_device *adev,
+					 bool enable)
+{
+	uint32_t hdp_clk_cntl;
+	uint32_t hdp_mem_pwr_cntl;
+
+	if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS |
+				AMD_CG_SUPPORT_HDP_DS |
+				AMD_CG_SUPPORT_HDP_SD)))
+		return;
+
+	hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+	hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+
+	/* Before doing clock/power mode switch, set the RC memory clock
+	 * soft override (only RC_MEM_CLK_SOFT_OVERRIDE is touched here) */
+	hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+				     RC_MEM_CLK_SOFT_OVERRIDE, 1);
+	WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+
+	/* disable clock and power gating before any changing */
+	hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+					 ATOMIC_MEM_POWER_CTRL_EN, 0);
+	hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+					 ATOMIC_MEM_POWER_LS_EN, 0);
+	hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+					 ATOMIC_MEM_POWER_DS_EN, 0);
+	hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+					 ATOMIC_MEM_POWER_SD_EN, 0);
+	hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+					 RC_MEM_POWER_CTRL_EN, 0);
+	hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+					 RC_MEM_POWER_LS_EN, 0);
+	hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+					 RC_MEM_POWER_DS_EN, 0);
+	hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+					 RC_MEM_POWER_SD_EN, 0);
+	WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+
+	/* Already disabled above. The actions below are for "enabled" only */
+	if (enable) {
+		/* only one clock gating mode (LS/DS/SD) can be enabled */
+		if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+							 HDP_MEM_POWER_CTRL,
+							 ATOMIC_MEM_POWER_SD_EN, 1);
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+							 HDP_MEM_POWER_CTRL,
+							 RC_MEM_POWER_SD_EN, 1);
+		} else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+							 HDP_MEM_POWER_CTRL,
+							 ATOMIC_MEM_POWER_LS_EN, 1);
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+							 HDP_MEM_POWER_CTRL,
+							 RC_MEM_POWER_LS_EN, 1);
+		} else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+							 HDP_MEM_POWER_CTRL,
+							 ATOMIC_MEM_POWER_DS_EN, 1);
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+							 HDP_MEM_POWER_CTRL,
+							 RC_MEM_POWER_DS_EN, 1);
+		}
+
+		/* confirmed that ATOMIC_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to
+		 * be set for SRAM LS/DS/SD */
+		if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
+				      AMD_CG_SUPPORT_HDP_SD)) {
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+							 ATOMIC_MEM_POWER_CTRL_EN, 1);
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+							 RC_MEM_POWER_CTRL_EN, 1);
+			WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+		}
+	}
+
+	/* clear the RC memory clock soft override after clock/power mode changing;
+	 * hdp_clk_cntl still holds the value read at function entry */
+	hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+				     RC_MEM_CLK_SOFT_OVERRIDE, 0);
+	WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+}
+
+/**
+ * hdp_v7_0_get_clockgating_state - report the active HDP memory power mode
+ *
+ * @adev: amdgpu_device pointer
+ * @flags: clock gating flag mask to update
+ *
+ * Reads HDP_MEM_POWER_CTRL and ORs at most one of AMD_CG_SUPPORT_HDP_LS/DS/SD
+ * into @flags, testing the ATOMIC_MEM_POWER_*_EN bits in LS, DS, SD order.
+ */
+static void hdp_v7_0_get_clockgating_state(struct amdgpu_device *adev,
+					   u64 *flags)
+{
+	const uint32_t mem_pwr_ctrl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+
+	/* the first enabled mode found wins; later bits are not inspected */
+	if (mem_pwr_ctrl & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK) {
+		*flags |= AMD_CG_SUPPORT_HDP_LS;
+		return;
+	}
+
+	if (mem_pwr_ctrl & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK) {
+		*flags |= AMD_CG_SUPPORT_HDP_DS;
+		return;
+	}
+
+	if (mem_pwr_ctrl & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK)
+		*flags |= AMD_CG_SUPPORT_HDP_SD;
+}
+
+/* HDP v7.0 callback table; declared extern in hdp_v7_0.h. */
+const struct amdgpu_hdp_funcs hdp_v7_0_funcs = {
+ .flush_hdp = amdgpu_hdp_generic_flush,
+ .update_clock_gating = hdp_v7_0_update_clock_gating,
+ .get_clock_gating_state = hdp_v7_0_get_clockgating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.h b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.h
new file mode 100644
index 000000000000..25b69201402d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __HDP_V7_0_H__
+#define __HDP_V7_0_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_hdp_funcs hdp_v7_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index ddfe4eaeea05..01cadf898c00 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -157,6 +157,9 @@ static int iceland_ih_irq_init(struct amdgpu_device *adev)
/* enable interrupts */
iceland_ih_enable_interrupts(adev);
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
return 0;
}
@@ -194,6 +197,9 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev,
wptr = le32_to_cpu(*ih->wptr_cpu);
+ if (ih == &adev->irq.ih_soft)
+ goto out;
+
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;
@@ -215,6 +221,11 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32(mmIH_RB_CNTL, tmp);
out:
return (wptr & ih->ptr_mask);
@@ -268,9 +279,9 @@ static void iceland_ih_set_rptr(struct amdgpu_device *adev,
WREG32(mmIH_RB_RPTR, ih->rptr);
}
-static int iceland_ih_early_init(void *handle)
+static int iceland_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = amdgpu_irq_add_domain(adev);
@@ -282,23 +293,27 @@ static int iceland_ih_early_init(void *handle)
return 0;
}
-static int iceland_ih_sw_init(void *handle)
+static int iceland_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
if (r)
return r;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
r = amdgpu_irq_init(adev);
return r;
}
-static int iceland_ih_sw_fini(void *handle)
+static int iceland_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
amdgpu_irq_remove_domain(adev);
@@ -306,44 +321,33 @@ static int iceland_ih_sw_fini(void *handle)
return 0;
}
-static int iceland_ih_hw_init(void *handle)
+static int iceland_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- r = iceland_ih_irq_init(adev);
- if (r)
- return r;
+ struct amdgpu_device *adev = ip_block->adev;
- return 0;
+ return iceland_ih_irq_init(adev);
}
-static int iceland_ih_hw_fini(void *handle)
+static int iceland_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- iceland_ih_irq_disable(adev);
+ iceland_ih_irq_disable(ip_block->adev);
return 0;
}
-static int iceland_ih_suspend(void *handle)
+static int iceland_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return iceland_ih_hw_fini(adev);
+ return iceland_ih_hw_fini(ip_block);
}
-static int iceland_ih_resume(void *handle)
+static int iceland_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return iceland_ih_hw_init(adev);
+ return iceland_ih_hw_init(ip_block);
}
-static bool iceland_ih_is_idle(void *handle)
+static bool iceland_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY))
@@ -352,11 +356,11 @@ static bool iceland_ih_is_idle(void *handle)
return true;
}
-static int iceland_ih_wait_for_idle(void *handle)
+static int iceland_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -368,10 +372,10 @@ static int iceland_ih_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int iceland_ih_soft_reset(void *handle)
+static int iceland_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__IH_BUSY_MASK)
@@ -398,13 +402,13 @@ static int iceland_ih_soft_reset(void *handle)
return 0;
}
-static int iceland_ih_set_clockgating_state(void *handle,
+static int iceland_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int iceland_ih_set_powergating_state(void *handle,
+static int iceland_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -413,7 +417,6 @@ static int iceland_ih_set_powergating_state(void *handle,
static const struct amd_ip_funcs iceland_ih_ip_funcs = {
.name = "iceland_ih",
.early_init = iceland_ih_early_init,
- .late_init = NULL,
.sw_init = iceland_ih_sw_init,
.sw_fini = iceland_ih_sw_fini,
.hw_init = iceland_ih_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
new file mode 100644
index 000000000000..333e9c30c091
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
@@ -0,0 +1,817 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+
+#include "oss/osssys_6_0_0_offset.h"
+#include "oss/osssys_6_0_0_sh_mask.h"
+
+#include "soc15_common.h"
+#include "ih_v6_0.h"
+
+#define MAX_REARM_RETRY 10
+
+static void ih_v6_0_set_interrupt_funcs(struct amdgpu_device *adev);
+
+/**
+ * ih_v6_0_init_register_offset - Initialize register offset for ih rings
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Fill in the ih_regs register offsets and PSP register id for IH ring 0
+ * and ring 1 (IH_V6_0). A ring whose ring_size is zero is skipped.
+ */
+static void ih_v6_0_init_register_offset(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_regs *ih_regs;
+
+ /* ih ring 2 is removed
+ * ih ring and ih ring 1 are available */
+ if (adev->irq.ih.ring_size) {
+ ih_regs = &adev->irq.ih.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR);
+ ih_regs->ih_rb_wptr_addr_lo = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_LO);
+ ih_regs->ih_rb_wptr_addr_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_HI);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL;
+ }
+
+ /* ring 1: no wptr writeback address offsets are recorded here */
+ if (adev->irq.ih1.ring_size) {
+ ih_regs = &adev->irq.ih1.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_RING1);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI_RING1);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL_RING1);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_RING1);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR_RING1);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR_RING1);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING1;
+ }
+}
+
+/**
+ * force_update_wptr_for_self_int - Force update the wptr for self interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @threshold: threshold to trigger the wptr reporting
+ * @timeout: timeout to trigger the wptr reporting
+ * @enabled: Enable/disable timeout flush mechanism
+ *
+ * threshold input range: 0 ~ 15, default 0,
+ * real_threshold = 2^threshold
+ * timeout input range: 0 ~ 20, default 8,
+ * real_timeout = (2^timeout) * 1024 / (socclk_freq)
+ *
+ * Force update wptr for self interrupt ( >= SIENNA_CICHLID).
+ *
+ * @threshold is written to IH_RB_CNTL_RING1, @timeout and @enabled to
+ * IH_CNTL2. If the PSP path is used for IH_RB_CNTL_RING1 and it fails,
+ * the function returns without writing IH_CNTL2.
+ */
+static void
+force_update_wptr_for_self_int(struct amdgpu_device *adev,
+ u32 threshold, u32 timeout, bool enabled)
+{
+ u32 ih_cntl, ih_rb_cntl;
+
+ ih_cntl = RREG32_SOC15(OSSSYS, 0, regIH_CNTL2);
+ ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1);
+
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2,
+ SELF_IV_FORCE_WPTR_UPDATE_TIMEOUT, timeout);
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2,
+ SELF_IV_FORCE_WPTR_UPDATE_ENABLE, enabled);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
+ RB_USED_INT_THRESHOLD, threshold);
+
+ /* SR-IOV VF with indirect IH register access programs the ring control via PSP */
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl))
+ return;
+ } else {
+ WREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1, ih_rb_cntl);
+ }
+
+ WREG32_SOC15(OSSSYS, 0, regIH_CNTL2, ih_cntl);
+}
+
+/**
+ * ih_v6_0_toggle_ring_interrupts - toggle the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ * @enable: true - enable the interrupts, false - disable the interrupts
+ *
+ * Toggle the interrupt ring buffer (IH_V6_0)
+ *
+ * Returns 0 on success, -ETIMEDOUT when programming IH_RB_CNTL through
+ * psp_reg_program() fails.
+ */
+static int ih_v6_0_toggle_ring_interrupts(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ bool enable)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
+
+ if (enable) {
+ /* Unset the CLEAR_OVERFLOW bit to make sure the next step
+ * is switching the bit from 0 to 1
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
+ return -ETIMEDOUT;
+ } else {
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ /* Clear RB_OVERFLOW bit */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
+ return -ETIMEDOUT;
+ } else {
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ * The cleared value reaches the hardware with the common
+ * IH_RB_CNTL write below.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ }
+
+ /* enable_intr field is only valid in ring0 */
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
+ return -ETIMEDOUT;
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (enable) {
+ ih->enabled = true;
+ } else {
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_rptr, 0);
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ ih->enabled = false;
+ ih->rptr = 0;
+ }
+
+ return 0;
+}
+
+/**
+ * ih_v6_0_toggle_interrupts - Toggle all the available interrupt ring buffers
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable or disable interrupt ring buffers
+ *
+ * Walk IH ring 0 and ring 1 and toggle each ring that has a non-zero
+ * ring_size (IH_V6_0). Stops at, and returns, the first non-zero result
+ * from ih_v6_0_toggle_ring_interrupts(); returns 0 otherwise.
+ */
+static int ih_v6_0_toggle_interrupts(struct amdgpu_device *adev, bool enable)
+{
+	struct amdgpu_ih_ring *rings[] = {&adev->irq.ih, &adev->irq.ih1};
+	struct amdgpu_ih_ring *ring;
+	int idx;
+	int err;
+
+	for (idx = 0; idx < ARRAY_SIZE(rings); idx++) {
+		ring = rings[idx];
+
+		/* rings without a buffer are left alone */
+		if (!ring->ring_size)
+			continue;
+
+		err = ih_v6_0_toggle_ring_interrupts(adev, ring, enable);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ih_v6_0_rb_cntl - compose the common IH_RB_CNTL field values for a ring
+ *
+ * @ih: amdgpu_ih_ring pointer
+ * @ih_rb_cntl: current IH_RB_CNTL value to start from
+ *
+ * Returns @ih_rb_cntl with the MC_SPACE, WPTR_OVERFLOW_CLEAR,
+ * WPTR_OVERFLOW_ENABLE, RB_SIZE, WPTR_WRITEBACK_ENABLE, MC_SNOOP, MC_RO and
+ * MC_VMID fields replaced. Nothing is written to the hardware here.
+ */
+static uint32_t ih_v6_0_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl)
+{
+	/* RB_SIZE is order_base_2() of the ring size divided by 4 */
+	int size_order = order_base_2(ih->ring_size / 4);
+	/* MC_SPACE is 2 when the ring uses a bus address, 4 otherwise */
+	int mc_space = ih->use_bus_addr ? 2 : 4;
+	uint32_t cntl = ih_rb_cntl;
+
+	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, MC_SPACE, mc_space);
+	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 1);
+	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, RB_SIZE, size_order);
+	/* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register
+	 * value is written to memory
+	 */
+	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, WPTR_WRITEBACK_ENABLE, 1);
+	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, MC_SNOOP, 1);
+	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, MC_RO, 0);
+	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, MC_VMID, 0);
+
+	return cntl;
+}
+
+/**
+ * ih_v6_0_doorbell_rptr - compose the IH_DOORBELL_RPTR value for a ring
+ *
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Returns a value built from zero: with ih->use_doorbell set, OFFSET holds
+ * ih->doorbell_index and ENABLE is 1; otherwise only ENABLE is set to 0.
+ */
+static uint32_t ih_v6_0_doorbell_rptr(struct amdgpu_ih_ring *ih)
+{
+	u32 val = 0;
+
+	/* doorbell not in use: leave OFFSET untouched and keep ENABLE cleared */
+	if (!ih->use_doorbell)
+		return REG_SET_FIELD(val, IH_DOORBELL_RPTR, ENABLE, 0);
+
+	val = REG_SET_FIELD(val, IH_DOORBELL_RPTR, OFFSET, ih->doorbell_index);
+	val = REG_SET_FIELD(val, IH_DOORBELL_RPTR, ENABLE, 1);
+
+	return val;
+}
+
+/**
+ * ih_v6_0_enable_ring - enable an ih ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Enable an ih ring buffer (IH_V6_0)
+ *
+ * Programs the ring base address, IH_RB_CNTL, the wptr writeback address
+ * (ring 0 only), zeroes wptr/rptr and sets up the rptr doorbell.
+ * Returns 0 on success, -ETIMEDOUT when programming IH_RB_CNTL through
+ * psp_reg_program() fails.
+ */
+static int ih_v6_0_enable_ring(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
+ WREG32(ih_regs->ih_rb_base, ih->gpu_addr >> 8);
+ WREG32(ih_regs->ih_rb_base_hi, (ih->gpu_addr >> 40) & 0xff);
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = ih_v6_0_rb_cntl(ih, tmp);
+ /* ring 0: RPTR_REARM follows whether MSI is enabled */
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled);
+ /* ring 1: turn WPTR_OVERFLOW_ENABLE back off and set RB_FULL_DRAIN_ENABLE */
+ if (ih == &adev->irq.ih1) {
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1);
+ }
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
+ DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (ih == &adev->irq.ih) {
+ /* set the ih ring 0 writeback address whether it's enabled or not */
+ WREG32(ih_regs->ih_rb_wptr_addr_lo, lower_32_bits(ih->wptr_addr));
+ WREG32(ih_regs->ih_rb_wptr_addr_hi, upper_32_bits(ih->wptr_addr) & 0xFFFF);
+ }
+
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ WREG32(ih_regs->ih_rb_rptr, 0);
+
+ WREG32(ih_regs->ih_doorbell_rptr, ih_v6_0_doorbell_rptr(ih));
+
+ return 0;
+}
+
+/**
+ * ih_v6_0_irq_init - init and enable the interrupt ring
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable the IH rings, program every ring that has a non-zero size,
+ * set up the ring 0 doorbell range plus the storm/flood controls,
+ * and then re-enable the rings.
+ * Called at device load and resume.
+ * Returns 0 for success, errors for failure.
+ */
+static int ih_v6_0_irq_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1};
+ u32 ih_chicken;
+ u32 tmp;
+ int ret;
+ int i;
+
+ /* disable irqs */
+ ret = ih_v6_0_toggle_interrupts(adev, false);
+ if (ret)
+ return ret;
+
+ adev->nbio.funcs->ih_control(adev);
+
+ /* for direct or RLC backdoor auto firmware load, set MC_SPACE_GPA_ENABLE
+ * when ring 0 uses a bus address */
+ if (unlikely((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO))) {
+ if (ih[0]->use_bus_addr) {
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, regIH_CHICKEN);
+ ih_chicken = REG_SET_FIELD(ih_chicken,
+ IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_CHICKEN, ih_chicken);
+ }
+ }
+
+ /* program each ring that has a buffer; the overflow flag is reset for all */
+ for (i = 0; i < ARRAY_SIZE(ih); i++) {
+ if (ih[i]->ring_size) {
+ ret = ih_v6_0_enable_ring(adev, ih[i]);
+ if (ret)
+ return ret;
+ }
+ ih[i]->overflow = false;
+ }
+
+ /* update doorbell range for ih ring 0 */
+ adev->nbio.funcs->ih_doorbell_range(adev, ih[0]->use_doorbell,
+ ih[0]->doorbell_index);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL,
+ CLIENT18_IS_STORM_CLIENT, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL, tmp);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL, tmp);
+
+ /* GC/MMHUB UTCL2 page fault interrupts are configured as
+ * MSI storm capable interrupts by default. The delay is
+ * used to avoid ISR being called too frequently
+ * when page fault happens on several continuous page
+ * and thus avoid MSI storm */
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL);
+ tmp = REG_SET_FIELD(tmp, IH_MSI_STORM_CTRL,
+ DELAY, 3);
+ WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp);
+
+ /* Redirect the interrupts to IH RB1 for dGPU */
+ if (adev->irq.ih1.ring_size) {
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_INDEX, INDEX, 0);
+ WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX, tmp);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, CLIENT_ID, 0xa);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, SOURCE_ID, 0x0);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA,
+ SOURCE_ID_MATCH_ENABLE, 0x1);
+
+ WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA, tmp);
+ }
+
+ pci_set_master(adev->pdev);
+
+ /* enable interrupts */
+ ret = ih_v6_0_toggle_interrupts(adev, true);
+ if (ret)
+ return ret;
+ /* enable wptr force update for self int */
+ force_update_wptr_for_self_int(adev, 0, 8, true);
+
+ /* the software ring is only marked enabled when it has a buffer */
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
+ return 0;
+}
+
+/**
+ * ih_v6_0_irq_disable - disable interrupts
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable interrupts on the hw: turn off the forced wptr update for self
+ * interrupts, toggle the rings off, then delay for 1 ms. The return value
+ * of ih_v6_0_toggle_interrupts() is not checked here.
+ */
+static void ih_v6_0_irq_disable(struct amdgpu_device *adev)
+{
+ force_update_wptr_for_self_int(adev, 0, 8, false);
+ ih_v6_0_toggle_interrupts(adev, false);
+
+ /* Wait and acknowledge irq */
+ mdelay(1);
+}
+
+/**
+ * ih_v6_0_get_wptr - get the IH ring buffer wptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Get the IH ring buffer wptr from the writeback memory buffer; the
+ * wptr register is only read when the writeback copy has RB_OVERFLOW
+ * set. If the register confirms the overflow, ih->rptr is moved to
+ * (wptr + 32) and WPTR_OVERFLOW_CLEAR is pulsed in the ring control
+ * register. On an SR-IOV VF, ih->overflow is set instead of clearing
+ * RB_OVERFLOW in the local wptr copy.
+ * Returns the value of the wptr masked with ih->ptr_mask.
+ */
+static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 wptr, tmp;
+ struct amdgpu_ih_regs *ih_regs;
+
+ wptr = le32_to_cpu(*ih->wptr_cpu);
+ ih_regs = &ih->ih_regs;
+
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+
+ wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr); /* re-check against the register */
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+ if (!amdgpu_sriov_vf(adev))
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+ else
+ ih->overflow = true;
+
+ /* When a ring buffer overflow happens, start parsing interrupts
+ * from the last not overwritten vector (wptr + 32). Hopefully
+ * this should allow us to catch up.
+ */
+ tmp = (wptr + 32) & ih->ptr_mask;
+ dev_warn(adev->dev, "IH ring buffer overflow "
+ "(0x%08X, 0x%08X, 0x%08X)\n",
+ wptr, ih->rptr, tmp);
+ ih->rptr = tmp;
+
+ tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+out:
+ return (wptr & ih->ptr_mask);
+}
+
+/**
+ * ih_v6_0_irq_rearm - rearm IRQ if lost
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Read back the ring's rptr register and write the doorbell again while
+ * it differs from ih->rptr, giving up after MAX_REARM_RETRY attempts.
+ */
+static void ih_v6_0_irq_rearm(struct amdgpu_device *adev,
+			      struct amdgpu_ih_ring *ih)
+{
+	struct amdgpu_ih_regs *regs = &ih->ih_regs;
+	uint32_t hw_rptr;
+	uint32_t attempt;
+
+	for (attempt = 0; attempt < MAX_REARM_RETRY; attempt++) {
+		hw_rptr = RREG32_NO_KIQ(regs->ih_rb_rptr);
+
+		/* stop once the hw value matches or reads back out of range */
+		if (hw_rptr >= ih->ring_size || hw_rptr == ih->rptr)
+			break;
+
+		/* Rearm IRQ / re-write doorbell if doorbell write is lost */
+		WDOORBELL32(ih->doorbell_index, ih->rptr);
+	}
+}
+
+/**
+ * ih_v6_0_set_rptr - set the IH ring buffer rptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Publish ih->rptr to the hardware: through the doorbell when the ring
+ * uses one, otherwise by writing the ring's rptr register.
+ */
+static void ih_v6_0_set_rptr(struct amdgpu_device *adev,
+			     struct amdgpu_ih_ring *ih)
+{
+	if (!ih->use_doorbell) {
+		WREG32(ih->ih_regs.ih_rb_rptr, ih->rptr);
+		return;
+	}
+
+	/* XXX check if swapping is necessary on BE */
+	*ih->rptr_cpu = ih->rptr;
+	WDOORBELL32(ih->doorbell_index, ih->rptr);
+
+	/* on an SR-IOV VF, make sure the doorbell write was not lost */
+	if (amdgpu_sriov_vf(adev))
+		ih_v6_0_irq_rearm(adev, ih);
+}
+
+/**
+ * ih_v6_0_self_irq - dispatch work for ring 1
+ *
+ * @adev: amdgpu_device pointer
+ * @source: irq source (unused)
+ * @entry: IV with WPTR update
+ *
+ * For ring id 1, store the WPTR carried in the IV into the ring 1
+ * writeback slot and schedule ih1_work. Other ring ids are ignored.
+ * Always returns 0.
+ */
+static int ih_v6_0_self_irq(struct amdgpu_device *adev,
+			    struct amdgpu_irq_src *source,
+			    struct amdgpu_iv_entry *entry)
+{
+	uint32_t new_wptr = cpu_to_le32(entry->src_data[0]);
+
+	if (entry->ring_id == 1) {
+		*adev->irq.ih1.wptr_cpu = new_wptr;
+		schedule_work(&adev->irq.ih1_work);
+	}
+
+	return 0;
+}
+
+static const struct amdgpu_irq_src_funcs ih_v6_0_self_irq_funcs = {
+ .process = ih_v6_0_self_irq, /* only callback provided for the self-irq source */
+};
+
+/* Hook the self-interrupt source up to ih_v6_0_self_irq_funcs; it has no types. */
+static void ih_v6_0_set_self_irq_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_irq_src *self = &adev->irq.self_irq;
+
+	self->funcs = &ih_v6_0_self_irq_funcs;
+	self->num_types = 0;
+}
+
+/* Install the IH ring callbacks and the self-interrupt callbacks; returns 0. */
+static int ih_v6_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+	ih_v6_0_set_interrupt_funcs(ip_block->adev);
+	ih_v6_0_set_self_irq_funcs(ip_block->adev);
+
+	return 0;
+}
+
+static int ih_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+ bool use_bus_addr;
+
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0,
+ &adev->irq.self_irq);
+
+ if (r)
+ return r;
+
+ /* use gpu virtual address for ih ring
+ * until ih_chicken is programmed to allow
+ * use bus address for ih ring by psp bl
+ * (bus address is chosen whenever firmware is not loaded via PSP) */
+ use_bus_addr = adev->firmware.load_type != AMDGPU_FW_LOAD_PSP;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr);
+ if (r)
+ return r;
+
+ adev->irq.ih.use_doorbell = true;
+ adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
+
+ if (!(adev->flags & AMD_IS_APU)) { /* ring 1 is only created on non-APU parts */
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, IH_RING_SIZE,
+ use_bus_addr);
+ if (r)
+ return r;
+
+ adev->irq.ih1.use_doorbell = true;
+ adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1;
+ }
+
+ /* initialize ih control register offset (after the rings are set up) */
+ ih_v6_0_init_register_offset(adev);
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_init(adev);
+
+ return r;
+}
+
+/* Software teardown: hand off to amdgpu_irq_fini_sw(); always returns 0. */
+static int ih_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+	amdgpu_irq_fini_sw(ip_block->adev);
+
+	return 0;
+}
+
+/* Hardware bring-up: returns whatever ih_v6_0_irq_init() returns. */
+static int ih_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	return ih_v6_0_irq_init(ip_block->adev);
+}
+
+/* Hardware teardown: disable the IH interrupts; always returns 0. */
+static int ih_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	ih_v6_0_irq_disable(adev);
+
+	return 0;
+}
+
+static int ih_v6_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return ih_v6_0_hw_fini(ip_block); /* suspend is the same as hw teardown */
+}
+
+static int ih_v6_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ return ih_v6_0_hw_init(ip_block); /* resume is the same as hw bring-up */
+}
+
+static bool ih_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ /* todo: no hardware check yet, the block is always reported idle */
+ return true;
+}
+
+static int ih_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ /* todo: not implemented, unconditionally reports a timeout */
+ return -ETIMEDOUT;
+}
+
+static int ih_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ /* todo: not implemented, nothing is reset and success is returned */
+ return 0;
+}
+
+/*
+ * Set or clear the five *_CLK_SOFT_OVERRIDE fields of IH_CLK_CTRL.
+ * Does nothing unless AMD_CG_SUPPORT_IH_CG is set in adev->cg_flags, and
+ * only writes the register back when its value actually changed.
+ */
+static void ih_v6_0_update_clockgating_state(struct amdgpu_device *adev,
+					     bool enable)
+{
+	uint32_t before, after, override;
+
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_IH_CG))
+		return;
+
+	/* enabling clock gating means clearing every override field */
+	override = enable ? 0 : 1;
+
+	before = RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL);
+	after = before;
+	after = REG_SET_FIELD(after, IH_CLK_CTRL,
+			      DBUS_MUX_CLK_SOFT_OVERRIDE, override);
+	after = REG_SET_FIELD(after, IH_CLK_CTRL,
+			      OSSSYS_SHARE_CLK_SOFT_OVERRIDE, override);
+	after = REG_SET_FIELD(after, IH_CLK_CTRL,
+			      LIMIT_SMN_CLK_SOFT_OVERRIDE, override);
+	after = REG_SET_FIELD(after, IH_CLK_CTRL,
+			      DYN_CLK_SOFT_OVERRIDE, override);
+	after = REG_SET_FIELD(after, IH_CLK_CTRL,
+			      REG_CLK_SOFT_OVERRIDE, override);
+
+	if (after != before)
+		WREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL, after);
+}
+
+/* Apply @state to the IH clock gating; gating is on for AMD_CG_STATE_GATE. */
+static int ih_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					  enum amd_clockgating_state state)
+{
+	bool gate = (state == AMD_CG_STATE_GATE);
+
+	ih_v6_0_update_clockgating_state(ip_block->adev, gate);
+
+	return 0;
+}
+
+/*
+ * Switch the IH buffer and retry-CAM memories between DS power mode
+ * (@enable true) and no power mode (@enable false). IH_MEM_POWER_CTRL is
+ * written twice: once with the control enable cleared, then with the new
+ * mode bits and the control enable set again.
+ */
+static void ih_v6_0_update_ih_mem_power_gating(struct amdgpu_device *adev,
+					       bool enable)
+{
+	/* It is recommended to set mem powergating mode to DS mode */
+	const uint32_t ds_en = enable ? 1 : 0;
+	uint32_t val;
+
+	/* Disable ih sram power cntl before switch powergating mode */
+	val = RREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL);
+	val = REG_SET_FIELD(val, IH_MEM_POWER_CTRL,
+			    IH_BUFFER_MEM_POWER_CTRL_EN, 0);
+	WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, val);
+
+	/* mem power mode */
+	val = REG_SET_FIELD(val, IH_MEM_POWER_CTRL,
+			    IH_BUFFER_MEM_POWER_LS_EN, 0);
+	val = REG_SET_FIELD(val, IH_MEM_POWER_CTRL,
+			    IH_BUFFER_MEM_POWER_DS_EN, ds_en);
+	val = REG_SET_FIELD(val, IH_MEM_POWER_CTRL,
+			    IH_BUFFER_MEM_POWER_SD_EN, 0);
+
+	/* cam mem power mode */
+	val = REG_SET_FIELD(val, IH_MEM_POWER_CTRL,
+			    IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0);
+	val = REG_SET_FIELD(val, IH_MEM_POWER_CTRL,
+			    IH_RETRY_INT_CAM_MEM_POWER_DS_EN, ds_en);
+	val = REG_SET_FIELD(val, IH_MEM_POWER_CTRL,
+			    IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0);
+
+	/* re-enable power cntl */
+	val = REG_SET_FIELD(val, IH_MEM_POWER_CTRL,
+			    IH_BUFFER_MEM_POWER_CTRL_EN, 1);
+
+	WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, val);
+}
+
+/*
+ * Apply @state to the IH memory power gating when the device advertises
+ * AMD_PG_SUPPORT_IH_SRAM_PG; otherwise do nothing. Always returns 0.
+ */
+static int ih_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+					  enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	if (!(adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG))
+		return 0;
+
+	ih_v6_0_update_ih_mem_power_gating(adev, state == AMD_PG_STATE_GATE);
+
+	return 0;
+}
+
+/* Set AMD_CG_SUPPORT_IH_CG in @flags when IH_CLK_CTRL reads back as zero. */
+static void ih_v6_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	u32 clk_ctrl = RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL);
+
+	if (clk_ctrl == 0)
+		*flags |= AMD_CG_SUPPORT_IH_CG;
+}
+
+static const struct amd_ip_funcs ih_v6_0_ip_funcs = {
+ .name = "ih_v6_0",
+ .early_init = ih_v6_0_early_init,
+ .sw_init = ih_v6_0_sw_init,
+ .sw_fini = ih_v6_0_sw_fini,
+ .hw_init = ih_v6_0_hw_init,
+ .hw_fini = ih_v6_0_hw_fini,
+ .suspend = ih_v6_0_suspend, /* forwards to hw_fini */
+ .resume = ih_v6_0_resume, /* forwards to hw_init */
+ .is_idle = ih_v6_0_is_idle, /* stub: always true */
+ .wait_for_idle = ih_v6_0_wait_for_idle, /* stub: always -ETIMEDOUT */
+ .soft_reset = ih_v6_0_soft_reset, /* stub: always 0 */
+ .set_clockgating_state = ih_v6_0_set_clockgating_state,
+ .set_powergating_state = ih_v6_0_set_powergating_state,
+ .get_clockgating_state = ih_v6_0_get_clockgating_state,
+};
+
+/* Ring accessors for IH v6.0; IV decoding uses the shared amdgpu_ih helpers. */
+static const struct amdgpu_ih_funcs ih_v6_0_funcs = {
+	.get_wptr = ih_v6_0_get_wptr,
+	.set_rptr = ih_v6_0_set_rptr,
+	.decode_iv = amdgpu_ih_decode_iv_helper,
+	.decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
+};
+
+static void ih_v6_0_set_interrupt_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.ih_funcs = &ih_v6_0_funcs; /* get_wptr/decode_iv/decode_iv_ts/set_rptr */
+}
+
+const struct amdgpu_ip_block_version ih_v6_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_IH,
+ .major = 6, /* version 6.0.0 */
+ .minor = 0,
+ .rev = 0,
+ .funcs = &ih_v6_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.h b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.h
new file mode 100644
index 000000000000..f27b55580716
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __IH_V6_0_IH_H__
+#define __IH_V6_0_IH_H__
+
+extern const struct amdgpu_ip_block_version ih_v6_0_ip_block; /* defined in ih_v6_0.c */
+
+#endif /* __IH_V6_0_IH_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
new file mode 100644
index 000000000000..95b3f4e55ec3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
@@ -0,0 +1,796 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+
+#include "oss/osssys_6_1_0_offset.h"
+#include "oss/osssys_6_1_0_sh_mask.h"
+
+#include "soc15_common.h"
+#include "ih_v6_1.h"
+
+#define MAX_REARM_RETRY 10
+
+static void ih_v6_1_set_interrupt_funcs(struct amdgpu_device *adev);
+
+/**
+ * ih_v6_1_init_register_offset - Initialize register offset for ih rings
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Fill in the register offsets of every ih ring that has a non-zero
+ * ring_size (IH_V6_1). No wptr writeback address registers are recorded
+ * for ring 1.
+ */
+static void ih_v6_1_init_register_offset(struct amdgpu_device *adev)
+{
+	/* ih ring 2 is removed
+	 * ih ring and ih ring 1 are available */
+	struct amdgpu_ih_regs *ring0 = &adev->irq.ih.ih_regs;
+	struct amdgpu_ih_regs *ring1 = &adev->irq.ih1.ih_regs;
+
+	if (adev->irq.ih.ring_size) {
+		ring0->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE);
+		ring0->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI);
+		ring0->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL);
+		ring0->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR);
+		ring0->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR);
+		ring0->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR);
+		ring0->ih_rb_wptr_addr_lo = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_LO);
+		ring0->ih_rb_wptr_addr_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_HI);
+		ring0->psp_reg_id = PSP_REG_IH_RB_CNTL;
+	}
+
+	if (adev->irq.ih1.ring_size) {
+		ring1->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_RING1);
+		ring1->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI_RING1);
+		ring1->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL_RING1);
+		ring1->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_RING1);
+		ring1->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR_RING1);
+		ring1->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR_RING1);
+		ring1->psp_reg_id = PSP_REG_IH_RB_CNTL_RING1;
+	}
+}
+
+/**
+ * force_update_wptr_for_self_int - Force update the wptr for self interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @threshold: threshold to trigger the wptr reporting
+ * @timeout: timeout to trigger the wptr reporting
+ * @enabled: Enable/disable timeout flush mechanism
+ *
+ * threshold input range: 0 ~ 15, default 0,
+ * real_threshold = 2^threshold
+ * timeout input range: 0 ~ 20, default 8,
+ * real_timeout = (2^timeout) * 1024 / (socclk_freq)
+ *
+ * Force update wptr for self interrupt ( >= SIENNA_CICHLID).
+ *
+ * @threshold is programmed into IH_RB_CNTL_RING1, @timeout and @enabled
+ * into IH_CNTL2. If the PSP write of IH_RB_CNTL_RING1 fails on the SR-IOV
+ * indirect path, the function returns without writing IH_CNTL2.
+ */
+static void
+force_update_wptr_for_self_int(struct amdgpu_device *adev,
+ u32 threshold, u32 timeout, bool enabled)
+{
+ u32 ih_cntl, ih_rb_cntl;
+
+ ih_cntl = RREG32_SOC15(OSSSYS, 0, regIH_CNTL2);
+ ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1);
+
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2,
+ SELF_IV_FORCE_WPTR_UPDATE_TIMEOUT, timeout);
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2,
+ SELF_IV_FORCE_WPTR_UPDATE_ENABLE, enabled);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
+ RB_USED_INT_THRESHOLD, threshold);
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl))
+ return;
+ } else {
+ WREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1, ih_rb_cntl);
+ }
+
+ WREG32_SOC15(OSSSYS, 0, regIH_CNTL2, ih_cntl);
+}
+
+/**
+ * ih_v6_1_toggle_ring_interrupts - toggle the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ * @enable: true - enable the interrupts, false - disable the interrupts
+ *
+ * Toggle the interrupt ring buffer (IH_V6_1). On an SR-IOV VF with
+ * indirect IH register access the control register is programmed through
+ * psp_reg_program(). Disabling also zeroes the ring's rptr/wptr registers
+ * and ih->rptr.
+ * Returns 0 on success, -ETIMEDOUT if the PSP register write fails.
+ */
+static int ih_v6_1_toggle_ring_interrupts(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ bool enable)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
+ /* enable_intr field is only valid in ring0 */
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
+ return -ETIMEDOUT;
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (enable) {
+ ih->enabled = true;
+ } else {
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_rptr, 0);
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ ih->enabled = false;
+ ih->rptr = 0;
+ }
+
+ return 0;
+}
+
+/**
+ * ih_v6_1_toggle_interrupts - Toggle all the available interrupt ring buffers
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable or disable interrupt ring buffers
+ *
+ * Toggle ring 0 and ring 1 in that order, skipping any ring whose
+ * ring_size is zero (IH_V6_1).
+ * Returns 0 on success or the first error from
+ * ih_v6_1_toggle_ring_interrupts().
+ */
+static int ih_v6_1_toggle_interrupts(struct amdgpu_device *adev, bool enable)
+{
+	struct amdgpu_ih_ring *rings[] = {&adev->irq.ih, &adev->irq.ih1};
+	int idx;
+	int err;
+
+	for (idx = 0; idx < ARRAY_SIZE(rings); idx++) {
+		if (!rings[idx]->ring_size)
+			continue;
+
+		err = ih_v6_1_toggle_ring_interrupts(adev, rings[idx], enable);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Fold the common IH_RB_CNTL settings for @ih into @ih_rb_cntl and return
+ * the result. Nothing is written to the hardware here.
+ */
+static uint32_t ih_v6_1_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl)
+{
+	const int size_order = order_base_2(ih->ring_size / 4);
+	const int mc_space = ih->use_bus_addr ? 2 : 4;
+	uint32_t cntl = ih_rb_cntl;
+
+	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, MC_SPACE, mc_space);
+	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 1);
+	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, RB_SIZE, size_order);
+	/* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register
+	 * value is written to memory
+	 */
+	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, WPTR_WRITEBACK_ENABLE, 1);
+	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, MC_SNOOP, 1);
+	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, MC_RO, 0);
+	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, MC_VMID, 0);
+
+	return cntl;
+}
+
+/*
+ * Build the IH_DOORBELL_RPTR value for @ih: doorbell offset plus ENABLE
+ * when the ring uses a doorbell, ENABLE cleared otherwise.
+ */
+static uint32_t ih_v6_1_doorbell_rptr(struct amdgpu_ih_ring *ih)
+{
+	u32 val = 0;
+
+	if (!ih->use_doorbell)
+		return REG_SET_FIELD(val, IH_DOORBELL_RPTR, ENABLE, 0);
+
+	val = REG_SET_FIELD(val, IH_DOORBELL_RPTR, OFFSET,
+			    ih->doorbell_index);
+	val = REG_SET_FIELD(val, IH_DOORBELL_RPTR, ENABLE, 1);
+
+	return val;
+}
+
+/**
+ * ih_v6_1_enable_ring - enable an ih ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Enable an ih ring buffer (IH_V6_1): program the ring base, the ring
+ * control register, the wptr writeback address (ring 0 only), zero the
+ * rptr/wptr registers and program the doorbell register.
+ * Returns 0 on success, -ETIMEDOUT if the PSP register write fails.
+ */
+static int ih_v6_1_enable_ring(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer;
+ * bits [47:40] of gpu_addr go to the base_hi register */
+ WREG32(ih_regs->ih_rb_base, ih->gpu_addr >> 8);
+ WREG32(ih_regs->ih_rb_base_hi, (ih->gpu_addr >> 40) & 0xff);
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = ih_v6_1_rb_cntl(ih, tmp);
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled);
+ if (ih == &adev->irq.ih1) { /* ring 1 overrides the overflow setting from ih_v6_1_rb_cntl() */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1);
+ }
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
+ DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (ih == &adev->irq.ih) {
+ /* set the ih ring 0 writeback address whether it's enabled or not */
+ WREG32(ih_regs->ih_rb_wptr_addr_lo, lower_32_bits(ih->wptr_addr));
+ WREG32(ih_regs->ih_rb_wptr_addr_hi, upper_32_bits(ih->wptr_addr) & 0xFFFF);
+ }
+
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ WREG32(ih_regs->ih_rb_rptr, 0);
+
+ WREG32(ih_regs->ih_doorbell_rptr, ih_v6_1_doorbell_rptr(ih));
+
+ return 0;
+}
+
+/**
+ * ih_v6_1_irq_init - init and enable the interrupt ring
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable interrupts, program every IH ring that has a
+ * non-zero ring_size, configure the doorbell range, storm
+ * and flood controls, then enable interrupts again.
+ * Called at device load and resume.
+ * Returns 0 for success, errors for failure.
+ */
+static int ih_v6_1_irq_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1};
+ u32 ih_chicken;
+ u32 tmp;
+ int ret;
+ int i;
+
+ /* disable irqs before the rings are reprogrammed */
+ ret = ih_v6_1_toggle_interrupts(adev, false);
+ if (ret)
+ return ret;
+
+ adev->nbio.funcs->ih_control(adev);
+
+ if (unlikely((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO))) {
+ if (ih[0]->use_bus_addr) {
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, regIH_CHICKEN);
+ ih_chicken = REG_SET_FIELD(ih_chicken,
+ IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_CHICKEN, ih_chicken);
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ih); i++) {
+ if (ih[i]->ring_size) { /* rings with ring_size == 0 are skipped */
+ ret = ih_v6_1_enable_ring(adev, ih[i]);
+ if (ret)
+ return ret;
+ }
+ }
+
+ /* update doorbell range for ih ring 0 */
+ adev->nbio.funcs->ih_doorbell_range(adev, ih[0]->use_doorbell,
+ ih[0]->doorbell_index);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL,
+ CLIENT18_IS_STORM_CLIENT, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL, tmp);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL, tmp);
+
+ /* GC/MMHUB UTCL2 page fault interrupts are configured as
+ * MSI storm capable interrupts by default. The delay is
+ * used to avoid the ISR being called too frequently
+ * when page faults happen on several contiguous pages
+ * and thus avoid an MSI storm */
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL);
+ tmp = REG_SET_FIELD(tmp, IH_MSI_STORM_CTRL,
+ DELAY, 3);
+ WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp);
+
+ /* Redirect the interrupts to IH RB1 for dGPU (only when ring 1 exists) */
+ if (adev->irq.ih1.ring_size) {
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_INDEX, INDEX, 0);
+ WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX, tmp);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, CLIENT_ID, 0xa);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, SOURCE_ID, 0x0);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA,
+ SOURCE_ID_MATCH_ENABLE, 0x1);
+
+ WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA, tmp);
+ }
+
+ pci_set_master(adev->pdev);
+
+ /* enable interrupts */
+ ret = ih_v6_1_toggle_interrupts(adev, true);
+ if (ret)
+ return ret;
+ /* enable wptr force update for self int (threshold 0, timeout 8) */
+ force_update_wptr_for_self_int(adev, 0, 8, true);
+
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
+ return 0;
+}
+
+/**
+ * ih_v6_1_irq_disable - disable interrupts
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable interrupts on the hw: turn off the forced wptr update for self
+ * interrupts, toggle every ring off, then delay for 1 ms. The return
+ * value of ih_v6_1_toggle_interrupts() is not checked here.
+ */
+static void ih_v6_1_irq_disable(struct amdgpu_device *adev)
+{
+ force_update_wptr_for_self_int(adev, 0, 8, false);
+ ih_v6_1_toggle_interrupts(adev, false);
+
+ /* Wait and acknowledge irq */
+ mdelay(1);
+}
+
+/**
+ * ih_v6_1_get_wptr - get the IH ring buffer wptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Get the IH ring buffer wptr from the writeback memory buffer; the
+ * wptr register is only read when the writeback copy has RB_OVERFLOW
+ * set. If the register confirms the overflow, ih->rptr is moved to
+ * (wptr + 32) and WPTR_OVERFLOW_CLEAR is pulsed in the ring control
+ * register.
+ * Returns the value of the wptr masked with ih->ptr_mask.
+ */
+static u32 ih_v6_1_get_wptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 wptr, tmp;
+ struct amdgpu_ih_regs *ih_regs;
+
+ wptr = le32_to_cpu(*ih->wptr_cpu);
+ ih_regs = &ih->ih_regs;
+
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+
+ wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr); /* re-check against the register */
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+
+ /* When a ring buffer overflow happens, start parsing interrupts
+ * from the last not overwritten vector (wptr + 32). Hopefully
+ * this should allow us to catch up.
+ */
+ tmp = (wptr + 32) & ih->ptr_mask;
+ dev_warn(adev->dev, "IH ring buffer overflow "
+ "(0x%08X, 0x%08X, 0x%08X)\n",
+ wptr, ih->rptr, tmp);
+ ih->rptr = tmp;
+
+ tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+out:
+ return (wptr & ih->ptr_mask);
+}
+
+/**
+ * ih_v6_1_irq_rearm - rearm IRQ if lost
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Read back the ring's rptr register and write the doorbell again while
+ * it differs from ih->rptr, giving up after MAX_REARM_RETRY attempts.
+ */
+static void ih_v6_1_irq_rearm(struct amdgpu_device *adev,
+			      struct amdgpu_ih_ring *ih)
+{
+	struct amdgpu_ih_regs *regs = &ih->ih_regs;
+	uint32_t hw_rptr;
+	uint32_t attempt;
+
+	for (attempt = 0; attempt < MAX_REARM_RETRY; attempt++) {
+		hw_rptr = RREG32_NO_KIQ(regs->ih_rb_rptr);
+
+		/* stop once the hw value matches or reads back out of range */
+		if (hw_rptr >= ih->ring_size || hw_rptr == ih->rptr)
+			break;
+
+		/* Rearm IRQ / re-write doorbell if doorbell write is lost */
+		WDOORBELL32(ih->doorbell_index, ih->rptr);
+	}
+}
+
+/**
+ * ih_v6_1_set_rptr - set the IH ring buffer rptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Publish ih->rptr to the hardware: through the doorbell when the ring
+ * uses one, otherwise by writing the ring's rptr register.
+ */
+static void ih_v6_1_set_rptr(struct amdgpu_device *adev,
+			     struct amdgpu_ih_ring *ih)
+{
+	if (!ih->use_doorbell) {
+		WREG32(ih->ih_regs.ih_rb_rptr, ih->rptr);
+		return;
+	}
+
+	/* XXX check if swapping is necessary on BE */
+	*ih->rptr_cpu = ih->rptr;
+	WDOORBELL32(ih->doorbell_index, ih->rptr);
+
+	/* on an SR-IOV VF, make sure the doorbell write was not lost */
+	if (amdgpu_sriov_vf(adev))
+		ih_v6_1_irq_rearm(adev, ih);
+}
+
+/**
+ * ih_v6_1_self_irq - dispatch work for ring 1
+ *
+ * @adev: amdgpu_device pointer
+ * @source: irq source (unused)
+ * @entry: IV with WPTR update
+ *
+ * For ring id 1, store the WPTR carried in the IV into the ring 1
+ * writeback slot and schedule ih1_work. Other ring ids are ignored.
+ * Always returns 0.
+ */
+static int ih_v6_1_self_irq(struct amdgpu_device *adev,
+			    struct amdgpu_irq_src *source,
+			    struct amdgpu_iv_entry *entry)
+{
+	uint32_t new_wptr = cpu_to_le32(entry->src_data[0]);
+
+	if (entry->ring_id == 1) {
+		*adev->irq.ih1.wptr_cpu = new_wptr;
+		schedule_work(&adev->irq.ih1_work);
+	}
+
+	return 0;
+}
+
+static const struct amdgpu_irq_src_funcs ih_v6_1_self_irq_funcs = {
+ .process = ih_v6_1_self_irq, /* only callback provided for the self-irq source */
+};
+
+/* Hook the self-interrupt source up to ih_v6_1_self_irq_funcs; it has no types. */
+static void ih_v6_1_set_self_irq_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_irq_src *self = &adev->irq.self_irq;
+
+	self->funcs = &ih_v6_1_self_irq_funcs;
+	self->num_types = 0;
+}
+
+/*
+ * Call amdgpu_irq_add_domain(), then install the IH ring callbacks and the
+ * self-interrupt callbacks.
+ * Returns 0 on success or the error from amdgpu_irq_add_domain().
+ */
+static int ih_v6_1_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int ret;
+
+	ret = amdgpu_irq_add_domain(adev);
+	if (ret)
+		return ret;
+
+	ih_v6_1_set_interrupt_funcs(adev);
+	ih_v6_1_set_self_irq_funcs(adev);
+
+	return 0;
+}
+
+static int ih_v6_1_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+ bool use_bus_addr;
+
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0,
+ &adev->irq.self_irq);
+
+ if (r)
+ return r;
+
+ /* use gpu virtual address for ih ring
+ * until ih_chicken is programmed to allow
+ * use bus address for ih ring by psp bl
+ * (bus address is chosen whenever firmware is not loaded via PSP) */
+ use_bus_addr = adev->firmware.load_type != AMDGPU_FW_LOAD_PSP;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
+ if (r)
+ return r;
+
+ adev->irq.ih.use_doorbell = true;
+ adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
+
+ if (!(adev->flags & AMD_IS_APU)) { /* ring 1 is only created on non-APU parts */
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, IH_RING_SIZE,
+ use_bus_addr);
+ if (r)
+ return r;
+
+ adev->irq.ih1.use_doorbell = true;
+ adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1;
+ }
+
+ /* initialize ih control register offset (needs ring_size set above) */
+ ih_v6_1_init_register_offset(adev);
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_init(adev);
+
+ return r;
+}
+
+/* Software teardown: hand off to amdgpu_irq_fini_sw(); always returns 0. */
+static int ih_v6_1_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+	amdgpu_irq_fini_sw(ip_block->adev);
+
+	return 0;
+}
+
+/* Hardware bring-up: returns whatever ih_v6_1_irq_init() returns. */
+static int ih_v6_1_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	return ih_v6_1_irq_init(ip_block->adev);
+}
+
+/* Hardware teardown: disable the IH interrupts; always returns 0. */
+static int ih_v6_1_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	ih_v6_1_irq_disable(adev);
+
+	return 0;
+}
+
+static int ih_v6_1_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return ih_v6_1_hw_fini(ip_block); /* suspend is the same as hw teardown */
+}
+
+static int ih_v6_1_resume(struct amdgpu_ip_block *ip_block)
+{
+ return ih_v6_1_hw_init(ip_block); /* resume is the same as hw bring-up */
+}
+
+static bool ih_v6_1_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ /* todo: no hardware check yet, the block is always reported idle */
+ return true;
+}
+
+static int ih_v6_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ /* todo: not implemented, unconditionally reports a timeout */
+ return -ETIMEDOUT;
+}
+
+static int ih_v6_1_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ /* todo: not implemented, nothing is reset and success is returned */
+ return 0;
+}
+
+/*
+ * Apply the requested clock-gating setting to IH_CLK_CTRL.
+ *
+ * Nothing is touched unless AMD_CG_SUPPORT_IH_CG is set in adev->cg_flags.
+ * All five *_SOFT_OVERRIDE fields receive the same value: 0 when @enable is
+ * true, 1 when it is false.  The register is written back only when the new
+ * value differs from the one that was read.
+ */
+static void ih_v6_1_update_clockgating_state(struct amdgpu_device *adev,
+					     bool enable)
+{
+	uint32_t before, after, override;
+
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_IH_CG))
+		return;
+
+	before = RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL);
+	override = enable ? 0 : 1;
+
+	after = REG_SET_FIELD(before, IH_CLK_CTRL,
+			      DBUS_MUX_CLK_SOFT_OVERRIDE, override);
+	after = REG_SET_FIELD(after, IH_CLK_CTRL,
+			      OSSSYS_SHARE_CLK_SOFT_OVERRIDE, override);
+	after = REG_SET_FIELD(after, IH_CLK_CTRL,
+			      LIMIT_SMN_CLK_SOFT_OVERRIDE, override);
+	after = REG_SET_FIELD(after, IH_CLK_CTRL,
+			      DYN_CLK_SOFT_OVERRIDE, override);
+	after = REG_SET_FIELD(after, IH_CLK_CTRL,
+			      REG_CLK_SOFT_OVERRIDE, override);
+
+	if (after != before)
+		WREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL, after);
+}
+
+/*
+ * Clock-gating entry point: gating is requested when @state equals
+ * AMD_CG_STATE_GATE, any other state requests ungating.  Always returns 0.
+ */
+static int ih_v6_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					  enum amd_clockgating_state state)
+{
+	bool gate = (state == AMD_CG_STATE_GATE);
+
+	ih_v6_1_update_clockgating_state(ip_block->adev, gate);
+
+	return 0;
+}
+
+/*
+ * Select the power mode of the IH buffer memory and the retry-interrupt CAM
+ * memory through IH_MEM_POWER_CTRL.
+ *
+ * @enable true  -> DS mode on for both memories (LS and SD stay off)
+ * @enable false -> LS, DS and SD all off
+ *
+ * IH_BUFFER_MEM_POWER_CTRL_EN is cleared and written out first, the mode
+ * bits are updated afterwards, and the control enable bit is set again in
+ * the final register write.
+ */
+static void ih_v6_1_update_ih_mem_power_gating(struct amdgpu_device *adev,
+					       bool enable)
+{
+	/* the DS_EN fields are the only ones that depend on @enable */
+	const uint32_t ds_en = enable ? 1 : 0;
+	uint32_t ctrl;
+
+	/* Disable ih sram power cntl before switch powergating mode */
+	ctrl = RREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL);
+	ctrl = REG_SET_FIELD(ctrl, IH_MEM_POWER_CTRL,
+			     IH_BUFFER_MEM_POWER_CTRL_EN, 0);
+	WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ctrl);
+
+	/* mem power mode; it is recommended to use DS mode when gating */
+	ctrl = REG_SET_FIELD(ctrl, IH_MEM_POWER_CTRL,
+			     IH_BUFFER_MEM_POWER_LS_EN, 0);
+	ctrl = REG_SET_FIELD(ctrl, IH_MEM_POWER_CTRL,
+			     IH_BUFFER_MEM_POWER_DS_EN, ds_en);
+	ctrl = REG_SET_FIELD(ctrl, IH_MEM_POWER_CTRL,
+			     IH_BUFFER_MEM_POWER_SD_EN, 0);
+
+	/* cam mem power mode */
+	ctrl = REG_SET_FIELD(ctrl, IH_MEM_POWER_CTRL,
+			     IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0);
+	ctrl = REG_SET_FIELD(ctrl, IH_MEM_POWER_CTRL,
+			     IH_RETRY_INT_CAM_MEM_POWER_DS_EN, ds_en);
+	ctrl = REG_SET_FIELD(ctrl, IH_MEM_POWER_CTRL,
+			     IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0);
+
+	/* re-enable power cntl */
+	ctrl = REG_SET_FIELD(ctrl, IH_MEM_POWER_CTRL,
+			     IH_BUFFER_MEM_POWER_CTRL_EN, 1);
+
+	WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ctrl);
+}
+
+/*
+ * Power-gating entry point.  The IH memory power mode is only updated when
+ * AMD_PG_SUPPORT_IH_SRAM_PG is set in adev->pg_flags; gating is requested
+ * when @state equals AMD_PG_STATE_GATE.  Always returns 0.
+ */
+static int ih_v6_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
+					  enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	if (!(adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG))
+		return 0;
+
+	ih_v6_1_update_ih_mem_power_gating(adev, state == AMD_PG_STATE_GATE);
+
+	return 0;
+}
+
+/*
+ * Report clock-gating state: AMD_CG_SUPPORT_IH_CG is OR-ed into @flags when
+ * IH_CLK_CTRL reads back as zero; @flags is left alone otherwise.
+ */
+static void ih_v6_1_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+	/* not named below; presumably RREG32_SOC15() picks it up - keep it */
+	struct amdgpu_device *adev = ip_block->adev;
+	u32 clk_ctrl;
+
+	clk_ctrl = RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL);
+	if (clk_ctrl == 0)
+		*flags |= AMD_CG_SUPPORT_IH_CG;
+}
+
+/* IP-block callback table referenced by ih_v6_1_ip_block. */
+static const struct amd_ip_funcs ih_v6_1_ip_funcs = {
+	.name			= "ih_v6_1",
+	/* init / teardown */
+	.early_init		= ih_v6_1_early_init,
+	.sw_init		= ih_v6_1_sw_init,
+	.sw_fini		= ih_v6_1_sw_fini,
+	.hw_init		= ih_v6_1_hw_init,
+	.hw_fini		= ih_v6_1_hw_fini,
+	/* power management */
+	.suspend		= ih_v6_1_suspend,
+	.resume			= ih_v6_1_resume,
+	/* idle / reset (still stubs) */
+	.is_idle		= ih_v6_1_is_idle,
+	.wait_for_idle		= ih_v6_1_wait_for_idle,
+	.soft_reset		= ih_v6_1_soft_reset,
+	/* clock and power gating */
+	.set_clockgating_state	= ih_v6_1_set_clockgating_state,
+	.set_powergating_state	= ih_v6_1_set_powergating_state,
+	.get_clockgating_state	= ih_v6_1_get_clockgating_state,
+};
+
+/* Ring-level IH callbacks; IV decoding uses the shared amdgpu_ih helpers. */
+static const struct amdgpu_ih_funcs ih_v6_1_funcs = {
+	.get_wptr	= ih_v6_1_get_wptr,
+	.decode_iv	= amdgpu_ih_decode_iv_helper,
+	.decode_iv_ts	= amdgpu_ih_decode_iv_ts_helper,
+	.set_rptr	= ih_v6_1_set_rptr,
+};
+
+/* Point the device's IH callback table at the v6.1 implementation. */
+static void ih_v6_1_set_interrupt_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_irq *irq = &adev->irq;
+
+	irq->ih_funcs = &ih_v6_1_funcs;
+}
+
+/*
+ * IP block descriptor for the IH v6.1 implementation.
+ * NOTE(review): .minor is 0 although the block is named v6_1 - confirm
+ * whether this is intentional before anything compares the version.
+ */
+const struct amdgpu_ip_block_version ih_v6_1_ip_block = {
+	.type	= AMD_IP_BLOCK_TYPE_IH,
+	.major	= 6,
+	.minor	= 0,
+	.rev	= 0,
+	.funcs	= &ih_v6_1_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.h b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.h
new file mode 100644
index 000000000000..2232bc5cbd09
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __IH_V6_1_IH_H__
+#define __IH_V6_1_IH_H__
+
+extern const struct amdgpu_ip_block_version ih_v6_1_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
new file mode 100644
index 000000000000..b32ea4129c61
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
@@ -0,0 +1,787 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+
+#include "oss/osssys_7_0_0_offset.h"
+#include "oss/osssys_7_0_0_sh_mask.h"
+
+#include "soc15_common.h"
+#include "ih_v7_0.h"
+
+#define MAX_REARM_RETRY 10
+
+static void ih_v7_0_set_interrupt_funcs(struct amdgpu_device *adev);
+
+/**
+ * ih_v7_0_init_register_offset - record the register offsets of the ih rings
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Fill in the ih_regs table of every ring that has a non-zero ring_size
+ * (IH_V7_0).  Only ring 0 gets the write pointer address registers.
+ */
+static void ih_v7_0_init_register_offset(struct amdgpu_device *adev)
+{
+	struct amdgpu_ih_ring *ring0 = &adev->irq.ih;
+	struct amdgpu_ih_ring *ring1 = &adev->irq.ih1;
+	struct amdgpu_ih_regs *regs;
+
+	/* ih ring 2 is removed; only ih ring and ih ring 1 are available */
+	if (ring0->ring_size) {
+		regs = &ring0->ih_regs;
+		regs->psp_reg_id = PSP_REG_IH_RB_CNTL;
+		regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL);
+		regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE);
+		regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI);
+		regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR);
+		regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR);
+		regs->ih_rb_wptr_addr_lo = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_LO);
+		regs->ih_rb_wptr_addr_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_HI);
+		regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR);
+	}
+
+	if (ring1->ring_size) {
+		regs = &ring1->ih_regs;
+		regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING1;
+		regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL_RING1);
+		regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_RING1);
+		regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI_RING1);
+		regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR_RING1);
+		regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_RING1);
+		regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR_RING1);
+	}
+}
+
+/**
+ * force_update_wptr_for_self_int - Force update the wptr for self interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @threshold: threshold to trigger the wptr reporting
+ * @timeout: timeout to trigger the wptr reporting
+ * @enabled: Enable/disable timeout flush mechanism
+ *
+ * threshold input range: 0 ~ 15, default 0,
+ * real_threshold = 2^threshold
+ * timeout input range: 0 ~ 20, default 8,
+ * real_timeout = (2^timeout) * 1024 / (socclk_freq)
+ *
+ * @threshold goes into IH_RB_CNTL_RING1 (through the PSP for an SR-IOV VF
+ * with indirect IH register access), @timeout and @enabled into IH_CNTL2.
+ * If the PSP write fails, IH_CNTL2 is left untouched.
+ */
+static void
+force_update_wptr_for_self_int(struct amdgpu_device *adev,
+			       u32 threshold, u32 timeout, bool enabled)
+{
+	u32 cntl2, ring1_cntl;
+
+	cntl2 = RREG32_SOC15(OSSSYS, 0, regIH_CNTL2);
+	ring1_cntl = RREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1);
+
+	ring1_cntl = REG_SET_FIELD(ring1_cntl, IH_RB_CNTL_RING1,
+				   RB_USED_INT_THRESHOLD, threshold);
+	cntl2 = REG_SET_FIELD(cntl2, IH_CNTL2,
+			      SELF_IV_FORCE_WPTR_UPDATE_TIMEOUT, timeout);
+	cntl2 = REG_SET_FIELD(cntl2, IH_CNTL2,
+			      SELF_IV_FORCE_WPTR_UPDATE_ENABLE, enabled);
+
+	if (!amdgpu_sriov_vf(adev) || !amdgpu_sriov_reg_indirect_ih(adev))
+		WREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1, ring1_cntl);
+	else if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ring1_cntl))
+		return;
+
+	WREG32_SOC15(OSSSYS, 0, regIH_CNTL2, cntl2);
+}
+
+/**
+ * ih_v7_0_toggle_ring_interrupts - toggle the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ * @enable: true - enable the interrupts, false - disable the interrupts
+ *
+ * Toggle the interrupt ring buffer (IH_V7_0).  When disabling, the hardware
+ * rptr/wptr registers and the cached ih->rptr are reset to 0 as well.
+ * Returns 0 on success, -ETIMEDOUT if psp_reg_program() fails.
+ */
+static int ih_v7_0_toggle_ring_interrupts(struct amdgpu_device *adev,
+					  struct amdgpu_ih_ring *ih,
+					  bool enable)
+{
+	struct amdgpu_ih_regs *regs = &ih->ih_regs;
+	const uint32_t on = enable ? 1 : 0;
+	uint32_t cntl;
+
+	cntl = RREG32(regs->ih_rb_cntl);
+	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, RB_ENABLE, on);
+	/* enable_intr field is only valid in ring0 */
+	if (ih == &adev->irq.ih)
+		cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, ENABLE_INTR, on);
+
+	if (!amdgpu_sriov_vf(adev) || !amdgpu_sriov_reg_indirect_ih(adev))
+		WREG32(regs->ih_rb_cntl, cntl);
+	else if (psp_reg_program(&adev->psp, regs->psp_reg_id, cntl))
+		return -ETIMEDOUT;
+
+	if (enable) {
+		ih->enabled = true;
+		return 0;
+	}
+
+	/* set rptr, wptr to 0 */
+	WREG32(regs->ih_rb_rptr, 0);
+	WREG32(regs->ih_rb_wptr, 0);
+	ih->enabled = false;
+	ih->rptr = 0;
+
+	return 0;
+}
+
+/**
+ * ih_v7_0_toggle_interrupts - Toggle all the available interrupt ring buffers
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable or disable interrupt ring buffers
+ *
+ * Toggle ring 0 and ring 1 (IH_V7_0), skipping a ring whose ring_size is 0.
+ * Stops at, and returns, the first error.
+ */
+static int ih_v7_0_toggle_interrupts(struct amdgpu_device *adev, bool enable)
+{
+	struct amdgpu_ih_ring *rings[] = {&adev->irq.ih, &adev->irq.ih1};
+	struct amdgpu_ih_ring *ring;
+	int idx;
+	int ret;
+
+	for (idx = 0; idx < ARRAY_SIZE(rings); idx++) {
+		ring = rings[idx];
+		if (!ring->ring_size)
+			continue;
+
+		ret = ih_v7_0_toggle_ring_interrupts(adev, ring, enable);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Build the common IH_RB_CNTL value for @ih on top of @ih_rb_cntl and
+ * return it; no register is accessed here.
+ */
+static uint32_t ih_v7_0_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl)
+{
+	const int size_log2 = order_base_2(ih->ring_size / 4);
+	const uint32_t mc_space = ih->use_bus_addr ? 2 : 4;
+	uint32_t val = ih_rb_cntl;
+
+	val = REG_SET_FIELD(val, IH_RB_CNTL, MC_SPACE, mc_space);
+	val = REG_SET_FIELD(val, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+	val = REG_SET_FIELD(val, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 1);
+	val = REG_SET_FIELD(val, IH_RB_CNTL, RB_SIZE, size_log2);
+	/* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register
+	 * value is written to memory
+	 */
+	val = REG_SET_FIELD(val, IH_RB_CNTL, WPTR_WRITEBACK_ENABLE, 1);
+	val = REG_SET_FIELD(val, IH_RB_CNTL, MC_SNOOP, 1);
+	val = REG_SET_FIELD(val, IH_RB_CNTL, MC_RO, 0);
+	val = REG_SET_FIELD(val, IH_RB_CNTL, MC_VMID, 0);
+
+	return val;
+}
+
+/*
+ * Compose the IH_DOORBELL_RPTR value for @ih: OFFSET = doorbell_index and
+ * ENABLE = 1 when the ring uses a doorbell, ENABLE = 0 otherwise.
+ */
+static uint32_t ih_v7_0_doorbell_rptr(struct amdgpu_ih_ring *ih)
+{
+	u32 val = 0;
+
+	if (!ih->use_doorbell)
+		return REG_SET_FIELD(val, IH_DOORBELL_RPTR, ENABLE, 0);
+
+	val = REG_SET_FIELD(val, IH_DOORBELL_RPTR, OFFSET, ih->doorbell_index);
+	val = REG_SET_FIELD(val, IH_DOORBELL_RPTR, ENABLE, 1);
+
+	return val;
+}
+
+/**
+ * ih_v7_0_enable_ring - enable an ih ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Program one ih ring buffer (IH_V7_0): ring base address, IH_RB_CNTL
+ * (through the PSP for an SR-IOV VF with indirect IH register access),
+ * the wptr writeback address for ring 0, zeroed rptr/wptr and the
+ * doorbell rptr register.
+ * Returns 0 on success, -ETIMEDOUT if psp_reg_program() fails; in that
+ * case the registers that follow IH_RB_CNTL are not written.
+ */
+static int ih_v7_0_enable_ring(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer */
+ WREG32(ih_regs->ih_rb_base, ih->gpu_addr >> 8);
+ WREG32(ih_regs->ih_rb_base_hi, (ih->gpu_addr >> 40) & 0xff);
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = ih_v7_0_rb_cntl(ih, tmp);
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled);
+ if (ih == &adev->irq.ih1) {
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1);
+ }
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
+ DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (ih == &adev->irq.ih) {
+ /* set the ih ring 0 writeback address whether it's enabled or not */
+ WREG32(ih_regs->ih_rb_wptr_addr_lo, lower_32_bits(ih->wptr_addr));
+ WREG32(ih_regs->ih_rb_wptr_addr_hi, upper_32_bits(ih->wptr_addr) & 0xFFFF);
+ }
+
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ WREG32(ih_regs->ih_rb_rptr, 0);
+
+ WREG32(ih_regs->ih_doorbell_rptr, ih_v7_0_doorbell_rptr(ih));
+
+ return 0;
+}
+
+/**
+ * ih_v7_0_irq_init - init and enable the interrupt ring
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable the ih rings, program every ring that has a non-zero
+ * ring_size, set up the storm/flood controls and the ring 1 client
+ * redirection, enable PCI bus mastering and re-enable the rings.
+ * No ring memory is allocated here.
+ * Called from ih_v7_0_hw_init(), which ih_v7_0_resume() also uses.
+ * Returns 0 for success, errors for failure.
+ */
+static int ih_v7_0_irq_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1};
+ u32 ih_chicken;
+ u32 tmp;
+ int ret;
+ int i;
+
+ /* disable irqs */
+ ret = ih_v7_0_toggle_interrupts(adev, false);
+ if (ret)
+ return ret;
+
+ adev->nbio.funcs->ih_control(adev);
+
+ if (unlikely((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO))) {
+ if (ih[0]->use_bus_addr) {
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, regIH_CHICKEN);
+ ih_chicken = REG_SET_FIELD(ih_chicken,
+ IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_CHICKEN, ih_chicken);
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ih); i++) {
+ if (ih[i]->ring_size) {
+ ret = ih_v7_0_enable_ring(adev, ih[i]);
+ if (ret)
+ return ret;
+ }
+ }
+
+ /* update doorbell range for ih ring 0 */
+ adev->nbio.funcs->ih_doorbell_range(adev, ih[0]->use_doorbell,
+ ih[0]->doorbell_index);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL,
+ CLIENT18_IS_STORM_CLIENT, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL, tmp);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL, tmp);
+
+ /* GC/MMHUB UTCL2 page fault interrupts are configured as
+ * MSI storm capable interrupts by default. The delay is
+ * used to avoid the ISR being called too frequently
+ * when page faults happen on several contiguous pages
+ * and thus avoid an MSI storm */
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL);
+ tmp = REG_SET_FIELD(tmp, IH_MSI_STORM_CTRL,
+ DELAY, 3);
+ WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp);
+
+ /* Redirect the interrupts to IH RB1 for dGPU (only when ring 1 exists) */
+ if (adev->irq.ih1.ring_size) {
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_INDEX, INDEX, 0);
+ WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX, tmp);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, CLIENT_ID, 0xa);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, SOURCE_ID, 0x0);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA,
+ SOURCE_ID_MATCH_ENABLE, 0x1);
+
+ WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA, tmp);
+ }
+
+ pci_set_master(adev->pdev);
+
+ /* enable interrupts */
+ ret = ih_v7_0_toggle_interrupts(adev, true);
+ if (ret)
+ return ret;
+ /* enable wptr force update for self int (threshold 0, timeout 8) */
+ force_update_wptr_for_self_int(adev, 0, 8, true);
+
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
+ return 0;
+}
+
+/**
+ * ih_v7_0_irq_disable - disable interrupts
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable interrupts on the hw: turn off the forced wptr update for the
+ * self interrupt, disable the ih rings and wait 1 ms.  The function cannot
+ * return an error, so a failure to disable the rings is logged instead of
+ * being dropped silently.
+ */
+static void ih_v7_0_irq_disable(struct amdgpu_device *adev)
+{
+	int r;
+
+	force_update_wptr_for_self_int(adev, 0, 8, false);
+
+	r = ih_v7_0_toggle_interrupts(adev, false);
+	if (r)
+		dev_warn(adev->dev,
+			 "failed to disable IH ring interrupts (%d)\n", r);
+
+	/* Wait and acknowledge irq */
+	mdelay(1);
+}
+
+/**
+ * ih_v7_0_get_wptr() - get the IH ring buffer wptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to fetch wptr
+ *
+ * Get the IH ring buffer wptr from the writeback memory buffer; the
+ * register is only read when the writeback copy has RB_OVERFLOW set.
+ * If the register confirms the overflow, ih->rptr is moved to
+ * (wptr + 32), a warning is logged and the overflow flag is cleared
+ * through IH_RB_CNTL.
+ * Returns the value of the wptr masked with ih->ptr_mask.
+ */
+static u32 ih_v7_0_get_wptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 wptr, tmp;
+ struct amdgpu_ih_regs *ih_regs;
+
+ wptr = le32_to_cpu(*ih->wptr_cpu);
+ ih_regs = &ih->ih_regs;
+
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+
+ wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr);
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+
+ /* When a ring buffer overflow happens, start parsing interrupts
+ * from the last not overwritten vector (wptr + 32). Hopefully
+ * this should allow us to catch up.
+ */
+ tmp = (wptr + 32) & ih->ptr_mask;
+ dev_warn(adev->dev, "IH ring buffer overflow "
+ "(0x%08X, 0x%08X, 0x%08X)\n",
+ wptr, ih->rptr, tmp);
+ ih->rptr = tmp;
+
+ tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+ /* Unset the WPTR_OVERFLOW_CLEAR bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+out:
+ return (wptr & ih->ptr_mask);
+}
+
+/**
+ * ih_v7_0_irq_rearm - rearm IRQ if lost
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring to match
+ *
+ * Re-write the doorbell, up to MAX_REARM_RETRY times, for as long as the
+ * rptr register holds an in-range value that differs from ih->rptr.
+ */
+static void ih_v7_0_irq_rearm(struct amdgpu_device *adev,
+			      struct amdgpu_ih_ring *ih)
+{
+	struct amdgpu_ih_regs *regs = &ih->ih_regs;
+	uint32_t hw_rptr;
+	uint32_t attempt;
+
+	/* Rearm IRQ / re-write doorbell if doorbell write is lost */
+	for (attempt = 0; attempt < MAX_REARM_RETRY; attempt++) {
+		hw_rptr = RREG32_NO_KIQ(regs->ih_rb_rptr);
+		if (hw_rptr >= ih->ring_size || hw_rptr == ih->rptr)
+			return;
+
+		WDOORBELL32(ih->doorbell_index, ih->rptr);
+	}
+}
+
+/**
+ * ih_v7_0_set_rptr - set the IH ring buffer rptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to set rptr
+ *
+ * Without a doorbell the rptr register is written directly.  With a
+ * doorbell the rptr is stored through ih->rptr_cpu, the doorbell is rung
+ * and, for an SR-IOV VF, ih_v7_0_irq_rearm() is run afterwards.
+ */
+static void ih_v7_0_set_rptr(struct amdgpu_device *adev,
+			     struct amdgpu_ih_ring *ih)
+{
+	if (!ih->use_doorbell) {
+		WREG32(ih->ih_regs.ih_rb_rptr, ih->rptr);
+		return;
+	}
+
+	/* XXX check if swapping is necessary on BE */
+	*ih->rptr_cpu = ih->rptr;
+	WDOORBELL32(ih->doorbell_index, ih->rptr);
+
+	if (amdgpu_sriov_vf(adev))
+		ih_v7_0_irq_rearm(adev, ih);
+}
+
+/**
+ * ih_v7_0_self_irq - dispatch work for ring 1
+ *
+ * @adev: amdgpu_device pointer
+ * @source: irq source
+ * @entry: IV with WPTR update
+ *
+ * For ring_id 1, store the WPTR carried in src_data[0] and schedule the
+ * ring 1 work item; every other ring_id is ignored.  Always returns 0.
+ */
+static int ih_v7_0_self_irq(struct amdgpu_device *adev,
+			    struct amdgpu_irq_src *source,
+			    struct amdgpu_iv_entry *entry)
+{
+	uint32_t new_wptr = cpu_to_le32(entry->src_data[0]);
+
+	if (entry->ring_id == 1) {
+		*adev->irq.ih1.wptr_cpu = new_wptr;
+		schedule_work(&adev->irq.ih1_work);
+	}
+
+	return 0;
+}
+
+/* Callbacks of the IH self-interrupt source; only .process is provided. */
+static const struct amdgpu_irq_src_funcs ih_v7_0_self_irq_funcs = {
+	.process	= ih_v7_0_self_irq,
+};
+
+/* Attach the self-interrupt callbacks; the source declares 0 irq types. */
+static void ih_v7_0_set_self_irq_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_irq_src *self = &adev->irq.self_irq;
+
+	self->num_types = 0;
+	self->funcs = &ih_v7_0_self_irq_funcs;
+}
+
+/*
+ * Early init: install the IH ring callbacks and the self-interrupt
+ * callbacks.  Always returns 0.
+ */
+static int ih_v7_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+	ih_v7_0_set_interrupt_funcs(ip_block->adev);
+	ih_v7_0_set_self_irq_funcs(ip_block->adev);
+
+	return 0;
+}
+
+/*
+ * Software init: register the self interrupt, initialise ring 0 (256 KiB),
+ * ring 1 (IH_RING_SIZE, skipped when AMD_IS_APU is set) and the soft ring
+ * (PAGE_SIZE), record the register offsets and finish with
+ * amdgpu_irq_init().  Ring 0 and ring 1 use doorbells.
+ *
+ * Returns 0 on success or the first error reported by a helper.
+ */
+static int ih_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	bool bus_addr;
+	int ret;
+
+	ret = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0,
+				&adev->irq.self_irq);
+	if (ret)
+		return ret;
+
+	/* use gpu virtual address for ih ring
+	 * until ih_chicken is programmed to allow
+	 * use bus address for ih ring by psp bl */
+	bus_addr = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP);
+
+	ret = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, bus_addr);
+	if (ret)
+		return ret;
+
+	adev->irq.ih.use_doorbell = true;
+	adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
+
+	if (!(adev->flags & AMD_IS_APU)) {
+		ret = amdgpu_ih_ring_init(adev, &adev->irq.ih1, IH_RING_SIZE,
+					  bus_addr);
+		if (ret)
+			return ret;
+
+		adev->irq.ih1.use_doorbell = true;
+		adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1;
+	}
+
+	/* initialize ih control register offset */
+	ih_v7_0_init_register_offset(adev);
+
+	ret = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
+	if (ret)
+		return ret;
+
+	return amdgpu_irq_init(adev);
+}
+
+/*
+ * Software teardown for the IH block: passes the device to
+ * amdgpu_irq_fini_sw() and always reports success.
+ */
+static int ih_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+	amdgpu_irq_fini_sw(ip_block->adev);
+	return 0;
+}
+
+/*
+ * Hardware bring-up for the IH block.
+ *
+ * Returns whatever ih_v7_0_irq_init() returns (0 on success).
+ */
+static int ih_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	return ih_v7_0_irq_init(ip_block->adev);
+}
+
+/* Hardware shutdown: calls ih_v7_0_irq_disable() and always returns 0. */
+static int ih_v7_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	ih_v7_0_irq_disable(adev);
+	return 0;
+}
+
+/* Suspend is the same operation as hw_fini for this block. */
+static int ih_v7_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+	int ret = ih_v7_0_hw_fini(ip_block);
+
+	return ret;
+}
+
+/* Resume is the same operation as hw_init for this block. */
+static int ih_v7_0_resume(struct amdgpu_ip_block *ip_block)
+{
+	int ret = ih_v7_0_hw_init(ip_block);
+
+	return ret;
+}
+
+/* Placeholder: no hardware state is inspected, the block always reads as idle. */
+static bool ih_v7_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+	return true; /* todo */
+}
+
+/*
+ * Placeholder: nothing is polled, the call unconditionally reports a timeout.
+ * NOTE(review): ih_v7_0_is_idle() always returns true while this always
+ * fails - confirm that callers tolerate the mismatch.
+ */
+static int ih_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+	return -ETIMEDOUT; /* todo */
+}
+
+/* Placeholder: no reset is performed, success is reported unconditionally. */
+static int ih_v7_0_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+	return 0; /* todo */
+}
+
+/*
+ * Apply the requested clock-gating setting to IH_CLK_CTRL.
+ *
+ * Nothing is touched unless AMD_CG_SUPPORT_IH_CG is set in adev->cg_flags.
+ * All five *_SOFT_OVERRIDE fields receive the same value: 0 when @enable is
+ * true, 1 when it is false.  The register is written back only when the new
+ * value differs from the one that was read.
+ */
+static void ih_v7_0_update_clockgating_state(struct amdgpu_device *adev,
+					     bool enable)
+{
+	uint32_t before, after, override;
+
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_IH_CG))
+		return;
+
+	before = RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL);
+	override = enable ? 0 : 1;
+
+	after = REG_SET_FIELD(before, IH_CLK_CTRL,
+			      DBUS_MUX_CLK_SOFT_OVERRIDE, override);
+	after = REG_SET_FIELD(after, IH_CLK_CTRL,
+			      OSSSYS_SHARE_CLK_SOFT_OVERRIDE, override);
+	after = REG_SET_FIELD(after, IH_CLK_CTRL,
+			      LIMIT_SMN_CLK_SOFT_OVERRIDE, override);
+	after = REG_SET_FIELD(after, IH_CLK_CTRL,
+			      DYN_CLK_SOFT_OVERRIDE, override);
+	after = REG_SET_FIELD(after, IH_CLK_CTRL,
+			      REG_CLK_SOFT_OVERRIDE, override);
+
+	if (after != before)
+		WREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL, after);
+}
+
+/*
+ * Clock-gating entry point: gating is requested when @state equals
+ * AMD_CG_STATE_GATE, any other state requests ungating.  Always returns 0.
+ */
+static int ih_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					  enum amd_clockgating_state state)
+{
+	bool gate = (state == AMD_CG_STATE_GATE);
+
+	ih_v7_0_update_clockgating_state(ip_block->adev, gate);
+
+	return 0;
+}
+
+/*
+ * Select the power mode of the IH buffer memory and the retry-interrupt CAM
+ * memory through IH_MEM_POWER_CTRL.
+ *
+ * @enable true  -> DS mode on for both memories (LS and SD stay off)
+ * @enable false -> LS, DS and SD all off
+ *
+ * IH_BUFFER_MEM_POWER_CTRL_EN is cleared and written out first, the mode
+ * bits are updated afterwards, and the control enable bit is set again in
+ * the final register write.
+ */
+static void ih_v7_0_update_ih_mem_power_gating(struct amdgpu_device *adev,
+					       bool enable)
+{
+	/* the DS_EN fields are the only ones that depend on @enable */
+	const uint32_t ds_en = enable ? 1 : 0;
+	uint32_t ctrl;
+
+	/* Disable ih sram power cntl before switch powergating mode */
+	ctrl = RREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL);
+	ctrl = REG_SET_FIELD(ctrl, IH_MEM_POWER_CTRL,
+			     IH_BUFFER_MEM_POWER_CTRL_EN, 0);
+	WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ctrl);
+
+	/* mem power mode; it is recommended to use DS mode when gating */
+	ctrl = REG_SET_FIELD(ctrl, IH_MEM_POWER_CTRL,
+			     IH_BUFFER_MEM_POWER_LS_EN, 0);
+	ctrl = REG_SET_FIELD(ctrl, IH_MEM_POWER_CTRL,
+			     IH_BUFFER_MEM_POWER_DS_EN, ds_en);
+	ctrl = REG_SET_FIELD(ctrl, IH_MEM_POWER_CTRL,
+			     IH_BUFFER_MEM_POWER_SD_EN, 0);
+
+	/* cam mem power mode */
+	ctrl = REG_SET_FIELD(ctrl, IH_MEM_POWER_CTRL,
+			     IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0);
+	ctrl = REG_SET_FIELD(ctrl, IH_MEM_POWER_CTRL,
+			     IH_RETRY_INT_CAM_MEM_POWER_DS_EN, ds_en);
+	ctrl = REG_SET_FIELD(ctrl, IH_MEM_POWER_CTRL,
+			     IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0);
+
+	/* re-enable power cntl */
+	ctrl = REG_SET_FIELD(ctrl, IH_MEM_POWER_CTRL,
+			     IH_BUFFER_MEM_POWER_CTRL_EN, 1);
+
+	WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ctrl);
+}
+
+/*
+ * Power-gating entry point.  The IH memory power mode is only updated when
+ * AMD_PG_SUPPORT_IH_SRAM_PG is set in adev->pg_flags; gating is requested
+ * when @state equals AMD_PG_STATE_GATE.  Always returns 0.
+ */
+static int ih_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+					  enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	if (!(adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG))
+		return 0;
+
+	ih_v7_0_update_ih_mem_power_gating(adev, state == AMD_PG_STATE_GATE);
+
+	return 0;
+}
+
+/*
+ * Report clock-gating state: AMD_CG_SUPPORT_IH_CG is OR-ed into @flags when
+ * IH_CLK_CTRL reads back as zero; @flags is left alone otherwise.
+ */
+static void ih_v7_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+	/* not named below; presumably RREG32_SOC15() picks it up - keep it */
+	struct amdgpu_device *adev = ip_block->adev;
+	u32 clk_ctrl;
+
+	clk_ctrl = RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL);
+	if (clk_ctrl == 0)
+		*flags |= AMD_CG_SUPPORT_IH_CG;
+}
+
+/* IP-block callback table referenced by ih_v7_0_ip_block. */
+static const struct amd_ip_funcs ih_v7_0_ip_funcs = {
+	.name			= "ih_v7_0",
+	/* init / teardown */
+	.early_init		= ih_v7_0_early_init,
+	.sw_init		= ih_v7_0_sw_init,
+	.sw_fini		= ih_v7_0_sw_fini,
+	.hw_init		= ih_v7_0_hw_init,
+	.hw_fini		= ih_v7_0_hw_fini,
+	/* power management */
+	.suspend		= ih_v7_0_suspend,
+	.resume			= ih_v7_0_resume,
+	/* idle / reset (still stubs) */
+	.is_idle		= ih_v7_0_is_idle,
+	.wait_for_idle		= ih_v7_0_wait_for_idle,
+	.soft_reset		= ih_v7_0_soft_reset,
+	/* clock and power gating */
+	.set_clockgating_state	= ih_v7_0_set_clockgating_state,
+	.set_powergating_state	= ih_v7_0_set_powergating_state,
+	.get_clockgating_state	= ih_v7_0_get_clockgating_state,
+};
+
+/* Ring-level IH callbacks; IV decoding uses the shared amdgpu_ih helpers. */
+static const struct amdgpu_ih_funcs ih_v7_0_funcs = {
+	.get_wptr	= ih_v7_0_get_wptr,
+	.decode_iv	= amdgpu_ih_decode_iv_helper,
+	.decode_iv_ts	= amdgpu_ih_decode_iv_ts_helper,
+	.set_rptr	= ih_v7_0_set_rptr,
+};
+
+/* Point the device's IH callback table at the v7.0 implementation. */
+static void ih_v7_0_set_interrupt_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_irq *irq = &adev->irq;
+
+	irq->ih_funcs = &ih_v7_0_funcs;
+}
+
+/* IP block descriptor for the IH v7.0 implementation. */
+const struct amdgpu_ip_block_version ih_v7_0_ip_block = {
+	.type	= AMD_IP_BLOCK_TYPE_IH,
+	.major	= 7,
+	.minor	= 0,
+	.rev	= 0,
+	.funcs	= &ih_v7_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.h b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.h
new file mode 100644
index 000000000000..af9dcbc451fd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __IH_V7_0_IH_H__
+#define __IH_V7_0_IH_H__
+
+extern const struct amdgpu_ip_block_version ih_v7_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
new file mode 100644
index 000000000000..cc626036ed9c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
@@ -0,0 +1,393 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_imu.h"
+#include "amdgpu_dpm.h"
+
+#include "imu_v11_0_3.h"
+
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+
+/*
+ * IMU firmware image names advertised by this module. They follow the
+ * "amdgpu/<prefix>_imu.bin" and "amdgpu/<prefix>_imu_kicker.bin" patterns
+ * that imu_v11_0_init_microcode() builds from the GC IP version prefix.
+ */
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu_kicker.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_imu.bin");
+
+/**
+ * imu_v11_0_init_microcode - request the IMU firmware image and register it
+ * @adev: amdgpu device
+ *
+ * Builds the firmware name from the GC IP version prefix, selecting the
+ * "_imu_kicker" variant when amdgpu_is_kicker_fw() says so. When the load
+ * type is AMDGPU_FW_LOAD_PSP, the IMU_I and IMU_D entries of
+ * adev->firmware.ucode[] are pointed at the image and their page-aligned
+ * sizes are added to adev->firmware.fw_size; otherwise only the ucode
+ * version from the header is cached in adev->gfx.imu_fw_version.
+ *
+ * Return: 0 on success, or the error from amdgpu_ucode_request(), in which
+ * case the firmware reference is released.
+ */
+static int imu_v11_0_init_microcode(struct amdgpu_device *adev)
+{
+	const struct imu_firmware_header_v1_0 *imu_hdr;
+	struct amdgpu_firmware_info *info;
+	const char *fw_suffix;
+	char ucode_prefix[30];
+	int err;
+
+	DRM_DEBUG("\n");
+
+	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+	/*
+	 * Keep the chosen variant around so that the error message below
+	 * names the file that was actually requested.
+	 */
+	fw_suffix = amdgpu_is_kicker_fw(adev) ? "_kicker" : "";
+	err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED,
+				   "amdgpu/%s_imu%s.bin", ucode_prefix, fw_suffix);
+	if (err) {
+		dev_err(adev->dev,
+			"gfx11: Failed to load firmware \"%s_imu%s.bin\"\n",
+			ucode_prefix, fw_suffix);
+		amdgpu_ucode_release(&adev->gfx.imu_fw);
+		return err;
+	}
+
+	imu_hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data;
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+		/* Both sections live in the same image; register each one. */
+		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_IMU_I];
+		info->ucode_id = AMDGPU_UCODE_ID_IMU_I;
+		info->fw = adev->gfx.imu_fw;
+		adev->firmware.fw_size +=
+			ALIGN(le32_to_cpu(imu_hdr->imu_iram_ucode_size_bytes), PAGE_SIZE);
+
+		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_IMU_D];
+		info->ucode_id = AMDGPU_UCODE_ID_IMU_D;
+		info->fw = adev->gfx.imu_fw;
+		adev->firmware.fw_size +=
+			ALIGN(le32_to_cpu(imu_hdr->imu_dram_ucode_size_bytes), PAGE_SIZE);
+	} else {
+		adev->gfx.imu_fw_version = le32_to_cpu(imu_hdr->header.ucode_version);
+	}
+
+	return 0;
+}
+
+/**
+ * imu_v11_0_load_microcode - copy the IMU IRAM and DRAM sections over MMIO
+ * @adev: amdgpu device
+ *
+ * Streams the IRAM section, then the DRAM section that follows it in the
+ * firmware image, one dword at a time through GFX_IMU_I_RAM_DATA and
+ * GFX_IMU_D_RAM_DATA. Each RAM address register is set to 0 before its copy
+ * and to adev->gfx.imu_fw_version afterwards.
+ *
+ * Return: 0 on success, -EINVAL if no IMU firmware is present or if the
+ * offsets/sizes in its header do not fit inside the image.
+ */
+static int imu_v11_0_load_microcode(struct amdgpu_device *adev)
+{
+	const struct imu_firmware_header_v1_0 *hdr;
+	const __le32 *fw_data;
+	u32 ucode_offset, iram_bytes, dram_bytes;
+	u32 i, fw_size;
+	size_t image_size;
+
+	if (!adev->gfx.imu_fw)
+		return -EINVAL;
+
+	image_size = adev->gfx.imu_fw->size;
+	if (image_size < sizeof(*hdr))
+		return -EINVAL;
+
+	hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data;
+	ucode_offset = le32_to_cpu(hdr->header.ucode_array_offset_bytes);
+	iram_bytes = le32_to_cpu(hdr->imu_iram_ucode_size_bytes);
+	dram_bytes = le32_to_cpu(hdr->imu_dram_ucode_size_bytes);
+
+	/*
+	 * The loops below trust the header; refuse an image whose sections
+	 * would make them read past the end of the firmware buffer.
+	 */
+	if (ucode_offset > image_size ||
+	    iram_bytes > image_size - ucode_offset ||
+	    dram_bytes > image_size - ucode_offset - iram_bytes) {
+		dev_err(adev->dev,
+			"gfx11: IMU firmware sections exceed image size\n");
+		return -EINVAL;
+	}
+
+	/* IRAM section starts at the ucode array offset. */
+	fw_data = (const __le32 *)(adev->gfx.imu_fw->data + ucode_offset);
+	fw_size = iram_bytes / 4;
+
+	WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_ADDR, 0);
+
+	for (i = 0; i < fw_size; i++)
+		WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_DATA, le32_to_cpup(fw_data++));
+
+	WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_ADDR, adev->gfx.imu_fw_version);
+
+	/* DRAM section follows the IRAM section in the image. */
+	fw_data = (const __le32 *)(adev->gfx.imu_fw->data + ucode_offset +
+				   iram_bytes);
+	fw_size = dram_bytes / 4;
+
+	WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_ADDR, 0);
+
+	for (i = 0; i < fw_size; i++)
+		WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_DATA, le32_to_cpup(fw_data++));
+
+	WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_ADDR, adev->gfx.imu_fw_version);
+
+	return 0;
+}
+
+/**
+ * imu_v11_0_wait_for_reset_status - poll GFX_IMU_GFX_RESET_CTRL
+ * @adev: amdgpu device
+ *
+ * Reads the register up to adev->usec_timeout times, 1 us apart, until its
+ * low five bits are all set.
+ *
+ * Return: 0 once the bits are set, -ETIMEDOUT if the polling budget runs out.
+ */
+static int imu_v11_0_wait_for_reset_status(struct amdgpu_device *adev)
+{
+	u32 status;
+	int tries;
+
+	for (tries = 0; tries < adev->usec_timeout; tries++) {
+		status = RREG32_SOC15(GC, 0, regGFX_IMU_GFX_RESET_CTRL);
+		if ((status & 0x1f) == 0x1f)
+			return 0;
+		udelay(1);
+	}
+
+	dev_err(adev->dev, "init imu: IMU start timeout\n");
+	return -ETIMEDOUT;
+}
+
+/**
+ * imu_v11_0_setup - program IMU access-control and feature-disable registers
+ * @adev: amdgpu device
+ *
+ * Opens the C2PMSG access-control masks, sets bit 0 of GFX_IMU_C2PMSG_16
+ * when the IMU is configured for DEBUG_MODE, and sets the 0x10007 bits in
+ * GFX_IMU_SCRATCH_10.
+ */
+static void imu_v11_0_setup(struct amdgpu_device *adev)
+{
+	u32 val;
+
+	/* enable IMU debug mode */
+	WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL0, 0xffffff);
+	WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL1, 0xffff);
+
+	if (adev->gfx.imu.mode == DEBUG_MODE) {
+		val = RREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16);
+		val |= 0x1;
+		WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, val);
+	}
+
+	/* disable imu Rtavfs, SmsRepair, DfllBTC, and ClkB */
+	val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10);
+	val |= 0x10007;
+	WREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10, val);
+}
+
+/**
+ * imu_v11_0_start - release the IMU core from reset and wait for it
+ * @adev: amdgpu device
+ *
+ * Clears bit 0 (CRESET) of GFX_IMU_CORE_CTRL, asks the DPM layer to power
+ * up GFX via the IMU on APUs, then polls for the reset status.
+ *
+ * Return: the result of imu_v11_0_wait_for_reset_status().
+ */
+static int imu_v11_0_start(struct amdgpu_device *adev)
+{
+	u32 core_ctrl;
+
+	/* Start IMU by set GFX_IMU_CORE_CTRL.CRESET = 0 */
+	core_ctrl = RREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL);
+	core_ctrl &= ~0x1U;
+	WREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL, core_ctrl);
+
+	if (adev->flags & AMD_IS_APU)
+		amdgpu_dpm_set_gfx_power_up_by_imu(adev);
+
+	return imu_v11_0_wait_for_reset_status(adev);
+}
+
+/*
+ * Register/value list written into the IMU RLC RAM for GC IP version 11.0.0
+ * (selected in imu_v11_0_program_rlc_ram()). program_imu_rlc_ram() emits
+ * the entries in array order; several registers appear more than once, so
+ * the order is presumably significant - do not sort or deduplicate.
+ * The final macro argument is presumably the addr_mask that gets OR-ed into
+ * the register offset (macro definition not visible here - TODO confirm).
+ * The values listed for GCMC_VM_AGP_BASE, GCMC_VM_AGP_TOP and
+ * GCMC_VM_FB_LOCATION_BASE/TOP are replaced at programming time.
+ */
+static const struct imu_rlc_ram_golden imu_rlc_ram_golden_11[] =
+{
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_RD_COMBINE_FLUSH, 0x00055555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_WR_COMBINE_FLUSH, 0x00055555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_DRAM_COMBINE_FLUSH, 0x00555555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC2, 0x00001ffe, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_CREDITS , 0x003f3fff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_TAG_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE0, 0x00041000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE0, 0x00040000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC, 0x00000017, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_ENABLE, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_CREDITS , 0x003f3fbf, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE0, 0x10201000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE1, 0x00000080, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE0, 0x1d041040, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE1, 0x80000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_IO_PRIORITY, 0x88888888, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MAM_CTRL, 0x0000d800, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ARB_FINAL, 0x000003f7, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ENABLE, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x00020000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0x000fffff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MISC, 0x0c48bff0, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SA_UNIT_DISABLE, 0x00fffc01, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_PRIM_CONFIG, 0x000fffe1, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_RB_BACKEND_DISABLE, 0x0fffff01, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xfffe0001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000500, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_START, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_END, 0x000fffff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_TOP_OF_DRAM_SLOT1, 0xff800000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_LOWER_TOP_OF_DRAM2, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_UPPER_TOP_OF_DRAM2, 0x00000fff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, 0x00001ffc, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000501, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL, 0x00080603, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL2, 0x00000003, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL3, 0x00100003, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL5, 0x00003fe0, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT0_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT1_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x00000545, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_0, 0x13455431, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_1, 0x13455431, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_2, 0x76027602, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_3, 0x76207620, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x00000345, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCUTCL2_HARVEST_BYPASS_GROUPS, 0x0000003e, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_BASE, 0x00006000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_TOP, 0x000061ff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BASE, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BOT, 0x00000002, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_TOP, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x00020000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA0_UCODE_SELFLOAD_CONTROL, 0x00000210, 0),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA1_UCODE_SELFLOAD_CONTROL, 0x00000210, 0),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPC_PSP_DEBUG, CPC_PSP_DEBUG__GPA_OVERRIDE_MASK, 0),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPG_PSP_DEBUG, CPG_PSP_DEBUG__GPA_OVERRIDE_MASK, 0)
+};
+
+/*
+ * Register/value list written into the IMU RLC RAM for GC IP version 11.0.2
+ * (selected in imu_v11_0_program_rlc_ram()). program_imu_rlc_ram() emits
+ * the entries in array order; several registers appear more than once, so
+ * the order is presumably significant - do not sort or deduplicate.
+ * The values listed for GCMC_VM_AGP_BASE, GCMC_VM_AGP_TOP and
+ * GCMC_VM_FB_LOCATION_BASE/TOP are replaced at programming time.
+ */
+static const struct imu_rlc_ram_golden imu_rlc_ram_golden_11_0_2[] =
+{
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MISC, 0x0c48bff0, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_CREDITS, 0x003f3fbf, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE0, 0x10200800, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE1, 0x00000088, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE0, 0x1d041040, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE1, 0x80000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_IO_PRIORITY, 0x88888888, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MAM_CTRL, 0x0000d800, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ARB_FINAL, 0x000007ef, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_DRAM_PAGE_BURST, 0x20080200, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ENABLE, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0x000fffff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_RD_COMBINE_FLUSH, 0x00055555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_WR_COMBINE_FLUSH, 0x00055555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_DRAM_COMBINE_FLUSH, 0x00555555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC2, 0x00001ffe, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_CREDITS, 0x003f3fff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_TAG_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE0, 0x00041000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE0, 0x00040000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC, 0x00000017, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_ENABLE, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SA_UNIT_DISABLE, 0x00fffc01, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_PRIM_CONFIG, 0x000fffe1, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_RB_BACKEND_DISABLE, 0x00000f01, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xfffe0001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL1_PIPE_STEER, 0x000000e4, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCH_PIPE_STEER, 0x000000e4, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_0, 0x01231023, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x00000243, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCUTCL2_HARVEST_BYPASS_GROUPS, 0x00000002, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000500, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_START, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_END, 0x000001ff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_BASE, 0x00006000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_TOP, 0x000061ff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_TOP_OF_DRAM_SLOT1, 0xff800000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_LOWER_TOP_OF_DRAM2, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_UPPER_TOP_OF_DRAM2, 0x00000fff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BASE, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BOT, 0x00000002, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_TOP, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, 0x00001ffc, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x00002825, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000501, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL, 0x00080603, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL2, 0x00000003, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL3, 0x00100003, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL5, 0x00003fe0, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT0_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT1_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA0_UCODE_SELFLOAD_CONTROL, 0x00000210, 0),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA1_UCODE_SELFLOAD_CONTROL, 0x00000210, 0),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPC_PSP_DEBUG, CPC_PSP_DEBUG__GPA_OVERRIDE_MASK, 0),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPG_PSP_DEBUG, CPG_PSP_DEBUG__GPA_OVERRIDE_MASK, 0)
+};
+
+/**
+ * program_imu_rlc_ram - write a register/value table into the IMU RLC RAM
+ * @adev: amdgpu device
+ * @regs: table of entries to write, in order
+ * @array_size: number of entries in @regs
+ *
+ * For every entry the register address is taken from adev->reg_offset[]
+ * plus the entry's register, OR-ed with the entry's addr_mask, and written
+ * together with the data through GFX_IMU_RLC_RAM_ADDR_HIGH/ADDR_LOW/DATA.
+ * Four registers ignore the table value: GCMC_VM_AGP_BASE gets 0x00ffffff,
+ * GCMC_VM_AGP_TOP gets 0, and GCMC_VM_FB_LOCATION_BASE/TOP get the VRAM
+ * start/end from adev->gmc shifted right by 24. An all-zero entry is
+ * written after the table.
+ */
+static void program_imu_rlc_ram(struct amdgpu_device *adev,
+				const struct imu_rlc_ram_golden *regs,
+				const u32 array_size)
+{
+	const struct imu_rlc_ram_golden *entry;
+	u32 reg, data;
+	u32 i;	/* unsigned, to match the type of array_size */
+
+	for (i = 0; i < array_size; ++i) {
+		entry = &regs[i];
+		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
+		reg |= entry->addr_mask;
+
+		/* Device-dependent registers override the static table value. */
+		data = entry->data;
+		if (entry->reg == regGCMC_VM_AGP_BASE)
+			data = 0x00ffffff;
+		else if (entry->reg == regGCMC_VM_AGP_TOP)
+			data = 0x0;
+		else if (entry->reg == regGCMC_VM_FB_LOCATION_BASE)
+			data = adev->gmc.vram_start >> 24;
+		else if (entry->reg == regGCMC_VM_FB_LOCATION_TOP)
+			data = adev->gmc.vram_end >> 24;
+
+		WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
+		WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, reg);
+		WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, data);
+	}
+
+	/* Append an all-zero entry to indicate the last entry */
+	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
+	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, 0);
+	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, 0);
+}
+
+/**
+ * imu_v11_0_program_rlc_ram - load the per-ASIC register list into RLC RAM
+ * @adev: amdgpu device
+ *
+ * Writes 0x2 to GFX_IMU_RLC_RAM_INDEX, programs the table that matches the
+ * GC IP version (11.0.0, 11.0.2 or 11.0.3), then sets the RAM_VALID bit in
+ * GFX_IMU_RLC_RAM_INDEX. For any other GC IP version nothing is programmed,
+ * a one-time warning is emitted and RAM_VALID is left untouched.
+ */
+static void imu_v11_0_program_rlc_ram(struct amdgpu_device *adev)
+{
+	u32 reg_data;
+
+	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, 0x2);
+
+	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+	case IP_VERSION(11, 0, 0):
+		program_imu_rlc_ram(adev, imu_rlc_ram_golden_11,
+				    (const u32)ARRAY_SIZE(imu_rlc_ram_golden_11));
+		break;
+	case IP_VERSION(11, 0, 2):
+		program_imu_rlc_ram(adev, imu_rlc_ram_golden_11_0_2,
+				    (const u32)ARRAY_SIZE(imu_rlc_ram_golden_11_0_2));
+		break;
+	case IP_VERSION(11, 0, 3):
+		imu_v11_0_3_program_rlc_ram(adev);
+		break;
+	default:
+		/*
+		 * No table for this GC revision. Warn instead of halting the
+		 * kernel with BUG(), and do not flag an unprogrammed RAM as
+		 * valid.
+		 */
+		WARN_ONCE(1, "imu_v11_0: no RLC RAM register list for this GC IP version\n");
+		return;
+	}
+
+	/* Indicate the contents of the RAM are valid */
+	reg_data = RREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX);
+	reg_data |= GFX_IMU_RLC_RAM_INDEX__RAM_VALID_MASK;
+	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, reg_data);
+}
+
+/*
+ * IMU callback table for GFX v11.0, binding each amdgpu_imu_funcs hook to
+ * the imu_v11_0_* implementation in this file. Declared extern in
+ * imu_v11_0.h.
+ */
+const struct amdgpu_imu_funcs gfx_v11_0_imu_funcs = {
+ .init_microcode = imu_v11_0_init_microcode,
+ .load_microcode = imu_v11_0_load_microcode,
+ .setup_imu = imu_v11_0_setup,
+ .start_imu = imu_v11_0_start,
+ .program_rlc_ram = imu_v11_0_program_rlc_ram,
+ .wait_for_reset_status = imu_v11_0_wait_for_reset_status,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.h b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.h
new file mode 100644
index 000000000000..e71f96fc2f06
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __IMU_V11_0_H__
+#define __IMU_V11_0_H__
+
+/* IMU callback table for GFX v11.0, defined in imu_v11_0.c. */
+extern const struct amdgpu_imu_funcs gfx_v11_0_imu_funcs;
+
+#endif /* __IMU_V11_0_H__ */
+
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c
new file mode 100644
index 000000000000..fc69c1a29e23
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_imu.h"
+#include "imu_v11_0_3.h"
+
+#include "gc/gc_11_0_3_offset.h"
+#include "gc/gc_11_0_3_sh_mask.h"
+
+/*
+ * Register/value list written into the IMU RLC RAM by
+ * imu_v11_0_3_program_rlc_ram(). program_rlc_ram_register_setting() emits
+ * the entries in array order; several registers appear more than once
+ * (CC_GC_SHADER_ARRAY_CONFIG with a different last argument each time), so
+ * the order is presumably significant - do not sort or deduplicate.
+ * The values listed for GCMC_VM_AGP_BASE, GCMC_VM_AGP_TOP and
+ * GCMC_VM_FB_LOCATION_BASE/TOP are replaced at programming time.
+ */
+static const struct imu_rlc_ram_golden imu_rlc_ram_golden_11_0_3[] = {
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_RD_COMBINE_FLUSH, 0x00055555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_WR_COMBINE_FLUSH, 0x00055555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_DRAM_COMBINE_FLUSH, 0x00555555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC2, 0x00001ffe, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_CREDITS, 0x003f3fff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_TAG_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE0, 0x00041000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE0, 0x00040000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC, 0x00000017, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_ENABLE, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_CREDITS, 0x003f3fbf, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE0, 0x10200800, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE1, 0x00000088, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE0, 0x1d041040, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE1, 0x80000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_IO_PRIORITY, 0x88888888, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MAM_CTRL, 0x0000d800, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ARB_FINAL, 0x000007ff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_DRAM_PAGE_BURST, 0x20080200, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ENABLE, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x00020000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0x000fffff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MISC, 0x0c48bff0, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SA_UNIT_DISABLE, 0x00fffc01, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_PRIM_CONFIG, 0x000fffe1, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_RB_BACKEND_DISABLE, 0xffffff01, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xfffe0001, 0x40000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xfffe0001, 0x42000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xffff0001, 0x44000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xffff0001, 0x46000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xffff0001, 0x48000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xffff0001, 0x4A000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCGTS_TCC_DISABLE, 0x00000001, 0x00000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_RATE_CONFIG, 0x00000001, 0x00000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_EDC_CONFIG, 0x00000001, 0x00000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000500, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_START, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_END, 0x000005ff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_BASE, 0x00006000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_TOP, 0x000065ff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_TOP_OF_DRAM_SLOT1, 0xff800000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_LOWER_TOP_OF_DRAM2, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_UPPER_TOP_OF_DRAM2, 0x00000fff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, 0x00001ffc, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000551, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL, 0x00080603, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL2, 0x00000003, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL3, 0x00100003, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL5, 0x00003fe0, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT0_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT1_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x00000444, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_0, 0x54105410, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_2, 0x76323276, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x00000244, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCUTCL2_HARVEST_BYPASS_GROUPS, 0x00000006, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BASE, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BOT, 0x00000002, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_TOP, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x00020000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA0_UCODE_SELFLOAD_CONTROL, 0x00000210, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA1_UCODE_SELFLOAD_CONTROL, 0x00000210, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPC_PSP_DEBUG, CPC_PSP_DEBUG__GPA_OVERRIDE_MASK, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPG_PSP_DEBUG, CPG_PSP_DEBUG__GPA_OVERRIDE_MASK, 0xe0000000),
+};
+
+/**
+ * program_rlc_ram_register_setting - write a register/value table into the
+ * IMU RLC RAM
+ * @adev: amdgpu device
+ * @regs: table of entries to write, in order
+ * @array_size: number of entries in @regs
+ *
+ * For every entry the register address is taken from adev->reg_offset[]
+ * plus the entry's register, OR-ed with the entry's addr_mask, and written
+ * together with the data through GFX_IMU_RLC_RAM_ADDR_HIGH/ADDR_LOW/DATA.
+ * Four registers ignore the table value: GCMC_VM_AGP_BASE gets 0x00ffffff,
+ * GCMC_VM_AGP_TOP gets 0, and GCMC_VM_FB_LOCATION_BASE/TOP get the VRAM
+ * start/end from adev->gmc shifted right by 24. An all-zero entry is
+ * written after the table.
+ */
+static void program_rlc_ram_register_setting(struct amdgpu_device *adev,
+					     const struct imu_rlc_ram_golden *regs,
+					     const u32 array_size)
+{
+	const struct imu_rlc_ram_golden *entry;
+	u32 reg, data;
+	u32 i;	/* unsigned, to match the type of array_size */
+
+	for (i = 0; i < array_size; ++i) {
+		entry = &regs[i];
+		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
+		reg |= entry->addr_mask;
+
+		/* Device-dependent registers override the static table value. */
+		data = entry->data;
+		if (entry->reg == regGCMC_VM_AGP_BASE)
+			data = 0x00ffffff;
+		else if (entry->reg == regGCMC_VM_AGP_TOP)
+			data = 0x0;
+		else if (entry->reg == regGCMC_VM_FB_LOCATION_BASE)
+			data = adev->gmc.vram_start >> 24;
+		else if (entry->reg == regGCMC_VM_FB_LOCATION_TOP)
+			data = adev->gmc.vram_end >> 24;
+
+		WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
+		WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, reg);
+		WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, data);
+	}
+
+	/* Append an all-zero entry to indicate the last entry */
+	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
+	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, 0);
+	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, 0);
+}
+
+/*
+ * Write the imu_rlc_ram_golden_11_0_3 table into the IMU RLC RAM via
+ * program_rlc_ram_register_setting(). Exported through imu_v11_0_3.h.
+ */
+void imu_v11_0_3_program_rlc_ram(struct amdgpu_device *adev)
+{
+ program_rlc_ram_register_setting(adev,
+ imu_rlc_ram_golden_11_0_3,
+ (const u32)ARRAY_SIZE(imu_rlc_ram_golden_11_0_3));
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.h b/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.h
new file mode 100644
index 000000000000..702be568f26b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __IMU_V11_0_3_H__
+#define __IMU_V11_0_3_H__
+
+/* Program the IMU RLC RAM register list; implemented in imu_v11_0_3.c. */
+void imu_v11_0_3_program_rlc_ram(struct amdgpu_device *adev);
+
+#endif /* __IMU_V11_0_3_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c
new file mode 100644
index 000000000000..58cd87db8061
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c
@@ -0,0 +1,406 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_imu.h"
+#include "amdgpu_dpm.h"
+
+#include "imu_v12_0.h"
+
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "mmhub/mmhub_4_1_0_offset.h"
+
+/*
+ * Firmware images advertised by this file.  imu_v12_0_init_microcode()
+ * requests names of the form "amdgpu/<prefix>_imu.bin" or
+ * "amdgpu/<prefix>_imu_kicker.bin".
+ */
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_imu_kicker.bin");
+
+/*
+ * Initial value OR'ed into the low RLC RAM address word by
+ * program_imu_rlc_ram(), until a GRBM_GFX_INDEX entry replaces it.
+ */
+#define TRANSFER_RAM_MASK 0x001c0000
+
+/*
+ * imu_v12_0_init_microcode - request the IMU firmware image and register it
+ * @adev: amdgpu device
+ *
+ * Requests either the "_imu_kicker" or the plain "_imu" image, depending on
+ * amdgpu_is_kicker_fw(), and caches the ucode version from its header.  When
+ * firmware is loaded through PSP, the IRAM and DRAM parts are recorded in
+ * adev->firmware.ucode[] and their page-aligned sizes are added to
+ * adev->firmware.fw_size.
+ *
+ * Returns 0 on success or the error from amdgpu_ucode_request(); on failure
+ * the firmware reference is released.
+ */
+static int imu_v12_0_init_microcode(struct amdgpu_device *adev)
+{
+	const struct imu_firmware_header_v1_0 *imu_hdr;
+	struct amdgpu_firmware_info *info;
+	const char *fw_suffix;
+	char ucode_prefix[30];
+	int err;
+
+	DRM_DEBUG("\n");
+
+	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
+	if (amdgpu_is_kicker_fw(adev)) {
+		fw_suffix = "_imu_kicker";
+		err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED,
+					   "amdgpu/%s_imu_kicker.bin", ucode_prefix);
+	} else {
+		fw_suffix = "_imu";
+		err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED,
+					   "amdgpu/%s_imu.bin", ucode_prefix);
+	}
+	if (err) {
+		/* Report the image that was actually requested (kicker or not). */
+		dev_err(adev->dev,
+			"gfx12: Failed to load firmware \"%s%s.bin\"\n",
+			ucode_prefix, fw_suffix);
+		amdgpu_ucode_release(&adev->gfx.imu_fw);
+		return err;
+	}
+
+	imu_hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data;
+	adev->gfx.imu_fw_version = le32_to_cpu(imu_hdr->header.ucode_version);
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+		/* Both table slots point at the same blob; only the sizes differ. */
+		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_IMU_I];
+		info->ucode_id = AMDGPU_UCODE_ID_IMU_I;
+		info->fw = adev->gfx.imu_fw;
+		adev->firmware.fw_size +=
+			ALIGN(le32_to_cpu(imu_hdr->imu_iram_ucode_size_bytes), PAGE_SIZE);
+
+		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_IMU_D];
+		info->ucode_id = AMDGPU_UCODE_ID_IMU_D;
+		info->fw = adev->gfx.imu_fw;
+		adev->firmware.fw_size +=
+			ALIGN(le32_to_cpu(imu_hdr->imu_dram_ucode_size_bytes), PAGE_SIZE);
+	}
+
+	return 0;
+}
+
+/*
+ * Copy the two ucode sections of the loaded IMU firmware into the IMU RAM
+ * data ports, one dword at a time: first imu_iram_ucode_size_bytes through
+ * GFX_IMU_I_RAM_DATA, then imu_dram_ucode_size_bytes through
+ * GFX_IMU_D_RAM_DATA.  Each address register is set to 0 before the upload
+ * and to the cached firmware version afterwards.
+ *
+ * Returns -EINVAL when adev->gfx.imu_fw is not set, 0 otherwise.
+ */
+static int imu_v12_0_load_microcode(struct amdgpu_device *adev)
+{
+	const struct imu_firmware_header_v1_0 *hdr;
+	const __le32 *iram, *dram;
+	unsigned iram_dwords, dram_dwords, idx;
+	u32 iram_bytes;
+
+	if (!adev->gfx.imu_fw)
+		return -EINVAL;
+
+	hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data;
+
+	/* The DRAM section directly follows the IRAM section in the blob. */
+	iram_bytes = le32_to_cpu(hdr->imu_iram_ucode_size_bytes);
+	iram = (const __le32 *)(adev->gfx.imu_fw->data +
+				le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+	dram = (const __le32 *)((const u8 *)iram + iram_bytes);
+	iram_dwords = iram_bytes / 4;
+	dram_dwords = le32_to_cpu(hdr->imu_dram_ucode_size_bytes) / 4;
+
+	WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_ADDR, 0);
+	for (idx = 0; idx < iram_dwords; idx++)
+		WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_DATA, le32_to_cpu(iram[idx]));
+	WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_ADDR, adev->gfx.imu_fw_version);
+
+	WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_ADDR, 0);
+	for (idx = 0; idx < dram_dwords; idx++)
+		WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_DATA, le32_to_cpu(dram[idx]));
+	WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_ADDR, adev->gfx.imu_fw_version);
+
+	return 0;
+}
+
+/*
+ * Poll GFX_IMU_GFX_RESET_CTRL, with a 1 us delay between reads, for at most
+ * adev->usec_timeout iterations until its low five bits are all set.
+ *
+ * Returns 0 once the bits are set, -ETIMEDOUT (after logging) otherwise.
+ */
+static int imu_v12_0_wait_for_reset_status(struct amdgpu_device *adev)
+{
+	int tries;
+
+	for (tries = 0; tries < adev->usec_timeout; tries++) {
+		u32 reset_ctrl = RREG32_SOC15(GC, 0, regGFX_IMU_GFX_RESET_CTRL);
+
+		if ((reset_ctrl & 0x1f) == 0x1f)
+			return 0;
+
+		udelay(1);
+	}
+
+	dev_err(adev->dev, "init imu: IMU start timeout\n");
+	return -ETIMEDOUT;
+}
+
+/*
+ * Write the two C2PMSG access-control registers and, only when the IMU is
+ * configured for DEBUG_MODE, additionally set bit 0 of GFX_IMU_C2PMSG_16 and
+ * OR 0x20010007 into GFX_IMU_SCRATCH_10.
+ */
+static void imu_v12_0_setup(struct amdgpu_device *adev)
+{
+	u32 val;
+
+	WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL0, 0xffffff);
+	WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL1, 0xffff);
+
+	/* Everything below applies to debug mode only. */
+	if (adev->gfx.imu.mode != DEBUG_MODE)
+		return;
+
+	val = RREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16);
+	val |= 0x1;
+	WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, val);
+
+	val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10);
+	val |= 0x20010007;
+	WREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10, val);
+}
+
+/*
+ * Clear bit 0 of GFX_IMU_CORE_CTRL, ask the DPM layer to power up GFX via
+ * the IMU on APUs, then wait for the reset status.
+ *
+ * Returns the result of imu_v12_0_wait_for_reset_status().
+ */
+static int imu_v12_0_start(struct amdgpu_device *adev)
+{
+	u32 core_ctrl = RREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL);
+
+	core_ctrl &= ~0x1U;
+	WREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL, core_ctrl);
+
+	if (adev->flags & AMD_IS_APU)
+		amdgpu_dpm_set_gfx_power_up_by_imu(adev);
+
+	return imu_v12_0_wait_for_reset_status(adev);
+}
+
+/*
+ * Built-in register/value list that imu_v12_0_program_rlc_ram() hands to
+ * program_imu_rlc_ram_old(), which writes the entries in array order.
+ *
+ * The data given here for regGCMC_VM_AGP_BASE, regGCMC_VM_AGP_TOP,
+ * regGCMC_VM_FB_LOCATION_BASE and regGCMC_VM_FB_LOCATION_TOP is replaced at
+ * programming time by program_imu_rlc_ram_old().
+ *
+ * NOTE(review): the last macro argument is presumably the addr_mask OR'ed
+ * into the RAM address; every entry uses 0x1c0000 except
+ * regGCMC_VM_NB_TOP_OF_DRAM_SLOT1 (0xe0000000) - confirm that is intended.
+ */
+static const struct imu_rlc_ram_golden imu_rlc_ram_golden_12_0_1[] = {
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCH_PIPE_STEER, 0x1e4, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL1X_PIPE_STEER, 0x1e4, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL1_PIPE_STEER, 0x1e4, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_0, 0x13571357, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_1, 0x64206420, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_2, 0x2460246, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_3, 0x75317531, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xc0d41183, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA0_CHICKEN_BITS, 0x507d1c0, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA1_CHICKEN_BITS, 0x507d1c0, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCP_RB_WPTR_POLL_CNTL, 0x600100, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_CREDITS, 0x3f7fff, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_CREDITS, 0x3f7ebf, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_TAG_RESERVE0, 0x2e00000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_TAG_RESERVE1, 0x1a078, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_TAG_RESERVE2, 0x0, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_TAG_RESERVE0, 0x0, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_TAG_RESERVE1, 0x12030, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_TAG_RESERVE2, 0x0, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_VCC_RESERVE0, 0x19041000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_VCC_RESERVE1, 0x80000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_VCC_RESERVE0, 0x1e080000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_VCC_RESERVE1, 0x80000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_PRIORITY, 0x880, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_PRIORITY, 0x8880, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_ARB_FINAL, 0x17, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_ARB_FINAL, 0x77, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_ENABLE, 0x00000001, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_ENABLE, 0x00000001, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x20000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0c, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0xfffff, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_MISC, 0x0091, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_MISC, 0x0091, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0xe0000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCR_GENERAL_CNTL, 0x00008500, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0x00880007, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regTD_CNTL, 0x00000001, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regRMI_GENERAL_CNTL, 0x01e00000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0x00000001, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regRMI_GENERAL_CNTL, 0x01e00000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0x00000100, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regRMI_GENERAL_CNTL, 0x01e00000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0x00000101, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regRMI_GENERAL_CNTL, 0x01e00000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0xe0000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x08200545, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBMH_CP_PERFMON_CNTL, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCB_PERFCOUNTER0_SELECT1, 0x000fffff, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCP_DEBUG_2, 0x00020000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCP_CPC_DEBUG, 0x00500010, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000500, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x00000001, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_START, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_END, 0x0000000f, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_BASE, 0x00006000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_TOP, 0x0000600f, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_TOP_OF_DRAM_SLOT1, 0xff800000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_LOWER_TOP_OF_DRAM2, 0x00000001, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_UPPER_TOP_OF_DRAM2, 0x0000ffff, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BASE, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BOT, 0x00000002, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_TOP, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, 0x00001ffc, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000551, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL, 0x00080603, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL2, 0x00000003, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL3, 0x00100003, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL5, 0x00003fe0, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x0003d000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0x0003d7ff, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, 0, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, 0, 0x1c0000)
+};
+
+/*
+ * Write @array_size golden entries from @regs into the IMU RLC RAM.
+ *
+ * For each entry the RAM address is the entry's absolute register offset
+ * OR'ed with its addr_mask.  The table data is used as is, except for four
+ * registers whose value is substituted here: AGP_BASE and AGP_TOP get fixed
+ * constants, FB_LOCATION_BASE/TOP get the VRAM start/end shifted right by 24.
+ */
+static void program_imu_rlc_ram_old(struct amdgpu_device *adev,
+				    const struct imu_rlc_ram_golden *regs,
+				    const u32 array_size)
+{
+	u32 idx;
+
+	for (idx = 0; idx < array_size; idx++) {
+		const struct imu_rlc_ram_golden *entry = &regs[idx];
+		u32 addr, value;
+
+		addr = adev->reg_offset[entry->hwip][entry->instance][entry->segment] +
+		       entry->reg;
+		addr |= entry->addr_mask;
+
+		switch (entry->reg) {
+		case regGCMC_VM_AGP_BASE:
+			value = 0x00ffffff;
+			break;
+		case regGCMC_VM_AGP_TOP:
+			value = 0x0;
+			break;
+		case regGCMC_VM_FB_LOCATION_BASE:
+			value = adev->gmc.vram_start >> 24;
+			break;
+		case regGCMC_VM_FB_LOCATION_TOP:
+			value = adev->gmc.vram_end >> 24;
+			break;
+		default:
+			value = entry->data;
+			break;
+		}
+
+		WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
+		WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, addr);
+		WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, value);
+	}
+}
+
+/*
+ * Repack a GRBM_GFX_INDEX value into one half of an RLC RAM address pair.
+ *
+ * With @high set, returns INSTANCE_INDEX shifted right by 5.  Otherwise
+ * returns the low word: INSTANCE_INDEX[4:0] in bits 4:0, the SE/SA/instance
+ * broadcast flags at bits 18/19/20, SE_INDEX from bit 21 and SA_INDEX from
+ * bit 25.  @adev is not used.
+ */
+static u32 imu_v12_0_grbm_gfx_index_remap(struct amdgpu_device *adev,
+					  u32 data, bool high)
+{
+	u32 inst = REG_GET_FIELD(data, GRBM_GFX_INDEX, INSTANCE_INDEX);
+	u32 low;
+
+	if (high)
+		return inst >> 5;
+
+	low = inst & 0x1f;
+	low |= REG_GET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES) << 18;
+	low |= REG_GET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES) << 19;
+	low |= REG_GET_FIELD(data, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES) << 20;
+	low |= REG_GET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX) << 21;
+	low |= REG_GET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX) << 25;
+
+	return low;
+}
+
+/*
+ * Pick the value to program for register offset @reg.
+ *
+ * For the GCMC_VM_* registers tested below, the value currently held by the
+ * matching MMMC_VM_* register on MMHUB is read back and returned.  For any
+ * other register, @data is returned unchanged.
+ */
+static u32 imu_v12_init_gfxhub_settings(struct amdgpu_device *adev,
+					u32 reg, u32 data)
+{
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_FB_LOCATION_BASE))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_FB_LOCATION_TOP))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_TOP);
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_FB_OFFSET))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET);
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_AGP_BASE))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE);
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_AGP_BOT))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT);
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_AGP_TOP))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP);
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR);
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR);
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_START))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_LOCAL_FB_ADDRESS_START);
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_END))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_LOCAL_FB_ADDRESS_END);
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_LOCAL_SYSMEM_ADDRESS_START))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_LOCAL_SYSMEM_ADDRESS_START);
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_LOCAL_SYSMEM_ADDRESS_END))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_LOCAL_SYSMEM_ADDRESS_END);
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB);
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB);
+
+	/* Not a mirrored register: keep the caller's value. */
+	return data;
+}
+
+/*
+ * Program the IMU RLC RAM from a flat array of u32 triplets.
+ *
+ * Element 0 of each triplet is the register offset and element 2 the data;
+ * element 1 is not read here.  Nothing is written unless @array_size is a
+ * multiple of 3.  A GRBM_GFX_INDEX triplet is not written to the RAM itself:
+ * it only updates the high/low address bits applied to the entries that
+ * follow it.
+ */
+static void program_imu_rlc_ram(struct amdgpu_device *adev,
+				const u32 *regs,
+				const u32 array_size)
+{
+	u32 addr_hi = 0, addr_lo = TRANSFER_RAM_MASK;
+	u32 idx;
+
+	if (array_size % 3 != 0)
+		return;
+
+	for (idx = 0; idx < array_size; idx += 3) {
+		u32 reg = regs[idx];
+		u32 data = imu_v12_init_gfxhub_settings(adev, reg, regs[idx + 2]);
+
+		if (reg == SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX)) {
+			addr_lo = imu_v12_0_grbm_gfx_index_remap(adev, data, false);
+			addr_hi = imu_v12_0_grbm_gfx_index_remap(adev, data, true);
+			continue;
+		}
+
+		WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, addr_hi);
+		WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, reg | addr_lo);
+		WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, data);
+	}
+}
+
+/*
+ * Fill the IMU RLC RAM for GC 12.0.0 / 12.0.1: write 0x2 to the RAM index
+ * register, program the register list, append an all-zero entry and finally
+ * set RAM_VALID in the index register.  Any other GC version hits BUG().
+ */
+static void imu_v12_0_program_rlc_ram(struct amdgpu_device *adev)
+{
+	const u32 *data = NULL;
+	u32 index_val, size = 0;
+	int r = -EINVAL;
+
+	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, 0x2);
+
+	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+	case IP_VERSION(12, 0, 0):
+	case IP_VERSION(12, 0, 1):
+		/*
+		 * NOTE(review): r is never reassigned in this function, so
+		 * only the built-in golden table path runs here; data/size
+		 * stay NULL/0.
+		 */
+		if (r)
+			program_imu_rlc_ram_old(adev, imu_rlc_ram_golden_12_0_1,
+						ARRAY_SIZE(imu_rlc_ram_golden_12_0_1));
+		else
+			program_imu_rlc_ram(adev, data, size);
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	/* An all-zero address/data entry marks the end of the list. */
+	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
+	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, 0);
+	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, 0);
+
+	index_val = RREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX);
+	index_val |= GFX_IMU_RLC_RAM_INDEX__RAM_VALID_MASK;
+	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, index_val);
+}
+
+/*
+ * IMU callback table exported to the rest of the driver; every hook points
+ * at the matching imu_v12_0_* function defined in this file.
+ */
+const struct amdgpu_imu_funcs gfx_v12_0_imu_funcs = {
+ .init_microcode = imu_v12_0_init_microcode,
+ .load_microcode = imu_v12_0_load_microcode,
+ .setup_imu = imu_v12_0_setup,
+ .start_imu = imu_v12_0_start,
+ .program_rlc_ram = imu_v12_0_program_rlc_ram,
+ .wait_for_reset_status = imu_v12_0_wait_for_reset_status,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.h b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.h
new file mode 100644
index 000000000000..a1f50cb1aeab
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __IMU_V12_0_H__
+#define __IMU_V12_0_H__
+
+/* IMU callback table defined in imu_v12_0.c. */
+extern const struct amdgpu_imu_funcs gfx_v12_0_imu_funcs;
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c
new file mode 100644
index 000000000000..0027a639c7e6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include "amdgpu.h"
+#include "isp_v4_1_0.h"
+
+/*
+ * Interrupt source IDs, in the order isp_v4_1_0_hw_init() maps them to the
+ * IRQ resources of the "amd_isp_capture" cell.
+ */
+static const unsigned int isp_4_1_0_int_srcid[MAX_ISP410_INT_SRC] = {
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT9,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT10,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT11,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT12,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT13,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT14,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT15,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT16
+};
+
+/*
+ * isp_v4_1_0_hw_init - describe the ISP sub-devices and register them as MFD cells
+ * @isp: amdgpu ISP instance
+ *
+ * Builds three MFD cells ("amd_isp_capture", "amd_isp_i2c_designware" and
+ * "amdisp-pinctrl") with their MMIO/IRQ resources and one shared platform
+ * data object, then registers them with mfd_add_hotplug_devices().
+ *
+ * Returns 0 on success, -EINVAL when adev->rmmio_size is below 0x5289,
+ * -ENOMEM on allocation failure, or the error from
+ * mfd_add_hotplug_devices().  On failure everything allocated by this call
+ * is freed and the corresponding pointers in @isp are cleared.
+ */
+static int isp_v4_1_0_hw_init(struct amdgpu_isp *isp)
+{
+	struct amdgpu_device *adev = isp->adev;
+	int idx, int_idx, num_res, r;
+	u64 isp_base;
+
+	/* A size of 0 is already rejected by the lower-bound check. */
+	if (adev->rmmio_size < 0x5289)
+		return -EINVAL;
+
+	/*
+	 * Start from a clean slate so the failure path only ever frees memory
+	 * allocated by this call, never stale pointers left behind by an
+	 * earlier init/fini cycle.
+	 */
+	isp->isp_cell = NULL;
+	isp->isp_res = NULL;
+	isp->isp_pdata = NULL;
+	isp->isp_i2c_res = NULL;
+	isp->isp_gpio_res = NULL;
+
+	isp_base = adev->rmmio_base;
+
+	isp->isp_cell = kcalloc(3, sizeof(struct mfd_cell), GFP_KERNEL);
+	if (!isp->isp_cell) {
+		r = -ENOMEM;
+		drm_err(&adev->ddev,
+			"%s: isp mfd cell alloc failed\n", __func__);
+		goto failure;
+	}
+
+	num_res = MAX_ISP410_MEM_RES + MAX_ISP410_INT_SRC;
+	isp->isp_res = kcalloc(num_res, sizeof(struct resource),
+			       GFP_KERNEL);
+	if (!isp->isp_res) {
+		r = -ENOMEM;
+		drm_err(&adev->ddev,
+			"%s: isp mfd res alloc failed\n", __func__);
+		goto failure;
+	}
+
+	isp->isp_pdata = kzalloc(sizeof(*isp->isp_pdata), GFP_KERNEL);
+	if (!isp->isp_pdata) {
+		r = -ENOMEM;
+		drm_err(&adev->ddev,
+			"%s: isp platform data alloc failed\n", __func__);
+		goto failure;
+	}
+
+	/* initialize isp platform data */
+	isp->isp_pdata->adev = (void *)adev;
+	isp->isp_pdata->asic_type = adev->asic_type;
+	isp->isp_pdata->base_rmmio_size = adev->rmmio_size;
+
+	/*
+	 * NOTE(review): struct resource .end is inclusive, while the memory
+	 * ranges below use start + *_SIZE; confirm the extra byte is intended.
+	 */
+	isp->isp_res[0].name = "isp_4_1_0_reg";
+	isp->isp_res[0].flags = IORESOURCE_MEM;
+	isp->isp_res[0].start = isp_base;
+	isp->isp_res[0].end = isp_base + ISP_REGS_OFFSET_END;
+
+	isp->isp_res[1].name = "isp_4_1_phy0_reg";
+	isp->isp_res[1].flags = IORESOURCE_MEM;
+	isp->isp_res[1].start = isp_base + ISP410_PHY0_OFFSET;
+	isp->isp_res[1].end = isp_base + ISP410_PHY0_OFFSET + ISP410_PHY0_SIZE;
+
+	/* The IRQ resources follow the memory resources in isp_res[]. */
+	for (idx = MAX_ISP410_MEM_RES, int_idx = 0; idx < num_res; idx++, int_idx++) {
+		isp->isp_res[idx].name = "isp_4_1_0_irq";
+		isp->isp_res[idx].flags = IORESOURCE_IRQ;
+		isp->isp_res[idx].start =
+			amdgpu_irq_create_mapping(adev, isp_4_1_0_int_srcid[int_idx]);
+		isp->isp_res[idx].end =
+			isp->isp_res[idx].start;
+	}
+
+	isp->isp_cell[0].name = "amd_isp_capture";
+	isp->isp_cell[0].num_resources = num_res;
+	isp->isp_cell[0].resources = &isp->isp_res[0];
+	isp->isp_cell[0].platform_data = isp->isp_pdata;
+	isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data);
+
+	/* initialize isp i2c platform data */
+	isp->isp_i2c_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL);
+	if (!isp->isp_i2c_res) {
+		r = -ENOMEM;
+		drm_err(&adev->ddev,
+			"%s: isp mfd res alloc failed\n", __func__);
+		goto failure;
+	}
+
+	isp->isp_i2c_res[0].name = "isp_i2c0_reg";
+	isp->isp_i2c_res[0].flags = IORESOURCE_MEM;
+	isp->isp_i2c_res[0].start = isp_base + ISP410_I2C0_OFFSET;
+	isp->isp_i2c_res[0].end = isp_base + ISP410_I2C0_OFFSET + ISP410_I2C0_SIZE;
+
+	isp->isp_cell[1].name = "amd_isp_i2c_designware";
+	isp->isp_cell[1].num_resources = 1;
+	isp->isp_cell[1].resources = &isp->isp_i2c_res[0];
+	isp->isp_cell[1].platform_data = isp->isp_pdata;
+	isp->isp_cell[1].pdata_size = sizeof(struct isp_platform_data);
+
+	/* initialize isp gpiochip platform data */
+	isp->isp_gpio_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL);
+	if (!isp->isp_gpio_res) {
+		r = -ENOMEM;
+		drm_err(&adev->ddev,
+			"%s: isp gpio res alloc failed\n", __func__);
+		goto failure;
+	}
+
+	isp->isp_gpio_res[0].name = "isp_gpio_reg";
+	isp->isp_gpio_res[0].flags = IORESOURCE_MEM;
+	isp->isp_gpio_res[0].start = isp_base + ISP410_GPIO_SENSOR_OFFSET;
+	isp->isp_gpio_res[0].end = isp_base + ISP410_GPIO_SENSOR_OFFSET +
+				   ISP410_GPIO_SENSOR_SIZE;
+
+	isp->isp_cell[2].name = "amdisp-pinctrl";
+	isp->isp_cell[2].num_resources = 1;
+	isp->isp_cell[2].resources = &isp->isp_gpio_res[0];
+	isp->isp_cell[2].platform_data = isp->isp_pdata;
+	isp->isp_cell[2].pdata_size = sizeof(struct isp_platform_data);
+
+	r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 3);
+	if (r) {
+		drm_err(&adev->ddev,
+			"%s: add mfd hotplug device failed\n", __func__);
+		goto failure;
+	}
+
+	return 0;
+
+failure:
+	/* kfree(NULL) is a no-op, so not-yet-allocated members are safe. */
+	kfree(isp->isp_pdata);
+	isp->isp_pdata = NULL;
+	kfree(isp->isp_res);
+	isp->isp_res = NULL;
+	kfree(isp->isp_cell);
+	isp->isp_cell = NULL;
+	kfree(isp->isp_i2c_res);
+	isp->isp_i2c_res = NULL;
+	kfree(isp->isp_gpio_res);
+	isp->isp_gpio_res = NULL;
+
+	return r;
+}
+
+/*
+ * isp_v4_1_0_hw_fini - unregister the ISP MFD cells and free their descriptors
+ * @isp: amdgpu ISP instance
+ *
+ * Removes the MFD devices registered on isp->parent, then frees the cell,
+ * resource and platform-data allocations.  The pointers are cleared after
+ * being freed so that a later init failure path or a repeated fini cannot
+ * free them a second time.
+ *
+ * Always returns 0.
+ */
+static int isp_v4_1_0_hw_fini(struct amdgpu_isp *isp)
+{
+	mfd_remove_devices(isp->parent);
+
+	kfree(isp->isp_res);
+	isp->isp_res = NULL;
+	kfree(isp->isp_cell);
+	isp->isp_cell = NULL;
+	kfree(isp->isp_pdata);
+	isp->isp_pdata = NULL;
+	kfree(isp->isp_i2c_res);
+	isp->isp_i2c_res = NULL;
+	kfree(isp->isp_gpio_res);
+	isp->isp_gpio_res = NULL;
+
+	return 0;
+}
+
+/* ISP callbacks installed by isp_v4_1_0_set_isp_funcs(). */
+static const struct isp_funcs isp_v4_1_0_funcs = {
+ .hw_init = isp_v4_1_0_hw_init,
+ .hw_fini = isp_v4_1_0_hw_fini,
+};
+
+/* Point @isp at the isp_v4_1_0_funcs callback table. */
+void isp_v4_1_0_set_isp_funcs(struct amdgpu_isp *isp)
+{
+ isp->funcs = &isp_v4_1_0_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h
new file mode 100644
index 000000000000..4d239198edd0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#ifndef __ISP_V4_1_0_H__
+#define __ISP_V4_1_0_H__
+
+#include "amdgpu_isp.h"
+
+#include "ivsrcid/isp/irqsrcs_isp_4_1.h"
+
+/* Number of memory resources at the head of the capture cell's resource array. */
+#define MAX_ISP410_MEM_RES 2
+#define MAX_ISP410_SENSOR_RES 1
+/* Number of interrupt sources, i.e. entries in isp_4_1_0_int_srcid[]. */
+#define MAX_ISP410_INT_SRC 8
+
+/* The offsets below are added to adev->rmmio_base in isp_v4_1_0_hw_init(). */
+#define ISP410_PHY0_OFFSET 0x66700
+#define ISP410_PHY0_SIZE 0xD30
+
+#define ISP410_I2C0_OFFSET 0x66400
+#define ISP410_I2C0_SIZE 0x100
+
+#define ISP410_GPIO_SENSOR_OFFSET 0x6613C
+#define ISP410_GPIO_SENSOR_SIZE 0x54
+
+/* Install the ISP v4.1.0 hw_init/hw_fini callbacks on @isp. */
+void isp_v4_1_0_set_isp_funcs(struct amdgpu_isp *isp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c
new file mode 100644
index 000000000000..4258d3e0b706
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c
@@ -0,0 +1,377 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include <linux/gpio/machine.h>
+#include "amdgpu.h"
+#include "isp_v4_1_1.h"
+
+MODULE_FIRMWARE("amdgpu/isp_4_1_1.bin");
+
+/* genpd performance-state values handled by isp_set_performance_state() */
+#define ISP_PERFORMANCE_STATE_LOW 0
+#define ISP_PERFORMANCE_STATE_HIGH 1
+
+/*
+ * Clock values requested through amdgpu_dpm_set_soft_freq_range() for the
+ * high performance state (units presumably MHz - TODO confirm).
+ * NOTE(review): "PERFORMANC" looks like a typo for "PERFORMANCE".
+ */
+#define ISP_HIGH_PERFORMANC_XCLK 788
+#define ISP_HIGH_PERFORMANC_ICLK 788
+
+/*
+ * Interrupt source IDs that isp_v4_1_1_hw_init() maps with
+ * amdgpu_irq_create_mapping(), one per IRQ resource of the
+ * "amd_isp_capture" MFD cell.
+ */
+static const unsigned int isp_4_1_1_int_srcid[MAX_ISP411_INT_SRC] = {
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT9,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT10,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT11,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT12,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT13,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT14,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT15,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT16
+};
+
+/*
+ * GPIO lookup for the "amd_isp_capture" consumer: line 85 of GPIO chip
+ * "AMDI0030:00", requested under the "enable_isp" connection id.
+ * Registered by isp_v4_1_1_hw_init() only when the ACPI HID is "OMNI5C10".
+ */
+static struct gpiod_lookup_table isp_gpio_table = {
+ .dev_id = "amd_isp_capture",
+ .table = {
+ GPIO_LOOKUP("AMDI0030:00", 85, "enable_isp", GPIO_ACTIVE_HIGH),
+ { }
+ },
+};
+
+/*
+ * GPIO lookup for the "i2c-ov05c10" consumer: line 0 of the
+ * "amdisp-pinctrl" chip, requested under the "enable" connection id.
+ * Registered by isp_v4_1_1_hw_init() only when the ACPI HID is "OMNI5C10".
+ */
+static struct gpiod_lookup_table isp_sensor_gpio_table = {
+ .dev_id = "i2c-ov05c10",
+ .table = {
+ GPIO_LOOKUP("amdisp-pinctrl", 0, "enable", GPIO_ACTIVE_HIGH),
+ { }
+ },
+};
+
+/*
+ * genpd .power_off hook: request power gating of the ISP IP block through
+ * amdgpu_dpm_set_powergating_by_smu() and return its result.
+ */
+static int isp_poweroff(struct generic_pm_domain *genpd)
+{
+	struct amdgpu_isp *isp_ctx = container_of(genpd, struct amdgpu_isp, ispgpd);
+
+	return amdgpu_dpm_set_powergating_by_smu(isp_ctx->adev,
+						 AMD_IP_BLOCK_TYPE_ISP,
+						 true, 0);
+}
+
+/*
+ * genpd .power_on hook: request un-gating of the ISP IP block through
+ * amdgpu_dpm_set_powergating_by_smu() and return its result.
+ */
+static int isp_poweron(struct generic_pm_domain *genpd)
+{
+	struct amdgpu_isp *isp_ctx = container_of(genpd, struct amdgpu_isp, ispgpd);
+
+	return amdgpu_dpm_set_powergating_by_smu(isp_ctx->adev,
+						 AMD_IP_BLOCK_TYPE_ISP,
+						 false, 0);
+}
+
+/*
+ * genpd .set_performance_state hook.
+ *
+ * ISP_PERFORMANCE_STATE_LOW is a no-op, ISP_PERFORMANCE_STATE_HIGH requests
+ * the high XCLK and ICLK values, anything else is rejected.
+ *
+ * Returns 0 on success, -EINVAL for an unknown state, or the error returned
+ * by amdgpu_dpm_set_soft_freq_range().
+ */
+static int isp_set_performance_state(struct generic_pm_domain *genpd,
+				     unsigned int state)
+{
+	struct amdgpu_isp *isp_ctx = container_of(genpd, struct amdgpu_isp, ispgpd);
+	struct amdgpu_device *adev = isp_ctx->adev;
+	const u32 xclk_req = ISP_HIGH_PERFORMANC_XCLK;
+	const u32 iclk_req = ISP_HIGH_PERFORMANC_ICLK;
+	int err;
+
+	/* isp runs at default lowest clock-rate on power-on, do nothing */
+	if (state == ISP_PERFORMANCE_STATE_LOW)
+		return 0;
+
+	if (state != ISP_PERFORMANCE_STATE_HIGH)
+		return -EINVAL;
+
+	err = amdgpu_dpm_set_soft_freq_range(adev, PP_ISPXCLK, xclk_req, 0);
+	if (err) {
+		drm_err(&adev->ddev, "failed to set xclk %u to %u: %d\n",
+			xclk_req, state, err);
+		return err;
+	}
+
+	err = amdgpu_dpm_set_soft_freq_range(adev, PP_ISPICLK, iclk_req, 0);
+	if (err) {
+		drm_err(&adev->ddev, "failed to set iclk %u to %u: %d\n",
+			iclk_req, state, err);
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * device_for_each_child() callback: attach one child of the ISP parent
+ * device to the ISP power domain.
+ *
+ * @dev:  child device being visited
+ * @data: the struct generic_pm_domain embedded in struct amdgpu_isp (ispgpd)
+ *
+ * Children that have no device type, or whose type name is not "mfd_device",
+ * are skipped so the iteration continues with the next child.
+ *
+ * Returns 0 to continue the iteration, or -ENODEV (which stops it) when
+ * pm_genpd_add_device() fails.
+ */
+static int isp_genpd_add_device(struct device *dev, void *data)
+{
+	struct generic_pm_domain *gpd = data;
+	struct amdgpu_isp *isp = container_of(gpd, struct amdgpu_isp, ispgpd);
+	struct amdgpu_device *adev = isp->adev;
+	struct platform_device *pdev;
+	int ret;
+
+	/* dev->type may be NULL for devices registered without a device_type */
+	if (!dev->type || !dev->type->name) {
+		drm_dbg(&adev->ddev, "Invalid device type to add\n");
+		return 0;
+	}
+
+	/*
+	 * A child that is not an MFD cell is not known to be a platform
+	 * device, so it must not be reached through container_of() and
+	 * ->mfd_cell; identify it by its device name instead.
+	 */
+	if (strcmp(dev->type->name, "mfd_device")) {
+		drm_dbg(&adev->ddev, "Invalid isp mfd device %s to add\n",
+			dev_name(dev));
+		return 0;
+	}
+
+	/* only derive the platform device once the type has been validated */
+	pdev = container_of(dev, struct platform_device, dev);
+
+	ret = pm_genpd_add_device(gpd, dev);
+	if (ret) {
+		drm_err(&adev->ddev, "Failed to add dev %s to genpd %d\n",
+			pdev->mfd_cell->name, ret);
+		return -ENODEV;
+	}
+
+	/* Continue to add */
+	return 0;
+}
+
+/*
+ * device_for_each_child() callback: detach one child of the ISP parent
+ * device from its generic power domain.
+ *
+ * @dev:  child device being visited
+ * @data: unused; pm_genpd_remove_device() only needs @dev. It is not
+ *        dereferenced, so a NULL value is tolerated.
+ *
+ * Children that have no device type, whose type name is not "mfd_device",
+ * or that are not attached to any PM domain are skipped so the iteration
+ * continues with the next child.
+ *
+ * Returns 0 to continue the iteration, or -ENODEV (which stops it) when
+ * pm_genpd_remove_device() fails.
+ */
+static int isp_genpd_remove_device(struct device *dev, void *data)
+{
+	int ret;
+
+	/* dev->type may be NULL for devices registered without a device_type */
+	if (!dev->type || !dev->type->name) {
+		dev_dbg(dev, "Invalid device type to remove\n");
+		return 0;
+	}
+
+	/*
+	 * A child that is not an MFD cell is not known to be a platform
+	 * device, so identify it by its device name rather than ->mfd_cell.
+	 */
+	if (strcmp(dev->type->name, "mfd_device")) {
+		dev_dbg(dev, "Invalid isp mfd device %s to remove\n",
+			dev_name(dev));
+		return 0;
+	}
+
+	/*
+	 * Not every MFD cell is added to the power domain (the pinctrl cell
+	 * is registered without it), so there is nothing to detach here.
+	 */
+	if (!dev->pm_domain)
+		return 0;
+
+	ret = pm_genpd_remove_device(dev);
+	if (ret) {
+		dev_err(dev, "Failed to remove dev from genpd %d\n", ret);
+		return -ENODEV;
+	}
+
+	/* Continue to remove */
+	return 0;
+}
+
+/*
+ * isp_v4_1_1_hw_init - set up the ISP 4.1.1 power domain and MFD children
+ *
+ * Looks up the ISP4 ACPI device, initialises the generic PM domain embedded
+ * in @isp, allocates the MFD cell, resource and platform-data buffers, and
+ * registers three MFD cells on isp->parent: "amd_isp_capture",
+ * "amd_isp_i2c_designware" and "amdisp-pinctrl". Only the first two are
+ * attached to the power domain.
+ *
+ * Returns 0 on success and also when amdgpu_acpi_get_isp4_dev() fails (so
+ * GPU init can continue), -EINVAL for a too-small MMIO size or a
+ * pm_genpd_init() failure, -ENOMEM on allocation failure, or the error
+ * reported by the MFD/genpd registration calls.
+ */
+static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp)
+{
+ const struct software_node *amd_camera_node, *isp4_node;
+ struct amdgpu_device *adev = isp->adev;
+ struct acpi_device *acpi_dev;
+ int idx, int_idx, num_res, r;
+ u64 isp_base;
+
+ /* NOTE(review): the "== 0" test is already implied by "< 0x5289" */
+ if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289)
+ return -EINVAL;
+
+ r = amdgpu_acpi_get_isp4_dev(&acpi_dev);
+ if (r) {
+ /* NOTE(review): this message has no trailing "\n" unlike the others */
+ drm_dbg(&adev->ddev, "Invalid isp platform detected (%d)", r);
+ /* allow GPU init to progress */
+ return 0;
+ }
+
+ /* add GPIO resources required for OMNI5C10 sensor */
+ if (!strcmp("OMNI5C10", acpi_device_hid(acpi_dev))) {
+ gpiod_add_lookup_table(&isp_gpio_table);
+ gpiod_add_lookup_table(&isp_sensor_gpio_table);
+ }
+
+ isp_base = adev->rmmio_base;
+
+ /* describe the power domain before registering it with genpd */
+ isp->ispgpd.name = "ISP_v_4_1_1";
+ isp->ispgpd.power_off = isp_poweroff;
+ isp->ispgpd.power_on = isp_poweron;
+ isp->ispgpd.set_performance_state = isp_set_performance_state;
+
+ r = pm_genpd_init(&isp->ispgpd, NULL, true);
+ if (r) {
+ drm_err(&adev->ddev, "failed to initialize genpd (%d)\n", r);
+ return -EINVAL;
+ }
+
+ /* three cells: capture, i2c and pinctrl (filled in below) */
+ isp->isp_cell = kcalloc(3, sizeof(struct mfd_cell), GFP_KERNEL);
+ if (!isp->isp_cell) {
+ r = -ENOMEM;
+ drm_err(&adev->ddev, "isp mfd cell alloc failed (%d)\n", r);
+ goto failure;
+ }
+
+ /* capture cell resources: the MEM windows first, then one IRQ per source */
+ num_res = MAX_ISP411_MEM_RES + MAX_ISP411_INT_SRC;
+
+ isp->isp_res = kcalloc(num_res, sizeof(struct resource),
+ GFP_KERNEL);
+ if (!isp->isp_res) {
+ r = -ENOMEM;
+ drm_err(&adev->ddev, "isp mfd resource alloc failed (%d)\n", r);
+ goto failure;
+ }
+
+ isp->isp_pdata = kzalloc(sizeof(*isp->isp_pdata), GFP_KERNEL);
+ if (!isp->isp_pdata) {
+ r = -ENOMEM;
+ drm_err(&adev->ddev, "isp platform data alloc failed (%d)\n", r);
+ goto failure;
+ }
+
+ /*
+ * The ACPI device's driver_data is used as the camera software node.
+ * NOTE(review): isp4_node is not checked before being stored in the
+ * capture cell - confirm a missing "isp4" node is acceptable.
+ */
+ amd_camera_node = (const struct software_node *)acpi_dev->driver_data;
+ isp4_node = software_node_find_by_name(amd_camera_node, "isp4");
+
+ /* initialize isp platform data */
+ isp->isp_pdata->adev = (void *)adev;
+ isp->isp_pdata->asic_type = adev->asic_type;
+ isp->isp_pdata->base_rmmio_size = adev->rmmio_size;
+
+ /*
+ * NOTE(review): the .end fields below are computed as start + SIZE;
+ * struct resource ends are normally inclusive (start + size - 1) -
+ * confirm against the register map.
+ */
+ isp->isp_res[0].name = "isp_4_1_1_reg";
+ isp->isp_res[0].flags = IORESOURCE_MEM;
+ isp->isp_res[0].start = isp_base;
+ isp->isp_res[0].end = isp_base + ISP_REGS_OFFSET_END;
+
+ isp->isp_res[1].name = "isp_4_1_1_phy0_reg";
+ isp->isp_res[1].flags = IORESOURCE_MEM;
+ isp->isp_res[1].start = isp_base + ISP411_PHY0_OFFSET;
+ isp->isp_res[1].end = isp_base + ISP411_PHY0_OFFSET + ISP411_PHY0_SIZE;
+
+ /* IRQ resources follow the MEM resources; each maps one source id */
+ for (idx = MAX_ISP411_MEM_RES, int_idx = 0; idx < num_res; idx++, int_idx++) {
+ isp->isp_res[idx].name = "isp_4_1_1_irq";
+ isp->isp_res[idx].flags = IORESOURCE_IRQ;
+ isp->isp_res[idx].start =
+ amdgpu_irq_create_mapping(adev, isp_4_1_1_int_srcid[int_idx]);
+ isp->isp_res[idx].end =
+ isp->isp_res[idx].start;
+ }
+
+ isp->isp_cell[0].name = "amd_isp_capture";
+ isp->isp_cell[0].num_resources = num_res;
+ isp->isp_cell[0].resources = &isp->isp_res[0];
+ isp->isp_cell[0].platform_data = isp->isp_pdata;
+ isp->isp_cell[0].swnode = isp4_node;
+ isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data);
+
+ /* initialize isp i2c platform data */
+ isp->isp_i2c_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL);
+ if (!isp->isp_i2c_res) {
+ r = -ENOMEM;
+ drm_err(&adev->ddev, "isp mfd res alloc failed (%d)\n", r);
+ goto failure;
+ }
+
+ isp->isp_i2c_res[0].name = "isp_i2c0_reg";
+ isp->isp_i2c_res[0].flags = IORESOURCE_MEM;
+ isp->isp_i2c_res[0].start = isp_base + ISP411_I2C0_OFFSET;
+ isp->isp_i2c_res[0].end = isp_base + ISP411_I2C0_OFFSET + ISP411_I2C0_SIZE;
+
+ isp->isp_cell[1].name = "amd_isp_i2c_designware";
+ isp->isp_cell[1].num_resources = 1;
+ isp->isp_cell[1].resources = &isp->isp_i2c_res[0];
+ isp->isp_cell[1].platform_data = isp->isp_pdata;
+ isp->isp_cell[1].pdata_size = sizeof(struct isp_platform_data);
+
+ /* initialize isp gpiochip platform data */
+ isp->isp_gpio_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL);
+ if (!isp->isp_gpio_res) {
+ r = -ENOMEM;
+ drm_err(&adev->ddev, "isp gpio resource alloc failed (%d)\n", r);
+ goto failure;
+ }
+
+ isp->isp_gpio_res[0].name = "isp_gpio_reg";
+ isp->isp_gpio_res[0].flags = IORESOURCE_MEM;
+ isp->isp_gpio_res[0].start = isp_base + ISP411_GPIO_SENSOR_OFFSET;
+ isp->isp_gpio_res[0].end = isp_base + ISP411_GPIO_SENSOR_OFFSET +
+ ISP411_GPIO_SENSOR_SIZE;
+
+ isp->isp_cell[2].name = "amdisp-pinctrl";
+ isp->isp_cell[2].num_resources = 1;
+ isp->isp_cell[2].resources = &isp->isp_gpio_res[0];
+ isp->isp_cell[2].platform_data = isp->isp_pdata;
+ isp->isp_cell[2].pdata_size = sizeof(struct isp_platform_data);
+
+ /* add only amd_isp_capture and amd_isp_i2c_designware to genpd */
+ r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 2);
+ if (r) {
+ drm_err(&adev->ddev, "add mfd hotplug device failed (%d)\n", r);
+ goto failure;
+ }
+
+ /* attach the MFD children registered so far to the power domain */
+ r = device_for_each_child(isp->parent, &isp->ispgpd,
+ isp_genpd_add_device);
+ if (r) {
+ drm_err(&adev->ddev, "failed to add devices to genpd (%d)\n", r);
+ goto failure;
+ }
+
+ /* the pinctrl cell is registered afterwards, so it stays out of the genpd */
+ r = mfd_add_hotplug_devices(isp->parent, &isp->isp_cell[2], 1);
+ if (r) {
+ drm_err(&adev->ddev, "add pinctl hotplug device failed (%d)\n", r);
+ goto failure;
+ }
+
+ return 0;
+
+failure:
+
+ /*
+ * NOTE(review): this path only frees the buffers. It does not undo
+ * pm_genpd_init(), the gpiod lookup tables, or MFD devices that were
+ * already added, and the freed pointers are left set - confirm whether
+ * additional unwinding is needed.
+ */
+ kfree(isp->isp_pdata);
+ kfree(isp->isp_res);
+ kfree(isp->isp_cell);
+ kfree(isp->isp_i2c_res);
+ kfree(isp->isp_gpio_res);
+
+ return r;
+}
+
+/*
+ * isp_v4_1_1_hw_fini - tear down what isp_v4_1_1_hw_init() registered
+ *
+ * Detaches the MFD children from the ISP power domain, unregisters the MFD
+ * devices and frees the cell, resource and platform-data buffers.
+ *
+ * Always returns 0.
+ */
+static int isp_v4_1_1_hw_fini(struct amdgpu_isp *isp)
+{
+	/*
+	 * Hand the callback the power domain, the same context that
+	 * isp_v4_1_1_hw_init() passes when adding the devices, never NULL:
+	 * a callback that resolves its amdgpu_isp via container_of() on the
+	 * data pointer would otherwise dereference an invalid address.
+	 */
+	device_for_each_child(isp->parent, &isp->ispgpd,
+			      isp_genpd_remove_device);
+
+	mfd_remove_devices(isp->parent);
+
+	/* clear each pointer after freeing so no stale reference survives */
+	kfree(isp->isp_res);
+	isp->isp_res = NULL;
+	kfree(isp->isp_cell);
+	isp->isp_cell = NULL;
+	kfree(isp->isp_pdata);
+	isp->isp_pdata = NULL;
+	kfree(isp->isp_i2c_res);
+	isp->isp_i2c_res = NULL;
+	kfree(isp->isp_gpio_res);
+	isp->isp_gpio_res = NULL;
+
+	return 0;
+}
+
+/* ISP 4.1.1 hardware init/fini callbacks, installed by isp_v4_1_1_set_isp_funcs() */
+static const struct isp_funcs isp_v4_1_1_funcs = {
+ .hw_init = isp_v4_1_1_hw_init,
+ .hw_fini = isp_v4_1_1_hw_fini,
+};
+
+/* Point @isp at the ISP 4.1.1 callback table (isp_v4_1_1_funcs). */
+void isp_v4_1_1_set_isp_funcs(struct amdgpu_isp *isp)
+{
+ isp->funcs = &isp_v4_1_1_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h
new file mode 100644
index 000000000000..fe45d70d87f1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#ifndef __ISP_V4_1_1_H__
+#define __ISP_V4_1_1_H__
+
+#include "amdgpu_isp.h"
+
+#include "ivsrcid/isp/irqsrcs_isp_4_1.h"
+
+/*
+ * Number of MEM and IRQ resources of the "amd_isp_capture" cell; their sum
+ * sizes the resource array allocated in isp_v4_1_1_hw_init().
+ */
+#define MAX_ISP411_MEM_RES 2
+#define MAX_ISP411_INT_SRC 8
+
+/*
+ * Register windows as offset/size pairs. isp_v4_1_1_hw_init() adds each
+ * offset to the device MMIO base to build the matching MEM resource.
+ */
+#define ISP411_PHY0_OFFSET 0x66700
+#define ISP411_PHY0_SIZE 0xD30
+
+#define ISP411_I2C0_OFFSET 0x66400
+#define ISP411_I2C0_SIZE 0x100
+
+#define ISP411_GPIO_SENSOR_OFFSET 0x6613C
+#define ISP411_GPIO_SENSOR_SIZE 0x54
+
+void isp_v4_1_1_set_isp_funcs(struct amdgpu_isp *isp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
index 9360204da7fb..b5bb7f4d607c 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
@@ -23,6 +23,7 @@
#include "amdgpu.h"
#include "amdgpu_jpeg.h"
+#include "amdgpu_cs.h"
#include "soc15.h"
#include "soc15d.h"
#include "vcn_v1_0.h"
@@ -34,6 +35,9 @@
static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev);
static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring);
+static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
static void jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val)
{
@@ -300,7 +304,10 @@ static void jpeg_v1_0_decode_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring,
PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+ if (ring->funcs->parse_cs)
+ amdgpu_ring_write(ring, 0);
+ else
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
amdgpu_ring_write(ring,
PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0));
@@ -376,7 +383,7 @@ static void jpeg_v1_0_decode_ring_emit_reg_wait(struct amdgpu_ring *ring,
static void jpeg_v1_0_decode_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
uint32_t data0, data1, mask;
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
@@ -437,7 +444,7 @@ static int jpeg_v1_0_process_interrupt(struct amdgpu_device *adev,
switch (entry->src_id) {
case 126:
- amdgpu_fence_process(&adev->jpeg.inst->ring_dec);
+ amdgpu_fence_process(adev->jpeg.inst->ring_dec);
break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
@@ -451,15 +458,16 @@ static int jpeg_v1_0_process_interrupt(struct amdgpu_device *adev,
/**
* jpeg_v1_0_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-int jpeg_v1_0_early_init(void *handle)
+int jpeg_v1_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->jpeg.num_jpeg_inst = 1;
+ adev->jpeg.num_jpeg_rings = 1;
jpeg_v1_0_set_dec_ring_funcs(adev);
jpeg_v1_0_set_irq_funcs(adev);
@@ -470,12 +478,12 @@ int jpeg_v1_0_early_init(void *handle)
/**
* jpeg_v1_0_sw_init - sw init for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-int jpeg_v1_0_sw_init(void *handle)
+int jpeg_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int r;
@@ -484,14 +492,15 @@ int jpeg_v1_0_sw_init(void *handle)
if (r)
return r;
- ring = &adev->jpeg.inst->ring_dec;
+ ring = adev->jpeg.inst->ring_dec;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "jpeg_dec");
r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq,
0, AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
- adev->jpeg.internal.jpeg_pitch = adev->jpeg.inst->external.jpeg_pitch =
+ adev->jpeg.internal.jpeg_pitch[0] = adev->jpeg.inst->external.jpeg_pitch[0] =
SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
return 0;
@@ -500,15 +509,15 @@ int jpeg_v1_0_sw_init(void *handle)
/**
* jpeg_v1_0_sw_fini - sw fini for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* JPEG free up sw allocation
*/
-void jpeg_v1_0_sw_fini(void *handle)
+void jpeg_v1_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- amdgpu_ring_fini(&adev->jpeg.inst[0].ring_dec);
+ amdgpu_ring_fini(adev->jpeg.inst->ring_dec);
}
/**
@@ -521,7 +530,7 @@ void jpeg_v1_0_sw_fini(void *handle)
*/
void jpeg_v1_0_start(struct amdgpu_device *adev, int mode)
{
- struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
if (mode == 0) {
WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
@@ -548,11 +557,11 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = {
.nop = PACKET0(0x81ff, 0),
.support_64bit_ptrs = false,
.no_user_fence = true,
- .vmhub = AMDGPU_MMHUB_0,
- .extra_dw = 64,
+ .extra_bytes = 256,
.get_rptr = jpeg_v1_0_decode_ring_get_rptr,
.get_wptr = jpeg_v1_0_decode_ring_get_wptr,
.set_wptr = jpeg_v1_0_decode_ring_set_wptr,
+ .parse_cs = jpeg_v1_dec_ring_parse_cs,
.emit_frame_size =
6 + 6 + /* hdp invalidate / flush */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
@@ -579,8 +588,7 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = {
static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
- adev->jpeg.inst->ring_dec.funcs = &jpeg_v1_0_decode_ring_vm_funcs;
- DRM_INFO("JPEG decode is enabled in VM mode\n");
+ adev->jpeg.inst->ring_dec->funcs = &jpeg_v1_0_decode_ring_vm_funcs;
}
static const struct amdgpu_irq_src_funcs jpeg_v1_0_irq_funcs = {
@@ -596,18 +604,84 @@ static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev)
static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
+ bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work);
int cnt = 0;
- mutex_lock(&adev->vcn.vcn1_jpeg1_workaround);
+ mutex_lock(&adev->vcn.inst[0].vcn1_jpeg1_workaround);
if (amdgpu_fence_wait_empty(&adev->vcn.inst->ring_dec))
DRM_ERROR("JPEG dec: vcn dec ring may not be empty\n");
- for (cnt = 0; cnt < adev->vcn.num_enc_rings; cnt++) {
+ for (cnt = 0; cnt < adev->vcn.inst[0].num_enc_rings; cnt++) {
if (amdgpu_fence_wait_empty(&adev->vcn.inst->ring_enc[cnt]))
DRM_ERROR("JPEG dec: vcn enc ring[%d] may not be empty\n", cnt);
}
vcn_v1_0_set_pg_for_begin_use(ring, set_clocks);
}
+
+/**
+ * jpeg_v1_dec_ring_parse_cs - validate the packets of a JPEG v1 decode IB
+ *
+ * @parser: Command submission parser context
+ * @job: the job to parse
+ * @ib: the IB to parse
+ *
+ * Walks the IB two dwords at a time and inspects the first dword of each
+ * pair as a packet header. A header with a non-zero RES field or a condition
+ * other than PACKETJ_CONDITION_CHECK0 is rejected outright. Registers inside
+ * the JPEG_V1_REG_RANGE_START..JPEG_V1_REG_RANGE_END window are accepted for
+ * any packet type; all other registers must be on the per-type allow list.
+ *
+ * Return: 0 if every packet is accepted, -EINVAL otherwise.
+ */
+static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
+				     struct amdgpu_job *job,
+				     struct amdgpu_ib *ib)
+{
+	struct amdgpu_device *adev = parser->adev;
+	u32 idx;
+
+	for (idx = 0; idx < ib->length_dw; idx += 2) {
+		const u32 hdr = ib->ptr[idx];
+		const u32 reg = CP_PACKETJ_GET_REG(hdr);
+		bool allowed;
+
+		/* only allow 0 for now */
+		if (CP_PACKETJ_GET_RES(hdr) ||
+		    CP_PACKETJ_GET_COND(hdr) != PACKETJ_CONDITION_CHECK0)
+			return -EINVAL;
+
+		/* registers in the general window need no per-type check */
+		if (reg >= JPEG_V1_REG_RANGE_START && reg <= JPEG_V1_REG_RANGE_END)
+			continue;
+
+		switch (CP_PACKETJ_GET_TYPE(hdr)) {
+		case PACKETJ_TYPE0:
+			allowed = reg == JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH ||
+				  reg == JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW ||
+				  reg == JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH ||
+				  reg == JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW ||
+				  reg == JPEG_V1_REG_CTX_INDEX ||
+				  reg == JPEG_V1_REG_CTX_DATA;
+			break;
+		case PACKETJ_TYPE1:
+			allowed = reg == JPEG_V1_REG_CTX_DATA;
+			break;
+		case PACKETJ_TYPE3:
+			allowed = reg == JPEG_V1_REG_SOFT_RESET;
+			break;
+		case PACKETJ_TYPE6:
+			/* a type 6 packet must be exactly the NOP encoding */
+			allowed = hdr == CP_PACKETJ_NOP;
+			break;
+		default:
+			allowed = false;
+			break;
+		}
+
+		if (!allowed) {
+			dev_err(adev->dev, "Invalid packet [0x%08x]!\n", hdr);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
index bbf33a6a3972..097328635083 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
@@ -24,9 +24,20 @@
#ifndef __JPEG_V1_0_H__
#define __JPEG_V1_0_H__
-int jpeg_v1_0_early_init(void *handle);
-int jpeg_v1_0_sw_init(void *handle);
-void jpeg_v1_0_sw_fini(void *handle);
+int jpeg_v1_0_early_init(struct amdgpu_ip_block *ip_block);
+int jpeg_v1_0_sw_init(struct amdgpu_ip_block *ip_block);
+void jpeg_v1_0_sw_fini(struct amdgpu_ip_block *ip_block);
void jpeg_v1_0_start(struct amdgpu_device *adev, int mode);
+/* Register window that jpeg_v1_dec_ring_parse_cs() accepts for any packet type */
+#define JPEG_V1_REG_RANGE_START 0x8000
+#define JPEG_V1_REG_RANGE_END 0x803f
+
+/*
+ * Individual registers that jpeg_v1_dec_ring_parse_cs() additionally allows
+ * outside that window, depending on the packet type.
+ */
+#define JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH 0x8238
+#define JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW 0x8239
+#define JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH 0x825a
+#define JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW 0x825b
+#define JPEG_V1_REG_CTX_INDEX 0x8328
+#define JPEG_V1_REG_CTX_DATA 0x8329
+#define JPEG_V1_REG_SOFT_RESET 0x83a0
+
+
#endif /*__JPEG_V1_0_H__*/
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index 299de1d131d8..27c76bd424cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -32,23 +32,40 @@
#include "vcn/vcn_2_0_0_sh_mask.h"
#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+/* Register list handed to amdgpu_jpeg_reg_dump_init() by jpeg_v2_0_sw_init() */
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_2_0[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_UV_PITCH),
+};
+
static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v2_0_set_powergating_state(void *handle,
+static int jpeg_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
/**
* jpeg_v2_0_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-static int jpeg_v2_0_early_init(void *handle)
+static int jpeg_v2_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->jpeg.num_jpeg_inst = 1;
+ adev->jpeg.num_jpeg_rings = 1;
jpeg_v2_0_set_dec_ring_funcs(adev);
jpeg_v2_0_set_irq_funcs(adev);
@@ -59,13 +76,13 @@ static int jpeg_v2_0_early_init(void *handle)
/**
* jpeg_v2_0_sw_init - sw init for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int jpeg_v2_0_sw_init(void *handle)
+static int jpeg_v2_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int r;
@@ -83,37 +100,50 @@ static int jpeg_v2_0_sw_init(void *handle)
if (r)
return r;
- ring = &adev->jpeg.inst->ring_dec;
+ ring = adev->jpeg.inst->ring_dec;
ring->use_doorbell = true;
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "jpeg_dec");
r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq,
0, AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
- adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
- adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
+ adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
- return 0;
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_2_0, ARRAY_SIZE(jpeg_reg_list_2_0));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec);
+ if (!amdgpu_sriov_vf(adev))
+ adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return r;
}
/**
* jpeg_v2_0_sw_fini - sw fini for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* JPEG suspend and free up sw allocation
*/
-static int jpeg_v2_0_sw_fini(void *handle)
+static int jpeg_v2_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_jpeg_suspend(adev);
if (r)
return r;
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
r = amdgpu_jpeg_sw_fini(adev);
return r;
@@ -122,41 +152,36 @@ static int jpeg_v2_0_sw_fini(void *handle)
/**
* jpeg_v2_0_hw_init - start and test JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int jpeg_v2_0_hw_init(void *handle)
+static int jpeg_v2_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
- int r;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
(adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
- r = amdgpu_ring_test_helper(ring);
- if (!r)
- DRM_INFO("JPEG decode initialized successfully.\n");
-
- return r;
+ return amdgpu_ring_test_helper(ring);
}
/**
* jpeg_v2_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the JPEG block, mark ring as not ready any more
*/
-static int jpeg_v2_0_hw_fini(void *handle)
+static int jpeg_v2_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
- jpeg_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ jpeg_v2_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
return 0;
}
@@ -164,20 +189,19 @@ static int jpeg_v2_0_hw_fini(void *handle)
/**
* jpeg_v2_0_suspend - suspend JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend JPEG block
*/
-static int jpeg_v2_0_suspend(void *handle)
+static int jpeg_v2_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = jpeg_v2_0_hw_fini(adev);
+ r = jpeg_v2_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_jpeg_suspend(adev);
+ r = amdgpu_jpeg_suspend(ip_block->adev);
return r;
}
@@ -185,20 +209,19 @@ static int jpeg_v2_0_suspend(void *handle)
/**
* jpeg_v2_0_resume - resume JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init JPEG block
*/
-static int jpeg_v2_0_resume(void *handle)
+static int jpeg_v2_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_jpeg_resume(adev);
+ r = amdgpu_jpeg_resume(ip_block->adev);
if (r)
return r;
- r = jpeg_v2_0_hw_init(adev);
+ r = jpeg_v2_0_hw_init(ip_block);
return r;
}
@@ -311,7 +334,7 @@ static void jpeg_v2_0_enable_clock_gating(struct amdgpu_device *adev)
*/
static int jpeg_v2_0_start(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
int r;
if (adev->pm.dpm_enabled)
@@ -407,7 +430,7 @@ static uint64_t jpeg_v2_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
}
@@ -424,7 +447,7 @@ static void jpeg_v2_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell) {
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
@@ -535,13 +558,21 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid << JPEG_IH_CTRL__IH_VMID__SHIFT));
+
amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ if (ring->funcs->parse_cs)
+ amdgpu_ring_write(ring, 0);
+ else
+ amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));
amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));
amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
@@ -609,7 +640,7 @@ void jpeg_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
uint32_t data0, data1, mask;
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
@@ -651,18 +682,18 @@ void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count)
}
}
-static bool jpeg_v2_0_is_idle(void *handle)
+static bool jpeg_v2_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return ((RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS) &
UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
}
-static int jpeg_v2_0_wait_for_idle(void *handle)
+static int jpeg_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS, UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
@@ -671,14 +702,14 @@ static int jpeg_v2_0_wait_for_idle(void *handle)
return ret;
}
-static int jpeg_v2_0_set_clockgating_state(void *handle,
+static int jpeg_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
- if (!jpeg_v2_0_is_idle(handle))
+ if (!jpeg_v2_0_is_idle(ip_block))
return -EBUSY;
jpeg_v2_0_enable_clock_gating(adev);
} else {
@@ -688,10 +719,10 @@ static int jpeg_v2_0_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v2_0_set_powergating_state(void *handle,
+static int jpeg_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if (state == adev->jpeg.cur_state)
@@ -724,7 +755,7 @@ static int jpeg_v2_0_process_interrupt(struct amdgpu_device *adev,
switch (entry->src_id) {
case VCN_2_0__SRCID__JPEG_DECODE:
- amdgpu_fence_process(&adev->jpeg.inst->ring_dec);
+ amdgpu_fence_process(adev->jpeg.inst->ring_dec);
break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
@@ -735,10 +766,25 @@ static int jpeg_v2_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+static int jpeg_v2_0_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+ r = jpeg_v2_0_stop(ring->adev);
+ if (r)
+ return r;
+ r = jpeg_v2_0_start(ring->adev);
+ if (r)
+ return r;
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = {
.name = "jpeg_v2_0",
.early_init = jpeg_v2_0_early_init,
- .late_init = NULL,
.sw_init = jpeg_v2_0_sw_init,
.sw_fini = jpeg_v2_0_sw_fini,
.hw_init = jpeg_v2_0_hw_init,
@@ -747,28 +793,26 @@ static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = {
.resume = jpeg_v2_0_resume,
.is_idle = jpeg_v2_0_is_idle,
.wait_for_idle = jpeg_v2_0_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = jpeg_v2_0_set_clockgating_state,
.set_powergating_state = jpeg_v2_0_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
};
static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_JPEG,
.align_mask = 0xf,
- .vmhub = AMDGPU_MMHUB_0,
.get_rptr = jpeg_v2_0_dec_ring_get_rptr,
.get_wptr = jpeg_v2_0_dec_ring_get_wptr,
.set_wptr = jpeg_v2_0_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
8 + /* jpeg_v2_0_dec_ring_emit_vm_flush */
18 + 18 + /* jpeg_v2_0_dec_ring_emit_fence x2 vm fence */
8 + 16,
- .emit_ib_size = 22, /* jpeg_v2_0_dec_ring_emit_ib */
+ .emit_ib_size = 24, /* jpeg_v2_0_dec_ring_emit_ib */
.emit_ib = jpeg_v2_0_dec_ring_emit_ib,
.emit_fence = jpeg_v2_0_dec_ring_emit_fence,
.emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
@@ -783,12 +827,12 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = {
.emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v2_0_ring_reset,
};
static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
- adev->jpeg.inst->ring_dec.funcs = &jpeg_v2_0_dec_ring_vm_funcs;
- DRM_INFO("JPEG decode is enabled in VM mode\n");
+ adev->jpeg.inst->ring_dec->funcs = &jpeg_v2_0_dec_ring_vm_funcs;
}
static const struct amdgpu_irq_src_funcs jpeg_v2_0_irq_funcs = {
@@ -802,8 +846,7 @@ static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev)
adev->jpeg.inst->irq.funcs = &jpeg_v2_0_irq_funcs;
}
-const struct amdgpu_ip_block_version jpeg_v2_0_ip_block =
-{
+const struct amdgpu_ip_block_version jpeg_v2_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_JPEG,
.major = 2,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
index 1a03baa59755..654e43e83e2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
@@ -41,6 +41,7 @@
#define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084
#define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089
#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+#define mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET 0x4149
#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index a29c86617fb5..20983f126b49 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -26,6 +26,7 @@
#include "soc15.h"
#include "soc15d.h"
#include "jpeg_v2_0.h"
+#include "jpeg_v2_5.h"
#include "vcn/vcn_2_5_offset.h"
#include "vcn/vcn_2_5_sh_mask.h"
@@ -35,10 +36,27 @@
#define JPEG25_MAX_HW_INSTANCES_ARCTURUS 2
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_2_5[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_UV_PITCH),
+};
+
static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v2_5_set_powergating_state(void *handle,
+static int jpeg_v2_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
+static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev);
static int amdgpu_ih_clientid_jpeg[] = {
SOC15_IH_CLIENTID_VCN,
@@ -48,16 +66,17 @@ static int amdgpu_ih_clientid_jpeg[] = {
/**
* jpeg_v2_5_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-static int jpeg_v2_5_early_init(void *handle)
+static int jpeg_v2_5_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 harvest;
int i;
+ adev->jpeg.num_jpeg_rings = 1;
adev->jpeg.num_jpeg_inst = JPEG25_MAX_HW_INSTANCES_ARCTURUS;
for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
harvest = RREG32_SOC15(JPEG, i, mmCC_UVD_HARVESTING);
@@ -70,6 +89,7 @@ static int jpeg_v2_5_early_init(void *handle)
jpeg_v2_5_set_dec_ring_funcs(adev);
jpeg_v2_5_set_irq_funcs(adev);
+ jpeg_v2_5_set_ras_funcs(adev);
return 0;
}
@@ -77,15 +97,15 @@ static int jpeg_v2_5_early_init(void *handle)
/**
* jpeg_v2_5_sw_init - sw init for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int jpeg_v2_5_sw_init(void *handle)
+static int jpeg_v2_5_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
@@ -96,6 +116,18 @@ static int jpeg_v2_5_sw_init(void *handle)
VCN_2_0__SRCID__JPEG_DECODE, &adev->jpeg.inst[i].irq);
if (r)
return r;
+
+ /* JPEG DJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i],
+ VCN_2_6__SRCID_DJPEG0_POISON, &adev->jpeg.inst[i].ras_poison_irq);
+ if (r)
+ return r;
+
+ /* JPEG EJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i],
+ VCN_2_6__SRCID_EJPEG0_POISON, &adev->jpeg.inst[i].ras_poison_irq);
+ if (r)
+ return r;
}
r = amdgpu_jpeg_sw_init(adev);
@@ -110,8 +142,12 @@ static int jpeg_v2_5_sw_init(void *handle)
if (adev->jpeg.harvest_config & (1 << i))
continue;
- ring = &adev->jpeg.inst[i].ring_dec;
+ ring = adev->jpeg.inst[i].ring_dec;
ring->use_doorbell = true;
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(2, 5, 0))
+ ring->vm_hub = AMDGPU_MMHUB1(0);
+ else
+ ring->vm_hub = AMDGPU_MMHUB0(0);
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8 * i;
sprintf(ring->name, "jpeg_dec_%d", i);
r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq,
@@ -119,29 +155,45 @@ static int jpeg_v2_5_sw_init(void *handle)
if (r)
return r;
- adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
- adev->jpeg.inst[i].external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_PITCH);
+ adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst[i].external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_PITCH);
}
- return 0;
+ r = amdgpu_jpeg_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_2_5, ARRAY_SIZE(jpeg_reg_list_2_5));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec);
+ if (!amdgpu_sriov_vf(adev))
+ adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return r;
}
/**
* jpeg_v2_5_sw_fini - sw fini for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* JPEG suspend and free up sw allocation
*/
-static int jpeg_v2_5_sw_fini(void *handle)
+static int jpeg_v2_5_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_jpeg_suspend(adev);
if (r)
return r;
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
r = amdgpu_jpeg_sw_fini(adev);
return r;
@@ -150,12 +202,12 @@ static int jpeg_v2_5_sw_fini(void *handle)
/**
* jpeg_v2_5_hw_init - start and test JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int jpeg_v2_5_hw_init(void *handle)
+static int jpeg_v2_5_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, r;
@@ -163,7 +215,7 @@ static int jpeg_v2_5_hw_init(void *handle)
if (adev->jpeg.harvest_config & (1 << i))
continue;
- ring = &adev->jpeg.inst[i].ring_dec;
+ ring = adev->jpeg.inst[i].ring_dec;
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i, i);
@@ -172,24 +224,22 @@ static int jpeg_v2_5_hw_init(void *handle)
return r;
}
- DRM_INFO("JPEG decode initialized successfully.\n");
-
return 0;
}
/**
* jpeg_v2_5_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the JPEG block, mark ring as not ready any more
*/
-static int jpeg_v2_5_hw_fini(void *handle)
+static int jpeg_v2_5_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
@@ -197,7 +247,10 @@ static int jpeg_v2_5_hw_fini(void *handle)
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS))
- jpeg_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ jpeg_v2_5_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG))
+ amdgpu_irq_put(adev, &adev->jpeg.inst[i].ras_poison_irq, 0);
}
return 0;
@@ -206,20 +259,19 @@ static int jpeg_v2_5_hw_fini(void *handle)
/**
* jpeg_v2_5_suspend - suspend JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend JPEG block
*/
-static int jpeg_v2_5_suspend(void *handle)
+static int jpeg_v2_5_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = jpeg_v2_5_hw_fini(adev);
+ r = jpeg_v2_5_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_jpeg_suspend(adev);
+ r = amdgpu_jpeg_suspend(ip_block->adev);
return r;
}
@@ -227,20 +279,19 @@ static int jpeg_v2_5_suspend(void *handle)
/**
* jpeg_v2_5_resume - resume JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init JPEG block
*/
-static int jpeg_v2_5_resume(void *handle)
+static int jpeg_v2_5_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = amdgpu_jpeg_resume(adev);
+ r = amdgpu_jpeg_resume(ip_block->adev);
if (r)
return r;
- r = jpeg_v2_5_hw_init(adev);
+ r = jpeg_v2_5_hw_init(ip_block);
return r;
}
@@ -287,6 +338,44 @@ static void jpeg_v2_5_enable_clock_gating(struct amdgpu_device *adev, int inst)
WREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE, data);
}
+static void jpeg_v2_5_start_inst(struct amdgpu_device *adev, int i)
+{
+ struct amdgpu_ring *ring = adev->jpeg.inst[i].ring_dec;
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* JPEG disable CGC */
+ jpeg_v2_5_disable_clock_gating(adev, i);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC_MASK);
+
+ WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR);
+}
+
/**
* jpeg_v2_5_start - start JPEG block
*
@@ -296,52 +385,33 @@ static void jpeg_v2_5_enable_clock_gating(struct amdgpu_device *adev, int inst)
*/
static int jpeg_v2_5_start(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
int i;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
continue;
+ jpeg_v2_5_start_inst(adev, i);
- ring = &adev->jpeg.inst[i].ring_dec;
- /* disable anti hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), 0,
- ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
-
- /* JPEG disable CGC */
- jpeg_v2_5_disable_clock_gating(adev, i);
-
- /* MJPEG global tiling registers */
- WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX8_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
- WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX10_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
-
- /* enable JMI channel */
- WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), 0,
- ~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
- /* enable System Interrupt for JRBC */
- WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmJPEG_SYS_INT_EN),
- JPEG_SYS_INT_EN__DJRBC_MASK,
- ~JPEG_SYS_INT_EN__DJRBC_MASK);
-
- WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_VMID, 0);
- WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
- WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
- lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
- upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_RPTR, 0);
- WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR, 0);
- WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, 0x00000002L);
- WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
- ring->wptr = RREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR);
}
return 0;
}
+static void jpeg_v2_5_stop_inst(struct amdgpu_device *adev, int i)
+{
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v2_5_enable_clock_gating(adev, i);
+
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+}
+
/**
* jpeg_v2_5_stop - stop JPEG block
*
@@ -356,18 +426,7 @@ static int jpeg_v2_5_stop(struct amdgpu_device *adev)
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
continue;
-
- /* reset JMI */
- WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL),
- UVD_JMI_CNTL__SOFT_RESET_MASK,
- ~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
- jpeg_v2_5_enable_clock_gating(adev, i);
-
- /* enable anti hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS),
- UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
- ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+ jpeg_v2_5_stop_inst(adev, i);
}
return 0;
@@ -399,7 +458,7 @@ static uint64_t jpeg_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_WPTR);
}
@@ -416,7 +475,7 @@ static void jpeg_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell) {
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
@@ -459,9 +518,9 @@ static void jpeg_v2_6_dec_ring_insert_end(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, (1 << (ring->me * 2 + 14)));
}
-static bool jpeg_v2_5_is_idle(void *handle)
+static bool jpeg_v2_5_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 1;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
@@ -476,9 +535,9 @@ static bool jpeg_v2_5_is_idle(void *handle)
return ret;
}
-static int jpeg_v2_5_wait_for_idle(void *handle)
+static int jpeg_v2_5_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
@@ -495,10 +554,10 @@ static int jpeg_v2_5_wait_for_idle(void *handle)
return 0;
}
-static int jpeg_v2_5_set_clockgating_state(void *handle,
+static int jpeg_v2_5_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
int i;
@@ -507,7 +566,7 @@ static int jpeg_v2_5_set_clockgating_state(void *handle,
continue;
if (enable) {
- if (!jpeg_v2_5_is_idle(handle))
+ if (!jpeg_v2_5_is_idle(ip_block))
return -EBUSY;
jpeg_v2_5_enable_clock_gating(adev, i);
} else {
@@ -518,13 +577,13 @@ static int jpeg_v2_5_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v2_5_set_powergating_state(void *handle,
+static int jpeg_v2_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
- if(state == adev->jpeg.cur_state)
+ if (state == adev->jpeg.cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
@@ -532,7 +591,7 @@ static int jpeg_v2_5_set_powergating_state(void *handle,
else
ret = jpeg_v2_5_start(adev);
- if(!ret)
+ if (!ret)
adev->jpeg.cur_state = state;
return ret;
@@ -546,6 +605,14 @@ static int jpeg_v2_5_set_interrupt_state(struct amdgpu_device *adev,
return 0;
}
+static int jpeg_v2_6_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -568,7 +635,7 @@ static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev,
switch (entry->src_id) {
case VCN_2_0__SRCID__JPEG_DECODE:
- amdgpu_fence_process(&adev->jpeg.inst[ip_instance].ring_dec);
+ amdgpu_fence_process(adev->jpeg.inst[ip_instance].ring_dec);
break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
@@ -579,10 +646,19 @@ static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+static int jpeg_v2_5_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+ jpeg_v2_5_stop_inst(ring->adev, ring->me);
+ jpeg_v2_5_start_inst(ring->adev, ring->me);
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = {
.name = "jpeg_v2_5",
.early_init = jpeg_v2_5_early_init,
- .late_init = NULL,
.sw_init = jpeg_v2_5_sw_init,
.sw_fini = jpeg_v2_5_sw_fini,
.hw_init = jpeg_v2_5_hw_init,
@@ -591,18 +667,15 @@ static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = {
.resume = jpeg_v2_5_resume,
.is_idle = jpeg_v2_5_is_idle,
.wait_for_idle = jpeg_v2_5_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = jpeg_v2_5_set_clockgating_state,
.set_powergating_state = jpeg_v2_5_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
};
static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = {
.name = "jpeg_v2_6",
.early_init = jpeg_v2_5_early_init,
- .late_init = NULL,
.sw_init = jpeg_v2_5_sw_init,
.sw_fini = jpeg_v2_5_sw_fini,
.hw_init = jpeg_v2_5_hw_init,
@@ -611,21 +684,19 @@ static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = {
.resume = jpeg_v2_5_resume,
.is_idle = jpeg_v2_5_is_idle,
.wait_for_idle = jpeg_v2_5_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = jpeg_v2_5_set_clockgating_state,
.set_powergating_state = jpeg_v2_5_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
};
static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_JPEG,
.align_mask = 0xf,
- .vmhub = AMDGPU_MMHUB_1,
.get_rptr = jpeg_v2_5_dec_ring_get_rptr,
.get_wptr = jpeg_v2_5_dec_ring_get_wptr,
.set_wptr = jpeg_v2_5_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
@@ -647,15 +718,16 @@ static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = {
.emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v2_5_ring_reset,
};
static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_JPEG,
.align_mask = 0xf,
- .vmhub = AMDGPU_MMHUB_0,
.get_rptr = jpeg_v2_5_dec_ring_get_rptr,
.get_wptr = jpeg_v2_5_dec_ring_get_wptr,
.set_wptr = jpeg_v2_5_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
@@ -677,6 +749,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = {
.emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v2_5_ring_reset,
};
static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
@@ -687,11 +760,10 @@ static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
if (adev->jpeg.harvest_config & (1 << i))
continue;
if (adev->asic_type == CHIP_ARCTURUS)
- adev->jpeg.inst[i].ring_dec.funcs = &jpeg_v2_5_dec_ring_vm_funcs;
+ adev->jpeg.inst[i].ring_dec->funcs = &jpeg_v2_5_dec_ring_vm_funcs;
else /* CHIP_ALDEBARAN */
- adev->jpeg.inst[i].ring_dec.funcs = &jpeg_v2_6_dec_ring_vm_funcs;
- adev->jpeg.inst[i].ring_dec.me = i;
- DRM_INFO("JPEG(%d) JPEG decode is enabled in VM mode\n", i);
+ adev->jpeg.inst[i].ring_dec->funcs = &jpeg_v2_6_dec_ring_vm_funcs;
+ adev->jpeg.inst[i].ring_dec->me = i;
}
}
@@ -700,6 +772,11 @@ static const struct amdgpu_irq_src_funcs jpeg_v2_5_irq_funcs = {
.process = jpeg_v2_5_process_interrupt,
};
+static const struct amdgpu_irq_src_funcs jpeg_v2_6_ras_irq_funcs = {
+ .set = jpeg_v2_6_set_ras_interrupt_state,
+ .process = amdgpu_jpeg_process_poison_irq,
+};
+
static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev)
{
int i;
@@ -710,11 +787,13 @@ static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev)
adev->jpeg.inst[i].irq.num_types = 1;
adev->jpeg.inst[i].irq.funcs = &jpeg_v2_5_irq_funcs;
+
+ adev->jpeg.inst[i].ras_poison_irq.num_types = 1;
+ adev->jpeg.inst[i].ras_poison_irq.funcs = &jpeg_v2_6_ras_irq_funcs;
}
}
-const struct amdgpu_ip_block_version jpeg_v2_5_ip_block =
-{
+const struct amdgpu_ip_block_version jpeg_v2_5_ip_block = {
.type = AMD_IP_BLOCK_TYPE_JPEG,
.major = 2,
.minor = 5,
@@ -722,11 +801,69 @@ const struct amdgpu_ip_block_version jpeg_v2_5_ip_block =
.funcs = &jpeg_v2_5_ip_funcs,
};
-const struct amdgpu_ip_block_version jpeg_v2_6_ip_block =
-{
+const struct amdgpu_ip_block_version jpeg_v2_6_ip_block = {
.type = AMD_IP_BLOCK_TYPE_JPEG,
.major = 2,
.minor = 6,
.rev = 0,
.funcs = &jpeg_v2_6_ip_funcs,
};
+
+static uint32_t jpeg_v2_6_query_poison_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, uint32_t sub_block)
+{
+ uint32_t poison_stat = 0, reg_value = 0;
+
+ switch (sub_block) {
+ case AMDGPU_JPEG_V2_6_JPEG0:
+ reg_value = RREG32_SOC15(JPEG, instance, mmUVD_RAS_JPEG0_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG0_STATUS, POISONED_PF);
+ break;
+ case AMDGPU_JPEG_V2_6_JPEG1:
+ reg_value = RREG32_SOC15(JPEG, instance, mmUVD_RAS_JPEG1_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG1_STATUS, POISONED_PF);
+ break;
+ default:
+ break;
+ }
+
+ if (poison_stat)
+ dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n",
+ instance, sub_block);
+
+ return poison_stat;
+}
+
+static bool jpeg_v2_6_query_ras_poison_status(struct amdgpu_device *adev)
+{
+ uint32_t inst = 0, sub = 0, poison_stat = 0;
+
+ for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++)
+ for (sub = 0; sub < AMDGPU_JPEG_V2_6_MAX_SUB_BLOCK; sub++)
+ poison_stat +=
+ jpeg_v2_6_query_poison_by_instance(adev, inst, sub);
+
+ return !!poison_stat;
+}
+
+const struct amdgpu_ras_block_hw_ops jpeg_v2_6_ras_hw_ops = {
+ .query_poison_status = jpeg_v2_6_query_ras_poison_status,
+};
+
+static struct amdgpu_jpeg_ras jpeg_v2_6_ras = {
+ .ras_block = {
+ .hw_ops = &jpeg_v2_6_ras_hw_ops,
+ .ras_late_init = amdgpu_jpeg_ras_late_init,
+ },
+};
+
+static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, JPEG_HWIP, 0)) {
+ case IP_VERSION(2, 6, 0):
+ adev->jpeg.ras = &jpeg_v2_6_ras;
+ break;
+ default:
+ break;
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h
index 3b0aa29b9879..1e858c6cdf13 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h
@@ -24,6 +24,13 @@
#ifndef __JPEG_V2_5_H__
#define __JPEG_V2_5_H__
+enum amdgpu_jpeg_v2_6_sub_block {
+ AMDGPU_JPEG_V2_6_JPEG0 = 0,
+ AMDGPU_JPEG_V2_6_JPEG1,
+
+ AMDGPU_JPEG_V2_6_MAX_SUB_BLOCK,
+};
+
extern const struct amdgpu_ip_block_version jpeg_v2_5_ip_block;
extern const struct amdgpu_ip_block_version jpeg_v2_6_ip_block;
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
index 01c242c5abc3..d1a011c40ba2 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
@@ -34,30 +34,53 @@
#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_3_0[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_UV_PITCH),
+};
+
static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v3_0_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v3_0_set_powergating_state(void *handle,
+static int jpeg_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
/**
* jpeg_v3_0_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-static int jpeg_v3_0_early_init(void *handle)
+static int jpeg_v3_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- if (adev->asic_type != CHIP_YELLOW_CARP) {
- u32 harvest = RREG32_SOC15(JPEG, 0, mmCC_UVD_HARVESTING);
+ u32 harvest;
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
+ case IP_VERSION(3, 1, 1):
+ case IP_VERSION(3, 1, 2):
+ break;
+ default:
+ harvest = RREG32_SOC15(JPEG, 0, mmCC_UVD_HARVESTING);
if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
return -ENOENT;
+ break;
}
adev->jpeg.num_jpeg_inst = 1;
+ adev->jpeg.num_jpeg_rings = 1;
jpeg_v3_0_set_dec_ring_funcs(adev);
jpeg_v3_0_set_irq_funcs(adev);
@@ -68,13 +91,13 @@ static int jpeg_v3_0_early_init(void *handle)
/**
* jpeg_v3_0_sw_init - sw init for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int jpeg_v3_0_sw_init(void *handle)
+static int jpeg_v3_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int r;
@@ -92,37 +115,50 @@ static int jpeg_v3_0_sw_init(void *handle)
if (r)
return r;
- ring = &adev->jpeg.inst->ring_dec;
+ ring = adev->jpeg.inst->ring_dec;
ring->use_doorbell = true;
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "jpeg_dec");
r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
- adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
- adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
+ adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
- return 0;
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_3_0, ARRAY_SIZE(jpeg_reg_list_3_0));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec);
+ if (!amdgpu_sriov_vf(adev))
+ adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return r;
}
/**
* jpeg_v3_0_sw_fini - sw fini for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* JPEG suspend and free up sw allocation
*/
-static int jpeg_v3_0_sw_fini(void *handle)
+static int jpeg_v3_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_jpeg_suspend(adev);
if (r)
return r;
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
r = amdgpu_jpeg_sw_fini(adev);
return r;
@@ -131,43 +167,36 @@ static int jpeg_v3_0_sw_fini(void *handle)
/**
* jpeg_v3_0_hw_init - start and test JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int jpeg_v3_0_hw_init(void *handle)
+static int jpeg_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
- int r;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
(adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
- r = amdgpu_ring_test_helper(ring);
- if (r)
- return r;
-
- DRM_INFO("JPEG decode initialized successfully.\n");
-
- return 0;
+ return amdgpu_ring_test_helper(ring);
}
/**
* jpeg_v3_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the JPEG block, mark ring as not ready any more
*/
-static int jpeg_v3_0_hw_fini(void *handle)
+static int jpeg_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
- jpeg_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ jpeg_v3_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
return 0;
}
@@ -175,20 +204,19 @@ static int jpeg_v3_0_hw_fini(void *handle)
/**
* jpeg_v3_0_suspend - suspend JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend JPEG block
*/
-static int jpeg_v3_0_suspend(void *handle)
+static int jpeg_v3_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = jpeg_v3_0_hw_fini(adev);
+ r = jpeg_v3_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_jpeg_suspend(adev);
+ r = amdgpu_jpeg_suspend(ip_block->adev);
return r;
}
@@ -196,20 +224,19 @@ static int jpeg_v3_0_suspend(void *handle)
/**
* jpeg_v3_0_resume - resume JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init JPEG block
*/
-static int jpeg_v3_0_resume(void *handle)
+static int jpeg_v3_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = amdgpu_jpeg_resume(adev);
+ r = amdgpu_jpeg_resume(ip_block->adev);
if (r)
return r;
- r = jpeg_v3_0_hw_init(adev);
+ r = jpeg_v3_0_hw_init(ip_block);
return r;
}
@@ -323,7 +350,7 @@ static int jpeg_v3_0_enable_static_power_gating(struct amdgpu_device *adev)
*/
static int jpeg_v3_0_start(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
int r;
if (adev->pm.dpm_enabled)
@@ -422,7 +449,7 @@ static uint64_t jpeg_v3_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
}
@@ -439,16 +466,16 @@ static void jpeg_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell) {
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
}
}
-static bool jpeg_v3_0_is_idle(void *handle)
+static bool jpeg_v3_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 1;
ret &= (((RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS) &
@@ -458,23 +485,23 @@ static bool jpeg_v3_0_is_idle(void *handle)
return ret;
}
-static int jpeg_v3_0_wait_for_idle(void *handle)
+static int jpeg_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
}
-static int jpeg_v3_0_set_clockgating_state(void *handle,
+static int jpeg_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = state == AMD_CG_STATE_GATE;
if (enable) {
- if (!jpeg_v3_0_is_idle(handle))
+ if (!jpeg_v3_0_is_idle(ip_block))
return -EBUSY;
jpeg_v3_0_enable_clock_gating(adev);
} else {
@@ -484,10 +511,10 @@ static int jpeg_v3_0_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v3_0_set_powergating_state(void *handle,
+static int jpeg_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if(state == adev->jpeg.cur_state)
@@ -520,7 +547,7 @@ static int jpeg_v3_0_process_interrupt(struct amdgpu_device *adev,
switch (entry->src_id) {
case VCN_2_0__SRCID__JPEG_DECODE:
- amdgpu_fence_process(&adev->jpeg.inst->ring_dec);
+ amdgpu_fence_process(adev->jpeg.inst->ring_dec);
break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
@@ -531,10 +558,25 @@ static int jpeg_v3_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+static int jpeg_v3_0_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+ r = jpeg_v3_0_stop(ring->adev);
+ if (r)
+ return r;
+ r = jpeg_v3_0_start(ring->adev);
+ if (r)
+ return r;
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = {
.name = "jpeg_v3_0",
.early_init = jpeg_v3_0_early_init,
- .late_init = NULL,
.sw_init = jpeg_v3_0_sw_init,
.sw_fini = jpeg_v3_0_sw_fini,
.hw_init = jpeg_v3_0_hw_init,
@@ -543,21 +585,19 @@ static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = {
.resume = jpeg_v3_0_resume,
.is_idle = jpeg_v3_0_is_idle,
.wait_for_idle = jpeg_v3_0_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = jpeg_v3_0_set_clockgating_state,
.set_powergating_state = jpeg_v3_0_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
};
static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_JPEG,
.align_mask = 0xf,
- .vmhub = AMDGPU_MMHUB_0,
.get_rptr = jpeg_v3_0_dec_ring_get_rptr,
.get_wptr = jpeg_v3_0_dec_ring_get_wptr,
.set_wptr = jpeg_v3_0_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
@@ -579,12 +619,12 @@ static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = {
.emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v3_0_ring_reset,
};
static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
- adev->jpeg.inst->ring_dec.funcs = &jpeg_v3_0_dec_ring_vm_funcs;
- DRM_INFO("JPEG decode is enabled in VM mode\n");
+ adev->jpeg.inst->ring_dec->funcs = &jpeg_v3_0_dec_ring_vm_funcs;
}
static const struct amdgpu_irq_src_funcs jpeg_v3_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
new file mode 100644
index 000000000000..33db2c1ae6cc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
@@ -0,0 +1,878 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v2_0.h"
+#include "jpeg_v4_0.h"
+#include "mmsch_v4_0.h"
+
+#include "vcn/vcn_4_0_0_offset.h"
+#include "vcn/vcn_4_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
+
+#define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+
+/*
+ * JPEG 4.0 registers (all on instance 0) that jpeg_v4_0_sw_init() hands to
+ * amdgpu_jpeg_reg_dump_init(). Presumably this is the set captured by the
+ * dump_ip_state/print_ip_state callbacks - confirm against amdgpu_jpeg.c.
+ */
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_4_0[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
+};
+
+static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev);
+static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v4_0_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
+static void jpeg_v4_0_set_ras_funcs(struct amdgpu_device *adev);
+static void jpeg_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring);
+
+/**
+ * jpeg_v4_0_early_init - set function pointers
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Declare one JPEG instance with one ring, then install the decode ring,
+ * irq and RAS function tables.
+ *
+ * Return: always 0.
+ */
+static int jpeg_v4_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+
+ /* JPEG 4.0 exposes a single instance with a single decode ring. */
+ adev->jpeg.num_jpeg_inst = 1;
+ adev->jpeg.num_jpeg_rings = 1;
+
+ jpeg_v4_0_set_dec_ring_funcs(adev);
+ jpeg_v4_0_set_irq_funcs(adev);
+ jpeg_v4_0_set_ras_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_sw_init - sw init for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Register the JPEG decode and poison interrupt sources, run the common
+ * amdgpu_jpeg_sw_init()/amdgpu_jpeg_resume() helpers, set up the decode
+ * ring, then initialise RAS, the register dump list and the reset mask.
+ *
+ * Return: 0 on success, or the error code of the first step that fails.
+ */
+static int jpeg_v4_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int r;
+
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_4_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq);
+ if (r)
+ return r;
+
+ /* JPEG DJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
+ if (r)
+ return r;
+
+ /* JPEG EJPEG POISON EVENT */
+ /* Both poison source ids are routed to the same ras_poison_irq source. */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ ring = adev->jpeg.inst->ring_dec;
+ ring->use_doorbell = true;
+ /* SR-IOV VFs use doorbell slot (vcn_ring0_1 << 1) + 4, bare metal uses + 1. */
+ ring->doorbell_index = amdgpu_sriov_vf(adev) ? (((adev->doorbell_index.vcn.vcn_ring0_1) << 1) + 4) : ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1);
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+
+ sprintf(ring->name, "jpeg_dec");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ /* Record the internal and the SOC15 (external) offset of UVD_JPEG_PITCH. */
+ adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH);
+
+ r = amdgpu_jpeg_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_4_0, ARRAY_SIZE(jpeg_reg_list_4_0));
+ if (r)
+ return r;
+
+ /* Per-queue reset is only advertised when not running as an SR-IOV VF. */
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec);
+ if (!amdgpu_sriov_vf(adev))
+ adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v4_0_sw_fini - sw fini for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Suspend JPEG, call amdgpu_jpeg_sysfs_reset_mask_fini() and then release
+ * the software state through amdgpu_jpeg_sw_fini().
+ *
+ * Return: 0 on success, or the error from the suspend / sw_fini helper.
+ */
+static int jpeg_v4_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int err = amdgpu_jpeg_suspend(adev);
+
+	/* A failed suspend aborts the teardown before anything is freed. */
+	if (err)
+		return err;
+
+	amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
+	return amdgpu_jpeg_sw_fini(adev);
+}
+
+/**
+ * jpeg_v4_0_hw_init - start and test JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * On bare metal, program the doorbell range and the JPEG doorbell control
+ * register, then run the ring test. Under SR-IOV, start the block through
+ * jpeg_v4_0_start_sriov(), reset the write pointer and mark the ring's
+ * scheduler ready without a ring test.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int jpeg_v4_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
+	int r;
+
+	if (!amdgpu_sriov_vf(adev)) {
+		adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+			(adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
+
+		WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL,
+			ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+			VCN_JPEG_DB_CTRL__EN_MASK);
+
+		return amdgpu_ring_test_helper(ring);
+	}
+
+	/* SR-IOV path */
+	r = jpeg_v4_0_start_sriov(adev);
+	if (r)
+		return r;
+
+	ring->wptr = 0;
+	ring->wptr_old = 0;
+	jpeg_v4_0_dec_ring_set_wptr(ring);
+	ring->sched.ready = true;
+
+	return 0;
+}
+
+/**
+ * jpeg_v4_0_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Cancel the pending JPEG idle work. On bare metal, power-gate the block if
+ * it is not already gated and UVD_JRBC_STATUS reads non-zero. Release the
+ * RAS poison interrupt when RAS is supported for JPEG.
+ *
+ * Return: always 0.
+ */
+static int jpeg_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
+ if (!amdgpu_sriov_vf(adev)) {
+ /* The gating result is not checked; hw_fini reports success regardless. */
+ if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS))
+ jpeg_v4_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
+ }
+ /* NOTE(review): the matching amdgpu_irq_get() is not in this file - presumably done in the RAS late-init path; confirm. */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG))
+ amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0);
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_suspend - suspend JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Run hw_fini first; only if that succeeds, suspend the common JPEG state.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int jpeg_v4_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+	int err = jpeg_v4_0_hw_fini(ip_block);
+
+	if (err)
+		return err;
+
+	return amdgpu_jpeg_suspend(ip_block->adev);
+}
+
+/**
+ * jpeg_v4_0_resume - resume JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Resume the common JPEG state first; only if that succeeds, run hw_init.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int jpeg_v4_0_resume(struct amdgpu_ip_block *ip_block)
+{
+	int err = amdgpu_jpeg_resume(ip_block->adev);
+
+	if (err)
+		return err;
+
+	return jpeg_v4_0_hw_init(ip_block);
+}
+
+/**
+ * jpeg_v4_0_disable_clock_gating - ungate the JPEG clocks
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Program JPEG_CGC_CTRL (dynamic clock mode, decoder mode, gate delay timer
+ * and clock-off delay), then clear the JPEG_DEC, JPEG2_DEC, JMCIF and JRBBM
+ * bits in JPEG_CGC_GATE.
+ */
+static void jpeg_v4_0_disable_clock_gating(struct amdgpu_device *adev)
+{
+	uint32_t data;
+
+	data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL);
+	if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
+		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+		data &= (~JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK);
+	} else {
+		/*
+		 * Clear the DYN_CLOCK_MODE bit. The mask is built from the
+		 * shift so that it targets the same bit the branch above
+		 * sets; inverting the bare shift value would not.
+		 */
+		data &= ~(1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT);
+	}
+
+	data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+	data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+	WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data);
+
+	data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE);
+	data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
+		| JPEG_CGC_GATE__JPEG2_DEC_MASK
+		| JPEG_CGC_GATE__JMCIF_MASK
+		| JPEG_CGC_GATE__JRBBM_MASK);
+	WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data);
+}
+
+/**
+ * jpeg_v4_0_enable_clock_gating - gate the JPEG clocks
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Program JPEG_CGC_CTRL (dynamic clock mode, decoder mode, gate delay timer
+ * and clock-off delay), then set the JPEG_DEC, JPEG2_DEC, JMCIF and JRBBM
+ * bits in JPEG_CGC_GATE.
+ */
+static void jpeg_v4_0_enable_clock_gating(struct amdgpu_device *adev)
+{
+	uint32_t data;
+
+	data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL);
+	if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
+		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+		data |= JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK;
+	} else {
+		/*
+		 * Clear the DYN_CLOCK_MODE bit. The mask is built from the
+		 * shift so that it targets the same bit the branch above
+		 * sets; inverting the bare shift value would not.
+		 */
+		data &= ~(1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT);
+	}
+
+	data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+	data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+	WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data);
+
+	data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE);
+	data |= (JPEG_CGC_GATE__JPEG_DEC_MASK
+		| JPEG_CGC_GATE__JPEG2_DEC_MASK
+		| JPEG_CGC_GATE__JMCIF_MASK
+		| JPEG_CGC_GATE__JRBBM_MASK);
+	WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data);
+}
+
+/*
+ * Power the JPEG block up. When AMD_PG_SUPPORT_JPEG is set, write power
+ * config value 1 to UVD_PGFSM_CONFIG and wait until the UVDJ power status
+ * field of UVD_PGFSM_STATUS reads UVD_PGFSM_STATUS_UVDJ_PWR_ON. Afterwards,
+ * regardless of the flag, clear the JPEG_POWER_STATUS and JPEG_PG_MODE bits
+ * in UVD_JPEG_POWER_STATUS.
+ *
+ * Returns 0 on success, or the error from the status wait.
+ */
+static int jpeg_v4_0_disable_static_power_gating(struct amdgpu_device *adev)
+{
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ uint32_t data = 0;
+ int r = 0;
+
+ data = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_PGFSM_CONFIG), data);
+
+ r = SOC15_WAIT_ON_RREG(JPEG, 0,
+ regUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON,
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
+
+ if (r) {
+ DRM_DEV_ERROR(adev->dev, "amdgpu: JPEG disable power gating failed\n");
+ return r;
+ }
+ }
+
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* keep the JPEG in static PG mode */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK);
+
+ return 0;
+}
+
+/*
+ * Power-gate the JPEG block. Set the JPEG_POWER_STATUS bit in
+ * UVD_JPEG_POWER_STATUS first; then, when AMD_PG_SUPPORT_JPEG is set, write
+ * power config value 2 to UVD_PGFSM_CONFIG and wait until the UVDJ power
+ * status field of UVD_PGFSM_STATUS reads 2.
+ *
+ * Returns 0 on success, or the error from the status wait.
+ */
+static int jpeg_v4_0_enable_static_power_gating(struct amdgpu_device *adev)
+{
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ uint32_t data = 0;
+ int r = 0;
+
+ data = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_PGFSM_CONFIG), data);
+
+ r = SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_PGFSM_STATUS,
+ (2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT),
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
+
+ if (r) {
+ DRM_DEV_ERROR(adev->dev, "amdgpu: JPEG enable power gating failed\n");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block: enable JPEG DPM when DPM is active, leave
+ * static power gating, ungate the clocks, then program the tiling config,
+ * the JMI channel, the JRBC interrupt and the decode ring buffer registers.
+ *
+ * Return: 0 on success, or the error from disabling static power gating.
+ */
+static int jpeg_v4_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
+ int r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, true);
+
+ /* disable power gating */
+ r = jpeg_v4_0_disable_static_power_gating(adev);
+ if (r)
+ return r;
+
+ /* JPEG disable CGC */
+ jpeg_v4_0_disable_clock_gating(adev);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC_MASK);
+
+ /*
+ * Ring buffer setup: VMID 0, ring base address, read/write pointers
+ * reset to 0, then the ring size in dwords.
+ * NOTE(review): RB_CNTL is written as 0x1|0x2 before the base/pointer
+ * writes and as 0x2 afterwards; the bit meanings are not visible here -
+ * presumably bit 0 holds off fetching during setup. Confirm against
+ * the UVD_JRBC_RB_CNTL field definitions.
+ */
+ WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ /* Resynchronise the software write pointer with the hardware value. */
+ ring->wptr = RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR);
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_start_sriov - start the JPEG block under SR-IOV via the MMSCH
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Build a JPEG init table (ring base address and ring size direct writes
+ * followed by an end packet) behind the existing content of the shared MM
+ * table, update the table header, then pass the table to the MMSCH through
+ * the MMSCH_VF_* registers and poll the mailbox response.
+ *
+ * Return: 0 on success, -EBUSY if the mailbox stays at zero for the whole
+ * polling budget (about 1000 usec), -EINVAL if the response and init status
+ * are rejected.
+ */
+static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ uint64_t ctx_addr;
+ uint32_t param, resp, expected;
+ uint32_t tmp, timeout;
+
+ struct amdgpu_mm_table *table = &adev->virt.mm_table;
+ uint32_t *table_loc;
+ uint32_t table_size;
+ /* size/size_dw are presumably used inside the MMSCH_V4_0_INSERT_* macros - confirm in mmsch_v4_0.h */
+ uint32_t size, size_dw;
+ uint32_t init_status;
+
+ struct mmsch_v4_0_cmd_direct_write
+ direct_wt = { {0} };
+ struct mmsch_v4_0_cmd_end end = { {0} };
+ struct mmsch_v4_0_init_header header;
+
+ direct_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_WRITE;
+ end.cmd_header.command_type =
+ MMSCH_COMMAND__END;
+
+ /* Start from the header currently stored at the top of the MM table. */
+ size = sizeof(struct mmsch_v4_0_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy(&header, (void *)table_loc, size);
+
+ header.version = MMSCH_VERSION;
+ header.total_size = RREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_SIZE);
+
+ header.jpegdec.init_status = 0;
+ header.jpegdec.table_offset = 0;
+ header.jpegdec.table_size = 0;
+
+ /* total_size is used as an offset in 32-bit words: the JPEG table starts right after it. */
+ table_loc = (uint32_t *)table->cpu_addr;
+ table_loc += header.total_size;
+
+ table_size = 0;
+
+ ring = adev->jpeg.inst->ring_dec;
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(JPEG, 0,
+ regUVD_LMI_JRBC_RB_64BIT_BAR_LOW),
+ lower_32_bits(ring->gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(JPEG, 0,
+ regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH),
+ upper_32_bits(ring->gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(JPEG, 0,
+ regUVD_JRBC_RB_SIZE), ring->ring_size / 4);
+
+ /* add end packet */
+ MMSCH_V4_0_INSERT_END();
+
+ /* refine header */
+ header.jpegdec.init_status = 0;
+ header.jpegdec.table_offset = header.total_size;
+ header.jpegdec.table_size = table_size;
+ header.total_size += table_size;
+
+ /* Update init table header in memory */
+ size = sizeof(struct mmsch_v4_0_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy((void *)table_loc, &header, size);
+
+ /* Perform HDP flush before writing to MMSCH registers */
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ /* message MMSCH (in VCN[0]) to initialize this client
+ * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr
+ * of memory descriptor location
+ */
+ ctx_addr = table->gpu_addr;
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+ /* 2, update vmid of descriptor */
+ tmp = RREG32_SOC15(VCN, 0, regMMSCH_VF_VMID);
+ tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ /* use domain0 for MM scheduler */
+ tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_VMID, tmp);
+
+ /* 3, notify mmsch about the size of this descriptor */
+ size = header.total_size;
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_SIZE, size);
+
+ /* 4, set resp to zero */
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP, 0);
+
+ /* 5, kick off the initialization and wait until
+ * MMSCH_VF_MAILBOX_RESP becomes non-zero
+ */
+ param = 0x00000001;
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_HOST, param);
+ tmp = 0;
+ timeout = 1000;
+ resp = 0;
+ expected = MMSCH_VF_MAILBOX_RESP__OK;
+ /*
+ * NOTE(review): init_status is sampled here, before the polling loop
+ * has observed any response, and the header was just written with
+ * jpegdec.init_status = 0. Confirm whether this read should happen
+ * after the loop so it reflects what the MMSCH reported.
+ */
+ init_status = ((struct mmsch_v4_0_init_header *)(table_loc))->jpegdec.init_status;
+ /* Poll every 10 usec; any non-zero response ends the wait. */
+ while (resp != expected) {
+ resp = RREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP);
+
+ if (resp != 0)
+ break;
+ udelay(10);
+ tmp = tmp + 10;
+ if (tmp >= timeout) {
+ DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+ " waiting for regMMSCH_VF_MAILBOX_RESP "\
+ "(expected=0x%08x, readback=0x%08x)\n",
+ tmp, expected, resp);
+ return -EBUSY;
+ }
+ }
+ /* Fail only if the response is neither OK nor INCOMPLETE and the sampled init status is not PASS. */
+ if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
+ && init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
+ DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n", resp, init_status);
+ return -EINVAL;
+ }
+
+ return 0;
+
+}
+
+/**
+ * jpeg_v4_0_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block: put the JMI into soft reset, gate the clocks, enter
+ * static power gating and, when DPM is active, disable JPEG DPM.
+ *
+ * Return: 0 on success, or the error from enabling static power gating (in
+ * which case JPEG DPM is left untouched).
+ */
+static int jpeg_v4_0_stop(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v4_0_enable_clock_gating(adev);
+
+ /* enable power gating */
+ r = jpeg_v4_0_enable_static_power_gating(adev);
+ if (r)
+ return r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, false);
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer, read from UVD_JRBC_RB_RPTR.
+ */
+static uint64_t jpeg_v4_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ /* adev is not named below; presumably RREG32_SOC15() picks it up implicitly - confirm in soc15_common.h */
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR);
+}
+
+/**
+ * jpeg_v4_0_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current write pointer: the CPU-side copy when the ring uses a
+ * doorbell, otherwise the value of UVD_JRBC_RB_WPTR.
+ */
+static uint64_t jpeg_v4_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (!ring->use_doorbell)
+		return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR);
+
+	return *ring->wptr_cpu_addr;
+}
+
+/**
+ * jpeg_v4_0_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the low 32 bits of the write pointer to the hardware: through the
+ * CPU-side copy plus doorbell when the ring uses a doorbell, otherwise by
+ * writing UVD_JRBC_RB_WPTR directly.
+ */
+static void jpeg_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (!ring->use_doorbell) {
+		WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+		return;
+	}
+
+	/* Publish the CPU-side copy before ringing the doorbell. */
+	*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+}
+
+/*
+ * Report whether the JPEG block is idle, i.e. whether the RB_JOB_DONE bit
+ * is set in UVD_JRBC_STATUS.
+ */
+static bool jpeg_v4_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	uint32_t status = RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS);
+
+	return (status & UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+		UVD_JRBC_STATUS__RB_JOB_DONE_MASK;
+}
+
+/*
+ * Wait until the RB_JOB_DONE bit is set in UVD_JRBC_STATUS and return the
+ * result of SOC15_WAIT_ON_RREG(). This is the same condition that
+ * jpeg_v4_0_is_idle() tests once.
+ */
+static int jpeg_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC_STATUS,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+}
+
+/*
+ * Gate or ungate the JPEG clocks. Gating is refused with -EBUSY while the
+ * block is not idle; ungating is unconditional. Returns 0 otherwise.
+ */
+static int jpeg_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					   enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	if (state != AMD_CG_STATE_GATE) {
+		jpeg_v4_0_disable_clock_gating(adev);
+		return 0;
+	}
+
+	if (!jpeg_v4_0_is_idle(ip_block))
+		return -EBUSY;
+
+	jpeg_v4_0_enable_clock_gating(adev);
+
+	return 0;
+}
+
+/*
+ * Move the JPEG block to the requested power-gating state. Under SR-IOV the
+ * state is simply recorded as ungated. Otherwise the block is stopped
+ * (gate) or started (ungate) unless it is already in that state, and
+ * cur_state is updated only when the transition succeeds.
+ *
+ * Returns 0 on success or the error from jpeg_v4_0_stop()/jpeg_v4_0_start().
+ */
+static int jpeg_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+					   enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int err;
+
+	if (amdgpu_sriov_vf(adev)) {
+		adev->jpeg.cur_state = AMD_PG_STATE_UNGATE;
+		return 0;
+	}
+
+	if (adev->jpeg.cur_state == state)
+		return 0;
+
+	err = (state == AMD_PG_STATE_GATE) ? jpeg_v4_0_stop(adev)
+					   : jpeg_v4_0_start(adev);
+	if (err)
+		return err;
+
+	adev->jpeg.cur_state = state;
+
+	return 0;
+}
+
+/*
+ * .set callback of the RAS poison interrupt source. It touches no hardware
+ * and ignores all of its arguments; it always returns 0.
+ */
+static int jpeg_v4_0_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+/*
+ * Interrupt handler of the JPEG decode irq source. A JPEG_DECODE source id
+ * triggers fence processing on the decode ring; any other source id is
+ * logged as unhandled. Always returns 0.
+ */
+static int jpeg_v4_0_process_interrupt(struct amdgpu_device *adev,
+				       struct amdgpu_irq_src *source,
+				       struct amdgpu_iv_entry *entry)
+{
+	DRM_DEBUG("IH: JPEG TRAP\n");
+
+	if (entry->src_id == VCN_4_0__SRCID__JPEG_DECODE) {
+		amdgpu_fence_process(adev->jpeg.inst->ring_dec);
+	} else {
+		DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+			      entry->src_id, entry->src_data[0]);
+	}
+
+	return 0;
+}
+
+/*
+ * Reset the decode ring by restarting the whole JPEG block: a
+ * jpeg_v4_0_stop()/jpeg_v4_0_start() cycle wrapped in
+ * amdgpu_ring_reset_helper_begin()/amdgpu_ring_reset_helper_end().
+ * @vmid is not used here; @timedout_fence is only forwarded to the helpers.
+ *
+ * Returns 0 on success, or the error from stop, start or the end helper.
+ */
+static int jpeg_v4_0_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+ /* NOTE(review): when stop or start fails, amdgpu_ring_reset_helper_end() is never called - confirm that the begin helper needs no unwinding. */
+ r = jpeg_v4_0_stop(ring->adev);
+ if (r)
+ return r;
+ r = jpeg_v4_0_start(ring->adev);
+ if (r)
+ return r;
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+/*
+ * IP-block callback table for JPEG 4.0, referenced by jpeg_v4_0_ip_block.
+ * The two IP-state dump callbacks are the shared amdgpu_jpeg_* helpers;
+ * all other entries are implemented in this file.
+ */
+static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = {
+ .name = "jpeg_v4_0",
+ .early_init = jpeg_v4_0_early_init,
+ .sw_init = jpeg_v4_0_sw_init,
+ .sw_fini = jpeg_v4_0_sw_fini,
+ .hw_init = jpeg_v4_0_hw_init,
+ .hw_fini = jpeg_v4_0_hw_fini,
+ .suspend = jpeg_v4_0_suspend,
+ .resume = jpeg_v4_0_resume,
+ .is_idle = jpeg_v4_0_is_idle,
+ .wait_for_idle = jpeg_v4_0_wait_for_idle,
+ .set_clockgating_state = jpeg_v4_0_set_clockgating_state,
+ .set_powergating_state = jpeg_v4_0_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
+};
+
+/*
+ * Ring callbacks of the JPEG 4.0 decode ring. Pointer access and reset are
+ * implemented in this file; packet emission reuses the jpeg_v2_0_*
+ * emitters, and ring/IB tests, begin/end use and command-stream parsing
+ * use the shared amdgpu_jpeg_* helpers.
+ */
+static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v4_0_dec_ring_get_rptr,
+ .get_wptr = jpeg_v4_0_dec_ring_get_wptr,
+ .set_wptr = jpeg_v4_0_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v2_0_dec_ring_emit_vm_flush */
+ 18 + 18 + /* jpeg_v2_0_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v2_0_dec_ring_emit_ib */
+ .emit_ib = jpeg_v2_0_dec_ring_emit_ib,
+ .emit_fence = jpeg_v2_0_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v2_0_dec_ring_nop,
+ .insert_start = jpeg_v2_0_dec_ring_insert_start,
+ .insert_end = jpeg_v2_0_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v4_0_ring_reset,
+};
+
+/* Attach the JPEG 4.0 ring callback table to the decode ring of the first JPEG instance. */
+static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->ring_dec->funcs = &jpeg_v4_0_dec_ring_vm_funcs;
+}
+
+/* Callbacks of the JPEG decode interrupt source; only .process is provided. */
+static const struct amdgpu_irq_src_funcs jpeg_v4_0_irq_funcs = {
+ .process = jpeg_v4_0_process_interrupt,
+};
+
+/*
+ * Callbacks of the RAS poison interrupt source: .set is a stub that always
+ * returns 0, .process is the shared amdgpu_jpeg_process_poison_irq().
+ */
+static const struct amdgpu_irq_src_funcs jpeg_v4_0_ras_irq_funcs = {
+ .set = jpeg_v4_0_set_ras_interrupt_state,
+ .process = amdgpu_jpeg_process_poison_irq,
+};
+
+/*
+ * Install the callback tables of the decode and the RAS poison interrupt
+ * sources of the first JPEG instance; each source has a single type.
+ */
+static void jpeg_v4_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->irq.num_types = 1;
+ adev->jpeg.inst->irq.funcs = &jpeg_v4_0_irq_funcs;
+
+ adev->jpeg.inst->ras_poison_irq.num_types = 1;
+ adev->jpeg.inst->ras_poison_irq.funcs = &jpeg_v4_0_ras_irq_funcs;
+}
+
+/* Exported IP block descriptor for JPEG 4.0.0 (declared in jpeg_v4_0.h). */
+const struct amdgpu_ip_block_version jpeg_v4_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 4,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &jpeg_v4_0_ip_funcs,
+};
+
+/*
+ * Read the POISONED_PF field of the RAS status register that belongs to
+ * @sub_block (JPEG0 or JPEG1) on JPEG instance @instance, and log when it is
+ * set. Unknown sub-blocks read nothing.
+ *
+ * Returns the field value; 0 means no poison was reported.
+ */
+static uint32_t jpeg_v4_0_query_poison_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, uint32_t sub_block)
+{
+ uint32_t poison_stat = 0, reg_value = 0;
+
+ switch (sub_block) {
+ case AMDGPU_JPEG_V4_0_JPEG0:
+ reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG0_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG0_STATUS, POISONED_PF);
+ break;
+ case AMDGPU_JPEG_V4_0_JPEG1:
+ reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG1_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG1_STATUS, POISONED_PF);
+ break;
+ default:
+ break;
+ }
+
+ /* NOTE(review): instance and sub_block are uint32_t but printed with %d. */
+ if (poison_stat)
+ dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n",
+ instance, sub_block);
+
+ return poison_stat;
+}
+
+/*
+ * Query every sub-block of every JPEG instance for poison. All sub-blocks
+ * are always visited (each query may log), and the statuses are summed.
+ *
+ * Returns true when the accumulated status is non-zero.
+ */
+static bool jpeg_v4_0_query_ras_poison_status(struct amdgpu_device *adev)
+{
+	uint32_t i, j;
+	uint32_t total = 0;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+		for (j = 0; j < AMDGPU_JPEG_V4_0_MAX_SUB_BLOCK; j++)
+			total += jpeg_v4_0_query_poison_by_instance(adev, i, j);
+	}
+
+	return total != 0;
+}
+
+/* RAS hardware operations of JPEG 4.0; only the poison status query is provided. */
+const struct amdgpu_ras_block_hw_ops jpeg_v4_0_ras_hw_ops = {
+ .query_poison_status = jpeg_v4_0_query_ras_poison_status,
+};
+
+/*
+ * RAS descriptor that jpeg_v4_0_set_ras_funcs() installs into
+ * adev->jpeg.ras; late init is the shared amdgpu_jpeg_ras_late_init().
+ */
+static struct amdgpu_jpeg_ras jpeg_v4_0_ras = {
+ .ras_block = {
+ .hw_ops = &jpeg_v4_0_ras_hw_ops,
+ .ras_late_init = amdgpu_jpeg_ras_late_init,
+ },
+};
+
+/*
+ * Install the JPEG 4.0 RAS descriptor, but only when the JPEG IP version is
+ * exactly 4.0.0; on any other version adev->jpeg.ras is left untouched.
+ */
+static void jpeg_v4_0_set_ras_funcs(struct amdgpu_device *adev)
+{
+	if (amdgpu_ip_version(adev, JPEG_HWIP, 0) == IP_VERSION(4, 0, 0))
+		adev->jpeg.ras = &jpeg_v4_0_ras;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h
new file mode 100644
index 000000000000..47638fd4d4e2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V4_0_H__
+#define __JPEG_V4_0_H__
+
+/* Sub-block selectors for the JPEG 4.0 RAS poison query. */
+enum amdgpu_jpeg_v4_0_sub_block {
+ AMDGPU_JPEG_V4_0_JPEG0 = 0,
+ AMDGPU_JPEG_V4_0_JPEG1,
+
+ /* Number of sub-blocks; used as the loop bound, not a valid selector. */
+ AMDGPU_JPEG_V4_0_MAX_SUB_BLOCK,
+};
+
+extern const struct amdgpu_ip_block_version jpeg_v4_0_ip_block;
+#endif /* __JPEG_V4_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
new file mode 100644
index 000000000000..aae7328973d1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -0,0 +1,1486 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v2_0.h"
+#include "jpeg_v4_0_3.h"
+#include "mmsch_v4_0_3.h"
+
+#include "vcn/vcn_4_0_3_offset.h"
+#include "vcn/vcn_4_0_3_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
+
+/*
+ * Keep only the low 17 bits of a register offset. The argument is
+ * parenthesized so that compound expressions (e.g. "a | b") are masked
+ * as a whole instead of being split by operator precedence.
+ */
+#define NORMALIZE_JPEG_REG_OFFSET(offset) \
+		((offset) & 0x1FFFF)
+
+/*
+ * Values compared against the UVDJ_PWR_STATUS field of UVD_PGFSM_STATUS
+ * (see jpeg_v4_0_3_start_inst(), which waits for the PWR_ON value).
+ */
+enum jpeg_engin_status {
+ UVD_PGFSM_STATUS__UVDJ_PWR_ON = 0,
+ UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2,
+};
+
+static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
+static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
+static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring);
+
+/*
+ * Interrupt source IDs of the JPEG decode traps, indexed by ring number.
+ * jpeg_v4_0_3_sw_init() registers one entry per ring.
+ */
+static int amdgpu_ih_srcid_jpeg[] = {
+ VCN_4_0__SRCID__JPEG_DECODE,
+ VCN_4_0__SRCID__JPEG1_DECODE,
+ VCN_4_0__SRCID__JPEG2_DECODE,
+ VCN_4_0__SRCID__JPEG3_DECODE,
+ VCN_4_0__SRCID__JPEG4_DECODE,
+ VCN_4_0__SRCID__JPEG5_DECODE,
+ VCN_4_0__SRCID__JPEG6_DECODE,
+ VCN_4_0__SRCID__JPEG7_DECODE
+};
+
+/*
+ * Registers handed to amdgpu_jpeg_reg_dump_init() in sw_init: global JPEG
+ * status/config registers followed by the RPTR/WPTR/STATUS triple of each
+ * of the eight JRBC ring controllers.
+ */
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_4_0_3[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_SYS_INT_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_STATUS),
+};
+
+/* Register offsets need normalizing whenever the RRMT_ENABLED capability is not set. */
+static inline bool jpeg_v4_0_3_normalizn_reqd(struct amdgpu_device *adev)
+{
+	return !(adev->jpeg.caps & AMDGPU_JPEG_CAPS(RRMT_ENABLED));
+}
+
+/*
+ * Offset to add to a JRBC0/JMI0 register to reach the same register of
+ * @pipe. Pipe 0 needs no adjustment; other pipes are 0x40 apart, based
+ * at -0xc80.
+ */
+static inline int jpeg_v4_0_3_core_reg_offset(u32 pipe)
+{
+	if (!pipe)
+		return 0;
+
+	return ((0x40 * pipe) - 0xc80);
+}
+
+/**
+ * jpeg_v4_0_3_early_init - early setup of the JPEG 4.0.3 block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Record the number of decode rings for this IP revision and install the
+ * ring, interrupt and RAS callback tables.
+ *
+ * Return: always 0.
+ */
+static int jpeg_v4_0_3_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	/* The ring count is a fixed property of this IP revision. */
+	adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS_4_0_3;
+
+	/* Hook up the callback tables. */
+	jpeg_v4_0_3_set_dec_ring_funcs(adev);
+	jpeg_v4_0_3_set_irq_funcs(adev);
+	jpeg_v4_0_3_set_ras_funcs(adev);
+
+	return 0;
+}
+
+/**
+ * jpeg_v4_0_3_sw_init - sw init for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Register the decode-trap and poison interrupt sources, run the common
+ * JPEG software init and resume, set up every decode ring of every
+ * instance, then initialize RAS (when supported), the register dump list
+ * and the reset-mask sysfs entry.
+ *
+ * Return: 0 on success, or the first error reported by a helper.
+ */
+static int jpeg_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_ring *ring;
+	int i, j, r, jpeg_inst;
+
+	for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+		/* JPEG TRAP */
+		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+				amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq);
+		if (r)
+			return r;
+	}
+
+	/* JPEG DJPEG POISON EVENT */
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+			VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
+	if (r)
+		return r;
+
+	/* JPEG EJPEG POISON EVENT */
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+			VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
+	if (r)
+		return r;
+
+	r = amdgpu_jpeg_sw_init(adev);
+	if (r)
+		return r;
+
+	r = amdgpu_jpeg_resume(adev);
+	if (r)
+		return r;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		jpeg_inst = GET_INST(JPEG, i);
+
+		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+			ring = &adev->jpeg.inst[i].ring_dec[j];
+			ring->use_doorbell = true;
+			ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
+
+			/*
+			 * Bare metal packs the rings of an instance into a
+			 * 9-entry doorbell window; a VF uses a 32-entry
+			 * window with rings 0-3 and 4-7 in separate groups.
+			 */
+			if (!amdgpu_sriov_vf(adev)) {
+				ring->doorbell_index =
+					(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+					1 + j + 9 * jpeg_inst;
+			} else {
+				if (j < 4)
+					ring->doorbell_index =
+						(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+						4 + j + 32 * jpeg_inst;
+				else
+					ring->doorbell_index =
+						(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+						8 + j + 32 * jpeg_inst;
+			}
+			sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j);
+			r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
+					AMDGPU_RING_PRIO_DEFAULT, NULL);
+			if (r)
+				return r;
+
+			adev->jpeg.internal.jpeg_pitch[j] =
+				regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET;
+			adev->jpeg.inst[i].external.jpeg_pitch[j] =
+				SOC15_REG_OFFSET1(JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_SCRATCH0,
+						jpeg_v4_0_3_core_reg_offset(j));
+		}
+	}
+
+	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
+		r = amdgpu_jpeg_ras_sw_init(adev);
+		if (r) {
+			dev_err(adev->dev, "Failed to initialize jpeg ras block!\n");
+			return r;
+		}
+	}
+
+	r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_4_0_3, ARRAY_SIZE(jpeg_reg_list_4_0_3));
+	if (r)
+		return r;
+
+	adev->jpeg.supported_reset =
+		amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec);
+	if (!amdgpu_sriov_vf(adev))
+		adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+	/* Propagate a sysfs failure instead of silently dropping it. */
+	return amdgpu_jpeg_sysfs_reset_mask_init(adev);
+}
+
+/**
+ * jpeg_v4_0_3_sw_fini - sw fini for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Suspend JPEG, remove the reset-mask sysfs entry and release the common
+ * JPEG software state. If the suspend fails nothing else is torn down.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int jpeg_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int err = amdgpu_jpeg_suspend(adev);
+
+	if (err)
+		return err;
+
+	amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
+	return amdgpu_jpeg_sw_fini(adev);
+}
+
+/*
+ * Start JPEG under SR-IOV by handing an init table to MMSCH.
+ *
+ * For every JPEG instance this builds a table in adev->virt.mm_table: an
+ * init header followed by one group of direct register writes per ring
+ * (ring base low/high and ring size) and an END command. The table address
+ * and size are then written to the MMSCH VF registers, the mailbox is
+ * kicked, and the response register is polled.
+ *
+ * Returns 0, or -EBUSY when MMSCH does not respond within the timeout.
+ */
+static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ uint64_t ctx_addr;
+ uint32_t param, resp, expected;
+ uint32_t tmp, timeout;
+
+ struct amdgpu_mm_table *table = &adev->virt.mm_table;
+ uint32_t *table_loc;
+ uint32_t table_size;
+ /* size_dw has no visible use here; presumably consumed by the MMSCH_V4_0_INSERT_* macros - TODO confirm */
+ uint32_t size, size_dw, item_offset;
+ uint32_t init_status;
+ int i, j, jpeg_inst;
+
+ struct mmsch_v4_0_cmd_direct_write
+ direct_wt = { {0} };
+ struct mmsch_v4_0_cmd_end end = { {0} };
+ struct mmsch_v4_0_3_init_header header;
+
+ direct_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_WRITE;
+ end.cmd_header.command_type =
+ MMSCH_COMMAND__END;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ jpeg_inst = GET_INST(JPEG, i);
+
+ /* Fresh header per instance; total_size is counted in dwords. */
+ memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
+ header.version = MMSCH_VERSION;
+ header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;
+
+ /* Commands are appended right after the space reserved for the header. */
+ table_loc = (uint32_t *)table->cpu_addr;
+ table_loc += header.total_size;
+
+ item_offset = header.total_size;
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ table_size = 0;
+
+ tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW);
+ MMSCH_V4_0_INSERT_DIRECT_WT(tmp, lower_32_bits(ring->gpu_addr));
+ tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH);
+ MMSCH_V4_0_INSERT_DIRECT_WT(tmp, upper_32_bits(ring->gpu_addr));
+ tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE);
+ MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4);
+
+ /* Rings 0-3 are described in mjpegdec0[], rings 4 and up in mjpegdec1[]. */
+ if (j <= 3) {
+ header.mjpegdec0[j].table_offset = item_offset;
+ header.mjpegdec0[j].init_status = 0;
+ header.mjpegdec0[j].table_size = table_size;
+ } else {
+ header.mjpegdec1[j - 4].table_offset = item_offset;
+ header.mjpegdec1[j - 4].init_status = 0;
+ header.mjpegdec1[j - 4].table_size = table_size;
+ }
+ header.total_size += table_size;
+ item_offset += table_size;
+ }
+
+ MMSCH_V4_0_INSERT_END();
+
+ /* send init table to MMSCH */
+ size = sizeof(struct mmsch_v4_0_3_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy((void *)table_loc, &header, size);
+
+ ctx_addr = table->gpu_addr;
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+ /* Clear the context VMID field (VMID 0). */
+ tmp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID);
+ tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID, tmp);
+
+ size = header.total_size;
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_SIZE, size);
+
+ /* Clear any stale response before kicking the mailbox. */
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP, 0);
+
+ param = 0x00000001;
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_HOST, param);
+ tmp = 0;
+ timeout = 1000;
+ resp = 0;
+ expected = MMSCH_VF_MAILBOX_RESP__OK;
+ /*
+ * NOTE(review): init_status is sampled before the response is polled and
+ * is indexed by the instance counter i, while mjpegdec0[] is filled per
+ * ring above - confirm both are intended.
+ */
+ init_status =
+ ((struct mmsch_v4_0_3_init_header *)(table_loc))->mjpegdec0[i].init_status;
+ /* Poll in 10 usec steps until any non-zero response arrives or the timeout elapses. */
+ while (resp != expected) {
+ resp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP);
+
+ if (resp != 0)
+ break;
+ udelay(10);
+ tmp = tmp + 10;
+ if (tmp >= timeout) {
+ DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+ " waiting for regMMSCH_VF_MAILBOX_RESP "\
+ "(expected=0x%08x, readback=0x%08x)\n",
+ tmp, expected, resp);
+ return -EBUSY;
+ }
+ }
+ /* An unexpected response is only logged; the function still returns 0. */
+ if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE &&
+ init_status != MMSCH_VF_ENGINE_STATUS__PASS)
+ DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n",
+ resp, init_status);
+
+ }
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_3_hw_init - start and test JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * On a VF the block is started through jpeg_v4_0_3_start_sriov() and every
+ * ring is marked ready without a ring test. On bare metal the doorbell
+ * range and per-ring doorbell control are programmed and each ring is
+ * tested.
+ *
+ * Return: 0 on success, or the error from the SR-IOV start or a ring test.
+ */
+static int jpeg_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, j, r, jpeg_inst;
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = jpeg_v4_0_3_start_sriov(adev);
+ if (r)
+ return r;
+
+ /* Reset the software write pointers and publish them to the hardware. */
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ ring->wptr = 0;
+ ring->wptr_old = 0;
+ jpeg_v4_0_3_dec_ring_set_wptr(ring);
+ ring->sched.ready = true;
+ }
+ }
+ } else {
+ /* This flag is not set for VF, assumed to be disabled always */
+ if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) &
+ 0x100)
+ adev->jpeg.caps |= AMDGPU_JPEG_CAPS(RRMT_ENABLED);
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+
+ /* Ring 0 of the instance decides whether the doorbell range is programmed. */
+ ring = adev->jpeg.inst[i].ring_dec;
+
+ if (ring->use_doorbell)
+ adev->nbio.funcs->vcn_doorbell_range(
+ adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 9 * jpeg_inst,
+ adev->jpeg.inst[i].aid_id);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ /* Enable the ring's doorbell and tell the hardware its index. */
+ if (ring->use_doorbell)
+ WREG32_SOC15_OFFSET(
+ VCN, GET_INST(VCN, i),
+ regVCN_JPEG_DB_CTRL,
+ (ring->pipe ? (ring->pipe - 0x15) : 0),
+ ring->doorbell_index
+ << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_3_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Cancel the idle worker, power-gate the block on bare metal if it is not
+ * gated yet, and drop the RAS poison interrupt reference when RAS is
+ * supported and the device is not a VF.
+ *
+ * Return: result of the power-gating request, 0 if none was needed.
+ */
+static int jpeg_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int ret = 0;
+
+	cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+	if (!amdgpu_sriov_vf(adev) && adev->jpeg.cur_state != AMD_PG_STATE_GATE)
+		ret = jpeg_v4_0_3_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
+
+	/* Nothing more to release without RAS support or when running as a VF. */
+	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG) || amdgpu_sriov_vf(adev))
+		return ret;
+
+	amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0);
+
+	return ret;
+}
+
+/**
+ * jpeg_v4_0_3_suspend - suspend JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Run hw_fini and, if that succeeds, the common JPEG suspend.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int jpeg_v4_0_3_suspend(struct amdgpu_ip_block *ip_block)
+{
+	int err = jpeg_v4_0_3_hw_fini(ip_block);
+
+	if (err)
+		return err;
+
+	return amdgpu_jpeg_suspend(ip_block->adev);
+}
+
+/**
+ * jpeg_v4_0_3_resume - resume JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Run the common JPEG resume and, if that succeeds, hw_init.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int jpeg_v4_0_3_resume(struct amdgpu_ip_block *ip_block)
+{
+	int err = amdgpu_jpeg_resume(ip_block->adev);
+
+	if (err)
+		return err;
+
+	return jpeg_v4_0_3_hw_init(ip_block);
+}
+
+/*
+ * Disable clock gating on one JPEG instance: reprogram JPEG_CGC_CTRL and
+ * clear the JMCIF, JRBBM and per-ring decoder bits in JPEG_CGC_GATE.
+ *
+ * @inst_idx is the logical instance index; it is translated with GET_INST().
+ */
+static void jpeg_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx)
+{
+ int i, jpeg_inst;
+ uint32_t data;
+
+ jpeg_inst = GET_INST(JPEG, inst_idx);
+ data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data &= (~(JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1));
+ } else {
+ /*
+ * NOTE(review): this ANDs with the complement of a *shift* constant,
+ * not a field mask; ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK looks like the
+ * intent - confirm against the register header before changing.
+ */
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ }
+
+ /* OR in the gate delay timer (1) and clock-off delay (4) fields. */
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE);
+ data &= ~(JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK);
+ /* One decoder gate bit per ring, at consecutive bit positions from JPEG0_DEC. */
+ for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i)
+ data &= ~(JPEG_CGC_GATE__JPEG0_DEC_MASK << i);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data);
+}
+
+/*
+ * Enable clock gating on one JPEG instance: reprogram JPEG_CGC_CTRL and
+ * set the JMCIF, JRBBM and per-ring decoder bits in JPEG_CGC_GATE.
+ *
+ * @inst_idx is the logical instance index; it is translated with GET_INST().
+ */
+static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx)
+{
+ int i, jpeg_inst;
+ uint32_t data;
+
+ jpeg_inst = GET_INST(JPEG, inst_idx);
+ data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= (JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1);
+ } else {
+ /*
+ * NOTE(review): this ANDs with the complement of a *shift* constant,
+ * not a field mask; ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK looks like the
+ * intent - confirm against the register header before changing.
+ */
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ }
+
+ /* OR in the gate delay timer (1) and clock-off delay (4) fields. */
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE);
+ data |= (JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK);
+ /* One decoder gate bit per ring, at consecutive bit positions from JPEG0_DEC. */
+ for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i)
+ data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK << i);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data);
+}
+
+/*
+ * Bring up one JPEG instance: write the UVDJ power config, wait for the
+ * PGFSM status field to read PWR_ON, clear the power-status (anti-hang)
+ * bit, disable clock gating, program the tiling address config and take
+ * the JMI out of soft reset.
+ *
+ * @inst is the logical instance index; it is translated with GET_INST().
+ */
+static void jpeg_v4_0_3_start_inst(struct amdgpu_device *adev, int inst)
+{
+ int jpeg_inst = GET_INST(JPEG, inst);
+
+ WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG,
+ 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT);
+ /* The result of the wait is not checked here. */
+ SOC15_WAIT_ON_RREG(JPEG, jpeg_inst, regUVD_PGFSM_STATUS,
+ UVD_PGFSM_STATUS__UVDJ_PWR_ON <<
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT,
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
+
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS),
+ 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* JPEG disable CGC */
+ jpeg_v4_0_3_disable_clock_gating(adev, inst);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+}
+
+/*
+ * Program the ring buffer controller of one decode ring: enable its system
+ * interrupt, set VMID 0, load the ring base address, zero RPTR/WPTR, set
+ * the ring size and read WPTR back into ring->wptr.
+ *
+ * The ring's registers are reached by adding the per-pipe offset to the
+ * JRBC0/JMI0 register addresses.
+ */
+static void jpeg_v4_0_3_start_jrbc(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int jpeg_inst = GET_INST(JPEG, ring->me);
+ int reg_offset = jpeg_v4_0_3_core_reg_offset(ring->pipe);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe,
+ ~(JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe));
+
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_LMI_JRBC_RB_VMID,
+ reg_offset, 0);
+ /* RB_CNTL is written as 0x3 while the ring is set up, then as 0x2 below. */
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_CNTL,
+ reg_offset,
+ (0x00000001L | 0x00000002L));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ reg_offset, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ reg_offset, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_RPTR,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_CNTL,
+ reg_offset, 0x00000002L);
+ /* Ring size is programmed in dwords. */
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_SIZE,
+ reg_offset, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+ reg_offset);
+}
+
+/**
+ * jpeg_v4_0_3_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Start every JPEG instance and then each of its decode rings.
+ *
+ * Return: always 0.
+ */
+static int jpeg_v4_0_3_start(struct amdgpu_device *adev)
+{
+	int inst, pipe;
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++) {
+		jpeg_v4_0_3_start_inst(adev, inst);
+
+		for (pipe = 0; pipe < adev->jpeg.num_jpeg_rings; pipe++)
+			jpeg_v4_0_3_start_jrbc(&adev->jpeg.inst[inst].ring_dec[pipe]);
+	}
+
+	return 0;
+}
+
+/*
+ * Shut down one JPEG instance: put the JMI into soft reset, enable clock
+ * gating and set the power-status (anti-hang) bit.
+ *
+ * @inst is the logical instance index; it is translated with GET_INST().
+ */
+static void jpeg_v4_0_3_stop_inst(struct amdgpu_device *adev, int inst)
+{
+ int jpeg_inst = GET_INST(JPEG, inst);
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v4_0_3_enable_clock_gating(adev, inst);
+
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+}
+
+/**
+ * jpeg_v4_0_3_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop every JPEG instance in turn.
+ *
+ * Return: always 0.
+ */
+static int jpeg_v4_0_3_stop(struct amdgpu_device *adev)
+{
+	int inst = 0;
+
+	while (inst < adev->jpeg.num_jpeg_inst) {
+		jpeg_v4_0_3_stop_inst(adev, inst);
+		inst++;
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Return: the ring's RB_RPTR register value, read from the hardware.
+ */
+static uint64_t jpeg_v4_0_3_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	/* adev is referenced by the register access macros below. */
+	struct amdgpu_device *adev = ring->adev;
+	int jpeg_inst = GET_INST(JPEG, ring->me);
+	int pipe_off = jpeg_v4_0_3_core_reg_offset(ring->pipe);
+
+	return RREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_RB_RPTR,
+			pipe_off);
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Return: the write pointer, taken from the writeback slot when the ring
+ * uses a doorbell and from the RB_WPTR register otherwise.
+ */
+static uint64_t jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	/* adev is also referenced by the register access macros below. */
+	struct amdgpu_device *adev = ring->adev;
+	uint64_t wptr;
+
+	if (ring->use_doorbell) {
+		wptr = adev->wb.wb[ring->wptr_offs];
+	} else {
+		wptr = RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me),
+				regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+				jpeg_v4_0_3_core_reg_offset(ring->pipe));
+	}
+
+	return wptr;
+}
+
+/* Intentionally empty HDP flush callback: nothing is written to @ring. */
+void jpeg_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ /* JPEG engine access for HDP flush doesn't work when RRMT is enabled.
+ * This is a workaround to avoid any HDP flush through JPEG ring.
+ */
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Publish the low 32 bits of ring->wptr to the hardware: through the
+ * writeback slot plus doorbell when the ring uses one, otherwise by
+ * writing the RB_WPTR register.
+ */
+static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	/* adev is also referenced by the doorbell/register access macros below. */
+	struct amdgpu_device *adev = ring->adev;
+
+	if (!ring->use_doorbell) {
+		WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me),
+				regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+				jpeg_v4_0_3_core_reg_offset(ring->pipe),
+				lower_32_bits(ring->wptr));
+		return;
+	}
+
+	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_insert_start - insert a start command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emit the start sequence (an external register write of 0x80004000 to
+ * 0x62a04). Nothing is emitted when running as an SR-IOV VF.
+ */
+void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring)
+{
+	if (amdgpu_sriov_vf(ring->adev))
+		return;
+
+	/* Select the external register ... */
+	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+			0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */
+
+	/* ... and write the value to it. */
+	amdgpu_ring_write(ring,
+			PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0,
+				0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x80004000);
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_insert_end - insert an end command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emit the end sequence (an external register write of 0x00004000 to
+ * 0x62a04). Nothing is emitted when running as an SR-IOV VF.
+ */
+void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring)
+{
+	if (amdgpu_sriov_vf(ring->adev))
+		return;
+
+	/* Select the external register ... */
+	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+			0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x62a04);
+
+	/* ... and write the value to it. */
+	amdgpu_ring_write(ring,
+			PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0,
+				0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x00004000);
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_emit_fence - emit a fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Write a fence and a trap command to the ring. 64-bit fences
+ * (AMDGPU_FENCE_FLAG_64BIT) are not expected here and trigger a warning.
+ */
+void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned int flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ /* The sequence number goes into both GPCOM data registers. */
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ /* Fence address, low then high half, into the memory-write BAR registers. */
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+
+ /* Command 0x8 to GPCOM_CMD, followed by a TYPE4 conditional packet on the same register. */
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x8);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
+ 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
+ amdgpu_ring_write(ring, 0);
+
+ /* Two TYPE6 packets (the same packet jpeg_v4_0_3_dec_ring_nop() emits), then a TYPE7 packet. */
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
+ amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: indirect buffer to execute
+ * @flags: unused
+ *
+ * Write ring commands to execute the indirect buffer.
+ */
+void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+
+ /* The IB VMID is forced to 0 when the ring has a parse_cs callback. */
+ if (ring->funcs->parse_cs)
+ amdgpu_ring_write(ring, 0);
+ else
+ amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));
+
+ /* The same VMID is replicated into three 4-bit fields. */
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));
+
+ /* IB address (low, high) and length in dwords. */
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_IB_SIZE_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, ib->length_dw);
+
+ /* Ring buffer address (low, high) into the memory-read BAR registers. */
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
+ amdgpu_ring_write(ring, 0);
+
+ /* Same timer value as jpeg_v4_0_3_dec_ring_emit_reg_wait(); reference data 0x2 is then checked against JRBC_STATUS with mask 0x2. */
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x2);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_STATUS_INTERNAL_OFFSET,
+ 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
+ amdgpu_ring_write(ring, 0x2);
+}
+
+/*
+ * Emit a conditional read that compares register @reg, under @mask, with
+ * @val.
+ *
+ * @reg is a dword offset; it is normalized first when required. Byte
+ * offsets in 0x10000..0x105ff are addressed directly in the TYPE3 packet,
+ * everything else goes through the external register window.
+ */
+void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+				uint32_t val, uint32_t mask)
+{
+	uint32_t byte_off, ext_reg, poll_reg;
+	bool direct;
+
+	/* Use normalized offsets if required */
+	if (jpeg_v4_0_3_normalizn_reqd(ring->adev))
+		reg = NORMALIZE_JPEG_REG_OFFSET(reg);
+
+	byte_off = (reg << 2);
+	direct = (byte_off >= 0x10000 && byte_off <= 0x105ff);
+	ext_reg = direct ? 0 : byte_off;
+	poll_reg = direct ? (byte_off >> 2) : JRBC_DEC_EXTERNAL_REG_WRITE_ADDR;
+
+	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
+			0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, 0x01400200);
+
+	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
+			0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, val);
+
+	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+			0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, ext_reg);
+	amdgpu_ring_write(ring, PACKETJ(poll_reg, 0, 0, PACKETJ_TYPE3));
+	amdgpu_ring_write(ring, mask);
+}
+
+/*
+ * Emit a TLB flush for @vmid and then wait until the VM hub's page table
+ * base register (low 32 bits) for that VMID reads back the flushed
+ * page-directory address.
+ */
+void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+				unsigned int vmid, uint64_t pd_addr)
+{
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+	uint32_t ptb_reg;
+
+	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+	/* wait for register write */
+	ptb_reg = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
+	jpeg_v4_0_3_dec_ring_emit_reg_wait(ring, ptb_reg,
+			lower_32_bits(pd_addr), 0xffffffff);
+}
+
+/*
+ * Emit a write of @val to register @reg.
+ *
+ * @reg is a dword offset; it is normalized first when required. Byte
+ * offsets in 0x10000..0x105ff are addressed directly in the TYPE0 packet,
+ * everything else goes through the external register window.
+ */
+void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
+{
+	uint32_t byte_off, ext_reg, dest_reg;
+	bool direct;
+
+	/* Use normalized offsets if required */
+	if (jpeg_v4_0_3_normalizn_reqd(ring->adev))
+		reg = NORMALIZE_JPEG_REG_OFFSET(reg);
+
+	byte_off = (reg << 2);
+	direct = (byte_off >= 0x10000 && byte_off <= 0x105ff);
+	ext_reg = direct ? 0 : byte_off;
+	dest_reg = direct ? (byte_off >> 2) : JRBC_DEC_EXTERNAL_REG_WRITE_ADDR;
+
+	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+			0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, ext_reg);
+	amdgpu_ring_write(ring, PACKETJ(dest_reg, 0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, val);
+}
+
+/*
+ * Pad the ring with @count dwords of NOPs. Each NOP is a two-dword TYPE6
+ * packet, so an odd write pointer or an odd @count triggers a warning.
+ */
+void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+	uint32_t packets = count / 2;
+
+	WARN_ON(ring->wptr % 2 || count % 2);
+
+	while (packets--) {
+		amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+		amdgpu_ring_write(ring, 0);
+	}
+}
+
+/**
+ * jpeg_v4_0_3_is_idle - check whether every JPEG ring is idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * A ring counts as idle when the RB_JOB_DONE bit is set in its JRBC status
+ * register. The previous implementation started from "false" and only
+ * AND-ed results into it, so it could never report idle.
+ *
+ * Return: true if all rings of all instances report RB_JOB_DONE.
+ */
+static bool jpeg_v4_0_3_is_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	uint32_t status;
+	int i, j;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+			status = RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, i),
+					regUVD_JRBC0_UVD_JRBC_STATUS,
+					jpeg_v4_0_3_core_reg_offset(j));
+
+			/* One busy ring is enough to call the block busy. */
+			if ((status & UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) !=
+			    UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK)
+				return false;
+		}
+	}
+
+	return true;
+}
+
+/**
+ * jpeg_v4_0_3_wait_for_idle - wait until every JPEG ring is idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Wait, ring by ring, for RB_JOB_DONE to be set in the JRBC status
+ * register. The previous implementation AND-ed each result into a zero
+ * accumulator and therefore always reported success.
+ *
+ * Return: 0 when all rings became idle, otherwise the error from the first
+ * wait that failed.
+ */
+static int jpeg_v4_0_3_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int i, j, r;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+			r = SOC15_WAIT_ON_RREG_OFFSET(JPEG, GET_INST(JPEG, i),
+					regUVD_JRBC0_UVD_JRBC_STATUS,
+					jpeg_v4_0_3_core_reg_offset(j),
+					UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+					UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+			if (r)
+				return r;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v4_0_3_set_clockgating_state - gate or ungate the JPEG clocks
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ * @state: requested clock gating state
+ *
+ * Walks all instances. Gating is refused with -EBUSY when the block is not
+ * idle; ungating is applied unconditionally.
+ */
+static int jpeg_v4_0_3_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					      enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	bool gate = (state == AMD_CG_STATE_GATE);
+	int inst;
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst) {
+		if (!gate) {
+			jpeg_v4_0_3_disable_clock_gating(adev, inst);
+			continue;
+		}
+
+		if (!jpeg_v4_0_3_is_idle(ip_block))
+			return -EBUSY;
+
+		jpeg_v4_0_3_enable_clock_gating(adev, inst);
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v4_0_3_set_powergating_state - start or stop the JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ * @state: requested power gating state
+ *
+ * Under SR-IOV the cached state is forced to ungated and nothing else is
+ * done. Otherwise the block is stopped (gate) or started (ungate) and the
+ * cached state is updated only on success.
+ */
+static int jpeg_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block,
+					      enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int err;
+
+	if (amdgpu_sriov_vf(adev)) {
+		adev->jpeg.cur_state = AMD_PG_STATE_UNGATE;
+		return 0;
+	}
+
+	/* nothing to do when already in the requested state */
+	if (adev->jpeg.cur_state == state)
+		return 0;
+
+	err = (state == AMD_PG_STATE_GATE) ? jpeg_v4_0_3_stop(adev) :
+					     jpeg_v4_0_3_start(adev);
+	if (err)
+		return err;
+
+	adev->jpeg.cur_state = state;
+	return 0;
+}
+
+/*
+ * .set callback of the JPEG decode interrupt source.
+ * Does nothing with its arguments and always returns 0.
+ */
+static int jpeg_v4_0_3_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+/*
+ * .set callback of the JPEG RAS poison interrupt source.
+ * Does nothing with its arguments and always returns 0.
+ */
+static int jpeg_v4_0_3_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_3_process_interrupt - handle a JPEG decode interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt source (unused)
+ * @entry: interrupt vector entry
+ *
+ * Maps the entry's node id to a JPEG instance via its AID id, then runs fence
+ * processing on the decode ring selected by the source id. Unknown instances
+ * and unknown source ids are reported and otherwise ignored.
+ *
+ * Return: always 0.
+ */
+static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev,
+					  struct amdgpu_irq_src *source,
+					  struct amdgpu_iv_entry *entry)
+{
+	uint32_t aid, inst;
+	int ring_idx;
+
+	aid = node_id_to_phys_map[entry->node_id];
+	DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n");
+
+	/* find the first instance that lives on this AID */
+	inst = 0;
+	while (inst < adev->jpeg.num_jpeg_inst &&
+	       adev->jpeg.inst[inst].aid_id != aid)
+		++inst;
+
+	if (inst >= adev->jpeg.num_jpeg_inst) {
+		dev_WARN_ONCE(adev->dev, 1,
+			      "Interrupt received for unknown JPEG instance %d",
+			      entry->node_id);
+		return 0;
+	}
+
+	switch (entry->src_id) {
+	case VCN_4_0__SRCID__JPEG_DECODE:
+		ring_idx = 0;
+		break;
+	case VCN_4_0__SRCID__JPEG1_DECODE:
+		ring_idx = 1;
+		break;
+	case VCN_4_0__SRCID__JPEG2_DECODE:
+		ring_idx = 2;
+		break;
+	case VCN_4_0__SRCID__JPEG3_DECODE:
+		ring_idx = 3;
+		break;
+	case VCN_4_0__SRCID__JPEG4_DECODE:
+		ring_idx = 4;
+		break;
+	case VCN_4_0__SRCID__JPEG5_DECODE:
+		ring_idx = 5;
+		break;
+	case VCN_4_0__SRCID__JPEG6_DECODE:
+		ring_idx = 6;
+		break;
+	case VCN_4_0__SRCID__JPEG7_DECODE:
+		ring_idx = 7;
+		break;
+	default:
+		DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+			      entry->src_id, entry->src_data[0]);
+		return 0;
+	}
+
+	amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[ring_idx]);
+
+	return 0;
+}
+
+/*
+ * Reset the JPEG core that backs @ring.
+ *
+ * The instance comes from ring->me and the per-core register offset from
+ * ring->pipe. The statement order below is a hardware sequence; do not
+ * reorder it.
+ */
+static void jpeg_v4_0_3_core_stall_reset(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int jpeg_inst = GET_INST(JPEG, ring->me);
+ int reg_offset = jpeg_v4_0_3_core_reg_offset(ring->pipe);
+
+ /* set the low five CLIENT_STALL bits, then wait for CLEAN_STATUS to match */
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_JMI_CLIENT_STALL,
+ reg_offset, 0x1F);
+ SOC15_WAIT_ON_RREG_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS,
+ reg_offset, 0x1F, 0x1F);
+ /* set the same bits in LMI_DROP and assert this pipe's bit in CORE_RST_CTRL */
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_JPEG_LMI_DROP,
+ reg_offset, 0x1F);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 1 << ring->pipe);
+ /* undo everything: clear stall, clear drop, then clear the reset control */
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_JMI_CLIENT_STALL,
+ reg_offset, 0x00);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_JPEG_LMI_DROP,
+ reg_offset, 0x00);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 0x00);
+}
+
+/**
+ * jpeg_v4_0_3_ring_reset - reset a single JPEG decode ring
+ *
+ * @ring: ring to reset
+ * @vmid: unused here
+ * @timedout_fence: fence handed to the reset begin/end helpers
+ *
+ * Not supported under SR-IOV (-EOPNOTSUPP). Otherwise the core is stall-reset
+ * and the JRBC restarted between the generic reset helpers.
+ */
+static int jpeg_v4_0_3_ring_reset(struct amdgpu_ring *ring,
+				   unsigned int vmid,
+				   struct amdgpu_fence *timedout_fence)
+{
+	int r;
+
+	if (amdgpu_sriov_vf(ring->adev))
+		return -EOPNOTSUPP;
+
+	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+	jpeg_v4_0_3_core_stall_reset(ring);
+	jpeg_v4_0_3_start_jrbc(ring);
+
+	r = amdgpu_ring_reset_helper_end(ring, timedout_fence);
+	return r;
+}
+
+/*
+ * IP-level callbacks for JPEG 4.0.3, referenced by jpeg_v4_0_3_ip_block.
+ * The dump/print state hooks are the shared amdgpu_jpeg_* helpers.
+ */
+static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = {
+ .name = "jpeg_v4_0_3",
+ .early_init = jpeg_v4_0_3_early_init,
+ .sw_init = jpeg_v4_0_3_sw_init,
+ .sw_fini = jpeg_v4_0_3_sw_fini,
+ .hw_init = jpeg_v4_0_3_hw_init,
+ .hw_fini = jpeg_v4_0_3_hw_fini,
+ .suspend = jpeg_v4_0_3_suspend,
+ .resume = jpeg_v4_0_3_resume,
+ .is_idle = jpeg_v4_0_3_is_idle,
+ .wait_for_idle = jpeg_v4_0_3_wait_for_idle,
+ .set_clockgating_state = jpeg_v4_0_3_set_clockgating_state,
+ .set_powergating_state = jpeg_v4_0_3_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
+};
+
+/*
+ * Ring callbacks shared by every JPEG 4.0.3 decode ring; installed by
+ * jpeg_v4_0_3_set_dec_ring_funcs().
+ */
+static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr,
+ .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr,
+ .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
+ /* sum of the per-operation dword counts listed on each line below */
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v4_0_3_dec_ring_emit_vm_flush */
+ 18 + 18 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v4_0_3_dec_ring_emit_ib */
+ .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib,
+ .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush,
+ .emit_hdp_flush = jpeg_v4_0_3_ring_emit_hdp_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v4_0_3_dec_ring_nop,
+ .insert_start = jpeg_v4_0_3_dec_ring_insert_start,
+ .insert_end = jpeg_v4_0_3_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v4_0_3_ring_reset,
+};
+
+/**
+ * jpeg_v4_0_3_set_dec_ring_funcs - wire up the decode rings
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Assigns the ring callbacks and records (instance, ring) as (me, pipe) on
+ * every decode ring, then derives each instance's AID id from its physical
+ * instance number.
+ */
+static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	int inst, pipe, phys_inst;
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst) {
+		for (pipe = 0; pipe < adev->jpeg.num_jpeg_rings; ++pipe) {
+			ring = &adev->jpeg.inst[inst].ring_dec[pipe];
+
+			ring->funcs = &jpeg_v4_0_3_dec_ring_vm_funcs;
+			ring->me = inst;
+			ring->pipe = pipe;
+		}
+
+		phys_inst = GET_INST(JPEG, inst);
+		adev->jpeg.inst[inst].aid_id =
+			phys_inst / adev->jpeg.num_inst_per_aid;
+	}
+}
+
+/* Callbacks for the JPEG decode (trap) interrupt source. */
+static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_irq_funcs = {
+ .set = jpeg_v4_0_3_set_interrupt_state,
+ .process = jpeg_v4_0_3_process_interrupt,
+};
+
+/* Callbacks for the RAS poison interrupt source; processing is the shared amdgpu_jpeg helper. */
+static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_ras_irq_funcs = {
+ .set = jpeg_v4_0_3_set_ras_interrupt_state,
+ .process = amdgpu_jpeg_process_poison_irq,
+};
+
+/**
+ * jpeg_v4_0_3_set_irq_funcs - set up the JPEG interrupt sources
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Both sources live on the first JPEG instance. The decode source gets one
+ * type per ring of every instance; the RAS poison source has a single type.
+ */
+static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev)
+{
+	int inst;
+
+	/* accumulate one batch of ring types per instance on instance 0 */
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst)
+		adev->jpeg.inst[0].irq.num_types += adev->jpeg.num_jpeg_rings;
+
+	adev->jpeg.inst[0].irq.funcs = &jpeg_v4_0_3_irq_funcs;
+
+	adev->jpeg.inst[0].ras_poison_irq.num_types = 1;
+	adev->jpeg.inst[0].ras_poison_irq.funcs = &jpeg_v4_0_3_ras_irq_funcs;
+}
+
+/* Exported IP block descriptor for JPEG version 4.0.3 (declared in jpeg_v4_0_3.h). */
+const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 4,
+ .minor = 0,
+ .rev = 3,
+ .funcs = &jpeg_v4_0_3_ip_funcs,
+};
+
+/*
+ * Uncorrectable-error status registers (LO/HI pairs) for JPEG0..JPEG7, with an
+ * "S" and a "D" entry per core. The table is handed to the generic RAS
+ * helpers for querying and resetting the error counts.
+ */
+static const struct amdgpu_ras_err_status_reg_entry jpeg_v4_0_3_ue_reg_list[] = {
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG0S, regVCN_UE_ERR_STATUS_HI_JPEG0S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG0S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG0D, regVCN_UE_ERR_STATUS_HI_JPEG0D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG0D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG1S, regVCN_UE_ERR_STATUS_HI_JPEG1S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG1S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG1D, regVCN_UE_ERR_STATUS_HI_JPEG1D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG1D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG2S, regVCN_UE_ERR_STATUS_HI_JPEG2S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG2S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG2D, regVCN_UE_ERR_STATUS_HI_JPEG2D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG2D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG3S, regVCN_UE_ERR_STATUS_HI_JPEG3S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG3S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG3D, regVCN_UE_ERR_STATUS_HI_JPEG3D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG3D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG4S, regVCN_UE_ERR_STATUS_HI_JPEG4S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG4S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG4D, regVCN_UE_ERR_STATUS_HI_JPEG4D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG4D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG5S, regVCN_UE_ERR_STATUS_HI_JPEG5S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG5S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG5D, regVCN_UE_ERR_STATUS_HI_JPEG5D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG5D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG6S, regVCN_UE_ERR_STATUS_HI_JPEG6S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG6S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG6D, regVCN_UE_ERR_STATUS_HI_JPEG6D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG6D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG7S, regVCN_UE_ERR_STATUS_HI_JPEG7S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG7S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG7D, regVCN_UE_ERR_STATUS_HI_JPEG7D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG7D"},
+};
+
+/*
+ * Query the uncorrectable error count of one JPEG instance and add it to the
+ * ue_count of the ras_err_data passed in through @ras_err_status.
+ */
+static void jpeg_v4_0_3_inst_query_ras_error_count(struct amdgpu_device *adev,
+						   uint32_t jpeg_inst,
+						   void *ras_err_status)
+{
+	struct ras_err_data *data = ras_err_status;
+	uint32_t phys_inst = GET_INST(VCN, jpeg_inst);
+
+	/* jpeg v4_0_3 only support uncorrectable errors */
+	amdgpu_ras_inst_query_ras_error_count(adev,
+					      jpeg_v4_0_3_ue_reg_list,
+					      ARRAY_SIZE(jpeg_v4_0_3_ue_reg_list),
+					      NULL, 0, phys_inst,
+					      AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+					      &data->ue_count);
+}
+
+/*
+ * Query RAS error counts on every JPEG instance.
+ * Warns and does nothing when JPEG RAS is not supported.
+ */
+static void jpeg_v4_0_3_query_ras_error_count(struct amdgpu_device *adev,
+					      void *ras_err_status)
+{
+	uint32_t inst;
+
+	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
+		for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++)
+			jpeg_v4_0_3_inst_query_ras_error_count(adev, inst,
+							       ras_err_status);
+		return;
+	}
+
+	dev_warn(adev->dev, "JPEG RAS is not supported\n");
+}
+
+static void jpeg_v4_0_3_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ uint32_t jpeg_inst)
+{
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ jpeg_v4_0_3_ue_reg_list,
+ ARRAY_SIZE(jpeg_v4_0_3_ue_reg_list),
+ GET_INST(VCN, jpeg_inst));
+}
+
+/*
+ * Reset RAS error counts on every JPEG instance.
+ * Warns and does nothing when JPEG RAS is not supported.
+ */
+static void jpeg_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev)
+{
+	uint32_t inst;
+
+	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
+		for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++)
+			jpeg_v4_0_3_inst_reset_ras_error_count(adev, inst);
+		return;
+	}
+
+	dev_warn(adev->dev, "JPEG RAS is not supported\n");
+}
+
+/*
+ * Read the POISONED_PF field of the RAS status register that belongs to
+ * @sub_block on @instance. Unknown sub blocks yield 0. A hit is logged.
+ */
+static uint32_t jpeg_v4_0_3_query_poison_by_instance(struct amdgpu_device *adev,
+		uint32_t instance, uint32_t sub_block)
+{
+	uint32_t status, poisoned = 0;
+
+	if (sub_block == AMDGPU_JPEG_V4_0_3_JPEG0) {
+		status = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG0_STATUS);
+		poisoned = REG_GET_FIELD(status, UVD_RAS_JPEG0_STATUS, POISONED_PF);
+	} else if (sub_block == AMDGPU_JPEG_V4_0_3_JPEG1) {
+		status = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG1_STATUS);
+		poisoned = REG_GET_FIELD(status, UVD_RAS_JPEG1_STATUS, POISONED_PF);
+	}
+
+	if (!poisoned)
+		return 0;
+
+	dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n",
+		 instance, sub_block);
+
+	return poisoned;
+}
+
+/*
+ * Check every sub block of every JPEG instance for poison.
+ * All sub blocks are always queried so that each hit gets logged.
+ */
+static bool jpeg_v4_0_3_query_ras_poison_status(struct amdgpu_device *adev)
+{
+	uint32_t total = 0;
+	uint32_t inst, sub;
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++) {
+		for (sub = 0; sub < AMDGPU_JPEG_V4_0_3_MAX_SUB_BLOCK; sub++)
+			total += jpeg_v4_0_3_query_poison_by_instance(adev, inst, sub);
+	}
+
+	return total != 0;
+}
+
+/* RAS hardware callbacks for JPEG 4.0.3; referenced from jpeg_v4_0_3_ras. */
+static const struct amdgpu_ras_block_hw_ops jpeg_v4_0_3_ras_hw_ops = {
+ .query_ras_error_count = jpeg_v4_0_3_query_ras_error_count,
+ .reset_ras_error_count = jpeg_v4_0_3_reset_ras_error_count,
+ .query_poison_status = jpeg_v4_0_3_query_ras_poison_status,
+};
+
+/*
+ * Decode an ACA bank and log it into the error cache.
+ *
+ * UE banks are logged with a count of 1; CE banks use the error count taken
+ * from the bank's MISC0 register. Any other @type is rejected with -EINVAL.
+ */
+static int jpeg_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+					enum aca_smu_type type, void *data)
+{
+	struct aca_bank_info info;
+	u64 count;
+	int ret;
+
+	ret = aca_bank_info_decode(bank, &info);
+	if (ret)
+		return ret;
+
+	if (type == ACA_SMU_TYPE_UE) {
+		bank->aca_err_type = ACA_ERROR_TYPE_UE;
+		count = 1ULL;
+	} else if (type == ACA_SMU_TYPE_CE) {
+		bank->aca_err_type = ACA_ERROR_TYPE_CE;
+		count = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]);
+	} else {
+		return -EINVAL;
+	}
+
+	return aca_error_cache_log_bank_error(handle, &info,
+					      bank->aca_err_type, count);
+}
+
+/*
+ * Error codes accepted for JPEG ACA banks, checked in
+ * jpeg_v4_0_3_aca_bank_is_valid(). The values refer to the SMU driver
+ * interface header.
+ */
+static int jpeg_v4_0_3_err_codes[] = {
+ 16, 17, 18, 19, 20, 21, 22, 23, /* JPEG[0-7][S|D] */
+ 24, 25, 26, 27, 28, 29, 30, 31
+};
+
+/*
+ * Decide whether an ACA bank belongs to JPEG: the instance id from IPID (with
+ * bit 0 masked off) must equal mmSMNAID_AID0_MCA_SMU, and the error-code check
+ * against jpeg_v4_0_3_err_codes must return 0.
+ */
+static bool jpeg_v4_0_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+					   enum aca_smu_type type, void *data)
+{
+	u32 inst_id;
+
+	inst_id = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+	inst_id &= GENMASK(31, 1);
+
+	if (inst_id != mmSMNAID_AID0_MCA_SMU)
+		return false;
+
+	/* a non-zero result means the bank's error code is not one of ours */
+	return !aca_bank_check_error_codes(handle->adev, bank,
+					   jpeg_v4_0_3_err_codes,
+					   ARRAY_SIZE(jpeg_v4_0_3_err_codes));
+}
+
+/* ACA bank callbacks: validity filter and parser for JPEG banks. */
+static const struct aca_bank_ops jpeg_v4_0_3_aca_bank_ops = {
+ .aca_bank_parser = jpeg_v4_0_3_aca_bank_parser,
+ .aca_bank_is_valid = jpeg_v4_0_3_aca_bank_is_valid,
+};
+
+/* ACA binding description passed to amdgpu_ras_bind_aca(); the mask selects UE only. */
+static const struct aca_info jpeg_v4_0_3_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK,
+ .bank_ops = &jpeg_v4_0_3_aca_bank_ops,
+};
+
+/*
+ * Late RAS init for JPEG: run the common late init, take a reference on the
+ * poison interrupt when RAS is supported and the source has callbacks, then
+ * bind the ACA handler. Any failure after the common init undoes it with
+ * amdgpu_ras_block_late_fini().
+ */
+static int jpeg_v4_0_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+	int r;
+
+	r = amdgpu_ras_block_late_init(adev, ras_block);
+	if (r)
+		return r;
+
+	if (amdgpu_ras_is_supported(adev, ras_block->block) &&
+	    adev->jpeg.inst->ras_poison_irq.funcs)
+		r = amdgpu_irq_get(adev, &adev->jpeg.inst->ras_poison_irq, 0);
+
+	/* only bind ACA when the interrupt step did not fail */
+	if (!r)
+		r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__JPEG,
+					&jpeg_v4_0_3_aca_info, NULL);
+
+	if (r)
+		amdgpu_ras_block_late_fini(adev, ras_block);
+
+	return r;
+}
+
+/* RAS block description for JPEG 4.0.3: hardware ops plus the late-init hook. */
+static struct amdgpu_jpeg_ras jpeg_v4_0_3_ras = {
+ .ras_block = {
+ .hw_ops = &jpeg_v4_0_3_ras_hw_ops,
+ .ras_late_init = jpeg_v4_0_3_ras_late_init,
+ },
+};
+
+/* Point the device's JPEG RAS handle at the 4.0.3 description. */
+static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.ras = &jpeg_v4_0_3_ras;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h
new file mode 100644
index 000000000000..2e110d04af84
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V4_0_3_H__
+#define __JPEG_V4_0_3_H__
+
+/*
+ * "Internal" register offsets for JPEG 4.0.3.
+ * NOTE(review): presumably the offsets encoded into PACKETJ ring packets
+ * rather than MMIO offsets; confirm against the .c users.
+ */
+#define regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 0x1bfff
+#define regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET 0x404d
+#define regUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET 0x404e
+#define regUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET 0x404f
+#define regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ab
+#define regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40ac
+#define regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET 0x40a4
+#define regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET 0x40a6
+#define regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40b6
+#define regUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40b7
+#define regUVD_JRBC_IB_SIZE_INTERNAL_OFFSET 0x4082
+#define regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET 0x42d4
+#define regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x42d5
+#define regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET 0x4085
+#define regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084
+#define regUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089
+#define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x4043
+#define regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET 0x4094
+#define regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET 0x1bffe
+
+/* Packet register used for writes to offsets outside the directly addressable window */
+#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
+
+/* Sub blocks iterated when querying RAS poison status; MAX_SUB_BLOCK is the count. */
+enum amdgpu_jpeg_v4_0_3_sub_block {
+ AMDGPU_JPEG_V4_0_3_JPEG0 = 0,
+ AMDGPU_JPEG_V4_0_3_JPEG1,
+
+ AMDGPU_JPEG_V4_0_3_MAX_SUB_BLOCK,
+};
+
+/* IP block descriptor, defined in jpeg_v4_0_3.c */
+extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block;
+
+/* Non-static ring helpers, declared here so that other files can use them. */
+void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags);
+void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned int flags);
+void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr);
+void jpeg_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring);
+void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count);
+void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring);
+void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring);
+void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
+void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask);
+
+#endif /* __JPEG_V4_0_3_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
new file mode 100644
index 000000000000..54fd9c800c40
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
@@ -0,0 +1,872 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v2_0.h"
+#include "jpeg_v4_0_5.h"
+#include "mmsch_v4_0.h"
+
+#include "vcn/vcn_4_0_5_offset.h"
+#include "vcn/vcn_4_0_5_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
+
+/* Map the legacy mm* DPG register names onto the reg* names of this IP version. */
+#define mmUVD_DPG_LMA_CTL regUVD_DPG_LMA_CTL
+#define mmUVD_DPG_LMA_CTL_BASE_IDX regUVD_DPG_LMA_CTL_BASE_IDX
+#define mmUVD_DPG_LMA_DATA regUVD_DPG_LMA_DATA
+#define mmUVD_DPG_LMA_DATA_BASE_IDX regUVD_DPG_LMA_DATA_BASE_IDX
+
+/* Internal register offsets used for DPG-mode writes and the pitch register setup */
+#define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+#define regJPEG_DEC_GFX10_ADDR_CONFIG_INTERNAL_OFFSET 0x4026
+#define regJPEG_SYS_INT_EN_INTERNAL_OFFSET 0x4141
+#define regJPEG_CGC_CTRL_INTERNAL_OFFSET 0x4161
+#define regJPEG_CGC_GATE_INTERNAL_OFFSET 0x4160
+#define regUVD_NO_OP_INTERNAL_OFFSET 0x0029
+
+/* Registers registered for the IP state dump via amdgpu_jpeg_reg_dump_init(). */
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_4_0_5[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
+};
+
+/* Forward declarations for helpers defined further down in this file. */
+static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v4_0_5_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v4_0_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
+static void jpeg_v4_0_5_dec_ring_set_wptr(struct amdgpu_ring *ring);
+
+/* IH client id per JPEG instance, indexed by instance number (at most two). */
+static int amdgpu_ih_clientid_jpeg[] = {
+ SOC15_IH_CLIENTID_VCN,
+ SOC15_IH_CLIENTID_VCN1
+};
+
+
+
+/**
+ * jpeg_v4_0_5_early_init - set function pointers
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Pick the instance count from the UVD IP version (one for 4.0.5, two for
+ * 4.0.6), use a single ring per instance and install the ring and irq
+ * function pointers. Unknown IP versions fail with -EINVAL.
+ */
+static int jpeg_v4_0_5_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	uint32_t ip_ver = amdgpu_ip_version(adev, UVD_HWIP, 0);
+
+	if (ip_ver == IP_VERSION(4, 0, 5)) {
+		adev->jpeg.num_jpeg_inst = 1;
+	} else if (ip_ver == IP_VERSION(4, 0, 6)) {
+		adev->jpeg.num_jpeg_inst = 2;
+	} else {
+		DRM_DEV_ERROR(adev->dev,
+			      "Failed to init vcn ip block(UVD_HWIP:0x%x)\n",
+			      amdgpu_ip_version(adev, UVD_HWIP, 0));
+		return -EINVAL;
+	}
+
+	adev->jpeg.num_jpeg_rings = 1;
+
+	jpeg_v4_0_5_set_dec_ring_funcs(adev);
+	jpeg_v4_0_5_set_irq_funcs(adev);
+
+	return 0;
+}
+
+/**
+ * jpeg_v4_0_5_sw_init - sw init for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Register the interrupt ids of every non-harvested instance, run the common
+ * JPEG sw init and resume, create the decode rings, then set up the register
+ * dump list and the reset mask sysfs entry.
+ */
+static int jpeg_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block)
+{
+	/* JPEG TRAP, DJPEG poison event, EJPEG poison event */
+	static const unsigned int src_ids[] = {
+		VCN_4_0__SRCID__JPEG_DECODE,
+		VCN_4_0__SRCID_DJPEG0_POISON,
+		VCN_4_0__SRCID_EJPEG0_POISON,
+	};
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_ring *ring;
+	int r, i, k;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		if (adev->jpeg.harvest_config & (1 << i))
+			continue;
+
+		for (k = 0; k < ARRAY_SIZE(src_ids); ++k) {
+			r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i],
+					      src_ids[k], &adev->jpeg.inst[i].irq);
+			if (r)
+				return r;
+		}
+	}
+
+	r = amdgpu_jpeg_sw_init(adev);
+	if (r)
+		return r;
+
+	r = amdgpu_jpeg_resume(adev);
+	if (r)
+		return r;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		if (adev->jpeg.harvest_config & (1 << i))
+			continue;
+
+		ring = adev->jpeg.inst[i].ring_dec;
+		ring->use_doorbell = true;
+		ring->vm_hub = AMDGPU_MMHUB0(0);
+		ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8 * i;
+		sprintf(ring->name, "jpeg_dec_%d", i);
+
+		r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq,
+				     0, AMDGPU_RING_PRIO_DEFAULT, NULL);
+		if (r)
+			return r;
+
+		adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET;
+		adev->jpeg.inst[i].external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, i, regUVD_JPEG_PITCH);
+	}
+
+	r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_4_0_5, ARRAY_SIZE(jpeg_reg_list_4_0_5));
+	if (r)
+		return r;
+
+	/* per-queue reset is only advertised on bare metal */
+	adev->jpeg.supported_reset =
+		amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]);
+	if (!amdgpu_sriov_vf(adev))
+		adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+	return amdgpu_jpeg_sysfs_reset_mask_init(adev);
+}
+
+/**
+ * jpeg_v4_0_5_sw_fini - sw fini for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Suspend JPEG, remove the reset mask sysfs entry and free the sw state.
+ * A failing suspend aborts the teardown.
+ */
+static int jpeg_v4_0_5_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int err = amdgpu_jpeg_suspend(adev);
+
+	if (err)
+		return err;
+
+	amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
+	return amdgpu_jpeg_sw_fini(adev);
+}
+
+/**
+ * jpeg_v4_0_5_hw_init - start and test JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Run the ring test on the decode ring of every non-harvested instance.
+ * The test is skipped entirely when JPEG DPG is supported.
+ */
+static int jpeg_v4_0_5_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst, err;
+
+	// TODO: Enable ring test with DPG support
+	if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG)
+		return 0;
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst) {
+		if (adev->jpeg.harvest_config & (1 << inst))
+			continue;
+
+		err = amdgpu_ring_test_helper(adev->jpeg.inst[inst].ring_dec);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v4_0_5_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Cancel the idle work, then power gate the block if it is not gated yet and
+ * a non-harvested instance still reports a non-zero JRBC status. Nothing is
+ * gated under SR-IOV.
+ */
+static int jpeg_v4_0_5_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst;
+
+	cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst) {
+		if (adev->jpeg.harvest_config & (1 << inst))
+			continue;
+
+		if (amdgpu_sriov_vf(adev))
+			continue;
+
+		if (adev->jpeg.cur_state == AMD_PG_STATE_GATE)
+			continue;
+
+		if (RREG32_SOC15(JPEG, inst, regUVD_JRBC_STATUS))
+			jpeg_v4_0_5_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v4_0_5_suspend - suspend JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Run hw fini first; only if that succeeds, suspend the JPEG block.
+ */
+static int jpeg_v4_0_5_suspend(struct amdgpu_ip_block *ip_block)
+{
+	int err = jpeg_v4_0_5_hw_fini(ip_block);
+
+	if (err)
+		return err;
+
+	return amdgpu_jpeg_suspend(ip_block->adev);
+}
+
+/**
+ * jpeg_v4_0_5_resume - resume JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Resume the JPEG block first; only if that succeeds, run hw init.
+ */
+static int jpeg_v4_0_5_resume(struct amdgpu_ip_block *ip_block)
+{
+	int err = amdgpu_jpeg_resume(ip_block->adev);
+
+	if (err)
+		return err;
+
+	return jpeg_v4_0_5_hw_init(ip_block);
+}
+
+/*
+ * Disable JPEG clock gating on instance @inst.
+ *
+ * Programs JPEG_CGC_CTRL (dynamic clock mode according to MGCG support, gate
+ * delay timer and clock-off delay) and then clears the JPEG_DEC, JPEG2_DEC,
+ * JMCIF and JRBBM bits in JPEG_CGC_GATE.
+ */
+static void jpeg_v4_0_5_disable_clock_gating(struct amdgpu_device *adev, int inst)
+{
+	uint32_t data = 0;
+
+	data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL);
+	if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
+		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+		data &= (~JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK);
+	} else {
+		/*
+		 * Clear the field with its mask. Inverting the shift amount
+		 * instead does not clear the DYN_CLOCK_MODE bit.
+		 */
+		data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+	}
+
+	data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+	data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+	WREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL, data);
+
+	data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE);
+	data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
+		| JPEG_CGC_GATE__JPEG2_DEC_MASK
+		| JPEG_CGC_GATE__JMCIF_MASK
+		| JPEG_CGC_GATE__JRBBM_MASK);
+	WREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE, data);
+}
+
+/*
+ * Enable JPEG clock gating on instance @inst.
+ *
+ * Programs JPEG_CGC_CTRL (dynamic clock mode according to MGCG support, gate
+ * delay timer and clock-off delay) and then sets the JPEG_DEC, JPEG2_DEC,
+ * JMCIF and JRBBM bits in JPEG_CGC_GATE.
+ */
+static void jpeg_v4_0_5_enable_clock_gating(struct amdgpu_device *adev, int inst)
+{
+	uint32_t data = 0;
+
+	data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL);
+	if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
+		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+		data |= JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK;
+	} else {
+		/*
+		 * Clear the field with its mask. Inverting the shift amount
+		 * instead does not clear the DYN_CLOCK_MODE bit.
+		 */
+		data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+	}
+
+	data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+	data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+	WREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL, data);
+
+	data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE);
+	data |= (JPEG_CGC_GATE__JPEG_DEC_MASK
+		| JPEG_CGC_GATE__JPEG2_DEC_MASK
+		| JPEG_CGC_GATE__JMCIF_MASK
+		| JPEG_CGC_GATE__JRBBM_MASK);
+	WREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE, data);
+}
+
+/*
+ * Program the clock gating registers through the DPG write path.
+ *
+ * JPEG_CGC_CTRL is built from scratch (dynamic clock mode only with MGCG
+ * support, plus gate delay timer and clock-off delay); JPEG_CGC_GATE is
+ * written as zero.
+ */
+static void jpeg_engine_4_0_5_dpg_clock_gating_mode(struct amdgpu_device *adev,
+			int inst_idx, uint8_t indirect)
+{
+	uint32_t cgc_ctrl;
+	uint32_t cgc_gate = 0;
+
+	cgc_ctrl = (1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT) |
+		   (4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT);
+	if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG)
+		cgc_ctrl |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+
+	WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_CGC_CTRL_INTERNAL_OFFSET, cgc_ctrl, indirect);
+
+	WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_CGC_GATE_INTERNAL_OFFSET,
+				   cgc_gate, indirect);
+}
+
+/*
+ * Take JPEG instance @inst out of static power gating. Always returns 0.
+ * The statement order is a hardware sequence; do not reorder it.
+ */
+static int jpeg_v4_0_5_disable_static_power_gating(struct amdgpu_device *adev, int inst)
+{
+ /* with PG support: write ONO1 power config 1 and wait for the ONO1 status field to read 0 */
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ WREG32(SOC15_REG_OFFSET(JPEG, inst, regUVD_IPX_DLDO_CONFIG),
+ 1 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(JPEG, inst, regUVD_IPX_DLDO_STATUS,
+ 0, UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK);
+ }
+
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, inst, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* keep the JPEG in static PG mode */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, inst, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK);
+
+ return 0;
+}
+
+/*
+ * Put JPEG instance @inst into static power gating. Always returns 0.
+ * The statement order is a hardware sequence; do not reorder it.
+ */
+static int jpeg_v4_0_5_enable_static_power_gating(struct amdgpu_device *adev, int inst)
+{
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, inst, regUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* with PG support: write ONO1 power config 2 and wait for the ONO1 status field to read 1 */
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ WREG32(SOC15_REG_OFFSET(JPEG, inst, regUVD_IPX_DLDO_CONFIG),
+ 2 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(JPEG, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK);
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_5_start_dpg_mode - Jpeg start with dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ * @indirect: indirectly write sram
+ *
+ * Start JPEG block with dpg mode
+ *
+ * Steps, in order:
+ *  1. Clear the JPEG_POWER_STATUS field of UVD_JPEG_POWER_STATUS and OR in 0x1.
+ *  2. If AMD_PG_SUPPORT_JPEG is set, write 2 to ONO1_PWR_CONFIG and poll
+ *     UVD_IPX_DLDO_STATUS until ONO1_PWR_STATUS reads 1 (poll result unused).
+ *  3. Set JPEG_PG_MODE in UVD_JPEG_POWER_STATUS.
+ *  4. When @indirect, rewind dpg_sram_curr_addr to dpg_sram_cpu_addr.
+ *  5. Issue the clock gating, tiling config, interrupt enable and NO_OP
+ *     writes through WREG32_SOC15_JPEG_DPG_MODE, then call
+ *     amdgpu_jpeg_psp_update_sram() when @indirect.
+ *  6. Program the JRBC ring buffer registers and cache the hardware write
+ *     pointer in ring->wptr.
+ */
+static void jpeg_v4_0_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ struct amdgpu_ring *ring = adev->jpeg.inst[inst_idx].ring_dec;
+ uint32_t reg_data = 0;
+
+ /* enable anti hang mechanism */
+ reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS);
+ reg_data &= ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK;
+ reg_data |= 0x1; /* presumably the JPEG_POWER_STATUS field value - TODO confirm against the mask */
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ WREG32(SOC15_REG_OFFSET(JPEG, inst_idx, regUVD_IPX_DLDO_CONFIG),
+ 2 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(JPEG, inst_idx, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK);
+ }
+
+ reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS);
+ reg_data |= UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK; /* select PG mode */
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data);
+
+ if (indirect) /* restart the SRAM write cursor at the buffer's CPU address */
+ adev->jpeg.inst[inst_idx].dpg_sram_curr_addr =
+ (uint32_t *)adev->jpeg.inst[inst_idx].dpg_sram_cpu_addr;
+
+ jpeg_engine_4_0_5_dpg_clock_gating_mode(adev, inst_idx, indirect);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_DEC_GFX10_ADDR_CONFIG_INTERNAL_OFFSET,
+ adev->gfx.config.gb_addr_config, indirect);
+ /* enable System Interrupt for JRBC */
+ WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_SYS_INT_EN_INTERNAL_OFFSET,
+ JPEG_SYS_INT_EN__DJRBC_MASK, indirect);
+
+ /* add nop to workaround PSP size check */
+ WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regUVD_NO_OP_INTERNAL_OFFSET, 0, indirect);
+
+ if (indirect) /* hand the queued writes to amdgpu_jpeg_psp_update_sram() */
+ amdgpu_jpeg_psp_update_sram(adev, inst_idx, 0);
+
+ WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_CNTL, 0x00000002L); /* bit 0 written above is dropped again here */
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_WPTR); /* resync software wptr with hardware */
+}
+
+/**
+ * jpeg_v4_0_5_stop_dpg_mode - leave dpg mode on one JPEG instance
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ *
+ * Read-modify-write of UVD_JPEG_POWER_STATUS that clears JPEG_PG_MODE and
+ * leaves every other bit as read.
+ */
+static void jpeg_v4_0_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+{
+	uint32_t power_status;
+
+	power_status = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS);
+	power_status &= ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK;
+	WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, power_status);
+}
+
+/**
+ * jpeg_v4_0_5_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ *
+ * Calls amdgpu_dpm_enable_jpeg(adev, true) when DPM is enabled, then walks
+ * every JPEG instance that is not masked out by adev->jpeg.harvest_config.
+ * For each one the doorbell range and VCN_JPEG_DB_CTRL are programmed; with
+ * AMD_PG_SUPPORT_JPEG_DPG the rest is delegated to
+ * jpeg_v4_0_5_start_dpg_mode(), otherwise static power gating and clock
+ * gating are disabled and the tiling, JMI, interrupt and ring buffer
+ * registers are written directly.
+ *
+ * Returns 0 on success or the error from
+ * jpeg_v4_0_5_disable_static_power_gating().
+ */
+static int jpeg_v4_0_5_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int r, i;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, true);
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i)) /* instance i is harvested: skip it */
+ continue;
+
+ ring = adev->jpeg.inst[i].ring_dec;
+ /* doorbell programming is done for every playback */
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i, i);
+
+ WREG32_SOC15(VCN, i, regVCN_JPEG_DB_CTRL,
+ ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) { /* DPG path does its own ring setup */
+ jpeg_v4_0_5_start_dpg_mode(adev, i, adev->jpeg.indirect_sram);
+ continue;
+ }
+
+ /* disable power gating */
+ r = jpeg_v4_0_5_disable_static_power_gating(adev, i);
+ if (r)
+ return r;
+
+ /* JPEG disable CGC */
+ jpeg_v4_0_5_disable_clock_gating(adev, i);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, i, regJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, regJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC_MASK);
+
+ WREG32_SOC15(JPEG, i, regUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, i, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, i, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_CNTL, 0x00000002L); /* bit 0 written above is dropped again here */
+ WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, i, regUVD_JRBC_RB_WPTR); /* resync software wptr with hardware */
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_5_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * For every non-harvested instance: with AMD_PG_SUPPORT_JPEG_DPG only dpg
+ * mode is left; otherwise JMI is put into soft reset, clock gating is
+ * enabled and static power gating is entered.  Finally
+ * amdgpu_dpm_enable_jpeg(adev, false) is called when DPM is enabled.
+ *
+ * Returns 0 on success or the error from
+ * jpeg_v4_0_5_enable_static_power_gating().
+ */
+static int jpeg_v4_0_5_stop(struct amdgpu_device *adev)
+{
+	int inst, ret;
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst) {
+		if (adev->jpeg.harvest_config & (1 << inst))
+			continue;
+
+		if (!(adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG)) {
+			/* reset JMI */
+			WREG32_P(SOC15_REG_OFFSET(JPEG, inst, regUVD_JMI_CNTL),
+				 UVD_JMI_CNTL__SOFT_RESET_MASK,
+				 ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+			jpeg_v4_0_5_enable_clock_gating(adev, inst);
+
+			/* enable power gating */
+			ret = jpeg_v4_0_5_enable_static_power_gating(adev, inst);
+			if (ret)
+				return ret;
+		} else {
+			jpeg_v4_0_5_stop_dpg_mode(adev, inst);
+		}
+	}
+
+	if (adev->pm.dpm_enabled)
+		amdgpu_dpm_enable_jpeg(adev, false);
+
+	return 0;
+}
+
+/**
+ * jpeg_v4_0_5_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Reads UVD_JRBC_RB_RPTR of the JPEG instance selected by ring->me and
+ * returns the value.
+ */
+static uint64_t jpeg_v4_0_5_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	/* the register macro refers to a local named adev */
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t rptr;
+
+	rptr = RREG32_SOC15(JPEG, ring->me, regUVD_JRBC_RB_RPTR);
+
+	return rptr;
+}
+
+/**
+ * jpeg_v4_0_5_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the value stored at ring->wptr_cpu_addr when the ring uses a
+ * doorbell, otherwise the content of UVD_JRBC_RB_WPTR for instance ring->me.
+ */
+static uint64_t jpeg_v4_0_5_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	/* the register macro refers to a local named adev */
+	struct amdgpu_device *adev = ring->adev;
+
+	if (!ring->use_doorbell)
+		return RREG32_SOC15(JPEG, ring->me, regUVD_JRBC_RB_WPTR);
+
+	return *ring->wptr_cpu_addr;
+}
+
+/**
+ * jpeg_v4_0_5_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Publishes the low 32 bits of ring->wptr: with a doorbell the value is
+ * stored at ring->wptr_cpu_addr and then written to the doorbell, without
+ * one it is written to UVD_JRBC_RB_WPTR of instance ring->me.
+ */
+static void jpeg_v4_0_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	/* the register and doorbell macros refer to a local named adev */
+	struct amdgpu_device *adev = ring->adev;
+	const uint32_t wptr = lower_32_bits(ring->wptr);
+
+	if (!ring->use_doorbell) {
+		WREG32_SOC15(JPEG, ring->me, regUVD_JRBC_RB_WPTR, wptr);
+		return;
+	}
+
+	*ring->wptr_cpu_addr = wptr;
+	WDOORBELL32(ring->doorbell_index, wptr);
+}
+
+/**
+ * jpeg_v4_0_5_is_idle - check whether all JPEG instances are idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Reads UVD_JRBC_STATUS of every non-harvested instance (no early exit) and
+ * returns true only if RB_JOB_DONE is set in all of them.
+ */
+static bool jpeg_v4_0_5_is_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	bool idle = true;
+	uint32_t status;
+	int inst;
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst) {
+		if (adev->jpeg.harvest_config & (1 << inst))
+			continue;
+
+		status = RREG32_SOC15(JPEG, inst, regUVD_JRBC_STATUS);
+		if ((status & UVD_JRBC_STATUS__RB_JOB_DONE_MASK) !=
+		    UVD_JRBC_STATUS__RB_JOB_DONE_MASK)
+			idle = false;
+	}
+
+	return idle;
+}
+
+/**
+ * jpeg_v4_0_5_wait_for_idle - wait until every JPEG instance is idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Polls UVD_JRBC_STATUS of each non-harvested instance until RB_JOB_DONE is
+ * set.  The previous code returned from inside the loop, so only the first
+ * non-harvested instance was ever waited on; now the loop only stops early
+ * when a poll reports an error.
+ *
+ * Returns 0 when every polled instance reported RB_JOB_DONE, otherwise the
+ * first non-zero result of SOC15_WAIT_ON_RREG().
+ */
+static int jpeg_v4_0_5_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int i, r;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		if (adev->jpeg.harvest_config & (1 << i))
+			continue;
+
+		r = SOC15_WAIT_ON_RREG(JPEG, i, regUVD_JRBC_STATUS,
+				       UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+				       UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v4_0_5_set_clockgating_state - gate or ungate the JPEG clocks
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ * @state: requested clock gating state
+ *
+ * Applies the request to every non-harvested instance in turn.  Before an
+ * instance is gated jpeg_v4_0_5_is_idle() is consulted again; if it reports
+ * busy the function stops with -EBUSY, leaving earlier instances gated.
+ *
+ * Returns 0 on success, -EBUSY if the block is not idle while gating.
+ */
+static int jpeg_v4_0_5_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					     enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	const bool gate = (state == AMD_CG_STATE_GATE);
+	int inst;
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst) {
+		if (adev->jpeg.harvest_config & (1 << inst))
+			continue;
+
+		if (!gate) {
+			jpeg_v4_0_5_disable_clock_gating(adev, inst);
+			continue;
+		}
+
+		if (!jpeg_v4_0_5_is_idle(ip_block))
+			return -EBUSY;
+
+		jpeg_v4_0_5_enable_clock_gating(adev, inst);
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v4_0_5_set_powergating_state - gate or ungate JPEG power
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ * @state: requested power gating state
+ *
+ * On an SR-IOV VF the cached state is forced to AMD_PG_STATE_UNGATE and
+ * nothing else happens.  Otherwise a request equal to the cached state is a
+ * no-op; AMD_PG_STATE_GATE stops the block and any other state starts it.
+ * The cached state is updated only when that call succeeds.
+ *
+ * Returns 0 on success or the error from jpeg_v4_0_5_stop()/_start().
+ */
+static int jpeg_v4_0_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
+					     enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int ret;
+
+	if (amdgpu_sriov_vf(adev)) {
+		adev->jpeg.cur_state = AMD_PG_STATE_UNGATE;
+		return 0;
+	}
+
+	if (adev->jpeg.cur_state == state)
+		return 0;
+
+	ret = (state == AMD_PG_STATE_GATE) ? jpeg_v4_0_5_stop(adev) :
+					     jpeg_v4_0_5_start(adev);
+	if (ret)
+		return ret;
+
+	adev->jpeg.cur_state = state;
+
+	return 0;
+}
+
+/**
+ * jpeg_v4_0_5_process_interrupt - handle a JPEG interrupt vector entry
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt source the entry was routed to
+ * @entry: decoded interrupt vector entry
+ *
+ * Maps the client id to a JPEG instance (VCN -> 0, VCN1 -> 1), then
+ * dispatches on the source id: decode interrupts process the fences of that
+ * instance's decode ring, poison interrupts go to
+ * amdgpu_jpeg_process_poison_irq().  Unknown client or source ids are only
+ * logged.
+ *
+ * Always returns 0.
+ */
+static int jpeg_v4_0_5_process_interrupt(struct amdgpu_device *adev,
+					 struct amdgpu_irq_src *source,
+					 struct amdgpu_iv_entry *entry)
+{
+	uint32_t ip_instance;
+
+	DRM_DEBUG("IH: JPEG TRAP\n");
+
+	if (entry->client_id == SOC15_IH_CLIENTID_VCN) {
+		ip_instance = 0;
+	} else if (entry->client_id == SOC15_IH_CLIENTID_VCN1) {
+		ip_instance = 1;
+	} else {
+		DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+		return 0;
+	}
+
+	switch (entry->src_id) {
+	case VCN_4_0__SRCID__JPEG_DECODE:
+		amdgpu_fence_process(adev->jpeg.inst[ip_instance].ring_dec);
+		break;
+	case VCN_4_0__SRCID_DJPEG0_POISON:
+	case VCN_4_0__SRCID_EJPEG0_POISON:
+		amdgpu_jpeg_process_poison_irq(adev, source, entry);
+		break;
+	default:
+		DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+			      entry->src_id, entry->src_data[0]);
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v4_0_5_ring_reset - reset the JPEG decode ring
+ *
+ * @ring: ring to reset
+ * @vmid: not used by this implementation
+ * @timedout_fence: fence handed to the reset helpers
+ *
+ * Brackets a full stop/start cycle of the JPEG block with
+ * amdgpu_ring_reset_helper_begin() and amdgpu_ring_reset_helper_end().  The
+ * end helper is not called when stop or start fails.
+ *
+ * Returns the first error from stop/start, otherwise the result of the end
+ * helper.
+ */
+static int jpeg_v4_0_5_ring_reset(struct amdgpu_ring *ring,
+				  unsigned int vmid,
+				  struct amdgpu_fence *timedout_fence)
+{
+	int ret;
+
+	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+	ret = jpeg_v4_0_5_stop(ring->adev);
+	if (!ret)
+		ret = jpeg_v4_0_5_start(ring->adev);
+	if (ret)
+		return ret;
+
+	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static const struct amd_ip_funcs jpeg_v4_0_5_ip_funcs = { /* IP callback table referenced by jpeg_v4_0_5_ip_block */
+ .name = "jpeg_v4_0_5",
+ .early_init = jpeg_v4_0_5_early_init,
+ .sw_init = jpeg_v4_0_5_sw_init,
+ .sw_fini = jpeg_v4_0_5_sw_fini,
+ .hw_init = jpeg_v4_0_5_hw_init,
+ .hw_fini = jpeg_v4_0_5_hw_fini,
+ .suspend = jpeg_v4_0_5_suspend,
+ .resume = jpeg_v4_0_5_resume,
+ .is_idle = jpeg_v4_0_5_is_idle,
+ .wait_for_idle = jpeg_v4_0_5_wait_for_idle,
+ .set_clockgating_state = jpeg_v4_0_5_set_clockgating_state,
+ .set_powergating_state = jpeg_v4_0_5_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state, /* shared amdgpu_jpeg helper, not version specific */
+ .print_ip_state = amdgpu_jpeg_print_ip_state, /* shared amdgpu_jpeg helper, not version specific */
+};
+
+static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = { /* decode ring ops; installed by jpeg_v4_0_5_set_dec_ring_funcs() */
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v4_0_5_dec_ring_get_rptr,
+ .get_wptr = jpeg_v4_0_5_dec_ring_get_wptr,
+ .set_wptr = jpeg_v4_0_5_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v4_0_5_dec_ring_emit_vm_flush (the callback installed below is jpeg_v2_0_dec_ring_emit_vm_flush) */
+ 18 + 18 + /* jpeg_v4_0_5_dec_ring_emit_fence x2 vm fence (the callback installed below is jpeg_v2_0_dec_ring_emit_fence) */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v4_0_5_dec_ring_emit_ib (the callback installed below is jpeg_v2_0_dec_ring_emit_ib) */
+ .emit_ib = jpeg_v2_0_dec_ring_emit_ib,
+ .emit_fence = jpeg_v2_0_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v2_0_dec_ring_nop,
+ .insert_start = jpeg_v2_0_dec_ring_insert_start,
+ .insert_end = jpeg_v2_0_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v4_0_5_ring_reset, /* full stop/start of the JPEG block */
+};
+
+/**
+ * jpeg_v4_0_5_set_dec_ring_funcs - install the decode ring callbacks
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * For every non-harvested instance, points the decode ring at
+ * jpeg_v4_0_5_dec_ring_vm_funcs and records the instance index in ring->me.
+ */
+static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	int inst;
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst) {
+		if (adev->jpeg.harvest_config & (1 << inst))
+			continue;
+
+		ring = adev->jpeg.inst[inst].ring_dec;
+		ring->funcs = &jpeg_v4_0_5_dec_ring_vm_funcs;
+		ring->me = inst;
+	}
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v4_0_5_irq_funcs = { /* only .process is provided */
+ .process = jpeg_v4_0_5_process_interrupt,
+};
+
+/**
+ * jpeg_v4_0_5_set_irq_funcs - install the interrupt callbacks
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * For every non-harvested instance, sets the interrupt source to one type
+ * and points it at jpeg_v4_0_5_irq_funcs.
+ */
+static void jpeg_v4_0_5_set_irq_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_irq_src *irq;
+	int inst;
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst) {
+		if (adev->jpeg.harvest_config & (1 << inst))
+			continue;
+
+		irq = &adev->jpeg.inst[inst].irq;
+		irq->num_types = 1;
+		irq->funcs = &jpeg_v4_0_5_irq_funcs;
+	}
+}
+
+const struct amdgpu_ip_block_version jpeg_v4_0_5_ip_block = { /* non-static: exported descriptor for JPEG IP version 4.0.5 */
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 4,
+ .minor = 0,
+ .rev = 5,
+ .funcs = &jpeg_v4_0_5_ip_funcs,
+};
+
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.h
new file mode 100644
index 000000000000..c5eee572079c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V4_0_5_H__
+#define __JPEG_V4_0_5_H__
+
+enum amdgpu_jpeg_v4_0_5_sub_block { /* sub-block identifiers for JPEG 4.0.5 */
+ AMDGPU_JPEG_V4_0_5_JPEG0 = 0,
+
+ AMDGPU_JPEG_V4_0_5_MAX_SUB_BLOCK, /* number of entries above; keep last */
+};
+
+extern const struct amdgpu_ip_block_version jpeg_v4_0_5_ip_block; /* IP block descriptor for JPEG 4.0.5 */
+
+#endif /* __JPEG_V4_0_5_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
new file mode 100644
index 000000000000..46bf15dce2bd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
@@ -0,0 +1,733 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v2_0.h"
+#include "jpeg_v4_0_3.h"
+
+#include "vcn/vcn_5_0_0_offset.h"
+#include "vcn/vcn_5_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
+#include "jpeg_v5_0_0.h"
+
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_5_0[] = { /* register list handed to amdgpu_jpeg_reg_dump_init() in sw_init */
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
+};
+
+static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v5_0_0_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v5_0_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
+
+/**
+ * jpeg_v5_0_0_early_init - set instance counts and function pointers
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Declares one JPEG instance with one ring, then installs the decode ring
+ * and interrupt callbacks.  Always returns 0.
+ */
+static int jpeg_v5_0_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	/* a single instance with a single ring */
+	adev->jpeg.num_jpeg_rings = 1;
+	adev->jpeg.num_jpeg_inst = 1;
+
+	jpeg_v5_0_0_set_dec_ring_funcs(adev);
+	jpeg_v5_0_0_set_irq_funcs(adev);
+
+	return 0;
+}
+
+/**
+ * jpeg_v5_0_0_sw_init - sw init for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Load firmware and sw initialization
+ *
+ * Registers the JPEG decode interrupt, runs the common amdgpu_jpeg software
+ * init and resume, sets up the single decode ring (doorbell based, on
+ * AMDGPU_MMHUB0(0)), records the pitch register offsets, registers the
+ * register dump list and publishes the supported reset types through sysfs.
+ *
+ * Returns 0 on success or the first error from the helpers called here.
+ * Nothing set up by earlier steps is undone in this function on failure.
+ */
+static int jpeg_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int r;
+
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_5_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ ring = adev->jpeg.inst->ring_dec;
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; /* one past the range base that hw_init passes to vcn_doorbell_range() */
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+
+ sprintf(ring->name, "jpeg_dec");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH);
+
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_5_0, ARRAY_SIZE(jpeg_reg_list_5_0));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]);
+ if (!amdgpu_sriov_vf(adev)) /* per-queue reset is advertised only when not an SR-IOV VF */
+ adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v5_0_0_sw_fini - sw fini for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Suspends JPEG, removes the reset-mask sysfs entry and releases the common
+ * software state.  If the suspend fails nothing else is torn down.
+ *
+ * Returns 0 on success or the error from amdgpu_jpeg_suspend() or
+ * amdgpu_jpeg_sw_fini().
+ */
+static int jpeg_v5_0_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int ret;
+
+	ret = amdgpu_jpeg_suspend(adev);
+	if (ret)
+		return ret;
+
+	amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
+	return amdgpu_jpeg_sw_fini(adev);
+}
+
+/**
+ * jpeg_v5_0_0_hw_init - start and test JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Programs the doorbell range for the decode ring and, unless
+ * AMD_PG_SUPPORT_JPEG_DPG is set, runs the ring test.
+ *
+ * Returns 0 on success or the error from amdgpu_ring_test_helper().
+ */
+static int jpeg_v5_0_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
+
+	adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+			(adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
+
+	/* Skip ring test because pause DPG is not implemented. */
+	if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG)
+		return 0;
+
+	return amdgpu_ring_test_helper(ring);
+}
+
+/**
+ * jpeg_v5_0_0_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Cancels the pending idle work, then gates the block if it is not already
+ * gated and UVD_JRBC_STATUS reads non-zero.  The register is only read when
+ * the cached state is not AMD_PG_STATE_GATE.  The result of the gating
+ * request is ignored; always returns 0.
+ */
+static int jpeg_v5_0_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+	if (adev->jpeg.cur_state == AMD_PG_STATE_GATE)
+		return 0;
+
+	if (RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS))
+		jpeg_v5_0_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
+
+	return 0;
+}
+
+/**
+ * jpeg_v5_0_0_suspend - suspend JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Runs hw_fini and, if that succeeds, the common amdgpu_jpeg_suspend().
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int jpeg_v5_0_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+	int ret = jpeg_v5_0_0_hw_fini(ip_block);
+
+	if (ret)
+		return ret;
+
+	return amdgpu_jpeg_suspend(ip_block->adev);
+}
+
+/**
+ * jpeg_v5_0_0_resume - resume JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Runs the common amdgpu_jpeg_resume() and, if that succeeds, hw_init.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int jpeg_v5_0_0_resume(struct amdgpu_ip_block *ip_block)
+{
+	int ret = amdgpu_jpeg_resume(ip_block->adev);
+
+	if (ret)
+		return ret;
+
+	return jpeg_v5_0_0_hw_init(ip_block);
+}
+
+/**
+ * jpeg_v5_0_0_disable_clock_gating - ungate the JPEG clocks
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Writes 0 to JPEG_CGC_GATE, then clears the JPEG0_DEC_MODE and
+ * JPEG_ENC_MODE bits of JPEG_CGC_CTRL with a read-modify-write.
+ */
+static void jpeg_v5_0_0_disable_clock_gating(struct amdgpu_device *adev)
+{
+	uint32_t cgc_ctrl;
+
+	WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, 0);
+
+	cgc_ctrl = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL);
+	cgc_ctrl &= ~JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK;
+	cgc_ctrl &= ~JPEG_CGC_CTRL__JPEG_ENC_MODE_MASK;
+	WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, cgc_ctrl);
+}
+
+/**
+ * jpeg_v5_0_0_enable_clock_gating - gate the JPEG clocks
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Sets the JPEG0_DEC_MODE bit in JPEG_CGC_CTRL, then sets the JPEG0_DEC,
+ * JPEG_ENC, JMCIF and JRBBM bits in JPEG_CGC_GATE.  Both registers are
+ * updated with a read-modify-write.
+ */
+static void jpeg_v5_0_0_enable_clock_gating(struct amdgpu_device *adev)
+{
+	uint32_t cgc_ctrl, cgc_gate;
+
+	cgc_ctrl = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL);
+	cgc_ctrl |= 1 << JPEG_CGC_CTRL__JPEG0_DEC_MODE__SHIFT;
+	WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, cgc_ctrl);
+
+	cgc_gate = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE);
+	cgc_gate |= JPEG_CGC_GATE__JPEG0_DEC_MASK;
+	cgc_gate |= JPEG_CGC_GATE__JPEG_ENC_MASK;
+	cgc_gate |= JPEG_CGC_GATE__JMCIF_MASK;
+	cgc_gate |= JPEG_CGC_GATE__JRBBM_MASK;
+	WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, cgc_gate);
+}
+
+/**
+ * jpeg_v5_0_0_disable_power_gating - leave power gating
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Writes 1 to the ONO1_PWR_CONFIG field of UVD_IPX_DLDO_CONFIG, polls
+ * UVD_IPX_DLDO_STATUS until ONO1_PWR_STATUS reads 0, and then clears
+ * JPEG_POWER_STATUS in UVD_JPEG_POWER_STATUS.  There is no check of
+ * adev->pg_flags here.
+ *
+ * Always returns 0; the result of the status poll is not checked.
+ */
+static int jpeg_v5_0_0_disable_power_gating(struct amdgpu_device *adev)
+{
+	WREG32_SOC15(JPEG, 0, regUVD_IPX_DLDO_CONFIG,
+		     1 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT);
+	SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS, 0,
+			   UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK);
+
+	/* disable anti hang mechanism */
+	WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0,
+		 ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+	return 0;
+}
+
+/**
+ * jpeg_v5_0_0_enable_power_gating - enter power gating
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Sets JPEG_POWER_STATUS in UVD_JPEG_POWER_STATUS.  When AMD_PG_SUPPORT_JPEG
+ * is set, additionally writes 2 to the ONO1_PWR_CONFIG field of
+ * UVD_IPX_DLDO_CONFIG and polls UVD_IPX_DLDO_STATUS until ONO1_PWR_STATUS
+ * reads 1.
+ *
+ * Always returns 0; the result of the status poll is not checked.
+ */
+static int jpeg_v5_0_0_enable_power_gating(struct amdgpu_device *adev)
+{
+	/* enable anti hang mechanism */
+	WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+		 UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+		 ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+	/* nothing more to do when JPEG power gating is not supported */
+	if (!(adev->pg_flags & AMD_PG_SUPPORT_JPEG))
+		return 0;
+
+	WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_IPX_DLDO_CONFIG),
+	       2 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT);
+	SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS,
+			   1 << UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS__SHIFT,
+			   UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK);
+
+	return 0;
+}
+
+/**
+ * jpeg_engine_5_0_0_dpg_clock_gating_mode - program JPEG clock gating for DPG
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ * @indirect: non-zero to queue the values with ADD_SOC24_JPEG_TO_DPG_SRAM,
+ *            zero to write them with WREG32_SOC24_JPEG_DPG_MODE
+ *
+ * Emits JPEG_CGC_CTRL (gate delay timer 1, clock off delay 4, dynamic clock
+ * mode only when AMD_CG_SUPPORT_JPEG_MGCG is enabled) followed by
+ * JPEG_CGC_GATE = 0.
+ */
+static void jpeg_engine_5_0_0_dpg_clock_gating_mode(struct amdgpu_device *adev,
+			int inst_idx, uint8_t indirect)
+{
+	uint32_t cgc_ctrl;
+
+	cgc_ctrl = (1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT) |
+		   (4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT);
+	if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG)
+		cgc_ctrl |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+
+	if (!indirect) {
+		WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_CGC_CTRL, cgc_ctrl, indirect);
+
+		/* Turn on All JPEG clocks */
+		WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_CGC_GATE, 0, indirect);
+		return;
+	}
+
+	ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_CGC_CTRL, cgc_ctrl, indirect);
+
+	/* Turn on All JPEG clocks */
+	ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_CGC_GATE, 0, indirect);
+}
+
+/**
+ * jpeg_v5_0_0_start_dpg_mode - Jpeg start with dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ * @indirect: indirectly write sram
+ *
+ * Start JPEG block with dpg mode
+ *
+ * Steps, in order: enable power gating (its return value is ignored), set
+ * JPEG_PG_MODE in UVD_JPEG_POWER_STATUS, rewind the DPG SRAM cursor when
+ * @indirect, emit the clock gating, tiling config and interrupt enable
+ * values (queued with ADD_SOC24_JPEG_TO_DPG_SRAM when @indirect, written
+ * with WREG32_SOC24_JPEG_DPG_MODE otherwise), submit the queued values with
+ * amdgpu_jpeg_psp_update_sram() when @indirect, then program the doorbell
+ * control and JRBC ring buffer registers.
+ *
+ * NOTE(review): the direct-write branches pass a literal 1 as the last macro
+ * argument although @indirect is false there, whereas
+ * jpeg_engine_5_0_0_dpg_clock_gating_mode() passes @indirect - confirm this
+ * is intended.  VCN_JPEG_DB_CTRL is written for instance 0 rather than
+ * @inst_idx.
+ *
+ * Always returns 0.
+ */
+static int jpeg_v5_0_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ struct amdgpu_ring *ring = adev->jpeg.inst[inst_idx].ring_dec;
+ uint32_t reg_data = 0;
+
+ jpeg_v5_0_0_enable_power_gating(adev);
+
+ // enable dynamic power gating mode
+ reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS);
+ reg_data |= UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK;
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data);
+
+ if (indirect) /* restart the SRAM write cursor at the buffer's CPU address */
+ adev->jpeg.inst[inst_idx].dpg_sram_curr_addr =
+ (uint32_t *)adev->jpeg.inst[inst_idx].dpg_sram_cpu_addr;
+
+ jpeg_engine_5_0_0_dpg_clock_gating_mode(adev, inst_idx, indirect);
+
+ /* MJPEG global tiling registers */
+ if (indirect)
+ ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, indirect);
+ else
+ WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 1);
+
+ /* enable System Interrupt for JRBC */
+ if (indirect)
+ ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_SYS_INT_EN,
+ JPEG_SYS_INT_EN__DJRBC0_MASK, indirect);
+ else
+ WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_SYS_INT_EN,
+ JPEG_SYS_INT_EN__DJRBC0_MASK, 1);
+
+ if (indirect) {
+ /* add nop to workaround PSP size check */
+ ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipUVD_NO_OP, 0, indirect);
+
+ amdgpu_jpeg_psp_update_sram(adev, inst_idx, 0);
+ }
+
+ WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL,
+ ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+
+ WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_CNTL, 0x00000002L); /* bit 0 written above is dropped again here */
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_WPTR); /* resync software wptr with hardware */
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_0_stop_dpg_mode - leave dpg mode on one JPEG instance
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ *
+ * Read-modify-write of UVD_JPEG_POWER_STATUS that clears JPEG_PG_MODE and
+ * leaves every other bit as read.
+ */
+static void jpeg_v5_0_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+{
+	uint32_t power_status;
+
+	power_status = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS);
+	power_status &= ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK;
+	WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, power_status);
+}
+
+/**
+ * jpeg_v5_0_0_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ *
+ * Calls amdgpu_dpm_enable_jpeg(adev, true) when DPM is enabled.  With
+ * AMD_PG_SUPPORT_JPEG_DPG the rest is delegated to
+ * jpeg_v5_0_0_start_dpg_mode() for instance 0; otherwise power gating and
+ * clock gating are disabled and the tiling, JMI, interrupt, doorbell and
+ * ring buffer registers of instance 0 are written directly.
+ *
+ * Returns 0 on success or the error from the helper that failed.
+ */
+static int jpeg_v5_0_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
+ int r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, true);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) { /* DPG path does its own doorbell and ring setup */
+ r = jpeg_v5_0_0_start_dpg_mode(adev, 0, adev->jpeg.indirect_sram);
+ return r;
+ }
+
+ /* disable power gating */
+ r = jpeg_v5_0_0_disable_power_gating(adev);
+ if (r)
+ return r;
+
+ /* JPEG disable CGC */
+ jpeg_v5_0_0_disable_clock_gating(adev);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC0_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC0_MASK);
+
+ WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL,
+ ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+
+ WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, 0x00000002L); /* bit 0 written above is dropped again here */
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR); /* resync software wptr with hardware */
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_0_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * With AMD_PG_SUPPORT_JPEG_DPG only dpg mode is left; otherwise JMI is put
+ * into soft reset, clock gating is enabled and power gating is entered.
+ * Finally amdgpu_dpm_enable_jpeg(adev, false) is called when DPM is enabled.
+ *
+ * Returns 0 on success or the error from jpeg_v5_0_0_enable_power_gating().
+ */
+static int jpeg_v5_0_0_stop(struct amdgpu_device *adev)
+{
+	int ret;
+
+	if (!(adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG)) {
+		/* reset JMI */
+		WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL),
+			 UVD_JMI_CNTL__SOFT_RESET_MASK,
+			 ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+		jpeg_v5_0_0_enable_clock_gating(adev);
+
+		/* enable power gating */
+		ret = jpeg_v5_0_0_enable_power_gating(adev);
+		if (ret)
+			return ret;
+	} else {
+		jpeg_v5_0_0_stop_dpg_mode(adev, 0);
+	}
+
+	if (adev->pm.dpm_enabled)
+		amdgpu_dpm_enable_jpeg(adev, false);
+
+	return 0;
+}
+
+/**
+ * jpeg_v5_0_0_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Reads UVD_JRBC_RB_RPTR of JPEG instance 0 and returns the value.
+ */
+static uint64_t jpeg_v5_0_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	/* the register macro refers to a local named adev */
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t rptr;
+
+	rptr = RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR);
+
+	return rptr;
+}
+
+/**
+ * jpeg_v5_0_0_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the value stored at ring->wptr_cpu_addr when the ring uses a
+ * doorbell, otherwise the content of UVD_JRBC_RB_WPTR of instance 0.
+ */
+static uint64_t jpeg_v5_0_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	/* the register macro refers to a local named adev */
+	struct amdgpu_device *adev = ring->adev;
+
+	if (!ring->use_doorbell)
+		return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR);
+
+	return *ring->wptr_cpu_addr;
+}
+
+/**
+ * jpeg_v5_0_0_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Publishes the low 32 bits of ring->wptr: with a doorbell the value is
+ * stored at ring->wptr_cpu_addr and then written to the doorbell, without
+ * one it is written to UVD_JRBC_RB_WPTR of instance 0.
+ */
+static void jpeg_v5_0_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	/* the register and doorbell macros refer to a local named adev */
+	struct amdgpu_device *adev = ring->adev;
+	const uint32_t wptr = lower_32_bits(ring->wptr);
+
+	if (!ring->use_doorbell) {
+		WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, wptr);
+		return;
+	}
+
+	*ring->wptr_cpu_addr = wptr;
+	WDOORBELL32(ring->doorbell_index, wptr);
+}
+
+/**
+ * jpeg_v5_0_0_is_idle - check whether the JPEG block is idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Returns true when RB_JOB_DONE is set in UVD_JRBC_STATUS of instance 0.
+ */
+static bool jpeg_v5_0_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	uint32_t status = RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS);
+
+	return (status & UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+	       UVD_JRBC_STATUS__RB_JOB_DONE_MASK;
+}
+
+/**
+ * jpeg_v5_0_0_wait_for_idle - wait until the JPEG block is idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Polls UVD_JRBC_STATUS of instance 0 until RB_JOB_DONE is set and returns
+ * the result of SOC15_WAIT_ON_RREG().
+ */
+static int jpeg_v5_0_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int ret;
+
+	ret = SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC_STATUS,
+				 UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+				 UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+
+	return ret;
+}
+
+/**
+ * jpeg_v5_0_0_set_clockgating_state - gate or ungate the JPEG clocks
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ * @state: requested clock gating state
+ *
+ * AMD_CG_STATE_GATE enables clock gating, but only if the block is idle;
+ * any other state disables it.
+ *
+ * Returns 0 on success, -EBUSY if gating was requested while busy.
+ */
+static int jpeg_v5_0_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					     enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	if (state != AMD_CG_STATE_GATE) {
+		jpeg_v5_0_0_disable_clock_gating(adev);
+		return 0;
+	}
+
+	if (!jpeg_v5_0_0_is_idle(ip_block))
+		return -EBUSY;
+
+	jpeg_v5_0_0_enable_clock_gating(adev);
+
+	return 0;
+}
+
+/*
+ * Move the JPEG block to the requested power-gating state.
+ *
+ * Nothing happens when @state equals the cached adev->jpeg.cur_state.
+ * Otherwise the block is stopped (gate) or started (any other state) and
+ * the cache is updated only when that call returned 0.
+ */
+static int jpeg_v5_0_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+					      enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int ret;
+
+	if (adev->jpeg.cur_state == state)
+		return 0;
+
+	ret = (state == AMD_PG_STATE_GATE) ? jpeg_v5_0_0_stop(adev) :
+					     jpeg_v5_0_0_start(adev);
+	if (ret)
+		return ret;
+
+	adev->jpeg.cur_state = state;
+	return 0;
+}
+
+/* Interrupt-state callback installed as .set in jpeg_v5_0_0_irq_funcs; it does nothing and returns 0. */
+static int jpeg_v5_0_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+/*
+ * Interrupt handler for the JPEG 5.0.0 block.
+ *
+ * A VCN_5_0__SRCID__JPEG_DECODE entry triggers fence processing on the
+ * decode ring; any other source id is logged as unhandled. Always returns 0.
+ */
+static int jpeg_v5_0_0_process_interrupt(struct amdgpu_device *adev,
+					  struct amdgpu_irq_src *source,
+					  struct amdgpu_iv_entry *entry)
+{
+	DRM_DEBUG("IH: JPEG TRAP\n");
+
+	if (entry->src_id == VCN_5_0__SRCID__JPEG_DECODE)
+		amdgpu_fence_process(adev->jpeg.inst->ring_dec);
+	else
+		DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+			      entry->src_id, entry->src_data[0]);
+
+	return 0;
+}
+
+/*
+ * Per-ring reset callback: bracket a stop/start cycle of the whole JPEG
+ * block with the generic ring reset helpers.
+ *
+ * A failure of stop or start is returned as-is and the end helper is not
+ * called in that case.
+ */
+static int jpeg_v5_0_0_ring_reset(struct amdgpu_ring *ring,
+				   unsigned int vmid,
+				   struct amdgpu_fence *timedout_fence)
+{
+	int r;
+
+	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+	r = jpeg_v5_0_0_stop(ring->adev);
+	if (!r)
+		r = jpeg_v5_0_0_start(ring->adev);
+	if (r)
+		return r;
+
+	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+/* IP-level callback table for JPEG 5.0.0; referenced by jpeg_v5_0_0_ip_block below. */
+static const struct amd_ip_funcs jpeg_v5_0_0_ip_funcs = {
+ .name = "jpeg_v5_0_0",
+ .early_init = jpeg_v5_0_0_early_init,
+ .sw_init = jpeg_v5_0_0_sw_init,
+ .sw_fini = jpeg_v5_0_0_sw_fini,
+ .hw_init = jpeg_v5_0_0_hw_init,
+ .hw_fini = jpeg_v5_0_0_hw_fini,
+ .suspend = jpeg_v5_0_0_suspend,
+ .resume = jpeg_v5_0_0_resume,
+ .is_idle = jpeg_v5_0_0_is_idle,
+ .wait_for_idle = jpeg_v5_0_0_wait_for_idle,
+ .set_clockgating_state = jpeg_v5_0_0_set_clockgating_state,
+ .set_powergating_state = jpeg_v5_0_0_set_powergating_state,
+ /* state dump/print use the helpers shared by all JPEG versions */
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
+};
+
+/*
+ * Ring callback table for the JPEG 5.0.0 decode ring. Pointer handling and
+ * reset are version specific; packet emission reuses the jpeg_v4_0_3 helpers.
+ */
+static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr,
+ .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr,
+ .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v4_0_3_dec_ring_emit_vm_flush */
+ 22 + 22 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v4_0_3_dec_ring_emit_ib */
+ .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib,
+ .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v4_0_3_dec_ring_nop,
+ .insert_start = jpeg_v4_0_3_dec_ring_insert_start,
+ .insert_end = jpeg_v4_0_3_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v5_0_0_ring_reset,
+};
+
+/* Point the decode ring reached through adev->jpeg.inst at the 5.0.0 ring callback table. */
+static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->ring_dec->funcs = &jpeg_v5_0_0_dec_ring_vm_funcs;
+}
+
+/* Interrupt source callbacks for JPEG 5.0.0; installed by jpeg_v5_0_0_set_irq_funcs(). */
+static const struct amdgpu_irq_src_funcs jpeg_v5_0_0_irq_funcs = {
+ .set = jpeg_v5_0_0_set_interrupt_state,
+ .process = jpeg_v5_0_0_process_interrupt,
+};
+
+/* Declare a single interrupt type on the JPEG irq source and attach the 5.0.0 callbacks. */
+static void jpeg_v5_0_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->irq.num_types = 1;
+ adev->jpeg.inst->irq.funcs = &jpeg_v5_0_0_irq_funcs;
+}
+
+/* Exported descriptor of the JPEG IP block, version 5.0.0 (declared in jpeg_v5_0_0.h). */
+const struct amdgpu_ip_block_version jpeg_v5_0_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 5,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &jpeg_v5_0_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h
new file mode 100644
index 000000000000..5abb96159814
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V5_0_0_H__
+#define __JPEG_V5_0_0_H__
+
+/*
+ * Register offsets for JPEG 5.0.0.
+ * NOTE(review): the address space these "vcnip" offsets refer to is not
+ * visible from this header - confirm against the register specification.
+ */
+#define vcnipJPEG_CGC_GATE 0x4160
+#define vcnipJPEG_CGC_CTRL 0x4161
+#define vcnipJPEG_SYS_INT_EN 0x4141
+#define vcnipUVD_NO_OP 0x0029
+#define vcnipJPEG_DEC_GFX10_ADDR_CONFIG 0x404A
+
+/* IP block descriptor for JPEG 5.0.0 */
+extern const struct amdgpu_ip_block_version jpeg_v5_0_0_ip_block;
+
+#endif /* __JPEG_V5_0_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c
new file mode 100644
index 000000000000..ab0bf880d3d8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c
@@ -0,0 +1,1101 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v4_0_3.h"
+#include "jpeg_v5_0_1.h"
+#include "mmsch_v5_0.h"
+
+#include "vcn/vcn_5_0_0_offset.h"
+#include "vcn/vcn_5_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
+
+static int jpeg_v5_0_1_start_sriov(struct amdgpu_device *adev);
+static void jpeg_v5_0_1_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v5_0_1_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
+static void jpeg_v5_0_1_set_ras_funcs(struct amdgpu_device *adev);
+static void jpeg_v5_0_1_dec_ring_set_wptr(struct amdgpu_ring *ring);
+
+/*
+ * Interrupt source id of each decode ring, indexed by ring number.
+ * jpeg_v5_0_1_sw_init() registers entry j for ring j, so the order must stay
+ * in step with the ring_dec[] indices used in jpeg_v5_0_1_process_interrupt().
+ */
+static int amdgpu_ih_srcid_jpeg[] = {
+ VCN_5_0__SRCID__JPEG_DECODE,
+ VCN_5_0__SRCID__JPEG1_DECODE,
+ VCN_5_0__SRCID__JPEG2_DECODE,
+ VCN_5_0__SRCID__JPEG3_DECODE,
+ VCN_5_0__SRCID__JPEG4_DECODE,
+ VCN_5_0__SRCID__JPEG5_DECODE,
+ VCN_5_0__SRCID__JPEG6_DECODE,
+ VCN_5_0__SRCID__JPEG7_DECODE,
+ VCN_5_0__SRCID__JPEG8_DECODE,
+ VCN_5_0__SRCID__JPEG9_DECODE,
+};
+
+/*
+ * Registers handed to amdgpu_jpeg_reg_dump_init() in jpeg_v5_0_1_sw_init():
+ * block-wide status/config registers followed by the RPTR/WPTR/STATUS triple
+ * of each of the ten JRBC rings.
+ */
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_5_0_1[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_STATUS),
+};
+
+/*
+ * Translate a ring (pipe) index into the offset that callers add to the
+ * pipe-0 JRBC register addresses. The callers visible in this file only
+ * pass a non-zero pipe and use offset 0 for pipe 0 themselves.
+ *
+ * NOTE(review): the boundary test here is "<=" while
+ * jpeg_v5_0_1_init_jrbc() splits the interrupt-enable bits with
+ * "< AMDGPU_MAX_JPEG_RINGS_4_0_3"; confirm against the register map that
+ * pipe == AMDGPU_MAX_JPEG_RINGS_4_0_3 really belongs to the first branch.
+ */
+static int jpeg_v5_0_1_core_reg_offset(u32 pipe)
+{
+ if (pipe <= AMDGPU_MAX_JPEG_RINGS_4_0_3)
+ return ((0x40 * pipe) - 0xc80);
+ else
+ return ((0x40 * pipe) - 0x440);
+}
+
+/**
+ * jpeg_v5_0_1_early_init - validate instance count and install callbacks
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Sets the ring count to AMDGPU_MAX_JPEG_RINGS and installs the ring, irq
+ * and RAS function tables.
+ *
+ * Return: 0 on success, -ENOENT when the instance count is zero or larger
+ * than AMDGPU_MAX_JPEG_INSTANCES.
+ */
+static int jpeg_v5_0_1_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	if (adev->jpeg.num_jpeg_inst == 0)
+		return -ENOENT;
+	if (adev->jpeg.num_jpeg_inst > AMDGPU_MAX_JPEG_INSTANCES)
+		return -ENOENT;
+
+	/* the ring count must be known before the ring funcs are assigned */
+	adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS;
+
+	jpeg_v5_0_1_set_dec_ring_funcs(adev);
+	jpeg_v5_0_1_set_irq_funcs(adev);
+	jpeg_v5_0_1_set_ras_funcs(adev);
+
+	return 0;
+}
+
+/**
+ * jpeg_v5_0_1_sw_init - sw init for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Registers the per-ring decode interrupts and the two poison interrupts,
+ * runs the common JPEG sw init and resume, initialises every decode ring of
+ * every instance, then sets up RAS (when supported), the register dump list
+ * and the reset-mask sysfs entry.
+ *
+ * Return: 0 on success, otherwise the error code of the first step that
+ * failed (no unwinding is done here).
+ */
+static int jpeg_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, j, r, jpeg_inst;
+
+ /* all ring interrupts are attached to the irq source of the first instance */
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq);
+ if (r)
+ return r;
+ }
+ /* JPEG DJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_5_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
+ if (r)
+ return r;
+
+ /* JPEG EJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_5_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ ring->use_doorbell = true;
+ ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
+ /* doorbell layout differs between bare metal and SR-IOV VF */
+ if (!amdgpu_sriov_vf(adev)) {
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 1 + j + 11 * jpeg_inst;
+ } else {
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 2 + j + 32 * jpeg_inst;
+ }
+ /* NOTE(review): unbounded sprintf; assumes ring->name is large enough */
+ sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ /* the internal offset is the same constant for every ring */
+ adev->jpeg.internal.jpeg_pitch[j] =
+ regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET;
+ adev->jpeg.inst[i].external.jpeg_pitch[j] =
+ SOC15_REG_OFFSET1(JPEG, jpeg_inst, regUVD_JRBC_SCRATCH0,
+ (j ? jpeg_v5_0_1_core_reg_offset(j) : 0));
+ }
+ }
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
+ r = amdgpu_jpeg_ras_sw_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to initialize jpeg ras block!\n");
+ return r;
+ }
+ }
+
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_5_0_1, ARRAY_SIZE(jpeg_reg_list_5_0_1));
+ if (r)
+ return r;
+
+ /* per-queue reset is only advertised when not running as an SR-IOV VF */
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]);
+ if (!amdgpu_sriov_vf(adev))
+ adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v5_0_1_sw_fini - sw teardown for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Suspends JPEG, removes the reset-mask sysfs entry and releases the common
+ * JPEG software state.
+ *
+ * Return: the error from amdgpu_jpeg_suspend() if it fails (nothing else is
+ * torn down then), otherwise the result of amdgpu_jpeg_sw_fini().
+ */
+static int jpeg_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int err = amdgpu_jpeg_suspend(adev);
+
+	if (err)
+		return err;
+
+	amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
+	return amdgpu_jpeg_sw_fini(adev);
+}
+
+/**
+ * jpeg_v5_0_1_hw_init - start and test JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * As an SR-IOV VF: run jpeg_v5_0_1_start_sriov(), zero every ring's write
+ * pointer and mark the ring schedulers ready. Otherwise: record the RRMT
+ * capability, program the doorbell range and per-ring doorbell control, and
+ * run the ring test on every ring.
+ *
+ * Return: 0 on success, otherwise the first error from the SR-IOV start or
+ * from a ring test.
+ */
+static int jpeg_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, j, r, jpeg_inst;
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = jpeg_v5_0_1_start_sriov(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ ring->wptr = 0;
+ ring->wptr_old = 0;
+ jpeg_v5_0_1_dec_ring_set_wptr(ring);
+ /* no ring test on this path; the ring is simply declared ready */
+ ring->sched.ready = true;
+ }
+ }
+ return 0;
+ }
+ /* NOTE(review): 0x100 is an unnamed bit of regVCN_RRMT_CNTL - confirm its meaning */
+ if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100)
+ adev->jpeg.caps |= AMDGPU_JPEG_CAPS(RRMT_ENABLED);
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+ /* first ring of the instance decides whether the doorbell range is set up */
+ ring = adev->jpeg.inst[i].ring_dec;
+ if (ring->use_doorbell)
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 11 * jpeg_inst,
+ adev->jpeg.inst[i].aid_id);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ if (ring->use_doorbell)
+ WREG32_SOC15_OFFSET(VCN, GET_INST(VCN, i), regVCN_JPEG_DB_CTRL,
+ ring->pipe,
+ ring->doorbell_index <<
+ VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_1_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Cancels the idle worker, power-gates the block if it is not gated yet
+ * (bare metal only) and drops the RAS poison interrupt reference when RAS
+ * is supported and the device is not an SR-IOV VF.
+ *
+ * Return: 0, or the error from the power-gating request.
+ */
+static int jpeg_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int ret = 0;
+
+	cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+	if (!amdgpu_sriov_vf(adev) &&
+	    adev->jpeg.cur_state != AMD_PG_STATE_GATE)
+		ret = jpeg_v5_0_1_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
+
+	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG) &&
+	    !amdgpu_sriov_vf(adev))
+		amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0);
+
+	return ret;
+}
+
+/**
+ * jpeg_v5_0_1_suspend - suspend JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Runs hw_fini first and, only if that succeeded, the common JPEG suspend.
+ *
+ * Return: 0 on success or the first error encountered.
+ */
+static int jpeg_v5_0_1_suspend(struct amdgpu_ip_block *ip_block)
+{
+	int err = jpeg_v5_0_1_hw_fini(ip_block);
+
+	if (err)
+		return err;
+
+	return amdgpu_jpeg_suspend(ip_block->adev);
+}
+
+/**
+ * jpeg_v5_0_1_resume - resume JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Runs the common JPEG resume and, only if that succeeded, hw_init.
+ *
+ * Return: 0 on success or the first error encountered.
+ */
+static int jpeg_v5_0_1_resume(struct amdgpu_ip_block *ip_block)
+{
+	int err = amdgpu_jpeg_resume(ip_block->adev);
+
+	if (err)
+		return err;
+
+	return jpeg_v5_0_1_hw_init(ip_block);
+}
+
+/*
+ * Per-instance bring-up for logical JPEG instance @i: clear the power status
+ * and PG mode fields, copy the GFX address config into the decoder and take
+ * the JMI out of soft reset.
+ */
+static void jpeg_v5_0_1_init_inst(struct amdgpu_device *adev, int i)
+{
+ int jpeg_inst = GET_INST(JPEG, i);
+
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* keep the JPEG in static PG mode */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK);
+
+ /* MJPEG global tiling registers */
+ /*
+ * NOTE(review): this write targets instance 0 while every other access in
+ * this function uses jpeg_inst - confirm whether that is intentional.
+ */
+ WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+}
+
+/*
+ * Per-instance shutdown for logical JPEG instance @i; sets the JMI soft-reset
+ * bit and the power status field that jpeg_v5_0_1_init_inst() clears.
+ */
+static void jpeg_v5_0_1_deinit_inst(struct amdgpu_device *adev, int i)
+{
+ int jpeg_inst = GET_INST(JPEG, i);
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+}
+
+/*
+ * Program one decode ring buffer (JRBC): enable its system interrupt, then
+ * write VMID 0, the ring base address, zeroed read/write pointers and the
+ * ring size, and finally cache the hardware write pointer in @ring->wptr.
+ */
+static void jpeg_v5_0_1_init_jrbc(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	int jpeg_inst = GET_INST(JPEG, ring->me);
+	int reg_offset = 0;
+	u32 int_en_reg, int_en_bit, shift;
+
+	if (ring->pipe)
+		reg_offset = jpeg_v5_0_1_core_reg_offset(ring->pipe);
+
+	/*
+	 * enable System Interrupt for JRBC: rings below
+	 * AMDGPU_MAX_JPEG_RINGS_4_0_3 map 1:1 onto the enable bits, the
+	 * remaining rings sit 12 bit positions further up
+	 */
+	shift = ring->pipe;
+	if (ring->pipe >= AMDGPU_MAX_JPEG_RINGS_4_0_3)
+		shift += 12;
+
+	int_en_reg = SOC15_REG_OFFSET(JPEG, jpeg_inst, regJPEG_SYS_INT_EN);
+	int_en_bit = JPEG_SYS_INT_EN__DJRBC0_MASK << shift;
+	WREG32_P(int_en_reg, int_en_bit, ~int_en_bit);
+
+	WREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_LMI_JRBC_RB_VMID,
+			    reg_offset, 0);
+	WREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC_RB_CNTL,
+			    reg_offset, (0x00000001L | 0x00000002L));
+	WREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+			    reg_offset, lower_32_bits(ring->gpu_addr));
+	WREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+			    reg_offset, upper_32_bits(ring->gpu_addr));
+	WREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC_RB_RPTR,
+			    reg_offset, 0);
+	WREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC_RB_WPTR,
+			    reg_offset, 0);
+	WREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC_RB_CNTL,
+			    reg_offset, 0x00000002L);
+	WREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC_RB_SIZE,
+			    reg_offset, ring->ring_size / 4);
+
+	/* pick up whatever the hardware now reports as write pointer */
+	ring->wptr = RREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC_RB_WPTR,
+					 reg_offset);
+}
+
+/*
+ * SR-IOV start path: for every JPEG instance, build an MMSCH init table in
+ * adev->virt.mm_table describing each ring's base address and size, hand the
+ * table to the MMSCH through its VF mailbox registers and poll for a reply.
+ *
+ * Returns 0 on success or when the MMSCH answers with an unexpected status
+ * (that case is only logged), and -EBUSY when no answer arrives in time.
+ */
+static int jpeg_v5_0_1_start_sriov(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ uint64_t ctx_addr;
+ uint32_t param, resp, expected;
+ uint32_t tmp, timeout;
+
+ struct amdgpu_mm_table *table = &adev->virt.mm_table;
+ uint32_t *table_loc;
+ uint32_t table_size;
+ uint32_t size, size_dw, item_offset;
+ uint32_t init_status;
+ int i, j, jpeg_inst;
+
+ /* presumably consumed by the MMSCH_V5_0_INSERT_* macros below - TODO confirm */
+ struct mmsch_v5_0_cmd_direct_write
+ direct_wt = { {0} };
+ struct mmsch_v5_0_cmd_end end = { {0} };
+ struct mmsch_v5_0_init_header header;
+
+ direct_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_WRITE;
+ end.cmd_header.command_type =
+ MMSCH_COMMAND__END;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ jpeg_inst = GET_INST(JPEG, i);
+
+ /* header sizes and offsets are counted in dwords (byte size >> 2) */
+ memset(&header, 0, sizeof(struct mmsch_v5_0_init_header));
+ header.version = MMSCH_VERSION;
+ header.total_size = sizeof(struct mmsch_v5_0_init_header) >> 2;
+
+ /* command payload starts right after the header in the shared table */
+ table_loc = (uint32_t *)table->cpu_addr;
+ table_loc += header.total_size;
+
+ item_offset = header.total_size;
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ table_size = 0;
+
+ /*
+ * NOTE(review): the register offsets below are taken for instance 0
+ * without a per-ring offset - confirm the MMSCH applies one.
+ */
+ tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW);
+ MMSCH_V5_0_INSERT_DIRECT_WT(tmp, lower_32_bits(ring->gpu_addr));
+ tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH);
+ MMSCH_V5_0_INSERT_DIRECT_WT(tmp, upper_32_bits(ring->gpu_addr));
+ tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC_RB_SIZE);
+ MMSCH_V5_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4);
+
+ /* rings 0-4 are described in mjpegdec0[], rings 5 and up in mjpegdec1[] */
+ if (j < 5) {
+ header.mjpegdec0[j].table_offset = item_offset;
+ header.mjpegdec0[j].init_status = 0;
+ header.mjpegdec0[j].table_size = table_size;
+ } else {
+ header.mjpegdec1[j - 5].table_offset = item_offset;
+ header.mjpegdec1[j - 5].init_status = 0;
+ header.mjpegdec1[j - 5].table_size = table_size;
+ }
+ header.total_size += table_size;
+ item_offset += table_size;
+ }
+
+ MMSCH_V5_0_INSERT_END();
+
+ /* send init table to MMSCH */
+ size = sizeof(struct mmsch_v5_0_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy((void *)table_loc, &header, size);
+
+ ctx_addr = table->gpu_addr;
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+ /* clear the context VMID field (VMID 0) */
+ tmp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID);
+ tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID, tmp);
+
+ size = header.total_size;
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_SIZE, size);
+
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP, 0);
+
+ param = 0x00000001;
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_HOST, param);
+ tmp = 0;
+ timeout = 1000;
+ resp = 0;
+ expected = MMSCH_VF_MAILBOX_RESP__OK;
+ /*
+ * NOTE(review): init_status is sampled before the response is polled, and
+ * mjpegdec0[] is indexed with the instance counter i although it was
+ * filled per ring (j) above - confirm both are intended.
+ */
+ init_status =
+ ((struct mmsch_v5_0_init_header *)(table_loc))->mjpegdec0[i].init_status;
+ /* poll in 10 us steps until any non-zero response or 1000 us have elapsed */
+ while (resp != expected) {
+ resp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP);
+
+ if (resp != 0)
+ break;
+ udelay(10);
+ tmp = tmp + 10;
+ if (tmp >= timeout) {
+ DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+ " waiting for regMMSCH_VF_MAILBOX_RESP "\
+ "(expected=0x%08x, readback=0x%08x)\n",
+ tmp, expected, resp);
+ return -EBUSY;
+ }
+ }
+ /* an unexpected response is logged but does not fail the start */
+ if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE &&
+ init_status != MMSCH_VF_ENGINE_STATUS__PASS)
+ DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n",
+ resp, init_status);
+
+ }
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_1_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * For every instance, run the per-instance init and then program each of
+ * its decode ring buffers.
+ *
+ * Return: always 0.
+ */
+static int jpeg_v5_0_1_start(struct amdgpu_device *adev)
+{
+	int inst, pipe;
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++) {
+		jpeg_v5_0_1_init_inst(adev, inst);
+
+		for (pipe = 0; pipe < adev->jpeg.num_jpeg_rings; pipe++)
+			jpeg_v5_0_1_init_jrbc(&adev->jpeg.inst[inst].ring_dec[pipe]);
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v5_0_1_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Run the per-instance shutdown on every JPEG instance.
+ *
+ * Return: always 0.
+ */
+static int jpeg_v5_0_1_stop(struct amdgpu_device *adev)
+{
+	int inst = 0;
+
+	while (inst < adev->jpeg.num_jpeg_inst) {
+		jpeg_v5_0_1_deinit_inst(adev, inst);
+		inst++;
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v5_0_1_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Return: the contents of this ring's UVD_JRBC_RB_RPTR register.
+ */
+static uint64_t jpeg_v5_0_1_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	int reg_offset = 0;
+
+	/* pipe 0 uses the base register, other pipes a computed offset */
+	if (ring->pipe)
+		reg_offset = jpeg_v5_0_1_core_reg_offset(ring->pipe);
+
+	return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me),
+				   regUVD_JRBC_RB_RPTR, reg_offset);
+}
+
+/**
+ * jpeg_v5_0_1_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Return: the writeback slot adev->wb.wb[ring->wptr_offs] when the ring uses
+ * a doorbell, otherwise the contents of this ring's UVD_JRBC_RB_WPTR
+ * register.
+ */
+static uint64_t jpeg_v5_0_1_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	int reg_offset;
+
+	if (ring->use_doorbell)
+		return adev->wb.wb[ring->wptr_offs];
+
+	reg_offset = ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0;
+
+	return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me),
+				   regUVD_JRBC_RB_WPTR, reg_offset);
+}
+
+/**
+ * jpeg_v5_0_1_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Publishes the low 32 bits of @ring->wptr: through the writeback slot plus
+ * doorbell when the ring uses one, otherwise by writing this ring's
+ * UVD_JRBC_RB_WPTR register.
+ */
+static void jpeg_v5_0_1_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	u32 wptr_lo = lower_32_bits(ring->wptr);
+	int reg_offset;
+
+	if (ring->use_doorbell) {
+		/* writeback slot first, then the doorbell */
+		adev->wb.wb[ring->wptr_offs] = wptr_lo;
+		WDOORBELL32(ring->doorbell_index, wptr_lo);
+		return;
+	}
+
+	reg_offset = ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0;
+	WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me),
+			    regUVD_JRBC_RB_WPTR, reg_offset, wptr_lo);
+}
+
+/**
+ * jpeg_v5_0_1_is_idle - check whether every JPEG decode ring is idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Reads UVD_JRBC_STATUS of every ring of every instance.
+ *
+ * Return: true only if RB_JOB_DONE is set for all rings, false otherwise.
+ */
+static bool jpeg_v5_0_1_is_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	/*
+	 * Must start out true: the per-ring results are AND-ed in, so a
+	 * false seed would make the function report "busy" unconditionally.
+	 */
+	bool ret = true;
+	int i, j;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+			int reg_offset = (j ? jpeg_v5_0_1_core_reg_offset(j) : 0);
+
+			ret &= ((RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, i),
+				regUVD_JRBC_STATUS, reg_offset) &
+				UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+				UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * jpeg_v5_0_1_wait_for_idle - wait until every JPEG decode ring is idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Waits on UVD_JRBC_STATUS of each ring of each instance for RB_JOB_DONE.
+ *
+ * Return: 0 when all rings reported RB_JOB_DONE, otherwise the first
+ * non-zero result of SOC15_WAIT_ON_RREG_OFFSET().
+ */
+static int jpeg_v5_0_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int i, j, r;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+			int reg_offset = (j ? jpeg_v5_0_1_core_reg_offset(j) : 0);
+
+			r = SOC15_WAIT_ON_RREG_OFFSET(JPEG, GET_INST(JPEG, i),
+				regUVD_JRBC_STATUS, reg_offset,
+				UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+				UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+			/*
+			 * Propagate the failure; AND-ing results into a zero
+			 * seed (as before) silently discarded every error.
+			 */
+			if (r)
+				return r;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v5_0_1_set_clockgating_state - validate a clock-gating request
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ * @state: requested clock-gating state
+ *
+ * No registers are programmed here; a gate request is only checked against
+ * the idle state of the block.
+ *
+ * Return: 0 for an ungate request or when the block is idle, -EBUSY when
+ * gating is requested while jpeg_v5_0_1_is_idle() reports the block busy.
+ */
+static int jpeg_v5_0_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					      enum amd_clockgating_state state)
+{
+	if (state != AMD_CG_STATE_GATE)
+		return 0;
+
+	/*
+	 * jpeg_v5_0_1_is_idle() already walks every instance and ring, so a
+	 * single call suffices; looping per instance only repeated the same
+	 * register reads.
+	 */
+	if (!jpeg_v5_0_1_is_idle(ip_block))
+		return -EBUSY;
+
+	return 0;
+}
+
+/*
+ * Move the JPEG block to the requested power-gating state.
+ *
+ * As an SR-IOV VF the cached state is forced to ungated and nothing else is
+ * done. Otherwise a request equal to the cached state is a no-op; a gate
+ * request stops the block, any other request starts it, and the cache is
+ * updated only when that call returned 0.
+ */
+static int jpeg_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
+					      enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int ret;
+
+	if (amdgpu_sriov_vf(adev)) {
+		adev->jpeg.cur_state = AMD_PG_STATE_UNGATE;
+		return 0;
+	}
+
+	if (adev->jpeg.cur_state == state)
+		return 0;
+
+	ret = (state == AMD_PG_STATE_GATE) ? jpeg_v5_0_1_stop(adev) :
+					     jpeg_v5_0_1_start(adev);
+	if (ret)
+		return ret;
+
+	adev->jpeg.cur_state = state;
+	return 0;
+}
+
+/* Interrupt-state callback installed as .set in jpeg_v5_0_1_irq_funcs; it does nothing and returns 0. */
+static int jpeg_v5_0_1_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+/* Interrupt-state callback for the RAS interrupt source; it does nothing and returns 0. */
+static int jpeg_v5_0_1_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+
+
+/*
+ * Interrupt handler for the JPEG 5.0.1 decode rings.
+ *
+ * The entry's node id is translated through node_id_to_phys_map[] and matched
+ * against the aid_id of each JPEG instance; an entry that matches none is
+ * reported once and dropped. The source id then selects the decode ring
+ * whose fences are processed; unknown source ids are logged.
+ * Always returns 0.
+ */
+static int jpeg_v5_0_1_process_interrupt(struct amdgpu_device *adev,
+					  struct amdgpu_irq_src *source,
+					  struct amdgpu_iv_entry *entry)
+{
+	u32 aid, inst, pipe;
+
+	aid = node_id_to_phys_map[entry->node_id];
+	DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n");
+
+	/* find the instance that lives on the reporting AID */
+	inst = 0;
+	while (inst < adev->jpeg.num_jpeg_inst &&
+	       adev->jpeg.inst[inst].aid_id != aid)
+		++inst;
+
+	if (inst >= adev->jpeg.num_jpeg_inst) {
+		dev_WARN_ONCE(adev->dev, 1,
+			      "Interrupt received for unknown JPEG instance %d",
+			      entry->node_id);
+		return 0;
+	}
+
+	/* amdgpu_ih_srcid_jpeg[] is ordered by ring index */
+	for (pipe = 0; pipe < ARRAY_SIZE(amdgpu_ih_srcid_jpeg); ++pipe) {
+		if (entry->src_id == amdgpu_ih_srcid_jpeg[pipe]) {
+			amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[pipe]);
+			return 0;
+		}
+	}
+
+	DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+		      entry->src_id, entry->src_data[0]);
+
+	return 0;
+}
+
+/*
+ * Reset sequence for the JPEG core behind @ring: set the JMI client stall
+ * bits, wait on the clean status, set LMI drop, pulse the ring's bit in
+ * JPEG_CORE_RST_CTRL, then clear stall, drop and the reset control again.
+ * The statement order is significant; do not reorder.
+ */
+static void jpeg_v5_0_1_core_stall_reset(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int jpeg_inst = GET_INST(JPEG, ring->me);
+ int reg_offset = ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0;
+
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_JMI_CLIENT_STALL,
+ reg_offset, 0x1F);
+ /* NOTE(review): the result of the wait is ignored - confirm a timeout here is acceptable */
+ SOC15_WAIT_ON_RREG_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS,
+ reg_offset, 0x1F, 0x1F);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_JPEG_LMI_DROP,
+ reg_offset, 0x1F);
+ /* one reset bit per pipe; this register is written without the per-pipe offset */
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 1 << ring->pipe);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_JMI_CLIENT_STALL,
+ reg_offset, 0x00);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_JPEG_LMI_DROP,
+ reg_offset, 0x00);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 0x00);
+}
+
+/*
+ * Per-ring reset callback: between the generic reset helpers, reset the core
+ * behind @ring and reprogram its ring buffer. @vmid is not used. Returns the
+ * result of amdgpu_ring_reset_helper_end().
+ */
+static int jpeg_v5_0_1_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+ jpeg_v5_0_1_core_stall_reset(ring);
+ jpeg_v5_0_1_init_jrbc(ring);
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+/* IP-level callback table for JPEG 5.0.1; the soft-reset hooks and late_init are left unset. */
+static const struct amd_ip_funcs jpeg_v5_0_1_ip_funcs = {
+ .name = "jpeg_v5_0_1",
+ .early_init = jpeg_v5_0_1_early_init,
+ .late_init = NULL,
+ .sw_init = jpeg_v5_0_1_sw_init,
+ .sw_fini = jpeg_v5_0_1_sw_fini,
+ .hw_init = jpeg_v5_0_1_hw_init,
+ .hw_fini = jpeg_v5_0_1_hw_fini,
+ .suspend = jpeg_v5_0_1_suspend,
+ .resume = jpeg_v5_0_1_resume,
+ .is_idle = jpeg_v5_0_1_is_idle,
+ .wait_for_idle = jpeg_v5_0_1_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = jpeg_v5_0_1_set_clockgating_state,
+ .set_powergating_state = jpeg_v5_0_1_set_powergating_state,
+ /* state dump/print use the helpers shared by all JPEG versions */
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
+};
+
+/*
+ * Ring callback table for the JPEG 5.0.1 decode rings. Pointer accessors
+ * and the reset hook are version specific; the packet emitters and the
+ * nop/start/end inserters are the jpeg_v4_0_3 implementations.
+ */
+static const struct amdgpu_ring_funcs jpeg_v5_0_1_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v5_0_1_dec_ring_get_rptr,
+ .get_wptr = jpeg_v5_0_1_dec_ring_get_wptr,
+ .set_wptr = jpeg_v5_0_1_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* .emit_vm_flush (jpeg_v4_0_3_dec_ring_emit_vm_flush) */
+ 22 + 22 + /* .emit_fence (jpeg_v4_0_3_dec_ring_emit_fence) x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* .emit_ib (jpeg_v4_0_3_dec_ring_emit_ib) */
+ .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib,
+ .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush,
+ .emit_hdp_flush = jpeg_v4_0_3_ring_emit_hdp_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v4_0_3_dec_ring_nop,
+ .insert_start = jpeg_v4_0_3_dec_ring_insert_start,
+ .insert_end = jpeg_v4_0_3_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v5_0_1_ring_reset,
+};
+
+/*
+ * jpeg_v5_0_1_set_dec_ring_funcs - wire up every JPEG decode ring
+ * @adev: amdgpu device
+ *
+ * Installs the decode ring callback table on each ring of each instance,
+ * tags the ring with its instance index (me) and ring index (pipe), and
+ * derives the instance's aid_id from GET_INST() divided by
+ * num_inst_per_aid.
+ */
+static void jpeg_v5_0_1_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+	int inst_idx, ring_idx, dev_inst;
+
+	for (inst_idx = 0; inst_idx < adev->jpeg.num_jpeg_inst; ++inst_idx) {
+		for (ring_idx = 0; ring_idx < adev->jpeg.num_jpeg_rings; ++ring_idx) {
+			struct amdgpu_ring *ring =
+				&adev->jpeg.inst[inst_idx].ring_dec[ring_idx];
+
+			ring->funcs = &jpeg_v5_0_1_dec_ring_vm_funcs;
+			ring->me = inst_idx;
+			ring->pipe = ring_idx;
+		}
+
+		dev_inst = GET_INST(JPEG, inst_idx);
+		adev->jpeg.inst[inst_idx].aid_id =
+			dev_inst / adev->jpeg.num_inst_per_aid;
+	}
+}
+
+/* Interrupt source callbacks for the regular JPEG interrupt. */
+static const struct amdgpu_irq_src_funcs jpeg_v5_0_1_irq_funcs = {
+ .set = jpeg_v5_0_1_set_interrupt_state,
+ .process = jpeg_v5_0_1_process_interrupt,
+};
+
+/*
+ * Interrupt source callbacks for the RAS poison interrupt; processing is
+ * delegated to the common amdgpu_jpeg_process_poison_irq() handler.
+ */
+static const struct amdgpu_irq_src_funcs jpeg_v5_0_1_ras_irq_funcs = {
+ .set = jpeg_v5_0_1_set_ras_interrupt_state,
+ .process = amdgpu_jpeg_process_poison_irq,
+};
+
+/*
+ * jpeg_v5_0_1_set_irq_funcs - install the JPEG interrupt source callbacks
+ * @adev: amdgpu device
+ *
+ * Everything here is stored through adev->jpeg.inst->, i.e. on the first
+ * element of the instance array: one IRQ source is registered there for
+ * all instances.
+ */
+static void jpeg_v5_0_1_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ /* Accumulate one interrupt type per ring of every instance. */
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i)
+ adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings;
+
+ adev->jpeg.inst->irq.funcs = &jpeg_v5_0_1_irq_funcs;
+
+ /* The RAS poison source has a single type. */
+ adev->jpeg.inst->ras_poison_irq.num_types = 1;
+ adev->jpeg.inst->ras_poison_irq.funcs = &jpeg_v5_0_1_ras_irq_funcs;
+
+}
+
+/* Exported IP block descriptor: JPEG version 5.0.1 with its callback table. */
+const struct amdgpu_ip_block_version jpeg_v5_0_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 5,
+ .minor = 0,
+ .rev = 1,
+ .funcs = &jpeg_v5_0_1_ip_funcs,
+};
+
+/*
+ * jpeg_v5_0_1_query_poison_by_instance - read one sub-block's poison flag
+ * @adev: amdgpu device
+ * @instance: JPEG instance passed to the register read
+ * @sub_block: one of enum amdgpu_jpeg_v5_0_1_sub_block
+ *
+ * Reads the RAS status register that belongs to @sub_block and extracts
+ * its POISONED_PF field. A non-zero field is logged with dev_info().
+ *
+ * Returns the POISONED_PF field value, or 0 for an unknown @sub_block.
+ */
+static uint32_t jpeg_v5_0_1_query_poison_by_instance(struct amdgpu_device *adev,
+		uint32_t instance, uint32_t sub_block)
+{
+	uint32_t status, poisoned;
+
+	switch (sub_block) {
+	case AMDGPU_JPEG_V5_0_1_JPEG0:
+		status = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG0_STATUS);
+		poisoned = REG_GET_FIELD(status, UVD_RAS_JPEG0_STATUS, POISONED_PF);
+		break;
+	case AMDGPU_JPEG_V5_0_1_JPEG1:
+		status = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG1_STATUS);
+		poisoned = REG_GET_FIELD(status, UVD_RAS_JPEG1_STATUS, POISONED_PF);
+		break;
+	default:
+		/* No register is read for an unknown sub-block. */
+		return 0;
+	}
+
+	if (!poisoned)
+		return 0;
+
+	dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n",
+		 instance, sub_block);
+
+	return poisoned;
+}
+
+/*
+ * jpeg_v5_0_1_query_ras_poison_status - check all JPEG sub-blocks for poison
+ * @adev: amdgpu device
+ *
+ * Queries every sub-block of every JPEG instance. The scan never stops
+ * early, so each poisoned sub-block is visited (and logged by the
+ * per-instance helper).
+ *
+ * Returns true if any query reported a non-zero poison status.
+ */
+static bool jpeg_v5_0_1_query_ras_poison_status(struct amdgpu_device *adev)
+{
+	uint32_t total = 0;
+	uint32_t i, blk;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+		for (blk = 0; blk < AMDGPU_JPEG_V5_0_1_MAX_SUB_BLOCK; blk++)
+			total += jpeg_v5_0_1_query_poison_by_instance(adev, i, blk);
+	}
+
+	return total != 0;
+}
+
+/* RAS hardware ops for JPEG 5.0.1: only the poison status query is provided. */
+static const struct amdgpu_ras_block_hw_ops jpeg_v5_0_1_ras_hw_ops = {
+ .query_poison_status = jpeg_v5_0_1_query_ras_poison_status,
+};
+
+/*
+ * jpeg_v5_0_1_aca_bank_parser - record one ACA bank in the error cache
+ * @handle: ACA handle passed through to the error cache
+ * @bank: bank to decode; its aca_err_type field is set here
+ * @type: ACA_SMU_TYPE_UE or ACA_SMU_TYPE_CE
+ * @data: unused
+ *
+ * A UE bank is logged with a count of 1. A CE bank is logged with the
+ * ERRCNT field of the bank's MISC0 register.
+ *
+ * Returns the bank decode error if decoding fails, -EINVAL for any other
+ * @type, otherwise the result of aca_error_cache_log_bank_error().
+ */
+static int jpeg_v5_0_1_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+				       enum aca_smu_type type, void *data)
+{
+	struct aca_bank_info info;
+	u64 count;
+	int err;
+
+	err = aca_bank_info_decode(bank, &info);
+	if (err)
+		return err;
+
+	if (type == ACA_SMU_TYPE_UE) {
+		bank->aca_err_type = ACA_ERROR_TYPE_UE;
+		count = 1ULL;
+	} else if (type == ACA_SMU_TYPE_CE) {
+		bank->aca_err_type = ACA_ERROR_TYPE_CE;
+		count = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]);
+	} else {
+		return -EINVAL;
+	}
+
+	return aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+					      count);
+}
+
+/*
+ * Error codes accepted by jpeg_v5_0_1_aca_bank_is_valid().
+ * Values refer to the SMU driver interface ("driver if") header file.
+ */
+static int jpeg_v5_0_1_err_codes[] = {
+ 16, 17, 18, 19, 20, 21, 22, 23, /* JPEG[0-9][S|D] */
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 48, 49, 50, 51,
+};
+
+/*
+ * jpeg_v5_0_1_aca_bank_is_valid - decide whether an ACA bank belongs to JPEG
+ * @handle: ACA handle; only handle->adev is used
+ * @bank: bank to inspect
+ * @type: unused
+ * @data: unused
+ *
+ * The bank is accepted when the low instance id from its IPID register,
+ * with bit 0 masked off, equals mmSMNAID_AID0_MCA_SMU and
+ * aca_bank_check_error_codes() returns 0 for the JPEG error code list.
+ */
+static bool jpeg_v5_0_1_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+					  enum aca_smu_type type, void *data)
+{
+	u32 inst_id = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+
+	/* Ignore bit 0 of the instance id when comparing. */
+	inst_id &= GENMASK(31, 1);
+	if (inst_id != mmSMNAID_AID0_MCA_SMU)
+		return false;
+
+	return !aca_bank_check_error_codes(handle->adev, bank,
+					   jpeg_v5_0_1_err_codes,
+					   ARRAY_SIZE(jpeg_v5_0_1_err_codes));
+}
+
+/* ACA bank callbacks: bank filter and bank parser for JPEG 5.0.1. */
+static const struct aca_bank_ops jpeg_v5_0_1_aca_bank_ops = {
+ .aca_bank_parser = jpeg_v5_0_1_aca_bank_parser,
+ .aca_bank_is_valid = jpeg_v5_0_1_aca_bank_is_valid,
+};
+
+/*
+ * ACA registration data bound in jpeg_v5_0_1_ras_late_init(): SMU hardware
+ * IP type, UE error mask, and the JPEG bank callbacks.
+ */
+static const struct aca_info jpeg_v5_0_1_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK,
+ .bank_ops = &jpeg_v5_0_1_aca_bank_ops,
+};
+
+/*
+ * jpeg_v5_0_1_ras_late_init - late RAS setup for JPEG 5.0.1
+ * @adev: amdgpu device
+ * @ras_block: RAS block descriptor being initialised
+ *
+ * Runs the common RAS block late init, binds the JPEG ACA info and, when
+ * RAS is supported for the block and the poison IRQ callbacks are set,
+ * takes a reference on the poison interrupt.
+ *
+ * Returns 0 on success. If the common late init fails its error is
+ * returned directly; a failure in any later step undoes the common late
+ * init with amdgpu_ras_block_late_fini() before returning the error.
+ */
+static int jpeg_v5_0_1_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+	int err;
+
+	err = amdgpu_ras_block_late_init(adev, ras_block);
+	if (err)
+		return err;
+
+	err = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__JPEG,
+				  &jpeg_v5_0_1_aca_info, NULL);
+
+	/* Only request the poison IRQ once the ACA binding has succeeded. */
+	if (!err &&
+	    amdgpu_ras_is_supported(adev, ras_block->block) &&
+	    adev->jpeg.inst->ras_poison_irq.funcs)
+		err = amdgpu_irq_get(adev, &adev->jpeg.inst->ras_poison_irq, 0);
+
+	if (err)
+		amdgpu_ras_block_late_fini(adev, ras_block);
+
+	return err;
+}
+
+/* RAS descriptor for JPEG 5.0.1: hardware ops plus the late-init hook. */
+static struct amdgpu_jpeg_ras jpeg_v5_0_1_ras = {
+ .ras_block = {
+ .hw_ops = &jpeg_v5_0_1_ras_hw_ops,
+ .ras_late_init = jpeg_v5_0_1_ras_late_init,
+ },
+};
+
+/* Point the device's JPEG RAS descriptor at the 5.0.1 implementation. */
+static void jpeg_v5_0_1_set_ras_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.ras = &jpeg_v5_0_1_ras;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h
new file mode 100644
index 000000000000..a7e58d5fb246
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V5_0_1_H__
+#define __JPEG_V5_0_1_H__
+
+extern const struct amdgpu_ip_block_version jpeg_v5_0_1_ip_block;
+
+/* Internal-offset constants (no _BASE_IDX companion). */
+#define regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET 0x4094
+#define regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET 0x1bffe
+
+/*
+ * Per-core JRBC write pointer, status and read pointer registers. Each
+ * register is paired with a _BASE_IDX value: JRBC0 and JRBC9 use base
+ * index 1, JRBC1 through JRBC8 use base index 0.
+ */
+#define regUVD_JRBC0_UVD_JRBC_RB_WPTR 0x0640
+#define regUVD_JRBC0_UVD_JRBC_RB_WPTR_BASE_IDX 1
+#define regUVD_JRBC0_UVD_JRBC_STATUS 0x0649
+#define regUVD_JRBC0_UVD_JRBC_STATUS_BASE_IDX 1
+#define regUVD_JRBC0_UVD_JRBC_RB_RPTR 0x064a
+#define regUVD_JRBC0_UVD_JRBC_RB_RPTR_BASE_IDX 1
+#define regUVD_JRBC1_UVD_JRBC_RB_WPTR 0x0000
+#define regUVD_JRBC1_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC1_UVD_JRBC_STATUS 0x0009
+#define regUVD_JRBC1_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC1_UVD_JRBC_RB_RPTR 0x000a
+#define regUVD_JRBC1_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC2_UVD_JRBC_RB_WPTR 0x0040
+#define regUVD_JRBC2_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC2_UVD_JRBC_STATUS 0x0049
+#define regUVD_JRBC2_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC2_UVD_JRBC_RB_RPTR 0x004a
+#define regUVD_JRBC2_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC3_UVD_JRBC_RB_WPTR 0x0080
+#define regUVD_JRBC3_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC3_UVD_JRBC_STATUS 0x0089
+#define regUVD_JRBC3_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC3_UVD_JRBC_RB_RPTR 0x008a
+#define regUVD_JRBC3_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC4_UVD_JRBC_RB_WPTR 0x00c0
+#define regUVD_JRBC4_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC4_UVD_JRBC_STATUS 0x00c9
+#define regUVD_JRBC4_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC4_UVD_JRBC_RB_RPTR 0x00ca
+#define regUVD_JRBC4_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC5_UVD_JRBC_RB_WPTR 0x0100
+#define regUVD_JRBC5_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC5_UVD_JRBC_STATUS 0x0109
+#define regUVD_JRBC5_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC5_UVD_JRBC_RB_RPTR 0x010a
+#define regUVD_JRBC5_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC6_UVD_JRBC_RB_WPTR 0x0140
+#define regUVD_JRBC6_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC6_UVD_JRBC_STATUS 0x0149
+#define regUVD_JRBC6_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC6_UVD_JRBC_RB_RPTR 0x014a
+#define regUVD_JRBC6_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC7_UVD_JRBC_RB_WPTR 0x0180
+#define regUVD_JRBC7_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC7_UVD_JRBC_STATUS 0x0189
+#define regUVD_JRBC7_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC7_UVD_JRBC_RB_RPTR 0x018a
+#define regUVD_JRBC7_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC8_UVD_JRBC_RB_WPTR 0x01c0
+#define regUVD_JRBC8_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC8_UVD_JRBC_STATUS 0x01c9
+#define regUVD_JRBC8_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC8_UVD_JRBC_RB_RPTR 0x01ca
+#define regUVD_JRBC8_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC9_UVD_JRBC_RB_WPTR 0x0440
+#define regUVD_JRBC9_UVD_JRBC_RB_WPTR_BASE_IDX 1
+#define regUVD_JRBC9_UVD_JRBC_STATUS 0x0449
+#define regUVD_JRBC9_UVD_JRBC_STATUS_BASE_IDX 1
+#define regUVD_JRBC9_UVD_JRBC_RB_RPTR 0x044a
+#define regUVD_JRBC9_UVD_JRBC_RB_RPTR_BASE_IDX 1
+/* JMI stall/drop/clean-status and core reset control registers. */
+#define regUVD_JMI0_JPEG_LMI_DROP 0x0663
+#define regUVD_JMI0_JPEG_LMI_DROP_BASE_IDX 1
+#define regUVD_JMI0_UVD_JMI_CLIENT_STALL 0x067a
+#define regUVD_JMI0_UVD_JMI_CLIENT_STALL_BASE_IDX 1
+#define regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS 0x067b
+#define regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS_BASE_IDX 1
+#define regJPEG_CORE_RST_CTRL 0x072e
+#define regJPEG_CORE_RST_CTRL_BASE_IDX 1
+
+/* VCN_RRMT_CNTL register offset and base index. */
+#define regVCN_RRMT_CNTL 0x0940
+#define regVCN_RRMT_CNTL_BASE_IDX 1
+
+/*
+ * JPEG sub-block identifiers. AMDGPU_JPEG_V5_0_1_MAX_SUB_BLOCK is the
+ * number of sub-blocks and must stay last.
+ */
+enum amdgpu_jpeg_v5_0_1_sub_block {
+ AMDGPU_JPEG_V5_0_1_JPEG0 = 0,
+ AMDGPU_JPEG_V5_0_1_JPEG1,
+
+ AMDGPU_JPEG_V5_0_1_MAX_SUB_BLOCK,
+};
+
+#endif /* __JPEG_V5_0_1_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c
new file mode 100644
index 000000000000..1a285b531881
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include "amdgpu.h"
+#include "lsdma_v6_0.h"
+#include "amdgpu_lsdma.h"
+
+#include "lsdma/lsdma_6_0_0_offset.h"
+#include "lsdma/lsdma_6_0_0_sh_mask.h"
+
+/*
+ * lsdma_v6_0_wait_pio_status - wait on the LSDMA PIO status register
+ * @adev: amdgpu device
+ *
+ * Calls amdgpu_lsdma_wait_for() on LSDMA_PIO_STATUS, passing the
+ * PIO_IDLE | PIO_FIFO_EMPTY bits for both trailing arguments
+ * (presumably expected value and mask - confirm against the helper).
+ *
+ * Returns the helper's result.
+ */
+static int lsdma_v6_0_wait_pio_status(struct amdgpu_device *adev)
+{
+ return amdgpu_lsdma_wait_for(adev, SOC15_REG_OFFSET(LSDMA, 0, regLSDMA_PIO_STATUS),
+ LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK,
+ LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK);
+}
+
+/*
+ * lsdma_v6_0_copy_mem - program an LSDMA PIO copy and wait for it
+ * @adev: amdgpu device
+ * @src_addr: source address, split into LO/HI register writes
+ * @dst_addr: destination address, split into LO/HI register writes
+ * @size: value placed in the BYTE_COUNT field of the command register
+ *
+ * Returns the result of lsdma_v6_0_wait_pio_status(); a non-zero result is
+ * also logged with dev_err().
+ */
+static int lsdma_v6_0_copy_mem(struct amdgpu_device *adev,
+ uint64_t src_addr,
+ uint64_t dst_addr,
+ uint64_t size)
+{
+ int ret;
+ uint32_t tmp;
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_LO, lower_32_bits(src_addr));
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_HI, upper_32_bits(src_addr));
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr));
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr));
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0);
+
+ /* Read-modify-write the command register; CONSTANT_FILL = 0 selects a copy. */
+ tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND);
+ /* NOTE(review): size is 64-bit but tmp is a 32-bit register word;
+  * presumably callers keep size within the BYTE_COUNT field - confirm.
+  */
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 0);
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp);
+
+ ret = lsdma_v6_0_wait_pio_status(adev);
+ if (ret)
+ dev_err(adev->dev, "LSDMA PIO failed to copy memory!\n");
+
+ return ret;
+}
+
+/*
+ * lsdma_v6_0_fill_mem - program an LSDMA PIO constant fill and wait for it
+ * @adev: amdgpu device
+ * @dst_addr: destination address, split into LO/HI register writes
+ * @data: 32-bit value written to the constant-fill data register
+ * @size: value placed in the BYTE_COUNT field of the command register
+ *
+ * Returns the result of lsdma_v6_0_wait_pio_status(); a non-zero result is
+ * also logged with dev_err().
+ */
+static int lsdma_v6_0_fill_mem(struct amdgpu_device *adev,
+ uint64_t dst_addr,
+ uint32_t data,
+ uint64_t size)
+{
+ int ret;
+ uint32_t tmp;
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONSTFILL_DATA, data);
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr));
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr));
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0);
+
+ /* Read-modify-write the command register; CONSTANT_FILL = 1 selects a fill. */
+ tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 1);
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp);
+
+ ret = lsdma_v6_0_wait_pio_status(adev);
+ if (ret)
+ dev_err(adev->dev, "LSDMA PIO failed to fill memory!\n");
+
+ return ret;
+}
+
+/*
+ * lsdma_v6_0_update_memory_power_gating - set the LSDMA MEM_POWER_CTRL_EN bit
+ * @adev: amdgpu device
+ * @enable: final value of the MEM_POWER_CTRL_EN field
+ *
+ * The field is first written as 0 and then written again with @enable, so
+ * the register is always written twice.
+ */
+static void lsdma_v6_0_update_memory_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL);
+ tmp = REG_SET_FIELD(tmp, LSDMA_MEM_POWER_CTRL, MEM_POWER_CTRL_EN, 0);
+ WREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL, tmp);
+
+ tmp = REG_SET_FIELD(tmp, LSDMA_MEM_POWER_CTRL, MEM_POWER_CTRL_EN, enable);
+ WREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL, tmp);
+}
+
+/* Exported LSDMA 6.0 callback table: copy, fill and memory power gating. */
+const struct amdgpu_lsdma_funcs lsdma_v6_0_funcs = {
+ .copy_mem = lsdma_v6_0_copy_mem,
+ .fill_mem = lsdma_v6_0_fill_mem,
+ .update_memory_power_gating = lsdma_v6_0_update_memory_power_gating
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.h b/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.h
new file mode 100644
index 000000000000..3ef79be1a9bf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __LSDMA_V6_0_H__
+#define __LSDMA_V6_0_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_lsdma_funcs lsdma_v6_0_funcs;
+
+#endif /* __LSDMA_V6_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.c
new file mode 100644
index 000000000000..396262044ea8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include "amdgpu.h"
+#include "lsdma_v7_0.h"
+#include "amdgpu_lsdma.h"
+
+#include "lsdma/lsdma_7_0_0_offset.h"
+#include "lsdma/lsdma_7_0_0_sh_mask.h"
+
+/*
+ * lsdma_v7_0_wait_pio_status - wait on the LSDMA PIO status register
+ * @adev: amdgpu device
+ *
+ * Calls amdgpu_lsdma_wait_for() on LSDMA_PIO_STATUS, passing the
+ * PIO_IDLE | PIO_FIFO_EMPTY bits for both trailing arguments
+ * (presumably expected value and mask - confirm against the helper).
+ *
+ * Returns the helper's result.
+ */
+static int lsdma_v7_0_wait_pio_status(struct amdgpu_device *adev)
+{
+ return amdgpu_lsdma_wait_for(adev, SOC15_REG_OFFSET(LSDMA, 0, regLSDMA_PIO_STATUS),
+ LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK,
+ LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK);
+}
+
+/*
+ * lsdma_v7_0_copy_mem - program an LSDMA PIO copy and wait for it
+ * @adev: amdgpu device
+ * @src_addr: source address, split into LO/HI register writes
+ * @dst_addr: destination address, split into LO/HI register writes
+ * @size: value placed in the BYTE_COUNT field of the command register
+ *
+ * Returns the result of lsdma_v7_0_wait_pio_status(); a non-zero result is
+ * also logged with dev_err().
+ */
+static int lsdma_v7_0_copy_mem(struct amdgpu_device *adev,
+ uint64_t src_addr,
+ uint64_t dst_addr,
+ uint64_t size)
+{
+ int ret;
+ uint32_t tmp;
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_LO, lower_32_bits(src_addr));
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_HI, upper_32_bits(src_addr));
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr));
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr));
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0);
+
+ /* Read-modify-write the command register; CONSTANT_FILL = 0 selects a copy. */
+ tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND);
+ /* NOTE(review): size is 64-bit but tmp is a 32-bit register word;
+  * presumably callers keep size within the BYTE_COUNT field - confirm.
+  */
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 0);
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp);
+
+ ret = lsdma_v7_0_wait_pio_status(adev);
+ if (ret)
+ dev_err(adev->dev, "LSDMA PIO failed to copy memory!\n");
+
+ return ret;
+}
+
+/*
+ * lsdma_v7_0_fill_mem - program an LSDMA PIO constant fill and wait for it
+ * @adev: amdgpu device
+ * @dst_addr: destination address, split into LO/HI register writes
+ * @data: 32-bit value written to the constant-fill data register
+ * @size: value placed in the BYTE_COUNT field of the command register
+ *
+ * Returns the result of lsdma_v7_0_wait_pio_status(); a non-zero result is
+ * also logged with dev_err().
+ */
+static int lsdma_v7_0_fill_mem(struct amdgpu_device *adev,
+ uint64_t dst_addr,
+ uint32_t data,
+ uint64_t size)
+{
+ int ret;
+ uint32_t tmp;
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONSTFILL_DATA, data);
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr));
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr));
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0);
+
+ /* Read-modify-write the command register; CONSTANT_FILL = 1 selects a fill. */
+ tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 1);
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp);
+
+ ret = lsdma_v7_0_wait_pio_status(adev);
+ if (ret)
+ dev_err(adev->dev, "LSDMA PIO failed to fill memory!\n");
+
+ return ret;
+}
+
+/*
+ * lsdma_v7_0_update_memory_power_gating - set the LSDMA MEM_POWER_CTRL_EN bit
+ * @adev: amdgpu device
+ * @enable: final value of the MEM_POWER_CTRL_EN field
+ *
+ * The field is first written as 0 and then written again with @enable, so
+ * the register is always written twice.
+ */
+static void lsdma_v7_0_update_memory_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL);
+ tmp = REG_SET_FIELD(tmp, LSDMA_MEM_POWER_CTRL, MEM_POWER_CTRL_EN, 0);
+ WREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL, tmp);
+
+ tmp = REG_SET_FIELD(tmp, LSDMA_MEM_POWER_CTRL, MEM_POWER_CTRL_EN, enable);
+ WREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL, tmp);
+}
+
+/* Exported LSDMA 7.0 callback table: copy, fill and memory power gating. */
+const struct amdgpu_lsdma_funcs lsdma_v7_0_funcs = {
+ .copy_mem = lsdma_v7_0_copy_mem,
+ .fill_mem = lsdma_v7_0_fill_mem,
+ .update_memory_power_gating = lsdma_v7_0_update_memory_power_gating
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.h b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.h
new file mode 100644
index 000000000000..52b4485cdd98
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __LSDMA_V7_0_H__
+#define __LSDMA_V7_0_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_lsdma_funcs lsdma_v7_0_funcs;
+
+#endif /* __LSDMA_V7_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
index 8f7107d392af..6dae4a2e2767 100644
--- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
@@ -37,24 +37,30 @@ static void mca_v3_0_mp0_query_ras_error_count(struct amdgpu_device *adev,
ras_error_status);
}
-static int mca_v3_0_mp0_ras_late_init(struct amdgpu_device *adev)
+static int mca_v3_0_ras_block_match(struct amdgpu_ras_block_object *block_obj,
+ enum amdgpu_ras_block block, uint32_t sub_block_index)
{
- return amdgpu_mca_ras_late_init(adev, &adev->mca.mp0);
-}
+ if (!block_obj)
+ return -EINVAL;
-static void mca_v3_0_mp0_ras_fini(struct amdgpu_device *adev)
-{
- amdgpu_mca_ras_fini(adev, &adev->mca.mp0);
+ if ((block_obj->ras_comm.block == block) &&
+ (block_obj->ras_comm.sub_block_index == sub_block_index)) {
+ return 0;
+ }
+
+ return -EINVAL;
}
-const struct amdgpu_mca_ras_funcs mca_v3_0_mp0_ras_funcs = {
- .ras_late_init = mca_v3_0_mp0_ras_late_init,
- .ras_fini = mca_v3_0_mp0_ras_fini,
+static const struct amdgpu_ras_block_hw_ops mca_v3_0_mp0_hw_ops = {
.query_ras_error_count = mca_v3_0_mp0_query_ras_error_count,
.query_ras_error_address = NULL,
- .ras_block = AMDGPU_RAS_BLOCK__MCA,
- .ras_sub_block = AMDGPU_RAS_MCA_BLOCK__MP0,
- .sysfs_name = "mp0_err_count",
+};
+
+struct amdgpu_mca_ras_block mca_v3_0_mp0_ras = {
+ .ras_block = {
+ .hw_ops = &mca_v3_0_mp0_hw_ops,
+ .ras_block_match = mca_v3_0_ras_block_match,
+ },
};
static void mca_v3_0_mp1_query_ras_error_count(struct amdgpu_device *adev,
@@ -65,24 +71,16 @@ static void mca_v3_0_mp1_query_ras_error_count(struct amdgpu_device *adev,
ras_error_status);
}
-static int mca_v3_0_mp1_ras_late_init(struct amdgpu_device *adev)
-{
- return amdgpu_mca_ras_late_init(adev, &adev->mca.mp1);
-}
-
-static void mca_v3_0_mp1_ras_fini(struct amdgpu_device *adev)
-{
- amdgpu_mca_ras_fini(adev, &adev->mca.mp1);
-}
-
-const struct amdgpu_mca_ras_funcs mca_v3_0_mp1_ras_funcs = {
- .ras_late_init = mca_v3_0_mp1_ras_late_init,
- .ras_fini = mca_v3_0_mp1_ras_fini,
+static const struct amdgpu_ras_block_hw_ops mca_v3_0_mp1_hw_ops = {
.query_ras_error_count = mca_v3_0_mp1_query_ras_error_count,
.query_ras_error_address = NULL,
- .ras_block = AMDGPU_RAS_BLOCK__MCA,
- .ras_sub_block = AMDGPU_RAS_MCA_BLOCK__MP1,
- .sysfs_name = "mp1_err_count",
+};
+
+struct amdgpu_mca_ras_block mca_v3_0_mp1_ras = {
+ .ras_block = {
+ .hw_ops = &mca_v3_0_mp1_hw_ops,
+ .ras_block_match = mca_v3_0_ras_block_match,
+ },
};
static void mca_v3_0_mpio_query_ras_error_count(struct amdgpu_device *adev,
@@ -93,36 +91,14 @@ static void mca_v3_0_mpio_query_ras_error_count(struct amdgpu_device *adev,
ras_error_status);
}
-static int mca_v3_0_mpio_ras_late_init(struct amdgpu_device *adev)
-{
- return amdgpu_mca_ras_late_init(adev, &adev->mca.mpio);
-}
-
-static void mca_v3_0_mpio_ras_fini(struct amdgpu_device *adev)
-{
- amdgpu_mca_ras_fini(adev, &adev->mca.mpio);
-}
-
-const struct amdgpu_mca_ras_funcs mca_v3_0_mpio_ras_funcs = {
- .ras_late_init = mca_v3_0_mpio_ras_late_init,
- .ras_fini = mca_v3_0_mpio_ras_fini,
+static const struct amdgpu_ras_block_hw_ops mca_v3_0_mpio_hw_ops = {
.query_ras_error_count = mca_v3_0_mpio_query_ras_error_count,
.query_ras_error_address = NULL,
- .ras_block = AMDGPU_RAS_BLOCK__MCA,
- .ras_sub_block = AMDGPU_RAS_MCA_BLOCK__MPIO,
- .sysfs_name = "mpio_err_count",
};
-
-static void mca_v3_0_init(struct amdgpu_device *adev)
-{
- struct amdgpu_mca *mca = &adev->mca;
-
- mca->mp0.ras_funcs = &mca_v3_0_mp0_ras_funcs;
- mca->mp1.ras_funcs = &mca_v3_0_mp1_ras_funcs;
- mca->mpio.ras_funcs = &mca_v3_0_mpio_ras_funcs;
-}
-
-const struct amdgpu_mca_funcs mca_v3_0_funcs = {
- .init = mca_v3_0_init,
-}; \ No newline at end of file
+struct amdgpu_mca_ras_block mca_v3_0_mpio_ras = {
+ .ras_block = {
+ .hw_ops = &mca_v3_0_mpio_hw_ops,
+ .ras_block_match = mca_v3_0_ras_block_match,
+ },
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h
index b899b86194c2..d3eaef0d7f2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h
@@ -21,6 +21,8 @@
#ifndef __MCA_V3_0_H__
#define __MCA_V3_0_H__
-extern const struct amdgpu_mca_funcs mca_v3_0_funcs;
+extern struct amdgpu_mca_ras_block mca_v3_0_mp0_ras;
+extern struct amdgpu_mca_ras_block mca_v3_0_mp1_ras;
+extern struct amdgpu_mca_ras_block mca_v3_0_mpio_ras;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_api_def.h b/drivers/gpu/drm/amd/amdgpu/mes_api_def.h
deleted file mode 100644
index 3f4fca5fd1da..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/mes_api_def.h
+++ /dev/null
@@ -1,443 +0,0 @@
-/*
- * Copyright 2019 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef __MES_API_DEF_H__
-#define __MES_API_DEF_H__
-
-#pragma pack(push, 4)
-
-#define MES_API_VERSION 1
-
-/* Driver submits one API(cmd) as a single Frame and this command size is same
- * for all API to ease the debugging and parsing of ring buffer.
- */
-enum { API_FRAME_SIZE_IN_DWORDS = 64 };
-
-/* To avoid command in scheduler context to be overwritten whenenver mutilple
- * interrupts come in, this creates another queue.
- */
-enum { API_NUMBER_OF_COMMAND_MAX = 32 };
-
-enum MES_API_TYPE {
- MES_API_TYPE_SCHEDULER = 1,
- MES_API_TYPE_MAX
-};
-
-enum MES_SCH_API_OPCODE {
- MES_SCH_API_SET_HW_RSRC = 0,
- MES_SCH_API_SET_SCHEDULING_CONFIG = 1, /* agreegated db, quantums, etc */
- MES_SCH_API_ADD_QUEUE = 2,
- MES_SCH_API_REMOVE_QUEUE = 3,
- MES_SCH_API_PERFORM_YIELD = 4,
- MES_SCH_API_SET_GANG_PRIORITY_LEVEL = 5,
- MES_SCH_API_SUSPEND = 6,
- MES_SCH_API_RESUME = 7,
- MES_SCH_API_RESET = 8,
- MES_SCH_API_SET_LOG_BUFFER = 9,
- MES_SCH_API_CHANGE_GANG_PRORITY = 10,
- MES_SCH_API_QUERY_SCHEDULER_STATUS = 11,
- MES_SCH_API_PROGRAM_GDS = 12,
- MES_SCH_API_SET_DEBUG_VMID = 13,
- MES_SCH_API_MISC = 14,
- MES_SCH_API_MAX = 0xFF
-};
-
-union MES_API_HEADER {
- struct {
- uint32_t type : 4; /* 0 - Invalid; 1 - Scheduling; 2 - TBD */
- uint32_t opcode : 8;
- uint32_t dwsize : 8; /* including header */
- uint32_t reserved : 12;
- };
-
- uint32_t u32All;
-};
-
-enum MES_AMD_PRIORITY_LEVEL {
- AMD_PRIORITY_LEVEL_LOW = 0,
- AMD_PRIORITY_LEVEL_NORMAL = 1,
- AMD_PRIORITY_LEVEL_MEDIUM = 2,
- AMD_PRIORITY_LEVEL_HIGH = 3,
- AMD_PRIORITY_LEVEL_REALTIME = 4,
- AMD_PRIORITY_NUM_LEVELS
-};
-
-enum MES_QUEUE_TYPE {
- MES_QUEUE_TYPE_GFX,
- MES_QUEUE_TYPE_COMPUTE,
- MES_QUEUE_TYPE_SDMA,
- MES_QUEUE_TYPE_MAX,
-};
-
-struct MES_API_STATUS {
- uint64_t api_completion_fence_addr;
- uint64_t api_completion_fence_value;
-};
-
-enum { MAX_COMPUTE_PIPES = 8 };
-enum { MAX_GFX_PIPES = 2 };
-enum { MAX_SDMA_PIPES = 2 };
-
-enum { MAX_COMPUTE_HQD_PER_PIPE = 8 };
-enum { MAX_GFX_HQD_PER_PIPE = 8 };
-enum { MAX_SDMA_HQD_PER_PIPE = 10 };
-
-enum { MAX_QUEUES_IN_A_GANG = 8 };
-
-enum VM_HUB_TYPE {
- VM_HUB_TYPE_GC = 0,
- VM_HUB_TYPE_MM = 1,
- VM_HUB_TYPE_MAX,
-};
-
-enum { VMID_INVALID = 0xffff };
-
-enum { MAX_VMID_GCHUB = 16 };
-enum { MAX_VMID_MMHUB = 16 };
-
-enum MES_LOG_OPERATION {
- MES_LOG_OPERATION_CONTEXT_STATE_CHANGE = 0
-};
-
-enum MES_LOG_CONTEXT_STATE {
- MES_LOG_CONTEXT_STATE_IDLE = 0,
- MES_LOG_CONTEXT_STATE_RUNNING = 1,
- MES_LOG_CONTEXT_STATE_READY = 2,
- MES_LOG_CONTEXT_STATE_READY_STANDBY = 3,
-};
-
-struct MES_LOG_CONTEXT_STATE_CHANGE {
- void *h_context;
- enum MES_LOG_CONTEXT_STATE new_context_state;
-};
-
-struct MES_LOG_ENTRY_HEADER {
- uint32_t first_free_entry_index;
- uint32_t wraparound_count;
- uint64_t number_of_entries;
- uint64_t reserved[2];
-};
-
-struct MES_LOG_ENTRY_DATA {
- uint64_t gpu_time_stamp;
- uint32_t operation_type; /* operation_type is of MES_LOG_OPERATION type */
- uint32_t reserved_operation_type_bits;
- union {
- struct MES_LOG_CONTEXT_STATE_CHANGE context_state_change;
- uint64_t reserved_operation_data[2];
- };
-};
-
-struct MES_LOG_BUFFER {
- struct MES_LOG_ENTRY_HEADER header;
- struct MES_LOG_ENTRY_DATA entries[1];
-};
-
-union MESAPI_SET_HW_RESOURCES {
- struct {
- union MES_API_HEADER header;
- uint32_t vmid_mask_mmhub;
- uint32_t vmid_mask_gfxhub;
- uint32_t gds_size;
- uint32_t paging_vmid;
- uint32_t compute_hqd_mask[MAX_COMPUTE_PIPES];
- uint32_t gfx_hqd_mask[MAX_GFX_PIPES];
- uint32_t sdma_hqd_mask[MAX_SDMA_PIPES];
- uint32_t agreegated_doorbells[AMD_PRIORITY_NUM_LEVELS];
- uint64_t g_sch_ctx_gpu_mc_ptr;
- uint64_t query_status_fence_gpu_mc_ptr;
- struct MES_API_STATUS api_status;
- union {
- struct {
- uint32_t disable_reset : 1;
- uint32_t reserved : 31;
- };
- uint32_t uint32_t_all;
- };
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__ADD_QUEUE {
- struct {
- union MES_API_HEADER header;
- uint32_t process_id;
- uint64_t page_table_base_addr;
- uint64_t process_va_start;
- uint64_t process_va_end;
- uint64_t process_quantum;
- uint64_t process_context_addr;
- uint64_t gang_quantum;
- uint64_t gang_context_addr;
- uint32_t inprocess_gang_priority;
- enum MES_AMD_PRIORITY_LEVEL gang_global_priority_level;
- uint32_t doorbell_offset;
- uint64_t mqd_addr;
- uint64_t wptr_addr;
- enum MES_QUEUE_TYPE queue_type;
- uint32_t gds_base;
- uint32_t gds_size;
- uint32_t gws_base;
- uint32_t gws_size;
- uint32_t oa_mask;
-
- struct {
- uint32_t paging : 1;
- uint32_t debug_vmid : 4;
- uint32_t program_gds : 1;
- uint32_t is_gang_suspended : 1;
- uint32_t is_tmz_queue : 1;
- uint32_t reserved : 24;
- };
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__REMOVE_QUEUE {
- struct {
- union MES_API_HEADER header;
- uint32_t doorbell_offset;
- uint64_t gang_context_addr;
-
- struct {
- uint32_t unmap_legacy_gfx_queue : 1;
- uint32_t reserved : 31;
- };
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__SET_SCHEDULING_CONFIG {
- struct {
- union MES_API_HEADER header;
- /* Grace period when preempting another priority band for this
- * priority band. The value for idle priority band is ignored,
- * as it never preempts other bands.
- */
- uint64_t grace_period_other_levels[AMD_PRIORITY_NUM_LEVELS];
- /* Default quantum for scheduling across processes within
- * a priority band.
- */
- uint64_t process_quantum_for_level[AMD_PRIORITY_NUM_LEVELS];
- /* Default grace period for processes that preempt each other
- * within a priority band.
- */
- uint64_t process_grace_period_same_level[AMD_PRIORITY_NUM_LEVELS];
- /* For normal level this field specifies the target GPU
- * percentage in situations when it's starved by the high level.
- * Valid values are between 0 and 50, with the default being 10.
- */
- uint32_t normal_yield_percent;
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__PERFORM_YIELD {
- struct {
- union MES_API_HEADER header;
- uint32_t dummy;
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__CHANGE_GANG_PRIORITY_LEVEL {
- struct {
- union MES_API_HEADER header;
- uint32_t inprocess_gang_priority;
- enum MES_AMD_PRIORITY_LEVEL gang_global_priority_level;
- uint64_t gang_quantum;
- uint64_t gang_context_addr;
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__SUSPEND {
- struct {
- union MES_API_HEADER header;
- /* false - suspend all gangs; true - specific gang */
- struct {
- uint32_t suspend_all_gangs : 1;
- uint32_t reserved : 31;
- };
- /* gang_context_addr is valid only if suspend_all = false */
- uint64_t gang_context_addr;
-
- uint64_t suspend_fence_addr;
- uint32_t suspend_fence_value;
-
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__RESUME {
- struct {
- union MES_API_HEADER header;
- /* false - resume all gangs; true - specified gang */
- struct {
- uint32_t resume_all_gangs : 1;
- uint32_t reserved : 31;
- };
- /* valid only if resume_all_gangs = false */
- uint64_t gang_context_addr;
-
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__RESET {
- struct {
- union MES_API_HEADER header;
-
- struct {
- uint32_t reset_queue : 1;
- uint32_t reserved : 31;
- };
-
- uint64_t gang_context_addr;
- uint32_t doorbell_offset; /* valid only if reset_queue = true */
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__SET_LOGGING_BUFFER {
- struct {
- union MES_API_HEADER header;
- /* There are separate log buffers for each queue type */
- enum MES_QUEUE_TYPE log_type;
- /* Log buffer GPU Address */
- uint64_t logging_buffer_addr;
- /* number of entries in the log buffer */
- uint32_t number_of_entries;
- /* Entry index at which CPU interrupt needs to be signalled */
- uint32_t interrupt_entry;
-
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__QUERY_MES_STATUS {
- struct {
- union MES_API_HEADER header;
- bool mes_healthy; /* 0 - not healthy, 1 - healthy */
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__PROGRAM_GDS {
- struct {
- union MES_API_HEADER header;
- uint64_t process_context_addr;
- uint32_t gds_base;
- uint32_t gds_size;
- uint32_t gws_base;
- uint32_t gws_size;
- uint32_t oa_mask;
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__SET_DEBUG_VMID {
- struct {
- union MES_API_HEADER header;
- struct MES_API_STATUS api_status;
- union {
- struct {
- uint32_t use_gds : 1;
- uint32_t reserved : 31;
- } flags;
- uint32_t u32All;
- };
- uint32_t reserved;
- uint32_t debug_vmid;
- uint64_t process_context_addr;
- uint64_t page_table_base_addr;
- uint64_t process_va_start;
- uint64_t process_va_end;
- uint32_t gds_base;
- uint32_t gds_size;
- uint32_t gws_base;
- uint32_t gws_size;
- uint32_t oa_mask;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-enum MESAPI_MISC_OPCODE {
- MESAPI_MISC__MODIFY_REG,
- MESAPI_MISC__MAX,
-};
-
-enum MODIFY_REG_SUBCODE {
- MODIFY_REG__OVERWRITE,
- MODIFY_REG__RMW_OR,
- MODIFY_REG__RMW_AND,
- MODIFY_REG__MAX,
-};
-
-enum { MISC_DATA_MAX_SIZE_IN_DWORDS = 20 };
-
-union MESAPI__MISC {
- struct {
- union MES_API_HEADER header;
- enum MESAPI_MISC_OPCODE opcode;
- struct MES_API_STATUS api_status;
-
- union {
- struct {
- enum MODIFY_REG_SUBCODE subcode;
- uint32_t reg_offset;
- uint32_t reg_value;
- } modify_reg;
- uint32_t data[MISC_DATA_MAX_SIZE_IN_DWORDS];
- };
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-#pragma pack(pop)
-#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
new file mode 100644
index 000000000000..64cae89357b6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -0,0 +1,501 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <drm/drm_drv.h>
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
+
+#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
+#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
+
+/*
+ * Bind a buffer object into the GART.
+ *
+ * The BO is reserved for the duration of the amdgpu_ttm_alloc_gart() call
+ * and unreserved again on every path.  On success an additional reference
+ * on @bo is taken and is not dropped by this function.
+ *
+ * Returns 0 on success, or the error reported by amdgpu_bo_reserve() or
+ * amdgpu_ttm_alloc_gart().
+ */
+static int
+mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo)
+{
+	int err;
+
+	err = amdgpu_bo_reserve(bo, true);
+	if (err) {
+		DRM_ERROR("Failed to reserve bo. ret %d\n", err);
+		return err;
+	}
+
+	err = amdgpu_ttm_alloc_gart(&bo->tbo);
+	if (err)
+		DRM_ERROR("Failed to bind bo to GART. ret %d\n", err);
+
+	/* The reservation is released whether or not the bind succeeded. */
+	amdgpu_bo_unreserve(bo);
+	if (err)
+		return err;
+
+	/* Keep an extra reference on the now GART-bound BO. */
+	amdgpu_bo_ref(bo);
+
+	return 0;
+}
+
+/*
+ * Look up the BO backing the user supplied write pointer address in the
+ * queue's VM, check that it is no larger than one page, bind it into the
+ * GART and record the resulting GPU address in queue->wptr_obj.
+ *
+ * @uq_mgr: not used by this function
+ * @queue:  queue whose vm is searched and whose wptr_obj is filled in
+ * @wptr:   write pointer virtual address
+ *
+ * Returns 0 on success, -EINVAL when no mapping is found or the BO is larger
+ * than PAGE_SIZE, or the error from reserving the root BO / binding to GART.
+ */
+static int
+mes_userq_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue,
+ uint64_t wptr)
+{
+ struct amdgpu_bo_va_mapping *wptr_mapping;
+ struct amdgpu_vm *wptr_vm;
+ struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj;
+ int ret;
+
+ /* The VM root BO is held reserved only around the mapping lookup. */
+ wptr_vm = queue->vm;
+ ret = amdgpu_bo_reserve(wptr_vm->root.bo, false);
+ if (ret)
+ return ret;
+
+ /* Mask the address and convert it to a page number for the lookup. */
+ wptr &= AMDGPU_GMC_HOLE_MASK;
+ wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT);
+ amdgpu_bo_unreserve(wptr_vm->root.bo);
+ if (!wptr_mapping) {
+ DRM_ERROR("Failed to lookup wptr bo\n");
+ return -EINVAL;
+ }
+
+ /*
+ * NOTE(review): wptr_mapping is dereferenced after the root BO has been
+ * unreserved, and wptr_obj->obj stays set on the error returns below -
+ * confirm both are safe for the callers.
+ */
+ wptr_obj->obj = wptr_mapping->bo_va->base.bo;
+ if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
+ DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n");
+ return -EINVAL;
+ }
+
+ ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj);
+ if (ret) {
+ DRM_ERROR("Failed to map wptr bo to GART\n");
+ return ret;
+ }
+
+ queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj);
+ return 0;
+}
+
+/*
+ * Translate a user queue creation priority flag into a MES priority level.
+ *
+ * NORMAL_LOW, and every value that is not explicitly listed, maps to
+ * AMDGPU_MES_PRIORITY_LEVEL_NORMAL.
+ */
+static int convert_to_mes_priority(int priority)
+{
+	switch (priority) {
+	case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW:
+		return AMDGPU_MES_PRIORITY_LEVEL_LOW;
+	case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH:
+		return AMDGPU_MES_PRIORITY_LEVEL_MEDIUM;
+	case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH:
+		return AMDGPU_MES_PRIORITY_LEVEL_HIGH;
+	case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW:
+	default:
+		break;
+	}
+
+	return AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+}
+
+/*
+ * Fill a mes_add_queue_input from the queue state and hand it to the MES
+ * add_hw_queue callback while holding the MES lock.
+ *
+ * The process context lives at the start of queue->fw_obj and the gang
+ * context AMDGPU_USERQ_PROC_CTX_SZ bytes after it.
+ *
+ * Returns 0 on success or the error returned by add_hw_queue.
+ */
+static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_userq_obj *ctx = &queue->fw_obj;
+ struct amdgpu_mqd_prop *userq_props = queue->userq_prop;
+ struct mes_add_queue_input queue_input;
+ int r;
+
+ memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
+
+ queue_input.process_va_start = 0;
+ queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
+
+ /* set process quantum to 10 ms and gang quantum to 1 ms as default */
+ /* NOTE(review): the values below imply 100 ns units - confirm. */
+ queue_input.process_quantum = 100000;
+ queue_input.gang_quantum = 10000;
+ queue_input.paging = false;
+
+ queue_input.process_context_addr = ctx->gpu_addr;
+ queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
+ queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+ queue_input.gang_global_priority_level = convert_to_mes_priority(queue->priority);
+
+ queue_input.process_id = queue->vm->pasid;
+ queue_input.queue_type = queue->queue_type;
+ queue_input.mqd_addr = queue->mqd.gpu_addr;
+ queue_input.wptr_addr = userq_props->wptr_gpu_addr;
+ /* presumably converts a size in bytes to dwords - TODO confirm */
+ queue_input.queue_size = userq_props->queue_size >> 2;
+ queue_input.doorbell_offset = userq_props->doorbell_index;
+ queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo);
+ /* GART address recorded by mes_userq_create_wptr_mapping() */
+ queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr;
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r) {
+ DRM_ERROR("Failed to map queue in HW, err (%d)\n", r);
+ return r;
+ }
+
+ DRM_DEBUG_DRIVER("Queue (doorbell:%d) mapped successfully\n", userq_props->doorbell_index);
+ return 0;
+}
+
+/*
+ * Ask the MES to remove a user queue from the hardware.
+ *
+ * The queue is identified by its doorbell index and by its gang context,
+ * which sits AMDGPU_USERQ_PROC_CTX_SZ bytes into queue->fw_obj.  The
+ * remove_hw_queue callback runs under the MES lock.
+ *
+ * Returns 0 on success or the error returned by remove_hw_queue.
+ */
+static int mes_userq_unmap(struct amdgpu_userq_mgr *uq_mgr,
+			   struct amdgpu_usermode_queue *queue)
+{
+	struct amdgpu_device *adev = uq_mgr->adev;
+	struct mes_remove_queue_input rm_input;
+	int err;
+
+	memset(&rm_input, 0x0, sizeof(struct mes_remove_queue_input));
+	rm_input.gang_context_addr = queue->fw_obj.gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
+	rm_input.doorbell_offset = queue->doorbell_index;
+
+	amdgpu_mes_lock(&adev->mes);
+	err = adev->mes.funcs->remove_hw_queue(&adev->mes, &rm_input);
+	amdgpu_mes_unlock(&adev->mes);
+
+	if (err)
+		DRM_ERROR("Failed to unmap queue in HW, err (%d)\n", err);
+
+	return err;
+}
+
+/*
+ * Allocate queue->fw_obj, the object that holds the firmware contexts.
+ *
+ * The FW expects at least one page each for the process context and the
+ * gang context, so a single object covering both is created.
+ * @mqd_user is not used by this function.
+ *
+ * Returns 0 on success or the error from amdgpu_userq_create_object().
+ */
+static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
+				      struct amdgpu_usermode_queue *queue,
+				      struct drm_amdgpu_userq_in *mqd_user)
+{
+	int ctx_bytes = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_GANG_CTX_SZ;
+	int err;
+
+	err = amdgpu_userq_create_object(uq_mgr, &queue->fw_obj, ctx_bytes);
+	if (err)
+		DRM_ERROR("Failed to allocate ctx space bo for userqueue, err:%d\n", err);
+
+	return err;
+}
+
+/*
+ * Ask the MES to detect and reset hung queues of @queue_type and mark the
+ * affected user queues.
+ *
+ * The MES reports the doorbell indices of the hung queues.  Every user queue
+ * of the requested type whose doorbell matches is moved to
+ * AMDGPU_USERQ_STATE_HUNG, has its fences force-completed and triggers a
+ * wedged event; the GPU reset counter is bumped for each match.  When at
+ * least one queue was marked, MES scheduling is resumed.
+ *
+ * Returns -EINVAL if the doorbell array reported by the MES does not fit the
+ * local buffer, the error from the detect/reset call, the result of
+ * amdgpu_mes_resume() when a hung queue was handled, or 0 otherwise.
+ */
+static int mes_userq_detect_and_reset(struct amdgpu_device *adev,
+				      int queue_type)
+{
+	int db_array_size = amdgpu_mes_get_hung_queue_db_array_size(adev);
+	struct amdgpu_usermode_queue *queue;
+	unsigned int hung_db_num = 0;
+	unsigned long queue_id;
+	u32 db_array[8];
+	bool found_hung_queue = false;
+	unsigned int i;
+	int r;
+
+	if (db_array_size > 8) {
+		dev_err(adev->dev, "DB array size (%d vs 8) too small\n",
+			db_array_size);
+		return -EINVAL;
+	}
+
+	amdgpu_mes_lock(&adev->mes);
+	r = amdgpu_mes_detect_and_reset_hung_queues(adev, queue_type, false,
+						    &hung_db_num, db_array);
+	amdgpu_mes_unlock(&adev->mes);
+	if (r) {
+		dev_err(adev->dev, "Failed to detect and reset queues, err (%d)\n", r);
+		return r;
+	}
+
+	/* Nothing reported as hung: no queue to mark, no resume needed. */
+	if (!hung_db_num)
+		return 0;
+
+	xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+		if (queue->queue_type != queue_type)
+			continue;
+
+		for (i = 0; i < hung_db_num; i++) {
+			if (queue->doorbell_index != db_array[i])
+				continue;
+
+			queue->state = AMDGPU_USERQ_STATE_HUNG;
+			found_hung_queue = true;
+			atomic_inc(&adev->gpu_reset_counter);
+			amdgpu_userq_fence_driver_force_completion(queue);
+			drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, NULL);
+		}
+	}
+
+	/* Resume scheduling after hang recovery */
+	if (found_hung_queue)
+		r = amdgpu_mes_resume(adev);
+
+	return r;
+}
+
+/*
+ * Create and initialise the MQD and the supporting objects of a user queue.
+ *
+ * Steps, in order:
+ *  1. allocate the amdgpu_mqd_prop block and the MQD object,
+ *  2. fill the properties from the ioctl arguments and from the IP specific
+ *     MQD blob copied in from user space (compute, GFX or SDMA),
+ *  3. run the generic init_mqd callback for the queue's IP type,
+ *  4. allocate the FW context object and map the wptr BO into the GART.
+ *
+ * The IP specific blob is only needed while its fields are validated and
+ * copied, so it is freed on every path as soon as that is done.
+ *
+ * @uq_mgr:  user queue manager, provides the device
+ * @args_in: user supplied queue creation arguments
+ * @queue:   queue being created; mqd, fw_obj, wptr_obj and userq_prop are
+ *           populated on success
+ *
+ * Returns 0 on success.  On failure everything allocated here is released
+ * and a negative error code is returned: -ENOMEM, -EINVAL, the error from
+ * memdup_user(), or the error of the failing helper.
+ */
+static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
+				struct drm_amdgpu_userq_in *args_in,
+				struct amdgpu_usermode_queue *queue)
+{
+	struct amdgpu_device *adev = uq_mgr->adev;
+	struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type];
+	struct drm_amdgpu_userq_in *mqd_user = args_in;
+	struct amdgpu_mqd_prop *userq_props;
+	int r;
+
+	/* Structure to initialize MQD for userqueue using generic MQD init function */
+	userq_props = kzalloc(sizeof(struct amdgpu_mqd_prop), GFP_KERNEL);
+	if (!userq_props) {
+		DRM_ERROR("Failed to allocate memory for userq_props\n");
+		return -ENOMEM;
+	}
+
+	r = amdgpu_userq_create_object(uq_mgr, &queue->mqd, mqd_hw_default->mqd_size);
+	if (r) {
+		DRM_ERROR("Failed to create MQD object for userqueue\n");
+		goto free_props;
+	}
+
+	/* Initialize the MQD BO with user given values */
+	userq_props->wptr_gpu_addr = mqd_user->wptr_va;
+	userq_props->rptr_gpu_addr = mqd_user->rptr_va;
+	userq_props->queue_size = mqd_user->queue_size;
+	userq_props->hqd_base_gpu_addr = mqd_user->queue_va;
+	userq_props->mqd_gpu_addr = queue->mqd.gpu_addr;
+	userq_props->use_doorbell = true;
+	userq_props->doorbell_index = queue->doorbell_index;
+	userq_props->fence_address = queue->fence_drv->gpu_addr;
+
+	if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) {
+		struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd;
+
+		if (mqd_user->mqd_size != sizeof(*compute_mqd)) {
+			DRM_ERROR("Invalid compute IP MQD size\n");
+			r = -EINVAL;
+			goto free_mqd;
+		}
+
+		compute_mqd = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
+		if (IS_ERR(compute_mqd)) {
+			DRM_ERROR("Failed to read user MQD\n");
+			r = PTR_ERR(compute_mqd);
+			goto free_mqd;
+		}
+
+		userq_props->eop_gpu_addr = compute_mqd->eop_va;
+		r = amdgpu_userq_input_va_validate(queue, compute_mqd->eop_va,
+						   2048);
+		/* The copied blob is no longer needed, pass or fail. */
+		kfree(compute_mqd);
+		if (r)
+			goto free_mqd;
+
+		userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
+		userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM;
+		userq_props->hqd_active = false;
+		userq_props->tmz_queue =
+			mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
+	} else if (queue->queue_type == AMDGPU_HW_IP_GFX) {
+		struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11;
+		struct amdgpu_gfx_shadow_info shadow_info;
+
+		if (adev->gfx.funcs->get_gfx_shadow_info) {
+			adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow_info, true);
+		} else {
+			r = -EINVAL;
+			goto free_mqd;
+		}
+
+		if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) {
+			DRM_ERROR("Invalid GFX MQD\n");
+			r = -EINVAL;
+			goto free_mqd;
+		}
+
+		mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
+		if (IS_ERR(mqd_gfx_v11)) {
+			DRM_ERROR("Failed to read user MQD\n");
+			r = PTR_ERR(mqd_gfx_v11);
+			goto free_mqd;
+		}
+
+		userq_props->shadow_addr = mqd_gfx_v11->shadow_va;
+		userq_props->csa_addr = mqd_gfx_v11->csa_va;
+		userq_props->tmz_queue =
+			mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
+
+		r = amdgpu_userq_input_va_validate(queue, mqd_gfx_v11->shadow_va,
+						   shadow_info.shadow_size);
+		if (!r)
+			r = amdgpu_userq_input_va_validate(queue, mqd_gfx_v11->csa_va,
+							   shadow_info.csa_size);
+		/* The copied blob is no longer needed, pass or fail. */
+		kfree(mqd_gfx_v11);
+		if (r)
+			goto free_mqd;
+	} else if (queue->queue_type == AMDGPU_HW_IP_DMA) {
+		struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11;
+
+		if (mqd_user->mqd_size != sizeof(*mqd_sdma_v11) || !mqd_user->mqd) {
+			DRM_ERROR("Invalid SDMA MQD\n");
+			r = -EINVAL;
+			goto free_mqd;
+		}
+
+		mqd_sdma_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
+		if (IS_ERR(mqd_sdma_v11)) {
+			DRM_ERROR("Failed to read sdma user MQD\n");
+			r = PTR_ERR(mqd_sdma_v11);
+			goto free_mqd;
+		}
+
+		userq_props->csa_addr = mqd_sdma_v11->csa_va;
+		r = amdgpu_userq_input_va_validate(queue, mqd_sdma_v11->csa_va,
+						   32);
+		/* The copied blob is no longer needed, pass or fail. */
+		kfree(mqd_sdma_v11);
+		if (r)
+			goto free_mqd;
+	}
+
+	queue->userq_prop = userq_props;
+
+	r = mqd_hw_default->init_mqd(adev, (void *)queue->mqd.cpu_ptr, userq_props);
+	if (r) {
+		DRM_ERROR("Failed to initialize MQD for userqueue\n");
+		goto free_mqd;
+	}
+
+	/* Create BO for FW operations */
+	r = mes_userq_create_ctx_space(uq_mgr, queue, mqd_user);
+	if (r) {
+		DRM_ERROR("Failed to allocate BO for userqueue (%d)", r);
+		goto free_mqd;
+	}
+
+	/* FW expects WPTR BOs to be mapped into GART */
+	r = mes_userq_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr);
+	if (r) {
+		DRM_ERROR("Failed to create WPTR mapping\n");
+		goto free_ctx;
+	}
+
+	return 0;
+
+free_ctx:
+	amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj);
+
+free_mqd:
+	amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);
+
+free_props:
+	kfree(userq_props);
+
+	return r;
+}
+
+/*
+ * Release the per-queue objects set up by mes_userq_mqd_create(): the FW
+ * context object, the userq_prop allocation and the MQD object.
+ *
+ * NOTE(review): the BO reference taken when the wptr BO was bound to the
+ * GART is not dropped here - confirm it is released elsewhere.
+ */
+static void
+mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj);
+ kfree(queue->userq_prop);
+ amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);
+}
+
+/*
+ * Suspend the gang of a mapped user queue and wait for the MES to signal
+ * completion through a writeback fence.
+ *
+ * Does nothing and returns 0 unless the queue is in
+ * AMDGPU_USERQ_STATE_MAPPED.  Otherwise a writeback slot is allocated and
+ * cleared, suspend_gang is issued under the MES lock with that slot as the
+ * fence address and 1 as the fence value, and the slot is polled until it
+ * reads 1 or the poll loop runs out.
+ *
+ * Returns 0 on success, the error from amdgpu_device_wb_get() or
+ * suspend_gang, or -ETIMEDOUT if the fence value never appears.
+ */
+static int mes_userq_preempt(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct mes_suspend_gang_input queue_input;
+ struct amdgpu_userq_obj *ctx = &queue->fw_obj;
+ signed long timeout = 2100000; /* 2100 ms */
+ u64 fence_gpu_addr;
+ u32 fence_offset;
+ u64 *fence_ptr;
+ int i, r;
+
+ if (queue->state != AMDGPU_USERQ_STATE_MAPPED)
+ return 0;
+ r = amdgpu_device_wb_get(adev, &fence_offset);
+ if (r)
+ return r;
+
+ /*
+ * fence_offset is scaled by 4 for the GPU address and used as an index
+ * into wb.wb for the CPU pointer.
+ * NOTE(review): the slot is accessed as a u64 - confirm a writeback slot
+ * is at least 8 bytes wide and suitably aligned.
+ */
+ fence_gpu_addr = adev->wb.gpu_addr + (fence_offset * 4);
+ fence_ptr = (u64 *)&adev->wb.wb[fence_offset];
+ *fence_ptr = 0;
+
+ memset(&queue_input, 0x0, sizeof(struct mes_suspend_gang_input));
+ queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
+ queue_input.suspend_fence_addr = fence_gpu_addr;
+ queue_input.suspend_fence_value = 1;
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->suspend_gang(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r) {
+ DRM_ERROR("Failed to suspend gang: %d\n", r);
+ goto out;
+ }
+
+ /*
+ * Busy-poll the fence, one udelay(1) per iteration, up to 'timeout'
+ * iterations; r is still 0 when the fence value is seen.
+ * NOTE(review): the fence is read through a plain pointer - consider
+ * READ_ONCE() so the load is guaranteed to be repeated each iteration.
+ */
+ for (i = 0; i < timeout; i++) {
+ if (*fence_ptr == 1)
+ goto out;
+ udelay(1);
+ }
+ r = -ETIMEDOUT;
+
+out:
+ /* The writeback slot is released on success, failure and timeout. */
+ amdgpu_device_wb_free(adev, fence_offset);
+ return r;
+}
+
+/*
+ * Resume the gang of a previously preempted user queue.
+ *
+ * A hung queue is rejected with -EINVAL; a queue in any state other than
+ * AMDGPU_USERQ_STATE_PREEMPTED is left alone and 0 is returned.  For a
+ * preempted queue resume_gang is called under the MES lock with the gang
+ * context address, AMDGPU_USERQ_PROC_CTX_SZ bytes into queue->fw_obj.
+ *
+ * Returns 0 on success or the error returned by resume_gang.
+ */
+static int mes_userq_restore(struct amdgpu_userq_mgr *uq_mgr,
+			     struct amdgpu_usermode_queue *queue)
+{
+	struct amdgpu_device *adev = uq_mgr->adev;
+	struct mes_resume_gang_input resume_input;
+	int err;
+
+	switch (queue->state) {
+	case AMDGPU_USERQ_STATE_HUNG:
+		return -EINVAL;
+	case AMDGPU_USERQ_STATE_PREEMPTED:
+		break;
+	default:
+		return 0;
+	}
+
+	memset(&resume_input, 0x0, sizeof(struct mes_resume_gang_input));
+	resume_input.gang_context_addr = queue->fw_obj.gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
+
+	amdgpu_mes_lock(&adev->mes);
+	err = adev->mes.funcs->resume_gang(&adev->mes, &resume_input);
+	amdgpu_mes_unlock(&adev->mes);
+
+	if (err)
+		dev_err(adev->dev, "Failed to resume queue, err (%d)\n", err);
+
+	return err;
+}
+
+/* MES backed implementation of the user queue callbacks. */
+const struct amdgpu_userq_funcs userq_mes_funcs = {
+	/* MQD lifetime */
+	.mqd_create = mes_userq_mqd_create,
+	.mqd_destroy = mes_userq_mqd_destroy,
+
+	/* adding the queue to / removing it from the hardware */
+	.map = mes_userq_map,
+	.unmap = mes_userq_unmap,
+
+	/* gang suspend / resume */
+	.preempt = mes_userq_preempt,
+	.restore = mes_userq_restore,
+
+	/* hang handling */
+	.detect_and_reset = mes_userq_detect_and_reset,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h
new file mode 100644
index 000000000000..090ae8897770
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef MES_USERQ_H
+#define MES_USERQ_H
+#include "amdgpu_userq.h"
+
+extern const struct amdgpu_userq_funcs userq_mes_funcs;
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
deleted file mode 100644
index a7ec4ac89da5..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
+++ /dev/null
@@ -1,1008 +0,0 @@
-/*
- * Copyright 2019 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include <linux/firmware.h>
-#include <linux/module.h>
-#include "amdgpu.h"
-#include "soc15_common.h"
-#include "nv.h"
-#include "gc/gc_10_1_0_offset.h"
-#include "gc/gc_10_1_0_sh_mask.h"
-#include "v10_structs.h"
-#include "mes_api_def.h"
-
-#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid 0x2820
-#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid_BASE_IDX 1
-
-MODULE_FIRMWARE("amdgpu/navi10_mes.bin");
-MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes.bin");
-
-static int mes_v10_1_hw_fini(void *handle);
-
-#define MES_EOP_SIZE 2048
-
-static void mes_v10_1_ring_set_wptr(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- if (ring->use_doorbell) {
- atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs],
- ring->wptr);
- WDOORBELL64(ring->doorbell_index, ring->wptr);
- } else {
- BUG();
- }
-}
-
-static u64 mes_v10_1_ring_get_rptr(struct amdgpu_ring *ring)
-{
- return ring->adev->wb.wb[ring->rptr_offs];
-}
-
-static u64 mes_v10_1_ring_get_wptr(struct amdgpu_ring *ring)
-{
- u64 wptr;
-
- if (ring->use_doorbell)
- wptr = atomic64_read((atomic64_t *)
- &ring->adev->wb.wb[ring->wptr_offs]);
- else
- BUG();
- return wptr;
-}
-
-static const struct amdgpu_ring_funcs mes_v10_1_ring_funcs = {
- .type = AMDGPU_RING_TYPE_MES,
- .align_mask = 1,
- .nop = 0,
- .support_64bit_ptrs = true,
- .get_rptr = mes_v10_1_ring_get_rptr,
- .get_wptr = mes_v10_1_ring_get_wptr,
- .set_wptr = mes_v10_1_ring_set_wptr,
- .insert_nop = amdgpu_ring_insert_nop,
-};
-
-static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
- void *pkt, int size)
-{
- int ndw = size / 4;
- signed long r;
- union MESAPI__ADD_QUEUE *x_pkt = pkt;
- struct amdgpu_device *adev = mes->adev;
- struct amdgpu_ring *ring = &mes->ring;
-
- BUG_ON(size % 4 != 0);
-
- if (amdgpu_ring_alloc(ring, ndw))
- return -ENOMEM;
-
- amdgpu_ring_write_multiple(ring, pkt, ndw);
- amdgpu_ring_commit(ring);
-
- DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
-
- r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
- adev->usec_timeout);
- if (r < 1) {
- DRM_ERROR("MES failed to response msg=%d\n",
- x_pkt->header.opcode);
- return -ETIMEDOUT;
- }
-
- return 0;
-}
-
-static int convert_to_mes_queue_type(int queue_type)
-{
- if (queue_type == AMDGPU_RING_TYPE_GFX)
- return MES_QUEUE_TYPE_GFX;
- else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
- return MES_QUEUE_TYPE_COMPUTE;
- else if (queue_type == AMDGPU_RING_TYPE_SDMA)
- return MES_QUEUE_TYPE_SDMA;
- else
- BUG();
- return -1;
-}
-
-static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
- struct mes_add_queue_input *input)
-{
- struct amdgpu_device *adev = mes->adev;
- union MESAPI__ADD_QUEUE mes_add_queue_pkt;
-
- memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
-
- mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
- mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
- mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
-
- mes_add_queue_pkt.process_id = input->process_id;
- mes_add_queue_pkt.page_table_base_addr =
- input->page_table_base_addr - adev->gmc.vram_start;
- mes_add_queue_pkt.process_va_start = input->process_va_start;
- mes_add_queue_pkt.process_va_end = input->process_va_end;
- mes_add_queue_pkt.process_quantum = input->process_quantum;
- mes_add_queue_pkt.process_context_addr = input->process_context_addr;
- mes_add_queue_pkt.gang_quantum = input->gang_quantum;
- mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
- mes_add_queue_pkt.inprocess_gang_priority =
- input->inprocess_gang_priority;
- mes_add_queue_pkt.gang_global_priority_level =
- input->gang_global_priority_level;
- mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
- mes_add_queue_pkt.mqd_addr = input->mqd_addr;
- mes_add_queue_pkt.wptr_addr = input->wptr_addr;
- mes_add_queue_pkt.queue_type =
- convert_to_mes_queue_type(input->queue_type);
- mes_add_queue_pkt.paging = input->paging;
-
- mes_add_queue_pkt.api_status.api_completion_fence_addr =
- mes->ring.fence_drv.gpu_addr;
- mes_add_queue_pkt.api_status.api_completion_fence_value =
- ++mes->ring.fence_drv.sync_seq;
-
- return mes_v10_1_submit_pkt_and_poll_completion(mes,
- &mes_add_queue_pkt, sizeof(mes_add_queue_pkt));
-}
-
-static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes,
- struct mes_remove_queue_input *input)
-{
- union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
-
- memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
-
- mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
- mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
- mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
-
- mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
- mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
-
- mes_remove_queue_pkt.api_status.api_completion_fence_addr =
- mes->ring.fence_drv.gpu_addr;
- mes_remove_queue_pkt.api_status.api_completion_fence_value =
- ++mes->ring.fence_drv.sync_seq;
-
- return mes_v10_1_submit_pkt_and_poll_completion(mes,
- &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
-}
-
-static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes,
- struct mes_suspend_gang_input *input)
-{
- return 0;
-}
-
-static int mes_v10_1_resume_gang(struct amdgpu_mes *mes,
- struct mes_resume_gang_input *input)
-{
- return 0;
-}
-
-static int mes_v10_1_query_sched_status(struct amdgpu_mes *mes)
-{
- union MESAPI__QUERY_MES_STATUS mes_status_pkt;
-
- memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
-
- mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
- mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
- mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
-
- mes_status_pkt.api_status.api_completion_fence_addr =
- mes->ring.fence_drv.gpu_addr;
- mes_status_pkt.api_status.api_completion_fence_value =
- ++mes->ring.fence_drv.sync_seq;
-
- return mes_v10_1_submit_pkt_and_poll_completion(mes,
- &mes_status_pkt, sizeof(mes_status_pkt));
-}
-
-static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
-{
- int i;
- struct amdgpu_device *adev = mes->adev;
- union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;
-
- memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));
-
- mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
- mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
- mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
-
- mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
- mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
- mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
- mes_set_hw_res_pkt.paging_vmid = 0;
- mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr;
- mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
- mes->query_status_fence_gpu_addr;
-
- for (i = 0; i < MAX_COMPUTE_PIPES; i++)
- mes_set_hw_res_pkt.compute_hqd_mask[i] =
- mes->compute_hqd_mask[i];
-
- for (i = 0; i < MAX_GFX_PIPES; i++)
- mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i];
-
- for (i = 0; i < MAX_SDMA_PIPES; i++)
- mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];
-
- for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
- mes_set_hw_res_pkt.agreegated_doorbells[i] =
- mes->agreegated_doorbells[i];
-
- mes_set_hw_res_pkt.api_status.api_completion_fence_addr =
- mes->ring.fence_drv.gpu_addr;
- mes_set_hw_res_pkt.api_status.api_completion_fence_value =
- ++mes->ring.fence_drv.sync_seq;
-
- return mes_v10_1_submit_pkt_and_poll_completion(mes,
- &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt));
-}
-
-static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
- .add_hw_queue = mes_v10_1_add_hw_queue,
- .remove_hw_queue = mes_v10_1_remove_hw_queue,
- .suspend_gang = mes_v10_1_suspend_gang,
- .resume_gang = mes_v10_1_resume_gang,
-};
-
-static int mes_v10_1_init_microcode(struct amdgpu_device *adev)
-{
- const char *chip_name;
- char fw_name[30];
- int err;
- const struct mes_firmware_header_v1_0 *mes_hdr;
- struct amdgpu_firmware_info *info;
-
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- chip_name = "navi10";
- break;
- case CHIP_SIENNA_CICHLID:
- chip_name = "sienna_cichlid";
- break;
- default:
- BUG();
- }
-
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin", chip_name);
- err = request_firmware(&adev->mes.fw, fw_name, adev->dev);
- if (err)
- return err;
-
- err = amdgpu_ucode_validate(adev->mes.fw);
- if (err) {
- release_firmware(adev->mes.fw);
- adev->mes.fw = NULL;
- return err;
- }
-
- mes_hdr = (const struct mes_firmware_header_v1_0 *)adev->mes.fw->data;
- adev->mes.ucode_fw_version = le32_to_cpu(mes_hdr->mes_ucode_version);
- adev->mes.ucode_fw_version =
- le32_to_cpu(mes_hdr->mes_ucode_data_version);
- adev->mes.uc_start_addr =
- le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
- ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
- adev->mes.data_start_addr =
- le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
- ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MES];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MES;
- info->fw = adev->mes.fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
- PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MES_DATA];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MES_DATA;
- info->fw = adev->mes.fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
- PAGE_SIZE);
- }
-
- return 0;
-}
-
-static void mes_v10_1_free_microcode(struct amdgpu_device *adev)
-{
- release_firmware(adev->mes.fw);
- adev->mes.fw = NULL;
-}
-
-static int mes_v10_1_allocate_ucode_buffer(struct amdgpu_device *adev)
-{
- int r;
- const struct mes_firmware_header_v1_0 *mes_hdr;
- const __le32 *fw_data;
- unsigned fw_size;
-
- mes_hdr = (const struct mes_firmware_header_v1_0 *)
- adev->mes.fw->data;
-
- fw_data = (const __le32 *)(adev->mes.fw->data +
- le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
- fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
-
- r = amdgpu_bo_create_reserved(adev, fw_size,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
- &adev->mes.ucode_fw_obj,
- &adev->mes.ucode_fw_gpu_addr,
- (void **)&adev->mes.ucode_fw_ptr);
- if (r) {
- dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
- return r;
- }
-
- memcpy(adev->mes.ucode_fw_ptr, fw_data, fw_size);
-
- amdgpu_bo_kunmap(adev->mes.ucode_fw_obj);
- amdgpu_bo_unreserve(adev->mes.ucode_fw_obj);
-
- return 0;
-}
-
-static int mes_v10_1_allocate_ucode_data_buffer(struct amdgpu_device *adev)
-{
- int r;
- const struct mes_firmware_header_v1_0 *mes_hdr;
- const __le32 *fw_data;
- unsigned fw_size;
-
- mes_hdr = (const struct mes_firmware_header_v1_0 *)
- adev->mes.fw->data;
-
- fw_data = (const __le32 *)(adev->mes.fw->data +
- le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
- fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
-
- r = amdgpu_bo_create_reserved(adev, fw_size,
- 64 * 1024, AMDGPU_GEM_DOMAIN_GTT,
- &adev->mes.data_fw_obj,
- &adev->mes.data_fw_gpu_addr,
- (void **)&adev->mes.data_fw_ptr);
- if (r) {
- dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
- return r;
- }
-
- memcpy(adev->mes.data_fw_ptr, fw_data, fw_size);
-
- amdgpu_bo_kunmap(adev->mes.data_fw_obj);
- amdgpu_bo_unreserve(adev->mes.data_fw_obj);
-
- return 0;
-}
-
-static void mes_v10_1_free_ucode_buffers(struct amdgpu_device *adev)
-{
- amdgpu_bo_free_kernel(&adev->mes.data_fw_obj,
- &adev->mes.data_fw_gpu_addr,
- (void **)&adev->mes.data_fw_ptr);
-
- amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj,
- &adev->mes.ucode_fw_gpu_addr,
- (void **)&adev->mes.ucode_fw_ptr);
-}
-
-static void mes_v10_1_enable(struct amdgpu_device *adev, bool enable)
-{
- uint32_t data = 0;
-
- if (enable) {
- data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
- data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
- WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
-
- /* set ucode start address */
- WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
- (uint32_t)(adev->mes.uc_start_addr) >> 2);
-
- /* clear BYPASS_UNCACHED to avoid hangs after interrupt. */
- data = RREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL);
- data = REG_SET_FIELD(data, CP_MES_DC_OP_CNTL,
- BYPASS_UNCACHED, 0);
- WREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL, data);
-
- /* unhalt MES and activate pipe0 */
- data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
- WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
- } else {
- data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
- data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
- data = REG_SET_FIELD(data, CP_MES_CNTL,
- MES_INVALIDATE_ICACHE, 1);
- data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
- data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
- WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
- }
-}
-
-/* This function is for backdoor MES firmware */
-static int mes_v10_1_load_microcode(struct amdgpu_device *adev)
-{
- int r;
- uint32_t data;
-
- if (!adev->mes.fw)
- return -EINVAL;
-
- r = mes_v10_1_allocate_ucode_buffer(adev);
- if (r)
- return r;
-
- r = mes_v10_1_allocate_ucode_data_buffer(adev);
- if (r) {
- mes_v10_1_free_ucode_buffers(adev);
- return r;
- }
-
- mes_v10_1_enable(adev, false);
-
- WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_CNTL, 0);
-
- mutex_lock(&adev->srbm_mutex);
- /* me=3, pipe=0, queue=0 */
- nv_grbm_select(adev, 3, 0, 0, 0);
-
- /* set ucode start address */
- WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
- (uint32_t)(adev->mes.uc_start_addr) >> 2);
-
- /* set ucode fimrware address */
- WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_LO,
- lower_32_bits(adev->mes.ucode_fw_gpu_addr));
- WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_HI,
- upper_32_bits(adev->mes.ucode_fw_gpu_addr));
-
- /* set ucode instruction cache boundary to 2M-1 */
- WREG32_SOC15(GC, 0, mmCP_MES_MIBOUND_LO, 0x1FFFFF);
-
- /* set ucode data firmware address */
- WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_LO,
- lower_32_bits(adev->mes.data_fw_gpu_addr));
- WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_HI,
- upper_32_bits(adev->mes.data_fw_gpu_addr));
-
- /* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */
- WREG32_SOC15(GC, 0, mmCP_MES_MDBOUND_LO, 0x3FFFF);
-
- /* invalidate ICACHE */
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
- break;
- default:
- data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
- break;
- }
- data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
- data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
- break;
- default:
- WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
- break;
- }
-
- /* prime the ICACHE. */
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
- break;
- default:
- data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
- break;
- }
- data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
- switch (adev->asic_type) {
- case CHIP_SIENNA_CICHLID:
- WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
- break;
- default:
- WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
- break;
- }
-
- nv_grbm_select(adev, 0, 0, 0, 0);
- mutex_unlock(&adev->srbm_mutex);
-
- return 0;
-}
-
-static int mes_v10_1_allocate_eop_buf(struct amdgpu_device *adev)
-{
- int r;
- u32 *eop;
-
- r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT,
- &adev->mes.eop_gpu_obj,
- &adev->mes.eop_gpu_addr,
- (void **)&eop);
- if (r) {
- dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
- return r;
- }
-
- memset(eop, 0, adev->mes.eop_gpu_obj->tbo.base.size);
-
- amdgpu_bo_kunmap(adev->mes.eop_gpu_obj);
- amdgpu_bo_unreserve(adev->mes.eop_gpu_obj);
-
- return 0;
-}
-
-static int mes_v10_1_allocate_mem_slots(struct amdgpu_device *adev)
-{
- int r;
-
- r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs);
- if (r) {
- dev_err(adev->dev,
- "(%d) mes sch_ctx_offs wb alloc failed\n", r);
- return r;
- }
- adev->mes.sch_ctx_gpu_addr =
- adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4);
- adev->mes.sch_ctx_ptr =
- (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs];
-
- r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs);
- if (r) {
- dev_err(adev->dev,
- "(%d) query_status_fence_offs wb alloc failed\n", r);
- return r;
- }
- adev->mes.query_status_fence_gpu_addr =
- adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4);
- adev->mes.query_status_fence_ptr =
- (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs];
-
- return 0;
-}
-
-static int mes_v10_1_mqd_init(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
- struct v10_compute_mqd *mqd = ring->mqd_ptr;
- uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
- uint32_t tmp;
-
- mqd->header = 0xC0310800;
- mqd->compute_pipelinestat_enable = 0x00000001;
- mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
- mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
- mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
- mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
- mqd->compute_misc_reserved = 0x00000003;
-
- eop_base_addr = ring->eop_gpu_addr >> 8;
- mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
- mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
-
- /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
- tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
- tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
- (order_base_2(MES_EOP_SIZE / 4) - 1));
-
- mqd->cp_hqd_eop_control = tmp;
-
- /* enable doorbell? */
- tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
-
- if (ring->use_doorbell) {
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_OFFSET, ring->doorbell_index);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_EN, 1);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_SOURCE, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_HIT, 0);
- }
- else
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_EN, 0);
-
- mqd->cp_hqd_pq_doorbell_control = tmp;
-
- /* disable the queue if it's active */
- ring->wptr = 0;
- mqd->cp_hqd_dequeue_request = 0;
- mqd->cp_hqd_pq_rptr = 0;
- mqd->cp_hqd_pq_wptr_lo = 0;
- mqd->cp_hqd_pq_wptr_hi = 0;
-
- /* set the pointer to the MQD */
- mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
- mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
-
- /* set MQD vmid to 0 */
- tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
- tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
- mqd->cp_mqd_control = tmp;
-
- /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
- hqd_gpu_addr = ring->gpu_addr >> 8;
- mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
- mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
-
- /* set up the HQD, this is similar to CP_RB0_CNTL */
- tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
- (order_base_2(ring->ring_size / 4) - 1));
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
- ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
-#ifdef __BIG_ENDIAN
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
-#endif
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
- mqd->cp_hqd_pq_control = tmp;
-
- /* set the wb address whether it's enabled or not */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
- mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
- mqd->cp_hqd_pq_rptr_report_addr_hi =
- upper_32_bits(wb_gpu_addr) & 0xffff;
-
- /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
- mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
- mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
-
- tmp = 0;
- /* enable the doorbell if requested */
- if (ring->use_doorbell) {
- tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_OFFSET, ring->doorbell_index);
-
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_EN, 1);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_SOURCE, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_HIT, 0);
- }
-
- mqd->cp_hqd_pq_doorbell_control = tmp;
-
- /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
- ring->wptr = 0;
- mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
-
- /* set the vmid for the queue */
- mqd->cp_hqd_vmid = 0;
-
- tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
- mqd->cp_hqd_persistent_state = tmp;
-
- /* set MIN_IB_AVAIL_SIZE */
- tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
- tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
- mqd->cp_hqd_ib_control = tmp;
-
- /* activate the queue */
- mqd->cp_hqd_active = 1;
- return 0;
-}
-
-static void mes_v10_1_queue_init_register(struct amdgpu_ring *ring)
-{
- struct v10_compute_mqd *mqd = ring->mqd_ptr;
- struct amdgpu_device *adev = ring->adev;
- uint32_t data = 0;
-
- mutex_lock(&adev->srbm_mutex);
- nv_grbm_select(adev, 3, 0, 0, 0);
-
- /* set CP_HQD_VMID.VMID = 0. */
- data = RREG32_SOC15(GC, 0, mmCP_HQD_VMID);
- data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
- WREG32_SOC15(GC, 0, mmCP_HQD_VMID, data);
-
- /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
- data = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
- data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_EN, 0);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);
-
- /* set CP_MQD_BASE_ADDR/HI with the MQD base address */
- WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
-
- /* set CP_MQD_CONTROL.VMID=0 */
- data = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
- data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
- WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, 0);
-
- /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
-
- /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
- mqd->cp_hqd_pq_rptr_report_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
- mqd->cp_hqd_pq_rptr_report_addr_hi);
-
- /* set CP_HQD_PQ_CONTROL */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
-
- /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
- mqd->cp_hqd_pq_wptr_poll_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
- mqd->cp_hqd_pq_wptr_poll_addr_hi);
-
- /* set CP_HQD_PQ_DOORBELL_CONTROL */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
- mqd->cp_hqd_pq_doorbell_control);
-
- /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */
- WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
-
- /* set CP_HQD_ACTIVE.ACTIVE=1 */
- WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
-
- nv_grbm_select(adev, 0, 0, 0, 0);
- mutex_unlock(&adev->srbm_mutex);
-}
-
-#if 0
-static int mes_v10_1_kiq_enable_queue(struct amdgpu_device *adev)
-{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
- int r;
-
- if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
- return -EINVAL;
-
- r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
- if (r) {
- DRM_ERROR("Failed to lock KIQ (%d).\n", r);
- return r;
- }
-
- kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);
-
- r = amdgpu_ring_test_ring(kiq_ring);
- if (r) {
- DRM_ERROR("kfq enable failed\n");
- kiq_ring->sched.ready = false;
- }
- return r;
-}
-#endif
-
-static int mes_v10_1_queue_init(struct amdgpu_device *adev)
-{
- int r;
-
- r = mes_v10_1_mqd_init(&adev->mes.ring);
- if (r)
- return r;
-
-#if 0
- r = mes_v10_1_kiq_enable_queue(adev);
- if (r)
- return r;
-#else
- mes_v10_1_queue_init_register(&adev->mes.ring);
-#endif
-
- return 0;
-}
-
-static int mes_v10_1_ring_init(struct amdgpu_device *adev)
-{
- struct amdgpu_ring *ring;
-
- ring = &adev->mes.ring;
-
- ring->funcs = &mes_v10_1_ring_funcs;
-
- ring->me = 3;
- ring->pipe = 0;
- ring->queue = 0;
-
- ring->ring_obj = NULL;
- ring->use_doorbell = true;
- ring->doorbell_index = adev->doorbell_index.mes_ring << 1;
- ring->eop_gpu_addr = adev->mes.eop_gpu_addr;
- ring->no_scheduler = true;
- sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);
-
- return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
- AMDGPU_RING_PRIO_DEFAULT, NULL);
-}
-
-static int mes_v10_1_mqd_sw_init(struct amdgpu_device *adev)
-{
- int r, mqd_size = sizeof(struct v10_compute_mqd);
- struct amdgpu_ring *ring = &adev->mes.ring;
-
- if (ring->mqd_obj)
- return 0;
-
- r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
- &ring->mqd_gpu_addr, &ring->mqd_ptr);
- if (r) {
- dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
- return r;
- }
-
- /* prepare MQD backup */
- adev->mes.mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
- if (!adev->mes.mqd_backup)
- dev_warn(adev->dev,
- "no memory to create MQD backup for ring %s\n",
- ring->name);
-
- return 0;
-}
-
-static int mes_v10_1_sw_init(void *handle)
-{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- adev->mes.adev = adev;
- adev->mes.funcs = &mes_v10_1_funcs;
-
- r = mes_v10_1_init_microcode(adev);
- if (r)
- return r;
-
- r = mes_v10_1_allocate_eop_buf(adev);
- if (r)
- return r;
-
- r = mes_v10_1_mqd_sw_init(adev);
- if (r)
- return r;
-
- r = mes_v10_1_ring_init(adev);
- if (r)
- return r;
-
- r = mes_v10_1_allocate_mem_slots(adev);
- if (r)
- return r;
-
- return 0;
-}
-
-static int mes_v10_1_sw_fini(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
- amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
-
- kfree(adev->mes.mqd_backup);
-
- amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj,
- &adev->mes.ring.mqd_gpu_addr,
- &adev->mes.ring.mqd_ptr);
-
- amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj,
- &adev->mes.eop_gpu_addr,
- NULL);
-
- mes_v10_1_free_microcode(adev);
-
- return 0;
-}
-
-static int mes_v10_1_hw_init(void *handle)
-{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
- r = mes_v10_1_load_microcode(adev);
- if (r) {
- DRM_ERROR("failed to MES fw, r=%d\n", r);
- return r;
- }
- }
-
- mes_v10_1_enable(adev, true);
-
- r = mes_v10_1_queue_init(adev);
- if (r)
- goto failure;
-
- r = mes_v10_1_set_hw_resources(&adev->mes);
- if (r)
- goto failure;
-
- r = mes_v10_1_query_sched_status(&adev->mes);
- if (r) {
- DRM_ERROR("MES is busy\n");
- goto failure;
- }
-
- return 0;
-
-failure:
- mes_v10_1_hw_fini(adev);
- return r;
-}
-
-static int mes_v10_1_hw_fini(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- mes_v10_1_enable(adev, false);
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)
- mes_v10_1_free_ucode_buffers(adev);
-
- return 0;
-}
-
-static int mes_v10_1_suspend(void *handle)
-{
- return 0;
-}
-
-static int mes_v10_1_resume(void *handle)
-{
- return 0;
-}
-
-static const struct amd_ip_funcs mes_v10_1_ip_funcs = {
- .name = "mes_v10_1",
- .sw_init = mes_v10_1_sw_init,
- .sw_fini = mes_v10_1_sw_fini,
- .hw_init = mes_v10_1_hw_init,
- .hw_fini = mes_v10_1_hw_fini,
- .suspend = mes_v10_1_suspend,
- .resume = mes_v10_1_resume,
-};
-
-const struct amdgpu_ip_block_version mes_v10_1_ip_block = {
- .type = AMD_IP_BLOCK_TYPE_MES,
- .major = 10,
- .minor = 1,
- .rev = 0,
- .funcs = &mes_v10_1_ip_funcs,
-};
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.h b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.h
deleted file mode 100644
index 9afd6ddb01e9..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright 2019 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef __MES_V10_1_H__
-#define __MES_V10_1_H__
-
-extern const struct amdgpu_ip_block_version mes_v10_1_ip_block;
-
-#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
new file mode 100644
index 000000000000..3a52754b5cad
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -0,0 +1,1760 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include "amdgpu.h"
+#include "soc15_common.h"
+#include "soc21.h"
+#include "gfx_v11_0.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "gc/gc_11_0_0_default.h"
+#include "v11_structs.h"
+#include "mes_v11_api_def.h"
+
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_mes1.bin");
+
+static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block);
+static int mes_v11_0_hw_fini(struct amdgpu_ip_block *ip_block);
+static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev);
+static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev);
+
+#define MES_EOP_SIZE 2048
+#define GFX_MES_DRAM_SIZE 0x80000
+#define MES11_HW_RESOURCE_1_SIZE (128 * AMDGPU_GPU_PAGE_SIZE)
+
+#define MES11_HUNG_DB_OFFSET_ARRAY_SIZE 8 /* [0:3] = db offset, [4:7] = hqd info */
+#define MES11_HUNG_HQD_INFO_OFFSET 4
+
+/*
+ * Publish the ring's write pointer: store ring->wptr into the CPU-visible
+ * wptr slot, then write the same value to the ring's 64-bit doorbell.
+ * Rings that do not use a doorbell are not supported and hit BUG().
+ */
+static void mes_v11_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	/* NOTE(review): presumably consumed by the WDOORBELL64() macro - confirm before removing. */
+	struct amdgpu_device *adev = ring->adev;
+
+	if (!ring->use_doorbell)
+		BUG();
+
+	atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
+	WDOORBELL64(ring->doorbell_index, ring->wptr);
+}
+
+/* Return the read pointer value currently stored at ring->rptr_cpu_addr. */
+static u64 mes_v11_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	u64 rptr = *ring->rptr_cpu_addr;
+
+	return rptr;
+}
+
+/*
+ * Return the write pointer held in the CPU-visible wptr slot.
+ * Only doorbell rings are supported; any other ring hits BUG().
+ */
+static u64 mes_v11_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	if (!ring->use_doorbell)
+		BUG();
+
+	return atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+}
+
+/*
+ * Ring callback table for the MES ring type. The pointer accessors are the
+ * doorbell-only helpers defined above; NOP insertion uses the generic
+ * amdgpu_ring_insert_nop() with a NOP value of 0.
+ */
+static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_MES,
+ .align_mask = 1,
+ .nop = 0,
+ .support_64bit_ptrs = true,
+ .get_rptr = mes_v11_0_ring_get_rptr,
+ .get_wptr = mes_v11_0_ring_get_wptr,
+ .set_wptr = mes_v11_0_ring_set_wptr,
+ .insert_nop = amdgpu_ring_insert_nop,
+};
+
+/*
+ * Printable names for scheduler API packets, indexed by header.opcode
+ * (see mes_v11_0_get_op_string()). Only used to build log messages, so the
+ * order of the entries must follow the opcode numbering.
+ * NOTE(review): "unused" presumably marks opcode values with no packet
+ * defined - confirm against mes_v11_api_def.h.
+ */
+static const char *mes_v11_0_opcodes[] = {
+ "SET_HW_RSRC",
+ "SET_SCHEDULING_CONFIG",
+ "ADD_QUEUE",
+ "REMOVE_QUEUE",
+ "PERFORM_YIELD",
+ "SET_GANG_PRIORITY_LEVEL",
+ "SUSPEND",
+ "RESUME",
+ "RESET",
+ "SET_LOG_BUFFER",
+ "CHANGE_GANG_PRORITY",
+ "QUERY_SCHEDULER_STATUS",
+ "PROGRAM_GDS",
+ "SET_DEBUG_VMID",
+ "MISC",
+ "UPDATE_ROOT_PAGE_TABLE",
+ "AMD_LOG",
+ "unused",
+ "unused",
+ "SET_HW_RSRC_1",
+};
+
+/*
+ * Printable names for MISC sub-operations, indexed by the MISC packet's
+ * opcode field (see mes_v11_0_get_misc_op_string()). Sub-opcodes beyond the
+ * end of this table are logged under the generic packet name only.
+ */
+static const char *mes_v11_0_misc_opcodes[] = {
+ "WRITE_REG",
+ "INV_GART",
+ "QUERY_STATUS",
+ "READ_REG",
+ "WAIT_REG_MEM",
+ "SET_SHADER_DEBUGGER",
+};
+
+/*
+ * Look up the log name for the packet's header opcode.
+ * Returns NULL when the opcode lies outside mes_v11_0_opcodes[].
+ */
+static const char *mes_v11_0_get_op_string(union MESAPI__MISC *x_pkt)
+{
+	if (x_pkt->header.opcode >= ARRAY_SIZE(mes_v11_0_opcodes))
+		return NULL;
+
+	return mes_v11_0_opcodes[x_pkt->header.opcode];
+}
+
+/*
+ * Look up the log name of a MISC sub-operation.
+ * Returns NULL when the packet is not a MISC packet or when its sub-opcode
+ * lies outside mes_v11_0_misc_opcodes[].
+ */
+static const char *mes_v11_0_get_misc_op_string(union MESAPI__MISC *x_pkt)
+{
+	if (x_pkt->header.opcode != MES_SCH_API_MISC)
+		return NULL;
+
+	if (x_pkt->opcode >= ARRAY_SIZE(mes_v11_0_misc_opcodes))
+		return NULL;
+
+	return mes_v11_0_misc_opcodes[x_pkt->opcode];
+}
+
+/*
+ * Submit one MES API packet on mes->ring[0] and poll until the firmware
+ * acknowledges it.
+ *
+ * @mes:            MES instance; ring[0] and ring_lock[0] are used.
+ * @pkt:            packet to send. It is read through a MESAPI__MISC view for
+ *                  its header, and its api_status member is filled in here.
+ * @size:           size of @pkt in bytes.
+ * @api_status_off: byte offset of the struct MES_API_STATUS inside @pkt.
+ *
+ * Two completion signals are set up: the caller's packet writes 1 to a
+ * writeback slot, and a trailing QUERY_SCHEDULER_STATUS packet writes the new
+ * sequence number to the ring fence address. Success requires the ring fence
+ * to reach that sequence number and the writeback slot to be non-zero.
+ *
+ * Returns 0 on success, -EINVAL for an out-of-range header opcode,
+ * -ETIMEDOUT when either polling wait expires, or the error reported by
+ * amdgpu_device_wb_get() / amdgpu_ring_alloc().
+ */
+static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
+						    void *pkt, int size,
+						    int api_status_off)
+{
+	union MESAPI__QUERY_MES_STATUS mes_status_pkt;
+	signed long timeout = 2100000; /* 2100 ms */
+	struct amdgpu_device *adev = mes->adev;
+	struct amdgpu_ring *ring = &mes->ring[0];
+	struct MES_API_STATUS *api_status;
+	union MESAPI__MISC *x_pkt = pkt;
+	const char *op_str, *misc_op_str;
+	unsigned long flags;
+	u64 status_gpu_addr;
+	u32 seq, status_offset;
+	u64 *status_ptr;
+	signed long r;
+	int ret;
+
+	if (x_pkt->header.opcode >= MES_SCH_API_MAX)
+		return -EINVAL;
+
+	if (amdgpu_emu_mode) {
+		timeout *= 100;
+	} else if (amdgpu_sriov_vf(adev)) {
+		/* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */
+		timeout = 15 * 600 * 1000;
+	}
+
+	/* Writeback slot the firmware sets to 1 once the caller's packet is done. */
+	ret = amdgpu_device_wb_get(adev, &status_offset);
+	if (ret)
+		return ret;
+
+	status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4);
+	status_ptr = (u64 *)&adev->wb.wb[status_offset];
+	*status_ptr = 0;
+
+	spin_lock_irqsave(&mes->ring_lock[0], flags);
+	r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
+	if (r)
+		goto error_unlock_free;
+
+	seq = ++ring->fence_drv.sync_seq;
+	r = amdgpu_fence_wait_polling(ring,
+				      seq - ring->fence_drv.num_fences_mask,
+				      timeout);
+	if (r < 1) {
+		/*
+		 * The wait result is a remaining-time count, not an errno.
+		 * Returning it unchanged would report success (0) to the
+		 * caller even though nothing was written to the ring.
+		 */
+		r = -ETIMEDOUT;
+		goto error_undo;
+	}
+
+	api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
+	api_status->api_completion_fence_addr = status_gpu_addr;
+	api_status->api_completion_fence_value = 1;
+
+	amdgpu_ring_write_multiple(ring, pkt, size / 4);
+
+	/* Trailing status query whose completion advances the ring fence to seq. */
+	memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
+	mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
+	mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
+	mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+	mes_status_pkt.api_status.api_completion_fence_addr =
+		ring->fence_drv.gpu_addr;
+	mes_status_pkt.api_status.api_completion_fence_value = seq;
+
+	amdgpu_ring_write_multiple(ring, &mes_status_pkt,
+				   sizeof(mes_status_pkt) / 4);
+
+	amdgpu_ring_commit(ring);
+	spin_unlock_irqrestore(&mes->ring_lock[0], flags);
+
+	op_str = mes_v11_0_get_op_string(x_pkt);
+	misc_op_str = mes_v11_0_get_misc_op_string(x_pkt);
+
+	if (misc_op_str)
+		dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str,
+			misc_op_str);
+	else if (op_str)
+		dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str);
+	else
+		dev_dbg(adev->dev, "MES msg=%d was emitted\n",
+			x_pkt->header.opcode);
+
+	/* Both the ring fence and the per-packet writeback slot must be signalled. */
+	r = amdgpu_fence_wait_polling(ring, seq, timeout);
+	if (r < 1 || !*status_ptr) {
+
+		if (misc_op_str)
+			dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n",
+				op_str, misc_op_str);
+		else if (op_str)
+			dev_err(adev->dev, "MES failed to respond to msg=%s\n",
+				op_str);
+		else
+			dev_err(adev->dev, "MES failed to respond to msg=%d\n",
+				x_pkt->header.opcode);
+
+		/* Spins here for as long as halt_if_hws_hang stays set. */
+		while (halt_if_hws_hang)
+			schedule();
+
+		r = -ETIMEDOUT;
+		goto error_wb_free;
+	}
+
+	amdgpu_device_wb_free(adev, status_offset);
+	return 0;
+
+error_undo:
+	dev_err(adev->dev, "MES ring buffer is full.\n");
+	amdgpu_ring_undo(ring);
+
+error_unlock_free:
+	spin_unlock_irqrestore(&mes->ring_lock[0], flags);
+
+error_wb_free:
+	amdgpu_device_wb_free(adev, status_offset);
+	return r;
+}
+
+/*
+ * Map an AMDGPU_RING_TYPE_* value onto the matching MES_QUEUE_TYPE_* value.
+ * Only GFX, COMPUTE and SDMA are handled; any other value hits BUG().
+ */
+static int convert_to_mes_queue_type(int queue_type)
+{
+	switch (queue_type) {
+	case AMDGPU_RING_TYPE_GFX:
+		return MES_QUEUE_TYPE_GFX;
+	case AMDGPU_RING_TYPE_COMPUTE:
+		return MES_QUEUE_TYPE_COMPUTE;
+	case AMDGPU_RING_TYPE_SDMA:
+		return MES_QUEUE_TYPE_SDMA;
+	default:
+		BUG();
+	}
+
+	return -1;
+}
+
+/*
+ * Map an AMDGPU_MES_PRIORITY_LEVEL_* value onto the matching
+ * AMD_PRIORITY_LEVEL_* value. Unrecognised values are treated as NORMAL.
+ */
+static int convert_to_mes_priority_level(int priority_level)
+{
+	switch (priority_level) {
+	case AMDGPU_MES_PRIORITY_LEVEL_LOW:
+		return AMD_PRIORITY_LEVEL_LOW;
+	case AMDGPU_MES_PRIORITY_LEVEL_MEDIUM:
+		return AMD_PRIORITY_LEVEL_MEDIUM;
+	case AMDGPU_MES_PRIORITY_LEVEL_HIGH:
+		return AMD_PRIORITY_LEVEL_HIGH;
+	case AMDGPU_MES_PRIORITY_LEVEL_REALTIME:
+		return AMD_PRIORITY_LEVEL_REALTIME;
+	case AMDGPU_MES_PRIORITY_LEVEL_NORMAL:
+	default:
+		return AMD_PRIORITY_LEVEL_NORMAL;
+	}
+}
+
+/*
+ * Build an ADD_QUEUE packet from @input and submit it to the MES scheduler.
+ * Returns the result of mes_v11_0_submit_pkt_and_poll_completion().
+ */
+static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
+				  struct mes_add_queue_input *input)
+{
+	struct amdgpu_device *adev = mes->adev;
+	struct amdgpu_vmhub *gfxhub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+	union MESAPI__ADD_QUEUE pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	/* Process-level state. */
+	pkt.process_id = input->process_id;
+	pkt.page_table_base_addr = input->page_table_base_addr;
+	pkt.process_va_start = input->process_va_start;
+	pkt.process_va_end = input->process_va_end;
+	pkt.process_quantum = input->process_quantum;
+	pkt.process_context_addr = input->process_context_addr;
+
+	/* Gang-level state. */
+	pkt.gang_quantum = input->gang_quantum;
+	pkt.gang_context_addr = input->gang_context_addr;
+	pkt.inprocess_gang_priority =
+		convert_to_mes_priority_level(input->inprocess_gang_priority);
+	pkt.gang_global_priority_level =
+		convert_to_mes_priority_level(input->gang_global_priority_level);
+
+	/* Queue-level state. */
+	pkt.doorbell_offset = input->doorbell_offset;
+	pkt.mqd_addr = input->mqd_addr;
+
+	/* API versions below 2 get wptr_addr; version 2 and later get wptr_mc_addr. */
+	if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >>
+	     AMDGPU_MES_API_VERSION_SHIFT) < 2)
+		pkt.wptr_addr = input->wptr_addr;
+	else
+		pkt.wptr_addr = input->wptr_mc_addr;
+
+	pkt.queue_type = convert_to_mes_queue_type(input->queue_type);
+	pkt.paging = input->paging;
+	pkt.vm_context_cntl = gfxhub->vm_cntx_cntl;
+	pkt.gws_base = input->gws_base;
+	pkt.gws_size = input->gws_size;
+	pkt.trap_handler_addr = input->tba_addr;
+	pkt.tma_addr = input->tma_addr;
+	pkt.trap_en = input->trap_en;
+	pkt.skip_process_ctx_clear = input->skip_process_ctx_clear;
+	pkt.is_kfd_process = input->is_kfd_process;
+
+	/* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
+	pkt.is_aql_queue = input->is_aql_queue;
+	pkt.gds_size = input->queue_size;
+
+	pkt.exclusively_scheduled = input->exclusively_scheduled;
+
+	return mes_v11_0_submit_pkt_and_poll_completion(mes, &pkt, sizeof(pkt),
+			offsetof(union MESAPI__ADD_QUEUE, api_status));
+}
+
+/*
+ * Build a REMOVE_QUEUE packet for the queue identified by @input's doorbell
+ * offset and gang context address, and submit it to the MES scheduler.
+ * Returns the result of mes_v11_0_submit_pkt_and_poll_completion().
+ */
+static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes,
+				     struct mes_remove_queue_input *input)
+{
+	uint32_t fw_rev = mes->sched_version & AMDGPU_MES_VERSION_MASK;
+	union MESAPI__REMOVE_QUEUE pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.doorbell_offset = input->doorbell_offset;
+	pkt.gang_context_addr = input->gang_context_addr;
+
+	/* The flag is only forwarded to firmware revision 0x60 or newer. */
+	if (fw_rev >= 0x60)
+		pkt.remove_queue_after_reset = input->remove_queue_after_reset;
+
+	return mes_v11_0_submit_pkt_and_poll_completion(mes, &pkt, sizeof(pkt),
+			offsetof(union MESAPI__REMOVE_QUEUE, api_status));
+}
+
+/*
+ * Reset a single hardware queue by writing registers directly instead of
+ * sending a packet to the MES firmware.
+ *
+ * The whole sequence runs between amdgpu_gfx_rlc_enter_safe_mode() and
+ * amdgpu_gfx_rlc_exit_safe_mode(). Each queue type requests the reset and
+ * then polls, one microsecond per iteration for up to adev->usec_timeout
+ * iterations, until the hardware reports the queue inactive / reset done.
+ *
+ * Returns 0 on success or -ETIMEDOUT when the poll expires.
+ * NOTE(review): a queue_type other than GFX, COMPUTE or SDMA falls through
+ * all branches and returns 0 without touching any queue - confirm callers
+ * never pass other types.
+ */
+static int mes_v11_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_type,
+ uint32_t me_id, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t vmid)
+{
+ struct amdgpu_device *adev = mes->adev;
+ uint32_t value, reg;
+ int i, r = 0;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ if (queue_type == AMDGPU_RING_TYPE_GFX) {
+ dev_info(adev->dev, "reset gfx queue (%d:%d:%d: vmid:%d)\n",
+ me_id, pipe_id, queue_id, vmid);
+
+ /* The CP_VMID_RESET write is done under reset_sem_mutex and the gfx index mutex. */
+ mutex_lock(&adev->gfx.reset_sem_mutex);
+ gfx_v11_0_request_gfx_index_mutex(adev, true);
+ /* all se allow writes */
+ WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX,
+ (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT));
+ /* Request a reset for this vmid and for the one queue bit on the selected pipe. */
+ value = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+ if (pipe_id == 0)
+ value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id);
+ else
+ value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id);
+ WREG32_SOC15(GC, 0, regCP_VMID_RESET, value);
+ gfx_v11_0_request_gfx_index_mutex(adev, false);
+ mutex_unlock(&adev->gfx.reset_sem_mutex);
+
+ /* Select the target queue so the HQD register read below refers to it. */
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n");
+ r = -ETIMEDOUT;
+ }
+
+ /* Restore the default selection before dropping srbm_mutex. */
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ dev_info(adev->dev, "reset compute queue (%d:%d:%d)\n",
+ me_id, pipe_id, queue_id);
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+ /* Issue the dequeue request and the SPI compute queue reset for the selected queue. */
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
+
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on hqd deactivate\n");
+ r = -ETIMEDOUT;
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else if (queue_type == AMDGPU_RING_TYPE_SDMA) {
+ dev_info(adev->dev, "reset sdma queue (%d:%d:%d)\n",
+ me_id, pipe_id, queue_id);
+ /* me_id 1 selects the SDMA1 reset register; every other value uses SDMA0. */
+ switch (me_id) {
+ case 1:
+ reg = SOC15_REG_OFFSET(GC, 0, regSDMA1_QUEUE_RESET_REQ);
+ break;
+ case 0:
+ default:
+ reg = SOC15_REG_OFFSET(GC, 0, regSDMA0_QUEUE_RESET_REQ);
+ break;
+ }
+
+ /* Set the queue's request bit, then poll until it reads back as zero. */
+ value = 1 << queue_id;
+ WREG32(reg, value);
+ /* wait for queue reset done */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32(reg) & value))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on sdma queue reset done\n");
+ r = -ETIMEDOUT;
+ }
+ }
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ return r;
+}
+
+/*
+ * Map a legacy kernel queue through MES: an ADD_QUEUE packet with
+ * map_legacy_kq set, identified by pipe/queue id rather than process state.
+ * Returns the result of mes_v11_0_submit_pkt_and_poll_completion().
+ */
+static int mes_v11_0_map_legacy_queue(struct amdgpu_mes *mes,
+				      struct mes_map_legacy_queue_input *input)
+{
+	union MESAPI__ADD_QUEUE pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.map_legacy_kq = 1;
+	pkt.queue_type = convert_to_mes_queue_type(input->queue_type);
+	pkt.pipe_id = input->pipe_id;
+	pkt.queue_id = input->queue_id;
+	pkt.doorbell_offset = input->doorbell_offset;
+	pkt.mqd_addr = input->mqd_addr;
+	pkt.wptr_addr = input->wptr_addr;
+
+	return mes_v11_0_submit_pkt_and_poll_completion(mes, &pkt, sizeof(pkt),
+			offsetof(union MESAPI__ADD_QUEUE, api_status));
+}
+
+/*
+ * Unmap or preempt a legacy kernel queue through a REMOVE_QUEUE packet.
+ * With action PREEMPT_QUEUES_NO_UNMAP the packet asks for a legacy gfx
+ * queue preemption and carries the trail fence address/value; every other
+ * action asks for an unmap of the given queue type.
+ * Returns the result of mes_v11_0_submit_pkt_and_poll_completion().
+ */
+static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes,
+			struct mes_unmap_legacy_queue_input *input)
+{
+	union MESAPI__REMOVE_QUEUE pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.doorbell_offset = input->doorbell_offset;
+	pkt.gang_context_addr = 0;
+
+	pkt.pipe_id = input->pipe_id;
+	pkt.queue_id = input->queue_id;
+
+	if (input->action != PREEMPT_QUEUES_NO_UNMAP) {
+		pkt.unmap_legacy_queue = 1;
+		pkt.queue_type = convert_to_mes_queue_type(input->queue_type);
+	} else {
+		pkt.preempt_legacy_gfx_queue = 1;
+		pkt.tf_addr = input->trail_fence_addr;
+		pkt.tf_data = lower_32_bits(input->trail_fence_data);
+	}
+
+	return mes_v11_0_submit_pkt_and_poll_completion(mes, &pkt, sizeof(pkt),
+			offsetof(union MESAPI__REMOVE_QUEUE, api_status));
+}
+
+/*
+ * Build a SUSPEND packet from @input (all gangs or one gang context, plus
+ * the suspend fence address/value) and submit it to the MES scheduler.
+ * Returns the result of mes_v11_0_submit_pkt_and_poll_completion().
+ */
+static int mes_v11_0_suspend_gang(struct amdgpu_mes *mes,
+				  struct mes_suspend_gang_input *input)
+{
+	union MESAPI__SUSPEND pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_SUSPEND;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.suspend_all_gangs = input->suspend_all_gangs;
+	pkt.gang_context_addr = input->gang_context_addr;
+	pkt.suspend_fence_addr = input->suspend_fence_addr;
+	pkt.suspend_fence_value = input->suspend_fence_value;
+
+	return mes_v11_0_submit_pkt_and_poll_completion(mes, &pkt, sizeof(pkt),
+			offsetof(union MESAPI__SUSPEND, api_status));
+}
+
+/*
+ * Build a RESUME packet from @input (all gangs or one gang context) and
+ * submit it to the MES scheduler.
+ * Returns the result of mes_v11_0_submit_pkt_and_poll_completion().
+ */
+static int mes_v11_0_resume_gang(struct amdgpu_mes *mes,
+				 struct mes_resume_gang_input *input)
+{
+	union MESAPI__RESUME pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_RESUME;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.resume_all_gangs = input->resume_all_gangs;
+	pkt.gang_context_addr = input->gang_context_addr;
+
+	return mes_v11_0_submit_pkt_and_poll_completion(mes, &pkt, sizeof(pkt),
+			offsetof(union MESAPI__RESUME, api_status));
+}
+
+/*
+ * Send a bare QUERY_SCHEDULER_STATUS packet (header only) and wait for the
+ * firmware to complete it.
+ * Returns the result of mes_v11_0_submit_pkt_and_poll_completion().
+ */
+static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes)
+{
+	union MESAPI__QUERY_MES_STATUS pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	return mes_v11_0_submit_pkt_and_poll_completion(mes, &pkt, sizeof(pkt),
+			offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
+}
+
+/*
+ * Translate a driver-level misc operation into a MISC packet and submit it.
+ *
+ * @mes:   MES instance.
+ * @input: operation selector (input->op) plus the matching argument member.
+ *
+ * Returns the result of mes_v11_0_submit_pkt_and_poll_completion(),
+ * -EINVAL for an operation this function does not know, or 0 without
+ * sending anything when CHANGE_CONFIG is requested on firmware older than
+ * revision 0x63.
+ */
+static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
+			     struct mes_misc_op_input *input)
+{
+	union MESAPI__MISC misc_pkt;
+
+	memset(&misc_pkt, 0, sizeof(misc_pkt));
+
+	misc_pkt.header.type = MES_API_TYPE_SCHEDULER;
+	misc_pkt.header.opcode = MES_SCH_API_MISC;
+	misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	switch (input->op) {
+	case MES_MISC_OP_READ_REG:
+		misc_pkt.opcode = MESAPI_MISC__READ_REG;
+		misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset;
+		misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr;
+		break;
+	case MES_MISC_OP_WRITE_REG:
+		misc_pkt.opcode = MESAPI_MISC__WRITE_REG;
+		misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset;
+		misc_pkt.write_reg.reg_value = input->write_reg.reg_value;
+		break;
+	case MES_MISC_OP_WRM_REG_WAIT:
+		/* Single-register wait: the second register offset is left at 0. */
+		misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
+		misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM;
+		misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
+		misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
+		misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
+		misc_pkt.wait_reg_mem.reg_offset2 = 0;
+		break;
+	case MES_MISC_OP_WRM_REG_WR_WAIT:
+		/* Same packet as above, but with both register offsets supplied. */
+		misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
+		misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG;
+		misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
+		misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
+		misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
+		misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1;
+		break;
+	case MES_MISC_OP_SET_SHADER_DEBUGGER:
+		misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER;
+		misc_pkt.set_shader_debugger.process_context_addr =
+			input->set_shader_debugger.process_context_addr;
+		misc_pkt.set_shader_debugger.flags.u32all =
+			input->set_shader_debugger.flags.u32all;
+		misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl =
+			input->set_shader_debugger.spi_gdbg_per_vmid_cntl;
+		memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl,
+		       input->set_shader_debugger.tcp_watch_cntl,
+		       sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl));
+		misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en;
+		break;
+	case MES_MISC_OP_CHANGE_CONFIG:
+		/*
+		 * Revision 0x63 itself is accepted, so the message states
+		 * ">= 0x63" rather than "larger than 0x63". Older firmware is
+		 * not an error: warn once and report success.
+		 */
+		if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) < 0x63) {
+			dev_warn_once(mes->adev->dev,
+				      "MES FW version must be >= 0x63 to support limit single process feature.\n");
+			return 0;
+		}
+		misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG;
+		misc_pkt.change_config.opcode =
+			MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS;
+		misc_pkt.change_config.option.bits.limit_single_process =
+			input->change_config.option.limit_single_process;
+		break;
+
+	default:
+		/* Per-device logging, matching the other messages in this file. */
+		dev_err(mes->adev->dev, "unsupported misc op (%d)\n", input->op);
+		return -EINVAL;
+	}
+
+	return mes_v11_0_submit_pkt_and_poll_completion(mes,
+			&misc_pkt, sizeof(misc_pkt),
+			offsetof(union MESAPI__MISC, api_status));
+}
+
+/*
+ * Build and submit the SET_HW_RSRC packet that describes to the MES
+ * firmware which VMIDs, hardware queue slots, doorbells and register bases
+ * it may use, together with a fixed set of feature flags.
+ * Returns the result of mes_v11_0_submit_pkt_and_poll_completion().
+ */
+static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
+{
+	struct amdgpu_device *adev = mes->adev;
+	union MESAPI_SET_HW_RESOURCES pkt;
+	int idx;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
+	pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
+	pkt.gds_size = adev->gds.gds_size;
+	pkt.paging_vmid = 0;
+	pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr[0];
+	pkt.query_status_fence_gpu_mc_ptr = mes->query_status_fence_gpu_addr[0];
+
+	/* Per-pipe hardware queue masks for each engine type. */
+	for (idx = 0; idx < MAX_COMPUTE_PIPES; idx++)
+		pkt.compute_hqd_mask[idx] = mes->compute_hqd_mask[idx];
+
+	for (idx = 0; idx < MAX_GFX_PIPES; idx++)
+		pkt.gfx_hqd_mask[idx] = mes->gfx_hqd_mask[idx];
+
+	for (idx = 0; idx < MAX_SDMA_PIPES; idx++)
+		pkt.sdma_hqd_mask[idx] = mes->sdma_hqd_mask[idx];
+
+	for (idx = 0; idx < AMD_PRIORITY_NUM_LEVELS; idx++)
+		pkt.aggregated_doorbells[idx] = mes->aggregated_doorbells[idx];
+
+	/* First five register base offsets of instance 0 of each IP block. */
+	for (idx = 0; idx < 5; idx++) {
+		pkt.gc_base[idx] = adev->reg_offset[GC_HWIP][0][idx];
+		pkt.mmhub_base[idx] = adev->reg_offset[MMHUB_HWIP][0][idx];
+		pkt.osssys_base[idx] = adev->reg_offset[OSSSYS_HWIP][0][idx];
+	}
+
+	pkt.disable_reset = 1;
+	pkt.disable_mes_log = 1;
+	pkt.use_different_vmid_compute = 1;
+	pkt.enable_reg_active_poll = 1;
+	pkt.enable_level_process_quantum_check = 1;
+	pkt.oversubscription_timer = 50;
+
+	/* The LR compute workaround is only requested from revision 0x7f on. */
+	if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) < 0x7f)
+		dev_info_once(mes->adev->dev,
+			      "MES FW version must be >= 0x7f to enable LR compute workaround.\n");
+	else
+		pkt.enable_lr_compute_wa = 1;
+
+	if (amdgpu_mes_log_enable) {
+		pkt.enable_mes_event_int_logging = 1;
+		pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr;
+	}
+
+	if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
+		pkt.limit_single_process = 1;
+
+	return mes_v11_0_submit_pkt_and_poll_completion(mes, &pkt, sizeof(pkt),
+			offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
+}
+
+/*
+ * Build and submit the SET_HW_RSRC_1 packet. The cleaner shader fence is
+ * placed at the start of the resource_1 buffer; when the SR-IOV MES info
+ * feature is enabled, the info context follows one GPU page later.
+ * Returns the result of mes_v11_0_submit_pkt_and_poll_completion().
+ */
+static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes)
+{
+	union MESAPI_SET_HW_RESOURCES_1 pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.enable_mes_info_ctx = 1;
+	pkt.cleaner_shader_fence_mc_addr = mes->resource_1_gpu_addr[0];
+
+	if (amdgpu_sriov_is_mes_info_enable(mes->adev)) {
+		pkt.mes_info_ctx_mc_addr =
+			mes->resource_1_gpu_addr[0] + AMDGPU_GPU_PAGE_SIZE;
+		pkt.mes_info_ctx_size = MES11_HW_RESOURCE_1_SIZE;
+	}
+
+	return mes_v11_0_submit_pkt_and_poll_completion(mes, &pkt, sizeof(pkt),
+			offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status));
+}
+
+/*
+ * Reset one hardware queue. With input->use_mmio the reset is done through
+ * direct register writes; otherwise a RESET packet is sent to the MES
+ * firmware, in either "legacy gfx" or "queue only" form.
+ * Returns the result of the chosen reset path.
+ */
+static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes,
+				    struct mes_reset_queue_input *input)
+{
+	union MESAPI__RESET pkt;
+
+	/* The MMIO path does not build a packet at all. */
+	if (input->use_mmio)
+		return mes_v11_0_reset_queue_mmio(mes, input->queue_type,
+						  input->me_id, input->pipe_id,
+						  input->queue_id, input->vmid);
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_RESET;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.queue_type = convert_to_mes_queue_type(input->queue_type);
+
+	if (!input->legacy_gfx) {
+		pkt.reset_queue_only = 1;
+		pkt.doorbell_offset = input->doorbell_offset;
+	} else {
+		pkt.reset_legacy_gfx = 1;
+		pkt.pipe_id_lp = input->pipe_id;
+		pkt.queue_id_lp = input->queue_id;
+		pkt.mqd_mc_addr_lp = input->mqd_addr;
+		pkt.doorbell_offset_lp = input->doorbell_offset;
+		pkt.wptr_addr_lp = input->wptr_addr;
+		pkt.vmid_id_lp = input->vmid;
+	}
+
+	return mes_v11_0_submit_pkt_and_poll_completion(mes, &pkt, sizeof(pkt),
+			offsetof(union MESAPI__RESET, api_status));
+}
+
+/*
+ * Send a RESET packet asking the firmware to detect hung queues of the
+ * given type, and to reset them too unless input->detect_only is set.
+ * The packet carries mes->hung_queue_db_array_gpu_addr as its doorbell
+ * offset array address.
+ * Returns the result of mes_v11_0_submit_pkt_and_poll_completion().
+ */
+static int mes_v11_0_detect_and_reset_hung_queues(struct amdgpu_mes *mes,
+			struct mes_detect_and_reset_queue_input *input)
+{
+	union MESAPI__RESET pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_RESET;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.queue_type = convert_to_mes_queue_type(input->queue_type);
+	pkt.doorbell_offset_addr = mes->hung_queue_db_array_gpu_addr;
+
+	if (!input->detect_only)
+		pkt.hang_detect_then_reset = 1;
+	else
+		pkt.hang_detect_only = 1;
+
+	return mes_v11_0_submit_pkt_and_poll_completion(mes, &pkt, sizeof(pkt),
+			offsetof(union MESAPI__RESET, api_status));
+}
+
+/*
+ * MES v11 implementation of the amdgpu_mes_funcs callback table. Every entry
+ * points at one of the packet-building helpers defined above in this file.
+ */
+static const struct amdgpu_mes_funcs mes_v11_0_funcs = {
+ .add_hw_queue = mes_v11_0_add_hw_queue,
+ .remove_hw_queue = mes_v11_0_remove_hw_queue,
+ .map_legacy_queue = mes_v11_0_map_legacy_queue,
+ .unmap_legacy_queue = mes_v11_0_unmap_legacy_queue,
+ .suspend_gang = mes_v11_0_suspend_gang,
+ .resume_gang = mes_v11_0_resume_gang,
+ .misc_op = mes_v11_0_misc_op,
+ .reset_hw_queue = mes_v11_0_reset_hw_queue,
+ .detect_and_reset_hung_queues = mes_v11_0_detect_and_reset_hung_queues,
+};
+
+/*
+ * Allocate a buffer object for the MES instruction ucode of @pipe and copy
+ * the ucode section of the loaded firmware image into it. The section's
+ * offset and size are taken from the firmware header.
+ * Returns 0 on success or the error from amdgpu_bo_create_reserved().
+ */
+static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev,
+					   enum amdgpu_mes_pipe pipe)
+{
+	const struct mes_firmware_header_v1_0 *hdr =
+		(const struct mes_firmware_header_v1_0 *)adev->mes.fw[pipe]->data;
+	const __le32 *ucode = (const __le32 *)(adev->mes.fw[pipe]->data +
+			le32_to_cpu(hdr->mes_ucode_offset_bytes));
+	unsigned ucode_size = le32_to_cpu(hdr->mes_ucode_size_bytes);
+	int ret;
+
+	ret = amdgpu_bo_create_reserved(adev, ucode_size,
+					PAGE_SIZE,
+					AMDGPU_GEM_DOMAIN_VRAM |
+					AMDGPU_GEM_DOMAIN_GTT,
+					&adev->mes.ucode_fw_obj[pipe],
+					&adev->mes.ucode_fw_gpu_addr[pipe],
+					(void **)&adev->mes.ucode_fw_ptr[pipe]);
+	if (ret) {
+		dev_err(adev->dev, "(%d) failed to create mes fw bo\n", ret);
+		return ret;
+	}
+
+	memcpy(adev->mes.ucode_fw_ptr[pipe], ucode, ucode_size);
+
+	/* The CPU mapping and reservation are only needed for the copy. */
+	amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[pipe]);
+	amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[pipe]);
+
+	return 0;
+}
+
+/*
+ * Allocate a GFX_MES_DRAM_SIZE buffer object for the MES data segment of
+ * @pipe and copy the data section of the loaded firmware image into it.
+ * Returns 0 on success, -EINVAL when the data section is larger than
+ * GFX_MES_DRAM_SIZE, or the error from amdgpu_bo_create_reserved().
+ */
+static int mes_v11_0_allocate_ucode_data_buffer(struct amdgpu_device *adev,
+						enum amdgpu_mes_pipe pipe)
+{
+	const struct mes_firmware_header_v1_0 *hdr =
+		(const struct mes_firmware_header_v1_0 *)adev->mes.fw[pipe]->data;
+	const __le32 *data_seg = (const __le32 *)(adev->mes.fw[pipe]->data +
+			le32_to_cpu(hdr->mes_ucode_data_offset_bytes));
+	unsigned data_size = le32_to_cpu(hdr->mes_ucode_data_size_bytes);
+	int ret;
+
+	/* Reject images whose data section would overrun the fixed-size buffer. */
+	if (data_size > GFX_MES_DRAM_SIZE) {
+		dev_err(adev->dev, "PIPE%d ucode data fw size (%d) is greater than dram size (%d)\n",
+			pipe, data_size, GFX_MES_DRAM_SIZE);
+		return -EINVAL;
+	}
+
+	ret = amdgpu_bo_create_reserved(adev, GFX_MES_DRAM_SIZE,
+					64 * 1024,
+					AMDGPU_GEM_DOMAIN_VRAM |
+					AMDGPU_GEM_DOMAIN_GTT,
+					&adev->mes.data_fw_obj[pipe],
+					&adev->mes.data_fw_gpu_addr[pipe],
+					(void **)&adev->mes.data_fw_ptr[pipe]);
+	if (ret) {
+		dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", ret);
+		return ret;
+	}
+
+	memcpy(adev->mes.data_fw_ptr[pipe], data_seg, data_size);
+
+	/* The CPU mapping and reservation are only needed for the copy. */
+	amdgpu_bo_kunmap(adev->mes.data_fw_obj[pipe]);
+	amdgpu_bo_unreserve(adev->mes.data_fw_obj[pipe]);
+
+	return 0;
+}
+
+/*
+ * Release the data-segment and instruction ucode buffer objects of @pipe,
+ * in that order.
+ */
+static void mes_v11_0_free_ucode_buffers(struct amdgpu_device *adev,
+					 enum amdgpu_mes_pipe pipe)
+{
+	struct amdgpu_mes *mes = &adev->mes;
+
+	amdgpu_bo_free_kernel(&mes->data_fw_obj[pipe],
+			      &mes->data_fw_gpu_addr[pipe],
+			      (void **)&mes->data_fw_ptr[pipe]);
+
+	amdgpu_bo_free_kernel(&mes->ucode_fw_obj[pipe],
+			      &mes->ucode_fw_gpu_addr[pipe],
+			      (void **)&mes->ucode_fw_ptr[pipe]);
+}
+
+/*
+ * Read the running firmware versions from the hardware and cache them in
+ * adev->mes.sched_version and adev->mes.kiq_version.
+ *
+ * For each MES pipe the function selects ME 3 / that pipe through
+ * soc21_grbm_select() and reads CP_MES_GP3_LO. The KIQ pipe is only read
+ * when adev->enable_mes_kiq is set. Nothing is read once both cached
+ * values are non-zero.
+ */
+static void mes_v11_0_get_fw_version(struct amdgpu_device *adev)
+{
+ int pipe;
+
+ /* return early if we have already fetched these */
+ if (adev->mes.sched_version && adev->mes.kiq_version)
+ return;
+
+ /* get MES scheduler/KIQ versions */
+ mutex_lock(&adev->srbm_mutex);
+
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ /* Select the pipe so the CP_MES_GP3_LO read below refers to it. */
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+ if (pipe == AMDGPU_MES_SCHED_PIPE)
+ adev->mes.sched_version =
+ RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
+ else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
+ adev->mes.kiq_version =
+ RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
+ }
+
+ /* Restore the default selection before dropping srbm_mutex. */
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+/*
+ * Start or stop the MES pipes through CP_MES_CNTL.
+ *
+ * enable == true: optionally program the MSCRATCH registers for event
+ * logging, put the pipes in reset, program each pipe's program counter
+ * start address, then write a control value that has only the ACTIVE bits
+ * set and wait briefly.
+ * enable == false: clear the ACTIVE bits and set the icache invalidate,
+ * reset and halt bits in a single register write.
+ *
+ * Pipe 1 is only reset/activated when adev->enable_mes_kiq is set.
+ */
+static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable)
+{
+ uint64_t ucode_addr;
+ uint32_t pipe, data = 0;
+
+ if (enable) {
+ if (amdgpu_mes_log_enable) {
+ /* MSCRATCH is set to the address just past the first AMDGPU_MES_LOG_BUFFER_SIZE bytes of the event log buffer. */
+ WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO,
+ lower_32_bits(adev->mes.event_log_gpu_addr + AMDGPU_MES_LOG_BUFFER_SIZE));
+ WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI,
+ upper_32_bits(adev->mes.event_log_gpu_addr + AMDGPU_MES_LOG_BUFFER_SIZE));
+ dev_info(adev->dev, "Setup CP MES MSCRATCH address : 0x%x. 0x%x\n",
+ RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI),
+ RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO));
+ }
+
+ /* Assert the pipe reset bits on top of the current register value. */
+ data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL,
+ MES_PIPE1_RESET, adev->enable_mes_kiq ? 1 : 0);
+ WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ if (!adev->enable_mes_kiq &&
+ pipe == AMDGPU_MES_KIQ_PIPE)
+ continue;
+
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+ /* The start registers take uc_start_addr shifted right by two. */
+ ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START,
+ lower_32_bits(ucode_addr));
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI,
+ upper_32_bits(ucode_addr));
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* unhalt MES and activate pipe0 */
+ /* The value is built from 0, so every field other than the ACTIVE bits is written as zero. */
+ data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE,
+ adev->enable_mes_kiq ? 1 : 0);
+ WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
+
+ /* Wait after activation: 100 ms in emulation mode, 500 us otherwise. */
+ if (amdgpu_emu_mode)
+ msleep(100);
+ else
+ udelay(500);
+ } else {
+ /* Read-modify-write: deactivate both pipes, invalidate the icache, assert reset and halt. */
+ data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL,
+ MES_INVALIDATE_ICACHE, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET,
+ adev->enable_mes_kiq ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
+ }
+}
+
+/*
+ * This function is for backdoor MES firmware loading.
+ *
+ * Halts MES, allocates the ucode and data buffers for @pipe and programs the
+ * instruction/data base and bound registers of that pipe. When @prime_icache
+ * is true the instruction cache is invalidated and then primed.
+ *
+ * Returns 0 on success, -EINVAL when no firmware is present for @pipe, or the
+ * error from either buffer allocation (the buffers are freed again when the
+ * data buffer allocation fails).
+ */
+static int mes_v11_0_load_microcode(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe, bool prime_icache)
+{
+ int r;
+ uint32_t data;
+ uint64_t ucode_addr;
+
+ /* MES is halted before the firmware check, so it stays halted on -EINVAL too */
+ mes_v11_0_enable(adev, false);
+
+ if (!adev->mes.fw[pipe])
+ return -EINVAL;
+
+ r = mes_v11_0_allocate_ucode_buffer(adev, pipe);
+ if (r)
+ return r;
+
+ r = mes_v11_0_allocate_ucode_data_buffer(adev, pipe);
+ if (r) {
+ mes_v11_0_free_ucode_buffers(adev, pipe);
+ return r;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ /* me=3, pipe=<pipe argument>, queue=0 */
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_CNTL, 0);
+
+ /* set ucode start address */
+ ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START,
+ lower_32_bits(ucode_addr));
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI,
+ upper_32_bits(ucode_addr));
+
+ /* set ucode firmware address */
+ WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_LO,
+ lower_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
+ WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_HI,
+ upper_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
+
+ /* set ucode instruction cache boundary to 2M-1 */
+ WREG32_SOC15(GC, 0, regCP_MES_MIBOUND_LO, 0x1FFFFF);
+
+ /* set ucode data firmware address */
+ WREG32_SOC15(GC, 0, regCP_MES_MDBASE_LO,
+ lower_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
+ WREG32_SOC15(GC, 0, regCP_MES_MDBASE_HI,
+ upper_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
+
+ /* Set 0x7FFFF (512K-1) to CP_MES_MDBOUND_LO */
+ WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x7FFFF);
+
+ if (prime_icache) {
+ /* invalidate ICACHE */
+ data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
+
+ /* prime the ICACHE. */
+ data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
+ }
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ return 0;
+}
+
+/*
+ * Allocate the EOP buffer for one MES pipe: a MES_EOP_SIZE-byte,
+ * PAGE_SIZE-aligned BO in GTT, stored in adev->mes.eop_gpu_obj[pipe] with its
+ * GPU address in adev->mes.eop_gpu_addr[pipe]. The buffer is zeroed through a
+ * temporary CPU mapping, then unmapped and unreserved.
+ *
+ * Returns 0 on success or the error from amdgpu_bo_create_reserved().
+ */
+static int mes_v11_0_allocate_eop_buf(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ int r;
+ u32 *eop;
+
+ r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mes.eop_gpu_obj[pipe],
+ &adev->mes.eop_gpu_addr[pipe],
+ (void **)&eop);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
+ return r;
+ }
+
+ /* clear the whole BO (its actual size, not just MES_EOP_SIZE) */
+ memset(eop, 0,
+ adev->mes.eop_gpu_obj[pipe]->tbo.base.size);
+
+ /* the CPU mapping is only needed for the memset above */
+ amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[pipe]);
+ amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[pipe]);
+
+ return 0;
+}
+
+/*
+ * Fill in the memory queue descriptor (struct v11_compute_mqd) of @ring.
+ *
+ * The MQD at ring->mqd_ptr is zeroed and then populated from the ring's GPU
+ * addresses (EOP buffer, MQD, ring buffer, rptr/wptr writeback), its size and
+ * its doorbell settings, plus register default values. ring->wptr is reset
+ * to 0. Finishes with an HDP flush. Always returns 0.
+ */
+static int mes_v11_0_mqd_init(struct amdgpu_ring *ring)
+{
+ struct v11_compute_mqd *mqd = ring->mqd_ptr;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ memset(mqd, 0, sizeof(*mqd));
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000007;
+
+ /* the EOP base is stored shifted right by 8 bits */
+ eop_base_addr = ring->eop_gpu_addr >> 8;
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(MES_EOP_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* start with the software wptr and the MQD read/write pointers at 0 */
+ ring->wptr = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = regCP_MQD_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ hqd_gpu_addr = ring->gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = ring->rptr_gpu_addr;
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = ring->wptr_gpu_addr;
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(ring->ring_size / 4) - 1));
+ /* NOTE(review): the value is shifted by 8 before REG_SET_FIELD places it in the field - confirm intended */
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* enable doorbell */
+ /* the doorbell control word is built from 0, not from a register default */
+ tmp = 0;
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ } else
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ mqd->cp_hqd_vmid = 0;
+ /* activate the queue */
+ mqd->cp_hqd_active = 1;
+
+ /* persistent state: register default with PRELOAD_SIZE set to 0x55 */
+ tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE,
+ PRELOAD_SIZE, 0x55);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_DEFAULT;
+ mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT;
+ mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT;
+
+ /* flush HDP after the CPU writes to the MQD */
+ amdgpu_device_flush_hdp(ring->adev, NULL);
+ return 0;
+}
+
+/*
+ * Program the HQD registers of @ring's pipe directly from the values stored
+ * in its MQD (ring->mqd_ptr). Runs under srbm_mutex with me 3 / ring->pipe
+ * selected, and resets the GRBM selection to all zeros before unlocking.
+ */
+static void mes_v11_0_queue_init_register(struct amdgpu_ring *ring)
+{
+ struct v11_compute_mqd *mqd = ring->mqd_ptr;
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t data = 0;
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 3, ring->pipe, 0, 0);
+
+ /* set CP_HQD_VMID.VMID = 0. */
+ data = RREG32_SOC15(GC, 0, regCP_HQD_VMID);
+ data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_VMID, data);
+
+ /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
+ data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ /* set CP_MQD_BASE_ADDR/HI with the MQD base address */
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
+
+ /* set CP_MQD_CONTROL.VMID=0 */
+ /*
+ * NOTE(review): `data` is computed here but the literal 0 is written, so
+ * the whole register is cleared rather than only the VMID field - confirm
+ * this is intended.
+ */
+ data = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
+ data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
+ WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, 0);
+
+ /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
+
+ /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* set CP_HQD_PQ_CONTROL */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
+
+ /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* set CP_HQD_PQ_DOORBELL_CONTROL */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* set CP_HQD_PERSISTENT_STATE from the MQD (mqd_init uses PRELOAD_SIZE=0x55) */
+ WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
+
+ /* set CP_HQD_ACTIVE.ACTIVE=1 */
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, mqd->cp_hqd_active);
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+/*
+ * Hand the MES scheduler ring (adev->mes.ring[0]) to the kiq_map_queues
+ * callback of KIQ 0 and run the ring test helper on the KIQ ring.
+ *
+ * Returns -EINVAL when the KIQ has no kiq_map_queues callback, the error from
+ * amdgpu_ring_alloc() when ring space cannot be reserved, otherwise the result
+ * of amdgpu_ring_test_helper().
+ */
+static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev)
+{
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+	struct amdgpu_ring *kiq_ring = &kiq->ring;
+	int ret;
+
+	if (!(kiq->pmf && kiq->pmf->kiq_map_queues))
+		return -EINVAL;
+
+	/* reserve room on the KIQ ring for the map-queues request */
+	ret = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
+	if (ret) {
+		DRM_ERROR("Failed to lock KIQ (%d).\n", ret);
+		return ret;
+	}
+
+	kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]);
+
+	return amdgpu_ring_test_helper(kiq_ring);
+}
+
+/*
+ * Initialise the hardware queue of one MES pipe.
+ *
+ * The KIQ pipe uses KIQ 0's ring and is programmed directly through
+ * registers; the scheduler pipe uses adev->mes.ring[0] and is enabled through
+ * the KIQ. Any other pipe value is a BUG(). During reset or suspend the
+ * scheduler ring's read/write pointers and contents are cleared first.
+ *
+ * Returns 0 on success or the error from MQD init / KIQ queue enable.
+ */
+static int mes_v11_0_queue_init(struct amdgpu_device *adev,
+				enum amdgpu_mes_pipe pipe)
+{
+	struct amdgpu_ring *ring;
+	int ret;
+
+	switch (pipe) {
+	case AMDGPU_MES_KIQ_PIPE:
+		ring = &adev->gfx.kiq[0].ring;
+		break;
+	case AMDGPU_MES_SCHED_PIPE:
+		ring = &adev->mes.ring[0];
+		break;
+	default:
+		BUG();
+	}
+
+	if (pipe == AMDGPU_MES_SCHED_PIPE &&
+	    (amdgpu_in_reset(adev) || adev->in_suspend)) {
+		*(ring->wptr_cpu_addr) = 0;
+		*(ring->rptr_cpu_addr) = 0;
+		amdgpu_ring_clear_ring(ring);
+	}
+
+	ret = mes_v11_0_mqd_init(ring);
+	if (ret)
+		return ret;
+
+	/* only the scheduler pipe goes through the KIQ */
+	if (pipe != AMDGPU_MES_SCHED_PIPE) {
+		mes_v11_0_queue_init_register(ring);
+		return 0;
+	}
+
+	return mes_v11_0_kiq_enable_queue(adev);
+}
+
+/*
+ * Set up the software state of the MES scheduler ring (adev->mes.ring[0],
+ * me 3 / pipe 0 / queue 0) and register it through amdgpu_ring_init() with a
+ * size argument of 1024 and default priority.
+ *
+ * Returns the result of amdgpu_ring_init().
+ */
+static int mes_v11_0_ring_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring = &adev->mes.ring[0];
+
+	ring->funcs = &mes_v11_0_ring_funcs;
+	ring->ring_obj = NULL;
+	ring->no_scheduler = true;
+
+	/* hardware queue identity, also used for the ring name */
+	ring->me = 3;
+	ring->pipe = 0;
+	ring->queue = 0;
+	sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	ring->use_doorbell = true;
+	ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1;
+
+	ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE];
+
+	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
+				AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+/*
+ * Set up the software state of the KIQ ring used with MES (KIQ 0's ring,
+ * me 3 / pipe 1 / queue 0), initialise its ring lock and register the ring
+ * through amdgpu_ring_init() with a size argument of 1024 and default
+ * priority.
+ *
+ * Returns the result of amdgpu_ring_init().
+ */
+static int mes_v11_0_kiq_ring_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+	struct amdgpu_ring *ring = &kiq->ring;
+
+	spin_lock_init(&kiq->ring_lock);
+
+	/* both are cleared before amdgpu_ring_init() is called below */
+	ring->adev = NULL;
+	ring->ring_obj = NULL;
+	ring->no_scheduler = true;
+
+	/* hardware queue identity, also used for the ring name */
+	ring->me = 3;
+	ring->pipe = 1;
+	ring->queue = 0;
+	sprintf(ring->name, "mes_kiq_%d.%d.%d",
+		ring->me, ring->pipe, ring->queue);
+
+	ring->use_doorbell = true;
+	ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1;
+
+	ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_KIQ_PIPE];
+
+	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
+				AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+/*
+ * Allocate the MQD buffer object and the CPU-side MQD backup for one MES pipe.
+ *
+ * The KIQ pipe uses KIQ 0's ring, the scheduler pipe uses adev->mes.ring[0];
+ * any other pipe value is a BUG(). If the ring already has an MQD BO nothing
+ * is done. The new MQD is zeroed and a backup buffer of the same size is
+ * stored in adev->mes.mqd_backup[pipe].
+ *
+ * Returns 0 on success, the error from amdgpu_bo_create_kernel(), or -ENOMEM
+ * when the backup cannot be allocated. In the -ENOMEM case the MQD BO is
+ * released again so the ring is not left half-initialised.
+ */
+static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev,
+				 enum amdgpu_mes_pipe pipe)
+{
+	int r, mqd_size = sizeof(struct v11_compute_mqd);
+	struct amdgpu_ring *ring;
+
+	if (pipe == AMDGPU_MES_KIQ_PIPE)
+		ring = &adev->gfx.kiq[0].ring;
+	else if (pipe == AMDGPU_MES_SCHED_PIPE)
+		ring = &adev->mes.ring[0];
+	else
+		BUG();
+
+	/* MQD already allocated for this ring */
+	if (ring->mqd_obj)
+		return 0;
+
+	r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_VRAM |
+				    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+				    &ring->mqd_gpu_addr, &ring->mqd_ptr);
+	if (r) {
+		dev_warn(adev->dev, "failed to create ring mqd bo (%d)\n", r);
+		return r;
+	}
+
+	memset(ring->mqd_ptr, 0, mqd_size);
+
+	/* prepare MQD backup */
+	adev->mes.mqd_backup[pipe] = kmalloc(mqd_size, GFP_KERNEL);
+	if (!adev->mes.mqd_backup[pipe]) {
+		dev_warn(adev->dev,
+			 "no memory to create MQD backup for ring %s\n",
+			 ring->name);
+		/*
+		 * Drop the BO again: leaving it in place would make a later
+		 * call take the "already allocated" shortcut above and report
+		 * success without a backup buffer.
+		 */
+		amdgpu_bo_free_kernel(&ring->mqd_obj,
+				      &ring->mqd_gpu_addr,
+				      &ring->mqd_ptr);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Software-side initialisation of the MES v11 IP block.
+ *
+ * Installs the MES function table and the KIQ hw init/fini hooks, sets the
+ * event log size, runs amdgpu_mes_init(), allocates the EOP buffer and MQD
+ * for every pipe in use (the KIQ pipe only with adev->enable_mes_kiq),
+ * initialises the KIQ ring (if enabled) and the scheduler ring, and finally
+ * allocates the resource_1 BO in VRAM.
+ *
+ * Returns 0 on success or the first error encountered. Nothing allocated
+ * earlier is released here on failure.
+ */
+static int mes_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int pipe, r, bo_size;
+
+ adev->mes.funcs = &mes_v11_0_funcs;
+ adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init;
+ adev->mes.kiq_hw_fini = &mes_v11_0_kiq_hw_fini;
+
+ /* the event log buffer is sized for the log area plus the MSCRATCH area */
+ adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE;
+
+ r = amdgpu_mes_init(adev);
+ if (r)
+ return r;
+
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ /* skip the KIQ pipe when MES KIQ is not enabled */
+ if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
+ continue;
+
+ r = mes_v11_0_allocate_eop_buf(adev, pipe);
+ if (r)
+ return r;
+
+ r = mes_v11_0_mqd_sw_init(adev, pipe);
+ if (r)
+ return r;
+ }
+
+ if (adev->enable_mes_kiq) {
+ r = mes_v11_0_kiq_ring_init(adev);
+ if (r)
+ return r;
+ }
+
+ r = mes_v11_0_ring_init(adev);
+ if (r)
+ return r;
+
+ /* one GPU page, plus MES11_HW_RESOURCE_1_SIZE when SR-IOV MES info is enabled */
+ bo_size = AMDGPU_GPU_PAGE_SIZE;
+ if (amdgpu_sriov_is_mes_info_enable(adev))
+ bo_size += MES11_HW_RESOURCE_1_SIZE;
+
+ /* Only needed for AMDGPU_MES_SCHED_PIPE on MES 11*/
+ r = amdgpu_bo_create_kernel(adev,
+ bo_size,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->mes.resource_1[0],
+ &adev->mes.resource_1_gpu_addr[0],
+ &adev->mes.resource_1_addr[0]);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", r);
+ return r;
+ }
+
+ return 0;
+}
+
+/*
+ * Software-side teardown of the MES v11 IP block: frees the resource_1 BO,
+ * the per-pipe MQD backups, EOP buffers and firmware handles, the MQD BOs and
+ * rings of both the KIQ and the scheduler ring, the ucode buffers (direct
+ * firmware load only), and finally calls amdgpu_mes_fini(). Always returns 0.
+ */
+static int mes_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int pipe;
+
+ amdgpu_bo_free_kernel(&adev->mes.resource_1[0], &adev->mes.resource_1_gpu_addr[0],
+ &adev->mes.resource_1_addr[0]);
+
+ /* unlike sw_init, this loop covers every pipe regardless of enable_mes_kiq */
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ kfree(adev->mes.mqd_backup[pipe]);
+
+ amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe],
+ &adev->mes.eop_gpu_addr[pipe],
+ NULL);
+ amdgpu_ucode_release(&adev->mes.fw[pipe]);
+ }
+
+ amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj,
+ &adev->gfx.kiq[0].ring.mqd_gpu_addr,
+ &adev->gfx.kiq[0].ring.mqd_ptr);
+
+ amdgpu_bo_free_kernel(&adev->mes.ring[0].mqd_obj,
+ &adev->mes.ring[0].mqd_gpu_addr,
+ &adev->mes.ring[0].mqd_ptr);
+
+ amdgpu_ring_fini(&adev->gfx.kiq[0].ring);
+ amdgpu_ring_fini(&adev->mes.ring[0]);
+
+ /* the ucode buffers are only freed here for direct firmware loading */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE);
+ mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_SCHED_PIPE);
+ }
+
+ amdgpu_mes_fini(adev);
+ return 0;
+}
+
+/*
+ * Tear down the hardware queue of @ring through direct register access.
+ *
+ * With me 3 / ring->pipe selected under srbm_mutex: if the HQD is active a
+ * dequeue request is issued and CP_HQD_ACTIVE is polled for up to
+ * adev->usec_timeout microseconds; then the doorbell control register and the
+ * queue read/write pointers are cleared. A dequeue that never completes is
+ * not reported to the caller.
+ */
+static void mes_v11_0_kiq_dequeue(struct amdgpu_ring *ring)
+{
+ uint32_t data;
+ int i;
+ struct amdgpu_device *adev = ring->adev;
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 3, ring->pipe, 0, 0);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
+ /* poll once per microsecond until the ACTIVE bit clears or the timeout expires */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ }
+ /* first write DOORBELL_EN=0 with DOORBELL_HIT=1, then clear the whole register */
+ data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 1);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+ /* reset the queue write and read pointers */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 0);
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+/*
+ * Tell the RLC which queue is the KIQ: the low byte of RLC_CP_SCHEDULERS is
+ * replaced with the ring's me/pipe/queue encoding and bit 7 (0x80) is set.
+ */
+static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t sched;
+
+	sched = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
+
+	/* keep the upper 24 bits, rebuild the low byte */
+	sched &= 0xffffff00;
+	sched |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+	sched |= 0x80;
+
+	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, sched);
+}
+
+/*
+ * Undo the KIQ assignment in the RLC: clear the scheduler0 field of
+ * RLC_CP_SCHEDULERS and leave the other bits as read.
+ */
+static void mes_v11_0_kiq_clear(struct amdgpu_device *adev)
+{
+	uint32_t sched = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
+
+	sched &= ~RLC_CP_SCHEDULERS__scheduler0_MASK;
+	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, sched);
+}
+
+/*
+ * Hardware bring-up of the MES KIQ pipe.
+ *
+ * With direct firmware loading, the scheduler and KIQ microcode are loaded
+ * first (icache priming only on the KIQ load). MES is then enabled, the
+ * firmware versions are read, the RLC is told which queue is the KIQ and the
+ * KIQ pipe queue is initialised. Legacy queue mapping is enabled when the
+ * masked scheduler version is >= 0x47, in which case the scheduler pipe is
+ * brought up here as well through mes_v11_0_hw_init().
+ *
+ * Returns 0 on success, -EINVAL when the MES IP block cannot be found, or the
+ * error from a failing step.
+ */
+static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+ struct amdgpu_ip_block *ip_block;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+
+ r = mes_v11_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, false);
+ if (r) {
+ DRM_ERROR("failed to load MES fw, r=%d\n", r);
+ return r;
+ }
+
+ r = mes_v11_0_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, true);
+ if (r) {
+ DRM_ERROR("failed to load MES kiq fw, r=%d\n", r);
+ return r;
+ }
+
+ }
+
+ mes_v11_0_enable(adev, true);
+
+ mes_v11_0_get_fw_version(adev);
+
+ mes_v11_0_kiq_setting(&adev->gfx.kiq[0].ring);
+
+ /* the IP block handle is needed for mes_v11_0_hw_init()/hw_fini() below */
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES);
+ if (unlikely(!ip_block)) {
+ dev_err(adev->dev, "Failed to get MES handle\n");
+ return -EINVAL;
+ }
+
+ r = mes_v11_0_queue_init(adev, AMDGPU_MES_KIQ_PIPE);
+ if (r)
+ goto failure;
+
+ /* sched_version was filled in by mes_v11_0_get_fw_version() above */
+ if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x47)
+ adev->mes.enable_legacy_queue_map = true;
+ else
+ adev->mes.enable_legacy_queue_map = false;
+
+ if (adev->mes.enable_legacy_queue_map) {
+ r = mes_v11_0_hw_init(ip_block);
+ if (r)
+ goto failure;
+ }
+
+ return r;
+
+failure:
+ mes_v11_0_hw_fini(ip_block);
+ return r;
+}
+
+/*
+ * Hardware teardown of the MES KIQ path: dequeue the scheduler ring if it is
+ * marked ready (and mark it not ready), under SR-IOV VF also dequeue the KIQ
+ * ring and clear the RLC KIQ assignment, then halt MES. Always returns 0.
+ */
+static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev)
+{
+ if (adev->mes.ring[0].sched.ready) {
+ mes_v11_0_kiq_dequeue(&adev->mes.ring[0]);
+ adev->mes.ring[0].sched.ready = false;
+ }
+
+ /* the KIQ ring itself is only dequeued when running as an SR-IOV VF */
+ if (amdgpu_sriov_vf(adev)) {
+ mes_v11_0_kiq_dequeue(&adev->gfx.kiq[0].ring);
+ mes_v11_0_kiq_clear(adev);
+ }
+
+ mes_v11_0_enable(adev, false);
+
+ return 0;
+}
+
+/*
+ * Hardware bring-up of the MES scheduler pipe.
+ *
+ * If the scheduler ring is already marked ready, only the ready flags are
+ * refreshed. Otherwise: without MES KIQ the microcode is loaded (direct
+ * firmware loading only) and MES is enabled here; then the scheduler queue is
+ * initialised, the hardware resources are set (the second resource packet
+ * only when the masked scheduler version is >= 0x50), the scheduler status is
+ * queried and the enforce-isolation setting is updated.
+ *
+ * Returns 0 on success or the error from the failing step. Failures after
+ * the firmware load go through mes_v11_0_hw_fini().
+ */
+static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	int r;
+	struct amdgpu_device *adev = ip_block->adev;
+
+	if (adev->mes.ring[0].sched.ready)
+		goto out;
+
+	if (!adev->enable_mes_kiq) {
+		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+			r = mes_v11_0_load_microcode(adev,
+					     AMDGPU_MES_SCHED_PIPE, true);
+			if (r) {
+				/* same wording as the KIQ bring-up path */
+				DRM_ERROR("failed to load MES fw, r=%d\n", r);
+				return r;
+			}
+		}
+
+		mes_v11_0_enable(adev, true);
+	}
+
+	r = mes_v11_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE);
+	if (r)
+		goto failure;
+
+	r = mes_v11_0_set_hw_resources(&adev->mes);
+	if (r)
+		goto failure;
+
+	if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x50) {
+		r = mes_v11_0_set_hw_resources_1(&adev->mes);
+		if (r) {
+			DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r);
+			goto failure;
+		}
+	}
+
+	r = mes_v11_0_query_sched_status(&adev->mes);
+	if (r) {
+		DRM_ERROR("MES is busy\n");
+		goto failure;
+	}
+
+	r = amdgpu_mes_update_enforce_isolation(adev);
+	if (r)
+		goto failure;
+
+out:
+	/*
+	 * Disable KIQ ring usage from the driver once MES is enabled.
+	 * MES uses KIQ ring exclusively so driver cannot access KIQ ring
+	 * with MES enabled.
+	 */
+	adev->gfx.kiq[0].ring.sched.ready = false;
+	adev->mes.ring[0].sched.ready = true;
+
+	return 0;
+
+failure:
+	mes_v11_0_hw_fini(ip_block);
+	return r;
+}
+
+/* IP-block hw_fini hook: does nothing and always returns 0. */
+static int mes_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ return 0;
+}
+
+/* IP-block suspend hook: forwards to mes_v11_0_hw_fini() and returns its result. */
+static int mes_v11_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return mes_v11_0_hw_fini(ip_block);
+}
+
+/* IP-block resume hook: forwards to mes_v11_0_hw_init() and returns its result. */
+static int mes_v11_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ return mes_v11_0_hw_init(ip_block);
+}
+
+/*
+ * IP-block early_init hook: record the MES 11 hung-queue doorbell array
+ * layout and load the microcode for every pipe in use (the KIQ pipe only
+ * when adev->enable_mes_kiq is set).
+ *
+ * Returns 0 on success or the first error from amdgpu_mes_init_microcode().
+ */
+static int mes_v11_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int pipe, ret;
+
+	adev->mes.hung_queue_db_array_size = MES11_HUNG_DB_OFFSET_ARRAY_SIZE;
+	adev->mes.hung_queue_hqd_info_offset = MES11_HUNG_HQD_INFO_OFFSET;
+
+	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+		/* no firmware is needed for a KIQ pipe that is not in use */
+		if (pipe == AMDGPU_MES_KIQ_PIPE && !adev->enable_mes_kiq)
+			continue;
+
+		ret = amdgpu_mes_init_microcode(adev, pipe);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * IP-block callback table for MES v11. suspend and resume are thin wrappers
+ * around hw_fini and hw_init; there is no late_init step.
+ */
+static const struct amd_ip_funcs mes_v11_0_ip_funcs = {
+ .name = "mes_v11_0",
+ .early_init = mes_v11_0_early_init,
+ .late_init = NULL,
+ .sw_init = mes_v11_0_sw_init,
+ .sw_fini = mes_v11_0_sw_fini,
+ .hw_init = mes_v11_0_hw_init,
+ .hw_fini = mes_v11_0_hw_fini,
+ .suspend = mes_v11_0_suspend,
+ .resume = mes_v11_0_resume,
+};
+
+/* Exported IP block descriptor: MES version 11.0.0, driven by mes_v11_0_ip_funcs. */
+const struct amdgpu_ip_block_version mes_v11_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_MES,
+ .major = 11,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &mes_v11_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.h b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.h
new file mode 100644
index 000000000000..b3519e1df2b2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MES_V11_0_H__
+#define __MES_V11_0_H__
+
+extern const struct amdgpu_ip_block_version mes_v11_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
new file mode 100644
index 000000000000..744e95d3984a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -0,0 +1,1942 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include "amdgpu.h"
+#include "gfx_v12_0.h"
+#include "soc15_common.h"
+#include "soc21.h"
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "gc/gc_11_0_0_default.h"
+#include "v12_structs.h"
+#include "mes_v12_api_def.h"
+
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_uni_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_uni_mes.bin");
+
+static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block);
+static int mes_v12_0_hw_fini(struct amdgpu_ip_block *ip_block);
+static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev);
+static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev);
+
+#define MES_EOP_SIZE 2048
+
+#define MES12_HUNG_DB_OFFSET_ARRAY_SIZE 8 /* [0:3] = db offset [4:7] hqd info */
+#define MES12_HUNG_HQD_INFO_OFFSET 4
+
+/*
+ * Publish ring->wptr: store it atomically at ring->wptr_cpu_addr and write it
+ * to the ring's doorbell. Rings without a doorbell are not supported and hit
+ * BUG().
+ */
+static void mes_v12_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (!ring->use_doorbell)
+		BUG();
+
+	atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
+	WDOORBELL64(ring->doorbell_index, ring->wptr);
+}
+
+/* Return the read pointer value currently stored at ring->rptr_cpu_addr. */
+static u64 mes_v12_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	u64 rptr = *ring->rptr_cpu_addr;
+
+	return rptr;
+}
+
+/*
+ * Return the write pointer read atomically from ring->wptr_cpu_addr. Rings
+ * without a doorbell are not supported and hit BUG().
+ */
+static u64 mes_v12_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	if (!ring->use_doorbell)
+		BUG();
+
+	return atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+}
+
+/*
+ * Ring callbacks for the MES v12 ring: pointer accessors defined in this file
+ * plus the generic amdgpu_ring_insert_nop(); 64-bit pointers are enabled.
+ */
+static const struct amdgpu_ring_funcs mes_v12_0_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_MES,
+ .align_mask = 1,
+ .nop = 0,
+ .support_64bit_ptrs = true,
+ .get_rptr = mes_v12_0_ring_get_rptr,
+ .get_wptr = mes_v12_0_ring_get_wptr,
+ .set_wptr = mes_v12_0_ring_set_wptr,
+ .insert_nop = amdgpu_ring_insert_nop,
+};
+
+/*
+ * Printable names for MES API opcodes, indexed by header.opcode (see
+ * mes_v12_0_get_op_string()). Used only for log messages.
+ */
+static const char *mes_v12_0_opcodes[] = {
+ "SET_HW_RSRC",
+ "SET_SCHEDULING_CONFIG",
+ "ADD_QUEUE",
+ "REMOVE_QUEUE",
+ "PERFORM_YIELD",
+ "SET_GANG_PRIORITY_LEVEL",
+ "SUSPEND",
+ "RESUME",
+ "RESET",
+ "SET_LOG_BUFFER",
+ "CHANGE_GANG_PRORITY",
+ "QUERY_SCHEDULER_STATUS",
+ "unused",
+ "SET_DEBUG_VMID",
+ "MISC",
+ "UPDATE_ROOT_PAGE_TABLE",
+ "AMD_LOG",
+ "SET_SE_MODE",
+ "SET_GANG_SUBMIT",
+ "SET_HW_RSRC_1",
+ "INVALIDATE_TLBS",
+};
+
+/*
+ * Printable names for the sub-opcodes of a MISC packet, indexed by the
+ * packet's opcode field (see mes_v12_0_get_misc_op_string()). Used only for
+ * log messages.
+ */
+static const char *mes_v12_0_misc_opcodes[] = {
+ "WRITE_REG",
+ "INV_GART",
+ "QUERY_STATUS",
+ "READ_REG",
+ "WAIT_REG_MEM",
+ "SET_SHADER_DEBUGGER",
+ "NOTIFY_WORK_ON_UNMAPPED_QUEUE",
+ "NOTIFY_TO_UNMAP_PROCESSES",
+};
+
+/*
+ * Look up the printable name of a packet's header opcode.
+ * Returns NULL when the opcode has no entry in mes_v12_0_opcodes[].
+ */
+static const char *mes_v12_0_get_op_string(union MESAPI__MISC *x_pkt)
+{
+	if (x_pkt->header.opcode >= ARRAY_SIZE(mes_v12_0_opcodes))
+		return NULL;
+
+	return mes_v12_0_opcodes[x_pkt->header.opcode];
+}
+
+/*
+ * Look up the printable name of a MISC packet's sub-opcode.
+ * Returns NULL when the packet is not a MES_SCH_API_MISC packet or the
+ * sub-opcode has no entry in mes_v12_0_misc_opcodes[].
+ */
+static const char *mes_v12_0_get_misc_op_string(union MESAPI__MISC *x_pkt)
+{
+	if (x_pkt->header.opcode != MES_SCH_API_MISC)
+		return NULL;
+
+	if (x_pkt->opcode >= ARRAY_SIZE(mes_v12_0_misc_opcodes))
+		return NULL;
+
+	return mes_v12_0_misc_opcodes[x_pkt->opcode];
+}
+
+/*
+ * Submit one MES API packet on a pipe's ring and poll until MES completes it.
+ *
+ * @mes: MES instance; supplies the per-pipe ring and ring lock
+ * @pipe: index into mes->ring[] and mes->ring_lock[]
+ * @pkt: packet to submit; its start is read as a union MESAPI__MISC header and
+ *       the struct MES_API_STATUS at @api_status_off is filled in here
+ * @size: packet size in bytes (size / 4 dwords are written to the ring)
+ * @api_status_off: byte offset of the struct MES_API_STATUS inside @pkt
+ *
+ * @pkt is followed on the ring by a QUERY_SCHEDULER_STATUS packet whose
+ * completion fence is the ring fence with a new sequence number; that fence
+ * is what gets polled. A writeback slot receives the API completion status of
+ * @pkt itself and is checked after the wait.
+ *
+ * Returns 0 on success, -EINVAL for an out-of-range opcode, -ETIMEDOUT when a
+ * fence wait times out or MES reports failure, or the error returned by
+ * amdgpu_device_wb_get() / amdgpu_ring_alloc().
+ */
+static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
+					    int pipe, void *pkt, int size,
+					    int api_status_off)
+{
+	union MESAPI__QUERY_MES_STATUS mes_status_pkt;
+	signed long timeout = 2100000; /* 2100 ms */
+	struct amdgpu_device *adev = mes->adev;
+	struct amdgpu_ring *ring = &mes->ring[pipe];
+	spinlock_t *ring_lock = &mes->ring_lock[pipe];
+	struct MES_API_STATUS *api_status;
+	union MESAPI__MISC *x_pkt = pkt;
+	const char *op_str, *misc_op_str;
+	unsigned long flags;
+	u64 status_gpu_addr;
+	u32 seq, status_offset;
+	u64 *status_ptr;
+	signed long r;
+	int ret;
+
+	if (x_pkt->header.opcode >= MES_SCH_API_MAX)
+		return -EINVAL;
+
+	if (amdgpu_emu_mode) {
+		timeout *= 100;
+	} else if (amdgpu_sriov_vf(adev)) {
+		/* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */
+		timeout = 15 * 600 * 1000;
+	}
+
+	/* writeback slot that receives the API completion status of @pkt */
+	ret = amdgpu_device_wb_get(adev, &status_offset);
+	if (ret)
+		return ret;
+
+	status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4);
+	status_ptr = (u64 *)&adev->wb.wb[status_offset];
+	*status_ptr = 0;
+
+	spin_lock_irqsave(ring_lock, flags);
+	r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
+	if (r)
+		goto error_unlock_free;
+
+	/* wait for an older submission to complete before queuing a new one */
+	seq = ++ring->fence_drv.sync_seq;
+	r = amdgpu_fence_wait_polling(ring,
+				      seq - ring->fence_drv.num_fences_mask,
+				      timeout);
+	if (r < 1) {
+		/*
+		 * A result of 0 means the wait timed out. Returning it
+		 * unchanged would report success for a packet that was never
+		 * submitted, so turn it into an error code.
+		 */
+		if (!r)
+			r = -ETIMEDOUT;
+		goto error_undo;
+	}
+
+	api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
+	api_status->api_completion_fence_addr = status_gpu_addr;
+	api_status->api_completion_fence_value = 1;
+
+	amdgpu_ring_write_multiple(ring, pkt, size / 4);
+
+	/* trailing status query; its completion fence is the ring fence at seq */
+	memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
+	mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
+	mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
+	mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+	mes_status_pkt.api_status.api_completion_fence_addr =
+		ring->fence_drv.gpu_addr;
+	mes_status_pkt.api_status.api_completion_fence_value = seq;
+
+	amdgpu_ring_write_multiple(ring, &mes_status_pkt,
+				   sizeof(mes_status_pkt) / 4);
+
+	amdgpu_ring_commit(ring);
+	spin_unlock_irqrestore(ring_lock, flags);
+
+	op_str = mes_v12_0_get_op_string(x_pkt);
+	misc_op_str = mes_v12_0_get_misc_op_string(x_pkt);
+
+	if (misc_op_str)
+		dev_dbg(adev->dev, "MES(%d) msg=%s (%s) was emitted\n",
+			pipe, op_str, misc_op_str);
+	else if (op_str)
+		dev_dbg(adev->dev, "MES(%d) msg=%s was emitted\n",
+			pipe, op_str);
+	else
+		dev_dbg(adev->dev, "MES(%d) msg=%d was emitted\n",
+			pipe, x_pkt->header.opcode);
+
+	r = amdgpu_fence_wait_polling(ring, seq, timeout);
+
+	/*
+	 * status_ptr[31:0] == 0 (fail) or status_ptr[63:0] == 1 (success).
+	 * If status_ptr[31:0] == 0 then status_ptr[63:32] will have debug error information.
+	 */
+	if (r < 1 || !(lower_32_bits(*status_ptr))) {
+
+		if (misc_op_str)
+			dev_err(adev->dev, "MES(%d) failed to respond to msg=%s (%s)\n",
+				pipe, op_str, misc_op_str);
+		else if (op_str)
+			dev_err(adev->dev, "MES(%d) failed to respond to msg=%s\n",
+				pipe, op_str);
+		else
+			dev_err(adev->dev, "MES(%d) failed to respond to msg=%d\n",
+				pipe, x_pkt->header.opcode);
+
+		/* debugging aid: spin here for as long as halt_if_hws_hang is set */
+		while (halt_if_hws_hang)
+			schedule();
+
+		r = -ETIMEDOUT;
+		goto error_wb_free;
+	}
+
+	amdgpu_device_wb_free(adev, status_offset);
+	return 0;
+
+error_undo:
+	dev_err(adev->dev, "MES ring buffer is full.\n");
+	amdgpu_ring_undo(ring);
+
+error_unlock_free:
+	spin_unlock_irqrestore(ring_lock, flags);
+
+error_wb_free:
+	amdgpu_device_wb_free(adev, status_offset);
+	return r;
+}
+
+/*
+ * Translate an AMDGPU_RING_TYPE_* value into the matching MES_QUEUE_TYPE_*
+ * value. Only GFX, COMPUTE, SDMA and MES ring types are supported; anything
+ * else hits BUG().
+ */
+static int convert_to_mes_queue_type(int queue_type)
+{
+	switch (queue_type) {
+	case AMDGPU_RING_TYPE_GFX:
+		return MES_QUEUE_TYPE_GFX;
+	case AMDGPU_RING_TYPE_COMPUTE:
+		return MES_QUEUE_TYPE_COMPUTE;
+	case AMDGPU_RING_TYPE_SDMA:
+		return MES_QUEUE_TYPE_SDMA;
+	case AMDGPU_RING_TYPE_MES:
+		return MES_QUEUE_TYPE_SCHQ;
+	default:
+		BUG();
+	}
+
+	return -1;
+}
+
+/*
+ * Translate an AMDGPU_MES_PRIORITY_LEVEL_* value into the corresponding
+ * AMD_PRIORITY_LEVEL_* value.  NORMAL and every unrecognised input map to
+ * AMD_PRIORITY_LEVEL_NORMAL.
+ */
+static int convert_to_mes_priority_level(int priority_level)
+{
+	int mes_level = AMD_PRIORITY_LEVEL_NORMAL;
+
+	if (priority_level == AMDGPU_MES_PRIORITY_LEVEL_LOW)
+		mes_level = AMD_PRIORITY_LEVEL_LOW;
+	else if (priority_level == AMDGPU_MES_PRIORITY_LEVEL_MEDIUM)
+		mes_level = AMD_PRIORITY_LEVEL_MEDIUM;
+	else if (priority_level == AMDGPU_MES_PRIORITY_LEVEL_HIGH)
+		mes_level = AMD_PRIORITY_LEVEL_HIGH;
+	else if (priority_level == AMDGPU_MES_PRIORITY_LEVEL_REALTIME)
+		mes_level = AMD_PRIORITY_LEVEL_REALTIME;
+
+	return mes_level;
+}
+
+/*
+ * Build a MES_SCH_API_ADD_QUEUE packet from @input and submit it on the
+ * scheduler pipe, waiting for completion.
+ *
+ * Priority levels and the queue type are converted to their MES encodings;
+ * vm_context_cntl is taken from GFX hub 0.
+ *
+ * Returns the result of mes_v12_0_submit_pkt_and_poll_completion().
+ */
+static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes,
+				  struct mes_add_queue_input *input)
+{
+	struct amdgpu_device *adev = mes->adev;
+	union MESAPI__ADD_QUEUE mes_add_queue_pkt;
+	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+	uint32_t vm_cntx_cntl = hub->vm_cntx_cntl;
+
+	memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
+
+	mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+	mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
+	mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	/* process-level state */
+	mes_add_queue_pkt.process_id = input->process_id;
+	mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
+	mes_add_queue_pkt.process_va_start = input->process_va_start;
+	mes_add_queue_pkt.process_va_end = input->process_va_end;
+	mes_add_queue_pkt.process_quantum = input->process_quantum;
+	mes_add_queue_pkt.process_context_addr = input->process_context_addr;
+
+	/* gang-level state */
+	mes_add_queue_pkt.gang_quantum = input->gang_quantum;
+	mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
+	mes_add_queue_pkt.inprocess_gang_priority =
+		convert_to_mes_priority_level(input->inprocess_gang_priority);
+	mes_add_queue_pkt.gang_global_priority_level =
+		convert_to_mes_priority_level(input->gang_global_priority_level);
+
+	/* queue-level state */
+	mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
+	mes_add_queue_pkt.mqd_addr = input->mqd_addr;
+
+	mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr;
+
+	mes_add_queue_pkt.queue_type =
+		convert_to_mes_queue_type(input->queue_type);
+	mes_add_queue_pkt.paging = input->paging;
+	mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl;
+	mes_add_queue_pkt.gws_base = input->gws_base;
+	mes_add_queue_pkt.gws_size = input->gws_size;
+	mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
+	mes_add_queue_pkt.tma_addr = input->tma_addr;
+	mes_add_queue_pkt.trap_en = input->trap_en;
+	mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear;
+	mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
+
+	/* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
+	mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
+	mes_add_queue_pkt.gds_size = input->queue_size;
+
+	return mes_v12_0_submit_pkt_and_poll_completion(mes,
+			AMDGPU_MES_SCHED_PIPE,
+			&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
+			offsetof(union MESAPI__ADD_QUEUE, api_status));
+}
+
+/*
+ * Build a MES_SCH_API_REMOVE_QUEUE packet for the queue identified by its
+ * doorbell offset and gang context address, and submit it on the scheduler
+ * pipe.  remove_queue_after_reset is only forwarded when the masked
+ * scheduler firmware version is at least 0x5a.
+ */
+static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes,
+				     struct mes_remove_queue_input *input)
+{
+	union MESAPI__REMOVE_QUEUE pkt;
+	uint32_t fw_rev;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.gang_context_addr = input->gang_context_addr;
+	pkt.doorbell_offset = input->doorbell_offset;
+
+	fw_rev = mes->sched_version & AMDGPU_MES_VERSION_MASK;
+	if (fw_rev >= 0x5a)
+		pkt.remove_queue_after_reset = input->remove_queue_after_reset;
+
+	return mes_v12_0_submit_pkt_and_poll_completion(mes,
+			AMDGPU_MES_SCHED_PIPE, &pkt, sizeof(pkt),
+			offsetof(union MESAPI__REMOVE_QUEUE, api_status));
+}
+
+/*
+ * Acquire (req == true) or release (req == false) CP_GFX_INDEX_MUTEX,
+ * using client id 4.
+ *
+ * The request is rewritten and read back once per iteration, with a 1 us
+ * delay between attempts, for at most adev->usec_timeout iterations.
+ *
+ * Returns 0 on success or -EINVAL if the loop ran out of iterations.
+ */
+int gfx_v12_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+ bool req)
+{
+ u32 i, tmp, val;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* Request with MeId=2, PipeId=0 */
+ tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
+ WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
+
+ val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
+ if (req) {
+ /* acquired once the register reads back exactly what was written */
+ if (val == tmp)
+ break;
+ } else {
+ /* compare against "still held by this client" (REQUEST=1) */
+ tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
+ REQUEST, 1);
+
+ /* unlocked or locked by firmware */
+ if (val != tmp)
+ break;
+ }
+ udelay(1);
+ }
+
+ /* loop ran to completion without a break: timed out */
+ if (i >= adev->usec_timeout)
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * Reset one hardware queue by programming registers directly instead of
+ * sending a packet to MES.
+ *
+ * The whole sequence runs between amdgpu_gfx_rlc_enter_safe_mode() and
+ * amdgpu_gfx_rlc_exit_safe_mode().  GFX, compute and SDMA ring types each
+ * have their own register sequence; any other queue_type touches no
+ * registers and returns 0.  @vmid is only used by the GFX path.
+ *
+ * Returns 0 on success or -ETIMEDOUT when the queue did not report the
+ * dequeue/reset as complete within adev->usec_timeout polls of 1 us.
+ *
+ * NOTE(review): the return value of gfx_v12_0_request_gfx_index_mutex() is
+ * ignored in the GFX path, so a failed mutex request goes unreported.
+ */
+static int mes_v12_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_type,
+ uint32_t me_id, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t vmid)
+{
+ struct amdgpu_device *adev = mes->adev;
+ uint32_t value, reg;
+ int i, r = 0;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ if (queue_type == AMDGPU_RING_TYPE_GFX) {
+ dev_info(adev->dev, "reset gfx queue (%d:%d:%d: vmid:%d)\n",
+ me_id, pipe_id, queue_id, vmid);
+
+ /* issue the VMID reset request while holding the GFX index mutex */
+ mutex_lock(&adev->gfx.reset_sem_mutex);
+ gfx_v12_0_request_gfx_index_mutex(adev, true);
+ /* all se allow writes */
+ WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX,
+ (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT));
+ value = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+ /* the queue bit goes into the field that belongs to its pipe */
+ if (pipe_id == 0)
+ value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id);
+ else
+ value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id);
+ WREG32_SOC15(GC, 0, regCP_VMID_RESET, value);
+ gfx_v12_0_request_gfx_index_mutex(adev, false);
+ mutex_unlock(&adev->gfx.reset_sem_mutex);
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n");
+ r = -ETIMEDOUT;
+ }
+
+ /* restore the default GRBM selection before dropping the lock */
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ dev_info(adev->dev, "reset compute queue (%d:%d:%d)\n",
+ me_id, pipe_id, queue_id);
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
+
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on hqd deactivate\n");
+ r = -ETIMEDOUT;
+ }
+ /* restore the default GRBM selection before dropping the lock */
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else if (queue_type == AMDGPU_RING_TYPE_SDMA) {
+ dev_info(adev->dev, "reset sdma queue (%d:%d:%d)\n",
+ me_id, pipe_id, queue_id);
+ /* me_id picks the SDMA instance; anything other than 1 uses SDMA0 */
+ switch (me_id) {
+ case 1:
+ reg = SOC15_REG_OFFSET(GC, 0, regSDMA1_QUEUE_RESET_REQ);
+ break;
+ case 0:
+ default:
+ reg = SOC15_REG_OFFSET(GC, 0, regSDMA0_QUEUE_RESET_REQ);
+ break;
+ }
+
+ value = 1 << queue_id;
+ WREG32(reg, value);
+ /* wait for queue reset done */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32(reg) & value))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on sdma queue reset done\n");
+ r = -ETIMEDOUT;
+ }
+ }
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ return r;
+}
+
+/*
+ * Map a legacy kernel queue through MES: an ADD_QUEUE packet with
+ * map_legacy_kq set.  The packet is sent on the KIQ pipe when unified MES
+ * is enabled, on the scheduler pipe otherwise.
+ */
+static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes,
+				      struct mes_map_legacy_queue_input *input)
+{
+	union MESAPI__ADD_QUEUE pkt;
+	int pipe = mes->adev->enable_uni_mes ? AMDGPU_MES_KIQ_PIPE :
+					       AMDGPU_MES_SCHED_PIPE;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.map_legacy_kq = 1;
+	pkt.queue_type = convert_to_mes_queue_type(input->queue_type);
+	pkt.pipe_id = input->pipe_id;
+	pkt.queue_id = input->queue_id;
+	pkt.doorbell_offset = input->doorbell_offset;
+	pkt.mqd_addr = input->mqd_addr;
+	pkt.wptr_addr = input->wptr_addr;
+
+	return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
+			&pkt, sizeof(pkt),
+			offsetof(union MESAPI__ADD_QUEUE, api_status));
+}
+
+/*
+ * Unmap or preempt a legacy kernel queue through a REMOVE_QUEUE packet.
+ *
+ * PREEMPT_QUEUES_NO_UNMAP preempts the legacy GFX queue and passes the
+ * trail fence address/value; every other action unmaps the queue of the
+ * given type.  The packet is sent on the KIQ pipe when unified MES is
+ * enabled, on the scheduler pipe otherwise.
+ */
+static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes,
+			struct mes_unmap_legacy_queue_input *input)
+{
+	union MESAPI__REMOVE_QUEUE pkt;
+	int pipe = mes->adev->enable_uni_mes ? AMDGPU_MES_KIQ_PIPE :
+					       AMDGPU_MES_SCHED_PIPE;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.pipe_id = input->pipe_id;
+	pkt.queue_id = input->queue_id;
+	pkt.doorbell_offset = input->doorbell_offset;
+	pkt.gang_context_addr = 0;
+
+	if (input->action != PREEMPT_QUEUES_NO_UNMAP) {
+		pkt.unmap_legacy_queue = 1;
+		pkt.queue_type = convert_to_mes_queue_type(input->queue_type);
+	} else {
+		pkt.preempt_legacy_gfx_queue = 1;
+		pkt.tf_addr = input->trail_fence_addr;
+		pkt.tf_data = lower_32_bits(input->trail_fence_data);
+	}
+
+	return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
+			&pkt, sizeof(pkt),
+			offsetof(union MESAPI__REMOVE_QUEUE, api_status));
+}
+
+/*
+ * Send a MES_SCH_API_SUSPEND packet on the scheduler pipe, forwarding the
+ * gang selection and suspend fence from @input.
+ */
+static int mes_v12_0_suspend_gang(struct amdgpu_mes *mes,
+				  struct mes_suspend_gang_input *input)
+{
+	union MESAPI__SUSPEND pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_SUSPEND;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.gang_context_addr = input->gang_context_addr;
+	pkt.suspend_all_gangs = input->suspend_all_gangs;
+	pkt.suspend_fence_value = input->suspend_fence_value;
+	pkt.suspend_fence_addr = input->suspend_fence_addr;
+
+	return mes_v12_0_submit_pkt_and_poll_completion(mes,
+			AMDGPU_MES_SCHED_PIPE, &pkt, sizeof(pkt),
+			offsetof(union MESAPI__SUSPEND, api_status));
+}
+
+/*
+ * Send a MES_SCH_API_RESUME packet on the scheduler pipe, forwarding the
+ * gang selection from @input.
+ */
+static int mes_v12_0_resume_gang(struct amdgpu_mes *mes,
+				 struct mes_resume_gang_input *input)
+{
+	union MESAPI__RESUME pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_RESUME;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.gang_context_addr = input->gang_context_addr;
+	pkt.resume_all_gangs = input->resume_all_gangs;
+
+	return mes_v12_0_submit_pkt_and_poll_completion(mes,
+			AMDGPU_MES_SCHED_PIPE, &pkt, sizeof(pkt),
+			offsetof(union MESAPI__RESUME, api_status));
+}
+
+/*
+ * Send a MES_SCH_API_QUERY_SCHEDULER_STATUS packet with no payload on
+ * @pipe and wait for it to complete.
+ */
+static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes, int pipe)
+{
+	union MESAPI__QUERY_MES_STATUS pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+	pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+
+	return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
+			&pkt, sizeof(pkt),
+			offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
+}
+
+/*
+ * Build and submit a MES_SCH_API_MISC packet for the operation selected by
+ * input->op.
+ *
+ * The packet goes to the KIQ pipe when adev->enable_uni_mes is set and to
+ * the scheduler pipe otherwise, except MES_MISC_OP_SET_SHADER_DEBUGGER,
+ * which is always sent to the scheduler pipe.
+ *
+ * Returns -EINVAL for an unsupported op, otherwise the result of
+ * mes_v12_0_submit_pkt_and_poll_completion().
+ */
+static int mes_v12_0_misc_op(struct amdgpu_mes *mes,
+ struct mes_misc_op_input *input)
+{
+ union MESAPI__MISC misc_pkt;
+ int pipe;
+
+ /* default pipe; a case below may override it */
+ if (mes->adev->enable_uni_mes)
+ pipe = AMDGPU_MES_KIQ_PIPE;
+ else
+ pipe = AMDGPU_MES_SCHED_PIPE;
+
+ memset(&misc_pkt, 0, sizeof(misc_pkt));
+
+ misc_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ misc_pkt.header.opcode = MES_SCH_API_MISC;
+ misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ switch (input->op) {
+ case MES_MISC_OP_READ_REG:
+ misc_pkt.opcode = MESAPI_MISC__READ_REG;
+ misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset;
+ misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr;
+ break;
+ case MES_MISC_OP_WRITE_REG:
+ misc_pkt.opcode = MESAPI_MISC__WRITE_REG;
+ misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset;
+ misc_pkt.write_reg.reg_value = input->write_reg.reg_value;
+ break;
+ case MES_MISC_OP_WRM_REG_WAIT:
+ /* wait on a single register: second offset stays 0 */
+ misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
+ misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM;
+ misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
+ misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
+ misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
+ misc_pkt.wait_reg_mem.reg_offset2 = 0;
+ break;
+ case MES_MISC_OP_WRM_REG_WR_WAIT:
+ /* same packet layout as above, but both register offsets are used */
+ misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
+ misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG;
+ misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
+ misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
+ misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
+ misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1;
+ break;
+ case MES_MISC_OP_SET_SHADER_DEBUGGER:
+ /* always submitted on the scheduler pipe, regardless of uni_mes */
+ pipe = AMDGPU_MES_SCHED_PIPE;
+ misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER;
+ misc_pkt.set_shader_debugger.process_context_addr =
+ input->set_shader_debugger.process_context_addr;
+ misc_pkt.set_shader_debugger.flags.u32all =
+ input->set_shader_debugger.flags.u32all;
+ misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl =
+ input->set_shader_debugger.spi_gdbg_per_vmid_cntl;
+ /* copy size is taken from the destination array in the packet */
+ memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl,
+ input->set_shader_debugger.tcp_watch_cntl,
+ sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl));
+ misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en;
+ break;
+ case MES_MISC_OP_CHANGE_CONFIG:
+ misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG;
+ misc_pkt.change_config.opcode =
+ MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS;
+ misc_pkt.change_config.option.bits.limit_single_process =
+ input->change_config.option.limit_single_process;
+ break;
+
+ default:
+ DRM_ERROR("unsupported misc op (%d) \n", input->op);
+ return -EINVAL;
+ }
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
+ &misc_pkt, sizeof(misc_pkt),
+ offsetof(union MESAPI__MISC, api_status));
+}
+
+/*
+ * Send a MES_SCH_API_SET_HW_RSRC_1 packet on @pipe.  It carries a KIQ
+ * unmap timeout of 0xa and the per-pipe resource_1 GPU address as the
+ * cleaner shader fence address.
+ */
+static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe)
+{
+	union MESAPI_SET_HW_RESOURCES_1 pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.cleaner_shader_fence_mc_addr = mes->resource_1_gpu_addr[pipe];
+	pkt.mes_kiq_unmap_timeout = 0xa;
+
+	return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
+			&pkt, sizeof(pkt),
+			offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status));
+}
+
+/*
+ * Build and submit the MES_SCH_API_SET_HW_RSRC packet for @pipe.
+ *
+ * VMID masks, GDS size, HQD masks and aggregated doorbells are only filled
+ * in for the scheduler pipe; context/fence addresses, register base
+ * offsets and the feature flags are set for every pipe.
+ *
+ * Returns the result of mes_v12_0_submit_pkt_and_poll_completion().
+ */
+static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe)
+{
+ int i;
+ struct amdgpu_device *adev = mes->adev;
+ union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;
+
+ memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));
+
+ mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
+ mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ /* queue/VMID resources are described on the scheduler pipe only */
+ if (pipe == AMDGPU_MES_SCHED_PIPE) {
+ mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
+ mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
+ mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
+ mes_set_hw_res_pkt.paging_vmid = 0;
+
+ for (i = 0; i < MAX_COMPUTE_PIPES; i++)
+ mes_set_hw_res_pkt.compute_hqd_mask[i] =
+ mes->compute_hqd_mask[i];
+
+ for (i = 0; i < MAX_GFX_PIPES; i++)
+ mes_set_hw_res_pkt.gfx_hqd_mask[i] =
+ mes->gfx_hqd_mask[i];
+
+ for (i = 0; i < MAX_SDMA_PIPES; i++)
+ mes_set_hw_res_pkt.sdma_hqd_mask[i] =
+ mes->sdma_hqd_mask[i];
+
+ for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
+ mes_set_hw_res_pkt.aggregated_doorbells[i] =
+ mes->aggregated_doorbells[i];
+ }
+
+ mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr =
+ mes->sch_ctx_gpu_addr[pipe];
+ mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
+ mes->query_status_fence_gpu_addr[pipe];
+
+ /* NOTE(review): 5 is presumably the number of register base segments per IP - confirm */
+ for (i = 0; i < 5; i++) {
+ mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
+ mes_set_hw_res_pkt.mmhub_base[i] =
+ adev->reg_offset[MMHUB_HWIP][0][i];
+ mes_set_hw_res_pkt.osssys_base[i] =
+ adev->reg_offset[OSSSYS_HWIP][0][i];
+ }
+
+ mes_set_hw_res_pkt.disable_reset = 1;
+ mes_set_hw_res_pkt.disable_mes_log = 1;
+ mes_set_hw_res_pkt.use_different_vmid_compute = 1;
+ mes_set_hw_res_pkt.enable_reg_active_poll = 1;
+ mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
+ /* the LR compute workaround is only requested from firmware 0x82 onwards */
+ if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x82)
+ mes_set_hw_res_pkt.enable_lr_compute_wa = 1;
+ else
+ dev_info_once(adev->dev,
+ "MES FW version must be >= 0x82 to enable LR compute workaround.\n");
+
+ /*
+ * Keep oversubscribe timer for sdma . When we have unmapped doorbell
+ * handling support, other queue will not use the oversubscribe timer.
+ * handling mode - 0: disabled; 1: basic version; 2: basic+ version
+ */
+ mes_set_hw_res_pkt.oversubscription_timer = 50;
+ mes_set_hw_res_pkt.unmapped_doorbell_handling = 1;
+
+ /* each pipe gets its own (log buffer + mscratch) slice of the event log */
+ if (amdgpu_mes_log_enable) {
+ mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
+ mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr +
+ pipe * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE);
+ }
+
+ if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
+ mes_set_hw_res_pkt.limit_single_process = 1;
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
+ &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
+ offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
+}
+
+/*
+ * Program CP_MES_DOORBELL_CONTROL1..5 with the aggregated doorbell of the
+ * LOW, NORMAL, MEDIUM, HIGH and REALTIME priority levels respectively.
+ *
+ * Each register is read, has its OFFSET/EN/HIT fields cleared, then gets
+ * the new offset and DOORBELL_EN set.  Finally CP_HQD_GFX_CONTROL is
+ * written with only DB_UPDATED_MSG_EN set.
+ */
+static void mes_v12_0_init_aggregated_doorbell(struct amdgpu_mes *mes)
+{
+ struct amdgpu_device *adev = mes->adev;
+ uint32_t data;
+
+ /* CONTROL1: low priority */
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1);
+ data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] <<
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1, data);
+
+ /* CONTROL2: normal priority */
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2);
+ data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] <<
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2, data);
+
+ /* CONTROL3: medium priority */
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3);
+ data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] <<
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3, data);
+
+ /* CONTROL4: high priority */
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4);
+ data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] <<
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4, data);
+
+ /* CONTROL5: realtime priority */
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5);
+ data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] <<
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5, data);
+
+ /* plain write, not read-modify-write: every other field becomes 0 */
+ data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_HQD_GFX_CONTROL, data);
+}
+
+
+/*
+ * Configure CP_UNMAPPED_DOORBELL: set PROC_LSB to 0xd and switch unmapped
+ * doorbell handling on or off according to @enable.
+ *
+ * The register is updated read-modify-write, so both fields are cleared
+ * before their new value is OR-ed in.
+ */
+static void mes_v12_0_enable_unmapped_doorbell_handling(
+		struct amdgpu_mes *mes, bool enable)
+{
+	struct amdgpu_device *adev = mes->adev;
+	uint32_t data = RREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL);
+
+	/*
+	 * The default PROC_LSB setting is 0xc which means doorbell
+	 * addr[16:12] gives the doorbell page number. For kfd, each
+	 * process will use 2 pages of doorbell, we need to change the
+	 * setting to 0xd
+	 */
+	data &= ~CP_UNMAPPED_DOORBELL__PROC_LSB_MASK;
+	data |= 0xd << CP_UNMAPPED_DOORBELL__PROC_LSB__SHIFT;
+
+	/*
+	 * Clear ENABLE before setting it: without this, enable == false
+	 * could never turn off a bit that was already set in the register.
+	 */
+	data &= ~CP_UNMAPPED_DOORBELL__ENABLE_MASK;
+	data |= (enable ? 1 : 0) << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT;
+
+	WREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL, data);
+}
+
+/*
+ * Reset a hardware queue.
+ *
+ * With input->use_mmio the reset is done through registers by
+ * mes_v12_0_reset_queue_mmio().  Otherwise a MES_SCH_API_RESET packet is
+ * built: legacy GFX queues pass their full queue description in the *_lp
+ * fields, all other queues are identified by doorbell offset only.  The
+ * packet goes to the KIQ pipe for kernel queues (input->is_kq) and to the
+ * scheduler pipe otherwise.
+ */
+static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
+				    struct mes_reset_queue_input *input)
+{
+	union MESAPI__RESET pkt;
+	int pipe;
+
+	if (input->use_mmio)
+		return mes_v12_0_reset_queue_mmio(mes, input->queue_type,
+						  input->me_id, input->pipe_id,
+						  input->queue_id, input->vmid);
+
+	pipe = input->is_kq ? AMDGPU_MES_KIQ_PIPE : AMDGPU_MES_SCHED_PIPE;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_RESET;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.queue_type = convert_to_mes_queue_type(input->queue_type);
+
+	if (!input->legacy_gfx) {
+		pkt.reset_queue_only = 1;
+		pkt.doorbell_offset = input->doorbell_offset;
+	} else {
+		pkt.reset_legacy_gfx = 1;
+		pkt.pipe_id_lp = input->pipe_id;
+		pkt.queue_id_lp = input->queue_id;
+		pkt.mqd_mc_addr_lp = input->mqd_addr;
+		pkt.doorbell_offset_lp = input->doorbell_offset;
+		pkt.wptr_addr_lp = input->wptr_addr;
+		pkt.vmid_id_lp = input->vmid;
+	}
+
+	return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
+			&pkt, sizeof(pkt),
+			offsetof(union MESAPI__RESET, api_status));
+}
+
+/*
+ * Send a MES_SCH_API_RESET packet on the scheduler pipe that asks MES to
+ * detect hung queues of the given type, and to reset them as well unless
+ * input->detect_only is set.  The packet points MES at
+ * mes->hung_queue_db_array_gpu_addr.
+ */
+static int mes_v12_0_detect_and_reset_hung_queues(struct amdgpu_mes *mes,
+			struct mes_detect_and_reset_queue_input *input)
+{
+	union MESAPI__RESET pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_RESET;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.doorbell_offset_addr = mes->hung_queue_db_array_gpu_addr;
+	pkt.queue_type = convert_to_mes_queue_type(input->queue_type);
+
+	if (!input->detect_only)
+		pkt.hang_detect_then_reset = 1;
+	else
+		pkt.hang_detect_only = 1;
+
+	return mes_v12_0_submit_pkt_and_poll_completion(mes,
+			AMDGPU_MES_SCHED_PIPE, &pkt, sizeof(pkt),
+			offsetof(union MESAPI__RESET, api_status));
+}
+
+/*
+ * Convert an amdgpu hub id into the hub id MES expects for TLB
+ * invalidation: 0 for a GFX hub, 1 for MMHUB0, -EINVAL for anything else.
+ */
+static int mes_v12_inv_tlb_convert_hub_id(uint8_t id)
+{
+	/*
+	 * MES doesn't support invalidate gc_hub on slave xcc individually
+	 * master xcc will invalidate all gc_hub for the partition
+	 */
+	if (AMDGPU_IS_GFXHUB(id))
+		return 0;
+
+	if (AMDGPU_IS_MMHUB0(id))
+		return 1;
+
+	return -EINVAL;
+}
+
+/*
+ * Send a MES_SCH_API_INV_TLBS packet on the KIQ pipe to invalidate the
+ * TLBs of input->pasid on the requested hub.
+ *
+ * Returns -EINVAL when input->hub_id has no MES equivalent, otherwise the
+ * result of mes_v12_0_submit_pkt_and_poll_completion().
+ */
+static int mes_v12_0_inv_tlbs_pasid(struct amdgpu_mes *mes,
+				    struct mes_inv_tlbs_pasid_input *input)
+{
+	union MESAPI__INV_TLBS pkt;
+	int mes_hub_id;
+
+	/* convert amdgpu_mes_hub_id to mes expected hub_id */
+	mes_hub_id = mes_v12_inv_tlb_convert_hub_id(input->hub_id);
+	if (mes_hub_id < 0)
+		return -EINVAL;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_INV_TLBS;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.invalidate_tlbs.hub_id = mes_hub_id;
+	pkt.invalidate_tlbs.inv_sel = 0;
+	pkt.invalidate_tlbs.inv_sel_id = input->pasid;
+	pkt.invalidate_tlbs.flush_type = input->flush_type;
+
+	return mes_v12_0_submit_pkt_and_poll_completion(mes,
+			AMDGPU_MES_KIQ_PIPE, &pkt, sizeof(pkt),
+			offsetof(union MESAPI__INV_TLBS, api_status));
+}
+
+/*
+ * MES v12.0 implementation of the amdgpu_mes_funcs callback table.  Each
+ * entry points at the packet-building helper of the same name in this
+ * file.
+ */
+static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
+ .add_hw_queue = mes_v12_0_add_hw_queue,
+ .remove_hw_queue = mes_v12_0_remove_hw_queue,
+ .map_legacy_queue = mes_v12_0_map_legacy_queue,
+ .unmap_legacy_queue = mes_v12_0_unmap_legacy_queue,
+ .suspend_gang = mes_v12_0_suspend_gang,
+ .resume_gang = mes_v12_0_resume_gang,
+ .misc_op = mes_v12_0_misc_op,
+ .reset_hw_queue = mes_v12_0_reset_hw_queue,
+ .invalidate_tlbs_pasid = mes_v12_0_inv_tlbs_pasid,
+ .detect_and_reset_hung_queues = mes_v12_0_detect_and_reset_hung_queues,
+};
+
+/*
+ * Allocate a page-aligned VRAM buffer object for the MES instruction
+ * ucode of @pipe and copy the ucode into it.
+ *
+ * Offset and size of the ucode are read from the firmware header at the
+ * start of adev->mes.fw[pipe]->data.  The BO is unmapped and unreserved
+ * again before returning; its handle and GPU address stay in adev->mes.
+ *
+ * Returns 0 on success or the error from amdgpu_bo_create_reserved().
+ *
+ * NOTE(review): offset/size are not checked against the firmware image
+ * size here; presumably validated when the firmware was loaded - confirm.
+ */
+static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ int r;
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+ const __le32 *fw_data;
+ unsigned fw_size;
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)
+ adev->mes.fw[pipe]->data;
+
+ /* header fields are little-endian on disk */
+ fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+ le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, fw_size,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->mes.ucode_fw_obj[pipe],
+ &adev->mes.ucode_fw_gpu_addr[pipe],
+ (void **)&adev->mes.ucode_fw_ptr[pipe]);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
+ return r;
+ }
+
+ memcpy(adev->mes.ucode_fw_ptr[pipe], fw_data, fw_size);
+
+ /* the CPU mapping is only needed for the copy above */
+ amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[pipe]);
+ amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[pipe]);
+
+ return 0;
+}
+
+/*
+ * Allocate a 64 KiB-aligned VRAM buffer object for the MES ucode data
+ * segment of @pipe and copy the data into it.
+ *
+ * Offset and size of the data segment are read from the firmware header
+ * at the start of adev->mes.fw[pipe]->data.  The BO is unmapped and
+ * unreserved again before returning; its handle and GPU address stay in
+ * adev->mes.
+ *
+ * Returns 0 on success or the error from amdgpu_bo_create_reserved().
+ *
+ * NOTE(review): offset/size are not checked against the firmware image
+ * size here; presumably validated when the firmware was loaded - confirm.
+ */
+static int mes_v12_0_allocate_ucode_data_buffer(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ int r;
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+ const __le32 *fw_data;
+ unsigned fw_size;
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)
+ adev->mes.fw[pipe]->data;
+
+ /* header fields are little-endian on disk */
+ fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+ le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, fw_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->mes.data_fw_obj[pipe],
+ &adev->mes.data_fw_gpu_addr[pipe],
+ (void **)&adev->mes.data_fw_ptr[pipe]);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
+ return r;
+ }
+
+ memcpy(adev->mes.data_fw_ptr[pipe], fw_data, fw_size);
+
+ /* the CPU mapping is only needed for the copy above */
+ amdgpu_bo_kunmap(adev->mes.data_fw_obj[pipe]);
+ amdgpu_bo_unreserve(adev->mes.data_fw_obj[pipe]);
+
+ return 0;
+}
+
+/*
+ * Release the ucode data and ucode instruction buffer objects of @pipe,
+ * in that order, via amdgpu_bo_free_kernel().
+ */
+static void mes_v12_0_free_ucode_buffers(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[pipe],
+ &adev->mes.data_fw_gpu_addr[pipe],
+ (void **)&adev->mes.data_fw_ptr[pipe]);
+
+ amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[pipe],
+ &adev->mes.ucode_fw_gpu_addr[pipe],
+ (void **)&adev->mes.ucode_fw_ptr[pipe]);
+}
+
+/*
+ * Start (enable == true) or stop (enable == false) the MES pipes.
+ *
+ * Enable: for every pipe, under srbm_mutex with me 3 selected, optionally
+ * program the MSCRATCH address inside the event log, set the pipe's reset
+ * bit, program the ucode start address and finally write CP_MES_CNTL with
+ * only the ACTIVE bits set.  A short delay follows to let MES come up.
+ *
+ * Disable: in a single CP_MES_CNTL update, clear both ACTIVE bits and set
+ * INVALIDATE_ICACHE, both pipe RESET bits and HALT.
+ */
+static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable)
+{
+ uint64_t ucode_addr;
+ uint32_t pipe, data = 0;
+
+ if (enable) {
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ /* me=3 with the current pipe, queue 0 */
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+ if (amdgpu_mes_log_enable) {
+ u32 log_size = AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE;
+ /* In case uni mes is not enabled, only program for pipe 0 */
+ if (adev->mes.event_log_size >= (pipe + 1) * log_size) {
+ /* mscratch sits right after this pipe's log buffer */
+ WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO,
+ lower_32_bits(adev->mes.event_log_gpu_addr +
+ pipe * log_size + AMDGPU_MES_LOG_BUFFER_SIZE));
+ WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI,
+ upper_32_bits(adev->mes.event_log_gpu_addr +
+ pipe * log_size + AMDGPU_MES_LOG_BUFFER_SIZE));
+ dev_info(adev->dev, "Setup CP MES MSCRATCH address : 0x%x. 0x%x\n",
+ RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI),
+ RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO));
+ }
+ }
+
+ /* put the pipe into reset before programming its start address */
+ data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
+ if (pipe == 0)
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
+ else
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
+
+ /* the start address is shifted right by 2 before being programmed */
+ ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START,
+ lower_32_bits(ucode_addr));
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI,
+ upper_32_bits(ucode_addr));
+
+ /* unhalt MES and activate one pipe each loop */
+ /* value is built from 0, so the reset/halt bits are dropped here */
+ data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
+ if (pipe)
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1);
+ dev_info(adev->dev, "program CP_MES_CNTL : 0x%x\n", data);
+
+ WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
+
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* wait after activation; longest in emulation mode */
+ if (amdgpu_emu_mode)
+ msleep(100);
+ else if (adev->enable_uni_mes)
+ udelay(500);
+ else
+ udelay(50);
+ } else {
+ data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL,
+ MES_INVALIDATE_ICACHE, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
+ }
+}
+
+/*
+ * Halt MES, then program the ucode start address of every MES pipe into
+ * CP_MES_PRGRM_CNTR_START{,_HI} while holding srbm_mutex.
+ */
+static void mes_v12_0_set_ucode_start_addr(struct amdgpu_device *adev)
+{
+ uint64_t ucode_addr;
+ int pipe;
+
+ /* MES is disabled first, before the start addresses are rewritten */
+ mes_v12_0_enable(adev, false);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ /* me=3, queue=0 */
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+ /* set ucode start address */
+ ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START,
+ lower_32_bits(ucode_addr));
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI,
+ upper_32_bits(ucode_addr));
+
+ /* back to the default GRBM selection after each pipe */
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ }
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+/*
+ * This function is for backdoor MES firmware
+ *
+ * Halts MES, copies the ucode and ucode data of @pipe into freshly
+ * allocated VRAM buffers and programs the instruction/data base and bound
+ * registers for that pipe.  With @prime_icache set, the instruction cache
+ * is invalidated and then primed.
+ *
+ * Returns 0 on success, -EINVAL when no firmware is loaded for @pipe, or
+ * the error from the buffer allocation helpers.
+ */
+static int mes_v12_0_load_microcode(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe, bool prime_icache)
+{
+ int r;
+ uint32_t data;
+
+ /* note: MES is halted even when the firmware check below fails */
+ mes_v12_0_enable(adev, false);
+
+ if (!adev->mes.fw[pipe])
+ return -EINVAL;
+
+ r = mes_v12_0_allocate_ucode_buffer(adev, pipe);
+ if (r)
+ return r;
+
+ r = mes_v12_0_allocate_ucode_data_buffer(adev, pipe);
+ if (r) {
+ /* releases the ucode buffer allocated just above */
+ mes_v12_0_free_ucode_buffers(adev, pipe);
+ return r;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ /* me=3, pipe=0, queue=0 */
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_CNTL, 0);
+
+ /* set ucode firmware address */
+ WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_LO,
+ lower_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
+ WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_HI,
+ upper_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
+
+ /* set ucode instruction cache boundary to 2M-1 */
+ WREG32_SOC15(GC, 0, regCP_MES_MIBOUND_LO, 0x1FFFFF);
+
+ /* set ucode data firmware address */
+ WREG32_SOC15(GC, 0, regCP_MES_MDBASE_LO,
+ lower_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
+ WREG32_SOC15(GC, 0, regCP_MES_MDBASE_HI,
+ upper_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
+
+ /* Set data cache boundary CP_MES_MDBOUND_LO */
+ WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x7FFFF);
+
+ if (prime_icache) {
+ /* invalidate ICACHE */
+ data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
+
+ /* prime the ICACHE. */
+ data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
+ }
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ return 0;
+}
+
+/*
+ * Create the EOP buffer object for @pipe in GTT and zero it.
+ *
+ * The buffer is MES_EOP_SIZE bytes, PAGE_SIZE aligned. Its GPU address is
+ * stored in adev->mes.eop_gpu_addr[pipe]. The CPU mapping is only used for
+ * the memset and is dropped again before the object is unreserved.
+ *
+ * Returns 0 on success or the error from amdgpu_bo_create_reserved().
+ */
+static int mes_v12_0_allocate_eop_buf(struct amdgpu_device *adev,
+				      enum amdgpu_mes_pipe pipe)
+{
+	u32 *cpu_ptr;
+	int ret;
+
+	ret = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
+					AMDGPU_GEM_DOMAIN_GTT,
+					&adev->mes.eop_gpu_obj[pipe],
+					&adev->mes.eop_gpu_addr[pipe],
+					(void **)&cpu_ptr);
+	if (!ret) {
+		memset(cpu_ptr, 0,
+		       adev->mes.eop_gpu_obj[pipe]->tbo.base.size);
+		amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[pipe]);
+		amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[pipe]);
+		return 0;
+	}
+
+	dev_warn(adev->dev, "(%d) create EOP bo failed\n", ret);
+	return ret;
+}
+
+/*
+ * Fill the memory queue descriptor (MQD) of @ring with the initial HQD state.
+ *
+ * Only ring->mqd_ptr and ring->wptr are written here; no hardware register
+ * is accessed. The EOP, MQD, ring-buffer, rptr-report and wptr-poll addresses
+ * are taken from @ring, the control words start from the register default
+ * values, and the queue is marked active in the descriptor.
+ *
+ * Always returns 0.
+ */
+static int mes_v12_0_mqd_init(struct amdgpu_ring *ring)
+{
+ struct v12_compute_mqd *mqd = ring->mqd_ptr;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000007;
+
+ /* the EOP base is stored shifted right by 8 */
+ eop_base_addr = ring->eop_gpu_addr >> 8;
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(MES_EOP_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* start from an empty queue: reset the read and write pointers */
+ ring->wptr = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = regCP_MQD_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
+ hqd_gpu_addr = ring->gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = ring->rptr_gpu_addr;
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = ring->wptr_gpu_addr;
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(ring->ring_size / 4) - 1));
+ /*
+ * NOTE(review): the value below is shifted left by 8 before it is
+ * handed to REG_SET_FIELD, unlike every other field in this function;
+ * confirm against the CP_HQD_PQ_CONTROL field layout that this is
+ * intended.
+ */
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* enable doorbell */
+ tmp = 0;
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ mqd->cp_hqd_vmid = 0;
+ /* activate the queue */
+ mqd->cp_hqd_active = 1;
+
+ tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE,
+ PRELOAD_SIZE, 0x55);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_DEFAULT;
+ mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT;
+ mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT;
+
+ /*
+ * Set CP_HQD_GFX_CONTROL.DB_UPDATED_MSG_EN[15] to enable unmapped
+ * doorbell handling. This is a reserved CP internal register that
+ * cannot be accessed by others.
+ */
+ mqd->reserved_184 = BIT(15);
+
+ return 0;
+}
+
+/*
+ * Program the HQD registers of @ring's pipe directly from its MQD.
+ *
+ * Runs with srbm_mutex held and me=3/ring->pipe selected through
+ * soc21_grbm_select(); the selection is reset to 0 before the lock is
+ * dropped. The doorbell is disabled first, the base/control/poll registers
+ * are then written from the values stored in ring->mqd_ptr, and
+ * CP_HQD_ACTIVE is written last.
+ */
+static void mes_v12_0_queue_init_register(struct amdgpu_ring *ring)
+{
+ struct v12_compute_mqd *mqd = ring->mqd_ptr;
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t data = 0;
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 3, ring->pipe, 0, 0);
+
+ /* set CP_HQD_VMID.VMID = 0. */
+ data = RREG32_SOC15(GC, 0, regCP_HQD_VMID);
+ data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_VMID, data);
+
+ /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
+ data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ /* set CP_MQD_BASE_ADDR/HI with the MQD base address */
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
+
+ /*
+ * set CP_MQD_CONTROL.VMID=0
+ *
+ * NOTE(review): the read-modify-write result in 'data' is not used;
+ * the register is written with a literal 0, which clears every field
+ * and not just VMID. Confirm whether 'data' was meant to be written.
+ */
+ data = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
+ data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
+ WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, 0);
+
+ /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
+
+ /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* set CP_HQD_PQ_CONTROL */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
+
+ /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* set CP_HQD_PQ_DOORBELL_CONTROL */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* set CP_HQD_PERSISTENT_STATE (mes_v12_0_mqd_init uses PRELOAD_SIZE=0x55) */
+ WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
+
+ /* set CP_HQD_ACTIVE from the MQD (mes_v12_0_mqd_init stores 1 there) */
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, mqd->cp_hqd_active);
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+/*
+ * Map MES ring 0 through the gfx KIQ ring.
+ *
+ * Reserves map_queues_size entries on the KIQ ring, emits the map request
+ * with the KIQ packet-manager callback and then runs a ring test on the KIQ
+ * ring. When the test fails, the KIQ ring is marked not ready.
+ *
+ * Returns 0 on success, -EINVAL when no kiq_map_queues callback is
+ * available, or the error from amdgpu_ring_alloc()/amdgpu_ring_test_ring().
+ */
+static int mes_v12_0_kiq_enable_queue(struct amdgpu_device *adev)
+{
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+	struct amdgpu_ring *kiq_ring = &kiq->ring;
+	int ret;
+
+	if (!(kiq->pmf && kiq->pmf->kiq_map_queues))
+		return -EINVAL;
+
+	ret = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
+	if (ret) {
+		DRM_ERROR("Failed to lock KIQ (%d).\n", ret);
+		return ret;
+	}
+
+	kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]);
+
+	ret = amdgpu_ring_test_ring(kiq_ring);
+	if (!ret)
+		return 0;
+
+	DRM_ERROR("kfq enable failed\n");
+	kiq_ring->sched.ready = false;
+	return ret;
+}
+
+/*
+ * Initialise the hardware queue that backs @pipe.
+ *
+ * The ring is the gfx KIQ ring when unified MES is off and @pipe is the KIQ
+ * pipe, otherwise adev->mes.ring[pipe]. The MQD is rebuilt, then the
+ * scheduler pipe is mapped either through amdgpu_mes_map_legacy_queue()
+ * (unified MES) or through the KIQ, while any other pipe is programmed
+ * directly through registers. Finally the firmware version of the pipe is
+ * read from CP_MES_GP3_LO if it has not been cached yet.
+ *
+ * Returns 0 on success or the error from the MQD init / queue mapping step.
+ */
+static int mes_v12_0_queue_init(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ struct amdgpu_ring *ring;
+ int r;
+
+ if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
+ ring = &adev->gfx.kiq[0].ring;
+ else
+ ring = &adev->mes.ring[pipe];
+
+ /* on reset or resume, restart the MES ring from a clean, empty state */
+ if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) &&
+ (amdgpu_in_reset(adev) || adev->in_suspend)) {
+ *(ring->wptr_cpu_addr) = 0;
+ *(ring->rptr_cpu_addr) = 0;
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ r = mes_v12_0_mqd_init(ring);
+ if (r)
+ return r;
+
+ if (pipe == AMDGPU_MES_SCHED_PIPE) {
+ if (adev->enable_uni_mes)
+ r = amdgpu_mes_map_legacy_queue(adev, ring);
+ else
+ r = mes_v12_0_kiq_enable_queue(adev);
+ if (r)
+ return r;
+ } else {
+ mes_v12_0_queue_init_register(ring);
+ }
+
+ /* only query the version while the cached value is still zero */
+ if (((pipe == AMDGPU_MES_SCHED_PIPE) && !adev->mes.sched_version) ||
+ ((pipe == AMDGPU_MES_KIQ_PIPE) && !adev->mes.kiq_version)) {
+ /* get MES scheduler/KIQ versions */
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+ if (pipe == AMDGPU_MES_SCHED_PIPE)
+ adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
+ else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
+ adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ }
+
+ return 0;
+}
+
+/*
+ * Set up the software state of the MES ring for @pipe and register it.
+ *
+ * The ring is placed on me=3, queue=0 of @pipe, uses a doorbell and the EOP
+ * buffer of @pipe, and is created without a scheduler. The scheduler pipe
+ * uses the mes_ring0 doorbell, any other pipe uses mes_ring1; the stored
+ * doorbell index is shifted left by one.
+ *
+ * Returns the result of amdgpu_ring_init().
+ */
+static int mes_v12_0_ring_init(struct amdgpu_device *adev, int pipe)
+{
+	struct amdgpu_ring *mes_ring = &adev->mes.ring[pipe];
+
+	mes_ring->funcs = &mes_v12_0_ring_funcs;
+	mes_ring->ring_obj = NULL;
+	mes_ring->use_doorbell = true;
+	mes_ring->no_scheduler = true;
+	mes_ring->eop_gpu_addr = adev->mes.eop_gpu_addr[pipe];
+
+	mes_ring->me = 3;
+	mes_ring->pipe = pipe;
+	mes_ring->queue = 0;
+	sprintf(mes_ring->name, "mes_%d.%d.%d",
+		mes_ring->me, mes_ring->pipe, mes_ring->queue);
+
+	mes_ring->doorbell_index =
+		(pipe == AMDGPU_MES_SCHED_PIPE ?
+		 adev->doorbell_index.mes_ring0 :
+		 adev->doorbell_index.mes_ring1) << 1;
+
+	return amdgpu_ring_init(adev, mes_ring, 1024, NULL, 0,
+				AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+/*
+ * Set up the gfx KIQ ring (adev->gfx.kiq[0].ring) for use as the MES KIQ.
+ *
+ * The ring is placed on me=3, pipe=1, queue=0, uses the mes_ring1 doorbell
+ * (index shifted left by one) and the EOP buffer of the KIQ pipe, and is
+ * created without a scheduler.
+ *
+ * Returns the result of amdgpu_ring_init().
+ */
+static int mes_v12_0_kiq_ring_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+
+ spin_lock_init(&adev->gfx.kiq[0].ring_lock);
+
+ ring = &adev->gfx.kiq[0].ring;
+
+ ring->me = 3;
+ ring->pipe = 1;
+ ring->queue = 0;
+
+ /*
+ * NOTE(review): presumably cleared so that amdgpu_ring_init() treats
+ * this as a not-yet-initialised ring - confirm against amdgpu_ring_init().
+ */
+ ring->adev = NULL;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1;
+ ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_KIQ_PIPE];
+ ring->no_scheduler = true;
+ sprintf(ring->name, "mes_kiq_%d.%d.%d",
+ ring->me, ring->pipe, ring->queue);
+
+ return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+/*
+ * Allocate the MQD buffer object and the MQD backup for @pipe.
+ *
+ * The ring is the gfx KIQ ring when unified MES is off and @pipe is the KIQ
+ * pipe, otherwise adev->mes.ring[pipe]. Nothing is done when the ring
+ * already has an MQD object. The MQD is created in GTT and zeroed.
+ *
+ * A failed backup allocation is only reported with a warning; the function
+ * still returns 0 in that case and adev->mes.mqd_backup[pipe] stays NULL.
+ *
+ * Returns 0 on success or the error from amdgpu_bo_create_kernel().
+ */
+static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ int r, mqd_size = sizeof(struct v12_compute_mqd);
+ struct amdgpu_ring *ring;
+
+ if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
+ ring = &adev->gfx.kiq[0].ring;
+ else
+ ring = &adev->mes.ring[pipe];
+
+ if (ring->mqd_obj)
+ return 0;
+
+ r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+ &ring->mqd_gpu_addr, &ring->mqd_ptr);
+ if (r) {
+ /* terminate the message so it is not merged with the next log line */
+ dev_warn(adev->dev, "failed to create ring mqd bo (%d)\n", r);
+ return r;
+ }
+
+ memset(ring->mqd_ptr, 0, mqd_size);
+
+ /* prepare MQD backup */
+ adev->mes.mqd_backup[pipe] = kmalloc(mqd_size, GFP_KERNEL);
+ if (!adev->mes.mqd_backup[pipe])
+ dev_warn(adev->dev,
+ "no memory to create MQD backup for ring %s\n",
+ ring->name);
+
+ return 0;
+}
+
+/*
+ * Software-side initialisation of the MES v12.0 IP block.
+ *
+ * Installs the MES callbacks, sizes the event log (one log+scratch area per
+ * pipe with unified MES, a single one otherwise), runs amdgpu_mes_init() and
+ * then, for every pipe, allocates the EOP buffer, the MQD and the ring.
+ * Pipes that use a MES ring additionally get a one-GPU-page "resource_1"
+ * buffer in VRAM.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int mes_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int pipe, r;
+
+ adev->mes.funcs = &mes_v12_0_funcs;
+ adev->mes.kiq_hw_init = &mes_v12_0_kiq_hw_init;
+ adev->mes.kiq_hw_fini = &mes_v12_0_kiq_hw_fini;
+ adev->mes.enable_legacy_queue_map = true;
+
+ adev->mes.event_log_size = adev->enable_uni_mes ?
+ (AMDGPU_MAX_MES_PIPES * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE)) :
+ (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE);
+ r = amdgpu_mes_init(adev);
+ if (r)
+ return r;
+
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ r = mes_v12_0_allocate_eop_buf(adev, pipe);
+ if (r)
+ return r;
+
+ r = mes_v12_0_mqd_sw_init(adev, pipe);
+ if (r)
+ return r;
+
+ if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) {
+ r = mes_v12_0_kiq_ring_init(adev);
+ /* a failed KIQ ring init must not be reported as success */
+ if (r)
+ return r;
+ } else {
+ r = mes_v12_0_ring_init(adev, pipe);
+ if (r)
+ return r;
+ r = amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->mes.resource_1[pipe],
+ &adev->mes.resource_1_gpu_addr[pipe],
+ &adev->mes.resource_1_addr[pipe]);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mes resource_1 bo pipe[%d]\n", r, pipe);
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Release everything allocated by the software init path.
+ *
+ * For every pipe the resource_1 buffer, the MQD backup, the EOP buffer and
+ * the firmware are released; the MES ring and its MQD are torn down for the
+ * pipes that own a MES ring. Without unified MES the gfx KIQ ring and its
+ * MQD are torn down as well. The ucode buffers are only freed for the
+ * direct firmware load type.
+ *
+ * Always returns 0.
+ */
+static int mes_v12_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int pipe;
+
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ amdgpu_bo_free_kernel(&adev->mes.resource_1[pipe],
+ &adev->mes.resource_1_gpu_addr[pipe],
+ &adev->mes.resource_1_addr[pipe]);
+
+ kfree(adev->mes.mqd_backup[pipe]);
+
+ amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe],
+ &adev->mes.eop_gpu_addr[pipe],
+ NULL);
+ amdgpu_ucode_release(&adev->mes.fw[pipe]);
+
+ /* same condition that selects a MES ring (not the gfx KIQ ring) */
+ if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) {
+ amdgpu_bo_free_kernel(&adev->mes.ring[pipe].mqd_obj,
+ &adev->mes.ring[pipe].mqd_gpu_addr,
+ &adev->mes.ring[pipe].mqd_ptr);
+ amdgpu_ring_fini(&adev->mes.ring[pipe]);
+ }
+ }
+
+ /* without unified MES the KIQ pipe is backed by the gfx KIQ ring */
+ if (!adev->enable_uni_mes) {
+ amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj,
+ &adev->gfx.kiq[0].ring.mqd_gpu_addr,
+ &adev->gfx.kiq[0].ring.mqd_ptr);
+ amdgpu_ring_fini(&adev->gfx.kiq[0].ring);
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE);
+ mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_SCHED_PIPE);
+ }
+
+ amdgpu_mes_fini(adev);
+ return 0;
+}
+
+/*
+ * Tear down the scheduler pipe's hardware queue through direct register
+ * access.
+ *
+ * With the scheduler pipe selected under srbm_mutex, an active queue is
+ * asked to dequeue and polled for up to adev->usec_timeout microseconds.
+ * The doorbell control and the ring pointers are then cleared and MES
+ * ring 0 is marked not ready.
+ */
+static void mes_v12_0_kiq_dequeue_sched(struct amdgpu_device *adev)
+{
+ uint32_t data;
+ int i;
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
+ /* a timeout is not reported; the teardown continues regardless */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ }
+ data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 1);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ /* the register is written a second time, now fully cleared */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 0);
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ adev->mes.ring[0].sched.ready = false;
+}
+
+/*
+ * Tell the RLC which queue is the KIQ.
+ *
+ * The low byte of RLC_CP_SCHEDULERS is replaced with the me/pipe/queue of
+ * @ring (me at bit 5, pipe at bit 3, queue at bit 0) and bit 7 is set.
+ */
+static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t sched;
+
+	sched = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
+	sched &= 0xffffff00;
+	sched |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+	sched |= 0x80;
+	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, sched);
+}
+
+/*
+ * Bring up the MES KIQ pipe.
+ *
+ * Registers the KIQ queue with the RLC, loads the firmware for both pipes
+ * when the direct load type is used (only the start addresses are programmed
+ * for RLC backdoor auto load), enables the MES and initialises the KIQ pipe
+ * queue. With unified MES the hardware resources of the KIQ pipe are set up
+ * too, and with legacy queue mapping enabled the scheduler pipe is brought
+ * up through mes_v12_0_hw_init().
+ *
+ * Returns 0 on success or a negative error code. Failures after the IP
+ * block lookup go through mes_v12_0_hw_fini().
+ */
+static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+ struct amdgpu_ip_block *ip_block;
+
+ if (adev->enable_uni_mes)
+ mes_v12_0_kiq_setting(&adev->mes.ring[AMDGPU_MES_KIQ_PIPE]);
+ else
+ mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+
+ /* the icache is only primed on the second (KIQ pipe) load */
+ r = mes_v12_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, false);
+ if (r) {
+ DRM_ERROR("failed to load MES fw, r=%d\n", r);
+ return r;
+ }
+
+ r = mes_v12_0_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, true);
+ if (r) {
+ DRM_ERROR("failed to load MES kiq fw, r=%d\n", r);
+ return r;
+ }
+
+ mes_v12_0_set_ucode_start_addr(adev);
+
+ } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+ mes_v12_0_set_ucode_start_addr(adev);
+
+ mes_v12_0_enable(adev, true);
+
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES);
+ if (unlikely(!ip_block)) {
+ dev_err(adev->dev, "Failed to get MES handle\n");
+ return -EINVAL;
+ }
+
+ r = mes_v12_0_queue_init(adev, AMDGPU_MES_KIQ_PIPE);
+ if (r)
+ goto failure;
+
+ if (adev->enable_uni_mes) {
+ r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_KIQ_PIPE);
+ if (r)
+ goto failure;
+
+ mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_KIQ_PIPE);
+ }
+
+ if (adev->mes.enable_legacy_queue_map) {
+ r = mes_v12_0_hw_init(ip_block);
+ if (r)
+ goto failure;
+ }
+
+ return r;
+
+failure:
+ mes_v12_0_hw_fini(ip_block);
+ return r;
+}
+
+/*
+ * Shut down the MES from the KIQ side.
+ *
+ * When MES ring 0 is marked ready, the scheduler queue is removed first:
+ * through amdgpu_mes_unmap_legacy_queue() with unified MES, otherwise
+ * through direct register access. The MES is then disabled.
+ *
+ * Always returns 0.
+ */
+static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *sched_ring = &adev->mes.ring[0];
+
+	if (!sched_ring->sched.ready)
+		goto disable;
+
+	if (!adev->enable_uni_mes)
+		mes_v12_0_kiq_dequeue_sched(adev);
+	else
+		amdgpu_mes_unmap_legacy_queue(adev,
+					      &adev->mes.ring[AMDGPU_MES_SCHED_PIPE],
+					      RESET_QUEUES, 0, 0);
+
+	sched_ring->sched.ready = false;
+
+disable:
+	mes_v12_0_enable(adev, false);
+
+	return 0;
+}
+
+/*
+ * Bring up the MES scheduler pipe.
+ *
+ * Nothing but the ready-flag update is done when MES ring 0 is already
+ * marked ready. Without a MES KIQ this function loads the scheduler
+ * firmware itself (direct load) or programs the start addresses (RLC
+ * backdoor auto load) and enables the MES. It then enables unmapped
+ * doorbell handling, initialises the scheduler queue, sets up the hardware
+ * resources, initialises the aggregated doorbell, queries the scheduler
+ * status and updates the isolation enforcement.
+ *
+ * Returns 0 on success or a negative error code. Failures after the
+ * firmware load go through mes_v12_0_hw_fini().
+ */
+static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (adev->mes.ring[0].sched.ready)
+ goto out;
+
+ if (!adev->enable_mes_kiq) {
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ r = mes_v12_0_load_microcode(adev,
+ AMDGPU_MES_SCHED_PIPE, true);
+ if (r) {
+ DRM_ERROR("failed to load MES fw, r=%d\n", r);
+ return r;
+ }
+
+ mes_v12_0_set_ucode_start_addr(adev);
+
+ } else if (adev->firmware.load_type ==
+ AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+
+ mes_v12_0_set_ucode_start_addr(adev);
+ }
+
+ mes_v12_0_enable(adev, true);
+ }
+
+ /* Enable the MES to handle doorbell ring on unmapped queue */
+ mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true);
+
+ r = mes_v12_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE);
+ if (r)
+ goto failure;
+
+ r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_SCHED_PIPE);
+ if (r)
+ goto failure;
+
+ /* the second resource packet is only sent to firmware 0x4b or newer */
+ if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x4b)
+ mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE);
+
+ mes_v12_0_init_aggregated_doorbell(&adev->mes);
+
+ r = mes_v12_0_query_sched_status(&adev->mes, AMDGPU_MES_SCHED_PIPE);
+ if (r) {
+ DRM_ERROR("MES is busy\n");
+ goto failure;
+ }
+
+ r = amdgpu_mes_update_enforce_isolation(adev);
+ if (r)
+ goto failure;
+
+out:
+ /*
+ * Disable KIQ ring usage from the driver once MES is enabled.
+ * MES uses KIQ ring exclusively so driver cannot access KIQ ring
+ * with MES enabled.
+ */
+ adev->gfx.kiq[0].ring.sched.ready = false;
+ adev->mes.ring[0].sched.ready = true;
+
+ return 0;
+
+failure:
+ mes_v12_0_hw_fini(ip_block);
+ return r;
+}
+
+/*
+ * IP-block hw_fini callback. Intentionally a no-op that returns 0; the MES
+ * shutdown itself is done by mes_v12_0_kiq_hw_fini().
+ */
+static int mes_v12_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ return 0;
+}
+
+/* IP-block suspend callback: forwards to mes_v12_0_hw_fini(). */
+static int mes_v12_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return mes_v12_0_hw_fini(ip_block);
+}
+
+/* IP-block resume callback: forwards to mes_v12_0_hw_init(). */
+static int mes_v12_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ return mes_v12_0_hw_init(ip_block);
+}
+
+/*
+ * IP-block early_init callback.
+ *
+ * Records the MES12 hung-queue doorbell array size and HQD info offset and
+ * initialises the microcode of every MES pipe.
+ *
+ * Returns 0 on success or the first error from amdgpu_mes_init_microcode().
+ */
+static int mes_v12_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int pipe = 0;
+
+	adev->mes.hung_queue_db_array_size = MES12_HUNG_DB_OFFSET_ARRAY_SIZE;
+	adev->mes.hung_queue_hqd_info_offset = MES12_HUNG_HQD_INFO_OFFSET;
+
+	while (pipe < AMDGPU_MAX_MES_PIPES) {
+		int r = amdgpu_mes_init_microcode(adev, pipe);
+
+		if (r)
+			return r;
+		pipe++;
+	}
+
+	return 0;
+}
+
+/*
+ * IP-block callback table for MES v12.0. No late_init step is provided;
+ * suspend and resume forward to hw_fini and hw_init respectively.
+ */
+static const struct amd_ip_funcs mes_v12_0_ip_funcs = {
+ .name = "mes_v12_0",
+ .early_init = mes_v12_0_early_init,
+ .late_init = NULL,
+ .sw_init = mes_v12_0_sw_init,
+ .sw_fini = mes_v12_0_sw_fini,
+ .hw_init = mes_v12_0_hw_init,
+ .hw_fini = mes_v12_0_hw_fini,
+ .suspend = mes_v12_0_suspend,
+ .resume = mes_v12_0_resume,
+};
+
+/* Exported IP block descriptor: MES, version 12.0.0, using mes_v12_0_ip_funcs. */
+const struct amdgpu_ip_block_version mes_v12_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_MES,
+ .major = 12,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &mes_v12_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.h b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.h
new file mode 100644
index 000000000000..ac3740f353aa
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MES_V12_0_H__
+#define __MES_V12_0_H__
+
+extern const struct amdgpu_ip_block_version mes_v12_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 1da2ec692057..243eabda0607 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -54,7 +54,7 @@ static u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)
static void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
@@ -96,7 +96,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
- if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+ AMD_APU_IS_RENOIR |
+ AMD_APU_IS_GREEN_SARDINE))
/*
* Raven2 has a HW issue that it is unable to use the vram which
* is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
@@ -114,7 +116,7 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
return;
/* Set default page address. */
- value = amdgpu_gmc_vram_mc2pa(adev, adev->vram_scratch.gpu_addr);
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
@@ -176,6 +178,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
+ tmp = mmVM_L2_CNTL3_DEFAULT;
if (adev->gmc.translate_further) {
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
@@ -226,9 +229,55 @@ static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
0);
}
+/*
+ * Program the MMHUB L2 SAW registers for context 0.
+ *
+ * The page table base is taken from the GART buffer object and the start/end
+ * addresses from the GART range in adev->gmc; all of them are written as
+ * 4K page frame numbers (address >> 12) split into low and high halves.
+ * Context 0 is then enabled with its page table depth field cleared, all
+ * other contexts are disabled, and PDE/PTE request snooping is turned on.
+ */
+static void mmhub_v1_0_init_saw(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+ uint32_t tmp;
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ lower_32_bits(pt_base >> 12));
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ upper_32_bits(pt_base >> 12));
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 */
+ /* >> 44 is the upper half of (gart_start >> 12) */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+
+ /* Program SAW CONTEXT0 CNTL: set the enable bit, clear the depth field */
+ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_CNTL);
+ tmp |= 1 << CONTEXT0_CNTL_ENABLE_OFFSET;
+ tmp &= ~(3 << CONTEXT0_CNTL_PAGE_TABLE_DEPTH_OFFSET);
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_CNTL, tmp);
+
+ /* Disable all Contexts except Context0 */
+ tmp = 0xfffe;
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXTS_DISABLE, tmp);
+
+ /* Program SAW CNTL4: enable snooping for PDE and PTE requests */
+ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CNTL4);
+ tmp |= 1 << VMC_TAP_PDE_REQUEST_SNOOP_OFFSET;
+ tmp |= 1 << VMC_TAP_PTE_REQUEST_SNOOP_OFFSET;
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CNTL4, tmp);
+}
+
static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned num_level, block_size;
uint32_t tmp;
int i;
@@ -241,7 +290,7 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
block_size -= 9;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
num_level);
@@ -280,11 +329,14 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
+
+ if (amdgpu_ip_version(adev, ISP_HWIP, 0))
+ mmhub_v1_0_init_saw(adev);
}
static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned i;
for (i = 0; i < 18; ++i) {
@@ -304,7 +356,7 @@ static void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
if (adev->pg_flags & AMD_PG_SUPPORT_MMHUB)
amdgpu_dpm_set_powergating_by_smu(adev,
AMD_IP_BLOCK_TYPE_GMC,
- enable);
+ enable, 0);
}
static int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
@@ -337,7 +389,7 @@ static int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
static void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
u32 tmp;
u32 i;
@@ -414,7 +466,7 @@ static void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool
static void mmhub_v1_0_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(MMHUB, 0,
@@ -546,7 +598,7 @@ static int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
return 0;
}
-static void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+static void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
int data, data1;
@@ -774,13 +826,17 @@ static void mmhub_v1_0_reset_ras_error_count(struct amdgpu_device *adev)
}
}
-const struct amdgpu_mmhub_ras_funcs mmhub_v1_0_ras_funcs = {
- .ras_late_init = amdgpu_mmhub_ras_late_init,
- .ras_fini = amdgpu_mmhub_ras_fini,
+struct amdgpu_ras_block_hw_ops mmhub_v1_0_ras_hw_ops = {
.query_ras_error_count = mmhub_v1_0_query_ras_error_count,
.reset_ras_error_count = mmhub_v1_0_reset_ras_error_count,
};
+/* MMHUB v1.0 RAS descriptor; its RAS block uses mmhub_v1_0_ras_hw_ops. */
+struct amdgpu_mmhub_ras mmhub_v1_0_ras = {
+ .ras_block = {
+ .hw_ops = &mmhub_v1_0_ras_hw_ops,
+ },
+};
+
const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = {
.get_fb_location = mmhub_v1_0_get_fb_location,
.init = mmhub_v1_0_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
index 4661b094e007..dae7ca48bd8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
@@ -24,6 +24,6 @@
#define __MMHUB_V1_0_H__
extern const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs;
-extern const struct amdgpu_mmhub_ras_funcs mmhub_v1_0_ras_funcs;
+extern struct amdgpu_mmhub_ras mmhub_v1_0_ras;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
index f5f7181f9af5..2adee2b94c37 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
@@ -54,7 +54,7 @@ static u64 mmhub_v1_7_get_fb_location(struct amdgpu_device *adev)
static void mmhub_v1_7_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid, lower_32_bits(page_table_base));
@@ -134,7 +134,7 @@ static void mmhub_v1_7_init_system_aperture_regs(struct amdgpu_device *adev)
}
/* Set default page address. */
- value = amdgpu_gmc_vram_mc2pa(adev, adev->vram_scratch.gpu_addr);
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
@@ -172,6 +172,30 @@ static void mmhub_v1_7_init_tlb_regs(struct amdgpu_device *adev)
WREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL, tmp);
}
+/*
+ * Set the snoop bit for SDMA so that SDMA writes probe-invalidate RW lines.
+ *
+ * For each of the 5 DAGB instances, bit 15 (the SDMA client) is set in both
+ * the WRCLI snoop override register and the matching override value
+ * register. Instances are addressed relative to DAGB0 using the register
+ * distance between DAGB0 and DAGB1.
+ */
+static void mmhub_v1_7_init_snoop_override_regs(struct amdgpu_device *adev)
+{
+	const uint32_t stride = regDAGB1_WRCLI_GPU_SNOOP_OVERRIDE -
+				regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE;
+	uint32_t val;
+	int inst;
+
+	for (inst = 0; inst < 5; inst++) {
+		/* enable the override for the SDMA client */
+		val = RREG32_SOC15_OFFSET(MMHUB, 0,
+					  regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE,
+					  inst * stride);
+		val |= (1 << 15);
+		WREG32_SOC15_OFFSET(MMHUB, 0,
+				    regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE,
+				    inst * stride, val);
+
+		/* and make the overridden value "snoop" */
+		val = RREG32_SOC15_OFFSET(MMHUB, 0,
+					  regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE,
+					  inst * stride);
+		val |= (1 << 15);
+		WREG32_SOC15_OFFSET(MMHUB, 0,
+				    regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE,
+				    inst * stride, val);
+	}
+}
+
static void mmhub_v1_7_init_cache_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
@@ -261,7 +285,7 @@ static void mmhub_v1_7_disable_identity_aperture(struct amdgpu_device *adev)
static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned num_level, block_size;
uint32_t tmp;
int i;
@@ -274,7 +298,7 @@ static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev)
block_size -= 9;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
num_level);
@@ -319,7 +343,7 @@ static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev)
static void mmhub_v1_7_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned i;
for (i = 0; i < 18; ++i) {
@@ -337,6 +361,7 @@ static int mmhub_v1_7_gart_enable(struct amdgpu_device *adev)
mmhub_v1_7_init_system_aperture_regs(adev);
mmhub_v1_7_init_tlb_regs(adev);
mmhub_v1_7_init_cache_regs(adev);
+ mmhub_v1_7_init_snoop_override_regs(adev);
mmhub_v1_7_enable_system_domain(adev);
mmhub_v1_7_disable_identity_aperture(adev);
@@ -348,7 +373,7 @@ static int mmhub_v1_7_gart_enable(struct amdgpu_device *adev)
static void mmhub_v1_7_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
u32 tmp;
u32 i;
@@ -425,7 +450,7 @@ static void mmhub_v1_7_set_fault_enable_default(struct amdgpu_device *adev, bool
static void mmhub_v1_7_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(MMHUB, 0,
@@ -542,9 +567,9 @@ static int mmhub_v1_7_set_clockgating(struct amdgpu_device *adev,
return 0;
}
-static void mmhub_v1_7_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+static void mmhub_v1_7_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
- int data, data1;
+ u32 data, data1;
if (amdgpu_sriov_vf(adev))
*flags = 0;
@@ -1321,15 +1346,19 @@ static void mmhub_v1_7_reset_ras_error_status(struct amdgpu_device *adev)
}
}
-const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs = {
- .ras_late_init = amdgpu_mmhub_ras_late_init,
- .ras_fini = amdgpu_mmhub_ras_fini,
+struct amdgpu_ras_block_hw_ops mmhub_v1_7_ras_hw_ops = {
.query_ras_error_count = mmhub_v1_7_query_ras_error_count,
.reset_ras_error_count = mmhub_v1_7_reset_ras_error_count,
.query_ras_error_status = mmhub_v1_7_query_ras_error_status,
.reset_ras_error_status = mmhub_v1_7_reset_ras_error_status,
};
+struct amdgpu_mmhub_ras mmhub_v1_7_ras = {
+ .ras_block = {
+ .hw_ops = &mmhub_v1_7_ras_hw_ops,
+ },
+};
+
const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs = {
.get_fb_location = mmhub_v1_7_get_fb_location,
.init = mmhub_v1_7_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.h
index a7f9dfc24697..629f49052137 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.h
@@ -24,6 +24,6 @@
#define __MMHUB_V1_7_H__
extern const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs;
-extern const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs;
+extern struct amdgpu_mmhub_ras mmhub_v1_7_ras;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
new file mode 100644
index 000000000000..cc688ae79e84
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
@@ -0,0 +1,871 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "mmhub_v1_8.h"
+
+#include "mmhub/mmhub_1_8_0_offset.h"
+#include "mmhub/mmhub_1_8_0_sh_mask.h"
+#include "vega10_enum.h"
+
+#include "soc15_common.h"
+#include "soc15.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_psp.h"
+
+#define regVM_L2_CNTL3_DEFAULT 0x80100007
+#define regVM_L2_CNTL4_DEFAULT 0x000000c1
+
+static u64 mmhub_v1_8_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base = RREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_BASE);
+ u64 top = RREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_TOP);
+
+ base &= MC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ top &= MC_VM_FB_LOCATION_TOP__FB_TOP_MASK;
+ top <<= 24;
+
+ adev->gmc.fb_start = base;
+ adev->gmc.fb_end = top;
+
+ return base;
+}
+
+static void mmhub_v1_8_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub;
+ u32 inst_mask;
+ int i;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ hub = &adev->vmhub[AMDGPU_MMHUB0(i)];
+ WREG32_SOC15_OFFSET(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+ }
+}
+
+static void mmhub_v1_8_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t gart_start = amdgpu_virt_xgmi_migrate_enabled(adev) ?
+ adev->gmc.vram_start : adev->gmc.fb_start;
+ uint64_t pt_base;
+ u32 inst_mask;
+ int i;
+
+ if (adev->gmc.pdb0_bo)
+ pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo);
+ else
+ pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v1_8_setup_vm_pt_regs(adev, 0, pt_base);
+
+ /* If use GART for FB translation, vmid0 page table covers both
+ * vram and system memory (gart)
+ */
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(gart_start >> 12));
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+
+ } else {
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+ }
+ }
+}
+
+static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp, inst_mask;
+ uint64_t value;
+ int i;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, i, regMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, i, regMC_VM_AGP_BOT,
+ adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, i, regMC_VM_AGP_TOP,
+ adev->gmc.agp_end >> 24);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+
+ WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* In the case squeezing vram into GART aperture, we don't use
+ * FB aperture and AGP aperture. Disable them.
+ */
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(MMHUB, i, regMC_VM_AGP_BOT, 0xFFFFFF);
+ WREG32_SOC15(MMHUB, i, regMC_VM_AGP_TOP, 0);
+ WREG32_SOC15(MMHUB, i, regMC_VM_FB_LOCATION_TOP, 0);
+ WREG32_SOC15(MMHUB, i, regMC_VM_FB_LOCATION_BASE,
+ 0x00FFFFFF);
+ WREG32_SOC15(MMHUB, i,
+ regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ 0x3FFFFFFF);
+ WREG32_SOC15(MMHUB, i,
+ regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0);
+ }
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, i, regVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, i, regVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+ }
+}
+
+static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp, inst_mask;
+ int i;
+
+ if (amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev)) {
+ tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
+
+ psp_reg_program_no_ring(&adev->psp, tmp, PSP_REG_MMHUB_L1_TLB_CNTL);
+ } else {
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
+
+ WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp);
+ }
+ }
+}
+
+/* Set snoop bit for SDMA so that SDMA writes probe-invalidates RW lines */
+static void mmhub_v1_8_init_snoop_override_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp, inst_mask;
+ int i, j;
+ uint32_t distance = regDAGB1_WRCLI_GPU_SNOOP_OVERRIDE -
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ for (j = 0; j < 5; j++) { /* DAGB instances */
+ tmp = RREG32_SOC15_OFFSET(MMHUB, i,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, j * distance);
+ tmp |= (1 << 15); /* SDMA client is BIT15 */
+ WREG32_SOC15_OFFSET(MMHUB, i,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, j * distance, tmp);
+
+ tmp = RREG32_SOC15_OFFSET(MMHUB, i,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, j * distance);
+ tmp |= (1 << 15);
+ WREG32_SOC15_OFFSET(MMHUB, i,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, j * distance, tmp);
+ }
+ }
+}
+
+static void mmhub_v1_8_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp, inst_mask;
+ int i;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ tmp = RREG32_SOC15(MMHUB, i, regVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL,
+ ENABLE_L2_FRAGMENT_PROCESSING, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION,
+ 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL,
+ CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL,
+ IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, i, regVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(MMHUB, i, regVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS,
+ 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, i, regVM_L2_CNTL2, tmp);
+
+ tmp = regVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, i, regVM_L2_CNTL3, tmp);
+
+ tmp = regVM_L2_CNTL4_DEFAULT;
+ /* For AMD APP APUs setup WC memory */
+ if (adev->gmc.xgmi.connected_to_cpu || adev->gmc.is_app_apu) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PDE_REQUEST_PHYSICAL, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PTE_REQUEST_PHYSICAL, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ }
+ WREG32_SOC15(MMHUB, i, regVM_L2_CNTL4, tmp);
+ }
+}
+
+static void mmhub_v1_8_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp, inst_mask;
+ int i;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ tmp = RREG32_SOC15(MMHUB, i, regVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH,
+ adev->gmc.vmid0_page_table_depth);
+ tmp = REG_SET_FIELD(tmp,
+ VM_CONTEXT0_CNTL, PAGE_TABLE_BLOCK_SIZE,
+ adev->gmc.vmid0_page_table_block_size);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(MMHUB, i, regVM_CONTEXT0_CNTL, tmp);
+ }
+}
+
+static void mmhub_v1_8_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ u32 inst_mask;
+ int i;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0XFFFFFFFF);
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+ }
+}
+
+static void mmhub_v1_8_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub;
+ unsigned int num_level, block_size;
+ uint32_t tmp, inst_mask;
+ int i, j;
+
+ num_level = adev->vm_manager.num_level;
+ block_size = adev->vm_manager.block_size;
+ if (adev->gmc.translate_further)
+ num_level -= 1;
+ else
+ block_size -= 9;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(j, inst_mask) {
+ hub = &adev->vmhub[AMDGPU_MMHUB0(j)];
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PAGE_TABLE_DEPTH, num_level);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ block_size);
+ /* On 9.4.3, XNACK can be enabled in the SQ
+ * per-process. Retry faults need to be enabled for
+ * that to work.
+ */
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 1);
+ WREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+ }
+}
+
+static void mmhub_v1_8_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub;
+ u32 i, j, inst_mask;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(j, inst_mask) {
+ hub = &adev->vmhub[AMDGPU_MMHUB0(j)];
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+ }
+}
+
+static int mmhub_v1_8_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ mmhub_v1_8_init_gart_aperture_regs(adev);
+ mmhub_v1_8_init_system_aperture_regs(adev);
+ mmhub_v1_8_init_tlb_regs(adev);
+ mmhub_v1_8_init_cache_regs(adev);
+ mmhub_v1_8_init_snoop_override_regs(adev);
+
+ mmhub_v1_8_enable_system_domain(adev);
+ mmhub_v1_8_disable_identity_aperture(adev);
+ mmhub_v1_8_setup_vmid_config(adev);
+ mmhub_v1_8_program_invalidation(adev);
+
+ return 0;
+}
+
+static void mmhub_v1_8_disable_l1_tlb(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ u32 i, inst_mask;
+
+ if (amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev)) {
+ tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ psp_reg_program_no_ring(&adev->psp, tmp, PSP_REG_MMHUB_L1_TLB_CNTL);
+ } else {
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp);
+ }
+ }
+}
+
+static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub;
+ u32 tmp;
+ u32 i, j, inst_mask;
+
+ /* Disable all tables */
+ inst_mask = adev->aid_mask;
+ for_each_inst(j, inst_mask) {
+ hub = &adev->vmhub[AMDGPU_MMHUB0(j)];
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+ if (!amdgpu_sriov_vf(adev)) {
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, j, regVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE,
+ 0);
+ WREG32_SOC15(MMHUB, j, regVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, j, regVM_L2_CNTL3, 0);
+ }
+ }
+
+ mmhub_v1_8_disable_l1_tlb(adev);
+}
+
+/**
+ * mmhub_v1_8_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void mmhub_v1_8_set_fault_enable_default(struct amdgpu_device *adev, bool value)
+{
+ u32 tmp, inst_mask;
+ int i;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ tmp = RREG32_SOC15(MMHUB, i, regVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+
+ WREG32_SOC15(MMHUB, i, regVM_L2_PROTECTION_FAULT_CNTL, tmp);
+ }
+}
+
+static void mmhub_v1_8_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub;
+ u32 inst_mask;
+ int i;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ hub = &adev->vmhub[AMDGPU_MMHUB0(i)];
+
+ hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, i, regVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, i, regVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, i, regVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status = SOC15_REG_OFFSET(MMHUB, i,
+ regVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(MMHUB, i,
+ regVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regVM_CONTEXT1_CNTL - regVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance =
+ regVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regVM_INVALIDATE_ENG1_REQ -
+ regVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+ }
+}
+
+static int mmhub_v1_8_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static void mmhub_v1_8_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+
+}
+
+const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = {
+ .get_fb_location = mmhub_v1_8_get_fb_location,
+ .init = mmhub_v1_8_init,
+ .gart_enable = mmhub_v1_8_gart_enable,
+ .set_fault_enable_default = mmhub_v1_8_set_fault_enable_default,
+ .gart_disable = mmhub_v1_8_gart_disable,
+ .setup_vm_pt_regs = mmhub_v1_8_setup_vm_pt_regs,
+ .set_clockgating = mmhub_v1_8_set_clockgating,
+ .get_clockgating = mmhub_v1_8_get_clockgating,
+};
+
+static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ce_reg_list[] = {
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA0_CE_ERR_STATUS_LO, regMMEA0_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA0"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA1_CE_ERR_STATUS_LO, regMMEA1_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA1"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA2_CE_ERR_STATUS_LO, regMMEA2_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA2"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA3_CE_ERR_STATUS_LO, regMMEA3_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA3"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA4_CE_ERR_STATUS_LO, regMMEA4_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA4"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMM_CANE_CE_ERR_STATUS_LO, regMM_CANE_CE_ERR_STATUS_HI),
+ 1, 0, "MM_CANE"},
+};
+
+static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ue_reg_list[] = {
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA0_UE_ERR_STATUS_LO, regMMEA0_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA0"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA1_UE_ERR_STATUS_LO, regMMEA1_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA1"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA2_UE_ERR_STATUS_LO, regMMEA2_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA2"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA3_UE_ERR_STATUS_LO, regMMEA3_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA3"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA4_UE_ERR_STATUS_LO, regMMEA4_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA4"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMM_CANE_UE_ERR_STATUS_LO, regMM_CANE_UE_ERR_STATUS_HI),
+ 1, 0, "MM_CANE"},
+};
+
+static const struct amdgpu_ras_memory_id_entry mmhub_v1_8_ras_memory_list[] = {
+ {AMDGPU_MMHUB_WGMI_PAGEMEM, "MMEA_WGMI_PAGEMEM"},
+ {AMDGPU_MMHUB_RGMI_PAGEMEM, "MMEA_RGMI_PAGEMEM"},
+ {AMDGPU_MMHUB_WDRAM_PAGEMEM, "MMEA_WDRAM_PAGEMEM"},
+ {AMDGPU_MMHUB_RDRAM_PAGEMEM, "MMEA_RDRAM_PAGEMEM"},
+ {AMDGPU_MMHUB_WIO_CMDMEM, "MMEA_WIO_CMDMEM"},
+ {AMDGPU_MMHUB_RIO_CMDMEM, "MMEA_RIO_CMDMEM"},
+ {AMDGPU_MMHUB_WGMI_CMDMEM, "MMEA_WGMI_CMDMEM"},
+ {AMDGPU_MMHUB_RGMI_CMDMEM, "MMEA_RGMI_CMDMEM"},
+ {AMDGPU_MMHUB_WDRAM_CMDMEM, "MMEA_WDRAM_CMDMEM"},
+ {AMDGPU_MMHUB_RDRAM_CMDMEM, "MMEA_RDRAM_CMDMEM"},
+ {AMDGPU_MMHUB_MAM_DMEM0, "MMEA_MAM_DMEM0"},
+ {AMDGPU_MMHUB_MAM_DMEM1, "MMEA_MAM_DMEM1"},
+ {AMDGPU_MMHUB_MAM_DMEM2, "MMEA_MAM_DMEM2"},
+ {AMDGPU_MMHUB_MAM_DMEM3, "MMEA_MAM_DMEM3"},
+ {AMDGPU_MMHUB_WRET_TAGMEM, "MMEA_WRET_TAGMEM"},
+ {AMDGPU_MMHUB_RRET_TAGMEM, "MMEA_RRET_TAGMEM"},
+ {AMDGPU_MMHUB_WIO_DATAMEM, "MMEA_WIO_DATAMEM"},
+ {AMDGPU_MMHUB_WGMI_DATAMEM, "MMEA_WGMI_DATAMEM"},
+ {AMDGPU_MMHUB_WDRAM_DATAMEM, "MMEA_WDRAM_DATAMEM"},
+};
+
+static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev,
+ uint32_t mmhub_inst,
+ void *ras_err_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
+ unsigned long ue_count = 0, ce_count = 0;
+
+ /* NOTE: mmhub is converted by aid_mask and the range is 0-3,
+ * which can be used as die ID directly */
+ struct amdgpu_smuio_mcm_config_info mcm_info = {
+ .socket_id = adev->smuio.funcs->get_socket_id(adev),
+ .die_id = mmhub_inst,
+ };
+
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ mmhub_v1_8_ce_reg_list,
+ ARRAY_SIZE(mmhub_v1_8_ce_reg_list),
+ mmhub_v1_8_ras_memory_list,
+ ARRAY_SIZE(mmhub_v1_8_ras_memory_list),
+ mmhub_inst,
+ AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+ &ce_count);
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ mmhub_v1_8_ue_reg_list,
+ ARRAY_SIZE(mmhub_v1_8_ue_reg_list),
+ mmhub_v1_8_ras_memory_list,
+ ARRAY_SIZE(mmhub_v1_8_ras_memory_list),
+ mmhub_inst,
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ &ue_count);
+
+ amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count);
+ amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
+}
+
+static void mmhub_v1_8_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_err_status)
+{
+ uint32_t inst_mask;
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+ dev_warn(adev->dev, "MMHUB RAS is not supported\n");
+ return;
+ }
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask)
+ mmhub_v1_8_inst_query_ras_error_count(adev, i, ras_err_status);
+}
+
+static void mmhub_v1_8_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ uint32_t mmhub_inst)
+{
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ mmhub_v1_8_ce_reg_list,
+ ARRAY_SIZE(mmhub_v1_8_ce_reg_list),
+ mmhub_inst);
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ mmhub_v1_8_ue_reg_list,
+ ARRAY_SIZE(mmhub_v1_8_ue_reg_list),
+ mmhub_inst);
+}
+
+static void mmhub_v1_8_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ uint32_t inst_mask;
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+ dev_warn(adev->dev, "MMHUB RAS is not supported\n");
+ return;
+ }
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask)
+ mmhub_v1_8_inst_reset_ras_error_count(adev, i);
+}
+
+static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = {
+ .query_ras_error_count = mmhub_v1_8_query_ras_error_count,
+ .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count,
+};
+
+static int mmhub_v1_8_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ struct aca_bank_info info;
+ u64 misc0;
+ int ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
+ 1ULL);
+ break;
+ case ACA_SMU_TYPE_CE:
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+ ACA_REG__MISC0__ERRCNT(misc0));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+/* reference to smu driver if header file */
+static int mmhub_v1_8_err_codes[] = {
+ 0, 1, 2, 3, 4, /* CODE_DAGB0 - 4 */
+ 5, 6, 7, 8, 9, /* CODE_EA0 - 4 */
+ 10, /* CODE_UTCL2_ROUTER */
+ 11, /* CODE_VML2 */
+ 12, /* CODE_VML2_WALKER */
+ 13, /* CODE_MMCANE */
+};
+
+static bool mmhub_v1_8_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ u32 instlo;
+
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1);
+
+ if (instlo != mmSMNAID_AID0_MCA_SMU)
+ return false;
+
+ if (aca_bank_check_error_codes(handle->adev, bank,
+ mmhub_v1_8_err_codes,
+ ARRAY_SIZE(mmhub_v1_8_err_codes)))
+ return false;
+
+ return true;
+}
+
+static const struct aca_bank_ops mmhub_v1_8_aca_bank_ops = {
+ .aca_bank_parser = mmhub_v1_8_aca_bank_parser,
+ .aca_bank_is_valid = mmhub_v1_8_aca_bank_is_valid,
+};
+
+static const struct aca_info mmhub_v1_8_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK,
+ .bank_ops = &mmhub_v1_8_aca_bank_ops,
+};
+
+static int mmhub_v1_8_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__MMHUB,
+ &mmhub_v1_8_aca_info, NULL);
+ if (r)
+ goto late_fini;
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+
+ return r;
+}
+
+struct amdgpu_mmhub_ras mmhub_v1_8_ras = {
+ .ras_block = {
+ .hw_ops = &mmhub_v1_8_ras_hw_ops,
+ .ras_late_init = mmhub_v1_8_ras_late_init,
+ },
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h
new file mode 100644
index 000000000000..126f0075ac50
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V1_8_H__
+#define __MMHUB_V1_8_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs;
+extern struct amdgpu_mmhub_ras mmhub_v1_8_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index 3718ff610ab2..a0cc8e218ca1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -32,8 +32,6 @@
#include "gc/gc_10_1_0_offset.h"
#include "soc15_common.h"
-#define mmMM_ATC_L2_MISC_CG_Sienna_Cichlid 0x064d
-#define mmMM_ATC_L2_MISC_CG_Sienna_Cichlid_BASE_IDX 0
#define mmDAGB0_CNTL_MISC2_Sienna_Cichlid 0x0070
#define mmDAGB0_CNTL_MISC2_Sienna_Cichlid_BASE_IDX 0
@@ -153,7 +151,7 @@ mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(2, 0, 0):
case IP_VERSION(2, 0, 2):
mmhub_cid = mmhub_client_ids_navi1x[cid][rw];
@@ -189,7 +187,7 @@ mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
static void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
@@ -236,7 +234,7 @@ static void mmhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev)
}
/* Set default page address. */
- value = amdgpu_gmc_vram_mc2pa(adev, adev->vram_scratch.gpu_addr);
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
@@ -321,7 +319,7 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
tmp = mmMMVM_L2_CNTL5_DEFAULT;
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
- WREG32_SOC15(GC, 0, mmMMVM_L2_CNTL5, tmp);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL5, tmp);
}
static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev)
@@ -364,12 +362,12 @@ static void mmhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev)
static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
int i;
uint32_t tmp;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
@@ -408,11 +406,13 @@ static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
+
+ hub->vm_cntx_cntl = tmp;
}
static void mmhub_v2_0_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned i;
for (i = 0; i < 18; ++i) {
@@ -441,7 +441,7 @@ static int mmhub_v2_0_gart_enable(struct amdgpu_device *adev)
static void mmhub_v2_0_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
u32 tmp;
u32 i;
@@ -520,7 +520,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v2_0_vmhub_funcs = {
static void mmhub_v2_0_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(MMHUB, 0,
@@ -568,11 +568,10 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
return;
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(2, 1, 0):
case IP_VERSION(2, 1, 1):
case IP_VERSION(2, 1, 2):
- def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid);
break;
default:
@@ -602,12 +601,10 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
}
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(2, 1, 0):
case IP_VERSION(2, 1, 1):
case IP_VERSION(2, 1, 2):
- if (def != data)
- WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data);
if (def1 != data1)
WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid, data1);
break;
@@ -628,12 +625,12 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
return;
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(2, 1, 0):
case IP_VERSION(2, 1, 1):
case IP_VERSION(2, 1, 2):
- def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
- break;
+ /* There is no ATCL2 in MMHUB for 2.1.x */
+ return;
default:
def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
break;
@@ -644,18 +641,8 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
else
data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
- if (def != data) {
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
- case IP_VERSION(2, 1, 0):
- case IP_VERSION(2, 1, 1):
- case IP_VERSION(2, 1, 2):
- WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data);
- break;
- default:
- WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data);
- break;
- }
- }
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data);
}
static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
@@ -664,7 +651,7 @@ static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(2, 0, 0):
case IP_VERSION(2, 0, 2):
case IP_VERSION(2, 1, 0):
@@ -682,18 +669,21 @@ static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
return 0;
}
-static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
- int data, data1;
+ u32 data, data1;
if (amdgpu_sriov_vf(adev))
*flags = 0;
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(2, 1, 0):
case IP_VERSION(2, 1, 1):
case IP_VERSION(2, 1, 2):
- data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
+ /* There is no ATCL2 in MMHUB for 2.1.x. Keep the status
+ * based on DAGB
+ */
+ data = MM_ATC_L2_MISC_CG__ENABLE_MASK;
data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid);
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
index 9e16da28505a..5eb8122e2746 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
@@ -90,9 +90,10 @@ mmhub_v2_3_print_l2_protection_fault_status(struct amdgpu_device *adev,
dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(2, 3, 0):
case IP_VERSION(2, 4, 0):
+ case IP_VERSION(2, 4, 1):
mmhub_cid = mmhub_client_ids_vangogh[cid][rw];
break;
default:
@@ -120,7 +121,7 @@ static void mmhub_v2_3_setup_vm_pt_regs(struct amdgpu_device *adev,
uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid, lower_32_bits(page_table_base));
@@ -163,7 +164,7 @@ static void mmhub_v2_3_init_system_aperture_regs(struct amdgpu_device *adev)
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */
- value = amdgpu_gmc_vram_mc2pa(adev, adev->vram_scratch.gpu_addr);
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
@@ -242,7 +243,7 @@ static void mmhub_v2_3_init_cache_regs(struct amdgpu_device *adev)
tmp = mmMMVM_L2_CNTL5_DEFAULT;
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
- WREG32_SOC15(GC, 0, mmMMVM_L2_CNTL5, tmp);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL5, tmp);
}
static void mmhub_v2_3_enable_system_domain(struct amdgpu_device *adev)
@@ -279,12 +280,12 @@ static void mmhub_v2_3_disable_identity_aperture(struct amdgpu_device *adev)
static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
int i;
uint32_t tmp;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
@@ -323,12 +324,14 @@ static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev)
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
+
+ hub->vm_cntx_cntl = tmp;
}
static void mmhub_v2_3_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
- unsigned i;
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned int i;
for (i = 0; i < 18; ++i) {
WREG32_SOC15_OFFSET(MMHUB, 0,
@@ -370,7 +373,7 @@ static int mmhub_v2_3_gart_enable(struct amdgpu_device *adev)
static void mmhub_v2_3_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
u32 tmp;
u32 i;
@@ -403,6 +406,7 @@ static void mmhub_v2_3_set_fault_enable_default(struct amdgpu_device *adev,
bool value)
{
u32 tmp;
+
tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
@@ -443,7 +447,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v2_3_vmhub_funcs = {
static void mmhub_v2_3_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(MMHUB, 0,
@@ -496,11 +500,11 @@ mmhub_v2_3_update_medium_grain_clock_gating(struct amdgpu_device *adev,
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) {
data &= ~MM_ATC_L2_CGTT_CLK_CTRL__SOFT_OVERRIDE_MASK;
data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
- DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
- DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
- DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
- DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
- DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
} else {
data |= MM_ATC_L2_CGTT_CLK_CTRL__SOFT_OVERRIDE_MASK;
@@ -576,7 +580,7 @@ static int mmhub_v2_3_set_clockgating(struct amdgpu_device *adev,
return 0;
}
-static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
int data, data1, data2, data3;
@@ -590,13 +594,13 @@ static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u32 *flags)
/* AMD_CG_SUPPORT_MC_MGCG */
if (!(data & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
- DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
- DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
- DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
- DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
- DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK))
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK))
&& !(data1 & MM_ATC_L2_CGTT_CLK_CTRL__SOFT_OVERRIDE_MASK)) {
- *flags |= AMD_CG_SUPPORT_MC_MGCG;
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
}
/* AMD_CG_SUPPORT_MC_LS */
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
new file mode 100644
index 000000000000..7d5242df58a5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
@@ -0,0 +1,664 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "mmhub_v3_0.h"
+
+#include "mmhub/mmhub_3_0_0_offset.h"
+#include "mmhub/mmhub_3_0_0_sh_mask.h"
+#include "navi10_enum.h"
+
+#include "soc15_common.h"
+
+#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
+#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0
+/*
+ * Client-ID name table, indexed as [cid][rw] by
+ * mmhub_v3_0_print_l2_protection_fault_status().  Slots without a
+ * designated initializer are NULL; the highest initialized cid is 32+20.
+ */
+static const char *mmhub_client_ids_v3_0_0[][2] = {
+ [0][0] = "VMC",
+ [4][0] = "DCEDMC",
+ [5][0] = "DCEVGA",
+ [6][0] = "MP0",
+ [7][0] = "MP1",
+ [8][0] = "MPIO",
+ [16][0] = "HDP",
+ [17][0] = "LSDMA",
+ [18][0] = "JPEG",
+ [19][0] = "VCNU0",
+ [21][0] = "VSCH",
+ [22][0] = "VCNU1",
+ [23][0] = "VCN1",
+ [32+20][0] = "VCN0",
+ [2][1] = "DBGUNBIO",
+ [3][1] = "DCEDWB",
+ [4][1] = "DCEDMC",
+ [5][1] = "DCEVGA",
+ [6][1] = "MP0",
+ [7][1] = "MP1",
+ [8][1] = "MPIO",
+ [10][1] = "DBGU0",
+ [11][1] = "DBGU1",
+ [12][1] = "DBGU2",
+ [13][1] = "DBGU3",
+ [14][1] = "XDP",
+ [15][1] = "OSSSYS",
+ [16][1] = "HDP",
+ [17][1] = "LSDMA",
+ [18][1] = "JPEG",
+ [19][1] = "VCNU0",
+ [20][1] = "VCN0",
+ [21][1] = "VSCH",
+ [22][1] = "VCNU1",
+ [23][1] = "VCN1",
+};
+
+/*
+ * Compose an MMVM_INVALIDATE_ENG0_REQ register value.
+ *
+ * @vmid:       VM id to invalidate; encoded as the single bit (1 << vmid)
+ *              in PER_VMID_INVALIDATE_REQ
+ * @flush_type: value stored in the FLUSH_TYPE field
+ *
+ * Returns the request word; nothing is written to hardware here.
+ */
+static uint32_t mmhub_v3_0_get_invalidate_req(unsigned int vmid,
+					      uint32_t flush_type)
+{
+	u32 inv_req = 0;
+
+	/* Legacy-mode invalidation aimed at the requested vmid only. */
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				PER_VMID_INVALIDATE_REQ, 1 << vmid);
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				FLUSH_TYPE, flush_type);
+
+	/* Request invalidation of the L1 PTEs, the L2 PTEs and L2 PDE0..PDE2. */
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L1_PTES, 1);
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L2_PTES, 1);
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L2_PDE0, 1);
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L2_PDE1, 1);
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L2_PDE2, 1);
+
+	/* CLEAR_PROTECTION_FAULT_STATUS_ADDR is explicitly left at 0. */
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+	return inv_req;
+}
+
+/*
+ * Decode and log an MMVM_L2_PROTECTION_FAULT_STATUS value.
+ *
+ * @adev:   amdgpu_device pointer (used for logging and the MMHUB IP version)
+ * @status: raw value of MMVM_L2_PROTECTION_FAULT_STATUS
+ *
+ * Logs the raw status, the faulting client (by name when the table has an
+ * entry for it, "unknown" otherwise) and the MORE_FAULTS, WALKER_ERROR,
+ * PERMISSION_FAULTS, MAPPING_ERROR and RW fields via dev_err().
+ */
+static void
+mmhub_v3_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
+					    uint32_t status)
+{
+	uint32_t cid, rw;
+	const char *mmhub_cid = NULL;
+
+	cid = REG_GET_FIELD(status,
+			    MMVM_L2_PROTECTION_FAULT_STATUS, CID);
+	rw = REG_GET_FIELD(status,
+			   MMVM_L2_PROTECTION_FAULT_STATUS, RW);
+
+	dev_err(adev->dev,
+		"MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+		status);
+	switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+	case IP_VERSION(3, 0, 0):
+	case IP_VERSION(3, 0, 1):
+		/*
+		 * cid and rw are extracted from a hardware status register.
+		 * Guard the lookup so an unexpected value can never index
+		 * past the end of the name table; such a client is simply
+		 * reported as "unknown" below.
+		 */
+		if (cid < ARRAY_SIZE(mmhub_client_ids_v3_0_0) &&
+		    rw < ARRAY_SIZE(mmhub_client_ids_v3_0_0[0]))
+			mmhub_cid = mmhub_client_ids_v3_0_0[cid][rw];
+		break;
+	default:
+		/* No name table for this IP version: mmhub_cid stays NULL. */
+		break;
+	}
+	dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+		mmhub_cid ? mmhub_cid : "unknown", cid);
+	dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+		REG_GET_FIELD(status,
+		MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+	dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+		REG_GET_FIELD(status,
+		MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+	dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+		REG_GET_FIELD(status,
+		MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+	dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+		REG_GET_FIELD(status,
+		MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+	dev_err(adev->dev, "\t RW: 0x%x\n", rw);
+}
+/*
+ * Write @page_table_base, split into its lower and upper 32 bits, to the
+ * PAGE_TABLE_BASE_ADDR_LO32/HI32 registers of VM context @vmid.  The
+ * per-context register is reached by offsetting the CONTEXT0 register by
+ * hub->ctx_addr_distance * vmid.
+ */
+static void mmhub_v3_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+/*
+ * Point VM context 0 at the GART page table (base obtained from
+ * adev->gart.bo) and program the context-0 page-table start/end registers
+ * from adev->gmc.gart_start and adev->gmc.gart_end.
+ */
+static void mmhub_v3_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v3_0_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12)); /* address shifted right by 12, low 32 bits */
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44)); /* remaining bits from bit 44 upwards */
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+/*
+ * Program the AGP BAR, the system aperture bounds, the system aperture
+ * default address and the protection-fault default address, then set
+ * ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY in MMVM_L2_PROTECTION_FAULT_CNTL2.
+ * Returns without touching any register on an SR-IOV virtual function.
+ */
+static void mmhub_v3_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+ uint32_t tmp;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /*
+ * The new L1 policy blocks an SRIOV guest from writing these
+ * registers; they are programmed by the host instead, which is
+ * why the VF case returns early above.
+ */
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+/*
+ * Read-modify-write MMMC_VM_MX_L1_TLB_CNTL: enable the L1 TLB and the
+ * advanced driver model, set SYSTEM_ACCESS_MODE to 3, clear
+ * SYSTEM_APERTURE_UNMAPPED_ACCESS and ECO_BITS, and select MTYPE_UC.
+ */
+static void mmhub_v3_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+/*
+ * Program the L2 cache control registers MMVM_L2_CNTL .. MMVM_L2_CNTL5.
+ * MMVM_L2_CNTL and MMVM_L2_CNTL2 are read-modify-write; CNTL3, CNTL4 and
+ * CNTL5 are built from the regMMVM_L2_CNTLn_DEFAULT constants defined in
+ * this file.  Does nothing on an SR-IOV virtual function.
+ */
+static void mmhub_v3_0_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp);
+
+ tmp = regMMVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) { /* BANK_SELECT/fragment size depend on translate_further */
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp);
+
+ tmp = regMMVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp);
+
+ tmp = regMMVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
+}
+/*
+ * Read-modify-write MMVM_CONTEXT0_CNTL: enable VM context 0 with
+ * PAGE_TABLE_DEPTH 0 and RETRY_PERMISSION_OR_INVALID_PAGE_FAULT cleared.
+ */
+static void mmhub_v3_0_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp);
+}
+/*
+ * Program the CONTEXT1 identity aperture with a LOW address of
+ * 0xF_FFFFFFFF, a HIGH address of 0 and a physical offset of 0.
+ * NOTE(review): LOW above HIGH presumably yields an empty aperture, which
+ * would match the "disable" in the function name - confirm against the
+ * register spec.  Does nothing on an SR-IOV virtual function.
+ */
+static void mmhub_v3_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ 0);
+}
+/*
+ * Configure the 15 VM contexts addressed relative to the CONTEXT1
+ * registers (loop index 0..14).  For each one: enable the context, set the
+ * page-table depth and block size from adev->vm_manager, enable the
+ * protection-fault defaults, set the retry behaviour from amdgpu_noretry,
+ * and program a page-table range of 0 .. max_pfn - 1.  The control value
+ * of the last iteration is saved in hub->vm_cntx_cntl.
+ */
+static void mmhub_v3_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp; /* control value written in the final loop iteration */
+}
+/*
+ * For 18 invalidation engines (offset from ENG0 by hub->eng_addr_distance),
+ * set the address range registers to LO32 = 0xffffffff and HI32 = 0x1f.
+ */
+static void mmhub_v3_0_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned i;
+
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+/*
+ * Bring up GART/VM on the MMHUB by running the init helpers in this file
+ * in sequence: GART aperture, system aperture, TLB, L2 cache, system
+ * domain (context 0), identity aperture, per-VMID contexts and the
+ * invalidation engines.  Always returns 0.
+ */
+static int mmhub_v3_0_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ mmhub_v3_0_init_gart_aperture_regs(adev);
+ mmhub_v3_0_init_system_aperture_regs(adev);
+ mmhub_v3_0_init_tlb_regs(adev);
+ mmhub_v3_0_init_cache_regs(adev);
+
+ mmhub_v3_0_enable_system_domain(adev);
+ mmhub_v3_0_disable_identity_aperture(adev);
+ mmhub_v3_0_setup_vmid_config(adev);
+ mmhub_v3_0_program_invalidation(adev);
+
+ return 0;
+}
+/*
+ * Tear down GART/VM on the MMHUB: zero the control register of 16 VM
+ * contexts, disable the L1 TLB and the advanced driver model, disable the
+ * L2 cache and write 0 to MMVM_L2_CNTL3.
+ */
+static void mmhub_v3_0_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0);
+}
+
+/**
+ * mmhub_v3_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ *
+ * Read-modify-write of MMVM_L2_PROTECTION_FAULT_CNTL: @value is stored in
+ * every *_PROTECTION_FAULT_ENABLE_DEFAULT field.  When @value is false the
+ * CRASH_ON_NO_RETRY_FAULT and CRASH_ON_RETRY_FAULT fields are set as well.
+ * Does nothing on an SR-IOV virtual function.
+ */
+static void mmhub_v3_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
+{
+ u32 tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) { /* faults are not redirected: also set both crash-on-fault fields */
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+/* vmhub callbacks for MMHUB v3.0; installed into hub->vmhub_funcs by mmhub_v3_0_init(). */
+static const struct amdgpu_vmhub_funcs mmhub_v3_0_vmhub_funcs = {
+ .print_l2_protection_fault_status = mmhub_v3_0_print_l2_protection_fault_status,
+ .get_invalidate_req = mmhub_v3_0_get_invalidate_req,
+};
+/*
+ * Fill in the amdgpu_vmhub descriptor at adev->vmhub[AMDGPU_MMHUB0(0)]:
+ * absolute register offsets, the register strides between consecutive
+ * contexts/engines, the VM-fault interrupt enable mask and the vmhub
+ * callback table.  Only the descriptor is written; no hardware register
+ * is accessed here.
+ */
+static void mmhub_v3_0_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL; /* stride between context CNTL registers */
+ hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ -
+ regMMVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vm_l2_bank_select_reserved_cid2 =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_BANK_SELECT_RESERVED_CID2);
+
+ hub->vm_contexts_disable =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXTS_DISABLE);
+
+ hub->vmhub_funcs = &mmhub_v3_0_vmhub_funcs;
+}
+
+/*
+ * mmhub_v3_0_get_fb_location - read the frame buffer base address
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns the FB_BASE field of MMMC_VM_FB_LOCATION_BASE shifted left by 24.
+ */
+static u64 mmhub_v3_0_get_fb_location(struct amdgpu_device *adev)
+{
+	u64 fb_base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
+
+	/* Keep only the FB_BASE field, then scale it in 64-bit arithmetic. */
+	return (fb_base & MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK) << 24;
+}
+
+/*
+ * mmhub_v3_0_get_mc_fb_offset - read the frame buffer offset
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns the raw MMMC_VM_FB_OFFSET register value shifted left by 24.
+ */
+static u64 mmhub_v3_0_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+	/* Widen before shifting so the upper bits are not lost. */
+	u64 fb_offset = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET);
+
+	return fb_offset << 24;
+}
+
+/*
+ * mmhub_v3_0_update_medium_grain_clock_gating - toggle MM_ATC_L2_MISC_CG.ENABLE
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: true sets the ENABLE bit, false clears it
+ *
+ * The register is written back only when the value changed. All DAGB0/DAGB1
+ * CNTL_MISC2 handling is compiled out by the "#if 0" regions below.
+ */
+static void mmhub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+#if 0
+ uint32_t def1, data1, def2 = 0, data2 = 0;
+#endif
+
+ /* def keeps the value read back so an unchanged register is not rewritten. */
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+#if 0
+ def1 = data1 = RREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2);
+ def2 = data2 = RREG32_SOC15(MMHUB, 0, regDAGB1_CNTL_MISC2);
+#endif
+
+ if (enable) {
+ data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
+#if 0
+ data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+
+ data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+#endif
+ } else {
+ data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK;
+#if 0
+ data1 |= (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+
+ data2 |= (DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+#endif
+ }
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
+#if 0
+ if (def1 != data1)
+ WREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2, data1);
+
+ if (def2 != data2)
+ WREG32_SOC15(MMHUB, 0, regDAGB1_CNTL_MISC2, data2);
+#endif
+}
+
+/*
+ * mmhub_v3_0_update_medium_grain_light_sleep - toggle MM_ATC_L2_MISC_CG.MEM_LS_ENABLE
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: true sets the MEM_LS_ENABLE bit, false clears it
+ *
+ * The register is only written when the requested state differs from the
+ * value read back.
+ */
+static void mmhub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+							bool enable)
+{
+	const uint32_t current_cg = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+	uint32_t wanted_cg;
+
+	wanted_cg = enable ?
+		(current_cg | MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) :
+		(current_cg & ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK);
+
+	/* Already in the requested state: skip the write. */
+	if (wanted_cg == current_cg)
+		return;
+
+	WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, wanted_cg);
+}
+
+/*
+ * mmhub_v3_0_set_clockgating - apply a clock-gating state to MMHUB
+ *
+ * @adev: amdgpu_device pointer
+ * @state: requested state; AMD_CG_STATE_GATE enables, anything else disables
+ *
+ * MGCG and light sleep are each touched only when the matching bit is set in
+ * adev->cg_flags. Does nothing on an SR-IOV virtual function.
+ *
+ * Always returns 0.
+ */
+static int mmhub_v3_0_set_clockgating(struct amdgpu_device *adev,
+				       enum amd_clockgating_state state)
+{
+	const bool gate = (state == AMD_CG_STATE_GATE);
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	if (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)
+		mmhub_v3_0_update_medium_grain_clock_gating(adev, gate);
+
+	if (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)
+		mmhub_v3_0_update_medium_grain_light_sleep(adev, gate);
+
+	return 0;
+}
+
+/*
+ * mmhub_v3_0_get_clockgating - report which MMHUB clock-gating bits are set
+ *
+ * @adev: amdgpu_device pointer
+ * @flags: in/out AMD_CG_SUPPORT_* bitmask; bits found enabled in
+ *         MM_ATC_L2_MISC_CG are OR-ed into it
+ */
+static void mmhub_v3_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data;
+
+ /*
+  * NOTE(review): on an SR-IOV VF the flags are cleared, but execution still
+  * falls through to the register read below. Confirm whether an early
+  * return was intended (mmhub_v3_0_set_clockgating() returns early on VF).
+  */
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
+}
+
+/* MMHUB v3.0 callback table; the only non-static symbol among these callbacks. */
+const struct amdgpu_mmhub_funcs mmhub_v3_0_funcs = {
+ .init = mmhub_v3_0_init,
+ .get_fb_location = mmhub_v3_0_get_fb_location,
+ .get_mc_fb_offset = mmhub_v3_0_get_mc_fb_offset,
+ .gart_enable = mmhub_v3_0_gart_enable,
+ .set_fault_enable_default = mmhub_v3_0_set_fault_enable_default,
+ .gart_disable = mmhub_v3_0_gart_disable,
+ .set_clockgating = mmhub_v3_0_set_clockgating,
+ .get_clockgating = mmhub_v3_0_get_clockgating,
+ .setup_vm_pt_regs = mmhub_v3_0_setup_vm_pt_regs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.h
new file mode 100644
index 000000000000..3ced20f350bb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V3_0_H__
+#define __MMHUB_V3_0_H__
+
+/* Callback table exported by the MMHUB v3.0 implementation. */
+extern const struct amdgpu_mmhub_funcs mmhub_v3_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
new file mode 100644
index 000000000000..910337dc28d1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
@@ -0,0 +1,597 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "mmhub_v3_0_1.h"
+
+#include "mmhub/mmhub_3_0_1_offset.h"
+#include "mmhub/mmhub_3_0_1_sh_mask.h"
+#include "navi10_enum.h"
+
+#include "soc15_common.h"
+
+/*
+ * Starting values (not register offsets, despite the "reg" prefix) that
+ * mmhub_v3_0_1_init_cache_regs() loads before adjusting individual fields
+ * and writing MMVM_L2_CNTL3/4/5.
+ */
+#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
+#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0
+
+/*
+ * Client names for MMHUB v3.0.1, indexed as [cid][rw] with the CID and RW
+ * fields decoded from MMVM_L2_PROTECTION_FAULT_STATUS. Entries not listed
+ * are NULL.
+ * NOTE(review): column 0 presumably holds read clients and column 1 write
+ * clients - confirm against the RW field definition.
+ */
+static const char *mmhub_client_ids_v3_0_1[][2] = {
+ [0][0] = "VMC",
+ [1][0] = "ISPXT",
+ [2][0] = "ISPIXT",
+ [4][0] = "DCEDMC",
+ [5][0] = "DCEVGA",
+ [6][0] = "MP0",
+ [7][0] = "MP1",
+ [8][0] = "MPM",
+ [12][0] = "ISPTNR",
+ [14][0] = "ISPCRD0",
+ [15][0] = "ISPCRD1",
+ [16][0] = "ISPCRD2",
+ [22][0] = "HDP",
+ [23][0] = "LSDMA",
+ [24][0] = "JPEG",
+ [27][0] = "VSCH",
+ [28][0] = "VCNU",
+ [29][0] = "VCN",
+ [1][1] = "ISPXT",
+ [2][1] = "ISPIXT",
+ [3][1] = "DCEDWB",
+ [4][1] = "DCEDMC",
+ [5][1] = "DCEVGA",
+ [6][1] = "MP0",
+ [7][1] = "MP1",
+ [8][1] = "MPM",
+ [10][1] = "ISPMWR0",
+ [11][1] = "ISPMWR1",
+ [12][1] = "ISPTNR",
+ [13][1] = "ISPSWR",
+ [14][1] = "ISPCWR0",
+ [15][1] = "ISPCWR1",
+ [16][1] = "ISPCWR2",
+ [17][1] = "ISPCWR3",
+ [18][1] = "XDP",
+ [21][1] = "OSSSYS",
+ [22][1] = "HDP",
+ [23][1] = "LSDMA",
+ [24][1] = "JPEG",
+ [27][1] = "VSCH",
+ [28][1] = "VCNU",
+ [29][1] = "VCN",
+};
+
+/*
+ * mmhub_v3_0_1_get_invalidate_req - build an MMVM_INVALIDATE_ENG0_REQ value
+ *
+ * @vmid: VMID to invalidate; bit (1 << vmid) goes into PER_VMID_INVALIDATE_REQ
+ * @flush_type: value placed in the FLUSH_TYPE field
+ *
+ * Returns the assembled request word. No register is accessed here.
+ */
+static uint32_t mmhub_v3_0_1_get_invalidate_req(unsigned int vmid,
+						uint32_t flush_type)
+{
+	u32 inv = 0;
+
+	/* Legacy-mode invalidation for the given VMID. */
+	inv = REG_SET_FIELD(inv, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+	inv = REG_SET_FIELD(inv, MMVM_INVALIDATE_ENG0_REQ,
+			    PER_VMID_INVALIDATE_REQ, 1 << vmid);
+
+	/* Invalidate the L1 PTEs and every L2 level (PTEs, PDE0..PDE2). */
+	inv = REG_SET_FIELD(inv, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+	inv = REG_SET_FIELD(inv, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+	inv = REG_SET_FIELD(inv, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+	inv = REG_SET_FIELD(inv, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+	inv = REG_SET_FIELD(inv, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+
+	/* CLEAR_PROTECTION_FAULT_STATUS_ADDR is explicitly left at 0. */
+	inv = REG_SET_FIELD(inv, MMVM_INVALIDATE_ENG0_REQ,
+			    CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+	return inv;
+}
+
+/*
+ * mmhub_v3_0_1_print_l2_protection_fault_status - decode and log a VM fault
+ *
+ * @adev: amdgpu_device pointer
+ * @status: raw MMVM_L2_PROTECTION_FAULT_STATUS value
+ *
+ * Logs the raw status, the faulting client (by name when known, otherwise
+ * "unknown") and the individual status fields through dev_err().
+ */
+static void
+mmhub_v3_0_1_print_l2_protection_fault_status(struct amdgpu_device *adev,
+					       uint32_t status)
+{
+	uint32_t cid, rw;
+	const char *mmhub_cid = NULL;
+
+	cid = REG_GET_FIELD(status,
+			    MMVM_L2_PROTECTION_FAULT_STATUS, CID);
+	rw = REG_GET_FIELD(status,
+			   MMVM_L2_PROTECTION_FAULT_STATUS, RW);
+
+	dev_err(adev->dev,
+		"MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+		status);
+
+	switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+	case IP_VERSION(3, 0, 1):
+		/*
+		 * cid and rw come straight from the status register and
+		 * nothing here guarantees they stay inside the name table,
+		 * so bounds-check before indexing to avoid an out-of-bounds
+		 * read. Out-of-range values are reported as "unknown".
+		 */
+		if (cid < ARRAY_SIZE(mmhub_client_ids_v3_0_1) &&
+		    rw < ARRAY_SIZE(mmhub_client_ids_v3_0_1[0]))
+			mmhub_cid = mmhub_client_ids_v3_0_1[cid][rw];
+		break;
+	default:
+		/* No client-name table for other MMHUB versions. */
+		break;
+	}
+
+	dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+		mmhub_cid ? mmhub_cid : "unknown", cid);
+	dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+		REG_GET_FIELD(status,
+		MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+	dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+		REG_GET_FIELD(status,
+		MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+	dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+		REG_GET_FIELD(status,
+		MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+	dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+		REG_GET_FIELD(status,
+		MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+	dev_err(adev->dev, "\t RW: 0x%x\n", rw);
+}
+
+/*
+ * mmhub_v3_0_1_setup_vm_pt_regs - program a context's page table base
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: context index; selects the register instance via ctx_addr_distance
+ * @page_table_base: 64-bit base address, split into LO32/HI32 writes
+ */
+static void mmhub_v3_0_1_setup_vm_pt_regs(struct amdgpu_device *adev,
+					   uint32_t vmid,
+					   uint64_t page_table_base)
+{
+	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+	const uint32_t base_lo = lower_32_bits(page_table_base);
+	const uint32_t base_hi = upper_32_bits(page_table_base);
+
+	/* Low half first, then high half, each at this VMID's instance. */
+	WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+			    hub->ctx_addr_distance * vmid, base_lo);
+
+	WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+			    hub->ctx_addr_distance * vmid, base_hi);
+}
+
+/*
+ * mmhub_v3_0_1_init_gart_aperture_regs - point context 0 at the GART
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Sets the context-0 page table base from the GART BO, then programs the
+ * context-0 start/end range from adev->gmc.gart_start/gart_end.
+ */
+static void mmhub_v3_0_1_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+	uint64_t gart_pd = amdgpu_gmc_pd_addr(adev->gart.bo);
+	u32 start_lo, start_hi, end_lo, end_hi;
+
+	mmhub_v3_0_1_setup_vm_pt_regs(adev, 0, gart_pd);
+
+	/* LO32 takes the address shifted right by 12, HI32 by 44. */
+	start_lo = (u32)(adev->gmc.gart_start >> 12);
+	start_hi = (u32)(adev->gmc.gart_start >> 44);
+	end_lo = (u32)(adev->gmc.gart_end >> 12);
+	end_hi = (u32)(adev->gmc.gart_end >> 44);
+
+	WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+		     start_lo);
+	WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+		     start_hi);
+
+	WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+		     end_lo);
+	WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+		     end_hi);
+}
+
+/*
+ * mmhub_v3_0_1_init_system_aperture_regs - program AGP, system aperture and
+ * default/fault page addresses
+ *
+ * @adev: amdgpu_device pointer
+ */
+static void mmhub_v3_0_1_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+ uint32_t tmp;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /*
+ * The new L1 policy will block an SRIOV guest from writing
+ * these regs, and they will be programmed at host,
+ * so skip programming these regs.
+ * NOTE(review): no SRIOV check exists in this function; the writes
+ * below are unconditional. Confirm whether this comment is stale.
+ */
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ /* LSB gets the address shifted right by 12, MSB by 44. */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ /* Read-modify-write: only ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY changes. */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+
+/*
+ * mmhub_v3_0_1_init_tlb_regs - configure MMMC_VM_MX_L1_TLB_CNTL
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Read-modify-write of the L1 TLB control register; fields not named below
+ * keep the value read from hardware.
+ */
+static void mmhub_v3_0_1_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+/*
+ * mmhub_v3_0_1_init_cache_regs - configure the VM L2 cache registers
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * MMVM_L2_CNTL and MMVM_L2_CNTL2 are read-modify-written; MMVM_L2_CNTL3/4/5
+ * are rebuilt from the regMMVM_L2_CNTLn_DEFAULT constants instead of being
+ * read back.
+ */
+static void mmhub_v3_0_1_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+
+ /* Request invalidation of all L1 TLBs and of the L2 cache. */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp);
+
+ /* BANK_SELECT and big-K fragment size depend on translate_further. */
+ tmp = regMMVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp);
+
+ tmp = regMMVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp);
+
+ tmp = regMMVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
+}
+
+/*
+ * mmhub_v3_0_1_enable_system_domain - enable VM context 0
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Read-modify-write of MMVM_CONTEXT0_CNTL: context enabled, page table depth
+ * 0, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT cleared.
+ */
+static void mmhub_v3_0_1_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp);
+}
+
+/*
+ * mmhub_v3_0_1_disable_identity_aperture - program the context-1 identity
+ * aperture registers
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Writes LOW = 0xF_FFFFFFFF and HIGH = 0, i.e. a low bound above the high
+ * bound, and zeroes the physical offset.
+ * NOTE(review): presumably the inverted bounds are what disables the
+ * aperture, as the function name suggests - confirm.
+ */
+static void mmhub_v3_0_1_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ 0);
+}
+
+/*
+ * mmhub_v3_0_1_setup_vmid_config - configure the per-VMID contexts
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Programs 15 register instances starting at MMVM_CONTEXT1_CNTL (i = 0..14):
+ * enables each context, sets page table depth and block size from
+ * adev->vm_manager, turns on the protection-fault defaults and sets the
+ * address range to [0, max_pfn - 1]. The last CNTL value written is cached in
+ * hub->vm_cntx_cntl.
+ */
+static void mmhub_v3_0_1_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ /* Instance i lives at CONTEXT1_CNTL + i * ctx_distance. */
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ /* The field stores the block size minus 9. */
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ /* Address-range registers use the ctx_addr_distance stride. */
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ /* Cache the CNTL value from the final loop iteration. */
+ hub->vm_cntx_cntl = tmp;
+}
+
+/*
+ * mmhub_v3_0_1_program_invalidation - set the address range of each
+ * invalidation engine
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * For 18 engine instances, writes 0xffffffff to ADDR_RANGE_LO32 and 0x1f to
+ * ADDR_RANGE_HI32.
+ */
+static void mmhub_v3_0_1_program_invalidation(struct amdgpu_device *adev)
+{
+	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+	unsigned int eng = 0;
+
+	while (eng < 18) {
+		/* Each engine's range registers sit eng_addr_distance apart. */
+		WREG32_SOC15_OFFSET(MMHUB, 0,
+				    regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+				    eng * hub->eng_addr_distance, 0xffffffff);
+		WREG32_SOC15_OFFSET(MMHUB, 0,
+				    regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+				    eng * hub->eng_addr_distance, 0x1f);
+		eng++;
+	}
+}
+
+/*
+ * mmhub_v3_0_1_gart_enable - run the MMHUB GART/VM bring-up sequence
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Calls the init helpers in a fixed order: apertures, TLB, L2 cache, then
+ * context 0, identity aperture, per-VMID contexts and invalidation engines.
+ *
+ * Always returns 0.
+ */
+static int mmhub_v3_0_1_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ mmhub_v3_0_1_init_gart_aperture_regs(adev);
+ mmhub_v3_0_1_init_system_aperture_regs(adev);
+ mmhub_v3_0_1_init_tlb_regs(adev);
+ mmhub_v3_0_1_init_cache_regs(adev);
+
+ mmhub_v3_0_1_enable_system_domain(adev);
+ mmhub_v3_0_1_disable_identity_aperture(adev);
+ mmhub_v3_0_1_setup_vmid_config(adev);
+ mmhub_v3_0_1_program_invalidation(adev);
+
+ return 0;
+}
+
+/*
+ * mmhub_v3_0_1_gart_disable - tear down the MMHUB VM setup
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Zeroes the CNTL register of all 16 contexts, disables the L1 TLB and the
+ * advanced driver model, disables the L2 cache and clears MMVM_L2_CNTL3.
+ */
+static void mmhub_v3_0_1_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+ /* CNTL3 is written as a plain 0, not read-modify-written. */
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0);
+}
+
+/**
+ * mmhub_v3_0_1_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ *
+ * Read-modify-write of MMVM_L2_PROTECTION_FAULT_CNTL: every
+ * *_PROTECTION_FAULT_ENABLE_DEFAULT field listed below is set to @value.
+ * When @value is false, both CRASH_ON_*_FAULT fields are additionally set.
+ */
+static void mmhub_v3_0_1_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ /* The crash bits are only ever set here; value == true leaves them as read. */
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+/* VM-hub callbacks for MMHUB v3.0.1; attached to the hub in mmhub_v3_0_1_init(). */
+static const struct amdgpu_vmhub_funcs mmhub_v3_0_1_vmhub_funcs = {
+ .print_l2_protection_fault_status = mmhub_v3_0_1_print_l2_protection_fault_status,
+ .get_invalidate_req = mmhub_v3_0_1_get_invalidate_req,
+};
+
+/*
+ * mmhub_v3_0_1_init - fill in the MMHUB0 vmhub descriptor
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Records register offsets, per-instance strides, the fault-interrupt mask
+ * and the callback table in adev->vmhub[AMDGPU_MMHUB0(0)].
+ */
+static void mmhub_v3_0_1_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ /* Offsets of the context-0 / engine-0 instance of each register. */
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+
+ /* Strides: distance from instance 0 to instance 1 of each register. */
+ hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ -
+ regMMVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ /* Protection-fault interrupt-enable bits of CONTEXT1_CNTL, OR-ed into one mask. */
+ hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &mmhub_v3_0_1_vmhub_funcs;
+}
+
+/*
+ * mmhub_v3_0_1_get_fb_location - read the frame buffer base address
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns the FB_BASE field of MMMC_VM_FB_LOCATION_BASE shifted left by 24.
+ */
+static u64 mmhub_v3_0_1_get_fb_location(struct amdgpu_device *adev)
+{
+	u64 fb_base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
+
+	/* Keep only the FB_BASE field, then scale it in 64-bit arithmetic. */
+	return (fb_base & MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK) << 24;
+}
+
+/*
+ * mmhub_v3_0_1_get_mc_fb_offset - read the frame buffer offset
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns the raw MMMC_VM_FB_OFFSET register value shifted left by 24.
+ */
+static u64 mmhub_v3_0_1_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+	/* Widen before shifting so the upper bits are not lost. */
+	u64 fb_offset = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET);
+
+	return fb_offset << 24;
+}
+
+/*
+ * mmhub_v3_0_1_update_medium_grain_clock_gating - toggle MM_ATC_L2_MISC_CG.ENABLE
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: true sets the ENABLE bit, false clears it
+ *
+ * The register is only written when the requested state differs from the
+ * value read back.
+ */
+static void mmhub_v3_0_1_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+							   bool enable)
+{
+	const uint32_t current_cg = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+	uint32_t wanted_cg;
+
+	wanted_cg = enable ?
+		(current_cg | MM_ATC_L2_MISC_CG__ENABLE_MASK) :
+		(current_cg & ~MM_ATC_L2_MISC_CG__ENABLE_MASK);
+
+	/* Already in the requested state: skip the write. */
+	if (wanted_cg == current_cg)
+		return;
+
+	WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, wanted_cg);
+}
+
+/*
+ * mmhub_v3_0_1_update_medium_grain_light_sleep - toggle
+ * MM_ATC_L2_MISC_CG.MEM_LS_ENABLE
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: true sets the MEM_LS_ENABLE bit, false clears it
+ *
+ * The register is only written when the requested state differs from the
+ * value read back.
+ */
+static void mmhub_v3_0_1_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+							  bool enable)
+{
+	const uint32_t current_cg = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+	uint32_t wanted_cg;
+
+	wanted_cg = enable ?
+		(current_cg | MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) :
+		(current_cg & ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK);
+
+	/* Already in the requested state: skip the write. */
+	if (wanted_cg == current_cg)
+		return;
+
+	WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, wanted_cg);
+}
+
+/*
+ * mmhub_v3_0_1_set_clockgating - apply a clock-gating state to MMHUB
+ *
+ * @adev: amdgpu_device pointer
+ * @state: requested state; AMD_CG_STATE_GATE enables, anything else disables
+ *
+ * Updates both MGCG and light sleep unconditionally (adev->cg_flags is not
+ * consulted here). Does nothing on an SR-IOV virtual function.
+ *
+ * Always returns 0.
+ */
+static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev,
+					 enum amd_clockgating_state state)
+{
+	const bool gate = (state == AMD_CG_STATE_GATE);
+
+	if (!amdgpu_sriov_vf(adev)) {
+		mmhub_v3_0_1_update_medium_grain_clock_gating(adev, gate);
+		mmhub_v3_0_1_update_medium_grain_light_sleep(adev, gate);
+	}
+
+	return 0;
+}
+
+/*
+ * mmhub_v3_0_1_get_clockgating - report which MMHUB clock-gating bits are set
+ *
+ * @adev: amdgpu_device pointer
+ * @flags: in/out AMD_CG_SUPPORT_* bitmask; bits found enabled in
+ *         MM_ATC_L2_MISC_CG are OR-ed into it
+ */
+static void mmhub_v3_0_1_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data;
+
+ /*
+  * NOTE(review): on an SR-IOV VF the flags are cleared, but execution still
+  * falls through to the register read below. Confirm whether an early
+  * return was intended (mmhub_v3_0_1_set_clockgating() returns early on VF).
+  */
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
+}
+
+/* MMHUB v3.0.1 callback table; declared extern in mmhub_v3_0_1.h. */
+const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs = {
+ .init = mmhub_v3_0_1_init,
+ .get_fb_location = mmhub_v3_0_1_get_fb_location,
+ .get_mc_fb_offset = mmhub_v3_0_1_get_mc_fb_offset,
+ .gart_enable = mmhub_v3_0_1_gart_enable,
+ .set_fault_enable_default = mmhub_v3_0_1_set_fault_enable_default,
+ .gart_disable = mmhub_v3_0_1_gart_disable,
+ .set_clockgating = mmhub_v3_0_1_set_clockgating,
+ .get_clockgating = mmhub_v3_0_1_get_clockgating,
+ .setup_vm_pt_regs = mmhub_v3_0_1_setup_vm_pt_regs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h
new file mode 100644
index 000000000000..4c1246735e7d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V3_0_1_H__
+#define __MMHUB_V3_0_1_H__
+
+/* Callback table exported by the MMHUB v3.0.1 implementation. */
+extern const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
new file mode 100644
index 000000000000..f0f182f033b9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
@@ -0,0 +1,570 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "mmhub_v3_0_2.h"
+
+#include "mmhub/mmhub_3_0_2_offset.h"
+#include "mmhub/mmhub_3_0_2_sh_mask.h"
+#include "navi10_enum.h"
+
+#include "soc15_common.h"
+
+#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
+#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0
+
+/*
+ * MMHUB client names, looked up as mmhub_client_ids_v3_0_2[cid][rw] when a
+ * protection fault is reported. Entries without an initializer are NULL.
+ * NOTE(review): the second index presumably distinguishes read (0) from
+ * write (1) clients - confirm against the RW field of the status register.
+ */
+static const char *mmhub_client_ids_v3_0_2[][2] = {
+ [0][0] = "VMC",
+ [4][0] = "DCEDMC",
+ [5][0] = "DCEVGA",
+ [6][0] = "MP0",
+ [7][0] = "MP1",
+ [8][0] = "MPIO",
+ [16][0] = "HDP",
+ [17][0] = "LSDMA",
+ [18][0] = "JPEG",
+ [19][0] = "VCNU0",
+ [21][0] = "VSCH",
+ [22][0] = "VCNU1",
+ [23][0] = "VCN1",
+ [32+20][0] = "VCN0",
+ [2][1] = "DBGUNBIO",
+ [3][1] = "DCEDWB",
+ [4][1] = "DCEDMC",
+ [5][1] = "DCEVGA",
+ [6][1] = "MP0",
+ [7][1] = "MP1",
+ [8][1] = "MPIO",
+ [10][1] = "DBGU0",
+ [11][1] = "DBGU1",
+ [12][1] = "DBGU2",
+ [13][1] = "DBGU3",
+ [14][1] = "XDP",
+ [15][1] = "OSSSYS",
+ [16][1] = "HDP",
+ [17][1] = "LSDMA",
+ [18][1] = "JPEG",
+ [19][1] = "VCNU0",
+ [20][1] = "VCN0",
+ [21][1] = "VSCH",
+ [22][1] = "VCNU1",
+ [23][1] = "VCN1",
+};
+
+/*
+ * Compose the MMVM_INVALIDATE_ENG0_REQ value for a legacy-mode invalidation
+ * of one VMID.
+ *
+ * @vmid: VMID whose bit is set in PER_VMID_INVALIDATE_REQ
+ * @flush_type: value placed in the FLUSH_TYPE field
+ *
+ * Returns the request word; nothing is written to hardware here.
+ */
+static uint32_t mmhub_v3_0_2_get_invalidate_req(unsigned int vmid,
+						uint32_t flush_type)
+{
+	u32 inv_req = 0;
+
+	/* Target: the single VMID, using the requested flush type. */
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				PER_VMID_INVALIDATE_REQ, 1 << vmid);
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				FLUSH_TYPE, flush_type);
+
+	/* Scope: L2 PTEs, all three L2 PDE levels, then L1 PTEs. */
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L2_PTES, 1);
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L2_PDE0, 1);
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L2_PDE1, 1);
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L2_PDE2, 1);
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L1_PTES, 1);
+
+	/* CLEAR_PROTECTION_FAULT_STATUS_ADDR is explicitly left at 0. */
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+	return inv_req;
+}
+
+/*
+ * Decode an MMVM_L2_PROTECTION_FAULT_STATUS value and log its fields with
+ * dev_err().
+ *
+ * @adev: amdgpu_device pointer (used for the log device)
+ * @status: raw MMVM_L2_PROTECTION_FAULT_STATUS register value
+ *
+ * The client ID is translated to a name through mmhub_client_ids_v3_0_2;
+ * IDs with no table entry are printed as "unknown".
+ */
+static void
+mmhub_v3_0_2_print_l2_protection_fault_status(struct amdgpu_device *adev,
+					      uint32_t status)
+{
+	uint32_t cid, rw;
+	const char *mmhub_cid = NULL;
+
+	cid = REG_GET_FIELD(status,
+			    MMVM_L2_PROTECTION_FAULT_STATUS, CID);
+	rw = REG_GET_FIELD(status,
+			   MMVM_L2_PROTECTION_FAULT_STATUS, RW);
+
+	dev_err(adev->dev,
+		"MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+		status);
+
+	/*
+	 * cid is taken straight from the status word and is not guaranteed to
+	 * fall inside the name table. Only index the table when it is in
+	 * range, so an unexpected ID is reported as "unknown" instead of
+	 * reading past the end of the array.
+	 */
+	if (cid < ARRAY_SIZE(mmhub_client_ids_v3_0_2))
+		mmhub_cid = mmhub_client_ids_v3_0_2[cid][rw];
+
+	dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+		mmhub_cid ? mmhub_cid : "unknown", cid);
+	dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+		REG_GET_FIELD(status,
+		MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+	dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+		REG_GET_FIELD(status,
+		MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+	dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+		REG_GET_FIELD(status,
+		MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+	dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+		REG_GET_FIELD(status,
+		MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+	dev_err(adev->dev, "\t RW: 0x%x\n", rw);
+}
+
+/*
+ * Write a page table base address into the PAGE_TABLE_BASE_ADDR register
+ * pair of one VM context.
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: context index; the register pair is selected by offsetting the
+ *        CONTEXT0 registers by hub->ctx_addr_distance * vmid
+ * @page_table_base: 64-bit value split across the LO32/HI32 registers
+ */
+static void mmhub_v3_0_2_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+					  uint64_t page_table_base)
+{
+	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+	u32 base_lo = lower_32_bits(page_table_base);
+	u32 base_hi = upper_32_bits(page_table_base);
+
+	/* Low half first, then high half. */
+	WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+			    hub->ctx_addr_distance * vmid, base_lo);
+	WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+			    hub->ctx_addr_distance * vmid, base_hi);
+}
+
+/*
+ * Program context 0 for the GART: the page table base comes from the GART
+ * buffer object, and the START/END register pairs come from
+ * adev->gmc.gart_start / gart_end (shifted right by 12 for LO32 and by 44
+ * for HI32).
+ */
+static void mmhub_v3_0_2_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+	u32 start_lo, start_hi, end_lo, end_hi;
+
+	mmhub_v3_0_2_setup_vm_pt_regs(adev, 0,
+				      amdgpu_gmc_pd_addr(adev->gart.bo));
+
+	start_lo = (u32)(adev->gmc.gart_start >> 12);
+	start_hi = (u32)(adev->gmc.gart_start >> 44);
+	WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+		     start_lo);
+	WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+		     start_hi);
+
+	end_lo = (u32)(adev->gmc.gart_end >> 12);
+	end_hi = (u32)(adev->gmc.gart_end >> 44);
+	WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+		     end_lo);
+	WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+		     end_hi);
+}
+
+/*
+ * Program the AGP window, the system aperture bounds, the default page
+ * address and the protection-fault default address.
+ *
+ * The system aperture LOW/HIGH registers are only written when the device
+ * is not an SR-IOV virtual function; everything else is written
+ * unconditionally.
+ */
+static void mmhub_v3_0_2_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+ uint32_t tmp;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /*
+ * The new L1 policy blocks an SRIOV guest from writing
+ * these regs; they are programmed by the host instead,
+ * so they are only programmed here when not running as a VF.
+ */
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ /* High bound: the larger of the FB and AGP end addresses. */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+ }
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ /* Read-modify-write: set ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY. */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+
+/*
+ * Read-modify-write MMMC_VM_MX_L1_TLB_CNTL: enable the L1 TLB and the
+ * advanced driver model, set SYSTEM_ACCESS_MODE to 3, clear
+ * SYSTEM_APERTURE_UNMAPPED_ACCESS and ECO_BITS, and select MTYPE_UC.
+ */
+static void mmhub_v3_0_2_init_tlb_regs(struct amdgpu_device *adev)
+{
+	uint32_t tlb_cntl = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+
+	tlb_cntl = REG_SET_FIELD(tlb_cntl, MMMC_VM_MX_L1_TLB_CNTL,
+				 ENABLE_L1_TLB, 1);
+	tlb_cntl = REG_SET_FIELD(tlb_cntl, MMMC_VM_MX_L1_TLB_CNTL,
+				 SYSTEM_ACCESS_MODE, 3);
+	tlb_cntl = REG_SET_FIELD(tlb_cntl, MMMC_VM_MX_L1_TLB_CNTL,
+				 ENABLE_ADVANCED_DRIVER_MODEL, 1);
+	tlb_cntl = REG_SET_FIELD(tlb_cntl, MMMC_VM_MX_L1_TLB_CNTL,
+				 SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+	tlb_cntl = REG_SET_FIELD(tlb_cntl, MMMC_VM_MX_L1_TLB_CNTL,
+				 ECO_BITS, 0);
+	/* UC, uncached */
+	tlb_cntl = REG_SET_FIELD(tlb_cntl, MMMC_VM_MX_L1_TLB_CNTL,
+				 MTYPE, MTYPE_UC);
+
+	WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tlb_cntl);
+}
+
+/*
+ * Program the MMVM_L2_CNTL, _CNTL2, _CNTL3, _CNTL4 and _CNTL5 registers.
+ * Does nothing when running as an SR-IOV virtual function.
+ *
+ * CNTL and CNTL2 are read-modify-write; CNTL3/4/5 start from the
+ * regMMVM_L2_CNTL*_DEFAULT constants defined in this file rather than from
+ * the current register contents.
+ */
+static void mmhub_v3_0_2_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+
+ /* Request invalidation of all L1 TLBs and the L2 cache. */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp);
+
+ /* BANK_SELECT and big-K fragment size depend on translate_further. */
+ tmp = regMMVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp);
+
+ tmp = regMMVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp);
+
+ tmp = regMMVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
+}
+
+/*
+ * Read-modify-write MMVM_CONTEXT0_CNTL: enable the context, set
+ * PAGE_TABLE_DEPTH to 0 and clear RETRY_PERMISSION_OR_INVALID_PAGE_FAULT.
+ */
+static void mmhub_v3_0_2_enable_system_domain(struct amdgpu_device *adev)
+{
+	uint32_t ctx0_cntl = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+
+	ctx0_cntl = REG_SET_FIELD(ctx0_cntl, MMVM_CONTEXT0_CNTL,
+				  ENABLE_CONTEXT, 1);
+	ctx0_cntl = REG_SET_FIELD(ctx0_cntl, MMVM_CONTEXT0_CNTL,
+				  PAGE_TABLE_DEPTH, 0);
+	ctx0_cntl = REG_SET_FIELD(ctx0_cntl, MMVM_CONTEXT0_CNTL,
+				  RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+
+	WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, ctx0_cntl);
+}
+
+/*
+ * Program the context 1 identity aperture with a LOW address of
+ * 0xF_FFFFFFFF and a HIGH address of 0, and zero the identity physical
+ * offset. Does nothing when running as an SR-IOV virtual function.
+ */
+static void mmhub_v3_0_2_disable_identity_aperture(struct amdgpu_device *adev)
+{
+	/*
+	 * VF-SRIOV cannot access these registers; the PF programs them
+	 * instead.
+	 */
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	/* LOW address: LO32 then HI32. */
+	WREG32_SOC15(MMHUB, 0,
+		     regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+		     0xFFFFFFFF);
+	WREG32_SOC15(MMHUB, 0,
+		     regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+		     0x0000000F);
+
+	/* HIGH address: both halves zero. */
+	WREG32_SOC15(MMHUB, 0,
+		     regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+		     0);
+	WREG32_SOC15(MMHUB, 0,
+		     regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+		     0);
+
+	/* Physical offset: both halves zero. */
+	WREG32_SOC15(MMHUB, 0,
+		     regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+	WREG32_SOC15(MMHUB, 0,
+		     regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+}
+
+/*
+ * Configure 15 VM contexts, addressed as the CONTEXT1 registers plus
+ * i * hub->ctx_distance (control) or i * hub->ctx_addr_distance (address
+ * ranges) for i = 0..14.
+ *
+ * Each context is enabled, given the page table depth and block size from
+ * adev->vm_manager, has its *_PROTECTION_FAULT_ENABLE_DEFAULT bits set, and
+ * gets a page table range of 0 .. max_pfn - 1. The control value written
+ * for the last context is cached in hub->vm_cntx_cntl.
+ */
+static void mmhub_v3_0_2_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ /* Page table range for this context: 0 .. max_pfn - 1. */
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ /* Remember the control value from the final loop iteration. */
+ hub->vm_cntx_cntl = tmp;
+}
+
+/*
+ * Set the address range of 18 invalidation engines to LO32 = 0xffffffff,
+ * HI32 = 0x1f. Engines are addressed as the ENG0 registers plus
+ * engine index * hub->eng_addr_distance.
+ */
+static void mmhub_v3_0_2_program_invalidation(struct amdgpu_device *adev)
+{
+	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+	unsigned int eng;
+
+	for (eng = 0; eng < 18; eng++) {
+		WREG32_SOC15_OFFSET(MMHUB, 0,
+				    regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+				    eng * hub->eng_addr_distance, 0xffffffff);
+		WREG32_SOC15_OFFSET(MMHUB, 0,
+				    regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+				    eng * hub->eng_addr_distance, 0x1f);
+	}
+}
+
+/*
+ * Bring up the MMHUB for GART use by running the init helpers of this file
+ * in a fixed order: apertures, TLB and cache setup first, then context 0,
+ * the identity aperture, the per-VMID contexts and the invalidation engines.
+ *
+ * Always returns 0.
+ */
+static int mmhub_v3_0_2_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ mmhub_v3_0_2_init_gart_aperture_regs(adev);
+ mmhub_v3_0_2_init_system_aperture_regs(adev);
+ mmhub_v3_0_2_init_tlb_regs(adev);
+ mmhub_v3_0_2_init_cache_regs(adev);
+
+ mmhub_v3_0_2_enable_system_domain(adev);
+ mmhub_v3_0_2_disable_identity_aperture(adev);
+ mmhub_v3_0_2_setup_vmid_config(adev);
+ mmhub_v3_0_2_program_invalidation(adev);
+
+ return 0;
+}
+
+/*
+ * Tear down what gart_enable set up: zero the control register of 16
+ * contexts (CONTEXT0 plus i * hub->ctx_distance for i = 0..15), turn off
+ * the L1 TLB and advanced driver model, disable the L2 cache and write 0 to
+ * MMVM_L2_CNTL3.
+ */
+static void mmhub_v3_0_2_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0);
+}
+
+/**
+ * mmhub_v3_0_2_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ *
+ * Read-modify-write of MMVM_L2_PROTECTION_FAULT_CNTL: every
+ * *_PROTECTION_FAULT_ENABLE_DEFAULT field is set to @value. When @value is
+ * false, CRASH_ON_NO_RETRY_FAULT and CRASH_ON_RETRY_FAULT are additionally
+ * set to 1. Does nothing when running as an SR-IOV virtual function.
+ */
+static void mmhub_v3_0_2_set_fault_enable_default(struct amdgpu_device *adev, bool value)
+{
+ u32 tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ /* The crash-on-fault bits are only ever set here, never cleared. */
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+/*
+ * Per-hub callbacks for MMHUB 3.0.2; installed into hub->vmhub_funcs by
+ * mmhub_v3_0_2_init().
+ */
+static const struct amdgpu_vmhub_funcs mmhub_v3_0_2_vmhub_funcs = {
+ .print_l2_protection_fault_status = mmhub_v3_0_2_print_l2_protection_fault_status,
+ .get_invalidate_req = mmhub_v3_0_2_get_invalidate_req,
+};
+
+/*
+ * Fill in the software description of the MMHUB in
+ * adev->vmhub[AMDGPU_MMHUB0(0)]: resolved register offsets, the register
+ * strides between consecutive contexts and invalidation engines, the
+ * fault-interrupt enable mask and the vmhub callback table.
+ *
+ * Only the hub structure is written; no hardware register is accessed here.
+ */
+static void mmhub_v3_0_2_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ /* Absolute offsets of the context 0 / engine 0 registers. */
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+
+ /* Strides: register distance between instance 0 and instance 1. */
+ hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ -
+ regMMVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ /* All *_PROTECTION_FAULT_ENABLE_INTERRUPT bits of CONTEXT1_CNTL. */
+ hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vm_l2_bank_select_reserved_cid2 =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_BANK_SELECT_RESERVED_CID2);
+
+ hub->vmhub_funcs = &mmhub_v3_0_2_vmhub_funcs;
+}
+
+/*
+ * Read MMMC_VM_FB_LOCATION_BASE, keep only the FB_BASE field bits and
+ * return them shifted left by 24 as a 64-bit value.
+ */
+static u64 mmhub_v3_0_2_get_fb_location(struct amdgpu_device *adev)
+{
+	u64 fb_base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
+
+	fb_base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+
+	return fb_base << 24;
+}
+
+/*
+ * Read MMMC_VM_FB_OFFSET and return it widened to 64 bits and shifted left
+ * by 24.
+ */
+static u64 mmhub_v3_0_2_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+	u64 fb_offset = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET);
+
+	return fb_offset << 24;
+}
+
+/*
+ * Placeholder for medium-grain clock gating control. The body is still a
+ * TODO, so @enable is currently ignored and no register is touched.
+ */
+static void mmhub_v3_0_2_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ //TODO
+}
+
+/*
+ * Placeholder for medium-grain light sleep control. The body is still a
+ * TODO, so @enable is currently ignored and no register is touched.
+ */
+static void mmhub_v3_0_2_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ //TODO
+}
+
+/*
+ * Forward a clockgating state change to the medium-grain clock gating and
+ * light sleep helpers, passing true when @state is AMD_CG_STATE_GATE.
+ * Skipped entirely on an SR-IOV virtual function.
+ *
+ * Always returns 0.
+ */
+static int mmhub_v3_0_2_set_clockgating(struct amdgpu_device *adev,
+					enum amd_clockgating_state state)
+{
+	bool gate;
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	gate = (state == AMD_CG_STATE_GATE);
+	mmhub_v3_0_2_update_medium_grain_clock_gating(adev, gate);
+	mmhub_v3_0_2_update_medium_grain_light_sleep(adev, gate);
+
+	return 0;
+}
+
+/*
+ * Placeholder for reporting the active clockgating features. The body is
+ * still a TODO, so *flags is left unmodified.
+ */
+static void mmhub_v3_0_2_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ //TODO
+}
+
+/*
+ * MMHUB 3.0.2 entry points. This is the only non-static symbol defined in
+ * this file; it is declared in mmhub_v3_0_2.h.
+ */
+const struct amdgpu_mmhub_funcs mmhub_v3_0_2_funcs = {
+ .init = mmhub_v3_0_2_init,
+ .get_fb_location = mmhub_v3_0_2_get_fb_location,
+ .get_mc_fb_offset = mmhub_v3_0_2_get_mc_fb_offset,
+ .gart_enable = mmhub_v3_0_2_gart_enable,
+ .set_fault_enable_default = mmhub_v3_0_2_set_fault_enable_default,
+ .gart_disable = mmhub_v3_0_2_gart_disable,
+ .set_clockgating = mmhub_v3_0_2_set_clockgating,
+ .get_clockgating = mmhub_v3_0_2_get_clockgating,
+ .setup_vm_pt_regs = mmhub_v3_0_2_setup_vm_pt_regs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.h
new file mode 100644
index 000000000000..23ad7b156cdb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V3_0_2_H__
+#define __MMHUB_V3_0_2_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v3_0_2_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c
new file mode 100644
index 000000000000..f6fc9778bc30
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c
@@ -0,0 +1,746 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "mmhub_v3_3.h"
+
+#include "mmhub/mmhub_3_3_0_offset.h"
+#include "mmhub/mmhub_3_3_0_sh_mask.h"
+
+#include "navi10_enum.h"
+#include "soc15_common.h"
+
+#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
+#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0
+#define regDAGB0_L1TLB_REG_RW_3_3 0x00a4
+#define regDAGB0_L1TLB_REG_RW_3_3_BASE_IDX 1
+#define regDAGB1_L1TLB_REG_RW_3_3 0x0163
+#define regDAGB1_L1TLB_REG_RW_3_3_BASE_IDX 1
+
+/*
+ * MMHUB client names used for IP versions 3.3.0 and 3.3.2, looked up as
+ * mmhub_client_ids_v3_3[cid][rw] when a protection fault is reported.
+ * Entries without an initializer are NULL.
+ * NOTE(review): the RD/WR suffixes suggest column 0 holds read clients and
+ * column 1 write clients - confirm against the RW field definition.
+ */
+static const char *mmhub_client_ids_v3_3[][2] = {
+ [0][0] = "VMC",
+ [1][0] = "ISPXT",
+ [2][0] = "ISPIXT",
+ [4][0] = "DCEDMC",
+ [6][0] = "MP0",
+ [7][0] = "MP1",
+ [8][0] = "MPM",
+ [9][0] = "ISPPDPRD",
+ [10][0] = "ISPCSTATRD",
+ [11][0] = "ISPBYRPRD",
+ [12][0] = "ISPRGBPRD",
+ [13][0] = "ISPMCFPRD",
+ [14][0] = "ISPMCFPRD1",
+ [15][0] = "ISPYUVPRD",
+ [16][0] = "ISPMCSCRD",
+ [17][0] = "ISPGDCRD",
+ [18][0] = "ISPLMERD",
+ [22][0] = "ISPXT1",
+ [23][0] = "ISPIXT1",
+ [24][0] = "HDP",
+ [25][0] = "LSDMA",
+ [26][0] = "JPEG",
+ [27][0] = "VPE",
+ [28][0] = "VSCH",
+ [29][0] = "VCNU",
+ [30][0] = "VCN",
+ [1][1] = "ISPXT",
+ [2][1] = "ISPIXT",
+ [3][1] = "DCEDWB",
+ [4][1] = "DCEDMC",
+ [5][1] = "ISPCSISWR",
+ [6][1] = "MP0",
+ [7][1] = "MP1",
+ [8][1] = "MPM",
+ [9][1] = "ISPPDPWR",
+ [10][1] = "ISPCSTATWR",
+ [11][1] = "ISPBYRPWR",
+ [12][1] = "ISPRGBPWR",
+ [13][1] = "ISPMCFPWR",
+ [14][1] = "ISPMWR0",
+ [15][1] = "ISPYUVPWR",
+ [16][1] = "ISPMCSCWR",
+ [17][1] = "ISPGDCWR",
+ [18][1] = "ISPLMEWR",
+ [20][1] = "ISPMWR2",
+ [21][1] = "OSSSYS",
+ [22][1] = "ISPXT1",
+ [23][1] = "ISPIXT1",
+ [24][1] = "HDP",
+ [25][1] = "LSDMA",
+ [26][1] = "JPEG",
+ [27][1] = "VPE",
+ [28][1] = "VSCH",
+ [29][1] = "VCNU",
+ [30][1] = "VCN",
+};
+
+/*
+ * MMHUB client names used for IP version 3.3.1, looked up as
+ * mmhub_client_ids_v3_3_1[cid][rw] when a protection fault is reported.
+ * Entries without an initializer are NULL. A second group of clients is
+ * placed at indices 32 and above, written as 32+n.
+ * NOTE(review): the RD/WR suffixes suggest column 0 holds read clients and
+ * column 1 write clients - confirm against the RW field definition.
+ */
+static const char *mmhub_client_ids_v3_3_1[][2] = {
+ [0][0] = "VMC",
+ [4][0] = "DCEDMC",
+ [6][0] = "MP0",
+ [7][0] = "MP1",
+ [8][0] = "MPM",
+ [24][0] = "HDP",
+ [25][0] = "LSDMA",
+ [26][0] = "JPEG0",
+ [27][0] = "VPE0",
+ [28][0] = "VSCH",
+ [29][0] = "VCNU0",
+ [30][0] = "VCN0",
+ [32+1][0] = "ISPXT",
+ [32+2][0] = "ISPIXT",
+ [32+9][0] = "ISPPDPRD",
+ [32+10][0] = "ISPCSTATRD",
+ [32+11][0] = "ISPBYRPRD",
+ [32+12][0] = "ISPRGBPRD",
+ [32+13][0] = "ISPMCFPRD",
+ [32+14][0] = "ISPMCFPRD1",
+ [32+15][0] = "ISPYUVPRD",
+ [32+16][0] = "ISPMCSCRD",
+ [32+17][0] = "ISPGDCRD",
+ [32+18][0] = "ISPLMERD",
+ [32+22][0] = "ISPXT1",
+ [32+23][0] = "ISPIXT1",
+ [32+26][0] = "JPEG1",
+ [32+27][0] = "VPE1",
+ [32+29][0] = "VCNU1",
+ [32+30][0] = "VCN1",
+ [3][1] = "DCEDWB",
+ [4][1] = "DCEDMC",
+ [6][1] = "MP0",
+ [7][1] = "MP1",
+ [8][1] = "MPM",
+ [21][1] = "OSSSYS",
+ [24][1] = "HDP",
+ [25][1] = "LSDMA",
+ [26][1] = "JPEG0",
+ [27][1] = "VPE0",
+ [28][1] = "VSCH",
+ [29][1] = "VCNU0",
+ [30][1] = "VCN0",
+ [32+1][1] = "ISPXT",
+ [32+2][1] = "ISPIXT",
+ [32+5][1] = "ISPCSISWR",
+ [32+9][1] = "ISPPDPWR",
+ [32+10][1] = "ISPCSTATWR",
+ [32+11][1] = "ISPBYRPWR",
+ [32+12][1] = "ISPRGBPWR",
+ [32+13][1] = "ISPMCFPWR",
+ [32+14][1] = "ISPMWR0",
+ [32+15][1] = "ISPYUVPWR",
+ [32+16][1] = "ISPMCSCWR",
+ [32+17][1] = "ISPGDCWR",
+ [32+18][1] = "ISPLMEWR",
+ [32+19][1] = "ISPMWR1",
+ [32+20][1] = "ISPMWR2",
+ [32+22][1] = "ISPXT1",
+ [32+23][1] = "ISPIXT1",
+ [32+26][1] = "JPEG1",
+ [32+27][1] = "VPE1",
+ [32+29][1] = "VCNU1",
+ [32+30][1] = "VCN1",
+};
+
+/*
+ * Compose the MMVM_INVALIDATE_ENG0_REQ value for a legacy-mode invalidation
+ * of one VMID.
+ *
+ * @vmid: VMID whose bit is set in PER_VMID_INVALIDATE_REQ
+ * @flush_type: value for the FLUSH_TYPE field; a flush_type of 0 is
+ *              replaced by 1
+ *
+ * Returns the request word; nothing is written to hardware here.
+ */
+static uint32_t mmhub_v3_3_get_invalidate_req(unsigned int vmid,
+					      uint32_t flush_type)
+{
+	u32 inv_req = 0;
+
+	/* Target: the single VMID; "?:" substitutes 1 for a zero flush type. */
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				PER_VMID_INVALIDATE_REQ, 1 << vmid);
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				FLUSH_TYPE, flush_type ? : 1);
+
+	/* Scope: L2 PTEs, all three L2 PDE levels, then L1 PTEs. */
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L2_PTES, 1);
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L2_PDE0, 1);
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L2_PDE1, 1);
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L2_PDE2, 1);
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L1_PTES, 1);
+
+	/* CLEAR_PROTECTION_FAULT_STATUS_ADDR is explicitly left at 0. */
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+	return inv_req;
+}
+
+/*
+ * Decode an MMVM_L2_PROTECTION_FAULT_STATUS value and log its fields with
+ * dev_err().
+ *
+ * @adev: amdgpu_device pointer (used for the log device and IP version)
+ * @status: raw MMVM_L2_PROTECTION_FAULT_STATUS register value
+ *
+ * The client name table is chosen by MMHUB IP version. A client ID beyond
+ * the chosen table is reported as "UMSCH" when it equals 0x140; anything
+ * else without a name is printed as "unknown".
+ */
+static void
+mmhub_v3_3_print_l2_protection_fault_status(struct amdgpu_device *adev,
+					    uint32_t status)
+{
+	const char *client_name = NULL;
+	uint32_t cid, rw;
+
+	cid = REG_GET_FIELD(status, MMVM_L2_PROTECTION_FAULT_STATUS, CID);
+	rw = REG_GET_FIELD(status, MMVM_L2_PROTECTION_FAULT_STATUS, RW);
+
+	dev_err(adev->dev, "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status);
+
+	switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+	case IP_VERSION(3, 3, 0):
+	case IP_VERSION(3, 3, 2):
+		if (cid < ARRAY_SIZE(mmhub_client_ids_v3_3))
+			client_name = mmhub_client_ids_v3_3[cid][rw];
+		else if (cid == 0x140)
+			client_name = "UMSCH";
+		break;
+	case IP_VERSION(3, 3, 1):
+		if (cid < ARRAY_SIZE(mmhub_client_ids_v3_3_1))
+			client_name = mmhub_client_ids_v3_3_1[cid][rw];
+		else if (cid == 0x140)
+			client_name = "UMSCH";
+		break;
+	default:
+		/* Unrecognised IP version: no name table, stays NULL. */
+		break;
+	}
+
+	dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+		client_name ? client_name : "unknown", cid);
+	dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+		REG_GET_FIELD(status,
+			      MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+	dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+		REG_GET_FIELD(status,
+			      MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+	dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+		REG_GET_FIELD(status,
+			      MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+	dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+		REG_GET_FIELD(status,
+			      MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+	dev_err(adev->dev, "\t RW: 0x%x\n", rw);
+}
+
+/*
+ * Write a page table base address into the PAGE_TABLE_BASE_ADDR register
+ * pair of one VM context.
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: context index; the register pair is selected by offsetting the
+ *        CONTEXT0 registers by hub->ctx_addr_distance * vmid
+ * @page_table_base: 64-bit value split across the LO32/HI32 registers
+ */
+static void mmhub_v3_3_setup_vm_pt_regs(struct amdgpu_device *adev,
+					uint32_t vmid,
+					uint64_t page_table_base)
+{
+	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+	u32 base_lo = lower_32_bits(page_table_base);
+	u32 base_hi = upper_32_bits(page_table_base);
+
+	/* Low half first, then high half. */
+	WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+			    hub->ctx_addr_distance * vmid, base_lo);
+	WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+			    hub->ctx_addr_distance * vmid, base_hi);
+}
+
+/*
+ * Program context 0 for the GART: the page table base comes from the GART
+ * buffer object, and the START/END register pairs come from
+ * adev->gmc.gart_start / gart_end (shifted right by 12 for LO32 and by 44
+ * for HI32).
+ */
+static void mmhub_v3_3_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+	u32 start_lo, start_hi, end_lo, end_hi;
+
+	mmhub_v3_3_setup_vm_pt_regs(adev, 0,
+				    amdgpu_gmc_pd_addr(adev->gart.bo));
+
+	start_lo = (u32)(adev->gmc.gart_start >> 12);
+	start_hi = (u32)(adev->gmc.gart_start >> 44);
+	WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+		     start_lo);
+	WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+		     start_hi);
+
+	end_lo = (u32)(adev->gmc.gart_end >> 12);
+	end_hi = (u32)(adev->gmc.gart_end >> 44);
+	WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+		     end_lo);
+	WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+		     end_hi);
+}
+
+/*
+ * Program the AGP window, the system aperture bounds, the default page
+ * address and the protection-fault default address. All writes in this
+ * function are unconditional.
+ */
+static void mmhub_v3_3_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+ uint32_t tmp;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /*
+ * NOTE(review): an earlier version of this comment said the new L1
+ * policy blocks an SRIOV guest from writing these regs and that
+ * programming them is therefore skipped. This function contains no
+ * amdgpu_sriov_vf() check: the two writes below always happen.
+ * Confirm whether a guard is intentionally absent here.
+ */
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ /* High bound: the larger of the FB and AGP end addresses. */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ /* Read-modify-write: set ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY. */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+
+/*
+ * Read-modify-write MMMC_VM_MX_L1_TLB_CNTL: enable the L1 TLB and the
+ * advanced driver model, set SYSTEM_ACCESS_MODE to 3, clear
+ * SYSTEM_APERTURE_UNMAPPED_ACCESS and ECO_BITS, and select MTYPE_UC.
+ */
+static void mmhub_v3_3_init_tlb_regs(struct amdgpu_device *adev)
+{
+	uint32_t tlb_cntl = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+
+	tlb_cntl = REG_SET_FIELD(tlb_cntl, MMMC_VM_MX_L1_TLB_CNTL,
+				 ENABLE_L1_TLB, 1);
+	tlb_cntl = REG_SET_FIELD(tlb_cntl, MMMC_VM_MX_L1_TLB_CNTL,
+				 SYSTEM_ACCESS_MODE, 3);
+	tlb_cntl = REG_SET_FIELD(tlb_cntl, MMMC_VM_MX_L1_TLB_CNTL,
+				 ENABLE_ADVANCED_DRIVER_MODEL, 1);
+	tlb_cntl = REG_SET_FIELD(tlb_cntl, MMMC_VM_MX_L1_TLB_CNTL,
+				 SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+	tlb_cntl = REG_SET_FIELD(tlb_cntl, MMMC_VM_MX_L1_TLB_CNTL,
+				 ECO_BITS, 0);
+	/* UC, uncached */
+	tlb_cntl = REG_SET_FIELD(tlb_cntl, MMMC_VM_MX_L1_TLB_CNTL,
+				 MTYPE, MTYPE_UC);
+
+	WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tlb_cntl);
+}
+
+/*
+ * Program the L2 control registers MMVM_L2_CNTL .. MMVM_L2_CNTL5.
+ *
+ * CNTL and CNTL2 are read-modify-write; CNTL3, CNTL4 and CNTL5 are built from
+ * the regMMVM_L2_CNTLn_DEFAULT constants rather than the current register
+ * contents.
+ */
+static void mmhub_v3_3_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+
+ /* request invalidation of all L1 TLBs and of the L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp);
+
+ /* bank select and big-K fragment size depend on gmc.translate_further */
+ tmp = regMMVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp);
+
+ tmp = regMMVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp);
+
+ tmp = regMMVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
+}
+
+/*
+ * Read-modify-write MMVM_CONTEXT0_CNTL: set ENABLE_CONTEXT, use a page table
+ * depth of 0 and clear RETRY_PERMISSION_OR_INVALID_PAGE_FAULT.
+ */
+static void mmhub_v3_3_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp);
+}
+
+/*
+ * Program the CONTEXT1 identity aperture registers with a low address of
+ * 0xF_FFFFFFFF, a high address of 0 and a physical offset of 0.
+ *
+ * NOTE(review): low > high presumably yields an empty range, which is how the
+ * aperture ends up "disabled" - confirm against the register specification.
+ */
+static void mmhub_v3_3_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ 0);
+}
+
+/*
+ * Configure the 15 VM contexts starting at MMVM_CONTEXT1 (loop index 0..14).
+ *
+ * For each context the CNTL register is read-modify-written to enable the
+ * context, set page table depth/block size from adev->vm_manager and enable
+ * the listed protection-fault defaults; the page table start address is set
+ * to 0 and the end address to vm_manager.max_pfn - 1.
+ *
+ * The CNTL value written for the last context is cached in hub->vm_cntx_cntl.
+ */
+static void mmhub_v3_3_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ /* per-context CNTL registers are hub->ctx_distance apart */
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ /* address registers use their own stride, hub->ctx_addr_distance */
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ /* remember the CNTL value of the last context programmed */
+ hub->vm_cntx_cntl = tmp;
+}
+
+/*
+ * For invalidation engines 0..17, set the ADDR_RANGE register pair to
+ * LO32 = 0xffffffff and HI32 = 0x1f. Engines are hub->eng_addr_distance apart.
+ */
+static void mmhub_v3_3_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned int i;
+
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+/*
+ * Program the MMVM_L2_SAW_* registers: context 0 page table base (from the
+ * GART BO), GART start/end, context 0 control, the contexts-disable mask and
+ * the context 0 PDE/PTE snoop bits in SAW_CNTL4.
+ */
+static void mmhub_v3_3_init_saw_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+ uint32_t tmp;
+
+ /* Program page table base, gart start, gart end */
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ lower_32_bits(pt_base >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ upper_32_bits(pt_base >> 12));
+
+ /* addresses are written as (addr >> 12), split into low/high 32 bits */
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_SAW_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_SAW_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_CNTL, tmp);
+
+ /* Disable all contexts except context 0 */
+ tmp = 0xfffe;
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXTS_DISABLE, tmp);
+
+ /* Program saw cntl4 */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CNTL4);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_SAW_CNTL4, VMC_TAP_CONTEXT0_PDE_REQUEST_SNOOP, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_SAW_CNTL4, VMC_TAP_CONTEXT0_PTE_REQUEST_SNOOP, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CNTL4, tmp);
+}
+
+/*
+ * Write 0 to DAGB0_L1TLB_REG_RW_3_3 and 3 to DAGB1_L1TLB_REG_RW_3_3.
+ *
+ * NOTE(review): the meaning of the values 0 and 3 is not visible here;
+ * confirm against the register specification before changing them.
+ */
+static void mmhub_v3_3_enable_tls(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(MMHUB, 0, regDAGB0_L1TLB_REG_RW_3_3, 0);
+ WREG32_SOC15(MMHUB, 0, regDAGB1_L1TLB_REG_RW_3_3, 3);
+}
+
+/*
+ * Run the full MMHUB bring-up sequence: apertures, TLB, L2 cache, system
+ * domain (context 0), identity aperture, per-VMID contexts, invalidation
+ * engines, the standalone walker and TLS.
+ *
+ * Always returns 0.
+ */
+static int mmhub_v3_3_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ mmhub_v3_3_init_gart_aperture_regs(adev);
+ mmhub_v3_3_init_system_aperture_regs(adev);
+ mmhub_v3_3_init_tlb_regs(adev);
+ mmhub_v3_3_init_cache_regs(adev);
+
+ mmhub_v3_3_enable_system_domain(adev);
+ mmhub_v3_3_disable_identity_aperture(adev);
+ mmhub_v3_3_setup_vmid_config(adev);
+ mmhub_v3_3_program_invalidation(adev);
+
+ /* standalone walker init */
+ mmhub_v3_3_init_saw_regs(adev);
+
+ /* enable mmhub tls */
+ mmhub_v3_3_enable_tls(adev);
+
+ return 0;
+}
+
+/*
+ * Tear down address translation: zero the CNTL register of all 16 VM
+ * contexts, clear ENABLE_L1_TLB and ENABLE_ADVANCED_DRIVER_MODEL, clear
+ * ENABLE_L2_CACHE and write 0 to MMVM_L2_CNTL3.
+ */
+static void mmhub_v3_3_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0);
+}
+
+/**
+ * mmhub_v3_3_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ *
+ * Read-modify-write of MMVM_L2_PROTECTION_FAULT_CNTL: every
+ * *_PROTECTION_FAULT_ENABLE_DEFAULT field is set to @value. When @value is
+ * false, CRASH_ON_NO_RETRY_FAULT and CRASH_ON_RETRY_FAULT are additionally
+ * set; when @value is true those two fields keep whatever was read back.
+ */
+static void mmhub_v3_3_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ /* the crash-on-fault bits are only ever set here, never cleared */
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+/* vmhub callbacks for MMHUB v3.3; installed into the hub by mmhub_v3_3_init() */
+static const struct amdgpu_vmhub_funcs mmhub_v3_3_vmhub_funcs = {
+ .print_l2_protection_fault_status = mmhub_v3_3_print_l2_protection_fault_status,
+ .get_invalidate_req = mmhub_v3_3_get_invalidate_req,
+};
+
+/*
+ * Fill in the MMHUB0 vmhub descriptor: absolute register offsets for the
+ * context 0 page table base, invalidation engine 0, context 0 control and
+ * the L2 protection fault registers; the register strides between
+ * consecutive contexts/engines; the fault interrupt enable mask; and the
+ * vmhub callback table.
+ *
+ * Only writes software state in adev->vmhub[]; no register is accessed.
+ */
+static void mmhub_v3_3_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+
+ /* strides are derived from the distance between instance 0 and 1 */
+ hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ -
+ regMMVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ /* all per-context protection fault interrupt enable bits */
+ hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &mmhub_v3_3_vmhub_funcs;
+}
+
+/*
+ * Return the FB_BASE field of MMMC_VM_FB_LOCATION_BASE shifted left by
+ * 24 bits, as a 64-bit value.
+ */
+static u64 mmhub_v3_3_get_fb_location(struct amdgpu_device *adev)
+{
+ /* held in a u64 so the shift below cannot drop the upper bits */
+ u64 fb_base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
+
+ fb_base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+
+ return fb_base << 24;
+}
+
+/*
+ * Return the FB_OFFSET field of MMMC_VM_FB_OFFSET shifted left by 24 bits,
+ * as a 64-bit value.
+ */
+static u64 mmhub_v3_3_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ /* held in a u64 so the shift below cannot drop the upper bits */
+ u64 fb_offset = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET);
+
+ fb_offset &= MMMC_VM_FB_OFFSET__FB_OFFSET_MASK;
+
+ return fb_offset << 24;
+}
+
+/*
+ * Set or clear the ENABLE bit of MM_ATC_L2_MISC_CG according to @enable.
+ * The register is only written when the bit actually changes.
+ */
+static void mmhub_v3_3_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t current_val, wanted_val;
+
+ current_val = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+ wanted_val = current_val;
+
+ if (!enable)
+ wanted_val &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK;
+ else
+ wanted_val |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
+
+ /* skip the register write when nothing would change */
+ if (wanted_val == current_val)
+ return;
+
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, wanted_val);
+}
+
+/*
+ * Set or clear the MEM_LS_ENABLE bit of MM_ATC_L2_MISC_CG according to
+ * @enable. The register is only written when the bit actually changes.
+ */
+static void mmhub_v3_3_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t current_val, wanted_val;
+
+ current_val = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+ wanted_val = current_val;
+
+ if (!enable)
+ wanted_val &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+ else
+ wanted_val |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+
+ /* skip the register write when nothing would change */
+ if (wanted_val == current_val)
+ return;
+
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, wanted_val);
+}
+
+/*
+ * Enable medium-grain clock gating and light sleep when @state is
+ * AMD_CG_STATE_GATE, disable both otherwise. Does nothing on an SR-IOV
+ * virtual function. Always returns 0.
+ */
+static int mmhub_v3_3_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ bool gate = (state == AMD_CG_STATE_GATE);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ mmhub_v3_3_update_medium_grain_clock_gating(adev, gate);
+ mmhub_v3_3_update_medium_grain_light_sleep(adev, gate);
+ }
+
+ return 0;
+}
+
+/*
+ * Report which MMHUB clock-gating features are currently enabled.
+ *
+ * @adev: amdgpu_device pointer
+ * @flags: in/out mask; AMD_CG_SUPPORT_MC_MGCG and AMD_CG_SUPPORT_MC_LS are
+ * OR-ed in according to the ENABLE and MEM_LS_ENABLE bits of MM_ATC_L2_MISC_CG
+ */
+static void mmhub_v3_3_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ u32 data;
+
+ /*
+ * NOTE(review): on an SR-IOV VF this clears *flags but does not return, so
+ * the register is still read and bits may still be OR-ed in below, while
+ * mmhub_v3_3_set_clockgating() returns early in the same case - confirm
+ * whether an early return is intended here.
+ */
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
+}
+
+/* MMHUB v3.3 callback table; the only non-static symbol of this file (declared in mmhub_v3_3.h) */
+const struct amdgpu_mmhub_funcs mmhub_v3_3_funcs = {
+ .init = mmhub_v3_3_init,
+ .get_fb_location = mmhub_v3_3_get_fb_location,
+ .get_mc_fb_offset = mmhub_v3_3_get_mc_fb_offset,
+ .gart_enable = mmhub_v3_3_gart_enable,
+ .set_fault_enable_default = mmhub_v3_3_set_fault_enable_default,
+ .gart_disable = mmhub_v3_3_gart_disable,
+ .set_clockgating = mmhub_v3_3_set_clockgating,
+ .get_clockgating = mmhub_v3_3_get_clockgating,
+ .setup_vm_pt_regs = mmhub_v3_3_setup_vm_pt_regs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.h
new file mode 100644
index 000000000000..37b62c7e5a4a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMHUB_V3_3_H__
+#define __MMHUB_V3_3_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v3_3_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
new file mode 100644
index 000000000000..951998454b25
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
@@ -0,0 +1,647 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "mmhub_v4_1_0.h"
+
+#include "mmhub/mmhub_4_1_0_offset.h"
+#include "mmhub/mmhub_4_1_0_sh_mask.h"
+
+#include "soc15_common.h"
+#include "soc24_enum.h"
+
+#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
+#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0
+
+/*
+ * Client names used when reporting faults, indexed as [cid][rw] (see the
+ * lookup in mmhub_v4_1_0_print_l2_protection_fault_status()). Slots that are
+ * not listed are NULL; the highest initialised row is 32+23, so the table
+ * has 56 rows.
+ */
+static const char *mmhub_client_ids_v4_1_0[][2] = {
+ [0][0] = "VMC",
+ [4][0] = "DCEDMC",
+ [6][0] = "MP0",
+ [7][0] = "MP1",
+ [8][0] = "MPIO",
+ [16][0] = "LSDMA",
+ [17][0] = "JPEG",
+ [19][0] = "VCNU",
+ [22][0] = "VSCH",
+ [23][0] = "HDP",
+ [32+23][0] = "VCNRD",
+ [3][1] = "DCEDWB",
+ [4][1] = "DCEDMC",
+ [6][1] = "MP0",
+ [7][1] = "MP1",
+ [8][1] = "MPIO",
+ [10][1] = "DBGU0",
+ [11][1] = "DBGU1",
+ [12][1] = "DBGUNBIO",
+ [14][1] = "XDP",
+ [15][1] = "OSSSYS",
+ [16][1] = "LSDMA",
+ [17][1] = "JPEG",
+ [18][1] = "VCNWR",
+ [19][1] = "VCNU",
+ [22][1] = "VSCH",
+ [23][1] = "HDP",
+};
+
+/*
+ * Build the MMVM_INVALIDATE_ENG0_REQ value that invalidates @vmid.
+ *
+ * @vmid: VM context to invalidate; becomes bit (1 << vmid) of
+ * PER_VMID_INVALIDATE_REQ
+ * @flush_type: not used - FLUSH_TYPE is always programmed as 0
+ *
+ * Returns the request word with L2 PTE/PDE0/PDE1/PDE2 and L1 PTE
+ * invalidation requested and CLEAR_PROTECTION_FAULT_STATUS_ADDR cleared.
+ * No register is accessed.
+ */
+static uint32_t mmhub_v4_1_0_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ /* Only use legacy inv on mmhub side */
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+/*
+ * Decode and log an MMVM_L2_PROTECTION_FAULT_STATUS_LO32 value.
+ *
+ * @adev: amdgpu_device pointer
+ * @status: raw fault status register value to decode
+ *
+ * Logs the raw status, the faulting client (name looked up by CID and RW in
+ * mmhub_client_ids_v4_1_0, "unknown" when there is no entry or the CID is
+ * outside the table) and the MORE_FAULTS, WALKER_ERROR, PERMISSION_FAULTS,
+ * MAPPING_ERROR and RW fields.
+ */
+static void
+mmhub_v4_1_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ uint32_t cid, rw;
+ const char *mmhub_cid = NULL;
+
+ cid = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS_LO32, CID);
+ rw = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS_LO32, RW);
+
+ dev_err(adev->dev,
+ "MMVM_L2_PROTECTION_FAULT_STATUS_LO32:0x%08X\n",
+ status);
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ /* CID comes from hardware; never index past the name table */
+ mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v4_1_0) ?
+ mmhub_client_ids_v4_1_0[cid][rw] : NULL;
+ break;
+ default:
+ mmhub_cid = NULL;
+ break;
+ }
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ mmhub_cid ? mmhub_cid : "unknown", cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS_LO32, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS_LO32, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS_LO32, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS_LO32, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%x\n", rw);
+}
+
+/*
+ * Write @page_table_base into the PAGE_TABLE_BASE_ADDR LO32/HI32 register
+ * pair of context @vmid.
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: context index; selects the register pair via hub->ctx_addr_distance
+ * @page_table_base: 64-bit value, written unshifted as low/high halves
+ */
+static void mmhub_v4_1_0_setup_vm_pt_regs(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+/*
+ * Point context 0 at the GART page table (address taken from adev->gart.bo)
+ * and program the context 0 page table start/end registers from
+ * gmc.gart_start / gmc.gart_end.
+ */
+static void mmhub_v4_1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v4_1_0_setup_vm_pt_regs(adev, 0, pt_base);
+
+ /* addresses are written as (addr >> 12), split into low/high 32 bits */
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+/*
+ * Program the AGP window, the system aperture bounds, the system aperture
+ * default address and the protection fault default address, then set
+ * ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY in MMVM_L2_PROTECTION_FAULT_CNTL2.
+ *
+ * Does nothing on an SR-IOV virtual function.
+ */
+static void mmhub_v4_1_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+ uint32_t tmp;
+
+ /*
+ * the new L1 policy will block SRIOV guest from writing
+ * these regs, and they will be programmed at host.
+ * so skip programming these regs.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ /* rebase the scratch address from vram_start onto vram_base_offset */
+ value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start +
+ adev->vm_manager.vram_base_offset;
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+
+/*
+ * Read-modify-write MMMC_VM_MX_L1_TLB_CNTL: enable the L1 TLB and the
+ * advanced driver model, and set the access-mode, unmapped-access, ECO and
+ * MTYPE fields to the fixed values below.
+ */
+static void mmhub_v4_1_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ /* single write-back of the accumulated field changes */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+/*
+ * Program the L2 control registers MMVM_L2_CNTL .. MMVM_L2_CNTL5.
+ *
+ * CNTL and CNTL2 are read-modify-write; CNTL3, CNTL4 and CNTL5 are built from
+ * the regMMVM_L2_CNTLn_DEFAULT constants defined at the top of this file
+ * rather than the current register contents.
+ *
+ * Does nothing on an SR-IOV virtual function.
+ */
+static void mmhub_v4_1_0_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+
+ /* request invalidation of all L1 TLBs and of the L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp);
+
+ /* bank select and big-K fragment size depend on gmc.translate_further */
+ tmp = regMMVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp);
+
+ tmp = regMMVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp);
+
+ tmp = regMMVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
+}
+
+/*
+ * Read-modify-write MMVM_CONTEXT0_CNTL: set ENABLE_CONTEXT, use a page table
+ * depth of 0 and clear RETRY_PERMISSION_OR_INVALID_PAGE_FAULT.
+ */
+static void mmhub_v4_1_0_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp);
+}
+
+/*
+ * Program the CONTEXT1 identity aperture registers with a low address of
+ * 0xF_FFFFFFFF, a high address of 0 and a physical offset of 0.
+ * Does nothing on an SR-IOV virtual function.
+ *
+ * NOTE(review): low > high presumably yields an empty range, which is how the
+ * aperture ends up "disabled" - confirm against the register specification.
+ */
+static void mmhub_v4_1_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ 0);
+}
+
+/*
+ * Configure the 15 VM contexts starting at MMVM_CONTEXT1 (loop index 0..14).
+ *
+ * For each context the CNTL register is read-modify-written to enable the
+ * context, set page table depth/block size from adev->vm_manager and enable
+ * the listed protection-fault defaults; the page table start address is set
+ * to 0 and the end address to vm_manager.max_pfn - 1.
+ *
+ * The CNTL value written for the last context is cached in hub->vm_cntx_cntl.
+ */
+static void mmhub_v4_1_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ /*
+ * Read from the same per-context register that is written back
+ * below; both must use the hub->ctx_distance stride.
+ */
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ /* address registers use their own stride, hub->ctx_addr_distance */
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ /* remember the CNTL value of the last context programmed */
+ hub->vm_cntx_cntl = tmp;
+}
+
+/*
+ * For invalidation engines 0..17, set the ADDR_RANGE register pair to
+ * LO32 = 0xffffffff and HI32 = 0x1f. Engines are hub->eng_addr_distance apart.
+ */
+static void mmhub_v4_1_0_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *mmhub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned int eng = 0;
+
+ while (eng < 18) {
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ eng * mmhub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ eng * mmhub->eng_addr_distance, 0x1f);
+ eng++;
+ }
+}
+
+/*
+ * Run the MMHUB bring-up sequence: apertures, TLB, L2 cache, system domain
+ * (context 0), identity aperture, per-VMID contexts and invalidation engines.
+ *
+ * Always returns 0.
+ */
+static int mmhub_v4_1_0_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ mmhub_v4_1_0_init_gart_aperture_regs(adev);
+ mmhub_v4_1_0_init_system_aperture_regs(adev);
+ mmhub_v4_1_0_init_tlb_regs(adev);
+ mmhub_v4_1_0_init_cache_regs(adev);
+
+ mmhub_v4_1_0_enable_system_domain(adev);
+ mmhub_v4_1_0_disable_identity_aperture(adev);
+ mmhub_v4_1_0_setup_vmid_config(adev);
+ mmhub_v4_1_0_program_invalidation(adev);
+
+ return 0;
+}
+
+/*
+ * Tear down address translation: zero the CNTL register of all 16 VM
+ * contexts, clear ENABLE_L1_TLB and ENABLE_ADVANCED_DRIVER_MODEL, clear
+ * ENABLE_L2_CACHE and write 0 to MMVM_L2_CNTL3.
+ */
+static void mmhub_v4_1_0_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0);
+}
+
+/**
+ * mmhub_v4_1_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ *
+ * Read-modify-write of MMVM_L2_PROTECTION_FAULT_CNTL: every
+ * *_PROTECTION_FAULT_ENABLE_DEFAULT field is set to @value. When @value is
+ * false, CRASH_ON_NO_RETRY_FAULT and CRASH_ON_RETRY_FAULT are additionally
+ * set; when @value is true those two fields keep whatever was read back.
+ * Does nothing on an SR-IOV virtual function.
+ */
+static void
+mmhub_v4_1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
+{
+ u32 tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ /* the crash-on-fault bits are only ever set here, never cleared */
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs mmhub_v4_1_0_vmhub_funcs = { /* per-hub callbacks; stored in hub->vmhub_funcs by mmhub_v4_1_0_init() */
+ .print_l2_protection_fault_status = mmhub_v4_1_0_print_l2_protection_fault_status,
+ .get_invalidate_req = mmhub_v4_1_0_get_invalidate_req,
+};
+
+static void mmhub_v4_1_0_init(struct amdgpu_device *adev)
+{ /* Fill the MMHUB0 vmhub descriptor with register offsets, strides, fault mask and callbacks; no register is written here. */
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS_LO32); /* only the LO32 half of the status register is recorded */
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL; /* strides: instance-1 register minus instance-0 register */
+ hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ -
+ regMMVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | /* OR of seven fault-interrupt enable masks */
+ MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vm_l2_bank_select_reserved_cid2 =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_BANK_SELECT_RESERVED_CID2);
+
+ hub->vm_contexts_disable =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXTS_DISABLE);
+
+ hub->vmhub_funcs = &mmhub_v4_1_0_vmhub_funcs;
+}
+
+static u64 mmhub_v4_1_0_get_fb_location(struct amdgpu_device *adev)
+{ /* Return the FB_BASE field of MMMC_VM_FB_LOCATION_BASE shifted left by 24 bits. */
+ u64 base;
+
+ base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
+
+ base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24; /* base is u64, so the shift is done in 64 bits; presumably the field counts 16 MiB units - TODO confirm */
+
+ return base;
+}
+
+static u64 mmhub_v4_1_0_get_mc_fb_offset(struct amdgpu_device *adev)
+{ /* Return the raw MMMC_VM_FB_OFFSET register value shifted left by 24 bits (no field mask is applied). */
+ return (u64)RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET) << 24; /* cast first so the shift happens in 64 bits */
+}
+
+static void
+mmhub_v4_1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{ /* Clear (enable) or set (disable) the DISABLE_*_TAP_CHAIN_FGCG bits in DAGB0/DAGB1 CNTL_MISC2. */
+#if 0 /* every #if 0 section in this function is the compiled-out MM_ATC_L2_MISC_CG path */
+ uint32_t def, data;
+#endif
+ uint32_t def1, data1, def2 = 0, data2 = 0; /* defN: value read back; dataN: value to write */
+#if 0
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+#endif
+ def1 = data1 = RREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2);
+ def2 = data2 = RREG32_SOC15(MMHUB, 0, regDAGB1_CNTL_MISC2); /* overwrites the zero initialisers above */
+
+ if (enable) { /* the fields are DISABLE bits, so enabling clears them */
+#if 0
+ data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
+#endif
+ data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK);
+
+ data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK);
+ } else {
+#if 0
+ data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK;
+#endif
+ data1 |= (DAGB0_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK);
+
+ data2 |= (DAGB1_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK);
+ }
+
+#if 0
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
+#endif
+ if (def1 != data1) /* write back only when the value actually changed */
+ WREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2, data1);
+
+ if (def2 != data2)
+ WREG32_SOC15(MMHUB, 0, regDAGB1_CNTL_MISC2, data2);
+}
+
+static void
+mmhub_v4_1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{ /* Currently a no-op: the whole body below is compiled out, so both parameters are unused. */
+#if 0 /* disabled code would toggle MEM_LS_ENABLE in MM_ATC_L2_MISC_CG */
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ if (enable)
+ data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+ else
+ data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
+#endif
+}
+
+static int mmhub_v4_1_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{ /* Apply MGCG and LS clock-gating settings when the matching cg_flags bits are set; always returns 0. */
+ if (amdgpu_sriov_vf(adev)) /* nothing is programmed on an SR-IOV VF */
+ return 0;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)
+ mmhub_v4_1_0_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE); /* any state other than GATE is passed as enable == false */
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)
+ mmhub_v4_1_0_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+
+ return 0;
+}
+
+static void mmhub_v4_1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{ /* Currently a no-op: the body is compiled out, so *flags is left exactly as the caller passed it. */
+#if 0
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0; /* NOTE(review): no return here, so the register read below would still run on a VF if this code were re-enabled */
+
+ data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
+#endif
+}
+
+const struct amdgpu_mmhub_funcs mmhub_v4_1_0_funcs = { /* non-static: the MMHUB v4.1.0 callback table exported from this file */
+ .init = mmhub_v4_1_0_init,
+ .get_fb_location = mmhub_v4_1_0_get_fb_location,
+ .get_mc_fb_offset = mmhub_v4_1_0_get_mc_fb_offset,
+ .gart_enable = mmhub_v4_1_0_gart_enable,
+ .set_fault_enable_default = mmhub_v4_1_0_set_fault_enable_default,
+ .gart_disable = mmhub_v4_1_0_gart_disable,
+ .set_clockgating = mmhub_v4_1_0_set_clockgating,
+ .get_clockgating = mmhub_v4_1_0_get_clockgating, /* body is currently compiled out, so this callback leaves the flags untouched */
+ .setup_vm_pt_regs = mmhub_v4_1_0_setup_vm_pt_regs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.h
new file mode 100644
index 000000000000..3902d653353c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V4_1_0_H__
+#define __MMHUB_V4_1_0_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v4_1_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index ff49eeaf7882..fe0710b55c3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -57,7 +57,7 @@ static u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev)
static void mmhub_v9_4_setup_hubid_vm_pt_regs(struct amdgpu_device *adev, int hubid,
uint32_t vmid, uint64_t value)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
WREG32_SOC15_OFFSET(MMHUB, 0,
mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
@@ -108,7 +108,7 @@ static void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmi
}
static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev,
- int hubid)
+ int hubid)
{
uint64_t value;
uint32_t tmp;
@@ -136,7 +136,7 @@ static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev,
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */
- value = amdgpu_gmc_vram_mc2pa(adev, adev->vram_scratch.gpu_addr);
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
WREG32_SOC15_OFFSET(
MMHUB, 0,
mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
@@ -198,6 +198,36 @@ static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid)
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
}
+/* Set snoop bit for SDMA so that SDMA writes probe-invalidates RW lines.
+ * For each DAGB instance of hub @hubid, bit 15 is OR-ed into both the
+ * WRCLI_GPU_SNOOP_OVERRIDE and WRCLI_GPU_SNOOP_OVERRIDE_VALUE registers.
+ */
+static void mmhub_v9_4_init_snoop_override_regs(struct amdgpu_device *adev, int hubid)
+{
+ uint32_t tmp;
+ int i;
+ uint32_t distance = mmDAGB1_WRCLI_GPU_SNOOP_OVERRIDE -
+ mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE; /* register stride between consecutive DAGB instances */
+ uint32_t huboffset = hubid * MMHUB_INSTANCE_REGISTER_OFFSET;
+
+ for (i = 0; i < 5 - (2 * hubid); i++) { /* 5 iterations for hubid 0, 3 for hubid 1 */
+ /* DAGB instances 0 to 4 are in hub0 and 5 to 7 are in hub1 */
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
+ mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE,
+ huboffset + i * distance);
+ tmp |= (1 << 15); /* SDMA client is BIT15 */
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE,
+ huboffset + i * distance, tmp);
+
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
+ mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE,
+ huboffset + i * distance); /* assumes the _VALUE registers share the OVERRIDE stride - TODO confirm */
+ tmp |= (1 << 15);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE,
+ huboffset + i * distance, tmp);
+ }
+
+}
+
static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid)
{
uint32_t tmp;
@@ -294,18 +324,26 @@ static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev,
static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned int num_level, block_size;
uint32_t tmp;
int i;
+ num_level = adev->vm_manager.num_level;
+ block_size = adev->vm_manager.block_size;
+ if (adev->gmc.translate_further)
+ num_level -= 1;
+ else
+ block_size -= 9;
+
for (i = 0; i <= 14; i++) {
tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL,
- hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i);
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
PAGE_TABLE_DEPTH,
- adev->vm_manager.num_level);
+ num_level);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
@@ -323,7 +361,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
- adev->vm_manager.block_size - 9);
+ block_size);
/* Send no-retry XNACK on fault to suppress VM fault storm. */
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
@@ -355,7 +393,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
static void mmhub_v9_4_program_invalidation(struct amdgpu_device *adev,
int hubid)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned i;
for (i = 0; i < 18; ++i) {
@@ -384,6 +422,7 @@ static int mmhub_v9_4_gart_enable(struct amdgpu_device *adev)
if (!amdgpu_sriov_vf(adev))
mmhub_v9_4_init_cache_regs(adev, i);
+ mmhub_v9_4_init_snoop_override_regs(adev, i);
mmhub_v9_4_enable_system_domain(adev, i);
if (!amdgpu_sriov_vf(adev))
mmhub_v9_4_disable_identity_aperture(adev, i);
@@ -396,7 +435,7 @@ static int mmhub_v9_4_gart_enable(struct amdgpu_device *adev)
static void mmhub_v9_4_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
u32 tmp;
u32 i, j;
@@ -499,8 +538,8 @@ static void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev, bool
static void mmhub_v9_4_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub[MMHUB_NUM_INSTANCES] =
- {&adev->vmhub[AMDGPU_MMHUB_0], &adev->vmhub[AMDGPU_MMHUB_1]};
+ struct amdgpu_vmhub *hub[MMHUB_NUM_INSTANCES] = {
+ &adev->vmhub[AMDGPU_MMHUB0(0)], &adev->vmhub[AMDGPU_MMHUB1(0)]};
int i;
for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
@@ -647,9 +686,9 @@ static int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev,
return 0;
}
-static void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+static void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
- int data, data1;
+ u32 data, data1;
if (amdgpu_sriov_vf(adev))
*flags = 0;
@@ -1560,7 +1599,7 @@ static int mmhub_v9_4_get_ras_error_count(struct amdgpu_device *adev,
uint32_t sec_cnt, ded_cnt;
for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_ras_fields); i++) {
- if(mmhub_v9_4_ras_fields[i].reg_offset != reg->reg_offset)
+ if (mmhub_v9_4_ras_fields[i].reg_offset != reg->reg_offset)
continue;
sec_cnt = (value &
@@ -1655,14 +1694,18 @@ static void mmhub_v9_4_query_ras_error_status(struct amdgpu_device *adev)
}
}
-const struct amdgpu_mmhub_ras_funcs mmhub_v9_4_ras_funcs = {
- .ras_late_init = amdgpu_mmhub_ras_late_init,
- .ras_fini = amdgpu_mmhub_ras_fini,
+const struct amdgpu_ras_block_hw_ops mmhub_v9_4_ras_hw_ops = {
.query_ras_error_count = mmhub_v9_4_query_ras_error_count,
.reset_ras_error_count = mmhub_v9_4_reset_ras_error_count,
.query_ras_error_status = mmhub_v9_4_query_ras_error_status,
};
+struct amdgpu_mmhub_ras mmhub_v9_4_ras = {
+ .ras_block = {
+ .hw_ops = &mmhub_v9_4_ras_hw_ops,
+ },
+};
+
const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs = {
.get_fb_location = mmhub_v9_4_get_fb_location,
.init = mmhub_v9_4_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
index 90436efa92ef..a48329d95f71 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
@@ -24,6 +24,6 @@
#define __MMHUB_V9_4_H__
extern const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs;
-extern const struct amdgpu_mmhub_ras_funcs mmhub_v9_4_ras_funcs;
+extern struct amdgpu_mmhub_ras mmhub_v9_4_ras;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h
index 3e4e858a6965..a773ef61b78c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h
@@ -30,6 +30,8 @@
#define MMSCH_VERSION_MINOR 0
#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR)
+#define MMSCH_V3_0_VCN_INSTANCES 0x2
+
enum mmsch_v3_0_command_type {
MMSCH_COMMAND__DIRECT_REG_WRITE = 0,
MMSCH_COMMAND__DIRECT_REG_POLLING = 2,
@@ -47,7 +49,7 @@ struct mmsch_v3_0_table_info {
struct mmsch_v3_0_init_header {
uint32_t version;
uint32_t total_size;
- struct mmsch_v3_0_table_info inst[AMDGPU_MAX_VCN_INSTANCES];
+ struct mmsch_v3_0_table_info inst[MMSCH_V3_0_VCN_INSTANCES];
};
struct mmsch_v3_0_cmd_direct_reg_header {
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h
new file mode 100644
index 000000000000..ced26cc5123a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMSCH_V4_0_H__
+#define __MMSCH_V4_0_H__
+
+#include "amdgpu_vcn.h"
+
+#define MMSCH_VERSION_MAJOR 4
+#define MMSCH_VERSION_MINOR 0
+#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR) /* major in bits 16 and up, minor in the low 16 bits: 0x00040000 */
+
+#define RB_ENABLED (1 << 0) /* bit 0 */
+#define RB4_ENABLED (1 << 1) /* bit 1 */
+
+#define MMSCH_VF_ENGINE_STATUS__PASS 0x1
+
+#define MMSCH_VF_MAILBOX_RESP__OK 0x1 /* mailbox response codes are consecutive values 0x1..0x5 */
+#define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2
+#define MMSCH_VF_MAILBOX_RESP__FAILED 0x3
+#define MMSCH_VF_MAILBOX_RESP__FAILED_SMALL_CTX_SIZE 0x4
+#define MMSCH_VF_MAILBOX_RESP__UNKNOWN_CMD 0x5
+
+#define MMSCH_V4_0_VCN_INSTANCES 0x2 /* sizes the inst[] array in struct mmsch_v4_0_init_header */
+
+enum mmsch_v4_0_command_type { /* every value fits the 4-bit command_type field of the command headers */
+ MMSCH_COMMAND__DIRECT_REG_WRITE = 0,
+ MMSCH_COMMAND__DIRECT_REG_POLLING = 2,
+ MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE = 3,
+ MMSCH_COMMAND__INDIRECT_REG_WRITE = 8,
+ MMSCH_COMMAND__END = 0xf /* largest value a 4-bit field can hold */
+};
+
+struct mmsch_v4_0_table_info { /* one table descriptor; embedded per engine in the init headers */
+ uint32_t init_status;
+ uint32_t table_offset; /* units are not visible here - TODO confirm against the code that fills it */
+ uint32_t table_size; /* units are not visible here - TODO confirm against the code that fills it */
+};
+
+struct mmsch_v4_0_init_header { /* version and total size followed by the per-engine table descriptors */
+ uint32_t version;
+ uint32_t total_size;
+ struct mmsch_v4_0_table_info inst[MMSCH_V4_0_VCN_INSTANCES]; /* fixed at 2 entries, independent of AMDGPU_MAX_VCN_INSTANCES */
+ struct mmsch_v4_0_table_info jpegdec;
+};
+
+struct mmsch_v4_0_cmd_direct_reg_header { /* bit-fields total 28 + 4 = 32 bits */
+ uint32_t reg_offset : 28;
+ uint32_t command_type : 4;
+};
+
+struct mmsch_v4_0_cmd_indirect_reg_header { /* bit-fields total 20 + 8 + 4 = 32 bits */
+ uint32_t reg_offset : 20;
+ uint32_t reg_idx_space : 8;
+ uint32_t command_type : 4;
+};
+
+struct mmsch_v4_0_cmd_direct_write { /* direct header plus one value word; filled by MMSCH_V4_0_INSERT_DIRECT_WT */
+ struct mmsch_v4_0_cmd_direct_reg_header cmd_header;
+ uint32_t reg_value;
+};
+
+struct mmsch_v4_0_cmd_direct_read_modify_write { /* direct header plus two words; filled by MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT */
+ struct mmsch_v4_0_cmd_direct_reg_header cmd_header;
+ uint32_t write_data; /* member order is data then mask, the reverse of the macro's (mask, data) argument order */
+ uint32_t mask_value;
+};
+
+struct mmsch_v4_0_cmd_direct_polling { /* direct header plus mask and wait words; filled by MMSCH_V4_0_INSERT_DIRECT_POLL */
+ struct mmsch_v4_0_cmd_direct_reg_header cmd_header;
+ uint32_t mask_value;
+ uint32_t wait_value;
+};
+
+struct mmsch_v4_0_cmd_end { /* header only, no payload words; copied by MMSCH_V4_0_INSERT_END */
+ struct mmsch_v4_0_cmd_direct_reg_header cmd_header;
+};
+
+struct mmsch_v4_0_cmd_indirect_write { /* indirect header plus one value word; no insert macro for it is defined in this header */
+ struct mmsch_v4_0_cmd_indirect_reg_header cmd_header;
+ uint32_t reg_value;
+};
+
+#define MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { /* uses caller locals size, size_dw, direct_rd_mod_wt, table_loc, table_size */ \
+ size = sizeof(struct mmsch_v4_0_cmd_direct_read_modify_write); \
+ size_dw = size / 4; /* bytes to 32-bit words */ \
+ direct_rd_mod_wt.cmd_header.reg_offset = reg; /* command_type is not set here; presumably pre-set by the caller */ \
+ direct_rd_mod_wt.mask_value = mask; \
+ direct_rd_mod_wt.write_data = data; \
+ memcpy((void *)table_loc, &direct_rd_mod_wt, size); \
+ table_loc += size_dw; /* assumes table_loc is a pointer to 32-bit words - TODO confirm at call sites */ \
+ table_size += size_dw; \
+} /* NOTE(review): bare-brace body, not do { } while (0); unsafe as an unbraced if-branch followed by else */
+
+#define MMSCH_V4_0_INSERT_DIRECT_WT(reg, value) { /* uses caller locals size, size_dw, direct_wt, table_loc, table_size */ \
+ size = sizeof(struct mmsch_v4_0_cmd_direct_write); \
+ size_dw = size / 4; /* bytes to 32-bit words */ \
+ direct_wt.cmd_header.reg_offset = reg; /* command_type is not set here; presumably pre-set by the caller */ \
+ direct_wt.reg_value = value; \
+ memcpy((void *)table_loc, &direct_wt, size); \
+ table_loc += size_dw; /* assumes table_loc is a pointer to 32-bit words - TODO confirm at call sites */ \
+ table_size += size_dw; \
+} /* NOTE(review): bare-brace body, not do { } while (0); unsafe as an unbraced if-branch followed by else */
+
+#define MMSCH_V4_0_INSERT_DIRECT_POLL(reg, mask, wait) { /* uses caller locals size, size_dw, direct_poll, table_loc, table_size */ \
+ size = sizeof(struct mmsch_v4_0_cmd_direct_polling); \
+ size_dw = size / 4; /* bytes to 32-bit words */ \
+ direct_poll.cmd_header.reg_offset = reg; /* command_type is not set here; presumably pre-set by the caller */ \
+ direct_poll.mask_value = mask; \
+ direct_poll.wait_value = wait; \
+ memcpy((void *)table_loc, &direct_poll, size); \
+ table_loc += size_dw; /* assumes table_loc is a pointer to 32-bit words - TODO confirm at call sites */ \
+ table_size += size_dw; \
+} /* NOTE(review): bare-brace body, not do { } while (0); unsafe as an unbraced if-branch followed by else */
+
+#define MMSCH_V4_0_INSERT_END() { /* uses caller locals size, size_dw, end, table_loc, table_size */ \
+ size = sizeof(struct mmsch_v4_0_cmd_end); \
+ size_dw = size / 4; /* bytes to 32-bit words */ \
+ memcpy((void *)table_loc, &end, size); /* copies the caller's end struct unchanged; no field is set by this macro */ \
+ table_loc += size_dw; \
+ table_size += size_dw; \
+} /* NOTE(review): bare-brace body, not do { } while (0); unsafe as an unbraced if-branch followed by else */
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h
new file mode 100644
index 000000000000..db7eb5260295
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMSCH_V4_0_3_H__
+#define __MMSCH_V4_0_3_H__
+
+#include "amdgpu_vcn.h"
+#include "mmsch_v4_0.h"
+
+struct mmsch_v4_0_3_init_header { /* v4.0.3 layout: reuses the v4.0 table descriptor type from mmsch_v4_0.h */
+ uint32_t version;
+ uint32_t total_size;
+ struct mmsch_v4_0_table_info vcn0; /* a single VCN descriptor instead of v4.0's inst[] array */
+ struct mmsch_v4_0_table_info mjpegdec0[4]; /* two groups of four JPEG decoder descriptors */
+ struct mmsch_v4_0_table_info mjpegdec1[4];
+};
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v5_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v5_0.h
new file mode 100644
index 000000000000..6f749814929f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v5_0.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMSCH_V5_0_H__
+#define __MMSCH_V5_0_H__
+
+#include "amdgpu_vcn.h"
+
+#define MMSCH_VERSION_MAJOR 5
+#define MMSCH_VERSION_MINOR 0
+#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR) /* major in bits 16 and up, minor in the low 16 bits: 0x00050000 */
+
+#define RB_ENABLED (1 << 0) /* bit 0 */
+#define RB4_ENABLED (1 << 1) /* bit 1 */
+
+#define MMSCH_VF_ENGINE_STATUS__PASS 0x1
+
+#define MMSCH_VF_MAILBOX_RESP__OK 0x1 /* mailbox response codes are consecutive values 0x1..0x5 */
+#define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2
+#define MMSCH_VF_MAILBOX_RESP__FAILED 0x3
+#define MMSCH_VF_MAILBOX_RESP__FAILED_SMALL_CTX_SIZE 0x4
+#define MMSCH_VF_MAILBOX_RESP__UNKNOWN_CMD 0x5
+
+enum mmsch_v5_0_command_type { /* every value fits the 4-bit command_type field of the command headers */
+ MMSCH_COMMAND__DIRECT_REG_WRITE = 0,
+ MMSCH_COMMAND__DIRECT_REG_POLLING = 2,
+ MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE = 3,
+ MMSCH_COMMAND__INDIRECT_REG_WRITE = 8,
+ MMSCH_COMMAND__END = 0xf /* largest value a 4-bit field can hold */
+};
+
+struct mmsch_v5_0_table_info { /* one table descriptor; embedded per engine in struct mmsch_v5_0_init_header */
+ uint32_t init_status;
+ uint32_t table_offset; /* units are not visible here - TODO confirm against the code that fills it */
+ uint32_t table_size; /* units are not visible here - TODO confirm against the code that fills it */
+};
+
+struct mmsch_v5_0_init_header { /* version and total size followed by the per-engine table descriptors */
+ uint32_t version;
+ uint32_t total_size;
+ struct mmsch_v5_0_table_info vcn0;
+ struct mmsch_v5_0_table_info mjpegdec0[5]; /* two groups of five JPEG decoder descriptors */
+ struct mmsch_v5_0_table_info mjpegdec1[5];
+};
+
+struct mmsch_v5_0_cmd_direct_reg_header { /* bit-fields total 28 + 4 = 32 bits */
+ uint32_t reg_offset : 28;
+ uint32_t command_type : 4;
+};
+
+struct mmsch_v5_0_cmd_indirect_reg_header { /* bit-fields total 20 + 8 + 4 = 32 bits */
+ uint32_t reg_offset : 20;
+ uint32_t reg_idx_space : 8;
+ uint32_t command_type : 4;
+};
+
+struct mmsch_v5_0_cmd_direct_write { /* direct header plus one value word; filled by MMSCH_V5_0_INSERT_DIRECT_WT */
+ struct mmsch_v5_0_cmd_direct_reg_header cmd_header;
+ uint32_t reg_value;
+};
+
+struct mmsch_v5_0_cmd_direct_read_modify_write { /* direct header plus two words; filled by MMSCH_V5_0_INSERT_DIRECT_RD_MOD_WT */
+ struct mmsch_v5_0_cmd_direct_reg_header cmd_header;
+ uint32_t write_data; /* member order is data then mask, the reverse of the macro's (mask, data) argument order */
+ uint32_t mask_value;
+};
+
+struct mmsch_v5_0_cmd_direct_polling { /* direct header plus mask and wait words; filled by MMSCH_V5_0_INSERT_DIRECT_POLL */
+ struct mmsch_v5_0_cmd_direct_reg_header cmd_header;
+ uint32_t mask_value;
+ uint32_t wait_value;
+};
+
+struct mmsch_v5_0_cmd_end { /* header only, no payload words; copied by MMSCH_V5_0_INSERT_END */
+ struct mmsch_v5_0_cmd_direct_reg_header cmd_header;
+};
+
+struct mmsch_v5_0_cmd_indirect_write { /* indirect header plus one value word; no insert macro for it is defined in this header */
+ struct mmsch_v5_0_cmd_indirect_reg_header cmd_header;
+ uint32_t reg_value;
+};
+
+#define MMSCH_V5_0_INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { /* uses caller locals size, size_dw, direct_rd_mod_wt, table_loc, table_size */ \
+ size = sizeof(struct mmsch_v5_0_cmd_direct_read_modify_write); \
+ size_dw = size / 4; /* bytes to 32-bit words */ \
+ direct_rd_mod_wt.cmd_header.reg_offset = reg; /* command_type is not set here; presumably pre-set by the caller */ \
+ direct_rd_mod_wt.mask_value = mask; \
+ direct_rd_mod_wt.write_data = data; \
+ memcpy((void *)table_loc, &direct_rd_mod_wt, size); \
+ table_loc += size_dw; /* assumes table_loc is a pointer to 32-bit words - TODO confirm at call sites */ \
+ table_size += size_dw; \
+} /* NOTE(review): bare-brace body, not do { } while (0); unsafe as an unbraced if-branch followed by else */
+
+#define MMSCH_V5_0_INSERT_DIRECT_WT(reg, value) { /* uses caller locals size, size_dw, direct_wt, table_loc, table_size */ \
+ size = sizeof(struct mmsch_v5_0_cmd_direct_write); \
+ size_dw = size / 4; /* bytes to 32-bit words */ \
+ direct_wt.cmd_header.reg_offset = reg; /* command_type is not set here; presumably pre-set by the caller */ \
+ direct_wt.reg_value = value; \
+ memcpy((void *)table_loc, &direct_wt, size); \
+ table_loc += size_dw; /* assumes table_loc is a pointer to 32-bit words - TODO confirm at call sites */ \
+ table_size += size_dw; \
+} /* NOTE(review): bare-brace body, not do { } while (0); unsafe as an unbraced if-branch followed by else */
+
+#define MMSCH_V5_0_INSERT_DIRECT_POLL(reg, mask, wait) { /* uses caller locals size, size_dw, direct_poll, table_loc, table_size */ \
+ size = sizeof(struct mmsch_v5_0_cmd_direct_polling); \
+ size_dw = size / 4; /* bytes to 32-bit words */ \
+ direct_poll.cmd_header.reg_offset = reg; /* command_type is not set here; presumably pre-set by the caller */ \
+ direct_poll.mask_value = mask; \
+ direct_poll.wait_value = wait; \
+ memcpy((void *)table_loc, &direct_poll, size); \
+ table_loc += size_dw; /* assumes table_loc is a pointer to 32-bit words - TODO confirm at call sites */ \
+ table_size += size_dw; \
+} /* NOTE(review): bare-brace body, not do { } while (0); unsafe as an unbraced if-branch followed by else */
+
+#define MMSCH_V5_0_INSERT_END() { /* uses caller locals size, size_dw, end, table_loc, table_size */ \
+ size = sizeof(struct mmsch_v5_0_cmd_end); \
+ size_dw = size / 4; /* bytes to 32-bit words */ \
+ memcpy((void *)table_loc, &end, size); /* copies the caller's end struct unchanged; no field is set by this macro */ \
+ table_loc += size_dw; \
+ table_size += size_dw; \
+} /* NOTE(review): bare-brace body, not do { } while (0); unsafe as an unbraced if-branch followed by else */
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 23b066bcffb2..9a40107a0869 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -32,6 +32,8 @@
#include "soc15_common.h"
#include "mxgpu_ai.h"
+#include "amdgpu_reset.h"
+
static void xgpu_ai_mailbox_send_ack(struct amdgpu_device *adev)
{
WREG8(AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2);
@@ -91,7 +93,7 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
timeout -= 5;
} while (timeout > 1);
- pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", AI_MAILBOX_POLL_ACK_TIMEDOUT);
+ dev_err(adev->dev, "Doesn't get TRN_MSG_ACK from pf in %d msec\n", AI_MAILBOX_POLL_ACK_TIMEDOUT);
return -ETIME;
}
@@ -109,7 +111,7 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
timeout -= 10;
} while (timeout > 1);
- pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r);
+ dev_err(adev->dev, "Doesn't get msg:%d from pf, error=%d\n", event, r);
return -ETIME;
}
@@ -130,7 +132,7 @@ static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev,
xgpu_ai_mailbox_set_valid(adev, false);
trn = xgpu_ai_peek_ack(adev);
if (trn) {
- pr_err("trn=%x ACK should not assert! wait again !\n", trn);
+ dev_err_ratelimited(adev->dev, "trn=%x ACK should not assert! wait again !\n", trn);
msleep(1);
}
} while(trn);
@@ -153,7 +155,7 @@ static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev,
/* start to poll ack */
r = xgpu_ai_poll_ack(adev);
if (r)
- pr_err("Doesn't get ack from pf, continue\n");
+ dev_err(adev->dev, "Doesn't get ack from pf, continue\n");
xgpu_ai_mailbox_set_valid(adev, false);
}
@@ -171,7 +173,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
req == IDH_REQ_GPU_RESET_ACCESS) {
r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
if (r) {
- pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
+ dev_err(adev->dev, "Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
return r;
}
/* Retrieve checksum from mailbox2 */
@@ -180,6 +182,11 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW2));
}
+ } else if (req == IDH_REQ_GPU_INIT_DATA){
+ /* Dummy REQ_GPU_INIT_DATA handling */
+ r = xgpu_ai_poll_msg(adev, IDH_REQ_GPU_INIT_DATA_READY);
+ /* version set to 0 since dummy */
+ adev->virt.req_init_data_ver = 0;
}
return 0;
@@ -224,7 +231,7 @@ static int xgpu_ai_mailbox_ack_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- DRM_DEBUG("get ack intr and do nothing.\n");
+ dev_dbg(adev->dev, "get ack intr and do nothing.\n");
return 0;
}
@@ -242,41 +249,78 @@ static int xgpu_ai_set_mailbox_ack_irq(struct amdgpu_device *adev,
return 0;
}
-static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
+static void xgpu_ai_ready_to_reset(struct amdgpu_device *adev)
{
- struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
- struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
- int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
-
- /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
- * otherwise the mailbox msg will be ruined/reseted by
- * the VF FLR.
- */
- if (!down_write_trylock(&adev->reset_sem))
- return;
-
- amdgpu_virt_fini_data_exchange(adev);
- atomic_set(&adev->in_gpu_reset, 1);
-
xgpu_ai_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
+}
+static int xgpu_ai_wait_reset(struct amdgpu_device *adev)
+{
+ int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
do {
- if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
- goto flr_done;
-
+ if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) {
+ dev_dbg(adev->dev, "Got AI IDH_FLR_NOTIFICATION_CMPL after %d ms\n", AI_MAILBOX_POLL_FLR_TIMEDOUT - timeout);
+ return 0;
+ }
msleep(10);
timeout -= 10;
} while (timeout > 1);
-flr_done:
- atomic_set(&adev->in_gpu_reset, 0);
- up_write(&adev->reset_sem);
+ dev_dbg(adev->dev, "waiting AI IDH_FLR_NOTIFICATION_CMPL timeout\n");
+ return -ETIME;
+}
+
+static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
+{
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+ struct amdgpu_reset_context reset_context = { 0 };
+
+ amdgpu_virt_fini_data_exchange(adev);
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_device_should_recover_gpu(adev)
&& (!amdgpu_device_has_job_running(adev) ||
- adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT))
- amdgpu_device_gpu_recover(adev, NULL);
+ adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) {
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_HOST_FLR, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+ }
+}
+
+static void xgpu_ai_mailbox_req_bad_pages_work(struct work_struct *work)
+{ /* Work handler: tear down the data exchange region, then request bad pages via amdgpu_virt_request_bad_pages(). */
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, req_bad_pages_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+
+ if (down_read_trylock(&adev->reset_domain->sem)) { /* non-blocking: if the semaphore cannot be taken the work is silently skipped */
+ amdgpu_virt_fini_data_exchange(adev);
+ amdgpu_virt_request_bad_pages(adev);
+ up_read(&adev->reset_domain->sem);
+ }
+}
+
+/**
+ * xgpu_ai_mailbox_handle_bad_pages_work - Reinitialize the data exchange region to get fresh bad page information
+ * @work: pointer to the work_struct
+ *
+ * This work handler is triggered when bad pages are ready, and it reinitializes
+ * the data exchange region to retrieve updated bad page information from the host.
+ *
+ * The reinitialization only happens if the reset domain semaphore can be
+ * taken for reading without blocking; otherwise the handler returns without
+ * doing anything.
+ */
+static void xgpu_ai_mailbox_handle_bad_pages_work(struct work_struct *work)
+{
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, handle_bad_pages_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+
+ if (down_read_trylock(&adev->reset_domain->sem)) { /* non-blocking attempt; no retry on failure */
+ amdgpu_virt_fini_data_exchange(adev); /* tear down first, then set up again */
+ amdgpu_virt_init_data_exchange(adev);
+ up_read(&adev->reset_domain->sem);
+ }
+}
static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -298,23 +342,47 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
enum idh_event event = xgpu_ai_mailbox_peek_msg(adev);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
switch (event) {
- case IDH_FLR_NOTIFICATION:
+ case IDH_RAS_BAD_PAGES_READY:
+ xgpu_ai_mailbox_send_ack(adev);
if (amdgpu_sriov_runtime(adev))
- schedule_work(&adev->virt.flr_work);
+ schedule_work(&adev->virt.handle_bad_pages_work);
break;
- case IDH_QUERY_ALIVE:
- xgpu_ai_mailbox_send_ack(adev);
- break;
- /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
- * it byfar since that polling thread will handle it,
- * other msg like flr complete is not handled here.
- */
- case IDH_CLR_MSG_BUF:
- case IDH_FLR_NOTIFICATION_CMPL:
- case IDH_READY_TO_ACCESS_GPU:
- default:
+ case IDH_RAS_BAD_PAGES_NOTIFICATION:
+ xgpu_ai_mailbox_send_ack(adev);
+ if (amdgpu_sriov_runtime(adev))
+ schedule_work(&adev->virt.req_bad_pages_work);
+ break;
+ case IDH_UNRECOV_ERR_NOTIFICATION:
+ xgpu_ai_mailbox_send_ack(adev);
+ ras->is_rma = true;
+ dev_err(adev->dev, "VF is in an unrecoverable state. Runtime Services are halted.\n");
+ if (amdgpu_sriov_runtime(adev))
+ WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
+ break;
+ case IDH_FLR_NOTIFICATION:
+ if (amdgpu_sriov_runtime(adev))
+ WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
+ break;
+ case IDH_QUERY_ALIVE:
+ xgpu_ai_mailbox_send_ack(adev);
+ break;
+ /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
+ * it byfar since that polling thread will handle it,
+ * other msg like flr complete is not handled here.
+ */
+ case IDH_CLR_MSG_BUF:
+ case IDH_FLR_NOTIFICATION_CMPL:
+ case IDH_READY_TO_ACCESS_GPU:
+ default:
break;
}
@@ -370,6 +438,8 @@ int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev)
}
INIT_WORK(&adev->virt.flr_work, xgpu_ai_mailbox_flr_work);
+ INIT_WORK(&adev->virt.req_bad_pages_work, xgpu_ai_mailbox_req_bad_pages_work);
+ INIT_WORK(&adev->virt.handle_bad_pages_work, xgpu_ai_mailbox_handle_bad_pages_work);
return 0;
}
@@ -380,10 +450,32 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev)
amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
}
+static int xgpu_ai_request_init_data(struct amdgpu_device *adev)
+{
+ return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA);
+}
+
+static void xgpu_ai_ras_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
+{
+ xgpu_ai_send_access_requests(adev, IDH_RAS_POISON);
+}
+
+static bool xgpu_ai_rcvd_ras_intr(struct amdgpu_device *adev)
+{
+ enum idh_event msg = xgpu_ai_mailbox_peek_msg(adev);
+
+ return (msg == IDH_RAS_ERROR_DETECTED || msg == 0xFFFFFFFF);
+}
+
const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
.req_full_gpu = xgpu_ai_request_full_gpu_access,
.rel_full_gpu = xgpu_ai_release_full_gpu_access,
.reset_gpu = xgpu_ai_request_reset,
- .wait_reset = NULL,
+ .ready_to_reset = xgpu_ai_ready_to_reset,
+ .wait_reset = xgpu_ai_wait_reset,
.trans_msg = xgpu_ai_mailbox_trans_msg,
+ .req_init_data = xgpu_ai_request_init_data,
+ .ras_poison_handler = xgpu_ai_ras_poison_handler,
+ .rcvd_ras_intr = xgpu_ai_rcvd_ras_intr,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index bd3b23171579..874b9f8f9804 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -26,7 +26,7 @@
#define AI_MAILBOX_POLL_ACK_TIMEDOUT 500
#define AI_MAILBOX_POLL_MSG_TIMEDOUT 6000
-#define AI_MAILBOX_POLL_FLR_TIMEDOUT 5000
+#define AI_MAILBOX_POLL_FLR_TIMEDOUT 10000
#define AI_MAILBOX_POLL_MSG_REP_MAX 11
enum idh_request {
@@ -35,9 +35,12 @@ enum idh_request {
IDH_REQ_GPU_FINI_ACCESS,
IDH_REL_GPU_FINI_ACCESS,
IDH_REQ_GPU_RESET_ACCESS,
+ IDH_REQ_GPU_INIT_DATA,
IDH_LOG_VF_ERROR = 200,
IDH_READY_TO_RESET = 201,
+ IDH_RAS_POISON = 202,
+ IDH_REQ_RAS_BAD_PAGES = 205,
};
enum idh_event {
@@ -48,7 +51,13 @@ enum idh_event {
IDH_SUCCESS,
IDH_FAIL,
IDH_QUERY_ALIVE,
-
+ IDH_REQ_GPU_INIT_DATA_READY,
+ IDH_RAS_POISON_READY,
+ IDH_PF_SOFT_FLR_NOTIFICATION,
+ IDH_RAS_ERROR_DETECTED,
+ IDH_RAS_BAD_PAGES_READY = 15,
+ IDH_RAS_BAD_PAGES_NOTIFICATION = 16,
+ IDH_UNRECOV_ERR_NOTIFICATION = 17,
IDH_TEXT_MESSAGE = 255,
};
@@ -59,7 +68,9 @@ int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev);
int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev);
void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev);
-#define AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4
-#define AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4 + 1
+#define AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE \
+ (SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4)
+#define AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE \
+ (SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4 + 1)
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index a35e6d87e537..e7cd07383d56 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -31,6 +31,8 @@
#include "soc15_common.h"
#include "mxgpu_nv.h"
+#include "amdgpu_reset.h"
+
static void xgpu_nv_mailbox_send_ack(struct amdgpu_device *adev)
{
WREG8(NV_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2);
@@ -59,15 +61,20 @@ static enum idh_event xgpu_nv_mailbox_peek_msg(struct amdgpu_device *adev)
static int xgpu_nv_mailbox_rcv_msg(struct amdgpu_device *adev,
enum idh_event event)
{
+ int r = 0;
u32 reg;
reg = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0);
- if (reg != event)
+ if (reg == IDH_FAIL)
+ r = -EINVAL;
+ if (reg == IDH_UNRECOV_ERR_NOTIFICATION)
+ r = -ENODEV;
+ else if (reg != event)
return -ENOENT;
xgpu_nv_mailbox_send_ack(adev);
- return 0;
+ return r;
}
static uint8_t xgpu_nv_peek_ack(struct amdgpu_device *adev)
@@ -89,7 +96,7 @@ static int xgpu_nv_poll_ack(struct amdgpu_device *adev)
timeout -= 5;
} while (timeout > 1);
- pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", NV_MAILBOX_POLL_ACK_TIMEDOUT);
+ dev_err(adev->dev, "Doesn't get TRN_MSG_ACK from pf in %d msec \n", NV_MAILBOX_POLL_ACK_TIMEDOUT);
return -ETIME;
}
@@ -98,19 +105,31 @@ static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event)
{
int r;
uint64_t timeout, now;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
now = (uint64_t)ktime_to_ms(ktime_get());
timeout = now + NV_MAILBOX_POLL_MSG_TIMEDOUT;
do {
r = xgpu_nv_mailbox_rcv_msg(adev, event);
- if (!r)
+ if (!r) {
+ dev_dbg(adev->dev, "rcv_msg 0x%x after %llu ms\n",
+ event, NV_MAILBOX_POLL_MSG_TIMEDOUT - timeout + now);
return 0;
+ } else if (r == -ENODEV) {
+ if (!amdgpu_ras_is_rma(adev)) {
+ ras->is_rma = true;
+ dev_err(adev->dev, "VF is in an unrecoverable state. "
+ "Runtime Services are halted.\n");
+ }
+ return r;
+ }
msleep(10);
now = (uint64_t)ktime_to_ms(ktime_get());
} while (timeout > now);
+ dev_dbg(adev->dev, "nv_poll_msg timed out\n");
return -ETIME;
}
@@ -131,11 +150,12 @@ static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev,
xgpu_nv_mailbox_set_valid(adev, false);
trn = xgpu_nv_peek_ack(adev);
if (trn) {
- pr_err("trn=%x ACK should not assert! wait again !\n", trn);
+ dev_err_ratelimited(adev->dev, "trn=%x ACK should not assert! wait again !\n", trn);
msleep(1);
}
} while (trn);
+ dev_dbg(adev->dev, "trans_msg req = 0x%x, data1 = 0x%x\n", req, data1);
WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW0, req);
WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW1, data1);
WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW2, data2);
@@ -145,19 +165,27 @@ static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev,
/* start to poll ack */
r = xgpu_nv_poll_ack(adev);
if (r)
- pr_err("Doesn't get ack from pf, continue\n");
+ dev_err(adev->dev, "Doesn't get ack from pf, continue\n");
xgpu_nv_mailbox_set_valid(adev, false);
}
-static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,
- enum idh_request req)
+static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev,
+ enum idh_request req, u32 data1, u32 data2, u32 data3)
{
- int r, retry = 1;
+ struct amdgpu_virt *virt = &adev->virt;
+ int r = 0, retry = 1;
enum idh_event event = -1;
+ mutex_lock(&virt->access_req_mutex);
send_request:
- xgpu_nv_mailbox_trans_msg(adev, req, 0, 0, 0);
+
+ if (amdgpu_ras_is_rma(adev)) {
+ r = -ENODEV;
+ goto out;
+ }
+
+ xgpu_nv_mailbox_trans_msg(adev, req, data1, data2, data3);
switch (req) {
case IDH_REQ_GPU_INIT_ACCESS:
@@ -168,6 +196,19 @@ send_request:
case IDH_REQ_GPU_INIT_DATA:
event = IDH_REQ_GPU_INIT_DATA_READY;
break;
+ case IDH_RAS_POISON:
+ if (data1 != 0)
+ event = IDH_RAS_POISON_READY;
+ break;
+ case IDH_REQ_RAS_ERROR_COUNT:
+ event = IDH_RAS_ERROR_COUNT_READY;
+ break;
+ case IDH_REQ_RAS_CPER_DUMP:
+ event = IDH_RAS_CPER_DUMP_READY;
+ break;
+ case IDH_REQ_RAS_CHK_CRITI:
+ event = IDH_REQ_RAS_CHK_CRITI_READY;
+ break;
default:
break;
}
@@ -175,24 +216,30 @@ send_request:
if (event != -1) {
r = xgpu_nv_poll_msg(adev, event);
if (r) {
- if (retry++ < 2)
+ if (retry++ < 5)
goto send_request;
if (req != IDH_REQ_GPU_INIT_DATA) {
- pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r);
- return r;
- }
- else /* host doesn't support REQ_GPU_INIT_DATA handshake */
+ dev_err(adev->dev, "Doesn't get msg:%d from pf, error=%d\n", event, r);
+ goto out;
+ } else /* host doesn't support REQ_GPU_INIT_DATA handshake */
adev->virt.req_init_data_ver = 0;
} else {
- if (req == IDH_REQ_GPU_INIT_DATA)
- {
- adev->virt.req_init_data_ver =
- RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1);
-
- /* assume V1 in case host doesn't set version number */
- if (adev->virt.req_init_data_ver < 1)
- adev->virt.req_init_data_ver = 1;
+ if (req == IDH_REQ_GPU_INIT_DATA) {
+ switch (RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1)) {
+ case GPU_CRIT_REGION_V2:
+ adev->virt.req_init_data_ver = GPU_CRIT_REGION_V2;
+ adev->virt.init_data_header.offset =
+ RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW2);
+ adev->virt.init_data_header.size_kb =
+ RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW3);
+ break;
+ default:
+ adev->virt.req_init_data_ver = GPU_CRIT_REGION_V1;
+ adev->virt.init_data_header.offset = -1;
+ adev->virt.init_data_header.size_kb = 0;
+ break;
+ }
}
}
@@ -203,7 +250,17 @@ send_request:
}
}
- return 0;
+out:
+ mutex_unlock(&virt->access_req_mutex);
+
+ return r;
+}
+
+static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,
+ enum idh_request req)
+{
+ return xgpu_nv_send_access_requests_with_param(adev,
+ req, 0, 0, 0);
}
static int xgpu_nv_request_reset(struct amdgpu_device *adev)
@@ -243,14 +300,15 @@ static int xgpu_nv_release_full_gpu_access(struct amdgpu_device *adev,
static int xgpu_nv_request_init_data(struct amdgpu_device *adev)
{
- return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA);
+ return xgpu_nv_send_access_requests_with_param(adev, IDH_REQ_GPU_INIT_DATA,
+ 0, GPU_CRIT_REGION_V2, 0);
}
static int xgpu_nv_mailbox_ack_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- DRM_DEBUG("get ack intr and do nothing.\n");
+ dev_dbg(adev->dev, "get ack intr and do nothing.\n");
return 0;
}
@@ -271,35 +329,34 @@ static int xgpu_nv_set_mailbox_ack_irq(struct amdgpu_device *adev,
return 0;
}
-static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
+static void xgpu_nv_ready_to_reset(struct amdgpu_device *adev)
{
- struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
- struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
- int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
-
- /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
- * otherwise the mailbox msg will be ruined/reseted by
- * the VF FLR.
- */
- if (!down_write_trylock(&adev->reset_sem))
- return;
-
- amdgpu_virt_fini_data_exchange(adev);
- atomic_set(&adev->in_gpu_reset, 1);
-
xgpu_nv_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
+}
+static int xgpu_nv_wait_reset(struct amdgpu_device *adev)
+{
+ int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
do {
- if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
- goto flr_done;
-
+ if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) {
+ dev_dbg(adev->dev, "Got NV IDH_FLR_NOTIFICATION_CMPL after %d ms\n", NV_MAILBOX_POLL_FLR_TIMEDOUT - timeout);
+ return 0;
+ }
msleep(10);
timeout -= 10;
} while (timeout > 1);
-flr_done:
- atomic_set(&adev->in_gpu_reset, 0);
- up_write(&adev->reset_sem);
+ dev_dbg(adev->dev, "waiting NV IDH_FLR_NOTIFICATION_CMPL timeout\n");
+ return -ETIME;
+}
+
+static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
+{
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+ struct amdgpu_reset_context reset_context = { 0 };
+
+ amdgpu_virt_fini_data_exchange(adev);
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_device_should_recover_gpu(adev)
@@ -307,8 +364,46 @@ flr_done:
adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
- adev->video_timeout == MAX_SCHEDULE_TIMEOUT))
- amdgpu_device_gpu_recover(adev, NULL);
+ adev->video_timeout == MAX_SCHEDULE_TIMEOUT)) {
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_HOST_FLR, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+ }
+}
+
+static void xgpu_nv_mailbox_req_bad_pages_work(struct work_struct *work)
+{
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, req_bad_pages_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+
+ if (down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_virt_fini_data_exchange(adev);
+ amdgpu_virt_request_bad_pages(adev);
+ up_read(&adev->reset_domain->sem);
+ }
+}
+
+/**
+ * xgpu_nv_mailbox_handle_bad_pages_work - Reinitialize the data exchange region to get fresh bad page information
+ * @work: pointer to the work_struct
+ *
+ * This work handler is triggered when bad pages are ready, and it reinitializes
+ * the data exchange region to retrieve updated bad page information from the host.
+ */
+static void xgpu_nv_mailbox_handle_bad_pages_work(struct work_struct *work)
+{
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, handle_bad_pages_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+
+ if (down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_virt_fini_data_exchange(adev);
+ amdgpu_virt_init_data_exchange(adev);
+ up_read(&adev->reset_domain->sem);
+ }
}
static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -333,11 +428,38 @@ static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
enum idh_event event = xgpu_nv_mailbox_peek_msg(adev);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
switch (event) {
+ case IDH_RAS_BAD_PAGES_READY:
+ xgpu_nv_mailbox_send_ack(adev);
+ if (amdgpu_sriov_runtime(adev))
+ schedule_work(&adev->virt.handle_bad_pages_work);
+ break;
+ case IDH_RAS_BAD_PAGES_NOTIFICATION:
+ xgpu_nv_mailbox_send_ack(adev);
+ if (amdgpu_sriov_runtime(adev))
+ schedule_work(&adev->virt.req_bad_pages_work);
+ break;
+ case IDH_UNRECOV_ERR_NOTIFICATION:
+ xgpu_nv_mailbox_send_ack(adev);
+ if (!amdgpu_ras_is_rma(adev)) {
+ ras->is_rma = true;
+ dev_err(adev->dev, "VF is in an unrecoverable state. Runtime Services are halted.\n");
+ }
+
+ if (amdgpu_sriov_runtime(adev))
+ WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
+ break;
case IDH_FLR_NOTIFICATION:
if (amdgpu_sriov_runtime(adev))
- schedule_work(&adev->virt.flr_work);
+ WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
break;
/* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
* it byfar since that polling thread will handle it,
@@ -402,6 +524,8 @@ int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev)
}
INIT_WORK(&adev->virt.flr_work, xgpu_nv_mailbox_flr_work);
+ INIT_WORK(&adev->virt.req_bad_pages_work, xgpu_nv_mailbox_req_bad_pages_work);
+ INIT_WORK(&adev->virt.handle_bad_pages_work, xgpu_nv_mailbox_handle_bad_pages_work);
return 0;
}
@@ -412,11 +536,67 @@ void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev)
amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
}
+static void xgpu_nv_ras_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
+{
+ if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0)) {
+ xgpu_nv_send_access_requests(adev, IDH_RAS_POISON);
+ } else {
+ amdgpu_virt_fini_data_exchange(adev);
+ xgpu_nv_send_access_requests_with_param(adev,
+ IDH_RAS_POISON, block, 0, 0);
+ }
+}
+
+static bool xgpu_nv_rcvd_ras_intr(struct amdgpu_device *adev)
+{
+ enum idh_event msg = xgpu_nv_mailbox_peek_msg(adev);
+
+ return (msg == IDH_RAS_ERROR_DETECTED || msg == 0xFFFFFFFF);
+}
+
+static int xgpu_nv_req_ras_err_count(struct amdgpu_device *adev)
+{
+ return xgpu_nv_send_access_requests(adev, IDH_REQ_RAS_ERROR_COUNT);
+}
+
+static int xgpu_nv_req_ras_cper_dump(struct amdgpu_device *adev, u64 vf_rptr)
+{
+ uint32_t vf_rptr_hi, vf_rptr_lo;
+
+ vf_rptr_hi = (uint32_t)(vf_rptr >> 32);
+ vf_rptr_lo = (uint32_t)(vf_rptr & 0xFFFFFFFF);
+ return xgpu_nv_send_access_requests_with_param(
+ adev, IDH_REQ_RAS_CPER_DUMP, vf_rptr_hi, vf_rptr_lo, 0);
+}
+
+static int xgpu_nv_req_ras_bad_pages(struct amdgpu_device *adev)
+{
+ return xgpu_nv_send_access_requests(adev, IDH_REQ_RAS_BAD_PAGES);
+}
+
+static int xgpu_nv_check_vf_critical_region(struct amdgpu_device *adev, u64 addr)
+{
+ uint32_t addr_hi, addr_lo;
+
+ addr_hi = (uint32_t)(addr >> 32);
+ addr_lo = (uint32_t)(addr & 0xFFFFFFFF);
+ return xgpu_nv_send_access_requests_with_param(
+ adev, IDH_REQ_RAS_CHK_CRITI, addr_hi, addr_lo, 0);
+}
+
const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
.req_full_gpu = xgpu_nv_request_full_gpu_access,
.rel_full_gpu = xgpu_nv_release_full_gpu_access,
.req_init_data = xgpu_nv_request_init_data,
.reset_gpu = xgpu_nv_request_reset,
- .wait_reset = NULL,
+ .ready_to_reset = xgpu_nv_ready_to_reset,
+ .wait_reset = xgpu_nv_wait_reset,
.trans_msg = xgpu_nv_mailbox_trans_msg,
+ .ras_poison_handler = xgpu_nv_ras_poison_handler,
+ .rcvd_ras_intr = xgpu_nv_rcvd_ras_intr,
+ .req_ras_err_count = xgpu_nv_req_ras_err_count,
+ .req_ras_cper_dump = xgpu_nv_req_ras_cper_dump,
+ .req_bad_pages = xgpu_nv_req_ras_bad_pages,
+ .req_ras_chk_criti = xgpu_nv_check_vf_critical_region
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
index 73887b0aa1d6..c1083e5e41e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
@@ -25,8 +25,8 @@
#define __MXGPU_NV_H__
#define NV_MAILBOX_POLL_ACK_TIMEDOUT 500
-#define NV_MAILBOX_POLL_MSG_TIMEDOUT 6000
-#define NV_MAILBOX_POLL_FLR_TIMEDOUT 5000
+#define NV_MAILBOX_POLL_MSG_TIMEDOUT 15000
+#define NV_MAILBOX_POLL_FLR_TIMEDOUT 10000
#define NV_MAILBOX_POLL_MSG_REP_MAX 11
enum idh_request {
@@ -39,6 +39,11 @@ enum idh_request {
IDH_LOG_VF_ERROR = 200,
IDH_READY_TO_RESET = 201,
+ IDH_RAS_POISON = 202,
+ IDH_REQ_RAS_ERROR_COUNT = 203,
+ IDH_REQ_RAS_CPER_DUMP = 204,
+ IDH_REQ_RAS_BAD_PAGES = 205,
+ IDH_REQ_RAS_CHK_CRITI = 206
};
enum idh_event {
@@ -50,6 +55,15 @@ enum idh_event {
IDH_FAIL,
IDH_QUERY_ALIVE,
IDH_REQ_GPU_INIT_DATA_READY,
+ IDH_RAS_POISON_READY,
+ IDH_PF_SOFT_FLR_NOTIFICATION,
+ IDH_RAS_ERROR_DETECTED,
+ IDH_RAS_ERROR_COUNT_READY = 11,
+ IDH_RAS_CPER_DUMP_READY = 14,
+ IDH_RAS_BAD_PAGES_READY = 15,
+ IDH_RAS_BAD_PAGES_NOTIFICATION = 16,
+ IDH_UNRECOV_ERR_NOTIFICATION = 17,
+ IDH_REQ_RAS_CHK_CRITI_READY = 18,
IDH_TEXT_MESSAGE = 255,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index aef9d059ae52..e1d63bed84bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -42,6 +42,8 @@
#include "smu/smu_7_1_3_d.h"
#include "mxgpu_vi.h"
+#include "amdgpu_reset.h"
+
/* VI golden setting */
static const u32 xgpu_fiji_mgcg_cgcg_init[] = {
mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
@@ -332,7 +334,7 @@ static void xgpu_vi_mailbox_send_ack(struct amdgpu_device *adev)
break;
}
mdelay(1);
- timeout -=1;
+ timeout -= 1;
reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
}
@@ -513,15 +515,18 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
- /* wait until RCV_MSG become 3 */
- if (xgpu_vi_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) {
- pr_err("failed to receive FLR_CMPL\n");
- return;
- }
-
/* Trigger recovery due to world switch failure */
- if (amdgpu_device_should_recover_gpu(adev))
- amdgpu_device_gpu_recover(adev, NULL);
+ if (amdgpu_device_should_recover_gpu(adev)) {
+ struct amdgpu_reset_context reset_context;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_HOST_FLR, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+ }
}
static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -544,14 +549,17 @@ static int xgpu_vi_mailbox_rcv_irq(struct amdgpu_device *adev,
{
int r;
- /* trigger gpu-reset by hypervisor only if TDR disbaled */
+ /* trigger gpu-reset by hypervisor only if TDR disabled */
if (!amdgpu_gpu_recovery) {
/* see what event we get */
r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
/* only handle FLR_NOTIFY now */
if (!r)
- schedule_work(&adev->virt.flr_work);
+ WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index 38241cf0e1f1..4cd325149b63 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -107,7 +107,7 @@ force_update_wptr_for_self_int(struct amdgpu_device *adev,
{
u32 ih_cntl, ih_rb_cntl;
- if (adev->ip_versions[OSSSYS_HWIP][0] < IP_VERSION(5, 0, 3))
+ if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) < IP_VERSION(5, 0, 3))
return;
ih_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_CNTL2);
@@ -330,7 +330,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)) {
if (ih[0]->use_bus_addr) {
- switch (adev->ip_versions[OSSSYS_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, OSSSYS_HWIP, 0)) {
case IP_VERSION(5, 0, 3):
case IP_VERSION(5, 2, 0):
case IP_VERSION(5, 2, 1):
@@ -409,9 +409,11 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
u32 wptr, tmp;
struct amdgpu_ih_regs *ih_regs;
- if (ih == &adev->irq.ih) {
+ if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) {
/* Only ring0 supports writeback. On other rings fall back
* to register-based code with overflow checking below.
+ * ih_soft ring doesn't have any backing hardware registers,
+ * update wptr and return.
*/
wptr = le32_to_cpu(*ih->wptr_cpu);
@@ -432,14 +434,19 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
* this should allow us to catch up.
*/
tmp = (wptr + 32) & ih->ptr_mask;
- dev_warn(adev->dev, "IH ring buffer overflow "
- "(0x%08X, 0x%08X, 0x%08X)\n",
- wptr, ih->rptr, tmp);
+ dev_warn(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+ amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp);
ih->rptr = tmp;
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
out:
return (wptr & ih->ptr_mask);
}
@@ -483,6 +490,9 @@ static void navi10_ih_set_rptr(struct amdgpu_device *adev,
{
struct amdgpu_ih_regs *ih_regs;
+ if (ih == &adev->irq.ih_soft)
+ return;
+
if (ih->use_doorbell) {
/* XXX check if swapping is necessary on BE */
*ih->rptr_cpu = ih->rptr;
@@ -531,19 +541,19 @@ static void navi10_ih_set_self_irq_funcs(struct amdgpu_device *adev)
adev->irq.self_irq.funcs = &navi10_ih_self_irq_funcs;
}
-static int navi10_ih_early_init(void *handle)
+static int navi10_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
navi10_ih_set_interrupt_funcs(adev);
navi10_ih_set_self_irq_funcs(adev);
return 0;
}
-static int navi10_ih_sw_init(void *handle)
+static int navi10_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool use_bus_addr;
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_IH, 0,
@@ -560,7 +570,7 @@ static int navi10_ih_sw_init(void *handle)
use_bus_addr = false;
else
use_bus_addr = true;
- r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr);
if (r)
return r;
@@ -573,7 +583,7 @@ static int navi10_ih_sw_init(void *handle)
/* initialize ih control registers offset */
navi10_ih_init_register_offset(adev);
- r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
if (r)
return r;
@@ -582,63 +592,52 @@ static int navi10_ih_sw_init(void *handle)
return r;
}
-static int navi10_ih_sw_fini(void *handle)
+static int navi10_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
return 0;
}
-static int navi10_ih_hw_init(void *handle)
+static int navi10_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- r = navi10_ih_irq_init(adev);
- if (r)
- return r;
-
- return 0;
+ return navi10_ih_irq_init(adev);
}
-static int navi10_ih_hw_fini(void *handle)
+static int navi10_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- navi10_ih_irq_disable(adev);
+ navi10_ih_irq_disable(ip_block->adev);
return 0;
}
-static int navi10_ih_suspend(void *handle)
+static int navi10_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return navi10_ih_hw_fini(adev);
+ return navi10_ih_hw_fini(ip_block);
}
-static int navi10_ih_resume(void *handle)
+static int navi10_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return navi10_ih_hw_init(adev);
+ return navi10_ih_hw_init(ip_block);
}
-static bool navi10_ih_is_idle(void *handle)
+static bool navi10_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return true;
}
-static int navi10_ih_wait_for_idle(void *handle)
+static int navi10_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return -ETIMEDOUT;
}
-static int navi10_ih_soft_reset(void *handle)
+static int navi10_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
return 0;
@@ -665,40 +664,35 @@ static void navi10_ih_update_clockgating_state(struct amdgpu_device *adev,
if (def != data)
WREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL, data);
}
-
- return;
}
-static int navi10_ih_set_clockgating_state(void *handle,
+static int navi10_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
navi10_ih_update_clockgating_state(adev,
state == AMD_CG_STATE_GATE);
return 0;
}
-static int navi10_ih_set_powergating_state(void *handle,
+static int navi10_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void navi10_ih_get_clockgating_state(void *handle, u32 *flags)
+static void navi10_ih_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL))
*flags |= AMD_CG_SUPPORT_IH_CG;
-
- return;
}
static const struct amd_ip_funcs navi10_ih_ip_funcs = {
.name = "navi10_ih",
.early_init = navi10_ih_early_init,
- .late_init = NULL,
.sw_init = navi10_ih_sw_init,
.sw_fini = navi10_ih_sw_fini,
.hw_init = navi10_ih_hw_init,
@@ -716,6 +710,7 @@ static const struct amd_ip_funcs navi10_ih_ip_funcs = {
static const struct amdgpu_ih_funcs navi10_ih_funcs = {
.get_wptr = navi10_ih_get_wptr,
.decode_iv = amdgpu_ih_decode_iv_helper,
+ .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
.set_rptr = navi10_ih_set_rptr
};
@@ -725,8 +720,7 @@ static void navi10_ih_set_interrupt_funcs(struct amdgpu_device *adev)
adev->irq.ih_funcs = &navi10_ih_funcs;
}
-const struct amdgpu_ip_block_version navi10_ih_ip_block =
-{
+const struct amdgpu_ip_block_version navi10_ih_ip_block = {
.type = AMD_IP_BLOCK_TYPE_IH,
.major = 5,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
index a5b60c9a2418..c88284ff92d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
@@ -68,6 +68,7 @@
#define SDMA_SUBOP_POLL_REG_WRITE_MEM 1
#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM 2
#define SDMA_SUBOP_POLL_MEM_VERIFY 3
+#define SDMA_SUBOP_VM_INVALIDATION 4
#define HEADER_AGENT_DISPATCH 4
#define HEADER_BARRIER 5
#define SDMA_OP_AQL_COPY 0
@@ -4041,6 +4042,69 @@
/*
+** Definitions for SDMA_PKT_VM_INVALIDATION packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_shift 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_OP(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift 8
+#define SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift)
+
+/*define for gfx_eng_id field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_shift 16
+#define SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_shift)
+
+/*define for mm_eng_id field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_shift 24
+#define SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_shift)
+
+/*define for INVALIDATEREQ word*/
+/*define for invalidatereq field*/
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_offset 1
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask 0xFFFFFFFF
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift 0
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(x) (((x) & SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask) << SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift)
+
+/*define for ADDRESSRANGELO word*/
+/*define for addressrangelo field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_offset 2
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask 0xFFFFFFFF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift 0
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_ADDRESSRANGELO(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift)
+
+/*define for ADDRESSRANGEHI word*/
+/*define for invalidateack field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask 0x0000FFFF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift 0
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift)
+
+/*define for addressrangehi field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift 16
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift)
+
+/*define for reserved field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask 0x000001FF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift 23
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_RESERVED(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift)
+
+
+/*
** Definitions for SDMA_PKT_ATOMIC packet
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c
new file mode 100644
index 000000000000..9b4025c39e44
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c
@@ -0,0 +1,554 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nbif_v6_3_1.h"
+
+#include "nbif/nbif_6_3_1_offset.h"
+#include "nbif/nbif_6_3_1_sh_mask.h"
+#include "pcie/pcie_6_1_0_offset.h"
+#include "pcie/pcie_6_1_0_sh_mask.h"
+#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+/*
+ * Program the two BIF HDP flush remap registers. Each one receives
+ * adev->rmmio_remap.reg_offset plus the matching KFD_MMIO_REMAP_* constant.
+ */
+static void nbif_v6_3_1_remap_hdp_registers(struct amdgpu_device *adev)
+{
+	u32 remap_base = adev->rmmio_remap.reg_offset;
+
+	WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
+		     remap_base + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+	WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL,
+		     remap_base + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
+/* Return the STRAP_ATI_REV_ID_DEV0_F0 field of RCC_STRAP0_RCC_DEV0_EPF0_STRAP0. */
+static u32 nbif_v6_3_1_get_rev_id(struct amdgpu_device *adev)
+{
+	u32 strap0 = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
+
+	return (strap0 & RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK) >>
+	       RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
+}
+
+/*
+ * Write BIF_FB_EN: both the FB read and FB write enable bits when @enable
+ * is true, zero otherwise.
+ */
+static void nbif_v6_3_1_mc_access_enable(struct amdgpu_device *adev, bool enable)
+{
+	u32 fb_en = 0;
+
+	if (enable)
+		fb_en = BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK |
+			BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK;
+
+	WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, fb_en);
+}
+
+/* Return the raw value of the RCC_CONFIG_MEMSIZE register. */
+static u32 nbif_v6_3_1_get_memsize(struct amdgpu_device *adev)
+{
+	u32 memsize = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_RCC_CONFIG_MEMSIZE);
+
+	return memsize;
+}
+
+/*
+ * Configure S2A doorbell entry 2 for SDMA.
+ *
+ * Only @instance 0 is handled; any other instance is ignored. When
+ * @use_doorbell is set the port is enabled and its AWID, range offset
+ * (@doorbell_index), range size (@doorbell_size) and AWADDR[31:28] value are
+ * programmed; otherwise only the range size is cleared to 0.
+ */
+static void nbif_v6_3_1_sdma_doorbell_range(struct amdgpu_device *adev,
+					    int instance, bool use_doorbell,
+					    int doorbell_index,
+					    int doorbell_size)
+{
+	u32 ctrl;
+
+	if (instance != 0)
+		return;
+
+	ctrl = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL);
+
+	if (!use_doorbell) {
+		ctrl = REG_SET_FIELD(ctrl,
+				     GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL,
+				     S2A_DOORBELL_PORT2_RANGE_SIZE,
+				     0);
+	} else {
+		ctrl = REG_SET_FIELD(ctrl,
+				     GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL,
+				     S2A_DOORBELL_PORT2_ENABLE,
+				     0x1);
+		ctrl = REG_SET_FIELD(ctrl,
+				     GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL,
+				     S2A_DOORBELL_PORT2_AWID,
+				     0xe);
+		ctrl = REG_SET_FIELD(ctrl,
+				     GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL,
+				     S2A_DOORBELL_PORT2_RANGE_OFFSET,
+				     doorbell_index);
+		ctrl = REG_SET_FIELD(ctrl,
+				     GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL,
+				     S2A_DOORBELL_PORT2_RANGE_SIZE,
+				     doorbell_size);
+		ctrl = REG_SET_FIELD(ctrl,
+				     GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL,
+				     S2A_DOORBELL_PORT2_AWADDR_31_28_VALUE,
+				     0x3);
+	}
+
+	WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, ctrl);
+}
+
+/*
+ * Configure the S2A doorbell entry used by VCN.
+ *
+ * A non-zero @instance reads and writes ENTRY_5_CTRL, instance 0 uses
+ * ENTRY_4_CTRL. Both paths use the ENTRY_4 field definitions.
+ * NOTE(review): this presumes ENTRY_4 and ENTRY_5 share a field layout - confirm.
+ * With @use_doorbell set, the port is enabled with a range size of 8 and
+ * @doorbell_index as offset; otherwise only the range size is cleared.
+ */
+static void nbif_v6_3_1_vcn_doorbell_range(struct amdgpu_device *adev,
+					   bool use_doorbell, int doorbell_index,
+					   int instance)
+{
+	/* the same value is used for both AWID and AWADDR[31:28] */
+	u32 port_id = instance ? 0x7 : 0x4;
+	u32 ctrl;
+
+	if (instance)
+		ctrl = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL);
+	else
+		ctrl = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL);
+
+	if (!use_doorbell) {
+		ctrl = REG_SET_FIELD(ctrl,
+				     GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL,
+				     S2A_DOORBELL_PORT4_RANGE_SIZE,
+				     0);
+	} else {
+		ctrl = REG_SET_FIELD(ctrl,
+				     GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL,
+				     S2A_DOORBELL_PORT4_ENABLE,
+				     0x1);
+		ctrl = REG_SET_FIELD(ctrl,
+				     GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL,
+				     S2A_DOORBELL_PORT4_AWID,
+				     port_id);
+		ctrl = REG_SET_FIELD(ctrl,
+				     GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL,
+				     S2A_DOORBELL_PORT4_RANGE_OFFSET,
+				     doorbell_index);
+		ctrl = REG_SET_FIELD(ctrl,
+				     GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL,
+				     S2A_DOORBELL_PORT4_RANGE_SIZE,
+				     8);
+		ctrl = REG_SET_FIELD(ctrl,
+				     GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL,
+				     S2A_DOORBELL_PORT4_AWADDR_31_28_VALUE,
+				     port_id);
+	}
+
+	if (instance)
+		WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL, ctrl);
+	else
+		WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, ctrl);
+}
+
+/* Write fixed control words to S2A doorbell entries 0 and 3. */
+static void nbif_v6_3_1_gc_doorbell_init(struct amdgpu_device *adev)
+{
+	const u32 entry0_ctrl = 0x30000007;
+	const u32 entry3_ctrl = 0x3000000d;
+
+	WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_0_CTRL, entry0_ctrl);
+	WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_3_CTRL, entry3_ctrl);
+}
+
+/* Set BIF_DOORBELL_APER_EN in RCC_DOORBELL_APER_EN to 1 or 0 according to @enable. */
+static void nbif_v6_3_1_enable_doorbell_aperture(struct amdgpu_device *adev,
+						 bool enable)
+{
+	u32 aper_en = 0;
+
+	if (enable)
+		aper_en = 1;
+
+	WREG32_FIELD15_PREREG(NBIO, 0, RCC_DEV0_EPF0_RCC_DOORBELL_APER_EN,
+			      BIF_DOORBELL_APER_EN, aper_en);
+}
+
+/*
+ * Program the doorbell self-ring GPA aperture.
+ *
+ * When @enable is set, the aperture base registers are loaded from
+ * adev->doorbell.base and the control word gets APER_EN = 1, APER_MODE = 1
+ * and APER_SIZE = 0. When clear, only the control register is written, with 0.
+ */
+static void
+nbif_v6_3_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+					      bool enable)
+{
+	u32 aper_cntl = 0;
+
+	if (enable) {
+		aper_cntl = REG_SET_FIELD(aper_cntl,
+					  BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+					  DOORBELL_SELFRING_GPA_APER_EN, 1);
+		aper_cntl = REG_SET_FIELD(aper_cntl,
+					  BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+					  DOORBELL_SELFRING_GPA_APER_MODE, 1);
+		aper_cntl = REG_SET_FIELD(aper_cntl,
+					  BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+					  DOORBELL_SELFRING_GPA_APER_SIZE, 0);
+
+		/* base address goes in before the control word is written */
+		WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
+			     lower_32_bits(adev->doorbell.base));
+		WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
+			     upper_32_bits(adev->doorbell.base));
+	}
+
+	WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, aper_cntl);
+}
+
+/*
+ * Configure S2A doorbell entry 1 for the IH.
+ *
+ * With @use_doorbell set, the port is enabled with AWID 0, range offset
+ * @doorbell_index, range size 2 and AWADDR[31:28] value 0. Otherwise only
+ * the range size is cleared to 0.
+ */
+static void nbif_v6_3_1_ih_doorbell_range(struct amdgpu_device *adev,
+					  bool use_doorbell, int doorbell_index)
+{
+	u32 ctrl = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL);
+
+	if (!use_doorbell) {
+		ctrl = REG_SET_FIELD(ctrl,
+				     GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL,
+				     S2A_DOORBELL_PORT1_RANGE_SIZE,
+				     0);
+	} else {
+		ctrl = REG_SET_FIELD(ctrl,
+				     GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL,
+				     S2A_DOORBELL_PORT1_ENABLE,
+				     0x1);
+		ctrl = REG_SET_FIELD(ctrl,
+				     GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL,
+				     S2A_DOORBELL_PORT1_AWID,
+				     0x0);
+		ctrl = REG_SET_FIELD(ctrl,
+				     GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL,
+				     S2A_DOORBELL_PORT1_RANGE_OFFSET,
+				     doorbell_index);
+		ctrl = REG_SET_FIELD(ctrl,
+				     GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL,
+				     S2A_DOORBELL_PORT1_RANGE_SIZE,
+				     2);
+		ctrl = REG_SET_FIELD(ctrl,
+				     GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL,
+				     S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
+				     0x0);
+	}
+
+	WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, ctrl);
+}
+
+/*
+ * Set up BIF interrupt control: point INTERRUPT_CNTL2 at the dummy page
+ * (address shifted right by 8) and clear the IH_DUMMY_RD_OVERRIDE and
+ * IH_REQ_NONSNOOP_EN fields of INTERRUPT_CNTL.
+ */
+static void nbif_v6_3_1_ih_control(struct amdgpu_device *adev)
+{
+	u32 cntl;
+
+	/* setup interrupt control */
+	WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
+
+	cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL);
+
+	/*
+	 * IH_DUMMY_RD_OVERRIDE = 0: dummy read disabled with MSI, enabled without MSI
+	 * IH_DUMMY_RD_OVERRIDE = 1: dummy read controlled by IH_DUMMY_RD_EN
+	 */
+	cntl = REG_SET_FIELD(cntl, BIF_BX0_INTERRUPT_CNTL,
+			     IH_DUMMY_RD_OVERRIDE, 0);
+
+	/* IH_REQ_NONSNOOP_EN = 1 if the ring is in non-cacheable memory, e.g. vram */
+	cntl = REG_SET_FIELD(cntl, BIF_BX0_INTERRUPT_CNTL,
+			     IH_REQ_NONSNOOP_EN, 0);
+
+	WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL, cntl);
+}
+
+/* No-op in this version: the body is empty, so @enable has no effect. */
+static void
+nbif_v6_3_1_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+}
+
+/* No-op in this version: the body is empty, so @enable has no effect. */
+static void
+nbif_v6_3_1_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+}
+
+/* No-op in this version: the body is empty, so *@flags is left untouched. */
+static void
+nbif_v6_3_1_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+}
+
+/* Return the SOC15 register offset of BIF_BX_PF0_GPU_HDP_FLUSH_REQ. */
+static u32 nbif_v6_3_1_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+	u32 offset = SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_REQ);
+
+	return offset;
+}
+
+/* Return the SOC15 register offset of BIF_BX_PF0_GPU_HDP_FLUSH_DONE. */
+static u32 nbif_v6_3_1_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+	u32 offset = SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_DONE);
+
+	return offset;
+}
+
+/* Return the SOC15 register offset of BIF_BX_PF0_RSMU_INDEX. */
+static u32 nbif_v6_3_1_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+	u32 offset = SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_INDEX);
+
+	return offset;
+}
+
+/* Return the SOC15 register offset of BIF_BX_PF0_RSMU_DATA. */
+static u32 nbif_v6_3_1_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+	u32 offset = SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_DATA);
+
+	return offset;
+}
+
+/*
+ * HDP flush ref/mask table: each ref_and_mask_* member is set to the
+ * BIF_BX_PF0_GPU_HDP_FLUSH_DONE bit mask of the same engine (CP0-CP9,
+ * SDMA0, SDMA1). Members not listed here are left zero-initialised.
+ */
+const struct nbio_hdp_flush_reg nbif_v6_3_1_hdp_flush_reg = {
+ .ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK,
+ .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK,
+ .ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK,
+ .ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK,
+ .ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK,
+ .ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK,
+ .ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK,
+ .ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK,
+ .ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK,
+ .ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK,
+ .ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+ .ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+};
+
+/* Clear STRAP_NO_SOFT_RESET_DEV0_F2 in RCC_DEV0_EPF2_STRAP2 (read-modify-write). */
+static void nbif_v6_3_1_init_registers(struct amdgpu_device *adev)
+{
+	u32 strap2 = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2);
+
+	strap2 &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK;
+	WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, strap2);
+}
+
+/* Return the ROM_OFFSET field of REGS_ROM_OFFSET_CTRL. */
+static u32 nbif_v6_3_1_get_rom_offset(struct amdgpu_device *adev)
+{
+	u32 ctrl = RREG32_SOC15(NBIO, 0, regREGS_ROM_OFFSET_CTRL);
+
+	return REG_GET_FIELD(ctrl, REGS_ROM_OFFSET_CTRL, ROM_OFFSET);
+}
+
+#ifdef CONFIG_PCIEASPM
+/*
+ * Program the NBIF LTR controls and bring the LTR enable bit in the PCIe
+ * Device Control 2 register in line with adev->pdev->ltr_path.
+ *
+ * Only built when CONFIG_PCIEASPM is set; the sole caller in this file is
+ * nbif_v6_3_1_program_aspm().
+ */
+static void nbif_v6_3_1_program_ltr(struct amdgpu_device *adev)
+{
+	uint32_t def, data;
+	u16 devctl2;
+	bool ltr_enabled;
+
+	/* TX_LTR_CNTL is loaded from a fixed value, not from the current contents */
+	def = RREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL);
+	data = 0x35EB;
+	data &= ~RCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL__LTR_PRIV_MSG_DIS_IN_PM_NON_D0_MASK;
+	data &= ~RCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL__LTR_PRIV_RST_LTR_IN_DL_DOWN_MASK;
+	if (def != data)
+		WREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, data);
+
+	def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2);
+	data &= ~RCC_STRAP0_RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK;
+	if (def != data)
+		WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2, data);
+
+	pcie_capability_read_word(adev->pdev, PCI_EXP_DEVCTL2, &devctl2);
+
+	/*
+	 * The masked value is either 0 or the LTR_EN bit itself, whereas
+	 * ltr_path is used as a truth value. Normalise to 0/1 so the
+	 * "already in sync" check also works when LTR is enabled.
+	 */
+	ltr_enabled = !!(devctl2 & PCI_EXP_DEVCTL2_LTR_EN);
+	if (!!adev->pdev->ltr_path == ltr_enabled)
+		return;
+
+	if (adev->pdev->ltr_path)
+		pcie_capability_set_word(adev->pdev, PCI_EXP_DEVCTL2, PCI_EXP_DEVCTL2_LTR_EN);
+	else
+		pcie_capability_clear_word(adev->pdev, PCI_EXP_DEVCTL2, PCI_EXP_DEVCTL2_LTR_EN);
+}
+#endif
+
+/*
+ * Program the PCIe link-controller and NBIF strap registers related to ASPM.
+ *
+ * The body is compiled only when CONFIG_PCIEASPM is set; otherwise this is
+ * an empty function. The sequence first clears the inactivity and VLINK
+ * timers and the LTR enable bit, then programs power-down controls and LTR
+ * (via nbif_v6_3_1_program_ltr()), and finally writes the timer values.
+ * Each register is written only if its value actually changes.
+ */
+static void nbif_v6_3_1_program_aspm(struct amdgpu_device *adev)
+{
+#ifdef CONFIG_PCIEASPM
+	uint32_t def, data;
+	u16 devctl2, ltr;
+
+	/* clear the L0s/L1 inactivity timers; final values are set at the end */
+	def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL);
+	data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK;
+	data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
+	data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+	if (def != data)
+		WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL, data);
+
+	def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL7);
+	data |= PCIE_LC_CNTL7__LC_NBIF_ASPM_INPUT_EN_MASK;
+	if (def != data)
+		WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL7, data);
+
+	def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3);
+	data |= PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+	if (def != data)
+		WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3, data);
+
+	def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3);
+	data &= ~RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK;
+	data &= ~RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK;
+	if (def != data)
+		WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data);
+
+	def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5);
+	data &= ~RCC_STRAP0_RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK;
+	if (def != data)
+		WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data);
+
+	/*
+	 * Disable LTR for now. pcie_capability_set_word() only ORs bits into
+	 * the register and therefore cannot clear LTR_EN; use the clear helper.
+	 */
+	pcie_capability_read_word(adev->pdev, PCI_EXP_DEVCTL2, &devctl2);
+	if (devctl2 & PCI_EXP_DEVCTL2_LTR_EN)
+		pcie_capability_clear_word(adev->pdev, PCI_EXP_DEVCTL2,
+					   PCI_EXP_DEVCTL2_LTR_EN);
+
+	/* only touch the LTR max-latency registers if the capability exists */
+	ltr = pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_LTR);
+	if (ltr)
+		pci_write_config_dword(adev->pdev, ltr + PCI_LTR_MAX_SNOOP_LAT, 0x10011001);
+
+#if 0
+	/* should regPSWUSP0_PCIE_LC_CNTL2 be replaced by PCIE_LC_CNTL2 or something else? */
+	def = data = RREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2);
+	data |= PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK |
+		PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK;
+	data &= ~PSWUSP0_PCIE_LC_CNTL2__LC_RCV_L0_TO_RCV_L0S_DIS_MASK;
+	if (def != data)
+		WREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2, data);
+#endif
+	def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL4);
+	data |= PCIE_LC_CNTL4__LC_L1_POWERDOWN_MASK;
+	if (def != data)
+		WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL4, data);
+
+	def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL);
+	data |= PCIE_LC_RXRECOVER_RXSTANDBY_CNTL__LC_RX_L0S_STANDBY_EN_MASK;
+	if (def != data)
+		WREG32_SOC15(PCIE, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL, data);
+
+	nbif_v6_3_1_program_ltr(adev);
+
+	/* write the VLINK timers that were cleared above */
+	def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3);
+	data |= 0x5DE0 << RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
+	data |= 0x0010 << RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT;
+	if (def != data)
+		WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data);
+
+	def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5);
+	data |= 0x0010 << RCC_STRAP0_RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT;
+	if (def != data)
+		WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data);
+
+	/* final inactivity timers: L0s field 0, L1 field 0x9; clear PMI_TO_L1_DIS */
+	def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL);
+	data |= 0x0 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT;
+	data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
+	data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+	if (def != data)
+		WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL, data);
+
+	def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3);
+	data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+	if (def != data)
+		WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3, data);
+#endif
+}
+
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+
+/*
+ * Choose the HDP flush remap location.
+ *
+ * On an SR-IOV VF, or when PAGE_SIZE exceeds 4096, the byte offset of
+ * HDP_MEM_COHERENCY_FLUSH_CNTL is used and bus_addr is set to 0. Otherwise
+ * both reg_offset and bus_addr are derived from MMIO_REG_HOLE_OFFSET.
+ */
+static void nbif_v6_3_1_set_reg_remap(struct amdgpu_device *adev)
+{
+	if (amdgpu_sriov_vf(adev) || PAGE_SIZE > 4096) {
+		adev->rmmio_remap.reg_offset =
+			SOC15_REG_OFFSET(NBIO, 0,
+					 regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+		adev->rmmio_remap.bus_addr = 0;
+		return;
+	}
+
+	adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+	adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+}
+
+/*
+ * NBIO callback table for NBIF v6.3.1. Every member points at the static
+ * nbif_v6_3_1_* helper of the matching name defined earlier in this file.
+ */
+const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs = {
+ .get_hdp_flush_req_offset = nbif_v6_3_1_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbif_v6_3_1_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbif_v6_3_1_get_pcie_index_offset,
+ .get_pcie_data_offset = nbif_v6_3_1_get_pcie_data_offset,
+ .get_rev_id = nbif_v6_3_1_get_rev_id,
+ .mc_access_enable = nbif_v6_3_1_mc_access_enable,
+ .get_memsize = nbif_v6_3_1_get_memsize,
+ .sdma_doorbell_range = nbif_v6_3_1_sdma_doorbell_range,
+ .vcn_doorbell_range = nbif_v6_3_1_vcn_doorbell_range,
+ .gc_doorbell_init = nbif_v6_3_1_gc_doorbell_init,
+ .enable_doorbell_aperture = nbif_v6_3_1_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbif_v6_3_1_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbif_v6_3_1_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbif_v6_3_1_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbif_v6_3_1_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbif_v6_3_1_get_clockgating_state,
+ .ih_control = nbif_v6_3_1_ih_control,
+ .init_registers = nbif_v6_3_1_init_registers,
+ .remap_hdp_registers = nbif_v6_3_1_remap_hdp_registers,
+ .get_rom_offset = nbif_v6_3_1_get_rom_offset,
+ .program_aspm = nbif_v6_3_1_program_aspm,
+ .set_reg_remap = nbif_v6_3_1_set_reg_remap,
+};
+
+
+/*
+ * Enable or disable the RAS ATHUB error-event interrupt by updating the
+ * RAS_ATHUB_ERR_EVENT_INTERRUPT_DISABLE field of BIF_DOORBELL_INT_CNTL.
+ * @src and @type are not used. Always returns 0.
+ */
+static int nbif_v6_3_1_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev,
+							  struct amdgpu_irq_src *src,
+							  unsigned type,
+							  enum amdgpu_interrupt_state state)
+{
+	/* The ras_controller_irq enablement should be done in psp bl when it
+	 * tries to enable ras feature. Driver only need to set the correct interrupt
+	 * vector for bare-metal and sriov use case respectively
+	 */
+	u32 int_cntl;
+	u32 irq_disable = (state == AMDGPU_IRQ_STATE_ENABLE) ? 0 : 1;
+
+	int_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+	int_cntl = REG_SET_FIELD(int_cntl,
+				 BIF_BX0_BIF_DOORBELL_INT_CNTL,
+				 RAS_ATHUB_ERR_EVENT_INTERRUPT_DISABLE,
+				 irq_disable);
+	WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, int_cntl);
+
+	return 0;
+}
+
+/* Dummy .process callback: ignores all arguments and always returns 0. */
+static int nbif_v6_3_1_process_err_event_athub_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ /* By design, the ih cookie for err_event_athub_irq should be written
+ * to bif ring. since bif ring is not enabled, just leave process callback
+ * as a dummy one.
+ */
+ return 0;
+}
+
+/* IRQ source callbacks for the RAS ATHUB error event: state setter plus dummy handler. */
+static const struct amdgpu_irq_src_funcs nbif_v6_3_1_ras_err_event_athub_irq_funcs = {
+ .set = nbif_v6_3_1_set_ras_err_event_athub_irq_state,
+ .process = nbif_v6_3_1_process_err_event_athub_irq,
+};
+
+/*
+ * Check BIF_DOORBELL_INT_CNTL for a pending RAS ATHUB error-event interrupt.
+ * If the status field is set, write the CLEAR field and then call
+ * amdgpu_ras_global_ras_isr(); otherwise do nothing.
+ */
+static void nbif_v6_3_1_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev)
+{
+	u32 int_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+
+	if (!REG_GET_FIELD(int_cntl,
+			   BIF_BX0_BIF_DOORBELL_INT_CNTL,
+			   RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS))
+		return;
+
+	/* driver has to clear the interrupt status when bif ring is disabled */
+	int_cntl = REG_SET_FIELD(int_cntl,
+				 BIF_BX0_BIF_DOORBELL_INT_CNTL,
+				 RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1);
+	WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, int_cntl);
+	amdgpu_ras_global_ras_isr(adev);
+}
+
+/*
+ * Install the RAS ATHUB error-event IRQ callbacks on
+ * adev->nbio.ras_err_event_athub_irq and register the source with
+ * amdgpu_irq_add_id(). Returns that call's result.
+ */
+static int nbif_v6_3_1_init_ras_err_event_athub_interrupt(struct amdgpu_device *adev)
+{
+	struct amdgpu_irq_src *irq_src = &adev->nbio.ras_err_event_athub_irq;
+
+	/* init the irq funcs */
+	irq_src->funcs = &nbif_v6_3_1_ras_err_event_athub_irq_funcs;
+	irq_src->num_types = 1;
+
+	/* register ras err event athub interrupt
+	 * nbif v6_3_1 uses the same irq source as nbio v7_4
+	 */
+	return amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_BIF,
+				 NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT,
+				 irq_src);
+}
+
+/* NBIO RAS hooks for NBIF v6.3.1: interrupt handler (no BIF ring) and interrupt init. */
+struct amdgpu_nbio_ras nbif_v6_3_1_ras = {
+ .handle_ras_err_event_athub_intr_no_bifring =
+ nbif_v6_3_1_handle_ras_err_event_athub_intr_no_bifring,
+ .init_ras_err_event_athub_interrupt =
+ nbif_v6_3_1_init_ras_err_event_athub_interrupt,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h
new file mode 100644
index 000000000000..3afec715a9fe
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NBIO_V6_3_1_H__
+#define __NBIO_V6_3_1_H__
+
+#include "soc15_common.h"
+
+extern const struct nbio_hdp_flush_reg nbif_v6_3_1_hdp_flush_reg;
+extern const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs;
+extern struct amdgpu_nbio_ras nbif_v6_3_1_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index ee7cab37dfd5..04041b398781 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -21,13 +21,13 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v2_3.h"
#include "nbio/nbio_2_3_default.h"
#include "nbio/nbio_2_3_offset.h"
#include "nbio/nbio_2_3_sh_mask.h"
#include <uapi/linux/kfd_ioctl.h>
+#include <linux/device.h>
#include <linux/pci.h>
#define smnPCIE_CONFIG_CNTL 0x11180044
@@ -278,7 +278,7 @@ static void nbio_v2_3_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
static void nbio_v2_3_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
int data;
@@ -328,27 +328,6 @@ const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg = {
.ref_and_mask_sdma1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
};
-const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg_sc = {
- .ref_and_mask_cp0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP0_MASK,
- .ref_and_mask_cp1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP1_MASK,
- .ref_and_mask_cp2 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP2_MASK,
- .ref_and_mask_cp3 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP3_MASK,
- .ref_and_mask_cp4 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP4_MASK,
- .ref_and_mask_cp5 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP5_MASK,
- .ref_and_mask_cp6 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP6_MASK,
- .ref_and_mask_cp7 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP7_MASK,
- .ref_and_mask_cp8 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP8_MASK,
- .ref_and_mask_cp9 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP9_MASK,
- .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK,
- .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK,
- .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK,
- .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK,
- .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK,
- .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK,
- .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK,
- .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK,
-};
-
static void nbio_v2_3_init_registers(struct amdgpu_device *adev)
{
uint32_t def, data;
@@ -359,15 +338,11 @@ static void nbio_v2_3_init_registers(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnPCIE_CONFIG_CNTL, data);
-
- if (amdgpu_sriov_vf(adev))
- adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
- mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
}
#define NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT 0x00000000 // off by default, no gains over L1
-#define NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT 0x00000009 // 1=1us, 9=1ms
-#define NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT 0x0000000E // 4ms
+#define NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT 0x0000000A // 1=1us, 9=1ms, 10=4ms
+#define NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT 0x0000000E // 400ms
static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev,
bool enable)
@@ -382,14 +357,14 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev,
data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT;
- if (pci_is_thunderbolt_attached(adev->pdev))
+ if (dev_is_removable(&adev->pdev->dev))
data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
else
data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
} else {
- /* Disbale ASPM L1 */
+ /* Disable ASPM L1 */
data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK;
/* Disable ASPM TxL0s */
data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
@@ -401,6 +376,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev,
WREG32_PCIE(smnPCIE_LC_CNTL, data);
}
+#ifdef CONFIG_PCIEASPM
static void nbio_v2_3_program_ltr(struct amdgpu_device *adev)
{
uint32_t def, data;
@@ -422,9 +398,11 @@ static void nbio_v2_3_program_ltr(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
}
+#endif
static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
{
+#ifdef CONFIG_PCIEASPM
uint32_t def, data;
def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
@@ -480,7 +458,10 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnPCIE_LC_CNTL6, data);
- nbio_v2_3_program_ltr(adev);
+ /* Don't bother about LTR if LTR is not enabled
+ * in the path */
+ if (adev->pdev->ltr_path)
+ nbio_v2_3_program_ltr(adev);
def = data = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP3);
data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
@@ -494,9 +475,12 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
WREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP5, data);
def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
- data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
- data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
- data |= 0x1 << PCIE_LC_CNTL__LC_PMI_TO_L1_DIS__SHIFT;
+ data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT;
+ if (dev_is_removable(&adev->pdev->dev))
+ data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
+ else
+ data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
+ data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
if (def != data)
WREG32_PCIE(smnPCIE_LC_CNTL, data);
@@ -504,6 +488,7 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
if (def != data)
WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+#endif
}
static void nbio_v2_3_apply_lc_spc_mode_wa(struct amdgpu_device *adev)
@@ -547,7 +532,7 @@ static void nbio_v2_3_clear_doorbell_interrupt(struct amdgpu_device *adev)
{
uint32_t reg, reg_data;
- if (adev->asic_type != CHIP_SIENNA_CICHLID)
+ if (amdgpu_ip_version(adev, NBIO_HWIP, 0) != IP_VERSION(3, 3, 0))
return;
reg = RREG32_SOC15(NBIO, 0, mmBIF_RB_CNTL);
@@ -563,6 +548,20 @@ static void nbio_v2_3_clear_doorbell_interrupt(struct amdgpu_device *adev)
}
}
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+
+/*
+ * Choose the HDP flush remap location.
+ *
+ * On an SR-IOV VF, or when PAGE_SIZE exceeds 4096, the byte offset of
+ * HDP_MEM_COHERENCY_FLUSH_CNTL is used and bus_addr is set to 0. Otherwise
+ * both reg_offset and bus_addr are derived from MMIO_REG_HOLE_OFFSET.
+ */
+static void nbio_v2_3_set_reg_remap(struct amdgpu_device *adev)
+{
+	if (amdgpu_sriov_vf(adev) || PAGE_SIZE > 4096) {
+		adev->rmmio_remap.reg_offset =
+			SOC15_REG_OFFSET(NBIO, 0,
+					 mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+		adev->rmmio_remap.bus_addr = 0;
+		return;
+	}
+
+	adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+	adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+}
+
const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
.get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset,
@@ -587,4 +586,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
.apply_lc_spc_mode_wa = nbio_v2_3_apply_lc_spc_mode_wa,
.apply_l1_link_width_reconfig_wa = nbio_v2_3_apply_l1_link_width_reconfig_wa,
.clear_doorbell_interrupt = nbio_v2_3_clear_doorbell_interrupt,
+ .set_reg_remap = nbio_v2_3_set_reg_remap,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h
index 6074dd3a1ed8..a43b60acf7f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h
@@ -27,7 +27,6 @@
#include "soc15_common.h"
extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg;
-extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg_sc;
extern const struct amdgpu_nbio_funcs nbio_v2_3_funcs;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
new file mode 100644
index 000000000000..f89e5f40e1a5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
@@ -0,0 +1,634 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nbio_v4_3.h"
+
+#include "nbio/nbio_4_3_0_offset.h"
+#include "nbio/nbio_4_3_0_sh_mask.h"
+#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
+#include <uapi/linux/kfd_ioctl.h>
+/*
+ * nbio_v4_3_remap_hdp_registers - program the two HDP flush remap registers
+ * @adev: amdgpu device; adev->rmmio_remap.reg_offset is used as the base
+ *
+ * Writes rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL to
+ * BIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL and rmmio_remap.reg_offset +
+ * KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL to BIF_BX0_REMAP_HDP_REG_FLUSH_CNTL.
+ */
+static void nbio_v4_3_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+/*
+ * nbio_v4_3_get_rev_id - read the revision id strap field
+ * @adev: amdgpu device to read from
+ *
+ * Reads RCC_STRAP0_RCC_DEV0_EPF0_STRAP0 and returns its
+ * STRAP_ATI_REV_ID_DEV0_F0 field, masked and shifted down to bit 0.
+ */
+static u32 nbio_v4_3_get_rev_id(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
+
+ tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
+ tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
+
+ return tmp;
+}
+/*
+ * nbio_v4_3_mc_access_enable - set or clear the BIF_FB_EN read/write enables
+ * @adev: amdgpu device to program
+ * @enable: true writes FB_READ_EN | FB_WRITE_EN, false writes 0
+ *
+ * The whole BIF_BX0_BIF_FB_EN register is overwritten; it is not
+ * read-modify-written.
+ */
+static void nbio_v4_3_mc_access_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable)
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN,
+ BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK |
+ BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK);
+ else
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0);
+}
+/*
+ * nbio_v4_3_get_memsize - return the raw RCC_CONFIG_MEMSIZE register value
+ * @adev: amdgpu device to read from
+ *
+ * The value of RCC_DEV0_EPF0_RCC_CONFIG_MEMSIZE is returned unmodified; its
+ * unit is not visible in this file - TODO confirm against the callers.
+ */
+static u32 nbio_v4_3_get_memsize(struct amdgpu_device *adev)
+{
+ return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_RCC_CONFIG_MEMSIZE);
+}
+/*
+ * nbio_v4_3_sdma_doorbell_range - program the SDMA doorbell entry
+ * @adev: amdgpu device to program
+ * @instance: SDMA instance; only instance 0 is handled, others are a no-op
+ * @use_doorbell: true to enable the range, false to zero its size
+ * @doorbell_index: value written to the RANGE_OFFSET field
+ * @doorbell_size: value written to the RANGE_SIZE field
+ *
+ * Read-modify-writes S2A_DOORBELL_ENTRY_2_CTRL.  With @use_doorbell set the
+ * PORT2 fields become ENABLE = 1, AWID = 0xe, RANGE_OFFSET = @doorbell_index,
+ * RANGE_SIZE = @doorbell_size and AWADDR_31_28_VALUE = 0x3.  Without it only
+ * RANGE_SIZE is changed (to 0); the other fields keep their current values.
+ */
+static void nbio_v4_3_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index,
+ int doorbell_size)
+{
+ if (instance == 0) {
+ u32 doorbell_range = RREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_2_CTRL);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_ENABLE,
+ 0x1);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_AWID,
+ 0xe);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_RANGE_OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_RANGE_SIZE,
+ doorbell_size);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_AWADDR_31_28_VALUE,
+ 0x3);
+ } else
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_RANGE_SIZE,
+ 0);
+
+ WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_2_CTRL, doorbell_range);
+ }
+}
+/*
+ * nbio_v4_3_vcn_doorbell_range - program the VCN doorbell entry
+ * @adev: amdgpu device to program
+ * @use_doorbell: true to enable the range, false to zero its size
+ * @doorbell_index: value written to the RANGE_OFFSET field
+ * @instance: zero selects S2A_DOORBELL_ENTRY_4_CTRL, non-zero selects
+ *            S2A_DOORBELL_ENTRY_5_CTRL
+ *
+ * Read-modify-writes the selected register.  With @use_doorbell set the fields
+ * become ENABLE = 1, RANGE_OFFSET = @doorbell_index, RANGE_SIZE = 8, and both
+ * AWID and AWADDR_31_28_VALUE = 0x7 for a non-zero instance or 0x4 for
+ * instance 0.  Without it only RANGE_SIZE is changed (to 0).
+ *
+ * NOTE(review): the field updates always use the S2A_DOORBELL_ENTRY_4_CTRL /
+ * PORT4 definitions, even when ENTRY_5 is the register being accessed.  This
+ * presumably relies on both entries sharing one field layout - confirm
+ * against the nbio 4.3.0 register headers.
+ */
+static void nbio_v4_3_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance)
+{
+ u32 doorbell_range;
+
+ if (instance)
+ doorbell_range = RREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_5_CTRL);
+ else
+ doorbell_range = RREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_4_CTRL);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_ENABLE,
+ 0x1);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_AWID,
+ instance ? 0x7 : 0x4);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_RANGE_OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_RANGE_SIZE,
+ 8);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_AWADDR_31_28_VALUE,
+ instance ? 0x7 : 0x4);
+ } else
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_RANGE_SIZE,
+ 0);
+
+ if (instance)
+ WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_5_CTRL, doorbell_range);
+ else
+ WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_4_CTRL, doorbell_range);
+}
+/*
+ * nbio_v4_3_gc_doorbell_init - write fixed values to doorbell entries 0 and 3
+ * @adev: amdgpu device to program
+ *
+ * Writes 0x30000007 to S2A_DOORBELL_ENTRY_0_CTRL and 0x3000000d to
+ * S2A_DOORBELL_ENTRY_3_CTRL.
+ * NOTE(review): the field breakdown of these constants is not shown here.
+ */
+static void nbio_v4_3_gc_doorbell_init(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_0_CTRL, 0x30000007);
+ WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_3_CTRL, 0x3000000d);
+}
+/*
+ * nbio_v4_3_enable_doorbell_aperture - toggle the doorbell aperture enable
+ * @adev: amdgpu device to program
+ * @enable: true writes 1, false writes 0
+ *
+ * Updates the BIF_DOORBELL_APER_EN field of
+ * RCC_DEV0_EPF0_RCC_DOORBELL_APER_EN through WREG32_FIELD15_PREREG.
+ */
+static void nbio_v4_3_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD15_PREREG(NBIO, 0, RCC_DEV0_EPF0_RCC_DOORBELL_APER_EN,
+ BIF_DOORBELL_APER_EN, enable ? 1 : 0);
+}
+/*
+ * nbio_v4_3_enable_doorbell_selfring_aperture - set up the selfring aperture
+ * @adev: amdgpu device to program; adev->doorbell.base supplies the base
+ * @enable: true to program base and control, false to write control = 0
+ *
+ * When enabling, the control value is built with APER_EN = 1, APER_MODE = 1
+ * and APER_SIZE = 0, and the low/high 32 bits of adev->doorbell.base are
+ * written to the APER_BASE_LOW / APER_BASE_HIGH registers.  The APER_CNTL
+ * register is written in both cases; when disabling it receives 0 and the
+ * base registers are left untouched.
+ */
+static void nbio_v4_3_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = 0;
+
+ if (enable) {
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, /* every REG_SET_FIELD below starts from tmp == 0; results are OR-ed */
+ DOORBELL_SELFRING_GPA_APER_EN, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_MODE, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_SIZE, 0);
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
+ lower_32_bits(adev->doorbell.base));
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
+ upper_32_bits(adev->doorbell.base));
+ }
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ tmp);
+}
+/*
+ * nbio_v4_3_ih_doorbell_range - program the IH doorbell entry
+ * @adev: amdgpu device to program
+ * @use_doorbell: true to enable the range, false to zero its size
+ * @doorbell_index: value written to the RANGE_OFFSET field
+ *
+ * Read-modify-writes S2A_DOORBELL_ENTRY_1_CTRL.  With @use_doorbell set the
+ * PORT1 fields become ENABLE = 1, AWID = 0, RANGE_OFFSET = @doorbell_index,
+ * RANGE_SIZE = 2 and AWADDR_31_28_VALUE = 0.  Without it only RANGE_SIZE is
+ * changed (to 0).
+ */
+static void nbio_v4_3_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+ u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_1_CTRL);
+
+ if (use_doorbell) {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_ENABLE,
+ 0x1);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID,
+ 0x0);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET,
+ doorbell_index);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE,
+ 2);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
+ 0x0);
+ } else
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE,
+ 0);
+
+ WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_1_CTRL, ih_doorbell_range);
+}
+/*
+ * nbio_v4_3_ih_control - program the BIF interrupt control registers
+ * @adev: amdgpu device to program; adev->dummy_page_addr is consumed
+ *
+ * Writes adev->dummy_page_addr >> 8 to BIF_BX0_INTERRUPT_CNTL2, then
+ * read-modify-writes BIF_BX0_INTERRUPT_CNTL with IH_DUMMY_RD_OVERRIDE = 0 and
+ * IH_REQ_NONSNOOP_EN = 0.
+ */
+static void nbio_v4_3_ih_control(struct amdgpu_device *adev)
+{
+ u32 interrupt_cntl;
+
+ /* setup interrupt control */
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
+
+ interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL);
+ /*
+ * BIF_BX0_INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * BIF_BX0_INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL,
+ IH_DUMMY_RD_OVERRIDE, 0);
+
+ /* BIF_BX0_INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL,
+ IH_REQ_NONSNOOP_EN, 0);
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL, interrupt_cntl);
+}
+/*
+ * nbio_v4_3_update_medium_grain_clock_gating - set/clear CPM_CONTROL gates
+ * @adev: amdgpu device to program; adev->cg_flags is consulted
+ * @enable: true sets the gate-enable bits, false clears them
+ *
+ * An enable request is ignored when AMD_CG_SUPPORT_BIF_MGCG is not set in
+ * adev->cg_flags; a disable request is always carried out.  The six bits
+ * touched are LCLK_DYN, TXCLK_DYN, TXCLK_LCNT, TXCLK_REGS, TXCLK_PRBS and
+ * REFCLK_REGS gate enables.  The register is written back only when the
+ * value actually changed.
+ */
+static void nbio_v4_3_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
+ return;
+
+ def = data = RREG32_SOC15(NBIO, 0, regCPM_CONTROL);
+ if (enable) {
+ data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ } else {
+ data &= ~(CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regCPM_CONTROL, data);
+}
+/*
+ * nbio_v4_3_update_medium_grain_light_sleep - set/clear PCIE_CNTL2 SLV_MEM_LS_EN
+ * @adev: amdgpu device to program; adev->cg_flags is consulted
+ * @enable: true sets SLV_MEM_LS_EN, false clears it
+ *
+ * An enable request is ignored when AMD_CG_SUPPORT_BIF_LS is not set in
+ * adev->cg_flags; a disable request is always carried out.  PCIE_CNTL2 is
+ * written back only when the value actually changed.
+ */
+static void nbio_v4_3_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ return;
+
+ /* TODO: need update in future */
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_CNTL2);
+ if (enable) {
+ data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+ } else {
+ data &= ~PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+ }
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_CNTL2, data);
+}
+/*
+ * nbio_v4_3_get_clockgating_state - report which BIF gating features are on
+ * @adev: amdgpu device to read from
+ * @flags: bit set to update
+ *
+ * ORs AMD_CG_SUPPORT_BIF_MGCG into *flags when LCLK_DYN_GATE_ENABLE is set in
+ * CPM_CONTROL, and AMD_CG_SUPPORT_BIF_LS when SLV_MEM_LS_EN is set in
+ * PCIE_CNTL2.  Bits are only added; nothing in *flags is cleared here.
+ */
+static void nbio_v4_3_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_BIF_MGCG */
+ data = RREG32_SOC15(NBIO, 0, regCPM_CONTROL);
+ if (data & CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_MGCG;
+
+ /* AMD_CG_SUPPORT_BIF_LS */
+ data = RREG32_SOC15(NBIO, 0, regPCIE_CNTL2);
+ if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_LS;
+}
+/*
+ * nbio_v4_3_get_hdp_flush_req_offset - offset of the HDP flush request register
+ * @adev: amdgpu device (not referenced directly in this body; the macro may
+ *        use it - TODO confirm)
+ *
+ * Returns SOC15_REG_OFFSET() of regBIF_BX_PF0_GPU_HDP_FLUSH_REQ.
+ */
+static u32 nbio_v4_3_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_REQ);
+}
+/*
+ * nbio_v4_3_get_hdp_flush_done_offset - offset of the HDP flush done register
+ * @adev: amdgpu device (not referenced directly in this body; the macro may
+ *        use it - TODO confirm)
+ *
+ * Returns SOC15_REG_OFFSET() of regBIF_BX_PF0_GPU_HDP_FLUSH_DONE.
+ */
+static u32 nbio_v4_3_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_DONE);
+}
+/*
+ * nbio_v4_3_get_pcie_index_offset - offset of the register used as PCIE index
+ * @adev: amdgpu device (not referenced directly in this body; the macro may
+ *        use it - TODO confirm)
+ *
+ * Returns SOC15_REG_OFFSET() of regBIF_BX_PF0_RSMU_INDEX.
+ */
+static u32 nbio_v4_3_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_INDEX);
+}
+/*
+ * nbio_v4_3_get_pcie_data_offset - offset of the register used as PCIE data
+ * @adev: amdgpu device (not referenced directly in this body; the macro may
+ *        use it - TODO confirm)
+ *
+ * Returns SOC15_REG_OFFSET() of regBIF_BX_PF0_RSMU_DATA.
+ */
+static u32 nbio_v4_3_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_DATA);
+}
+/*
+ * Per-client HDP flush masks for NBIO v4.3.  Each entry is the matching bit
+ * definition of BIF_BX_PF_GPU_HDP_FLUSH_DONE: CP0..CP9, SDMA0 and SDMA1.
+ */
+const struct nbio_hdp_flush_reg nbio_v4_3_hdp_flush_reg = {
+ .ref_and_mask_cp0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP0_MASK,
+ .ref_and_mask_cp1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP1_MASK,
+ .ref_and_mask_cp2 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP2_MASK,
+ .ref_and_mask_cp3 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP3_MASK,
+ .ref_and_mask_cp4 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP4_MASK,
+ .ref_and_mask_cp5 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP5_MASK,
+ .ref_and_mask_cp6 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP6_MASK,
+ .ref_and_mask_cp7 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP7_MASK,
+ .ref_and_mask_cp8 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP8_MASK,
+ .ref_and_mask_cp9 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP9_MASK,
+ .ref_and_mask_sdma0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+ .ref_and_mask_sdma1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+};
+/*
+ * nbio_v4_3_init_registers - one-time NBIO register fix-ups
+ * @adev: amdgpu device to program
+ *
+ * Only acts when the NBIO IP version is exactly 4.3.0: it then clears
+ * STRAP_NO_SOFT_RESET_DEV0_F2 in RCC_DEV0_EPF2_STRAP2.  For any other
+ * version the function does nothing.
+ */
+static void nbio_v4_3_init_registers(struct amdgpu_device *adev)
+{
+ if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(4, 3, 0)) {
+ uint32_t data;
+
+ data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2);
+ data &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK;
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, data);
+ }
+}
+/*
+ * nbio_v4_3_get_rom_offset - read the ROM offset field
+ * @adev: amdgpu device to read from
+ *
+ * Returns the ROM_OFFSET field extracted from REGS_ROM_OFFSET_CTRL.
+ */
+static u32 nbio_v4_3_get_rom_offset(struct amdgpu_device *adev)
+{
+ u32 data, rom_offset;
+
+ data = RREG32_SOC15(NBIO, 0, regREGS_ROM_OFFSET_CTRL);
+ rom_offset = REG_GET_FIELD(data, REGS_ROM_OFFSET_CTRL, ROM_OFFSET);
+
+ return rom_offset;
+}
+/*
+ * nbio_v4_3_program_ltr - program the LTR related registers
+ * @adev: amdgpu device to program; adev->pdev->ltr_path is consulted
+ *
+ * Only built when CONFIG_PCIEASPM is defined.  Three steps, each writing
+ * its register only when the new value differs from the one read:
+ *  - EP_PCIE_TX_LTR_CNTL is set to 0x35EB with LTR_PRIV_MSG_DIS_IN_PM_NON_D0
+ *    and LTR_PRIV_RST_LTR_IN_DL_DOWN cleared (the value is built from the
+ *    constant, not from the current register content);
+ *  - STRAP_LTR_IN_ASPML1_DIS is cleared in RCC_BIF_STRAP2;
+ *  - LTR_EN in DEVICE_CNTL2 is set when adev->pdev->ltr_path is non-zero and
+ *    cleared otherwise.
+ */
+#ifdef CONFIG_PCIEASPM
+static void nbio_v4_3_program_ltr(struct amdgpu_device *adev)
+{
+ uint32_t def, data;
+
+ def = RREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL);
+ data = 0x35EB;
+ data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_MSG_DIS_IN_PM_NON_D0_MASK;
+ data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_RST_LTR_IN_DL_DOWN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2);
+ data &= ~RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
+ if (adev->pdev->ltr_path)
+ data |= BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
+ else
+ data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+}
+#endif
+/*
+ * nbio_v4_3_program_aspm - program the PCIe ASPM related registers
+ * @adev: amdgpu device to program
+ *
+ * The body is compiled only when CONFIG_PCIEASPM is defined; otherwise the
+ * function is empty.  It returns early unless the PCIE IP version is 7.4.0
+ * or 7.6.0.
+ *
+ * The sequence has two phases separated by the call to
+ * nbio_v4_3_program_ltr().  Before the call the L0s/L1 inactivity fields and
+ * the VLINK timers are cleared, PMI_TO_L1_DIS and
+ * DSC_DONT_ENTER_L23_AFTER_PME_ACK are set, LTR_EN is cleared and several
+ * power-down enables are set.  After the call the VLINK timers and the L1
+ * inactivity field are given their values and the two bits set earlier are
+ * cleared again.  Every register is written only when its value changed,
+ * except PCIE_LTR_CAP, which is written unconditionally.
+ */
+static void nbio_v4_3_program_aspm(struct amdgpu_device *adev)
+{
+#ifdef CONFIG_PCIEASPM
+ uint32_t def, data;
+
+ if (!(amdgpu_ip_version(adev, PCIE_HWIP, 0) == IP_VERSION(7, 4, 0)) &&
+ !(amdgpu_ip_version(adev, PCIE_HWIP, 0) == IP_VERSION(7, 6, 0)))
+ return;
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL);
+ data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK;
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
+ data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL7);
+ data |= PCIE_LC_CNTL7__LC_NBIF_ASPM_INPUT_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL7, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3);
+ data |= PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3);
+ data &= ~RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK;
+ data &= ~RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5);
+ data &= ~RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
+ data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+
+ WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP, 0x10011001); /* unconditional write, unlike the rest */
+
+ def = data = RREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2);
+ data |= PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK |
+ PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK;
+ data &= ~PSWUSP0_PCIE_LC_CNTL2__LC_RCV_L0_TO_RCV_L0S_DIS_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL4);
+ data |= PCIE_LC_CNTL4__LC_L1_POWERDOWN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL4, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL);
+ data |= PCIE_LC_RXRECOVER_RXSTANDBY_CNTL__LC_RX_L0S_STANDBY_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL, data);
+
+ nbio_v4_3_program_ltr(adev); /* may set LTR_EN again, depending on adev->pdev->ltr_path */
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3);
+ data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
+ data |= 0x0010 << RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5);
+ data |= 0x0010 << RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL);
+ data |= 0x0 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT; /* OR-ing 0 leaves the field as cleared above */
+ data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
+ data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3);
+ data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3, data);
+#endif
+}
+/* Offset of the PAGE_SIZE-sized region that ends at 0x80000. */
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+/*
+ * nbio_v4_3_set_reg_remap - fill in adev->rmmio_remap
+ * @adev: amdgpu device to update
+ *
+ * When the device is not an SR-IOV VF and PAGE_SIZE is at most 4096, the remap
+ * register offset is MMIO_REG_HOLE_OFFSET and the bus address is rmmio_base
+ * plus that offset.  Otherwise the register offset is the SOC15 offset of
+ * regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL shifted left by 2
+ * (presumably a dword index turned into a byte offset - TODO confirm) and the
+ * bus address is set to 0.
+ */
+static void nbio_v4_3_set_reg_remap(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ } else {
+ adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+ regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ }
+}
+/*
+ * NBIO v4.3 callback table.  Every callback points at the nbio_v4_3_*
+ * implementation of the same name defined in this file.
+ */
+const struct amdgpu_nbio_funcs nbio_v4_3_funcs = {
+ .get_hdp_flush_req_offset = nbio_v4_3_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbio_v4_3_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbio_v4_3_get_pcie_index_offset,
+ .get_pcie_data_offset = nbio_v4_3_get_pcie_data_offset,
+ .get_rev_id = nbio_v4_3_get_rev_id,
+ .mc_access_enable = nbio_v4_3_mc_access_enable,
+ .get_memsize = nbio_v4_3_get_memsize,
+ .sdma_doorbell_range = nbio_v4_3_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v4_3_vcn_doorbell_range,
+ .gc_doorbell_init = nbio_v4_3_gc_doorbell_init,
+ .enable_doorbell_aperture = nbio_v4_3_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbio_v4_3_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbio_v4_3_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbio_v4_3_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v4_3_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v4_3_get_clockgating_state,
+ .ih_control = nbio_v4_3_ih_control,
+ .init_registers = nbio_v4_3_init_registers,
+ .remap_hdp_registers = nbio_v4_3_remap_hdp_registers,
+ .get_rom_offset = nbio_v4_3_get_rom_offset,
+ .program_aspm = nbio_v4_3_program_aspm,
+ .set_reg_remap = nbio_v4_3_set_reg_remap,
+};
+
+/*
+ * Empty .ih_doorbell_range callback installed in nbio_v4_3_sriov_funcs; it
+ * ignores its arguments and touches no register.
+ */
+static void nbio_v4_3_sriov_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+}
+/*
+ * Empty .sdma_doorbell_range callback installed in nbio_v4_3_sriov_funcs; it
+ * ignores its arguments and touches no register.
+ */
+static void nbio_v4_3_sriov_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index,
+ int doorbell_size)
+{
+}
+/*
+ * Empty .vcn_doorbell_range callback installed in nbio_v4_3_sriov_funcs; it
+ * ignores its arguments and touches no register.
+ */
+static void nbio_v4_3_sriov_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance)
+{
+}
+/*
+ * Empty .gc_doorbell_init callback installed in nbio_v4_3_sriov_funcs; it
+ * touches no register.
+ */
+static void nbio_v4_3_sriov_gc_doorbell_init(struct amdgpu_device *adev)
+{
+}
+/*
+ * NBIO v4.3 callback table for SR-IOV.  The sdma/vcn/ih doorbell range and
+ * gc doorbell init callbacks point at the empty nbio_v4_3_sriov_* stubs and
+ * .program_aspm is not set; the remaining callbacks are the regular
+ * nbio_v4_3_* implementations.
+ */
+const struct amdgpu_nbio_funcs nbio_v4_3_sriov_funcs = {
+ .get_hdp_flush_req_offset = nbio_v4_3_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbio_v4_3_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbio_v4_3_get_pcie_index_offset,
+ .get_pcie_data_offset = nbio_v4_3_get_pcie_data_offset,
+ .get_rev_id = nbio_v4_3_get_rev_id,
+ .mc_access_enable = nbio_v4_3_mc_access_enable,
+ .get_memsize = nbio_v4_3_get_memsize,
+ .sdma_doorbell_range = nbio_v4_3_sriov_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v4_3_sriov_vcn_doorbell_range,
+ .gc_doorbell_init = nbio_v4_3_sriov_gc_doorbell_init,
+ .enable_doorbell_aperture = nbio_v4_3_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbio_v4_3_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbio_v4_3_sriov_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbio_v4_3_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v4_3_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v4_3_get_clockgating_state,
+ .ih_control = nbio_v4_3_ih_control,
+ .init_registers = nbio_v4_3_init_registers,
+ .remap_hdp_registers = nbio_v4_3_remap_hdp_registers,
+ .get_rom_offset = nbio_v4_3_get_rom_offset,
+ .set_reg_remap = nbio_v4_3_set_reg_remap,
+};
+/*
+ * nbio_v4_3_set_ras_err_event_athub_irq_state - enable/disable the interrupt
+ * @adev: amdgpu device to program
+ * @src: interrupt source (unused)
+ * @type: interrupt type (unused)
+ * @state: AMDGPU_IRQ_STATE_ENABLE clears the DISABLE bit; any other value
+ *         sets it
+ *
+ * Read-modify-writes RAS_ATHUB_ERR_EVENT_INTERRUPT_DISABLE in
+ * BIF_BX0_BIF_DOORBELL_INT_CNTL.  Always returns 0.
+ */
+static int nbio_v4_3_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ /* The ras_controller_irq enablement should be done in the psp bl when it
+ * tries to enable the ras feature. The driver only needs to set the correct
+ * interrupt vector for the bare-metal and sriov use cases respectively.
+ */
+ uint32_t bif_doorbell_int_cntl;
+
+ bif_doorbell_int_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+ bif_doorbell_int_cntl = REG_SET_FIELD(bif_doorbell_int_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_DISABLE,
+ (state == AMDGPU_IRQ_STATE_ENABLE) ? 0 : 1);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_int_cntl);
+
+ return 0;
+}
+/*
+ * nbio_v4_3_process_err_event_athub_irq - dummy .process callback
+ * @adev: amdgpu device (unused)
+ * @source: interrupt source (unused)
+ * @entry: interrupt vector entry (unused)
+ *
+ * Does nothing and always returns 0.
+ */
+static int nbio_v4_3_process_err_event_athub_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ /* By design, the ih cookie for err_event_athub_irq should be written
+ * to the bif ring. Since the bif ring is not enabled, just leave the
+ * process callback as a dummy one.
+ */
+ return 0;
+}
+/*
+ * Interrupt source callbacks for the RAS err_event_athub interrupt: .set
+ * toggles the DISABLE bit, .process is a dummy that returns 0.
+ */
+static const struct amdgpu_irq_src_funcs nbio_v4_3_ras_err_event_athub_irq_funcs = {
+ .set = nbio_v4_3_set_ras_err_event_athub_irq_state,
+ .process = nbio_v4_3_process_err_event_athub_irq,
+};
+/*
+ * nbio_v4_3_handle_ras_err_event_athub_intr_no_bifring - poll and ack the
+ * RAS err_event_athub interrupt
+ * @adev: amdgpu device to service
+ *
+ * Reads BIF_BX0_BIF_DOORBELL_INT_CNTL.  When
+ * RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS is set, it writes the register back
+ * with RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR = 1 and then calls
+ * amdgpu_ras_global_ras_isr().  Otherwise it does nothing.
+ *
+ * NOTE(review): the field accessors use the BIF_DOORBELL_INT_CNTL prefix
+ * while the register itself is named BIF_BX0_BIF_DOORBELL_INT_CNTL (the
+ * prefix used by nbio_v4_3_set_ras_err_event_athub_irq_state) - confirm
+ * that both sets of field definitions describe the same bits.
+ */
+static void nbio_v4_3_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev)
+{
+ uint32_t bif_doorbell_int_cntl;
+
+ bif_doorbell_int_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+ if (REG_GET_FIELD(bif_doorbell_int_cntl,
+ BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) {
+ /* driver has to clear the interrupt status when bif ring is disabled */
+ bif_doorbell_int_cntl = REG_SET_FIELD(bif_doorbell_int_cntl,
+ BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_int_cntl);
+ amdgpu_ras_global_ras_isr(adev);
+ }
+}
+/*
+ * nbio_v4_3_init_ras_err_event_athub_interrupt - register the RAS interrupt
+ * @adev: amdgpu device whose nbio.ras_err_event_athub_irq is initialised
+ *
+ * Installs nbio_v4_3_ras_err_event_athub_irq_funcs, sets num_types to 1 and
+ * registers the source with amdgpu_irq_add_id() under client
+ * SOC21_IH_CLIENTID_BIF and source id
+ * NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT.
+ *
+ * Returns the result of amdgpu_irq_add_id().
+ */
+static int nbio_v4_3_init_ras_err_event_athub_interrupt(struct amdgpu_device *adev)
+{
+
+ int r;
+
+ /* init the irq funcs */
+ adev->nbio.ras_err_event_athub_irq.funcs =
+ &nbio_v4_3_ras_err_event_athub_irq_funcs;
+ adev->nbio.ras_err_event_athub_irq.num_types = 1;
+
+ /* register ras err event athub interrupt
+ * nbio v4_3 uses the same irq source as nbio v7_4 */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_BIF,
+ NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT,
+ &adev->nbio.ras_err_event_athub_irq);
+
+ return r;
+}
+/*
+ * NBIO v4.3 RAS callbacks: the no-bif-ring interrupt handler and the
+ * interrupt registration routine defined in this file.
+ */
+struct amdgpu_nbio_ras nbio_v4_3_ras = {
+ .handle_ras_err_event_athub_intr_no_bifring = nbio_v4_3_handle_ras_err_event_athub_intr_no_bifring,
+ .init_ras_err_event_athub_interrupt = nbio_v4_3_init_ras_err_event_athub_interrupt,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.h
new file mode 100644
index 000000000000..399037cdf4fb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NBIO_V4_3_H__
+#define __NBIO_V4_3_H__
+
+#include "soc15_common.h"
+
+extern const struct nbio_hdp_flush_reg nbio_v4_3_hdp_flush_reg;
+extern const struct amdgpu_nbio_funcs nbio_v4_3_funcs;
+extern const struct amdgpu_nbio_funcs nbio_v4_3_sriov_funcs;
+extern struct amdgpu_nbio_ras nbio_v4_3_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
index 4bbacf1be25a..e911368c1aeb 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v6_1.h"
#include "nbio/nbio_6_1_default.h"
@@ -210,7 +209,7 @@ static void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
static void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
int data;
@@ -276,12 +275,9 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnPCIE_CI_CNTL, data);
-
- if (amdgpu_sriov_vf(adev))
- adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
- mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
}
+#ifdef CONFIG_PCIEASPM
static void nbio_v6_1_program_ltr(struct amdgpu_device *adev)
{
uint32_t def, data;
@@ -303,9 +299,11 @@ static void nbio_v6_1_program_ltr(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
}
+#endif
static void nbio_v6_1_program_aspm(struct amdgpu_device *adev)
{
+#ifdef CONFIG_PCIEASPM
uint32_t def, data;
def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
@@ -361,7 +359,10 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnPCIE_LC_CNTL6, data);
- nbio_v6_1_program_ltr(adev);
+ /* Don't bother about LTR if LTR is not enabled
+ * in the path */
+ if (adev->pdev->ltr_path)
+ nbio_v6_1_program_ltr(adev);
def = data = RREG32_PCIE(smnRCC_BIF_STRAP3);
data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
@@ -385,6 +386,22 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev)
data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
if (def != data)
WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+#endif
+}
+
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+/*
+ * nbio_v6_1_set_reg_remap - fill in adev->rmmio_remap
+ * @adev: amdgpu device to update
+ *
+ * When the device is not an SR-IOV VF and PAGE_SIZE is at most 4096, the remap
+ * register offset is MMIO_REG_HOLE_OFFSET and the bus address is rmmio_base
+ * plus that offset.  Otherwise the register offset is the SOC15 offset of
+ * mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL shifted left by 2
+ * (presumably a dword index turned into a byte offset - TODO confirm) and the
+ * bus address is set to 0.
+ */
+static void nbio_v6_1_set_reg_remap(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ } else {
+ adev->rmmio_remap.reg_offset =
+ SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ }
}
const struct amdgpu_nbio_funcs nbio_v6_1_funcs = {
@@ -405,5 +422,6 @@ const struct amdgpu_nbio_funcs nbio_v6_1_funcs = {
.ih_control = nbio_v6_1_ih_control,
.init_registers = nbio_v6_1_init_registers,
.remap_hdp_registers = nbio_v6_1_remap_hdp_registers,
- .program_aspm = nbio_v6_1_program_aspm,
+ .program_aspm = nbio_v6_1_program_aspm,
+ .set_reg_remap = nbio_v6_1_set_reg_remap,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index 37a4039fdfc5..1569a1e934ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v7_0.h"
#include "nbio/nbio_7_0_default.h"
@@ -205,7 +204,7 @@ static void nbio_v7_0_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
static void nbio_v7_0_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
int data;
@@ -271,11 +270,33 @@ const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = {
.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK,
};
+#define regRCC_DEV0_EPF6_STRAP4 0xd304
+#define regRCC_DEV0_EPF6_STRAP4_BASE_IDX 5
+
static void nbio_v7_0_init_registers(struct amdgpu_device *adev)
{
- if (amdgpu_sriov_vf(adev))
+ uint32_t data;
+
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(2, 5, 0):
+ data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF6_STRAP4) & ~BIT(23);
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF6_STRAP4, data);
+ break;
+ }
+}
+
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+/*
+ * nbio_v7_0_set_reg_remap - fill in adev->rmmio_remap
+ * @adev: amdgpu device to update
+ *
+ * When the device is not an SR-IOV VF and PAGE_SIZE is at most 4096, the remap
+ * register offset is MMIO_REG_HOLE_OFFSET and the bus address is rmmio_base
+ * plus that offset.  Otherwise the register offset is the SOC15 offset of
+ * mmHDP_MEM_COHERENCY_FLUSH_CNTL shifted left by 2 (presumably a dword index
+ * turned into a byte offset - TODO confirm) and the bus address is set to 0.
+ */
+static void nbio_v7_0_set_reg_remap(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ } else {
adev->rmmio_remap.reg_offset =
SOC15_REG_OFFSET(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ }
}
const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
@@ -297,4 +318,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
.ih_control = nbio_v7_0_ih_control,
.init_registers = nbio_v7_0_init_registers,
.remap_hdp_registers = nbio_v7_0_remap_hdp_registers,
+ .set_reg_remap = nbio_v7_0_set_reg_remap,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
new file mode 100644
index 000000000000..bed5ef4d8788
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
@@ -0,0 +1,400 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nbio_v7_11.h"
+
+#include "nbio/nbio_7_11_0_offset.h"
+#include "nbio/nbio_7_11_0_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+static void nbio_v7_11_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
+static u32 nbio_v7_11_get_rev_id(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP1_RCC_DEV0_EPF0_STRAP0);
+ tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
+ tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
+
+ return tmp;
+}
+
+static void nbio_v7_11_mc_access_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable)
+ WREG32_SOC15(NBIO, 0, regBIF_BX1_BIF_FB_EN,
+ BIF_BX1_BIF_FB_EN__FB_READ_EN_MASK |
+ BIF_BX1_BIF_FB_EN__FB_WRITE_EN_MASK);
+ else
+ WREG32_SOC15(NBIO, 0, regBIF_BX1_BIF_FB_EN, 0);
+}
+
+static u32 nbio_v7_11_get_memsize(struct amdgpu_device *adev)
+{
+ return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_CONFIG_MEMSIZE);
+}
+
+static void nbio_v7_11_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index,
+ int doorbell_size)
+{
+ u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_CSDMA_DOORBELL_RANGE);
+ u32 doorbell_range = RREG32_PCIE_PORT(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_CSDMA_DOORBELL_RANGE,
+ OFFSET, doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_CSDMA_DOORBELL_RANGE,
+ SIZE, doorbell_size);
+ } else {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_CSDMA_DOORBELL_RANGE,
+ SIZE, 0);
+ }
+
+ WREG32_PCIE_PORT(reg, doorbell_range);
+}
+
+static void nbio_v7_11_vpe_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index,
+ int doorbell_size)
+{
+ u32 reg = instance == 0 ?
+ SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VPE_DOORBELL_RANGE) :
+ SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VPE1_DOORBELL_RANGE);
+ u32 doorbell_range = RREG32_PCIE_PORT(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VPE_DOORBELL_RANGE,
+ OFFSET, doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VPE_DOORBELL_RANGE,
+ SIZE, doorbell_size);
+ } else {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VPE_DOORBELL_RANGE,
+ SIZE, 0);
+ }
+
+ WREG32_PCIE_PORT(reg, doorbell_range);
+}
+
+static void nbio_v7_11_vcn_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell,
+ int doorbell_index, int instance)
+{
+ u32 reg = instance == 0 ?
+ SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN0_DOORBELL_RANGE):
+ SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN1_DOORBELL_RANGE);
+
+ u32 doorbell_range = RREG32_PCIE_PORT(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VCN0_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 8);
+ } else {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 0);
+ }
+
+ WREG32_PCIE_PORT(reg, doorbell_range);
+}
+
+static void nbio_v7_11_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 reg;
+
+
+ reg = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN);
+ reg = REG_SET_FIELD(reg, RCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN,
+ BIF_DOORBELL_APER_EN, enable ? 1 : 0);
+
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN, reg);
+}
+
+static void nbio_v7_11_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = 0;
+
+ if (enable) {
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_EN, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_MODE, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_SIZE, 0);
+
+ WREG32_SOC15(NBIO, 0,
+ regBIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
+ lower_32_bits(adev->doorbell.base));
+ WREG32_SOC15(NBIO, 0,
+ regBIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
+ upper_32_bits(adev->doorbell.base));
+ }
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_CNTL, tmp);
+}
+
+
+static void nbio_v7_11_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+ u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0,regGDC0_BIF_IH_DOORBELL_RANGE);
+
+ if (use_doorbell) {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC0_BIF_IH_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC0_BIF_IH_DOORBELL_RANGE, SIZE,
+ 2);
+ } else {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC0_BIF_IH_DOORBELL_RANGE, SIZE,
+ 0);
+ }
+
+ WREG32_SOC15(NBIO, 0, regGDC0_BIF_IH_DOORBELL_RANGE,
+ ih_doorbell_range);
+}
+
+static void nbio_v7_11_ih_control(struct amdgpu_device *adev)
+{
+ u32 interrupt_cntl;
+
+ /* setup interrupt control */
+ WREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL2,
+ adev->dummy_page_addr >> 8);
+
+ interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL);
+ /*
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX1_INTERRUPT_CNTL,
+ IH_DUMMY_RD_OVERRIDE, 0);
+
+ /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX1_INTERRUPT_CNTL,
+ IH_REQ_NONSNOOP_EN, 0);
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL, interrupt_cntl);
+}
+
+static u32 nbio_v7_11_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF1_GPU_HDP_FLUSH_REQ);
+}
+
+static u32 nbio_v7_11_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF1_GPU_HDP_FLUSH_DONE);
+}
+
+static u32 nbio_v7_11_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX1_PCIE_INDEX2);
+}
+
+static u32 nbio_v7_11_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX1_PCIE_DATA2);
+}
+
+static u32 nbio_v7_11_get_pcie_port_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF1_RSMU_INDEX);
+}
+
+static u32 nbio_v7_11_get_pcie_port_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF1_RSMU_DATA);
+}
+
+const struct nbio_hdp_flush_reg nbio_v7_11_hdp_flush_reg = {
+ .ref_and_mask_cp0 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP0_MASK,
+ .ref_and_mask_cp1 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP1_MASK,
+ .ref_and_mask_cp2 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP2_MASK,
+ .ref_and_mask_cp3 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP3_MASK,
+ .ref_and_mask_cp4 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP4_MASK,
+ .ref_and_mask_cp5 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP5_MASK,
+ .ref_and_mask_cp6 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP6_MASK,
+ .ref_and_mask_cp7 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP7_MASK,
+ .ref_and_mask_cp8 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP8_MASK,
+ .ref_and_mask_cp9 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP9_MASK,
+ .ref_and_mask_sdma0 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+ .ref_and_mask_sdma1 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+};
+
+static void nbio_v7_11_init_registers(struct amdgpu_device *adev)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3);
+ data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3,
+ CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
+ data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3,
+ CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1);
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data);
+
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(7, 11, 0):
+ case IP_VERSION(7, 11, 1):
+ case IP_VERSION(7, 11, 2):
+ case IP_VERSION(7, 11, 3):
+ data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4) & ~BIT(23);
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4, data);
+ break;
+ }
+}
+
+static void nbio_v7_11_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
+ return;
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL);
+ if (enable) {
+ data |= (BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ } else {
+ data &= ~(BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL, data);
+}
+
+static void nbio_v7_11_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ return;
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2);
+ if (enable)
+ data |= BIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+ else
+ data &= ~BIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1);
+ if (enable) {
+ data |= (BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK);
+ } else {
+ data &= ~(BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK |
+ BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1, data);
+}
+
+static void nbio_v7_11_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ uint32_t data;
+
+ /* AMD_CG_SUPPORT_BIF_MGCG */
+ data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL);
+ if (data & BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_MGCG;
+
+ /* AMD_CG_SUPPORT_BIF_LS */
+ data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2);
+ if (data & BIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_LS;
+}
+
+#define MMIO_REG_HOLE_OFFSET 0x44000
+
+static void nbio_v7_11_set_reg_remap(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ } else {
+ adev->rmmio_remap.reg_offset =
+ SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF1_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ }
+}
+
+const struct amdgpu_nbio_funcs nbio_v7_11_funcs = {
+ .get_hdp_flush_req_offset = nbio_v7_11_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbio_v7_11_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbio_v7_11_get_pcie_index_offset,
+ .get_pcie_data_offset = nbio_v7_11_get_pcie_data_offset,
+ .get_pcie_port_index_offset = nbio_v7_11_get_pcie_port_index_offset,
+ .get_pcie_port_data_offset = nbio_v7_11_get_pcie_port_data_offset,
+ .get_rev_id = nbio_v7_11_get_rev_id,
+ .mc_access_enable = nbio_v7_11_mc_access_enable,
+ .get_memsize = nbio_v7_11_get_memsize,
+ .sdma_doorbell_range = nbio_v7_11_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v7_11_vcn_doorbell_range,
+ .vpe_doorbell_range = nbio_v7_11_vpe_doorbell_range,
+ .enable_doorbell_aperture = nbio_v7_11_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbio_v7_11_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbio_v7_11_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbio_v7_11_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v7_11_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v7_11_get_clockgating_state,
+ .ih_control = nbio_v7_11_ih_control,
+ .init_registers = nbio_v7_11_init_registers,
+ .remap_hdp_registers = nbio_v7_11_remap_hdp_registers,
+ .set_reg_remap = nbio_v7_11_set_reg_remap,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.h
new file mode 100644
index 000000000000..9d8258ed3f0a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NBIO_V7_11_H__
+#define __NBIO_V7_11_H__
+
+#include "soc15_common.h"
+
+extern const struct nbio_hdp_flush_reg nbio_v7_11_hdp_flush_reg;
+extern const struct amdgpu_nbio_funcs nbio_v7_11_funcs;
+extern const struct amdgpu_nbio_ras_funcs nbio_v7_11_ras_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
index 3444332ea110..acc5f363684a 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v7_2.h"
#include "nbio/nbio_7_2_0_offset.h"
@@ -59,10 +58,16 @@ static u32 nbio_v7_2_get_rev_id(struct amdgpu_device *adev)
{
u32 tmp;
- if (adev->asic_type == CHIP_YELLOW_CARP)
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(7, 2, 1):
+ case IP_VERSION(7, 3, 0):
+ case IP_VERSION(7, 5, 0):
tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0_YC);
- else
+ break;
+ default:
tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
+ break;
+ }
tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
@@ -72,20 +77,26 @@ static u32 nbio_v7_2_get_rev_id(struct amdgpu_device *adev)
static void nbio_v7_2_mc_access_enable(struct amdgpu_device *adev, bool enable)
{
- if (enable)
- if (adev->asic_type == CHIP_YELLOW_CARP)
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(7, 2, 1):
+ case IP_VERSION(7, 3, 0):
+ case IP_VERSION(7, 5, 0):
+ if (enable)
WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN_YC,
BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK |
BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK);
else
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN_YC, 0);
+ break;
+ default:
+ if (enable)
WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN,
BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK |
BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK);
- else
- if (adev->asic_type == CHIP_YELLOW_CARP)
- WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN_YC, 0);
else
WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0);
+ break;
+ }
}
static u32 nbio_v7_2_get_memsize(struct amdgpu_device *adev)
@@ -250,7 +261,10 @@ static void nbio_v7_2_update_medium_grain_light_sleep(struct amdgpu_device *adev
{
uint32_t def, data;
- if (adev->asic_type == CHIP_YELLOW_CARP) {
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(7, 2, 1):
+ case IP_VERSION(7, 3, 0):
+ case IP_VERSION(7, 5, 0):
def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2));
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
@@ -260,8 +274,8 @@ static void nbio_v7_2_update_medium_grain_light_sleep(struct amdgpu_device *adev
if (def != data)
WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2), data);
- data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regBIF1_PCIE_TX_POWER_CTRL_1));
- def = data;
+ def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0,
+ regBIF1_PCIE_TX_POWER_CTRL_1));
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
data |= (BIF1_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK |
BIF1_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK);
@@ -272,7 +286,8 @@ static void nbio_v7_2_update_medium_grain_light_sleep(struct amdgpu_device *adev
if (def != data)
WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regBIF1_PCIE_TX_POWER_CTRL_1),
data);
- } else {
+ break;
+ default:
def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2));
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
@@ -285,11 +300,12 @@ static void nbio_v7_2_update_medium_grain_light_sleep(struct amdgpu_device *adev
if (def != data)
WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2), data);
+ break;
}
}
static void nbio_v7_2_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
int data;
@@ -352,7 +368,10 @@ const struct nbio_hdp_flush_reg nbio_v7_2_hdp_flush_reg = {
static void nbio_v7_2_init_registers(struct amdgpu_device *adev)
{
uint32_t def, data;
- if (adev->asic_type == CHIP_YELLOW_CARP) {
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(7, 2, 1):
+ case IP_VERSION(7, 3, 0):
+ case IP_VERSION(7, 5, 0):
def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regBIF1_PCIE_MST_CTRL_3));
data = REG_SET_FIELD(data, BIF1_PCIE_MST_CTRL_3,
CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
@@ -361,7 +380,8 @@ static void nbio_v7_2_init_registers(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regBIF1_PCIE_MST_CTRL_3), data);
- } else {
+ break;
+ default:
def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL));
data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL,
CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
@@ -370,11 +390,32 @@ static void nbio_v7_2_init_registers(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL), data);
+ break;
}
- if (amdgpu_sriov_vf(adev))
- adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
- regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(7, 3, 0):
+ case IP_VERSION(7, 5, 1):
+ data = RREG32_SOC15(NBIO, 0, regRCC_DEV2_EPF0_STRAP2);
+ data &= ~RCC_DEV2_EPF0_STRAP2__STRAP_NO_SOFT_RESET_DEV2_F0_MASK;
+ WREG32_SOC15(NBIO, 0, regRCC_DEV2_EPF0_STRAP2, data);
+ break;
+ }
+}
+
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+
+static void nbio_v7_2_set_reg_remap(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ } else {
+ adev->rmmio_remap.reg_offset =
+ SOC15_REG_OFFSET(NBIO, 0,
+ regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ }
}
const struct amdgpu_nbio_funcs nbio_v7_2_funcs = {
@@ -398,4 +439,5 @@ const struct amdgpu_nbio_funcs nbio_v7_2_funcs = {
.ih_control = nbio_v7_2_ih_control,
.init_registers = nbio_v7_2_init_registers,
.remap_hdp_registers = nbio_v7_2_remap_hdp_registers,
+ .set_reg_remap = nbio_v7_2_set_reg_remap,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index dc5e93756fea..860bc5cb03c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v7_4.h"
#include "amdgpu_ras.h"
@@ -152,9 +151,9 @@ static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instan
* BIF_SDMA0_DOORBELL_RANGE: 0x3bc0
* BIF_SDMA1_DOORBELL_RANGE: 0x3bc4
* BIF_SDMA2_DOORBELL_RANGE: 0x3bd8
-+ * BIF_SDMA4_DOORBELL_RANGE:
-+ * ARCTURUS: 0x3be0
-+ * ALDEBARAN: 0x3be4
+ * BIF_SDMA4_DOORBELL_RANGE:
+ * ARCTURUS: 0x3be0
+ * ALDEBARAN: 0x3be4
*/
if (adev->asic_type == CHIP_ALDEBARAN && instance == 4)
reg = instance + 0x4 + 0x1 +
@@ -238,7 +237,7 @@ static void nbio_v7_4_ih_doorbell_range(struct amdgpu_device *adev,
if (use_doorbell) {
ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index);
- ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 4);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 8);
} else
ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0);
@@ -273,7 +272,7 @@ static void nbio_v7_4_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
static void nbio_v7_4_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
int data;
@@ -339,41 +338,34 @@ const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = {
.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK,
};
-const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald = {
- .ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK,
- .ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK,
- .ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK,
- .ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK,
- .ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK,
- .ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK,
- .ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK,
- .ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK,
- .ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK,
- .ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK,
- .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK,
- .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK,
- .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK,
- .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK,
- .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK,
- .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK,
- .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK,
- .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK,
-};
-
static void nbio_v7_4_init_registers(struct amdgpu_device *adev)
{
- if (amdgpu_sriov_vf(adev))
- adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
- mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+ uint32_t baco_cntl;
+
+ if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 4, 4) &&
+ !amdgpu_sriov_vf(adev)) {
+ baco_cntl = RREG32_SOC15(NBIO, 0, mmBACO_CNTL);
+ if (baco_cntl &
+ (BACO_CNTL__BACO_DUMMY_EN_MASK | BACO_CNTL__BACO_EN_MASK)) {
+ baco_cntl &= ~(BACO_CNTL__BACO_DUMMY_EN_MASK |
+ BACO_CNTL__BACO_EN_MASK);
+ dev_dbg(adev->dev, "Unsetting baco dummy mode %x",
+ baco_cntl);
+ WREG32_SOC15(NBIO, 0, mmBACO_CNTL, baco_cntl);
+ }
+ }
}
static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev)
{
uint32_t bif_doorbell_intr_cntl;
struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if);
- struct ras_err_data err_data = {0, 0, 0, NULL};
+ struct ras_err_data err_data;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ if (amdgpu_ras_error_data_init(&err_data))
+ return;
+
if (adev->asic_type == CHIP_ALDEBARAN)
bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL_ALDE);
else
@@ -390,7 +382,7 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
else
WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
- if (!ras->disable_ras_err_cnt_harvest) {
+ if (ras && !ras->disable_ras_err_cnt_harvest && obj) {
/*
* clear error status after ras_controller_intr
* according to hw team and count ue number
@@ -404,8 +396,7 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
if (err_data.ce_count)
dev_info(adev->dev, "%ld correctable hardware "
- "errors detected in %s block, "
- "no user action is needed.\n",
+ "errors detected in %s block\n",
obj->err_data.ce_count,
get_ras_block_str(adev->nbio.ras_if));
@@ -422,8 +413,10 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
/* ras_controller_int is dedicated for nbif ras error,
* not the global interrupt for sync flood
*/
- amdgpu_ras_reset_gpu(adev);
+ amdgpu_ras_global_ras_isr(adev);
}
+
+ amdgpu_ras_error_data_fini(&err_data);
}
static void nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev)
@@ -658,16 +651,28 @@ static void nbio_v7_4_enable_doorbell_interrupt(struct amdgpu_device *adev,
DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1);
}
-const struct amdgpu_nbio_ras_funcs nbio_v7_4_ras_funcs = {
+const struct amdgpu_ras_block_hw_ops nbio_v7_4_ras_hw_ops = {
+ .query_ras_error_count = nbio_v7_4_query_ras_error_count,
+};
+
+struct amdgpu_nbio_ras nbio_v7_4_ras = {
+ .ras_block = {
+ .ras_comm = {
+ .name = "pcie_bif",
+ .block = AMDGPU_RAS_BLOCK__PCIE_BIF,
+ .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ },
+ .hw_ops = &nbio_v7_4_ras_hw_ops,
+ .ras_late_init = amdgpu_nbio_ras_late_init,
+ },
.handle_ras_controller_intr_no_bifring = nbio_v7_4_handle_ras_controller_intr_no_bifring,
.handle_ras_err_event_athub_intr_no_bifring = nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring,
.init_ras_controller_interrupt = nbio_v7_4_init_ras_controller_interrupt,
.init_ras_err_event_athub_interrupt = nbio_v7_4_init_ras_err_event_athub_interrupt,
- .query_ras_error_count = nbio_v7_4_query_ras_error_count,
- .ras_late_init = amdgpu_nbio_ras_late_init,
- .ras_fini = amdgpu_nbio_ras_fini,
};
+
+#ifdef CONFIG_PCIEASPM
static void nbio_v7_4_program_ltr(struct amdgpu_device *adev)
{
uint32_t def, data;
@@ -689,12 +694,14 @@ static void nbio_v7_4_program_ltr(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
}
+#endif
static void nbio_v7_4_program_aspm(struct amdgpu_device *adev)
{
+#ifdef CONFIG_PCIEASPM
uint32_t def, data;
- if (adev->ip_versions[NBIO_HWIP][0] == IP_VERSION(7, 4, 4))
+ if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 4, 4))
return;
def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
@@ -750,7 +757,10 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnPCIE_LC_CNTL6, data);
- nbio_v7_4_program_ltr(adev);
+ /* Don't bother about LTR if LTR is not enabled
+ * in the path */
+ if (adev->pdev->ltr_path)
+ nbio_v7_4_program_ltr(adev);
def = data = RREG32_PCIE(smnRCC_BIF_STRAP3);
data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
@@ -774,6 +784,22 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev)
data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
if (def != data)
WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+#endif
+}
+
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+
+static void nbio_v7_4_set_reg_remap(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ } else {
+ adev->rmmio_remap.reg_offset =
+ SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ }
}
const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
@@ -797,4 +823,5 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
.init_registers = nbio_v7_4_init_registers,
.remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
.program_aspm = nbio_v7_4_program_aspm,
+ .set_reg_remap = nbio_v7_4_set_reg_remap,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h
index cc5692db6f98..f27c41728822 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h
@@ -27,8 +27,7 @@
#include "soc15_common.h"
extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg;
-extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald;
extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs;
-extern const struct amdgpu_nbio_ras_funcs nbio_v7_4_ras_funcs;
+extern struct amdgpu_nbio_ras nbio_v7_4_ras;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
new file mode 100644
index 000000000000..2ee60b8746a6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nbio_v7_7.h"
+
+#include "nbio/nbio_7_7_0_offset.h"
+#include "nbio/nbio_7_7_0_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+/*
+ * Program the two HDP flush remap registers. REMAP_HDP_MEM_FLUSH_CNTL and
+ * REMAP_HDP_REG_FLUSH_CNTL are each written with
+ * adev->rmmio_remap.reg_offset plus the matching KFD_MMIO_REMAP_* constant.
+ */
+static void nbio_v7_7_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
+/*
+ * Return the STRAP_ATI_REV_ID_DEV0_F0 field of RCC_DEV0_EPF0_STRAP0,
+ * right-aligned.
+ */
+static u32 nbio_v7_7_get_rev_id(struct amdgpu_device *adev)
+{
+ u32 strap0 = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
+
+ /* Keep only the revision-ID bits, then shift them down to bit 0. */
+ strap0 &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
+ strap0 >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
+
+ return strap0;
+}
+
+/*
+ * Write BIF_FB_EN: both FB_READ_EN and FB_WRITE_EN set when @enable is
+ * true, the whole register cleared to 0 otherwise.
+ */
+static void nbio_v7_7_mc_access_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 fb_en = 0;
+
+ if (enable)
+ fb_en = BIF_BX1_BIF_FB_EN__FB_READ_EN_MASK |
+ BIF_BX1_BIF_FB_EN__FB_WRITE_EN_MASK;
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX1_BIF_FB_EN, fb_en);
+}
+
+/*
+ * Return the raw value of the RCC_CONFIG_MEMSIZE register.
+ * NOTE(review): the unit of the value is not visible here - confirm
+ * against callers.
+ */
+static u32 nbio_v7_7_get_memsize(struct amdgpu_device *adev)
+{
+ return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_CONFIG_MEMSIZE);
+}
+
+/*
+ * Program the CSDMA doorbell range register through the PCIe port
+ * accessors (read-modify-write).
+ *
+ * @instance:       not used by this implementation
+ * @use_doorbell:   when true, program OFFSET and SIZE; when false, set
+ *                  SIZE to 0 and leave OFFSET as read
+ * @doorbell_index: value for the OFFSET field
+ * @doorbell_size:  value for the SIZE field
+ */
+static void nbio_v7_7_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index,
+ int doorbell_size)
+{
+ u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_CSDMA_DOORBELL_RANGE);
+ u32 doorbell_range = RREG32_PCIE_PORT(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_CSDMA_DOORBELL_RANGE,
+ OFFSET, doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_CSDMA_DOORBELL_RANGE,
+ SIZE, doorbell_size);
+ } else {
+ /*
+ * Use the field layout of the register actually being
+ * programmed (CSDMA), matching the enable branch above,
+ * rather than the SDMA0 register's field macros.
+ */
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_CSDMA_DOORBELL_RANGE,
+ SIZE, 0);
+ }
+
+ WREG32_PCIE_PORT(reg, doorbell_range);
+}
+
+/*
+ * Program GDC0_BIF_VCN0_DOORBELL_RANGE through the PCIe port accessors
+ * (read-modify-write). With @use_doorbell, OFFSET is set to
+ * @doorbell_index and SIZE to the fixed value 8; otherwise SIZE is set
+ * to 0. @instance is not used by this implementation.
+ */
+static void nbio_v7_7_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance)
+{
+ u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN0_DOORBELL_RANGE);
+ u32 doorbell_range = RREG32_PCIE_PORT(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VCN0_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 8);
+ } else {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 0);
+ }
+
+ WREG32_PCIE_PORT(reg, doorbell_range);
+}
+
+/*
+ * Set (enable) or clear (disable) the BIF_DOORBELL_APER_EN field of
+ * RCC_DOORBELL_APER_EN using a read-modify-write.
+ */
+static void nbio_v7_7_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 reg;
+
+ reg = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN);
+ reg = REG_SET_FIELD(reg, RCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN,
+ BIF_DOORBELL_APER_EN, enable ? 1 : 0);
+
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN, reg);
+}
+
+/*
+ * Program the doorbell self-ring GPA aperture.
+ *
+ * With @enable, the aperture base registers are loaded from the low and
+ * high halves of adev->doorbell.base and the control register is written
+ * with APER_EN = 1, APER_MODE = 1 and APER_SIZE = 0. Without it, only the
+ * control register is written, with 0.
+ */
+static void nbio_v7_7_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = 0;
+
+ if (enable) {
+ /*
+ * tmp is 0 on entry, so each REG_SET_FIELD() yields a value
+ * holding just its own field; OR-ing them merges the fields.
+ */
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_EN, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_MODE, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_SIZE, 0);
+
+ WREG32_SOC15(NBIO, 0,
+ regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
+ lower_32_bits(adev->doorbell.base));
+ WREG32_SOC15(NBIO, 0,
+ regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
+ upper_32_bits(adev->doorbell.base));
+ }
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ tmp);
+}
+
+
+/*
+ * Program GDC0_BIF_IH_DOORBELL_RANGE (read-modify-write). With
+ * @use_doorbell, OFFSET is set to @doorbell_index and SIZE to the fixed
+ * value 2; otherwise SIZE is set to 0.
+ */
+static void nbio_v7_7_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+ u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0,
+ regGDC0_BIF_IH_DOORBELL_RANGE);
+
+ if (use_doorbell) {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC0_BIF_IH_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC0_BIF_IH_DOORBELL_RANGE, SIZE,
+ 2);
+ } else {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC0_BIF_IH_DOORBELL_RANGE, SIZE,
+ 0);
+ }
+
+ WREG32_SOC15(NBIO, 0, regGDC0_BIF_IH_DOORBELL_RANGE,
+ ih_doorbell_range);
+}
+
+/*
+ * Set up BIF interrupt control: INTERRUPT_CNTL2 is loaded with
+ * adev->dummy_page_addr >> 8, and IH_DUMMY_RD_OVERRIDE and
+ * IH_REQ_NONSNOOP_EN are both cleared in INTERRUPT_CNTL.
+ */
+static void nbio_v7_7_ih_control(struct amdgpu_device *adev)
+{
+ u32 interrupt_cntl;
+
+ /* setup interrupt control */
+ WREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL2,
+ adev->dummy_page_addr >> 8);
+
+ interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL);
+ /*
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX1_INTERRUPT_CNTL,
+ IH_DUMMY_RD_OVERRIDE, 0);
+
+ /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX1_INTERRUPT_CNTL,
+ IH_REQ_NONSNOOP_EN, 0);
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL, interrupt_cntl);
+}
+
+/* Return the register offset of BIF_BX_PF0_GPU_HDP_FLUSH_REQ. */
+static u32 nbio_v7_7_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_REQ);
+}
+
+/* Return the register offset of BIF_BX_PF0_GPU_HDP_FLUSH_DONE. */
+static u32 nbio_v7_7_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_DONE);
+}
+
+/* Return the register offset of the PCIe index register (PCIE_INDEX2). */
+static u32 nbio_v7_7_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_INDEX2);
+}
+
+/* Return the register offset of the PCIe data register (PCIE_DATA2). */
+static u32 nbio_v7_7_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_DATA2);
+}
+
+/* Return the register offset used as PCIe port index (RSMU_INDEX). */
+static u32 nbio_v7_7_get_pcie_port_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_INDEX);
+}
+
+/* Return the register offset used as PCIe port data (RSMU_DATA). */
+static u32 nbio_v7_7_get_pcie_port_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_DATA);
+}
+
+/*
+ * Per-client ref/mask values for HDP flush on NBIO v7.7: one
+ * GPU_HDP_FLUSH_DONE bit mask for each of CP0-CP9 and SDMA0-SDMA1.
+ */
+const struct nbio_hdp_flush_reg nbio_v7_7_hdp_flush_reg = {
+ .ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK,
+ .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK,
+ .ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK,
+ .ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK,
+ .ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK,
+ .ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK,
+ .ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK,
+ .ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK,
+ .ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK,
+ .ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK,
+ .ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+ .ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+};
+
+/*
+ * NBIO register initialisation.
+ *
+ * Sets CI_SWUS_MAX_READ_REQUEST_SIZE_MODE and
+ * CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV to 1 in PCIE_MST_CTRL_3, writing the
+ * register back only when the value changed. For NBIO IP version 7.7.0
+ * it additionally clears bit 23 of RCC_DEV0_EPF5_STRAP4.
+ */
+static void nbio_v7_7_init_registers(struct amdgpu_device *adev)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_MST_CTRL_3);
+ data = REG_SET_FIELD(data, BIF0_PCIE_MST_CTRL_3,
+ CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
+ data = REG_SET_FIELD(data, BIF0_PCIE_MST_CTRL_3,
+ CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1);
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF0_PCIE_MST_CTRL_3, data);
+
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(7, 7, 0):
+ /* NOTE(review): meaning of STRAP4 bit 23 is not named here - confirm */
+ data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4) & ~BIT(23);
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4, data);
+ break;
+ }
+}
+
+/*
+ * Turn the BIF0_CPM_CONTROL clock-gate enable bits on or off as a group.
+ * Does nothing unless AMD_CG_SUPPORT_BIF_MGCG is set in adev->cg_flags;
+ * the register is written back only when its value actually changes.
+ */
+static void nbio_v7_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ /* All gate-enable bits this function toggles together. */
+ const uint32_t gate_bits =
+ BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK;
+ uint32_t before, after;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
+ return;
+
+ before = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL);
+ after = enable ? (before | gate_bits) : (before & ~gate_bits);
+
+ if (after != before)
+ WREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL, after);
+}
+
+/*
+ * Enable or disable BIF memory light sleep. Does nothing unless
+ * AMD_CG_SUPPORT_BIF_LS is set in adev->cg_flags.
+ *
+ * Two registers are updated, each written back only if its value changed:
+ * SLV_MEM_LS_EN in PCIE_CNTL2, then MST_MEM_LS_EN and REPLAY_MEM_LS_EN in
+ * PCIE_TX_POWER_CTRL_1.
+ */
+static void nbio_v7_7_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ return;
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2);
+ if (enable)
+ data |= BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+ else
+ data &= ~BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_TX_POWER_CTRL_1);
+ if (enable) {
+ data |= (BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK |
+ BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK);
+ } else {
+ data &= ~(BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK |
+ BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF0_PCIE_TX_POWER_CTRL_1, data);
+}
+
+/*
+ * OR the currently active BIF clock-gating features into *@flags.
+ * Bits already set in *@flags are kept. LCLK_DYN_GATE_ENABLE in
+ * CPM_CONTROL is taken as the indicator for MGCG, and SLV_MEM_LS_EN in
+ * PCIE_CNTL2 as the indicator for LS.
+ */
+static void nbio_v7_7_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ uint32_t data;
+
+ /* AMD_CG_SUPPORT_BIF_MGCG */
+ data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL);
+ if (data & BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_MGCG;
+
+ /* AMD_CG_SUPPORT_BIF_LS */
+ data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2);
+ if (data & BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_LS;
+}
+
+/* Byte offset one page below 0x80000; added to adev->rmmio_base below. */
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+
+/*
+ * Fill in adev->rmmio_remap. The MMIO hole is used only when the device
+ * is not an SR-IOV VF and PAGE_SIZE is at most 4096; in every other case
+ * the offset is derived from HDP_MEM_COHERENCY_FLUSH_CNTL and no bus
+ * address is recorded.
+ */
+static void nbio_v7_7_set_reg_remap(struct amdgpu_device *adev)
+{
+ /* Fallback path: SR-IOV VF, or pages larger than 4096 bytes. */
+ if (amdgpu_sriov_vf(adev) || (PAGE_SIZE > 4096)) {
+ adev->rmmio_remap.reg_offset =
+ SOC15_REG_OFFSET(NBIO, 0,
+ regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ return;
+ }
+
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+}
+
+/*
+ * NBIO v7.7 callback table: binds each amdgpu_nbio_funcs hook that this
+ * file implements to its nbio_v7_7_* function.
+ */
+const struct amdgpu_nbio_funcs nbio_v7_7_funcs = {
+ .get_hdp_flush_req_offset = nbio_v7_7_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbio_v7_7_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbio_v7_7_get_pcie_index_offset,
+ .get_pcie_data_offset = nbio_v7_7_get_pcie_data_offset,
+ .get_pcie_port_index_offset = nbio_v7_7_get_pcie_port_index_offset,
+ .get_pcie_port_data_offset = nbio_v7_7_get_pcie_port_data_offset,
+ .get_rev_id = nbio_v7_7_get_rev_id,
+ .mc_access_enable = nbio_v7_7_mc_access_enable,
+ .get_memsize = nbio_v7_7_get_memsize,
+ .sdma_doorbell_range = nbio_v7_7_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v7_7_vcn_doorbell_range,
+ .enable_doorbell_aperture = nbio_v7_7_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbio_v7_7_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbio_v7_7_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbio_v7_7_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v7_7_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v7_7_get_clockgating_state,
+ .ih_control = nbio_v7_7_ih_control,
+ .init_registers = nbio_v7_7_init_registers,
+ .remap_hdp_registers = nbio_v7_7_remap_hdp_registers,
+ .set_reg_remap = nbio_v7_7_set_reg_remap,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.h
new file mode 100644
index 000000000000..2a33b256ba81
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NBIO_V7_7_H__
+#define __NBIO_V7_7_H__
+
+#include "soc15_common.h"
+
+/* Objects exported by nbio_v7_7.c. */
+extern const struct nbio_hdp_flush_reg nbio_v7_7_hdp_flush_reg;
+extern const struct amdgpu_nbio_funcs nbio_v7_7_funcs;
+/*
+ * NOTE(review): nbio_v7_7.c as added alongside this header defines no
+ * nbio_v7_7_ras_funcs - confirm it is defined elsewhere or drop this
+ * declaration.
+ */
+extern const struct amdgpu_nbio_ras_funcs nbio_v7_7_ras_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
new file mode 100644
index 000000000000..bdfd2917e3ca
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -0,0 +1,696 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nbio_v7_9.h"
+#include "amdgpu_ras.h"
+
+#include "nbio/nbio_7_9_0_offset.h"
+#include "nbio/nbio_7_9_0_sh_mask.h"
+#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+#define NPS_MODE_MASK 0x000000FFL
+
+/*
+ * Program the two HDP flush remap registers. REMAP_HDP_MEM_FLUSH_CNTL and
+ * REMAP_HDP_REG_FLUSH_CNTL are each written with
+ * adev->rmmio_remap.reg_offset plus the matching KFD_MMIO_REMAP_* constant.
+ */
+static void nbio_v7_9_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
+/*
+ * Return the revision ID. An SR-IOV VF takes it from the sub-revision of
+ * the full NBIO IP version; otherwise it is the STRAP_ATI_REV_ID_DEV0_F0
+ * field read from RCC_DEV0_EPF0_STRAP0.
+ */
+static u32 nbio_v7_9_get_rev_id(struct amdgpu_device *adev)
+{
+ u32 rev_id;
+
+ /*
+ * fetch the sub-revision field from the IP-discovery table
+ * (returns zero if the table entry is not populated).
+ */
+ if (amdgpu_sriov_vf(adev)) {
+ rev_id = IP_VERSION_SUBREV(amdgpu_ip_version_full(adev, NBIO_HWIP, 0));
+ } else {
+ rev_id = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
+ rev_id = REG_GET_FIELD(rev_id, RCC_STRAP0_RCC_DEV0_EPF0_STRAP0,
+ STRAP_ATI_REV_ID_DEV0_F0);
+ }
+
+ return rev_id;
+}
+
+/*
+ * Write BIF_FB_EN: both FB_READ_EN and FB_WRITE_EN set when @enable is
+ * true, the whole register cleared to 0 otherwise.
+ */
+static void nbio_v7_9_mc_access_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable)
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN,
+ BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK | BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK);
+ else
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0);
+}
+
+/*
+ * Return the raw value of the RCC_CONFIG_MEMSIZE register.
+ * NOTE(review): the unit of the value is not visible here - confirm
+ * against callers.
+ */
+static u32 nbio_v7_9_get_memsize(struct amdgpu_device *adev)
+{
+ return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_RCC_CONFIG_MEMSIZE);
+}
+
+/*
+ * Program the doorbell range and S2A doorbell control for one SDMA
+ * instance.
+ *
+ * Nothing is written when @use_doorbell is false. Otherwise a range value
+ * (offset = @doorbell_index, size = @doorbell_size) and a control value
+ * (port enable = 1, range size = @doorbell_size) are built from zero, and
+ * the position of the instance within its AID
+ * (dev_inst % adev->sdma.num_inst_per_aid) selects which
+ * DOORBELL0_CTRL_ENTRY_n / S2A_DOORBELL_ENTRY_n_CTRL pair is written and
+ * which AWID, RANGE_OFFSET and AWADDR_31_28 values go into the control
+ * word. Positions other than 0-3 write nothing.
+ *
+ * All field macros use the DOORBELL0_CTRL_ENTRY_0 and
+ * S2A_DOORBELL_ENTRY_1_CTRL layouts regardless of the entry written.
+ */
+static void nbio_v7_9_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index, int doorbell_size)
+{
+ u32 doorbell_range = 0, doorbell_ctrl = 0;
+ int aid_id, dev_inst;
+
+ dev_inst = GET_INST(SDMA0, instance);
+ aid_id = adev->sdma.instance[instance].aid_id;
+
+ if (use_doorbell == false)
+ return;
+
+ doorbell_range =
+ REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_OFFSET_ENTRY, doorbell_index);
+ doorbell_range =
+ REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_SIZE_ENTRY, doorbell_size);
+ doorbell_ctrl =
+ REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_ENABLE, 1);
+ doorbell_ctrl =
+ REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE, doorbell_size);
+
+ /* NOTE(review): assumes num_inst_per_aid is non-zero here - confirm */
+ switch (dev_inst % adev->sdma.num_inst_per_aid) {
+ case 0:
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_1,
+ 4 * aid_id, doorbell_range);
+
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID, 0xe);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET, 0xe);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
+ 0x1);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_1_CTRL,
+ aid_id, doorbell_ctrl);
+ break;
+ case 1:
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_2,
+ 4 * aid_id, doorbell_range);
+
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID, 0x8);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET, 0x8);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
+ 0x2);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_2_CTRL,
+ aid_id, doorbell_ctrl);
+ break;
+ case 2:
+ /* range entry 3 is paired with S2A control entry 5 */
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_3,
+ 4 * aid_id, doorbell_range);
+
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID, 0x9);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET, 0x9);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
+ 0x8);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_5_CTRL,
+ aid_id, doorbell_ctrl);
+ break;
+ case 3:
+ /* range entry 4 is paired with S2A control entry 6 */
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_4,
+ 4 * aid_id, doorbell_range);
+
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID, 0xa);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET, 0xa);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
+ 0x9);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_6_CTRL,
+ aid_id, doorbell_ctrl);
+ break;
+ default:
+ break;
+ }
+}
+
+/*
+ * Program the VCN doorbell range (DOORBELL0_CTRL_ENTRY_17) and the
+ * matching S2A control (S2A_DOORBELL_ENTRY_4_CTRL) for the AID given by
+ * @instance. Both values are built from zero.
+ *
+ * With @use_doorbell the range size is 0xb when the GC IP version is
+ * 9.5.0 and 0x9 otherwise; for a non-zero AID the
+ * DOORBELL0_FENCE_ENABLE_ENTRY field is additionally set to 0x4. Without
+ * @use_doorbell both registers are written with a size of 0 and all other
+ * fields 0.
+ */
+static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance)
+{
+ u32 doorbell_range = 0, doorbell_ctrl = 0;
+ u32 aid_id = instance;
+ u32 range_size;
+
+ if (use_doorbell) {
+ range_size = (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(9, 5, 0)) ?
+ 0xb : 0x9;
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_OFFSET_ENTRY,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_SIZE_ENTRY,
+ range_size);
+ if (aid_id)
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ DOORBELL0_CTRL_ENTRY_0,
+ DOORBELL0_FENCE_ENABLE_ENTRY,
+ 0x4);
+
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_ENABLE, 1);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID, 0x4);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET, 0x4);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE, range_size);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0x4);
+
+ /*
+ * NOTE(review): the offset passed here is aid_id, whereas the
+ * SDMA path in this file passes 4 * aid_id - confirm this
+ * difference is intended.
+ */
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_17,
+ aid_id, doorbell_range);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_4_CTRL,
+ aid_id, doorbell_ctrl);
+ } else {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_SIZE_ENTRY, 0);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE, 0);
+
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_17,
+ aid_id, doorbell_range);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_4_CTRL,
+ aid_id, doorbell_ctrl);
+ }
+}
+
+/*
+ * Set or clear BIF_DOORBELL_APER_EN in RCC_DOORBELL_APER_EN according to
+ * @enable. BIFC_DOORBELL_ACCESS_EN_PF is written with 0xfffff first,
+ * regardless of @enable.
+ */
+static void nbio_v7_9_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ /* Enable to allow doorbell pass thru on pre-silicon bare-metal */
+ WREG32_SOC15(NBIO, 0, regBIFC_DOORBELL_ACCESS_EN_PF, 0xfffff);
+ WREG32_FIELD15_PREREG(NBIO, 0, RCC_DEV0_EPF0_RCC_DOORBELL_APER_EN,
+ BIF_DOORBELL_APER_EN, enable ? 1 : 0);
+}
+
+/*
+ * Program the doorbell self-ring GPA aperture.
+ *
+ * With @enable, the aperture base registers are loaded from the low and
+ * high halves of adev->doorbell.base and the control register is written
+ * with APER_EN = 1, APER_MODE = 1 and APER_SIZE = 0. Without it, only the
+ * control register is written, with 0.
+ */
+static void nbio_v7_9_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = 0;
+
+ if (enable) {
+ /*
+ * tmp is 0 on entry, so each REG_SET_FIELD() yields a value
+ * holding just its own field; OR-ing them merges the fields.
+ */
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_EN, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_MODE, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_SIZE, 0);
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
+ lower_32_bits(adev->doorbell.base));
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
+ upper_32_bits(adev->doorbell.base));
+ }
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, tmp);
+}
+
+/*
+ * Program the IH doorbell range (DOORBELL0_CTRL_ENTRY_0) and its S2A
+ * control (S2A_DOORBELL_ENTRY_3_CTRL). Both values are built from zero
+ * rather than read back from hardware.
+ *
+ * With @use_doorbell: range offset = @doorbell_index, range size = 0x8,
+ * port enabled, AWID / RANGE_OFFSET / AWADDR_31_28 all 0. Without it both
+ * registers are written with every field 0.
+ */
+static void nbio_v7_9_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+ u32 ih_doorbell_range = 0, ih_doorbell_ctrl = 0;
+
+ if (use_doorbell) {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_OFFSET_ENTRY,
+ doorbell_index);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_SIZE_ENTRY,
+ 0x8);
+
+ ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_ENABLE, 1);
+ ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID, 0);
+ ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET, 0);
+ ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE, 0x8);
+ ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0);
+ } else {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_SIZE_ENTRY, 0);
+ ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE, 0);
+ }
+
+ WREG32_SOC15(NBIO, 0, regDOORBELL0_CTRL_ENTRY_0, ih_doorbell_range);
+ WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_3_CTRL, ih_doorbell_ctrl);
+}
+
+
+/* Intentionally empty: no medium-grain clock gating is programmed here. */
+static void nbio_v7_9_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+}
+
+/* Intentionally empty: no medium-grain light sleep is programmed here. */
+static void nbio_v7_9_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+}
+
+/* Intentionally empty: *@flags is left unmodified. */
+static void nbio_v7_9_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+}
+
+/*
+ * Set up BIF interrupt control: INTERRUPT_CNTL2 is loaded with
+ * adev->dummy_page_addr >> 8, and IH_DUMMY_RD_OVERRIDE and
+ * IH_REQ_NONSNOOP_EN are both cleared in INTERRUPT_CNTL.
+ */
+static void nbio_v7_9_ih_control(struct amdgpu_device *adev)
+{
+ u32 interrupt_cntl;
+
+ /* setup interrupt control */
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
+ interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL);
+ /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl =
+ REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0);
+ /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl =
+ REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL, interrupt_cntl);
+}
+
+/* Return the register offset of BIF_BX_PF0_GPU_HDP_FLUSH_REQ. */
+static u32 nbio_v7_9_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_REQ);
+}
+
+/* Return the register offset of BIF_BX_PF0_GPU_HDP_FLUSH_DONE. */
+static u32 nbio_v7_9_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_DONE);
+}
+
+/* Return the register offset of the PCIe index register (PCIE_INDEX2). */
+static u32 nbio_v7_9_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_INDEX2);
+}
+
+/* Return the register offset of the PCIe data register (PCIE_DATA2). */
+static u32 nbio_v7_9_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_DATA2);
+}
+
+/* Return the register offset of the PCIe high index register (PCIE_INDEX2_HI). */
+static u32 nbio_v7_9_get_pcie_index_hi_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_INDEX2_HI);
+}
+
+/*
+ * Per-client ref/mask values for HDP flush on NBIO v7.9: one
+ * GPU_HDP_FLUSH_DONE bit mask for each of CP0-CP9 and SDMA0-SDMA7.
+ * SDMA2-SDMA7 use the RSVD_ENG0-RSVD_ENG5 masks.
+ */
+const struct nbio_hdp_flush_reg nbio_v7_9_hdp_flush_reg = {
+ .ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK,
+ .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK,
+ .ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK,
+ .ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK,
+ .ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK,
+ .ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK,
+ .ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK,
+ .ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK,
+ .ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK,
+ .ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK,
+ .ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+ .ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+ .ref_and_mask_sdma2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK,
+ .ref_and_mask_sdma3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK,
+ .ref_and_mask_sdma4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK,
+ .ref_and_mask_sdma5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK,
+ .ref_and_mask_sdma6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK,
+ .ref_and_mask_sdma7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK,
+};
+
+/*
+ * Enable or disable the doorbell interrupt. The hardware field is a
+ * disable bit, so DOORBELL_INTERRUPT_DISABLE is written as 0 to enable
+ * and 1 to disable.
+ */
+static void nbio_v7_9_enable_doorbell_interrupt(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD15_PREREG(NBIO, 0, BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1);
+}
+
+/*
+ * Return the PARTITION_MODE field of the PARTITION_COMPUTE_STATUS
+ * register.
+ */
+static int nbio_v7_9_get_compute_partition_mode(struct amdgpu_device *adev)
+{
+ u32 status = RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_COMPUTE_STATUS);
+
+ /* Reduce the raw register value to the mode field. */
+ status = REG_GET_FIELD(status, BIF_BX_PF0_PARTITION_COMPUTE_STATUS,
+ PARTITION_MODE);
+
+ return status;
+}
+
+/*
+ * Return true when the CHANGE_STATUE (sic - hardware field name) field of
+ * PARTITION_MEM_STATUS equals 0x8.
+ */
+static bool nbio_v7_9_is_nps_switch_requested(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_MEM_STATUS);
+ tmp = REG_GET_FIELD(tmp, BIF_BX_PF0_PARTITION_MEM_STATUS,
+ CHANGE_STATUE);
+
+ /* 0x8 - NPS switch requested */
+ return (tmp == 0x8);
+}
+/*
+ * Return ffs() of the NPS_MODE field of PARTITION_MEM_STATUS, i.e. the
+ * 1-based position of its lowest set bit, or 0 when the field is 0.
+ *
+ * @supp_modes: optional; when non-NULL it receives the raw value of the
+ *              PARTITION_MEM_CAP register.
+ */
+static u32 nbio_v7_9_get_memory_partition_mode(struct amdgpu_device *adev,
+ u32 *supp_modes)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_MEM_STATUS);
+ tmp = REG_GET_FIELD(tmp, BIF_BX_PF0_PARTITION_MEM_STATUS, NPS_MODE);
+
+ if (supp_modes) {
+ *supp_modes =
+ RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_MEM_CAP);
+ }
+
+ return ffs(tmp);
+}
+
+/*
+ * NBIO register initialisation.
+ *
+ * - XCC_DOORBELL_FENCE is written with 0xff & ~adev->gfx.xcc_mask.
+ * - BIFC_GFX_INT_MONITOR_MASK is written with 0x7ff.
+ * - For every instance selected by adev->aid_mask with bit 0 masked out,
+ *   that instance's XCC_DOORBELL_FENCE is written with the SHUB_SLV_MODE
+ *   mask.
+ * - When not an SR-IOV VF, for every instance in adev->aid_mask the
+ *   BACO_DUMMY_EN and BACO_EN bits of BACO_CNTL are cleared if either one
+ *   is found set.
+ */
+static void nbio_v7_9_init_registers(struct amdgpu_device *adev)
+{
+ u32 inst_mask;
+ int i;
+
+ WREG32_SOC15(NBIO, 0, regXCC_DOORBELL_FENCE,
+ 0xff & ~(adev->gfx.xcc_mask));
+
+ WREG32_SOC15(NBIO, 0, regBIFC_GFX_INT_MONITOR_MASK, 0x7ff);
+
+ /* drop bit 0 so the loop below skips instance 0 */
+ inst_mask = adev->aid_mask & ~1U;
+ for_each_inst(i, inst_mask) {
+ WREG32_SOC15_EXT(NBIO, i, regXCC_DOORBELL_FENCE, i,
+ XCC_DOORBELL_FENCE__SHUB_SLV_MODE_MASK);
+
+ }
+
+ if (!amdgpu_sriov_vf(adev)) {
+ u32 baco_cntl;
+ for_each_inst(i, adev->aid_mask) {
+ baco_cntl = RREG32_SOC15(NBIO, i, regBIF_BX0_BACO_CNTL);
+ if (baco_cntl & (BIF_BX0_BACO_CNTL__BACO_DUMMY_EN_MASK |
+ BIF_BX0_BACO_CNTL__BACO_EN_MASK)) {
+ baco_cntl &= ~(
+ BIF_BX0_BACO_CNTL__BACO_DUMMY_EN_MASK |
+ BIF_BX0_BACO_CNTL__BACO_EN_MASK);
+ /* logs the value that is about to be written */
+ dev_dbg(adev->dev,
+ "Unsetting baco dummy mode %x",
+ baco_cntl);
+ WREG32_SOC15(NBIO, i, regBIF_BX0_BACO_CNTL,
+ baco_cntl);
+ }
+ }
+ }
+}
+
+/* Fixed byte offset of the remap target; added to adev->rmmio_base below. */
+#define MMIO_REG_HOLE_OFFSET 0x1A000
+
+/*
+ * Fill in adev->rmmio_remap. The MMIO hole is used only when the device
+ * is not an SR-IOV VF and PAGE_SIZE is at most 4096; in every other case
+ * the offset is derived from HDP_MEM_COHERENCY_FLUSH_CNTL and no bus
+ * address is recorded.
+ */
+static void nbio_v7_9_set_reg_remap(struct amdgpu_device *adev)
+{
+ /* Fallback path: SR-IOV VF, or pages larger than 4096 bytes. */
+ if (amdgpu_sriov_vf(adev) || (PAGE_SIZE > 4096)) {
+ adev->rmmio_remap.reg_offset =
+ SOC15_REG_OFFSET(
+ NBIO, 0,
+ regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL)
+ << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ return;
+ }
+
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+}
+
+/*
+ * NBIO v7.9 callback table: binds each amdgpu_nbio_funcs hook that this
+ * file implements to its nbio_v7_9_* function.
+ */
+const struct amdgpu_nbio_funcs nbio_v7_9_funcs = {
+ .get_hdp_flush_req_offset = nbio_v7_9_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbio_v7_9_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbio_v7_9_get_pcie_index_offset,
+ .get_pcie_data_offset = nbio_v7_9_get_pcie_data_offset,
+ .get_pcie_index_hi_offset = nbio_v7_9_get_pcie_index_hi_offset,
+ .get_rev_id = nbio_v7_9_get_rev_id,
+ .mc_access_enable = nbio_v7_9_mc_access_enable,
+ .get_memsize = nbio_v7_9_get_memsize,
+ .sdma_doorbell_range = nbio_v7_9_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v7_9_vcn_doorbell_range,
+ .enable_doorbell_aperture = nbio_v7_9_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbio_v7_9_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbio_v7_9_ih_doorbell_range,
+ .enable_doorbell_interrupt = nbio_v7_9_enable_doorbell_interrupt,
+ .update_medium_grain_clock_gating = nbio_v7_9_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v7_9_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v7_9_get_clockgating_state,
+ .ih_control = nbio_v7_9_ih_control,
+ .remap_hdp_registers = nbio_v7_9_remap_hdp_registers,
+ .get_compute_partition_mode = nbio_v7_9_get_compute_partition_mode,
+ .get_memory_partition_mode = nbio_v7_9_get_memory_partition_mode,
+ .is_nps_switch_requested = nbio_v7_9_is_nps_switch_requested,
+ .init_registers = nbio_v7_9_init_registers,
+ .set_reg_remap = nbio_v7_9_set_reg_remap,
+};
+
+static void nbio_v7_9_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+}
+
+static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev)
+{
+ uint32_t bif_doorbell_intr_cntl;
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if);
+ struct ras_err_data err_data;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (amdgpu_ras_error_data_init(&err_data))
+ return;
+
+ bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+
+ if (REG_GET_FIELD(bif_doorbell_intr_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL, RAS_CNTLR_INTERRUPT_STATUS)) {
+ /* driver has to clear the interrupt status when bif ring is disabled */
+ bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ RAS_CNTLR_INTERRUPT_CLEAR, 1);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
+
+ if (!ras->disable_ras_err_cnt_harvest) {
+ /*
+ * clear error status after ras_controller_intr
+ * according to hw team and count ue number
+ * for query
+ */
+ nbio_v7_9_query_ras_error_count(adev, &err_data);
+
+ /* logging on error cnt and printing for awareness */
+ obj->err_data.ue_count += err_data.ue_count;
+ obj->err_data.ce_count += err_data.ce_count;
+
+ if (err_data.ce_count)
+ dev_info(adev->dev, "%ld correctable hardware "
+ "errors detected in %s block\n",
+ obj->err_data.ce_count,
+ get_ras_block_str(adev->nbio.ras_if));
+
+ if (err_data.ue_count)
+ dev_info(adev->dev, "%ld uncorrectable hardware "
+ "errors detected in %s block\n",
+ obj->err_data.ue_count,
+ get_ras_block_str(adev->nbio.ras_if));
+ }
+
+ dev_info(adev->dev, "RAS controller interrupt triggered "
+ "by NBIF error\n");
+ }
+
+ amdgpu_ras_error_data_fini(&err_data);
+}
+
+static void nbio_v7_9_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev)
+{
+ uint32_t bif_doorbell_intr_cntl;
+
+ bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+
+ if (REG_GET_FIELD(bif_doorbell_intr_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL, RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) {
+ /* driver has to clear the interrupt status when bif ring is disabled */
+ bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1);
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
+
+ amdgpu_ras_global_ras_isr(adev);
+ }
+}
+
+static int nbio_v7_9_set_ras_controller_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ /* Dummy function, there is no initialization operation in driver */
+
+ return 0;
+}
+
+static int nbio_v7_9_process_ras_controller_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ /* By design, the ih cookie for ras_controller_irq should be written
+ * to BIFring instead of general iv ring. However, due to known bif ring
+ * hw bug, it has to be disabled. There is no chance the process function
+ * will be invoked. Just leave it as a dummy one.
+ */
+ return 0;
+}
+
+static int nbio_v7_9_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ /* Dummy function, there is no initialization operation in driver */
+
+ return 0;
+}
+
+static int nbio_v7_9_process_err_event_athub_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ /* By design, the ih cookie for err_event_athub_irq should be written
+ * to BIFring instead of general iv ring. However, due to known bif ring
+ * hw bug, it has to be disabled. There is no chance the process function
+ * will be invoked. Just leave it as a dummy one.
+ */
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs nbio_v7_9_ras_controller_irq_funcs = {
+ .set = nbio_v7_9_set_ras_controller_irq_state,
+ .process = nbio_v7_9_process_ras_controller_irq,
+};
+
+static const struct amdgpu_irq_src_funcs nbio_v7_9_ras_err_event_athub_irq_funcs = {
+ .set = nbio_v7_9_set_ras_err_event_athub_irq_state,
+ .process = nbio_v7_9_process_err_event_athub_irq,
+};
+
+static int nbio_v7_9_init_ras_controller_interrupt (struct amdgpu_device *adev)
+{
+ int r;
+
+ /* init the irq funcs */
+ adev->nbio.ras_controller_irq.funcs =
+ &nbio_v7_9_ras_controller_irq_funcs;
+ adev->nbio.ras_controller_irq.num_types = 1;
+
+ /* register ras controller interrupt */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF,
+ NBIF_7_4__SRCID__RAS_CONTROLLER_INTERRUPT,
+ &adev->nbio.ras_controller_irq);
+
+ return r;
+}
+
+static int nbio_v7_9_init_ras_err_event_athub_interrupt (struct amdgpu_device *adev)
+{
+
+ int r;
+
+ /* init the irq funcs */
+ adev->nbio.ras_err_event_athub_irq.funcs =
+ &nbio_v7_9_ras_err_event_athub_irq_funcs;
+ adev->nbio.ras_err_event_athub_irq.num_types = 1;
+
+ /* register ras err event athub interrupt */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF,
+ NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT,
+ &adev->nbio.ras_err_event_athub_irq);
+
+ return r;
+}
+
+const struct amdgpu_ras_block_hw_ops nbio_v7_9_ras_hw_ops = {
+ .query_ras_error_count = nbio_v7_9_query_ras_error_count,
+};
+
+struct amdgpu_nbio_ras nbio_v7_9_ras = {
+ .ras_block = {
+ .ras_comm = {
+ .name = "pcie_bif",
+ .block = AMDGPU_RAS_BLOCK__PCIE_BIF,
+ .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ },
+ .hw_ops = &nbio_v7_9_ras_hw_ops,
+ .ras_late_init = amdgpu_nbio_ras_late_init,
+ },
+ .handle_ras_controller_intr_no_bifring = nbio_v7_9_handle_ras_controller_intr_no_bifring,
+ .handle_ras_err_event_athub_intr_no_bifring = nbio_v7_9_handle_ras_err_event_athub_intr_no_bifring,
+ .init_ras_controller_interrupt = nbio_v7_9_init_ras_controller_interrupt,
+ .init_ras_err_event_athub_interrupt = nbio_v7_9_init_ras_err_event_athub_interrupt,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.h
new file mode 100644
index 000000000000..73709771950d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NBIO_V7_9_H__
+#define __NBIO_V7_9_H__
+
+#include "soc15_common.h"
+
+extern const struct nbio_hdp_flush_reg nbio_v7_9_hdp_flush_reg;
+extern const struct amdgpu_nbio_funcs nbio_v7_9_funcs;
+extern struct amdgpu_nbio_ras nbio_v7_9_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 2ec1ffb36b1f..50e77d9b30af 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -59,7 +59,6 @@
#include "vcn_v3_0.h"
#include "jpeg_v3_0.h"
#include "amdgpu_vkms.h"
-#include "mes_v10_1.h"
#include "mxgpu_nv.h"
#include "smuio_v11_0.h"
#include "smuio_v11_0_6.h"
@@ -67,89 +66,119 @@
static const struct amd_ip_funcs nv_common_ip_funcs;
/* Navi */
-static const struct amdgpu_video_codec_info nv_video_codecs_encode_array[] =
-{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+static const struct amdgpu_video_codec_info nv_video_codecs_encode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 0)},
};
-static const struct amdgpu_video_codecs nv_video_codecs_encode =
-{
+static const struct amdgpu_video_codecs nv_video_codecs_encode = {
.codec_count = ARRAY_SIZE(nv_video_codecs_encode_array),
.codec_array = nv_video_codecs_encode_array,
};
/* Navi1x */
-static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] =
-{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 8192, 8192, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};
-static const struct amdgpu_video_codecs nv_video_codecs_decode =
-{
+static const struct amdgpu_video_codecs nv_video_codecs_decode = {
.codec_count = ARRAY_SIZE(nv_video_codecs_decode_array),
.codec_array = nv_video_codecs_decode_array,
};
/* Sienna Cichlid */
-static const struct amdgpu_video_codec_info sc_video_codecs_decode_array[] =
-{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+static const struct amdgpu_video_codec_info sc_video_codecs_encode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs sc_video_codecs_encode = {
+ .codec_count = ARRAY_SIZE(sc_video_codecs_encode_array),
+ .codec_array = sc_video_codecs_encode_array,
+};
+
+static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
-static const struct amdgpu_video_codecs sc_video_codecs_decode =
-{
- .codec_count = ARRAY_SIZE(sc_video_codecs_decode_array),
- .codec_array = sc_video_codecs_decode_array,
+static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn1[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs sc_video_codecs_decode_vcn0 = {
+ .codec_count = ARRAY_SIZE(sc_video_codecs_decode_array_vcn0),
+ .codec_array = sc_video_codecs_decode_array_vcn0,
+};
+
+static const struct amdgpu_video_codecs sc_video_codecs_decode_vcn1 = {
+ .codec_count = ARRAY_SIZE(sc_video_codecs_decode_array_vcn1),
+ .codec_array = sc_video_codecs_decode_array_vcn1,
};
/* SRIOV Sienna Cichlid, not const since data is controlled by host */
-static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] =
-{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
};
-static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array[] =
-{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
-static struct amdgpu_video_codecs sriov_sc_video_codecs_encode =
-{
+static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn1[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+};
+
+static struct amdgpu_video_codecs sriov_sc_video_codecs_encode = {
.codec_count = ARRAY_SIZE(sriov_sc_video_codecs_encode_array),
.codec_array = sriov_sc_video_codecs_encode_array,
};
-static struct amdgpu_video_codecs sriov_sc_video_codecs_decode =
-{
- .codec_count = ARRAY_SIZE(sriov_sc_video_codecs_decode_array),
- .codec_array = sriov_sc_video_codecs_decode_array,
+static struct amdgpu_video_codecs sriov_sc_video_codecs_decode_vcn0 = {
+ .codec_count = ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn0),
+ .codec_array = sriov_sc_video_codecs_decode_array_vcn0,
+};
+
+static struct amdgpu_video_codecs sriov_sc_video_codecs_decode_vcn1 = {
+ .codec_count = ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn1),
+ .codec_array = sriov_sc_video_codecs_decode_array_vcn1,
};
/* Beige Goby*/
static const struct amdgpu_video_codec_info bg_video_codecs_decode_array[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};
@@ -166,10 +195,11 @@ static const struct amdgpu_video_codecs bg_video_codecs_encode = {
/* Yellow Carp*/
static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
static const struct amdgpu_video_codecs yc_video_codecs_decode = {
@@ -180,32 +210,50 @@ static const struct amdgpu_video_codecs yc_video_codecs_decode = {
static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode,
const struct amdgpu_video_codecs **codecs)
{
- switch (adev->ip_versions[UVD_HWIP][0]) {
+ if (adev->vcn.num_vcn_inst == hweight8(adev->vcn.harvest_config))
+ return -EINVAL;
+
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
case IP_VERSION(3, 0, 0):
case IP_VERSION(3, 0, 64):
case IP_VERSION(3, 0, 192):
if (amdgpu_sriov_vf(adev)) {
- if (encode)
- *codecs = &sriov_sc_video_codecs_encode;
- else
- *codecs = &sriov_sc_video_codecs_decode;
+ if (adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) {
+ if (encode)
+ *codecs = &sriov_sc_video_codecs_encode;
+ else
+ *codecs = &sriov_sc_video_codecs_decode_vcn1;
+ } else {
+ if (encode)
+ *codecs = &sriov_sc_video_codecs_encode;
+ else
+ *codecs = &sriov_sc_video_codecs_decode_vcn0;
+ }
} else {
- if (encode)
- *codecs = &nv_video_codecs_encode;
- else
- *codecs = &sc_video_codecs_decode;
+ if (adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) {
+ if (encode)
+ *codecs = &sc_video_codecs_encode;
+ else
+ *codecs = &sc_video_codecs_decode_vcn1;
+ } else {
+ if (encode)
+ *codecs = &sc_video_codecs_encode;
+ else
+ *codecs = &sc_video_codecs_decode_vcn0;
+ }
}
return 0;
case IP_VERSION(3, 0, 16):
case IP_VERSION(3, 0, 2):
if (encode)
- *codecs = &nv_video_codecs_encode;
+ *codecs = &sc_video_codecs_encode;
else
- *codecs = &sc_video_codecs_decode;
+ *codecs = &sc_video_codecs_decode_vcn0;
return 0;
case IP_VERSION(3, 1, 1):
+ case IP_VERSION(3, 1, 2):
if (encode)
- *codecs = &nv_video_codecs_encode;
+ *codecs = &sc_video_codecs_encode;
else
*codecs = &yc_video_codecs_decode;
return 0;
@@ -227,77 +275,6 @@ static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode,
}
}
-/*
- * Indirect registers accessor
- */
-static u32 nv_pcie_rreg(struct amdgpu_device *adev, u32 reg)
-{
- unsigned long address, data;
- address = adev->nbio.funcs->get_pcie_index_offset(adev);
- data = adev->nbio.funcs->get_pcie_data_offset(adev);
-
- return amdgpu_device_indirect_rreg(adev, address, data, reg);
-}
-
-static void nv_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
-{
- unsigned long address, data;
-
- address = adev->nbio.funcs->get_pcie_index_offset(adev);
- data = adev->nbio.funcs->get_pcie_data_offset(adev);
-
- amdgpu_device_indirect_wreg(adev, address, data, reg, v);
-}
-
-static u64 nv_pcie_rreg64(struct amdgpu_device *adev, u32 reg)
-{
- unsigned long address, data;
- address = adev->nbio.funcs->get_pcie_index_offset(adev);
- data = adev->nbio.funcs->get_pcie_data_offset(adev);
-
- return amdgpu_device_indirect_rreg64(adev, address, data, reg);
-}
-
-static u32 nv_pcie_port_rreg(struct amdgpu_device *adev, u32 reg)
-{
- unsigned long flags, address, data;
- u32 r;
- address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
- data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
-
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
- WREG32(address, reg * 4);
- (void)RREG32(address);
- r = RREG32(data);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
- return r;
-}
-
-static void nv_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v)
-{
- unsigned long address, data;
-
- address = adev->nbio.funcs->get_pcie_index_offset(adev);
- data = adev->nbio.funcs->get_pcie_data_offset(adev);
-
- amdgpu_device_indirect_wreg64(adev, address, data, reg, v);
-}
-
-static void nv_pcie_port_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
-{
- unsigned long flags, address, data;
-
- address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
- data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
-
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
- WREG32(address, reg * 4);
- (void)RREG32(address);
- WREG32(data, v);
- (void)RREG32(data);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
-}
-
static u32 nv_didt_rreg(struct amdgpu_device *adev, u32 reg)
{
unsigned long flags, address, data;
@@ -349,49 +326,12 @@ void nv_grbm_select(struct amdgpu_device *adev,
WREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl);
}
-static void nv_vga_set_state(struct amdgpu_device *adev, bool state)
-{
- /* todo */
-}
-
static bool nv_read_disabled_bios(struct amdgpu_device *adev)
{
/* todo */
return false;
}
-static bool nv_read_bios_from_rom(struct amdgpu_device *adev,
- u8 *bios, u32 length_bytes)
-{
- u32 *dw_ptr;
- u32 i, length_dw;
- u32 rom_index_offset, rom_data_offset;
-
- if (bios == NULL)
- return false;
- if (length_bytes == 0)
- return false;
- /* APU vbios image is part of sbios image */
- if (adev->flags & AMD_IS_APU)
- return false;
-
- dw_ptr = (u32 *)bios;
- length_dw = ALIGN(length_bytes, 4) / 4;
-
- rom_index_offset =
- adev->smuio.funcs->get_rom_index_offset(adev);
- rom_data_offset =
- adev->smuio.funcs->get_rom_data_offset(adev);
-
- /* set rom index to 0 */
- WREG32(rom_index_offset, 0);
- /* read out the rom data */
- for (i = 0; i < length_dw; i++)
- dw_ptr[i] = RREG32(rom_data_offset);
-
- return true;
-}
-
static struct soc15_allowed_register_entry nv_allowed_read_registers[] = {
{ SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS)},
{ SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS2)},
@@ -421,12 +361,12 @@ static uint32_t nv_read_indexed_register(struct amdgpu_device *adev, u32 se_num,
mutex_lock(&adev->grbm_idx_mutex);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
val = RREG32(reg_offset);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
return val;
}
@@ -453,9 +393,10 @@ static int nv_read_register(struct amdgpu_device *adev, u32 se_num,
*value = 0;
for (i = 0; i < ARRAY_SIZE(nv_allowed_read_registers); i++) {
en = &nv_allowed_read_registers[i];
- if ((i == 7 && (adev->sdma.num_instances == 1)) || /* some asics don't have SDMA1 */
- reg_offset !=
- (adev->reg_offset[en->hwip][en->inst][en->seg] + en->reg_offset))
+ if (!adev->reg_offset[en->hwip][en->inst])
+ continue;
+ else if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg]
+ + en->reg_offset))
continue;
*value = nv_get_register_value(adev,
@@ -511,10 +452,13 @@ nv_asic_reset_method(struct amdgpu_device *adev)
dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
amdgpu_reset_method);
- switch (adev->ip_versions[MP1_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
case IP_VERSION(13, 0, 1):
case IP_VERSION(13, 0, 3):
+ case IP_VERSION(13, 0, 5):
+ case IP_VERSION(13, 0, 8):
return AMD_RESET_METHOD_MODE2;
case IP_VERSION(11, 0, 7):
case IP_VERSION(11, 0, 11):
@@ -567,41 +511,17 @@ static int nv_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
return 0;
}
-static void nv_pcie_gen3_enable(struct amdgpu_device *adev)
-{
- if (pci_is_root_bus(adev->pdev->bus))
- return;
-
- if (amdgpu_pcie_gen2 == 0)
- return;
-
- if (!(adev->pm.pcie_gen_mask & (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
- CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)))
- return;
-
- /* todo */
-}
-
static void nv_program_aspm(struct amdgpu_device *adev)
{
- if (!amdgpu_aspm)
+ if (!amdgpu_device_should_use_aspm(adev))
return;
- if (!(adev->flags & AMD_IS_APU) &&
- (adev->nbio.funcs->program_aspm))
+ if (adev->nbio.funcs->program_aspm)
adev->nbio.funcs->program_aspm(adev);
}
-static void nv_enable_doorbell_aperture(struct amdgpu_device *adev,
- bool enable)
-{
- adev->nbio.funcs->enable_doorbell_aperture(adev, enable);
- adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable);
-}
-
-const struct amdgpu_ip_block_version nv_common_ip_block =
-{
+const struct amdgpu_ip_block_version nv_common_ip_block = {
.type = AMD_IP_BLOCK_TYPE_COMMON,
.major = 1,
.minor = 0,
@@ -614,11 +534,6 @@ void nv_set_virt_ops(struct amdgpu_device *adev)
adev->virt.ops = &xgpu_nv_virt_ops;
}
-static uint32_t nv_get_rev_id(struct amdgpu_device *adev)
-{
- return adev->nbio.funcs->get_rev_id(adev);
-}
-
static bool nv_need_full_reset(struct amdgpu_device *adev)
{
return true;
@@ -641,16 +556,6 @@ static bool nv_need_reset_on_init(struct amdgpu_device *adev)
return false;
}
-static uint64_t nv_get_pcie_replay_count(struct amdgpu_device *adev)
-{
-
- /* TODO
- * dummy implement for pcie_replay_count sysfs interface
- * */
-
- return 0;
-}
-
static void nv_init_doorbell_index(struct amdgpu_device *adev)
{
adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ;
@@ -666,7 +571,12 @@ static void nv_init_doorbell_index(struct amdgpu_device *adev)
adev->doorbell_index.userqueue_end = AMDGPU_NAVI10_DOORBELL_USERQUEUE_END;
adev->doorbell_index.gfx_ring0 = AMDGPU_NAVI10_DOORBELL_GFX_RING0;
adev->doorbell_index.gfx_ring1 = AMDGPU_NAVI10_DOORBELL_GFX_RING1;
- adev->doorbell_index.mes_ring = AMDGPU_NAVI10_DOORBELL_MES_RING;
+ adev->doorbell_index.gfx_userqueue_start =
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_START;
+ adev->doorbell_index.gfx_userqueue_end =
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_END;
+ adev->doorbell_index.mes_ring0 = AMDGPU_NAVI10_DOORBELL_MES_RING0;
+ adev->doorbell_index.mes_ring1 = AMDGPU_NAVI10_DOORBELL_MES_RING1;
adev->doorbell_index.sdma_engine[0] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0;
adev->doorbell_index.sdma_engine[1] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1;
adev->doorbell_index.sdma_engine[2] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE2;
@@ -691,28 +601,26 @@ static int nv_update_umd_stable_pstate(struct amdgpu_device *adev,
bool enter)
{
if (enter)
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
else
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
if (adev->gfx.funcs->update_perfmon_mgcg)
adev->gfx.funcs->update_perfmon_mgcg(adev, !enter);
- if (!(adev->flags & AMD_IS_APU) &&
- (adev->nbio.funcs->enable_aspm))
+ if (adev->nbio.funcs->enable_aspm &&
+ amdgpu_device_should_use_aspm(adev))
adev->nbio.funcs->enable_aspm(adev, !enter);
return 0;
}
-static const struct amdgpu_asic_funcs nv_asic_funcs =
-{
+static const struct amdgpu_asic_funcs nv_asic_funcs = {
.read_disabled_bios = &nv_read_disabled_bios,
- .read_bios_from_rom = &nv_read_bios_from_rom,
+ .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom,
.read_register = &nv_read_register,
.reset = &nv_asic_reset,
.reset_method = &nv_asic_reset_method,
- .set_vga_state = &nv_vga_set_state,
.get_xclk = &nv_get_xclk,
.set_uvd_clocks = &nv_set_uvd_clocks,
.set_vce_clocks = &nv_set_vce_clocks,
@@ -720,30 +628,26 @@ static const struct amdgpu_asic_funcs nv_asic_funcs =
.init_doorbell_index = &nv_init_doorbell_index,
.need_full_reset = &nv_need_full_reset,
.need_reset_on_init = &nv_need_reset_on_init,
- .get_pcie_replay_count = &nv_get_pcie_replay_count,
+ .get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count,
.supports_baco = &amdgpu_dpm_is_baco_supported,
.pre_asic_init = &nv_pre_asic_init,
.update_umd_stable_pstate = &nv_update_umd_stable_pstate,
.query_video_codecs = &nv_query_video_codecs,
};
-static int nv_common_early_init(void *handle)
+static int nv_common_early_init(struct amdgpu_ip_block *ip_block)
{
-#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- if (!amdgpu_sriov_vf(adev)) {
- adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
- adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
- }
+ adev->nbio.funcs->set_reg_remap(adev);
adev->smc_rreg = NULL;
adev->smc_wreg = NULL;
- adev->pcie_rreg = &nv_pcie_rreg;
- adev->pcie_wreg = &nv_pcie_wreg;
- adev->pcie_rreg64 = &nv_pcie_rreg64;
- adev->pcie_wreg64 = &nv_pcie_wreg64;
- adev->pciep_rreg = &nv_pcie_port_rreg;
- adev->pciep_wreg = &nv_pcie_port_wreg;
+ adev->pcie_rreg = &amdgpu_device_indirect_rreg;
+ adev->pcie_wreg = &amdgpu_device_indirect_wreg;
+ adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64;
+ adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64;
+ adev->pciep_rreg = amdgpu_device_pcie_port_rreg;
+ adev->pciep_wreg = amdgpu_device_pcie_port_wreg;
/* TODO: will add them during VCN v2 implementation */
adev->uvd_ctx_rreg = NULL;
@@ -754,12 +658,12 @@ static int nv_common_early_init(void *handle)
adev->asic_funcs = &nv_asic_funcs;
- adev->rev_id = nv_get_rev_id(adev);
+ adev->rev_id = amdgpu_device_get_rev_id(adev);
adev->external_rev_id = 0xff;
/* TODO: split the GC and PG flags based on the relevant IP version for which
* they are relevant.
*/
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_CGCG |
@@ -953,7 +857,8 @@ static int nv_common_early_init(void *handle)
AMD_CG_SUPPORT_ATHUB_LS |
AMD_CG_SUPPORT_IH_CG |
AMD_CG_SUPPORT_VCN_MGCG |
- AMD_CG_SUPPORT_JPEG_MGCG;
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG;
adev->pg_flags = AMD_PG_SUPPORT_GFX_PG |
AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |
@@ -964,10 +869,64 @@ static int nv_common_early_init(void *handle)
adev->external_rev_id = adev->rev_id + 0x01;
break;
case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
adev->cg_flags = 0;
adev->pg_flags = 0;
adev->external_rev_id = adev->rev_id + 0x82;
break;
+ case IP_VERSION(10, 3, 6):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG;
+ adev->external_rev_id = adev->rev_id + 0x01;
+ break;
+ case IP_VERSION(10, 3, 7):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_GFX_PG;
+ adev->external_rev_id = adev->rev_id + 0x01;
+ break;
default:
/* FIXME: not supported yet */
return -EINVAL;
@@ -986,23 +945,38 @@ static int nv_common_early_init(void *handle)
return 0;
}
-static int nv_common_late_init(void *handle)
+static int nv_common_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev)) {
xgpu_nv_mailbox_get_irq(adev);
- amdgpu_virt_update_sriov_video_codec(adev,
- sriov_sc_video_codecs_encode_array, ARRAY_SIZE(sriov_sc_video_codecs_encode_array),
- sriov_sc_video_codecs_decode_array, ARRAY_SIZE(sriov_sc_video_codecs_decode_array));
+ if (adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) {
+ amdgpu_virt_update_sriov_video_codec(adev,
+ sriov_sc_video_codecs_encode_array,
+ ARRAY_SIZE(sriov_sc_video_codecs_encode_array),
+ sriov_sc_video_codecs_decode_array_vcn1,
+ ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn1));
+ } else {
+ amdgpu_virt_update_sriov_video_codec(adev,
+ sriov_sc_video_codecs_encode_array,
+ ARRAY_SIZE(sriov_sc_video_codecs_encode_array),
+ sriov_sc_video_codecs_decode_array_vcn0,
+ ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn0));
+ }
}
+ /* Enable selfring doorbell aperture late because doorbell BAR
+ * aperture will change if resize BAR successfully in gmc sw_init.
+ */
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true);
+
return 0;
}
-static int nv_common_sw_init(void *handle)
+static int nv_common_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
xgpu_nv_mailbox_add_irq_id(adev);
@@ -1010,14 +984,9 @@ static int nv_common_sw_init(void *handle)
return 0;
}
-static int nv_common_sw_fini(void *handle)
+static int nv_common_hw_init(struct amdgpu_ip_block *ip_block)
{
- return 0;
-}
-
-static int nv_common_hw_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->nbio.funcs->apply_lc_spc_mode_wa)
adev->nbio.funcs->apply_lc_spc_mode_wa(adev);
@@ -1025,8 +994,6 @@ static int nv_common_hw_init(void *handle)
if (adev->nbio.funcs->apply_l1_link_width_reconfig_wa)
adev->nbio.funcs->apply_l1_link_width_reconfig_wa(adev);
- /* enable pcie gen2/3 link */
- nv_pcie_gen3_enable(adev);
/* enable aspm */
nv_program_aspm(adev);
/* setup nbio registers */
@@ -1038,59 +1005,50 @@ static int nv_common_hw_init(void *handle)
if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev))
adev->nbio.funcs->remap_hdp_registers(adev);
/* enable the doorbell aperture */
- nv_enable_doorbell_aperture(adev, true);
+ adev->nbio.funcs->enable_doorbell_aperture(adev, true);
return 0;
}
-static int nv_common_hw_fini(void *handle)
+static int nv_common_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- /* disable the doorbell aperture */
- nv_enable_doorbell_aperture(adev, false);
+ /* Disable the doorbell aperture and selfring doorbell aperture
+ * separately in hw_fini because nv_enable_doorbell_aperture
+ * has been removed and there is no need to delay disabling
+ * selfring doorbell.
+ */
+ adev->nbio.funcs->enable_doorbell_aperture(adev, false);
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false);
return 0;
}
-static int nv_common_suspend(void *handle)
+static int nv_common_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return nv_common_hw_fini(adev);
+ return nv_common_hw_fini(ip_block);
}
-static int nv_common_resume(void *handle)
+static int nv_common_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return nv_common_hw_init(adev);
+ return nv_common_hw_init(ip_block);
}
-static bool nv_common_is_idle(void *handle)
+static bool nv_common_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int nv_common_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int nv_common_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int nv_common_set_clockgating_state(void *handle,
+static int nv_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[NBIO_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(2, 3, 0):
case IP_VERSION(2, 3, 1):
case IP_VERSION(2, 3, 2):
@@ -1113,16 +1071,16 @@ static int nv_common_set_clockgating_state(void *handle,
return 0;
}
-static int nv_common_set_powergating_state(void *handle,
+static int nv_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* TODO */
return 0;
}
-static void nv_common_get_clockgating_state(void *handle, u32 *flags)
+static void nv_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
*flags = 0;
@@ -1132,8 +1090,6 @@ static void nv_common_get_clockgating_state(void *handle, u32 *flags)
adev->hdp.funcs->get_clock_gating_state(adev, flags);
adev->smuio.funcs->get_clock_gating_state(adev, flags);
-
- return;
}
static const struct amd_ip_funcs nv_common_ip_funcs = {
@@ -1141,14 +1097,11 @@ static const struct amd_ip_funcs nv_common_ip_funcs = {
.early_init = nv_common_early_init,
.late_init = nv_common_late_init,
.sw_init = nv_common_sw_init,
- .sw_fini = nv_common_sw_fini,
.hw_init = nv_common_hw_init,
.hw_fini = nv_common_hw_fini,
.suspend = nv_common_suspend,
.resume = nv_common_resume,
.is_idle = nv_common_is_idle,
- .wait_for_idle = nv_common_wait_for_idle,
- .soft_reset = nv_common_soft_reset,
.set_clockgating_state = nv_common_set_clockgating_state,
.set_powergating_state = nv_common_set_powergating_state,
.get_clockgating_state = nv_common_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h
index 83e9782aef39..8f4817404f10 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/nv.h
@@ -31,5 +31,6 @@ extern const struct amdgpu_ip_block_version nv_common_ip_block;
void nv_grbm_select(struct amdgpu_device *adev,
u32 me, u32 pipe, u32 queue, u32 vmid);
void nv_set_virt_ops(struct amdgpu_device *adev);
+int cyan_skillfish_reg_base_init(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nvd.h b/drivers/gpu/drm/amd/amdgpu/nvd.h
index fd6b58243b03..56f1bfac0b20 100644
--- a/drivers/gpu/drm/amd/amdgpu/nvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/nvd.h
@@ -64,6 +64,24 @@
#define PACKET3_INDIRECT_BUFFER_CNST_END 0x19
#define PACKET3_ATOMIC_GDS 0x1D
#define PACKET3_ATOMIC_MEM 0x1E
+#define PACKET3_ATOMIC_MEM__ATOMIC(x) ((((unsigned)(x)) & 0x7F) << 0)
+#define PACKET3_ATOMIC_MEM__COMMAND(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_ATOMIC_MEM__ADDR_LO(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__ADDR_HI(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__SRC_DATA_LO(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__SRC_DATA_HI(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__CMP_DATA_LO(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__CMP_DATA_HI(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__LOOP_INTERVAL(x) ((((unsigned)(x)) & 0x1FFF) << 0)
+#define PACKET3_ATOMIC_MEM__COMMAND__SINGLE_PASS_ATOMIC 0
+#define PACKET3_ATOMIC_MEM__COMMAND__LOOP_UNTIL_COMPARE_SATISFIED 1
+#define PACKET3_ATOMIC_MEM__COMMAND__WAIT_FOR_WRITE_CONFIRMATION 2
+#define PACKET3_ATOMIC_MEM__COMMAND__SEND_AND_CONTINUE 3
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY__LRU 0
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY__STREAM 1
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY__NOA 2
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY__BYPASS 3
#define PACKET3_OCCLUSION_QUERY 0x1F
#define PACKET3_SET_PREDICATION 0x20
#define PACKET3_REG_RMW 0x21
@@ -105,6 +123,38 @@
* 1 - pfp
* 2 - ce
*/
+#define PACKET3_WRITE_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_WRITE_DATA__ADDR_INCR(x) ((((unsigned)(x)) & 0x1) << 16)
+#define PACKET3_WRITE_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_WRITE_DATA__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WRITE_DATA__DST_MMREG_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WRITE_DATA__DST_GDS_ADDR(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_WRITE_DATA__DST_MEM_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_WRITE_DATA__DST_MEM_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_WRITE_DATA__MODE(x) ((((unsigned)(x)) & 0x1) << 21)
+#define PACKET3_WRITE_DATA__AID_ID(x) ((((unsigned)(x)) & 0x3) << 22)
+#define PACKET3_WRITE_DATA__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 24)
+#define PACKET3_WRITE_DATA__DST_MMREG_ADDR_LO(x) ((unsigned)(x))
+#define PACKET3_WRITE_DATA__DST_MMREG_ADDR_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_WRITE_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_WRITE_DATA__DST_SEL__TC_L2 2
+#define PACKET3_WRITE_DATA__DST_SEL__GDS 3
+#define PACKET3_WRITE_DATA__DST_SEL__MEMORY 5
+#define PACKET3_WRITE_DATA__DST_SEL__MEMORY_MAPPED_ADC_PERSISTENT_STATE 6
+#define PACKET3_WRITE_DATA__ADDR_INCR__INCREMENT_ADDRESS 0
+#define PACKET3_WRITE_DATA__ADDR_INCR__DO_NOT_INCREMENT_ADDRESS 1
+#define PACKET3_WRITE_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_WRITE_CONFIRMATION 0
+#define PACKET3_WRITE_DATA__WR_CONFIRM__WAIT_FOR_WRITE_CONFIRMATION 1
+#define PACKET3_WRITE_DATA__MODE__PF_VF_DISABLED 0
+#define PACKET3_WRITE_DATA__MODE__PF_VF_ENABLED 1
+#define PACKET3_WRITE_DATA__TEMPORAL__RT 0
+#define PACKET3_WRITE_DATA__TEMPORAL__NT 1
+#define PACKET3_WRITE_DATA__TEMPORAL__HT 2
+#define PACKET3_WRITE_DATA__TEMPORAL__LU 3
+#define PACKET3_WRITE_DATA__CACHE_POLICY__LRU 0
+#define PACKET3_WRITE_DATA__CACHE_POLICY__STREAM 1
+#define PACKET3_WRITE_DATA__CACHE_POLICY__NOA 2
+#define PACKET3_WRITE_DATA__CACHE_POLICY__BYPASS 3
#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
#define PACKET3_MEM_SEMAPHORE 0x39
# define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
@@ -135,6 +185,42 @@
/* 0 - me
* 1 - pfp
*/
+#define PACKET3_WAIT_REG_MEM__FUNCTION(x) ((((unsigned)(x)) & 0x7) << 0)
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE(x) ((((unsigned)(x)) & 0x3) << 4)
+#define PACKET3_WAIT_REG_MEM__OPERATION(x) ((((unsigned)(x)) & 0x3) << 6)
+#define PACKET3_WAIT_REG_MEM__MES_INTR_PIPE(x) ((((unsigned)(x)) & 0x3) << 22)
+#define PACKET3_WAIT_REG_MEM__MES_ACTION(x) ((((unsigned)(x)) & 0x1) << 24)
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WAIT_REG_MEM__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_WAIT_REG_MEM__REG_POLL_ADDR(x) ((((unsigned)(x)) & 0X3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR1(x) ((((unsigned)(x)) & 0X3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR2(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__REFERENCE(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__MASK(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__OPTIMIZE_ACE_OFFLOAD_MODE(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_WAIT_REG_MEM__FUNCTION__ALWAYS_PASS 0
+#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_REF_VALUE 1
+#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_EQUAL_TO_THE_REF_VALUE 2
+#define PACKET3_WAIT_REG_MEM__FUNCTION__EQUAL_TO_THE_REFERENCE_VALUE 3
+#define PACKET3_WAIT_REG_MEM__FUNCTION__NOT_EQUAL_REFERENCE_VALUE 4
+#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_OR_EQUAL_REFERENCE_VALUE 5
+#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_REFERENCE_VALUE 6
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE__REGISTER_SPACE 0
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE__MEMORY_SPACE 1
+#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_REG_MEM 0
+#define PACKET3_WAIT_REG_MEM__OPERATION__WR_WAIT_WR_REG 1
+#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_MEM_PREEMPTABLE 3
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__LRU 0
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__STREAM 1
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__NOA 2
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__BYPASS 3
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__RT 0
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__NT 1
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__HT 2
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__LU 3
#define PACKET3_INDIRECT_BUFFER 0x3F
#define INDIRECT_BUFFER_VALID (1 << 23)
#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
@@ -144,8 +230,94 @@
*/
#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
+#define PACKET3_INDIRECT_BUFFER__IB_BASE_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_INDIRECT_BUFFER__IB_BASE_HI(x) ((unsigned)(x))
+#define PACKET3_INDIRECT_BUFFER__IB_SIZE(x) ((((unsigned)(x)) & 0xFFFFF) << 0)
+#define PACKET3_INDIRECT_BUFFER__CHAIN(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_INDIRECT_BUFFER__OFFLOAD_POLLING(x) ((((unsigned)(x)) & 0x1) << 21)
+#define PACKET3_INDIRECT_BUFFER__VALID(x) ((((unsigned)(x)) & 0x1) << 23)
+#define PACKET3_INDIRECT_BUFFER__VMID(x) ((((unsigned)(x)) & 0xF) << 24)
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 28)
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 28)
+#define PACKET3_INDIRECT_BUFFER__PRIV(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__RT 0
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__NT 1
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__HT 2
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__LU 3
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__LRU 0
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__STREAM 1
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__NOA 2
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__BYPASS 3
#define PACKET3_COND_INDIRECT_BUFFER 0x3F
#define PACKET3_COPY_DATA 0x40
+#define PACKET3_COPY_DATA__SRC_SEL(x) ((((unsigned)(x)) & 0xF) << 0)
+#define PACKET3_COPY_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 13)
+#define PACKET3_COPY_DATA__SRC_TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 13)
+#define PACKET3_COPY_DATA__COUNT_SEL(x) ((((unsigned)(x)) & 0x1) << 16)
+#define PACKET3_COPY_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS(x) ((((unsigned)(x)) & 0x1) << 29)
+#define PACKET3_COPY_DATA__SRC_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_COPY_DATA__SRC_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_COPY_DATA__SRC_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_COPY_DATA__SRC_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_COPY_DATA__IMM_DATA(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_MEMTC_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_IMM_DATA(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__DST_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_COPY_DATA__DST_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_COPY_DATA__DST_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_COPY_DATA__DST_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_COPY_DATA__DST_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__MODE(x) ((((unsigned)(x)) & 0x1) << 21)
+#define PACKET3_COPY_DATA__AID_ID(x) ((((unsigned)(x)) & 0x3) << 23)
+#define PACKET3_COPY_DATA__DST_TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_COPY_DATA__SRC_REG_OFFSET_LO(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_REG_OFFSET_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_COPY_DATA__DST_REG_OFFSET_LO(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__DST_REG_OFFSET_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_COPY_DATA__SRC_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_COPY_DATA__SRC_SEL__TC_L2_OBSOLETE 1
+#define PACKET3_COPY_DATA__SRC_SEL__TC_L2 2
+#define PACKET3_COPY_DATA__SRC_SEL__GDS 3
+#define PACKET3_COPY_DATA__SRC_SEL__PERFCOUNTERS 4
+#define PACKET3_COPY_DATA__SRC_SEL__IMMEDIATE_DATA 5
+#define PACKET3_COPY_DATA__SRC_SEL__ATOMIC_RETURN_DATA 6
+#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA0 7
+#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA1 8
+#define PACKET3_COPY_DATA__SRC_SEL__GPU_CLOCK_COUNT 9
+#define PACKET3_COPY_DATA__SRC_SEL__SYSTEM_CLOCK_COUNT 10
+#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_COPY_DATA__DST_SEL__TC_L2 2
+#define PACKET3_COPY_DATA__DST_SEL__GDS 3
+#define PACKET3_COPY_DATA__DST_SEL__PERFCOUNTERS 4
+#define PACKET3_COPY_DATA__DST_SEL__TC_L2_OBSOLETE 5
+#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REG_DC 6
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__RT 0
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__NT 1
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__HT 2
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__LU 3
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__LRU 0
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__STREAM 1
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__NOA 2
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__BYPASS 3
+#define PACKET3_COPY_DATA__COUNT_SEL__32_BITS_OF_DATA 0
+#define PACKET3_COPY_DATA__COUNT_SEL__64_BITS_OF_DATA 1
+#define PACKET3_COPY_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_CONFIRMATION 0
+#define PACKET3_COPY_DATA__WR_CONFIRM__WAIT_FOR_CONFIRMATION 1
+#define PACKET3_COPY_DATA__MODE__PF_VF_DISABLED 0
+#define PACKET3_COPY_DATA__MODE__PF_VF_ENABLED 1
+#define PACKET3_COPY_DATA__DST_TEMPORAL__RT 0
+#define PACKET3_COPY_DATA__DST_TEMPORAL__NT 1
+#define PACKET3_COPY_DATA__DST_TEMPORAL__HT 2
+#define PACKET3_COPY_DATA__DST_TEMPORAL__LU 3
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__LRU 0
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__STREAM 1
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__NOA 2
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__BYPASS 3
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS__DEFAULT 0
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS__PHASE_UPDATE 1
#define PACKET3_CP_DMA 0x41
#define PACKET3_PFP_SYNC_ME 0x42
#define PACKET3_SURFACE_SYNC 0x43
@@ -160,6 +332,23 @@
* 3 - SAMPLE_STREAMOUTSTAT*
* 4 - *S_PARTIAL_FLUSH
*/
+#define PACKET3_EVENT_WRITE__EVENT_TYPE(x) ((((unsigned)(x)) & 0x3F) << 0)
+#define PACKET3_EVENT_WRITE__EVENT_INDEX(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE(x) ((((unsigned)(x)) & 0x3) << 29)
+#define PACKET3_EVENT_WRITE__OFFLOAD_ENABLE(x) ((((unsigned)(x)) & 0x1) << 0)
+#define PACKET3_EVENT_WRITE__ADDRESS_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_EVENT_WRITE__ADDRESS_HI(x) ((unsigned)(x))
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__OTHER 0
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_PIPELINESTAT 2
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__CS_PARTIAL_FLUSH 4
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS 8
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS1 9
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS2 10
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS3 11
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__LEGACY_MODE 0
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__MIXED_MODE1 1
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__NEW_MODE 2
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__MIXED_MODE3 3
#define PACKET3_EVENT_WRITE_EOP 0x47
#define PACKET3_EVENT_WRITE_EOS 0x48
#define PACKET3_RELEASE_MEM 0x49
@@ -304,6 +493,12 @@
* 2: REVERSE
*/
#define PACKET3_ACQUIRE_MEM_GCR_RANGE_IS_PA (1 << 18)
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE(x) ((unsigned)(x))
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_ACQUIRE_MEM__COHER_BASE_LO(x) ((unsigned)(x))
+#define PACKET3_ACQUIRE_MEM__COHER_BASE_HI(x) ((((unsigned)(x)) & 0xFFFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__GCR_CNTL(x) ((((unsigned)(x)) & 0x7FFFF) << 0)
#define PACKET3_REWIND 0x59
#define PACKET3_INTERRUPT 0x5A
#define PACKET3_GEN_PDEPTE 0x5B
@@ -330,11 +525,17 @@
#define PACKET3_SET_SH_REG 0x76
#define PACKET3_SET_SH_REG_START 0x00002c00
#define PACKET3_SET_SH_REG_END 0x00003000
+#define PACKET3_SET_SH_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_SET_SH_REG__VMID_SHIFT(x) ((((unsigned)(x)) & 0x1F) << 23)
+#define PACKET3_SET_SH_REG__INDEX(x) ((((unsigned)(x)) & 0xF) << 28)
+#define PACKET3_SET_SH_REG__INDEX__DEFAULT 0
+#define PACKET3_SET_SH_REG__INDEX__INSERT_VMID 1
#define PACKET3_SET_SH_REG_OFFSET 0x77
#define PACKET3_SET_QUEUE_REG 0x78
#define PACKET3_SET_UCONFIG_REG 0x79
#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
+#define PACKET3_SET_UCONFIG_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
#define PACKET3_SET_UCONFIG_REG_INDEX 0x7A
#define PACKET3_FORWARD_HEADER 0x7C
#define PACKET3_SCRATCH_RAM_WRITE 0x7D
@@ -369,6 +570,7 @@
# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
+# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29)
#define PACKET3_AQL_PACKET 0x99
#define PACKET3_DMA_DATA_FILL_MULTI 0x9A
#define PACKET3_SET_SH_REG_INDEX 0x9B
@@ -463,5 +665,14 @@
#define PACKET3_RUN_LIST 0xA5
#define PACKET3_MAP_PROCESS_VM 0xA6
+#define PACKET3_RUN_CLEANER_SHADER 0xD2
+/* 1. header
+ * 2. RESERVED [31:0]
+ */
+
+/* GFX11 */
+#define PACKET3_SET_Q_PREEMPTION_MODE 0xF0
+# define PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(x) ((x) << 0)
+# define PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM (1 << 0)
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index dd0dce254901..73f87131a7e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -102,6 +102,13 @@ enum psp_gfx_cmd_id
GFX_CMD_ID_LOAD_TOC = 0x00000020, /* Load TOC and obtain TMR size */
GFX_CMD_ID_AUTOLOAD_RLC = 0x00000021, /* Indicates all graphics fw loaded, start RLC autoload */
GFX_CMD_ID_BOOT_CFG = 0x00000022, /* Boot Config */
+ GFX_CMD_ID_SRIOV_SPATIAL_PART = 0x00000027, /* Configure spatial partitioning mode */
+ /*IDs of performance monitoring/profiling*/
+ GFX_CMD_ID_CONFIG_SQ_PERFMON = 0x00000046, /* Config CGTT_SQ_CLK_CTRL */
+ /* Dynamic memory partitioninig (NPS mode change)*/
+ GFX_CMD_ID_FB_NPS_MODE = 0x00000048, /* Configure memory partitioning mode */
+ GFX_CMD_ID_FB_FW_RESERV_ADDR = 0x00000050, /* Query FW reservation addr */
+ GFX_CMD_ID_FB_FW_RESERV_EXT_ADDR = 0x00000051, /* Query FW reservation extended addr */
};
/* PSP boot config sub-commands */
@@ -258,7 +265,45 @@ enum psp_gfx_fw_type {
GFX_FW_TYPE_SDMA6 = 56, /* SDMA6 MI */
GFX_FW_TYPE_SDMA7 = 57, /* SDMA7 MI */
GFX_FW_TYPE_VCN1 = 58, /* VCN1 MI */
+ GFX_FW_TYPE_CAP = 62, /* CAP_FW */
+ GFX_FW_TYPE_SE2_TAP_DELAYS = 65, /* SE2 TAP DELAYS NV */
+ GFX_FW_TYPE_SE3_TAP_DELAYS = 66, /* SE3 TAP DELAYS NV */
GFX_FW_TYPE_REG_LIST = 67, /* REG_LIST MI */
+ GFX_FW_TYPE_IMU_I = 68, /* IMU Instruction FW SOC21 */
+ GFX_FW_TYPE_IMU_D = 69, /* IMU Data FW SOC21 */
+ GFX_FW_TYPE_LSDMA = 70, /* LSDMA FW SOC21 */
+ GFX_FW_TYPE_SDMA_UCODE_TH0 = 71, /* SDMA Thread 0/CTX SOC21 */
+ GFX_FW_TYPE_SDMA_UCODE_TH1 = 72, /* SDMA Thread 1/CTL SOC21 */
+ GFX_FW_TYPE_PPTABLE = 73, /* PPTABLE SOC21 */
+ GFX_FW_TYPE_DISCRETE_USB4 = 74, /* dUSB4 FW SOC21 */
+ GFX_FW_TYPE_TA = 75, /* SRIOV TA FW UUID SOC21 */
+ GFX_FW_TYPE_RS64_MES = 76, /* RS64 MES ucode SOC21 */
+ GFX_FW_TYPE_RS64_MES_STACK = 77, /* RS64 MES stack ucode SOC21 */
+ GFX_FW_TYPE_RS64_KIQ = 78, /* RS64 KIQ ucode SOC21 */
+ GFX_FW_TYPE_RS64_KIQ_STACK = 79, /* RS64 KIQ Heap stack SOC21 */
+ GFX_FW_TYPE_ISP_DATA = 80, /* ISP DATA SOC21 */
+ GFX_FW_TYPE_CP_MES_KIQ = 81, /* MES KIQ ucode SOC21 */
+ GFX_FW_TYPE_MES_KIQ_STACK = 82, /* MES KIQ stack SOC21 */
+ GFX_FW_TYPE_UMSCH_DATA = 83, /* User Mode Scheduler Data SOC21 */
+ GFX_FW_TYPE_UMSCH_UCODE = 84, /* User Mode Scheduler Ucode SOC21 */
+ GFX_FW_TYPE_UMSCH_CMD_BUFFER = 85, /* User Mode Scheduler Command Buffer SOC21 */
+ GFX_FW_TYPE_USB_DP_COMBO_PHY = 86, /* USB-Display port Combo SOC21 */
+ GFX_FW_TYPE_RS64_PFP = 87, /* RS64 PFP SOC21 */
+ GFX_FW_TYPE_RS64_ME = 88, /* RS64 ME SOC21 */
+ GFX_FW_TYPE_RS64_MEC = 89, /* RS64 MEC SOC21 */
+ GFX_FW_TYPE_RS64_PFP_P0_STACK = 90, /* RS64 PFP stack P0 SOC21 */
+ GFX_FW_TYPE_RS64_PFP_P1_STACK = 91, /* RS64 PFP stack P1 SOC21 */
+ GFX_FW_TYPE_RS64_ME_P0_STACK = 92, /* RS64 ME stack P0 SOC21 */
+ GFX_FW_TYPE_RS64_ME_P1_STACK = 93, /* RS64 ME stack P1 SOC21 */
+ GFX_FW_TYPE_RS64_MEC_P0_STACK = 94, /* RS64 MEC stack P0 SOC21 */
+ GFX_FW_TYPE_RS64_MEC_P1_STACK = 95, /* RS64 MEC stack P1 SOC21 */
+ GFX_FW_TYPE_RS64_MEC_P2_STACK = 96, /* RS64 MEC stack P2 SOC21 */
+ GFX_FW_TYPE_RS64_MEC_P3_STACK = 97, /* RS64 MEC stack P3 SOC21 */
+ GFX_FW_TYPE_VPEC_FW1 = 100, /* VPEC FW1 To Save VPE */
+ GFX_FW_TYPE_VPEC_FW2 = 101, /* VPEC FW2 To Save VPE */
+ GFX_FW_TYPE_VPE = 102,
+ GFX_FW_TYPE_JPEG_RAM = 128, /**< JPEG Command buffer */
+ GFX_FW_TYPE_P2S_TABLE = 129,
GFX_FW_TYPE_MAX
};
@@ -305,6 +350,27 @@ struct psp_gfx_cmd_boot_cfg
uint32_t boot_config_valid; /* dynamic boot configuration valid bits bitmask */
};
+struct psp_gfx_cmd_sriov_spatial_part {
+ uint32_t mode;
+ uint32_t override_ips;
+ uint32_t override_xcds_avail;
+ uint32_t override_this_aid;
+};
+
+/*Structure for sq performance monitoring/profiling enable/disable*/
+struct psp_gfx_cmd_config_sq_perfmon {
+ uint32_t gfx_xcp_mask;
+ uint8_t core_override;
+ uint8_t reg_override;
+ uint8_t perfmon_override;
+ uint8_t reserved[5];
+};
+
+struct psp_gfx_cmd_fb_memory_part {
+ uint32_t mode; /* requested NPS mode */
+ uint32_t resvd;
+};
+
/* All GFX ring buffer commands. */
union psp_gfx_commands
{
@@ -318,6 +384,9 @@ union psp_gfx_commands
struct psp_gfx_cmd_setup_tmr cmd_setup_vmr;
struct psp_gfx_cmd_load_toc cmd_load_toc;
struct psp_gfx_cmd_boot_cfg boot_cfg;
+ struct psp_gfx_cmd_sriov_spatial_part cmd_spatial_part;
+ struct psp_gfx_cmd_config_sq_perfmon config_sq_perfmon;
+ struct psp_gfx_cmd_fb_memory_part cmd_memory_part;
};
struct psp_gfx_uresp_reserved
@@ -337,11 +406,19 @@ struct psp_gfx_uresp_bootcfg {
uint32_t boot_cfg; /* boot config data */
};
+/* Command-specific response for fw reserve info */
+struct psp_gfx_uresp_fw_reserve_info {
+ uint32_t reserve_base_address_hi;
+ uint32_t reserve_base_address_lo;
+ uint32_t reserve_size;
+};
+
/* Union of command-specific responses for GPCOM ring. */
union psp_gfx_uresp {
struct psp_gfx_uresp_reserved reserved;
struct psp_gfx_uresp_bootcfg boot_cfg;
struct psp_gfx_uresp_fwar_db_info fwar_db_info;
+ struct psp_gfx_uresp_fw_reserve_info fw_reserve_info;
};
/* Structure of GFX Response buffer.
@@ -417,8 +494,9 @@ struct psp_gfx_rb_frame
#define PSP_ERR_UNKNOWN_COMMAND 0x00000100
enum tee_error_code {
- TEE_SUCCESS = 0x00000000,
- TEE_ERROR_NOT_SUPPORTED = 0xFFFF000A,
+ TEE_SUCCESS = 0x00000000,
+ TEE_ERROR_CANCEL = 0xFFFF0002,
+ TEE_ERROR_NOT_SUPPORTED = 0xFFFF000A,
};
#endif /* _PSP_TEE_GFX_IF_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index ed2293686f0d..3584b8c18fd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -47,111 +47,26 @@ MODULE_FIRMWARE("amdgpu/raven_ta.bin");
static int psp_v10_0_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
- const char *chip_name;
- char fw_name[30];
+ char ucode_prefix[30];
int err = 0;
- const struct ta_firmware_header_v1_0 *ta_hdr;
DRM_DEBUG("\n");
- switch (adev->asic_type) {
- case CHIP_RAVEN:
- if (adev->apu_flags & AMD_APU_IS_RAVEN2)
- chip_name = "raven2";
- else if (adev->apu_flags & AMD_APU_IS_PICASSO)
- chip_name = "picasso";
- else
- chip_name = "raven";
- break;
- default: BUG();
- }
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
- err = psp_init_asd_microcode(psp, chip_name);
+ err = psp_init_asd_microcode(psp, ucode_prefix);
if (err)
- goto out;
-
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
- err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
- if (err) {
- release_firmware(adev->psp.ta_fw);
- adev->psp.ta_fw = NULL;
- dev_info(adev->dev,
- "psp v10.0: Failed to load firmware \"%s\"\n",
- fw_name);
- } else {
- err = amdgpu_ucode_validate(adev->psp.ta_fw);
- if (err)
- goto out2;
-
- ta_hdr = (const struct ta_firmware_header_v1_0 *)
- adev->psp.ta_fw->data;
- adev->psp.hdcp_context.context.bin_desc.fw_version =
- le32_to_cpu(ta_hdr->hdcp.fw_version);
- adev->psp.hdcp_context.context.bin_desc.size_bytes =
- le32_to_cpu(ta_hdr->hdcp.size_bytes);
- adev->psp.hdcp_context.context.bin_desc.start_addr =
- (uint8_t *)ta_hdr +
- le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
-
- adev->psp.dtm_context.context.bin_desc.fw_version =
- le32_to_cpu(ta_hdr->dtm.fw_version);
- adev->psp.dtm_context.context.bin_desc.size_bytes =
- le32_to_cpu(ta_hdr->dtm.size_bytes);
- adev->psp.dtm_context.context.bin_desc.start_addr =
- (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
- le32_to_cpu(ta_hdr->dtm.offset_bytes);
-
- adev->psp.securedisplay_context.context.bin_desc.fw_version =
- le32_to_cpu(ta_hdr->securedisplay.fw_version);
- adev->psp.securedisplay_context.context.bin_desc.size_bytes =
- le32_to_cpu(ta_hdr->securedisplay.size_bytes);
- adev->psp.securedisplay_context.context.bin_desc.start_addr =
- (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
- le32_to_cpu(ta_hdr->securedisplay.offset_bytes);
-
- adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
+ return err;
+
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0)) &&
+ (adev->pdev->revision == 0xa1) &&
+ (psp->securedisplay_context.context.bin_desc.fw_version >=
+ 0x27000008)) {
+ adev->psp.securedisplay_context.context.bin_desc.size_bytes = 0;
}
-
- return 0;
-
-out2:
- release_firmware(adev->psp.ta_fw);
- adev->psp.ta_fw = NULL;
-out:
- if (err) {
- dev_err(adev->dev,
- "psp v10.0: Failed to load firmware \"%s\"\n",
- fw_name);
- }
-
return err;
}
-static int psp_v10_0_ring_init(struct psp_context *psp,
- enum psp_ring_type ring_type)
-{
- int ret = 0;
- struct psp_ring *ring;
- struct amdgpu_device *adev = psp->adev;
-
- ring = &psp->km_ring;
-
- ring->ring_type = ring_type;
-
- /* allocate 4k Page of Local Frame Buffer memory for ring */
- ring->ring_size = 0x1000;
- ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->firmware.rbuf,
- &ring->ring_mem_mc_addr,
- (void **)&ring->ring_mem);
- if (ret) {
- ring->ring_size = 0;
- return ret;
- }
-
- return 0;
-}
-
static int psp_v10_0_ring_create(struct psp_context *psp,
enum psp_ring_type ring_type)
{
@@ -179,7 +94,7 @@ static int psp_v10_0_ring_create(struct psp_context *psp,
/* Wait for response flag (bit 31) in C2PMSG_64 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
return ret;
}
@@ -200,7 +115,7 @@ static int psp_v10_0_ring_stop(struct psp_context *psp,
/* Wait for response flag (bit 31) in C2PMSG_64 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
return ret;
}
@@ -245,7 +160,6 @@ static void psp_v10_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
static const struct psp_funcs psp_v10_0_funcs = {
.init_microcode = psp_v10_0_init_microcode,
- .ring_init = psp_v10_0_ring_init,
.ring_create = psp_v10_0_ring_create,
.ring_stop = psp_v10_0_ring_stop,
.ring_destroy = psp_v10_0_ring_destroy,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 2176ef85f137..a9be7a505026 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -53,11 +53,13 @@ MODULE_FIRMWARE("amdgpu/navi14_ta.bin");
MODULE_FIRMWARE("amdgpu/navi12_sos.bin");
MODULE_FIRMWARE("amdgpu/navi12_asd.bin");
MODULE_FIRMWARE("amdgpu/navi12_ta.bin");
+MODULE_FIRMWARE("amdgpu/navi12_cap.bin");
MODULE_FIRMWARE("amdgpu/arcturus_sos.bin");
MODULE_FIRMWARE("amdgpu/arcturus_asd.bin");
MODULE_FIRMWARE("amdgpu/arcturus_ta.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_sos.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_ta.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_cap.bin");
MODULE_FIRMWARE("amdgpu/navy_flounder_sos.bin");
MODULE_FIRMWARE("amdgpu/navy_flounder_ta.bin");
MODULE_FIRMWARE("amdgpu/vangogh_asd.bin");
@@ -86,179 +88,97 @@ MODULE_FIRMWARE("amdgpu/beige_goby_ta.bin");
static int psp_v11_0_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
- const char *chip_name;
- char fw_name[PSP_FW_NAME_LEN];
+ char ucode_prefix[30];
int err = 0;
- const struct ta_firmware_header_v1_0 *ta_hdr;
DRM_DEBUG("\n");
- switch (adev->ip_versions[MP0_HWIP][0]) {
- case IP_VERSION(11, 0, 2):
- chip_name = "vega20";
- break;
- case IP_VERSION(11, 0, 0):
- chip_name = "navi10";
- break;
- case IP_VERSION(11, 0, 5):
- chip_name = "navi14";
- break;
- case IP_VERSION(11, 0, 9):
- chip_name = "navi12";
- break;
- case IP_VERSION(11, 0, 4):
- chip_name = "arcturus";
- break;
- case IP_VERSION(11, 0, 7):
- chip_name = "sienna_cichlid";
- break;
- case IP_VERSION(11, 0, 11):
- chip_name = "navy_flounder";
- break;
- case IP_VERSION(11, 5, 0):
- chip_name = "vangogh";
- break;
- case IP_VERSION(11, 0, 12):
- chip_name = "dimgrey_cavefish";
- break;
- case IP_VERSION(11, 0, 13):
- chip_name = "beige_goby";
- break;
- default:
- BUG();
- }
-
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
- switch (adev->ip_versions[MP0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 4):
- err = psp_init_sos_microcode(psp, chip_name);
+ err = psp_init_sos_microcode(psp, ucode_prefix);
if (err)
return err;
- err = psp_init_asd_microcode(psp, chip_name);
+ err = psp_init_asd_microcode(psp, ucode_prefix);
if (err)
return err;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
- err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
- if (err) {
- release_firmware(adev->psp.ta_fw);
- adev->psp.ta_fw = NULL;
- dev_info(adev->dev,
- "psp v11.0: Failed to load firmware \"%s\"\n", fw_name);
- } else {
- err = amdgpu_ucode_validate(adev->psp.ta_fw);
- if (err)
- goto out2;
-
- ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data;
- adev->psp.xgmi_context.context.bin_desc.fw_version =
- le32_to_cpu(ta_hdr->xgmi.fw_version);
- adev->psp.xgmi_context.context.bin_desc.size_bytes =
- le32_to_cpu(ta_hdr->xgmi.size_bytes);
- adev->psp.xgmi_context.context.bin_desc.start_addr =
- (uint8_t *)ta_hdr +
- le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
- adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
- adev->psp.ras_context.context.bin_desc.fw_version =
- le32_to_cpu(ta_hdr->ras.fw_version);
- adev->psp.ras_context.context.bin_desc.size_bytes =
- le32_to_cpu(ta_hdr->ras.size_bytes);
- adev->psp.ras_context.context.bin_desc.start_addr =
- (uint8_t *)adev->psp.xgmi_context.context.bin_desc.start_addr +
- le32_to_cpu(ta_hdr->ras.offset_bytes);
- }
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ adev->psp.securedisplay_context.context.bin_desc.size_bytes = 0;
break;
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 5):
case IP_VERSION(11, 0, 9):
- err = psp_init_sos_microcode(psp, chip_name);
+ err = psp_init_sos_microcode(psp, ucode_prefix);
if (err)
return err;
- err = psp_init_asd_microcode(psp, chip_name);
+ err = psp_init_asd_microcode(psp, ucode_prefix);
if (err)
return err;
- if (amdgpu_sriov_vf(adev))
- break;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
- err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
- if (err) {
- release_firmware(adev->psp.ta_fw);
- adev->psp.ta_fw = NULL;
- dev_info(adev->dev,
- "psp v11.0: Failed to load firmware \"%s\"\n", fw_name);
- } else {
- err = amdgpu_ucode_validate(adev->psp.ta_fw);
- if (err)
- goto out2;
-
- ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data;
- adev->psp.hdcp_context.context.bin_desc.fw_version =
- le32_to_cpu(ta_hdr->hdcp.fw_version);
- adev->psp.hdcp_context.context.bin_desc.size_bytes =
- le32_to_cpu(ta_hdr->hdcp.size_bytes);
- adev->psp.hdcp_context.context.bin_desc.start_addr =
- (uint8_t *)ta_hdr +
- le32_to_cpu(
- ta_hdr->header.ucode_array_offset_bytes);
-
- adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
-
- adev->psp.dtm_context.context.bin_desc.fw_version =
- le32_to_cpu(ta_hdr->dtm.fw_version);
- adev->psp.dtm_context.context.bin_desc.size_bytes =
- le32_to_cpu(ta_hdr->dtm.size_bytes);
- adev->psp.dtm_context.context.bin_desc.start_addr =
- (uint8_t *)adev->psp.hdcp_context.context
- .bin_desc.start_addr +
- le32_to_cpu(ta_hdr->dtm.offset_bytes);
- }
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ adev->psp.securedisplay_context.context.bin_desc.size_bytes = 0;
break;
case IP_VERSION(11, 0, 7):
case IP_VERSION(11, 0, 11):
case IP_VERSION(11, 0, 12):
case IP_VERSION(11, 0, 13):
- err = psp_init_sos_microcode(psp, chip_name);
- if (err)
- return err;
- err = psp_init_ta_microcode(psp, chip_name);
+ err = psp_init_sos_microcode(psp, ucode_prefix);
if (err)
return err;
+ err = psp_init_ta_microcode(psp, ucode_prefix);
break;
case IP_VERSION(11, 5, 0):
- err = psp_init_asd_microcode(psp, chip_name);
- if (err)
- return err;
- err = psp_init_toc_microcode(psp, chip_name);
+ case IP_VERSION(11, 5, 2):
+ err = psp_init_asd_microcode(psp, ucode_prefix);
if (err)
return err;
+ err = psp_init_toc_microcode(psp, ucode_prefix);
break;
default:
BUG();
}
- return 0;
-
-out2:
- release_firmware(adev->psp.ta_fw);
- adev->psp.ta_fw = NULL;
return err;
}
-static int psp_v11_0_wait_for_bootloader(struct psp_context *psp)
+static int psp_v11_wait_for_tos_unload(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
+ uint32_t sol_reg1, sol_reg2;
+ int retry_loop;
+
+ /* Wait for the TOS to be unloaded */
+ for (retry_loop = 0; retry_loop < 20; retry_loop++) {
+ sol_reg1 = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+ usleep_range(1000, 2000);
+ sol_reg2 = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+ if (sol_reg1 == sol_reg2)
+ return 0;
+ }
+ dev_err(adev->dev, "TOS unload failed, C2PMSG_33: %x C2PMSG_81: %x",
+ RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_33),
+ RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81));
+ return -ETIME;
+}
+
+static int psp_v11_0_wait_for_bootloader(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
int ret;
int retry_loop;
- for (retry_loop = 0; retry_loop < 10; retry_loop++) {
+ /* For a reset done at the end of S3, only wait for TOS to be unloaded */
+ if (adev->in_s3 && !(adev->flags & AMD_IS_APU) && amdgpu_in_reset(adev))
+ return psp_v11_wait_for_tos_unload(psp);
+
+ for (retry_loop = 0; retry_loop < 20; retry_loop++) {
/* Wait for bootloader to signify that is
ready having bit 31 of C2PMSG_35 set to 1 */
- ret = psp_wait_for(psp,
- SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000,
- 0x80000000,
- false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x8000FFFF, PSP_WAITREG_NOVERBOSE);
if (ret == 0)
return 0;
@@ -277,13 +197,15 @@ static bool psp_v11_0_is_sos_alive(struct psp_context *psp)
return sol_reg != 0x0;
}
-static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp)
+static int psp_v11_0_bootloader_load_component(struct psp_context *psp,
+ struct psp_bin_desc *bin_desc,
+ enum psp_bootloader_cmd bl_cmd)
{
int ret;
uint32_t psp_gfxdrv_command_reg = 0;
struct amdgpu_device *adev = psp->adev;
- /* Check tOS sign of life register to confirm sys driver and sOS
+ /* Check sOS sign of life register to confirm sys driver and sOS
* are already been loaded.
*/
if (psp_v11_0_is_sos_alive(psp))
@@ -293,13 +215,13 @@ static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp)
if (ret)
return ret;
- /* Copy PSP KDB binary to memory */
- psp_copy_fw(psp, psp->kdb.start_addr, psp->kdb.size_bytes);
+ /* Copy PSP System Driver binary to memory */
+ psp_copy_fw(psp, bin_desc->start_addr, bin_desc->size_bytes);
- /* Provide the PSP KDB to bootloader */
+ /* Provide the sys driver to bootloader */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
(uint32_t)(psp->fw_pri_mc_addr >> 20));
- psp_gfxdrv_command_reg = PSP_BL__LOAD_KEY_DATABASE;
+ psp_gfxdrv_command_reg = bl_cmd;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
psp_gfxdrv_command_reg);
@@ -308,69 +230,19 @@ static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp)
return ret;
}
-static int psp_v11_0_bootloader_load_spl(struct psp_context *psp)
+static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp)
{
- int ret;
- uint32_t psp_gfxdrv_command_reg = 0;
- struct amdgpu_device *adev = psp->adev;
-
- /* Check tOS sign of life register to confirm sys driver and sOS
- * are already been loaded.
- */
- if (psp_v11_0_is_sos_alive(psp))
- return 0;
-
- ret = psp_v11_0_wait_for_bootloader(psp);
- if (ret)
- return ret;
-
- /* Copy PSP SPL binary to memory */
- psp_copy_fw(psp, psp->spl.start_addr, psp->spl.size_bytes);
-
- /* Provide the PSP SPL to bootloader */
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
- (uint32_t)(psp->fw_pri_mc_addr >> 20));
- psp_gfxdrv_command_reg = PSP_BL__LOAD_TOS_SPL_TABLE;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
- psp_gfxdrv_command_reg);
-
- ret = psp_v11_0_wait_for_bootloader(psp);
+ return psp_v11_0_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_KEY_DATABASE);
+}
- return ret;
+static int psp_v11_0_bootloader_load_spl(struct psp_context *psp)
+{
+ return psp_v11_0_bootloader_load_component(psp, &psp->spl, PSP_BL__LOAD_TOS_SPL_TABLE);
}
static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp)
{
- int ret;
- uint32_t psp_gfxdrv_command_reg = 0;
- struct amdgpu_device *adev = psp->adev;
-
- /* Check sOS sign of life register to confirm sys driver and sOS
- * are already been loaded.
- */
- if (psp_v11_0_is_sos_alive(psp))
- return 0;
-
- ret = psp_v11_0_wait_for_bootloader(psp);
- if (ret)
- return ret;
-
- /* Copy PSP System Driver binary to memory */
- psp_copy_fw(psp, psp->sys.start_addr, psp->sys.size_bytes);
-
- /* Provide the sys driver to bootloader */
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
- (uint32_t)(psp->fw_pri_mc_addr >> 20));
- psp_gfxdrv_command_reg = PSP_BL__LOAD_SYSDRV;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
- psp_gfxdrv_command_reg);
-
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
- ret = psp_v11_0_wait_for_bootloader(psp);
-
- return ret;
+ return psp_v11_0_bootloader_load_component(psp, &psp->sys, PSP_BL__LOAD_SYSDRV);
}
static int psp_v11_0_bootloader_load_sos(struct psp_context *psp)
@@ -402,38 +274,12 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp)
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81),
- RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81),
- 0, true);
+ RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0,
+ PSP_WAITREG_CHANGED);
return ret;
}
-static int psp_v11_0_ring_init(struct psp_context *psp,
- enum psp_ring_type ring_type)
-{
- int ret = 0;
- struct psp_ring *ring;
- struct amdgpu_device *adev = psp->adev;
-
- ring = &psp->km_ring;
-
- ring->ring_type = ring_type;
-
- /* allocate 4k Page of Local Frame Buffer memory for ring */
- ring->ring_size = 0x1000;
- ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->firmware.rbuf,
- &ring->ring_mem_mc_addr,
- (void **)&ring->ring_mem);
- if (ret) {
- ring->ring_size = 0;
- return ret;
- }
-
- return 0;
-}
-
static int psp_v11_0_ring_stop(struct psp_context *psp,
enum psp_ring_type ring_type)
{
@@ -453,11 +299,13 @@ static int psp_v11_0_ring_stop(struct psp_context *psp,
/* Wait for response flag (bit 31) */
if (amdgpu_sriov_vf(adev))
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
else
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
return ret;
}
@@ -493,13 +341,15 @@ static int psp_v11_0_ring_create(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_101 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
} else {
/* Wait for sOS ready for ring creation */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0);
if (ret) {
DRM_ERROR("Failed to wait for sOS ready for ring creation\n");
return ret;
@@ -523,8 +373,9 @@ static int psp_v11_0_ring_create(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_64 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
}
return ret;
@@ -557,7 +408,8 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp)
offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64);
- ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(psp, offset, MBOX_TOS_READY_FLAG,
+ MBOX_TOS_READY_MASK, 0);
if (ret) {
DRM_INFO("psp is not working correctly before mode1 reset!\n");
@@ -569,17 +421,6 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp)
msleep(500);
- offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
-
- ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false);
-
- if (ret) {
- DRM_INFO("psp mode 1 reset failed!\n");
- return -EINVAL;
- }
-
- DRM_INFO("psp mode1 reset succeed \n");
-
return 0;
}
@@ -597,8 +438,9 @@ static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg)
max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout;
for (i = 0; i < max_wait; i++) {
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE);
if (ret == 0)
break;
}
@@ -683,7 +525,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
* before training, and restore it after training to avoid
* VRAM corruption.
*/
- sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
+ sz = BIST_MEM_TRAINING_ENCROACHED_SIZE;
if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) {
DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n",
@@ -709,7 +551,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
}
memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
vfree(buf);
drm_dev_exit(idx);
} else {
@@ -777,7 +619,7 @@ static int psp_v11_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20));
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
if (ret)
return ret;
@@ -814,7 +656,7 @@ static int psp_v11_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver)
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
if (!ret)
*fw_ver = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36);
@@ -827,7 +669,6 @@ static const struct psp_funcs psp_v11_0_funcs = {
.bootloader_load_spl = psp_v11_0_bootloader_load_spl,
.bootloader_load_sysdrv = psp_v11_0_bootloader_load_sysdrv,
.bootloader_load_sos = psp_v11_0_bootloader_load_sos,
- .ring_init = psp_v11_0_ring_init,
.ring_create = psp_v11_0_ring_create,
.ring_stop = psp_v11_0_ring_stop,
.ring_destroy = psp_v11_0_ring_destroy,
@@ -836,7 +677,8 @@ static const struct psp_funcs psp_v11_0_funcs = {
.ring_get_wptr = psp_v11_0_ring_get_wptr,
.ring_set_wptr = psp_v11_0_ring_set_wptr,
.load_usbc_pd_fw = psp_v11_0_load_usbc_pd_fw,
- .read_usbc_pd_fw = psp_v11_0_read_usbc_pd_fw
+ .read_usbc_pd_fw = psp_v11_0_read_usbc_pd_fw,
+ .wait_for_bootloader = psp_v11_0_wait_for_bootloader
};
void psp_v11_0_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c
index ff13e1beb49b..93787a90d598 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c
@@ -28,32 +28,6 @@
#include "mp/mp_11_0_8_offset.h"
-static int psp_v11_0_8_ring_init(struct psp_context *psp,
- enum psp_ring_type ring_type)
-{
- int ret = 0;
- struct psp_ring *ring;
- struct amdgpu_device *adev = psp->adev;
-
- ring = &psp->km_ring;
-
- ring->ring_type = ring_type;
-
- /* allocate 4k Page of Local Frame Buffer memory for ring */
- ring->ring_size = 0x1000;
- ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->firmware.rbuf,
- &ring->ring_mem_mc_addr,
- (void **)&ring->ring_mem);
- if (ret) {
- ring->ring_size = 0;
- return ret;
- }
-
- return 0;
-}
-
static int psp_v11_0_8_ring_stop(struct psp_context *psp,
enum psp_ring_type ring_type)
{
@@ -67,8 +41,9 @@ static int psp_v11_0_8_ring_stop(struct psp_context *psp,
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
/* Wait for response flag (bit 31) */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
} else {
/* Write the ring destroy command*/
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
@@ -76,8 +51,9 @@ static int psp_v11_0_8_ring_stop(struct psp_context *psp,
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
/* Wait for response flag (bit 31) */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
}
return ret;
@@ -113,13 +89,15 @@ static int psp_v11_0_8_ring_create(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_101 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
} else {
/* Wait for sOS ready for ring creation */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0);
if (ret) {
DRM_ERROR("Failed to wait for trust OS ready for ring creation\n");
return ret;
@@ -143,8 +121,9 @@ static int psp_v11_0_8_ring_create(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_64 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
}
return ret;
@@ -194,7 +173,6 @@ static void psp_v11_0_8_ring_set_wptr(struct psp_context *psp, uint32_t value)
}
static const struct psp_funcs psp_v11_0_8_funcs = {
- .ring_init = psp_v11_0_8_ring_init,
.ring_create = psp_v11_0_8_ring_create,
.ring_stop = psp_v11_0_8_ring_stop,
.ring_destroy = psp_v11_0_8_ring_destroy,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
index a2588200ea58..4c6450d62299 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
@@ -34,9 +34,6 @@
#include "sdma0/sdma0_4_0_offset.h"
#include "nbio/nbio_7_4_offset.h"
-#include "oss/osssys_4_0_offset.h"
-#include "oss/osssys_4_0_sh_mask.h"
-
MODULE_FIRMWARE("amdgpu/renoir_asd.bin");
MODULE_FIRMWARE("amdgpu/renoir_ta.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_asd.bin");
@@ -48,73 +45,25 @@ MODULE_FIRMWARE("amdgpu/green_sardine_ta.bin");
static int psp_v12_0_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
- const char *chip_name;
- char fw_name[30];
+ char ucode_prefix[30];
int err = 0;
- const struct ta_firmware_header_v1_0 *ta_hdr;
DRM_DEBUG("\n");
- switch (adev->asic_type) {
- case CHIP_RENOIR:
- if (adev->apu_flags & AMD_APU_IS_RENOIR)
- chip_name = "renoir";
- else
- chip_name = "green_sardine";
- break;
- default:
- BUG();
- }
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
- err = psp_init_asd_microcode(psp, chip_name);
+ err = psp_init_asd_microcode(psp, ucode_prefix);
if (err)
return err;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
- err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
- if (err) {
- release_firmware(adev->psp.ta_fw);
- adev->psp.ta_fw = NULL;
- dev_info(adev->dev,
- "psp v12.0: Failed to load firmware \"%s\"\n",
- fw_name);
- } else {
- err = amdgpu_ucode_validate(adev->psp.ta_fw);
- if (err)
- goto out;
-
- ta_hdr = (const struct ta_firmware_header_v1_0 *)
- adev->psp.ta_fw->data;
- adev->psp.hdcp_context.context.bin_desc.fw_version =
- le32_to_cpu(ta_hdr->hdcp.fw_version);
- adev->psp.hdcp_context.context.bin_desc.size_bytes =
- le32_to_cpu(ta_hdr->hdcp.size_bytes);
- adev->psp.hdcp_context.context.bin_desc.start_addr =
- (uint8_t *)ta_hdr +
- le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
-
- adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
-
- adev->psp.dtm_context.context.bin_desc.fw_version =
- le32_to_cpu(ta_hdr->dtm.fw_version);
- adev->psp.dtm_context.context.bin_desc.size_bytes =
- le32_to_cpu(ta_hdr->dtm.size_bytes);
- adev->psp.dtm_context.context.bin_desc.start_addr =
- (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
- le32_to_cpu(ta_hdr->dtm.offset_bytes);
- }
-
- return 0;
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
-out:
- release_firmware(adev->psp.ta_fw);
- adev->psp.ta_fw = NULL;
- if (err) {
- dev_err(adev->dev,
- "psp v12.0: Failed to load firmware \"%s\"\n",
- fw_name);
- }
+ /* only supported on renoir */
+ if (!(adev->apu_flags & AMD_APU_IS_RENOIR))
+ adev->psp.securedisplay_context.context.bin_desc.size_bytes = 0;
- return err;
+ return 0;
}
static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp)
@@ -133,7 +82,7 @@ static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp)
/* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
if (ret)
return ret;
@@ -147,11 +96,8 @@ static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp)
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
psp_gfxdrv_command_reg);
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
return ret;
}
@@ -172,7 +118,7 @@ static int psp_v12_0_bootloader_load_sos(struct psp_context *psp)
/* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
if (ret)
return ret;
@@ -186,74 +132,13 @@ static int psp_v12_0_bootloader_load_sos(struct psp_context *psp)
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
psp_gfxdrv_command_reg);
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81),
- RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81),
- 0, true);
+ RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0,
+ PSP_WAITREG_CHANGED);
return ret;
}
-static void psp_v12_0_reroute_ih(struct psp_context *psp)
-{
- struct amdgpu_device *adev = psp->adev;
- uint32_t tmp;
-
- /* Change IH ring for VMC */
- tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1244b);
- tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1);
- tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1);
-
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 3);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET);
-
- mdelay(20);
- psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
-
- /* Change IH ring for UMC */
- tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b);
- tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1);
-
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 4);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET);
-
- mdelay(20);
- psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
-}
-
-static int psp_v12_0_ring_init(struct psp_context *psp,
- enum psp_ring_type ring_type)
-{
- int ret = 0;
- struct psp_ring *ring;
- struct amdgpu_device *adev = psp->adev;
-
- psp_v12_0_reroute_ih(psp);
-
- ring = &psp->km_ring;
-
- ring->ring_type = ring_type;
-
- /* allocate 4k Page of Local Frame Buffer memory for ring */
- ring->ring_size = 0x1000;
- ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->firmware.rbuf,
- &ring->ring_mem_mc_addr,
- (void **)&ring->ring_mem);
- if (ret) {
- ring->ring_size = 0;
- return ret;
- }
-
- return 0;
-}
-
static int psp_v12_0_ring_create(struct psp_context *psp,
enum psp_ring_type ring_type)
{
@@ -262,47 +147,23 @@ static int psp_v12_0_ring_create(struct psp_context *psp,
struct psp_ring *ring = &psp->km_ring;
struct amdgpu_device *adev = psp->adev;
- if (amdgpu_sriov_vf(psp->adev)) {
- /* Write low address of the ring to C2PMSG_102 */
- psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
- /* Write high address of the ring to C2PMSG_103 */
- psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg);
-
- /* Write the ring initialization command to C2PMSG_101 */
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
- GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
-
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
- /* Wait for response flag (bit 31) in C2PMSG_101 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x8000FFFF, false);
-
- } else {
- /* Write low address of the ring to C2PMSG_69 */
- psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
- /* Write high address of the ring to C2PMSG_70 */
- psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
- /* Write size of ring to C2PMSG_71 */
- psp_ring_reg = ring->ring_size;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
- /* Write the ring initialization command to C2PMSG_64 */
- psp_ring_reg = ring_type;
- psp_ring_reg = psp_ring_reg << 16;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
-
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
- /* Wait for response flag (bit 31) in C2PMSG_64 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
- }
+ /* Write low address of the ring to C2PMSG_69 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_70 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
+ /* Write size of ring to C2PMSG_71 */
+ psp_ring_reg = ring->ring_size;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
+ /* Write the ring initialization command to C2PMSG_64 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
return ret;
}
@@ -321,16 +182,15 @@ static int psp_v12_0_ring_stop(struct psp_context *psp,
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
GFX_CTRL_CMD_ID_DESTROY_RINGS);
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
/* Wait for response flag (bit 31) */
if (amdgpu_sriov_vf(adev))
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
else
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
return ret;
}
@@ -361,7 +221,8 @@ static int psp_v12_0_mode1_reset(struct psp_context *psp)
offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64);
- ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(psp, offset, MBOX_TOS_READY_FLAG,
+ MBOX_TOS_READY_MASK, 0);
if (ret) {
DRM_INFO("psp is not working correctly before mode1 reset!\n");
@@ -375,7 +236,8 @@ static int psp_v12_0_mode1_reset(struct psp_context *psp)
offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
- ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(psp, offset, MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK,
+ 0);
if (ret) {
DRM_INFO("psp mode 1 reset failed!\n");
@@ -415,7 +277,6 @@ static const struct psp_funcs psp_v12_0_funcs = {
.init_microcode = psp_v12_0_init_microcode,
.bootloader_load_sysdrv = psp_v12_0_bootloader_load_sysdrv,
.bootloader_load_sos = psp_v12_0_bootloader_load_sos,
- .ring_init = psp_v12_0_ring_init,
.ring_create = psp_v12_0_ring_create,
.ring_stop = psp_v12_0_ring_stop,
.ring_destroy = psp_v12_0_ring_destroy,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 17655bc6d2f1..af4a7d7c4abd 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -20,20 +20,49 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
+#include <drm/drm_drv.h>
+#include <linux/vmalloc.h>
#include "amdgpu.h"
#include "amdgpu_psp.h"
#include "amdgpu_ucode.h"
#include "soc15_common.h"
#include "psp_v13_0.h"
+#include "amdgpu_ras.h"
#include "mp/mp_13_0_2_offset.h"
#include "mp/mp_13_0_2_sh_mask.h"
MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin");
-MODULE_FIRMWARE("amdgpu/yellow_carp_asd.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_cap.bin");
MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin");
MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_5_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos_kicker.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta_kicker.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_10_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_6_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_6_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_12_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_12_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_14_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_14_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_0_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_0_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_1_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_1_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_4_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_4_ta.bin");
/* For large FW files the time to complete can be very long */
#define USBC_PD_POLLING_LIMIT_S 240
@@ -41,41 +70,62 @@ MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin");
/* Read USB-PD from LFB */
#define GFX_CMD_USB_PD_USE_LFB 0x480
+/* Retry times for vmbx ready wait */
+#define PSP_VMBX_POLLING_LIMIT 3000
+
+/* memory training timeout define */
+#define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000
+
+#define regMP1_PUB_SCRATCH0 0x3b10090
+
+#define PSP13_BL_STATUS_SIZE 100
+
static int psp_v13_0_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
- const char *chip_name;
+ char ucode_prefix[30];
int err = 0;
- switch (adev->ip_versions[MP0_HWIP][0]) {
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(13, 0, 2):
- chip_name = "aldebaran";
+ err = psp_init_sos_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ /* It's not necessary to load ras ta on Guest side */
+ if (!amdgpu_sriov_vf(adev)) {
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ }
break;
case IP_VERSION(13, 0, 1):
case IP_VERSION(13, 0, 3):
- chip_name = "yellow_carp";
- break;
- default:
- BUG();
- }
- switch (adev->ip_versions[MP0_HWIP][0]) {
- case IP_VERSION(13, 0, 2):
- err = psp_init_sos_microcode(psp, chip_name);
+ case IP_VERSION(13, 0, 5):
+ case IP_VERSION(13, 0, 8):
+ case IP_VERSION(13, 0, 11):
+ case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
+ case IP_VERSION(14, 0, 4):
+ err = psp_init_toc_microcode(psp, ucode_prefix);
if (err)
return err;
- err = psp_init_ta_microcode(&adev->psp, chip_name);
+ err = psp_init_ta_microcode(psp, ucode_prefix);
if (err)
return err;
break;
- case IP_VERSION(13, 0, 1):
- case IP_VERSION(13, 0, 3):
- err = psp_init_asd_microcode(psp, chip_name);
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 7):
+ case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ err = psp_init_sos_microcode(psp, ucode_prefix);
if (err)
return err;
- err = psp_init_toc_microcode(psp, chip_name);
- if (err)
- return err;
- err = psp_init_ta_microcode(psp, chip_name);
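+ /* It's not necessary to load ras ta on Guest side (NOTE(review): unlike the 13.0.2 case, no amdgpu_sriov_vf() check guards the load below - confirm intent) */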
+ err = psp_init_ta_microcode(psp, ucode_prefix);
if (err)
return err;
break;
@@ -96,29 +146,107 @@ static bool psp_v13_0_is_sos_alive(struct psp_context *psp)
return sol_reg != 0x0;
}
-static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
+/*
+ * Log the bootloader status register (C2PMSG_92) of every instance in
+ * adev->aid_mask on a single line, prefixed with @msg.
+ *
+ * Only PSP v13.0.6, v13.0.12 and v13.0.14 are handled; for any other IP
+ * version the function does nothing.
+ *
+ * The line is assembled in a fixed PSP13_BL_STATUS_SIZE buffer.  scnprintf()
+ * is used so that the running offset never exceeds the buffer even when the
+ * text is truncated, and the buffer starts out as an empty string so that an
+ * empty instance mask cannot print uninitialised stack data.
+ */
+static void psp_v13_0_bootloader_print_status(struct psp_context *psp,
+					      const char *msg)
{
struct amdgpu_device *adev = psp->adev;
+	u32 bl_status_reg;
+	char bl_status_msg[PSP13_BL_STATUS_SIZE];
+	int i, at;
+
+	if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+	    amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
+	    amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) {
+		at = 0;
+		bl_status_msg[0] = '\0';
+		for_each_inst(i, adev->aid_mask) {
+			/* Byte address of C2PMSG_92 plus the per-instance encoding. */
+			bl_status_reg =
+				(SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_92)
+				 << 2) +
+				adev->asic_funcs->encode_ext_smn_addressing(i);
+			/* scnprintf() returns the bytes actually stored. */
+			at += scnprintf(bl_status_msg + at,
+					PSP13_BL_STATUS_SIZE - at,
+					" status(%02i): 0x%08x", i,
+					RREG32_PCIE_EXT(bl_status_reg));
+		}
+		dev_info(adev->dev, "%s - %s", msg, bl_status_msg);
+	}
+}
- int ret;
- int retry_loop;
+/*
+ * Poll C2PMSG_33 for the ready value, retrying psp_wait_for() up to
+ * PSP_VMBX_POLLING_LIMIT times.
+ *
+ * Returns 0 as soon as one wait succeeds; otherwise logs a warning and
+ * returns the result of the last psp_wait_for() call.
+ */
+static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int retry_loop, ret;
- for (retry_loop = 0; retry_loop < 10; retry_loop++) {
+ for (retry_loop = 0; retry_loop < PSP_VMBX_POLLING_LIMIT; retry_loop++) {
/* Wait for bootloader to signify that is
- ready having bit 31 of C2PMSG_35 set to 1 */
- ret = psp_wait_for(psp,
- SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
- 0x80000000,
- 0x80000000,
- false);
+ ready having bit 31 of C2PMSG_33 set to 1 */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_33),
+ 0x80000000, 0xffffffff, PSP_WAITREG_NOVERBOSE);
+
+ if (ret == 0)
+ break;
+ }
+
+ if (ret)
+ dev_warn(adev->dev, "Bootloader wait timed out");
+
+ return ret;
+}
+
+/*
+ * Poll C2PMSG_35 until the bootloader reports ready.
+ *
+ * The number of psp_wait_for() attempts is PSP_VMBX_POLLING_LIMIT on PSP
+ * v13.0.6 / v13.0.12 / v13.0.14 and 10 on every other version.
+ *
+ * Returns 0 on the first successful wait, otherwise the result of the last
+ * psp_wait_for() call.
+ */
+static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int retry_loop, retry_cnt, ret;
+
+ retry_cnt =
+ ((amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14))) ?
+ PSP_VMBX_POLLING_LIMIT :
+ 10;
+ /* Wait for bootloader to signify that it is ready having bit 31 of
+ * C2PMSG_35 set to 1. All other bits are expected to be cleared.
+ * If there is an error in processing command, bits[7:0] will be set.
+ * This is applicable for PSP v13.0.6 and newer.
+ */
+ for (retry_loop = 0; retry_loop < retry_cnt; retry_loop++) {
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+ 0x80000000, 0xffffffff, PSP_WAITREG_NOVERBOSE);
if (ret == 0)
return 0;
+ /* Print the bootloader status on every 10th failed attempt, never on attempt 0. */
+ if (retry_loop && !(retry_loop % 10))
+ psp_v13_0_bootloader_print_status(
+ psp, "Waiting for bootloader completion");
}
return ret;
}
+/*
+ * Wait for the bootloader to settle before it is used.
+ *
+ * Only PSP v13.0.6, v13.0.12 and v13.0.14 wait here: first for the VMBX
+ * ready handshake, then for the bootloader itself.  A failure of either
+ * wait triggers a RAS boot-status query, but only the result of the second
+ * wait is returned.  Every other IP version returns 0 immediately.
+ */
+static int psp_v13_0_wait_for_bootloader_steady_state(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+	int status;
+
+	switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+	case IP_VERSION(13, 0, 6):
+	case IP_VERSION(13, 0, 12):
+	case IP_VERSION(13, 0, 14):
+		break;
+	default:
+		return 0;
+	}
+
+	/* The VMBX result only decides whether the RAS query is issued. */
+	if (psp_v13_0_wait_for_vmbx_ready(psp))
+		amdgpu_ras_query_boot_status(adev, 4);
+
+	status = psp_v13_0_wait_for_bootloader(psp);
+	if (status)
+		amdgpu_ras_query_boot_status(adev, 4);
+
+	return status;
+}
+
static int psp_v13_0_bootloader_load_component(struct psp_context *psp,
struct psp_bin_desc *bin_desc,
enum psp_bootloader_cmd bl_cmd)
@@ -159,6 +287,11 @@ static int psp_v13_0_bootloader_load_kdb(struct psp_context *psp)
return psp_v13_0_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_KEY_DATABASE);
}
+/*
+ * Issue PSP_BL__LOAD_TOS_SPL_TABLE to the bootloader.
+ *
+ * NOTE(review): the binary handed over is psp->kdb, the same descriptor
+ * psp_v13_0_bootloader_load_kdb() uses - confirm this is intended rather
+ * than a dedicated SPL descriptor.
+ */
+static int psp_v13_0_bootloader_load_spl(struct psp_context *psp)
+{
+	struct psp_bin_desc *desc = &psp->kdb;
+
+	return psp_v13_0_bootloader_load_component(psp, desc,
+						   PSP_BL__LOAD_TOS_SPL_TABLE);
+}
+
static int psp_v13_0_bootloader_load_sysdrv(struct psp_context *psp)
{
return psp_v13_0_bootloader_load_component(psp, &psp->sys, PSP_BL__LOAD_SYSDRV);
@@ -179,6 +312,23 @@ static int psp_v13_0_bootloader_load_dbg_drv(struct psp_context *psp)
return psp_v13_0_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_DBGDRV);
}
+/* Hand the RAS driver binary (psp->ras_drv) to the bootloader. */
+static int psp_v13_0_bootloader_load_ras_drv(struct psp_context *psp)
+{
+	struct psp_bin_desc *desc = &psp->ras_drv;
+
+	return psp_v13_0_bootloader_load_component(psp, desc,
+						   PSP_BL__LOAD_RASDRV);
+}
+
+/* Hand the SPDM driver binary (psp->spdm_drv) to the bootloader. */
+static int psp_v13_0_bootloader_load_spdm_drv(struct psp_context *psp)
+{
+	struct psp_bin_desc *desc = &psp->spdm_drv;
+
+	return psp_v13_0_bootloader_load_component(psp, desc,
+						   PSP_BL__LOAD_SPDMDRV);
+}
+
+/* Cache the value of C2PMSG_58 as the sOS firmware version. */
+static inline void psp_v13_0_init_sos_version(struct psp_context *psp)
+{
+	/* The register macro picks up the local named adev. */
+	struct amdgpu_device *adev = psp->adev;
+	uint32_t hw_version = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_58);
+
+	psp->sos.fw_version = hw_version;
+}
+
static int psp_v13_0_bootloader_load_sos(struct psp_context *psp)
{
int ret;
@@ -188,8 +338,10 @@ static int psp_v13_0_bootloader_load_sos(struct psp_context *psp)
/* Check sOS sign of life register to confirm sys driver and sOS
* are already been loaded.
*/
- if (psp_v13_0_is_sos_alive(psp))
+ if (psp_v13_0_is_sos_alive(psp)) {
+ psp_v13_0_init_sos_version(psp);
return 0;
+ }
ret = psp_v13_0_wait_for_bootloader(psp);
if (ret)
@@ -210,36 +362,13 @@ static int psp_v13_0_bootloader_load_sos(struct psp_context *psp)
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_81),
- RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81),
- 0, true);
-
- return ret;
-}
+ RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81), 0,
+ PSP_WAITREG_CHANGED);
-static int psp_v13_0_ring_init(struct psp_context *psp,
- enum psp_ring_type ring_type)
-{
- int ret = 0;
- struct psp_ring *ring;
- struct amdgpu_device *adev = psp->adev;
-
- ring = &psp->km_ring;
-
- ring->ring_type = ring_type;
-
- /* allocate 4k Page of Local Frame Buffer memory for ring */
- ring->ring_size = 0x1000;
- ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->firmware.rbuf,
- &ring->ring_mem_mc_addr,
- (void **)&ring->ring_mem);
- if (ret) {
- ring->ring_size = 0;
- return ret;
- }
+ if (!ret)
+ psp_v13_0_init_sos_version(psp);
- return 0;
+ return ret;
}
static int psp_v13_0_ring_stop(struct psp_context *psp,
@@ -255,8 +384,9 @@ static int psp_v13_0_ring_stop(struct psp_context *psp,
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
/* Wait for response flag (bit 31) */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
} else {
/* Write the ring destroy command*/
WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64,
@@ -264,8 +394,9 @@ static int psp_v13_0_ring_stop(struct psp_context *psp,
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
/* Wait for response flag (bit 31) */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
}
return ret;
@@ -301,13 +432,15 @@ static int psp_v13_0_ring_create(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_101 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
- 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
} else {
/* Wait for sOS ready for ring creation */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0);
if (ret) {
DRM_ERROR("Failed to wait for trust OS ready for ring creation\n");
return ret;
@@ -331,8 +464,9 @@ static int psp_v13_0_ring_create(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_64 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
}
return ret;
@@ -381,6 +515,160 @@ static void psp_v13_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67, value);
}
+/*
+ * Send a memory-training command to the bootloader and wait for it.
+ *
+ * The C2P training-data offset (shifted right by 20) goes to C2PMSG_36 and
+ * @msg to C2PMSG_35; C2PMSG_35 is then polled with psp_wait_for() for at
+ * most MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout attempts.
+ *
+ * Returns 0 once a wait succeeds, -ETIME when every attempt failed.
+ */
+static int psp_v13_0_memory_training_send_msg(struct psp_context *psp, int msg)
+{
+	struct amdgpu_device *adev = psp->adev;
+	uint32_t train_offset = psp->mem_train_ctx.c2p_train_data_offset >> 20;
+	int attempts;
+	int status = -ETIME;
+	int i;
+
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, train_offset);
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, msg);
+
+	attempts = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout;
+	for (i = 0; i < attempts; i++) {
+		if (!psp_wait_for(psp,
+				  SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+				  0x80000000, 0x80000000,
+				  PSP_WAITREG_NOVERBOSE)) {
+			status = 0;
+			break;
+		}
+	}
+
+	dev_dbg(adev->dev, "training %s %s, cost %d @ %d ms\n",
+		(msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long",
+		(status == 0) ? "succeed" : "failed",
+		i, adev->usec_timeout/1000);
+	return status;
+}
+
+
+/*
+ * Run the memory-training operations selected by the @ops bitmask
+ * (PSP_MEM_TRAIN_SEND_SHORT_MSG / _SEND_LONG_MSG / _SAVE / _RESTORE).
+ *
+ * Returns 0 when training is not supported, when sOS is already alive, or
+ * when all requested steps completed.  Returns -EINVAL when the training
+ * context failed to initialise or the VRAM aperture is unusable, -ENOMEM
+ * when the bounce buffer cannot be allocated, -ENODEV when drm_dev_enter()
+ * fails, or the error from psp_v13_0_memory_training_send_msg().
+ */
+static int psp_v13_0_memory_training(struct psp_context *psp, uint32_t ops)
+{
+ struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+ uint32_t *pcache = (uint32_t *)ctx->sys_cache;
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t p2c_header[4];
+ uint32_t sz;
+ void *buf;
+ int ret, idx;
+
+ if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) {
+ dev_dbg(adev->dev, "Memory training is not supported.\n");
+ return 0;
+ } else if (ctx->init != PSP_MEM_TRAIN_INIT_SUCCESS) {
+ dev_err(adev->dev, "Memory training initialization failure.\n");
+ return -EINVAL;
+ }
+
+ if (psp_v13_0_is_sos_alive(psp)) {
+ dev_dbg(adev->dev, "SOS is alive, skip memory training.\n");
+ return 0;
+ }
+
+ /* Fetch the first four words at the P2C training-data offset from VRAM. */
+ amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false);
+ dev_dbg(adev->dev, "sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n",
+ pcache[0], pcache[1], pcache[2], pcache[3],
+ p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]);
+
+ /*
+ * Resolve dependencies between the requested ops. The checks below are
+ * order dependent: each one may set a flag that a later check tests.
+ */
+ if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
+ dev_dbg(adev->dev, "Short training depends on restore.\n");
+ ops |= PSP_MEM_TRAIN_RESTORE;
+ }
+
+ if ((ops & PSP_MEM_TRAIN_RESTORE) &&
+ pcache[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {
+ dev_dbg(adev->dev, "sys_cache[0] is invalid, restore depends on save.\n");
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ if (p2c_header[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&
+ !(pcache[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&
+ pcache[3] == p2c_header[3])) {
+ dev_dbg(adev->dev, "sys_cache is invalid or out-of-date, need save training data to sys_cache.\n");
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ if ((ops & PSP_MEM_TRAIN_SAVE) &&
+ p2c_header[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {
+ dev_dbg(adev->dev, "p2c_header[0] is invalid, save depends on long training.\n");
+ ops |= PSP_MEM_TRAIN_SEND_LONG_MSG;
+ }
+
+ /* Long training replaces short training and always implies a save. */
+ if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+ ops &= ~PSP_MEM_TRAIN_SEND_SHORT_MSG;
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ dev_dbg(adev->dev, "Memory training ops:%x.\n", ops);
+
+ if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+ /*
+ * Long training will encroach a certain amount on the bottom of VRAM;
+ * save the content from the bottom of VRAM to system memory
+ * before training, and restore it after training to avoid
+ * VRAM corruption.
+ */
+ sz = BIST_MEM_TRAINING_ENCROACHED_SIZE;
+
+ if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) {
+ dev_err(adev->dev, "visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n",
+ adev->gmc.visible_vram_size,
+ adev->mman.aper_base_kaddr);
+ return -EINVAL;
+ }
+
+ buf = vmalloc(sz);
+ if (!buf) {
+ dev_err(adev->dev, "failed to allocate system memory.\n");
+ return -ENOMEM;
+ }
+
+ /* Every exit path below frees buf; the entered paths also call drm_dev_exit(). */
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz);
+ ret = psp_v13_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN);
+ if (ret) {
+ DRM_ERROR("Send long training msg failed.\n");
+ vfree(buf);
+ drm_dev_exit(idx);
+ return ret;
+ }
+
+ memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
+ amdgpu_device_flush_hdp(adev, NULL);
+ vfree(buf);
+ drm_dev_exit(idx);
+ } else {
+ vfree(buf);
+ return -ENODEV;
+ }
+ }
+
+ /* SAVE: P2C training data -> sys_cache (same access direction as the header fetch above). */
+ if (ops & PSP_MEM_TRAIN_SAVE) {
+ amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, ctx->sys_cache, ctx->train_data_size, false);
+ }
+
+ /* RESTORE: opposite access direction, targeting the C2P offset (presumably sys_cache -> VRAM; confirm). */
+ if (ops & PSP_MEM_TRAIN_RESTORE) {
+ amdgpu_device_vram_access(psp->adev, ctx->c2p_train_data_offset, ctx->sys_cache, ctx->train_data_size, true);
+ }
+
+ /* A positive amdgpu_force_long_training sends the long-train command here instead of the short one. */
+ if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
+ ret = psp_v13_0_memory_training_send_msg(psp, (amdgpu_force_long_training > 0) ?
+ PSP_BL__DRAM_LONG_TRAIN : PSP_BL__DRAM_SHORT_TRAIN);
+ if (ret) {
+ dev_err(adev->dev, "send training msg failed.\n");
+ return ret;
+ }
+ }
+ ctx->training_cnt++;
+ return 0;
+}
+
static int psp_v13_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc_addr)
{
struct amdgpu_device *adev = psp->adev;
@@ -395,7 +683,7 @@ static int psp_v13_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc
WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20));
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
if (ret)
return ret;
@@ -432,29 +720,255 @@ static int psp_v13_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver)
WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
if (!ret)
*fw_ver = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36);
return ret;
}
+/*
+ * Execute one SPI mailbox command and check its status.
+ *
+ * @cmd is written into bits 16 and up of C2PMSG_115 (all other bits zero),
+ * the doorbell C2PMSG_73 is rung, and C2PMSG_115 is polled for
+ * MBOX_READY_FLAG.  The two flash-image commands use
+ * psp_wait_for_spirom_update() with PSP_SPIROM_UPDATE_TIMEOUT; every other
+ * command uses psp_wait_for().
+ *
+ * Returns the wait error, -EIO when the low 16 bits of C2PMSG_115 are
+ * non-zero afterwards, or 0.
+ */
+static int psp_v13_0_exec_spi_cmd(struct psp_context *psp, int cmd)
+{
+	struct amdgpu_device *adev = psp->adev;
+	bool flash_image_cmd = cmd == C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE ||
+			       cmd == C2PMSG_CMD_SPI_GET_FLASH_IMAGE;
+	uint32_t mbox_val = cmd << 16;
+	uint32_t fail_status;
+	int ret;
+
+	/* clear MBX ready (MBOX_READY_MASK bit is 0) and set update command */
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_115, mbox_val);
+
+	/* Ring the doorbell */
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_73, 1);
+
+	if (flash_image_cmd) {
+		ret = psp_wait_for_spirom_update(
+			psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
+			MBOX_READY_FLAG, MBOX_READY_MASK,
+			PSP_SPIROM_UPDATE_TIMEOUT);
+	} else {
+		ret = psp_wait_for(
+			psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
+			MBOX_READY_FLAG, MBOX_READY_MASK, 0);
+	}
+	if (ret) {
+		dev_err(adev->dev, "SPI cmd %x timed out, ret = %d", cmd, ret);
+		return ret;
+	}
+
+	fail_status = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_115) & 0xFFFF;
+	if (fail_status != 0) {
+		dev_err(adev->dev, "SPI cmd %x failed, fail status = %04x\n",
+			cmd, fail_status);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Update the SPI ROM from the image at @fw_pri_mc_addr.
+ *
+ * After confirming the mailbox is ready, the low and high halves of the
+ * address are passed through C2PMSG_116 with their respective commands,
+ * psp->vbflash_done is set, and the flash-image update command is run.
+ * Returns 0 on success or the first error encountered.
+ */
+static int psp_v13_0_update_spirom(struct psp_context *psp,
+				   uint64_t fw_pri_mc_addr)
+{
+	struct amdgpu_device *adev = psp->adev;
+	int err;
+
+	/* Confirm PSP is ready to start */
+	err = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
+			   MBOX_READY_FLAG, MBOX_READY_MASK, 0);
+	if (err) {
+		dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", err);
+		return err;
+	}
+
+	/* Low 32 bits of the image address. */
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_116, lower_32_bits(fw_pri_mc_addr));
+	err = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO);
+	if (err)
+		return err;
+
+	/* High 32 bits of the image address. */
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_116, upper_32_bits(fw_pri_mc_addr));
+	err = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI);
+	if (err)
+		return err;
+
+	/* The flag is raised before the flash command itself is issued. */
+	psp->vbflash_done = true;
+
+	return psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE);
+}
+
+/*
+ * Ask the PSP to dump the SPI ROM image to @fw_pri_mc_addr.
+ *
+ * After confirming the mailbox is ready, the low and high halves of the
+ * address are passed through C2PMSG_116 with their respective commands and
+ * the get-flash-image command is run.  Returns 0 on success or the first
+ * error encountered.
+ */
+static int psp_v13_0_dump_spirom(struct psp_context *psp,
+				 uint64_t fw_pri_mc_addr)
+{
+	struct amdgpu_device *adev = psp->adev;
+	int err;
+
+	/* Confirm PSP is ready to start */
+	err = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
+			   MBOX_READY_FLAG, MBOX_READY_MASK, 0);
+	if (err) {
+		dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", err);
+		return err;
+	}
+
+	/* Low 32 bits of the destination address. */
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_116, lower_32_bits(fw_pri_mc_addr));
+	err = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_LO);
+	if (err)
+		return err;
+
+	/* High 32 bits of the destination address. */
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_116, upper_32_bits(fw_pri_mc_addr));
+	err = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_HI);
+	if (err)
+		return err;
+
+	return psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_GET_FLASH_IMAGE);
+}
+
+/* Return the raw contents of the SPI mailbox register C2PMSG_115. */
+static int psp_v13_0_vbflash_status(struct psp_context *psp)
+{
+	/* The register macro picks up the local named adev. */
+	struct amdgpu_device *adev = psp->adev;
+	int mbox_status = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_115);
+
+	return mbox_status;
+}
+
+/*
+ * Fatal-error recovery quirk, applied on PSP v13.0.10 only: bump C2PMSG_67
+ * by 0x10 and sleep for one second.  Always returns 0.
+ */
+static int psp_v13_0_fatal_error_recovery_quirk(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+	uint32_t reg_val;
+
+	if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 10))
+		return 0;
+
+	/* MP1 fatal error: trigger PSP dram read to unhalt PSP
+	 * during MP1 triggered sync flood.
+	 */
+	reg_val = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67);
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67, reg_val + 0x10);
+
+	/* delay 1000ms for the mode1 reset for fatal error
+	 * to be recovered back.
+	 */
+	msleep(1000);
+
+	return 0;
+}
+
+/*
+ * Read the RAS capability word from C2PMSG_127.
+ *
+ * Bits 23:0 are stored in adev->ras_hw_enabled and bit 24 in
+ * con->poison_supported.  Returns true when the register was read, and
+ * false for SR-IOV VFs, when there is no RAS context, on APUs, and on any
+ * PSP version other than v13.0.6 / v13.0.12 / v13.0.14.
+ */
+static bool psp_v13_0_get_ras_capability(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	u32 cap;
+
+	/* query ras cap should be done from host side */
+	if (amdgpu_sriov_vf(adev) || !con)
+		return false;
+
+	switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+	case IP_VERSION(13, 0, 6):
+	case IP_VERSION(13, 0, 12):
+	case IP_VERSION(13, 0, 14):
+		break;
+	default:
+		return false;
+	}
+
+	if (adev->flags & AMD_IS_APU)
+		return false;
+
+	cap = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_127);
+	adev->ras_hw_enabled = cap & GENMASK_ULL(23, 0);
+	con->poison_supported = !!(cap & GENMASK_ULL(24, 24));
+
+	return true;
+}
+
+/*
+ * Report whether the auxiliary sOS image must be loaded: true only on PSP
+ * v13.0.6 when the value read from regMP1_PUB_SCRATCH0 is below 0x557300.
+ */
+static bool psp_v13_0_is_aux_sos_load_required(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))
+		return false;
+
+	/* load 4e version of sos if pmfw version less than 85.115.0 */
+	return RREG32(regMP1_PUB_SCRATCH0 / 4) < 0x557300;
+}
+
+/*
+ * Report whether the running TOS differs from the one in the microcode.
+ *
+ * Returns false when sOS is not alive or the IP version is not one of
+ * v13.0.2 / v13.0.6 / v13.0.14.  Otherwise psp->sos.fw_version is refreshed
+ * from hardware and compared with the value it held on entry.
+ */
+static bool psp_v13_0_is_reload_needed(struct psp_context *psp)
+{
+	uint32_t header_ver;
+
+	if (!psp_v13_0_is_sos_alive(psp))
+		return false;
+
+	/* Restrict reload support only to specific IP versions */
+	switch (amdgpu_ip_version(psp->adev, MP0_HWIP, 0)) {
+	case IP_VERSION(13, 0, 2):
+	case IP_VERSION(13, 0, 6):
+	case IP_VERSION(13, 0, 14):
+		break;
+	default:
+		return false;
+	}
+
+	/* TOS version read from microcode header */
+	header_ver = psp->sos.fw_version;
+	/* Read TOS version from hardware */
+	psp_v13_0_init_sos_version(psp);
+
+	return header_ver != psp->sos.fw_version;
+}
+
+/*
+ * Program register @id with @val through the C2PMSG_101..103 mailbox
+ * without using the ring.
+ *
+ * Only implemented for SR-IOV VFs; returns -EOPNOTSUPP otherwise, and the
+ * result of the response wait on C2PMSG_101 for VFs.
+ */
+static int psp_v13_0_reg_program_no_ring(struct psp_context *psp, uint32_t val,
+					 enum psp_reg_prog_id id)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	if (!amdgpu_sriov_vf(adev))
+		return -EOPNOTSUPP;
+
+	/* PSP will broadcast the value to all instances */
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_GBR_IH_SET);
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, id);
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_103, val);
+
+	return psp_wait_for(psp,
+			    SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
+			    0x80000000, 0x80000000, 0);
+}
+
+/* PSP v13.0 implementations of the struct psp_funcs callbacks. */
static const struct psp_funcs psp_v13_0_funcs = {
.init_microcode = psp_v13_0_init_microcode,
+ .wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state,
.bootloader_load_kdb = psp_v13_0_bootloader_load_kdb,
+ .bootloader_load_spl = psp_v13_0_bootloader_load_spl,
.bootloader_load_sysdrv = psp_v13_0_bootloader_load_sysdrv,
.bootloader_load_soc_drv = psp_v13_0_bootloader_load_soc_drv,
.bootloader_load_intf_drv = psp_v13_0_bootloader_load_intf_drv,
.bootloader_load_dbg_drv = psp_v13_0_bootloader_load_dbg_drv,
+ .bootloader_load_ras_drv = psp_v13_0_bootloader_load_ras_drv,
+ .bootloader_load_spdm_drv = psp_v13_0_bootloader_load_spdm_drv,
.bootloader_load_sos = psp_v13_0_bootloader_load_sos,
- .ring_init = psp_v13_0_ring_init,
.ring_create = psp_v13_0_ring_create,
.ring_stop = psp_v13_0_ring_stop,
.ring_destroy = psp_v13_0_ring_destroy,
.ring_get_wptr = psp_v13_0_ring_get_wptr,
.ring_set_wptr = psp_v13_0_ring_set_wptr,
+ .mem_training = psp_v13_0_memory_training,
.load_usbc_pd_fw = psp_v13_0_load_usbc_pd_fw,
- .read_usbc_pd_fw = psp_v13_0_read_usbc_pd_fw
+ .read_usbc_pd_fw = psp_v13_0_read_usbc_pd_fw,
+ .update_spirom = psp_v13_0_update_spirom,
+ .dump_spirom = psp_v13_0_dump_spirom,
+ .vbflash_stat = psp_v13_0_vbflash_status,
+ .fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk,
+ .get_ras_capability = psp_v13_0_get_ras_capability,
+ .is_aux_sos_load_required = psp_v13_0_is_aux_sos_load_required,
+ .is_reload_needed = psp_v13_0_is_reload_needed,
+ .reg_program_no_ring = psp_v13_0_reg_program_no_ring,
};
void psp_v13_0_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.h
index b2414a729ca1..de5677ce4330 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.h
@@ -25,6 +25,8 @@
#include "amdgpu_psp.h"
+#define PSP_SPIROM_UPDATE_TIMEOUT 60000 /* 60s */
+
void psp_v13_0_set_psp_funcs(struct psp_context *psp);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c
new file mode 100644
index 000000000000..5f39a2edcc95
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c
@@ -0,0 +1,355 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+#include "psp_v13_0_4.h"
+
+#include "mp/mp_13_0_4_offset.h"
+#include "mp/mp_13_0_4_sh_mask.h"
+
+MODULE_FIRMWARE("amdgpu/psp_13_0_4_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_4_ta.bin");
+
+/*
+ * Load the TOC and TA microcode for PSP v13.0.4.
+ *
+ * The firmware name prefix is decoded from the MP0 IP version.  Any IP
+ * version other than 13.0.4 is a driver bug and hits BUG().  Returns 0 on
+ * success or the first loader error.
+ */
+static int psp_v13_0_4_init_microcode(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+	char ucode_prefix[30];
+	int err;
+
+	amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+	if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 4))
+		BUG();
+
+	err = psp_init_toc_microcode(psp, ucode_prefix);
+	if (err)
+		return err;
+
+	err = psp_init_ta_microcode(psp, ucode_prefix);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/* sOS counts as alive when the sign-of-life register C2PMSG_81 is non-zero. */
+static bool psp_v13_0_4_is_sos_alive(struct psp_context *psp)
+{
+	/* The register macro picks up the local named adev. */
+	struct amdgpu_device *adev = psp->adev;
+
+	return RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81) != 0x0;
+}
+
+/*
+ * Wait for the bootloader to signal that it is ready by setting bit 31 of
+ * C2PMSG_35, making at most 10 psp_wait_for() attempts.
+ *
+ * Returns 0 on the first successful wait, otherwise the result of the last
+ * attempt.
+ */
+static int psp_v13_0_4_wait_for_bootloader(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+	int attempt = 0;
+	int status;
+
+	do {
+		status = psp_wait_for(
+			psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+			0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE);
+	} while (status && ++attempt < 10);
+
+	return status;
+}
+
+/*
+ * Hand one firmware component to the bootloader.
+ *
+ * Nothing is done (0 is returned) when sOS is already alive.  Otherwise the
+ * bootloader is waited for, the 1 MiB private buffer is zeroed and filled
+ * with the binary described by @bin_desc, the buffer address (shifted right
+ * by 20) is written to C2PMSG_36 and @bl_cmd to C2PMSG_35, and the
+ * bootloader is waited for again.
+ *
+ * Returns 0 on success or the error of the failing wait.
+ */
+static int psp_v13_0_4_bootloader_load_component(struct psp_context *psp,
+						 struct psp_bin_desc *bin_desc,
+						 enum psp_bootloader_cmd bl_cmd)
+{
+	struct amdgpu_device *adev = psp->adev;
+	uint32_t bl_cmd_val = bl_cmd;
+	int err;
+
+	/* Check tOS sign of life register to confirm sys driver and sOS
+	 * are already been loaded.
+	 */
+	if (psp_v13_0_4_is_sos_alive(psp))
+		return 0;
+
+	err = psp_v13_0_4_wait_for_bootloader(psp);
+	if (err)
+		return err;
+
+	/* Stage the component binary in the zeroed private buffer */
+	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+	memcpy(psp->fw_pri_buf, bin_desc->start_addr, bin_desc->size_bytes);
+
+	/* Tell the bootloader where the binary is, then issue the command */
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36,
+		     (uint32_t)(psp->fw_pri_mc_addr >> 20));
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, bl_cmd_val);
+
+	return psp_v13_0_4_wait_for_bootloader(psp);
+}
+
+/* Hand the key database binary (psp->kdb) to the bootloader. */
+static int psp_v13_0_4_bootloader_load_kdb(struct psp_context *psp)
+{
+	struct psp_bin_desc *desc = &psp->kdb;
+
+	return psp_v13_0_4_bootloader_load_component(psp, desc,
+						     PSP_BL__LOAD_KEY_DATABASE);
+}
+
+/*
+ * Issue PSP_BL__LOAD_TOS_SPL_TABLE to the bootloader.
+ *
+ * NOTE(review): the binary handed over is psp->kdb, the same descriptor
+ * psp_v13_0_4_bootloader_load_kdb() uses - confirm this is intended rather
+ * than a dedicated SPL descriptor.
+ */
+static int psp_v13_0_4_bootloader_load_spl(struct psp_context *psp)
+{
+	struct psp_bin_desc *desc = &psp->kdb;
+
+	return psp_v13_0_4_bootloader_load_component(psp, desc,
+						     PSP_BL__LOAD_TOS_SPL_TABLE);
+}
+
+/* Hand the system driver binary (psp->sys) to the bootloader. */
+static int psp_v13_0_4_bootloader_load_sysdrv(struct psp_context *psp)
+{
+	struct psp_bin_desc *desc = &psp->sys;
+
+	return psp_v13_0_4_bootloader_load_component(psp, desc,
+						     PSP_BL__LOAD_SYSDRV);
+}
+
+/* Hand the SoC driver binary (psp->soc_drv) to the bootloader. */
+static int psp_v13_0_4_bootloader_load_soc_drv(struct psp_context *psp)
+{
+	struct psp_bin_desc *desc = &psp->soc_drv;
+
+	return psp_v13_0_4_bootloader_load_component(psp, desc,
+						     PSP_BL__LOAD_SOCDRV);
+}
+
+/* Hand the interface driver binary (psp->intf_drv) to the bootloader. */
+static int psp_v13_0_4_bootloader_load_intf_drv(struct psp_context *psp)
+{
+	struct psp_bin_desc *desc = &psp->intf_drv;
+
+	return psp_v13_0_4_bootloader_load_component(psp, desc,
+						     PSP_BL__LOAD_INTFDRV);
+}
+
+/* Hand the debug driver binary (psp->dbg_drv) to the bootloader. */
+static int psp_v13_0_4_bootloader_load_dbg_drv(struct psp_context *psp)
+{
+	struct psp_bin_desc *desc = &psp->dbg_drv;
+
+	return psp_v13_0_4_bootloader_load_component(psp, desc,
+						     PSP_BL__LOAD_DBGDRV);
+}
+
+/*
+ * Load the secure OS through the bootloader.
+ *
+ * Nothing is done (0 is returned) when sOS is already alive.  Otherwise the
+ * bootloader is waited for, the sOS binary is staged in the zeroed 1 MiB
+ * private buffer, PSP_BL__LOAD_SOSDRV is issued, and after a 20 ms delay the
+ * sign-of-life register C2PMSG_81 is waited on with PSP_WAITREG_CHANGED,
+ * using the value it holds at that moment as the reference.
+ *
+ * Returns 0 on success or the error of the failing wait.
+ */
+static int psp_v13_0_4_bootloader_load_sos(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+	unsigned int load_cmd = PSP_BL__LOAD_SOSDRV;
+	int err;
+
+	/* Check sOS sign of life register to confirm sys driver and sOS
+	 * are already been loaded.
+	 */
+	if (psp_v13_0_4_is_sos_alive(psp))
+		return 0;
+
+	err = psp_v13_0_4_wait_for_bootloader(psp);
+	if (err)
+		return err;
+
+	/* Copy Secure OS binary to PSP memory */
+	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+	memcpy(psp->fw_pri_buf, psp->sos.start_addr, psp->sos.size_bytes);
+
+	/* Provide the PSP secure OS to bootloader */
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36,
+		     (uint32_t)(psp->fw_pri_mc_addr >> 20));
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, load_cmd);
+
+	/* there might be handshake issue with hardware which needs delay */
+	mdelay(20);
+
+	return psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_81),
+			    RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81), 0,
+			    PSP_WAITREG_CHANGED);
+}
+
+/*
+ * Issue the ring destroy command and wait for the response flag.
+ *
+ * SR-IOV VFs write GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING to C2PMSG_101; all
+ * other devices write GFX_CTRL_CMD_ID_DESTROY_RINGS to C2PMSG_64.  In both
+ * cases the same register is then polled for MBOX_TOS_RESP_FLAG after a
+ * 20 ms delay.  @ring_type is not used.
+ *
+ * Returns the result of the response wait.
+ */
+static int psp_v13_0_4_ring_stop(struct psp_context *psp,
+				 enum psp_ring_type ring_type)
+{
+	struct amdgpu_device *adev = psp->adev;
+	uint32_t mbox_offset;
+
+	/* Write the ring destroy command*/
+	if (amdgpu_sriov_vf(adev)) {
+		WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101,
+			     GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
+		mbox_offset = SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101);
+	} else {
+		WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64,
+			     GFX_CTRL_CMD_ID_DESTROY_RINGS);
+		mbox_offset = SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64);
+	}
+
+	/* there might be handshake issue with hardware which needs delay */
+	mdelay(20);
+
+	/* Wait for response flag (bit 31) */
+	return psp_wait_for(psp, mbox_offset, MBOX_TOS_RESP_FLAG,
+			    MBOX_TOS_RESP_MASK, 0);
+}
+
+/*
+ * Hand the kernel-mode ring (psp->km_ring) to the PSP.
+ *
+ * SR-IOV VF: the ring is stopped first, the ring address is written to
+ * C2PMSG_102/103 and GFX_CTRL_CMD_ID_INIT_GPCOM_RING is issued on
+ * C2PMSG_101.
+ * Otherwise: after waiting for the ready flag on C2PMSG_64, the address
+ * goes to C2PMSG_69/70, the size to C2PMSG_71 and ring_type << 16 is
+ * written to C2PMSG_64.
+ *
+ * Returns 0 on success or the error from the failing wait / ring stop.
+ */
+static int psp_v13_0_4_ring_create(struct psp_context *psp,
+				   enum psp_ring_type ring_type)
+{
+	int ret = 0;
+	unsigned int psp_ring_reg = 0;
+	struct psp_ring *ring = &psp->km_ring;
+	struct amdgpu_device *adev = psp->adev;
+
+	if (amdgpu_sriov_vf(adev)) {
+		ret = psp_v13_0_4_ring_stop(psp, ring_type);
+		if (ret) {
+			/* Name the function that actually failed (was "psp_v13_0_ring_stop_sriov") */
+			DRM_ERROR("psp_v13_0_4_ring_stop failed!\n");
+			return ret;
+		}
+
+		/* Write low address of the ring to C2PMSG_102 */
+		psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+		WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, psp_ring_reg);
+		/* Write high address of the ring to C2PMSG_103 */
+		psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+		WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_103, psp_ring_reg);
+
+		/* Write the ring initialization command to C2PMSG_101 */
+		WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101,
+			     GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
+
+		/* there might be handshake issue with hardware which needs delay */
+		mdelay(20);
+
+		/* Wait for response flag (bit 31) in C2PMSG_101 */
+		ret = psp_wait_for(
+			psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
+			MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+
+	} else {
+		/* Wait for sOS ready for ring creation */
+		ret = psp_wait_for(
+			psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+			MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0);
+		if (ret) {
+			DRM_ERROR("Failed to wait for trust OS ready for ring creation\n");
+			return ret;
+		}
+
+		/* Write low address of the ring to C2PMSG_69 */
+		psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+		WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_69, psp_ring_reg);
+		/* Write high address of the ring to C2PMSG_70 */
+		psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+		WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_70, psp_ring_reg);
+		/* Write size of ring to C2PMSG_71 */
+		psp_ring_reg = ring->ring_size;
+		WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_71, psp_ring_reg);
+		/* Write the ring initialization command to C2PMSG_64 */
+		psp_ring_reg = ring_type;
+		psp_ring_reg = psp_ring_reg << 16;
+		WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+		/* there might be handshake issue with hardware which needs delay */
+		mdelay(20);
+
+		/* Wait for response flag (bit 31) in C2PMSG_64 */
+		ret = psp_wait_for(
+			psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+			MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+	}
+
+	return ret;
+}
+
+/*
+ * Stop the PSP ring and release its backing buffer.
+ *
+ * The buffer is freed even when the stop fails; the stop result is what
+ * gets returned.
+ */
+static int psp_v13_0_4_ring_destroy(struct psp_context *psp,
+				    enum psp_ring_type ring_type)
+{
+	struct amdgpu_device *adev = psp->adev;
+	struct psp_ring *km_ring = &psp->km_ring;
+	int stop_status = psp_v13_0_4_ring_stop(psp, ring_type);
+
+	if (stop_status)
+		DRM_ERROR("Fail to stop psp ring\n");
+
+	amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+			      &km_ring->ring_mem_mc_addr,
+			      (void **)&km_ring->ring_mem);
+
+	return stop_status;
+}
+
+/* Read the ring write pointer: C2PMSG_102 for SR-IOV VFs, C2PMSG_67 otherwise. */
+static uint32_t psp_v13_0_4_ring_get_wptr(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	if (amdgpu_sriov_vf(adev))
+		return RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102);
+
+	return RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67);
+}
+
+/*
+ * Publish a new ring write pointer.
+ *
+ * SR-IOV VFs write the value to C2PMSG_102 and then issue
+ * GFX_CTRL_CMD_ID_CONSUME_CMD on C2PMSG_101; otherwise the value is
+ * written to C2PMSG_67 only.
+ */
+static void psp_v13_0_4_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	if (!amdgpu_sriov_vf(adev)) {
+		WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67, value);
+		return;
+	}
+
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, value);
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101,
+		     GFX_CTRL_CMD_ID_CONSUME_CMD);
+}
+
+static const struct psp_funcs psp_v13_0_4_funcs = { /* callback table installed by psp_v13_0_4_set_psp_funcs() */
+ .init_microcode = psp_v13_0_4_init_microcode,
+ .bootloader_load_kdb = psp_v13_0_4_bootloader_load_kdb, /* bootloader component loaders */
+ .bootloader_load_spl = psp_v13_0_4_bootloader_load_spl,
+ .bootloader_load_sysdrv = psp_v13_0_4_bootloader_load_sysdrv,
+ .bootloader_load_soc_drv = psp_v13_0_4_bootloader_load_soc_drv,
+ .bootloader_load_intf_drv = psp_v13_0_4_bootloader_load_intf_drv,
+ .bootloader_load_dbg_drv = psp_v13_0_4_bootloader_load_dbg_drv,
+ .bootloader_load_sos = psp_v13_0_4_bootloader_load_sos,
+ .ring_create = psp_v13_0_4_ring_create, /* ring management */
+ .ring_stop = psp_v13_0_4_ring_stop,
+ .ring_destroy = psp_v13_0_4_ring_destroy,
+ .ring_get_wptr = psp_v13_0_4_ring_get_wptr,
+ .ring_set_wptr = psp_v13_0_4_ring_set_wptr,
+};
+
+void psp_v13_0_4_set_psp_funcs(struct psp_context *psp)
+{
+ psp->funcs = &psp_v13_0_4_funcs; /* point this context at the PSP v13.0.4 callback table */
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.h b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.h
new file mode 100644
index 000000000000..8547b8d514d5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __PSP_V13_0_4_H__
+#define __PSP_V13_0_4_H__
+
+#include "amdgpu_psp.h"
+
+void psp_v13_0_4_set_psp_funcs(struct psp_context *psp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
new file mode 100644
index 000000000000..38dfc5c19f2a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
@@ -0,0 +1,705 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <drm/drm_drv.h>
+#include <linux/vmalloc.h>
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+#include "psp_v14_0.h"
+
+#include "mp/mp_14_0_2_offset.h"
+#include "mp/mp_14_0_2_sh_mask.h"
+
+MODULE_FIRMWARE("amdgpu/psp_14_0_2_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_2_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos_kicker.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta_kicker.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_5_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_5_ta.bin");
+
+/* For large FW files the time to complete can be very long; this caps the number of 1000 ms polls in psp_v14_0_load_usbc_pd_fw() */
+#define USBC_PD_POLLING_LIMIT_S 240
+
+/* Read USB-PD from LFB; written to C2PMSG_35 shifted left by 16 */
+#define GFX_CMD_USB_PD_USE_LFB 0x480
+
+/* VBIOS gfl defines: masks/flag for the C2PMSG_115 mailbox and the SPI commands passed to psp_v14_0_exec_spi_cmd() */
+#define MBOX_READY_MASK 0x80000000 /* bit 31 */
+#define MBOX_STATUS_MASK 0x0000FFFF /* bits 15:0 */
+#define MBOX_COMMAND_MASK 0x00FF0000 /* bits 23:16 */
+#define MBOX_READY_FLAG 0x80000000 /* value expected under MBOX_READY_MASK */
+#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO 0x2
+#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI 0x3
+#define C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE 0x4
+
+/* memory training timeout define; divided by adev->usec_timeout to get the number of waits in psp_v14_0_memory_training_send_msg() */
+#define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000
+
+/*
+ * Select and initialise the PSP microcode for the detected MP0 IP version.
+ *
+ * 14.0.2 and 14.0.3 use the sOS image, 14.0.5 uses the TOC image; all of
+ * them then load the TA image. Any other version hits BUG().
+ * Returns 0 on success or the first loader error.
+ */
+static int psp_v14_0_init_microcode(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+	char ucode_prefix[30];
+	int err = 0;
+
+	amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+	/* First image depends on the IP version */
+	switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+	case IP_VERSION(14, 0, 2):
+	case IP_VERSION(14, 0, 3):
+		err = psp_init_sos_microcode(psp, ucode_prefix);
+		break;
+	case IP_VERSION(14, 0, 5):
+		err = psp_init_toc_microcode(psp, ucode_prefix);
+		break;
+	default:
+		BUG();
+	}
+	if (err)
+		return err;
+
+	/* The TA image is common to every supported version */
+	err = psp_init_ta_microcode(psp, ucode_prefix);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/* Report sOS as alive when the C2PMSG_81 register reads non-zero. */
+static bool psp_v14_0_is_sos_alive(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	return RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81) != 0x0;
+}
+
+/*
+ * Wait until the bootloader signals readiness by setting bit 31 of
+ * C2PMSG_35, trying up to 10 times.
+ * Returns 0 on success, otherwise the error of the last attempt.
+ */
+static int psp_v14_0_wait_for_bootloader(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+	int attempts_left = 10;
+	int status;
+
+	do {
+		status = psp_wait_for(
+			psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35),
+			0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE);
+	} while (status && --attempts_left);
+
+	return status;
+}
+
+/*
+ * Stage one firmware component and ask the bootloader to load it.
+ *
+ * @bin_desc: image to copy into the private firmware buffer
+ * @bl_cmd:   bootloader command written to C2PMSG_35
+ *
+ * Returns 0 without loading anything when sOS is already alive, otherwise
+ * the result of the bootloader waits.
+ */
+static int psp_v14_0_bootloader_load_component(struct psp_context *psp,
+					       struct psp_bin_desc *bin_desc,
+					       enum psp_bootloader_cmd bl_cmd)
+{
+	struct amdgpu_device *adev = psp->adev;
+	int status;
+
+	/* Skip the load if the sign-of-life register shows sOS is running */
+	if (psp_v14_0_is_sos_alive(psp))
+		return 0;
+
+	status = psp_v14_0_wait_for_bootloader(psp);
+	if (status)
+		return status;
+
+	/* Copy the component into the cleared private firmware buffer */
+	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+	memcpy(psp->fw_pri_buf, bin_desc->start_addr, bin_desc->size_bytes);
+
+	/* C2PMSG_36: buffer MC address >> 20; C2PMSG_35: the command */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36,
+		     (uint32_t)(psp->fw_pri_mc_addr >> 20));
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, (uint32_t)bl_cmd);
+
+	return psp_v14_0_wait_for_bootloader(psp);
+}
+
+/* Load the psp->kdb image with PSP_BL__LOAD_KEY_DATABASE. */
+static int psp_v14_0_bootloader_load_kdb(struct psp_context *psp)
+{
+	struct psp_bin_desc *kdb = &psp->kdb;
+
+	return psp_v14_0_bootloader_load_component(psp, kdb, PSP_BL__LOAD_KEY_DATABASE);
+}
+
+/* Load the psp->spl image with PSP_BL__LOAD_TOS_SPL_TABLE. */
+static int psp_v14_0_bootloader_load_spl(struct psp_context *psp)
+{
+	struct psp_bin_desc *spl = &psp->spl;
+
+	return psp_v14_0_bootloader_load_component(psp, spl, PSP_BL__LOAD_TOS_SPL_TABLE);
+}
+
+/* Load the psp->sys image with PSP_BL__LOAD_SYSDRV. */
+static int psp_v14_0_bootloader_load_sysdrv(struct psp_context *psp)
+{
+	struct psp_bin_desc *sysdrv = &psp->sys;
+
+	return psp_v14_0_bootloader_load_component(psp, sysdrv, PSP_BL__LOAD_SYSDRV);
+}
+
+/* Load the psp->soc_drv image with PSP_BL__LOAD_SOCDRV. */
+static int psp_v14_0_bootloader_load_soc_drv(struct psp_context *psp)
+{
+	struct psp_bin_desc *socdrv = &psp->soc_drv;
+
+	return psp_v14_0_bootloader_load_component(psp, socdrv, PSP_BL__LOAD_SOCDRV);
+}
+
+/* Load the psp->intf_drv image with PSP_BL__LOAD_INTFDRV. */
+static int psp_v14_0_bootloader_load_intf_drv(struct psp_context *psp)
+{
+	struct psp_bin_desc *intfdrv = &psp->intf_drv;
+
+	return psp_v14_0_bootloader_load_component(psp, intfdrv, PSP_BL__LOAD_INTFDRV);
+}
+
+/* Load the psp->dbg_drv image with PSP_BL__LOAD_HADDRV. */
+static int psp_v14_0_bootloader_load_dbg_drv(struct psp_context *psp)
+{
+	/* dbg_drv was renamed to had_drv in psp v14 */
+	struct psp_bin_desc *haddrv = &psp->dbg_drv;
+
+	return psp_v14_0_bootloader_load_component(psp, haddrv, PSP_BL__LOAD_HADDRV);
+}
+
+/* Load the psp->ras_drv image with PSP_BL__LOAD_RASDRV. */
+static int psp_v14_0_bootloader_load_ras_drv(struct psp_context *psp)
+{
+	struct psp_bin_desc *rasdrv = &psp->ras_drv;
+
+	return psp_v14_0_bootloader_load_component(psp, rasdrv, PSP_BL__LOAD_RASDRV);
+}
+
+/* Load the psp->ipkeymgr_drv image with PSP_BL__LOAD_IPKEYMGRDRV. */
+static int psp_v14_0_bootloader_load_ipkeymgr_drv(struct psp_context *psp)
+{
+	struct psp_bin_desc *ipkeymgr = &psp->ipkeymgr_drv;
+
+	return psp_v14_0_bootloader_load_component(psp, ipkeymgr, PSP_BL__LOAD_IPKEYMGRDRV);
+}
+
+/*
+ * Load the secure OS (sOS) image through the bootloader mailbox.
+ *
+ * Returns 0 immediately when sOS is already alive; otherwise returns the
+ * error from the bootloader wait, or the result of the final wait on
+ * C2PMSG_81.
+ */
+static int psp_v14_0_bootloader_load_sos(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+	int status;
+
+	/* Nothing to load if sOS already shows a sign of life */
+	if (psp_v14_0_is_sos_alive(psp))
+		return 0;
+
+	status = psp_v14_0_wait_for_bootloader(psp);
+	if (status)
+		return status;
+
+	/* Stage the Secure OS binary in the cleared private firmware buffer */
+	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+	memcpy(psp->fw_pri_buf, psp->sos.start_addr, psp->sos.size_bytes);
+
+	/* C2PMSG_36 receives the buffer MC address shifted right by 20 bits */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36,
+		     (uint32_t)(psp->fw_pri_mc_addr >> 20));
+	/* C2PMSG_35 receives the load-sOS bootloader command */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, PSP_BL__LOAD_SOSDRV);
+
+	/* there might be handshake issue with hardware which needs delay */
+	mdelay(20);
+
+	/* Wait on C2PMSG_81, seeded with its current value, in CHANGED mode */
+	return psp_wait_for(psp,
+			    SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_81),
+			    RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81), 0,
+			    PSP_WAITREG_CHANGED);
+}
+
+/*
+ * Send the ring destroy command and wait for the response flag.
+ *
+ * SR-IOV VFs use C2PMSG_101 with GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING, all
+ * other cases use C2PMSG_64 with GFX_CTRL_CMD_ID_DESTROY_RINGS.
+ * ring_type is not used by this function.
+ * Returns the result of psp_wait_for().
+ */
+static int psp_v14_0_ring_stop(struct psp_context *psp,
+			       enum psp_ring_type ring_type)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	if (!amdgpu_sriov_vf(adev)) {
+		/* Write the ring destroy command */
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64,
+			     GFX_CTRL_CMD_ID_DESTROY_RINGS);
+		/* there might be handshake issue with hardware which needs delay */
+		mdelay(20);
+		/* Wait for response flag (bit 31) */
+		return psp_wait_for(psp,
+				    SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64),
+				    MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+	}
+
+	/* SR-IOV VF path: write the ring destroy command */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101,
+		     GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
+	/* there might be handshake issue with hardware which needs delay */
+	mdelay(20);
+	/* Wait for response flag (bit 31) */
+	return psp_wait_for(psp,
+			    SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101),
+			    MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+}
+
+/*
+ * Hand the kernel-mode ring (psp->km_ring) to the PSP.
+ *
+ * SR-IOV VF: the ring is stopped first, the ring address is written to
+ * C2PMSG_102/103 and GFX_CTRL_CMD_ID_INIT_GPCOM_RING is issued on
+ * C2PMSG_101.
+ * Otherwise: after waiting for the ready flag on C2PMSG_64, the address
+ * goes to C2PMSG_69/70, the size to C2PMSG_71 and ring_type << 16 is
+ * written to C2PMSG_64.
+ *
+ * Returns 0 on success or the error from the failing wait / ring stop.
+ */
+static int psp_v14_0_ring_create(struct psp_context *psp,
+				 enum psp_ring_type ring_type)
+{
+	struct amdgpu_device *adev = psp->adev;
+	struct psp_ring *km_ring = &psp->km_ring;
+	unsigned int init_cmd;
+	int status;
+
+	if (amdgpu_sriov_vf(adev)) {
+		status = psp_v14_0_ring_stop(psp, ring_type);
+		if (status) {
+			DRM_ERROR("psp_v14_0_ring_stop_sriov failed!\n");
+			return status;
+		}
+
+		/* Ring address: low half to C2PMSG_102, high half to C2PMSG_103 */
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102,
+			     lower_32_bits(km_ring->ring_mem_mc_addr));
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_103,
+			     upper_32_bits(km_ring->ring_mem_mc_addr));
+
+		/* Write the ring initialization command to C2PMSG_101 */
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101,
+			     GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
+
+		/* there might be handshake issue with hardware which needs delay */
+		mdelay(20);
+
+		/* Wait for response flag (bit 31) in C2PMSG_101 */
+		return psp_wait_for(psp,
+				    SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101),
+				    MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+	}
+
+	/* Wait for sOS ready for ring creation */
+	status = psp_wait_for(psp,
+			      SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64),
+			      MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0);
+	if (status) {
+		DRM_ERROR("Failed to wait for trust OS ready for ring creation\n");
+		return status;
+	}
+
+	/* Ring address: low half to C2PMSG_69, high half to C2PMSG_70 */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_69,
+		     lower_32_bits(km_ring->ring_mem_mc_addr));
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_70,
+		     upper_32_bits(km_ring->ring_mem_mc_addr));
+	/* Write size of ring to C2PMSG_71 */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_71, km_ring->ring_size);
+
+	/* The ring initialization command is the ring type in bits 16 and up */
+	init_cmd = ring_type;
+	init_cmd <<= 16;
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, init_cmd);
+
+	/* there might be handshake issue with hardware which needs delay */
+	mdelay(20);
+
+	/* Wait for response flag (bit 31) in C2PMSG_64 */
+	return psp_wait_for(psp,
+			    SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64),
+			    MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+}
+
+/*
+ * Stop the PSP ring and release its backing buffer.
+ *
+ * The buffer is freed even when the stop fails; the stop result is what
+ * gets returned.
+ */
+static int psp_v14_0_ring_destroy(struct psp_context *psp,
+				  enum psp_ring_type ring_type)
+{
+	struct amdgpu_device *adev = psp->adev;
+	struct psp_ring *km_ring = &psp->km_ring;
+	int stop_status = psp_v14_0_ring_stop(psp, ring_type);
+
+	if (stop_status)
+		DRM_ERROR("Fail to stop psp ring\n");
+
+	amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+			      &km_ring->ring_mem_mc_addr,
+			      (void **)&km_ring->ring_mem);
+
+	return stop_status;
+}
+
+/* Read the ring write pointer: C2PMSG_102 for SR-IOV VFs, C2PMSG_67 otherwise. */
+static uint32_t psp_v14_0_ring_get_wptr(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	if (amdgpu_sriov_vf(adev))
+		return RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102);
+
+	return RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67);
+}
+
+/*
+ * Publish a new ring write pointer.
+ *
+ * SR-IOV VFs write the value to C2PMSG_102 and then issue
+ * GFX_CTRL_CMD_ID_CONSUME_CMD on C2PMSG_101; otherwise the value is
+ * written to C2PMSG_67 only.
+ */
+static void psp_v14_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	if (!amdgpu_sriov_vf(adev)) {
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67, value);
+		return;
+	}
+
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102, value);
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101,
+		     GFX_CTRL_CMD_ID_CONSUME_CMD);
+}
+
+/*
+ * Send a memory training message to the bootloader and wait for bit 31 of
+ * C2PMSG_35 to be set.
+ *
+ * The wait is repeated up to MEM_TRAIN_SEND_MSG_TIMEOUT_US /
+ * adev->usec_timeout times.
+ * Returns 0 once a wait succeeds, -ETIME when every attempt fails.
+ */
+static int psp_v14_0_memory_training_send_msg(struct psp_context *psp, int msg)
+{
+	struct amdgpu_device *adev = psp->adev;
+	uint32_t train_data_addr;
+	int max_wait;
+	int ret = -ETIME;
+	int i;
+
+	/* C2PMSG_36 gets the c2p training data offset >> 20, C2PMSG_35 the message */
+	train_data_addr = (psp->mem_train_ctx.c2p_train_data_offset >> 20);
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36, train_data_addr);
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, msg);
+
+	max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout;
+	for (i = 0; i < max_wait; i++) {
+		if (psp_wait_for(psp,
+				 SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35),
+				 0x80000000, 0x80000000,
+				 PSP_WAITREG_NOVERBOSE) == 0) {
+			ret = 0;
+			break;
+		}
+	}
+
+	dev_dbg(adev->dev, "training %s %s, cost %d @ %d ms\n",
+		(msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long",
+		(ret == 0) ? "succeed" : "failed",
+		i, adev->usec_timeout/1000);
+	return ret;
+}
+
+
+/*
+ * Run the requested memory training operations.
+ *
+ * @ops: bitmask of PSP_MEM_TRAIN_* operations. The mask is widened here
+ *       where one operation depends on another (short training needs a
+ *       restore, a restore from an invalid cache needs a save, a save
+ *       from an invalid p2c header needs long training, and long training
+ *       replaces short training and forces a save).
+ *
+ * Returns 0 on success or when training is unsupported / sOS is already
+ * alive, -EINVAL on failed initialisation or unusable VRAM aperture,
+ * -ENOMEM when the backup buffer cannot be allocated, -ENODEV when
+ * drm_dev_enter() fails, or the error of the training message.
+ */
+static int psp_v14_0_memory_training(struct psp_context *psp, uint32_t ops)
+{
+	struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+	struct amdgpu_device *adev = psp->adev;
+	uint32_t *pcache = (uint32_t *)ctx->sys_cache;
+	uint32_t p2c_header[4];
+	bool cache_valid, p2c_valid;
+	uint32_t encroached;
+	void *backup;
+	int ret, idx, msg;
+
+	if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) {
+		dev_dbg(adev->dev, "Memory training is not supported.\n");
+		return 0;
+	}
+	if (ctx->init != PSP_MEM_TRAIN_INIT_SUCCESS) {
+		dev_err(adev->dev, "Memory training initialization failure.\n");
+		return -EINVAL;
+	}
+
+	if (psp_v14_0_is_sos_alive(psp)) {
+		dev_dbg(adev->dev, "SOS is alive, skip memory training.\n");
+		return 0;
+	}
+
+	/* Fetch the p2c header from VRAM to compare against the cached copy */
+	amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false);
+	dev_dbg(adev->dev, "sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n",
+		pcache[0], pcache[1], pcache[2], pcache[3],
+		p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]);
+
+	cache_valid = (pcache[0] == MEM_TRAIN_SYSTEM_SIGNATURE);
+	p2c_valid = (p2c_header[0] == MEM_TRAIN_SYSTEM_SIGNATURE);
+
+	/* Resolve dependencies between the requested operations */
+	if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
+		dev_dbg(adev->dev, "Short training depends on restore.\n");
+		ops |= PSP_MEM_TRAIN_RESTORE;
+	}
+
+	if ((ops & PSP_MEM_TRAIN_RESTORE) && !cache_valid) {
+		dev_dbg(adev->dev, "sys_cache[0] is invalid, restore depends on save.\n");
+		ops |= PSP_MEM_TRAIN_SAVE;
+	}
+
+	if (p2c_valid && (!cache_valid || pcache[3] != p2c_header[3])) {
+		dev_dbg(adev->dev, "sys_cache is invalid or out-of-date, need save training data to sys_cache.\n");
+		ops |= PSP_MEM_TRAIN_SAVE;
+	}
+
+	if ((ops & PSP_MEM_TRAIN_SAVE) && !p2c_valid) {
+		dev_dbg(adev->dev, "p2c_header[0] is invalid, save depends on long training.\n");
+		ops |= PSP_MEM_TRAIN_SEND_LONG_MSG;
+	}
+
+	if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+		ops &= ~PSP_MEM_TRAIN_SEND_SHORT_MSG;
+		ops |= PSP_MEM_TRAIN_SAVE;
+	}
+
+	dev_dbg(adev->dev, "Memory training ops:%x.\n", ops);
+
+	if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+		/*
+		 * Long training will encroach a certain amount on the bottom of VRAM;
+		 * save the content from the bottom of VRAM to system memory
+		 * before training, and restore it after training to avoid
+		 * VRAM corruption.
+		 */
+		encroached = BIST_MEM_TRAINING_ENCROACHED_SIZE;
+
+		if (adev->gmc.visible_vram_size < encroached || !adev->mman.aper_base_kaddr) {
+			dev_err(adev->dev, "visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n",
+				adev->gmc.visible_vram_size,
+				adev->mman.aper_base_kaddr);
+			return -EINVAL;
+		}
+
+		backup = vmalloc(encroached);
+		if (!backup) {
+			dev_err(adev->dev, "failed to allocate system memory.\n");
+			return -ENOMEM;
+		}
+
+		if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
+			vfree(backup);
+			return -ENODEV;
+		}
+
+		memcpy_fromio(backup, adev->mman.aper_base_kaddr, encroached);
+		ret = psp_v14_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN);
+		if (ret) {
+			DRM_ERROR("Send long training msg failed.\n");
+		} else {
+			/* Training succeeded: put the saved VRAM content back */
+			memcpy_toio(adev->mman.aper_base_kaddr, backup, encroached);
+			amdgpu_device_flush_hdp(adev, NULL);
+		}
+		vfree(backup);
+		drm_dev_exit(idx);
+		if (ret)
+			return ret;
+	}
+
+	/* Save: read the p2c training data from VRAM into sys_cache */
+	if (ops & PSP_MEM_TRAIN_SAVE)
+		amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, ctx->sys_cache, ctx->train_data_size, false);
+
+	/* Restore: write sys_cache to the c2p training data area in VRAM */
+	if (ops & PSP_MEM_TRAIN_RESTORE)
+		amdgpu_device_vram_access(adev, ctx->c2p_train_data_offset, ctx->sys_cache, ctx->train_data_size, true);
+
+	if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
+		/* amdgpu_force_long_training > 0 turns the short request into a long one */
+		if (amdgpu_force_long_training > 0)
+			msg = PSP_BL__DRAM_LONG_TRAIN;
+		else
+			msg = PSP_BL__DRAM_SHORT_TRAIN;
+
+		ret = psp_v14_0_memory_training_send_msg(psp, msg);
+		if (ret) {
+			dev_err(adev->dev, "send training msg failed.\n");
+			return ret;
+		}
+	}
+	ctx->training_cnt++;
+	return 0;
+}
+
+/*
+ * Ask the PSP to load USB-C PD firmware from the given buffer address.
+ *
+ * Returns 0 on success, the psp_wait_for() error when the mailbox is not
+ * ready, -ETIME when bit 31 of C2PMSG_35 is still clear after
+ * USBC_PD_POLLING_LIMIT_S one-second polls, or -EIO when the low 16
+ * status bits are non-zero.
+ */
+static int psp_v14_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc_addr)
+{
+	struct amdgpu_device *adev = psp->adev;
+	uint32_t reg_status;
+	int ret, polls;
+
+	/*
+	 * LFB address which is aligned to 1MB address and has to be
+	 * right-shifted by 20 so that LFB address can be passed on a 32-bit C2P
+	 * register
+	 */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20));
+
+	ret = psp_wait_for(psp,
+			   SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35),
+			   0x80000000, 0x80000000, 0);
+	if (ret)
+		return ret;
+
+	/* Fireup interrupt so PSP can pick up the address */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, (GFX_CMD_USB_PD_USE_LFB << 16));
+
+	/* FW load takes very long time */
+	for (polls = 0; polls < USBC_PD_POLLING_LIMIT_S; polls++) {
+		msleep(1000);
+		reg_status = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35);
+
+		/* Bit 31 still clear: keep polling */
+		if (!(reg_status & 0x80000000))
+			continue;
+
+		if ((reg_status & 0xFFFF) != 0) {
+			DRM_ERROR("Address load failed - MP0_SMN_C2PMSG_35.Bits [15:0] = %04x\n",
+				  reg_status & 0xFFFF);
+			return -EIO;
+		}
+
+		return 0;
+	}
+
+	return -ETIME;
+}
+
+/*
+ * Query the USB-C PD firmware version.
+ *
+ * Writes C2PMSG_CMD_GFX_USB_PD_FW_VER to C2PMSG_35, waits for bit 31 and
+ * then stores the value of C2PMSG_36 in *fw_ver. *fw_ver is left
+ * untouched when the wait fails.
+ * Returns the result of psp_wait_for().
+ */
+static int psp_v14_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver)
+{
+	struct amdgpu_device *adev = psp->adev;
+	int status;
+
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER);
+
+	status = psp_wait_for(psp,
+			      SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35),
+			      0x80000000, 0x80000000, 0);
+	if (status)
+		return status;
+
+	*fw_ver = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36);
+
+	return 0;
+}
+
+/*
+ * Issue one SPI mailbox command and wait for it to complete.
+ *
+ * @cmd: one of the C2PMSG_CMD_SPI_* commands; it is written to
+ *       C2PMSG_115 shifted left by 16, then C2PMSG_73 is written to ring
+ *       the doorbell.
+ *
+ * C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE is waited for with
+ * psp_wait_for_spirom_update() and PSP_SPIROM_UPDATE_TIMEOUT; every other
+ * command uses the regular psp_wait_for().
+ *
+ * Returns 0 on success, the wait error on timeout, or -EIO when the low
+ * 16 status bits of C2PMSG_115 are non-zero.
+ */
+static int psp_v14_0_exec_spi_cmd(struct psp_context *psp, int cmd)
+{
+	uint32_t reg_status = 0, reg_val = 0;
+	struct amdgpu_device *adev = psp->adev;
+	int ret;
+
+	/* clear MBX ready (MBOX_READY_MASK bit is 0) and set update command */
+	reg_val |= (cmd << 16);
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_115, reg_val);
+
+	/* Ring the doorbell */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_73, 1);
+
+	if (cmd == C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE)
+		ret = psp_wait_for_spirom_update(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115),
+						 MBOX_READY_FLAG, MBOX_READY_MASK, PSP_SPIROM_UPDATE_TIMEOUT);
+	else
+		ret = psp_wait_for(
+			psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115),
+			MBOX_READY_FLAG, MBOX_READY_MASK, 0);
+
+	/*
+	 * Act on the result of the command-specific wait above. A second,
+	 * unconditional psp_wait_for() used to follow here; it overwrote
+	 * that result and added another wait period after a timeout.
+	 */
+	if (ret) {
+		dev_err(adev->dev, "SPI cmd %x timed out, ret = %d", cmd, ret);
+		return ret;
+	}
+
+	reg_status = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_115);
+	if ((reg_status & 0xFFFF) != 0) {
+		dev_err(adev->dev, "SPI cmd %x failed, fail status = %04x\n",
+			cmd, reg_status & 0xFFFF);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Drive the SPI ROM update sequence for the image at fw_pri_mc_addr.
+ *
+ * After the mailbox reports ready, the low and high halves of the address
+ * are passed through C2PMSG_116 with their respective commands, then the
+ * flash-image command is issued. psp->vbflash_done is set before the
+ * flash-image command is sent.
+ * Returns 0 on success or the first error encountered.
+ */
+static int psp_v14_0_update_spirom(struct psp_context *psp,
+				   uint64_t fw_pri_mc_addr)
+{
+	struct amdgpu_device *adev = psp->adev;
+	int status;
+
+	/* Confirm PSP is ready to start */
+	status = psp_wait_for(psp,
+			      SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115),
+			      MBOX_READY_FLAG, MBOX_READY_MASK, 0);
+	if (status) {
+		dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", status);
+		return status;
+	}
+
+	/* Low 32 bits of the image address */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_116, lower_32_bits(fw_pri_mc_addr));
+	status = psp_v14_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO);
+	if (status)
+		return status;
+
+	/* High 32 bits of the image address */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_116, upper_32_bits(fw_pri_mc_addr));
+	status = psp_v14_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI);
+	if (status)
+		return status;
+
+	psp->vbflash_done = true;
+
+	status = psp_v14_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE);
+
+	return status ? status : 0;
+}
+
+static int psp_v14_0_vbflash_status(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev; /* not named below; presumably consumed by the RREG32_SOC15 macro - TODO confirm */
+
+ return RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_115); /* raw value of the SPI mailbox register C2PMSG_115 */
+}
+
+static const struct psp_funcs psp_v14_0_funcs = { /* callback table installed by psp_v14_0_set_psp_funcs() */
+ .init_microcode = psp_v14_0_init_microcode,
+ .bootloader_load_kdb = psp_v14_0_bootloader_load_kdb, /* bootloader component loaders */
+ .bootloader_load_spl = psp_v14_0_bootloader_load_spl,
+ .bootloader_load_sysdrv = psp_v14_0_bootloader_load_sysdrv,
+ .bootloader_load_soc_drv = psp_v14_0_bootloader_load_soc_drv,
+ .bootloader_load_intf_drv = psp_v14_0_bootloader_load_intf_drv,
+ .bootloader_load_dbg_drv = psp_v14_0_bootloader_load_dbg_drv,
+ .bootloader_load_ras_drv = psp_v14_0_bootloader_load_ras_drv,
+ .bootloader_load_ipkeymgr_drv = psp_v14_0_bootloader_load_ipkeymgr_drv,
+ .bootloader_load_sos = psp_v14_0_bootloader_load_sos,
+ .ring_create = psp_v14_0_ring_create, /* ring management */
+ .ring_stop = psp_v14_0_ring_stop,
+ .ring_destroy = psp_v14_0_ring_destroy,
+ .ring_get_wptr = psp_v14_0_ring_get_wptr,
+ .ring_set_wptr = psp_v14_0_ring_set_wptr,
+ .mem_training = psp_v14_0_memory_training, /* memory training */
+ .load_usbc_pd_fw = psp_v14_0_load_usbc_pd_fw, /* USB-C PD firmware */
+ .read_usbc_pd_fw = psp_v14_0_read_usbc_pd_fw,
+ .update_spirom = psp_v14_0_update_spirom, /* SPI ROM update and status */
+ .vbflash_stat = psp_v14_0_vbflash_status
+};
+
+void psp_v14_0_set_psp_funcs(struct psp_context *psp)
+{
+ psp->funcs = &psp_v14_0_funcs; /* point this context at the PSP v14.0 callback table */
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.h
new file mode 100644
index 000000000000..dd18ba2cfad5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __PSP_V14_0_H__
+#define __PSP_V14_0_H__
+
+#include "amdgpu_psp.h"
+
+#define PSP_SPIROM_UPDATE_TIMEOUT 60000 /* 60s */
+
+void psp_v14_0_set_psp_funcs(struct psp_context *psp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index 1ed357cb0f49..833830bc3e2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -44,6 +44,7 @@
MODULE_FIRMWARE("amdgpu/vega10_sos.bin");
MODULE_FIRMWARE("amdgpu/vega10_asd.bin");
+MODULE_FIRMWARE("amdgpu/vega10_cap.bin");
MODULE_FIRMWARE("amdgpu/vega12_sos.bin");
MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
@@ -56,26 +57,18 @@ static int psp_v3_1_ring_stop(struct psp_context *psp,
static int psp_v3_1_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
- const char *chip_name;
+ char ucode_prefix[30];
int err = 0;
DRM_DEBUG("\n");
- switch (adev->asic_type) {
- case CHIP_VEGA10:
- chip_name = "vega10";
- break;
- case CHIP_VEGA12:
- chip_name = "vega12";
- break;
- default: BUG();
- }
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
- err = psp_init_sos_microcode(psp, chip_name);
+ err = psp_init_sos_microcode(psp, ucode_prefix);
if (err)
return err;
- err = psp_init_asd_microcode(psp, chip_name);
+ err = psp_init_asd_microcode(psp, ucode_prefix);
if (err)
return err;
@@ -98,7 +91,7 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
/* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
if (ret)
return ret;
@@ -116,7 +109,7 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
mdelay(20);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
return ret;
}
@@ -137,7 +130,7 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
/* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
if (ret)
return ret;
@@ -154,37 +147,11 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81),
- RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81),
- 0, true);
+ RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0,
+ PSP_WAITREG_CHANGED);
return ret;
}
-static int psp_v3_1_ring_init(struct psp_context *psp,
- enum psp_ring_type ring_type)
-{
- int ret = 0;
- struct psp_ring *ring;
- struct amdgpu_device *adev = psp->adev;
-
- ring = &psp->km_ring;
-
- ring->ring_type = ring_type;
-
- /* allocate 4k Page of Local Frame Buffer memory for ring */
- ring->ring_size = 0x1000;
- ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->firmware.rbuf,
- &ring->ring_mem_mc_addr,
- (void **)&ring->ring_mem);
- if (ret) {
- ring->ring_size = 0;
- return ret;
- }
-
- return 0;
-}
-
static void psp_v3_1_reroute_ih(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
@@ -201,7 +168,7 @@ static void psp_v3_1_reroute_ih(struct psp_context *psp)
mdelay(20);
psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
+ 0x80000000, 0x8000FFFF, 0);
/* Change IH ring for UMC */
tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b);
@@ -213,7 +180,7 @@ static void psp_v3_1_reroute_ih(struct psp_context *psp)
mdelay(20);
psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
+ 0x80000000, 0x8000FFFF, 0);
}
static int psp_v3_1_ring_create(struct psp_context *psp,
@@ -250,9 +217,9 @@ static int psp_v3_1_ring_create(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_101 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
- mmMP0_SMN_C2PMSG_101), 0x80000000,
- 0x8000FFFF, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x8000FFFF, 0);
} else {
/* Write low address of the ring to C2PMSG_69 */
@@ -273,10 +240,9 @@ static int psp_v3_1_ring_create(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_64 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
- mmMP0_SMN_C2PMSG_64), 0x80000000,
- 0x8000FFFF, false);
-
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x8000FFFF, 0);
}
return ret;
}
@@ -300,11 +266,13 @@ static int psp_v3_1_ring_stop(struct psp_context *psp,
/* Wait for response flag (bit 31) */
if (amdgpu_sriov_vf(adev))
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x80000000, 0);
else
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, 0);
return ret;
}
@@ -344,7 +312,7 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64);
- ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, 0);
if (ret) {
DRM_INFO("psp is not working correctly before mode1 reset!\n");
@@ -358,7 +326,7 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
- ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, 0);
if (ret) {
DRM_INFO("psp mode 1 reset failed!\n");
@@ -400,7 +368,6 @@ static const struct psp_funcs psp_v3_1_funcs = {
.init_microcode = psp_v3_1_init_microcode,
.bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv,
.bootloader_load_sos = psp_v3_1_bootloader_load_sos,
- .ring_init = psp_v3_1_ring_init,
.ring_create = psp_v3_1_ring_create,
.ring_stop = psp_v3_1_ring_stop,
.ring_destroy = psp_v3_1_ring_destroy,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 4509bd4cce2d..92ce580647cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -57,22 +57,19 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev);
MODULE_FIRMWARE("amdgpu/topaz_sdma.bin");
MODULE_FIRMWARE("amdgpu/topaz_sdma1.bin");
-static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
-{
+static const u32 sdma_offsets[SDMA_MAX_INSTANCE] = {
SDMA0_REGISTER_OFFSET,
SDMA1_REGISTER_OFFSET
};
-static const u32 golden_settings_iceland_a11[] =
-{
+static const u32 golden_settings_iceland_a11[] = {
mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
};
-static const u32 iceland_mgcg_cgcg_init[] =
-{
+static const u32 iceland_mgcg_cgcg_init[] = {
mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
};
@@ -113,10 +110,9 @@ static void sdma_v2_4_init_golden_registers(struct amdgpu_device *adev)
static void sdma_v2_4_free_microcode(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
- }
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
}
/**
@@ -131,7 +127,6 @@ static void sdma_v2_4_free_microcode(struct amdgpu_device *adev)
static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err = 0, i;
struct amdgpu_firmware_info *info = NULL;
const struct common_firmware_header *header = NULL;
@@ -143,18 +138,19 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
case CHIP_TOPAZ:
chip_name = "topaz";
break;
- default: BUG();
+ default:
+ BUG();
}
for (i = 0; i < adev->sdma.num_instances; i++) {
if (i == 0)
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sdma.bin", chip_name);
else
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
- err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sdma1.bin", chip_name);
if (err)
goto out;
hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
@@ -175,11 +171,10 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
out:
if (err) {
- pr_err("sdma_v2_4: Failed to load firmware \"%s\"\n", fw_name);
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
- }
+ pr_err("sdma_v2_4: Failed to load firmware \"%s_sdma%s.bin\"\n",
+ chip_name, i == 0 ? "" : "1");
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
}
return err;
}
@@ -194,7 +189,7 @@ out:
static uint64_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring)
{
/* XXX check if swapping is necessary on BE */
- return ring->adev->wb.wb[ring->rptr_offs] >> 2;
+ return *ring->rptr_cpu_addr >> 2;
}
/**
@@ -223,7 +218,7 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], ring->wptr << 2);
}
static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
@@ -342,15 +337,9 @@ static void sdma_v2_4_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
*/
static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev)
{
- struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
- struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
u32 rb_cntl, ib_cntl;
int i;
- if ((adev->mman.buffer_funcs_ring == sdma0) ||
- (adev->mman.buffer_funcs_ring == sdma1))
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
@@ -414,12 +403,10 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl;
u32 rb_bufsz;
- u32 wb_offset;
int i, j, r;
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
- wb_offset = (ring->rptr_offs * 4);
mutex_lock(&adev->srbm_mutex);
for (j = 0; j < 16; j++) {
@@ -455,9 +442,9 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
/* set the wb address whether it's enabled or not */
WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
- upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i],
- lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
@@ -465,7 +452,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
ring->wptr = 0;
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
/* enable DMA RB */
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
@@ -478,8 +465,6 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
#endif
/* enable DMA IBs */
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
-
- ring->sched.ready = true;
}
sdma_v2_4_enable(adev, true);
@@ -488,9 +473,6 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -651,7 +633,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -827,12 +809,17 @@ static void sdma_v2_4_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
-static int sdma_v2_4_early_init(void *handle)
+static int sdma_v2_4_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
adev->sdma.num_instances = SDMA_MAX_INSTANCE;
+ r = sdma_v2_4_init_microcode(adev);
+ if (r)
+ return r;
+
sdma_v2_4_set_ring_funcs(adev);
sdma_v2_4_set_buffer_funcs(adev);
sdma_v2_4_set_vm_pte_funcs(adev);
@@ -841,11 +828,11 @@ static int sdma_v2_4_early_init(void *handle)
return 0;
}
-static int sdma_v2_4_sw_init(void *handle)
+static int sdma_v2_4_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* SDMA trap event */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
@@ -865,12 +852,6 @@ static int sdma_v2_4_sw_init(void *handle)
if (r)
return r;
- r = sdma_v2_4_init_microcode(adev);
- if (r) {
- DRM_ERROR("Failed to load sdma firmware!\n");
- return r;
- }
-
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
@@ -887,9 +868,9 @@ static int sdma_v2_4_sw_init(void *handle)
return r;
}
-static int sdma_v2_4_sw_fini(void *handle)
+static int sdma_v2_4_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
@@ -899,10 +880,10 @@ static int sdma_v2_4_sw_fini(void *handle)
return 0;
}
-static int sdma_v2_4_hw_init(void *handle)
+static int sdma_v2_4_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
sdma_v2_4_init_golden_registers(adev);
@@ -913,32 +894,26 @@ static int sdma_v2_4_hw_init(void *handle)
return r;
}
-static int sdma_v2_4_hw_fini(void *handle)
+static int sdma_v2_4_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- sdma_v2_4_enable(adev, false);
+ sdma_v2_4_enable(ip_block->adev, false);
return 0;
}
-static int sdma_v2_4_suspend(void *handle)
+static int sdma_v2_4_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v2_4_hw_fini(adev);
+ return sdma_v2_4_hw_fini(ip_block);
}
-static int sdma_v2_4_resume(void *handle)
+static int sdma_v2_4_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v2_4_hw_init(adev);
+ return sdma_v2_4_hw_init(ip_block);
}
-static bool sdma_v2_4_is_idle(void *handle)
+static bool sdma_v2_4_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS2);
if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
@@ -948,11 +923,11 @@ static bool sdma_v2_4_is_idle(void *handle)
return true;
}
-static int sdma_v2_4_wait_for_idle(void *handle)
+static int sdma_v2_4_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
@@ -965,10 +940,10 @@ static int sdma_v2_4_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int sdma_v2_4_soft_reset(void *handle)
+static int sdma_v2_4_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS2);
if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) {
@@ -1107,14 +1082,14 @@ static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int sdma_v2_4_set_clockgating_state(void *handle,
+static int sdma_v2_4_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
/* XXX handled via the smc on VI */
return 0;
}
-static int sdma_v2_4_set_powergating_state(void *handle,
+static int sdma_v2_4_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1123,7 +1098,6 @@ static int sdma_v2_4_set_powergating_state(void *handle,
static const struct amd_ip_funcs sdma_v2_4_ip_funcs = {
.name = "sdma_v2_4",
.early_init = sdma_v2_4_early_init,
- .late_init = NULL,
.sw_init = sdma_v2_4_sw_init,
.sw_fini = sdma_v2_4_sw_fini,
.hw_init = sdma_v2_4_hw_init,
@@ -1142,6 +1116,7 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
.align_mask = 0xf,
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
.support_64bit_ptrs = false,
+ .secure_submission_supported = true,
.get_rptr = sdma_v2_4_ring_get_rptr,
.get_wptr = sdma_v2_4_ring_get_wptr,
.set_wptr = sdma_v2_4_ring_set_wptr,
@@ -1197,7 +1172,7 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: unused
+ * @copy_flags: unused
*
* Copy GPU buffers using the DMA engine (VI).
* Used by the amdgpu ttm implementation to move pages if
@@ -1207,7 +1182,7 @@ static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
@@ -1277,8 +1252,7 @@ static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}
-const struct amdgpu_ip_block_version sdma_v2_4_ip_block =
-{
+const struct amdgpu_ip_block_version sdma_v2_4_ip_block = {
.type = AMD_IP_BLOCK_TYPE_SDMA,
.major = 2,
.minor = 4,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 135727b59c41..1c076bd1cf73 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -250,10 +250,9 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev)
static void sdma_v3_0_free_microcode(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
- }
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
}
/**
@@ -268,7 +267,6 @@ static void sdma_v3_0_free_microcode(struct amdgpu_device *adev)
static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err = 0, i;
struct amdgpu_firmware_info *info = NULL;
const struct common_firmware_header *header = NULL;
@@ -306,13 +304,13 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
if (i == 0)
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sdma.bin", chip_name);
else
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
- err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sdma1.bin", chip_name);
if (err)
goto out;
hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
@@ -331,11 +329,10 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
}
out:
if (err) {
- pr_err("sdma_v3_0: Failed to load firmware \"%s\"\n", fw_name);
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
- }
+ pr_err("sdma_v3_0: Failed to load firmware \"%s_sdma%s.bin\"\n",
+ chip_name, i == 0 ? "" : "1");
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
}
return err;
}
@@ -350,7 +347,7 @@ out:
static uint64_t sdma_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
{
/* XXX check if swapping is necessary on BE */
- return ring->adev->wb.wb[ring->rptr_offs] >> 2;
+ return *ring->rptr_cpu_addr >> 2;
}
/**
@@ -367,7 +364,7 @@ static uint64_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell || ring->use_pollmem) {
/* XXX check if swapping is necessary on BE */
- wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2;
+ wptr = *ring->wptr_cpu_addr >> 2;
} else {
wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) >> 2;
}
@@ -387,16 +384,16 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell) {
- u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
+ u32 *wb = (u32 *)ring->wptr_cpu_addr;
/* XXX check if swapping is necessary on BE */
- WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
- WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2);
+ WRITE_ONCE(*wb, ring->wptr << 2);
+ WDOORBELL32(ring->doorbell_index, ring->wptr << 2);
} else if (ring->use_pollmem) {
- u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
+ u32 *wb = (u32 *)ring->wptr_cpu_addr;
- WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
+ WRITE_ONCE(*wb, ring->wptr << 2);
} else {
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], ring->wptr << 2);
}
}
@@ -516,15 +513,9 @@ static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
*/
static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev)
{
- struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
- struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
u32 rb_cntl, ib_cntl;
int i;
- if ((adev->mman.buffer_funcs_ring == sdma0) ||
- (adev->mman.buffer_funcs_ring == sdma1))
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
@@ -649,7 +640,6 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl, wptr_poll_cntl;
u32 rb_bufsz;
- u32 wb_offset;
u32 doorbell;
u64 wptr_gpu_addr;
int i, j, r;
@@ -657,7 +647,6 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
amdgpu_ring_clear_ring(ring);
- wb_offset = (ring->rptr_offs * 4);
mutex_lock(&adev->srbm_mutex);
for (j = 0; j < 16; j++) {
@@ -694,9 +683,9 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
/* set the wb address whether it's enabled or not */
WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
- upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i],
- lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
@@ -715,7 +704,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
WREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i], doorbell);
/* setup the wptr shadow polling */
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO + sdma_offsets[i],
lower_32_bits(wptr_gpu_addr));
@@ -723,7 +712,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
upper_32_bits(wptr_gpu_addr));
wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]);
if (ring->use_pollmem) {
- /*wptr polling is not enogh fast, directly clean the wptr register */
+ /*wptr polling is not enough fast, directly clean the wptr register */
WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
SDMA0_GFX_RB_WPTR_POLL_CNTL,
@@ -746,8 +735,6 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
#endif
/* enable DMA IBs */
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
-
- ring->sched.ready = true;
}
/* unhalt the MEs */
@@ -760,9 +747,6 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -922,7 +906,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
else
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -1098,9 +1082,10 @@ static void sdma_v3_0_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
-static int sdma_v3_0_early_init(void *handle)
+static int sdma_v3_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
switch (adev->asic_type) {
case CHIP_STONEY:
@@ -1111,6 +1096,10 @@ static int sdma_v3_0_early_init(void *handle)
break;
}
+ r = sdma_v3_0_init_microcode(adev);
+ if (r)
+ return r;
+
sdma_v3_0_set_ring_funcs(adev);
sdma_v3_0_set_buffer_funcs(adev);
sdma_v3_0_set_vm_pte_funcs(adev);
@@ -1119,11 +1108,11 @@ static int sdma_v3_0_early_init(void *handle)
return 0;
}
-static int sdma_v3_0_sw_init(void *handle)
+static int sdma_v3_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* SDMA trap event */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
@@ -1143,12 +1132,6 @@ static int sdma_v3_0_sw_init(void *handle)
if (r)
return r;
- r = sdma_v3_0_init_microcode(adev);
- if (r) {
- DRM_ERROR("Failed to load sdma firmware!\n");
- return r;
- }
-
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
@@ -1171,9 +1154,9 @@ static int sdma_v3_0_sw_init(void *handle)
return r;
}
-static int sdma_v3_0_sw_fini(void *handle)
+static int sdma_v3_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
@@ -1183,10 +1166,10 @@ static int sdma_v3_0_sw_fini(void *handle)
return 0;
}
-static int sdma_v3_0_hw_init(void *handle)
+static int sdma_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
sdma_v3_0_init_golden_registers(adev);
@@ -1197,9 +1180,9 @@ static int sdma_v3_0_hw_init(void *handle)
return r;
}
-static int sdma_v3_0_hw_fini(void *handle)
+static int sdma_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
sdma_v3_0_ctx_switch_enable(adev, false);
sdma_v3_0_enable(adev, false);
@@ -1207,23 +1190,19 @@ static int sdma_v3_0_hw_fini(void *handle)
return 0;
}
-static int sdma_v3_0_suspend(void *handle)
+static int sdma_v3_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v3_0_hw_fini(adev);
+ return sdma_v3_0_hw_fini(ip_block);
}
-static int sdma_v3_0_resume(void *handle)
+static int sdma_v3_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v3_0_hw_init(adev);
+ return sdma_v3_0_hw_init(ip_block);
}
-static bool sdma_v3_0_is_idle(void *handle)
+static bool sdma_v3_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS2);
if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
@@ -1233,11 +1212,11 @@ static bool sdma_v3_0_is_idle(void *handle)
return true;
}
-static int sdma_v3_0_wait_for_idle(void *handle)
+static int sdma_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
@@ -1250,9 +1229,9 @@ static int sdma_v3_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static bool sdma_v3_0_check_soft_reset(void *handle)
+static bool sdma_v3_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS2);
@@ -1271,9 +1250,9 @@ static bool sdma_v3_0_check_soft_reset(void *handle)
}
}
-static int sdma_v3_0_pre_soft_reset(void *handle)
+static int sdma_v3_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
if (!adev->sdma.srbm_soft_reset)
@@ -1290,9 +1269,9 @@ static int sdma_v3_0_pre_soft_reset(void *handle)
return 0;
}
-static int sdma_v3_0_post_soft_reset(void *handle)
+static int sdma_v3_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
if (!adev->sdma.srbm_soft_reset)
@@ -1309,9 +1288,9 @@ static int sdma_v3_0_post_soft_reset(void *handle)
return 0;
}
-static int sdma_v3_0_soft_reset(void *handle)
+static int sdma_v3_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp;
@@ -1506,10 +1485,10 @@ static void sdma_v3_0_update_sdma_medium_grain_light_sleep(
}
}
-static int sdma_v3_0_set_clockgating_state(void *handle,
+static int sdma_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1529,15 +1508,15 @@ static int sdma_v3_0_set_clockgating_state(void *handle,
return 0;
}
-static int sdma_v3_0_set_powergating_state(void *handle,
+static int sdma_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void sdma_v3_0_get_clockgating_state(void *handle, u32 *flags)
+static void sdma_v3_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -1557,7 +1536,6 @@ static void sdma_v3_0_get_clockgating_state(void *handle, u32 *flags)
static const struct amd_ip_funcs sdma_v3_0_ip_funcs = {
.name = "sdma_v3_0",
.early_init = sdma_v3_0_early_init,
- .late_init = NULL,
.sw_init = sdma_v3_0_sw_init,
.sw_fini = sdma_v3_0_sw_fini,
.hw_init = sdma_v3_0_hw_init,
@@ -1580,6 +1558,7 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
.align_mask = 0xf,
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
.support_64bit_ptrs = false,
+ .secure_submission_supported = true,
.get_rptr = sdma_v3_0_ring_get_rptr,
.get_wptr = sdma_v3_0_ring_get_wptr,
.set_wptr = sdma_v3_0_ring_set_wptr,
@@ -1635,7 +1614,7 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: unused
+ * @copy_flags: unused
*
* Copy GPU buffers using the DMA engine (VI).
* Used by the amdgpu ttm implementation to move pages if
@@ -1645,7 +1624,7 @@ static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index e8e4749e9c79..f38004e6064e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -72,6 +72,53 @@ MODULE_FIRMWARE("amdgpu/renoir_sdma.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_sdma.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_sdma.bin");
+static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_0[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS1_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS2_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS3_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UCODE_CHECKSUM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_VM_CNTL)
+};
+
#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L
#define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L
@@ -469,7 +516,7 @@ static int sdma_v4_0_irq_id_to_seq(unsigned client_id)
static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(4, 0, 0):
soc15_program_register_sequence(adev,
golden_settings_sdma_4,
@@ -539,7 +586,7 @@ static void sdma_v4_0_setup_ulv(struct amdgpu_device *adev)
* The only chips with SDMAv4 and ULV are VG10 and VG20.
* Server SKUs take a different hysteresis setting from other SKUs.
*/
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(4, 0, 0):
if (adev->pdev->device == 0x6860)
break;
@@ -561,44 +608,6 @@ static void sdma_v4_0_setup_ulv(struct amdgpu_device *adev)
}
}
-static int sdma_v4_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
-{
- int err = 0;
- const struct sdma_firmware_header_v1_0 *hdr;
-
- err = amdgpu_ucode_validate(sdma_inst->fw);
- if (err)
- return err;
-
- hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data;
- sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version);
- sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version);
-
- if (sdma_inst->feature_version >= 20)
- sdma_inst->burst_nop = true;
-
- return 0;
-}
-
-static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev)
-{
- int i;
-
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
-
- /* arcturus shares the same FW memory across
- all SDMA isntances */
- if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) ||
- adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 0))
- break;
- }
-
- memset((void *)adev->sdma.instance, 0,
- sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
-}
-
/**
* sdma_v4_0_init_microcode - load ucode images from disk
*
@@ -613,101 +622,25 @@ static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev)
// vega10 real chip need to use PSP to load firmware
static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
{
- const char *chip_name;
- char fw_name[30];
- int err = 0, i;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
-
- DRM_DEBUG("\n");
+ int ret, i;
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
- case IP_VERSION(4, 0, 0):
- chip_name = "vega10";
- break;
- case IP_VERSION(4, 0, 1):
- chip_name = "vega12";
- break;
- case IP_VERSION(4, 2, 0):
- chip_name = "vega20";
- break;
- case IP_VERSION(4, 1, 0):
- case IP_VERSION(4, 1, 1):
- if (adev->apu_flags & AMD_APU_IS_RAVEN2)
- chip_name = "raven2";
- else if (adev->apu_flags & AMD_APU_IS_PICASSO)
- chip_name = "picasso";
- else
- chip_name = "raven";
- break;
- case IP_VERSION(4, 2, 2):
- chip_name = "arcturus";
- break;
- case IP_VERSION(4, 1, 2):
- if (adev->apu_flags & AMD_APU_IS_RENOIR)
- chip_name = "renoir";
- else
- chip_name = "green_sardine";
- break;
- case IP_VERSION(4, 4, 0):
- chip_name = "aldebaran";
- break;
- default:
- BUG();
- }
-
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
-
- err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev);
- if (err)
- goto out;
-
- err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[0]);
- if (err)
- goto out;
-
- for (i = 1; i < adev->sdma.num_instances; i++) {
- if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) ||
- adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 0)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 2, 2) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 4, 0)) {
/* Acturus & Aldebaran will leverage the same FW memory
for every SDMA instance */
- memcpy((void *)&adev->sdma.instance[i],
- (void *)&adev->sdma.instance[0],
- sizeof(struct amdgpu_sdma_instance));
- }
- else {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i);
-
- err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
- if (err)
- goto out;
-
- err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[i]);
- if (err)
- goto out;
- }
- }
-
- DRM_DEBUG("psp_load == '%s'\n",
- adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- for (i = 0; i < adev->sdma.num_instances; i++) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
- info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
- info->fw = adev->sdma.instance[i].fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ ret = amdgpu_sdma_init_microcode(adev, 0, true);
+ break;
+ } else {
+ ret = amdgpu_sdma_init_microcode(adev, i, false);
+ if (ret)
+ return ret;
}
}
-out:
- if (err) {
- DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name);
- sdma_v4_0_destroy_inst_ctx(adev);
- }
- return err;
+ return ret;
}
/**
@@ -722,7 +655,7 @@ static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
u64 *rptr;
/* XXX check if swapping is necessary on BE */
- rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);
+ rptr = ((u64 *)ring->rptr_cpu_addr);
DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
return ((*rptr) >> 2);
@@ -742,7 +675,7 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
+ wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
} else {
wptr = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI);
@@ -768,12 +701,12 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
DRM_DEBUG("Setting write pointer\n");
if (ring->use_doorbell) {
- u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];
+ u64 *wb = (u64 *)ring->wptr_cpu_addr;
DRM_DEBUG("Using doorbell -- "
"wptr_offs == 0x%08x "
- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ "lower_32_bits(ring->wptr << 2) == 0x%08x "
+ "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
ring->wptr_offs,
lower_32_bits(ring->wptr << 2),
upper_32_bits(ring->wptr << 2));
@@ -811,7 +744,7 @@ static uint64_t sdma_v4_0_page_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
+ wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
} else {
wptr = RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI);
wptr = wptr << 32;
@@ -833,7 +766,7 @@ static void sdma_v4_0_page_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell) {
- u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];
+ u64 *wb = (u64 *)ring->wptr_cpu_addr;
/* XXX check if swapping is necessary on BE */
WRITE_ONCE(*wb, (ring->wptr << 2));
@@ -980,31 +913,23 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
/**
- * sdma_v4_0_gfx_stop - stop the gfx async dma engines
+ * sdma_v4_0_gfx_enable - enable the gfx async dma engines
*
* @adev: amdgpu_device pointer
- *
- * Stop the gfx async dma ring buffers (VEGA10).
+ * @enable: enable SDMA RB/IB
+ * control the gfx async dma ring buffers (VEGA10).
*/
-static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
+static void sdma_v4_0_gfx_enable(struct amdgpu_device *adev, bool enable)
{
- struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
u32 rb_cntl, ib_cntl;
- int i, unset = 0;
+ int i;
for (i = 0; i < adev->sdma.num_instances; i++) {
- sdma[i] = &adev->sdma.instance[i].ring;
-
- if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
- unset = 1;
- }
-
rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, enable ? 1 : 0);
WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, enable ? 1 : 0);
WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
}
}
@@ -1030,20 +955,10 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev)
*/
static void sdma_v4_0_page_stop(struct amdgpu_device *adev)
{
- struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
u32 rb_cntl, ib_cntl;
int i;
- bool unset = false;
for (i = 0; i < adev->sdma.num_instances; i++) {
- sdma[i] = &adev->sdma.instance[i].page;
-
- if ((adev->mman.buffer_funcs_ring == sdma[i]) &&
- (!unset)) {
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
- unset = true;
- }
-
rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
RB_ENABLE, 0);
@@ -1108,7 +1023,8 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
* Arcturus for the moment and firmware version 14
* and above.
*/
- if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) &&
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 2, 2) &&
adev->sdma.instance[i].fw_version >= 14)
WREG32_SDMA(i, mmSDMA0_PUB_DUMMY_REG2, enable);
/* Extend page fault timeout to avoid interrupt storm */
@@ -1131,7 +1047,7 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
int i;
if (!enable) {
- sdma_v4_0_gfx_stop(adev);
+ sdma_v4_0_gfx_enable(adev, enable);
sdma_v4_0_rlc_stop(adev);
if (adev->sdma.has_page_queue)
sdma_v4_0_page_stop(adev);
@@ -1174,13 +1090,10 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
{
struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
u32 rb_cntl, ib_cntl, wptr_poll_cntl;
- u32 wb_offset;
u32 doorbell;
u32 doorbell_offset;
u64 wptr_gpu_addr;
- wb_offset = (ring->rptr_offs * 4);
-
rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
@@ -1193,9 +1106,9 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
/* set the wb address whether it's enabled or not */
WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI,
- upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO,
- lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
RPTR_WRITEBACK_ENABLE, 1);
@@ -1225,7 +1138,7 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 0);
/* setup the wptr shadow polling */
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO,
lower_32_bits(wptr_gpu_addr));
WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI,
@@ -1247,8 +1160,6 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
#endif
/* enable DMA IBs */
WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
-
- ring->sched.ready = true;
}
/**
@@ -1264,13 +1175,10 @@ static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
{
struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
u32 rb_cntl, ib_cntl, wptr_poll_cntl;
- u32 wb_offset;
u32 doorbell;
u32 doorbell_offset;
u64 wptr_gpu_addr;
- wb_offset = (ring->rptr_offs * 4);
-
rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
@@ -1283,9 +1191,9 @@ static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
/* set the wb address whether it's enabled or not */
WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_HI,
- upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_LO,
- lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
RPTR_WRITEBACK_ENABLE, 1);
@@ -1316,7 +1224,7 @@ static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 0);
/* setup the wptr shadow polling */
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_LO,
lower_32_bits(wptr_gpu_addr));
WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_HI,
@@ -1338,8 +1246,6 @@ static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
#endif
/* enable DMA IBs */
WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
-
- ring->sched.ready = true;
}
static void
@@ -1395,7 +1301,7 @@ static void sdma_v4_0_init_pg(struct amdgpu_device *adev)
if (!(adev->pg_flags & AMD_PG_SUPPORT_SDMA))
return;
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(4, 1, 0):
case IP_VERSION(4, 1, 1):
case IP_VERSION(4, 1, 2):
@@ -1539,13 +1445,7 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
r = amdgpu_ring_test_helper(page);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == page)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return r;
@@ -1665,7 +1565,7 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -1838,7 +1738,7 @@ static bool sdma_v4_0_fw_support_paging_queue(struct amdgpu_device *adev)
{
uint fw_version = adev->sdma.instance[0].fw_version;
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(4, 0, 0):
return fw_version >= 430;
case IP_VERSION(4, 0, 1):
@@ -1851,19 +1751,17 @@ static bool sdma_v4_0_fw_support_paging_queue(struct amdgpu_device *adev)
}
}
-static int sdma_v4_0_early_init(void *handle)
+static int sdma_v4_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = sdma_v4_0_init_microcode(adev);
- if (r) {
- DRM_ERROR("Failed to load sdma firmware!\n");
+ if (r)
return r;
- }
/* TODO: Page queue breaks driver reload under SRIOV */
- if ((adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 0, 0)) &&
+ if ((amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 0, 0)) &&
amdgpu_sriov_vf((adev)))
adev->sdma.has_page_queue = false;
else if (sdma_v4_0_fw_support_paging_queue(adev))
@@ -1882,32 +1780,25 @@ static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
struct amdgpu_iv_entry *entry);
-static int sdma_v4_0_late_init(void *handle)
+static int sdma_v4_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct ras_ih_if ih_info = {
- .cb = sdma_v4_0_process_ras_data_cb,
- };
+ struct amdgpu_device *adev = ip_block->adev;
sdma_v4_0_setup_ulv(adev);
- if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
- if (adev->sdma.funcs &&
- adev->sdma.funcs->reset_ras_error_count)
- adev->sdma.funcs->reset_ras_error_count(adev);
- }
+ if (!amdgpu_persistent_edc_harvesting_supported(adev))
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__SDMA);
- if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init)
- return adev->sdma.funcs->ras_late_init(adev, &ih_info);
- else
- return 0;
+ return 0;
}
-static int sdma_v4_0_sw_init(void *handle)
+static int sdma_v4_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0);
+ uint32_t *ptr;
/* SDMA trap event */
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1965,6 +1856,17 @@ static int sdma_v4_0_sw_init(void *handle)
/* doorbell size is 2 dwords, get DWORD offset */
ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
+ /*
+ * On Arcturus, SDMA instance 5~7 has a different vmhub
+ * type(AMDGPU_MMHUB1).
+ */
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 2, 2) &&
+ i >= 5)
+ ring->vm_hub = AMDGPU_MMHUB1(0);
+ else
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+
sprintf(ring->name, "sdma%d", i);
r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
AMDGPU_SDMA_IRQ_INSTANCE0 + i,
@@ -1980,8 +1882,27 @@ static int sdma_v4_0_sw_init(void *handle)
/* paging queue use same doorbell index/routing as gfx queue
* with 0x400 (4096 dwords) offset on second doorbell page
*/
- ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
- ring->doorbell_index += 0x400;
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
+ IP_VERSION(4, 0, 0) &&
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) <
+ IP_VERSION(4, 2, 0)) {
+ ring->doorbell_index =
+ adev->doorbell_index.sdma_engine[i] << 1;
+ ring->doorbell_index += 0x400;
+ } else {
+ /* From vega20, the sdma_doorbell_range in 1st
+ * doorbell page is reserved for page queue.
+ */
+ ring->doorbell_index =
+ (adev->doorbell_index.sdma_engine[i] + 1) << 1;
+ }
+
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 2, 2) &&
+ i >= 5)
+ ring->vm_hub = AMDGPU_MMHUB1(0);
+ else
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "page%d", i);
r = amdgpu_ring_init(adev, ring, 1024,
@@ -1993,83 +1914,110 @@ static int sdma_v4_0_sw_init(void *handle)
}
}
+ if (amdgpu_sdma_ras_sw_init(adev)) {
+ dev_err(adev->dev, "Failed to initialize sdma ras block!\n");
+ return -EINVAL;
+ }
+
+ /* Allocate memory for SDMA IP Dump buffer */
+ ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (ptr)
+ adev->sdma.ip_dump = ptr;
+ else
+ DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+
return r;
}
-static int sdma_v4_0_sw_fini(void *handle)
+static int sdma_v4_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
- if (adev->sdma.funcs && adev->sdma.funcs->ras_fini)
- adev->sdma.funcs->ras_fini(adev);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
if (adev->sdma.has_page_queue)
amdgpu_ring_fini(&adev->sdma.instance[i].page);
}
- sdma_v4_0_destroy_inst_ctx(adev);
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 2, 2) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 0))
+ amdgpu_sdma_destroy_inst_ctx(adev, true);
+ else
+ amdgpu_sdma_destroy_inst_ctx(adev, false);
+
+ kfree(adev->sdma.ip_dump);
return 0;
}
-static int sdma_v4_0_hw_init(void *handle)
+static int sdma_v4_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->flags & AMD_IS_APU)
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false, 0);
if (!amdgpu_sriov_vf(adev))
sdma_v4_0_init_golden_registers(adev);
- r = sdma_v4_0_start(adev);
-
- return r;
+ return sdma_v4_0_start(adev);
}
-static int sdma_v4_0_hw_fini(void *handle)
+static int sdma_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
if (amdgpu_sriov_vf(adev))
return 0;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
- AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+ }
}
sdma_v4_0_ctx_switch_enable(adev, false);
sdma_v4_0_enable(adev, false);
if (adev->flags & AMD_IS_APU)
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true, 0);
return 0;
}
-static int sdma_v4_0_suspend(void *handle)
+static int sdma_v4_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* SMU saves SDMA state for us */
+ if (adev->in_s0ix) {
+ sdma_v4_0_gfx_enable(adev, false);
+ return 0;
+ }
- return sdma_v4_0_hw_fini(adev);
+ return sdma_v4_0_hw_fini(ip_block);
}
-static int sdma_v4_0_resume(void *handle)
+static int sdma_v4_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- return sdma_v4_0_hw_init(adev);
+ /* SMU restores SDMA state for us */
+ if (adev->in_s0ix) {
+ sdma_v4_0_enable(adev, true);
+ sdma_v4_0_gfx_enable(adev, true);
+ return 0;
+ }
+
+ return sdma_v4_0_hw_init(ip_block);
}
-static bool sdma_v4_0_is_idle(void *handle)
+static bool sdma_v4_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 i;
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -2082,11 +2030,11 @@ static bool sdma_v4_0_is_idle(void *handle)
return true;
}
-static int sdma_v4_0_wait_for_idle(void *handle)
+static int sdma_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i, j;
u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
for (j = 0; j < adev->sdma.num_instances; j++) {
@@ -2101,7 +2049,7 @@ static int sdma_v4_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int sdma_v4_0_soft_reset(void *handle)
+static int sdma_v4_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
@@ -2127,23 +2075,28 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- uint32_t instance;
+ int instance;
DRM_DEBUG("IH: SDMA trap\n");
instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
+ if (instance < 0)
+ return instance;
+
switch (entry->ring_id) {
case 0:
amdgpu_fence_process(&adev->sdma.instance[instance].ring);
break;
case 1:
- if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 0))
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 2, 0))
amdgpu_fence_process(&adev->sdma.instance[instance].page);
break;
case 2:
/* XXX compute */
break;
case 3:
- if (adev->ip_versions[SDMA0_HWIP][0] != IP_VERSION(4, 2, 0))
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) !=
+ IP_VERSION(4, 2, 0))
amdgpu_fence_process(&adev->sdma.instance[instance].page);
break;
}
@@ -2212,7 +2165,7 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
int instance;
- struct amdgpu_task_info task_info;
+ struct amdgpu_task_info *task_info;
u64 addr;
instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
@@ -2224,15 +2177,20 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev,
addr = (u64)entry->src_data[0] << 12;
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
- memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
-
dev_dbg_ratelimited(adev->dev,
- "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u "
- "pasid:%u, for process %s pid %d thread %s pid %d\n",
- instance, addr, entry->src_id, entry->ring_id, entry->vmid,
- entry->pasid, task_info.process_name, task_info.tgid,
- task_info.task_name, task_info.pid);
+ "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u pasid:%u\n",
+ instance, addr, entry->src_id, entry->ring_id, entry->vmid,
+ entry->pasid);
+
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ dev_dbg_ratelimited(adev->dev,
+ " for process %s pid %d thread %s pid %d\n",
+ task_info->process_name, task_info->tgid,
+ task_info->task.comm, task_info->task.pid);
+ amdgpu_vm_put_task_info(task_info);
+ }
+
return 0;
}
@@ -2339,10 +2297,10 @@ static void sdma_v4_0_update_medium_grain_light_sleep(
}
}
-static int sdma_v4_0_set_clockgating_state(void *handle,
+static int sdma_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -2354,12 +2312,12 @@ static int sdma_v4_0_set_clockgating_state(void *handle,
return 0;
}
-static int sdma_v4_0_set_powergating_state(void *handle,
+static int sdma_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(4, 1, 0):
case IP_VERSION(4, 1, 1):
case IP_VERSION(4, 1, 2):
@@ -2373,9 +2331,9 @@ static int sdma_v4_0_set_powergating_state(void *handle,
return 0;
}
-static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags)
+static void sdma_v4_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -2392,6 +2350,46 @@ static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags)
*flags |= AMD_CG_SUPPORT_SDMA_LS;
}
+static void sdma_v4_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0);
+ uint32_t instance_offset;
+
+ if (!adev->sdma.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count;
+ drm_printf(p, "\nInstance:%d\n", i);
+
+ for (j = 0; j < reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_4_0[j].reg_name,
+ adev->sdma.ip_dump[instance_offset + j]);
+ }
+}
+
+static void sdma_v4_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t instance_offset;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0);
+
+ if (!adev->sdma.ip_dump)
+ return;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count;
+ for (j = 0; j < reg_count; j++)
+ adev->sdma.ip_dump[instance_offset + j] =
+ RREG32(sdma_v4_0_get_reg_offset(adev, i,
+ sdma_reg_list_4_0[j].reg_offset));
+ }
+}
+
const struct amd_ip_funcs sdma_v4_0_ip_funcs = {
.name = "sdma_v4_0",
.early_init = sdma_v4_0_early_init,
@@ -2408,50 +2406,16 @@ const struct amd_ip_funcs sdma_v4_0_ip_funcs = {
.set_clockgating_state = sdma_v4_0_set_clockgating_state,
.set_powergating_state = sdma_v4_0_set_powergating_state,
.get_clockgating_state = sdma_v4_0_get_clockgating_state,
+ .dump_ip_state = sdma_v4_0_dump_ip_state,
+ .print_ip_state = sdma_v4_0_print_ip_state,
};
static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
.type = AMDGPU_RING_TYPE_SDMA,
- .align_mask = 0xf,
- .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
- .support_64bit_ptrs = true,
- .vmhub = AMDGPU_MMHUB_0,
- .get_rptr = sdma_v4_0_ring_get_rptr,
- .get_wptr = sdma_v4_0_ring_get_wptr,
- .set_wptr = sdma_v4_0_ring_set_wptr,
- .emit_frame_size =
- 6 + /* sdma_v4_0_ring_emit_hdp_flush */
- 3 + /* hdp invalidate */
- 6 + /* sdma_v4_0_ring_emit_pipeline_sync */
- /* sdma_v4_0_ring_emit_vm_flush */
- SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
- SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
- 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
- .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
- .emit_ib = sdma_v4_0_ring_emit_ib,
- .emit_fence = sdma_v4_0_ring_emit_fence,
- .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
- .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
- .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
- .test_ring = sdma_v4_0_ring_test_ring,
- .test_ib = sdma_v4_0_ring_test_ib,
- .insert_nop = sdma_v4_0_ring_insert_nop,
- .pad_ib = sdma_v4_0_ring_pad_ib,
- .emit_wreg = sdma_v4_0_ring_emit_wreg,
- .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
- .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
-};
-
-/*
- * On Arcturus, SDMA instance 5~7 has a different vmhub type(AMDGPU_MMHUB_1).
- * So create a individual constant ring_funcs for those instances.
- */
-static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs_2nd_mmhub = {
- .type = AMDGPU_RING_TYPE_SDMA,
- .align_mask = 0xf,
+ .align_mask = 0xff,
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_MMHUB_1,
+ .secure_submission_supported = true,
.get_rptr = sdma_v4_0_ring_get_rptr,
.get_wptr = sdma_v4_0_ring_get_wptr,
.set_wptr = sdma_v4_0_ring_set_wptr,
@@ -2480,42 +2444,10 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs_2nd_mmhub = {
static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = {
.type = AMDGPU_RING_TYPE_SDMA,
- .align_mask = 0xf,
+ .align_mask = 0xff,
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_MMHUB_0,
- .get_rptr = sdma_v4_0_ring_get_rptr,
- .get_wptr = sdma_v4_0_page_ring_get_wptr,
- .set_wptr = sdma_v4_0_page_ring_set_wptr,
- .emit_frame_size =
- 6 + /* sdma_v4_0_ring_emit_hdp_flush */
- 3 + /* hdp invalidate */
- 6 + /* sdma_v4_0_ring_emit_pipeline_sync */
- /* sdma_v4_0_ring_emit_vm_flush */
- SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
- SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
- 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
- .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
- .emit_ib = sdma_v4_0_ring_emit_ib,
- .emit_fence = sdma_v4_0_ring_emit_fence,
- .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
- .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
- .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
- .test_ring = sdma_v4_0_ring_test_ring,
- .test_ib = sdma_v4_0_ring_test_ib,
- .insert_nop = sdma_v4_0_ring_insert_nop,
- .pad_ib = sdma_v4_0_ring_pad_ib,
- .emit_wreg = sdma_v4_0_ring_emit_wreg,
- .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
- .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
-};
-
-static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs_2nd_mmhub = {
- .type = AMDGPU_RING_TYPE_SDMA,
- .align_mask = 0xf,
- .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
- .support_64bit_ptrs = true,
- .vmhub = AMDGPU_MMHUB_1,
+ .secure_submission_supported = true,
.get_rptr = sdma_v4_0_ring_get_rptr,
.get_wptr = sdma_v4_0_page_ring_get_wptr,
.set_wptr = sdma_v4_0_page_ring_set_wptr,
@@ -2547,19 +2479,10 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
int i;
for (i = 0; i < adev->sdma.num_instances; i++) {
- if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5)
- adev->sdma.instance[i].ring.funcs =
- &sdma_v4_0_ring_funcs_2nd_mmhub;
- else
- adev->sdma.instance[i].ring.funcs =
- &sdma_v4_0_ring_funcs;
+ adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs;
adev->sdma.instance[i].ring.me = i;
if (adev->sdma.has_page_queue) {
- if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5)
- adev->sdma.instance[i].page.funcs =
- &sdma_v4_0_page_ring_funcs_2nd_mmhub;
- else
- adev->sdma.instance[i].page.funcs =
+ adev->sdma.instance[i].page.funcs =
&sdma_v4_0_page_ring_funcs;
adev->sdma.instance[i].page.me = i;
}
@@ -2628,7 +2551,7 @@ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: if a secure copy should be used
+ * @copy_flags: copy flags for the buffers
*
* Copy GPU buffers using the DMA engine (VEGA10/12).
* Used by the amdgpu ttm implementation to move pages if
@@ -2638,11 +2561,11 @@ static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
- SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
ib->ptr[ib->length_dw++] = byte_count - 1;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
@@ -2740,7 +2663,7 @@ static void sdma_v4_0_get_ras_error_count(uint32_t value,
}
}
-static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,
+static int sdma_v4_0_query_ras_error_count_by_instance(struct amdgpu_device *adev,
uint32_t instance, void *ras_error_status)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
@@ -2762,6 +2685,18 @@ static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,
return 0;
};
+/*
+ * Query RAS error counts for every SDMA instance, handing @ras_error_status
+ * through to the per-instance helper. Stops at the first instance whose
+ * query returns non-zero and logs which instance failed.
+ */
+static void sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status)
+{
+	int inst;
+
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		if (!sdma_v4_0_query_ras_error_count_by_instance(adev, inst, ras_error_status))
+			continue;
+
+		dev_err(adev->dev, "Query ras error count failed in SDMA%d\n", inst);
+		return;
+	}
+}
+
static void sdma_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
{
int i;
@@ -2773,22 +2708,27 @@ static void sdma_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
}
}
-static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = {
- .ras_late_init = amdgpu_sdma_ras_late_init,
- .ras_fini = amdgpu_sdma_ras_fini,
+const struct amdgpu_ras_block_hw_ops sdma_v4_0_ras_hw_ops = {
.query_ras_error_count = sdma_v4_0_query_ras_error_count,
.reset_ras_error_count = sdma_v4_0_reset_ras_error_count,
};
+static struct amdgpu_sdma_ras sdma_v4_0_ras = {
+ .ras_block = {
+ .hw_ops = &sdma_v4_0_ras_hw_ops,
+ .ras_cb = sdma_v4_0_process_ras_data_cb,
+ },
+};
+
static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(4, 2, 0):
case IP_VERSION(4, 2, 2):
- adev->sdma.funcs = &sdma_v4_0_ras_funcs;
+ adev->sdma.ras = &sdma_v4_0_ras;
break;
case IP_VERSION(4, 4, 0):
- adev->sdma.funcs = &sdma_v4_4_ras_funcs;
+ adev->sdma.ras = &sdma_v4_4_ras;
break;
default:
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
index bf95007f0843..0ddb6955a6d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
@@ -141,6 +141,10 @@ static const struct soc15_ras_field_entry sdma_v4_4_ras_fields[] = {
SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_RDBST_FIFO_SED),
0, 0,
},
+ { "SDMA_UTCL1_WR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_WR_FIFO_SED),
+ 0, 0,
+ },
{ "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_DATA_LUT_FIFO_SED),
0, 0,
@@ -188,7 +192,7 @@ static void sdma_v4_4_get_ras_error_count(struct amdgpu_device *adev,
}
}
-static int sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev,
+static int sdma_v4_4_query_ras_error_count_by_instance(struct amdgpu_device *adev,
uint32_t instance,
void *ras_error_status)
{
@@ -245,9 +249,26 @@ static void sdma_v4_4_reset_ras_error_count(struct amdgpu_device *adev)
}
}
-const struct amdgpu_sdma_ras_funcs sdma_v4_4_ras_funcs = {
- .ras_late_init = amdgpu_sdma_ras_late_init,
- .ras_fini = amdgpu_sdma_ras_fini,
+/*
+ * Query RAS error counts for every SDMA instance, handing @ras_error_status
+ * through to the per-instance helper. Stops at the first instance whose
+ * query returns non-zero and logs which instance failed.
+ */
+static void sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status)
+{
+	int inst;
+
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		if (!sdma_v4_4_query_ras_error_count_by_instance(adev, inst, ras_error_status))
+			continue;
+
+		dev_err(adev->dev, "Query ras error count failed in SDMA%d\n", inst);
+		return;
+	}
+}
+
+const struct amdgpu_ras_block_hw_ops sdma_v4_4_ras_hw_ops = {
.query_ras_error_count = sdma_v4_4_query_ras_error_count,
.reset_ras_error_count = sdma_v4_4_reset_ras_error_count,
};
+
+struct amdgpu_sdma_ras sdma_v4_4_ras = {
+ .ras_block = {
+ .hw_ops = &sdma_v4_4_ras_hw_ops,
+ },
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h
index 74a6e5b5e949..a9f0c68359e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h
@@ -23,6 +23,6 @@
#ifndef __SDMA_V4_4_H__
#define __SDMA_V4_4_H__
-extern const struct amdgpu_sdma_ras_funcs sdma_v4_4_ras_funcs;
+extern struct amdgpu_sdma_ras sdma_v4_4_ras;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
new file mode 100644
index 000000000000..a1443990d5c6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -0,0 +1,2613 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_xcp.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_reset.h"
+
+#include "sdma/sdma_4_4_2_offset.h"
+#include "sdma/sdma_4_4_2_sh_mask.h"
+
+#include "soc15_common.h"
+#include "soc15.h"
+#include "vega10_sdma_pkt_open.h"
+
+#include "ivsrcid/sdma0/irqsrcs_sdma0_4_0.h"
+#include "ivsrcid/sdma1/irqsrcs_sdma1_4_0.h"
+
+#include "amdgpu_ras.h"
+
+MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin");
+MODULE_FIRMWARE("amdgpu/sdma_4_4_4.bin");
+MODULE_FIRMWARE("amdgpu/sdma_4_4_5.bin");
+
+/*
+ * Table of SDMA 4.4.2 registers, one SOC15_REG_ENTRY_STR(GC, 0, <reg>) entry
+ * each: engine status and checksum registers, UTCL1 status/XNACK registers,
+ * and the ring-buffer / IB registers of the GFX, PAGE and RLC0 queues.
+ * NOTE(review): presumably consumed by the IP-state dump/print callbacks;
+ * the users of this table are outside this part of the file -- confirm.
+ */
+static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_4_2[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS1_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS2_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS3_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UCODE_CHECKSUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RB_RPTR_FETCH_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RB_RPTR_FETCH),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_RD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_WR_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_RD_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_RD_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_WR_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_WR_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_VM_CNTL)
+};
+
+/* NOTE(review): no user of this constant is visible in this part of the file. */
+#define mmSMNAID_AID0_MCA_SMU 0x03b30400
+
+/*
+ * Per-instance SDMA register accessors: translate (instance, offset) with
+ * sdma_v4_4_2_get_reg_offset() and then write/read through WREG32/RREG32.
+ * Both expansions reference a variable named adev that is not a macro
+ * parameter, so they can only be used where such a variable is in scope.
+ */
+#define WREG32_SDMA(instance, offset, value) \
+ WREG32(sdma_v4_4_2_get_reg_offset(adev, (instance), (offset)), value)
+#define RREG32_SDMA(instance, offset) \
+ RREG32(sdma_v4_4_2_get_reg_offset(adev, (instance), (offset)))
+
+static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev);
+static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev);
+static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev);
+static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev);
+static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev);
+static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev);
+static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring);
+static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring);
+static int sdma_v4_4_2_soft_reset_engine(struct amdgpu_device *adev,
+ u32 instance_id);
+
+/*
+ * Turn an SDMA instance number and a register offset into the offset used
+ * for register access: the instance is first mapped through GET_INST(SDMA0,
+ * ...), then the first reg_offset entry for that mapped SDMA0 instance is
+ * added to @offset.
+ */
+static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev,
+				       u32 instance, u32 offset)
+{
+	u32 mapped_inst = GET_INST(SDMA0, instance);
+	u32 base = adev->reg_offset[SDMA0_HWIP][mapped_inst][0];
+
+	return base + offset;
+}
+
+/*
+ * Map an SDMA sequence number (0..3) to its SOC15 interrupt client id.
+ *
+ * Any other sequence number yields -EINVAL. Because the return type is
+ * unsigned, that value reaches the caller converted to unsigned rather
+ * than as a negative number.
+ */
+static unsigned sdma_v4_4_2_seq_to_irq_id(int seq_num)
+{
+	unsigned client_id;
+
+	switch (seq_num) {
+	case 0:
+		client_id = SOC15_IH_CLIENTID_SDMA0;
+		break;
+	case 1:
+		client_id = SOC15_IH_CLIENTID_SDMA1;
+		break;
+	case 2:
+		client_id = SOC15_IH_CLIENTID_SDMA2;
+		break;
+	case 3:
+		client_id = SOC15_IH_CLIENTID_SDMA3;
+		break;
+	default:
+		client_id = -EINVAL;
+		break;
+	}
+
+	return client_id;
+}
+
+/*
+ * Map a SOC15 interrupt client id back to an SDMA sequence number.
+ *
+ * SDMA0/SDMA1 map to 0/1. SDMA2/SDMA3 map to 2/3, except on an SR-IOV VF
+ * whose gfx.xcc_mask is exactly 0x1, where they map to 0/1 instead.
+ * Unknown client ids return -EINVAL.
+ */
+static int sdma_v4_4_2_irq_id_to_seq(struct amdgpu_device *adev, unsigned client_id)
+{
+	int seq;
+
+	switch (client_id) {
+	case SOC15_IH_CLIENTID_SDMA0:
+		seq = 0;
+		break;
+	case SOC15_IH_CLIENTID_SDMA1:
+		seq = 1;
+		break;
+	case SOC15_IH_CLIENTID_SDMA2:
+		seq = (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1)) ? 0 : 2;
+		break;
+	case SOC15_IH_CLIENTID_SDMA3:
+		seq = (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1)) ? 1 : 3;
+		break;
+	default:
+		seq = -EINVAL;
+		break;
+	}
+
+	return seq;
+}
+
+/*
+ * Program the address-config fields of the SDMA instances: NUM_BANKS is set
+ * to 4 and PIPE_INTERLEAVE_SIZE to 0 in both SDMA_GB_ADDR_CONFIG and
+ * SDMA_GB_ADDR_CONFIG_READ, each via read-modify-write.
+ *
+ * NOTE(review): @inst_mask is accepted but never read; the loop walks all
+ * adev->sdma.num_instances, whereas the other inst_* helpers in this file
+ * iterate with for_each_inst(i, inst_mask). Confirm this is intended.
+ */
+static void sdma_v4_4_2_inst_init_golden_registers(struct amdgpu_device *adev,
+ uint32_t inst_mask)
+{
+ u32 val;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ /* write-path address config */
+ val = RREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG);
+ val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG, NUM_BANKS, 4);
+ val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG,
+ PIPE_INTERLEAVE_SIZE, 0);
+ WREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG, val);
+
+ /* same two fields in the _READ variant of the register */
+ val = RREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG_READ);
+ val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG_READ, NUM_BANKS,
+ 4);
+ val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG_READ,
+ PIPE_INTERLEAVE_SIZE, 0);
+ WREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG_READ, val);
+ }
+}
+
+/**
+ * sdma_v4_4_2_init_microcode - load ucode images from disk
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Use the firmware interface to load the ucode images into
+ * the driver (not loaded into hw).
+ *
+ * For SDMA IP versions 4.4.2, 4.4.4 and 4.4.5 a single request is made for
+ * instance 0 with the last argument of amdgpu_sdma_init_microcode() set to
+ * true; for any other version one request is made per instance with that
+ * argument false, stopping at the first failure.
+ *
+ * Returns 0 on success (including when there are no SDMA instances),
+ * error on failure.
+ */
+static int sdma_v4_4_2_init_microcode(struct amdgpu_device *adev)
+{
+	/* The IP version does not change while looping; evaluate it once. */
+	u32 ip_ver = amdgpu_ip_version(adev, SDMA0_HWIP, 0);
+	bool single_request = ip_ver == IP_VERSION(4, 4, 2) ||
+			      ip_ver == IP_VERSION(4, 4, 4) ||
+			      ip_ver == IP_VERSION(4, 4, 5);
+	/* Start at 0 so an empty instance list does not return garbage. */
+	int ret = 0;
+	int i;
+
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		/* One request covers these IP versions; nothing more to do. */
+		if (single_request)
+			return amdgpu_sdma_init_microcode(adev, 0, true);
+
+		ret = amdgpu_sdma_init_microcode(adev, i, false);
+		if (ret)
+			return ret;
+	}
+
+	return ret;
+}
+
+/**
+ * sdma_v4_4_2_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Read the 64-bit value stored in the writeback slot at @ring->rptr_offs
+ * and return it shifted right by two bits.
+ */
+static uint64_t sdma_v4_4_2_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	u64 *rptr_slot = (u64 *)&ring->adev->wb.wb[ring->rptr_offs];
+	u64 raw;
+
+	/* XXX check if swapping is necessary on BE */
+	raw = READ_ONCE(*rptr_slot);
+	DRM_DEBUG("rptr before shift == 0x%016llx\n", raw);
+
+	return raw >> 2;
+}
+
+/**
+ * sdma_v4_4_2_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * With doorbells in use the value comes from the writeback slot at
+ * @ring->wptr_offs; otherwise it is assembled from the GFX_RB_WPTR_HI and
+ * GFX_RB_WPTR registers. Either way the result is shifted right by two bits.
+ */
+static uint64_t sdma_v4_4_2_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	u64 raw;
+
+	if (!ring->use_doorbell) {
+		/* high dword is read first, then the low dword */
+		raw = RREG32_SDMA(ring->me, regSDMA_GFX_RB_WPTR_HI);
+		raw <<= 32;
+		raw |= RREG32_SDMA(ring->me, regSDMA_GFX_RB_WPTR);
+		DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n",
+			  ring->me, raw);
+		return raw >> 2;
+	}
+
+	/* XXX check if swapping is necessary on BE */
+	raw = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
+	DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", raw);
+
+	return raw >> 2;
+}
+
+/**
+ * sdma_v4_4_2_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Publish @ring->wptr, shifted left by two bits. With doorbells in use the
+ * value is stored to the writeback slot at @ring->wptr_offs and then written
+ * to the ring's doorbell; otherwise it is written to the GFX_RB_WPTR /
+ * GFX_RB_WPTR_HI register pair.
+ */
+static void sdma_v4_4_2_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ DRM_DEBUG("Setting write pointer\n");
+ if (ring->use_doorbell) {
+ u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];
+
+ DRM_DEBUG("Using doorbell -- "
+ "wptr_offs == 0x%08x "
+ "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+ "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+ /* XXX check if swapping is necessary on BE */
+ /* update the writeback copy first, then ring the doorbell */
+ WRITE_ONCE(*wb, (ring->wptr << 2));
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ } else {
+ DRM_DEBUG("Not using doorbell -- "
+ "regSDMA%i_GFX_RB_WPTR == 0x%08x "
+ "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+ ring->me,
+ lower_32_bits(ring->wptr << 2),
+ ring->me,
+ upper_32_bits(ring->wptr << 2));
+ /* low dword is written first, then the high dword */
+ WREG32_SDMA(ring->me, regSDMA_GFX_RB_WPTR,
+ lower_32_bits(ring->wptr << 2));
+ WREG32_SDMA(ring->me, regSDMA_GFX_RB_WPTR_HI,
+ upper_32_bits(ring->wptr << 2));
+ }
+}
+
+/**
+ * sdma_v4_4_2_page_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * With doorbells in use the value comes from the writeback slot at
+ * @ring->wptr_offs; otherwise it is assembled from the PAGE_RB_WPTR_HI and
+ * PAGE_RB_WPTR registers. Either way the result is shifted right by two bits.
+ */
+static uint64_t sdma_v4_4_2_page_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	u64 raw;
+
+	if (!ring->use_doorbell) {
+		/* high dword is read first, then the low dword */
+		raw = RREG32_SDMA(ring->me, regSDMA_PAGE_RB_WPTR_HI);
+		raw <<= 32;
+		raw |= RREG32_SDMA(ring->me, regSDMA_PAGE_RB_WPTR);
+		return raw >> 2;
+	}
+
+	/* XXX check if swapping is necessary on BE */
+	raw = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
+
+	return raw >> 2;
+}
+
+/**
+ * sdma_v4_4_2_page_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Publish @ring->wptr, shifted left by two bits. With doorbells in use the
+ * value is stored to the writeback slot at @ring->wptr_offs and then written
+ * to the ring's doorbell; otherwise it is written to the PAGE_RB_WPTR /
+ * PAGE_RB_WPTR_HI register pair.
+ */
+static void sdma_v4_4_2_page_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint64_t shifted;
+	u64 *slot;
+
+	if (!ring->use_doorbell) {
+		shifted = ring->wptr << 2;
+
+		WREG32_SDMA(ring->me, regSDMA_PAGE_RB_WPTR,
+			    lower_32_bits(shifted));
+		WREG32_SDMA(ring->me, regSDMA_PAGE_RB_WPTR_HI,
+			    upper_32_bits(shifted));
+		return;
+	}
+
+	slot = (u64 *)&adev->wb.wb[ring->wptr_offs];
+
+	/* XXX check if swapping is necessary on BE */
+	WRITE_ONCE(*slot, (ring->wptr << 2));
+	WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+}
+
+/*
+ * Write @count NOP dwords to @ring.
+ *
+ * When the ring's SDMA instance is found and has burst_nop set, the first
+ * dword additionally carries SDMA_PKT_NOP_HEADER_COUNT(count - 1); every
+ * other dword is the plain NOP value from the ring funcs.
+ */
+static void sdma_v4_4_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+	uint32_t n;
+
+	if (!count)
+		return;
+
+	/* first dword: optionally a burst header covering the remaining NOPs */
+	if (sdma && sdma->burst_nop)
+		amdgpu_ring_write(ring, ring->funcs->nop |
+				  SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+	else
+		amdgpu_ring_write(ring, ring->funcs->nop);
+
+	for (n = 1; n < count; n++)
+		amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
+/**
+ * sdma_v4_4_2_ring_emit_ib - Schedule an IB on the DMA engine
+ *
+ * @ring: amdgpu ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: IB object to schedule
+ * @flags: unused
+ *
+ * Pad the ring with NOPs and then write a six-dword INDIRECT packet that
+ * points at @ib.
+ */
+static void sdma_v4_4_2_ring_emit_ib(struct amdgpu_ring *ring,
+				     struct amdgpu_job *job,
+				     struct amdgpu_ib *ib,
+				     uint32_t flags)
+{
+	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+	/*
+	 * IB packet must end on a 8 DW boundary: pad until wptr is 2 (mod 8)
+	 * so that the six dwords written below finish on the boundary.
+	 */
+	uint32_t pad_dw = (2 - lower_32_bits(ring->wptr)) & 7;
+	uint32_t header = SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
+			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf);
+
+	sdma_v4_4_2_ring_insert_nop(ring, pad_dw);
+
+	amdgpu_ring_write(ring, header);
+	/* base must be 32 byte aligned */
+	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
+	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+	amdgpu_ring_write(ring, ib->length_dw);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, 0);
+}
+
+/*
+ * Write a six-dword POLL_REGMEM packet to @ring.
+ *
+ * @mem_space selects the MEM_POLL header field and how the addresses are
+ * emitted: as given when non-zero (memory), shifted left by two when zero
+ * (registers). @hdp goes into the HDP_FLUSH header field, @ref and @mask are
+ * emitted unchanged, and @inv is the poll interval; the retry count is
+ * fixed at 0xfff and the compare function at 3 (==).
+ */
+static void sdma_v4_4_2_wait_reg_mem(struct amdgpu_ring *ring,
+				     int mem_space, int hdp,
+				     uint32_t addr0, uint32_t addr1,
+				     uint32_t ref, uint32_t mask,
+				     uint32_t inv)
+{
+	/* register addresses are shifted left by two, memory addresses are not */
+	const unsigned int addr_shift = mem_space ? 0 : 2;
+	uint32_t header = SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(hdp) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(mem_space) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3); /* == */
+	uint32_t retry_and_interval = SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+				      SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(inv);
+
+	amdgpu_ring_write(ring, header);
+	amdgpu_ring_write(ring, addr0 << addr_shift);
+	amdgpu_ring_write(ring, addr1 << addr_shift);
+	amdgpu_ring_write(ring, ref); /* reference */
+	amdgpu_ring_write(ring, mask); /* mask */
+	amdgpu_ring_write(ring, retry_and_interval); /* retry count, poll interval */
+}
+
+/**
+ * sdma_v4_4_2_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Emit a register-space POLL_REGMEM packet with the HDP flush bit set,
+ * using the NBIO HDP flush done/request offsets. The reference and mask are
+ * both ref_and_mask_sdma0 shifted left by (ring->me modulo the number of
+ * SDMA instances per AID).
+ */
+static void sdma_v4_4_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const struct nbio_hdp_flush_reg *flush_reg = adev->nbio.hdp_flush_reg;
+	u32 flush_bit;
+
+	flush_bit = flush_reg->ref_and_mask_sdma0
+		<< (ring->me % adev->sdma.num_inst_per_aid);
+
+	sdma_v4_4_2_wait_reg_mem(ring, 0, 1,
+				 adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+				 adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+				 flush_bit, flush_bit, 10);
+}
+
+/* Write one FENCE packet that stores the 32-bit @value at @addr. */
+static void sdma_v4_4_2_ring_emit_fence_dword(struct amdgpu_ring *ring,
+					      u64 addr, u32 value)
+{
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
+	/* zero in first two bits */
+	BUG_ON(addr & 0x3);
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	amdgpu_ring_write(ring, value);
+}
+
+/**
+ * sdma_v4_4_2_ring_emit_fence - emit a fence on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Write the low 32 bits of @seq to @addr with a FENCE packet; when
+ * AMDGPU_FENCE_FLAG_64BIT is set in @flags, write the high 32 bits to
+ * @addr + 4 with a second FENCE packet. A TRAP packet follows in all cases.
+ */
+static void sdma_v4_4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+					unsigned flags)
+{
+	/* write the fence */
+	sdma_v4_4_2_ring_emit_fence_dword(ring, addr, lower_32_bits(seq));
+
+	/* optionally write high bits as well */
+	if (flags & AMDGPU_FENCE_FLAG_64BIT)
+		sdma_v4_4_2_ring_emit_fence_dword(ring, addr + 4,
+						  upper_32_bits(seq));
+
+	/* generate an interrupt */
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
+	amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
+}
+
+
+/**
+ * sdma_v4_4_2_inst_gfx_stop - stop the gfx async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be disabled
+ *
+ * Stop the gfx async dma ring buffers: for each instance in @inst_mask,
+ * clear RB_ENABLE, IB_ENABLE and UTC_L1_ENABLE, and, when the ring uses a
+ * doorbell, clear the doorbell ENABLE bit and zero the doorbell OFFSET.
+ */
+static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev,
+ uint32_t inst_mask)
+{
+ /* only used to reach each instance's ring for the use_doorbell check */
+ struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
+ u32 doorbell_offset, doorbell;
+ u32 rb_cntl, ib_cntl, sdma_cntl;
+ int i;
+
+ for_each_inst(i, inst_mask) {
+ sdma[i] = &adev->sdma.instance[i].ring;
+
+ /* disable the ring buffer, then IB fetching, then UTC L1 */
+ rb_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_CNTL);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_ENABLE, 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl);
+ ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0);
+ WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl);
+ sdma_cntl = RREG32_SDMA(i, regSDMA_CNTL);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, UTC_L1_ENABLE, 0);
+ WREG32_SDMA(i, regSDMA_CNTL, sdma_cntl);
+
+ if (sdma[i]->use_doorbell) {
+ doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL);
+ doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET);
+
+ doorbell = REG_SET_FIELD(doorbell, SDMA_GFX_DOORBELL, ENABLE, 0);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset,
+ SDMA_GFX_DOORBELL_OFFSET,
+ OFFSET, 0);
+ WREG32_SDMA(i, regSDMA_GFX_DOORBELL, doorbell);
+ WREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET, doorbell_offset);
+ }
+ }
+}
+
+/**
+ * sdma_v4_4_2_inst_rlc_stop - stop the compute async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be disabled
+ *
+ * Intended to stop the compute async dma queues. Currently an empty
+ * placeholder: neither argument is used and no register is touched.
+ */
+static void sdma_v4_4_2_inst_rlc_stop(struct amdgpu_device *adev,
+ uint32_t inst_mask)
+{
+ /* XXX todo */
+}
+
+/**
+ * sdma_v4_4_2_inst_page_stop - stop the page async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be disabled
+ *
+ * Stop the page async dma ring buffers: for each instance in @inst_mask,
+ * clear RB_ENABLE in PAGE_RB_CNTL and IB_ENABLE in PAGE_IB_CNTL.
+ */
+static void sdma_v4_4_2_inst_page_stop(struct amdgpu_device *adev,
+ uint32_t inst_mask)
+{
+ u32 rb_cntl, ib_cntl;
+ int i;
+
+ for_each_inst(i, inst_mask) {
+ /* ring buffer first, then IB fetching */
+ rb_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_CNTL);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL,
+ RB_ENABLE, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl);
+ ib_cntl = RREG32_SDMA(i, regSDMA_PAGE_IB_CNTL);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_PAGE_IB_CNTL,
+ IB_ENABLE, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_IB_CNTL, ib_cntl);
+ }
+}
+
+/**
+ * sdma_v4_4_2_inst_ctx_switch_enable - enable or disable the async dma
+ * engines context switch
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs context switch.
+ * @inst_mask: mask of dma engine instances to be enabled
+ *
+ * Set or clear AUTO_CTXSW_ENABLE on each instance in @inst_mask. When
+ * enabling and amdgpu_sdma_phase_quantum is non-zero, the three PHASEn_QUANTUM
+ * registers are programmed as well. The UTCL1 timeout is written for every
+ * instance in the mask regardless of @enable.
+ */
+static void sdma_v4_4_2_inst_ctx_switch_enable(struct amdgpu_device *adev,
+ bool enable, uint32_t inst_mask)
+{
+ u32 f32_cntl, phase_quantum = 0;
+ int i;
+
+ if (amdgpu_sdma_phase_quantum) {
+ unsigned value = amdgpu_sdma_phase_quantum;
+ unsigned unit = 0;
+
+ /*
+ * Encode the quantum as value << unit: halve value (rounding up)
+ * and bump unit until value fits in the VALUE field.
+ */
+ while (value > (SDMA_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA_PHASE0_QUANTUM__VALUE__SHIFT)) {
+ value = (value + 1) >> 1;
+ unit++;
+ }
+ /* unit does not fit in the UNIT field: clamp both to their maxima */
+ if (unit > (SDMA_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA_PHASE0_QUANTUM__UNIT__SHIFT)) {
+ value = (SDMA_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA_PHASE0_QUANTUM__VALUE__SHIFT);
+ unit = (SDMA_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA_PHASE0_QUANTUM__UNIT__SHIFT);
+ WARN_ONCE(1,
+ "clamping sdma_phase_quantum to %uK clock cycles\n",
+ value << unit);
+ }
+ phase_quantum =
+ value << SDMA_PHASE0_QUANTUM__VALUE__SHIFT |
+ unit << SDMA_PHASE0_QUANTUM__UNIT__SHIFT;
+ }
+
+ for_each_inst(i, inst_mask) {
+ f32_cntl = RREG32_SDMA(i, regSDMA_CNTL);
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA_CNTL,
+ AUTO_CTXSW_ENABLE, enable ? 1 : 0);
+ /* the quanta are written before SDMA_CNTL itself */
+ if (enable && amdgpu_sdma_phase_quantum) {
+ WREG32_SDMA(i, regSDMA_PHASE0_QUANTUM, phase_quantum);
+ WREG32_SDMA(i, regSDMA_PHASE1_QUANTUM, phase_quantum);
+ WREG32_SDMA(i, regSDMA_PHASE2_QUANTUM, phase_quantum);
+ }
+ WREG32_SDMA(i, regSDMA_CNTL, f32_cntl);
+
+ /* Extend page fault timeout to avoid interrupt storm */
+ WREG32_SDMA(i, regSDMA_UTCL1_TIMEOUT, 0x00800080);
+ }
+}
+
+/**
+ * sdma_v4_4_2_inst_enable - enable or disable the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs.
+ * @inst_mask: mask of dma engine instances to be enabled
+ *
+ * Halt or unhalt the async dma engines. When disabling, the gfx, rlc and
+ * (if present) page queues of the instances in @inst_mask are stopped first.
+ * The HALT bit itself is left untouched when firmware is loaded through PSP,
+ * and when disabling during a reset on a device that is not an SR-IOV VF.
+ */
+static void sdma_v4_4_2_inst_enable(struct amdgpu_device *adev, bool enable,
+ uint32_t inst_mask)
+{
+ u32 f32_cntl;
+ int i;
+
+ if (!enable) {
+ sdma_v4_4_2_inst_gfx_stop(adev, inst_mask);
+ sdma_v4_4_2_inst_rlc_stop(adev, inst_mask);
+ if (adev->sdma.has_page_queue)
+ sdma_v4_4_2_inst_page_stop(adev, inst_mask);
+
+ /* SDMA FW needs to respond to FREEZE requests during reset.
+ * Keep it running during reset */
+ if (!amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
+ return;
+ }
+
+ /* with PSP firmware loading the HALT bit is not programmed here */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
+ return;
+
+ for_each_inst(i, inst_mask) {
+ f32_cntl = RREG32_SDMA(i, regSDMA_F32_CNTL);
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA_F32_CNTL, HALT, enable ? 0 : 1);
+ WREG32_SDMA(i, regSDMA_F32_CNTL, f32_cntl);
+ }
+}
+
+/*
+ * sdma_v4_4_2_rb_cntl - get parameters for rb_cntl
+ *
+ * Return @rb_cntl with RB_SIZE set to order_base_2() of the ring size in
+ * dwords. On big-endian builds RB_SWAP_ENABLE and RPTR_WRITEBACK_SWAP_ENABLE
+ * are set as well. The SDMA_GFX_RB_CNTL field layout is used whichever
+ * queue @ring belongs to. No register is accessed here.
+ */
+static uint32_t sdma_v4_4_2_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
+{
+ /* Set ring buffer size in dwords */
+ uint32_t rb_bufsz = order_base_2(ring->ring_size / 4);
+
+ barrier(); /* work around https://llvm.org/pr42576 */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+#ifdef __BIG_ENDIAN
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
+#endif
+ return rb_cntl;
+}
+
+/**
+ * sdma_v4_4_2_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @i: instance to resume
+ * @restore: used to restore wptr when restart
+ *
+ * Set up the gfx DMA ring buffer of instance @i and enable it: ring size,
+ * rptr writeback address, ring base, read/write pointers, doorbell, wptr
+ * polling, and finally the RB and IB enables.
+ *
+ * When @restore is false the ring's wptr is reset to 0 and the hardware
+ * pointers are zeroed. When @restore is true both hardware pointers are set
+ * to the ring's current wptr if the gfx queue is marked guilty, or to its
+ * cached rptr otherwise.
+ *
+ * This function does not return a value.
+ */
+static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, bool restore)
+{
+ struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
+ u32 rb_cntl, ib_cntl, wptr_poll_cntl;
+ u32 wb_offset;
+ u32 doorbell;
+ u32 doorbell_offset;
+ u64 wptr_gpu_addr;
+ u64 rwptr;
+
+ /* byte offset of the rptr writeback slot (rptr_offs counts dwords) */
+ wb_offset = (ring->rptr_offs * 4);
+
+ rb_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_CNTL);
+ rb_cntl = sdma_v4_4_2_rb_cntl(ring, rb_cntl);
+ WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_ADDR_HI,
+ upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_ADDR_LO,
+ lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+
+ /* only updates the local copy; it reaches the register with RB_ENABLE below */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL,
+ RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32_SDMA(i, regSDMA_GFX_RB_BASE, ring->gpu_addr >> 8);
+ WREG32_SDMA(i, regSDMA_GFX_RB_BASE_HI, ring->gpu_addr >> 40);
+
+ if (!restore)
+ ring->wptr = 0;
+
+ /* before programming wptr to a smaller value, minor_ptr_update must be set first */
+ WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 1);
+
+ /* For a guilty queue, set RPTR to the current wptr to skip the bad commands.
+ * If it is not a guilty queue, restore cached_rptr and continue execution.
+ */
+ if (adev->sdma.instance[i].gfx_guilty)
+ rwptr = ring->wptr;
+ else
+ rwptr = ring->cached_rptr;
+
+ /* Initialize the ring buffer's read and write pointers */
+ if (restore) {
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, lower_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, upper_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, lower_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, upper_32_bits(rwptr << 2));
+ } else {
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0);
+ }
+
+ doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL);
+ doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET);
+
+ doorbell = REG_SET_FIELD(doorbell, SDMA_GFX_DOORBELL, ENABLE,
+ ring->use_doorbell);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset,
+ SDMA_GFX_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ WREG32_SDMA(i, regSDMA_GFX_DOORBELL, doorbell);
+ WREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET, doorbell_offset);
+
+ /* publish ring->wptr through the doorbell or the WPTR registers */
+ sdma_v4_4_2_ring_set_wptr(ring);
+
+ /* set minor_ptr_update to 0 after wptr is programmed */
+ WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 0);
+
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+ /* F32 polling of the wptr shadow is enabled only on SR-IOV VFs */
+ wptr_poll_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_WPTR_POLL_CNTL);
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA_GFX_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, amdgpu_sriov_vf(adev)? 1 : 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
+
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl);
+
+ ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+ /* enable DMA IBs */
+ WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl);
+}
+
+/**
+ * sdma_v4_4_2_page_resume - set up and start the page queue of one SDMA instance
+ *
+ * @adev: amdgpu_device pointer
+ * @i: instance to resume
+ * @restore: boolean to say restore needed or not
+ *
+ * Set up the page DMA ring buffer of instance @i and enable it.
+ * When @restore is true the ring read/write pointers are re-programmed from
+ * the saved state instead of being reset to 0.
+ * This function does not return a value.
+ */
+static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i, bool restore)
+{
+ struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
+ u32 rb_cntl, ib_cntl, wptr_poll_cntl;
+ u32 wb_offset;
+ u32 doorbell;
+ u32 doorbell_offset;
+ u64 wptr_gpu_addr;
+ u64 rwptr;
+
+ /* byte offset of the rptr writeback slot inside adev->wb */
+ wb_offset = (ring->rptr_offs * 4);
+
+ rb_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_CNTL);
+ rb_cntl = sdma_v4_4_2_rb_cntl(ring, rb_cntl);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl);
+
+ /* For the guilty queue, set RPTR to the current wptr to skip bad commands,
+ * It is not a guilty queue, restore cache_rptr and continue execution.
+ */
+ if (adev->sdma.instance[i].page_guilty)
+ rwptr = ring->wptr;
+ else
+ rwptr = ring->cached_rptr;
+
+ /* Initialize the ring buffer's read and write pointers */
+ if (restore) {
+ /* both RPTR and WPTR are programmed with the same value (rwptr << 2) */
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, lower_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, upper_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, lower_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, upper_32_bits(rwptr << 2));
+ } else {
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0);
+ }
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_ADDR_HI,
+ upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_ADDR_LO,
+ lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+
+ /* only updates the local copy; it is written to the register at "enable DMA RB" below */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL,
+ RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32_SDMA(i, regSDMA_PAGE_RB_BASE, ring->gpu_addr >> 8);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_BASE_HI, ring->gpu_addr >> 40);
+
+ /* a fresh start (no restore) begins with an empty ring */
+ if (!restore)
+ ring->wptr = 0;
+
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SDMA(i, regSDMA_PAGE_MINOR_PTR_UPDATE, 1);
+
+ doorbell = RREG32_SDMA(i, regSDMA_PAGE_DOORBELL);
+ doorbell_offset = RREG32_SDMA(i, regSDMA_PAGE_DOORBELL_OFFSET);
+
+ doorbell = REG_SET_FIELD(doorbell, SDMA_PAGE_DOORBELL, ENABLE,
+ ring->use_doorbell);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset,
+ SDMA_PAGE_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ WREG32_SDMA(i, regSDMA_PAGE_DOORBELL, doorbell);
+ WREG32_SDMA(i, regSDMA_PAGE_DOORBELL_OFFSET, doorbell_offset);
+
+ /* paging queue doorbell range is setup at sdma_v4_4_2_gfx_resume */
+ sdma_v4_4_2_page_ring_set_wptr(ring);
+
+ /* set minor_ptr_update to 0 after wptr programed */
+ WREG32_SDMA(i, regSDMA_PAGE_MINOR_PTR_UPDATE, 0);
+
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_POLL_CNTL);
+ /* F32_POLL_ENABLE is set only when running as an SR-IOV VF */
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA_PAGE_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, amdgpu_sriov_vf(adev)? 1 : 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
+
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl);
+
+ ib_cntl = RREG32_SDMA(i, regSDMA_PAGE_IB_CNTL);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_PAGE_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_PAGE_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+ /* enable DMA IBs */
+ WREG32_SDMA(i, regSDMA_PAGE_IB_CNTL, ib_cntl);
+}
+
+/*
+ * sdma_v4_4_2_init_pg - placeholder hook with an empty body.
+ * NOTE(review): presumably reserved for power-gating setup ("pg"); confirm.
+ */
+static void sdma_v4_4_2_init_pg(struct amdgpu_device *adev)
+{
+
+}
+
+/**
+ * sdma_v4_4_2_inst_rlc_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be enabled (not used by the
+ *             current body)
+ *
+ * Currently this only calls sdma_v4_4_2_init_pg().
+ * Always returns 0.
+ */
+static int sdma_v4_4_2_inst_rlc_resume(struct amdgpu_device *adev,
+ uint32_t inst_mask)
+{
+ sdma_v4_4_2_init_pg(adev);
+
+ return 0;
+}
+
+/**
+ * sdma_v4_4_2_inst_load_microcode - load the sDMA ME ucode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be enabled
+ *
+ * Halts the engines in @inst_mask, then writes the firmware image of each
+ * selected instance dword by dword through the UCODE_ADDR/UCODE_DATA registers.
+ * Returns 0 for success, -EINVAL if the ucode is not available.
+ * On -EINVAL the engines have already been halted and instances earlier in
+ * the mask have already been loaded.
+ */
+static int sdma_v4_4_2_inst_load_microcode(struct amdgpu_device *adev,
+ uint32_t inst_mask)
+{
+ const struct sdma_firmware_header_v1_0 *hdr;
+ const __le32 *fw_data;
+ u32 fw_size;
+ int i, j;
+
+ /* halt the MEs */
+ sdma_v4_4_2_inst_enable(adev, false, inst_mask);
+
+ for_each_inst(i, inst_mask) {
+ if (!adev->sdma.instance[i].fw)
+ return -EINVAL;
+
+ hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
+ amdgpu_ucode_print_sdma_hdr(&hdr->header);
+ /* ucode size converted from bytes to dwords */
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ /* ucode payload starts ucode_array_offset_bytes into the firmware blob */
+ fw_data = (const __le32 *)
+ (adev->sdma.instance[i].fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+ /* start writing at ucode address 0 */
+ WREG32_SDMA(i, regSDMA_UCODE_ADDR, 0);
+
+ for (j = 0; j < fw_size; j++)
+ WREG32_SDMA(i, regSDMA_UCODE_DATA,
+ le32_to_cpup(fw_data++));
+
+ /* after the upload, the firmware version is written to UCODE_ADDR */
+ WREG32_SDMA(i, regSDMA_UCODE_ADDR,
+ adev->sdma.instance[i].fw_version);
+ }
+
+ return 0;
+}
+
+/**
+ * sdma_v4_4_2_inst_start - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be enabled
+ * @restore: boolean to say restore needed or not
+ *
+ * Set up the DMA engines and enable them, then run a ring test on the gfx
+ * ring (and the page ring when present) of every instance in @inst_mask.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
+ uint32_t inst_mask, bool restore)
+{
+ struct amdgpu_ring *ring;
+ uint32_t tmp_mask;
+ int i, r = 0;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* SR-IOV VF: disable context switching and the engines while the rings are programmed */
+ sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask);
+ sdma_v4_4_2_inst_enable(adev, false, inst_mask);
+ } else {
+ /* bypass sdma microcode loading on Gopher */
+ /* direct load only on a fresh start, when not PSP-loaded and instance 0 has firmware */
+ if (!restore && adev->firmware.load_type != AMDGPU_FW_LOAD_PSP &&
+ adev->sdma.instance[0].fw) {
+ r = sdma_v4_4_2_inst_load_microcode(adev, inst_mask);
+ if (r)
+ return r;
+ }
+
+ /* unhalt the MEs */
+ sdma_v4_4_2_inst_enable(adev, true, inst_mask);
+ /* enable sdma ring preemption */
+ sdma_v4_4_2_inst_ctx_switch_enable(adev, true, inst_mask);
+ }
+
+ /* start the gfx rings and rlc compute queues */
+ tmp_mask = inst_mask;
+ for_each_inst(i, tmp_mask) {
+ uint32_t temp;
+
+ WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
+ sdma_v4_4_2_gfx_resume(adev, i, restore);
+ if (adev->sdma.has_page_queue)
+ sdma_v4_4_2_page_resume(adev, i, restore);
+
+ /* set utc l1 enable flag always to 1 */
+ temp = RREG32_SDMA(i, regSDMA_CNTL);
+ temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1);
+ WREG32_SDMA(i, regSDMA_CNTL, temp);
+
+ /* only for SDMA IP versions older than 4.4.5 */
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < IP_VERSION(4, 4, 5)) {
+ /* enable context empty interrupt during initialization */
+ temp = REG_SET_FIELD(temp, SDMA_CNTL, CTXEMPTY_INT_ENABLE, 1);
+ WREG32_SDMA(i, regSDMA_CNTL, temp);
+ }
+ if (!amdgpu_sriov_vf(adev)) {
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ /* unhalt engine */
+ temp = RREG32_SDMA(i, regSDMA_F32_CNTL);
+ temp = REG_SET_FIELD(temp, SDMA_F32_CNTL, HALT, 0);
+ WREG32_SDMA(i, regSDMA_F32_CNTL, temp);
+ }
+ }
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* SR-IOV VF: re-enable what was disabled at the top of the function */
+ sdma_v4_4_2_inst_ctx_switch_enable(adev, true, inst_mask);
+ sdma_v4_4_2_inst_enable(adev, true, inst_mask);
+ } else {
+ r = sdma_v4_4_2_inst_rlc_resume(adev, inst_mask);
+ if (r)
+ return r;
+ }
+
+ /* test every started ring; the first failure aborts and is returned */
+ tmp_mask = inst_mask;
+ for_each_inst(i, tmp_mask) {
+ ring = &adev->sdma.instance[i].ring;
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+
+ if (adev->sdma.has_page_queue) {
+ struct amdgpu_ring *page = &adev->sdma.instance[i].page;
+
+ r = amdgpu_ring_test_helper(page);
+ if (r)
+ return r;
+ }
+ }
+
+ return r;
+}
+
+/**
+ * sdma_v4_4_2_ring_test_ring - basic liveness test of an SDMA ring
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Seeds a writeback slot with 0xCAFEDEAD, submits a linear write packet that
+ * stores 0xDEADBEEF to that slot, and polls the slot for up to
+ * adev->usec_timeout microseconds.
+ * Returns 0 for success, -ETIMEDOUT if the value never shows up, or the
+ * error from writeback/ring allocation.
+ */
+static int sdma_v4_4_2_ring_test_ring(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	unsigned int wb_slot;
+	unsigned int usec;
+	u64 wb_gpu_addr;
+	int ret;
+
+	ret = amdgpu_device_wb_get(adev, &wb_slot);
+	if (ret)
+		return ret;
+
+	/* each writeback slot is one dword wide */
+	wb_gpu_addr = adev->wb.gpu_addr + (wb_slot * 4);
+	adev->wb.wb[wb_slot] = cpu_to_le32(0xCAFEDEAD);
+
+	ret = amdgpu_ring_alloc(ring, 5);
+	if (ret)
+		goto release_wb;
+
+	/* 5-dword linear write packet: header, dst lo, dst hi, count, payload */
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
+	amdgpu_ring_write(ring, lower_32_bits(wb_gpu_addr));
+	amdgpu_ring_write(ring, upper_32_bits(wb_gpu_addr));
+	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
+	amdgpu_ring_write(ring, 0xDEADBEEF);
+	amdgpu_ring_commit(ring);
+
+	/* assume a timeout until the engine's write is observed */
+	ret = -ETIMEDOUT;
+	for (usec = 0; usec < adev->usec_timeout; usec++) {
+		if (le32_to_cpu(adev->wb.wb[wb_slot]) == 0xDEADBEEF) {
+			ret = 0;
+			break;
+		}
+		udelay(1);
+	}
+
+release_wb:
+	amdgpu_device_wb_free(adev, wb_slot);
+	return ret;
+}
+
+/**
+ * sdma_v4_4_2_ring_test_ib - run a small indirect buffer on the DMA ring
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Schedules an 8-dword IB (one linear write of 0xDEADBEEF to a writeback
+ * slot followed by three NOPs), waits for its fence and checks the slot.
+ * Returns 0 on success, -ETIMEDOUT if the fence wait times out, -EINVAL if
+ * the slot holds an unexpected value, or another error on failure.
+ */
+static int sdma_v4_4_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct dma_fence *fence = NULL;
+	struct amdgpu_ib ib;
+	unsigned int wb_slot;
+	unsigned int dw;
+	u64 wb_gpu_addr;
+	long ret;
+
+	ret = amdgpu_device_wb_get(adev, &wb_slot);
+	if (ret)
+		return ret;
+
+	/* each writeback slot is one dword wide */
+	wb_gpu_addr = adev->wb.gpu_addr + (wb_slot * 4);
+	adev->wb.wb[wb_slot] = cpu_to_le32(0xCAFEDEAD);
+
+	memset(&ib, 0, sizeof(ib));
+	ret = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+	if (ret)
+		goto release_wb;
+
+	ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+		    SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+	ib.ptr[1] = lower_32_bits(wb_gpu_addr);
+	ib.ptr[2] = upper_32_bits(wb_gpu_addr);
+	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
+	ib.ptr[4] = 0xDEADBEEF;
+	/* fill dwords 5..7 with NOPs so the IB is 8 dwords long */
+	for (dw = 5; dw < 8; dw++)
+		ib.ptr[dw] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+	ib.length_dw = 8;
+
+	ret = amdgpu_ib_schedule(ring, 1, &ib, NULL, &fence);
+	if (ret)
+		goto release_ib;
+
+	ret = dma_fence_wait_timeout(fence, false, timeout);
+	if (ret < 0)
+		goto release_ib;
+	if (ret == 0) {
+		ret = -ETIMEDOUT;
+		goto release_ib;
+	}
+
+	if (le32_to_cpu(adev->wb.wb[wb_slot]) == 0xDEADBEEF)
+		ret = 0;
+	else
+		ret = -EINVAL;
+
+release_ib:
+	amdgpu_ib_free(&ib, NULL);
+	dma_fence_put(fence);
+release_wb:
+	amdgpu_device_wb_free(adev, wb_slot);
+	return ret;
+}
+
+
+/**
+ * sdma_v4_4_2_vm_copy_pte - append a linear-copy packet that copies PTEs
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Appends a 7-dword COPY_LINEAR packet to @ib that copies @count * 8 bytes
+ * from @src to @pe.
+ */
+static void sdma_v4_4_2_vm_copy_pte(struct amdgpu_ib *ib,
+				    uint64_t pe, uint64_t src,
+				    unsigned count)
+{
+	unsigned int nbytes = count * 8;
+	uint32_t dw = ib->length_dw;
+
+	ib->ptr[dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+			SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+	ib->ptr[dw++] = nbytes - 1;
+	ib->ptr[dw++] = 0; /* src/dst endian swap */
+	ib->ptr[dw++] = lower_32_bits(src);
+	ib->ptr[dw++] = upper_32_bits(src);
+	ib->ptr[dw++] = lower_32_bits(pe);
+	ib->ptr[dw++] = upper_32_bits(pe);
+
+	/* publish the new IB length once the packet is complete */
+	ib->length_dw = dw;
+}
+
+/**
+ * sdma_v4_4_2_vm_write_pte - append a linear-write packet with PTE values
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @value: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ *
+ * Appends a WRITE_LINEAR packet to @ib whose payload is @count 64-bit
+ * entries, starting at @value and advancing by @incr for each entry.
+ */
+static void sdma_v4_4_2_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+				     uint64_t value, unsigned count,
+				     uint32_t incr)
+{
+	/* every entry occupies two dwords */
+	unsigned int remaining_dw = count * 2;
+	uint32_t dw = ib->length_dw;
+
+	ib->ptr[dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+			SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+	ib->ptr[dw++] = lower_32_bits(pe);
+	ib->ptr[dw++] = upper_32_bits(pe);
+	ib->ptr[dw++] = remaining_dw - 1;
+
+	while (remaining_dw != 0) {
+		ib->ptr[dw++] = lower_32_bits(value);
+		ib->ptr[dw++] = upper_32_bits(value);
+		value += incr;
+		remaining_dw -= 2;
+	}
+
+	ib->length_dw = dw;
+}
+
+/**
+ * sdma_v4_4_2_vm_set_pte_pde - append a PTEPDE packet to an IB
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Appends a 10-dword PTEPDE packet to @ib describing @count entries.
+ */
+static void sdma_v4_4_2_vm_set_pte_pde(struct amdgpu_ib *ib,
+				       uint64_t pe,
+				       uint64_t addr, unsigned count,
+				       uint32_t incr, uint64_t flags)
+{
+	uint32_t dw = ib->length_dw;
+
+	/* for physically contiguous pages (vram) */
+	ib->ptr[dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE);
+	/* dst addr */
+	ib->ptr[dw++] = lower_32_bits(pe);
+	ib->ptr[dw++] = upper_32_bits(pe);
+	/* mask */
+	ib->ptr[dw++] = lower_32_bits(flags);
+	ib->ptr[dw++] = upper_32_bits(flags);
+	/* value */
+	ib->ptr[dw++] = lower_32_bits(addr);
+	ib->ptr[dw++] = upper_32_bits(addr);
+	/* increment size */
+	ib->ptr[dw++] = incr;
+	ib->ptr[dw++] = 0;
+	/* number of entries */
+	ib->ptr[dw++] = count - 1;
+
+	ib->length_dw = dw;
+}
+
+/**
+ * sdma_v4_4_2_ring_pad_ib - pad the IB length up to a multiple of 8 dwords
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ib: indirect buffer to fill with padding
+ *
+ * Appends NOP packets until ib->length_dw is a multiple of 8. When the SDMA
+ * instance of @ring has burst_nop set, the first NOP header carries the
+ * number of padding dwords that follow it.
+ */
+static void sdma_v4_4_2_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+	bool burst = sdma && sdma->burst_nop;
+	u32 pad_count;
+	int n;
+
+	/* dwords missing to reach the next multiple of 8 */
+	pad_count = (-ib->length_dw) & 7;
+
+	for (n = 0; n < pad_count; n++) {
+		u32 nop = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+
+		if (burst && n == 0)
+			nop |= SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+
+		ib->ptr[ib->length_dw++] = nop;
+	}
+}
+
+
+/**
+ * sdma_v4_4_2_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emits a wait on the ring's fence location for the latest sync sequence
+ * number, so that all previous operations are completed first.
+ */
+static void sdma_v4_4_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+	uint64_t fence_addr = ring->fence_drv.gpu_addr;
+	uint32_t wait_seq = ring->fence_drv.sync_seq;
+
+	/* wait for idle */
+	sdma_v4_4_2_wait_reg_mem(ring, 1, 0,
+				 fence_addr & 0xfffffffc,
+				 upper_32_bits(fence_addr) & 0xffffffff,
+				 wait_seq, 0xffffffff, 4);
+}
+
+
+/**
+ * sdma_v4_4_2_ring_emit_vm_flush - vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: address
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA. The work is delegated entirely to
+ * amdgpu_gmc_emit_flush_gpu_tlb().
+ */
+static void sdma_v4_4_2_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+}
+
+/*
+ * sdma_v4_4_2_ring_emit_wreg - emit a register write on the ring
+ *
+ * Writes a 3-dword SRBM_WRITE packet (header with byte enable 0xf, then
+ * @reg, then @val) to @ring.
+ */
+static void sdma_v4_4_2_ring_emit_wreg(struct amdgpu_ring *ring,
+				       uint32_t reg, uint32_t val)
+{
+	uint32_t header = SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf);
+
+	amdgpu_ring_write(ring, header);
+	amdgpu_ring_write(ring, reg);
+	amdgpu_ring_write(ring, val);
+}
+
+/*
+ * sdma_v4_4_2_ring_emit_reg_wait - emit a wait on register @reg.
+ *
+ * Forwards @reg, @val and @mask to sdma_v4_4_2_wait_reg_mem() with fixed
+ * arguments 0, 0 before @reg, 0 after it, and 10 as the last argument.
+ * NOTE(review): the meaning of those fixed arguments is defined by
+ * sdma_v4_4_2_wait_reg_mem(), which is not visible here; confirm there.
+ */
+static void sdma_v4_4_2_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ sdma_v4_4_2_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10);
+}
+
+/*
+ * sdma_v4_4_2_fw_support_paging_queue - report whether the page queue is used.
+ *
+ * Switches on the SDMA IP version, but every branch currently returns false,
+ * so this function always returns false.
+ */
+static bool sdma_v4_4_2_fw_support_paging_queue(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(4, 4, 2):
+ case IP_VERSION(4, 4, 5):
+ return false;
+ default:
+ return false;
+ }
+}
+
+/*
+ * Per-instance SDMA callbacks; sdma_v4_4_2_sw_init() stores a pointer to
+ * this table in every instance's ->funcs.
+ */
+static const struct amdgpu_sdma_funcs sdma_v4_4_2_sdma_funcs = {
+ .stop_kernel_queue = &sdma_v4_4_2_stop_queue,
+ .start_kernel_queue = &sdma_v4_4_2_restore_queue,
+ .soft_reset_kernel_queue = &sdma_v4_4_2_soft_reset_engine,
+};
+
+/*
+ * sdma_v4_4_2_early_init - early IP-block init for SDMA.
+ *
+ * Initializes the microcode, decides whether the page queue is used and
+ * installs the ring, buffer, VM PTE, IRQ and RAS function tables.
+ * Returns 0 on success or the error from sdma_v4_4_2_init_microcode().
+ */
+static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = sdma_v4_4_2_init_microcode(adev);
+ if (r)
+ return r;
+
+ /* TODO: Page queue breaks driver reload under SRIOV */
+ /* has_page_queue is only ever set to true here; it is never cleared by this function */
+ if (sdma_v4_4_2_fw_support_paging_queue(adev))
+ adev->sdma.has_page_queue = true;
+
+ sdma_v4_4_2_set_ring_funcs(adev);
+ sdma_v4_4_2_set_buffer_funcs(adev);
+ sdma_v4_4_2_set_vm_pte_funcs(adev);
+ sdma_v4_4_2_set_irq_funcs(adev);
+ sdma_v4_4_2_set_ras_funcs(adev);
+ return 0;
+}
+
+/* Forward declaration of the RAS data callback; compiled out together with its definition. */
+#if 0
+static int sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry);
+#endif
+
+/*
+ * sdma_v4_4_2_late_init - late IP-block init for SDMA.
+ *
+ * Resets the SDMA RAS error count when persistent EDC harvesting is not
+ * supported, then updates the SDMA reset mask. Always returns 0.
+ */
+static int sdma_v4_4_2_late_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ /* compiled out: RAS interrupt handler info that referenced the callback above */
+#if 0
+ struct ras_ih_if ih_info = {
+ .cb = sdma_v4_4_2_process_ras_data_cb,
+ };
+#endif
+ if (!amdgpu_persistent_edc_harvesting_supported(adev))
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__SDMA);
+
+ /* The initialization is done in the late_init stage to ensure that the SMU
+ * initialization and capability setup are completed before we check the SDMA
+ * reset capability
+ */
+ sdma_v4_4_2_update_reset_mask(adev);
+
+ return 0;
+}
+
+/*
+ * sdma_v4_4_2_sw_init - software-side init for the SDMA IP block.
+ *
+ * Registers the SDMA interrupt sources, initializes the gfx ring (and the
+ * page ring when has_page_queue is set) of every instance, sets up RAS,
+ * allocates the IP dump buffer and creates the reset-mask sysfs entry.
+ * Returns 0 on success or a negative error code. No cleanup of earlier
+ * steps is done in this function on failure.
+ */
+static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_ring *ring;
+ int r, i;
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 aid_id;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2);
+ uint32_t *ptr;
+
+ /* SDMA trap event */
+ /* interrupt sources are registered once per instance-within-AID, not per instance */
+ for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_TRAP,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+ }
+
+ /* SDMA SRAM ECC event */
+ for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_SRAM_ECC,
+ &adev->sdma.ecc_irq);
+ if (r)
+ return r;
+ }
+
+ /* SDMA VM_HOLE/DOORBELL_INV/POLL_TIMEOUT/SRBM_WRITE_PROTECTION event*/
+ /* the CTXEMPTY source is registered in this loop as well */
+ for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_VM_HOLE,
+ &adev->sdma.vm_hole_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_DOORBELL_INVALID,
+ &adev->sdma.doorbell_invalid_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_POLL_TIMEOUT,
+ &adev->sdma.pool_timeout_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_SRBMWRITE,
+ &adev->sdma.srbm_write_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_CTXEMPTY,
+ &adev->sdma.ctxt_empty_irq);
+ if (r)
+ return r;
+ }
+
+ /* per-instance state and ring setup */
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ mutex_init(&adev->sdma.instance[i].engine_reset_mutex);
+ /* Initialize guilty flags for GFX and PAGE queues */
+ adev->sdma.instance[i].gfx_guilty = false;
+ adev->sdma.instance[i].page_guilty = false;
+ adev->sdma.instance[i].funcs = &sdma_v4_4_2_sdma_funcs;
+
+ ring = &adev->sdma.instance[i].ring;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ aid_id = adev->sdma.instance[i].aid_id;
+
+ DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
+ ring->use_doorbell?"true":"false");
+
+ /* doorbell size is 2 dwords, get DWORD offset */
+ ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
+ ring->vm_hub = AMDGPU_MMHUB0(aid_id);
+
+ /* ring name is "sdma<aid>.<instance index within the AID>" */
+ /* NOTE(review): unbounded sprintf(); the size of ring->name is not visible here - confirm it always fits */
+ sprintf(ring->name, "sdma%d.%d", aid_id,
+ i % adev->sdma.num_inst_per_aid);
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ if (adev->sdma.has_page_queue) {
+ ring = &adev->sdma.instance[i].page;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+
+ /* doorbell index of page queue is assigned right after
+ * gfx queue on the same instance
+ */
+ ring->doorbell_index =
+ (adev->doorbell_index.sdma_engine[i] + 1) << 1;
+ ring->vm_hub = AMDGPU_MMHUB0(aid_id);
+
+ sprintf(ring->name, "page%d.%d", aid_id,
+ i % adev->sdma.num_inst_per_aid);
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->sdma.trap_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+ }
+
+ /* the supported reset mask is derived from instance 0's gfx ring */
+ adev->sdma.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+
+ /* any RAS init failure is reported as -EINVAL; the original error code is not propagated */
+ if (amdgpu_sdma_ras_sw_init(adev)) {
+ dev_err(adev->dev, "fail to initialize sdma ras block\n");
+ return -EINVAL;
+ }
+
+ /* Allocate memory for SDMA IP Dump buffer */
+ /* allocation failure is only logged; init continues and ip_dump is left unassigned */
+ ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (ptr)
+ adev->sdma.ip_dump = ptr;
+ else
+ DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+
+ r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+ /* NOTE(review): this check is redundant, r is returned either way */
+ if (r)
+ return r;
+
+ return r;
+}
+
+/*
+ * sdma_v4_4_2_sw_fini - software-side teardown for the SDMA IP block.
+ *
+ * Finalizes the gfx ring (and the page ring when present) of every instance,
+ * removes the reset-mask sysfs entry, destroys the instance contexts and
+ * frees the IP dump buffer. Always returns 0.
+ */
+static int sdma_v4_4_2_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst;
+
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		amdgpu_ring_fini(&adev->sdma.instance[inst].ring);
+		if (adev->sdma.has_page_queue)
+			amdgpu_ring_fini(&adev->sdma.instance[inst].page);
+	}
+
+	amdgpu_sdma_sysfs_reset_mask_fini(adev);
+
+	/* IP versions 4.4.2, 4.4.4 and 4.4.5 pass true; all others pass false */
+	switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+	case IP_VERSION(4, 4, 2):
+	case IP_VERSION(4, 4, 4):
+	case IP_VERSION(4, 4, 5):
+		amdgpu_sdma_destroy_inst_ctx(adev, true);
+		break;
+	default:
+		amdgpu_sdma_destroy_inst_ctx(adev, false);
+		break;
+	}
+
+	kfree(adev->sdma.ip_dump);
+
+	return 0;
+}
+
+/*
+ * sdma_v4_4_2_hw_init - hardware init for the SDMA IP block.
+ *
+ * Programs the golden registers (skipped when running as an SR-IOV VF) and
+ * starts all SDMA instances without restore.
+ * Returns the result of sdma_v4_4_2_inst_start().
+ */
+static int sdma_v4_4_2_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	/* one bit per SDMA instance */
+	uint32_t all_insts = GENMASK(adev->sdma.num_instances - 1, 0);
+
+	if (!amdgpu_sriov_vf(adev))
+		sdma_v4_4_2_inst_init_golden_registers(adev, all_insts);
+
+	return sdma_v4_4_2_inst_start(adev, all_insts, false);
+}
+
+/*
+ * sdma_v4_4_2_hw_fini - hardware teardown for the SDMA IP block.
+ *
+ * Does nothing when running as an SR-IOV VF. Otherwise calls
+ * amdgpu_irq_put() on the ECC interrupt of every instance (when RAS is
+ * supported for SDMA), then disables context switching and the engines on
+ * all instances. Always returns 0.
+ */
+static int sdma_v4_4_2_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t inst_mask;
+ int i;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ /* one bit per SDMA instance */
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+ }
+ }
+
+ sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask);
+ sdma_v4_4_2_inst_enable(adev, false, inst_mask);
+
+ return 0;
+}
+
+/* Forward declaration: used by sdma_v4_4_2_suspend() below before its definition. */
+static int sdma_v4_4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state);
+
+/*
+ * sdma_v4_4_2_suspend - suspend the SDMA IP block.
+ *
+ * When called during a GPU reset, clock gating is ungated first. The actual
+ * work is done by sdma_v4_4_2_hw_fini(), whose result is returned.
+ */
+static int sdma_v4_4_2_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_in_reset(adev))
+ sdma_v4_4_2_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE);
+
+ return sdma_v4_4_2_hw_fini(ip_block);
+}
+
+/* sdma_v4_4_2_resume - resume the SDMA IP block; identical to sdma_v4_4_2_hw_init(). */
+static int sdma_v4_4_2_resume(struct amdgpu_ip_block *ip_block)
+{
+ return sdma_v4_4_2_hw_init(ip_block);
+}
+
+/*
+ * sdma_v4_4_2_is_idle - check whether every SDMA instance reports idle.
+ *
+ * Reads SDMA_STATUS_REG of each instance in order and stops at the first one
+ * whose IDLE bit is clear. Returns true only if all instances are idle.
+ */
+static bool sdma_v4_4_2_is_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	u32 inst;
+
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		u32 status = RREG32_SDMA(inst, regSDMA_STATUS_REG);
+
+		if ((status & SDMA_STATUS_REG__IDLE_MASK) == 0)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * sdma_v4_4_2_wait_for_idle - wait until every SDMA instance reports idle.
+ *
+ * Polls the instance status registers once per microsecond, for at most
+ * adev->usec_timeout iterations. Each poll reads the instances in order and
+ * stops at the first busy one.
+ * Returns 0 once all instances are idle, -ETIMEDOUT otherwise.
+ */
+static int sdma_v4_4_2_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	unsigned int usec;
+
+	for (usec = 0; usec < adev->usec_timeout; usec++) {
+		if (sdma_v4_4_2_is_idle(ip_block))
+			return 0;
+		udelay(1);
+	}
+
+	return -ETIMEDOUT;
+}
+
+/* sdma_v4_4_2_soft_reset - not implemented yet; does nothing and returns 0. */
+static int sdma_v4_4_2_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ /* todo */
+
+ return 0;
+}
+
+/*
+ * sdma_v4_4_2_is_queue_selected - test the SELECTED bit of a queue's context status.
+ *
+ * Reads the PAGE or GFX CONTEXT_STATUS register of @instance_id, depending on
+ * @is_page_queue, and returns true when the SELECTED bit is set. The GFX mask
+ * is applied to both registers.
+ */
+static bool sdma_v4_4_2_is_queue_selected(struct amdgpu_device *adev, uint32_t instance_id, bool is_page_queue)
+{
+	uint32_t status_reg;
+	uint32_t status;
+
+	if (is_page_queue)
+		status_reg = regSDMA_PAGE_CONTEXT_STATUS;
+	else
+		status_reg = regSDMA_GFX_CONTEXT_STATUS;
+
+	status = RREG32(sdma_v4_4_2_get_reg_offset(adev, instance_id, status_reg));
+
+	/* Check if the SELECTED bit is set */
+	return !!(status & SDMA_GFX_CONTEXT_STATUS__SELECTED_MASK);
+}
+
+/*
+ * sdma_v4_4_2_reset_queue - reset the SDMA engine that owns @ring.
+ *
+ * Suspends amdkfd, resets the engine identified by ring->me through
+ * amdgpu_sdma_reset_engine(), then resumes amdkfd regardless of the result.
+ * @vmid and @timedout_fence are not used by this implementation.
+ * Returns the result of amdgpu_sdma_reset_engine().
+ */
+static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 id = ring->me;
+ int r;
+
+ amdgpu_amdkfd_suspend(adev, true);
+ r = amdgpu_sdma_reset_engine(adev, id, false);
+ amdgpu_amdkfd_resume(adev, true);
+ return r;
+}
+
+/*
+ * sdma_v4_4_2_stop_queue - prepare an SDMA instance for reset and stop its queues.
+ *
+ * Records which queue (gfx/page) is currently selected as the guilty one,
+ * caches the ring read pointers, then stops the gfx queue and, when present,
+ * the page queue of the instance identified by ring->me.
+ * Returns 0 on success, -EINVAL when running as an SR-IOV VF.
+ */
+static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 instance_id = ring->me;
+ u32 inst_mask;
+ uint64_t rptr;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
+ /* Check if this queue is the guilty one */
+ adev->sdma.instance[instance_id].gfx_guilty =
+ sdma_v4_4_2_is_queue_selected(adev, instance_id, false);
+ if (adev->sdma.has_page_queue)
+ adev->sdma.instance[instance_id].page_guilty =
+ sdma_v4_4_2_is_queue_selected(adev, instance_id, true);
+
+ /* Cache the rptr before reset, after the reset,
+ * all of the registers will be reset to 0
+ */
+ rptr = amdgpu_ring_get_rptr(ring);
+ ring->cached_rptr = rptr;
+ /* Cache the rptr for the page queue if it exists */
+ if (adev->sdma.has_page_queue) {
+ struct amdgpu_ring *page_ring = &adev->sdma.instance[instance_id].page;
+ rptr = amdgpu_ring_get_rptr(page_ring);
+ page_ring->cached_rptr = rptr;
+ }
+
+ /* stop queue */
+ /* single-bit mask selecting only this ring's instance */
+ inst_mask = 1 << ring->me;
+ sdma_v4_4_2_inst_gfx_stop(adev, inst_mask);
+ if (adev->sdma.has_page_queue)
+ sdma_v4_4_2_inst_page_stop(adev, inst_mask);
+
+ return 0;
+}
+
+/*
+ * sdma_v4_4_2_restore_queue - restart an SDMA instance after a reset.
+ *
+ * Waits 50us, then polls the HALT field of SDMA_F32_CNTL once per
+ * microsecond (at most adev->usec_timeout times) until it reads 0, and
+ * finally restarts the instance with restore enabled.
+ * Returns -ETIMEDOUT if the engine stays halted, otherwise the result of
+ * sdma_v4_4_2_inst_start().
+ */
+static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 inst_mask;
+ int i, r;
+
+ /* single-bit mask selecting only this ring's instance */
+ inst_mask = 1 << ring->me;
+ udelay(50);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!REG_GET_FIELD(RREG32_SDMA(ring->me, regSDMA_F32_CNTL), SDMA_F32_CNTL, HALT))
+ break;
+ udelay(1);
+ }
+
+ /* the loop ran to completion without seeing HALT == 0 */
+ if (i == adev->usec_timeout) {
+ dev_err(adev->dev, "timed out waiting for SDMA%d unhalt after reset\n",
+ ring->me);
+ return -ETIMEDOUT;
+ }
+
+ r = sdma_v4_4_2_inst_start(adev, inst_mask, true);
+
+ return r;
+}
+
+/*
+ * sdma_v4_4_2_soft_reset_engine - reset one SDMA engine through the DPM interface.
+ *
+ * @instance_id is a logical instance ID; GET_INST() converts it before the
+ * single-bit mask is passed to amdgpu_dpm_reset_sdma().
+ * Returns the result of amdgpu_dpm_reset_sdma().
+ */
+static int sdma_v4_4_2_soft_reset_engine(struct amdgpu_device *adev,
+ u32 instance_id)
+{
+ /* For SDMA 4.x, use the existing DPM interface for backward compatibility
+ * we need to convert the logical instance ID to physical instance ID before reset.
+ */
+ return amdgpu_dpm_reset_sdma(adev, 1 << GET_INST(SDMA0, instance_id));
+}
+
+/*
+ * sdma_v4_4_2_set_trap_irq_state - enable or disable the trap interrupt.
+ *
+ * Read-modify-writes SDMA_CNTL of the instance selected by @type, setting
+ * TRAP_ENABLE to 1 when @state is AMDGPU_IRQ_STATE_ENABLE and to 0 otherwise.
+ * Always returns 0.
+ */
+static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev,
+					  struct amdgpu_irq_src *source,
+					  unsigned type,
+					  enum amdgpu_interrupt_state state)
+{
+	u32 enable = (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0;
+	u32 cntl;
+
+	cntl = RREG32_SDMA(type, regSDMA_CNTL);
+	cntl = REG_SET_FIELD(cntl, SDMA_CNTL, TRAP_ENABLE, enable);
+	WREG32_SDMA(type, regSDMA_CNTL, cntl);
+
+	return 0;
+}
+
+/*
+ * sdma_v4_4_2_process_trap_irq - handle an SDMA trap interrupt.
+ *
+ * Resolves the SDMA instance that raised the interrupt and processes the
+ * fences of its gfx ring (ring_id 0) or page ring (ring_id 1).
+ * Always returns 0.
+ */
+static int sdma_v4_4_2_process_trap_irq(struct amdgpu_device *adev,
+					struct amdgpu_irq_src *source,
+					struct amdgpu_iv_entry *entry)
+{
+	uint32_t inst;
+
+	DRM_DEBUG("IH: SDMA trap\n");
+
+	/*
+	 * The client id only identifies the SDMA instance within an AID. The
+	 * node id of the interrupt entry maps to the AID itself, so step
+	 * through the instances one AID at a time until the AID id matches.
+	 */
+	inst = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
+	while (inst < adev->sdma.num_instances &&
+	       adev->sdma.instance[inst].aid_id !=
+	       node_id_to_phys_map[entry->node_id])
+		inst += adev->sdma.num_inst_per_aid;
+
+	if (inst >= adev->sdma.num_instances) {
+		dev_WARN_ONCE(
+			adev->dev, 1,
+			"Couldn't find the right sdma instance in trap handler");
+		return 0;
+	}
+
+	/* other ring ids are ignored */
+	if (entry->ring_id == 0)
+		amdgpu_fence_process(&adev->sdma.instance[inst].ring);
+	else if (entry->ring_id == 1)
+		amdgpu_fence_process(&adev->sdma.instance[inst].page);
+
+	return 0;
+}
+
+/*
+ * Compiled out: RAS data callback. It returns AMDGPU_RAS_SUCCESS in every
+ * case and only forwards to amdgpu_sdma_process_ras_data_cb() when RAS is
+ * not supported for SDMA and the client id maps to a valid instance.
+ */
+#if 0
+static int sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry)
+{
+ int instance;
+
+ /* When "Full RAS" is enabled, the per-IP interrupt sources should
+ * be disabled and the driver should only look for the aggregated
+ * interrupt via sync flood
+ */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA))
+ goto out;
+
+ instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
+ if (instance < 0)
+ goto out;
+
+ amdgpu_sdma_process_ras_data_cb(adev, err_data, entry);
+
+out:
+ return AMDGPU_RAS_SUCCESS;
+}
+#endif
+
+/*
+ * sdma_v4_4_2_process_illegal_inst_irq - handle an illegal-instruction interrupt.
+ *
+ * Logs the error and, for ring_id 0, signals a scheduler fault on the gfx
+ * ring of the instance derived from the interrupt's client id. Interrupts
+ * whose client id does not map to a valid instance index are ignored.
+ * Always returns 0.
+ */
+static int sdma_v4_4_2_process_illegal_inst_irq(struct amdgpu_device *adev,
+					       struct amdgpu_irq_src *source,
+					       struct amdgpu_iv_entry *entry)
+{
+	int instance;
+
+	DRM_ERROR("Illegal instruction in SDMA command stream\n");
+
+	instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
+	/*
+	 * Reject both ends of the range before indexing adev->sdma.instance[],
+	 * the same validation sdma_v4_4_2_print_iv_entry() applies.
+	 */
+	if (instance < 0 || instance >= adev->sdma.num_instances)
+		return 0;
+
+	switch (entry->ring_id) {
+	case 0:
+		drm_sched_fault(&adev->sdma.instance[instance].ring.sched);
+		break;
+	default:
+		/* other ring ids are ignored */
+		break;
+	}
+	return 0;
+}
+
+/*
+ * sdma_v4_4_2_set_ecc_irq_state - enable or disable the DRAM ECC interrupt.
+ *
+ * Read-modify-writes SDMA_CNTL of the instance selected by @type, setting
+ * DRAM_ECC_INT_ENABLE to 1 when @state is AMDGPU_IRQ_STATE_ENABLE and to 0
+ * otherwise. Always returns 0.
+ */
+static int sdma_v4_4_2_set_ecc_irq_state(struct amdgpu_device *adev,
+					 struct amdgpu_irq_src *source,
+					 unsigned type,
+					 enum amdgpu_interrupt_state state)
+{
+	u32 enable = (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0;
+	u32 cntl;
+
+	cntl = RREG32_SDMA(type, regSDMA_CNTL);
+	cntl = REG_SET_FIELD(cntl, SDMA_CNTL, DRAM_ECC_INT_ENABLE, enable);
+	WREG32_SDMA(type, regSDMA_CNTL, cntl);
+
+	return 0;
+}
+
+/*
+ * sdma_v4_4_2_print_iv_entry - log the details of an SDMA interrupt entry.
+ *
+ * Prints (rate-limited, debug level) the instance, the address decoded from
+ * the entry's source data, src_id, ring, vmid and pasid, followed by the
+ * owning process/thread when task info is found for the pasid.
+ * Returns 0 on success, -EINVAL when the client id does not map to a valid
+ * SDMA instance.
+ */
+static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ int instance;
+ struct amdgpu_task_info *task_info;
+ u64 addr;
+
+ instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
+ if (instance < 0 || instance >= adev->sdma.num_instances) {
+ dev_err(adev->dev, "sdma instance invalid %d\n", instance);
+ return -EINVAL;
+ }
+
+ /* src_data[0] is shifted up by 12 bits; the low 4 bits of src_data[1] go to bits 44..47 */
+ addr = (u64)entry->src_data[0] << 12;
+ addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+
+ dev_dbg_ratelimited(adev->dev,
+ "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u pasid:%u\n",
+ instance, addr, entry->src_id, entry->ring_id, entry->vmid,
+ entry->pasid);
+
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ dev_dbg_ratelimited(adev->dev, " for process %s pid %d thread %s pid %d\n",
+ task_info->process_name, task_info->tgid,
+ task_info->task.comm, task_info->task.pid);
+ /* pairs with amdgpu_vm_get_task_info_pasid() above */
+ amdgpu_vm_put_task_info(task_info);
+ }
+
+ return 0;
+}
+
+/*
+ * sdma_v4_4_2_process_vm_hole_irq - handle the VM hole interrupt.
+ * Only logs the event and the interrupt entry; always returns 0.
+ */
+static int sdma_v4_4_2_process_vm_hole_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ dev_dbg_ratelimited(adev->dev, "MC or SEM address in VM hole\n");
+ sdma_v4_4_2_print_iv_entry(adev, entry);
+ return 0;
+}
+
+static int sdma_v4_4_2_process_doorbell_invalid_irq(struct amdgpu_device *adev, /* IRQ handler: debug-log an invalid doorbell event; always returns 0 */
+ struct amdgpu_irq_src *source, /* unused */
+ struct amdgpu_iv_entry *entry) /* IV entry that is dumped below */
+{
+
+ dev_dbg_ratelimited(adev->dev, "SDMA received a doorbell from BIF with byte_enable !=0xff\n");
+ sdma_v4_4_2_print_iv_entry(adev, entry); /* return value (possible -EINVAL) is deliberately not propagated */
+ return 0;
+}
+
+static int sdma_v4_4_2_process_pool_timeout_irq(struct amdgpu_device *adev, /* IRQ handler: debug-log a POLL_REG/MEM timeout; always returns 0 */
+ struct amdgpu_irq_src *source, /* unused */
+ struct amdgpu_iv_entry *entry) /* IV entry that is dumped below */
+{
+ dev_dbg_ratelimited(adev->dev,
+ "Polling register/memory timeout executing POLL_REG/MEM with finite timer\n");
+ sdma_v4_4_2_print_iv_entry(adev, entry); /* return value (possible -EINVAL) is deliberately not propagated */
+ return 0;
+}
+
+static int sdma_v4_4_2_process_srbm_write_irq(struct amdgpu_device *adev, /* IRQ handler: debug-log an SRBM_WRITE seen in a non-privileged command buffer; always returns 0 */
+ struct amdgpu_irq_src *source, /* unused */
+ struct amdgpu_iv_entry *entry) /* IV entry that is dumped below */
+{
+ dev_dbg_ratelimited(adev->dev,
+ "SDMA gets an Register Write SRBM_WRITE command in non-privilege command buffer\n");
+ sdma_v4_4_2_print_iv_entry(adev, entry); /* return value (possible -EINVAL) is deliberately not propagated */
+ return 0;
+}
+
+static int sdma_v4_4_2_process_ctxt_empty_irq(struct amdgpu_device *adev, /* IRQ handler: debug-log a context-empty event; always returns 0 */
+					    struct amdgpu_irq_src *source, /* unused */
+					    struct amdgpu_iv_entry *entry) /* IV entry that is dumped below */
+{
+	/* There is nothing useful to be done here, only kept for debug */
+	dev_dbg_ratelimited(adev->dev, "SDMA context empty interrupt\n");
+	sdma_v4_4_2_print_iv_entry(adev, entry); /* return value (possible -EINVAL) is deliberately not propagated */
+	return 0;
+}
+
+static void sdma_v4_4_2_inst_update_medium_grain_light_sleep( /* set (enable) or clear (disable) MEM_POWER_OVERRIDE in SDMA_POWER_CNTL */
+ struct amdgpu_device *adev, bool enable, uint32_t inst_mask) /* inst_mask: one bit per SDMA instance to program */
+{
+ uint32_t data, def;
+ int i;
+
+ /* leave as default if it is not driver controlled */
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS))
+ return;
+
+ if (enable) {
+ for_each_inst(i, inst_mask) {
+ /* 1-not override: enable sdma mem light sleep */
+ def = data = RREG32_SDMA(i, regSDMA_POWER_CNTL); /* def keeps the value read back for the change test below */
+ data |= SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ if (def != data) /* skip the register write when the bit is already set */
+ WREG32_SDMA(i, regSDMA_POWER_CNTL, data);
+ }
+ } else {
+ for_each_inst(i, inst_mask) {
+ /* 0-override:disable sdma mem light sleep */
+ def = data = RREG32_SDMA(i, regSDMA_POWER_CNTL); /* def keeps the value read back for the change test below */
+ data &= ~SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ if (def != data) /* skip the register write when the bit is already clear */
+ WREG32_SDMA(i, regSDMA_POWER_CNTL, data);
+ }
+ }
+}
+
+static void sdma_v4_4_2_inst_update_medium_grain_clock_gating( /* clear (enable) or set (disable) SOFT_OVERRIDE0..5 in SDMA_CLK_CTRL */
+ struct amdgpu_device *adev, bool enable, uint32_t inst_mask) /* inst_mask: one bit per SDMA instance to program */
+{
+ uint32_t data, def;
+ int i;
+
+ /* leave as default if it is not driver controlled */
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG))
+ return;
+
+ if (enable) {
+ for_each_inst(i, inst_mask) {
+ def = data = RREG32_SDMA(i, regSDMA_CLK_CTRL); /* def keeps the value read back for the change test below */
+ data &= ~(SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK | /* enabling gating means clearing all six soft overrides */
+ SDMA_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE0_MASK);
+ if (def != data) /* skip the register write when nothing changed */
+ WREG32_SDMA(i, regSDMA_CLK_CTRL, data);
+ }
+ } else {
+ for_each_inst(i, inst_mask) {
+ def = data = RREG32_SDMA(i, regSDMA_CLK_CTRL); /* def keeps the value read back for the change test below */
+ data |= (SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK | /* disabling gating means setting all six soft overrides */
+ SDMA_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE0_MASK);
+ if (def != data) /* skip the register write when nothing changed */
+ WREG32_SDMA(i, regSDMA_CLK_CTRL, data);
+ }
+ }
+}
+
+static int sdma_v4_4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block, /* apply MGCG and light-sleep settings to all SDMA instances; always returns 0 */
+ enum amd_clockgating_state state) /* AMD_CG_STATE_GATE enables both features, any other value disables them */
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t inst_mask;
+
+ if (amdgpu_sriov_vf(adev)) /* nothing is programmed when running as an SR-IOV VF */
+ return 0;
+
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0); /* bits 0..num_instances-1: every SDMA instance */
+
+ sdma_v4_4_2_inst_update_medium_grain_clock_gating(
+ adev, state == AMD_CG_STATE_GATE, inst_mask);
+ sdma_v4_4_2_inst_update_medium_grain_light_sleep(
+ adev, state == AMD_CG_STATE_GATE, inst_mask);
+ return 0;
+}
+
+static int sdma_v4_4_2_set_powergating_state(struct amdgpu_ip_block *ip_block, /* power-gating hook: programs nothing */
+ enum amd_powergating_state state) /* ignored */
+{
+ return 0; /* always reports success */
+}
+
+static void sdma_v4_4_2_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) /* OR the active SDMA clock-gating features into *flags, as read from instance 0 */
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0; /* NOTE(review): no early return — the registers below are still read under SR-IOV; confirm this is intended */
+
+ /* AMD_CG_SUPPORT_SDMA_MGCG: reported when SOFT_OVERRIDE5 is clear */
+ data = RREG32(SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, 0), regSDMA_CLK_CTRL));
+ if (!(data & SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK))
+ *flags |= AMD_CG_SUPPORT_SDMA_MGCG;
+
+ /* AMD_CG_SUPPORT_SDMA_LS: reported when MEM_POWER_OVERRIDE is set */
+ data = RREG32(SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, 0), regSDMA_POWER_CNTL));
+ if (data & SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
+ *flags |= AMD_CG_SUPPORT_SDMA_LS;
+}
+
+static void sdma_v4_4_2_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) /* print the register snapshot held in adev->sdma.ip_dump to p */
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2);
+ uint32_t instance_offset;
+
+ if (!adev->sdma.ip_dump) /* no snapshot buffer: nothing to print */
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count; /* ip_dump holds reg_count consecutive values per instance */
+ drm_printf(p, "\nInstance:%d\n", i);
+
+ for (j = 0; j < reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_4_4_2[j].reg_name,
+ adev->sdma.ip_dump[instance_offset + j]);
+ }
+}
+
+static void sdma_v4_4_2_dump_ip_state(struct amdgpu_ip_block *ip_block) /* read every register in sdma_reg_list_4_4_2 for each instance into adev->sdma.ip_dump */
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t instance_offset;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2);
+
+ if (!adev->sdma.ip_dump) /* no snapshot buffer: nothing to fill */
+ return;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count; /* same per-instance layout that sdma_v4_4_2_print_ip_state reads back */
+ for (j = 0; j < reg_count; j++)
+ adev->sdma.ip_dump[instance_offset + j] =
+ RREG32(sdma_v4_4_2_get_reg_offset(adev, i,
+ sdma_reg_list_4_4_2[j].reg_offset));
+ }
+}
+
+const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = { /* IP-level callback table; referenced by sdma_v4_4_2_ip_block.funcs */
+ .name = "sdma_v4_4_2",
+ .early_init = sdma_v4_4_2_early_init,
+ .late_init = sdma_v4_4_2_late_init,
+ .sw_init = sdma_v4_4_2_sw_init,
+ .sw_fini = sdma_v4_4_2_sw_fini,
+ .hw_init = sdma_v4_4_2_hw_init,
+ .hw_fini = sdma_v4_4_2_hw_fini,
+ .suspend = sdma_v4_4_2_suspend,
+ .resume = sdma_v4_4_2_resume,
+ .is_idle = sdma_v4_4_2_is_idle,
+ .wait_for_idle = sdma_v4_4_2_wait_for_idle,
+ .soft_reset = sdma_v4_4_2_soft_reset,
+ .set_clockgating_state = sdma_v4_4_2_set_clockgating_state,
+ .set_powergating_state = sdma_v4_4_2_set_powergating_state, /* stub that only returns 0 */
+ .get_clockgating_state = sdma_v4_4_2_get_clockgating_state,
+ .dump_ip_state = sdma_v4_4_2_dump_ip_state, /* captures registers into adev->sdma.ip_dump */
+ .print_ip_state = sdma_v4_4_2_print_ip_state, /* prints what dump_ip_state captured */
+};
+
+static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = { /* ring callbacks installed on each instance's 'ring' by sdma_v4_4_2_set_ring_funcs */
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xff,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .get_rptr = sdma_v4_4_2_ring_get_rptr,
+ .get_wptr = sdma_v4_4_2_ring_get_wptr,
+ .set_wptr = sdma_v4_4_2_ring_set_wptr,
+ .emit_frame_size = /* sum of the per-packet dword counts annotated on the lines below */
+ 6 + /* sdma_v4_4_2_ring_emit_hdp_flush */
+ 3 + /* hdp invalidate */
+ 6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */
+ /* sdma_v4_4_2_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ 10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */
+ .emit_ib = sdma_v4_4_2_ring_emit_ib,
+ .emit_fence = sdma_v4_4_2_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v4_4_2_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v4_4_2_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v4_4_2_ring_emit_hdp_flush,
+ .test_ring = sdma_v4_4_2_ring_test_ring,
+ .test_ib = sdma_v4_4_2_ring_test_ib,
+ .insert_nop = sdma_v4_4_2_ring_insert_nop,
+ .pad_ib = sdma_v4_4_2_ring_pad_ib,
+ .emit_wreg = sdma_v4_4_2_ring_emit_wreg,
+ .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, /* generic amdgpu helper rather than an SDMA-specific one */
+ .reset = sdma_v4_4_2_reset_queue,
+};
+
+static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = { /* ring callbacks for each instance's 'page' ring; installed only when has_page_queue is set */
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xff,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .get_rptr = sdma_v4_4_2_ring_get_rptr,
+ .get_wptr = sdma_v4_4_2_page_ring_get_wptr, /* page-specific; differs from sdma_v4_4_2_ring_funcs */
+ .set_wptr = sdma_v4_4_2_page_ring_set_wptr, /* page-specific; differs from sdma_v4_4_2_ring_funcs */
+ .emit_frame_size = /* sum of the per-packet dword counts annotated on the lines below */
+ 6 + /* sdma_v4_4_2_ring_emit_hdp_flush */
+ 3 + /* hdp invalidate */
+ 6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */
+ /* sdma_v4_4_2_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ 10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */
+ .emit_ib = sdma_v4_4_2_ring_emit_ib,
+ .emit_fence = sdma_v4_4_2_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v4_4_2_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v4_4_2_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v4_4_2_ring_emit_hdp_flush,
+ .test_ring = sdma_v4_4_2_ring_test_ring,
+ .test_ib = sdma_v4_4_2_ring_test_ib,
+ .insert_nop = sdma_v4_4_2_ring_insert_nop,
+ .pad_ib = sdma_v4_4_2_ring_pad_ib,
+ .emit_wreg = sdma_v4_4_2_ring_emit_wreg,
+ .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, /* generic amdgpu helper rather than an SDMA-specific one */
+ .reset = sdma_v4_4_2_reset_queue,
+};
+
+static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev) /* install ring callbacks and record me/aid_id for every SDMA instance */
+{
+ int i, dev_inst;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->sdma.instance[i].ring.funcs = &sdma_v4_4_2_ring_funcs;
+ adev->sdma.instance[i].ring.me = i; /* me is the logical instance index */
+ if (adev->sdma.has_page_queue) { /* the page ring is only set up when a page queue exists */
+ adev->sdma.instance[i].page.funcs =
+ &sdma_v4_4_2_page_ring_funcs;
+ adev->sdma.instance[i].page.me = i;
+ }
+
+ dev_inst = GET_INST(SDMA0, i); /* translate logical index i into the device (physical) instance */
+ /* AID to which SDMA belongs depends on physical instance */
+ adev->sdma.instance[i].aid_id =
+ dev_inst / adev->sdma.num_inst_per_aid;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_trap_irq_funcs = { /* trap interrupt: has both a state setter and a handler */
+ .set = sdma_v4_4_2_set_trap_irq_state,
+ .process = sdma_v4_4_2_process_trap_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_illegal_inst_irq_funcs = { /* handler only, no .set callback */
+ .process = sdma_v4_4_2_process_illegal_inst_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_ecc_irq_funcs = { /* ECC interrupt: local state setter, common amdgpu_sdma handler */
+ .set = sdma_v4_4_2_set_ecc_irq_state,
+ .process = amdgpu_sdma_process_ecc_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_vm_hole_irq_funcs = { /* handler only, no .set callback */
+ .process = sdma_v4_4_2_process_vm_hole_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_doorbell_invalid_irq_funcs = { /* handler only, no .set callback */
+ .process = sdma_v4_4_2_process_doorbell_invalid_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_pool_timeout_irq_funcs = { /* handler only, no .set callback */
+ .process = sdma_v4_4_2_process_pool_timeout_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_srbm_write_irq_funcs = { /* handler only, no .set callback */
+ .process = sdma_v4_4_2_process_srbm_write_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_ctxt_empty_irq_funcs = { /* handler only, no .set callback */
+ .process = sdma_v4_4_2_process_ctxt_empty_irq,
+};
+
+static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev) /* wire every SDMA interrupt source to its callback table */
+{
+ adev->sdma.trap_irq.num_types = adev->sdma.num_instances; /* one interrupt type per SDMA instance */
+ adev->sdma.ecc_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.vm_hole_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.doorbell_invalid_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.pool_timeout_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.srbm_write_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.ctxt_empty_irq.num_types = adev->sdma.num_instances; /* NOTE(review): illegal_inst_irq.num_types is not set here — confirm that is intended */
+
+ adev->sdma.trap_irq.funcs = &sdma_v4_4_2_trap_irq_funcs;
+ adev->sdma.illegal_inst_irq.funcs = &sdma_v4_4_2_illegal_inst_irq_funcs;
+ adev->sdma.ecc_irq.funcs = &sdma_v4_4_2_ecc_irq_funcs;
+ adev->sdma.vm_hole_irq.funcs = &sdma_v4_4_2_vm_hole_irq_funcs;
+ adev->sdma.doorbell_invalid_irq.funcs = &sdma_v4_4_2_doorbell_invalid_irq_funcs;
+ adev->sdma.pool_timeout_irq.funcs = &sdma_v4_4_2_pool_timeout_irq_funcs;
+ adev->sdma.srbm_write_irq.funcs = &sdma_v4_4_2_srbm_write_irq_funcs;
+ adev->sdma.ctxt_empty_irq.funcs = &sdma_v4_4_2_ctxt_empty_irq_funcs;
+}
+
+/**
+ * sdma_v4_4_2_emit_copy_buffer - copy buffer using the sDMA engine
+ *
+ * @ib: indirect buffer the copy packet is appended to
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer (written to the packet as byte_count - 1)
+ * @copy_flags: copy flags for the buffers (AMDGPU_COPY_FLAGS_TMZ sets the TMZ header bit)
+ *
+ * Copy GPU buffers using the DMA engine (appends a 7-dword linear COPY packet).
+ * Used by the amdgpu ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+static void sdma_v4_4_2_emit_copy_buffer(struct amdgpu_ib *ib,
+ uint64_t src_offset,
+ uint64_t dst_offset,
+ uint32_t byte_count,
+ uint32_t copy_flags)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | /* dword 0: packet header */
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
+ ib->ptr[ib->length_dw++] = byte_count - 1; /* NOTE(review): wraps if byte_count is 0 — presumably callers never pass 0 */
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+}
+
+/**
+ * sdma_v4_4_2_emit_fill_buffer - fill buffer using the sDMA engine
+ *
+ * @ib: indirect buffer the fill packet is appended to
+ * @src_data: value to write to buffer
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer (written to the packet as byte_count - 1)
+ *
+ * Fill GPU buffers using the DMA engine (appends a 5-dword CONST_FILL packet).
+ */
+static void sdma_v4_4_2_emit_fill_buffer(struct amdgpu_ib *ib,
+ uint32_t src_data,
+ uint64_t dst_offset,
+ uint32_t byte_count)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL); /* dword 0: packet header */
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = src_data;
+ ib->ptr[ib->length_dw++] = byte_count - 1; /* NOTE(review): wraps if byte_count is 0 — presumably callers never pass 0 */
+}
+
+static const struct amdgpu_buffer_funcs sdma_v4_4_2_buffer_funcs = { /* copy/fill callbacks published through adev->mman.buffer_funcs */
+ .copy_max_bytes = 0x400000,
+ .copy_num_dw = 7, /* matches the 7 dwords written by sdma_v4_4_2_emit_copy_buffer */
+ .emit_copy_buffer = sdma_v4_4_2_emit_copy_buffer,
+
+ .fill_max_bytes = 0x400000,
+ .fill_num_dw = 5, /* matches the 5 dwords written by sdma_v4_4_2_emit_fill_buffer */
+ .emit_fill_buffer = sdma_v4_4_2_emit_fill_buffer,
+};
+
+static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev)
+{
+	/* buffer moves use instance 0: its page ring when a page queue exists, else its 'ring' */
+	adev->mman.buffer_funcs = &sdma_v4_4_2_buffer_funcs;
+	adev->mman.buffer_funcs_ring = adev->sdma.has_page_queue ?
+		&adev->sdma.instance[0].page :
+		&adev->sdma.instance[0].ring;
+}
+
+static const struct amdgpu_vm_pte_funcs sdma_v4_4_2_vm_pte_funcs = { /* page-table update callbacks published through adev->vm_manager.vm_pte_funcs */
+ .copy_pte_num_dw = 7, /* dword count for copy_pte; the callback itself is defined outside this view */
+ .copy_pte = sdma_v4_4_2_vm_copy_pte,
+
+ .write_pte = sdma_v4_4_2_vm_write_pte,
+ .set_pte_pde = sdma_v4_4_2_vm_set_pte_pde,
+};
+
+static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev)
+{
+	const bool use_page = adev->sdma.has_page_queue;
+	unsigned idx;
+
+	/* publish the SDMA page-table update callbacks */
+	adev->vm_manager.vm_pte_funcs = &sdma_v4_4_2_vm_pte_funcs;
+
+	/* one scheduler per instance: the page ring's when present, else the main ring's */
+	for (idx = 0; idx < adev->sdma.num_instances; idx++)
+		adev->vm_manager.vm_pte_scheds[idx] = use_page ?
+			&adev->sdma.instance[idx].page.sched :
+			&adev->sdma.instance[idx].ring.sched;
+	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+}
+
+/**
+ * sdma_v4_4_2_update_reset_mask - update reset mask for SDMA
+ * @adev: Pointer to the AMDGPU device structure
+ *
+ * This function updates the reset mask for SDMA: per-queue reset is added to
+ * adev->sdma.supported_reset depending on the GC IP version and firmware versions.
+ *
+ */
+static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev)
+{
+ /* per queue reset not supported for SRIOV */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /*
+ * The user queue relies on MEC fw and pmfw when the sdma queue is reset.
+ * Both are checked here so that old MEC fw and pmfw are skipped.
+ */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ if ((adev->gfx.mec_fw_version >= 0xb0) && /* minimum MEC fw version for GC 9.4.3 / 9.4.4 */
+ amdgpu_dpm_reset_sdma_is_supported(adev) &&
+ !adev->debug_disable_gpu_ring_reset) /* debug switch that turns ring reset off */
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ case IP_VERSION(9, 5, 0):
+ if ((adev->gfx.mec_fw_version >= 0xf) && /* minimum MEC fw version for GC 9.5.0 */
+ amdgpu_dpm_reset_sdma_is_supported(adev) &&
+ !adev->debug_disable_gpu_ring_reset) /* debug switch that turns ring reset off */
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ default: /* other GC versions: mask left unchanged */
+ break;
+ }
+
+}
+
+const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block = { /* IP block descriptor for SDMA, version 4.4.2 */
+ .type = AMD_IP_BLOCK_TYPE_SDMA,
+ .major = 4,
+ .minor = 4,
+ .rev = 2,
+ .funcs = &sdma_v4_4_2_ip_funcs, /* callback table defined earlier in this file */
+};
+
+static int sdma_v4_4_2_xcp_resume(void *handle, uint32_t inst_mask)
+{
+	struct amdgpu_device *adev = handle;
+	const bool not_vf = !amdgpu_sriov_vf(adev);
+
+	/* golden register programming is skipped when running as an SR-IOV VF */
+	if (not_vf)
+		sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask);
+
+	/* start the selected instances and hand the result straight back */
+	return sdma_v4_4_2_inst_start(adev, inst_mask, false);
+}
+
+static int sdma_v4_4_2_xcp_suspend(void *handle, uint32_t inst_mask) /* release ECC IRQs, then disable context switching and the engines in inst_mask; always returns 0 */
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ uint32_t tmp_mask = inst_mask; /* iterate over a copy — presumably for_each_inst consumes its mask; inst_mask is needed again below */
+ int i;
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { /* ECC interrupts are only released when SDMA RAS is supported */
+ for_each_inst(i, tmp_mask) {
+ amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+ }
+ }
+
+ sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask);
+ sdma_v4_4_2_inst_enable(adev, false, inst_mask);
+
+ return 0;
+}
+
+struct amdgpu_xcp_ip_funcs sdma_v4_4_2_xcp_funcs = { /* suspend/resume callbacks that take an instance mask; declared extern in sdma_v4_4_2.h */
+ .suspend = &sdma_v4_4_2_xcp_suspend,
+ .resume = &sdma_v4_4_2_xcp_resume
+};
+
+static const struct amdgpu_ras_err_status_reg_entry sdma_v4_2_2_ue_reg_list[] = { /* NOTE(review): spelled v4_2_2 while the rest of the file uses v4_4_2 — likely a typo; a rename must cover every user */
+ {AMDGPU_RAS_REG_ENTRY(SDMA0, 0, regSDMA_UE_ERR_STATUS_LO, regSDMA_UE_ERR_STATUS_HI), /* LO/HI pair of the SDMA UE error status register */
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SDMA"},
+};
+
+static const struct amdgpu_ras_memory_id_entry sdma_v4_4_2_ras_memory_list[] = { /* memory id -> printable name table passed to amdgpu_ras_inst_query_ras_error_count */
+ {AMDGPU_SDMA_MBANK_DATA_BUF0, "SDMA_MBANK_DATA_BUF0"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF1, "SDMA_MBANK_DATA_BUF1"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF2, "SDMA_MBANK_DATA_BUF2"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF3, "SDMA_MBANK_DATA_BUF3"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF4, "SDMA_MBANK_DATA_BUF4"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF5, "SDMA_MBANK_DATA_BUF5"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF6, "SDMA_MBANK_DATA_BUF6"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF7, "SDMA_MBANK_DATA_BUF7"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF8, "SDMA_MBANK_DATA_BUF8"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF9, "SDMA_MBANK_DATA_BUF9"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF10, "SDMA_MBANK_DATA_BUF10"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF11, "SDMA_MBANK_DATA_BUF11"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF12, "SDMA_MBANK_DATA_BUF12"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF13, "SDMA_MBANK_DATA_BUF13"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF14, "SDMA_MBANK_DATA_BUF14"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF15, "SDMA_MBANK_DATA_BUF15"},
+ {AMDGPU_SDMA_UCODE_BUF, "SDMA_UCODE_BUF"},
+ {AMDGPU_SDMA_RB_CMD_BUF, "SDMA_RB_CMD_BUF"},
+ {AMDGPU_SDMA_IB_CMD_BUF, "SDMA_IB_CMD_BUF"},
+ {AMDGPU_SDMA_UTCL1_RD_FIFO, "SDMA_UTCL1_RD_FIFO"},
+ {AMDGPU_SDMA_UTCL1_RDBST_FIFO, "SDMA_UTCL1_RDBST_FIFO"},
+ {AMDGPU_SDMA_UTCL1_WR_FIFO, "SDMA_UTCL1_WR_FIFO"},
+ {AMDGPU_SDMA_DATA_LUT_FIFO, "SDMA_DATA_LUT_FIFO"},
+ {AMDGPU_SDMA_SPLIT_DAT_BUF, "SDMA_SPLIT_DAT_BUF"},
+};
+
+static void sdma_v4_4_2_inst_query_ras_error_count(struct amdgpu_device *adev, /* query the uncorrectable error count of one instance and record it in the RAS error data */
+ uint32_t sdma_inst, /* logical SDMA instance index */
+ void *ras_err_status) /* actually a struct ras_err_data * (cast below) */
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
+ uint32_t sdma_dev_inst = GET_INST(SDMA0, sdma_inst); /* device instance used for the register query */
+ unsigned long ue_count = 0;
+ struct amdgpu_smuio_mcm_config_info mcm_info = { /* identifies socket and die the count is attributed to */
+ .socket_id = adev->smuio.funcs->get_socket_id(adev),
+ .die_id = adev->sdma.instance[sdma_inst].aid_id,
+ };
+
+ /* sdma v4_4_2 doesn't support query ce counts */
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ sdma_v4_2_2_ue_reg_list,
+ ARRAY_SIZE(sdma_v4_2_2_ue_reg_list),
+ sdma_v4_4_2_ras_memory_list,
+ ARRAY_SIZE(sdma_v4_4_2_ras_memory_list),
+ sdma_dev_inst,
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ &ue_count);
+
+ amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
+}
+
+static void sdma_v4_4_2_query_ras_error_count(struct amdgpu_device *adev,
+					      void *ras_err_status)
+{
+	uint32_t inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
+	int i = 0;
+
+	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { /* warn and bail out without SDMA RAS support */
+		dev_warn(adev->dev, "SDMA RAS is not supported\n");
+		return;
+	}
+
+	for_each_inst(i, inst_mask) /* query every SDMA instance in turn */
+		sdma_v4_4_2_inst_query_ras_error_count(adev, i, ras_err_status);
+}
+
+static void sdma_v4_4_2_inst_reset_ras_error_count(struct amdgpu_device *adev, /* reset the UE error status registers of one SDMA instance */
+ uint32_t sdma_inst) /* logical SDMA instance index */
+{
+ uint32_t sdma_dev_inst = GET_INST(SDMA0, sdma_inst); /* device instance handed to the RAS helper */
+
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ sdma_v4_2_2_ue_reg_list,
+ ARRAY_SIZE(sdma_v4_2_2_ue_reg_list),
+ sdma_dev_inst);
+}
+
+static void sdma_v4_4_2_reset_ras_error_count(struct amdgpu_device *adev)
+{
+	uint32_t inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
+	int i = 0;
+
+	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { /* warn and bail out without SDMA RAS support */
+		dev_warn(adev->dev, "SDMA RAS is not supported\n");
+		return;
+	}
+
+	for_each_inst(i, inst_mask) /* reset every SDMA instance in turn */
+		sdma_v4_4_2_inst_reset_ras_error_count(adev, i);
+}
+
+static const struct amdgpu_ras_block_hw_ops sdma_v4_4_2_ras_hw_ops = { /* RAS hardware callbacks; referenced by sdma_v4_4_2_ras.ras_block.hw_ops */
+ .query_ras_error_count = sdma_v4_4_2_query_ras_error_count,
+ .reset_ras_error_count = sdma_v4_4_2_reset_ras_error_count,
+};
+
+static int sdma_v4_4_2_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, /* decode one ACA bank and log it to the error cache as a UE or CE */
+ enum aca_smu_type type, void *data) /* data is unused; returns 0 or a negative errno (-EINVAL for an unknown type) */
+{
+ struct aca_bank_info info;
+ u64 misc0;
+ int ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret) /* decode failure is passed straight to the caller */
+ return ret;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0]; /* only consumed by the CE branch (error count field) */
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
+ 1ULL); /* a UE bank is always logged with a count of 1 */
+ break;
+ case ACA_SMU_TYPE_CE:
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+ ACA_REG__MISC0__ERRCNT(misc0)); /* a CE bank is logged with the count extracted from MISC0 */
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+/* CODE_SDMA0 - CODE_SDMA4, reference to smu driver if header file. NOTE(review): only four codes are listed below — confirm whether the range should read SDMA0 - SDMA3 */
+static int sdma_v4_4_2_err_codes[] = { 33, 34, 35, 36 };
+
+static bool sdma_v4_4_2_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, /* true only when the bank comes from the AID0 MCA SMU instance and carries an SDMA error code */
+ enum aca_smu_type type, void *data) /* type and data are unused */
+{
+ u32 instlo;
+
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1); /* bit 0 is ignored for the comparison below */
+
+ if (instlo != mmSMNAID_AID0_MCA_SMU)
+ return false;
+
+ if (aca_bank_check_error_codes(handle->adev, bank, /* a non-zero result rejects the bank */
+ sdma_v4_4_2_err_codes,
+ ARRAY_SIZE(sdma_v4_4_2_err_codes)))
+ return false;
+
+ return true;
+}
+
+static const struct aca_bank_ops sdma_v4_4_2_aca_bank_ops = { /* ACA bank callbacks: validity filter plus parser */
+ .aca_bank_parser = sdma_v4_4_2_aca_bank_parser,
+ .aca_bank_is_valid = sdma_v4_4_2_aca_bank_is_valid,
+};
+
+static const struct aca_info sdma_v4_4_2_aca_info = { /* ACA description bound to the SDMA RAS block in sdma_v4_4_2_ras_late_init */
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK, /* only the UE mask is set here */
+ .bank_ops = &sdma_v4_4_2_aca_bank_ops,
+};
+
+static int sdma_v4_4_2_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+	int ret = amdgpu_sdma_ras_late_init(adev, ras_block);
+
+	/* bind the ACA handlers only once the common SDMA RAS late init has succeeded */
+	if (!ret)
+		ret = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__SDMA,
+					  &sdma_v4_4_2_aca_info, NULL);
+
+	return ret;
+}
+
+static struct amdgpu_sdma_ras sdma_v4_4_2_ras = { /* SDMA RAS descriptor installed by sdma_v4_4_2_set_ras_funcs */
+ .ras_block = {
+ .hw_ops = &sdma_v4_4_2_ras_hw_ops, /* query/reset error-count callbacks */
+ .ras_late_init = sdma_v4_4_2_ras_late_init, /* common late init followed by ACA binding */
+ },
+};
+
+static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev) /* point adev->sdma.ras at the v4.4.2 RAS descriptor */
+{
+ adev->sdma.ras = &sdma_v4_4_2_ras;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.h b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.h
new file mode 100644
index 000000000000..d516145529bb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SDMA_V4_4_2_H__
+#define __SDMA_V4_4_2_H__
+
+extern const struct amd_ip_funcs sdma_v4_4_2_ip_funcs;
+extern const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block;
+
+extern struct amdgpu_xcp_ip_funcs sdma_v4_4_2_xcp_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 853d1511b889..8ddc4df06a1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -51,9 +51,6 @@ MODULE_FIRMWARE("amdgpu/navi14_sdma1.bin");
MODULE_FIRMWARE("amdgpu/navi12_sdma.bin");
MODULE_FIRMWARE("amdgpu/navi12_sdma1.bin");
-MODULE_FIRMWARE("amdgpu/cyan_skillfish_sdma.bin");
-MODULE_FIRMWARE("amdgpu/cyan_skillfish_sdma1.bin");
-
MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma1.bin");
@@ -62,10 +59,61 @@ MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma1.bin");
#define SDMA0_HYP_DEC_REG_END 0x5893
#define SDMA1_HYP_DEC_REG_OFFSET 0x20
+static const struct amdgpu_hwip_reg_entry sdma_reg_list_5_0[] = { /* presumably the v5.0 register list for IP state dumps, like sdma_reg_list_4_4_2 — its users are outside this hunk */
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS1_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS2_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS3_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UCODE_CHECKSUM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_CNTL), /* GFX queue registers start here */
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_CNTL), /* PAGE queue registers start here */
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_CNTL), /* RLC0 queue registers start here */
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_INT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_VM_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2) /* the one non-SDMA0 register in the list */
+};
+
static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev);
+static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring);
+static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring);
static const struct soc15_reg_golden golden_settings_sdma_5[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107),
@@ -187,7 +235,7 @@ static u32 sdma_v5_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3
static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(5, 0, 0):
soc15_program_register_sequence(adev,
golden_settings_sdma_5,
@@ -241,106 +289,34 @@ static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev)
// navi10 real chip need to use PSP to load firmware
static int sdma_v5_0_init_microcode(struct amdgpu_device *adev)
{
- const char *chip_name;
- char fw_name[40];
- int err = 0, i;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
- const struct sdma_firmware_header_v1_0 *hdr;
-
- if (amdgpu_sriov_vf(adev) && (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(5, 0, 5)))
- return 0;
-
- DRM_DEBUG("\n");
-
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
- case IP_VERSION(5, 0, 0):
- chip_name = "navi10";
- break;
- case IP_VERSION(5, 0, 2):
- chip_name = "navi14";
- break;
- case IP_VERSION(5, 0, 5):
- chip_name = "navi12";
- break;
- case IP_VERSION(5, 0, 1):
- if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2)
- chip_name = "cyan_skillfish2";
- else
- chip_name = "cyan_skillfish";
- break;
- default:
- BUG();
- }
+ int ret, i;
for (i = 0; i < adev->sdma.num_instances; i++) {
- if (i == 0)
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
- else
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
- err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
- if (err)
- goto out;
- hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
- adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
- adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
- if (adev->sdma.instance[i].feature_version >= 20)
- adev->sdma.instance[i].burst_nop = true;
- DRM_DEBUG("psp_load == '%s'\n",
- adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
- info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
- info->fw = adev->sdma.instance[i].fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
- }
+ ret = amdgpu_sdma_init_microcode(adev, i, false); /* load the microcode of instance i through the common SDMA helper */
+ if (ret)
+ return ret; /* first failure aborts and is reported to the caller */
}
-out:
- if (err) {
- DRM_ERROR("sdma_v5_0: Failed to load firmware \"%s\"\n", fw_name);
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
- }
- }
- return err;
+
+ return 0; /* all instances loaded; also avoids returning an uninitialized 'ret' when num_instances is 0 */
}
-static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring, /* emit a COND_EXE packet; returns the ring offset of its placeholder dword */
+ uint64_t addr) /* 64-bit address written as the packet's two address dwords */
{
unsigned ret;
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, 1);
- ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
- amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+ /* this is the ring offset of the dword that needs to be patched later */
+ ret = ring->wptr & ring->buf_mask;
+ /* insert a dummy value (0) here; it is patched later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void sdma_v5_0_ring_patch_cond_exec(struct amdgpu_ring *ring,
- unsigned offset)
-{
- unsigned cur;
-
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr - 1) & ring->buf_mask;
- if (cur > offset)
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
/**
* sdma_v5_0_ring_get_rptr - get the current read pointer
*
@@ -353,7 +329,7 @@ static uint64_t sdma_v5_0_ring_get_rptr(struct amdgpu_ring *ring)
u64 *rptr;
/* XXX check if swapping is necessary on BE */
- rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);
+ rptr = (u64 *)ring->rptr_cpu_addr;
DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
return ((*rptr) >> 2);
@@ -373,7 +349,7 @@ static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
+ wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
} else {
wptr = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI));
@@ -399,30 +375,32 @@ static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
DRM_DEBUG("Setting write pointer\n");
if (ring->use_doorbell) {
DRM_DEBUG("Using doorbell -- "
- "wptr_offs == 0x%08x "
- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
- ring->wptr_offs,
- lower_32_bits(ring->wptr << 2),
- upper_32_bits(ring->wptr << 2));
+ "wptr_offs == 0x%08x "
+ "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+ "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
/* XXX check if swapping is necessary on BE */
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr << 2);
- adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2);
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr << 2);
DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
- ring->doorbell_index, ring->wptr << 2);
+ ring->doorbell_index, ring->wptr << 2);
WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
} else {
DRM_DEBUG("Not using doorbell -- "
- "mmSDMA%i_GFX_RB_WPTR == 0x%08x "
- "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
- ring->me,
- lower_32_bits(ring->wptr << 2),
- ring->me,
+ "mmSDMA%i_GFX_RB_WPTR == 0x%08x "
+ "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+ ring->me,
+ lower_32_bits(ring->wptr << 2),
+ ring->me,
+ upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev,
+ ring->me, mmSDMA0_GFX_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev,
+ ring->me, mmSDMA0_GFX_RB_WPTR_HI),
upper_32_bits(ring->wptr << 2));
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR),
- lower_32_bits(ring->wptr << 2));
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI),
- upper_32_bits(ring->wptr << 2));
}
}
@@ -481,8 +459,6 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
* sdma_v5_0_ring_emit_mem_sync - flush the IB by graphics cache rinse
*
* @ring: amdgpu ring pointer
- * @job: job to retrieve vmid from
- * @ib: IB object to schedule
*
* flush the IB by graphics cache rinse.
*/
@@ -581,21 +557,15 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
* sdma_v5_0_gfx_stop - stop the gfx async dma engines
*
* @adev: amdgpu_device pointer
- *
+ * @inst_mask: mask of dma engine instances to be disabled
* Stop the gfx async dma ring buffers (NAVI10).
*/
-static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev)
+static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev, uint32_t inst_mask)
{
- struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
- struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
u32 rb_cntl, ib_cntl;
int i;
- if ((adev->mman.buffer_funcs_ring == sdma0) ||
- (adev->mman.buffer_funcs_ring == sdma1))
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
-
- for (i = 0; i < adev->sdma.num_instances; i++) {
+ for_each_inst(i, inst_mask) {
rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
@@ -687,9 +657,11 @@ static void sdma_v5_0_enable(struct amdgpu_device *adev, bool enable)
{
u32 f32_cntl;
int i;
+ uint32_t inst_mask;
+ /* one bit per SDMA instance: bits [num_instances - 1 : 0] */
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
if (!enable) {
- sdma_v5_0_gfx_stop(adev);
+ /*
+  * sdma_v5_0_gfx_stop() walks the set bits of its mask argument with
+  * for_each_inst(), so hand it the instance mask itself. Shifting 1 by
+  * the mask would select a single, nonexistent instance and leave the
+  * real ring buffers enabled.
+  */
+ sdma_v5_0_gfx_stop(adev, inst_mask);
sdma_v5_0_rlc_stop(adev);
}
@@ -704,168 +676,184 @@ static void sdma_v5_0_enable(struct amdgpu_device *adev, bool enable)
}
/**
- * sdma_v5_0_gfx_resume - setup and start the async dma engines
+ * sdma_v5_0_gfx_resume_instance - start/restart a certain sdma engine
*
* @adev: amdgpu_device pointer
+ * @i: instance
+ * @restore: used to restore wptr when restart
*
- * Set up the gfx DMA ring buffers and enable them (NAVI10).
- * Returns 0 for success, error for failure.
+ * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr.
+ * Return 0 for success.
*/
-static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
+static int sdma_v5_0_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
{
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl;
u32 rb_bufsz;
- u32 wb_offset;
u32 doorbell;
u32 doorbell_offset;
u32 temp;
u32 wptr_poll_cntl;
u64 wptr_gpu_addr;
- int i, r;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
- wb_offset = (ring->rptr_offs * 4);
+ ring = &adev->sdma.instance[i].ring;
- if (!amdgpu_sriov_vf(adev))
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+ if (!amdgpu_sriov_vf(adev))
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
- /* Set ring buffer size in dwords */
- rb_bufsz = order_base_2(ring->ring_size / 4);
- rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
- RPTR_WRITEBACK_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
-
- /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ if (restore) {
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ } else {
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
-
- /* setup the wptr shadow polling */
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
- lower_32_bits(wptr_gpu_addr));
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
- upper_32_bits(wptr_gpu_addr));
- wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
- mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
- wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
- SDMA0_GFX_RB_WPTR_POLL_CNTL,
- F32_POLL_ENABLE, 1);
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
- wptr_poll_cntl);
-
- /* set the wb address whether it's enabled or not */
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
- upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
- lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
-
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
-
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE),
- ring->gpu_addr >> 8);
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI),
- ring->gpu_addr >> 40);
-
+ }
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
+ mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA0_GFX_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
+ wptr_poll_cntl);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE),
+ ring->gpu_addr >> 8);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI),
+ ring->gpu_addr >> 40);
+
+ if (!restore)
ring->wptr = 0;
- /* before programing wptr to a less value, need set minor_ptr_update first */
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
- if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
- lower_32_bits(ring->wptr) << 2);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
- upper_32_bits(ring->wptr) << 2);
- }
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
+ }
- doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
- doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
- mmSDMA0_GFX_DOORBELL_OFFSET));
+ doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
+ doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
+ mmSDMA0_GFX_DOORBELL_OFFSET));
- if (ring->use_doorbell) {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
- doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
- OFFSET, ring->doorbell_index);
- } else {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
- }
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET),
- doorbell_offset);
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
+ }
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET),
+ doorbell_offset);
- adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
- ring->doorbell_index, 20);
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index, 20);
- if (amdgpu_sriov_vf(adev))
- sdma_v5_0_ring_set_wptr(ring);
+ if (amdgpu_sriov_vf(adev))
+ sdma_v5_0_ring_set_wptr(ring);
- /* set minor_ptr_update to 0 after wptr programed */
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
+ /* set minor_ptr_update to 0 after wptr programed */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
- if (!amdgpu_sriov_vf(adev)) {
- /* set utc l1 enable flag always to 1 */
- temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
-
- /* enable MCBP */
- temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
-
- /* Set up RESP_MODE to non-copy addresses */
- temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
- temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
-
- /* program default cache read and write policy */
- temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
- /* clean read policy and write policy bits */
- temp &= 0xFF0FFF;
- temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
- }
+ if (!amdgpu_sriov_vf(adev)) {
+ /* set utc l1 enable flag always to 1 */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+
+ /* enable MCBP */
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
+
+ /* Set up RESP_MODE to non-copy addresses */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
+
+ /* program default cache read and write policy */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
+ /* clean read policy and write policy bits */
+ temp &= 0xFF0FFF;
+ temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
+ }
- if (!amdgpu_sriov_vf(adev)) {
- /* unhalt engine */
- temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
- }
+ if (!amdgpu_sriov_vf(adev)) {
+ /* unhalt engine */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
+ }
- /* enable DMA RB */
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
- ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
- /* enable DMA IBs */
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+ /* enable DMA IBs */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
- ring->sched.ready = true;
+ if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
+ sdma_v5_0_ctx_switch_enable(adev, true);
+ sdma_v5_0_enable(adev, true);
+ }
- if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
- sdma_v5_0_ctx_switch_enable(adev, true);
- sdma_v5_0_enable(adev, true);
- }
+ return amdgpu_ring_test_helper(ring);
+}
- r = amdgpu_ring_test_helper(ring);
+/**
+ * sdma_v5_0_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them (NAVI10).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = sdma_v5_0_gfx_resume_instance(adev, i, false);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -969,6 +957,49 @@ static int sdma_v5_0_start(struct amdgpu_device *adev)
return r;
}
/**
 * sdma_v5_0_mqd_init - fill a v10 SDMA MQD from queue properties
 *
 * @adev: amdgpu_device pointer, used to read instance 0's GFX queue registers
 * @mqd: buffer to fill; treated as a struct v10_sdma_mqd
 * @prop: queue properties (size, base address, rptr/wptr addresses, doorbell)
 *
 * Only the fields assigned below are written; nothing else in @mqd is touched
 * by this function. Always returns 0.
 */
+static int sdma_v5_0_mqd_init(struct amdgpu_device *adev, void *mqd,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v10_sdma_mqd *m = mqd;
+ uint64_t wb_gpu_addr;
+
/*
 * RB_SIZE is order_base_2() of queue_size / 4; rptr writeback is enabled
 * with a timer value of 6 and RB_PRIV is set.
 */
+ m->sdmax_rlcx_rb_cntl =
+ order_base_2(prop->queue_size / 4) << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+ 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+ 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
+ 1 << SDMA0_RLC0_RB_CNTL__RB_PRIV__SHIFT;
+
/* ring base is stored shifted right by 8, split into low/high dwords */
+ m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
+ m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
+
/* poll control is copied from the current value of instance 0's GFX register */
+ m->sdmax_rlcx_rb_wptr_poll_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, 0,
+ mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+
/* wb_gpu_addr is reused: first for the wptr poll address ... */
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);
+
/* ... then for the rptr writeback address */
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
+ m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);
+
/* IB control is likewise copied from instance 0's GFX register */
+ m->sdmax_rlcx_ib_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, 0,
+ mmSDMA0_GFX_IB_CNTL));
+
+ m->sdmax_rlcx_doorbell_offset =
+ prop->doorbell_index << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
/* doorbell register value with only the ENABLE field set */
+ m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_RLC0_DOORBELL, ENABLE, 1);
+
+ return 0;
+}
+
/*
 * Register the SDMA MQD size and init callback in the device's per-IP MQD
 * table, under the AMDGPU_HW_IP_DMA slot.
 */
+static void sdma_v5_0_set_mqd_funcs(struct amdgpu_device *adev)
+{
+ adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v10_sdma_mqd);
+ adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v5_0_mqd_init;
+}
+
/**
* sdma_v5_0_ring_test_ring - simple async dma engine test
*
@@ -987,6 +1018,8 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
u32 tmp;
u64 gpu_addr;
+ tmp = 0xCAFEDEAD;
+
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
@@ -994,10 +1027,9 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
- tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
- r = amdgpu_ring_alloc(ring, 5);
+ r = amdgpu_ring_alloc(ring, 20);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_device_wb_free(adev, index);
@@ -1049,6 +1081,9 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
u32 tmp = 0;
u64 gpu_addr;
+ tmp = 0xCAFEDEAD;
+ memset(&ib, 0, sizeof(ib));
+
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
@@ -1056,11 +1091,10 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
- tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
- memset(&ib, 0, sizeof(ib));
+
r = amdgpu_ib_get(adev, NULL, 256,
- AMDGPU_IB_POOL_DIRECT, &ib);
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
goto err0;
@@ -1090,14 +1124,16 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err1;
}
+
tmp = le32_to_cpu(adev->wb.wb[index]);
+
if (tmp == 0xDEADBEEF)
r = 0;
else
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -1291,24 +1327,62 @@ static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}
-static int sdma_v5_0_early_init(void *handle)
/*
 * Pulse the GRBM soft-reset bit for one SDMA instance: set the bit, wait
 * 50us, then clear it again. Always returns 0.
 */
+static int sdma_v5_0_soft_reset_engine(struct amdgpu_device *adev, u32 instance_id)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 grbm_soft_reset;
+ u32 tmp;
+
/*
 * Start from the SOFT_RESET_SDMA0 field and shift it left by the instance
 * index. NOTE(review): this presumes the per-instance reset bits are
 * adjacent in GRBM_SOFT_RESET - confirm against the register spec.
 */
+ grbm_soft_reset = REG_SET_FIELD(0,
+ GRBM_SOFT_RESET, SOFT_RESET_SDMA0,
+ 1);
+ grbm_soft_reset <<= instance_id;
+
/* assert reset; the register is read back after the write */
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+
+ udelay(50);
+
/* deassert reset, again followed by a read-back */
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ return 0;
+}
+
/*
 * Per-instance SDMA callbacks for stopping, restarting and soft-resetting
 * the kernel queue; sw_init stores a pointer to this table in each
 * instance's ->funcs.
 */
+static const struct amdgpu_sdma_funcs sdma_v5_0_sdma_funcs = {
+ .stop_kernel_queue = &sdma_v5_0_stop_queue,
+ .start_kernel_queue = &sdma_v5_0_restore_queue,
+ .soft_reset_kernel_queue = &sdma_v5_0_soft_reset_engine,
+};
+
/*
 * Early init for the SDMA v5.0 IP block: load the SDMA microcode, then
 * install the ring, buffer, VM PTE, IRQ and MQD function tables.
 * Returns 0 on success or the error from sdma_v5_0_init_microcode().
 */
+static int sdma_v5_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
/* microcode is loaded first; on failure none of the tables are installed */
+ r = sdma_v5_0_init_microcode(adev);
+ if (r)
+ return r;
sdma_v5_0_set_ring_funcs(adev);
sdma_v5_0_set_buffer_funcs(adev);
sdma_v5_0_set_vm_pte_funcs(adev);
sdma_v5_0_set_irq_funcs(adev);
+ sdma_v5_0_set_mqd_funcs(adev);
return 0;
}
-static int sdma_v5_0_sw_init(void *handle)
+static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0);
+ uint32_t *ptr;
/* SDMA trap event */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0,
@@ -1324,13 +1398,9 @@ static int sdma_v5_0_sw_init(void *handle)
if (r)
return r;
- r = sdma_v5_0_init_microcode(adev);
- if (r) {
- DRM_ERROR("Failed to load sdma firmware!\n");
- return r;
- }
-
for (i = 0; i < adev->sdma.num_instances; i++) {
+ mutex_init(&adev->sdma.instance[i].engine_reset_mutex);
+ adev->sdma.instance[i].funcs = &sdma_v5_0_sdma_funcs;
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
ring->use_doorbell = true;
@@ -1342,6 +1412,7 @@ static int sdma_v5_0_sw_init(void *handle)
(adev->doorbell_index.sdma_engine[0] << 1) //get DWORD offset
: (adev->doorbell_index.sdma_engine[1] << 1); // get DWORD offset
+ ring->vm_hub = AMDGPU_GFXHUB(0);
sprintf(ring->name, "sdma%d", i);
r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
(i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 :
@@ -1351,28 +1422,55 @@ static int sdma_v5_0_sw_init(void *handle)
return r;
}
+ adev->sdma.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(5, 0, 0):
+ case IP_VERSION(5, 0, 2):
+ case IP_VERSION(5, 0, 5):
+ if ((adev->sdma.instance[0].fw_version >= 35) &&
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ default:
+ break;
+ }
+
+ /* Allocate memory for SDMA IP Dump buffer */
+ ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (ptr)
+ adev->sdma.ip_dump = ptr;
+ else
+ DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+
+ r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
return r;
}
-static int sdma_v5_0_sw_fini(void *handle)
+static int sdma_v5_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
-
+ for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
- }
+
+ amdgpu_sdma_sysfs_reset_mask_fini(adev);
+ amdgpu_sdma_destroy_inst_ctx(adev, false);
+
+ kfree(adev->sdma.ip_dump);
return 0;
}
-static int sdma_v5_0_hw_init(void *handle)
+static int sdma_v5_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
sdma_v5_0_init_golden_registers(adev);
@@ -1381,9 +1479,9 @@ static int sdma_v5_0_hw_init(void *handle)
return r;
}
-static int sdma_v5_0_hw_fini(void *handle)
+static int sdma_v5_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1394,23 +1492,19 @@ static int sdma_v5_0_hw_fini(void *handle)
return 0;
}
-static int sdma_v5_0_suspend(void *handle)
+static int sdma_v5_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v5_0_hw_fini(adev);
+ return sdma_v5_0_hw_fini(ip_block);
}
-static int sdma_v5_0_resume(void *handle)
+static int sdma_v5_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v5_0_hw_init(adev);
+ return sdma_v5_0_hw_init(ip_block);
}
-static bool sdma_v5_0_is_idle(void *handle)
+static bool sdma_v5_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 i;
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1423,11 +1517,11 @@ static bool sdma_v5_0_is_idle(void *handle)
return true;
}
-static int sdma_v5_0_wait_for_idle(void *handle)
+static int sdma_v5_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 sdma0, sdma1;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
sdma0 = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
@@ -1440,13 +1534,104 @@ static int sdma_v5_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int sdma_v5_0_soft_reset(void *handle)
+static int sdma_v5_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
return 0;
}
/**
 * sdma_v5_0_reset_queue - reset the SDMA engine backing a ring
 *
 * @ring: ring whose engine (ring->me) is reset
 * @vmid: not used by this function
 * @timedout_fence: passed through to the ring reset begin/end helpers
 *
 * Returns -EINVAL if ring->me is not a valid instance index, the error from
 * amdgpu_sdma_reset_engine() if the reset fails, otherwise the result of
 * amdgpu_ring_reset_helper_end().
 */
+static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ if (ring->me >= adev->sdma.num_instances) {
+ dev_err(adev->dev, "sdma instance not found\n");
+ return -EINVAL;
+ }
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
/* KFD is suspended around the engine reset and resumed even if it fails */
+ amdgpu_amdkfd_suspend(adev, true);
+ r = amdgpu_sdma_reset_engine(adev, ring->me, true);
+ amdgpu_amdkfd_resume(adev, true);
/* on failure the reset_helper_end() call below is skipped */
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
/**
 * sdma_v5_0_stop_queue - stop and freeze the SDMA engine behind a ring
 *
 * @ring: ring whose engine instance (ring->me) is stopped
 *
 * Disables the instance's gfx ring buffer, requests a freeze, then halts the
 * F32 core and clears UTC_L1_ENABLE. The register sequence runs between
 * amdgpu_gfx_rlc_enter_safe_mode() and amdgpu_gfx_rlc_exit_safe_mode().
 *
 * Returns 0 on success, -EINVAL when running as an SR-IOV VF, or -ETIMEDOUT
 * when the engine neither reports FROZEN nor passes the STATUS1 check.
 */
+static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring)
+{
+ u32 f32_cntl, freeze, cntl, stat1_reg;
+ struct amdgpu_device *adev = ring->adev;
+ int i, j, r = 0;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
+ i = ring->me;
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
/* single-bit mask: only instance i is stopped */
+ /* stop queue */
+ sdma_v5_0_gfx_stop(adev, 1 << i);
+
+ /* engine stop SDMA1_F32_CNTL.HALT to 1 and SDMAx_FREEZE freeze bit to 1 */
+ freeze = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE));
+ freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 1);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze);
+
/* poll for FROZEN, at most adev->usec_timeout iterations of 1us each */
+ for (j = 0; j < adev->usec_timeout; j++) {
+ freeze = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE));
+ if (REG_GET_FIELD(freeze, SDMA0_FREEZE, FROZEN) & 1)
+ break;
+ udelay(1);
+ }
+
/*
 * Timed out waiting for FROZEN: continue only if the low ten bits of
 * STATUS1 are all set. NOTE(review): presumably these are per-unit idle
 * flags - confirm against the register spec.
 */
+ /* check sdma copy engine all idle if frozen not received*/
+ if (j == adev->usec_timeout) {
+ stat1_reg = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_STATUS1_REG));
+ if ((stat1_reg & 0x3FF) != 0x3FF) {
+ DRM_ERROR("cannot soft reset as sdma not idle\n");
+ r = -ETIMEDOUT;
+ goto err0;
+ }
+ }
+
/* halt the F32 core */
+ f32_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
+
/* clear UTC_L1_ENABLE in SDMA0_CNTL */
+ cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, UTC_L1_ENABLE, 0);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), cntl);
/* both the success and the timeout path leave RLC safe mode here */
+err0:
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ return r;
+}
+
/**
 * sdma_v5_0_restore_queue - unfreeze and restart the SDMA engine behind a ring
 *
 * @ring: ring whose engine instance (ring->me) is restarted
 *
 * Clears the FREEZE bit and re-runs the per-instance gfx resume with
 * restore=true, all between RLC safe-mode enter and exit.
 * Returns the result of sdma_v5_0_gfx_resume_instance().
 */
+static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 inst_id = ring->me;
+ u32 freeze;
+ int r;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ /* unfreeze*/
+ freeze = RREG32(sdma_v5_0_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE));
+ freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0);
+ WREG32(sdma_v5_0_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE), freeze);
+
/* restore=true makes the resume reprogram rptr/wptr from ring->wptr instead of 0 */
+ r = sdma_v5_0_gfx_resume_instance(adev, inst_id, true);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ return r;
+}
+
static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring)
{
int i, r = 0;
@@ -1519,7 +1704,25 @@ static int sdma_v5_0_process_trap_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
+ uint32_t mes_queue_id = entry->src_data[0];
+
DRM_DEBUG("IH: SDMA trap\n");
+
+ if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
+ struct amdgpu_mes_queue *queue;
+
+ mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+
+ spin_lock(&adev->mes.queue_id_lock);
+ queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
+ if (queue) {
+ DRM_DEBUG("process smda queue id = %d\n", mes_queue_id);
+ amdgpu_fence_process(queue->ring);
+ }
+ spin_unlock(&adev->mes.queue_id_lock);
+ return 0;
+ }
+
switch (entry->client_id) {
case SOC15_IH_CLIENTID_SDMA0:
switch (entry->ring_id) {
@@ -1626,15 +1829,15 @@ static void sdma_v5_0_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
}
-static int sdma_v5_0_set_clockgating_state(void *handle,
+static int sdma_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(5, 0, 0):
case IP_VERSION(5, 0, 2):
case IP_VERSION(5, 0, 5):
@@ -1650,15 +1853,15 @@ static int sdma_v5_0_set_clockgating_state(void *handle,
return 0;
}
-static int sdma_v5_0_set_powergating_state(void *handle,
+static int sdma_v5_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0; /* no-op: both arguments are ignored and success is always reported */
}
-static void sdma_v5_0_get_clockgating_state(void *handle, u32 *flags)
+static void sdma_v5_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -1675,10 +1878,51 @@ static void sdma_v5_0_get_clockgating_state(void *handle, u32 *flags)
*flags |= AMD_CG_SUPPORT_SDMA_LS;
}
-const struct amd_ip_funcs sdma_v5_0_ip_funcs = {
+static void sdma_v5_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0); /* registers stored per instance */
+ uint32_t instance_offset;
+ /* Print the values held in adev->sdma.ip_dump to @p: one section per instance, one "name value" line per sdma_reg_list_5_0 entry. */
+ if (!adev->sdma.ip_dump) /* no dump buffer: nothing to print */
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count; /* instance i occupies reg_count consecutive slots of ip_dump */
+ drm_printf(p, "\nInstance:%d\n", i);
+
+ for (j = 0; j < reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_5_0[j].reg_name,
+ adev->sdma.ip_dump[instance_offset + j]);
+ }
+}
+
+static void sdma_v5_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t instance_offset;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0); /* registers captured per instance */
+ /* Read every sdma_reg_list_5_0 register of every SDMA instance into adev->sdma.ip_dump (same layout sdma_v5_0_print_ip_state reads back). */
+ if (!adev->sdma.ip_dump) /* no dump buffer: skip the capture entirely */
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false); /* NOTE(review): presumably disallows GFXOFF while the registers are read - confirm */
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count; /* instance i occupies reg_count consecutive slots of ip_dump */
+ for (j = 0; j < reg_count; j++)
+ adev->sdma.ip_dump[instance_offset + j] =
+ RREG32(sdma_v5_0_get_reg_offset(adev, i,
+ sdma_reg_list_5_0[j].reg_offset));
+ }
+ amdgpu_gfx_off_ctrl(adev, true); /* pairs with the amdgpu_gfx_off_ctrl(adev, false) call above */
+}
+
+static const struct amd_ip_funcs sdma_v5_0_ip_funcs = {
.name = "sdma_v5_0",
.early_init = sdma_v5_0_early_init,
- .late_init = NULL,
.sw_init = sdma_v5_0_sw_init,
.sw_fini = sdma_v5_0_sw_fini,
.hw_init = sdma_v5_0_hw_init,
@@ -1691,6 +1935,8 @@ const struct amd_ip_funcs sdma_v5_0_ip_funcs = {
.set_clockgating_state = sdma_v5_0_set_clockgating_state,
.set_powergating_state = sdma_v5_0_set_powergating_state,
.get_clockgating_state = sdma_v5_0_get_clockgating_state,
+ .dump_ip_state = sdma_v5_0_dump_ip_state,
+ .print_ip_state = sdma_v5_0_print_ip_state,
};
static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
@@ -1698,7 +1944,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
.align_mask = 0xf,
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB_0,
+ .secure_submission_supported = true,
.get_rptr = sdma_v5_0_ring_get_rptr,
.get_wptr = sdma_v5_0_ring_get_wptr,
.set_wptr = sdma_v5_0_ring_set_wptr,
@@ -1726,8 +1972,8 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
- .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
.preempt_ib = sdma_v5_0_ring_preempt_ib,
+ .reset = sdma_v5_0_reset_queue,
};
static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -1764,7 +2010,7 @@ static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: if a secure copy should be used
+ * @copy_flags: copy flags for the buffers
*
* Copy GPU buffers using the DMA engine (NAVI10).
* Used by the amdgpu ttm implementation to move pages if
@@ -1774,11 +2020,11 @@ static void sdma_v5_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
- SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
ib->ptr[ib->length_dw++] = byte_count - 1;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h
index d4e3c2e696f6..2ab71f21755a 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h
@@ -24,7 +24,6 @@
#ifndef __SDMA_V5_0_H__
#define __SDMA_V5_0_H__
-extern const struct amd_ip_funcs sdma_v5_0_ip_funcs;
extern const struct amdgpu_ip_block_version sdma_v5_0_ip_block;
#endif /* __SDMA_V5_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 4d4d1aa51b8a..51101b0aa2fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -51,6 +51,8 @@ MODULE_FIRMWARE("amdgpu/beige_goby_sdma.bin");
MODULE_FIRMWARE("amdgpu/vangogh_sdma.bin");
MODULE_FIRMWARE("amdgpu/yellow_carp_sdma.bin");
+MODULE_FIRMWARE("amdgpu/sdma_5_2_6.bin");
+MODULE_FIRMWARE("amdgpu/sdma_5_2_7.bin");
#define SDMA1_REG_OFFSET 0x600
#define SDMA3_REG_OFFSET 0x400
@@ -58,10 +60,61 @@ MODULE_FIRMWARE("amdgpu/yellow_carp_sdma.bin");
#define SDMA0_HYP_DEC_REG_END 0x5893
#define SDMA1_HYP_DEC_REG_OFFSET 0x20
+static const struct amdgpu_hwip_reg_entry sdma_reg_list_5_2[] = { /* NOTE(review): presumably the register set captured by the v5_2 ip-state dump/print callbacks, mirroring sdma_reg_list_5_0 - confirm against its users */
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS1_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS2_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS3_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UCODE_CHECKSUM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_CNTL), /* GFX queue registers start here */
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_CNTL), /* PAGE queue registers start here */
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_CNTL), /* RLC0 queue registers start here */
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_INT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_VM_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2) /* the only entry not named mmSDMA0_* */
+};
+
static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev);
+static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring);
+static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring);
static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
{
@@ -87,147 +140,23 @@ static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3
return base + internal_offset;
}
-static int sdma_v5_2_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
-{
- int err = 0;
- const struct sdma_firmware_header_v1_0 *hdr;
-
- err = amdgpu_ucode_validate(sdma_inst->fw);
- if (err)
- return err;
-
- hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data;
- sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version);
- sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version);
-
- if (sdma_inst->feature_version >= 20)
- sdma_inst->burst_nop = true;
-
- return 0;
-}
-
-static void sdma_v5_2_destroy_inst_ctx(struct amdgpu_device *adev)
-{
- release_firmware(adev->sdma.instance[0].fw);
-
- memset((void *)adev->sdma.instance, 0,
- sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
-}
-
-/**
- * sdma_v5_2_init_microcode - load ucode images from disk
- *
- * @adev: amdgpu_device pointer
- *
- * Use the firmware interface to load the ucode images into
- * the driver (not loaded into hw).
- * Returns 0 on success, error on failure.
- */
-
-// emulation only, won't work on real chip
-// navi10 real chip need to use PSP to load firmware
-static int sdma_v5_2_init_microcode(struct amdgpu_device *adev)
-{
- const char *chip_name;
- char fw_name[40];
- int err = 0, i;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
-
- DRM_DEBUG("\n");
-
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
- case IP_VERSION(5, 2, 0):
- chip_name = "sienna_cichlid";
- break;
- case IP_VERSION(5, 2, 2):
- chip_name = "navy_flounder";
- break;
- case IP_VERSION(5, 2, 1):
- chip_name = "vangogh";
- break;
- case IP_VERSION(5, 2, 4):
- chip_name = "dimgrey_cavefish";
- break;
- case IP_VERSION(5, 2, 5):
- chip_name = "beige_goby";
- break;
- case IP_VERSION(5, 2, 3):
- chip_name = "yellow_carp";
- break;
- default:
- BUG();
- }
-
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
-
- err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev);
- if (err)
- goto out;
-
- err = sdma_v5_2_init_inst_ctx(&adev->sdma.instance[0]);
- if (err)
- goto out;
-
- for (i = 1; i < adev->sdma.num_instances; i++)
- memcpy((void *)&adev->sdma.instance[i],
- (void *)&adev->sdma.instance[0],
- sizeof(struct amdgpu_sdma_instance));
-
- if (amdgpu_sriov_vf(adev) && (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(5, 2, 0)))
- return 0;
-
- DRM_DEBUG("psp_load == '%s'\n",
- adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- for (i = 0; i < adev->sdma.num_instances; i++) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
- info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
- info->fw = adev->sdma.instance[i].fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
- }
- }
-
-out:
- if (err) {
- DRM_ERROR("sdma_v5_2: Failed to load firmware \"%s\"\n", fw_name);
- sdma_v5_2_destroy_inst_ctx(adev);
- }
- return err;
-}
-
-static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr) /* written below as low/high dwords right after the COND_EXE header */
{
unsigned ret;
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, 1);
- ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
- amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+ /* this is the ring offset (wptr masked by buf_mask) of the dword that needs to be patched later */
+ ret = ring->wptr & ring->buf_mask;
+ /* insert a dummy dword (0) here; it is expected to be patched later */
+ amdgpu_ring_write(ring, 0);
return ret; /* offset of the dummy dword written above */
}
-static void sdma_v5_2_ring_patch_cond_exec(struct amdgpu_ring *ring,
- unsigned offset)
-{
- unsigned cur;
-
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr - 1) & ring->buf_mask;
- if (cur > offset)
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
/**
* sdma_v5_2_ring_get_rptr - get the current read pointer
*
@@ -240,7 +169,7 @@ static uint64_t sdma_v5_2_ring_get_rptr(struct amdgpu_ring *ring)
u64 *rptr;
/* XXX check if swapping is necessary on BE */
- rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);
+ rptr = (u64 *)ring->rptr_cpu_addr;
DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
return ((*rptr) >> 2);
@@ -260,7 +189,7 @@ static uint64_t sdma_v5_2_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
+ wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
} else {
wptr = RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI));
@@ -287,17 +216,27 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
DRM_DEBUG("Using doorbell -- "
"wptr_offs == 0x%08x "
- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ "lower_32_bits(ring->wptr << 2) == 0x%08x "
+ "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
ring->wptr_offs,
lower_32_bits(ring->wptr << 2),
upper_32_bits(ring->wptr << 2));
/* XXX check if swapping is necessary on BE */
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr << 2);
- adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2);
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr << 2);
DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
ring->doorbell_index, ring->wptr << 2);
WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(5, 2, 1)) {
+ /* SDMA seems to miss doorbells sometimes when powergating kicks in.
+ * Updating the wptr directly will wake it. This is only safe because
+ * we disallow gfxoff in begin_use() and then allow it again in end_use().
+ */
+ WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
+ }
} else {
DRM_DEBUG("Not using doorbell -- "
"mmSDMA%i_GFX_RB_WPTR == 0x%08x "
@@ -368,8 +307,6 @@ static void sdma_v5_2_ring_emit_ib(struct amdgpu_ring *ring,
* sdma_v5_2_ring_emit_mem_sync - flush the IB by graphics cache rinse
*
* @ring: amdgpu ring pointer
- * @job: job to retrieve vmid from
- * @ib: IB object to schedule
*
* flush the IB by graphics cache rinse.
*/
@@ -404,17 +341,21 @@ static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
u32 ref_and_mask = 0;
const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
- ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
-
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
- SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
- SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
- amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
- amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
- amdgpu_ring_write(ring, ref_and_mask); /* reference */
- amdgpu_ring_write(ring, ref_and_mask); /* mask */
- amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
- SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+ if (ring->me > 1) {
+ amdgpu_hdp_flush(adev, ring);
+ } else {
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
+ amdgpu_ring_write(ring, ref_and_mask); /* reference */
+ amdgpu_ring_write(ring, ref_and_mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+ }
}
/**
@@ -454,7 +395,7 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
amdgpu_ring_write(ring, upper_32_bits(seq));
}
- if (flags & AMDGPU_FENCE_FLAG_INT) {
+ if ((flags & AMDGPU_FENCE_FLAG_INT)) {
/* generate an interrupt */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
@@ -466,25 +407,15 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
* sdma_v5_2_gfx_stop - stop the gfx async dma engines
*
* @adev: amdgpu_device pointer
- *
+ * @inst_mask: mask of dma engine instances to be disabled
* Stop the gfx async dma ring buffers.
*/
-static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev)
+static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev, uint32_t inst_mask)
{
- struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
- struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
- struct amdgpu_ring *sdma2 = &adev->sdma.instance[2].ring;
- struct amdgpu_ring *sdma3 = &adev->sdma.instance[3].ring;
u32 rb_cntl, ib_cntl;
int i;
- if ((adev->mman.buffer_funcs_ring == sdma0) ||
- (adev->mman.buffer_funcs_ring == sdma1) ||
- (adev->mman.buffer_funcs_ring == sdma2) ||
- (adev->mman.buffer_funcs_ring == sdma3))
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
-
- for (i = 0; i < adev->sdma.num_instances; i++) {
+ for_each_inst(i, inst_mask) {
rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
@@ -544,9 +475,6 @@ static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
}
for (i = 0; i < adev->sdma.num_instances; i++) {
- f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
- f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
- AUTO_CTXSW_ENABLE, enable ? 1 : 0);
if (enable && amdgpu_sdma_phase_quantum) {
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
phase_quantum);
@@ -555,7 +483,13 @@ static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
phase_quantum);
}
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+ AUTO_CTXSW_ENABLE, enable ? 1 : 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
+ }
}
}
@@ -572,121 +506,136 @@ static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable)
{
u32 f32_cntl;
int i;
+ uint32_t inst_mask;
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
if (!enable) {
- sdma_v5_2_gfx_stop(adev);
+ sdma_v5_2_gfx_stop(adev, inst_mask);
sdma_v5_2_rlc_stop(adev);
}
- for (i = 0; i < adev->sdma.num_instances; i++) {
- f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
- f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
+ if (!amdgpu_sriov_vf(adev)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
+ }
}
}
/**
- * sdma_v5_2_gfx_resume - setup and start the async dma engines
+ * sdma_v5_2_gfx_resume_instance - start/restart a certain sdma engine
*
* @adev: amdgpu_device pointer
+ * @i: instance
+ * @restore: used to restore wptr when restart
*
- * Set up the gfx DMA ring buffers and enable them.
- * Returns 0 for success, error for failure.
+ * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr.
+ * Return 0 for success.
*/
-static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
+
+static int sdma_v5_2_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
{
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl;
u32 rb_bufsz;
- u32 wb_offset;
u32 doorbell;
u32 doorbell_offset;
u32 temp;
u32 wptr_poll_cntl;
u64 wptr_gpu_addr;
- int i, r;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
- wb_offset = (ring->rptr_offs * 4);
+ ring = &adev->sdma.instance[i].ring;
+ if (!amdgpu_sriov_vf(adev))
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
- /* Set ring buffer size in dwords */
- rb_bufsz = order_base_2(ring->ring_size / 4);
- rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
- RPTR_WRITEBACK_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
-
- /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ if (restore) {
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ } else {
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
+ }
- /* setup the wptr shadow polling */
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
- lower_32_bits(wptr_gpu_addr));
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
- upper_32_bits(wptr_gpu_addr));
- wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
- mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
- wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
- SDMA0_GFX_RB_WPTR_POLL_CNTL,
- F32_POLL_ENABLE, 1);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
- wptr_poll_cntl);
-
- /* set the wb address whether it's enabled or not */
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
- upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
- lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
-
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
-
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
-
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
+ mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA0_GFX_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
+ wptr_poll_cntl);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
+
+ if (!restore)
ring->wptr = 0;
- /* before programing wptr to a less value, need set minor_ptr_update first */
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
+ /* before programming wptr to a smaller value, minor_ptr_update must be set first */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
- if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
- }
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ }
- doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
- doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
+ doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
+ doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
- if (ring->use_doorbell) {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
- doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
- OFFSET, ring->doorbell_index);
- } else {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
- }
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
+ }
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
- adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
- ring->doorbell_index,
- adev->doorbell_index.sdma_doorbell_range);
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index,
+ adev->doorbell_index.sdma_doorbell_range);
- if (amdgpu_sriov_vf(adev))
- sdma_v5_2_ring_set_wptr(ring);
+ if (amdgpu_sriov_vf(adev))
+ sdma_v5_2_ring_set_wptr(ring);
- /* set minor_ptr_update to 0 after wptr programed */
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
+ /* set minor_ptr_update to 0 after wptr is programmed */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
+
+ /* SRIOV VF has no control of any of registers below */
+ if (!amdgpu_sriov_vf(adev)) {
/* set utc l1 enable flag always to 1 */
temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
@@ -710,40 +659,48 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
- if (!amdgpu_sriov_vf(adev)) {
- /* unhalt engine */
- temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
- }
+ /* unhalt engine */
+ temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
+ }
- /* enable DMA RB */
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
- ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
- /* enable DMA IBs */
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+ /* enable DMA IBs */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
- ring->sched.ready = true;
+ if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
+ sdma_v5_2_ctx_switch_enable(adev, true);
+ sdma_v5_2_enable(adev, true);
+ }
- if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
- sdma_v5_2_ctx_switch_enable(adev, true);
- sdma_v5_2_enable(adev, true);
- }
+ return amdgpu_ring_test_helper(ring);
+}
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->sched.ready = false;
- return r;
- }
+/**
+ * sdma_v5_2_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
+{
+ int i, r;
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = sdma_v5_2_gfx_resume_instance(adev, i, false);
+ if (r)
+ return r;
}
return 0;
@@ -806,37 +763,49 @@ static int sdma_v5_2_load_microcode(struct amdgpu_device *adev)
return 0;
}
-static int sdma_v5_2_soft_reset(void *handle)
+static int sdma_v5_2_soft_reset_engine(struct amdgpu_device *adev, u32 instance_id)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
u32 grbm_soft_reset;
u32 tmp;
- int i;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- grbm_soft_reset = REG_SET_FIELD(0,
- GRBM_SOFT_RESET, SOFT_RESET_SDMA0,
- 1);
- grbm_soft_reset <<= i;
+ grbm_soft_reset = REG_SET_FIELD(0,
+ GRBM_SOFT_RESET, SOFT_RESET_SDMA0,
+ 1);
+ grbm_soft_reset <<= instance_id;
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- tmp |= grbm_soft_reset;
- DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- udelay(50);
+ udelay(50);
- tmp &= ~grbm_soft_reset;
- WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ return 0;
+}
+static int sdma_v5_2_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ sdma_v5_2_soft_reset_engine(adev, i);
udelay(50);
}
return 0;
}
+static const struct amdgpu_sdma_funcs sdma_v5_2_sdma_funcs = {
+ .stop_kernel_queue = &sdma_v5_2_stop_queue,
+ .start_kernel_queue = &sdma_v5_2_restore_queue,
+ .soft_reset_kernel_queue = &sdma_v5_2_soft_reset_engine,
+};
+
/**
* sdma_v5_2_start - setup and start the async dma engines
*
@@ -848,6 +817,7 @@ static int sdma_v5_2_soft_reset(void *handle)
static int sdma_v5_2_start(struct amdgpu_device *adev)
{
int r = 0;
+ struct amdgpu_ip_block *ip_block;
if (amdgpu_sriov_vf(adev)) {
sdma_v5_2_ctx_switch_enable(adev, false);
@@ -868,13 +838,11 @@ static int sdma_v5_2_start(struct amdgpu_device *adev)
msleep(1000);
}
- /* TODO: check whether can submit a doorbell request to raise
- * a doorbell fence to exit gfxoff.
- */
- if (adev->in_s0ix)
- amdgpu_gfx_off_ctrl(adev, false);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SDMA);
+ if (!ip_block)
+ return -EINVAL;
- sdma_v5_2_soft_reset(adev);
+ sdma_v5_2_soft_reset(ip_block);
/* unhalt the MEs */
sdma_v5_2_enable(adev, true);
/* enable sdma ring preemption */
@@ -882,8 +850,6 @@ static int sdma_v5_2_start(struct amdgpu_device *adev)
/* start the gfx rings and rlc compute queues */
r = sdma_v5_2_gfx_resume(adev);
- if (adev->in_s0ix)
- amdgpu_gfx_off_ctrl(adev, true);
if (r)
return r;
r = sdma_v5_2_rlc_resume(adev);
@@ -891,6 +857,49 @@ static int sdma_v5_2_start(struct amdgpu_device *adev)
return r;
}
+static int sdma_v5_2_mqd_init(struct amdgpu_device *adev, void *mqd,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v10_sdma_mqd *m = mqd;
+ uint64_t wb_gpu_addr;
+
+ m->sdmax_rlcx_rb_cntl =
+ order_base_2(prop->queue_size / 4) << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+ 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+ 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
+ 1 << SDMA0_RLC0_RB_CNTL__RB_PRIV__SHIFT;
+
+ m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
+ m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
+
+ m->sdmax_rlcx_rb_wptr_poll_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, 0,
+ mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);
+
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
+ m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);
+
+ m->sdmax_rlcx_ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, 0,
+ mmSDMA0_GFX_IB_CNTL));
+
+ m->sdmax_rlcx_doorbell_offset =
+ prop->doorbell_index << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_RLC0_DOORBELL, ENABLE, 1);
+
+ return 0;
+}
+
+static void sdma_v5_2_set_mqd_funcs(struct amdgpu_device *adev)
+{
+ adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v10_sdma_mqd);
+ adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v5_2_mqd_init;
+}
+
/**
* sdma_v5_2_ring_test_ring - simple async dma engine test
*
@@ -909,6 +918,8 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring)
u32 tmp;
u64 gpu_addr;
+ tmp = 0xCAFEDEAD;
+
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
@@ -916,10 +927,9 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring)
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
- tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
- r = amdgpu_ring_alloc(ring, 5);
+ r = amdgpu_ring_alloc(ring, 20);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_device_wb_free(adev, index);
@@ -971,6 +981,9 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
u32 tmp = 0;
u64 gpu_addr;
+ tmp = 0xCAFEDEAD;
+ memset(&ib, 0, sizeof(ib));
+
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
@@ -978,9 +991,8 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
- tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
- memset(&ib, 0, sizeof(ib));
+
r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
@@ -1011,14 +1023,16 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err1;
}
+
tmp = le32_to_cpu(adev->wb.wb[index]);
+
if (tmp == 0xDEADBEEF)
r = 0;
else
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -1177,7 +1191,28 @@ static void sdma_v5_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
static void sdma_v5_2_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
+
+ /* Update the PD address for this VMID. */
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
+ (hub->ctx_addr_distance * vmid),
+ lower_32_bits(pd_addr));
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
+ (hub->ctx_addr_distance * vmid),
+ upper_32_bits(pd_addr));
+
+ /* Trigger invalidation. */
+ amdgpu_ring_write(ring,
+ SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATION) |
+ SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(ring->vm_inv_eng) |
+ SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(0x1f));
+ amdgpu_ring_write(ring, req);
+ amdgpu_ring_write(ring, 0xFFFFFFFF);
+ amdgpu_ring_write(ring,
+ SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(1 << vmid) |
+ SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(0x1F));
}
static void sdma_v5_2_ring_emit_wreg(struct amdgpu_ring *ring,
@@ -1213,14 +1248,20 @@ static void sdma_v5_2_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}
-static int sdma_v5_2_early_init(void *handle)
+static int sdma_v5_2_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_sdma_init_microcode(adev, 0, true);
+ if (r)
+ return r;
sdma_v5_2_set_ring_funcs(adev);
sdma_v5_2_set_buffer_funcs(adev);
sdma_v5_2_set_vm_pte_funcs(adev);
sdma_v5_2_set_irq_funcs(adev);
+ sdma_v5_2_set_mqd_funcs(adev);
return 0;
}
@@ -1259,11 +1300,13 @@ static unsigned sdma_v5_2_seq_to_trap_id(int seq_num)
return -EINVAL;
}
-static int sdma_v5_2_sw_init(void *handle)
+static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2);
+ uint32_t *ptr;
/* SDMA trap event */
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1274,13 +1317,9 @@ static int sdma_v5_2_sw_init(void *handle)
return r;
}
- r = sdma_v5_2_init_microcode(adev);
- if (r) {
- DRM_ERROR("Failed to load sdma firmware!\n");
- return r;
- }
-
for (i = 0; i < adev->sdma.num_instances; i++) {
+ mutex_init(&adev->sdma.instance[i].engine_reset_mutex);
+ adev->sdma.instance[i].funcs = &sdma_v5_2_sdma_funcs;
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
ring->use_doorbell = true;
@@ -1292,6 +1331,7 @@ static int sdma_v5_2_sw_init(void *handle)
ring->doorbell_index =
(adev->doorbell_index.sdma_engine[i] << 1); //get DWORD offset
+ ring->vm_hub = AMDGPU_GFXHUB(0);
sprintf(ring->name, "sdma%d", i);
r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
AMDGPU_SDMA_IRQ_INSTANCE0 + i,
@@ -1300,35 +1340,68 @@ static int sdma_v5_2_sw_init(void *handle)
return r;
}
+ adev->sdma.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(5, 2, 0):
+ case IP_VERSION(5, 2, 2):
+ case IP_VERSION(5, 2, 3):
+ case IP_VERSION(5, 2, 4):
+ if ((adev->sdma.instance[0].fw_version >= 76) &&
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ case IP_VERSION(5, 2, 5):
+ if ((adev->sdma.instance[0].fw_version >= 34) &&
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ default:
+ break;
+ }
+
+ /* Allocate memory for SDMA IP Dump buffer */
+ ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (ptr)
+ adev->sdma.ip_dump = ptr;
+ else
+ DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+
+ r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
return r;
}
-static int sdma_v5_2_sw_fini(void *handle)
+static int sdma_v5_2_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
- sdma_v5_2_destroy_inst_ctx(adev);
+ amdgpu_sdma_sysfs_reset_mask_fini(adev);
+ amdgpu_sdma_destroy_inst_ctx(adev, true);
+
+ kfree(adev->sdma.ip_dump);
return 0;
}
-static int sdma_v5_2_hw_init(void *handle)
+static int sdma_v5_2_hw_init(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- r = sdma_v5_2_start(adev);
-
- return r;
+ return sdma_v5_2_start(adev);
}
-static int sdma_v5_2_hw_fini(void *handle)
+static int sdma_v5_2_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1339,23 +1412,19 @@ static int sdma_v5_2_hw_fini(void *handle)
return 0;
}
-static int sdma_v5_2_suspend(void *handle)
+static int sdma_v5_2_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v5_2_hw_fini(adev);
+ return sdma_v5_2_hw_fini(ip_block);
}
-static int sdma_v5_2_resume(void *handle)
+static int sdma_v5_2_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v5_2_hw_init(adev);
+ return sdma_v5_2_hw_init(ip_block);
}
-static bool sdma_v5_2_is_idle(void *handle)
+static bool sdma_v5_2_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 i;
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1368,11 +1437,11 @@ static bool sdma_v5_2_is_idle(void *handle)
return true;
}
-static int sdma_v5_2_wait_for_idle(void *handle)
+static int sdma_v5_2_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 sdma0, sdma1, sdma2, sdma3;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
sdma0 = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
@@ -1387,6 +1456,100 @@ static int sdma_v5_2_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
+static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ if (ring->me >= adev->sdma.num_instances) {
+ dev_err(adev->dev, "sdma instance not found\n");
+ return -EINVAL;
+ }
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ amdgpu_amdkfd_suspend(adev, true);
+ r = amdgpu_sdma_reset_engine(adev, ring->me, true);
+ amdgpu_amdkfd_resume(adev, true);
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring)
+{
+ u32 f32_cntl, freeze, cntl, stat1_reg;
+ struct amdgpu_device *adev = ring->adev;
+ int i, j, r = 0;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
+ i = ring->me;
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ /* stop queue */
+ sdma_v5_2_gfx_stop(adev, 1 << i);
+
+ /*engine stop SDMA1_F32_CNTL.HALT to 1 and SDMAx_FREEZE freeze bit to 1 */
+ freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE));
+ freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze);
+
+ for (j = 0; j < adev->usec_timeout; j++) {
+ freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE));
+
+ if (REG_GET_FIELD(freeze, SDMA0_FREEZE, FROZEN) & 1)
+ break;
+ udelay(1);
+ }
+
+
+ if (j == adev->usec_timeout) {
+ stat1_reg = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_STATUS1_REG));
+ if ((stat1_reg & 0x3FF) != 0x3FF) {
+ DRM_ERROR("cannot soft reset as sdma not idle\n");
+ r = -ETIMEDOUT;
+ goto err0;
+ }
+ }
+
+ f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
+
+ cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, UTC_L1_ENABLE, 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), cntl);
+
+err0:
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ return r;
+}
+
+static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 inst_id = ring->me;
+ u32 freeze;
+ int r;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ /* unfreeze and unhalt */
+ freeze = RREG32(sdma_v5_2_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE));
+ freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE), freeze);
+
+ r = sdma_v5_2_gfx_resume_instance(adev, inst_id, true);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ return r;
+}
+
static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring)
{
int i, r = 0;
@@ -1438,13 +1601,14 @@ static int sdma_v5_2_set_trap_irq_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
u32 sdma_cntl;
-
u32 reg_offset = sdma_v5_2_get_reg_offset(adev, type, mmSDMA0_CNTL);
- sdma_cntl = RREG32(reg_offset);
- sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
- state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
- WREG32(reg_offset, sdma_cntl);
+ if (!amdgpu_sriov_vf(adev)) {
+ sdma_cntl = RREG32(reg_offset);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32(reg_offset, sdma_cntl);
+ }
return 0;
}
@@ -1453,7 +1617,25 @@ static int sdma_v5_2_process_trap_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
+ uint32_t mes_queue_id = entry->src_data[0];
+
DRM_DEBUG("IH: SDMA trap\n");
+
+ if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
+ struct amdgpu_mes_queue *queue;
+
+ mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+
+ spin_lock(&adev->mes.queue_id_lock);
+ queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
+ if (queue) {
+ DRM_DEBUG("process smda queue id = %d\n", mes_queue_id);
+ amdgpu_fence_process(queue->ring);
+ }
+ spin_unlock(&adev->mes.queue_id_lock);
+ return 0;
+ }
+
switch (entry->client_id) {
case SOC15_IH_CLIENTID_SDMA0:
switch (entry->ring_id) {
@@ -1530,6 +1712,30 @@ static int sdma_v5_2_process_illegal_inst_irq(struct amdgpu_device *adev,
return 0;
}
+static bool sdma_v5_2_firmware_mgcg_support(struct amdgpu_device *adev,
+ int i)
+{
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(5, 2, 1):
+ if (adev->sdma.instance[i].fw_version < 70)
+ return false;
+ break;
+ case IP_VERSION(5, 2, 3):
+ if (adev->sdma.instance[i].fw_version < 47)
+ return false;
+ break;
+ case IP_VERSION(5, 2, 7):
+ if (adev->sdma.instance[i].fw_version < 9)
+ return false;
+ break;
+ default:
+ return true;
+ }
+
+ return true;
+
+}
+
static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev,
bool enable)
{
@@ -1538,7 +1744,7 @@ static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *ade
for (i = 0; i < adev->sdma.num_instances; i++) {
- if (adev->sdma.instance[i].fw_version < 70 && adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(5, 2, 1))
+ if (!sdma_v5_2_firmware_mgcg_support(adev, i))
adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_MGCG;
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
@@ -1574,8 +1780,9 @@ static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev
int i;
for (i = 0; i < adev->sdma.num_instances; i++) {
-
- if (adev->sdma.instance[i].fw_version < 70 && adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(5, 2, 1))
+ if (adev->sdma.instance[i].fw_version < 70 &&
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(5, 2, 1))
adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_LS;
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
@@ -1596,21 +1803,23 @@ static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
}
-static int sdma_v5_2_set_clockgating_state(void *handle,
+static int sdma_v5_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(5, 2, 0):
case IP_VERSION(5, 2, 2):
case IP_VERSION(5, 2, 1):
case IP_VERSION(5, 2, 4):
case IP_VERSION(5, 2, 5):
+ case IP_VERSION(5, 2, 6):
case IP_VERSION(5, 2, 3):
+ case IP_VERSION(5, 2, 7):
sdma_v5_2_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
sdma_v5_2_update_medium_grain_light_sleep(adev,
@@ -1623,30 +1832,106 @@ static int sdma_v5_2_set_clockgating_state(void *handle,
return 0;
}
-static int sdma_v5_2_set_powergating_state(void *handle,
+static int sdma_v5_2_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void sdma_v5_2_get_clockgating_state(void *handle, u32 *flags)
+static void sdma_v5_2_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
*flags = 0;
+ /* AMD_CG_SUPPORT_SDMA_MGCG */
+ data = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_CLK_CTRL));
+ if (!(data & SDMA0_CLK_CTRL__CGCG_EN_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_SDMA_MGCG;
+
/* AMD_CG_SUPPORT_SDMA_LS */
data = RREG32_KIQ(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL));
if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
*flags |= AMD_CG_SUPPORT_SDMA_LS;
}
-const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
+static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* SDMA 5.2.3 (RMB) FW doesn't seem to properly
+ * disallow GFXOFF in some cases leading to
+ * hangs in SDMA. Disallow GFXOFF while SDMA is active.
+ * We can probably just limit this to 5.2.3,
+ * but it shouldn't hurt for other parts since
+ * this GFXOFF will be disallowed anyway when SDMA is
+ * active, this just makes it explicit.
+ * sdma_v5_2_ring_set_wptr() takes advantage of this
+ * to update the wptr because sometimes SDMA seems to miss
+ * doorbells when entering PG. If you remove this, update
+ * sdma_v5_2_ring_set_wptr() as well!
+ */
+ amdgpu_gfx_off_ctrl(adev, false);
+}
+
+static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* SDMA 5.2.3 (RMB) FW doesn't seem to properly
+ * disallow GFXOFF in some cases leading to
+ * hangs in SDMA. Allow GFXOFF when SDMA is complete.
+ */
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static void sdma_v5_2_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2);
+ uint32_t instance_offset;
+
+ if (!adev->sdma.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count;
+ drm_printf(p, "\nInstance:%d\n", i);
+
+ for (j = 0; j < reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_5_2[j].reg_name,
+ adev->sdma.ip_dump[instance_offset + j]);
+ }
+}
+
+static void sdma_v5_2_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t instance_offset;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2);
+
+ if (!adev->sdma.ip_dump)
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count;
+ for (j = 0; j < reg_count; j++)
+ adev->sdma.ip_dump[instance_offset + j] =
+ RREG32(sdma_v5_2_get_reg_offset(adev, i,
+ sdma_reg_list_5_2[j].reg_offset));
+ }
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
.name = "sdma_v5_2",
.early_init = sdma_v5_2_early_init,
- .late_init = NULL,
.sw_init = sdma_v5_2_sw_init,
.sw_fini = sdma_v5_2_sw_fini,
.hw_init = sdma_v5_2_hw_init,
@@ -1659,6 +1944,8 @@ const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
.set_clockgating_state = sdma_v5_2_set_clockgating_state,
.set_powergating_state = sdma_v5_2_set_powergating_state,
.get_clockgating_state = sdma_v5_2_get_clockgating_state,
+ .dump_ip_state = sdma_v5_2_dump_ip_state,
+ .print_ip_state = sdma_v5_2_print_ip_state,
};
static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
@@ -1666,7 +1953,7 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
.align_mask = 0xf,
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB_0,
+ .secure_submission_supported = true,
.get_rptr = sdma_v5_2_ring_get_rptr,
.get_wptr = sdma_v5_2_ring_get_wptr,
.set_wptr = sdma_v5_2_ring_set_wptr,
@@ -1690,12 +1977,14 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
.test_ib = sdma_v5_2_ring_test_ib,
.insert_nop = sdma_v5_2_ring_insert_nop,
.pad_ib = sdma_v5_2_ring_pad_ib,
+ .begin_use = sdma_v5_2_ring_begin_use,
+ .end_use = sdma_v5_2_ring_end_use,
.emit_wreg = sdma_v5_2_ring_emit_wreg,
.emit_reg_wait = sdma_v5_2_ring_emit_reg_wait,
.emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait,
.init_cond_exec = sdma_v5_2_ring_init_cond_exec,
- .patch_cond_exec = sdma_v5_2_ring_patch_cond_exec,
.preempt_ib = sdma_v5_2_ring_preempt_ib,
+ .reset = sdma_v5_2_reset_queue,
};
static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev)
@@ -1732,7 +2021,7 @@ static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: if a secure copy should be used
+ * @copy_flags: copy flags for the buffers
*
* Copy GPU buffers using the DMA engine.
* Used by the amdgpu ttm implementation to move pages if
@@ -1742,11 +2031,11 @@ static void sdma_v5_2_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
- SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
ib->ptr[ib->length_dw++] = byte_count - 1;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h
index b70414fef2a1..863145b3a77e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h
@@ -24,7 +24,6 @@
#ifndef __SDMA_V5_2_H__
#define __SDMA_V5_2_H__
-extern const struct amd_ip_funcs sdma_v5_2_ip_funcs;
extern const struct amdgpu_ip_block_version sdma_v5_2_ip_block;
#endif /* __SDMA_V5_2_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
new file mode 100644
index 000000000000..217040044987
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -0,0 +1,1917 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_trace.h"
+
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "gc/gc_11_0_0_default.h"
+#include "hdp/hdp_6_0_0_offset.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
+
+#include "soc15_common.h"
+#include "soc15.h"
+#include "sdma_v6_0_0_pkt_open.h"
+#include "nbio_v4_3.h"
+#include "sdma_common.h"
+#include "sdma_v6_0.h"
+#include "v11_structs.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
+
+MODULE_FIRMWARE("amdgpu/sdma_6_0_0.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_0_2.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_1_0.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_1_1.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_1_2.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_1_3.bin");
+
+#define SDMA1_REG_OFFSET 0x600
+#define SDMA0_HYP_DEC_REG_START 0x5880
+#define SDMA0_HYP_DEC_REG_END 0x589a
+#define SDMA1_HYP_DEC_REG_OFFSET 0x20
+
+/*
+ * Table of SDMA 6.0 registers, each built with SOC15_REG_ENTRY_STR() for the
+ * GC block, instance 0. It lists engine status/checksum registers, UTCL1
+ * status and XNACK registers, ring-buffer and IB state for QUEUE0..QUEUE2,
+ * the interrupt status, GRBM_STATUS2 and the chicken bits. Every SDMA entry
+ * names the SDMA0 variant of the register. Only QUEUE0 lists IB_CNTL, and
+ * QUEUE_STATUS0 follows the QUEUE0 group.
+ * NOTE(review): presumably consumed by a register-dump path elsewhere in
+ * this file -- confirm against the users of this array.
+ */
+static const struct amdgpu_hwip_reg_entry sdma_reg_list_6_0[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS1_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS2_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS3_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS4_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS5_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS6_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UCODE_CHECKSUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_INT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_CHICKEN_BITS),
+};
+
+static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev);
+static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev);
+static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev);
+static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev);
+static int sdma_v6_0_start(struct amdgpu_device *adev);
+
+/*
+ * Turn an SDMA0-relative register offset into the absolute offset for
+ * @instance.
+ *
+ * Offsets inside [SDMA0_HYP_DEC_REG_START, SDMA0_HYP_DEC_REG_END] are based
+ * on GC register segment 1 and advance by SDMA1_HYP_DEC_REG_OFFSET per
+ * instance. All other offsets are based on GC register segment 0, and only
+ * instance 1 is displaced (by SDMA1_REG_OFFSET).
+ */
+static u32 sdma_v6_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
+{
+	bool in_hyp_dec_range = internal_offset >= SDMA0_HYP_DEC_REG_START &&
+				internal_offset <= SDMA0_HYP_DEC_REG_END;
+
+	if (in_hyp_dec_range) {
+		/* instance 0 contributes a zero stride, so no special case */
+		return adev->reg_offset[GC_HWIP][0][1] + internal_offset +
+		       SDMA1_HYP_DEC_REG_OFFSET * instance;
+	}
+
+	if (instance == 1)
+		internal_offset += SDMA1_REG_OFFSET;
+
+	return adev->reg_offset[GC_HWIP][0][0] + internal_offset;
+}
+
+/*
+ * Emit a COND_EXE packet referencing @addr and reserve one dword for the
+ * count that the caller patches in afterwards.
+ *
+ * Returns the ring-buffer index (wptr masked with buf_mask) of the
+ * placeholder dword.
+ */
+static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring,
+					      uint64_t addr)
+{
+	unsigned patch_offset;
+
+	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE));
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	amdgpu_ring_write(ring, 1);
+
+	/* remember where the placeholder lands before writing it */
+	patch_offset = ring->wptr & ring->buf_mask;
+	/* dummy value, patched later */
+	amdgpu_ring_write(ring, 0);
+
+	return patch_offset;
+}
+
+/**
+ * sdma_v6_0_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Read the 64-bit value stored at the ring's rptr_cpu_addr and return it
+ * shifted right by two.
+ */
+static uint64_t sdma_v6_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	u64 rptr;
+
+	/* XXX check if swapping is necessary on BE */
+	/*
+	 * Sample the slot exactly once (as get_wptr does) so the value that
+	 * is logged is the value that is returned.
+	 */
+	rptr = READ_ONCE(*((u64 *)ring->rptr_cpu_addr));
+
+	DRM_DEBUG("rptr before shift == 0x%016llx\n", rptr);
+	return rptr >> 2;
+}
+
+/**
+ * sdma_v6_0_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * When the ring uses a doorbell, read the 64-bit value at wptr_cpu_addr and
+ * return it shifted right by two. Without a doorbell this returns 0.
+ */
+static uint64_t sdma_v6_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	u64 raw;
+
+	if (!ring->use_doorbell)
+		return 0;
+
+	/* XXX check if swapping is necessary on BE */
+	raw = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
+	DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", raw);
+
+	return raw >> 2;
+}
+
+/**
+ * sdma_v6_0_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Publish ring->wptr, shifted left by two. With a doorbell the value is
+ * stored at wptr_cpu_addr and then written to the doorbell; otherwise it is
+ * written to the QUEUE0 RB_WPTR / RB_WPTR_HI registers of instance ring->me.
+ */
+static void sdma_v6_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	u64 wptr_shifted = ring->wptr << 2;
+
+	if (!ring->use_doorbell) {
+		DRM_DEBUG("Not using doorbell -- "
+			  "regSDMA%i_GFX_RB_WPTR == 0x%08x "
+			  "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+			  ring->me,
+			  lower_32_bits(wptr_shifted),
+			  ring->me,
+			  upper_32_bits(wptr_shifted));
+		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev,
+				ring->me, regSDMA0_QUEUE0_RB_WPTR),
+				lower_32_bits(wptr_shifted));
+		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev,
+				ring->me, regSDMA0_QUEUE0_RB_WPTR_HI),
+				upper_32_bits(wptr_shifted));
+		return;
+	}
+
+	DRM_DEBUG("Using doorbell -- "
+		  "wptr_offs == 0x%08x "
+		  "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+		  "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+		  ring->wptr_offs,
+		  lower_32_bits(wptr_shifted),
+		  upper_32_bits(wptr_shifted));
+	/* XXX check if swapping is necessary on BE */
+	/* update the memory copy first, then ring the doorbell */
+	atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_shifted);
+	DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+		  ring->doorbell_index, wptr_shifted);
+	WDOORBELL64(ring->doorbell_index, wptr_shifted);
+}
+
+/*
+ * Write @count NOP dwords to @ring. If the ring's SDMA instance has
+ * burst_nop set, the first dword additionally carries a NOP count field of
+ * (count - 1); every other dword is the plain ring->funcs->nop value.
+ */
+static void sdma_v6_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+	bool burst = sdma && sdma->burst_nop;
+	uint32_t n;
+
+	for (n = 0; n < count; n++) {
+		u32 pkt = ring->funcs->nop;
+
+		if (burst && n == 0)
+			pkt |= SDMA_PKT_NOP_HEADER_COUNT(count - 1);
+
+		amdgpu_ring_write(ring, pkt);
+	}
+}
+
+/**
+ * sdma_v6_0_ring_emit_ib - Schedule an IB on the DMA engine
+ *
+ * @ring: amdgpu ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: IB object to schedule
+ * @flags: unused
+ *
+ * Pad the ring with NOPs and emit a 6-dword INDIRECT packet that points at
+ * @ib and at the CSA address looked up for the job's vmid.
+ */
+static void sdma_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
+				   struct amdgpu_job *job,
+				   struct amdgpu_ib *ib,
+				   uint32_t flags)
+{
+	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+	uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
+	u32 pad_dw;
+
+	/*
+	 * The dword after the IB packet must sit on an 8-dword boundary.
+	 * The packet is 6 dwords, so we need x NOPs with
+	 *   (wptr + 6 + x) % 8 == 0,
+	 * and x = (2 - wptr) & 7 satisfies that.
+	 */
+	pad_dw = (2 - lower_32_bits(ring->wptr)) & 7;
+	sdma_v6_0_ring_insert_nop(ring, pad_dw);
+
+	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_INDIRECT) |
+			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
+	/* base must be 32 byte aligned */
+	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
+	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+	amdgpu_ring_write(ring, ib->length_dw);
+	amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
+	amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
+}
+
+/**
+ * sdma_v6_0_ring_emit_mem_sync - flush the IB by graphics cache rinse
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Emit a 5-dword GCR_REQ packet whose control word requests GL2
+ * invalidate + writeback and GLM/GL1/GLV/GLK/GLI invalidates, with base VA,
+ * limit VA and VMID all set to 0.
+ */
+static void sdma_v6_0_ring_emit_mem_sync(struct amdgpu_ring *ring)
+{
+	uint32_t gcr_cntl;
+
+	gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
+		   SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
+		   SDMA_GCR_GLI_INV(1);
+
+	/* flush entire cache L0/L1/L2, this can be optimized by performance requirement */
+	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_GCR_REQ));
+	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
+	/* the control word is split: bits 15:0 here, bits 18:16 in the next dword */
+	amdgpu_ring_write(ring,
+			  SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) |
+			  SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0));
+	amdgpu_ring_write(ring,
+			  SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) |
+			  SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16));
+	amdgpu_ring_write(ring,
+			  SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) |
+			  SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0));
+}
+
+
+/**
+ * sdma_v6_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Emit a POLL_REGMEM packet with the HDP_FLUSH bit set. The reference and
+ * mask are both the nbio ref_and_mask_sdma0 value shifted left by ring->me.
+ */
+static void sdma_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+	u32 ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
+
+	amdgpu_ring_write(ring,
+			  SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+	/* register offsets are shifted left by two before being written */
+	amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
+	amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
+	amdgpu_ring_write(ring, ref_and_mask); /* reference */
+	amdgpu_ring_write(ring, ref_and_mask); /* mask */
+	/* retry count, poll interval */
+	amdgpu_ring_write(ring,
+			  SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
+}
+
+/**
+ * sdma_v6_0_ring_emit_fence - emit a fence on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ * @addr: address the sequence number is written to (must be 4-byte aligned)
+ * @seq: fence seq number
+ * @flags: fence flags
+ *
+ * Emit a FENCE packet writing the low 32 bits of @seq to @addr. With
+ * AMDGPU_FENCE_FLAG_64BIT a second FENCE packet writes the high 32 bits to
+ * @addr + 4. With AMDGPU_FENCE_FLAG_INT a TRAP packet follows to raise an
+ * interrupt.
+ */
+static void sdma_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+				      unsigned flags)
+{
+	/* FENCE header with MTYPE 0x3: Ucached(UC) */
+	const u32 fence_hdr = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
+			      SDMA_PKT_FENCE_HEADER_MTYPE(0x3);
+
+	/* write the fence */
+	amdgpu_ring_write(ring, fence_hdr);
+	/* zero in first two bits */
+	BUG_ON(addr & 0x3);
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	amdgpu_ring_write(ring, lower_32_bits(seq));
+
+	/* optionally write high bits as well */
+	if (flags & AMDGPU_FENCE_FLAG_64BIT) {
+		addr += 4;
+		amdgpu_ring_write(ring, fence_hdr);
+		/* zero in first two bits */
+		BUG_ON(addr & 0x3);
+		amdgpu_ring_write(ring, lower_32_bits(addr));
+		amdgpu_ring_write(ring, upper_32_bits(addr));
+		amdgpu_ring_write(ring, upper_32_bits(seq));
+	}
+
+	if (!(flags & AMDGPU_FENCE_FLAG_INT))
+		return;
+
+	/* generate an interrupt */
+	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP));
+	amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
+}
+
+/**
+ * sdma_v6_0_gfx_stop - stop the gfx async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * For every SDMA instance, clear RB_ENABLE in QUEUE0_RB_CNTL and then
+ * IB_ENABLE in QUEUE0_IB_CNTL.
+ */
+static void sdma_v6_0_gfx_stop(struct amdgpu_device *adev)
+{
+	u32 rb_reg, ib_reg, val;
+	int inst;
+
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		rb_reg = sdma_v6_0_get_reg_offset(adev, inst, regSDMA0_QUEUE0_RB_CNTL);
+		ib_reg = sdma_v6_0_get_reg_offset(adev, inst, regSDMA0_QUEUE0_IB_CNTL);
+
+		/* disable the ring buffer first ... */
+		val = RREG32_SOC15_IP(GC, rb_reg);
+		val = REG_SET_FIELD(val, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 0);
+		WREG32_SOC15_IP(GC, rb_reg, val);
+
+		/* ... then indirect buffer fetching */
+		val = RREG32_SOC15_IP(GC, ib_reg);
+		val = REG_SET_FIELD(val, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 0);
+		WREG32_SOC15_IP(GC, ib_reg, val);
+	}
+}
+
+/**
+ * sdma_v6_0_rlc_stop - stop the compute async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Intended to stop the compute async dma queues. Currently a placeholder:
+ * the body contains no code, so calling it has no effect.
+ */
+static void sdma_v6_0_rlc_stop(struct amdgpu_device *adev)
+{
+ /* XXX todo */
+}
+
+/**
+ * sdma_v6_0_ctxempty_int_enable - enable or disable context empty interrupts
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: value for the CTXEMPTY_INT_ENABLE field
+ *
+ * Set or clear CTXEMPTY_INT_ENABLE in the SDMA0_CNTL register of every SDMA
+ * instance. Does nothing when running as an SR-IOV VF.
+ */
+static void sdma_v6_0_ctxempty_int_enable(struct amdgpu_device *adev, bool enable)
+{
+	u32 cntl_reg, cntl;
+	int inst;
+
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		cntl_reg = sdma_v6_0_get_reg_offset(adev, inst, regSDMA0_CNTL);
+
+		cntl = RREG32(cntl_reg);
+		cntl = REG_SET_FIELD(cntl, SDMA0_CNTL,
+				     CTXEMPTY_INT_ENABLE, enable ? 1 : 0);
+		WREG32(cntl_reg, cntl);
+	}
+}
+
+/**
+ * sdma_v6_0_enable - halt or unhalt the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: true to unhalt the DMA MEs, false to halt them
+ *
+ * When disabling, the gfx and rlc queues are stopped first. The HALT field
+ * of SDMA0_F32_CNTL is then programmed on every instance, except when
+ * running as an SR-IOV VF, where the register is left untouched.
+ */
+static void sdma_v6_0_enable(struct amdgpu_device *adev, bool enable)
+{
+	u32 halt = enable ? 0 : 1;
+	u32 f32_reg, f32_cntl;
+	int inst;
+
+	if (!enable) {
+		sdma_v6_0_gfx_stop(adev);
+		sdma_v6_0_rlc_stop(adev);
+	}
+
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		f32_reg = sdma_v6_0_get_reg_offset(adev, inst, regSDMA0_F32_CNTL);
+
+		f32_cntl = RREG32_SOC15_IP(GC, f32_reg);
+		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, halt);
+		WREG32_SOC15_IP(GC, f32_reg, f32_cntl);
+	}
+}
+
+/**
+ * sdma_v6_0_gfx_resume_instance - start/restart a certain sdma engine
+ *
+ * @adev: amdgpu_device pointer
+ * @i: instance
+ * @restore: used to restore wptr when restart
+ *
+ * Program the QUEUE0 ring buffer of SDMA instance @i (size, pointers,
+ * writeback and poll addresses, base, doorbell), the watchdog and UTCL1
+ * settings, then enable the ring buffer and IB fetching and run the ring
+ * test. With @restore set, RPTR and WPTR are both loaded from ring->wptr
+ * instead of being zeroed, and ring->wptr is kept.
+ *
+ * Return 0 for success, or the error returned by the ring test.
+ */
+static int sdma_v6_0_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
+{
+	struct amdgpu_ring *ring;
+	u32 rb_cntl, ib_cntl;
+	u32 rb_bufsz;
+	u32 doorbell;
+	u32 doorbell_offset;
+	u32 temp;
+	u64 wptr_gpu_addr;
+
+	ring = &adev->sdma.instance[i].ring;
+	if (!amdgpu_sriov_vf(adev))
+		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+
+	/* Set ring buffer size in dwords */
+	rb_bufsz = order_base_2(ring->ring_size / 4);
+	rb_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
+#ifdef __BIG_ENDIAN
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL,
+				RPTR_WRITEBACK_SWAP_ENABLE, 1);
+#endif
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1);
+	WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+
+	/* Initialize the ring buffer's read and write pointers */
+	if (restore) {
+		/* rptr == wptr: resume with an empty ring at the saved position */
+		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), lower_32_bits(ring->wptr << 2));
+		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
+		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2));
+		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+	} else {
+		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), 0);
+		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), 0);
+		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), 0);
+		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), 0);
+	}
+	/* setup the wptr shadow polling */
+	wptr_gpu_addr = ring->wptr_gpu_addr;
+	WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO),
+			lower_32_bits(wptr_gpu_addr));
+	WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI),
+			upper_32_bits(wptr_gpu_addr));
+
+	/* set the wb address whether it's enabled or not */
+	WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI),
+			upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+	WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO),
+			lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+	/* these rb_cntl changes are only written out with RB_ENABLE below */
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, F32_WPTR_POLL_ENABLE, 1);
+
+	WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
+	WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
+
+	if (!restore)
+		ring->wptr = 0;
+
+	/* before programing wptr to a less value, need set minor_ptr_update first */
+	WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1);
+
+	if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+		/*
+		 * Shift the full 64-bit wptr before splitting it, as the
+		 * restore path and sdma_v6_0_ring_set_wptr() do. Shifting
+		 * each half separately loses the two bits that carry from
+		 * the low dword into the high dword.
+		 */
+		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2));
+		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+	}
+
+	doorbell = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL));
+	doorbell_offset = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET));
+
+	if (ring->use_doorbell) {
+		doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
+		doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET,
+						OFFSET, ring->doorbell_index);
+	} else {
+		doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0);
+	}
+	WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell);
+	WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset);
+
+	/* the doorbell range is programmed once, sized for all instances */
+	if (i == 0)
+		adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+						      ring->doorbell_index,
+						      adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);
+
+	if (amdgpu_sriov_vf(adev))
+		sdma_v6_0_ring_set_wptr(ring);
+
+	/* set minor_ptr_update to 0 after wptr programed */
+	WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0);
+
+	/* Set up sdma hang watchdog */
+	temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL));
+	/* 100ms per unit */
+	temp = REG_SET_FIELD(temp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT,
+			     max(adev->usec_timeout/100000, 1));
+	WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), temp);
+
+	/* Set up RESP_MODE to non-copy addresses */
+	temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL));
+	temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+	temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+	WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), temp);
+
+	/* program default cache read and write policy */
+	temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE));
+	/* clean read policy and write policy bits */
+	temp &= 0xFF0FFF;
+	temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
+		 (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
+		 SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
+	WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), temp);
+
+	if (!amdgpu_sriov_vf(adev)) {
+		/* unhalt engine */
+		temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));
+		temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+		temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, TH1_RESET, 0);
+		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), temp);
+	}
+
+	/* enable DMA RB */
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1);
+	WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+
+	ib_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
+	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+	/* enable DMA IBs */
+	WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
+
+	if (amdgpu_sriov_vf(adev))
+		sdma_v6_0_enable(adev, true);
+
+	return amdgpu_ring_test_helper(ring);
+}
+
+/**
+ * sdma_v6_0_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Resume each SDMA instance in order without restoring its pointers,
+ * stopping at the first instance that fails.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev)
+{
+	int inst;
+	int r = 0;
+
+	for (inst = 0; !r && inst < adev->sdma.num_instances; inst++)
+		r = sdma_v6_0_gfx_resume_instance(adev, inst, false);
+
+	return r;
+}
+
+/**
+ * sdma_v6_0_rlc_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Intended to set up the compute DMA queues and enable them. Currently a
+ * placeholder that performs no work.
+ * Always returns 0.
+ */
+static int sdma_v6_0_rlc_resume(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
+/*
+ * Write @fw_size little-endian dwords starting at @fw_data to the ucode data
+ * register @data_reg. When amdgpu_emu_mode is 1, sleep 1 ms before every
+ * 500th dword (including the first).
+ */
+static void sdma_v6_0_write_ucode(struct amdgpu_device *adev, u32 data_reg,
+				  const __le32 *fw_data, u32 fw_size)
+{
+	u32 j;
+
+	for (j = 0; j < fw_size; j++) {
+		if (amdgpu_emu_mode == 1 && j % 500 == 0)
+			msleep(1);
+		WREG32(data_reg, le32_to_cpup(fw_data++));
+	}
+}
+
+/**
+ * sdma_v6_0_load_microcode - load the sDMA ME ucode
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Halt the engines and upload two images from the instance-0 firmware blob:
+ * the first starts at header.ucode_array_offset_bytes and is written from
+ * ucode address 0, the second starts at ctl_ucode_offset and is written from
+ * ucode address 0x8000. The broadcast registers are used by default; the
+ * legacy per-instance path is kept but not selected.
+ * Returns 0 for success, -EINVAL if the ucode is not available.
+ */
+static int sdma_v6_0_load_microcode(struct amdgpu_device *adev)
+{
+	const struct sdma_firmware_header_v2_0 *hdr;
+	const __le32 *first_data, *second_data;
+	u32 first_size, second_size;
+	int i;
+	bool use_broadcast;
+
+	/* halt the MEs */
+	sdma_v6_0_enable(adev, false);
+
+	if (!adev->sdma.instance[0].fw)
+		return -EINVAL;
+
+	hdr = (const struct sdma_firmware_header_v2_0 *)adev->sdma.instance[0].fw->data;
+
+	/*
+	 * Image sizes in dwords. Each header field is little-endian and must
+	 * be converted on its own before the two are added; converting the
+	 * sum of the raw fields gives a wrong size on big-endian hosts.
+	 */
+	first_size = (le32_to_cpu(hdr->ctx_jt_offset) +
+		      le32_to_cpu(hdr->ctx_jt_size)) / 4;
+	first_data = (const __le32 *)
+		(adev->sdma.instance[0].fw->data +
+		 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+	second_size = (le32_to_cpu(hdr->ctl_jt_offset) +
+		       le32_to_cpu(hdr->ctl_jt_size)) / 4;
+	second_data = (const __le32 *)
+		(adev->sdma.instance[0].fw->data +
+		 le32_to_cpu(hdr->ctl_ucode_offset));
+
+	/* use broadcast mode to load SDMA microcode by default */
+	use_broadcast = true;
+
+	if (use_broadcast) {
+		dev_info(adev->dev, "Use broadcast method to load SDMA firmware\n");
+		amdgpu_ucode_print_sdma_hdr(&hdr->header);
+
+		/* first image, from ucode address 0 */
+		WREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_BROADCAST_UCODE_ADDR), 0);
+		sdma_v6_0_write_ucode(adev,
+				      sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_BROADCAST_UCODE_DATA),
+				      first_data, first_size);
+
+		/* second image, from ucode address 0x8000 */
+		WREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_BROADCAST_UCODE_ADDR), 0x8000);
+		sdma_v6_0_write_ucode(adev,
+				      sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_BROADCAST_UCODE_DATA),
+				      second_data, second_size);
+	} else {
+		dev_info(adev->dev, "Use legacy method to load SDMA firmware\n");
+		for (i = 0; i < adev->sdma.num_instances; i++) {
+			amdgpu_ucode_print_sdma_hdr(&hdr->header);
+
+			/* first image, from ucode address 0 */
+			WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_ADDR), 0);
+			sdma_v6_0_write_ucode(adev,
+					      sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_DATA),
+					      first_data, first_size);
+			/* after each image the fw version is written to UCODE_ADDR */
+			WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_ADDR), adev->sdma.instance[0].fw_version);
+
+			/* second image, from ucode address 0x8000 */
+			WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_ADDR), 0x8000);
+			sdma_v6_0_write_ucode(adev,
+					      sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_DATA),
+					      second_data, second_size);
+			WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_ADDR), adev->sdma.instance[0].fw_version);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Soft-reset every SDMA instance and restart the engines.
+ *
+ * The gfx queues are stopped first. Then, per instance: set the FREEZE bit,
+ * set HALT and TH1_RESET in F32_CNTL, write 0 to QUEUE0_PREEMPT, and pulse
+ * the instance's bit in GRBM_SOFT_RESET with 100 us delays around each step.
+ * Finally sdma_v6_0_start() is called and its result returned.
+ */
+static int sdma_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 tmp;
+ int i;
+
+ sdma_v6_0_gfx_stop(adev);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ /* freeze the engine, then halt it and assert TH1_RESET */
+ tmp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_FREEZE));
+ tmp |= SDMA0_FREEZE__FREEZE_MASK;
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_FREEZE), tmp);
+ tmp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));
+ tmp |= SDMA0_F32_CNTL__HALT_MASK;
+ tmp |= SDMA0_F32_CNTL__TH1_RESET_MASK;
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), tmp);
+
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_PREEMPT), 0);
+
+ udelay(100);
+
+ /* assert only this instance's soft-reset bit (the SDMA0 mask shifted by i) */
+ tmp = GRBM_SOFT_RESET__SOFT_RESET_SDMA0_MASK << i;
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
+ /* value is unused; NOTE(review): presumably a read-back to post the write -- confirm */
+ tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+
+ udelay(100);
+
+ /* release the reset by clearing the whole register */
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, 0);
+ tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+
+ udelay(100);
+ }
+
+ return sdma_v6_0_start(adev);
+}
+
+/*
+ * Decide whether a soft reset is needed by running the IB test on each SDMA
+ * ring with a 1000 ms timeout. Returns true as soon as one ring fails the
+ * test, false if all of them pass.
+ */
+static bool sdma_v6_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	long timeout = msecs_to_jiffies(1000);
+	struct amdgpu_ring *ring;
+	int inst;
+
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		ring = &adev->sdma.instance[inst].ring;
+		if (amdgpu_ring_test_ib(ring, timeout))
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * sdma_v6_0_start - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * As an SR-IOV VF: stop the queues and resume the gfx rings only.
+ * Otherwise: load the microcode when the firmware load type is
+ * AMDGPU_FW_LOAD_DIRECT, unhalt the engines, enable the context-empty
+ * interrupt, and resume the gfx rings and rlc queues.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v6_0_start(struct amdgpu_device *adev)
+{
+	int r;
+
+	if (amdgpu_sriov_vf(adev)) {
+		sdma_v6_0_enable(adev, false);
+
+		/* set RB registers */
+		return sdma_v6_0_gfx_resume(adev);
+	}
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+		r = sdma_v6_0_load_microcode(adev);
+		if (r)
+			return r;
+
+		/* The value of regSDMA_F32_CNTL is invalid the moment after loading fw */
+		if (amdgpu_emu_mode == 1)
+			msleep(1000);
+	}
+
+	/* unhalt the MEs */
+	sdma_v6_0_enable(adev, true);
+	/* enable sdma ring preemption */
+	sdma_v6_0_ctxempty_int_enable(adev, true);
+
+	/* start the gfx rings and rlc compute queues */
+	r = sdma_v6_0_gfx_resume(adev);
+	if (r)
+		return r;
+
+	return sdma_v6_0_rlc_resume(adev);
+}
+
+/*
+ * Fill the v11 SDMA MQD at @mqd from the queue properties in @prop.
+ *
+ * Ring size, ring base, wptr poll address, rptr writeback address, doorbell
+ * index, CSA address and fence address come from @prop. The IB control
+ * value is read from instance 0's QUEUE0_IB_CNTL register, and the AQL
+ * control and dummy register take their *_DEFAULT values.
+ * Always returns 0.
+ */
+static int sdma_v6_0_mqd_init(struct amdgpu_device *adev, void *mqd,
+			      struct amdgpu_mqd_prop *prop)
+{
+	struct v11_sdma_mqd *m = mqd;
+
+	/* ring buffer control and base (base is stored shifted right by 8) */
+	m->sdmax_rlcx_rb_cntl =
+		order_base_2(prop->queue_size / 4) << SDMA0_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
+		1 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+		4 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
+		1 << SDMA0_QUEUE0_RB_CNTL__F32_WPTR_POLL_ENABLE__SHIFT;
+	m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
+	m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
+
+	/* wptr poll and rptr writeback addresses */
+	m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(prop->wptr_gpu_addr);
+	m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(prop->wptr_gpu_addr);
+	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(prop->rptr_gpu_addr);
+	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(prop->rptr_gpu_addr);
+
+	/* IB control is copied from the live instance-0 register */
+	m->sdmax_rlcx_ib_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, 0,
+							regSDMA0_QUEUE0_IB_CNTL));
+
+	/* doorbell: enabled, at the queue's doorbell index */
+	m->sdmax_rlcx_doorbell_offset =
+		prop->doorbell_index << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+	m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
+
+	/* fields that start out cleared or at their defaults */
+	m->sdmax_rlcx_skip_cntl = 0;
+	m->sdmax_rlcx_context_status = 0;
+	m->sdmax_rlcx_doorbell_log = 0;
+	m->sdmax_rlcx_rb_aql_cntl = regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT;
+	m->sdmax_rlcx_dummy_reg = regSDMA0_QUEUE0_DUMMY_REG_DEFAULT;
+
+	m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr);
+	m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr);
+
+	/* the fence address is stored in the f32_dbg0/1 fields */
+	m->sdmax_rlcx_f32_dbg0 = lower_32_bits(prop->fence_address);
+	m->sdmax_rlcx_f32_dbg1 = upper_32_bits(prop->fence_address);
+
+	return 0;
+}
+
+/*
+ * Register the SDMA MQD description for AMDGPU_HW_IP_DMA: the init callback
+ * and the size of struct v11_sdma_mqd.
+ */
+static void sdma_v6_0_set_mqd_funcs(struct amdgpu_device *adev)
+{
+	adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v6_0_mqd_init;
+	adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v11_sdma_mqd);
+}
+
+/**
+ * sdma_v6_0_ring_test_ring - simple async dma engine test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Test the DMA engine by using it to write a value to memory: a writeback
+ * slot is seeded with a sentinel, a linear write packet is submitted on
+ * the ring, and the slot is polled until the new value shows up.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	unsigned int wb_slot;
+	unsigned int waited;
+	u64 wb_gpu_addr;
+	int r;
+
+	r = amdgpu_device_wb_get(adev, &wb_slot);
+	if (r) {
+		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+		return r;
+	}
+
+	/* seed the slot so a stale value cannot be mistaken for success */
+	wb_gpu_addr = adev->wb.gpu_addr + (wb_slot * 4);
+	adev->wb.wb[wb_slot] = cpu_to_le32(0xCAFEDEAD);
+
+	r = amdgpu_ring_alloc(ring, 5);
+	if (r) {
+		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
+		goto out_free_wb;
+	}
+
+	/* linear write packet storing 0xDEADBEEF into the slot */
+	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+			  SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
+	amdgpu_ring_write(ring, lower_32_bits(wb_gpu_addr));
+	amdgpu_ring_write(ring, upper_32_bits(wb_gpu_addr));
+	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
+	amdgpu_ring_write(ring, 0xDEADBEEF);
+	amdgpu_ring_commit(ring);
+
+	for (waited = 0; waited < adev->usec_timeout; waited++) {
+		if (le32_to_cpu(adev->wb.wb[wb_slot]) == 0xDEADBEEF)
+			break;
+		/* emulation mode sleeps 1 ms per poll instead of 1 us */
+		if (amdgpu_emu_mode == 1)
+			msleep(1);
+		else
+			udelay(1);
+	}
+
+	if (waited >= adev->usec_timeout)
+		r = -ETIMEDOUT;
+
+out_free_wb:
+	amdgpu_device_wb_free(adev, wb_slot);
+
+	return r;
+}
+
+/*
+ * sdma_v6_0_ring_test_ib - test an IB on the DMA engine
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Submits a small IB that writes 0xDEADBEEF into a writeback slot, waits
+ * for its fence and checks that the slot holds the expected value.
+ * Returns 0 on success, error on failure.
+ */
+static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct dma_fence *fence = NULL;
+	struct amdgpu_ib ib;
+	unsigned int wb_slot;
+	u64 wb_gpu_addr;
+	long r;
+
+	memset(&ib, 0, sizeof(ib));
+
+	r = amdgpu_device_wb_get(adev, &wb_slot);
+	if (r) {
+		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+		return r;
+	}
+
+	/* seed the slot so a stale value cannot be mistaken for success */
+	wb_gpu_addr = adev->wb.gpu_addr + (wb_slot * 4);
+	adev->wb.wb[wb_slot] = cpu_to_le32(0xCAFEDEAD);
+
+	r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+	if (r) {
+		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+		goto out_free_wb;
+	}
+
+	/* one linear write packet followed by NOPs, 8 dwords in total */
+	ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+		    SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+	ib.ptr[1] = lower_32_bits(wb_gpu_addr);
+	ib.ptr[2] = upper_32_bits(wb_gpu_addr);
+	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
+	ib.ptr[4] = 0xDEADBEEF;
+	ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+	ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+	ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+	ib.length_dw = 8;
+
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &fence);
+	if (r)
+		goto out_free_ib;
+
+	r = dma_fence_wait_timeout(fence, false, timeout);
+	if (r < 0) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+		goto out_free_ib;
+	}
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out\n");
+		r = -ETIMEDOUT;
+		goto out_free_ib;
+	}
+
+	/* the fence signalled; the slot must now hold the written value */
+	r = (le32_to_cpu(adev->wb.wb[wb_slot]) == 0xDEADBEEF) ? 0 : -EINVAL;
+
+out_free_ib:
+	amdgpu_ib_free(&ib, NULL);
+	dma_fence_put(fence);
+out_free_wb:
+	amdgpu_device_wb_free(adev, wb_slot);
+	return r;
+}
+
+
+/**
+ * sdma_v6_0_vm_copy_pte - update PTEs by copying them from the GART
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Appends a 7-dword linear copy packet that moves @count 8-byte entries
+ * from @src to @pe.
+ */
+static void sdma_v6_0_vm_copy_pte(struct amdgpu_ib *ib,
+				  uint64_t pe, uint64_t src,
+				  unsigned count)
+{
+	uint32_t *cmd = &ib->ptr[ib->length_dw];
+
+	cmd[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
+		 SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+	cmd[1] = count * 8 - 1;	/* byte count minus one */
+	cmd[2] = 0;		/* src/dst endian swap */
+	cmd[3] = lower_32_bits(src);
+	cmd[4] = upper_32_bits(src);
+	cmd[5] = lower_32_bits(pe);
+	cmd[6] = upper_32_bits(pe);
+
+	ib->length_dw += 7;
+}
+
+/**
+ * sdma_v6_0_vm_write_pte - update PTEs by writing them manually
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @value: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ *
+ * Appends a linear write packet whose payload is one 64-bit entry per
+ * page entry, starting at @value and advancing by @incr each time.
+ */
+static void sdma_v6_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+				   uint64_t value, unsigned count,
+				   uint32_t incr)
+{
+	unsigned int ndw = count * 2;	/* two dwords per entry */
+	uint32_t *cmd = &ib->ptr[ib->length_dw];
+	unsigned int entry;
+
+	cmd[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+		 SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+	cmd[1] = lower_32_bits(pe);
+	cmd[2] = upper_32_bits(pe);
+	cmd[3] = ndw - 1;
+	cmd += 4;
+
+	for (entry = 0; entry < ndw / 2; entry++) {
+		*cmd++ = lower_32_bits(value);
+		*cmd++ = upper_32_bits(value);
+		value += incr;
+	}
+
+	/* 4 header dwords plus the payload written above */
+	ib->length_dw += 4 + ndw;
+}
+
+/**
+ * sdma_v6_0_vm_set_pte_pde - update the page tables using sDMA
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Appends a single 10-dword PTEPDE packet to the IB.
+ */
+static void sdma_v6_0_vm_set_pte_pde(struct amdgpu_ib *ib,
+				     uint64_t pe,
+				     uint64_t addr, unsigned count,
+				     uint32_t incr, uint64_t flags)
+{
+	uint32_t *cmd = &ib->ptr[ib->length_dw];
+
+	/* for physically contiguous pages (vram) */
+	cmd[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_PTEPDE);
+	cmd[1] = lower_32_bits(pe);	/* dst addr */
+	cmd[2] = upper_32_bits(pe);
+	cmd[3] = lower_32_bits(flags);	/* mask */
+	cmd[4] = upper_32_bits(flags);
+	cmd[5] = lower_32_bits(addr);	/* value */
+	cmd[6] = upper_32_bits(addr);
+	cmd[7] = incr;			/* increment size */
+	cmd[8] = 0;
+	cmd[9] = count - 1;		/* number of entries */
+
+	ib->length_dw += 10;
+}
+
+/*
+ * sdma_v6_0_ring_pad_ib - pad the IB
+ * @ib: indirect buffer to fill with padding
+ * @ring: amdgpu ring pointer
+ *
+ * Pad the IB with NOPs so that its length becomes a multiple of 8 dwords.
+ * When the SDMA instance has burst_nop set, the first NOP header carries
+ * the number of NOP dwords that follow it.
+ */
+static void sdma_v6_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+	u32 pad_count = (-ib->length_dw) & 0x7;
+	u32 remaining = pad_count;
+
+	if (!remaining)
+		return;
+
+	if (sdma && sdma->burst_nop) {
+		ib->ptr[ib->length_dw++] =
+			SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP) |
+			SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+		remaining--;
+	}
+
+	while (remaining--)
+		ib->ptr[ib->length_dw++] =
+			SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP);
+}
+
+/**
+ * sdma_v6_0_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emits a POLL_REGMEM packet that polls the ring's fence memory until it
+ * equals the last sync sequence number, so that all previous operations
+ * are completed before later packets run.
+ */
+static void sdma_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+	uint64_t fence_addr = ring->fence_drv.gpu_addr;
+	uint32_t wait_seq = ring->fence_drv.sync_seq;
+	uint32_t header, poll_cfg;
+
+	/* wait for idle: memory poll, compare function 3 (equal) */
+	header = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+		 SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+		 SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) |
+		 SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1);
+	/* retry count, poll interval */
+	poll_cfg = SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+		   SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4);
+
+	amdgpu_ring_write(ring, header);
+	amdgpu_ring_write(ring, lower_32_bits(fence_addr) & 0xfffffffc);
+	amdgpu_ring_write(ring, upper_32_bits(fence_addr));
+	amdgpu_ring_write(ring, wait_seq);	/* reference */
+	amdgpu_ring_write(ring, 0xffffffff);	/* mask */
+	amdgpu_ring_write(ring, poll_cfg);
+}
+
+/*
+ * sdma_v6_0_ring_emit_vm_flush - vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: address
+ *
+ * Writes the page table base for @vmid into the hub's context registers
+ * and then emits a VM invalidation packet to flush the VM TLB.
+ */
+static void sdma_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+					 unsigned vmid, uint64_t pd_addr)
+{
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+	uint32_t inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
+	uint32_t ctx_off = hub->ctx_addr_distance * vmid;
+	uint32_t header, ack;
+
+	/* Update the PD address for this VMID. */
+	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + ctx_off,
+			      lower_32_bits(pd_addr));
+	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + ctx_off,
+			      upper_32_bits(pd_addr));
+
+	/* Trigger invalidation. */
+	header = SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+		 SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATION) |
+		 SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(ring->vm_inv_eng) |
+		 SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(0x1f);
+	ack = SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(1 << vmid) |
+	      SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(0x1F);
+
+	amdgpu_ring_write(ring, header);
+	amdgpu_ring_write(ring, inv_req);
+	amdgpu_ring_write(ring, 0xFFFFFFFF);
+	amdgpu_ring_write(ring, ack);
+}
+
+/* Emit a 3-dword SRBM_WRITE packet that stores @val into register @reg. */
+static void sdma_v6_0_ring_emit_wreg(struct amdgpu_ring *ring,
+				     uint32_t reg, uint32_t val)
+{
+	uint32_t header;
+
+	/* byte-enable 0xf selects all four bytes of the register */
+	header = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+		 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf);
+
+	amdgpu_ring_write(ring, header);
+	amdgpu_ring_write(ring, reg);
+	amdgpu_ring_write(ring, val);
+}
+
+/*
+ * Emit a 6-dword POLL_REGMEM packet that polls register @reg, comparing
+ * it against @val under @mask with compare function 3 (equal).
+ */
+static void sdma_v6_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+					 uint32_t val, uint32_t mask)
+{
+	uint32_t header, poll_cfg;
+
+	header = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+		 SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+		 SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3);
+	poll_cfg = SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+		   SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10);
+
+	amdgpu_ring_write(ring, header);
+	amdgpu_ring_write(ring, reg << 2);	/* register index scaled by 4 */
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, val);		/* reference */
+	amdgpu_ring_write(ring, mask);		/* mask */
+	amdgpu_ring_write(ring, poll_cfg);
+}
+
+/*
+ * Emit a register write followed by a register wait: @ref is written to
+ * @reg0, then @reg1 is polled with both reference and mask set to @mask.
+ * A zero-mask wait on @reg0 is emitted in between the two.
+ */
+static void sdma_v6_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ amdgpu_ring_emit_wreg(ring, reg0, ref);
+ /* wait for a cycle to reset vm_inv_eng*_ack */
+ amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
+ amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
+/* RAS description installed by sdma_v6_0_set_ras_funcs() for SDMA 6.0.3. */
+static struct amdgpu_sdma_ras sdma_v6_0_3_ras = {
+ .ras_block = {
+ .ras_late_init = amdgpu_ras_block_late_init,
+ },
+};
+
+/* Install the RAS description for SDMA IP versions that have one. */
+static void sdma_v6_0_set_ras_funcs(struct amdgpu_device *adev)
+{
+	/* only SDMA 6.0.3 has an entry; adev->sdma.ras is untouched otherwise */
+	if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(6, 0, 3))
+		adev->sdma.ras = &sdma_v6_0_3_ras;
+}
+
+/*
+ * sdma_v6_0_early_init - early setup of the SDMA v6.0 IP block
+ *
+ * @ip_block: IP block being initialized
+ *
+ * Derives the user-queue policy from amdgpu_user_queue, loads the SDMA
+ * microcode and installs the ring, buffer, VM PTE, IRQ, MQD and RAS
+ * callbacks.
+ * Returns 0 on success or the error from amdgpu_sdma_init_microcode().
+ */
+static int sdma_v6_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int ret;
+
+	if (amdgpu_user_queue == 1) {
+		/* kernel submission and user queues both available */
+		adev->sdma.no_user_submission = false;
+		adev->sdma.disable_uq = false;
+	} else if (amdgpu_user_queue == 2) {
+		/* user queues only */
+		adev->sdma.no_user_submission = true;
+		adev->sdma.disable_uq = false;
+	} else {
+		/* -1, 0 and any other value: user queues disabled */
+		adev->sdma.no_user_submission = false;
+		adev->sdma.disable_uq = true;
+	}
+
+	ret = amdgpu_sdma_init_microcode(adev, 0, true);
+	if (ret)
+		return ret;
+
+	sdma_v6_0_set_ring_funcs(adev);
+	sdma_v6_0_set_buffer_funcs(adev);
+	sdma_v6_0_set_vm_pte_funcs(adev);
+	sdma_v6_0_set_irq_funcs(adev);
+	sdma_v6_0_set_mqd_funcs(adev);
+	sdma_v6_0_set_ras_funcs(adev);
+
+	return 0;
+}
+
+/*
+ * Minimum SDMA firmware version required before MES user queues are
+ * enabled for the device's SDMA IP version. Returns 0 for IP versions
+ * on which user queues are never enabled here.
+ */
+static u32 sdma_v6_0_userq_min_fw_version(struct amdgpu_device *adev)
+{
+	switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+	case IP_VERSION(6, 0, 0):
+		return 27;
+	case IP_VERSION(6, 0, 1):
+		return 18;
+	case IP_VERSION(6, 0, 2):
+		return 23;
+	case IP_VERSION(6, 0, 3):
+		return 29;
+	case IP_VERSION(6, 1, 0):
+		return 14;
+	case IP_VERSION(6, 1, 1):
+		return 17;
+	case IP_VERSION(6, 1, 2):
+		return 15;
+	case IP_VERSION(6, 1, 3):
+		return 10;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * sdma_v6_0_sw_init - software-side setup of the SDMA v6.0 IP block
+ *
+ * @ip_block: IP block being initialized
+ *
+ * Registers the trap and fence interrupt sources, initializes one ring
+ * per SDMA instance, works out which reset types are supported, sets up
+ * RAS, allocates the IP register dump buffer, enables MES user queues
+ * when the firmware is new enough, and creates the reset-mask sysfs entry.
+ * Returns 0 on success or a negative error code.
+ */
+static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_ring *ring;
+	int r, i;
+	struct amdgpu_device *adev = ip_block->adev;
+	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0);
+	uint32_t *ptr;
+	u32 userq_min_fw;
+
+	/* SDMA trap event */
+	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+			      GFX_11_0_0__SRCID__SDMA_TRAP,
+			      &adev->sdma.trap_irq);
+	if (r)
+		return r;
+
+	/* SDMA user fence event */
+	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+			      GFX_11_0_0__SRCID__SDMA_FENCE,
+			      &adev->sdma.fence_irq);
+	if (r)
+		return r;
+
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		ring = &adev->sdma.instance[i].ring;
+		ring->ring_obj = NULL;
+		ring->use_doorbell = true;
+		ring->me = i;
+		ring->no_user_submission = adev->sdma.no_user_submission;
+
+		DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
+			  ring->use_doorbell?"true":"false");
+
+		ring->doorbell_index =
+			(adev->doorbell_index.sdma_engine[i] << 1); /* get DWORD offset */
+
+		ring->vm_hub = AMDGPU_GFXHUB(0);
+		sprintf(ring->name, "sdma%d", i);
+		r = amdgpu_ring_init(adev, ring, 1024,
+				     &adev->sdma.trap_irq,
+				     AMDGPU_SDMA_IRQ_INSTANCE0 + i,
+				     AMDGPU_RING_PRIO_DEFAULT, NULL);
+		if (r)
+			return r;
+	}
+
+	adev->sdma.supported_reset =
+		amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+	/* per-queue reset: bare metal only, firmware >= 21, not disabled for debug */
+	switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+	case IP_VERSION(6, 0, 0):
+	case IP_VERSION(6, 0, 2):
+	case IP_VERSION(6, 0, 3):
+		if ((adev->sdma.instance[0].fw_version >= 21) &&
+		    !amdgpu_sriov_vf(adev) &&
+		    !adev->debug_disable_gpu_ring_reset)
+			adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+		break;
+	default:
+		break;
+	}
+
+	if (amdgpu_sdma_ras_sw_init(adev)) {
+		dev_err(adev->dev, "Failed to initialize sdma ras block!\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Allocate memory for SDMA IP Dump buffer. Failure is not fatal:
+	 * ip_dump is left unset and the dump/print callbacks return early.
+	 */
+	ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
+	if (ptr)
+		adev->sdma.ip_dump = ptr;
+	else
+		DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+
+	/* MES user queues need a new enough firmware and must not be disabled */
+	userq_min_fw = sdma_v6_0_userq_min_fw_version(adev);
+	if (userq_min_fw &&
+	    adev->sdma.instance[0].fw_version >= userq_min_fw &&
+	    !adev->sdma.disable_uq)
+		adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+
+	return amdgpu_sdma_sysfs_reset_mask_init(adev);
+}
+
+/*
+ * sdma_v6_0_sw_fini - software-side teardown of the SDMA v6.0 IP block
+ *
+ * @ip_block: IP block being torn down
+ *
+ * Finalizes every SDMA ring, removes the reset-mask sysfs entry, destroys
+ * the instance context and frees the IP dump buffer.
+ * Returns 0 unconditionally.
+ */
+static int sdma_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+
+ amdgpu_sdma_sysfs_reset_mask_fini(adev);
+ amdgpu_sdma_destroy_inst_ctx(adev, true);
+
+ /* kfree() accepts NULL, so a failed ip_dump allocation is fine here */
+ kfree(adev->sdma.ip_dump);
+
+ return 0;
+}
+
+/*
+ * Take (@enable true) or drop (@enable false) one trap IRQ reference per
+ * SDMA instance. Does nothing unless user-queue functions are registered
+ * for the DMA IP.
+ * Returns 0 on success or the first error from amdgpu_irq_get()/_put().
+ */
+static int sdma_v6_0_set_userq_trap_interrupts(struct amdgpu_device *adev,
+					       bool enable)
+{
+	int inst;
+
+	if (!adev->userq_funcs[AMDGPU_HW_IP_DMA])
+		return 0;
+
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		unsigned int irq_type = AMDGPU_SDMA_IRQ_INSTANCE0 + inst;
+		int r = enable ?
+			amdgpu_irq_get(adev, &adev->sdma.trap_irq, irq_type) :
+			amdgpu_irq_put(adev, &adev->sdma.trap_irq, irq_type);
+
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+/*
+ * Start the SDMA engines and, if that succeeds, enable the user-queue
+ * trap interrupts. Returns 0 on success or the first error encountered.
+ */
+static int sdma_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int r = sdma_v6_0_start(adev);
+
+	if (!r)
+		r = sdma_v6_0_set_userq_trap_interrupts(adev, true);
+
+	return r;
+}
+
+/*
+ * Stop the SDMA hardware. Nothing is touched on an SR-IOV virtual
+ * function; otherwise the context-empty interrupt and the engines are
+ * disabled and the user-queue trap interrupt references are dropped.
+ * Returns 0 unconditionally.
+ */
+static int sdma_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	if (!amdgpu_sriov_vf(adev)) {
+		sdma_v6_0_ctxempty_int_enable(adev, false);
+		sdma_v6_0_enable(adev, false);
+		/* result deliberately not propagated: teardown always reports success */
+		sdma_v6_0_set_userq_trap_interrupts(adev, false);
+	}
+
+	return 0;
+}
+
+/* Suspend is the same operation as hardware teardown: delegate to hw_fini. */
+static int sdma_v6_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return sdma_v6_0_hw_fini(ip_block);
+}
+
+/* Resume is the same operation as hardware bring-up: delegate to hw_init. */
+static int sdma_v6_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ return sdma_v6_0_hw_init(ip_block);
+}
+
+/*
+ * Report whether every SDMA instance has the IDLE bit set in its status
+ * register. Returns false as soon as one instance is found busy.
+ */
+static bool sdma_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	u32 inst;
+
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		u32 status_reg = sdma_v6_0_get_reg_offset(adev, inst,
+							  regSDMA0_STATUS_REG);
+
+		if ((RREG32(status_reg) & SDMA0_STATUS_REG__IDLE_MASK) == 0)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * sdma_v6_0_wait_for_idle - wait until all SDMA instances are idle
+ *
+ * @ip_block: IP block to poll
+ *
+ * Polls the idle state once per microsecond for up to adev->usec_timeout
+ * iterations. The check goes through sdma_v6_0_is_idle() so that exactly
+ * adev->sdma.num_instances engines are examined, rather than assuming
+ * that instances 0 and 1 both exist.
+ * Returns 0 once every instance is idle, -ETIMEDOUT otherwise.
+ */
+static int sdma_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	unsigned int i;
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		if (sdma_v6_0_is_idle(ip_block))
+			return 0;
+		udelay(1);
+	}
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * sdma_v6_0_ring_preempt_ib - preempt the IB currently running on a ring
+ *
+ * @ring: ring to preempt
+ *
+ * Clears the conditional-execution flag, emits a trailing fence, asserts
+ * the queue's PREEMPT register and polls for the trailing fence value for
+ * up to adev->usec_timeout microseconds. The PREEMPT register and the
+ * conditional-execution flag are restored before returning.
+ * Returns 0 on success, the amdgpu_ring_alloc() error if ring space for
+ * the trailing fence could not be reserved, or -EINVAL on timeout.
+ */
+static int sdma_v6_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+	int i, r = 0;
+	struct amdgpu_device *adev = ring->adev;
+	u32 index = 0;
+	u64 sdma_gfx_preempt;
+
+	amdgpu_sdma_get_index_from_ring(ring, &index);
+	sdma_gfx_preempt =
+		sdma_v6_0_get_reg_offset(adev, index, regSDMA0_QUEUE0_PREEMPT);
+
+	/* assert preemption condition */
+	amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+	/*
+	 * Reserve ring space for the trailing fence. Without the space the
+	 * fence must not be written, so undo the condition and bail out.
+	 */
+	r = amdgpu_ring_alloc(ring, 10);
+	if (r) {
+		amdgpu_ring_set_preempt_cond_exec(ring, true);
+		return r;
+	}
+
+	/* emit the trailing fence */
+	ring->trail_seq += 1;
+	sdma_v6_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+				  ring->trail_seq, 0);
+	amdgpu_ring_commit(ring);
+
+	/* assert IB preemption */
+	WREG32(sdma_gfx_preempt, 1);
+
+	/* poll the trailing fence */
+	for (i = 0; i < adev->usec_timeout; i++) {
+		if (ring->trail_seq ==
+		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+			break;
+		udelay(1);
+	}
+
+	if (i >= adev->usec_timeout) {
+		r = -EINVAL;
+		DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
+	}
+
+	/* deassert IB preemption */
+	WREG32(sdma_gfx_preempt, 0);
+
+	/* deassert the preemption condition */
+	amdgpu_ring_set_preempt_cond_exec(ring, true);
+	return r;
+}
+
+/*
+ * sdma_v6_0_reset_queue - reset a single SDMA kernel queue
+ *
+ * @ring: ring to reset; ring->me selects the SDMA instance
+ * @vmid: VMID passed through to the MES legacy-queue reset
+ * @timedout_fence: fence handed to the ring reset begin/end helpers
+ *
+ * Validates the instance index, then runs the reset sequence: reset
+ * helper begin, MES legacy queue reset, resume of that instance's gfx
+ * queue, reset helper end.
+ * Returns 0 on success, -EINVAL for an out-of-range instance, or the
+ * first error returned by any step.
+ */
+static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ if (ring->me >= adev->sdma.num_instances) {
+ dev_err(adev->dev, "sdma instance not found\n");
+ return -EINVAL;
+ }
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ /* NOTE(review): the error returns below skip amdgpu_ring_reset_helper_end() - confirm that is intended */
+ r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true);
+ if (r)
+ return r;
+
+ r = sdma_v6_0_gfx_resume_instance(adev, ring->me, true);
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+/*
+ * Enable or disable the trap interrupt of one SDMA instance by updating
+ * the TRAP_ENABLE field of its CNTL register. @type is used as the
+ * instance index. The register is left alone on an SR-IOV virtual
+ * function. Returns 0 unconditionally.
+ */
+static int sdma_v6_0_set_trap_irq_state(struct amdgpu_device *adev,
+					struct amdgpu_irq_src *source,
+					unsigned type,
+					enum amdgpu_interrupt_state state)
+{
+	u32 cntl_reg = sdma_v6_0_get_reg_offset(adev, type, regSDMA0_CNTL);
+	u32 cntl;
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	cntl = RREG32(cntl_reg);
+	cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, TRAP_ENABLE,
+			     state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+	WREG32(cntl_reg, cntl);
+
+	return 0;
+}
+
+/*
+ * sdma_v6_0_process_trap_irq - handle an SDMA trap interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt source (unused)
+ * @entry: decoded interrupt vector entry
+ *
+ * ring_id carries the queue in bits 3:0 and the SDMA instance in bits
+ * 7:4. Fence processing runs only for queue 0 of the GFX client.
+ * Returns 0 on success, or -EINVAL when the instance index does not name
+ * one of the adev->sdma.num_instances engines on this device.
+ */
+static int sdma_v6_0_process_trap_irq(struct amdgpu_device *adev,
+				      struct amdgpu_irq_src *source,
+				      struct amdgpu_iv_entry *entry)
+{
+	int instances, queue;
+
+	DRM_DEBUG("IH: SDMA trap\n");
+
+	queue = entry->ring_id & 0xf;
+	instances = (entry->ring_id & 0xf0) >> 4;
+	/* check against the real instance count so no absent ring is indexed */
+	if (instances >= adev->sdma.num_instances) {
+		DRM_ERROR("IH: wrong ring_ID detected, as wrong sdma instance\n");
+		return -EINVAL;
+	}
+
+	if (entry->client_id == SOC21_IH_CLIENTID_GFX && queue == 0)
+		amdgpu_fence_process(&adev->sdma.instance[instances].ring);
+
+	return 0;
+}
+
+/*
+ * Handle an SDMA user fence interrupt. When MES is enabled and the
+ * entry carries a non-zero doorbell offset, the matching user-queue
+ * fence driver is looked up in adev->userq_xa and processed while the
+ * xarray lock is held. Returns 0 unconditionally.
+ */
+static int sdma_v6_0_process_fence_irq(struct amdgpu_device *adev,
+				       struct amdgpu_irq_src *source,
+				       struct amdgpu_iv_entry *entry)
+{
+	struct amdgpu_userq_fence_driver *fence_drv;
+	struct xarray *xa = &adev->userq_xa;
+	u32 doorbell_offset = entry->src_data[0];
+	unsigned long flags;
+
+	if (!adev->enable_mes || !doorbell_offset)
+		return 0;
+
+	/* strip the register field shift to get the xarray key */
+	doorbell_offset >>= SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+	xa_lock_irqsave(xa, flags);
+	fence_drv = xa_load(xa, doorbell_offset);
+	if (fence_drv)
+		amdgpu_userq_fence_driver_process(fence_drv);
+	xa_unlock_irqrestore(xa, flags);
+
+	return 0;
+}
+
+/* Illegal-instruction interrupt handler: currently a stub that returns 0. */
+static int sdma_v6_0_process_illegal_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ return 0;
+}
+
+/* Clockgating control: no-op for this IP, always returns 0. */
+static int sdma_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+/* Powergating control: no-op for this IP, always returns 0. */
+static int sdma_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+/* Clockgating query: no-op, *flags is left unmodified. */
+static void sdma_v6_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+}
+
+/*
+ * Print the register snapshot held in adev->sdma.ip_dump, one section
+ * per SDMA instance. Does nothing if the dump buffer was never allocated.
+ */
+static void sdma_v6_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	const uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0);
+	const uint32_t *inst_regs;
+	int inst, reg;
+
+	if (!adev->sdma.ip_dump)
+		return;
+
+	drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		/* each instance owns reg_count consecutive slots */
+		inst_regs = &adev->sdma.ip_dump[inst * reg_count];
+		drm_printf(p, "\nInstance:%d\n", inst);
+
+		for (reg = 0; reg < reg_count; reg++)
+			drm_printf(p, "%-50s \t 0x%08x\n",
+				   sdma_reg_list_6_0[reg].reg_name,
+				   inst_regs[reg]);
+	}
+}
+
+/*
+ * Snapshot every register listed in sdma_reg_list_6_0 for each SDMA
+ * instance into adev->sdma.ip_dump. The reads are bracketed by
+ * amdgpu_gfx_off_ctrl(false)/(true). Does nothing if the dump buffer was
+ * never allocated.
+ */
+static void sdma_v6_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	const uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0);
+	uint32_t *dst = adev->sdma.ip_dump;
+	int inst, reg;
+
+	if (!dst)
+		return;
+
+	amdgpu_gfx_off_ctrl(adev, false);
+	/* the buffer is laid out instance-major, so a running cursor suffices */
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		for (reg = 0; reg < reg_count; reg++)
+			*dst++ = RREG32(sdma_v6_0_get_reg_offset(adev, inst,
+					sdma_reg_list_6_0[reg].reg_offset));
+	}
+	amdgpu_gfx_off_ctrl(adev, true);
+}
+
+/*
+ * IP-level callback table for SDMA v6.0. Non-static: it is referenced by
+ * sdma_v6_0_ip_block and declared extern in sdma_v6_0.h.
+ */
+const struct amd_ip_funcs sdma_v6_0_ip_funcs = {
+ .name = "sdma_v6_0",
+ .early_init = sdma_v6_0_early_init,
+ .sw_init = sdma_v6_0_sw_init,
+ .sw_fini = sdma_v6_0_sw_fini,
+ .hw_init = sdma_v6_0_hw_init,
+ .hw_fini = sdma_v6_0_hw_fini,
+ .suspend = sdma_v6_0_suspend,
+ .resume = sdma_v6_0_resume,
+ .is_idle = sdma_v6_0_is_idle,
+ .wait_for_idle = sdma_v6_0_wait_for_idle,
+ .soft_reset = sdma_v6_0_soft_reset,
+ .check_soft_reset = sdma_v6_0_check_soft_reset,
+ .set_clockgating_state = sdma_v6_0_set_clockgating_state,
+ .set_powergating_state = sdma_v6_0_set_powergating_state,
+ .get_clockgating_state = sdma_v6_0_get_clockgating_state,
+ .dump_ip_state = sdma_v6_0_dump_ip_state,
+ .print_ip_state = sdma_v6_0_print_ip_state,
+};
+
+/*
+ * Ring callback table shared by every SDMA v6.0 instance; installed by
+ * sdma_v6_0_set_ring_funcs(). emit_frame_size and emit_ib_size are sums
+ * of the dword counts of the packets named in the inline comments.
+ */
+static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xf,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = sdma_v6_0_ring_get_rptr,
+ .get_wptr = sdma_v6_0_ring_get_wptr,
+ .set_wptr = sdma_v6_0_ring_set_wptr,
+ .emit_frame_size =
+ 5 + /* sdma_v6_0_ring_init_cond_exec */
+ 6 + /* sdma_v6_0_ring_emit_hdp_flush */
+ 6 + /* sdma_v6_0_ring_emit_pipeline_sync */
+ /* sdma_v6_0_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ 10 + 10 + 10, /* sdma_v6_0_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 5 + 7 + 6, /* sdma_v6_0_ring_emit_ib */
+ .emit_ib = sdma_v6_0_ring_emit_ib,
+ .emit_mem_sync = sdma_v6_0_ring_emit_mem_sync,
+ .emit_fence = sdma_v6_0_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v6_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v6_0_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v6_0_ring_emit_hdp_flush,
+ .test_ring = sdma_v6_0_ring_test_ring,
+ .test_ib = sdma_v6_0_ring_test_ib,
+ .insert_nop = sdma_v6_0_ring_insert_nop,
+ .pad_ib = sdma_v6_0_ring_pad_ib,
+ .emit_wreg = sdma_v6_0_ring_emit_wreg,
+ .emit_reg_wait = sdma_v6_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = sdma_v6_0_ring_emit_reg_write_reg_wait,
+ .init_cond_exec = sdma_v6_0_ring_init_cond_exec,
+ .preempt_ib = sdma_v6_0_ring_preempt_ib,
+ .reset = sdma_v6_0_reset_queue,
+};
+
+/* Point every SDMA instance's ring at the v6.0 callbacks and record its index. */
+static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	int inst;
+
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		ring = &adev->sdma.instance[inst].ring;
+		ring->funcs = &sdma_v6_0_ring_funcs;
+		ring->me = inst;
+	}
+}
+
+/* Trap interrupt source: supports both state changes and processing. */
+static const struct amdgpu_irq_src_funcs sdma_v6_0_trap_irq_funcs = {
+ .set = sdma_v6_0_set_trap_irq_state,
+ .process = sdma_v6_0_process_trap_irq,
+};
+
+/* User fence interrupt source: process-only, no .set callback. */
+static const struct amdgpu_irq_src_funcs sdma_v6_0_fence_irq_funcs = {
+ .process = sdma_v6_0_process_fence_irq,
+};
+
+/* Illegal-instruction interrupt source: process-only, no .set callback. */
+static const struct amdgpu_irq_src_funcs sdma_v6_0_illegal_inst_irq_funcs = {
+ .process = sdma_v6_0_process_illegal_inst_irq,
+};
+
+/*
+ * Install the interrupt source callbacks. The trap source gets one
+ * interrupt type per SDMA instance, counted from AMDGPU_SDMA_IRQ_INSTANCE0.
+ */
+static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_sdma *sdma = &adev->sdma;
+
+	sdma->trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
+				   sdma->num_instances;
+	sdma->trap_irq.funcs = &sdma_v6_0_trap_irq_funcs;
+	sdma->fence_irq.funcs = &sdma_v6_0_fence_irq_funcs;
+	sdma->illegal_inst_irq.funcs = &sdma_v6_0_illegal_inst_irq_funcs;
+}
+
+/**
+ * sdma_v6_0_emit_copy_buffer - copy buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to fill with commands
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ * @copy_flags: copy flags for the buffers
+ *
+ * Appends a 7-dword linear copy packet to the IB. The TMZ header bit is
+ * set when AMDGPU_COPY_FLAGS_TMZ is present in @copy_flags.
+ * Used by the amdgpu ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+static void sdma_v6_0_emit_copy_buffer(struct amdgpu_ib *ib,
+				       uint64_t src_offset,
+				       uint64_t dst_offset,
+				       uint32_t byte_count,
+				       uint32_t copy_flags)
+{
+	uint32_t *cmd = &ib->ptr[ib->length_dw];
+
+	cmd[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
+		 SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+		 SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
+	cmd[1] = byte_count - 1;
+	cmd[2] = 0;	/* src/dst endian swap */
+	cmd[3] = lower_32_bits(src_offset);
+	cmd[4] = upper_32_bits(src_offset);
+	cmd[5] = lower_32_bits(dst_offset);
+	cmd[6] = upper_32_bits(dst_offset);
+
+	ib->length_dw += 7;
+}
+
+/**
+ * sdma_v6_0_emit_fill_buffer - fill buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to fill
+ * @src_data: value to write to buffer
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ *
+ * Appends a 5-dword constant fill packet to the IB.
+ */
+static void sdma_v6_0_emit_fill_buffer(struct amdgpu_ib *ib,
+				       uint32_t src_data,
+				       uint64_t dst_offset,
+				       uint32_t byte_count)
+{
+	uint32_t *cmd = &ib->ptr[ib->length_dw];
+
+	cmd[0] = SDMA_PKT_CONSTANT_FILL_HEADER_OP(SDMA_OP_CONST_FILL);
+	cmd[1] = lower_32_bits(dst_offset);
+	cmd[2] = upper_32_bits(dst_offset);
+	cmd[3] = src_data;
+	cmd[4] = byte_count - 1;
+
+	ib->length_dw += 5;
+}
+
+/*
+ * Buffer copy/fill callbacks. The *_num_dw values match the number of
+ * dwords each emit function appends to the IB (7 for copy, 5 for fill).
+ */
+static const struct amdgpu_buffer_funcs sdma_v6_0_buffer_funcs = {
+ .copy_max_bytes = 0x400000,
+ .copy_num_dw = 7,
+ .emit_copy_buffer = sdma_v6_0_emit_copy_buffer,
+
+ .fill_max_bytes = 0x400000,
+ .fill_num_dw = 5,
+ .emit_fill_buffer = sdma_v6_0_emit_fill_buffer,
+};
+
+/* Register the SDMA buffer callbacks with mman; SDMA instance 0's ring is used for them. */
+static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev)
+{
+ adev->mman.buffer_funcs = &sdma_v6_0_buffer_funcs;
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
+}
+
+/*
+ * Page-table update callbacks. copy_pte_num_dw matches the 7 dwords that
+ * sdma_v6_0_vm_copy_pte() appends per call.
+ */
+static const struct amdgpu_vm_pte_funcs sdma_v6_0_vm_pte_funcs = {
+ .copy_pte_num_dw = 7,
+ .copy_pte = sdma_v6_0_vm_copy_pte,
+ .write_pte = sdma_v6_0_vm_write_pte,
+ .set_pte_pde = sdma_v6_0_vm_set_pte_pde,
+};
+
+/*
+ * Register the SDMA page-table callbacks with the VM manager and expose
+ * the scheduler of every SDMA ring for page-table updates.
+ */
+static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	unsigned int inst;
+
+	adev->vm_manager.vm_pte_funcs = &sdma_v6_0_vm_pte_funcs;
+
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		ring = &adev->sdma.instance[inst].ring;
+		adev->vm_manager.vm_pte_scheds[inst] = &ring->sched;
+	}
+
+	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+}
+
+/* IP block descriptor: SDMA type, version 6.0.0, using sdma_v6_0_ip_funcs. */
+const struct amdgpu_ip_block_version sdma_v6_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_SDMA,
+ .major = 6,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &sdma_v6_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.h b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.h
new file mode 100644
index 000000000000..e473ec7dfc8f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SDMA_V6_0_H__
+#define __SDMA_V6_0_H__
+
+/* IP-level callback table and IP block descriptor of the SDMA v6.0 driver. */
+extern const struct amd_ip_funcs sdma_v6_0_ip_funcs;
+extern const struct amdgpu_ip_block_version sdma_v6_0_ip_block;
+
+#endif /* __SDMA_V6_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0_0_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0_0_pkt_open.h
new file mode 100644
index 000000000000..d8cf830916b9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0_0_pkt_open.h
@@ -0,0 +1,5672 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SDMA_V6_0_0_PKT_OPEN_H_
+#define __SDMA_V6_0_0_PKT_OPEN_H_
+
+#define SDMA_OP_NOP 0 /* SDMA_OP_*: top-level opcode values; presumably written to a packet's HEADER op field (TODO confirm) */
+#define SDMA_OP_COPY 1
+#define SDMA_OP_WRITE 2
+#define SDMA_OP_INDIRECT 4
+#define SDMA_OP_FENCE 5
+#define SDMA_OP_TRAP 6
+#define SDMA_OP_SEM 7
+#define SDMA_OP_POLL_REGMEM 8
+#define SDMA_OP_COND_EXE 9
+#define SDMA_OP_ATOMIC 10
+#define SDMA_OP_CONST_FILL 11
+#define SDMA_OP_PTEPDE 12
+#define SDMA_OP_TIMESTAMP 13
+#define SDMA_OP_SRBM_WRITE 14
+#define SDMA_OP_PRE_EXE 15
+#define SDMA_OP_GPUVM_INV 16
+#define SDMA_OP_GCR_REQ 17
+#define SDMA_OP_DUMMY_TRAP 32
+#define SDMA_SUBOP_TIMESTAMP_SET 0 /* SDMA_SUBOP_*: sub-opcode values; numbers repeat across groups, so presumably each is read together with its opcode (TODO confirm) */
+#define SDMA_SUBOP_TIMESTAMP_GET 1
+#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL 2
+#define SDMA_SUBOP_COPY_LINEAR 0 /* copy sub-opcodes start here */
+#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND 4
+#define SDMA_SUBOP_COPY_TILED 1
+#define SDMA_SUBOP_COPY_TILED_SUB_WIND 5
+#define SDMA_SUBOP_COPY_T2T_SUB_WIND 6
+#define SDMA_SUBOP_COPY_SOA 3
+#define SDMA_SUBOP_COPY_DIRTY_PAGE 7
+#define SDMA_SUBOP_COPY_LINEAR_PHY 8
+#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND_LARGE 36
+#define SDMA_SUBOP_COPY_LINEAR_BC 16 /* every *_BC value in this list equals its non-BC counterpart plus 16 */
+#define SDMA_SUBOP_COPY_TILED_BC 17
+#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND_BC 20
+#define SDMA_SUBOP_COPY_TILED_SUB_WIND_BC 21
+#define SDMA_SUBOP_COPY_T2T_SUB_WIND_BC 22
+#define SDMA_SUBOP_WRITE_LINEAR 0 /* write sub-opcodes start here */
+#define SDMA_SUBOP_WRITE_TILED 1
+#define SDMA_SUBOP_WRITE_TILED_BC 17
+#define SDMA_SUBOP_PTEPDE_GEN 0 /* PTEPDE sub-opcodes start here */
+#define SDMA_SUBOP_PTEPDE_COPY 1
+#define SDMA_SUBOP_PTEPDE_RMW 2
+#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS 3
+#define SDMA_SUBOP_MEM_INCR 1
+#define SDMA_SUBOP_DATA_FILL_MULTI 1
+#define SDMA_SUBOP_POLL_REG_WRITE_MEM 1
+#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM 2
+#define SDMA_SUBOP_POLL_MEM_VERIFY 3
+#define SDMA_SUBOP_VM_INVALIDATION 4
+#define HEADER_AGENT_DISPATCH 4
+#define HEADER_BARRIER 5
+#define SDMA_OP_AQL_COPY 0
+#define SDMA_OP_AQL_BARRIER_OR 0
+
+#define SDMA_GCR_RANGE_IS_PA (1 << 18) /* SDMA_GCR_*: the flags and fields below cover bits 18:0 with no overlap */
+#define SDMA_GCR_SEQ(x) (((x) & 0x3) << 16) /* 2-bit field, bits 17:16 */
+#define SDMA_GCR_GL2_WB (1 << 15)
+#define SDMA_GCR_GL2_INV (1 << 14)
+#define SDMA_GCR_GL2_DISCARD (1 << 13)
+#define SDMA_GCR_GL2_RANGE(x) (((x) & 0x3) << 11) /* 2-bit field, bits 12:11 */
+#define SDMA_GCR_GL2_US (1 << 10)
+#define SDMA_GCR_GL1_INV (1 << 9)
+#define SDMA_GCR_GLV_INV (1 << 8)
+#define SDMA_GCR_GLK_INV (1 << 7)
+#define SDMA_GCR_GLK_WB (1 << 6)
+#define SDMA_GCR_GLM_INV (1 << 5)
+#define SDMA_GCR_GLM_WB (1 << 4)
+#define SDMA_GCR_GL1_RANGE(x) (((x) & 0x3) << 2) /* 2-bit field, bits 3:2 */
+#define SDMA_GCR_GLI_INV(x) (((x) & 0x3) << 0) /* 2-bit field, bits 1:0 */
+
+#define SDMA_DCC_DATA_FORMAT(x) ((x) & 0x3f) /* bits 5:0; each SDMA_DCC_* macro masks x to its field width and shifts it into place */
+#define SDMA_DCC_NUM_TYPE(x) (((x) & 0x7) << 9) /* bits 11:9 */
+#define SDMA_DCC_READ_CM(x) (((x) & 0x3) << 16) /* bits 17:16 */
+#define SDMA_DCC_WRITE_CM(x) (((x) & 0x3) << 18) /* bits 19:18 */
+#define SDMA_DCC_MAX_COM(x) (((x) & 0x3) << 24) /* bits 25:24 */
+#define SDMA_DCC_MAX_UCOM(x) (((x) & 0x1) << 26) /* bit 26 */
+
+/* Naming: <field>_mask is the unshifted field mask, <field>_shift its lowest bit, and <FIELD>(x) packs ((x) & mask) << shift.
+** Definitions for SDMA_PKT_COPY_LINEAR packet (words HEADER..DST_ADDR_HI carry _offset 0..6; _offset presumably indexes the packet word - TODO confirm)
+*/
+
+/*define for HEADER word (_offset 0)*/
+/*define for op field (bits 7:0)*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field (bits 15:8)*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift)
+
+/*define for encrypt field (bit 16)*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_LINEAR_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_encrypt_mask) << SDMA_PKT_COPY_LINEAR_HEADER_encrypt_shift)
+
+/*define for tmz field (bit 18)*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_HEADER_tmz_shift)
+
+/*define for cpv field (bit 19)*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_LINEAR_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_cpv_mask) << SDMA_PKT_COPY_LINEAR_HEADER_cpv_shift)
+
+/*define for backwards field (bit 25)*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_shift 25
+#define SDMA_PKT_COPY_LINEAR_HEADER_BACKWARDS(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_backwards_mask) << SDMA_PKT_COPY_LINEAR_HEADER_backwards_shift)
+
+/*define for broadcast field (bit 27)*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift)
+
+/*define for COUNT word (_offset 1)*/
+/*define for count field (bits 29:0)*/
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_mask 0x3FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word (_offset 2)*/
+/*define for dst_sw field (bits 17:16)*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift)
+
+/*define for dst_cache_policy field (bits 20:18)*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_shift 18
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_DST_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_shift)
+
+/*define for src_sw field (bits 25:24)*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for src_cache_policy field (bits 28:26)*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_shift 26
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_shift)
+
+/*define for SRC_ADDR_LO word (_offset 3)*/
+/*define for src_addr_31_0 field (bits 31:0)*/
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word (_offset 4)*/
+/*define for src_addr_63_32 field (bits 31:0)*/
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word (_offset 5)*/
+/*define for dst_addr_31_0 field (bits 31:0)*/
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word (_offset 6)*/
+/*define for dst_addr_63_32 field (bits 31:0)*/
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/* Naming: <field>_mask is the unshifted field mask, <field>_shift its lowest bit, and <FIELD>(x) packs ((x) & mask) << shift.
+** Definitions for SDMA_PKT_COPY_LINEAR_BC packet (words HEADER..DST_ADDR_HI carry _offset 0..6; _offset presumably indexes the packet word - TODO confirm)
+*/
+
+/*define for HEADER word (_offset 0)*/
+/*define for op field (bits 7:0)*/
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_BC_HEADER_op_shift)
+
+/*define for sub_op field (bits 15:8)*/
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_shift)
+
+/*define for COUNT word (_offset 1)*/
+/*define for count field (bits 21:0)*/
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_offset 1
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_COUNT_count_mask) << SDMA_PKT_COPY_LINEAR_BC_COUNT_count_shift)
+
+/*define for PARAMETER word (_offset 2)*/
+/*define for dst_sw field (bits 17:16)*/
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_shift)
+
+/*define for dst_ha field (bit 19)*/
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_shift 19
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_DST_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_shift)
+
+/*define for src_sw field (bits 25:24)*/
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_shift)
+
+/*define for src_ha field (bit 27)*/
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_shift 27
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_SRC_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_shift)
+
+/*define for SRC_ADDR_LO word (_offset 3)*/
+/*define for src_addr_31_0 field (bits 31:0)*/
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word (_offset 4)*/
+/*define for src_addr_63_32 field (bits 31:0)*/
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word (_offset 5)*/
+/*define for dst_addr_31_0 field (bits 31:0)*/
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word (_offset 6)*/
+/*define for dst_addr_63_32 field (bits 31:0)*/
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/* Naming: <field>_mask is the unshifted field mask, <field>_shift its lowest bit, and <FIELD>(x) packs ((x) & mask) << shift.
+** Definitions for SDMA_PKT_COPY_DIRTY_PAGE packet (words HEADER..DST_ADDR_HI carry _offset 0..6; _offset presumably indexes the packet word - TODO confirm)
+*/
+
+/*define for HEADER word (_offset 0)*/
+/*define for op field (bits 7:0)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_OP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_shift)
+
+/*define for sub_op field (bits 15:8)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_shift)
+
+/*define for tmz field (bit 18)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_shift)
+
+/*define for cpv field (bit 19)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_shift)
+
+/*define for all field (bit 31)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_shift 31
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_ALL(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_shift)
+
+/*define for COUNT word (_offset 1)*/
+/*define for count field (bits 21:0)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_offset 1
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_mask) << SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_shift)
+
+/*define for PARAMETER word (_offset 2)*/
+/*define for dst_mtype field (bits 5:3)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_shift 3
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_MTYPE(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_shift)
+
+/*define for dst_l2_policy field (bits 7:6)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_shift 6
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_L2_POLICY(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_shift)
+
+/*define for dst_llc field (bit 8)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_shift 8
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_LLC(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_shift)
+
+/*define for src_mtype field (bits 13:11)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_shift 11
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_MTYPE(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_shift)
+
+/*define for src_l2_policy field (bits 15:14)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_shift 14
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_L2_POLICY(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_shift)
+
+/*define for src_llc field (bit 16)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_shift 16
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_LLC(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_shift)
+
+/*define for dst_sw field (bits 18:17)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_shift 17
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_shift)
+
+/*define for dst_gcc field (bit 19)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_shift 19
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_GCC(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_shift)
+
+/*define for dst_sys field (bit 20)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_shift 20
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SYS(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_shift)
+
+/*define for dst_snoop field (bit 22)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_shift 22
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SNOOP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_shift)
+
+/*define for dst_gpa field (bit 23)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_shift 23
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_GPA(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_shift)
+
+/*define for src_sw field (bits 25:24)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_shift)
+
+/*define for src_sys field (bit 28)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_shift 28
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SYS(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_shift)
+
+/*define for src_snoop field (bit 30)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_shift 30
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SNOOP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_shift)
+
+/*define for src_gpa field (bit 31)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_shift 31
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_GPA(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_shift)
+
+/*define for SRC_ADDR_LO word (_offset 3)*/
+/*define for src_addr_31_0 field (bits 31:0)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word (_offset 4)*/
+/*define for src_addr_63_32 field (bits 31:0)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word (_offset 5)*/
+/*define for dst_addr_31_0 field (bits 31:0)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word (_offset 6)*/
+/*define for dst_addr_63_32 field (bits 31:0)*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_PHYSICAL_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_shift)
+
+/*define for addr_pair_num field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_offset 1
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_mask 0x000000FF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_shift 24
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_ADDR_PAIR_NUM(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_mtype field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_shift 3
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_MTYPE(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_shift)
+
+/*define for dst_l2_policy field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_shift 6
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_L2_POLICY(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_shift)
+
+/*define for dst_llc field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_shift 8
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_LLC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_shift)
+
+/*define for src_mtype field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_shift 11
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_MTYPE(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_shift)
+
+/*define for src_l2_policy field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_shift 14
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_L2_POLICY(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_shift)
+
+/*define for src_llc field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_shift 16
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_LLC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_shift 17
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_shift)
+
+/*define for dst_gcc field: 1 bit wide, bit 19 of the PARAMETER word (word offset 2)*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_shift 19
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_GCC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_shift)
+
+/*define for dst_sys field: 1 bit wide, bit 20 of the PARAMETER word*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_shift 20
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SYS(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_shift)
+
+/*define for dst_log field: 1 bit wide, bit 21 of the PARAMETER word*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_shift 21
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_LOG(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_shift)
+
+/*define for dst_snoop field: 1 bit wide, bit 22 of the PARAMETER word*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_shift 22
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SNOOP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_shift)
+
+/*define for dst_gpa field: 1 bit wide, bit 23 of the PARAMETER word*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_shift 23
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_GPA(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_shift)
+
+/*define for src_sw field: 2 bits wide, bits [25:24] of the PARAMETER word*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for src_gcc field: 1 bit wide, bit 27 of the PARAMETER word*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_shift 27
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_GCC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_shift)
+
+/*define for src_sys field: 1 bit wide, bit 28 of the PARAMETER word*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_shift 28
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SYS(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_shift)
+
+/*define for src_snoop field: 1 bit wide, bit 30 of the PARAMETER word*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_shift 30
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SNOOP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_shift)
+
+/*define for src_gpa field: 1 bit wide, bit 31 of the PARAMETER word. NOTE(review): the mask literal is a signed int, so an int argument would be shifted into the sign bit; callers presumably pass an unsigned 32-bit value - confirm*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_shift 31
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_GPA(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_shift)
+
+/*define for SRC_ADDR_LO word (word offset 3)*/
+/*define for src_addr_31_0 field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word (word offset 4)*/
+/*define for src_addr_63_32 field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word (word offset 5)*/
+/*define for dst_addr_31_0 field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word (word offset 6)*/
+/*define for dst_addr_63_32 field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_BROADCAST_LINEAR packet
+**
+** Every field below gets four defines: <field>_offset (the same value for
+** all fields of one word; presumably the index of that 32-bit word within
+** the packet), <field>_mask (the field mask before shifting), <field>_shift
+** (the bit position of the field inside its word) and an upper-case helper
+** macro that masks its argument and shifts it into position.
+** The words defined here use offsets 0 through 8.
+*/
+
+/*define for HEADER word (word offset 0)*/
+/*define for op field: 8 bits wide, bits [7:0]*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field: 8 bits wide, bits [15:8]*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift)
+
+/*define for encrypt field: 1 bit wide, bit 16*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_shift)
+
+/*define for tmz field: 1 bit wide, bit 18*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_shift)
+
+/*define for cpv field: 1 bit wide, bit 19*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_shift)
+
+/*define for broadcast field: 1 bit wide, bit 27*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift)
+
+/*define for COUNT word (word offset 1)*/
+/*define for count field: 30 bits wide, bits [29:0]*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask 0x3FFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word (word offset 2)*/
+/*define for dst2_sw field: 2 bits wide, bits [9:8]*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST2_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift)
+
+/*define for dst2_cache_policy field: 3 bits wide, bits [12:10]*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_shift 10
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST2_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_shift)
+
+/*define for dst1_sw field: 2 bits wide, bits [17:16]*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift 16
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST1_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift)
+
+/*define for dst1_cache_policy field: 3 bits wide, bits [20:18]*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_shift 18
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST1_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_shift)
+
+/*define for src_sw field: 2 bits wide, bits [25:24]*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for src_cache_policy field: 3 bits wide, bits [28:26]*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_shift 26
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_shift)
+
+/*define for SRC_ADDR_LO word (word offset 3)*/
+/*define for src_addr_31_0 field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word (word offset 4)*/
+/*define for src_addr_63_32 field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST1_ADDR_LO word (word offset 5)*/
+/*define for dst1_addr_31_0 field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_offset 5
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_DST1_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift)
+
+/*define for DST1_ADDR_HI word (word offset 6)*/
+/*define for dst1_addr_63_32 field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_offset 6
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_DST1_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift)
+
+/*define for DST2_ADDR_LO word (word offset 7)*/
+/*define for dst2_addr_31_0 field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_offset 7
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_DST2_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift)
+
+/*define for DST2_ADDR_HI word (word offset 8)*/
+/*define for dst2_addr_63_32 field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_offset 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_DST2_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN packet
+**
+** Every field below gets four defines: <field>_offset (the same value for
+** all fields of one word; presumably the index of that 32-bit word within
+** the packet), <field>_mask (the field mask before shifting), <field>_shift
+** (the bit position of the field inside its word) and an upper-case helper
+** macro that masks its argument and shifts it into position.
+** The words defined here use offsets 0 through 12.
+*/
+
+/*define for HEADER word (word offset 0)*/
+/*define for op field: 8 bits wide, bits [7:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift)
+
+/*define for sub_op field: 8 bits wide, bits [15:8]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift)
+
+/*define for tmz field: 1 bit wide, bit 18*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_shift)
+
+/*define for cpv field: 1 bit wide, bit 19*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_shift)
+
+/*define for elementsize field: 3 bits wide, bits [31:29]. NOTE(review): the mask literal is a signed int, so an int argument would be shifted into the sign bit; callers presumably pass an unsigned 32-bit value - confirm*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift 29
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_ELEMENTSIZE(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift)
+
+/*define for SRC_ADDR_LO word (word offset 1)*/
+/*define for src_addr_31_0 field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word (word offset 2)*/
+/*define for src_addr_63_32 field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word (word offset 3)*/
+/*define for src_x field: 14 bits wide, bits [13:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift)
+
+/*define for src_y field: 14 bits wide, bits [29:16]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift)
+
+/*define for DW_4 word (word offset 4)*/
+/*define for src_z field: 13 bits wide, bits [12:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift)
+
+/*define for src_pitch field: 19 bits wide, bits [31:13]. NOTE(review): the mask literal is a signed int, so a large int argument would be shifted into the sign bit; callers presumably pass an unsigned 32-bit value - confirm*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift)
+
+/*define for DW_5 word (word offset 5)*/
+/*define for src_slice_pitch field: 28 bits wide, bits [27:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_offset 5
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift)
+
+/*define for DST_ADDR_LO word (word offset 6)*/
+/*define for dst_addr_31_0 field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_offset 6
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word (word offset 7)*/
+/*define for dst_addr_63_32 field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_offset 7
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_8 word (word offset 8)*/
+/*define for dst_x field: 14 bits wide, bits [13:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift)
+
+/*define for dst_y field: 14 bits wide, bits [29:16]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift)
+
+/*define for DW_9 word (word offset 9)*/
+/*define for dst_z field: 13 bits wide, bits [12:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift)
+
+/*define for dst_pitch field: 19 bits wide, bits [31:13]. NOTE(review): the mask literal is a signed int, so a large int argument would be shifted into the sign bit; callers presumably pass an unsigned 32-bit value - confirm*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift)
+
+/*define for DW_10 word (word offset 10)*/
+/*define for dst_slice_pitch field: 28 bits wide, bits [27:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_offset 10
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift)
+
+/*define for DW_11 word (word offset 11)*/
+/*define for rect_x field: 14 bits wide, bits [13:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift)
+
+/*define for rect_y field: 14 bits wide, bits [29:16]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift)
+
+/*define for DW_12 word (word offset 12)*/
+/*define for rect_z field: 13 bits wide, bits [12:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift)
+
+/*define for dst_sw field: 2 bits wide, bits [17:16]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift)
+
+/*define for dst_cache_policy field: 3 bits wide, bits [20:18]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_shift 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_DST_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_shift)
+
+/*define for src_sw field: 2 bits wide, bits [25:24]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift)
+
+/*define for src_cache_policy field: 3 bits wide, bits [28:26]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_shift 26
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_shift)
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE packet
+**
+** Every field below gets four defines: <field>_offset (the same value for
+** all fields of one word; presumably the index of that 32-bit word within
+** the packet), <field>_mask (the field mask before shifting), <field>_shift
+** (the bit position of the field inside its word) and an upper-case helper
+** macro that masks its argument and shifts it into position.
+*/
+
+/*define for HEADER word (word offset 0)*/
+/*define for op field: 8 bits wide, bits [7:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_shift)
+
+/*define for sub_op field: 8 bits wide, bits [15:8]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_shift)
+
+/*define for tmz field: 1 bit wide, bit 18*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_shift)
+
+/*define for cpv field: 1 bit wide, bit 19*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_shift)
+
+/*define for SRC_ADDR_LO word (word offset 1)*/
+/*define for src_addr_31_0 field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word (word offset 2)*/
+/*define for src_addr_63_32 field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word (word offset 3)*/
+/*define for src_x field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_shift)
+
+/*define for DW_4 word (word offset 4)*/
+/*define for src_y field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_shift)
+
+/*define for DW_5 word (word offset 5)*/
+/*define for src_z field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_offset 5
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_shift)
+
+/*define for DW_6 word (word offset 6)*/
+/*define for src_pitch field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_offset 6
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_shift)
+
+/*define for DW_7 word (word offset 7)*/
+/*define for src_slice_pitch_31_0 field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_offset 7
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_SRC_SLICE_PITCH_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_shift)
+
+/*define for DW_8 word (word offset 8)*/
+/*define for src_slice_pitch_47_32 field: 16 bits wide, bits [15:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_mask 0x0000FFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_SRC_SLICE_PITCH_47_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_shift)
+
+/*define for DST_ADDR_LO word (word offset 9)*/
+/*define for dst_addr_31_0 field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word (word offset 10)*/
+/*define for dst_addr_63_32 field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_offset 10
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_11 word (word offset 11)*/
+/*define for dst_x field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_shift)
+
+/*define for DW_12 word (word offset 12)*/
+/*define for dst_y field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_shift)
+
+/*define for DW_13 word (word offset 13)*/
+/*define for dst_z field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_offset 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_shift)
+
+/*define for DW_14 word (word offset 14)*/
+/*define for dst_pitch field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_offset 14
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_shift)
+
+/*define for DW_15 word (word offset 15)*/
+/*define for dst_slice_pitch_31_0 field: spans the whole 32-bit word, bits [31:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_offset 15
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_DST_SLICE_PITCH_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_shift)
+
+/*define for DW_16 word (word offset 16)*/
+/*define for dst_slice_pitch_47_32 field: 16 bits wide, bits [15:0]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_offset 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_mask 0x0000FFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_DST_SLICE_PITCH_47_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_shift)
+
+/*define for dst_sw field: 2 bits wide, bits [17:16]*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_offset 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_shift)
+
+/*define for dst_policy field: 3 bits wide, bits [20:18]. NOTE(review): SDMA_PKT_COPY_LINEAR_SUBWIN names the field at these bits dst_cache_policy; presumably the same meaning - confirm*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_offset 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_shift 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_DST_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_offset 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_shift)
+
+/*define for src_policy field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_offset 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_shift 26
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_SRC_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_shift)
+
+/*define for DW_17 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_offset 17
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_shift)
+
+/*define for DW_18 word*/
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_offset 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_shift)
+
+/*define for DW_19 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_offset 19
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN_BC packet. Per field: <f>_offset is the word offset (it matches the DW_n index in the word name; presumably counted in 32-bit dwords), <f>_mask is the unshifted field mask, <f>_shift is the bit position of the field's LSB, and the upper-case macro computes ((x) & mask) << shift.
+*/
+
+/*define for HEADER word (offset 0)*/
+/*define for op field: bits 7:0*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_shift)
+
+/*define for sub_op field: bits 15:8*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_shift)
+
+/*define for elementsize field: bits 31:29. NOTE(review): the shift reaches bit 31, so x should be an unsigned 32-bit value to keep the shift well-defined - confirm callers*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_shift 29
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_ELEMENTSIZE(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_shift)
+
+/*define for SRC_ADDR_LO word (offset 1)*/
+/*define for src_addr_31_0 field: bits 31:0*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word (offset 2)*/
+/*define for src_addr_63_32 field: bits 31:0*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word (offset 3)*/
+/*define for src_x field: bits 13:0*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_shift)
+
+/*define for src_y field: bits 29:16*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_shift)
+
+/*define for DW_4 word (offset 4)*/
+/*define for src_z field: bits 10:0*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_shift)
+
+/*define for src_pitch field: bits 26:13*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_shift)
+
+/*define for DW_5 word (offset 5)*/
+/*define for src_slice_pitch field: bits 27:0*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_offset 5
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_shift)
+
+/*define for DST_ADDR_LO word (offset 6)*/
+/*define for dst_addr_31_0 field: bits 31:0*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_offset 6
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word (offset 7)*/
+/*define for dst_addr_63_32 field: bits 31:0*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_offset 7
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_8 word (offset 8)*/
+/*define for dst_x field: bits 13:0*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_shift)
+
+/*define for dst_y field: bits 29:16*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_shift)
+
+/*define for DW_9 word (offset 9)*/
+/*define for dst_z field: bits 10:0*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_shift)
+
+/*define for dst_pitch field: bits 26:13*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_shift)
+
+/*define for DW_10 word (offset 10)*/
+/*define for dst_slice_pitch field: bits 27:0*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_offset 10
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_shift)
+
+/*define for DW_11 word (offset 11)*/
+/*define for rect_x field: bits 13:0*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_shift)
+
+/*define for rect_y field: bits 29:16*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_shift)
+
+/*define for DW_12 word (offset 12)*/
+/*define for rect_z field: bits 10:0*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_shift)
+
+/*define for dst_sw field: bits 17:16*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_shift)
+
+/*define for dst_ha field: bit 19*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_shift 19
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_DST_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_shift)
+
+/*define for src_sw field: bits 25:24*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_shift)
+
+/*define for src_ha field: bit 27*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_shift 27
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_SRC_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED packet. Per field: <f>_offset is the word offset (it matches the DW_n index in the word name; presumably counted in 32-bit dwords), <f>_mask is the unshifted field mask, <f>_shift is the bit position of the field's LSB, and the upper-case macro computes ((x) & mask) << shift.
+*/
+
+/*define for HEADER word (offset 0)*/
+/*define for op field: bits 7:0*/
+#define SDMA_PKT_COPY_TILED_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_op_mask) << SDMA_PKT_COPY_TILED_HEADER_op_shift)
+
+/*define for sub_op field: bits 15:8*/
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_HEADER_sub_op_shift)
+
+/*define for encrypt field: bit 16*/
+#define SDMA_PKT_COPY_TILED_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_TILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_encrypt_mask) << SDMA_PKT_COPY_TILED_HEADER_encrypt_shift)
+
+/*define for tmz field: bit 18*/
+#define SDMA_PKT_COPY_TILED_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_TILED_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_tmz_mask) << SDMA_PKT_COPY_TILED_HEADER_tmz_shift)
+
+/*define for cpv field: bit 19*/
+#define SDMA_PKT_COPY_TILED_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_TILED_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_cpv_mask) << SDMA_PKT_COPY_TILED_HEADER_cpv_shift)
+
+/*define for detile field: bit 31. NOTE(review): the shift reaches bit 31, so x should be an unsigned 32-bit value to keep the shift well-defined - confirm callers*/
+#define SDMA_PKT_COPY_TILED_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word (offset 1)*/
+/*define for tiled_addr_31_0 field: bits 31:0*/
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word (offset 2)*/
+/*define for tiled_addr_63_32 field: bits 31:0*/
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word (offset 3)*/
+/*define for width field: bits 13:0*/
+#define SDMA_PKT_COPY_TILED_DW_3_width_offset 3
+#define SDMA_PKT_COPY_TILED_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_3_width_shift 0
+#define SDMA_PKT_COPY_TILED_DW_3_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_DW_3_width_mask) << SDMA_PKT_COPY_TILED_DW_3_width_shift)
+
+/*define for DW_4 word (offset 4)*/
+/*define for height field: bits 13:0*/
+#define SDMA_PKT_COPY_TILED_DW_4_height_offset 4
+#define SDMA_PKT_COPY_TILED_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_4_height_shift 0
+#define SDMA_PKT_COPY_TILED_DW_4_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_height_mask) << SDMA_PKT_COPY_TILED_DW_4_height_shift)
+
+/*define for depth field: bits 28:16*/
+#define SDMA_PKT_COPY_TILED_DW_4_depth_offset 4
+#define SDMA_PKT_COPY_TILED_DW_4_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_DW_4_depth_shift 16
+#define SDMA_PKT_COPY_TILED_DW_4_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_depth_mask) << SDMA_PKT_COPY_TILED_DW_4_depth_shift)
+
+/*define for DW_5 word (offset 5)*/
+/*define for element_size field: bits 2:0*/
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_element_size_mask) << SDMA_PKT_COPY_TILED_DW_5_element_size_shift)
+
+/*define for swizzle_mode field: bits 7:3*/
+#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_TILED_DW_5_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_mask) << SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_shift)
+
+/*define for dimension field: bits 10:9*/
+#define SDMA_PKT_COPY_TILED_DW_5_dimension_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_5_dimension_shift 9
+#define SDMA_PKT_COPY_TILED_DW_5_DIMENSION(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_dimension_mask) << SDMA_PKT_COPY_TILED_DW_5_dimension_shift)
+
+/*define for mip_max field: bits 19:16*/
+#define SDMA_PKT_COPY_TILED_DW_5_mip_max_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_DW_5_mip_max_shift 16
+#define SDMA_PKT_COPY_TILED_DW_5_MIP_MAX(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_mip_max_mask) << SDMA_PKT_COPY_TILED_DW_5_mip_max_shift)
+
+/*define for DW_6 word (offset 6)*/
+/*define for x field: bits 13:0*/
+#define SDMA_PKT_COPY_TILED_DW_6_x_offset 6
+#define SDMA_PKT_COPY_TILED_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_6_x_shift 0
+#define SDMA_PKT_COPY_TILED_DW_6_X(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_x_mask) << SDMA_PKT_COPY_TILED_DW_6_x_shift)
+
+/*define for y field: bits 29:16*/
+#define SDMA_PKT_COPY_TILED_DW_6_y_offset 6
+#define SDMA_PKT_COPY_TILED_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_6_y_shift 16
+#define SDMA_PKT_COPY_TILED_DW_6_Y(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_y_mask) << SDMA_PKT_COPY_TILED_DW_6_y_shift)
+
+/*define for DW_7 word (offset 7)*/
+/*define for z field: bits 12:0*/
+#define SDMA_PKT_COPY_TILED_DW_7_z_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_DW_7_z_shift 0
+#define SDMA_PKT_COPY_TILED_DW_7_Z(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_z_mask) << SDMA_PKT_COPY_TILED_DW_7_z_shift)
+
+/*define for linear_sw field: bits 17:16*/
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_DW_7_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift)
+
+/*define for linear_cache_policy field: bits 20:18*/
+#define SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_shift 18
+#define SDMA_PKT_COPY_TILED_DW_7_LINEAR_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_mask) << SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_shift)
+
+/*define for tile_sw field: bits 25:24*/
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_DW_7_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift)
+
+/*define for tile_cache_policy field: bits 28:26*/
+#define SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_shift 26
+#define SDMA_PKT_COPY_TILED_DW_7_TILE_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_mask) << SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_shift)
+
+/*define for LINEAR_ADDR_LO word (offset 8)*/
+/*define for linear_addr_31_0 field: bits 31:0*/
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_offset 8
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word (offset 9)*/
+/*define for linear_addr_63_32 field: bits 31:0*/
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_offset 9
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word (offset 10)*/
+/*define for linear_pitch field: bits 18:0*/
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for LINEAR_SLICE_PITCH word (offset 11)*/
+/*define for linear_slice_pitch field: bits 31:0*/
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_shift)
+
+/*define for COUNT word (offset 12)*/
+/*define for count field: bits 29:0*/
+#define SDMA_PKT_COPY_TILED_COUNT_count_offset 12
+#define SDMA_PKT_COPY_TILED_COUNT_count_mask 0x3FFFFFFF
+#define SDMA_PKT_COPY_TILED_COUNT_count_shift 0
+#define SDMA_PKT_COPY_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_TILED_COUNT_count_mask) << SDMA_PKT_COPY_TILED_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_op_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_BC_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for width field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_3_width_offset 3
+#define SDMA_PKT_COPY_TILED_BC_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_3_width_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_3_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_3_width_mask) << SDMA_PKT_COPY_TILED_BC_DW_3_width_shift)
+
+/*define for DW_4 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_4_height_offset 4
+#define SDMA_PKT_COPY_TILED_BC_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_4_height_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_4_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_4_height_mask) << SDMA_PKT_COPY_TILED_BC_DW_4_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_offset 4
+#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_shift 16
+#define SDMA_PKT_COPY_TILED_BC_DW_4_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_4_depth_mask) << SDMA_PKT_COPY_TILED_BC_DW_4_depth_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_element_size_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_element_size_shift)
+
+/*define for array_mode field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_shift 3
+#define SDMA_PKT_COPY_TILED_BC_DW_5_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_shift)
+
+/*define for mit_mode field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_shift 8
+#define SDMA_PKT_COPY_TILED_BC_DW_5_MIT_MODE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_shift)
+
+/*define for tilesplit_size field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_TILED_BC_DW_5_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_shift)
+
+/*define for bank_w field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_shift 15
+#define SDMA_PKT_COPY_TILED_BC_DW_5_BANK_W(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_shift)
+
+/*define for bank_h field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_shift 18
+#define SDMA_PKT_COPY_TILED_BC_DW_5_BANK_H(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_shift)
+
+/*define for num_bank field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_shift 21
+#define SDMA_PKT_COPY_TILED_BC_DW_5_NUM_BANK(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_shift)
+
+/*define for mat_aspt field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_shift 24
+#define SDMA_PKT_COPY_TILED_BC_DW_5_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_shift)
+
+/*define for pipe_config field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_shift 26
+#define SDMA_PKT_COPY_TILED_BC_DW_5_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_6_x_offset 6
+#define SDMA_PKT_COPY_TILED_BC_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_6_x_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_6_X(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_6_x_mask) << SDMA_PKT_COPY_TILED_BC_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_6_y_offset 6
+#define SDMA_PKT_COPY_TILED_BC_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_6_y_shift 16
+#define SDMA_PKT_COPY_TILED_BC_DW_6_Y(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_6_y_mask) << SDMA_PKT_COPY_TILED_BC_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_7_z_offset 7
+#define SDMA_PKT_COPY_TILED_BC_DW_7_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_BC_DW_7_z_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_7_Z(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_z_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_offset 7
+#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_BC_DW_7_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_offset 7
+#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_BC_DW_7_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_offset 8
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_offset 9
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word*/
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for LINEAR_SLICE_PITCH word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_TILED_BC_COUNT_count_offset 12
+#define SDMA_PKT_COPY_TILED_BC_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_COPY_TILED_BC_COUNT_count_shift 2
+#define SDMA_PKT_COPY_TILED_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_TILED_BC_COUNT_count_mask) << SDMA_PKT_COPY_TILED_BC_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_L2T_BROADCAST packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_shift)
+
+/*define for videocopy field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift 26
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_VIDEOCOPY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift)
+
+/*define for broadcast field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift)
+
+/*define for TILED_ADDR_LO_0 word*/
+/*define for tiled_addr0_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_offset 1
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_TILED_ADDR0_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift)
+
+/*define for TILED_ADDR_HI_0 word*/
+/*define for tiled_addr0_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_offset 2
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_TILED_ADDR0_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift)
+
+/*define for TILED_ADDR_LO_1 word*/
+/*define for tiled_addr1_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_offset 3
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_TILED_ADDR1_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift)
+
+/*define for TILED_ADDR_HI_1 word*/
+/*define for tiled_addr1_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_offset 4
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_TILED_ADDR1_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift)
+
+/*define for DW_5 word*/
+/*define for width field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_offset 5
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_WIDTH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_shift)
+
+/*define for DW_6 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_offset 6
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_HEIGHT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_offset 6
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_DEPTH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_shift)
+
+/*define for DW_7 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift)
+
+/*define for swizzle_mode field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_shift)
+
+/*define for dimension field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_shift 9
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_DIMENSION(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_shift)
+
+/*define for mip_max field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_MIP_MAX(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_shift)
+
+/*define for DW_8 word*/
+/*define for x field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_offset 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_X(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_offset 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_Y(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift)
+
+/*define for DW_9 word*/
+/*define for z field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_offset 9
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_Z(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift)
+
+/*define for DW_10 word*/
+/*define for dst2_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_DST2_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift)
+
+/*define for dst2_cache_policy field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_shift 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_DST2_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift)
+
+/*define for linear_cache_policy field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_shift 18
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_LINEAR_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift 24
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_TILE_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift)
+
+/*define for tile_cache_policy field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_shift 26
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_TILE_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_offset 11
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_offset 12
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word*/
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_offset 13
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for LINEAR_SLICE_PITCH word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 14
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_offset 15
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask 0x3FFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask) << SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_T2T packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_T2T_HEADER_op_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_HEADER_op_shift 0
+#define SDMA_PKT_COPY_T2T_HEADER_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_op_mask) << SDMA_PKT_COPY_T2T_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_T2T_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_sub_op_mask) << SDMA_PKT_COPY_T2T_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_T2T_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_T2T_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_tmz_mask) << SDMA_PKT_COPY_T2T_HEADER_tmz_shift)
+
+/*define for dcc field*/
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_shift 19
+#define SDMA_PKT_COPY_T2T_HEADER_DCC(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_dcc_mask) << SDMA_PKT_COPY_T2T_HEADER_dcc_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_T2T_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_HEADER_cpv_shift 28
+#define SDMA_PKT_COPY_T2T_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_cpv_mask) << SDMA_PKT_COPY_T2T_HEADER_cpv_shift)
+
+/*define for dcc_dir field*/
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_shift 31
+#define SDMA_PKT_COPY_T2T_HEADER_DCC_DIR(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_dcc_dir_mask) << SDMA_PKT_COPY_T2T_HEADER_dcc_dir_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_x_mask) << SDMA_PKT_COPY_T2T_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_y_mask) << SDMA_PKT_COPY_T2T_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_z_mask) << SDMA_PKT_COPY_T2T_DW_4_src_z_shift)
+
+/*define for src_width field*/
+#define SDMA_PKT_COPY_T2T_DW_4_src_width_offset 4
+#define SDMA_PKT_COPY_T2T_DW_4_src_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_4_src_width_shift 16
+#define SDMA_PKT_COPY_T2T_DW_4_SRC_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_width_mask) << SDMA_PKT_COPY_T2T_DW_4_src_width_shift)
+
+/*define for DW_5 word*/
+/*define for src_height field*/
+#define SDMA_PKT_COPY_T2T_DW_5_src_height_offset 5
+#define SDMA_PKT_COPY_T2T_DW_5_src_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_5_src_height_shift 0
+#define SDMA_PKT_COPY_T2T_DW_5_SRC_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_height_mask) << SDMA_PKT_COPY_T2T_DW_5_src_height_shift)
+
+/*define for src_depth field*/
+#define SDMA_PKT_COPY_T2T_DW_5_src_depth_offset 5
+#define SDMA_PKT_COPY_T2T_DW_5_src_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_5_src_depth_shift 16
+#define SDMA_PKT_COPY_T2T_DW_5_SRC_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_depth_mask) << SDMA_PKT_COPY_T2T_DW_5_src_depth_shift)
+
+/*define for DW_6 word*/
+/*define for src_element_size field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask) << SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift)
+
+/*define for src_swizzle_mode field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_mask) << SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_shift)
+
+/*define for src_dimension field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_shift 9
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_DIMENSION(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_dimension_mask) << SDMA_PKT_COPY_T2T_DW_6_src_dimension_shift)
+
+/*define for src_mip_max field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_shift 16
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_MIP_MAX(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_mip_max_mask) << SDMA_PKT_COPY_T2T_DW_6_src_mip_max_shift)
+
+/*define for src_mip_id field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_shift 20
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_MIP_ID(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_mip_id_mask) << SDMA_PKT_COPY_T2T_DW_6_src_mip_id_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_offset 7
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_offset 8
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_offset 9
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_9_DST_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_x_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_offset 9
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_9_DST_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_y_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_y_shift)
+
+/*define for DW_10 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_offset 10
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_10_DST_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_z_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_z_shift)
+
+/*define for dst_width field*/
+#define SDMA_PKT_COPY_T2T_DW_10_dst_width_offset 10
+#define SDMA_PKT_COPY_T2T_DW_10_dst_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_10_dst_width_shift 16
+#define SDMA_PKT_COPY_T2T_DW_10_DST_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_width_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_width_shift)
+
+/*define for DW_11 word*/
+/*define for dst_height field*/
+#define SDMA_PKT_COPY_T2T_DW_11_dst_height_offset 11
+#define SDMA_PKT_COPY_T2T_DW_11_dst_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_11_dst_height_shift 0
+#define SDMA_PKT_COPY_T2T_DW_11_DST_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_height_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_height_shift)
+
+/*define for dst_depth field*/
+#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_offset 11
+#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_shift 16
+#define SDMA_PKT_COPY_T2T_DW_11_DST_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_depth_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_depth_shift)
+
+/*define for DW_12 word*/
+/*define for dst_element_size field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_DW_12_DST_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_element_size_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_element_size_shift)
+
+/*define for dst_swizzle_mode field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_T2T_DW_12_DST_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_shift)
+
+/*define for dst_dimension field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_shift 9
+#define SDMA_PKT_COPY_T2T_DW_12_DST_DIMENSION(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_dimension_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_dimension_shift)
+
+/*define for dst_mip_max field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_shift 16
+#define SDMA_PKT_COPY_T2T_DW_12_DST_MIP_MAX(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_shift)
+
+/*define for dst_mip_id field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_shift 20
+#define SDMA_PKT_COPY_T2T_DW_12_DST_MIP_ID(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_shift)
+
+/*define for DW_13 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_offset 13
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_13_RECT_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_x_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_offset 13
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_13_RECT_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_y_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_y_shift)
+
+/*define for DW_14 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_14_RECT_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_rect_z_mask) << SDMA_PKT_COPY_T2T_DW_14_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift 16
+#define SDMA_PKT_COPY_T2T_DW_14_DST_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift)
+
+/*define for dst_cache_policy field*/
+#define SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_shift 18
+#define SDMA_PKT_COPY_T2T_DW_14_DST_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_mask) << SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_shift 24
+#define SDMA_PKT_COPY_T2T_DW_14_SRC_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_src_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_src_sw_shift)
+
+/*define for src_cache_policy field*/
+#define SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_shift 26
+#define SDMA_PKT_COPY_T2T_DW_14_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_mask) << SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_shift)
+
+/*define for META_ADDR_LO word*/
+/*define for meta_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_offset 15
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_META_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_mask) << SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_shift)
+
+/*define for META_ADDR_HI word*/
+/*define for meta_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_offset 16
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_META_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_mask) << SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_shift)
+
+/*define for META_CONFIG word (_offset 17): twelve fields, each macro masks x and then shifts it into place*/
+/*define for data_format field: 7-bit mask, shift 0 -> bits [6:0]*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_mask 0x0000007F
+#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_shift 0
+#define SDMA_PKT_COPY_T2T_META_CONFIG_DATA_FORMAT(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_data_format_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_data_format_shift)
+
+/*define for color_transform_disable field: 1-bit mask, shift 7 -> bit [7]*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_shift 7
+#define SDMA_PKT_COPY_T2T_META_CONFIG_COLOR_TRANSFORM_DISABLE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_shift)
+
+/*define for alpha_is_on_msb field: 1-bit mask, shift 8 -> bit [8]*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_shift 8
+#define SDMA_PKT_COPY_T2T_META_CONFIG_ALPHA_IS_ON_MSB(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_shift)
+
+/*define for number_type field: 3-bit mask, shift 9 -> bits [11:9]*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_shift 9
+#define SDMA_PKT_COPY_T2T_META_CONFIG_NUMBER_TYPE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_number_type_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_number_type_shift)
+
+/*define for surface_type field: 2-bit mask, shift 12 -> bits [13:12]*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_shift 12
+#define SDMA_PKT_COPY_T2T_META_CONFIG_SURFACE_TYPE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_shift)
+
+/*define for meta_llc field: 1-bit mask, shift 14 -> bit [14]*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_shift 14
+#define SDMA_PKT_COPY_T2T_META_CONFIG_META_LLC(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_shift)
+
+/*define for max_comp_block_size field: 2-bit mask, shift 24 -> bits [25:24]*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_shift 24
+#define SDMA_PKT_COPY_T2T_META_CONFIG_MAX_COMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_shift)
+
+/*define for max_uncomp_block_size field: 2-bit mask, shift 26 -> bits [27:26]*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_shift 26
+#define SDMA_PKT_COPY_T2T_META_CONFIG_MAX_UNCOMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_shift)
+
+/*define for write_compress_enable field: 1-bit mask, shift 28 -> bit [28]*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_shift 28
+#define SDMA_PKT_COPY_T2T_META_CONFIG_WRITE_COMPRESS_ENABLE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_shift)
+
+/*define for meta_tmz field: 1-bit mask, shift 29 -> bit [29]*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_shift 29
+#define SDMA_PKT_COPY_T2T_META_CONFIG_META_TMZ(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_shift)
+
+/*define for pipe_aligned field: 1-bit mask, shift 31 -> bit [31]; NOTE(review): reaches the sign bit if x is a signed int - confirm callers pass unsigned values*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_shift 31
+#define SDMA_PKT_COPY_T2T_META_CONFIG_PIPE_ALIGNED(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_T2T_BC packet
+*/
+
+/*define for HEADER word (_offset 0): op, sub_op*/
+/*define for op field: 8-bit mask, shift 0 -> bits [7:0]*/
+#define SDMA_PKT_COPY_T2T_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_T2T_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_T2T_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_T2T_BC_HEADER_op_mask) << SDMA_PKT_COPY_T2T_BC_HEADER_op_shift)
+
+/*define for sub_op field: 8-bit mask, shift 8 -> bits [15:8]*/
+#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_T2T_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_shift)
+
+/*define for SRC_ADDR_LO word (_offset 1): src_addr_31_0*/
+/*define for src_addr_31_0 field: 32-bit mask, shift 0 -> bits [31:0]*/
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word (_offset 2): src_addr_63_32*/
+/*define for src_addr_63_32 field: 32-bit mask, shift 0 -> bits [31:0]*/
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word (_offset 3): src_x, src_y*/
+/*define for src_x field: 14-bit mask, shift 0 -> bits [13:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_3_src_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_3_src_x_shift)
+
+/*define for src_y field: 14-bit mask, shift 16 -> bits [29:16]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_3_src_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_3_src_y_shift)
+
+/*define for DW_4 word (_offset 4): src_z, src_width*/
+/*define for src_z field: 11-bit mask, shift 0 -> bits [10:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_4_src_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_4_src_z_shift)
+
+/*define for src_width field: 14-bit mask, shift 16 -> bits [29:16]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_offset 4
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_4_SRC_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_4_src_width_mask) << SDMA_PKT_COPY_T2T_BC_DW_4_src_width_shift)
+
+/*define for DW_5 word (_offset 5): src_height, src_depth*/
+/*define for src_height field: 14-bit mask, shift 0 -> bits [13:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_offset 5
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_5_SRC_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_5_src_height_mask) << SDMA_PKT_COPY_T2T_BC_DW_5_src_height_shift)
+
+/*define for src_depth field: 11-bit mask, shift 16 -> bits [26:16]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_offset 5
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_5_SRC_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_mask) << SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_shift)
+
+/*define for DW_6 word (_offset 6): nine src_* fields, each macro masks x and then shifts it into place*/
+/*define for src_element_size field: 3-bit mask, shift 0 -> bits [2:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_shift)
+
+/*define for src_array_mode field: 4-bit mask, shift 3 -> bits [6:3]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_shift 3
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_shift)
+
+/*define for src_mit_mode field: 3-bit mask, shift 8 -> bits [10:8]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_shift 8
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_MIT_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_shift)
+
+/*define for src_tilesplit_size field: 3-bit mask, shift 11 -> bits [13:11]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_shift)
+
+/*define for src_bank_w field: 2-bit mask, shift 15 -> bits [16:15]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_shift 15
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_BANK_W(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_shift)
+
+/*define for src_bank_h field: 2-bit mask, shift 18 -> bits [19:18]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_shift 18
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_BANK_H(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_shift)
+
+/*define for src_num_bank field: 2-bit mask, shift 21 -> bits [22:21]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_shift 21
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_NUM_BANK(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_shift)
+
+/*define for src_mat_aspt field: 2-bit mask, shift 24 -> bits [25:24]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_shift 24
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_shift)
+
+/*define for src_pipe_config field: 5-bit mask, shift 26 -> bits [30:26]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_shift 26
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_shift)
+
+/*define for DST_ADDR_LO word (_offset 7): dst_addr_31_0*/
+/*define for dst_addr_31_0 field: 32-bit mask, shift 0 -> bits [31:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_offset 7
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word (_offset 8): dst_addr_63_32*/
+/*define for dst_addr_63_32 field: 32-bit mask, shift 0 -> bits [31:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_offset 8
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_9 word (_offset 9): dst_x, dst_y*/
+/*define for dst_x field: 14-bit mask, shift 0 -> bits [13:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_offset 9
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_9_DST_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_shift)
+
+/*define for dst_y field: 14-bit mask, shift 16 -> bits [29:16]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_offset 9
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_9_DST_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_shift)
+
+/*define for DW_10 word (_offset 10): dst_z, dst_width*/
+/*define for dst_z field: 11-bit mask, shift 0 -> bits [10:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_offset 10
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_10_DST_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_shift)
+
+/*define for dst_width field: 14-bit mask, shift 16 -> bits [29:16]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_offset 10
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_10_DST_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_mask) << SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_shift)
+
+/*define for DW_11 word (_offset 11): dst_height, dst_depth*/
+/*define for dst_height field: 14-bit mask, shift 0 -> bits [13:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_offset 11
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_11_DST_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_mask) << SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_shift)
+
+/*define for dst_depth field: 12-bit mask, shift 16 -> bits [27:16]; NOTE(review): src_depth in DW_5 uses an 11-bit mask (0x7FF) - confirm 0xFFF is intended*/
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_offset 11
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_mask 0x00000FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_11_DST_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_mask) << SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_shift)
+
+/*define for DW_12 word (_offset 12): nine dst_* fields, each macro masks x and then shifts it into place*/
+/*define for dst_element_size field: 3-bit mask, shift 0 -> bits [2:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_shift)
+
+/*define for dst_array_mode field: 4-bit mask, shift 3 -> bits [6:3]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_shift 3
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_shift)
+
+/*define for dst_mit_mode field: 3-bit mask, shift 8 -> bits [10:8]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_shift 8
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_MIT_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_shift)
+
+/*define for dst_tilesplit_size field: 3-bit mask, shift 11 -> bits [13:11]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_shift)
+
+/*define for dst_bank_w field: 2-bit mask, shift 15 -> bits [16:15]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_shift 15
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_BANK_W(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_shift)
+
+/*define for dst_bank_h field: 2-bit mask, shift 18 -> bits [19:18]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_shift 18
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_BANK_H(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_shift)
+
+/*define for dst_num_bank field: 2-bit mask, shift 21 -> bits [22:21]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_shift 21
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_NUM_BANK(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_shift)
+
+/*define for dst_mat_aspt field: 2-bit mask, shift 24 -> bits [25:24]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_shift 24
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_shift)
+
+/*define for dst_pipe_config field: 5-bit mask, shift 26 -> bits [30:26]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_shift 26
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_shift)
+
+/*define for DW_13 word (_offset 13): rect_x, rect_y*/
+/*define for rect_x field: 14-bit mask, shift 0 -> bits [13:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_offset 13
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_13_RECT_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_shift)
+
+/*define for rect_y field: 14-bit mask, shift 16 -> bits [29:16]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_offset 13
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_13_RECT_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_shift)
+
+/*define for DW_14 word (_offset 14): rect_z, dst_sw, src_sw*/
+/*define for rect_z field: 11-bit mask, shift 0 -> bits [10:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_offset 14
+#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_14_RECT_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_shift)
+
+/*define for dst_sw field: 2-bit mask, shift 16 -> bits [17:16]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_offset 14
+#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_14_DST_SW(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_shift)
+
+/*define for src_sw field: 2-bit mask, shift 24 -> bits [25:24]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_offset 14
+#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_shift 24
+#define SDMA_PKT_COPY_T2T_BC_DW_14_SRC_SW(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED_SUBWIN packet
+*/
+
+/*define for HEADER word (_offset 0): op, sub_op, tmz, dcc, cpv, detile*/
+/*define for op field: 8-bit mask, shift 0 -> bits [7:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift)
+
+/*define for sub_op field: 8-bit mask, shift 8 -> bits [15:8]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift)
+
+/*define for tmz field: 1-bit mask, shift 18 -> bit [18]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_shift)
+
+/*define for dcc field: 1-bit mask, shift 19 -> bit [19]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_shift 19
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_DCC(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_shift)
+
+/*define for cpv field: 1-bit mask, shift 28 -> bit [28]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_shift 28
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_shift)
+
+/*define for detile field: 1-bit mask, shift 31 -> bit [31]; NOTE(review): reaches the sign bit if x is a signed int - confirm callers pass unsigned values*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word (_offset 1): tiled_addr_31_0*/
+/*define for tiled_addr_31_0 field: 32-bit mask, shift 0 -> bits [31:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word (_offset 2): tiled_addr_63_32*/
+/*define for tiled_addr_63_32 field: 32-bit mask, shift 0 -> bits [31:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word (_offset 3): tiled_x, tiled_y*/
+/*define for tiled_x field: 14-bit mask, shift 0 -> bits [13:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift)
+
+/*define for tiled_y field: 14-bit mask, shift 16 -> bits [29:16]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift)
+
+/*define for DW_4 word (_offset 4): tiled_z, width*/
+/*define for tiled_z field: 13-bit mask, shift 0 -> bits [12:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_TILED_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift)
+
+/*define for width field: 14-bit mask, shift 16 -> bits [29:16]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_shift)
+
+/*define for DW_5 word (_offset 5): height, depth*/
+/*define for height field: 14-bit mask, shift 0 -> bits [13:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_shift)
+
+/*define for depth field: 13-bit mask, shift 16 -> bits [28:16]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_shift)
+
+/*define for DW_6 word (_offset 6): element_size, swizzle_mode, dimension, mip_max, mip_id*/
+/*define for element_size field: 3-bit mask, shift 0 -> bits [2:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift)
+
+/*define for swizzle_mode field: 5-bit mask, shift 3 -> bits [7:3]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_shift)
+
+/*define for dimension field: 2-bit mask, shift 9 -> bits [10:9]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_shift 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_DIMENSION(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_shift)
+
+/*define for mip_max field: 4-bit mask, shift 16 -> bits [19:16]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_MIP_MAX(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_shift)
+
+/*define for mip_id field: 4-bit mask, shift 20 -> bits [23:20]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_shift 20
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_MIP_ID(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_shift)
+
+/*define for LINEAR_ADDR_LO word (_offset 7): linear_addr_31_0*/
+/*define for linear_addr_31_0 field: 32-bit mask, shift 0 -> bits [31:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_offset 7
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word (_offset 8): linear_addr_63_32*/
+/*define for linear_addr_63_32 field: 32-bit mask, shift 0 -> bits [31:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_offset 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for DW_9 word (_offset 9): linear_x, linear_y*/
+/*define for linear_x field: 14-bit mask, shift 0 -> bits [13:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift)
+
+/*define for linear_y field: 14-bit mask, shift 16 -> bits [29:16]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift)
+
+/*define for DW_10 word (_offset 10): linear_z, linear_pitch*/
+/*define for linear_z field: 13-bit mask, shift 0 -> bits [12:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift)
+
+/*define for linear_pitch field: 14-bit mask, shift 16 -> bits [29:16]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift)
+
+/*define for DW_11 word (_offset 11): linear_slice_pitch*/
+/*define for linear_slice_pitch field: 28-bit mask, shift 0 -> bits [27:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift)
+
+/*define for DW_12 word (_offset 12): rect_x, rect_y*/
+/*define for rect_x field: 14-bit mask, shift 0 -> bits [13:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift)
+
+/*define for rect_y field: 14-bit mask, shift 16 -> bits [29:16]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift)
+
+/*define for DW_13 word (_offset 13): rect_z, linear_sw, linear_cache_policy, tile_sw, tile_cache_policy*/
+/*define for rect_z field: 13-bit mask, shift 0 -> bits [12:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_RECT_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift)
+
+/*define for linear_sw field: 2-bit mask, shift 16 -> bits [17:16]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift)
+
+/*define for linear_cache_policy field: 3-bit mask, shift 18 -> bits [20:18]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_shift 18
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_LINEAR_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_shift)
+
+/*define for tile_sw field: 2-bit mask, shift 24 -> bits [25:24]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift)
+
+/*define for tile_cache_policy field: 3-bit mask, shift 26 -> bits [28:26]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_shift 26
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_TILE_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_shift)
+
+/*define for META_ADDR_LO word*/
+/*define for meta_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_offset 14
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_META_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_shift)
+
+/*define for META_ADDR_HI word*/
+/*define for meta_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_offset 15
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_META_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_shift)
+
+/*define for META_CONFIG word*/
+/*define for data_format field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_mask 0x0000007F
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_DATA_FORMAT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_shift)
+
+/*define for color_transform_disable field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_shift 7
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_COLOR_TRANSFORM_DISABLE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_shift)
+
+/*define for alpha_is_on_msb field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_ALPHA_IS_ON_MSB(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_shift)
+
+/*define for number_type field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_shift 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_NUMBER_TYPE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_shift)
+
+/*define for surface_type field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_shift 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_SURFACE_TYPE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_shift)
+
+/*define for meta_llc field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_shift 14
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_META_LLC(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_shift)
+
+/*define for max_comp_block_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_MAX_COMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_shift)
+
+/*define for max_uncomp_block_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_shift 26
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_MAX_UNCOMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_shift)
+
+/*define for write_compress_enable field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_shift 28
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_WRITE_COMPRESS_ENABLE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_shift)
+
+/*define for meta_tmz field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_shift 29
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_META_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_shift)
+
+/*define for pipe_aligned field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_shift 31
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_PIPE_ALIGNED(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED_SUBWIN_BC packet: each field gets _offset, _mask (unshifted), _shift and an UPPERCASE(x) packer that masks x and then shifts it into place; _offset equals n for the DW_n words (presumably the dword index within the packet - TODO confirm against the SDMA packet spec)
+*/
+
+/* HEADER word (_offset 0) */
+/* op field: bits [7:0] of HEADER */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] of HEADER */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_shift)
+
+/* detile field: bit [31] of HEADER */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_shift)
+
+/* TILED_ADDR_LO word (_offset 1) */
+/* tiled_addr_31_0 field: full 32-bit word, bits [31:0] */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/* TILED_ADDR_HI word (_offset 2) */
+/* tiled_addr_63_32 field: full 32-bit word, bits [31:0] */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/* DW_3 word (_offset 3) */
+/* tiled_x field: bits [13:0] of DW_3 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_TILED_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_shift)
+
+/* tiled_y field: bits [29:16] of DW_3 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_TILED_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_shift)
+
+/* DW_4 word (_offset 4) */
+/* tiled_z field: bits [10:0] of DW_4 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_TILED_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_shift)
+
+/* width field: bits [29:16] of DW_4 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_shift)
+
+/* DW_5 word (_offset 5) */
+/* height field: bits [13:0] of DW_5 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_shift)
+
+/* depth field: bits [26:16] of DW_5 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_shift)
+
+/* DW_6 word (_offset 6) */
+/* element_size field: bits [2:0] of DW_6 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_shift)
+
+/* array_mode field: bits [6:3] of DW_6 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_shift 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_shift)
+
+/* mit_mode field: bits [10:8] of DW_6 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_MIT_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_shift)
+
+/* tilesplit_size field: bits [13:11] of DW_6 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_shift)
+
+/* bank_w field: bits [16:15] of DW_6 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_shift 15
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_BANK_W(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_shift)
+
+/* bank_h field: bits [19:18] of DW_6 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_shift 18
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_BANK_H(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_shift)
+
+/* num_bank field: bits [22:21] of DW_6 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_shift 21
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_NUM_BANK(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_shift)
+
+/* mat_aspt field: bits [25:24] of DW_6 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_shift)
+
+/* pipe_config field: bits [30:26] of DW_6 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_shift 26
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_shift)
+
+/* LINEAR_ADDR_LO word (_offset 7) */
+/* linear_addr_31_0 field: full 32-bit word, bits [31:0] */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_offset 7
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/* LINEAR_ADDR_HI word (_offset 8) */
+/* linear_addr_63_32 field: full 32-bit word, bits [31:0] */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_offset 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/* DW_9 word (_offset 9) */
+/* linear_x field: bits [13:0] of DW_9 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_LINEAR_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_shift)
+
+/* linear_y field: bits [29:16] of DW_9 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_LINEAR_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_shift)
+
+/* DW_10 word (_offset 10) */
+/* linear_z field: bits [10:0] of DW_10 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_LINEAR_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_shift)
+
+/* linear_pitch field: bits [29:16] of DW_10 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_shift)
+
+/* DW_11 word (_offset 11) */
+/* linear_slice_pitch field: bits [27:0] of DW_11 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_shift)
+
+/* DW_12 word (_offset 12) */
+/* rect_x field: bits [13:0] of DW_12 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_RECT_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_shift)
+
+/* rect_y field: bits [29:16] of DW_12 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_RECT_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_shift)
+
+/* DW_13 word (_offset 13) */
+/* rect_z field: bits [10:0] of DW_13 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_RECT_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_shift)
+
+/* linear_sw field: bits [17:16] of DW_13 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_shift)
+
+/* tile_sw field: bits [25:24] of DW_13 */
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_STRUCT packet: each field gets _offset, _mask (unshifted), _shift and an UPPERCASE(x) packer that masks x and then shifts it into place; _offset equals n for the DW_n word (presumably the dword index within the packet - TODO confirm against the SDMA packet spec)
+*/
+
+/* HEADER word (_offset 0) */
+/* op field: bits [7:0] of HEADER */
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_shift 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] of HEADER */
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_STRUCT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift)
+
+/* tmz field: bit [18] of HEADER */
+#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_STRUCT_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_tmz_mask) << SDMA_PKT_COPY_STRUCT_HEADER_tmz_shift)
+
+/* cpv field: bit [28] of HEADER */
+#define SDMA_PKT_COPY_STRUCT_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_HEADER_cpv_shift 28
+#define SDMA_PKT_COPY_STRUCT_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_cpv_mask) << SDMA_PKT_COPY_STRUCT_HEADER_cpv_shift)
+
+/* detile field: bit [31] of HEADER */
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_STRUCT_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_detile_mask) << SDMA_PKT_COPY_STRUCT_HEADER_detile_shift)
+
+/* SB_ADDR_LO word (_offset 1) */
+/* sb_addr_31_0 field: full 32-bit word, bits [31:0] */
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_offset 1
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift 0
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_SB_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift)
+
+/* SB_ADDR_HI word (_offset 2) */
+/* sb_addr_63_32 field: full 32-bit word, bits [31:0] */
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_offset 2
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift 0
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_SB_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift)
+
+/* START_INDEX word (_offset 3) */
+/* start_index field: full 32-bit word, bits [31:0] */
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_offset 3
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift 0
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_START_INDEX(x) (((x) & SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask) << SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift)
+
+/* COUNT word (_offset 4) */
+/* count field: full 32-bit word, bits [31:0] */
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_offset 4
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_shift 0
+#define SDMA_PKT_COPY_STRUCT_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_STRUCT_COUNT_count_mask) << SDMA_PKT_COPY_STRUCT_COUNT_count_shift)
+
+/* DW_5 word (_offset 5) */
+/* stride field: bits [10:0] of DW_5 */
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_mask 0x000007FF
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_shift 0
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRIDE(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_stride_mask) << SDMA_PKT_COPY_STRUCT_DW_5_stride_shift)
+
+/* linear_sw field: bits [17:16] of DW_5 */
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift 16
+#define SDMA_PKT_COPY_STRUCT_DW_5_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift)
+
+/* linear_cache_policy field: bits [20:18] of DW_5 */
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_shift 18
+#define SDMA_PKT_COPY_STRUCT_DW_5_LINEAR_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_mask) << SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_shift)
+
+/* struct_sw field: bits [25:24] of DW_5 */
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask 0x00000003
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift 24
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRUCT_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift)
+
+/* struct_cache_policy field: bits [28:26] of DW_5 */
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_shift 26
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRUCT_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_mask) << SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_shift)
+
+/* LINEAR_ADDR_LO word (_offset 6) */
+/* linear_addr_31_0 field: full 32-bit word, bits [31:0] */
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_offset 6
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/* LINEAR_ADDR_HI word (_offset 7) */
+/* linear_addr_63_32 field: full 32-bit word, bits [31:0] */
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_offset 7
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_UNTILED packet: each field gets _offset, _mask (unshifted), _shift and an UPPERCASE(x) packer that masks x and then shifts it into place; _offset equals n for the DW_n word (presumably the dword index within the packet - TODO confirm against the SDMA packet spec)
+*/
+
+/* HEADER word (_offset 0) */
+/* op field: bits [7:0] of HEADER */
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] of HEADER */
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_UNTILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift)
+
+/* encrypt field: bit [16] of HEADER */
+#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_shift 16
+#define SDMA_PKT_WRITE_UNTILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_shift)
+
+/* tmz field: bit [18] of HEADER */
+#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_shift 18
+#define SDMA_PKT_WRITE_UNTILED_HEADER_TMZ(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_tmz_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_tmz_shift)
+
+/* cpv field: bit [28] of HEADER */
+#define SDMA_PKT_WRITE_UNTILED_HEADER_cpv_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_WRITE_UNTILED_HEADER_cpv_shift 28
+#define SDMA_PKT_WRITE_UNTILED_HEADER_CPV(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_cpv_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_cpv_shift)
+
+/* DST_ADDR_LO word (_offset 1) */
+/* dst_addr_31_0 field: full 32-bit word, bits [31:0] */
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/* DST_ADDR_HI word (_offset 2) */
+/* dst_addr_63_32 field: full 32-bit word, bits [31:0] */
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/* DW_3 word (_offset 3) */
+/* count field: bits [19:0] of DW_3 */
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_offset 3
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_mask 0x000FFFFF
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_count_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_count_shift)
+
+/* sw field: bits [25:24] of DW_3 */
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_offset 3
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift 24
+#define SDMA_PKT_WRITE_UNTILED_DW_3_SW(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift)
+
+/* cache_policy field: bits [28:26] of DW_3 */
+#define SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_offset 3
+#define SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_mask 0x00000007
+#define SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_shift 26
+#define SDMA_PKT_WRITE_UNTILED_DW_3_CACHE_POLICY(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_shift)
+
+/* DATA0 word (_offset 4) */
+/* data0 field: full 32-bit word, bits [31:0] */
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_offset 4
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask) << SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift)
+
+
+/*
+** Field definitions for the SDMA_PKT_WRITE_TILED packet. Per field: _mask is the unshifted width mask, _shift is the bit position inside the word, and the upper-case macro masks a value and shifts it into place. _offset is presumably the dword index of the containing word (TODO confirm).
+*/
+
+/* HEADER word (_offset 0) */
+/* op field: bits [7:0] */
+#define SDMA_PKT_WRITE_TILED_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_TILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] */
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift)
+
+/* encrypt field: bit 16 */
+#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_shift 16
+#define SDMA_PKT_WRITE_TILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_encrypt_mask) << SDMA_PKT_WRITE_TILED_HEADER_encrypt_shift)
+
+/* tmz field: bit 18 */
+#define SDMA_PKT_WRITE_TILED_HEADER_tmz_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_WRITE_TILED_HEADER_tmz_shift 18
+#define SDMA_PKT_WRITE_TILED_HEADER_TMZ(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_tmz_mask) << SDMA_PKT_WRITE_TILED_HEADER_tmz_shift)
+
+/* cpv field: bit 28 */
+#define SDMA_PKT_WRITE_TILED_HEADER_cpv_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_WRITE_TILED_HEADER_cpv_shift 28
+#define SDMA_PKT_WRITE_TILED_HEADER_CPV(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_cpv_mask) << SDMA_PKT_WRITE_TILED_HEADER_cpv_shift)
+
+/* DST_ADDR_LO word (_offset 1) */
+/* dst_addr_31_0 field: bits [31:0] */
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/* DST_ADDR_HI word (_offset 2) */
+/* dst_addr_63_32 field: bits [31:0] */
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/* DW_3 word (_offset 3) */
+/* width field: bits [13:0] */
+#define SDMA_PKT_WRITE_TILED_DW_3_width_offset 3
+#define SDMA_PKT_WRITE_TILED_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_3_width_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_3_WIDTH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_3_width_mask) << SDMA_PKT_WRITE_TILED_DW_3_width_shift)
+
+/* DW_4 word (_offset 4) */
+/* height field: bits [13:0] */
+#define SDMA_PKT_WRITE_TILED_DW_4_height_offset 4
+#define SDMA_PKT_WRITE_TILED_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_4_height_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_4_HEIGHT(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_height_mask) << SDMA_PKT_WRITE_TILED_DW_4_height_shift)
+
+/* depth field: bits [28:16] */
+#define SDMA_PKT_WRITE_TILED_DW_4_depth_offset 4
+#define SDMA_PKT_WRITE_TILED_DW_4_depth_mask 0x00001FFF
+#define SDMA_PKT_WRITE_TILED_DW_4_depth_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_4_DEPTH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_depth_mask) << SDMA_PKT_WRITE_TILED_DW_4_depth_shift)
+
+/* DW_5 word (_offset 5) */
+/* element_size field: bits [2:0] */
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_element_size_mask) << SDMA_PKT_WRITE_TILED_DW_5_element_size_shift)
+
+/* swizzle_mode field: bits [7:3] */
+#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_shift 3
+#define SDMA_PKT_WRITE_TILED_DW_5_SWIZZLE_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_mask) << SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_shift)
+
+/* dimension field: bits [10:9] */
+#define SDMA_PKT_WRITE_TILED_DW_5_dimension_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_dimension_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_5_dimension_shift 9
+#define SDMA_PKT_WRITE_TILED_DW_5_DIMENSION(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_dimension_mask) << SDMA_PKT_WRITE_TILED_DW_5_dimension_shift)
+
+/* mip_max field: bits [19:16] */
+#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_mask 0x0000000F
+#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_5_MIP_MAX(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_mip_max_mask) << SDMA_PKT_WRITE_TILED_DW_5_mip_max_shift)
+
+/* DW_6 word (_offset 6) */
+/* x field: bits [13:0] */
+#define SDMA_PKT_WRITE_TILED_DW_6_x_offset 6
+#define SDMA_PKT_WRITE_TILED_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_6_x_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_6_X(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_x_mask) << SDMA_PKT_WRITE_TILED_DW_6_x_shift)
+
+/* y field: bits [29:16] */
+#define SDMA_PKT_WRITE_TILED_DW_6_y_offset 6
+#define SDMA_PKT_WRITE_TILED_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_6_y_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_6_Y(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_y_mask) << SDMA_PKT_WRITE_TILED_DW_6_y_shift)
+
+/* DW_7 word (_offset 7) */
+/* z field: bits [12:0] */
+#define SDMA_PKT_WRITE_TILED_DW_7_z_offset 7
+#define SDMA_PKT_WRITE_TILED_DW_7_z_mask 0x00001FFF
+#define SDMA_PKT_WRITE_TILED_DW_7_z_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_7_Z(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_z_mask) << SDMA_PKT_WRITE_TILED_DW_7_z_shift)
+
+/* sw field: bits [25:24] */
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_offset 7
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_shift 24
+#define SDMA_PKT_WRITE_TILED_DW_7_SW(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_sw_mask) << SDMA_PKT_WRITE_TILED_DW_7_sw_shift)
+
+/* cache_policy field: bits [28:26] */
+#define SDMA_PKT_WRITE_TILED_DW_7_cache_policy_offset 7
+#define SDMA_PKT_WRITE_TILED_DW_7_cache_policy_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_DW_7_cache_policy_shift 26
+#define SDMA_PKT_WRITE_TILED_DW_7_CACHE_POLICY(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_cache_policy_mask) << SDMA_PKT_WRITE_TILED_DW_7_cache_policy_shift)
+
+/* COUNT word (_offset 8) */
+/* count field: bits [19:0] */
+#define SDMA_PKT_WRITE_TILED_COUNT_count_offset 8
+#define SDMA_PKT_WRITE_TILED_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_WRITE_TILED_COUNT_count_shift 0
+#define SDMA_PKT_WRITE_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_TILED_COUNT_count_mask) << SDMA_PKT_WRITE_TILED_COUNT_count_shift)
+
+/* DATA0 word (_offset 9) */
+/* data0 field: bits [31:0] */
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_offset 9
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_TILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_TILED_DATA0_data0_mask) << SDMA_PKT_WRITE_TILED_DATA0_data0_shift)
+
+
+/*
+** Field definitions for the SDMA_PKT_WRITE_TILED_BC packet. Per field: _mask is the unshifted width mask, _shift is the bit position inside the word, and the upper-case macro masks a value and shifts it into place. _offset is presumably the dword index of the containing word (TODO confirm).
+*/
+
+/* HEADER word (_offset 0) */
+/* op field: bits [7:0] */
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_TILED_BC_HEADER_op_mask) << SDMA_PKT_WRITE_TILED_BC_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] */
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_mask) << SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_shift)
+
+/* DST_ADDR_LO word (_offset 1) */
+/* dst_addr_31_0 field: bits [31:0] */
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/* DST_ADDR_HI word (_offset 2) */
+/* dst_addr_63_32 field: bits [31:0] */
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/* DW_3 word (_offset 3) */
+/* width field: bits [13:0] */
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_offset 3
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_WIDTH(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_3_width_mask) << SDMA_PKT_WRITE_TILED_BC_DW_3_width_shift)
+
+/* DW_4 word (_offset 4) */
+/* height field: bits [13:0] */
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_offset 4
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_HEIGHT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_4_height_mask) << SDMA_PKT_WRITE_TILED_BC_DW_4_height_shift)
+
+/* depth field: bits [26:16] */
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_offset 4
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_mask 0x000007FF
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_shift 16
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_DEPTH(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_4_depth_mask) << SDMA_PKT_WRITE_TILED_BC_DW_4_depth_shift)
+
+/* DW_5 word (_offset 5) */
+/* element_size field: bits [2:0] */
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_shift)
+
+/* array_mode field: bits [6:3] */
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_mask 0x0000000F
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_shift 3
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_ARRAY_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_shift)
+
+/* mit_mode field: bits [10:8] */
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_shift 8
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_MIT_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_shift)
+
+/* tilesplit_size field: bits [13:11] */
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_shift 11
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_shift)
+
+/* bank_w field: bits [16:15] */
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_shift 15
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_BANK_W(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_shift)
+
+/* bank_h field: bits [19:18] */
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_shift 18
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_BANK_H(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_shift)
+
+/* num_bank field: bits [22:21] */
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_shift 21
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_NUM_BANK(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_shift)
+
+/* mat_aspt field: bits [25:24] */
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_shift 24
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_MAT_ASPT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_shift)
+
+/* pipe_config field: bits [30:26] */
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_mask 0x0000001F
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_shift 26
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_PIPE_CONFIG(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_shift)
+
+/* DW_6 word (_offset 6) */
+/* x field: bits [13:0] */
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_offset 6
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_X(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_6_x_mask) << SDMA_PKT_WRITE_TILED_BC_DW_6_x_shift)
+
+/* y field: bits [29:16] */
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_offset 6
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_shift 16
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_Y(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_6_y_mask) << SDMA_PKT_WRITE_TILED_BC_DW_6_y_shift)
+
+/* DW_7 word (_offset 7) */
+/* z field: bits [10:0] */
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_offset 7
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_mask 0x000007FF
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_Z(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_7_z_mask) << SDMA_PKT_WRITE_TILED_BC_DW_7_z_shift)
+
+/* sw field: bits [25:24] */
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_offset 7
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_shift 24
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_SW(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_7_sw_mask) << SDMA_PKT_WRITE_TILED_BC_DW_7_sw_shift)
+
+/* COUNT word (_offset 8) */
+/* count field: bits [21:2]; NOTE(review): shift is 2 here but 0 in SDMA_PKT_WRITE_TILED - confirm against the hardware spec */
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_offset 8
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_shift 2
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_COUNT_count_mask) << SDMA_PKT_WRITE_TILED_BC_COUNT_count_shift)
+
+/* DATA0 word (_offset 9) */
+/* data0 field: bits [31:0] */
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_offset 9
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DATA0_data0_mask) << SDMA_PKT_WRITE_TILED_BC_DATA0_data0_shift)
+
+
+/*
+** Field definitions for the SDMA_PKT_PTEPDE_COPY packet. Per field: _mask is the unshifted width mask, _shift is the bit position inside the word, and the upper-case macro masks a value and shifts it into place. _offset is presumably the dword index of the containing word (TODO confirm). Packers whose field reaches bit 31 cast to unsigned int before shifting.
+*/
+
+/* HEADER word (_offset 0) */
+/* op field: bits [7:0] */
+#define SDMA_PKT_PTEPDE_COPY_HEADER_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_HEADER_op_shift 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] */
+#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_shift 8
+#define SDMA_PKT_PTEPDE_COPY_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_shift)
+
+/* tmz field: bit 18 */
+#define SDMA_PKT_PTEPDE_COPY_HEADER_tmz_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_HEADER_tmz_shift 18
+#define SDMA_PKT_PTEPDE_COPY_HEADER_TMZ(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_tmz_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_tmz_shift)
+
+/* cpv field: bit 28 */
+#define SDMA_PKT_PTEPDE_COPY_HEADER_cpv_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_HEADER_cpv_shift 28
+#define SDMA_PKT_PTEPDE_COPY_HEADER_CPV(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_cpv_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_cpv_shift)
+
+/* ptepde_op field: bit 31; the masked value is cast to unsigned int so shifting an int argument into bit 31 is not a signed overflow and does not sign-extend when widened */
+#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_shift 31
+#define SDMA_PKT_PTEPDE_COPY_HEADER_PTEPDE_OP(x) ((unsigned int)((x) & SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_shift)
+
+/* SRC_ADDR_LO word (_offset 1) */
+/* src_addr_31_0 field: bits [31:0] */
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/* SRC_ADDR_HI word (_offset 2) */
+/* src_addr_63_32 field: bits [31:0] */
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/* DST_ADDR_LO word (_offset 3) */
+/* dst_addr_31_0 field: bits [31:0] */
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_offset 3
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/* DST_ADDR_HI word (_offset 4) */
+/* dst_addr_63_32 field: bits [31:0] */
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_offset 4
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/* MASK_DW0 word (_offset 5) */
+/* mask_dw0 field: bits [31:0] */
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_offset 5
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_mask) << SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_shift)
+
+/* MASK_DW1 word (_offset 6) */
+/* mask_dw1 field: bits [31:0] */
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_offset 6
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_shift 0
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_mask) << SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_shift)
+
+/* COUNT word (_offset 7) */
+/* count field: bits [18:0] */
+#define SDMA_PKT_PTEPDE_COPY_COUNT_count_offset 7
+#define SDMA_PKT_PTEPDE_COPY_COUNT_count_mask 0x0007FFFF
+#define SDMA_PKT_PTEPDE_COPY_COUNT_count_shift 0
+#define SDMA_PKT_PTEPDE_COPY_COUNT_COUNT(x) (((x) & SDMA_PKT_PTEPDE_COPY_COUNT_count_mask) << SDMA_PKT_PTEPDE_COPY_COUNT_count_shift)
+
+/* dst_cache_policy field: bits [24:22] */
+#define SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_offset 7
+#define SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_mask 0x00000007
+#define SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_shift 22
+#define SDMA_PKT_PTEPDE_COPY_COUNT_DST_CACHE_POLICY(x) (((x) & SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_mask) << SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_shift)
+
+/* src_cache_policy field: bits [31:29]; the masked value is cast to unsigned int so values with the top bit set do not overflow a signed int when shifted */
+#define SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_offset 7
+#define SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_mask 0x00000007
+#define SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_shift 29
+#define SDMA_PKT_PTEPDE_COPY_COUNT_SRC_CACHE_POLICY(x) ((unsigned int)((x) & SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_mask) << SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_shift)
+
+
+/*
+** Field definitions for the SDMA_PKT_PTEPDE_COPY_BACKWARDS packet. Per field: _mask is the unshifted width mask, _shift is the bit position inside the word, and the upper-case macro masks a value and shifts it into place. _offset is presumably the dword index of the containing word (TODO confirm). Packers whose field reaches bit 31 cast to unsigned int before shifting.
+*/
+
+/* HEADER word (_offset 0) */
+/* op field: bits [7:0] */
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] */
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_shift 8
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_shift)
+
+/* pte_size field: bits [29:28] */
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_mask 0x00000003
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_shift 28
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTE_SIZE(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_shift)
+
+/* direction field: bit 30 */
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_shift 30
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_DIRECTION(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_shift)
+
+/* ptepde_op field: bit 31; the masked value is cast to unsigned int so shifting an int argument into bit 31 is not a signed overflow and does not sign-extend when widened */
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_shift 31
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTEPDE_OP(x) ((unsigned int)((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_shift)
+
+/* SRC_ADDR_LO word (_offset 1) */
+/* src_addr_31_0 field: bits [31:0] */
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/* SRC_ADDR_HI word (_offset 2) */
+/* src_addr_63_32 field: bits [31:0] */
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/* DST_ADDR_LO word (_offset 3) */
+/* dst_addr_31_0 field: bits [31:0] */
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_offset 3
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/* DST_ADDR_HI word (_offset 4) */
+/* dst_addr_63_32 field: bits [31:0] */
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_offset 4
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/* MASK_BIT_FOR_DW word (_offset 5) */
+/* mask_first_xfer field: bits [7:0] */
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_offset 5
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_MASK_FIRST_XFER(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_shift)
+
+/* mask_last_xfer field: bits [15:8] */
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_offset 5
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_shift 8
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_MASK_LAST_XFER(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_shift)
+
+/* COUNT_IN_32B_XFER word (_offset 6) */
+/* count field: bits [16:0] */
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_offset 6
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_mask 0x0001FFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_COUNT(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_shift)
+
+
+/*
+** Field definitions for the SDMA_PKT_PTEPDE_RMW packet. Per field: _mask is the unshifted width mask, _shift is the bit position inside the word, and the upper-case macro masks a value and shifts it into place. _offset is presumably the dword index of the containing word (TODO confirm).
+*/
+
+/* HEADER word (_offset 0) */
+/* op field: bits [7:0] */
+#define SDMA_PKT_PTEPDE_RMW_HEADER_op_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_RMW_HEADER_op_shift 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_op_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] */
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_shift 8
+#define SDMA_PKT_PTEPDE_RMW_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_shift)
+
+/* mtype field: bits [18:16] */
+#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_mask 0x00000007
+#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_shift 16
+#define SDMA_PKT_PTEPDE_RMW_HEADER_MTYPE(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_mtype_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_mtype_shift)
+
+/* gcc field: bit 19 */
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_shift 19
+#define SDMA_PKT_PTEPDE_RMW_HEADER_GCC(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_gcc_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_gcc_shift)
+
+/* sys field: bit 20 */
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_shift 20
+#define SDMA_PKT_PTEPDE_RMW_HEADER_SYS(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_sys_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_sys_shift)
+
+/* snp field: bit 22 */
+#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_shift 22
+#define SDMA_PKT_PTEPDE_RMW_HEADER_SNP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_snp_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_snp_shift)
+
+/* gpa field: bit 23 */
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_shift 23
+#define SDMA_PKT_PTEPDE_RMW_HEADER_GPA(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_gpa_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_gpa_shift)
+
+/* l2_policy field: bits [25:24] */
+#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_shift 24
+#define SDMA_PKT_PTEPDE_RMW_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_shift)
+
+/* llc_policy field: bit 26 */
+#define SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_shift 26
+#define SDMA_PKT_PTEPDE_RMW_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_shift)
+
+/* cpv field: bit 28 */
+#define SDMA_PKT_PTEPDE_RMW_HEADER_cpv_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_cpv_shift 28
+#define SDMA_PKT_PTEPDE_RMW_HEADER_CPV(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_cpv_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_cpv_shift)
+
+/* ADDR_LO word (_offset 1) */
+/* addr_31_0 field: bits [31:0] */
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_mask) << SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_shift)
+
+/* ADDR_HI word (_offset 2) */
+/* addr_63_32 field: bits [31:0] */
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_mask) << SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_shift)
+
+/* MASK_LO word (_offset 3) */
+/* mask_31_0 field: bits [31:0] */
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_offset 3
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_shift 0
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_MASK_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_mask) << SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_shift)
+
+/* MASK_HI word (_offset 4) */
+/* mask_63_32 field: bits [31:0] */
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_offset 4
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_shift 0
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_MASK_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_mask) << SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_shift)
+
+/* VALUE_LO word (_offset 5) */
+/* value_31_0 field: bits [31:0] */
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_offset 5
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_shift 0
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_VALUE_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_mask) << SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_shift)
+
+/* VALUE_HI word (_offset 6) */
+/* value_63_32 field: bits [31:0] */
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_offset 6
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_shift 0
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_VALUE_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_mask) << SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_shift)
+
+/* COUNT word (_offset 7) */
+/* num_of_pte field: bits [31:0] */
+#define SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_offset 7
+#define SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_shift 0
+#define SDMA_PKT_PTEPDE_RMW_COUNT_NUM_OF_PTE(x) (((x) & SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_mask) << SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_shift)
+
+
+/*
+** Definitions for the SDMA_PKT_REGISTER_RMW packet: each field gets _offset/_mask/_shift constants plus an upper-case builder that masks x and shifts it into place. NOTE(review): _offset appears to be the index of the containing word within the packet; confirm against the packet spec.
+*/
+
+/* HEADER word: fields below use _offset 0 */
+/* op field: bits [7:0] (8 bits wide) */
+#define SDMA_PKT_REGISTER_RMW_HEADER_op_offset 0
+#define SDMA_PKT_REGISTER_RMW_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_REGISTER_RMW_HEADER_op_shift 0
+#define SDMA_PKT_REGISTER_RMW_HEADER_OP(x) (((x) & SDMA_PKT_REGISTER_RMW_HEADER_op_mask) << SDMA_PKT_REGISTER_RMW_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] (8 bits wide) */
+#define SDMA_PKT_REGISTER_RMW_HEADER_sub_op_offset 0
+#define SDMA_PKT_REGISTER_RMW_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_REGISTER_RMW_HEADER_sub_op_shift 8
+#define SDMA_PKT_REGISTER_RMW_HEADER_SUB_OP(x) (((x) & SDMA_PKT_REGISTER_RMW_HEADER_sub_op_mask) << SDMA_PKT_REGISTER_RMW_HEADER_sub_op_shift)
+
+/* ADDR word: fields below use _offset 1 */
+/* addr field: bits [19:0] (20 bits wide) */
+#define SDMA_PKT_REGISTER_RMW_ADDR_addr_offset 1
+#define SDMA_PKT_REGISTER_RMW_ADDR_addr_mask 0x000FFFFF
+#define SDMA_PKT_REGISTER_RMW_ADDR_addr_shift 0
+#define SDMA_PKT_REGISTER_RMW_ADDR_ADDR(x) (((x) & SDMA_PKT_REGISTER_RMW_ADDR_addr_mask) << SDMA_PKT_REGISTER_RMW_ADDR_addr_shift)
+
+/* aperture_id field: bits [31:20] (12 bits wide); NOTE(review): reaches bit 31, so pass an unsigned x to avoid shifting a signed int into the sign bit */
+#define SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_offset 1
+#define SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_mask 0x00000FFF
+#define SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_shift 20
+#define SDMA_PKT_REGISTER_RMW_ADDR_APERTURE_ID(x) (((x) & SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_mask) << SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_shift)
+
+/* MASK word: fields below use _offset 2 */
+/* mask field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_REGISTER_RMW_MASK_mask_offset 2
+#define SDMA_PKT_REGISTER_RMW_MASK_mask_mask 0xFFFFFFFF
+#define SDMA_PKT_REGISTER_RMW_MASK_mask_shift 0
+#define SDMA_PKT_REGISTER_RMW_MASK_MASK(x) (((x) & SDMA_PKT_REGISTER_RMW_MASK_mask_mask) << SDMA_PKT_REGISTER_RMW_MASK_mask_shift)
+
+/* VALUE word: fields below use _offset 3 */
+/* value field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_REGISTER_RMW_VALUE_value_offset 3
+#define SDMA_PKT_REGISTER_RMW_VALUE_value_mask 0xFFFFFFFF
+#define SDMA_PKT_REGISTER_RMW_VALUE_value_shift 0
+#define SDMA_PKT_REGISTER_RMW_VALUE_VALUE(x) (((x) & SDMA_PKT_REGISTER_RMW_VALUE_value_mask) << SDMA_PKT_REGISTER_RMW_VALUE_value_shift)
+
+/* MISC word: fields below use _offset 4 */
+/* stride field: bits [19:0] (20 bits wide) */
+#define SDMA_PKT_REGISTER_RMW_MISC_stride_offset 4
+#define SDMA_PKT_REGISTER_RMW_MISC_stride_mask 0x000FFFFF
+#define SDMA_PKT_REGISTER_RMW_MISC_stride_shift 0
+#define SDMA_PKT_REGISTER_RMW_MISC_STRIDE(x) (((x) & SDMA_PKT_REGISTER_RMW_MISC_stride_mask) << SDMA_PKT_REGISTER_RMW_MISC_stride_shift)
+
+/* num_of_reg field: bits [31:20] (12 bits wide); NOTE(review): reaches bit 31, so pass an unsigned x to avoid shifting a signed int into the sign bit */
+#define SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_offset 4
+#define SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_mask 0x00000FFF
+#define SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_shift 20
+#define SDMA_PKT_REGISTER_RMW_MISC_NUM_OF_REG(x) (((x) & SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_mask) << SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_shift)
+
+
+/*
+** Definitions for the SDMA_PKT_WRITE_INCR packet: each field gets _offset/_mask/_shift constants plus an upper-case builder that masks x and shifts it into place. NOTE(review): _offset appears to be the index of the containing word within the packet; confirm against the packet spec.
+*/
+
+/* HEADER word: fields below use _offset 0 */
+/* op field: bits [7:0] (8 bits wide) */
+#define SDMA_PKT_WRITE_INCR_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_INCR_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_INCR_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] (8 bits wide) */
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_INCR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift)
+
+/* cache_policy field: bits [26:24] (3 bits wide) */
+#define SDMA_PKT_WRITE_INCR_HEADER_cache_policy_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_WRITE_INCR_HEADER_cache_policy_shift 24
+#define SDMA_PKT_WRITE_INCR_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_cache_policy_mask) << SDMA_PKT_WRITE_INCR_HEADER_cache_policy_shift)
+
+/* cpv field: bit [28] (1 bit wide) */
+#define SDMA_PKT_WRITE_INCR_HEADER_cpv_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_WRITE_INCR_HEADER_cpv_shift 28
+#define SDMA_PKT_WRITE_INCR_HEADER_CPV(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_cpv_mask) << SDMA_PKT_WRITE_INCR_HEADER_cpv_shift)
+
+/* DST_ADDR_LO word: fields below use _offset 1 */
+/* dst_addr_31_0 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/* DST_ADDR_HI word: fields below use _offset 2 */
+/* dst_addr_63_32 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/* MASK_DW0 word: fields below use _offset 3 */
+/* mask_dw0 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_offset 3
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask) << SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift)
+
+/* MASK_DW1 word: fields below use _offset 4 */
+/* mask_dw1 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_offset 4
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask) << SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift)
+
+/* INIT_DW0 word: fields below use _offset 5 */
+/* init_dw0 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_offset 5
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_INIT_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask) << SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift)
+
+/* INIT_DW1 word: fields below use _offset 6 */
+/* init_dw1 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_offset 6
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_INIT_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask) << SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift)
+
+/* INCR_DW0 word: fields below use _offset 7 */
+/* incr_dw0 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_offset 7
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_INCR_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask) << SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift)
+
+/* INCR_DW1 word: fields below use _offset 8 */
+/* incr_dw1 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_offset 8
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_INCR_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask) << SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift)
+
+/* COUNT word: fields below use _offset 9 */
+/* count field: bits [18:0] (19 bits wide) */
+#define SDMA_PKT_WRITE_INCR_COUNT_count_offset 9
+#define SDMA_PKT_WRITE_INCR_COUNT_count_mask 0x0007FFFF
+#define SDMA_PKT_WRITE_INCR_COUNT_count_shift 0
+#define SDMA_PKT_WRITE_INCR_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_INCR_COUNT_count_mask) << SDMA_PKT_WRITE_INCR_COUNT_count_shift)
+
+
+/*
+** Definitions for the SDMA_PKT_INDIRECT packet: each field gets _offset/_mask/_shift constants plus an upper-case builder that masks x and shifts it into place. NOTE(review): _offset appears to be the index of the containing word within the packet; confirm against the packet spec.
+*/
+
+/* HEADER word: fields below use _offset 0 */
+/* op field: bits [7:0] (8 bits wide) */
+#define SDMA_PKT_INDIRECT_HEADER_op_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_INDIRECT_HEADER_op_shift 0
+#define SDMA_PKT_INDIRECT_HEADER_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_op_mask) << SDMA_PKT_INDIRECT_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] (8 bits wide) */
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_shift 8
+#define SDMA_PKT_INDIRECT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_sub_op_mask) << SDMA_PKT_INDIRECT_HEADER_sub_op_shift)
+
+/* vmid field: bits [19:16] (4 bits wide) */
+#define SDMA_PKT_INDIRECT_HEADER_vmid_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_vmid_mask 0x0000000F
+#define SDMA_PKT_INDIRECT_HEADER_vmid_shift 16
+#define SDMA_PKT_INDIRECT_HEADER_VMID(x) (((x) & SDMA_PKT_INDIRECT_HEADER_vmid_mask) << SDMA_PKT_INDIRECT_HEADER_vmid_shift)
+
+/* priv field: bit [31] (1 bit wide); NOTE(review): this is the top bit, so pass an unsigned x to avoid shifting a signed int into the sign bit */
+#define SDMA_PKT_INDIRECT_HEADER_priv_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_priv_mask 0x00000001
+#define SDMA_PKT_INDIRECT_HEADER_priv_shift 31
+#define SDMA_PKT_INDIRECT_HEADER_PRIV(x) (((x) & SDMA_PKT_INDIRECT_HEADER_priv_mask) << SDMA_PKT_INDIRECT_HEADER_priv_shift)
+
+/* BASE_LO word: fields below use _offset 1 */
+/* ib_base_31_0 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_offset 1
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift 0
+#define SDMA_PKT_INDIRECT_BASE_LO_IB_BASE_31_0(x) (((x) & SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask) << SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift)
+
+/* BASE_HI word: fields below use _offset 2 */
+/* ib_base_63_32 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_offset 2
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift 0
+#define SDMA_PKT_INDIRECT_BASE_HI_IB_BASE_63_32(x) (((x) & SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask) << SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift)
+
+/* IB_SIZE word: fields below use _offset 3 */
+/* ib_size field: bits [19:0] (20 bits wide) */
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_offset 3
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask 0x000FFFFF
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift 0
+#define SDMA_PKT_INDIRECT_IB_SIZE_IB_SIZE(x) (((x) & SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask) << SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift)
+
+/* CSA_ADDR_LO word: fields below use _offset 4 */
+/* csa_addr_31_0 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_offset 4
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift 0
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_CSA_ADDR_31_0(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift)
+
+/* CSA_ADDR_HI word: fields below use _offset 5 */
+/* csa_addr_63_32 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_offset 5
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift 0
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_CSA_ADDR_63_32(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift)
+
+
+/*
+** Definitions for the SDMA_PKT_SEMAPHORE packet: each field gets _offset/_mask/_shift constants plus an upper-case builder that masks x and shifts it into place. NOTE(review): _offset appears to be the index of the containing word within the packet; confirm against the packet spec.
+*/
+
+/* HEADER word: fields below use _offset 0 */
+/* op field: bits [7:0] (8 bits wide) */
+#define SDMA_PKT_SEMAPHORE_HEADER_op_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_SEMAPHORE_HEADER_op_shift 0
+#define SDMA_PKT_SEMAPHORE_HEADER_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] (8 bits wide) */
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift 8
+#define SDMA_PKT_SEMAPHORE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift)
+
+/* write_one field: bit [29] (1 bit wide) */
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_shift 29
+#define SDMA_PKT_SEMAPHORE_HEADER_WRITE_ONE(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_write_one_mask) << SDMA_PKT_SEMAPHORE_HEADER_write_one_shift)
+
+/* signal field: bit [30] (1 bit wide) */
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_shift 30
+#define SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_signal_mask) << SDMA_PKT_SEMAPHORE_HEADER_signal_shift)
+
+/* mailbox field: bit [31] (1 bit wide); NOTE(review): this is the top bit, so pass an unsigned x to avoid shifting a signed int into the sign bit */
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift 31
+#define SDMA_PKT_SEMAPHORE_HEADER_MAILBOX(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask) << SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift)
+
+/* ADDR_LO word: fields below use _offset 1 */
+/* addr_31_0 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift)
+
+/* ADDR_HI word: fields below use _offset 2 */
+/* addr_63_32 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift)
+
+
+/*
+** Definitions for the SDMA_PKT_MEM_INCR packet: each field gets _offset/_mask/_shift constants plus an upper-case builder that masks x and shifts it into place. NOTE(review): _offset appears to be the index of the containing word within the packet; confirm against the packet spec.
+*/
+
+/* HEADER word: fields below use _offset 0 */
+/* op field: bits [7:0] (8 bits wide) */
+#define SDMA_PKT_MEM_INCR_HEADER_op_offset 0
+#define SDMA_PKT_MEM_INCR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_MEM_INCR_HEADER_op_shift 0
+#define SDMA_PKT_MEM_INCR_HEADER_OP(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_op_mask) << SDMA_PKT_MEM_INCR_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] (8 bits wide) */
+#define SDMA_PKT_MEM_INCR_HEADER_sub_op_offset 0
+#define SDMA_PKT_MEM_INCR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_MEM_INCR_HEADER_sub_op_shift 8
+#define SDMA_PKT_MEM_INCR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_sub_op_mask) << SDMA_PKT_MEM_INCR_HEADER_sub_op_shift)
+
+/* l2_policy field: bits [25:24] (2 bits wide) */
+#define SDMA_PKT_MEM_INCR_HEADER_l2_policy_offset 0
+#define SDMA_PKT_MEM_INCR_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_MEM_INCR_HEADER_l2_policy_shift 24
+#define SDMA_PKT_MEM_INCR_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_l2_policy_mask) << SDMA_PKT_MEM_INCR_HEADER_l2_policy_shift)
+
+/* llc_policy field: bit [26] (1 bit wide) */
+#define SDMA_PKT_MEM_INCR_HEADER_llc_policy_offset 0
+#define SDMA_PKT_MEM_INCR_HEADER_llc_policy_mask 0x00000001
+#define SDMA_PKT_MEM_INCR_HEADER_llc_policy_shift 26
+#define SDMA_PKT_MEM_INCR_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_llc_policy_mask) << SDMA_PKT_MEM_INCR_HEADER_llc_policy_shift)
+
+/* cpv field: bit [28] (1 bit wide) */
+#define SDMA_PKT_MEM_INCR_HEADER_cpv_offset 0
+#define SDMA_PKT_MEM_INCR_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_MEM_INCR_HEADER_cpv_shift 28
+#define SDMA_PKT_MEM_INCR_HEADER_CPV(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_cpv_mask) << SDMA_PKT_MEM_INCR_HEADER_cpv_shift)
+
+/* ADDR_LO word: fields below use _offset 1 */
+/* addr_31_0 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_MEM_INCR_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_mask) << SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_shift)
+
+/* ADDR_HI word: fields below use _offset 2 */
+/* addr_63_32 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_MEM_INCR_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_mask) << SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_shift)
+
+
+/*
+** Definitions for the SDMA_PKT_VM_INVALIDATION packet: each field gets _offset/_mask/_shift constants plus an upper-case builder that masks x and shifts it into place. NOTE(review): _offset appears to be the index of the containing word within the packet; confirm against the packet spec.
+*/
+
+/* HEADER word: fields below use _offset 0 */
+/* op field: bits [7:0] (8 bits wide) */
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_shift 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_OP(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] (8 bits wide) */
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift 8
+#define SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift)
+
+/* gfx_eng_id field: bits [20:16] (5 bits wide) */
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_shift 16
+#define SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_shift)
+
+/* mm_eng_id field: bits [28:24] (5 bits wide) */
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_shift 24
+#define SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_shift)
+
+/* INVALIDATEREQ word: fields below use _offset 1 */
+/* invalidatereq field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_offset 1
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask 0xFFFFFFFF
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift 0
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(x) (((x) & SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask) << SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift)
+
+/* ADDRESSRANGELO word: fields below use _offset 2 */
+/* addressrangelo field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_offset 2
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask 0xFFFFFFFF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift 0
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_ADDRESSRANGELO(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift)
+
+/* ADDRESSRANGEHI word: fields below use _offset 3; it holds three fields, not just addressrangehi */
+/* invalidateack field: bits [15:0] (16 bits wide) */
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask 0x0000FFFF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift 0
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift)
+
+/* addressrangehi field: bits [20:16] (5 bits wide) */
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift 16
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift)
+
+/* reserved field: bits [31:23] (9 bits wide); bits [22:21] of this word have no macro here. NOTE(review): reaches bit 31, so pass an unsigned x to avoid shifting a signed int into the sign bit */
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask 0x000001FF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift 23
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_RESERVED(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift)
+
+
+/*
+** Definitions for the SDMA_PKT_FENCE packet: each field gets _offset/_mask/_shift constants plus an upper-case builder that masks x and shifts it into place. NOTE(review): _offset appears to be the index of the containing word within the packet; confirm against the packet spec.
+*/
+
+/* HEADER word: fields below use _offset 0 */
+/* op field: bits [7:0] (8 bits wide) */
+#define SDMA_PKT_FENCE_HEADER_op_offset 0
+#define SDMA_PKT_FENCE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_FENCE_HEADER_op_shift 0
+#define SDMA_PKT_FENCE_HEADER_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_op_mask) << SDMA_PKT_FENCE_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] (8 bits wide) */
+#define SDMA_PKT_FENCE_HEADER_sub_op_offset 0
+#define SDMA_PKT_FENCE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_FENCE_HEADER_sub_op_shift 8
+#define SDMA_PKT_FENCE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_sub_op_mask) << SDMA_PKT_FENCE_HEADER_sub_op_shift)
+
+/* mtype field: bits [18:16] (3 bits wide) */
+#define SDMA_PKT_FENCE_HEADER_mtype_offset 0
+#define SDMA_PKT_FENCE_HEADER_mtype_mask 0x00000007
+#define SDMA_PKT_FENCE_HEADER_mtype_shift 16
+#define SDMA_PKT_FENCE_HEADER_MTYPE(x) (((x) & SDMA_PKT_FENCE_HEADER_mtype_mask) << SDMA_PKT_FENCE_HEADER_mtype_shift)
+
+/* gcc field: bit [19] (1 bit wide) */
+#define SDMA_PKT_FENCE_HEADER_gcc_offset 0
+#define SDMA_PKT_FENCE_HEADER_gcc_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_gcc_shift 19
+#define SDMA_PKT_FENCE_HEADER_GCC(x) (((x) & SDMA_PKT_FENCE_HEADER_gcc_mask) << SDMA_PKT_FENCE_HEADER_gcc_shift)
+
+/* sys field: bit [20] (1 bit wide) */
+#define SDMA_PKT_FENCE_HEADER_sys_offset 0
+#define SDMA_PKT_FENCE_HEADER_sys_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_sys_shift 20
+#define SDMA_PKT_FENCE_HEADER_SYS(x) (((x) & SDMA_PKT_FENCE_HEADER_sys_mask) << SDMA_PKT_FENCE_HEADER_sys_shift)
+
+/* snp field: bit [22] (1 bit wide) */
+#define SDMA_PKT_FENCE_HEADER_snp_offset 0
+#define SDMA_PKT_FENCE_HEADER_snp_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_snp_shift 22
+#define SDMA_PKT_FENCE_HEADER_SNP(x) (((x) & SDMA_PKT_FENCE_HEADER_snp_mask) << SDMA_PKT_FENCE_HEADER_snp_shift)
+
+/* gpa field: bit [23] (1 bit wide) */
+#define SDMA_PKT_FENCE_HEADER_gpa_offset 0
+#define SDMA_PKT_FENCE_HEADER_gpa_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_gpa_shift 23
+#define SDMA_PKT_FENCE_HEADER_GPA(x) (((x) & SDMA_PKT_FENCE_HEADER_gpa_mask) << SDMA_PKT_FENCE_HEADER_gpa_shift)
+
+/* l2_policy field: bits [25:24] (2 bits wide) */
+#define SDMA_PKT_FENCE_HEADER_l2_policy_offset 0
+#define SDMA_PKT_FENCE_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_FENCE_HEADER_l2_policy_shift 24
+#define SDMA_PKT_FENCE_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_FENCE_HEADER_l2_policy_mask) << SDMA_PKT_FENCE_HEADER_l2_policy_shift)
+
+/* llc_policy field: bit [26] (1 bit wide) */
+#define SDMA_PKT_FENCE_HEADER_llc_policy_offset 0
+#define SDMA_PKT_FENCE_HEADER_llc_policy_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_llc_policy_shift 26
+#define SDMA_PKT_FENCE_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_FENCE_HEADER_llc_policy_mask) << SDMA_PKT_FENCE_HEADER_llc_policy_shift)
+
+/* cpv field: bit [28] (1 bit wide) */
+#define SDMA_PKT_FENCE_HEADER_cpv_offset 0
+#define SDMA_PKT_FENCE_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_cpv_shift 28
+#define SDMA_PKT_FENCE_HEADER_CPV(x) (((x) & SDMA_PKT_FENCE_HEADER_cpv_mask) << SDMA_PKT_FENCE_HEADER_cpv_shift)
+
+/* ADDR_LO word: fields below use _offset 1 */
+/* addr_31_0 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_FENCE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift)
+
+/* ADDR_HI word: fields below use _offset 2 */
+/* addr_63_32 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_FENCE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift)
+
+/* DATA word: fields below use _offset 3 */
+/* data field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_FENCE_DATA_data_offset 3
+#define SDMA_PKT_FENCE_DATA_data_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_DATA_data_shift 0
+#define SDMA_PKT_FENCE_DATA_DATA(x) (((x) & SDMA_PKT_FENCE_DATA_data_mask) << SDMA_PKT_FENCE_DATA_data_shift)
+
+
+/*
+** Definitions for the SDMA_PKT_SRBM_WRITE packet: each field gets _offset/_mask/_shift constants plus an upper-case builder that masks x and shifts it into place. NOTE(review): _offset appears to be the index of the containing word within the packet; confirm against the packet spec.
+*/
+
+/* HEADER word: fields below use _offset 0 */
+/* op field: bits [7:0] (8 bits wide) */
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_shift 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] (8 bits wide) */
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift 8
+#define SDMA_PKT_SRBM_WRITE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift)
+
+/* byte_en field: bits [31:28] (4 bits wide); NOTE(review): reaches bit 31, so pass an unsigned x to avoid shifting a signed int into the sign bit */
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask 0x0000000F
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift 28
+#define SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask) << SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift)
+
+/* ADDR word: fields below use _offset 1 */
+/* addr field: bits [17:0] (18 bits wide) */
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_offset 1
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_mask 0x0003FFFF
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_shift 0
+#define SDMA_PKT_SRBM_WRITE_ADDR_ADDR(x) (((x) & SDMA_PKT_SRBM_WRITE_ADDR_addr_mask) << SDMA_PKT_SRBM_WRITE_ADDR_addr_shift)
+
+/* apertureid field: bits [31:20] (12 bits wide); bits [19:18] of this word have no macro here. NOTE(review): reaches bit 31, so pass an unsigned x to avoid shifting a signed int into the sign bit */
+#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_offset 1
+#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_mask 0x00000FFF
+#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_shift 20
+#define SDMA_PKT_SRBM_WRITE_ADDR_APERTUREID(x) (((x) & SDMA_PKT_SRBM_WRITE_ADDR_apertureid_mask) << SDMA_PKT_SRBM_WRITE_ADDR_apertureid_shift)
+
+/* DATA word: fields below use _offset 2 */
+/* data field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_SRBM_WRITE_DATA_data_offset 2
+#define SDMA_PKT_SRBM_WRITE_DATA_data_mask 0xFFFFFFFF
+#define SDMA_PKT_SRBM_WRITE_DATA_data_shift 0
+#define SDMA_PKT_SRBM_WRITE_DATA_DATA(x) (((x) & SDMA_PKT_SRBM_WRITE_DATA_data_mask) << SDMA_PKT_SRBM_WRITE_DATA_data_shift)
+
+
+/*
+** Definitions for the SDMA_PKT_PRE_EXE packet: each field gets _offset/_mask/_shift constants plus an upper-case builder that masks x and shifts it into place. NOTE(review): _offset appears to be the index of the containing word within the packet; confirm against the packet spec.
+*/
+
+/* HEADER word: fields below use _offset 0 */
+/* op field: bits [7:0] (8 bits wide) */
+#define SDMA_PKT_PRE_EXE_HEADER_op_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_op_shift 0
+#define SDMA_PKT_PRE_EXE_HEADER_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_op_mask) << SDMA_PKT_PRE_EXE_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] (8 bits wide) */
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_shift 8
+#define SDMA_PKT_PRE_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_sub_op_mask) << SDMA_PKT_PRE_EXE_HEADER_sub_op_shift)
+
+/* dev_sel field: bits [23:16] (8 bits wide) */
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift 16
+#define SDMA_PKT_PRE_EXE_HEADER_DEV_SEL(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask) << SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift)
+
+/* EXEC_COUNT word: fields below use _offset 1 */
+/* exec_count field: bits [13:0] (14 bits wide) */
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_offset 1
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift 0
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift)
+
+
+/*
+** Definitions for the SDMA_PKT_COND_EXE packet: each field gets _offset/_mask/_shift constants plus an upper-case builder that masks x and shifts it into place. NOTE(review): _offset appears to be the index of the containing word within the packet; confirm against the packet spec.
+*/
+
+/* HEADER word: fields below use _offset 0 */
+/* op field: bits [7:0] (8 bits wide) */
+#define SDMA_PKT_COND_EXE_HEADER_op_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COND_EXE_HEADER_op_shift 0
+#define SDMA_PKT_COND_EXE_HEADER_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_op_mask) << SDMA_PKT_COND_EXE_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] (8 bits wide) */
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_shift 8
+#define SDMA_PKT_COND_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_sub_op_mask) << SDMA_PKT_COND_EXE_HEADER_sub_op_shift)
+
+/* cache_policy field: bits [26:24] (3 bits wide) */
+#define SDMA_PKT_COND_EXE_HEADER_cache_policy_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_COND_EXE_HEADER_cache_policy_shift 24
+#define SDMA_PKT_COND_EXE_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_COND_EXE_HEADER_cache_policy_mask) << SDMA_PKT_COND_EXE_HEADER_cache_policy_shift)
+
+/* cpv field: bit [28] (1 bit wide) */
+#define SDMA_PKT_COND_EXE_HEADER_cpv_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COND_EXE_HEADER_cpv_shift 28
+#define SDMA_PKT_COND_EXE_HEADER_CPV(x) (((x) & SDMA_PKT_COND_EXE_HEADER_cpv_mask) << SDMA_PKT_COND_EXE_HEADER_cpv_shift)
+
+/* ADDR_LO word: fields below use _offset 1 */
+/* addr_31_0 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_COND_EXE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift)
+
+/* ADDR_HI word: fields below use _offset 2 */
+/* addr_63_32 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_COND_EXE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift)
+
+/* REFERENCE word: fields below use _offset 3 */
+/* reference field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_offset 3
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_shift 0
+#define SDMA_PKT_COND_EXE_REFERENCE_REFERENCE(x) (((x) & SDMA_PKT_COND_EXE_REFERENCE_reference_mask) << SDMA_PKT_COND_EXE_REFERENCE_reference_shift)
+
+/* EXEC_COUNT word: fields below use _offset 4 */
+/* exec_count field: bits [13:0] (14 bits wide) */
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_offset 4
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift 0
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift)
+
+
+/*
+** Definitions for the SDMA_PKT_CONSTANT_FILL packet: each field gets _offset/_mask/_shift constants plus an upper-case builder that masks x and shifts it into place. NOTE(review): _offset appears to be the index of the containing word within the packet; confirm against the packet spec.
+*/
+
+/* HEADER word: fields below use _offset 0 */
+/* op field: bits [7:0] (8 bits wide) */
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_shift 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] (8 bits wide) */
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift 8
+#define SDMA_PKT_CONSTANT_FILL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift)
+
+/* sw field: bits [17:16] (2 bits wide) */
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask 0x00000003
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift 16
+#define SDMA_PKT_CONSTANT_FILL_HEADER_SW(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift)
+
+/* cache_policy field: bits [26:24] (3 bits wide) */
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_shift 24
+#define SDMA_PKT_CONSTANT_FILL_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_shift)
+
+/* cpv field: bit [28] (1 bit wide) */
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cpv_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cpv_shift 28
+#define SDMA_PKT_CONSTANT_FILL_HEADER_CPV(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_cpv_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_cpv_shift)
+
+/* fillsize field: bits [31:30] (2 bits wide); NOTE(review): reaches bit 31, so pass an unsigned x to avoid shifting a signed int into the sign bit */
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask 0x00000003
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift 30
+#define SDMA_PKT_CONSTANT_FILL_HEADER_FILLSIZE(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift)
+
+/* DST_ADDR_LO word: fields below use _offset 1 */
+/* dst_addr_31_0 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/* DST_ADDR_HI word: fields below use _offset 2 */
+/* dst_addr_63_32 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/* DATA word: fields below use _offset 3 */
+/* src_data_31_0 field: bits [31:0] (full 32-bit word) */
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_offset 3
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DATA_SRC_DATA_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift)
+
+/* COUNT word: fields below use _offset 4 */
+/* count field: bits [29:0] (30 bits wide) */
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_offset 4
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_mask 0x3FFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_shift 0
+#define SDMA_PKT_CONSTANT_FILL_COUNT_COUNT(x) (((x) & SDMA_PKT_CONSTANT_FILL_COUNT_count_mask) << SDMA_PKT_CONSTANT_FILL_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_DATA_FILL_MULTI packet. Each field gets _offset (presumably the dword index in the packet - TODO confirm), _mask, _shift, and a macro computing ((x) & _mask) << _shift.
+*/
+
+/* HEADER word: every field below has _offset 0 */
+/* op field: mask 0x000000FF, shift 0 -> bits [7:0] */
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_OP(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_op_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_op_shift)
+
+/* sub_op field: mask 0x000000FF, shift 8 -> bits [15:8] */
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_shift 8
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_SUB_OP(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_shift)
+
+/* cache_policy field: mask 0x00000007, shift 24 -> bits [26:24] */
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_shift 24
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_shift)
+
+/* cpv field: mask 0x00000001, shift 28 -> bit 28 */
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_shift 28
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_CPV(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_shift)
+
+/* memlog_clr field: mask 0x00000001, shift 31 -> bit 31. NOTE(review): the mask is an int constant, so an int argument makes 1 << 31 overflow int; pass an unsigned value */
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_mask 0x00000001
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_shift 31
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_MEMLOG_CLR(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_shift)
+
+/* BYTE_STRIDE word: _offset 1 */
+/* byte_stride field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] */
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_offset 1
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_BYTE_STRIDE(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_mask) << SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_shift)
+
+/* DMA_COUNT word: _offset 2 */
+/* dma_count field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] */
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_offset 2
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_DMA_COUNT(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_mask) << SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_shift)
+
+/* DST_ADDR_LO word: _offset 3 */
+/* dst_addr_31_0 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] */
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_offset 3
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/* DST_ADDR_HI word: _offset 4 */
+/* dst_addr_63_32 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] of this word */
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_offset 4
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/* BYTE_COUNT word: _offset 5 */
+/* count field: mask 0x03FFFFFF, shift 0 -> bits [25:0]; the top six bits of x are discarded */
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_offset 5
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_mask 0x03FFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_COUNT(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_mask) << SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_REGMEM packet. Each field gets _offset (presumably the dword index in the packet - TODO confirm), _mask, _shift, and a macro computing ((x) & _mask) << _shift.
+*/
+
+/* HEADER word: every field below has _offset 0 */
+/* op field: mask 0x000000FF, shift 0 -> bits [7:0] */
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_op_shift)
+
+/* sub_op field: mask 0x000000FF, shift 8 -> bits [15:8] */
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_REGMEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift)
+
+/* cache_policy field: mask 0x00000007, shift 20 -> bits [22:20] */
+#define SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_shift 20
+#define SDMA_PKT_POLL_REGMEM_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_mask) << SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_shift)
+
+/* cpv field: mask 0x00000001, shift 24 -> bit 24 */
+#define SDMA_PKT_POLL_REGMEM_HEADER_cpv_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_POLL_REGMEM_HEADER_cpv_shift 24
+#define SDMA_PKT_POLL_REGMEM_HEADER_CPV(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_cpv_mask) << SDMA_PKT_POLL_REGMEM_HEADER_cpv_shift)
+
+/* hdp_flush field: mask 0x00000001, shift 26 -> bit 26 */
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask 0x00000001
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift 26
+#define SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask) << SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift)
+
+/* func field: mask 0x00000007, shift 28 -> bits [30:28] */
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_mask 0x00000007
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_shift 28
+#define SDMA_PKT_POLL_REGMEM_HEADER_FUNC(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_func_mask) << SDMA_PKT_POLL_REGMEM_HEADER_func_shift)
+
+/* mem_poll field: mask 0x00000001, shift 31 -> bit 31. NOTE(review): the mask is an int constant, so an int argument makes 1 << 31 overflow int; pass an unsigned value */
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask 0x00000001
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift 31
+#define SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask) << SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift)
+
+/* ADDR_LO word: _offset 1 */
+/* addr_31_0 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] */
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift)
+
+/* ADDR_HI word: _offset 2 */
+/* addr_63_32 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] of this word */
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift)
+
+/* VALUE word: _offset 3 */
+/* value field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] */
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_offset 3
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_shift 0
+#define SDMA_PKT_POLL_REGMEM_VALUE_VALUE(x) (((x) & SDMA_PKT_POLL_REGMEM_VALUE_value_mask) << SDMA_PKT_POLL_REGMEM_VALUE_value_shift)
+
+/* MASK word: _offset 4 */
+/* mask field: mask 0xFFFFFFFF, shift 0 -> bits [31:0]; "mask_mask" is the _mask constant of the field named mask */
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_offset 4
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_shift 0
+#define SDMA_PKT_POLL_REGMEM_MASK_MASK(x) (((x) & SDMA_PKT_POLL_REGMEM_MASK_mask_mask) << SDMA_PKT_POLL_REGMEM_MASK_mask_shift)
+
+/* DW5 word: _offset 5, shared by the interval and retry_count fields */
+/* interval field: mask 0x0000FFFF, shift 0 -> bits [15:0] */
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_offset 5
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_mask 0x0000FFFF
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_shift 0
+#define SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_interval_mask) << SDMA_PKT_POLL_REGMEM_DW5_interval_shift)
+
+/* retry_count field: mask 0x00000FFF, shift 16 -> bits [27:16] */
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_offset 5
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask 0x00000FFF
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift 16
+#define SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask) << SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_REG_WRITE_MEM packet. Each field gets _offset (presumably the dword index in the packet - TODO confirm), _mask, _shift, and a macro computing ((x) & _mask) << _shift.
+*/
+
+/* HEADER word: every field below has _offset 0 */
+/* op field: mask 0x000000FF, shift 0 -> bits [7:0] */
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_shift)
+
+/* sub_op field: mask 0x000000FF, shift 8 -> bits [15:8] */
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_shift)
+
+/* cache_policy field: mask 0x00000007, shift 24 -> bits [26:24] */
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_offset 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_shift 24
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_shift)
+
+/* cpv field: mask 0x00000001, shift 28 -> bit 28 */
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_offset 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_shift 28
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_CPV(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_shift)
+
+/* SRC_ADDR word: _offset 1 */
+/* addr_31_2 field: mask 0x3FFFFFFF, shift 2 -> bits [31:2]. NOTE(review): x is masked before the shift, so pass the 30-bit field value (presumably address >> 2 - confirm against callers) as an unsigned type, since the int mask lets the shift overflow int */
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_offset 1
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_mask 0x3FFFFFFF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_shift 2
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_ADDR_31_2(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_shift)
+
+/* DST_ADDR_LO word: _offset 2 */
+/* addr_31_0 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] */
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_offset 2
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift)
+
+/* DST_ADDR_HI word: _offset 3 */
+/* addr_63_32 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] of this word */
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_offset 3
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_DBIT_WRITE_MEM packet. Each field gets _offset (presumably the dword index in the packet - TODO confirm), _mask, _shift, and a macro computing ((x) & _mask) << _shift.
+*/
+
+/* HEADER word: every field below has _offset 0 */
+/* op field: mask 0x000000FF, shift 0 -> bits [7:0] */
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_shift)
+
+/* sub_op field: mask 0x000000FF, shift 8 -> bits [15:8] */
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_shift)
+
+/* ea field: mask 0x00000003, shift 16 -> bits [17:16] */
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_mask 0x00000003
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_shift 16
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_EA(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_shift)
+
+/* cache_policy field: mask 0x00000007, shift 24 -> bits [26:24] */
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_shift 24
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_shift)
+
+/* cpv field: mask 0x00000001, shift 28 -> bit 28 */
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_shift 28
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_CPV(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_shift)
+
+/* DST_ADDR_LO word: _offset 1 */
+/* addr_31_0 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] */
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift)
+
+/* DST_ADDR_HI word: _offset 2 */
+/* addr_63_32 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] of this word */
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift)
+
+/* START_PAGE word: _offset 3 */
+/* addr_31_4 field: mask 0x0FFFFFFF, shift 4 -> bits [31:4]. NOTE(review): x is masked before the shift, so pass the 28-bit field value (presumably address >> 4 - confirm against callers) as an unsigned type, since the int mask lets the shift overflow int */
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_offset 3
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_mask 0x0FFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_shift 4
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_ADDR_31_4(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_shift)
+
+/* PAGE_NUM word: _offset 4 */
+/* page_num_31_0 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] */
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_offset 4
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_PAGE_NUM_31_0(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_MEM_VERIFY packet. Each field gets _offset (presumably the dword index in the packet - TODO confirm), _mask, _shift, and a macro computing ((x) & _mask) << _shift.
+*/
+
+/* HEADER word: every field below has _offset 0 */
+/* op field: mask 0x000000FF, shift 0 -> bits [7:0] */
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_OP(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_shift)
+
+/* sub_op field: mask 0x000000FF, shift 8 -> bits [15:8] */
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_shift)
+
+/* cache_policy field: mask 0x00000007, shift 24 -> bits [26:24] */
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_shift 24
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_shift)
+
+/* cpv field: mask 0x00000001, shift 28 -> bit 28 */
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_shift 28
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_CPV(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_shift)
+
+/* mode field: mask 0x00000001, shift 31 -> bit 31. NOTE(review): the mask is an int constant, so an int argument makes 1 << 31 overflow int; pass an unsigned value */
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_mask 0x00000001
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_shift 31
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_MODE(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_shift)
+
+/* PATTERN word: _offset 1 */
+/* pattern field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] */
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_offset 1
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_PATTERN(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_mask) << SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_shift)
+
+/* CMP0_ADDR_START_LO word: _offset 2 */
+/* cmp0_start_31_0 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] */
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_offset 2
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_CMP0_START_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_shift)
+
+/* CMP0_ADDR_START_HI word: _offset 3 */
+/* cmp0_start_63_32 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] of this word */
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_offset 3
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_CMP0_START_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_shift)
+
+/* CMP0_ADDR_END_LO word: _offset 4 */
+/* cmp0_end_31_0 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] */
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_offset 4
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_CMP0_END_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_shift)
+
+/* CMP0_ADDR_END_HI word: _offset 5 */
+/* cmp0_end_63_32 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] of this word */
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_offset 5
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_CMP0_END_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_shift)
+
+/* CMP1_ADDR_START_LO word: _offset 6 */
+/* cmp1_start_31_0 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] */
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_offset 6
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_CMP1_START_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_shift)
+
+/* CMP1_ADDR_START_HI word: _offset 7 */
+/* cmp1_start_63_32 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] of this word */
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_offset 7
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_CMP1_START_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_shift)
+
+/* CMP1_ADDR_END_LO word: _offset 8 */
+/* cmp1_end_31_0 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] */
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_offset 8
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_CMP1_END_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_shift)
+
+/* CMP1_ADDR_END_HI word: _offset 9 */
+/* cmp1_end_63_32 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] of this word */
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_offset 9
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_CMP1_END_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_shift)
+
+/* REC_ADDR_LO word: _offset 10 */
+/* rec_31_0 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] */
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_offset 10
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_REC_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_shift)
+
+/* REC_ADDR_HI word: _offset 11 */
+/* rec_63_32 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] of this word */
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_offset 11
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_REC_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_shift)
+
+/* RESERVED word: _offset 12 */
+/* reserved field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] */
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_offset 12
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_RESERVED(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask) << SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift)
+
+
+/*
+** Definitions for SDMA_PKT_ATOMIC packet. Each field gets _offset (presumably the dword index in the packet - TODO confirm), _mask, _shift, and a macro computing ((x) & _mask) << _shift.
+*/
+
+/* HEADER word: every field below has _offset 0 */
+/* op field: mask 0x000000FF, shift 0 -> bits [7:0] */
+#define SDMA_PKT_ATOMIC_HEADER_op_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_ATOMIC_HEADER_op_shift 0
+#define SDMA_PKT_ATOMIC_HEADER_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_op_mask) << SDMA_PKT_ATOMIC_HEADER_op_shift)
+
+/* loop field: mask 0x00000001, shift 16 -> bit 16 */
+#define SDMA_PKT_ATOMIC_HEADER_loop_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_loop_mask 0x00000001
+#define SDMA_PKT_ATOMIC_HEADER_loop_shift 16
+#define SDMA_PKT_ATOMIC_HEADER_LOOP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_loop_mask) << SDMA_PKT_ATOMIC_HEADER_loop_shift)
+
+/* tmz field: mask 0x00000001, shift 18 -> bit 18 */
+#define SDMA_PKT_ATOMIC_HEADER_tmz_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_ATOMIC_HEADER_tmz_shift 18
+#define SDMA_PKT_ATOMIC_HEADER_TMZ(x) (((x) & SDMA_PKT_ATOMIC_HEADER_tmz_mask) << SDMA_PKT_ATOMIC_HEADER_tmz_shift)
+
+/* cache_policy field: mask 0x00000007, shift 20 -> bits [22:20] */
+#define SDMA_PKT_ATOMIC_HEADER_cache_policy_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_ATOMIC_HEADER_cache_policy_shift 20
+#define SDMA_PKT_ATOMIC_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_ATOMIC_HEADER_cache_policy_mask) << SDMA_PKT_ATOMIC_HEADER_cache_policy_shift)
+
+/* cpv field: mask 0x00000001, shift 24 -> bit 24 */
+#define SDMA_PKT_ATOMIC_HEADER_cpv_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_ATOMIC_HEADER_cpv_shift 24
+#define SDMA_PKT_ATOMIC_HEADER_CPV(x) (((x) & SDMA_PKT_ATOMIC_HEADER_cpv_mask) << SDMA_PKT_ATOMIC_HEADER_cpv_shift)
+
+/* atomic_op field: mask 0x0000007F, shift 25 -> bits [31:25]. NOTE(review): the mask is an int constant, so an int argument of 0x40 or more overflows int in the shift; pass an unsigned value */
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_mask 0x0000007F
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_shift 25
+#define SDMA_PKT_ATOMIC_HEADER_ATOMIC_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_atomic_op_mask) << SDMA_PKT_ATOMIC_HEADER_atomic_op_shift)
+
+/* ADDR_LO word: _offset 1 */
+/* addr_31_0 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] */
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_ATOMIC_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask) << SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift)
+
+/* ADDR_HI word: _offset 2 */
+/* addr_63_32 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] of this word */
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_ATOMIC_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask) << SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift)
+
+/* SRC_DATA_LO word: _offset 3 */
+/* src_data_31_0 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] */
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_offset 3
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift 0
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_SRC_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask) << SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift)
+
+/* SRC_DATA_HI word: _offset 4 */
+/* src_data_63_32 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] of this word */
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_offset 4
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift 0
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_SRC_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask) << SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift)
+
+/* CMP_DATA_LO word: _offset 5 */
+/* cmp_data_31_0 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] */
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_offset 5
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift 0
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_CMP_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask) << SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift)
+
+/* CMP_DATA_HI word: _offset 6 */
+/* cmp_data_63_32 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] of this word */
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_offset 6
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift 0
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_CMP_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask) << SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift)
+
+/* LOOP_INTERVAL word: _offset 7 */
+/* loop_interval field: mask 0x00001FFF, shift 0 -> bits [12:0] */
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_offset 7
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask 0x00001FFF
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift 0
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_LOOP_INTERVAL(x) (((x) & SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask) << SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_SET packet. Each field gets _offset (presumably the dword index in the packet - TODO confirm), _mask, _shift, and a macro computing ((x) & _mask) << _shift.
+*/
+
+/* HEADER word: every field below has _offset 0 */
+/* op field: mask 0x000000FF, shift 0 -> bits [7:0] */
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift)
+
+/* sub_op field: mask 0x000000FF, shift 8 -> bits [15:8] */
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift)
+
+/* INIT_DATA_LO word: _offset 1 */
+/* init_data_31_0 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] */
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_offset 1
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_INIT_DATA_31_0(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift)
+
+/* INIT_DATA_HI word: _offset 2 */
+/* init_data_63_32 field: mask 0xFFFFFFFF, shift 0 -> bits [31:0] of this word */
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_INIT_DATA_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_GET packet. Each field gets _offset (presumably the dword index in the packet - TODO confirm), _mask, _shift, and a macro computing ((x) & _mask) << _shift.
+*/
+
+/* HEADER word: every field below has _offset 0 */
+/* op field: mask 0x000000FF, shift 0 -> bits [7:0] */
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift)
+
+/* sub_op field: mask 0x000000FF, shift 8 -> bits [15:8] */
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift)
+
+/* l2_policy field: mask 0x00000003, shift 24 -> bits [25:24] */
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_shift 24
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_shift)
+
+/* llc_policy field: mask 0x00000001, shift 26 -> bit 26 */
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_mask 0x00000001
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_shift 26
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_shift)
+
+/* cpv field: mask 0x00000001, shift 28 -> bit 28 */
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_shift 28
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_CPV(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_shift)
+
+/* WRITE_ADDR_LO word: _offset 1 */
+/* write_addr_31_3 field: mask 0x1FFFFFFF, shift 3 -> bits [31:3]. NOTE(review): x is masked before the shift, so pass the 29-bit field value (presumably address >> 3 - confirm against callers) as an unsigned type, since the int mask lets the shift overflow int */
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_offset 1
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift 3
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift)
+
+/*define for WRITE_ADDR_HI word*/
+/*define for write_addr_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_GET_GLOBAL packet: for each field, _offset is the index of the containing word, _mask is the unshifted field mask, _shift is the field's lowest bit, and the upper-case macro masks x and shifts it into position
+*/
+
+/* HEADER word (offset 0) */
+/* op field: bits [7:0] of HEADER */
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] of HEADER */
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift)
+
+/* l2_policy field: bits [25:24] of HEADER */
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_shift 24
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_shift)
+
+/* llc_policy field: bit [26] of HEADER */
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_mask 0x00000001
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_shift 26
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_shift)
+
+/* cpv field: bit [28] of HEADER */
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_shift 28
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_CPV(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_shift)
+
+/* WRITE_ADDR_LO word (offset 1) */
+/* write_addr_31_3 field: bits [31:3] of WRITE_ADDR_LO; the macro takes the 29-bit value and shifts it left by 3 */
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_offset 1
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift 3
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift)
+
+/* WRITE_ADDR_HI word (offset 2) */
+/* write_addr_63_32 field: bits [31:0] of WRITE_ADDR_HI */
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TRAP packet: for each field, _offset is the index of the containing word, _mask is the unshifted field mask, _shift is the field's lowest bit, and the upper-case macro masks x and shifts it into position
+*/
+
+/* HEADER word (offset 0) */
+/* op field: bits [7:0] of HEADER */
+#define SDMA_PKT_TRAP_HEADER_op_offset 0
+#define SDMA_PKT_TRAP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TRAP_HEADER_op_shift 0
+#define SDMA_PKT_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_op_mask) << SDMA_PKT_TRAP_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] of HEADER */
+#define SDMA_PKT_TRAP_HEADER_sub_op_offset 0
+#define SDMA_PKT_TRAP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TRAP_HEADER_sub_op_shift 8
+#define SDMA_PKT_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_sub_op_mask) << SDMA_PKT_TRAP_HEADER_sub_op_shift)
+
+/* INT_CONTEXT word (offset 1) */
+/* int_context field: bits [27:0] of INT_CONTEXT; the top four bits of the word are not covered by the mask */
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_offset 1
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift 0
+#define SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift)
+
+
+/*
+** Definitions for SDMA_PKT_DUMMY_TRAP packet: for each field, _offset is the index of the containing word, _mask is the unshifted field mask, _shift is the field's lowest bit, and the upper-case macro masks x and shifts it into position
+*/
+
+/* HEADER word (offset 0) */
+/* op field: bits [7:0] of HEADER */
+#define SDMA_PKT_DUMMY_TRAP_HEADER_op_offset 0
+#define SDMA_PKT_DUMMY_TRAP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_DUMMY_TRAP_HEADER_op_shift 0
+#define SDMA_PKT_DUMMY_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_DUMMY_TRAP_HEADER_op_mask) << SDMA_PKT_DUMMY_TRAP_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] of HEADER */
+#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_offset 0
+#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_shift 8
+#define SDMA_PKT_DUMMY_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_mask) << SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_shift)
+
+/* INT_CONTEXT word (offset 1) */
+/* int_context field: bits [27:0] of INT_CONTEXT; the top four bits of the word are not covered by the mask */
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_offset 1
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_shift 0
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_shift)
+
+
+/*
+** Definitions for SDMA_PKT_GPUVM_INV packet: for each field, _offset is the index of the containing word, _mask is the unshifted field mask, _shift is the field's lowest bit, and the upper-case macro masks x and shifts it into position
+*/
+
+/* HEADER word (offset 0) */
+/* op field: bits [7:0] of HEADER */
+#define SDMA_PKT_GPUVM_INV_HEADER_op_offset 0
+#define SDMA_PKT_GPUVM_INV_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_GPUVM_INV_HEADER_op_shift 0
+#define SDMA_PKT_GPUVM_INV_HEADER_OP(x) (((x) & SDMA_PKT_GPUVM_INV_HEADER_op_mask) << SDMA_PKT_GPUVM_INV_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] of HEADER */
+#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_offset 0
+#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_shift 8
+#define SDMA_PKT_GPUVM_INV_HEADER_SUB_OP(x) (((x) & SDMA_PKT_GPUVM_INV_HEADER_sub_op_mask) << SDMA_PKT_GPUVM_INV_HEADER_sub_op_shift)
+
+/* PAYLOAD1 word (offset 1) */
+/* per_vmid_inv_req field: bits [15:0] of PAYLOAD1 */
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_mask 0x0000FFFF
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_shift 0
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_PER_VMID_INV_REQ(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_shift)
+
+/* flush_type field: bits [18:16] of PAYLOAD1 */
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_mask 0x00000007
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_shift 16
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_FLUSH_TYPE(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_shift)
+
+/* l2_ptes field: bit [19] of PAYLOAD1 */
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_shift 19
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_shift)
+
+/* l2_pde0 field: bit [20] of PAYLOAD1 */
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_shift 20
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE0(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_shift)
+
+/* l2_pde1 field: bit [21] of PAYLOAD1 */
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_shift 21
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE1(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_shift)
+
+/* l2_pde2 field: bit [22] of PAYLOAD1 */
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_shift 22
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE2(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_shift)
+
+/* l1_ptes field: bit [23] of PAYLOAD1 */
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_shift 23
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L1_PTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_shift)
+
+/* clr_protection_fault_status_addr field: bit [24] of PAYLOAD1 */
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_shift 24
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_CLR_PROTECTION_FAULT_STATUS_ADDR(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_shift)
+
+/* log_request field: bit [25] of PAYLOAD1 */
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_shift 25
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_LOG_REQUEST(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_shift)
+
+/* four_kilobytes field: bit [26] of PAYLOAD1 */
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_shift 26
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_FOUR_KILOBYTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_shift)
+
+/* PAYLOAD2 word (offset 2) */
+/* s field: bit [0] of PAYLOAD2 */
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_offset 2
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_shift 0
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_S(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD2_s_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD2_s_shift)
+
+/* page_va_42_12 field: bits [31:1] of PAYLOAD2 (31-bit value shifted left by 1) */
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_offset 2
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_mask 0x7FFFFFFF
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_shift 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_PAGE_VA_42_12(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_shift)
+
+/* PAYLOAD3 word (offset 3) */
+/* page_va_47_43 field: bits [5:0] of PAYLOAD3. NOTE(review): the mask 0x3F is 6 bits wide although the name spans 5 bits (47..43); confirm against the SDMA packet specification */
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_offset 3
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_mask 0x0000003F
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_shift 0
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_PAGE_VA_47_43(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_shift)
+
+
+/*
+** Definitions for SDMA_PKT_GCR_REQ packet: for each field, _offset is the index of the containing word, _mask is the unshifted field mask, _shift is the field's lowest bit, and the upper-case macro masks x and shifts it into position
+*/
+
+/* HEADER word (offset 0) */
+/* op field: bits [7:0] of HEADER */
+#define SDMA_PKT_GCR_REQ_HEADER_op_offset 0
+#define SDMA_PKT_GCR_REQ_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_GCR_REQ_HEADER_op_shift 0
+#define SDMA_PKT_GCR_REQ_HEADER_OP(x) (((x) & SDMA_PKT_GCR_REQ_HEADER_op_mask) << SDMA_PKT_GCR_REQ_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] of HEADER */
+#define SDMA_PKT_GCR_REQ_HEADER_sub_op_offset 0
+#define SDMA_PKT_GCR_REQ_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_GCR_REQ_HEADER_sub_op_shift 8
+#define SDMA_PKT_GCR_REQ_HEADER_SUB_OP(x) (((x) & SDMA_PKT_GCR_REQ_HEADER_sub_op_mask) << SDMA_PKT_GCR_REQ_HEADER_sub_op_shift)
+
+/* PAYLOAD1 word (offset 1) */
+/* base_va_31_7 field: bits [31:7] of PAYLOAD1 (25-bit value shifted left by 7) */
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_offset 1
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_mask 0x01FFFFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_shift 7
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_mask) << SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_shift)
+
+/* PAYLOAD2 word (offset 2) */
+/* base_va_47_32 field: bits [15:0] of PAYLOAD2 */
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_offset 2
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_mask 0x0000FFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_shift 0
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_mask) << SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_shift)
+
+/* gcr_control_15_0 field: bits [31:16] of PAYLOAD2 */
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_offset 2
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_mask 0x0000FFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_shift 16
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_mask) << SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_shift)
+
+/* PAYLOAD3 word (offset 3) */
+/* gcr_control_18_16 field: bits [2:0] of PAYLOAD3 */
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_offset 3
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_mask 0x00000007
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_shift 0
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_mask) << SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_shift)
+
+/* limit_va_31_7 field: bits [31:7] of PAYLOAD3 (25-bit value shifted left by 7) */
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_offset 3
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_mask 0x01FFFFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_shift 7
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_mask) << SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_shift)
+
+/* PAYLOAD4 word (offset 4) */
+/* limit_va_47_32 field: bits [15:0] of PAYLOAD4 */
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_offset 4
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_mask 0x0000FFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_shift 0
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_mask) << SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_shift)
+
+/* vmid field: bits [27:24] of PAYLOAD4 */
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_offset 4
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_mask 0x0000000F
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_shift 24
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_mask) << SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_shift)
+
+
+/*
+** Definitions for SDMA_PKT_NOP packet: for each field, _offset is the index of the containing word, _mask is the unshifted field mask, _shift is the field's lowest bit, and the upper-case macro masks x and shifts it into position
+*/
+
+/* HEADER word (offset 0) */
+/* op field: bits [7:0] of HEADER */
+#define SDMA_PKT_NOP_HEADER_op_offset 0
+#define SDMA_PKT_NOP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_NOP_HEADER_op_shift 0
+#define SDMA_PKT_NOP_HEADER_OP(x) (((x) & SDMA_PKT_NOP_HEADER_op_mask) << SDMA_PKT_NOP_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] of HEADER */
+#define SDMA_PKT_NOP_HEADER_sub_op_offset 0
+#define SDMA_PKT_NOP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_NOP_HEADER_sub_op_shift 8
+#define SDMA_PKT_NOP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_NOP_HEADER_sub_op_mask) << SDMA_PKT_NOP_HEADER_sub_op_shift)
+
+/* count field: bits [29:16] of HEADER (14 bits wide) */
+#define SDMA_PKT_NOP_HEADER_count_offset 0
+#define SDMA_PKT_NOP_HEADER_count_mask 0x00003FFF
+#define SDMA_PKT_NOP_HEADER_count_shift 16
+#define SDMA_PKT_NOP_HEADER_COUNT(x) (((x) & SDMA_PKT_NOP_HEADER_count_mask) << SDMA_PKT_NOP_HEADER_count_shift)
+
+/* DATA0 word (offset 1) */
+/* data0 field: bits [31:0] of DATA0 */
+#define SDMA_PKT_NOP_DATA0_data0_offset 1
+#define SDMA_PKT_NOP_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_NOP_DATA0_data0_shift 0
+#define SDMA_PKT_NOP_DATA0_DATA0(x) (((x) & SDMA_PKT_NOP_DATA0_data0_mask) << SDMA_PKT_NOP_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_AQL_PKT_HEADER packet: for each field, _offset is the index of the containing word, _mask is the unshifted field mask, _shift is the field's lowest bit, and the upper-case macro masks x and shifts it into position
+*/
+
+/* HEADER word (offset 0) */
+/* format field: bits [7:0] of HEADER */
+#define SDMA_AQL_PKT_HEADER_HEADER_format_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_format_mask 0x000000FF
+#define SDMA_AQL_PKT_HEADER_HEADER_format_shift 0
+#define SDMA_AQL_PKT_HEADER_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_format_mask) << SDMA_AQL_PKT_HEADER_HEADER_format_shift)
+
+/* barrier field: bit [8] of HEADER */
+#define SDMA_AQL_PKT_HEADER_HEADER_barrier_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_barrier_mask 0x00000001
+#define SDMA_AQL_PKT_HEADER_HEADER_barrier_shift 8
+#define SDMA_AQL_PKT_HEADER_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_barrier_mask) << SDMA_AQL_PKT_HEADER_HEADER_barrier_shift)
+
+/* acquire_fence_scope field: bits [10:9] of HEADER */
+#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_shift 9
+#define SDMA_AQL_PKT_HEADER_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_shift)
+
+/* release_fence_scope field: bits [12:11] of HEADER */
+#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_shift 11
+#define SDMA_AQL_PKT_HEADER_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_shift)
+
+/* reserved field: bits [15:13] of HEADER */
+#define SDMA_AQL_PKT_HEADER_HEADER_reserved_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_reserved_mask 0x00000007
+#define SDMA_AQL_PKT_HEADER_HEADER_reserved_shift 13
+#define SDMA_AQL_PKT_HEADER_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_reserved_mask) << SDMA_AQL_PKT_HEADER_HEADER_reserved_shift)
+
+/* op field: bits [19:16] of HEADER (4 bits wide) */
+#define SDMA_AQL_PKT_HEADER_HEADER_op_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_op_mask 0x0000000F
+#define SDMA_AQL_PKT_HEADER_HEADER_op_shift 16
+#define SDMA_AQL_PKT_HEADER_HEADER_OP(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_op_mask) << SDMA_AQL_PKT_HEADER_HEADER_op_shift)
+
+/* subop field: bits [22:20] of HEADER */
+#define SDMA_AQL_PKT_HEADER_HEADER_subop_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_subop_mask 0x00000007
+#define SDMA_AQL_PKT_HEADER_HEADER_subop_shift 20
+#define SDMA_AQL_PKT_HEADER_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_subop_mask) << SDMA_AQL_PKT_HEADER_HEADER_subop_shift)
+
+/* cpv field: bit [28] of HEADER */
+#define SDMA_AQL_PKT_HEADER_HEADER_cpv_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_cpv_mask 0x00000001
+#define SDMA_AQL_PKT_HEADER_HEADER_cpv_shift 28
+#define SDMA_AQL_PKT_HEADER_HEADER_CPV(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_cpv_mask) << SDMA_AQL_PKT_HEADER_HEADER_cpv_shift)
+
+
+/*
+** Definitions for SDMA_AQL_PKT_COPY_LINEAR packet (words 0..15): for each field, _offset is the index of the containing word, _mask is the unshifted field mask, _shift is the field's lowest bit, and the upper-case macro masks x and shifts it into position
+*/
+
+/* HEADER word (offset 0) */
+/* format field: bits [7:0] of HEADER */
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_mask 0x000000FF
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_shift)
+
+/* barrier field: bit [8] of HEADER */
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_mask 0x00000001
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_shift 8
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_shift)
+
+/* acquire_fence_scope field: bits [10:9] of HEADER */
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_shift 9
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_shift)
+
+/* release_fence_scope field: bits [12:11] of HEADER */
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_shift 11
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_shift)
+
+/* reserved field: bits [15:13] of HEADER */
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_mask 0x00000007
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_shift 13
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_shift)
+
+/* op field: bits [19:16] of HEADER (4 bits wide) */
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_mask 0x0000000F
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_shift 16
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_shift)
+
+/* subop field: bits [22:20] of HEADER */
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_mask 0x00000007
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_shift 20
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_shift)
+
+/* cpv field: bit [28] of HEADER */
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_mask 0x00000001
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_shift 28
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_CPV(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_shift)
+
+/* RESERVED_DW1 word (offset 1) */
+/* reserved_dw1 field: bits [31:0] of RESERVED_DW1 */
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_offset 1
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_RESERVED_DW1(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_shift)
+
+/* RETURN_ADDR_LO word (offset 2) */
+/* return_addr_31_0 field: bits [31:0] of RETURN_ADDR_LO */
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_offset 2
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_RETURN_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_shift)
+
+/* RETURN_ADDR_HI word (offset 3) */
+/* return_addr_63_32 field: bits [31:0] of RETURN_ADDR_HI */
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_offset 3
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_RETURN_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_shift)
+
+/* COUNT word (offset 4) */
+/* count field: bits [21:0] of COUNT (22 bits wide) */
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_offset 4
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_shift)
+
+/* PARAMETER word (offset 5) */
+/* dst_sw field: bits [17:16] of PARAMETER */
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 5
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift)
+
+/* dst_cache_policy field: bits [20:18] of PARAMETER */
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_offset 5
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_mask 0x00000007
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_shift 18
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_DST_CACHE_POLICY(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_shift)
+
+/* src_sw field: bits [25:24] of PARAMETER */
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 5
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_shift)
+
+/* src_cache_policy field: bits [28:26] of PARAMETER */
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_offset 5
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_mask 0x00000007
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_shift 26
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_SRC_CACHE_POLICY(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_shift)
+
+/* SRC_ADDR_LO word (offset 6) */
+/* src_addr_31_0 field: bits [31:0] of SRC_ADDR_LO */
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 6
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/* SRC_ADDR_HI word (offset 7) */
+/* src_addr_63_32 field: bits [31:0] of SRC_ADDR_HI */
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 7
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/* DST_ADDR_LO word (offset 8) */
+/* dst_addr_31_0 field: bits [31:0] of DST_ADDR_LO */
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 8
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/* DST_ADDR_HI word (offset 9) */
+/* dst_addr_63_32 field: bits [31:0] of DST_ADDR_HI */
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 9
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/* RESERVED_DW10 word (offset 10) */
+/* reserved_dw10 field: bits [31:0] of RESERVED_DW10 */
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_offset 10
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_RESERVED_DW10(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_shift)
+
+/* RESERVED_DW11 word (offset 11) */
+/* reserved_dw11 field: bits [31:0] of RESERVED_DW11 */
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_offset 11
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_RESERVED_DW11(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_shift)
+
+/* RESERVED_DW12 word (offset 12) */
+/* reserved_dw12 field: bits [31:0] of RESERVED_DW12 */
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_offset 12
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_RESERVED_DW12(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_shift)
+
+/* RESERVED_DW13 word (offset 13) */
+/* reserved_dw13 field: bits [31:0] of RESERVED_DW13 */
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_offset 13
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_RESERVED_DW13(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_shift)
+
+/* COMPLETION_SIGNAL_LO word (offset 14) */
+/* completion_signal_31_0 field: bits [31:0] of COMPLETION_SIGNAL_LO */
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_offset 14
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_COMPLETION_SIGNAL_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift)
+
+/* COMPLETION_SIGNAL_HI word (offset 15) */
+/* completion_signal_63_32 field: bits [31:0] of COMPLETION_SIGNAL_HI */
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_offset 15
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift)
+
+
+/*
+** Definitions for SDMA_AQL_PKT_BARRIER_OR packet
+*/
+
+/*define for HEADER word*/
+/*
+ * Bit layout of the HEADER word, as given by the mask/shift pairs below:
+ *   [7:0]   format
+ *   [8]     barrier
+ *   [10:9]  acquire_fence_scope
+ *   [12:11] release_fence_scope
+ *   [15:13] reserved
+ *   [19:16] op
+ *   [22:20] subop
+ *   [28]    cpv
+ * Each upper-case FIELD(x) macro masks x to the field width and shifts it
+ * into position so the results can be OR-ed together into one dword.
+ * NOTE(review): *_offset appears to be the dword index of the word inside
+ * the packet (0 for HEADER) -- confirm against the packet specification.
+ */
+/*define for format field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_mask 0x000000FF
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_format_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_format_shift)
+
+/*define for barrier field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_mask 0x00000001
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_shift 8
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_shift)
+
+/*define for acquire_fence_scope field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_shift 9
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_shift)
+
+/*define for release_fence_scope field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_shift 11
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_shift)
+
+/*define for reserved field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_shift 13
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_shift)
+
+/*define for op field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_mask 0x0000000F
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_shift 16
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_OP(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_op_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_op_shift)
+
+/*define for subop field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_shift 20
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_shift)
+
+/*define for cpv field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_mask 0x00000001
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_shift 28
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_CPV(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_shift)
+
+/*define for RESERVED_DW1 word*/
+/*define for reserved_dw1 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_offset 1
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_RESERVED_DW1(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_shift)
+
+/*define for DEPENDENT_ADDR_0_LO word*/
+/*define for dependent_addr_0_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_offset 2
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_DEPENDENT_ADDR_0_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_shift)
+
+/*define for DEPENDENT_ADDR_0_HI word*/
+/*define for dependent_addr_0_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_offset 3
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_DEPENDENT_ADDR_0_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_shift)
+
+/*define for DEPENDENT_ADDR_1_LO word*/
+/*define for dependent_addr_1_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_offset 4
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_DEPENDENT_ADDR_1_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_shift)
+
+/*define for DEPENDENT_ADDR_1_HI word*/
+/*define for dependent_addr_1_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_offset 5
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_DEPENDENT_ADDR_1_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_shift)
+
+/*define for DEPENDENT_ADDR_2_LO word*/
+/*define for dependent_addr_2_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_offset 6
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_DEPENDENT_ADDR_2_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_shift)
+
+/*define for DEPENDENT_ADDR_2_HI word*/
+/*define for dependent_addr_2_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_offset 7
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_DEPENDENT_ADDR_2_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_shift)
+
+/*define for DEPENDENT_ADDR_3_LO word*/
+/*define for dependent_addr_3_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_offset 8
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_DEPENDENT_ADDR_3_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_shift)
+
+/*define for DEPENDENT_ADDR_3_HI word*/
+/*define for dependent_addr_3_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_offset 9
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_DEPENDENT_ADDR_3_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_shift)
+
+/*define for DEPENDENT_ADDR_4_LO word*/
+/*define for dependent_addr_4_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_offset 10
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_DEPENDENT_ADDR_4_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_shift)
+
+/*define for DEPENDENT_ADDR_4_HI word*/
+/*define for dependent_addr_4_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_offset 11
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_DEPENDENT_ADDR_4_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_shift)
+
+/*define for CACHE_POLICY word*/
+/*
+ * Five 3-bit fields packed into the word with _offset 12, one every five
+ * bits (per the mask/shift pairs below):
+ *   [2:0] cache_policy0, [7:5] cache_policy1, [12:10] cache_policy2,
+ *   [17:15] cache_policy3, [22:20] cache_policy4.
+ * NOTE(review): presumably cache_policyN applies to DEPENDENT_ADDR_N of this
+ * packet -- confirm against the packet specification.
+ */
+/*define for cache_policy0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_shift)
+
+/*define for cache_policy1 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_shift 5
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY1(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_shift)
+
+/*define for cache_policy2 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_shift 10
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY2(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_shift)
+
+/*define for cache_policy3 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_shift 15
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY3(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_shift)
+
+/*define for cache_policy4 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_shift 20
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY4(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_shift)
+
+/*define for RESERVED_DW13 word*/
+/*define for reserved_dw13 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_offset 13
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_RESERVED_DW13(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_shift)
+
+/*define for COMPLETION_SIGNAL_LO word*/
+/*define for completion_signal_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_offset 14
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_COMPLETION_SIGNAL_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift)
+
+/*define for COMPLETION_SIGNAL_HI word*/
+/*define for completion_signal_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_offset 15
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift)
+
+
+#endif /* __SDMA_V6_0_0_PKT_OPEN_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
new file mode 100644
index 000000000000..2b81344dcd66
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -0,0 +1,1859 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_trace.h"
+
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "hdp/hdp_6_0_0_offset.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_12_0_0.h"
+
+#include "soc15_common.h"
+#include "soc15.h"
+#include "sdma_v6_0_0_pkt_open.h"
+#include "nbio_v4_3.h"
+#include "sdma_common.h"
+#include "sdma_v7_0.h"
+#include "v12_structs.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
+
+MODULE_FIRMWARE("amdgpu/sdma_7_0_0.bin");
+MODULE_FIRMWARE("amdgpu/sdma_7_0_1.bin");
+
+/*
+ * Register addressing constants consumed by sdma_v7_0_get_reg_offset():
+ *  - SDMA1_REG_OFFSET is added to an SDMA0 register offset when instance 1
+ *    is addressed (ordinary registers).
+ *  - Offsets inside the inclusive range [SDMA0_HYP_DEC_REG_START,
+ *    SDMA0_HYP_DEC_REG_END] are resolved against a different GC base and
+ *    are advanced by SDMA1_HYP_DEC_REG_OFFSET per instance instead.
+ */
+#define SDMA1_REG_OFFSET 0x600
+#define SDMA0_HYP_DEC_REG_START 0x5880
+#define SDMA0_HYP_DEC_REG_END 0x589a
+#define SDMA1_HYP_DEC_REG_OFFSET 0x20
+
+/*
+ * define for compression field for sdma7
+ *
+ * Single-bit "compress" field at bit 16 of the CONSTANT_FILL packet header
+ * (mask 0x1, shift 16); defined locally in this file.
+ */
+#define SDMA_PKT_CONSTANT_FILL_HEADER_compress_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_compress_mask 0x00000001
+#define SDMA_PKT_CONSTANT_FILL_HEADER_compress_shift 16
+#define SDMA_PKT_CONSTANT_FILL_HEADER_COMPRESS(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_compress_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_compress_shift)
+
+/*
+ * Table of SDMA0 (GC block, instance 0) registers: engine status, ucode
+ * revision, UTCL1 read/write status and XNACK, the ring-buffer and IB state
+ * of queues 0..2, interrupt status, VM control, GRBM_STATUS2 and chicken bits.
+ * NOTE(review): presumably consumed by the register-dump / debug path of this
+ * IP block -- the consumer is not visible in this part of the file.
+ * NOTE(review): QUEUE0 lists IB_CNTL and QUEUE_STATUS0 while QUEUE1/QUEUE2
+ * have no IB_CNTL entry -- confirm whether that asymmetry is intended.
+ */
+static const struct amdgpu_hwip_reg_entry sdma_reg_list_7_0[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS1_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS2_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS3_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS4_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS5_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS6_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UCODE_REV),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_INT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_VM_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_CHICKEN_BITS),
+};
+
+static void sdma_v7_0_set_ring_funcs(struct amdgpu_device *adev);
+static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev);
+static void sdma_v7_0_set_vm_pte_funcs(struct amdgpu_device *adev);
+static void sdma_v7_0_set_irq_funcs(struct amdgpu_device *adev);
+static int sdma_v7_0_start(struct amdgpu_device *adev);
+
+/*
+ * sdma_v7_0_get_reg_offset - translate an SDMA0 register offset for an instance
+ *
+ * @adev: amdgpu_device pointer
+ * @instance: SDMA instance index
+ * @internal_offset: SDMA0-relative register offset
+ *
+ * Offsets inside [SDMA0_HYP_DEC_REG_START, SDMA0_HYP_DEC_REG_END] use GC base
+ * segment 1 and advance by SDMA1_HYP_DEC_REG_OFFSET per instance. All other
+ * offsets use GC base segment 0 and get SDMA1_REG_OFFSET added for instance 1
+ * only. Returns base + adjusted offset.
+ */
+static u32 sdma_v7_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
+{
+	bool in_hyp_dec_range = internal_offset >= SDMA0_HYP_DEC_REG_START &&
+				internal_offset <= SDMA0_HYP_DEC_REG_END;
+
+	if (in_hyp_dec_range) {
+		/* instance 0 contributes a zero stride, so no special case is needed */
+		return adev->reg_offset[GC_HWIP][0][1] + internal_offset +
+		       SDMA1_HYP_DEC_REG_OFFSET * instance;
+	}
+
+	if (instance == 1)
+		internal_offset += SDMA1_REG_OFFSET;
+
+	return adev->reg_offset[GC_HWIP][0][0] + internal_offset;
+}
+
+/*
+ * sdma_v7_0_ring_init_cond_exec - emit a COND_EXE packet with a placeholder
+ *
+ * @ring: amdgpu ring pointer
+ * @addr: 64-bit address written into the packet (low dword, then high dword)
+ *
+ * Writes the COND_EXE header, @addr, the constant 1 and finally a zero
+ * placeholder dword. Returns the masked ring position of that placeholder so
+ * the caller can patch it later.
+ */
+static unsigned sdma_v7_0_ring_init_cond_exec(struct amdgpu_ring *ring,
+					      uint64_t addr)
+{
+	unsigned patch_pos;
+
+	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE));
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	amdgpu_ring_write(ring, 1);
+
+	/* record where the dummy dword lands before writing it */
+	patch_pos = ring->wptr & ring->buf_mask;
+	amdgpu_ring_write(ring, 0);
+
+	return patch_pos;
+}
+
+/**
+ * sdma_v7_0_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Reads the 64-bit value stored at ring->rptr_cpu_addr and returns it
+ * shifted right by two.
+ */
+static uint64_t sdma_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	/* XXX check if swapping is necessary on BE */
+	u64 *rptr_shadow = (u64 *)ring->rptr_cpu_addr;
+
+	DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr_shadow);
+
+	return *rptr_shadow >> 2;
+}
+
+/**
+ * sdma_v7_0_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * For doorbell rings, reads the 64-bit value at ring->wptr_cpu_addr and
+ * returns it shifted right by two. Rings without a doorbell report 0.
+ */
+static uint64_t sdma_v7_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	u64 raw_wptr;
+
+	if (!ring->use_doorbell)
+		return 0;
+
+	/* XXX check if swapping is necessary on BE */
+	raw_wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
+	DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", raw_wptr);
+
+	return raw_wptr >> 2;
+}
+
+/**
+ * sdma_v7_0_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Publishes ring->wptr << 2. Doorbell rings store the value at
+ * ring->wptr_cpu_addr and then ring the 64-bit doorbell; other rings write
+ * the low and high halves to the QUEUE0 RB_WPTR / RB_WPTR_HI registers of
+ * instance ring->me.
+ */
+static void sdma_v7_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	u64 shifted_wptr;
+
+	DRM_DEBUG("Setting write pointer\n");
+
+	shifted_wptr = ring->wptr << 2;
+
+	if (!ring->use_doorbell) {
+		DRM_DEBUG("Not using doorbell -- "
+			  "regSDMA%i_GFX_RB_WPTR == 0x%08x "
+			  "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+			  ring->me,
+			  lower_32_bits(shifted_wptr),
+			  ring->me,
+			  upper_32_bits(shifted_wptr));
+		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev,
+							     ring->me,
+							     regSDMA0_QUEUE0_RB_WPTR),
+				lower_32_bits(shifted_wptr));
+		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev,
+							     ring->me,
+							     regSDMA0_QUEUE0_RB_WPTR_HI),
+				upper_32_bits(shifted_wptr));
+		return;
+	}
+
+	DRM_DEBUG("Using doorbell -- "
+		  "wptr_offs == 0x%08x "
+		  "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+		  "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+		  ring->wptr_offs,
+		  lower_32_bits(shifted_wptr),
+		  upper_32_bits(shifted_wptr));
+	/* XXX check if swapping is necessary on BE */
+	/* update the memory copy first, then notify through the doorbell */
+	atomic64_set((atomic64_t *)ring->wptr_cpu_addr, shifted_wptr);
+	DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+		  ring->doorbell_index, shifted_wptr);
+	WDOORBELL64(ring->doorbell_index, shifted_wptr);
+}
+
+/**
+ * sdma_v7_0_ring_insert_nop - pad the ring with NOP dwords
+ *
+ * @ring: amdgpu ring pointer
+ * @count: number of dwords to emit
+ *
+ * Emits exactly @count dwords. When the ring's SDMA instance has burst_nop
+ * set, the first dword is a NOP header whose COUNT field is @count - 1;
+ * every remaining dword is the plain ring NOP value. Nothing is written when
+ * @count is 0.
+ */
+static void sdma_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+	/* same unsigned type as @count: avoids a signed/unsigned comparison */
+	uint32_t i = 0;
+
+	if (!count)
+		return;
+
+	/* the burst decision is loop-invariant, so handle the first dword up front */
+	if (sdma && sdma->burst_nop) {
+		amdgpu_ring_write(ring, ring->funcs->nop |
+				  SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+		i = 1;
+	}
+
+	for (; i < count; i++)
+		amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
+/**
+ * sdma_v7_0_ring_emit_ib - Schedule an IB on the DMA engine
+ *
+ * @ring: amdgpu ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: IB object to schedule
+ * @flags: unused
+ *
+ * Pads the ring with NOPs and then emits a 6-dword INDIRECT packet that
+ * carries the IB address, its length in dwords and the CSA address.
+ */
+static void sdma_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
+				   struct amdgpu_job *job,
+				   struct amdgpu_ib *ib,
+				   uint32_t flags)
+{
+	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+	uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
+	uint32_t pad_dw;
+
+	/*
+	 * The dword following the IB packet has to sit on an 8-dword
+	 * boundary. The packet is 6 dwords, so we need x NOPs such that
+	 * (wptr + 6 + x) % 8 == 0, i.e. x = (2 - wptr) mod 8.
+	 */
+	pad_dw = (2 - lower_32_bits(ring->wptr)) & 7;
+	sdma_v7_0_ring_insert_nop(ring, pad_dw);
+
+	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_INDIRECT) |
+			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
+	/* the low five address bits are cleared: base must be 32 byte aligned */
+	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
+	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+	amdgpu_ring_write(ring, ib->length_dw);
+	amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
+	amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
+}
+
+/**
+ * sdma_v7_0_ring_emit_mem_sync - flush the IB by graphics cache rinse
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Emits a 5-dword GCR_REQ packet with zero base/limit addresses and VMID 0
+ * whose control word requests GL2 invalidate + writeback and GLM, GL1, GLV,
+ * GLK and GLI invalidation.
+ */
+static void sdma_v7_0_ring_emit_mem_sync(struct amdgpu_ring *ring)
+{
+	uint32_t gcr_cntl;
+
+	/* flush entire cache L0/L1/L2, this can be optimized by performance requirement */
+	gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
+		   SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
+		   SDMA_GCR_GLI_INV(1);
+
+	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_GCR_REQ));
+	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
+	/* the control word is split: bits 15:0 go here, bits 18:16 in the next dword */
+	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) |
+			  SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0));
+	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) |
+			  SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16));
+	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) |
+			  SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0));
+}
+
+
+/**
+ * sdma_v7_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Emits a 6-dword POLL_REGMEM packet with the HDP_FLUSH bit set. The NBIO
+ * flush-done and flush-request offsets are written as byte addresses, and
+ * the instance's flush bit serves as both reference and mask.
+ */
+static void sdma_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+	/* the SDMA0 bit shifted left by the instance index */
+	u32 ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
+
+	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+	amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
+	amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
+	amdgpu_ring_write(ring, ref_and_mask); /* reference */
+	amdgpu_ring_write(ring, ref_and_mask); /* mask */
+	/* retry count, poll interval */
+	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
+}
+
+/* Emit one 4-dword FENCE packet that stores @value at the dword-aligned @addr. */
+static void sdma_v7_0_ring_emit_fence_dword(struct amdgpu_ring *ring, u64 addr, u32 value)
+{
+	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
+			  SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Uncached (UC) */
+	/* the two low address bits must be zero */
+	BUG_ON(addr & 0x3);
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	amdgpu_ring_write(ring, value);
+}
+
+/**
+ * sdma_v7_0_ring_emit_fence - emit a fence on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ * @addr: address
+ * @seq: fence seq number
+ * @flags: fence flags
+ *
+ * Emits a FENCE packet writing the low 32 bits of @seq to @addr. With
+ * AMDGPU_FENCE_FLAG_64BIT a second FENCE packet writes the high 32 bits to
+ * @addr + 4. With AMDGPU_FENCE_FLAG_INT a TRAP packet follows to raise an
+ * interrupt.
+ */
+static void sdma_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+				      unsigned flags)
+{
+	/* write the fence */
+	sdma_v7_0_ring_emit_fence_dword(ring, addr, lower_32_bits(seq));
+
+	/* optionally write high bits as well */
+	if (flags & AMDGPU_FENCE_FLAG_64BIT)
+		sdma_v7_0_ring_emit_fence_dword(ring, addr + 4, upper_32_bits(seq));
+
+	if (!(flags & AMDGPU_FENCE_FLAG_INT))
+		return;
+
+	/* generate an interrupt */
+	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP));
+	amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
+}
+
+/**
+ * sdma_v7_0_gfx_stop - stop the gfx async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * For every SDMA instance, clears RB_ENABLE in QUEUE0_RB_CNTL and IB_ENABLE
+ * in QUEUE0_IB_CNTL with a read-modify-write of each register.
+ */
+static void sdma_v7_0_gfx_stop(struct amdgpu_device *adev)
+{
+	int i;
+
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		u32 rb_cntl_reg = sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL);
+		u32 ib_cntl_reg = sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL);
+		u32 val;
+
+		/* disable the ring buffer first ... */
+		val = RREG32_SOC15_IP(GC, rb_cntl_reg);
+		val = REG_SET_FIELD(val, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 0);
+		WREG32_SOC15_IP(GC, rb_cntl_reg, val);
+
+		/* ... then the indirect buffer fetch */
+		val = RREG32_SOC15_IP(GC, ib_cntl_reg);
+		val = REG_SET_FIELD(val, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 0);
+		WREG32_SOC15_IP(GC, ib_cntl_reg, val);
+	}
+}
+
+/**
+ * sdma_v7_0_rlc_stop - stop the compute async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Intended to stop the compute async dma queues. Not implemented yet: the
+ * body is empty, so calling this currently has no effect.
+ */
+static void sdma_v7_0_rlc_stop(struct amdgpu_device *adev)
+{
+ /* XXX todo */
+}
+
+/**
+ * sdma_v7_0_ctx_switch_enable - enable/disable the async dma engines context switch
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs context switch.
+ *
+ * Placeholder for halting or unhalting the async dma engines context
+ * switch. The body is empty, so both arguments are currently ignored and no
+ * register is touched.
+ */
+static void sdma_v7_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
+{
+}
+
+/**
+ * sdma_v7_0_enable - halt or unhalt the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs.
+ *
+ * When disabling, the gfx and rlc queues are stopped first. Unless running
+ * as an SR-IOV VF, the HALT field of every instance's MCU_CNTL register is
+ * then set to 0 (@enable true) or 1 (@enable false).
+ */
+static void sdma_v7_0_enable(struct amdgpu_device *adev, bool enable)
+{
+	u32 halt = enable ? 0 : 1;
+	int i;
+
+	if (!enable) {
+		sdma_v7_0_gfx_stop(adev);
+		sdma_v7_0_rlc_stop(adev);
+	}
+
+	/* a VF returns here without programming MCU_CNTL */
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		u32 mcu_cntl_reg = sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL);
+		u32 val = RREG32_SOC15_IP(GC, mcu_cntl_reg);
+
+		val = REG_SET_FIELD(val, SDMA0_MCU_CNTL, HALT, halt);
+		WREG32_SOC15_IP(GC, mcu_cntl_reg, val);
+	}
+}
+
+/**
+ * sdma_v7_0_gfx_resume_instance - start/restart a certain sdma engine
+ *
+ * @adev: amdgpu_device pointer
+ * @i: index of the SDMA instance to program
+ * @restore: true to keep ring->wptr and seed the hardware read/write
+ * pointers from it; false to reset the ring pointers to 0
+ *
+ * Programs the QUEUE0 ring buffer registers of instance @i (size, base,
+ * rptr/wptr, write-back and poll addresses, doorbell), the hang watchdog and
+ * the UTCL1 settings, enables ring and IB fetching, and finally runs
+ * amdgpu_ring_test_helper() on the ring.
+ *
+ * Returns 0 for success, or the ring test error; in the error case
+ * ring->sched.ready is cleared again.
+ */
+static int sdma_v7_0_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
+{
+ struct amdgpu_ring *ring;
+ u32 rb_cntl, ib_cntl;
+ u32 rb_bufsz;
+ u32 doorbell;
+ u32 doorbell_offset;
+ u32 temp;
+ u64 wptr_gpu_addr;
+ int r;
+
+ ring = &adev->sdma.instance[i].ring;
+
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
+#ifdef __BIG_ENDIAN
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
+#endif
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ if (restore) {
+ /* on restart both rptr and wptr are seeded from the saved wptr (dwords -> bytes) */
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ } else {
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), 0);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), 0);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), 0);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), 0);
+ }
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI),
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO),
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ /* rb_cntl is only accumulated here; it is written out when the ring is enabled below */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+ if (amdgpu_sriov_vf(adev))
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1);
+ else
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1);
+
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
+
+ if (!restore)
+ ring->wptr = 0;
+
+ /* before programming wptr to a lesser value, minor_ptr_update must be set first */
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1);
+
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ /*
+ * Shift the full 64-bit pointer before splitting it, exactly as the
+ * restore path above does, so that bits carried out of the low word
+ * end up in WPTR_HI instead of being dropped.
+ */
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ }
+
+ doorbell = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL));
+ doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET));
+
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0);
+ }
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset);
+
+ /* the doorbell range is programmed once, from instance 0, sized for all instances */
+ if (i == 0)
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index,
+ adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);
+
+ if (amdgpu_sriov_vf(adev))
+ sdma_v7_0_ring_set_wptr(ring);
+
+ /* set minor_ptr_update to 0 after wptr is programmed */
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0);
+
+ /* Set up sdma hang watchdog */
+ temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL));
+ /* 100ms per unit */
+ temp = REG_SET_FIELD(temp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT,
+ max(adev->usec_timeout/100000, 1));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), temp);
+
+ /* Set up RESP_MODE to non-copy addresses */
+ temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), temp);
+
+ /* program default cache read and write policy */
+ temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE));
+ /*
+ * clean read policy and write policy bits (15:12).
+ * NOTE(review): as a 32-bit mask 0xFF0FFF also clears bits 31:24 -
+ * confirm that is intended for this register.
+ */
+ temp &= 0xFF0FFF;
+ temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
+ (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), temp);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* unhalt engine */
+ temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_MCU_CNTL, HALT, 0);
+ temp = REG_SET_FIELD(temp, SDMA0_MCU_CNTL, RESET, 0);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), temp);
+ }
+
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+ /* enable DMA IBs */
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
+ /* mark the ring ready before the test below; it is cleared again if the test fails */
+ ring->sched.ready = true;
+
+ if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need the two lines below */
+ sdma_v7_0_ctx_switch_enable(adev, true);
+ sdma_v7_0_enable(adev, true);
+ }
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ ring->sched.ready = false;
+
+ return r;
+}
+
+/**
+ * sdma_v7_0_gfx_resume - bring up the gfx ring of every SDMA instance
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Calls sdma_v7_0_gfx_resume_instance() without wptr restore for each
+ * instance in order and stops at the first one that fails.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v7_0_gfx_resume(struct amdgpu_device *adev)
+{
+ int inst, ret = 0;
+
+ /* the loop condition drops out as soon as one instance reports an error */
+ for (inst = 0; !ret && inst < adev->sdma.num_instances; inst++)
+ ret = sdma_v7_0_gfx_resume_instance(adev, inst, false);
+
+ return ret;
+}
+
+/**
+ * sdma_v7_0_rlc_resume - resume hook for the compute (RLC) DMA queues
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Currently a no-op: no registers are touched and nothing is enabled here.
+ * Always returns 0.
+ */
+static int sdma_v7_0_rlc_resume(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
+/*
+ * Release the per-instance firmware buffer objects that
+ * sdma_v7_0_load_microcode() allocates, together with their GPU address and
+ * CPU mapping fields.
+ */
+static void sdma_v12_0_free_ucode_buffer(struct amdgpu_device *adev)
+{
+ struct amdgpu_sdma_instance *inst;
+ int idx;
+
+ for (idx = 0; idx < adev->sdma.num_instances; idx++) {
+ inst = &adev->sdma.instance[idx];
+ amdgpu_bo_free_kernel(&inst->sdma_fw_obj,
+ &inst->sdma_fw_gpu_addr,
+ (void **)&inst->sdma_fw_ptr);
+ }
+}
+
+/**
+ * sdma_v7_0_load_microcode - load the sDMA ME ucode
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Halts the engines, then for every instance copies the ucode image found in
+ * instance 0's firmware blob into a freshly created VRAM buffer, points the
+ * instruction cache at it, primes the cache and polls until the engine
+ * reports that the ucode finished initializing.
+ *
+ * Buffers created before a failure are not released here; that is left to
+ * the caller.
+ *
+ * Returns 0 for success, -EINVAL if the ucode is not available or the init
+ * poll times out, or the error from amdgpu_bo_create_reserved().
+ */
+static int sdma_v7_0_load_microcode(struct amdgpu_device *adev)
+{
+ const struct sdma_firmware_header_v3_0 *hdr;
+ const __le32 *fw_data;
+ u32 fw_size;
+ uint32_t tmp, sdma_status, ic_op_cntl;
+ int i, r, j;
+
+ /* halt the MEs */
+ sdma_v7_0_enable(adev, false);
+
+ if (!adev->sdma.instance[0].fw)
+ return -EINVAL;
+
+ /* the same image (instance 0's firmware) is used for every instance */
+ hdr = (const struct sdma_firmware_header_v3_0 *)
+ adev->sdma.instance[0].fw->data;
+ amdgpu_ucode_print_sdma_hdr(&hdr->header);
+
+ fw_data = (const __le32 *)(adev->sdma.instance[0].fw->data +
+ le32_to_cpu(hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->ucode_size_bytes);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = amdgpu_bo_create_reserved(adev, fw_size,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->sdma.instance[i].sdma_fw_obj,
+ &adev->sdma.instance[i].sdma_fw_gpu_addr,
+ (void **)&adev->sdma.instance[i].sdma_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create sdma ucode bo\n", r);
+ return r;
+ }
+
+ memcpy(adev->sdma.instance[i].sdma_fw_ptr, fw_data, fw_size);
+
+ /* the CPU mapping and reservation are only needed for the copy above */
+ amdgpu_bo_kunmap(adev->sdma.instance[i].sdma_fw_obj);
+ amdgpu_bo_unreserve(adev->sdma.instance[i].sdma_fw_obj);
+
+ tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_CNTL));
+ tmp = REG_SET_FIELD(tmp, SDMA0_IC_CNTL, GPA, 0);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_CNTL), tmp);
+
+ /* instruction cache base = GPU address of the ucode buffer */
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_BASE_LO),
+ lower_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_BASE_HI),
+ upper_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr));
+
+ tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_OP_CNTL));
+ tmp = REG_SET_FIELD(tmp, SDMA0_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_OP_CNTL), tmp);
+
+ /* Wait for sdma ucode init complete: both ICACHE_PRIMED and UCODE_INIT_DONE must be set */
+ for (j = 0; j < adev->usec_timeout; j++) {
+ ic_op_cntl = RREG32_SOC15_IP(GC,
+ sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_OP_CNTL));
+ sdma_status = RREG32_SOC15_IP(GC,
+ sdma_v7_0_get_reg_offset(adev, i, regSDMA0_STATUS_REG));
+ if ((REG_GET_FIELD(ic_op_cntl, SDMA0_IC_OP_CNTL, ICACHE_PRIMED) == 1) &&
+ (REG_GET_FIELD(sdma_status, SDMA0_STATUS_REG, UCODE_INIT_DONE) == 1))
+ break;
+ udelay(1);
+ }
+
+ /* the loop ran to completion without seeing both bits: treat as failure */
+ if (j >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to init sdma ucode\n");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * sdma_v7_0_soft_reset - soft reset all SDMA instances and restart them
+ *
+ * @ip_block: IP block whose ->adev owns the SDMA instances
+ *
+ * Stops the gfx rings, then for each instance halts and resets the MCU,
+ * clears the QUEUE0 preempt request and pulses that instance's bit in
+ * GRBM_SOFT_RESET. Finishes by running the normal start sequence.
+ * Returns the result of sdma_v7_0_start().
+ */
+static int sdma_v7_0_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 tmp;
+ int i;
+
+ sdma_v7_0_gfx_stop(adev);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ /* the FREEZE step below is left disabled */
+ //tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_FREEZE));
+ //tmp |= SDMA0_FREEZE__FREEZE_MASK;
+ //WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_FREEZE), tmp);
+ /* halt the MCU and hold it in reset */
+ tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL));
+ tmp |= SDMA0_MCU_CNTL__HALT_MASK;
+ tmp |= SDMA0_MCU_CNTL__RESET_MASK;
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), tmp);
+
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_PREEMPT), 0);
+
+ udelay(100);
+
+ /* assert this instance's soft reset bit; the read-back value is unused (presumably it posts the write) */
+ tmp = GRBM_SOFT_RESET__SOFT_RESET_SDMA0_MASK << i;
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+
+ udelay(100);
+
+ /* release the soft reset again */
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, 0);
+ tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+
+ udelay(100);
+ }
+
+ return sdma_v7_0_start(adev);
+}
+
+/*
+ * Decide whether a soft reset is needed: run the IB test with a one second
+ * timeout on each instance's ring and report true as soon as one of them
+ * fails.
+ */
+static bool sdma_v7_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ long timeout = msecs_to_jiffies(1000);
+ struct amdgpu_ring *sdma_ring;
+ int inst;
+
+ for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+ sdma_ring = &adev->sdma.instance[inst].ring;
+ if (amdgpu_ring_test_ib(sdma_ring, timeout) != 0)
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Per-queue reset: validate the ring's instance index, reset the legacy
+ * queue through MES, then reprogram that instance with its write pointer
+ * restored. The reset helper begin/end calls bracket the sequence; the end
+ * helper is only reached when both steps succeeded.
+ */
+static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int ret;
+
+ if (ring->me >= adev->sdma.num_instances) {
+ dev_err(adev->dev, "sdma instance not found\n");
+ return -EINVAL;
+ }
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ ret = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true);
+ if (!ret)
+ ret = sdma_v7_0_gfx_resume_instance(adev, ring->me, true);
+ if (ret)
+ return ret;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+/**
+ * sdma_v7_0_start - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Under SR-IOV only the ring registers are programmed, with the engines and
+ * context switching disabled first. Otherwise the ucode is loaded when the
+ * firmware load type is direct, the engines are unhalted, context switching
+ * is enabled and the gfx and rlc queues are resumed.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v7_0_start(struct amdgpu_device *adev)
+{
+ int ret;
+
+ if (amdgpu_sriov_vf(adev)) {
+ sdma_v7_0_ctx_switch_enable(adev, false);
+ sdma_v7_0_enable(adev, false);
+
+ /* set RB registers */
+ return sdma_v7_0_gfx_resume(adev);
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ ret = sdma_v7_0_load_microcode(adev);
+ if (ret) {
+ /* drop whatever ucode buffers were created before the failure */
+ sdma_v12_0_free_ucode_buffer(adev);
+ return ret;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ msleep(1000);
+ }
+
+ /* unhalt the MEs */
+ sdma_v7_0_enable(adev, true);
+ /* enable sdma ring preemption */
+ sdma_v7_0_ctx_switch_enable(adev, true);
+
+ /* start the gfx rings and rlc compute queues */
+ ret = sdma_v7_0_gfx_resume(adev);
+ if (ret)
+ return ret;
+
+ return sdma_v7_0_rlc_resume(adev);
+}
+
+/**
+ * sdma_v7_0_mqd_init - fill an SDMA memory queue descriptor
+ *
+ * @adev: amdgpu_device pointer
+ * @mqd: buffer to fill, used as a struct v12_sdma_mqd
+ * @prop: queue properties (size, base, wptr/rptr/csa/fence addresses, doorbell)
+ *
+ * Translates @prop into the register image stored in the MQD.
+ * Always returns 0.
+ */
+static int sdma_v7_0_mqd_init(struct amdgpu_device *adev, void *mqd,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v12_sdma_mqd *m = mqd;
+ uint64_t wb_gpu_addr;
+
+ /* ring size is programmed as log2 of the size in dwords */
+ m->sdmax_rlcx_rb_cntl =
+ order_base_2(prop->queue_size / 4) << SDMA0_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
+ 1 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+ 4 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
+ 1 << SDMA0_QUEUE0_RB_CNTL__MCU_WPTR_POLL_ENABLE__SHIFT;
+
+ /* the ring base is stored shifted right by 8 bits */
+ m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
+ m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
+
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);
+
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
+ m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);
+
+ /* IB control is copied from the current value of instance 0's QUEUE0 register */
+ m->sdmax_rlcx_ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, 0,
+ regSDMA0_QUEUE0_IB_CNTL));
+
+ m->sdmax_rlcx_doorbell_offset =
+ prop->doorbell_index << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
+
+ m->sdmax_rlcx_doorbell_log = 0;
+ m->sdmax_rlcx_rb_aql_cntl = 0x4000; //regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT;
+ m->sdmax_rlcx_dummy_reg = 0xf; //regSDMA0_QUEUE0_DUMMY_REG_DEFAULT;
+
+ m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr);
+ m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr);
+
+ /* the fence address is stored in the two mcu_dbg fields */
+ m->sdmax_rlcx_mcu_dbg0 = lower_32_bits(prop->fence_address);
+ m->sdmax_rlcx_mcu_dbg1 = upper_32_bits(prop->fence_address);
+
+ return 0;
+}
+
+/* Register the SDMA MQD initializer and descriptor size for the DMA HW IP slot. */
+static void sdma_v7_0_set_mqd_funcs(struct amdgpu_device *adev)
+{
+ adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v7_0_mqd_init;
+ adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v12_sdma_mqd);
+}
+
+/**
+ * sdma_v7_0_ring_test_ring - simple async dma engine test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Test the DMA engine by using it to write a value to memory: a write-back
+ * slot is seeded with 0xCAFEDEAD, a linear write packet storing 0xDEADBEEF
+ * to that slot is submitted on the ring, and the slot is polled until the
+ * new value shows up.
+ * Returns 0 for success, -ETIMEDOUT if the value never appears, or the
+ * error from slot/ring allocation.
+ */
+static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned i;
+ unsigned index;
+ int r;
+ u32 tmp;
+ u64 gpu_addr;
+
+ tmp = 0xCAFEDEAD;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+ return r;
+ }
+
+ /* write-back slots are 32 bits wide, hence the index * 4 byte offset */
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
+ /* five dwords: header, address lo/hi, count, payload */
+ r = amdgpu_ring_alloc(ring, 5);
+ if (r) {
+ DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
+ amdgpu_device_wb_free(adev, index);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
+ amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ /* poll up to usec_timeout iterations; emulation mode sleeps 1 ms per iteration instead of 1 us */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ break;
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ else
+ udelay(1);
+ }
+
+ /* r is still 0 from amdgpu_ring_alloc() unless the poll ran out */
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ amdgpu_device_wb_free(adev, index);
+
+ return r;
+}
+
+/**
+ * sdma_v7_0_ring_test_ib - test an IB on the DMA engine
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Seeds a write-back slot with 0xCAFEDEAD, schedules an 8-dword IB that
+ * writes 0xDEADBEEF to the slot (padded with NOPs), waits for its fence and
+ * checks that the slot holds the new value.
+ * Returns 0 on success, error on failure.
+ */
+static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct dma_fence *fence = NULL;
+ struct amdgpu_ib ib;
+ unsigned wb_slot;
+ unsigned dw;
+ u64 dst_addr;
+ long ret;
+
+ memset(&ib, 0, sizeof(ib));
+
+ ret = amdgpu_device_wb_get(adev, &wb_slot);
+ if (ret) {
+ dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", ret);
+ return ret;
+ }
+
+ dst_addr = adev->wb.gpu_addr + (wb_slot * 4);
+ adev->wb.wb[wb_slot] = cpu_to_le32(0xCAFEDEAD);
+
+ ret = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (ret) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", ret);
+ goto free_wb;
+ }
+
+ /* linear write of one test value, followed by NOP padding up to 8 dwords */
+ ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib.ptr[1] = lower_32_bits(dst_addr);
+ ib.ptr[2] = upper_32_bits(dst_addr);
+ ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
+ ib.ptr[4] = 0xDEADBEEF;
+ for (dw = 5; dw < 8; dw++)
+ ib.ptr[dw] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.length_dw = 8;
+
+ ret = amdgpu_ib_schedule(ring, 1, &ib, NULL, &fence);
+ if (ret)
+ goto free_ib;
+
+ ret = dma_fence_wait_timeout(fence, false, timeout);
+ if (ret < 0) {
+ DRM_ERROR("amdgpu: fence wait failed (%ld).\n", ret);
+ goto free_ib;
+ }
+ if (ret == 0) {
+ DRM_ERROR("amdgpu: IB test timed out\n");
+ ret = -ETIMEDOUT;
+ goto free_ib;
+ }
+
+ ret = (le32_to_cpu(adev->wb.wb[wb_slot]) == 0xDEADBEEF) ? 0 : -EINVAL;
+
+free_ib:
+ amdgpu_ib_free(&ib, NULL);
+ dma_fence_put(fence);
+free_wb:
+ amdgpu_device_wb_free(adev, wb_slot);
+ return ret;
+}
+
+
+/**
+ * sdma_v7_0_vm_copy_pte - update PTEs by copying them from the GART
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Update PTEs by copying them from the GART using sDMA.
+ * Appends one 8-dword linear copy packet to @ib and advances ib->length_dw
+ * accordingly.
+ */
+static void sdma_v7_0_vm_copy_pte(struct amdgpu_ib *ib,
+ uint64_t pe, uint64_t src,
+ unsigned count)
+{
+ /* 8 bytes are copied per page entry */
+ unsigned bytes = count * 8;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+ SDMA_PKT_COPY_LINEAR_HEADER_CPV(1);
+
+ /* the size is written as byte count minus one */
+ ib->ptr[ib->length_dw++] = bytes - 1;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = 0;
+
+}
+
+/**
+ * sdma_v7_0_vm_write_pte - update PTEs by writing them manually
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @value: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ *
+ * Update PTEs by writing them manually using sDMA.
+ * Appends a linear write packet of 4 + 2 * @count dwords to @ib.
+ */
+static void sdma_v7_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr)
+{
+ /* each entry is a 64-bit value, i.e. two payload dwords */
+ unsigned ndw = count * 2;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ /* the payload length is written as dword count minus one */
+ ib->ptr[ib->length_dw++] = ndw - 1;
+ /* one iteration per entry: emit the value, then step it by incr */
+ for (; ndw > 0; ndw -= 2) {
+ ib->ptr[ib->length_dw++] = lower_32_bits(value);
+ ib->ptr[ib->length_dw++] = upper_32_bits(value);
+ value += incr;
+ }
+}
+
+/**
+ * sdma_v7_0_vm_set_pte_pde - update the page tables using sDMA
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update the page tables using sDMA.
+ * Appends one 10-dword PTEPDE packet to @ib; the engine generates the
+ * entries from @addr, @incr and @flags rather than reading them from memory.
+ */
+static void sdma_v7_0_vm_set_pte_pde(struct amdgpu_ib *ib,
+ uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags)
+{
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_PTEPDE);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
+ ib->ptr[ib->length_dw++] = upper_32_bits(flags);
+ ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
+}
+
+/**
+ * sdma_v7_0_ring_pad_ib - pad the IB
+ *
+ * @ring: amdgpu ring pointer
+ * @ib: indirect buffer to fill with padding
+ *
+ * Append NOP dwords until ib->length_dw is a multiple of 8. When the SDMA
+ * instance supports burst NOPs, the first padding dword additionally carries
+ * the count of the NOP dwords that follow it.
+ */
+static void sdma_v7_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ bool burst = sdma && sdma->burst_nop;
+ u32 pad_count = (-ib->length_dw) & 0x7;
+ u32 n, nop;
+
+ for (n = 0; n < pad_count; n++) {
+ nop = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP);
+ /* only the leading NOP of a burst carries the count */
+ if (burst && n == 0)
+ nop |= SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+ ib->ptr[ib->length_dw++] = nop;
+ }
+}
+
+/**
+ * sdma_v7_0_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed: emits a 6-dword
+ * POLL_REGMEM packet that polls the ring's fence memory until it equals the
+ * latest sync sequence number.
+ */
+static void sdma_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+ SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
+ /* low address dword with the two lowest bits masked off */
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xffffffff); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+}
+
+/**
+ * sdma_v7_0_ring_emit_vm_flush - vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: address
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA. The work is delegated entirely to
+ * amdgpu_gmc_emit_flush_gpu_tlb().
+ */
+static void sdma_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+}
+
+/*
+ * Emit a 3-dword register write packet: header, register offset shifted
+ * left by 2, and the value to write.
+ */
+static void sdma_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ /* The SRBM WRITE command is not supported on sdma v7.
+ * Use the Register WRITE command instead, whose OPCODE is the same as SRBM WRITE.
+ */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_SRBM_WRITE));
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, val);
+}
+
+/*
+ * Emit a 6-dword POLL_REGMEM packet that polls register @reg until
+ * (value & @mask) equals @val. MEM_POLL is not set in the header here,
+ * unlike in the fence-memory poll of the pipeline sync.
+ */
+static void sdma_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val); /* reference */
+ amdgpu_ring_write(ring, mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
+}
+
+/*
+ * Write @ref to @reg0, then wait on @reg1 until (value & @mask) == @mask.
+ * A zero-mask wait on @reg0 sits between the two.
+ */
+static void sdma_v7_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ amdgpu_ring_emit_wreg(ring, reg0, ref);
+ /* wait for a cycle to reset vm_inv_eng*_ack */
+ amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
+ amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
+/*
+ * Early init: derive the user-queue policy from the amdgpu_user_queue module
+ * setting, load the SDMA firmware description and install the ring, buffer,
+ * VM PTE, IRQ and MQD callbacks.
+ *
+ * amdgpu_user_queue == 1: kernel submission and user queues both allowed
+ * amdgpu_user_queue == 2: user queues only, no kernel-side user submission
+ * anything else: user queues disabled
+ */
+static int sdma_v7_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret;
+
+ if (amdgpu_user_queue == 1) {
+ adev->sdma.no_user_submission = false;
+ adev->sdma.disable_uq = false;
+ } else if (amdgpu_user_queue == 2) {
+ adev->sdma.no_user_submission = true;
+ adev->sdma.disable_uq = false;
+ } else {
+ adev->sdma.no_user_submission = false;
+ adev->sdma.disable_uq = true;
+ }
+
+ ret = amdgpu_sdma_init_microcode(adev, 0, true);
+ if (ret) {
+ DRM_ERROR("Failed to init sdma firmware!\n");
+ return ret;
+ }
+
+ sdma_v7_0_set_ring_funcs(adev);
+ sdma_v7_0_set_buffer_funcs(adev);
+ sdma_v7_0_set_vm_pte_funcs(adev);
+ sdma_v7_0_set_irq_funcs(adev);
+ sdma_v7_0_set_mqd_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * sdma_v7_0_sw_init - software-side setup of the SDMA block
+ *
+ * @ip_block: IP block whose ->adev owns the SDMA instances
+ *
+ * Registers the trap and fence interrupt sources, initializes one ring per
+ * instance, computes the supported reset mask, creates the reset-mask sysfs
+ * entry, allocates the register dump buffer and, for supported IP/firmware
+ * versions, hooks up the user-queue functions.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_ring *ring;
+ int r, i;
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0);
+ uint32_t *ptr;
+
+ /* SDMA trap event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ GFX_12_0_0__SRCID__SDMA_TRAP,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+
+ /* SDMA user fence event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ GFX_12_0_0__SRCID__SDMA_FENCE,
+ &adev->sdma.fence_irq);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->me = i;
+ ring->no_user_submission = adev->sdma.no_user_submission;
+
+ DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
+ ring->use_doorbell?"true":"false");
+
+ ring->doorbell_index =
+ (adev->doorbell_index.sdma_engine[i] << 1); // get DWORD offset
+
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ sprintf(ring->name, "sdma%d", i);
+ /* each ring signals through the shared trap source, using its own per-instance irq type */
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->sdma.trap_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+
+ /* per-queue reset is only advertised on bare metal and when not disabled for debugging */
+ adev->sdma.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+ r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+ /* Allocate memory for SDMA IP Dump buffer; failure is logged but not fatal */
+ ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (ptr)
+ adev->sdma.ip_dump = ptr;
+ else
+ DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+
+ /* user queues need a new enough firmware and must not be disabled */
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ if ((adev->sdma.instance[0].fw_version >= 7966358) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ default:
+ break;
+ }
+
+ /* r is 0 here: the last call that assigned it succeeded */
+ return r;
+}
+
+/**
+ * sdma_v7_0_sw_fini - undo sdma_v7_0_sw_init()
+ *
+ * @ip_block: IP block whose ->adev owns the SDMA instances
+ *
+ * Tears down the rings, the reset-mask sysfs entry and the firmware context,
+ * frees the ucode buffers when the firmware was loaded directly, and
+ * releases the register dump buffer.
+ * Always returns 0.
+ */
+static int sdma_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+
+ amdgpu_sdma_sysfs_reset_mask_fini(adev);
+ amdgpu_sdma_destroy_inst_ctx(adev, true);
+
+ /* ucode buffers only exist for direct firmware loading */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)
+ sdma_v12_0_free_ucode_buffer(adev);
+
+ /* ip_dump may be NULL if its allocation failed; kfree() accepts that */
+ kfree(adev->sdma.ip_dump);
+
+ return 0;
+}
+
+/*
+ * Take (enable) or drop (!enable) one reference on the trap interrupt of
+ * every SDMA instance. Does nothing unless user-queue functions are
+ * registered for the DMA IP. Stops at and returns the first error,
+ * otherwise returns 0.
+ */
+static int sdma_v7_0_set_userq_trap_interrupts(struct amdgpu_device *adev,
+ bool enable)
+{
+ unsigned int type;
+ int inst, ret;
+
+ if (!adev->userq_funcs[AMDGPU_HW_IP_DMA])
+ return 0;
+
+ for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+ type = AMDGPU_SDMA_IRQ_INSTANCE0 + inst;
+ ret = enable ? amdgpu_irq_get(adev, &adev->sdma.trap_irq, type) :
+ amdgpu_irq_put(adev, &adev->sdma.trap_irq, type);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Hardware init: start the engines and, if that worked, enable the
+ * user-queue trap interrupts. Returns the first error encountered.
+ */
+static int sdma_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret = sdma_v7_0_start(adev);
+
+ return ret ? ret : sdma_v7_0_set_userq_trap_interrupts(adev, true);
+}
+
+/*
+ * Hardware teardown. Nothing is touched under SR-IOV; otherwise context
+ * switching is disabled, the engines are halted and the user-queue trap
+ * interrupt references are dropped. Always returns 0.
+ */
+static int sdma_v7_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ sdma_v7_0_ctx_switch_enable(adev, false);
+ sdma_v7_0_enable(adev, false);
+ sdma_v7_0_set_userq_trap_interrupts(adev, false);
+ }
+
+ return 0;
+}
+
+/* Suspend is implemented as a plain hardware teardown. */
+static int sdma_v7_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return sdma_v7_0_hw_fini(ip_block);
+}
+
+/* Resume is implemented as a plain hardware init. */
+static int sdma_v7_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ return sdma_v7_0_hw_init(ip_block);
+}
+
+/*
+ * Report whether every SDMA instance currently has the IDLE bit set in its
+ * status register.
+ */
+static bool sdma_v7_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 inst, status;
+
+ for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+ status = RREG32(sdma_v7_0_get_reg_offset(adev, inst, regSDMA0_STATUS_REG));
+ if ((status & SDMA0_STATUS_REG__IDLE_MASK) == 0)
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * sdma_v7_0_wait_for_idle - poll until every SDMA instance reports idle
+ *
+ * @ip_block: IP block whose ->adev owns the SDMA instances
+ *
+ * Re-checks sdma_v7_0_is_idle() once per microsecond for up to
+ * adev->usec_timeout iterations. Going through that helper covers exactly
+ * adev->sdma.num_instances engines instead of a hard-coded pair, so no
+ * status register of a non-existent instance is read.
+ * Returns 0 once all instances are idle, -ETIMEDOUT otherwise.
+ */
+static int sdma_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ unsigned i;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (sdma_v7_0_is_idle(ip_block))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+/**
+ * sdma_v7_0_ring_preempt_ib - preempt the IB currently running on a ring
+ *
+ * @ring: amdgpu_ring to preempt
+ *
+ * Emits a trailing fence, asserts the QUEUE0 preempt register of the ring's
+ * instance and polls the trailing fence memory until the new sequence
+ * number shows up, then deasserts the preempt request again.
+ * Returns 0 on success, the amdgpu_ring_alloc() error, or -EINVAL if the
+ * trailing fence did not signal within adev->usec_timeout polls.
+ */
+static int sdma_v7_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ u32 index = 0;
+ u64 sdma_gfx_preempt;
+
+ amdgpu_sdma_get_index_from_ring(ring, &index);
+ sdma_gfx_preempt =
+ sdma_v7_0_get_reg_offset(adev, index, regSDMA0_QUEUE0_PREEMPT);
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ /* emit the trailing fence */
+ ring->trail_seq += 1;
+ r = amdgpu_ring_alloc(ring, 10);
+ if (r) {
+ /*
+ * NOTE(review): this early return leaves the preempt cond exec
+ * asserted and trail_seq already incremented - confirm intended.
+ */
+ DRM_ERROR("ring %d failed to be allocated \n", ring->idx);
+ return r;
+ }
+ sdma_v7_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+ ring->trail_seq, 0);
+ amdgpu_ring_commit(ring);
+
+ /* assert IB preemption */
+ WREG32(sdma_gfx_preempt, 1);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+ break;
+ udelay(1);
+ }
+
+ /* a timeout is reported, but the cleanup below still runs */
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
+ }
+
+ /* deassert IB preemption */
+ WREG32(sdma_gfx_preempt, 0);
+
+ /* deassert the preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+ return r;
+}
+
+/*
+ * Enable or disable trap interrupt generation for one SDMA instance by
+ * updating TRAP_ENABLE in its SDMA0_CNTL register. @type is passed to
+ * sdma_v7_0_get_reg_offset() as the instance index. Always returns 0.
+ */
+static int sdma_v7_0_set_trap_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cntl_reg = sdma_v7_0_get_reg_offset(adev, type, regSDMA0_CNTL);
+ u32 trap_enable = (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0;
+ u32 cntl;
+
+ cntl = RREG32(cntl_reg);
+ cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, TRAP_ENABLE, trap_enable);
+ WREG32(cntl_reg, cntl);
+
+ return 0;
+}
+
+/**
+ * sdma_v7_0_process_trap_irq - handle an SDMA trap interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt source (unused)
+ * @entry: decoded interrupt vector entry
+ *
+ * The low nibble of entry->ring_id selects the queue, the next nibble the
+ * SDMA instance. Fence processing is triggered for queue 0 of the GFX
+ * client only; every other combination is ignored.
+ * Returns 0, or -EINVAL when the instance index does not name an existing
+ * SDMA instance.
+ */
+static int sdma_v7_0_process_trap_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ int instances, queue;
+
+ DRM_DEBUG("IH: SDMA trap\n");
+
+ queue = entry->ring_id & 0xf;
+ instances = (entry->ring_id & 0xf0) >> 4;
+ /*
+ * Bound the index by the number of instances that were actually set up
+ * rather than by a fixed value, so a ring that was never initialized
+ * cannot be handed to amdgpu_fence_process().
+ */
+ if (instances >= adev->sdma.num_instances) {
+ DRM_ERROR("IH: wrong ring_ID detected, as wrong sdma instance\n");
+ return -EINVAL;
+ }
+
+ switch (entry->client_id) {
+ case SOC21_IH_CLIENTID_GFX:
+ switch (queue) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[instances].ring);
+ break;
+ default:
+ break;
+ }
+ break;
+ }
+ return 0;
+}
+
+/**
+ * sdma_v7_0_process_fence_irq - handle an SDMA user fence interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt source (unused)
+ * @entry: decoded interrupt vector entry; src_data[0] carries the doorbell offset
+ *
+ * When MES is enabled and the doorbell offset is non-zero, looks up the
+ * user-queue fence driver registered under that doorbell in adev->userq_xa
+ * and processes it. Always returns 0.
+ */
+static int sdma_v7_0_process_fence_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u32 doorbell_offset = entry->src_data[0];
+
+ if (adev->enable_mes && doorbell_offset) {
+ struct amdgpu_userq_fence_driver *fence_drv = NULL;
+ struct xarray *xa = &adev->userq_xa;
+ unsigned long flags;
+
+ /* strip the register field shift to get the xarray key */
+ doorbell_offset >>= SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ /* lookup and processing both happen under the xarray lock with interrupts disabled */
+ xa_lock_irqsave(xa, flags);
+ fence_drv = xa_load(xa, doorbell_offset);
+ if (fence_drv)
+ amdgpu_userq_fence_driver_process(fence_drv);
+ xa_unlock_irqrestore(xa, flags);
+ }
+
+ return 0;
+}
+
+/**
+ * sdma_v7_0_process_illegal_inst_irq - illegal-instruction interrupt handler
+ *
+ * @adev: amdgpu device pointer (unused)
+ * @source: interrupt source (unused)
+ * @entry: interrupt vector entry (unused)
+ *
+ * Placeholder: the interrupt is acknowledged without any further action.
+ *
+ * Returns 0 unconditionally.
+ */
+static int sdma_v7_0_process_illegal_inst_irq(struct amdgpu_device *adev,
+					      struct amdgpu_irq_src *source,
+					      struct amdgpu_iv_entry *entry)
+{
+	return 0;
+}
+
+/**
+ * sdma_v7_0_set_clockgating_state - clock gating callback
+ *
+ * @ip_block: SDMA IP block (unused)
+ * @state: requested clock gating state (unused)
+ *
+ * No-op in this implementation; reports success without touching hardware.
+ *
+ * Returns 0 unconditionally.
+ */
+static int sdma_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					   enum amd_clockgating_state state)
+{
+	return 0;
+}
+
+/**
+ * sdma_v7_0_set_powergating_state - power gating callback
+ *
+ * @ip_block: SDMA IP block (unused)
+ * @state: requested power gating state (unused)
+ *
+ * No-op in this implementation; reports success without touching hardware.
+ *
+ * Returns 0 unconditionally.
+ */
+static int sdma_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+					   enum amd_powergating_state state)
+{
+	return 0;
+}
+
+/**
+ * sdma_v7_0_get_clockgating_state - clock gating query callback
+ *
+ * @ip_block: SDMA IP block (unused)
+ * @flags: clock gating flags; left unmodified by this implementation
+ */
+static void sdma_v7_0_get_clockgating_state(struct amdgpu_ip_block *ip_block,
+					    u64 *flags)
+{
+}
+
+/**
+ * sdma_v7_0_print_ip_state - print the captured SDMA register snapshot
+ *
+ * @ip_block: SDMA IP block
+ * @p: drm printer receiving the output
+ *
+ * Prints the instance count, then for every instance one line per entry of
+ * sdma_reg_list_7_0 (register name and the value stored in
+ * adev->sdma.ip_dump). Does nothing when no dump buffer exists.
+ */
+static void sdma_v7_0_print_ip_state(struct amdgpu_ip_block *ip_block,
+				     struct drm_printer *p)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	uint32_t nregs = ARRAY_SIZE(sdma_reg_list_7_0);
+	uint32_t base;
+	int inst, reg;
+
+	if (!adev->sdma.ip_dump)
+		return;
+
+	drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
+
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		/* each instance owns nregs consecutive slots in ip_dump */
+		base = inst * nregs;
+		drm_printf(p, "\nInstance:%d\n", inst);
+
+		for (reg = 0; reg < nregs; reg++) {
+			drm_printf(p, "%-50s \t 0x%08x\n",
+				   sdma_reg_list_7_0[reg].reg_name,
+				   adev->sdma.ip_dump[base + reg]);
+		}
+	}
+}
+
+/**
+ * sdma_v7_0_dump_ip_state - capture SDMA registers into the dump buffer
+ *
+ * @ip_block: SDMA IP block
+ *
+ * Reads every register listed in sdma_reg_list_7_0 for each SDMA instance
+ * and stores the values in adev->sdma.ip_dump, instance after instance.
+ * Does nothing when no dump buffer exists.
+ */
+static void sdma_v7_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	uint32_t nregs = ARRAY_SIZE(sdma_reg_list_7_0);
+	uint32_t base;
+	int inst, reg;
+
+	if (!adev->sdma.ip_dump)
+		return;
+
+	/*
+	 * The reads are bracketed by amdgpu_gfx_off_ctrl(false/true) -
+	 * presumably to keep GFXOFF from engaging mid-dump; confirm against
+	 * amdgpu_gfx_off_ctrl().
+	 */
+	amdgpu_gfx_off_ctrl(adev, false);
+
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		base = inst * nregs;
+
+		for (reg = 0; reg < nregs; reg++) {
+			u32 offset = sdma_v7_0_get_reg_offset(adev, inst,
+					sdma_reg_list_7_0[reg].reg_offset);
+
+			adev->sdma.ip_dump[base + reg] = RREG32(offset);
+		}
+	}
+
+	amdgpu_gfx_off_ctrl(adev, true);
+}
+
+/*
+ * IP-block callback table for SDMA v7.0. Non-static: it is declared extern
+ * in sdma_v7_0.h and referenced by sdma_v7_0_ip_block.
+ */
+const struct amd_ip_funcs sdma_v7_0_ip_funcs = {
+	.name = "sdma_v7_0",
+
+	/* init / teardown */
+	.early_init = sdma_v7_0_early_init,
+	.late_init = NULL,
+	.sw_init = sdma_v7_0_sw_init,
+	.sw_fini = sdma_v7_0_sw_fini,
+	.hw_init = sdma_v7_0_hw_init,
+	.hw_fini = sdma_v7_0_hw_fini,
+
+	/* power management */
+	.suspend = sdma_v7_0_suspend,
+	.resume = sdma_v7_0_resume,
+
+	/* idle detection and reset */
+	.is_idle = sdma_v7_0_is_idle,
+	.wait_for_idle = sdma_v7_0_wait_for_idle,
+	.soft_reset = sdma_v7_0_soft_reset,
+	.check_soft_reset = sdma_v7_0_check_soft_reset,
+
+	/* clock / power gating (stubs in this file) */
+	.set_clockgating_state = sdma_v7_0_set_clockgating_state,
+	.set_powergating_state = sdma_v7_0_set_powergating_state,
+	.get_clockgating_state = sdma_v7_0_get_clockgating_state,
+
+	/* register snapshot for debugging */
+	.dump_ip_state = sdma_v7_0_dump_ip_state,
+	.print_ip_state = sdma_v7_0_print_ip_state,
+};
+
+/*
+ * Ring callback table installed on every SDMA instance ring by
+ * sdma_v7_0_set_ring_funcs().
+ */
+static const struct amdgpu_ring_funcs sdma_v7_0_ring_funcs = {
+	.type = AMDGPU_RING_TYPE_SDMA,
+	.align_mask = 0xf,
+	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+	.support_64bit_ptrs = true,
+	.secure_submission_supported = true,
+
+	/* ring pointer accessors */
+	.get_rptr = sdma_v7_0_ring_get_rptr,
+	.get_wptr = sdma_v7_0_ring_get_wptr,
+	.set_wptr = sdma_v7_0_ring_set_wptr,
+
+	/* dword budget per frame, one term per emitted packet group */
+	.emit_frame_size =
+		5 +	/* sdma_v7_0_ring_init_cond_exec */
+		6 +	/* sdma_v7_0_ring_emit_hdp_flush */
+		6 +	/* sdma_v7_0_ring_emit_pipeline_sync */
+		/* sdma_v7_0_ring_emit_vm_flush */
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+		/* sdma_v7_0_ring_emit_fence x3 for user fence, vm fence */
+		10 + 10 + 10,
+	.emit_ib_size = 5 + 7 + 6,	/* sdma_v7_0_ring_emit_ib */
+
+	/* packet emitters */
+	.emit_ib = sdma_v7_0_ring_emit_ib,
+	.emit_mem_sync = sdma_v7_0_ring_emit_mem_sync,
+	.emit_fence = sdma_v7_0_ring_emit_fence,
+	.emit_pipeline_sync = sdma_v7_0_ring_emit_pipeline_sync,
+	.emit_vm_flush = sdma_v7_0_ring_emit_vm_flush,
+	.emit_hdp_flush = sdma_v7_0_ring_emit_hdp_flush,
+	.emit_wreg = sdma_v7_0_ring_emit_wreg,
+	.emit_reg_wait = sdma_v7_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = sdma_v7_0_ring_emit_reg_write_reg_wait,
+
+	/* self tests */
+	.test_ring = sdma_v7_0_ring_test_ring,
+	.test_ib = sdma_v7_0_ring_test_ib,
+
+	/* padding, conditional execution, preemption and reset */
+	.insert_nop = sdma_v7_0_ring_insert_nop,
+	.pad_ib = sdma_v7_0_ring_pad_ib,
+	.init_cond_exec = sdma_v7_0_ring_init_cond_exec,
+	.preempt_ib = sdma_v7_0_ring_preempt_ib,
+	.reset = sdma_v7_0_reset_queue,
+};
+
+/**
+ * sdma_v7_0_set_ring_funcs - attach the ring callbacks to every SDMA ring
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Points each instance's ring at sdma_v7_0_ring_funcs and records the
+ * instance index in ring.me.
+ */
+static void sdma_v7_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+	int inst;
+
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		struct amdgpu_ring *ring = &adev->sdma.instance[inst].ring;
+
+		ring->funcs = &sdma_v7_0_ring_funcs;
+		ring->me = inst;
+	}
+}
+
+/* Trap interrupt source: supports both state control and processing. */
+static const struct amdgpu_irq_src_funcs sdma_v7_0_trap_irq_funcs = {
+	.set = sdma_v7_0_set_trap_irq_state,
+	.process = sdma_v7_0_process_trap_irq,
+};
+
+/* Fence interrupt source: processing only, no .set callback. */
+static const struct amdgpu_irq_src_funcs sdma_v7_0_fence_irq_funcs = {
+	.process = sdma_v7_0_process_fence_irq,
+};
+
+/* Illegal-instruction interrupt source: processing only, no .set callback. */
+static const struct amdgpu_irq_src_funcs sdma_v7_0_illegal_inst_irq_funcs = {
+	.process = sdma_v7_0_process_illegal_inst_irq,
+};
+
+/**
+ * sdma_v7_0_set_irq_funcs - install the SDMA interrupt source callbacks
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Sizes the trap source to AMDGPU_SDMA_IRQ_INSTANCE0 plus the number of SDMA
+ * instances and hooks up the trap, fence and illegal-instruction handlers.
+ */
+static void sdma_v7_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_sdma *sdma = &adev->sdma;
+
+	sdma->trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
+				   sdma->num_instances;
+
+	sdma->trap_irq.funcs = &sdma_v7_0_trap_irq_funcs;
+	sdma->fence_irq.funcs = &sdma_v7_0_fence_irq_funcs;
+	sdma->illegal_inst_irq.funcs = &sdma_v7_0_illegal_inst_irq_funcs;
+}
+
+/**
+ * sdma_v7_0_emit_copy_buffer - emit a linear copy packet into an IB
+ *
+ * @ib: indirect buffer to append the packet to
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ * @copy_flags: copy flags for the buffers (TMZ and compression controls)
+ *
+ * Appends exactly eight dwords to @ib: header, byte count minus one, an
+ * endian-swap dword (always 0), the 64-bit source and destination addresses
+ * as low/high halves, and a final DCC metadata dword. The metadata dword is
+ * zero unless @copy_flags requests a decompressed read or a compressed
+ * write.
+ *
+ * Used by the amdgpu ttm implementation to move pages if registered as the
+ * asic copy callback.
+ */
+static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib,
+				       uint64_t src_offset,
+				       uint64_t dst_offset,
+				       uint32_t byte_count,
+				       uint32_t copy_flags)
+{
+	uint32_t max_com = AMDGPU_COPY_FLAGS_GET(copy_flags, MAX_COMPRESSED);
+	uint32_t data_format = AMDGPU_COPY_FLAGS_GET(copy_flags, DATA_FORMAT);
+	uint32_t num_type = AMDGPU_COPY_FLAGS_GET(copy_flags, NUMBER_TYPE);
+	uint32_t tmz = (copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0;
+	uint32_t write_cm = 1;
+	uint32_t header;
+	uint32_t dcc = 0;
+
+	/* write compression mode: 2 when compression is disabled, else 1 */
+	if (AMDGPU_COPY_FLAGS_GET(copy_flags, WRITE_COMPRESS_DISABLE))
+		write_cm = 2;
+
+	header = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
+		 SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+		 SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz) |
+		 SDMA_PKT_COPY_LINEAR_HEADER_CPV(1);
+
+	/* DCC metadata is only populated for compressed transfers */
+	if (copy_flags & (AMDGPU_COPY_FLAGS_READ_DECOMPRESSED |
+			  AMDGPU_COPY_FLAGS_WRITE_COMPRESSED)) {
+		dcc = SDMA_DCC_DATA_FORMAT(data_format) |
+		      SDMA_DCC_NUM_TYPE(num_type) |
+		      SDMA_DCC_MAX_COM(max_com) |
+		      SDMA_DCC_MAX_UCOM(1);
+		if (copy_flags & AMDGPU_COPY_FLAGS_READ_DECOMPRESSED)
+			dcc |= SDMA_DCC_READ_CM(2);
+		if (copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED)
+			dcc |= SDMA_DCC_WRITE_CM(write_cm);
+	}
+
+	ib->ptr[ib->length_dw++] = header;
+	ib->ptr[ib->length_dw++] = byte_count - 1;
+	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+	ib->ptr[ib->length_dw++] = dcc;
+}
+
+/**
+ * sdma_v7_0_emit_fill_buffer - emit a constant-fill packet into an IB
+ *
+ * @ib: indirect buffer to append the packet to
+ * @src_data: value to write to buffer
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ *
+ * Appends exactly five dwords to @ib: header (with the COMPRESS field set
+ * to 1), the 64-bit destination address as low/high halves, the fill value,
+ * and the byte count minus one.
+ */
+static void sdma_v7_0_emit_fill_buffer(struct amdgpu_ib *ib,
+				       uint32_t src_data,
+				       uint64_t dst_offset,
+				       uint32_t byte_count)
+{
+	uint32_t header = SDMA_PKT_CONSTANT_FILL_HEADER_OP(SDMA_OP_CONST_FILL) |
+			  SDMA_PKT_CONSTANT_FILL_HEADER_COMPRESS(1);
+
+	ib->ptr[ib->length_dw++] = header;
+	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+	ib->ptr[ib->length_dw++] = src_data;
+	ib->ptr[ib->length_dw++] = byte_count - 1;
+}
+
+/*
+ * Buffer copy/fill callbacks. The *_num_dw values match the number of dwords
+ * the corresponding emit function appends to the IB (8 for copy, 5 for
+ * fill); the *_max_bytes limits are 0x400000 bytes (4 MiB) per packet.
+ */
+static const struct amdgpu_buffer_funcs sdma_v7_0_buffer_funcs = {
+	/* copy */
+	.copy_max_bytes = 0x400000,
+	.copy_num_dw = 8,
+	.emit_copy_buffer = sdma_v7_0_emit_copy_buffer,
+
+	/* fill */
+	.fill_max_bytes = 0x400000,
+	.fill_num_dw = 5,
+	.emit_fill_buffer = sdma_v7_0_emit_fill_buffer,
+};
+
+/**
+ * sdma_v7_0_set_buffer_funcs - register SDMA as the buffer move engine
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Installs sdma_v7_0_buffer_funcs in adev->mman and selects the ring of
+ * SDMA instance 0 as the ring those callbacks run on.
+ */
+static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev)
+{
+	adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
+	adev->mman.buffer_funcs = &sdma_v7_0_buffer_funcs;
+}
+
+/* Page-table update callbacks backed by the SDMA engine. */
+static const struct amdgpu_vm_pte_funcs sdma_v7_0_vm_pte_funcs = {
+	.copy_pte_num_dw = 8,
+
+	.copy_pte = sdma_v7_0_vm_copy_pte,
+	.write_pte = sdma_v7_0_vm_write_pte,
+	.set_pte_pde = sdma_v7_0_vm_set_pte_pde,
+};
+
+/**
+ * sdma_v7_0_set_vm_pte_funcs - register SDMA for VM page-table updates
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Installs sdma_v7_0_vm_pte_funcs in the VM manager and publishes the
+ * scheduler of every SDMA instance ring, one entry per instance.
+ */
+static void sdma_v7_0_set_vm_pte_funcs(struct amdgpu_device *adev)
+{
+	unsigned int inst;
+
+	adev->vm_manager.vm_pte_funcs = &sdma_v7_0_vm_pte_funcs;
+	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+
+	for (inst = 0; inst < adev->sdma.num_instances; inst++)
+		adev->vm_manager.vm_pte_scheds[inst] =
+			&adev->sdma.instance[inst].ring.sched;
+}
+
+/*
+ * IP block descriptor for SDMA version 7.0.0. Non-static: declared extern in
+ * sdma_v7_0.h.
+ */
+const struct amdgpu_ip_block_version sdma_v7_0_ip_block = {
+	.type = AMD_IP_BLOCK_TYPE_SDMA,
+
+	/* version 7.0.0 */
+	.major = 7,
+	.minor = 0,
+	.rev = 0,
+
+	.funcs = &sdma_v7_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.h b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.h
new file mode 100644
index 000000000000..5af863bb39c4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SDMA_V7_0_H__
+#define __SDMA_V7_0_H__
+
+extern const struct amd_ip_funcs sdma_v7_0_ip_funcs;
+extern const struct amdgpu_ip_block_version sdma_v7_0_ip_block;
+
+#endif /* __SDMA_V7_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index e6d2f74a7976..f7288372ee61 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -35,6 +35,7 @@
#include "amdgpu_vce.h"
#include "atom.h"
#include "amd_pcie.h"
+
#include "si_dpm.h"
#include "sid.h"
#include "si_ih.h"
@@ -44,17 +45,31 @@
#include "dce_v6_0.h"
#include "si.h"
#include "uvd_v3_1.h"
-#include "amdgpu_vkms.h"
+#include "vce_v1_0.h"
+
+#include "uvd/uvd_4_0_d.h"
+
+#include "smu/smu_6_0_d.h"
+#include "smu/smu_6_0_sh_mask.h"
+
#include "gca/gfx_6_0_d.h"
+#include "gca/gfx_6_0_sh_mask.h"
+
#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"
+
#include "gmc/gmc_6_0_d.h"
+#include"gmc/gmc_6_0_sh_mask.h"
+
#include "dce/dce_6_0_d.h"
-#include "uvd/uvd_4_0_d.h"
+#include "dce/dce_6_0_sh_mask.h"
+
#include "bif/bif_3_0_d.h"
#include "bif/bif_3_0_sh_mask.h"
+#include "si_enums.h"
#include "amdgpu_dm.h"
+#include "amdgpu_vkms.h"
static const u32 tahiti_golden_registers[] =
{
@@ -907,9 +922,7 @@ static const u32 hainan_mgcg_cgcg_init[] =
0x3630, 0xfffffff0, 0x00000100,
};
-/* XXX: update when we support VCE */
-#if 0
-/* tahiti, pitcarin, verde */
+/* tahiti, pitcairn, verde */
static const struct amdgpu_video_codec_info tahiti_video_codecs_encode_array[] =
{
{
@@ -926,13 +939,7 @@ static const struct amdgpu_video_codecs tahiti_video_codecs_encode =
.codec_count = ARRAY_SIZE(tahiti_video_codecs_encode_array),
.codec_array = tahiti_video_codecs_encode_array,
};
-#else
-static const struct amdgpu_video_codecs tahiti_video_codecs_encode =
-{
- .codec_count = 0,
- .codec_array = NULL,
-};
-#endif
+
/* oland and hainan don't support encode */
static const struct amdgpu_video_codecs hainan_video_codecs_encode =
{
@@ -940,7 +947,7 @@ static const struct amdgpu_video_codecs hainan_video_codecs_encode =
.codec_array = NULL,
};
-/* tahiti, pitcarin, verde, oland */
+/* tahiti, pitcairn, verde, oland */
static const struct amdgpu_video_codec_info tahiti_video_codecs_decode_array[] =
{
{
@@ -1071,8 +1078,8 @@ static u32 si_smc_rreg(struct amdgpu_device *adev, u32 reg)
u32 r;
spin_lock_irqsave(&adev->smc_idx_lock, flags);
- WREG32(SMC_IND_INDEX_0, (reg));
- r = RREG32(SMC_IND_DATA_0);
+ WREG32(mmSMC_IND_INDEX_0, (reg));
+ r = RREG32(mmSMC_IND_DATA_0);
spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
return r;
}
@@ -1082,8 +1089,8 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
unsigned long flags;
spin_lock_irqsave(&adev->smc_idx_lock, flags);
- WREG32(SMC_IND_INDEX_0, (reg));
- WREG32(SMC_IND_DATA_0, (v));
+ WREG32(mmSMC_IND_INDEX_0, (reg));
+ WREG32(mmSMC_IND_DATA_0, (v));
spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
}
@@ -1110,55 +1117,55 @@ static void si_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
}
static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = {
- {GRBM_STATUS},
+ {mmGRBM_STATUS},
{mmGRBM_STATUS2},
{mmGRBM_STATUS_SE0},
{mmGRBM_STATUS_SE1},
{mmSRBM_STATUS},
{mmSRBM_STATUS2},
- {DMA_STATUS_REG + DMA0_REGISTER_OFFSET},
- {DMA_STATUS_REG + DMA1_REGISTER_OFFSET},
+ {mmDMA_STATUS_REG + DMA0_REGISTER_OFFSET},
+ {mmDMA_STATUS_REG + DMA1_REGISTER_OFFSET},
{mmCP_STAT},
{mmCP_STALLED_STAT1},
{mmCP_STALLED_STAT2},
{mmCP_STALLED_STAT3},
- {GB_ADDR_CONFIG},
- {MC_ARB_RAMCFG},
- {GB_TILE_MODE0},
- {GB_TILE_MODE1},
- {GB_TILE_MODE2},
- {GB_TILE_MODE3},
- {GB_TILE_MODE4},
- {GB_TILE_MODE5},
- {GB_TILE_MODE6},
- {GB_TILE_MODE7},
- {GB_TILE_MODE8},
- {GB_TILE_MODE9},
- {GB_TILE_MODE10},
- {GB_TILE_MODE11},
- {GB_TILE_MODE12},
- {GB_TILE_MODE13},
- {GB_TILE_MODE14},
- {GB_TILE_MODE15},
- {GB_TILE_MODE16},
- {GB_TILE_MODE17},
- {GB_TILE_MODE18},
- {GB_TILE_MODE19},
- {GB_TILE_MODE20},
- {GB_TILE_MODE21},
- {GB_TILE_MODE22},
- {GB_TILE_MODE23},
- {GB_TILE_MODE24},
- {GB_TILE_MODE25},
- {GB_TILE_MODE26},
- {GB_TILE_MODE27},
- {GB_TILE_MODE28},
- {GB_TILE_MODE29},
- {GB_TILE_MODE30},
- {GB_TILE_MODE31},
- {CC_RB_BACKEND_DISABLE, true},
- {GC_USER_RB_BACKEND_DISABLE, true},
- {PA_SC_RASTER_CONFIG, true},
+ {mmGB_ADDR_CONFIG},
+ {mmMC_ARB_RAMCFG},
+ {mmGB_TILE_MODE0},
+ {mmGB_TILE_MODE1},
+ {mmGB_TILE_MODE2},
+ {mmGB_TILE_MODE3},
+ {mmGB_TILE_MODE4},
+ {mmGB_TILE_MODE5},
+ {mmGB_TILE_MODE6},
+ {mmGB_TILE_MODE7},
+ {mmGB_TILE_MODE8},
+ {mmGB_TILE_MODE9},
+ {mmGB_TILE_MODE10},
+ {mmGB_TILE_MODE11},
+ {mmGB_TILE_MODE12},
+ {mmGB_TILE_MODE13},
+ {mmGB_TILE_MODE14},
+ {mmGB_TILE_MODE15},
+ {mmGB_TILE_MODE16},
+ {mmGB_TILE_MODE17},
+ {mmGB_TILE_MODE18},
+ {mmGB_TILE_MODE19},
+ {mmGB_TILE_MODE20},
+ {mmGB_TILE_MODE21},
+ {mmGB_TILE_MODE22},
+ {mmGB_TILE_MODE23},
+ {mmGB_TILE_MODE24},
+ {mmGB_TILE_MODE25},
+ {mmGB_TILE_MODE26},
+ {mmGB_TILE_MODE27},
+ {mmGB_TILE_MODE28},
+ {mmGB_TILE_MODE29},
+ {mmGB_TILE_MODE30},
+ {mmGB_TILE_MODE31},
+ {mmCC_RB_BACKEND_DISABLE, true},
+ {mmGC_USER_RB_BACKEND_DISABLE, true},
+ {mmPA_SC_RASTER_CONFIG, true},
};
static uint32_t si_get_register_value(struct amdgpu_device *adev,
@@ -1181,12 +1188,12 @@ static uint32_t si_get_register_value(struct amdgpu_device *adev,
mutex_lock(&adev->grbm_idx_mutex);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
val = RREG32(reg_offset);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
return val;
} else {
@@ -1264,37 +1271,37 @@ static bool si_read_disabled_bios(struct amdgpu_device *adev)
u32 rom_cntl;
bool r;
- bus_cntl = RREG32(R600_BUS_CNTL);
+ bus_cntl = RREG32(mmBUS_CNTL);
if (adev->mode_info.num_crtc) {
- d1vga_control = RREG32(AVIVO_D1VGA_CONTROL);
- d2vga_control = RREG32(AVIVO_D2VGA_CONTROL);
- vga_render_control = RREG32(VGA_RENDER_CONTROL);
+ d1vga_control = RREG32(mmD1VGA_CONTROL);
+ d2vga_control = RREG32(mmD2VGA_CONTROL);
+ vga_render_control = RREG32(mmVGA_RENDER_CONTROL);
}
rom_cntl = RREG32(R600_ROM_CNTL);
/* enable the rom */
- WREG32(R600_BUS_CNTL, (bus_cntl & ~R600_BIOS_ROM_DIS));
+ WREG32(mmBUS_CNTL, (bus_cntl & ~BUS_CNTL__BIOS_ROM_DIS_MASK));
if (adev->mode_info.num_crtc) {
/* Disable VGA mode */
- WREG32(AVIVO_D1VGA_CONTROL,
- (d1vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE |
- AVIVO_DVGA_CONTROL_TIMING_SELECT)));
- WREG32(AVIVO_D2VGA_CONTROL,
- (d2vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE |
- AVIVO_DVGA_CONTROL_TIMING_SELECT)));
- WREG32(VGA_RENDER_CONTROL,
- (vga_render_control & C_000300_VGA_VSTATUS_CNTL));
+ WREG32(mmD1VGA_CONTROL,
+ (d1vga_control & ~(D1VGA_CONTROL__D1VGA_MODE_ENABLE_MASK |
+ D1VGA_CONTROL__D1VGA_TIMING_SELECT_MASK)));
+ WREG32(mmD2VGA_CONTROL,
+ (d2vga_control & ~(D1VGA_CONTROL__D1VGA_MODE_ENABLE_MASK |
+ D1VGA_CONTROL__D1VGA_TIMING_SELECT_MASK)));
+ WREG32(mmVGA_RENDER_CONTROL,
+ (vga_render_control & ~VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK));
}
WREG32(R600_ROM_CNTL, rom_cntl | R600_SCK_OVERWRITE);
r = amdgpu_read_bios(adev);
/* restore regs */
- WREG32(R600_BUS_CNTL, bus_cntl);
+ WREG32(mmBUS_CNTL, bus_cntl);
if (adev->mode_info.num_crtc) {
- WREG32(AVIVO_D1VGA_CONTROL, d1vga_control);
- WREG32(AVIVO_D2VGA_CONTROL, d2vga_control);
- WREG32(VGA_RENDER_CONTROL, vga_render_control);
+ WREG32(mmD1VGA_CONTROL, d1vga_control);
+ WREG32(mmD2VGA_CONTROL, d2vga_control);
+ WREG32(mmVGA_RENDER_CONTROL, vga_render_control);
}
WREG32(R600_ROM_CNTL, rom_cntl);
return r;
@@ -1331,23 +1338,24 @@ static void si_set_clk_bypass_mode(struct amdgpu_device *adev)
{
u32 tmp, i;
- tmp = RREG32(CG_SPLL_FUNC_CNTL);
- tmp |= SPLL_BYPASS_EN;
- WREG32(CG_SPLL_FUNC_CNTL, tmp);
+ tmp = RREG32(mmCG_SPLL_FUNC_CNTL);
+ tmp |= CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN_MASK;
+ WREG32(mmCG_SPLL_FUNC_CNTL, tmp);
- tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
- tmp |= SPLL_CTLREQ_CHG;
- WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
+ tmp = RREG32(mmCG_SPLL_FUNC_CNTL_2);
+ tmp |= CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK;
+ WREG32(mmCG_SPLL_FUNC_CNTL_2, tmp);
for (i = 0; i < adev->usec_timeout; i++) {
- if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
+ if (RREG32(mmCG_SPLL_STATUS) & CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK)
break;
udelay(1);
}
- tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
- tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
- WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
+ tmp = RREG32(mmCG_SPLL_FUNC_CNTL_2);
+ tmp &= ~(CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK |
+ CG_SPLL_FUNC_CNTL_2__SCLK_MUX_UPDATE_MASK);
+ WREG32(mmCG_SPLL_FUNC_CNTL_2, tmp);
tmp = RREG32(MPLL_CNTL_MODE);
tmp &= ~MPLL_MCLK_SEL;
@@ -1358,21 +1366,21 @@ static void si_spll_powerdown(struct amdgpu_device *adev)
{
u32 tmp;
- tmp = RREG32(SPLL_CNTL_MODE);
- tmp |= SPLL_SW_DIR_CONTROL;
- WREG32(SPLL_CNTL_MODE, tmp);
+ tmp = RREG32(mmSPLL_CNTL_MODE);
+ tmp |= SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK;
+ WREG32(mmSPLL_CNTL_MODE, tmp);
- tmp = RREG32(CG_SPLL_FUNC_CNTL);
- tmp |= SPLL_RESET;
- WREG32(CG_SPLL_FUNC_CNTL, tmp);
+ tmp = RREG32(mmCG_SPLL_FUNC_CNTL);
+ tmp |= CG_SPLL_FUNC_CNTL__SPLL_RESET_MASK;
+ WREG32(mmCG_SPLL_FUNC_CNTL, tmp);
- tmp = RREG32(CG_SPLL_FUNC_CNTL);
- tmp |= SPLL_SLEEP;
- WREG32(CG_SPLL_FUNC_CNTL, tmp);
+ tmp = RREG32(mmCG_SPLL_FUNC_CNTL);
+ tmp |= CG_SPLL_FUNC_CNTL__SPLL_SLEEP_MASK;
+ WREG32(mmCG_SPLL_FUNC_CNTL, tmp);
- tmp = RREG32(SPLL_CNTL_MODE);
- tmp &= ~SPLL_SW_DIR_CONTROL;
- WREG32(SPLL_CNTL_MODE, tmp);
+ tmp = RREG32(mmSPLL_CNTL_MODE);
+ tmp &= ~SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK;
+ WREG32(mmSPLL_CNTL_MODE, tmp);
}
static int si_gpu_pci_config_reset(struct amdgpu_device *adev)
@@ -1409,9 +1417,9 @@ static int si_gpu_pci_config_reset(struct amdgpu_device *adev)
return r;
}
-static bool si_asic_supports_baco(struct amdgpu_device *adev)
+static int si_asic_supports_baco(struct amdgpu_device *adev)
{
- return false;
+ return 0;
}
static enum amd_reset_method
@@ -1454,14 +1462,14 @@ static void si_vga_set_state(struct amdgpu_device *adev, bool state)
{
uint32_t temp;
- temp = RREG32(CONFIG_CNTL);
+ temp = RREG32(mmCONFIG_CNTL);
if (!state) {
temp &= ~(1<<0);
temp |= (1<<1);
} else {
temp &= ~(1<<1);
}
- WREG32(CONFIG_CNTL, temp);
+ WREG32(mmCONFIG_CNTL, temp);
}
static u32 si_get_xclk(struct amdgpu_device *adev)
@@ -1469,12 +1477,12 @@ static u32 si_get_xclk(struct amdgpu_device *adev)
u32 reference_clock = adev->clock.spll.reference_freq;
u32 tmp;
- tmp = RREG32(CG_CLKPIN_CNTL_2);
- if (tmp & MUX_TCLK_TO_XCLK)
+ tmp = RREG32(mmCG_CLKPIN_CNTL_2);
+ if (tmp & CG_CLKPIN_CNTL_2__MUX_TCLK_TO_XCLK_MASK)
return TCLK;
- tmp = RREG32(CG_CLKPIN_CNTL);
- if (tmp & XTALIN_DIVIDE)
+ tmp = RREG32(mmCG_CLKPIN_CNTL);
+ if (tmp & CG_CLKPIN_CNTL__XTALIN_DIVIDE_MASK)
return reference_clock / 4;
return reference_clock;
@@ -1519,9 +1527,9 @@ static int si_get_pcie_lanes(struct amdgpu_device *adev)
if (adev->flags & AMD_IS_APU)
return 0;
- link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
+ link_width_cntl = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL);
- switch ((link_width_cntl & LC_LINK_WIDTH_RD_MASK) >> LC_LINK_WIDTH_RD_SHIFT) {
+ switch ((link_width_cntl & PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK) >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT) {
case LC_LINK_WIDTH_X1:
return 1;
case LC_LINK_WIDTH_X2:
@@ -1568,13 +1576,13 @@ static void si_set_pcie_lanes(struct amdgpu_device *adev, int lanes)
return;
}
- link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
- link_width_cntl &= ~LC_LINK_WIDTH_MASK;
- link_width_cntl |= mask << LC_LINK_WIDTH_SHIFT;
- link_width_cntl |= (LC_RECONFIG_NOW |
- LC_RECONFIG_ARC_MISSING_ESCAPE);
+ link_width_cntl = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL);
+ link_width_cntl &= ~PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_MASK;
+ link_width_cntl |= mask << PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH__SHIFT;
+ link_width_cntl |= (PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_NOW_MASK |
+ PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_ARC_MISSING_ESCAPE_MASK);
- WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
+ WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
}
static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
@@ -1888,7 +1896,7 @@ static int si_vce_send_vcepll_ctlreq(struct amdgpu_device *adev)
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
if (i == SI_MAX_CTLACKS_ASSERTION_WAIT) {
- DRM_ERROR("Timeout setting UVD clocks!\n");
+ DRM_ERROR("Timeout setting VCE clocks!\n");
return -ETIMEDOUT;
}
@@ -1910,6 +1918,14 @@ static int si_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
~VCEPLL_BYPASS_EN_MASK);
if (!evclk || !ecclk) {
+ /*
+ * On some chips, the PLL takes way too long to get out of
+ * sleep mode, causing a timeout waiting on CTLACK/CTLACK2.
+ * Leave the PLL running in bypass mode.
+ */
+ if (adev->pdev->device == 0x6780)
+ return 0;
+
/* Keep the Bypass mode, put PLL to sleep */
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
~VCEPLL_SLEEP_MASK);
@@ -2018,13 +2034,13 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
static uint32_t si_get_rev_id(struct amdgpu_device *adev)
{
- return (RREG32(CC_DRM_ID_STRAPS) & CC_DRM_ID_STRAPS__ATI_REV_ID_MASK)
+ return (RREG32(mmCC_DRM_ID_STRAPS) & CC_DRM_ID_STRAPS__ATI_REV_ID_MASK)
>> CC_DRM_ID_STRAPS__ATI_REV_ID__SHIFT;
}
-static int si_common_early_init(void *handle)
+static int si_common_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->smc_rreg = &si_smc_rreg;
adev->smc_wreg = &si_smc_wreg;
@@ -2148,17 +2164,6 @@ static int si_common_early_init(void *handle)
return 0;
}
-static int si_common_sw_init(void *handle)
-{
- return 0;
-}
-
-static int si_common_sw_fini(void *handle)
-{
- return 0;
-}
-
-
static void si_init_golden_registers(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
@@ -2250,9 +2255,9 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)))
return;
- speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
- current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
- LC_CURRENT_DATA_RATE_SHIFT;
+ speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL);
+ current_data_rate = (speed_cntl & PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK) >>
+ PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
if (current_data_rate == 2) {
DRM_INFO("PCIE gen 3 link speeds already enabled\n");
@@ -2276,29 +2281,20 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
u16 bridge_cfg2, gpu_cfg2;
u32 max_lw, current_lw, tmp;
- pcie_capability_read_word(root, PCI_EXP_LNKCTL,
- &bridge_cfg);
- pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL,
- &gpu_cfg);
+ pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
+ pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
- tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
- pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
-
- tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
- pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL,
- tmp16);
-
- tmp = RREG32_PCIE(PCIE_LC_STATUS1);
- max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
- current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
+ tmp = RREG32_PCIE(ixPCIE_LC_STATUS1);
+ max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >> PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH__SHIFT;
+ current_lw = (tmp & PCIE_LC_STATUS1__LC_OPERATING_LINK_WIDTH_MASK) >> PCIE_LC_STATUS1__LC_OPERATING_LINK_WIDTH__SHIFT;
if (current_lw < max_lw) {
- tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
- if (tmp & LC_RENEGOTIATION_SUPPORT) {
- tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
- tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
- tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
- WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
+ tmp = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL);
+ if (tmp & PCIE_LC_LINK_WIDTH_CNTL__LC_RENEGOTIATION_SUPPORT_MASK) {
+ tmp &= ~(PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_UPCONFIGURE_DIS_MASK);
+ tmp |= (max_lw << PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH__SHIFT);
+ tmp |= PCIE_LC_LINK_WIDTH_CNTL__LC_UPCONFIGURE_SUPPORT_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_RENEGOTIATE_EN_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_NOW_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, tmp);
}
}
@@ -2321,84 +2317,66 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
PCI_EXP_LNKCTL2,
&gpu_cfg2);
- tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
- tmp |= LC_SET_QUIESCE;
- WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+ tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4);
+ tmp |= PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp);
- tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
- tmp |= LC_REDO_EQ;
- WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+ tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4);
+ tmp |= PCIE_LC_CNTL4__LC_REDO_EQ_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp);
mdelay(100);
- pcie_capability_read_word(root, PCI_EXP_LNKCTL,
- &tmp16);
- tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
- tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
- pcie_capability_write_word(root, PCI_EXP_LNKCTL,
- tmp16);
-
- pcie_capability_read_word(adev->pdev,
- PCI_EXP_LNKCTL,
- &tmp16);
- tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
- tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
- pcie_capability_write_word(adev->pdev,
- PCI_EXP_LNKCTL,
- tmp16);
-
- pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
- &tmp16);
- tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN);
- tmp16 |= (bridge_cfg2 &
- (PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN));
- pcie_capability_write_word(root,
- PCI_EXP_LNKCTL2,
- tmp16);
-
- pcie_capability_read_word(adev->pdev,
- PCI_EXP_LNKCTL2,
- &tmp16);
- tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN);
- tmp16 |= (gpu_cfg2 &
- (PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN));
- pcie_capability_write_word(adev->pdev,
- PCI_EXP_LNKCTL2,
- tmp16);
-
- tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
- tmp &= ~LC_SET_QUIESCE;
- WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+ pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_HAWD,
+ bridge_cfg &
+ PCI_EXP_LNKCTL_HAWD);
+ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_HAWD,
+ gpu_cfg &
+ PCI_EXP_LNKCTL_HAWD);
+
+ pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN,
+ bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN,
+ gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+
+ tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4);
+ tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp);
}
}
}
- speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
- speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
- WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
-
- pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
+ speed_cntl |= PCIE_LC_SPEED_CNTL__LC_FORCE_EN_SW_SPEED_CHANGE_MASK | PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_HW_SPEED_CHANGE_MASK;
+ speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL, speed_cntl);
+ tmp16 = 0;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
else
tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
- pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16);
+ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_TLS, tmp16);
- speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
- speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
- WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
+ speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL);
+ speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL, speed_cntl);
for (i = 0; i < adev->usec_timeout; i++) {
- speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
- if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
+ speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL);
+ if ((speed_cntl & PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK) == 0)
break;
udelay(1);
}
@@ -2453,126 +2431,124 @@ static void si_program_aspm(struct amdgpu_device *adev)
bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
bool disable_clkreq = false;
- if (amdgpu_aspm == 0)
+ if (!amdgpu_device_should_use_aspm(adev))
return;
- if (adev->flags & AMD_IS_APU)
- return;
- orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
- data &= ~LC_XMIT_N_FTS_MASK;
- data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL);
+ data &= ~PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_MASK;
+ data |= (0x24 << PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS__SHIFT) | PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_OVERRIDE_EN_MASK;
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL, data);
- orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
- data |= LC_GO_TO_RECOVERY;
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL3);
+ data |= PCIE_LC_CNTL3__LC_GO_TO_RECOVERY_MASK;
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL3, data);
- orig = data = RREG32_PCIE(PCIE_P_CNTL);
- data |= P_IGNORE_EDB_ERR;
+ orig = data = RREG32_PCIE(ixPCIE_P_CNTL);
+ data |= PCIE_P_CNTL__P_IGNORE_EDB_ERR_MASK;
if (orig != data)
- WREG32_PCIE(PCIE_P_CNTL, data);
+ WREG32_PCIE(ixPCIE_P_CNTL, data);
- orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
- data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
- data |= LC_PMI_TO_L1_DIS;
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL);
+ data &= ~(PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK | PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK);
+ data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
if (!disable_l0s)
- data |= LC_L0S_INACTIVITY(7);
+ data |= (7 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT);
if (!disable_l1) {
- data |= LC_L1_INACTIVITY(7);
- data &= ~LC_PMI_TO_L1_DIS;
+ data |= (7 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT);
+ data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data);
if (!disable_plloff_in_l1) {
bool clk_req_support;
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0);
- data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
- data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_0);
+ data &= ~(PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0_MASK | PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0_MASK);
+ data |= (7 << PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0__SHIFT) | (7 << PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0__SHIFT);
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_0, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_0, data);
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_1);
- data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
- data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_1);
+ data &= ~(PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1_MASK | PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1_MASK);
+ data |= (7 << PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1__SHIFT) | (7 << PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1__SHIFT);
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_1, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_1, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_0);
- data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
- data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_0);
+ data &= ~(PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0_MASK | PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0_MASK);
+ data |= (7 << PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0__SHIFT) | (7 << PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0__SHIFT);
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_0, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_0, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_1);
- data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
- data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_1);
+ data &= ~(PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1_MASK | PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1_MASK);
+ data |= (7 << PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1__SHIFT) | (7 << PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1__SHIFT);
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_1, data);
if ((adev->asic_type != CHIP_OLAND) && (adev->asic_type != CHIP_HAINAN)) {
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0);
- data &= ~PLL_RAMP_UP_TIME_0_MASK;
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_0);
+ data &= ~PB0_PIF_PWRDOWN_0__PLL_RAMP_UP_TIME_0_MASK;
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_0, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_0, data);
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_1);
- data &= ~PLL_RAMP_UP_TIME_1_MASK;
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_1);
+ data &= ~PB0_PIF_PWRDOWN_1__PLL_RAMP_UP_TIME_1_MASK;
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_1, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_1, data);
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_2);
- data &= ~PLL_RAMP_UP_TIME_2_MASK;
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_2);
+ data &= ~PB0_PIF_PWRDOWN_2__PLL_RAMP_UP_TIME_2_MASK;
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_2, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_2, data);
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_3);
- data &= ~PLL_RAMP_UP_TIME_3_MASK;
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_3);
+ data &= ~PB0_PIF_PWRDOWN_3__PLL_RAMP_UP_TIME_3_MASK;
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_3, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_3, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_0);
- data &= ~PLL_RAMP_UP_TIME_0_MASK;
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_0);
+ data &= ~PB1_PIF_PWRDOWN_0__PLL_RAMP_UP_TIME_0_MASK;
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_0, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_0, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_1);
- data &= ~PLL_RAMP_UP_TIME_1_MASK;
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_1);
+ data &= ~PB1_PIF_PWRDOWN_1__PLL_RAMP_UP_TIME_1_MASK;
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_1, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_2);
- data &= ~PLL_RAMP_UP_TIME_2_MASK;
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_2);
+ data &= ~PB1_PIF_PWRDOWN_2__PLL_RAMP_UP_TIME_2_MASK;
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_2, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_2, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_3);
- data &= ~PLL_RAMP_UP_TIME_3_MASK;
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_3);
+ data &= ~PB1_PIF_PWRDOWN_3__PLL_RAMP_UP_TIME_3_MASK;
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_3, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_3, data);
}
- orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
- data &= ~LC_DYN_LANES_PWR_STATE_MASK;
- data |= LC_DYN_LANES_PWR_STATE(3);
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL);
+ data &= ~PCIE_LC_LINK_WIDTH_CNTL__LC_DYN_LANES_PWR_STATE_MASK;
+ data |= (3 << PCIE_LC_LINK_WIDTH_CNTL__LC_DYN_LANES_PWR_STATE__SHIFT);
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, data);
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_CNTL);
- data &= ~LS2_EXIT_TIME_MASK;
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_CNTL);
+ data &= ~PB0_PIF_CNTL__LS2_EXIT_TIME_MASK;
if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN))
- data |= LS2_EXIT_TIME(5);
+ data |= (5 << PB0_PIF_CNTL__LS2_EXIT_TIME__SHIFT);
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_CNTL, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_CNTL, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_CNTL);
- data &= ~LS2_EXIT_TIME_MASK;
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_CNTL);
+ data &= ~PB1_PIF_CNTL__LS2_EXIT_TIME_MASK;
if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN))
- data |= LS2_EXIT_TIME(5);
+ data |= (5 << PB1_PIF_CNTL__LS2_EXIT_TIME__SHIFT);
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_CNTL, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_CNTL, data);
if (!disable_clkreq &&
!pci_is_root_bus(adev->pdev->bus)) {
@@ -2588,64 +2564,64 @@ static void si_program_aspm(struct amdgpu_device *adev)
}
if (clk_req_support) {
- orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
- data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL2);
+ data |= PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK | PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK;
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL2, data);
- orig = data = RREG32(THM_CLK_CNTL);
- data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
- data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
+ orig = data = RREG32(mmTHM_CLK_CNTL);
+ data &= ~(THM_CLK_CNTL__CMON_CLK_SEL_MASK | THM_CLK_CNTL__TMON_CLK_SEL_MASK);
+ data |= (1 << THM_CLK_CNTL__CMON_CLK_SEL__SHIFT) | (1 << THM_CLK_CNTL__TMON_CLK_SEL__SHIFT);
if (orig != data)
- WREG32(THM_CLK_CNTL, data);
+ WREG32(mmTHM_CLK_CNTL, data);
- orig = data = RREG32(MISC_CLK_CNTL);
- data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
- data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
+ orig = data = RREG32(mmMISC_CLK_CNTL);
+ data &= ~(MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL_MASK | MISC_CLK_CNTL__ZCLK_SEL_MASK);
+ data |= (1 << MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL__SHIFT) | (1 << MISC_CLK_CNTL__ZCLK_SEL__SHIFT);
if (orig != data)
- WREG32(MISC_CLK_CNTL, data);
+ WREG32(mmMISC_CLK_CNTL, data);
- orig = data = RREG32(CG_CLKPIN_CNTL);
- data &= ~BCLK_AS_XCLK;
+ orig = data = RREG32(mmCG_CLKPIN_CNTL);
+ data &= ~CG_CLKPIN_CNTL__BCLK_AS_XCLK_MASK;
if (orig != data)
- WREG32(CG_CLKPIN_CNTL, data);
+ WREG32(mmCG_CLKPIN_CNTL, data);
- orig = data = RREG32(CG_CLKPIN_CNTL_2);
- data &= ~FORCE_BIF_REFCLK_EN;
+ orig = data = RREG32(mmCG_CLKPIN_CNTL_2);
+ data &= ~CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN_MASK;
if (orig != data)
- WREG32(CG_CLKPIN_CNTL_2, data);
+ WREG32(mmCG_CLKPIN_CNTL_2, data);
- orig = data = RREG32(MPLL_BYPASSCLK_SEL);
- data &= ~MPLL_CLKOUT_SEL_MASK;
- data |= MPLL_CLKOUT_SEL(4);
+ orig = data = RREG32(mmMPLL_BYPASSCLK_SEL);
+ data &= ~MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK;
+ data |= 4 << MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT;
if (orig != data)
- WREG32(MPLL_BYPASSCLK_SEL, data);
+ WREG32(mmMPLL_BYPASSCLK_SEL, data);
- orig = data = RREG32(SPLL_CNTL_MODE);
- data &= ~SPLL_REFCLK_SEL_MASK;
+ orig = data = RREG32(mmSPLL_CNTL_MODE);
+ data &= ~SPLL_CNTL_MODE__SPLL_REFCLK_SEL_MASK;
if (orig != data)
- WREG32(SPLL_CNTL_MODE, data);
+ WREG32(mmSPLL_CNTL_MODE, data);
}
}
} else {
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data);
}
- orig = data = RREG32_PCIE(PCIE_CNTL2);
- data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
+ orig = data = RREG32_PCIE(ixPCIE_CNTL2);
+ data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK | PCIE_CNTL2__MST_MEM_LS_EN_MASK | PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK;
if (orig != data)
- WREG32_PCIE(PCIE_CNTL2, data);
+ WREG32_PCIE(ixPCIE_CNTL2, data);
if (!disable_l0s) {
- data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
- if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
- data = RREG32_PCIE(PCIE_LC_STATUS1);
- if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
- orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
- data &= ~LC_L0S_INACTIVITY_MASK;
+ data = RREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL);
+ if((data & PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) == PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) {
+ data = RREG32_PCIE(ixPCIE_LC_STATUS1);
+ if ((data & PCIE_LC_STATUS1__LC_REVERSE_XMIT_MASK) && (data & PCIE_LC_STATUS1__LC_REVERSE_RCVR_MASK)) {
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL);
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data);
}
}
}
@@ -2662,9 +2638,9 @@ static void si_fix_pci_max_read_req_size(struct amdgpu_device *adev)
pcie_set_readrq(adev->pdev, 512);
}
-static int si_common_hw_init(void *handle)
+static int si_common_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
si_fix_pci_max_read_req_size(adev);
si_init_golden_registers(adev);
@@ -2674,47 +2650,28 @@ static int si_common_hw_init(void *handle)
return 0;
}
-static int si_common_hw_fini(void *handle)
+static int si_common_hw_fini(struct amdgpu_ip_block *ip_block)
{
return 0;
}
-static int si_common_suspend(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_common_hw_fini(adev);
-}
-
-static int si_common_resume(void *handle)
+static int si_common_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_common_hw_init(adev);
+ return si_common_hw_init(ip_block);
}
-static bool si_common_is_idle(void *handle)
+static bool si_common_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int si_common_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int si_common_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int si_common_set_clockgating_state(void *handle,
+static int si_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int si_common_set_powergating_state(void *handle,
+static int si_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -2723,16 +2680,10 @@ static int si_common_set_powergating_state(void *handle,
static const struct amd_ip_funcs si_common_ip_funcs = {
.name = "si_common",
.early_init = si_common_early_init,
- .late_init = NULL,
- .sw_init = si_common_sw_init,
- .sw_fini = si_common_sw_fini,
.hw_init = si_common_hw_init,
.hw_fini = si_common_hw_fini,
- .suspend = si_common_suspend,
.resume = si_common_resume,
.is_idle = si_common_is_idle,
- .wait_for_idle = si_common_wait_for_idle,
- .soft_reset = si_common_soft_reset,
.set_clockgating_state = si_common_set_clockgating_state,
.set_powergating_state = si_common_set_powergating_state,
};
@@ -2767,7 +2718,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
else
amdgpu_device_ip_block_add(adev, &dce_v6_0_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block);
- /* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */
+ amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block);
break;
case CHIP_OLAND:
amdgpu_device_ip_block_add(adev, &si_common_ip_block);
@@ -2785,7 +2736,6 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
else
amdgpu_device_ip_block_add(adev, &dce_v6_4_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block);
- /* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */
break;
case CHIP_HAINAN:
amdgpu_device_ip_block_add(adev, &si_common_ip_block);
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index 195b45bcb8ad..7f18e4875287 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -27,6 +27,8 @@
#include "si.h"
#include "sid.h"
+#include "oss/oss_1_0_d.h"
+#include "oss/oss_1_0_sh_mask.h"
const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
{
DMA0_REGISTER_OFFSET,
@@ -38,17 +40,31 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev);
static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev);
static void si_dma_set_irq_funcs(struct amdgpu_device *adev);
+/**
+ * si_dma_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current rptr from the hardware (SI).
+ */
static uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring)
{
- return ring->adev->wb.wb[ring->rptr_offs>>2];
+ return *ring->rptr_cpu_addr;
}
+/**
+ * si_dma_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware (SI).
+ */
static uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
- return (RREG32(DMA_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2;
+ return (RREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2;
}
static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
@@ -56,8 +72,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
- WREG32(DMA_RB_WPTR + sdma_offsets[me],
- (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
+ WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc);
}
static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
@@ -113,19 +128,14 @@ static void si_dma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
static void si_dma_stop(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
u32 rb_cntl;
unsigned i;
for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
/* dma0 */
- rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]);
- rb_cntl &= ~DMA_RB_ENABLE;
- WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
+ rb_cntl = RREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i]);
+ rb_cntl &= ~DMA_GFX_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
}
}
@@ -139,53 +149,48 @@ static int si_dma_start(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
- WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0);
- WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
+ WREG32(mmDMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0);
+ WREG32(mmDMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
/* Set ring buffer size in dwords */
rb_bufsz = order_base_2(ring->ring_size / 4);
rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
- rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
+ rb_cntl |= DMA_GFX_RB_CNTL__RB_SWAP_ENABLE_MASK | DMA_GFX_RB_CNTL__RPTR_WRITEBACK_SWAP_ENABLE_MASK;
#endif
- WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);
+ WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
/* Initialize the ring buffer's read and write pointers */
- WREG32(DMA_RB_RPTR + sdma_offsets[i], 0);
- WREG32(DMA_RB_WPTR + sdma_offsets[i], 0);
+ WREG32(mmDMA_GFX_RB_RPTR + sdma_offsets[i], 0);
+ WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], 0);
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ rptr_addr = ring->rptr_gpu_addr;
- WREG32(DMA_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr));
- WREG32(DMA_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF);
+ WREG32(mmDMA_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr));
+ WREG32(mmDMA_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF);
- rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
+ rb_cntl |= DMA_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE_MASK;
- WREG32(DMA_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
+ WREG32(mmDMA_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
/* enable DMA IBs */
- ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
+ ib_cntl = DMA_GFX_IB_CNTL__IB_ENABLE_MASK | DMA_GFX_IB_CNTL__CMD_VMID_FORCE_MASK;
#ifdef __BIG_ENDIAN
- ib_cntl |= DMA_IB_SWAP_ENABLE;
+ ib_cntl |= DMA_GFX_IB_CNTL__IB_SWAP_ENABLE_MASK;
#endif
- WREG32(DMA_IB_CNTL + sdma_offsets[i], ib_cntl);
+ WREG32(mmDMA_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
- dma_cntl = RREG32(DMA_CNTL + sdma_offsets[i]);
- dma_cntl &= ~CTXEMPTY_INT_ENABLE;
- WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);
+ dma_cntl = RREG32(mmDMA_CNTL + sdma_offsets[i]);
+ dma_cntl &= ~DMA_CNTL__CTXEMPTY_INT_ENABLE_MASK;
+ WREG32(mmDMA_CNTL + sdma_offsets[i], dma_cntl);
ring->wptr = 0;
- WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
- WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
-
- ring->sched.ready = true;
+ WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
+ WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_GFX_RB_CNTL__RB_ENABLE_MASK);
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -297,7 +302,7 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -468,11 +473,11 @@ static void si_dma_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
-static int si_dma_early_init(void *handle)
+static int si_dma_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- adev->sdma.num_instances = 2;
+ adev->sdma.num_instances = SDMA_MAX_INSTANCE;
si_dma_set_ring_funcs(adev);
si_dma_set_buffer_funcs(adev);
@@ -482,11 +487,11 @@ static int si_dma_early_init(void *handle)
return 0;
}
-static int si_dma_sw_init(void *handle)
+static int si_dma_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* DMA0 trap event */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
@@ -517,9 +522,9 @@ static int si_dma_sw_init(void *handle)
return r;
}
-static int si_dma_sw_fini(void *handle)
+static int si_dma_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
@@ -528,61 +533,56 @@ static int si_dma_sw_fini(void *handle)
return 0;
}
-static int si_dma_hw_init(void *handle)
+static int si_dma_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return si_dma_start(adev);
}
-static int si_dma_hw_fini(void *handle)
+static int si_dma_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- si_dma_stop(adev);
+ si_dma_stop(ip_block->adev);
return 0;
}
-static int si_dma_suspend(void *handle)
+static int si_dma_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_dma_hw_fini(adev);
+ return si_dma_hw_fini(ip_block);
}
-static int si_dma_resume(void *handle)
+static int si_dma_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_dma_hw_init(adev);
+ return si_dma_hw_init(ip_block);
}
-static bool si_dma_is_idle(void *handle)
+static bool si_dma_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 tmp = RREG32(SRBM_STATUS2);
+ struct amdgpu_device *adev = ip_block->adev;
+
+ u32 tmp = RREG32(mmSRBM_STATUS2);
- if (tmp & (DMA_BUSY_MASK | DMA1_BUSY_MASK))
+ if (tmp & (SRBM_STATUS2__DMA_BUSY_MASK | SRBM_STATUS2__DMA1_BUSY_MASK))
return false;
return true;
}
-static int si_dma_wait_for_idle(void *handle)
+static int si_dma_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (si_dma_is_idle(handle))
+ if (si_dma_is_idle(ip_block))
return 0;
udelay(1);
}
return -ETIMEDOUT;
}
-static int si_dma_soft_reset(void *handle)
+static int si_dma_soft_reset(struct amdgpu_ip_block *ip_block)
{
DRM_INFO("si_dma_soft_reset --- not implemented !!!!!!!\n");
return 0;
@@ -599,14 +599,14 @@ static int si_dma_set_trap_irq_state(struct amdgpu_device *adev,
case AMDGPU_SDMA_IRQ_INSTANCE0:
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
- sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET);
- sdma_cntl &= ~TRAP_ENABLE;
- WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
+ sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET);
+ sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
break;
case AMDGPU_IRQ_STATE_ENABLE:
- sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET);
- sdma_cntl |= TRAP_ENABLE;
- WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
+ sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET);
+ sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
break;
default:
break;
@@ -615,14 +615,14 @@ static int si_dma_set_trap_irq_state(struct amdgpu_device *adev,
case AMDGPU_SDMA_IRQ_INSTANCE1:
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
- sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET);
- sdma_cntl &= ~TRAP_ENABLE;
- WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
+ sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET);
+ sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
break;
case AMDGPU_IRQ_STATE_ENABLE:
- sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET);
- sdma_cntl |= TRAP_ENABLE;
- WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
+ sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET);
+ sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
break;
default:
break;
@@ -645,13 +645,13 @@ static int si_dma_process_trap_irq(struct amdgpu_device *adev,
return 0;
}
-static int si_dma_set_clockgating_state(void *handle,
+static int si_dma_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
u32 orig, data, offset;
int i;
bool enable;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
enable = (state == AMD_CG_STATE_GATE);
@@ -661,11 +661,11 @@ static int si_dma_set_clockgating_state(void *handle,
offset = DMA0_REGISTER_OFFSET;
else
offset = DMA1_REGISTER_OFFSET;
- orig = data = RREG32(DMA_POWER_CNTL + offset);
- data &= ~MEM_POWER_OVERRIDE;
+ orig = data = RREG32(mmDMA_POWER_CNTL + offset);
+ data &= ~DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
if (data != orig)
- WREG32(DMA_POWER_CNTL + offset, data);
- WREG32(DMA_CLK_CTRL + offset, 0x00000100);
+ WREG32(mmDMA_POWER_CNTL + offset, data);
+ WREG32(mmDMA_CLK_CTRL + offset, 0x00000100);
}
} else {
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -673,33 +673,33 @@ static int si_dma_set_clockgating_state(void *handle,
offset = DMA0_REGISTER_OFFSET;
else
offset = DMA1_REGISTER_OFFSET;
- orig = data = RREG32(DMA_POWER_CNTL + offset);
- data |= MEM_POWER_OVERRIDE;
+ orig = data = RREG32(mmDMA_POWER_CNTL + offset);
+ data |= DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
if (data != orig)
- WREG32(DMA_POWER_CNTL + offset, data);
+ WREG32(mmDMA_POWER_CNTL + offset, data);
- orig = data = RREG32(DMA_CLK_CTRL + offset);
+ orig = data = RREG32(mmDMA_CLK_CTRL + offset);
data = 0xff000000;
if (data != orig)
- WREG32(DMA_CLK_CTRL + offset, data);
+ WREG32(mmDMA_CLK_CTRL + offset, data);
}
}
return 0;
}
-static int si_dma_set_powergating_state(void *handle,
+static int si_dma_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- WREG32(DMA_PGFSM_WRITE, 0x00002000);
- WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
+ WREG32(mmDMA_PGFSM_WRITE, 0x00002000);
+ WREG32(mmDMA_PGFSM_CONFIG, 0x100010ff);
for (tmp = 0; tmp < 5; tmp++)
- WREG32(DMA_PGFSM_WRITE, 0);
+ WREG32(mmDMA_PGFSM_WRITE, 0);
return 0;
}
@@ -707,7 +707,6 @@ static int si_dma_set_powergating_state(void *handle,
static const struct amd_ip_funcs si_dma_ip_funcs = {
.name = "si_dma",
.early_init = si_dma_early_init,
- .late_init = NULL,
.sw_init = si_dma_sw_init,
.sw_fini = si_dma_sw_fini,
.hw_init = si_dma_hw_init,
@@ -772,7 +771,7 @@ static void si_dma_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: is this a secure operation
+ * @copy_flags: unused
*
* Copy GPU buffers using the DMA engine (VI).
* Used by the amdgpu ttm implementation to move pages if
@@ -782,7 +781,7 @@ static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
1, 0, 0, byte_count);
diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h
index 4e935baa7b91..6da65778292b 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_enums.h
+++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h
@@ -23,123 +23,15 @@
#ifndef SI_ENUMS_H
#define SI_ENUMS_H
-#define VBLANK_INT_MASK (1 << 0)
-#define DC_HPDx_INT_EN (1 << 16)
-#define VBLANK_ACK (1 << 4)
-#define VLINE_ACK (1 << 4)
-
-#define CURSOR_WIDTH 64
-#define CURSOR_HEIGHT 64
-
-#define VGA_VSTATUS_CNTL 0xFFFCFFFF
#define PRIORITY_MARK_MASK 0x7fff
#define PRIORITY_OFF (1 << 16)
#define PRIORITY_ALWAYS_ON (1 << 20)
-#define INTERLEAVE_EN (1 << 0)
-
-#define LATENCY_WATERMARK_MASK(x) ((x) << 16)
-#define DC_LB_MEMORY_CONFIG(x) ((x) << 20)
-#define ICON_DEGAMMA_MODE(x) (((x) & 0x3) << 8)
-
-#define GRPH_ENDIAN_SWAP(x) (((x) & 0x3) << 0)
-#define GRPH_ENDIAN_NONE 0
-#define GRPH_ENDIAN_8IN16 1
-#define GRPH_ENDIAN_8IN32 2
-#define GRPH_ENDIAN_8IN64 3
-#define GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4)
-#define GRPH_RED_SEL_R 0
-#define GRPH_RED_SEL_G 1
-#define GRPH_RED_SEL_B 2
-#define GRPH_RED_SEL_A 3
-#define GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6)
-#define GRPH_GREEN_SEL_G 0
-#define GRPH_GREEN_SEL_B 1
-#define GRPH_GREEN_SEL_A 2
-#define GRPH_GREEN_SEL_R 3
-#define GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8)
-#define GRPH_BLUE_SEL_B 0
-#define GRPH_BLUE_SEL_A 1
-#define GRPH_BLUE_SEL_R 2
-#define GRPH_BLUE_SEL_G 3
-#define GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10)
-#define GRPH_ALPHA_SEL_A 0
-#define GRPH_ALPHA_SEL_R 1
-#define GRPH_ALPHA_SEL_G 2
-#define GRPH_ALPHA_SEL_B 3
-
-#define GRPH_DEPTH(x) (((x) & 0x3) << 0)
-#define GRPH_DEPTH_8BPP 0
-#define GRPH_DEPTH_16BPP 1
-#define GRPH_DEPTH_32BPP 2
-
-#define GRPH_FORMAT(x) (((x) & 0x7) << 8)
-#define GRPH_FORMAT_INDEXED 0
-#define GRPH_FORMAT_ARGB1555 0
-#define GRPH_FORMAT_ARGB565 1
-#define GRPH_FORMAT_ARGB4444 2
-#define GRPH_FORMAT_AI88 3
-#define GRPH_FORMAT_MONO16 4
-#define GRPH_FORMAT_BGRA5551 5
-#define GRPH_FORMAT_ARGB8888 0
-#define GRPH_FORMAT_ARGB2101010 1
-#define GRPH_FORMAT_32BPP_DIG 2
-#define GRPH_FORMAT_8B_ARGB2101010 3
-#define GRPH_FORMAT_BGRA1010102 4
-#define GRPH_FORMAT_8B_BGRA1010102 5
-#define GRPH_FORMAT_RGB111110 6
-#define GRPH_FORMAT_BGR101111 7
-
-#define GRPH_NUM_BANKS(x) (((x) & 0x3) << 2)
-#define GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20)
-#define GRPH_ARRAY_LINEAR_GENERAL 0
-#define GRPH_ARRAY_LINEAR_ALIGNED 1
-#define GRPH_ARRAY_1D_TILED_THIN1 2
-#define GRPH_ARRAY_2D_TILED_THIN1 4
-#define GRPH_TILE_SPLIT(x) (((x) & 0x7) << 13)
-#define GRPH_BANK_WIDTH(x) (((x) & 0x3) << 6)
-#define GRPH_BANK_HEIGHT(x) (((x) & 0x3) << 11)
-#define GRPH_MACRO_TILE_ASPECT(x) (((x) & 0x3) << 18)
-#define GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20)
-#define GRPH_PIPE_CONFIG(x) (((x) & 0x1f) << 24)
-
-#define CURSOR_EN (1 << 0)
-#define CURSOR_MODE(x) (((x) & 0x3) << 8)
-#define CURSOR_MONO 0
-#define CURSOR_24_1 1
-#define CURSOR_24_8_PRE_MULT 2
-#define CURSOR_24_8_UNPRE_MULT 3
-#define CURSOR_2X_MAGNIFY (1 << 16)
-#define CURSOR_FORCE_MC_ON (1 << 20)
-#define CURSOR_URGENT_CONTROL(x) (((x) & 0x7) << 24)
-#define CURSOR_URGENT_ALWAYS 0
-#define CURSOR_URGENT_1_8 1
-#define CURSOR_URGENT_1_4 2
-#define CURSOR_URGENT_3_8 3
-#define CURSOR_URGENT_1_2 4
-#define CURSOR_UPDATE_PENDING (1 << 0)
-#define CURSOR_UPDATE_TAKEN (1 << 1)
-#define CURSOR_UPDATE_LOCK (1 << 16)
-#define CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24)
-
-#define SI_CRTC0_REGISTER_OFFSET 0
-#define SI_CRTC1_REGISTER_OFFSET 0x300
-#define SI_CRTC2_REGISTER_OFFSET 0x2600
-#define SI_CRTC3_REGISTER_OFFSET 0x2900
-#define SI_CRTC4_REGISTER_OFFSET 0x2c00
-#define SI_CRTC5_REGISTER_OFFSET 0x2f00
-
-#define DMA0_REGISTER_OFFSET 0x000
-#define DMA1_REGISTER_OFFSET 0x200
-#define ES_AND_GS_AUTO 3
-#define RADEON_PACKET_TYPE3 3
-#define CE_PARTITION_BASE 3
-#define BUF_SWAP_32BIT (2 << 16)
#define GFX_POWER_STATUS (1 << 1)
#define GFX_CLOCK_STATUS (1 << 2)
#define GFX_LS_STATUS (1 << 3)
-#define RLC_BUSY_STATUS (1 << 0)
+#define RLC_BUSY_STATUS (1 << 0)
#define RLC_PUD(x) ((x) << 0)
#define RLC_PUD_MASK (0xff << 0)
#define RLC_PDD(x) ((x) << 8)
@@ -148,144 +40,8 @@
#define RLC_TTPD_MASK (0xff << 16)
#define RLC_MSD(x) ((x) << 24)
#define RLC_MSD_MASK (0xff << 24)
-#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
-#define WRITE_DATA_DST_SEL(x) ((x) << 8)
-#define EVENT_TYPE(x) ((x) << 0)
-#define EVENT_INDEX(x) ((x) << 8)
-#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
-#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
-#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
-#define GFX6_NUM_GFX_RINGS 1
-#define GFX6_NUM_COMPUTE_RINGS 2
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
-#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003
-#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x02010002
-#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02011003
-
-#define PACKET3(op, n) ((RADEON_PACKET_TYPE3 << 30) | \
- (((op) & 0xFF) << 8) | \
- ((n) & 0x3FFF) << 16)
-#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1)
-#define PACKET3_NOP 0x10
-#define PACKET3_SET_BASE 0x11
-#define PACKET3_BASE_INDEX(x) ((x) << 0)
-#define PACKET3_CLEAR_STATE 0x12
-#define PACKET3_INDEX_BUFFER_SIZE 0x13
-#define PACKET3_DISPATCH_DIRECT 0x15
-#define PACKET3_DISPATCH_INDIRECT 0x16
-#define PACKET3_ALLOC_GDS 0x1B
-#define PACKET3_WRITE_GDS_RAM 0x1C
-#define PACKET3_ATOMIC_GDS 0x1D
-#define PACKET3_ATOMIC 0x1E
-#define PACKET3_OCCLUSION_QUERY 0x1F
-#define PACKET3_SET_PREDICATION 0x20
-#define PACKET3_REG_RMW 0x21
-#define PACKET3_COND_EXEC 0x22
-#define PACKET3_PRED_EXEC 0x23
-#define PACKET3_DRAW_INDIRECT 0x24
-#define PACKET3_DRAW_INDEX_INDIRECT 0x25
-#define PACKET3_INDEX_BASE 0x26
-#define PACKET3_DRAW_INDEX_2 0x27
-#define PACKET3_CONTEXT_CONTROL 0x28
-#define PACKET3_INDEX_TYPE 0x2A
-#define PACKET3_DRAW_INDIRECT_MULTI 0x2C
-#define PACKET3_DRAW_INDEX_AUTO 0x2D
-#define PACKET3_DRAW_INDEX_IMMD 0x2E
-#define PACKET3_NUM_INSTANCES 0x2F
-#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30
-#define PACKET3_INDIRECT_BUFFER_CONST 0x31
-#define PACKET3_INDIRECT_BUFFER 0x3F
-#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34
-#define PACKET3_DRAW_INDEX_OFFSET_2 0x35
-#define PACKET3_DRAW_INDEX_MULTI_ELEMENT 0x36
-#define PACKET3_WRITE_DATA 0x37
-#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
-#define PACKET3_MEM_SEMAPHORE 0x39
-#define PACKET3_MPEG_INDEX 0x3A
-#define PACKET3_COPY_DW 0x3B
-#define PACKET3_WAIT_REG_MEM 0x3C
-#define PACKET3_MEM_WRITE 0x3D
-#define PACKET3_COPY_DATA 0x40
-#define PACKET3_CP_DMA 0x41
-# define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20)
-# define PACKET3_CP_DMA_ENGINE(x) ((x) << 27)
-# define PACKET3_CP_DMA_SRC_SEL(x) ((x) << 29)
-# define PACKET3_CP_DMA_CP_SYNC (1 << 31)
-# define PACKET3_CP_DMA_DIS_WC (1 << 21)
-# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22)
-# define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
-# define PACKET3_CP_DMA_CMD_SAS (1 << 26)
-# define PACKET3_CP_DMA_CMD_DAS (1 << 27)
-# define PACKET3_CP_DMA_CMD_SAIC (1 << 28)
-# define PACKET3_CP_DMA_CMD_DAIC (1 << 29)
-# define PACKET3_CP_DMA_CMD_RAW_WAIT (1 << 30)
-#define PACKET3_PFP_SYNC_ME 0x42
-#define PACKET3_SURFACE_SYNC 0x43
-# define PACKET3_DEST_BASE_0_ENA (1 << 0)
-# define PACKET3_DEST_BASE_1_ENA (1 << 1)
-# define PACKET3_CB0_DEST_BASE_ENA (1 << 6)
-# define PACKET3_CB1_DEST_BASE_ENA (1 << 7)
-# define PACKET3_CB2_DEST_BASE_ENA (1 << 8)
-# define PACKET3_CB3_DEST_BASE_ENA (1 << 9)
-# define PACKET3_CB4_DEST_BASE_ENA (1 << 10)
-# define PACKET3_CB5_DEST_BASE_ENA (1 << 11)
-# define PACKET3_CB6_DEST_BASE_ENA (1 << 12)
-# define PACKET3_CB7_DEST_BASE_ENA (1 << 13)
-# define PACKET3_DB_DEST_BASE_ENA (1 << 14)
-# define PACKET3_DEST_BASE_2_ENA (1 << 19)
-# define PACKET3_DEST_BASE_3_ENA (1 << 21)
-# define PACKET3_TCL1_ACTION_ENA (1 << 22)
-# define PACKET3_TC_ACTION_ENA (1 << 23)
-# define PACKET3_CB_ACTION_ENA (1 << 25)
-# define PACKET3_DB_ACTION_ENA (1 << 26)
-# define PACKET3_SH_KCACHE_ACTION_ENA (1 << 27)
-# define PACKET3_SH_ICACHE_ACTION_ENA (1 << 29)
-#define PACKET3_ME_INITIALIZE 0x44
-#define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16)
-#define PACKET3_COND_WRITE 0x45
-#define PACKET3_EVENT_WRITE 0x46
-#define PACKET3_EVENT_WRITE_EOP 0x47
-#define PACKET3_EVENT_WRITE_EOS 0x48
-#define PACKET3_PREAMBLE_CNTL 0x4A
-# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28)
-# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28)
-#define PACKET3_ONE_REG_WRITE 0x57
-#define PACKET3_LOAD_CONFIG_REG 0x5F
-#define PACKET3_LOAD_CONTEXT_REG 0x60
-#define PACKET3_LOAD_SH_REG 0x61
-#define PACKET3_SET_CONFIG_REG 0x68
-#define PACKET3_SET_CONFIG_REG_START 0x00002000
-#define PACKET3_SET_CONFIG_REG_END 0x00002c00
-#define PACKET3_SET_CONTEXT_REG 0x69
-#define PACKET3_SET_CONTEXT_REG_START 0x000a000
-#define PACKET3_SET_CONTEXT_REG_END 0x000a400
-#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73
-#define PACKET3_SET_RESOURCE_INDIRECT 0x74
-#define PACKET3_SET_SH_REG 0x76
-#define PACKET3_SET_SH_REG_START 0x00002c00
-#define PACKET3_SET_SH_REG_END 0x00003000
-#define PACKET3_SET_SH_REG_OFFSET 0x77
-#define PACKET3_ME_WRITE 0x7A
-#define PACKET3_SCRATCH_RAM_WRITE 0x7D
-#define PACKET3_SCRATCH_RAM_READ 0x7E
-#define PACKET3_CE_WRITE 0x7F
-#define PACKET3_LOAD_CONST_RAM 0x80
-#define PACKET3_WRITE_CONST_RAM 0x81
-#define PACKET3_WRITE_CONST_RAM_OFFSET 0x82
-#define PACKET3_DUMP_CONST_RAM 0x83
-#define PACKET3_INCREMENT_CE_COUNTER 0x84
-#define PACKET3_INCREMENT_DE_COUNTER 0x85
-#define PACKET3_WAIT_ON_CE_COUNTER 0x86
-#define PACKET3_WAIT_ON_DE_COUNTER 0x87
-#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
-#define PACKET3_SET_CE_DE_COUNTERS 0x89
-#define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A
-#define PACKET3_SWITCH_BUFFER 0x8B
-#define PACKET3_SEM_WAIT_ON_SIGNAL (0x1 << 12)
-#define PACKET3_SEM_SEL_SIGNAL (0x6 << 29)
-#define PACKET3_SEM_SEL_WAIT (0x7 << 29)
-
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index 9a24f17a5750..66f650f87243 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -27,6 +27,7 @@
#include "amdgpu_ih.h"
#include "sid.h"
#include "si_ih.h"
+
#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"
@@ -95,6 +96,9 @@ static int si_ih_irq_init(struct amdgpu_device *adev)
pci_set_master(adev->pdev);
si_ih_enable_interrupts(adev);
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
return 0;
}
@@ -111,6 +115,9 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev,
wptr = le32_to_cpu(*ih->wptr_cpu);
+ if (ih == &adev->irq.ih_soft)
+ goto out;
+
if (wptr & IH_RB_WPTR__RB_OVERFLOW_MASK) {
wptr &= ~IH_RB_WPTR__RB_OVERFLOW_MASK;
dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
@@ -119,7 +126,15 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev,
tmp = RREG32(IH_RB_CNTL);
tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
WREG32(IH_RB_CNTL, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
+ WREG32(IH_RB_CNTL, tmp);
}
+
+out:
return (wptr & ih->ptr_mask);
}
@@ -150,70 +165,68 @@ static void si_ih_set_rptr(struct amdgpu_device *adev,
WREG32(IH_RB_RPTR, ih->rptr);
}
-static int si_ih_early_init(void *handle)
+static int si_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
si_ih_set_interrupt_funcs(adev);
return 0;
}
-static int si_ih_sw_init(void *handle)
+static int si_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
if (r)
return r;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
return amdgpu_irq_init(adev);
}
-static int si_ih_sw_fini(void *handle)
+static int si_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
return 0;
}
-static int si_ih_hw_init(void *handle)
+static int si_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return si_ih_irq_init(adev);
}
-static int si_ih_hw_fini(void *handle)
+static int si_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- si_ih_irq_disable(adev);
+ si_ih_irq_disable(ip_block->adev);
return 0;
}
-static int si_ih_suspend(void *handle)
+static int si_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_ih_hw_fini(adev);
+ return si_ih_hw_fini(ip_block);
}
-static int si_ih_resume(void *handle)
+static int si_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_ih_hw_init(adev);
+ return si_ih_hw_init(ip_block);
}
-static bool si_ih_is_idle(void *handle)
+static bool si_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 tmp = RREG32(SRBM_STATUS);
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__IH_BUSY_MASK)
return false;
@@ -221,41 +234,41 @@ static bool si_ih_is_idle(void *handle)
return true;
}
-static int si_ih_wait_for_idle(void *handle)
+static int si_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (si_ih_is_idle(handle))
+ if (si_ih_is_idle(ip_block))
return 0;
udelay(1);
}
return -ETIMEDOUT;
}
-static int si_ih_soft_reset(void *handle)
+static int si_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
- u32 tmp = RREG32(SRBM_STATUS);
+ u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__IH_BUSY_MASK)
srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_IH_MASK;
if (srbm_soft_reset) {
- tmp = RREG32(SRBM_SOFT_RESET);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
tmp |= srbm_soft_reset;
- dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32(SRBM_SOFT_RESET, tmp);
- tmp = RREG32(SRBM_SOFT_RESET);
+ dev_info(adev->dev, "mmSRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
udelay(50);
tmp &= ~srbm_soft_reset;
- WREG32(SRBM_SOFT_RESET, tmp);
- tmp = RREG32(SRBM_SOFT_RESET);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
udelay(50);
}
@@ -263,13 +276,13 @@ static int si_ih_soft_reset(void *handle)
return 0;
}
-static int si_ih_set_clockgating_state(void *handle,
+static int si_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int si_ih_set_powergating_state(void *handle,
+static int si_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -278,7 +291,6 @@ static int si_ih_set_powergating_state(void *handle,
static const struct amd_ip_funcs si_ih_ip_funcs = {
.name = "si_ih",
.early_init = si_ih_early_init,
- .late_init = NULL,
.sw_init = si_ih_sw_init,
.sw_fini = si_ih_sw_fini,
.hw_init = si_ih_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h
index 9a39cbfe6db9..561462a8332e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sid.h
+++ b/drivers/gpu/drm/amd/amdgpu/sid.h
@@ -24,47 +24,12 @@
#ifndef SI_H
#define SI_H
-#define TAHITI_RB_BITMAP_WIDTH_PER_SH 2
-
-#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003
-#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002
-#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02010001
-
-#define SI_MAX_SH_GPRS 256
-#define SI_MAX_TEMP_GPRS 16
-#define SI_MAX_SH_THREADS 256
-#define SI_MAX_SH_STACK_ENTRIES 4096
-#define SI_MAX_FRC_EOV_CNT 16384
-#define SI_MAX_BACKENDS 8
-#define SI_MAX_BACKENDS_MASK 0xFF
-#define SI_MAX_BACKENDS_PER_SE_MASK 0x0F
-#define SI_MAX_SIMDS 12
-#define SI_MAX_SIMDS_MASK 0x0FFF
-#define SI_MAX_SIMDS_PER_SE_MASK 0x00FF
-#define SI_MAX_PIPES 8
-#define SI_MAX_PIPES_MASK 0xFF
-#define SI_MAX_PIPES_PER_SIMD_MASK 0x3F
-#define SI_MAX_LDS_NUM 0xFFFF
-#define SI_MAX_TCC 16
-#define SI_MAX_TCC_MASK 0xFFFF
#define SI_MAX_CTLACKS_ASSERTION_WAIT 100
-/* SMC IND accessor regs */
-#define SMC_IND_INDEX_0 0x80
-#define SMC_IND_DATA_0 0x81
-
-#define SMC_IND_ACCESS_CNTL 0x8A
-# define AUTO_INCREMENT_IND_0 (1 << 0)
-#define SMC_MESSAGE_0 0x8B
-#define SMC_RESP_0 0x8C
-
/* CG IND registers are accessed via SMC indirect space + SMC_CG_IND_START */
#define SMC_CG_IND_START 0xc0030000
#define SMC_CG_IND_END 0xc0040000
-#define CG_CGTT_LOCAL_0 0x400
-#define CG_CGTT_LOCAL_1 0x401
-
/* SMC IND registers */
#define SMC_SYSCON_RESET_CNTL 0x80000000
# define RST_REG (1 << 0)
@@ -72,9 +37,6 @@
# define CK_DISABLE (1 << 0)
# define CKEN (1 << 24)
-#define VGA_HDP_CONTROL 0xCA
-#define VGA_MEMORY_DISABLE (1 << 4)
-
#define DCCG_DISP_SLOW_SELECT_REG 0x13F
#define DCCG_DISP1_SLOW_SELECT(x) ((x) << 0)
#define DCCG_DISP1_SLOW_SELECT_MASK (7 << 0)
@@ -83,47 +45,6 @@
#define DCCG_DISP2_SLOW_SELECT_MASK (7 << 4)
#define DCCG_DISP2_SLOW_SELECT_SHIFT 4
-#define CG_SPLL_FUNC_CNTL 0x180
-#define SPLL_RESET (1 << 0)
-#define SPLL_SLEEP (1 << 1)
-#define SPLL_BYPASS_EN (1 << 3)
-#define SPLL_REF_DIV(x) ((x) << 4)
-#define SPLL_REF_DIV_MASK (0x3f << 4)
-#define SPLL_PDIV_A(x) ((x) << 20)
-#define SPLL_PDIV_A_MASK (0x7f << 20)
-#define SPLL_PDIV_A_SHIFT 20
-#define CG_SPLL_FUNC_CNTL_2 0x181
-#define SCLK_MUX_SEL(x) ((x) << 0)
-#define SCLK_MUX_SEL_MASK (0x1ff << 0)
-#define SPLL_CTLREQ_CHG (1 << 23)
-#define SCLK_MUX_UPDATE (1 << 26)
-#define CG_SPLL_FUNC_CNTL_3 0x182
-#define SPLL_FB_DIV(x) ((x) << 0)
-#define SPLL_FB_DIV_MASK (0x3ffffff << 0)
-#define SPLL_FB_DIV_SHIFT 0
-#define SPLL_DITHEN (1 << 28)
-#define CG_SPLL_FUNC_CNTL_4 0x183
-
-#define SPLL_STATUS 0x185
-#define SPLL_CHG_STATUS (1 << 1)
-#define SPLL_CNTL_MODE 0x186
-#define SPLL_SW_DIR_CONTROL (1 << 0)
-# define SPLL_REFCLK_SEL(x) ((x) << 26)
-# define SPLL_REFCLK_SEL_MASK (3 << 26)
-
-#define CG_SPLL_SPREAD_SPECTRUM 0x188
-#define SSEN (1 << 0)
-#define CLK_S(x) ((x) << 4)
-#define CLK_S_MASK (0xfff << 4)
-#define CLK_S_SHIFT 4
-#define CG_SPLL_SPREAD_SPECTRUM_2 0x189
-#define CLK_V(x) ((x) << 0)
-#define CLK_V_MASK (0x3ffffff << 0)
-#define CLK_V_SHIFT 0
-
-#define CG_SPLL_AUTOSCALE_CNTL 0x18b
-# define AUTOSCALE_ON_SS_CLEAR (1 << 9)
-
/* discrete uvd clocks */
#define CG_UPLL_FUNC_CNTL 0x18d
# define UPLL_RESET_MASK 0x00000001
@@ -153,317 +74,13 @@
#define CG_UPLL_SPREAD_SPECTRUM 0x194
# define SSEN_MASK 0x00000001
-#define MPLL_BYPASSCLK_SEL 0x197
-# define MPLL_CLKOUT_SEL(x) ((x) << 8)
-# define MPLL_CLKOUT_SEL_MASK 0xFF00
-
-#define CG_CLKPIN_CNTL 0x198
-# define XTALIN_DIVIDE (1 << 1)
-# define BCLK_AS_XCLK (1 << 2)
-#define CG_CLKPIN_CNTL_2 0x199
-# define FORCE_BIF_REFCLK_EN (1 << 3)
-# define MUX_TCLK_TO_XCLK (1 << 8)
-
-#define THM_CLK_CNTL 0x19b
-# define CMON_CLK_SEL(x) ((x) << 0)
-# define CMON_CLK_SEL_MASK 0xFF
-# define TMON_CLK_SEL(x) ((x) << 8)
-# define TMON_CLK_SEL_MASK 0xFF00
-#define MISC_CLK_CNTL 0x19c
-# define DEEP_SLEEP_CLK_SEL(x) ((x) << 0)
-# define DEEP_SLEEP_CLK_SEL_MASK 0xFF
-# define ZCLK_SEL(x) ((x) << 8)
-# define ZCLK_SEL_MASK 0xFF00
-
-#define CG_THERMAL_CTRL 0x1c0
-#define DPM_EVENT_SRC(x) ((x) << 0)
-#define DPM_EVENT_SRC_MASK (7 << 0)
-#define DIG_THERM_DPM(x) ((x) << 14)
-#define DIG_THERM_DPM_MASK 0x003FC000
-#define DIG_THERM_DPM_SHIFT 14
-#define CG_THERMAL_STATUS 0x1c1
-#define FDO_PWM_DUTY(x) ((x) << 9)
-#define FDO_PWM_DUTY_MASK (0xff << 9)
-#define FDO_PWM_DUTY_SHIFT 9
-#define CG_THERMAL_INT 0x1c2
-#define DIG_THERM_INTH(x) ((x) << 8)
-#define DIG_THERM_INTH_MASK 0x0000FF00
-#define DIG_THERM_INTH_SHIFT 8
-#define DIG_THERM_INTL(x) ((x) << 16)
-#define DIG_THERM_INTL_MASK 0x00FF0000
-#define DIG_THERM_INTL_SHIFT 16
-#define THERM_INT_MASK_HIGH (1 << 24)
-#define THERM_INT_MASK_LOW (1 << 25)
-
-#define CG_MULT_THERMAL_CTRL 0x1c4
-#define TEMP_SEL(x) ((x) << 20)
-#define TEMP_SEL_MASK (0xff << 20)
-#define TEMP_SEL_SHIFT 20
-#define CG_MULT_THERMAL_STATUS 0x1c5
-#define ASIC_MAX_TEMP(x) ((x) << 0)
-#define ASIC_MAX_TEMP_MASK 0x000001ff
-#define ASIC_MAX_TEMP_SHIFT 0
-#define CTF_TEMP(x) ((x) << 9)
-#define CTF_TEMP_MASK 0x0003fe00
-#define CTF_TEMP_SHIFT 9
-
-#define CG_FDO_CTRL0 0x1d5
-#define FDO_STATIC_DUTY(x) ((x) << 0)
-#define FDO_STATIC_DUTY_MASK 0x000000FF
-#define FDO_STATIC_DUTY_SHIFT 0
-#define CG_FDO_CTRL1 0x1d6
-#define FMAX_DUTY100(x) ((x) << 0)
-#define FMAX_DUTY100_MASK 0x000000FF
-#define FMAX_DUTY100_SHIFT 0
-#define CG_FDO_CTRL2 0x1d7
-#define TMIN(x) ((x) << 0)
-#define TMIN_MASK 0x000000FF
-#define TMIN_SHIFT 0
-#define FDO_PWM_MODE(x) ((x) << 11)
-#define FDO_PWM_MODE_MASK (7 << 11)
-#define FDO_PWM_MODE_SHIFT 11
-#define TACH_PWM_RESP_RATE(x) ((x) << 25)
-#define TACH_PWM_RESP_RATE_MASK (0x7f << 25)
-#define TACH_PWM_RESP_RATE_SHIFT 25
-
-#define CG_TACH_CTRL 0x1dc
-# define EDGE_PER_REV(x) ((x) << 0)
-# define EDGE_PER_REV_MASK (0x7 << 0)
-# define EDGE_PER_REV_SHIFT 0
-# define TARGET_PERIOD(x) ((x) << 3)
-# define TARGET_PERIOD_MASK 0xfffffff8
-# define TARGET_PERIOD_SHIFT 3
-#define CG_TACH_STATUS 0x1dd
-# define TACH_PERIOD(x) ((x) << 0)
-# define TACH_PERIOD_MASK 0xffffffff
-# define TACH_PERIOD_SHIFT 0
-
-#define GENERAL_PWRMGT 0x1e0
-# define GLOBAL_PWRMGT_EN (1 << 0)
-# define STATIC_PM_EN (1 << 1)
-# define THERMAL_PROTECTION_DIS (1 << 2)
-# define THERMAL_PROTECTION_TYPE (1 << 3)
-# define SW_SMIO_INDEX(x) ((x) << 6)
-# define SW_SMIO_INDEX_MASK (1 << 6)
-# define SW_SMIO_INDEX_SHIFT 6
-# define VOLT_PWRMGT_EN (1 << 10)
-# define DYN_SPREAD_SPECTRUM_EN (1 << 23)
-#define CG_TPC 0x1e1
-#define SCLK_PWRMGT_CNTL 0x1e2
-# define SCLK_PWRMGT_OFF (1 << 0)
-# define SCLK_LOW_D1 (1 << 1)
-# define FIR_RESET (1 << 4)
-# define FIR_FORCE_TREND_SEL (1 << 5)
-# define FIR_TREND_MODE (1 << 6)
-# define DYN_GFX_CLK_OFF_EN (1 << 7)
-# define GFX_CLK_FORCE_ON (1 << 8)
-# define GFX_CLK_REQUEST_OFF (1 << 9)
-# define GFX_CLK_FORCE_OFF (1 << 10)
-# define GFX_CLK_OFF_ACPI_D1 (1 << 11)
-# define GFX_CLK_OFF_ACPI_D2 (1 << 12)
-# define GFX_CLK_OFF_ACPI_D3 (1 << 13)
-# define DYN_LIGHT_SLEEP_EN (1 << 14)
-
-#define TARGET_AND_CURRENT_PROFILE_INDEX 0x1e6
-# define CURRENT_STATE_INDEX_MASK (0xf << 4)
-# define CURRENT_STATE_INDEX_SHIFT 4
-
-#define CG_FTV 0x1ef
-
-#define CG_FFCT_0 0x1f0
-# define UTC_0(x) ((x) << 0)
-# define UTC_0_MASK (0x3ff << 0)
-# define DTC_0(x) ((x) << 10)
-# define DTC_0_MASK (0x3ff << 10)
-
-#define CG_BSP 0x1ff
-# define BSP(x) ((x) << 0)
-# define BSP_MASK (0xffff << 0)
-# define BSU(x) ((x) << 16)
-# define BSU_MASK (0xf << 16)
-#define CG_AT 0x200
-# define CG_R(x) ((x) << 0)
-# define CG_R_MASK (0xffff << 0)
-# define CG_L(x) ((x) << 16)
-# define CG_L_MASK (0xffff << 16)
-
-#define CG_GIT 0x201
-# define CG_GICST(x) ((x) << 0)
-# define CG_GICST_MASK (0xffff << 0)
-# define CG_GIPOT(x) ((x) << 16)
-# define CG_GIPOT_MASK (0xffff << 16)
-
-#define CG_SSP 0x203
-# define SST(x) ((x) << 0)
-# define SST_MASK (0xffff << 0)
-# define SSTU(x) ((x) << 16)
-# define SSTU_MASK (0xf << 16)
-
-#define CG_DISPLAY_GAP_CNTL 0x20a
-# define DISP1_GAP(x) ((x) << 0)
-# define DISP1_GAP_MASK (3 << 0)
-# define DISP2_GAP(x) ((x) << 2)
-# define DISP2_GAP_MASK (3 << 2)
-# define VBI_TIMER_COUNT(x) ((x) << 4)
-# define VBI_TIMER_COUNT_MASK (0x3fff << 4)
-# define VBI_TIMER_UNIT(x) ((x) << 20)
-# define VBI_TIMER_UNIT_MASK (7 << 20)
-# define DISP1_GAP_MCHG(x) ((x) << 24)
-# define DISP1_GAP_MCHG_MASK (3 << 24)
-# define DISP2_GAP_MCHG(x) ((x) << 26)
-# define DISP2_GAP_MCHG_MASK (3 << 26)
-
-#define CG_ULV_CONTROL 0x21e
-#define CG_ULV_PARAMETER 0x21f
-
-#define SMC_SCRATCH0 0x221
-
-#define CG_CAC_CTRL 0x22e
-# define CAC_WINDOW(x) ((x) << 0)
-# define CAC_WINDOW_MASK 0x00ffffff
-
-#define DMIF_ADDR_CONFIG 0x2F5
-
-#define DMIF_ADDR_CALC 0x300
-
-#define PIPE0_DMIF_BUFFER_CONTROL 0x0328
-# define DMIF_BUFFERS_ALLOCATED(x) ((x) << 0)
-# define DMIF_BUFFERS_ALLOCATED_COMPLETED (1 << 4)
-
-#define SRBM_STATUS 0x394
-#define GRBM_RQ_PENDING (1 << 5)
-#define VMC_BUSY (1 << 8)
-#define MCB_BUSY (1 << 9)
-#define MCB_NON_DISPLAY_BUSY (1 << 10)
-#define MCC_BUSY (1 << 11)
-#define MCD_BUSY (1 << 12)
-#define SEM_BUSY (1 << 14)
-#define IH_BUSY (1 << 17)
-
-#define SRBM_SOFT_RESET 0x398
-#define SOFT_RESET_BIF (1 << 1)
-#define SOFT_RESET_DC (1 << 5)
-#define SOFT_RESET_DMA1 (1 << 6)
-#define SOFT_RESET_GRBM (1 << 8)
-#define SOFT_RESET_HDP (1 << 9)
-#define SOFT_RESET_IH (1 << 10)
-#define SOFT_RESET_MC (1 << 11)
-#define SOFT_RESET_ROM (1 << 14)
-#define SOFT_RESET_SEM (1 << 15)
-#define SOFT_RESET_VMC (1 << 17)
-#define SOFT_RESET_DMA (1 << 20)
-#define SOFT_RESET_TST (1 << 21)
-#define SOFT_RESET_REGBB (1 << 22)
-#define SOFT_RESET_ORB (1 << 23)
-
-#define CC_SYS_RB_BACKEND_DISABLE 0x3A0
-#define GC_USER_SYS_RB_BACKEND_DISABLE 0x3A1
-
-#define SRBM_READ_ERROR 0x3A6
-#define SRBM_INT_CNTL 0x3A8
-#define SRBM_INT_ACK 0x3AA
-
-#define SRBM_STATUS2 0x3B1
-#define DMA_BUSY (1 << 5)
-#define DMA1_BUSY (1 << 6)
-
-#define VM_L2_CNTL 0x500
-#define ENABLE_L2_CACHE (1 << 0)
-#define ENABLE_L2_FRAGMENT_PROCESSING (1 << 1)
-#define L2_CACHE_PTE_ENDIAN_SWAP_MODE(x) ((x) << 2)
-#define L2_CACHE_PDE_ENDIAN_SWAP_MODE(x) ((x) << 4)
-#define ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE (1 << 9)
-#define ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE (1 << 10)
-#define EFFECTIVE_L2_QUEUE_SIZE(x) (((x) & 7) << 15)
-#define CONTEXT1_IDENTITY_ACCESS_MODE(x) (((x) & 3) << 19)
-#define VM_L2_CNTL2 0x501
-#define INVALIDATE_ALL_L1_TLBS (1 << 0)
-#define INVALIDATE_L2_CACHE (1 << 1)
-#define INVALIDATE_CACHE_MODE(x) ((x) << 26)
-#define INVALIDATE_PTE_AND_PDE_CACHES 0
-#define INVALIDATE_ONLY_PTE_CACHES 1
-#define INVALIDATE_ONLY_PDE_CACHES 2
-#define VM_L2_CNTL3 0x502
-#define BANK_SELECT(x) ((x) << 0)
-#define L2_CACHE_UPDATE_MODE(x) ((x) << 6)
-#define L2_CACHE_BIGK_FRAGMENT_SIZE(x) ((x) << 15)
-#define L2_CACHE_BIGK_ASSOCIATIVITY (1 << 20)
-#define VM_L2_STATUS 0x503
-#define L2_BUSY (1 << 0)
-#define VM_CONTEXT0_CNTL 0x504
-#define ENABLE_CONTEXT (1 << 0)
-#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1)
-#define RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 3)
-#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4)
-#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 6)
-#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 7)
-#define PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 9)
-#define PDE0_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 10)
-#define VALID_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 12)
-#define VALID_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 13)
-#define READ_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 15)
-#define READ_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 16)
-#define WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 18)
-#define WRITE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 19)
-#define PAGE_TABLE_BLOCK_SIZE(x) (((x) & 0xF) << 24)
-#define VM_CONTEXT1_CNTL 0x505
-#define VM_CONTEXT0_CNTL2 0x50C
-#define VM_CONTEXT1_CNTL2 0x50D
-#define VM_CONTEXT8_PAGE_TABLE_BASE_ADDR 0x50E
-#define VM_CONTEXT9_PAGE_TABLE_BASE_ADDR 0x50F
-#define VM_CONTEXT10_PAGE_TABLE_BASE_ADDR 0x510
-#define VM_CONTEXT11_PAGE_TABLE_BASE_ADDR 0x511
-#define VM_CONTEXT12_PAGE_TABLE_BASE_ADDR 0x512
-#define VM_CONTEXT13_PAGE_TABLE_BASE_ADDR 0x513
-#define VM_CONTEXT14_PAGE_TABLE_BASE_ADDR 0x514
-#define VM_CONTEXT15_PAGE_TABLE_BASE_ADDR 0x515
-
-#define VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x53f
-#define VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x537
-#define PROTECTIONS_MASK (0xf << 0)
-#define PROTECTIONS_SHIFT 0
- /* bit 0: range
- * bit 1: pde0
- * bit 2: valid
- * bit 3: read
- * bit 4: write
- */
-#define MEMORY_CLIENT_ID_MASK (0xff << 12)
-#define MEMORY_CLIENT_ID_SHIFT 12
-#define MEMORY_CLIENT_RW_MASK (1 << 24)
-#define MEMORY_CLIENT_RW_SHIFT 24
-#define FAULT_VMID_MASK (0xf << 25)
-#define FAULT_VMID_SHIFT 25
-
#define VM_INVALIDATE_REQUEST 0x51E
#define VM_INVALIDATE_RESPONSE 0x51F
-#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR 0x546
-#define VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR 0x547
-
-#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x54F
-#define VM_CONTEXT1_PAGE_TABLE_BASE_ADDR 0x550
-#define VM_CONTEXT2_PAGE_TABLE_BASE_ADDR 0x551
-#define VM_CONTEXT3_PAGE_TABLE_BASE_ADDR 0x552
-#define VM_CONTEXT4_PAGE_TABLE_BASE_ADDR 0x553
-#define VM_CONTEXT5_PAGE_TABLE_BASE_ADDR 0x554
-#define VM_CONTEXT6_PAGE_TABLE_BASE_ADDR 0x555
-#define VM_CONTEXT7_PAGE_TABLE_BASE_ADDR 0x556
-#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x557
-#define VM_CONTEXT1_PAGE_TABLE_START_ADDR 0x558
-
-#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x55F
-#define VM_CONTEXT1_PAGE_TABLE_END_ADDR 0x560
-
#define VM_L2_CG 0x570
#define MC_CG_ENABLE (1 << 18)
#define MC_LS_ENABLE (1 << 19)
-#define MC_SHARED_CHMAP 0x801
-#define NOOFCHAN_SHIFT 12
-#define NOOFCHAN_MASK 0x0000f000
-#define MC_SHARED_CHREMAP 0x802
-
#define MC_VM_FB_LOCATION 0x809
#define MC_VM_AGP_TOP 0x80A
#define MC_VM_AGP_BOT 0x80B
@@ -495,21 +112,6 @@
#define MC_CITF_MISC_WR_CG 0x993
#define MC_CITF_MISC_VM_CG 0x994
-#define MC_ARB_RAMCFG 0x9D8
-#define NOOFBANK_SHIFT 0
-#define NOOFBANK_MASK 0x00000003
-#define NOOFRANK_SHIFT 2
-#define NOOFRANK_MASK 0x00000004
-#define NOOFROWS_SHIFT 3
-#define NOOFROWS_MASK 0x00000038
-#define NOOFCOLS_SHIFT 6
-#define NOOFCOLS_MASK 0x000000C0
-#define CHANSIZE_SHIFT 8
-#define CHANSIZE_MASK 0x00000100
-#define CHANSIZE_OVERRIDE (1 << 11)
-#define NOOFGROUPS_SHIFT 12
-#define NOOFGROUPS_MASK 0x00001000
-
#define MC_ARB_DRAM_TIMING 0x9DD
#define MC_ARB_DRAM_TIMING2 0x9DE
@@ -635,20 +237,6 @@
#define CLKS(x) ((x) << 0)
#define CLKS_MASK (0xfff << 0)
-#define HDP_HOST_PATH_CNTL 0xB00
-#define CLOCK_GATING_DIS (1 << 23)
-#define HDP_NONSURFACE_BASE 0xB01
-#define HDP_NONSURFACE_INFO 0xB02
-#define HDP_NONSURFACE_SIZE 0xB03
-
-#define HDP_DEBUG0 0xBCC
-
-#define HDP_ADDR_CONFIG 0xBD2
-#define HDP_MISC_CNTL 0xBD3
-#define HDP_FLUSH_INVALIDATE_CACHE (1 << 0)
-#define HDP_MEM_POWER_LS 0xBD4
-#define HDP_LS_ENABLE (1 << 0)
-
#define ATC_MISC_CG 0xCD4
#define IH_RB_CNTL 0xF80
@@ -678,8 +266,6 @@
# define MC_WR_CLEAN_CNT(x) ((x) << 20)
# define MC_VMID(x) ((x) << 25)
-#define CONFIG_MEMSIZE 0x150A
-
#define INTERRUPT_CNTL 0x151A
# define IH_DUMMY_RD_OVERRIDE (1 << 0)
# define IH_DUMMY_RD_EN (1 << 1)
@@ -687,734 +273,28 @@
# define GEN_IH_INT_EN (1 << 8)
#define INTERRUPT_CNTL2 0x151B
-#define HDP_MEM_COHERENCY_FLUSH_CNTL 0x1520
-
-#define BIF_FB_EN 0x1524
-#define FB_READ_EN (1 << 0)
-#define FB_WRITE_EN (1 << 1)
-
-#define HDP_REG_COHERENCY_FLUSH_CNTL 0x1528
-
-/* DCE6 ELD audio interface */
-#define AZ_F0_CODEC_ENDPOINT_INDEX 0x1780
-# define AZ_ENDPOINT_REG_INDEX(x) (((x) & 0xff) << 0)
-# define AZ_ENDPOINT_REG_WRITE_EN (1 << 8)
-#define AZ_F0_CODEC_ENDPOINT_DATA 0x1781
-
-#define AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER 0x25
-#define SPEAKER_ALLOCATION(x) (((x) & 0x7f) << 0)
-#define SPEAKER_ALLOCATION_MASK (0x7f << 0)
-#define SPEAKER_ALLOCATION_SHIFT 0
-#define HDMI_CONNECTION (1 << 16)
-#define DP_CONNECTION (1 << 17)
-
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0 0x28 /* LPCM */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1 0x29 /* AC3 */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2 0x2A /* MPEG1 */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3 0x2B /* MP3 */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4 0x2C /* MPEG2 */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5 0x2D /* AAC */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6 0x2E /* DTS */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7 0x2F /* ATRAC */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR8 0x30 /* one bit audio - leave at 0 (default) */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9 0x31 /* Dolby Digital */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10 0x32 /* DTS-HD */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11 0x33 /* MAT-MLP */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR12 0x34 /* DTS */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13 0x35 /* WMA Pro */
-# define MAX_CHANNELS(x) (((x) & 0x7) << 0)
-/* max channels minus one. 7 = 8 channels */
-# define SUPPORTED_FREQUENCIES(x) (((x) & 0xff) << 8)
-# define DESCRIPTOR_BYTE_2(x) (((x) & 0xff) << 16)
-# define SUPPORTED_FREQUENCIES_STEREO(x) (((x) & 0xff) << 24) /* LPCM only */
-/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO
- * bit0 = 32 kHz
- * bit1 = 44.1 kHz
- * bit2 = 48 kHz
- * bit3 = 88.2 kHz
- * bit4 = 96 kHz
- * bit5 = 176.4 kHz
- * bit6 = 192 kHz
- */
-
-#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC 0x37
-# define VIDEO_LIPSYNC(x) (((x) & 0xff) << 0)
-# define AUDIO_LIPSYNC(x) (((x) & 0xff) << 8)
-/* VIDEO_LIPSYNC, AUDIO_LIPSYNC
- * 0 = invalid
- * x = legal delay value
- * 255 = sync not supported
- */
-#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_HBR 0x38
-# define HBR_CAPABLE (1 << 0) /* enabled by default */
-
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO0 0x3a
-# define MANUFACTURER_ID(x) (((x) & 0xffff) << 0)
-# define PRODUCT_ID(x) (((x) & 0xffff) << 16)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO1 0x3b
-# define SINK_DESCRIPTION_LEN(x) (((x) & 0xff) << 0)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO2 0x3c
-# define PORT_ID0(x) (((x) & 0xffffffff) << 0)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO3 0x3d
-# define PORT_ID1(x) (((x) & 0xffffffff) << 0)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO4 0x3e
-# define DESCRIPTION0(x) (((x) & 0xff) << 0)
-# define DESCRIPTION1(x) (((x) & 0xff) << 8)
-# define DESCRIPTION2(x) (((x) & 0xff) << 16)
-# define DESCRIPTION3(x) (((x) & 0xff) << 24)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO5 0x3f
-# define DESCRIPTION4(x) (((x) & 0xff) << 0)
-# define DESCRIPTION5(x) (((x) & 0xff) << 8)
-# define DESCRIPTION6(x) (((x) & 0xff) << 16)
-# define DESCRIPTION7(x) (((x) & 0xff) << 24)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO6 0x40
-# define DESCRIPTION8(x) (((x) & 0xff) << 0)
-# define DESCRIPTION9(x) (((x) & 0xff) << 8)
-# define DESCRIPTION10(x) (((x) & 0xff) << 16)
-# define DESCRIPTION11(x) (((x) & 0xff) << 24)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO7 0x41
-# define DESCRIPTION12(x) (((x) & 0xff) << 0)
-# define DESCRIPTION13(x) (((x) & 0xff) << 8)
-# define DESCRIPTION14(x) (((x) & 0xff) << 16)
-# define DESCRIPTION15(x) (((x) & 0xff) << 24)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO8 0x42
-# define DESCRIPTION16(x) (((x) & 0xff) << 0)
-# define DESCRIPTION17(x) (((x) & 0xff) << 8)
-
-#define AZ_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL 0x54
-# define AUDIO_ENABLED (1 << 31)
-
-#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT 0x56
-#define PORT_CONNECTIVITY_MASK (3 << 30)
-#define PORT_CONNECTIVITY_SHIFT 30
-
-#define DC_LB_MEMORY_SPLIT 0x1AC3
-#define DC_LB_MEMORY_CONFIG(x) ((x) << 20)
-
-#define PRIORITY_A_CNT 0x1AC6
-#define PRIORITY_MARK_MASK 0x7fff
-#define PRIORITY_OFF (1 << 16)
-#define PRIORITY_ALWAYS_ON (1 << 20)
-#define PRIORITY_B_CNT 0x1AC7
-
-#define DPG_PIPE_ARBITRATION_CONTROL3 0x1B32
-# define LATENCY_WATERMARK_MASK(x) ((x) << 16)
-#define DPG_PIPE_LATENCY_CONTROL 0x1B33
-# define LATENCY_LOW_WATERMARK(x) ((x) << 0)
-# define LATENCY_HIGH_WATERMARK(x) ((x) << 16)
-
-/* 0x6bb8, 0x77b8, 0x103b8, 0x10fb8, 0x11bb8, 0x127b8 */
-#define VLINE_STATUS 0x1AEE
-# define VLINE_OCCURRED (1 << 0)
-# define VLINE_ACK (1 << 4)
-# define VLINE_STAT (1 << 12)
-# define VLINE_INTERRUPT (1 << 16)
-# define VLINE_INTERRUPT_TYPE (1 << 17)
-/* 0x6bbc, 0x77bc, 0x103bc, 0x10fbc, 0x11bbc, 0x127bc */
-#define VBLANK_STATUS 0x1AEF
-# define VBLANK_OCCURRED (1 << 0)
-# define VBLANK_ACK (1 << 4)
-# define VBLANK_STAT (1 << 12)
-# define VBLANK_INTERRUPT (1 << 16)
-# define VBLANK_INTERRUPT_TYPE (1 << 17)
-
-/* 0x6b40, 0x7740, 0x10340, 0x10f40, 0x11b40, 0x12740 */
-#define INT_MASK 0x1AD0
-# define VBLANK_INT_MASK (1 << 0)
-# define VLINE_INT_MASK (1 << 4)
-
-#define DISP_INTERRUPT_STATUS 0x183D
-# define LB_D1_VLINE_INTERRUPT (1 << 2)
-# define LB_D1_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD1_INTERRUPT (1 << 17)
-# define DC_HPD1_RX_INTERRUPT (1 << 18)
-# define DACA_AUTODETECT_INTERRUPT (1 << 22)
-# define DACB_AUTODETECT_INTERRUPT (1 << 23)
-# define DC_I2C_SW_DONE_INTERRUPT (1 << 24)
-# define DC_I2C_HW_DONE_INTERRUPT (1 << 25)
-#define DISP_INTERRUPT_STATUS_CONTINUE 0x183E
-# define LB_D2_VLINE_INTERRUPT (1 << 2)
-# define LB_D2_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD2_INTERRUPT (1 << 17)
-# define DC_HPD2_RX_INTERRUPT (1 << 18)
-# define DISP_TIMER_INTERRUPT (1 << 24)
-#define DISP_INTERRUPT_STATUS_CONTINUE2 0x183F
-# define LB_D3_VLINE_INTERRUPT (1 << 2)
-# define LB_D3_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD3_INTERRUPT (1 << 17)
-# define DC_HPD3_RX_INTERRUPT (1 << 18)
-#define DISP_INTERRUPT_STATUS_CONTINUE3 0x1840
-# define LB_D4_VLINE_INTERRUPT (1 << 2)
-# define LB_D4_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD4_INTERRUPT (1 << 17)
-# define DC_HPD4_RX_INTERRUPT (1 << 18)
-#define DISP_INTERRUPT_STATUS_CONTINUE4 0x1853
-# define LB_D5_VLINE_INTERRUPT (1 << 2)
-# define LB_D5_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD5_INTERRUPT (1 << 17)
-# define DC_HPD5_RX_INTERRUPT (1 << 18)
-#define DISP_INTERRUPT_STATUS_CONTINUE5 0x1854
-# define LB_D6_VLINE_INTERRUPT (1 << 2)
-# define LB_D6_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD6_INTERRUPT (1 << 17)
-# define DC_HPD6_RX_INTERRUPT (1 << 18)
-
-/* 0x6858, 0x7458, 0x10058, 0x10c58, 0x11858, 0x12458 */
-#define GRPH_INT_STATUS 0x1A16
-# define GRPH_PFLIP_INT_OCCURRED (1 << 0)
-# define GRPH_PFLIP_INT_CLEAR (1 << 8)
-/* 0x685c, 0x745c, 0x1005c, 0x10c5c, 0x1185c, 0x1245c */
-#define GRPH_INT_CONTROL 0x1A17
-# define GRPH_PFLIP_INT_MASK (1 << 0)
-# define GRPH_PFLIP_INT_TYPE (1 << 8)
-
-#define DAC_AUTODETECT_INT_CONTROL 0x19F2
-
-#define DC_HPD1_INT_STATUS 0x1807
-#define DC_HPD2_INT_STATUS 0x180A
-#define DC_HPD3_INT_STATUS 0x180D
-#define DC_HPD4_INT_STATUS 0x1810
-#define DC_HPD5_INT_STATUS 0x1813
-#define DC_HPD6_INT_STATUS 0x1816
-# define DC_HPDx_INT_STATUS (1 << 0)
-# define DC_HPDx_SENSE (1 << 1)
-# define DC_HPDx_RX_INT_STATUS (1 << 8)
-
-#define DC_HPD1_INT_CONTROL 0x1808
-#define DC_HPD2_INT_CONTROL 0x180B
-#define DC_HPD3_INT_CONTROL 0x180E
-#define DC_HPD4_INT_CONTROL 0x1811
-#define DC_HPD5_INT_CONTROL 0x1814
-#define DC_HPD6_INT_CONTROL 0x1817
-# define DC_HPDx_INT_ACK (1 << 0)
-# define DC_HPDx_INT_POLARITY (1 << 8)
-# define DC_HPDx_INT_EN (1 << 16)
-# define DC_HPDx_RX_INT_ACK (1 << 20)
-# define DC_HPDx_RX_INT_EN (1 << 24)
-
-#define DC_HPD1_CONTROL 0x1809
-#define DC_HPD2_CONTROL 0x180C
-#define DC_HPD3_CONTROL 0x180F
-#define DC_HPD4_CONTROL 0x1812
-#define DC_HPD5_CONTROL 0x1815
-#define DC_HPD6_CONTROL 0x1818
-# define DC_HPDx_CONNECTION_TIMER(x) ((x) << 0)
-# define DC_HPDx_RX_INT_TIMER(x) ((x) << 16)
-# define DC_HPDx_EN (1 << 28)
-
-#define DPG_PIPE_STUTTER_CONTROL 0x1B35
-# define STUTTER_ENABLE (1 << 0)
-
-/* 0x6e98, 0x7a98, 0x10698, 0x11298, 0x11e98, 0x12a98 */
-#define CRTC_STATUS_FRAME_COUNT 0x1BA6
-
-/* Audio clocks */
-#define DCCG_AUDIO_DTO_SOURCE 0x05ac
-# define DCCG_AUDIO_DTO0_SOURCE_SEL(x) ((x) << 0) /* crtc0 - crtc5 */
-# define DCCG_AUDIO_DTO_SEL (1 << 4) /* 0=dto0 1=dto1 */
-
-#define DCCG_AUDIO_DTO0_PHASE 0x05b0
-#define DCCG_AUDIO_DTO0_MODULE 0x05b4
-#define DCCG_AUDIO_DTO1_PHASE 0x05c0
-#define DCCG_AUDIO_DTO1_MODULE 0x05c4
-
-#define AFMT_AUDIO_SRC_CONTROL 0x1c4f
-#define AFMT_AUDIO_SRC_SELECT(x) (((x) & 7) << 0)
-/* AFMT_AUDIO_SRC_SELECT
- * 0 = stream0
- * 1 = stream1
- * 2 = stream2
- * 3 = stream3
- * 4 = stream4
- * 5 = stream5
- */
-
-#define GRBM_CNTL 0x2000
-#define GRBM_READ_TIMEOUT(x) ((x) << 0)
-
-#define GRBM_STATUS2 0x2002
-#define RLC_RQ_PENDING (1 << 0)
-#define RLC_BUSY (1 << 8)
-#define TC_BUSY (1 << 9)
-
-#define GRBM_STATUS 0x2004
-#define CMDFIFO_AVAIL_MASK 0x0000000F
-#define RING2_RQ_PENDING (1 << 4)
-#define SRBM_RQ_PENDING (1 << 5)
-#define RING1_RQ_PENDING (1 << 6)
-#define CF_RQ_PENDING (1 << 7)
-#define PF_RQ_PENDING (1 << 8)
-#define GDS_DMA_RQ_PENDING (1 << 9)
-#define GRBM_EE_BUSY (1 << 10)
-#define DB_CLEAN (1 << 12)
-#define CB_CLEAN (1 << 13)
-#define TA_BUSY (1 << 14)
-#define GDS_BUSY (1 << 15)
-#define VGT_BUSY (1 << 17)
-#define IA_BUSY_NO_DMA (1 << 18)
-#define IA_BUSY (1 << 19)
-#define SX_BUSY (1 << 20)
-#define SPI_BUSY (1 << 22)
-#define BCI_BUSY (1 << 23)
-#define SC_BUSY (1 << 24)
-#define PA_BUSY (1 << 25)
-#define DB_BUSY (1 << 26)
-#define CP_COHERENCY_BUSY (1 << 28)
-#define CP_BUSY (1 << 29)
-#define CB_BUSY (1 << 30)
-#define GUI_ACTIVE (1 << 31)
-#define GRBM_STATUS_SE0 0x2005
-#define GRBM_STATUS_SE1 0x2006
-#define SE_DB_CLEAN (1 << 1)
-#define SE_CB_CLEAN (1 << 2)
-#define SE_BCI_BUSY (1 << 22)
-#define SE_VGT_BUSY (1 << 23)
-#define SE_PA_BUSY (1 << 24)
-#define SE_TA_BUSY (1 << 25)
-#define SE_SX_BUSY (1 << 26)
-#define SE_SPI_BUSY (1 << 27)
-#define SE_SC_BUSY (1 << 29)
-#define SE_DB_BUSY (1 << 30)
-#define SE_CB_BUSY (1 << 31)
-
-#define GRBM_SOFT_RESET 0x2008
-#define SOFT_RESET_CP (1 << 0)
-#define SOFT_RESET_CB (1 << 1)
-#define SOFT_RESET_RLC (1 << 2)
-#define SOFT_RESET_DB (1 << 3)
-#define SOFT_RESET_GDS (1 << 4)
-#define SOFT_RESET_PA (1 << 5)
-#define SOFT_RESET_SC (1 << 6)
-#define SOFT_RESET_BCI (1 << 7)
-#define SOFT_RESET_SPI (1 << 8)
-#define SOFT_RESET_SX (1 << 10)
-#define SOFT_RESET_TC (1 << 11)
-#define SOFT_RESET_TA (1 << 12)
-#define SOFT_RESET_VGT (1 << 14)
-#define SOFT_RESET_IA (1 << 15)
-
-#define GRBM_GFX_INDEX 0x200B
-#define INSTANCE_INDEX(x) ((x) << 0)
-#define SH_INDEX(x) ((x) << 8)
-#define SE_INDEX(x) ((x) << 16)
-#define SH_BROADCAST_WRITES (1 << 29)
-#define INSTANCE_BROADCAST_WRITES (1 << 30)
-#define SE_BROADCAST_WRITES (1 << 31)
-
-#define GRBM_INT_CNTL 0x2018
-# define RDERR_INT_ENABLE (1 << 0)
-# define GUI_IDLE_INT_ENABLE (1 << 19)
-
-#define CP_STRMOUT_CNTL 0x213F
-#define SCRATCH_REG0 0x2140
-#define SCRATCH_REG1 0x2141
-#define SCRATCH_REG2 0x2142
-#define SCRATCH_REG3 0x2143
-#define SCRATCH_REG4 0x2144
-#define SCRATCH_REG5 0x2145
-#define SCRATCH_REG6 0x2146
-#define SCRATCH_REG7 0x2147
-
-#define SCRATCH_UMSK 0x2150
-#define SCRATCH_ADDR 0x2151
-
-#define CP_SEM_WAIT_TIMER 0x216F
-
-#define CP_SEM_INCOMPLETE_TIMER_CNTL 0x2172
-
-#define CP_ME_CNTL 0x21B6
-#define CP_CE_HALT (1 << 24)
-#define CP_PFP_HALT (1 << 26)
-#define CP_ME_HALT (1 << 28)
-
-#define CP_COHER_CNTL2 0x217A
-
-#define CP_RB2_RPTR 0x21BE
-#define CP_RB1_RPTR 0x21BF
-#define CP_RB0_RPTR 0x21C0
-#define CP_RB_WPTR_DELAY 0x21C1
-
-#define CP_QUEUE_THRESHOLDS 0x21D8
-#define ROQ_IB1_START(x) ((x) << 0)
-#define ROQ_IB2_START(x) ((x) << 8)
-#define CP_MEQ_THRESHOLDS 0x21D9
-#define MEQ1_START(x) ((x) << 0)
-#define MEQ2_START(x) ((x) << 8)
-
-#define CP_PERFMON_CNTL 0x21FF
-
#define VGT_VTX_VECT_EJECT_REG 0x222C
-
-#define VGT_CACHE_INVALIDATION 0x2231
-#define CACHE_INVALIDATION(x) ((x) << 0)
-#define VC_ONLY 0
-#define TC_ONLY 1
-#define VC_AND_TC 2
-#define AUTO_INVLD_EN(x) ((x) << 6)
-#define NO_AUTO 0
-#define ES_AUTO 1
-#define GS_AUTO 2
-#define ES_AND_GS_AUTO 3
#define VGT_ESGS_RING_SIZE 0x2232
#define VGT_GSVS_RING_SIZE 0x2233
-
#define VGT_GS_VERTEX_REUSE 0x2235
-
#define VGT_PRIMITIVE_TYPE 0x2256
#define VGT_INDEX_TYPE 0x2257
-
#define VGT_NUM_INDICES 0x225C
#define VGT_NUM_INSTANCES 0x225D
-
#define VGT_TF_RING_SIZE 0x2262
-
#define VGT_HS_OFFCHIP_PARAM 0x226C
-
#define VGT_TF_MEMORY_BASE 0x226E
-#define CC_GC_SHADER_ARRAY_CONFIG 0x226F
-#define INACTIVE_CUS_MASK 0xFFFF0000
-#define INACTIVE_CUS_SHIFT 16
-#define GC_USER_SHADER_ARRAY_CONFIG 0x2270
-
-#define PA_CL_ENHANCE 0x2285
-#define CLIP_VTX_REORDER_ENA (1 << 0)
-#define NUM_CLIP_SEQ(x) ((x) << 1)
-
-#define PA_SU_LINE_STIPPLE_VALUE 0x2298
-
-#define PA_SC_LINE_STIPPLE_STATE 0x22C4
-
-#define PA_SC_FORCE_EOV_MAX_CNTS 0x22C9
-#define FORCE_EOV_MAX_CLK_CNT(x) ((x) << 0)
-#define FORCE_EOV_MAX_REZ_CNT(x) ((x) << 16)
-
-#define PA_SC_FIFO_SIZE 0x22F3
-#define SC_FRONTEND_PRIM_FIFO_SIZE(x) ((x) << 0)
-#define SC_BACKEND_PRIM_FIFO_SIZE(x) ((x) << 6)
-#define SC_HIZ_TILE_FIFO_SIZE(x) ((x) << 15)
-#define SC_EARLYZ_TILE_FIFO_SIZE(x) ((x) << 23)
-
#define PA_SC_ENHANCE 0x22FC
-#define SQ_CONFIG 0x2300
-
-#define SQC_CACHES 0x2302
-
-#define SQ_POWER_THROTTLE 0x2396
-#define MIN_POWER(x) ((x) << 0)
-#define MIN_POWER_MASK (0x3fff << 0)
-#define MIN_POWER_SHIFT 0
-#define MAX_POWER(x) ((x) << 16)
-#define MAX_POWER_MASK (0x3fff << 16)
-#define MAX_POWER_SHIFT 0
-#define SQ_POWER_THROTTLE2 0x2397
-#define MAX_POWER_DELTA(x) ((x) << 0)
-#define MAX_POWER_DELTA_MASK (0x3fff << 0)
-#define MAX_POWER_DELTA_SHIFT 0
-#define STI_SIZE(x) ((x) << 16)
-#define STI_SIZE_MASK (0x3ff << 16)
-#define STI_SIZE_SHIFT 16
-#define LTI_RATIO(x) ((x) << 27)
-#define LTI_RATIO_MASK (0xf << 27)
-#define LTI_RATIO_SHIFT 27
-
-#define SX_DEBUG_1 0x2418
-
-#define SPI_STATIC_THREAD_MGMT_1 0x2438
-#define SPI_STATIC_THREAD_MGMT_2 0x2439
-#define SPI_STATIC_THREAD_MGMT_3 0x243A
-#define SPI_PS_MAX_WAVE_ID 0x243B
-
-#define SPI_CONFIG_CNTL 0x2440
-
-#define SPI_CONFIG_CNTL_1 0x244F
-#define VTX_DONE_DELAY(x) ((x) << 0)
-#define INTERP_ONE_PRIM_PER_ROW (1 << 4)
-
-#define CGTS_TCC_DISABLE 0x2452
-#define CGTS_USER_TCC_DISABLE 0x2453
-#define TCC_DISABLE_MASK 0xFFFF0000
-#define TCC_DISABLE_SHIFT 16
-#define CGTS_SM_CTRL_REG 0x2454
-#define OVERRIDE (1 << 21)
-#define LS_OVERRIDE (1 << 22)
-
-#define SPI_LB_CU_MASK 0x24D5
-
#define TA_CNTL_AUX 0x2542
-#define CC_RB_BACKEND_DISABLE 0x263D
-#define BACKEND_DISABLE(x) ((x) << 16)
-#define GB_ADDR_CONFIG 0x263E
-#define NUM_PIPES(x) ((x) << 0)
-#define NUM_PIPES_MASK 0x00000007
-#define NUM_PIPES_SHIFT 0
-#define PIPE_INTERLEAVE_SIZE(x) ((x) << 4)
-#define PIPE_INTERLEAVE_SIZE_MASK 0x00000070
-#define PIPE_INTERLEAVE_SIZE_SHIFT 4
-#define NUM_SHADER_ENGINES(x) ((x) << 12)
-#define NUM_SHADER_ENGINES_MASK 0x00003000
-#define NUM_SHADER_ENGINES_SHIFT 12
-#define SHADER_ENGINE_TILE_SIZE(x) ((x) << 16)
-#define SHADER_ENGINE_TILE_SIZE_MASK 0x00070000
-#define SHADER_ENGINE_TILE_SIZE_SHIFT 16
-#define NUM_GPUS(x) ((x) << 20)
-#define NUM_GPUS_MASK 0x00700000
-#define NUM_GPUS_SHIFT 20
-#define MULTI_GPU_TILE_SIZE(x) ((x) << 24)
-#define MULTI_GPU_TILE_SIZE_MASK 0x03000000
-#define MULTI_GPU_TILE_SIZE_SHIFT 24
-#define ROW_SIZE(x) ((x) << 28)
-#define ROW_SIZE_MASK 0x30000000
-#define ROW_SIZE_SHIFT 28
-
-#define GB_TILE_MODE0 0x2644
-# define MICRO_TILE_MODE(x) ((x) << 0)
-# define ADDR_SURF_DISPLAY_MICRO_TILING 0
-# define ADDR_SURF_THIN_MICRO_TILING 1
-# define ADDR_SURF_DEPTH_MICRO_TILING 2
-# define ARRAY_MODE(x) ((x) << 2)
-# define ARRAY_LINEAR_GENERAL 0
-# define ARRAY_LINEAR_ALIGNED 1
-# define ARRAY_1D_TILED_THIN1 2
-# define ARRAY_2D_TILED_THIN1 4
-# define PIPE_CONFIG(x) ((x) << 6)
-# define ADDR_SURF_P2 0
-# define ADDR_SURF_P4_8x16 4
-# define ADDR_SURF_P4_16x16 5
-# define ADDR_SURF_P4_16x32 6
-# define ADDR_SURF_P4_32x32 7
-# define ADDR_SURF_P8_16x16_8x16 8
-# define ADDR_SURF_P8_16x32_8x16 9
-# define ADDR_SURF_P8_32x32_8x16 10
-# define ADDR_SURF_P8_16x32_16x16 11
-# define ADDR_SURF_P8_32x32_16x16 12
-# define ADDR_SURF_P8_32x32_16x32 13
-# define ADDR_SURF_P8_32x64_32x32 14
-# define TILE_SPLIT(x) ((x) << 11)
-# define ADDR_SURF_TILE_SPLIT_64B 0
-# define ADDR_SURF_TILE_SPLIT_128B 1
-# define ADDR_SURF_TILE_SPLIT_256B 2
-# define ADDR_SURF_TILE_SPLIT_512B 3
-# define ADDR_SURF_TILE_SPLIT_1KB 4
-# define ADDR_SURF_TILE_SPLIT_2KB 5
-# define ADDR_SURF_TILE_SPLIT_4KB 6
-# define BANK_WIDTH(x) ((x) << 14)
-# define ADDR_SURF_BANK_WIDTH_1 0
-# define ADDR_SURF_BANK_WIDTH_2 1
-# define ADDR_SURF_BANK_WIDTH_4 2
-# define ADDR_SURF_BANK_WIDTH_8 3
-# define BANK_HEIGHT(x) ((x) << 16)
-# define ADDR_SURF_BANK_HEIGHT_1 0
-# define ADDR_SURF_BANK_HEIGHT_2 1
-# define ADDR_SURF_BANK_HEIGHT_4 2
-# define ADDR_SURF_BANK_HEIGHT_8 3
-# define MACRO_TILE_ASPECT(x) ((x) << 18)
-# define ADDR_SURF_MACRO_ASPECT_1 0
-# define ADDR_SURF_MACRO_ASPECT_2 1
-# define ADDR_SURF_MACRO_ASPECT_4 2
-# define ADDR_SURF_MACRO_ASPECT_8 3
-# define NUM_BANKS(x) ((x) << 20)
-# define ADDR_SURF_2_BANK 0
-# define ADDR_SURF_4_BANK 1
-# define ADDR_SURF_8_BANK 2
-# define ADDR_SURF_16_BANK 3
-#define GB_TILE_MODE1 0x2645
-#define GB_TILE_MODE2 0x2646
-#define GB_TILE_MODE3 0x2647
-#define GB_TILE_MODE4 0x2648
-#define GB_TILE_MODE5 0x2649
-#define GB_TILE_MODE6 0x264a
-#define GB_TILE_MODE7 0x264b
-#define GB_TILE_MODE8 0x264c
-#define GB_TILE_MODE9 0x264d
-#define GB_TILE_MODE10 0x264e
-#define GB_TILE_MODE11 0x264f
-#define GB_TILE_MODE12 0x2650
-#define GB_TILE_MODE13 0x2651
-#define GB_TILE_MODE14 0x2652
-#define GB_TILE_MODE15 0x2653
-#define GB_TILE_MODE16 0x2654
-#define GB_TILE_MODE17 0x2655
-#define GB_TILE_MODE18 0x2656
-#define GB_TILE_MODE19 0x2657
-#define GB_TILE_MODE20 0x2658
-#define GB_TILE_MODE21 0x2659
-#define GB_TILE_MODE22 0x265a
-#define GB_TILE_MODE23 0x265b
-#define GB_TILE_MODE24 0x265c
-#define GB_TILE_MODE25 0x265d
-#define GB_TILE_MODE26 0x265e
-#define GB_TILE_MODE27 0x265f
-#define GB_TILE_MODE28 0x2660
-#define GB_TILE_MODE29 0x2661
-#define GB_TILE_MODE30 0x2662
-#define GB_TILE_MODE31 0x2663
-
-#define CB_PERFCOUNTER0_SELECT0 0x2688
-#define CB_PERFCOUNTER0_SELECT1 0x2689
-#define CB_PERFCOUNTER1_SELECT0 0x268A
-#define CB_PERFCOUNTER1_SELECT1 0x268B
-#define CB_PERFCOUNTER2_SELECT0 0x268C
-#define CB_PERFCOUNTER2_SELECT1 0x268D
-#define CB_PERFCOUNTER3_SELECT0 0x268E
-#define CB_PERFCOUNTER3_SELECT1 0x268F
-
-#define CB_CGTT_SCLK_CTRL 0x2698
-
-#define GC_USER_RB_BACKEND_DISABLE 0x26DF
-#define BACKEND_DISABLE_MASK 0x00FF0000
-#define BACKEND_DISABLE_SHIFT 16
-
-#define TCP_CHAN_STEER_LO 0x2B03
-#define TCP_CHAN_STEER_HI 0x2B94
-
-#define CP_RB0_BASE 0x3040
-#define CP_RB0_CNTL 0x3041
-#define RB_BUFSZ(x) ((x) << 0)
-#define RB_BLKSZ(x) ((x) << 8)
-#define BUF_SWAP_32BIT (2 << 16)
-#define RB_NO_UPDATE (1 << 27)
-#define RB_RPTR_WR_ENA (1 << 31)
-
-#define CP_RB0_RPTR_ADDR 0x3043
-#define CP_RB0_RPTR_ADDR_HI 0x3044
-#define CP_RB0_WPTR 0x3045
-
-#define CP_PFP_UCODE_ADDR 0x3054
-#define CP_PFP_UCODE_DATA 0x3055
-#define CP_ME_RAM_RADDR 0x3056
-#define CP_ME_RAM_WADDR 0x3057
-#define CP_ME_RAM_DATA 0x3058
-
-#define CP_CE_UCODE_ADDR 0x305A
-#define CP_CE_UCODE_DATA 0x305B
-
-#define CP_RB1_BASE 0x3060
-#define CP_RB1_CNTL 0x3061
-#define CP_RB1_RPTR_ADDR 0x3062
-#define CP_RB1_RPTR_ADDR_HI 0x3063
-#define CP_RB1_WPTR 0x3064
-#define CP_RB2_BASE 0x3065
-#define CP_RB2_CNTL 0x3066
-#define CP_RB2_RPTR_ADDR 0x3067
-#define CP_RB2_RPTR_ADDR_HI 0x3068
-#define CP_RB2_WPTR 0x3069
-#define CP_INT_CNTL_RING0 0x306A
-#define CP_INT_CNTL_RING1 0x306B
-#define CP_INT_CNTL_RING2 0x306C
-# define CNTX_BUSY_INT_ENABLE (1 << 19)
-# define CNTX_EMPTY_INT_ENABLE (1 << 20)
-# define WAIT_MEM_SEM_INT_ENABLE (1 << 21)
-# define TIME_STAMP_INT_ENABLE (1 << 26)
-# define CP_RINGID2_INT_ENABLE (1 << 29)
-# define CP_RINGID1_INT_ENABLE (1 << 30)
-# define CP_RINGID0_INT_ENABLE (1 << 31)
-#define CP_INT_STATUS_RING0 0x306D
-#define CP_INT_STATUS_RING1 0x306E
-#define CP_INT_STATUS_RING2 0x306F
-# define WAIT_MEM_SEM_INT_STAT (1 << 21)
-# define TIME_STAMP_INT_STAT (1 << 26)
-# define CP_RINGID2_INT_STAT (1 << 29)
-# define CP_RINGID1_INT_STAT (1 << 30)
-# define CP_RINGID0_INT_STAT (1 << 31)
-
-#define CP_MEM_SLP_CNTL 0x3079
-# define CP_MEM_LS_EN (1 << 0)
-
-#define CP_DEBUG 0x307F
-
-#define RLC_CNTL 0x30C0
-# define RLC_ENABLE (1 << 0)
-#define RLC_RL_BASE 0x30C1
-#define RLC_RL_SIZE 0x30C2
-#define RLC_LB_CNTL 0x30C3
-# define LOAD_BALANCE_ENABLE (1 << 0)
-#define RLC_SAVE_AND_RESTORE_BASE 0x30C4
-#define RLC_LB_CNTR_MAX 0x30C5
-#define RLC_LB_CNTR_INIT 0x30C6
-
-#define RLC_CLEAR_STATE_RESTORE_BASE 0x30C8
-
-#define RLC_UCODE_ADDR 0x30CB
-#define RLC_UCODE_DATA 0x30CC
-
-#define RLC_GPU_CLOCK_COUNT_LSB 0x30CE
-#define RLC_GPU_CLOCK_COUNT_MSB 0x30CF
-#define RLC_CAPTURE_GPU_CLOCK_COUNT 0x30D0
-#define RLC_MC_CNTL 0x30D1
-#define RLC_UCODE_CNTL 0x30D2
-#define RLC_STAT 0x30D3
-# define RLC_BUSY_STATUS (1 << 0)
-# define GFX_POWER_STATUS (1 << 1)
-# define GFX_CLOCK_STATUS (1 << 2)
-# define GFX_LS_STATUS (1 << 3)
-
-#define RLC_PG_CNTL 0x30D7
-# define GFX_PG_ENABLE (1 << 0)
-# define GFX_PG_SRC (1 << 1)
-
-#define RLC_CGTT_MGCG_OVERRIDE 0x3100
-#define RLC_CGCG_CGLS_CTRL 0x3101
-# define CGCG_EN (1 << 0)
-# define CGLS_EN (1 << 1)
-
-#define RLC_TTOP_D 0x3105
-# define RLC_PUD(x) ((x) << 0)
-# define RLC_PUD_MASK (0xff << 0)
-# define RLC_PDD(x) ((x) << 8)
-# define RLC_PDD_MASK (0xff << 8)
-# define RLC_TTPD(x) ((x) << 16)
-# define RLC_TTPD_MASK (0xff << 16)
-# define RLC_MSD(x) ((x) << 24)
-# define RLC_MSD_MASK (0xff << 24)
-
-#define RLC_LB_INIT_CU_MASK 0x3107
-
-#define RLC_PG_AO_CU_MASK 0x310B
-#define RLC_MAX_PG_CU 0x310C
-# define MAX_PU_CU(x) ((x) << 0)
-# define MAX_PU_CU_MASK (0xff << 0)
-#define RLC_AUTO_PG_CTRL 0x310C
-# define AUTO_PG_EN (1 << 0)
-# define GRBM_REG_SGIT(x) ((x) << 3)
-# define GRBM_REG_SGIT_MASK (0xffff << 3)
-# define PG_AFTER_GRBM_REG_ST(x) ((x) << 19)
-# define PG_AFTER_GRBM_REG_ST_MASK (0x1fff << 19)
-
-#define RLC_SERDES_WR_MASTER_MASK_0 0x3115
-#define RLC_SERDES_WR_MASTER_MASK_1 0x3116
-#define RLC_SERDES_WR_CTRL 0x3117
-
-#define RLC_SERDES_MASTER_BUSY_0 0x3119
-#define RLC_SERDES_MASTER_BUSY_1 0x311A
-
-#define RLC_GCPM_GENERAL_3 0x311E
-
-#define DB_RENDER_CONTROL 0xA000
-
-#define DB_DEPTH_INFO 0xA00F
-
-#define PA_SC_RASTER_CONFIG 0xA0D4
-# define RB_MAP_PKR0(x) ((x) << 0)
-# define RB_MAP_PKR0_MASK (0x3 << 0)
-# define RB_MAP_PKR1(x) ((x) << 2)
-# define RB_MAP_PKR1_MASK (0x3 << 2)
-# define RASTER_CONFIG_RB_MAP_0 0
-# define RASTER_CONFIG_RB_MAP_1 1
-# define RASTER_CONFIG_RB_MAP_2 2
-# define RASTER_CONFIG_RB_MAP_3 3
+// #define PA_SC_RASTER_CONFIG 0xA0D4
# define RB_XSEL2(x) ((x) << 4)
# define RB_XSEL2_MASK (0x3 << 4)
# define RB_XSEL (1 << 6)
# define RB_YSEL (1 << 7)
# define PKR_MAP(x) ((x) << 8)
-# define PKR_MAP_MASK (0x3 << 8)
-# define RASTER_CONFIG_PKR_MAP_0 0
-# define RASTER_CONFIG_PKR_MAP_1 1
-# define RASTER_CONFIG_PKR_MAP_2 2
-# define RASTER_CONFIG_PKR_MAP_3 3
# define PKR_XSEL(x) ((x) << 10)
# define PKR_XSEL_MASK (0x3 << 10)
# define PKR_YSEL(x) ((x) << 12)
@@ -1426,221 +306,19 @@
# define SC_YSEL(x) ((x) << 20)
# define SC_YSEL_MASK (0x3 << 20)
# define SE_MAP(x) ((x) << 24)
-# define SE_MAP_MASK (0x3 << 24)
-# define RASTER_CONFIG_SE_MAP_0 0
-# define RASTER_CONFIG_SE_MAP_1 1
-# define RASTER_CONFIG_SE_MAP_2 2
-# define RASTER_CONFIG_SE_MAP_3 3
# define SE_XSEL(x) ((x) << 26)
# define SE_XSEL_MASK (0x3 << 26)
# define SE_YSEL(x) ((x) << 28)
# define SE_YSEL_MASK (0x3 << 28)
-
-#define VGT_EVENT_INITIATOR 0xA2A4
-# define SAMPLE_STREAMOUTSTATS1 (1 << 0)
-# define SAMPLE_STREAMOUTSTATS2 (2 << 0)
-# define SAMPLE_STREAMOUTSTATS3 (3 << 0)
-# define CACHE_FLUSH_TS (4 << 0)
-# define CACHE_FLUSH (6 << 0)
-# define CS_PARTIAL_FLUSH (7 << 0)
-# define VGT_STREAMOUT_RESET (10 << 0)
-# define END_OF_PIPE_INCR_DE (11 << 0)
-# define END_OF_PIPE_IB_END (12 << 0)
-# define RST_PIX_CNT (13 << 0)
-# define VS_PARTIAL_FLUSH (15 << 0)
-# define PS_PARTIAL_FLUSH (16 << 0)
-# define CACHE_FLUSH_AND_INV_TS_EVENT (20 << 0)
-# define ZPASS_DONE (21 << 0)
-# define CACHE_FLUSH_AND_INV_EVENT (22 << 0)
-# define PERFCOUNTER_START (23 << 0)
-# define PERFCOUNTER_STOP (24 << 0)
-# define PIPELINESTAT_START (25 << 0)
-# define PIPELINESTAT_STOP (26 << 0)
-# define PERFCOUNTER_SAMPLE (27 << 0)
-# define SAMPLE_PIPELINESTAT (30 << 0)
-# define SAMPLE_STREAMOUTSTATS (32 << 0)
-# define RESET_VTX_CNT (33 << 0)
-# define VGT_FLUSH (36 << 0)
-# define BOTTOM_OF_PIPE_TS (40 << 0)
-# define DB_CACHE_FLUSH_AND_INV (42 << 0)
-# define FLUSH_AND_INV_DB_DATA_TS (43 << 0)
-# define FLUSH_AND_INV_DB_META (44 << 0)
-# define FLUSH_AND_INV_CB_DATA_TS (45 << 0)
-# define FLUSH_AND_INV_CB_META (46 << 0)
-# define CS_DONE (47 << 0)
-# define PS_DONE (48 << 0)
-# define FLUSH_AND_INV_CB_PIXEL_DATA (49 << 0)
-# define THREAD_TRACE_START (51 << 0)
-# define THREAD_TRACE_STOP (52 << 0)
-# define THREAD_TRACE_FLUSH (54 << 0)
-# define THREAD_TRACE_FINISH (55 << 0)
-
-/* PIF PHY0 registers idx/data 0x8/0xc */
-#define PB0_PIF_CNTL 0x10
-# define LS2_EXIT_TIME(x) ((x) << 17)
-# define LS2_EXIT_TIME_MASK (0x7 << 17)
-# define LS2_EXIT_TIME_SHIFT 17
-#define PB0_PIF_PAIRING 0x11
-# define MULTI_PIF (1 << 25)
-#define PB0_PIF_PWRDOWN_0 0x12
-# define PLL_POWER_STATE_IN_TXS2_0(x) ((x) << 7)
-# define PLL_POWER_STATE_IN_TXS2_0_MASK (0x7 << 7)
-# define PLL_POWER_STATE_IN_TXS2_0_SHIFT 7
-# define PLL_POWER_STATE_IN_OFF_0(x) ((x) << 10)
-# define PLL_POWER_STATE_IN_OFF_0_MASK (0x7 << 10)
-# define PLL_POWER_STATE_IN_OFF_0_SHIFT 10
-# define PLL_RAMP_UP_TIME_0(x) ((x) << 24)
-# define PLL_RAMP_UP_TIME_0_MASK (0x7 << 24)
-# define PLL_RAMP_UP_TIME_0_SHIFT 24
-#define PB0_PIF_PWRDOWN_1 0x13
-# define PLL_POWER_STATE_IN_TXS2_1(x) ((x) << 7)
-# define PLL_POWER_STATE_IN_TXS2_1_MASK (0x7 << 7)
-# define PLL_POWER_STATE_IN_TXS2_1_SHIFT 7
-# define PLL_POWER_STATE_IN_OFF_1(x) ((x) << 10)
-# define PLL_POWER_STATE_IN_OFF_1_MASK (0x7 << 10)
-# define PLL_POWER_STATE_IN_OFF_1_SHIFT 10
-# define PLL_RAMP_UP_TIME_1(x) ((x) << 24)
-# define PLL_RAMP_UP_TIME_1_MASK (0x7 << 24)
-# define PLL_RAMP_UP_TIME_1_SHIFT 24
-
-#define PB0_PIF_PWRDOWN_2 0x17
-# define PLL_POWER_STATE_IN_TXS2_2(x) ((x) << 7)
-# define PLL_POWER_STATE_IN_TXS2_2_MASK (0x7 << 7)
-# define PLL_POWER_STATE_IN_TXS2_2_SHIFT 7
-# define PLL_POWER_STATE_IN_OFF_2(x) ((x) << 10)
-# define PLL_POWER_STATE_IN_OFF_2_MASK (0x7 << 10)
-# define PLL_POWER_STATE_IN_OFF_2_SHIFT 10
-# define PLL_RAMP_UP_TIME_2(x) ((x) << 24)
-# define PLL_RAMP_UP_TIME_2_MASK (0x7 << 24)
-# define PLL_RAMP_UP_TIME_2_SHIFT 24
-#define PB0_PIF_PWRDOWN_3 0x18
-# define PLL_POWER_STATE_IN_TXS2_3(x) ((x) << 7)
-# define PLL_POWER_STATE_IN_TXS2_3_MASK (0x7 << 7)
-# define PLL_POWER_STATE_IN_TXS2_3_SHIFT 7
-# define PLL_POWER_STATE_IN_OFF_3(x) ((x) << 10)
-# define PLL_POWER_STATE_IN_OFF_3_MASK (0x7 << 10)
-# define PLL_POWER_STATE_IN_OFF_3_SHIFT 10
-# define PLL_RAMP_UP_TIME_3(x) ((x) << 24)
-# define PLL_RAMP_UP_TIME_3_MASK (0x7 << 24)
-# define PLL_RAMP_UP_TIME_3_SHIFT 24
-/* PIF PHY1 registers idx/data 0x10/0x14 */
-#define PB1_PIF_CNTL 0x10
-#define PB1_PIF_PAIRING 0x11
-#define PB1_PIF_PWRDOWN_0 0x12
-#define PB1_PIF_PWRDOWN_1 0x13
-
-#define PB1_PIF_PWRDOWN_2 0x17
-#define PB1_PIF_PWRDOWN_3 0x18
-/* PCIE registers idx/data 0x30/0x34 */
-#define PCIE_CNTL2 0x1c /* PCIE */
-# define SLV_MEM_LS_EN (1 << 16)
-# define SLV_MEM_AGGRESSIVE_LS_EN (1 << 17)
-# define MST_MEM_LS_EN (1 << 18)
-# define REPLAY_MEM_LS_EN (1 << 19)
-#define PCIE_LC_STATUS1 0x28 /* PCIE */
-# define LC_REVERSE_RCVR (1 << 0)
-# define LC_REVERSE_XMIT (1 << 1)
-# define LC_OPERATING_LINK_WIDTH_MASK (0x7 << 2)
-# define LC_OPERATING_LINK_WIDTH_SHIFT 2
-# define LC_DETECTED_LINK_WIDTH_MASK (0x7 << 5)
-# define LC_DETECTED_LINK_WIDTH_SHIFT 5
-
-#define PCIE_P_CNTL 0x40 /* PCIE */
-# define P_IGNORE_EDB_ERR (1 << 6)
-
/* PCIE PORT registers idx/data 0x38/0x3c */
-#define PCIE_LC_CNTL 0xa0
-# define LC_L0S_INACTIVITY(x) ((x) << 8)
-# define LC_L0S_INACTIVITY_MASK (0xf << 8)
-# define LC_L0S_INACTIVITY_SHIFT 8
-# define LC_L1_INACTIVITY(x) ((x) << 12)
-# define LC_L1_INACTIVITY_MASK (0xf << 12)
-# define LC_L1_INACTIVITY_SHIFT 12
-# define LC_PMI_TO_L1_DIS (1 << 16)
-# define LC_ASPM_TO_L1_DIS (1 << 24)
-#define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */
-# define LC_LINK_WIDTH_SHIFT 0
-# define LC_LINK_WIDTH_MASK 0x7
+// #define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */
# define LC_LINK_WIDTH_X0 0
# define LC_LINK_WIDTH_X1 1
# define LC_LINK_WIDTH_X2 2
# define LC_LINK_WIDTH_X4 3
# define LC_LINK_WIDTH_X8 4
# define LC_LINK_WIDTH_X16 6
-# define LC_LINK_WIDTH_RD_SHIFT 4
-# define LC_LINK_WIDTH_RD_MASK 0x70
-# define LC_RECONFIG_ARC_MISSING_ESCAPE (1 << 7)
-# define LC_RECONFIG_NOW (1 << 8)
-# define LC_RENEGOTIATION_SUPPORT (1 << 9)
-# define LC_RENEGOTIATE_EN (1 << 10)
-# define LC_SHORT_RECONFIG_EN (1 << 11)
-# define LC_UPCONFIGURE_SUPPORT (1 << 12)
-# define LC_UPCONFIGURE_DIS (1 << 13)
-# define LC_DYN_LANES_PWR_STATE(x) ((x) << 21)
-# define LC_DYN_LANES_PWR_STATE_MASK (0x3 << 21)
-# define LC_DYN_LANES_PWR_STATE_SHIFT 21
-#define PCIE_LC_N_FTS_CNTL 0xa3 /* PCIE_P */
-# define LC_XMIT_N_FTS(x) ((x) << 0)
-# define LC_XMIT_N_FTS_MASK (0xff << 0)
-# define LC_XMIT_N_FTS_SHIFT 0
-# define LC_XMIT_N_FTS_OVERRIDE_EN (1 << 8)
-# define LC_N_FTS_MASK (0xff << 24)
-#define PCIE_LC_SPEED_CNTL 0xa4 /* PCIE_P */
-# define LC_GEN2_EN_STRAP (1 << 0)
-# define LC_GEN3_EN_STRAP (1 << 1)
-# define LC_TARGET_LINK_SPEED_OVERRIDE_EN (1 << 2)
-# define LC_TARGET_LINK_SPEED_OVERRIDE_MASK (0x3 << 3)
-# define LC_TARGET_LINK_SPEED_OVERRIDE_SHIFT 3
-# define LC_FORCE_EN_SW_SPEED_CHANGE (1 << 5)
-# define LC_FORCE_DIS_SW_SPEED_CHANGE (1 << 6)
-# define LC_FORCE_EN_HW_SPEED_CHANGE (1 << 7)
-# define LC_FORCE_DIS_HW_SPEED_CHANGE (1 << 8)
-# define LC_INITIATE_LINK_SPEED_CHANGE (1 << 9)
-# define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_MASK (0x3 << 10)
-# define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_SHIFT 10
-# define LC_CURRENT_DATA_RATE_MASK (0x3 << 13) /* 0/1/2 = gen1/2/3 */
-# define LC_CURRENT_DATA_RATE_SHIFT 13
-# define LC_CLR_FAILED_SPD_CHANGE_CNT (1 << 16)
-# define LC_OTHER_SIDE_EVER_SENT_GEN2 (1 << 18)
-# define LC_OTHER_SIDE_SUPPORTS_GEN2 (1 << 19)
-# define LC_OTHER_SIDE_EVER_SENT_GEN3 (1 << 20)
-# define LC_OTHER_SIDE_SUPPORTS_GEN3 (1 << 21)
-
-#define PCIE_LC_CNTL2 0xb1
-# define LC_ALLOW_PDWN_IN_L1 (1 << 17)
-# define LC_ALLOW_PDWN_IN_L23 (1 << 18)
-
-#define PCIE_LC_CNTL3 0xb5 /* PCIE_P */
-# define LC_GO_TO_RECOVERY (1 << 30)
-#define PCIE_LC_CNTL4 0xb6 /* PCIE_P */
-# define LC_REDO_EQ (1 << 5)
-# define LC_SET_QUIESCE (1 << 13)
-
-/*
- * UVD
- */
-#define UVD_UDEC_ADDR_CONFIG 0x3bd3
-#define UVD_UDEC_DB_ADDR_CONFIG 0x3bd4
-#define UVD_UDEC_DBW_ADDR_CONFIG 0x3bd5
-#define UVD_RBC_RB_RPTR 0x3da4
-#define UVD_RBC_RB_WPTR 0x3da5
-#define UVD_STATUS 0x3daf
-
-#define UVD_CGC_CTRL 0x3dc2
-# define DCM (1 << 0)
-# define CG_DT(x) ((x) << 2)
-# define CG_DT_MASK (0xf << 2)
-# define CLK_OD(x) ((x) << 6)
-# define CLK_OD_MASK (0x1f << 6)
-
- /* UVD CTX indirect */
-#define UVD_CGC_MEM_CTRL 0xC0
-#define UVD_CGC_CTRL2 0xC1
-# define DYN_OR_EN (1 << 0)
-# define DYN_RR_EN (1 << 1)
-# define G_DIV_ID(x) ((x) << 2)
-# define G_DIV_ID_MASK (0x7 << 2)
/*
* PM4
@@ -1874,45 +552,7 @@
/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */
#define DMA0_REGISTER_OFFSET 0x0 /* not a register */
#define DMA1_REGISTER_OFFSET 0x200 /* not a register */
-
-#define DMA_RB_CNTL 0x3400
-# define DMA_RB_ENABLE (1 << 0)
-# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
-# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
-# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
-# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
-# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
-#define DMA_RB_BASE 0x3401
-#define DMA_RB_RPTR 0x3402
-#define DMA_RB_WPTR 0x3403
-
-#define DMA_RB_RPTR_ADDR_HI 0x3407
-#define DMA_RB_RPTR_ADDR_LO 0x3408
-
-#define DMA_IB_CNTL 0x3409
-# define DMA_IB_ENABLE (1 << 0)
-# define DMA_IB_SWAP_ENABLE (1 << 4)
-# define CMD_VMID_FORCE (1 << 31)
-#define DMA_IB_RPTR 0x340a
-#define DMA_CNTL 0x340b
-# define TRAP_ENABLE (1 << 0)
-# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
-# define SEM_WAIT_INT_ENABLE (1 << 2)
-# define DATA_SWAP_ENABLE (1 << 3)
-# define FENCE_SWAP_ENABLE (1 << 4)
-# define CTXEMPTY_INT_ENABLE (1 << 28)
-#define DMA_STATUS_REG 0x340d
-# define DMA_IDLE (1 << 0)
-#define DMA_TILING_CONFIG 0x342e
-
-#define DMA_POWER_CNTL 0x342f
-# define MEM_POWER_OVERRIDE (1 << 8)
-#define DMA_CLK_CTRL 0x3430
-
-#define DMA_PG 0x3435
-# define PG_CNTL_ENABLE (1 << 0)
-#define DMA_PGFSM_CONFIG 0x3436
-#define DMA_PGFSM_WRITE 0x3437
+#define SDMA_MAX_INSTANCE 2
#define DMA_PACKET(cmd, b, t, s, n) ((((cmd) & 0xF) << 28) | \
(((b) & 0x1) << 26) | \
@@ -1941,45 +581,7 @@
#define DMA_PACKET_POLL_REG_MEM 0xe
#define DMA_PACKET_NOP 0xf
-#define VCE_STATUS 0x20004
-#define VCE_VCPU_CNTL 0x20014
-#define VCE_CLK_EN (1 << 0)
-#define VCE_VCPU_CACHE_OFFSET0 0x20024
-#define VCE_VCPU_CACHE_SIZE0 0x20028
-#define VCE_VCPU_CACHE_OFFSET1 0x2002c
-#define VCE_VCPU_CACHE_SIZE1 0x20030
-#define VCE_VCPU_CACHE_OFFSET2 0x20034
-#define VCE_VCPU_CACHE_SIZE2 0x20038
-#define VCE_SOFT_RESET 0x20120
-#define VCE_ECPU_SOFT_RESET (1 << 0)
-#define VCE_FME_SOFT_RESET (1 << 2)
-#define VCE_RB_BASE_LO2 0x2016c
-#define VCE_RB_BASE_HI2 0x20170
-#define VCE_RB_SIZE2 0x20174
-#define VCE_RB_RPTR2 0x20178
-#define VCE_RB_WPTR2 0x2017c
-#define VCE_RB_BASE_LO 0x20180
-#define VCE_RB_BASE_HI 0x20184
-#define VCE_RB_SIZE 0x20188
-#define VCE_RB_RPTR 0x2018c
-#define VCE_RB_WPTR 0x20190
-#define VCE_CLOCK_GATING_A 0x202f8
-#define VCE_CLOCK_GATING_B 0x202fc
-#define VCE_UENC_CLOCK_GATING 0x205bc
-#define VCE_UENC_REG_CLOCK_GATING 0x205c0
-#define VCE_FW_REG_STATUS 0x20e10
-# define VCE_FW_REG_STATUS_BUSY (1 << 0)
-# define VCE_FW_REG_STATUS_PASS (1 << 3)
-# define VCE_FW_REG_STATUS_DONE (1 << 11)
-#define VCE_LMI_FW_START_KEYSEL 0x20e18
-#define VCE_LMI_FW_PERIODIC_CTRL 0x20e20
-#define VCE_LMI_CTRL2 0x20e74
-#define VCE_LMI_CTRL 0x20e98
-#define VCE_LMI_VM_CTRL 0x20ea0
-#define VCE_LMI_SWAP_CNTL 0x20eb4
-#define VCE_LMI_SWAP_CNTL1 0x20eb8
-#define VCE_LMI_CACHE_CTRL 0x20ef4
-
+/* VCE */
#define VCE_CMD_NO_OP 0x00000000
#define VCE_CMD_END 0x00000001
#define VCE_CMD_IB 0x00000002
@@ -1988,434 +590,146 @@
#define VCE_CMD_IB_AUTO 0x00000005
#define VCE_CMD_SEMAPHORE 0x00000006
-
//#dce stupp
/* display controller offsets used for crtc/cur/lut/grph/viewport/etc. */
-#define SI_CRTC0_REGISTER_OFFSET 0 //(0x6df0 - 0x6df0)/4
-#define SI_CRTC1_REGISTER_OFFSET 0x300 //(0x79f0 - 0x6df0)/4
-#define SI_CRTC2_REGISTER_OFFSET 0x2600 //(0x105f0 - 0x6df0)/4
-#define SI_CRTC3_REGISTER_OFFSET 0x2900 //(0x111f0 - 0x6df0)/4
-#define SI_CRTC4_REGISTER_OFFSET 0x2c00 //(0x11df0 - 0x6df0)/4
-#define SI_CRTC5_REGISTER_OFFSET 0x2f00 //(0x129f0 - 0x6df0)/4
+#define CRTC0_REGISTER_OFFSET (0x1b7c - 0x1b7c) //(0x6df0 - 0x6df0)/4
+#define CRTC1_REGISTER_OFFSET (0x1e7c - 0x1b7c) //(0x79f0 - 0x6df0)/4
+#define CRTC2_REGISTER_OFFSET (0x417c - 0x1b7c) //(0x105f0 - 0x6df0)/4
+#define CRTC3_REGISTER_OFFSET (0x447c - 0x1b7c) //(0x111f0 - 0x6df0)/4
+#define CRTC4_REGISTER_OFFSET (0x477c - 0x1b7c) //(0x11df0 - 0x6df0)/4
+#define CRTC5_REGISTER_OFFSET (0x4a7c - 0x1b7c) //(0x129f0 - 0x6df0)/4
+
+/* hpd instance offsets */
+#define HPD0_REGISTER_OFFSET (0x1807 - 0x1807)
+#define HPD1_REGISTER_OFFSET (0x180a - 0x1807)
+#define HPD2_REGISTER_OFFSET (0x180d - 0x1807)
+#define HPD3_REGISTER_OFFSET (0x1810 - 0x1807)
+#define HPD4_REGISTER_OFFSET (0x1813 - 0x1807)
+#define HPD5_REGISTER_OFFSET (0x1816 - 0x1807)
+
+/* audio endpt instance offsets */
+#define AUD0_REGISTER_OFFSET (0x1780 - 0x1780)
+#define AUD1_REGISTER_OFFSET (0x1786 - 0x1780)
+#define AUD2_REGISTER_OFFSET (0x178c - 0x1780)
+#define AUD3_REGISTER_OFFSET (0x1792 - 0x1780)
+#define AUD4_REGISTER_OFFSET (0x1798 - 0x1780)
+#define AUD5_REGISTER_OFFSET (0x179d - 0x1780)
+#define AUD6_REGISTER_OFFSET (0x17a4 - 0x1780)
#define CURSOR_WIDTH 64
#define CURSOR_HEIGHT 64
-#define AMDGPU_MM_INDEX 0x0000
-#define AMDGPU_MM_DATA 0x0001
-
-#define VERDE_NUM_CRTC 6
-#define BLACKOUT_MODE_MASK 0x00000007
-#define VGA_RENDER_CONTROL 0xC0
-#define R_000300_VGA_RENDER_CONTROL 0xC0
-#define C_000300_VGA_VSTATUS_CNTL 0xFFFCFFFF
-#define EVERGREEN_CRTC_STATUS 0x1BA3
-#define EVERGREEN_CRTC_V_BLANK (1 << 0)
-#define EVERGREEN_CRTC_STATUS_POSITION 0x1BA4
-/* CRTC blocks at 0x6df0, 0x79f0, 0x105f0, 0x111f0, 0x11df0, 0x129f0 */
-#define EVERGREEN_CRTC_V_BLANK_START_END 0x1b8d
-#define EVERGREEN_CRTC_CONTROL 0x1b9c
-#define EVERGREEN_CRTC_MASTER_EN (1 << 0)
-#define EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE (1 << 24)
-#define EVERGREEN_CRTC_BLANK_CONTROL 0x1b9d
-#define EVERGREEN_CRTC_BLANK_DATA_EN (1 << 8)
-#define EVERGREEN_CRTC_V_BLANK (1 << 0)
-#define EVERGREEN_CRTC_STATUS_HV_COUNT 0x1ba8
-#define EVERGREEN_CRTC_UPDATE_LOCK 0x1bb5
-#define EVERGREEN_MASTER_UPDATE_LOCK 0x1bbd
-#define EVERGREEN_MASTER_UPDATE_MODE 0x1bbe
-#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16)
-#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07
-#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08
-#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS 0x1a04
-#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS 0x1a05
-#define EVERGREEN_GRPH_UPDATE 0x1a11
-#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS 0xc4
-#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH 0xc9
-#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2)
-
-#define EVERGREEN_DATA_FORMAT 0x1ac0
-# define EVERGREEN_INTERLEAVE_EN (1 << 0)
-
-#define MC_SHARED_CHMAP__NOOFCHAN_MASK 0xf000
-#define MC_SHARED_CHMAP__NOOFCHAN__SHIFT 0xc
-
-#define R600_D1GRPH_ARRAY_MODE_LINEAR_GENERAL (0 << 20)
-#define R600_D1GRPH_ARRAY_MODE_LINEAR_ALIGNED (1 << 20)
-#define R600_D1GRPH_ARRAY_MODE_1D_TILED_THIN1 (2 << 20)
-#define R600_D1GRPH_ARRAY_MODE_2D_TILED_THIN1 (4 << 20)
-
-#define R700_D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a45
-#define R700_D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1845
-
-#define R700_D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1847
-#define R700_D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a47
-
-#define DISP_INTERRUPT_STATUS__LB_D1_VBLANK_INTERRUPT_MASK 0x8
-#define DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VBLANK_INTERRUPT_MASK 0x8
-#define DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VBLANK_INTERRUPT_MASK 0x8
-#define DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VBLANK_INTERRUPT_MASK 0x8
-#define DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VBLANK_INTERRUPT_MASK 0x8
-#define DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VBLANK_INTERRUPT_MASK 0x8
-
-#define DISP_INTERRUPT_STATUS__LB_D1_VLINE_INTERRUPT_MASK 0x4
-#define DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VLINE_INTERRUPT_MASK 0x4
-#define DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VLINE_INTERRUPT_MASK 0x4
-#define DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VLINE_INTERRUPT_MASK 0x4
-#define DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VLINE_INTERRUPT_MASK 0x4
-#define DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VLINE_INTERRUPT_MASK 0x4
-
-#define DISP_INTERRUPT_STATUS__DC_HPD1_INTERRUPT_MASK 0x20000
-#define DISP_INTERRUPT_STATUS_CONTINUE__DC_HPD2_INTERRUPT_MASK 0x20000
-#define DISP_INTERRUPT_STATUS_CONTINUE2__DC_HPD3_INTERRUPT_MASK 0x20000
-#define DISP_INTERRUPT_STATUS_CONTINUE3__DC_HPD4_INTERRUPT_MASK 0x20000
-#define DISP_INTERRUPT_STATUS_CONTINUE4__DC_HPD5_INTERRUPT_MASK 0x20000
-#define DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK 0x20000
-
-#define GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK 0x1
-#define GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK 0x100
-
-#define DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK 0x1
-
-#define R600_D1GRPH_SWAP_CONTROL 0x1843
-#define R600_D1GRPH_SWAP_ENDIAN_NONE (0 << 0)
-#define R600_D1GRPH_SWAP_ENDIAN_16BIT (1 << 0)
-#define R600_D1GRPH_SWAP_ENDIAN_32BIT (2 << 0)
-#define R600_D1GRPH_SWAP_ENDIAN_64BIT (3 << 0)
-
-#define AVIVO_D1VGA_CONTROL 0x00cc
-# define AVIVO_DVGA_CONTROL_MODE_ENABLE (1 << 0)
-# define AVIVO_DVGA_CONTROL_TIMING_SELECT (1 << 8)
-# define AVIVO_DVGA_CONTROL_SYNC_POLARITY_SELECT (1 << 9)
-# define AVIVO_DVGA_CONTROL_OVERSCAN_TIMING_SELECT (1 << 10)
-# define AVIVO_DVGA_CONTROL_OVERSCAN_COLOR_EN (1 << 16)
-# define AVIVO_DVGA_CONTROL_ROTATE (1 << 24)
-#define AVIVO_D2VGA_CONTROL 0x00ce
-
-#define R600_BUS_CNTL 0x1508
-# define R600_BIOS_ROM_DIS (1 << 1)
+
#define R600_ROM_CNTL 0x580
# define R600_SCK_OVERWRITE (1 << 1)
# define R600_SCK_PRESCALE_CRYSTAL_CLK_SHIFT 28
# define R600_SCK_PRESCALE_CRYSTAL_CLK_MASK (0xf << 28)
-#define GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK 0x1
-
-#define FMT_BIT_DEPTH_CONTROL 0x1bf2
-#define FMT_TRUNCATE_EN (1 << 0)
-#define FMT_TRUNCATE_DEPTH (1 << 4)
-#define FMT_SPATIAL_DITHER_EN (1 << 8)
-#define FMT_SPATIAL_DITHER_MODE(x) ((x) << 9)
-#define FMT_SPATIAL_DITHER_DEPTH (1 << 12)
-#define FMT_FRAME_RANDOM_ENABLE (1 << 13)
-#define FMT_RGB_RANDOM_ENABLE (1 << 14)
-#define FMT_HIGHPASS_RANDOM_ENABLE (1 << 15)
-#define FMT_TEMPORAL_DITHER_EN (1 << 16)
-#define FMT_TEMPORAL_DITHER_DEPTH (1 << 20)
-#define FMT_TEMPORAL_DITHER_OFFSET(x) ((x) << 21)
-#define FMT_TEMPORAL_LEVEL (1 << 24)
-#define FMT_TEMPORAL_DITHER_RESET (1 << 25)
-#define FMT_25FRC_SEL(x) ((x) << 26)
-#define FMT_50FRC_SEL(x) ((x) << 28)
-#define FMT_75FRC_SEL(x) ((x) << 30)
-
-#define EVERGREEN_DC_LUT_CONTROL 0x1a80
-#define EVERGREEN_DC_LUT_BLACK_OFFSET_BLUE 0x1a81
-#define EVERGREEN_DC_LUT_BLACK_OFFSET_GREEN 0x1a82
-#define EVERGREEN_DC_LUT_BLACK_OFFSET_RED 0x1a83
-#define EVERGREEN_DC_LUT_WHITE_OFFSET_BLUE 0x1a84
-#define EVERGREEN_DC_LUT_WHITE_OFFSET_GREEN 0x1a85
-#define EVERGREEN_DC_LUT_WHITE_OFFSET_RED 0x1a86
-#define EVERGREEN_DC_LUT_30_COLOR 0x1a7c
-#define EVERGREEN_DC_LUT_RW_INDEX 0x1a79
-#define EVERGREEN_DC_LUT_WRITE_EN_MASK 0x1a7e
-#define EVERGREEN_DC_LUT_RW_MODE 0x1a78
-
-#define EVERGREEN_GRPH_ENABLE 0x1a00
-#define EVERGREEN_GRPH_CONTROL 0x1a01
-#define EVERGREEN_GRPH_DEPTH(x) (((x) & 0x3) << 0)
-#define EVERGREEN_GRPH_DEPTH_8BPP 0
-#define EVERGREEN_GRPH_DEPTH_16BPP 1
-#define EVERGREEN_GRPH_DEPTH_32BPP 2
-#define EVERGREEN_GRPH_NUM_BANKS(x) (((x) & 0x3) << 2)
-#define EVERGREEN_ADDR_SURF_2_BANK 0
-#define EVERGREEN_ADDR_SURF_4_BANK 1
-#define EVERGREEN_ADDR_SURF_8_BANK 2
-#define EVERGREEN_ADDR_SURF_16_BANK 3
-#define EVERGREEN_GRPH_Z(x) (((x) & 0x3) << 4)
-#define EVERGREEN_GRPH_BANK_WIDTH(x) (((x) & 0x3) << 6)
-#define EVERGREEN_ADDR_SURF_BANK_WIDTH_1 0
-#define EVERGREEN_ADDR_SURF_BANK_WIDTH_2 1
-#define EVERGREEN_ADDR_SURF_BANK_WIDTH_4 2
-#define EVERGREEN_ADDR_SURF_BANK_WIDTH_8 3
-#define EVERGREEN_GRPH_FORMAT(x) (((x) & 0x7) << 8)
-
-#define EVERGREEN_GRPH_FORMAT_INDEXED 0
-#define EVERGREEN_GRPH_FORMAT_ARGB1555 0
-#define EVERGREEN_GRPH_FORMAT_ARGB565 1
-#define EVERGREEN_GRPH_FORMAT_ARGB4444 2
-#define EVERGREEN_GRPH_FORMAT_AI88 3
-#define EVERGREEN_GRPH_FORMAT_MONO16 4
-#define EVERGREEN_GRPH_FORMAT_BGRA5551 5
+#define GRPH_ARRAY_LINEAR_GENERAL 0
+#define GRPH_ARRAY_LINEAR_ALIGNED 1
+#define GRPH_ARRAY_1D_TILED_THIN1 2
+#define GRPH_ARRAY_2D_TILED_THIN1 4
+
+#define ES_AND_GS_AUTO 3
+#define BUF_SWAP_32BIT (2 << 16)
+
+#define GRPH_DEPTH_8BPP 0
+#define GRPH_DEPTH_16BPP 1
+#define GRPH_DEPTH_32BPP 2
+
+/* 8 BPP */
+#define GRPH_FORMAT_INDEXED 0
+
+/* 16 BPP */
+#define GRPH_FORMAT_ARGB1555 0
+#define GRPH_FORMAT_ARGB565 1
+#define GRPH_FORMAT_ARGB4444 2
+#define GRPH_FORMAT_AI88 3
+#define GRPH_FORMAT_MONO16 4
+#define GRPH_FORMAT_BGRA5551 5
/* 32 BPP */
-#define EVERGREEN_GRPH_FORMAT_ARGB8888 0
-#define EVERGREEN_GRPH_FORMAT_ARGB2101010 1
-#define EVERGREEN_GRPH_FORMAT_32BPP_DIG 2
-#define EVERGREEN_GRPH_FORMAT_8B_ARGB2101010 3
-#define EVERGREEN_GRPH_FORMAT_BGRA1010102 4
-#define EVERGREEN_GRPH_FORMAT_8B_BGRA1010102 5
-#define EVERGREEN_GRPH_FORMAT_RGB111110 6
-#define EVERGREEN_GRPH_FORMAT_BGR101111 7
-#define EVERGREEN_GRPH_BANK_HEIGHT(x) (((x) & 0x3) << 11)
-#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_1 0
-#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_2 1
-#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_4 2
-#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_8 3
-#define EVERGREEN_GRPH_TILE_SPLIT(x) (((x) & 0x7) << 13)
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_64B 0
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_128B 1
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_256B 2
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_512B 3
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_1KB 4
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_2KB 5
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_4KB 6
-#define EVERGREEN_GRPH_MACRO_TILE_ASPECT(x) (((x) & 0x3) << 18)
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_1 0
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_2 1
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_4 2
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_8 3
-#define EVERGREEN_GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20)
-#define EVERGREEN_GRPH_ARRAY_LINEAR_GENERAL 0
-#define EVERGREEN_GRPH_ARRAY_LINEAR_ALIGNED 1
-#define EVERGREEN_GRPH_ARRAY_1D_TILED_THIN1 2
-#define EVERGREEN_GRPH_ARRAY_2D_TILED_THIN1 4
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_1 0
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_2 1
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_4 2
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_8 3
-
-#define EVERGREEN_GRPH_SWAP_CONTROL 0x1a03
-#define EVERGREEN_GRPH_ENDIAN_SWAP(x) (((x) & 0x3) << 0)
-# define EVERGREEN_GRPH_ENDIAN_NONE 0
-# define EVERGREEN_GRPH_ENDIAN_8IN16 1
-# define EVERGREEN_GRPH_ENDIAN_8IN32 2
-# define EVERGREEN_GRPH_ENDIAN_8IN64 3
-#define EVERGREEN_GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4)
-# define EVERGREEN_GRPH_RED_SEL_R 0
-# define EVERGREEN_GRPH_RED_SEL_G 1
-# define EVERGREEN_GRPH_RED_SEL_B 2
-# define EVERGREEN_GRPH_RED_SEL_A 3
-#define EVERGREEN_GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6)
-# define EVERGREEN_GRPH_GREEN_SEL_G 0
-# define EVERGREEN_GRPH_GREEN_SEL_B 1
-# define EVERGREEN_GRPH_GREEN_SEL_A 2
-# define EVERGREEN_GRPH_GREEN_SEL_R 3
-#define EVERGREEN_GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8)
-# define EVERGREEN_GRPH_BLUE_SEL_B 0
-# define EVERGREEN_GRPH_BLUE_SEL_A 1
-# define EVERGREEN_GRPH_BLUE_SEL_R 2
-# define EVERGREEN_GRPH_BLUE_SEL_G 3
-#define EVERGREEN_GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10)
-# define EVERGREEN_GRPH_ALPHA_SEL_A 0
-# define EVERGREEN_GRPH_ALPHA_SEL_R 1
-# define EVERGREEN_GRPH_ALPHA_SEL_G 2
-# define EVERGREEN_GRPH_ALPHA_SEL_B 3
-
-#define EVERGREEN_D3VGA_CONTROL 0xf8
-#define EVERGREEN_D4VGA_CONTROL 0xf9
-#define EVERGREEN_D5VGA_CONTROL 0xfa
-#define EVERGREEN_D6VGA_CONTROL 0xfb
-
-#define EVERGREEN_GRPH_SURFACE_ADDRESS_MASK 0xffffff00
-
-#define EVERGREEN_GRPH_LUT_10BIT_BYPASS_CONTROL 0x1a02
-#define EVERGREEN_LUT_10BIT_BYPASS_EN (1 << 8)
-
-#define EVERGREEN_GRPH_PITCH 0x1a06
-#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07
-#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08
-#define EVERGREEN_GRPH_SURFACE_OFFSET_X 0x1a09
-#define EVERGREEN_GRPH_SURFACE_OFFSET_Y 0x1a0a
-#define EVERGREEN_GRPH_X_START 0x1a0b
-#define EVERGREEN_GRPH_Y_START 0x1a0c
-#define EVERGREEN_GRPH_X_END 0x1a0d
-#define EVERGREEN_GRPH_Y_END 0x1a0e
-#define EVERGREEN_GRPH_UPDATE 0x1a11
-#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2)
-#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16)
-#define EVERGREEN_GRPH_FLIP_CONTROL 0x1a12
-#define EVERGREEN_GRPH_SURFACE_UPDATE_H_RETRACE_EN (1 << 0)
-
-#define EVERGREEN_VIEWPORT_START 0x1b5c
-#define EVERGREEN_VIEWPORT_SIZE 0x1b5d
-#define EVERGREEN_DESKTOP_HEIGHT 0x1ac1
-
-/* CUR blocks at 0x6998, 0x7598, 0x10198, 0x10d98, 0x11998, 0x12598 */
-#define EVERGREEN_CUR_CONTROL 0x1a66
-# define EVERGREEN_CURSOR_EN (1 << 0)
-# define EVERGREEN_CURSOR_MODE(x) (((x) & 0x3) << 8)
-# define EVERGREEN_CURSOR_MONO 0
-# define EVERGREEN_CURSOR_24_1 1
-# define EVERGREEN_CURSOR_24_8_PRE_MULT 2
-# define EVERGREEN_CURSOR_24_8_UNPRE_MULT 3
-# define EVERGREEN_CURSOR_2X_MAGNIFY (1 << 16)
-# define EVERGREEN_CURSOR_FORCE_MC_ON (1 << 20)
-# define EVERGREEN_CURSOR_URGENT_CONTROL(x) (((x) & 0x7) << 24)
-# define EVERGREEN_CURSOR_URGENT_ALWAYS 0
-# define EVERGREEN_CURSOR_URGENT_1_8 1
-# define EVERGREEN_CURSOR_URGENT_1_4 2
-# define EVERGREEN_CURSOR_URGENT_3_8 3
-# define EVERGREEN_CURSOR_URGENT_1_2 4
-#define EVERGREEN_CUR_SURFACE_ADDRESS 0x1a67
-# define EVERGREEN_CUR_SURFACE_ADDRESS_MASK 0xfffff000
-#define EVERGREEN_CUR_SIZE 0x1a68
-#define EVERGREEN_CUR_SURFACE_ADDRESS_HIGH 0x1a69
-#define EVERGREEN_CUR_POSITION 0x1a6a
-#define EVERGREEN_CUR_HOT_SPOT 0x1a6b
-#define EVERGREEN_CUR_COLOR1 0x1a6c
-#define EVERGREEN_CUR_COLOR2 0x1a6d
-#define EVERGREEN_CUR_UPDATE 0x1a6e
-# define EVERGREEN_CURSOR_UPDATE_PENDING (1 << 0)
-# define EVERGREEN_CURSOR_UPDATE_TAKEN (1 << 1)
-# define EVERGREEN_CURSOR_UPDATE_LOCK (1 << 16)
-# define EVERGREEN_CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24)
-
-
-#define NI_INPUT_CSC_CONTROL 0x1a35
-# define NI_INPUT_CSC_GRPH_MODE(x) (((x) & 0x3) << 0)
-# define NI_INPUT_CSC_BYPASS 0
-# define NI_INPUT_CSC_PROG_COEFF 1
-# define NI_INPUT_CSC_PROG_SHARED_MATRIXA 2
-# define NI_INPUT_CSC_OVL_MODE(x) (((x) & 0x3) << 4)
-
-#define NI_OUTPUT_CSC_CONTROL 0x1a3c
-# define NI_OUTPUT_CSC_GRPH_MODE(x) (((x) & 0x7) << 0)
-# define NI_OUTPUT_CSC_BYPASS 0
-# define NI_OUTPUT_CSC_TV_RGB 1
-# define NI_OUTPUT_CSC_YCBCR_601 2
-# define NI_OUTPUT_CSC_YCBCR_709 3
-# define NI_OUTPUT_CSC_PROG_COEFF 4
-# define NI_OUTPUT_CSC_PROG_SHARED_MATRIXB 5
-# define NI_OUTPUT_CSC_OVL_MODE(x) (((x) & 0x7) << 4)
-
-#define NI_DEGAMMA_CONTROL 0x1a58
-# define NI_GRPH_DEGAMMA_MODE(x) (((x) & 0x3) << 0)
-# define NI_DEGAMMA_BYPASS 0
-# define NI_DEGAMMA_SRGB_24 1
-# define NI_DEGAMMA_XVYCC_222 2
-# define NI_OVL_DEGAMMA_MODE(x) (((x) & 0x3) << 4)
-# define NI_ICON_DEGAMMA_MODE(x) (((x) & 0x3) << 8)
-# define NI_CURSOR_DEGAMMA_MODE(x) (((x) & 0x3) << 12)
-
-#define NI_GAMUT_REMAP_CONTROL 0x1a59
-# define NI_GRPH_GAMUT_REMAP_MODE(x) (((x) & 0x3) << 0)
-# define NI_GAMUT_REMAP_BYPASS 0
-# define NI_GAMUT_REMAP_PROG_COEFF 1
-# define NI_GAMUT_REMAP_PROG_SHARED_MATRIXA 2
-# define NI_GAMUT_REMAP_PROG_SHARED_MATRIXB 3
-# define NI_OVL_GAMUT_REMAP_MODE(x) (((x) & 0x3) << 4)
-
-#define NI_REGAMMA_CONTROL 0x1aa0
-# define NI_GRPH_REGAMMA_MODE(x) (((x) & 0x7) << 0)
-# define NI_REGAMMA_BYPASS 0
-# define NI_REGAMMA_SRGB_24 1
-# define NI_REGAMMA_XVYCC_222 2
-# define NI_REGAMMA_PROG_A 3
-# define NI_REGAMMA_PROG_B 4
-# define NI_OVL_REGAMMA_MODE(x) (((x) & 0x7) << 4)
-
-
-#define NI_PRESCALE_GRPH_CONTROL 0x1a2d
-# define NI_GRPH_PRESCALE_BYPASS (1 << 4)
-
-#define NI_PRESCALE_OVL_CONTROL 0x1a31
-# define NI_OVL_PRESCALE_BYPASS (1 << 4)
-
-#define NI_INPUT_GAMMA_CONTROL 0x1a10
-# define NI_GRPH_INPUT_GAMMA_MODE(x) (((x) & 0x3) << 0)
-# define NI_INPUT_GAMMA_USE_LUT 0
-# define NI_INPUT_GAMMA_BYPASS 1
-# define NI_INPUT_GAMMA_SRGB_24 2
-# define NI_INPUT_GAMMA_XVYCC_222 3
-# define NI_OVL_INPUT_GAMMA_MODE(x) (((x) & 0x3) << 4)
-
-#define BLACKOUT_MODE_MASK 0x00000007
-#define VGA_RENDER_CONTROL 0xC0
-#define R_000300_VGA_RENDER_CONTROL 0xC0
-#define C_000300_VGA_VSTATUS_CNTL 0xFFFCFFFF
-#define EVERGREEN_CRTC_STATUS 0x1BA3
-#define EVERGREEN_CRTC_V_BLANK (1 << 0)
-#define EVERGREEN_CRTC_STATUS_POSITION 0x1BA4
-/* CRTC blocks at 0x6df0, 0x79f0, 0x105f0, 0x111f0, 0x11df0, 0x129f0 */
-#define EVERGREEN_CRTC_V_BLANK_START_END 0x1b8d
-#define EVERGREEN_CRTC_CONTROL 0x1b9c
-# define EVERGREEN_CRTC_MASTER_EN (1 << 0)
-# define EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE (1 << 24)
-#define EVERGREEN_CRTC_BLANK_CONTROL 0x1b9d
-# define EVERGREEN_CRTC_BLANK_DATA_EN (1 << 8)
-# define EVERGREEN_CRTC_V_BLANK (1 << 0)
-#define EVERGREEN_CRTC_STATUS_HV_COUNT 0x1ba8
-#define EVERGREEN_CRTC_UPDATE_LOCK 0x1bb5
-#define EVERGREEN_MASTER_UPDATE_LOCK 0x1bbd
-#define EVERGREEN_MASTER_UPDATE_MODE 0x1bbe
-#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16)
-#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07
-#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08
-#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS 0x1a04
-#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS 0x1a05
-#define EVERGREEN_GRPH_UPDATE 0x1a11
-#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS 0xc4
-#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH 0xc9
-#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2)
-
-#define mmVM_CONTEXT1_CNTL__xxRANGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x10
-#define mmVM_CONTEXT1_CNTL__xxRANGE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x4
-#define mmVM_CONTEXT1_CNTL__xxDUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x80
-#define mmVM_CONTEXT1_CNTL__xxDUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x7
-#define mmVM_CONTEXT1_CNTL__xxPDE0_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x400
-#define mmVM_CONTEXT1_CNTL__xxPDE0_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0xa
-#define mmVM_CONTEXT1_CNTL__xxVALID_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x2000
-#define mmVM_CONTEXT1_CNTL__xxVALID_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0xd
-#define mmVM_CONTEXT1_CNTL__xxREAD_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x10000
-#define mmVM_CONTEXT1_CNTL__xxREAD_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x10
-#define mmVM_CONTEXT1_CNTL__xxWRITE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x80000
-#define mmVM_CONTEXT1_CNTL__xxWRITE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x13
-
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxVMID_MASK 0x1e000000
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxVMID__SHIFT 0x19
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxPROTECTIONS_MASK 0xff
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxPROTECTIONS__SHIFT 0x0
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_ID_MASK 0xff000
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_ID__SHIFT 0xc
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_RW_MASK 0x1000000
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_RW__SHIFT 0x18
-
-#define mmMC_SHARED_BLACKOUT_CNTL__xxBLACKOUT_MODE_MASK 0x7
-#define mmMC_SHARED_BLACKOUT_CNTL__xxBLACKOUT_MODE__SHIFT 0x0
-
-#define mmBIF_FB_EN__xxFB_READ_EN_MASK 0x1
-#define mmBIF_FB_EN__xxFB_READ_EN__SHIFT 0x0
-#define mmBIF_FB_EN__xxFB_WRITE_EN_MASK 0x2
-#define mmBIF_FB_EN__xxFB_WRITE_EN__SHIFT 0x1
-
-#define mmSRBM_SOFT_RESET__xxSOFT_RESET_VMC_MASK 0x20000
-#define mmSRBM_SOFT_RESET__xxSOFT_RESET_VMC__SHIFT 0x11
-#define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC_MASK 0x800
-#define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC__SHIFT 0xb
-
-#define VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x8
-#define VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x3
-#define VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x40
-#define VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x6
-#define VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x200
-#define VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x9
-#define VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x1000
-#define VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0xc
-#define VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x8000
-#define VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0xf
-#define VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x40000
-#define VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x12
+#define GRPH_FORMAT_ARGB8888 0
+#define GRPH_FORMAT_ARGB2101010 1
+#define GRPH_FORMAT_32BPP_DIG 2
+#define GRPH_FORMAT_8B_ARGB2101010 3
+#define GRPH_FORMAT_BGRA1010102 4
+#define GRPH_FORMAT_8B_BGRA1010102 5
+#define GRPH_FORMAT_RGB111110 6
+#define GRPH_FORMAT_BGR101111 7
+
+#define GRPH_ENDIAN_NONE 0
+#define GRPH_ENDIAN_8IN16 1
+#define GRPH_ENDIAN_8IN32 2
+#define GRPH_ENDIAN_8IN64 3
+#define GRPH_RED_SEL_R 0
+#define GRPH_RED_SEL_G 1
+#define GRPH_RED_SEL_B 2
+#define GRPH_RED_SEL_A 3
+
+#define GRPH_GREEN_SEL_G 0
+#define GRPH_GREEN_SEL_B 1
+#define GRPH_GREEN_SEL_A 2
+#define GRPH_GREEN_SEL_R 3
+
+#define GRPH_BLUE_SEL_B 0
+#define GRPH_BLUE_SEL_A 1
+#define GRPH_BLUE_SEL_R 2
+#define GRPH_BLUE_SEL_G 3
+
+#define GRPH_ALPHA_SEL_A 0
+#define GRPH_ALPHA_SEL_R 1
+#define GRPH_ALPHA_SEL_G 2
+#define GRPH_ALPHA_SEL_B 3
+
+/* CUR_CONTROL */
+ #define CURSOR_MONO 0
+ #define CURSOR_24_1 1
+ #define CURSOR_24_8_PRE_MULT 2
+ #define CURSOR_24_8_UNPRE_MULT 3
+ #define CURSOR_URGENT_ALWAYS 0
+ #define CURSOR_URGENT_1_8 1
+ #define CURSOR_URGENT_1_4 2
+ #define CURSOR_URGENT_3_8 3
+ #define CURSOR_URGENT_1_2 4
+
+/* INPUT_CSC_CONTROL */
+# define INPUT_CSC_BYPASS 0
+# define INPUT_CSC_PROG_COEFF 1
+# define INPUT_CSC_PROG_SHARED_MATRIXA 2
+
+/* OUTPUT_CSC_CONTROL */
+# define OUTPUT_CSC_BYPASS 0
+# define OUTPUT_CSC_TV_RGB 1
+# define OUTPUT_CSC_YCBCR_601 2
+# define OUTPUT_CSC_YCBCR_709 3
+# define OUTPUT_CSC_PROG_COEFF 4
+# define OUTPUT_CSC_PROG_SHARED_MATRIXB 5
+
+/* DEGAMMA_CONTROL */
+# define DEGAMMA_BYPASS 0
+# define DEGAMMA_SRGB_24 1
+# define DEGAMMA_XVYCC_222 2
+
+/* GAMUT_REMAP_CONTROL */
+# define GAMUT_REMAP_BYPASS 0
+# define GAMUT_REMAP_PROG_COEFF 1
+# define GAMUT_REMAP_PROG_SHARED_MATRIXA 2
+# define GAMUT_REMAP_PROG_SHARED_MATRIXB 3
+
+/* REGAMMA_CONTROL */
+# define REGAMMA_BYPASS 0
+# define REGAMMA_SRGB_24 1
+# define REGAMMA_XVYCC_222 2
+# define REGAMMA_PROG_A 3
+# define REGAMMA_PROG_B 4
+
+
+/* INPUT_GAMMA_CONTROL */
+# define INPUT_GAMMA_USE_LUT 0
+# define INPUT_GAMMA_BYPASS 1
+# define INPUT_GAMMA_SRGB_24 2
+# define INPUT_GAMMA_XVYCC_222 3
#define MC_SEQ_MISC0__MT__MASK 0xf0000000
#define MC_SEQ_MISC0__MT__GDDR1 0x10000000
@@ -2426,28 +740,14 @@
#define MC_SEQ_MISC0__MT__HBM 0x60000000
#define MC_SEQ_MISC0__MT__DDR3 0xB0000000
-#define GRBM_STATUS__GUI_ACTIVE_MASK 0x80000000
#define CP_INT_CNTL_RING__TIME_STAMP_INT_ENABLE_MASK 0x4000000
-#define CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK 0x800000
-#define CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK 0x400000
#define PACKET3_SEM_WAIT_ON_SIGNAL (0x1 << 12)
#define PACKET3_SEM_SEL_SIGNAL (0x6 << 29)
#define PACKET3_SEM_SEL_WAIT (0x7 << 29)
-#define CONFIG_CNTL 0x1509
-#define CC_DRM_ID_STRAPS 0X1559
#define AMDGPU_PCIE_INDEX 0xc
#define AMDGPU_PCIE_DATA 0xd
-#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0x3411
-#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0x3412
-#define DMA_MODE 0x342f
-#define DMA_RB_RPTR_ADDR_HI 0x3407
-#define DMA_RB_RPTR_ADDR_LO 0x3408
-#define DMA_BUSY_MASK 0x20
-#define DMA1_BUSY_MASK 0X40
-#define SDMA_MAX_INSTANCE 2
-
#define PCIE_BUS_CLK 10000
#define TCLK (PCIE_BUS_CLK / 10)
#define PCIE_PORT_INDEX 0xe
@@ -2457,8 +757,6 @@
#define EVERGREEN_PIF_PHY1_INDEX 0x10
#define EVERGREEN_PIF_PHY1_DATA 0x14
-#define MC_VM_FB_OFFSET 0x81a
-
/* Discrete VCE clocks */
#define CG_VCEPLL_FUNC_CNTL 0xc0030600
#define VCEPLL_RESET_MASK 0x00000001
diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
new file mode 100644
index 000000000000..2594467bdd87
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
@@ -0,0 +1,298 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "sienna_cichlid.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_dpm.h"
+#include "amdgpu_job.h"
+#include "amdgpu_ring.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_xgmi.h"
+
+/*
+ * Report whether mode2 should be used when no explicit reset method was
+ * requested.
+ *
+ * The firmware/IP-version based auto-selection below is compiled out with
+ * "#if 0", so in the active code reset_ctl is unused and the result depends
+ * only on amdgpu_reset_method being AMD_RESET_METHOD_MODE2.
+ */
+static bool sienna_cichlid_is_mode2_default(struct amdgpu_reset_control *reset_ctl)
+{
+#if 0
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(11, 0, 7) &&
+ adev->pm.fw_version >= 0x3a5500 && !amdgpu_sriov_vf(adev))
+ return true;
+#endif
+ return amdgpu_reset_method == AMD_RESET_METHOD_MODE2;
+}
+
+/*
+ * Pick the reset handler to use for reset_context.
+ *
+ * If reset_context->method names a method, the handler registered for that
+ * method is returned. When no method is given, or no handler matches it,
+ * the mode2 handler is returned provided mode2 is the default. Returns NULL
+ * when neither lookup yields a handler.
+ */
+static struct amdgpu_reset_handler *
+sienna_cichlid_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_reset_handler *handler;
+ int i;
+
+ /* An explicitly requested method takes priority. */
+ if (reset_context->method != AMD_RESET_METHOD_NONE) {
+ for_each_handler(i, handler, reset_ctl) {
+ if (handler->reset_method == reset_context->method)
+ return handler;
+ }
+ }
+
+ /* Otherwise fall back to mode2 when it is the default method. */
+ if (sienna_cichlid_is_mode2_default(reset_ctl)) {
+ for_each_handler(i, handler, reset_ctl) {
+ if (handler->reset_method == AMD_RESET_METHOD_MODE2)
+ return handler;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Suspend the IP blocks touched by a mode2 reset.
+ *
+ * Power gating and clock gating are ungated first, then the IP blocks are
+ * walked in reverse order and only the GFX and SDMA blocks are suspended.
+ * Returns 0 on success or the first error reported by a block suspend.
+ */
+static int sienna_cichlid_mode2_suspend_ip(struct amdgpu_device *adev)
+{
+ int r, i;
+
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ /* Skip every block that is neither GFX nor SDMA. */
+ if (!(adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+
+ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/*
+ * prepare_hwcontext callback for mode2 reset.
+ *
+ * When the device is not an SR-IOV VF, the GFXHUB registers are saved and
+ * the GFXHUB is halted (each only if the corresponding hook is provided),
+ * then the GFX/SDMA IP blocks are suspended. On an SR-IOV VF nothing is
+ * done and 0 is returned. reset_context is not used.
+ */
+static int
+sienna_cichlid_mode2_prepare_hwcontext(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ int r = 0;
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if (adev->gfxhub.funcs->mode2_save_regs)
+ adev->gfxhub.funcs->mode2_save_regs(adev);
+ if (adev->gfxhub.funcs->halt)
+ adev->gfxhub.funcs->halt(adev);
+ r = sienna_cichlid_mode2_suspend_ip(adev);
+ }
+
+ return r;
+}
+
+/*
+ * Work handler bound to reset_ctl->reset_work.
+ *
+ * Recovers the owning reset control from the work item and invokes
+ * do_reset() on the first handler whose method equals
+ * reset_ctl->active_reset. The return value of do_reset() is ignored.
+ */
+static void sienna_cichlid_async_reset(struct work_struct *work)
+{
+ struct amdgpu_reset_handler *handler;
+ struct amdgpu_reset_control *reset_ctl =
+ container_of(work, struct amdgpu_reset_control, reset_work);
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ int i;
+
+ for_each_handler(i, handler, reset_ctl) {
+ if (handler->reset_method == reset_ctl->active_reset) {
+ dev_dbg(adev->dev, "Resetting device\n");
+ handler->do_reset(adev);
+ break;
+ }
+ }
+}
+
+/*
+ * Perform the mode2 reset itself: disable PCI bus mastering on the device,
+ * then request the reset through amdgpu_dpm_mode2_reset() and return its
+ * result.
+ */
+static int sienna_cichlid_mode2_reset(struct amdgpu_device *adev)
+{
+ /* disable BM (bus mastering) */
+ pci_clear_master(adev->pdev);
+ return amdgpu_dpm_mode2_reset(adev);
+}
+
+/*
+ * perform_reset callback for mode2 reset.
+ *
+ * Runs sienna_cichlid_mode2_reset() on the device behind reset_ctl and logs
+ * an error if it fails. Returns the result of the reset unchanged.
+ * reset_context is not used.
+ */
+static int
+sienna_cichlid_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ int r;
+
+ r = sienna_cichlid_mode2_reset(adev);
+ if (r) {
+ /* Terminate the message with a newline like the other log calls. */
+ dev_err(adev->dev,
+ "ASIC reset failed with error, %d\n", r);
+ }
+ return r;
+}
+
+/*
+ * Bring the hardware back up after a mode2 reset.
+ *
+ * Sequence: start RLC autoload through PSP, restore and re-initialize the
+ * GFXHUB and re-enable its GART, resume the IH blocks, resume the GFX and
+ * SDMA blocks, run late_init on the GFX and SDMA blocks, and finally
+ * re-enable clock gating and power gating.
+ *
+ * Returns 0 on success; the first failing step's error code otherwise.
+ */
+static int sienna_cichlid_mode2_restore_ip(struct amdgpu_device *adev)
+{
+ int i, r;
+ struct psp_context *psp = &adev->psp;
+
+ r = psp_rlc_autoload_start(psp);
+ if (r) {
+ dev_err(adev->dev, "Failed to start rlc autoload\n");
+ return r;
+ }
+
+ /* Reinit GFXHUB */
+ if (adev->gfxhub.funcs->mode2_restore_regs)
+ adev->gfxhub.funcs->mode2_restore_regs(adev);
+ /* NOTE(review): init and gart_enable are called without a NULL check. */
+ adev->gfxhub.funcs->init(adev);
+ r = adev->gfxhub.funcs->gart_enable(adev);
+ if (r) {
+ dev_err(adev->dev, "GFXHUB gart reenable failed after reset\n");
+ return r;
+ }
+
+ /* Resume the IH blocks before GFX/SDMA. */
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+ }
+
+ /* Resume only the GFX and SDMA blocks. */
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!(adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+
+ /* Run late_init (when provided) for the GFX and SDMA blocks. */
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!(adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+
+ if (adev->ip_blocks[i].version->funcs->late_init) {
+ r = adev->ip_blocks[i].version->funcs->late_init(
+ &adev->ip_blocks[i]);
+ if (r) {
+ dev_err(adev->dev,
+ "late_init of IP block <%s> failed %d after reset\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ return r;
+ }
+ }
+ /* Marked even when the block has no late_init hook. */
+ adev->ip_blocks[i].status.late_initialized = true;
+ }
+
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
+
+ /* Every failing step returned early, so r is 0 here. */
+ return r;
+}
+
+/*
+ * restore_hwcontext callback for mode2 reset.
+ *
+ * Switches the device to the reset-recovery init level, restores the IP
+ * blocks, re-registers the GPU instance, resumes RAS and interrupts,
+ * switches back to the default init level and runs the IB ring tests.
+ *
+ * Returns 0 on success. Any failure is reported to the caller as -EAGAIN,
+ * regardless of the underlying error code. reset_context is not used.
+ */
+static int
+sienna_cichlid_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ int r;
+ struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_RESET_RECOVERY);
+ dev_info(tmp_adev->dev,
+ "GPU reset succeeded, trying to resume\n");
+ r = sienna_cichlid_mode2_restore_ip(tmp_adev);
+ /*
+ * NOTE(review): on this failure path the init level is not switched
+ * back to AMDGPU_INIT_LEVEL_DEFAULT -- confirm this is intended.
+ */
+ if (r)
+ goto end;
+
+ /*
+ * Register this ASIC as tracked, since the reset has already
+ * completed successfully.
+ */
+ amdgpu_register_gpu_instance(tmp_adev);
+
+ /* Resume RAS */
+ amdgpu_ras_resume(tmp_adev);
+
+ amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
+
+ amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT);
+ r = amdgpu_ib_ring_tests(tmp_adev);
+ if (r) {
+ dev_err(tmp_adev->dev,
+ "ib ring test failed (%d).\n", r);
+ r = -EAGAIN;
+ goto end;
+ }
+
+end:
+ /* Collapse every error into -EAGAIN for the caller. */
+ if (r)
+ return -EAGAIN;
+ else
+ return r;
+}
+
+/*
+ * Handler describing the mode2 reset sequence. The environment hooks
+ * (prepare_env/restore_env) are not used; do_reset is the entry invoked
+ * from the asynchronous reset work.
+ */
+static struct amdgpu_reset_handler sienna_cichlid_mode2_handler = {
+ .reset_method = AMD_RESET_METHOD_MODE2,
+ .prepare_env = NULL,
+ .prepare_hwcontext = sienna_cichlid_mode2_prepare_hwcontext,
+ .perform_reset = sienna_cichlid_mode2_perform_reset,
+ .restore_hwcontext = sienna_cichlid_mode2_restore_hwcontext,
+ .restore_env = NULL,
+ .do_reset = sienna_cichlid_mode2_reset,
+};
+
+/*
+ * Handler table installed into the reset control. Only the mode2 handler
+ * is populated; the remaining slots are left zero-initialized.
+ */
+static struct amdgpu_reset_handler
+ *sienna_cichlid_rst_handlers[AMDGPU_RESET_MAX_HANDLERS] = {
+ &sienna_cichlid_mode2_handler,
+ };
+
+/*
+ * Allocate and attach the reset control for adev.
+ *
+ * The control is zero-allocated, pointed back at adev, wired to the async
+ * reset work function and handler lookup, given the handler table, and
+ * stored in adev->reset_cntl.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+int sienna_cichlid_reset_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_reset_control *reset_ctl;
+
+ reset_ctl = kzalloc(sizeof(*reset_ctl), GFP_KERNEL);
+ if (!reset_ctl)
+ return -ENOMEM;
+
+ reset_ctl->handle = adev;
+ reset_ctl->async_reset = sienna_cichlid_async_reset;
+ reset_ctl->active_reset = AMD_RESET_METHOD_NONE;
+ reset_ctl->get_reset_handler = sienna_cichlid_get_reset_handler;
+
+ INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset);
+ /* Only mode2 is handled through reset control now */
+ reset_ctl->reset_handlers = &sienna_cichlid_rst_handlers;
+ adev->reset_cntl = reset_ctl;
+
+ return 0;
+}
+
+/*
+ * Free the reset control attached to adev and clear the pointer.
+ * Always returns 0.
+ *
+ * NOTE(review): reset_work is not cancelled here; presumably the caller
+ * guarantees no reset work is pending -- confirm.
+ */
+int sienna_cichlid_reset_fini(struct amdgpu_device *adev)
+{
+ kfree(adev->reset_cntl);
+ adev->reset_cntl = NULL;
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h
new file mode 100644
index 000000000000..5213b162dacd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SIENNA_CICHLID_H__
+#define __SIENNA_CICHLID_H__
+
+#include "amdgpu.h"
+
+int sienna_cichlid_reset_init(struct amdgpu_device *adev);
+int sienna_cichlid_reset_fini(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
index 73ffa8fde3df..68aef47254a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
@@ -26,6 +26,7 @@
#include "smu_v11_0_i2c.h"
#include "amdgpu.h"
+#include "amdgpu_dpm.h"
#include "soc15_common.h"
#include <drm/drm_fixed.h>
#include <drm/drm_drv.h>
@@ -43,11 +44,10 @@
#define I2C_X_RESTART BIT(31)
-#define to_amdgpu_device(x) (container_of(x, struct amdgpu_device, pm.smu_i2c))
-
static void smu_v11_0_i2c_set_clock_gating(struct i2c_adapter *control, bool en)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
uint32_t reg = RREG32_SOC15(SMUIO, 0, mmSMUIO_PWRMGT);
reg = REG_SET_FIELD(reg, SMUIO_PWRMGT, i2c_clk_gate_en, en ? 1 : 0);
@@ -75,7 +75,8 @@ static void smu_v11_0_i2c_set_clock_gating(struct i2c_adapter *control, bool en)
static int smu_v11_0_i2c_enable(struct i2c_adapter *control, bool enable)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, enable ? 1 : 0);
@@ -100,7 +101,8 @@ static int smu_v11_0_i2c_enable(struct i2c_adapter *control, bool enable)
static void smu_v11_0_i2c_clear_status(struct i2c_adapter *control)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
/* do */
{
RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CLR_INTR);
@@ -110,7 +112,8 @@ static void smu_v11_0_i2c_clear_status(struct i2c_adapter *control)
static void smu_v11_0_i2c_configure(struct i2c_adapter *control)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
uint32_t reg = 0;
reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_SLAVE_DISABLE, 1);
@@ -131,7 +134,8 @@ static void smu_v11_0_i2c_configure(struct i2c_adapter *control)
static void smu_v11_0_i2c_set_clock(struct i2c_adapter *control)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
/*
* Standard mode speed, These values are taken from SMUIO MAS,
@@ -154,7 +158,8 @@ static void smu_v11_0_i2c_set_clock(struct i2c_adapter *control)
static void smu_v11_0_i2c_set_address(struct i2c_adapter *control, u16 address)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
/* The IC_TAR::IC_TAR field is 10-bits wide.
* It takes a 7-bit or 10-bit addresses as an address,
@@ -165,7 +170,8 @@ static void smu_v11_0_i2c_set_address(struct i2c_adapter *control, u16 address)
static uint32_t smu_v11_0_i2c_poll_tx_status(struct i2c_adapter *control)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
uint32_t ret = I2C_OK;
uint32_t reg, reg_c_tx_abrt_source;
@@ -216,7 +222,8 @@ static uint32_t smu_v11_0_i2c_poll_tx_status(struct i2c_adapter *control)
static uint32_t smu_v11_0_i2c_poll_rx_status(struct i2c_adapter *control)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
uint32_t ret = I2C_OK;
uint32_t reg_ic_status, reg_c_tx_abrt_source;
@@ -262,7 +269,8 @@ static uint32_t smu_v11_0_i2c_transmit(struct i2c_adapter *control,
u16 address, u8 *data,
u32 numbytes, u32 i2c_flag)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
u32 bytes_sent, reg, ret = I2C_OK;
unsigned long timeout_counter;
@@ -360,7 +368,8 @@ static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control,
u16 address, u8 *data,
u32 numbytes, u32 i2c_flag)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
uint32_t bytes_received, ret = I2C_OK;
bytes_received = 0;
@@ -431,7 +440,8 @@ static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control,
static void smu_v11_0_i2c_abort(struct i2c_adapter *control)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
uint32_t reg = 0;
/* Enable I2C engine; */
@@ -447,7 +457,8 @@ static void smu_v11_0_i2c_abort(struct i2c_adapter *control)
static bool smu_v11_0_i2c_activity_done(struct i2c_adapter *control)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
const uint32_t IDLE_TIMEOUT = 1024;
uint32_t timeout_count = 0;
@@ -508,7 +519,8 @@ static void smu_v11_0_i2c_init(struct i2c_adapter *control)
static void smu_v11_0_i2c_fini(struct i2c_adapter *control)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
u32 status, enable, en_stat;
int res;
@@ -543,7 +555,8 @@ static void smu_v11_0_i2c_fini(struct i2c_adapter *control)
static bool smu_v11_0_i2c_bus_lock(struct i2c_adapter *control)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
/* Send PPSMC_MSG_RequestI2CBus */
if (!amdgpu_dpm_smu_i2c_bus_access(adev, true))
@@ -554,7 +567,8 @@ static bool smu_v11_0_i2c_bus_lock(struct i2c_adapter *control)
static bool smu_v11_0_i2c_bus_unlock(struct i2c_adapter *control)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
/* Send PPSMC_MSG_ReleaseI2CBus */
if (!amdgpu_dpm_smu_i2c_bus_access(adev, false))
@@ -587,16 +601,17 @@ static uint32_t smu_v11_0_i2c_write_data(struct i2c_adapter *control,
if (ret != I2C_OK)
DRM_ERROR("WriteI2CData() - I2C error occurred :%x", ret);
-
+
return ret;
}
static void lock_bus(struct i2c_adapter *i2c, unsigned int flags)
{
- struct amdgpu_device *adev = to_amdgpu_device(i2c);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(i2c);
+ struct amdgpu_device *adev = smu_i2c->adev;
- mutex_lock(&adev->pm.smu_i2c_mutex);
+ mutex_lock(&smu_i2c->mutex);
if (!smu_v11_0_i2c_bus_lock(i2c))
DRM_ERROR("Failed to lock the bus from SMU");
else
@@ -611,13 +626,14 @@ static int trylock_bus(struct i2c_adapter *i2c, unsigned int flags)
static void unlock_bus(struct i2c_adapter *i2c, unsigned int flags)
{
- struct amdgpu_device *adev = to_amdgpu_device(i2c);
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(i2c);
+ struct amdgpu_device *adev = smu_i2c->adev;
if (!smu_v11_0_i2c_bus_unlock(i2c))
DRM_ERROR("Failed to unlock the bus from SMU");
else
adev->pm.bus_locked = false;
- mutex_unlock(&adev->pm.smu_i2c_mutex);
+ mutex_unlock(&smu_i2c->mutex);
}
static const struct i2c_lock_operations smu_v11_0_i2c_i2c_lock_ops = {
@@ -706,30 +722,38 @@ static const struct i2c_adapter_quirks smu_v11_0_i2c_control_quirks = {
.flags = I2C_AQ_NO_ZERO_LEN,
};
-int smu_v11_0_i2c_control_init(struct i2c_adapter *control)
+int smu_v11_0_i2c_control_init(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[0];
+ struct i2c_adapter *control = &smu_i2c->adapter;
int res;
- mutex_init(&adev->pm.smu_i2c_mutex);
+ smu_i2c->adev = adev;
+ smu_i2c->port = 0;
+ mutex_init(&smu_i2c->mutex);
control->owner = THIS_MODULE;
control->class = I2C_CLASS_HWMON;
control->dev.parent = &adev->pdev->dev;
control->algo = &smu_v11_0_i2c_algo;
- snprintf(control->name, sizeof(control->name), "AMDGPU SMU");
+ snprintf(control->name, sizeof(control->name), "AMDGPU SMU 0");
control->lock_ops = &smu_v11_0_i2c_i2c_lock_ops;
control->quirks = &smu_v11_0_i2c_control_quirks;
+ i2c_set_adapdata(control, smu_i2c);
+
+ adev->pm.ras_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter;
+ adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter;
- res = i2c_add_adapter(control);
+ res = devm_i2c_add_adapter(adev->dev, control);
if (res)
DRM_ERROR("Failed to register hw i2c, err: %d\n", res);
return res;
}
-void smu_v11_0_i2c_control_fini(struct i2c_adapter *control)
+void smu_v11_0_i2c_control_fini(struct amdgpu_device *adev)
{
- i2c_del_adapter(control);
+ adev->pm.ras_eeprom_i2c_bus = NULL;
+ adev->pm.fru_eeprom_i2c_bus = NULL;
}
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h
index 44467c05f642..96ad14288a0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h
@@ -26,9 +26,9 @@
#include <linux/types.h>
-struct i2c_adapter;
+struct amdgpu_device;
-int smu_v11_0_i2c_control_init(struct i2c_adapter *control);
-void smu_v11_0_i2c_control_fini(struct i2c_adapter *control);
+int smu_v11_0_i2c_control_init(struct amdgpu_device *adev);
+void smu_v11_0_i2c_control_fini(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
new file mode 100644
index 000000000000..70569ea906bc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
@@ -0,0 +1,296 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "smu_v13_0_10.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_dpm.h"
+#include "amdgpu_job.h"
+#include "amdgpu_ring.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_psp.h"
+
+/*
+ * Mode2 counts as the default reset only when the device is not an SR-IOV
+ * virtual function and adev->pm.fw_version is at least 0x00502005.
+ */
+static bool smu_v13_0_10_is_mode2_default(struct amdgpu_reset_control *reset_ctl)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+	return adev->pm.fw_version >= 0x00502005 && !amdgpu_sriov_vf(adev);
+}
+
+/* Return the registered handler whose reset_method equals @method, or NULL. */
+static struct amdgpu_reset_handler *
+smu_v13_0_10_find_handler(struct amdgpu_reset_control *reset_ctl,
+			  enum amd_reset_method method)
+{
+	struct amdgpu_reset_handler *candidate;
+	int idx;
+
+	for_each_handler(idx, candidate, reset_ctl) {
+		if (candidate->reset_method == method)
+			return candidate;
+	}
+
+	return NULL;
+}
+
+/*
+ * Select the reset handler for a reset request.
+ *
+ * An explicitly requested method (anything but AMD_RESET_METHOD_NONE) is
+ * looked up first. If that finds nothing, or no method was requested, the
+ * mode2 handler is returned when mode2 is the default for this device and
+ * amdgpu_asic_reset_method() also reports mode2. Otherwise returns NULL.
+ */
+static struct amdgpu_reset_handler *
+smu_v13_0_10_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
+			       struct amdgpu_reset_context *reset_context)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+	struct amdgpu_reset_handler *match;
+
+	if (reset_context->method != AMD_RESET_METHOD_NONE) {
+		match = smu_v13_0_10_find_handler(reset_ctl,
+						  reset_context->method);
+		if (match)
+			return match;
+	}
+
+	if (!smu_v13_0_10_is_mode2_default(reset_ctl))
+		return NULL;
+
+	if (amdgpu_asic_reset_method(adev) != AMD_RESET_METHOD_MODE2)
+		return NULL;
+
+	return smu_v13_0_10_find_handler(reset_ctl, AMD_RESET_METHOD_MODE2);
+}
+
+/*
+ * Ungate power and clock gating, then suspend the GFX, SDMA and MES IP
+ * blocks, walking the IP block list from last to first.
+ *
+ * Returns 0 on success or the first error from amdgpu_ip_block_suspend().
+ */
+static int smu_v13_0_10_mode2_suspend_ip(struct amdgpu_device *adev)
+{
+	int idx, ret;
+
+	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
+
+	for (idx = adev->num_ip_blocks - 1; idx >= 0; idx--) {
+		switch (adev->ip_blocks[idx].version->type) {
+		case AMD_IP_BLOCK_TYPE_GFX:
+		case AMD_IP_BLOCK_TYPE_SDMA:
+		case AMD_IP_BLOCK_TYPE_MES:
+			break;
+		default:
+			/* every other IP block is left untouched */
+			continue;
+		}
+
+		ret = amdgpu_ip_block_suspend(&adev->ip_blocks[idx]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * prepare_hwcontext hook of the mode2 handler: suspends the affected IP
+ * blocks unless the device is an SR-IOV virtual function, in which case
+ * nothing is done and 0 is returned.
+ */
+static int
+smu_v13_0_10_mode2_prepare_hwcontext(struct amdgpu_reset_control *reset_ctl,
+				     struct amdgpu_reset_context *reset_context)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	return smu_v13_0_10_mode2_suspend_ip(adev);
+}
+
+/*
+ * Issue the mode2 reset by forwarding to amdgpu_dpm_mode2_reset() and
+ * return its result unchanged. Also used as the handler's do_reset hook.
+ */
+static int smu_v13_0_10_mode2_reset(struct amdgpu_device *adev)
+{
+ return amdgpu_dpm_mode2_reset(adev);
+}
+
+/*
+ * Work handler for reset_ctl->reset_work: invokes do_reset() of the first
+ * registered handler whose method equals the reset control's active_reset.
+ * Does nothing when no handler matches.
+ */
+static void smu_v13_0_10_async_reset(struct work_struct *work)
+{
+	struct amdgpu_reset_control *reset_ctl =
+		container_of(work, struct amdgpu_reset_control, reset_work);
+	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+	struct amdgpu_reset_handler *handler;
+	int idx;
+
+	for_each_handler(idx, handler, reset_ctl) {
+		if (handler->reset_method != reset_ctl->active_reset)
+			continue;
+
+		dev_dbg(adev->dev, "Resetting device\n");
+		handler->do_reset(adev);
+		break;
+	}
+}
+
+/*
+ * perform_reset hook of the mode2 handler: triggers the mode2 reset and logs
+ * an error if it fails.
+ *
+ * Returns the result of smu_v13_0_10_mode2_reset().
+ */
+static int
+smu_v13_0_10_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
+				 struct amdgpu_reset_context *reset_context)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+	int r;
+
+	r = smu_v13_0_10_mode2_reset(adev);
+	if (r)
+		/* Log records are newline-terminated; the old format ended in a space. */
+		dev_err(adev->dev, "ASIC reset failed with error, %d\n", r);
+
+	return r;
+}
+
+/* True for the IP block types restored after a mode2 reset: GFX, MES, SDMA. */
+static bool smu_v13_0_10_mode2_ip_type(enum amd_ip_block_type type)
+{
+	return type == AMD_IP_BLOCK_TYPE_GFX ||
+	       type == AMD_IP_BLOCK_TYPE_MES ||
+	       type == AMD_IP_BLOCK_TYPE_SDMA;
+}
+
+/*
+ * Bring the GFX/MES/SDMA IP blocks back after a mode2 reset.
+ *
+ * Steps, in order: reload the IMU_I/IMU_D firmware entries through PSP,
+ * start RLC autoload, call amdgpu_dpm_enable_gfx_features(), resume the
+ * affected IP blocks, run their late_init hooks, and finally gate clock and
+ * power gating again.
+ *
+ * Returns 0 on success or the first error reported by one of the steps.
+ */
+static int smu_v13_0_10_mode2_restore_ip(struct amdgpu_device *adev)
+{
+	int i, r;
+	struct psp_context *psp = &adev->psp;
+	struct amdgpu_firmware_info *ucode;
+	struct amdgpu_firmware_info *ucode_list[2];
+	int ucode_count = 0;
+
+	/* Collect the IMU_I and IMU_D firmware entries to hand to PSP. */
+	for (i = 0; i < adev->firmware.max_ucodes; i++) {
+		ucode = &adev->firmware.ucode[i];
+
+		switch (ucode->ucode_id) {
+		case AMDGPU_UCODE_ID_IMU_I:
+		case AMDGPU_UCODE_ID_IMU_D:
+			/* ucode_list only has room for two entries; never write past it. */
+			if (ucode_count >= ARRAY_SIZE(ucode_list)) {
+				dev_warn(adev->dev,
+					 "ignoring unexpected extra IMU ucode entry %d\n",
+					 i);
+				break;
+			}
+			ucode_list[ucode_count++] = ucode;
+			break;
+		default:
+			break;
+		}
+	}
+
+	r = psp_load_fw_list(psp, ucode_list, ucode_count);
+	if (r) {
+		dev_err(adev->dev, "IMU ucode load failed after mode2 reset\n");
+		return r;
+	}
+
+	r = psp_rlc_autoload_start(psp);
+	if (r) {
+		DRM_ERROR("Failed to start rlc autoload after mode2 reset\n");
+		return r;
+	}
+
+	amdgpu_dpm_enable_gfx_features(adev);
+
+	/* First pass: resume every affected IP block. */
+	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!smu_v13_0_10_mode2_ip_type(adev->ip_blocks[i].version->type))
+			continue;
+
+		r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+		if (r)
+			return r;
+	}
+
+	/* Second pass: late init, only after all affected blocks are resumed. */
+	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!smu_v13_0_10_mode2_ip_type(adev->ip_blocks[i].version->type))
+			continue;
+
+		if (adev->ip_blocks[i].version->funcs->late_init) {
+			r = adev->ip_blocks[i].version->funcs->late_init(
+				&adev->ip_blocks[i]);
+			if (r) {
+				dev_err(adev->dev,
+					"late_init of IP block <%s> failed %d after reset\n",
+					adev->ip_blocks[i].version->funcs->name,
+					r);
+				return r;
+			}
+		}
+		/* Marked even when the block has no late_init hook. */
+		adev->ip_blocks[i].status.late_initialized = true;
+	}
+
+	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
+	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
+
+	return 0;
+}
+
+/*
+ * restore_hwcontext hook of the mode2 handler.
+ *
+ * Switches to the reset-recovery init level, restores the affected IP
+ * blocks, re-registers the GPU instance, resumes RAS and interrupts, returns
+ * to the default init level and runs the IB ring tests.
+ *
+ * Returns 0 on success. Any failure of the IP restore or of the ring tests
+ * is reported as -EAGAIN.
+ */
+static int
+smu_v13_0_10_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
+				     struct amdgpu_reset_context *reset_context)
+{
+	struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle;
+	int ret;
+
+	amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_RESET_RECOVERY);
+	dev_info(tmp_adev->dev,
+		 "GPU reset succeeded, trying to resume\n");
+
+	ret = smu_v13_0_10_mode2_restore_ip(tmp_adev);
+	if (ret)
+		return -EAGAIN;
+
+	amdgpu_register_gpu_instance(tmp_adev);
+
+	/* Resume RAS */
+	amdgpu_ras_resume(tmp_adev);
+
+	amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
+
+	amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT);
+
+	ret = amdgpu_ib_ring_tests(tmp_adev);
+	if (ret) {
+		dev_err(tmp_adev->dev,
+			"ib ring test failed (%d).\n", ret);
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+/*
+ * Callback table for AMD_RESET_METHOD_MODE2 on SMU v13.0.10. It supplies the
+ * hardware-context prepare/restore hooks, the reset itself and the do_reset
+ * entry used by the async worker; no environment prepare/restore hooks are
+ * provided (both are NULL).
+ */
+static struct amdgpu_reset_handler smu_v13_0_10_mode2_handler = {
+ .reset_method = AMD_RESET_METHOD_MODE2,
+ .prepare_env = NULL,
+ .prepare_hwcontext = smu_v13_0_10_mode2_prepare_hwcontext,
+ .perform_reset = smu_v13_0_10_mode2_perform_reset,
+ .restore_hwcontext = smu_v13_0_10_mode2_restore_hwcontext,
+ .restore_env = NULL,
+ .do_reset = smu_v13_0_10_mode2_reset,
+};
+
+/*
+ * Handler table that smu_v13_0_10_reset_init() installs into the reset
+ * control. Only the mode2 handler is listed; the remaining slots of the
+ * AMDGPU_RESET_MAX_HANDLERS-sized array stay zero-initialized (NULL).
+ */
+static struct amdgpu_reset_handler
+ *smu_v13_0_10_rst_handlers[AMDGPU_RESET_MAX_HANDLERS] = {
+ &smu_v13_0_10_mode2_handler,
+ };
+
+/**
+ * smu_v13_0_10_reset_init - allocate and install the reset control
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Allocates a zeroed reset control, points it back at @adev, hooks up the
+ * async-reset work item and the handler lookup callback, and publishes it
+ * in adev->reset_cntl.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails.
+ */
+int smu_v13_0_10_reset_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_reset_control *ctl = kzalloc(sizeof(*ctl), GFP_KERNEL);
+
+	if (!ctl)
+		return -ENOMEM;
+
+	ctl->handle = adev;
+	ctl->active_reset = AMD_RESET_METHOD_NONE;
+	ctl->get_reset_handler = smu_v13_0_10_get_reset_handler;
+	/* Only mode2 is handled through reset control now */
+	ctl->reset_handlers = &smu_v13_0_10_rst_handlers;
+
+	/* The work item runs the same function that is exposed as async_reset. */
+	ctl->async_reset = smu_v13_0_10_async_reset;
+	INIT_WORK(&ctl->reset_work, ctl->async_reset);
+
+	adev->reset_cntl = ctl;
+
+	return 0;
+}
+
+/**
+ * smu_v13_0_10_reset_fini - release the reset control
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Frees the reset control stored in adev->reset_cntl and clears the pointer.
+ *
+ * Always returns 0.
+ */
+int smu_v13_0_10_reset_fini(struct amdgpu_device *adev)
+{
+	struct amdgpu_reset_control *ctl = adev->reset_cntl;
+
+	adev->reset_cntl = NULL;
+	kfree(ctl);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.h b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.h
new file mode 100644
index 000000000000..e0cb72a0eec6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SMU_V13_0_10_H__
+#define __SMU_V13_0_10_H__
+
+#include "amdgpu.h"
+
+int smu_v13_0_10_reset_init(struct amdgpu_device *adev);
+int smu_v13_0_10_reset_fini(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c
index b6f1322f908c..acdc40f99ab3 100644
--- a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c
@@ -59,7 +59,7 @@ static void smuio_v11_0_update_rom_clock_gating(struct amdgpu_device *adev, bool
WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data);
}
-static void smuio_v11_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags)
+static void smuio_v11_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags)
{
u32 data;
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c
index 3a18dbb55c32..2afeb8b37f62 100644
--- a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c
@@ -56,7 +56,7 @@ static void smuio_v11_0_6_update_rom_clock_gating(struct amdgpu_device *adev, bo
WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data);
}
-static void smuio_v11_0_6_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags)
+static void smuio_v11_0_6_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags)
{
u32 data;
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c
index 39b7c206770f..bf8b8e5ddf5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c
@@ -58,7 +58,7 @@ static void smuio_v13_0_update_rom_clock_gating(struct amdgpu_device *adev, bool
WREG32_SOC15(SMUIO, 0, regCGTT_ROM_CLK_CTRL0, data);
}
-static void smuio_v13_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags)
+static void smuio_v13_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags)
{
u32 data;
@@ -128,6 +128,27 @@ static bool smuio_v13_0_is_host_gpu_xgmi_supported(struct amdgpu_device *adev)
return data ? true : false;
}
+/*
+ * Derive the package type from the TOPOLOGY_ID field of SMUIO_MCM_CONFIG:
+ * topology ids 0x4 and 0xC are reported as CEM, every other value as OAM.
+ */
+static enum amdgpu_pkg_type smuio_v13_0_get_pkg_type(struct amdgpu_device *adev)
+{
+	u32 mcm_config, topology_id;
+
+	mcm_config = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+	topology_id = REG_GET_FIELD(mcm_config, SMUIO_MCM_CONFIG, TOPOLOGY_ID);
+
+	if (topology_id == 0x4 || topology_id == 0xC)
+		return AMDGPU_PKG_TYPE_CEM;
+
+	return AMDGPU_PKG_TYPE_OAM;
+}
+
const struct amdgpu_smuio_funcs smuio_v13_0_funcs = {
.get_rom_index_offset = smuio_v13_0_get_rom_index_offset,
.get_rom_data_offset = smuio_v13_0_get_rom_data_offset,
@@ -136,4 +157,5 @@ const struct amdgpu_smuio_funcs smuio_v13_0_funcs = {
.is_host_gpu_xgmi_supported = smuio_v13_0_is_host_gpu_xgmi_supported,
.update_rom_clock_gating = smuio_v13_0_update_rom_clock_gating,
.get_clock_gating_state = smuio_v13_0_get_clock_gating_state,
+ .get_pkg_type = smuio_v13_0_get_pkg_type,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c
new file mode 100644
index 000000000000..5461b5289793
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "smuio_v13_0_3.h"
+#include "soc15_common.h"
+#include "smuio/smuio_13_0_3_offset.h"
+#include "smuio/smuio_13_0_3_sh_mask.h"
+
+#define PKG_TYPE_MASK 0x00000003L
+
+/**
+ * smuio_v13_0_3_get_die_id - read the die id
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns the DIE_ID field of the SMUIO_MCM_CONFIG register.
+ */
+static u32 smuio_v13_0_3_get_die_id(struct amdgpu_device *adev)
+{
+	u32 mcm_config = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+
+	return REG_GET_FIELD(mcm_config, SMUIO_MCM_CONFIG, DIE_ID);
+}
+
+/**
+ * smuio_v13_0_3_get_socket_id - read the socket id
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns the SOCKET_ID field of the SMUIO_MCM_CONFIG register.
+ */
+static u32 smuio_v13_0_3_get_socket_id(struct amdgpu_device *adev)
+{
+	u32 mcm_config = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+
+	return REG_GET_FIELD(mcm_config, SMUIO_MCM_CONFIG, SOCKET_ID);
+}
+
+/**
+ * smuio_v13_0_3_get_pkg_type - decode the package type
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Reads the PKG_TYPE field of SMUIO_MCM_CONFIG and decodes the bits selected
+ * by PKG_TYPE_MASK: 0x0 is CEM, 0x1 is OAM, 0x2 is APU, anything else is
+ * reported as unknown.
+ *
+ * NOTE(review): the previous comment described the field as pkg_type[4:0],
+ * with bit 1 set meaning APU form factor and b0100 - b1111 reserved; only
+ * the masked low bits are decoded here - confirm against the register spec.
+ *
+ * Returns package type
+ */
+static enum amdgpu_pkg_type smuio_v13_0_3_get_pkg_type(struct amdgpu_device *adev)
+{
+	u32 mcm_config, pkg;
+
+	mcm_config = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+	pkg = REG_GET_FIELD(mcm_config, SMUIO_MCM_CONFIG, PKG_TYPE);
+	pkg &= PKG_TYPE_MASK;
+
+	if (pkg == 0x0)
+		return AMDGPU_PKG_TYPE_CEM;
+	if (pkg == 0x1)
+		return AMDGPU_PKG_TYPE_OAM;
+	if (pkg == 0x2)
+		return AMDGPU_PKG_TYPE_APU;
+
+	return AMDGPU_PKG_TYPE_UNKNOWN;
+}
+
+
+/*
+ * SMUIO v13.0.3 callback table: only the die id, socket id and package type
+ * queries are provided; all other callbacks are left unset.
+ */
+const struct amdgpu_smuio_funcs smuio_v13_0_3_funcs = {
+ .get_die_id = smuio_v13_0_3_get_die_id,
+ .get_socket_id = smuio_v13_0_3_get_socket_id,
+ .get_pkg_type = smuio_v13_0_3_get_pkg_type,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.h b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.h
new file mode 100644
index 000000000000..795f66c5a58b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SMUIO_V13_0_3_H__
+#define __SMUIO_V13_0_3_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_smuio_funcs smuio_v13_0_3_funcs;
+
+#endif /* __SMUIO_V13_0_3_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.c
new file mode 100644
index 000000000000..de998e328b08
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "smuio_v13_0_6.h"
+#include "smuio/smuio_13_0_6_offset.h"
+#include "smuio/smuio_13_0_6_sh_mask.h"
+
+/* Return SOC15_REG_OFFSET() of regROM_INDEX for SMUIO instance 0. */
+static u32 smuio_v13_0_6_get_rom_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, regROM_INDEX);
+}
+
+/* Return SOC15_REG_OFFSET() of regROM_DATA for SMUIO instance 0. */
+static u32 smuio_v13_0_6_get_rom_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, regROM_DATA);
+}
+
+/*
+ * SMUIO v13.0.6 callback table: only the ROM index/data register offset
+ * queries are provided; all other callbacks are left unset.
+ */
+const struct amdgpu_smuio_funcs smuio_v13_0_6_funcs = {
+ .get_rom_index_offset = smuio_v13_0_6_get_rom_index_offset,
+ .get_rom_data_offset = smuio_v13_0_6_get_rom_data_offset,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.h b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.h
new file mode 100644
index 000000000000..c75621de5ab5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SMUIO_V13_0_6_H__
+#define __SMUIO_V13_0_6_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_smuio_funcs smuio_v13_0_6_funcs;
+
+#endif /* __SMUIO_V13_0_6_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c
new file mode 100644
index 000000000000..2a51a70d4846
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "smuio_v14_0_2.h"
+#include "smuio/smuio_14_0_2_offset.h"
+#include "smuio/smuio_14_0_2_sh_mask.h"
+#include <linux/preempt.h>
+
+static u32 smuio_v14_0_2_get_rom_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, regROM_INDEX);
+}
+
+static u32 smuio_v14_0_2_get_rom_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, regROM_DATA);
+}
+
+static u64 smuio_v14_0_2_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ u64 clock;
+ u64 clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after;
+
+ preempt_disable();
+ clock_counter_hi_pre = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
+ clock_counter_lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
+ /* the clock counter may be updated while the counters are being polled */
+ clock_counter_hi_after = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
+ if (clock_counter_hi_pre != clock_counter_hi_after)
+ clock_counter_lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
+ preempt_enable();
+
+ clock = clock_counter_lo | (clock_counter_hi_after << 32ULL);
+
+ return clock;
+}
+
+const struct amdgpu_smuio_funcs smuio_v14_0_2_funcs = {
+ .get_rom_index_offset = smuio_v14_0_2_get_rom_index_offset,
+ .get_rom_data_offset = smuio_v14_0_2_get_rom_data_offset,
+ .get_gpu_clock_counter = smuio_v14_0_2_get_gpu_clock_counter,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h
new file mode 100644
index 000000000000..6e617f832d90
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SMUIO_V14_0_2_H__
+#define __SMUIO_V14_0_2_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_smuio_funcs smuio_v14_0_2_funcs;
+
+#endif /* __SMUIO_V14_0_2_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c
index 8417890af227..c04fdd2d5b38 100644
--- a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c
@@ -56,11 +56,11 @@ static void smuio_v9_0_update_rom_clock_gating(struct amdgpu_device *adev, bool
WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data);
}
-static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags)
+static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags)
{
u32 data;
- /* CGTT_ROM_CLK_CTRL0 is not availabe for APUs */
+ /* CGTT_ROM_CLK_CTRL0 is not available for APUs */
if (adev->flags & AMD_IS_APU)
return;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index de9b55383e9f..42f5d9c0e3af 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -28,7 +28,6 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "amdgpu_ih.h"
#include "amdgpu_uvd.h"
#include "amdgpu_vce.h"
@@ -90,8 +89,8 @@ static const struct amd_ip_funcs soc15_common_ip_funcs;
/* Vega, Raven, Arcturus */
static const struct amdgpu_video_codec_info vega_video_codecs_encode_array[] =
{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 0)},
};
static const struct amdgpu_video_codecs vega_video_codecs_encode =
@@ -103,12 +102,11 @@ static const struct amdgpu_video_codecs vega_video_codecs_encode =
/* Vega */
static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] =
{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
};
static const struct amdgpu_video_codecs vega_video_codecs_decode =
@@ -120,12 +118,12 @@ static const struct amdgpu_video_codecs vega_video_codecs_decode =
/* Raven */
static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] =
{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 8192, 8192, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)},
};
@@ -138,12 +136,12 @@ static const struct amdgpu_video_codecs rv_video_codecs_decode =
/* Renoir, Arcturus */
static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] =
{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};
@@ -153,11 +151,47 @@ static const struct amdgpu_video_codecs rn_video_codecs_decode =
.codec_array = rn_video_codecs_decode_array,
};
+static const struct amdgpu_video_codec_info vcn_4_0_3_video_codecs_decode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs vcn_4_0_3_video_codecs_decode = {
+ .codec_count = ARRAY_SIZE(vcn_4_0_3_video_codecs_decode_array),
+ .codec_array = vcn_4_0_3_video_codecs_decode_array,
+};
+
+static const struct amdgpu_video_codecs vcn_4_0_3_video_codecs_encode = {
+ .codec_count = 0,
+ .codec_array = NULL,
+};
+
+static const struct amdgpu_video_codecs vcn_5_0_1_video_codecs_encode_vcn0 = {
+ .codec_count = 0,
+ .codec_array = NULL,
+};
+
+static const struct amdgpu_video_codec_info vcn_5_0_1_video_codecs_decode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs vcn_5_0_1_video_codecs_decode_vcn0 = {
+ .codec_count = ARRAY_SIZE(vcn_5_0_1_video_codecs_decode_array_vcn0),
+ .codec_array = vcn_5_0_1_video_codecs_decode_array_vcn0,
+};
+
static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode,
const struct amdgpu_video_codecs **codecs)
{
- if (adev->ip_versions[VCE_HWIP][0]) {
- switch (adev->ip_versions[VCE_HWIP][0]) {
+ if (amdgpu_ip_version(adev, VCE_HWIP, 0)) {
+ switch (amdgpu_ip_version(adev, VCE_HWIP, 0)) {
case IP_VERSION(4, 0, 0):
case IP_VERSION(4, 1, 0):
if (encode)
@@ -169,7 +203,7 @@ static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode,
return -EINVAL;
}
} else {
- switch (adev->ip_versions[UVD_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
case IP_VERSION(1, 0, 0):
case IP_VERSION(1, 0, 1):
if (encode)
@@ -185,53 +219,24 @@ static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode,
else
*codecs = &rn_video_codecs_decode;
return 0;
+ case IP_VERSION(4, 0, 3):
+ if (encode)
+ *codecs = &vcn_4_0_3_video_codecs_encode;
+ else
+ *codecs = &vcn_4_0_3_video_codecs_decode;
+ return 0;
+ case IP_VERSION(5, 0, 1):
+ if (encode)
+ *codecs = &vcn_5_0_1_video_codecs_encode_vcn0;
+ else
+ *codecs = &vcn_5_0_1_video_codecs_decode_vcn0;
+ return 0;
default:
return -EINVAL;
}
}
}
-/*
- * Indirect registers accessor
- */
-static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg)
-{
- unsigned long address, data;
- address = adev->nbio.funcs->get_pcie_index_offset(adev);
- data = adev->nbio.funcs->get_pcie_data_offset(adev);
-
- return amdgpu_device_indirect_rreg(adev, address, data, reg);
-}
-
-static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
-{
- unsigned long address, data;
-
- address = adev->nbio.funcs->get_pcie_index_offset(adev);
- data = adev->nbio.funcs->get_pcie_data_offset(adev);
-
- amdgpu_device_indirect_wreg(adev, address, data, reg, v);
-}
-
-static u64 soc15_pcie_rreg64(struct amdgpu_device *adev, u32 reg)
-{
- unsigned long address, data;
- address = adev->nbio.funcs->get_pcie_index_offset(adev);
- data = adev->nbio.funcs->get_pcie_data_offset(adev);
-
- return amdgpu_device_indirect_rreg64(adev, address, data, reg);
-}
-
-static void soc15_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v)
-{
- unsigned long address, data;
-
- address = adev->nbio.funcs->get_pcie_index_offset(adev);
- data = adev->nbio.funcs->get_pcie_data_offset(adev);
-
- amdgpu_device_indirect_wreg64(adev, address, data, reg, v);
-}
-
static u32 soc15_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg)
{
unsigned long flags, address, data;
@@ -341,11 +346,14 @@ static u32 soc15_get_xclk(struct amdgpu_device *adev)
{
u32 reference_clock = adev->clock.spll.reference_freq;
- if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 0) ||
- adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 1))
+ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(12, 0, 0) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(12, 0, 1) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 14))
return 10000;
- if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 0) ||
- adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 1))
+ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(10, 0, 0) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(10, 0, 1))
return reference_clock / 4;
return reference_clock;
@@ -353,7 +361,7 @@ static u32 soc15_get_xclk(struct amdgpu_device *adev)
void soc15_grbm_select(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 queue, u32 vmid)
+ u32 me, u32 pipe, u32 queue, u32 vmid, int xcc_id)
{
u32 grbm_gfx_cntl = 0;
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe);
@@ -361,12 +369,7 @@ void soc15_grbm_select(struct amdgpu_device *adev,
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue);
- WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl);
-}
-
-static void soc15_vga_set_state(struct amdgpu_device *adev, bool state)
-{
- /* todo */
+ WREG32_SOC15_RLC_SHADOW(GC, xcc_id, mmGRBM_GFX_CNTL, grbm_gfx_cntl);
}
static bool soc15_read_disabled_bios(struct amdgpu_device *adev)
@@ -375,39 +378,6 @@ static bool soc15_read_disabled_bios(struct amdgpu_device *adev)
return false;
}
-static bool soc15_read_bios_from_rom(struct amdgpu_device *adev,
- u8 *bios, u32 length_bytes)
-{
- u32 *dw_ptr;
- u32 i, length_dw;
- uint32_t rom_index_offset;
- uint32_t rom_data_offset;
-
- if (bios == NULL)
- return false;
- if (length_bytes == 0)
- return false;
- /* APU vbios image is part of sbios image */
- if (adev->flags & AMD_IS_APU)
- return false;
-
- dw_ptr = (u32 *)bios;
- length_dw = ALIGN(length_bytes, 4) / 4;
-
- rom_index_offset =
- adev->smuio.funcs->get_rom_index_offset(adev);
- rom_data_offset =
- adev->smuio.funcs->get_rom_data_offset(adev);
-
- /* set rom index to 0 */
- WREG32(rom_index_offset, 0);
- /* read out the rom data */
- for (i = 0; i < length_dw; i++)
- dw_ptr[i] = RREG32(rom_data_offset);
-
- return true;
-}
-
static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = {
{ SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS)},
{ SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS2)},
@@ -438,12 +408,12 @@ static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_n
mutex_lock(&adev->grbm_idx_mutex);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
val = RREG32(reg_offset);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
return val;
}
@@ -472,8 +442,9 @@ static int soc15_read_register(struct amdgpu_device *adev, u32 se_num,
*value = 0;
for (i = 0; i < ARRAY_SIZE(soc15_allowed_read_registers); i++) {
en = &soc15_allowed_read_registers[i];
- if (adev->reg_offset[en->hwip][en->inst] &&
- reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg]
+ if (!adev->reg_offset[en->hwip][en->inst])
+ continue;
+ else if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg]
+ en->reg_offset))
continue;
@@ -555,7 +526,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev)
static enum amd_reset_method
soc15_asic_reset_method(struct amdgpu_device *adev)
{
- bool baco_reset = false;
+ int baco_reset = 0;
bool connected_to_cpu = false;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
@@ -576,7 +547,7 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
amdgpu_reset_method);
- switch (adev->ip_versions[MP1_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(10, 0, 0):
case IP_VERSION(10, 0, 1):
case IP_VERSION(12, 0, 0):
@@ -593,7 +564,7 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
*/
if (ras && adev->ras_enabled &&
adev->pm.fw_version <= 0x283400)
- baco_reset = false;
+ baco_reset = 0;
} else {
baco_reset = amdgpu_dpm_is_baco_supported(adev);
}
@@ -606,6 +577,21 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
if (connected_to_cpu)
return AMD_RESET_METHOD_MODE2;
break;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 14):
+ case IP_VERSION(13, 0, 12):
+ /* Use gpu_recovery param to target a reset method.
+ * Enable triggering of GPU reset only if specified
+ * by module parameter.
+ */
+ if (adev->pcie_reset_ctx.in_link_reset)
+ return AMD_RESET_METHOD_LINK;
+ if (amdgpu_gpu_recovery == 4 || amdgpu_gpu_recovery == 5)
+ return AMD_RESET_METHOD_MODE2;
+ else if (!(adev->flags & AMD_IS_APU))
+ return AMD_RESET_METHOD_MODE1;
+ else
+ return AMD_RESET_METHOD_MODE2;
default:
break;
}
@@ -616,13 +602,36 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
return AMD_RESET_METHOD_MODE1;
}
+static bool soc15_need_reset_on_resume(struct amdgpu_device *adev)
+{
+ /* Will reset for the following suspend abort cases.
+ * 1) S3 suspend aborted in the normal S3 suspend
+ * 2) S3 suspend aborted in performing pm core test.
+ */
+ if (adev->in_s3 && !pm_resume_via_firmware())
+ return true;
+ else
+ return false;
+}
+
static int soc15_asic_reset(struct amdgpu_device *adev)
{
/* original raven doesn't have full asic reset */
- if ((adev->apu_flags & AMD_APU_IS_RAVEN) &&
- !(adev->apu_flags & AMD_APU_IS_RAVEN2))
+ /* On the latest Raven, the GPU reset can be performed
+ * successfully. So now, temporarily enable it for the
+ * S3 suspend abort case.
+ */
+
+ if ((adev->apu_flags & AMD_APU_IS_PICASSO ||
+ !(adev->apu_flags & AMD_APU_IS_RAVEN)) &&
+ soc15_need_reset_on_resume(adev))
+ goto asic_reset;
+
+ if ((adev->apu_flags & AMD_APU_IS_RAVEN) ||
+ (adev->apu_flags & AMD_APU_IS_RAVEN2))
return 0;
+asic_reset:
switch (soc15_asic_reset_method(adev)) {
case AMD_RESET_METHOD_PCI:
dev_info(adev->dev, "PCI reset\n");
@@ -633,27 +642,30 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
case AMD_RESET_METHOD_MODE2:
dev_info(adev->dev, "MODE2 reset\n");
return amdgpu_dpm_mode2_reset(adev);
+ case AMD_RESET_METHOD_LINK:
+ dev_info(adev->dev, "Link reset\n");
+ return amdgpu_device_link_reset(adev);
default:
dev_info(adev->dev, "MODE1 reset\n");
return amdgpu_device_mode1_reset(adev);
}
}
-static bool soc15_supports_baco(struct amdgpu_device *adev)
+static int soc15_supports_baco(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[MP1_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(9, 0, 0):
case IP_VERSION(11, 0, 2):
if (adev->asic_type == CHIP_VEGA20) {
if (adev->psp.sos.fw_version >= 0x80067)
return amdgpu_dpm_is_baco_supported(adev);
- return false;
+ return 0;
} else {
return amdgpu_dpm_is_baco_supported(adev);
}
break;
default:
- return false;
+ return 0;
}
}
@@ -683,41 +695,15 @@ static int soc15_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk
return 0;
}
-static void soc15_pcie_gen3_enable(struct amdgpu_device *adev)
-{
- if (pci_is_root_bus(adev->pdev->bus))
- return;
-
- if (amdgpu_pcie_gen2 == 0)
- return;
-
- if (adev->flags & AMD_IS_APU)
- return;
-
- if (!(adev->pm.pcie_gen_mask & (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
- CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)))
- return;
-
- /* todo */
-}
-
static void soc15_program_aspm(struct amdgpu_device *adev)
{
- if (!amdgpu_aspm)
+ if (!amdgpu_device_should_use_aspm(adev))
return;
- if (!(adev->flags & AMD_IS_APU) &&
- (adev->nbio.funcs->program_aspm))
+ if (adev->nbio.funcs->program_aspm)
adev->nbio.funcs->program_aspm(adev);
}
-static void soc15_enable_doorbell_aperture(struct amdgpu_device *adev,
- bool enable)
-{
- adev->nbio.funcs->enable_doorbell_aperture(adev, enable);
- adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable);
-}
-
const struct amdgpu_ip_block_version vega10_common_ip_block =
{
.type = AMD_IP_BLOCK_TYPE_COMMON,
@@ -727,32 +713,14 @@ const struct amdgpu_ip_block_version vega10_common_ip_block =
.funcs = &soc15_common_ip_funcs,
};
-static uint32_t soc15_get_rev_id(struct amdgpu_device *adev)
-{
- return adev->nbio.funcs->get_rev_id(adev);
-}
-
static void soc15_reg_base_init(struct amdgpu_device *adev)
{
- int r;
-
/* Set IP register base before any HW register access */
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_RAVEN:
- vega10_reg_base_init(adev);
- break;
case CHIP_RENOIR:
- /* It's safe to do ip discovery here for Renior,
- * it doesn't support SRIOV. */
- if (amdgpu_discovery) {
- r = amdgpu_discovery_reg_base_init(adev);
- if (r == 0)
- break;
- DRM_WARN("failed to init reg base from ip discovery table, "
- "fallback to legacy init method\n");
- }
vega10_reg_base_init(adev);
break;
case CHIP_VEGA20:
@@ -773,7 +741,6 @@ static void soc15_reg_base_init(struct amdgpu_device *adev)
void soc15_set_virt_ops(struct amdgpu_device *adev)
{
adev->virt.ops = &xgpu_ai_virt_ops;
-
/* init soc15 reg base early enough so we can
* request request full access for sriov before
* set_ip_blocks. */
@@ -886,6 +853,10 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
{
u32 sol_reg;
+ if (amdgpu_gmc_need_reset_on_init(adev))
+ return true;
+ if (amdgpu_psp_tos_reload_needed(adev))
+ return true;
/* Just return false for soc15 GPUs. Reset does not seem to
* be necessary.
*/
@@ -925,11 +896,10 @@ static void soc15_pre_asic_init(struct amdgpu_device *adev)
static const struct amdgpu_asic_funcs soc15_asic_funcs =
{
.read_disabled_bios = &soc15_read_disabled_bios,
- .read_bios_from_rom = &soc15_read_bios_from_rom,
+ .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom,
.read_register = &soc15_read_register,
.reset = &soc15_asic_reset,
.reset_method = &soc15_asic_reset_method,
- .set_vga_state = &soc15_vga_set_state,
.get_xclk = &soc15_get_xclk,
.set_uvd_clocks = &soc15_set_uvd_clocks,
.set_vce_clocks = &soc15_set_vce_clocks,
@@ -947,11 +917,10 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
static const struct amdgpu_asic_funcs vega20_asic_funcs =
{
.read_disabled_bios = &soc15_read_disabled_bios,
- .read_bios_from_rom = &soc15_read_bios_from_rom,
+ .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom,
.read_register = &soc15_read_register,
.reset = &soc15_asic_reset,
.reset_method = &soc15_asic_reset_method,
- .set_vga_state = &soc15_vga_set_state,
.get_xclk = &soc15_get_xclk,
.set_uvd_clocks = &soc15_set_uvd_clocks,
.set_vce_clocks = &soc15_set_vce_clocks,
@@ -966,21 +935,43 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
.query_video_codecs = &soc15_query_video_codecs,
};
-static int soc15_common_early_init(void *handle)
+static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs =
{
-#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ .read_disabled_bios = &soc15_read_disabled_bios,
+ .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom,
+ .read_register = &soc15_read_register,
+ .reset = &soc15_asic_reset,
+ .reset_method = &soc15_asic_reset_method,
+ .get_xclk = &soc15_get_xclk,
+ .set_uvd_clocks = &soc15_set_uvd_clocks,
+ .set_vce_clocks = &soc15_set_vce_clocks,
+ .get_config_memsize = &soc15_get_config_memsize,
+ .need_full_reset = &soc15_need_full_reset,
+ .init_doorbell_index = &aqua_vanjaram_doorbell_index_init,
+ .need_reset_on_init = &soc15_need_reset_on_init,
+ .get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count,
+ .supports_baco = &soc15_supports_baco,
+ .pre_asic_init = &soc15_pre_asic_init,
+ .query_video_codecs = &soc15_query_video_codecs,
+ .encode_ext_smn_addressing = &aqua_vanjaram_encode_ext_smn_addressing,
+ .get_reg_state = &aqua_vanjaram_get_reg_state,
+};
- if (!amdgpu_sriov_vf(adev)) {
- adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
- adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
- }
+static int soc15_common_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->nbio.funcs->set_reg_remap(adev);
adev->smc_rreg = NULL;
adev->smc_wreg = NULL;
- adev->pcie_rreg = &soc15_pcie_rreg;
- adev->pcie_wreg = &soc15_pcie_wreg;
- adev->pcie_rreg64 = &soc15_pcie_rreg64;
- adev->pcie_wreg64 = &soc15_pcie_wreg64;
+ adev->pcie_rreg = &amdgpu_device_indirect_rreg;
+ adev->pcie_wreg = &amdgpu_device_indirect_wreg;
+ adev->pcie_rreg_ext = &amdgpu_device_indirect_rreg_ext;
+ adev->pcie_wreg_ext = &amdgpu_device_indirect_wreg_ext;
+ adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64;
+ adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64;
+ adev->pcie_rreg64_ext = &amdgpu_device_indirect_rreg64_ext;
+ adev->pcie_wreg64_ext = &amdgpu_device_indirect_wreg64_ext;
adev->uvd_ctx_rreg = &soc15_uvd_ctx_rreg;
adev->uvd_ctx_wreg = &soc15_uvd_ctx_wreg;
adev->didt_rreg = &soc15_didt_rreg;
@@ -990,12 +981,12 @@ static int soc15_common_early_init(void *handle)
adev->se_cac_rreg = &soc15_se_cac_rreg;
adev->se_cac_wreg = &soc15_se_cac_wreg;
- adev->rev_id = soc15_get_rev_id(adev);
+ adev->rev_id = amdgpu_device_get_rev_id(adev);
adev->external_rev_id = 0xFF;
/* TODO: split the GC and PG flags based on the relevant IP version for which
* they are relevant.
*/
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
adev->asic_funcs = &soc15_asic_funcs;
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
@@ -1114,8 +1105,11 @@ static int soc15_common_early_init(void *handle)
AMD_CG_SUPPORT_SDMA_LS |
AMD_CG_SUPPORT_VCN_MGCG;
+ /*
+ * MMHUB PG needs to be disabled for Picasso for
+ * stability reasons.
+ */
adev->pg_flags = AMD_PG_SUPPORT_SDMA |
- AMD_PG_SUPPORT_MMHUB |
AMD_PG_SUPPORT_VCN;
} else {
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
@@ -1203,6 +1197,25 @@ static int soc15_common_early_init(void *handle)
adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG;
adev->external_rev_id = adev->rev_id + 0x3c;
break;
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ adev->asic_funcs = &aqua_vanjaram_asic_funcs;
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG | AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_IH_CG;
+ adev->pg_flags =
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG;
+ /*TODO: need a new external_rev_id for GC 9.4.4? */
+ adev->external_rev_id = adev->rev_id + 0x46;
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
+ adev->external_rev_id = adev->rev_id + 0x50;
+ break;
default:
/* FIXME: not supported yet */
return -EINVAL;
@@ -1216,69 +1229,63 @@ static int soc15_common_early_init(void *handle)
return 0;
}
-static int soc15_common_late_init(void *handle)
+static int soc15_common_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int r = 0;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_get_irq(adev);
- if (adev->nbio.ras_funcs &&
- adev->nbio.ras_funcs->ras_late_init)
- r = adev->nbio.ras_funcs->ras_late_init(adev);
+ /* Enable selfring doorbell aperture late because doorbell BAR
+ * aperture will change if resize BAR successfully in gmc sw_init.
+ */
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true);
- return r;
+ return 0;
}
-static int soc15_common_sw_init(void *handle)
+static int soc15_common_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_add_irq_id(adev);
- adev->df.funcs->sw_init(adev);
+ if (adev->df.funcs &&
+ adev->df.funcs->sw_init)
+ adev->df.funcs->sw_init(adev);
return 0;
}
-static int soc15_common_sw_fini(void *handle)
+static int soc15_common_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- if (adev->nbio.ras_funcs &&
- adev->nbio.ras_funcs->ras_fini)
- adev->nbio.ras_funcs->ras_fini(adev);
- adev->df.funcs->sw_fini(adev);
+ if (adev->df.funcs &&
+ adev->df.funcs->sw_fini)
+ adev->df.funcs->sw_fini(adev);
return 0;
}
-static void soc15_doorbell_range_init(struct amdgpu_device *adev)
+static void soc15_sdma_doorbell_range_init(struct amdgpu_device *adev)
{
int i;
- struct amdgpu_ring *ring;
- /* sdma/ih doorbell range are programed by hypervisor */
+ /* sdma doorbell range is programmed by hypervisor */
if (!amdgpu_sriov_vf(adev)) {
for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
adev->nbio.funcs->sdma_doorbell_range(adev, i,
- ring->use_doorbell, ring->doorbell_index,
+ true, adev->doorbell_index.sdma_engine[i] << 1,
adev->doorbell_index.sdma_doorbell_range);
}
-
- adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
- adev->irq.ih.doorbell_index);
}
}
-static int soc15_common_hw_init(void *handle)
+static int soc15_common_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- /* enable pcie gen2/3 link */
- soc15_pcie_gen3_enable(adev);
/* enable aspm */
soc15_program_aspm(adev);
/* setup nbio registers */
@@ -1291,68 +1298,74 @@ static int soc15_common_hw_init(void *handle)
adev->nbio.funcs->remap_hdp_registers(adev);
/* enable the doorbell aperture */
- soc15_enable_doorbell_aperture(adev, true);
+ adev->nbio.funcs->enable_doorbell_aperture(adev, true);
+
/* HW doorbell routing policy: doorbell writing not
* in SDMA/IH/MM/ACV range will be routed to CP. So
- * we need to init SDMA/IH/MM/ACV doorbell range prior
- * to CP ip block init and ring test.
+ * we need to init SDMA doorbell range prior
+ * to CP ip block init and ring test. IH already
+ * happens before CP.
*/
- soc15_doorbell_range_init(adev);
+ soc15_sdma_doorbell_range_init(adev);
return 0;
}
-static int soc15_common_hw_fini(void *handle)
+static int soc15_common_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* Disable the doorbell aperture and selfring doorbell aperture
+ * separately in hw_fini because soc15_enable_doorbell_aperture
+ * has been removed and there is no need to delay disabling
+ * selfring doorbell.
+ */
+ adev->nbio.funcs->enable_doorbell_aperture(adev, false);
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false);
- /* disable the doorbell aperture */
- soc15_enable_doorbell_aperture(adev, false);
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_put_irq(adev);
- if (adev->nbio.ras_if &&
+ /*
+ * For minimal init, late_init is not called, hence RAS irqs are not
+ * enabled.
+ */
+ if ((!amdgpu_sriov_vf(adev)) &&
+ (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) &&
+ adev->nbio.ras_if &&
amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) {
- if (adev->nbio.ras_funcs &&
- adev->nbio.ras_funcs->init_ras_controller_interrupt)
+ if (adev->nbio.ras &&
+ adev->nbio.ras->init_ras_controller_interrupt)
amdgpu_irq_put(adev, &adev->nbio.ras_controller_irq, 0);
- if (adev->nbio.ras_funcs &&
- adev->nbio.ras_funcs->init_ras_err_event_athub_interrupt)
+ if (adev->nbio.ras &&
+ adev->nbio.ras->init_ras_err_event_athub_interrupt)
amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0);
}
return 0;
}
-static int soc15_common_suspend(void *handle)
+static int soc15_common_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return soc15_common_hw_fini(adev);
+ return soc15_common_hw_fini(ip_block);
}
-static int soc15_common_resume(void *handle)
+static int soc15_common_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- return soc15_common_hw_init(adev);
+ if (soc15_need_reset_on_resume(adev)) {
+ dev_info(adev->dev, "S3 suspend abort case, let's reset ASIC.\n");
+ soc15_asic_reset(adev);
+ }
+ return soc15_common_hw_init(ip_block);
}
-static bool soc15_common_is_idle(void *handle)
+static bool soc15_common_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int soc15_common_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int soc15_common_soft_reset(void *handle)
-{
- return 0;
-}
-
static void soc15_update_drm_clock_gating(struct amdgpu_device *adev, bool enable)
{
uint32_t def, data;
@@ -1397,15 +1410,15 @@ static void soc15_update_drm_light_sleep(struct amdgpu_device *adev, bool enable
WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_LIGHT_SLEEP_CTRL), data);
}
-static int soc15_common_set_clockgating_state(void *handle,
+static int soc15_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[NBIO_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(6, 1, 0):
case IP_VERSION(6, 2, 0):
case IP_VERSION(7, 4, 0):
@@ -1449,20 +1462,24 @@ static int soc15_common_set_clockgating_state(void *handle,
return 0;
}
-static void soc15_common_get_clockgating_state(void *handle, u32 *flags)
+static void soc15_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
*flags = 0;
- adev->nbio.funcs->get_clockgating_state(adev, flags);
-
- adev->hdp.funcs->get_clock_gating_state(adev, flags);
+ if (adev->nbio.funcs && adev->nbio.funcs->get_clockgating_state)
+ adev->nbio.funcs->get_clockgating_state(adev, flags);
- if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(13, 0, 2)) {
+ if (adev->hdp.funcs && adev->hdp.funcs->get_clock_gating_state)
+ adev->hdp.funcs->get_clock_gating_state(adev, flags);
+ if ((amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) &&
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) &&
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 12)) &&
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 14))) {
/* AMD_CG_SUPPORT_DRM_MGCG */
data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0));
if (!(data & 0x01000000))
@@ -1475,12 +1492,14 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags)
}
/* AMD_CG_SUPPORT_ROM_MGCG */
- adev->smuio.funcs->get_clock_gating_state(adev, flags);
+ if (adev->smuio.funcs && adev->smuio.funcs->get_clock_gating_state)
+ adev->smuio.funcs->get_clock_gating_state(adev, flags);
- adev->df.funcs->get_clockgating_state(adev, flags);
+ if (adev->df.funcs && adev->df.funcs->get_clockgating_state)
+ adev->df.funcs->get_clockgating_state(adev, flags);
}
-static int soc15_common_set_powergating_state(void *handle,
+static int soc15_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* todo */
@@ -1498,8 +1517,6 @@ static const struct amd_ip_funcs soc15_common_ip_funcs = {
.suspend = soc15_common_suspend,
.resume = soc15_common_resume,
.is_idle = soc15_common_is_idle,
- .wait_for_idle = soc15_common_wait_for_idle,
- .soft_reset = soc15_common_soft_reset,
.set_clockgating_state = soc15_common_set_clockgating_state,
.set_powergating_state = soc15_common_set_powergating_state,
.get_clockgating_state= soc15_common_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index efc2a253e8db..c8ac11a9cdef 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -27,6 +27,7 @@
#include "nbio_v6_1.h"
#include "nbio_v7_0.h"
#include "nbio_v7_4.h"
+#include "amdgpu_reg_state.h"
extern const struct amdgpu_ip_block_version vega10_common_ip_block;
@@ -87,9 +88,15 @@ struct soc15_ras_field_entry {
};
#define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg
+#define SOC15_REG_ENTRY_STR(ip, inst, reg) \
+ { ip##_HWIP, inst, reg##_BASE_IDX, reg, #reg }
#define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset)
+/* Over ride the instance id */
+#define SOC15_REG_ENTRY_OFFSET_INST(entry, inst) \
+ (adev->reg_offset[entry.hwip][inst][entry.seg] + entry.reg_offset)
+
#define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \
{ ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask }
@@ -100,7 +107,7 @@ struct soc15_ras_field_entry {
#define SOC15_RAS_REG_FIELD_VAL(val, entry, field) SOC15_REG_FIELD_VAL((val), (entry).field##_count_mask, (entry).field##_count_shift)
void soc15_grbm_select(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 queue, u32 vmid);
+ u32 me, u32 pipe, u32 queue, u32 vmid, int xcc_id);
void soc15_set_virt_ops(struct amdgpu_device *adev);
void soc15_program_register_sequence(struct amdgpu_device *adev,
@@ -111,7 +118,13 @@ int vega10_reg_base_init(struct amdgpu_device *adev);
int vega20_reg_base_init(struct amdgpu_device *adev);
int arct_reg_base_init(struct amdgpu_device *adev);
int aldebaran_reg_base_init(struct amdgpu_device *adev);
+u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id);
+int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev);
+ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev,
+ enum amdgpu_reg_state reg_state, void *buf,
+ size_t max_size);
void vega10_doorbell_index_init(struct amdgpu_device *adev);
void vega20_doorbell_index_init(struct amdgpu_device *adev);
+void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 8a9ca87d8663..242b24f73c17 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -24,92 +24,100 @@
#ifndef __SOC15_COMMON_H__
#define __SOC15_COMMON_H__
+/* GET_INST returns the physical instance corresponding to a logical instance */
+#define GET_INST(ip, inst) \
+ (adev->ip_map.logical_to_dev_inst ? \
+ adev->ip_map.logical_to_dev_inst(adev, ip##_HWIP, inst) : inst)
+#define GET_MASK(ip, mask) \
+ (adev->ip_map.logical_to_dev_mask ? \
+ adev->ip_map.logical_to_dev_mask(adev, ip##_HWIP, mask) : mask)
+
/* Register Access Macros */
#define SOC15_REG_OFFSET(ip, inst, reg) (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg)
+#define SOC15_REG_OFFSET1(ip, inst, reg, offset) \
+ (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg)+(offset))
-#define __WREG32_SOC15_RLC__(reg, value, flag, hwip) \
- ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.funcs->sriov_wreg) ? \
- adev->gfx.rlc.funcs->sriov_wreg(adev, reg, value, flag, hwip) : \
+#define __WREG32_SOC15_RLC__(reg, value, flag, hwip, inst) \
+ ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.rlcg_reg_access_supported) ? \
+ amdgpu_sriov_wreg(adev, reg, value, flag, hwip, inst) : \
WREG32(reg, value))
-#define __RREG32_SOC15_RLC__(reg, flag, hwip) \
- ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.funcs->sriov_rreg) ? \
- adev->gfx.rlc.funcs->sriov_rreg(adev, reg, flag, hwip) : \
+#define __RREG32_SOC15_RLC__(reg, flag, hwip, inst) \
+ ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.rlcg_reg_access_supported) ? \
+ amdgpu_sriov_rreg(adev, reg, flag, hwip, inst) : \
RREG32(reg))
#define WREG32_FIELD15(ip, idx, reg, field, val) \
__WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg, \
(__RREG32_SOC15_RLC__( \
adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg, \
- 0, ip##_HWIP) & \
+ 0, ip##_HWIP, idx) & \
~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field), \
- 0, ip##_HWIP)
+ 0, ip##_HWIP, idx)
+
+#define WREG32_FIELD15_PREREG(ip, idx, reg_name, field, val) \
+ __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][idx][reg##reg_name##_BASE_IDX] + reg##reg_name, \
+ (__RREG32_SOC15_RLC__( \
+ adev->reg_offset[ip##_HWIP][idx][reg##reg_name##_BASE_IDX] + reg##reg_name, \
+ 0, ip##_HWIP, idx) & \
+ ~REG_FIELD_MASK(reg_name, field)) | (val) << REG_FIELD_SHIFT(reg_name, field), \
+ 0, ip##_HWIP, idx)
#define RREG32_SOC15(ip, inst, reg) \
__RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \
- 0, ip##_HWIP)
+ 0, ip##_HWIP, inst)
+
+#define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP, 0)
-#define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP)
+#define RREG32_SOC15_IP_NO_KIQ(ip, reg, inst) __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst)
#define RREG32_SOC15_NO_KIQ(ip, inst, reg) \
__RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \
- AMDGPU_REGS_NO_KIQ, ip##_HWIP)
+ AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst)
#define RREG32_SOC15_OFFSET(ip, inst, reg, offset) \
- __RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, 0, ip##_HWIP)
+ __RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg)) + \
+ (offset), 0, ip##_HWIP, inst)
#define WREG32_SOC15(ip, inst, reg, value) \
__WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), \
- value, 0, ip##_HWIP)
+ value, 0, ip##_HWIP, inst)
#define WREG32_SOC15_IP(ip, reg, value) \
- __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP)
+ __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP, 0)
+
+#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value, inst) \
+ __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst)
#define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \
__WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \
- value, AMDGPU_REGS_NO_KIQ, ip##_HWIP)
+ value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst)
#define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \
__WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, \
- value, 0, ip##_HWIP)
+ value, 0, ip##_HWIP, inst)
-#define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask) \
-({ int ret = 0; \
- do { \
- uint32_t old_ = 0; \
- uint32_t tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
- uint32_t loop = adev->usec_timeout; \
- ret = 0; \
- while ((tmp_ & (mask)) != (expected_value)) { \
- if (old_ != tmp_) { \
- loop = adev->usec_timeout; \
- old_ = tmp_; \
- } else \
- udelay(1); \
- tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
- loop--; \
- if (!loop) { \
- DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n", \
- inst, #reg, (unsigned)expected_value, (unsigned)(tmp_ & (mask))); \
- ret = -ETIMEDOUT; \
- break; \
- } \
- } \
- } while (0); \
- ret; \
-})
+#define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask) \
+ amdgpu_device_wait_on_rreg(adev, inst, \
+ (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg)), \
+ #reg, expected_value, mask)
+
+#define SOC15_WAIT_ON_RREG_OFFSET(ip, inst, reg, offset, expected_value, mask) \
+ amdgpu_device_wait_on_rreg(adev, inst, \
+ (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg) + (offset)), \
+ #reg, expected_value, mask)
#define WREG32_RLC(reg, value) \
- __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_RLC, GC_HWIP)
+ __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_RLC, GC_HWIP, 0)
-#define WREG32_RLC_EX(prefix, reg, value) \
+#define WREG32_RLC_EX(prefix, reg, value, inst) \
do { \
if (amdgpu_sriov_fullaccess(adev)) { \
uint32_t i = 0; \
uint32_t retries = 50000; \
- uint32_t r0 = adev->reg_offset[GC_HWIP][0][prefix##SCRATCH_REG0_BASE_IDX] + prefix##SCRATCH_REG0; \
- uint32_t r1 = adev->reg_offset[GC_HWIP][0][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG1; \
- uint32_t spare_int = adev->reg_offset[GC_HWIP][0][prefix##RLC_SPARE_INT_BASE_IDX] + prefix##RLC_SPARE_INT; \
+ uint32_t r0 = adev->reg_offset[GC_HWIP][inst][prefix##SCRATCH_REG0_BASE_IDX] + prefix##SCRATCH_REG0; \
+ uint32_t r1 = adev->reg_offset[GC_HWIP][inst][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG1; \
+ uint32_t spare_int = adev->reg_offset[GC_HWIP][inst][prefix##RLC_SPARE_INT_BASE_IDX] + prefix##RLC_SPARE_INT; \
WREG32(r0, value); \
WREG32(r1, (reg | 0x80000000)); \
WREG32(spare_int, 0x1); \
@@ -128,26 +136,26 @@
/* shadow the registers in the callback function */
#define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \
- __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), value, AMDGPU_REGS_RLC, GC_HWIP)
+ __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), value, AMDGPU_REGS_RLC, GC_HWIP, inst)
/* for GC only */
#define RREG32_RLC(reg) \
- __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_RLC, GC_HWIP)
+ __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_RLC, GC_HWIP, 0)
#define WREG32_RLC_NO_KIQ(reg, value, hwip) \
- __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip)
+ __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip, 0)
#define RREG32_RLC_NO_KIQ(reg, hwip) \
- __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip)
+ __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip, 0)
#define WREG32_SOC15_RLC_SHADOW_EX(prefix, ip, inst, reg, value) \
do { \
uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
if (amdgpu_sriov_fullaccess(adev)) { \
- uint32_t r2 = adev->reg_offset[GC_HWIP][0][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG2; \
- uint32_t r3 = adev->reg_offset[GC_HWIP][0][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG3; \
- uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][prefix##GRBM_GFX_CNTL_BASE_IDX] + prefix##GRBM_GFX_CNTL; \
- uint32_t grbm_idx = adev->reg_offset[GC_HWIP][0][prefix##GRBM_GFX_INDEX_BASE_IDX] + prefix##GRBM_GFX_INDEX; \
+ uint32_t r2 = adev->reg_offset[GC_HWIP][inst][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG2; \
+ uint32_t r3 = adev->reg_offset[GC_HWIP][inst][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG3; \
+ uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][inst][prefix##GRBM_GFX_CNTL_BASE_IDX] + prefix##GRBM_GFX_CNTL; \
+ uint32_t grbm_idx = adev->reg_offset[GC_HWIP][inst][prefix##GRBM_GFX_INDEX_BASE_IDX] + prefix##GRBM_GFX_INDEX; \
if (target_reg == grbm_cntl) \
WREG32(r2, value); \
else if (target_reg == grbm_idx) \
@@ -159,31 +167,47 @@
} while (0)
#define RREG32_SOC15_RLC(ip, inst, reg) \
- __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, AMDGPU_REGS_RLC, ip##_HWIP)
+ __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, AMDGPU_REGS_RLC, ip##_HWIP, inst)
#define WREG32_SOC15_RLC(ip, inst, reg, value) \
do { \
- uint32_t target_reg = adev->reg_offset[ip##_HWIP][0][reg##_BASE_IDX] + reg;\
- __WREG32_SOC15_RLC__(target_reg, value, AMDGPU_REGS_RLC, ip##_HWIP); \
+ uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
+ __WREG32_SOC15_RLC__(target_reg, value, AMDGPU_REGS_RLC, ip##_HWIP, inst); \
} while (0)
#define WREG32_SOC15_RLC_EX(prefix, ip, inst, reg, value) \
do { \
- uint32_t target_reg = adev->reg_offset[GC_HWIP][0][reg##_BASE_IDX] + reg;\
- WREG32_RLC_EX(prefix, target_reg, value); \
+ uint32_t target_reg = adev->reg_offset[GC_HWIP][inst][reg##_BASE_IDX] + reg;\
+ WREG32_RLC_EX(prefix, target_reg, value, inst); \
} while (0)
#define WREG32_FIELD15_RLC(ip, idx, reg, field, val) \
__WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \
(__RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg, \
- AMDGPU_REGS_RLC, ip##_HWIP) & \
+ AMDGPU_REGS_RLC, ip##_HWIP, idx) & \
~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field), \
- AMDGPU_REGS_RLC, ip##_HWIP)
+ AMDGPU_REGS_RLC, ip##_HWIP, idx)
#define WREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset, value) \
- __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value, AMDGPU_REGS_RLC, ip##_HWIP)
+ __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value, AMDGPU_REGS_RLC, ip##_HWIP, inst)
#define RREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset) \
- __RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, AMDGPU_REGS_RLC, ip##_HWIP)
+ __RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, AMDGPU_REGS_RLC, ip##_HWIP, inst)
+
+/* inst equals to ext for some IPs */
+#define RREG32_SOC15_EXT(ip, inst, reg, ext) \
+ RREG32_PCIE_EXT((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) * 4 \
+ + adev->asic_funcs->encode_ext_smn_addressing(ext)) \
+
+#define WREG32_SOC15_EXT(ip, inst, reg, ext, value) \
+ WREG32_PCIE_EXT((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) * 4 \
+ + adev->asic_funcs->encode_ext_smn_addressing(ext), \
+ value) \
+
+#define RREG64_MCA(ext, mca_base, idx) \
+ RREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + mca_base + (idx * 8))
+
+#define WREG64_MCA(ext, mca_base, idx, val) \
+ WREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + mca_base + (idx * 8), val)
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index 799925d22fc8..cf93fa477674 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -76,6 +76,12 @@
((cond & 0xF) << 24) | \
((type & 0xF) << 28))
+#define CP_PACKETJ_NOP 0x60000000
+#define CP_PACKETJ_GET_REG(x) ((x) & 0x3FFFF)
+#define CP_PACKETJ_GET_RES(x) (((x) >> 18) & 0x3F)
+#define CP_PACKETJ_GET_COND(x) (((x) >> 24) & 0xF)
+#define CP_PACKETJ_GET_TYPE(x) (((x) >> 28) & 0xF)
+
/* Packet 3 types */
#define PACKET3_NOP 0x10
#define PACKET3_SET_BASE 0x11
@@ -87,11 +93,25 @@
#define PACKET3_DISPATCH_INDIRECT 0x16
#define PACKET3_ATOMIC_GDS 0x1D
#define PACKET3_ATOMIC_MEM 0x1E
+#define PACKET3_ATOMIC_MEM__ATOMIC(x) ((((unsigned)(x)) & 0x3F) << 0)
+#define PACKET3_ATOMIC_MEM__COMMAND(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_ATOMIC_MEM__ADDR_LO(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__ADDR_HI(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__SRC_DATA_LO(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__SRC_DATA_HI(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__CMP_DATA_LO(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__CMP_DATA_HI(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__LOOP_INTERVAL(x) ((((unsigned)(x)) & 0x1FFF) << 0)
+#define PACKET3_ATOMIC_MEM__COMMAND__SINGLE_PASS_ATOMIC 0
+#define PACKET3_ATOMIC_MEM__COMMAND__LOOP_UNTIL_COMPARE_SATISFIED 1
#define PACKET3_OCCLUSION_QUERY 0x1F
#define PACKET3_SET_PREDICATION 0x20
#define PACKET3_REG_RMW 0x21
#define PACKET3_COND_EXEC 0x22
#define PACKET3_PRED_EXEC 0x23
+#define PACKET3_PRED_EXEC__EXEC_COUNT(x) ((((unsigned)(x)) & 0x3FFF) << 0)
+#define PACKET3_PRED_EXEC__VIRTUAL_XCC_ID_SELECT(x) ((((unsigned)(x)) & 0xFF) << 24)
#define PACKET3_DRAW_INDIRECT 0x24
#define PACKET3_DRAW_INDEX_INDIRECT 0x25
#define PACKET3_INDEX_BASE 0x26
@@ -126,6 +146,28 @@
* 1 - pfp
* 2 - ce
*/
+#define PACKET3_WRITE_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_WRITE_DATA__ADDR_INCR(x) ((((unsigned)(x)) & 0x1) << 16)
+#define PACKET3_WRITE_DATA__RESUME_VF_MI300(x) ((((unsigned)(x)) & 0x1) << 19)
+#define PACKET3_WRITE_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_WRITE_DATA__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WRITE_DATA__DST_MMREG_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WRITE_DATA__DST_GDS_ADDR(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_WRITE_DATA__DST_MEM_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_WRITE_DATA__DST_MEM_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_WRITE_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_WRITE_DATA__DST_SEL__TC_L2 2
+#define PACKET3_WRITE_DATA__DST_SEL__GDS 3
+#define PACKET3_WRITE_DATA__DST_SEL__MEMORY 5
+#define PACKET3_WRITE_DATA__DST_SEL__MEMORY_MAPPED_ADC_PERSISTENT_STATE 6
+#define PACKET3_WRITE_DATA__ADDR_INCR__INCREMENT_ADDRESS 0
+#define PACKET3_WRITE_DATA__ADDR_INCR__DO_NOT_INCREMENT_ADDRESS 1
+#define PACKET3_WRITE_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_WRITE_CONFIRMATION 0
+#define PACKET3_WRITE_DATA__WR_CONFIRM__WAIT_FOR_WRITE_CONFIRMATION 1
+#define PACKET3_WRITE_DATA__CACHE_POLICY__LRU 0
+#define PACKET3_WRITE_DATA__CACHE_POLICY__STREAM 1
+#define PACKET3_WRITE_DATA__CACHE_POLICY__NOA 2
+#define PACKET3_WRITE_DATA__CACHE_POLICY__BYPASS 3
#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
#define PACKET3_MEM_SEMAPHORE 0x39
# define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
@@ -154,6 +196,33 @@
/* 0 - me
* 1 - pfp
*/
+#define PACKET3_WAIT_REG_MEM__FUNCTION(x) ((((unsigned)(x)) & 0x7) << 0)
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE(x) ((((unsigned)(x)) & 0x3) << 4)
+#define PACKET3_WAIT_REG_MEM__OPERATION(x) ((((unsigned)(x)) & 0x3) << 6)
+#define PACKET3_WAIT_REG_MEM__MES_INTR_PIPE(x) ((((unsigned)(x)) & 0x3) << 22)
+#define PACKET3_WAIT_REG_MEM__MES_ACTION(x) ((((unsigned)(x)) & 0x1) << 24)
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_WAIT_REG_MEM__REG_POLL_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR1(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR2(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__REFERENCE(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__MASK(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__OPTIMIZE_ACE_OFFLOAD_MODE(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_WAIT_REG_MEM__FUNCTION__ALWAYS_PASS 0
+#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_REF_VALUE 1
+#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_EQUAL_TO_THE_REF_VALUE 2
+#define PACKET3_WAIT_REG_MEM__FUNCTION__EQUAL_TO_THE_REFERENCE_VALUE 3
+#define PACKET3_WAIT_REG_MEM__FUNCTION__NOT_EQUAL_REFERENCE_VALUE 4
+#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_OR_EQUAL_REFERENCE_VALUE 5
+#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_REFERENCE_VALUE 6
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE__REGISTER_SPACE 0
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE__MEMORY_SPACE 1
+#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_REG_MEM 0
+#define PACKET3_WAIT_REG_MEM__OPERATION__WR_WAIT_WR_REG 1
+#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_MEM_PREEMPTABLE 3
#define PACKET3_INDIRECT_BUFFER 0x3F
#define INDIRECT_BUFFER_VALID (1 << 23)
#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
@@ -162,7 +231,64 @@
* 2 - Bypass
*/
#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
+#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
+#define PACKET3_INDIRECT_BUFFER__IB_BASE_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_INDIRECT_BUFFER__IB_BASE_HI(x) ((unsigned)(x))
+#define PACKET3_INDIRECT_BUFFER__IB_SIZE(x) ((((unsigned)(x)) & 0xFFFFF) << 0)
+#define PACKET3_INDIRECT_BUFFER__CHAIN(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_INDIRECT_BUFFER__OFFLOAD_POLLING(x) ((((unsigned)(x)) & 0x1) << 21)
+#define PACKET3_INDIRECT_BUFFER__VALID(x) ((((unsigned)(x)) & 0x1) << 23)
+#define PACKET3_INDIRECT_BUFFER__VMID(x) ((((unsigned)(x)) & 0xF) << 24)
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 28)
+#define PACKET3_INDIRECT_BUFFER__PRIV(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__LRU 0
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__STREAM 1
#define PACKET3_COPY_DATA 0x40
+#define PACKET3_COPY_DATA__SRC_SEL(x) ((((unsigned)(x)) & 0xF) << 0)
+#define PACKET3_COPY_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 13)
+#define PACKET3_COPY_DATA__COUNT_SEL(x) ((((unsigned)(x)) & 0x1) << 16)
+#define PACKET3_COPY_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS(x) ((((unsigned)(x)) & 0x1) << 29)
+#define PACKET3_COPY_DATA__SRC_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_COPY_DATA__SRC_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_COPY_DATA__SRC_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_COPY_DATA__SRC_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_COPY_DATA__IMM_DATA(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_MEMTC_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_IMM_DATA(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__DST_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_COPY_DATA__DST_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_COPY_DATA__DST_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_COPY_DATA__DST_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_COPY_DATA__DST_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_COPY_DATA__SRC_SEL__MEMORY 1
+#define PACKET3_COPY_DATA__SRC_SEL__TC_L2 2
+#define PACKET3_COPY_DATA__SRC_SEL__GDS 3
+#define PACKET3_COPY_DATA__SRC_SEL__PERFCOUNTERS 4
+#define PACKET3_COPY_DATA__SRC_SEL__IMMEDIATE_DATA 5
+#define PACKET3_COPY_DATA__SRC_SEL__ATOMIC_RETURN_DATA 6
+#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA0 7
+#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA1 8
+#define PACKET3_COPY_DATA__SRC_SEL__GPU_CLOCK_COUNT 9
+#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_COPY_DATA__DST_SEL__TC_L2 2
+#define PACKET3_COPY_DATA__DST_SEL__GDS 3
+#define PACKET3_COPY_DATA__DST_SEL__PERFCOUNTERS 4
+#define PACKET3_COPY_DATA__DST_SEL__MEMORY 5
+#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REG_DC 6
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__LRU 0
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__STREAM 1
+#define PACKET3_COPY_DATA__COUNT_SEL__32_BITS_OF_DATA 0
+#define PACKET3_COPY_DATA__COUNT_SEL__64_BITS_OF_DATA 1
+#define PACKET3_COPY_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_CONFIRMATION 0
+#define PACKET3_COPY_DATA__WR_CONFIRM__WAIT_FOR_CONFIRMATION 1
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__LRU 0
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__STREAM 1
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS__DEFAULT 0
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS__PHASE_UPDATE 1
#define PACKET3_PFP_SYNC_ME 0x42
#define PACKET3_COND_WRITE 0x45
#define PACKET3_EVENT_WRITE 0x46
@@ -174,6 +300,15 @@
* 3 - SAMPLE_STREAMOUTSTAT*
* 4 - *S_PARTIAL_FLUSH
*/
+#define PACKET3_EVENT_WRITE__EVENT_TYPE(x) ((((unsigned)(x)) & 0x3F) << 0)
+#define PACKET3_EVENT_WRITE__EVENT_INDEX(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_EVENT_WRITE__OFFLOAD_ENABLE(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE(x) ((((unsigned)(x)) & 0x3) << 29)
+#define PACKET3_EVENT_WRITE__ADDRESS_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_EVENT_WRITE__ADDRESS_HI(x) (((unsigned)(x)) << 0)
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__OTHER 0
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_PIPELINESTATS 2
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__CS_PARTIAL_FLUSH 4
#define PACKET3_RELEASE_MEM 0x49
#define EVENT_TYPE(x) ((x) << 0)
#define EVENT_INDEX(x) ((x) << 8)
@@ -184,6 +319,7 @@
#define EOP_TC_ACTION_EN (1 << 17) /* L2 */
#define EOP_TC_NC_ACTION_EN (1 << 19)
#define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */
+#define EOP_EXEC (1 << 28) /* For Trailing Fence */
#define DATA_SEL(x) ((x) << 29)
/* 0 - discard
@@ -278,6 +414,13 @@
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(x) ((x) << 29)
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_WB_ACTION_ENA(x) ((x) << 30)
#define PACKET3_REWIND 0x59
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE(x) ((unsigned)(x))
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI_VG10(x) ((((unsigned)(x)) & 0xFFFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__COHER_BASE_LO(x) ((unsigned)(x))
+#define PACKET3_ACQUIRE_MEM__COHER_BASE_HI(x) ((((unsigned)(x)) & 0xFFFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__GCR_CNTL(x) ((((unsigned)(x)) & 0x7FF) << 0)
#define PACKET3_LOAD_UCONFIG_REG 0x5E
#define PACKET3_LOAD_SH_REG 0x5F
#define PACKET3_LOAD_CONFIG_REG 0x60
@@ -292,12 +435,16 @@
#define PACKET3_SET_SH_REG 0x76
#define PACKET3_SET_SH_REG_START 0x00002c00
#define PACKET3_SET_SH_REG_END 0x00003000
+#define PACKET3_SET_SH_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_SET_SH_REG__VMID_SHIFT(x) ((((unsigned)(x)) & 0x1F) << 23)
+#define PACKET3_SET_SH_REG__INDEX(x) ((((unsigned)(x)) & 0xF) << 28)
#define PACKET3_SET_SH_REG_OFFSET 0x77
#define PACKET3_SET_QUEUE_REG 0x78
#define PACKET3_SET_UCONFIG_REG 0x79
#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
#define PACKET3_SET_UCONFIG_REG_INDEX_TYPE (2 << 28)
+#define PACKET3_SET_UCONFIG_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
#define PACKET3_SCRATCH_RAM_WRITE 0x7D
#define PACKET3_SCRATCH_RAM_READ 0x7E
#define PACKET3_LOAD_CONST_RAM 0x80
@@ -405,6 +552,15 @@
# define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2)
# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25)
+#define PACKET3_RUN_CLEANER_SHADER_9_0 0xD7
+/* 1. header
+ * 2. RESERVED [31:0]
+ */
+
+#define PACKET3_RUN_CLEANER_SHADER 0xD2
+/* 1. header
+ * 2. RESERVED [31:0]
+ */
#define VCE_CMD_NO_OP 0x00000000
#define VCE_CMD_END 0x00000001
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
new file mode 100644
index 000000000000..ad36c96478a8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -0,0 +1,1021 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "amdgpu_ih.h"
+#include "amdgpu_uvd.h"
+#include "amdgpu_vce.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_smu.h"
+#include "atom.h"
+#include "amd_pcie.h"
+
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "mp/mp_13_0_0_offset.h"
+
+#include "soc15.h"
+#include "soc15_common.h"
+#include "soc21.h"
+#include "mxgpu_nv.h"
+
+static const struct amd_ip_funcs soc21_common_ip_funcs;
+
+/* SOC21 */
+/*
+ * Video codec capability tables for VCN 4.0.x when not running as an SR-IOV
+ * virtual function. Each entry is produced by codec_info_build() from a codec
+ * index and three numeric limits (presumably max width, max height and max
+ * level -- confirm against the codec_info_build() definition).
+ *
+ * The *_vcn1 arrays are the *_vcn0 arrays without the AV1 entry. The
+ * amdgpu_video_codecs wrappers pair each array with its element count and
+ * are what soc21_query_video_codecs() hands back to callers.
+ */
+static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn1[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn0 = {
+ .codec_count = ARRAY_SIZE(vcn_4_0_0_video_codecs_encode_array_vcn0),
+ .codec_array = vcn_4_0_0_video_codecs_encode_array_vcn0,
+};
+
+static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn1 = {
+ .codec_count = ARRAY_SIZE(vcn_4_0_0_video_codecs_encode_array_vcn1),
+ .codec_array = vcn_4_0_0_video_codecs_encode_array_vcn1,
+};
+
+static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn1[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_decode_vcn0 = {
+ .codec_count = ARRAY_SIZE(vcn_4_0_0_video_codecs_decode_array_vcn0),
+ .codec_array = vcn_4_0_0_video_codecs_decode_array_vcn0,
+};
+
+static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_decode_vcn1 = {
+ .codec_count = ARRAY_SIZE(vcn_4_0_0_video_codecs_decode_array_vcn1),
+ .codec_array = vcn_4_0_0_video_codecs_decode_array_vcn1,
+};
+
+/* SRIOV SOC21, not const since data is controlled by host */
+/*
+ * Same layout as the bare-metal VCN 4.0.x tables, but writable: the arrays
+ * are passed to amdgpu_virt_update_sriov_video_codec() from
+ * soc21_common_late_init(). As with the bare-metal tables, the *_vcn1 arrays
+ * omit the AV1 entry.
+ */
+static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_encode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_encode_array_vcn1[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
+};
+
+static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn0 = {
+ .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn0),
+ .codec_array = sriov_vcn_4_0_0_video_codecs_encode_array_vcn0,
+};
+
+static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn1 = {
+ .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn1),
+ .codec_array = sriov_vcn_4_0_0_video_codecs_encode_array_vcn1,
+};
+
+static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn1[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+};
+
+static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_decode_vcn0 = {
+ .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn0),
+ .codec_array = sriov_vcn_4_0_0_video_codecs_decode_array_vcn0,
+};
+
+static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_decode_vcn1 = {
+ .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn1),
+ .codec_array = sriov_vcn_4_0_0_video_codecs_decode_array_vcn1,
+};
+
+/*
+ * soc21_query_video_codecs - select the codec capability table for a device
+ * @adev: device being queried
+ * @encode: true selects an encode table, false a decode table
+ * @codecs: receives a pointer to the selected table
+ *
+ * Returns 0 with *codecs set, or -EINVAL when the number of VCN instances
+ * equals the number of bits set in harvest_config, or when the UVD_HWIP
+ * version is not one handled here.
+ */
+static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode,
+				    const struct amdgpu_video_codecs **codecs)
+{
+	bool vcn0_harvested;
+
+	if (hweight8(adev->vcn.harvest_config) == adev->vcn.num_vcn_inst)
+		return -EINVAL;
+
+	switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
+	case IP_VERSION(4, 0, 0):
+	case IP_VERSION(4, 0, 2):
+	case IP_VERSION(4, 0, 4):
+	case IP_VERSION(4, 0, 5):
+		vcn0_harvested =
+			!!(adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0);
+
+		if (!amdgpu_sriov_vf(adev)) {
+			/* bare metal: only VCN0 harvesting picks the vcn1 tables */
+			if (vcn0_harvested)
+				*codecs = encode ?
+					&vcn_4_0_0_video_codecs_encode_vcn1 :
+					&vcn_4_0_0_video_codecs_decode_vcn1;
+			else
+				*codecs = encode ?
+					&vcn_4_0_0_video_codecs_encode_vcn0 :
+					&vcn_4_0_0_video_codecs_decode_vcn0;
+			return 0;
+		}
+
+		/* SR-IOV VF: missing AV1 support also picks the vcn1 tables */
+		if (vcn0_harvested || !amdgpu_sriov_is_av1_support(adev))
+			*codecs = encode ?
+				&sriov_vcn_4_0_0_video_codecs_encode_vcn1 :
+				&sriov_vcn_4_0_0_video_codecs_decode_vcn1;
+		else
+			*codecs = encode ?
+				&sriov_vcn_4_0_0_video_codecs_encode_vcn0 :
+				&sriov_vcn_4_0_0_video_codecs_decode_vcn0;
+		return 0;
+	case IP_VERSION(4, 0, 6):
+		*codecs = encode ? &vcn_4_0_0_video_codecs_encode_vcn0 :
+				   &vcn_4_0_0_video_codecs_decode_vcn0;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * soc21_didt_rreg - read a DIDT register through the index/data pair
+ * @adev: device to access
+ * @reg: value written to DIDT_IND_INDEX before the data read
+ *
+ * The index write and data read are done under didt_idx_lock with
+ * interrupts saved/disabled. Returns the value read from DIDT_IND_DATA.
+ */
+static u32 soc21_didt_rreg(struct amdgpu_device *adev, u32 reg)
+{
+	unsigned long index_off = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_INDEX);
+	unsigned long data_off = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_DATA);
+	unsigned long irq_flags;
+	u32 val;
+
+	spin_lock_irqsave(&adev->didt_idx_lock, irq_flags);
+	WREG32(index_off, reg);
+	val = RREG32(data_off);
+	spin_unlock_irqrestore(&adev->didt_idx_lock, irq_flags);
+
+	return val;
+}
+
+/*
+ * soc21_didt_wreg - write a DIDT register through the index/data pair
+ * @adev: device to access
+ * @reg: value written to DIDT_IND_INDEX
+ * @v: value written to DIDT_IND_DATA
+ *
+ * Both writes are done under didt_idx_lock with interrupts saved/disabled.
+ */
+static void soc21_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+	unsigned long index_off = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_INDEX);
+	unsigned long data_off = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_DATA);
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&adev->didt_idx_lock, irq_flags);
+	WREG32(index_off, reg);
+	WREG32(data_off, v);
+	spin_unlock_irqrestore(&adev->didt_idx_lock, irq_flags);
+}
+
+/* Return whatever the NBIO get_memsize() callback reports for this device. */
+static u32 soc21_get_config_memsize(struct amdgpu_device *adev)
+{
+ return adev->nbio.funcs->get_memsize(adev);
+}
+
+/* Return the SPLL reference frequency stored in adev->clock. */
+static u32 soc21_get_xclk(struct amdgpu_device *adev)
+{
+ return adev->clock.spll.reference_freq;
+}
+
+
+/*
+ * soc21_grbm_select - program GRBM_GFX_CNTL with a me/pipe/queue/vmid tuple
+ * @adev: device to program
+ * @me: value for the MEID field
+ * @pipe: value for the PIPEID field
+ * @queue: value for the QUEUEID field
+ * @vmid: value for the VMID field
+ *
+ * Builds the register value from zero, so any field not listed above is
+ * written as 0.
+ */
+void soc21_grbm_select(struct amdgpu_device *adev,
+		       u32 me, u32 pipe, u32 queue, u32 vmid)
+{
+	u32 cntl = 0;
+
+	cntl = REG_SET_FIELD(cntl, GRBM_GFX_CNTL, MEID, me);
+	cntl = REG_SET_FIELD(cntl, GRBM_GFX_CNTL, PIPEID, pipe);
+	cntl = REG_SET_FIELD(cntl, GRBM_GFX_CNTL, QUEUEID, queue);
+	cntl = REG_SET_FIELD(cntl, GRBM_GFX_CNTL, VMID, vmid);
+
+	WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, cntl);
+}
+
+/* Placeholder: reading a disabled BIOS is not implemented; always false. */
+static bool soc21_read_disabled_bios(struct amdgpu_device *adev)
+{
+ /* todo */
+ return false;
+}
+
+/*
+ * Registers that soc21_read_register() is willing to read. A requested
+ * offset that matches none of these entries is rejected with -EINVAL.
+ */
+static struct soc15_allowed_register_entry soc21_allowed_read_registers[] = {
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS2)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE0)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE1)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE2)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE3)},
+ { SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_STATUS_REG)},
+ { SOC15_REG_ENTRY(SDMA1, 0, regSDMA1_STATUS_REG)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT2)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT3)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_BUSY_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_BUSY_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, regGB_ADDR_CONFIG)},
+};
+
+/*
+ * soc21_read_indexed_register - read a register with an SE/SH selection
+ * @adev: device to access
+ * @se_num: SE selector, 0xffffffff together with @sh_num means "no selection"
+ * @sh_num: SH selector, see @se_num
+ * @reg_offset: register offset to read
+ *
+ * Holds grbm_idx_mutex for the whole sequence. When either selector differs
+ * from 0xffffffff the selection is applied before the read and reset to
+ * all-0xffffffff afterwards. Returns the value read.
+ */
+static uint32_t soc21_read_indexed_register(struct amdgpu_device *adev, u32 se_num,
+					    u32 sh_num, u32 reg_offset)
+{
+	const bool narrowed = (se_num != 0xffffffff) || (sh_num != 0xffffffff);
+	uint32_t result;
+
+	mutex_lock(&adev->grbm_idx_mutex);
+
+	if (narrowed)
+		amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
+
+	result = RREG32(reg_offset);
+
+	if (narrowed)
+		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return result;
+}
+
+/*
+ * soc21_get_register_value - fetch one register value
+ * @adev: device to access
+ * @indexed: true to read through soc21_read_indexed_register()
+ * @se_num: SE selector for the indexed path
+ * @sh_num: SH selector for the indexed path
+ * @reg_offset: register offset to read
+ *
+ * For the non-indexed path, GB_ADDR_CONFIG is answered from
+ * adev->gfx.config.gb_addr_config when that cached value is non-zero;
+ * everything else is a plain register read.
+ */
+static uint32_t soc21_get_register_value(struct amdgpu_device *adev,
+					 bool indexed, u32 se_num,
+					 u32 sh_num, u32 reg_offset)
+{
+	if (indexed)
+		return soc21_read_indexed_register(adev, se_num, sh_num, reg_offset);
+
+	if (reg_offset == SOC15_REG_OFFSET(GC, 0, regGB_ADDR_CONFIG) &&
+	    adev->gfx.config.gb_addr_config)
+		return adev->gfx.config.gb_addr_config;
+
+	return RREG32(reg_offset);
+}
+
+/*
+ * soc21_read_register - read a register if it is on the allow list
+ * @adev: device to access
+ * @se_num: SE selector forwarded to the read helper
+ * @sh_num: SH selector forwarded to the read helper
+ * @reg_offset: absolute register offset requested by the caller
+ * @value: receives the register value; zeroed up front
+ *
+ * Returns 0 on success, -EINVAL when @reg_offset matches no entry of
+ * soc21_allowed_read_registers.
+ */
+static int soc21_read_register(struct amdgpu_device *adev, u32 se_num,
+			       u32 sh_num, u32 reg_offset, u32 *value)
+{
+	const struct soc15_allowed_register_entry *entry;
+	const uint32_t *ip_base;
+	uint32_t idx;
+
+	*value = 0;
+
+	for (idx = 0; idx < ARRAY_SIZE(soc21_allowed_read_registers); idx++) {
+		entry = &soc21_allowed_read_registers[idx];
+		ip_base = adev->reg_offset[entry->hwip][entry->inst];
+
+		/* skip IP instances without a base table, and other offsets */
+		if (!ip_base ||
+		    reg_offset != (ip_base[entry->seg] + entry->reg_offset))
+			continue;
+
+		*value = soc21_get_register_value(adev, entry->grbm_indexed,
+						  se_num, sh_num, reg_offset);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Compiled out: local mode1 reset implementation. soc21_asic_reset() calls
+ * amdgpu_device_mode1_reset() for the MODE1 case instead of this function.
+ */
+#if 0
+static int soc21_asic_mode1_reset(struct amdgpu_device *adev)
+{
+ u32 i;
+ int ret = 0;
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+
+ /* disable BM */
+ pci_clear_master(adev->pdev);
+
+ amdgpu_device_cache_pci_state(adev->pdev);
+
+ if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
+ dev_info(adev->dev, "GPU smu mode1 reset\n");
+ ret = amdgpu_dpm_mode1_reset(adev);
+ } else {
+ dev_info(adev->dev, "GPU psp mode1 reset\n");
+ ret = psp_gpu_reset(adev);
+ }
+
+ if (ret)
+ dev_err(adev->dev, "GPU mode1 reset failed\n");
+ amdgpu_device_load_pci_state(adev->pdev);
+
+ /* wait for asic to come out of reset */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ u32 memsize = adev->nbio.funcs->get_memsize(adev);
+
+ /* an all-ones memsize is treated as "still in reset" */
+ if (memsize != 0xffffffff)
+ break;
+ udelay(1);
+ }
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+
+ return ret;
+}
+#endif
+
+/*
+ * soc21_asic_reset_method - decide which reset method to use
+ * @adev: device being reset
+ *
+ * An amdgpu_reset_method of MODE1, MODE2 or BACO is returned as-is. Any
+ * other value except -1 produces a warning. The method is then chosen from
+ * the MP1_HWIP version; unlisted versions use BACO when
+ * amdgpu_dpm_is_baco_supported() says so and MODE1 otherwise.
+ */
+static enum amd_reset_method
+soc21_asic_reset_method(struct amdgpu_device *adev)
+{
+	switch (amdgpu_reset_method) {
+	case AMD_RESET_METHOD_MODE1:
+	case AMD_RESET_METHOD_MODE2:
+	case AMD_RESET_METHOD_BACO:
+		return amdgpu_reset_method;
+	case -1:
+		/* nothing requested: pick automatically, silently */
+		break;
+	default:
+		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
+			 amdgpu_reset_method);
+		break;
+	}
+
+	switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+	case IP_VERSION(13, 0, 0):
+	case IP_VERSION(13, 0, 7):
+	case IP_VERSION(13, 0, 10):
+		return AMD_RESET_METHOD_MODE1;
+	case IP_VERSION(13, 0, 4):
+	case IP_VERSION(13, 0, 11):
+	case IP_VERSION(14, 0, 0):
+	case IP_VERSION(14, 0, 1):
+	case IP_VERSION(14, 0, 4):
+	case IP_VERSION(14, 0, 5):
+		return AMD_RESET_METHOD_MODE2;
+	default:
+		return amdgpu_dpm_is_baco_supported(adev) ?
+			AMD_RESET_METHOD_BACO : AMD_RESET_METHOD_MODE1;
+	}
+}
+
+/*
+ * soc21_asic_reset - reset the ASIC with the method picked for this device
+ * @adev: device to reset
+ *
+ * Logs which method is used and returns the result of the matching reset
+ * helper. Anything that is not PCI, BACO or MODE2 is handled as MODE1.
+ */
+static int soc21_asic_reset(struct amdgpu_device *adev)
+{
+	switch (soc21_asic_reset_method(adev)) {
+	case AMD_RESET_METHOD_PCI:
+		dev_info(adev->dev, "PCI reset\n");
+		return amdgpu_device_pci_reset(adev);
+	case AMD_RESET_METHOD_BACO:
+		dev_info(adev->dev, "BACO reset\n");
+		return amdgpu_dpm_baco_reset(adev);
+	case AMD_RESET_METHOD_MODE2:
+		dev_info(adev->dev, "MODE2 reset\n");
+		return amdgpu_dpm_mode2_reset(adev);
+	default:
+		dev_info(adev->dev, "MODE1 reset\n");
+		return amdgpu_device_mode1_reset(adev);
+	}
+}
+
+/* Placeholder: UVD clock programming is not implemented; reports success. */
+static int soc21_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
+{
+ /* todo */
+ return 0;
+}
+
+/* Placeholder: VCE clock programming is not implemented; reports success. */
+static int soc21_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
+{
+ /* todo */
+ return 0;
+}
+
+/*
+ * soc21_program_aspm - program ASPM through NBIO when it should be used
+ * @adev: device to program
+ *
+ * Does nothing unless amdgpu_device_should_use_aspm() agrees and the NBIO
+ * block provides a program_aspm callback.
+ */
+static void soc21_program_aspm(struct amdgpu_device *adev)
+{
+	if (amdgpu_device_should_use_aspm(adev) &&
+	    adev->nbio.funcs->program_aspm)
+		adev->nbio.funcs->program_aspm(adev);
+}
+
+/* IP block descriptor (COMMON, version 1.0.0) exported via soc21.h. */
+const struct amdgpu_ip_block_version soc21_common_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_COMMON,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &soc21_common_ip_funcs,
+};
+
+/*
+ * Report whether a full reset is required. Every case, including the
+ * default, currently falls through to the same "return true"; the listed
+ * GC versions only document which parts were considered.
+ */
+static bool soc21_need_full_reset(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ default:
+ return true;
+ }
+}
+
+/*
+ * soc21_need_reset_on_init - decide whether the ASIC must be reset at init
+ * @adev: device being initialised
+ *
+ * Always false for APUs. Otherwise true when the MP0 C2PMSG_81 register
+ * reads back non-zero.
+ */
+static bool soc21_need_reset_on_init(struct amdgpu_device *adev)
+{
+	if (adev->flags & AMD_IS_APU)
+		return false;
+
+	/* Check the sOS sign of life register to confirm that the sys driver
+	 * and sOS have already been loaded.
+	 */
+	return RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81) != 0;
+}
+
+/*
+ * Fill adev->doorbell_index with the AMDGPU_NAVI10_DOORBELL* assignments
+ * for KIQ, MEC, user queues, GFX, MES, SDMA, IH, VCN and VPE, plus the
+ * non-CP range, the maximum assignment and the SDMA doorbell range.
+ */
+static void soc21_init_doorbell_index(struct amdgpu_device *adev)
+{
+ adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ;
+ adev->doorbell_index.mec_ring0 = AMDGPU_NAVI10_DOORBELL_MEC_RING0;
+ adev->doorbell_index.mec_ring1 = AMDGPU_NAVI10_DOORBELL_MEC_RING1;
+ adev->doorbell_index.mec_ring2 = AMDGPU_NAVI10_DOORBELL_MEC_RING2;
+ adev->doorbell_index.mec_ring3 = AMDGPU_NAVI10_DOORBELL_MEC_RING3;
+ adev->doorbell_index.mec_ring4 = AMDGPU_NAVI10_DOORBELL_MEC_RING4;
+ adev->doorbell_index.mec_ring5 = AMDGPU_NAVI10_DOORBELL_MEC_RING5;
+ adev->doorbell_index.mec_ring6 = AMDGPU_NAVI10_DOORBELL_MEC_RING6;
+ adev->doorbell_index.mec_ring7 = AMDGPU_NAVI10_DOORBELL_MEC_RING7;
+ adev->doorbell_index.userqueue_start = AMDGPU_NAVI10_DOORBELL_USERQUEUE_START;
+ adev->doorbell_index.userqueue_end = AMDGPU_NAVI10_DOORBELL_USERQUEUE_END;
+ adev->doorbell_index.gfx_ring0 = AMDGPU_NAVI10_DOORBELL_GFX_RING0;
+ adev->doorbell_index.gfx_ring1 = AMDGPU_NAVI10_DOORBELL_GFX_RING1;
+ adev->doorbell_index.gfx_userqueue_start =
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_START;
+ adev->doorbell_index.gfx_userqueue_end =
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_END;
+ adev->doorbell_index.mes_ring0 = AMDGPU_NAVI10_DOORBELL_MES_RING0;
+ adev->doorbell_index.mes_ring1 = AMDGPU_NAVI10_DOORBELL_MES_RING1;
+ adev->doorbell_index.sdma_engine[0] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0;
+ adev->doorbell_index.sdma_engine[1] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1;
+ adev->doorbell_index.ih = AMDGPU_NAVI10_DOORBELL_IH;
+ adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_NAVI10_DOORBELL64_VCN0_1;
+ adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_NAVI10_DOORBELL64_VCN2_3;
+ adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_NAVI10_DOORBELL64_VCN4_5;
+ adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_NAVI10_DOORBELL64_VCN6_7;
+ adev->doorbell_index.vpe_ring = AMDGPU_NAVI10_DOORBELL64_VPE;
+ adev->doorbell_index.first_non_cp = AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP;
+ adev->doorbell_index.last_non_cp = AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP;
+
+ /* NOTE(review): the << 1 doubles the constant; presumably a 64-bit to
+ * dword index conversion -- confirm against the doorbell layout.
+ */
+ adev->doorbell_index.max_assignment = AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT << 1;
+ adev->doorbell_index.sdma_doorbell_range = 20;
+}
+
+/* Intentionally empty: nothing is done before ASIC init on SOC21. */
+static void soc21_pre_asic_init(struct amdgpu_device *adev)
+{
+}
+
+/*
+ * soc21_update_umd_stable_pstate - enter or leave the UMD stable pstate
+ * @adev: device to update
+ * @enter: true to enter, false to leave
+ *
+ * Enters or exits RLC safe mode according to @enter, then, when the GFX
+ * block provides update_perfmon_mgcg, calls it with the inverse of @enter.
+ * Always returns 0.
+ */
+static int soc21_update_umd_stable_pstate(struct amdgpu_device *adev,
+					  bool enter)
+{
+	if (!enter)
+		amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+	else
+		amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+	if (!adev->gfx.funcs->update_perfmon_mgcg)
+		return 0;
+
+	adev->gfx.funcs->update_perfmon_mgcg(adev, !enter);
+
+	return 0;
+}
+
+/*
+ * ASIC callback table installed into adev->asic_funcs by
+ * soc21_common_early_init(). Entries without a soc21_ prefix reuse shared
+ * amdgpu helpers.
+ */
+static const struct amdgpu_asic_funcs soc21_asic_funcs = {
+ .read_disabled_bios = &soc21_read_disabled_bios,
+ .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom,
+ .read_register = &soc21_read_register,
+ .reset = &soc21_asic_reset,
+ .reset_method = &soc21_asic_reset_method,
+ .get_xclk = &soc21_get_xclk,
+ .set_uvd_clocks = &soc21_set_uvd_clocks,
+ .set_vce_clocks = &soc21_set_vce_clocks,
+ .get_config_memsize = &soc21_get_config_memsize,
+ .init_doorbell_index = &soc21_init_doorbell_index,
+ .need_full_reset = &soc21_need_full_reset,
+ .need_reset_on_init = &soc21_need_reset_on_init,
+ .get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count,
+ .supports_baco = &amdgpu_dpm_is_baco_supported,
+ .pre_asic_init = &soc21_pre_asic_init,
+ .query_video_codecs = &soc21_query_video_codecs,
+ .update_umd_stable_pstate = &soc21_update_umd_stable_pstate,
+};
+
+/*
+ * soc21_common_early_init - early setup of the SOC21 common IP block
+ * @ip_block: the common IP block; its adev is the device being set up
+ *
+ * Installs the register accessor callbacks and the ASIC function table,
+ * then sets cg_flags (clock gating), pg_flags (power gating) and
+ * external_rev_id according to the GC_HWIP version. For an SR-IOV virtual
+ * function it additionally initialises the virt settings and the mailbox
+ * IRQ functions.
+ *
+ * Returns 0 on success, -EINVAL for a GC version that is not listed.
+ */
+static int soc21_common_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->nbio.funcs->set_reg_remap(adev);
+ /* no SMC accessors on this ASIC family */
+ adev->smc_rreg = NULL;
+ adev->smc_wreg = NULL;
+ adev->pcie_rreg = &amdgpu_device_indirect_rreg;
+ adev->pcie_wreg = &amdgpu_device_indirect_wreg;
+ adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64;
+ adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64;
+ adev->pciep_rreg = amdgpu_device_pcie_port_rreg;
+ adev->pciep_wreg = amdgpu_device_pcie_port_wreg;
+
+ /* TODO: will add them during VCN v2 implementation */
+ adev->uvd_ctx_rreg = NULL;
+ adev->uvd_ctx_wreg = NULL;
+
+ adev->didt_rreg = &soc21_didt_rreg;
+ adev->didt_wreg = &soc21_didt_wreg;
+
+ adev->asic_funcs = &soc21_asic_funcs;
+
+ adev->rev_id = amdgpu_device_get_rev_id(adev);
+ /* 0xff is a placeholder; every supported case below overwrites it */
+ adev->external_rev_id = 0xff;
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+#if 0
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+#endif
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_HDP_SD;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_ATHUB |
+ AMD_PG_SUPPORT_MMHUB;
+ adev->external_rev_id = adev->rev_id + 0x1; // TODO: need update
+ break;
+ case IP_VERSION(11, 0, 2):
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_HDP_SD;
+ adev->pg_flags =
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_ATHUB |
+ AMD_PG_SUPPORT_MMHUB;
+ adev->external_rev_id = adev->rev_id + 0x10;
+ break;
+ case IP_VERSION(11, 0, 1):
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG;
+ adev->pg_flags =
+ AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG;
+ adev->external_rev_id = adev->rev_id + 0x1;
+ break;
+ case IP_VERSION(11, 0, 3):
+ adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG;
+ adev->external_rev_id = adev->rev_id + 0x20;
+ break;
+ case IP_VERSION(11, 0, 4):
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_JPEG;
+ adev->external_rev_id = adev->rev_id + 0x80;
+ break;
+ case IP_VERSION(11, 5, 0):
+ adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG_DPG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_GFX_PG;
+ /* rev 0 gets a fixed external id; later revs are offset by 0x10 */
+ if (adev->rev_id == 0)
+ adev->external_rev_id = 0x1;
+ else
+ adev->external_rev_id = adev->rev_id + 0x10;
+ break;
+ case IP_VERSION(11, 5, 1):
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG;
+ adev->pg_flags =
+ AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG;
+ adev->external_rev_id = adev->rev_id + 0xc1;
+ break;
+ case IP_VERSION(11, 5, 2):
+ adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_GFX_PG;
+ adev->external_rev_id = adev->rev_id + 0x40;
+ break;
+ case IP_VERSION(11, 5, 3):
+ adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_GFX_PG;
+ adev->external_rev_id = adev->rev_id + 0x50;
+ break;
+ default:
+ /* FIXME: not supported yet */
+ return -EINVAL;
+ }
+
+ /* SR-IOV virtual functions need virt settings and mailbox IRQ hooks */
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_init_setting(adev);
+ xgpu_nv_mailbox_set_irq_funcs(adev);
+ }
+
+ return 0;
+}
+
+/*
+ * soc21_common_late_init - late setup of the SOC21 common IP block
+ * @ip_block: the common IP block; its adev is the device being set up
+ *
+ * As an SR-IOV virtual function: takes the mailbox IRQ and hands the
+ * writable SR-IOV codec tables to amdgpu_virt_update_sriov_video_codec()
+ * (the vcn1 set when VCN0 is harvested or AV1 is unsupported, else the vcn0
+ * set). Otherwise: takes the ATHUB error event interrupt when NBIO RAS and
+ * the IRQ source are present. Finally enables the selfring doorbell
+ * aperture. Always returns 0.
+ */
+static int soc21_common_late_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_video_codec_info *enc, *dec;
+	u32 n_enc, n_dec;
+
+	if (!amdgpu_sriov_vf(adev)) {
+		/* A failure to enable the athub_err_event interrupt does not
+		 * need to fail gpu late init. nbio v4_3 only supports fatal
+		 * error handling, so just enable the interrupt directly.
+		 */
+		if (adev->nbio.ras &&
+		    adev->nbio.ras_err_event_athub_irq.funcs)
+			amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+	} else {
+		xgpu_nv_mailbox_get_irq(adev);
+
+		if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) ||
+		    !amdgpu_sriov_is_av1_support(adev)) {
+			enc = sriov_vcn_4_0_0_video_codecs_encode_array_vcn1;
+			n_enc = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn1);
+			dec = sriov_vcn_4_0_0_video_codecs_decode_array_vcn1;
+			n_dec = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn1);
+		} else {
+			enc = sriov_vcn_4_0_0_video_codecs_encode_array_vcn0;
+			n_enc = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn0);
+			dec = sriov_vcn_4_0_0_video_codecs_decode_array_vcn0;
+			n_dec = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn0);
+		}
+
+		amdgpu_virt_update_sriov_video_codec(adev, enc, n_enc, dec, n_dec);
+	}
+
+	/* Enable the selfring doorbell aperture late because the doorbell BAR
+	 * aperture will change if the BAR is resized successfully in gmc
+	 * sw_init.
+	 */
+	adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true);
+
+	return 0;
+}
+
+/*
+ * Software init for the common block: registers the mailbox IRQ id when
+ * running as an SR-IOV virtual function. Always returns 0.
+ */
+static int soc21_common_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ xgpu_nv_mailbox_add_irq_id(adev);
+
+ return 0;
+}
+
+/*
+ * Hardware init for the common block: programs ASPM, initialises the NBIO
+ * registers, remaps the HDP registers (skipped for SR-IOV VFs or when NBIO
+ * has no remap callback) and enables the doorbell aperture. Also used by
+ * soc21_common_resume(). Always returns 0.
+ */
+static int soc21_common_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* enable aspm */
+ soc21_program_aspm(adev);
+ /* setup nbio registers */
+ adev->nbio.funcs->init_registers(adev);
+ /* remap HDP registers to a hole in mmio space,
+ * for the purpose of expose those registers
+ * to process space
+ */
+ if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev))
+ adev->nbio.funcs->remap_hdp_registers(adev);
+ /* enable the doorbell aperture */
+ adev->nbio.funcs->enable_doorbell_aperture(adev, true);
+
+ return 0;
+}
+
+/*
+ * soc21_common_hw_fini - hardware teardown of the SOC21 common IP block
+ * @ip_block: the common IP block; its adev is the device being torn down
+ *
+ * Disables both doorbell apertures, then releases the interrupt taken in
+ * late init: the mailbox IRQ for an SR-IOV VF, otherwise the ATHUB error
+ * event IRQ when NBIO RAS and the IRQ source are present. Always returns 0.
+ */
+static int soc21_common_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	/* The doorbell aperture and the selfring doorbell aperture are
+	 * disabled separately here because soc21_enable_doorbell_aperture
+	 * has been removed, and there is no need to delay disabling the
+	 * selfring doorbell.
+	 */
+	adev->nbio.funcs->enable_doorbell_aperture(adev, false);
+	adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false);
+
+	if (!amdgpu_sriov_vf(adev)) {
+		if (adev->nbio.ras &&
+		    adev->nbio.ras_err_event_athub_irq.funcs)
+			amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+		return 0;
+	}
+
+	xgpu_nv_mailbox_put_irq(adev);
+
+	return 0;
+}
+
+/* Suspend is identical to hardware teardown for this block. */
+static int soc21_common_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return soc21_common_hw_fini(ip_block);
+}
+
+/*
+ * soc21_need_reset_on_resume - detect an aborted S3 suspend on a dGPU
+ * @adev: device being resumed
+ *
+ * Only considered for non-APU devices with in_s3 set. Samples the MP0
+ * C2PMSG_81 register twice, 100 ms apart, and returns true when the two
+ * samples differ.
+ */
+static bool soc21_need_reset_on_resume(struct amdgpu_device *adev)
+{
+	u32 before, after;
+
+	/* Will reset for the following suspend abort cases.
+	 * 1) Only reset dGPU side.
+	 * 2) S3 suspend got aborted and TOS is active.
+	 * As for dGPU suspend abort cases the SOL value
+	 * will be kept as zero at this resume point.
+	 */
+	if ((adev->flags & AMD_IS_APU) || !adev->in_s3)
+		return false;
+
+	before = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+	msleep(100);
+	after = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+
+	return before != after;
+}
+
+/*
+ * soc21_common_resume - resume the SOC21 common IP block
+ * @ip_block: the common IP block; its adev is the device being resumed
+ *
+ * When soc21_need_reset_on_resume() reports an aborted S3 suspend, the ASIC
+ * is reset first. A failed reset is logged but does not abort the resume;
+ * hardware init is attempted regardless, as before.
+ *
+ * Returns the result of soc21_common_hw_init().
+ */
+static int soc21_common_resume(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int r;
+
+	if (soc21_need_reset_on_resume(adev)) {
+		/* terminate the message so it is not merged with the next one */
+		dev_info(adev->dev, "S3 suspend aborted, resetting...\n");
+		r = soc21_asic_reset(adev);
+		if (r)
+			dev_warn(adev->dev,
+				 "reset after aborted S3 suspend failed (%d)\n", r);
+	}
+
+	return soc21_common_hw_init(ip_block);
+}
+
+/* The common block has no busy state to check; always reports idle. */
+static bool soc21_common_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ return true;
+}
+
+/*
+ * soc21_common_set_clockgating_state - apply a clock gating state
+ * @ip_block: the common IP block; its adev is the device to program
+ * @state: requested state; AMD_CG_STATE_GATE enables gating
+ *
+ * For the listed NBIO_HWIP versions, updates NBIO medium grain clock gating,
+ * NBIO medium grain light sleep and HDP clock gating. Other versions are
+ * left untouched. Always returns 0.
+ */
+static int soc21_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					      enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	const bool gate = (state == AMD_CG_STATE_GATE);
+
+	switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+	case IP_VERSION(4, 3, 0):
+	case IP_VERSION(4, 3, 1):
+	case IP_VERSION(7, 7, 0):
+	case IP_VERSION(7, 7, 1):
+	case IP_VERSION(7, 11, 0):
+	case IP_VERSION(7, 11, 1):
+	case IP_VERSION(7, 11, 2):
+	case IP_VERSION(7, 11, 3):
+		adev->nbio.funcs->update_medium_grain_clock_gating(adev, gate);
+		adev->nbio.funcs->update_medium_grain_light_sleep(adev, gate);
+		adev->hdp.funcs->update_clock_gating(adev, gate);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * soc21_common_set_powergating_state - apply a power gating state
+ * @ip_block: the common IP block; its adev is the device to program
+ * @state: requested state; AMD_PG_STATE_GATE enables gating
+ *
+ * For LSDMA_HWIP 6.0.0 and 6.0.2, updates LSDMA memory power gating. Other
+ * versions are left untouched. Always returns 0.
+ */
+static int soc21_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
+					      enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	const bool gate = (state == AMD_PG_STATE_GATE);
+
+	switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) {
+	case IP_VERSION(6, 0, 0):
+	case IP_VERSION(6, 0, 2):
+		adev->lsdma.funcs->update_memory_power_gating(adev, gate);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Collect the current clock gating state: the NBIO and HDP callbacks are
+ * each given the same @flags pointer to update.
+ */
+static void soc21_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->nbio.funcs->get_clockgating_state(adev, flags);
+
+ adev->hdp.funcs->get_clock_gating_state(adev, flags);
+}
+
+/* IP callback table referenced by soc21_common_ip_block. */
+static const struct amd_ip_funcs soc21_common_ip_funcs = {
+ .name = "soc21_common",
+ .early_init = soc21_common_early_init,
+ .late_init = soc21_common_late_init,
+ .sw_init = soc21_common_sw_init,
+ .hw_init = soc21_common_hw_init,
+ .hw_fini = soc21_common_hw_fini,
+ .suspend = soc21_common_suspend,
+ .resume = soc21_common_resume,
+ .is_idle = soc21_common_is_idle,
+ .set_clockgating_state = soc21_common_set_clockgating_state,
+ .set_powergating_state = soc21_common_set_powergating_state,
+ .get_clockgating_state = soc21_common_get_clockgating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.h b/drivers/gpu/drm/amd/amdgpu/soc21.h
new file mode 100644
index 000000000000..4c8067af1b65
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SOC21_H__
+#define __SOC21_H__
+
+extern const struct amdgpu_ip_block_version soc21_common_ip_block;
+
+void soc21_grbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c
new file mode 100644
index 000000000000..972b449ab89f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/soc24.c
@@ -0,0 +1,601 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+#include "amdgpu_uvd.h"
+#include "amdgpu_vce.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_smu.h"
+#include "atom.h"
+#include "amd_pcie.h"
+
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "mp/mp_14_0_2_offset.h"
+
+#include "soc15.h"
+#include "soc15_common.h"
+#include "soc24.h"
+#include "mxgpu_nv.h"
+
+static const struct amd_ip_funcs soc24_common_ip_funcs;
+
+static const struct amdgpu_video_codec_info vcn_5_0_0_video_codecs_encode_array_vcn0[] = { /* encode codec entries used for VCN 5.0.0 */
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)}, /* presumably codec, max width, max height, level - confirm against codec_info_build */
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs vcn_5_0_0_video_codecs_encode_vcn0 = { /* count + pointer wrapper handed out by soc24_query_video_codecs() for encode */
+ .codec_count = ARRAY_SIZE(vcn_5_0_0_video_codecs_encode_array_vcn0), /* derived from the array, so it tracks added entries */
+ .codec_array = vcn_5_0_0_video_codecs_encode_array_vcn0,
+};
+
+static const struct amdgpu_video_codec_info vcn_5_0_0_video_codecs_decode_array_vcn0[] = { /* decode codec entries used for VCN 5.0.0 */
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, /* presumably codec, max width, max height, level - confirm against codec_info_build */
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs vcn_5_0_0_video_codecs_decode_vcn0 = { /* count + pointer wrapper handed out by soc24_query_video_codecs() for decode */
+ .codec_count = ARRAY_SIZE(vcn_5_0_0_video_codecs_decode_array_vcn0), /* derived from the array, so it tracks added entries */
+ .codec_array = vcn_5_0_0_video_codecs_decode_array_vcn0,
+};
+
+static int soc24_query_video_codecs(struct amdgpu_device *adev, bool encode,
+ const struct amdgpu_video_codecs **codecs)
+{ /* Point *codecs at the encode or decode table for this VCN version; returns 0 or -EINVAL. */
+ if (adev->vcn.num_vcn_inst == hweight8(adev->vcn.harvest_config)) /* set harvest bits equal the instance count: presumably no usable VCN - confirm */
+ return -EINVAL;
+
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) { /* the version is looked up under UVD_HWIP */
+ case IP_VERSION(5, 0, 0):
+ if (encode)
+ *codecs = &vcn_5_0_0_video_codecs_encode_vcn0;
+ else
+ *codecs = &vcn_5_0_0_video_codecs_decode_vcn0;
+ return 0;
+ default: /* any other version: *codecs is left untouched */
+ return -EINVAL;
+ }
+}
+
+static u32 soc24_get_config_memsize(struct amdgpu_device *adev)
+{ /* Return whatever the NBIO get_memsize callback reports. */
+ return adev->nbio.funcs->get_memsize(adev); /* the callback is not NULL-checked here */
+}
+
+static u32 soc24_get_xclk(struct amdgpu_device *adev)
+{ /* Return the SPLL reference frequency stored in adev->clock. */
+ return adev->clock.spll.reference_freq; /* plain field read, no register access */
+}
+
+void soc24_grbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid)
+{ /* Pack me/pipe/queue/vmid into GRBM_GFX_CNTL fields and write the register once. */
+ u32 grbm_gfx_cntl = 0; /* start from zero so fields not set below stay clear */
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, MEID, me);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue);
+
+ WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, grbm_gfx_cntl); /* single write of the assembled value */
+}
+
+static struct soc15_allowed_register_entry soc24_allowed_read_registers[] = { /* list searched by soc24_read_register(); offsets not listed get -EINVAL */
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS2)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE0)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE1)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE2)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE3)},
+ { SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_STATUS_REG)},
+ { SOC15_REG_ENTRY(SDMA1, 0, regSDMA1_STATUS_REG)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT2)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT3)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_BUSY_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_BUSY_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, regGB_ADDR_CONFIG)}, /* soc24_get_register_value() may answer this one from a cached copy */
+};
+
+static uint32_t soc24_read_indexed_register(struct amdgpu_device *adev,
+ u32 se_num,
+ u32 sh_num,
+ u32 reg_offset)
+{ /* Read reg_offset under grbm_idx_mutex, selecting a specific SE/SH around the read when asked. */
+ uint32_t val;
+
+ mutex_lock(&adev->grbm_idx_mutex); /* held across select, read and restore */
+ if (se_num != 0xffffffff || sh_num != 0xffffffff) /* both 0xffffffff: no explicit selection is made */
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
+
+ val = RREG32(reg_offset);
+
+ if (se_num != 0xffffffff || sh_num != 0xffffffff) /* undo the selection made above */
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ return val;
+}
+
+static uint32_t soc24_get_register_value(struct amdgpu_device *adev,
+ bool indexed, u32 se_num,
+ u32 sh_num, u32 reg_offset)
+{ /* Return the value for reg_offset via the indexed path, a cached copy, or a direct read. */
+ if (indexed) {
+ return soc24_read_indexed_register(adev, se_num, sh_num, reg_offset); /* takes grbm_idx_mutex internally */
+ } else {
+ if (reg_offset == SOC15_REG_OFFSET(GC, 0, regGB_ADDR_CONFIG) &&
+ adev->gfx.config.gb_addr_config) /* a non-zero cached value is used instead of reading the register */
+ return adev->gfx.config.gb_addr_config;
+ return RREG32(reg_offset); /* se_num and sh_num are ignored on this path */
+ }
+}
+
+static int soc24_read_register(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 reg_offset, u32 *value)
+{ /* Read reg_offset into *value if it is in soc24_allowed_read_registers; 0 on success, else -EINVAL. */
+ uint32_t i;
+ struct soc15_allowed_register_entry *en;
+
+ *value = 0; /* output stays 0 when the offset is not in the list */
+ for (i = 0; i < ARRAY_SIZE(soc24_allowed_read_registers); i++) {
+ en = &soc24_allowed_read_registers[i];
+ if (!adev->reg_offset[en->hwip][en->inst]) /* no base table for this IP instance: entry cannot match */
+ continue;
+ else if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg]
+ + en->reg_offset)) /* compare against segment base plus the entry's offset */
+ continue;
+
+ *value = soc24_get_register_value(adev,
+ soc24_allowed_read_registers[i].grbm_indexed, /* same entry that en points at */
+ se_num, sh_num, reg_offset);
+ return 0; /* first matching entry wins */
+ }
+ return -EINVAL;
+}
+
+static enum amd_reset_method
+soc24_asic_reset_method(struct amdgpu_device *adev)
+{ /* Choose the reset method: a supported amdgpu_reset_method override, else a per-MP1-version default. */
+ if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
+ amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
+ amdgpu_reset_method == AMD_RESET_METHOD_BACO) /* only these three override values are honored */
+ return amdgpu_reset_method;
+
+ if (amdgpu_reset_method != -1) /* -1 is the only other value that does not trigger the warning */
+ dev_warn(adev->dev,
+ "Specified reset method:%d isn't supported, using AUTO instead.\n",
+ amdgpu_reset_method);
+
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(14, 0, 2):
+ case IP_VERSION(14, 0, 3):
+ return AMD_RESET_METHOD_MODE1; /* BACO support is not consulted for these versions */
+ default:
+ if (amdgpu_dpm_is_baco_supported(adev))
+ return AMD_RESET_METHOD_BACO;
+ else
+ return AMD_RESET_METHOD_MODE1;
+ }
+}
+
+static int soc24_asic_reset(struct amdgpu_device *adev)
+{ /* Run the reset chosen by soc24_asic_reset_method() and return that helper's result. */
+ int ret = 0;
+
+ switch (soc24_asic_reset_method(adev)) {
+ case AMD_RESET_METHOD_PCI: /* NOTE(review): soc24_asic_reset_method() as written never returns PCI - confirm reachability */
+ dev_info(adev->dev, "PCI reset\n");
+ ret = amdgpu_device_pci_reset(adev);
+ break;
+ case AMD_RESET_METHOD_BACO:
+ dev_info(adev->dev, "BACO reset\n");
+ ret = amdgpu_dpm_baco_reset(adev);
+ break;
+ case AMD_RESET_METHOD_MODE2:
+ dev_info(adev->dev, "MODE2 reset\n");
+ ret = amdgpu_dpm_mode2_reset(adev);
+ break;
+ default: /* MODE1 and any value not handled above */
+ dev_info(adev->dev, "MODE1 reset\n");
+ ret = amdgpu_device_mode1_reset(adev);
+ break;
+ }
+
+ return ret;
+}
+
+static void soc24_program_aspm(struct amdgpu_device *adev)
+{ /* Call the NBIO program_aspm callback when ASPM should be used on a non-APU device. */
+ if (!amdgpu_device_should_use_aspm(adev)) /* ASPM not wanted: nothing is programmed */
+ return;
+
+ if (!(adev->flags & AMD_IS_APU) &&
+ (adev->nbio.funcs->program_aspm)) /* skipped on APUs and when the callback is absent */
+ adev->nbio.funcs->program_aspm(adev);
+}
+
+const struct amdgpu_ip_block_version soc24_common_ip_block = { /* non-static: declared extern in soc24.h */
+ .type = AMD_IP_BLOCK_TYPE_COMMON,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &soc24_common_ip_funcs, /* forward-declared near the top of this file, defined at the bottom */
+};
+
+static bool soc24_need_full_reset(struct amdgpu_device *adev)
+{ /* Report whether a full reset is needed; as written this is true for every GC version. */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ default: /* listed versions fall through to the same result as default */
+ return true;
+ }
+}
+
+static bool soc24_need_reset_on_init(struct amdgpu_device *adev)
+{ /* Return true when a non-APU device reads a non-zero value from MPASP_SMN_C2PMSG_81. */
+ u32 sol_reg;
+
+ if (adev->flags & AMD_IS_APU) /* APUs never request a reset here */
+ return false;
+
+ /* Check the sOS sign of life register to confirm that the sys driver
+ * and sOS have already been loaded.
+ */
+ sol_reg = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81);
+ if (sol_reg) /* any non-zero value counts */
+ return true;
+
+ return false;
+}
+
+static uint64_t soc24_get_pcie_replay_count(struct amdgpu_device *adev)
+{ /* Stub for the get_pcie_replay_count hook; adev is unused. */
+ /* TODO
+ * placeholder for the pcie_replay_count sysfs interface; always reports 0
+ * */
+ return 0;
+}
+
+static void soc24_init_doorbell_index(struct amdgpu_device *adev)
+{ /* Fill adev->doorbell_index from the AMDGPU_NAVI10_DOORBELL* constants. */
+ adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ;
+ adev->doorbell_index.mec_ring0 = AMDGPU_NAVI10_DOORBELL_MEC_RING0;
+ adev->doorbell_index.mec_ring1 = AMDGPU_NAVI10_DOORBELL_MEC_RING1;
+ adev->doorbell_index.mec_ring2 = AMDGPU_NAVI10_DOORBELL_MEC_RING2;
+ adev->doorbell_index.mec_ring3 = AMDGPU_NAVI10_DOORBELL_MEC_RING3;
+ adev->doorbell_index.mec_ring4 = AMDGPU_NAVI10_DOORBELL_MEC_RING4;
+ adev->doorbell_index.mec_ring5 = AMDGPU_NAVI10_DOORBELL_MEC_RING5;
+ adev->doorbell_index.mec_ring6 = AMDGPU_NAVI10_DOORBELL_MEC_RING6;
+ adev->doorbell_index.mec_ring7 = AMDGPU_NAVI10_DOORBELL_MEC_RING7;
+ adev->doorbell_index.userqueue_start = AMDGPU_NAVI10_DOORBELL_USERQUEUE_START;
+ adev->doorbell_index.userqueue_end = AMDGPU_NAVI10_DOORBELL_USERQUEUE_END;
+ adev->doorbell_index.gfx_ring0 = AMDGPU_NAVI10_DOORBELL_GFX_RING0;
+ adev->doorbell_index.gfx_ring1 = AMDGPU_NAVI10_DOORBELL_GFX_RING1;
+ adev->doorbell_index.gfx_userqueue_start =
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_START;
+ adev->doorbell_index.gfx_userqueue_end =
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_END;
+ adev->doorbell_index.mes_ring0 = AMDGPU_NAVI10_DOORBELL_MES_RING0;
+ adev->doorbell_index.mes_ring1 = AMDGPU_NAVI10_DOORBELL_MES_RING1;
+ adev->doorbell_index.sdma_engine[0] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0; /* only two SDMA engine slots are filled */
+ adev->doorbell_index.sdma_engine[1] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1;
+ adev->doorbell_index.ih = AMDGPU_NAVI10_DOORBELL_IH;
+ adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_NAVI10_DOORBELL64_VCN0_1;
+ adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_NAVI10_DOORBELL64_VCN2_3;
+ adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_NAVI10_DOORBELL64_VCN4_5;
+ adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_NAVI10_DOORBELL64_VCN6_7;
+ adev->doorbell_index.first_non_cp = AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP;
+ adev->doorbell_index.last_non_cp = AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP;
+
+ adev->doorbell_index.max_assignment = AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT << 1; /* doubled; presumably 64-bit doorbells counted in dwords - confirm */
+ adev->doorbell_index.sdma_doorbell_range = 20; /* NOTE(review): unit and origin of 20 are not shown here - confirm */
+}
+
+static void soc24_pre_asic_init(struct amdgpu_device *adev)
+{ /* Empty body: wired into soc24_asic_funcs.pre_asic_init but does no work. */
+}
+
+static int soc24_update_umd_stable_pstate(struct amdgpu_device *adev,
+ bool enter)
+{ /* Enter or exit RLC safe mode per 'enter', then update perfmon MGCG with the inverse flag. */
+ if (enter)
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ else
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ if (adev->gfx.funcs->update_perfmon_mgcg) /* optional callback */
+ adev->gfx.funcs->update_perfmon_mgcg(adev, !enter); /* receives the negated 'enter' flag */
+
+ return 0; /* always reports success */
+}
+
+static const struct amdgpu_asic_funcs soc24_asic_funcs = { /* installed as adev->asic_funcs by soc24_common_early_init() */
+ .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom, /* helper named for soc15, not defined in this file */
+ .read_register = &soc24_read_register,
+ .reset = &soc24_asic_reset,
+ .reset_method = &soc24_asic_reset_method,
+ .get_xclk = &soc24_get_xclk,
+ .get_config_memsize = &soc24_get_config_memsize,
+ .init_doorbell_index = &soc24_init_doorbell_index,
+ .need_full_reset = &soc24_need_full_reset,
+ .need_reset_on_init = &soc24_need_reset_on_init,
+ .get_pcie_replay_count = &soc24_get_pcie_replay_count, /* stub that returns 0 */
+ .supports_baco = &amdgpu_dpm_is_baco_supported,
+ .pre_asic_init = &soc24_pre_asic_init, /* empty function */
+ .query_video_codecs = &soc24_query_video_codecs,
+ .update_umd_stable_pstate = &soc24_update_umd_stable_pstate,
+};
+
+static int soc24_common_early_init(struct amdgpu_ip_block *ip_block)
+{ /* Install register accessors and asic_funcs, then set cg/pg flags and external_rev_id per GC version. */
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->nbio.funcs->set_reg_remap(adev); /* called before the accessors below are assigned */
+ adev->smc_rreg = NULL;
+ adev->smc_wreg = NULL;
+ adev->pcie_rreg = &amdgpu_device_indirect_rreg;
+ adev->pcie_wreg = &amdgpu_device_indirect_wreg;
+ adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64;
+ adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64;
+ adev->pciep_rreg = amdgpu_device_pcie_port_rreg;
+ adev->pciep_wreg = amdgpu_device_pcie_port_wreg;
+ adev->uvd_ctx_rreg = NULL;
+ adev->uvd_ctx_wreg = NULL;
+ adev->didt_rreg = NULL;
+ adev->didt_wreg = NULL;
+
+ adev->asic_funcs = &soc24_asic_funcs;
+
+ adev->rev_id = amdgpu_device_get_rev_id(adev);
+ adev->external_rev_id = 0xff; /* default; overwritten below for the supported GC versions */
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_MC_LS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_VCN_DPG; /* no AMD_PG_SUPPORT_JPEG_DPG here, unlike the 12.0.1 case */
+ adev->external_rev_id = adev->rev_id + 0x40;
+ break;
+ case IP_VERSION(12, 0, 1):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG | /* same cg_flags set as the 12.0.0 case */
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_MC_LS;
+
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_JPEG_DPG |
+ AMD_PG_SUPPORT_VCN_DPG;
+ adev->external_rev_id = adev->rev_id + 0x50;
+ break;
+ default:
+ /* FIXME: not supported yet */
+ return -EINVAL; /* accessors and asic_funcs assigned above stay in place on this path */
+ }
+
+ if (amdgpu_sriov_vf(adev)) { /* SR-IOV virtual function only */
+ amdgpu_virt_init_setting(adev);
+ xgpu_nv_mailbox_set_irq_funcs(adev);
+ }
+
+ return 0;
+}
+
+static int soc24_common_late_init(struct amdgpu_ip_block *ip_block)
+{ /* Take the mailbox IRQ (VF) or the athub RAS IRQ (bare metal), then enable the selfring doorbell aperture. */
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ xgpu_nv_mailbox_get_irq(adev);
+ } else {
+ if (adev->nbio.ras &&
+ adev->nbio.ras_err_event_athub_irq.funcs)
+ /* There is no need to fail GPU late init if enabling the
+ * athub_err_event interrupt fails, so the return value is
+ * ignored. nbif v6_3_1 only supports fatal error handling;
+ * just enable the interrupt directly.
+ */
+ amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+ }
+
+ /* Enable selfring doorbell aperture late because doorbell BAR
+ * aperture will change if resize BAR successfully in gmc sw_init.
+ */
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true);
+
+ return 0; /* always reports success */
+}
+
+static int soc24_common_sw_init(struct amdgpu_ip_block *ip_block)
+{ /* Register the mailbox IRQ id when running as an SR-IOV VF; otherwise do nothing. */
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ xgpu_nv_mailbox_add_irq_id(adev); /* return value, if any, is not checked */
+
+ return 0;
+}
+
+static int soc24_common_hw_init(struct amdgpu_ip_block *ip_block)
+{ /* Program ASPM, NBIO registers, the optional HDP remap and DF init, then enable the doorbell aperture. */
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* enable aspm */
+ soc24_program_aspm(adev); /* may do nothing; see its own conditions */
+ /* setup nbio registers */
+ adev->nbio.funcs->init_registers(adev);
+ /* remap HDP registers to a hole in mmio space,
+ * for the purpose of expose those registers
+ * to process space
+ */
+ if (adev->nbio.funcs->remap_hdp_registers) /* optional callback */
+ adev->nbio.funcs->remap_hdp_registers(adev);
+
+ if (adev->df.funcs->hw_init) /* optional callback; adev->df.funcs itself is not NULL-checked */
+ adev->df.funcs->hw_init(adev);
+
+ /* enable the doorbell aperture */
+ adev->nbio.funcs->enable_doorbell_aperture(adev, true);
+
+ return 0; /* always reports success */
+}
+
+static int soc24_common_hw_fini(struct amdgpu_ip_block *ip_block)
+{ /* Disable both doorbell apertures, then release the IRQ taken in late_init. */
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* Disable the doorbell aperture and the selfring doorbell aperture
+ * separately here; per the original note, soc21_enable_doorbell_aperture
+ * was removed and disabling selfring need not be delayed.
+ * NOTE(review): the soc21_ name looks carried over from soc21.c - confirm.
+ */
+ adev->nbio.funcs->enable_doorbell_aperture(adev, false);
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false);
+
+ if (amdgpu_sriov_vf(adev)) {
+ xgpu_nv_mailbox_put_irq(adev); /* pairs with xgpu_nv_mailbox_get_irq() in late_init */
+ } else {
+ if (adev->nbio.ras &&
+ adev->nbio.ras_err_event_athub_irq.funcs) /* same condition as the amdgpu_irq_get() in late_init */
+ amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+ }
+
+ return 0; /* always reports success */
+}
+
+static int soc24_common_suspend(struct amdgpu_ip_block *ip_block)
+{ /* Suspend is the same sequence as hw_fini. */
+ return soc24_common_hw_fini(ip_block);
+}
+
+static int soc24_common_resume(struct amdgpu_ip_block *ip_block)
+{ /* Resume is the same sequence as hw_init. */
+ return soc24_common_hw_init(ip_block);
+}
+
+static bool soc24_common_is_idle(struct amdgpu_ip_block *ip_block)
+{ /* Always reports idle; no hardware state is read. */
+ return true;
+}
+
+static int soc24_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{ /* Forward the requested clock-gating state to the NBIO and HDP callbacks for NBIO 6.3.1. */
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { /* dispatch on the NBIO IP version */
+ case IP_VERSION(6, 3, 1):
+ adev->nbio.funcs->update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE); /* true only when gating is requested */
+ adev->nbio.funcs->update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->hdp.funcs->update_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default: /* any other NBIO version: nothing is programmed */
+ break;
+ }
+ return 0; /* success is reported for every version */
+}
+
+static int soc24_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{ /* Forward the requested power-gating state to the LSDMA memory power-gating callback. */
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) { /* dispatch on the LSDMA IP version */
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ adev->lsdma.funcs->update_memory_power_gating(adev,
+ state == AMD_PG_STATE_GATE); /* true only when gating is requested */
+ break;
+ default: /* any other LSDMA version: nothing is programmed */
+ break;
+ }
+
+ return 0; /* success is reported for every version */
+}
+
+static void soc24_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{ /* Let the NBIO and HDP callbacks report their clock-gating state through *flags. */
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->nbio.funcs->get_clockgating_state(adev, flags); /* called unconditionally, no NULL check */
+
+ adev->hdp.funcs->get_clock_gating_state(adev, flags); /* receives the same flags pointer */
+
+ return; /* redundant in a void function; kept so the patch hunk is unchanged */
+}
+
+static const struct amd_ip_funcs soc24_common_ip_funcs = { /* definition of the table forward-declared near the top of soc24.c */
+ .name = "soc24_common",
+ .early_init = soc24_common_early_init,
+ .late_init = soc24_common_late_init,
+ .sw_init = soc24_common_sw_init,
+ .hw_init = soc24_common_hw_init,
+ .hw_fini = soc24_common_hw_fini,
+ .suspend = soc24_common_suspend, /* wraps hw_fini */
+ .resume = soc24_common_resume, /* wraps hw_init */
+ .is_idle = soc24_common_is_idle,
+ .set_clockgating_state = soc24_common_set_clockgating_state,
+ .set_powergating_state = soc24_common_set_powergating_state,
+ .get_clockgating_state = soc24_common_get_clockgating_state,
+}; /* members not listed here are left zero-initialized */
diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.h b/drivers/gpu/drm/amd/amdgpu/soc24.h
new file mode 100644
index 000000000000..fa7e442e0b62
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/soc24.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SOC24_H__
+#define __SOC24_H__
+
+extern const struct amdgpu_ip_block_version soc24_common_ip_block;
+
+void soc24_grbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
index 5093826a43d1..8a3f326474e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
@@ -30,12 +30,18 @@
#define RSP_ID_MASK (1U << 31)
#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK)
+/* invalid node instance value */
+#define TA_RAS_INV_NODE 0xffff
+
/* RAS related enumerations */
/**********************************************************/
enum ras_command {
TA_RAS_COMMAND__ENABLE_FEATURES = 0,
TA_RAS_COMMAND__DISABLE_FEATURES,
TA_RAS_COMMAND__TRIGGER_ERROR,
+ TA_RAS_COMMAND__QUERY_BLOCK_INFO,
+ TA_RAS_COMMAND__QUERY_SUB_BLOCK_INFO,
+ TA_RAS_COMMAND__QUERY_ADDRESS,
};
enum ta_ras_status {
@@ -64,7 +70,8 @@ enum ta_ras_status {
TA_RAS_STATUS__ERROR_PCS_STATE_ERROR = 0xA016,
TA_RAS_STATUS__ERROR_PCS_STATE_HANG = 0xA017,
TA_RAS_STATUS__ERROR_PCS_STATE_UNKNOWN = 0xA018,
- TA_RAS_STATUS__ERROR_UNSUPPORTED_ERROR_INJ = 0xA019
+ TA_RAS_STATUS__ERROR_UNSUPPORTED_ERROR_INJ = 0xA019,
+ TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED = 0xA01A
};
enum ta_ras_block {
@@ -83,6 +90,11 @@ enum ta_ras_block {
TA_RAS_BLOCK__MP1,
TA_RAS_BLOCK__FUSE,
TA_RAS_BLOCK__MCA,
+ TA_RAS_BLOCK__VCN,
+ TA_RAS_BLOCK__JPEG,
+ TA_RAS_BLOCK__IH,
+ TA_RAS_BLOCK__MPIO,
+ TA_RAS_BLOCK__MMSCH,
TA_NUM_BLOCK_MAX
};
@@ -102,6 +114,19 @@ enum ta_ras_error_type {
TA_RAS_ERROR__POISON = 8,
};
+enum ta_ras_address_type {
+ TA_RAS_MCA_TO_PA,
+ TA_RAS_PA_TO_MCA,
+};
+
+enum ta_ras_nps_mode {
+ TA_RAS_UNKNOWN_MODE = 0,
+ TA_RAS_NPS1_MODE = 1,
+ TA_RAS_NPS2_MODE = 2,
+ TA_RAS_NPS4_MODE = 4,
+ TA_RAS_NPS8_MODE = 8,
+};
+
/* Input/output structures for RAS commands */
/**********************************************************/
@@ -126,6 +151,30 @@ struct ta_ras_trigger_error_input {
struct ta_ras_init_flags {
uint8_t poison_mode_en;
uint8_t dgpu_mode;
+ uint16_t xcc_mask;
+ uint8_t channel_dis_num;
+ uint8_t nps_mode;
+ uint32_t active_umc_mask;
+};
+
+struct ta_ras_mca_addr {
+ uint64_t err_addr;
+ uint32_t ch_inst;
+ uint32_t umc_inst;
+ uint32_t node_inst;
+ uint32_t socket_id;
+};
+
+struct ta_ras_phy_addr {
+ uint64_t pa;
+ uint32_t bank;
+ uint32_t channel_idx;
+};
+
+struct ta_ras_query_address_input {
+ enum ta_ras_address_type addr_type;
+ struct ta_ras_mca_addr ma;
+ struct ta_ras_phy_addr pa;
};
struct ta_ras_output_flags {
@@ -134,6 +183,13 @@ struct ta_ras_output_flags {
uint8_t reg_access_failure_flag;
};
+struct ta_ras_query_address_output {
+ /* don't use the flags here */
+ struct ta_ras_output_flags flags;
+ struct ta_ras_mca_addr ma;
+ struct ta_ras_phy_addr pa;
+};
+
/* Common input structure for RAS callbacks */
/**********************************************************/
union ta_ras_cmd_input {
@@ -141,12 +197,14 @@ union ta_ras_cmd_input {
struct ta_ras_enable_features_input enable_features;
struct ta_ras_disable_features_input disable_features;
struct ta_ras_trigger_error_input trigger_error;
+ struct ta_ras_query_address_input address;
uint32_t reserve_pad[256];
};
union ta_ras_cmd_output {
struct ta_ras_output_flags flags;
+ struct ta_ras_query_address_output address;
uint32_t reserve_pad[256];
};
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h b/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h
index cf8ff064dc72..9ec2e03d41c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h
@@ -31,10 +31,12 @@
* Secure Display Command ID
*/
enum ta_securedisplay_command {
- /* Query whether TA is responding used only for validation purpose */
+ /* Query whether TA is responding. It is used only for validation purpose */
TA_SECUREDISPLAY_COMMAND__QUERY_TA = 1,
/* Send region of Interest and CRC value to I2C */
TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC = 2,
+ /* V2 to send multiple regions of Interest and CRC value to I2C */
+ TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2 = 3,
/* Maximum Command ID */
TA_SECUREDISPLAY_COMMAND__MAX_ID = 0x7FFFFFFF,
};
@@ -55,10 +57,10 @@ enum ta_securedisplay_status {
TA_SECUREDISPLAY_STATUS__MAX = 0x7FFFFFFF,/* Maximum Value for status*/
};
-/** @enum ta_securedisplay_max_phy
+/** @enum ta_securedisplay_phy_ID
* Physical ID number to use for reading corresponding DIO Scratch register for ROI
*/
-enum ta_securedisplay_max_phy {
+enum ta_securedisplay_phy_ID {
TA_SECUREDISPLAY_PHY0 = 0,
TA_SECUREDISPLAY_PHY1 = 1,
TA_SECUREDISPLAY_PHY2 = 2,
@@ -83,6 +85,8 @@ enum ta_securedisplay_ta_query_cmd_ret {
enum ta_securedisplay_buffer_size {
/* 15 bytes = 8 byte (ROI) + 6 byte(CRC) + 1 byte(phy_id) */
TA_SECUREDISPLAY_I2C_BUFFER_SIZE = 15,
+ /* 16 bytes = 8 byte (ROI) + 6 byte(CRC) + 1 byte(phy_id) + 1 byte(roi_idx) */
+ TA_SECUREDISPLAY_V2_I2C_BUFFER_SIZE = 16,
};
/** Input/output structures for Secure Display commands */
@@ -95,7 +99,15 @@ enum ta_securedisplay_buffer_size {
* Physical ID to determine which DIO scratch register should be used to get ROI
*/
struct ta_securedisplay_send_roi_crc_input {
- uint32_t phy_id; /* Physical ID */
+ /* Physical ID */
+ uint32_t phy_id;
+};
+
+struct ta_securedisplay_send_roi_crc_v2_input {
+ /* Physical ID */
+ uint32_t phy_id;
+ /* Region of interest index */
+ uint8_t roi_idx;
};
/** @union ta_securedisplay_cmd_input
@@ -104,6 +116,8 @@ struct ta_securedisplay_send_roi_crc_input {
union ta_securedisplay_cmd_input {
/* send ROI and CRC input buffer format */
struct ta_securedisplay_send_roi_crc_input send_roi_crc;
+ /* send ROI and CRC input buffer format, v2 adds a ROI index */
+ struct ta_securedisplay_send_roi_crc_v2_input send_roi_crc_v2;
uint32_t reserved[4];
};
@@ -128,6 +142,10 @@ struct ta_securedisplay_send_roi_crc_output {
uint8_t reserved;
};
+struct ta_securedisplay_send_roi_crc_v2_output {
+ uint8_t i2c_buf[TA_SECUREDISPLAY_V2_I2C_BUFFER_SIZE]; /* I2C buffer */
+};
+
/** @union ta_securedisplay_cmd_output
* Output buffer
*/
@@ -136,19 +154,21 @@ union ta_securedisplay_cmd_output {
struct ta_securedisplay_query_ta_output query_ta;
/* Send ROI CRC output buffer format used only for validation purpose */
struct ta_securedisplay_send_roi_crc_output send_roi_crc;
+ /* Send ROI CRC V2 output buffer format used only for validation purpose */
+ struct ta_securedisplay_send_roi_crc_v2_output send_roi_crc_v2;
uint32_t reserved[4];
};
-/** @struct securedisplay_cmd
- * Secure Display Command which is shared buffer memory
- */
-struct securedisplay_cmd {
- uint32_t cmd_id; /* +0 Bytes Command ID */
- enum ta_securedisplay_status status; /* +4 Bytes Status of Secure Display TA */
- uint32_t reserved[2]; /* +8 Bytes Reserved */
- union ta_securedisplay_cmd_input securedisplay_in_message; /* +16 Bytes Input Buffer */
- union ta_securedisplay_cmd_output securedisplay_out_message;/* +32 Bytes Output Buffer */
- /**@note Total 48 Bytes */
+/** @struct ta_securedisplay_cmd
+* Secure display command which is shared buffer memory
+*/
+struct ta_securedisplay_cmd {
+ uint32_t cmd_id; /**< +0 Bytes Command ID */
+ enum ta_securedisplay_status status; /**< +4 Bytes Status code returned by the secure display TA */
+ uint32_t reserved[2]; /**< +8 Bytes Reserved */
+ union ta_securedisplay_cmd_input securedisplay_in_message; /**< +16 Bytes Command input buffer */
+ union ta_securedisplay_cmd_output securedisplay_out_message; /**< +32 Bytes Command output buffer */
+ /**@note Total 48 Bytes */
};
#endif //_TA_SECUREDISPLAY_IF_H
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h
index da815a93d46e..d5748032674e 100644
--- a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2018-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -20,7 +20,6 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-
#ifndef _TA_XGMI_IF_H
#define _TA_XGMI_IF_H
@@ -28,20 +27,31 @@
#define RSP_ID_MASK (1U << 31)
#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK)
+#define EXTEND_PEER_LINK_INFO_CMD_FLAG 1
+
enum ta_command_xgmi {
+ /* Initialize the Context and Session Topology */
TA_COMMAND_XGMI__INITIALIZE = 0x00,
+ /* Gets the current GPU's node ID */
TA_COMMAND_XGMI__GET_NODE_ID = 0x01,
+ /* Gets the current GPU's hive ID */
TA_COMMAND_XGMI__GET_HIVE_ID = 0x02,
- TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO = 0x03,
+ /* Gets the Peer's topology Information */
+ TA_COMMAND_XGMI__GET_TOPOLOGY_INFO = 0x03,
+ /* Sets the Peer's topology Information */
TA_COMMAND_XGMI__SET_TOPOLOGY_INFO = 0x04,
- TA_COMMAND_XGMI__GET_PEER_LINKS = 0x0B
+ /* Gets the total links between adjacent peer dies in hive */
+ TA_COMMAND_XGMI__GET_PEER_LINKS = 0x0B,
+ /* Gets the total links and connected port numbers between adjacent peer dies in hive */
+ TA_COMMAND_XGMI__GET_EXTEND_PEER_LINKS = 0x0C
};
/* XGMI related enumerations */
/**********************************************************/;
-enum ta_xgmi_connected_nodes {
- TA_XGMI__MAX_CONNECTED_NODES = 64
-};
+enum { TA_XGMI__MAX_CONNECTED_NODES = 64 };
+enum { TA_XGMI__MAX_INTERNAL_STATE = 32 };
+enum { TA_XGMI__MAX_INTERNAL_STATE_BUFFER = 128 };
+enum { TA_XGMI__MAX_PORT_NUM = 8 };
enum ta_xgmi_status {
TA_XGMI_STATUS__SUCCESS = 0x00,
@@ -81,6 +91,18 @@ struct ta_xgmi_peer_link_info {
uint8_t num_links;
};
+struct xgmi_connected_port_num {
+ uint8_t dst_xgmi_port_num;
+ uint8_t src_xgmi_port_num;
+};
+
+/* support both the port num and num_links */
+struct ta_xgmi_extend_peer_link_info {
+ uint64_t node_id;
+ uint8_t num_links;
+ struct xgmi_connected_port_num port_num[TA_XGMI__MAX_PORT_NUM];
+};
+
struct ta_xgmi_cmd_initialize_output {
uint32_t status;
};
@@ -103,16 +125,21 @@ struct ta_xgmi_cmd_get_topology_info_output {
struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
};
-struct ta_xgmi_cmd_get_peer_link_info_output {
+struct ta_xgmi_cmd_set_topology_info_input {
uint32_t num_nodes;
- struct ta_xgmi_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
+ struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
};
-struct ta_xgmi_cmd_set_topology_info_input {
+/* support XGMI TA w/ and w/o port_num both so two similar structs defined */
+struct ta_xgmi_cmd_get_peer_link_info {
uint32_t num_nodes;
- struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
+ struct ta_xgmi_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
};
+struct ta_xgmi_cmd_get_extend_peer_link_info {
+ uint32_t num_nodes;
+ struct ta_xgmi_extend_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
+};
/**********************************************************/
/* Common input structure for XGMI callbacks */
union ta_xgmi_cmd_input {
@@ -126,16 +153,23 @@ union ta_xgmi_cmd_output {
struct ta_xgmi_cmd_get_node_id_output get_node_id;
struct ta_xgmi_cmd_get_hive_id_output get_hive_id;
struct ta_xgmi_cmd_get_topology_info_output get_topology_info;
- struct ta_xgmi_cmd_get_peer_link_info_output get_link_info;
+ struct ta_xgmi_cmd_get_peer_link_info get_link_info;
+ struct ta_xgmi_cmd_get_extend_peer_link_info get_extend_link_info;
};
-/**********************************************************/
struct ta_xgmi_shared_memory {
uint32_t cmd_id;
uint32_t resp_id;
enum ta_xgmi_status xgmi_status;
+
+ /* if the number of xgmi link record is more than 128, driver will set the
+ * flag 0 to get the first 128 of the link records and will set to 1, to get
+ * the second set
+ */
uint8_t flag_extend_link_record;
- uint8_t reserved0[3];
+ /* bit0: port_num info support flag for GET_EXTEND_PEER_LINKS command */
+ uint8_t caps_flag;
+ uint8_t reserved[2];
union ta_xgmi_cmd_input xgmi_in_message;
union ta_xgmi_cmd_output xgmi_out_message;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index b08905d1c00f..ee8038df17e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -159,6 +159,9 @@ static int tonga_ih_irq_init(struct amdgpu_device *adev)
/* enable interrupts */
tonga_ih_enable_interrupts(adev);
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
return 0;
}
@@ -196,6 +199,9 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev,
wptr = le32_to_cpu(*ih->wptr_cpu);
+ if (ih == &adev->irq.ih_soft)
+ goto out;
+
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;
@@ -219,6 +225,12 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32(mmIH_RB_CNTL, tmp);
+
out:
return (wptr & ih->ptr_mask);
}
@@ -277,9 +289,9 @@ static void tonga_ih_set_rptr(struct amdgpu_device *adev,
}
}
-static int tonga_ih_early_init(void *handle)
+static int tonga_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = amdgpu_irq_add_domain(adev);
@@ -291,15 +303,19 @@ static int tonga_ih_early_init(void *handle)
return 0;
}
-static int tonga_ih_sw_init(void *handle)
+static int tonga_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, true);
if (r)
return r;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
adev->irq.ih.use_doorbell = true;
adev->irq.ih.doorbell_index = adev->doorbell_index.ih;
@@ -308,9 +324,9 @@ static int tonga_ih_sw_init(void *handle)
return r;
}
-static int tonga_ih_sw_fini(void *handle)
+static int tonga_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
amdgpu_irq_remove_domain(adev);
@@ -318,10 +334,10 @@ static int tonga_ih_sw_fini(void *handle)
return 0;
}
-static int tonga_ih_hw_init(void *handle)
+static int tonga_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = tonga_ih_irq_init(adev);
if (r)
@@ -330,32 +346,26 @@ static int tonga_ih_hw_init(void *handle)
return 0;
}
-static int tonga_ih_hw_fini(void *handle)
+static int tonga_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- tonga_ih_irq_disable(adev);
+ tonga_ih_irq_disable(ip_block->adev);
return 0;
}
-static int tonga_ih_suspend(void *handle)
+static int tonga_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return tonga_ih_hw_fini(adev);
+ return tonga_ih_hw_fini(ip_block);
}
-static int tonga_ih_resume(void *handle)
+static int tonga_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return tonga_ih_hw_init(adev);
+ return tonga_ih_hw_init(ip_block);
}
-static bool tonga_ih_is_idle(void *handle)
+static bool tonga_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY))
@@ -364,11 +374,11 @@ static bool tonga_ih_is_idle(void *handle)
return true;
}
-static int tonga_ih_wait_for_idle(void *handle)
+static int tonga_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -380,9 +390,9 @@ static int tonga_ih_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static bool tonga_ih_check_soft_reset(void *handle)
+static bool tonga_ih_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS);
@@ -399,29 +409,27 @@ static bool tonga_ih_check_soft_reset(void *handle)
}
}
-static int tonga_ih_pre_soft_reset(void *handle)
+static int tonga_ih_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!adev->irq.srbm_soft_reset)
+ if (!ip_block->adev->irq.srbm_soft_reset)
return 0;
- return tonga_ih_hw_fini(adev);
+ return tonga_ih_hw_fini(ip_block);
}
-static int tonga_ih_post_soft_reset(void *handle)
+static int tonga_ih_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->irq.srbm_soft_reset)
return 0;
- return tonga_ih_hw_init(adev);
+ return tonga_ih_hw_init(ip_block);
}
-static int tonga_ih_soft_reset(void *handle)
+static int tonga_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset;
if (!adev->irq.srbm_soft_reset)
@@ -450,13 +458,13 @@ static int tonga_ih_soft_reset(void *handle)
return 0;
}
-static int tonga_ih_set_clockgating_state(void *handle,
+static int tonga_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int tonga_ih_set_powergating_state(void *handle,
+static int tonga_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -465,7 +473,6 @@ static int tonga_ih_set_powergating_state(void *handle,
static const struct amd_ip_funcs tonga_ih_ip_funcs = {
.name = "tonga_ih",
.early_init = tonga_ih_early_init,
- .late_init = NULL,
.sw_init = tonga_ih_sw_init,
.sw_fini = tonga_ih_sw_fini,
.hw_init = tonga_ih_hw_init,
@@ -493,8 +500,7 @@ static void tonga_ih_set_interrupt_funcs(struct amdgpu_device *adev)
adev->irq.ih_funcs = &tonga_ih_funcs;
}
-const struct amdgpu_ip_block_version tonga_ih_ip_block =
-{
+const struct amdgpu_ip_block_version tonga_ih_ip_block = {
.type = AMD_IP_BLOCK_TYPE_IH,
.major = 3,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
new file mode 100644
index 000000000000..0f5b1719fda5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -0,0 +1,742 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v12_0.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
+#include "amdgpu.h"
+#include "umc/umc_12_0_0_offset.h"
+#include "umc/umc_12_0_0_sh_mask.h"
+#include "mp/mp_13_0_6_sh_mask.h"
+
+#define MAX_ECC_NUM_PER_RETIREMENT 32
+#define DELAYED_TIME_FOR_GPU_RESET 1000 //ms
+
+/*
+ * Compute the register offset of one UMC channel.
+ *
+ * The (umc_inst, ch_inst) pair is flattened with adev->umc.channel_inst_num
+ * and re-split into groups of four channels. The result combines the
+ * per-channel, per-instance and per-node distances; any node other than
+ * node 0 additionally gets UMC_V12_0_CROSS_NODE_OFFSET.
+ */
+static inline uint64_t get_umc_v12_0_reg_offset(struct amdgpu_device *adev,
+						uint32_t node_inst,
+						uint32_t umc_inst,
+						uint32_t ch_inst)
+{
+	uint32_t flat_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
+	uint64_t offset = 0;
+
+	if (node_inst != 0)
+		offset = UMC_V12_0_CROSS_NODE_OFFSET;
+
+	/* re-split the flat index: four channels per register instance */
+	umc_inst = flat_idx / 4;
+	ch_inst = flat_idx % 4;
+
+	offset += adev->umc.channel_offs * ch_inst +
+		  UMC_V12_0_INST_DIST * umc_inst +
+		  UMC_V12_0_NODE_DIST * node_inst;
+
+	return offset;
+}
+
+/*
+ * Per-channel callback: write UMC_V12_0_CE_CNT_INIT back into the channel's
+ * OdEccErrCnt register. @data is unused. Always returns 0.
+ */
+static int umc_v12_0_reset_error_count_per_channel(struct amdgpu_device *adev,
+					uint32_t node_inst, uint32_t umc_inst,
+					uint32_t ch_inst, void *data)
+{
+	uint64_t channel_base =
+		get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst);
+	uint64_t err_cnt_reg = SOC15_REG_OFFSET(UMC, 0, regUMCCH0_OdEccErrCnt);
+
+	/* restore the counter to its initial value */
+	WREG32_PCIE_EXT((err_cnt_reg + channel_base) * 4,
+			UMC_V12_0_CE_CNT_INIT);
+
+	return 0;
+}
+
+/*
+ * Run umc_v12_0_reset_error_count_per_channel() for each channel visited by
+ * amdgpu_umc_loop_channels(); no private data is passed to the callback.
+ */
+static void umc_v12_0_reset_error_count(struct amdgpu_device *adev)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v12_0_reset_error_count_per_channel, NULL);
+}
+
+/*
+ * Classify an MCA UMC status word as a deferred error.
+ *
+ * Logs the decoded Val/Poison/Deferred/PCC/UC/TCC fields through dev_dbg(),
+ * then returns true only when poison mode is supported, the Val bit is set
+ * and either the Deferred or the Poison bit is set.
+ */
+bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
+{
+ dev_dbg(adev->dev,
+ "MCA_UMC_STATUS(0x%llx): Val:%llu, Poison:%llu, Deferred:%llu, PCC:%llu, UC:%llu, TCC:%llu\n",
+ mc_umc_status,
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val),
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Poison),
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred),
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC),
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC),
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC)
+ );
+
+ return (amdgpu_ras_is_poison_mode_supported(adev) &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) ||
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Poison) == 1)));
+}
+
+/*
+ * Classify an MCA UMC status word as an uncorrectable error: the word must
+ * not be a deferred error, must have Val set, and must have at least one of
+ * PCC, UC or TCC set.
+ */
+bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
+{
+	/* deferred errors are reported on their own, never as UE */
+	if (umc_v12_0_is_deferred_error(adev, mc_umc_status))
+		return false;
+
+	/* nothing to classify unless the status word is valid */
+	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) != 1)
+		return false;
+
+	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1)
+		return true;
+
+	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1)
+		return true;
+
+	return REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1;
+}
+
+/*
+ * Classify an MCA UMC status word as a correctable error.
+ *
+ * Deferred errors are excluded first. Otherwise the word must have Val set
+ * and match one of: CECC set; UECC set with UC clear; or ErrorCodeExt equal
+ * to 0x5 or 0xb while the word is not classified as uncorrectable.
+ */
+bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
+{
+ if (umc_v12_0_is_deferred_error(adev, mc_umc_status))
+ return false;
+
+ return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 ||
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 0) ||
+ /* Identify data parity error in replay mode */
+ ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0x5 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0xb) &&
+ !(umc_v12_0_is_uncorrectable_error(adev, mc_umc_status)))));
+}
+
+/*
+ * Read the channel's MCUMC_STATUS register and bump *error_count by one when
+ * @error_type_func classifies the status word as a match.
+ */
+static void umc_v12_0_query_error_count_per_type(struct amdgpu_device *adev,
+						   uint64_t umc_reg_offset,
+						   unsigned long *error_count,
+						   check_error_type_func error_type_func)
+{
+	uint64_t status_reg =
+		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+	/* Check MCUMC_STATUS */
+	uint64_t status = RREG64_PCIE_EXT((status_reg + umc_reg_offset) * 4);
+
+	if (!error_type_func(adev, status))
+		return;
+
+	(*error_count)++;
+}
+
+/*
+ * Per-channel callback: classify the channel's MCUMC_STATUS word as
+ * correctable, uncorrectable and/or deferred and add the resulting counts to
+ * the ras_err_data passed in @data. Always returns 0.
+ */
+static int umc_v12_0_query_error_count(struct amdgpu_device *adev,
+					uint32_t node_inst, uint32_t umc_inst,
+					uint32_t ch_inst, void *data)
+{
+	struct ras_err_data *err_data = (struct ras_err_data *)data;
+	unsigned long ue_count = 0, ce_count = 0, de_count = 0;
+	uint64_t umc_reg_offset;
+
+	/* NOTE: node_inst is converted by adev->umc.active_mask and the range is [0-3],
+	 * which can be used as die ID directly */
+	struct amdgpu_smuio_mcm_config_info mcm_info = {
+		.socket_id = 0,
+		.die_id = node_inst,
+	};
+
+	/* Guard the optional smuio callback the same way
+	 * umc_v12_0_query_error_address() does; fall back to socket 0.
+	 */
+	if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id)
+		mcm_info.socket_id = adev->smuio.funcs->get_socket_id(adev);
+
+	umc_reg_offset =
+		get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst);
+
+	umc_v12_0_query_error_count_per_type(adev, umc_reg_offset,
+					    &ce_count, umc_v12_0_is_correctable_error);
+	umc_v12_0_query_error_count_per_type(adev, umc_reg_offset,
+					    &ue_count, umc_v12_0_is_uncorrectable_error);
+	umc_v12_0_query_error_count_per_type(adev, umc_reg_offset,
+					    &de_count, umc_v12_0_is_deferred_error);
+
+	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
+	amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count);
+	amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, de_count);
+
+	return 0;
+}
+
+/*
+ * Accumulate error counts into @ras_error_status by running
+ * umc_v12_0_query_error_count() through amdgpu_umc_loop_channels(), then
+ * reset the hardware error counters.
+ */
+static void umc_v12_0_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v12_0_query_error_count, ras_error_status);
+
+ umc_v12_0_reset_error_count(adev);
+}
+
+/*
+ * Fill adev->umc.flip_bits with the physical-address bit positions used when
+ * expanding a retired address, and derive adev->umc.retire_unit from them.
+ *
+ * NPS1 values are installed first, then overridden for NPS2/NPS4 partition
+ * modes and finally adjusted per VRAM type. The partition mode defaults to
+ * NPS1 when the gmc query callback is not provided.
+ */
+static void umc_v12_0_get_retire_flip_bits(struct amdgpu_device *adev)
+{
+ enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE;
+ uint32_t vram_type = adev->gmc.vram_type;
+ struct amdgpu_umc_flip_bits *flip_bits = &(adev->umc.flip_bits);
+
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+
+ /* default setting */
+ flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_C2_BIT;
+ flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C3_BIT;
+ flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_C4_BIT;
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R13_BIT;
+ flip_bits->flip_row_bit = 13;
+ flip_bits->bit_num = 4;
+ flip_bits->r13_in_pa = UMC_V12_0_PA_R13_BIT;
+
+ /* partition-mode overrides of the first three flip bits and r13_in_pa */
+ if (nps == AMDGPU_NPS2_PARTITION_MODE) {
+ flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH5_BIT;
+ flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C2_BIT;
+ flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B1_BIT;
+ flip_bits->r13_in_pa = UMC_V12_0_PA_R12_BIT;
+ } else if (nps == AMDGPU_NPS4_PARTITION_MODE) {
+ flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH4_BIT;
+ flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_CH5_BIT;
+ flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B0_BIT;
+ flip_bits->r13_in_pa = UMC_V12_0_PA_R11_BIT;
+ }
+
+ /* the fourth flip bit (row bit) depends on both VRAM type and NPS mode */
+ switch (vram_type) {
+ case AMDGPU_VRAM_TYPE_HBM:
+ /* other nps modes are taken as nps1 */
+ if (nps == AMDGPU_NPS2_PARTITION_MODE)
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT;
+ else if (nps == AMDGPU_NPS4_PARTITION_MODE)
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT;
+
+ break;
+ case AMDGPU_VRAM_TYPE_HBM3E:
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT;
+ flip_bits->flip_row_bit = 12;
+
+ if (nps == AMDGPU_NPS2_PARTITION_MODE)
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT;
+ else if (nps == AMDGPU_NPS4_PARTITION_MODE)
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R10_BIT;
+
+ break;
+ default:
+ dev_warn(adev->dev,
+ "Unknown HBM type, set RAS retire flip bits to the value in NPS1 mode.\n");
+ break;
+ }
+
+ /* one retirement unit covers every combination of the flip bits */
+ adev->umc.retire_unit = 0x1 << flip_bits->bit_num;
+}
+
+/*
+ * Translate an MCA error address into SoC physical addresses and expand it
+ * into every address covered by one retirement unit.
+ *
+ * @err_data:  optional; when set, each expanded address is appended through
+ *             amdgpu_umc_fill_error_record().
+ * @addr_in:   optional; when set, the MCA address is translated through
+ *             psp_ras_query_address(). When NULL, no query is issued and the
+ *             physical address is taken from @addr_out as supplied.
+ * @addr_out:  optional; receives the translated address with the flip bits
+ *             cleared. A zeroed local scratch buffer is used when NULL.
+ * @dump_addr: when true, each expanded address is printed with dev_info().
+ *
+ * Returns 0 on success or the error returned by psp_ras_query_address().
+ */
+static int umc_v12_0_convert_error_address(struct amdgpu_device *adev,
+					struct ras_err_data *err_data,
+					struct ta_ras_query_address_input *addr_in,
+					struct ta_ras_query_address_output *addr_out,
+					bool dump_addr)
+{
+	uint32_t col, col_lower, row, row_lower, row_high, bank;
+	uint32_t channel_index = 0, umc_inst = 0;
+	uint32_t i, bit_num, retire_unit, *flip_bits;
+	uint64_t soc_pa, column, err_addr;
+	struct ta_ras_query_address_output addr_out_tmp;
+	struct ta_ras_query_address_output *paddr_out;
+	int ret = 0;
+
+	if (!addr_out) {
+		/* the scratch buffer is read below even when no query is
+		 * issued (addr_in == NULL), so never leave it uninitialized
+		 */
+		memset(&addr_out_tmp, 0, sizeof(addr_out_tmp));
+		paddr_out = &addr_out_tmp;
+	} else {
+		paddr_out = addr_out;
+	}
+
+	err_addr = bank = 0;
+	if (addr_in) {
+		err_addr = addr_in->ma.err_addr;
+		addr_in->addr_type = TA_RAS_MCA_TO_PA;
+		ret = psp_ras_query_address(&adev->psp, addr_in, paddr_out);
+		if (ret) {
+			dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx\n",
+				 err_addr);
+
+			goto out;
+		}
+
+		bank = paddr_out->pa.bank;
+		/* no need to care about umc inst if addr_in is NULL */
+		umc_inst = addr_in->ma.umc_inst;
+	}
+
+	flip_bits = adev->umc.flip_bits.flip_bits_in_pa;
+	bit_num = adev->umc.flip_bits.bit_num;
+	retire_unit = adev->umc.retire_unit;
+
+	soc_pa = paddr_out->pa.pa;
+	channel_index = paddr_out->pa.channel_idx;
+	/* clear loop bits in soc physical address */
+	for (i = 0; i < bit_num; i++)
+		soc_pa &= ~BIT_ULL(flip_bits[i]);
+
+	paddr_out->pa.pa = soc_pa;
+	/* get column bit 0 and 1 in mca address */
+	col_lower = (err_addr >> 1) & 0x3ULL;
+	/* extra row bit will be handled later */
+	row_lower = (err_addr >> UMC_V12_0_MA_R0_BIT) & 0x1fffULL;
+	row_lower &= ~BIT_ULL(adev->umc.flip_bits.flip_row_bit);
+
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 5, 0)) {
+		row_high = (soc_pa >> adev->umc.flip_bits.r13_in_pa) & 0x3ULL;
+		/* it's 2.25GB in each channel, from MCA address to PA
+		 * [R14 R13] is converted if the two bits value are 0x3,
+		 * get them from PA instead of MCA address.
+		 */
+		row_lower |= (row_high << 13);
+	}
+
+	if (!err_data && !dump_addr)
+		goto out;
+
+	/* Default row when there is no extra flipped row bit, so the dump
+	 * below never prints an uninitialized value.
+	 */
+	row = row_lower;
+
+	/* loop for all possibilities of retired bits */
+	for (column = 0; column < retire_unit; column++) {
+		soc_pa = paddr_out->pa.pa;
+		for (i = 0; i < bit_num; i++)
+			soc_pa |= (((column >> i) & 0x1ULL) << flip_bits[i]);
+
+		col = ((column & 0x7) << 2) | col_lower;
+		/* handle extra row bit */
+		if (bit_num == RETIRE_FLIP_BITS_NUM)
+			row = ((column >> 3) << adev->umc.flip_bits.flip_row_bit) |
+				row_lower;
+
+		if (dump_addr)
+			dev_info(adev->dev,
+				"Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n",
+				soc_pa, row, col, bank, channel_index);
+
+		if (err_data)
+			amdgpu_umc_fill_error_record(err_data, err_addr,
+					soc_pa, channel_index, umc_inst);
+	}
+
+out:
+	return ret;
+}
+
+/*
+ * Per-channel callback: read the channel's MCUMC_STATUS word and, for
+ * uncorrectable or deferred errors, convert the reported MCA error address
+ * into retired-page records in the ras_err_data passed in @data.
+ *
+ * The status register is cleared whenever it was non-zero. Always returns 0.
+ */
+static int umc_v12_0_query_error_address(struct amdgpu_device *adev,
+					uint32_t node_inst, uint32_t umc_inst,
+					uint32_t ch_inst, void *data)
+{
+	struct ras_err_data *err_data = (struct ras_err_data *)data;
+	struct ta_ras_query_address_input addr_in;
+	uint64_t mc_umc_status_addr;
+	uint64_t mc_umc_status, err_addr;
+	uint64_t mc_umc_addrt0;
+	uint64_t umc_reg_offset =
+		get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst);
+
+	mc_umc_status_addr =
+		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+
+	mc_umc_status = RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
+
+	if (mc_umc_status == 0)
+		return 0;
+
+	if (!err_data->err_addr) {
+		/* clear umc status */
+		WREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+
+		return 0;
+	}
+
+	/* calculate error address if ue error is detected */
+	if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status) ||
+	    umc_v12_0_is_deferred_error(adev, mc_umc_status)) {
+		mc_umc_addrt0 =
+			SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);
+
+		err_addr = RREG64_PCIE_EXT((mc_umc_addrt0 + umc_reg_offset) * 4);
+
+		err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+		/* Only some fields are filled in below; zero the rest so no
+		 * stale stack contents are passed on with the query.
+		 */
+		memset(&addr_in, 0, sizeof(addr_in));
+
+		if (!adev->aid_mask &&
+		    adev->smuio.funcs &&
+		    adev->smuio.funcs->get_socket_id)
+			addr_in.ma.socket_id = adev->smuio.funcs->get_socket_id(adev);
+		else
+			addr_in.ma.socket_id = 0;
+
+		addr_in.ma.err_addr = err_addr;
+		addr_in.ma.ch_inst = ch_inst;
+		addr_in.ma.umc_inst = umc_inst;
+		addr_in.ma.node_inst = node_inst;
+
+		umc_v12_0_convert_error_address(adev, err_data, &addr_in, NULL, true);
+	}
+
+	/* clear umc status */
+	WREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+
+	return 0;
+}
+
+/*
+ * Collect error addresses into @ras_error_status by running
+ * umc_v12_0_query_error_address() through amdgpu_umc_loop_channels().
+ */
+static void umc_v12_0_query_ras_error_address(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v12_0_query_error_address, ras_error_status);
+}
+
+/*
+ * Per-channel callback: set the OdEccErrInt field of the channel's
+ * OdEccCntSel register to 0x1 (read-modify-write) and load
+ * UMC_V12_0_CE_CNT_INIT into its OdEccErrCnt register. @data is unused.
+ * Always returns 0.
+ */
+static int umc_v12_0_err_cnt_init_per_channel(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint32_t odecc_cnt_sel;
+ uint64_t odecc_cnt_sel_addr, odecc_err_cnt_addr;
+ uint64_t umc_reg_offset =
+ get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst);
+
+ odecc_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_OdEccCntSel);
+ odecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_OdEccErrCnt);
+
+ odecc_cnt_sel = RREG32_PCIE_EXT((odecc_cnt_sel_addr + umc_reg_offset) * 4);
+
+ /* set ce error interrupt type to APIC based interrupt */
+ odecc_cnt_sel = REG_SET_FIELD(odecc_cnt_sel, UMCCH0_OdEccCntSel,
+ OdEccErrInt, 0x1);
+ WREG32_PCIE_EXT((odecc_cnt_sel_addr + umc_reg_offset) * 4, odecc_cnt_sel);
+
+ /* set error count to initial value */
+ WREG32_PCIE_EXT((odecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V12_0_CE_CNT_INIT);
+
+ return 0;
+}
+
+/*
+ * Check whether the 64-bit status word pointed to by @ras_error_status
+ * matches the requested MCA error @type (UE, CE or DE).
+ *
+ * Returns false for any other type.
+ */
+static bool umc_v12_0_check_ecc_err_status(struct amdgpu_device *adev,
+			enum amdgpu_mca_error_type type, void *ras_error_status)
+{
+	uint64_t mc_umc_status = *(uint64_t *)ras_error_status;
+
+	/* every path returns from inside the switch, so no trailing return */
+	switch (type) {
+	case AMDGPU_MCA_ERROR_TYPE_UE:
+		return umc_v12_0_is_uncorrectable_error(adev, mc_umc_status);
+	case AMDGPU_MCA_ERROR_TYPE_CE:
+		return umc_v12_0_is_correctable_error(adev, mc_umc_status);
+	case AMDGPU_MCA_ERROR_TYPE_DE:
+		return umc_v12_0_is_deferred_error(adev, mc_umc_status);
+	default:
+		return false;
+	}
+}
+
+/*
+ * Run umc_v12_0_err_cnt_init_per_channel() for each channel visited by
+ * amdgpu_umc_loop_channels(); no private data is passed to the callback.
+ */
+static void umc_v12_0_err_cnt_init(struct amdgpu_device *adev)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v12_0_err_cnt_init_per_channel, NULL);
+}
+
+/* Poison mode query callback; unconditionally returns true (see below). */
+static bool umc_v12_0_query_ras_poison_mode(struct amdgpu_device *adev)
+{
+ /*
+ * Force return true, because regUMCCH0_EccCtrl
+ * is not accessible from host side
+ */
+ return true;
+}
+
+/* RAS hardware callbacks: error count and error address queries. */
+const struct amdgpu_ras_block_hw_ops umc_v12_0_ras_hw_ops = {
+ .query_ras_error_count = umc_v12_0_query_ras_error_count,
+ .query_ras_error_address = umc_v12_0_query_ras_error_address,
+};
+
+/*
+ * ACA bank parser for UMC v12.0.
+ *
+ * Classifies the bank's STATUS register as deferred, uncorrectable or
+ * correctable (in that priority order), records the type in the bank,
+ * forwards the bank registers to amdgpu_umc_update_ecc_status() and logs the
+ * error count into the ACA error cache.
+ *
+ * Returns 0 when the bank holds none of the three error types, otherwise the
+ * result of aca_bank_info_decode() on failure or of
+ * aca_error_cache_log_bank_error().
+ */
+static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+				     enum aca_smu_type type, void *data)
+{
+	struct amdgpu_device *adev = handle->adev;
+	struct aca_bank_info info;
+	enum aca_error_type err_type;
+	u64 status, count;
+	u32 ext_error_code;
+	int ret;
+
+	status = bank->regs[ACA_REG_IDX_STATUS];
+	if (umc_v12_0_is_deferred_error(adev, status))
+		err_type = ACA_ERROR_TYPE_DEFERRED;
+	else if (umc_v12_0_is_uncorrectable_error(adev, status))
+		err_type = ACA_ERROR_TYPE_UE;
+	else if (umc_v12_0_is_correctable_error(adev, status))
+		err_type = ACA_ERROR_TYPE_CE;
+	else
+		return 0;
+	bank->aca_err_type = err_type;
+
+	ret = aca_bank_info_decode(bank, &info);
+	if (ret)
+		return ret;
+
+	amdgpu_umc_update_ecc_status(adev,
+				     status,
+				     bank->regs[ACA_REG_IDX_IPID],
+				     bank->regs[ACA_REG_IDX_ADDR]);
+
+	/* Reuse the classification made above instead of decoding the status
+	 * word (and emitting its debug line) a second time.
+	 */
+	ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status);
+	if (ext_error_code != 0)
+		count = 1ULL;
+	else if (err_type == ACA_ERROR_TYPE_DEFERRED)
+		/* NOTE(review): assumes retire_unit is non-zero here, i.e. the
+		 * retire flip bits were set up beforehand - confirm.
+		 */
+		count = adev->umc.err_addr_cnt / adev->umc.retire_unit;
+	else
+		count = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]);
+
+	return aca_error_cache_log_bank_error(handle, &info, err_type, count);
+}
+
+/* ACA bank callbacks: only the bank parser is provided. */
+static const struct aca_bank_ops umc_v12_0_aca_bank_ops = {
+ .aca_bank_parser = umc_v12_0_aca_bank_parser,
+};
+
+/* ACA description for the UMC block: handles UE, CE and deferred errors. */
+const struct aca_info umc_v12_0_aca_info = {
+ .hwip = ACA_HWIP_TYPE_UMC,
+ .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK | ACA_ERROR_DEFERRED_MASK,
+ .bank_ops = &umc_v12_0_aca_bank_ops,
+};
+
+/*
+ * RAS late-init hook: run the common UMC late init, then bind the UMC ACA
+ * description to the UMC RAS block. Returns the first failure, or 0.
+ */
+static int umc_v12_0_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+	int err = amdgpu_umc_ras_late_init(adev, ras_block);
+
+	if (err)
+		return err;
+
+	return amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__UMC,
+				   &umc_v12_0_aca_info, NULL);
+}
+
+/*
+ * Record a deferred UMC error reported through an MCA bank.
+ *
+ * Banks whose HardwareID/McaType do not match UMC v12.0 only bump
+ * consumption_q_count. For deferred errors the MCA address is translated via
+ * amdgpu_umc_mca_to_addr(), a ras_ecc_err entry is logged into the
+ * de_page_tree, and the bad pages found for that address are reserved.
+ *
+ * Returns 0 when there is nothing to do or on success, the translation
+ * error, -ENOMEM on allocation failure, or the error from
+ * amdgpu_umc_logs_ecc_err() (including -EEXIST).
+ */
+static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
+ uint64_t status, uint64_t ipid, uint64_t addr)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint16_t hwid, mcatype;
+ uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL];
+ uint64_t err_addr, pa_addr = 0;
+ struct ras_ecc_err *ecc_err;
+ struct ta_ras_query_address_output addr_out;
+ uint32_t shift_bit = adev->umc.flip_bits.flip_bits_in_pa[2];
+ int count, ret, i;
+
+ hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID);
+ mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType);
+
+ /* The IP block decode of consumption is SMU */
+ if (hwid != MCA_UMC_HWID_V12_0 || mcatype != MCA_UMC_MCATYPE_V12_0) {
+ con->umc_ecc_log.consumption_q_count++;
+ return 0;
+ }
+
+ if (!status)
+ return 0;
+
+ /* only deferred errors are logged by this path */
+ if (!umc_v12_0_is_deferred_error(adev, status))
+ return 0;
+
+ err_addr = REG_GET_FIELD(addr,
+ MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+ dev_dbg(adev->dev,
+ "UMC:IPID:0x%llx, socket:%llu, aid:%llu, inst:%llu, ch:%llu, err_addr:0x%llx\n",
+ ipid,
+ MCA_IPID_2_SOCKET_ID(ipid),
+ MCA_IPID_2_DIE_ID(ipid),
+ MCA_IPID_2_UMC_INST(ipid),
+ MCA_IPID_2_UMC_CH(ipid),
+ err_addr);
+
+ ret = amdgpu_umc_mca_to_addr(adev,
+ err_addr, MCA_IPID_2_UMC_CH(ipid),
+ MCA_IPID_2_UMC_INST(ipid), MCA_IPID_2_DIE_ID(ipid),
+ MCA_IPID_2_SOCKET_ID(ipid), &addr_out, true);
+ if (ret)
+ return ret;
+
+ ecc_err = kzalloc(sizeof(*ecc_err), GFP_KERNEL);
+ if (!ecc_err)
+ return -ENOMEM;
+
+ pa_addr = addr_out.pa.pa;
+ ecc_err->status = status;
+ ecc_err->ipid = ipid;
+ ecc_err->addr = addr;
+ ecc_err->pa_pfn = pa_addr >> AMDGPU_GPU_PAGE_SHIFT;
+ ecc_err->channel_idx = addr_out.pa.channel_idx;
+
+ /* If converted pa_pfn is 0, use pa C4 pfn. */
+ if (!ecc_err->pa_pfn)
+ ecc_err->pa_pfn = BIT_ULL(shift_bit) >> AMDGPU_GPU_PAGE_SHIFT;
+
+ ret = amdgpu_umc_logs_ecc_err(adev, &con->umc_ecc_log.de_page_tree, ecc_err);
+ if (ret) {
+ if (ret == -EEXIST)
+ con->umc_ecc_log.de_queried_count++;
+ else
+ dev_err(adev->dev, "Fail to log ecc error! ret:%d\n", ret);
+
+ /* the entry was not logged, so it is freed here */
+ kfree(ecc_err);
+ return ret;
+ }
+
+ con->umc_ecc_log.de_queried_count++;
+
+ memset(page_pfn, 0, sizeof(page_pfn));
+ count = amdgpu_umc_lookup_bad_pages_in_a_row(adev,
+ pa_addr,
+ page_pfn, ARRAY_SIZE(page_pfn));
+ if (count <= 0) {
+ dev_warn(adev->dev, "Fail to convert error address! count:%d\n", count);
+ return 0;
+ }
+
+ /* Reserve memory */
+ for (i = 0; i < count; i++)
+ amdgpu_ras_reserve_page(adev, page_pfn[i]);
+
+ /* The problem case is as follows:
+ * 1. GPU A triggers a gpu ras reset, and GPU A drives
+ * GPU B to also perform a gpu ras reset.
+ * 2. After gpu B ras reset started, gpu B queried a DE
+ * data. Since the DE data was queried in the ras reset
+ * thread instead of the page retirement thread, bad
+ * page retirement work would not be triggered. Then
+ * even if all gpu resets are completed, the bad pages
+ * will be cached in RAM until GPU B's bad page retirement
+ * work is triggered again and then saved to eeprom.
+ * Trigger delayed work to save the bad pages to eeprom in time
+ * after gpu ras reset is completed.
+ */
+ if (amdgpu_ras_in_recovery(adev))
+ schedule_delayed_work(&con->page_retirement_dwork,
+ msecs_to_jiffies(DELAYED_TIME_FOR_GPU_RESET));
+
+ return 0;
+}
+
+/*
+ * Expand one logged ECC error into per-page error records.
+ *
+ * Looks up the bad pages for ecc_err->pa_pfn and appends one record per page
+ * to the ras_err_data passed in @ras_error_status, stopping at the first
+ * failure. de_count is incremented once per call.
+ *
+ * Returns -EINVAL when either pointer is NULL, the first error from
+ * amdgpu_umc_fill_error_record(), or 0 (also when no page was found).
+ */
+static int umc_v12_0_fill_error_record(struct amdgpu_device *adev,
+				struct ras_ecc_err *ecc_err, void *ras_error_status)
+{
+	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+	uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL];
+	/* ret must start at 0: the loop below does not run when count <= 0 */
+	int ret = 0, i, count;
+
+	if (!err_data || !ecc_err)
+		return -EINVAL;
+
+	memset(page_pfn, 0, sizeof(page_pfn));
+	count = amdgpu_umc_lookup_bad_pages_in_a_row(adev,
+				ecc_err->pa_pfn << AMDGPU_GPU_PAGE_SHIFT,
+				page_pfn, ARRAY_SIZE(page_pfn));
+
+	for (i = 0; i < count; i++) {
+		ret = amdgpu_umc_fill_error_record(err_data,
+				ecc_err->addr,
+				page_pfn[i] << AMDGPU_GPU_PAGE_SHIFT,
+				ecc_err->channel_idx,
+				MCA_IPID_2_UMC_INST(ecc_err->ipid));
+		if (ret)
+			break;
+	}
+
+	err_data->de_count++;
+
+	return ret;
+}
+
+/*
+ * Convert newly detected deferred-error entries into error records.
+ *
+ * Under umc_ecc_log.lock, looks up at most MAX_ECC_NUM_PER_RETIREMENT entries
+ * tagged UMC_ECC_NEW_DETECTED_TAG in the de_page_tree, fills error records
+ * for each one and clears the tag. Processing stops at the first failure,
+ * leaving the remaining entries tagged.
+ */
+static void umc_v12_0_query_ras_ecc_err_addr(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_ecc_err *entries[MAX_ECC_NUM_PER_RETIREMENT];
+ struct radix_tree_root *ecc_tree;
+ int new_detected, ret, i;
+
+ ecc_tree = &con->umc_ecc_log.de_page_tree;
+
+ mutex_lock(&con->umc_ecc_log.lock);
+ new_detected = radix_tree_gang_lookup_tag(ecc_tree, (void **)entries,
+ 0, ARRAY_SIZE(entries), UMC_ECC_NEW_DETECTED_TAG);
+ for (i = 0; i < new_detected; i++) {
+ if (!entries[i])
+ continue;
+
+ ret = umc_v12_0_fill_error_record(adev, entries[i], ras_error_status);
+ if (ret) {
+ dev_err(adev->dev, "Fail to fill umc error record, ret:%d\n", ret);
+ break;
+ }
+ /* the tag is cleared at index pa_pfn of the entry */
+ radix_tree_tag_clear(ecc_tree,
+ entries[i]->pa_pfn, UMC_ECC_NEW_DETECTED_TAG);
+ }
+ mutex_unlock(&con->umc_ecc_log.lock);
+}
+
+/*
+ * Derive the two-bit die id from a retired page address and its MCA address.
+ *
+ * Each die-id bit is the XOR (parity) of five address bits. Bit 0 uses only
+ * @retired_page; bit 1 takes two of its inputs from @mca_addr.
+ */
+static uint32_t umc_v12_0_get_die_id(struct amdgpu_device *adev,
+			uint64_t mca_addr, uint64_t retired_page)
+{
+	uint32_t die_bit0, die_bit1;
+
+	/* we only calculate die id for nps1 mode right now */
+	die_bit0 = ((retired_page >> 12) ^
+		    (retired_page >> 20) ^
+		    (retired_page >> 27) ^
+		    (retired_page >> 34) ^
+		    (retired_page >> 41)) & 0x1ULL;
+
+	/* the original PA_C4 and PA_R13 may be cleared in retired_page, so
+	 * get them from mca_addr.
+	 */
+	die_bit1 = ((retired_page >> 13) ^
+		    (mca_addr >> 5) ^
+		    (retired_page >> 28) ^
+		    (mca_addr >> 23) ^
+		    (retired_page >> 42)) & 0x1ULL;
+
+	return (die_bit0 | (die_bit1 << 1)) & 3;
+}
+
+/*
+ * Decode an MCA IPID value into die id, channel, UMC instance and socket id.
+ * Every output pointer is optional; NULL pointers are skipped.
+ */
+static void umc_v12_0_mca_ipid_parse(struct amdgpu_device *adev, uint64_t ipid,
+ uint32_t *did, uint32_t *ch, uint32_t *umc_inst, uint32_t *sid)
+{
+ if (did)
+ *did = MCA_IPID_2_DIE_ID(ipid);
+ if (ch)
+ *ch = MCA_IPID_2_UMC_CH(ipid);
+ if (umc_inst)
+ *umc_inst = MCA_IPID_2_UMC_INST(ipid);
+ if (sid)
+ *sid = MCA_IPID_2_SOCKET_ID(ipid);
+}
+
+/* UMC v12.0 RAS interface: wires up the callbacks defined in this file. */
+struct amdgpu_umc_ras umc_v12_0_ras = {
+ .ras_block = {
+ .hw_ops = &umc_v12_0_ras_hw_ops,
+ .ras_late_init = umc_v12_0_ras_late_init,
+ },
+ .err_cnt_init = umc_v12_0_err_cnt_init,
+ .query_ras_poison_mode = umc_v12_0_query_ras_poison_mode,
+ .ecc_info_query_ras_error_address = umc_v12_0_query_ras_ecc_err_addr,
+ .check_ecc_err_status = umc_v12_0_check_ecc_err_status,
+ .update_ecc_status = umc_v12_0_update_ecc_status,
+ .convert_ras_err_addr = umc_v12_0_convert_error_address,
+ .get_die_id_from_pa = umc_v12_0_get_die_id,
+ .get_retire_flip_bits = umc_v12_0_get_retire_flip_bits,
+ .mca_ipid_parse = umc_v12_0_mca_ipid_parse,
+};
+
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
new file mode 100644
index 000000000000..63b7e7254526
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __UMC_V12_0_H__
+#define __UMC_V12_0_H__
+
+#include "soc15_common.h"
+#include "amdgpu.h"
+
+#define UMC_V12_0_NODE_DIST 0x40000000
+#define UMC_V12_0_INST_DIST 0x40000
+
+/* UMC register per channel offset */
+#define UMC_V12_0_PER_CHANNEL_OFFSET 0x400
+
+/* UMC cross node offset */
+#define UMC_V12_0_CROSS_NODE_OFFSET 0x100000000
+
+/* OdEccErrCnt max value */
+#define UMC_V12_0_CE_CNT_MAX 0xffff
+/* umc ce interrupt threshold */
+#define UMC_V12_0_CE_INT_THRESHOLD 0xffff
+/* umc ce count initial value */
+#define UMC_V12_0_CE_CNT_INIT (UMC_V12_0_CE_CNT_MAX - UMC_V12_0_CE_INT_THRESHOLD)
+
+/* number of umc channel instance with memory map register access */
+#define UMC_V12_0_CHANNEL_INSTANCE_NUM 8
+/* number of umc instance with memory map register access */
+#define UMC_V12_0_UMC_INSTANCE_NUM 4
+
+/* Total channel instances for all available umc nodes */
+#define UMC_V12_0_TOTAL_CHANNEL_NUM(adev) \
+ (UMC_V12_0_CHANNEL_INSTANCE_NUM * (adev)->gmc.num_umc)
+
+/* one piece of normalized address is mapped to 8 pieces of physical address */
+#define UMC_V12_0_NA_MAP_PA_NUM 8
+/* R13 bit shift should be considered, double the number */
+#define UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL (UMC_V12_0_NA_MAP_PA_NUM * 2)
+
+/* column bits in SOC physical address */
+#define UMC_V12_0_PA_C2_BIT 15
+#define UMC_V12_0_PA_C3_BIT 16
+#define UMC_V12_0_PA_C4_BIT 21
+/* row bits in SOC physical address */
+#define UMC_V12_0_PA_R0_BIT 22
+#define UMC_V12_0_PA_R10_BIT 32
+#define UMC_V12_0_PA_R11_BIT 33
+#define UMC_V12_0_PA_R12_BIT 34
+#define UMC_V12_0_PA_R13_BIT 35
+/* channel bit in SOC physical address */
+#define UMC_V12_0_PA_CH4_BIT 12
+#define UMC_V12_0_PA_CH5_BIT 13
+/* bank bit in SOC physical address */
+#define UMC_V12_0_PA_B0_BIT 19
+#define UMC_V12_0_PA_B1_BIT 20
+/* row bits in MCA address */
+#define UMC_V12_0_MA_R0_BIT 10
+
+#define MCA_UMC_HWID_V12_0 0x96
+#define MCA_UMC_MCATYPE_V12_0 0x0
+
+#define MCA_IPID_LO_2_UMC_CH(_ipid_lo) (((((_ipid_lo) >> 20) & 0x1) * 4) + \
+ (((_ipid_lo) >> 12) & 0xF)) /* (bit 20) * 4 + bits [15:12] */
+#define MCA_IPID_LO_2_UMC_INST(_ipid_lo) (((_ipid_lo) >> 21) & 0x7) /* bits [23:21] */
+
+#define MCA_IPID_2_DIE_ID(ipid) ((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) >> 2) & 0x03) /* InstanceIdHi bits [3:2] */
+
+#define MCA_IPID_2_UMC_CH(ipid) \
+ (MCA_IPID_LO_2_UMC_CH(REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo)))
+
+#define MCA_IPID_2_UMC_INST(ipid) \
+ (MCA_IPID_LO_2_UMC_INST(REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo)))
+
+#define MCA_IPID_2_SOCKET_ID(ipid) \
+ (((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo) & 0x1) << 2) | \
+ (REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) & 0x03)) /* InstanceIdLo bit 0 becomes bit 2, InstanceIdHi bits [1:0] are the low bits */
+
+bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
+bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
+bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
+
+typedef bool (*check_error_type_func)(struct amdgpu_device *adev, uint64_t mc_umc_status);
+
+extern struct amdgpu_umc_ras umc_v12_0_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c
index 0d6b50528d76..97fa88ed770c 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c
@@ -25,7 +25,7 @@
static void umc_v6_0_init_registers(struct amdgpu_device *adev)
{
- unsigned i,j;
+ unsigned i, j;
for (i = 0; i < 4; i++)
for (j = 0; j < 4; j++)
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
index 20b44983ac94..f17d297b594b 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -300,7 +300,6 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
{
uint32_t lsb, mc_umc_status_addr;
uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
- struct eeprom_table_record *err_rec;
uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
if (adev->asic_type == CHIP_ARCTURUS) {
@@ -328,12 +327,9 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
return;
}
- err_rec = &err_data->err_addr[err_data->err_addr_cnt];
-
- /* calculate error address if ue/ce error is detected */
+ /* calculate error address if ue error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
- (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
/* the lowest lsb bits should be ignored */
@@ -346,20 +342,8 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
ADDR_OF_256B_BLOCK(channel_index) |
OFFSET_IN_256B_BLOCK(err_addr);
- /* we only save ue error information currently, ce is skipped */
- if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
- == 1) {
- err_rec->address = err_addr;
- /* page frame address is saved */
- err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
- err_rec->ts = (uint64_t)ktime_get_real_seconds();
- err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
- err_rec->cu = 0;
- err_rec->mem_channel = channel_index;
- err_rec->mcumc_id = umc_inst;
-
- err_data->err_addr_cnt++;
- }
+ amdgpu_umc_fill_error_record(err_data, err_addr,
+ retired_page, channel_index, umc_inst);
}
/* clear umc status */
@@ -465,10 +449,14 @@ static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev)
umc_v6_1_enable_umc_index_mode(adev);
}
-const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs = {
- .err_cnt_init = umc_v6_1_err_cnt_init,
- .ras_late_init = amdgpu_umc_ras_late_init,
- .ras_fini = amdgpu_umc_ras_fini,
+const struct amdgpu_ras_block_hw_ops umc_v6_1_ras_hw_ops = {
.query_ras_error_count = umc_v6_1_query_ras_error_count,
.query_ras_error_address = umc_v6_1_query_ras_error_address,
};
+
+struct amdgpu_umc_ras umc_v6_1_ras = {
+ .ras_block = {
+ .hw_ops = &umc_v6_1_ras_hw_ops,
+ },
+ .err_cnt_init = umc_v6_1_err_cnt_init,
+}; \ No newline at end of file
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
index 5dc36c730bb2..50c632eb4cc6 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
@@ -45,7 +45,7 @@
/* umc ce count initial value */
#define UMC_V6_1_CE_CNT_INIT (UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD)
-extern const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs;
+extern struct amdgpu_umc_ras umc_v6_1_ras;
extern const uint32_t
umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM];
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
index f7ec3fe134e5..a3ee3c4c650f 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
@@ -47,12 +47,222 @@ static inline uint32_t get_umc_v6_7_reg_offset(struct amdgpu_device *adev,
uint32_t umc_inst,
uint32_t ch_inst)
{
+ uint32_t index = umc_inst * adev->umc.channel_inst_num + ch_inst;
+
+ /* adjust umc and channel index offset,
+ * the register address is not linear on each umc instance */
+ umc_inst = index / 4;
+ ch_inst = index % 4;
+
return adev->umc.channel_offs * ch_inst + UMC_V6_7_INST_DIST * umc_inst;
}
+static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev,
+ uint64_t mc_umc_status, uint32_t umc_reg_offset)
+{
+ uint32_t mc_umc_addr;
+ uint64_t reg_value;
+
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)
+ dev_info(adev->dev, "Deferred error\n");
+
+ if (mc_umc_status)
+ dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset);
+
+ /* print IPID registers value; the register is read with RREG64_PCIE
+ * at (register offset + umc_reg_offset) * 4 and is only logged when
+ * the value is non-zero
+ */
+ mc_umc_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0);
+ reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
+ if (reg_value)
+ dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
+
+ /* print SYND registers value (same read-and-log-if-non-zero pattern) */
+ mc_umc_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0);
+ reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
+ if (reg_value)
+ dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
+
+ /* print MISC0 registers value (same read-and-log-if-non-zero pattern) */
+ mc_umc_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0);
+ reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
+ if (reg_value)
+ dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
+}
+
+static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_inst, uint32_t ch_inst,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t eccinfo_table_idx;
+ uint32_t umc_reg_offset;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ umc_reg_offset = get_umc_v6_7_reg_offset(adev,
+ umc_inst, ch_inst);
+
+ eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
+ /* check for SRAM correctable error
+ MCUMC_STATUS is a 64 bit register.
+ Here the status value is taken from the ras->umc_ecc.ecc[] entry
+ selected by eccinfo_table_idx, not read from the register.
+ *error_count is incremented by one when both Val and CECC are set */
+ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
+ *error_count += 1;
+
+ umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
+
+ if (ras->umc_ecc.record_ce_addr_supported) { /* the address is only decoded and logged when this flag is set */
+ uint64_t err_addr, soc_pa;
+ uint32_t channel_index =
+ adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+
+ err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_ceumc_addr;
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+ /* translate umc channel address to soc pa, 3 parts are included */
+ soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
+ ADDR_OF_256B_BLOCK(channel_index) |
+ OFFSET_IN_256B_BLOCK(err_addr);
+
+ /* The umc channel bits are not original values, they are hashed */
+ SET_CHANNEL_HASH(channel_index, soc_pa);
+
+ dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa);
+ }
+ }
+}
+
+static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_inst, uint32_t ch_inst,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t eccinfo_table_idx;
+ uint32_t umc_reg_offset;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ umc_reg_offset = get_umc_v6_7_reg_offset(adev,
+ umc_inst, ch_inst);
+
+ eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
+ /* check the MCUMC_STATUS taken from the ras->umc_ecc.ecc[] entry:
+ * *error_count is incremented by one when Val is set together with
+ * at least one of Deferred, UECC, PCC, UC or TCC
+ */
+ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+ if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
+ *error_count += 1;
+
+ umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
+ }
+}
+
+static int umc_v6_7_ecc_info_querry_ecc_error_count(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)data; /* data is used as a struct ras_err_data; node_inst is not referenced */
+
+ umc_v6_7_ecc_info_query_correctable_error_count(adev,
+ umc_inst, ch_inst,
+ &(err_data->ce_count));
+
+ umc_v6_7_ecc_info_querry_uncorrectable_error_count(adev,
+ umc_inst, ch_inst,
+ &(err_data->ue_count));
+
+ return 0; /* this callback always returns 0 */
+}
+
+static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v6_7_ecc_info_querry_ecc_error_count, ras_error_status); /* NOTE(review): presumably runs the callback for each channel with ras_error_status as its data argument - confirm in amdgpu_umc_loop_channels */
+}
+
+void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t err_addr,
+ uint32_t ch_inst, uint32_t umc_inst)
+{
+ uint32_t channel_index;
+ uint64_t soc_pa, retired_page, column;
+
+ channel_index =
+ adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+ /* translate umc channel address to soc pa, 3 parts are included */
+ soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
+ ADDR_OF_256B_BLOCK(channel_index) |
+ OFFSET_IN_256B_BLOCK(err_addr);
+
+ /* The umc channel bits are not original values, they are hashed */
+ SET_CHANNEL_HASH(channel_index, soc_pa);
+
+ /* clear [C4 C3 C2] in soc physical address */
+ soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT);
+
+ /* loop for all possibilities of [C4 C3 C2]; every iteration logs and
+ * records two pages: one with R14 as computed and one with R14 flipped
+ */
+ for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) {
+ retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT);
+ dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
+ amdgpu_umc_fill_error_record(err_data, err_addr,
+ retired_page, channel_index, umc_inst);
+
+ /* shift R14 bit (the bit is toggled with XOR) */
+ retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT);
+ dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
+ amdgpu_umc_fill_error_record(err_data, err_addr,
+ retired_page, channel_index, umc_inst);
+ }
+}
+
+static int umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint64_t mc_umc_status, err_addr;
+ uint32_t eccinfo_table_idx;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+
+ eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
+ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+
+ if (mc_umc_status == 0)
+ return 0; /* the table entry holds no status for this channel */
+
+ if (!err_data->err_addr)
+ return 0; /* err_addr is NULL, nothing is converted */
+
+ /* calculate error address if ue error is detected (Val and UECC both
+ * set); the address comes from the table entry's mca_umc_addr field
+ */
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
+
+ err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+ umc_v6_7_convert_error_address(adev, err_data, err_addr,
+ ch_inst, umc_inst);
+ }
+
+ return 0;
+}
+
+static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v6_7_ecc_info_query_error_address, ras_error_status); /* NOTE(review): presumably runs the callback for each channel with ras_error_status as its data argument - confirm in amdgpu_umc_loop_channels */
+}
+
static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev,
uint32_t umc_reg_offset,
- unsigned long *error_count)
+ unsigned long *error_count,
+ uint32_t ch_inst,
+ uint32_t umc_inst)
{
uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
uint32_t ecc_err_cnt, ecc_err_cnt_addr;
@@ -92,8 +302,36 @@ static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev,
MCUMC_STATUS is a 64 bit register */
mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
*error_count += 1;
+
+ umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
+
+ {
+ uint64_t err_addr, soc_pa;
+ uint32_t mc_umc_addrt0;
+ uint32_t channel_index;
+
+ mc_umc_addrt0 =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);
+
+ channel_index =
+ adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+
+ err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+ /* translate umc channel address to soc pa, 3 parts are included */
+ soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
+ ADDR_OF_256B_BLOCK(channel_index) |
+ OFFSET_IN_256B_BLOCK(err_addr);
+
+ /* The umc channel bits are not original values, they are hashed */
+ SET_CHANNEL_HASH(channel_index, soc_pa);
+
+ dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa);
+ }
+ }
}
static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev,
@@ -113,15 +351,21 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
*error_count += 1;
+
+ umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
+ }
}
-static void umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev,
- uint32_t umc_reg_offset)
+static int umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
{
uint32_t ecc_err_cnt_addr;
uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+ uint32_t umc_reg_offset =
+ get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst);
ecc_err_cnt_sel_addr =
SOC15_REG_OFFSET(UMC, 0,
@@ -155,60 +399,54 @@ static void umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev,
/* clear higher chip error count */
WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
UMC_V6_7_CE_CNT_INIT);
+
+ return 0;
}
static void umc_v6_7_reset_error_count(struct amdgpu_device *adev)
{
- uint32_t umc_inst = 0;
- uint32_t ch_inst = 0;
- uint32_t umc_reg_offset = 0;
+ amdgpu_umc_loop_channels(adev,
+ umc_v6_7_reset_error_count_per_channel, NULL);
+}
- LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
- umc_reg_offset = get_umc_v6_7_reg_offset(adev,
- umc_inst,
- ch_inst);
+static int umc_v6_7_query_ecc_error_count(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+ uint32_t umc_reg_offset =
+ get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst);
- umc_v6_7_reset_error_count_per_channel(adev,
- umc_reg_offset);
- }
+ umc_v6_7_query_correctable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ce_count),
+ ch_inst, umc_inst);
+
+ umc_v6_7_querry_uncorrectable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ue_count));
+
+ return 0;
}
static void umc_v6_7_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status)
{
- struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
-
- uint32_t umc_inst = 0;
- uint32_t ch_inst = 0;
- uint32_t umc_reg_offset = 0;
-
- /*TODO: driver needs to toggle DF Cstate to ensure
- * safe access of UMC registers. Will add the protection */
- LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
- umc_reg_offset = get_umc_v6_7_reg_offset(adev,
- umc_inst,
- ch_inst);
- umc_v6_7_query_correctable_error_count(adev,
- umc_reg_offset,
- &(err_data->ce_count));
- umc_v6_7_querry_uncorrectable_error_count(adev,
- umc_reg_offset,
- &(err_data->ue_count));
- }
+ amdgpu_umc_loop_channels(adev,
+ umc_v6_7_query_ecc_error_count, ras_error_status);
umc_v6_7_reset_error_count(adev);
}
-static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
- struct ras_err_data *err_data,
- uint32_t umc_reg_offset,
- uint32_t ch_inst,
- uint32_t umc_inst)
+static int umc_v6_7_query_error_address(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
{
uint32_t mc_umc_status_addr;
- uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
- struct eeprom_table_record *err_rec;
- uint32_t channel_index;
+ uint64_t mc_umc_status = 0, mc_umc_addrt0, err_addr;
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+ uint32_t umc_reg_offset =
+ get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst);
mc_umc_status_addr =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
@@ -218,74 +456,36 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
if (mc_umc_status == 0)
- return;
+ return 0;
if (!err_data->err_addr) {
/* clear umc status */
WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
- return;
+ return 0;
}
- err_rec = &err_data->err_addr[err_data->err_addr_cnt];
-
- channel_index =
- adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
-
- /* calculate error address if ue/ce error is detected */
+ /* calculate error address if ue error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
- (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
-
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
- err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+ err_addr =
+ REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
- /* translate umc channel address to soc pa, 3 parts are included */
- retired_page = ADDR_OF_8KB_BLOCK(err_addr) |
- ADDR_OF_256B_BLOCK(channel_index) |
- OFFSET_IN_256B_BLOCK(err_addr);
-
- /* we only save ue error information currently, ce is skipped */
- if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
- == 1) {
- err_rec->address = err_addr;
- /* page frame address is saved */
- err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
- err_rec->ts = (uint64_t)ktime_get_real_seconds();
- err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
- err_rec->cu = 0;
- err_rec->mem_channel = channel_index;
- err_rec->mcumc_id = umc_inst;
-
- err_data->err_addr_cnt++;
- }
+ umc_v6_7_convert_error_address(adev, err_data, err_addr,
+ ch_inst, umc_inst);
}
/* clear umc status */
WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+
+ return 0;
}
static void umc_v6_7_query_ras_error_address(struct amdgpu_device *adev,
void *ras_error_status)
{
- struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
-
- uint32_t umc_inst = 0;
- uint32_t ch_inst = 0;
- uint32_t umc_reg_offset = 0;
-
- /*TODO: driver needs to toggle DF Cstate to ensure
- * safe access of UMC resgisters. Will add the protection
- * when firmware interface is ready */
- LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
- umc_reg_offset = get_umc_v6_7_reg_offset(adev,
- umc_inst,
- ch_inst);
- umc_v6_7_query_error_address(adev,
- err_data,
- umc_reg_offset,
- ch_inst,
- umc_inst);
- }
+ amdgpu_umc_loop_channels(adev,
+ umc_v6_7_query_error_address, ras_error_status);
}
static uint32_t umc_v6_7_query_ras_poison_mode_per_channel(
@@ -304,27 +504,25 @@ static uint32_t umc_v6_7_query_ras_poison_mode_per_channel(
static bool umc_v6_7_query_ras_poison_mode(struct amdgpu_device *adev)
{
- uint32_t umc_inst = 0;
- uint32_t ch_inst = 0;
uint32_t umc_reg_offset = 0;
- LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
- umc_reg_offset = get_umc_v6_7_reg_offset(adev,
- umc_inst,
- ch_inst);
- /* Enabling fatal error in one channel will be considered
- as fatal error mode */
- if (umc_v6_7_query_ras_poison_mode_per_channel(adev, umc_reg_offset))
- return false;
- }
-
- return true;
+ /* Enabling fatal error in umc instance0 channel0 will be
+ * considered as fatal error mode
+ */
+ umc_reg_offset = get_umc_v6_7_reg_offset(adev, 0, 0);
+ return !umc_v6_7_query_ras_poison_mode_per_channel(adev, umc_reg_offset);
}
-const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs = {
- .ras_late_init = amdgpu_umc_ras_late_init,
- .ras_fini = amdgpu_umc_ras_fini,
+const struct amdgpu_ras_block_hw_ops umc_v6_7_ras_hw_ops = {
.query_ras_error_count = umc_v6_7_query_ras_error_count,
.query_ras_error_address = umc_v6_7_query_ras_error_address,
+};
+
+struct amdgpu_umc_ras umc_v6_7_ras = {
+ .ras_block = {
+ .hw_ops = &umc_v6_7_ras_hw_ops,
+ },
.query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,
+ .ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count,
+ .ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
index 57f2557e7aca..105245d5b6e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
@@ -41,12 +41,37 @@
#define UMC_V6_7_CHANNEL_INSTANCE_NUM 8
/* total channel instances in one umc block */
#define UMC_V6_7_TOTAL_CHANNEL_NUM (UMC_V6_7_CHANNEL_INSTANCE_NUM * UMC_V6_7_UMC_INSTANCE_NUM)
+/* one piece of normalizing address is mapped to 8 pieces of physical address */
+#define UMC_V6_7_NA_MAP_PA_NUM 8
+/* R14 bit shift should be considered, double the number */
+#define UMC_V6_7_BAD_PAGE_NUM_PER_CHANNEL (UMC_V6_7_NA_MAP_PA_NUM * 2)
+/* The CH4 bit in SOC physical address */
+#define UMC_V6_7_PA_CH4_BIT 12
+/* The C2 bit in SOC physical address */
+#define UMC_V6_7_PA_C2_BIT 17
+/* The R14 bit in SOC physical address */
+#define UMC_V6_7_PA_R14_BIT 34
/* UMC regiser per channel offset */
#define UMC_V6_7_PER_CHANNEL_OFFSET 0x400
-extern const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs;
+
+/* XOR bit 20, 25, 34 of PA into CH4 bit (bit 12 of PA),
+ * hash bit is only effective when related setting is enabled
+ * (adev->df.hash_status.hash_64k, hash_2m and hash_1g respectively).
+ * The XOR starts from (channel_idx >> 4).
+ * Both macros reference a variable named adev that is not a macro
+ * parameter, so one must be in scope wherever they are expanded.
+ * SET_CHANNEL_HASH modifies pa in place: it clears the CH4 bit first and
+ * then ORs in the hashed value.
+ */
+#define CHANNEL_HASH(channel_idx, pa) (((channel_idx) >> 4) ^ \
+ (((pa) >> 20) & 0x1ULL & adev->df.hash_status.hash_64k) ^ \
+ (((pa) >> 25) & 0x1ULL & adev->df.hash_status.hash_2m) ^ \
+ (((pa) >> 34) & 0x1ULL & adev->df.hash_status.hash_1g))
+#define SET_CHANNEL_HASH(channel_idx, pa) do { \
+ (pa) &= ~(0x1ULL << UMC_V6_7_PA_CH4_BIT); \
+ (pa) |= (CHANNEL_HASH(channel_idx, pa) << UMC_V6_7_PA_CH4_BIT); \
+ } while (0)
+
+extern struct amdgpu_umc_ras umc_v6_7_ras;
extern const uint32_t
umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM];
extern const uint32_t
umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM];
-
+void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t err_addr,
+ uint32_t ch_inst, uint32_t umc_inst);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
new file mode 100644
index 000000000000..a32f87992f20
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
@@ -0,0 +1,458 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v8_10.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
+#include "amdgpu.h"
+#include "umc/umc_8_10_0_offset.h"
+#include "umc/umc_8_10_0_sh_mask.h"
+
+#define UMC_8_NODE_DIST 0x800000
+#define UMC_8_INST_DIST 0x4000
+
+struct channelnum_map_colbit {
+ uint32_t channel_num; /* lookup key: compared with UMC_V8_10_TOTAL_CHANNEL_NUM(adev) */
+ uint32_t col_bit; /* value handed to the SWIZZLE_MODE_ADDR_*() macros */
+};
+
+const struct channelnum_map_colbit umc_v8_10_channelnum_map_colbit_table[] = { /* {channel_num, col_bit} */
+ {24, 13},
+ {20, 13},
+ {16, 12},
+ {14, 12},
+ {12, 12},
+ {10, 12},
+ {6, 11},
+};
+
+const uint32_t /* 4 entries of [UMC instance][channel instance]; NOTE(review): user not visible here */
+ umc_v8_10_channel_idx_tbl_ext0[]
+ [UMC_V8_10_UMC_INSTANCE_NUM]
+ [UMC_V8_10_CHANNEL_INSTANCE_NUM] = {
+ {{1, 5}, {7, 3}},
+ {{14, 15}, {13, 12}},
+ {{10, 11}, {9, 8}},
+ {{6, 2}, {0, 4}}
+ };
+
+const uint32_t /* 6 entries of [UMC instance][channel instance], values 0..23 */
+ umc_v8_10_channel_idx_tbl[]
+ [UMC_V8_10_UMC_INSTANCE_NUM]
+ [UMC_V8_10_CHANNEL_INSTANCE_NUM] = {
+ {{16, 18}, {17, 19}},
+ {{15, 11}, {3, 7}},
+ {{1, 5}, {13, 9}},
+ {{23, 21}, {22, 20}},
+ {{0, 4}, {12, 8}},
+ {{14, 10}, {2, 6}}
+ };
+
+/* Register offset of channel @ch_inst in UMC instance @umc_inst of node @node_inst. */
+static inline uint32_t get_umc_v8_10_reg_offset(struct amdgpu_device *adev,
+		uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst)
+{
+	uint32_t offs = UMC_8_NODE_DIST * node_inst + UMC_8_INST_DIST * umc_inst;
+
+	return offs + adev->umc.channel_offs * ch_inst;
+}
+
+/*
+ * amdgpu_umc_loop_channels() callback: write UMC_V8_10_CE_CNT_INIT to the
+ * GeccErrCnt register of one channel. @data is unused; always returns 0.
+ */
+static int umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev,
+		uint32_t node_inst, uint32_t umc_inst,
+		uint32_t ch_inst, void *data)
+{
+	uint32_t chan_base =
+		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
+	uint32_t cnt_reg = SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt);
+
+	/* clear error count */
+	WREG32_PCIE((cnt_reg + chan_base) * 4, UMC_V8_10_CE_CNT_INIT);
+
+	return 0;
+}
+
+/* Run the per-channel counter reset through amdgpu_umc_loop_channels(). */
+static void umc_v8_10_clear_error_count(struct amdgpu_device *adev)
+{
+	amdgpu_umc_loop_channels(adev, umc_v8_10_clear_error_count_per_channel, NULL);
+}
+
+/*
+ * Add one to *error_count when the channel's 64-bit MCUMC_STATUS register
+ * reports a valid (Val) correctable (CECC) error.
+ */
+static void umc_v8_10_query_correctable_error_count(struct amdgpu_device *adev,
+		uint32_t umc_reg_offset,
+		unsigned long *error_count)
+{
+	uint32_t status_reg = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+	uint64_t status = RREG64_PCIE((status_reg + umc_reg_offset) * 4);
+
+	/* MCUMC_STATUS is relied on for the correctable error counter */
+	if (REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) != 1)
+		return;
+	if (REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) != 1)
+		return;
+
+	*error_count += 1;
+}
+
+/* Add one to *error_count when MCUMC_STATUS is valid and flags an uncorrectable error. */
+static void umc_v8_10_query_uncorrectable_error_count(struct amdgpu_device *adev,
+		uint32_t umc_reg_offset,
+		unsigned long *error_count)
+{
+	uint32_t status_reg = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+	uint64_t status = RREG64_PCIE((status_reg + umc_reg_offset) * 4);
+
+	if (REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) != 1)
+		return;
+
+	/* any one of Deferred, UECC, PCC, UC or TCC is enough */
+	if (REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+	    REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+	    REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+	    REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+	    REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)
+		*error_count += 1;
+}
+
+/*
+ * amdgpu_umc_loop_channels() callback: accumulate the CE and UE counts of one
+ * channel into the struct ras_err_data passed as @data. Always returns 0.
+ */
+static int umc_v8_10_query_ecc_error_count(struct amdgpu_device *adev,
+		uint32_t node_inst, uint32_t umc_inst,
+		uint32_t ch_inst, void *data)
+{
+	struct ras_err_data *counts = data;
+	uint32_t chan_base =
+		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
+
+	umc_v8_10_query_correctable_error_count(adev, chan_base, &counts->ce_count);
+	umc_v8_10_query_uncorrectable_error_count(adev, chan_base, &counts->ue_count);
+
+	return 0;
+}
+
+/* Collect CE/UE counts into @ras_error_status, then reset the hardware counters. */
+static void umc_v8_10_query_ras_error_count(struct amdgpu_device *adev,
+		void *ras_error_status)
+{
+	amdgpu_umc_loop_channels(adev, umc_v8_10_query_ecc_error_count, ras_error_status);
+
+	umc_v8_10_clear_error_count(adev);
+}
+
+/* Look up the col_bit for @channel_num; U32_MAX means there is no table entry. */
+static uint32_t umc_v8_10_get_col_bit(uint32_t channel_num)
+{
+	const struct channelnum_map_colbit *entry = umc_v8_10_channelnum_map_colbit_table;
+	size_t left = ARRAY_SIZE(umc_v8_10_channelnum_map_colbit_table);
+
+	for (; left; left--, entry++)
+		if (entry->channel_num == channel_num)
+			return entry->col_bit;
+	return U32_MAX;
+}
+
+/*
+ * Map normalized address @na of channel @channel_idx to a SOC physical address
+ * in swizzle mode: 0 on success, -EINVAL if the channel count has no col_bit.
+ */
+static int umc_v8_10_swizzle_mode_na_to_pa(struct amdgpu_device *adev,
+		uint32_t channel_idx,
+		uint64_t na, uint64_t *soc_pa)
+{
+	uint32_t channel_num = UMC_V8_10_TOTAL_CHANNEL_NUM(adev);
+	uint32_t col_bit = umc_v8_10_get_col_bit(channel_num);
+	uint64_t tmp_addr;
+
+	/* *soc_pa is left untouched when the lookup fails */
+	if (col_bit == U32_MAX)
+		return -EINVAL;
+
+	tmp_addr = SWIZZLE_MODE_TMP_ADDR(na, channel_num, channel_idx);
+	*soc_pa = SWIZZLE_MODE_ADDR_HI(tmp_addr, col_bit) |
+		  SWIZZLE_MODE_ADDR_MID(na, col_bit) |
+		  SWIZZLE_MODE_ADDR_LOW(tmp_addr, col_bit) |
+		  SWIZZLE_MODE_ADDR_LSB(na);
+	return 0;
+}
+
+/*
+ * Turn one normalized error address into retired-page records.
+ *
+ * The low AddrLsb bits (field of @mc_umc_status) of @err_addr are dropped, then
+ * each [C6 C5] column combination is mapped to a SOC physical address and
+ * stored in @err_data together with the channel index and @umc_inst.
+ * Conversion stops at the first address that cannot be mapped.
+ */
+static void umc_v8_10_convert_error_address(struct amdgpu_device *adev,
+		struct ras_err_data *err_data, uint64_t err_addr,
+		uint32_t ch_inst, uint32_t umc_inst,
+		uint32_t node_inst, uint64_t mc_umc_status)
+{
+	uint32_t lsb = REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrLsb);
+	uint32_t slot = (node_inst * adev->umc.umc_inst_num + umc_inst) *
+			adev->umc.channel_inst_num + ch_inst;
+	uint32_t channel = adev->umc.channel_idx_tbl[slot];
+	uint64_t na_base, na, soc_pa;
+	uint32_t c6c5;
+
+	/* the lowest lsb bits should be ignored */
+	err_addr &= ~((0x1ULL << lsb) - 1);
+	na_base = err_addr & ~(0x3ULL << UMC_V8_10_NA_C5_BIT);
+
+	/* loop for all possibilities of [C6 C5] in normal address. */
+	for (c6c5 = 0; c6c5 < UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; c6c5++) {
+		na = na_base | (c6c5 << UMC_V8_10_NA_C5_BIT);
+
+		if (umc_v8_10_swizzle_mode_na_to_pa(adev, channel, na, &soc_pa)) {
+			dev_err(adev->dev, "Failed to map pa from umc na.\n");
+			return;
+		}
+
+		dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa);
+		amdgpu_umc_fill_error_record(err_data, na, soc_pa,
+				channel, umc_inst);
+	}
+}
+
+/*
+ * amdgpu_umc_loop_channels() callback: if the channel's MCUMC_STATUS reports a
+ * valid UECC error with a valid address (AddrV), convert the address read from
+ * MCUMC_ADDRT0 into retired-page records in the struct ras_err_data passed as
+ * @data. A non-zero status register is cleared before returning.
+ * Always returns 0.
+ */
+static int umc_v8_10_query_error_address(struct amdgpu_device *adev,
+		uint32_t node_inst, uint32_t umc_inst,
+		uint32_t ch_inst, void *data)
+{
+	struct ras_err_data *err_data = data;
+	uint32_t chan_base =
+		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
+	uint64_t status_reg = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+	uint64_t addr_reg, status, err_addr;
+
+	status = RREG64_PCIE((status_reg + chan_base) * 4);
+	if (!status)
+		return 0;
+
+	/*
+	 * calculate error address if ue error is detected; without an
+	 * err_addr buffer nothing is recorded and only the status is cleared
+	 */
+	if (err_data->err_addr &&
+	    REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+	    REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 &&
+	    REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
+		addr_reg = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);
+		err_addr = RREG64_PCIE((addr_reg + chan_base) * 4);
+		err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+		umc_v8_10_convert_error_address(adev, err_data, err_addr,
+				ch_inst, umc_inst, node_inst, status);
+	}
+
+	/* clear umc status */
+	WREG64_PCIE((status_reg + chan_base) * 4, 0x0ULL);
+
+	return 0;
+}
+
+/* Run umc_v8_10_query_error_address() via the channel loop with @ras_error_status. */
+static void umc_v8_10_query_ras_error_address(struct amdgpu_device *adev,
+		void *ras_error_status)
+{
+	amdgpu_umc_loop_channels(adev, umc_v8_10_query_error_address, ras_error_status);
+}
+
+/*
+ * amdgpu_umc_loop_channels() callback: set GeccErrCntSel.GeccErrInt to 1 and
+ * load GeccErrCnt with UMC_V8_10_CE_CNT_INIT for one channel.
+ * @data is unused; always returns 0.
+ */
+static int umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev,
+		uint32_t node_inst, uint32_t umc_inst,
+		uint32_t ch_inst, void *data)
+{
+	uint32_t chan_base =
+		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
+	uint32_t sel_reg = SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCntSel);
+	uint32_t cnt_reg = SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt);
+	uint32_t sel;
+
+	/* set ce error interrupt type to APIC based interrupt */
+	sel = RREG32_PCIE((sel_reg + chan_base) * 4);
+	sel = REG_SET_FIELD(sel, UMCCH0_0_GeccErrCntSel, GeccErrInt, 0x1);
+	WREG32_PCIE((sel_reg + chan_base) * 4, sel);
+
+	/* set error count to initial value */
+	WREG32_PCIE((cnt_reg + chan_base) * 4, UMC_V8_10_CE_CNT_INIT);
+
+	return 0;
+}
+
+/* Apply the per-channel error counter setup through amdgpu_umc_loop_channels(). */
+static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev)
+{
+	amdgpu_umc_loop_channels(adev, umc_v8_10_err_cnt_init_per_channel, NULL);
+}
+
+static bool umc_v8_10_query_ras_poison_mode(struct amdgpu_device *adev)
+{
+ /*
+ * Always return true (adev is not consulted), because
+ * UMCCH0_0_GeccCtrl is not accessible from host side
+ */
+ return true;
+}
+
+/*
+ * Add the ce_count_lo_chip value cached in the RAS context's ECC info table
+ * for this channel to *error_count.
+ */
+static void umc_v8_10_ecc_info_query_correctable_error_count(struct amdgpu_device *adev,
+		uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst,
+		unsigned long *error_count)
+{
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	uint32_t slot = (node_inst * adev->umc.umc_inst_num + umc_inst) *
+			adev->umc.channel_inst_num + ch_inst;
+	uint16_t cached_ce = ras->umc_ecc.ecc[slot].ce_count_lo_chip;
+
+	if (!cached_ce)
+		return;
+
+	*error_count += cached_ce;
+}
+
+/*
+ * Add one to *error_count when the cached mca_umc_status of this channel is
+ * valid (Val) and flags any of Deferred, UECC, PCC, UC or TCC.
+ */
+static void umc_v8_10_ecc_info_query_uncorrectable_error_count(struct amdgpu_device *adev,
+		uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst,
+		unsigned long *error_count)
+{
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	uint32_t slot = (node_inst * adev->umc.umc_inst_num + umc_inst) *
+			adev->umc.channel_inst_num + ch_inst;
+	uint64_t status = ras->umc_ecc.ecc[slot].mca_umc_status;
+
+	if (REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) != 1)
+		return;
+
+	/* check the MCUMC_STATUS error-type flags */
+	if (REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+	    REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+	    REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+	    REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+	    REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)
+		*error_count += 1;
+}
+
+/* Loop callback: add one channel's cached CE and UE counts to the ras_err_data in @data. */
+static int umc_v8_10_ecc_info_query_ecc_error_count(struct amdgpu_device *adev,
+		uint32_t node_inst, uint32_t umc_inst,
+		uint32_t ch_inst, void *data)
+{
+	struct ras_err_data *counts = data;
+
+	umc_v8_10_ecc_info_query_correctable_error_count(adev, node_inst, umc_inst,
+			ch_inst, &counts->ce_count);
+	umc_v8_10_ecc_info_query_uncorrectable_error_count(adev, node_inst, umc_inst,
+			ch_inst, &counts->ue_count);
+
+	return 0;
+}
+
+/* Gather CE/UE counts from the cached ECC info table into @ras_error_status. */
+static void umc_v8_10_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
+		void *ras_error_status)
+{
+	amdgpu_umc_loop_channels(adev, umc_v8_10_ecc_info_query_ecc_error_count, ras_error_status);
+}
+
+/*
+ * amdgpu_umc_loop_channels() callback: if the mca_umc_status cached for this
+ * channel reports a valid UECC error with a valid address (AddrV), convert the
+ * cached mca_umc_addr into retired-page records in the struct ras_err_data
+ * passed as @data. Nothing is done when that structure has no err_addr buffer.
+ * Always returns 0.
+ */
+static int umc_v8_10_ecc_info_query_error_address(struct amdgpu_device *adev,
+		uint32_t node_inst, uint32_t umc_inst,
+		uint32_t ch_inst, void *data)
+{
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	struct ras_err_data *err_data = data;
+	uint32_t slot = (node_inst * adev->umc.umc_inst_num + umc_inst) *
+			adev->umc.channel_inst_num + ch_inst;
+	uint64_t status = ras->umc_ecc.ecc[slot].mca_umc_status;
+	uint64_t err_addr;
+
+	/* nothing cached for this channel, or nowhere to store the result */
+	if (!status || !err_data->err_addr)
+		return 0;
+
+	/* calculate error address if ue error is detected */
+	if (REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) != 1 ||
+	    REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) != 1 ||
+	    REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) != 1)
+		return 0;
+
+	err_addr = ras->umc_ecc.ecc[slot].mca_umc_addr;
+	err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+	umc_v8_10_convert_error_address(adev, err_data, err_addr,
+			ch_inst, umc_inst, node_inst, status);
+
+	return 0;
+}
+
+/* Run the cached-table error address query via the channel loop. */
+static void umc_v8_10_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
+		void *ras_error_status)
+{
+	amdgpu_umc_loop_channels(adev, umc_v8_10_ecc_info_query_error_address, ras_error_status);
+}
+
+const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = { /* queries that read UMC registers */
+ .query_ras_error_count = umc_v8_10_query_ras_error_count,
+ .query_ras_error_address = umc_v8_10_query_ras_error_address,
+};
+
+struct amdgpu_umc_ras umc_v8_10_ras = { /* ecc_info_* hooks read the cached ras->umc_ecc table */
+ .ras_block = {
+ .hw_ops = &umc_v8_10_ras_hw_ops,
+ },
+ .err_cnt_init = umc_v8_10_err_cnt_init,
+ .query_ras_poison_mode = umc_v8_10_query_ras_poison_mode,
+ .ecc_info_query_ras_error_count = umc_v8_10_ecc_info_query_ras_error_count,
+ .ecc_info_query_ras_error_address = umc_v8_10_ecc_info_query_ras_error_address,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h
new file mode 100644
index 000000000000..dc12e0af5451
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __UMC_V8_10_H__
+#define __UMC_V8_10_H__
+
+#include "soc15_common.h"
+#include "amdgpu.h"
+
+/* number of umc channel instance with memory map register access */
+#define UMC_V8_10_CHANNEL_INSTANCE_NUM 2
+/* number of umc instance with memory map register access */
+#define UMC_V8_10_UMC_INSTANCE_NUM 2
+
+/* Total channel instances for all available umc nodes (two fewer per bit set in m_half_use) */
+#define UMC_V8_10_TOTAL_CHANNEL_NUM(adev) \
+ (UMC_V8_10_CHANNEL_INSTANCE_NUM * UMC_V8_10_UMC_INSTANCE_NUM * \
+ (adev)->gmc.num_umc - hweight32((adev)->gmc.m_half_use) * 2)
+
+/* UMC register per channel offset */
+#define UMC_V8_10_PER_CHANNEL_OFFSET 0x400
+
+#define UMC_V8_10_CE_CNT_MAX 0xffff /* EccErrCnt max value */
+/* umc ce interrupt threshold ("UUMC" is the original misspelling, kept as an alias) */
+#define UMC_V8_10_CE_INT_THRESHOLD 0xffff
+#define UUMC_V8_10_CE_INT_THRESHOLD UMC_V8_10_CE_INT_THRESHOLD
+/* umc ce count initial value */
+#define UMC_V8_10_CE_CNT_INIT (UMC_V8_10_CE_CNT_MAX - UMC_V8_10_CE_INT_THRESHOLD)
+
+#define UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM 4 /* number of [C6 C5] combinations tried per address */
+
+/* The C5 bit in NA address */
+#define UMC_V8_10_NA_C5_BIT 14
+
+/* Map to swizzle mode address: PA = ADDR_HI | ADDR_MID | ADDR_LOW | ADDR_LSB.
+ * TMP_ADDR interleaves ch_idx among ch_num channels at bit 10 of the NA.
+ */
+#define SWIZZLE_MODE_TMP_ADDR(na, ch_num, ch_idx) ((((na) >> 10) * (ch_num) + (ch_idx)) << 10)
+#define SWIZZLE_MODE_ADDR_HI(addr, col_bit) (((addr) >> ((col_bit) + 2)) << ((col_bit) + 2))
+#define SWIZZLE_MODE_ADDR_MID(na, col_bit) ((((na) >> 8) & 0x3) << (col_bit))
+#define SWIZZLE_MODE_ADDR_LOW(addr, col_bit) \
+	((((addr) >> 10) & ((0x1ULL << ((col_bit) - 8)) - 1)) << 8)
+#define SWIZZLE_MODE_ADDR_LSB(na) ((na) & 0xFF)
+
+extern struct amdgpu_umc_ras umc_v8_10_ras;
+extern const uint32_t
+ umc_v8_10_channel_idx_tbl[]
+ [UMC_V8_10_UMC_INSTANCE_NUM]
+ [UMC_V8_10_CHANNEL_INSTANCE_NUM];
+
+extern const uint32_t
+ umc_v8_10_channel_idx_tbl_ext0[]
+ [UMC_V8_10_UMC_INSTANCE_NUM]
+ [UMC_V8_10_CHANNEL_INSTANCE_NUM];
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c
new file mode 100644
index 000000000000..eaca10a3c4a9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v8_14.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
+#include "amdgpu.h"
+#include "umc/umc_8_14_0_offset.h"
+#include "umc/umc_8_14_0_sh_mask.h"
+
+/* Register offset of channel @ch_inst inside UMC instance @umc_inst. */
+static inline uint32_t get_umc_v8_14_reg_offset(struct amdgpu_device *adev,
+		uint32_t umc_inst, uint32_t ch_inst)
+{
+	return UMC_V8_14_INST_DIST * umc_inst + adev->umc.channel_offs * ch_inst;
+}
+
+/*
+ * amdgpu_umc_loop_channels() callback: write UMC_V8_14_CE_CNT_INIT to the
+ * GeccErrCnt register of one channel. @node_inst and @data are unused.
+ */
+static int umc_v8_14_clear_error_count_per_channel(struct amdgpu_device *adev,
+		uint32_t node_inst, uint32_t umc_inst,
+		uint32_t ch_inst, void *data)
+{
+	uint32_t chan_base =
+		get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst);
+	uint32_t cnt_reg = SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt);
+
+	/* clear error count */
+	WREG32_PCIE((cnt_reg + chan_base) * 4, UMC_V8_14_CE_CNT_INIT);
+
+	return 0;
+}
+
+/* Run the per-channel counter reset through amdgpu_umc_loop_channels(). */
+static void umc_v8_14_clear_error_count(struct amdgpu_device *adev)
+{
+	amdgpu_umc_loop_channels(adev, umc_v8_14_clear_error_count_per_channel, NULL);
+}
+
+/*
+ * Add the GeccErrCnt field of the channel's GeccErrCnt register, taken
+ * relative to UMC_V8_14_CE_CNT_INIT, to *error_count.
+ */
+static void umc_v8_14_query_correctable_error_count(struct amdgpu_device *adev,
+		uint32_t umc_reg_offset,
+		unsigned long *error_count)
+{
+	/* UMC 8_14 registers */
+	uint32_t cnt_reg = SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt);
+	uint32_t raw = RREG32_PCIE((cnt_reg + umc_reg_offset) * 4);
+
+	*error_count +=
+		(REG_GET_FIELD(raw, UMCCH0_GeccErrCnt, GeccErrCnt) - UMC_V8_14_CE_CNT_INIT);
+}
+
+/*
+ * Add the GeccUnCorrErrCnt field of the channel's GeccErrCnt register, taken
+ * relative to UMC_V8_14_CE_CNT_INIT, to *error_count.
+ */
+static void umc_v8_14_query_uncorrectable_error_count(struct amdgpu_device *adev,
+		uint32_t umc_reg_offset,
+		unsigned long *error_count)
+{
+	uint32_t cnt_reg = SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt);
+	uint32_t raw = RREG32_PCIE((cnt_reg + umc_reg_offset) * 4);
+
+	*error_count +=
+		(REG_GET_FIELD(raw, UMCCH0_GeccErrCnt, GeccUnCorrErrCnt) - UMC_V8_14_CE_CNT_INIT);
+}
+
+/*
+ * amdgpu_umc_loop_channels() callback: accumulate the CE and UE counts of one
+ * channel into the struct ras_err_data passed as @data. Always returns 0.
+ */
+static int umc_v8_14_query_error_count_per_channel(struct amdgpu_device *adev,
+		uint32_t node_inst, uint32_t umc_inst,
+		uint32_t ch_inst, void *data)
+{
+	struct ras_err_data *counts = data;
+	uint32_t chan_base =
+		get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst);
+
+	umc_v8_14_query_correctable_error_count(adev, chan_base, &counts->ce_count);
+	umc_v8_14_query_uncorrectable_error_count(adev, chan_base, &counts->ue_count);
+
+	return 0;
+}
+
+/* Collect CE/UE counts into @ras_error_status, then reset the hardware counters. */
+static void umc_v8_14_query_ras_error_count(struct amdgpu_device *adev,
+		void *ras_error_status)
+{
+	amdgpu_umc_loop_channels(adev, umc_v8_14_query_error_count_per_channel, ras_error_status);
+
+	umc_v8_14_clear_error_count(adev);
+}
+
+/*
+ * amdgpu_umc_loop_channels() callback: set GeccErrCntSel.GeccErrInt to 1 and
+ * load GeccErrCnt with UMC_V8_14_CE_CNT_INIT for one channel.
+ * @node_inst and @data are unused; always returns 0.
+ */
+static int umc_v8_14_err_cnt_init_per_channel(struct amdgpu_device *adev,
+		uint32_t node_inst, uint32_t umc_inst,
+		uint32_t ch_inst, void *data)
+{
+	uint32_t chan_base =
+		get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst);
+	uint32_t sel_reg = SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCntSel);
+	uint32_t cnt_reg = SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt);
+	uint32_t sel;
+
+	/* set ce error interrupt type to APIC based interrupt */
+	sel = RREG32_PCIE((sel_reg + chan_base) * 4);
+	sel = REG_SET_FIELD(sel, UMCCH0_GeccErrCntSel, GeccErrInt, 0x1);
+	WREG32_PCIE((sel_reg + chan_base) * 4, sel);
+
+	/* set error count to initial value */
+	WREG32_PCIE((cnt_reg + chan_base) * 4, UMC_V8_14_CE_CNT_INIT);
+
+	return 0;
+}
+
+/* Apply the per-channel error counter setup through amdgpu_umc_loop_channels(). */
+static void umc_v8_14_err_cnt_init(struct amdgpu_device *adev)
+{
+	amdgpu_umc_loop_channels(adev, umc_v8_14_err_cnt_init_per_channel, NULL);
+}
+
+const struct amdgpu_ras_block_hw_ops umc_v8_14_ras_hw_ops = { /* only the error count query is set */
+ .query_ras_error_count = umc_v8_14_query_ras_error_count,
+};
+
+struct amdgpu_umc_ras umc_v8_14_ras = { /* declared extern in umc_v8_14.h */
+ .ras_block = {
+ .hw_ops = &umc_v8_14_ras_hw_ops,
+ },
+ .err_cnt_init = umc_v8_14_err_cnt_init,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h
new file mode 100644
index 000000000000..20a258f0017a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __UMC_V8_14_H__
+#define __UMC_V8_14_H__
+
+#include "soc15_common.h"
+#include "amdgpu.h"
+
+/* number of umc channel instance with memory map register access */
+#define UMC_V8_14_CHANNEL_INSTANCE_NUM 2
+/* number of umc instance with memory map register access (read from adev->umc.node_inst_num) */
+#define UMC_V8_14_UMC_INSTANCE_NUM(adev) ((adev)->umc.node_inst_num)
+
+/* Total channel instances for all available umc nodes */
+#define UMC_V8_14_TOTAL_CHANNEL_NUM(adev) \
+ (UMC_V8_14_CHANNEL_INSTANCE_NUM * (adev)->gmc.num_umc)
+
+/* UMC register per channel offset */
+#define UMC_V8_14_PER_CHANNEL_OFFSET 0x400
+
+#define UMC_V8_14_INST_DIST 0x40000 /* multiplied by umc_inst in get_umc_v8_14_reg_offset() */
+
+/* EccErrCnt max value */
+#define UMC_V8_14_CE_CNT_MAX 0xffff
+/* umc ce interrupt threshold */
+#define UMC_V8_14_CE_INT_THRESHOLD 0xffff
+/* umc ce count initial value (max minus threshold, i.e. 0 with the values above) */
+#define UMC_V8_14_CE_CNT_INIT (UMC_V8_14_CE_CNT_MAX - UMC_V8_14_CE_INT_THRESHOLD)
+
+extern struct amdgpu_umc_ras umc_v8_14_ras;
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
index af59a35788e3..b717fdaa46e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
@@ -40,13 +40,143 @@ const uint32_t
{9, 0}, {15, 6}
};
-static inline uint32_t get_umc_8_reg_offset(struct amdgpu_device *adev,
+static inline uint32_t get_umc_v8_7_reg_offset(struct amdgpu_device *adev,
uint32_t umc_inst,
uint32_t ch_inst)
{
return adev->umc.channel_offs*ch_inst + UMC_8_INST_DIST*umc_inst;
}
+/*
+ * Add one to *error_count when the mca_umc_status cached in the ECC info
+ * table for (umc_inst, ch_inst) is valid (Val) and flags CECC.
+ */
+static void umc_v8_7_ecc_info_query_correctable_error_count(struct amdgpu_device *adev,
+		uint32_t umc_inst, uint32_t ch_inst,
+		unsigned long *error_count)
+{
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	uint32_t slot = umc_inst * adev->umc.channel_inst_num + ch_inst;
+	uint64_t status = ras->umc_ecc.ecc[slot].mca_umc_status;
+
+	if (REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) != 1 ||
+	    REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) != 1)
+		return;
+
+	*error_count += 1;
+}
+
+/* Add one to *error_count when the cached mca_umc_status flags an uncorrectable error. */
+static void umc_v8_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_device *adev,
+		uint32_t umc_inst, uint32_t ch_inst,
+		unsigned long *error_count)
+{
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	uint32_t slot = umc_inst * adev->umc.channel_inst_num + ch_inst;
+	uint64_t status = ras->umc_ecc.ecc[slot].mca_umc_status;
+
+	if (REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) != 1)
+		return;
+
+	/* any one of Deferred, UECC, PCC, UC or TCC is enough */
+	if (REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+	    REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+	    REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+	    REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+	    REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)
+		*error_count += 1;
+}
+
+/*
+ * Accumulate CE and UE counts for every (umc_inst, ch_inst) pair visited by
+ * LOOP_UMC_INST_AND_CH() into the struct ras_err_data in @ras_error_status.
+ */
+static void umc_v8_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
+		void *ras_error_status)
+{
+	struct ras_err_data *counts = ras_error_status;
+	uint32_t umc_inst = 0, ch_inst = 0;
+
+	/* TODO: driver needs to toggle DF Cstate to ensure
+	 * safe access of UMC registers. Will add the protection
+	 */
+	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+		umc_v8_7_ecc_info_query_correctable_error_count(adev, umc_inst, ch_inst,
+				&counts->ce_count);
+		umc_v8_7_ecc_info_querry_uncorrectable_error_count(adev, umc_inst, ch_inst,
+				&counts->ue_count);
+	}
+}
+
+/*
+ * Translate a UMC channel address into a SOC physical address and store both,
+ * with the channel index and @umc_inst, as an error record in @err_data.
+ */
+static void umc_v8_7_convert_error_address(struct amdgpu_device *adev,
+		struct ras_err_data *err_data, uint64_t err_addr,
+		uint32_t ch_inst, uint32_t umc_inst)
+{
+	uint32_t slot = umc_inst * adev->umc.channel_inst_num + ch_inst;
+	uint32_t channel = adev->umc.channel_idx_tbl[slot];
+	/* translate umc channel address to soc pa, 3 parts are included */
+	uint64_t soc_pa = ADDR_OF_4KB_BLOCK(err_addr) |
+			  ADDR_OF_256B_BLOCK(channel) |
+			  OFFSET_IN_256B_BLOCK(err_addr);
+
+	amdgpu_umc_fill_error_record(err_data, err_addr,
+			soc_pa, channel, umc_inst);
+}
+
+/*
+ * If the mca_umc_status cached for (umc_inst, ch_inst) reports a valid UECC
+ * error, convert the cached mca_umc_addr into an error record in @err_data.
+ * Nothing is done when @err_data has no err_addr buffer.
+ */
+static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
+		struct ras_err_data *err_data,
+		uint32_t ch_inst,
+		uint32_t umc_inst)
+{
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	uint32_t slot = umc_inst * adev->umc.channel_inst_num + ch_inst;
+	uint64_t status = ras->umc_ecc.ecc[slot].mca_umc_status;
+	uint64_t err_addr;
+
+	if (!status || !err_data->err_addr)
+		return;
+
+	/* calculate error address if ue error is detected */
+	if (REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) != 1 ||
+	    REG_GET_FIELD(status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) != 1)
+		return;
+
+	err_addr = ras->umc_ecc.ecc[slot].mca_umc_addr;
+	err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+	umc_v8_7_convert_error_address(adev, err_data, err_addr,
+			ch_inst, umc_inst);
+}
+
+/*
+ * Run umc_v8_7_ecc_info_query_error_address() for every (umc_inst, ch_inst)
+ * pair visited by LOOP_UMC_INST_AND_CH(), recording into @ras_error_status.
+ */
+static void umc_v8_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
+		void *ras_error_status)
+{
+	struct ras_err_data *err_data = ras_error_status;
+	uint32_t umc_inst = 0, ch_inst = 0;
+
+	/* TODO: driver needs to toggle DF Cstate to ensure
+	 * safe access of UMC registers. Will add the protection
+	 * when firmware interface is ready
+	 */
+	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+		umc_v8_7_ecc_info_query_error_address(adev, err_data,
+				ch_inst, umc_inst);
+	}
+}
+
static void umc_v8_7_clear_error_count_per_channel(struct amdgpu_device *adev,
uint32_t umc_reg_offset)
{
@@ -92,7 +222,7 @@ static void umc_v8_7_clear_error_count(struct amdgpu_device *adev)
uint32_t umc_reg_offset = 0;
LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
- umc_reg_offset = get_umc_8_reg_offset(adev,
+ umc_reg_offset = get_umc_v8_7_reg_offset(adev,
umc_inst,
ch_inst);
@@ -178,7 +308,7 @@ static void umc_v8_7_query_ras_error_count(struct amdgpu_device *adev,
uint32_t umc_reg_offset = 0;
LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
- umc_reg_offset = get_umc_8_reg_offset(adev,
+ umc_reg_offset = get_umc_v8_7_reg_offset(adev,
umc_inst,
ch_inst);
@@ -200,15 +330,12 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
uint32_t umc_inst)
{
uint32_t lsb, mc_umc_status_addr;
- uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
- struct eeprom_table_record *err_rec;
- uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+ uint64_t mc_umc_status, err_addr, mc_umc_addrt0;
mc_umc_status_addr =
SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
mc_umc_addrt0 =
SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0);
-
mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
if (mc_umc_status == 0)
@@ -220,12 +347,9 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
return;
}
- err_rec = &err_data->err_addr[err_data->err_addr_cnt];
-
- /* calculate error address if ue/ce error is detected */
+ /* calculate error address if ue error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
- (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
/* the lowest lsb bits should be ignored */
@@ -233,25 +357,8 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
err_addr &= ~((0x1ULL << lsb) - 1);
- /* translate umc channel address to soc pa, 3 parts are included */
- retired_page = ADDR_OF_4KB_BLOCK(err_addr) |
- ADDR_OF_256B_BLOCK(channel_index) |
- OFFSET_IN_256B_BLOCK(err_addr);
-
- /* we only save ue error information currently, ce is skipped */
- if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
- == 1) {
- err_rec->address = err_addr;
- /* page frame address is saved */
- err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
- err_rec->ts = (uint64_t)ktime_get_real_seconds();
- err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
- err_rec->cu = 0;
- err_rec->mem_channel = channel_index;
- err_rec->mcumc_id = umc_inst;
-
- err_data->err_addr_cnt++;
- }
+ umc_v8_7_convert_error_address(adev, err_data, err_addr,
+ ch_inst, umc_inst);
}
/* clear umc status */
@@ -268,7 +375,7 @@ static void umc_v8_7_query_ras_error_address(struct amdgpu_device *adev,
uint32_t umc_reg_offset = 0;
LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
- umc_reg_offset = get_umc_8_reg_offset(adev,
+ umc_reg_offset = get_umc_v8_7_reg_offset(adev,
umc_inst,
ch_inst);
@@ -316,7 +423,7 @@ static void umc_v8_7_err_cnt_init(struct amdgpu_device *adev)
uint32_t umc_reg_offset = 0;
LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
- umc_reg_offset = get_umc_8_reg_offset(adev,
+ umc_reg_offset = get_umc_v8_7_reg_offset(adev,
umc_inst,
ch_inst);
@@ -324,10 +431,16 @@ static void umc_v8_7_err_cnt_init(struct amdgpu_device *adev)
}
}
-const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs = {
- .err_cnt_init = umc_v8_7_err_cnt_init,
- .ras_late_init = amdgpu_umc_ras_late_init,
- .ras_fini = amdgpu_umc_ras_fini,
+const struct amdgpu_ras_block_hw_ops umc_v8_7_ras_hw_ops = {
.query_ras_error_count = umc_v8_7_query_ras_error_count,
.query_ras_error_address = umc_v8_7_query_ras_error_address,
};
+
+struct amdgpu_umc_ras umc_v8_7_ras = { /* UMC v8.7 RAS descriptor; replaces the removed umc_v8_7_ras_funcs table */
+ .ras_block = {
+ .hw_ops = &umc_v8_7_ras_hw_ops, /* register-based error count/address queries */
+ },
+ .err_cnt_init = umc_v8_7_err_cnt_init,
+ .ecc_info_query_ras_error_count = umc_v8_7_ecc_info_query_ras_error_count, /* ecc_info-based variants of the two queries */
+ .ecc_info_query_ras_error_address = umc_v8_7_ecc_info_query_ras_error_address,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h
index 37e6dc7c28e0..dd4993f5f78f 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h
@@ -44,7 +44,7 @@
/* umc ce count initial value */
#define UMC_V8_7_CE_CNT_INIT (UMC_V8_7_CE_CNT_MAX - UMC_V8_7_CE_INT_THRESHOLD)
-extern const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs;
+extern struct amdgpu_umc_ras umc_v8_7_ras;
extern const uint32_t
umc_v8_7_channel_idx_tbl[UMC_V8_7_UMC_INSTANCE_NUM][UMC_V8_7_CHANNEL_INSTANCE_NUM];
diff --git a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c
new file mode 100644
index 000000000000..ce3bb12e3572
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include "amdgpu.h"
+#include "soc15_common.h"
+#include "soc21.h"
+#include "vcn/vcn_4_0_0_offset.h"
+#include "vcn/vcn_4_0_0_sh_mask.h"
+
+#include "amdgpu_umsch_mm.h"
+#include "umsch_mm_4_0_api_def.h"
+#include "umsch_mm_v4_0.h"
+
+#define regUVD_IPX_DLDO_CONFIG 0x0064 /* written in load_microcode (value 1) and ring_stop (value 2) */
+#define regUVD_IPX_DLDO_CONFIG_BASE_IDX 1
+#define regUVD_IPX_DLDO_STATUS 0x0065 /* polled after each DLDO_CONFIG write */
+#define regUVD_IPX_DLDO_STATUS_BASE_IDX 1
+
+#define UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT 0x00000002 /* ONO0_PWR_CONFIG occupies bits 3:2 */
+#define UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG_MASK 0x0000000cUL
+#define UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS__SHIFT 0x00000001 /* ONO0_PWR_STATUS is bit 1 */
+#define UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS_MASK 0x00000002UL
+
+static int umsch_mm_v4_0_load_microcode(struct amdgpu_umsch_mm *umsch)
+{ /* Allocate the firmware buffers, program the VCN_MES_* boot registers, then wait for MSTATUS_LO; returns 0 or an error code. */
+ struct amdgpu_device *adev = umsch->ring.adev;
+ uint64_t data;
+ int r;
+
+ r = amdgpu_umsch_mm_allocate_ucode_buffer(umsch);
+ if (r)
+ return r; /* first allocation failed: return without any cleanup */
+
+ r = amdgpu_umsch_mm_allocate_ucode_data_buffer(umsch);
+ if (r)
+ goto err_free_ucode_bo; /* only the ucode buffer is released on this path */
+
+ umsch->cmd_buf_curr_ptr = umsch->cmd_buf_ptr; /* reset the current command-buffer pointer to the buffer start */
+
+ if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) { /* VCN 4.0.5 and newer only */
+ WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG,
+ 1 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT); /* ONO0_PWR_CONFIG = 1 (ring_stop writes 2) */
+ SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS,
+ 0 << UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS_MASK); /* wait for ONO0_PWR_STATUS == 0; the wait result is discarded */
+ }
+
+ data = RREG32_SOC15(VCN, 0, regUMSCH_MES_RESET_CTRL);
+ data = REG_SET_FIELD(data, UMSCH_MES_RESET_CTRL, MES_CORE_SOFT_RESET, 0); /* clear the core soft-reset bit */
+ WREG32_SOC15_UMSCH(regUMSCH_MES_RESET_CTRL, data);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_MES_CNTL); /* halt and reset pipe 0 while the registers below are programmed */
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_INVALIDATE_ICACHE, 1);
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_RESET, 1);
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_ACTIVE, 0);
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_HALT, 1);
+ WREG32_SOC15_UMSCH(regVCN_MES_CNTL, data);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_MES_IC_BASE_CNTL);
+ data = REG_SET_FIELD(data, VCN_MES_IC_BASE_CNTL, VMID, 0);
+ data = REG_SET_FIELD(data, VCN_MES_IC_BASE_CNTL, EXE_DISABLE, 0);
+ data = REG_SET_FIELD(data, VCN_MES_IC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15_UMSCH(regVCN_MES_IC_BASE_CNTL, data);
+
+ WREG32_SOC15_UMSCH(regVCN_MES_INTR_ROUTINE_START,
+ lower_32_bits(adev->umsch_mm.irq_start_addr >> 2)); /* address is written shifted right by 2 (presumably dword units - TODO confirm) */
+ WREG32_SOC15_UMSCH(regVCN_MES_INTR_ROUTINE_START_HI,
+ upper_32_bits(adev->umsch_mm.irq_start_addr >> 2));
+
+ WREG32_SOC15_UMSCH(regVCN_MES_PRGRM_CNTR_START,
+ lower_32_bits(adev->umsch_mm.uc_start_addr >> 2)); /* same >> 2 scaling as the interrupt routine start */
+ WREG32_SOC15_UMSCH(regVCN_MES_PRGRM_CNTR_START_HI,
+ upper_32_bits(adev->umsch_mm.uc_start_addr >> 2));
+
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_INSTR_BASE_LO, 0);
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_INSTR_BASE_HI, 0);
+
+ data = adev->umsch_mm.uc_start_addr + adev->umsch_mm.ucode_size - 1; /* instruction mask = start address + size - 1 */
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_INSTR_MASK_LO, lower_32_bits(data));
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_INSTR_MASK_HI, upper_32_bits(data));
+
+ data = adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ?
+ 0 : adev->umsch_mm.ucode_fw_gpu_addr; /* IC base is 0 for PSP loading, else the ucode buffer GPU address */
+ WREG32_SOC15_UMSCH(regVCN_MES_IC_BASE_LO, lower_32_bits(data));
+ WREG32_SOC15_UMSCH(regVCN_MES_IC_BASE_HI, upper_32_bits(data));
+
+ WREG32_SOC15_UMSCH(regVCN_MES_MIBOUND_LO, 0x1FFFFF);
+
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_BASE0_LO,
+ lower_32_bits(adev->umsch_mm.data_start_addr));
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_BASE0_HI,
+ upper_32_bits(adev->umsch_mm.data_start_addr));
+
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_MASK0_LO,
+ adev->umsch_mm.data_size - 1); /* data mask = size - 1; the HI half is written as 0 */
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_MASK0_HI, 0);
+
+ data = adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ?
+ 0 : adev->umsch_mm.data_fw_gpu_addr; /* DC base follows the same PSP / non-PSP rule as the IC base */
+ WREG32_SOC15_UMSCH(regVCN_MES_DC_BASE_LO, lower_32_bits(data));
+ WREG32_SOC15_UMSCH(regVCN_MES_DC_BASE_HI, upper_32_bits(data));
+
+ WREG32_SOC15_UMSCH(regVCN_MES_MDBOUND_LO, 0x3FFFF);
+
+ data = RREG32_SOC15(VCN, 0, regUVD_UMSCH_FORCE);
+ data = REG_SET_FIELD(data, UVD_UMSCH_FORCE, IC_FORCE_GPUVM, 1);
+ data = REG_SET_FIELD(data, UVD_UMSCH_FORCE, DC_FORCE_GPUVM, 1);
+ WREG32_SOC15_UMSCH(regUVD_UMSCH_FORCE, data);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_MES_IC_OP_CNTL); /* first pass: PRIME_ICACHE = 0, INVALIDATE_CACHE = 1 */
+ data = REG_SET_FIELD(data, VCN_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
+ data = REG_SET_FIELD(data, VCN_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15_UMSCH(regVCN_MES_IC_OP_CNTL, data);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_MES_IC_OP_CNTL); /* second pass: re-read, then set PRIME_ICACHE = 1 */
+ data = REG_SET_FIELD(data, VCN_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15_UMSCH(regVCN_MES_IC_OP_CNTL, data);
+
+ WREG32_SOC15_UMSCH(regVCN_MES_GP0_LO, 0); /* GP0 defaults to 0 ... */
+ WREG32_SOC15_UMSCH(regVCN_MES_GP0_HI, 0);
+
+#if defined(CONFIG_DEBUG_FS)
+ WREG32_SOC15_UMSCH(regVCN_MES_GP0_LO, lower_32_bits(umsch->log_gpu_addr)); /* ... and is overwritten with the log buffer address when debugfs is built in */
+ WREG32_SOC15_UMSCH(regVCN_MES_GP0_HI, upper_32_bits(umsch->log_gpu_addr));
+#endif /* CONFIG_DEBUG_FS */
+
+ WREG32_SOC15_UMSCH(regVCN_MES_GP1_LO, 0);
+ WREG32_SOC15_UMSCH(regVCN_MES_GP1_HI, 0);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_MES_CNTL); /* undo the halt/reset programmed above and mark pipe 0 active */
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_INVALIDATE_ICACHE, 0);
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_RESET, 0);
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_HALT, 0);
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_ACTIVE, 1);
+ WREG32_SOC15_UMSCH(regVCN_MES_CNTL, data);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
+ amdgpu_umsch_mm_psp_execute_cmd_buf(umsch); /* PSP load type only; return value is not checked here */
+
+ r = SOC15_WAIT_ON_RREG(VCN, 0, regVCN_MES_MSTATUS_LO, 0xAAAAAAAA, 0xFFFFFFFF); /* success means the full 32-bit status reads 0xAAAAAAAA */
+ if (r) {
+ dev_err(adev->dev, "UMSCH FW Load: Failed, regVCN_MES_MSTATUS_LO: 0x%08x\n",
+ RREG32_SOC15(VCN, 0, regVCN_MES_MSTATUS_LO));
+ goto err_free_data_bo; /* release both buffers before returning the error */
+ }
+
+ return 0;
+
+err_free_data_bo: /* falls through to err_free_ucode_bo, so both buffers are freed */
+ amdgpu_bo_free_kernel(&adev->umsch_mm.data_fw_obj,
+ &adev->umsch_mm.data_fw_gpu_addr,
+ (void **)&adev->umsch_mm.data_fw_ptr);
+err_free_ucode_bo:
+ amdgpu_bo_free_kernel(&adev->umsch_mm.ucode_fw_obj,
+ &adev->umsch_mm.ucode_fw_gpu_addr,
+ (void **)&adev->umsch_mm.ucode_fw_ptr);
+ return r;
+}
+
+static void umsch_mm_v4_0_aggregated_doorbell_init(struct amdgpu_umsch_mm *umsch)
+{ /* Program VCN_AGDB_CTRL0..3: one doorbell OFFSET per context priority level, each with EN = 1. */
+ struct amdgpu_device *adev = umsch->ring.adev;
+ uint32_t data;
+
+ data = RREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL0); /* CTRL0 <- REALTIME */
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL0, OFFSET,
+ umsch->agdb_index[CONTEXT_PRIORITY_LEVEL_REALTIME]);
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL0, EN, 1);
+ WREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL0, data);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL1); /* CTRL1 <- FOCUS */
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL1, OFFSET,
+ umsch->agdb_index[CONTEXT_PRIORITY_LEVEL_FOCUS]);
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL1, EN, 1);
+ WREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL1, data);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL2); /* CTRL2 <- NORMAL */
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL2, OFFSET,
+ umsch->agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL]);
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL2, EN, 1);
+ WREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL2, data);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL3); /* CTRL3 <- IDLE */
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL3, OFFSET,
+ umsch->agdb_index[CONTEXT_PRIORITY_LEVEL_IDLE]);
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL3, EN, 1);
+ WREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL3, data);
+}
+
+static int umsch_mm_v4_0_ring_start(struct amdgpu_umsch_mm *umsch)
+{ /* Program the UMSCH ring doorbell, base and size, then set up the aggregated doorbells; always returns 0. */
+ struct amdgpu_ring *ring = &umsch->ring;
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t data;
+
+ data = RREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL);
+ data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, OFFSET, ring->doorbell_index);
+ data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, EN, 1); /* enable the ring doorbell (ring_stop clears this bit) */
+ WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL, data);
+
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); /* range starts at vcn_ring0_1 * 2; last argument is 0 */
+
+ WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_BASE_LO, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+
+ WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_SIZE, ring->ring_size);
+
+ ring->wptr = 0; /* only the software copy is reset; no WPTR register is written here */
+
+ data = RREG32_SOC15(VCN, 0, regVCN_RB_ENABLE);
+ data &= ~(VCN_RB_ENABLE__AUDIO_RB_EN_MASK); /* NOTE(review): clears AUDIO_RB_EN; UMSCH_RB_EN is never set in this function - confirm intended */
+ WREG32_SOC15(VCN, 0, regVCN_RB_ENABLE, data);
+
+ umsch_mm_v4_0_aggregated_doorbell_init(umsch);
+
+ return 0;
+}
+
+static int umsch_mm_v4_0_ring_stop(struct amdgpu_umsch_mm *umsch)
+{ /* Disable the UMSCH ring buffer and its doorbell; on VCN >= 4.0.5 also reprogram ONO0 power. Always returns 0. */
+ struct amdgpu_ring *ring = &umsch->ring;
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t data;
+
+ data = RREG32_SOC15(VCN, 0, regVCN_RB_ENABLE);
+ data = REG_SET_FIELD(data, VCN_RB_ENABLE, UMSCH_RB_EN, 0); /* ring buffer off */
+ WREG32_SOC15(VCN, 0, regVCN_RB_ENABLE, data);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL);
+ data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, EN, 0); /* doorbell off; OFFSET is left as programmed */
+ WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL, data);
+
+ if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) { /* VCN 4.0.5 and newer only */
+ WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG,
+ 2 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT); /* ONO0_PWR_CONFIG = 2 (load_microcode writes 1) */
+ SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS_MASK); /* wait for ONO0_PWR_STATUS == 1; the wait result is discarded */
+ }
+
+ return 0;
+}
+
+static int umsch_mm_v4_0_set_hw_resources(struct amdgpu_umsch_mm *umsch)
+{ /* Build a SET_HW_RSRC packet from umsch/adev state, submit it, and check its fence; returns 0 or an error code. */
+ union UMSCHAPI__SET_HW_RESOURCES set_hw_resources = {}; /* zero-initialised, so unset fields stay 0 */
+ struct amdgpu_device *adev = umsch->ring.adev;
+ int r;
+
+ set_hw_resources.header.type = UMSCH_API_TYPE_SCHEDULER;
+ set_hw_resources.header.opcode = UMSCH_API_SET_HW_RSRC;
+ set_hw_resources.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ set_hw_resources.vmid_mask_mm_vcn = umsch->vmid_mask_mm_vcn;
+ set_hw_resources.vmid_mask_mm_vpe = umsch->vmid_mask_mm_vpe;
+ set_hw_resources.collaboration_mask_vpe =
+ adev->vpe.collaborate_mode ? 0x3 : 0x0; /* 0x3 when VPE collaborate mode is on, else 0 */
+ set_hw_resources.engine_mask = umsch->engine_mask;
+
+ set_hw_resources.vcn0_hqd_mask[0] = umsch->vcn0_hqd_mask;
+ set_hw_resources.vcn1_hqd_mask[0] = umsch->vcn1_hqd_mask;
+ set_hw_resources.vcn_hqd_mask[0] = umsch->vcn_hqd_mask[0];
+ set_hw_resources.vcn_hqd_mask[1] = umsch->vcn_hqd_mask[1];
+ set_hw_resources.vpe_hqd_mask[0] = umsch->vpe_hqd_mask;
+
+ set_hw_resources.g_sch_ctx_gpu_mc_ptr = umsch->sch_ctx_gpu_addr;
+
+ set_hw_resources.enable_level_process_quantum_check = 1;
+
+ memcpy(set_hw_resources.mmhub_base, adev->reg_offset[MMHUB_HWIP][0],
+ sizeof(uint32_t) * 5); /* copies exactly 5 dwords; assumes both arrays hold at least 5 - TODO confirm */
+ set_hw_resources.mmhub_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, MMHUB_HWIP, 0));
+
+ memcpy(set_hw_resources.osssys_base, adev->reg_offset[OSSSYS_HWIP][0],
+ sizeof(uint32_t) * 5); /* same hard-coded 5-dword copy as for mmhub_base */
+ set_hw_resources.osssys_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, OSSSYS_HWIP, 0));
+
+ set_hw_resources.vcn_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, VCN_HWIP, 0));
+ set_hw_resources.vpe_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, VPE_HWIP, 0));
+
+ set_hw_resources.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr;
+ set_hw_resources.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq; /* pre-increment: the packet carries the new sequence number */
+
+ r = amdgpu_umsch_mm_submit_pkt(umsch, &set_hw_resources.max_dwords_in_api,
+ API_FRAME_SIZE_IN_DWORDS); /* the packet is passed through the union's max_dwords_in_api member */
+ if (r)
+ return r; /* submit failures are returned without logging */
+
+ r = amdgpu_umsch_mm_query_fence(umsch);
+ if (r) {
+ dev_err(adev->dev, "UMSCH SET_HW_RESOURCES: Failed\n");
+ return r;
+ }
+
+ return 0;
+}
+
+static int umsch_mm_v4_0_add_queue(struct amdgpu_umsch_mm *umsch,
+ struct umsch_mm_add_queue_input *input_ptr)
+{ /* Copy *input_ptr into an ADD_QUEUE packet, submit it, and check its fence; returns 0 or an error code. */
+ struct amdgpu_device *adev = umsch->ring.adev;
+ union UMSCHAPI__ADD_QUEUE add_queue = {}; /* zero-initialised, so unset fields stay 0 */
+ int r;
+
+ add_queue.header.type = UMSCH_API_TYPE_SCHEDULER;
+ add_queue.header.opcode = UMSCH_API_ADD_QUEUE;
+ add_queue.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ add_queue.process_id = input_ptr->process_id;
+ add_queue.page_table_base_addr = input_ptr->page_table_base_addr;
+ add_queue.process_va_start = input_ptr->process_va_start;
+ add_queue.process_va_end = input_ptr->process_va_end;
+ add_queue.process_quantum = input_ptr->process_quantum;
+ add_queue.process_csa_addr = input_ptr->process_csa_addr;
+ add_queue.context_quantum = input_ptr->context_quantum;
+ add_queue.context_csa_addr = input_ptr->context_csa_addr;
+ add_queue.inprocess_context_priority = input_ptr->inprocess_context_priority;
+ add_queue.context_global_priority_level =
+ (enum UMSCH_AMD_PRIORITY_LEVEL)input_ptr->context_global_priority_level; /* cast from the input type to the packet's enum */
+ add_queue.doorbell_offset_0 = input_ptr->doorbell_offset_0;
+ add_queue.doorbell_offset_1 = input_ptr->doorbell_offset_1;
+ add_queue.affinity.u32All = input_ptr->affinity;
+ add_queue.mqd_addr = input_ptr->mqd_addr;
+ add_queue.engine_type = (enum UMSCH_ENGINE_TYPE)input_ptr->engine_type;
+ add_queue.h_context = input_ptr->h_context;
+ add_queue.h_queue = input_ptr->h_queue;
+ add_queue.vm_context_cntl = input_ptr->vm_context_cntl;
+ add_queue.is_context_suspended = input_ptr->is_context_suspended;
+ add_queue.collaboration_mode = adev->vpe.collaborate_mode ? 1 : 0; /* taken from the device, not from input_ptr */
+
+ add_queue.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr;
+ add_queue.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq; /* pre-increment: the packet carries the new sequence number */
+
+ r = amdgpu_umsch_mm_submit_pkt(umsch, &add_queue.max_dwords_in_api,
+ API_FRAME_SIZE_IN_DWORDS);
+ if (r)
+ return r; /* submit failures are returned without logging */
+
+ r = amdgpu_umsch_mm_query_fence(umsch);
+ if (r) {
+ dev_err(adev->dev, "UMSCH ADD_QUEUE: Failed\n");
+ return r;
+ }
+
+ return 0;
+}
+
+static int umsch_mm_v4_0_remove_queue(struct amdgpu_umsch_mm *umsch,
+ struct umsch_mm_remove_queue_input *input_ptr)
+{ /* Build a REMOVE_QUEUE packet from *input_ptr, submit it, and check its fence; returns 0 or an error code. */
+ union UMSCHAPI__REMOVE_QUEUE remove_queue = {}; /* zero-initialised, so unset fields stay 0 */
+ struct amdgpu_device *adev = umsch->ring.adev;
+ int r;
+
+ remove_queue.header.type = UMSCH_API_TYPE_SCHEDULER;
+ remove_queue.header.opcode = UMSCH_API_REMOVE_QUEUE;
+ remove_queue.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ remove_queue.doorbell_offset_0 = input_ptr->doorbell_offset_0; /* only the two doorbell offsets and the context CSA address are taken from the input */
+ remove_queue.doorbell_offset_1 = input_ptr->doorbell_offset_1;
+ remove_queue.context_csa_addr = input_ptr->context_csa_addr;
+
+ remove_queue.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr;
+ remove_queue.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq; /* pre-increment: the packet carries the new sequence number */
+
+ r = amdgpu_umsch_mm_submit_pkt(umsch, &remove_queue.max_dwords_in_api,
+ API_FRAME_SIZE_IN_DWORDS);
+ if (r)
+ return r; /* submit failures are returned without logging */
+
+ r = amdgpu_umsch_mm_query_fence(umsch);
+ if (r) {
+ dev_err(adev->dev, "UMSCH REMOVE_QUEUE: Failed\n");
+ return r;
+ }
+
+ return 0;
+}
+
+static int umsch_mm_v4_0_set_regs(struct amdgpu_umsch_mm *umsch)
+{ /* Record the ring write/read pointer register offsets in umsch; always returns 0. */
+ struct amdgpu_device *adev = container_of(umsch, struct amdgpu_device, umsch_mm); /* NOTE(review): the only function here not using umsch->ring.adev; adev is presumably consumed by SOC15_REG_OFFSET - confirm */
+
+ umsch->rb_wptr = SOC15_REG_OFFSET(VCN, 0, regVCN_UMSCH_RB_WPTR);
+ umsch->rb_rptr = SOC15_REG_OFFSET(VCN, 0, regVCN_UMSCH_RB_RPTR);
+
+ return 0;
+}
+
+static const struct umsch_mm_funcs umsch_mm_v4_0_funcs = { /* callback table installed by umsch_mm_v4_0_set_funcs() */
+ .set_hw_resources = umsch_mm_v4_0_set_hw_resources,
+ .add_queue = umsch_mm_v4_0_add_queue,
+ .remove_queue = umsch_mm_v4_0_remove_queue,
+ .set_regs = umsch_mm_v4_0_set_regs,
+ .init_microcode = amdgpu_umsch_mm_init_microcode, /* not a umsch_mm_v4_0_* function; defined outside this file */
+ .load_microcode = umsch_mm_v4_0_load_microcode,
+ .ring_init = amdgpu_umsch_mm_ring_init, /* not a umsch_mm_v4_0_* function; defined outside this file */
+ .ring_start = umsch_mm_v4_0_ring_start,
+ .ring_stop = umsch_mm_v4_0_ring_stop,
+};
+
+void umsch_mm_v4_0_set_funcs(struct amdgpu_umsch_mm *umsch)
+{ /* The only non-static symbol in this file: points umsch->funcs at the v4.0 callback table. */
+ umsch->funcs = &umsch_mm_v4_0_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.h b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.h
new file mode 100644
index 000000000000..06bc0fa74996
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __UMSCH_MM_V4_0_H__
+#define __UMSCH_MM_V4_0_H__
+
+void umsch_mm_v4_0_set_funcs(struct amdgpu_umsch_mm *umsch); /* sets umsch->funcs to the UMSCH v4.0 callback table */
+
+#endif /* __UMSCH_MM_V4_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
index 0fef925b6602..2e79a3afc774 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
@@ -98,7 +98,7 @@ static void uvd_v3_1_ring_emit_ib(struct amdgpu_ring *ring,
}
/**
- * uvd_v3_1_ring_emit_fence - emit an fence & trap command
+ * uvd_v3_1_ring_emit_fence - emit a fence & trap command
*
* @ring: amdgpu_ring pointer
* @addr: address
@@ -242,7 +242,7 @@ static void uvd_v3_1_mc_resume(struct amdgpu_device *adev)
uint64_t addr;
uint32_t size;
- /* programm the VCPU memory controller bits 0-27 */
+ /* program the VCPU memory controller bits 0-27 */
addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3;
size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3;
WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr);
@@ -416,7 +416,7 @@ static int uvd_v3_1_start(struct amdgpu_device *adev)
/* Set the write pointer delay */
WREG32(mmUVD_RBC_RB_WPTR_CNTL, 0);
- /* programm the 4GB memory segment for rptr and ring buffer */
+ /* Program the 4GB memory segment for rptr and ring buffer */
WREG32(mmUVD_LMI_EXT40_ADDR, upper_32_bits(ring->gpu_addr) |
(0x7 << 16) | (0x1 << 31));
@@ -531,9 +531,9 @@ static void uvd_v3_1_set_irq_funcs(struct amdgpu_device *adev)
}
-static int uvd_v3_1_early_init(void *handle)
+static int uvd_v3_1_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->uvd.num_uvd_inst = 1;
uvd_v3_1_set_ring_funcs(adev);
@@ -542,10 +542,10 @@ static int uvd_v3_1_early_init(void *handle)
return 0;
}
-static int uvd_v3_1_sw_init(void *handle)
+static int uvd_v3_1_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
void *ptr;
uint32_t ucode_len;
@@ -577,15 +577,13 @@ static int uvd_v3_1_sw_init(void *handle)
ptr += ucode_len;
memcpy(&adev->uvd.keyselect, ptr, 4);
- r = amdgpu_uvd_entity_init(adev);
-
return r;
}
-static int uvd_v3_1_sw_fini(void *handle)
+static int uvd_v3_1_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_uvd_suspend(adev);
if (r)
@@ -623,18 +621,42 @@ static void uvd_v3_1_enable_mgcg(struct amdgpu_device *adev,
/**
* uvd_v3_1_hw_init - start and test UVD block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing.
*
- * Initialize the hardware, boot up the VCPU and do some testing
+ * On SI, the UVD is meant to be used in a specific power state,
+ * or alternatively the driver can manually enable its clock.
+ * In amdgpu we use the dedicated UVD power state when DPM is enabled.
+ * Calling amdgpu_dpm_enable_uvd makes DPM select the UVD power state
+ * for the SMU and afterwards enables the UVD clock.
+ * This is automatically done by amdgpu_uvd_ring_begin_use when work
+ * is submitted to the UVD ring. Here, we have to call it manually
+ * in order to power up UVD before firmware validation.
+ *
+ * Note that we must not disable the UVD clock here, as that would
+ * cause the ring test to fail. However, UVD is powered off
+ * automatically after the ring test: amdgpu_uvd_ring_end_use calls
+ * the UVD idle work handler which will disable the UVD clock when
+ * all fences are signalled.
*/
-static int uvd_v3_1_hw_init(void *handle)
+static int uvd_v3_1_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int r;
uvd_v3_1_mc_resume(adev);
+ uvd_v3_1_enable_mgcg(adev, true);
+
+ /* Make sure UVD is powered during FW validation.
+ * It's going to be automatically powered off after the ring test.
+ */
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, true);
+ else
+ amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
r = uvd_v3_1_fw_validate(adev);
if (r) {
@@ -642,9 +664,6 @@ static int uvd_v3_1_hw_init(void *handle)
return r;
}
- uvd_v3_1_enable_mgcg(adev, true);
- amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
-
uvd_v3_1_start(adev);
r = amdgpu_ring_test_helper(ring);
@@ -690,13 +709,13 @@ done:
/**
* uvd_v3_1_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the UVD block, mark ring as not ready any more
*/
-static int uvd_v3_1_hw_fini(void *handle)
+static int uvd_v3_1_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cancel_delayed_work_sync(&adev->uvd.idle_work);
@@ -706,10 +725,17 @@ static int uvd_v3_1_hw_fini(void *handle)
return 0;
}
-static int uvd_v3_1_suspend(void *handle)
+static int uvd_v3_1_prepare_suspend(struct amdgpu_ip_block *ip_block)
+{ /* .prepare_suspend hook for UVD v3.1: forwards to amdgpu_uvd_prepare_suspend() and returns its result. */
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return amdgpu_uvd_prepare_suspend(adev);
+}
+
+static int uvd_v3_1_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* Proper cleanups before halting the HW engine:
@@ -735,36 +761,35 @@ static int uvd_v3_1_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = uvd_v3_1_hw_fini(adev);
+ r = uvd_v3_1_hw_fini(ip_block);
if (r)
return r;
return amdgpu_uvd_suspend(adev);
}
-static int uvd_v3_1_resume(void *handle)
+static int uvd_v3_1_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_uvd_resume(adev);
+ r = amdgpu_uvd_resume(ip_block->adev);
if (r)
return r;
- return uvd_v3_1_hw_init(adev);
+ return uvd_v3_1_hw_init(ip_block);
}
-static bool uvd_v3_1_is_idle(void *handle)
+static bool uvd_v3_1_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
}
-static int uvd_v3_1_wait_for_idle(void *handle)
+static int uvd_v3_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
if (!(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK))
@@ -773,9 +798,9 @@ static int uvd_v3_1_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int uvd_v3_1_soft_reset(void *handle)
+static int uvd_v3_1_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uvd_v3_1_stop(adev);
@@ -786,13 +811,13 @@ static int uvd_v3_1_soft_reset(void *handle)
return uvd_v3_1_start(adev);
}
-static int uvd_v3_1_set_clockgating_state(void *handle,
+static int uvd_v3_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int uvd_v3_1_set_powergating_state(void *handle,
+static int uvd_v3_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -801,11 +826,11 @@ static int uvd_v3_1_set_powergating_state(void *handle,
static const struct amd_ip_funcs uvd_v3_1_ip_funcs = {
.name = "uvd_v3_1",
.early_init = uvd_v3_1_early_init,
- .late_init = NULL,
.sw_init = uvd_v3_1_sw_init,
.sw_fini = uvd_v3_1_sw_fini,
.hw_init = uvd_v3_1_hw_init,
.hw_fini = uvd_v3_1_hw_fini,
+ .prepare_suspend = uvd_v3_1_prepare_suspend,
.suspend = uvd_v3_1_suspend,
.resume = uvd_v3_1_resume,
.is_idle = uvd_v3_1_is_idle,
@@ -815,8 +840,7 @@ static const struct amd_ip_funcs uvd_v3_1_ip_funcs = {
.set_powergating_state = uvd_v3_1_set_powergating_state,
};
-const struct amdgpu_ip_block_version uvd_v3_1_ip_block =
-{
+const struct amdgpu_ip_block_version uvd_v3_1_ip_block = {
.type = AMD_IP_BLOCK_TYPE_UVD,
.major = 3,
.minor = 1,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index c108b8381795..4b96fd583772 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -44,7 +44,7 @@ static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev);
static void uvd_v4_2_set_irq_funcs(struct amdgpu_device *adev);
static int uvd_v4_2_start(struct amdgpu_device *adev);
static void uvd_v4_2_stop(struct amdgpu_device *adev);
-static int uvd_v4_2_set_clockgating_state(void *handle,
+static int uvd_v4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
static void uvd_v4_2_set_dcm(struct amdgpu_device *adev,
bool sw_mode);
@@ -90,9 +90,9 @@ static void uvd_v4_2_ring_set_wptr(struct amdgpu_ring *ring)
WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
}
-static int uvd_v4_2_early_init(void *handle)
+static int uvd_v4_2_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->uvd.num_uvd_inst = 1;
uvd_v4_2_set_ring_funcs(adev);
@@ -101,10 +101,10 @@ static int uvd_v4_2_early_init(void *handle)
return 0;
}
-static int uvd_v4_2_sw_init(void *handle)
+static int uvd_v4_2_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
/* UVD TRAP */
@@ -127,15 +127,13 @@ static int uvd_v4_2_sw_init(void *handle)
if (r)
return r;
- r = amdgpu_uvd_entity_init(adev);
-
return r;
}
-static int uvd_v4_2_sw_fini(void *handle)
+static int uvd_v4_2_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_uvd_suspend(adev);
if (r)
@@ -149,13 +147,13 @@ static void uvd_v4_2_enable_mgcg(struct amdgpu_device *adev,
/**
* uvd_v4_2_hw_init - start and test UVD block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int uvd_v4_2_hw_init(void *handle)
+static int uvd_v4_2_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int r;
@@ -204,13 +202,13 @@ done:
/**
* uvd_v4_2_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the UVD block, mark ring as not ready any more
*/
-static int uvd_v4_2_hw_fini(void *handle)
+static int uvd_v4_2_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cancel_delayed_work_sync(&adev->uvd.idle_work);
@@ -220,10 +218,17 @@ static int uvd_v4_2_hw_fini(void *handle)
return 0;
}
-static int uvd_v4_2_suspend(void *handle)
+static int uvd_v4_2_prepare_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return amdgpu_uvd_prepare_suspend(adev);
+}
+
+static int uvd_v4_2_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* Proper cleanups before halting the HW engine:
@@ -249,23 +254,22 @@ static int uvd_v4_2_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = uvd_v4_2_hw_fini(adev);
+ r = uvd_v4_2_hw_fini(ip_block);
if (r)
return r;
return amdgpu_uvd_suspend(adev);
}
-static int uvd_v4_2_resume(void *handle)
+static int uvd_v4_2_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_uvd_resume(adev);
+ r = amdgpu_uvd_resume(ip_block->adev);
if (r)
return r;
- return uvd_v4_2_hw_init(adev);
+ return uvd_v4_2_hw_init(ip_block);
}
/**
@@ -298,7 +302,7 @@ static int uvd_v4_2_start(struct amdgpu_device *adev)
/* enable VCPU clock */
WREG32(mmUVD_VCPU_CNTL, 1 << 9);
- /* disable interupt */
+ /* disable interrupt */
WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1));
#ifdef __BIG_ENDIAN
@@ -308,6 +312,7 @@ static int uvd_v4_2_start(struct amdgpu_device *adev)
#endif
WREG32(mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
WREG32(mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
+
/* initialize UVD memory controller */
WREG32(mmUVD_LMI_CTRL, 0x203108);
@@ -654,17 +659,17 @@ static void uvd_v4_2_set_dcm(struct amdgpu_device *adev,
WREG32_UVD_CTX(ixUVD_CGC_CTRL2, tmp2);
}
-static bool uvd_v4_2_is_idle(void *handle)
+static bool uvd_v4_2_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
}
-static int uvd_v4_2_wait_for_idle(void *handle)
+static int uvd_v4_2_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
if (!(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK))
@@ -673,9 +678,9 @@ static int uvd_v4_2_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int uvd_v4_2_soft_reset(void *handle)
+static int uvd_v4_2_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uvd_v4_2_stop(adev);
@@ -704,13 +709,13 @@ static int uvd_v4_2_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int uvd_v4_2_set_clockgating_state(void *handle,
+static int uvd_v4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int uvd_v4_2_set_powergating_state(void *handle,
+static int uvd_v4_2_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the UVD block.
@@ -720,7 +725,7 @@ static int uvd_v4_2_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE) {
uvd_v4_2_stop(adev);
@@ -751,11 +756,11 @@ static int uvd_v4_2_set_powergating_state(void *handle,
static const struct amd_ip_funcs uvd_v4_2_ip_funcs = {
.name = "uvd_v4_2",
.early_init = uvd_v4_2_early_init,
- .late_init = NULL,
.sw_init = uvd_v4_2_sw_init,
.sw_fini = uvd_v4_2_sw_fini,
.hw_init = uvd_v4_2_hw_init,
.hw_fini = uvd_v4_2_hw_fini,
+ .prepare_suspend = uvd_v4_2_prepare_suspend,
.suspend = uvd_v4_2_suspend,
.resume = uvd_v4_2_resume,
.is_idle = uvd_v4_2_is_idle,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 563493d1f830..71409ad8b7ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -42,7 +42,7 @@ static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev);
static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev);
static int uvd_v5_0_start(struct amdgpu_device *adev);
static void uvd_v5_0_stop(struct amdgpu_device *adev);
-static int uvd_v5_0_set_clockgating_state(void *handle,
+static int uvd_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev,
bool enable);
@@ -88,9 +88,9 @@ static void uvd_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
}
-static int uvd_v5_0_early_init(void *handle)
+static int uvd_v5_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->uvd.num_uvd_inst = 1;
uvd_v5_0_set_ring_funcs(adev);
@@ -99,10 +99,10 @@ static int uvd_v5_0_early_init(void *handle)
return 0;
}
-static int uvd_v5_0_sw_init(void *handle)
+static int uvd_v5_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
/* UVD TRAP */
@@ -125,15 +125,13 @@ static int uvd_v5_0_sw_init(void *handle)
if (r)
return r;
- r = amdgpu_uvd_entity_init(adev);
-
return r;
}
-static int uvd_v5_0_sw_fini(void *handle)
+static int uvd_v5_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_uvd_suspend(adev);
if (r)
@@ -145,19 +143,19 @@ static int uvd_v5_0_sw_fini(void *handle)
/**
* uvd_v5_0_hw_init - start and test UVD block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int uvd_v5_0_hw_init(void *handle)
+static int uvd_v5_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int r;
amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
- uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
+ uvd_v5_0_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE);
uvd_v5_0_enable_mgcg(adev, true);
r = amdgpu_ring_test_helper(ring);
@@ -202,13 +200,13 @@ done:
/**
* uvd_v5_0_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the UVD block, mark ring as not ready any more
*/
-static int uvd_v5_0_hw_fini(void *handle)
+static int uvd_v5_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cancel_delayed_work_sync(&adev->uvd.idle_work);
@@ -218,10 +216,17 @@ static int uvd_v5_0_hw_fini(void *handle)
return 0;
}
-static int uvd_v5_0_suspend(void *handle)
+static int uvd_v5_0_prepare_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return amdgpu_uvd_prepare_suspend(adev);
+}
+
+static int uvd_v5_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* Proper cleanups before halting the HW engine:
@@ -247,23 +252,22 @@ static int uvd_v5_0_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = uvd_v5_0_hw_fini(adev);
+ r = uvd_v5_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_uvd_suspend(adev);
}
-static int uvd_v5_0_resume(void *handle)
+static int uvd_v5_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_uvd_resume(adev);
+ r = amdgpu_uvd_resume(ip_block->adev);
if (r)
return r;
- return uvd_v5_0_hw_init(adev);
+ return uvd_v5_0_hw_init(ip_block);
}
/**
@@ -576,17 +580,17 @@ static void uvd_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
}
}
-static bool uvd_v5_0_is_idle(void *handle)
+static bool uvd_v5_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
}
-static int uvd_v5_0_wait_for_idle(void *handle)
+static int uvd_v5_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
if (!(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK))
@@ -595,9 +599,9 @@ static int uvd_v5_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int uvd_v5_0_soft_reset(void *handle)
+static int uvd_v5_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uvd_v5_0_stop(adev);
@@ -786,15 +790,15 @@ static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev,
}
}
-static int uvd_v5_0_set_clockgating_state(void *handle,
+static int uvd_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
/* wait for STATUS to clear */
- if (uvd_v5_0_wait_for_idle(handle))
+ if (uvd_v5_0_wait_for_idle(ip_block))
return -EBUSY;
uvd_v5_0_enable_clock_gating(adev, true);
@@ -808,7 +812,7 @@ static int uvd_v5_0_set_clockgating_state(void *handle,
return 0;
}
-static int uvd_v5_0_set_powergating_state(void *handle,
+static int uvd_v5_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the UVD block.
@@ -818,7 +822,7 @@ static int uvd_v5_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 0;
if (state == AMD_PG_STATE_GATE) {
@@ -833,9 +837,9 @@ out:
return ret;
}
-static void uvd_v5_0_get_clockgating_state(void *handle, u32 *flags)
+static void uvd_v5_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
mutex_lock(&adev->pm.mutex);
@@ -858,11 +862,11 @@ out:
static const struct amd_ip_funcs uvd_v5_0_ip_funcs = {
.name = "uvd_v5_0",
.early_init = uvd_v5_0_early_init,
- .late_init = NULL,
.sw_init = uvd_v5_0_sw_init,
.sw_fini = uvd_v5_0_sw_fini,
.hw_init = uvd_v5_0_hw_init,
.hw_fini = uvd_v5_0_hw_fini,
+ .prepare_suspend = uvd_v5_0_prepare_suspend,
.suspend = uvd_v5_0_suspend,
.resume = uvd_v5_0_resume,
.is_idle = uvd_v5_0_is_idle,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 2d558c2f417d..ceb94bbb03a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -48,7 +48,7 @@ static void uvd_v6_0_set_irq_funcs(struct amdgpu_device *adev);
static int uvd_v6_0_start(struct amdgpu_device *adev);
static void uvd_v6_0_stop(struct amdgpu_device *adev);
static void uvd_v6_0_set_sw_clock_gating(struct amdgpu_device *adev);
-static int uvd_v6_0_set_clockgating_state(void *handle,
+static int uvd_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev,
bool enable);
@@ -216,8 +216,9 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
@@ -280,8 +281,9 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
@@ -354,9 +356,9 @@ error:
return r;
}
-static int uvd_v6_0_early_init(void *handle)
+static int uvd_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->uvd.num_uvd_inst = 1;
if (!(adev->flags & AMD_IS_APU) &&
@@ -375,11 +377,11 @@ static int uvd_v6_0_early_init(void *handle)
return 0;
}
-static int uvd_v6_0_sw_init(void *handle)
+static int uvd_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* UVD TRAP */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq);
@@ -432,15 +434,13 @@ static int uvd_v6_0_sw_init(void *handle)
}
}
- r = amdgpu_uvd_entity_init(adev);
-
return r;
}
-static int uvd_v6_0_sw_fini(void *handle)
+static int uvd_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_uvd_suspend(adev);
if (r)
@@ -457,19 +457,19 @@ static int uvd_v6_0_sw_fini(void *handle)
/**
* uvd_v6_0_hw_init - start and test UVD block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int uvd_v6_0_hw_init(void *handle)
+static int uvd_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int i, r;
amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
- uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
+ uvd_v6_0_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE);
uvd_v6_0_enable_mgcg(adev, true);
r = amdgpu_ring_test_helper(ring);
@@ -526,13 +526,13 @@ done:
/**
* uvd_v6_0_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the UVD block, mark ring as not ready any more
*/
-static int uvd_v6_0_hw_fini(void *handle)
+static int uvd_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cancel_delayed_work_sync(&adev->uvd.idle_work);
@@ -542,10 +542,17 @@ static int uvd_v6_0_hw_fini(void *handle)
return 0;
}
-static int uvd_v6_0_suspend(void *handle)
+static int uvd_v6_0_prepare_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return amdgpu_uvd_prepare_suspend(adev);
+}
+
+static int uvd_v6_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* Proper cleanups before halting the HW engine:
@@ -571,23 +578,22 @@ static int uvd_v6_0_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = uvd_v6_0_hw_fini(adev);
+ r = uvd_v6_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_uvd_suspend(adev);
}
-static int uvd_v6_0_resume(void *handle)
+static int uvd_v6_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_uvd_resume(adev);
+ r = amdgpu_uvd_resume(ip_block->adev);
if (r)
return r;
- return uvd_v6_0_hw_init(adev);
+ return uvd_v6_0_hw_init(ip_block);
}
/**
@@ -1139,29 +1145,29 @@ static void uvd_v6_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, vmid);
}
-static bool uvd_v6_0_is_idle(void *handle)
+static bool uvd_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
}
-static int uvd_v6_0_wait_for_idle(void *handle)
+static int uvd_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (uvd_v6_0_is_idle(handle))
+ if (uvd_v6_0_is_idle(ip_block))
return 0;
}
return -ETIMEDOUT;
}
#define AMDGPU_UVD_STATUS_BUSY_MASK 0xfd
-static bool uvd_v6_0_check_soft_reset(void *handle)
+static bool uvd_v6_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS);
@@ -1179,9 +1185,9 @@ static bool uvd_v6_0_check_soft_reset(void *handle)
}
}
-static int uvd_v6_0_pre_soft_reset(void *handle)
+static int uvd_v6_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->uvd.inst->srbm_soft_reset)
return 0;
@@ -1190,9 +1196,9 @@ static int uvd_v6_0_pre_soft_reset(void *handle)
return 0;
}
-static int uvd_v6_0_soft_reset(void *handle)
+static int uvd_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset;
if (!adev->uvd.inst->srbm_soft_reset)
@@ -1221,9 +1227,9 @@ static int uvd_v6_0_soft_reset(void *handle)
return 0;
}
-static int uvd_v6_0_post_soft_reset(void *handle)
+static int uvd_v6_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->uvd.inst->srbm_soft_reset)
return 0;
@@ -1446,15 +1452,15 @@ static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev,
}
}
-static int uvd_v6_0_set_clockgating_state(void *handle,
+static int uvd_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
/* wait for STATUS to clear */
- if (uvd_v6_0_wait_for_idle(handle))
+ if (uvd_v6_0_wait_for_idle(ip_block))
return -EBUSY;
uvd_v6_0_enable_clock_gating(adev, true);
/* enable HW gates because UVD is idle */
@@ -1467,7 +1473,7 @@ static int uvd_v6_0_set_clockgating_state(void *handle,
return 0;
}
-static int uvd_v6_0_set_powergating_state(void *handle,
+static int uvd_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the UVD block.
@@ -1477,7 +1483,7 @@ static int uvd_v6_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 0;
WREG32(mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
@@ -1494,9 +1500,9 @@ out:
return ret;
}
-static void uvd_v6_0_get_clockgating_state(void *handle, u32 *flags)
+static void uvd_v6_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
mutex_lock(&adev->pm.mutex);
@@ -1523,11 +1529,11 @@ out:
static const struct amd_ip_funcs uvd_v6_0_ip_funcs = {
.name = "uvd_v6_0",
.early_init = uvd_v6_0_early_init,
- .late_init = NULL,
.sw_init = uvd_v6_0_sw_init,
.sw_fini = uvd_v6_0_sw_fini,
.hw_init = uvd_v6_0_hw_init,
.hw_fini = uvd_v6_0_hw_fini,
+ .prepare_suspend = uvd_v6_0_prepare_suspend,
.suspend = uvd_v6_0_suspend,
.resume = uvd_v6_0_resume,
.is_idle = uvd_v6_0_is_idle,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index b483f03b4591..1f8866f3f63c 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -25,6 +25,7 @@
#include "amdgpu.h"
#include "amdgpu_uvd.h"
+#include "amdgpu_cs.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
@@ -117,7 +118,7 @@ static uint64_t uvd_v7_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR);
@@ -152,7 +153,7 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
return;
}
@@ -212,7 +213,7 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
*
* Open up a stream for HW test
*/
-static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, u32 handle,
struct amdgpu_bo *bo,
struct dma_fence **fence)
{
@@ -223,8 +224,9 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
@@ -275,7 +277,7 @@ err:
*
* Close up a stream for HW test or if userspace failed to do so
*/
-static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, u32 handle,
struct amdgpu_bo *bo,
struct dma_fence **fence)
{
@@ -286,8 +288,9 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
@@ -360,9 +363,9 @@ error:
return r;
}
-static int uvd_v7_0_early_init(void *handle)
+static int uvd_v7_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->asic_type == CHIP_VEGA20) {
u32 harvest;
@@ -394,12 +397,12 @@ static int uvd_v7_0_early_init(void *handle)
return 0;
}
-static int uvd_v7_0_sw_init(void *handle)
+static int uvd_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, j, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
if (adev->uvd.harvest_config & (1 << j))
@@ -443,6 +446,7 @@ static int uvd_v7_0_sw_init(void *handle)
continue;
if (!amdgpu_sriov_vf(adev)) {
ring = &adev->uvd.inst[j].ring;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "uvd_%d", ring->me);
r = amdgpu_ring_init(adev, ring, 512,
&adev->uvd.inst[j].irq, 0,
@@ -453,6 +457,7 @@ static int uvd_v7_0_sw_init(void *handle)
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
ring = &adev->uvd.inst[j].ring_enc[i];
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "uvd_enc_%d.%d", ring->me, i);
if (amdgpu_sriov_vf(adev)) {
ring->use_doorbell = true;
@@ -477,10 +482,6 @@ static int uvd_v7_0_sw_init(void *handle)
if (r)
return r;
- r = amdgpu_uvd_entity_init(adev);
- if (r)
- return r;
-
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
return r;
@@ -488,10 +489,10 @@ static int uvd_v7_0_sw_init(void *handle)
return r;
}
-static int uvd_v7_0_sw_fini(void *handle)
+static int uvd_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i, j, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_virt_free_mm_table(adev);
@@ -511,13 +512,13 @@ static int uvd_v7_0_sw_fini(void *handle)
/**
* uvd_v7_0_hw_init - start and test UVD block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int uvd_v7_0_hw_init(void *handle)
+static int uvd_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
uint32_t tmp;
int i, j, r;
@@ -589,13 +590,13 @@ done:
/**
* uvd_v7_0_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the UVD block, mark ring as not ready any more
*/
-static int uvd_v7_0_hw_fini(void *handle)
+static int uvd_v7_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cancel_delayed_work_sync(&adev->uvd.idle_work);
@@ -609,10 +610,17 @@ static int uvd_v7_0_hw_fini(void *handle)
return 0;
}
-static int uvd_v7_0_suspend(void *handle)
+static int uvd_v7_0_prepare_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return amdgpu_uvd_prepare_suspend(adev);
+}
+
+static int uvd_v7_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* Proper cleanups before halting the HW engine:
@@ -638,23 +646,22 @@ static int uvd_v7_0_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = uvd_v7_0_hw_fini(adev);
+ r = uvd_v7_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_uvd_suspend(adev);
}
-static int uvd_v7_0_resume(void *handle)
+static int uvd_v7_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_uvd_resume(adev);
+ r = amdgpu_uvd_resume(ip_block->adev);
if (r)
return r;
- return uvd_v7_0_hw_init(adev);
+ return uvd_v7_0_hw_init(ip_block);
}
/**
@@ -676,11 +683,11 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev)
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
i == 0 ?
- adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_lo:
+ adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_lo :
adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].tmr_mc_addr_lo);
WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
i == 0 ?
- adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_hi:
+ adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_hi :
adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].tmr_mc_addr_hi);
WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
offset = 0;
@@ -753,7 +760,7 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
if (adev->uvd.harvest_config & (1 << i))
continue;
WDOORBELL32(adev->uvd.inst[i].ring_enc[0].doorbell_index, 0);
- adev->wb.wb[adev->uvd.inst[i].ring_enc[0].wptr_offs] = 0;
+ *adev->uvd.inst[i].ring_enc[0].wptr_cpu_addr = 0;
adev->uvd.inst[i].ring_enc[0].wptr = 0;
adev->uvd.inst[i].ring_enc[0].wptr_old = 0;
}
@@ -1275,14 +1282,15 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
* uvd_v7_0_ring_patch_cs_in_place - Patch the IB for command submission.
*
* @p: the CS parser with the IBs
- * @ib_idx: which IB to patch
+ * @job: which job this ib is in
+ * @ib: which IB to patch
*
*/
static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
- uint32_t ib_idx)
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
{
- struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
- struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
+ struct amdgpu_ring *ring = amdgpu_job_ring(job);
unsigned i;
/* No patching necessary for the first instance */
@@ -1290,12 +1298,12 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
return 0;
for (i = 0; i < ib->length_dw; i += 2) {
- uint32_t reg = amdgpu_get_ib_value(p, ib_idx, i);
+ uint32_t reg = amdgpu_ib_get_value(ib, i);
reg -= p->adev->reg_offset[UVD_HWIP][0][1];
reg += p->adev->reg_offset[UVD_HWIP][1][1];
- amdgpu_set_ib_value(p, ib_idx, i, reg);
+ amdgpu_ib_set_value(ib, i, reg);
}
return 0;
}
@@ -1395,7 +1403,7 @@ static void uvd_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
uint32_t data0, data1, mask;
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
@@ -1438,7 +1446,7 @@ static void uvd_v7_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring,
static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned int vmid, uint64_t pd_addr)
{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
@@ -1456,104 +1464,6 @@ static void uvd_v7_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
-#if 0
-static bool uvd_v7_0_is_idle(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
-}
-
-static int uvd_v7_0_wait_for_idle(void *handle)
-{
- unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- for (i = 0; i < adev->usec_timeout; i++) {
- if (uvd_v7_0_is_idle(handle))
- return 0;
- }
- return -ETIMEDOUT;
-}
-
-#define AMDGPU_UVD_STATUS_BUSY_MASK 0xfd
-static bool uvd_v7_0_check_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 srbm_soft_reset = 0;
- u32 tmp = RREG32(mmSRBM_STATUS);
-
- if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) ||
- REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) ||
- (RREG32_SOC15(UVD, ring->me, mmUVD_STATUS) &
- AMDGPU_UVD_STATUS_BUSY_MASK))
- srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
- SRBM_SOFT_RESET, SOFT_RESET_UVD, 1);
-
- if (srbm_soft_reset) {
- adev->uvd.inst[ring->me].srbm_soft_reset = srbm_soft_reset;
- return true;
- } else {
- adev->uvd.inst[ring->me].srbm_soft_reset = 0;
- return false;
- }
-}
-
-static int uvd_v7_0_pre_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!adev->uvd.inst[ring->me].srbm_soft_reset)
- return 0;
-
- uvd_v7_0_stop(adev);
- return 0;
-}
-
-static int uvd_v7_0_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 srbm_soft_reset;
-
- if (!adev->uvd.inst[ring->me].srbm_soft_reset)
- return 0;
- srbm_soft_reset = adev->uvd.inst[ring->me].srbm_soft_reset;
-
- if (srbm_soft_reset) {
- u32 tmp;
-
- tmp = RREG32(mmSRBM_SOFT_RESET);
- tmp |= srbm_soft_reset;
- dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- udelay(50);
-
- tmp &= ~srbm_soft_reset;
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- /* Wait a little for things to settle down */
- udelay(50);
- }
-
- return 0;
-}
-
-static int uvd_v7_0_post_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!adev->uvd.inst[ring->me].srbm_soft_reset)
- return 0;
-
- mdelay(5);
-
- return uvd_v7_0_start(adev);
-}
-#endif
-
static int uvd_v7_0_set_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
@@ -1603,172 +1513,7 @@ static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-#if 0
-static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev)
-{
- uint32_t data, data1, data2, suvd_flags;
-
- data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL);
- data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
- data2 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL);
-
- data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK |
- UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK);
-
- suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK |
- UVD_SUVD_CGC_GATE__SIT_MASK |
- UVD_SUVD_CGC_GATE__SMP_MASK |
- UVD_SUVD_CGC_GATE__SCM_MASK |
- UVD_SUVD_CGC_GATE__SDB_MASK;
-
- data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK |
- (1 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_GATE_DLY_TIMER)) |
- (4 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_OFF_DELAY));
-
- data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
- UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
- UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
- UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
- UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
- UVD_CGC_CTRL__SYS_MODE_MASK |
- UVD_CGC_CTRL__UDEC_MODE_MASK |
- UVD_CGC_CTRL__MPEG2_MODE_MASK |
- UVD_CGC_CTRL__REGS_MODE_MASK |
- UVD_CGC_CTRL__RBC_MODE_MASK |
- UVD_CGC_CTRL__LMI_MC_MODE_MASK |
- UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
- UVD_CGC_CTRL__IDCT_MODE_MASK |
- UVD_CGC_CTRL__MPRD_MODE_MASK |
- UVD_CGC_CTRL__MPC_MODE_MASK |
- UVD_CGC_CTRL__LBSI_MODE_MASK |
- UVD_CGC_CTRL__LRBBM_MODE_MASK |
- UVD_CGC_CTRL__WCB_MODE_MASK |
- UVD_CGC_CTRL__VCPU_MODE_MASK |
- UVD_CGC_CTRL__JPEG_MODE_MASK |
- UVD_CGC_CTRL__JPEG2_MODE_MASK |
- UVD_CGC_CTRL__SCPU_MODE_MASK);
- data2 &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK |
- UVD_SUVD_CGC_CTRL__SIT_MODE_MASK |
- UVD_SUVD_CGC_CTRL__SMP_MODE_MASK |
- UVD_SUVD_CGC_CTRL__SCM_MODE_MASK |
- UVD_SUVD_CGC_CTRL__SDB_MODE_MASK);
- data1 |= suvd_flags;
-
- WREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL, data);
- WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, 0);
- WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
- WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL, data2);
-}
-
-static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev)
-{
- uint32_t data, data1, cgc_flags, suvd_flags;
-
- data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE);
- data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
-
- cgc_flags = UVD_CGC_GATE__SYS_MASK |
- UVD_CGC_GATE__UDEC_MASK |
- UVD_CGC_GATE__MPEG2_MASK |
- UVD_CGC_GATE__RBC_MASK |
- UVD_CGC_GATE__LMI_MC_MASK |
- UVD_CGC_GATE__IDCT_MASK |
- UVD_CGC_GATE__MPRD_MASK |
- UVD_CGC_GATE__MPC_MASK |
- UVD_CGC_GATE__LBSI_MASK |
- UVD_CGC_GATE__LRBBM_MASK |
- UVD_CGC_GATE__UDEC_RE_MASK |
- UVD_CGC_GATE__UDEC_CM_MASK |
- UVD_CGC_GATE__UDEC_IT_MASK |
- UVD_CGC_GATE__UDEC_DB_MASK |
- UVD_CGC_GATE__UDEC_MP_MASK |
- UVD_CGC_GATE__WCB_MASK |
- UVD_CGC_GATE__VCPU_MASK |
- UVD_CGC_GATE__SCPU_MASK |
- UVD_CGC_GATE__JPEG_MASK |
- UVD_CGC_GATE__JPEG2_MASK;
-
- suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK |
- UVD_SUVD_CGC_GATE__SIT_MASK |
- UVD_SUVD_CGC_GATE__SMP_MASK |
- UVD_SUVD_CGC_GATE__SCM_MASK |
- UVD_SUVD_CGC_GATE__SDB_MASK;
-
- data |= cgc_flags;
- data1 |= suvd_flags;
-
- WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, data);
- WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
-}
-
-static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
-{
- u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
-
- if (enable)
- tmp |= (GCK_DFS_BYPASS_CNTL__BYPASSDCLK_MASK |
- GCK_DFS_BYPASS_CNTL__BYPASSVCLK_MASK);
- else
- tmp &= ~(GCK_DFS_BYPASS_CNTL__BYPASSDCLK_MASK |
- GCK_DFS_BYPASS_CNTL__BYPASSVCLK_MASK);
-
- WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
-}
-
-
-static int uvd_v7_0_set_clockgating_state(void *handle,
- enum amd_clockgating_state state)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE);
-
- uvd_v7_0_set_bypass_mode(adev, enable);
-
- if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
- return 0;
-
- if (enable) {
- /* disable HW gating and enable Sw gating */
- uvd_v7_0_set_sw_clock_gating(adev);
- } else {
- /* wait for STATUS to clear */
- if (uvd_v7_0_wait_for_idle(handle))
- return -EBUSY;
-
- /* enable HW gates because UVD is idle */
- /* uvd_v7_0_set_hw_clock_gating(adev); */
- }
-
- return 0;
-}
-
-static int uvd_v7_0_set_powergating_state(void *handle,
- enum amd_powergating_state state)
-{
- /* This doesn't actually powergate the UVD block.
- * That's done in the dpm code via the SMC. This
- * just re-inits the block as necessary. The actual
- * gating still happens in the dpm code. We should
- * revisit this when there is a cleaner line between
- * the smc and the hw blocks
- */
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
- return 0;
-
- WREG32_SOC15(UVD, ring->me, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
-
- if (state == AMD_PG_STATE_GATE) {
- uvd_v7_0_stop(adev);
- return 0;
- } else {
- return uvd_v7_0_start(adev);
- }
-}
-#endif
-
-static int uvd_v7_0_set_clockgating_state(void *handle,
+static int uvd_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
/* needed for driver unload*/
@@ -1778,19 +1523,13 @@ static int uvd_v7_0_set_clockgating_state(void *handle,
const struct amd_ip_funcs uvd_v7_0_ip_funcs = {
.name = "uvd_v7_0",
.early_init = uvd_v7_0_early_init,
- .late_init = NULL,
.sw_init = uvd_v7_0_sw_init,
.sw_fini = uvd_v7_0_sw_fini,
.hw_init = uvd_v7_0_hw_init,
.hw_fini = uvd_v7_0_hw_fini,
+ .prepare_suspend = uvd_v7_0_prepare_suspend,
.suspend = uvd_v7_0_suspend,
.resume = uvd_v7_0_resume,
- .is_idle = NULL /* uvd_v7_0_is_idle */,
- .wait_for_idle = NULL /* uvd_v7_0_wait_for_idle */,
- .check_soft_reset = NULL /* uvd_v7_0_check_soft_reset */,
- .pre_soft_reset = NULL /* uvd_v7_0_pre_soft_reset */,
- .soft_reset = NULL /* uvd_v7_0_soft_reset */,
- .post_soft_reset = NULL /* uvd_v7_0_post_soft_reset */,
.set_clockgating_state = uvd_v7_0_set_clockgating_state,
.set_powergating_state = NULL /* uvd_v7_0_set_powergating_state */,
};
@@ -1800,7 +1539,6 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
.align_mask = 0xf,
.support_64bit_ptrs = false,
.no_user_fence = true,
- .vmhub = AMDGPU_MMHUB_0,
.get_rptr = uvd_v7_0_ring_get_rptr,
.get_wptr = uvd_v7_0_ring_get_wptr,
.set_wptr = uvd_v7_0_ring_set_wptr,
@@ -1833,7 +1571,6 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
.nop = HEVC_ENC_CMD_NO_OP,
.support_64bit_ptrs = false,
.no_user_fence = true,
- .vmhub = AMDGPU_MMHUB_0,
.get_rptr = uvd_v7_0_enc_ring_get_rptr,
.get_wptr = uvd_v7_0_enc_ring_get_wptr,
.set_wptr = uvd_v7_0_enc_ring_set_wptr,
@@ -1906,8 +1643,7 @@ static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev)
}
}
-const struct amdgpu_ip_block_version uvd_v7_0_ip_block =
-{
+const struct amdgpu_ip_block_version uvd_v7_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_UVD,
.major = 7,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
new file mode 100644
index 000000000000..9ae424618556
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
@@ -0,0 +1,839 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * Copyright 2025 Valve Corporation
+ * Copyright 2025 Alexandre Demers
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * Authors: Christian König <christian.koenig@amd.com>
+ * Timur Kristóf <timur.kristof@gmail.com>
+ * Alexandre Demers <alexandre.f.demers@gmail.com>
+ */
+
+#include <linux/firmware.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vce.h"
+#include "amdgpu_gart.h"
+#include "sid.h"
+#include "vce_v1_0.h"
+#include "vce/vce_1_0_d.h"
+#include "vce/vce_1_0_sh_mask.h"
+#include "oss/oss_1_0_d.h"
+#include "oss/oss_1_0_sh_mask.h"
+
+#define VCE_V1_0_FW_SIZE (256 * 1024)
+#define VCE_V1_0_STACK_SIZE (64 * 1024)
+#define VCE_V1_0_DATA_SIZE (7808 * (AMDGPU_MAX_VCE_HANDLES + 1))
+#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
+
+#define VCE_V1_0_GART_PAGE_START \
+ (AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS)
+#define VCE_V1_0_GART_ADDR_START \
+ (VCE_V1_0_GART_PAGE_START * AMDGPU_GPU_PAGE_SIZE)
+
+static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev);
+static void vce_v1_0_set_irq_funcs(struct amdgpu_device *adev);
+
+struct vce_v1_0_fw_signature {
+ int32_t offset;
+ uint32_t length;
+ int32_t number;
+ struct {
+ uint32_t chip_id;
+ uint32_t keyselect;
+ uint32_t nonce[4];
+ uint32_t sigval[4];
+ } val[8];
+};
+
+/**
+ * vce_v1_0_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t vce_v1_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->me == 0)
+ return RREG32(mmVCE_RB_RPTR);
+ else
+ return RREG32(mmVCE_RB_RPTR2);
+}
+
+/**
+ * vce_v1_0_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t vce_v1_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->me == 0)
+ return RREG32(mmVCE_RB_WPTR);
+ else
+ return RREG32(mmVCE_RB_WPTR2);
+}
+
+/**
+ * vce_v1_0_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void vce_v1_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->me == 0)
+ WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
+ else
+ WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
+}
+
+static int vce_v1_0_lmi_clean(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ for (i = 0; i < 10; ++i) {
+ for (j = 0; j < 100; ++j) {
+ if (RREG32(mmVCE_LMI_STATUS) & 0x337f)
+ return 0;
+
+ mdelay(10);
+ }
+ }
+
+ return -ETIMEDOUT;
+}
+
+static int vce_v1_0_firmware_loaded(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ for (i = 0; i < 10; ++i) {
+ for (j = 0; j < 100; ++j) {
+ if (RREG32(mmVCE_STATUS) & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
+ return 0;
+ mdelay(10);
+ }
+
+ dev_err(adev->dev, "VCE not responding, trying to reset the ECPU\n");
+
+ WREG32_P(mmVCE_SOFT_RESET,
+ VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
+ ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
+ mdelay(10);
+ WREG32_P(mmVCE_SOFT_RESET, 0,
+ ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
+ mdelay(10);
+ }
+
+ return -ETIMEDOUT;
+}
+
+static void vce_v1_0_init_cg(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ tmp = RREG32(mmVCE_CLOCK_GATING_A);
+ tmp |= VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_MASK;
+ WREG32(mmVCE_CLOCK_GATING_A, tmp);
+
+ tmp = RREG32(mmVCE_CLOCK_GATING_B);
+ tmp |= 0x1e;
+ tmp &= ~0xe100e1;
+ WREG32(mmVCE_CLOCK_GATING_B, tmp);
+
+ tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
+ tmp &= ~0xff9ff000;
+ WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
+
+ tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
+ tmp &= ~0x3ff;
+ WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
+}
+
+/**
+ * vce_v1_0_load_fw_signature - load firmware signature into VCPU BO
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * The VCE1 firmware validation mechanism needs a firmware signature.
+ * This function finds the signature appropriate for the current
+ * ASIC and writes that into the VCPU BO.
+ */
+static int vce_v1_0_load_fw_signature(struct amdgpu_device *adev)
+{
+ const struct common_firmware_header *hdr;
+ struct vce_v1_0_fw_signature *sign;
+ unsigned int ucode_offset;
+ uint32_t chip_id;
+ u32 *cpu_addr;
+ int i;
+
+ hdr = (const struct common_firmware_header *)adev->vce.fw->data;
+ ucode_offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+ cpu_addr = adev->vce.cpu_addr;
+
+ sign = (void *)adev->vce.fw->data + ucode_offset;
+
+ switch (adev->asic_type) {
+ case CHIP_TAHITI:
+ chip_id = 0x01000014;
+ break;
+ case CHIP_VERDE:
+ chip_id = 0x01000015;
+ break;
+ case CHIP_PITCAIRN:
+ chip_id = 0x01000016;
+ break;
+ default:
+ dev_err(adev->dev, "asic_type %#010x was not found!", adev->asic_type);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < le32_to_cpu(sign->number); ++i) {
+ if (le32_to_cpu(sign->val[i].chip_id) == chip_id)
+ break;
+ }
+
+ if (i == le32_to_cpu(sign->number)) {
+ dev_err(adev->dev, "chip_id 0x%x for %s was not found in VCE firmware",
+ chip_id, amdgpu_asic_name[adev->asic_type]);
+ return -EINVAL;
+ }
+
+ cpu_addr += (256 - 64) / 4;
+ memcpy_toio(&cpu_addr[0], &sign->val[i].nonce[0], 16);
+ cpu_addr[4] = cpu_to_le32(le32_to_cpu(sign->length) + 64);
+
+ memset_io(&cpu_addr[5], 0, 44);
+ memcpy_toio(&cpu_addr[16], &sign[1], hdr->ucode_size_bytes - sizeof(*sign));
+
+ cpu_addr += (le32_to_cpu(sign->length) + 64) / 4;
+ memcpy_toio(&cpu_addr[0], &sign->val[i].sigval[0], 16);
+
+ adev->vce.keyselect = le32_to_cpu(sign->val[i].keyselect);
+
+ return 0;
+}
+
+static int vce_v1_0_wait_for_fw_validation(struct amdgpu_device *adev)
+{
+ int i;
+
+ dev_dbg(adev->dev, "VCE keyselect: %d", adev->vce.keyselect);
+ WREG32(mmVCE_LMI_FW_START_KEYSEL, adev->vce.keyselect);
+
+ for (i = 0; i < 10; ++i) {
+ mdelay(10);
+ if (RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__DONE_MASK)
+ break;
+ }
+
+ if (!(RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__DONE_MASK)) {
+ dev_err(adev->dev, "VCE FW validation timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ if (!(RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__PASS_MASK)) {
+ dev_err(adev->dev, "VCE FW validation failed\n");
+ return -EINVAL;
+ }
+
+ for (i = 0; i < 10; ++i) {
+ mdelay(10);
+ if (!(RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__BUSY_MASK))
+ break;
+ }
+
+ if (RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__BUSY_MASK) {
+ dev_err(adev->dev, "VCE FW busy timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static int vce_v1_0_mc_resume(struct amdgpu_device *adev)
+{
+ uint32_t offset;
+ uint32_t size;
+
+ /*
+ * When the keyselect is already set, don't perturb VCE FW.
+ * Validation seems to always fail the second time.
+ */
+ if (RREG32(mmVCE_LMI_FW_START_KEYSEL)) {
+ dev_dbg(adev->dev, "keyselect already set: 0x%x (on CPU: 0x%x)\n",
+ RREG32(mmVCE_LMI_FW_START_KEYSEL), adev->vce.keyselect);
+
+ WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
+ return 0;
+ }
+
+ WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
+ WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
+ WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
+ WREG32(mmVCE_CLOCK_GATING_B, 0);
+
+ WREG32_P(mmVCE_LMI_FW_PERIODIC_CTRL, 0x4, ~0x4);
+
+ WREG32(mmVCE_LMI_CTRL, 0x00398000);
+
+ WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
+ WREG32(mmVCE_LMI_SWAP_CNTL, 0);
+ WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
+ WREG32(mmVCE_LMI_VM_CTRL, 0);
+
+ WREG32(mmVCE_VCPU_SCRATCH7, AMDGPU_MAX_VCE_HANDLES);
+
+ offset = adev->vce.gpu_addr + AMDGPU_VCE_FIRMWARE_OFFSET;
+ size = VCE_V1_0_FW_SIZE;
+ WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
+ WREG32(mmVCE_VCPU_CACHE_SIZE0, size);
+
+ offset += size;
+ size = VCE_V1_0_STACK_SIZE;
+ WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
+ WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
+
+ offset += size;
+ size = VCE_V1_0_DATA_SIZE;
+ WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
+ WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
+
+ WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
+
+ return vce_v1_0_wait_for_fw_validation(adev);
+}
+
+/**
+ * vce_v1_0_is_idle() - Check idle status of VCE1 IP block
+ *
+ * @ip_block: amdgpu_ip_block pointer
+ *
+ * Check whether VCE is busy according to VCE_STATUS.
+ * Also check whether the SRBM thinks VCE is busy, although
+ * SRBM_STATUS.VCE_BUSY seems to be bogus because it
+ * appears to mirror the VCE_STATUS.VCPU_REPORT_FW_LOADED bit.
+ */
+static bool vce_v1_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool busy =
+ (RREG32(mmVCE_STATUS) & (VCE_STATUS__JOB_BUSY_MASK | VCE_STATUS__UENC_BUSY_MASK)) ||
+ (RREG32(mmSRBM_STATUS2) & SRBM_STATUS2__VCE_BUSY_MASK);
+
+ return !busy;
+}
+
+static int vce_v1_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ unsigned int i;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ udelay(1);
+ if (vce_v1_0_is_idle(ip_block))
+ return 0;
+ }
+ return -ETIMEDOUT;
+}
+
+/**
+ * vce_v1_0_start - start VCE block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the VCE block
+ */
+static int vce_v1_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int r;
+
+ WREG32_P(mmVCE_STATUS, 1, ~1);
+
+ r = vce_v1_0_mc_resume(adev);
+ if (r)
+ return r;
+
+ ring = &adev->vce.ring[0];
+ WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32(mmVCE_RB_BASE_LO, lower_32_bits(ring->gpu_addr));
+ WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);
+
+ ring = &adev->vce.ring[1];
+ WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32(mmVCE_RB_BASE_LO2, lower_32_bits(ring->gpu_addr));
+ WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);
+
+ WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK,
+ ~VCE_VCPU_CNTL__CLK_EN_MASK);
+
+ WREG32_P(mmVCE_SOFT_RESET,
+ VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
+ VCE_SOFT_RESET__FME_SOFT_RESET_MASK,
+ ~(VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
+ VCE_SOFT_RESET__FME_SOFT_RESET_MASK));
+
+ mdelay(100);
+
+ WREG32_P(mmVCE_SOFT_RESET, 0,
+ ~(VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
+ VCE_SOFT_RESET__FME_SOFT_RESET_MASK));
+
+ r = vce_v1_0_firmware_loaded(adev);
+
+ /* Clear VCE_STATUS, otherwise SRBM thinks VCE1 is busy. */
+ WREG32(mmVCE_STATUS, 0);
+
+ if (r) {
+ dev_err(adev->dev, "VCE not responding, giving up\n");
+ return r;
+ }
+
+ return 0;
+}
+
+static int vce_v1_0_stop(struct amdgpu_device *adev)
+{
+ struct amdgpu_ip_block *ip_block;
+ int status;
+ int i;
+
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE);
+ if (!ip_block)
+ return -EINVAL;
+
+ if (vce_v1_0_lmi_clean(adev))
+ dev_warn(adev->dev, "VCE not idle\n");
+
+ if (vce_v1_0_wait_for_idle(ip_block))
+ dev_warn(adev->dev, "VCE busy: VCE_STATUS=0x%x, SRBM_STATUS2=0x%x\n",
+ RREG32(mmVCE_STATUS), RREG32(mmSRBM_STATUS2));
+
+ /* Stall UMC and register bus before resetting VCPU */
+ WREG32_P(mmVCE_LMI_CTRL2, 1 << 8, ~(1 << 8));
+
+ for (i = 0; i < 100; ++i) {
+ status = RREG32(mmVCE_LMI_STATUS);
+ if (status & 0x240)
+ break;
+ mdelay(1);
+ }
+
+ WREG32_P(mmVCE_VCPU_CNTL, 0, ~VCE_VCPU_CNTL__CLK_EN_MASK);
+
+ WREG32_P(mmVCE_SOFT_RESET,
+ VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
+ VCE_SOFT_RESET__FME_SOFT_RESET_MASK,
+ ~(VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
+ VCE_SOFT_RESET__FME_SOFT_RESET_MASK));
+
+ WREG32(mmVCE_STATUS, 0);
+
+ return 0;
+}
+
+static void vce_v1_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
+{
+ u32 tmp;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)) {
+ tmp = RREG32(mmVCE_CLOCK_GATING_A);
+ tmp |= VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_MASK;
+ WREG32(mmVCE_CLOCK_GATING_A, tmp);
+
+ tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
+ tmp &= ~0x1ff000;
+ tmp |= 0xff800000;
+ WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
+
+ tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
+ tmp &= ~0x3ff;
+ WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
+ } else {
+ tmp = RREG32(mmVCE_CLOCK_GATING_A);
+ tmp &= ~VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_MASK;
+ WREG32(mmVCE_CLOCK_GATING_A, tmp);
+
+ tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
+ tmp |= 0x1ff000;
+ tmp &= ~0xff800000;
+ WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
+
+ tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
+ tmp |= 0x3ff;
+ WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
+ }
+}
+
+static int vce_v1_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_vce_early_init(adev);
+ if (r)
+ return r;
+
+ adev->vce.num_rings = 2;
+
+ vce_v1_0_set_ring_funcs(adev);
+ vce_v1_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * vce_v1_0_ensure_vcpu_bo_32bit_addr() - ensure the VCPU BO has a 32-bit address
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Due to various hardware limitations, the VCE1 requires
+ * the VCPU BO to be in the low 32 bit address range.
+ * Ensure that the VCPU BO has a 32-bit GPU address,
+ * or return an error code when that isn't possible.
+ *
+ * To accommodate that, we put GART in the LOW address range
+ * and reserve some GART pages where we map the VCPU BO,
+ * so that it gets a 32-bit address.
+ */
+static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev)
+{
+ u64 gpu_addr = amdgpu_bo_gpu_offset(adev->vce.vcpu_bo);
+ u64 bo_size = amdgpu_bo_size(adev->vce.vcpu_bo);
+ u64 max_vcpu_bo_addr = 0xffffffff - bo_size;
+ u64 num_pages = ALIGN(bo_size, AMDGPU_GPU_PAGE_SIZE) / AMDGPU_GPU_PAGE_SIZE;
+ u64 pa = amdgpu_gmc_vram_pa(adev, adev->vce.vcpu_bo);
+ u64 flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_VALID;
+
+ /*
+ * Check if the VCPU BO already has a 32-bit address.
+ * E.g. if MC is configured to put VRAM in the low address range.
+ */
+ if (gpu_addr <= max_vcpu_bo_addr)
+ return 0;
+
+ /* Check if we can map the VCPU BO in GART to a 32-bit address. */
+ if (adev->gmc.gart_start + VCE_V1_0_GART_ADDR_START > max_vcpu_bo_addr)
+ return -EINVAL;
+
+ amdgpu_gart_map_vram_range(adev, pa, VCE_V1_0_GART_PAGE_START,
+ num_pages, flags, adev->gart.ptr);
+ adev->vce.gpu_addr = adev->gmc.gart_start + VCE_V1_0_GART_ADDR_START;
+ if (adev->vce.gpu_addr > max_vcpu_bo_addr)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int vce_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int r, i;
+
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 167, &adev->vce.irq);
+ if (r)
+ return r;
+
+ r = amdgpu_vce_sw_init(adev, VCE_V1_0_FW_SIZE +
+ VCE_V1_0_STACK_SIZE + VCE_V1_0_DATA_SIZE);
+ if (r)
+ return r;
+
+ r = amdgpu_vce_resume(adev);
+ if (r)
+ return r;
+ r = vce_v1_0_load_fw_signature(adev);
+ if (r)
+ return r;
+ r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vce.num_rings; i++) {
+ enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
+
+ ring = &adev->vce.ring[i];
+ sprintf(ring->name, "vce%d", i);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
+ hw_prio, NULL);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+static int vce_v1_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_vce_suspend(adev);
+ if (r)
+ return r;
+
+ return amdgpu_vce_sw_fini(adev);
+}
+
+/**
+ * vce_v1_0_hw_init - start and test VCE block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int vce_v1_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vce(adev, true);
+ else
+ amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
+
+ for (i = 0; i < adev->vce.num_rings; i++) {
+ r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
+ if (r)
+ return r;
+ }
+
+ dev_info(adev->dev, "VCE initialized successfully.\n");
+
+ return 0;
+}
+
+static int vce_v1_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ r = vce_v1_0_stop(ip_block->adev);
+ if (r)
+ return r;
+
+ cancel_delayed_work_sync(&ip_block->adev->vce.idle_work);
+ return 0;
+}
+
+static int vce_v1_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+ * - enable powergating
+ * - enable clockgating
+ * - disable dpm
+ *
+ * TODO: to align with the VCN implementation, move the
+ * jobs for clockgating/powergating/dpm setting to
+ * ->set_powergating_state().
+ */
+ cancel_delayed_work_sync(&adev->vce.idle_work);
+
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_vce(adev, false);
+ } else {
+ amdgpu_asic_set_vce_clocks(adev, 0, 0);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_CG_STATE_GATE);
+ }
+
+ r = vce_v1_0_hw_fini(ip_block);
+ if (r) {
+ dev_err(adev->dev, "vce_v1_0_hw_fini() failed with error %i", r);
+ return r;
+ }
+
+ return amdgpu_vce_suspend(adev);
+}
+
+static int vce_v1_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_vce_resume(adev);
+ if (r)
+ return r;
+ r = vce_v1_0_load_fw_signature(adev);
+ if (r)
+ return r;
+ r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev);
+ if (r)
+ return r;
+
+ return vce_v1_0_hw_init(ip_block);
+}
+
+static int vce_v1_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t val = 0;
+
+ if (state == AMDGPU_IRQ_STATE_ENABLE)
+ val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
+
+ WREG32_P(mmVCE_SYS_INT_EN, val,
+ ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
+ return 0;
+}
+
+static int vce_v1_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ dev_dbg(adev->dev, "IH: VCE\n");
+ switch (entry->src_data[0]) {
+ case 0:
+ case 1:
+ amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
+ break;
+ default:
+ dev_err(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static int vce_v1_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ vce_v1_0_init_cg(adev);
+ vce_v1_0_enable_mgcg(adev, state == AMD_CG_STATE_GATE);
+
+ return 0;
+}
+
+static int vce_v1_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /*
+ * This doesn't actually powergate the VCE block.
+ * That's done in the dpm code via the SMC. This
+ * just re-inits the block as necessary. The actual
+ * gating still happens in the dpm code. We should
+ * revisit this when there is a cleaner line between
+ * the smc and the hw blocks
+ */
+ if (state == AMD_PG_STATE_GATE)
+ return vce_v1_0_stop(adev);
+ else
+ return vce_v1_0_start(adev);
+}
+
+static const struct amd_ip_funcs vce_v1_0_ip_funcs = {
+ .name = "vce_v1_0",
+ .early_init = vce_v1_0_early_init,
+ .sw_init = vce_v1_0_sw_init,
+ .sw_fini = vce_v1_0_sw_fini,
+ .hw_init = vce_v1_0_hw_init,
+ .hw_fini = vce_v1_0_hw_fini,
+ .suspend = vce_v1_0_suspend,
+ .resume = vce_v1_0_resume,
+ .is_idle = vce_v1_0_is_idle,
+ .wait_for_idle = vce_v1_0_wait_for_idle,
+ .set_clockgating_state = vce_v1_0_set_clockgating_state,
+ .set_powergating_state = vce_v1_0_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs vce_v1_0_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_VCE,
+ .align_mask = 0xf,
+ .nop = VCE_CMD_NO_OP,
+ .support_64bit_ptrs = false,
+ .no_user_fence = true,
+ .get_rptr = vce_v1_0_ring_get_rptr,
+ .get_wptr = vce_v1_0_ring_get_wptr,
+ .set_wptr = vce_v1_0_ring_set_wptr,
+ .parse_cs = amdgpu_vce_ring_parse_cs,
+ .emit_frame_size = 6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
+ .emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
+ .emit_ib = amdgpu_vce_ring_emit_ib,
+ .emit_fence = amdgpu_vce_ring_emit_fence,
+ .test_ring = amdgpu_vce_ring_test_ring,
+ .test_ib = amdgpu_vce_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vce_ring_begin_use,
+ .end_use = amdgpu_vce_ring_end_use,
+};
+
+static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vce.num_rings; i++) {
+ adev->vce.ring[i].funcs = &vce_v1_0_ring_funcs;
+ adev->vce.ring[i].me = i;
+ }
+};
+
+static const struct amdgpu_irq_src_funcs vce_v1_0_irq_funcs = {
+ .set = vce_v1_0_set_interrupt_state,
+ .process = vce_v1_0_process_interrupt,
+};
+
+static void vce_v1_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->vce.irq.num_types = 1;
+ adev->vce.irq.funcs = &vce_v1_0_irq_funcs;
+};
+
+const struct amdgpu_ip_block_version vce_v1_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VCE,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &vce_v1_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.h b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.h
new file mode 100644
index 000000000000..206e7bec897f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ * Copyright 2025 Valve Corporation
+ * Copyright 2025 Alexandre Demers
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCE_V1_0_H__
+#define __VCE_V1_0_H__
+
+extern const struct amdgpu_ip_block_version vce_v1_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index 67eb01fef789..8ea8a6193492 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -201,20 +201,20 @@ static void vce_v2_0_mc_resume(struct amdgpu_device *adev)
WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}
-static bool vce_v2_0_is_idle(void *handle)
+static bool vce_v2_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return !(RREG32(mmSRBM_STATUS2) & SRBM_STATUS2__VCE_BUSY_MASK);
}
-static int vce_v2_0_wait_for_idle(void *handle)
+static int vce_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
unsigned i;
for (i = 0; i < adev->usec_timeout; i++) {
- if (vce_v2_0_is_idle(handle))
+ if (vce_v2_0_is_idle(ip_block))
return 0;
}
return -ETIMEDOUT;
@@ -274,15 +274,21 @@ static int vce_v2_0_start(struct amdgpu_device *adev)
static int vce_v2_0_stop(struct amdgpu_device *adev)
{
+ struct amdgpu_ip_block *ip_block;
int i;
int status;
+
if (vce_v2_0_lmi_clean(adev)) {
- DRM_INFO("vce is not idle \n");
+ DRM_INFO("VCE is not idle \n");
return 0;
}
- if (vce_v2_0_wait_for_idle(adev)) {
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE);
+ if (!ip_block)
+ return -EINVAL;
+
+ if (vce_v2_0_wait_for_idle(ip_block)) {
DRM_INFO("VCE is busy, Can't set clock gating");
return 0;
}
@@ -398,9 +404,14 @@ static void vce_v2_0_enable_mgcg(struct amdgpu_device *adev, bool enable,
}
}
-static int vce_v2_0_early_init(void *handle)
+static int vce_v2_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_vce_early_init(adev);
+ if (r)
+ return r;
adev->vce.num_rings = 2;
@@ -410,11 +421,11 @@ static int vce_v2_0_early_init(void *handle)
return 0;
}
-static int vce_v2_0_sw_init(void *handle)
+static int vce_v2_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* VCE */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 167, &adev->vce.irq);
@@ -441,15 +452,13 @@ static int vce_v2_0_sw_init(void *handle)
return r;
}
- r = amdgpu_vce_entity_init(adev);
-
return r;
}
-static int vce_v2_0_sw_fini(void *handle)
+static int vce_v2_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_vce_suspend(adev);
if (r)
@@ -458,10 +467,10 @@ static int vce_v2_0_sw_fini(void *handle)
return amdgpu_vce_sw_fini(adev);
}
-static int vce_v2_0_hw_init(void *handle)
+static int vce_v2_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
vce_v2_0_enable_mgcg(adev, true, false);
@@ -477,19 +486,17 @@ static int vce_v2_0_hw_init(void *handle)
return 0;
}
-static int vce_v2_0_hw_fini(void *handle)
+static int vce_v2_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- cancel_delayed_work_sync(&adev->vce.idle_work);
+ cancel_delayed_work_sync(&ip_block->adev->vce.idle_work);
return 0;
}
-static int vce_v2_0_suspend(void *handle)
+static int vce_v2_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
@@ -515,28 +522,27 @@ static int vce_v2_0_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = vce_v2_0_hw_fini(adev);
+ r = vce_v2_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_vce_suspend(adev);
}
-static int vce_v2_0_resume(void *handle)
+static int vce_v2_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_vce_resume(adev);
+ r = amdgpu_vce_resume(ip_block->adev);
if (r)
return r;
- return vce_v2_0_hw_init(adev);
+ return vce_v2_0_hw_init(ip_block);
}
-static int vce_v2_0_soft_reset(void *handle)
+static int vce_v2_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
WREG32_FIELD(SRBM_SOFT_RESET, SOFT_RESET_VCE, 1);
mdelay(5);
@@ -577,13 +583,13 @@ static int vce_v2_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int vce_v2_0_set_clockgating_state(void *handle,
+static int vce_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
bool sw_cg = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE) {
gate = true;
@@ -595,7 +601,7 @@ static int vce_v2_0_set_clockgating_state(void *handle,
return 0;
}
-static int vce_v2_0_set_powergating_state(void *handle,
+static int vce_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCE block.
@@ -605,7 +611,7 @@ static int vce_v2_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE)
return vce_v2_0_stop(adev);
@@ -616,7 +622,6 @@ static int vce_v2_0_set_powergating_state(void *handle,
static const struct amd_ip_funcs vce_v2_0_ip_funcs = {
.name = "vce_v2_0",
.early_init = vce_v2_0_early_init,
- .late_init = NULL,
.sw_init = vce_v2_0_sw_init,
.sw_fini = vce_v2_0_sw_fini,
.hw_init = vce_v2_0_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 142e291983b4..719e9643c43d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -64,8 +64,8 @@
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vce_v3_0_wait_for_idle(void *handle);
-static int vce_v3_0_set_clockgating_state(void *handle,
+static int vce_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block);
+static int vce_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
/**
* vce_v3_0_ring_get_rptr - get read pointer
@@ -396,9 +396,10 @@ static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
}
}
-static int vce_v3_0_early_init(void *handle)
+static int vce_v3_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);
@@ -407,6 +408,10 @@ static int vce_v3_0_early_init(void *handle)
(AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
return -ENOENT;
+ r = amdgpu_vce_early_init(adev);
+ if (r)
+ return r;
+
adev->vce.num_rings = 3;
vce_v3_0_set_ring_funcs(adev);
@@ -415,9 +420,9 @@ static int vce_v3_0_early_init(void *handle)
return 0;
}
-static int vce_v3_0_sw_init(void *handle)
+static int vce_v3_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int r, i;
@@ -450,15 +455,13 @@ static int vce_v3_0_sw_init(void *handle)
return r;
}
- r = amdgpu_vce_entity_init(adev);
-
return r;
}
-static int vce_v3_0_sw_fini(void *handle)
+static int vce_v3_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_vce_suspend(adev);
if (r)
@@ -467,10 +470,10 @@ static int vce_v3_0_sw_fini(void *handle)
return amdgpu_vce_sw_fini(adev);
}
-static int vce_v3_0_hw_init(void *handle)
+static int vce_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
vce_v3_0_override_vce_clock_gating(adev, true);
@@ -487,25 +490,25 @@ static int vce_v3_0_hw_init(void *handle)
return 0;
}
-static int vce_v3_0_hw_fini(void *handle)
+static int vce_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cancel_delayed_work_sync(&adev->vce.idle_work);
- r = vce_v3_0_wait_for_idle(handle);
+ r = vce_v3_0_wait_for_idle(ip_block);
if (r)
return r;
vce_v3_0_stop(adev);
- return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
+ return vce_v3_0_set_clockgating_state(ip_block, AMD_CG_STATE_GATE);
}
-static int vce_v3_0_suspend(void *handle)
+static int vce_v3_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* Proper cleanups before halting the HW engine:
@@ -530,23 +533,22 @@ static int vce_v3_0_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = vce_v3_0_hw_fini(adev);
+ r = vce_v3_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_vce_suspend(adev);
}
-static int vce_v3_0_resume(void *handle)
+static int vce_v3_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_vce_resume(adev);
+ r = amdgpu_vce_resume(ip_block->adev);
if (r)
return r;
- return vce_v3_0_hw_init(adev);
+ return vce_v3_0_hw_init(ip_block);
}
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
@@ -600,9 +602,9 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}
-static bool vce_v3_0_is_idle(void *handle)
+static bool vce_v3_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 mask = 0;
mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
@@ -611,13 +613,13 @@ static bool vce_v3_0_is_idle(void *handle)
return !(RREG32(mmSRBM_STATUS2) & mask);
}
-static int vce_v3_0_wait_for_idle(void *handle)
+static int vce_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++)
- if (vce_v3_0_is_idle(handle))
+ if (vce_v3_0_is_idle(ip_block))
return 0;
return -ETIMEDOUT;
@@ -629,9 +631,9 @@ static int vce_v3_0_wait_for_idle(void *handle)
#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
-static bool vce_v3_0_check_soft_reset(void *handle)
+static bool vce_v3_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
/* According to VCE team , we should use VCE_STATUS instead
@@ -670,9 +672,9 @@ static bool vce_v3_0_check_soft_reset(void *handle)
}
}
-static int vce_v3_0_soft_reset(void *handle)
+static int vce_v3_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset;
if (!adev->vce.srbm_soft_reset)
@@ -701,29 +703,29 @@ static int vce_v3_0_soft_reset(void *handle)
return 0;
}
-static int vce_v3_0_pre_soft_reset(void *handle)
+static int vce_v3_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->vce.srbm_soft_reset)
return 0;
mdelay(5);
- return vce_v3_0_suspend(adev);
+ return vce_v3_0_suspend(ip_block);
}
-static int vce_v3_0_post_soft_reset(void *handle)
+static int vce_v3_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->vce.srbm_soft_reset)
return 0;
mdelay(5);
- return vce_v3_0_resume(adev);
+ return vce_v3_0_resume(ip_block);
}
static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
@@ -763,10 +765,10 @@ static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int vce_v3_0_set_clockgating_state(void *handle,
+static int vce_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
int i;
@@ -804,7 +806,7 @@ static int vce_v3_0_set_clockgating_state(void *handle,
return 0;
}
-static int vce_v3_0_set_powergating_state(void *handle,
+static int vce_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCE block.
@@ -814,7 +816,7 @@ static int vce_v3_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 0;
if (state == AMD_PG_STATE_GATE) {
@@ -831,9 +833,9 @@ out:
return ret;
}
-static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags)
+static void vce_v3_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
mutex_lock(&adev->pm.mutex);
@@ -899,7 +901,6 @@ static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
.name = "vce_v3_0",
.early_init = vce_v3_0_early_init,
- .late_init = NULL,
.sw_init = vce_v3_0_sw_init,
.sw_fini = vce_v3_0_sw_fini,
.hw_init = vce_v3_0_hw_init,
@@ -950,7 +951,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
.get_rptr = vce_v3_0_ring_get_rptr,
.get_wptr = vce_v3_0_ring_get_wptr,
.set_wptr = vce_v3_0_ring_set_wptr,
- .parse_cs = amdgpu_vce_ring_parse_cs_vm,
+ .patch_cs_in_place = amdgpu_vce_ring_parse_cs_vm,
.emit_frame_size =
6 + /* vce_v3_0_emit_vm_flush */
4 + /* vce_v3_0_emit_pipeline_sync */
@@ -998,8 +999,7 @@ static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
};
-const struct amdgpu_ip_block_version vce_v3_0_ip_block =
-{
+const struct amdgpu_ip_block_version vce_v3_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_VCE,
.major = 3,
.minor = 0,
@@ -1007,8 +1007,7 @@ const struct amdgpu_ip_block_version vce_v3_0_ip_block =
.funcs = &vce_v3_0_ip_funcs,
};
-const struct amdgpu_ip_block_version vce_v3_1_ip_block =
-{
+const struct amdgpu_ip_block_version vce_v3_1_ip_block = {
.type = AMD_IP_BLOCK_TYPE_VCE,
.major = 3,
.minor = 1,
@@ -1016,8 +1015,7 @@ const struct amdgpu_ip_block_version vce_v3_1_ip_block =
.funcs = &vce_v3_0_ip_funcs,
};
-const struct amdgpu_ip_block_version vce_v3_4_ip_block =
-{
+const struct amdgpu_ip_block_version vce_v3_4_ip_block = {
.type = AMD_IP_BLOCK_TYPE_VCE,
.major = 3,
.minor = 4,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index d1fc4e0b8265..2d64002bed61 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -83,7 +83,7 @@ static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
if (ring->me == 0)
return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
@@ -106,7 +106,7 @@ static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
return;
}
@@ -177,7 +177,7 @@ static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
- adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
+ *adev->vce.ring[0].wptr_cpu_addr = 0;
adev->vce.ring[0].wptr = 0;
adev->vce.ring[0].wptr_old = 0;
@@ -407,9 +407,14 @@ static int vce_v4_0_stop(struct amdgpu_device *adev)
return 0;
}
-static int vce_v4_0_early_init(void *handle)
+static int vce_v4_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_vce_early_init(adev);
+ if (r)
+ return r;
if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
adev->vce.num_rings = 1;
@@ -422,9 +427,9 @@ static int vce_v4_0_early_init(void *handle)
return 0;
}
-static int vce_v4_0_sw_init(void *handle)
+static int vce_v4_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
unsigned size;
@@ -466,6 +471,7 @@ static int vce_v4_0_sw_init(void *handle)
enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
ring = &adev->vce.ring[i];
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "vce%d", i);
if (amdgpu_sriov_vf(adev)) {
/* DOORBELL only works under SRIOV */
@@ -485,11 +491,6 @@ static int vce_v4_0_sw_init(void *handle)
return r;
}
-
- r = amdgpu_vce_entity_init(adev);
- if (r)
- return r;
-
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
return r;
@@ -497,10 +498,10 @@ static int vce_v4_0_sw_init(void *handle)
return r;
}
-static int vce_v4_0_sw_fini(void *handle)
+static int vce_v4_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* free MM table */
amdgpu_virt_free_mm_table(adev);
@@ -517,10 +518,10 @@ static int vce_v4_0_sw_fini(void *handle)
return amdgpu_vce_sw_fini(adev);
}
-static int vce_v4_0_hw_init(void *handle)
+static int vce_v4_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
r = vce_v4_0_sriov_start(adev);
@@ -540,14 +541,14 @@ static int vce_v4_0_hw_init(void *handle)
return 0;
}
-static int vce_v4_0_hw_fini(void *handle)
+static int vce_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cancel_delayed_work_sync(&adev->vce.idle_work);
if (!amdgpu_sriov_vf(adev)) {
- /* vce_v4_0_wait_for_idle(handle); */
+ /* vce_v4_0_wait_for_idle(ip_block); */
vce_v4_0_stop(adev);
} else {
/* full access mode, so don't touch any VCE register */
@@ -557,9 +558,9 @@ static int vce_v4_0_hw_fini(void *handle)
return 0;
}
-static int vce_v4_0_suspend(void *handle)
+static int vce_v4_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r, idx;
if (adev->vce.vcpu_bo == NULL)
@@ -598,16 +599,16 @@ static int vce_v4_0_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = vce_v4_0_hw_fini(adev);
+ r = vce_v4_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_vce_suspend(adev);
}
-static int vce_v4_0_resume(void *handle)
+static int vce_v4_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r, idx;
if (adev->vce.vcpu_bo == NULL)
@@ -628,7 +629,7 @@ static int vce_v4_0_resume(void *handle)
return r;
}
- return vce_v4_0_hw_init(adev);
+ return vce_v4_0_hw_init(ip_block);
}
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
@@ -688,281 +689,14 @@ static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}
-static int vce_v4_0_set_clockgating_state(void *handle,
+static int vce_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
/* needed for driver unload*/
return 0;
}
-#if 0
-static bool vce_v4_0_is_idle(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 mask = 0;
-
- mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
- mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
-
- return !(RREG32(mmSRBM_STATUS2) & mask);
-}
-
-static int vce_v4_0_wait_for_idle(void *handle)
-{
- unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- for (i = 0; i < adev->usec_timeout; i++)
- if (vce_v4_0_is_idle(handle))
- return 0;
-
- return -ETIMEDOUT;
-}
-
-#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK 0x00000008L /* AUTO_BUSY */
-#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK 0x00000010L /* RB0_BUSY */
-#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK 0x00000020L /* RB1_BUSY */
-#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
- VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
-
-static bool vce_v4_0_check_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 srbm_soft_reset = 0;
-
- /* According to VCE team , we should use VCE_STATUS instead
- * SRBM_STATUS.VCE_BUSY bit for busy status checking.
- * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
- * instance's registers are accessed
- * (0 for 1st instance, 10 for 2nd instance).
- *
- *VCE_STATUS
- *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 | |FW_LOADED|JOB |
- *|----+----+-----------+----+----+----+----------+---------+----|
- *|bit8|bit7| bit6 |bit5|bit4|bit3| bit2 | bit1 |bit0|
- *
- * VCE team suggest use bit 3--bit 6 for busy status check
- */
- mutex_lock(&adev->grbm_idx_mutex);
- WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
- if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
- srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
- srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
- }
- WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
- if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
- srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
- srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
- }
- WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
- mutex_unlock(&adev->grbm_idx_mutex);
-
- if (srbm_soft_reset) {
- adev->vce.srbm_soft_reset = srbm_soft_reset;
- return true;
- } else {
- adev->vce.srbm_soft_reset = 0;
- return false;
- }
-}
-
-static int vce_v4_0_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 srbm_soft_reset;
-
- if (!adev->vce.srbm_soft_reset)
- return 0;
- srbm_soft_reset = adev->vce.srbm_soft_reset;
-
- if (srbm_soft_reset) {
- u32 tmp;
-
- tmp = RREG32(mmSRBM_SOFT_RESET);
- tmp |= srbm_soft_reset;
- dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- udelay(50);
-
- tmp &= ~srbm_soft_reset;
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- /* Wait a little for things to settle down */
- udelay(50);
- }
-
- return 0;
-}
-
-static int vce_v4_0_pre_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!adev->vce.srbm_soft_reset)
- return 0;
-
- mdelay(5);
-
- return vce_v4_0_suspend(adev);
-}
-
-
-static int vce_v4_0_post_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!adev->vce.srbm_soft_reset)
- return 0;
-
- mdelay(5);
-
- return vce_v4_0_resume(adev);
-}
-
-static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
-{
- u32 tmp, data;
-
- tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
- if (override)
- data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
- else
- data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
-
- if (tmp != data)
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
-}
-
-static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
- bool gated)
-{
- u32 data;
-
- /* Set Override to disable Clock Gating */
- vce_v4_0_override_vce_clock_gating(adev, true);
-
- /* This function enables MGCG which is controlled by firmware.
- With the clocks in the gated state the core is still
- accessible but the firmware will throttle the clocks on the
- fly as necessary.
- */
- if (gated) {
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
- data |= 0x1ff;
- data &= ~0xef0000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
- data |= 0x3ff000;
- data &= ~0xffc00000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
- data |= 0x2;
- data &= ~0x00010000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
- data |= 0x37f;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
- data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
- VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
- VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
- 0x8;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
- } else {
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
- data &= ~0x80010;
- data |= 0xe70008;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
- data |= 0xffc00000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
- data |= 0x10000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
- data &= ~0xffc00000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
- data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
- VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
- VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
- 0x8);
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
- }
- vce_v4_0_override_vce_clock_gating(adev, false);
-}
-
-static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
-{
- u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
-
- if (enable)
- tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
- else
- tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
-
- WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
-}
-
-static int vce_v4_0_set_clockgating_state(void *handle,
- enum amd_clockgating_state state)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE);
- int i;
-
- if ((adev->asic_type == CHIP_POLARIS10) ||
- (adev->asic_type == CHIP_TONGA) ||
- (adev->asic_type == CHIP_FIJI))
- vce_v4_0_set_bypass_mode(adev, enable);
-
- if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
- return 0;
-
- mutex_lock(&adev->grbm_idx_mutex);
- for (i = 0; i < 2; i++) {
- /* Program VCE Instance 0 or 1 if not harvested */
- if (adev->vce.harvest_config & (1 << i))
- continue;
-
- WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
-
- if (enable) {
- /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
- uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
- data &= ~(0xf | 0xff0);
- data |= ((0x0 << 0) | (0x04 << 4));
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
-
- /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
- data &= ~(0xf | 0xff0);
- data |= ((0x0 << 0) | (0x04 << 4));
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
- }
-
- vce_v4_0_set_vce_sw_clock_gating(adev, enable);
- }
-
- WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
- mutex_unlock(&adev->grbm_idx_mutex);
-
- return 0;
-}
-#endif
-
-static int vce_v4_0_set_powergating_state(void *handle,
+static int vce_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCE block.
@@ -972,7 +706,7 @@ static int vce_v4_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE)
return vce_v4_0_stop(adev);
@@ -1021,7 +755,7 @@ static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
unsigned int vmid, uint64_t pd_addr)
{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
@@ -1080,19 +814,12 @@ static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
const struct amd_ip_funcs vce_v4_0_ip_funcs = {
.name = "vce_v4_0",
.early_init = vce_v4_0_early_init,
- .late_init = NULL,
.sw_init = vce_v4_0_sw_init,
.sw_fini = vce_v4_0_sw_fini,
.hw_init = vce_v4_0_hw_init,
.hw_fini = vce_v4_0_hw_fini,
.suspend = vce_v4_0_suspend,
.resume = vce_v4_0_resume,
- .is_idle = NULL /* vce_v4_0_is_idle */,
- .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
- .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
- .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
- .soft_reset = NULL /* vce_v4_0_soft_reset */,
- .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
.set_clockgating_state = vce_v4_0_set_clockgating_state,
.set_powergating_state = vce_v4_0_set_powergating_state,
};
@@ -1103,11 +830,10 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
.nop = VCE_CMD_NO_OP,
.support_64bit_ptrs = false,
.no_user_fence = true,
- .vmhub = AMDGPU_MMHUB_0,
.get_rptr = vce_v4_0_ring_get_rptr,
.get_wptr = vce_v4_0_ring_get_wptr,
.set_wptr = vce_v4_0_ring_set_wptr,
- .parse_cs = amdgpu_vce_ring_parse_cs_vm,
+ .patch_cs_in_place = amdgpu_vce_ring_parse_cs_vm,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.c b/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.c
new file mode 100644
index 000000000000..2b9ddb3d2fe1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "vcn_sw_ring.h"
+
+void vcn_dec_sw_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, uint32_t flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, VCN_DEC_SW_CMD_FENCE);
+ amdgpu_ring_write(ring, addr);
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring, VCN_DEC_SW_CMD_TRAP);
+}
+
+void vcn_dec_sw_ring_insert_end(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END);
+}
+
+void vcn_dec_sw_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
+ struct amdgpu_ib *ib, uint32_t flags)
+{
+ uint32_t vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring, VCN_DEC_SW_CMD_IB);
+ amdgpu_ring_write(ring, vmid);
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+}
+
+void vcn_dec_sw_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ amdgpu_ring_write(ring, VCN_DEC_SW_CMD_REG_WAIT);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, val);
+}
+
+void vcn_dec_sw_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ uint32_t vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t data0, data1, mask;
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for register write */
+ data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
+ data1 = lower_32_bits(pd_addr);
+ mask = 0xffffffff;
+ vcn_dec_sw_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+void vcn_dec_sw_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val)
+{
+ amdgpu_ring_write(ring, VCN_DEC_SW_CMD_REG_WRITE);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, val);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.h b/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.h
new file mode 100644
index 000000000000..7e775725f120
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_SW_RING_H__
+#define __VCN_SW_RING_H__
+
+#define VCN_SW_RING_EMIT_FRAME_SIZE \
+ (4 + /* vcn_dec_sw_ring_emit_vm_flush */ \
+ 5 + 5 + /* vcn_dec_sw_ring_emit_fence x2 vm fence */ \
+ 1) /* vcn_dec_sw_ring_insert_end */
+
+void vcn_dec_sw_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, uint32_t flags);
+void vcn_dec_sw_ring_insert_end(struct amdgpu_ring *ring);
+void vcn_dec_sw_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
+ struct amdgpu_ib *ib, uint32_t flags);
+void vcn_dec_sw_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask);
+void vcn_dec_sw_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ uint32_t vmid, uint64_t pd_addr);
+void vcn_dec_sw_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val);
+
+#endif /* __VCN_SW_RING_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index d54d720b3cf6..a316797875a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -24,6 +24,7 @@
#include <linux/firmware.h>
#include "amdgpu.h"
+#include "amdgpu_cs.h"
#include "amdgpu_vcn.h"
#include "amdgpu_pm.h"
#include "soc15.h"
@@ -44,51 +45,92 @@
#define mmUVD_REG_XX_MASK_1_0 0x05ac
#define mmUVD_REG_XX_MASK_1_0_BASE_IDX 1
-static int vcn_v1_0_stop(struct amdgpu_device *adev);
+static const struct amdgpu_hwip_reg_entry vcn_reg_list_1_0[] = {
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE)
+};
+
+static int vcn_v1_0_stop(struct amdgpu_vcn_inst *vinst);
static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state);
-static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v1_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v1_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
static void vcn_v1_0_idle_work_handler(struct work_struct *work);
static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring);
/**
- * vcn_v1_0_early_init - set function pointers
+ * vcn_v1_0_early_init - set function pointers and load microcode
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
+ * Load microcode from filesystem
*/
-static int vcn_v1_0_early_init(void *handle)
+static int vcn_v1_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- adev->vcn.num_enc_rings = 2;
+ adev->vcn.inst[0].num_enc_rings = 2;
+ adev->vcn.inst[0].set_pg_state = vcn_v1_0_set_pg_state;
vcn_v1_0_set_dec_ring_funcs(adev);
vcn_v1_0_set_enc_ring_funcs(adev);
vcn_v1_0_set_irq_funcs(adev);
- jpeg_v1_0_early_init(handle);
+ jpeg_v1_0_early_init(ip_block);
- return 0;
+ return amdgpu_vcn_early_init(adev, 0);
}
/**
* vcn_v1_0_sw_init - sw init for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int vcn_v1_0_sw_init(void *handle)
+static int vcn_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0);
+ uint32_t *ptr;
+ struct amdgpu_device *adev = ip_block->adev;
/* VCN DEC TRAP */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
@@ -97,48 +139,50 @@ static int vcn_v1_0_sw_init(void *handle)
return r;
/* VCN ENC TRAP */
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + VCN_1_0__SRCID__UVD_ENC_GENERAL_PURPOSE,
&adev->vcn.inst->irq);
if (r)
return r;
}
- r = amdgpu_vcn_sw_init(adev);
+ r = amdgpu_vcn_sw_init(adev, 0);
if (r)
return r;
/* Override the work func */
- adev->vcn.idle_work.work.func = vcn_v1_0_idle_work_handler;
+ adev->vcn.inst[0].idle_work.work.func = vcn_v1_0_idle_work_handler;
- amdgpu_vcn_setup_ucode(adev);
+ amdgpu_vcn_setup_ucode(adev, 0);
- r = amdgpu_vcn_resume(adev);
+ r = amdgpu_vcn_resume(adev, 0);
if (r)
return r;
ring = &adev->vcn.inst->ring_dec;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "vcn_dec");
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
- adev->vcn.internal.scratch9 = adev->vcn.inst->external.scratch9 =
+ adev->vcn.inst[0].internal.scratch9 = adev->vcn.inst->external.scratch9 =
SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9);
- adev->vcn.internal.data0 = adev->vcn.inst->external.data0 =
+ adev->vcn.inst[0].internal.data0 = adev->vcn.inst->external.data0 =
SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0);
- adev->vcn.internal.data1 = adev->vcn.inst->external.data1 =
+ adev->vcn.inst[0].internal.data1 = adev->vcn.inst->external.data1 =
SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1);
- adev->vcn.internal.cmd = adev->vcn.inst->external.cmd =
+ adev->vcn.inst[0].internal.cmd = adev->vcn.inst->external.cmd =
SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD);
- adev->vcn.internal.nop = adev->vcn.inst->external.nop =
+ adev->vcn.inst[0].internal.nop = adev->vcn.inst->external.nop =
SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP);
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i);
ring = &adev->vcn.inst->ring_enc[i];
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "vcn_enc%d", i);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
hw_prio, NULL);
@@ -146,69 +190,79 @@ static int vcn_v1_0_sw_init(void *handle)
return r;
}
- adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
+ adev->vcn.inst[0].pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
+
+ if (amdgpu_vcnfw_log) {
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
+
+ fw_shared->present_flag_0 = 0;
+ amdgpu_vcn_fwlog_init(adev->vcn.inst);
+ }
- r = jpeg_v1_0_sw_init(handle);
+ r = jpeg_v1_0_sw_init(ip_block);
+ /* Allocate memory for VCN IP Dump buffer */
+ ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for VCN IP Dump\n");
+ adev->vcn.ip_dump = NULL;
+ } else {
+ adev->vcn.ip_dump = ptr;
+ }
return r;
}
/**
* vcn_v1_0_sw_fini - sw fini for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* VCN suspend and free up sw allocation
*/
-static int vcn_v1_0_sw_fini(void *handle)
+static int vcn_v1_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- r = amdgpu_vcn_suspend(adev);
+ r = amdgpu_vcn_suspend(adev, 0);
if (r)
return r;
- jpeg_v1_0_sw_fini(handle);
+ jpeg_v1_0_sw_fini(ip_block);
- r = amdgpu_vcn_sw_fini(adev);
+ amdgpu_vcn_sw_fini(adev, 0);
- return r;
+ kfree(adev->vcn.ip_dump);
+
+ return 0;
}
/**
* vcn_v1_0_hw_init - start and test VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int vcn_v1_0_hw_init(void *handle)
+static int vcn_v1_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
int i, r;
r = amdgpu_ring_test_helper(ring);
if (r)
- goto done;
+ return r;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
ring = &adev->vcn.inst->ring_enc[i];
r = amdgpu_ring_test_helper(ring);
if (r)
- goto done;
+ return r;
}
- ring = &adev->jpeg.inst->ring_dec;
+ ring = adev->jpeg.inst->ring_dec;
r = amdgpu_ring_test_helper(ring);
- if (r)
- goto done;
-
-done:
- if (!r)
- DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
return r;
}
@@ -216,20 +270,21 @@ done:
/**
* vcn_v1_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the VCN block, mark ring as not ready any more
*/
-static int vcn_v1_0_hw_fini(void *handle)
+static int vcn_v1_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vcn_inst *vinst = adev->vcn.inst;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ cancel_delayed_work_sync(&vinst->idle_work);
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
- (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
- RREG32_SOC15(VCN, 0, mmUVD_STATUS))) {
- vcn_v1_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ (vinst->cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, 0, mmUVD_STATUS))) {
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
}
return 0;
@@ -238,20 +293,27 @@ static int vcn_v1_0_hw_fini(void *handle)
/**
* vcn_v1_0_suspend - suspend VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend VCN block
*/
-static int vcn_v1_0_suspend(void *handle)
+static int vcn_v1_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ bool idle_work_unexecuted;
- r = vcn_v1_0_hw_fini(adev);
+ idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work);
+ if (idle_work_unexecuted) {
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, false, 0);
+ }
+
+ r = vcn_v1_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_vcn_suspend(adev);
+ r = amdgpu_vcn_suspend(adev, 0);
return r;
}
@@ -259,20 +321,19 @@ static int vcn_v1_0_suspend(void *handle)
/**
* vcn_v1_0_resume - resume VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init VCN block
*/
-static int vcn_v1_0_resume(void *handle)
+static int vcn_v1_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_vcn_resume(adev);
+ r = amdgpu_vcn_resume(ip_block->adev, 0);
if (r)
return r;
- r = vcn_v1_0_hw_init(adev);
+ r = vcn_v1_0_hw_init(ip_block);
return r;
}
@@ -280,13 +341,14 @@ static int vcn_v1_0_resume(void *handle)
/**
* vcn_v1_0_mc_resume_spg_mode - memory controller programming
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Let the VCN memory controller know it's offsets
*/
-static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev)
+static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_vcn_inst *vinst)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -351,9 +413,10 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev)
adev->gfx.config.gb_addr_config);
}
-static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)
+static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -426,12 +489,13 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)
/**
* vcn_v1_0_disable_clock_gating - disable VCN clock gating
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Disable clock gating for VCN block
*/
-static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev)
+static void vcn_v1_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data;
/* JPEG disable CGC */
@@ -455,7 +519,7 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev)
if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
else
- data &= ~ UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
@@ -552,12 +616,13 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev)
/**
* vcn_v1_0_enable_clock_gating - enable VCN clock gating
*
- * @adev: amdgpu_device pointer
+ * @vinst: Pointer to the VCN instance structure
*
* Enable clock gating for VCN block
*/
-static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev)
+static void vcn_v1_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
/* enable JPEG CGC */
@@ -621,8 +686,10 @@ static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
}
-static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel)
+static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ uint8_t sram_sel)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t reg_data = 0;
/* disable JPEG CGC */
@@ -675,8 +742,9 @@ static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t s
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_SUVD_CGC_CTRL, 0, 0xFFFFFFFF, sram_sel);
}
-static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev)
+static void vcn_1_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
@@ -720,8 +788,9 @@ static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
}
-static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev)
+static void vcn_1_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
@@ -764,12 +833,13 @@ static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev)
/**
* vcn_v1_0_start_spg_mode - start VCN block
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Setup and start the VCN block
*/
-static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
+static int vcn_v1_0_start_spg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
@@ -778,13 +848,13 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
/* disable byte swapping */
lmi_swap_cntl = 0;
- vcn_1_0_disable_static_power_gating(adev);
+ vcn_1_0_disable_static_power_gating(vinst);
tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp);
/* disable clock gating */
- vcn_v1_0_disable_clock_gating(adev);
+ vcn_v1_0_disable_clock_gating(vinst);
/* disable interupt */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,
@@ -826,7 +896,7 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
- vcn_v1_0_mc_resume_spg_mode(adev);
+ vcn_v1_0_mc_resume_spg_mode(vinst);
WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK_1_0, 0x10);
WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0,
@@ -939,11 +1009,17 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
jpeg_v1_0_start(adev, 0);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
+static int vcn_v1_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
@@ -951,7 +1027,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
/* disable byte swapping */
lmi_swap_cntl = 0;
- vcn_1_0_enable_static_power_gating(adev);
+ vcn_1_0_enable_static_power_gating(vinst);
/* enable dynamic power gating mode */
tmp = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
@@ -960,7 +1036,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp);
/* enable clock gating */
- vcn_v1_0_clock_gating_dpg_mode(adev, 0);
+ vcn_v1_0_clock_gating_dpg_mode(vinst, 0);
/* enable VCPU clock */
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
@@ -1009,7 +1085,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0xFFFFFFFF, 0);
- vcn_v1_0_mc_resume_dpg_mode(adev);
+ vcn_v1_0_mc_resume_dpg_mode(vinst);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_REG_XX_MASK, 0x10, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK, 0x3, 0xFFFFFFFF, 0);
@@ -1026,7 +1102,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MASTINT_EN,
UVD_MASTINT_EN__VCPU_EN_MASK, UVD_MASTINT_EN__VCPU_EN_MASK, 0);
- vcn_v1_0_clock_gating_dpg_mode(adev, 1);
+ vcn_v1_0_clock_gating_dpg_mode(vinst, 1);
/* setup mmUVD_LMI_CTRL */
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_CTRL,
(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
@@ -1083,29 +1159,32 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
jpeg_v1_0_start(adev, 1);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v1_0_start(struct amdgpu_device *adev)
+static int vcn_v1_0_start(struct amdgpu_vcn_inst *vinst)
{
- int r;
+ struct amdgpu_device *adev = vinst->adev;
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- r = vcn_v1_0_start_dpg_mode(adev);
- else
- r = vcn_v1_0_start_spg_mode(adev);
- return r;
+ return (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ?
+ vcn_v1_0_start_dpg_mode(vinst) : vcn_v1_0_start_spg_mode(vinst);
}
/**
* vcn_v1_0_stop_spg_mode - stop VCN block
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* stop the VCN block
*/
-static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
+static int vcn_v1_0_stop_spg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
int tmp;
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
@@ -1145,13 +1224,20 @@ static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_STATUS, 0);
- vcn_v1_0_enable_clock_gating(adev);
- vcn_1_0_enable_static_power_gating(adev);
+ vcn_v1_0_enable_clock_gating(vinst);
+ vcn_1_0_enable_static_power_gating(vinst);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v1_0_stop_dpg_mode(struct amdgpu_device *adev)
+static int vcn_v1_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t tmp;
/* Wait for power status to be UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF */
@@ -1180,24 +1266,32 @@ static int vcn_v1_0_stop_dpg_mode(struct amdgpu_device *adev)
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v1_0_stop(struct amdgpu_device *adev)
+static int vcn_v1_0_stop(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
int r;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- r = vcn_v1_0_stop_dpg_mode(adev);
+ r = vcn_v1_0_stop_dpg_mode(vinst);
else
- r = vcn_v1_0_stop_spg_mode(adev);
+ r = vcn_v1_0_stop_spg_mode(vinst);
return r;
}
-static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state)
+static int vcn_v1_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
int ret_code;
uint32_t reg_data = 0;
uint32_t reg_data2 = 0;
@@ -1244,7 +1338,6 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
- ring = &adev->vcn.inst->ring_dec;
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
@@ -1291,7 +1384,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
/* Restore */
- ring = &adev->jpeg.inst->ring_dec;
+ ring = adev->jpeg.inst->ring_dec;
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
@@ -1305,7 +1398,6 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
- ring = &adev->vcn.inst->ring_dec;
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
@@ -1323,16 +1415,16 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
return 0;
}
-static bool vcn_v1_0_is_idle(void *handle)
+static bool vcn_v1_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE);
}
-static int vcn_v1_0_wait_for_idle(void *handle)
+static int vcn_v1_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
@@ -1341,20 +1433,21 @@ static int vcn_v1_0_wait_for_idle(void *handle)
return ret;
}
-static int vcn_v1_0_set_clockgating_state(void *handle,
+static int vcn_v1_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vcn_inst *vinst = adev->vcn.inst;
bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
/* wait for STATUS to clear */
- if (!vcn_v1_0_is_idle(handle))
+ if (!vcn_v1_0_is_idle(ip_block))
return -EBUSY;
- vcn_v1_0_enable_clock_gating(adev);
+ vcn_v1_0_enable_clock_gating(vinst);
} else {
/* disable HW gating and enable Sw gating */
- vcn_v1_0_disable_clock_gating(adev);
+ vcn_v1_0_disable_clock_gating(vinst);
}
return 0;
}
@@ -1537,7 +1630,7 @@ static void vcn_v1_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring,
static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
uint32_t data0, data1, mask;
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
@@ -1682,7 +1775,7 @@ static void vcn_v1_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring,
static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned int vmid, uint64_t pd_addr)
{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
@@ -1746,8 +1839,8 @@ static void vcn_v1_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t coun
}
}
-static int vcn_v1_0_set_powergating_state(void *handle,
- enum amd_powergating_state state)
+static int vcn_v1_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCN block.
* That's done in the dpm code via the SMC. This
@@ -1757,28 +1850,29 @@ static int vcn_v1_0_set_powergating_state(void *handle,
* the smc and the hw blocks
*/
int ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if(state == adev->vcn.cur_state)
+ if (state == vinst->cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
- ret = vcn_v1_0_stop(adev);
+ ret = vcn_v1_0_stop(vinst);
else
- ret = vcn_v1_0_start(adev);
+ ret = vcn_v1_0_start(vinst);
+
+ if (!ret)
+ vinst->cur_state = state;
- if(!ret)
- adev->vcn.cur_state = state;
return ret;
}
static void vcn_v1_0_idle_work_handler(struct work_struct *work)
{
- struct amdgpu_device *adev =
- container_of(work, struct amdgpu_device, vcn.idle_work.work);
+ struct amdgpu_vcn_inst *vcn_inst =
+ container_of(work, struct amdgpu_vcn_inst, idle_work.work);
+ struct amdgpu_device *adev = vcn_inst->adev;
unsigned int fences = 0, i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i)
fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_enc[i]);
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
@@ -1789,37 +1883,37 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work)
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
- if (amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec))
+ if (amdgpu_fence_count_emitted(adev->jpeg.inst->ring_dec))
new_state.jpeg = VCN_DPG_STATE__PAUSE;
else
new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
- adev->vcn.pause_dpg_mode(adev, 0, &new_state);
+ adev->vcn.inst->pause_dpg_mode(vcn_inst, &new_state);
}
- fences += amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec);
+ fences += amdgpu_fence_count_emitted(adev->jpeg.inst->ring_dec);
fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_dec);
if (fences == 0) {
amdgpu_gfx_off_ctrl(adev, true);
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, 0);
else
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_GATE);
} else {
- schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+ schedule_delayed_work(&adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT);
}
}
static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
+ bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work);
- mutex_lock(&adev->vcn.vcn1_jpeg1_workaround);
+ mutex_lock(&adev->vcn.inst[0].vcn1_jpeg1_workaround);
- if (amdgpu_fence_wait_empty(&ring->adev->jpeg.inst->ring_dec))
+ if (amdgpu_fence_wait_empty(ring->adev->jpeg.inst->ring_dec))
DRM_ERROR("VCN dec: jpeg dec ring may not be empty\n");
vcn_v1_0_set_pg_for_begin_use(ring, set_clocks);
@@ -1833,7 +1927,7 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks)
if (set_clocks) {
amdgpu_gfx_off_ctrl(adev, false);
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ amdgpu_dpm_enable_vcn(adev, true, 0);
else
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_UNGATE);
@@ -1843,7 +1937,7 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks)
struct dpg_pause_state new_state;
unsigned int fences = 0, i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i)
fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_enc[i]);
if (fences)
@@ -1851,7 +1945,7 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks)
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
- if (amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec))
+ if (amdgpu_fence_count_emitted(adev->jpeg.inst->ring_dec))
new_state.jpeg = VCN_DPG_STATE__PAUSE;
else
new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
@@ -1861,20 +1955,79 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks)
else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
new_state.jpeg = VCN_DPG_STATE__PAUSE;
- adev->vcn.pause_dpg_mode(adev, 0, &new_state);
+ adev->vcn.inst->pause_dpg_mode(adev->vcn.inst, &new_state);
}
}
void vcn_v1_0_ring_end_use(struct amdgpu_ring *ring)
{
- schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
- mutex_unlock(&ring->adev->vcn.vcn1_jpeg1_workaround);
+ schedule_delayed_work(&ring->adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT);
+ mutex_unlock(&ring->adev->vcn.inst[0].vcn1_jpeg1_workaround);
+}
+
+static void vcn_v1_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0);
+ uint32_t inst_off, is_powered;
+
+ if (!adev->vcn.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i)) {
+ drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i);
+ continue;
+ }
+
+ inst_off = i * reg_count;
+ is_powered = (adev->vcn.ip_dump[inst_off] &
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1;
+
+ if (is_powered) {
+ drm_printf(p, "\nActive Instance:VCN%d\n", i);
+ for (j = 0; j < reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_1_0[j].reg_name,
+ adev->vcn.ip_dump[inst_off + j]);
+ } else {
+ drm_printf(p, "\nInactive Instance:VCN%d\n", i);
+ }
+ }
+}
+
+static void vcn_v1_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ bool is_powered;
+ uint32_t inst_off;
+ uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0);
+
+ if (!adev->vcn.ip_dump)
+ return;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ inst_off = i * reg_count;
+ /* mmUVD_POWER_STATUS is always readable and is first element of the array */
+ adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS);
+ is_powered = (adev->vcn.ip_dump[inst_off] &
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1;
+
+ if (is_powered)
+ for (j = 1; j < reg_count; j++)
+ adev->vcn.ip_dump[inst_off + j] =
+ RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_1_0[j], i));
+ }
}
static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
.name = "vcn_v1_0",
.early_init = vcn_v1_0_early_init,
- .late_init = NULL,
.sw_init = vcn_v1_0_sw_init,
.sw_fini = vcn_v1_0_sw_fini,
.hw_init = vcn_v1_0_hw_init,
@@ -1883,23 +2036,91 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
.resume = vcn_v1_0_resume,
.is_idle = vcn_v1_0_is_idle,
.wait_for_idle = vcn_v1_0_wait_for_idle,
- .check_soft_reset = NULL /* vcn_v1_0_check_soft_reset */,
- .pre_soft_reset = NULL /* vcn_v1_0_pre_soft_reset */,
- .soft_reset = NULL /* vcn_v1_0_soft_reset */,
- .post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */,
.set_clockgating_state = vcn_v1_0_set_clockgating_state,
- .set_powergating_state = vcn_v1_0_set_powergating_state,
+ .set_powergating_state = vcn_set_powergating_state,
+ .dump_ip_state = vcn_v1_0_dump_ip_state,
+ .print_ip_state = vcn_v1_0_print_ip_state,
};
+/*
+ * It is a hardware issue that VCN can't handle a GTT TMZ buffer on
+ * CHIP_RAVEN series ASIC. Move such a GTT TMZ buffer to VRAM domain
+ * before command submission as a workaround.
+ */
+static int vcn_v1_0_validate_bo(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ uint64_t addr)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_bo_va_mapping *mapping;
+ struct amdgpu_bo *bo;
+ int r;
+
+ addr &= AMDGPU_GMC_HOLE_MASK;
+ if (addr & 0x7) {
+ DRM_ERROR("VCN messages must be 8 byte aligned!\n");
+ return -EINVAL;
+ }
+
+ mapping = amdgpu_vm_bo_lookup_mapping(vm, addr/AMDGPU_GPU_PAGE_SIZE);
+ if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
+ return -EINVAL;
+
+ bo = mapping->bo_va->base.bo;
+ if (!(bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED))
+ return 0;
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (r) {
+ DRM_ERROR("Failed to validate the VCN message BO (%d)!\n", r);
+ return r;
+ }
+
+ return r;
+}
+
+static int vcn_v1_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
+{
+ uint32_t msg_lo = 0, msg_hi = 0;
+ int i, r;
+
+ if (!(ib->flags & AMDGPU_IB_FLAGS_SECURE))
+ return 0;
+
+ for (i = 0; i < ib->length_dw; i += 2) {
+ uint32_t reg = amdgpu_ib_get_value(ib, i);
+ uint32_t val = amdgpu_ib_get_value(ib, i + 1);
+
+ if (reg == PACKET0(p->adev->vcn.inst[0].internal.data0, 0)) {
+ msg_lo = val;
+ } else if (reg == PACKET0(p->adev->vcn.inst[0].internal.data1, 0)) {
+ msg_hi = val;
+ } else if (reg == PACKET0(p->adev->vcn.inst[0].internal.cmd, 0)) {
+ r = vcn_v1_0_validate_bo(p, job,
+ ((u64)msg_hi) << 32 | msg_lo);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_DEC,
.align_mask = 0xf,
.support_64bit_ptrs = false,
.no_user_fence = true,
- .vmhub = AMDGPU_MMHUB_0,
+ .secure_submission_supported = true,
.get_rptr = vcn_v1_0_dec_ring_get_rptr,
.get_wptr = vcn_v1_0_dec_ring_get_wptr,
.set_wptr = vcn_v1_0_dec_ring_set_wptr,
+ .patch_cs_in_place = vcn_v1_0_ring_patch_cs_in_place,
.emit_frame_size =
6 + 6 + /* hdp invalidate / flush */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
@@ -1930,7 +2151,6 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
.nop = VCN_ENC_CMD_NO_OP,
.support_64bit_ptrs = false,
.no_user_fence = true,
- .vmhub = AMDGPU_MMHUB_0,
.get_rptr = vcn_v1_0_enc_ring_get_rptr,
.get_wptr = vcn_v1_0_enc_ring_get_wptr,
.set_wptr = vcn_v1_0_enc_ring_set_wptr,
@@ -1959,17 +2179,14 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
adev->vcn.inst->ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs;
- DRM_INFO("VCN decode is enabled in VM mode\n");
}
static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i)
adev->vcn.inst->ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs;
-
- DRM_INFO("VCN encode is enabled in VM mode\n");
}
static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = {
@@ -1979,12 +2196,11 @@ static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = {
static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2;
+ adev->vcn.inst->irq.num_types = adev->vcn.inst[0].num_enc_rings + 2;
adev->vcn.inst->irq.funcs = &vcn_v1_0_irq_funcs;
}
-const struct amdgpu_ip_block_version vcn_v1_0_ip_block =
-{
+const struct amdgpu_ip_block_version vcn_v1_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_VCN,
.major = 1,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index 313fc1b53999..8897dcc9c1a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -37,6 +37,10 @@
#include "vcn/vcn_2_0_0_sh_mask.h"
#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00
+#define VCN1_VID_SOC_ADDRESS_3_0 0x48200
+#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
+
#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x1fd
#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x503
#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET 0x504
@@ -50,50 +54,90 @@
#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x5a7
#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x1e2
+static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_0[] = {
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE)
+};
+
static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v2_0_set_powergating_state(void *handle,
- enum amd_powergating_state state);
-static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v2_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v2_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
static int vcn_v2_0_start_sriov(struct amdgpu_device *adev);
+static int vcn_v2_0_reset(struct amdgpu_vcn_inst *vinst);
+
/**
- * vcn_v2_0_early_init - set function pointers
+ * vcn_v2_0_early_init - set function pointers and load microcode
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
+ * Load microcode from filesystem
*/
-static int vcn_v2_0_early_init(void *handle)
+static int vcn_v2_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
- adev->vcn.num_enc_rings = 1;
+ adev->vcn.inst[0].num_enc_rings = 1;
else
- adev->vcn.num_enc_rings = 2;
+ adev->vcn.inst[0].num_enc_rings = 2;
+ adev->vcn.inst->set_pg_state = vcn_v2_0_set_pg_state;
vcn_v2_0_set_dec_ring_funcs(adev);
vcn_v2_0_set_enc_ring_funcs(adev);
vcn_v2_0_set_irq_funcs(adev);
- return 0;
+ return amdgpu_vcn_early_init(adev, 0);
}
/**
* vcn_v2_0_sw_init - sw init for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int vcn_v2_0_sw_init(void *handle)
+static int vcn_v2_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_fw_shared *fw_shared;
/* VCN DEC TRAP */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
@@ -103,7 +147,7 @@ static int vcn_v2_0_sw_init(void *handle)
return r;
/* VCN ENC TRAP */
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE,
&adev->vcn.inst->irq);
@@ -111,13 +155,13 @@ static int vcn_v2_0_sw_init(void *handle)
return r;
}
- r = amdgpu_vcn_sw_init(adev);
+ r = amdgpu_vcn_sw_init(adev, 0);
if (r)
return r;
- amdgpu_vcn_setup_ucode(adev);
+ amdgpu_vcn_setup_ucode(adev, 0);
- r = amdgpu_vcn_resume(adev);
+ r = amdgpu_vcn_resume(adev, 0);
if (r)
return r;
@@ -125,6 +169,7 @@ static int vcn_v2_0_sw_init(void *handle)
ring->use_doorbell = true;
ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "vcn_dec");
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
@@ -132,29 +177,30 @@ static int vcn_v2_0_sw_init(void *handle)
if (r)
return r;
- adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
- adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
- adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
- adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
- adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
- adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
- adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
adev->vcn.inst->external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9);
- adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
adev->vcn.inst->external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0);
- adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
adev->vcn.inst->external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1);
- adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
adev->vcn.inst->external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD);
- adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
adev->vcn.inst->external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP);
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i);
ring = &adev->vcn.inst->ring_enc[i];
ring->use_doorbell = true;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
if (!amdgpu_sriov_vf(adev))
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i;
else
@@ -166,29 +212,47 @@ static int vcn_v2_0_sw_init(void *handle)
return r;
}
- adev->vcn.pause_dpg_mode = vcn_v2_0_pause_dpg_mode;
+ adev->vcn.inst[0].pause_dpg_mode = vcn_v2_0_pause_dpg_mode;
+ adev->vcn.inst[0].reset = vcn_v2_0_reset;
+
+ adev->vcn.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+ if (!amdgpu_sriov_vf(adev))
+ adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
return r;
- fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
+ fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG);
+
+ if (amdgpu_vcnfw_log)
+ amdgpu_vcn_fwlog_init(adev->vcn.inst);
+
+ r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_2_0, ARRAY_SIZE(vcn_reg_list_2_0));
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
return 0;
}
/**
* vcn_v2_0_sw_fini - sw fini for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* VCN suspend and free up sw allocation
*/
-static int vcn_v2_0_sw_fini(void *handle)
+static int vcn_v2_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r, idx;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
fw_shared->present_flag_0 = 0;
@@ -197,25 +261,27 @@ static int vcn_v2_0_sw_fini(void *handle)
amdgpu_virt_free_mm_table(adev);
- r = amdgpu_vcn_suspend(adev);
+ r = amdgpu_vcn_suspend(adev, 0);
if (r)
return r;
- r = amdgpu_vcn_sw_fini(adev);
+ amdgpu_vcn_sysfs_reset_mask_fini(adev);
- return r;
+ amdgpu_vcn_sw_fini(adev, 0);
+
+ return 0;
}
/**
* vcn_v2_0_hw_init - start and test VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int vcn_v2_0_hw_init(void *handle)
+static int vcn_v2_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
int i, r;
@@ -227,44 +293,40 @@ static int vcn_v2_0_hw_init(void *handle)
r = amdgpu_ring_test_helper(ring);
if (r)
- goto done;
+ return r;
//Disable vcn decode for sriov
if (amdgpu_sriov_vf(adev))
ring->sched.ready = false;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
ring = &adev->vcn.inst->ring_enc[i];
r = amdgpu_ring_test_helper(ring);
if (r)
- goto done;
+ return r;
}
-done:
- if (!r)
- DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
-
- return r;
+ return 0;
}
/**
* vcn_v2_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the VCN block, mark ring as not ready any more
*/
-static int vcn_v2_0_hw_fini(void *handle)
+static int vcn_v2_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vcn_inst *vinst = adev->vcn.inst;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ cancel_delayed_work_sync(&vinst->idle_work);
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
- (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
- RREG32_SOC15(VCN, 0, mmUVD_STATUS)))
- vcn_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ (vinst->cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, 0, mmUVD_STATUS)))
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
return 0;
}
@@ -272,20 +334,19 @@ static int vcn_v2_0_hw_fini(void *handle)
/**
* vcn_v2_0_suspend - suspend VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend VCN block
*/
-static int vcn_v2_0_suspend(void *handle)
+static int vcn_v2_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = vcn_v2_0_hw_fini(adev);
+ r = vcn_v2_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_vcn_suspend(adev);
+ r = amdgpu_vcn_suspend(ip_block->adev, 0);
return r;
}
@@ -293,20 +354,19 @@ static int vcn_v2_0_suspend(void *handle)
/**
* vcn_v2_0_resume - resume VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init VCN block
*/
-static int vcn_v2_0_resume(void *handle)
+static int vcn_v2_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_vcn_resume(adev);
+ r = amdgpu_vcn_resume(ip_block->adev, 0);
if (r)
return r;
- r = vcn_v2_0_hw_init(adev);
+ r = vcn_v2_0_hw_init(ip_block);
return r;
}
@@ -314,13 +374,14 @@ static int vcn_v2_0_resume(void *handle)
/**
* vcn_v2_0_mc_resume - memory controller programming
*
- * @adev: amdgpu_device pointer
+ * @vinst: Pointer to the VCN instance structure
*
* Let the VCN memory controller know it's offsets
*/
-static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
+static void vcn_v2_0_mc_resume(struct amdgpu_vcn_inst *vinst)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
uint32_t offset;
if (amdgpu_sriov_vf(adev))
@@ -364,9 +425,9 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
/* non-cache window */
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.inst->fw_shared_gpu_addr));
+ lower_32_bits(adev->vcn.inst->fw_shared.gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.inst->fw_shared_gpu_addr));
+ upper_32_bits(adev->vcn.inst->fw_shared.gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
WREG32_SOC15(UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0,
AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
@@ -374,9 +435,11 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
}
-static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirect)
+static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -455,10 +518,10 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec
/* non-cache window */
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
- lower_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect);
+ lower_32_bits(adev->vcn.inst->fw_shared.gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
- upper_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect);
+ upper_32_bits(adev->vcn.inst->fw_shared.gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
@@ -473,12 +536,13 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec
/**
* vcn_v2_0_disable_clock_gating - disable VCN clock gating
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Disable clock gating for VCN block
*/
-static void vcn_v2_0_disable_clock_gating(struct amdgpu_device *adev)
+static void vcn_v2_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data;
if (amdgpu_sriov_vf(adev))
@@ -582,9 +646,10 @@ static void vcn_v2_0_disable_clock_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
}
-static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
+static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
uint8_t sram_sel, uint8_t indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t reg_data = 0;
/* enable sw clock gating control */
@@ -633,12 +698,13 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
/**
* vcn_v2_0_enable_clock_gating - enable VCN clock gating
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Enable clock gating for VCN block
*/
-static void vcn_v2_0_enable_clock_gating(struct amdgpu_device *adev)
+static void vcn_v2_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
if (amdgpu_sriov_vf(adev))
@@ -691,8 +757,9 @@ static void vcn_v2_0_enable_clock_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
}
-static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev)
+static void vcn_v2_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
if (amdgpu_sriov_vf(adev))
@@ -740,8 +807,9 @@ static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
}
-static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev)
+static void vcn_v2_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
if (amdgpu_sriov_vf(adev))
@@ -782,13 +850,15 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev)
}
}
-static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
+static int vcn_v2_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect)
{
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
+ struct amdgpu_device *adev = vinst->adev;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
+ int ret;
- vcn_v2_0_enable_static_power_gating(adev);
+ vcn_v2_0_enable_static_power_gating(vinst);
/* enable dynamic power gating mode */
tmp = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
@@ -800,7 +870,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
adev->vcn.inst->dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst->dpg_sram_cpu_addr;
/* enable clock gating */
- vcn_v2_0_clock_gating_dpg_mode(adev, 0, indirect);
+ vcn_v2_0_clock_gating_dpg_mode(vinst, 0, indirect);
/* enable VCPU clock */
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
@@ -849,7 +919,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
- vcn_v2_0_mc_resume_dpg_mode(adev, indirect);
+ vcn_v2_0_mc_resume_dpg_mode(vinst, indirect);
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
@@ -870,10 +940,13 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
UVD, 0, mmUVD_MASTINT_EN),
UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
- if (indirect)
- psp_update_vcn_sram(adev, 0, adev->vcn.inst->dpg_sram_gpu_addr,
- (uint32_t)((uintptr_t)adev->vcn.inst->dpg_sram_curr_addr -
- (uintptr_t)adev->vcn.inst->dpg_sram_cpu_addr));
+ if (indirect) {
+ ret = amdgpu_vcn_psp_update_sram(adev, 0, 0);
+ if (ret) {
+ dev_err(adev->dev, "vcn sram load failed %d\n", ret);
+ return ret;
+ }
+ }
/* force RBC into idle state */
rb_bufsz = order_base_2(ring->ring_size);
@@ -916,31 +989,38 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
/* Unstall DPG */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v2_0_start(struct amdgpu_device *adev)
+static int vcn_v2_0_start(struct amdgpu_vcn_inst *vinst)
{
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
+ struct amdgpu_device *adev = vinst->adev;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
int i, j, r;
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ amdgpu_dpm_enable_vcn(adev, true, 0);
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- return vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram);
+ return vcn_v2_0_start_dpg_mode(vinst, adev->vcn.inst->indirect_sram);
- vcn_v2_0_disable_static_power_gating(adev);
+ vcn_v2_0_disable_static_power_gating(vinst);
/* set uvd status busy */
tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp);
/*SW clock gating */
- vcn_v2_0_disable_clock_gating(adev);
+ vcn_v2_0_disable_clock_gating(vinst);
/* enable VCPU clock */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL),
@@ -984,7 +1064,7 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
- vcn_v2_0_mc_resume(adev);
+ vcn_v2_0_mc_resume(vinst);
/* release VCPU reset to boot */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0,
@@ -1089,13 +1169,21 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev)
+static int vcn_v2_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
uint32_t tmp;
+ vcn_v2_0_pause_dpg_mode(vinst, &state);
/* Wait for power status to be 1 */
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 1,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
@@ -1117,16 +1205,22 @@ static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev)
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v2_0_stop(struct amdgpu_device *adev)
+static int vcn_v2_0_stop(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t tmp;
int r;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v2_0_stop_dpg_mode(adev);
+ r = vcn_v2_0_stop_dpg_mode(vinst);
if (r)
return r;
goto power_off;
@@ -1178,19 +1272,26 @@ static int vcn_v2_0_stop(struct amdgpu_device *adev)
/* clear status */
WREG32_SOC15(VCN, 0, mmUVD_STATUS, 0);
- vcn_v2_0_enable_clock_gating(adev);
- vcn_v2_0_enable_static_power_gating(adev);
+ vcn_v2_0_enable_clock_gating(vinst);
+ vcn_v2_0_enable_static_power_gating(vinst);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, 0, mmUVD_STATUS);
power_off:
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, 0);
return 0;
}
-static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state)
+static int vcn_v2_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
struct amdgpu_ring *ring;
uint32_t reg_data = 0;
int ret_code;
@@ -1207,7 +1308,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
if (!ret_code) {
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
/* pause DPG */
reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
@@ -1265,16 +1366,26 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
return 0;
}
-static bool vcn_v2_0_is_idle(void *handle)
+static int vcn_v2_0_reset(struct amdgpu_vcn_inst *vinst)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = vcn_v2_0_stop(vinst);
+ if (r)
+ return r;
+ return vcn_v2_0_start(vinst);
+}
+
+static bool vcn_v2_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE);
}
-static int vcn_v2_0_wait_for_idle(void *handle)
+static int vcn_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
@@ -1283,10 +1394,10 @@ static int vcn_v2_0_wait_for_idle(void *handle)
return ret;
}
-static int vcn_v2_0_set_clockgating_state(void *handle,
+static int vcn_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
@@ -1294,12 +1405,12 @@ static int vcn_v2_0_set_clockgating_state(void *handle,
if (enable) {
/* wait for STATUS to clear */
- if (!vcn_v2_0_is_idle(handle))
+ if (!vcn_v2_0_is_idle(ip_block))
return -EBUSY;
- vcn_v2_0_enable_clock_gating(adev);
+ vcn_v2_0_enable_clock_gating(&adev->vcn.inst[0]);
} else {
/* disable HW gating and enable Sw gating */
- vcn_v2_0_disable_clock_gating(adev);
+ vcn_v2_0_disable_clock_gating(&adev->vcn.inst[0]);
}
return 0;
}
@@ -1330,7 +1441,7 @@ static uint64_t vcn_v2_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR);
}
@@ -1351,7 +1462,7 @@ static void vcn_v2_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
lower_32_bits(ring->wptr) | 0x80000000);
if (ring->use_doorbell) {
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
@@ -1369,9 +1480,9 @@ void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0));
amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1));
}
@@ -1386,7 +1497,7 @@ void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[0].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_END << 1));
}
@@ -1406,7 +1517,7 @@ void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
WARN_ON(ring->wptr % 2 || count % 2);
for (i = 0; i < count / 2; i++) {
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.nop, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.nop, 0));
amdgpu_ring_write(ring, 0);
}
}
@@ -1427,25 +1538,25 @@ void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
struct amdgpu_device *adev = ring->adev;
WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.context_id, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.context_id, 0));
amdgpu_ring_write(ring, seq);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0));
amdgpu_ring_write(ring, addr & 0xffffffff);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data1, 0));
amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_FENCE << 1));
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0));
amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data1, 0));
amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_TRAP << 1));
}
@@ -1468,14 +1579,14 @@ void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_vmid, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.ib_vmid, 0));
amdgpu_ring_write(ring, vmid);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_low, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.ib_bar_low, 0));
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_high, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.ib_bar_high, 0));
amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_size, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.ib_size, 0));
amdgpu_ring_write(ring, ib->length_dw);
}
@@ -1484,16 +1595,16 @@ void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
{
struct amdgpu_device *adev = ring->adev;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0));
amdgpu_ring_write(ring, reg << 2);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data1, 0));
amdgpu_ring_write(ring, val);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.gp_scratch8, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.gp_scratch8, 0));
amdgpu_ring_write(ring, mask);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_REG_READ_COND_WAIT << 1));
}
@@ -1501,7 +1612,7 @@ void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
uint32_t data0, data1, mask;
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
@@ -1518,13 +1629,13 @@ void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
{
struct amdgpu_device *adev = ring->adev;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0));
amdgpu_ring_write(ring, reg << 2);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data1, 0));
amdgpu_ring_write(ring, val);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_WRITE_REG << 1));
}
@@ -1559,12 +1670,12 @@ static uint64_t vcn_v2_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
if (ring == &adev->vcn.inst->ring_enc[0]) {
if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
} else {
if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
}
@@ -1583,14 +1694,14 @@ static void vcn_v2_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
if (ring == &adev->vcn.inst->ring_enc[0]) {
if (ring->use_doorbell) {
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
}
} else {
if (ring->use_doorbell) {
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
@@ -1661,7 +1772,7 @@ void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned int vmid, uint64_t pd_addr)
{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
@@ -1725,9 +1836,9 @@ int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring)
r = amdgpu_ring_alloc(ring, 4);
if (r)
return r;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1));
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.scratch9, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
@@ -1744,8 +1855,8 @@ int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring)
}
-static int vcn_v2_0_set_powergating_state(void *handle,
- enum amd_powergating_state state)
+static int vcn_v2_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCN block.
* That's done in the dpm code via the SMC. This
@@ -1755,23 +1866,24 @@ static int vcn_v2_0_set_powergating_state(void *handle,
* the smc and the hw blocks
*/
int ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = vinst->adev;
if (amdgpu_sriov_vf(adev)) {
- adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+ vinst->cur_state = AMD_PG_STATE_UNGATE;
return 0;
}
- if (state == adev->vcn.cur_state)
+ if (state == vinst->cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
- ret = vcn_v2_0_stop(adev);
+ ret = vcn_v2_0_stop(vinst);
else
- ret = vcn_v2_0_start(adev);
+ ret = vcn_v2_0_start(vinst);
if (!ret)
- adev->vcn.cur_state = state;
+ vinst->cur_state = state;
+
return ret;
}
@@ -1810,7 +1922,7 @@ static int vcn_v2_0_start_mmsch(struct amdgpu_device *adev,
adev->vcn.inst->ring_dec.wptr_old = 0;
vcn_v2_0_dec_ring_set_wptr(&adev->vcn.inst->ring_dec);
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
adev->vcn.inst->ring_enc[i].wptr = 0;
adev->vcn.inst->ring_enc[i].wptr_old = 0;
vcn_v2_0_enc_ring_set_wptr(&adev->vcn.inst->ring_enc[i]);
@@ -1868,7 +1980,7 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev)
init_table += header->vcn_table_offset;
- size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
MMSCH_V2_0_INSERT_DIRECT_RD_MOD_WT(
SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
@@ -1936,7 +2048,7 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev)
SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2),
AMDGPU_VCN_CONTEXT_SIZE);
- for (r = 0; r < adev->vcn.num_enc_rings; ++r) {
+ for (r = 0; r < adev->vcn.inst[0].num_enc_rings; ++r) {
ring = &adev->vcn.inst->ring_enc[r];
ring->wptr = 0;
MMSCH_V2_0_INSERT_DIRECT_WT(
@@ -1983,7 +2095,6 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev)
static const struct amd_ip_funcs vcn_v2_0_ip_funcs = {
.name = "vcn_v2_0",
.early_init = vcn_v2_0_early_init,
- .late_init = NULL,
.sw_init = vcn_v2_0_sw_init,
.sw_fini = vcn_v2_0_sw_fini,
.hw_init = vcn_v2_0_hw_init,
@@ -1992,18 +2103,16 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = {
.resume = vcn_v2_0_resume,
.is_idle = vcn_v2_0_is_idle,
.wait_for_idle = vcn_v2_0_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = vcn_v2_0_set_clockgating_state,
- .set_powergating_state = vcn_v2_0_set_powergating_state,
+ .set_powergating_state = vcn_set_powergating_state,
+ .dump_ip_state = amdgpu_vcn_dump_ip_state,
+ .print_ip_state = amdgpu_vcn_print_ip_state,
};
static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_DEC,
.align_mask = 0xf,
- .vmhub = AMDGPU_MMHUB_0,
+ .secure_submission_supported = true,
.get_rptr = vcn_v2_0_dec_ring_get_rptr,
.get_wptr = vcn_v2_0_dec_ring_get_wptr,
.set_wptr = vcn_v2_0_dec_ring_set_wptr,
@@ -2028,13 +2137,13 @@ static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = {
.emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = amdgpu_vcn_ring_reset,
};
static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
- .vmhub = AMDGPU_MMHUB_0,
.get_rptr = vcn_v2_0_enc_ring_get_rptr,
.get_wptr = vcn_v2_0_enc_ring_get_wptr,
.set_wptr = vcn_v2_0_enc_ring_set_wptr,
@@ -2058,22 +2167,20 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = {
.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = amdgpu_vcn_ring_reset,
};
static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
adev->vcn.inst->ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs;
- DRM_INFO("VCN decode is enabled in VM mode\n");
}
static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i)
adev->vcn.inst->ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs;
-
- DRM_INFO("VCN encode is enabled in VM mode\n");
}
static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = {
@@ -2083,7 +2190,7 @@ static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = {
static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst->irq.num_types = adev->vcn.inst[0].num_enc_rings + 1;
adev->vcn.inst->irq.funcs = &vcn_v2_0_irq_funcs;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 44fc4c218433..cebee453871c 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -31,11 +31,16 @@
#include "soc15d.h"
#include "vcn_v2_0.h"
#include "mmsch_v1_0.h"
+#include "vcn_v2_5.h"
#include "vcn/vcn_2_5_offset.h"
#include "vcn/vcn_2_5_sh_mask.h"
#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00
+#define VCN1_VID_SOC_ADDRESS_3_0 0x48200
+#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
+
#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27
#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f
#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET 0x10
@@ -51,35 +56,181 @@
#define VCN25_MAX_HW_INSTANCES_ARCTURUS 2
+static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_5[] = {
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE)
+};
+
static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v2_5_set_powergating_state(void *handle,
- enum amd_powergating_state state);
-static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v2_5_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v2_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
static int vcn_v2_5_sriov_start(struct amdgpu_device *adev);
+static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev);
+static int vcn_v2_5_reset(struct amdgpu_vcn_inst *vinst);
static int amdgpu_ih_clientid_vcns[] = {
SOC15_IH_CLIENTID_VCN,
SOC15_IH_CLIENTID_VCN1
};
+static void vcn_v2_5_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_vcn_inst *vcn_inst =
+ container_of(work, struct amdgpu_vcn_inst, idle_work.work);
+ struct amdgpu_device *adev = vcn_inst->adev;
+ unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
+ unsigned int i, j;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *v = &adev->vcn.inst[i];
+
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ for (j = 0; j < v->num_enc_rings; ++j)
+ fence[i] += amdgpu_fence_count_emitted(&v->ring_enc[j]);
+
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ !v->using_unified_queue) {
+ struct dpg_pause_state new_state;
+
+ if (fence[i] ||
+ unlikely(atomic_read(&v->dpg_enc_submission_cnt)))
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+
+ v->pause_dpg_mode(v, &new_state);
+ }
+
+ fence[i] += amdgpu_fence_count_emitted(&v->ring_dec);
+ fences += fence[i];
+
+ }
+
+ if (!fences && !atomic_read(&adev->vcn.inst[0].total_submission_cnt)) {
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_GATE);
+ amdgpu_vcn_put_profile(adev);
+ } else {
+ schedule_delayed_work(&adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT);
+ }
+}
+
+static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vcn_inst *v = &adev->vcn.inst[ring->me];
+
+ atomic_inc(&adev->vcn.inst[0].total_submission_cnt);
+
+ cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work);
+
+ /* We can safely return early here because we've cancelled the
+ * the delayed work so there is no one else to set it to false
+ * and we don't care if someone else sets it to true.
+ */
+ mutex_lock(&adev->vcn.inst[0].vcn_pg_lock);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_UNGATE);
+
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ !v->using_unified_queue) {
+ struct dpg_pause_state new_state;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
+ atomic_inc(&v->dpg_enc_submission_cnt);
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ } else {
+ unsigned int fences = 0;
+ unsigned int i;
+
+ for (i = 0; i < v->num_enc_rings; ++i)
+ fences += amdgpu_fence_count_emitted(&v->ring_enc[i]);
+
+ if (fences || atomic_read(&v->dpg_enc_submission_cnt))
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+ }
+ v->pause_dpg_mode(v, &new_state);
+ }
+ mutex_unlock(&adev->vcn.inst[0].vcn_pg_lock);
+ amdgpu_vcn_get_profile(adev);
+}
+
+static void vcn_v2_5_ring_end_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
+ if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC &&
+ !adev->vcn.inst[ring->me].using_unified_queue)
+ atomic_dec(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
+
+ atomic_dec(&adev->vcn.inst[0].total_submission_cnt);
+
+ schedule_delayed_work(&adev->vcn.inst[0].idle_work,
+ VCN_IDLE_TIMEOUT);
+}
+
/**
- * vcn_v2_5_early_init - set function pointers
+ * vcn_v2_5_early_init - set function pointers and load microcode
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
+ * Load microcode from filesystem
*/
-static int vcn_v2_5_early_init(void *handle)
+static int vcn_v2_5_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
if (amdgpu_sriov_vf(adev)) {
adev->vcn.num_vcn_inst = 2;
adev->vcn.harvest_config = 0;
- adev->vcn.num_enc_rings = 1;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ adev->vcn.inst[i].num_enc_rings = 1;
} else {
u32 harvest;
int i;
@@ -88,18 +239,26 @@ static int vcn_v2_5_early_init(void *handle)
harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING);
if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
adev->vcn.harvest_config |= 1 << i;
+ adev->vcn.inst[i].num_enc_rings = 2;
}
if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
- AMDGPU_VCN_HARVEST_VCN1))
+ AMDGPU_VCN_HARVEST_VCN1))
/* both instances are harvested, disable the block */
return -ENOENT;
-
- adev->vcn.num_enc_rings = 2;
}
vcn_v2_5_set_dec_ring_funcs(adev);
vcn_v2_5_set_enc_ring_funcs(adev);
vcn_v2_5_set_irq_funcs(adev);
+ vcn_v2_5_set_ras_funcs(adev);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ adev->vcn.inst[i].set_pg_state = vcn_v2_5_set_pg_state;
+
+ r = amdgpu_vcn_early_init(adev, i);
+ if (r)
+ return r;
+ }
return 0;
}
@@ -107,17 +266,19 @@ static int vcn_v2_5_early_init(void *handle)
/**
* vcn_v2_5_sw_init - sw init for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int vcn_v2_5_sw_init(void *handle)
+static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, j, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
+ struct amdgpu_fw_shared *fw_shared;
+
if (adev->vcn.harvest_config & (1 << j))
continue;
/* VCN DEC TRAP */
@@ -127,45 +288,48 @@ static int vcn_v2_5_sw_init(void *handle)
return r;
/* VCN ENC TRAP */
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[j].num_enc_rings; ++i) {
r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[j].irq);
if (r)
return r;
}
- }
- r = amdgpu_vcn_sw_init(adev);
- if (r)
- return r;
+ /* VCN POISON TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
+ VCN_2_6__SRCID_UVD_POISON, &adev->vcn.inst[j].ras_poison_irq);
+ if (r)
+ return r;
- amdgpu_vcn_setup_ucode(adev);
+ r = amdgpu_vcn_sw_init(adev, j);
+ if (r)
+ return r;
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
+ /* Override the work func */
+ adev->vcn.inst[j].idle_work.work.func = vcn_v2_5_idle_work_handler;
- for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
- volatile struct amdgpu_fw_shared *fw_shared;
+ amdgpu_vcn_setup_ucode(adev, j);
- if (adev->vcn.harvest_config & (1 << j))
- continue;
- adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
- adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
- adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
- adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
- adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
- adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
-
- adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
+ r = amdgpu_vcn_resume(adev, j);
+ if (r)
+ return r;
+
+ adev->vcn.inst[j].internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
+
+ adev->vcn.inst[j].internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(VCN, j, mmUVD_SCRATCH9);
- adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA0);
- adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA1);
- adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_CMD);
- adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(VCN, j, mmUVD_NO_OP);
ring = &adev->vcn.inst[j].ring_dec;
@@ -173,13 +337,19 @@ static int vcn_v2_5_sw_init(void *handle)
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
(amdgpu_sriov_vf(adev) ? 2*j : 8*j);
+
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(2, 5, 0))
+ ring->vm_hub = AMDGPU_MMHUB1(0);
+ else
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+
sprintf(ring->name, "vcn_dec_%d", j);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq,
0, AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[j].num_enc_rings; ++i) {
enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i);
ring = &adev->vcn.inst[j].ring_enc[i];
@@ -188,6 +358,12 @@ static int vcn_v2_5_sw_init(void *handle)
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
(amdgpu_sriov_vf(adev) ? (1 + i + 2*j) : (2 + i + 8*j));
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) ==
+ IP_VERSION(2, 5, 0))
+ ring->vm_hub = AMDGPU_MMHUB1(0);
+ else
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+
sprintf(ring->name, "vcn_enc_%d.%d", j, i);
r = amdgpu_ring_init(adev, ring, 512,
&adev->vcn.inst[j].irq, 0,
@@ -196,18 +372,39 @@ static int vcn_v2_5_sw_init(void *handle)
return r;
}
- fw_shared = adev->vcn.inst[j].fw_shared_cpu_addr;
+ fw_shared = adev->vcn.inst[j].fw_shared.cpu_addr;
fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG);
+
+ if (amdgpu_vcnfw_log)
+ amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.inst[j].pause_dpg_mode = vcn_v2_5_pause_dpg_mode;
+ adev->vcn.inst[j].reset = vcn_v2_5_reset;
}
+ adev->vcn.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+ if (!amdgpu_sriov_vf(adev))
+ adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
return r;
}
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- adev->vcn.pause_dpg_mode = vcn_v2_5_pause_dpg_mode;
+ r = amdgpu_vcn_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_2_5, ARRAY_SIZE(vcn_reg_list_2_5));
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
return 0;
}
@@ -215,21 +412,21 @@ static int vcn_v2_5_sw_init(void *handle)
/**
* vcn_v2_5_sw_fini - sw fini for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* VCN suspend and free up sw allocation
*/
-static int vcn_v2_5_sw_fini(void *handle)
+static int vcn_v2_5_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i, r, idx;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_fw_shared *fw_shared;
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
if (adev->vcn.harvest_config & (1 << i))
continue;
- fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
fw_shared->present_flag_0 = 0;
}
drm_dev_exit(idx);
@@ -239,25 +436,28 @@ static int vcn_v2_5_sw_fini(void *handle)
if (amdgpu_sriov_vf(adev))
amdgpu_virt_free_mm_table(adev);
- r = amdgpu_vcn_suspend(adev);
- if (r)
- return r;
+ amdgpu_vcn_sysfs_reset_mask_fini(adev);
- r = amdgpu_vcn_sw_fini(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(adev, i);
+ if (r)
+ return r;
+ amdgpu_vcn_sw_fini(adev, i);
+ }
- return r;
+ return 0;
}
/**
* vcn_v2_5_hw_init - start and test VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int vcn_v2_5_hw_init(void *handle)
+static int vcn_v2_5_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, j, r = 0;
@@ -282,47 +482,47 @@ static int vcn_v2_5_hw_init(void *handle)
r = amdgpu_ring_test_helper(ring);
if (r)
- goto done;
+ return r;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[j].num_enc_rings; ++i) {
ring = &adev->vcn.inst[j].ring_enc[i];
r = amdgpu_ring_test_helper(ring);
if (r)
- goto done;
+ return r;
}
}
}
-done:
- if (!r)
- DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
-
return r;
}
/**
* vcn_v2_5_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the VCN block, mark ring as not ready any more
*/
-static int vcn_v2_5_hw_fini(void *handle)
+static int vcn_v2_5_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
-
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
if (adev->vcn.harvest_config & (1 << i))
continue;
+ cancel_delayed_work_sync(&vinst->idle_work);
+
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
- (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
+ (vinst->cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(VCN, i, mmUVD_STATUS)))
- vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
+ amdgpu_irq_put(adev, &vinst->ras_poison_irq, 0);
}
return 0;
@@ -331,41 +531,47 @@ static int vcn_v2_5_hw_fini(void *handle)
/**
* vcn_v2_5_suspend - suspend VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend VCN block
*/
-static int vcn_v2_5_suspend(void *handle)
+static int vcn_v2_5_suspend(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = vcn_v2_5_hw_fini(adev);
+ r = vcn_v2_5_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_vcn_suspend(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(ip_block->adev, i);
+ if (r)
+ return r;
+ }
- return r;
+ return 0;
}
/**
* vcn_v2_5_resume - resume VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init VCN block
*/
-static int vcn_v2_5_resume(void *handle)
+static int vcn_v2_5_resume(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_resume(ip_block->adev, i);
+ if (r)
+ return r;
+ }
- r = vcn_v2_5_hw_init(adev);
+ r = vcn_v2_5_hw_init(ip_block);
return r;
}
@@ -373,68 +579,72 @@ static int vcn_v2_5_resume(void *handle)
/**
* vcn_v2_5_mc_resume - memory controller programming
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Let the VCN memory controller know it's offsets
*/
-static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
+static void vcn_v2_5_mc_resume(struct amdgpu_vcn_inst *vinst)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ uint32_t size;
uint32_t offset;
- int i;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- /* cache window 0: fw */
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
- (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo));
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
- (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi));
- WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
- offset = 0;
- } else {
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.inst[i].gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.inst[i].gpu_addr));
- offset = size;
- WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0,
- AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
- }
- WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE0, size);
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
- /* cache window 1: stack */
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
- WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET1, 0);
- WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
-
- /* cache window 2: context */
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
- WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET2, 0);
- WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
-
- /* non-cache window */
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.inst[i].fw_shared_gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.inst[i].fw_shared_gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
- WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_SIZE0,
- AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
+ size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi));
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[i].gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[i].gpu_addr));
+ offset = size;
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
}
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack */
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context */
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+
+ /* non-cache window */
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_SIZE0,
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
}
-static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst_idx].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -513,10 +723,10 @@ static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
/* non-cache window */
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
- lower_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
- upper_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
@@ -531,123 +741,124 @@ static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
/**
* vcn_v2_5_disable_clock_gating - disable VCN clock gating
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Disable clock gating for VCN block
*/
-static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev)
+static void vcn_v2_5_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
uint32_t data;
- int i;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- /* UVD disable CGC */
- data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
- if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
- data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
- else
- data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
- data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
- data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
- WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
-
- data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE);
- data &= ~(UVD_CGC_GATE__SYS_MASK
- | UVD_CGC_GATE__UDEC_MASK
- | UVD_CGC_GATE__MPEG2_MASK
- | UVD_CGC_GATE__REGS_MASK
- | UVD_CGC_GATE__RBC_MASK
- | UVD_CGC_GATE__LMI_MC_MASK
- | UVD_CGC_GATE__LMI_UMC_MASK
- | UVD_CGC_GATE__IDCT_MASK
- | UVD_CGC_GATE__MPRD_MASK
- | UVD_CGC_GATE__MPC_MASK
- | UVD_CGC_GATE__LBSI_MASK
- | UVD_CGC_GATE__LRBBM_MASK
- | UVD_CGC_GATE__UDEC_RE_MASK
- | UVD_CGC_GATE__UDEC_CM_MASK
- | UVD_CGC_GATE__UDEC_IT_MASK
- | UVD_CGC_GATE__UDEC_DB_MASK
- | UVD_CGC_GATE__UDEC_MP_MASK
- | UVD_CGC_GATE__WCB_MASK
- | UVD_CGC_GATE__VCPU_MASK
- | UVD_CGC_GATE__MMSCH_MASK);
-
- WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data);
-
- SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF);
-
- data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
- data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
- | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
- | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
- | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
- | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
- | UVD_CGC_CTRL__SYS_MODE_MASK
- | UVD_CGC_CTRL__UDEC_MODE_MASK
- | UVD_CGC_CTRL__MPEG2_MODE_MASK
- | UVD_CGC_CTRL__REGS_MODE_MASK
- | UVD_CGC_CTRL__RBC_MODE_MASK
- | UVD_CGC_CTRL__LMI_MC_MODE_MASK
- | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
- | UVD_CGC_CTRL__IDCT_MODE_MASK
- | UVD_CGC_CTRL__MPRD_MODE_MASK
- | UVD_CGC_CTRL__MPC_MODE_MASK
- | UVD_CGC_CTRL__LBSI_MODE_MASK
- | UVD_CGC_CTRL__LRBBM_MODE_MASK
- | UVD_CGC_CTRL__WCB_MODE_MASK
- | UVD_CGC_CTRL__VCPU_MODE_MASK
- | UVD_CGC_CTRL__MMSCH_MODE_MASK);
- WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
-
- /* turn on */
- data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE);
- data |= (UVD_SUVD_CGC_GATE__SRE_MASK
- | UVD_SUVD_CGC_GATE__SIT_MASK
- | UVD_SUVD_CGC_GATE__SMP_MASK
- | UVD_SUVD_CGC_GATE__SCM_MASK
- | UVD_SUVD_CGC_GATE__SDB_MASK
- | UVD_SUVD_CGC_GATE__SRE_H264_MASK
- | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
- | UVD_SUVD_CGC_GATE__SIT_H264_MASK
- | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
- | UVD_SUVD_CGC_GATE__SCM_H264_MASK
- | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
- | UVD_SUVD_CGC_GATE__SDB_H264_MASK
- | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
- | UVD_SUVD_CGC_GATE__SCLR_MASK
- | UVD_SUVD_CGC_GATE__UVD_SC_MASK
- | UVD_SUVD_CGC_GATE__ENT_MASK
- | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
- | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
- | UVD_SUVD_CGC_GATE__SITE_MASK
- | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
- | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
- | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
- | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
- | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
- WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data);
-
- data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
- data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
- | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
- | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
+ /* UVD disable CGC */
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE);
+ data &= ~(UVD_CGC_GATE__SYS_MASK
+ | UVD_CGC_GATE__UDEC_MASK
+ | UVD_CGC_GATE__MPEG2_MASK
+ | UVD_CGC_GATE__REGS_MASK
+ | UVD_CGC_GATE__RBC_MASK
+ | UVD_CGC_GATE__LMI_MC_MASK
+ | UVD_CGC_GATE__LMI_UMC_MASK
+ | UVD_CGC_GATE__IDCT_MASK
+ | UVD_CGC_GATE__MPRD_MASK
+ | UVD_CGC_GATE__MPC_MASK
+ | UVD_CGC_GATE__LBSI_MASK
+ | UVD_CGC_GATE__LRBBM_MASK
+ | UVD_CGC_GATE__UDEC_RE_MASK
+ | UVD_CGC_GATE__UDEC_CM_MASK
+ | UVD_CGC_GATE__UDEC_IT_MASK
+ | UVD_CGC_GATE__UDEC_DB_MASK
+ | UVD_CGC_GATE__UDEC_MP_MASK
+ | UVD_CGC_GATE__WCB_MASK
+ | UVD_CGC_GATE__VCPU_MASK
+ | UVD_CGC_GATE__MMSCH_MASK);
+
+ WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data);
+
+ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
+ data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
+
+ /* turn on */
+ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE);
+ data |= (UVD_SUVD_CGC_GATE__SRE_MASK
+ | UVD_SUVD_CGC_GATE__SIT_MASK
+ | UVD_SUVD_CGC_GATE__SMP_MASK
+ | UVD_SUVD_CGC_GATE__SCM_MASK
+ | UVD_SUVD_CGC_GATE__SDB_MASK
+ | UVD_SUVD_CGC_GATE__SRE_H264_MASK
+ | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_H264_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCM_H264_MASK
+ | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_H264_MASK
+ | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCLR_MASK
+ | UVD_SUVD_CGC_GATE__UVD_SC_MASK
+ | UVD_SUVD_CGC_GATE__ENT_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
+ | UVD_SUVD_CGC_GATE__SITE_MASK
+ | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
+ | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
+ data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
- WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
- }
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
}
-static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev,
- uint8_t sram_sel, int inst_idx, uint8_t indirect)
+static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ uint8_t sram_sel, uint8_t indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t reg_data = 0;
/* enable sw clock gating control */
@@ -696,70 +907,101 @@ static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev,
/**
* vcn_v2_5_enable_clock_gating - enable VCN clock gating
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Enable clock gating for VCN block
*/
-static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev)
+static void vcn_v2_5_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
uint32_t data = 0;
- int i;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- /* enable UVD CGC */
- data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
- if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
- data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
- else
- data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
- data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
- data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
- WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
-
- data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
- data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
- | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
- | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
- | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
- | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
- | UVD_CGC_CTRL__SYS_MODE_MASK
- | UVD_CGC_CTRL__UDEC_MODE_MASK
- | UVD_CGC_CTRL__MPEG2_MODE_MASK
- | UVD_CGC_CTRL__REGS_MODE_MASK
- | UVD_CGC_CTRL__RBC_MODE_MASK
- | UVD_CGC_CTRL__LMI_MC_MODE_MASK
- | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
- | UVD_CGC_CTRL__IDCT_MODE_MASK
- | UVD_CGC_CTRL__MPRD_MODE_MASK
- | UVD_CGC_CTRL__MPC_MODE_MASK
- | UVD_CGC_CTRL__LBSI_MODE_MASK
- | UVD_CGC_CTRL__LRBBM_MODE_MASK
- | UVD_CGC_CTRL__WCB_MODE_MASK
- | UVD_CGC_CTRL__VCPU_MODE_MASK);
- WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
-
- data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
- data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
- | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
- | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
- | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
- WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
- }
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
+ /* enable UVD CGC */
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
+ data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
+ data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
+}
+
+static void vcn_v2_6_enable_ras(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t tmp;
+
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(2, 6, 0))
+ return;
+
+ tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, mmVCN_RAS_CNTL),
+ tmp, 0, indirect);
+
+ tmp = UVD_VCPU_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, mmUVD_VCPU_INT_EN),
+ tmp, 0, indirect);
+
+ tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, mmUVD_SYS_INT_EN),
+ tmp, 0, indirect);
}
-static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static int vcn_v2_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect)
{
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp;
+ int ret;
/* disable register anti-hang mechanism */
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1,
@@ -774,7 +1016,7 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
/* enable clock gating */
- vcn_v2_5_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+ vcn_v2_5_clock_gating_dpg_mode(vinst, 0, indirect);
/* enable VCPU clock */
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
@@ -823,7 +1065,7 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
- vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect);
+ vcn_v2_5_mc_resume_dpg_mode(vinst, indirect);
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
@@ -834,6 +1076,8 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_CTRL2), 0, 0, indirect);
+ vcn_v2_6_enable_ras(vinst, indirect);
+
/* unblock VCPU register access */
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect);
@@ -848,10 +1092,13 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
VCN, 0, mmUVD_MASTINT_EN),
UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
- if (indirect)
- psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
- (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
- (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));
+ if (indirect) {
+ ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, 0);
+ if (ret) {
+ dev_err(adev->dev, "vcn sram load failed %d\n", ret);
+ return ret;
+ }
+ }
ring = &adev->vcn.inst[inst_idx].ring_dec;
/* force RBC into idle state */
@@ -896,198 +1143,200 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v2_5_start(struct amdgpu_device *adev)
+static int vcn_v2_5_start(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_fw_shared *fw_shared =
+ adev->vcn.inst[i].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp;
- int i, j, k, r;
+ int j, k, r;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ amdgpu_dpm_enable_vcn(adev, true, i);
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v2_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
- continue;
- }
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v2_5_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram);
- /* disable register anti-hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS), 0,
- ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
- /* set uvd status busy */
- tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
- WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
- }
+ /* set uvd status busy */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
return 0;
- /*SW clock gating */
- vcn_v2_5_disable_clock_gating(adev);
+ /* SW clock gating */
+ vcn_v2_5_disable_clock_gating(vinst);
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- /* enable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
-
- /* disable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
-
- /* setup mmUVD_LMI_CTRL */
- tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
- tmp &= ~0xff;
- WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp | 0x8|
- UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
- UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
-
- /* setup mmUVD_MPC_CNTL */
- tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
- tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
- tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
- WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
-
- /* setup UVD_MPC_SET_MUXA0 */
- WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
- ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
-
- /* setup UVD_MPC_SET_MUXB0 */
- WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
- ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
-
- /* setup mmUVD_MPC_SET_MUX */
- WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
- ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
- (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
- }
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
- vcn_v2_5_mc_resume(adev);
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- /* VCN global tiling registers */
- WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
- WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
+ /* setup mmUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
+ tmp &= ~0xff;
+ WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp | 0x8|
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup mmUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup mmUVD_MPC_SET_MUX */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v2_5_mc_resume(vinst);
- /* enable LMI MC and UMC channels */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
- /* unblock VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
-
- for (k = 0; k < 10; ++k) {
- uint32_t status;
-
- for (j = 0; j < 100; ++j) {
- status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
- if (status & 2)
- break;
- if (amdgpu_emu_mode == 1)
- msleep(500);
- else
- mdelay(10);
- }
- r = 0;
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (k = 0; k < 10; ++k) {
+ uint32_t status;
+
+ for (j = 0; j < 100; ++j) {
+ status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
if (status & 2)
break;
+ if (amdgpu_emu_mode == 1)
+ msleep(500);
+ else
+ mdelay(10);
+ }
+ r = 0;
+ if (status & 2)
+ break;
- DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n");
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
- mdelay(10);
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n");
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
- mdelay(10);
- r = -1;
- }
+ mdelay(10);
+ r = -1;
+ }
- if (r) {
- DRM_ERROR("VCN decode not responding, giving up!!!\n");
- return r;
- }
+ if (r) {
+ DRM_ERROR("VCN decode not responding, giving up!!!\n");
+ return r;
+ }
- /* enable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
- UVD_MASTINT_EN__VCPU_EN_MASK,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
- /* clear the busy bit of VCN_STATUS */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
- WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);
- ring = &adev->vcn.inst[i].ring_dec;
- /* force RBC into idle state */
- rb_bufsz = order_base_2(ring->ring_size);
- tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
- WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
+ ring = &adev->vcn.inst[i].ring_dec;
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
- fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
- /* program the RB_BASE for ring buffer */
- WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
- lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
- upper_32_bits(ring->gpu_addr));
+ fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
+ /* program the RB_BASE for ring buffer */
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
- /* Initialize the ring buffer's read and write pointers */
- WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);
- ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
- WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
- lower_32_bits(ring->wptr));
- fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
+ ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
- fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
- ring = &adev->vcn.inst[i].ring_enc[0];
- WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
- fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;
-
- fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
- ring = &adev->vcn.inst[i].ring_enc[1];
- WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
- fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
- }
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;
+
+ fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
+ ring = &adev->vcn.inst[i].ring_enc[1];
+ WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, mmUVD_STATUS);
return 0;
}
@@ -1178,7 +1427,7 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS),
~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
- size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
/* mc resume*/
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
MMSCH_V1_0_INSERT_DIRECT_WT(
@@ -1288,8 +1537,10 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
return vcn_v2_5_mmsch_start(adev, &adev->virt.mm_table);
}
-static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+static int vcn_v2_5_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t tmp;
/* Wait for power status to be 1 */
@@ -1313,80 +1564,93 @@ static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v2_5_stop(struct amdgpu_device *adev)
+static int vcn_v2_5_stop(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
uint32_t tmp;
- int i, r = 0;
+ int r;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v2_5_stop_dpg_mode(adev, i);
- continue;
- }
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
- /* wait for vcn idle */
- r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
- if (r)
- return r;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v2_5_stop_dpg_mode(vinst);
+ goto done;
+ }
- tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__READ_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
- if (r)
- return r;
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ goto done;
- /* block LMI UMC channel */
- tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
- tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
- WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
- tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
- UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
- if (r)
- return r;
+ /* block LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
- /* block VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
- UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
- /* reset VCPU */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
- /* disable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
- ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
- /* clear status */
- WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
- vcn_v2_5_enable_clock_gating(adev);
+ /* clear status */
+ WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
- /* enable register anti-hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS),
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK,
- ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
- }
+ vcn_v2_5_enable_clock_gating(vinst);
+ /* enable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS),
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, mmUVD_STATUS);
+done:
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, i);
- return 0;
+ return r;
}
-static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state)
+static int vcn_v2_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
struct amdgpu_ring *ring;
uint32_t reg_data = 0;
int ret_code = 0;
@@ -1403,7 +1667,7 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
if (!ret_code) {
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
/* pause DPG */
reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
@@ -1485,7 +1749,7 @@ static uint64_t vcn_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR);
}
@@ -1502,7 +1766,7 @@ static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell) {
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
@@ -1512,7 +1776,7 @@ static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_DEC,
.align_mask = 0xf,
- .vmhub = AMDGPU_MMHUB_1,
+ .secure_submission_supported = true,
.get_rptr = vcn_v2_5_dec_ring_get_rptr,
.get_wptr = vcn_v2_5_dec_ring_get_wptr,
.set_wptr = vcn_v2_5_dec_ring_set_wptr,
@@ -1532,41 +1796,12 @@ static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = {
.insert_start = vcn_v2_0_dec_ring_insert_start,
.insert_end = vcn_v2_0_dec_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
- .begin_use = amdgpu_vcn_ring_begin_use,
- .end_use = amdgpu_vcn_ring_end_use,
- .emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
- .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
- .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
-};
-
-static const struct amdgpu_ring_funcs vcn_v2_6_dec_ring_vm_funcs = {
- .type = AMDGPU_RING_TYPE_VCN_DEC,
- .align_mask = 0xf,
- .vmhub = AMDGPU_MMHUB_0,
- .get_rptr = vcn_v2_5_dec_ring_get_rptr,
- .get_wptr = vcn_v2_5_dec_ring_get_wptr,
- .set_wptr = vcn_v2_5_dec_ring_set_wptr,
- .emit_frame_size =
- SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
- SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
- 8 + /* vcn_v2_0_dec_ring_emit_vm_flush */
- 14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */
- 6,
- .emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */
- .emit_ib = vcn_v2_0_dec_ring_emit_ib,
- .emit_fence = vcn_v2_0_dec_ring_emit_fence,
- .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush,
- .test_ring = vcn_v2_0_dec_ring_test_ring,
- .test_ib = amdgpu_vcn_dec_ring_test_ib,
- .insert_nop = vcn_v2_0_dec_ring_insert_nop,
- .insert_start = vcn_v2_0_dec_ring_insert_start,
- .insert_end = vcn_v2_0_dec_ring_insert_end,
- .pad_ib = amdgpu_ring_generic_pad_ib,
- .begin_use = amdgpu_vcn_ring_begin_use,
- .end_use = amdgpu_vcn_ring_end_use,
+ .begin_use = vcn_v2_5_ring_begin_use,
+ .end_use = vcn_v2_5_ring_end_use,
.emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = amdgpu_vcn_ring_reset,
};
/**
@@ -1599,12 +1834,12 @@ static uint64_t vcn_v2_5_enc_ring_get_wptr(struct amdgpu_ring *ring)
if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR);
} else {
if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2);
}
@@ -1623,14 +1858,14 @@ static void vcn_v2_5_enc_ring_set_wptr(struct amdgpu_ring *ring)
if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
if (ring->use_doorbell) {
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
}
} else {
if (ring->use_doorbell) {
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
@@ -1642,7 +1877,6 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
- .vmhub = AMDGPU_MMHUB_1,
.get_rptr = vcn_v2_5_enc_ring_get_rptr,
.get_wptr = vcn_v2_5_enc_ring_get_wptr,
.set_wptr = vcn_v2_5_enc_ring_set_wptr,
@@ -1661,41 +1895,12 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = {
.insert_nop = amdgpu_ring_insert_nop,
.insert_end = vcn_v2_0_enc_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
- .begin_use = amdgpu_vcn_ring_begin_use,
- .end_use = amdgpu_vcn_ring_end_use,
+ .begin_use = vcn_v2_5_ring_begin_use,
+ .end_use = vcn_v2_5_ring_end_use,
.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
-};
-
-static const struct amdgpu_ring_funcs vcn_v2_6_enc_ring_vm_funcs = {
- .type = AMDGPU_RING_TYPE_VCN_ENC,
- .align_mask = 0x3f,
- .nop = VCN_ENC_CMD_NO_OP,
- .vmhub = AMDGPU_MMHUB_0,
- .get_rptr = vcn_v2_5_enc_ring_get_rptr,
- .get_wptr = vcn_v2_5_enc_ring_get_wptr,
- .set_wptr = vcn_v2_5_enc_ring_set_wptr,
- .emit_frame_size =
- SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
- SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
- 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
- 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
- 1, /* vcn_v2_0_enc_ring_insert_end */
- .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
- .emit_ib = vcn_v2_0_enc_ring_emit_ib,
- .emit_fence = vcn_v2_0_enc_ring_emit_fence,
- .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
- .test_ring = amdgpu_vcn_enc_ring_test_ring,
- .test_ib = amdgpu_vcn_enc_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
- .insert_end = vcn_v2_0_enc_ring_insert_end,
- .pad_ib = amdgpu_ring_generic_pad_ib,
- .begin_use = amdgpu_vcn_ring_begin_use,
- .end_use = amdgpu_vcn_ring_end_use,
- .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
- .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
- .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = amdgpu_vcn_ring_reset,
};
static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
@@ -1705,12 +1910,8 @@ static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
- if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0))
- adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs;
- else /* CHIP_ALDEBARAN */
- adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_6_dec_ring_vm_funcs;
+ adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs;
adev->vcn.inst[i].ring_dec.me = i;
- DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i);
}
}
@@ -1721,34 +1922,41 @@ static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev)
for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
if (adev->vcn.harvest_config & (1 << j))
continue;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
- if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0))
- adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs;
- else /* CHIP_ALDEBARAN */
- adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_6_enc_ring_vm_funcs;
+ for (i = 0; i < adev->vcn.inst[j].num_enc_rings; ++i) {
+ adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs;
adev->vcn.inst[j].ring_enc[i].me = j;
}
- DRM_INFO("VCN(%d) encode is enabled in VM mode\n", j);
}
}
-static bool vcn_v2_5_is_idle(void *handle)
+static int vcn_v2_5_reset(struct amdgpu_vcn_inst *vinst)
+{
+ int r;
+
+ r = vcn_v2_5_stop(vinst);
+ if (r)
+ return r;
+ return vcn_v2_5_start(vinst);
+}
+
+static bool vcn_v2_5_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 1;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
+
ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE);
}
return ret;
}
-static int vcn_v2_5_wait_for_idle(void *handle)
+static int vcn_v2_5_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 0;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -1763,45 +1971,50 @@ static int vcn_v2_5_wait_for_idle(void *handle)
return ret;
}
-static int vcn_v2_5_set_clockgating_state(void *handle,
+static int vcn_v2_5_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
+ int i;
if (amdgpu_sriov_vf(adev))
return 0;
- if (enable) {
- if (!vcn_v2_5_is_idle(handle))
- return -EBUSY;
- vcn_v2_5_enable_clock_gating(adev);
- } else {
- vcn_v2_5_disable_clock_gating(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ if (enable) {
+ if (!vcn_v2_5_is_idle(ip_block))
+ return -EBUSY;
+ vcn_v2_5_enable_clock_gating(vinst);
+ } else {
+ vcn_v2_5_disable_clock_gating(vinst);
+ }
}
return 0;
}
-static int vcn_v2_5_set_powergating_state(void *handle,
- enum amd_powergating_state state)
+static int vcn_v2_5_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = vinst->adev;
int ret;
if (amdgpu_sriov_vf(adev))
return 0;
- if(state == adev->vcn.cur_state)
+ if (state == vinst->cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
- ret = vcn_v2_5_stop(adev);
+ ret = vcn_v2_5_stop(vinst);
else
- ret = vcn_v2_5_start(adev);
+ ret = vcn_v2_5_start(vinst);
- if(!ret)
- adev->vcn.cur_state = state;
+ if (!ret)
+ vinst->cur_state = state;
return ret;
}
@@ -1814,6 +2027,14 @@ static int vcn_v2_5_set_interrupt_state(struct amdgpu_device *adev,
return 0;
}
+static int vcn_v2_6_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -1858,6 +2079,11 @@ static const struct amdgpu_irq_src_funcs vcn_v2_5_irq_funcs = {
.process = vcn_v2_5_process_interrupt,
};
+static const struct amdgpu_irq_src_funcs vcn_v2_6_ras_irq_funcs = {
+ .set = vcn_v2_6_set_ras_interrupt_state,
+ .process = amdgpu_vcn_process_poison_irq,
+};
+
static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev)
{
int i;
@@ -1865,15 +2091,17 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev)
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
- adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].irq.num_types = adev->vcn.inst[i].num_enc_rings + 1;
adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs;
+
+ adev->vcn.inst[i].ras_poison_irq.num_types = adev->vcn.inst[i].num_enc_rings + 1;
+ adev->vcn.inst[i].ras_poison_irq.funcs = &vcn_v2_6_ras_irq_funcs;
}
}
static const struct amd_ip_funcs vcn_v2_5_ip_funcs = {
.name = "vcn_v2_5",
.early_init = vcn_v2_5_early_init,
- .late_init = NULL,
.sw_init = vcn_v2_5_sw_init,
.sw_fini = vcn_v2_5_sw_fini,
.hw_init = vcn_v2_5_hw_init,
@@ -1882,18 +2110,15 @@ static const struct amd_ip_funcs vcn_v2_5_ip_funcs = {
.resume = vcn_v2_5_resume,
.is_idle = vcn_v2_5_is_idle,
.wait_for_idle = vcn_v2_5_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = vcn_v2_5_set_clockgating_state,
- .set_powergating_state = vcn_v2_5_set_powergating_state,
+ .set_powergating_state = vcn_set_powergating_state,
+ .dump_ip_state = amdgpu_vcn_dump_ip_state,
+ .print_ip_state = amdgpu_vcn_print_ip_state,
};
static const struct amd_ip_funcs vcn_v2_6_ip_funcs = {
.name = "vcn_v2_6",
.early_init = vcn_v2_5_early_init,
- .late_init = NULL,
.sw_init = vcn_v2_5_sw_init,
.sw_fini = vcn_v2_5_sw_fini,
.hw_init = vcn_v2_5_hw_init,
@@ -1902,12 +2127,10 @@ static const struct amd_ip_funcs vcn_v2_6_ip_funcs = {
.resume = vcn_v2_5_resume,
.is_idle = vcn_v2_5_is_idle,
.wait_for_idle = vcn_v2_5_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = vcn_v2_5_set_clockgating_state,
- .set_powergating_state = vcn_v2_5_set_powergating_state,
+ .set_powergating_state = vcn_set_powergating_state,
+ .dump_ip_state = amdgpu_vcn_dump_ip_state,
+ .print_ip_state = amdgpu_vcn_print_ip_state,
};
const struct amdgpu_ip_block_version vcn_v2_5_ip_block =
@@ -1927,3 +2150,59 @@ const struct amdgpu_ip_block_version vcn_v2_6_ip_block =
.rev = 0,
.funcs = &vcn_v2_6_ip_funcs,
};
+
+static uint32_t vcn_v2_6_query_poison_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, uint32_t sub_block)
+{
+ uint32_t poison_stat = 0, reg_value = 0;
+
+ switch (sub_block) {
+ case AMDGPU_VCN_V2_6_VCPU_VCODEC:
+ reg_value = RREG32_SOC15(VCN, instance, mmUVD_RAS_VCPU_VCODEC_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF);
+ break;
+ default:
+ break;
+ }
+
+ if (poison_stat)
+ dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n",
+ instance, sub_block);
+
+ return poison_stat;
+}
+
+static bool vcn_v2_6_query_poison_status(struct amdgpu_device *adev)
+{
+ uint32_t inst, sub;
+ uint32_t poison_stat = 0;
+
+ for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++)
+ for (sub = 0; sub < AMDGPU_VCN_V2_6_MAX_SUB_BLOCK; sub++)
+ poison_stat +=
+ vcn_v2_6_query_poison_by_instance(adev, inst, sub);
+
+ return !!poison_stat;
+}
+
+const struct amdgpu_ras_block_hw_ops vcn_v2_6_ras_hw_ops = {
+ .query_poison_status = vcn_v2_6_query_poison_status,
+};
+
+static struct amdgpu_vcn_ras vcn_v2_6_ras = {
+ .ras_block = {
+ .hw_ops = &vcn_v2_6_ras_hw_ops,
+ .ras_late_init = amdgpu_vcn_ras_late_init,
+ },
+};
+
+static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
+ case IP_VERSION(2, 6, 0):
+ adev->vcn.ras = &vcn_v2_6_ras;
+ break;
+ default:
+ break;
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h
index e72f799ed0fd..1c19af74e4fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h
@@ -24,6 +24,12 @@
#ifndef __VCN_V2_5_H__
#define __VCN_V2_5_H__
+enum amdgpu_vcn_v2_6_sub_block {
+ AMDGPU_VCN_V2_6_VCPU_VCODEC = 0,
+
+ AMDGPU_VCN_V2_6_MAX_SUB_BLOCK,
+};
+
extern const struct amdgpu_ip_block_version vcn_v2_5_ip_block;
extern const struct amdgpu_ip_block_version vcn_v2_6_ip_block;
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index da11ceba0698..d9cf8f0feeb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -25,10 +25,12 @@
#include "amdgpu.h"
#include "amdgpu_vcn.h"
#include "amdgpu_pm.h"
+#include "amdgpu_cs.h"
#include "soc15.h"
#include "soc15d.h"
#include "vcn_v2_0.h"
#include "mmsch_v3_0.h"
+#include "vcn_sw_ring.h"
#include "vcn/vcn_3_0_0_offset.h"
#include "vcn/vcn_3_0_0_sh_mask.h"
@@ -36,6 +38,10 @@
#include <drm/drm_drv.h>
+#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00
+#define VCN1_VID_SOC_ADDRESS_3_0 0x48200
+#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
+
#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27
#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f
#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET 0x10
@@ -55,6 +61,42 @@
#define RDECODE_MSG_CREATE 0x00000000
#define RDECODE_MESSAGE_CREATE 0x00000001
+static const struct amdgpu_hwip_reg_entry vcn_reg_list_3_0[] = {
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE)
+};
+
static int amdgpu_ih_clientid_vcns[] = {
SOC15_IH_CLIENTID_VCN,
SOC15_IH_CLIENTID_VCN1
@@ -64,29 +106,33 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev);
static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v3_0_set_powergating_state(void *handle,
- enum amd_powergating_state state);
-static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v3_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v3_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
+static int vcn_v3_0_reset(struct amdgpu_vcn_inst *vinst);
static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring);
static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring);
/**
- * vcn_v3_0_early_init - set function pointers
+ * vcn_v3_0_early_init - set function pointers and load microcode
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
+ * Load microcode from filesystem
*/
-static int vcn_v3_0_early_init(void *handle)
+static int vcn_v3_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
if (amdgpu_sriov_vf(adev)) {
adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID;
adev->vcn.harvest_config = 0;
- adev->vcn.num_enc_rings = 1;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ adev->vcn.inst[i].num_enc_rings = 1;
} else {
if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
@@ -94,42 +140,42 @@ static int vcn_v3_0_early_init(void *handle)
/* both instances are harvested, disable the block */
return -ENOENT;
- if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 0, 33))
- adev->vcn.num_enc_rings = 0;
- else
- adev->vcn.num_enc_rings = 2;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) ==
+ IP_VERSION(3, 0, 33))
+ adev->vcn.inst[i].num_enc_rings = 0;
+ else
+ adev->vcn.inst[i].num_enc_rings = 2;
+ }
}
vcn_v3_0_set_dec_ring_funcs(adev);
vcn_v3_0_set_enc_ring_funcs(adev);
vcn_v3_0_set_irq_funcs(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ adev->vcn.inst[i].set_pg_state = vcn_v3_0_set_pg_state;
+
+ r = amdgpu_vcn_early_init(adev, i);
+ if (r)
+ return r;
+ }
return 0;
}
/**
* vcn_v3_0_sw_init - sw init for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int vcn_v3_0_sw_init(void *handle)
+static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, j, r;
int vcn_doorbell_index = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- r = amdgpu_vcn_sw_init(adev);
- if (r)
- return r;
-
- amdgpu_vcn_setup_ucode(adev);
-
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* Note: doorbell assignment is fixed for SRIOV multiple VCN engines
@@ -145,27 +191,37 @@ static int vcn_v3_0_sw_init(void *handle)
}
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_fw_shared *fw_shared;
if (adev->vcn.harvest_config & (1 << i))
continue;
- adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
- adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
- adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
- adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
- adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
- adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
+ r = amdgpu_vcn_sw_init(adev, i);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev, i);
+
+ r = amdgpu_vcn_resume(adev, i);
+ if (r)
+ return r;
+
+ adev->vcn.inst[i].internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
- adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
adev->vcn.inst[i].external.scratch9 = SOC15_REG_OFFSET(VCN, i, mmUVD_SCRATCH9);
- adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
adev->vcn.inst[i].external.data0 = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_DATA0);
- adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
adev->vcn.inst[i].external.data1 = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_DATA1);
- adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
adev->vcn.inst[i].external.cmd = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_CMD);
- adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
adev->vcn.inst[i].external.nop = SOC15_REG_OFFSET(VCN, i, mmUVD_NO_OP);
/* VCN DEC TRAP */
@@ -179,10 +235,11 @@ static int vcn_v3_0_sw_init(void *handle)
ring = &adev->vcn.inst[i].ring_dec;
ring->use_doorbell = true;
if (amdgpu_sriov_vf(adev)) {
- ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1);
+ ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.inst[i].num_enc_rings + 1);
} else {
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i;
}
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "vcn_dec_%d", i);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
AMDGPU_RING_PRIO_DEFAULT,
@@ -190,7 +247,7 @@ static int vcn_v3_0_sw_init(void *handle)
if (r)
return r;
- for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) {
enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(j);
/* VCN ENC TRAP */
@@ -202,10 +259,11 @@ static int vcn_v3_0_sw_init(void *handle)
ring = &adev->vcn.inst[i].ring_enc[j];
ring->use_doorbell = true;
if (amdgpu_sriov_vf(adev)) {
- ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1 + j;
+ ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.inst[i].num_enc_rings + 1) + 1 + j;
} else {
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i;
}
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "vcn_enc_%d.%d", i, j);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
hw_prio, &adev->vcn.inst[i].sched_score);
@@ -213,20 +271,44 @@ static int vcn_v3_0_sw_init(void *handle)
return r;
}
- fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SW_RING_FLAG) |
cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG) |
cpu_to_le32(AMDGPU_VCN_FW_SHARED_FLAG_0_RB);
fw_shared->sw_ring.is_enabled = cpu_to_le32(DEC_SW_RING_ENABLED);
+ fw_shared->present_flag_0 |= AMDGPU_VCN_SMU_VERSION_INFO_FLAG;
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(3, 1, 2))
+ fw_shared->smu_interface_info.smu_interface_type = 2;
+ else if (amdgpu_ip_version(adev, UVD_HWIP, 0) ==
+ IP_VERSION(3, 1, 1))
+ fw_shared->smu_interface_info.smu_interface_type = 1;
+
+ if (amdgpu_vcnfw_log)
+ amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.inst[i].pause_dpg_mode = vcn_v3_0_pause_dpg_mode;
+ adev->vcn.inst[i].reset = vcn_v3_0_reset;
}
+ adev->vcn.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+ if (!amdgpu_sriov_vf(adev))
+ adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
return r;
}
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode;
+
+ r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_3_0, ARRAY_SIZE(vcn_reg_list_3_0));
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
return 0;
}
@@ -234,22 +316,22 @@ static int vcn_v3_0_sw_init(void *handle)
/**
* vcn_v3_0_sw_fini - sw fini for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* VCN suspend and free up sw allocation
*/
-static int vcn_v3_0_sw_fini(void *handle)
+static int vcn_v3_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, r, idx;
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_fw_shared *fw_shared;
if (adev->vcn.harvest_config & (1 << i))
continue;
- fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
fw_shared->present_flag_0 = 0;
fw_shared->sw_ring.is_enabled = false;
}
@@ -260,32 +342,36 @@ static int vcn_v3_0_sw_fini(void *handle)
if (amdgpu_sriov_vf(adev))
amdgpu_virt_free_mm_table(adev);
- r = amdgpu_vcn_suspend(adev);
- if (r)
- return r;
+ amdgpu_vcn_sysfs_reset_mask_fini(adev);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(adev, i);
+ if (r)
+ return r;
- r = amdgpu_vcn_sw_fini(adev);
+ amdgpu_vcn_sw_fini(adev, i);
+ }
- return r;
+ return 0;
}
/**
* vcn_v3_0_hw_init - start and test VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int vcn_v3_0_hw_init(void *handle)
+static int vcn_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, j, r;
if (amdgpu_sriov_vf(adev)) {
r = vcn_v3_0_start_sriov(adev);
if (r)
- goto done;
+ return r;
/* initialize VCN dec and enc ring buffers */
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -295,6 +381,7 @@ static int vcn_v3_0_hw_init(void *handle)
ring = &adev->vcn.inst[i].ring_dec;
if (amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, i)) {
ring->sched.ready = false;
+ ring->no_scheduler = true;
dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name);
} else {
ring->wptr = 0;
@@ -303,10 +390,11 @@ static int vcn_v3_0_hw_init(void *handle)
ring->sched.ready = true;
}
- for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) {
ring = &adev->vcn.inst[i].ring_enc[j];
if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) {
ring->sched.ready = false;
+ ring->no_scheduler = true;
dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name);
} else {
ring->wptr = 0;
@@ -328,48 +416,45 @@ static int vcn_v3_0_hw_init(void *handle)
r = amdgpu_ring_test_helper(ring);
if (r)
- goto done;
+ return r;
- for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) {
ring = &adev->vcn.inst[i].ring_enc[j];
r = amdgpu_ring_test_helper(ring);
if (r)
- goto done;
+ return r;
}
}
}
-done:
- if (!r)
- DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
-
- return r;
+ return 0;
}
/**
* vcn_v3_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the VCN block, mark ring as not ready any more
*/
-static int vcn_v3_0_hw_fini(void *handle)
+static int vcn_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
-
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
if (adev->vcn.harvest_config & (1 << i))
continue;
+ cancel_delayed_work_sync(&vinst->idle_work);
+
if (!amdgpu_sriov_vf(adev)) {
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
- (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
- RREG32_SOC15(VCN, i, mmUVD_STATUS))) {
- vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ (vinst->cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, i, mmUVD_STATUS))) {
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
}
}
}
@@ -380,41 +465,47 @@ static int vcn_v3_0_hw_fini(void *handle)
/**
* vcn_v3_0_suspend - suspend VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend VCN block
*/
-static int vcn_v3_0_suspend(void *handle)
+static int vcn_v3_0_suspend(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = vcn_v3_0_hw_fini(adev);
+ r = vcn_v3_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_vcn_suspend(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(ip_block->adev, i);
+ if (r)
+ return r;
+ }
- return r;
+ return 0;
}
/**
* vcn_v3_0_resume - resume VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init VCN block
*/
-static int vcn_v3_0_resume(void *handle)
+static int vcn_v3_0_resume(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_resume(ip_block->adev, i);
+ if (r)
+ return r;
+ }
- r = vcn_v3_0_hw_init(adev);
+ r = vcn_v3_0_hw_init(ip_block);
return r;
}
@@ -422,14 +513,15 @@ static int vcn_v3_0_resume(void *handle)
/**
* vcn_v3_0_mc_resume - memory controller programming
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Let the VCN memory controller know it's offsets
*/
-static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst)
+static void vcn_v3_0_mc_resume(struct amdgpu_vcn_inst *vinst)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -469,17 +561,20 @@ static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst)
/* non-cache window */
WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.inst[inst].fw_shared_gpu_addr));
+ lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.inst[inst].fw_shared_gpu_addr));
+ upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_SIZE0,
AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
}
-static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst_idx].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -558,10 +653,10 @@ static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
/* non-cache window */
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
- lower_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
- upper_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, inst_idx, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
@@ -569,12 +664,14 @@ static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);
/* VCN global tiling registers */
- WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
- UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ UVD, inst_idx, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
}
-static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v3_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data = 0;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
@@ -624,8 +721,10 @@ static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int
WREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS, data);
}
-static void vcn_v3_0_enable_static_power_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v3_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
@@ -672,13 +771,14 @@ static void vcn_v3_0_enable_static_power_gating(struct amdgpu_device *adev, int
/**
* vcn_v3_0_disable_clock_gating - disable VCN clock gating
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: Pointer to the VCN instance structure
*
* Disable clock gating for VCN block
*/
-static void vcn_v3_0_disable_clock_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v3_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data;
/* VCN disable CGC */
@@ -805,9 +905,12 @@ static void vcn_v3_0_disable_clock_gating(struct amdgpu_device *adev, int inst)
WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL, data);
}
-static void vcn_v3_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
- uint8_t sram_sel, int inst_idx, uint8_t indirect)
+static void vcn_v3_0_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ uint8_t sram_sel,
+ uint8_t indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t reg_data = 0;
/* enable sw clock gating control */
@@ -856,13 +959,14 @@ static void vcn_v3_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
/**
* vcn_v3_0_enable_clock_gating - enable VCN clock gating
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: Pointer to the VCN instance structure
*
* Enable clock gating for VCN block
*/
-static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v3_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data;
/* enable VCN CGC */
@@ -921,11 +1025,14 @@ static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL, data);
}
-static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static int vcn_v3_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect)
{
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp;
+ int ret;
/* disable register anti-hang mechanism */
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1,
@@ -940,7 +1047,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
/* enable clock gating */
- vcn_v3_0_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+ vcn_v3_0_clock_gating_dpg_mode(vinst, 0, indirect);
/* enable VCPU clock */
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
@@ -989,7 +1096,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
- vcn_v3_0_mc_resume_dpg_mode(adev, inst_idx, indirect);
+ vcn_v3_0_mc_resume_dpg_mode(vinst, indirect);
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, inst_idx, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
@@ -1018,10 +1125,13 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
- if (indirect)
- psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
- (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
- (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));
+ if (indirect) {
+ ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, 0);
+ if (ret) {
+ dev_err(adev->dev, "vcn sram load failed %d\n", ret);
+ return ret;
+ }
+ }
ring = &adev->vcn.inst[inst_idx].ring_dec;
/* force RBC into idle state */
@@ -1072,194 +1182,203 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v3_0_start(struct amdgpu_device *adev)
+static int vcn_v3_0_start(struct amdgpu_vcn_inst *vinst)
{
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_fw_shared *fw_shared;
struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp;
- int i, j, k, r;
+ int j, k, r;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ amdgpu_dpm_enable_vcn(adev, true, i);
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v3_0_start_dpg_mode(vinst, vinst->indirect_sram);
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG){
- r = vcn_v3_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
- continue;
- }
+ /* disable VCN power gating */
+ vcn_v3_0_disable_static_power_gating(vinst);
+
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
- /* disable VCN power gating */
- vcn_v3_0_disable_static_power_gating(adev, i);
+ /* SW clock gating */
+ vcn_v3_0_disable_clock_gating(vinst);
- /* set VCN status busy */
- tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
- WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
- /*SW clock gating */
- vcn_v3_0_disable_clock_gating(adev, i);
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
- /* enable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
-
- /* disable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
-
- /* enable LMI MC and UMC channels */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
-
- tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
- tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
- tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
-
- /* setup mmUVD_LMI_CTRL */
- tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
- WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp |
- UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
- UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
-
- /* setup mmUVD_MPC_CNTL */
- tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
- tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
- tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
- WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
-
- /* setup UVD_MPC_SET_MUXA0 */
- WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
- ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
-
- /* setup UVD_MPC_SET_MUXB0 */
- WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
- ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
-
- /* setup mmUVD_MPC_SET_MUX */
- WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
- ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
- (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
-
- vcn_v3_0_mc_resume(adev, i);
-
- /* VCN global tiling registers */
- WREG32_SOC15(VCN, i, mmUVD_GFX10_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
-
- /* unblock VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
-
- /* release VCPU reset to boot */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
- for (j = 0; j < 10; ++j) {
- uint32_t status;
+ tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
- for (k = 0; k < 100; ++k) {
- status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
- if (status & 2)
- break;
- mdelay(10);
- }
- r = 0;
- if (status & 2)
- break;
+ /* setup mmUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup mmUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup mmUVD_MPC_SET_MUX */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v3_0_mc_resume(vinst);
- DRM_ERROR("VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i);
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
- mdelay(10);
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, i, mmUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (j = 0; j < 10; ++j) {
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) {
+ status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
+ if (status & 2)
+ break;
mdelay(10);
- r = -1;
}
+ r = 0;
+ if (status & 2)
+ break;
- if (r) {
- DRM_ERROR("VCN[%d] decode not responding, giving up!!!\n", i);
- return r;
- }
+ DRM_ERROR("VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
- /* enable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
- UVD_MASTINT_EN__VCPU_EN_MASK,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
+ mdelay(10);
+ r = -1;
+ }
- /* clear the busy bit of VCN_STATUS */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+ if (r) {
+ DRM_ERROR("VCN[%d] decode not responding, giving up!!!\n", i);
+ return r;
+ }
- WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
- ring = &adev->vcn.inst[i].ring_dec;
- /* force RBC into idle state */
- rb_bufsz = order_base_2(ring->ring_size);
- tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
- WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
- fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
- fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);
- /* programm the RB_BASE for ring buffer */
- WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
- lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
- upper_32_bits(ring->gpu_addr));
+ ring = &adev->vcn.inst[i].ring_dec;
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
- /* Initialize the ring buffer's read and write pointers */
- WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
- WREG32_SOC15(VCN, i, mmUVD_SCRATCH2, 0);
- ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
- WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
- lower_32_bits(ring->wptr));
- fw_shared->rb.wptr = lower_32_bits(ring->wptr);
- fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
-
- if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(3, 0, 33)) {
- fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
- ring = &adev->vcn.inst[i].ring_enc[0];
- WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
- fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
-
- fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
- ring = &adev->vcn.inst[i].ring_enc[1];
- WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
- fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
- }
+ /* programm the RB_BASE for ring buffer */
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);
+
+ WREG32_SOC15(VCN, i, mmUVD_SCRATCH2, 0);
+ ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ fw_shared->rb.wptr = lower_32_bits(ring->wptr);
+ fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
+
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) !=
+ IP_VERSION(3, 0, 33)) {
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
+
+ fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
+ ring = &adev->vcn.inst[i].ring_enc[1];
+ WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
}
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, mmUVD_STATUS);
+
return 0;
}
@@ -1295,7 +1414,7 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
header.version = MMSCH_VERSION;
header.total_size = sizeof(struct mmsch_v3_0_init_header) >> 2;
- for (i = 0; i < AMDGPU_MAX_VCN_INSTANCES; i++) {
+ for (i = 0; i < MMSCH_V3_0_VCN_INSTANCES; i++) {
header.inst[i].init_status = 0;
header.inst[i].table_offset = 0;
header.inst[i].table_size = 0;
@@ -1313,7 +1432,7 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
mmUVD_STATUS),
~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
- cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
@@ -1372,7 +1491,7 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
mmUVD_VCPU_CACHE_SIZE2),
AMDGPU_VCN_CONTEXT_SIZE);
- for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) {
ring = &adev->vcn.inst[i].ring_enc[j];
ring->wptr = 0;
rb_addr = ring->gpu_addr;
@@ -1472,10 +1591,15 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
return 0;
}
-static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+static int vcn_v3_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
uint32_t tmp;
+ vcn_v3_0_pause_dpg_mode(vinst, &state);
+
/* Wait for power status to be 1 */
SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
@@ -1497,88 +1621,101 @@ static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v3_0_stop(struct amdgpu_device *adev)
+static int vcn_v3_0_stop(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
uint32_t tmp;
- int i, r = 0;
+ int r = 0;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v3_0_stop_dpg_mode(adev, i);
- continue;
- }
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v3_0_stop_dpg_mode(vinst);
+ goto done;
+ }
- /* wait for vcn idle */
- r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
- if (r)
- return r;
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ goto done;
- tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__READ_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
- if (r)
- return r;
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
+
+ /* disable LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
- /* disable LMI UMC channel */
- tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
- tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
- WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
- tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
- UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
- if (r)
- return r;
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
- /* block VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
- UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
- /* reset VCPU */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
- /* disable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
- ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+ /* apply soft reset */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
- /* apply soft reset */
- tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
- tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
- tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
+ /* clear status */
+ WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
- /* clear status */
- WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
+ /* apply HW clock gating */
+ vcn_v3_0_enable_clock_gating(vinst);
- /* apply HW clock gating */
- vcn_v3_0_enable_clock_gating(adev, i);
+ /* enable VCN power gating */
+ vcn_v3_0_enable_static_power_gating(vinst);
- /* enable VCN power gating */
- vcn_v3_0_enable_static_power_gating(adev, i);
- }
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, mmUVD_STATUS);
+done:
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, i);
- return 0;
+ return r;
}
-static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state)
+static int vcn_v3_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
{
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ struct amdgpu_fw_shared *fw_shared;
struct amdgpu_ring *ring;
uint32_t reg_data = 0;
int ret_code;
@@ -1609,9 +1746,10 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
- if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(3, 0, 33)) {
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) !=
+ IP_VERSION(3, 0, 33)) {
/* Restore */
- fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
+ fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
ring = &adev->vcn.inst[inst_idx].ring_enc[0];
ring->wptr = 0;
@@ -1681,7 +1819,7 @@ static uint64_t vcn_v3_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR);
}
@@ -1696,131 +1834,77 @@ static uint64_t vcn_v3_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_fw_shared *fw_shared;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
/*whenever update RBC_RB_WPTR, we save the wptr in shared rb.wptr and scratch2 */
- fw_shared = adev->vcn.inst[ring->me].fw_shared_cpu_addr;
+ fw_shared = adev->vcn.inst[ring->me].fw_shared.cpu_addr;
fw_shared->rb.wptr = lower_32_bits(ring->wptr);
WREG32_SOC15(VCN, ring->me, mmUVD_SCRATCH2,
lower_32_bits(ring->wptr));
}
if (ring->use_doorbell) {
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
}
}
-static void vcn_v3_0_dec_sw_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
- u64 seq, uint32_t flags)
-{
- WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
-
- amdgpu_ring_write(ring, VCN_DEC_SW_CMD_FENCE);
- amdgpu_ring_write(ring, addr);
- amdgpu_ring_write(ring, upper_32_bits(addr));
- amdgpu_ring_write(ring, seq);
- amdgpu_ring_write(ring, VCN_DEC_SW_CMD_TRAP);
-}
-
-static void vcn_v3_0_dec_sw_ring_insert_end(struct amdgpu_ring *ring)
-{
- amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END);
-}
-
-static void vcn_v3_0_dec_sw_ring_emit_ib(struct amdgpu_ring *ring,
- struct amdgpu_job *job,
- struct amdgpu_ib *ib,
- uint32_t flags)
-{
- uint32_t vmid = AMDGPU_JOB_GET_VMID(job);
-
- amdgpu_ring_write(ring, VCN_DEC_SW_CMD_IB);
- amdgpu_ring_write(ring, vmid);
- amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
- amdgpu_ring_write(ring, ib->length_dw);
-}
-
-static void vcn_v3_0_dec_sw_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
- uint32_t val, uint32_t mask)
-{
- amdgpu_ring_write(ring, VCN_DEC_SW_CMD_REG_WAIT);
- amdgpu_ring_write(ring, reg << 2);
- amdgpu_ring_write(ring, mask);
- amdgpu_ring_write(ring, val);
-}
-
-static void vcn_v3_0_dec_sw_ring_emit_vm_flush(struct amdgpu_ring *ring,
- uint32_t vmid, uint64_t pd_addr)
-{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
- uint32_t data0, data1, mask;
-
- pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
-
- /* wait for register write */
- data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
- data1 = lower_32_bits(pd_addr);
- mask = 0xffffffff;
- vcn_v3_0_dec_sw_ring_emit_reg_wait(ring, data0, data1, mask);
-}
-
-static void vcn_v3_0_dec_sw_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
-{
- amdgpu_ring_write(ring, VCN_DEC_SW_CMD_REG_WRITE);
- amdgpu_ring_write(ring, reg << 2);
- amdgpu_ring_write(ring, val);
-}
-
static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_DEC,
.align_mask = 0x3f,
.nop = VCN_DEC_SW_CMD_NO_OP,
- .vmhub = AMDGPU_MMHUB_0,
+ .secure_submission_supported = true,
.get_rptr = vcn_v3_0_dec_ring_get_rptr,
.get_wptr = vcn_v3_0_dec_ring_get_wptr,
.set_wptr = vcn_v3_0_dec_ring_set_wptr,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
- 4 + /* vcn_v3_0_dec_sw_ring_emit_vm_flush */
- 5 + 5 + /* vcn_v3_0_dec_sw_ring_emit_fdec_swe x2 vm fdec_swe */
- 1, /* vcn_v3_0_dec_sw_ring_insert_end */
- .emit_ib_size = 5, /* vcn_v3_0_dec_sw_ring_emit_ib */
- .emit_ib = vcn_v3_0_dec_sw_ring_emit_ib,
- .emit_fence = vcn_v3_0_dec_sw_ring_emit_fence,
- .emit_vm_flush = vcn_v3_0_dec_sw_ring_emit_vm_flush,
+ VCN_SW_RING_EMIT_FRAME_SIZE,
+ .emit_ib_size = 5, /* vcn_dec_sw_ring_emit_ib */
+ .emit_ib = vcn_dec_sw_ring_emit_ib,
+ .emit_fence = vcn_dec_sw_ring_emit_fence,
+ .emit_vm_flush = vcn_dec_sw_ring_emit_vm_flush,
.test_ring = amdgpu_vcn_dec_sw_ring_test_ring,
.test_ib = NULL,//amdgpu_vcn_dec_sw_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
- .insert_end = vcn_v3_0_dec_sw_ring_insert_end,
+ .insert_end = vcn_dec_sw_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_vcn_ring_begin_use,
.end_use = amdgpu_vcn_ring_end_use,
- .emit_wreg = vcn_v3_0_dec_sw_ring_emit_wreg,
- .emit_reg_wait = vcn_v3_0_dec_sw_ring_emit_reg_wait,
+ .emit_wreg = vcn_dec_sw_ring_emit_wreg,
+ .emit_reg_wait = vcn_dec_sw_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
-static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p)
+static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job)
{
struct drm_gpu_scheduler **scheds;
+ struct dma_fence *fence;
- /* The create msg must be in the first IB submitted */
- if (atomic_read(&p->entity->fence_seq))
+ /* if VCN0 is harvested, we can't support AV1 */
+ if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)
return -EINVAL;
+ /* wait for all jobs to finish before switching to instance 0 */
+ fence = amdgpu_ctx_get_fence(p->ctx, job->base.entity, ~0ull);
+ if (fence) {
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ }
+
scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC]
[AMDGPU_RING_PRIO_DEFAULT].sched;
- drm_sched_entity_modify_sched(p->entity, scheds, 1);
+ drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
return 0;
}
-static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
+static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
+ uint64_t addr)
{
struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_bo_va_mapping *map;
@@ -1828,7 +1912,7 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
struct amdgpu_bo *bo;
uint64_t start, end;
unsigned int i;
- void * ptr;
+ void *ptr;
int r;
addr &= AMDGPU_GMC_HOLE_MASK;
@@ -1891,7 +1975,7 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
continue;
- r = vcn_v3_0_limit_sched(p);
+ r = vcn_v3_0_limit_sched(p, job);
if (r)
goto out;
}
@@ -1902,10 +1986,10 @@ out:
}
static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
- uint32_t ib_idx)
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
{
- struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
- struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
+ struct amdgpu_ring *ring = amdgpu_job_ring(job);
uint32_t msg_lo = 0, msg_hi = 0;
unsigned i;
int r;
@@ -1915,16 +1999,17 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
return 0;
for (i = 0; i < ib->length_dw; i += 2) {
- uint32_t reg = amdgpu_get_ib_value(p, ib_idx, i);
- uint32_t val = amdgpu_get_ib_value(p, ib_idx, i + 1);
+ uint32_t reg = amdgpu_ib_get_value(ib, i);
+ uint32_t val = amdgpu_ib_get_value(ib, i + 1);
- if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) {
+ if (reg == PACKET0(p->adev->vcn.inst[ring->me].internal.data0, 0)) {
msg_lo = val;
- } else if (reg == PACKET0(p->adev->vcn.internal.data1, 0)) {
+ } else if (reg == PACKET0(p->adev->vcn.inst[ring->me].internal.data1, 0)) {
msg_hi = val;
- } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) &&
+ } else if (reg == PACKET0(p->adev->vcn.inst[ring->me].internal.cmd, 0) &&
val == 0) {
- r = vcn_v3_0_dec_msg(p, ((u64)msg_hi) << 32 | msg_lo);
+ r = vcn_v3_0_dec_msg(p, job,
+ ((u64)msg_hi) << 32 | msg_lo);
if (r)
return r;
}
@@ -1935,7 +2020,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_DEC,
.align_mask = 0xf,
- .vmhub = AMDGPU_MMHUB_0,
+ .secure_submission_supported = true,
.get_rptr = vcn_v3_0_dec_ring_get_rptr,
.get_wptr = vcn_v3_0_dec_ring_get_wptr,
.set_wptr = vcn_v3_0_dec_ring_set_wptr,
@@ -1961,6 +2046,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = {
.emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = amdgpu_vcn_ring_reset,
};
/**
@@ -1993,12 +2079,12 @@ static uint64_t vcn_v3_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR);
} else {
if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2);
}
@@ -2017,14 +2103,14 @@ static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
if (ring->use_doorbell) {
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
}
} else {
if (ring->use_doorbell) {
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
@@ -2036,7 +2122,6 @@ static const struct amdgpu_ring_funcs vcn_v3_0_enc_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
- .vmhub = AMDGPU_MMHUB_0,
.get_rptr = vcn_v3_0_enc_ring_get_rptr,
.get_wptr = vcn_v3_0_enc_ring_get_wptr,
.set_wptr = vcn_v3_0_enc_ring_set_wptr,
@@ -2060,6 +2145,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_enc_ring_vm_funcs = {
.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = amdgpu_vcn_ring_reset,
};
static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev)
@@ -2075,8 +2161,6 @@ static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev)
else
adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_sw_ring_vm_funcs;
adev->vcn.inst[i].ring_dec.me = i;
- DRM_INFO("VCN(%d) decode%s is enabled in VM mode\n", i,
- DEC_SW_RING_ENABLED?"(Software Ring)":"");
}
}
@@ -2088,18 +2172,28 @@ static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev)
if (adev->vcn.harvest_config & (1 << i))
continue;
- for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) {
adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v3_0_enc_ring_vm_funcs;
adev->vcn.inst[i].ring_enc[j].me = i;
}
- if (adev->vcn.num_enc_rings > 0)
- DRM_INFO("VCN(%d) encode is enabled in VM mode\n", i);
}
}
-static bool vcn_v3_0_is_idle(void *handle)
+static int vcn_v3_0_reset(struct amdgpu_vcn_inst *vinst)
+{
+ int r;
+
+ r = vcn_v3_0_stop(vinst);
+ if (r)
+ return r;
+ vcn_v3_0_enable_clock_gating(vinst);
+ vcn_v3_0_enable_static_power_gating(vinst);
+ return vcn_v3_0_start(vinst);
+}
+
+static bool vcn_v3_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 1;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -2112,9 +2206,9 @@ static bool vcn_v3_0_is_idle(void *handle)
return ret;
}
-static int vcn_v3_0_wait_for_idle(void *handle)
+static int vcn_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 0;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -2130,54 +2224,55 @@ static int vcn_v3_0_wait_for_idle(void *handle)
return ret;
}
-static int vcn_v3_0_set_clockgating_state(void *handle,
+static int vcn_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = state == AMD_CG_STATE_GATE;
int i;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
if (adev->vcn.harvest_config & (1 << i))
continue;
if (enable) {
if (RREG32_SOC15(VCN, i, mmUVD_STATUS) != UVD_STATUS__IDLE)
return -EBUSY;
- vcn_v3_0_enable_clock_gating(adev, i);
+ vcn_v3_0_enable_clock_gating(vinst);
} else {
- vcn_v3_0_disable_clock_gating(adev, i);
+ vcn_v3_0_disable_clock_gating(vinst);
}
}
return 0;
}
-static int vcn_v3_0_set_powergating_state(void *handle,
- enum amd_powergating_state state)
+static int vcn_v3_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int ret;
+ struct amdgpu_device *adev = vinst->adev;
+ int ret = 0;
/* for SRIOV, guest should not control VCN Power-gating
* MMSCH FW should control Power-gating and clock-gating
* guest should avoid touching CGC and PG
*/
if (amdgpu_sriov_vf(adev)) {
- adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+ vinst->cur_state = AMD_PG_STATE_UNGATE;
return 0;
}
- if(state == adev->vcn.cur_state)
+ if (state == vinst->cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
- ret = vcn_v3_0_stop(adev);
+ ret = vcn_v3_0_stop(vinst);
else
- ret = vcn_v3_0_start(adev);
+ ret = vcn_v3_0_start(vinst);
- if(!ret)
- adev->vcn.cur_state = state;
+ if (!ret)
+ vinst->cur_state = state;
return ret;
}
@@ -2242,7 +2337,7 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev)
if (adev->vcn.harvest_config & (1 << i))
continue;
- adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].irq.num_types = adev->vcn.inst[i].num_enc_rings + 1;
adev->vcn.inst[i].irq.funcs = &vcn_v3_0_irq_funcs;
}
}
@@ -2250,7 +2345,6 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev)
static const struct amd_ip_funcs vcn_v3_0_ip_funcs = {
.name = "vcn_v3_0",
.early_init = vcn_v3_0_early_init,
- .late_init = NULL,
.sw_init = vcn_v3_0_sw_init,
.sw_fini = vcn_v3_0_sw_fini,
.hw_init = vcn_v3_0_hw_init,
@@ -2259,16 +2353,13 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = {
.resume = vcn_v3_0_resume,
.is_idle = vcn_v3_0_is_idle,
.wait_for_idle = vcn_v3_0_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = vcn_v3_0_set_clockgating_state,
- .set_powergating_state = vcn_v3_0_set_powergating_state,
+ .set_powergating_state = vcn_set_powergating_state,
+ .dump_ip_state = amdgpu_vcn_dump_ip_state,
+ .print_ip_state = amdgpu_vcn_print_ip_state,
};
-const struct amdgpu_ip_block_version vcn_v3_0_ip_block =
-{
+const struct amdgpu_ip_block_version vcn_v3_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_VCN,
.major = 3,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
new file mode 100644
index 000000000000..3ae666522d57
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -0,0 +1,2315 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_cs.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_hw_ip.h"
+#include "vcn_v2_0.h"
+#include "mmsch_v4_0.h"
+#include "vcn_v4_0.h"
+
+#include "vcn/vcn_4_0_0_offset.h"
+#include "vcn/vcn_4_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
+
+#include <drm/drm_drv.h>
+
+#define mmUVD_DPG_LMA_CTL regUVD_DPG_LMA_CTL
+#define mmUVD_DPG_LMA_CTL_BASE_IDX regUVD_DPG_LMA_CTL_BASE_IDX
+#define mmUVD_DPG_LMA_DATA regUVD_DPG_LMA_DATA
+#define mmUVD_DPG_LMA_DATA_BASE_IDX regUVD_DPG_LMA_DATA_BASE_IDX
+
+#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
+#define VCN1_VID_SOC_ADDRESS_3_0 0x48300
+#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
+
+#define VCN_HARVEST_MMSCH 0
+
+#define RDECODE_MSG_CREATE 0x00000000
+#define RDECODE_MESSAGE_CREATE 0x00000001
+
+static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0[] = { /* register list passed to amdgpu_vcn_reg_dump_init() by vcn_v4_0_sw_init() */
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_CONFIG),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE)
+};
+
+static int amdgpu_ih_clientid_vcns[] = { /* IH client id, indexed by VCN instance in vcn_v4_0_sw_init(); only two entries, so this assumes at most two instances - TODO confirm */
+ SOC15_IH_CLIENTID_VCN,
+ SOC15_IH_CLIENTID_VCN1
+};
+
+static int vcn_v4_0_start_sriov(struct amdgpu_device *adev);
+static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev);
+static int vcn_v4_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v4_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
+static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring);
+static void vcn_v4_0_set_ras_funcs(struct amdgpu_device *adev);
+
+/**
+ * vcn_v4_0_early_init - set function pointers and run per-instance early init
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Under SR-IOV, rebuild the harvest mask from the instances the hypervisor
+ * reports as disabled.  Every instance then gets a single encode ring (used
+ * as the unified ring), the ring/irq/ras callbacks are installed, and each
+ * instance receives its powergating hook before amdgpu_vcn_early_init() runs.
+ *
+ * Returns 0 on success or the first error from amdgpu_vcn_early_init().
+ */
+static int vcn_v4_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst;
+	int ret = 0;
+
+	if (amdgpu_sriov_vf(adev)) {
+		adev->vcn.harvest_config = VCN_HARVEST_MMSCH;
+
+		for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+			if (!amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, inst))
+				continue;
+
+			adev->vcn.harvest_config |= 1 << inst;
+			dev_info(adev->dev, "VCN%d is disabled by hypervisor\n", inst);
+		}
+	}
+
+	/* re-use enc ring as unified ring */
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++)
+		adev->vcn.inst[inst].num_enc_rings = 1;
+
+	vcn_v4_0_set_unified_ring_funcs(adev);
+	vcn_v4_0_set_irq_funcs(adev);
+	vcn_v4_0_set_ras_funcs(adev);
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+		adev->vcn.inst[inst].set_pg_state = vcn_v4_0_set_pg_state;
+
+		ret = amdgpu_vcn_early_init(adev, inst);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/*
+ * vcn_v4_0_fw_shared_init - fill in the firmware shared area of one instance
+ *
+ * @adev: amdgpu device
+ * @inst_idx: index of the VCN instance whose fw_shared buffer is written
+ *
+ * Marks the unified queue and the SMU DPM interface as present, records
+ * whether the device is an APU or a dGPU, enables the DRM key injection
+ * workaround on VCN IP 4.0.2 and, when amdgpu_vcnfw_log is set, initializes
+ * the firmware log.
+ *
+ * Always returns 0.
+ */
+static int vcn_v4_0_fw_shared_init(struct amdgpu_device *adev, int inst_idx)
+{
+	struct amdgpu_vcn4_fw_shared *fw_shared;
+
+	fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+	fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+	fw_shared->sq.is_enabled = 1;
+
+	fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG);
+	fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ?
+		AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU;
+
+	if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 2)) {
+		/*
+		 * present_flag_0 is written through cpu_to_le32() everywhere
+		 * else in this function; convert this flag as well so the
+		 * field is consistent on big-endian hosts.
+		 */
+		fw_shared->present_flag_0 |=
+			cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT);
+		fw_shared->drm_key_wa.method =
+			AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING;
+	}
+
+	if (amdgpu_vcnfw_log)
+		amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]);
+
+	return 0;
+}
+
+/**
+ * vcn_v4_0_sw_init - sw init for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Load firmware and sw initialization
+ *
+ * For every instance that is not harvested this runs amdgpu_vcn_sw_init(),
+ * amdgpu_vcn_setup_ucode() and amdgpu_vcn_resume(), registers the unified
+ * trap and the poison interrupt sources, sets up ring_enc[0] as a
+ * doorbell-driven ring named "vcn_unified_<i>" and initializes the firmware
+ * shared area.  Afterwards it fills adev->vcn.supported_reset, allocates the
+ * MM table under SR-IOV, and runs the RAS, register dump and sysfs reset
+ * mask setup.
+ *
+ * Returns 0 on success, or the first non-zero error code.  On error the
+ * function returns immediately; nothing set up earlier is undone here.
+ */
+static int vcn_v4_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_ring *ring;
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ r = amdgpu_vcn_sw_init(adev, i);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev, i);
+
+ r = amdgpu_vcn_resume(adev, i);
+ if (r)
+ return r;
+
+ /* Init instance 0 sched_score to 1, so it's scheduled after other instances */
+ if (i == 0)
+ atomic_set(&adev->vcn.inst[i].sched_score, 1);
+ else
+ atomic_set(&adev->vcn.inst[i].sched_score, 0);
+
+ /* VCN UNIFIED TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
+ VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
+ if (r)
+ return r;
+
+ /* VCN POISON TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
+ VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst[i].ras_poison_irq);
+ if (r)
+ return r;
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ ring->use_doorbell = true;
+ if (amdgpu_sriov_vf(adev))
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i *
+ (adev->vcn.inst[i].num_enc_rings + 1) + 1;
+ else
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ sprintf(ring->name, "vcn_unified_%d", i); /* NOTE(review): unbounded sprintf; size of ring->name is not visible here - confirm it fits */
+
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
+ AMDGPU_RING_PRIO_0, &adev->vcn.inst[i].sched_score);
+ if (r)
+ return r;
+
+ vcn_v4_0_fw_shared_init(adev, i);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.inst[i].pause_dpg_mode = vcn_v4_0_pause_dpg_mode;
+ }
+
+ adev->vcn.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); /* always queries instance 0, regardless of harvest_config */
+ if (!amdgpu_sriov_vf(adev))
+ adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_virt_alloc_mm_table(adev);
+ if (r)
+ return r;
+ }
+
+
+ r = amdgpu_vcn_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_4_0, ARRAY_SIZE(vcn_reg_list_4_0));
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_sw_fini - sw fini for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * VCN suspend and free up sw allocation
+ *
+ * While the DRM device is still accessible, the firmware shared flags of
+ * every non-harvested instance are cleared.  The SR-IOV MM table is freed,
+ * every instance is suspended, the sysfs reset mask is removed and the
+ * per-instance software state is released.
+ *
+ * A failing amdgpu_vcn_suspend() no longer aborts the teardown: the remaining
+ * instances are still suspended and all software state is still freed, so
+ * nothing is leaked.  Returns 0 on success or the first suspend error.
+ */
+static int vcn_v4_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int i, r, idx;
+	int ret = 0;
+
+	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+			struct amdgpu_vcn4_fw_shared *fw_shared;
+
+			if (adev->vcn.harvest_config & (1 << i))
+				continue;
+
+			fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+			fw_shared->present_flag_0 = 0;
+			fw_shared->sq.is_enabled = 0;
+		}
+
+		drm_dev_exit(idx);
+	}
+
+	if (amdgpu_sriov_vf(adev))
+		amdgpu_virt_free_mm_table(adev);
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+		r = amdgpu_vcn_suspend(adev, i);
+		/* remember the first failure but keep tearing down */
+		if (r && !ret)
+			ret = r;
+	}
+
+	amdgpu_vcn_sysfs_reset_mask_fini(adev);
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+		amdgpu_vcn_sw_fini(adev, i);
+
+	return ret;
+}
+
+/**
+ * vcn_v4_0_hw_init - start and test VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * On bare metal, program the doorbell range of every non-harvested instance
+ * and run the ring test on its unified ring.  Under SR-IOV, start the block
+ * through vcn_v4_0_start_sriov(), then reset the write pointer of each
+ * non-harvested unified ring and mark its scheduler ready.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int vcn_v4_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_ring *ring;
+	int inst, ret;
+
+	if (!amdgpu_sriov_vf(adev)) {
+		for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+			if (adev->vcn.harvest_config & (1 << inst))
+				continue;
+
+			ring = &adev->vcn.inst[inst].ring_enc[0];
+
+			adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+				((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * inst), inst);
+
+			ret = amdgpu_ring_test_helper(ring);
+			if (ret)
+				return ret;
+		}
+
+		return 0;
+	}
+
+	ret = vcn_v4_0_start_sriov(adev);
+	if (ret)
+		return ret;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+		if (adev->vcn.harvest_config & (1 << inst))
+			continue;
+
+		ring = &adev->vcn.inst[inst].ring_enc[0];
+		ring->wptr = 0;
+		ring->wptr_old = 0;
+		vcn_v4_0_unified_ring_set_wptr(ring);
+		ring->sched.ready = true;
+	}
+
+	return 0;
+}
+
+/**
+ * vcn_v4_0_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * For every non-harvested instance: cancel the idle work, gate the instance
+ * through its set_pg_state hook when required (bare metal only), and release
+ * the RAS poison interrupt when VCN RAS is supported.
+ *
+ * Always returns 0.
+ */
+static int vcn_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_vcn_inst *vinst;
+	int inst;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+		if (adev->vcn.harvest_config & (1 << inst))
+			continue;
+
+		vinst = &adev->vcn.inst[inst];
+		cancel_delayed_work_sync(&vinst->idle_work);
+
+		/*
+		 * Gate when DPG is supported, or when the instance is not
+		 * already gated and UVD_STATUS reads non-zero.  The register
+		 * is only read when the earlier tests do not decide.
+		 */
+		if (!amdgpu_sriov_vf(adev) &&
+		    ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
+		     (vinst->cur_state != AMD_PG_STATE_GATE &&
+		      RREG32_SOC15(VCN, inst, regUVD_STATUS))))
+			vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
+
+		if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
+			amdgpu_irq_put(adev, &vinst->ras_poison_irq, 0);
+	}
+
+	return 0;
+}
+
+/**
+ * vcn_v4_0_suspend - suspend VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Run vcn_v4_0_hw_fini() and then amdgpu_vcn_suspend() on each instance in
+ * order, stopping at the first failure.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int vcn_v4_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst, ret;
+
+	ret = vcn_v4_0_hw_fini(ip_block);
+
+	/* the loop is skipped entirely when hw_fini already failed */
+	for (inst = 0; !ret && inst < adev->vcn.num_vcn_inst; inst++)
+		ret = amdgpu_vcn_suspend(adev, inst);
+
+	return ret;
+}
+
+/**
+ * vcn_v4_0_resume - resume VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Call amdgpu_vcn_resume() on each instance in order, then bring the
+ * hardware back up through vcn_v4_0_hw_init().
+ *
+ * Returns 0 on success, the first amdgpu_vcn_resume() error, or the result
+ * of vcn_v4_0_hw_init().
+ */
+static int vcn_v4_0_resume(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst, ret;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+		ret = amdgpu_vcn_resume(adev, inst);
+		if (ret)
+			return ret;
+	}
+
+	return vcn_v4_0_hw_init(ip_block);
+}
+
+/**
+ * vcn_v4_0_mc_resume - memory controller programming
+ *
+ * @vinst: VCN instance
+ *
+ * Let the VCN memory controller know its offsets
+ *
+ * Programs three VCPU cache windows (firmware, stack, context) and one
+ * non-cached window covering the firmware shared structure.  The firmware
+ * window size is the ucode size from the firmware header plus 8, rounded up
+ * to a GPU page.  With PSP loading, window 0 uses the TMR address recorded
+ * in adev->firmware.ucode[] and the stack starts at the instance gpu_addr;
+ * otherwise window 0 points at gpu_addr and the stack follows the firmware
+ * image (offset = size).
+ */
+static void vcn_v4_0_mc_resume(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t offset, size;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr));
+ offset = size;
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack */
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context */
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+
+ /* non-cache window */
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_SIZE0,
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+}
+
+/**
+ * vcn_v4_0_mc_resume_dpg_mode - memory controller programming for dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Let the VCN memory controller know its offsets with dpg mode
+ *
+ * Programs the firmware, stack and context cache windows, the non-cached
+ * firmware shared window and UVD_GFX10_ADDR_CONFIG, all through
+ * WREG32_SOC15_DPG_MODE.  When @indirect is set, zero is written instead of
+ * the real value for: the window 0 base address (PSP load type only), the
+ * window 0 size, and the window 1 base address.
+ */
+static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t offset, size;
+ const struct common_firmware_header *hdr;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ offset = 0;
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ offset = size;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ if (!indirect)
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG),
+ adev->gfx.config.gb_addr_config, 0, indirect);
+}
+
+/**
+ * vcn_v4_0_disable_static_power_gating - disable VCN static power gating
+ *
+ * @vinst: VCN instance
+ *
+ * Disable static power gating for VCN block
+ *
+ * With AMD_PG_SUPPORT_VCN set, UVD_PGFSM_CONFIG is written with 1 for the
+ * UVDM/UVDS/UVDLM fields and 2 for the remaining fields, then the function
+ * waits for UVD_PGFSM_STATUS to match
+ * UVD_PGFSM_STATUS__UVDM_UVDU_UVDLM_PWR_ON_3_0.  Without it, every field is
+ * written as 1 and the expected status is 0 for instance 0 and 0x2200800 for
+ * any other instance.  Both waits use the mask 0x3F3FFFFF.  Finally bits
+ * 0x103 of UVD_POWER_STATUS are cleared and, with AMD_PG_SUPPORT_VCN, the
+ * power-on and PG enable bits are set.
+ */
+static void vcn_v4_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t data = 0;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDS_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTC_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTA_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
+
+ WREG32_SOC15(VCN, inst, regUVD_PGFSM_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_PGFSM_STATUS,
+ UVD_PGFSM_STATUS__UVDM_UVDU_UVDLM_PWR_ON_3_0, 0x3F3FFFFF);
+ } else {
+ uint32_t value;
+
+ value = (inst) ? 0x2200800 : 0; /* expected PGFSM status differs between instance 0 and the others */
+ data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDS_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTC_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTA_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTB_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
+
+ WREG32_SOC15(VCN, inst, regUVD_PGFSM_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_PGFSM_STATUS, value, 0x3F3FFFFF);
+ }
+
+ data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS);
+ data &= ~0x103;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
+ data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON | /* NOTE(review): a UVD_PGFSM_CONFIG constant OR-ed into UVD_POWER_STATUS - confirm intended */
+ UVD_POWER_STATUS__UVD_PG_EN_MASK;
+
+ WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data);
+
+ return;
+}
+
+/**
+ * vcn_v4_0_enable_static_power_gating - enable VCN static power gating
+ *
+ * @vinst: VCN instance
+ *
+ * Enable static power gating for VCN block
+ *
+ * Does nothing unless AMD_PG_SUPPORT_VCN is set.  Otherwise the
+ * UVD_POWER_STATUS field is set to TILES_OFF, every PWR_CONFIG field of
+ * UVD_PGFSM_CONFIG is written as 2, and the function waits until
+ * UVD_PGFSM_STATUS reports 2 in every matching PWR_STATUS field
+ * (mask 0x3F3FFFFF).
+ */
+static void vcn_v4_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t data;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ /* Before power off, this indicator has to be turned on */
+ data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS);
+ data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
+ data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
+ WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data);
+
+ data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDS_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTC_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTA_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
+ WREG32_SOC15(VCN, inst, regUVD_PGFSM_CONFIG, data);
+
+ data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDS_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTC_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTA_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDLM_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDAB_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTB_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDNA_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDNB_PWR_STATUS__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_PGFSM_STATUS, data, 0x3F3FFFFF);
+ }
+
+ return;
+}
+
+/**
+ * vcn_v4_0_disable_clock_gating - disable VCN clock gating
+ *
+ * @vinst: VCN instance
+ *
+ * Disable clock gating for VCN block
+ *
+ * Returns immediately when AMD_CG_SUPPORT_VCN_MGCG is set.  Otherwise, in
+ * order: clears DYN_CLOCK_MODE in UVD_CGC_CTRL and ORs in a gate delay timer
+ * of 1 and an off delay of 4; clears the listed bits of UVD_CGC_GATE and
+ * waits for the register to read back 0; clears the per-block MODE bits of
+ * UVD_CGC_CTRL; sets the listed bits of UVD_SUVD_CGC_GATE; and clears the
+ * MODE bits of UVD_SUVD_CGC_CTRL.
+ */
+static void vcn_v4_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t data;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ return;
+
+ /* VCN disable CGC */
+ data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, inst, regUVD_CGC_GATE);
+ data &= ~(UVD_CGC_GATE__SYS_MASK
+ | UVD_CGC_GATE__UDEC_MASK
+ | UVD_CGC_GATE__MPEG2_MASK
+ | UVD_CGC_GATE__REGS_MASK
+ | UVD_CGC_GATE__RBC_MASK
+ | UVD_CGC_GATE__LMI_MC_MASK
+ | UVD_CGC_GATE__LMI_UMC_MASK
+ | UVD_CGC_GATE__IDCT_MASK
+ | UVD_CGC_GATE__MPRD_MASK
+ | UVD_CGC_GATE__MPC_MASK
+ | UVD_CGC_GATE__LBSI_MASK
+ | UVD_CGC_GATE__LRBBM_MASK
+ | UVD_CGC_GATE__UDEC_RE_MASK
+ | UVD_CGC_GATE__UDEC_CM_MASK
+ | UVD_CGC_GATE__UDEC_IT_MASK
+ | UVD_CGC_GATE__UDEC_DB_MASK
+ | UVD_CGC_GATE__UDEC_MP_MASK
+ | UVD_CGC_GATE__WCB_MASK
+ | UVD_CGC_GATE__VCPU_MASK
+ | UVD_CGC_GATE__MMSCH_MASK);
+
+ WREG32_SOC15(VCN, inst, regUVD_CGC_GATE, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_CGC_GATE, 0, 0xFFFFFFFF);
+
+ data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
+ data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_GATE);
+ data |= (UVD_SUVD_CGC_GATE__SRE_MASK
+ | UVD_SUVD_CGC_GATE__SIT_MASK
+ | UVD_SUVD_CGC_GATE__SMP_MASK
+ | UVD_SUVD_CGC_GATE__SCM_MASK
+ | UVD_SUVD_CGC_GATE__SDB_MASK
+ | UVD_SUVD_CGC_GATE__SRE_H264_MASK
+ | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_H264_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCM_H264_MASK
+ | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_H264_MASK
+ | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCLR_MASK
+ | UVD_SUVD_CGC_GATE__UVD_SC_MASK
+ | UVD_SUVD_CGC_GATE__ENT_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
+ | UVD_SUVD_CGC_GATE__SITE_MASK
+ | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
+ | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_GATE, data);
+
+ data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL);
+ data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL, data);
+}
+
+/**
+ * vcn_v4_0_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode
+ *
+ * @vinst: VCN instance
+ * @sram_sel: sram select
+ * @indirect: indirectly write sram
+ *
+ * Disable clock gating for VCN block with dpg mode
+ *
+ * Returns immediately when AMD_CG_SUPPORT_VCN_MGCG is set.  Otherwise the
+ * UVD_CGC_CTRL value is built from zero rather than read back: dynamic clock
+ * mode 0, gate delay timer 1, off delay 4, with the listed MODE bits masked
+ * off.  The function then writes 0 to UVD_CGC_GATE, 1 to UVD_SUVD_CGC_GATE
+ * and 0 to UVD_SUVD_CGC_CTRL, all through WREG32_SOC15_DPG_MODE.
+ */
+static void vcn_v4_0_disable_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ uint8_t sram_sel,
+ uint8_t indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t reg_data = 0;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ return;
+
+ /* enable sw clock gating control */
+ reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
+ UVD_CGC_CTRL__SYS_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MODE_MASK |
+ UVD_CGC_CTRL__MPEG2_MODE_MASK |
+ UVD_CGC_CTRL__REGS_MODE_MASK |
+ UVD_CGC_CTRL__RBC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+ UVD_CGC_CTRL__IDCT_MODE_MASK |
+ UVD_CGC_CTRL__MPRD_MODE_MASK |
+ UVD_CGC_CTRL__MPC_MODE_MASK |
+ UVD_CGC_CTRL__LBSI_MODE_MASK |
+ UVD_CGC_CTRL__LRBBM_MODE_MASK |
+ UVD_CGC_CTRL__WCB_MODE_MASK |
+ UVD_CGC_CTRL__VCPU_MODE_MASK);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_CGC_CTRL), reg_data, sram_sel, indirect);
+
+ /* turn off clock gating */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_CGC_GATE), 0, sram_sel, indirect);
+
+ /* turn on SUVD clock gating */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
+
+ /* turn on sw mode in UVD_SUVD_CGC_CTRL */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
+}
+
+/**
+ * vcn_v4_0_enable_clock_gating - enable VCN clock gating
+ *
+ * @vinst: VCN instance
+ *
+ * Enable clock gating for VCN block
+ *
+ * Returns immediately when AMD_CG_SUPPORT_VCN_MGCG is set.  Otherwise ORs a
+ * gate delay timer of 1 and an off delay of 4 into UVD_CGC_CTRL, then sets
+ * the per-block MODE bits of UVD_CGC_CTRL and of UVD_SUVD_CGC_CTRL.
+ *
+ * NOTE(review): the early return has the same polarity as in the disable
+ * path, so nothing is programmed here when MGCG is supported - confirm this
+ * is intended.
+ */
+static void vcn_v4_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t data;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ return;
+
+ /* enable VCN CGC */
+ data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
+ data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; /* OR with 0: leaves DYN_CLOCK_MODE as read */
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
+ data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL);
+ data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL, data);
+}
+
+/**
+ * vcn_v4_0_enable_ras - program the VCN RAS control registers in dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Writes VCN_RAS_CNTL and UVD_SYS_INT_EN through the DPG-mode write path.
+ * Does nothing when amdgpu_ras_is_supported() reports no RAS support for
+ * the VCN block.
+ */
+static void vcn_v4_0_enable_ras(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t tmp;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
+ return;
+
+ tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK;
+ /* NOTE(review): the register offset is computed for instance 0 while
+ * the write is issued for inst_idx; confirm this is intentional.
+ */
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL),
+ tmp, 0, indirect);
+
+ tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
+ /* same instance-0 offset as above */
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN),
+ tmp, 0, indirect);
+}
+
+/**
+ * vcn_v4_0_start_dpg_mode - VCN start with dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Start VCN block with dpg mode
+ *
+ * Programs the instance through WREG32_SOC15_DPG_MODE(); when @indirect is
+ * set, amdgpu_vcn_psp_update_sram() is called once the sequence has been
+ * issued. Afterwards the unified ring (ring_enc[0]) is programmed with
+ * direct register writes and enabled.
+ *
+ * Returns 0 on success, or the error code of amdgpu_vcn_psp_update_sram().
+ */
+static int vcn_v4_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_ring *ring;
+ uint32_t tmp;
+ int ret;
+
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
+ WREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS, tmp);
+
+ /* rewind the DPG sram cursor to the start of the CPU-visible buffer */
+ if (indirect)
+ adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+
+ /* program the clock gating registers via the dpg-mode helper
+ * (sram_sel = 0)
+ */
+ vcn_v4_0_disable_clock_gating_dpg_mode(vinst, 0, indirect);
+
+ /* enable VCPU clock */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* disable master interrupt */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MASTINT_EN), 0, 0, indirect);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ 0x00100000L);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_CTRL), tmp, 0, indirect);
+
+ /* setup regUVD_MPC_CNTL */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MPC_CNTL),
+ 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
+
+ /* setup regUVD_MPC_SET_MUXA0 */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MPC_SET_MUXA0),
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
+
+ /* setup regUVD_MPC_SET_MUXB0 */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MPC_SET_MUXB0),
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
+
+ /* setup regUVD_MPC_SET_MUX */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MPC_SET_MUX),
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
+
+ vcn_v4_0_mc_resume_dpg_mode(vinst, indirect);
+
+ /* rewrite UVD_VCPU_CNTL with CLK_EN only, i.e. without BLK_RST */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* enable LMI MC and UMC channels */
+ tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect);
+
+ vcn_v4_0_enable_ras(vinst, indirect);
+
+ /* enable master interrupt */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+
+ /* indirect mode: hand the accumulated sram contents to PSP */
+ if (indirect) {
+ ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, 0);
+ if (ret) {
+ dev_err(adev->dev, "vcn sram load failed %d\n", ret);
+ return ret;
+ }
+ }
+
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+
+ /* ring buffer base address and size (size is written in dwords) */
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4);
+
+ /* disable RB1 and flag a ring reset to the firmware while the
+ * read/write pointers are being cleared
+ */
+ tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0);
+
+ /* make WPTR equal to the RPTR read back and mirror it in ring->wptr */
+ tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
+
+ /* re-enable RB1 and clear the reset / hold-off flags */
+ tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ /* program the ring's doorbell offset and enable the doorbell */
+ WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, regUVD_STATUS);
+
+ return 0;
+}
+
+
+/**
+ * vcn_v4_0_start - VCN start
+ *
+ * @vinst: VCN instance
+ *
+ * Start VCN block
+ *
+ * Harvested instances are skipped. When AMD_PG_SUPPORT_VCN_DPG is set the
+ * work is delegated to vcn_v4_0_start_dpg_mode(); otherwise the instance is
+ * brought up with direct register writes, the VCPU is released from reset
+ * and polled for readiness, and the unified ring is programmed.
+ *
+ * Returns 0 on success (or for a harvested instance), the return value of
+ * vcn_v4_0_start_dpg_mode() in dpg mode, or -1 when the VCPU never reports
+ * ready.
+ */
+static int vcn_v4_0_start(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_ring *ring;
+ uint32_t tmp;
+ int j, k, r;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, true, i);
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v4_0_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram);
+
+ /* disable VCN power gating */
+ vcn_v4_0_disable_static_power_gating(vinst);
+
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, i, regUVD_STATUS, tmp);
+
+ /*SW clock gating */
+ vcn_v4_0_disable_clock_gating(vinst);
+
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ /* take LMI and LMI_UMC out of soft reset */
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup regUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, i, regUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUX */
+ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v4_0_mc_resume(vinst);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* Up to 10 attempts; each attempt polls UVD_STATUS up to 100 times,
+ * 10 ms apart, for bit 1 (value 2) to become set.
+ */
+ for (j = 0; j < 10; ++j) {
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) {
+ status = RREG32_SOC15(VCN, i, regUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ }
+
+ if (amdgpu_emu_mode == 1) {
+ /* emulation: keep polling, the VCPU is not reset between attempts */
+ r = -1;
+ if (status & 2) {
+ r = 0;
+ break;
+ }
+ } else {
+ r = 0;
+ if (status & 2)
+ break;
+
+ /* pulse BLK_RST to restart the VCPU before the next attempt */
+ dev_err(adev->dev, "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ mdelay(10);
+ r = -1;
+ }
+ }
+
+ /* r is still -1 only if every attempt ended without the ready bit */
+ if (r) {
+ dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
+ return r;
+ }
+
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+ /* program the doorbell of the unified ring */
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ /* ring buffer base address and size (size is written in dwords) */
+ WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);
+
+ /* disable RB1 and flag a ring reset to the firmware while the
+ * read/write pointers are being cleared
+ */
+ tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0);
+
+ /* make WPTR equal to the RPTR read back and mirror it in ring->wptr */
+ tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);
+
+ /* re-enable RB1 and clear the reset / hold-off flags */
+ tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, regUVD_STATUS);
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_init_ring_metadata - initialise the metadata block behind a ring
+ *
+ * @adev: amdgpu_device pointer (not used)
+ * @vcn_inst: VCN instance number; its low 8 bits are stored as the ring id
+ * @ring_enc: ring whose trailing metadata block is initialised
+ *
+ * The metadata structure sits ring_size bytes after the start of the ring
+ * buffer. It is zeroed and then filled with its own size, version 1, the
+ * ring id and the RB_SETUP and RB_DECOUPLE present flags.
+ *
+ * Always returns 0.
+ */
+static int vcn_v4_0_init_ring_metadata(struct amdgpu_device *adev, uint32_t vcn_inst, struct amdgpu_ring *ring_enc)
+{
+	struct amdgpu_vcn_rb_metadata *meta;
+
+	/* the metadata block starts right after the ring contents */
+	meta = (struct amdgpu_vcn_rb_metadata *)((uint8_t *)ring_enc->ring +
+						 ring_enc->ring_size);
+
+	memset(meta, 0, sizeof(*meta));
+	meta->size = sizeof(*meta);
+	meta->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG |
+					    AMDGPU_VCN_VF_RB_DECOUPLE_FLAG);
+	meta->version = 1;
+	meta->ring_id = vcn_inst & 0xFF;
+
+	return 0;
+}
+
+/**
+ * vcn_v4_0_start_sriov - start VCN through the MMSCH under SR-IOV
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Builds an MMSCH init table in adev->virt.mm_table: a header followed by
+ * one command list per non-harvested VCN instance. The table address and
+ * size are then written to the MMSCH_VF_* registers of VCN instance 0, the
+ * initialization is kicked off through the mailbox, and the response is
+ * polled.
+ *
+ * NOTE(review): the MMSCH_V4_0_INSERT_* macros are defined outside this
+ * function; presumably they append to table_loc and advance table_size
+ * using the direct_wt/direct_rd_mod_wt/end locals - confirm against the
+ * macro definitions.
+ *
+ * Returns 0 once a non-zero mailbox response is seen (even when the status
+ * check afterwards logs an error), or -EBUSY when no response arrives
+ * within the timeout.
+ */
+static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
+{
+ int i;
+ struct amdgpu_ring *ring_enc;
+ uint64_t cache_addr;
+ uint64_t rb_enc_addr;
+ uint64_t ctx_addr;
+ uint32_t param, resp, expected;
+ uint32_t offset, cache_size;
+ uint32_t tmp, timeout;
+
+ struct amdgpu_mm_table *table = &adev->virt.mm_table;
+ uint32_t *table_loc;
+ uint32_t table_size;
+ uint32_t size, size_dw;
+ uint32_t init_status;
+ uint32_t enabled_vcn;
+
+ struct mmsch_v4_0_cmd_direct_write
+ direct_wt = { {0} };
+ struct mmsch_v4_0_cmd_direct_read_modify_write
+ direct_rd_mod_wt = { {0} };
+ struct mmsch_v4_0_cmd_end end = { {0} };
+ struct mmsch_v4_0_init_header header;
+
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_fw_shared_rb_setup *rb_setup;
+
+ /* command templates, one per command type */
+ direct_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_WRITE;
+ direct_rd_mod_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+ end.cmd_header.command_type =
+ MMSCH_COMMAND__END;
+
+ /* total_size is kept in dwords (byte size >> 2) and starts out
+ * covering only the header itself
+ */
+ header.version = MMSCH_VERSION;
+ header.total_size = sizeof(struct mmsch_v4_0_init_header) >> 2;
+ for (i = 0; i < MMSCH_V4_0_VCN_INSTANCES; i++) {
+ header.inst[i].init_status = 0;
+ header.inst[i].table_offset = 0;
+ header.inst[i].table_size = 0;
+ }
+
+ /* the command lists start right behind the header */
+ table_loc = (uint32_t *)table->cpu_addr;
+ table_loc += header.total_size;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ // Must re/init fw_shared at beginning
+ vcn_v4_0_fw_shared_init(adev, i);
+
+ table_size = 0;
+
+ MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_STATUS),
+ ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
+
+ cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
+
+ /* cache window 0: the firmware image, taken either from the
+ * PSP-loaded TMR address or from the instance's own buffer
+ */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
+ offset = 0;
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_CACHE_OFFSET0),
+ 0);
+ } else {
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr));
+ offset = cache_size;
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_CACHE_SIZE0),
+ cache_size);
+
+ /* cache window 1: AMDGPU_VCN_STACK_SIZE bytes at gpu_addr + offset */
+ cache_addr = adev->vcn.inst[i].gpu_addr + offset;
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(cache_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(cache_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_CACHE_OFFSET1),
+ 0);
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_CACHE_SIZE1),
+ AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: AMDGPU_VCN_CONTEXT_SIZE bytes right after window 1 */
+ cache_addr = adev->vcn.inst[i].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE;
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(cache_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(cache_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_CACHE_OFFSET2),
+ 0);
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_CACHE_SIZE2),
+ AMDGPU_VCN_CONTEXT_SIZE);
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ rb_setup = &fw_shared->rb_setup;
+
+ ring_enc = &adev->vcn.inst[i].ring_enc[0];
+ ring_enc->wptr = 0;
+ rb_enc_addr = ring_enc->gpu_addr;
+
+ rb_setup->is_rb_enabled_flags |= RB_ENABLED;
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+
+ /* the ring is described to the firmware through fw_shared: either
+ * as per-instance rb_info entries (decoupled) or as a single
+ * address/size triple
+ */
+ if (amdgpu_sriov_is_vcn_rb_decouple(adev)) {
+ vcn_v4_0_init_ring_metadata(adev, i, ring_enc);
+
+ memset((void *)&rb_setup->rb_info, 0, sizeof(struct amdgpu_vcn_rb_setup_info) * MAX_NUM_VCN_RB_SETUP);
+ /* instances 0 and 1 are hard-coded here and land in
+ * rb_info[0] and rb_info[2] respectively
+ */
+ if (!(adev->vcn.harvest_config & (1 << 0))) {
+ rb_setup->rb_info[0].rb_addr_lo = lower_32_bits(adev->vcn.inst[0].ring_enc[0].gpu_addr);
+ rb_setup->rb_info[0].rb_addr_hi = upper_32_bits(adev->vcn.inst[0].ring_enc[0].gpu_addr);
+ rb_setup->rb_info[0].rb_size = adev->vcn.inst[0].ring_enc[0].ring_size / 4;
+ }
+ if (!(adev->vcn.harvest_config & (1 << 1))) {
+ rb_setup->rb_info[2].rb_addr_lo = lower_32_bits(adev->vcn.inst[1].ring_enc[0].gpu_addr);
+ rb_setup->rb_info[2].rb_addr_hi = upper_32_bits(adev->vcn.inst[1].ring_enc[0].gpu_addr);
+ rb_setup->rb_info[2].rb_size = adev->vcn.inst[1].ring_enc[0].ring_size / 4;
+ }
+ fw_shared->decouple.is_enabled = 1;
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_DECOUPLE_FLAG);
+ } else {
+ rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
+ rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
+ rb_setup->rb_size = ring_enc->ring_size / 4;
+ }
+
+ /* non-cached window 0: the fw_shared structure itself */
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+
+ /* add end packet */
+ MMSCH_V4_0_INSERT_END();
+
+ /* refine header */
+ header.inst[i].init_status = 0;
+ header.inst[i].table_offset = header.total_size;
+ header.inst[i].table_size = table_size;
+ header.total_size += table_size;
+ }
+
+ /* Update init table header in memory */
+ size = sizeof(struct mmsch_v4_0_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy((void *)table_loc, &header, size);
+
+ /* message MMSCH (in VCN[0]) to initialize this client
+ * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr
+ * of memory descriptor location
+ */
+ ctx_addr = table->gpu_addr;
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+ /* 2, update vmid of descriptor */
+ tmp = RREG32_SOC15(VCN, 0, regMMSCH_VF_VMID);
+ tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ /* use domain0 for MM scheduler */
+ tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_VMID, tmp);
+
+ /* 3, notify mmsch about the size of this descriptor */
+ size = header.total_size;
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_SIZE, size);
+
+ /* 4, set resp to zero */
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP, 0);
+
+ /* 5, kick off the initialization and wait until
+ * MMSCH_VF_MAILBOX_RESP becomes non-zero
+ */
+ param = 0x00000001;
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_HOST, param);
+ /* tmp counts the microseconds waited so far; give up after 1000 */
+ tmp = 0;
+ timeout = 1000;
+ resp = 0;
+ expected = MMSCH_VF_MAILBOX_RESP__OK;
+ while (resp != expected) {
+ resp = RREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP);
+ /* any non-zero response ends the wait; it is judged below */
+ if (resp != 0)
+ break;
+
+ udelay(10);
+ tmp = tmp + 10;
+ if (tmp >= timeout) {
+ DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+ " waiting for regMMSCH_VF_MAILBOX_RESP "\
+ "(expected=0x%08x, readback=0x%08x)\n",
+ tmp, expected, resp);
+ return -EBUSY;
+ }
+ }
+ /* read init_status back from the in-memory header: entry 1 is used
+ * when amdgpu_vcn_is_disabled_vcn() reports decode instance 0 as
+ * disabled, entry 0 otherwise
+ */
+ enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
+ init_status = ((struct mmsch_v4_0_init_header *)(table_loc))->inst[enabled_vcn].init_status;
+ /* a bad status is only logged; the function still returns 0 */
+ if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
+ && init_status != MMSCH_VF_ENGINE_STATUS__PASS)
+ DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
+ "status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_stop_dpg_mode - VCN stop with dpg mode
+ *
+ * @vinst: VCN instance
+ *
+ * Stop VCN block with dpg mode
+ *
+ * Unpauses DPG, waits for the power status and for the ring read pointer
+ * to catch up with the write pointer, then clears UVD_PG_MODE in
+ * UVD_POWER_STATUS. The results of the waits are not checked.
+ */
+static void vcn_v4_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
+ uint32_t tmp;
+
+ /* request the unpaused state before waiting on the hardware */
+ vcn_v4_0_pause_dpg_mode(vinst, &state);
+ /* Wait for power status to be 1 */
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* wait for read ptr to be equal to write ptr */
+ tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);
+
+ /* wait for power status to be 1 again after the ring has drained */
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* disable dynamic power gating mode */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, regUVD_STATUS);
+}
+
+/**
+ * vcn_v4_0_stop - VCN stop
+ *
+ * @vinst: VCN instance
+ *
+ * Stop VCN block
+ *
+ * Harvested instances are skipped. In dpg mode the work is delegated to
+ * vcn_v4_0_stop_dpg_mode(); otherwise the function waits for the block to
+ * go idle, stalls the LMI UMC channel, resets the VCPU and re-enables
+ * clock and power gating. The VCN DPM state is disabled on every exit path
+ * through the done label.
+ *
+ * Always returns 0.
+ *
+ * NOTE(review): when one of the SOC15_WAIT_ON_RREG() calls fails, r is set
+ * and the rest of the sequence is skipped, but the error is not returned.
+ * vcn_v4_0_ring_reset() checks this return value - confirm whether
+ * swallowing the error here is deliberate.
+ */
+static int vcn_v4_0_stop(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+ uint32_t tmp;
+ int r = 0;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+
+ /* set the DPG hold-off flag in the firmware shared area first */
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ vcn_v4_0_stop_dpg_mode(vinst);
+ goto done;
+ }
+
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ goto done;
+
+ /* wait until every read/write clean bit in UVD_LMI_STATUS is set */
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
+
+ /* disable LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
+
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+ /* apply soft reset */
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+
+ /* clear status */
+ WREG32_SOC15(VCN, i, regUVD_STATUS, 0);
+
+ /* apply HW clock gating */
+ vcn_v4_0_enable_clock_gating(vinst);
+
+ /* enable VCN power gating */
+ vcn_v4_0_enable_static_power_gating(vinst);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, regUVD_STATUS);
+
+done:
+ /* reached on success, on wait failure and from the dpg path */
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, false, i);
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_pause_dpg_mode - VCN pause with dpg mode
+ *
+ * @vinst: VCN instance
+ * @new_state: pause state
+ *
+ * Pause dpg mode for VCN block
+ *
+ * Only acts when @new_state->fw_based differs from the state cached in
+ * adev->vcn.inst[].pause_state; the cached state is updated afterwards,
+ * including when the initial power-status wait of a pause request fails.
+ *
+ * Always returns 0.
+ */
+static int vcn_v4_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t reg_data = 0;
+ int ret_code;
+
+ /* pause/unpause if state is changed */
+ if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
+ DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d",
+ adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
+ /* current UVD_DPG_PAUSE value with the ACK bit masked out */
+ reg_data = RREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE) &
+ (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+ /* the pause request is only issued if this wait succeeds */
+ ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 0x1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ if (!ret_code) {
+ /* pause DPG */
+ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data);
+
+ /* wait for ACK */
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ /* NOTE(review): the expected value is a UVD_PGFSM_CONFIG
+ * constant compared under the UVD_POWER_STATUS mask;
+ * confirm the two encodings agree.
+ */
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS,
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ }
+ } else {
+ /* unpause dpg, no need to wait */
+ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data);
+ }
+ adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_unified_ring_get_rptr - get unified read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Reads UVD_RB_RPTR of the VCN instance the ring belongs to. An error is
+ * logged when @ring is not that instance's unified ring (ring_enc[0]); the
+ * register is read regardless.
+ *
+ * Returns the current hardware unified read pointer
+ */
+static uint64_t vcn_v4_0_unified_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	/* 'adev' is referenced implicitly by the RREG32_SOC15() macro */
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring *unified = &adev->vcn.inst[ring->me].ring_enc[0];
+	uint64_t rptr;
+
+	if (unified != ring)
+		DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+	rptr = RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR);
+
+	return rptr;
+}
+
+/**
+ * vcn_v4_0_unified_ring_get_wptr - get unified write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * With doorbells in use the value comes from the CPU-side write pointer
+ * copy, otherwise from the UVD_RB_WPTR register. An error is logged when
+ * @ring is not the instance's unified ring (ring_enc[0]).
+ *
+ * Returns the current hardware unified write pointer
+ */
+static uint64_t vcn_v4_0_unified_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	/* 'adev' is referenced implicitly by the RREG32_SOC15() macro */
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring *unified = &adev->vcn.inst[ring->me].ring_enc[0];
+
+	if (unified != ring)
+		DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+	if (!ring->use_doorbell)
+		return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR);
+
+	return *ring->wptr_cpu_addr;
+}
+
+/**
+ * vcn_v4_0_unified_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the enc write pointer to the hardware: through the CPU-side
+ * copy plus a doorbell write when doorbells are in use, otherwise through
+ * the UVD_RB_WPTR register. Only the low 32 bits of ring->wptr are
+ * written. An error is logged when @ring is not the instance's unified
+ * ring (ring_enc[0]).
+ */
+static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	/* 'adev' is referenced implicitly by the register/doorbell macros */
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring *unified = &adev->vcn.inst[ring->me].ring_enc[0];
+
+	if (unified != ring)
+		DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+	if (!ring->use_doorbell) {
+		WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr));
+		return;
+	}
+
+	*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+}
+
+/**
+ * vcn_v4_0_limit_sched - restrict a job's entity to a single scheduler
+ *
+ * @p: command submission parser
+ * @job: job whose scheduler entity is re-targeted
+ *
+ * Waits for the fence returned by amdgpu_ctx_get_fence() for the entity,
+ * then replaces the entity's scheduler list with the first scheduler of
+ * the AMDGPU_HW_IP_VCN_ENC / AMDGPU_RING_PRIO_0 list.
+ *
+ * Returns 0 on success, -EINVAL when VCN0 is harvested.
+ */
+static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p,
+				struct amdgpu_job *job)
+{
+	struct amdgpu_device *adev = p->adev;
+	struct drm_sched_entity *entity = job->base.entity;
+	struct dma_fence *pending;
+
+	/* if VCN0 is harvested, we can't support AV1 */
+	if (adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)
+		return -EINVAL;
+
+	/* wait for all jobs to finish before switching to instance 0 */
+	pending = amdgpu_ctx_get_fence(p->ctx, entity, ~0ull);
+	if (pending) {
+		dma_fence_wait(pending, false);
+		dma_fence_put(pending);
+	}
+
+	/* hand over only the first entry of the scheduler list */
+	drm_sched_entity_modify_sched(entity,
+				      adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC]
+						     [AMDGPU_RING_PRIO_0].sched,
+				      1);
+	return 0;
+}
+
+/**
+ * vcn_v4_0_dec_msg - inspect a decode message supplied by userspace
+ *
+ * @p: command submission parser
+ * @job: job the message belongs to
+ * @addr: GPU virtual address of the message buffer
+ *
+ * Maps the buffer object backing @addr and, for RDECODE_MSG_CREATE
+ * messages, walks the buffer table. For every RDECODE_MESSAGE_CREATE entry
+ * whose first dword is not 0x7, 0x10 or 0x11 the job is restricted through
+ * vcn_v4_0_limit_sched().
+ *
+ * The message content is controlled by userspace, so every access is
+ * bounded by the end of the VA mapping that contains @addr.
+ *
+ * Returns 0 on success, -EINVAL for a misaligned or out-of-bounds message,
+ * or the error returned by the lookup, validation, mapping or
+ * vcn_v4_0_limit_sched() calls.
+ */
+static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
+			    uint64_t addr)
+{
+	struct ttm_operation_ctx ctx = { false, false };
+	struct amdgpu_bo_va_mapping *map;
+	uint32_t *msg, num_buffers;
+	struct amdgpu_bo *bo;
+	uint64_t start, end;
+	unsigned int i;
+	void *ptr;
+	int r;
+
+	addr &= AMDGPU_GMC_HOLE_MASK;
+	r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
+	if (r) {
+		DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
+		return r;
+	}
+
+	start = map->start * AMDGPU_GPU_PAGE_SIZE;
+	end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
+	if (addr & 0x7) {
+		DRM_ERROR("VCN messages must be 8 byte aligned!\n");
+		return -EINVAL;
+	}
+
+	/* msg[1]..msg[3] are read unconditionally below */
+	if (end - addr < 4 * sizeof(uint32_t))
+		return -EINVAL;
+
+	bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+	amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	if (r) {
+		DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
+		return r;
+	}
+
+	r = amdgpu_bo_kmap(bo, &ptr);
+	if (r) {
+		DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
+		return r;
+	}
+
+	msg = ptr + addr - start;
+
+	/* Check length */
+	if (msg[1] > end - addr) {
+		r = -EINVAL;
+		goto out;
+	}
+
+	if (msg[3] != RDECODE_MSG_CREATE)
+		goto out;
+
+	num_buffers = msg[2];
+	for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {
+		uint32_t offset, size, *create;
+
+		/* Entry i occupies dwords [6 + 4 * i, 10 + 4 * i) of the
+		 * message; never walk the table past the mapping.
+		 */
+		if (addr + (10 + 4ull * i) * sizeof(uint32_t) > end) {
+			r = -EINVAL;
+			goto out;
+		}
+
+		if (msg[0] != RDECODE_MESSAGE_CREATE)
+			continue;
+
+		offset = msg[1];
+		size = msg[2];
+
+		/* offset is relative to addr, so compare against the space
+		 * left in the mapping and add in 64 bit so the sum cannot
+		 * wrap. The first dword of the buffer must be readable too.
+		 */
+		if ((uint64_t)offset + size > end - addr ||
+		    (uint64_t)offset + sizeof(*create) > end - addr) {
+			r = -EINVAL;
+			goto out;
+		}
+
+		create = ptr + addr + offset - start;
+
+		/* H264, HEVC and VP9 can run on any instance */
+		if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
+			continue;
+
+		r = vcn_v4_0_limit_sched(p, job);
+		if (r)
+			goto out;
+	}
+
+out:
+	amdgpu_bo_kunmap(bo);
+	return r;
+}
+
+/* engine types and parameter ids matched while scanning an IB */
+#define RADEON_VCN_ENGINE_TYPE_ENCODE			(0x00000002)
+#define RADEON_VCN_ENGINE_TYPE_DECODE			(0x00000003)
+#define RADEON_VCN_ENGINE_INFO				(0x30000001)
+#define RENCODE_ENCODE_STANDARD_AV1			2
+#define RENCODE_IB_PARAM_SESSION_INIT			0x00000003
+
+/**
+ * vcn_v4_0_enc_find_ib_param - find a parameter package in an IB
+ *
+ * @ib: indirect buffer to scan
+ * @id: package id to look for
+ * @start: dword index at which to start scanning
+ *
+ * Each package starts with its size in bytes followed by its id. The scan
+ * stops at the end of the IB or at a package whose size is below 8 bytes.
+ *
+ * Returns the offset in ib if id is found, -1 otherwise.
+ */
+static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int start)
+{
+	int i;
+
+	/* both the size dword at i and the id dword at i + 1 must lie
+	 * inside the IB before they are read
+	 */
+	for (i = start;
+	     i < ib->length_dw && ib->length_dw - i > 1 && ib->ptr[i] >= 8;
+	     i += ib->ptr[i] / 4) {
+		if (ib->ptr[i + 1] == id)
+			return i;
+	}
+	return -1;
+}
+
+/**
+ * vcn_v4_0_ring_patch_cs_in_place - check an IB submitted to a unified ring
+ *
+ * @p: command submission parser
+ * @job: job the IB belongs to
+ * @ib: indirect buffer to inspect
+ *
+ * IBs for instance 0 are accepted as they are. For other instances every
+ * RADEON_VCN_ENGINE_INFO package is examined: a decode package with a
+ * valid message buffer is handed to vcn_v4_0_dec_msg(), and an encode
+ * package followed by an AV1 session init restricts the job through
+ * vcn_v4_0_limit_sched().
+ *
+ * Returns 0 when nothing needs to be done, otherwise the result of
+ * vcn_v4_0_dec_msg() or vcn_v4_0_limit_sched().
+ */
+static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
+					   struct amdgpu_job *job,
+					   struct amdgpu_ib *ib)
+{
+	struct amdgpu_ring *ring = amdgpu_job_ring(job);
+	struct amdgpu_vcn_decode_buffer *dec;
+	uint64_t msg_addr;
+	uint32_t engine;
+	int pos, init_pos;
+
+	/* The first instance can decode anything */
+	if (ring->me == 0)
+		return 0;
+
+	/* step from one engine-info package to the next */
+	for (pos = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO, 0);
+	     pos >= 0;
+	     pos = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO,
+					      pos + ib->ptr[pos] / 4)) {
+		engine = amdgpu_ib_get_value(ib, pos + 2); /* RADEON_VCN_ENGINE_TYPE */
+
+		switch (engine) {
+		case RADEON_VCN_ENGINE_TYPE_DECODE:
+			dec = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[pos + 6];
+
+			if (!(dec->valid_buf_flag & 0x1))
+				return 0;
+
+			msg_addr = ((u64)dec->msg_buffer_address_hi) << 32 |
+				dec->msg_buffer_address_lo;
+			return vcn_v4_0_dec_msg(p, job, msg_addr);
+		case RADEON_VCN_ENGINE_TYPE_ENCODE:
+			init_pos = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT, pos);
+			if (init_pos >= 0 &&
+			    ib->ptr[init_pos + 2] == RENCODE_ENCODE_STANDARD_AV1)
+				return vcn_v4_0_limit_sched(p, job);
+			break;
+		default:
+			break;
+		}
+	}
+	return 0;
+}
+
+/**
+ * vcn_v4_0_ring_reset - reset a unified ring by restarting its instance
+ *
+ * @ring: ring to reset
+ * @vmid: vmid of the hung job (not used)
+ * @timedout_fence: fence of the job that timed out
+ *
+ * Brackets a stop/start cycle of the ring's VCN instance with the generic
+ * ring reset helpers.
+ *
+ * Returns the error from vcn_v4_0_stop() or vcn_v4_0_start() if either
+ * fails, otherwise the result of amdgpu_ring_reset_helper_end().
+ */
+static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring,
+			       unsigned int vmid,
+			       struct amdgpu_fence *timedout_fence)
+{
+	struct amdgpu_vcn_inst *vinst = &ring->adev->vcn.inst[ring->me];
+	int err;
+
+	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+	/* only restart the instance if it could be stopped */
+	err = vcn_v4_0_stop(vinst);
+	if (!err)
+		err = vcn_v4_0_start(vinst);
+	if (err)
+		return err;
+
+	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+/*
+ * Ring callbacks for the VCN 4.0 unified queue (registered as an ENC ring).
+ * Most emit helpers are shared with VCN 2.0. The table is deliberately not
+ * const: vcn_v4_0_set_unified_ring_funcs() sets secure_submission_supported
+ * at init time for IP version 4.0.2.
+ */
+static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_ENC,
+ .align_mask = 0x3f,
+ .nop = VCN_ENC_CMD_NO_OP,
+ .extra_bytes = sizeof(struct amdgpu_vcn_rb_metadata),
+ .get_rptr = vcn_v4_0_unified_ring_get_rptr,
+ .get_wptr = vcn_v4_0_unified_ring_get_wptr,
+ .set_wptr = vcn_v4_0_unified_ring_set_wptr,
+ .patch_cs_in_place = vcn_v4_0_ring_patch_cs_in_place,
+ /* worst-case dwords emitted per frame, summed per helper below */
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
+ 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* vcn_v2_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
+ .emit_ib = vcn_v2_0_enc_ring_emit_ib,
+ .emit_fence = vcn_v2_0_enc_ring_emit_fence,
+ .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_enc_ring_test_ring,
+ .test_ib = amdgpu_vcn_unified_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_v2_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = vcn_v4_0_ring_reset,
+};
+
+/**
+ * vcn_v4_0_set_unified_ring_funcs - set unified ring functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Point ring_enc[0] of every non-harvested instance at the unified ring
+ * function table and record the instance index in the ring.
+ */
+static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	int inst;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) {
+		if (adev->vcn.harvest_config & (1 << inst))
+			continue;
+
+		/* secure submission is only advertised on IP version 4.0.2 */
+		if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 2))
+			vcn_v4_0_unified_ring_vm_funcs.secure_submission_supported = true;
+
+		ring = &adev->vcn.inst[inst].ring_enc[0];
+		ring->funcs =
+			(const struct amdgpu_ring_funcs *)&vcn_v4_0_unified_ring_vm_funcs;
+		ring->me = inst;
+	}
+}
+
+/**
+ * vcn_v4_0_is_idle - check VCN block is idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block structure
+ *
+ * Read UVD_STATUS of every non-harvested instance and report true only if
+ * all of them read back as idle.
+ */
+static bool vcn_v4_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	bool idle = true;
+	int inst;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) {
+		if (adev->vcn.harvest_config & (1 << inst))
+			continue;
+
+		/* no early exit: every instance's status is still read */
+		if (RREG32_SOC15(VCN, inst, regUVD_STATUS) != UVD_STATUS__IDLE)
+			idle = false;
+	}
+
+	return idle;
+}
+
+/**
+ * vcn_v4_0_wait_for_idle - wait for VCN block idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Wait on UVD_STATUS of each non-harvested instance in turn. Returns the
+ * first non-zero wait result, or 0 once all instances are idle.
+ */
+static int vcn_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst, err;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) {
+		if (adev->vcn.harvest_config & (1 << inst))
+			continue;
+
+		err = SOC15_WAIT_ON_RREG(VCN, inst, regUVD_STATUS, UVD_STATUS__IDLE,
+					 UVD_STATUS__IDLE);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * vcn_v4_0_set_clockgating_state - set VCN block clockgating state
+ *
+ * @ip_block: amdgpu_ip_block pointer
+ * @state: clock gating state
+ *
+ * Enable or disable clock gating on every non-harvested instance. Gating is
+ * refused with -EBUSY as soon as an instance is found that is not idle.
+ */
+static int vcn_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					  enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	const bool gate = (state == AMD_CG_STATE_GATE);
+	struct amdgpu_vcn_inst *vinst;
+	int inst;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) {
+		if (adev->vcn.harvest_config & (1 << inst))
+			continue;
+
+		vinst = &adev->vcn.inst[inst];
+
+		if (!gate) {
+			vcn_v4_0_disable_clock_gating(vinst);
+			continue;
+		}
+
+		if (RREG32_SOC15(VCN, inst, regUVD_STATUS) != UVD_STATUS__IDLE)
+			return -EBUSY;
+
+		vcn_v4_0_enable_clock_gating(vinst);
+	}
+
+	return 0;
+}
+
+/*
+ * Move one VCN instance to power-gating state @state by stopping (gate) or
+ * starting (ungate) it. cur_state is only updated when the transition
+ * succeeds; a request for the current state is a no-op.
+ */
+static int vcn_v4_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+				 enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = vinst->adev;
+	int err;
+
+	/* for SRIOV, guest should not control VCN Power-gating
+	 * MMSCH FW should control Power-gating and clock-gating
+	 * guest should avoid touching CGC and PG
+	 */
+	if (amdgpu_sriov_vf(adev)) {
+		vinst->cur_state = AMD_PG_STATE_UNGATE;
+		return 0;
+	}
+
+	if (vinst->cur_state == state)
+		return 0;
+
+	err = (state == AMD_PG_STATE_GATE) ? vcn_v4_0_stop(vinst) :
+					     vcn_v4_0_start(vinst);
+	if (err)
+		return err;
+
+	vinst->cur_state = state;
+	return 0;
+}
+
+/**
+ * vcn_v4_0_set_ras_interrupt_state - set VCN block RAS interrupt state
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources
+ * @type: interrupt types
+ * @state: interrupt states
+ *
+ * Set VCN block RAS interrupt state. This implementation ignores all of its
+ * arguments, touches no hardware and always returns 0.
+ */
+static int vcn_v4_0_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+/**
+ * vcn_v4_0_process_interrupt - process VCN block interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources
+ * @entry: interrupt entry from clients and sources
+ *
+ * Work out which VCN instance raised the interrupt (from the ring id when
+ * SRIOV RB decouple is active, otherwise from the IH client id) and run
+ * fence processing on that instance's unified ring. Always returns 0.
+ */
+static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
+				      struct amdgpu_iv_entry *entry)
+{
+	uint32_t ip_instance;
+
+	if (amdgpu_sriov_is_vcn_rb_decouple(adev)) {
+		ip_instance = entry->ring_id;
+	} else if (entry->client_id == SOC15_IH_CLIENTID_VCN) {
+		ip_instance = 0;
+	} else if (entry->client_id == SOC15_IH_CLIENTID_VCN1) {
+		ip_instance = 1;
+	} else {
+		DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+		return 0;
+	}
+
+	DRM_DEBUG("IH: VCN TRAP\n");
+
+	if (entry->src_id == VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE)
+		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
+	else
+		DRM_ERROR("Unhandled interrupt: %d %d\n",
+			  entry->src_id, entry->src_data[0]);
+
+	return 0;
+}
+
+/* IRQ source callbacks for the VCN trap interrupt; only .process is provided. */
+static const struct amdgpu_irq_src_funcs vcn_v4_0_irq_funcs = {
+ .process = vcn_v4_0_process_interrupt,
+};
+
+/*
+ * IRQ source callbacks for the RAS poison interrupt: .set is the no-op
+ * vcn_v4_0_set_ras_interrupt_state(), .process is the common VCN poison
+ * handler.
+ */
+static const struct amdgpu_irq_src_funcs vcn_v4_0_ras_irq_funcs = {
+ .set = vcn_v4_0_set_ras_interrupt_state,
+ .process = amdgpu_vcn_process_poison_irq,
+};
+
+/**
+ * vcn_v4_0_set_irq_funcs - set VCN block interrupt irq functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * For every non-harvested instance, install the trap and RAS poison irq
+ * callbacks and size both sources as num_enc_rings + 1 types.
+ */
+static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_vcn_inst *vinst;
+	int inst;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) {
+		if (adev->vcn.harvest_config & (1 << inst))
+			continue;
+
+		vinst = &adev->vcn.inst[inst];
+
+		vinst->irq.num_types = vinst->num_enc_rings + 1;
+		vinst->irq.funcs = &vcn_v4_0_irq_funcs;
+
+		vinst->ras_poison_irq.num_types = vinst->num_enc_rings + 1;
+		vinst->ras_poison_irq.funcs = &vcn_v4_0_ras_irq_funcs;
+	}
+}
+
+/*
+ * IP-block lifecycle callbacks for VCN 4.0. Power gating and the IP state
+ * dump/print hooks use the common VCN helpers rather than v4.0-specific ones.
+ */
+static const struct amd_ip_funcs vcn_v4_0_ip_funcs = {
+ .name = "vcn_v4_0",
+ .early_init = vcn_v4_0_early_init,
+ .sw_init = vcn_v4_0_sw_init,
+ .sw_fini = vcn_v4_0_sw_fini,
+ .hw_init = vcn_v4_0_hw_init,
+ .hw_fini = vcn_v4_0_hw_fini,
+ .suspend = vcn_v4_0_suspend,
+ .resume = vcn_v4_0_resume,
+ .is_idle = vcn_v4_0_is_idle,
+ .wait_for_idle = vcn_v4_0_wait_for_idle,
+ .set_clockgating_state = vcn_v4_0_set_clockgating_state,
+ .set_powergating_state = vcn_set_powergating_state,
+ .dump_ip_state = amdgpu_vcn_dump_ip_state,
+ .print_ip_state = amdgpu_vcn_print_ip_state,
+};
+
+/* Exported descriptor for the VCN 4.0.0 IP block (declared in vcn_v4_0.h). */
+const struct amdgpu_ip_block_version vcn_v4_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VCN,
+ .major = 4,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &vcn_v4_0_ip_funcs,
+};
+
+/*
+ * Return the POISONED_PF field of the RAS status register that belongs to
+ * @sub_block on VCN @instance, logging when it is set. Sub-blocks other than
+ * AMDGPU_VCN_V4_0_VCPU_VCODEC read no register and report 0.
+ */
+static uint32_t vcn_v4_0_query_poison_by_instance(struct amdgpu_device *adev,
+						  uint32_t instance, uint32_t sub_block)
+{
+	uint32_t status, poisoned = 0;
+
+	if (sub_block == AMDGPU_VCN_V4_0_VCPU_VCODEC) {
+		status = RREG32_SOC15(VCN, instance, regUVD_RAS_VCPU_VCODEC_STATUS);
+		poisoned = REG_GET_FIELD(status, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF);
+	}
+
+	if (poisoned)
+		dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n",
+			 instance, sub_block);
+
+	return poisoned;
+}
+
+/*
+ * Query every sub-block of every VCN instance and report whether any of
+ * them returned a non-zero poison status.
+ */
+static bool vcn_v4_0_query_ras_poison_status(struct amdgpu_device *adev)
+{
+	uint32_t total = 0;
+	uint32_t inst, sub;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+		for (sub = 0; sub < AMDGPU_VCN_V4_0_MAX_SUB_BLOCK; sub++) {
+			total += vcn_v4_0_query_poison_by_instance(adev, inst, sub);
+		}
+	}
+
+	return total != 0;
+}
+
+/* RAS hardware ops for VCN 4.0: only poison-status querying is implemented. */
+const struct amdgpu_ras_block_hw_ops vcn_v4_0_ras_hw_ops = {
+ .query_poison_status = vcn_v4_0_query_ras_poison_status,
+};
+
+/*
+ * RAS block description for VCN 4.0; selected by vcn_v4_0_set_ras_funcs()
+ * for IP version 4.0.0. Late init uses the common VCN helper.
+ */
+static struct amdgpu_vcn_ras vcn_v4_0_ras = {
+ .ras_block = {
+ .hw_ops = &vcn_v4_0_ras_hw_ops,
+ .ras_late_init = amdgpu_vcn_ras_late_init,
+ },
+};
+
+/*
+ * Attach the VCN 4.0 RAS description to @adev when the VCN IP version is
+ * exactly 4.0.0; every other version is left without a VCN RAS block.
+ */
+static void vcn_v4_0_set_ras_funcs(struct amdgpu_device *adev)
+{
+	if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 0))
+		adev->vcn.ras = &vcn_v4_0_ras;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.h
new file mode 100644
index 000000000000..7d3d11f40f27
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_V4_0_H__
+#define __VCN_V4_0_H__
+
+/* Sub-block identifiers used when querying VCN 4.0 RAS poison status. */
+enum amdgpu_vcn_v4_0_sub_block {
+ AMDGPU_VCN_V4_0_VCPU_VCODEC = 0,
+
+ /* number of sub-blocks; keep last */
+ AMDGPU_VCN_V4_0_MAX_SUB_BLOCK,
+};
+
+extern const struct amdgpu_ip_block_version vcn_v4_0_ip_block;
+
+#endif /* __VCN_V4_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
new file mode 100644
index 000000000000..cb7123ec1a5d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -0,0 +1,2131 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_hw_ip.h"
+#include "vcn_v2_0.h"
+#include "vcn_v4_0_3.h"
+#include "mmsch_v4_0_3.h"
+
+#include "vcn/vcn_4_0_3_offset.h"
+#include "vcn/vcn_4_0_3_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
+
+#define mmUVD_DPG_LMA_CTL regUVD_DPG_LMA_CTL
+#define mmUVD_DPG_LMA_CTL_BASE_IDX regUVD_DPG_LMA_CTL_BASE_IDX
+#define mmUVD_DPG_LMA_DATA regUVD_DPG_LMA_DATA
+#define mmUVD_DPG_LMA_DATA_BASE_IDX regUVD_DPG_LMA_DATA_BASE_IDX
+
+#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
+#define VCN1_VID_SOC_ADDRESS_3_0 0x48300
+#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
+
+/*
+ * VCN 4.0.3 registers handed to amdgpu_vcn_reg_dump_init() in sw_init.
+ * All entries are listed against instance 0.
+ */
+static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = {
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_CONFIG),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE)
+};
+
+/*
+ * Keep only the low 17 bits of a VCN register offset. The argument is
+ * parenthesised so that expressions using lower-precedence operators
+ * (e.g. a | b) are masked as a whole.
+ */
+#define NORMALIZE_VCN_REG_OFFSET(offset) \
+		((offset) & 0x1FFFF)
+
+static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
+static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
+static int vcn_v4_0_3_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
+static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring);
+static void vcn_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
+static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev,
+ int inst_idx, bool indirect);
+
+/* True when the RRMT_ENABLED capability bit is not set in adev->vcn.caps. */
+static inline bool vcn_v4_0_3_normalizn_reqd(struct amdgpu_device *adev)
+{
+	return !(adev->vcn.caps & AMDGPU_VCN_CAPS(RRMT_ENABLED));
+}
+
+/**
+ * vcn_v4_0_3_early_init - set function pointers
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Give every instance a single (unified) enc ring, install the ring, irq and
+ * RAS callbacks, then hook up per-instance power gating and run the common
+ * VCN early init. Returns the first error from the common early init.
+ */
+static int vcn_v4_0_3_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst, ret;
+
+	/* re-use enc ring as unified ring */
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst)
+		adev->vcn.inst[inst].num_enc_rings = 1;
+
+	vcn_v4_0_3_set_unified_ring_funcs(adev);
+	vcn_v4_0_3_set_irq_funcs(adev);
+	vcn_v4_0_3_set_ras_funcs(adev);
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) {
+		adev->vcn.inst[inst].set_pg_state = vcn_v4_0_3_set_pg_state;
+
+		ret = amdgpu_vcn_early_init(adev, inst);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Record which reset types VCN supports: the soft/full reset mask of the
+ * first unified ring, plus per-queue reset when DPM reports VCN reset support
+ * and the device is not an SRIOV VF. Always returns 0.
+ */
+static int vcn_v4_0_3_late_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_ring *ring = &adev->vcn.inst[0].ring_enc[0];
+
+	adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(ring);
+
+	if (!amdgpu_dpm_reset_vcn_is_supported(adev) || amdgpu_sriov_vf(adev))
+		return 0;
+
+	adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+	return 0;
+}
+
+/*
+ * Initialise the firmware shared area of instance @inst_idx: advertise the
+ * unified queue, mark the queue enabled and, if firmware logging is
+ * requested, set up the firmware log. Always returns 0.
+ */
+static int vcn_v4_0_3_fw_shared_init(struct amdgpu_device *adev, int inst_idx)
+{
+	struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[inst_idx];
+	struct amdgpu_vcn4_fw_shared *shared = vinst->fw_shared.cpu_addr;
+
+	shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+	shared->sq.is_enabled = 1;
+
+	if (amdgpu_vcnfw_log)
+		amdgpu_vcn_fwlog_init(vinst);
+
+	return 0;
+}
+
+/**
+ * vcn_v4_0_3_sw_init - sw init for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Register the trap and poison interrupt sources, then for each instance
+ * run the common sw init, set up the microcode, resume the firmware image
+ * and initialise the unified ring and the firmware shared area. Finally
+ * allocate the SRIOV MM table (VF only), initialise RAS if supported and
+ * register the register dump list and the reset-mask sysfs entry.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_ring *ring;
+	int i, r, vcn_inst;
+
+	/* VCN DEC TRAP */
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+		VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq);
+	if (r)
+		return r;
+
+	/* VCN POISON TRAP */
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+		VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst->ras_poison_irq);
+	/* do not let the per-instance loop below overwrite a failure */
+	if (r)
+		return r;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+
+		r = amdgpu_vcn_sw_init(adev, i);
+		if (r)
+			return r;
+
+		amdgpu_vcn_setup_ucode(adev, i);
+
+		r = amdgpu_vcn_resume(adev, i);
+		if (r)
+			return r;
+
+		vcn_inst = GET_INST(VCN, i);
+
+		ring = &adev->vcn.inst[i].ring_enc[0];
+		ring->use_doorbell = true;
+
+		/* bare metal and SRIOV use a different doorbell stride per instance */
+		if (!amdgpu_sriov_vf(adev))
+			ring->doorbell_index =
+				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+				9 * vcn_inst;
+		else
+			ring->doorbell_index =
+				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+				32 * vcn_inst;
+
+		ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
+		sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id);
+
+		/* There are no per-instance irq source IDs on 4.0.3, the IH
+		 * packets use a separate field to differentiate instances.
+		 */
+		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[0].irq, 0,
+				     AMDGPU_RING_PRIO_DEFAULT,
+				     &adev->vcn.inst[i].sched_score);
+		if (r)
+			return r;
+
+		vcn_v4_0_3_fw_shared_init(adev, i);
+
+		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+			adev->vcn.inst[i].pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
+	}
+
+	if (amdgpu_sriov_vf(adev)) {
+		r = amdgpu_virt_alloc_mm_table(adev);
+		if (r)
+			return r;
+	}
+
+	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
+		r = amdgpu_vcn_ras_sw_init(adev);
+		if (r) {
+			dev_err(adev->dev, "Failed to initialize vcn ras block!\n");
+			return r;
+		}
+	}
+
+	r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_4_0_3, ARRAY_SIZE(vcn_reg_list_4_0_3));
+	if (r)
+		return r;
+
+	return amdgpu_vcn_sysfs_reset_mask_init(adev);
+}
+
+/**
+ * vcn_v4_0_3_sw_fini - sw fini for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * VCN suspend and free up sw allocation. Returns 0, or the first error from
+ * amdgpu_vcn_suspend(), in which case the remaining teardown is skipped.
+ */
+static int vcn_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r, idx;
+
+ /* only clear the firmware shared flags while drm_dev_enter() succeeds */
+ if (drm_dev_enter(&adev->ddev, &idx)) {
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = 0;
+ fw_shared->sq.is_enabled = cpu_to_le32(false);
+ }
+ drm_dev_exit(idx);
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_free_mm_table(adev);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(adev, i);
+ if (r)
+ return r;
+ }
+
+ amdgpu_vcn_sysfs_reset_mask_fini(adev);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ amdgpu_vcn_sw_fini(adev, i);
+
+ return 0;
+}
+
+/*
+ * Program the doorbell for the unified ring of one VCN instance: set the
+ * NBIO doorbell range, then enable the ring's doorbell in VCN_RB1_DB_CTRL.
+ * Does nothing if the ring does not use a doorbell. Always returns 0.
+ */
+static int vcn_v4_0_3_hw_init_inst(struct amdgpu_vcn_inst *vinst)
+{
+ int vcn_inst;
+ struct amdgpu_device *adev = vinst->adev;
+ struct amdgpu_ring *ring;
+ int inst_idx = vinst->inst;
+
+ /* register accesses use the instance number returned by GET_INST() */
+ vcn_inst = GET_INST(VCN, inst_idx);
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+ if (ring->use_doorbell) {
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 9 * vcn_inst,
+ adev->vcn.inst[inst_idx].aid_id);
+
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ /* Read DB_CTRL to flush the write DB_CTRL command. */
+ RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL);
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_hw_init - start and test VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing.
+ *
+ * On an SRIOV VF the block is started through vcn_v4_0_3_start_sriov() and
+ * each unified ring has its write pointer reset and is marked ready. On bare
+ * metal the RRMT capability is probed, then each instance gets its doorbell
+ * programmed, its firmware shared area re-initialised if needed, and a ring
+ * test.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int vcn_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_ring *ring;
+	struct amdgpu_vcn_inst *vinst;
+	int i, r;
+
+	if (amdgpu_sriov_vf(adev)) {
+		r = vcn_v4_0_3_start_sriov(adev);
+		if (r)
+			return r;
+
+		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+			ring = &adev->vcn.inst[i].ring_enc[0];
+			ring->wptr = 0;
+			ring->wptr_old = 0;
+			vcn_v4_0_3_unified_ring_set_wptr(ring);
+			ring->sched.ready = true;
+		}
+	} else {
+		/* This flag is not set for VF, assumed to be disabled always */
+		if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) &
+		    0x100)
+			adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED);
+
+		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+			struct amdgpu_vcn4_fw_shared *fw_shared;
+
+			ring = &adev->vcn.inst[i].ring_enc[0];
+			vinst = &adev->vcn.inst[i];
+			vcn_v4_0_3_hw_init_inst(vinst);
+
+			/* Re-init fw_shared when RAS fatal error occurred */
+			fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+			if (!fw_shared->sq.is_enabled)
+				vcn_v4_0_3_fw_shared_init(adev, i);
+
+			r = amdgpu_ring_test_helper(ring);
+			if (r)
+				return r;
+		}
+	}
+
+	/*
+	 * Every error has already been returned above. Returning a constant
+	 * avoids handing back an uninitialised r when there are no instances.
+	 */
+	return 0;
+}
+
+/**
+ * vcn_v4_0_3_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Cancel each instance's idle work and gate any instance that is not already
+ * gated, then drop the RAS poison irq reference when RAS is supported and the
+ * device is not an SRIOV VF. Always returns 0.
+ */
+static int vcn_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_vcn_inst *vinst;
+	int inst;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) {
+		vinst = &adev->vcn.inst[inst];
+
+		cancel_delayed_work_sync(&vinst->idle_work);
+
+		if (vinst->cur_state == AMD_PG_STATE_GATE)
+			continue;
+
+		vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
+	}
+
+	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN) || amdgpu_sriov_vf(adev))
+		return 0;
+
+	amdgpu_irq_put(adev, &adev->vcn.inst[0].ras_poison_irq, 0);
+	return 0;
+}
+
+/**
+ * vcn_v4_0_3_suspend - suspend VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Run hw fini, then suspend every VCN instance in order, stopping at and
+ * returning the first error.
+ */
+static int vcn_v4_0_3_suspend(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst, ret;
+
+	ret = vcn_v4_0_3_hw_fini(ip_block);
+
+	for (inst = 0; !ret && inst < adev->vcn.num_vcn_inst; inst++)
+		ret = amdgpu_vcn_suspend(adev, inst);
+
+	return ret;
+}
+
+/**
+ * vcn_v4_0_3_resume - resume VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Resume the firmware image of every instance, then run hw init. Returns
+ * the first resume error, otherwise the hw init result.
+ */
+static int vcn_v4_0_3_resume(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst, ret;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+		ret = amdgpu_vcn_resume(adev, inst);
+		if (ret)
+			return ret;
+	}
+
+	return vcn_v4_0_3_hw_init(ip_block);
+}
+
+/**
+ * vcn_v4_0_3_mc_resume - memory controller programming
+ *
+ * @vinst: VCN instance
+ *
+ * Let the VCN memory controller know its offsets: program the base address,
+ * offset and size of the firmware, stack and context cache windows and of
+ * the non-cached window that holds the firmware shared area.
+ */
+static void vcn_v4_0_3_mc_resume(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t offset, size, vcn_inst;
+ const struct common_firmware_header *hdr;
+
+ /* window 0 size: ucode size from the firmware header plus 8, page aligned */
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ /* PSP load: window 0 points at the TMR address, stack starts at gpu_addr */
+ WREG32_SOC15(
+ VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx]
+ .tmr_mc_addr_lo));
+ WREG32_SOC15(
+ VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx]
+ .tmr_mc_addr_hi));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0;
+ } else {
+ /* otherwise the firmware sits at gpu_addr and the stack follows it */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr));
+ offset = size;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE1,
+ AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE2,
+ AMDGPU_VCN_CONTEXT_SIZE);
+
+ /* non-cache window */
+ WREG32_SOC15(
+ VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr));
+ WREG32_SOC15(
+ VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
+ WREG32_SOC15(
+ VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0,
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+}
+
+/**
+ * vcn_v4_0_3_mc_resume_dpg_mode - memory controller programming for dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Let the VCN memory controller know its offsets with dpg mode. The same
+ * cache and non-cache windows as in the non-DPG path are programmed, but
+ * through WREG32_SOC15_DPG_MODE(); the global tiling registers are written
+ * as well.
+ */
+static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t offset, size;
+ const struct common_firmware_header *hdr;
+
+ /* window 0 size: ucode size from the firmware header plus 8, page aligned */
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
+ inst_idx].tmr_mc_addr_lo), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
+ inst_idx].tmr_mc_addr_hi), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else {
+ /* PSP load with indirect writes: window 0 address and offset are written as 0 */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ offset = 0;
+ } else {
+ /* non-PSP load: firmware sits at gpu_addr and the stack follows it */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ offset = size;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ /* window 0 size is written as 0 on the indirect path */
+ if (!indirect)
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else {
+ /* indirect path: window 1 address and offset are written as 0 */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+}
+
+/**
+ * vcn_v4_0_3_disable_clock_gating - program one VCN instance for ungated clocks
+ *
+ * @vinst: VCN instance
+ *
+ * Nothing is touched when AMD_CG_SUPPORT_VCN_MGCG is set in adev->cg_flags.
+ * Otherwise UVD_CGC_CTRL, UVD_CGC_GATE, UVD_SUVD_CGC_GATE and
+ * UVD_SUVD_CGC_CTRL of the instance are updated with direct register access.
+ */
+static void vcn_v4_0_3_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
+{
+	/* per-block bits cleared in UVD_CGC_GATE */
+	const uint32_t cgc_gate_bits = UVD_CGC_GATE__SYS_MASK |
+				       UVD_CGC_GATE__MPEG2_MASK |
+				       UVD_CGC_GATE__REGS_MASK |
+				       UVD_CGC_GATE__RBC_MASK |
+				       UVD_CGC_GATE__LMI_MC_MASK |
+				       UVD_CGC_GATE__LMI_UMC_MASK |
+				       UVD_CGC_GATE__MPC_MASK |
+				       UVD_CGC_GATE__LBSI_MASK |
+				       UVD_CGC_GATE__LRBBM_MASK |
+				       UVD_CGC_GATE__WCB_MASK |
+				       UVD_CGC_GATE__VCPU_MASK |
+				       UVD_CGC_GATE__MMSCH_MASK;
+	/* per-block mode bits cleared in UVD_CGC_CTRL */
+	const uint32_t cgc_mode_bits = UVD_CGC_CTRL__SYS_MODE_MASK |
+				       UVD_CGC_CTRL__MPEG2_MODE_MASK |
+				       UVD_CGC_CTRL__REGS_MODE_MASK |
+				       UVD_CGC_CTRL__RBC_MODE_MASK |
+				       UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+				       UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+				       UVD_CGC_CTRL__MPC_MODE_MASK |
+				       UVD_CGC_CTRL__LBSI_MODE_MASK |
+				       UVD_CGC_CTRL__LRBBM_MODE_MASK |
+				       UVD_CGC_CTRL__WCB_MODE_MASK |
+				       UVD_CGC_CTRL__VCPU_MODE_MASK |
+				       UVD_CGC_CTRL__MMSCH_MODE_MASK;
+	/* bits set in UVD_SUVD_CGC_GATE */
+	const uint32_t suvd_gate_bits = UVD_SUVD_CGC_GATE__SRE_MASK |
+					UVD_SUVD_CGC_GATE__SIT_MASK |
+					UVD_SUVD_CGC_GATE__SMP_MASK |
+					UVD_SUVD_CGC_GATE__SCM_MASK |
+					UVD_SUVD_CGC_GATE__SDB_MASK |
+					UVD_SUVD_CGC_GATE__SRE_H264_MASK |
+					UVD_SUVD_CGC_GATE__SRE_HEVC_MASK |
+					UVD_SUVD_CGC_GATE__SIT_H264_MASK |
+					UVD_SUVD_CGC_GATE__SIT_HEVC_MASK |
+					UVD_SUVD_CGC_GATE__SCM_H264_MASK |
+					UVD_SUVD_CGC_GATE__SCM_HEVC_MASK |
+					UVD_SUVD_CGC_GATE__SDB_H264_MASK |
+					UVD_SUVD_CGC_GATE__SDB_HEVC_MASK |
+					UVD_SUVD_CGC_GATE__ENT_MASK |
+					UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK |
+					UVD_SUVD_CGC_GATE__SITE_MASK |
+					UVD_SUVD_CGC_GATE__SRE_VP9_MASK |
+					UVD_SUVD_CGC_GATE__SCM_VP9_MASK |
+					UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK |
+					UVD_SUVD_CGC_GATE__SDB_VP9_MASK |
+					UVD_SUVD_CGC_GATE__IME_HEVC_MASK;
+	/* mode bits cleared in UVD_SUVD_CGC_CTRL */
+	const uint32_t suvd_mode_bits = UVD_SUVD_CGC_CTRL__SRE_MODE_MASK |
+					UVD_SUVD_CGC_CTRL__SIT_MODE_MASK |
+					UVD_SUVD_CGC_CTRL__SMP_MODE_MASK |
+					UVD_SUVD_CGC_CTRL__SCM_MODE_MASK |
+					UVD_SUVD_CGC_CTRL__SDB_MODE_MASK |
+					UVD_SUVD_CGC_CTRL__ENT_MODE_MASK |
+					UVD_SUVD_CGC_CTRL__IME_MODE_MASK |
+					UVD_SUVD_CGC_CTRL__SITE_MODE_MASK;
+	struct amdgpu_device *adev = vinst->adev;
+	uint32_t val;
+	int vcn_inst;
+
+	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+		return;
+
+	vcn_inst = GET_INST(VCN, vinst->inst);
+
+	/* clear dynamic clock mode, OR in gate delay timer and off delay */
+	val = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
+	val &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+	val |= (1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT) |
+	       (4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT);
+	WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, val);
+
+	/* clear the per-block gate bits, then wait for the register to read 0 */
+	val = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_GATE);
+	val &= ~cgc_gate_bits;
+	WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_GATE, val);
+	SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_CGC_GATE, 0, 0xFFFFFFFF);
+
+	/* clear the per-block mode bits */
+	val = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
+	val &= ~cgc_mode_bits;
+	WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, val);
+
+	/* SUVD: set the gate bits ... */
+	val = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_GATE);
+	val |= suvd_gate_bits;
+	WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_GATE, val);
+
+	/* ... and clear the mode bits */
+	val = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL);
+	val &= ~suvd_mode_bits;
+	WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL, val);
+}
+
+/**
+ * vcn_v4_0_3_disable_clock_gating_dpg_mode - clock gating setup for dpg mode
+ *
+ * @vinst: VCN instance
+ * @sram_sel: sram select, forwarded to every WREG32_SOC15_DPG_MODE() call
+ * @indirect: indirectly write sram
+ *
+ * Does nothing when AMD_CG_SUPPORT_VCN_MGCG is set in adev->cg_flags.
+ * Otherwise writes UVD_CGC_CTRL, UVD_CGC_GATE, UVD_SUVD_CGC_GATE and
+ * UVD_SUVD_CGC_CTRL through the DPG mode write macro.
+ */
+static void vcn_v4_0_3_disable_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
+				uint8_t sram_sel,
+				uint8_t indirect)
+{
+	struct amdgpu_device *adev = vinst->adev;
+	int inst_idx = vinst->inst;
+	uint32_t cgc_ctrl;
+
+	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+		return;
+
+	/* enable sw clock gating control */
+	cgc_ctrl = (0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT) |
+		   (1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT) |
+		   (4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT);
+	cgc_ctrl &= ~(UVD_CGC_CTRL__SYS_MODE_MASK |
+		      UVD_CGC_CTRL__MPEG2_MODE_MASK |
+		      UVD_CGC_CTRL__REGS_MODE_MASK |
+		      UVD_CGC_CTRL__RBC_MODE_MASK |
+		      UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+		      UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+		      UVD_CGC_CTRL__IDCT_MODE_MASK |
+		      UVD_CGC_CTRL__MPRD_MODE_MASK |
+		      UVD_CGC_CTRL__MPC_MODE_MASK |
+		      UVD_CGC_CTRL__LBSI_MODE_MASK |
+		      UVD_CGC_CTRL__LRBBM_MODE_MASK |
+		      UVD_CGC_CTRL__WCB_MODE_MASK |
+		      UVD_CGC_CTRL__VCPU_MODE_MASK);
+	WREG32_SOC15_DPG_MODE(inst_idx,
+		SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_CGC_CTRL),
+		cgc_ctrl, sram_sel, indirect);
+
+	/* turn off clock gating */
+	WREG32_SOC15_DPG_MODE(inst_idx,
+		SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_CGC_GATE),
+		0, sram_sel, indirect);
+
+	/* turn on SUVD clock gating */
+	WREG32_SOC15_DPG_MODE(inst_idx,
+		SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SUVD_CGC_GATE),
+		1, sram_sel, indirect);
+
+	/* turn on sw mode in UVD_SUVD_CGC_CTRL */
+	WREG32_SOC15_DPG_MODE(inst_idx,
+		SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SUVD_CGC_CTRL),
+		0, sram_sel, indirect);
+}
+
+/**
+ * vcn_v4_0_3_enable_clock_gating - set the VCN clock gating mode bits
+ *
+ * @vinst: VCN instance
+ *
+ * Does nothing when AMD_CG_SUPPORT_VCN_MGCG is set in adev->cg_flags.
+ * Otherwise ORs the timer/delay fields and the per-block mode bits into
+ * UVD_CGC_CTRL and the mode bits into UVD_SUVD_CGC_CTRL.
+ */
+static void vcn_v4_0_3_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
+{
+	/* per-block mode bits set in UVD_CGC_CTRL */
+	const uint32_t cgc_mode_bits = UVD_CGC_CTRL__SYS_MODE_MASK |
+				       UVD_CGC_CTRL__MPEG2_MODE_MASK |
+				       UVD_CGC_CTRL__REGS_MODE_MASK |
+				       UVD_CGC_CTRL__RBC_MODE_MASK |
+				       UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+				       UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+				       UVD_CGC_CTRL__MPC_MODE_MASK |
+				       UVD_CGC_CTRL__LBSI_MODE_MASK |
+				       UVD_CGC_CTRL__LRBBM_MODE_MASK |
+				       UVD_CGC_CTRL__WCB_MODE_MASK |
+				       UVD_CGC_CTRL__VCPU_MODE_MASK;
+	/* mode bits set in UVD_SUVD_CGC_CTRL */
+	const uint32_t suvd_mode_bits = UVD_SUVD_CGC_CTRL__SRE_MODE_MASK |
+					UVD_SUVD_CGC_CTRL__SIT_MODE_MASK |
+					UVD_SUVD_CGC_CTRL__SMP_MODE_MASK |
+					UVD_SUVD_CGC_CTRL__SCM_MODE_MASK |
+					UVD_SUVD_CGC_CTRL__SDB_MODE_MASK |
+					UVD_SUVD_CGC_CTRL__ENT_MODE_MASK |
+					UVD_SUVD_CGC_CTRL__IME_MODE_MASK |
+					UVD_SUVD_CGC_CTRL__SITE_MODE_MASK;
+	struct amdgpu_device *adev = vinst->adev;
+	uint32_t val;
+	int vcn_inst;
+
+	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+		return;
+
+	vcn_inst = GET_INST(VCN, vinst->inst);
+
+	/* enable VCN CGC: the DYN_CLOCK_MODE term is 0, so it ORs in nothing */
+	val = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
+	val |= (0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT) |
+	       (1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT) |
+	       (4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT);
+	WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, val);
+
+	/* second read-modify-write of the same register for the mode bits */
+	val = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
+	val |= cgc_mode_bits;
+	WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, val);
+
+	val = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL);
+	val |= suvd_mode_bits;
+	WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL, val);
+}
+
+/**
+ * vcn_v4_0_3_start_dpg_mode - VCN start with dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Start VCN block with dpg mode
+ *
+ * When @indirect is true, dpg_sram_curr_addr is rewound to dpg_sram_cpu_addr
+ * before the DPG register programming and amdgpu_vcn_psp_update_sram() is
+ * called once that programming is done. Afterwards the unified ring
+ * (ring_enc[0]) is programmed with direct register writes.
+ *
+ * Return: 0 on success, or the error returned by
+ * amdgpu_vcn_psp_update_sram().
+ */
+static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ struct amdgpu_vcn4_fw_shared *fw_shared =
+ adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_ring *ring;
+ int vcn_inst, ret;
+ uint32_t tmp;
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 1,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp);
+
+ if (indirect) {
+ DRM_DEV_DEBUG(adev->dev, "VCN %d start: on AID %d",
+ inst_idx, adev->vcn.inst[inst_idx].aid_id);
+ /* restart from the beginning of the DPG sram buffer */
+ adev->vcn.inst[inst_idx].dpg_sram_curr_addr =
+ (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+ /* Use dummy register 0xDEADBEEF passing AID selection to PSP FW */
+ WREG32_SOC15_DPG_MODE(inst_idx, 0xDEADBEEF,
+ adev->vcn.inst[inst_idx].aid_id, 0, true);
+ }
+
+ /* program the clock gating registers for dpg mode (sw-controlled, gating off) */
+ vcn_v4_0_3_disable_clock_gating_dpg_mode(vinst, 0, indirect);
+
+ /* enable VCPU clock; BLK_RST is set here and dropped again after mc_resume */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* disable master interrupt */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MASTINT_EN), 0, 0, indirect);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ 0x00100000L);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_CTRL), tmp, 0, indirect);
+
+ /* setup regUVD_MPC_CNTL */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MPC_CNTL),
+ 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
+
+ /* setup regUVD_MPC_SET_MUXA0 */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MPC_SET_MUXA0),
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
+
+ /* setup regUVD_MPC_SET_MUXB0 */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MPC_SET_MUXB0),
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
+
+ /* setup regUVD_MPC_SET_MUX */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MPC_SET_MUX),
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
+
+ vcn_v4_0_3_mc_resume_dpg_mode(vinst, indirect);
+
+ /* rewrite UVD_VCPU_CNTL with the clock enabled and BLK_RST no longer set */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* enable LMI MC and UMC channels */
+ tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_CTRL2), tmp, 0, indirect);
+
+ vcn_v4_0_3_enable_ras(adev, inst_idx, indirect);
+
+ /* enable master interrupt */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+ if (indirect) {
+ /* the only failure path of this function */
+ ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM);
+ if (ret) {
+ dev_err(adev->dev, "vcn sram load failed %d\n", ret);
+ return ret;
+ }
+ }
+
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+
+ /* program the RB_BASE for ring buffer */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI,
+ upper_32_bits(ring->gpu_addr));
+
+ /* ring size is programmed in 32-bit words */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE,
+ ring->ring_size / sizeof(uint32_t));
+
+ /* resetting ring, fw should not check RB ring */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
+ ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB_EN_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ /*resetting done, fw can check RB ring */
+ /* NOTE(review): FW_QUEUE_RING_RESET was already cleared by the statement
+ * above, and only this one applies cpu_to_le32() - confirm whether both
+ * are needed.
+ */
+ fw_shared->sq.queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+
+ return 0;
+}
+
+/*
+ * vcn_v4_0_3_start_sriov - start all VCN instances through MMSCH
+ *
+ * For every VCN instance an init table is built in adev->virt.mm_table
+ * (header followed by direct register write commands), its address and size
+ * are written to the MMSCH_VF_CTX_* registers and the MMSCH mailbox is
+ * kicked. The response register is then polled every 10 usec for up to
+ * 1000 usec.
+ *
+ * Returns 0, or -EBUSY when the mailbox response stays 0 until the timeout.
+ * An unexpected non-zero response is only logged.
+ */
+static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
+{
+ int i, vcn_inst;
+ struct amdgpu_ring *ring_enc;
+ uint64_t cache_addr;
+ uint64_t rb_enc_addr;
+ uint64_t ctx_addr;
+ uint32_t param, resp, expected;
+ uint32_t offset, cache_size;
+ uint32_t tmp, timeout;
+
+ struct amdgpu_mm_table *table = &adev->virt.mm_table;
+ uint32_t *table_loc;
+ uint32_t table_size;
+ uint32_t size, size_dw;
+ uint32_t init_status;
+ uint32_t enabled_vcn;
+
+ /* command templates; presumably consumed by the MMSCH_V4_0_INSERT_*
+ * macros below together with table_loc/table_size - confirm against
+ * the macro definitions
+ */
+ struct mmsch_v4_0_cmd_direct_write
+ direct_wt = { {0} };
+ struct mmsch_v4_0_cmd_direct_read_modify_write
+ direct_rd_mod_wt = { {0} };
+ struct mmsch_v4_0_cmd_end end = { {0} };
+ struct mmsch_v4_0_3_init_header header;
+
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_fw_shared_rb_setup *rb_setup;
+
+ direct_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_WRITE;
+ direct_rd_mod_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+ end.cmd_header.command_type = MMSCH_COMMAND__END;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ vcn_inst = GET_INST(VCN, i);
+
+ /* NOTE(review): adev->vcn.inst[] is indexed with 'i' in some places
+ * of this loop and with 'vcn_inst' (the GET_INST() result) in others -
+ * confirm both always refer to the same entry under SR-IOV.
+ */
+ vcn_v4_0_3_fw_shared_init(adev, vcn_inst);
+
+ /* header sizes are counted in dwords (bytes >> 2) */
+ memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
+ header.version = MMSCH_VERSION;
+ header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;
+
+ /* commands start right behind the header */
+ table_loc = (uint32_t *)table->cpu_addr;
+ table_loc += header.total_size;
+
+ table_size = 0;
+
+ MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
+ ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
+
+ cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
+
+ /* cache window 0: firmware image, from the PSP TMR or from the VCN BO */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
+
+ offset = 0;
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET0), 0);
+ } else {
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr));
+ offset = cache_size;
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_SIZE0),
+ cache_size);
+
+ /* cache window 1: stack, placed at gpu_addr + offset */
+ cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset;
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET1), 0);
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context, directly behind the stack */
+ cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE;
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr));
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET2), 0);
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE);
+
+ fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr;
+ rb_setup = &fw_shared->rb_setup;
+
+ /* describe the unified ring to the firmware through fw_shared */
+ ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0];
+ ring_enc->wptr = 0;
+ rb_enc_addr = ring_enc->gpu_addr;
+
+ rb_setup->is_rb_enabled_flags |= RB_ENABLED;
+ rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
+ rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
+ rb_setup->rb_size = ring_enc->ring_size / 4;
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+
+ /* non-cache window: fw_shared */
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+ MMSCH_V4_0_INSERT_END();
+
+ /* finalize the header now that the command table size is known */
+ header.vcn0.init_status = 0;
+ header.vcn0.table_offset = header.total_size;
+ header.vcn0.table_size = table_size;
+ header.total_size += table_size;
+
+ /* Send init table to mmsch */
+ size = sizeof(struct mmsch_v4_0_3_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy((void *)table_loc, &header, size);
+
+ ctx_addr = table->gpu_addr;
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+ /* VF context VMID field is set to 0 */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID);
+ tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp);
+
+ size = header.total_size;
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size);
+
+ /* clear the response before kicking the mailbox */
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0);
+
+ param = 0x00000001;
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param);
+ /* tmp counts elapsed usec; poll until the response is non-zero */
+ tmp = 0;
+ timeout = 1000;
+ resp = 0;
+ expected = MMSCH_VF_MAILBOX_RESP__OK;
+ while (resp != expected) {
+ resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP);
+ /* any non-zero response ends the wait, not only 'expected' */
+ if (resp != 0)
+ break;
+
+ udelay(10);
+ tmp = tmp + 10;
+ if (tmp >= timeout) {
+ DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+ " waiting for regMMSCH_VF_MAILBOX_RESP "\
+ "(expected=0x%08x, readback=0x%08x)\n",
+ tmp, expected, resp);
+ return -EBUSY;
+ }
+ }
+
+ /* NOTE(review): 'enabled_vcn' is 1 when amdgpu_vcn_is_disabled_vcn()
+ * returns true and is only used in the message below - confirm the
+ * intended meaning.
+ */
+ enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
+ /* init_status is read back from the table in mm_table, not from 'header' */
+ init_status = ((struct mmsch_v4_0_3_init_header *)(table_loc))->vcn0.init_status;
+ /* log only; the loop continues with the next instance */
+ if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
+ && init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
+ DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
+ "status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_start - VCN start
+ *
+ * @vinst: VCN instance
+ *
+ * Start VCN block
+ *
+ * When AMD_PG_SUPPORT_VCN_DPG is set in adev->pg_flags the whole start is
+ * delegated to vcn_v4_0_3_start_dpg_mode(). Otherwise the instance is
+ * programmed with direct register writes, the VCPU is released from reset
+ * and polled until bit 1 of UVD_STATUS is set, and the unified ring
+ * (ring_enc[0]) is programmed.
+ *
+ * Return: 0 on success, the result of vcn_v4_0_3_start_dpg_mode() in DPG
+ * mode, or -1 when the VCPU does not respond after 10 reset attempts.
+ */
+static int vcn_v4_0_3_start(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_ring *ring;
+ int j, k, r, vcn_inst;
+ uint32_t tmp;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v4_0_3_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram);
+
+ vcn_inst = GET_INST(VCN, i);
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) |
+ UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp);
+
+ /* SW clock gating */
+ vcn_v4_0_3_disable_clock_gating(vinst);
+
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK,
+ ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ /* take LMI and LMI UMC out of soft reset */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL,
+ tmp | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup regUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUX */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v4_0_3_mc_resume(vinst);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* up to 10 boot attempts; each polls UVD_STATUS up to 100 times, 10 ms apart */
+ for (j = 0; j < 10; ++j) {
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) {
+ status = RREG32_SOC15(VCN, vcn_inst,
+ regUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ }
+ r = 0;
+ if (status & 2)
+ break;
+
+ /* no response: pulse BLK_RST and try again */
+ DRM_DEV_ERROR(adev->dev,
+ "VCN decode not responding, trying to reset the VCPU!!!\n");
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst,
+ regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst,
+ regUVD_VCPU_CNTL),
+ 0, ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ mdelay(10);
+ /* stays -1 if this was the last attempt */
+ r = -1;
+ }
+
+ if (r) {
+ DRM_DEV_ERROR(adev->dev, "VCN decode not responding, giving up!!!\n");
+ return r;
+ }
+
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+
+ /* program the RB_BASE for ring buffer */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI,
+ upper_32_bits(ring->gpu_addr));
+
+ /* ring size is programmed in 32-bit words */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE,
+ ring->ring_size / sizeof(uint32_t));
+
+ /* resetting ring, fw should not check RB ring */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB_EN_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+
+ /* resync the software wptr with the hardware and release the fw queue */
+ ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+ fw_shared->sq.queue_mode &=
+ cpu_to_le32(~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF));
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_stop_dpg_mode - VCN stop with dpg mode
+ *
+ * @vinst: VCN instance
+ *
+ * Stop VCN block with dpg mode
+ *
+ * The results of the register waits are not checked.
+ *
+ * Return: always 0.
+ */
+static int vcn_v4_0_3_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t tmp;
+ int vcn_inst;
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+
+ /* Wait for power status to be 1 */
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* wait for read ptr to be equal to write ptr */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);
+
+ /* wait for power status to be 1 again after the ring has drained */
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* disable dynamic power gating mode */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_stop - VCN stop
+ *
+ * @vinst: VCN instance
+ *
+ * Stop VCN block
+ *
+ * Sets FW_QUEUE_DPG_HOLD_OFF in the shared queue mode first. With
+ * AMD_PG_SUPPORT_VCN_DPG set the stop is delegated to
+ * vcn_v4_0_3_stop_dpg_mode(); otherwise the block is idled, put into reset
+ * and clock gating is re-applied. A failed wait aborts the remaining steps.
+ *
+ * Return: always 0. NOTE(review): a wait error stored in 'r' is dropped at
+ * the Done label - confirm this best-effort behaviour is intended.
+ */
+static int vcn_v4_0_3_stop(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+ int r = 0, vcn_inst;
+ uint32_t tmp;
+
+ vcn_inst = GET_INST(VCN, i);
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ vcn_v4_0_3_stop_dpg_mode(vinst);
+ goto Done;
+ }
+
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS,
+ UVD_STATUS__IDLE, 0x7);
+ if (r)
+ goto Done;
+
+ /* wait until the LMI read/write clean status bits are all set */
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp,
+ tmp);
+ if (r)
+ goto Done;
+
+ /* stall UMC channel */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp);
+ /* wait until the UMC read/write clean status bits are set */
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp,
+ tmp);
+ if (r)
+ goto Done;
+
+ /* set VCPU_DIS, the bit vcn_v4_0_3_start() clears to unblock VCPU
+ * register access
+ */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* set BLK_RST, the bit vcn_v4_0_3_start() clears to release the VCPU
+ * from reset
+ */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+ /* reset LMI UMC/LMI/VCPU */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+ /* clear VCN status */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0);
+
+ /* apply HW clock gating */
+ vcn_v4_0_3_enable_clock_gating(vinst);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+
+Done:
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_pause_dpg_mode - VCN pause with dpg mode
+ *
+ * @vinst: VCN instance
+ * @new_state: pause state
+ *
+ * Pause dpg mode for VCN block
+ *
+ * Empty implementation: both arguments are ignored and no register is
+ * touched.
+ *
+ * Return: always 0.
+ */
+static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
+{
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_unified_ring_get_rptr - read the unified ring read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Logs an error when @ring is not ring_enc[0] of the instance selected by
+ * ring->me; the register is read regardless.
+ *
+ * Return: the value of UVD_RB_RPTR for that instance.
+ */
+static uint64_t vcn_v4_0_3_unified_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring *unified = &adev->vcn.inst[ring->me].ring_enc[0];
+
+	if (unified != ring)
+		DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+	return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_RPTR);
+}
+
+/**
+ * vcn_v4_0_3_unified_ring_get_wptr - read the unified ring write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Logs an error when @ring is not ring_enc[0] of the instance selected by
+ * ring->me; the pointer is read regardless.
+ *
+ * Return: *ring->wptr_cpu_addr when the ring uses a doorbell, otherwise the
+ * value of UVD_RB_WPTR.
+ */
+static uint64_t vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring *unified = &adev->vcn.inst[ring->me].ring_enc[0];
+
+	if (unified != ring)
+		DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+	/* without a doorbell the hardware register is the source of truth */
+	if (!ring->use_doorbell)
+		return RREG32_SOC15(VCN, GET_INST(VCN, ring->me),
+				    regUVD_RB_WPTR);
+
+	return *ring->wptr_cpu_addr;
+}
+
+/*
+ * Emit a VCN_ENC_CMD_REG_WAIT packet (4 dwords: command, reg << 2, mask,
+ * val). The register offset is normalized first when
+ * vcn_v4_0_3_normalizn_reqd() says so.
+ */
+void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+				       uint32_t val, uint32_t mask)
+{
+	uint32_t offset = reg;
+
+	/* Use normalized offsets when required */
+	if (vcn_v4_0_3_normalizn_reqd(ring->adev))
+		offset = NORMALIZE_VCN_REG_OFFSET(reg);
+
+	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
+	amdgpu_ring_write(ring, offset << 2);
+	amdgpu_ring_write(ring, mask);
+	amdgpu_ring_write(ring, val);
+}
+
+/*
+ * Emit a VCN_ENC_CMD_REG_WRITE packet (3 dwords: command, reg << 2, val).
+ * The register offset is normalized first when vcn_v4_0_3_normalizn_reqd()
+ * says so.
+ */
+void vcn_v4_0_3_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+				   uint32_t val)
+{
+	uint32_t offset = reg;
+
+	/* Use normalized offsets when required */
+	if (vcn_v4_0_3_normalizn_reqd(ring->adev))
+		offset = NORMALIZE_VCN_REG_OFFSET(reg);
+
+	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
+	amdgpu_ring_write(ring, offset << 2);
+	amdgpu_ring_write(ring, val);
+}
+
+/*
+ * Emit a GPU TLB flush for @vmid, followed by a register wait on the
+ * context's page table base (low 32 bits) until it reads back the flushed
+ * address.
+ */
+void vcn_v4_0_3_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
+				       unsigned int vmid, uint64_t pd_addr)
+{
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+	uint32_t ptb_lo_reg;
+
+	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+	/* wait for reg writes */
+	ptb_lo_reg = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
+	vcn_v4_0_3_enc_ring_emit_reg_wait(ring, ptb_lo_reg,
+					  lower_32_bits(pd_addr), 0xffffffff);
+}
+
+/* Intentionally empty emit_hdp_flush callback: nothing is written to @ring. */
+void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ /* VCN engine access for HDP flush doesn't work when RRMT is enabled.
+ * This is a workaround to avoid any HDP flush through VCN ring.
+ */
+}
+
+/**
+ * vcn_v4_0_3_unified_ring_set_wptr - commit the unified ring write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Publishes the low 32 bits of ring->wptr: through the wptr shadow and the
+ * doorbell when the ring uses one, otherwise through UVD_RB_WPTR. Logs an
+ * error when @ring is not ring_enc[0] of the instance selected by ring->me.
+ */
+static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring *unified = &adev->vcn.inst[ring->me].ring_enc[0];
+
+	if (unified != ring)
+		DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+	if (!ring->use_doorbell) {
+		WREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR,
+			     lower_32_bits(ring->wptr));
+		return;
+	}
+
+	/* update the shadow first, then ring the doorbell */
+	*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+}
+
+/*
+ * vcn_v4_0_3_ring_reset - reset the VCN instance behind @ring
+ *
+ * Requests a reset of the instance through amdgpu_dpm_reset_vcn(), refreshes
+ * the RRMT capability flag, re-initializes the instance and restarts it in
+ * dpg mode. @vmid is not used.
+ *
+ * Returns 0 on success or a negative error code. A failed reset request or a
+ * failed dpg mode start is returned directly, without calling
+ * amdgpu_ring_reset_helper_end().
+ */
+static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring,
+				 unsigned int vmid,
+				 struct amdgpu_fence *timedout_fence)
+{
+	int r = 0;
+	int vcn_inst;
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
+
+	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+	/* the reset request takes a bitmask of physical instances */
+	vcn_inst = GET_INST(VCN, ring->me);
+	r = amdgpu_dpm_reset_vcn(adev, 1 << vcn_inst);
+
+	if (r) {
+		DRM_DEV_ERROR(adev->dev, "VCN reset fail : %d\n", r);
+		return r;
+	}
+
+	/* This flag is not set for VF, assumed to be disabled always */
+	if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100)
+		adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED);
+	vcn_v4_0_3_hw_init_inst(vinst);
+
+	/* do not report a successful reset when the restart itself failed */
+	r = vcn_v4_0_3_start_dpg_mode(vinst, adev->vcn.inst[ring->me].indirect_sram);
+	if (r) {
+		DRM_DEV_ERROR(adev->dev, "VCN start after reset fail : %d\n", r);
+		return r;
+	}
+
+	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+/*
+ * Ring callbacks for the unified VCN 4.0.3 ring (ring_enc[0] of each
+ * instance). Pointer handling, register emit, VM flush, HDP flush and reset
+ * are VCN 4.0.3 specific; IB/fence emission and end insertion reuse the
+ * vcn_v2_0 helpers.
+ */
+static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_ENC,
+ .align_mask = 0x3f,
+ .nop = VCN_ENC_CMD_NO_OP,
+ .get_rptr = vcn_v4_0_3_unified_ring_get_rptr,
+ .get_wptr = vcn_v4_0_3_unified_ring_get_wptr,
+ .set_wptr = vcn_v4_0_3_unified_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v4_0_3_enc_ring_emit_vm_flush: one reg_wait packet of 4 dwords */
+ 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* vcn_v2_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
+ .emit_ib = vcn_v2_0_enc_ring_emit_ib,
+ .emit_fence = vcn_v2_0_enc_ring_emit_fence,
+ .emit_vm_flush = vcn_v4_0_3_enc_ring_emit_vm_flush,
+ .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush,
+ .test_ring = amdgpu_vcn_enc_ring_test_ring,
+ .test_ib = amdgpu_vcn_unified_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_v2_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = vcn_v4_0_3_ring_reset,
+};
+
+/**
+ * vcn_v4_0_3_set_unified_ring_funcs - set unified ring functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Per instance: install the ring callbacks on ring_enc[0], set ring->me and compute aid_id
+ */
+static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, vcn_inst;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_3_unified_ring_vm_funcs;
+ adev->vcn.inst[i].ring_enc[0].me = i; /* ring->me is later used to index adev->vcn.inst[] */
+ vcn_inst = GET_INST(VCN, i);
+ adev->vcn.inst[i].aid_id =
+ vcn_inst / adev->vcn.num_inst_per_aid; /* GET_INST() index divided by instances per AID */
+ }
+}
+
+/**
+ * vcn_v4_0_3_is_idle - check VCN block is idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block structure
+ *
+ * Return: true only if regUVD_STATUS of every instance equals UVD_STATUS__IDLE
+ */
+static bool vcn_v4_0_3_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, ret = 1; /* starts as "idle"; cleared by the first busy instance */
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ret &= (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) ==
+ UVD_STATUS__IDLE); /* every instance is read, even after one is found busy */
+ }
+
+ return ret;
+}
+
+/**
+ * vcn_v4_0_3_wait_for_idle - wait for VCN block idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Wait on each instance in turn; return the first non-zero wait result, else 0
+ */
+static int vcn_v4_0_3_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, ret = 0; /* stays 0 when there are no instances to wait on */
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ret = SOC15_WAIT_ON_RREG(VCN, GET_INST(VCN, i), regUVD_STATUS,
+ UVD_STATUS__IDLE, UVD_STATUS__IDLE);
+ if (ret)
+ return ret; /* stop at the first instance whose wait failed */
+ }
+
+ return ret;
+}
+
+/* vcn_v4_0_3_set_clockgating_state - set VCN block clockgating state
+ *
+ * @ip_block: amdgpu_ip_block pointer
+ * @state: clock gating state
+ *
+ * Gating returns -EBUSY at the first non-idle instance; earlier instances stay gated
+ */
+static int vcn_v4_0_3_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = state == AMD_CG_STATE_GATE; /* any other state takes the disable path */
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ if (enable) {
+ if (RREG32_SOC15(VCN, GET_INST(VCN, i),
+ regUVD_STATUS) != UVD_STATUS__IDLE)
+ return -EBUSY; /* refuse to gate an instance that is not idle */
+ vcn_v4_0_3_enable_clock_gating(vinst);
+ } else {
+ vcn_v4_0_3_disable_clock_gating(vinst);
+ }
+ }
+ return 0;
+}
+
+static int vcn_v4_0_3_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
+{ /* Gate (stop) or ungate (start) one VCN instance and record the new state on success. */
+ struct amdgpu_device *adev = vinst->adev;
+ int ret = 0;
+
+ /* for SRIOV, guest should not control VCN Power-gating
+ * MMSCH FW should control Power-gating and clock-gating
+ * guest should avoid touching CGC and PG
+ */
+ if (amdgpu_sriov_vf(adev)) {
+ vinst->cur_state = AMD_PG_STATE_UNGATE; /* a VF always records "ungated", whatever @state was */
+ return 0;
+ }
+
+ if (state == vinst->cur_state) /* already in the requested state: nothing to do */
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = vcn_v4_0_3_stop(vinst);
+ else
+ ret = vcn_v4_0_3_start(vinst);
+
+ if (!ret) /* cur_state is left unchanged when stop/start failed */
+ vinst->cur_state = state;
+
+ return ret;
+}
+
+/**
+ * vcn_v4_0_3_set_interrupt_state - set VCN block interrupt state
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources
+ * @type: interrupt types
+ * @state: interrupt states
+ *
+ * No-op callback: none of the arguments are used and 0 is always returned
+ */
+static int vcn_v4_0_3_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_process_interrupt - process VCN block interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources
+ * @entry: interrupt entry from clients and sources
+ *
+ * Map entry->node_id to a VCN instance and process its fences; always returns 0
+ */
+static int vcn_v4_0_3_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t i, inst;
+
+ i = node_id_to_phys_map[entry->node_id]; /* value matched against each instance's aid_id below */
+
+ DRM_DEV_DEBUG(adev->dev, "IH: VCN TRAP\n");
+
+ for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst)
+ if (adev->vcn.inst[inst].aid_id == i) /* the first instance with a matching aid_id is used */
+ break;
+
+ if (inst >= adev->vcn.num_vcn_inst) { /* loop ran to completion: no instance matched */
+ dev_WARN_ONCE(adev->dev, 1,
+ "Interrupt received for unknown VCN instance %d\n",
+ entry->node_id);
+ return 0;
+ }
+
+ switch (entry->src_id) {
+ case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+ amdgpu_fence_process(&adev->vcn.inst[inst].ring_enc[0]);
+ break;
+ default:
+ DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0; /* unhandled source ids are logged, not reported as errors */
+}
+
+static int vcn_v4_0_3_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{ /* No-op .set callback for the RAS poison irq source: arguments unused. */
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v4_0_3_irq_funcs = { /* installed on inst[0]'s irq source by vcn_v4_0_3_set_irq_funcs() */
+ .set = vcn_v4_0_3_set_interrupt_state,
+ .process = vcn_v4_0_3_process_interrupt,
+};
+
+static const struct amdgpu_irq_src_funcs vcn_v4_0_3_ras_irq_funcs = { /* installed on inst[0]'s ras_poison_irq source */
+ .set = vcn_v4_0_3_set_ras_interrupt_state,
+ .process = amdgpu_vcn_process_poison_irq, /* handler is not defined in this file */
+};
+
+/**
+ * vcn_v4_0_3_set_irq_funcs - set VCN block interrupt irq functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Only inst[0]'s irq sources are set up: one irq type is added per VCN instance
+ */
+static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst->irq.num_types++; /* adev->vcn.inst-> is inst[0]; the loop index is deliberately not used */
+ }
+ adev->vcn.inst->irq.funcs = &vcn_v4_0_3_irq_funcs;
+
+ adev->vcn.inst->ras_poison_irq.num_types = 1;
+ adev->vcn.inst->ras_poison_irq.funcs = &vcn_v4_0_3_ras_irq_funcs;
+}
+
+static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { /* IP-block callbacks referenced by vcn_v4_0_3_ip_block */
+ .name = "vcn_v4_0_3",
+ .early_init = vcn_v4_0_3_early_init,
+ .late_init = vcn_v4_0_3_late_init,
+ .sw_init = vcn_v4_0_3_sw_init,
+ .sw_fini = vcn_v4_0_3_sw_fini,
+ .hw_init = vcn_v4_0_3_hw_init,
+ .hw_fini = vcn_v4_0_3_hw_fini,
+ .suspend = vcn_v4_0_3_suspend,
+ .resume = vcn_v4_0_3_resume,
+ .is_idle = vcn_v4_0_3_is_idle,
+ .wait_for_idle = vcn_v4_0_3_wait_for_idle,
+ .set_clockgating_state = vcn_v4_0_3_set_clockgating_state,
+ .set_powergating_state = vcn_set_powergating_state, /* shared helper, not the per-instance vcn_v4_0_3_set_pg_state() */
+ .dump_ip_state = amdgpu_vcn_dump_ip_state,
+ .print_ip_state = amdgpu_vcn_print_ip_state,
+};
+
+const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block = { /* exported; declared extern in vcn_v4_0_3.h */
+ .type = AMD_IP_BLOCK_TYPE_VCN,
+ .major = 4,
+ .minor = 0,
+ .rev = 3, /* major.minor.rev = 4.0.3 */
+ .funcs = &vcn_v4_0_3_ip_funcs,
+};
+
+static const struct amdgpu_ras_err_status_reg_entry vcn_v4_0_3_ue_reg_list[] = { /* UE status LO/HI register pairs used by the RAS query and reset helpers */
+ {AMDGPU_RAS_REG_ENTRY(VCN, 0, regVCN_UE_ERR_STATUS_LO_VIDD, regVCN_UE_ERR_STATUS_HI_VIDD),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "VIDD"},
+ {AMDGPU_RAS_REG_ENTRY(VCN, 0, regVCN_UE_ERR_STATUS_LO_VIDV, regVCN_UE_ERR_STATUS_HI_VIDV),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "VIDV"},
+};
+
+static void vcn_v4_0_3_inst_query_ras_error_count(struct amdgpu_device *adev,
+ uint32_t vcn_inst,
+ void *ras_err_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
+
+ /* vcn v4_0_3 only supports querying uncorrectable errors, so only ue_count is updated */
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ vcn_v4_0_3_ue_reg_list,
+ ARRAY_SIZE(vcn_v4_0_3_ue_reg_list),
+ NULL, 0, GET_INST(VCN, vcn_inst),
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ &err_data->ue_count);
+}
+
+static void vcn_v4_0_3_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_err_status)
+{ /* Query the UE count of every VCN instance into @ras_err_status; warns and returns if VCN RAS is unsupported. */
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
+ dev_warn(adev->dev, "VCN RAS is not supported\n");
+ return;
+ }
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ vcn_v4_0_3_inst_query_ras_error_count(adev, i, ras_err_status);
+}
+
+static void vcn_v4_0_3_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ uint32_t vcn_inst)
+{ /* Reset the registers in vcn_v4_0_3_ue_reg_list for one instance. */
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ vcn_v4_0_3_ue_reg_list,
+ ARRAY_SIZE(vcn_v4_0_3_ue_reg_list),
+ GET_INST(VCN, vcn_inst));
+}
+
+static void vcn_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev)
+{ /* Reset the RAS error registers of every VCN instance; warns and returns if VCN RAS is unsupported. */
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
+ dev_warn(adev->dev, "VCN RAS is not supported\n");
+ return;
+ }
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ vcn_v4_0_3_inst_reset_ras_error_count(adev, i);
+}
+
+static uint32_t vcn_v4_0_3_query_poison_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, uint32_t sub_block)
+{ /* Return the POISONED_PF field for one instance/sub-block; 0 for sub-blocks not handled below. */
+ uint32_t poison_stat = 0, reg_value = 0;
+
+ switch (sub_block) {
+ case AMDGPU_VCN_V4_0_3_VCPU_VCODEC:
+ reg_value = RREG32_SOC15(VCN, instance, regUVD_RAS_VCPU_VCODEC_STATUS); /* NOTE(review): no GET_INST() here, unlike the other register reads in this file - confirm */
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF);
+ break;
+ default:
+ break;
+ }
+
+ if (poison_stat)
+ dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n", /* NOTE(review): %d is used with uint32_t arguments */
+ instance, sub_block);
+
+ return poison_stat;
+}
+
+static bool vcn_v4_0_3_query_poison_status(struct amdgpu_device *adev)
+{ /* Return true if any sub-block of any VCN instance reports poison. */
+ uint32_t inst, sub;
+ uint32_t poison_stat = 0;
+
+ for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++)
+ for (sub = 0; sub < AMDGPU_VCN_V4_0_3_MAX_SUB_BLOCK; sub++)
+ poison_stat +=
+ vcn_v4_0_3_query_poison_by_instance(adev, inst, sub); /* every instance/sub-block is queried; no early exit */
+
+ return !!poison_stat; /* collapse the accumulated value to a bool */
+}
+
+static const struct amdgpu_ras_block_hw_ops vcn_v4_0_3_ras_hw_ops = { /* RAS hardware callbacks referenced by vcn_v4_0_3_ras */
+ .query_ras_error_count = vcn_v4_0_3_query_ras_error_count,
+ .reset_ras_error_count = vcn_v4_0_3_reset_ras_error_count,
+ .query_poison_status = vcn_v4_0_3_query_poison_status,
+};
+
+static int vcn_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{ /* Decode @bank, tag it as UE or CE according to @type and log it; @data is unused. -EINVAL for other types. */
+ struct aca_bank_info info;
+ u64 misc0;
+ int ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0]; /* only consumed by the CE branch below */
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
+ 1ULL); /* a UE bank is always logged with a count of 1 */
+ break;
+ case ACA_SMU_TYPE_CE:
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+ ACA_REG__MISC0__ERRCNT(misc0)); /* CE count is extracted from the MISC0 value */
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+/* Error codes accepted for VCN banks; refer to the SMU driver interface (if) header file */
+static int vcn_v4_0_3_err_codes[] = {
+ 14, 15, /* VCN */
+};
+
+static bool vcn_v4_0_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{ /* Accept a bank only if its IPID instance id and error code match; @type and @data are unused. */
+ u32 instlo;
+
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1); /* clear bit 0 before comparing */
+
+ if (instlo != mmSMNAID_AID0_MCA_SMU)
+ return false;
+
+ if (aca_bank_check_error_codes(handle->adev, bank,
+ vcn_v4_0_3_err_codes,
+ ARRAY_SIZE(vcn_v4_0_3_err_codes)))
+ return false; /* a non-zero result from the check rejects the bank */
+
+ return true;
+}
+
+static const struct aca_bank_ops vcn_v4_0_3_aca_bank_ops = { /* bank callbacks referenced by vcn_v4_0_3_aca_info */
+ .aca_bank_parser = vcn_v4_0_3_aca_bank_parser,
+ .aca_bank_is_valid = vcn_v4_0_3_aca_bank_is_valid,
+};
+
+static const struct aca_info vcn_v4_0_3_aca_info = { /* passed to amdgpu_ras_bind_aca() in vcn_v4_0_3_ras_late_init() */
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK, /* NOTE(review): UE only, although the bank parser also has a CE branch - confirm */
+ .bank_ops = &vcn_v4_0_3_aca_bank_ops,
+};
+
+static int vcn_v4_0_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{ /* RAS late init: common block init, take the poison irq if supported, then bind ACA. Returns 0 or the first error. */
+ int r;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block) &&
+ adev->vcn.inst->ras_poison_irq.funcs) { /* skipped when no poison irq callbacks were installed */
+ r = amdgpu_irq_get(adev, &adev->vcn.inst->ras_poison_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__VCN,
+ &vcn_v4_0_3_aca_info, NULL);
+ if (r)
+ goto late_fini; /* NOTE(review): an irq reference taken above is not dropped here unless late_fini does it - confirm */
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+
+ return r;
+}
+
+static struct amdgpu_vcn_ras vcn_v4_0_3_ras = { /* assigned to adev->vcn.ras by vcn_v4_0_3_set_ras_funcs() */
+ .ras_block = {
+ .hw_ops = &vcn_v4_0_3_ras_hw_ops,
+ .ras_late_init = vcn_v4_0_3_ras_late_init,
+ },
+};
+
+static void vcn_v4_0_3_set_ras_funcs(struct amdgpu_device *adev)
+{ /* Point adev->vcn.ras at this file's RAS descriptor. */
+ adev->vcn.ras = &vcn_v4_0_3_ras;
+}
+
+static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev,
+ int inst_idx, bool indirect)
+{ /* Write the RAS control and RAS interrupt-enable registers for @inst_idx through the DPG-mode write macro. */
+ uint32_t tmp;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) /* nothing is written when VCN RAS is unsupported */
+ return;
+
+ tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL), /* offset is computed with instance 0; @inst_idx selects the target */
+ tmp, 0, indirect);
+
+ tmp = UVD_VCPU_INT_EN2__RASCNTL_VCPU_VCODEC_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_VCPU_INT_EN2),
+ tmp, 0, indirect);
+
+ tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN),
+ tmp, 0, indirect);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h
new file mode 100644
index 000000000000..aeab89853a92
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_V4_0_3_H__
+#define __VCN_V4_0_3_H__
+
+enum amdgpu_vcn_v4_0_3_sub_block { /* sub-block ids accepted by the poison query helpers */
+ AMDGPU_VCN_V4_0_3_VCPU_VCODEC = 0,
+
+ AMDGPU_VCN_V4_0_3_MAX_SUB_BLOCK, /* number of sub-blocks; keep last, used as a loop bound */
+};
+
+extern const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block;
+
+void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask);
+
+void vcn_v4_0_3_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val);
+void vcn_v4_0_3_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr);
+void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring);
+
+#endif /* __VCN_V4_0_3_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
new file mode 100644
index 000000000000..b107ee80e472
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
@@ -0,0 +1,1723 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_cs.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_hw_ip.h"
+#include "vcn_v2_0.h"
+#include "mmsch_v4_0.h"
+#include "vcn_v4_0_5.h"
+
+#include "vcn/vcn_4_0_5_offset.h"
+#include "vcn/vcn_4_0_5_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
+
+#include <drm/drm_drv.h>
+
+#define mmUVD_DPG_LMA_CTL regUVD_DPG_LMA_CTL
+#define mmUVD_DPG_LMA_CTL_BASE_IDX regUVD_DPG_LMA_CTL_BASE_IDX
+#define mmUVD_DPG_LMA_DATA regUVD_DPG_LMA_DATA
+#define mmUVD_DPG_LMA_DATA_BASE_IDX regUVD_DPG_LMA_DATA_BASE_IDX
+
+#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
+#define VCN1_VID_SOC_ADDRESS_3_0 (0x48300 + 0x38000)
+#define VCN1_AON_SOC_ADDRESS_3_0 (0x48000 + 0x38000)
+
+#define VCN_HARVEST_MMSCH 0
+
+#define RDECODE_MSG_CREATE 0x00000000
+#define RDECODE_MESSAGE_CREATE 0x00000001
+
+static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_5[] = { /* registers handed to amdgpu_vcn_reg_dump_init() in sw_init */
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_CONFIG),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE)
+};
+
+static int amdgpu_ih_clientid_vcns[] = { /* IH client id per VCN instance; indexed by instance in vcn_v4_0_5_sw_init() */
+ SOC15_IH_CLIENTID_VCN,
+ SOC15_IH_CLIENTID_VCN1 /* only two entries: the instance index must stay below 2 */
+};
+
+static void vcn_v4_0_5_set_unified_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev);
+static int vcn_v4_0_5_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v4_0_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
+static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring);
+
+/**
+ * vcn_v4_0_5_early_init - set function pointers and load microcode
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Set ring and irq function pointers
+ * Load microcode from filesystem
+ */
+static int vcn_v4_0_5_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
+
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 6))
+ adev->vcn.per_inst_fw = true; /* only set for IP version 4.0.6 */
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ /* re-use enc ring as unified ring */
+ adev->vcn.inst[i].num_enc_rings = 1; /* sole statement of the braceless loop above */
+ vcn_v4_0_5_set_unified_ring_funcs(adev); /* runs once, after the loop */
+ vcn_v4_0_5_set_irq_funcs(adev);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst[i].set_pg_state = vcn_v4_0_5_set_pg_state;
+
+ r = amdgpu_vcn_early_init(adev, i);
+ if (r)
+ return r; /* stop at the first instance that fails */
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_5_sw_init - sw init for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Per non-harvested instance: sw init, irq sources, unified ring and fw_shared flags
+ */
+static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_ring *ring;
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+
+ if (adev->vcn.harvest_config & (1 << i)) /* bit i set: instance i is harvested, skip it */
+ continue;
+
+ r = amdgpu_vcn_sw_init(adev, i);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev, i);
+
+ r = amdgpu_vcn_resume(adev, i);
+ if (r)
+ return r;
+
+ atomic_set(&adev->vcn.inst[i].sched_score, 0);
+
+ /* VCN UNIFIED TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
+ VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
+ if (r)
+ return r;
+
+ /* VCN POISON TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
+ VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq);
+ if (r)
+ return r;
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ ring->use_doorbell = true;
+ if (amdgpu_sriov_vf(adev))
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ i * (adev->vcn.inst[i].num_enc_rings + 1) + 1;
+ else
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 2 + 8 * i; /* same base and 8-per-instance stride as the doorbell range set in hw_init */
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ sprintf(ring->name, "vcn_unified_%d", i);
+
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
+ AMDGPU_RING_PRIO_0, &adev->vcn.inst[i].sched_score);
+ if (r)
+ return r;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+ fw_shared->sq.is_enabled = 1;
+
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG);
+ fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ?
+ AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU;
+
+ if (amdgpu_sriov_vf(adev))
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT); /* converted like every other present_flag_0 write */
+ fw_shared->drm_key_wa.method =
+ AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING;
+
+ if (amdgpu_vcnfw_log)
+ amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.inst[i].pause_dpg_mode = vcn_v4_0_5_pause_dpg_mode;
+ }
+
+ adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+ if (!amdgpu_sriov_vf(adev))
+ adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; /* per-queue reset is not advertised to a VF */
+
+ r = amdgpu_vcn_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_virt_alloc_mm_table(adev);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_4_0_5, ARRAY_SIZE(vcn_reg_list_4_0_5));
+
+ return r;
+}
+
+/**
+ * vcn_v4_0_5_sw_fini - sw fini for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Clear the fw_shared flags, then suspend and free the sw state of each instance
+ */
+static int vcn_v4_0_5_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r, idx;
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) { /* fw_shared is only touched inside the drm_dev_enter/exit section */
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = 0;
+ fw_shared->sq.is_enabled = 0;
+ }
+
+ drm_dev_exit(idx);
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_free_mm_table(adev);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) { /* NOTE(review): no harvest_config check here, unlike the loop above - confirm */
+ r = amdgpu_vcn_suspend(adev, i);
+ if (r)
+ return r; /* later instances are not finalized when a suspend fails */
+
+ amdgpu_vcn_sw_fini(adev, i);
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_5_hw_init - start and test VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Program each instance's doorbell range and run the ring test on its unified ring
+ */
+static int vcn_v4_0_5_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i)) /* skip harvested instances */
+ continue;
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i); /* 8 doorbell slots per instance */
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r; /* first failing ring test aborts the init */
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_5_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Cancel idle work and, when not an SR-IOV VF, power-gate instances that need it
+ */
+static int vcn_v4_0_5_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ cancel_delayed_work_sync(&vinst->idle_work);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || /* with DPG support: always gate */
+ (vinst->cur_state != AMD_PG_STATE_GATE && /* otherwise: only if not gated yet ... */
+ RREG32_SOC15(VCN, i, regUVD_STATUS))) { /* ... and regUVD_STATUS reads non-zero */
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE); /* return value is ignored */
+ }
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_5_suspend - suspend VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Run hw_fini, then amdgpu_vcn_suspend() per instance; returns the first error or 0
+ */
+static int vcn_v4_0_5_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
+
+ r = vcn_v4_0_5_hw_fini(ip_block);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(ip_block->adev, i); /* ip_block->adev is the same pointer as adev */
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+/**
+ * vcn_v4_0_5_resume - resume VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Run amdgpu_vcn_resume() per instance, then hw_init; returns the first error or 0
+ */
+static int vcn_v4_0_5_resume(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_resume(ip_block->adev, i); /* ip_block->adev is the same pointer as adev */
+ if (r)
+ return r;
+ }
+
+ r = vcn_v4_0_5_hw_init(ip_block); /* hw init only runs once every instance resumed */
+
+ return r;
+}
+
+/**
+ * vcn_v4_0_5_mc_resume - memory controller programming
+ *
+ * @vinst: VCN instance
+ *
+ * Let the VCN memory controller know its offsets
+ */
+static void vcn_v4_0_5_mc_resume(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t offset, size;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* ucode size plus 8 bytes, GPU-page aligned */
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0; /* PSP load: the windows below start at gpu_addr itself */
+ } else {
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr));
+ offset = size; /* non-PSP load: the windows below start after the fw image */
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack */
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context, placed directly after the stack window */
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+
+ /* non-cache window: the fw_shared buffer */
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_SIZE0,
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+}
+
+/**
+ * vcn_v4_0_5_mc_resume_dpg_mode - memory controller programming for dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Let the VCN memory controller know its offsets with dpg mode
+ */
+static void vcn_v4_0_5_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t offset, size;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo),
+ 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi),
+ 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ offset = 0;
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ offset = size;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ if (!indirect)
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE),
+ 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE),
+ 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG),
+ adev->gfx.config.gb_addr_config, 0, indirect);
+}
+
+/**
+ * vcn_v4_0_5_disable_static_power_gating - disable VCN static power gating
+ *
+ * @vinst: VCN instance
+ *
+ * Disable static power gating for VCN block.
+ *
+ * Writes the ONO2..ONO5 power config fields of UVD_IPX_DLDO_CONFIG one at a
+ * time, polling the matching field of UVD_IPX_DLDO_STATUS after each write,
+ * and then updates UVD_POWER_STATUS. The values written and polled for depend
+ * on whether AMD_PG_SUPPORT_VCN is set in adev->pg_flags.
+ */
+static void vcn_v4_0_5_disable_static_power_gating(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t data = 0;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ /* ONO2 is written with 1 and polled for 0; ONO3..ONO5 are written
+ * with 2 and polled for 1. Each write carries only that one field.
+ */
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 1 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0,
+ UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 2 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 2 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 2 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK);
+ } else {
+ /* without AMD_PG_SUPPORT_VCN every ONOx field is written with 1
+ * and its status field is polled for 0
+ */
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 1 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 0, UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 1 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 0, UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 1 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 0, UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 1 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 0, UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK);
+ }
+
+ /* read-modify-write UVD_POWER_STATUS: clear bits 0x103, then set
+ * UVDM_UVDU_PWR_ON and UVD_PG_EN only when AMD_PG_SUPPORT_VCN is set
+ */
+ data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS);
+ data &= ~0x103;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
+ data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON |
+ UVD_POWER_STATUS__UVD_PG_EN_MASK;
+ WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data);
+}
+
+/**
+ * vcn_v4_0_5_enable_static_power_gating - enable VCN static power gating
+ *
+ * @vinst: VCN instance
+ *
+ * Enable static power gating for VCN block.
+ *
+ * Does nothing unless AMD_PG_SUPPORT_VCN is set in adev->pg_flags. Otherwise
+ * it updates UVD_POWER_STATUS and then writes the ONO5, ONO4, ONO3 and ONO2
+ * fields of UVD_IPX_DLDO_CONFIG in that order, polling UVD_IPX_DLDO_STATUS
+ * after each write.
+ */
+static void vcn_v4_0_5_enable_static_power_gating(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t data;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ /* Before power off, this indicator has to be turned on */
+ data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS);
+ data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
+ data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
+ WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data);
+
+ /* ONO5 down to ONO2: each field is written with 2 and its status
+ * field is polled for 1
+ */
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 2 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 2 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 2 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 2 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK);
+ }
+}
+
+/**
+ * vcn_v4_0_5_disable_clock_gating - disable VCN clock gating
+ *
+ * @vinst: VCN instance
+ *
+ * Disable clock gating for VCN block.
+ *
+ * Returns immediately when AMD_CG_SUPPORT_VCN_MGCG is set in adev->cg_flags.
+ * Otherwise it read-modify-writes UVD_CGC_CTRL, UVD_CGC_GATE,
+ * UVD_SUVD_CGC_GATE and UVD_SUVD_CGC_CTRL directly through MMIO.
+ */
+static void vcn_v4_0_5_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t data;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ return;
+
+ /* VCN disable CGC */
+ data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
+
+ /* clear the per-block gate bits in UVD_CGC_GATE */
+ data = RREG32_SOC15(VCN, inst, regUVD_CGC_GATE);
+ data &= ~(UVD_CGC_GATE__SYS_MASK
+ | UVD_CGC_GATE__UDEC_MASK
+ | UVD_CGC_GATE__MPEG2_MASK
+ | UVD_CGC_GATE__REGS_MASK
+ | UVD_CGC_GATE__RBC_MASK
+ | UVD_CGC_GATE__LMI_MC_MASK
+ | UVD_CGC_GATE__LMI_UMC_MASK
+ | UVD_CGC_GATE__IDCT_MASK
+ | UVD_CGC_GATE__MPRD_MASK
+ | UVD_CGC_GATE__MPC_MASK
+ | UVD_CGC_GATE__LBSI_MASK
+ | UVD_CGC_GATE__LRBBM_MASK
+ | UVD_CGC_GATE__UDEC_RE_MASK
+ | UVD_CGC_GATE__UDEC_CM_MASK
+ | UVD_CGC_GATE__UDEC_IT_MASK
+ | UVD_CGC_GATE__UDEC_DB_MASK
+ | UVD_CGC_GATE__UDEC_MP_MASK
+ | UVD_CGC_GATE__WCB_MASK
+ | UVD_CGC_GATE__VCPU_MASK
+ | UVD_CGC_GATE__MMSCH_MASK);
+
+ WREG32_SOC15(VCN, inst, regUVD_CGC_GATE, data);
+ /* poll until the whole UVD_CGC_GATE register reads back as 0 */
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_CGC_GATE, 0, 0xFFFFFFFF);
+
+ /* clear the per-block mode bits in UVD_CGC_CTRL */
+ data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
+ data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
+
+ /* unlike UVD_CGC_GATE above, the SUVD gate bits are set, not cleared */
+ data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_GATE);
+ data |= (UVD_SUVD_CGC_GATE__SRE_MASK
+ | UVD_SUVD_CGC_GATE__SIT_MASK
+ | UVD_SUVD_CGC_GATE__SMP_MASK
+ | UVD_SUVD_CGC_GATE__SCM_MASK
+ | UVD_SUVD_CGC_GATE__SDB_MASK
+ | UVD_SUVD_CGC_GATE__SRE_H264_MASK
+ | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_H264_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCM_H264_MASK
+ | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_H264_MASK
+ | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCLR_MASK
+ | UVD_SUVD_CGC_GATE__UVD_SC_MASK
+ | UVD_SUVD_CGC_GATE__ENT_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
+ | UVD_SUVD_CGC_GATE__SITE_MASK
+ | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
+ | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_GATE, data);
+
+ /* clear the per-block mode bits in UVD_SUVD_CGC_CTRL */
+ data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL);
+ data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL, data);
+}
+
+/**
+ * vcn_v4_0_5_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode
+ *
+ * @vinst: VCN instance
+ * @sram_sel: sram select
+ * @indirect: indirectly write sram
+ *
+ * Disable clock gating for VCN block with dpg mode.
+ *
+ * Returns immediately when AMD_CG_SUPPORT_VCN_MGCG is set in adev->cg_flags.
+ * Otherwise the register values are built from scratch (no register is read
+ * back) and issued through WREG32_SOC15_DPG_MODE with @sram_sel and
+ * @indirect passed through unchanged.
+ */
+static void vcn_v4_0_5_disable_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ uint8_t sram_sel,
+ uint8_t indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t reg_data = 0;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ return;
+
+ /* enable sw clock gating control */
+ reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ /* make sure none of the per-block mode bits are set in the value */
+ reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
+ UVD_CGC_CTRL__SYS_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MODE_MASK |
+ UVD_CGC_CTRL__MPEG2_MODE_MASK |
+ UVD_CGC_CTRL__REGS_MODE_MASK |
+ UVD_CGC_CTRL__RBC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+ UVD_CGC_CTRL__IDCT_MODE_MASK |
+ UVD_CGC_CTRL__MPRD_MODE_MASK |
+ UVD_CGC_CTRL__MPC_MODE_MASK |
+ UVD_CGC_CTRL__LBSI_MODE_MASK |
+ UVD_CGC_CTRL__LRBBM_MODE_MASK |
+ UVD_CGC_CTRL__WCB_MODE_MASK |
+ UVD_CGC_CTRL__VCPU_MODE_MASK);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_CGC_CTRL), reg_data, sram_sel, indirect);
+
+ /* turn off clock gating */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_CGC_GATE), 0, sram_sel, indirect);
+
+ /* turn on SUVD clock gating */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
+
+ /* turn on sw mode in UVD_SUVD_CGC_CTRL */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
+}
+
+/**
+ * vcn_v4_0_5_enable_clock_gating - enable VCN clock gating
+ *
+ * @vinst: VCN instance
+ *
+ * Enable clock gating for VCN block.
+ *
+ * Returns immediately when AMD_CG_SUPPORT_VCN_MGCG is set in adev->cg_flags.
+ * Otherwise it sets the delay fields and the per-block mode bits in
+ * UVD_CGC_CTRL and the per-block mode bits in UVD_SUVD_CGC_CTRL.
+ */
+static void vcn_v4_0_5_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t data;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ return;
+
+ /* enable VCN CGC */
+ data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
+ /* OR-ing in 0 leaves DYN_CLOCK_MODE as read from the register */
+ data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
+
+ /* set the per-block mode bits in UVD_CGC_CTRL */
+ data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
+ data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
+
+ /* set the per-block mode bits in UVD_SUVD_CGC_CTRL */
+ data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL);
+ data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL, data);
+}
+
+/**
+ * vcn_v4_0_5_start_dpg_mode - VCN start with dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Start VCN block with dpg mode.
+ *
+ * When @indirect is true the DPG register writes are passed to
+ * WREG32_SOC15_DPG_MODE with the indirect flag set, and
+ * amdgpu_vcn_psp_update_sram() is called once they have all been issued.
+ * The unified ring (ring_enc[0]) is then programmed through direct register
+ * writes.
+ *
+ * Return: 0 on success, or the error code returned by
+ * amdgpu_vcn_psp_update_sram().
+ */
+static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_ring *ring;
+ uint32_t tmp;
+ int ret;
+
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
+ WREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS, tmp);
+
+ /* rewind the DPG sram write cursor to the start of the sram buffer */
+ if (indirect)
+ adev->vcn.inst[inst_idx].dpg_sram_curr_addr =
+ (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+
+ /* program the clock gating registers via the dpg-mode disable helper */
+ vcn_v4_0_5_disable_clock_gating_dpg_mode(vinst, 0, indirect);
+
+ /* enable VCPU clock */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* disable master interrupt */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MASTINT_EN), 0, 0, indirect);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ 0x00100000L);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_CTRL), tmp, 0, indirect);
+
+ /* setup regUVD_MPC_CNTL */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MPC_CNTL),
+ 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
+
+ /* setup regUVD_MPC_SET_MUXA0 */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MPC_SET_MUXA0),
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
+
+ /* setup regUVD_MPC_SET_MUXB0 */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MPC_SET_MUXB0),
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
+
+ /* setup regUVD_MPC_SET_MUX */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MPC_SET_MUX),
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
+
+ vcn_v4_0_5_mc_resume_dpg_mode(vinst, indirect);
+
+ /* rewrite UVD_VCPU_CNTL, this time without BLK_RST */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* enable LMI MC and UMC channels */
+ tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect);
+
+ /* enable master interrupt */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+ /* in indirect mode, hand the queued sram contents to PSP */
+ if (indirect) {
+ ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, 0);
+ if (ret) {
+ dev_err(adev->dev, "vcn sram load failed %d\n", ret);
+ return ret;
+ }
+ }
+
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+
+ /* program ring buffer base and size */
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4);
+
+ /* disable RB1 and flag a ring reset while the pointers are zeroed */
+ tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0);
+
+ /* copy the read pointer into the write pointer and cache it in ring->wptr */
+ tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
+
+ /* re-enable RB1 and clear the reset and hold-off flags */
+ tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ /* program the RB1 doorbell offset and enable it */
+ WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ /* Keeping one read-back to ensure all register writes are done, otherwise
+ * it may introduce race conditions */
+ RREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL);
+
+ return 0;
+}
+
+
+/**
+ * vcn_v4_0_5_start - VCN start
+ *
+ * @vinst: VCN instance
+ *
+ * Start VCN block.
+ *
+ * Does nothing for an instance whose bit is set in adev->vcn.harvest_config.
+ * When AMD_PG_SUPPORT_VCN_DPG is set in adev->pg_flags the work is delegated
+ * to vcn_v4_0_5_start_dpg_mode(). Otherwise the block is brought up through
+ * direct register writes, the VCPU is released from reset and UVD_STATUS is
+ * polled for bit 0x2.
+ *
+ * Return: 0 on success or for a harvested instance, the result of
+ * vcn_v4_0_5_start_dpg_mode() in DPG mode, or -1 when UVD_STATUS bit 0x2 is
+ * still clear after all retries.
+ */
+static int vcn_v4_0_5_start(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_ring *ring;
+ uint32_t tmp;
+ int j, k, r;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, true, i);
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v4_0_5_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram);
+
+ /* disable VCN power gating */
+ vcn_v4_0_5_disable_static_power_gating(vinst);
+
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, i, regUVD_STATUS, tmp);
+
+ /* SW clock gating */
+ vcn_v4_0_5_disable_clock_gating(vinst);
+
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ /* take the LMI and LMI UMC blocks out of soft reset */
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup regUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, i, regUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUX */
+ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v4_0_5_mc_resume(vinst);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* up to 10 attempts; each polls UVD_STATUS up to 100 times, 10 ms
+ * apart, for bit 0x2 to be set
+ */
+ for (j = 0; j < 10; ++j) {
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) {
+ status = RREG32_SOC15(VCN, i, regUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ }
+
+ if (amdgpu_emu_mode == 1) {
+ /* emulation mode: keep polling, never pulse the VCPU reset */
+ r = -1;
+ if (status & 2) {
+ r = 0;
+ break;
+ }
+ } else {
+ r = 0;
+ if (status & 2)
+ break;
+
+ /* pulse BLK_RST (assert, 10 ms, deassert, 10 ms) and retry */
+ dev_err(adev->dev,
+ "VCN[%d] is not responding, trying to reset VCPU!!!\n", i);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ mdelay(10);
+ r = -1;
+ }
+ }
+
+ /* r is still -1 only if every attempt above failed */
+ if (r) {
+ dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
+ return r;
+ }
+
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+ /* program the RB1 doorbell offset and enable it */
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ /* program ring buffer base and size */
+ WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);
+
+ /* disable RB1 and flag a ring reset while the pointers are zeroed */
+ tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0);
+
+ /* copy the read pointer into the write pointer and cache it in ring->wptr */
+ tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);
+
+ /* re-enable RB1 and clear the reset and hold-off flags */
+ tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ /* Keeping one read-back to ensure all register writes are done, otherwise
+ * it may introduce race conditions */
+ RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_5_stop_dpg_mode - VCN stop with dpg mode
+ *
+ * @vinst: VCN instance
+ *
+ * Stop VCN block with dpg mode.
+ *
+ * Waits for UVD_POWER_STATUS to read 1 and for the ring read pointer to
+ * catch up with the write pointer, then clears the UVD_PG_MODE bit. The
+ * results of the waits are not checked.
+ */
+static void vcn_v4_0_5_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t tmp;
+
+ /* Wait for power status to be 1 */
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* wait for read ptr to be equal to write ptr */
+ tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);
+
+ /* wait for power status to be 1 again now that the ring has drained */
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* disable dynamic power gating mode */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, regUVD_STATUS);
+}
+
+/**
+ * vcn_v4_0_5_stop - VCN stop
+ *
+ * @vinst: VCN instance
+ *
+ * Stop VCN block.
+ *
+ * Does nothing for an instance whose bit is set in adev->vcn.harvest_config.
+ * When AMD_PG_SUPPORT_VCN_DPG is set in adev->pg_flags the work is delegated
+ * to vcn_v4_0_5_stop_dpg_mode(). Every path past the harvest check ends at
+ * the "done" label, where amdgpu_dpm_enable_vcn() is called with false if
+ * dpm is enabled, including the paths where a wait failed.
+ *
+ * Return: 0 on success, or the non-zero result of the SOC15_WAIT_ON_RREG
+ * that failed.
+ */
+static int vcn_v4_0_5_stop(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+ uint32_t tmp;
+ int r = 0;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+
+ /* set the DPG hold-off flag in the firmware shared area before stopping */
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ vcn_v4_0_5_stop_dpg_mode(vinst);
+ r = 0;
+ goto done;
+ }
+
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ goto done;
+
+ /* wait for the LMI read/write clean status bits to be set */
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
+
+ /* disable LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp);
+ /* wait for the UMC read/write clean status bits to be set */
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
+
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+ /* apply soft reset */
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+
+ /* clear status */
+ WREG32_SOC15(VCN, i, regUVD_STATUS, 0);
+
+ /* apply HW clock gating */
+ vcn_v4_0_5_enable_clock_gating(vinst);
+
+ /* enable VCN power gating */
+ vcn_v4_0_5_enable_static_power_gating(vinst);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, regUVD_STATUS);
+
+done:
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, false, i);
+
+ return r;
+}
+
+/**
+ * vcn_v4_0_5_pause_dpg_mode - VCN pause with dpg mode
+ *
+ * @vinst: VCN instance
+ * @new_state: pause state
+ *
+ * Apply a requested DPG pause state to the VCN block. Nothing is touched
+ * when the requested fw_based state equals the one already recorded for the
+ * instance; otherwise UVD_DPG_PAUSE is updated and the new state is recorded.
+ *
+ * Return: always 0.
+ */
+static int vcn_v4_0_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+				     struct dpg_pause_state *new_state)
+{
+	struct amdgpu_device *adev = vinst->adev;
+	int inst_idx = vinst->inst;
+	uint32_t pause_reg;
+	int wait_err;
+
+	/* only act when the requested state differs from the recorded one */
+	if (adev->vcn.inst[inst_idx].pause_state.fw_based == new_state->fw_based)
+		return 0;
+
+	DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d",
+		      adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
+
+	/* start from the current register value with the ACK bit masked off */
+	pause_reg = RREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE);
+	pause_reg &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK;
+
+	if (new_state->fw_based != VCN_DPG_STATE__PAUSE) {
+		/* unpause dpg, no need to wait */
+		pause_reg &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+		WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, pause_reg);
+	} else {
+		wait_err = SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 0x1,
+					      UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+		if (wait_err == 0) {
+			/* pause DPG */
+			pause_reg |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+			WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, pause_reg);
+
+			/* wait for ACK */
+			SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_DPG_PAUSE,
+					   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+					   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+			SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS,
+					   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
+					   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+		}
+	}
+
+	/* the new state is recorded even if the pause wait above failed */
+	adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
+
+	return 0;
+}
+
+/**
+ * vcn_v4_0_5_unified_ring_get_rptr - get unified read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Logs an error if @ring is not ring_enc[0] of the instance selected by
+ * ring->me; the register is read either way.
+ *
+ * Return: the value read from UVD_RB_RPTR for instance ring->me.
+ */
+static uint64_t vcn_v4_0_5_unified_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring *unified = &adev->vcn.inst[ring->me].ring_enc[0];
+
+	if (unified != ring)
+		DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+	return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR);
+}
+
+/**
+ * vcn_v4_0_5_unified_ring_get_wptr - get unified write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Logs an error if @ring is not ring_enc[0] of the instance selected by
+ * ring->me; the pointer is fetched either way.
+ *
+ * Return: the value stored at ring->wptr_cpu_addr when the ring uses a
+ * doorbell, otherwise the value read from UVD_RB_WPTR.
+ */
+static uint64_t vcn_v4_0_5_unified_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring *unified = &adev->vcn.inst[ring->me].ring_enc[0];
+
+	if (unified != ring)
+		DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+	/* without a doorbell the register is the source for the pointer */
+	if (!ring->use_doorbell)
+		return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR);
+
+	return *ring->wptr_cpu_addr;
+}
+
+/**
+ * vcn_v4_0_5_unified_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the low 32 bits of ring->wptr to the hardware: through
+ * ring->wptr_cpu_addr plus a doorbell write when the ring uses a doorbell,
+ * otherwise through UVD_RB_WPTR. Logs an error if @ring is not ring_enc[0]
+ * of the instance selected by ring->me; the pointer is written either way.
+ */
+static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring *unified = &adev->vcn.inst[ring->me].ring_enc[0];
+
+	if (unified != ring)
+		DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+	if (!ring->use_doorbell) {
+		WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr));
+		return;
+	}
+
+	/* update the CPU-visible copy first, then ring the doorbell */
+	*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+}
+
+/*
+ * Reset the unified ring by stopping and restarting the VCN instance that
+ * owns it (selected by ring->me), bracketed by the ring reset helpers.
+ * @vmid is not used. Returns the first non-zero result from stop or start,
+ * otherwise the result of amdgpu_ring_reset_helper_end().
+ */
+static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring,
+				 unsigned int vmid,
+				 struct amdgpu_fence *timedout_fence)
+{
+	struct amdgpu_vcn_inst *vinst = &ring->adev->vcn.inst[ring->me];
+	int err;
+
+	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+	/* restart only if the stop succeeded */
+	err = vcn_v4_0_5_stop(vinst);
+	if (!err)
+		err = vcn_v4_0_5_start(vinst);
+	if (err)
+		return err;
+
+	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+/*
+ * Ring callbacks for the VCN 4.0.5 unified ring. The pointer accessors and
+ * the reset hook are the vcn_v4_0_5_* functions from this file; the emit and
+ * insert hooks are the vcn_v2_0_enc_ring_* implementations. The table is not
+ * const because vcn_v4_0_5_set_unified_ring_funcs() may set
+ * secure_submission_supported on it.
+ */
+static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_ENC,
+ .align_mask = 0x3f,
+ .nop = VCN_ENC_CMD_NO_OP,
+ .get_rptr = vcn_v4_0_5_unified_ring_get_rptr,
+ .get_wptr = vcn_v4_0_5_unified_ring_get_wptr,
+ .set_wptr = vcn_v4_0_5_unified_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
+ 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* vcn_v2_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
+ .emit_ib = vcn_v2_0_enc_ring_emit_ib,
+ .emit_fence = vcn_v2_0_enc_ring_emit_fence,
+ .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_enc_ring_test_ring,
+ .test_ib = amdgpu_vcn_unified_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_v2_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = vcn_v4_0_5_ring_reset,
+};
+
+/**
+ * vcn_v4_0_5_set_unified_ring_funcs - install the unified ring callbacks
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * For every instance that is not masked out by harvest_config, point
+ * ring_enc[0] at the shared unified ring function table and record the
+ * instance number in ring->me.  When the VCN IP version is 4.0.5 the shared
+ * table is additionally flagged as supporting secure submission.
+ */
+static void vcn_v4_0_5_set_unified_ring_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	int inst;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+		if (adev->vcn.harvest_config & (1 << inst))
+			continue;
+
+		if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5))
+			vcn_v4_0_5_unified_ring_vm_funcs.secure_submission_supported = true;
+
+		ring = &adev->vcn.inst[inst].ring_enc[0];
+		ring->funcs = &vcn_v4_0_5_unified_ring_vm_funcs;
+		ring->me = inst;
+	}
+}
+
+/**
+ * vcn_v4_0_5_is_idle - report whether every VCN instance is idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block structure
+ *
+ * Reads UVD_STATUS of each non-harvested instance.  All instances are read
+ * even after a busy one has been seen.
+ *
+ * Return: true when every instance reports UVD_STATUS__IDLE (also when no
+ * instance was checked), false otherwise.
+ */
+static bool vcn_v4_0_5_is_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	bool idle = true;
+	int inst;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+		if (adev->vcn.harvest_config & (1 << inst))
+			continue;
+
+		if (RREG32_SOC15(VCN, inst, regUVD_STATUS) != UVD_STATUS__IDLE)
+			idle = false;
+	}
+
+	return idle;
+}
+
+/**
+ * vcn_v4_0_5_wait_for_idle - wait until every VCN instance is idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Waits, one non-harvested instance after another, for UVD_STATUS to match
+ * UVD_STATUS__IDLE.
+ *
+ * Return: 0 when all waits succeeded, otherwise the first non-zero result
+ * of SOC15_WAIT_ON_RREG(); later instances are then not waited for.
+ */
+static int vcn_v4_0_5_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst, err;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+		if (adev->vcn.harvest_config & (1 << inst))
+			continue;
+
+		err = SOC15_WAIT_ON_RREG(VCN, inst, regUVD_STATUS,
+					 UVD_STATUS__IDLE, UVD_STATUS__IDLE);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * vcn_v4_0_5_set_clockgating_state - gate or ungate the VCN clocks
+ *
+ * @ip_block: amdgpu_ip_block pointer
+ * @state: clock gating state
+ *
+ * AMD_CG_STATE_GATE enables clock gating on every non-harvested instance;
+ * any other state disables it.  Gating is refused for an instance whose
+ * UVD_STATUS is not UVD_STATUS__IDLE.
+ *
+ * Return: 0 on success, -EBUSY as soon as a busy instance is found while
+ * gating (instances handled before it stay gated).
+ */
+static int vcn_v4_0_5_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					    enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	const bool gate = (state == AMD_CG_STATE_GATE);
+	struct amdgpu_vcn_inst *vinst;
+	int inst;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+		if (adev->vcn.harvest_config & (1 << inst))
+			continue;
+
+		vinst = &adev->vcn.inst[inst];
+
+		if (!gate) {
+			vcn_v4_0_5_disable_clock_gating(vinst);
+			continue;
+		}
+
+		if (RREG32_SOC15(VCN, inst, regUVD_STATUS) != UVD_STATUS__IDLE)
+			return -EBUSY;
+
+		vcn_v4_0_5_enable_clock_gating(vinst);
+	}
+
+	return 0;
+}
+
+/**
+ * vcn_v4_0_5_set_pg_state - move one VCN instance to a power gating state
+ *
+ * @vinst: VCN instance
+ * @state: requested power gating state
+ *
+ * Does nothing when @vinst is already in @state.  AMD_PG_STATE_GATE stops
+ * the instance, any other state starts it; the cached state is updated only
+ * when that call succeeds.
+ *
+ * Return: 0 on success or when no change was needed, otherwise the error
+ * from the stop/start call.
+ */
+static int vcn_v4_0_5_set_pg_state(struct amdgpu_vcn_inst *vinst,
+				   enum amd_powergating_state state)
+{
+	int err;
+
+	if (vinst->cur_state == state)
+		return 0;
+
+	err = (state == AMD_PG_STATE_GATE) ? vcn_v4_0_5_stop(vinst)
+					   : vcn_v4_0_5_start(vinst);
+	if (err)
+		return err;
+
+	vinst->cur_state = state;
+	return 0;
+}
+
+/**
+ * vcn_v4_0_5_process_interrupt - dispatch a VCN interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources
+ * @entry: interrupt entry from clients and sources
+ *
+ * Maps the IH client id to a VCN instance (VCN -> 0, VCN1 -> 1) and then
+ * handles the source id: the general purpose encode interrupt triggers
+ * fence processing on that instance's ring_enc[0], the poison interrupt is
+ * forwarded to amdgpu_vcn_process_poison_irq().  Unknown client or source
+ * ids are only logged.
+ *
+ * Return: always 0.
+ */
+static int vcn_v4_0_5_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
+					struct amdgpu_iv_entry *entry)
+{
+	uint32_t inst_idx;
+
+	if (entry->client_id == SOC15_IH_CLIENTID_VCN) {
+		inst_idx = 0;
+	} else if (entry->client_id == SOC15_IH_CLIENTID_VCN1) {
+		inst_idx = 1;
+	} else {
+		DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+		return 0;
+	}
+
+	DRM_DEBUG("IH: VCN TRAP\n");
+
+	switch (entry->src_id) {
+	case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+		amdgpu_fence_process(&adev->vcn.inst[inst_idx].ring_enc[0]);
+		break;
+	case VCN_4_0__SRCID_UVD_POISON:
+		amdgpu_vcn_process_poison_irq(adev, source, entry);
+		break;
+	default:
+		DRM_ERROR("Unhandled interrupt: %d %d\n",
+			  entry->src_id, entry->src_data[0]);
+		break;
+	}
+
+	return 0;
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v4_0_5_irq_funcs = { /* installed on every non-harvested instance by vcn_v4_0_5_set_irq_funcs() */
+ .process = vcn_v4_0_5_process_interrupt,
+};
+
+/**
+ * vcn_v4_0_5_set_irq_funcs - install the VCN interrupt callbacks
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * For every non-harvested instance, set the number of interrupt types to
+ * the instance's encode ring count plus one and attach the VCN 4.0.5
+ * interrupt function table.
+ */
+static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_vcn_inst *vinst;
+	int inst;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+		if (adev->vcn.harvest_config & (1 << inst))
+			continue;
+
+		vinst = &adev->vcn.inst[inst];
+		vinst->irq.num_types = vinst->num_enc_rings + 1;
+		vinst->irq.funcs = &vcn_v4_0_5_irq_funcs;
+	}
+}
+
+static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { /* IP-level callbacks, published through vcn_v4_0_5_ip_block */
+ .name = "vcn_v4_0_5",
+ .early_init = vcn_v4_0_5_early_init,
+ .sw_init = vcn_v4_0_5_sw_init,
+ .sw_fini = vcn_v4_0_5_sw_fini,
+ .hw_init = vcn_v4_0_5_hw_init,
+ .hw_fini = vcn_v4_0_5_hw_fini,
+ .suspend = vcn_v4_0_5_suspend,
+ .resume = vcn_v4_0_5_resume,
+ .is_idle = vcn_v4_0_5_is_idle,
+ .wait_for_idle = vcn_v4_0_5_wait_for_idle,
+ .set_clockgating_state = vcn_v4_0_5_set_clockgating_state,
+ .set_powergating_state = vcn_set_powergating_state, /* common helper defined outside this file chunk; NOTE(review): presumably dispatches to each instance's set_pg_state - confirm */
+ .dump_ip_state = amdgpu_vcn_dump_ip_state,
+ .print_ip_state = amdgpu_vcn_print_ip_state,
+};
+
+const struct amdgpu_ip_block_version vcn_v4_0_5_ip_block = { /* VCN 4.0.5 IP block descriptor; declared extern in vcn_v4_0_5.h */
+ .type = AMD_IP_BLOCK_TYPE_VCN,
+ .major = 4,
+ .minor = 0,
+ .rev = 5,
+ .funcs = &vcn_v4_0_5_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.h b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.h
new file mode 100644
index 000000000000..ff9b3d6f6a47
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_V4_0_5_H__
+#define __VCN_V4_0_5_H__
+
+enum amdgpu_vcn_v4_0_5_sub_block { /* VCN 4.0.5 sub-block identifiers */
+ AMDGPU_VCN_V4_0_5_VCPU_VCODEC = 0,
+
+ AMDGPU_VCN_V4_0_5_MAX_SUB_BLOCK, /* number of sub-blocks; must stay the last enumerator */
+};
+
+extern const struct amdgpu_ip_block_version vcn_v4_0_5_ip_block;
+
+#endif /* __VCN_V4_0_5_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
new file mode 100644
index 000000000000..0202df5db1e1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
@@ -0,0 +1,1442 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_hw_ip.h"
+#include "vcn_v2_0.h"
+
+#include "vcn/vcn_5_0_0_offset.h"
+#include "vcn/vcn_5_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
+#include "vcn_v5_0_0.h"
+
+#include <drm/drm_drv.h>
+
+static const struct amdgpu_hwip_reg_entry vcn_reg_list_5_0[] = { /* register set handed to amdgpu_vcn_reg_dump_init() by vcn_v5_0_0_sw_init() */
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE)
+};
+
+static int amdgpu_ih_clientid_vcns[] = { /* IH client id per VCN instance; vcn_v5_0_0_sw_init() indexes it by instance number */
+ SOC15_IH_CLIENTID_VCN,
+ SOC15_IH_CLIENTID_VCN1
+};
+
+static void vcn_v5_0_0_set_unified_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev);
+static int vcn_v5_0_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v5_0_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
+static void vcn_v5_0_0_unified_ring_set_wptr(struct amdgpu_ring *ring);
+
+/**
+ * vcn_v5_0_0_early_init - set function pointers and load microcode
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Set ring and irq function pointers
+ * Load microcode from filesystem
+ *
+ * Every instance is first given exactly one encode ring, which serves as the
+ * unified ring; only then are the ring and irq function tables installed.
+ * Afterwards each instance gets its set_pg_state callback and is handed to
+ * amdgpu_vcn_early_init().
+ *
+ * Return: 0 on success, otherwise the first non-zero value returned by
+ * amdgpu_vcn_early_init().
+ */
+static int vcn_v5_0_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ /* re-use enc ring as unified ring */
+ adev->vcn.inst[i].num_enc_rings = 1;
+
+ vcn_v5_0_0_set_unified_ring_funcs(adev);
+ vcn_v5_0_0_set_irq_funcs(adev);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst[i].set_pg_state = vcn_v5_0_0_set_pg_state;
+
+ r = amdgpu_vcn_early_init(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_0_sw_init - sw init for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Load firmware and sw initialization
+ *
+ * For every non-harvested instance this runs the common VCN sw init, sets
+ * up the ucode and resumes the instance, registers the unified-queue and
+ * poison interrupt sources, initialises ring_enc[0] as a doorbell-driven
+ * ring named "vcn_unified_<i>", and marks the unified queue as present and
+ * enabled in the firmware shared area.  Firmware logging and the DPG pause
+ * callback are set up only when amdgpu_vcnfw_log, respectively
+ * AMD_PG_SUPPORT_VCN_DPG, is set.
+ *
+ * After the loop the supported reset mask is derived from instance 0's
+ * ring (per-queue reset is added unless running as an SR-IOV VF), and the
+ * register dump list and the reset-mask sysfs entry are initialised.
+ *
+ * Return: 0 on success, otherwise the first non-zero result of any of the
+ * calls above.  Nothing set up earlier is undone here on failure.
+ */
+static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_ring *ring;
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ struct amdgpu_vcn5_fw_shared *fw_shared;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ r = amdgpu_vcn_sw_init(adev, i);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev, i);
+
+ r = amdgpu_vcn_resume(adev, i);
+ if (r)
+ return r;
+
+ atomic_set(&adev->vcn.inst[i].sched_score, 0);
+
+ /* VCN UNIFIED TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
+ VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
+ if (r)
+ return r;
+
+ /* VCN POISON TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
+ VCN_5_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq);
+ if (r)
+ return r;
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; /* stride of 8 per instance; hw_init programs the same base without the +2 as the doorbell range */
+
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ sprintf(ring->name, "vcn_unified_%d", i);
+
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
+ AMDGPU_RING_PRIO_0, &adev->vcn.inst[i].sched_score);
+ if (r)
+ return r;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+ fw_shared->sq.is_enabled = 1;
+
+ if (amdgpu_vcnfw_log)
+ amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.inst[i].pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode;
+ }
+
+ adev->vcn.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+ if (!amdgpu_sriov_vf(adev))
+ adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+ r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_5_0, ARRAY_SIZE(vcn_reg_list_5_0));
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_0_sw_fini - sw fini for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * VCN suspend and free up sw allocation
+ *
+ * The firmware shared flags of the non-harvested instances are cleared only
+ * while drm_dev_enter() succeeds.  The suspend and sw_fini loops that follow
+ * run over every instance index and do not consult harvest_config here.
+ *
+ * Return: 0 on success, otherwise the first non-zero result of
+ * amdgpu_vcn_suspend(); in that case the sysfs entry and the per-instance
+ * sw state are not torn down.
+ */
+static int vcn_v5_0_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r, idx;
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ struct amdgpu_vcn5_fw_shared *fw_shared;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = 0;
+ fw_shared->sq.is_enabled = 0;
+ }
+
+ drm_dev_exit(idx);
+ }
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(adev, i);
+ if (r)
+ return r;
+ }
+
+ amdgpu_vcn_sysfs_reset_mask_fini(adev);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ amdgpu_vcn_sw_fini(adev, i);
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_0_hw_init - program doorbells and test the unified rings
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * For every non-harvested instance, program the VCN doorbell range through
+ * the NBIO callback and run the ring test helper on ring_enc[0].
+ *
+ * Return: 0 on success, otherwise the first non-zero result of
+ * amdgpu_ring_test_helper().
+ */
+static int vcn_v5_0_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst, err;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+		struct amdgpu_ring *ring;
+
+		if (adev->vcn.harvest_config & (1 << inst))
+			continue;
+
+		ring = &adev->vcn.inst[inst].ring_enc[0];
+
+		adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+			((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * inst), inst);
+
+		err = amdgpu_ring_test_helper(ring);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * vcn_v5_0_0_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * For every non-harvested instance, cancel the pending idle work.  Unless
+ * running as an SR-IOV VF, the instance is then power gated through its
+ * set_pg_state callback when DPG is supported, or when it is not already
+ * gated and UVD_STATUS reads non-zero.  The callback's result is ignored.
+ *
+ * Return: always 0.
+ */
+static int vcn_v5_0_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+		struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[inst];
+
+		if (adev->vcn.harvest_config & (1 << inst))
+			continue;
+
+		cancel_delayed_work_sync(&vinst->idle_work);
+
+		if (amdgpu_sriov_vf(adev))
+			continue;
+
+		if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
+		    (vinst->cur_state != AMD_PG_STATE_GATE &&
+		     RREG32_SOC15(VCN, inst, regUVD_STATUS)))
+			vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
+	}
+
+	return 0;
+}
+
+/**
+ * vcn_v5_0_0_suspend - suspend the VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Runs hw_fini for the block and then amdgpu_vcn_suspend() for every
+ * instance index.
+ *
+ * Return: 0 on success, otherwise the first non-zero result encountered.
+ */
+static int vcn_v5_0_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst, err;
+
+	err = vcn_v5_0_0_hw_fini(ip_block);
+	if (err)
+		return err;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+		err = amdgpu_vcn_suspend(adev, inst);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * vcn_v5_0_0_resume - resume the VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Runs amdgpu_vcn_resume() for every instance index and then hw_init for
+ * the block.
+ *
+ * Return: the first non-zero result of amdgpu_vcn_resume(), otherwise the
+ * result of vcn_v5_0_0_hw_init().
+ */
+static int vcn_v5_0_0_resume(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst, err;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+		err = amdgpu_vcn_resume(adev, inst);
+		if (err)
+			return err;
+	}
+
+	return vcn_v5_0_0_hw_init(ip_block);
+}
+
+/**
+ * vcn_v5_0_0_mc_resume - memory controller programming
+ *
+ * @vinst: VCN instance
+ *
+ * Let the VCN memory controller know its offsets
+ *
+ * Programs, for the instance, the three VCPU cache windows (firmware, stack,
+ * context) and the non-cached window that maps the firmware shared area.
+ * The firmware window size is the page-aligned ucode size plus 8 bytes.
+ *
+ * With PSP firmware loading, window 0 points at the TMR address recorded for
+ * this instance's ucode and the stack window starts at the beginning of the
+ * instance buffer.  Otherwise window 0 points at the instance buffer and the
+ * stack window starts right behind the firmware image.
+ */
+static void vcn_v5_0_0_mc_resume(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t offset, size;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr));
+ offset = size;
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack, placed at gpu_addr + offset */
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context, placed directly behind the stack */
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+
+ /* non-cache window: firmware shared area */
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_SIZE0,
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)));
+}
+
+/**
+ * vcn_v5_0_0_mc_resume_dpg_mode - memory controller programming for dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Let the VCN memory controller know its offsets with dpg mode
+ *
+ * Same window layout as vcn_v5_0_0_mc_resume(), but every register is
+ * written through WREG32_SOC24_DPG_MODE() with @indirect passed along.
+ * When @indirect is set, the base addresses and offset of the stack window
+ * and the size of the firmware window are written as 0; with PSP firmware
+ * loading the firmware window base addresses are written as 0 as well.
+ * The global tiling register is programmed last from gb_addr_config.
+ */
+static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t offset, size;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ offset = 0;
+ } else {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ offset = size;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ if (!indirect)
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)), 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG),
+ adev->gfx.config.gb_addr_config, 0, indirect);
+
+ return;
+}
+
+/**
+ * vcn_v5_0_0_disable_static_power_gating - disable VCN static power gating
+ *
+ * @vinst: VCN instance
+ *
+ * Disable static power gating for VCN block
+ *
+ * Walks the ONO2, ONO3, ONO4 and ONO5 fields in that order: each step
+ * writes one PWR_CONFIG value to UVD_IPX_DLDO_CONFIG and then waits on the
+ * matching PWR_STATUS field of UVD_IPX_DLDO_STATUS.  The values written and
+ * waited for on ONO3..ONO5 depend on whether AMD_PG_SUPPORT_VCN is set.
+ * The results of the waits are not checked.
+ *
+ * Finally UVD_POWER_STATUS is rewritten with bits 0, 1 and 8 cleared and,
+ * when AMD_PG_SUPPORT_VCN is set, with the power-on and PG-enable flags
+ * OR-ed in.
+ */
+static void vcn_v5_0_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t data = 0;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ data = 1 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0,
+ UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK);
+
+ data = 2 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK);
+
+ data = 2 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK);
+
+ data = 2 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK);
+ } else {
+ data = 1 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0,
+ UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK);
+
+ data = 1 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0,
+ UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK);
+
+ data = 1 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0,
+ UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK);
+
+ data = 1 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0,
+ UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK);
+ }
+
+ data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS);
+ data &= ~0x103; /* clears bits 0, 1 and 8 */
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
+ data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON |
+ UVD_POWER_STATUS__UVD_PG_EN_MASK;
+
+ WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data);
+ return;
+}
+
+/**
+ * vcn_v5_0_0_enable_static_power_gating - enable VCN static power gating
+ *
+ * @vinst: VCN instance
+ *
+ * Enable static power gating for VCN block
+ *
+ * Does nothing unless AMD_PG_SUPPORT_VCN is set.  Otherwise the power
+ * status field of UVD_POWER_STATUS is first set to TILES_OFF, then the
+ * ONO5, ONO4, ONO3 and ONO2 fields are handled in that order (the reverse
+ * of the disable path): each step writes PWR_CONFIG value 2 to
+ * UVD_IPX_DLDO_CONFIG and waits for the matching PWR_STATUS field to read
+ * 1.  The results of the waits are not checked.
+ */
+static void vcn_v5_0_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t data;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ /* Before power off, this indicator has to be turned on */
+ data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS);
+ data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
+ data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
+ WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data);
+
+ data = 2 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK);
+
+ data = 2 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK);
+
+ data = 2 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK);
+
+ data = 2 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK);
+ }
+ return;
+}
+
+/**
+ * vcn_v5_0_0_disable_clock_gating - disable VCN clock gating
+ *
+ * @vinst: VCN instance
+ *
+ * Disable clock gating for VCN block
+ *
+ * Currently a stub: the body returns immediately and touches no register.
+ */
+static void vcn_v5_0_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
+{
+ return;
+}
+
+#if 0
+/**
+ * vcn_v5_0_0_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode
+ *
+ * @vinst: VCN instance
+ * @sram_sel: sram select
+ * @indirect: indirectly write sram
+ *
+ * Disable clock gating for VCN block with dpg mode
+ *
+ * Compiled out by the surrounding "#if 0"; the body is an empty stub.
+ */
+static void vcn_v5_0_0_disable_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ uint8_t sram_sel,
+ uint8_t indirect)
+{
+ return;
+}
+#endif
+
+/**
+ * vcn_v5_0_0_enable_clock_gating - enable VCN clock gating
+ *
+ * @vinst: VCN instance
+ *
+ * Enable clock gating for VCN block
+ *
+ * Currently a stub: the body returns immediately and touches no register.
+ */
+static void vcn_v5_0_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
+{
+ return;
+}
+
+/**
+ * vcn_v5_0_0_start_dpg_mode - VCN start with dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Start VCN block with dpg mode
+ *
+ * Sets the PG_MODE and PG_EN bits in UVD_POWER_STATUS, issues the VCPU, LMI
+ * and master-interrupt setup through WREG32_SOC24_DPG_MODE(), and, when
+ * @indirect is set, resets the instance's dpg_sram_curr_addr to the start of
+ * the SRAM buffer beforehand and pushes it with amdgpu_vcn_psp_update_sram()
+ * afterwards. Finally ring_enc[0] is programmed as ring buffer 1: RB1 is
+ * disabled, the read/write pointers are zeroed under FW_QUEUE_RING_RESET,
+ * RB1 is re-enabled and its doorbell is configured.
+ *
+ * Return: 0 on success, or the error from amdgpu_vcn_psp_update_sram() when
+ * the indirect SRAM load fails (the ring is then left unprogrammed).
+ */
+static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_ring *ring;
+ uint32_t tmp;
+ int ret;
+
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
+ WREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS, tmp);
+
+ if (indirect)
+ adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+
+ /* enable VCPU clock; BLK_RST is set in this first write as well */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* disable master interrupt */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MASTINT_EN), 0, 0, indirect);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ 0x00100000L);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_CTRL), tmp, 0, indirect);
+
+ vcn_v5_0_0_mc_resume_dpg_mode(vinst, indirect);
+
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); /* second VCPU_CNTL write: same timeout and CLK_EN, BLK_RST no longer set */
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* enable LMI MC and UMC channels */
+ tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect);
+
+ /* enable master interrupt */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+ if (indirect) {
+ ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, 0);
+ if (ret) {
+ dev_err(adev->dev, "%s: vcn sram load failed %d\n", __func__, ret);
+ return ret;
+ }
+ }
+
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4);
+
+ tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); /* disable RB1 while its pointers are reset */
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0);
+
+ tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR); /* write the read-back RPTR into WPTR, then cache WPTR in ring->wptr */
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); /* re-enable RB1 and clear the reset/hold-off flags */
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, regUVD_STATUS);
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_0_start - VCN start
+ *
+ * @vinst: VCN instance
+ *
+ * Start VCN block
+ *
+ * Harvested instances are skipped. If DPM is enabled, VCN is enabled through
+ * amdgpu_dpm_enable_vcn() first. When AMD_PG_SUPPORT_VCN_DPG is set the rest
+ * of the work is delegated to vcn_v5_0_0_start_dpg_mode(). Otherwise static
+ * power gating is disabled, the LMI and memory controller are set up, the
+ * VCPU is released from reset and bit 1 of UVD_STATUS is polled. Outside
+ * emulation mode a VCPU that does not report is reset and polled again, for
+ * up to 10 rounds. Once it reports, ring_enc[0] is programmed as ring
+ * buffer 1 and its doorbell is enabled.
+ *
+ * Return: 0 on success or for a harvested instance, the result of
+ * vcn_v5_0_0_start_dpg_mode() on the DPG path, or -1 if the VCPU never
+ * reports after all rounds.
+ */
+static int vcn_v5_0_0_start(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_vcn5_fw_shared *fw_shared;
+ struct amdgpu_ring *ring;
+ uint32_t tmp;
+ int j, k, r;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, true, i);
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v5_0_0_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram);
+
+ /* disable VCN power gating */
+ vcn_v5_0_0_disable_static_power_gating(vinst);
+
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, i, regUVD_STATUS, tmp);
+
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); /* take LMI and LMI_UMC out of soft reset */
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ vcn_v5_0_0_mc_resume(vinst);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (j = 0; j < 10; ++j) { /* up to 10 boot attempts */
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) { /* poll bit 1 of UVD_STATUS, at least 10 ms apart */
+ status = RREG32_SOC15(VCN, i, regUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ }
+
+ if (amdgpu_emu_mode == 1) { /* emulation: keep polling, never reset the VCPU */
+ r = -1;
+ if (status & 2) {
+ r = 0;
+ break;
+ }
+ } else {
+ r = 0;
+ if (status & 2)
+ break;
+
+ dev_err(adev->dev,
+ "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ mdelay(10);
+ r = -1; /* stays -1 if this was the last attempt */
+ }
+ }
+
+ if (r) {
+ dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
+ return r;
+ }
+
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);
+
+ tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); /* disable RB1 while its pointers are reset */
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0);
+
+ tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR); /* write the read-back RPTR into WPTR, then cache WPTR in ring->wptr */
+ WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); /* re-enable RB1 and clear the reset/hold-off flags */
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, regUVD_STATUS);
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_0_stop_dpg_mode - VCN stop with dpg mode
+ *
+ * @vinst: VCN instance
+ *
+ * Request the unpaused DPG state, wait for the power status field to read 1
+ * and for the ring read pointer to reach the write pointer, then clear the
+ * PG_MODE bit in UVD_POWER_STATUS.
+ */
+static void vcn_v5_0_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
+{
+	struct dpg_pause_state unpause = {.fw_based = VCN_DPG_STATE__UNPAUSE};
+	struct amdgpu_device *adev = vinst->adev;
+	int inst_idx = vinst->inst;
+	uint32_t wptr;
+
+	vcn_v5_0_0_pause_dpg_mode(vinst, &unpause);
+
+	/* power status must read back as 1 before continuing */
+	SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
+		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+	/* let the read pointer catch up with the current write pointer */
+	wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
+	SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, wptr, 0xFFFFFFFF);
+
+	/* leave dynamic power gating mode */
+	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0,
+		~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+	/* One final read-back so that all register writes above have landed;
+	 * without it race conditions may be introduced.
+	 */
+	RREG32_SOC15(VCN, inst_idx, regUVD_STATUS);
+}
+
+/**
+ * vcn_v5_0_0_stop - VCN stop
+ *
+ * @vinst: VCN instance
+ *
+ * Stop VCN block
+ *
+ * Harvested instances are skipped. FW_QUEUE_DPG_HOLD_OFF is set in the
+ * shared firmware area first. When AMD_PG_SUPPORT_VCN_DPG is set the work is
+ * delegated to vcn_v5_0_0_stop_dpg_mode(). Otherwise the function waits for
+ * UVD_STATUS and the LMI to go idle, stalls the UMC channel, blocks and
+ * resets the VCPU, gates its clock, applies the LMI soft resets, clears
+ * UVD_STATUS and enables static power gating.
+ *
+ * Return: 0 on success (including harvested instances and the DPG path), or
+ * the SOC15_WAIT_ON_RREG() result if one of the idle/LMI waits fails. On
+ * every path past the harvest check, amdgpu_dpm_enable_vcn(adev, false, i)
+ * is called when DPM is enabled.
+ */
+static int vcn_v5_0_0_stop(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_vcn5_fw_shared *fw_shared;
+ uint32_t tmp;
+ int r = 0;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ vcn_v5_0_0_stop_dpg_mode(vinst);
+ r = 0;
+ goto done;
+ }
+
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ goto done;
+
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | /* wait until all four clean bits are set */
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
+
+ /* disable LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
+
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+ /* apply soft reset: LMI_UMC first, then LMI, as two separate writes */
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+
+ /* clear status */
+ WREG32_SOC15(VCN, i, regUVD_STATUS, 0);
+
+ /* enable VCN power gating */
+ vcn_v5_0_0_enable_static_power_gating(vinst);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, regUVD_STATUS);
+
+done: /* common exit for success, the DPG path and failed waits */
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, false, i);
+
+ return r;
+}
+
+/**
+ * vcn_v5_0_0_pause_dpg_mode - VCN pause with dpg mode
+ *
+ * @vinst: VCN instance
+ * @new_state: pause state
+ *
+ * Apply @new_state->fw_based when it differs from the state cached for the
+ * instance. Pausing waits for power status 1, raises the pause request and
+ * waits for the ACK bit; unpausing only clears the request bit. The cached
+ * state is updated afterwards whether or not the power-status wait
+ * succeeded.
+ *
+ * Return: always 0.
+ */
+static int vcn_v5_0_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+				     struct dpg_pause_state *new_state)
+{
+	struct amdgpu_device *adev = vinst->adev;
+	int inst_idx = vinst->inst;
+	uint32_t reg_data = 0;
+	int ret_code;
+
+	/* nothing to do when the requested state is already in effect */
+	if (adev->vcn.inst[inst_idx].pause_state.fw_based == new_state->fw_based)
+		return 0;
+
+	DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d",
+		adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
+
+	/* start from the current register value with the ACK bit masked off */
+	reg_data = RREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE) &
+		(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+	if (new_state->fw_based != VCN_DPG_STATE__PAUSE) {
+		/* unpause dpg, no need to wait */
+		reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+		WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data);
+	} else {
+		ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 0x1,
+			UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+		if (!ret_code) {
+			/* pause DPG */
+			reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+			WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data);
+
+			/* wait for ACK */
+			SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_DPG_PAUSE,
+				UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+				UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+		}
+	}
+
+	adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
+
+	return 0;
+}
+
+/**
+ * vcn_v5_0_0_unified_ring_get_rptr - get unified read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Logs an error if @ring is not ring_enc[0] of instance @ring->me, then
+ * reads the pointer regardless.
+ *
+ * Return: the value of UVD_RB_RPTR for instance @ring->me.
+ */
+static uint64_t vcn_v5_0_0_unified_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring *unified = &adev->vcn.inst[ring->me].ring_enc[0];
+
+	if (unified != ring)
+		DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+	return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR);
+}
+
+/**
+ * vcn_v5_0_0_unified_ring_get_wptr - get unified write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Logs an error if @ring is not ring_enc[0] of instance @ring->me.
+ *
+ * Return: the value at @ring->wptr_cpu_addr when the ring uses a doorbell,
+ * otherwise the value of UVD_RB_WPTR for instance @ring->me.
+ */
+static uint64_t vcn_v5_0_0_unified_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring *unified = &adev->vcn.inst[ring->me].ring_enc[0];
+
+	if (unified != ring)
+		DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+	if (!ring->use_doorbell)
+		return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR);
+
+	return *ring->wptr_cpu_addr;
+}
+
+/**
+ * vcn_v5_0_0_unified_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Publishes the low 32 bits of @ring->wptr: through the CPU-visible wptr
+ * slot plus a doorbell write when the ring uses a doorbell, otherwise by
+ * writing UVD_RB_WPTR directly. Logs an error if @ring is not ring_enc[0]
+ * of instance @ring->me.
+ */
+static void vcn_v5_0_0_unified_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring *unified = &adev->vcn.inst[ring->me].ring_enc[0];
+
+	if (unified != ring)
+		DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+	if (!ring->use_doorbell) {
+		WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr));
+		return;
+	}
+
+	*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+}
+
+/*
+ * Reset the unified ring by stopping and restarting its VCN instance,
+ * bracketed by the generic ring-reset helpers. Returns the first error from
+ * vcn_v5_0_0_stop()/vcn_v5_0_0_start(), in which case the end helper is not
+ * called; otherwise the result of amdgpu_ring_reset_helper_end().
+ */
+static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring,
+				 unsigned int vmid,
+				 struct amdgpu_fence *timedout_fence)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
+	int r;
+
+	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+	r = vcn_v5_0_0_stop(vinst);
+	if (!r)
+		r = vcn_v5_0_0_start(vinst);
+	if (r)
+		return r;
+
+	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = { /* ring callbacks installed on ring_enc[0] by vcn_v5_0_0_set_unified_ring_funcs() */
+ .type = AMDGPU_RING_TYPE_VCN_ENC,
+ .align_mask = 0x3f,
+ .nop = VCN_ENC_CMD_NO_OP,
+ .get_rptr = vcn_v5_0_0_unified_ring_get_rptr,
+ .get_wptr = vcn_v5_0_0_unified_ring_get_wptr,
+ .set_wptr = vcn_v5_0_0_unified_ring_set_wptr,
+ .emit_frame_size = /* sum of the sizes of the packets named in the comments below */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
+ 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* vcn_v2_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
+ .emit_ib = vcn_v2_0_enc_ring_emit_ib, /* packet emitters are shared with the vcn_v2_0 encode ring */
+ .emit_fence = vcn_v2_0_enc_ring_emit_fence,
+ .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_enc_ring_test_ring,
+ .test_ib = amdgpu_vcn_unified_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_v2_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = vcn_v5_0_0_ring_reset, /* stop + start of the owning instance */
+};
+
+/**
+ * vcn_v5_0_0_set_unified_ring_funcs - set unified ring functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * For every non-harvested instance, point ring_enc[0] at
+ * vcn_v5_0_0_unified_ring_vm_funcs and record the instance index in its
+ * "me" field.
+ */
+static void vcn_v5_0_0_set_unified_ring_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	int i;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		if (adev->vcn.harvest_config & (1 << i))
+			continue;
+
+		ring = &adev->vcn.inst[i].ring_enc[0];
+		ring->funcs = &vcn_v5_0_0_unified_ring_vm_funcs;
+		ring->me = i;
+	}
+}
+
+/**
+ * vcn_v5_0_0_is_idle - check VCN block is idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block structure
+ *
+ * Reads UVD_STATUS of every non-harvested instance (all of them are read,
+ * even after a busy one has been found).
+ *
+ * Return: true if each of those reads equals UVD_STATUS__IDLE, which is
+ * also the result when there is no instance to check.
+ */
+static bool vcn_v5_0_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	bool idle = true;
+	int i;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		if (adev->vcn.harvest_config & (1 << i))
+			continue;
+
+		if (RREG32_SOC15(VCN, i, regUVD_STATUS) != UVD_STATUS__IDLE)
+			idle = false;
+	}
+
+	return idle;
+}
+
+/**
+ * vcn_v5_0_0_wait_for_idle - wait for VCN block idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Waits on UVD_STATUS of each non-harvested instance in turn, using
+ * UVD_STATUS__IDLE as both expected value and mask.
+ *
+ * Return: 0 once every instance has passed the wait, or the first non-zero
+ * SOC15_WAIT_ON_RREG() result.
+ */
+static int vcn_v5_0_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int i, err;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		if (adev->vcn.harvest_config & (1 << i))
+			continue;
+
+		err = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE,
+			UVD_STATUS__IDLE);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * vcn_v5_0_0_set_clockgating_state - set VCN block clockgating state
+ *
+ * @ip_block: amdgpu_ip_block pointer
+ * @state: clock gating state
+ *
+ * For each non-harvested instance, enable clock gating when @state is
+ * AMD_CG_STATE_GATE and disable it otherwise. Gating is refused while an
+ * instance's UVD_STATUS is not UVD_STATUS__IDLE.
+ *
+ * Return: 0 on success, -EBUSY if an instance is not idle when gating is
+ * requested (instances handled before it stay as they were set).
+ */
+static int vcn_v5_0_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					    enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	bool gate = (state == AMD_CG_STATE_GATE);
+	struct amdgpu_vcn_inst *vinst;
+	int i;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		vinst = &adev->vcn.inst[i];
+
+		if (adev->vcn.harvest_config & (1 << i))
+			continue;
+
+		if (!gate) {
+			vcn_v5_0_0_disable_clock_gating(vinst);
+			continue;
+		}
+
+		if (RREG32_SOC15(VCN, i, regUVD_STATUS) != UVD_STATUS__IDLE)
+			return -EBUSY;
+		vcn_v5_0_0_enable_clock_gating(vinst);
+	}
+
+	return 0;
+}
+
+/*
+ * Move one VCN instance to the requested power-gating state: stop it for
+ * AMD_PG_STATE_GATE, start it for anything else. A request for the state
+ * already cached in vinst->cur_state is a no-op. The cached state is only
+ * updated when the stop/start call succeeds; its error is returned otherwise.
+ */
+static int vcn_v5_0_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+				   enum amd_powergating_state state)
+{
+	int ret;
+
+	if (vinst->cur_state == state)
+		return 0;
+
+	ret = (state == AMD_PG_STATE_GATE) ? vcn_v5_0_0_stop(vinst) :
+					     vcn_v5_0_0_start(vinst);
+	if (ret)
+		return ret;
+
+	vinst->cur_state = state;
+
+	return 0;
+}
+
+/**
+ * vcn_v5_0_0_process_interrupt - process VCN block interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources
+ * @entry: interrupt entry from clients and sources
+ *
+ * Maps the IH client id to a VCN instance (VCN -> 0, VCN1 -> 1) and then
+ * dispatches on the source id: general-purpose interrupts run fence
+ * processing on that instance's ring_enc[0], poison interrupts go to
+ * amdgpu_vcn_process_poison_irq(). Unknown client or source ids are logged.
+ *
+ * Return: always 0.
+ */
+static int vcn_v5_0_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
+					struct amdgpu_iv_entry *entry)
+{
+	uint32_t ip_instance;
+
+	if (entry->client_id == SOC15_IH_CLIENTID_VCN) {
+		ip_instance = 0;
+	} else if (entry->client_id == SOC15_IH_CLIENTID_VCN1) {
+		ip_instance = 1;
+	} else {
+		DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+		return 0;
+	}
+
+	DRM_DEBUG("IH: VCN TRAP\n");
+
+	switch (entry->src_id) {
+	case VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
+		break;
+	case VCN_5_0__SRCID_UVD_POISON:
+		amdgpu_vcn_process_poison_irq(adev, source, entry);
+		break;
+	default:
+		DRM_ERROR("Unhandled interrupt: %d %d\n",
+			  entry->src_id, entry->src_data[0]);
+		break;
+	}
+
+	return 0;
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v5_0_0_irq_funcs = { /* installed per instance by vcn_v5_0_0_set_irq_funcs() */
+ .process = vcn_v5_0_0_process_interrupt,
+};
+
+/**
+ * vcn_v5_0_0_set_irq_funcs - set VCN block interrupt irq functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * For every non-harvested instance, set the number of irq types to its
+ * num_enc_rings + 1 and install vcn_v5_0_0_irq_funcs.
+ */
+static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_vcn_inst *vinst;
+	int i;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		if (adev->vcn.harvest_config & (1 << i))
+			continue;
+
+		vinst = &adev->vcn.inst[i];
+		vinst->irq.num_types = vinst->num_enc_rings + 1;
+		vinst->irq.funcs = &vcn_v5_0_0_irq_funcs;
+	}
+}
+
+static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { /* IP-level callbacks referenced by vcn_v5_0_0_ip_block */
+ .name = "vcn_v5_0_0",
+ .early_init = vcn_v5_0_0_early_init,
+ .sw_init = vcn_v5_0_0_sw_init,
+ .sw_fini = vcn_v5_0_0_sw_fini,
+ .hw_init = vcn_v5_0_0_hw_init,
+ .hw_fini = vcn_v5_0_0_hw_fini,
+ .suspend = vcn_v5_0_0_suspend,
+ .resume = vcn_v5_0_0_resume,
+ .is_idle = vcn_v5_0_0_is_idle,
+ .wait_for_idle = vcn_v5_0_0_wait_for_idle,
+ .set_clockgating_state = vcn_v5_0_0_set_clockgating_state,
+ .set_powergating_state = vcn_set_powergating_state, /* not version-specific, unlike the vcn_v5_0_0_* entries */
+ .dump_ip_state = amdgpu_vcn_dump_ip_state,
+ .print_ip_state = amdgpu_vcn_print_ip_state,
+};
+
+const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block = { /* VCN 5.0.0 block descriptor; declared extern in vcn_v5_0_0.h */
+ .type = AMD_IP_BLOCK_TYPE_VCN,
+ .major = 5,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &vcn_v5_0_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h
new file mode 100644
index 000000000000..51bbccd4360f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_V5_0_0_H__
+#define __VCN_V5_0_0_H__
+
+#define VCN_VID_SOC_ADDRESS 0x1FC00 /* NOTE(review): presumably SOC-side base offsets of the VID/AON register ranges for VCN0 and VCN1 - confirm against the register spec */
+#define VCN_AON_SOC_ADDRESS 0x1F800
+#define VCN1_VID_SOC_ADDRESS 0x48300
+#define VCN1_AON_SOC_ADDRESS 0x48000
+
+#define VCN_VID_IP_ADDRESS 0x0 /* NOTE(review): presumably the IP-relative counterparts of the SOC offsets above - confirm */
+#define VCN_AON_IP_ADDRESS 0x30000
+
+extern const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block; /* defined in vcn_v5_0_0.c */
+
+#endif /* __VCN_V5_0_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
new file mode 100644
index 000000000000..8bd457dea4cf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
@@ -0,0 +1,1729 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_hw_ip.h"
+#include "vcn_v2_0.h"
+#include "vcn_v4_0_3.h"
+#include "mmsch_v5_0.h"
+
+#include "vcn/vcn_5_0_0_offset.h"
+#include "vcn/vcn_5_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
+#include "vcn_v5_0_0.h"
+#include "vcn_v5_0_1.h"
+
+#include <drm/drm_drv.h>
+
+static const struct amdgpu_hwip_reg_entry vcn_reg_list_5_0_1[] = { /* register list handed to amdgpu_vcn_reg_dump_init() in vcn_v5_0_1_sw_init() */
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), /* ring buffer base, pointer and size registers for ring buffers 1-4 */
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE)
+};
+
+static int vcn_v5_0_1_start_sriov(struct amdgpu_device *adev);
+static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev);
+static int vcn_v5_0_1_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring);
+static void vcn_v5_0_1_set_ras_funcs(struct amdgpu_device *adev);
+/**
+ * vcn_v5_0_1_early_init - set function pointers and load microcode
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Gives every instance a single encode ring (used as the unified ring),
+ * installs the ring, irq and RAS function tables, then sets the per-instance
+ * power-gating callback and runs amdgpu_vcn_early_init() for each instance.
+ *
+ * Return: 0 on success, or the first error from amdgpu_vcn_early_init().
+ */
+static int vcn_v5_0_1_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_vcn_inst *vinst;
+	int i, r;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		/* re-use enc ring as unified ring */
+		adev->vcn.inst[i].num_enc_rings = 1;
+	}
+
+	vcn_v5_0_1_set_unified_ring_funcs(adev);
+	vcn_v5_0_1_set_irq_funcs(adev);
+	vcn_v5_0_1_set_ras_funcs(adev);
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		vinst = &adev->vcn.inst[i];
+		vinst->set_pg_state = vcn_v5_0_1_set_pg_state;
+
+		r = amdgpu_vcn_early_init(adev, i);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+/*
+ * Publish the reset types supported for VCN. The base mask comes from
+ * amdgpu_get_soft_full_reset_mask() on instance 0's unified ring. Per-queue
+ * reset is added only for MP0 IP version 13.0.12 when the PSP SOS firmware
+ * version is at least 0x00450025, DPM reports VCN reset support and the
+ * device is not an SR-IOV VF. Always returns 0.
+ */
+static int vcn_v5_0_1_late_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	adev->vcn.supported_reset =
+		amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+
+	if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 12))
+		return 0;
+
+	if ((adev->psp.sos.fw_version >= 0x00450025) &&
+	    amdgpu_dpm_reset_vcn_is_supported(adev) &&
+	    !amdgpu_sriov_vf(adev))
+		adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+	return 0;
+}
+
+/*
+ * Initialise the firmware shared area of instance @inst_idx: advertise the
+ * unified queue in present_flag_0, mark the queue enabled and, when
+ * amdgpu_vcnfw_log is set, initialise firmware logging. Does nothing if
+ * sq.is_enabled is already set, so it is safe to call repeatedly.
+ */
+static void vcn_v5_0_1_fw_shared_init(struct amdgpu_device *adev, int inst_idx)
+{
+	struct amdgpu_vcn5_fw_shared *shared =
+		adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+
+	/* already initialised */
+	if (shared->sq.is_enabled)
+		return;
+
+	shared->present_flag_0 =
+		cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+	shared->sq.is_enabled = 1;
+
+	if (amdgpu_vcnfw_log)
+		amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]);
+}
+
+/**
+ * vcn_v5_0_1_sw_init - sw init for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Load firmware and sw initialization
+ *
+ * Registers the unified-queue and poison interrupt sources, then for every
+ * instance runs the common VCN sw init, sets up the microcode, resumes the
+ * instance, configures the unified ring (doorbell index, VM hub, name),
+ * initialises the ring and the firmware shared area. Afterwards the SR-IOV
+ * MM table, the RAS block, the register dump list and the sysfs reset mask
+ * are set up as applicable.
+ *
+ * Return: 0 on success, or the error returned by the first failing step.
+ */
+static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_ring *ring;
+	int i, r, vcn_inst;
+
+	/* VCN UNIFIED TRAP */
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+		VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq);
+	if (r)
+		return r;
+
+	/* VCN POISON TRAP */
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+		VCN_5_0__SRCID_UVD_POISON, &adev->vcn.inst->ras_poison_irq);
+	/* do not let the per-instance loop below overwrite a failure here */
+	if (r)
+		return r;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+		vcn_inst = GET_INST(VCN, i);
+
+		r = amdgpu_vcn_sw_init(adev, i);
+		if (r)
+			return r;
+
+		amdgpu_vcn_setup_ucode(adev, i);
+
+		r = amdgpu_vcn_resume(adev, i);
+		if (r)
+			return r;
+
+		ring = &adev->vcn.inst[i].ring_enc[0];
+		ring->use_doorbell = true;
+		/* the per-instance doorbell stride differs: 11 on bare metal, 32 under SR-IOV */
+		if (!amdgpu_sriov_vf(adev))
+			ring->doorbell_index =
+				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+				11 * vcn_inst;
+		else
+			ring->doorbell_index =
+				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+				32 * vcn_inst;
+
+		ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
+		sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id);
+
+		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
+				     AMDGPU_RING_PRIO_DEFAULT, &adev->vcn.inst[i].sched_score);
+		if (r)
+			return r;
+
+		vcn_v5_0_1_fw_shared_init(adev, i);
+	}
+
+	if (amdgpu_sriov_vf(adev)) {
+		r = amdgpu_virt_alloc_mm_table(adev);
+		if (r)
+			return r;
+	}
+
+	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
+		r = amdgpu_vcn_ras_sw_init(adev);
+		if (r) {
+			dev_err(adev->dev, "Failed to initialize vcn ras block!\n");
+			return r;
+		}
+	}
+
+	r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_5_0_1, ARRAY_SIZE(vcn_reg_list_5_0_1));
+	if (r)
+		return r;
+
+	return amdgpu_vcn_sysfs_reset_mask_init(adev);
+}
+
+/**
+ * vcn_v5_0_1_sw_fini - sw fini for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * While the DRM device can still be entered, clears the unified-queue flags
+ * in every instance's firmware shared area. Then frees the SR-IOV MM table
+ * (VF only), suspends every instance, removes the sysfs reset mask and runs
+ * the common VCN sw fini for each instance.
+ *
+ * Return: 0 on success, or the first error from amdgpu_vcn_suspend(), in
+ * which case the remaining teardown steps are not executed.
+ */
+static int vcn_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_vcn5_fw_shared *fw_shared;
+	int i, r, idx;
+
+	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+			fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+			fw_shared->present_flag_0 = 0;
+			fw_shared->sq.is_enabled = 0;
+		}
+
+		drm_dev_exit(idx);
+	}
+
+	if (amdgpu_sriov_vf(adev))
+		amdgpu_virt_free_mm_table(adev);
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+		r = amdgpu_vcn_suspend(adev, i);
+		if (r)
+			return r;
+	}
+
+	amdgpu_vcn_sysfs_reset_mask_fini(adev);
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+		amdgpu_vcn_sw_fini(adev, i);
+	}
+
+	return 0;
+}
+
+/*
+ * Program the NBIO doorbell range for the unified ring of instance @i.
+ * Nothing is programmed when the ring does not use a doorbell. The range
+ * starts at (vcn_ring0_1 << 1) + 11 * <physical instance>. Always returns 0.
+ */
+static int vcn_v5_0_1_hw_init_inst(struct amdgpu_device *adev, int i)
+{
+	struct amdgpu_ring *ring = &adev->vcn.inst[i].ring_enc[0];
+	int vcn_inst = GET_INST(VCN, i);
+
+	if (!ring->use_doorbell)
+		return 0;
+
+	adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+		((adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+		 11 * vcn_inst),
+		adev->vcn.inst[i].aid_id);
+
+	return 0;
+}
+
+/**
+ * vcn_v5_0_1_hw_init - start and test VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Under SR-IOV the block is started through vcn_v5_0_1_start_sriov() and
+ * every unified ring gets its write pointer zeroed and is marked ready.
+ * Otherwise the RRMT capability is recorded when bit 0x100 of VCN_RRMT_CNTL
+ * is set, and each instance has its doorbell range programmed, its firmware
+ * shared area (re)initialised and its ring tested.
+ *
+ * Return: 0 on success, or the first error from vcn_v5_0_1_start_sriov() or
+ * amdgpu_ring_test_helper().
+ */
+static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_ring *ring;
+	int i, r;
+
+	if (amdgpu_sriov_vf(adev)) {
+		r = vcn_v5_0_1_start_sriov(adev);
+		if (r)
+			return r;
+
+		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+			ring = &adev->vcn.inst[i].ring_enc[0];
+			ring->wptr = 0;
+			ring->wptr_old = 0;
+			vcn_v5_0_1_unified_ring_set_wptr(ring);
+			ring->sched.ready = true;
+		}
+
+		return 0;
+	}
+
+	if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100)
+		adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED);
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		ring = &adev->vcn.inst[i].ring_enc[0];
+		vcn_v5_0_1_hw_init_inst(adev, i);
+
+		/* Re-init fw_shared, if required */
+		vcn_v5_0_1_fw_shared_init(adev, i);
+
+		r = amdgpu_ring_test_helper(ring);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+/**
+ * vcn_v5_0_1_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Cancels the idle work of every instance and gates any instance that is not
+ * already in AMD_PG_STATE_GATE. On bare metal with VCN RAS support the RAS
+ * poison interrupt reference is dropped as well.
+ *
+ * Return: always 0.
+ */
+static int vcn_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) {
+		struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[inst];
+
+		cancel_delayed_work_sync(&vinst->idle_work);
+
+		/* already gated: nothing to change for this instance */
+		if (vinst->cur_state == AMD_PG_STATE_GATE)
+			continue;
+
+		vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
+	}
+
+	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
+		if (!amdgpu_sriov_vf(adev))
+			amdgpu_irq_put(adev, &adev->vcn.inst->ras_poison_irq, 0);
+	}
+
+	return 0;
+}
+
+/**
+ * vcn_v5_0_1_suspend - suspend VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Runs the hardware fini path and then suspends every VCN instance.
+ *
+ * Return: 0 on success, or the first error from vcn_v5_0_1_hw_fini() or
+ * amdgpu_vcn_suspend().
+ */
+static int vcn_v5_0_1_suspend(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int err, inst;
+
+	err = vcn_v5_0_1_hw_fini(ip_block);
+	if (err)
+		return err;
+
+	/* stop at the first instance that fails to suspend */
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+		err = amdgpu_vcn_suspend(adev, inst);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+/**
+ * vcn_v5_0_1_resume - resume VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Resumes every VCN instance and then runs the hardware init path. While the
+ * device is in reset, each instance's cached power-gating state is set to
+ * AMD_PG_STATE_GATE before it is resumed.
+ *
+ * Return: 0 on success, or the first error from amdgpu_vcn_resume() or
+ * vcn_v5_0_1_hw_init().
+ */
+static int vcn_v5_0_1_resume(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int err, inst;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+		struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[inst];
+
+		if (amdgpu_in_reset(adev))
+			vinst->cur_state = AMD_PG_STATE_GATE;
+
+		err = amdgpu_vcn_resume(adev, inst);
+		if (err)
+			return err;
+	}
+
+	return vcn_v5_0_1_hw_init(ip_block);
+}
+
+/**
+ * vcn_v5_0_1_mc_resume - memory controller programming
+ *
+ * @vinst: VCN instance
+ *
+ * Tells the VCN memory controller where the firmware, stack, context and
+ * fw_shared windows live by programming the VCPU cache / non-cache window
+ * registers of this instance.
+ */
+static void vcn_v5_0_1_mc_resume(struct amdgpu_vcn_inst *vinst)
+{
+	struct amdgpu_device *adev = vinst->adev;
+	int inst = vinst->inst;
+	const struct common_firmware_header *hdr =
+		(const struct common_firmware_header *)adev->vcn.inst[inst].fw->data;
+	uint32_t fw_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+	uint32_t vcn_inst = GET_INST(VCN, inst);
+	uint32_t data_off;
+	uint64_t stack_addr, ctx_addr;
+
+	/* cache window 0: fw */
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+		/* PSP loaded the firmware: point at its TMR address, data starts at 0 */
+		WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo));
+		WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi));
+		WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, 0);
+		data_off = 0;
+	} else {
+		/* firmware sits at the start of the instance buffer, data follows it */
+		WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+			lower_32_bits(adev->vcn.inst[inst].gpu_addr));
+		WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+			upper_32_bits(adev->vcn.inst[inst].gpu_addr));
+		data_off = fw_size;
+		WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0,
+			AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+	}
+	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE0, fw_size);
+
+	stack_addr = adev->vcn.inst[inst].gpu_addr + data_off;
+	ctx_addr = stack_addr + AMDGPU_VCN_STACK_SIZE;
+
+	/* cache window 1: stack */
+	WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+		lower_32_bits(stack_addr));
+	WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+		upper_32_bits(stack_addr));
+	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET1, 0);
+	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+
+	/* cache window 2: context */
+	WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+		lower_32_bits(ctx_addr));
+	WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+		upper_32_bits(ctx_addr));
+	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET2, 0);
+	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+
+	/* non-cache window */
+	WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+		lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+	WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+		upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
+	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0,
+		AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)));
+}
+
+/**
+ * vcn_v5_0_1_mc_resume_dpg_mode - memory controller programming for dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Programs the VCPU cache windows (firmware, stack, context), the non-cached
+ * fw_shared window and UVD_GFX10_ADDR_CONFIG through WREG32_SOC24_DPG_MODE();
+ * every write forwards @indirect to that macro.
+ *
+ * When @indirect is set, the PSP-loaded firmware window base, the
+ * UVD_VCPU_CACHE_SIZE0 value and the stack window base are written as 0
+ * instead of their real values.
+ */
+static void vcn_v5_0_1_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t offset, size;
+ const struct common_firmware_header *hdr;
+
+ /* page-aligned firmware image size, taken from the firmware header */
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ /* direct write: use the TMR address of the PSP-loaded firmware */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
+ inst_idx].tmr_mc_addr_lo), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
+ inst_idx].tmr_mc_addr_hi), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else {
+ /* indirect write: all three firmware window registers get 0 */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ /* stack/context follow directly at the start of the instance buffer */
+ offset = 0;
+ } else {
+ /* firmware lives at the start of the instance buffer */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ /* stack/context are placed after the firmware image */
+ offset = size;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ /* window 0 size: real size for direct writes, 0 for indirect writes */
+ if (!indirect)
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else {
+ /* indirect write: the stack window base is written as 0 */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+ /* the stack window size is written in both modes */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)), 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+}
+
+/**
+ * vcn_v5_0_1_disable_clock_gating - disable VCN clock gating
+ *
+ * @vinst: VCN instance
+ *
+ * Disable clock gating for VCN block. The body is empty, so a call performs
+ * no register access and leaves @vinst untouched.
+ */
+static void vcn_v5_0_1_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
+{
+ /* empty body: @vinst is not used */
+}
+
+/**
+ * vcn_v5_0_1_enable_clock_gating - enable VCN clock gating
+ *
+ * @vinst: VCN instance
+ *
+ * Enable clock gating for VCN block. The body is empty, so a call performs
+ * no register access and leaves @vinst untouched.
+ */
+static void vcn_v5_0_1_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
+{
+ /* empty body: @vinst is not used */
+}
+
+/**
+ * vcn_v5_0_1_pause_dpg_mode - VCN pause with dpg mode
+ *
+ * @vinst: VCN instance
+ * @new_state: pause state
+ *
+ * Requests or releases the DPG pause through UVD_DPG_PAUSE when the firmware
+ * based pause state differs from the cached one, then caches the new state.
+ * A pause request waits for the hardware acknowledge bit; an unpause does not.
+ *
+ * Return: always 0.
+ */
+static int vcn_v5_0_1_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+				     struct dpg_pause_state *new_state)
+{
+	struct amdgpu_device *adev = vinst->adev;
+	int vcn_inst = GET_INST(VCN, vinst->inst);
+	uint32_t reg_data;
+
+	/* nothing to do when the requested state is already the cached one */
+	if (vinst->pause_state.fw_based == new_state->fw_based)
+		return 0;
+
+	DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d %s\n",
+		vinst->pause_state.fw_based, new_state->fw_based,
+		new_state->fw_based ? "VCN_DPG_STATE__PAUSE" : "VCN_DPG_STATE__UNPAUSE");
+
+	/* start from the current register value with the ACK bit masked out */
+	reg_data = RREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE);
+	reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK;
+
+	if (new_state->fw_based != VCN_DPG_STATE__PAUSE) {
+		/* unpause DPG, no need to wait */
+		reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+		WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data);
+	} else {
+		/* pause DPG */
+		reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+		WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data);
+
+		/* wait for ACK */
+		SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_DPG_PAUSE,
+			UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+			UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+	}
+
+	vinst->pause_state.fw_based = new_state->fw_based;
+
+	return 0;
+}
+
+
+/**
+ * vcn_v5_0_1_start_dpg_mode - VCN start with dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Start VCN block with dpg mode: enables the PG mode bit in
+ * UVD_POWER_STATUS, programs the VCPU/LMI/memory-controller setup through
+ * WREG32_SOC24_DPG_MODE(), pushes the DPG SRAM through PSP when @indirect is
+ * set, pauses DPG and then programs and enables the unified ring buffer and
+ * its doorbell.
+ *
+ * Return: 0 on success, or the error from amdgpu_vcn_psp_update_sram() when
+ * the indirect SRAM load fails.
+ */
+static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ struct amdgpu_vcn5_fw_shared *fw_shared =
+ adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_ring *ring;
+ struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__PAUSE};
+ int vcn_inst, ret;
+ uint32_t tmp;
+
+ /* vcn_inst is used for direct register access, inst_idx for driver state */
+ vcn_inst = GET_INST(VCN, inst_idx);
+
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 1,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp);
+
+ if (indirect) {
+ /* restart filling the DPG SRAM buffer from its beginning */
+ adev->vcn.inst[inst_idx].dpg_sram_curr_addr =
+ (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+ /* Use dummy register 0xDEADBEEF passing AID selection to PSP FW */
+ WREG32_SOC24_DPG_MODE(inst_idx, 0xDEADBEEF,
+ adev->vcn.inst[inst_idx].aid_id, 0, true);
+ }
+
+ /* enable VCPU clock */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* disable master interrupt */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MASTINT_EN), 0, 0, indirect);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ 0x00100000L);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_CTRL), tmp, 0, indirect);
+
+ vcn_v5_0_1_mc_resume_dpg_mode(vinst, indirect);
+
+ /* rewrite UVD_VCPU_CNTL with the clock enabled but without BLK_RST */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* enable LMI MC and UMC channels */
+ tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_CTRL2), tmp, 0, indirect);
+
+ /* enable master interrupt */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+ if (indirect) {
+ /* hand the accumulated indirect writes to PSP; abort on failure */
+ ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM);
+ if (ret) {
+ dev_err(adev->dev, "vcn sram load failed %d\n", ret);
+ return ret;
+ }
+ }
+
+ /* resetting ring, fw should not check RB ring */
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+
+ /* Pause dpg */
+ /* return value not checked; the pause helper visible in this file always returns 0 */
+ vcn_v5_0_1_pause_dpg_mode(vinst, &state);
+
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+
+ /* ring base address and size (in 32-bit words) */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / sizeof(uint32_t));
+
+ /* keep RB1 disabled while its pointers are being reset */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
+
+ /* make WPTR equal to the RPTR read back, and mirror it in ring->wptr */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ /* resetting done, fw can check RB ring */
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ /* enable the RB1 doorbell at the ring's doorbell index */
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+ /* Read DB_CTRL to flush the write DB_CTRL command. */
+ RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL);
+
+ return 0;
+}
+
+/*
+ * vcn_v5_0_1_start_sriov - start VCN on an SR-IOV VF through MMSCH
+ *
+ * For every VCN instance this builds an MMSCH init table in
+ * adev->virt.mm_table (header followed by register write commands), points
+ * the MMSCH_VF_CTX_* registers at it, kicks MMSCH through the host mailbox
+ * and polls the response mailbox.
+ *
+ * Returns 0, or -EBUSY when MMSCH does not respond within the poll budget.
+ * An unexpected response / init status is only logged, not returned.
+ *
+ * NOTE(review): the MMSCH_V5_0_INSERT_* macros are defined outside this view;
+ * they presumably append to table_loc and update table_size using the locals
+ * size, size_dw, direct_wt, direct_rd_mod_wt and end - confirm before renaming
+ * any of these locals.
+ */
+static int vcn_v5_0_1_start_sriov(struct amdgpu_device *adev)
+{
+ int i, vcn_inst;
+ struct amdgpu_ring *ring_enc;
+ uint64_t cache_addr;
+ uint64_t rb_enc_addr;
+ uint64_t ctx_addr;
+ uint32_t param, resp, expected;
+ uint32_t offset, cache_size;
+ uint32_t tmp, timeout;
+
+ struct amdgpu_mm_table *table = &adev->virt.mm_table;
+ uint32_t *table_loc;
+ uint32_t table_size;
+ uint32_t size, size_dw;
+ uint32_t init_status;
+ uint32_t enabled_vcn;
+
+ struct mmsch_v5_0_cmd_direct_write
+ direct_wt = { {0} };
+ struct mmsch_v5_0_cmd_direct_read_modify_write
+ direct_rd_mod_wt = { {0} };
+ struct mmsch_v5_0_cmd_end end = { {0} };
+ struct mmsch_v5_0_init_header header;
+
+ struct amdgpu_vcn5_fw_shared *fw_shared;
+ struct amdgpu_fw_shared_rb_setup *rb_setup;
+
+ /* command templates used while building the table */
+ direct_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_WRITE;
+ direct_rd_mod_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+ end.cmd_header.command_type = MMSCH_COMMAND__END;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ vcn_inst = GET_INST(VCN, i);
+
+ /*
+ * NOTE(review): passes the GET_INST() value, while vcn_v5_0_1_hw_init()
+ * passes the loop index to the same helper - confirm which is intended.
+ */
+ vcn_v5_0_1_fw_shared_init(adev, vcn_inst);
+
+ /* fresh header; total_size is counted in 32-bit words */
+ memset(&header, 0, sizeof(struct mmsch_v5_0_init_header));
+ header.version = MMSCH_VERSION;
+ header.total_size = sizeof(struct mmsch_v5_0_init_header) >> 2;
+
+ /* commands are placed right after the header in the table buffer */
+ table_loc = (uint32_t *)table->cpu_addr;
+ table_loc += header.total_size;
+
+ table_size = 0;
+
+ MMSCH_V5_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
+ ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
+
+ cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
+
+ /* cache window 0: firmware */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
+
+ offset = 0;
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET0), 0);
+ } else {
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr));
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr));
+ offset = cache_size;
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_SIZE0),
+ cache_size);
+
+ /*
+ * cache window 1: stack
+ * NOTE(review): adev->vcn.inst[] is indexed with vcn_inst from here on,
+ * but with i above (fw->size, gpu_addr) - confirm the two always match
+ * on a VF.
+ */
+ cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset;
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr));
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET1), 0);
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context, placed after the stack */
+ cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE;
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr));
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET2), 0);
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE);
+
+ fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr;
+ rb_setup = &fw_shared->rb_setup;
+
+ ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0];
+ ring_enc->wptr = 0;
+ rb_enc_addr = ring_enc->gpu_addr;
+
+ /* describe the ring buffer to the firmware through fw_shared */
+ rb_setup->is_rb_enabled_flags |= RB_ENABLED;
+ rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
+ rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
+ rb_setup->rb_size = ring_enc->ring_size / 4;
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+
+ /* non-cache window: fw_shared */
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
+ /*
+ * NOTE(review): sized with struct amdgpu_vcn4_fw_shared although
+ * fw_shared above is a struct amdgpu_vcn5_fw_shared - confirm intended.
+ */
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+ MMSCH_V5_0_INSERT_END();
+
+ /* finalize the header now that table_size is known */
+ header.vcn0.init_status = 0;
+ header.vcn0.table_offset = header.total_size;
+ header.vcn0.table_size = table_size;
+ header.total_size += table_size;
+
+ /* Send init table to mmsch */
+ size = sizeof(struct mmsch_v5_0_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy((void *)table_loc, &header, size);
+
+ ctx_addr = table->gpu_addr;
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+ /* set the VF context VMID field to 0 */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID);
+ tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp);
+
+ size = header.total_size;
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size);
+
+ /* clear the response mailbox before kicking MMSCH */
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0);
+
+ param = 0x00000001;
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param);
+ /* poll every 10 us; any non-zero response ends the wait, 1000 us is the limit */
+ tmp = 0;
+ timeout = 1000;
+ resp = 0;
+ expected = MMSCH_VF_MAILBOX_RESP__OK;
+ while (resp != expected) {
+ resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP);
+ if (resp != 0)
+ break;
+
+ udelay(10);
+ tmp = tmp + 10;
+ if (tmp >= timeout) {
+ DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+ " waiting for regMMSCH_VF_MAILBOX_RESP "\
+ "(expected=0x%08x, readback=0x%08x)\n",
+ tmp, expected, resp);
+ return -EBUSY;
+ }
+ }
+
+ /* enabled_vcn is only used in the error message below */
+ enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
+ init_status = ((struct mmsch_v5_0_init_header *)(table_loc))->vcn0.init_status;
+ /* log only: a bad response does not abort the loop or change the return value */
+ if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
+ && init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
+ DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
+ "status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_start - VCN start
+ *
+ * @vinst: VCN instance
+ *
+ * Start VCN block. When AMD_PG_SUPPORT_VCN_DPG is set the work is delegated
+ * to vcn_v5_0_1_start_dpg_mode(). Otherwise the VCPU is clocked, the memory
+ * controller windows are programmed, the VCPU is released from reset and
+ * polled until it reports ready, and finally the unified ring buffer and its
+ * doorbell are programmed and enabled.
+ *
+ * Return: 0 on success, the result of vcn_v5_0_1_start_dpg_mode() in DPG
+ * mode, or -1 when the VCPU never reports ready.
+ */
+static int vcn_v5_0_1_start(struct amdgpu_vcn_inst *vinst)
+{
+	struct amdgpu_device *adev = vinst->adev;
+	int i = vinst->inst;
+	struct amdgpu_vcn5_fw_shared *fw_shared;
+	struct amdgpu_ring *ring;
+	uint32_t tmp;
+	int j, k, r, vcn_inst;
+
+	fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+
+	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+		return vcn_v5_0_1_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram);
+
+	vcn_inst = GET_INST(VCN, i);
+
+	/* set VCN status busy */
+	tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+	WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp);
+
+	/* enable VCPU clock */
+	WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+		 UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+	/* disable master interrupt */
+	WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0,
+		 ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+	/* enable LMI MC and UMC channels */
+	WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0,
+		 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+	/* take the LMI and LMI UMC blocks out of soft reset */
+	tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+	tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+	tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+	WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+	/* setup regUVD_LMI_CTRL */
+	tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL);
+	WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL, tmp |
+		     UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+		     UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+		     UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+		     UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+	vcn_v5_0_1_mc_resume(vinst);
+
+	/* VCN global tiling registers */
+	WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG,
+		     adev->gfx.config.gb_addr_config);
+
+	/* unblock VCPU register access */
+	WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0,
+		 ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+	/* release VCPU reset to boot */
+	WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+		 ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+	/*
+	 * Up to 10 boot attempts. Each attempt polls UVD_STATUS up to 100
+	 * times for bit 1 (value 2). r ends up 0 once the bit is seen and -1
+	 * when every attempt fails.
+	 */
+	for (j = 0; j < 10; ++j) {
+		uint32_t status;
+
+		for (k = 0; k < 100; ++k) {
+			status = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+			if (status & 2)
+				break;
+			mdelay(100);
+			if (amdgpu_emu_mode == 1)
+				msleep(20);
+		}
+
+		if (amdgpu_emu_mode == 1) {
+			/* emulation: retry the poll without resetting the VCPU */
+			r = -1;
+			if (status & 2) {
+				r = 0;
+				break;
+			}
+		} else {
+			r = 0;
+			if (status & 2)
+				break;
+
+			/* pulse BLK_RST to reboot the VCPU before the next attempt */
+			dev_err(adev->dev,
+				"VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
+			WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+				 UVD_VCPU_CNTL__BLK_RST_MASK,
+				 ~UVD_VCPU_CNTL__BLK_RST_MASK);
+			mdelay(10);
+			WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+				 ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+			mdelay(10);
+			r = -1;
+		}
+	}
+
+	if (r) {
+		dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
+		return r;
+	}
+
+	/* enable master interrupt */
+	WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN),
+		 UVD_MASTINT_EN__VCPU_EN_MASK,
+		 ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+	/* clear the busy bit of VCN_STATUS */
+	WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0,
+		 ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+	ring = &adev->vcn.inst[i].ring_enc[0];
+
+	WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL,
+		     ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+		     VCN_RB1_DB_CTRL__EN_MASK);
+
+	/* Read DB_CTRL to flush the write DB_CTRL command. */
+	RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL);
+
+	/*
+	 * Split the 64-bit ring address explicitly, matching the DPG start
+	 * path, instead of relying on implicit truncation for the low half.
+	 */
+	WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr));
+	WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+	WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / 4);
+
+	/* keep RB1 disabled and flag the reset to fw while pointers are reset */
+	tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+	tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+	WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+	fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+	WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
+	WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
+
+	/* make WPTR equal to the RPTR read back, and mirror it in ring->wptr */
+	tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR);
+	WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, tmp);
+	ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+
+	tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+	tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+	WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+	fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+	/* Keeping one read-back to ensure all register writes are done,
+	 * otherwise it may introduce race conditions.
+	 */
+	RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+
+	return 0;
+}
+
+/**
+ * vcn_v5_0_1_stop_dpg_mode - VCN stop with dpg mode
+ *
+ * @vinst: VCN instance
+ *
+ * Releases the DPG pause, waits for the power status and for the ring read
+ * pointer to catch up with the write pointer, then clears the PG mode bit in
+ * UVD_POWER_STATUS.
+ */
+static void vcn_v5_0_1_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
+{
+	struct amdgpu_device *adev = vinst->adev;
+	struct dpg_pause_state unpause = {.fw_based = VCN_DPG_STATE__UNPAUSE};
+	int vcn_inst = GET_INST(VCN, vinst->inst);
+	uint32_t wptr;
+
+	/* Unpause dpg */
+	vcn_v5_0_1_pause_dpg_mode(vinst, &unpause);
+
+	/* Wait for power status to be 1 */
+	SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1,
+		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+	/* wait for read ptr to be equal to write ptr */
+	wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+	SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_RB_RPTR, wptr, 0xFFFFFFFF);
+
+	/* disable dynamic power gating mode */
+	WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0,
+		~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+	/* Keeping one read-back to ensure all register writes are done,
+	 * otherwise it may introduce race conditions.
+	 */
+	RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+}
+
+/**
+ * vcn_v5_0_1_stop - VCN stop
+ *
+ * @vinst: VCN instance
+ *
+ * Stop VCN block. Sets FW_QUEUE_DPG_HOLD_OFF in the firmware-shared queue
+ * mode first. With AMD_PG_SUPPORT_VCN_DPG the rest is delegated to
+ * vcn_v5_0_1_stop_dpg_mode(); otherwise the function waits for the block and
+ * its LMI to go idle, stalls the UMC channel, blocks VCPU register access,
+ * resets the VCPU, stops its clock, applies the LMI soft resets and clears
+ * UVD_STATUS.
+ *
+ * Return: 0 on success, or the non-zero result of a failed
+ * SOC15_WAIT_ON_RREG() wait.
+ */
+static int vcn_v5_0_1_stop(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_vcn5_fw_shared *fw_shared;
+ uint32_t tmp;
+ int r = 0, vcn_inst;
+
+ vcn_inst = GET_INST(VCN, i);
+
+ /* set the DPG hold-off flag in the shared queue mode before stopping */
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ vcn_v5_0_1_stop_dpg_mode(vinst);
+ return 0;
+ }
+
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ return r;
+
+ /* wait until all listed LMI read/write clean bits are set */
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ return r;
+
+ /* disable LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp);
+ /* then wait for the UMC read/write clean bits */
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ return r;
+
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+ /* apply soft reset */
+ /* two separate read-modify-write steps: LMI UMC first, then LMI */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+ /* clear status */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_unified_ring_get_rptr - get unified read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Logs an error when @ring is not the first encode ring of its instance.
+ *
+ * Return: the value read from the instance's UVD_RB_RPTR register.
+ */
+static uint64_t vcn_v5_0_1_unified_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring *unified = &adev->vcn.inst[ring->me].ring_enc[0];
+
+	if (ring != unified)
+		DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+	return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_RPTR);
+}
+
+/**
+ * vcn_v5_0_1_unified_ring_get_wptr - get unified write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Logs an error when @ring is not the first encode ring of its instance.
+ *
+ * Return: the value at ring->wptr_cpu_addr when the ring uses a doorbell,
+ * otherwise the value read from the instance's UVD_RB_WPTR register.
+ */
+static uint64_t vcn_v5_0_1_unified_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring *unified = &adev->vcn.inst[ring->me].ring_enc[0];
+
+	if (ring != unified)
+		DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+	/* without a doorbell the register is the only copy of the pointer */
+	if (!ring->use_doorbell)
+		return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR);
+
+	return *ring->wptr_cpu_addr;
+}
+
+/**
+ * vcn_v5_0_1_unified_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the enc write pointer to the hardware
+ *
+ * Only the lower 32 bits of ring->wptr are written. With
+ * ring->use_doorbell set they are stored at ring->wptr_cpu_addr and then
+ * written with WDOORBELL32() to ring->doorbell_index; otherwise they are
+ * written to regUVD_RB_WPTR of instance GET_INST(VCN, ring->me).
+ * A ring other than ring_enc[0] of instance ring->me is only reported
+ * through DRM_ERROR(); the write still happens.
+ */
+static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) /* sanity check, log only */
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); /* update the in-memory copy before the doorbell write */
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ }
+}
+
+static int vcn_v5_0_1_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{ /*
+ * Reset the VCN instance that owns @ring: call
+ * amdgpu_ring_reset_helper_begin(), ask amdgpu_dpm_reset_vcn() to reset
+ * the instance, then re-run hw init and DPG-mode start for it.
+ * @vmid is not referenced in this function.
+ * Returns the amdgpu_dpm_reset_vcn() error if that call fails, otherwise
+ * the result of amdgpu_ring_reset_helper_end().
+ */
+ int r = 0;
+ int vcn_inst;
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ vcn_inst = GET_INST(VCN, ring->me);
+ r = amdgpu_dpm_reset_vcn(adev, 1 << vcn_inst); /* one-bit mask, bit index is GET_INST(VCN, ring->me) */
+
+ if (r) {
+ DRM_DEV_ERROR(adev->dev, "VCN reset fail : %d\n", r);
+ return r; /* early exit: amdgpu_ring_reset_helper_end() is not called on this path */
+ }
+
+ vcn_v5_0_1_hw_init_inst(adev, ring->me); /* results of this call and the next, if any, are not checked */
+ vcn_v5_0_1_start_dpg_mode(vinst, vinst->indirect_sram);
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = { /* callbacks assigned to ring_enc[0] of every VCN instance */
+ .type = AMDGPU_RING_TYPE_VCN_ENC,
+ .align_mask = 0x3f,
+ .nop = VCN_ENC_CMD_NO_OP,
+ .get_rptr = vcn_v5_0_1_unified_ring_get_rptr, /* pointer accessors are the only 5.0.1-specific hooks besides .reset */
+ .get_wptr = vcn_v5_0_1_unified_ring_get_wptr,
+ .set_wptr = vcn_v5_0_1_unified_ring_set_wptr,
+ .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
+ 5 +
+ 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* vcn_v2_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
+ .emit_ib = vcn_v2_0_enc_ring_emit_ib, /* packet emission reuses vcn_v2_0 and vcn_v4_0_3 helpers */
+ .emit_fence = vcn_v2_0_enc_ring_emit_fence,
+ .emit_vm_flush = vcn_v4_0_3_enc_ring_emit_vm_flush,
+ .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush,
+ .test_ring = amdgpu_vcn_enc_ring_test_ring, /* generic amdgpu_vcn / amdgpu_ring helpers from here on */
+ .test_ib = amdgpu_vcn_unified_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_v2_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = vcn_v5_0_1_ring_reset,
+};
+
+/**
+ * vcn_v5_0_1_set_unified_ring_funcs - set unified ring functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set unified ring functions
+ *
+ * For each of the adev->vcn.num_vcn_inst instances this points
+ * ring_enc[0].funcs at vcn_v5_0_1_unified_ring_vm_funcs, records the
+ * instance index in ring_enc[0].me and derives the instance's aid_id.
+ */
+static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, vcn_inst;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v5_0_1_unified_ring_vm_funcs;
+ adev->vcn.inst[i].ring_enc[0].me = i; /* lets the ring callbacks find their instance via ring->me */
+ vcn_inst = GET_INST(VCN, i);
+ adev->vcn.inst[i].aid_id = vcn_inst / adev->vcn.num_inst_per_aid; /* integer division; aid_id is matched in the interrupt handler */
+ }
+}
+
+/**
+ * vcn_v5_0_1_is_idle - check VCN block is idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block structure
+ *
+ * Check whether VCN block is idle
+ *
+ * regUVD_STATUS of every instance is read (there is no early exit).
+ * Returns true only if all of them equal UVD_STATUS__IDLE.
+ */
+static bool vcn_v5_0_1_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, ret = 1; /* starts as idle; cleared by any busy instance */
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ ret &= (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) == UVD_STATUS__IDLE);
+
+ return ret;
+}
+
+/**
+ * vcn_v5_0_1_wait_for_idle - wait for VCN block idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Wait for VCN block idle
+ *
+ * Instances are waited on one after another with SOC15_WAIT_ON_RREG() on
+ * regUVD_STATUS, using UVD_STATUS__IDLE as both expected value and mask.
+ * Returns 0 if every wait yields 0, otherwise the first non-zero result;
+ * later instances are then not waited on.
+ */
+static int vcn_v5_0_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, ret = 0; /* stays 0 when there are no instances */
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ret = SOC15_WAIT_ON_RREG(VCN, GET_INST(VCN, i), regUVD_STATUS, UVD_STATUS__IDLE,
+ UVD_STATUS__IDLE);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+/**
+ * vcn_v5_0_1_set_clockgating_state - set VCN block clockgating state
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ * @state: clock gating state
+ *
+ * Set VCN block clockgating state
+ *
+ * AMD_CG_STATE_GATE enables clock gating on every instance, any other
+ * value disables it. Returns -EBUSY as soon as an instance whose
+ * regUVD_STATUS is not UVD_STATUS__IDLE is found while gating; instances
+ * handled earlier in the loop are left gated. Returns 0 otherwise.
+ */
+static int vcn_v5_0_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = state == AMD_CG_STATE_GATE;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ if (enable) {
+ if (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) != UVD_STATUS__IDLE)
+ return -EBUSY; /* gating is only attempted on an idle instance */
+ vcn_v5_0_1_enable_clock_gating(vinst);
+ } else {
+ vcn_v5_0_1_disable_clock_gating(vinst); /* no idle check when ungating */
+ }
+ }
+
+ return 0;
+}
+
+static int vcn_v5_0_1_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
+{ /*
+ * Move one VCN instance to power-gating state @state: AMD_PG_STATE_GATE
+ * stops the instance, any other value starts it. vinst->cur_state is
+ * updated only when the stop/start call returns 0. Returns 0 when nothing
+ * needs to be done, otherwise the stop/start result.
+ */
+ struct amdgpu_device *adev = vinst->adev;
+ int ret = 0;
+
+ /* for SRIOV, guest should not control VCN Power-gating
+ * MMSCH FW should control Power-gating and clock-gating
+ * guest should avoid touching CGC and PG
+ */
+ if (amdgpu_sriov_vf(adev)) {
+ vinst->cur_state = AMD_PG_STATE_UNGATE; /* recorded as ungated regardless of @state */
+ return 0;
+ }
+
+ if (state == vinst->cur_state) /* already in the requested state */
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = vcn_v5_0_1_stop(vinst);
+ else
+ ret = vcn_v5_0_1_start(vinst);
+
+ if (!ret) /* keep the old cur_state on failure */
+ vinst->cur_state = state;
+
+ return ret;
+}
+
+/**
+ * vcn_v5_0_1_process_interrupt - process VCN block interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources
+ * @entry: interrupt entry from clients and sources
+ *
+ * Process VCN block interrupt
+ *
+ * entry->node_id is translated through node_id_to_phys_map[] and the first
+ * VCN instance whose aid_id equals the result handles the interrupt. For
+ * VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE fences on that instance's
+ * ring_enc[0] are processed; other source ids are only logged. @source is
+ * not referenced. Always returns 0.
+ */
+static int vcn_v5_0_1_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t i, inst;
+
+ i = node_id_to_phys_map[entry->node_id]; /* NOTE(review): node_id is not range-checked here - confirm the table covers all values */
+
+ DRM_DEV_DEBUG(adev->dev, "IH: VCN TRAP\n");
+
+ for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst)
+ if (adev->vcn.inst[inst].aid_id == i)
+ break;
+ if (inst >= adev->vcn.num_vcn_inst) { /* loop ran to the end: no instance matched */
+ dev_WARN_ONCE(adev->dev, 1,
+ "Interrupt received for unknown VCN instance %d",
+ entry->node_id);
+ return 0;
+ }
+
+ switch (entry->src_id) {
+ case VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+ amdgpu_fence_process(&adev->vcn.inst[inst].ring_enc[0]);
+ break;
+ default:
+ DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static int vcn_v5_0_1_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{ /* .set hook of vcn_v5_0_1_ras_irq_funcs; none of the arguments are used */
+ return 0; /* nothing is programmed, success is always reported */
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v5_0_1_irq_funcs = { /* regular VCN irq source: only a process hook, no .set */
+ .process = vcn_v5_0_1_process_interrupt,
+};
+
+static const struct amdgpu_irq_src_funcs vcn_v5_0_1_ras_irq_funcs = { /* hooks for the RAS poison irq source */
+ .set = vcn_v5_0_1_set_ras_interrupt_state, /* stub that always returns 0 */
+ .process = amdgpu_vcn_process_poison_irq,
+};
+
+
+/**
+ * vcn_v5_0_1_set_irq_funcs - set VCN block interrupt irq functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set VCN block interrupt irq functions
+ *
+ * Every write goes through adev->vcn.inst, i.e. the first element of the
+ * instance array: its irq.num_types is incremented once per VCN instance
+ * and its irq / ras_poison_irq sources receive the function tables.
+ * NOTE(review): presumably a single irq source on instance 0 is shared by
+ * all instances, one type per instance - confirm.
+ */
+static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ adev->vcn.inst->irq.num_types++; /* same counter each iteration; the loop index is not used */
+
+ adev->vcn.inst->irq.funcs = &vcn_v5_0_1_irq_funcs;
+
+ adev->vcn.inst->ras_poison_irq.num_types = 1;
+ adev->vcn.inst->ras_poison_irq.funcs = &vcn_v5_0_1_ras_irq_funcs;
+
+}
+
+static const struct amd_ip_funcs vcn_v5_0_1_ip_funcs = { /* IP-block callback table referenced by vcn_v5_0_1_ip_block */
+ .name = "vcn_v5_0_1",
+ .early_init = vcn_v5_0_1_early_init,
+ .late_init = vcn_v5_0_1_late_init,
+ .sw_init = vcn_v5_0_1_sw_init,
+ .sw_fini = vcn_v5_0_1_sw_fini,
+ .hw_init = vcn_v5_0_1_hw_init,
+ .hw_fini = vcn_v5_0_1_hw_fini,
+ .suspend = vcn_v5_0_1_suspend,
+ .resume = vcn_v5_0_1_resume,
+ .is_idle = vcn_v5_0_1_is_idle,
+ .wait_for_idle = vcn_v5_0_1_wait_for_idle,
+ .check_soft_reset = NULL, /* no soft-reset hooks are provided (next four entries) */
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = vcn_v5_0_1_set_clockgating_state,
+ .set_powergating_state = vcn_set_powergating_state, /* not a vcn_v5_0_1_* function, unlike the entries above */
+ .dump_ip_state = amdgpu_vcn_dump_ip_state,
+ .print_ip_state = amdgpu_vcn_print_ip_state,
+};
+
+const struct amdgpu_ip_block_version vcn_v5_0_1_ip_block = { /* non-static: declared extern in vcn_v5_0_1.h */
+ .type = AMD_IP_BLOCK_TYPE_VCN,
+ .major = 5, /* major.minor.rev = 5.0.1 */
+ .minor = 0,
+ .rev = 1,
+ .funcs = &vcn_v5_0_1_ip_funcs,
+};
+
+static uint32_t vcn_v5_0_1_query_poison_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, uint32_t sub_block)
+{ /*
+ * Return the POISONED_PF field of regUVD_RAS_VCPU_VCODEC_STATUS for
+ * @instance when @sub_block is AMDGPU_VCN_V5_0_1_VCPU_VCODEC; any other
+ * @sub_block yields 0. A non-zero result is also reported with dev_info().
+ */
+ uint32_t poison_stat = 0, reg_value = 0;
+
+ switch (sub_block) {
+ case AMDGPU_VCN_V5_0_1_VCPU_VCODEC:
+ reg_value = RREG32_SOC15(VCN, instance, regUVD_RAS_VCPU_VCODEC_STATUS); /* NOTE(review): @instance is used without GET_INST(), unlike the ring and idle helpers - confirm this is intended */
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF);
+ break;
+ default:
+ break;
+ }
+
+ if (poison_stat)
+ dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n",
+ instance, sub_block);
+
+ return poison_stat;
+}
+
+static bool vcn_v5_0_1_query_poison_status(struct amdgpu_device *adev)
+{ /*
+ * Query every (instance, sub-block) pair and return true if any of them
+ * reported poison. All pairs are queried; there is no early exit.
+ */
+ uint32_t inst, sub;
+ uint32_t poison_stat = 0;
+
+ for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++)
+ for (sub = 0; sub < AMDGPU_VCN_V5_0_1_MAX_SUB_BLOCK; sub++)
+ poison_stat +=
+ vcn_v5_0_1_query_poison_by_instance(adev, inst, sub);
+
+ return !!poison_stat; /* collapse the accumulated sum to a boolean */
+}
+
+static const struct amdgpu_ras_block_hw_ops vcn_v5_0_1_ras_hw_ops = { /* only the poison-status query is provided */
+ .query_poison_status = vcn_v5_0_1_query_poison_status,
+};
+
+static int vcn_v5_0_1_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{ /*
+ * Decode @bank and log it into the ACA error cache of @handle.
+ * ACA_SMU_TYPE_UE is logged with a count of 1, ACA_SMU_TYPE_CE with the
+ * count taken from the bank's MISC0 register; bank->aca_err_type is set
+ * to match. @data is not referenced.
+ * Returns the aca_bank_info_decode() error, -EINVAL for any other @type,
+ * otherwise the result of aca_error_cache_log_bank_error().
+ */
+ struct aca_bank_info info;
+ u64 misc0;
+ int ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0]; /* only consumed by the CE case below */
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
+ 1ULL);
+ break;
+ case ACA_SMU_TYPE_CE:
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+ ACA_REG__MISC0__ERRCNT(misc0));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+/* Error codes accepted for VCN banks; for the values, refer to the SMU
+ * driver interface ("driver if") header file.
+ */
+static int vcn_v5_0_1_err_codes[] = {
+ 14, 15, 47, /* VCN [D|V|S] */
+};
+
+static bool vcn_v5_0_1_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{ /*
+ * Accept @bank only if the low instance id from its IPID register (bit 0
+ * ignored) equals mmSMNAID_AID0_MCA_SMU and aca_bank_check_error_codes()
+ * returns 0 for the vcn_v5_0_1_err_codes list. @type and @data are not
+ * referenced.
+ */
+ u32 instlo;
+
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1); /* clear bit 0 before comparing */
+
+ if (instlo != mmSMNAID_AID0_MCA_SMU)
+ return false;
+
+ if (aca_bank_check_error_codes(handle->adev, bank,
+ vcn_v5_0_1_err_codes,
+ ARRAY_SIZE(vcn_v5_0_1_err_codes)))
+ return false; /* a non-zero result rejects the bank */
+
+ return true;
+}
+
+static const struct aca_bank_ops vcn_v5_0_1_aca_bank_ops = { /* bank callbacks referenced by vcn_v5_0_1_aca_info */
+ .aca_bank_parser = vcn_v5_0_1_aca_bank_parser,
+ .aca_bank_is_valid = vcn_v5_0_1_aca_bank_is_valid,
+};
+
+static const struct aca_info vcn_v5_0_1_aca_info = { /* passed to amdgpu_ras_bind_aca() in vcn_v5_0_1_ras_late_init() */
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK, /* NOTE(review): only the UE mask is set although the parser also has a CE case - confirm */
+ .bank_ops = &vcn_v5_0_1_aca_bank_ops,
+};
+
+static int vcn_v5_0_1_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{ /*
+ * RAS late-init hook: run the common amdgpu_ras_block_late_init(), bind
+ * the VCN ACA handlers and, when RAS is supported for this block and the
+ * poison irq source has a function table, call amdgpu_irq_get() on it.
+ * Returns 0 on success. If the common late init fails its error is
+ * returned directly; a failure after that calls
+ * amdgpu_ras_block_late_fini() before returning the error.
+ */
+ int r;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r; /* nothing to undo yet */
+
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__VCN,
+ &vcn_v5_0_1_aca_info, NULL);
+ if (r)
+ goto late_fini;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block) &&
+ adev->vcn.inst->ras_poison_irq.funcs) {
+ r = amdgpu_irq_get(adev, &adev->vcn.inst->ras_poison_irq, 0); /* irq source of the first instance, type 0 */
+ if (r)
+ goto late_fini;
+ }
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+
+ return r;
+}
+
+static struct amdgpu_vcn_ras vcn_v5_0_1_ras = { /* installed as adev->vcn.ras by vcn_v5_0_1_set_ras_funcs() */
+ .ras_block = {
+ .hw_ops = &vcn_v5_0_1_ras_hw_ops,
+ .ras_late_init = vcn_v5_0_1_ras_late_init,
+ },
+};
+
+static void vcn_v5_0_1_set_ras_funcs(struct amdgpu_device *adev)
+{ /* point the device's VCN RAS descriptor at the 5.0.1 table; unconditional */
+ adev->vcn.ras = &vcn_v5_0_1_ras;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h
new file mode 100644
index 000000000000..b72e4da68317
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_v5_0_1_H__
+#define __VCN_v5_0_1_H__
+
+#define regVCN_RRMT_CNTL 0x0940 /* NOTE(review): presumably the register offset that pairs with the _BASE_IDX below - confirm */
+#define regVCN_RRMT_CNTL_BASE_IDX 1
+
+
+enum amdgpu_vcn_v5_0_1_sub_block { /* sub-block ids iterated by the poison-status query */
+ AMDGPU_VCN_V5_0_1_VCPU_VCODEC = 0,
+
+ AMDGPU_VCN_V5_0_1_MAX_SUB_BLOCK, /* number of sub-blocks; used as the loop bound, not a real sub-block */
+};
+
+extern const struct amdgpu_ip_block_version vcn_v5_0_1_ip_block; /* VCN 5.0.1 IP block descriptor */
+
+#endif /* __VCN_v5_0_1_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index a9ca6988009e..eb16916c6473 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -289,6 +289,10 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
}
}
+ if (!amdgpu_sriov_vf(adev))
+ adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
+ adev->irq.ih.doorbell_index);
+
pci_set_master(adev->pdev);
/* enable interrupts */
@@ -334,9 +338,11 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
u32 wptr, tmp;
struct amdgpu_ih_regs *ih_regs;
- if (ih == &adev->irq.ih) {
+ if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) {
/* Only ring0 supports writeback. On other rings fall back
* to register-based code with overflow checking below.
+ * ih_soft ring doesn't have any backing hardware registers,
+ * update wptr and return.
*/
wptr = le32_to_cpu(*ih->wptr_cpu);
@@ -358,15 +364,20 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
* this should allow us to catchup.
*/
tmp = (wptr + 32) & ih->ptr_mask;
- dev_warn(adev->dev, "IH ring buffer overflow "
- "(0x%08X, 0x%08X, 0x%08X)\n",
- wptr, ih->rptr, tmp);
+ dev_warn_ratelimited(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+ amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp);
ih->rptr = tmp;
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
out:
return (wptr & ih->ptr_mask);
}
@@ -409,6 +420,9 @@ static void vega10_ih_set_rptr(struct amdgpu_device *adev,
{
struct amdgpu_ih_regs *ih_regs;
+ if (ih == &adev->irq.ih_soft)
+ return;
+
if (ih->use_doorbell) {
/* XXX check if swapping is necessary on BE */
*ih->rptr_cpu = ih->rptr;
@@ -457,18 +471,18 @@ static void vega10_ih_set_self_irq_funcs(struct amdgpu_device *adev)
adev->irq.self_irq.funcs = &vega10_ih_self_irq_funcs;
}
-static int vega10_ih_early_init(void *handle)
+static int vega10_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
vega10_ih_set_interrupt_funcs(adev);
vega10_ih_set_self_irq_funcs(adev);
return 0;
}
-static int vega10_ih_sw_init(void *handle)
+static int vega10_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_IH, 0,
@@ -476,7 +490,7 @@ static int vega10_ih_sw_init(void *handle)
if (r)
return r;
- r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, true);
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, true);
if (r)
return r;
@@ -501,7 +515,7 @@ static int vega10_ih_sw_init(void *handle)
/* initialize ih control registers offset */
vega10_ih_init_register_offset(adev);
- r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
if (r)
return r;
@@ -510,63 +524,50 @@ static int vega10_ih_sw_init(void *handle)
return r;
}
-static int vega10_ih_sw_fini(void *handle)
+static int vega10_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
return 0;
}
-static int vega10_ih_hw_init(void *handle)
+static int vega10_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- r = vega10_ih_irq_init(adev);
- if (r)
- return r;
-
- return 0;
+ return vega10_ih_irq_init(ip_block->adev);
}
-static int vega10_ih_hw_fini(void *handle)
+static int vega10_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- vega10_ih_irq_disable(adev);
+ vega10_ih_irq_disable(ip_block->adev);
return 0;
}
-static int vega10_ih_suspend(void *handle)
+static int vega10_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return vega10_ih_hw_fini(adev);
+ return vega10_ih_hw_fini(ip_block);
}
-static int vega10_ih_resume(void *handle)
+static int vega10_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return vega10_ih_hw_init(adev);
+ return vega10_ih_hw_init(ip_block);
}
-static bool vega10_ih_is_idle(void *handle)
+static bool vega10_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return true;
}
-static int vega10_ih_wait_for_idle(void *handle)
+static int vega10_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return -ETIMEDOUT;
}
-static int vega10_ih_soft_reset(void *handle)
+static int vega10_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
@@ -603,10 +604,10 @@ static void vega10_ih_update_clockgating_state(struct amdgpu_device *adev,
}
}
-static int vega10_ih_set_clockgating_state(void *handle,
+static int vega10_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
vega10_ih_update_clockgating_state(adev,
state == AMD_CG_STATE_GATE);
@@ -614,7 +615,7 @@ static int vega10_ih_set_clockgating_state(void *handle,
}
-static int vega10_ih_set_powergating_state(void *handle,
+static int vega10_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -623,7 +624,6 @@ static int vega10_ih_set_powergating_state(void *handle,
const struct amd_ip_funcs vega10_ih_ip_funcs = {
.name = "vega10_ih",
.early_init = vega10_ih_early_init,
- .late_init = NULL,
.sw_init = vega10_ih_sw_init,
.sw_fini = vega10_ih_sw_fini,
.hw_init = vega10_ih_hw_init,
@@ -640,6 +640,7 @@ const struct amd_ip_funcs vega10_ih_ip_funcs = {
static const struct amdgpu_ih_funcs vega10_ih_funcs = {
.get_wptr = vega10_ih_get_wptr,
.decode_iv = amdgpu_ih_decode_iv_helper,
+ .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
.set_rptr = vega10_ih_set_rptr
};
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
index f51dfc38ac65..85846fd08ce4 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -38,6 +38,11 @@
#define mmIH_CHICKEN_ALDEBARAN 0x18d
#define mmIH_CHICKEN_ALDEBARAN_BASE_IDX 0
+#define mmIH_RETRY_INT_CAM_CNTL_ALDEBARAN 0x00ea
+#define mmIH_RETRY_INT_CAM_CNTL_ALDEBARAN_BASE_IDX 0
+#define IH_RETRY_INT_CAM_CNTL_ALDEBARAN__ENABLE__SHIFT 0x10
+#define IH_RETRY_INT_CAM_CNTL_ALDEBARAN__ENABLE_MASK 0x00010000L
+
static void vega20_ih_set_interrupt_funcs(struct amdgpu_device *adev);
/**
@@ -109,6 +114,33 @@ static int vega20_ih_toggle_ring_interrupts(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1);
+ if (enable) {
+ /* Unset the CLEAR_OVERFLOW bit to make sure the next step
+ * is switching the bit from 0 to 1
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
+ return -ETIMEDOUT;
+ } else {
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ /* Clear RB_OVERFLOW bit */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
+ return -ETIMEDOUT;
+ } else {
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ }
+
/* enable_intr field is only valid in ring0 */
if (ih == &adev->irq.ih)
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
@@ -251,36 +283,14 @@ static int vega20_ih_enable_ring(struct amdgpu_device *adev,
return 0;
}
-/**
- * vega20_ih_reroute_ih - reroute VMC/UTCL2 ih to an ih ring
- *
- * @adev: amdgpu_device pointer
- *
- * Reroute VMC and UMC interrupts on primary ih ring to
- * ih ring 1 so they won't lose when bunches of page faults
- * interrupts overwhelms the interrupt handler(VEGA20)
- */
-static void vega20_ih_reroute_ih(struct amdgpu_device *adev)
+static uint32_t vega20_setup_retry_doorbell(u32 doorbell_index)
{
- uint32_t tmp;
+ u32 val = 0;
- /* vega20 ih reroute will go through psp this
- * function is used for newer asics starting arcturus
- */
- if (adev->asic_type >= CHIP_ARCTURUS) {
- /* Reroute to IH ring 1 for VMC */
- WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_INDEX, 0x12);
- tmp = RREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA);
- tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1);
- tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1);
- WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA, tmp);
-
- /* Reroute IH ring 1 for UTCL2 */
- WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_INDEX, 0x1B);
- tmp = RREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA);
- tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1);
- WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA, tmp);
- }
+ val = REG_SET_FIELD(val, IH_DOORBELL_RPTR, OFFSET, doorbell_index);
+ val = REG_SET_FIELD(val, IH_DOORBELL_RPTR, ENABLE, 1);
+
+ return val;
}
/**
@@ -308,40 +318,64 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev)
adev->nbio.funcs->ih_control(adev);
- if (adev->asic_type == CHIP_ARCTURUS &&
- adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
- ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
- if (adev->irq.ih.use_bus_addr) {
- ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN,
- MC_SPACE_GPA_ENABLE, 1);
+ if (!amdgpu_sriov_vf(adev)) {
+ if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 2, 1)) &&
+ adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
+ if (adev->irq.ih.use_bus_addr) {
+ ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN,
+ MC_SPACE_GPA_ENABLE, 1);
+ }
+ WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken);
}
- WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken);
- }
- /* psp firmware won't program IH_CHICKEN for aldebaran
- * driver needs to program it properly according to
- * MC_SPACE type in IH_RB_CNTL */
- if (adev->asic_type == CHIP_ALDEBARAN) {
- ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN);
- if (adev->irq.ih.use_bus_addr) {
- ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN,
- MC_SPACE_GPA_ENABLE, 1);
+ /* psp firmware won't program IH_CHICKEN for aldebaran
+ * driver needs to program it properly according to
+ * MC_SPACE type in IH_RB_CNTL */
+ if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 0)) ||
+ (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2)) ||
+ (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 5))) {
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN);
+ if (adev->irq.ih.use_bus_addr) {
+ ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN,
+ MC_SPACE_GPA_ENABLE, 1);
+ }
+ WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN, ih_chicken);
}
- WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN, ih_chicken);
}
for (i = 0; i < ARRAY_SIZE(ih); i++) {
if (ih[i]->ring_size) {
- if (i == 1)
- vega20_ih_reroute_ih(adev);
ret = vega20_ih_enable_ring(adev, ih[i]);
if (ret)
return ret;
}
+ ih[i]->overflow = false;
}
+ if (!amdgpu_sriov_vf(adev))
+ adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
+ adev->irq.ih.doorbell_index);
+
pci_set_master(adev->pdev);
+ /* Allocate the doorbell for IH Retry CAM */
+ adev->irq.retry_cam_doorbell_index = (adev->doorbell_index.ih + 3) << 1;
+ WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RETRY_CAM,
+ vega20_setup_retry_doorbell(adev->irq.retry_cam_doorbell_index));
+
+ /* Enable IH Retry CAM */
+ if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 0) ||
+ amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2) ||
+ amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 4) ||
+ amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 5))
+ WREG32_FIELD15(OSSSYS, 0, IH_RETRY_INT_CAM_CNTL_ALDEBARAN,
+ ENABLE, 1);
+ else
+ WREG32_FIELD15(OSSSYS, 0, IH_RETRY_INT_CAM_CNTL, ENABLE, 1);
+
+ adev->irq.retry_cam_enabled = true;
+
/* enable interrupts */
ret = vega20_ih_toggle_interrupts(adev, true);
if (ret)
@@ -385,9 +419,11 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
u32 wptr, tmp;
struct amdgpu_ih_regs *ih_regs;
- if (ih == &adev->irq.ih) {
+ if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) {
/* Only ring0 supports writeback. On other rings fall back
* to register-based code with overflow checking below.
+ * ih_soft ring doesn't have any backing hardware registers,
+ * update wptr and return.
*/
wptr = le32_to_cpu(*ih->wptr_cpu);
@@ -402,22 +438,30 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;
- wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+ if (!amdgpu_sriov_vf(adev))
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+ else
+ ih->overflow = true;
/* When a ring buffer overflow happen start parsing interrupt
* from the last not overwritten vector (wptr + 32). Hopefully
* this should allow us to catchup.
*/
tmp = (wptr + 32) & ih->ptr_mask;
- dev_warn(adev->dev, "IH ring buffer overflow "
- "(0x%08X, 0x%08X, 0x%08X)\n",
- wptr, ih->rptr, tmp);
+ dev_warn_ratelimited(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+ amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp);
ih->rptr = tmp;
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
out:
return (wptr & ih->ptr_mask);
}
@@ -461,6 +505,9 @@ static void vega20_ih_set_rptr(struct amdgpu_device *adev,
{
struct amdgpu_ih_regs *ih_regs;
+ if (ih == &adev->irq.ih_soft)
+ return;
+
if (ih->use_doorbell) {
/* XXX check if swapping is necessary on BE */
*ih->rptr_cpu = ih->rptr;
@@ -494,7 +541,8 @@ static int vega20_ih_self_irq(struct amdgpu_device *adev,
case 2:
schedule_work(&adev->irq.ih2_work);
break;
- default: break;
+ default:
+ break;
}
return 0;
}
@@ -509,18 +557,19 @@ static void vega20_ih_set_self_irq_funcs(struct amdgpu_device *adev)
adev->irq.self_irq.funcs = &vega20_ih_self_irq_funcs;
}
-static int vega20_ih_early_init(void *handle)
+static int vega20_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
vega20_ih_set_interrupt_funcs(adev);
vega20_ih_set_self_irq_funcs(adev);
return 0;
}
-static int vega20_ih_sw_init(void *handle)
+static int vega20_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ bool use_bus_addr = true;
int r;
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_IH, 0,
@@ -528,31 +577,38 @@ static int vega20_ih_sw_init(void *handle)
if (r)
return r;
- r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, true);
+ if ((adev->flags & AMD_IS_APU) &&
+ (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2)))
+ use_bus_addr = false;
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr);
if (r)
return r;
adev->irq.ih.use_doorbell = true;
adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
- r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, PAGE_SIZE, true);
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, PAGE_SIZE, use_bus_addr);
if (r)
return r;
adev->irq.ih1.use_doorbell = true;
adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1;
- r = amdgpu_ih_ring_init(adev, &adev->irq.ih2, PAGE_SIZE, true);
- if (r)
- return r;
+ if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) != IP_VERSION(4, 4, 2) &&
+ amdgpu_ip_version(adev, OSSSYS_HWIP, 0) != IP_VERSION(4, 4, 5)) {
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih2, PAGE_SIZE, true);
+ if (r)
+ return r;
- adev->irq.ih2.use_doorbell = true;
- adev->irq.ih2.doorbell_index = (adev->doorbell_index.ih + 2) << 1;
+ adev->irq.ih2.use_doorbell = true;
+ adev->irq.ih2.doorbell_index = (adev->doorbell_index.ih + 2) << 1;
+ }
/* initialize ih control registers offset */
vega20_ih_init_register_offset(adev);
- r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, use_bus_addr);
if (r)
return r;
@@ -561,19 +617,19 @@ static int vega20_ih_sw_init(void *handle)
return r;
}
-static int vega20_ih_sw_fini(void *handle)
+static int vega20_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
return 0;
}
-static int vega20_ih_hw_init(void *handle)
+static int vega20_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = vega20_ih_irq_init(adev);
if (r)
@@ -582,42 +638,36 @@ static int vega20_ih_hw_init(void *handle)
return 0;
}
-static int vega20_ih_hw_fini(void *handle)
+static int vega20_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- vega20_ih_irq_disable(adev);
+ vega20_ih_irq_disable(ip_block->adev);
return 0;
}
-static int vega20_ih_suspend(void *handle)
+static int vega20_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return vega20_ih_hw_fini(adev);
+ return vega20_ih_hw_fini(ip_block);
}
-static int vega20_ih_resume(void *handle)
+static int vega20_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return vega20_ih_hw_init(adev);
+ return vega20_ih_hw_init(ip_block);
}
-static bool vega20_ih_is_idle(void *handle)
+static bool vega20_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return true;
}
-static int vega20_ih_wait_for_idle(void *handle)
+static int vega20_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return -ETIMEDOUT;
}
-static int vega20_ih_soft_reset(void *handle)
+static int vega20_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
@@ -651,10 +701,10 @@ static void vega20_ih_update_clockgating_state(struct amdgpu_device *adev,
}
}
-static int vega20_ih_set_clockgating_state(void *handle,
+static int vega20_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
vega20_ih_update_clockgating_state(adev,
state == AMD_CG_STATE_GATE);
@@ -662,7 +712,7 @@ static int vega20_ih_set_clockgating_state(void *handle,
}
-static int vega20_ih_set_powergating_state(void *handle,
+static int vega20_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -671,7 +721,6 @@ static int vega20_ih_set_powergating_state(void *handle,
const struct amd_ip_funcs vega20_ih_ip_funcs = {
.name = "vega20_ih",
.early_init = vega20_ih_early_init,
- .late_init = NULL,
.sw_init = vega20_ih_sw_init,
.sw_fini = vega20_ih_sw_fini,
.hw_init = vega20_ih_hw_init,
@@ -688,6 +737,7 @@ const struct amd_ip_funcs vega20_ih_ip_funcs = {
static const struct amdgpu_ih_funcs vega20_ih_funcs = {
.get_wptr = vega20_ih_get_wptr,
.decode_iv = amdgpu_ih_decode_iv_helper,
+ .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
.set_rptr = vega20_ih_set_rptr
};
@@ -696,8 +746,7 @@ static void vega20_ih_set_interrupt_funcs(struct amdgpu_device *adev)
adev->irq.ih_funcs = &vega20_ih_funcs;
}
-const struct amdgpu_ip_block_version vega20_ih_ip_block =
-{
+const struct amdgpu_ip_block_version vega20_ih_ip_block = {
.type = AMD_IP_BLOCK_TYPE_IH,
.major = 4,
.minor = 2,
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index fe9a7cc8d9eb..a611a7345125 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -67,7 +67,6 @@
#include "sdma_v2_4.h"
#include "sdma_v3_0.h"
#include "dce_v10_0.h"
-#include "dce_v11_0.h"
#include "iceland_ih.h"
#include "tonga_ih.h"
#include "cz_ih.h"
@@ -136,15 +135,15 @@ static const struct amdgpu_video_codec_info polaris_video_codecs_encode_array[]
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
.max_width = 4096,
- .max_height = 2304,
- .max_pixels_per_frame = 4096 * 2304,
+ .max_height = 4096,
+ .max_pixels_per_frame = 4096 * 4096,
.max_level = 0,
},
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
.max_width = 4096,
- .max_height = 2304,
- .max_pixels_per_frame = 4096 * 2304,
+ .max_height = 4096,
+ .max_pixels_per_frame = 4096 * 4096,
.max_level = 0,
},
};
@@ -167,16 +166,16 @@ static const struct amdgpu_video_codec_info tonga_video_codecs_decode_array[] =
{
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
+ .max_width = 1920,
+ .max_height = 1088,
+ .max_pixels_per_frame = 1920 * 1088,
.max_level = 3,
},
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
+ .max_width = 1920,
+ .max_height = 1088,
+ .max_pixels_per_frame = 1920 * 1088,
.max_level = 5,
},
{
@@ -188,9 +187,9 @@ static const struct amdgpu_video_codec_info tonga_video_codecs_decode_array[] =
},
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
+ .max_width = 1920,
+ .max_height = 1088,
+ .max_pixels_per_frame = 1920 * 1088,
.max_level = 4,
},
};
@@ -206,16 +205,16 @@ static const struct amdgpu_video_codec_info cz_video_codecs_decode_array[] =
{
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
+ .max_width = 1920,
+ .max_height = 1088,
+ .max_pixels_per_frame = 1920 * 1088,
.max_level = 3,
},
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
+ .max_width = 1920,
+ .max_height = 1088,
+ .max_pixels_per_frame = 1920 * 1088,
.max_level = 5,
},
{
@@ -227,9 +226,9 @@ static const struct amdgpu_video_codec_info cz_video_codecs_decode_array[] =
},
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
+ .max_width = 1920,
+ .max_height = 1088,
+ .max_pixels_per_frame = 1920 * 1088,
.max_level = 4,
},
{
@@ -542,8 +541,15 @@ static u32 vi_get_xclk(struct amdgpu_device *adev)
u32 reference_clock = adev->clock.spll.reference_freq;
u32 tmp;
- if (adev->flags & AMD_IS_APU)
- return reference_clock;
+ if (adev->flags & AMD_IS_APU) {
+ switch (adev->asic_type) {
+ case CHIP_STONEY:
+ /* vbios says 48Mhz, but the actual freq is 100Mhz */
+ return 10000;
+ default:
+ return reference_clock;
+ }
+ }
tmp = RREG32_SMC(ixCG_CLKPIN_CNTL_2);
if (REG_GET_FIELD(tmp, CG_CLKPIN_CNTL_2, MUX_TCLK_TO_XCLK))
@@ -580,11 +586,6 @@ void vi_srbm_select(struct amdgpu_device *adev,
WREG32(mmSRBM_GFX_CNTL, srbm_gfx_cntl);
}
-static void vi_vga_set_state(struct amdgpu_device *adev, bool state)
-{
- /* todo */
-}
-
static bool vi_read_disabled_bios(struct amdgpu_device *adev)
{
u32 bus_cntl;
@@ -762,12 +763,12 @@ static uint32_t vi_get_register_value(struct amdgpu_device *adev,
mutex_lock(&adev->grbm_idx_mutex);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
val = RREG32(reg_offset);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
return val;
} else {
@@ -895,7 +896,7 @@ static int vi_asic_pci_config_reset(struct amdgpu_device *adev)
return r;
}
-static bool vi_asic_supports_baco(struct amdgpu_device *adev)
+static int vi_asic_supports_baco(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_FIJI:
@@ -906,14 +907,14 @@ static bool vi_asic_supports_baco(struct amdgpu_device *adev)
case CHIP_TOPAZ:
return amdgpu_dpm_is_baco_supported(adev);
default:
- return false;
+ return 0;
}
}
static enum amd_reset_method
vi_asic_reset_method(struct amdgpu_device *adev)
{
- bool baco_reset;
+ int baco_reset;
if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
amdgpu_reset_method == AMD_RESET_METHOD_BACO)
@@ -933,7 +934,7 @@ vi_asic_reset_method(struct amdgpu_device *adev)
baco_reset = amdgpu_dpm_is_baco_supported(adev);
break;
default:
- baco_reset = false;
+ baco_reset = 0;
break;
}
@@ -956,6 +957,10 @@ static int vi_asic_reset(struct amdgpu_device *adev)
{
int r;
+ /* APUs don't have full asic reset */
+ if (adev->flags & AMD_IS_APU)
+ return 0;
+
if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
dev_info(adev->dev, "BACO reset\n");
r = amdgpu_dpm_baco_reset(adev);
@@ -1097,24 +1102,6 @@ static int vi_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
return 0;
}
-static void vi_pcie_gen3_enable(struct amdgpu_device *adev)
-{
- if (pci_is_root_bus(adev->pdev->bus))
- return;
-
- if (amdgpu_pcie_gen2 == 0)
- return;
-
- if (adev->flags & AMD_IS_APU)
- return;
-
- if (!(adev->pm.pcie_gen_mask & (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
- CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)))
- return;
-
- /* todo */
-}
-
static void vi_enable_aspm(struct amdgpu_device *adev)
{
u32 data, orig;
@@ -1136,11 +1123,10 @@ static void vi_program_aspm(struct amdgpu_device *adev)
bool bL1SS = false;
bool bClkReqSupport = true;
- if (!amdgpu_aspm)
+ if (!amdgpu_device_should_use_aspm(adev))
return;
- if (adev->flags & AMD_IS_APU ||
- adev->asic_type < CHIP_POLARIS10)
+ if (adev->asic_type < CHIP_POLARIS10)
return;
orig = data = RREG32_PCIE(ixPCIE_LC_CNTL);
@@ -1449,7 +1435,6 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
.read_register = &vi_read_register,
.reset = &vi_asic_reset,
.reset_method = &vi_asic_reset_method,
- .set_vga_state = &vi_vga_set_state,
.get_xclk = &vi_get_xclk,
.set_uvd_clocks = &vi_set_uvd_clocks,
.set_vce_clocks = &vi_set_vce_clocks,
@@ -1469,9 +1454,9 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
#define CZ_REV_BRISTOL(rev) \
((rev >= 0xC8 && rev <= 0xCE) || (rev >= 0xE1 && rev <= 0xE6))
-static int vi_common_early_init(void *handle)
+static int vi_common_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->flags & AMD_IS_APU) {
adev->smc_rreg = &cz_smc_rreg;
@@ -1693,9 +1678,9 @@ static int vi_common_early_init(void *handle)
return 0;
}
-static int vi_common_late_init(void *handle)
+static int vi_common_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
xgpu_vi_mailbox_get_irq(adev);
@@ -1703,9 +1688,9 @@ static int vi_common_late_init(void *handle)
return 0;
}
-static int vi_common_sw_init(void *handle)
+static int vi_common_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
xgpu_vi_mailbox_add_irq_id(adev);
@@ -1713,19 +1698,12 @@ static int vi_common_sw_init(void *handle)
return 0;
}
-static int vi_common_sw_fini(void *handle)
-{
- return 0;
-}
-
-static int vi_common_hw_init(void *handle)
+static int vi_common_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* move the golden regs per IP block */
vi_init_golden_registers(adev);
- /* enable pcie gen2/3 link */
- vi_pcie_gen3_enable(adev);
/* enable aspm */
vi_program_aspm(adev);
/* enable the doorbell aperture */
@@ -1734,9 +1712,9 @@ static int vi_common_hw_init(void *handle)
return 0;
}
-static int vi_common_hw_fini(void *handle)
+static int vi_common_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* enable the doorbell aperture */
vi_enable_doorbell_aperture(adev, false);
@@ -1747,35 +1725,21 @@ static int vi_common_hw_fini(void *handle)
return 0;
}
-static int vi_common_suspend(void *handle)
+static int vi_common_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return vi_common_hw_fini(adev);
+ return vi_common_hw_fini(ip_block);
}
-static int vi_common_resume(void *handle)
+static int vi_common_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return vi_common_hw_init(adev);
+ return vi_common_hw_init(ip_block);
}
-static bool vi_common_is_idle(void *handle)
+static bool vi_common_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int vi_common_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int vi_common_soft_reset(void *handle)
-{
- return 0;
-}
-
static void vi_update_bif_medium_grain_light_sleep(struct amdgpu_device *adev,
bool enable)
{
@@ -1980,10 +1944,10 @@ static int vi_common_set_clockgating_state_by_smu(void *handle,
return 0;
}
-static int vi_common_set_clockgating_state(void *handle,
+static int vi_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -2023,15 +1987,15 @@ static int vi_common_set_clockgating_state(void *handle,
return 0;
}
-static int vi_common_set_powergating_state(void *handle,
+static int vi_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void vi_common_get_clockgating_state(void *handle, u32 *flags)
+static void vi_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -2063,14 +2027,11 @@ static const struct amd_ip_funcs vi_common_ip_funcs = {
.early_init = vi_common_early_init,
.late_init = vi_common_late_init,
.sw_init = vi_common_sw_init,
- .sw_fini = vi_common_sw_fini,
.hw_init = vi_common_hw_init,
.hw_fini = vi_common_hw_fini,
.suspend = vi_common_suspend,
.resume = vi_common_resume,
.is_idle = vi_common_is_idle,
- .wait_for_idle = vi_common_wait_for_idle,
- .soft_reset = vi_common_soft_reset,
.set_clockgating_state = vi_common_set_clockgating_state,
.set_powergating_state = vi_common_set_powergating_state,
.get_clockgating_state = vi_common_get_clockgating_state,
@@ -2092,6 +2053,8 @@ void vi_set_virt_ops(struct amdgpu_device *adev)
int vi_set_ip_blocks(struct amdgpu_device *adev)
{
+ amdgpu_device_set_sriov_virtual_display(adev);
+
switch (adev->asic_type) {
case CHIP_TOPAZ:
/* topaz has no DCE, UVD, VCE */
@@ -2111,7 +2074,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
- if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
+ if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
else if (amdgpu_device_has_dc_support(adev))
@@ -2131,7 +2094,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
- if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
+ if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
else if (amdgpu_device_has_dc_support(adev))
@@ -2160,8 +2123,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
else if (amdgpu_device_has_dc_support(adev))
amdgpu_device_ip_block_add(adev, &dm_ip_block);
#endif
- else
- amdgpu_device_ip_block_add(adev, &dce_v11_2_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v6_3_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v3_4_ip_block);
break;
@@ -2178,8 +2139,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
else if (amdgpu_device_has_dc_support(adev))
amdgpu_device_ip_block_add(adev, &dm_ip_block);
#endif
- else
- amdgpu_device_ip_block_add(adev, &dce_v11_0_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v6_0_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v3_1_ip_block);
#if defined(CONFIG_DRM_AMD_ACP)
@@ -2199,8 +2158,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
else if (amdgpu_device_has_dc_support(adev))
amdgpu_device_ip_block_add(adev, &dm_ip_block);
#endif
- else
- amdgpu_device_ip_block_add(adev, &dce_v11_0_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v6_2_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v3_4_ip_block);
#if defined(CONFIG_DRM_AMD_ACP)
diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h
index 80ce42aacc0c..b61f6b838ec2 100644
--- a/drivers/gpu/drm/amd/amdgpu/vid.h
+++ b/drivers/gpu/drm/amd/amdgpu/vid.h
@@ -246,6 +246,7 @@
* 1 - Stream
* 2 - Bypass
*/
+#define EOP_EXEC (1 << 28) /* For Trailing Fence */
#define DATA_SEL(x) ((x) << 29)
/* 0 - discard
* 1 - send low 32bit data
diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h b/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h
new file mode 100644
index 000000000000..47534dbbd137
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h
@@ -0,0 +1,218 @@
+/* Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __VPE_6_1_FW_IF_H_
+#define __VPE_6_1_FW_IF_H_
+
+/****************
+ * VPE OP Codes
+ ****************/
+enum VPE_CMD_OPCODE {
+ VPE_CMD_OPCODE_NOP = 0x0,
+ VPE_CMD_OPCODE_VPE_DESC = 0x1,
+ VPE_CMD_OPCODE_PLANE_CFG = 0x2,
+ VPE_CMD_OPCODE_VPEP_CFG = 0x3,
+ VPE_CMD_OPCODE_INDIRECT = 0x4,
+ VPE_CMD_OPCODE_FENCE = 0x5,
+ VPE_CMD_OPCODE_TRAP = 0x6,
+ VPE_CMD_OPCODE_REG_WRITE = 0x7,
+ VPE_CMD_OPCODE_POLL_REGMEM = 0x8,
+ VPE_CMD_OPCODE_COND_EXE = 0x9,
+ VPE_CMD_OPCODE_ATOMIC = 0xA,
+ VPE_CMD_OPCODE_PRED_EXE = 0xB,
+ VPE_CMD_OPCODE_COLLAB_SYNC = 0xC,
+ VPE_CMD_OPCODE_TIMESTAMP = 0xD
+};
+
+/** Generic Command Header
+ * Generic Commands include:
+ * Noop, Fence, Trap,
+ * RegisterWrite, PollRegisterWriteMemory,
+ * SetLocalTimestamp, GetLocalTimestamp
+ * GetGlobalGPUTimestamp */
+#define VPE_HEADER_SUB_OPCODE__SHIFT 8
+#define VPE_HEADER_SUB_OPCODE_MASK 0x0000FF00
+#define VPE_HEADER_OPCODE__SHIFT 0
+#define VPE_HEADER_OPCODE_MASK 0x000000FF
+/* Pack a command header dword: opcode in bits 7:0, sub-opcode in bits 15:8. */
+#define VPE_CMD_HEADER(op, subop) \
+ ((((subop) << VPE_HEADER_SUB_OPCODE__SHIFT) & VPE_HEADER_SUB_OPCODE_MASK) | \
+ (((op) << VPE_HEADER_OPCODE__SHIFT) & VPE_HEADER_OPCODE_MASK))
+
+
+ /***************************
+ * VPE NOP
+ ***************************/
+#define VPE_CMD_NOP_HEADER_COUNT__SHIFT 16
+#define VPE_CMD_NOP_HEADER_COUNT_MASK 0x00003FFF
+
+#define VPE_CMD_NOP_HEADER_COUNT(count) \
+ (((count) & VPE_CMD_NOP_HEADER_COUNT_MASK) << VPE_CMD_NOP_HEADER_COUNT__SHIFT)
+
+ /***************************
+ * VPE Descriptor
+ ***************************/
+#define VPE_DESC_CD__SHIFT 16
+#define VPE_DESC_CD_MASK 0x000F0000
+
+#define VPE_DESC_CMD_HEADER(cd) \
+ (VPE_CMD_HEADER(VPE_CMD_OPCODE_VPE_DESC, 0) | \
+ (((cd) << VPE_DESC_CD__SHIFT) & VPE_DESC_CD_MASK))
+
+ /***************************
+ * VPE Plane Config
+ ***************************/
+enum VPE_PLANE_CFG_SUBOP {
+ VPE_PLANE_CFG_SUBOP_1_TO_1 = 0x0,
+ VPE_PLANE_CFG_SUBOP_2_TO_1 = 0x1,
+ VPE_PLANE_CFG_SUBOP_2_TO_2 = 0x2
+};
+
+#define VPE_PLANE_CFG_ONE_PLANE 0
+#define VPE_PLANE_CFG_TWO_PLANES 1
+
+#define VPE_PLANE_CFG_NPS0__SHIFT 16
+#define VPE_PLANE_CFG_NPS0_MASK 0x00030000
+
+#define VPE_PLANE_CFG_NPD0__SHIFT 18
+#define VPE_PLANE_CFG_NPD0_MASK 0x000C0000
+
+#define VPE_PLANE_CFG_NPS1__SHIFT 20
+#define VPE_PLANE_CFG_NPS1_MASK 0x00300000
+
+#define VPE_PLANE_CFG_NPD1__SHIFT 22
+#define VPE_PLANE_CFG_NPD1_MASK 0x00C00000
+
+#define VPE_PLANE_CFG_TMZ__SHIFT 16
+#define VPE_PLANE_CFG_TMZ_MASK 0x00010000
+
+#define VPE_PLANE_CFG_SWIZZLE_MODE__SHIFT 3
+#define VPE_PLANE_CFG_SWIZZLE_MODE_MASK 0x000000F8
+
+#define VPE_PLANE_CFG_ROTATION__SHIFT 0
+#define VPE_PLANE_CFG_ROTATION_MASK 0x00000003
+
+#define VPE_PLANE_ADDR_LO__SHIFT 0
+#define VPE_PLANE_ADDR_LO_MASK 0xFFFFFF00
+
+#define VPE_PLANE_CFG_PITCH__SHIFT 0
+#define VPE_PLANE_CFG_PITCH_MASK 0x00003FFF
+
+#define VPE_PLANE_CFG_VIEWPORT_Y__SHIFT 16
+#define VPE_PLANE_CFG_VIEWPORT_Y_MASK 0x3FFF0000
+#define VPE_PLANE_CFG_VIEWPORT_X__SHIFT 0
+#define VPE_PLANE_CFG_VIEWPORT_X_MASK 0x00003FFF
+
+
+#define VPE_PLANE_CFG_VIEWPORT_HEIGHT__SHIFT 16
+#define VPE_PLANE_CFG_VIEWPORT_HEIGHT_MASK 0x1FFF0000
+#define VPE_PLANE_CFG_VIEWPORT_ELEMENT_SIZE__SHIFT 13
+#define VPE_PLANE_CFG_VIEWPORT_ELEMENT_SIZE_MASK 0x0000E000
+#define VPE_PLANE_CFG_VIEWPORT_WIDTH__SHIFT 0
+#define VPE_PLANE_CFG_VIEWPORT_WIDTH_MASK 0x00001FFF
+
+enum VPE_PLANE_CFG_ELEMENT_SIZE {
+ VPE_PLANE_CFG_ELEMENT_SIZE_8BPE = 0,
+ VPE_PLANE_CFG_ELEMENT_SIZE_16BPE = 1,
+ VPE_PLANE_CFG_ELEMENT_SIZE_32BPE = 2,
+ VPE_PLANE_CFG_ELEMENT_SIZE_64BPE = 3
+};
+/* Plane-config header: NPS0 [17:16], NPD0 [19:18], NPS1 [21:20], NPD1 [23:22] on top of opcode/subop. */
+#define VPE_PLANE_CFG_CMD_HEADER(subop, nps0, npd0, nps1, npd1) \
+ (VPE_CMD_HEADER(VPE_CMD_OPCODE_PLANE_CFG, (subop)) | \
+ (((nps0) << VPE_PLANE_CFG_NPS0__SHIFT) & VPE_PLANE_CFG_NPS0_MASK) | \
+ (((npd0) << VPE_PLANE_CFG_NPD0__SHIFT) & VPE_PLANE_CFG_NPD0_MASK) | \
+ (((nps1) << VPE_PLANE_CFG_NPS1__SHIFT) & VPE_PLANE_CFG_NPS1_MASK) | \
+ (((npd1) << VPE_PLANE_CFG_NPD1__SHIFT) & VPE_PLANE_CFG_NPD1_MASK))
+
+
+/************************
+ * VPEP Config
+ ************************/
+enum VPE_VPEP_CFG_SUBOP {
+ VPE_VPEP_CFG_SUBOP_DIR_CFG = 0x0,
+ VPE_VPEP_CFG_SUBOP_IND_CFG = 0x1
+};
+
+
+// Direct Config Command Header
+#define VPE_DIR_CFG_HEADER_ARRAY_SIZE__SHIFT 16
+#define VPE_DIR_CFG_HEADER_ARRAY_SIZE_MASK 0xFFFF0000
+
+#define VPE_DIR_CFG_CMD_HEADER(subop, arr_sz) \
+ (VPE_CMD_HEADER(VPE_CMD_OPCODE_VPEP_CFG, subop) | \
+ (((arr_sz) << VPE_DIR_CFG_HEADER_ARRAY_SIZE__SHIFT) & VPE_DIR_CFG_HEADER_ARRAY_SIZE_MASK))
+
+
+#define VPE_DIR_CFG_PKT_REGISTER_OFFSET__SHIFT 2
+#define VPE_DIR_CFG_PKT_REGISTER_OFFSET_MASK 0x000FFFFC
+
+#define VPE_DIR_CFG_PKT_DATA_SIZE__SHIFT 20
+#define VPE_DIR_CFG_PKT_DATA_SIZE_MASK 0xFFF00000
+
+
+// InDirect Config Command Header
+#define VPE_IND_CFG_HEADER_NUM_DST__SHIFT 28
+#define VPE_IND_CFG_HEADER_NUM_DST_MASK 0xF0000000
+
+#define VPE_IND_CFG_CMD_HEADER(subop, num_dst) \
+ (VPE_CMD_HEADER(VPE_CMD_OPCODE_VPEP_CFG, subop) | \
+ (((num_dst) << VPE_IND_CFG_HEADER_NUM_DST__SHIFT) & VPE_IND_CFG_HEADER_NUM_DST_MASK))
+
+// Indirect Buffer Command Header
+#define VPE_CMD_INDIRECT_HEADER_VMID__SHIFT 16
+#define VPE_CMD_INDIRECT_HEADER_VMID_MASK 0x0000000F
+#define VPE_CMD_INDIRECT_HEADER_VMID(vmid) \
+ (((vmid) & VPE_CMD_INDIRECT_HEADER_VMID_MASK) << VPE_CMD_INDIRECT_HEADER_VMID__SHIFT)
+
+
+/**************************
+ * Poll Reg/Mem Sub-OpCode
+ **************************/
+enum VPE_POLL_REGMEM_SUBOP {
+ VPE_POLL_REGMEM_SUBOP_REGMEM = 0x0,
+ VPE_POLL_REGMEM_SUBOP_REGMEM_WRITE = 0x1
+};
+
+#define VPE_CMD_POLL_REGMEM_HEADER_FUNC__SHIFT 28
+#define VPE_CMD_POLL_REGMEM_HEADER_FUNC_MASK 0x00000007
+#define VPE_CMD_POLL_REGMEM_HEADER_FUNC(func) \
+ (((func) & VPE_CMD_POLL_REGMEM_HEADER_FUNC_MASK) << VPE_CMD_POLL_REGMEM_HEADER_FUNC__SHIFT)
+
+#define VPE_CMD_POLL_REGMEM_HEADER_MEM__SHIFT 31
+#define VPE_CMD_POLL_REGMEM_HEADER_MEM_MASK 0x00000001
+#define VPE_CMD_POLL_REGMEM_HEADER_MEM(mem) \
+ (((mem) & VPE_CMD_POLL_REGMEM_HEADER_MEM_MASK) << VPE_CMD_POLL_REGMEM_HEADER_MEM__SHIFT)
+
+#define VPE_CMD_POLL_REGMEM_DW5_INTERVAL__SHIFT 0
+#define VPE_CMD_POLL_REGMEM_DW5_INTERVAL_MASK 0x0000FFFF
+#define VPE_CMD_POLL_REGMEM_DW5_INTERVAL(interval) \
+ (((interval) & VPE_CMD_POLL_REGMEM_DW5_INTERVAL_MASK) << VPE_CMD_POLL_REGMEM_DW5_INTERVAL__SHIFT)
+
+#define VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT__SHIFT 16
+#define VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT_MASK 0x00000FFF
+#define VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT(count) \
+ (((count) & VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT_MASK) << VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT__SHIFT)
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c
new file mode 100644
index 000000000000..45876883bbf3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/firmware.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_vpe.h"
+#include "vpe_v6_1.h"
+#include "soc15_common.h"
+#include "ivsrcid/vpe/irqsrcs_vpe_6_1.h"
+#include "vpe/vpe_6_1_0_offset.h"
+#include "vpe/vpe_6_1_0_sh_mask.h"
+
+MODULE_FIRMWARE("amdgpu/vpe_6_1_0.bin");
+MODULE_FIRMWARE("amdgpu/vpe_6_1_1.bin");
+MODULE_FIRMWARE("amdgpu/vpe_6_1_3.bin");
+
+#define VPE_THREAD1_UCODE_OFFSET 0x8000
+
+#define regVPEC_COLLABORATE_CNTL 0x0013
+#define regVPEC_COLLABORATE_CNTL_BASE_IDX 0
+#define VPEC_COLLABORATE_CNTL__COLLABORATE_MODE_EN__SHIFT 0x0
+#define VPEC_COLLABORATE_CNTL__COLLABORATE_MODE_EN_MASK 0x00000001L
+
+#define regVPEC_COLLABORATE_CFG 0x0014
+#define regVPEC_COLLABORATE_CFG_BASE_IDX 0
+#define VPEC_COLLABORATE_CFG__MASTER_ID__SHIFT 0x0
+#define VPEC_COLLABORATE_CFG__MASTER_EN__SHIFT 0x3
+#define VPEC_COLLABORATE_CFG__SLAVE0_ID__SHIFT 0x4
+#define VPEC_COLLABORATE_CFG__SLAVE0_EN__SHIFT 0x7
+#define VPEC_COLLABORATE_CFG__MASTER_ID_MASK 0x00000007L
+#define VPEC_COLLABORATE_CFG__MASTER_EN_MASK 0x00000008L
+#define VPEC_COLLABORATE_CFG__SLAVE0_ID_MASK 0x00000070L
+#define VPEC_COLLABORATE_CFG__SLAVE0_EN_MASK 0x00000080L
+
+#define regVPEC_CNTL_6_1_1 0x0016
+#define regVPEC_CNTL_6_1_1_BASE_IDX 0
+#define regVPEC_QUEUE_RESET_REQ_6_1_1 0x002c
+#define regVPEC_QUEUE_RESET_REQ_6_1_1_BASE_IDX 0
+#define regVPEC_PUB_DUMMY2_6_1_1 0x004c
+#define regVPEC_PUB_DUMMY2_6_1_1_BASE_IDX 0
+
+/* Return @offset added to entry 0 of adev->reg_offset[VPE_HWIP][@inst],
+ * i.e. the register address of @offset for VPE instance @inst. */
+static uint32_t vpe_v6_1_get_reg_offset(struct amdgpu_vpe *vpe, uint32_t inst, uint32_t offset)
+{
+ struct amdgpu_device *adev = vpe->ring.adev;
+
+ return adev->reg_offset[VPE_HWIP][inst][0] + offset;
+}
+/* Set (halt=true) or clear (halt=false) HALT and TH1_RESET in VPEC_F32_CNTL on every instance. */
+static void vpe_v6_1_halt(struct amdgpu_vpe *vpe, bool halt)
+{
+ struct amdgpu_device *adev = vpe->ring.adev;
+ uint32_t i, f32_cntl;
+
+ for (i = 0; i < vpe->num_instances; i++) {
+ f32_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_F32_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, HALT, halt ? 1 : 0);
+ f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, TH1_RESET, halt ? 1 : 0);
+ WREG32(vpe_get_reg_offset(vpe, i, regVPEC_F32_CNTL), f32_cntl);
+ }
+}
+
+/*
+ * Register source VPE_6_1_SRCID__VPE_TRAP of client SOC21_IH_CLIENTID_VPE
+ * through amdgpu_irq_add_id(), passing &adev->vpe.trap_irq.
+ * Returns the value reported by amdgpu_irq_add_id() unchanged.
+ */
+static int vpe_v6_1_irq_init(struct amdgpu_vpe *vpe)
+{
+ struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe);
+
+ return amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_VPE,
+ VPE_6_1_SRCID__VPE_TRAP,
+ &adev->vpe.trap_irq);
+}
+/* If vpe->collaborate_mode is set, program COLLABORATE_CNTL/CFG on every instance (master 0, slave0 1). */
+static void vpe_v6_1_set_collaborate_mode(struct amdgpu_vpe *vpe, bool enable)
+{
+ struct amdgpu_device *adev = vpe->ring.adev;
+ uint32_t vpe_colla_cntl, vpe_colla_cfg, i;
+
+ if (!vpe->collaborate_mode)
+ return;
+
+ for (i = 0; i < vpe->num_instances; i++) {
+ vpe_colla_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CNTL));
+ vpe_colla_cntl = REG_SET_FIELD(vpe_colla_cntl, VPEC_COLLABORATE_CNTL,
+ COLLABORATE_MODE_EN, enable ? 1 : 0);
+ WREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CNTL), vpe_colla_cntl);
+
+ vpe_colla_cfg = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CFG));
+ vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, MASTER_ID, 0);
+ vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, MASTER_EN, enable ? 1 : 0);
+ vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, SLAVE0_ID, 1);
+ vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, SLAVE0_EN, enable ? 1 : 0);
+ WREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CFG), vpe_colla_cfg);
+ }
+}
+/* Prepare every VPE instance, then load the ucode through PSP or by direct register writes. */
+static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe)
+{
+ struct amdgpu_device *adev = vpe->ring.adev;
+ const struct vpe_firmware_header_v1_0 *vpe_hdr;
+ const __le32 *data;
+ uint32_t ucode_offset[2], ucode_size[2];
+ uint32_t i, j, size_dw;
+ uint32_t ret; /* scratch for the VPEC_CNTL read-modify-write, not a return code */
+
+ /* clear the UMSCH_INT_ENABLE field of VPEC_CNTL on every instance */
+ for (j = 0; j < vpe->num_instances; j++) {
+ /* IP version 6.1.1 uses regVPEC_CNTL_6_1_1 instead of regVPEC_CNTL */
+ if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1))
+ ret = RREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL_6_1_1));
+ else
+ ret = RREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL));
+
+ ret = REG_SET_FIELD(ret, VPEC_CNTL, UMSCH_INT_ENABLE, 0);
+
+ if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1))
+ WREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL_6_1_1), ret);
+ else
+ WREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL), ret);
+ }
+
+ /* setup collaborate mode */
+ vpe_v6_1_set_collaborate_mode(vpe, true);
+ /* setup DPM; a failure is only warned about and loading continues */
+ if (amdgpu_vpe_configure_dpm(vpe))
+ dev_warn(adev->dev, "VPE failed to enable DPM\n");
+
+ /*
+ * For VPE 6.1.1, still only need to add master's offset, and psp will apply it to slave as well.
+ * Here use instance 0 as master.
+ */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ uint32_t f32_offset, f32_cntl;
+
+ f32_offset = vpe_get_reg_offset(vpe, 0, regVPEC_F32_CNTL);
+ f32_cntl = RREG32(f32_offset);
+ f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, HALT, 0);
+ f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, TH1_RESET, 0);
+
+ /* NOTE(review): presumably read back by amdgpu_vpe_psp_update_sram() - confirm */
+ adev->vpe.cmdbuf_cpu_addr[0] = f32_offset;
+ adev->vpe.cmdbuf_cpu_addr[1] = f32_cntl;
+
+ return amdgpu_vpe_psp_update_sram(adev);
+ }
+
+ vpe_hdr = (const struct vpe_firmware_header_v1_0 *)adev->vpe.fw->data;
+
+ /* Thread 0(command thread) ucode offset/size */
+ ucode_offset[0] = le32_to_cpu(vpe_hdr->header.ucode_array_offset_bytes);
+ ucode_size[0] = le32_to_cpu(vpe_hdr->ctx_ucode_size_bytes);
+ /* Thread 1(control thread) ucode offset/size */
+ ucode_offset[1] = le32_to_cpu(vpe_hdr->ctl_ucode_offset);
+ ucode_size[1] = le32_to_cpu(vpe_hdr->ctl_ucode_size_bytes);
+
+ vpe_v6_1_halt(vpe, true);
+
+ for (j = 0; j < vpe->num_instances; j++) {
+ for (i = 0; i < 2; i++) { /* i: 0 = thread 0 image, 1 = thread 1 image */
+ if (i > 0)
+ WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_ADDR), VPE_THREAD1_UCODE_OFFSET);
+ else
+ WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_ADDR), 0);
+
+ data = (const __le32 *)(adev->vpe.fw->data + ucode_offset[i]);
+ size_dw = ucode_size[i] / sizeof(__le32);
+
+ while (size_dw--) {
+ if (amdgpu_emu_mode && size_dw % 500 == 0) /* emulation: sleep 1 ms every 500 dwords */
+ msleep(1);
+ WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_DATA), le32_to_cpup(data++));
+ }
+ }
+ }
+
+ vpe_v6_1_halt(vpe, false);
+
+ return 0;
+}
+
+/*
+ * Program queue 0 on every VPE instance (ring buffer control, read/write
+ * pointers, rptr write-back address, ring base and doorbell), enable the
+ * ring buffer and the indirect buffers, then run the ring test.
+ *
+ * Returns the result of amdgpu_ring_test_helper().
+ */
+static int vpe_v6_1_ring_start(struct amdgpu_vpe *vpe)
+{
+	struct amdgpu_ring *ring = &vpe->ring;
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t rb_size_order, rb_ctl, ib_ctl;
+	uint32_t db_ctl, db_off;
+	uint32_t inst;
+
+	/* Ring buffer size in dwords, expressed as a power-of-two order */
+	rb_size_order = order_base_2(ring->ring_size / 4);
+
+	for (inst = 0; inst < vpe->num_instances; inst++) {
+		/* Size, privilege and VMID of the ring buffer */
+		rb_ctl = RREG32(vpe_get_reg_offset(vpe, inst, regVPEC_QUEUE0_RB_CNTL));
+		rb_ctl = REG_SET_FIELD(rb_ctl, VPEC_QUEUE0_RB_CNTL, RB_SIZE, rb_size_order);
+		rb_ctl = REG_SET_FIELD(rb_ctl, VPEC_QUEUE0_RB_CNTL, RB_PRIV, 1);
+		rb_ctl = REG_SET_FIELD(rb_ctl, VPEC_QUEUE0_RB_CNTL, RB_VMID, 0);
+		WREG32(vpe_get_reg_offset(vpe, inst, regVPEC_QUEUE0_RB_CNTL), rb_ctl);
+
+		/* Start with the read and write pointers of the ring at zero */
+		WREG32(vpe_get_reg_offset(vpe, inst, regVPEC_QUEUE0_RB_RPTR), 0);
+		WREG32(vpe_get_reg_offset(vpe, inst, regVPEC_QUEUE0_RB_RPTR_HI), 0);
+		WREG32(vpe_get_reg_offset(vpe, inst, regVPEC_QUEUE0_RB_WPTR), 0);
+		WREG32(vpe_get_reg_offset(vpe, inst, regVPEC_QUEUE0_RB_WPTR_HI), 0);
+
+		/* The write-back address is programmed whether or not write-back is used */
+		WREG32(vpe_get_reg_offset(vpe, inst, regVPEC_QUEUE0_RB_RPTR_ADDR_LO),
+		       lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+		WREG32(vpe_get_reg_offset(vpe, inst, regVPEC_QUEUE0_RB_RPTR_ADDR_HI),
+		       upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+
+		/* Ring base address, split at bit 8 / bit 40 */
+		WREG32(vpe_get_reg_offset(vpe, inst, regVPEC_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
+		WREG32(vpe_get_reg_offset(vpe, inst, regVPEC_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
+
+		ring->wptr = 0;
+
+		/*
+		 * minor_ptr_update has to be set before wptr is programmed to a
+		 * smaller value, and cleared again once wptr has been written.
+		 */
+		WREG32(vpe_get_reg_offset(vpe, inst, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 1);
+		WREG32(vpe_get_reg_offset(vpe, inst, regVPEC_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
+		WREG32(vpe_get_reg_offset(vpe, inst, regVPEC_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+		WREG32(vpe_get_reg_offset(vpe, inst, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 0);
+
+		/* Each instance uses its own doorbell slot: doorbell_index + inst * 4 */
+		db_off = RREG32(vpe_get_reg_offset(vpe, inst, regVPEC_QUEUE0_DOORBELL_OFFSET));
+		db_off = REG_SET_FIELD(db_off, VPEC_QUEUE0_DOORBELL_OFFSET, OFFSET, ring->doorbell_index + inst*4);
+		WREG32(vpe_get_reg_offset(vpe, inst, regVPEC_QUEUE0_DOORBELL_OFFSET), db_off);
+
+		db_ctl = RREG32(vpe_get_reg_offset(vpe, inst, regVPEC_QUEUE0_DOORBELL));
+		db_ctl = REG_SET_FIELD(db_ctl, VPEC_QUEUE0_DOORBELL, ENABLE, ring->use_doorbell ? 1 : 0);
+		WREG32(vpe_get_reg_offset(vpe, inst, regVPEC_QUEUE0_DOORBELL), db_ctl);
+
+		adev->nbio.funcs->vpe_doorbell_range(adev, inst, ring->use_doorbell, ring->doorbell_index + inst*4, 4);
+
+		/* Turn on rptr write-back together with the ring buffer itself */
+		rb_ctl = REG_SET_FIELD(rb_ctl, VPEC_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+		rb_ctl = REG_SET_FIELD(rb_ctl, VPEC_QUEUE0_RB_CNTL, RB_ENABLE, 1);
+		WREG32(vpe_get_reg_offset(vpe, inst, regVPEC_QUEUE0_RB_CNTL), rb_ctl);
+
+		/* Finally allow indirect buffers on this queue */
+		ib_ctl = RREG32(vpe_get_reg_offset(vpe, inst, regVPEC_QUEUE0_IB_CNTL));
+		ib_ctl = REG_SET_FIELD(ib_ctl, VPEC_QUEUE0_IB_CNTL, IB_ENABLE, 1);
+		WREG32(vpe_get_reg_offset(vpe, inst, regVPEC_QUEUE0_IB_CNTL), ib_ctl);
+	}
+
+	return amdgpu_ring_test_helper(ring);
+}
+
+/*
+ * Request a reset of queue 0 on every VPE instance, wait (via
+ * SOC15_WAIT_ON_RREG) for the QUEUE0_RESET bit to read back as 0, and then
+ * mark the ring's scheduler as not ready.
+ *
+ * VPE 6.1.1 uses a different reset-request register than the other 6.1
+ * variants, hence the IP version checks.
+ *
+ * Returns 0 when every instance completed the reset (also when there are no
+ * instances), otherwise the error code of the first instance that failed.
+ * All instances are still attempted after a failure.
+ */
+static int vpe_v_6_1_ring_stop(struct amdgpu_vpe *vpe)
+{
+	struct amdgpu_device *adev = vpe->ring.adev;
+	uint32_t queue_reset, i;
+	/*
+	 * ret must start at 0: with zero instances the loop never runs, and
+	 * a later successful instance must not hide an earlier failure.
+	 */
+	int ret = 0, r;
+
+	for (i = 0; i < vpe->num_instances; i++) {
+		if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1))
+			queue_reset = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ_6_1_1));
+		else
+			queue_reset = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ));
+
+		queue_reset = REG_SET_FIELD(queue_reset, VPEC_QUEUE_RESET_REQ, QUEUE0_RESET, 1);
+
+		if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) {
+			WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ_6_1_1), queue_reset);
+			r = SOC15_WAIT_ON_RREG(VPE, i, regVPEC_QUEUE_RESET_REQ_6_1_1, 0,
+					       VPEC_QUEUE_RESET_REQ__QUEUE0_RESET_MASK);
+		} else {
+			WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ), queue_reset);
+			r = SOC15_WAIT_ON_RREG(VPE, i, regVPEC_QUEUE_RESET_REQ, 0,
+					       VPEC_QUEUE_RESET_REQ__QUEUE0_RESET_MASK);
+		}
+
+		if (r) {
+			dev_err(adev->dev, "VPE queue reset failed\n");
+			/* Remember the first failure, keep resetting the rest */
+			if (!ret)
+				ret = r;
+		}
+	}
+
+	vpe->ring.sched.ready = false;
+
+	return ret;
+}
+
+/*
+ * Enable or disable the VPE trap interrupt by updating the TRAP_ENABLE
+ * field of the VPEC_CNTL register on instance 0.
+ *
+ * @source and @type are not used. Always returns 0.
+ */
+static int vpe_v6_1_set_trap_irq_state(struct amdgpu_device *adev,
+				       struct amdgpu_irq_src *source,
+				       unsigned int type,
+				       enum amdgpu_interrupt_state state)
+{
+	struct amdgpu_vpe *vpe = &adev->vpe;
+	uint32_t cntl_reg, cntl_val;
+
+	/* VPE 6.1.1 uses its own VPEC_CNTL register definition */
+	cntl_reg = amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1) ?
+		   regVPEC_CNTL_6_1_1 : regVPEC_CNTL;
+
+	cntl_val = RREG32(vpe_get_reg_offset(vpe, 0, cntl_reg));
+	cntl_val = REG_SET_FIELD(cntl_val, VPEC_CNTL, TRAP_ENABLE,
+				 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+	WREG32(vpe_get_reg_offset(vpe, 0, cntl_reg), cntl_val);
+
+	return 0;
+}
+
+/*
+ * Handle a VPE trap interrupt: when the IV entry comes from the VPE client,
+ * process the fences of the VPE ring. Entries from any other client are
+ * ignored.
+ *
+ * @source is not used. Always returns 0.
+ */
+static int vpe_v6_1_process_trap_irq(struct amdgpu_device *adev,
+				     struct amdgpu_irq_src *source,
+				     struct amdgpu_iv_entry *entry)
+{
+	dev_dbg(adev->dev, "IH: VPE trap\n");
+
+	if (entry->client_id == SOC21_IH_CLIENTID_VPE)
+		amdgpu_fence_process(&adev->vpe.ring);
+
+	return 0;
+}
+
+/*
+ * Fill vpe->regs with the VPE 6.1 register offsets: the queue 0 ring
+ * pointer/preempt registers and the registers used for the DPM settings
+ * and the context indicator.
+ *
+ * Always returns 0.
+ */
+static int vpe_v6_1_set_regs(struct amdgpu_vpe *vpe)
+{
+	struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe);
+	bool is_v6_1_1 = amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1);
+
+	/* Queue 0 ring buffer pointers and preemption */
+	vpe->regs.queue0_rb_rptr_lo = regVPEC_QUEUE0_RB_RPTR;
+	vpe->regs.queue0_rb_rptr_hi = regVPEC_QUEUE0_RB_RPTR_HI;
+	vpe->regs.queue0_rb_wptr_lo = regVPEC_QUEUE0_RB_WPTR;
+	vpe->regs.queue0_rb_wptr_hi = regVPEC_QUEUE0_RB_WPTR_HI;
+	vpe->regs.queue0_preempt = regVPEC_QUEUE0_PREEMPT;
+
+	/* Only the DPM enable register differs on VPE 6.1.1 */
+	vpe->regs.dpm_enable = is_v6_1_1 ? regVPEC_PUB_DUMMY2_6_1_1 :
+					   regVPEC_PUB_DUMMY2;
+
+	/* The remaining DPM settings are mapped onto the per-queue DUMMY registers */
+	vpe->regs.dpm_pratio = regVPEC_QUEUE6_DUMMY4;
+	vpe->regs.dpm_request_interval = regVPEC_QUEUE5_DUMMY3;
+	vpe->regs.dpm_decision_threshold = regVPEC_QUEUE5_DUMMY4;
+	vpe->regs.dpm_busy_clamp_threshold = regVPEC_QUEUE7_DUMMY2;
+	vpe->regs.dpm_idle_clamp_threshold = regVPEC_QUEUE7_DUMMY3;
+	vpe->regs.dpm_request_lv = regVPEC_QUEUE7_DUMMY1;
+	vpe->regs.context_indicator = regVPEC_QUEUE6_DUMMY3;
+
+	return 0;
+}
+
+/*
+ * VPE 6.1 callback table, installed by vpe_v6_1_set_funcs().
+ * The vpe_v6_1_* (and vpe_v_6_1_ring_stop) entries are defined in this file;
+ * the amdgpu_vpe_* entries are defined elsewhere (presumably the common VPE
+ * code - confirm against amdgpu_vpe.h).
+ */
+static const struct vpe_funcs vpe_v6_1_funcs = {
+ .get_reg_offset = vpe_v6_1_get_reg_offset,
+ .set_regs = vpe_v6_1_set_regs,
+ .irq_init = vpe_v6_1_irq_init,
+ .init_microcode = amdgpu_vpe_init_microcode,
+ .load_microcode = vpe_v6_1_load_microcode,
+ .ring_init = amdgpu_vpe_ring_init,
+ .ring_start = vpe_v6_1_ring_start,
+ .ring_stop = vpe_v_6_1_ring_stop,
+ .ring_fini = amdgpu_vpe_ring_fini,
+};
+
+/*
+ * Trap interrupt source callbacks for VPE 6.1: .set toggles TRAP_ENABLE in
+ * VPEC_CNTL, .process runs fence processing for the VPE ring.
+ */
+static const struct amdgpu_irq_src_funcs vpe_v6_1_trap_irq_funcs = {
+ .set = vpe_v6_1_set_trap_irq_state,
+ .process = vpe_v6_1_process_trap_irq,
+};
+
+/*
+ * Hook the VPE 6.1 implementation into @vpe: the trap interrupt source
+ * callbacks and the VPE callback table.
+ */
+void vpe_v6_1_set_funcs(struct amdgpu_vpe *vpe)
+{
+	vpe->trap_irq.funcs = &vpe_v6_1_trap_irq_funcs;
+	vpe->funcs = &vpe_v6_1_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.h b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.h
new file mode 100644
index 000000000000..a9bea7905a77
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __VPE_V6_1_H__
+#define __VPE_V6_1_H__
+
+#include "amdgpu_vpe.h"
+
+/* Install the VPE 6.1 callback table and trap IRQ callbacks on @vpe. */
+void vpe_v6_1_set_funcs(struct amdgpu_vpe *vpe);
+
+#endif /* __VPE_V6_1_H__ */
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index 8cc0a76ddf9f..16e12c9913f9 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -5,8 +5,7 @@
config HSA_AMD
bool "HSA kernel driver for AMD GPU devices"
- depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64)
- imply AMD_IOMMU_V2 if X86_64
+ depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64 || (RISCV && 64BIT) || (LOONGARCH && 64BIT))
select HMM_MIRROR
select MMU_NOTIFIER
select DRM_AMDGPU_USERPTR
@@ -25,3 +24,17 @@ config HSA_AMD_SVM
preemptions and one based on page faults. To enable page fault
based memory management on most GFXv9 GPUs, set the module
parameter amdgpu.noretry=0.
+
+config HSA_AMD_P2P
+ bool "HSA kernel driver support for peer-to-peer for AMD GPU devices"
+ depends on HSA_AMD && PCI_P2PDMA && DMABUF_MOVE_NOTIFY
+ help
+ Enable peer-to-peer (P2P) communication between AMD GPUs over
+ the PCIe bus. This can improve performance of multi-GPU compute
+ applications and libraries by enabling GPUs to access data directly
+ in peer GPUs' memory without intermediate copies in system memory.
+
+ This P2P feature is only enabled on compatible chipsets, and between
+ GPUs with large memory BARs that expose the entire VRAM in PCIe bus
+ address space within the physical address limits of the GPUs.
+
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index c4f3aff11072..0ce08113c9f0 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -27,7 +27,6 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_device.o \
$(AMDKFD_PATH)/kfd_chardev.o \
$(AMDKFD_PATH)/kfd_topology.o \
- $(AMDKFD_PATH)/kfd_pasid.o \
$(AMDKFD_PATH)/kfd_doorbell.o \
$(AMDKFD_PATH)/kfd_flat_memory.o \
$(AMDKFD_PATH)/kfd_process.o \
@@ -37,6 +36,8 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_mqd_manager_vi.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v9.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v10.o \
+ $(AMDKFD_PATH)/kfd_mqd_manager_v11.o \
+ $(AMDKFD_PATH)/kfd_mqd_manager_v12.o \
$(AMDKFD_PATH)/kfd_kernel_queue.o \
$(AMDKFD_PATH)/kfd_packet_manager.o \
$(AMDKFD_PATH)/kfd_packet_manager_vi.o \
@@ -47,18 +48,17 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_device_queue_manager_vi.o \
$(AMDKFD_PATH)/kfd_device_queue_manager_v9.o \
$(AMDKFD_PATH)/kfd_device_queue_manager_v10.o \
+ $(AMDKFD_PATH)/kfd_device_queue_manager_v11.o \
+ $(AMDKFD_PATH)/kfd_device_queue_manager_v12.o \
$(AMDKFD_PATH)/kfd_interrupt.o \
$(AMDKFD_PATH)/kfd_events.o \
$(AMDKFD_PATH)/cik_event_interrupt.o \
$(AMDKFD_PATH)/kfd_int_process_v9.o \
- $(AMDKFD_PATH)/kfd_dbgdev.o \
- $(AMDKFD_PATH)/kfd_dbgmgr.o \
+ $(AMDKFD_PATH)/kfd_int_process_v10.o \
+ $(AMDKFD_PATH)/kfd_int_process_v11.o \
$(AMDKFD_PATH)/kfd_smi_events.o \
- $(AMDKFD_PATH)/kfd_crat.o
-
-ifneq ($(CONFIG_AMD_IOMMU_V2),)
-AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o
-endif
+ $(AMDKFD_PATH)/kfd_crat.o \
+ $(AMDKFD_PATH)/kfd_debug.o
ifneq ($(CONFIG_DEBUG_FS),)
AMDKFD_FILES += $(AMDKFD_PATH)/kfd_debugfs.o
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index f6233019f042..73acbe0b7c21 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -26,7 +26,7 @@
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
-static bool cik_event_interrupt_isr(struct kfd_dev *dev,
+static bool cik_event_interrupt_isr(struct kfd_node *dev,
const uint32_t *ih_ring_entry,
uint32_t *patched_ihre,
bool *patched_flag)
@@ -43,15 +43,15 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
*/
if ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) &&
- dev->device_info->asic_family == CHIP_HAWAII) {
+ dev->adev->asic_type == CHIP_HAWAII) {
struct cik_ih_ring_entry *tmp_ihre =
(struct cik_ih_ring_entry *)patched_ihre;
*patched_flag = true;
*tmp_ihre = *ihre;
- vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd);
- ret = f2g->get_atc_vmid_pasid_mapping_info(dev->kgd, vmid, &pasid);
+ vmid = f2g->read_vmid_from_vmfault_reg(dev->adev);
+ ret = f2g->get_atc_vmid_pasid_mapping_info(dev->adev, vmid, &pasid);
tmp_ihre->ring_id &= 0x000000ff;
tmp_ihre->ring_id |= vmid << 8;
@@ -85,13 +85,12 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
!amdgpu_no_queue_eviction_on_vm_fault);
}
-static void cik_event_interrupt_wq(struct kfd_dev *dev,
+static void cik_event_interrupt_wq(struct kfd_node *dev,
const uint32_t *ih_ring_entry)
{
const struct cik_ih_ring_entry *ihre =
(const struct cik_ih_ring_entry *)ih_ring_entry;
uint32_t context_id = ihre->data & 0xfffffff;
- unsigned int vmid = (ihre->ring_id & 0x0000ff00) >> 8;
u32 pasid = (ihre->ring_id & 0xffff0000) >> 16;
if (pasid == 0)
@@ -107,20 +106,26 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
kfd_signal_hw_exception_event(pasid);
else if (ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) {
+ struct kfd_process_device *pdd = NULL;
struct kfd_vm_fault_info info;
+ struct kfd_process *p;
kfd_smi_event_update_vmfault(dev, pasid);
- kfd_process_vm_fault(dev->dqm, pasid);
+ p = kfd_lookup_process_by_pasid(pasid, &pdd);
+ if (!pdd)
+ return;
+
+ kfd_evict_process_device(pdd);
memset(&info, 0, sizeof(info));
- amdgpu_amdkfd_gpuvm_get_vm_fault_info(dev->kgd, &info);
- if (!info.page_addr && !info.status)
+ amdgpu_amdkfd_gpuvm_get_vm_fault_info(dev->adev, &info);
+ if (!info.page_addr && !info.status) {
+ kfd_unref_process(p);
return;
+ }
- if (info.vmid == vmid)
- kfd_signal_vm_fault_event(dev, pasid, &info);
- else
- kfd_signal_vm_fault_event(dev, pasid, NULL);
+ kfd_signal_vm_fault_event(pdd, &info, NULL);
+ kfd_unref_process(p);
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 475f89700c74..0320163b6e74 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -166,7 +166,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
0x807c847c, 0x806eff6e,
0x00000400, 0xbf0a757c,
0xbf85ffef, 0xbf9c0000,
- 0xbf8200cd, 0xbef8007e,
+ 0xbf8200ce, 0xbef8007e,
0x8679ff7f, 0x0000ffff,
0x8779ff79, 0x00040000,
0xbefa0080, 0xbefb00ff,
@@ -212,176 +212,185 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
0x761e0000, 0xe0524100,
0x761e0100, 0xe0524200,
0x761e0200, 0xe0524300,
- 0x761e0300, 0xb8f22a05,
- 0x80728172, 0x8e728a72,
- 0xb8f61605, 0x80768176,
- 0x8e768676, 0x80727672,
- 0x80f2c072, 0xb8f31605,
- 0x80738173, 0x8e738473,
- 0x8e7a8273, 0xbefa00ff,
- 0x01000000, 0xbefc0073,
- 0xc031003c, 0x00000072,
- 0x80f2c072, 0xbf8c007f,
- 0x80fc907c, 0xbe802d00,
- 0xbe822d02, 0xbe842d04,
- 0xbe862d06, 0xbe882d08,
- 0xbe8a2d0a, 0xbe8c2d0c,
- 0xbe8e2d0e, 0xbf06807c,
- 0xbf84fff1, 0xb8f22a05,
- 0x80728172, 0x8e728a72,
- 0xb8f61605, 0x80768176,
- 0x8e768676, 0x80727672,
- 0xbefa0084, 0xbefa00ff,
- 0x01000000, 0xc0211cfc,
+ 0x761e0300, 0xbf8c0f70,
+ 0xb8f22a05, 0x80728172,
+ 0x8e728a72, 0xb8f61605,
+ 0x80768176, 0x8e768676,
+ 0x80727672, 0x80f2c072,
+ 0xb8f31605, 0x80738173,
+ 0x8e738473, 0x8e7a8273,
+ 0xbefa00ff, 0x01000000,
+ 0xbefc0073, 0xc031003c,
+ 0x00000072, 0x80f2c072,
+ 0xbf8c007f, 0x80fc907c,
+ 0xbe802d00, 0xbe822d02,
+ 0xbe842d04, 0xbe862d06,
+ 0xbe882d08, 0xbe8a2d0a,
+ 0xbe8c2d0c, 0xbe8e2d0e,
+ 0xbf06807c, 0xbf84fff1,
+ 0xb8f22a05, 0x80728172,
+ 0x8e728a72, 0xb8f61605,
+ 0x80768176, 0x8e768676,
+ 0x80727672, 0xbefa0084,
+ 0xbefa00ff, 0x01000000,
+ 0xc0211cfc, 0x00000072,
+ 0x80728472, 0xc0211c3c,
0x00000072, 0x80728472,
- 0xc0211c3c, 0x00000072,
- 0x80728472, 0xc0211c7c,
+ 0xc0211c7c, 0x00000072,
+ 0x80728472, 0xc0211bbc,
0x00000072, 0x80728472,
- 0xc0211bbc, 0x00000072,
- 0x80728472, 0xc0211bfc,
+ 0xc0211bfc, 0x00000072,
+ 0x80728472, 0xc0211d3c,
0x00000072, 0x80728472,
- 0xc0211d3c, 0x00000072,
- 0x80728472, 0xc0211d7c,
+ 0xc0211d7c, 0x00000072,
+ 0x80728472, 0xc0211a3c,
0x00000072, 0x80728472,
- 0xc0211a3c, 0x00000072,
- 0x80728472, 0xc0211a7c,
+ 0xc0211a7c, 0x00000072,
+ 0x80728472, 0xc0211dfc,
0x00000072, 0x80728472,
- 0xc0211dfc, 0x00000072,
- 0x80728472, 0xc0211b3c,
+ 0xc0211b3c, 0x00000072,
+ 0x80728472, 0xc0211b7c,
0x00000072, 0x80728472,
- 0xc0211b7c, 0x00000072,
- 0x80728472, 0xbf8c007f,
- 0xbefc0073, 0xbefe006e,
- 0xbeff006f, 0x867375ff,
- 0x000003ff, 0xb9734803,
- 0x867375ff, 0xfffff800,
- 0x8f738b73, 0xb973a2c3,
- 0xb977f801, 0x8673ff71,
- 0xf0000000, 0x8f739c73,
- 0x8e739073, 0xbef60080,
- 0x87767376, 0x8673ff71,
- 0x08000000, 0x8f739b73,
- 0x8e738f73, 0x87767376,
- 0x8673ff74, 0x00800000,
- 0x8f739773, 0xb976f807,
- 0x8671ff71, 0x0000ffff,
- 0x86fe7e7e, 0x86ea6a6a,
- 0x8f768374, 0xb976e0c2,
- 0xbf800002, 0xb9740002,
- 0xbf8a0000, 0x95807370,
- 0xbf810000, 0x00000000,
+ 0xbf8c007f, 0xbefc0073,
+ 0xbefe006e, 0xbeff006f,
+ 0x867375ff, 0x000003ff,
+ 0xb9734803, 0x867375ff,
+ 0xfffff800, 0x8f738b73,
+ 0xb973a2c3, 0xb977f801,
+ 0x8673ff71, 0xf0000000,
+ 0x8f739c73, 0x8e739073,
+ 0xbef60080, 0x87767376,
+ 0x8673ff71, 0x08000000,
+ 0x8f739b73, 0x8e738f73,
+ 0x87767376, 0x8673ff74,
+ 0x00800000, 0x8f739773,
+ 0xb976f807, 0x8671ff71,
+ 0x0000ffff, 0x86fe7e7e,
+ 0x86ea6a6a, 0x8f768374,
+ 0xb976e0c2, 0xbf800002,
+ 0xb9740002, 0xbf8a0000,
+ 0x95807370, 0xbf810000,
};
static const uint32_t cwsr_trap_gfx9_hex[] = {
- 0xbf820001, 0xbf820248,
- 0xb8f8f802, 0x89788678,
- 0xb8eef801, 0x866eff6e,
- 0x00000800, 0xbf840003,
+ 0xbf820001, 0xbf820259,
+ 0xb8f8f802, 0x8978ff78,
+ 0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
- 0xbf840016, 0xb8fbf803,
+ 0xbf840009, 0x866eff6d,
+ 0x00ff0000, 0xbf85001e,
0x866eff7b, 0x00000400,
- 0xbf85003b, 0x866eff7b,
- 0x00000800, 0xbf850003,
- 0x866eff7b, 0x00000100,
- 0xbf84000c, 0x866eff78,
- 0x00002000, 0xbf840005,
- 0xbf8e0010, 0xb8eef803,
- 0x866eff6e, 0x00000400,
- 0xbf84fffb, 0x8778ff78,
- 0x00002000, 0x80ec886c,
- 0x82ed806d, 0xb8eef807,
- 0x866fff6e, 0x001f8000,
- 0x8e6f8b6f, 0x8977ff77,
- 0xfc000000, 0x87776f77,
- 0x896eff6e, 0x001f8000,
- 0xb96ef807, 0xb8faf812,
+ 0xbf850055, 0xbf8e0010,
+ 0xb8fbf803, 0xbf82fffa,
+ 0x866eff7b, 0x03c00900,
+ 0xbf850015, 0x866eff7b,
+ 0x000071ff, 0xbf840008,
+ 0x866fff7b, 0x00007080,
+ 0xbf840001, 0xbeee1a87,
+ 0xb8eff801, 0x8e6e8c6e,
+ 0x866e6f6e, 0xbf85000a,
+ 0x866eff6d, 0x00ff0000,
+ 0xbf850007, 0xb8eef801,
+ 0x866eff6e, 0x00000800,
+ 0xbf850003, 0x866eff7b,
+ 0x00000400, 0xbf85003a,
+ 0xb8faf807, 0x867aff7a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8977ff77, 0xfc000000,
+ 0x87777a77, 0xba7ff807,
+ 0x00000000, 0xb8faf812,
0xb8fbf813, 0x8efa887a,
- 0xc0071bbd, 0x00000000,
- 0xbf8cc07f, 0xc0071ebd,
- 0x00000008, 0xbf8cc07f,
- 0x86ee6e6e, 0xbf840001,
- 0xbe801d6e, 0xb8fbf803,
- 0x867bff7b, 0x000001ff,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x877bff7b, 0xffff0000,
+ 0xc0031bbd, 0x00000010,
+ 0xbf8cc07f, 0x8e6e976e,
+ 0x8977ff77, 0x00800000,
+ 0x87776e77, 0xc0071bbd,
+ 0x00000000, 0xbf8cc07f,
+ 0xc0071ebd, 0x00000008,
+ 0xbf8cc07f, 0x86ee6e6e,
+ 0xbf840001, 0xbe801d6e,
+ 0x866eff6d, 0x01ff0000,
+ 0xbf850005, 0x8778ff78,
+ 0x00002000, 0x80ec886c,
+ 0x82ed806d, 0xbf820005,
+ 0x866eff6d, 0x01000000,
0xbf850002, 0x806c846c,
0x826d806d, 0x866dff6d,
- 0x0000ffff, 0x8f6e8b77,
- 0x866eff6e, 0x001f8000,
- 0xb96ef807, 0x86fe7e7e,
+ 0x0000ffff, 0x8f7a8b77,
+ 0x867aff7a, 0x001f8000,
+ 0xb97af807, 0x86fe7e7e,
0x86ea6a6a, 0x8f6e8378,
0xb96ee0c2, 0xbf800002,
0xb9780002, 0xbe801f6c,
0x866dff6d, 0x0000ffff,
0xbefa0080, 0xb97a0283,
- 0xb8fa2407, 0x8e7a9b7a,
- 0x876d7a6d, 0xb8fa03c7,
- 0x8e7a9a7a, 0x876d7a6d,
0xb8faf807, 0x867aff7a,
- 0x00007fff, 0xb97af807,
- 0xbeee007e, 0xbeef007f,
- 0xbefe0180, 0xbf900004,
- 0x877a8478, 0xb97af802,
- 0xbf8e0002, 0xbf88fffe,
- 0xb8fa2a05, 0x807a817a,
- 0x8e7a8a7a, 0xb8fb1605,
- 0x807b817b, 0x8e7b867b,
- 0x807a7b7a, 0x807a7e7a,
- 0x827b807f, 0x867bff7b,
- 0x0000ffff, 0xc04b1c3d,
- 0x00000050, 0xbf8cc07f,
- 0xc04b1d3d, 0x00000060,
- 0xbf8cc07f, 0xc0431e7d,
- 0x00000074, 0xbf8cc07f,
- 0xbef4007e, 0x8675ff7f,
- 0x0000ffff, 0x8775ff75,
- 0x00040000, 0xbef60080,
- 0xbef700ff, 0x00807fac,
- 0x867aff7f, 0x08000000,
- 0x8f7a837a, 0x87777a77,
- 0x867aff7f, 0x70000000,
- 0x8f7a817a, 0x87777a77,
- 0xbef1007c, 0xbef00080,
- 0xb8f02a05, 0x80708170,
- 0x8e708a70, 0xb8fa1605,
- 0x807a817a, 0x8e7a867a,
- 0x80707a70, 0xbef60084,
- 0xbef600ff, 0x01000000,
- 0xbefe007c, 0xbefc0070,
- 0xc0611c7a, 0x0000007c,
- 0xbf8cc07f, 0x80708470,
- 0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611b3a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8977ff77, 0xfc000000,
+ 0x87777a77, 0xba7ff807,
+ 0x00000000, 0xbeee007e,
+ 0xbeef007f, 0xbefe0180,
+ 0xbf900004, 0x877a8478,
+ 0xb97af802, 0xbf8e0002,
+ 0xbf88fffe, 0xb8fa2a05,
+ 0x807a817a, 0x8e7a8a7a,
+ 0xb8fb1605, 0x807b817b,
+ 0x8e7b867b, 0x807a7b7a,
+ 0x807a7e7a, 0x827b807f,
+ 0x867bff7b, 0x0000ffff,
+ 0xc04b1c3d, 0x00000050,
+ 0xbf8cc07f, 0xc04b1d3d,
+ 0x00000060, 0xbf8cc07f,
+ 0xc0431e7d, 0x00000074,
+ 0xbf8cc07f, 0xbef4007e,
+ 0x8675ff7f, 0x0000ffff,
+ 0x8775ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x00807fac, 0xbef1007c,
+ 0xbef00080, 0xb8f02a05,
+ 0x80708170, 0x8e708a70,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xbefe007c,
+ 0xbefc0070, 0xc0611c7a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
- 0xc0611b7a, 0x0000007c,
+ 0xc0611b3a, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611bba,
+ 0xbefc0070, 0xc0611b7a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
- 0xc0611bfa, 0x0000007c,
+ 0xc0611bba, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611e3a,
- 0x0000007c, 0xbf8cc07f,
- 0x80708470, 0xbefc007e,
- 0xb8fbf803, 0xbefe007c,
- 0xbefc0070, 0xc0611efa,
+ 0xbefc0070, 0xc0611bfa,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
- 0xc0611a3a, 0x0000007c,
+ 0xc0611e3a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8fbf803,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611efa, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611a7a,
- 0x0000007c, 0xbf8cc07f,
- 0x80708470, 0xbefc007e,
- 0xb8f1f801, 0xbefe007c,
- 0xbefc0070, 0xc0611c7a,
+ 0xbefc0070, 0xc0611a3a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611a7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8f1f801,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611c7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbf108080,
0x867aff7f, 0x04000000,
0xbeef0080, 0x876f6f7a,
0xb8f02a05, 0x80708170,
@@ -509,97 +518,92 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0x00000400, 0xbefe00c1,
0xbeff00c1, 0xb8fb2a05,
0x807b817b, 0x8e7b827b,
- 0x8e76887b, 0xbef600ff,
- 0x01000000, 0xbefc0084,
- 0xbf0a7b7c, 0xbf84006d,
- 0xbf11017c, 0x807bff7b,
- 0x00001000, 0x867aff78,
- 0x00400000, 0xbf850003,
- 0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850051,
- 0xbe840080, 0xd2890000,
- 0x00000900, 0x80048104,
- 0xd2890001, 0x00000900,
- 0x80048104, 0xd2890002,
- 0x00000900, 0x80048104,
- 0xd2890003, 0x00000900,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000901,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0084, 0xbf0a7b7c,
+ 0xbf84006d, 0xbf11017c,
+ 0x807bff7b, 0x00001000,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf850051, 0xbe840080,
+ 0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
- 0x00000901, 0x80048104,
- 0xd2890002, 0x00000901,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
0x80048104, 0xd2890003,
- 0x00000901, 0x80048104,
+ 0x00000900, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
+ 0xd2890000, 0x00000902,
0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
+ 0x00000902, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0x807c847c, 0xbf0a7b7c,
- 0xbf85ffb1, 0xbf9c0000,
- 0xbf820012, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0x807c847c,
- 0x8070ff70, 0x00000400,
- 0xbf0a7b7c, 0xbf85ffef,
- 0xbf9c0000, 0xbf8200da,
- 0xbef4007e, 0x8675ff7f,
- 0x0000ffff, 0x8775ff75,
- 0x00040000, 0xbef60080,
- 0xbef700ff, 0x00807fac,
- 0x866eff7f, 0x08000000,
- 0x8f6e836e, 0x87776e77,
- 0x866eff7f, 0x70000000,
- 0x8f6e816e, 0x87776e77,
- 0x866eff7f, 0x04000000,
- 0xbf84001e, 0xbefe00c1,
- 0xbeff00c1, 0xb8ef4306,
- 0x866fc16f, 0xbf840019,
- 0x8e6f866f, 0x8e6f826f,
- 0xbef6006f, 0xb8f82a05,
- 0x80788178, 0x8e788a78,
- 0xb8ee1605, 0x806e816e,
- 0x8e6e866e, 0x80786e78,
- 0x8078ff78, 0x00000080,
- 0xbef600ff, 0x01000000,
- 0xbefc0080, 0xe0510000,
- 0x781d0000, 0xe0510100,
- 0x781d0000, 0x807cff7c,
- 0x00000200, 0x8078ff78,
- 0x00000200, 0xbf0a6f7c,
- 0xbf85fff6, 0xbef80080,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0x807c847c,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf9c0000, 0xbf820012,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7c,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xbf8200c7, 0xbef4007e,
+ 0x8675ff7f, 0x0000ffff,
+ 0x8775ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x00807fac, 0x866eff7f,
+ 0x04000000, 0xbf84001e,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8ef4306, 0x866fc16f,
+ 0xbf840019, 0x8e6f866f,
+ 0x8e6f826f, 0xbef6006f,
+ 0xb8f82a05, 0x80788178,
+ 0x8e788a78, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xe0510000, 0x781d0000,
+ 0xe0510100, 0x781d0000,
+ 0x807cff7c, 0x00000200,
+ 0x8078ff78, 0x00000200,
+ 0xbf0a6f7c, 0xbf85fff6,
0xbefe00c1, 0xbeff00c1,
- 0xb8ef2a05, 0x806f816f,
- 0x8e6f826f, 0x8e76886f,
0xbef600ff, 0x01000000,
+ 0xb8ef2a05, 0x806f816f,
+ 0x8e6f826f, 0x806fff6f,
+ 0x00008000, 0xbef80080,
0xbeee0078, 0x8078ff78,
0x00000400, 0xbefc0084,
- 0xbf11087c, 0x806fff6f,
- 0x00008000, 0xe0524000,
+ 0xbf11087c, 0xe0524000,
0x781d0000, 0xe0524100,
0x781d0100, 0xe0524200,
0x781d0200, 0xe0524300,
@@ -613,157 +617,248 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0xe0524100, 0x6e1d0100,
0xe0524200, 0x6e1d0200,
0xe0524300, 0x6e1d0300,
+ 0xbf8c0f70, 0xb8f82a05,
+ 0x80788178, 0x8e788a78,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x80f8c078, 0xb8ef1605,
+ 0x806f816f, 0x8e6f846f,
+ 0x8e76826f, 0xbef600ff,
+ 0x01000000, 0xbefc006f,
+ 0xc031003a, 0x00000078,
+ 0x80f8c078, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe802d00, 0xbe822d02,
+ 0xbe842d04, 0xbe862d06,
+ 0xbe882d08, 0xbe8a2d0a,
+ 0xbe8c2d0c, 0xbe8e2d0e,
+ 0xbf06807c, 0xbf84fff0,
0xb8f82a05, 0x80788178,
0x8e788a78, 0xb8ee1605,
0x806e816e, 0x8e6e866e,
- 0x80786e78, 0x80f8c078,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f846f, 0x8e76826f,
+ 0x80786e78, 0xbef60084,
0xbef600ff, 0x01000000,
- 0xbefc006f, 0xc031003a,
- 0x00000078, 0x80f8c078,
- 0xbf8cc07f, 0x80fc907c,
- 0xbf800000, 0xbe802d00,
- 0xbe822d02, 0xbe842d04,
- 0xbe862d06, 0xbe882d08,
- 0xbe8a2d0a, 0xbe8c2d0c,
- 0xbe8e2d0e, 0xbf06807c,
- 0xbf84fff0, 0xb8f82a05,
- 0x80788178, 0x8e788a78,
- 0xb8ee1605, 0x806e816e,
- 0x8e6e866e, 0x80786e78,
- 0xbef60084, 0xbef600ff,
- 0x01000000, 0xc0211bfa,
+ 0xc0211bfa, 0x00000078,
+ 0x80788478, 0xc0211b3a,
0x00000078, 0x80788478,
- 0xc0211b3a, 0x00000078,
- 0x80788478, 0xc0211b7a,
+ 0xc0211b7a, 0x00000078,
+ 0x80788478, 0xc0211c3a,
0x00000078, 0x80788478,
- 0xc0211c3a, 0x00000078,
- 0x80788478, 0xc0211c7a,
+ 0xc0211c7a, 0x00000078,
+ 0x80788478, 0xc0211eba,
0x00000078, 0x80788478,
- 0xc0211eba, 0x00000078,
- 0x80788478, 0xc0211efa,
+ 0xc0211efa, 0x00000078,
+ 0x80788478, 0xc0211a3a,
0x00000078, 0x80788478,
- 0xc0211a3a, 0x00000078,
- 0x80788478, 0xc0211a7a,
+ 0xc0211a7a, 0x00000078,
+ 0x80788478, 0xc0211cfa,
0x00000078, 0x80788478,
- 0xc0211cfa, 0x00000078,
- 0x80788478, 0xbf8cc07f,
- 0xbefc006f, 0xbefe0070,
- 0xbeff0071, 0x866f7bff,
- 0x000003ff, 0xb96f4803,
- 0x866f7bff, 0xfffff800,
- 0x8f6f8b6f, 0xb96fa2c3,
- 0xb973f801, 0xb8ee2a05,
- 0x806e816e, 0x8e6e8a6e,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f866f, 0x806e6f6e,
- 0x806e746e, 0x826f8075,
- 0x866fff6f, 0x0000ffff,
- 0xc00b1c37, 0x00000050,
- 0xc00b1d37, 0x00000060,
- 0xc0031e77, 0x00000074,
- 0xbf8cc07f, 0x866fff6d,
- 0xf8000000, 0x8f6f9b6f,
- 0x8e6f906f, 0xbeee0080,
- 0x876e6f6e, 0x866fff6d,
- 0x04000000, 0x8f6f9a6f,
- 0x8e6f8f6f, 0x876e6f6e,
- 0x866fff7a, 0x00800000,
- 0x8f6f976f, 0xb96ef807,
+ 0xbf8cc07f, 0xbefc006f,
+ 0xbefe0070, 0xbeff0071,
+ 0x866f7bff, 0x000003ff,
+ 0xb96f4803, 0x866f7bff,
+ 0xfffff800, 0x8f6f8b6f,
+ 0xb96fa2c3, 0xb973f801,
+ 0xb8ee2a05, 0x806e816e,
+ 0x8e6e8a6e, 0xb8ef1605,
+ 0x806f816f, 0x8e6f866f,
+ 0x806e6f6e, 0x806e746e,
+ 0x826f8075, 0x866fff6f,
+ 0x0000ffff, 0xc00b1c37,
+ 0x00000050, 0xc00b1d37,
+ 0x00000060, 0xc0031e77,
+ 0x00000074, 0xbf8cc07f,
+ 0x8f6e8b77, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
0x866dff6d, 0x0000ffff,
0x86fe7e7e, 0x86ea6a6a,
0x8f6e837a, 0xb96ee0c2,
0xbf800002, 0xb97a0002,
- 0xbf8a0000, 0x95806f6c,
- 0xbf810000, 0x00000000,
+ 0xbf8a0000, 0xbe801f6c,
+ 0xbf9b0000, 0x00000000,
};
static const uint32_t cwsr_trap_nv1x_hex[] = {
- 0xbf820001, 0xbf8201cd,
+ 0xbf820001, 0xbf820393,
0xb0804004, 0xb978f802,
- 0x8a788678, 0xb96ef801,
- 0x876eff6e, 0x00000800,
- 0xbf840003, 0x876eff78,
+ 0x8a78ff78, 0x00020006,
+ 0xb97bf803, 0x876eff78,
0x00002000, 0xbf840009,
- 0xb97bf803, 0x876eff7b,
- 0x00000400, 0xbf850033,
- 0x876eff7b, 0x00000100,
- 0xbf840002, 0x8878ff78,
- 0x00002000, 0x8a77ff77,
- 0xff000000, 0xb96ef807,
- 0x876fff6e, 0x02000000,
- 0x8f6f866f, 0x88776f77,
- 0x876fff6e, 0x003f8000,
- 0x8f6f896f, 0x88776f77,
- 0x8a6eff6e, 0x023f8000,
- 0xb9eef807, 0xb97af812,
+ 0x876eff6d, 0x00ff0000,
+ 0xbf85001e, 0x876eff7b,
+ 0x00000400, 0xbf85005b,
+ 0xbf8e0010, 0xb97bf803,
+ 0xbf82fffa, 0x876eff7b,
+ 0x00000900, 0xbf850015,
+ 0x876eff7b, 0x000071ff,
+ 0xbf840008, 0x876fff7b,
+ 0x00007080, 0xbf840001,
+ 0xbeee1d87, 0xb96ff801,
+ 0x8f6e8c6e, 0x876e6f6e,
+ 0xbf85000a, 0x876eff6d,
+ 0x00ff0000, 0xbf850007,
+ 0xb96ef801, 0x876eff6e,
+ 0x00000800, 0xbf850003,
+ 0x876eff7b, 0x00000400,
+ 0xbf850040, 0x8a77ff77,
+ 0xff000000, 0xb97af807,
+ 0x877bff7a, 0x02000000,
+ 0x8f7b867b, 0x88777b77,
+ 0x877bff7a, 0x003f8000,
+ 0x8f7b897b, 0x88777b77,
+ 0x8a7aff7a, 0x023f8000,
+ 0xb9faf807, 0xb97af812,
0xb97bf813, 0x8ffa887a,
- 0xf4051bbd, 0xfa000000,
- 0xbf8cc07f, 0xf4051ebd,
- 0xfa000008, 0xbf8cc07f,
- 0x87ee6e6e, 0xbf840001,
- 0xbe80206e, 0xb97bf803,
- 0x877bff7b, 0x000001ff,
- 0xbf850002, 0x806c846c,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x887bff7b, 0xffff0000,
+ 0xf4011bbd, 0xfa000010,
+ 0xbf8c0000, 0x8f6e976e,
+ 0x8a77ff77, 0x00800000,
+ 0x88776e77, 0xf4051bbd,
+ 0xfa000000, 0xbf8c0000,
+ 0xf4051ebd, 0xfa000008,
+ 0xbf8c0000, 0x87ee6e6e,
+ 0xbf840001, 0xbe80206e,
+ 0x876eff6d, 0x00ff0000,
+ 0xbf850008, 0x876eff6d,
+ 0x01000000, 0xbf850007,
+ 0x8878ff78, 0x00002000,
+ 0x80ec886c, 0x82ed806d,
+ 0xbf820002, 0x806c846c,
0x826d806d, 0x876dff6d,
- 0x0000ffff, 0x906e8977,
- 0x876fff6e, 0x003f8000,
- 0x906e8677, 0x876eff6e,
- 0x02000000, 0x886e6f6e,
- 0xb9eef807, 0x87fe7e7e,
+ 0x0000ffff, 0x907a8977,
+ 0x877bff7a, 0x003f8000,
+ 0x907a8677, 0x877aff7a,
+ 0x02000000, 0x887a7b7a,
+ 0xb9faf807, 0x87fe7e7e,
0x87ea6a6a, 0xb9f8f802,
0xbe80226c, 0x876dff6d,
0x0000ffff, 0xbefa0380,
- 0xb9fa0283, 0xb97a2c07,
- 0x8f7a9a7a, 0x886d7a6d,
- 0xb97a03c7, 0x8f7a997a,
- 0x886d7a6d, 0xb97a0647,
- 0x8f7a987a, 0x886d7a6d,
- 0xb97af807, 0x877aff7a,
- 0x00007fff, 0xb9faf807,
- 0xbeee037e, 0xbeef037f,
- 0xbefe0480, 0xbf900004,
- 0xbf8e0002, 0xbf88fffe,
- 0xb97b02dc, 0x8f7b997b,
- 0x887b7b7f, 0xb97a2a05,
+ 0xb9fa0283, 0x8a77ff77,
+ 0xff000000, 0xb97af807,
+ 0x877bff7a, 0x02000000,
+ 0x8f7b867b, 0x88777b77,
+ 0x877bff7a, 0x003f8000,
+ 0x8f7b897b, 0x88777b77,
+ 0x8a7aff7a, 0x023f8000,
+ 0xb9faf807, 0xbeee037e,
+ 0xbeef037f, 0xbefe0480,
+ 0xbf900004, 0xbf8e0002,
+ 0xbf88fffe, 0x877aff7f,
+ 0x04000000, 0x8f7a857a,
+ 0x886d7a6d, 0xb97b02dc,
+ 0x8f7b997b, 0xb97a3a05,
0x807a817a, 0xbf0d997b,
0xbf850002, 0x8f7a897a,
0xbf820001, 0x8f7a8a7a,
- 0x877bff7f, 0x0000ffff,
- 0x807aff7a, 0x00000200,
- 0x807a7e7a, 0x827b807b,
- 0xf4491c3d, 0xfa000050,
- 0xf4491d3d, 0xfa000060,
- 0xf4411e7d, 0xfa000074,
- 0xbef4037e, 0x8775ff7f,
- 0x0000ffff, 0x8875ff75,
- 0x00040000, 0xbef60380,
- 0xbef703ff, 0x10807fac,
- 0x877aff7f, 0x08000000,
- 0x907a837a, 0x88777a77,
- 0x877aff7f, 0x70000000,
- 0x907a817a, 0x88777a77,
- 0xbef1037c, 0xbef00380,
- 0xb97302dc, 0x8f739973,
- 0x8873737f, 0xb97bf816,
+ 0xb97b1e06, 0x8f7b8a7b,
+ 0x807a7b7a, 0x877bff7f,
+ 0x0000ffff, 0x807aff7a,
+ 0x00000200, 0x807a7e7a,
+ 0x827b807b, 0xf4491c3d,
+ 0xfa000050, 0xf4491d3d,
+ 0xfa000060, 0xf4411e7d,
+ 0xfa000074, 0xbef4037e,
+ 0x8775ff7f, 0x0000ffff,
+ 0x8875ff75, 0x00040000,
+ 0xbef60380, 0xbef703ff,
+ 0x10807fac, 0xbef1037c,
+ 0xbef00380, 0xb97302dc,
+ 0x8f739973, 0xb97bf816,
0xba80f816, 0x00000000,
0xbefe03c1, 0x907c9973,
0x877c817c, 0xbf06817c,
0xbf850002, 0xbeff0380,
0xbf820002, 0xbeff03c1,
- 0xbf82000b, 0xbef603ff,
- 0x01000000, 0xe0704000,
- 0x705d0000, 0xe0704080,
- 0x705d0100, 0xe0704100,
- 0x705d0200, 0xe0704180,
- 0x705d0300, 0xbf82000a,
- 0xbef603ff, 0x01000000,
+ 0xbf820058, 0xbef603ff,
+ 0x01000000, 0xb97af803,
+ 0x8a7a7aff, 0x10000000,
+ 0xbf850049, 0xbe840380,
+ 0xd7600000, 0x00000900,
+ 0x80048104, 0xd7600001,
+ 0x00000900, 0x80048104,
+ 0xd7600002, 0x00000900,
+ 0x80048104, 0xd7600003,
+ 0x00000900, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06a004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000901,
+ 0x80048104, 0xd7600001,
+ 0x00000901, 0x80048104,
+ 0xd7600002, 0x00000901,
+ 0x80048104, 0xd7600003,
+ 0x00000901, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06a004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000902,
+ 0x80048104, 0xd7600001,
+ 0x00000902, 0x80048104,
+ 0xd7600002, 0x00000902,
+ 0x80048104, 0xd7600003,
+ 0x00000902, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06a004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000903,
+ 0x80048104, 0xd7600001,
+ 0x00000903, 0x80048104,
+ 0xd7600002, 0x00000903,
+ 0x80048104, 0xd7600003,
+ 0x00000903, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06a004,
+ 0xbf84ffef, 0xbf820060,
+ 0xe0704000, 0x705d0000,
+ 0xe0704080, 0x705d0100,
+ 0xe0704100, 0x705d0200,
+ 0xe0704180, 0x705d0300,
+ 0xbf820057, 0xbef603ff,
+ 0x01000000, 0xb97af803,
+ 0x8a7a7aff, 0x10000000,
+ 0xbf850049, 0xbe840380,
+ 0xd7600000, 0x00000900,
+ 0x80048104, 0xd7600001,
+ 0x00000900, 0x80048104,
+ 0xd7600002, 0x00000900,
+ 0x80048104, 0xd7600003,
+ 0x00000900, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000901,
+ 0x80048104, 0xd7600001,
+ 0x00000901, 0x80048104,
+ 0xd7600002, 0x00000901,
+ 0x80048104, 0xd7600003,
+ 0x00000901, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000902,
+ 0x80048104, 0xd7600001,
+ 0x00000902, 0x80048104,
+ 0xd7600002, 0x00000902,
+ 0x80048104, 0xd7600003,
+ 0x00000902, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000903,
+ 0x80048104, 0xd7600001,
+ 0x00000903, 0x80048104,
+ 0xd7600002, 0x00000903,
+ 0x80048104, 0xd7600003,
+ 0x00000903, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0xbf820008,
0xe0704000, 0x705d0000,
0xe0704100, 0x705d0100,
0xe0704200, 0x705d0200,
0xe0704300, 0x705d0300,
- 0xb9702a05, 0x80708170,
+ 0xb9703a05, 0x80708170,
0xbf0d9973, 0xbf850002,
0x8f708970, 0xbf820001,
0x8f708a70, 0xb97a1e06,
@@ -776,8 +871,9 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbefe037c, 0xbefc0370,
0xf4611b3a, 0xf8000000,
0x80708470, 0xbefc037e,
+ 0x8a7aff6d, 0x80000000,
0xbefe037c, 0xbefc0370,
- 0xf4611b7a, 0xf8000000,
+ 0xf4611eba, 0xf8000000,
0x80708470, 0xbefc037e,
0xbefe037c, 0xbefc0370,
0xf4611bba, 0xf8000000,
@@ -805,7 +901,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbefe037c, 0xbefc0370,
0xf4611c7a, 0xf8000000,
0x80708470, 0xbefc037e,
- 0xb9702a05, 0x80708170,
+ 0xb9703a05, 0x80708170,
0xbf0d9973, 0xbf850002,
0x8f708970, 0xbf820001,
0x8f708a70, 0xb97a1e06,
@@ -837,99 +933,224 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbf850002, 0xbeff0380,
0xbf820001, 0xbeff03c1,
0xb97b4306, 0x877bc17b,
- 0xbf840044, 0xbf8a0000,
- 0x877aff73, 0x04000000,
- 0xbf840040, 0x8f7b867b,
- 0x8f7b827b, 0xbef6037b,
- 0xb9702a05, 0x80708170,
- 0xbf0d9973, 0xbf850002,
- 0x8f708970, 0xbf820001,
- 0x8f708a70, 0xb97a1e06,
- 0x8f7a8a7a, 0x80707a70,
- 0x8070ff70, 0x00000200,
- 0x8070ff70, 0x00000080,
- 0xbef603ff, 0x01000000,
- 0xd7650000, 0x000100c1,
- 0xd7660000, 0x000200c1,
- 0x16000084, 0x907c9973,
- 0x877c817c, 0xbf06817c,
- 0xbefc0380, 0xbf850012,
- 0xbe8303ff, 0x00000080,
- 0xbf800000, 0xbf800000,
- 0xbf800000, 0xd8d80000,
+ 0xbf840085, 0xbf8a0000,
+ 0x877aff6d, 0x80000000,
+ 0xbf840081, 0x8f7b887b,
+ 0xbef6037b, 0xb9703a05,
+ 0x80708170, 0xbf0d9973,
+ 0xbf850002, 0x8f708970,
+ 0xbf820001, 0x8f708a70,
+ 0xb97a1e06, 0x8f7a8a7a,
+ 0x80707a70, 0x8070ff70,
+ 0x00000200, 0x8070ff70,
+ 0x00000080, 0xbef603ff,
+ 0x01000000, 0xd7650000,
+ 0x000100c1, 0xd7660000,
+ 0x000200c1, 0x16000084,
+ 0x907c9973, 0x877c817c,
+ 0xbf06817c, 0xbefc0380,
+ 0xbf850033, 0xb97af803,
+ 0x8a7a7aff, 0x10000000,
+ 0xbf85001d, 0xd8d80000,
0x01000000, 0xbf8c0000,
- 0xe0704000, 0x705d0100,
- 0x807c037c, 0x80700370,
+ 0xbe840380, 0xd7600000,
+ 0x00000901, 0x80048104,
+ 0xd7600001, 0x00000901,
+ 0x80048104, 0xd7600002,
+ 0x00000901, 0x80048104,
+ 0xd7600003, 0x00000901,
+ 0x80048104, 0xf469003a,
+ 0xe0000000, 0x80709070,
+ 0xbf06a004, 0xbf84ffef,
+ 0x807cff7c, 0x00000080,
0xd5250000, 0x0001ff00,
0x00000080, 0xbf0a7b7c,
- 0xbf85fff4, 0xbf820011,
- 0xbe8303ff, 0x00000100,
+ 0xbf85ffe4, 0xbf820044,
+ 0xbe8303ff, 0x00000080,
0xbf800000, 0xbf800000,
0xbf800000, 0xd8d80000,
0x01000000, 0xbf8c0000,
0xe0704000, 0x705d0100,
0x807c037c, 0x80700370,
0xd5250000, 0x0001ff00,
- 0x00000100, 0xbf0a7b7c,
- 0xbf85fff4, 0xbefe03c1,
- 0x907c9973, 0x877c817c,
- 0xbf06817c, 0xbf850004,
- 0xbef003ff, 0x00000200,
- 0xbeff0380, 0xbf820003,
- 0xbef003ff, 0x00000400,
- 0xbeff03c1, 0xb97b2a05,
- 0x807b817b, 0x8f7b827b,
- 0x907c9973, 0x877c817c,
- 0xbf06817c, 0xbf850017,
+ 0x00000080, 0xbf0a7b7c,
+ 0xbf85fff4, 0xbf820032,
+ 0xb97af803, 0x8a7a7aff,
+ 0x10000000, 0xbf85001d,
+ 0xd8d80000, 0x01000000,
+ 0xbf8c0000, 0xbe840380,
+ 0xd7600000, 0x00000901,
+ 0x80048104, 0xd7600001,
+ 0x00000901, 0x80048104,
+ 0xd7600002, 0x00000901,
+ 0x80048104, 0xd7600003,
+ 0x00000901, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0x807cff7c,
+ 0x00000100, 0xd5250000,
+ 0x0001ff00, 0x00000100,
+ 0xbf0a7b7c, 0xbf85ffe4,
+ 0xbf820011, 0xbe8303ff,
+ 0x00000100, 0xbf800000,
+ 0xbf800000, 0xbf800000,
+ 0xd8d80000, 0x01000000,
+ 0xbf8c0000, 0xe0704000,
+ 0x705d0100, 0x807c037c,
+ 0x80700370, 0xd5250000,
+ 0x0001ff00, 0x00000100,
+ 0xbf0a7b7c, 0xbf85fff4,
+ 0xbefe03c1, 0x907c9973,
+ 0x877c817c, 0xbf06817c,
+ 0xbf850004, 0xbef003ff,
+ 0x00000200, 0xbeff0380,
+ 0xbf820003, 0xbef003ff,
+ 0x00000400, 0xbeff03c1,
+ 0xb97b3a05, 0x807b817b,
+ 0x8f7b827b, 0x907c9973,
+ 0x877c817c, 0xbf06817c,
+ 0xbf85006b, 0xbef603ff,
+ 0x01000000, 0xbefc0384,
+ 0xbf0a7b7c, 0xbf8400fa,
+ 0xb97af803, 0x8a7a7aff,
+ 0x10000000, 0xbf850050,
+ 0x7e008700, 0x7e028701,
+ 0x7e048702, 0x7e068703,
+ 0xbe840380, 0xd7600000,
+ 0x00000900, 0x80048104,
+ 0xd7600001, 0x00000900,
+ 0x80048104, 0xd7600002,
+ 0x00000900, 0x80048104,
+ 0xd7600003, 0x00000900,
+ 0x80048104, 0xf469003a,
+ 0xe0000000, 0x80709070,
+ 0xbf06a004, 0xbf84ffef,
+ 0xbe840380, 0xd7600000,
+ 0x00000901, 0x80048104,
+ 0xd7600001, 0x00000901,
+ 0x80048104, 0xd7600002,
+ 0x00000901, 0x80048104,
+ 0xd7600003, 0x00000901,
+ 0x80048104, 0xf469003a,
+ 0xe0000000, 0x80709070,
+ 0xbf06a004, 0xbf84ffef,
+ 0xbe840380, 0xd7600000,
+ 0x00000902, 0x80048104,
+ 0xd7600001, 0x00000902,
+ 0x80048104, 0xd7600002,
+ 0x00000902, 0x80048104,
+ 0xd7600003, 0x00000902,
+ 0x80048104, 0xf469003a,
+ 0xe0000000, 0x80709070,
+ 0xbf06a004, 0xbf84ffef,
+ 0xbe840380, 0xd7600000,
+ 0x00000903, 0x80048104,
+ 0xd7600001, 0x00000903,
+ 0x80048104, 0xd7600002,
+ 0x00000903, 0x80048104,
+ 0xd7600003, 0x00000903,
+ 0x80048104, 0xf469003a,
+ 0xe0000000, 0x80709070,
+ 0xbf06a004, 0xbf84ffef,
+ 0x807c847c, 0xbf0a7b7c,
+ 0xbf85ffb1, 0xbf8200a6,
+ 0x7e008700, 0x7e028701,
+ 0x7e048702, 0x7e068703,
+ 0xe0704000, 0x705d0000,
+ 0xe0704080, 0x705d0100,
+ 0xe0704100, 0x705d0200,
+ 0xe0704180, 0x705d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000200, 0xbf0a7b7c,
+ 0xbf85ffef, 0xbf820094,
0xbef603ff, 0x01000000,
0xbefc0384, 0xbf0a7b7c,
- 0xbf840037, 0x7e008700,
+ 0xbf840065, 0xb97af803,
+ 0x8a7a7aff, 0x10000000,
+ 0xbf850050, 0x7e008700,
+ 0x7e028701, 0x7e048702,
+ 0x7e068703, 0xbe840380,
+ 0xd7600000, 0x00000900,
+ 0x80048104, 0xd7600001,
+ 0x00000900, 0x80048104,
+ 0xd7600002, 0x00000900,
+ 0x80048104, 0xd7600003,
+ 0x00000900, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000901,
+ 0x80048104, 0xd7600001,
+ 0x00000901, 0x80048104,
+ 0xd7600002, 0x00000901,
+ 0x80048104, 0xd7600003,
+ 0x00000901, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000902,
+ 0x80048104, 0xd7600001,
+ 0x00000902, 0x80048104,
+ 0xd7600002, 0x00000902,
+ 0x80048104, 0xd7600003,
+ 0x00000902, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000903,
+ 0x80048104, 0xd7600001,
+ 0x00000903, 0x80048104,
+ 0xd7600002, 0x00000903,
+ 0x80048104, 0xd7600003,
+ 0x00000903, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0x807c847c,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf82003b, 0x7e008700,
0x7e028701, 0x7e048702,
0x7e068703, 0xe0704000,
- 0x705d0000, 0xe0704080,
- 0x705d0100, 0xe0704100,
- 0x705d0200, 0xe0704180,
+ 0x705d0000, 0xe0704100,
+ 0x705d0100, 0xe0704200,
+ 0x705d0200, 0xe0704300,
0x705d0300, 0x807c847c,
- 0x8070ff70, 0x00000200,
+ 0x8070ff70, 0x00000400,
0xbf0a7b7c, 0xbf85ffef,
- 0xbf820025, 0xbef603ff,
- 0x01000000, 0xbefc0384,
- 0xbf0a7b7c, 0xbf840020,
- 0x7e008700, 0x7e028701,
- 0x7e048702, 0x7e068703,
- 0xe0704000, 0x705d0000,
- 0xe0704100, 0x705d0100,
- 0xe0704200, 0x705d0200,
- 0xe0704300, 0x705d0300,
- 0x807c847c, 0x8070ff70,
- 0x00000400, 0xbf0a7b7c,
- 0xbf85ffef, 0xb97b1e06,
- 0x877bc17b, 0xbf84000c,
- 0x8f7b837b, 0x807b7c7b,
- 0xbefe03c1, 0xbeff0380,
+ 0xb97b1e06, 0x877bc17b,
+ 0xbf840027, 0x8f7b837b,
+ 0x807b7c7b, 0xbefe03c1,
+ 0xbeff0380, 0xb97af803,
+ 0x8a7a7aff, 0x10000000,
+ 0xbf850017, 0x7e008700,
+ 0xbe840380, 0xd7600000,
+ 0x00000900, 0x80048104,
+ 0xd7600001, 0x00000900,
+ 0x80048104, 0xd7600002,
+ 0x00000900, 0x80048104,
+ 0xd7600003, 0x00000900,
+ 0x80048104, 0xf469003a,
+ 0xe0000000, 0x80709070,
+ 0xbf06c004, 0xbf84ffef,
+ 0x807c817c, 0xbf0a7b7c,
+ 0xbf85ffea, 0xbf820008,
0x7e008700, 0xe0704000,
0x705d0000, 0x807c817c,
0x8070ff70, 0x00000080,
0xbf0a7b7c, 0xbf85fff8,
- 0xbf820151, 0xbef4037e,
+ 0xbf82013f, 0xbef4037e,
0x8775ff7f, 0x0000ffff,
0x8875ff75, 0x00040000,
0xbef60380, 0xbef703ff,
- 0x10807fac, 0x876eff7f,
- 0x08000000, 0x906e836e,
- 0x88776e77, 0x876eff7f,
- 0x70000000, 0x906e816e,
- 0x88776e77, 0xb97202dc,
- 0x8f729972, 0x8872727f,
- 0x876eff7f, 0x04000000,
- 0xbf840034, 0xbefe03c1,
- 0x907c9972, 0x877c817c,
- 0xbf06817c, 0xbf850002,
- 0xbeff0380, 0xbf820001,
- 0xbeff03c1, 0xb96f4306,
- 0x876fc16f, 0xbf840029,
- 0x8f6f866f, 0x8f6f826f,
- 0xbef6036f, 0xb9782a05,
+ 0x10807fac, 0xb97202dc,
+ 0x8f729972, 0x876eff7f,
+ 0x04000000, 0xbf840033,
+ 0xbefe03c1, 0x907c9972,
+ 0x877c817c, 0xbf06817c,
+ 0xbf850002, 0xbeff0380,
+ 0xbf820001, 0xbeff03c1,
+ 0xb96f4306, 0x876fc16f,
+ 0xbf840028, 0x8f6f886f,
+ 0xbef6036f, 0xb9783a05,
0x80788178, 0xbf0d9972,
0xbf850002, 0x8f788978,
0xbf820001, 0x8f788a78,
@@ -953,17 +1174,18 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x877c817c, 0xbf06817c,
0xbf850002, 0xbeff0380,
0xbf820001, 0xbeff03c1,
- 0xb96f2a05, 0x806f816f,
+ 0xb96f3a05, 0x806f816f,
0x8f6f826f, 0x907c9972,
0x877c817c, 0xbf06817c,
- 0xbf850021, 0xbef603ff,
+ 0xbf850024, 0xbef603ff,
0x01000000, 0xbeee0378,
0x8078ff78, 0x00000200,
- 0xbefc0384, 0xe0304000,
+ 0xbefc0384, 0xbf0a6f7c,
+ 0xbf840050, 0xe0304000,
0x785d0000, 0xe0304080,
0x785d0100, 0xe0304100,
0x785d0200, 0xe0304180,
- 0x785d0300, 0xbf8c3f70,
+ 0x785d0300, 0xbf8c0000,
0x7e008500, 0x7e028501,
0x7e048502, 0x7e068503,
0x807c847c, 0x8078ff78,
@@ -972,229 +1194,233 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x6e5d0000, 0xe0304080,
0x6e5d0100, 0xe0304100,
0x6e5d0200, 0xe0304180,
- 0x6e5d0300, 0xbf820032,
- 0xbef603ff, 0x01000000,
- 0xbeee0378, 0x8078ff78,
- 0x00000400, 0xbefc0384,
- 0xe0304000, 0x785d0000,
- 0xe0304100, 0x785d0100,
- 0xe0304200, 0x785d0200,
- 0xe0304300, 0x785d0300,
- 0xbf8c3f70, 0x7e008500,
- 0x7e028501, 0x7e048502,
- 0x7e068503, 0x807c847c,
+ 0x6e5d0300, 0xbf8c0000,
+ 0xbf820034, 0xbef603ff,
+ 0x01000000, 0xbeee0378,
0x8078ff78, 0x00000400,
- 0xbf0a6f7c, 0xbf85ffee,
- 0xb96f1e06, 0x876fc16f,
- 0xbf84000e, 0x8f6f836f,
- 0x806f7c6f, 0xbefe03c1,
- 0xbeff0380, 0xe0304000,
- 0x785d0000, 0xbf8c3f70,
- 0x7e008500, 0x807c817c,
- 0x8078ff78, 0x00000080,
- 0xbf0a6f7c, 0xbf85fff7,
- 0xbeff03c1, 0xe0304000,
- 0x6e5d0000, 0xe0304100,
- 0x6e5d0100, 0xe0304200,
- 0x6e5d0200, 0xe0304300,
- 0x6e5d0300, 0xbf8c3f70,
- 0xb9782a05, 0x80788178,
+ 0xbefc0384, 0xbf0a6f7c,
+ 0xbf840012, 0xe0304000,
+ 0x785d0000, 0xe0304100,
+ 0x785d0100, 0xe0304200,
+ 0x785d0200, 0xe0304300,
+ 0x785d0300, 0xbf8c0000,
+ 0x7e008500, 0x7e028501,
+ 0x7e048502, 0x7e068503,
+ 0x807c847c, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7c,
+ 0xbf85ffee, 0xb96f1e06,
+ 0x876fc16f, 0xbf84000e,
+ 0x8f6f836f, 0x806f7c6f,
+ 0xbefe03c1, 0xbeff0380,
+ 0xe0304000, 0x785d0000,
+ 0xbf8c0000, 0x7e008500,
+ 0x807c817c, 0x8078ff78,
+ 0x00000080, 0xbf0a6f7c,
+ 0xbf85fff7, 0xbeff03c1,
+ 0xe0304000, 0x6e5d0000,
+ 0xe0304100, 0x6e5d0100,
+ 0xe0304200, 0x6e5d0200,
+ 0xe0304300, 0x6e5d0300,
+ 0xbf8c0000, 0xb9783a05,
+ 0x80788178, 0xbf0d9972,
+ 0xbf850002, 0x8f788978,
+ 0xbf820001, 0x8f788a78,
+ 0xb96e1e06, 0x8f6e8a6e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000200, 0x80f8ff78,
+ 0x00000050, 0xbef603ff,
+ 0x01000000, 0xbefc03ff,
+ 0x0000006c, 0x80f89078,
+ 0xf429003a, 0xf0000000,
+ 0xbf8c0000, 0x80fc847c,
+ 0xbf800000, 0xbe803100,
+ 0xbe823102, 0x80f8a078,
+ 0xf42d003a, 0xf0000000,
+ 0xbf8c0000, 0x80fc887c,
+ 0xbf800000, 0xbe803100,
+ 0xbe823102, 0xbe843104,
+ 0xbe863106, 0x80f8c078,
+ 0xf431003a, 0xf0000000,
+ 0xbf8c0000, 0x80fc907c,
+ 0xbf800000, 0xbe803100,
+ 0xbe823102, 0xbe843104,
+ 0xbe863106, 0xbe883108,
+ 0xbe8a310a, 0xbe8c310c,
+ 0xbe8e310e, 0xbf06807c,
+ 0xbf84fff0, 0xba80f801,
+ 0x00000000, 0xbf8a0000,
+ 0xb9783a05, 0x80788178,
0xbf0d9972, 0xbf850002,
0x8f788978, 0xbf820001,
0x8f788a78, 0xb96e1e06,
0x8f6e8a6e, 0x80786e78,
0x8078ff78, 0x00000200,
- 0x80f8ff78, 0x00000050,
0xbef603ff, 0x01000000,
- 0xbefc03ff, 0x0000006c,
- 0x80f89078, 0xf429003a,
- 0xf0000000, 0xbf8cc07f,
- 0x80fc847c, 0xbf800000,
- 0xbe803100, 0xbe823102,
- 0x80f8a078, 0xf42d003a,
- 0xf0000000, 0xbf8cc07f,
- 0x80fc887c, 0xbf800000,
- 0xbe803100, 0xbe823102,
- 0xbe843104, 0xbe863106,
- 0x80f8c078, 0xf431003a,
- 0xf0000000, 0xbf8cc07f,
- 0x80fc907c, 0xbf800000,
- 0xbe803100, 0xbe823102,
- 0xbe843104, 0xbe863106,
- 0xbe883108, 0xbe8a310a,
- 0xbe8c310c, 0xbe8e310e,
- 0xbf06807c, 0xbf84fff0,
- 0xba80f801, 0x00000000,
- 0xbf8a0000, 0xb9782a05,
- 0x80788178, 0xbf0d9972,
- 0xbf850002, 0x8f788978,
- 0xbf820001, 0x8f788a78,
- 0xb96e1e06, 0x8f6e8a6e,
- 0x80786e78, 0x8078ff78,
- 0x00000200, 0xbef603ff,
- 0x01000000, 0xf4211bfa,
+ 0xf4211bfa, 0xf0000000,
+ 0x80788478, 0xf4211b3a,
0xf0000000, 0x80788478,
- 0xf4211b3a, 0xf0000000,
- 0x80788478, 0xf4211b7a,
+ 0xf4211b7a, 0xf0000000,
+ 0x80788478, 0xf4211c3a,
0xf0000000, 0x80788478,
- 0xf4211c3a, 0xf0000000,
- 0x80788478, 0xf4211c7a,
+ 0xf4211c7a, 0xf0000000,
+ 0x80788478, 0xf4211eba,
0xf0000000, 0x80788478,
- 0xf4211eba, 0xf0000000,
- 0x80788478, 0xf4211efa,
+ 0xf4211efa, 0xf0000000,
+ 0x80788478, 0xf4211e7a,
0xf0000000, 0x80788478,
- 0xf4211e7a, 0xf0000000,
- 0x80788478, 0xf4211cfa,
+ 0xf4211cfa, 0xf0000000,
+ 0x80788478, 0xf4211bba,
0xf0000000, 0x80788478,
+ 0xbf8c0000, 0xb9eef814,
0xf4211bba, 0xf0000000,
- 0x80788478, 0xbf8cc07f,
- 0xb9eef814, 0xf4211bba,
- 0xf0000000, 0x80788478,
- 0xbf8cc07f, 0xb9eef815,
- 0xbefc036f, 0xbefe0370,
- 0xbeff0371, 0x876f7bff,
- 0x000003ff, 0xb9ef4803,
- 0xb9f9f816, 0x876f7bff,
- 0xfffff800, 0x906f8b6f,
- 0xb9efa2c3, 0xb9f3f801,
- 0xb96e2a05, 0x806e816e,
- 0xbf0d9972, 0xbf850002,
- 0x8f6e896e, 0xbf820001,
- 0x8f6e8a6e, 0x806eff6e,
+ 0x80788478, 0xbf8c0000,
+ 0xb9eef815, 0xbefc036f,
+ 0xbefe0370, 0xbeff0371,
+ 0xb9f9f816, 0xb9fb4803,
+ 0x907b8b7b, 0xb9fba2c3,
+ 0xb9f3f801, 0xb96e3a05,
+ 0x806e816e, 0xbf0d9972,
+ 0xbf850002, 0x8f6e896e,
+ 0xbf820001, 0x8f6e8a6e,
+ 0xb96f1e06, 0x8f6f8a6f,
+ 0x806e6f6e, 0x806eff6e,
0x00000200, 0x806e746e,
0x826f8075, 0x876fff6f,
0x0000ffff, 0xf4091c37,
0xfa000050, 0xf4091d37,
0xfa000060, 0xf4011e77,
- 0xfa000074, 0xbf8cc07f,
- 0x876fff6d, 0xfc000000,
- 0x906f9a6f, 0x8f6f906f,
- 0xbeee0380, 0x886e6f6e,
- 0x876fff6d, 0x02000000,
- 0x906f996f, 0x8f6f8f6f,
- 0x886e6f6e, 0x876fff6d,
- 0x01000000, 0x906f986f,
- 0x8f6f996f, 0x886e6f6e,
- 0x876fff7a, 0x00800000,
- 0x906f976f, 0xb9eef807,
+ 0xfa000074, 0xbf8c0000,
+ 0x906e8977, 0x876fff6e,
+ 0x003f8000, 0x906e8677,
+ 0x876eff6e, 0x02000000,
+ 0x886e6f6e, 0xb9eef807,
0x876dff6d, 0x0000ffff,
0x87fe7e7e, 0x87ea6a6a,
0xb9faf802, 0xbe80226c,
- 0xbf810000, 0xbf9f0000,
+ 0xbf9b0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
};
static const uint32_t cwsr_trap_arcturus_hex[] = {
- 0xbf820001, 0xbf8202c4,
- 0xb8f8f802, 0x89788678,
- 0xb8eef801, 0x866eff6e,
- 0x00000800, 0xbf840003,
+ 0xbf820001, 0xbf8202d5,
+ 0xb8f8f802, 0x8978ff78,
+ 0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
- 0xbf840016, 0xb8fbf803,
+ 0xbf840009, 0x866eff6d,
+ 0x00ff0000, 0xbf85001e,
0x866eff7b, 0x00000400,
- 0xbf85003b, 0x866eff7b,
- 0x00000800, 0xbf850003,
- 0x866eff7b, 0x00000100,
- 0xbf84000c, 0x866eff78,
- 0x00002000, 0xbf840005,
- 0xbf8e0010, 0xb8eef803,
- 0x866eff6e, 0x00000400,
- 0xbf84fffb, 0x8778ff78,
- 0x00002000, 0x80ec886c,
- 0x82ed806d, 0xb8eef807,
- 0x866fff6e, 0x001f8000,
- 0x8e6f8b6f, 0x8977ff77,
- 0xfc000000, 0x87776f77,
- 0x896eff6e, 0x001f8000,
- 0xb96ef807, 0xb8faf812,
+ 0xbf850055, 0xbf8e0010,
+ 0xb8fbf803, 0xbf82fffa,
+ 0x866eff7b, 0x03c00900,
+ 0xbf850015, 0x866eff7b,
+ 0x000071ff, 0xbf840008,
+ 0x866fff7b, 0x00007080,
+ 0xbf840001, 0xbeee1a87,
+ 0xb8eff801, 0x8e6e8c6e,
+ 0x866e6f6e, 0xbf85000a,
+ 0x866eff6d, 0x00ff0000,
+ 0xbf850007, 0xb8eef801,
+ 0x866eff6e, 0x00000800,
+ 0xbf850003, 0x866eff7b,
+ 0x00000400, 0xbf85003a,
+ 0xb8faf807, 0x867aff7a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8977ff77, 0xfc000000,
+ 0x87777a77, 0xba7ff807,
+ 0x00000000, 0xb8faf812,
0xb8fbf813, 0x8efa887a,
- 0xc0071bbd, 0x00000000,
- 0xbf8cc07f, 0xc0071ebd,
- 0x00000008, 0xbf8cc07f,
- 0x86ee6e6e, 0xbf840001,
- 0xbe801d6e, 0xb8fbf803,
- 0x867bff7b, 0x000001ff,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x877bff7b, 0xffff0000,
+ 0xc0031bbd, 0x00000010,
+ 0xbf8cc07f, 0x8e6e976e,
+ 0x8977ff77, 0x00800000,
+ 0x87776e77, 0xc0071bbd,
+ 0x00000000, 0xbf8cc07f,
+ 0xc0071ebd, 0x00000008,
+ 0xbf8cc07f, 0x86ee6e6e,
+ 0xbf840001, 0xbe801d6e,
+ 0x866eff6d, 0x01ff0000,
+ 0xbf850005, 0x8778ff78,
+ 0x00002000, 0x80ec886c,
+ 0x82ed806d, 0xbf820005,
+ 0x866eff6d, 0x01000000,
0xbf850002, 0x806c846c,
0x826d806d, 0x866dff6d,
- 0x0000ffff, 0x8f6e8b77,
- 0x866eff6e, 0x001f8000,
- 0xb96ef807, 0x86fe7e7e,
+ 0x0000ffff, 0x8f7a8b77,
+ 0x867aff7a, 0x001f8000,
+ 0xb97af807, 0x86fe7e7e,
0x86ea6a6a, 0x8f6e8378,
0xb96ee0c2, 0xbf800002,
0xb9780002, 0xbe801f6c,
0x866dff6d, 0x0000ffff,
0xbefa0080, 0xb97a0283,
- 0xb8fa2407, 0x8e7a9b7a,
- 0x876d7a6d, 0xb8fa03c7,
- 0x8e7a9a7a, 0x876d7a6d,
0xb8faf807, 0x867aff7a,
- 0x00007fff, 0xb97af807,
- 0xbeee007e, 0xbeef007f,
- 0xbefe0180, 0xbf900004,
- 0x877a8478, 0xb97af802,
- 0xbf8e0002, 0xbf88fffe,
- 0xb8fa2a05, 0x807a817a,
- 0x8e7a8a7a, 0x8e7a817a,
- 0xb8fb1605, 0x807b817b,
- 0x8e7b867b, 0x807a7b7a,
- 0x807a7e7a, 0x827b807f,
- 0x867bff7b, 0x0000ffff,
- 0xc04b1c3d, 0x00000050,
- 0xbf8cc07f, 0xc04b1d3d,
- 0x00000060, 0xbf8cc07f,
- 0xc0431e7d, 0x00000074,
- 0xbf8cc07f, 0xbef4007e,
- 0x8675ff7f, 0x0000ffff,
- 0x8775ff75, 0x00040000,
- 0xbef60080, 0xbef700ff,
- 0x00807fac, 0x867aff7f,
- 0x08000000, 0x8f7a837a,
- 0x87777a77, 0x867aff7f,
- 0x70000000, 0x8f7a817a,
- 0x87777a77, 0xbef1007c,
- 0xbef00080, 0xb8f02a05,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fa1605,
- 0x807a817a, 0x8e7a867a,
- 0x80707a70, 0xbef60084,
- 0xbef600ff, 0x01000000,
- 0xbefe007c, 0xbefc0070,
- 0xc0611c7a, 0x0000007c,
- 0xbf8cc07f, 0x80708470,
- 0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611b3a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8977ff77, 0xfc000000,
+ 0x87777a77, 0xba7ff807,
+ 0x00000000, 0xbeee007e,
+ 0xbeef007f, 0xbefe0180,
+ 0xbf900004, 0x877a8478,
+ 0xb97af802, 0xbf8e0002,
+ 0xbf88fffe, 0xb8fa2a05,
+ 0x807a817a, 0x8e7a8a7a,
+ 0x8e7a817a, 0xb8fb1605,
+ 0x807b817b, 0x8e7b867b,
+ 0x807a7b7a, 0x807a7e7a,
+ 0x827b807f, 0x867bff7b,
+ 0x0000ffff, 0xc04b1c3d,
+ 0x00000050, 0xbf8cc07f,
+ 0xc04b1d3d, 0x00000060,
+ 0xbf8cc07f, 0xc0431e7d,
+ 0x00000074, 0xbf8cc07f,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0xbef1007c, 0xbef00080,
+ 0xb8f02a05, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xbefe007c,
+ 0xbefc0070, 0xc0611c7a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
- 0xc0611b7a, 0x0000007c,
+ 0xc0611b3a, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611bba,
+ 0xbefc0070, 0xc0611b7a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
- 0xc0611bfa, 0x0000007c,
+ 0xc0611bba, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611e3a,
- 0x0000007c, 0xbf8cc07f,
- 0x80708470, 0xbefc007e,
- 0xb8fbf803, 0xbefe007c,
- 0xbefc0070, 0xc0611efa,
+ 0xbefc0070, 0xc0611bfa,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
- 0xc0611a3a, 0x0000007c,
+ 0xc0611e3a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8fbf803,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611efa, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611a7a,
- 0x0000007c, 0xbf8cc07f,
- 0x80708470, 0xbefc007e,
- 0xb8f1f801, 0xbefe007c,
- 0xbefc0070, 0xc0611c7a,
+ 0xbefc0070, 0xc0611a3a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611a7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8f1f801,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611c7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbf108080,
0x867aff7f, 0x04000000,
0xbeef0080, 0x876f6f7a,
0xb8f02a05, 0x80708170,
@@ -1323,368 +1549,359 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0x00000400, 0xbefe00c1,
0xbeff00c1, 0xb8fb2a05,
0x807b817b, 0x8e7b827b,
- 0x8e76887b, 0xbef600ff,
- 0x01000000, 0xbefc0084,
- 0xbf0a7b7c, 0xbf84006d,
- 0xbf11017c, 0x807bff7b,
- 0x00001000, 0x867aff78,
- 0x00400000, 0xbf850003,
- 0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850051,
- 0xbe840080, 0xd2890000,
- 0x00000900, 0x80048104,
- 0xd2890001, 0x00000900,
- 0x80048104, 0xd2890002,
- 0x00000900, 0x80048104,
- 0xd2890003, 0x00000900,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000901,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0084, 0xbf0a7b7c,
+ 0xbf84006d, 0xbf11017c,
+ 0x807bff7b, 0x00001000,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf850051, 0xbe840080,
+ 0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
- 0x00000901, 0x80048104,
- 0xd2890002, 0x00000901,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
0x80048104, 0xd2890003,
- 0x00000901, 0x80048104,
+ 0x00000900, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
+ 0xd2890000, 0x00000902,
0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
+ 0x00000902, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0x807c847c, 0xbf0a7b7c,
- 0xbf85ffb1, 0xbf9c0000,
- 0xbf820012, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0x807c847c,
- 0x8070ff70, 0x00000400,
- 0xbf0a7b7c, 0xbf85ffef,
- 0xbf9c0000, 0xbefc0080,
- 0xbf11017c, 0x867aff78,
- 0x00400000, 0xbf850003,
- 0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850059,
- 0xd3d84000, 0x18000100,
- 0xd3d84001, 0x18000101,
- 0xd3d84002, 0x18000102,
- 0xd3d84003, 0x18000103,
0xbe840080, 0xd2890000,
- 0x00000900, 0x80048104,
- 0xd2890001, 0x00000900,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
0x80048104, 0xd2890002,
- 0x00000900, 0x80048104,
- 0xd2890003, 0x00000900,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000901,
+ 0xbf84ffee, 0x807c847c,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf9c0000, 0xbf820012,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7c,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xbefc0080, 0xbf11017c,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf850059, 0xd3d84000,
+ 0x18000100, 0xd3d84001,
+ 0x18000101, 0xd3d84002,
+ 0x18000102, 0xd3d84003,
+ 0x18000103, 0xbe840080,
+ 0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
- 0x00000901, 0x80048104,
- 0xd2890002, 0x00000901,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
0x80048104, 0xd2890003,
- 0x00000901, 0x80048104,
+ 0x00000900, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
+ 0xd2890000, 0x00000902,
0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
+ 0x00000902, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0x807c847c, 0xbf0a7b7c,
- 0xbf85ffa9, 0xbf9c0000,
- 0xbf820016, 0xd3d84000,
- 0x18000100, 0xd3d84001,
- 0x18000101, 0xd3d84002,
- 0x18000102, 0xd3d84003,
- 0x18000103, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0x807c847c,
- 0x8070ff70, 0x00000400,
- 0xbf0a7b7c, 0xbf85ffeb,
- 0xbf9c0000, 0xbf820106,
- 0xbef4007e, 0x8675ff7f,
- 0x0000ffff, 0x8775ff75,
- 0x00040000, 0xbef60080,
- 0xbef700ff, 0x00807fac,
- 0x866eff7f, 0x08000000,
- 0x8f6e836e, 0x87776e77,
- 0x866eff7f, 0x70000000,
- 0x8f6e816e, 0x87776e77,
- 0x866eff7f, 0x04000000,
- 0xbf84001f, 0xbefe00c1,
- 0xbeff00c1, 0xb8ef4306,
- 0x866fc16f, 0xbf84001a,
- 0x8e6f866f, 0x8e6f826f,
- 0xbef6006f, 0xb8f82a05,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0x8078ff78,
- 0x00000080, 0xbef600ff,
- 0x01000000, 0xbefc0080,
- 0xe0510000, 0x781d0000,
- 0xe0510100, 0x781d0000,
- 0x807cff7c, 0x00000200,
- 0x8078ff78, 0x00000200,
- 0xbf0a6f7c, 0xbf85fff6,
- 0xbef80080, 0xbefe00c1,
- 0xbeff00c1, 0xb8ef2a05,
- 0x806f816f, 0x8e6f826f,
- 0x8e76886f, 0xbef90076,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0x807c847c,
+ 0xbf0a7b7c, 0xbf85ffa9,
+ 0xbf9c0000, 0xbf820016,
+ 0xd3d84000, 0x18000100,
+ 0xd3d84001, 0x18000101,
+ 0xd3d84002, 0x18000102,
+ 0xd3d84003, 0x18000103,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7c,
+ 0xbf85ffeb, 0xbf9c0000,
+ 0xbf8200e3, 0xbef4007e,
+ 0x8675ff7f, 0x0000ffff,
+ 0x8775ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x00807fac, 0x866eff7f,
+ 0x04000000, 0xbf84001f,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8ef4306, 0x866fc16f,
+ 0xbf84001a, 0x8e6f866f,
+ 0x8e6f826f, 0xbef6006f,
+ 0xb8f82a05, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x8078ff78, 0x00000080,
0xbef600ff, 0x01000000,
- 0xbeee0078, 0x8078ff78,
- 0x00000400, 0xbef30079,
- 0x8079ff79, 0x00000400,
- 0xbefc0084, 0xbf11087c,
+ 0xbefc0080, 0xe0510000,
+ 0x781d0000, 0xe0510100,
+ 0x781d0000, 0x807cff7c,
+ 0x00000200, 0x8078ff78,
+ 0x00000200, 0xbf0a6f7c,
+ 0xbf85fff6, 0xbefe00c1,
+ 0xbeff00c1, 0xbef600ff,
+ 0x01000000, 0xb8ef2a05,
+ 0x806f816f, 0x8e6f826f,
0x806fff6f, 0x00008000,
- 0xe0524000, 0x791d0000,
- 0xe0524100, 0x791d0100,
- 0xe0524200, 0x791d0200,
- 0xe0524300, 0x791d0300,
- 0x8079ff79, 0x00000400,
- 0xbf8c0f70, 0xd3d94000,
- 0x18000100, 0xd3d94001,
- 0x18000101, 0xd3d94002,
- 0x18000102, 0xd3d94003,
- 0x18000103, 0xe0524000,
- 0x781d0000, 0xe0524100,
- 0x781d0100, 0xe0524200,
- 0x781d0200, 0xe0524300,
- 0x781d0300, 0xbf8c0f70,
- 0x7e000300, 0x7e020301,
- 0x7e040302, 0x7e060303,
- 0x807c847c, 0x8078ff78,
- 0x00000400, 0xbf0a6f7c,
- 0xbf85ffdb, 0xbf9c0000,
- 0xe0524000, 0x731d0000,
- 0xe0524100, 0x731d0100,
- 0xe0524200, 0x731d0200,
- 0xe0524300, 0x731d0300,
+ 0xbef80080, 0xbeee0078,
+ 0x8078ff78, 0x00000400,
+ 0xbefc0084, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
+ 0xbf8c0f70, 0x7e000300,
+ 0x7e020301, 0x7e040302,
+ 0x7e060303, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffee,
+ 0xbefc0080, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
0xbf8c0f70, 0xd3d94000,
0x18000100, 0xd3d94001,
0x18000101, 0xd3d94002,
0x18000102, 0xd3d94003,
- 0x18000103, 0xe0524000,
+ 0x18000103, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffea,
+ 0xbf9c0000, 0xe0524000,
0x6e1d0000, 0xe0524100,
0x6e1d0100, 0xe0524200,
0x6e1d0200, 0xe0524300,
- 0x6e1d0300, 0xb8f82a05,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0x80f8c078,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f846f, 0x8e76826f,
- 0xbef600ff, 0x01000000,
- 0xbefc006f, 0xc031003a,
- 0x00000078, 0x80f8c078,
- 0xbf8cc07f, 0x80fc907c,
- 0xbf800000, 0xbe802d00,
- 0xbe822d02, 0xbe842d04,
- 0xbe862d06, 0xbe882d08,
- 0xbe8a2d0a, 0xbe8c2d0c,
- 0xbe8e2d0e, 0xbf06807c,
- 0xbf84fff0, 0xb8f82a05,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0xbef60084,
- 0xbef600ff, 0x01000000,
- 0xc0211bfa, 0x00000078,
- 0x80788478, 0xc0211b3a,
+ 0x6e1d0300, 0xbf8c0f70,
+ 0xb8f82a05, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x80f8c078, 0xb8ef1605,
+ 0x806f816f, 0x8e6f846f,
+ 0x8e76826f, 0xbef600ff,
+ 0x01000000, 0xbefc006f,
+ 0xc031003a, 0x00000078,
+ 0x80f8c078, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe802d00, 0xbe822d02,
+ 0xbe842d04, 0xbe862d06,
+ 0xbe882d08, 0xbe8a2d0a,
+ 0xbe8c2d0c, 0xbe8e2d0e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xb8f82a05, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xc0211bfa,
0x00000078, 0x80788478,
- 0xc0211b7a, 0x00000078,
- 0x80788478, 0xc0211c3a,
+ 0xc0211b3a, 0x00000078,
+ 0x80788478, 0xc0211b7a,
0x00000078, 0x80788478,
- 0xc0211c7a, 0x00000078,
- 0x80788478, 0xc0211eba,
+ 0xc0211c3a, 0x00000078,
+ 0x80788478, 0xc0211c7a,
0x00000078, 0x80788478,
- 0xc0211efa, 0x00000078,
- 0x80788478, 0xc0211a3a,
+ 0xc0211eba, 0x00000078,
+ 0x80788478, 0xc0211efa,
0x00000078, 0x80788478,
- 0xc0211a7a, 0x00000078,
- 0x80788478, 0xc0211cfa,
+ 0xc0211a3a, 0x00000078,
+ 0x80788478, 0xc0211a7a,
0x00000078, 0x80788478,
- 0xbf8cc07f, 0xbefc006f,
- 0xbefe0070, 0xbeff0071,
- 0x866f7bff, 0x000003ff,
- 0xb96f4803, 0x866f7bff,
- 0xfffff800, 0x8f6f8b6f,
- 0xb96fa2c3, 0xb973f801,
- 0xb8ee2a05, 0x806e816e,
- 0x8e6e8a6e, 0x8e6e816e,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f866f, 0x806e6f6e,
- 0x806e746e, 0x826f8075,
- 0x866fff6f, 0x0000ffff,
- 0xc00b1c37, 0x00000050,
- 0xc00b1d37, 0x00000060,
- 0xc0031e77, 0x00000074,
- 0xbf8cc07f, 0x866fff6d,
- 0xf8000000, 0x8f6f9b6f,
- 0x8e6f906f, 0xbeee0080,
- 0x876e6f6e, 0x866fff6d,
- 0x04000000, 0x8f6f9a6f,
- 0x8e6f8f6f, 0x876e6f6e,
- 0x866fff7a, 0x00800000,
- 0x8f6f976f, 0xb96ef807,
+ 0xc0211cfa, 0x00000078,
+ 0x80788478, 0xbf8cc07f,
+ 0xbefc006f, 0xbefe0070,
+ 0xbeff0071, 0x866f7bff,
+ 0x000003ff, 0xb96f4803,
+ 0x866f7bff, 0xfffff800,
+ 0x8f6f8b6f, 0xb96fa2c3,
+ 0xb973f801, 0xb8ee2a05,
+ 0x806e816e, 0x8e6e8a6e,
+ 0x8e6e816e, 0xb8ef1605,
+ 0x806f816f, 0x8e6f866f,
+ 0x806e6f6e, 0x806e746e,
+ 0x826f8075, 0x866fff6f,
+ 0x0000ffff, 0xc00b1c37,
+ 0x00000050, 0xc00b1d37,
+ 0x00000060, 0xc0031e77,
+ 0x00000074, 0xbf8cc07f,
+ 0x8f6e8b77, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
0x866dff6d, 0x0000ffff,
0x86fe7e7e, 0x86ea6a6a,
0x8f6e837a, 0xb96ee0c2,
0xbf800002, 0xb97a0002,
- 0xbf8a0000, 0x95806f6c,
- 0xbf810000, 0x00000000,
+ 0xbf8a0000, 0xbe801f6c,
+ 0xbf9b0000, 0x00000000,
};
static const uint32_t cwsr_trap_aldebaran_hex[] = {
- 0xbf820001, 0xbf8202ce,
- 0xb8f8f802, 0x89788678,
- 0xb8eef801, 0x866eff6e,
- 0x00000800, 0xbf840003,
+ 0xbf820001, 0xbf8202e0,
+ 0xb8f8f802, 0x8978ff78,
+ 0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
- 0xbf840016, 0xb8fbf803,
+ 0xbf840009, 0x866eff6d,
+ 0x00ff0000, 0xbf85001e,
0x866eff7b, 0x00000400,
- 0xbf85003b, 0x866eff7b,
- 0x00000800, 0xbf850003,
- 0x866eff7b, 0x00000100,
- 0xbf84000c, 0x866eff78,
- 0x00002000, 0xbf840005,
- 0xbf8e0010, 0xb8eef803,
- 0x866eff6e, 0x00000400,
- 0xbf84fffb, 0x8778ff78,
- 0x00002000, 0x80ec886c,
- 0x82ed806d, 0xb8eef807,
- 0x866fff6e, 0x001f8000,
- 0x8e6f8b6f, 0x8977ff77,
- 0xfc000000, 0x87776f77,
- 0x896eff6e, 0x001f8000,
- 0xb96ef807, 0xb8faf812,
+ 0xbf850055, 0xbf8e0010,
+ 0xb8fbf803, 0xbf82fffa,
+ 0x866eff7b, 0x03c00900,
+ 0xbf850015, 0x866eff7b,
+ 0x000071ff, 0xbf840008,
+ 0x866fff7b, 0x00007080,
+ 0xbf840001, 0xbeee1a87,
+ 0xb8eff801, 0x8e6e8c6e,
+ 0x866e6f6e, 0xbf85000a,
+ 0x866eff6d, 0x00ff0000,
+ 0xbf850007, 0xb8eef801,
+ 0x866eff6e, 0x00000800,
+ 0xbf850003, 0x866eff7b,
+ 0x00000400, 0xbf85003a,
+ 0xb8faf807, 0x867aff7a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8977ff77, 0xfc000000,
+ 0x87777a77, 0xba7ff807,
+ 0x00000000, 0xb8faf812,
0xb8fbf813, 0x8efa887a,
- 0xc0071bbd, 0x00000000,
- 0xbf8cc07f, 0xc0071ebd,
- 0x00000008, 0xbf8cc07f,
- 0x86ee6e6e, 0xbf840001,
- 0xbe801d6e, 0xb8fbf803,
- 0x867bff7b, 0x000001ff,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x877bff7b, 0xffff0000,
+ 0xc0031bbd, 0x00000010,
+ 0xbf8cc07f, 0x8e6e976e,
+ 0x8977ff77, 0x00800000,
+ 0x87776e77, 0xc0071bbd,
+ 0x00000000, 0xbf8cc07f,
+ 0xc0071ebd, 0x00000008,
+ 0xbf8cc07f, 0x86ee6e6e,
+ 0xbf840001, 0xbe801d6e,
+ 0x866eff6d, 0x01ff0000,
+ 0xbf850005, 0x8778ff78,
+ 0x00002000, 0x80ec886c,
+ 0x82ed806d, 0xbf820005,
+ 0x866eff6d, 0x01000000,
0xbf850002, 0x806c846c,
0x826d806d, 0x866dff6d,
- 0x0000ffff, 0x8f6e8b77,
- 0x866eff6e, 0x001f8000,
- 0xb96ef807, 0x86fe7e7e,
+ 0x0000ffff, 0x8f7a8b77,
+ 0x867aff7a, 0x001f8000,
+ 0xb97af807, 0x86fe7e7e,
0x86ea6a6a, 0x8f6e8378,
0xb96ee0c2, 0xbf800002,
0xb9780002, 0xbe801f6c,
0x866dff6d, 0x0000ffff,
0xbefa0080, 0xb97a0283,
- 0xb8fa2407, 0x8e7a9b7a,
- 0x876d7a6d, 0xb8fa03c7,
- 0x8e7a9a7a, 0x876d7a6d,
0xb8faf807, 0x867aff7a,
- 0x00007fff, 0xb97af807,
- 0xbeee007e, 0xbeef007f,
- 0xbefe0180, 0xbf900004,
- 0x877a8478, 0xb97af802,
- 0xbf8e0002, 0xbf88fffe,
- 0xb8fa2985, 0x807a817a,
- 0x8e7a8a7a, 0x8e7a817a,
- 0xb8fb1605, 0x807b817b,
- 0x8e7b867b, 0x807a7b7a,
- 0x807a7e7a, 0x827b807f,
- 0x867bff7b, 0x0000ffff,
- 0xc04b1c3d, 0x00000050,
- 0xbf8cc07f, 0xc04b1d3d,
- 0x00000060, 0xbf8cc07f,
- 0xc0431e7d, 0x00000074,
- 0xbf8cc07f, 0xbef4007e,
- 0x8675ff7f, 0x0000ffff,
- 0x8775ff75, 0x00040000,
- 0xbef60080, 0xbef700ff,
- 0x00807fac, 0x867aff7f,
- 0x08000000, 0x8f7a837a,
- 0x87777a77, 0x867aff7f,
- 0x70000000, 0x8f7a817a,
- 0x87777a77, 0xbef1007c,
- 0xbef00080, 0xb8f02985,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fa1605,
- 0x807a817a, 0x8e7a867a,
- 0x80707a70, 0xbef60084,
- 0xbef600ff, 0x01000000,
- 0xbefe007c, 0xbefc0070,
- 0xc0611c7a, 0x0000007c,
- 0xbf8cc07f, 0x80708470,
- 0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611b3a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8977ff77, 0xfc000000,
+ 0x87777a77, 0xba7ff807,
+ 0x00000000, 0xbeee007e,
+ 0xbeef007f, 0xbefe0180,
+ 0xbf900004, 0x877a8478,
+ 0xb97af802, 0xbf8e0002,
+ 0xbf88fffe, 0xb8fa2985,
+ 0x807a817a, 0x8e7a8a7a,
+ 0x8e7a817a, 0xb8fb1605,
+ 0x807b817b, 0x8e7b867b,
+ 0x807a7b7a, 0x807a7e7a,
+ 0x827b807f, 0x867bff7b,
+ 0x0000ffff, 0xc04b1c3d,
+ 0x00000050, 0xbf8cc07f,
+ 0xc04b1d3d, 0x00000060,
+ 0xbf8cc07f, 0xc0431e7d,
+ 0x00000074, 0xbf8cc07f,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0xbef1007c, 0xbef00080,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xbefe007c,
+ 0xbefc0070, 0xc0611c7a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
- 0xc0611b7a, 0x0000007c,
+ 0xc0611b3a, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611bba,
+ 0xbefc0070, 0xc0611b7a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
- 0xc0611bfa, 0x0000007c,
+ 0xc0611bba, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611e3a,
- 0x0000007c, 0xbf8cc07f,
- 0x80708470, 0xbefc007e,
- 0xb8fbf803, 0xbefe007c,
- 0xbefc0070, 0xc0611efa,
+ 0xbefc0070, 0xc0611bfa,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
- 0xc0611a3a, 0x0000007c,
+ 0xc0611e3a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8fbf803,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611efa, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611a7a,
- 0x0000007c, 0xbf8cc07f,
- 0x80708470, 0xbefc007e,
- 0xb8f1f801, 0xbefe007c,
- 0xbefc0070, 0xc0611c7a,
+ 0xbefc0070, 0xc0611a3a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611a7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8f1f801,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611c7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbf108080,
0x867aff7f, 0x04000000,
0xbeef0080, 0x876f6f7a,
0xb8f02985, 0x80708170,
@@ -1935,15 +2152,11 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0x701d0300, 0x807c847c,
0x8070ff70, 0x00000400,
0xbf0a7b7c, 0xbf85ffeb,
- 0xbf9c0000, 0xbf820101,
+ 0xbf9c0000, 0xbf8200ee,
0xbef4007e, 0x8675ff7f,
0x0000ffff, 0x8775ff75,
0x00040000, 0xbef60080,
0xbef700ff, 0x00807fac,
- 0x866eff7f, 0x08000000,
- 0x8f6e836e, 0x87776e77,
- 0x866eff7f, 0x70000000,
- 0x8f6e816e, 0x87776e77,
0x866eff7f, 0x04000000,
0xbf84001f, 0xbefe00c1,
0xbeff00c1, 0xb8ef4306,
@@ -2052,41 +2265,56 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0x00000050, 0xc00b1d37,
0x00000060, 0xc0031e77,
0x00000074, 0xbf8cc07f,
- 0x866fff6d, 0xf8000000,
- 0x8f6f9b6f, 0x8e6f906f,
- 0xbeee0080, 0x876e6f6e,
- 0x866fff6d, 0x04000000,
- 0x8f6f9a6f, 0x8e6f8f6f,
- 0x876e6f6e, 0x866fff7a,
- 0x00800000, 0x8f6f976f,
- 0xb96ef807, 0x866dff6d,
- 0x0000ffff, 0x86fe7e7e,
- 0x86ea6a6a, 0x8f6e837a,
- 0xb96ee0c2, 0xbf800002,
- 0xb97a0002, 0xbf8a0000,
- 0x95806f6c, 0xbf810000,
+ 0x8f6e8b77, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0x8f6e837a, 0xb96ee0c2,
+ 0xbf800002, 0xb97a0002,
+ 0xbf8a0000, 0xbe801f6c,
+ 0xbf9b0000, 0x00000000,
};
static const uint32_t cwsr_trap_gfx10_hex[] = {
- 0xbf820001, 0xbf8201cf,
+ 0xbf820001, 0xbf820220,
0xb0804004, 0xb978f802,
- 0x8a788678, 0xb96ef801,
- 0x876eff6e, 0x00000800,
- 0xbf840003, 0x876eff78,
+ 0x8a78ff78, 0x00020006,
+ 0xb97bf803, 0x876eff78,
0x00002000, 0xbf840009,
- 0xb97bf803, 0x876eff7b,
- 0x00000400, 0xbf85001d,
- 0x876eff7b, 0x00000100,
- 0xbf840002, 0x8878ff78,
- 0x00002000, 0xb97af812,
+ 0x876eff6d, 0x00ff0000,
+ 0xbf85001e, 0x876eff7b,
+ 0x00000400, 0xbf850045,
+ 0xbf8e0010, 0xb97bf803,
+ 0xbf82fffa, 0x876eff7b,
+ 0x00000900, 0xbf850015,
+ 0x876eff7b, 0x000071ff,
+ 0xbf840008, 0x876fff7b,
+ 0x00007080, 0xbf840001,
+ 0xbeee1d87, 0xb96ff801,
+ 0x8f6e8c6e, 0x876e6f6e,
+ 0xbf85000a, 0x876eff6d,
+ 0x00ff0000, 0xbf850007,
+ 0xb96ef801, 0x876eff6e,
+ 0x00000800, 0xbf850003,
+ 0x876eff7b, 0x00000400,
+ 0xbf85002a, 0xb97af812,
0xb97bf813, 0x8ffa887a,
- 0xf4051bbd, 0xfa000000,
- 0xbf8cc07f, 0xf4051ebd,
- 0xfa000008, 0xbf8cc07f,
- 0x87ee6e6e, 0xbf840001,
- 0xbe80206e, 0xb97bf803,
- 0x877bff7b, 0x000001ff,
- 0xbf850002, 0x806c846c,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x887bff7b, 0xffff0000,
+ 0xf4011bbd, 0xfa000010,
+ 0xbf8c0000, 0x8f6e976e,
+ 0x8a77ff77, 0x00800000,
+ 0x88776e77, 0xf4051bbd,
+ 0xfa000000, 0xbf8c0000,
+ 0xf4051ebd, 0xfa000008,
+ 0xbf8c0000, 0x87ee6e6e,
+ 0xbf840001, 0xbe80206e,
+ 0x876eff6d, 0x00ff0000,
+ 0xbf850008, 0x876eff6d,
+ 0x01000000, 0xbf850007,
+ 0x8878ff78, 0x00002000,
+ 0x80ec886c, 0x82ed806d,
+ 0xbf820002, 0x806c846c,
0x826d806d, 0x876dff6d,
0x0000ffff, 0x87fe7e7e,
0x87ea6a6a, 0xb9f8f802,
@@ -2094,70 +2322,91 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x0000ffff, 0xbefa0380,
0xb9fa0283, 0xbeee037e,
0xbeef037f, 0xbefe0480,
- 0xbf900004, 0xbf8cc07f,
- 0xb97b02dc, 0x8f7b997b,
- 0x887b7b7f, 0xb97a2a05,
+ 0xbf900004, 0xbf8c0000,
+ 0x877aff7f, 0x04000000,
+ 0x8f7a857a, 0x886d7a6d,
+ 0x7e008200, 0xbefa037e,
+ 0x877bff7f, 0x0000ffff,
+ 0xbefe03c1, 0xbeff03c1,
+ 0xdc5f8000, 0x007a0000,
+ 0x7e000280, 0xbefe037a,
+ 0xbeff037b, 0xb97b02dc,
+ 0x8f7b997b, 0xb97a3a05,
0x807a817a, 0xbf0d997b,
0xbf850002, 0x8f7a897a,
0xbf820001, 0x8f7a8a7a,
- 0x877bff7f, 0x0000ffff,
- 0x807aff7a, 0x00000200,
- 0x807a7e7a, 0x827b807b,
+ 0xb97b1e06, 0x8f7b8a7b,
+ 0x807a7b7a, 0x877bff7f,
+ 0x0000ffff, 0x807aff7a,
+ 0x00000200, 0x807a7e7a,
+ 0x827b807b, 0xd7610000,
+ 0x00010870, 0xd7610000,
+ 0x00010a71, 0xd7610000,
+ 0x00010c72, 0xd7610000,
+ 0x00010e73, 0xd7610000,
+ 0x00011074, 0xd7610000,
+ 0x00011275, 0xd7610000,
+ 0x00011476, 0xd7610000,
+ 0x00011677, 0xd7610000,
+ 0x00011a79, 0xd7610000,
+ 0x00011c7e, 0xd7610000,
+ 0x00011e7f, 0xbefe03ff,
+ 0x00003fff, 0xbeff0380,
+ 0xdc5f8040, 0x007a0000,
+ 0xd760007a, 0x00011d00,
+ 0xd760007b, 0x00011f00,
+ 0xbefe037a, 0xbeff037b,
0xbef4037e, 0x8775ff7f,
0x0000ffff, 0x8875ff75,
0x00040000, 0xbef60380,
0xbef703ff, 0x10807fac,
- 0x877aff7f, 0x08000000,
- 0x907a837a, 0x88777a77,
- 0x877aff7f, 0x70000000,
- 0x907a817a, 0x88777a77,
0xbef1037c, 0xbef00380,
0xb97302dc, 0x8f739973,
- 0x8873737f, 0xbefe03c1,
- 0x907c9973, 0x877c817c,
- 0xbf06817c, 0xbf850002,
- 0xbeff0380, 0xbf820002,
- 0xbeff03c1, 0xbf82000b,
+ 0xbefe03c1, 0x907c9973,
+ 0x877c817c, 0xbf06817c,
+ 0xbf850002, 0xbeff0380,
+ 0xbf820002, 0xbeff03c1,
+ 0xbf820009, 0xbef603ff,
+ 0x01000000, 0xe0704080,
+ 0x705d0100, 0xe0704100,
+ 0x705d0200, 0xe0704180,
+ 0x705d0300, 0xbf820008,
0xbef603ff, 0x01000000,
- 0xe0704000, 0x705d0000,
- 0xe0704080, 0x705d0100,
- 0xe0704100, 0x705d0200,
- 0xe0704180, 0x705d0300,
- 0xbf82000a, 0xbef603ff,
- 0x01000000, 0xe0704000,
- 0x705d0000, 0xe0704100,
- 0x705d0100, 0xe0704200,
- 0x705d0200, 0xe0704300,
- 0x705d0300, 0xb9702a05,
- 0x80708170, 0xbf0d9973,
- 0xbf850002, 0x8f708970,
- 0xbf820001, 0x8f708a70,
- 0xb97a1e06, 0x8f7a8a7a,
- 0x80707a70, 0x8070ff70,
- 0x00000200, 0xbef603ff,
- 0x01000000, 0x7e000280,
- 0x7e020280, 0x7e040280,
- 0xbefc0380, 0xd7610002,
- 0x0000f871, 0x807c817c,
- 0xd7610002, 0x0000f86c,
- 0x807c817c, 0xd7610002,
- 0x0000f86d, 0x807c817c,
- 0xd7610002, 0x0000f86e,
+ 0xe0704100, 0x705d0100,
+ 0xe0704200, 0x705d0200,
+ 0xe0704300, 0x705d0300,
+ 0xb9703a05, 0x80708170,
+ 0xbf0d9973, 0xbf850002,
+ 0x8f708970, 0xbf820001,
+ 0x8f708a70, 0xb97a1e06,
+ 0x8f7a8a7a, 0x80707a70,
+ 0x8070ff70, 0x00000200,
+ 0xbef603ff, 0x01000000,
+ 0x7e000280, 0x7e020280,
+ 0x7e040280, 0xbefc0380,
+ 0xd7610002, 0x0000f871,
0x807c817c, 0xd7610002,
- 0x0000f86f, 0x807c817c,
- 0xd7610002, 0x0000f878,
- 0x807c817c, 0xb97af803,
+ 0x0000f86c, 0x807c817c,
+ 0x8a7aff6d, 0x80000000,
0xd7610002, 0x0000f87a,
0x807c817c, 0xd7610002,
- 0x0000f87b, 0x807c817c,
- 0xb971f801, 0xd7610002,
- 0x0000f871, 0x807c817c,
- 0xb971f814, 0xd7610002,
- 0x0000f871, 0x807c817c,
- 0xb971f815, 0xd7610002,
- 0x0000f871, 0x807c817c,
- 0xbeff0380, 0xe0704000,
- 0x705d0200, 0xb9702a05,
+ 0x0000f86e, 0x807c817c,
+ 0xd7610002, 0x0000f86f,
+ 0x807c817c, 0xd7610002,
+ 0x0000f878, 0x807c817c,
+ 0xb97af803, 0xd7610002,
+ 0x0000f87a, 0x807c817c,
+ 0xd7610002, 0x0000f87b,
+ 0x807c817c, 0xb971f801,
+ 0xd7610002, 0x0000f871,
+ 0x807c817c, 0xb971f814,
+ 0xd7610002, 0x0000f871,
+ 0x807c817c, 0xb971f815,
+ 0xd7610002, 0x0000f871,
+ 0x807c817c, 0xbefe03ff,
+ 0x0000ffff, 0xbeff0380,
+ 0xe0704000, 0x705d0200,
+ 0xbefe03c1, 0xb9703a05,
0x80708170, 0xbf0d9973,
0xbf850002, 0x8f708970,
0xbf820001, 0x8f708a70,
@@ -2226,150 +2475,147 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf850002, 0xbeff0380,
0xbf820001, 0xbeff03c1,
0xb97b4306, 0x877bc17b,
- 0xbf840044, 0xbf8a0000,
- 0x877aff73, 0x04000000,
- 0xbf840040, 0x8f7b867b,
- 0x8f7b827b, 0xbef6037b,
- 0xb9702a05, 0x80708170,
- 0xbf0d9973, 0xbf850002,
- 0x8f708970, 0xbf820001,
- 0x8f708a70, 0xb97a1e06,
- 0x8f7a8a7a, 0x80707a70,
- 0x8070ff70, 0x00000200,
- 0x8070ff70, 0x00000080,
- 0xbef603ff, 0x01000000,
- 0xd7650000, 0x000100c1,
- 0xd7660000, 0x000200c1,
- 0x16000084, 0x907c9973,
- 0x877c817c, 0xbf06817c,
- 0xbefc0380, 0xbf850012,
- 0xbe8303ff, 0x00000080,
+ 0xbf840043, 0xbf8a0000,
+ 0x877aff6d, 0x80000000,
+ 0xbf84003f, 0x8f7b887b,
+ 0xbef6037b, 0xb9703a05,
+ 0x80708170, 0xbf0d9973,
+ 0xbf850002, 0x8f708970,
+ 0xbf820001, 0x8f708a70,
+ 0xb97a1e06, 0x8f7a8a7a,
+ 0x80707a70, 0x8070ff70,
+ 0x00000200, 0x8070ff70,
+ 0x00000080, 0xbef603ff,
+ 0x01000000, 0xd7650000,
+ 0x000100c1, 0xd7660000,
+ 0x000200c1, 0x16000084,
+ 0x907c9973, 0x877c817c,
+ 0xbf06817c, 0xbefc0380,
+ 0xbf850012, 0xbe8303ff,
+ 0x00000080, 0xbf800000,
0xbf800000, 0xbf800000,
- 0xbf800000, 0xd8d80000,
- 0x01000000, 0xbf8c0000,
- 0xe0704000, 0x705d0100,
- 0x807c037c, 0x80700370,
- 0xd5250000, 0x0001ff00,
- 0x00000080, 0xbf0a7b7c,
- 0xbf85fff4, 0xbf820011,
- 0xbe8303ff, 0x00000100,
+ 0xd8d80000, 0x01000000,
+ 0xbf8c0000, 0xe0704000,
+ 0x705d0100, 0x807c037c,
+ 0x80700370, 0xd5250000,
+ 0x0001ff00, 0x00000080,
+ 0xbf0a7b7c, 0xbf85fff4,
+ 0xbf820011, 0xbe8303ff,
+ 0x00000100, 0xbf800000,
0xbf800000, 0xbf800000,
- 0xbf800000, 0xd8d80000,
- 0x01000000, 0xbf8c0000,
- 0xe0704000, 0x705d0100,
- 0x807c037c, 0x80700370,
- 0xd5250000, 0x0001ff00,
- 0x00000100, 0xbf0a7b7c,
- 0xbf85fff4, 0xbefe03c1,
- 0x907c9973, 0x877c817c,
- 0xbf06817c, 0xbf850004,
- 0xbef003ff, 0x00000200,
- 0xbeff0380, 0xbf820003,
- 0xbef003ff, 0x00000400,
- 0xbeff03c1, 0xb97b2a05,
- 0x807b817b, 0x8f7b827b,
- 0x907c9973, 0x877c817c,
- 0xbf06817c, 0xbf850017,
+ 0xd8d80000, 0x01000000,
+ 0xbf8c0000, 0xe0704000,
+ 0x705d0100, 0x807c037c,
+ 0x80700370, 0xd5250000,
+ 0x0001ff00, 0x00000100,
+ 0xbf0a7b7c, 0xbf85fff4,
+ 0xbefe03c1, 0x907c9973,
+ 0x877c817c, 0xbf06817c,
+ 0xbf850004, 0xbef003ff,
+ 0x00000200, 0xbeff0380,
+ 0xbf820003, 0xbef003ff,
+ 0x00000400, 0xbeff03c1,
+ 0xb97b3a05, 0x807b817b,
+ 0x8f7b827b, 0x907c9973,
+ 0x877c817c, 0xbf06817c,
+ 0xbf850017, 0xbef603ff,
+ 0x01000000, 0xbefc0384,
+ 0xbf0a7b7c, 0xbf840037,
+ 0x7e008700, 0x7e028701,
+ 0x7e048702, 0x7e068703,
+ 0xe0704000, 0x705d0000,
+ 0xe0704080, 0x705d0100,
+ 0xe0704100, 0x705d0200,
+ 0xe0704180, 0x705d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000200, 0xbf0a7b7c,
+ 0xbf85ffef, 0xbf820025,
0xbef603ff, 0x01000000,
0xbefc0384, 0xbf0a7b7c,
- 0xbf840037, 0x7e008700,
+ 0xbf840011, 0x7e008700,
0x7e028701, 0x7e048702,
0x7e068703, 0xe0704000,
- 0x705d0000, 0xe0704080,
- 0x705d0100, 0xe0704100,
- 0x705d0200, 0xe0704180,
+ 0x705d0000, 0xe0704100,
+ 0x705d0100, 0xe0704200,
+ 0x705d0200, 0xe0704300,
0x705d0300, 0x807c847c,
- 0x8070ff70, 0x00000200,
+ 0x8070ff70, 0x00000400,
0xbf0a7b7c, 0xbf85ffef,
- 0xbf820025, 0xbef603ff,
- 0x01000000, 0xbefc0384,
- 0xbf0a7b7c, 0xbf840020,
- 0x7e008700, 0x7e028701,
- 0x7e048702, 0x7e068703,
+ 0xb97b1e06, 0x877bc17b,
+ 0xbf84000c, 0x8f7b837b,
+ 0x807b7c7b, 0xbefe03c1,
+ 0xbeff0380, 0x7e008700,
0xe0704000, 0x705d0000,
- 0xe0704100, 0x705d0100,
- 0xe0704200, 0x705d0200,
- 0xe0704300, 0x705d0300,
- 0x807c847c, 0x8070ff70,
- 0x00000400, 0xbf0a7b7c,
- 0xbf85ffef, 0xb97b1e06,
- 0x877bc17b, 0xbf84000c,
- 0x8f7b837b, 0x807b7c7b,
- 0xbefe03c1, 0xbeff0380,
- 0x7e008700, 0xe0704000,
- 0x705d0000, 0x807c817c,
- 0x8070ff70, 0x00000080,
- 0xbf0a7b7c, 0xbf85fff8,
- 0xbf82013c, 0xbef4037e,
- 0x8775ff7f, 0x0000ffff,
- 0x8875ff75, 0x00040000,
- 0xbef60380, 0xbef703ff,
- 0x10807fac, 0x876eff7f,
- 0x08000000, 0x906e836e,
- 0x88776e77, 0x876eff7f,
- 0x70000000, 0x906e816e,
- 0x88776e77, 0xb97202dc,
- 0x8f729972, 0x8872727f,
+ 0x807c817c, 0x8070ff70,
+ 0x00000080, 0xbf0a7b7c,
+ 0xbf85fff8, 0xbf820136,
+ 0xbef4037e, 0x8775ff7f,
+ 0x0000ffff, 0x8875ff75,
+ 0x00040000, 0xbef60380,
+ 0xbef703ff, 0x10807fac,
+ 0xb97202dc, 0x8f729972,
0x876eff7f, 0x04000000,
- 0xbf840034, 0xbefe03c1,
+ 0xbf840033, 0xbefe03c1,
0x907c9972, 0x877c817c,
0xbf06817c, 0xbf850002,
0xbeff0380, 0xbf820001,
0xbeff03c1, 0xb96f4306,
- 0x876fc16f, 0xbf840029,
- 0x8f6f866f, 0x8f6f826f,
- 0xbef6036f, 0xb9782a05,
- 0x80788178, 0xbf0d9972,
- 0xbf850002, 0x8f788978,
- 0xbf820001, 0x8f788a78,
- 0xb96e1e06, 0x8f6e8a6e,
- 0x80786e78, 0x8078ff78,
- 0x00000200, 0x8078ff78,
- 0x00000080, 0xbef603ff,
- 0x01000000, 0x907c9972,
- 0x877c817c, 0xbf06817c,
- 0xbefc0380, 0xbf850009,
- 0xe0310000, 0x781d0000,
- 0x807cff7c, 0x00000080,
+ 0x876fc16f, 0xbf840028,
+ 0x8f6f886f, 0xbef6036f,
+ 0xb9783a05, 0x80788178,
+ 0xbf0d9972, 0xbf850002,
+ 0x8f788978, 0xbf820001,
+ 0x8f788a78, 0xb96e1e06,
+ 0x8f6e8a6e, 0x80786e78,
+ 0x8078ff78, 0x00000200,
0x8078ff78, 0x00000080,
- 0xbf0a6f7c, 0xbf85fff8,
- 0xbf820008, 0xe0310000,
+ 0xbef603ff, 0x01000000,
+ 0x907c9972, 0x877c817c,
+ 0xbf06817c, 0xbefc0380,
+ 0xbf850009, 0xe0310000,
0x781d0000, 0x807cff7c,
- 0x00000100, 0x8078ff78,
- 0x00000100, 0xbf0a6f7c,
- 0xbf85fff8, 0xbef80380,
- 0xbefe03c1, 0x907c9972,
- 0x877c817c, 0xbf06817c,
- 0xbf850002, 0xbeff0380,
- 0xbf820001, 0xbeff03c1,
- 0xb96f2a05, 0x806f816f,
- 0x8f6f826f, 0x907c9972,
- 0x877c817c, 0xbf06817c,
- 0xbf850021, 0xbef603ff,
- 0x01000000, 0xbeee0378,
+ 0x00000080, 0x8078ff78,
+ 0x00000080, 0xbf0a6f7c,
+ 0xbf85fff8, 0xbf820008,
+ 0xe0310000, 0x781d0000,
+ 0x807cff7c, 0x00000100,
+ 0x8078ff78, 0x00000100,
+ 0xbf0a6f7c, 0xbf85fff8,
+ 0xbef80380, 0xbefe03c1,
+ 0x907c9972, 0x877c817c,
+ 0xbf06817c, 0xbf850002,
+ 0xbeff0380, 0xbf820001,
+ 0xbeff03c1, 0xb96f3a05,
+ 0x806f816f, 0x8f6f826f,
+ 0x907c9972, 0x877c817c,
+ 0xbf06817c, 0xbf850024,
+ 0xbef603ff, 0x01000000,
+ 0xbeee0378, 0x8078ff78,
+ 0x00000200, 0xbefc0384,
+ 0xbf0a6f7c, 0xbf840050,
+ 0xe0304000, 0x785d0000,
+ 0xe0304080, 0x785d0100,
+ 0xe0304100, 0x785d0200,
+ 0xe0304180, 0x785d0300,
+ 0xbf8c0000, 0x7e008500,
+ 0x7e028501, 0x7e048502,
+ 0x7e068503, 0x807c847c,
0x8078ff78, 0x00000200,
- 0xbefc0384, 0xe0304000,
- 0x785d0000, 0xe0304080,
- 0x785d0100, 0xe0304100,
- 0x785d0200, 0xe0304180,
- 0x785d0300, 0xbf8c3f70,
- 0x7e008500, 0x7e028501,
- 0x7e048502, 0x7e068503,
- 0x807c847c, 0x8078ff78,
- 0x00000200, 0xbf0a6f7c,
- 0xbf85ffee, 0xe0304000,
- 0x6e5d0000, 0xe0304080,
- 0x6e5d0100, 0xe0304100,
- 0x6e5d0200, 0xe0304180,
- 0x6e5d0300, 0xbf820032,
+ 0xbf0a6f7c, 0xbf85ffee,
+ 0xe0304000, 0x6e5d0000,
+ 0xe0304080, 0x6e5d0100,
+ 0xe0304100, 0x6e5d0200,
+ 0xe0304180, 0x6e5d0300,
+ 0xbf8c0000, 0xbf820034,
0xbef603ff, 0x01000000,
0xbeee0378, 0x8078ff78,
0x00000400, 0xbefc0384,
+ 0xbf0a6f7c, 0xbf840012,
0xe0304000, 0x785d0000,
0xe0304100, 0x785d0100,
0xe0304200, 0x785d0200,
0xe0304300, 0x785d0300,
- 0xbf8c3f70, 0x7e008500,
+ 0xbf8c0000, 0x7e008500,
0x7e028501, 0x7e048502,
0x7e068503, 0x807c847c,
0x8078ff78, 0x00000400,
@@ -2378,7 +2624,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf84000e, 0x8f6f836f,
0x806f7c6f, 0xbefe03c1,
0xbeff0380, 0xe0304000,
- 0x785d0000, 0xbf8c3f70,
+ 0x785d0000, 0xbf8c0000,
0x7e008500, 0x807c817c,
0x8078ff78, 0x00000080,
0xbf0a6f7c, 0xbf85fff7,
@@ -2386,8 +2632,8 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x6e5d0000, 0xe0304100,
0x6e5d0100, 0xe0304200,
0x6e5d0200, 0xe0304300,
- 0x6e5d0300, 0xbf8c3f70,
- 0xb9782a05, 0x80788178,
+ 0x6e5d0300, 0xbf8c0000,
+ 0xb9783a05, 0x80788178,
0xbf0d9972, 0xbf850002,
0x8f788978, 0xbf820001,
0x8f788a78, 0xb96e1e06,
@@ -2397,16 +2643,16 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbef603ff, 0x01000000,
0xbefc03ff, 0x0000006c,
0x80f89078, 0xf429003a,
- 0xf0000000, 0xbf8cc07f,
+ 0xf0000000, 0xbf8c0000,
0x80fc847c, 0xbf800000,
0xbe803100, 0xbe823102,
0x80f8a078, 0xf42d003a,
- 0xf0000000, 0xbf8cc07f,
+ 0xf0000000, 0xbf8c0000,
0x80fc887c, 0xbf800000,
0xbe803100, 0xbe823102,
0xbe843104, 0xbe863106,
0x80f8c078, 0xf431003a,
- 0xf0000000, 0xbf8cc07f,
+ 0xf0000000, 0xbf8c0000,
0x80fc907c, 0xbf800000,
0xbe803100, 0xbe823102,
0xbe843104, 0xbe863106,
@@ -2414,7 +2660,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbe8c310c, 0xbe8e310e,
0xbf06807c, 0xbf84fff0,
0xba80f801, 0x00000000,
- 0xbf8a0000, 0xb9782a05,
+ 0xbf8a0000, 0xb9783a05,
0x80788178, 0xbf0d9972,
0xbf850002, 0x8f788978,
0xbf820001, 0x8f788a78,
@@ -2436,30 +2682,1975 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x80788478, 0xf4211cfa,
0xf0000000, 0x80788478,
0xf4211bba, 0xf0000000,
- 0x80788478, 0xbf8cc07f,
+ 0x80788478, 0xbf8c0000,
0xb9eef814, 0xf4211bba,
0xf0000000, 0x80788478,
- 0xbf8cc07f, 0xb9eef815,
+ 0xbf8c0000, 0xb9eef815,
0xbefc036f, 0xbefe0370,
- 0xbeff0371, 0x876f7bff,
- 0x000003ff, 0xb9ef4803,
- 0x876f7bff, 0xfffff800,
- 0x906f8b6f, 0xb9efa2c3,
- 0xb9f3f801, 0xb96e2a05,
+ 0xbeff0371, 0xb9fb4803,
+ 0x907b8b7b, 0xb9fba2c3,
+ 0xb9f3f801, 0xb96e3a05,
0x806e816e, 0xbf0d9972,
0xbf850002, 0x8f6e896e,
0xbf820001, 0x8f6e8a6e,
+ 0xb96f1e06, 0x8f6f8a6f,
+ 0x806e6f6e, 0x806eff6e,
+ 0x00000200, 0x806e746e,
+ 0x826f8075, 0x876fff6f,
+ 0x0000ffff, 0xf4091c37,
+ 0xfa000050, 0xf4091d37,
+ 0xfa000060, 0xf4011e77,
+ 0xfa000074, 0xbf8c0000,
+ 0x876dff6d, 0x0000ffff,
+ 0x87fe7e7e, 0x87ea6a6a,
+ 0xb9faf802, 0xbe80226c,
+ 0xbf9b0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+};
+
+static const uint32_t cwsr_trap_gfx11_hex[] = {
+ 0xbfa00001, 0xbfa00227,
+ 0xb0804006, 0xb8f8f802,
+ 0x9178ff78, 0x00020006,
+ 0xb8fbf803, 0xbf0d9e6d,
+ 0xbfa10001, 0xbfbd0000,
+ 0xbf0d9f6d, 0xbfa20006,
+ 0x8b6eff78, 0x00002000,
+ 0xbfa10009, 0x8b6eff6d,
+ 0x00ff0000, 0xbfa2001e,
+ 0x8b6eff7b, 0x00000400,
+ 0xbfa20045, 0xbf830010,
+ 0xb8fbf803, 0xbfa0fffa,
+ 0x8b6eff7b, 0x00160900,
+ 0xbfa20015, 0x8b6eff7b,
+ 0x000071ff, 0xbfa10008,
+ 0x8b6fff7b, 0x00007080,
+ 0xbfa10001, 0xbeee1287,
+ 0xb8eff801, 0x846e8c6e,
+ 0x8b6e6f6e, 0xbfa2000a,
+ 0x8b6eff6d, 0x00ff0000,
+ 0xbfa20007, 0xb8eef801,
+ 0x8b6eff6e, 0x00000800,
+ 0xbfa20003, 0x8b6eff7b,
+ 0x00000400, 0xbfa2002a,
+ 0xbefa4d82, 0xbf890000,
+ 0x84fa887a, 0xbf0d8f7b,
+ 0xbfa10002, 0x8c7bff7b,
+ 0xffff0000, 0xf4005bbd,
+ 0xf8000010, 0xbf890000,
+ 0x846e976e, 0x9177ff77,
+ 0x00800000, 0x8c776e77,
+ 0xf4045bbd, 0xf8000000,
+ 0xbf890000, 0xf4045ebd,
+ 0xf8000008, 0xbf890000,
+ 0x8bee6e6e, 0xbfa10001,
+ 0xbe80486e, 0x8b6eff6d,
+ 0x00ff0000, 0xbfa20008,
+ 0x8b6eff6d, 0x01000000,
+ 0xbfa20007, 0x8c78ff78,
+ 0x00002000, 0x80ec886c,
+ 0x82ed806d, 0xbfa00002,
+ 0x806c846c, 0x826d806d,
+ 0x8b6dff6d, 0x0000ffff,
+ 0x8bfe7e7e, 0x8bea6a6a,
+ 0xb978f802, 0xbe804a6c,
+ 0xbf0d9878, 0xbfa10001,
+ 0xbfb00000, 0x8b6dff6d,
+ 0x0000ffff, 0xbefa0080,
+ 0xb97a0283, 0xbeee007e,
+ 0xbeef007f, 0xbefe0180,
+ 0xbefe4d84, 0xbf890000,
+ 0x8b7aff7f, 0x04000000,
+ 0x847a857a, 0x8c6d7a6d,
+ 0xbefa007e, 0x8b7bff7f,
+ 0x0000ffff, 0xbefe00c1,
+ 0xbeff00c1, 0xdca6c000,
+ 0x007a0000, 0x7e000280,
+ 0xbefe007a, 0xbeff007b,
+ 0xb8fb02dc, 0x847b997b,
+ 0xb8fa3b05, 0x807a817a,
+ 0xbf0d997b, 0xbfa20002,
+ 0x847a897a, 0xbfa00001,
+ 0x847a8a7a, 0xb8fb1e06,
+ 0x847b8a7b, 0x807a7b7a,
+ 0x8b7bff7f, 0x0000ffff,
+ 0x807aff7a, 0x00000200,
+ 0x807a7e7a, 0x827b807b,
+ 0xd7610000, 0x00010870,
+ 0xd7610000, 0x00010a71,
+ 0xd7610000, 0x00010c72,
+ 0xd7610000, 0x00010e73,
+ 0xd7610000, 0x00011074,
+ 0xd7610000, 0x00011275,
+ 0xd7610000, 0x00011476,
+ 0xd7610000, 0x00011677,
+ 0xd7610000, 0x00011a79,
+ 0xd7610000, 0x00011c7e,
+ 0xd7610000, 0x00011e7f,
+ 0xbefe00ff, 0x00003fff,
+ 0xbeff0080, 0xdca6c040,
+ 0x007a0000, 0xd760007a,
+ 0x00011d00, 0xd760007b,
+ 0x00011f00, 0xbefe007a,
+ 0xbeff007b, 0xbef4007e,
+ 0x8b75ff7f, 0x0000ffff,
+ 0x8c75ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x10807fac, 0xbef1007d,
+ 0xbef00080, 0xb8f302dc,
+ 0x84739973, 0xbefe00c1,
+ 0x857d9973, 0x8b7d817d,
+ 0xbf06817d, 0xbfa20002,
+ 0xbeff0080, 0xbfa00002,
+ 0xbeff00c1, 0xbfa00009,
+ 0xbef600ff, 0x01000000,
+ 0xe0685080, 0x701d0100,
+ 0xe0685100, 0x701d0200,
+ 0xe0685180, 0x701d0300,
+ 0xbfa00008, 0xbef600ff,
+ 0x01000000, 0xe0685100,
+ 0x701d0100, 0xe0685200,
+ 0x701d0200, 0xe0685300,
+ 0x701d0300, 0xb8f03b05,
+ 0x80708170, 0xbf0d9973,
+ 0xbfa20002, 0x84708970,
+ 0xbfa00001, 0x84708a70,
+ 0xb8fa1e06, 0x847a8a7a,
+ 0x80707a70, 0x8070ff70,
+ 0x00000200, 0xbef600ff,
+ 0x01000000, 0x7e000280,
+ 0x7e020280, 0x7e040280,
+ 0xbefd0080, 0xd7610002,
+ 0x0000fa71, 0x807d817d,
+ 0xd7610002, 0x0000fa6c,
+ 0x807d817d, 0x917aff6d,
+ 0x80000000, 0xd7610002,
+ 0x0000fa7a, 0x807d817d,
+ 0xd7610002, 0x0000fa6e,
+ 0x807d817d, 0xd7610002,
+ 0x0000fa6f, 0x807d817d,
+ 0xd7610002, 0x0000fa78,
+ 0x807d817d, 0xb8faf803,
+ 0xd7610002, 0x0000fa7a,
+ 0x807d817d, 0xd7610002,
+ 0x0000fa7b, 0x807d817d,
+ 0xb8f1f801, 0xd7610002,
+ 0x0000fa71, 0x807d817d,
+ 0xb8f1f814, 0xd7610002,
+ 0x0000fa71, 0x807d817d,
+ 0xb8f1f815, 0xd7610002,
+ 0x0000fa71, 0x807d817d,
+ 0xbefe00ff, 0x0000ffff,
+ 0xbeff0080, 0xe0685000,
+ 0x701d0200, 0xbefe00c1,
+ 0xb8f03b05, 0x80708170,
+ 0xbf0d9973, 0xbfa20002,
+ 0x84708970, 0xbfa00001,
+ 0x84708a70, 0xb8fa1e06,
+ 0x847a8a7a, 0x80707a70,
+ 0xbef600ff, 0x01000000,
+ 0xbef90080, 0xbefd0080,
+ 0xbf800000, 0xbe804100,
+ 0xbe824102, 0xbe844104,
+ 0xbe864106, 0xbe884108,
+ 0xbe8a410a, 0xbe8c410c,
+ 0xbe8e410e, 0xd7610002,
+ 0x0000f200, 0x80798179,
+ 0xd7610002, 0x0000f201,
+ 0x80798179, 0xd7610002,
+ 0x0000f202, 0x80798179,
+ 0xd7610002, 0x0000f203,
+ 0x80798179, 0xd7610002,
+ 0x0000f204, 0x80798179,
+ 0xd7610002, 0x0000f205,
+ 0x80798179, 0xd7610002,
+ 0x0000f206, 0x80798179,
+ 0xd7610002, 0x0000f207,
+ 0x80798179, 0xd7610002,
+ 0x0000f208, 0x80798179,
+ 0xd7610002, 0x0000f209,
+ 0x80798179, 0xd7610002,
+ 0x0000f20a, 0x80798179,
+ 0xd7610002, 0x0000f20b,
+ 0x80798179, 0xd7610002,
+ 0x0000f20c, 0x80798179,
+ 0xd7610002, 0x0000f20d,
+ 0x80798179, 0xd7610002,
+ 0x0000f20e, 0x80798179,
+ 0xd7610002, 0x0000f20f,
+ 0x80798179, 0xbf06a079,
+ 0xbfa10006, 0xe0685000,
+ 0x701d0200, 0x8070ff70,
+ 0x00000080, 0xbef90080,
+ 0x7e040280, 0x807d907d,
+ 0xbf0aff7d, 0x00000060,
+ 0xbfa2ffbc, 0xbe804100,
+ 0xbe824102, 0xbe844104,
+ 0xbe864106, 0xbe884108,
+ 0xbe8a410a, 0xd7610002,
+ 0x0000f200, 0x80798179,
+ 0xd7610002, 0x0000f201,
+ 0x80798179, 0xd7610002,
+ 0x0000f202, 0x80798179,
+ 0xd7610002, 0x0000f203,
+ 0x80798179, 0xd7610002,
+ 0x0000f204, 0x80798179,
+ 0xd7610002, 0x0000f205,
+ 0x80798179, 0xd7610002,
+ 0x0000f206, 0x80798179,
+ 0xd7610002, 0x0000f207,
+ 0x80798179, 0xd7610002,
+ 0x0000f208, 0x80798179,
+ 0xd7610002, 0x0000f209,
+ 0x80798179, 0xd7610002,
+ 0x0000f20a, 0x80798179,
+ 0xd7610002, 0x0000f20b,
+ 0x80798179, 0xe0685000,
+ 0x701d0200, 0xbefe00c1,
+ 0x857d9973, 0x8b7d817d,
+ 0xbf06817d, 0xbfa20002,
+ 0xbeff0080, 0xbfa00001,
+ 0xbeff00c1, 0xb8fb4306,
+ 0x8b7bc17b, 0xbfa10043,
+ 0xbfbd0000, 0x8b7aff6d,
+ 0x80000000, 0xbfa1003f,
+ 0x847b887b, 0xbef6007b,
+ 0xb8f03b05, 0x80708170,
+ 0xbf0d9973, 0xbfa20002,
+ 0x84708970, 0xbfa00001,
+ 0x84708a70, 0xb8fa1e06,
+ 0x847a8a7a, 0x80707a70,
+ 0x8070ff70, 0x00000200,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xd71f0000, 0x000100c1,
+ 0xd7200000, 0x000200c1,
+ 0x16000084, 0x857d9973,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbefd0080, 0xbfa20012,
+ 0xbe8300ff, 0x00000080,
+ 0xbf800000, 0xbf800000,
+ 0xbf800000, 0xd8d80000,
+ 0x01000000, 0xbf890000,
+ 0xe0685000, 0x701d0100,
+ 0x807d037d, 0x80700370,
+ 0xd5250000, 0x0001ff00,
+ 0x00000080, 0xbf0a7b7d,
+ 0xbfa2fff4, 0xbfa00011,
+ 0xbe8300ff, 0x00000100,
+ 0xbf800000, 0xbf800000,
+ 0xbf800000, 0xd8d80000,
+ 0x01000000, 0xbf890000,
+ 0xe0685000, 0x701d0100,
+ 0x807d037d, 0x80700370,
+ 0xd5250000, 0x0001ff00,
+ 0x00000100, 0xbf0a7b7d,
+ 0xbfa2fff4, 0xbefe00c1,
+ 0x857d9973, 0x8b7d817d,
+ 0xbf06817d, 0xbfa20004,
+ 0xbef000ff, 0x00000200,
+ 0xbeff0080, 0xbfa00003,
+ 0xbef000ff, 0x00000400,
+ 0xbeff00c1, 0xb8fb3b05,
+ 0x807b817b, 0x847b827b,
+ 0x857d9973, 0x8b7d817d,
+ 0xbf06817d, 0xbfa20017,
+ 0xbef600ff, 0x01000000,
+ 0xbefd0084, 0xbf0a7b7d,
+ 0xbfa10037, 0x7e008700,
+ 0x7e028701, 0x7e048702,
+ 0x7e068703, 0xe0685000,
+ 0x701d0000, 0xe0685080,
+ 0x701d0100, 0xe0685100,
+ 0x701d0200, 0xe0685180,
+ 0x701d0300, 0x807d847d,
+ 0x8070ff70, 0x00000200,
+ 0xbf0a7b7d, 0xbfa2ffef,
+ 0xbfa00025, 0xbef600ff,
+ 0x01000000, 0xbefd0084,
+ 0xbf0a7b7d, 0xbfa10011,
+ 0x7e008700, 0x7e028701,
+ 0x7e048702, 0x7e068703,
+ 0xe0685000, 0x701d0000,
+ 0xe0685100, 0x701d0100,
+ 0xe0685200, 0x701d0200,
+ 0xe0685300, 0x701d0300,
+ 0x807d847d, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7d,
+ 0xbfa2ffef, 0xb8fb1e06,
+ 0x8b7bc17b, 0xbfa1000c,
+ 0x847b837b, 0x807b7d7b,
+ 0xbefe00c1, 0xbeff0080,
+ 0x7e008700, 0xe0685000,
+ 0x701d0000, 0x807d817d,
+ 0x8070ff70, 0x00000080,
+ 0xbf0a7b7d, 0xbfa2fff8,
+ 0xbfa00143, 0xbef4007e,
+ 0x8b75ff7f, 0x0000ffff,
+ 0x8c75ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x10807fac, 0xb8f202dc,
+ 0x84729972, 0x8b6eff7f,
+ 0x04000000, 0xbfa10039,
+ 0xbefe00c1, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa20002, 0xbeff0080,
+ 0xbfa00001, 0xbeff00c1,
+ 0xb8ef4306, 0x8b6fc16f,
+ 0xbfa1002e, 0x846f886f,
+ 0xbef6006f, 0xb8f83b05,
+ 0x80788178, 0xbf0d9972,
+ 0xbfa20002, 0x84788978,
+ 0xbfa00001, 0x84788a78,
+ 0xb8ee1e06, 0x846e8a6e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000200, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbefd0080, 0xbfa2000c,
+ 0xe0500000, 0x781d0000,
+ 0xbf890000, 0xdac00000,
+ 0x00000000, 0x807dff7d,
+ 0x00000080, 0x8078ff78,
+ 0x00000080, 0xbf0a6f7d,
+ 0xbfa2fff5, 0xbfa0000b,
+ 0xe0500000, 0x781d0000,
+ 0xbf890000, 0xdac00000,
+ 0x00000000, 0x807dff7d,
+ 0x00000100, 0x8078ff78,
+ 0x00000100, 0xbf0a6f7d,
+ 0xbfa2fff5, 0xbef80080,
+ 0xbefe00c1, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa20002, 0xbeff0080,
+ 0xbfa00001, 0xbeff00c1,
+ 0xb8ef3b05, 0x806f816f,
+ 0x846f826f, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa20024, 0xbef600ff,
+ 0x01000000, 0xbeee0078,
+ 0x8078ff78, 0x00000200,
+ 0xbefd0084, 0xbf0a6f7d,
+ 0xbfa10050, 0xe0505000,
+ 0x781d0000, 0xe0505080,
+ 0x781d0100, 0xe0505100,
+ 0x781d0200, 0xe0505180,
+ 0x781d0300, 0xbf890000,
+ 0x7e008500, 0x7e028501,
+ 0x7e048502, 0x7e068503,
+ 0x807d847d, 0x8078ff78,
+ 0x00000200, 0xbf0a6f7d,
+ 0xbfa2ffee, 0xe0505000,
+ 0x6e1d0000, 0xe0505080,
+ 0x6e1d0100, 0xe0505100,
+ 0x6e1d0200, 0xe0505180,
+ 0x6e1d0300, 0xbf890000,
+ 0xbfa00034, 0xbef600ff,
+ 0x01000000, 0xbeee0078,
+ 0x8078ff78, 0x00000400,
+ 0xbefd0084, 0xbf0a6f7d,
+ 0xbfa10012, 0xe0505000,
+ 0x781d0000, 0xe0505100,
+ 0x781d0100, 0xe0505200,
+ 0x781d0200, 0xe0505300,
+ 0x781d0300, 0xbf890000,
+ 0x7e008500, 0x7e028501,
+ 0x7e048502, 0x7e068503,
+ 0x807d847d, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7d,
+ 0xbfa2ffee, 0xb8ef1e06,
+ 0x8b6fc16f, 0xbfa1000e,
+ 0x846f836f, 0x806f7d6f,
+ 0xbefe00c1, 0xbeff0080,
+ 0xe0505000, 0x781d0000,
+ 0xbf890000, 0x7e008500,
+ 0x807d817d, 0x8078ff78,
+ 0x00000080, 0xbf0a6f7d,
+ 0xbfa2fff7, 0xbeff00c1,
+ 0xe0505000, 0x6e1d0000,
+ 0xe0505100, 0x6e1d0100,
+ 0xe0505200, 0x6e1d0200,
+ 0xe0505300, 0x6e1d0300,
+ 0xbf890000, 0xb8f83b05,
+ 0x80788178, 0xbf0d9972,
+ 0xbfa20002, 0x84788978,
+ 0xbfa00001, 0x84788a78,
+ 0xb8ee1e06, 0x846e8a6e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000200, 0x80f8ff78,
+ 0x00000050, 0xbef600ff,
+ 0x01000000, 0xbefd00ff,
+ 0x0000006c, 0x80f89078,
+ 0xf428403a, 0xf0000000,
+ 0xbf890000, 0x80fd847d,
+ 0xbf800000, 0xbe804300,
+ 0xbe824302, 0x80f8a078,
+ 0xf42c403a, 0xf0000000,
+ 0xbf890000, 0x80fd887d,
+ 0xbf800000, 0xbe804300,
+ 0xbe824302, 0xbe844304,
+ 0xbe864306, 0x80f8c078,
+ 0xf430403a, 0xf0000000,
+ 0xbf890000, 0x80fd907d,
+ 0xbf800000, 0xbe804300,
+ 0xbe824302, 0xbe844304,
+ 0xbe864306, 0xbe884308,
+ 0xbe8a430a, 0xbe8c430c,
+ 0xbe8e430e, 0xbf06807d,
+ 0xbfa1fff0, 0xb980f801,
+ 0x00000000, 0xbfbd0000,
+ 0xb8f83b05, 0x80788178,
+ 0xbf0d9972, 0xbfa20002,
+ 0x84788978, 0xbfa00001,
+ 0x84788a78, 0xb8ee1e06,
+ 0x846e8a6e, 0x80786e78,
+ 0x8078ff78, 0x00000200,
+ 0xbef600ff, 0x01000000,
+ 0xf4205bfa, 0xf0000000,
+ 0x80788478, 0xf4205b3a,
+ 0xf0000000, 0x80788478,
+ 0xf4205b7a, 0xf0000000,
+ 0x80788478, 0xf4205c3a,
+ 0xf0000000, 0x80788478,
+ 0xf4205c7a, 0xf0000000,
+ 0x80788478, 0xf4205eba,
+ 0xf0000000, 0x80788478,
+ 0xf4205efa, 0xf0000000,
+ 0x80788478, 0xf4205e7a,
+ 0xf0000000, 0x80788478,
+ 0xf4205cfa, 0xf0000000,
+ 0x80788478, 0xf4205bba,
+ 0xf0000000, 0x80788478,
+ 0xbf890000, 0xb96ef814,
+ 0xf4205bba, 0xf0000000,
+ 0x80788478, 0xbf890000,
+ 0xb96ef815, 0xbefd006f,
+ 0xbefe0070, 0xbeff0071,
+ 0xb97b4803, 0x857b8b7b,
+ 0xb97b22c3, 0x857b867b,
+ 0xb97b7443, 0xb973f801,
+ 0xb8ee3b05, 0x806e816e,
+ 0xbf0d9972, 0xbfa20002,
+ 0x846e896e, 0xbfa00001,
+ 0x846e8a6e, 0xb8ef1e06,
+ 0x846f8a6f, 0x806e6f6e,
0x806eff6e, 0x00000200,
0x806e746e, 0x826f8075,
- 0x876fff6f, 0x0000ffff,
- 0xf4091c37, 0xfa000050,
- 0xf4091d37, 0xfa000060,
- 0xf4011e77, 0xfa000074,
- 0xbf8cc07f, 0x876dff6d,
- 0x0000ffff, 0x87fe7e7e,
- 0x87ea6a6a, 0xb9faf802,
- 0xbe80226c, 0xbf810000,
+ 0x8b6fff6f, 0x0000ffff,
+ 0xf4085c37, 0xf8000050,
+ 0xf4085d37, 0xf8000060,
+ 0xf4005e77, 0xf8000074,
+ 0xbf890000, 0x8b6dff6d,
+ 0x0000ffff, 0x8bfe7e7e,
+ 0x8bea6a6a, 0xb8eef802,
+ 0xbf0d866e, 0xbfa20002,
+ 0xb97af802, 0xbe80486c,
+ 0xb97af802, 0xbe804a6c,
+ 0xbfb10000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+};
+
+static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
+ 0xbf820001, 0xbf8202dc,
+ 0xb8f8f802, 0x8978ff78,
+ 0x00020006, 0xb8fbf803,
+ 0x866eff78, 0x00002000,
+ 0xbf840009, 0x866eff6d,
+ 0x00ff0000, 0xbf85001a,
+ 0x866eff7b, 0x00000400,
+ 0xbf850051, 0xbf8e0010,
+ 0xb8fbf803, 0xbf82fffa,
+ 0x866eff7b, 0x03c00900,
+ 0xbf850011, 0x866eff7b,
+ 0x000071ff, 0xbf840008,
+ 0x866fff7b, 0x00007080,
+ 0xbf840001, 0xbeee1a87,
+ 0xb8eff801, 0x8e6e8c6e,
+ 0x866e6f6e, 0xbf850006,
+ 0x866eff6d, 0x00ff0000,
+ 0xbf850003, 0x866eff7b,
+ 0x00000400, 0xbf85003a,
+ 0xb8faf807, 0x867aff7a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8979ff79, 0xfc000000,
+ 0x87797a79, 0xba7ff807,
+ 0x00000000, 0xb8faf812,
+ 0xb8fbf813, 0x8efa887a,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x877bff7b, 0xffff0000,
+ 0xc0031bbd, 0x00000010,
+ 0xbf8cc07f, 0x8e6e976e,
+ 0x8979ff79, 0x00800000,
+ 0x87796e79, 0xc0071bbd,
+ 0x00000000, 0xbf8cc07f,
+ 0xc0071ebd, 0x00000008,
+ 0xbf8cc07f, 0x86ee6e6e,
+ 0xbf840001, 0xbe801d6e,
+ 0x866eff6d, 0x01ff0000,
+ 0xbf850005, 0x8778ff78,
+ 0x00002000, 0x80ec886c,
+ 0x82ed806d, 0xbf820005,
+ 0x866eff6d, 0x01000000,
+ 0xbf850002, 0x806c846c,
+ 0x826d806d, 0x866dff6d,
+ 0x0000ffff, 0x8f7a8b79,
+ 0x867aff7a, 0x001f8000,
+ 0xb97af807, 0x86fe7e7e,
+ 0x86ea6a6a, 0x8f6e8378,
+ 0xb96ee0c2, 0xbf800002,
+ 0xb9780002, 0xbe801f6c,
+ 0x866dff6d, 0x0000ffff,
+ 0xbefa0080, 0xb97a0283,
+ 0xb8faf807, 0x867aff7a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8979ff79, 0xfc000000,
+ 0x87797a79, 0xba7ff807,
+ 0x00000000, 0xbeee007e,
+ 0xbeef007f, 0xbefe0180,
+ 0xbf900004, 0x877a8478,
+ 0xb97af802, 0xbf8e0002,
+ 0xbf88fffe, 0xb8fa2985,
+ 0x807a817a, 0x8e7a8a7a,
+ 0x8e7a817a, 0xb8fb1605,
+ 0x807b817b, 0x8e7b867b,
+ 0x807a7b7a, 0x807a7e7a,
+ 0x827b807f, 0x867bff7b,
+ 0x0000ffff, 0xc04b1c3d,
+ 0x00000050, 0xbf8cc07f,
+ 0xc04b1d3d, 0x00000060,
+ 0xbf8cc07f, 0xc0431e7d,
+ 0x00000074, 0xbf8cc07f,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0xbef1007c, 0xbef00080,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xbefe007c,
+ 0xbefc0070, 0xc0611c7a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611b3a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc0070, 0xc0611b7a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611bba, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc0070, 0xc0611bfa,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611e3a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8fbf803,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611efa, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc0070, 0xc0611a3a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611a7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8f1f801,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611c7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbf108080,
+ 0x867aff7f, 0x04000000,
+ 0xbeef0080, 0x876f6f7a,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fb1605, 0x807b817b,
+ 0x8e7b847b, 0x8e76827b,
+ 0xbef600ff, 0x01000000,
+ 0xbef20174, 0x80747074,
+ 0x82758075, 0xbefc0080,
+ 0xbf800000, 0xbe802b00,
+ 0xbe822b02, 0xbe842b04,
+ 0xbe862b06, 0xbe882b08,
+ 0xbe8a2b0a, 0xbe8c2b0c,
+ 0xbe8e2b0e, 0xc06b003a,
+ 0x00000000, 0xbf8cc07f,
+ 0xc06b013a, 0x00000010,
+ 0xbf8cc07f, 0xc06b023a,
+ 0x00000020, 0xbf8cc07f,
+ 0xc06b033a, 0x00000030,
+ 0xbf8cc07f, 0x8074c074,
+ 0x82758075, 0x807c907c,
+ 0xbf0a7b7c, 0xbf85ffe7,
+ 0xbef40172, 0xbef00080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbee80080, 0xbee90080,
+ 0xbef600ff, 0x01000000,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf85004d, 0xbe840080,
+ 0xd2890000, 0x00000900,
+ 0x80048104, 0xd2890001,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
+ 0x80048104, 0xd2890003,
+ 0x00000900, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
+ 0x80048104, 0xd2890002,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000902,
+ 0x80048104, 0xd2890001,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
+ 0x80048104, 0xd2890003,
+ 0x00000902, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbf820008,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8fb4306, 0x867bc17b,
+ 0xbf840064, 0xbf8a0000,
+ 0x867aff6f, 0x04000000,
+ 0xbf840060, 0x8e7b867b,
+ 0x8e7b827b, 0xbef6007b,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xd28c0002,
+ 0x000100c1, 0xd28d0003,
+ 0x000204c1, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf850030,
+ 0x24040682, 0xd86e4000,
+ 0x00000002, 0xbf8cc07f,
+ 0xbe840080, 0xd2890000,
+ 0x00000900, 0x80048104,
+ 0xd2890001, 0x00000900,
+ 0x80048104, 0xd2890002,
+ 0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000901,
+ 0x80048104, 0xd2890001,
+ 0x00000901, 0x80048104,
+ 0xd2890002, 0x00000901,
+ 0x80048104, 0xd2890003,
+ 0x00000901, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0x680404ff, 0x00000200,
+ 0xd0c9006a, 0x0000f702,
+ 0xbf87ffd2, 0xbf820015,
+ 0xd1060002, 0x00011103,
+ 0x7e0602ff, 0x00000200,
+ 0xbefc00ff, 0x00010000,
+ 0xbe800077, 0x8677ff77,
+ 0xff7fffff, 0x8777ff77,
+ 0x00058000, 0xd8ec0000,
+ 0x00000002, 0xbf8cc07f,
+ 0xe0765000, 0x701d0002,
+ 0x68040702, 0xd0c9006a,
+ 0x0000f702, 0xbf87fff7,
+ 0xbef70000, 0xbef000ff,
+ 0x00000400, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb2b05,
+ 0x807b817b, 0x8e7b827b,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0084, 0xbf0a7b7c,
+ 0xbf84006d, 0xbf11017c,
+ 0x807bff7b, 0x00001000,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf850051, 0xbe840080,
+ 0xd2890000, 0x00000900,
+ 0x80048104, 0xd2890001,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
+ 0x80048104, 0xd2890003,
+ 0x00000900, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
+ 0x80048104, 0xd2890002,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000902,
+ 0x80048104, 0xd2890001,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
+ 0x80048104, 0xd2890003,
+ 0x00000902, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0x807c847c,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf9c0000, 0xbf820012,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7c,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xb8fb2985, 0x807b817b,
+ 0x8e7b837b, 0xb8fa2b05,
+ 0x807a817a, 0x8e7a827a,
+ 0x80fb7a7b, 0x867b7b7b,
+ 0xbf84007a, 0x807bff7b,
+ 0x00001000, 0xbefc0080,
+ 0xbf11017c, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf850059,
+ 0xd3d84000, 0x18000100,
+ 0xd3d84001, 0x18000101,
+ 0xd3d84002, 0x18000102,
+ 0xd3d84003, 0x18000103,
+ 0xbe840080, 0xd2890000,
+ 0x00000900, 0x80048104,
+ 0xd2890001, 0x00000900,
+ 0x80048104, 0xd2890002,
+ 0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000901,
+ 0x80048104, 0xd2890001,
+ 0x00000901, 0x80048104,
+ 0xd2890002, 0x00000901,
+ 0x80048104, 0xd2890003,
+ 0x00000901, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000902, 0x80048104,
+ 0xd2890001, 0x00000902,
+ 0x80048104, 0xd2890002,
+ 0x00000902, 0x80048104,
+ 0xd2890003, 0x00000902,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000903,
+ 0x80048104, 0xd2890001,
+ 0x00000903, 0x80048104,
+ 0xd2890002, 0x00000903,
+ 0x80048104, 0xd2890003,
+ 0x00000903, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0x807c847c, 0xbf0a7b7c,
+ 0xbf85ffa9, 0xbf9c0000,
+ 0xbf820016, 0xd3d84000,
+ 0x18000100, 0xd3d84001,
+ 0x18000101, 0xd3d84002,
+ 0x18000102, 0xd3d84003,
+ 0x18000103, 0xe0724000,
+ 0x701d0000, 0xe0724100,
+ 0x701d0100, 0xe0724200,
+ 0x701d0200, 0xe0724300,
+ 0x701d0300, 0x807c847c,
+ 0x8070ff70, 0x00000400,
+ 0xbf0a7b7c, 0xbf85ffeb,
+ 0xbf9c0000, 0xbf8200ee,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0x866eff7f, 0x04000000,
+ 0xbf84001f, 0xbefe00c1,
+ 0xbeff00c1, 0xb8ef4306,
+ 0x866fc16f, 0xbf84001a,
+ 0x8e6f866f, 0x8e6f826f,
+ 0xbef6006f, 0xb8f82985,
+ 0x80788178, 0x8e788a78,
+ 0x8e788178, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xe0510000, 0x781d0000,
+ 0xe0510100, 0x781d0000,
+ 0x807cff7c, 0x00000200,
+ 0x8078ff78, 0x00000200,
+ 0xbf0a6f7c, 0xbf85fff6,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbef600ff, 0x01000000,
+ 0xb8ef2b05, 0x806f816f,
+ 0x8e6f826f, 0x806fff6f,
+ 0x00008000, 0xbef80080,
+ 0xbeee0078, 0x8078ff78,
+ 0x00000400, 0xbefc0084,
+ 0xbf11087c, 0xe0524000,
+ 0x781d0000, 0xe0524100,
+ 0x781d0100, 0xe0524200,
+ 0x781d0200, 0xe0524300,
+ 0x781d0300, 0xbf8c0f70,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0x807c847c, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7c,
+ 0xbf85ffee, 0xb8ef2985,
+ 0x806f816f, 0x8e6f836f,
+ 0xb8f92b05, 0x80798179,
+ 0x8e798279, 0x80ef796f,
+ 0x866f6f6f, 0xbf84001a,
+ 0x806fff6f, 0x00008000,
+ 0xbefc0080, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
+ 0xbf8c0f70, 0xd3d94000,
+ 0x18000100, 0xd3d94001,
+ 0x18000101, 0xd3d94002,
+ 0x18000102, 0xd3d94003,
+ 0x18000103, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffea,
+ 0xbf9c0000, 0xe0524000,
+ 0x6e1d0000, 0xe0524100,
+ 0x6e1d0100, 0xe0524200,
+ 0x6e1d0200, 0xe0524300,
+ 0x6e1d0300, 0xbf8c0f70,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x80f8c078, 0xb8ef1605,
+ 0x806f816f, 0x8e6f846f,
+ 0x8e76826f, 0xbef600ff,
+ 0x01000000, 0xbefc006f,
+ 0xc031003a, 0x00000078,
+ 0x80f8c078, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe802d00, 0xbe822d02,
+ 0xbe842d04, 0xbe862d06,
+ 0xbe882d08, 0xbe8a2d0a,
+ 0xbe8c2d0c, 0xbe8e2d0e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xc0211bfa,
+ 0x00000078, 0x80788478,
+ 0xc0211b3a, 0x00000078,
+ 0x80788478, 0xc0211b7a,
+ 0x00000078, 0x80788478,
+ 0xc0211c3a, 0x00000078,
+ 0x80788478, 0xc0211c7a,
+ 0x00000078, 0x80788478,
+ 0xc0211eba, 0x00000078,
+ 0x80788478, 0xc0211efa,
+ 0x00000078, 0x80788478,
+ 0xc0211a3a, 0x00000078,
+ 0x80788478, 0xc0211a7a,
+ 0x00000078, 0x80788478,
+ 0xc0211cfa, 0x00000078,
+ 0x80788478, 0xbf8cc07f,
+ 0xbefc006f, 0xbefe0070,
+ 0xbeff0071, 0x866f7bff,
+ 0x000003ff, 0xb96f4803,
+ 0x866f7bff, 0xfffff800,
+ 0x8f6f8b6f, 0xb96fa2c3,
+ 0xb973f801, 0xb8ee2985,
+ 0x806e816e, 0x8e6e8a6e,
+ 0x8e6e816e, 0xb8ef1605,
+ 0x806f816f, 0x8e6f866f,
+ 0x806e6f6e, 0x806e746e,
+ 0x826f8075, 0x866fff6f,
+ 0x0000ffff, 0xc00b1c37,
+ 0x00000050, 0xc00b1d37,
+ 0x00000060, 0xc0031e77,
+ 0x00000074, 0xbf8cc07f,
+ 0x8f6e8b79, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0x8f6e837a, 0xb96ee0c2,
+ 0xbf800002, 0xb97a0002,
+ 0xbf8a0000, 0xbe801f6c,
+ 0xbf9b0000, 0x00000000,
+};
+
+static const uint32_t cwsr_trap_gfx12_hex[] = {
+ 0xbfa00001, 0xbfa002a2,
+ 0xb0804009, 0xb8f8f804,
+ 0x9178ff78, 0x00008c00,
+ 0xb8fbf811, 0x8b6eff78,
+ 0x00004000, 0xbfa10008,
+ 0x8b6eff7b, 0x00000080,
+ 0xbfa20018, 0x8b6ea07b,
+ 0xbfa20042, 0xbf830010,
+ 0xb8fbf811, 0xbfa0fffb,
+ 0x8b6eff7b, 0x00000bd0,
+ 0xbfa20010, 0xb8eef812,
+ 0x8b6f8f7b, 0xbfa10002,
+ 0x8c6eff6e, 0x00000080,
+ 0xb8eff813, 0x8b6e6e6f,
+ 0xbfa20008, 0x8b6eff6d,
+ 0xf0000000, 0xbfa20005,
+ 0x8b6fff6f, 0x00000200,
+ 0xbfa20002, 0x8b6ea07b,
+ 0xbfa2002c, 0xbefa4d82,
+ 0xbf8a0000, 0x84fa887a,
+ 0xbf0d8f7b, 0xbfa10002,
+ 0x8c7bff7b, 0xffff0000,
+ 0xf4601bbd, 0xf8000010,
+ 0xbf8a0000, 0x846e976e,
+ 0x9177ff77, 0x00800000,
+ 0x8c776e77, 0xf4603bbd,
+ 0xf8000000, 0xbf8a0000,
+ 0xf4603ebd, 0xf8000008,
+ 0xbf8a0000, 0x8bee6e6e,
+ 0xbfa10001, 0xbe80486e,
+ 0x8b6eff6d, 0xf0000000,
+ 0xbfa20009, 0xb8eef811,
+ 0x8b6eff6e, 0x00000080,
+ 0xbfa20007, 0x8c78ff78,
+ 0x00004000, 0x80ec886c,
+ 0x82ed806d, 0xbfa00002,
+ 0x806c846c, 0x826d806d,
+ 0x8b6dff6d, 0x0000ffff,
+ 0x8bfe7e7e, 0x8bea6a6a,
+ 0x85788978, 0xb9783244,
+ 0xbe804a6c, 0xb8faf802,
+ 0xbf0d987a, 0xbfa10001,
+ 0xbfb00000, 0x8b6dff6d,
+ 0x0000ffff, 0xbefa0080,
+ 0xb97a0151, 0xbeee007e,
+ 0xbeef007f, 0xbefe0180,
+ 0xbefe4d84, 0xbf8a0000,
+ 0x8b7aff7f, 0x04000000,
+ 0x847a857a, 0x8c6d7a6d,
+ 0xbefa007e, 0x8b7bff7f,
+ 0x0000ffff, 0xbefe00c1,
+ 0xbeff00c1, 0xee0a407a,
+ 0x000c0000, 0x00000000,
+ 0x7e000280, 0xbefe007a,
+ 0xbeff007b, 0xb8fb0742,
+ 0x847b997b, 0xb8fa3b05,
+ 0x807a817a, 0xbf0d997b,
+ 0xbfa20002, 0x847a897a,
+ 0xbfa00001, 0x847a8a7a,
+ 0xb8fb1e06, 0x847b8a7b,
+ 0x807a7b7a, 0x8b7bff7f,
+ 0x0000ffff, 0x807aff7a,
+ 0x00000200, 0x807a7e7a,
+ 0x827b807b, 0xd7610000,
+ 0x00010870, 0xd7610000,
+ 0x00010a71, 0xd7610000,
+ 0x00010c72, 0xd7610000,
+ 0x00010e73, 0xd7610000,
+ 0x00011074, 0xd7610000,
+ 0x00011275, 0xd7610000,
+ 0x00011476, 0xd7610000,
+ 0x00011677, 0xd7610000,
+ 0x00011a79, 0xd7610000,
+ 0x00011c7e, 0xd7610000,
+ 0x00011e7f, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xbefe00ff,
+ 0x00003fff, 0xbeff0080,
+ 0xee0a407a, 0x000c0000,
+ 0x00004000, 0xd760007a,
+ 0x00011d00, 0xd760007b,
+ 0x00011f00, 0xbefe007a,
+ 0xbeff007b, 0xbef4007e,
+ 0x8b75ff7f, 0x0000ffff,
+ 0x8c75ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x10807fac, 0xbef1007d,
+ 0xbef00080, 0xb8f30742,
+ 0x84739973, 0xbefe00c1,
+ 0x857d9973, 0x8b7d817d,
+ 0xbf06817d, 0xbfa20002,
+ 0xbeff0080, 0xbfa00002,
+ 0xbeff00c1, 0xbfa0000c,
+ 0xbef600ff, 0x01000000,
+ 0xc4068070, 0x008ce801,
+ 0x00008000, 0xc4068070,
+ 0x008ce802, 0x00010000,
+ 0xc4068070, 0x008ce803,
+ 0x00018000, 0xbfa0000b,
+ 0xbef600ff, 0x01000000,
+ 0xc4068070, 0x008ce801,
+ 0x00010000, 0xc4068070,
+ 0x008ce802, 0x00020000,
+ 0xc4068070, 0x008ce803,
+ 0x00030000, 0xb8f03b05,
+ 0x80708170, 0xbf0d9973,
+ 0xbfa20002, 0x84708970,
+ 0xbfa00001, 0x84708a70,
+ 0xb8fa1e06, 0x847a8a7a,
+ 0x80707a70, 0x8070ff70,
+ 0x00000200, 0xbef600ff,
+ 0x01000000, 0x7e000280,
+ 0x7e020280, 0x7e040280,
+ 0xbe804ec2, 0xbf94fffe,
+ 0xb8faf804, 0x8b7a847a,
+ 0x91788478, 0x8c787a78,
+ 0x917aff6d, 0x80000000,
+ 0xd7610002, 0x00010071,
+ 0xd7610002, 0x0001026c,
+ 0xd7610002, 0x0001047a,
+ 0xd7610002, 0x0001066e,
+ 0xd7610002, 0x0001086f,
+ 0xd7610002, 0x00010a78,
+ 0xd7610002, 0x00010e7b,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xb8faf811, 0xd7610002,
+ 0x00010c7a, 0xb8faf801,
+ 0xd7610002, 0x0001107a,
+ 0xb8faf814, 0xd7610002,
+ 0x0001127a, 0xb8faf815,
+ 0xd7610002, 0x0001147a,
+ 0xb8faf812, 0xd7610002,
+ 0x0001167a, 0xb8faf813,
+ 0xd7610002, 0x0001187a,
+ 0xb8faf802, 0xd7610002,
+ 0x00011a7a, 0xbefa50c1,
+ 0xbfc70000, 0xd7610002,
+ 0x00011c7a, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xbefe00ff,
+ 0x0000ffff, 0xbeff0080,
+ 0xc4068070, 0x008ce802,
+ 0x00000000, 0xbefe00c1,
+ 0xb8f03b05, 0x80708170,
+ 0xbf0d9973, 0xbfa20002,
+ 0x84708970, 0xbfa00001,
+ 0x84708a70, 0xb8fa1e06,
+ 0x847a8a7a, 0x80707a70,
+ 0xbef600ff, 0x01000000,
+ 0xbef90080, 0xbefd0080,
+ 0xbf800000, 0xbe804100,
+ 0xbe824102, 0xbe844104,
+ 0xbe864106, 0xbe884108,
+ 0xbe8a410a, 0xbe8c410c,
+ 0xbe8e410e, 0xbf068079,
+ 0xbfa10032, 0xd7610002,
+ 0x00010000, 0xd7610002,
+ 0x00010201, 0xd7610002,
+ 0x00010402, 0xd7610002,
+ 0x00010603, 0xd7610002,
+ 0x00010804, 0xd7610002,
+ 0x00010a05, 0xd7610002,
+ 0x00010c06, 0xd7610002,
+ 0x00010e07, 0xd7610002,
+ 0x00011008, 0xd7610002,
+ 0x00011209, 0xd7610002,
+ 0x0001140a, 0xd7610002,
+ 0x0001160b, 0xd7610002,
+ 0x0001180c, 0xd7610002,
+ 0x00011a0d, 0xd7610002,
+ 0x00011c0e, 0xd7610002,
+ 0x00011e0f, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x80799079,
+ 0xbfa00038, 0xd7610002,
+ 0x00012000, 0xd7610002,
+ 0x00012201, 0xd7610002,
+ 0x00012402, 0xd7610002,
+ 0x00012603, 0xd7610002,
+ 0x00012804, 0xd7610002,
+ 0x00012a05, 0xd7610002,
+ 0x00012c06, 0xd7610002,
+ 0x00012e07, 0xd7610002,
+ 0x00013008, 0xd7610002,
+ 0x00013209, 0xd7610002,
+ 0x0001340a, 0xd7610002,
+ 0x0001360b, 0xd7610002,
+ 0x0001380c, 0xd7610002,
+ 0x00013a0d, 0xd7610002,
+ 0x00013c0e, 0xd7610002,
+ 0x00013e0f, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x80799079,
+ 0xc4068070, 0x008ce802,
+ 0x00000000, 0x8070ff70,
+ 0x00000080, 0xbef90080,
+ 0x7e040280, 0x807d907d,
+ 0xbf0aff7d, 0x00000060,
+ 0xbfa2ff88, 0xbe804100,
+ 0xbe824102, 0xbe844104,
+ 0xbe864106, 0xbe884108,
+ 0xbe8a410a, 0xd7610002,
+ 0x00010000, 0xd7610002,
+ 0x00010201, 0xd7610002,
+ 0x00010402, 0xd7610002,
+ 0x00010603, 0xd7610002,
+ 0x00010804, 0xd7610002,
+ 0x00010a05, 0xd7610002,
+ 0x00010c06, 0xd7610002,
+ 0x00010e07, 0xd7610002,
+ 0x00011008, 0xd7610002,
+ 0x00011209, 0xd7610002,
+ 0x0001140a, 0xd7610002,
+ 0x0001160b, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xc4068070,
+ 0x008ce802, 0x00000000,
+ 0xbefe00c1, 0x857d9973,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa20002, 0xbeff0080,
+ 0xbfa00001, 0xbeff00c1,
+ 0xb8fb4306, 0x8b7bc17b,
+ 0xbfa10044, 0x8b7aff6d,
+ 0x80000000, 0xbfa10041,
+ 0x847b897b, 0xbef6007b,
+ 0xb8f03b05, 0x80708170,
+ 0xbf0d9973, 0xbfa20002,
+ 0x84708970, 0xbfa00001,
+ 0x84708a70, 0xb8fa1e06,
+ 0x847a8a7a, 0x80707a70,
+ 0x8070ff70, 0x00000200,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xd71f0000, 0x000100c1,
+ 0xd7200000, 0x000200c1,
+ 0x16000084, 0x857d9973,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbefd0080, 0xbfa20013,
+ 0xbe8300ff, 0x00000080,
+ 0xbf800000, 0xbf800000,
+ 0xbf800000, 0xd8d80000,
+ 0x01000000, 0xbf8a0000,
+ 0xc4068070, 0x008ce801,
+ 0x00000000, 0x807d037d,
+ 0x80700370, 0xd5250000,
+ 0x0001ff00, 0x00000080,
+ 0xbf0a7b7d, 0xbfa2fff3,
+ 0xbfa00012, 0xbe8300ff,
+ 0x00000100, 0xbf800000,
+ 0xbf800000, 0xbf800000,
+ 0xd8d80000, 0x01000000,
+ 0xbf8a0000, 0xc4068070,
+ 0x008ce801, 0x00000000,
+ 0x807d037d, 0x80700370,
+ 0xd5250000, 0x0001ff00,
+ 0x00000100, 0xbf0a7b7d,
+ 0xbfa2fff3, 0xbefe00c1,
+ 0x857d9973, 0x8b7d817d,
+ 0xbf06817d, 0xbfa20004,
+ 0xbef000ff, 0x00000200,
+ 0xbeff0080, 0xbfa00003,
+ 0xbef000ff, 0x00000400,
+ 0xbeff00c1, 0xb8fb3b05,
+ 0x807b817b, 0x847b827b,
+ 0x857d9973, 0x8b7d817d,
+ 0xbf06817d, 0xbfa2001b,
+ 0xbef600ff, 0x01000000,
+ 0xbefd0084, 0xbf0a7b7d,
+ 0xbfa10040, 0x7e008700,
+ 0x7e028701, 0x7e048702,
+ 0x7e068703, 0xc4068070,
+ 0x008ce800, 0x00000000,
+ 0xc4068070, 0x008ce801,
+ 0x00008000, 0xc4068070,
+ 0x008ce802, 0x00010000,
+ 0xc4068070, 0x008ce803,
+ 0x00018000, 0x807d847d,
+ 0x8070ff70, 0x00000200,
+ 0xbf0a7b7d, 0xbfa2ffeb,
+ 0xbfa0002a, 0xbef600ff,
+ 0x01000000, 0xbefd0084,
+ 0xbf0a7b7d, 0xbfa10015,
+ 0x7e008700, 0x7e028701,
+ 0x7e048702, 0x7e068703,
+ 0xc4068070, 0x008ce800,
+ 0x00000000, 0xc4068070,
+ 0x008ce801, 0x00010000,
+ 0xc4068070, 0x008ce802,
+ 0x00020000, 0xc4068070,
+ 0x008ce803, 0x00030000,
+ 0x807d847d, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7d,
+ 0xbfa2ffeb, 0xb8fb1e06,
+ 0x8b7bc17b, 0xbfa1000d,
+ 0x847b837b, 0x807b7d7b,
+ 0xbefe00c1, 0xbeff0080,
+ 0x7e008700, 0xc4068070,
+ 0x008ce800, 0x00000000,
+ 0x807d817d, 0x8070ff70,
+ 0x00000080, 0xbf0a7b7d,
+ 0xbfa2fff7, 0xbfa0016e,
+ 0xbef4007e, 0x8b75ff7f,
+ 0x0000ffff, 0x8c75ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x10807fac,
+ 0xbef1007f, 0xb8f20742,
+ 0x84729972, 0x8b6eff7f,
+ 0x04000000, 0xbfa1003b,
+ 0xbefe00c1, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa20002, 0xbeff0080,
+ 0xbfa00001, 0xbeff00c1,
+ 0xb8ef4306, 0x8b6fc16f,
+ 0xbfa10030, 0x846f896f,
+ 0xbef6006f, 0xb8f83b05,
+ 0x80788178, 0xbf0d9972,
+ 0xbfa20002, 0x84788978,
+ 0xbfa00001, 0x84788a78,
+ 0xb8ee1e06, 0x846e8a6e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000200, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbefd0080, 0xbfa2000d,
+ 0xc4050078, 0x0080e800,
+ 0x00000000, 0xbf8a0000,
+ 0xdac00000, 0x00000000,
+ 0x807dff7d, 0x00000080,
+ 0x8078ff78, 0x00000080,
+ 0xbf0a6f7d, 0xbfa2fff4,
+ 0xbfa0000c, 0xc4050078,
+ 0x0080e800, 0x00000000,
+ 0xbf8a0000, 0xdac00000,
+ 0x00000000, 0x807dff7d,
+ 0x00000100, 0x8078ff78,
+ 0x00000100, 0xbf0a6f7d,
+ 0xbfa2fff4, 0xbef80080,
+ 0xbefe00c1, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa20002, 0xbeff0080,
+ 0xbfa00001, 0xbeff00c1,
+ 0xb8ef3b05, 0x806f816f,
+ 0x846f826f, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa2002c, 0xbef600ff,
+ 0x01000000, 0xbeee0078,
+ 0x8078ff78, 0x00000200,
+ 0xbefd0084, 0xbf0a6f7d,
+ 0xbfa10061, 0xc4050078,
+ 0x008ce800, 0x00000000,
+ 0xc4050078, 0x008ce801,
+ 0x00008000, 0xc4050078,
+ 0x008ce802, 0x00010000,
+ 0xc4050078, 0x008ce803,
+ 0x00018000, 0xbf8a0000,
+ 0x7e008500, 0x7e028501,
+ 0x7e048502, 0x7e068503,
+ 0x807d847d, 0x8078ff78,
+ 0x00000200, 0xbf0a6f7d,
+ 0xbfa2ffea, 0xc405006e,
+ 0x008ce800, 0x00000000,
+ 0xc405006e, 0x008ce801,
+ 0x00008000, 0xc405006e,
+ 0x008ce802, 0x00010000,
+ 0xc405006e, 0x008ce803,
+ 0x00018000, 0xbf8a0000,
+ 0xbfa0003d, 0xbef600ff,
+ 0x01000000, 0xbeee0078,
+ 0x8078ff78, 0x00000400,
+ 0xbefd0084, 0xbf0a6f7d,
+ 0xbfa10016, 0xc4050078,
+ 0x008ce800, 0x00000000,
+ 0xc4050078, 0x008ce801,
+ 0x00010000, 0xc4050078,
+ 0x008ce802, 0x00020000,
+ 0xc4050078, 0x008ce803,
+ 0x00030000, 0xbf8a0000,
+ 0x7e008500, 0x7e028501,
+ 0x7e048502, 0x7e068503,
+ 0x807d847d, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7d,
+ 0xbfa2ffea, 0xb8ef1e06,
+ 0x8b6fc16f, 0xbfa1000f,
+ 0x846f836f, 0x806f7d6f,
+ 0xbefe00c1, 0xbeff0080,
+ 0xc4050078, 0x008ce800,
+ 0x00000000, 0xbf8a0000,
+ 0x7e008500, 0x807d817d,
+ 0x8078ff78, 0x00000080,
+ 0xbf0a6f7d, 0xbfa2fff6,
+ 0xbeff00c1, 0xc405006e,
+ 0x008ce800, 0x00000000,
+ 0xc405006e, 0x008ce801,
+ 0x00010000, 0xc405006e,
+ 0x008ce802, 0x00020000,
+ 0xc405006e, 0x008ce803,
+ 0x00030000, 0xbf8a0000,
+ 0xb8f83b05, 0x80788178,
+ 0xbf0d9972, 0xbfa20002,
+ 0x84788978, 0xbfa00001,
+ 0x84788a78, 0xb8ee1e06,
+ 0x846e8a6e, 0x80786e78,
+ 0x8078ff78, 0x00000200,
+ 0x80f8ff78, 0x00000050,
+ 0xbef600ff, 0x01000000,
+ 0xbefd00ff, 0x0000006c,
+ 0x80f89078, 0xf462403a,
+ 0xf0000000, 0xbf8a0000,
+ 0x80fd847d, 0xbf800000,
+ 0xbe804300, 0xbe824302,
+ 0x80f8a078, 0xf462603a,
+ 0xf0000000, 0xbf8a0000,
+ 0x80fd887d, 0xbf800000,
+ 0xbe804300, 0xbe824302,
+ 0xbe844304, 0xbe864306,
+ 0x80f8c078, 0xf462803a,
+ 0xf0000000, 0xbf8a0000,
+ 0x80fd907d, 0xbf800000,
+ 0xbe804300, 0xbe824302,
+ 0xbe844304, 0xbe864306,
+ 0xbe884308, 0xbe8a430a,
+ 0xbe8c430c, 0xbe8e430e,
+ 0xbf06807d, 0xbfa1fff0,
+ 0xb980f801, 0x00000000,
+ 0xb8f83b05, 0x80788178,
+ 0xbf0d9972, 0xbfa20002,
+ 0x84788978, 0xbfa00001,
+ 0x84788a78, 0xb8ee1e06,
+ 0x846e8a6e, 0x80786e78,
+ 0x8078ff78, 0x00000200,
+ 0xbef600ff, 0x01000000,
+ 0xbeff0071, 0xf4621bfa,
+ 0xf0000000, 0x80788478,
+ 0xf4621b3a, 0xf0000000,
+ 0x80788478, 0xf4621b7a,
+ 0xf0000000, 0x80788478,
+ 0xf4621c3a, 0xf0000000,
+ 0x80788478, 0xf4621c7a,
+ 0xf0000000, 0x80788478,
+ 0xf4621eba, 0xf0000000,
+ 0x80788478, 0xf4621efa,
+ 0xf0000000, 0x80788478,
+ 0xf4621e7a, 0xf0000000,
+ 0x80788478, 0xf4621cfa,
+ 0xf0000000, 0x80788478,
+ 0xf4621bba, 0xf0000000,
+ 0x80788478, 0xbf8a0000,
+ 0xb96ef814, 0xf4621bba,
+ 0xf0000000, 0x80788478,
+ 0xbf8a0000, 0xb96ef815,
+ 0xf4621bba, 0xf0000000,
+ 0x80788478, 0xbf8a0000,
+ 0xb96ef812, 0xf4621bba,
+ 0xf0000000, 0x80788478,
+ 0xbf8a0000, 0xb96ef813,
+ 0x8b6eff7f, 0x04000000,
+ 0xbfa1000d, 0x80788478,
+ 0xf4621bba, 0xf0000000,
+ 0x80788478, 0xbf8a0000,
+ 0xbf0d806e, 0xbfa10006,
+ 0x856e906e, 0x8b6e6e6e,
+ 0xbfa10003, 0xbe804ec1,
+ 0x816ec16e, 0xbfa0fffb,
+ 0xbefd006f, 0xbefe0070,
+ 0xbeff0071, 0xb97b2011,
+ 0x857b867b, 0xb97b0191,
+ 0x857b827b, 0xb97bba11,
+ 0xb973f801, 0xb8ee3b05,
+ 0x806e816e, 0xbf0d9972,
+ 0xbfa20002, 0x846e896e,
+ 0xbfa00001, 0x846e8a6e,
+ 0xb8ef1e06, 0x846f8a6f,
+ 0x806e6f6e, 0x806eff6e,
+ 0x00000200, 0x806e746e,
+ 0x826f8075, 0x8b6fff6f,
+ 0x0000ffff, 0xf4605c37,
+ 0xf8000050, 0xf4605d37,
+ 0xf8000060, 0xf4601e77,
+ 0xf8000074, 0xbf8a0000,
+ 0x8b6dff6d, 0x0000ffff,
+ 0x8bfe7e7e, 0x8bea6a6a,
+ 0xb97af804, 0xbe804ec2,
+ 0xbf94fffe, 0xbe804a6c,
+ 0xbe804ec2, 0xbf94fffe,
+ 0xbfb10000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
- 0xbf9f0000, 0x00000000,
+};
+
+static const uint32_t cwsr_trap_gfx9_5_0_hex[] = {
+ 0xbf820001, 0xbf8202ca,
+ 0xb8f8f802, 0x8978ff78,
+ 0x00020006, 0xb8fbf803,
+ 0x866eff78, 0x00002000,
+ 0xbf840009, 0x866eff6d,
+ 0x00ff0000, 0xbf85001a,
+ 0x866eff7b, 0x00000400,
+ 0xbf850051, 0xbf8e0010,
+ 0xb8fbf803, 0xbf82fffa,
+ 0x866eff7b, 0x03c00900,
+ 0xbf850011, 0x866eff7b,
+ 0x000071ff, 0xbf840008,
+ 0x866fff7b, 0x00007080,
+ 0xbf840001, 0xbeee1a87,
+ 0xb8eff801, 0x8e6e8c6e,
+ 0x866e6f6e, 0xbf850006,
+ 0x866eff6d, 0x00ff0000,
+ 0xbf850003, 0x866eff7b,
+ 0x00000400, 0xbf85003a,
+ 0xb8faf807, 0x867aff7a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8979ff79, 0xfc000000,
+ 0x87797a79, 0xba7ff807,
+ 0x00000000, 0xb8faf812,
+ 0xb8fbf813, 0x8efa887a,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x877bff7b, 0xffff0000,
+ 0xc0031bbd, 0x00000010,
+ 0xbf8cc07f, 0x8e6e976e,
+ 0x8979ff79, 0x00800000,
+ 0x87796e79, 0xc0071bbd,
+ 0x00000000, 0xbf8cc07f,
+ 0xc0071ebd, 0x00000008,
+ 0xbf8cc07f, 0x86ee6e6e,
+ 0xbf840001, 0xbe801d6e,
+ 0x866eff6d, 0x01ff0000,
+ 0xbf850005, 0x8778ff78,
+ 0x00002000, 0x80ec886c,
+ 0x82ed806d, 0xbf820005,
+ 0x866eff6d, 0x01000000,
+ 0xbf850002, 0x806c846c,
+ 0x826d806d, 0x866dff6d,
+ 0x0000ffff, 0x8f7a8b79,
+ 0x867aff7a, 0x001f8000,
+ 0xb97af807, 0x86fe7e7e,
+ 0x86ea6a6a, 0x8f6e8378,
+ 0xb96ee0c2, 0xbf800002,
+ 0xb9780002, 0xbe801f6c,
+ 0x866dff6d, 0x0000ffff,
+ 0xbefa0080, 0xb97a0283,
+ 0xb8faf807, 0x867aff7a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8979ff79, 0xfc000000,
+ 0x87797a79, 0xba7ff807,
+ 0x00000000, 0xbeee007e,
+ 0xbeef007f, 0xbefe0180,
+ 0xbf900004, 0x877a8478,
+ 0xb97af802, 0xbf8e0002,
+ 0xbf88fffe, 0xb8fa2985,
+ 0x807a817a, 0x8e7a8a7a,
+ 0x8e7a817a, 0xb8fb1605,
+ 0x807b817b, 0x8e7b867b,
+ 0x807a7b7a, 0x807a7e7a,
+ 0x827b807f, 0x867bff7b,
+ 0x0000ffff, 0xc04b1c3d,
+ 0x00000050, 0xbf8cc07f,
+ 0xc04b1d3d, 0x00000060,
+ 0xbf8cc07f, 0xc0431e7d,
+ 0x00000074, 0xbf8cc07f,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0xbef1007c, 0xbef00080,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xbefe007c,
+ 0xbefc0070, 0xc0611c7a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611b3a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc0070, 0xc0611b7a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611bba, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc0070, 0xc0611bfa,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611e3a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8fbf803,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611efa, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc0070, 0xc0611a3a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611a7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8f1f801,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611c7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbf108080,
+ 0x867aff7f, 0x04000000,
+ 0xbeef0080, 0x876f6f7a,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fb1605, 0x807b817b,
+ 0x8e7b847b, 0x8e76827b,
+ 0xbef600ff, 0x01000000,
+ 0xbef20174, 0x80747074,
+ 0x82758075, 0xbefc0080,
+ 0xbf800000, 0xbe802b00,
+ 0xbe822b02, 0xbe842b04,
+ 0xbe862b06, 0xbe882b08,
+ 0xbe8a2b0a, 0xbe8c2b0c,
+ 0xbe8e2b0e, 0xc06b003a,
+ 0x00000000, 0xbf8cc07f,
+ 0xc06b013a, 0x00000010,
+ 0xbf8cc07f, 0xc06b023a,
+ 0x00000020, 0xbf8cc07f,
+ 0xc06b033a, 0x00000030,
+ 0xbf8cc07f, 0x8074c074,
+ 0x82758075, 0x807c907c,
+ 0xbf0a7b7c, 0xbf85ffe7,
+ 0xbef40172, 0xbef00080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbee80080, 0xbee90080,
+ 0xbef600ff, 0x01000000,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf85004d, 0xbe840080,
+ 0xd2890000, 0x00000900,
+ 0x80048104, 0xd2890001,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
+ 0x80048104, 0xd2890003,
+ 0x00000900, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
+ 0x80048104, 0xd2890002,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000902,
+ 0x80048104, 0xd2890001,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
+ 0x80048104, 0xd2890003,
+ 0x00000902, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbf820008,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8fb5306, 0x867bc17b,
+ 0xbf840052, 0xbf8a0000,
+ 0x867aff6f, 0x04000000,
+ 0xbf84004e, 0x8e7b867b,
+ 0x8e7b827b, 0xbef6007b,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xd28c0002,
+ 0x000100c1, 0xd28d0003,
+ 0x000204c1, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf85001d,
+ 0x24040682, 0xd86c0000,
+ 0x00000002, 0xbf8cc07f,
+ 0xbe840080, 0xd2890000,
+ 0x00000900, 0x80048104,
+ 0xd2890001, 0x00000900,
+ 0x80048104, 0xd2890002,
+ 0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0x680404ff,
+ 0x00000100, 0xd0c9006a,
+ 0x0000f702, 0xbf87ffe5,
+ 0xbf820016, 0xd1060002,
+ 0x00011103, 0x7e0602ff,
+ 0x00000200, 0xbefc00ff,
+ 0x00010000, 0xbe800077,
+ 0x8677ff77, 0xff7fffff,
+ 0x8777ff77, 0x00058000,
+ 0xd8ec0000, 0x00000002,
+ 0xbf8cc07f, 0xe0765000,
+ 0x701d0002, 0x68040702,
+ 0xd0c9006a, 0x0000f702,
+ 0xbefe016a, 0xbf87fff6,
+ 0xbef70000, 0xbef000ff,
+ 0x00000400, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb2b05,
+ 0x807b817b, 0x8e7b827b,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0084, 0xbf0a7b7c,
+ 0xbf84006d, 0xbf11017c,
+ 0x807bff7b, 0x00001000,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf850051, 0xbe840080,
+ 0xd2890000, 0x00000900,
+ 0x80048104, 0xd2890001,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
+ 0x80048104, 0xd2890003,
+ 0x00000900, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
+ 0x80048104, 0xd2890002,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000902,
+ 0x80048104, 0xd2890001,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
+ 0x80048104, 0xd2890003,
+ 0x00000902, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0x807c847c,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf9c0000, 0xbf820012,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7c,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xb8fb2985, 0x807b817b,
+ 0x8e7b837b, 0xb8fa2b05,
+ 0x807a817a, 0x8e7a827a,
+ 0x80fb7a7b, 0x867b7b7b,
+ 0xbf84007a, 0x807bff7b,
+ 0x00001000, 0xbefc0080,
+ 0xbf11017c, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf850059,
+ 0xd3d84000, 0x18000100,
+ 0xd3d84001, 0x18000101,
+ 0xd3d84002, 0x18000102,
+ 0xd3d84003, 0x18000103,
+ 0xbe840080, 0xd2890000,
+ 0x00000900, 0x80048104,
+ 0xd2890001, 0x00000900,
+ 0x80048104, 0xd2890002,
+ 0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000901,
+ 0x80048104, 0xd2890001,
+ 0x00000901, 0x80048104,
+ 0xd2890002, 0x00000901,
+ 0x80048104, 0xd2890003,
+ 0x00000901, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000902, 0x80048104,
+ 0xd2890001, 0x00000902,
+ 0x80048104, 0xd2890002,
+ 0x00000902, 0x80048104,
+ 0xd2890003, 0x00000902,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000903,
+ 0x80048104, 0xd2890001,
+ 0x00000903, 0x80048104,
+ 0xd2890002, 0x00000903,
+ 0x80048104, 0xd2890003,
+ 0x00000903, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0x807c847c, 0xbf0a7b7c,
+ 0xbf85ffa9, 0xbf9c0000,
+ 0xbf820016, 0xd3d84000,
+ 0x18000100, 0xd3d84001,
+ 0x18000101, 0xd3d84002,
+ 0x18000102, 0xd3d84003,
+ 0x18000103, 0xe0724000,
+ 0x701d0000, 0xe0724100,
+ 0x701d0100, 0xe0724200,
+ 0x701d0200, 0xe0724300,
+ 0x701d0300, 0x807c847c,
+ 0x8070ff70, 0x00000400,
+ 0xbf0a7b7c, 0xbf85ffeb,
+ 0xbf9c0000, 0xbf8200f4,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0x866eff7f, 0x04000000,
+ 0xbf840025, 0xbefe00c1,
+ 0xbeff00c1, 0xb8ef5306,
+ 0x866fc16f, 0xbf840020,
+ 0x8e6f866f, 0x8e6f826f,
+ 0xbef6006f, 0xb8f82985,
+ 0x80788178, 0x8e788a78,
+ 0x8e788178, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xe0510000, 0x781d0000,
+ 0xe0510100, 0x781d0000,
+ 0xe0510200, 0x781d0000,
+ 0xe0510300, 0x781d0000,
+ 0xe0510400, 0x781d0000,
+ 0x807cff7c, 0x00000500,
+ 0x8078ff78, 0x00000500,
+ 0xbf0a6f7c, 0xbf85fff0,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbef600ff, 0x01000000,
+ 0xb8ef2b05, 0x806f816f,
+ 0x8e6f826f, 0x806fff6f,
+ 0x00008000, 0xbef80080,
+ 0xbeee0078, 0x8078ff78,
+ 0x00000400, 0xbefc0084,
+ 0xbf11087c, 0xe0524000,
+ 0x781d0000, 0xe0524100,
+ 0x781d0100, 0xe0524200,
+ 0x781d0200, 0xe0524300,
+ 0x781d0300, 0xbf8c0f70,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0x807c847c, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7c,
+ 0xbf85ffee, 0xb8ef2985,
+ 0x806f816f, 0x8e6f836f,
+ 0xb8f92b05, 0x80798179,
+ 0x8e798279, 0x80ef796f,
+ 0x866f6f6f, 0xbf84001a,
+ 0x806fff6f, 0x00008000,
+ 0xbefc0080, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
+ 0xbf8c0f70, 0xd3d94000,
+ 0x18000100, 0xd3d94001,
+ 0x18000101, 0xd3d94002,
+ 0x18000102, 0xd3d94003,
+ 0x18000103, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffea,
+ 0xbf9c0000, 0xe0524000,
+ 0x6e1d0000, 0xe0524100,
+ 0x6e1d0100, 0xe0524200,
+ 0x6e1d0200, 0xe0524300,
+ 0x6e1d0300, 0xbf8c0f70,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x80f8c078, 0xb8ef1605,
+ 0x806f816f, 0x8e6f846f,
+ 0x8e76826f, 0xbef600ff,
+ 0x01000000, 0xbefc006f,
+ 0xc031003a, 0x00000078,
+ 0x80f8c078, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe802d00, 0xbe822d02,
+ 0xbe842d04, 0xbe862d06,
+ 0xbe882d08, 0xbe8a2d0a,
+ 0xbe8c2d0c, 0xbe8e2d0e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xc0211bfa,
+ 0x00000078, 0x80788478,
+ 0xc0211b3a, 0x00000078,
+ 0x80788478, 0xc0211b7a,
+ 0x00000078, 0x80788478,
+ 0xc0211c3a, 0x00000078,
+ 0x80788478, 0xc0211c7a,
+ 0x00000078, 0x80788478,
+ 0xc0211eba, 0x00000078,
+ 0x80788478, 0xc0211efa,
+ 0x00000078, 0x80788478,
+ 0xc0211a3a, 0x00000078,
+ 0x80788478, 0xc0211a7a,
+ 0x00000078, 0x80788478,
+ 0xc0211cfa, 0x00000078,
+ 0x80788478, 0xbf8cc07f,
+ 0xbefc006f, 0xbefe0070,
+ 0xbeff0071, 0x866f7bff,
+ 0x000003ff, 0xb96f4803,
+ 0x866f7bff, 0xfffff800,
+ 0x8f6f8b6f, 0xb96fa2c3,
+ 0xb973f801, 0xb8ee2985,
+ 0x806e816e, 0x8e6e8a6e,
+ 0x8e6e816e, 0xb8ef1605,
+ 0x806f816f, 0x8e6f866f,
+ 0x806e6f6e, 0x806e746e,
+ 0x826f8075, 0x866fff6f,
+ 0x0000ffff, 0xc00b1c37,
+ 0x00000050, 0xc00b1d37,
+ 0x00000060, 0xc0031e77,
+ 0x00000074, 0xbf8cc07f,
+ 0x8f6e8b79, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0x8f6e837a, 0xb96ee0c2,
+ 0xbf800002, 0xb97a0002,
+ 0xbf8a0000, 0xbe801f6c,
+ 0xbf9b0000, 0x00000000,
};
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index 5081f91190b8..96fbb16ceb21 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -23,86 +23,129 @@
/* To compile this assembly code:
*
* Navi1x:
- * cpp -DASIC_TARGET_NAVI1X=1 cwsr_trap_handler_gfx10.asm -P -o nv1x.sp3
- * sp3-nv1x nv1x.sp3 -hex nv1x.hex
+ * cpp -DASIC_FAMILY=CHIP_NAVI10 cwsr_trap_handler_gfx10.asm -P -o nv1x.sp3
+ * sp3 nv1x.sp3 -hex nv1x.hex
+ *
+ * gfx10:
+ * cpp -DASIC_FAMILY=CHIP_SIENNA_CICHLID cwsr_trap_handler_gfx10.asm -P -o gfx10.sp3
+ * sp3 gfx10.sp3 -hex gfx10.hex
+ *
+ * gfx11:
+ * cpp -DASIC_FAMILY=CHIP_PLUM_BONITO cwsr_trap_handler_gfx10.asm -P -o gfx11.sp3
+ * sp3 gfx11.sp3 -hex gfx11.hex
*
- * Others:
- * cpp -DASIC_TARGET_NAVI1X=0 cwsr_trap_handler_gfx10.asm -P -o gfx10.sp3
- * sp3-gfx10 gfx10.sp3 -hex gfx10.hex
*/
-#define NO_SQC_STORE !ASIC_TARGET_NAVI1X
+#define CHIP_NAVI10 26
+#define CHIP_SIENNA_CICHLID 30
+#define CHIP_PLUM_BONITO 36
+
+#define NO_SQC_STORE (ASIC_FAMILY >= CHIP_SIENNA_CICHLID)
+#define HAVE_XNACK (ASIC_FAMILY < CHIP_SIENNA_CICHLID)
+#define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO)
+#define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO)
+#define SW_SA_TRAP (ASIC_FAMILY == CHIP_PLUM_BONITO)
+#define SAVE_AFTER_XNACK_ERROR (HAVE_XNACK && !NO_SQC_STORE) // workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger
+#define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
-var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
+#define S_COHERENCE glc:1
+#define V_COHERENCE slc:1 glc:1
+#define S_WAITCNT_0 s_waitcnt 0
-var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
-var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
var SQ_WAVE_STATUS_HALT_MASK = 0x2000
-
+var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000
+var SQ_WAVE_STATUS_TRAP_EN_SHIFT = 6
+var SQ_WAVE_IB_STS2_WAVE64_SHIFT = 11
+var SQ_WAVE_IB_STS2_WAVE64_SIZE = 1
+var SQ_WAVE_LDS_ALLOC_GRANULARITY = 8
+var S_STATUS_HWREG = HW_REG_STATUS
+var S_STATUS_ALWAYS_CLEAR_MASK = SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_ECC_ERR_MASK
+var S_STATUS_HALT_MASK = SQ_WAVE_STATUS_HALT_MASK
+var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000
+var S_SAVE_PC_HI_HT_MASK = 0x01000000
+
+var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
-var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
-var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6
-var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24
-var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 4
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 8
var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24
var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4
-var SQ_WAVE_IB_STS2_WAVE64_SHIFT = 11
-var SQ_WAVE_IB_STS2_WAVE64_SIZE = 1
+
+#if ASIC_FAMILY < CHIP_PLUM_BONITO
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
+#else
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12
+#endif
var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400
-var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF
+var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF
var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10
+var SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK = 0x80
+var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT = 7
var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100
var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8
-var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF
-var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0
-var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10
-var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800
-var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11
-var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21
var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800
+var SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT = 11
+var SQ_WAVE_TRAPSTS_EXCP_HI_MASK = 0x7000
+#if ASIC_FAMILY >= CHIP_PLUM_BONITO
+var SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT = 16
+var SQ_WAVE_TRAPSTS_WAVE_START_MASK = 0x20000
+var SQ_WAVE_TRAPSTS_WAVE_START_SHIFT = 17
+var SQ_WAVE_TRAPSTS_WAVE_END_MASK = 0x40000
+var SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK = 0x100000
+#endif
+var SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK = 0x10000000
+
+var SQ_WAVE_MODE_EXCP_EN_SHIFT = 12
+var SQ_WAVE_MODE_EXCP_EN_ADDR_WATCH_SHIFT = 19
-var SQ_WAVE_IB_STS_RCNT_SHIFT = 16
var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15
var SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT = 25
-var SQ_WAVE_IB_STS_REPLAY_W64H_SIZE = 1
var SQ_WAVE_IB_STS_REPLAY_W64H_MASK = 0x02000000
-var SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE = 1
-var SQ_WAVE_IB_STS_RCNT_SIZE = 6
var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x003F8000
-var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF
var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800
-var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24
-var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27
+var S_TRAPSTS_RESTORE_PART_1_SIZE = SQ_WAVE_TRAPSTS_SAVECTX_SHIFT
+var S_TRAPSTS_RESTORE_PART_2_SHIFT = SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT
+
+#if ASIC_FAMILY < CHIP_PLUM_BONITO
+var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
+var S_TRAPSTS_RESTORE_PART_2_SIZE = 32 - S_TRAPSTS_RESTORE_PART_2_SHIFT
+var S_TRAPSTS_RESTORE_PART_3_SHIFT = 0
+var S_TRAPSTS_RESTORE_PART_3_SIZE = 0
+#else
+var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_TRAPSTS_MEM_VIOL_MASK |\
+ SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK |\
+ SQ_WAVE_TRAPSTS_WAVE_START_MASK |\
+ SQ_WAVE_TRAPSTS_WAVE_END_MASK |\
+ SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK
+var S_TRAPSTS_RESTORE_PART_2_SIZE = SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT - SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT
+var S_TRAPSTS_RESTORE_PART_3_SHIFT = SQ_WAVE_TRAPSTS_WAVE_START_SHIFT
+var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT
+#endif
+var S_TRAPSTS_HWREG = HW_REG_TRAPSTS
+var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_TRAPSTS_SAVECTX_MASK
+var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_TRAPSTS_SAVECTX_SHIFT
// bits [31:24] unused by SPI debug data
var TTMP11_SAVE_REPLAY_W64H_SHIFT = 31
var TTMP11_SAVE_REPLAY_W64H_MASK = 0x80000000
var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 24
var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0x7F000000
+var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23
+var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000
// SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14]
// when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000
var S_SAVE_BUF_RSRC_WORD3_MISC = 0x10807FAC
-
-var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000
-var S_SAVE_SPI_INIT_ATC_SHIFT = 27
-var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000
-var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28
var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000
var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
-var S_SAVE_PC_HI_RCNT_SHIFT = 26
-var S_SAVE_PC_HI_RCNT_MASK = 0xFC000000
-var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 25
-var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x02000000
-var S_SAVE_PC_HI_REPLAY_W64H_SHIFT = 24
-var S_SAVE_PC_HI_REPLAY_W64H_MASK = 0x01000000
+var S_SAVE_PC_HI_FIRST_WAVE_MASK = 0x80000000
+var S_SAVE_PC_HI_FIRST_WAVE_SHIFT = 31
var s_sgpr_save_num = 108
@@ -130,19 +173,10 @@ var s_save_ttmps_hi = s_save_trapsts
var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC
-var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000
-var S_RESTORE_SPI_INIT_ATC_SHIFT = 27
-var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000
-var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28
var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000
var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26
var S_WAVE_SIZE = 25
-var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT
-var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK
-var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
-var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK
-
var s_restore_spi_init_lo = exec_lo
var s_restore_spi_init_hi = exec_hi
var s_restore_mem_offset = ttmp12
@@ -166,6 +200,7 @@ var s_restore_buf_rsrc3 = ttmp11
var s_restore_size = ttmp6
var s_restore_ttmps_lo = s_restore_tmp
var s_restore_ttmps_hi = s_restore_alloc_size
+var s_restore_spi_init_hi_save = s_restore_exec_hi
shader main
asic(DEFAULT)
@@ -178,113 +213,176 @@ L_JUMP_TO_RESTORE:
s_branch L_RESTORE
L_SKIP_RESTORE:
- s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
- s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK
+ s_getreg_b32 s_save_status, hwreg(S_STATUS_HWREG) //save STATUS since we will change SCC
-if SINGLE_STEP_MISSED_WORKAROUND
- // No single step exceptions if MODE.DEBUG_EN=0.
- s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
- s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
- s_cbranch_scc0 L_NO_SINGLE_STEP_WORKAROUND
+ // Clear SPI_PRIO: do not save with elevated priority.
+ // Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
+ s_andn2_b32 s_save_status, s_save_status, S_STATUS_ALWAYS_CLEAR_MASK
- // Second-level trap already handled exception if STATUS.HALT=1.
- s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+ s_getreg_b32 s_save_trapsts, hwreg(S_TRAPSTS_HWREG)
- // Prioritize single step exception over context save.
- // Second-level trap will halt wave and RFE, re-entering for SAVECTX.
- s_cbranch_scc0 L_FETCH_2ND_TRAP
+#if SW_SA_TRAP
+ // If ttmp1[30] is set then issue s_barrier to unblock dependent waves.
+ s_bitcmp1_b32 s_save_pc_hi, 30
+ s_cbranch_scc0 L_TRAP_NO_BARRIER
+ s_barrier
-L_NO_SINGLE_STEP_WORKAROUND:
-end
+L_TRAP_NO_BARRIER:
+ // If ttmp1[31] is set then trap may occur early.
+ // Spin wait until SAVECTX exception is raised.
+ s_bitcmp1_b32 s_save_pc_hi, 31
+ s_cbranch_scc1 L_CHECK_SAVE
+#endif
+ s_and_b32 ttmp2, s_save_status, S_STATUS_HALT_MASK
+ s_cbranch_scc0 L_NOT_HALTED
- s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
- s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save
+L_HALTED:
+ // Host trap may occur while wave is halted.
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+L_CHECK_SAVE:
+ s_and_b32 ttmp2, s_save_trapsts, S_TRAPSTS_SAVE_CONTEXT_MASK
s_cbranch_scc1 L_SAVE
- // If STATUS.MEM_VIOL is asserted then halt the wave to prevent
- // the exception raising again and blocking context save.
- s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
- s_cbranch_scc0 L_FETCH_2ND_TRAP
- s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+ // Wave is halted but neither host trap nor SAVECTX is raised.
+ // Caused by instruction fetch memory violation.
+ // Spin wait until context saved to prevent interrupt storm.
+ s_sleep 0x10
+ s_getreg_b32 s_save_trapsts, hwreg(S_TRAPSTS_HWREG)
+ s_branch L_CHECK_SAVE
+
+L_NOT_HALTED:
+ // Let second-level handle non-SAVECTX exception or trap.
+ // Any concurrent SAVECTX will be handled upon re-entry once halted.
+
+ // Check non-maskable exceptions. memory_violation, illegal_instruction
+ // and xnack_error exceptions always cause the wave to enter the trap
+ // handler.
+ s_and_b32 ttmp2, s_save_trapsts, S_TRAPSTS_NON_MASKABLE_EXCP_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+ // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
+ // Maskable exceptions only cause the wave to enter the trap handler if
+ // their respective bit in mode.excp_en is set.
+ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
+ s_cbranch_scc0 L_CHECK_TRAP_ID
+
+ s_and_b32 ttmp3, s_save_trapsts, SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
+ s_cbranch_scc0 L_NOT_ADDR_WATCH
+ s_bitset1_b32 ttmp2, SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT // Check all addr_watch[123] exceptions against excp_en.addr_watch
+
+L_NOT_ADDR_WATCH:
+ s_getreg_b32 ttmp3, hwreg(HW_REG_MODE)
+ s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT
+ s_and_b32 ttmp2, ttmp2, ttmp3
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+L_CHECK_TRAP_ID:
+ // Check trap_id != 0
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+#if SINGLE_STEP_MISSED_WORKAROUND
+ // Prioritize single step exception over context save.
+ // Second-level trap will halt wave and RFE, re-entering for SAVECTX.
+ s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
+ s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+#endif
-L_FETCH_2ND_TRAP:
+ s_and_b32 ttmp2, s_save_trapsts, S_TRAPSTS_SAVE_CONTEXT_MASK
+ s_cbranch_scc1 L_SAVE
-#if ASIC_TARGET_NAVI1X
- // Preserve and clear scalar XNACK state before issuing scalar loads.
- // Save IB_STS.REPLAY_W64H[25], RCNT[21:16], FIRST_REPLAY[15] into
- // unused space ttmp11[31:24].
- s_andn2_b32 ttmp11, ttmp11, (TTMP11_SAVE_REPLAY_W64H_MASK | TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK)
- s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS)
- s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
- s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
- s_or_b32 ttmp11, ttmp11, ttmp3
- s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
- s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
- s_or_b32 ttmp11, ttmp11, ttmp3
- s_andn2_b32 ttmp2, ttmp2, (SQ_WAVE_IB_STS_REPLAY_W64H_MASK | SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK)
- s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
+L_FETCH_2ND_TRAP:
+#if HAVE_XNACK
+ save_and_clear_ib_sts(ttmp14, ttmp15)
#endif
// Read second-level TBA/TMA from first-level TMA and jump if available.
// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
// ttmp12 holds SQ_WAVE_STATUS
+#if HAVE_SENDMSG_RTN
+ s_sendmsg_rtn_b64 [ttmp14, ttmp15], sendmsg(MSG_RTN_GET_TMA)
+ S_WAITCNT_0
+#else
s_getreg_b32 ttmp14, hwreg(HW_REG_SHADER_TMA_LO)
s_getreg_b32 ttmp15, hwreg(HW_REG_SHADER_TMA_HI)
+#endif
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
- s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
- s_waitcnt lgkmcnt(0)
- s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA
- s_waitcnt lgkmcnt(0)
+
+ s_bitcmp1_b32 ttmp15, 0xF
+ s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA
+ s_or_b32 ttmp15, ttmp15, 0xFFFF0000
+L_NO_SIGN_EXTEND_TMA:
+
+ s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 S_COHERENCE // debug trap enabled flag
+ S_WAITCNT_0
+ s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
+ s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK
+ s_or_b32 ttmp11, ttmp11, ttmp2
+
+ s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 S_COHERENCE // second-level TBA
+ S_WAITCNT_0
+ s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 S_COHERENCE // second-level TMA
+ S_WAITCNT_0
+
s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set
s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler
L_NO_NEXT_TRAP:
- s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
- s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK
- s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly.
- s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0
- s_addc_u32 ttmp1, ttmp1, 0
-L_EXCP_CASE:
+ // If not caused by trap then halt wave to prevent re-entry.
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+ s_cbranch_scc1 L_TRAP_CASE
+
+ // Host trap will not cause trap re-entry.
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK
+ s_cbranch_scc1 L_EXIT_TRAP
+ s_or_b32 s_save_status, s_save_status, S_STATUS_HALT_MASK
+
+ // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
+ // Rewind the PC to prevent this from occurring.
+ s_sub_u32 ttmp0, ttmp0, 0x8
+ s_subb_u32 ttmp1, ttmp1, 0x0
+
+ s_branch L_EXIT_TRAP
+
+L_TRAP_CASE:
+ // Advance past trap instruction to prevent re-entry.
+ s_add_u32 ttmp0, ttmp0, 0x4
+ s_addc_u32 ttmp1, ttmp1, 0x0
+
+L_EXIT_TRAP:
s_and_b32 ttmp1, ttmp1, 0xFFFF
-#if ASIC_TARGET_NAVI1X
- // Restore SQ_WAVE_IB_STS.
- s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
- s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
- s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
- s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
- s_or_b32 ttmp2, ttmp2, ttmp3
- s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
+#if HAVE_XNACK
+ restore_ib_sts(ttmp14, ttmp15)
#endif
// Restore SQ_WAVE_STATUS.
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
- s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status
+ s_setreg_b32 hwreg(S_STATUS_HWREG), s_save_status
s_rfe_b64 [ttmp0, ttmp1]
L_SAVE:
+ // If VGPRs have been deallocated then terminate the wavefront.
+ // It has no remaining program to run and cannot save without VGPRs.
+#if ASIC_FAMILY == CHIP_PLUM_BONITO
+ s_bitcmp1_b32 s_save_status, SQ_WAVE_STATUS_NO_VGPRS_SHIFT
+ s_cbranch_scc0 L_HAVE_VGPRS
+ s_endpgm
+L_HAVE_VGPRS:
+#endif
s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
s_mov_b32 s_save_tmp, 0
- s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
+ s_setreg_b32 hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit
-#if ASIC_TARGET_NAVI1X
- s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE)
- s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
- s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
- s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE)
- s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
- s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
- s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT, SQ_WAVE_IB_STS_REPLAY_W64H_SIZE)
- s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_REPLAY_W64H_SHIFT
- s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
- s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY and REPLAY_W64H in IB_STS
- s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG
-
- s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp
+#if HAVE_XNACK
+ save_and_clear_ib_sts(s_save_tmp, s_save_trapsts)
#endif
/* inform SPI the readiness and wait for SPI's go signal */
@@ -292,9 +390,13 @@ L_SAVE:
s_mov_b32 s_save_exec_hi, exec_hi
s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive
+#if HAVE_SENDMSG_RTN
+ s_sendmsg_rtn_b64 [exec_lo, exec_hi], sendmsg(MSG_RTN_SAVE_WAVE)
+#else
s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
+#endif
-#if ASIC_TARGET_NAVI1X
+#if ASIC_FAMILY < CHIP_SIENNA_CICHLID
L_SLEEP:
// sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause
// SQ hang, since the 7,8th wave could not get arbit to exec inst, while
@@ -302,22 +404,69 @@ L_SLEEP:
s_sleep 0x2
s_cbranch_execz L_SLEEP
#else
- s_waitcnt lgkmcnt(0)
+ S_WAITCNT_0
+#endif
+
+ // Save first_wave flag so we can clear high bits of save address.
+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
+ s_lshl_b32 s_save_tmp, s_save_tmp, (S_SAVE_PC_HI_FIRST_WAVE_SHIFT - S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT)
+ s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
+
+#if NO_SQC_STORE
+#if ASIC_FAMILY <= CHIP_SIENNA_CICHLID
+ // gfx10: If there was a VALU exception, the exception state must be
+ // cleared before executing the VALU instructions below.
+ v_clrexcp
+#endif
+
+ // Trap temporaries must be saved via VGPR but all VGPRs are in use.
+ // There is no ttmp space to hold the resource constant for VGPR save.
+ // Save v0 by itself since it requires only two SGPRs.
+ s_mov_b32 s_save_ttmps_lo, exec_lo
+ s_and_b32 s_save_ttmps_hi, exec_hi, 0xFFFF
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] V_COHERENCE
+ v_mov_b32 v0, 0x0
+ s_mov_b32 exec_lo, s_save_ttmps_lo
+ s_mov_b32 exec_hi, s_save_ttmps_hi
#endif
// Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
- // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
- get_wave_size(s_save_ttmps_hi)
+ // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
+ get_wave_size2(s_save_ttmps_hi)
get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi)
+ get_svgpr_size_bytes(s_save_ttmps_hi)
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi
s_and_b32 s_save_ttmps_hi, s_save_spi_init_hi, 0xFFFF
s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, get_sgpr_size_bytes()
s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
s_addc_u32 s_save_ttmps_hi, s_save_ttmps_hi, 0x0
-#if ASIC_TARGET_NAVI1X
- s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1
- s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1
- s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1
+#if NO_SQC_STORE
+ v_writelane_b32 v0, ttmp4, 0x4
+ v_writelane_b32 v0, ttmp5, 0x5
+ v_writelane_b32 v0, ttmp6, 0x6
+ v_writelane_b32 v0, ttmp7, 0x7
+ v_writelane_b32 v0, ttmp8, 0x8
+ v_writelane_b32 v0, ttmp9, 0x9
+ v_writelane_b32 v0, ttmp10, 0xA
+ v_writelane_b32 v0, ttmp11, 0xB
+ v_writelane_b32 v0, ttmp13, 0xD
+ v_writelane_b32 v0, exec_lo, 0xE
+ v_writelane_b32 v0, exec_hi, 0xF
+
+ s_mov_b32 exec_lo, 0x3FFF
+ s_mov_b32 exec_hi, 0x0
+ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] inst_offset:0x40 V_COHERENCE
+ v_readlane_b32 ttmp14, v0, 0xE
+ v_readlane_b32 ttmp15, v0, 0xF
+ s_mov_b32 exec_lo, ttmp14
+ s_mov_b32 exec_hi, ttmp15
+#else
+ s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 S_COHERENCE
+ s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 S_COHERENCE
+ s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 S_COHERENCE
#endif
/* setup Resource Contants */
@@ -326,20 +475,14 @@ L_SLEEP:
s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited
s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
- s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
- s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)
- s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC
- s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK
- s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)
- s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE
s_mov_b32 s_save_m0, m0
/* global mem offset */
s_mov_b32 s_save_mem_offset, 0x0
- get_wave_size(s_wave_size)
+ get_wave_size2(s_wave_size)
-#if ASIC_TARGET_NAVI1X
+#if HAVE_XNACK
// Save and clear vector XNACK state late to free up SGPRs.
s_getreg_b32 s_save_xnack_mask, hwreg(HW_REG_SHADER_XNACK_MASK)
s_setreg_imm32_b32 hwreg(HW_REG_SHADER_XNACK_MASK), 0x0
@@ -361,10 +504,22 @@ L_SAVE_4VGPR_WAVE32:
// VGPR Allocated in 4-GPR granularity
- buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2
- buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3
+#if SAVE_AFTER_XNACK_ERROR
+ check_if_tcp_store_ok()
+ s_cbranch_scc1 L_SAVE_FIRST_VGPRS32_WITH_TCP
+
+ write_vgprs_to_mem_with_sqc_w32(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
+ s_branch L_SAVE_HWREG
+
+L_SAVE_FIRST_VGPRS32_WITH_TCP:
+#endif
+
+#if !NO_SQC_STORE
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
+#endif
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128*3
s_branch L_SAVE_HWREG
L_SAVE_4VGPR_WAVE64:
@@ -372,10 +527,22 @@ L_SAVE_4VGPR_WAVE64:
// VGPR Allocated in 4-GPR granularity
- buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
- buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
+#if SAVE_AFTER_XNACK_ERROR
+ check_if_tcp_store_ok()
+ s_cbranch_scc1 L_SAVE_FIRST_VGPRS64_WITH_TCP
+
+ write_vgprs_to_mem_with_sqc_w64(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
+ s_branch L_SAVE_HWREG
+
+L_SAVE_FIRST_VGPRS64_WITH_TCP:
+#endif
+
+#if !NO_SQC_STORE
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
+#endif
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256*3
/* save HW registers */
@@ -397,12 +564,13 @@ L_SAVE_HWREG:
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset)
- write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
+ s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
+ write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset)
- s_getreg_b32 s_save_tmp, hwreg(HW_REG_TRAPSTS)
+ s_getreg_b32 s_save_tmp, hwreg(S_TRAPSTS_HWREG)
write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
// Not used on Sienna_Cichlid but keep layout same for debugger.
@@ -418,9 +586,13 @@ L_SAVE_HWREG:
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
#if NO_SQC_STORE
- // Write HWREG/SGPRs with 32 VGPR lanes, wave32 is common case.
+ // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
+ s_mov_b32 exec_lo, 0xFFFF
s_mov_b32 exec_hi, 0x0
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
+
+ // Write SGPRs with 32 VGPR lanes. This works in wave32 and wave64 mode.
+ s_mov_b32 exec_lo, 0xFFFFFFFF
#endif
/* save SGPRs */
@@ -460,7 +632,7 @@ L_SAVE_SGPR_LOOP:
s_cmp_eq_u32 ttmp13, 0x20 //have 32 VGPR lanes filled?
s_cbranch_scc0 L_SAVE_SGPR_SKIP_TCP_STORE
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
s_add_u32 s_save_mem_offset, s_save_mem_offset, 0x80
s_mov_b32 ttmp13, 0x0
v_mov_b32 v2, 0x0
@@ -481,7 +653,7 @@ L_SAVE_SGPR_SKIP_TCP_STORE:
write_12sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset)
#if NO_SQC_STORE
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
#else
// restore s_save_buf_rsrc0,1
s_mov_b32 s_save_buf_rsrc0, s_save_xnack_mask
@@ -506,13 +678,12 @@ L_SAVE_LDS_NORMAL:
s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE
s_barrier //LDS is used? wait for other waves in the same TG
- s_and_b32 s_save_tmp, s_wave_size, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
+ s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
s_cbranch_scc0 L_SAVE_LDS_DONE
// first wave do LDS save;
- s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 6 //LDS size in dwords = lds_size * 64dw
- s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //LDS size in bytes
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY
s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes
// LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
@@ -537,16 +708,36 @@ L_SAVE_LDS_NORMAL:
s_cbranch_scc1 L_SAVE_LDS_W64
L_SAVE_LDS_W32:
+#if SAVE_AFTER_XNACK_ERROR
+ check_if_tcp_store_ok()
+ s_cbranch_scc1 L_SAVE_LDS_WITH_TCP_W32
+
+L_SAVE_LDS_LOOP_SQC_W32:
+ ds_read_b32 v1, v0
+ S_WAITCNT_0
+
+ write_vgprs_to_mem_with_sqc_w32(v1, 1, s_save_buf_rsrc0, s_save_mem_offset)
+
+ s_add_u32 m0, m0, 128 //every buffer_store_lds does 128 bytes
+ v_add_nc_u32 v0, v0, 128 //mem offset increased by 128 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_LDS_LOOP_SQC_W32 //LDS save is complete?
+
+ s_branch L_SAVE_LDS_DONE
+
+L_SAVE_LDS_WITH_TCP_W32:
+#endif
+
s_mov_b32 s3, 128
s_nop 0
s_nop 0
s_nop 0
L_SAVE_LDS_LOOP_W32:
ds_read_b32 v1, v0
- s_waitcnt 0
- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ S_WAITCNT_0
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
- s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes
+ s_add_u32 m0, m0, s3 //every buffer_store_lds does 128 bytes
s_add_u32 s_save_mem_offset, s_save_mem_offset, s3
v_add_nc_u32 v0, v0, 128 //mem offset increased by 128 bytes
s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
@@ -555,14 +746,34 @@ L_SAVE_LDS_LOOP_W32:
s_branch L_SAVE_LDS_DONE
L_SAVE_LDS_W64:
+#if SAVE_AFTER_XNACK_ERROR
+ check_if_tcp_store_ok()
+ s_cbranch_scc1 L_SAVE_LDS_WITH_TCP_W64
+
+L_SAVE_LDS_LOOP_SQC_W64:
+ ds_read_b32 v1, v0
+ S_WAITCNT_0
+
+ write_vgprs_to_mem_with_sqc_w64(v1, 1, s_save_buf_rsrc0, s_save_mem_offset)
+
+ s_add_u32 m0, m0, 256 //every buffer_store_lds does 256 bytes
+ v_add_nc_u32 v0, v0, 256 //mem offset increased by 256 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_LDS_LOOP_SQC_W64 //LDS save is complete?
+
+ s_branch L_SAVE_LDS_DONE
+
+L_SAVE_LDS_WITH_TCP_W64:
+#endif
+
s_mov_b32 s3, 256
s_nop 0
s_nop 0
s_nop 0
L_SAVE_LDS_LOOP_W64:
ds_read_b32 v1, v0
- s_waitcnt 0
- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ S_WAITCNT_0
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes
s_add_u32 s_save_mem_offset, s_save_mem_offset, s3
@@ -604,16 +815,35 @@ L_SAVE_VGPR_NORMAL:
s_cmp_lt_u32 m0, s_save_alloc_size
s_cbranch_scc0 L_SAVE_VGPR_END
+#if SAVE_AFTER_XNACK_ERROR
+ check_if_tcp_store_ok()
+ s_cbranch_scc1 L_SAVE_VGPR_W32_LOOP
+
+L_SAVE_VGPR_LOOP_SQC_W32:
+ v_movrels_b32 v0, v0 //v0 = v[0+m0]
+ v_movrels_b32 v1, v1 //v1 = v[1+m0]
+ v_movrels_b32 v2, v2 //v2 = v[2+m0]
+ v_movrels_b32 v3, v3 //v3 = v[3+m0]
+
+ write_vgprs_to_mem_with_sqc_w32(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
+
+ s_add_u32 m0, m0, 4
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc1 L_SAVE_VGPR_LOOP_SQC_W32
+
+ s_branch L_SAVE_VGPR_END
+#endif
+
L_SAVE_VGPR_W32_LOOP:
v_movrels_b32 v0, v0 //v0 = v[0+m0]
v_movrels_b32 v1, v1 //v1 = v[1+m0]
v_movrels_b32 v2, v2 //v2 = v[2+m0]
v_movrels_b32 v3, v3 //v3 = v[3+m0]
- buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2
- buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128*3
s_add_u32 m0, m0, 4 //next vgpr index
s_add_u32 s_save_mem_offset, s_save_mem_offset, 128*4 //every buffer_store_dword does 128 bytes
@@ -628,7 +858,26 @@ L_SAVE_VGPR_WAVE64:
// VGPR store using dw burst
s_mov_b32 m0, 0x4 //VGPR initial index value =4
s_cmp_lt_u32 m0, s_save_alloc_size
- s_cbranch_scc0 L_SAVE_VGPR_END
+ s_cbranch_scc0 L_SAVE_SHARED_VGPR
+
+#if SAVE_AFTER_XNACK_ERROR
+ check_if_tcp_store_ok()
+ s_cbranch_scc1 L_SAVE_VGPR_W64_LOOP
+
+L_SAVE_VGPR_LOOP_SQC_W64:
+ v_movrels_b32 v0, v0 //v0 = v[0+m0]
+ v_movrels_b32 v1, v1 //v1 = v[1+m0]
+ v_movrels_b32 v2, v2 //v2 = v[2+m0]
+ v_movrels_b32 v3, v3 //v3 = v[3+m0]
+
+ write_vgprs_to_mem_with_sqc_w64(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
+
+ s_add_u32 m0, m0, 4
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc1 L_SAVE_VGPR_LOOP_SQC_W64
+
+ s_branch L_SAVE_VGPR_END
+#endif
L_SAVE_VGPR_W64_LOOP:
v_movrels_b32 v0, v0 //v0 = v[0+m0]
@@ -636,16 +885,17 @@ L_SAVE_VGPR_W64_LOOP:
v_movrels_b32 v2, v2 //v2 = v[2+m0]
v_movrels_b32 v3, v3 //v3 = v[3+m0]
- buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
- buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256*3
s_add_u32 m0, m0, 4 //next vgpr index
s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes
s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
s_cbranch_scc1 L_SAVE_VGPR_W64_LOOP //VGPR save is complete?
+L_SAVE_SHARED_VGPR:
//Below part will be the save shared vgpr part (new for gfx10)
s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero?
@@ -656,9 +906,26 @@ L_SAVE_VGPR_W64_LOOP:
s_add_u32 s_save_alloc_size, s_save_alloc_size, m0
s_mov_b32 exec_lo, 0xFFFFFFFF
s_mov_b32 exec_hi, 0x00000000
+
+#if SAVE_AFTER_XNACK_ERROR
+ check_if_tcp_store_ok()
+ s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP
+
+L_SAVE_SHARED_VGPR_WAVE64_LOOP_SQC:
+ v_movrels_b32 v0, v0
+
+ write_vgprs_to_mem_with_sqc_w64(v0, 1, s_save_buf_rsrc0, s_save_mem_offset)
+
+ s_add_u32 m0, m0, 1
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP_SQC
+
+ s_branch L_SAVE_VGPR_END
+#endif
+
L_SAVE_SHARED_VGPR_WAVE64_LOOP:
v_movrels_b32 v0, v0 //v0 = v[0+m0]
- buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
s_add_u32 m0, m0, 1 //next vgpr index
s_add_u32 s_save_mem_offset, s_save_mem_offset, 128
s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
@@ -674,14 +941,9 @@ L_RESTORE:
s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
- s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK
- s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)
- s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC
- s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
- s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)
- s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE
+
//determine it is wave32 or wave64
- get_wave_size(s_restore_size)
+ get_wave_size2(s_restore_size)
s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
s_cbranch_scc0 L_RESTORE_VGPR
@@ -701,8 +963,7 @@ L_RESTORE_LDS_NORMAL:
s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero?
s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR
- s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 6 //LDS size in dwords = lds_size * 64dw
- s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //LDS size in bytes
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY
s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes
// LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
@@ -722,7 +983,13 @@ L_RESTORE_LDS_NORMAL:
s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64
L_RESTORE_LDS_LOOP_W32:
+#if HAVE_BUFFER_LDS_LOAD
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
+#else
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
+ S_WAITCNT_0
+ ds_store_addtid_b32 v0
+#endif
s_add_u32 m0, m0, 128 // 128 DW
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //mem offset increased by 128DW
s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
@@ -730,7 +997,13 @@ L_RESTORE_LDS_LOOP_W32:
s_branch L_RESTORE_VGPR
L_RESTORE_LDS_LOOP_W64:
+#if HAVE_BUFFER_LDS_LOAD
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
+#else
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
+ S_WAITCNT_0
+ ds_store_addtid_b32 v0
+#endif
s_add_u32 m0, m0, 256 // 256 DW
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //mem offset increased by 256DW
s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
@@ -765,13 +1038,15 @@ L_RESTORE_VGPR_NORMAL:
s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4
s_mov_b32 m0, 4 //VGPR initial index value = 4
+ s_cmp_lt_u32 m0, s_restore_alloc_size
+ s_cbranch_scc0 L_RESTORE_SGPR
L_RESTORE_VGPR_WAVE32_LOOP:
- buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
- buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:128
- buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:128*2
- buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:128*3
- s_waitcnt vmcnt(0)
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:128
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:128*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:128*3
+ S_WAITCNT_0
v_movreld_b32 v0, v0 //v[0+m0] = v0
v_movreld_b32 v1, v1
v_movreld_b32 v2, v2
@@ -782,10 +1057,11 @@ L_RESTORE_VGPR_WAVE32_LOOP:
s_cbranch_scc1 L_RESTORE_VGPR_WAVE32_LOOP //VGPR restore (except v0) is complete?
/* VGPR restore on v0 */
- buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1
- buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128
- buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*2
- buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*3
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:128
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:128*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:128*3
+ S_WAITCNT_0
s_branch L_RESTORE_SGPR
@@ -796,13 +1072,15 @@ L_RESTORE_VGPR_WAVE64:
s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v4, v0 will be the last
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
s_mov_b32 m0, 4 //VGPR initial index value = 4
+ s_cmp_lt_u32 m0, s_restore_alloc_size
+ s_cbranch_scc0 L_RESTORE_SHARED_VGPR
L_RESTORE_VGPR_WAVE64_LOOP:
- buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
- buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256
- buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2
- buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3
- s_waitcnt vmcnt(0)
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:256*3
+ S_WAITCNT_0
v_movreld_b32 v0, v0 //v[0+m0] = v0
v_movreld_b32 v1, v1
v_movreld_b32 v2, v2
@@ -812,6 +1090,7 @@ L_RESTORE_VGPR_WAVE64_LOOP:
s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
s_cbranch_scc1 L_RESTORE_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete?
+L_RESTORE_SHARED_VGPR:
//Below part will be the restore shared vgpr part (new for gfx10)
s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //shared_vgpr_size
s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero?
@@ -823,8 +1102,8 @@ L_RESTORE_VGPR_WAVE64_LOOP:
s_mov_b32 exec_lo, 0xFFFFFFFF
s_mov_b32 exec_hi, 0x00000000
L_RESTORE_SHARED_VGPR_WAVE64_LOOP:
- buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
- s_waitcnt vmcnt(0)
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE
+ S_WAITCNT_0
v_movreld_b32 v0, v0 //v[0+m0] = v0
s_add_u32 m0, m0, 1 //next vgpr index
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128
@@ -835,11 +1114,11 @@ L_RESTORE_SHARED_VGPR_WAVE64_LOOP:
/* VGPR restore on v0 */
L_RESTORE_V0:
- buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1
- buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256
- buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2
- buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3
- s_waitcnt vmcnt(0)
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:256*3
+ S_WAITCNT_0
/* restore SGPRs */
//will be 2+8+16*6
@@ -856,7 +1135,7 @@ L_RESTORE_SGPR:
s_mov_b32 m0, s_sgpr_save_num
read_4sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
- s_waitcnt lgkmcnt(0)
+ S_WAITCNT_0
s_sub_u32 m0, m0, 4 // Restore from S[0] to S[104]
s_nop 0 // hazard SALU M0=> S_MOVREL
@@ -865,7 +1144,7 @@ L_RESTORE_SGPR:
s_movreld_b64 s2, s2
read_8sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
- s_waitcnt lgkmcnt(0)
+ S_WAITCNT_0
s_sub_u32 m0, m0, 8 // Restore from S[0] to S[96]
s_nop 0 // hazard SALU M0=> S_MOVREL
@@ -877,7 +1156,7 @@ L_RESTORE_SGPR:
L_RESTORE_SGPR_LOOP:
read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
- s_waitcnt lgkmcnt(0)
+ S_WAITCNT_0
s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0]
s_nop 0 // hazard SALU M0=> S_MOVREL
@@ -919,12 +1198,12 @@ L_RESTORE_HWREG:
read_hwreg_from_mem(s_restore_xnack_mask, s_restore_buf_rsrc0, s_restore_mem_offset)
read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset)
read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset)
- s_waitcnt lgkmcnt(0)
+ S_WAITCNT_0
s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_LO), s_restore_flat_scratch
read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset)
- s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS
+ S_WAITCNT_0
s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI), s_restore_flat_scratch
@@ -932,59 +1211,63 @@ L_RESTORE_HWREG:
s_mov_b32 exec_lo, s_restore_exec_lo
s_mov_b32 exec_hi, s_restore_exec_hi
- s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
- s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0
-
-#if ASIC_TARGET_NAVI1X
+#if HAVE_XNACK
s_setreg_b32 hwreg(HW_REG_SHADER_XNACK_MASK), s_restore_xnack_mask
#endif
- s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts
- s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT
- s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0
+ // {TRAPSTS/EXCP_FLAG_PRIV}.SAVE_CONTEXT and HOST_TRAP may have changed.
+ // Only restore the other fields to avoid clobbering them.
+ s_setreg_b32 hwreg(S_TRAPSTS_HWREG, 0, S_TRAPSTS_RESTORE_PART_1_SIZE), s_restore_trapsts
+ s_lshr_b32 s_restore_trapsts, s_restore_trapsts, S_TRAPSTS_RESTORE_PART_2_SHIFT
+ s_setreg_b32 hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_RESTORE_PART_2_SHIFT, S_TRAPSTS_RESTORE_PART_2_SIZE), s_restore_trapsts
+
+if S_TRAPSTS_RESTORE_PART_3_SIZE > 0
+ s_lshr_b32 s_restore_trapsts, s_restore_trapsts, S_TRAPSTS_RESTORE_PART_3_SHIFT - S_TRAPSTS_RESTORE_PART_2_SHIFT
+ s_setreg_b32 hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_RESTORE_PART_3_SHIFT, S_TRAPSTS_RESTORE_PART_3_SIZE), s_restore_trapsts
+end
+
s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
// Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
- // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
+ // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
get_vgpr_size_bytes(s_restore_ttmps_lo, s_restore_size)
+ get_svgpr_size_bytes(s_restore_ttmps_hi)
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi
s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, get_sgpr_size_bytes()
s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
- s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 glc:1
- s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 glc:1
- s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
- s_waitcnt lgkmcnt(0)
-
-#if ASIC_TARGET_NAVI1X
- s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK
- s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
- s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
- s_mov_b32 s_restore_tmp, 0x0
- s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
- s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK
- s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
- s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
- s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
- s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_REPLAY_W64H_MASK
- s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_REPLAY_W64H_SHIFT
- s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT
- s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
-
- s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
- s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
- s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp
+ s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 S_COHERENCE
+ s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 S_COHERENCE
+ s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 S_COHERENCE
+ S_WAITCNT_0
+
+#if HAVE_XNACK
+ restore_ib_sts(s_restore_tmp, s_restore_m0)
#endif
s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+
+#if SW_SA_TRAP
+ // If traps are enabled then return to the shader with PRIV=0.
+ // Otherwise retain PRIV=1 for subsequent context save requests.
+ s_getreg_b32 s_restore_tmp, hwreg(HW_REG_STATUS)
+ s_bitcmp1_b32 s_restore_tmp, SQ_WAVE_STATUS_TRAP_EN_SHIFT
+ s_cbranch_scc1 L_RETURN_WITHOUT_PRIV
+
s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu
+ s_setpc_b64 [s_restore_pc_lo, s_restore_pc_hi]
+L_RETURN_WITHOUT_PRIV:
+#endif
+
+ s_setreg_b32 hwreg(S_STATUS_HWREG), s_restore_status // SCC is included, which is changed by previous salu
s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
L_END_PGM:
- s_endpgm
+ s_endpgm_saved
end
function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
@@ -995,7 +1278,7 @@ function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
#else
s_mov_b32 exec_lo, m0
s_mov_b32 m0, s_mem_offset
- s_buffer_store_dword s, s_rsrc, m0 glc:1
+ s_buffer_store_dword s, s_rsrc, m0 S_COHERENCE
s_add_u32 s_mem_offset, s_mem_offset, 4
s_mov_b32 m0, exec_lo
#endif
@@ -1010,10 +1293,10 @@ function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset)
s_add_u32 ttmp13, ttmp13, 0x1
end
#else
- s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1
- s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1
- s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1
- s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1
+ s_buffer_store_dwordx4 s[0], s_rsrc, 0 S_COHERENCE
+ s_buffer_store_dwordx4 s[4], s_rsrc, 16 S_COHERENCE
+ s_buffer_store_dwordx4 s[8], s_rsrc, 32 S_COHERENCE
+ s_buffer_store_dwordx4 s[12], s_rsrc, 48 S_COHERENCE
s_add_u32 s_rsrc[0], s_rsrc[0], 4*16
s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0
#endif
@@ -1027,40 +1310,72 @@ function write_12sgpr_to_mem(s, s_rsrc, s_mem_offset)
s_add_u32 ttmp13, ttmp13, 0x1
end
#else
- s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1
- s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1
- s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1
+ s_buffer_store_dwordx4 s[0], s_rsrc, 0 S_COHERENCE
+ s_buffer_store_dwordx4 s[4], s_rsrc, 16 S_COHERENCE
+ s_buffer_store_dwordx4 s[8], s_rsrc, 32 S_COHERENCE
s_add_u32 s_rsrc[0], s_rsrc[0], 4*12
s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0
#endif
end
function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
- s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1
+ s_buffer_load_dword s, s_rsrc, s_mem_offset S_COHERENCE // read one dword at s_mem_offset into s; the following add steps the offset past it
s_add_u32 s_mem_offset, s_mem_offset, 4
end
function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
s_sub_u32 s_mem_offset, s_mem_offset, 4*16
- s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset glc:1
+ s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset S_COHERENCE // offset was stepped back by 16 dwords first, so successive calls walk downward in memory
end
function read_8sgpr_from_mem(s, s_rsrc, s_mem_offset)
s_sub_u32 s_mem_offset, s_mem_offset, 4*8
- s_buffer_load_dwordx8 s, s_rsrc, s_mem_offset glc:1
+ s_buffer_load_dwordx8 s, s_rsrc, s_mem_offset S_COHERENCE // offset was stepped back by 8 dwords first, so successive calls walk downward in memory
end
function read_4sgpr_from_mem(s, s_rsrc, s_mem_offset)
s_sub_u32 s_mem_offset, s_mem_offset, 4*4
- s_buffer_load_dwordx4 s, s_rsrc, s_mem_offset glc:1
+ s_buffer_load_dwordx4 s, s_rsrc, s_mem_offset S_COHERENCE // offset was stepped back by 4 dwords first, so successive calls walk downward in memory
end
+#if SAVE_AFTER_XNACK_ERROR
+function check_if_tcp_store_ok
+ // If TRAPSTS.XNACK_ERROR=1 then TCP stores will fail.
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_TRAPSTS) // clobbers s_save_tmp
+ s_andn2_b32 s_save_tmp, SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK, s_save_tmp // s_save_tmp = MASK & ~TRAPSTS: non-zero (SCC=1) only when XNACK_ERROR is clear; callers presumably branch on SCC - confirm at call sites
-function get_lds_size_bytes(s_lds_size_byte)
- s_getreg_b32 s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
- s_lshl_b32 s_lds_size_byte, s_lds_size_byte, 8 //LDS size in dwords = lds_size * 64 *4Bytes // granularity 64DW
+L_TCP_STORE_CHECK_DONE:
end
+function write_vgpr_to_mem_with_sqc(vgpr, n_lanes, s_rsrc, s_mem_offset)
+ s_mov_b32 s4, 0 // s4 = next lane to read; the routine copies n_lanes lanes of vgpr to memory through s0-s3 (clobbers s0-s4)
+
+L_WRITE_VGPR_LANE_LOOP:
+ for var lane = 0; lane < 4; ++lane
+ v_readlane_b32 s[lane], vgpr, s4 // stage four consecutive lanes in s0..s3
+ s_add_u32 s4, s4, 1
+ end
+
+ s_buffer_store_dwordx4 s[0:3], s_rsrc, s_mem_offset glc:1 // scalar store, so no vector memory store is needed for the VGPR contents
+
+ s_add_u32 s_mem_offset, s_mem_offset, 0x10 // 4 dwords = 16 bytes written per iteration
+ s_cmp_eq_u32 s4, n_lanes // s4 advances by 4 per pass, so n_lanes must be a non-zero multiple of 4 for this equality to hit
+ s_cbranch_scc0 L_WRITE_VGPR_LANE_LOOP
+end
+
+function write_vgprs_to_mem_with_sqc_w32(vgpr0, n_vgprs, s_rsrc, s_mem_offset)
+ for var vgpr = 0; vgpr < n_vgprs; ++vgpr
+ write_vgpr_to_mem_with_sqc(vgpr0[vgpr], 32, s_rsrc, s_mem_offset) // 32 lanes per VGPR; the callee advances s_mem_offset, so consecutive VGPRs land back to back
+ end
+end
+
+function write_vgprs_to_mem_with_sqc_w64(vgpr0, n_vgprs, s_rsrc, s_mem_offset)
+ for var vgpr = 0; vgpr < n_vgprs; ++vgpr
+ write_vgpr_to_mem_with_sqc(vgpr0[vgpr], 64, s_rsrc, s_mem_offset) // 64 lanes per VGPR; the callee advances s_mem_offset, so consecutive VGPRs land back to back
+ end
+end
+#endif
+
function get_vgpr_size_bytes(s_vgpr_size_byte, s_size)
s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1
@@ -1086,8 +1401,34 @@ function get_hwreg_size_bytes
return 128
end
-function get_wave_size(s_reg)
+function get_wave_size2(s_reg) // s_reg = IB_STS2.WAVE64 moved to bit S_WAVE_SIZE; unlike the old get_wave_size it no longer ORs in s_save_spi_init_hi
s_getreg_b32 s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE
- s_or_b32 s_reg, s_save_spi_init_hi, s_reg //share with exec_hi, it's at bit25
end
+
+#if HAVE_XNACK
+function save_and_clear_ib_sts(tmp1, tmp2)
+ // Preserve and clear scalar XNACK state before issuing scalar loads.
+ // Save IB_STS.REPLAY_W64H[25], RCNT[21:16], FIRST_REPLAY[15] into
+ // unused space ttmp11[31:24].
+ s_andn2_b32 ttmp11, ttmp11, (TTMP11_SAVE_REPLAY_W64H_MASK | TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK) // empty the save slots in ttmp11 before OR-ing new values in
+ s_getreg_b32 tmp1, hwreg(HW_REG_IB_STS) // tmp1 = live IB_STS; tmp1 and tmp2 are both clobbered
+ s_and_b32 tmp2, tmp1, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
+ s_lshl_b32 tmp2, tmp2, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT) // move REPLAY_W64H from its IB_STS position to its ttmp11 slot
+ s_or_b32 ttmp11, ttmp11, tmp2
+ s_and_b32 tmp2, tmp1, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+ s_lshl_b32 tmp2, tmp2, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) // move RCNT + FIRST_REPLAY as one field to their ttmp11 slot
+ s_or_b32 ttmp11, ttmp11, tmp2
+ s_andn2_b32 tmp1, tmp1, (SQ_WAVE_IB_STS_REPLAY_W64H_MASK | SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK) // zero the replay fields in the IB_STS copy ...
+ s_setreg_b32 hwreg(HW_REG_IB_STS), tmp1 // ... and write it back, leaving the other IB_STS bits as read
+end
+
+function restore_ib_sts(tmp1, tmp2)
+ s_lshr_b32 tmp1, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) // undo the left shift applied by save_and_clear_ib_sts
+ s_and_b32 tmp2, tmp1, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK // tmp2 = RCNT + FIRST_REPLAY at their IB_STS positions
+ s_lshr_b32 tmp1, ttmp11, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
+ s_and_b32 tmp1, tmp1, SQ_WAVE_IB_STS_REPLAY_W64H_MASK // tmp1 = REPLAY_W64H at its IB_STS position
+ s_or_b32 tmp1, tmp1, tmp2
+ s_setreg_b32 hwreg(HW_REG_IB_STS), tmp1 // writes the whole register: every bit other than the saved replay fields is written as zero
+end
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
new file mode 100644
index 000000000000..5a1a1b1f897f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
@@ -0,0 +1,1136 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* To compile this assembly code:
+ *
+ * gfx12:
+ * cpp -DASIC_FAMILY=CHIP_GFX12 cwsr_trap_handler_gfx12.asm -P -o gfx12.sp3
+ * sp3 gfx12.sp3 -hex gfx12.hex
+ */
+
+#define CHIP_GFX12 37 // value ASIC_FAMILY is compared against; the build recipe in this file passes -DASIC_FAMILY=CHIP_GFX12
+
+#define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost TRAP_AFTER_INST exception when SAVECTX raised
+#define HAVE_VALU_SGPR_HAZARD (ASIC_FAMILY == CHIP_GFX12) // presumably gates the valu_sgpr_hazard() helper called after v_writelane sequences - confirm at its definition
+
+var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4 // re-read just before the HWREG save because the bit can change until the barrier signal/wait there
+var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9 // low end of the SCC..POISON_ERR bit range written back at L_EXIT_TRAP
+var SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK = 0xC00
+var SQ_WAVE_STATE_PRIV_HALT_MASK = 0x4000
+var SQ_WAVE_STATE_PRIV_POISON_ERR_MASK = 0x8000
+var SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT = 15 // high end of that written-back range
+var SQ_WAVE_STATUS_WAVE64_SHIFT = 29
+var SQ_WAVE_STATUS_WAVE64_SIZE = 1
+var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24 // tested at L_SAVE: the wave is ended rather than saved when this bit is set
+var SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK = SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK|SQ_WAVE_STATE_PRIV_POISON_ERR_MASK // removed from the saved STATE_PRIV copy on every trap entry
+var S_SAVE_PC_HI_TRAP_ID_MASK = 0xF0000000 // trap ID field in PC_HI; the handler treats non-zero as "entered via a trap instruction"
+
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 8 // field value v means (v + 1) * 4 VGPRs, see L_SAVE_VGPR_NORMAL
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12
+var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24
+var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4 // field value v means v * 8 shared VGPRs, see L_SAVE_SHARED_VGPR
+var SQ_WAVE_LDS_ALLOC_GRANULARITY = 9 // used as a shift: LDS_SIZE << 9 is the LDS allocation in bytes
+
+var SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK = 0xF
+var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK = 0x10
+var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT = 5 // context-save request bit; cleared with s_setreg at L_SAVE
+var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK = 0x20
+var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK = 0x40
+var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6
+var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK = 0x80
+var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT = 7
+var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK = 0x100
+var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT = 8
+var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK = 0x200
+var SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK = 0x800
+var SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK = 0x80 // OR'ed into the EXCP_FLAG_USER copy when any PRIV address-watch bit is set, before masking with TRAP_CTRL
+var SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK = 0x200 // single-step enable; checked ahead of SAVECTX under SINGLE_STEP_MISSED_WORKAROUND
+
+var SQ_WAVE_EXCP_FLAG_PRIV_NON_MASKABLE_EXCP_MASK= SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK // any of these sends a non-halted wave to L_FETCH_2ND_TRAP
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_1_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT // part 1 = bits [0, 5): everything below SAVE_CONTEXT
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT // part 2 = bit 6 only (between SAVE_CONTEXT and HOST_TRAP)
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE = 32 - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT // part 3 = bits [8, 32); the three parts together skip SAVE_CONTEXT (5) and HOST_TRAP (7)
+var BARRIER_STATE_SIGNAL_OFFSET = 16
+var BARRIER_STATE_VALID_OFFSET = 0
+
+var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23 // ttmp11 bit that receives the debug-trap-enabled dword loaded at TMA+0x10 in L_FETCH_2ND_TRAP
+var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000
+
+// SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14]
+// when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
+var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 // OR'ed into buffer resource word 1 above the 16-bit base_addr_hi
+var S_SAVE_BUF_RSRC_WORD3_MISC = 0x10807FAC
+var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 // first-wave flag as delivered in s_save_spi_init_hi (exec_hi)
+var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
+
+var S_SAVE_PC_HI_FIRST_WAVE_MASK = 0x80000000 // same flag parked in s_save_pc_hi bit 31 for the duration of the save
+var S_SAVE_PC_HI_FIRST_WAVE_SHIFT = 31
+
+var s_sgpr_save_num = 108 // matches the save loop: 96 SGPRs in groups of 16 plus a final group of 12
+
+var s_save_spi_init_lo = exec_lo // exec receives the MSG_RTN_SAVE_WAVE reply; lo word is used as base_addr_lo
+var s_save_spi_init_hi = exec_hi // low 16 bits are base_addr_hi; also carries the first-wave flag
+var s_save_pc_lo = ttmp0
+var s_save_pc_hi = ttmp1
+var s_save_exec_lo = ttmp2 // ttmp2/ttmp3 are plain scratch before L_SAVE
+var s_save_exec_hi = ttmp3
+var s_save_state_priv = ttmp12
+var s_save_excp_flag_priv = ttmp15
+var s_save_xnack_mask = s_save_excp_flag_priv // alias of ttmp15
+var s_wave_size = ttmp7
+var s_save_buf_rsrc0 = ttmp8
+var s_save_buf_rsrc1 = ttmp9
+var s_save_buf_rsrc2 = ttmp10
+var s_save_buf_rsrc3 = ttmp11
+var s_save_mem_offset = ttmp4
+var s_save_alloc_size = s_save_excp_flag_priv // alias of ttmp15: overwrites the exception flags once LDS/VGPR sizing starts
+var s_save_tmp = ttmp14
+var s_save_m0 = ttmp5
+var s_save_ttmps_lo = s_save_tmp // alias of ttmp14: 64-bit address pair used while storing v0 and the ttmps
+var s_save_ttmps_hi = s_save_excp_flag_priv // alias of ttmp15
+
+var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
+var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC
+
+var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 // same value as S_SAVE_SPI_INIT_FIRST_WAVE_MASK
+var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26
+var S_WAVE_SIZE = 25 // bit position of the wave64 flag inside s_wave_size / s_restore_size (tested via >> S_WAVE_SIZE, & 1)
+
+var s_restore_spi_init_lo = exec_lo
+var s_restore_spi_init_hi = exec_hi // copied to s_restore_spi_init_hi_save at L_RESTORE for later use
+var s_restore_mem_offset = ttmp12
+var s_restore_alloc_size = ttmp3
+var s_restore_tmp = ttmp2
+var s_restore_mem_offset_save = s_restore_tmp // alias of ttmp2
+var s_restore_m0 = s_restore_alloc_size // alias of ttmp3
+var s_restore_mode = ttmp7
+var s_restore_flat_scratch = s_restore_tmp // alias of ttmp2
+var s_restore_pc_lo = ttmp0
+var s_restore_pc_hi = ttmp1
+var s_restore_exec_lo = ttmp4
+var s_restore_exec_hi = ttmp5
+var s_restore_state_priv = ttmp14
+var s_restore_excp_flag_priv = ttmp15
+var s_restore_xnack_mask = ttmp13
+var s_restore_buf_rsrc0 = ttmp8
+var s_restore_buf_rsrc1 = ttmp9
+var s_restore_buf_rsrc2 = ttmp10
+var s_restore_buf_rsrc3 = ttmp11
+var s_restore_size = ttmp6 // holds the get_wave_size2 result (wave64 flag at bit S_WAVE_SIZE)
+var s_restore_ttmps_lo = s_restore_tmp // alias of ttmp2
+var s_restore_ttmps_hi = s_restore_alloc_size // alias of ttmp3
+var s_restore_spi_init_hi_save = s_restore_exec_hi // alias of ttmp5: shares the register with the restored exec_hi
+
+shader main
+ asic(DEFAULT)
+ type(CS)
+ wave_size(32)
+
+ s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save
+
+L_JUMP_TO_RESTORE:
+ s_branch L_RESTORE
+
+L_SKIP_RESTORE:
+ s_getreg_b32 s_save_state_priv, hwreg(HW_REG_WAVE_STATE_PRIV) //save STATUS since we will change SCC
+
+ // Clear SPI_PRIO: do not save with elevated priority.
+ // Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
+ s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK
+
+ s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+
+ s_and_b32 ttmp2, s_save_state_priv, SQ_WAVE_STATE_PRIV_HALT_MASK
+ s_cbranch_scc0 L_NOT_HALTED
+
+L_HALTED:
+ // Host trap may occur while wave is halted.
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+L_CHECK_SAVE:
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK
+ s_cbranch_scc1 L_SAVE
+
+ // Wave is halted but neither host trap nor SAVECTX is raised.
+ // Caused by instruction fetch memory violation.
+ // Spin wait until context saved to prevent interrupt storm.
+ s_sleep 0x10
+ s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+ s_branch L_CHECK_SAVE
+
+L_NOT_HALTED:
+ // Let second-level handle non-SAVECTX exception or trap.
+ // Any concurrent SAVECTX will be handled upon re-entry once halted.
+
+ // Check non-maskable exceptions. memory_violation, illegal_instruction,
+ // host_trap, wave_start, wave_end and trap_after_inst exceptions always
+ // cause the wave to enter the trap handler.
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_NON_MASKABLE_EXCP_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+ // Check for maskable exceptions in WAVE_EXCP_FLAG_USER (plus address watch).
+ // Maskable exceptions only cause the wave to enter the trap handler if
+ // their respective bit in WAVE_TRAP_CTRL is set.
+ s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
+ s_and_b32 ttmp3, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK
+ s_cbranch_scc0 L_NOT_ADDR_WATCH
+ s_or_b32 ttmp2, ttmp2, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK
+
+L_NOT_ADDR_WATCH:
+ s_getreg_b32 ttmp3, hwreg(HW_REG_WAVE_TRAP_CTRL)
+ s_and_b32 ttmp2, ttmp3, ttmp2
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+L_CHECK_TRAP_ID:
+ // Check trap_id != 0
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+#if SINGLE_STEP_MISSED_WORKAROUND
+ // Prioritize single step exception over context save.
+ // Second-level trap will halt wave and RFE, re-entering for SAVECTX.
+ // WAVE_TRAP_CTRL is already in ttmp3.
+ s_and_b32 ttmp3, ttmp3, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+#endif
+
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK
+ s_cbranch_scc1 L_SAVE
+
+L_FETCH_2ND_TRAP:
+ // Read second-level TBA/TMA from first-level TMA and jump if available.
+ // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
+ // ttmp12 holds the saved SQ_WAVE_STATE_PRIV
+ s_sendmsg_rtn_b64 [ttmp14, ttmp15], sendmsg(MSG_RTN_GET_TMA)
+ s_wait_idle
+ s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
+
+ s_bitcmp1_b32 ttmp15, 0xF
+ s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA
+ s_or_b32 ttmp15, ttmp15, 0xFFFF0000
+L_NO_SIGN_EXTEND_TMA:
+
+ s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 scope:SCOPE_SYS // debug trap enabled flag
+ s_wait_idle
+ s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
+ s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK
+ s_or_b32 ttmp11, ttmp11, ttmp2
+
+ s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 scope:SCOPE_SYS // second-level TBA
+ s_wait_idle
+ s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 scope:SCOPE_SYS // second-level TMA
+ s_wait_idle
+
+ s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
+ s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set
+ s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler
+
+L_NO_NEXT_TRAP:
+ // If not caused by trap then halt wave to prevent re-entry.
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+ s_cbranch_scc1 L_TRAP_CASE
+
+ // Host trap will not cause trap re-entry.
+ s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+ s_and_b32 ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
+ s_cbranch_scc1 L_EXIT_TRAP
+ s_or_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_HALT_MASK
+
+ // If the PC points to S_ENDPGM then context save will fail if STATE_PRIV.HALT is set.
+ // Rewind the PC to prevent this from occurring.
+ s_sub_u32 ttmp0, ttmp0, 0x8
+ s_subb_u32 ttmp1, ttmp1, 0x0
+
+ s_branch L_EXIT_TRAP
+
+L_TRAP_CASE:
+ // Advance past trap instruction to prevent re-entry.
+ s_add_u32 ttmp0, ttmp0, 0x4
+ s_addc_u32 ttmp1, ttmp1, 0x0
+
+L_EXIT_TRAP:
+ s_and_b32 ttmp1, ttmp1, 0xFFFF
+
+ // Restore SQ_WAVE_STATUS.
+ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
+ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+
+ // STATE_PRIV.BARRIER_COMPLETE may have changed since we read it.
+ // Only restore fields which the trap handler changes.
+ s_lshr_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_SCC_SHIFT
+ s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \
+ SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_state_priv
+
+ s_rfe_b64 [ttmp0, ttmp1]
+
+L_SAVE:
+ // If VGPRs have been deallocated then terminate the wavefront.
+ // It has no remaining program to run and cannot save without VGPRs.
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
+ s_bitcmp1_b32 s_save_tmp, SQ_WAVE_STATUS_NO_VGPRS_SHIFT
+ s_cbranch_scc0 L_HAVE_VGPRS
+ s_endpgm
+L_HAVE_VGPRS:
+
+ s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
+ s_mov_b32 s_save_tmp, 0
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit
+
+ /* inform SPI the readiness and wait for SPI's go signal */
+ s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI
+ s_mov_b32 s_save_exec_hi, exec_hi
+ s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive
+
+ s_sendmsg_rtn_b64 [exec_lo, exec_hi], sendmsg(MSG_RTN_SAVE_WAVE)
+ s_wait_idle
+
+ // Save first_wave flag so we can clear high bits of save address.
+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
+ s_lshl_b32 s_save_tmp, s_save_tmp, (S_SAVE_PC_HI_FIRST_WAVE_SHIFT - S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT)
+ s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
+
+ // Trap temporaries must be saved via VGPR but all VGPRs are in use.
+ // There is no ttmp space to hold the resource constant for VGPR save.
+ // Save v0 by itself since it requires only two SGPRs.
+ s_mov_b32 s_save_ttmps_lo, exec_lo
+ s_and_b32 s_save_ttmps_hi, exec_hi, 0xFFFF
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] scope:SCOPE_SYS
+ v_mov_b32 v0, 0x0
+ s_mov_b32 exec_lo, s_save_ttmps_lo
+ s_mov_b32 exec_hi, s_save_ttmps_hi
+
+ // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
+ // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
+ get_wave_size2(s_save_ttmps_hi)
+ get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi)
+ get_svgpr_size_bytes(s_save_ttmps_hi)
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi
+ s_and_b32 s_save_ttmps_hi, s_save_spi_init_hi, 0xFFFF
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, get_sgpr_size_bytes()
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
+ s_addc_u32 s_save_ttmps_hi, s_save_ttmps_hi, 0x0
+
+ v_writelane_b32 v0, ttmp4, 0x4
+ v_writelane_b32 v0, ttmp5, 0x5
+ v_writelane_b32 v0, ttmp6, 0x6
+ v_writelane_b32 v0, ttmp7, 0x7
+ v_writelane_b32 v0, ttmp8, 0x8
+ v_writelane_b32 v0, ttmp9, 0x9
+ v_writelane_b32 v0, ttmp10, 0xA
+ v_writelane_b32 v0, ttmp11, 0xB
+ v_writelane_b32 v0, ttmp13, 0xD
+ v_writelane_b32 v0, exec_lo, 0xE
+ v_writelane_b32 v0, exec_hi, 0xF
+ valu_sgpr_hazard()
+
+ s_mov_b32 exec_lo, 0x3FFF
+ s_mov_b32 exec_hi, 0x0
+ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] offset:0x40 scope:SCOPE_SYS
+ v_readlane_b32 ttmp14, v0, 0xE
+ v_readlane_b32 ttmp15, v0, 0xF
+ s_mov_b32 exec_lo, ttmp14
+ s_mov_b32 exec_hi, ttmp15
+
+ /* setup Resource Constants */
+ s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo
+ s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
+ s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited
+ s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
+
+ s_mov_b32 s_save_m0, m0
+
+ /* global mem offset */
+ s_mov_b32 s_save_mem_offset, 0x0
+ get_wave_size2(s_wave_size)
+
+ /* save first 4 VGPRs, needed for SGPR save */
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_SAVE_4VGPR_EXEC_HI
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_SAVE_4VGPR_WAVE32
+L_ENABLE_SAVE_4VGPR_EXEC_HI:
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+ s_branch L_SAVE_4VGPR_WAVE64
+L_SAVE_4VGPR_WAVE32:
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR Allocated in 4-GPR granularity
+
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*3
+ s_branch L_SAVE_HWREG
+
+L_SAVE_4VGPR_WAVE64:
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR Allocated in 4-GPR granularity
+
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*3
+
+ /* save HW registers */
+
+L_SAVE_HWREG:
+ // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
+ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+ get_svgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes()
+
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ v_mov_b32 v0, 0x0 //Offset[31:0] from buffer resource
+ v_mov_b32 v1, 0x0 //Offset[63:32] from buffer resource
+ v_mov_b32 v2, 0x0 //Set of SGPRs for TCP store
+
+ // Ensure no further changes to barrier or LDS state.
+ // STATE_PRIV.BARRIER_COMPLETE may change up to this point.
+ s_barrier_signal -2
+ s_barrier_wait -2
+
+ // Re-read final state of BARRIER_COMPLETE field for save.
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATE_PRIV)
+ s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
+ s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
+ s_or_b32 s_save_state_priv, s_save_state_priv, s_save_tmp
+
+ s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
+ v_writelane_b32 v2, s_save_m0, 0x0
+ v_writelane_b32 v2, s_save_pc_lo, 0x1
+ v_writelane_b32 v2, s_save_tmp, 0x2
+ v_writelane_b32 v2, s_save_exec_lo, 0x3
+ v_writelane_b32 v2, s_save_exec_hi, 0x4
+ v_writelane_b32 v2, s_save_state_priv, 0x5
+ v_writelane_b32 v2, s_save_xnack_mask, 0x7
+ valu_sgpr_hazard()
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+ v_writelane_b32 v2, s_save_tmp, 0x6
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_MODE)
+ v_writelane_b32 v2, s_save_tmp, 0x8
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_SCRATCH_BASE_LO)
+ v_writelane_b32 v2, s_save_tmp, 0x9
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_SCRATCH_BASE_HI)
+ v_writelane_b32 v2, s_save_tmp, 0xA
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
+ v_writelane_b32 v2, s_save_tmp, 0xB
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_TRAP_CTRL)
+ v_writelane_b32 v2, s_save_tmp, 0xC
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
+ v_writelane_b32 v2, s_save_tmp, 0xD
+
+ s_get_barrier_state s_save_tmp, -1
+ s_wait_kmcnt (0)
+ v_writelane_b32 v2, s_save_tmp, 0xE
+ valu_sgpr_hazard()
+
+ // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
+ s_mov_b32 exec_lo, 0xFFFF
+ s_mov_b32 exec_hi, 0x0
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+ // Write SGPRs with 32 VGPR lanes. This works in wave32 and wave64 mode.
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+
+ /* save SGPRs */
+ // Save SGPR before LDS save, then the s0 to s4 can be used during LDS save...
+
+ // SGPR SR memory offset : size(VGPR)+size(SVGPR)
+ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+ get_svgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ s_mov_b32 ttmp13, 0x0 //next VGPR lane to copy SGPR into
+
+ s_mov_b32 m0, 0x0 //SGPR initial index value =0
+ s_nop 0x0 //Manually inserted wait states
+L_SAVE_SGPR_LOOP:
+ // SGPR is allocated in 16 SGPR granularity
+ s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
+ s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
+ s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
+ s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
+ s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
+ s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
+ s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0]
+ s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0]
+
+ s_cmp_eq_u32 ttmp13, 0x0
+ s_cbranch_scc0 L_WRITE_V2_SECOND_HALF
+ write_16sgpr_to_v2(s0, 0x0)
+ s_branch L_SAVE_SGPR_SKIP_TCP_STORE
+L_WRITE_V2_SECOND_HALF:
+ write_16sgpr_to_v2(s0, 0x10)
+
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 0x80
+ s_mov_b32 ttmp13, 0x0
+ v_mov_b32 v2, 0x0
+L_SAVE_SGPR_SKIP_TCP_STORE:
+
+ s_add_u32 m0, m0, 16 //next sgpr index
+ s_cmp_lt_u32 m0, 96 //scc = (m0 < first 96 SGPR) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_SGPR_LOOP //first 96 SGPR save is complete?
+
+ //save the rest 12 SGPR
+ s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
+ s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
+ s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
+ s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
+ s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
+ s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
+ write_12sgpr_to_v2(s0)
+
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+ /* save LDS */
+
+L_SAVE_LDS:
+ // Change EXEC to all threads...
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_SAVE_LDS_EXEC_HI
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_SAVE_LDS_NORMAL
+L_ENABLE_SAVE_LDS_EXEC_HI:
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+L_SAVE_LDS_NORMAL:
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
+ s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero?
+ s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE
+
+ s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
+ s_cbranch_scc0 L_SAVE_LDS_DONE
+
+ // first wave do LDS save;
+
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY
+ s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes
+
+ // LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
+ //
+ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+ get_svgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes()
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes()
+
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ //load 0~63*4(byte address) to vgpr v0
+ v_mbcnt_lo_u32_b32 v0, -1, 0
+ v_mbcnt_hi_u32_b32 v0, -1, v0
+ v_mul_u32_u24 v0, 4, v0
+
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_mov_b32 m0, 0x0
+ s_cbranch_scc1 L_SAVE_LDS_W64
+
+L_SAVE_LDS_W32:
+ s_mov_b32 s3, 128
+ s_nop 0
+ s_nop 0
+ s_nop 0
+L_SAVE_LDS_LOOP_W32:
+ ds_read_b32 v1, v0
+ s_wait_idle
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+ s_add_u32 m0, m0, s3 //every buffer_store_lds does 128 bytes
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s3
+ v_add_nc_u32 v0, v0, 128 //mem offset increased by 128 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_LDS_LOOP_W32 //LDS save is complete?
+
+ s_branch L_SAVE_LDS_DONE
+
+L_SAVE_LDS_W64:
+ s_mov_b32 s3, 256
+ s_nop 0
+ s_nop 0
+ s_nop 0
+L_SAVE_LDS_LOOP_W64:
+ ds_read_b32 v1, v0
+ s_wait_idle
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+ s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s3
+ v_add_nc_u32 v0, v0, 256 //mem offset increased by 256 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_LDS_LOOP_W64 //LDS save is complete?
+
+L_SAVE_LDS_DONE:
+ /* save VGPRs - set the Rest VGPRs */
+L_SAVE_VGPR:
+ // VGPR SR memory offset: 0
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_SAVE_VGPR_EXEC_HI
+ s_mov_b32 s_save_mem_offset, (0+128*4) // for the rest VGPRs
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_SAVE_VGPR_NORMAL
+L_ENABLE_SAVE_VGPR_EXEC_HI:
+ s_mov_b32 s_save_mem_offset, (0+256*4) // for the rest VGPRs
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+L_SAVE_VGPR_NORMAL:
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
+ //determine it is wave32 or wave64
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_SAVE_VGPR_WAVE64
+
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR Allocated in 4-GPR granularity
+
+ // VGPR store using dw burst
+ s_mov_b32 m0, 0x4 //VGPR initial index value =4
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc0 L_SAVE_VGPR_END
+
+L_SAVE_VGPR_W32_LOOP:
+ v_movrels_b32 v0, v0 //v0 = v[0+m0]
+ v_movrels_b32 v1, v1 //v1 = v[1+m0]
+ v_movrels_b32 v2, v2 //v2 = v[2+m0]
+ v_movrels_b32 v3, v3 //v3 = v[3+m0]
+
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*3
+
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 128*4 //every buffer_store_dword does 128 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_VGPR_W32_LOOP //VGPR save is complete?
+
+ s_branch L_SAVE_VGPR_END
+
+L_SAVE_VGPR_WAVE64:
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR store using dw burst
+ s_mov_b32 m0, 0x4 //VGPR initial index value =4
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc0 L_SAVE_SHARED_VGPR
+
+L_SAVE_VGPR_W64_LOOP:
+ v_movrels_b32 v0, v0 //v0 = v[0+m0]
+ v_movrels_b32 v1, v1 //v1 = v[1+m0]
+ v_movrels_b32 v2, v2 //v2 = v[2+m0]
+ v_movrels_b32 v3, v3 //v3 = v[3+m0]
+
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*3
+
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_VGPR_W64_LOOP //VGPR save is complete?
+
+L_SAVE_SHARED_VGPR:
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
+ s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero?
+ s_cbranch_scc0 L_SAVE_VGPR_END //no shared_vgpr used? jump to L_SAVE_LDS
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value)
+ //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count.
+ //save shared_vgpr will start from the index of m0
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, m0
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+ s_mov_b32 exec_hi, 0x00000000
+
+L_SAVE_SHARED_VGPR_WAVE64_LOOP:
+ v_movrels_b32 v0, v0 //v0 = v[0+m0]
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+ s_add_u32 m0, m0, 1 //next vgpr index
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 128
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP //SHARED_VGPR save is complete?
+
+L_SAVE_VGPR_END:
+ s_branch L_END_PGM
+
+L_RESTORE:
+ /* Setup Resource Constants */
+ s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo
+ s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi
+ s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
+ s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
+ s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
+
+ // Save s_restore_spi_init_hi for later use.
+ s_mov_b32 s_restore_spi_init_hi_save, s_restore_spi_init_hi
+
+ //determine it is wave32 or wave64
+ get_wave_size2(s_restore_size)
+
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+ s_cbranch_scc0 L_RESTORE_VGPR
+
+ /* restore LDS */
+L_RESTORE_LDS:
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_RESTORE_LDS_EXEC_HI
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_RESTORE_LDS_NORMAL
+L_ENABLE_RESTORE_LDS_EXEC_HI:
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+L_RESTORE_LDS_NORMAL:
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
+ s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero?
+ s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY
+ s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes
+
+ // LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
+ //
+ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
+ get_svgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes()
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_mov_b32 m0, 0x0
+ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64
+
+L_RESTORE_LDS_LOOP_W32:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
+ s_wait_idle
+ ds_store_addtid_b32 v0
+ s_add_u32 m0, m0, 128 // 128 bytes (32 lanes x 1 DW)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //mem offset increased by 128 bytes
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W32 //LDS restore is complete?
+ s_branch L_RESTORE_VGPR
+
+L_RESTORE_LDS_LOOP_W64:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
+ s_wait_idle
+ ds_store_addtid_b32 v0
+ s_add_u32 m0, m0, 256 // 256 bytes (64 lanes x 1 DW)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //mem offset increased by 256 bytes
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64 //LDS restore is complete?
+
+ /* restore VGPRs */
+L_RESTORE_VGPR:
+ // VGPR SR memory offset : 0
+ s_mov_b32 s_restore_mem_offset, 0x0
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_RESTORE_VGPR_EXEC_HI
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_RESTORE_VGPR_NORMAL
+L_ENABLE_RESTORE_VGPR_EXEC_HI:
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+L_RESTORE_VGPR_NORMAL:
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
+ //determine it is wave32 or wave64
+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_RESTORE_VGPR_WAVE64
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR load using dw burst
+ s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore starts with v4; v0-v3 are restored last
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4
+ s_mov_b32 m0, 4 //VGPR initial index value = 4
+ s_cmp_lt_u32 m0, s_restore_alloc_size
+ s_cbranch_scc0 L_RESTORE_SGPR
+
+L_RESTORE_VGPR_WAVE32_LOOP:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128*3
+ s_wait_idle
+ v_movreld_b32 v0, v0 //v[0+m0] = v0
+ v_movreld_b32 v1, v1
+ v_movreld_b32 v2, v2
+ v_movreld_b32 v3, v3
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4 //every buffer_load_dword does 128 bytes
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_VGPR_WAVE32_LOOP //VGPR restore (except v0) is complete?
+
+ /* VGPR restore on v0 */
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128*3
+ s_wait_idle
+
+ s_branch L_RESTORE_SGPR
+
+L_RESTORE_VGPR_WAVE64:
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR load using dw burst
+ s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v4, v0 will be the last
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
+ s_mov_b32 m0, 4 //VGPR initial index value = 4
+ s_cmp_lt_u32 m0, s_restore_alloc_size
+ s_cbranch_scc0 L_RESTORE_SHARED_VGPR
+
+L_RESTORE_VGPR_WAVE64_LOOP:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256*3
+ s_wait_idle
+ v_movreld_b32 v0, v0 //v[0+m0] = v0
+ v_movreld_b32 v1, v1
+ v_movreld_b32 v2, v2
+ v_movreld_b32 v3, v3
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete?
+
+L_RESTORE_SHARED_VGPR:
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //shared_vgpr_size
+ s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero?
+ s_cbranch_scc0 L_RESTORE_V0 //no shared_vgpr used?
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value)
+ //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count.
+ //restore shared_vgpr will start from the index of m0
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, m0
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+ s_mov_b32 exec_hi, 0x00000000
+L_RESTORE_SHARED_VGPR_WAVE64_LOOP:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS
+ s_wait_idle
+ v_movreld_b32 v0, v0 //v[0+m0] = v0
+ s_add_u32 m0, m0, 1 //next vgpr index
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_SHARED_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete?
+
+ s_mov_b32 exec_hi, 0xFFFFFFFF //restore back exec_hi before restoring V0!!
+
+ /* VGPR restore on v0 */
+L_RESTORE_V0:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256*3
+ s_wait_idle
+
+ /* restore SGPRs */
+ //will be 2+8+16*6
+ // SGPR SR memory offset : size(VGPR)+size(SVGPR)
+L_RESTORE_SGPR:
+ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
+ get_svgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
+ s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 20*4 //s108~s127 is not saved
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ s_mov_b32 m0, s_sgpr_save_num
+
+ read_4sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_sub_u32 m0, m0, 4 // Restore from S[0] to S[104]
+ s_nop 0 // hazard SALU M0=> S_MOVREL
+
+ s_movreld_b64 s0, s0 //s[0+m0] = s0
+ s_movreld_b64 s2, s2
+
+ read_8sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_sub_u32 m0, m0, 8 // Restore from S[0] to S[96]
+ s_nop 0 // hazard SALU M0=> S_MOVREL
+
+ s_movreld_b64 s0, s0 //s[0+m0] = s0
+ s_movreld_b64 s2, s2
+ s_movreld_b64 s4, s4
+ s_movreld_b64 s6, s6
+
+ L_RESTORE_SGPR_LOOP:
+ read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0]
+ s_nop 0 // hazard SALU M0=> S_MOVREL
+
+ s_movreld_b64 s0, s0 //s[0+m0] = s0
+ s_movreld_b64 s2, s2
+ s_movreld_b64 s4, s4
+ s_movreld_b64 s6, s6
+ s_movreld_b64 s8, s8
+ s_movreld_b64 s10, s10
+ s_movreld_b64 s12, s12
+ s_movreld_b64 s14, s14
+
+ s_cmp_eq_u32 m0, 0 //scc = (m0 == 0) ? 1 : 0
+ s_cbranch_scc0 L_RESTORE_SGPR_LOOP
+
+ // s_barrier with STATE_PRIV.TRAP_AFTER_INST=1, STATUS.PRIV=1 incorrectly asserts debug exception.
+ // Clear DEBUG_EN before and restore MODE after the barrier.
+ s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE), 0
+
+ /* restore HW registers */
+L_RESTORE_HWREG:
+ // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
+ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
+ get_svgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // Restore s_restore_spi_init_hi before the saved value gets clobbered.
+ s_mov_b32 s_restore_spi_init_hi, s_restore_spi_init_hi_save
+
+ read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_state_priv, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_excp_flag_priv, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_xnack_mask, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_setreg_b32 hwreg(HW_REG_WAVE_SCRATCH_BASE_LO), s_restore_flat_scratch
+
+ read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_setreg_b32 hwreg(HW_REG_WAVE_SCRATCH_BASE_HI), s_restore_flat_scratch
+
+ read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_USER), s_restore_tmp
+
+ read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+ s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp
+
+ // Only the first wave needs to restore the workgroup barrier.
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+ s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
+
+ // Skip over WAVE_STATUS, since there is no state to restore from it
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4
+
+ read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET
+ s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
+
+ // extract the saved signal count from s_restore_tmp
+ s_lshr_b32 s_restore_tmp, s_restore_tmp, BARRIER_STATE_SIGNAL_OFFSET
+
+ // We need to call s_barrier_signal repeatedly to restore the signal
+ // count of the work group barrier. The member count is already
+ // initialized with the number of waves in the work group.
+L_BARRIER_RESTORE_LOOP:
+ s_and_b32 s_restore_tmp, s_restore_tmp, s_restore_tmp
+ s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
+ s_barrier_signal -1
+ s_add_i32 s_restore_tmp, s_restore_tmp, -1
+ s_branch L_BARRIER_RESTORE_LOOP
+
+L_SKIP_BARRIER_RESTORE:
+
+ s_mov_b32 m0, s_restore_m0
+ s_mov_b32 exec_lo, s_restore_exec_lo
+ s_mov_b32 exec_hi, s_restore_exec_hi
+
+ // EXCP_FLAG_PRIV.SAVE_CONTEXT and HOST_TRAP may have changed.
+ // Only restore the other fields to avoid clobbering them.
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, 0, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_1_SIZE), s_restore_excp_flag_priv
+ s_lshr_b32 s_restore_excp_flag_priv, s_restore_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE), s_restore_excp_flag_priv
+ s_lshr_b32 s_restore_excp_flag_priv, s_restore_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE), s_restore_excp_flag_priv
+
+ s_setreg_b32 hwreg(HW_REG_WAVE_MODE), s_restore_mode
+
+ // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
+ // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
+ get_vgpr_size_bytes(s_restore_ttmps_lo, s_restore_size)
+ get_svgpr_size_bytes(s_restore_ttmps_hi)
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, get_sgpr_size_bytes()
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
+ s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
+ s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
+ s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 scope:SCOPE_SYS
+ s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 scope:SCOPE_SYS
+ s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 scope:SCOPE_SYS
+ s_wait_idle
+
+ s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
+ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
+ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+
+ s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV), s_restore_state_priv // SCC is included, which is changed by previous salu
+
+ // Make barrier and LDS state visible to all waves in the group.
+ // STATE_PRIV.BARRIER_COMPLETE may change after this point.
+ s_barrier_signal -2
+ s_barrier_wait -2
+
+ s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
+
+L_END_PGM:
+ // Make sure that no wave of the workgroup can exit the trap handler
+ // before the workgroup barrier state is saved.
+ s_barrier_signal -2
+ s_barrier_wait -2
+ s_endpgm_saved
+end
+
+function write_16sgpr_to_v2(s, lane_offset)
+ // Copy 16 consecutive SGPRs starting at s into lanes lane_offset..lane_offset+15 of v2 for later TCP store.
+ for var sgpr_idx = 0; sgpr_idx < 16; sgpr_idx ++
+ v_writelane_b32 v2, s[sgpr_idx], sgpr_idx + lane_offset //lane (sgpr_idx + lane_offset) of v2 = s[sgpr_idx]
+ end
+ valu_sgpr_hazard()
+ s_add_u32 ttmp13, ttmp13, 0x10 //ttmp13 += 16 (NOTE(review): presumably tracks SGPRs/lanes consumed so far -- confirm against the caller)
+end
+
+function write_12sgpr_to_v2(s)
+ // Copy 12 consecutive SGPRs starting at s into lanes 0..11 of v2 for later TCP store.
+ for var sgpr_idx = 0; sgpr_idx < 12; sgpr_idx ++
+ v_writelane_b32 v2, s[sgpr_idx], sgpr_idx //lane sgpr_idx of v2 = s[sgpr_idx]; unlike write_16sgpr_to_v2 there is no lane offset
+ end
+ valu_sgpr_hazard() //unlike write_16sgpr_to_v2, ttmp13 is not advanced afterwards
+end
+
+function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
+ s_buffer_load_dword s, s_rsrc, s_mem_offset scope:SCOPE_SYS //s = one dword read through s_rsrc at s_mem_offset; callers issue s_wait_idle before using s
+ s_add_u32 s_mem_offset, s_mem_offset, 4 //post-increment: advance the offset to the next dword
+end
+
+function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
+ s_sub_u32 s_mem_offset, s_mem_offset, 4*16 //pre-decrement: step the offset back by 16 dwords (64 bytes)
+ s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset scope:SCOPE_SYS //load 16 dwords starting at the new offset into s; callers issue s_wait_idle before use
+end
+
+function read_8sgpr_from_mem(s, s_rsrc, s_mem_offset)
+ s_sub_u32 s_mem_offset, s_mem_offset, 4*8 //pre-decrement: step the offset back by 8 dwords (32 bytes)
+ s_buffer_load_dwordx8 s, s_rsrc, s_mem_offset scope:SCOPE_SYS //load 8 dwords starting at the new offset into s; callers issue s_wait_idle before use
+end
+
+function read_4sgpr_from_mem(s, s_rsrc, s_mem_offset)
+ s_sub_u32 s_mem_offset, s_mem_offset, 4*4 //pre-decrement: step the offset back by 4 dwords (16 bytes)
+ s_buffer_load_dwordx4 s, s_rsrc, s_mem_offset scope:SCOPE_SYS //load 4 dwords starting at the new offset into s; callers issue s_wait_idle before use
+end
+
+function get_vgpr_size_bytes(s_vgpr_size_byte, s_size)
+ s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //s_vgpr_size_byte = GPR_ALLOC.VGPR_SIZE field
+ s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1 //vgpr_size + 1 allocation units (each unit is 4 VGPRs)
+ s_bitcmp1_b32 s_size, S_WAVE_SIZE //scc = bit S_WAVE_SIZE of s_size, i.e. the wave64 flag as placed by get_wave_size2
+ s_cbranch_scc1 L_ENABLE_SHIFT_W64
+ s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+7) //wave32: VGPR bytes = (vgpr_size + 1) * 4 VGPRs * 32 lanes * 4 bytes (non-zero value)
+ s_branch L_SHIFT_DONE
+L_ENABLE_SHIFT_W64:
+ s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //wave64: VGPR bytes = (vgpr_size + 1) * 4 VGPRs * 64 lanes * 4 bytes (non-zero value)
+L_SHIFT_DONE:
+end
+
+function get_svgpr_size_bytes(s_svgpr_size_byte)
+ s_getreg_b32 s_svgpr_size_byte, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //s_svgpr_size_byte = LDS_ALLOC.VGPR_SHARED_SIZE field (0 when no shared VGPRs)
+ s_lshl_b32 s_svgpr_size_byte, s_svgpr_size_byte, (3+7) //shared VGPR bytes = shared_vgpr_size * 8 shared VGPRs * 128 bytes each
+end
+
+function get_sgpr_size_bytes
+ return 512 //fixed byte size of the SGPR section of the save area; added to the VGPR/SVGPR sizes to locate the HWREG section
+end
+
+function get_hwreg_size_bytes
+ return 128 //fixed byte size of the HWREG section of the save area; added after the SGPR section to locate the LDS section
+end
+
+function get_wave_size2(s_reg)
+ s_getreg_b32 s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE) //s_reg = STATUS.WAVE64 field
+ s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE //move the flag to bit S_WAVE_SIZE, where callers test it (s_lshr by S_WAVE_SIZE / s_bitcmp1 S_WAVE_SIZE)
+end
+
+function valu_sgpr_hazard
+#if HAVE_VALU_SGPR_HAZARD
+ for var rep = 0; rep < 8; rep ++
+ ds_nop //repeated 8 times by the enclosing for; emitted only when HAVE_VALU_SGPR_HAZARD is set, otherwise the function body is empty (NOTE(review): presumably pads a VALU-write/SGPR-read hazard, per the name -- confirm)
+ end
+#endif
+end
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index eed78a04e7c7..6869e07a2fff 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -33,21 +33,35 @@
* aldebaran:
* cpp -DASIC_FAMILY=CHIP_ALDEBARAN cwsr_trap_handler_gfx9.asm -P -o aldebaran.sp3
* sp3 aldebaran.sp3 -hex aldebaran.hex
+ *
+ * gc_9_4_3:
+ * cpp -DASIC_FAMILY=GC_9_4_3 cwsr_trap_handler_gfx9.asm -P -o gc_9_4_3.sp3
+ * sp3 gc_9_4_3.sp3 -hex gc_9_4_3.hex
+ *
+ * gc_9_5_0:
+ * cpp -DASIC_FAMILY=GC_9_5_0 cwsr_trap_handler_gfx9.asm -P -o gc_9_5_0.sp3
+ * sp3 gc_9_5_0.sp3 -hex gc_9_5_0.hex
*/
#define CHIP_VEGAM 18
#define CHIP_ARCTURUS 23
#define CHIP_ALDEBARAN 25
+#define CHIP_GC_9_4_3 26
+#define CHIP_GC_9_5_0 27
var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency
var SAVE_AFTER_XNACK_ERROR = 1 //workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger
-var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
+var SINGLE_STEP_MISSED_WORKAROUND = (ASIC_FAMILY <= CHIP_ALDEBARAN) //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
+
+#if ASIC_FAMILY < CHIP_GC_9_4_3
+#define VMEM_MODIFIERS slc:1 glc:1
+#else
+#define VMEM_MODIFIERS sc0:1 nt:1
+#endif
/**************************************************************************/
/* variables */
/**************************************************************************/
-var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
-var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
var SQ_WAVE_STATUS_HALT_MASK = 0x2000
@@ -56,9 +70,16 @@ var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE = 1
var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT = 3
var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE = 29
var SQ_WAVE_STATUS_ALLOW_REPLAY_MASK = 0x400000
+var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
+#if ASIC_FAMILY >= CHIP_GC_9_5_0
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 11
+var LDS_RESTORE_GRANULARITY_BYTES = 1280
+#else
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
+var LDS_RESTORE_GRANULARITY_BYTES = 512
+#endif
var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6
var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 3 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits
var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24
@@ -72,10 +93,16 @@ var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
#endif
var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400
-var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF // Exception mask
+var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF
var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10
+var SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK = 0x80
+var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT = 7
var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100
var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8
+var SQ_WAVE_TRAPSTS_HOST_TRAP_MASK = 0x400000
+var SQ_WAVE_TRAPSTS_WAVE_BEGIN_MASK = 0x800000
+var SQ_WAVE_TRAPSTS_WAVE_END_MASK = 0x1000000
+var SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK = 0x2000000
var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF
var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0
var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10
@@ -83,37 +110,30 @@ var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800
var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11
var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21
var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800
+var SQ_WAVE_TRAPSTS_EXCP_HI_MASK = 0x7000
var SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK = 0x10000000
-var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME
+var SQ_WAVE_MODE_EXCP_EN_SHIFT = 12
+var SQ_WAVE_MODE_EXCP_EN_ADDR_WATCH_SHIFT = 19
+
var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME
var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000
-var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME
var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800
-var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24
-var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27
-
-var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI debug data
-var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000
+var TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI debug data
+var TTMP_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000
+var TTMP_DEBUG_TRAP_ENABLED_SHIFT = 23
+var TTMP_DEBUG_TRAP_ENABLED_MASK = 0x800000
/* Save */
var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes
var S_SAVE_BUF_RSRC_WORD3_MISC = 0x00807FAC //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
-
-var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit
-var S_SAVE_SPI_INIT_ATC_SHIFT = 27
-var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype
-var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28
+var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000
+var S_SAVE_PC_HI_HT_MASK = 0x01000000
var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
-var S_SAVE_PC_HI_RCNT_SHIFT = 27 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used
-var S_SAVE_PC_HI_RCNT_MASK = 0xF8000000 //FIXME
-var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 26 //FIXME
-var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x04000000 //FIXME
-
var s_save_spi_init_lo = exec_lo
var s_save_spi_init_hi = exec_hi
@@ -135,23 +155,19 @@ var s_save_alloc_size = s_save_trapsts //conflict
var s_save_m0 = ttmp5
var s_save_ttmps_lo = s_save_tmp //no conflict
var s_save_ttmps_hi = s_save_trapsts //no conflict
+#if ASIC_FAMILY >= CHIP_GC_9_4_3
+var s_save_ib_sts = ttmp13
+#else
+var s_save_ib_sts = ttmp11
+#endif
/* Restore */
var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC
-var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit
-var S_RESTORE_SPI_INIT_ATC_SHIFT = 27
-var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype
-var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28
var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26
-var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT
-var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK
-var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
-var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK
-
var s_restore_spi_init_lo = exec_lo
var s_restore_spi_init_hi = exec_hi
@@ -199,71 +215,83 @@ L_JUMP_TO_RESTORE:
L_SKIP_RESTORE:
s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
- s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save
-if SINGLE_STEP_MISSED_WORKAROUND
- // No single step exceptions if MODE.DEBUG_EN=0.
- s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
- s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
- s_cbranch_scc0 L_NO_SINGLE_STEP_WORKAROUND
+ // Clear SPI_PRIO: do not save with elevated priority.
+ // Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
+ s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_ECC_ERR_MASK
- // Second-level trap already handled exception if STATUS.HALT=1.
- s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
- // Prioritize single step exception over context save.
- // Second-level trap will halt wave and RFE, re-entering for SAVECTX.
- s_cbranch_scc0 L_FETCH_2ND_TRAP
+ s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+ s_cbranch_scc0 L_NOT_HALTED
-L_NO_SINGLE_STEP_WORKAROUND:
-end
+L_HALTED:
+ // Host trap may occur while wave is halted.
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
- s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+L_CHECK_SAVE:
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save
s_cbranch_scc1 L_SAVE //this is the operation for save
- // ********* Handle non-CWSR traps *******************
-
- // Illegal instruction is a non-maskable exception which blocks context save.
- // Halt the wavefront and return from the trap.
- s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
- s_cbranch_scc1 L_HALT_WAVE
-
- // If STATUS.MEM_VIOL is asserted then we cannot fetch from the TMA.
- // Instead, halt the wavefront and return from the trap.
- s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
- s_cbranch_scc0 L_FETCH_2ND_TRAP
-
-L_HALT_WAVE:
- // If STATUS.HALT is set then this fault must come from SQC instruction fetch.
- // We cannot prevent further faults. Spin wait until context saved.
- s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
- s_cbranch_scc0 L_NOT_ALREADY_HALTED
-
-L_WAIT_CTX_SAVE:
+ // Wave is halted but neither host trap nor SAVECTX is raised.
+ // Caused by instruction fetch memory violation.
+ // Spin wait until context saved to prevent interrupt storm.
s_sleep 0x10
- s_getreg_b32 ttmp2, hwreg(HW_REG_TRAPSTS)
- s_and_b32 ttmp2, ttmp2, SQ_WAVE_TRAPSTS_SAVECTX_MASK
- s_cbranch_scc0 L_WAIT_CTX_SAVE
+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+ s_branch L_CHECK_SAVE
+
+L_NOT_HALTED:
+ // Let second-level handle non-SAVECTX exception or trap.
+ // Any concurrent SAVECTX will be handled upon re-entry once halted.
+
+ // Check non-maskable exceptions. memory_violation, illegal_instruction
+ // and debugger (host trap, wave start/end, trap after instruction)
+ // exceptions always cause the wave to enter the trap handler.
+ s_and_b32 ttmp2, s_save_trapsts, \
+ SQ_WAVE_TRAPSTS_MEM_VIOL_MASK | \
+ SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK | \
+ SQ_WAVE_TRAPSTS_HOST_TRAP_MASK | \
+ SQ_WAVE_TRAPSTS_WAVE_BEGIN_MASK | \
+ SQ_WAVE_TRAPSTS_WAVE_END_MASK | \
+ SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+ // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
+ // Maskable exceptions only cause the wave to enter the trap handler if
+ // their respective bit in mode.excp_en is set.
+ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
+ s_cbranch_scc0 L_CHECK_TRAP_ID
+
+ s_and_b32 ttmp3, s_save_trapsts, SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
+ s_cbranch_scc0 L_NOT_ADDR_WATCH
+ s_bitset1_b32 ttmp2, SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT // Check all addr_watch[123] exceptions against excp_en.addr_watch
+
+L_NOT_ADDR_WATCH:
+ s_getreg_b32 ttmp3, hwreg(HW_REG_MODE)
+ s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT
+ s_and_b32 ttmp2, ttmp2, ttmp3
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+L_CHECK_TRAP_ID:
+ // Check trap_id != 0
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
-L_NOT_ALREADY_HALTED:
- s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+if SINGLE_STEP_MISSED_WORKAROUND
+ // Prioritize single step exception over context save.
+ // Second-level trap will halt wave and RFE, re-entering for SAVECTX.
+ s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
+ s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+end
- // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
- // Rewind the PC to prevent this from occurring. The debugger compensates for this.
- s_sub_u32 ttmp0, ttmp0, 0x8
- s_subb_u32 ttmp1, ttmp1, 0x0
+ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK
+ s_cbranch_scc1 L_SAVE
L_FETCH_2ND_TRAP:
// Preserve and clear scalar XNACK state before issuing scalar reads.
- // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26].
- s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS)
- s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
- s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
- s_andn2_b32 ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK
- s_or_b32 ttmp11, ttmp11, ttmp3
-
- s_andn2_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
- s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
+ save_and_clear_ib_sts(ttmp14)
// Read second-level TBA/TMA from first-level TMA and jump if available.
// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
@@ -271,27 +299,53 @@ L_FETCH_2ND_TRAP:
s_getreg_b32 ttmp14, hwreg(HW_REG_SQ_SHADER_TMA_LO)
s_getreg_b32 ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI)
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
+
+ s_bitcmp1_b32 ttmp15, 0xF
+ s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA
+ s_or_b32 ttmp15, ttmp15, 0xFFFF0000
+L_NO_SIGN_EXTEND_TMA:
+
+ s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
+ s_waitcnt lgkmcnt(0)
+ s_lshl_b32 ttmp2, ttmp2, TTMP_DEBUG_TRAP_ENABLED_SHIFT
+ s_andn2_b32 s_save_ib_sts, s_save_ib_sts, TTMP_DEBUG_TRAP_ENABLED_MASK
+ s_or_b32 s_save_ib_sts, s_save_ib_sts, ttmp2
+
s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
s_waitcnt lgkmcnt(0)
s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA
s_waitcnt lgkmcnt(0)
+
s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set
s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler
L_NO_NEXT_TRAP:
- s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
- s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception
- s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly.
- s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0
- s_addc_u32 ttmp1, ttmp1, 0
-L_EXCP_CASE:
+ // If not caused by trap then halt wave to prevent re-entry.
+ s_and_b32 ttmp2, s_save_pc_hi, (S_SAVE_PC_HI_TRAP_ID_MASK|S_SAVE_PC_HI_HT_MASK)
+ s_cbranch_scc1 L_TRAP_CASE
+ s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+
+ // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
+ // Rewind the PC to prevent this from occurring.
+ s_sub_u32 ttmp0, ttmp0, 0x8
+ s_subb_u32 ttmp1, ttmp1, 0x0
+
+ s_branch L_EXIT_TRAP
+
+L_TRAP_CASE:
+ // Host trap will not cause trap re-entry.
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK
+ s_cbranch_scc1 L_EXIT_TRAP
+
+ // Advance past trap instruction to prevent re-entry.
+ s_add_u32 ttmp0, ttmp0, 0x4
+ s_addc_u32 ttmp1, ttmp1, 0x0
+
+L_EXIT_TRAP:
s_and_b32 ttmp1, ttmp1, 0xFFFF
- // Restore SQ_WAVE_IB_STS.
- s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
- s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
- s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
+ restore_ib_sts(ttmp14)
// Restore SQ_WAVE_STATUS.
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
@@ -312,16 +366,7 @@ L_SAVE:
s_mov_b32 s_save_tmp, 0 //clear saveCtx bit
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
- s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) //save RCNT
- s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
- s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
- s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) //save FIRST_REPLAY
- s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
- s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
- s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY in IB_STS
- s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG
-
- s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp
+ save_and_clear_ib_sts(s_save_tmp)
/* inform SPI the readiness and wait for SPI's go signal */
s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI
@@ -360,12 +405,6 @@ L_SAVE:
s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited
s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
- s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
- s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position
- s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC
- s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK
- s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position
- s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE
//FIXME right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi (might need to save them before using them?)
s_mov_b32 s_save_m0, m0 //save M0
@@ -408,7 +447,9 @@ L_SAVE:
s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
-
+ // Clear VSKIP state now that MODE.VSKIP has been saved.
+ // If user shader set it then vector instructions would be skipped.
+ s_setvskip 0,0
/* the first wave in the threadgroup */
s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK // extract fisrt wave bit
@@ -535,12 +576,21 @@ if SAVE_AFTER_XNACK_ERROR
v_lshlrev_b32 v2, 2, v3
L_SAVE_LDS_LOOP_SQC:
+#if ASIC_FAMILY < CHIP_GC_9_5_0
ds_read2_b32 v[0:1], v2 offset0:0 offset1:0x40
s_waitcnt lgkmcnt(0)
-
write_vgprs_to_mem_with_sqc(v0, 2, s_save_buf_rsrc0, s_save_mem_offset)
v_add_u32 v2, 0x200, v2
+#else
+ // gfx950 needs to save in multiple of 256 bytes.
+ ds_read_b32 v0, v2
+ s_waitcnt lgkmcnt(0)
+ write_vgprs_to_mem_with_sqc(v0, 1, s_save_buf_rsrc0, s_save_mem_offset)
+
+ v_add_u32 v2, 0x100, v2
+#endif
+
v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size
s_cbranch_vccnz L_SAVE_LDS_LOOP_SQC
@@ -559,11 +609,14 @@ end
L_SAVE_LDS_LOOP_VECTOR:
ds_read_b64 v[0:1], v2 //x =LDS[a], byte address
s_waitcnt lgkmcnt(0)
- buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1 glc:1 slc:1
+ buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset VMEM_MODIFIERS offen:1
// s_waitcnt vmcnt(0)
// v_add_u32 v2, vcc[0:1], v2, v3
v_add_u32 v2, v2, v3
v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size
+#if ASIC_FAMILY >= CHIP_GC_9_5_0
+ s_mov_b64 exec, vcc
+#endif
s_cbranch_vccnz L_SAVE_LDS_LOOP_VECTOR
// restore rsrc3
@@ -690,12 +743,6 @@ L_RESTORE:
s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
- s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK
- s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position
- s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC
- s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
- s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position
- s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE
/* global mem offset */
// s_mov_b32 s_restore_mem_offset, 0x0 //mem offset initial value = 0
@@ -732,8 +779,13 @@ L_RESTORE:
L_RESTORE_LDS_LOOP:
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW
- s_add_u32 m0, m0, 256*2 // 128 DW
- s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW
+#if ASIC_FAMILY >= CHIP_GC_9_5_0
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:512 // third 64DW
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:768 // forth 64DW
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:1024 // fifth 64DW
+#endif
+ s_add_u32 m0, m0, LDS_RESTORE_GRANULARITY_BYTES // 128/320 DW
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, LDS_RESTORE_GRANULARITY_BYTES //mem offset increased by 128/320 DW
s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
s_cbranch_scc1 L_RESTORE_LDS_LOOP //LDS restore is complete?
@@ -889,19 +941,7 @@ L_RESTORE:
s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
s_waitcnt lgkmcnt(0)
- //reuse s_restore_m0 as a temp register
- s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK
- s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
- s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
- s_mov_b32 s_restore_tmp, 0x0 //IB_STS is zero
- s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
- s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK
- s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
- s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
- s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
- s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
- s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
- s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp
+ restore_ib_sts(s_restore_tmp)
s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
@@ -910,15 +950,14 @@ L_RESTORE:
s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
-// s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
- s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc
+ s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
/**************************************************************************/
/* the END */
/**************************************************************************/
L_END_PGM:
- s_endpgm
+ s_endpgm_saved
end
@@ -976,17 +1015,17 @@ L_TCP_STORE_CHECK_DONE:
end
function write_4vgprs_to_mem(s_rsrc, s_mem_offset)
- buffer_store_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
- buffer_store_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256
- buffer_store_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*2
- buffer_store_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*3
+ buffer_store_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
+ buffer_store_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256
+ buffer_store_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*2
+ buffer_store_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*3
end
function read_4vgprs_from_mem(s_rsrc, s_mem_offset)
- buffer_load_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
- buffer_load_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256
- buffer_load_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*2
- buffer_load_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*3
+ buffer_load_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
+ buffer_load_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256
+ buffer_load_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*2
+ buffer_load_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*3
s_waitcnt vmcnt(0)
end
@@ -1078,3 +1117,19 @@ function set_status_without_spi_prio(status, tmp)
s_nop 0x2 // avoid S_SETREG => S_SETREG hazard
s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status
end
+
+function save_and_clear_ib_sts(tmp)
+ // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space s_save_ib_sts[31:26].
+ s_getreg_b32 tmp, hwreg(HW_REG_IB_STS)
+ s_and_b32 tmp, tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+ s_lshl_b32 tmp, tmp, (TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+ s_andn2_b32 s_save_ib_sts, s_save_ib_sts, TTMP_SAVE_RCNT_FIRST_REPLAY_MASK
+ s_or_b32 s_save_ib_sts, s_save_ib_sts, tmp
+ s_setreg_imm32_b32 hwreg(HW_REG_IB_STS), 0x0
+end
+
+function restore_ib_sts(tmp)
+ s_lshr_b32 tmp, s_save_ib_sts, (TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+ s_and_b32 tmp, tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+ s_setreg_b32 hwreg(HW_REG_IB_STS), tmp
+end
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 24ebd61395d8..22925df6a791 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -21,7 +22,6 @@
*/
#include <linux/device.h>
-#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/file.h>
@@ -33,14 +33,16 @@
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
+#include <linux/ptrace.h>
#include <linux/dma-buf.h>
-#include <asm/processor.h>
+#include <linux/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
-#include "kfd_dbgmgr.h"
#include "kfd_svm.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
+#include "amdgpu_dma_buf.h"
+#include "kfd_debug.h"
static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
@@ -59,8 +61,29 @@ static const struct file_operations kfd_fops = {
};
static int kfd_char_dev_major = -1;
-static struct class *kfd_class;
struct device *kfd_device;
+static const struct class kfd_class = {
+ .name = kfd_dev_name,
+};
+
+static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id)
+{
+ struct kfd_process_device *pdd;
+
+ mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, gpu_id);
+
+ if (pdd)
+ return pdd;
+
+ mutex_unlock(&p->mutex);
+ return NULL;
+}
+
+static inline void kfd_unlock_pdd(struct kfd_process_device *pdd)
+{
+ mutex_unlock(&pdd->process->mutex);
+}
int kfd_chardev_init(void)
{
@@ -71,14 +94,13 @@ int kfd_chardev_init(void)
if (err < 0)
goto err_register_chrdev;
- kfd_class = class_create(THIS_MODULE, kfd_dev_name);
- err = PTR_ERR(kfd_class);
- if (IS_ERR(kfd_class))
+ err = class_register(&kfd_class);
+ if (err)
goto err_class_create;
- kfd_device = device_create(kfd_class, NULL,
- MKDEV(kfd_char_dev_major, 0),
- NULL, kfd_dev_name);
+ kfd_device = device_create(&kfd_class, NULL,
+ MKDEV(kfd_char_dev_major, 0),
+ NULL, kfd_dev_name);
err = PTR_ERR(kfd_device);
if (IS_ERR(kfd_device))
goto err_device_create;
@@ -86,7 +108,7 @@ int kfd_chardev_init(void)
return 0;
err_device_create:
- class_destroy(kfd_class);
+ class_unregister(&kfd_class);
err_class_create:
unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
err_register_chrdev:
@@ -95,17 +117,12 @@ err_register_chrdev:
void kfd_chardev_exit(void)
{
- device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
- class_destroy(kfd_class);
+ device_destroy(&kfd_class, MKDEV(kfd_char_dev_major, 0));
+ class_unregister(&kfd_class);
unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
kfd_device = NULL;
}
-struct device *kfd_chardev(void)
-{
- return kfd_device;
-}
-
static int kfd_open(struct inode *inode, struct file *filep)
{
@@ -125,22 +142,20 @@ static int kfd_open(struct inode *inode, struct file *filep)
return -EPERM;
}
- process = kfd_create_process(filep);
+ process = kfd_create_process(current);
if (IS_ERR(process))
return PTR_ERR(process);
- if (kfd_is_locked()) {
- dev_dbg(kfd_device, "kfd is locked!\n"
- "process %d unreferenced", process->pasid);
+ if (kfd_process_init_cwsr_apu(process, filep)) {
kfd_unref_process(process);
- return -EAGAIN;
+ return -EFAULT;
}
/* filep now owns the reference returned by kfd_create_process */
filep->private_data = process;
- dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
- process->pasid, process->is_32bit_user_mode);
+ dev_dbg(kfd_device, "process pid %d opened kfd node, compat mode (32 bit) - %d\n",
+ process->lead_thread->pid, process->is_32bit_user_mode);
return 0;
}
@@ -169,7 +184,12 @@ static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
static int set_queue_properties_from_user(struct queue_properties *q_properties,
struct kfd_ioctl_create_queue_args *args)
{
- if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
+ /*
+ * Repurpose queue percentage to accommodate new features:
+ * bit 0-7: queue percentage
+ * bit 8-15: pm4_target_xcc
+ */
+ if ((args->queue_percentage & 0xFF) > KFD_MAX_QUEUE_PERCENTAGE) {
pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
return -EINVAL;
}
@@ -191,6 +211,11 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
return -EINVAL;
}
+ if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) {
+ args->ring_size = KFD_MIN_QUEUE_RING_SIZE;
+ pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE");
+ }
+
if (!access_ok((const void __user *) args->read_pointer_address,
sizeof(uint32_t))) {
pr_err("Can't access read pointer\n");
@@ -219,18 +244,21 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
q_properties->is_interop = false;
q_properties->is_gws = false;
- q_properties->queue_percent = args->queue_percentage;
+ q_properties->queue_percent = args->queue_percentage & 0xFF;
+ /* bit 8-15 are repurposed to be PM4 target XCC */
+ q_properties->pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
q_properties->priority = args->queue_priority;
q_properties->queue_address = args->ring_base_address;
q_properties->queue_size = args->ring_size;
- q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
- q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
+ q_properties->read_ptr = (void __user *)args->read_pointer_address;
+ q_properties->write_ptr = (void __user *)args->write_pointer_address;
q_properties->eop_ring_buffer_address = args->eop_buffer_address;
q_properties->eop_ring_buffer_size = args->eop_buffer_size;
q_properties->ctx_save_restore_area_address =
args->ctx_save_restore_address;
q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
q_properties->ctl_stack_size = args->ctl_stack_size;
+ q_properties->sdma_engine_id = args->sdma_engine_id;
if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
@@ -238,6 +266,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
q_properties->type = KFD_QUEUE_TYPE_SDMA;
else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
+ else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID)
+ q_properties->type = KFD_QUEUE_TYPE_SDMA_BY_ENG_ID;
else
return -ENOTSUPP;
@@ -276,7 +306,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
void *data)
{
struct kfd_ioctl_create_queue_args *args = data;
- struct kfd_dev *dev;
+ struct kfd_node *dev;
int err = 0;
unsigned int queue_id;
struct kfd_process_device *pdd;
@@ -292,26 +322,55 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
return err;
pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev) {
- pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
- return -EINVAL;
- }
mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ if (!pdd) {
+ pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
+ err = -EINVAL;
+ goto err_pdd;
+ }
+ dev = pdd->dev;
+
pdd = kfd_bind_process_to_device(dev, p);
if (IS_ERR(pdd)) {
err = -ESRCH;
goto err_bind_process;
}
- pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
- p->pasid,
+ if (q_properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) {
+ int max_sdma_eng_id = kfd_get_num_sdma_engines(dev) +
+ kfd_get_num_xgmi_sdma_engines(dev) - 1;
+
+ if (q_properties.sdma_engine_id > max_sdma_eng_id) {
+ err = -EINVAL;
+ pr_err("sdma_engine_id %i exceeds maximum id of %i\n",
+ q_properties.sdma_engine_id, max_sdma_eng_id);
+ goto err_sdma_engine_id;
+ }
+ }
+
+ if (!pdd->qpd.proc_doorbells) {
+ err = kfd_alloc_process_doorbells(dev->kfd, pdd);
+ if (err) {
+ pr_debug("failed to allocate process doorbells\n");
+ goto err_bind_process;
+ }
+ }
+
+ err = kfd_queue_acquire_buffers(pdd, &q_properties);
+ if (err) {
+ pr_debug("failed to acquire user queue buffers\n");
+ goto err_acquire_queue_buf;
+ }
+
+ pr_debug("Creating queue for process pid %d on gpu 0x%x\n",
+ p->lead_thread->pid,
dev->id);
- err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
- &doorbell_offset_in_process);
+ err = pqm_create_queue(&p->pqm, dev, &q_properties, &queue_id,
+ NULL, NULL, NULL, &doorbell_offset_in_process);
if (err != 0)
goto err_create_queue;
@@ -321,7 +380,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
/* Return gpu_id as doorbell offset for mmap usage */
args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
- if (KFD_IS_SOC15(dev->device_info->asic_family))
+ if (KFD_IS_SOC15(dev))
/* On SOC15 ASICs, include the doorbell offset within the
* process doorbell frame, which is 2 pages.
*/
@@ -340,10 +399,16 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
pr_debug("Write ptr address == 0x%016llX\n",
args->write_pointer_address);
+ kfd_dbg_ev_raise(KFD_EC_MASK(EC_QUEUE_NEW), p, dev, queue_id, false, NULL, 0);
return 0;
err_create_queue:
+ kfd_queue_unref_bo_vas(pdd, &q_properties);
+ kfd_queue_release_buffers(pdd, &q_properties);
+err_acquire_queue_buf:
+err_sdma_engine_id:
err_bind_process:
+err_pdd:
mutex_unlock(&p->mutex);
return err;
}
@@ -354,9 +419,9 @@ static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
int retval;
struct kfd_ioctl_destroy_queue_args *args = data;
- pr_debug("Destroying queue id %d for pasid 0x%x\n",
+ pr_debug("Destroying queue id %d for process pid %d\n",
args->queue_id,
- p->pasid);
+ p->lead_thread->pid);
mutex_lock(&p->mutex);
@@ -373,7 +438,12 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
struct kfd_ioctl_update_queue_args *args = data;
struct queue_properties properties;
- if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
+ /*
+ * Repurpose queue percentage to accommodate new features:
+ * bit 0-7: queue percentage
+ * bit 8-15: pm4_target_xcc
+ */
+ if ((args->queue_percentage & 0xFF) > KFD_MAX_QUEUE_PERCENTAGE) {
pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
return -EINVAL;
}
@@ -395,13 +465,20 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
return -EINVAL;
}
+ if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) {
+ args->ring_size = KFD_MIN_QUEUE_RING_SIZE;
+ pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE");
+ }
+
properties.queue_address = args->ring_base_address;
properties.queue_size = args->ring_size;
- properties.queue_percent = args->queue_percentage;
+ properties.queue_percent = args->queue_percentage & 0xFF;
+ /* bit 8-15 are repurposed to be PM4 target XCC */
+ properties.pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
properties.priority = args->queue_priority;
- pr_debug("Updating queue id %d for pasid 0x%x\n",
- args->queue_id, p->pasid);
+ pr_debug("Updating queue id %d for process pid %d\n",
+ args->queue_id, p->lead_thread->pid);
mutex_lock(&p->mutex);
@@ -444,26 +521,18 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
}
- minfo.cu_mask.ptr = kzalloc(cu_mask_size, GFP_KERNEL);
- if (!minfo.cu_mask.ptr)
- return -ENOMEM;
-
- retval = copy_from_user(minfo.cu_mask.ptr, cu_mask_ptr, cu_mask_size);
- if (retval) {
+ minfo.cu_mask.ptr = memdup_user(cu_mask_ptr, cu_mask_size);
+ if (IS_ERR(minfo.cu_mask.ptr)) {
pr_debug("Could not copy CU mask from userspace");
- retval = -EFAULT;
- goto out;
+ return PTR_ERR(minfo.cu_mask.ptr);
}
- minfo.update_flag = UPDATE_FLAG_CU_MASK;
-
mutex_lock(&p->mutex);
retval = pqm_update_mqd(&p->pqm, args->queue_id, &minfo);
mutex_unlock(&p->mutex);
-out:
kfree(minfo.cu_mask.ptr);
return retval;
}
@@ -490,7 +559,6 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_set_memory_policy_args *args = data;
- struct kfd_dev *dev;
int err = 0;
struct kfd_process_device *pdd;
enum cache_policy default_policy, alternate_policy;
@@ -505,13 +573,15 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
return -EINVAL;
}
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ if (!pdd) {
+ pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
+ err = -EINVAL;
+ goto err_pdd;
+ }
- pdd = kfd_bind_process_to_device(dev, p);
+ pdd = kfd_bind_process_to_device(pdd->dev, p);
if (IS_ERR(pdd)) {
err = -ESRCH;
goto out;
@@ -524,15 +594,17 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
? cache_policy_coherent : cache_policy_noncoherent;
- if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
+ if (!pdd->dev->dqm->ops.set_cache_memory_policy(pdd->dev->dqm,
&pdd->qpd,
default_policy,
alternate_policy,
(void __user *)args->alternate_aperture_base,
- args->alternate_aperture_size))
+ args->alternate_aperture_size,
+ args->misc_process_flag))
err = -EINVAL;
out:
+err_pdd:
mutex_unlock(&p->mutex);
return err;
@@ -542,17 +614,18 @@ static int kfd_ioctl_set_trap_handler(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_set_trap_handler_args *args = data;
- struct kfd_dev *dev;
int err = 0;
struct kfd_process_device *pdd;
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
mutex_lock(&p->mutex);
- pdd = kfd_bind_process_to_device(dev, p);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ if (!pdd) {
+ err = -EINVAL;
+ goto err_pdd;
+ }
+
+ pdd = kfd_bind_process_to_device(pdd->dev, p);
if (IS_ERR(pdd)) {
err = -ESRCH;
goto out;
@@ -561,6 +634,7 @@ static int kfd_ioctl_set_trap_handler(struct file *filep,
kfd_process_set_trap_handler(&pdd->qpd, args->tba_addr, args->tma_addr);
out:
+err_pdd:
mutex_unlock(&p->mutex);
return err;
@@ -569,289 +643,40 @@ out:
static int kfd_ioctl_dbg_register(struct file *filep,
struct kfd_process *p, void *data)
{
- struct kfd_ioctl_dbg_register_args *args = data;
- struct kfd_dev *dev;
- struct kfd_dbgmgr *dbgmgr_ptr;
- struct kfd_process_device *pdd;
- bool create_ok;
- long status = 0;
-
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
- if (dev->device_info->asic_family == CHIP_CARRIZO) {
- pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
- return -EINVAL;
- }
-
- mutex_lock(&p->mutex);
- mutex_lock(kfd_get_dbgmgr_mutex());
-
- /*
- * make sure that we have pdd, if this the first queue created for
- * this process
- */
- pdd = kfd_bind_process_to_device(dev, p);
- if (IS_ERR(pdd)) {
- status = PTR_ERR(pdd);
- goto out;
- }
-
- if (!dev->dbgmgr) {
- /* In case of a legal call, we have no dbgmgr yet */
- create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
- if (create_ok) {
- status = kfd_dbgmgr_register(dbgmgr_ptr, p);
- if (status != 0)
- kfd_dbgmgr_destroy(dbgmgr_ptr);
- else
- dev->dbgmgr = dbgmgr_ptr;
- }
- } else {
- pr_debug("debugger already registered\n");
- status = -EINVAL;
- }
-
-out:
- mutex_unlock(kfd_get_dbgmgr_mutex());
- mutex_unlock(&p->mutex);
-
- return status;
+ return -EPERM;
}
static int kfd_ioctl_dbg_unregister(struct file *filep,
struct kfd_process *p, void *data)
{
- struct kfd_ioctl_dbg_unregister_args *args = data;
- struct kfd_dev *dev;
- long status;
-
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev || !dev->dbgmgr)
- return -EINVAL;
-
- if (dev->device_info->asic_family == CHIP_CARRIZO) {
- pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
- return -EINVAL;
- }
-
- mutex_lock(kfd_get_dbgmgr_mutex());
-
- status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
- if (!status) {
- kfd_dbgmgr_destroy(dev->dbgmgr);
- dev->dbgmgr = NULL;
- }
-
- mutex_unlock(kfd_get_dbgmgr_mutex());
-
- return status;
+ return -EPERM;
}
-/*
- * Parse and generate variable size data structure for address watch.
- * Total size of the buffer and # watch points is limited in order
- * to prevent kernel abuse. (no bearing to the much smaller HW limitation
- * which is enforced by dbgdev module)
- * please also note that the watch address itself are not "copied from user",
- * since it be set into the HW in user mode values.
- *
- */
static int kfd_ioctl_dbg_address_watch(struct file *filep,
struct kfd_process *p, void *data)
{
- struct kfd_ioctl_dbg_address_watch_args *args = data;
- struct kfd_dev *dev;
- struct dbg_address_watch_info aw_info;
- unsigned char *args_buff;
- long status;
- void __user *cmd_from_user;
- uint64_t watch_mask_value = 0;
- unsigned int args_idx = 0;
-
- memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
-
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
- if (dev->device_info->asic_family == CHIP_CARRIZO) {
- pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
- return -EINVAL;
- }
-
- cmd_from_user = (void __user *) args->content_ptr;
-
- /* Validate arguments */
-
- if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
- (args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
- (cmd_from_user == NULL))
- return -EINVAL;
-
- /* this is the actual buffer to work with */
- args_buff = memdup_user(cmd_from_user,
- args->buf_size_in_bytes - sizeof(*args));
- if (IS_ERR(args_buff))
- return PTR_ERR(args_buff);
-
- aw_info.process = p;
-
- aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
- args_idx += sizeof(aw_info.num_watch_points);
-
- aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
- args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
-
- /*
- * set watch address base pointer to point on the array base
- * within args_buff
- */
- aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
-
- /* skip over the addresses buffer */
- args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
-
- if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
- status = -EINVAL;
- goto out;
- }
-
- watch_mask_value = (uint64_t) args_buff[args_idx];
-
- if (watch_mask_value > 0) {
- /*
- * There is an array of masks.
- * set watch mask base pointer to point on the array base
- * within args_buff
- */
- aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
-
- /* skip over the masks buffer */
- args_idx += sizeof(aw_info.watch_mask) *
- aw_info.num_watch_points;
- } else {
- /* just the NULL mask, set to NULL and skip over it */
- aw_info.watch_mask = NULL;
- args_idx += sizeof(aw_info.watch_mask);
- }
-
- if (args_idx >= args->buf_size_in_bytes - sizeof(args)) {
- status = -EINVAL;
- goto out;
- }
-
- /* Currently HSA Event is not supported for DBG */
- aw_info.watch_event = NULL;
-
- mutex_lock(kfd_get_dbgmgr_mutex());
-
- status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
-
- mutex_unlock(kfd_get_dbgmgr_mutex());
-
-out:
- kfree(args_buff);
-
- return status;
+ return -EPERM;
}
/* Parse and generate fixed size data structure for wave control */
static int kfd_ioctl_dbg_wave_control(struct file *filep,
struct kfd_process *p, void *data)
{
- struct kfd_ioctl_dbg_wave_control_args *args = data;
- struct kfd_dev *dev;
- struct dbg_wave_control_info wac_info;
- unsigned char *args_buff;
- uint32_t computed_buff_size;
- long status;
- void __user *cmd_from_user;
- unsigned int args_idx = 0;
-
- memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
-
- /* we use compact form, independent of the packing attribute value */
- computed_buff_size = sizeof(*args) +
- sizeof(wac_info.mode) +
- sizeof(wac_info.operand) +
- sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
- sizeof(wac_info.dbgWave_msg.MemoryVA) +
- sizeof(wac_info.trapId);
-
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
- if (dev->device_info->asic_family == CHIP_CARRIZO) {
- pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
- return -EINVAL;
- }
-
- /* input size must match the computed "compact" size */
- if (args->buf_size_in_bytes != computed_buff_size) {
- pr_debug("size mismatch, computed : actual %u : %u\n",
- args->buf_size_in_bytes, computed_buff_size);
- return -EINVAL;
- }
-
- cmd_from_user = (void __user *) args->content_ptr;
-
- if (cmd_from_user == NULL)
- return -EINVAL;
-
- /* copy the entire buffer from user */
-
- args_buff = memdup_user(cmd_from_user,
- args->buf_size_in_bytes - sizeof(*args));
- if (IS_ERR(args_buff))
- return PTR_ERR(args_buff);
-
- /* move ptr to the start of the "pay-load" area */
- wac_info.process = p;
-
- wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
- args_idx += sizeof(wac_info.operand);
-
- wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
- args_idx += sizeof(wac_info.mode);
-
- wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
- args_idx += sizeof(wac_info.trapId);
-
- wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
- *((uint32_t *)(&args_buff[args_idx]));
- wac_info.dbgWave_msg.MemoryVA = NULL;
-
- mutex_lock(kfd_get_dbgmgr_mutex());
-
- pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
- wac_info.process, wac_info.operand,
- wac_info.mode, wac_info.trapId,
- wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
-
- status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
-
- pr_debug("Returned status of dbg manager is %ld\n", status);
-
- mutex_unlock(kfd_get_dbgmgr_mutex());
-
- kfree(args_buff);
-
- return status;
+ return -EPERM;
}
static int kfd_ioctl_get_clock_counters(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_get_clock_counters_args *args = data;
- struct kfd_dev *dev;
+ struct kfd_process_device *pdd;
- dev = kfd_device_by_id(args->gpu_id);
- if (dev)
+ mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ mutex_unlock(&p->mutex);
+ if (pdd)
/* Reading GPU clock counter from KGD */
- args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd);
+ args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(pdd->dev->adev);
else
/* Node without GPU resource */
args->gpu_clock_counter = 0;
@@ -874,7 +699,7 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
struct kfd_process_device_apertures *pAperture;
int i;
- dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
+ dev_dbg(kfd_device, "get apertures for process pid %d", p->lead_thread->pid);
args->num_of_nodes = 0;
@@ -926,7 +751,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp,
int ret;
int i;
- dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
+ dev_dbg(kfd_device, "get apertures for process pid %d",
+ p->lead_thread->pid);
if (args->num_of_nodes == 0) {
/* Return number of nodes, so that user space can alloacate
@@ -941,8 +767,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp,
* nodes, but not more than args->num_of_nodes as that is
* the amount of memory allocated by user
*/
- pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
- args->num_of_nodes), GFP_KERNEL);
+ pa = kcalloc(args->num_of_nodes, sizeof(struct kfd_process_device_apertures),
+ GFP_KERNEL);
if (!pa)
return -ENOMEM;
@@ -1007,57 +833,11 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
* through the event_page_offset field.
*/
if (args->event_page_offset) {
- struct kfd_dev *kfd;
- struct kfd_process_device *pdd;
- void *mem, *kern_addr;
- uint64_t size;
-
- kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
- if (!kfd) {
- pr_err("Getting device by id failed in %s\n", __func__);
- return -EINVAL;
- }
-
mutex_lock(&p->mutex);
-
- if (p->signal_page) {
- pr_err("Event page is already set\n");
- err = -EINVAL;
- goto out_unlock;
- }
-
- pdd = kfd_bind_process_to_device(kfd, p);
- if (IS_ERR(pdd)) {
- err = PTR_ERR(pdd);
- goto out_unlock;
- }
-
- mem = kfd_process_device_translate_handle(pdd,
- GET_IDR_HANDLE(args->event_page_offset));
- if (!mem) {
- pr_err("Can't find BO, offset is 0x%llx\n",
- args->event_page_offset);
- err = -EINVAL;
- goto out_unlock;
- }
-
- err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
- mem, &kern_addr, &size);
- if (err) {
- pr_err("Failed to map event page to kernel\n");
- goto out_unlock;
- }
-
- err = kfd_event_page_set(p, kern_addr, size);
- if (err) {
- pr_err("Failed to set event page\n");
- amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->kgd, mem);
- goto out_unlock;
- }
-
- p->signal_handle = args->event_page_offset;
-
+ err = kfd_kmap_event_page(p, args->event_page_offset);
mutex_unlock(&p->mutex);
+ if (err)
+ return err;
}
err = kfd_event_create(filp, p, args->event_type,
@@ -1066,10 +846,7 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
&args->event_page_offset,
&args->event_slot_index);
- return err;
-
-out_unlock:
- mutex_unlock(&p->mutex);
+ pr_debug("Created event (id:0x%08x) (%s)\n", args->event_id, __func__);
return err;
}
@@ -1101,28 +878,27 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
void *data)
{
struct kfd_ioctl_wait_events_args *args = data;
- int err;
- err = kfd_wait_on_events(p, args->num_events,
+ return kfd_wait_on_events(p, args->num_events,
(void __user *)args->events_ptr,
(args->wait_for_all != 0),
- args->timeout, &args->wait_result);
-
- return err;
+ &args->timeout, &args->wait_result);
}
static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_set_scratch_backing_va_args *args = data;
struct kfd_process_device *pdd;
- struct kfd_dev *dev;
+ struct kfd_node *dev;
long err;
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ if (!pdd) {
+ err = -EINVAL;
+ goto err_pdd;
+ }
+ dev = pdd->dev;
pdd = kfd_bind_process_to_device(dev, p);
if (IS_ERR(pdd)) {
@@ -1137,11 +913,12 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)
dev->kfd2kgd->set_scratch_backing_va(
- dev->kgd, args->va_addr, pdd->qpd.vmid);
+ dev->adev, args->va_addr, pdd->qpd.vmid);
return 0;
bind_process_to_device_fail:
+err_pdd:
mutex_unlock(&p->mutex);
return err;
}
@@ -1150,15 +927,17 @@ static int kfd_ioctl_get_tile_config(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_get_tile_config_args *args = data;
- struct kfd_dev *dev;
+ struct kfd_process_device *pdd;
struct tile_config config;
int err = 0;
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
+ mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ mutex_unlock(&p->mutex);
+ if (!pdd)
return -EINVAL;
- amdgpu_amdkfd_get_tile_config(dev->kgd, &config);
+ amdgpu_amdkfd_get_tile_config(pdd->dev->adev, &config);
args->gb_addr_config = config.gb_addr_config;
args->num_banks = config.num_banks;
@@ -1193,71 +972,83 @@ static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
{
struct kfd_ioctl_acquire_vm_args *args = data;
struct kfd_process_device *pdd;
- struct kfd_dev *dev;
struct file *drm_file;
int ret;
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
drm_file = fget(args->drm_fd);
if (!drm_file)
return -EINVAL;
mutex_lock(&p->mutex);
-
- pdd = kfd_get_process_device_data(dev, p);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
if (!pdd) {
ret = -EINVAL;
- goto err_unlock;
+ goto err_pdd;
}
if (pdd->drm_file) {
ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
- goto err_unlock;
+ goto err_drm_file;
}
ret = kfd_process_device_init_vm(pdd, drm_file);
if (ret)
goto err_unlock;
+
/* On success, the PDD keeps the drm_file reference */
mutex_unlock(&p->mutex);
return 0;
err_unlock:
+err_pdd:
+err_drm_file:
mutex_unlock(&p->mutex);
fput(drm_file);
return ret;
}
-bool kfd_dev_is_large_bar(struct kfd_dev *dev)
+bool kfd_dev_is_large_bar(struct kfd_node *dev)
{
- struct kfd_local_mem_info mem_info;
-
- if (debug_largebar) {
+ if (dev->kfd->adev->debug_largebar) {
pr_debug("Simulate large-bar allocation on non large-bar machine\n");
return true;
}
- if (dev->use_iommu_v2)
- return false;
+ if (dev->local_mem_info.local_mem_size_private == 0 &&
+ dev->local_mem_info.local_mem_size_public > 0)
+ return true;
- amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
- if (mem_info.local_mem_size_private == 0 &&
- mem_info.local_mem_size_public > 0)
+ if (dev->local_mem_info.local_mem_size_public == 0 &&
+ dev->kfd->adev->gmc.is_app_apu) {
+ pr_debug("APP APU, Consider like a large bar system\n");
return true;
+ }
+
return false;
}
+static int kfd_ioctl_get_available_memory(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_get_available_memory_args *args = data;
+ struct kfd_process_device *pdd = kfd_lock_pdd_by_id(p, args->gpu_id);
+
+ if (!pdd)
+ return -EINVAL;
+ args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev,
+ pdd->dev->node_id);
+ kfd_unlock_pdd(pdd);
+ return 0;
+}
+
static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
struct kfd_process_device *pdd;
void *mem;
- struct kfd_dev *dev;
+ struct kfd_node *dev;
int idr_handle;
long err;
uint64_t offset = args->mmap_offset;
@@ -1273,7 +1064,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
svm_range_list_lock_and_flush_work(&p->svms, current->mm);
mutex_lock(&p->svms.lock);
mmap_write_unlock(current->mm);
- if (interval_tree_iter_first(&p->svms.objects,
+
+ /* Skip the special case of allocating VRAM without a VA;
+ * in that case the VA is the invalid value 0.
+ */
+ if (!(!args->va_addr && (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) &&
+ interval_tree_iter_first(&p->svms.objects,
args->va_addr >> PAGE_SHIFT,
(args->va_addr + args->size - 1) >> PAGE_SHIFT)) {
pr_err("Address: 0x%llx already allocated by SVM\n",
@@ -1281,21 +1077,39 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
mutex_unlock(&p->svms.lock);
return -EADDRINUSE;
}
+
+ /* When registering a user buffer, check whether it has already been
+ * registered by SVM, using the buffer's CPU virtual address.
+ */
+ if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) &&
+ interval_tree_iter_first(&p->svms.objects,
+ args->mmap_offset >> PAGE_SHIFT,
+ (args->mmap_offset + args->size - 1) >> PAGE_SHIFT)) {
+ pr_err("User Buffer Address: 0x%llx already allocated by SVM\n",
+ args->mmap_offset);
+ mutex_unlock(&p->svms.lock);
+ return -EADDRINUSE;
+ }
+
mutex_unlock(&p->svms.lock);
#endif
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
+ mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ if (!pdd) {
+ err = -EINVAL;
+ goto err_pdd;
+ }
+
+ dev = pdd->dev;
if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
!kfd_dev_is_large_bar(dev)) {
pr_err("Alloc host visible vram on small bar is not allowed\n");
- return -EINVAL;
+ err = -EINVAL;
+ goto err_large_bar;
}
- mutex_lock(&p->mutex);
-
pdd = kfd_bind_process_to_device(dev, p);
if (IS_ERR(pdd)) {
err = PTR_ERR(pdd);
@@ -1303,27 +1117,31 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
}
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
- if (args->size != kfd_doorbell_process_slice(dev)) {
+ if (args->size != kfd_doorbell_process_slice(dev->kfd)) {
err = -EINVAL;
goto err_unlock;
}
offset = kfd_get_process_doorbells(pdd);
+ if (!offset) {
+ err = -ENOMEM;
+ goto err_unlock;
+ }
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
if (args->size != PAGE_SIZE) {
err = -EINVAL;
goto err_unlock;
}
- offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
- if (!offset) {
+ offset = dev->adev->rmmio_remap.bus_addr;
+ if (!offset || (PAGE_SIZE > 4096)) {
err = -ENOMEM;
goto err_unlock;
}
}
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
- dev->kgd, args->va_addr, args->size,
+ dev->adev, args->va_addr, args->size,
pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
- flags);
+ flags, false);
if (err)
goto err_unlock;
@@ -1335,8 +1153,13 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
}
/* Update the VRAM usage count */
- if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
- WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+ uint64_t size = args->size;
+
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
+ size >>= 1;
+ atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage);
+ }
mutex_unlock(&p->mutex);
@@ -1353,9 +1176,11 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
return 0;
err_free:
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem,
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem,
pdd->drm_priv, NULL);
err_unlock:
+err_pdd:
+err_large_bar:
mutex_unlock(&p->mutex);
return err;
}
@@ -1366,14 +1191,9 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
struct kfd_ioctl_free_memory_of_gpu_args *args = data;
struct kfd_process_device *pdd;
void *mem;
- struct kfd_dev *dev;
int ret;
uint64_t size = 0;
- dev = kfd_device_by_id(GET_GPU_ID(args->handle));
- if (!dev)
- return -EINVAL;
-
mutex_lock(&p->mutex);
/*
* Safeguard to prevent user space from freeing signal BO.
@@ -1385,11 +1205,11 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
goto err_unlock;
}
- pdd = kfd_get_process_device_data(dev, p);
+ pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
if (!pdd) {
pr_err("Process device data doesn't exist\n");
ret = -EINVAL;
- goto err_unlock;
+ goto err_pdd;
}
mem = kfd_process_device_translate_handle(
@@ -1399,7 +1219,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
goto err_unlock;
}
- ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
+ ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev,
(struct kgd_mem *)mem, pdd->drm_priv, &size);
/* If freeing the buffer failed, leave the handle in place for
@@ -1409,9 +1229,10 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
kfd_process_device_remove_obj_handle(
pdd, GET_IDR_HANDLE(args->handle));
- WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
+ atomic64_sub(size, &pdd->vram_usage);
err_unlock:
+err_pdd:
mutex_unlock(&p->mutex);
return ret;
}
@@ -1422,15 +1243,10 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
struct kfd_ioctl_map_memory_to_gpu_args *args = data;
struct kfd_process_device *pdd, *peer_pdd;
void *mem;
- struct kfd_dev *dev, *peer;
+ struct kfd_node *dev;
long err = 0;
int i;
uint32_t *devices_arr = NULL;
- bool table_freed = false;
-
- dev = kfd_device_by_id(GET_GPU_ID(args->handle));
- if (!dev)
- return -EINVAL;
if (!args->n_devices) {
pr_debug("Device IDs array empty\n");
@@ -1455,6 +1271,12 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
}
mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
+ if (!pdd) {
+ err = -EINVAL;
+ goto get_process_device_data_failed;
+ }
+ dev = pdd->dev;
pdd = kfd_bind_process_to_device(dev, p);
if (IS_ERR(pdd)) {
@@ -1470,60 +1292,64 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
}
for (i = args->n_success; i < args->n_devices; i++) {
- peer = kfd_device_by_id(devices_arr[i]);
- if (!peer) {
+ peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
+ if (!peer_pdd) {
pr_debug("Getting device by id failed for 0x%x\n",
devices_arr[i]);
err = -EINVAL;
goto get_mem_obj_from_handle_failed;
}
- peer_pdd = kfd_bind_process_to_device(peer, p);
+ peer_pdd = kfd_bind_process_to_device(peer_pdd->dev, p);
if (IS_ERR(peer_pdd)) {
err = PTR_ERR(peer_pdd);
goto get_mem_obj_from_handle_failed;
}
+
err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
- peer->kgd, (struct kgd_mem *)mem,
- peer_pdd->drm_priv, &table_freed);
+ peer_pdd->dev->adev, (struct kgd_mem *)mem,
+ peer_pdd->drm_priv);
if (err) {
- pr_err("Failed to map to gpu %d/%d\n",
- i, args->n_devices);
+ struct pci_dev *pdev = peer_pdd->dev->adev->pdev;
+
+ dev_err(dev->adev->dev,
+ "Failed to map peer:%04x:%02x:%02x.%d mem_domain:%d\n",
+ pci_domain_nr(pdev->bus),
+ pdev->bus->number,
+ PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn),
+ ((struct kgd_mem *)mem)->domain);
goto map_memory_to_gpu_failed;
}
args->n_success = i+1;
}
- mutex_unlock(&p->mutex);
-
- err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
+ err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true);
if (err) {
pr_debug("Sync memory failed, wait interrupted by user signal\n");
goto sync_memory_failed;
}
+ mutex_unlock(&p->mutex);
+
/* Flush TLBs after waiting for the page table updates to complete */
- if (table_freed) {
- for (i = 0; i < args->n_devices; i++) {
- peer = kfd_device_by_id(devices_arr[i]);
- if (WARN_ON_ONCE(!peer))
- continue;
- peer_pdd = kfd_get_process_device_data(peer, p);
- if (WARN_ON_ONCE(!peer_pdd))
- continue;
- kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
- }
+ for (i = 0; i < args->n_devices; i++) {
+ peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
+ if (WARN_ON_ONCE(!peer_pdd))
+ continue;
+ kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
}
kfree(devices_arr);
return err;
+get_process_device_data_failed:
bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
map_memory_to_gpu_failed:
+sync_memory_failed:
mutex_unlock(&p->mutex);
copy_from_user_failed:
-sync_memory_failed:
kfree(devices_arr);
return err;
@@ -1535,13 +1361,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
struct kfd_process_device *pdd, *peer_pdd;
void *mem;
- struct kfd_dev *dev, *peer;
long err = 0;
uint32_t *devices_arr = NULL, i;
-
- dev = kfd_device_by_id(GET_GPU_ID(args->handle));
- if (!dev)
- return -EINVAL;
+ bool flush_tlb;
if (!args->n_devices) {
pr_debug("Device IDs array empty\n");
@@ -1566,8 +1388,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
}
mutex_lock(&p->mutex);
-
- pdd = kfd_get_process_device_data(dev, p);
+ pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
if (!pdd) {
err = -EINVAL;
goto bind_process_to_device_failed;
@@ -1581,47 +1402,46 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
}
for (i = args->n_success; i < args->n_devices; i++) {
- peer = kfd_device_by_id(devices_arr[i]);
- if (!peer) {
- err = -EINVAL;
- goto get_mem_obj_from_handle_failed;
- }
-
- peer_pdd = kfd_get_process_device_data(peer, p);
+ peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
if (!peer_pdd) {
- err = -ENODEV;
+ err = -EINVAL;
goto get_mem_obj_from_handle_failed;
}
err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
- peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
+ peer_pdd->dev->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv);
if (err) {
- pr_err("Failed to unmap from gpu %d/%d\n",
- i, args->n_devices);
+ pr_debug("Failed to unmap from gpu %d/%d\n", i, args->n_devices);
goto unmap_memory_from_gpu_failed;
}
args->n_success = i+1;
}
- mutex_unlock(&p->mutex);
- if (dev->device_info->asic_family == CHIP_ALDEBARAN) {
- err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd,
+ flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev->kfd);
+ if (flush_tlb) {
+ err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,
(struct kgd_mem *) mem, true);
if (err) {
pr_debug("Sync memory failed, wait interrupted by user signal\n");
goto sync_memory_failed;
}
+ }
- /* Flush TLBs after waiting for the page table updates to complete */
- for (i = 0; i < args->n_devices; i++) {
- peer = kfd_device_by_id(devices_arr[i]);
- if (WARN_ON_ONCE(!peer))
- continue;
- peer_pdd = kfd_get_process_device_data(peer, p);
- if (WARN_ON_ONCE(!peer_pdd))
- continue;
+ /* Flush TLBs after waiting for the page table updates to complete */
+ for (i = 0; i < args->n_devices; i++) {
+ peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
+ if (WARN_ON_ONCE(!peer_pdd))
+ continue;
+ if (flush_tlb)
kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
- }
+
+ /* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */
+ err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);
+ if (err)
+ goto sync_memory_failed;
}
+
+ mutex_unlock(&p->mutex);
+
kfree(devices_arr);
return 0;
@@ -1629,9 +1449,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
unmap_memory_from_gpu_failed:
+sync_memory_failed:
mutex_unlock(&p->mutex);
copy_from_user_failed:
-sync_memory_failed:
kfree(devices_arr);
return err;
}
@@ -1642,7 +1462,7 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep,
int retval;
struct kfd_ioctl_alloc_queue_gws_args *args = data;
struct queue *q;
- struct kfd_dev *dev;
+ struct kfd_node *dev;
mutex_lock(&p->mutex);
q = pqm_get_user_queue(&p->pqm, args->queue_id);
@@ -1664,6 +1484,12 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep,
goto out_unlock;
}
+ if (p->debug_trap_enabled && (!kfd_dbg_has_gws_support(dev) ||
+ kfd_dbg_has_cwsr_workaround(dev))) {
+ retval = -EBUSY;
+ goto out_unlock;
+ }
+
retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
mutex_unlock(&p->mutex);
@@ -1679,16 +1505,17 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_get_dmabuf_info_args *args = data;
- struct kfd_dev *dev = NULL;
- struct kgd_dev *dma_buf_kgd;
+ struct kfd_node *dev = NULL;
+ struct amdgpu_device *dmabuf_adev;
void *metadata_buffer = NULL;
uint32_t flags;
+ int8_t xcp_id;
unsigned int i;
int r;
/* Find a KFD GPU device that supports the get_dmabuf_info query */
for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
- if (dev)
+ if (dev && !kfd_devcgroup_check_permission(dev))
break;
if (!dev)
return -EINVAL;
@@ -1700,20 +1527,17 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
}
/* Get dmabuf info from KGD */
- r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd,
- &dma_buf_kgd, &args->size,
+ r = amdgpu_amdkfd_get_dmabuf_info(dev->adev, args->dmabuf_fd,
+ &dmabuf_adev, &args->size,
metadata_buffer, args->metadata_size,
- &args->metadata_size, &flags);
+ &args->metadata_size, &flags, &xcp_id);
if (r)
goto exit;
- /* Reverse-lookup gpu_id from kgd pointer */
- dev = kfd_device_by_kgd(dma_buf_kgd);
- if (!dev) {
- r = -EINVAL;
- goto exit;
- }
- args->gpu_id = dev->id;
+ if (xcp_id >= 0)
+ args->gpu_id = dmabuf_adev->kfd.dev->nodes[xcp_id]->id;
+ else
+ args->gpu_id = dev->id;
args->flags = flags;
/* Copy metadata buffer to user mode */
@@ -1735,33 +1559,28 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
{
struct kfd_ioctl_import_dmabuf_args *args = data;
struct kfd_process_device *pdd;
- struct dma_buf *dmabuf;
- struct kfd_dev *dev;
int idr_handle;
uint64_t size;
void *mem;
int r;
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
- dmabuf = dma_buf_get(args->dmabuf_fd);
- if (IS_ERR(dmabuf))
- return PTR_ERR(dmabuf);
-
mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ if (!pdd) {
+ r = -EINVAL;
+ goto err_unlock;
+ }
- pdd = kfd_bind_process_to_device(dev, p);
+ pdd = kfd_bind_process_to_device(pdd->dev, p);
if (IS_ERR(pdd)) {
r = PTR_ERR(pdd);
goto err_unlock;
}
- r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
- args->va_addr, pdd->drm_priv,
- (struct kgd_mem **)&mem, &size,
- NULL);
+ r = amdgpu_amdkfd_gpuvm_import_dmabuf_fd(pdd->dev->adev, args->dmabuf_fd,
+ args->va_addr, pdd->drm_priv,
+ (struct kgd_mem **)&mem, &size,
+ NULL);
if (r)
goto err_unlock;
@@ -1772,35 +1591,90 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
}
mutex_unlock(&p->mutex);
- dma_buf_put(dmabuf);
args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
return 0;
err_free:
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem,
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, (struct kgd_mem *)mem,
pdd->drm_priv, NULL);
err_unlock:
mutex_unlock(&p->mutex);
- dma_buf_put(dmabuf);
return r;
}
+static int kfd_ioctl_export_dmabuf(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_export_dmabuf_args *args = data;
+ struct kfd_process_device *pdd;
+ struct dma_buf *dmabuf;
+ struct kfd_node *dev;
+ void *mem;
+ int ret = 0;
+
+ dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+ if (!dev)
+ return -EINVAL;
+
+ mutex_lock(&p->mutex);
+
+ pdd = kfd_get_process_device_data(dev, p);
+ if (!pdd) {
+ ret = -EINVAL;
+ goto err_unlock;
+ }
+
+ mem = kfd_process_device_translate_handle(pdd,
+ GET_IDR_HANDLE(args->handle));
+ if (!mem) {
+ ret = -EINVAL;
+ goto err_unlock;
+ }
+
+ ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf);
+ mutex_unlock(&p->mutex);
+ if (ret)
+ goto err_out;
+
+ ret = dma_buf_fd(dmabuf, args->flags);
+ if (ret < 0) {
+ dma_buf_put(dmabuf);
+ goto err_out;
+ }
+ /* dma_buf_fd assigns the reference count to the fd, no need to
+ * put the reference here.
+ */
+ args->dmabuf_fd = ret;
+
+ return 0;
+
+err_unlock:
+ mutex_unlock(&p->mutex);
+err_out:
+ return ret;
+}
+
/* Handle requests for watching SMI events */
static int kfd_ioctl_smi_events(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_smi_events_args *args = data;
- struct kfd_dev *dev;
+ struct kfd_process_device *pdd;
- dev = kfd_device_by_id(args->gpuid);
- if (!dev)
+ mutex_lock(&p->mutex);
+
+ pdd = kfd_process_device_data_by_id(p, args->gpuid);
+ mutex_unlock(&p->mutex);
+ if (!pdd)
return -EINVAL;
- return kfd_smi_event_open(dev, &args->anon_fd);
+ return kfd_smi_event_open(pdd->dev, &args->anon_fd);
}
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+
static int kfd_ioctl_set_xnack_mode(struct file *filep,
struct kfd_process *p, void *data)
{
@@ -1811,22 +1685,29 @@ static int kfd_ioctl_set_xnack_mode(struct file *filep,
if (args->xnack_enabled >= 0) {
if (!list_empty(&p->pqm.queues)) {
pr_debug("Process has user queues running\n");
- mutex_unlock(&p->mutex);
- return -EBUSY;
+ r = -EBUSY;
+ goto out_unlock;
}
- if (args->xnack_enabled && !kfd_process_xnack_mode(p, true))
+
+ if (p->xnack_enabled == args->xnack_enabled)
+ goto out_unlock;
+
+ if (args->xnack_enabled && !kfd_process_xnack_mode(p, true)) {
r = -EPERM;
- else
- p->xnack_enabled = args->xnack_enabled;
+ goto out_unlock;
+ }
+
+ r = svm_range_switch_xnack_reserve_mem(p, args->xnack_enabled);
} else {
args->xnack_enabled = p->xnack_enabled;
}
+
+out_unlock:
mutex_unlock(&p->mutex);
return r;
}
-#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
{
struct kfd_ioctl_svm_args *args = data;
@@ -1840,22 +1721,1406 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
if (!args->start_addr || !args->size)
return -EINVAL;
- mutex_lock(&p->mutex);
-
r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr,
args->attrs);
- mutex_unlock(&p->mutex);
-
return r;
}
#else
+static int kfd_ioctl_set_xnack_mode(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ return -EPERM;
+}
static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
{
return -EPERM;
}
#endif
+/*
+ * Emit the process-level CRIU record (private-data version and XNACK mode)
+ * into the user private-data buffer at *priv_offset.
+ *
+ * Returns 0 on success or -EFAULT when the copy to user space fails.
+ * *priv_offset is advanced by the record size in either case.
+ */
+static int criu_checkpoint_process(struct kfd_process *p,
+				   uint8_t __user *user_priv_data,
+				   uint64_t *priv_offset)
+{
+	struct kfd_criu_process_priv_data priv;
+	uint8_t __user *dst = user_priv_data + *priv_offset;
+	int ret = 0;
+
+	/* Clear the whole record before filling it in */
+	memset(&priv, 0, sizeof(priv));
+	priv.version = KFD_CRIU_PRIV_VERSION;
+
+	/* For CR, we don't consider negative xnack mode which is used for
+	 * querying without changing it, here 0 simply means disabled and 1
+	 * means enabled so retry for finding a valid PTE.
+	 */
+	priv.xnack_mode = !!p->xnack_enabled;
+
+	if (copy_to_user(dst, &priv, sizeof(priv))) {
+		pr_err("Failed to copy process information to user\n");
+		ret = -EFAULT;
+	}
+
+	*priv_offset += sizeof(priv);
+	return ret;
+}
+
+/*
+ * Checkpoint the per-device information of a process.
+ *
+ * One kfd_criu_device_bucket per device (user-visible and actual GPU id) is
+ * copied to @user_addr, and a zero-filled kfd_criu_device_priv_data array is
+ * copied to @user_priv_data at *@priv_offset, which is then advanced.
+ *
+ * @num_devices indexes p->pdds[] directly; the caller (criu_checkpoint) passes
+ * the value it obtained from p->n_pdds.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure or -EFAULT when a copy
+ * to user space fails.
+ */
+static int criu_checkpoint_devices(struct kfd_process *p,
+				   uint32_t num_devices,
+				   uint8_t __user *user_addr,
+				   uint8_t __user *user_priv_data,
+				   uint64_t *priv_offset)
+{
+	struct kfd_criu_device_priv_data *device_priv = NULL;
+	struct kfd_criu_device_bucket *device_buckets = NULL;
+	uint32_t i;
+	int ret = 0;
+
+	/* kvcalloc() zeroes the memory and checks the count * size product */
+	device_buckets = kvcalloc(num_devices, sizeof(*device_buckets), GFP_KERNEL);
+	if (!device_buckets) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	device_priv = kvcalloc(num_devices, sizeof(*device_priv), GFP_KERNEL);
+	if (!device_priv) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	for (i = 0; i < num_devices; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+
+		device_buckets[i].user_gpu_id = pdd->user_gpu_id;
+		device_buckets[i].actual_gpu_id = pdd->dev->id;
+
+		/*
+		 * priv_data does not contain useful information for now and is reserved for
+		 * future use, so we do not set its contents.
+		 */
+	}
+
+	ret = copy_to_user(user_addr, device_buckets, num_devices * sizeof(*device_buckets));
+	if (ret) {
+		pr_err("Failed to copy device information to user\n");
+		ret = -EFAULT;
+		goto exit;
+	}
+
+	ret = copy_to_user(user_priv_data + *priv_offset,
+			   device_priv,
+			   num_devices * sizeof(*device_priv));
+	if (ret) {
+		pr_err("Failed to copy device information to user\n");
+		ret = -EFAULT;
+	}
+	/* The offset is advanced even when the private-data copy failed */
+	*priv_offset += num_devices * sizeof(*device_priv);
+
+exit:
+	/* kvfree(NULL) is a no-op, so partial allocation is handled here too */
+	kvfree(device_buckets);
+	kvfree(device_priv);
+	return ret;
+}
+
+/*
+ * Count the buffer objects registered in the alloc_idr of every device of
+ * the process. BOs that have a non-zero VA at or below the device's
+ * gpuvm_base are left out of the count.
+ */
+static uint32_t get_process_num_bos(struct kfd_process *p)
+{
+	uint32_t count = 0;
+	int pdd_idx;
+
+	for (pdd_idx = 0; pdd_idx < p->n_pdds; pdd_idx++) {
+		struct kfd_process_device *pdd = p->pdds[pdd_idx];
+		struct kgd_mem *bo;
+		int handle;
+
+		idr_for_each_entry(&pdd->alloc_idr, bo, handle) {
+			if (bo->va && bo->va <= pdd->gpuvm_base)
+				continue;
+
+			count++;
+		}
+	}
+
+	return count;
+}
+
+/*
+ * Export @mem as a dma-buf and reserve an unused file descriptor for it.
+ *
+ * The descriptor is only reserved here, not installed: on success the fd
+ * number is stored in *@shared_fd and the dma-buf's struct file in *@file.
+ * If no fd can be reserved, the dma-buf reference is dropped again.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int criu_get_prime_handle(struct kgd_mem *mem,
+				 int flags, u32 *shared_fd,
+				 struct file **file)
+{
+	struct dma_buf *buf;
+	int err, fd;
+
+	err = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &buf);
+	if (err) {
+		pr_err("dmabuf export failed for the BO\n");
+		return err;
+	}
+
+	fd = get_unused_fd_flags(flags);
+	if (fd < 0) {
+		pr_err("dmabuf create fd failed, ret:%d\n", fd);
+		dma_buf_put(buf);
+		return fd;
+	}
+
+	*shared_fd = fd;
+	*file = buf->file;
+	return 0;
+}
+
+/*
+ * Finalize the first @count reserved dma-buf descriptors, walking from the
+ * last entry down to the first. Entries without a file are skipped.
+ *
+ * With @err == 0 each file is installed on the fd recorded in the matching
+ * bucket; otherwise the file reference is dropped and the fd is released.
+ */
+static void commit_files(struct file **files,
+			 struct kfd_criu_bo_bucket *bo_buckets,
+			 unsigned int count,
+			 int err)
+{
+	unsigned int remaining;
+
+	for (remaining = count; remaining > 0; remaining--) {
+		unsigned int idx = remaining - 1;
+		struct file *f = files[idx];
+
+		if (!f)
+			continue;
+
+		if (!err) {
+			fd_install(bo_buckets[idx].dmabuf_fd, f);
+			continue;
+		}
+
+		fput(f);
+		put_unused_fd(bo_buckets[idx].dmabuf_fd);
+	}
+}
+
+/*
+ * Checkpoint the buffer objects (BOs) of a process.
+ *
+ * Walks the alloc_idr of every device, fills one kfd_criu_bo_bucket and one
+ * kfd_criu_bo_priv_data per BO, then copies the buckets to @user_bos and the
+ * private records to @user_priv_data at *@priv_offset (advanced on success).
+ *
+ * VRAM/GTT BOs are exported as dma-bufs; their fds are only reserved while
+ * the loop runs and are installed (or released, on error) by commit_files()
+ * at the exit label.
+ *
+ * NOTE(review): bo_index is not bounded against @num_bos here. The visible
+ * caller, criu_checkpoint(), takes num_bos from criu_get_process_object_info()
+ * while holding p->mutex, which applies the same skip condition.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int criu_checkpoint_bos(struct kfd_process *p,
+ uint32_t num_bos,
+ uint8_t __user *user_bos,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_offset)
+{
+ struct kfd_criu_bo_bucket *bo_buckets;
+ struct kfd_criu_bo_priv_data *bo_privs;
+ struct file **files = NULL;
+ int ret = 0, pdd_index, bo_index = 0, id;
+ void *mem;
+
+ bo_buckets = kvzalloc(num_bos * sizeof(*bo_buckets), GFP_KERNEL);
+ if (!bo_buckets)
+ return -ENOMEM;
+
+ bo_privs = kvzalloc(num_bos * sizeof(*bo_privs), GFP_KERNEL);
+ if (!bo_privs) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ /* Zeroed so that commit_files() can skip BOs that have no dma-buf file */
+ files = kvzalloc(num_bos * sizeof(struct file *), GFP_KERNEL);
+ if (!files) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
+ struct kfd_process_device *pdd = p->pdds[pdd_index];
+ struct amdgpu_bo *dumper_bo;
+ struct kgd_mem *kgd_mem;
+
+ idr_for_each_entry(&pdd->alloc_idr, mem, id) {
+ struct kfd_criu_bo_bucket *bo_bucket;
+ struct kfd_criu_bo_priv_data *bo_priv;
+ int i, dev_idx = 0;
+
+ kgd_mem = (struct kgd_mem *)mem;
+ dumper_bo = kgd_mem->bo;
+
+ /* Skip checkpointing BOs that are used for Trap handler
+ * code and state. Currently, these BOs have a VA that
+ * is less GPUVM Base
+ */
+ if (kgd_mem->va && kgd_mem->va <= pdd->gpuvm_base)
+ continue;
+
+ bo_bucket = &bo_buckets[bo_index];
+ bo_priv = &bo_privs[bo_index];
+
+ bo_bucket->gpu_id = pdd->user_gpu_id;
+ bo_bucket->addr = (uint64_t)kgd_mem->va;
+ bo_bucket->size = amdgpu_bo_size(dumper_bo);
+ bo_bucket->alloc_flags = (uint32_t)kgd_mem->alloc_flags;
+ bo_priv->idr_handle = id;
+
+ /* Userptr BOs additionally record the user address in the private data */
+ if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ ret = amdgpu_ttm_tt_get_userptr(&dumper_bo->tbo,
+ &bo_priv->user_addr);
+ if (ret) {
+ pr_err("Failed to obtain user address for user-pointer bo\n");
+ goto exit;
+ }
+ }
+ /* Only VRAM and GTT BOs get a dma-buf fd; it is opened read-write for writable BOs */
+ if (bo_bucket->alloc_flags
+ & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
+ ret = criu_get_prime_handle(kgd_mem,
+ bo_bucket->alloc_flags &
+ KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
+ &bo_bucket->dmabuf_fd, &files[bo_index]);
+ if (ret)
+ goto exit;
+ } else {
+ bo_bucket->dmabuf_fd = KFD_INVALID_FD;
+ }
+
+ /* Doorbell and MMIO BOs get a synthetic mmap offset encoding the actual GPU id */
+ if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
+ bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL |
+ KFD_MMAP_GPU_ID(pdd->dev->id);
+ else if (bo_bucket->alloc_flags &
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
+ bo_bucket->offset = KFD_MMAP_TYPE_MMIO |
+ KFD_MMAP_GPU_ID(pdd->dev->id);
+ else
+ bo_bucket->offset = amdgpu_bo_mmap_offset(dumper_bo);
+
+ /* Record the user GPU id of every device this BO is mapped to */
+ for (i = 0; i < p->n_pdds; i++) {
+ if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->drm_priv, kgd_mem))
+ bo_priv->mapped_gpuids[dev_idx++] = p->pdds[i]->user_gpu_id;
+ }
+
+ pr_debug("bo_size = 0x%llx, bo_addr = 0x%llx bo_offset = 0x%llx\n"
+ "gpu_id = 0x%x alloc_flags = 0x%x idr_handle = 0x%x",
+ bo_bucket->size,
+ bo_bucket->addr,
+ bo_bucket->offset,
+ bo_bucket->gpu_id,
+ bo_bucket->alloc_flags,
+ bo_priv->idr_handle);
+ bo_index++;
+ }
+ }
+
+ ret = copy_to_user(user_bos, bo_buckets, num_bos * sizeof(*bo_buckets));
+ if (ret) {
+ pr_err("Failed to copy BO information to user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ ret = copy_to_user(user_priv_data + *priv_offset, bo_privs, num_bos * sizeof(*bo_privs));
+ if (ret) {
+ pr_err("Failed to copy BO priv information to user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ *priv_offset += num_bos * sizeof(*bo_privs);
+
+exit:
+ /* Install the reserved fds when ret == 0, otherwise drop files and fds */
+ commit_files(files, bo_buckets, bo_index, ret);
+ kvfree(files);
+ kvfree(bo_buckets);
+ kvfree(bo_privs);
+ return ret;
+}
+
+/*
+ * Report how many devices, BOs and other objects (queues, events and SVM
+ * ranges) a process has and, when @objs_priv_size is not NULL, the total
+ * size of the CRIU private data needed to checkpoint them.
+ *
+ * Returns 0 on success or the error from kfd_process_get_queue_info().
+ */
+static int criu_get_process_object_info(struct kfd_process *p,
+					uint32_t *num_devices,
+					uint32_t *num_bos,
+					uint32_t *num_objects,
+					uint64_t *objs_priv_size)
+{
+	uint64_t queue_bytes, svm_bytes, total;
+	uint32_t n_queues, n_events, n_svm_ranges;
+	int err;
+
+	*num_devices = p->n_pdds;
+	*num_bos = get_process_num_bos(p);
+
+	err = kfd_process_get_queue_info(p, &n_queues, &queue_bytes);
+	if (err)
+		return err;
+
+	n_events = kfd_get_num_events(p);
+	svm_range_get_info(p, &n_svm_ranges, &svm_bytes);
+
+	*num_objects = n_queues + n_events + n_svm_ranges;
+
+	/* The size is optional; callers that only need the counts pass NULL */
+	if (!objs_priv_size)
+		return 0;
+
+	total = sizeof(struct kfd_criu_process_priv_data);
+	total += *num_devices * sizeof(struct kfd_criu_device_priv_data);
+	total += *num_bos * sizeof(struct kfd_criu_bo_priv_data);
+	total += queue_bytes;
+	total += n_events * sizeof(struct kfd_criu_event_priv_data);
+	total += svm_bytes;
+	*objs_priv_size = total;
+
+	return 0;
+}
+
+/*
+ * Handle the CRIU checkpoint operation for process @p.
+ *
+ * Requires that the process has at least one device and that its queues are
+ * already paused (p->queues_paused). The counts and private-data size in
+ * @args must match what criu_get_process_object_info() reports now, otherwise
+ * -EINVAL is returned. Everything runs with p->mutex held.
+ *
+ * Private data is laid out as: process, devices, BOs, then queues, events and
+ * SVM ranges. The BO slot is reserved early but written last.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int criu_checkpoint(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args)
+{
+ int ret;
+ uint32_t num_devices, num_bos, num_objects;
+ uint64_t priv_size, priv_offset = 0, bo_priv_offset;
+
+ if (!args->devices || !args->bos || !args->priv_data)
+ return -EINVAL;
+
+ mutex_lock(&p->mutex);
+
+ if (!p->n_pdds) {
+ pr_err("No pdd for given process\n");
+ ret = -ENODEV;
+ goto exit_unlock;
+ }
+
+ /* Confirm all process queues are evicted */
+ if (!p->queues_paused) {
+ pr_err("Cannot dump process when queues are not in evicted state\n");
+ /* CRIU plugin did not call op PROCESS_INFO before checkpointing */
+ ret = -EINVAL;
+ goto exit_unlock;
+ }
+
+ ret = criu_get_process_object_info(p, &num_devices, &num_bos, &num_objects, &priv_size);
+ if (ret)
+ goto exit_unlock;
+
+ /* Reject the request if the user-supplied counts/size differ from the current ones */
+ if (num_devices != args->num_devices ||
+ num_bos != args->num_bos ||
+ num_objects != args->num_objects ||
+ priv_size != args->priv_data_size) {
+
+ ret = -EINVAL;
+ goto exit_unlock;
+ }
+
+ /* each function will store private data inside priv_data and adjust priv_offset */
+ ret = criu_checkpoint_process(p, (uint8_t __user *)args->priv_data, &priv_offset);
+ if (ret)
+ goto exit_unlock;
+
+ ret = criu_checkpoint_devices(p, num_devices, (uint8_t __user *)args->devices,
+ (uint8_t __user *)args->priv_data, &priv_offset);
+ if (ret)
+ goto exit_unlock;
+
+ /* Leave room for BOs in the private data. They need to be restored
+ * before events, but we checkpoint them last to simplify the error
+ * handling.
+ */
+ bo_priv_offset = priv_offset;
+ priv_offset += num_bos * sizeof(struct kfd_criu_bo_priv_data);
+
+ if (num_objects) {
+ ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data,
+ &priv_offset);
+ if (ret)
+ goto exit_unlock;
+
+ ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data,
+ &priv_offset);
+ if (ret)
+ goto exit_unlock;
+
+ ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data, &priv_offset);
+ if (ret)
+ goto exit_unlock;
+ }
+
+ /* This must be the last thing in this function that can fail.
+ * Otherwise we leak dmabuf file descriptors.
+ */
+ ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,
+ (uint8_t __user *)args->priv_data, &bo_priv_offset);
+
+exit_unlock:
+ mutex_unlock(&p->mutex);
+ if (ret)
+ pr_err("Failed to dump CRIU ret:%d\n", ret);
+ else
+ pr_debug("CRIU dump ret:%d\n", ret);
+
+ return ret;
+}
+
+/*
+ * Restore the process-level CRIU record from the user private-data buffer.
+ *
+ * Reads a kfd_criu_process_priv_data at *@priv_offset (advancing the offset
+ * once the copy succeeded), checks its version and applies the saved XNACK
+ * mode to @p.
+ *
+ * Returns 0 on success, -EINVAL if the record does not fit in
+ * @max_priv_data_size or has the wrong version, -EFAULT if the copy from user
+ * space fails, or -EPERM if XNACK was enabled at checkpoint time but cannot
+ * be enabled now.
+ */
+static int criu_restore_process(struct kfd_process *p,
+				struct kfd_ioctl_criu_args *args,
+				uint64_t *priv_offset,
+				uint64_t max_priv_data_size)
+{
+	struct kfd_criu_process_priv_data priv;
+	void __user *src;
+
+	if (*priv_offset + sizeof(priv) > max_priv_data_size)
+		return -EINVAL;
+
+	src = (void __user *)(args->priv_data + *priv_offset);
+	if (copy_from_user(&priv, src, sizeof(priv))) {
+		pr_err("Failed to copy process private information from user\n");
+		return -EFAULT;
+	}
+	*priv_offset += sizeof(priv);
+
+	if (priv.version != KFD_CRIU_PRIV_VERSION) {
+		pr_err("Invalid CRIU API version (checkpointed:%d current:%d)\n",
+		       priv.version, KFD_CRIU_PRIV_VERSION);
+		return -EINVAL;
+	}
+
+	pr_debug("Setting XNACK mode\n");
+	if (priv.xnack_mode && !kfd_process_xnack_mode(p, true)) {
+		pr_err("xnack mode cannot be set\n");
+		return -EPERM;
+	}
+
+	pr_debug("set xnack mode: %d\n", priv.xnack_mode);
+	p->xnack_enabled = priv.xnack_mode;
+
+	return 0;
+}
+
+/*
+ * Restore the per-device state of a process from the CRIU device buckets.
+ *
+ * For every bucket supplied in args->devices this looks up the device by its
+ * actual GPU id, records the checkpointed user GPU id in the pdd, initializes
+ * the pdd's VM from the render-node fd passed by the plugin, binds the process
+ * to the device and allocates process doorbells if none exist yet.
+ *
+ * The device private data is not read, but *@priv_offset is still advanced
+ * past it on success.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int criu_restore_devices(struct kfd_process *p,
+				struct kfd_ioctl_criu_args *args,
+				uint64_t *priv_offset,
+				uint64_t max_priv_data_size)
+{
+	struct kfd_criu_device_bucket *device_buckets;
+	struct kfd_criu_device_priv_data *device_privs;
+	int ret = 0;
+	uint32_t i;
+
+	if (args->num_devices != p->n_pdds)
+		return -EINVAL;
+
+	if (*priv_offset + (args->num_devices * sizeof(*device_privs)) > max_priv_data_size)
+		return -EINVAL;
+
+	device_buckets = kmalloc_array(args->num_devices, sizeof(*device_buckets), GFP_KERNEL);
+	if (!device_buckets)
+		return -ENOMEM;
+
+	ret = copy_from_user(device_buckets, (void __user *)args->devices,
+			     args->num_devices * sizeof(*device_buckets));
+	if (ret) {
+		pr_err("Failed to copy devices buckets from user\n");
+		ret = -EFAULT;
+		goto exit;
+	}
+
+	for (i = 0; i < args->num_devices; i++) {
+		struct kfd_node *dev;
+		struct kfd_process_device *pdd;
+		struct file *drm_file;
+
+		/* device private data is not currently used */
+
+		if (!device_buckets[i].user_gpu_id) {
+			pr_err("Invalid user gpu_id\n");
+			ret = -EINVAL;
+			goto exit;
+		}
+
+		dev = kfd_device_by_id(device_buckets[i].actual_gpu_id);
+		if (!dev) {
+			pr_err("Failed to find device with gpu_id = %x\n",
+			       device_buckets[i].actual_gpu_id);
+			ret = -EINVAL;
+			goto exit;
+		}
+
+		pdd = kfd_get_process_device_data(dev, p);
+		if (!pdd) {
+			pr_err("Failed to get pdd for gpu_id = %x\n",
+			       device_buckets[i].actual_gpu_id);
+			ret = -EINVAL;
+			goto exit;
+		}
+		pdd->user_gpu_id = device_buckets[i].user_gpu_id;
+
+		drm_file = fget(device_buckets[i].drm_fd);
+		if (!drm_file) {
+			pr_err("Invalid render node file descriptor sent from plugin (%d)\n",
+			       device_buckets[i].drm_fd);
+			ret = -EINVAL;
+			goto exit;
+		}
+
+		if (pdd->drm_file) {
+			/*
+			 * The pdd already has a render node attached. fget()
+			 * above took a reference that nothing will own, so it
+			 * must be dropped before bailing out.
+			 */
+			fput(drm_file);
+			ret = -EINVAL;
+			goto exit;
+		}
+
+		/* create the vm using render nodes for kfd pdd */
+		if (kfd_process_device_init_vm(pdd, drm_file)) {
+			pr_err("could not init vm for given pdd\n");
+			/* On success, the PDD keeps the drm_file reference */
+			fput(drm_file);
+			ret = -EINVAL;
+			goto exit;
+		}
+		/*
+		 * pdd now already has the vm bound to render node so below api won't create a new
+		 * exclusive kfd mapping but use existing one with renderDXXX but is still needed
+		 * for iommu v2 binding and runtime pm.
+		 */
+		pdd = kfd_bind_process_to_device(dev, p);
+		if (IS_ERR(pdd)) {
+			ret = PTR_ERR(pdd);
+			goto exit;
+		}
+
+		if (!pdd->qpd.proc_doorbells) {
+			ret = kfd_alloc_process_doorbells(dev->kfd, pdd);
+			if (ret)
+				goto exit;
+		}
+	}
+
+	/*
+	 * We are not copying device private data from user as we are not using the data for now,
+	 * but we still adjust for its private data.
+	 */
+	*priv_offset += args->num_devices * sizeof(*device_privs);
+
+exit:
+	kfree(device_buckets);
+	return ret;
+}
+
+/*
+ * Re-create one checkpointed BO on the device of @pdd.
+ *
+ * The BO is allocated at its checkpointed address and size, then registered
+ * in pdd->alloc_idr under the same handle it had at checkpoint time
+ * (bo_priv->idr_handle). bo_bucket->restored_offset is filled in for
+ * doorbell, MMIO, GTT and VRAM BOs, and the pdd's VRAM usage counter is
+ * bumped for VRAM BOs.
+ *
+ * On success *@kgd_mem points to the new BO and 0 is returned. Otherwise a
+ * negative error code is returned; if only the IDR registration failed, the
+ * freshly allocated BO is freed again.
+ */
+static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
+				      struct kfd_criu_bo_bucket *bo_bucket,
+				      struct kfd_criu_bo_priv_data *bo_priv,
+				      struct kgd_mem **kgd_mem)
+{
+	int idr_handle;
+	int ret;
+	const bool criu_resume = true;
+	u64 offset;
+
+	/* Pick the input offset the allocator expects for special BO types */
+	if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
+		if (bo_bucket->size !=
+				kfd_doorbell_process_slice(pdd->dev->kfd))
+			return -EINVAL;
+
+		offset = kfd_get_process_doorbells(pdd);
+		if (!offset)
+			return -ENOMEM;
+	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
+		/* MMIO BOs need remapped bus address */
+		if (bo_bucket->size != PAGE_SIZE) {
+			pr_err("Invalid page size\n");
+			return -EINVAL;
+		}
+		offset = pdd->dev->adev->rmmio_remap.bus_addr;
+		if (!offset || (PAGE_SIZE > 4096)) {
+			pr_err("amdgpu_amdkfd_get_mmio_remap_phys_addr failed\n");
+			return -ENOMEM;
+		}
+	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+		offset = bo_priv->user_addr;
+	}
+	/* Create the BO */
+	ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(pdd->dev->adev, bo_bucket->addr,
+						      bo_bucket->size, pdd->drm_priv, kgd_mem,
+						      &offset, bo_bucket->alloc_flags, criu_resume);
+	if (ret) {
+		pr_err("Could not create the BO\n");
+		return ret;
+	}
+	pr_debug("New BO created: size:0x%llx addr:0x%llx offset:0x%llx\n",
+		 bo_bucket->size, bo_bucket->addr, offset);
+
+	/* Restore previous IDR handle */
+	pr_debug("Restoring old IDR handle for the BO\n");
+	/* The [handle, handle + 1) range forces idr_alloc() to use exactly the old id */
+	idr_handle = idr_alloc(&pdd->alloc_idr, *kgd_mem, bo_priv->idr_handle,
+			       bo_priv->idr_handle + 1, GFP_KERNEL);
+
+	if (idr_handle < 0) {
+		pr_err("Could not allocate idr\n");
+		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, *kgd_mem, pdd->drm_priv,
+						       NULL);
+		return -ENOMEM;
+	}
+
+	if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
+		bo_bucket->restored_offset = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(pdd->dev->id);
+	if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
+		bo_bucket->restored_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(pdd->dev->id);
+	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
+		bo_bucket->restored_offset = offset;
+	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+		bo_bucket->restored_offset = offset;
+		/* Update the VRAM usage count */
+		atomic64_add(bo_bucket->size, &pdd->vram_usage);
+	}
+	return 0;
+}
+
+/*
+ * Restore a single BO: re-create it on its owning device, map it to every
+ * GPU listed in bo_priv->mapped_gpuids (the list ends at the first zero
+ * entry) and, for VRAM/GTT BOs, export it as a dma-buf with a reserved fd.
+ *
+ * For other BO types bo_bucket->dmabuf_fd is set to KFD_INVALID_FD.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int criu_restore_bo(struct kfd_process *p,
+			   struct kfd_criu_bo_bucket *bo_bucket,
+			   struct kfd_criu_bo_priv_data *bo_priv,
+			   struct file **file)
+{
+	struct kfd_process_device *owner_pdd;
+	struct kgd_mem *mem;
+	int err;
+	int j;
+
+	pr_debug("Restoring BO size:0x%llx addr:0x%llx gpu_id:0x%x flags:0x%x idr_handle:0x%x\n",
+		 bo_bucket->size, bo_bucket->addr, bo_bucket->gpu_id, bo_bucket->alloc_flags,
+		 bo_priv->idr_handle);
+
+	owner_pdd = kfd_process_device_data_by_id(p, bo_bucket->gpu_id);
+	if (!owner_pdd) {
+		pr_err("Failed to get pdd\n");
+		return -ENODEV;
+	}
+
+	err = criu_restore_memory_of_gpu(owner_pdd, bo_bucket, bo_priv, &mem);
+	if (err)
+		return err;
+
+	/* now map these BOs to GPU/s */
+	for (j = 0; j < p->n_pdds && bo_priv->mapped_gpuids[j]; j++) {
+		struct kfd_process_device *peer_pdd;
+		struct kfd_node *peer;
+
+		peer_pdd = kfd_process_device_data_by_id(p, bo_priv->mapped_gpuids[j]);
+		if (!peer_pdd)
+			return -EINVAL;
+
+		peer = peer_pdd->dev;
+
+		peer_pdd = kfd_bind_process_to_device(peer, p);
+		if (IS_ERR(peer_pdd))
+			return PTR_ERR(peer_pdd);
+
+		err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(peer->adev, mem,
+							    peer_pdd->drm_priv);
+		if (err) {
+			pr_err("Failed to map to gpu %d/%d\n", j, p->n_pdds);
+			return err;
+		}
+	}
+
+	pr_debug("map memory was successful for the BO\n");
+
+	/* Only VRAM and GTT BOs are exported as dma-bufs */
+	if (!(bo_bucket->alloc_flags
+	      & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))) {
+		bo_bucket->dmabuf_fd = KFD_INVALID_FD;
+		return 0;
+	}
+
+	/* create the dmabuf object and export the bo */
+	return criu_get_prime_handle(mem, DRM_RDWR, &bo_bucket->dmabuf_fd, file);
+}
+
+/*
+ * Restore all BOs of a process from the CRIU buckets and private data.
+ *
+ * Copies args->num_bos buckets from args->bos and the matching private
+ * records from args->priv_data at *@priv_offset (advanced once copied),
+ * restores each BO in order, then copies the buckets back to user space so
+ * the caller can read the restored offsets and dma-buf fds.
+ *
+ * dma-buf fds are only reserved while BOs are being restored; commit_files()
+ * installs them when everything succeeded and releases them otherwise.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int criu_restore_bos(struct kfd_process *p,
+			    struct kfd_ioctl_criu_args *args,
+			    uint64_t *priv_offset,
+			    uint64_t max_priv_data_size)
+{
+	struct kfd_criu_bo_bucket *bo_buckets = NULL;
+	struct kfd_criu_bo_priv_data *bo_privs = NULL;
+	struct file **files = NULL;
+	int ret = 0;
+	uint32_t i = 0;
+
+	if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size)
+		return -EINVAL;
+
+	/* Prevent MMU notifications until stage-4 IOCTL (CRIU_RESUME) is received */
+	amdgpu_amdkfd_block_mmu_notifications(p->kgd_process_info);
+
+	bo_buckets = kvmalloc_array(args->num_bos, sizeof(*bo_buckets), GFP_KERNEL);
+	if (!bo_buckets)
+		return -ENOMEM;
+
+	/*
+	 * Zeroed so commit_files() can skip BOs without a dma-buf file.
+	 * kvcalloc() checks the count * size product like the sibling
+	 * kvmalloc_array() calls do; num_bos comes from user space.
+	 */
+	files = kvcalloc(args->num_bos, sizeof(struct file *), GFP_KERNEL);
+	if (!files) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	ret = copy_from_user(bo_buckets, (void __user *)args->bos,
+			     args->num_bos * sizeof(*bo_buckets));
+	if (ret) {
+		pr_err("Failed to copy BOs information from user\n");
+		ret = -EFAULT;
+		goto exit;
+	}
+
+	bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), GFP_KERNEL);
+	if (!bo_privs) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	ret = copy_from_user(bo_privs, (void __user *)args->priv_data + *priv_offset,
+			     args->num_bos * sizeof(*bo_privs));
+	if (ret) {
+		pr_err("Failed to copy BOs information from user\n");
+		ret = -EFAULT;
+		goto exit;
+	}
+	*priv_offset += args->num_bos * sizeof(*bo_privs);
+
+	/* Create and map new BOs */
+	for (; i < args->num_bos; i++) {
+		ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i], &files[i]);
+		if (ret) {
+			pr_debug("Failed to restore BO[%u] ret%d\n", i, ret);
+			goto exit;
+		}
+	} /* done */
+
+	/* Copy only the buckets back so user can read bo_buckets[N].restored_offset */
+	ret = copy_to_user((void __user *)args->bos,
+			   bo_buckets,
+			   (args->num_bos * sizeof(*bo_buckets)));
+	if (ret)
+		ret = -EFAULT;
+
+exit:
+	/* i is the number of BOs fully restored; only those may hold a file */
+	commit_files(files, bo_buckets, i, ret);
+	kvfree(files);
+	kvfree(bo_buckets);
+	kvfree(bo_privs);
+	return ret;
+}
+
+/*
+ * Restore args->num_objects objects (queues, events and SVM ranges) from the
+ * private data.
+ *
+ * Each record begins with a 32-bit object_type, enforced at build time for
+ * all three record structs. The type is peeked at *@priv_offset and the
+ * matching restore helper then consumes the record and advances the offset.
+ *
+ * Returns 0 on success or a negative error code; an unknown object type or a
+ * record that would start past @max_priv_data_size yields -EINVAL.
+ */
+static int criu_restore_objects(struct file *filep,
+				struct kfd_process *p,
+				struct kfd_ioctl_criu_args *args,
+				uint64_t *priv_offset,
+				uint64_t max_priv_data_size)
+{
+	uint8_t __user *priv_data = (uint8_t __user *)args->priv_data;
+	uint32_t i;
+	int ret = 0;
+
+	BUILD_BUG_ON(offsetof(struct kfd_criu_queue_priv_data, object_type));
+	BUILD_BUG_ON(offsetof(struct kfd_criu_event_priv_data, object_type));
+	BUILD_BUG_ON(offsetof(struct kfd_criu_svm_range_priv_data, object_type));
+
+	for (i = 0; i < args->num_objects; i++) {
+		uint32_t object_type;
+
+		if (*priv_offset + sizeof(object_type) > max_priv_data_size) {
+			pr_err("Invalid private data size\n");
+			return -EINVAL;
+		}
+
+		ret = get_user(object_type, (uint32_t __user *)(args->priv_data + *priv_offset));
+		if (ret) {
+			pr_err("Failed to copy private information from user\n");
+			return ret;
+		}
+
+		switch (object_type) {
+		case KFD_CRIU_OBJECT_TYPE_QUEUE:
+			ret = kfd_criu_restore_queue(p, priv_data,
+						     priv_offset, max_priv_data_size);
+			break;
+		case KFD_CRIU_OBJECT_TYPE_EVENT:
+			ret = kfd_criu_restore_event(filep, p, priv_data,
+						     priv_offset, max_priv_data_size);
+			break;
+		case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
+			ret = kfd_criu_restore_svm(p, priv_data,
+						   priv_offset, max_priv_data_size);
+			break;
+		default:
+			pr_err("Invalid object type:%u at index:%d\n", object_type, i);
+			return -EINVAL;
+		}
+
+		/* Stop at the first object that fails to restore */
+		if (ret)
+			return ret;
+	}
+
+	return ret;
+}
+
+/*
+ * Handle the CRIU restore operation for process @p.
+ *
+ * With p->mutex held, the process queues are evicted first and the private
+ * data is then consumed in order: process record, devices, BOs, and finally
+ * the remaining objects. The restore fails with -EINVAL unless exactly
+ * args->priv_data_size bytes of private data were consumed.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int criu_restore(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args)
+{
+ uint64_t priv_offset = 0;
+ int ret = 0;
+
+ pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n",
+ args->num_devices, args->num_bos, args->num_objects, args->priv_data_size);
+
+ /* args->bos may be NULL only when there are no BOs to restore */
+ if ((args->num_bos > 0 && !args->bos) || !args->devices || !args->priv_data ||
+ !args->priv_data_size || !args->num_devices)
+ return -EINVAL;
+
+ mutex_lock(&p->mutex);
+
+ /*
+ * Set the process to evicted state to avoid running any new queues before all the memory
+ * mappings are ready.
+ */
+ ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_RESTORE);
+ if (ret)
+ goto exit_unlock;
+
+ /* Each function will adjust priv_offset based on how many bytes they consumed */
+ ret = criu_restore_process(p, args, &priv_offset, args->priv_data_size);
+ if (ret)
+ goto exit_unlock;
+
+ ret = criu_restore_devices(p, args, &priv_offset, args->priv_data_size);
+ if (ret)
+ goto exit_unlock;
+
+ ret = criu_restore_bos(p, args, &priv_offset, args->priv_data_size);
+ if (ret)
+ goto exit_unlock;
+
+ ret = criu_restore_objects(filep, p, args, &priv_offset, args->priv_data_size);
+ if (ret)
+ goto exit_unlock;
+
+ /* Leftover (unconsumed) private data means the image does not match the counts given */
+ if (priv_offset != args->priv_data_size) {
+ pr_err("Invalid private data size\n");
+ ret = -EINVAL;
+ }
+
+exit_unlock:
+ mutex_unlock(&p->mutex);
+ if (ret)
+ pr_err("Failed to restore CRIU ret:%d\n", ret);
+ else
+ pr_debug("CRIU restore successful\n");
+
+ return ret;
+}
+
+/*
+ * Handle the CRIU unpause operation: restore the queues of a process whose
+ * queues are currently marked as paused.
+ *
+ * Returns -EINVAL if the queues are not paused, otherwise the result of
+ * kfd_process_restore_queues(). The paused flag is cleared only on success.
+ */
+static int criu_unpause(struct file *filep,
+			struct kfd_process *p,
+			struct kfd_ioctl_criu_args *args)
+{
+	int ret;
+
+	mutex_lock(&p->mutex);
+
+	if (p->queues_paused) {
+		ret = kfd_process_restore_queues(p);
+		if (ret)
+			pr_err("Failed to unpause queues ret:%d\n", ret);
+		else
+			p->queues_paused = false;
+	} else {
+		ret = -EINVAL;
+	}
+
+	mutex_unlock(&p->mutex);
+
+	return ret;
+}
+
+/*
+ * Handle the CRIU resume operation for the process identified by args->pid.
+ *
+ * Looks up the target KFD process, then, under the target's mutex, resumes
+ * SVM and, if that succeeded, calls amdgpu_amdkfd_criu_resume() on its
+ * kgd_process_info. The process reference is dropped before returning.
+ *
+ * Returns 0 on success, -ESRCH if the pid or the KFD process cannot be found,
+ * or the error reported by one of the resume steps.
+ */
+static int criu_resume(struct file *filep,
+		       struct kfd_process *p,
+		       struct kfd_ioctl_criu_args *args)
+{
+	struct kfd_process *target;
+	struct pid *pid;
+	int ret;
+
+	pr_debug("Inside %s, target pid for criu restore: %d\n", __func__,
+		 args->pid);
+
+	pid = find_get_pid(args->pid);
+	if (!pid) {
+		pr_err("Cannot find pid info for %i\n", args->pid);
+		return -ESRCH;
+	}
+
+	pr_debug("calling kfd_lookup_process_by_pid\n");
+	target = kfd_lookup_process_by_pid(pid);
+
+	/* The struct pid is only needed for the lookup */
+	put_pid(pid);
+
+	if (!target) {
+		pr_debug("Cannot find process info for %i\n", args->pid);
+		return -ESRCH;
+	}
+
+	mutex_lock(&target->mutex);
+
+	ret = kfd_criu_resume_svm(target);
+	if (ret) {
+		pr_err("kfd_criu_resume_svm failed for %i\n", args->pid);
+	} else {
+		ret = amdgpu_amdkfd_criu_resume(target->kgd_process_info);
+		if (ret)
+			pr_err("amdgpu_amdkfd_criu_resume failed for %i\n", args->pid);
+	}
+
+	mutex_unlock(&target->mutex);
+
+	kfd_unref_process(target);
+	return ret;
+}
+
+/*
+ * Handle the CRIU process-info operation for process @p.
+ *
+ * With p->mutex held, evicts the process queues and marks them paused, then
+ * fills @args with the lead thread's pid (as seen in that thread's own pid
+ * namespace) and with the device/BO/object counts and private-data size a
+ * later checkpoint must supply.
+ *
+ * Returns 0 on success, -ENODEV if the process has no devices, or a negative
+ * error from eviction or from gathering the object information.
+ */
+static int criu_process_info(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args)
+{
+ int ret = 0;
+
+ mutex_lock(&p->mutex);
+
+ if (!p->n_pdds) {
+ pr_err("No pdd for given process\n");
+ ret = -ENODEV;
+ goto err_unlock;
+ }
+
+ ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT);
+ if (ret)
+ goto err_unlock;
+
+ p->queues_paused = true;
+
+ args->pid = task_pid_nr_ns(p->lead_thread,
+ task_active_pid_ns(p->lead_thread));
+
+ ret = criu_get_process_object_info(p, &args->num_devices, &args->num_bos,
+ &args->num_objects, &args->priv_data_size);
+ if (ret)
+ goto err_unlock;
+
+ dev_dbg(kfd_device, "Num of devices:%u bos:%u objects:%u priv_data_size:%lld\n",
+ args->num_devices, args->num_bos, args->num_objects,
+ args->priv_data_size);
+
+err_unlock:
+ /*
+ * NOTE(review): this label is also reached on the -ENODEV path and when
+ * kfd_process_evict_queues() itself failed, i.e. before this call paused
+ * anything, yet the queues are restored and the flag cleared regardless.
+ * Confirm kfd_process_restore_queues() is safe without a prior eviction.
+ */
+ if (ret) {
+ kfd_process_restore_queues(p);
+ p->queues_paused = false;
+ }
+ mutex_unlock(&p->mutex);
+ return ret;
+}
+
+/*
+ * CRIU ioctl entry point: dispatch on args->op to the matching CRIU stage
+ * handler. Unknown operations yield -EINVAL. Any non-zero result is logged
+ * at debug level before being returned.
+ */
+static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_criu_args *args = data;
+	int ret;
+
+	dev_dbg(kfd_device, "CRIU operation: %d\n", args->op);
+
+	if (args->op == KFD_CRIU_OP_PROCESS_INFO) {
+		ret = criu_process_info(filep, p, args);
+	} else if (args->op == KFD_CRIU_OP_CHECKPOINT) {
+		ret = criu_checkpoint(filep, p, args);
+	} else if (args->op == KFD_CRIU_OP_UNPAUSE) {
+		ret = criu_unpause(filep, p, args);
+	} else if (args->op == KFD_CRIU_OP_RESTORE) {
+		ret = criu_restore(filep, p, args);
+	} else if (args->op == KFD_CRIU_OP_RESUME) {
+		ret = criu_resume(filep, p, args);
+	} else {
+		dev_dbg(kfd_device, "Unsupported CRIU operation:%d\n", args->op);
+		ret = -EINVAL;
+	}
+
+	if (ret)
+		dev_dbg(kfd_device, "CRIU operation:%d err:%d\n", args->op, ret);
+
+	return ret;
+}
+
+/*
+ * Mark the runtime as enabled for process @p and, if a debugger has the
+ * debug trap enabled, notify it and wait for it on p->runtime_enable_sema.
+ *
+ * The visible caller, kfd_ioctl_runtime_enable(), holds p->mutex; the mutex
+ * is dropped while waiting on the semaphore and re-taken afterwards.
+ *
+ * If a previous call was interrupted while waiting (p->is_runtime_retry),
+ * the setup is skipped and only the wait is repeated.
+ *
+ * Returns 0 on success, -EBUSY if the runtime state is not DISABLED, -EEXIST
+ * if any device already has queues, or the non-zero result of
+ * down_interruptible().
+ */
+static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
+ bool enable_ttmp_setup)
+{
+ int i = 0, ret = 0;
+
+ /* Resuming an interrupted wait: the state below was already set up */
+ if (p->is_runtime_retry)
+ goto retry;
+
+ if (p->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_DISABLED)
+ return -EBUSY;
+
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ if (pdd->qpd.queue_count)
+ return -EEXIST;
+
+ /*
+ * Setup TTMPs by default.
+ * Note that this call must remain here for MES ADD QUEUE to
+ * skip_process_ctx_clear unconditionally as the first call to
+ * SET_SHADER_DEBUGGER clears any stale process context data
+ * saved in MES.
+ */
+ if (pdd->dev->kfd->shared_resources.enable_mes)
+ kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
+ }
+
+ p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
+ p->runtime_info.r_debug = r_debug;
+ p->runtime_info.ttmp_setup = enable_ttmp_setup;
+
+ if (p->runtime_info.ttmp_setup) {
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ /* Without RLC restore support, GFXOFF is disallowed before enabling the trap */
+ if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) {
+ amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+ pdd->dev->kfd2kgd->enable_debug_trap(
+ pdd->dev->adev,
+ true,
+ pdd->dev->vm_info.last_vmid_kfd);
+ } else if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
+ pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap(
+ pdd->dev->adev,
+ false,
+ 0);
+ }
+ }
+ }
+
+retry:
+ if (p->debug_trap_enabled) {
+ /* Activate the trap and raise the runtime event only once, not on a retry */
+ if (!p->is_runtime_retry) {
+ kfd_dbg_trap_activate(p);
+ kfd_dbg_ev_raise(KFD_EC_MASK(EC_PROCESS_RUNTIME),
+ p, NULL, 0, false, NULL, 0);
+ }
+
+ /* Wait on the semaphore without holding the process mutex */
+ mutex_unlock(&p->mutex);
+ ret = down_interruptible(&p->runtime_enable_sema);
+ mutex_lock(&p->mutex);
+
+ /* A non-zero result means the wait did not complete; remember to retry */
+ p->is_runtime_retry = !!ret;
+ }
+
+ return ret;
+}
+
+/*
+ * Mark the runtime as disabled for process @p, notify an attached debugger
+ * (waiting for it on p->runtime_enable_sema) and undo the TTMP setup on
+ * every device.
+ *
+ * The visible caller, kfd_ioctl_runtime_enable(), holds p->mutex; the mutex
+ * is dropped while waiting on the semaphore and re-taken afterwards.
+ *
+ * Returns 0 on success, the non-zero result of down_interruptible() if the
+ * wait did not complete, or the last error reported while refreshing the
+ * per-device debug state.
+ */
+static int runtime_disable(struct kfd_process *p)
+{
+ int i = 0, ret = 0;
+ /* Sample the previous state before it is overwritten just below */
+ bool was_enabled = p->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED;
+
+ p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_DISABLED;
+ p->runtime_info.r_debug = 0;
+
+ if (p->debug_trap_enabled) {
+ if (was_enabled)
+ kfd_dbg_trap_deactivate(p, false, 0);
+
+ /* Raise the runtime event only once, not when retrying an interrupted wait */
+ if (!p->is_runtime_retry)
+ kfd_dbg_ev_raise(KFD_EC_MASK(EC_PROCESS_RUNTIME),
+ p, NULL, 0, false, NULL, 0);
+
+ /* Wait on the semaphore without holding the process mutex */
+ mutex_unlock(&p->mutex);
+ ret = down_interruptible(&p->runtime_enable_sema);
+ mutex_lock(&p->mutex);
+
+ p->is_runtime_retry = !!ret;
+ if (ret)
+ return ret;
+ }
+
+ /* Re-allow GFXOFF on devices where runtime_enable() had disallowed it */
+ if (was_enabled && p->runtime_info.ttmp_setup) {
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ if (!kfd_dbg_is_rlc_restore_supported(pdd->dev))
+ amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+ }
+ }
+
+ p->runtime_info.ttmp_setup = false;
+
+ /* disable ttmp setup */
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+ int last_err = 0;
+
+ if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
+ pdd->spi_dbg_override =
+ pdd->dev->kfd2kgd->disable_debug_trap(
+ pdd->dev->adev,
+ false,
+ pdd->dev->vm_info.last_vmid_kfd);
+
+ if (!pdd->dev->kfd->shared_resources.enable_mes)
+ last_err = debug_refresh_runlist(pdd->dev->dqm);
+ else
+ last_err = kfd_dbg_set_mes_debug_mode(pdd,
+ !kfd_dbg_has_cwsr_workaround(pdd->dev));
+
+ /* Keep going over the remaining devices; report the last failure seen */
+ if (last_err)
+ ret = last_err;
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Runtime-enable ioctl entry point. With p->mutex held, enables the runtime
+ * when KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK is set in args->mode_mask (TTMP
+ * setup is requested through KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK),
+ * otherwise disables it. Returns the result of the chosen helper.
+ */
+static int kfd_ioctl_runtime_enable(struct file *filep, struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_runtime_enable_args *args = data;
+	int r;
+
+	mutex_lock(&p->mutex);
+
+	if (!(args->mode_mask & KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK)) {
+		r = runtime_disable(p);
+	} else {
+		bool ttmp_save = !!(args->mode_mask & KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK);
+
+		r = runtime_enable(p, args->r_debug, ttmp_save);
+	}
+
+	mutex_unlock(&p->mutex);
+
+	return r;
+}
+
+/*
+ * kfd_ioctl_set_debug_trap() - handle one AMDKFD_IOC_DBG_TRAP request.
+ * @filep: file the ioctl was issued on; not referenced in this function.
+ * @p:     KFD process handed in by the ioctl dispatcher (presumably the
+ *         caller's own process, i.e. the debugger - confirm against the
+ *         dispatcher).
+ * @data:  points to a struct kfd_ioctl_dbg_trap_args. Several operations
+ *         write their results back through pointers into this structure.
+ *
+ * Resolves args->pid to a target KFD process, checks that the caller is
+ * allowed to debug it and that the target is in a state that permits
+ * args->op, then dispatches to the matching debug helper while holding
+ * target->mutex.
+ *
+ * Return: 0 or the result of the dispatched helper on the normal path.
+ * Error codes generated here:
+ *   -EINVAL  sched_policy is KFD_SCHED_POLICY_NO_HWS, the target is not
+ *            debug enabled for an op other than ENABLE, or args->op is
+ *            not a known operation;
+ *   -ESRCH   no pid, task, mm or KFD process could be found for args->pid;
+ *   -EPERM   the target is not ptraced by the caller, or the op needs
+ *            runtime state DEBUG_RUNTIME_STATE_ENABLED and it is not set;
+ *   -ENODEV  the GPU id of an address watch op does not resolve to a
+ *            device of the target;
+ *   PTR_ERR() of the value returned by the process create/lookup call.
+ */
+static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_dbg_trap_args *args = data;
+	struct task_struct *thread = NULL;
+	struct mm_struct *mm = NULL;
+	struct pid *pid = NULL;
+	struct kfd_process *target = NULL;
+	struct kfd_process_device *pdd = NULL;
+	int r = 0;
+
+	if (sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+		pr_err("Debugging does not support sched_policy %i\n", sched_policy);
+		return -EINVAL;
+	}
+
+	/*
+	 * Take references on the pid, the task and its mm. Each one that was
+	 * obtained is dropped again under the "out" label.
+	 */
+	pid = find_get_pid(args->pid);
+	if (!pid) {
+		pr_debug("Cannot find pid info for %i\n", args->pid);
+		r = -ESRCH;
+		goto out;
+	}
+
+	thread = get_pid_task(pid, PIDTYPE_PID);
+	if (!thread) {
+		r = -ESRCH;
+		goto out;
+	}
+
+	/*
+	 * NOTE(review): mm is only acquired and released in this function;
+	 * presumably this just verifies the task still has an address space.
+	 */
+	mm = get_task_mm(thread);
+	if (!mm) {
+		r = -ESRCH;
+		goto out;
+	}
+
+	if (args->op == KFD_IOC_DBG_TRAP_ENABLE) {
+		bool create_process;
+
+		/*
+		 * When enabling on another task that the caller ptraces, go
+		 * through kfd_create_process() instead of a plain lookup.
+		 */
+		rcu_read_lock();
+		create_process = thread && thread != current && ptrace_parent(thread) == current;
+		rcu_read_unlock();
+
+		target = create_process ? kfd_create_process(thread) :
+					kfd_lookup_process_by_pid(pid);
+	} else {
+		target = kfd_lookup_process_by_pid(pid);
+	}
+
+	if (IS_ERR_OR_NULL(target)) {
+		pr_debug("Cannot find process PID %i to debug\n", args->pid);
+		r = target ? PTR_ERR(target) : -ESRCH;
+		/* Clear the error pointer so the cleanup path does not unref it. */
+		target = NULL;
+		goto out;
+	}
+
+	/*
+	 * Check if target is still PTRACED. The check is skipped when the
+	 * target is the calling process itself or when the op is DISABLE.
+	 */
+	rcu_read_lock();
+	if (target != p && args->op != KFD_IOC_DBG_TRAP_DISABLE
+				&& ptrace_parent(target->lead_thread) != current) {
+		pr_err("PID %i is not PTRACED and cannot be debugged\n", args->pid);
+		r = -EPERM;
+	}
+	rcu_read_unlock();
+
+	if (r)
+		goto out;
+
+	/* target->mutex is held across the state checks and the dispatched op. */
+	mutex_lock(&target->mutex);
+
+	/* Every op except ENABLE requires debug trap to be enabled already. */
+	if (args->op != KFD_IOC_DBG_TRAP_ENABLE && !target->debug_trap_enabled) {
+		pr_err("PID %i not debug enabled for op %i\n", args->pid, args->op);
+		r = -EINVAL;
+		goto unlock_out;
+	}
+
+	/* These ops are refused unless the runtime state is ENABLED. */
+	if (target->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_ENABLED &&
+			(args->op == KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE ||
+			 args->op == KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE ||
+			 args->op == KFD_IOC_DBG_TRAP_SUSPEND_QUEUES ||
+			 args->op == KFD_IOC_DBG_TRAP_RESUME_QUEUES ||
+			 args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ||
+			 args->op == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH ||
+			 args->op == KFD_IOC_DBG_TRAP_SET_FLAGS)) {
+		r = -EPERM;
+		goto unlock_out;
+	}
+
+	/* Address watch ops act on one device: resolve its per-process data. */
+	if (args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ||
+	    args->op == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH) {
+		int user_gpu_id = kfd_process_get_user_gpu_id(target,
+				args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ?
+					args->set_node_address_watch.gpu_id :
+					args->clear_node_address_watch.gpu_id);
+
+		pdd = kfd_process_device_data_by_id(target, user_gpu_id);
+		if (user_gpu_id == -EINVAL || !pdd) {
+			r = -ENODEV;
+			goto unlock_out;
+		}
+	}
+
+	switch (args->op) {
+	case KFD_IOC_DBG_TRAP_ENABLE:
+		/* Record the caller as debugger unless it debugs itself. */
+		if (target != p)
+			target->debugger_process = p;
+
+		r = kfd_dbg_trap_enable(target,
+					args->enable.dbg_fd,
+					(void __user *)args->enable.rinfo_ptr,
+					&args->enable.rinfo_size);
+		/* The exception mask is only stored when enabling succeeded. */
+		if (!r)
+			target->exception_enable_mask = args->enable.exception_mask;
+
+		break;
+	case KFD_IOC_DBG_TRAP_DISABLE:
+		r = kfd_dbg_trap_disable(target);
+		break;
+	case KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT:
+		r = kfd_dbg_send_exception_to_runtime(target,
+				args->send_runtime_event.gpu_id,
+				args->send_runtime_event.queue_id,
+				args->send_runtime_event.exception_mask);
+		break;
+	case KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED:
+		/* No result is taken from this call; r keeps its value of 0. */
+		kfd_dbg_set_enabled_debug_exception_mask(target,
+				args->set_exceptions_enabled.exception_mask);
+		break;
+	case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE:
+		/* Both masks are passed by value and again by pointer for output. */
+		r = kfd_dbg_trap_set_wave_launch_override(target,
+				args->launch_override.override_mode,
+				args->launch_override.enable_mask,
+				args->launch_override.support_request_mask,
+				&args->launch_override.enable_mask,
+				&args->launch_override.support_request_mask);
+		break;
+	case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE:
+		r = kfd_dbg_trap_set_wave_launch_mode(target,
+				args->launch_mode.launch_mode);
+		break;
+	case KFD_IOC_DBG_TRAP_SUSPEND_QUEUES:
+		r = suspend_queues(target,
+				args->suspend_queues.num_queues,
+				args->suspend_queues.grace_period,
+				args->suspend_queues.exception_mask,
+				(uint32_t *)args->suspend_queues.queue_array_ptr);
+
+		break;
+	case KFD_IOC_DBG_TRAP_RESUME_QUEUES:
+		r = resume_queues(target, args->resume_queues.num_queues,
+				(uint32_t *)args->resume_queues.queue_array_ptr);
+		break;
+	case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH:
+		r = kfd_dbg_trap_set_dev_address_watch(pdd,
+				args->set_node_address_watch.address,
+				args->set_node_address_watch.mask,
+				&args->set_node_address_watch.id,
+				args->set_node_address_watch.mode);
+		break;
+	case KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH:
+		r = kfd_dbg_trap_clear_dev_address_watch(pdd,
+				args->clear_node_address_watch.id);
+		break;
+	case KFD_IOC_DBG_TRAP_SET_FLAGS:
+		r = kfd_dbg_trap_set_flags(target, &args->set_flags.flags);
+		break;
+	case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT:
+		r = kfd_dbg_ev_query_debug_event(target,
+				&args->query_debug_event.queue_id,
+				&args->query_debug_event.gpu_id,
+				args->query_debug_event.exception_mask,
+				&args->query_debug_event.exception_mask);
+		break;
+	case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO:
+		r = kfd_dbg_trap_query_exception_info(target,
+				args->query_exception_info.source_id,
+				args->query_exception_info.exception_code,
+				args->query_exception_info.clear_exception,
+				(void __user *)args->query_exception_info.info_ptr,
+				&args->query_exception_info.info_size);
+		break;
+	case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT:
+		r = pqm_get_queue_snapshot(&target->pqm,
+				args->queue_snapshot.exception_mask,
+				(void __user *)args->queue_snapshot.snapshot_buf_ptr,
+				&args->queue_snapshot.num_queues,
+				&args->queue_snapshot.entry_size);
+		break;
+	case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT:
+		r = kfd_dbg_trap_device_snapshot(target,
+				args->device_snapshot.exception_mask,
+				(void __user *)args->device_snapshot.snapshot_buf_ptr,
+				&args->device_snapshot.num_devices,
+				&args->device_snapshot.entry_size);
+		break;
+	default:
+		pr_err("Invalid option: %i\n", args->op);
+		r = -EINVAL;
+	}
+
+unlock_out:
+	mutex_unlock(&target->mutex);
+
+out:
+	/* Drop whichever references were obtained before the exit point. */
+	if (thread)
+		put_task_struct(thread);
+
+	if (mm)
+		mmput(mm);
+
+	if (pid)
+		put_pid(pid);
+
+	if (target)
+		kfd_unref_process(target);
+
+	return r;
+}
+
+
#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
.cmd_drv = 0, .name = #ioctl}
@@ -1898,16 +3163,16 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
kfd_ioctl_wait_events, 0),
- AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER_DEPRECATED,
kfd_ioctl_dbg_register, 0),
- AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED,
kfd_ioctl_dbg_unregister, 0),
- AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED,
kfd_ioctl_dbg_address_watch, 0),
- AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED,
kfd_ioctl_dbg_wave_control, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
@@ -1959,6 +3224,21 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
kfd_ioctl_set_xnack_mode, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP,
+ kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY,
+ kfd_ioctl_get_available_memory, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_EXPORT_DMABUF,
+ kfd_ioctl_export_dmabuf, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_RUNTIME_ENABLE,
+ kfd_ioctl_runtime_enable, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_TRAP,
+ kfd_ioctl_set_debug_trap, 0),
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
@@ -1973,9 +3253,12 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
char *kdata = NULL;
unsigned int usize, asize;
int retcode = -EINVAL;
+ bool ptrace_attached = false;
- if (nr >= AMDKFD_CORE_IOCTL_COUNT)
+ if (nr >= AMDKFD_CORE_IOCTL_COUNT) {
+ retcode = -ENOTTY;
goto err_i1;
+ }
if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
u32 amdkfd_size;
@@ -1988,8 +3271,10 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
asize = amdkfd_size;
cmd = ioctl->cmd;
- } else
+ } else {
+ retcode = -ENOTTY;
goto err_i1;
+ }
dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);
@@ -1998,7 +3283,15 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
* processes need to create their own KFD device context.
*/
process = filep->private_data;
- if (process->lead_thread != current->group_leader) {
+
+ rcu_read_lock();
+ if ((ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE) &&
+ ptrace_parent(process->lead_thread) == current)
+ ptrace_attached = true;
+ rcu_read_unlock();
+
+ if (process->lead_thread != current->group_leader
+ && !ptrace_attached) {
dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
retcode = -EBADF;
goto err_i1;
@@ -2013,6 +3306,19 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
goto err_i1;
}
+ /*
+ * Versions of docker shipped in Ubuntu 18.xx and 20.xx do not support
+ * CAP_CHECKPOINT_RESTORE, so we also allow access with CAP_SYS_ADMIN, which is a
+ * more privileged capability.
+ */
+ if (unlikely(ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE)) {
+ if (!capable(CAP_CHECKPOINT_RESTORE) &&
+ !capable(CAP_SYS_ADMIN)) {
+ retcode = -EACCES;
+ goto err_i1;
+ }
+ }
+
if (cmd & (IOC_IN | IOC_OUT)) {
if (asize <= sizeof(stack_kdata)) {
kdata = stack_kdata;
@@ -2057,43 +3363,44 @@ err_i1:
return retcode;
}
-static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
+static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process,
struct vm_area_struct *vma)
{
phys_addr_t address;
- int ret;
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
- address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
+ if (PAGE_SIZE > 4096)
+ return -EINVAL;
+
+ address = dev->adev->rmmio_remap.bus_addr;
- vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
- VM_DONTDUMP | VM_PFNMAP;
+ vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
+ VM_DONTDUMP | VM_PFNMAP);
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- pr_debug("pasid 0x%x mapping mmio page\n"
+ pr_debug("process pid %d mapping mmio page\n"
" target user address == 0x%08llX\n"
" physical address == 0x%08llX\n"
" vm_flags == 0x%04lX\n"
" size == 0x%04lX\n",
- process->pasid, (unsigned long long) vma->vm_start,
+ process->lead_thread->pid, (unsigned long long) vma->vm_start,
address, vma->vm_flags, PAGE_SIZE);
- ret = io_remap_pfn_range(vma,
+ return io_remap_pfn_range(vma,
vma->vm_start,
address >> PAGE_SHIFT,
PAGE_SIZE,
vma->vm_page_prot);
- return ret;
}
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct kfd_process *process;
- struct kfd_dev *dev = NULL;
+ struct kfd_node *dev = NULL;
unsigned long mmap_offset;
unsigned int gpu_id;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index cfedfb1e8596..4a7180b46b71 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2015-2017 Advanced Micro Devices, Inc.
+ * Copyright 2015-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -25,9 +26,9 @@
#include "kfd_crat.h"
#include "kfd_priv.h"
#include "kfd_topology.h"
-#include "kfd_iommu.h"
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_xgmi.h"
/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
* GPU processor ID are expressed with Bit[31]=1.
@@ -49,22 +50,13 @@ static inline unsigned int get_and_inc_gpu_processor_id(
return current_id;
}
-/* Static table to describe GPU Cache information */
-struct kfd_gpu_cache_info {
- uint32_t cache_size;
- uint32_t cache_level;
- uint32_t flags;
- /* Indicates how many Compute Units share this cache
- * within a SA. Value = 1 indicates the cache is not shared
- */
- uint32_t num_cu_shared;
-};
static struct kfd_gpu_cache_info kaveri_cache_info[] = {
{
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -74,6 +66,7 @@ static struct kfd_gpu_cache_info kaveri_cache_info[] = {
/* Scalar L1 Instruction Cache (in SQC module) per bank */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -83,6 +76,7 @@ static struct kfd_gpu_cache_info kaveri_cache_info[] = {
/* Scalar L1 Data Cache (in SQC module) per bank */
.cache_size = 8,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -98,6 +92,7 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -105,8 +100,9 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
},
{
/* Scalar L1 Instruction Cache (in SQC module) per bank */
- .cache_size = 8,
+ .cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -114,8 +110,9 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
},
{
/* Scalar L1 Data Cache (in SQC module) per bank. */
- .cache_size = 4,
+ .cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -145,6 +142,7 @@ static struct kfd_gpu_cache_info vega10_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -154,6 +152,7 @@ static struct kfd_gpu_cache_info vega10_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -163,6 +162,7 @@ static struct kfd_gpu_cache_info vega10_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -172,6 +172,7 @@ static struct kfd_gpu_cache_info vega10_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 4096,
.cache_level = 2,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -184,6 +185,7 @@ static struct kfd_gpu_cache_info raven_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -193,6 +195,7 @@ static struct kfd_gpu_cache_info raven_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -202,6 +205,7 @@ static struct kfd_gpu_cache_info raven_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -211,6 +215,7 @@ static struct kfd_gpu_cache_info raven_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 1024,
.cache_level = 2,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -223,6 +228,7 @@ static struct kfd_gpu_cache_info renoir_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -232,6 +238,7 @@ static struct kfd_gpu_cache_info renoir_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -241,6 +248,7 @@ static struct kfd_gpu_cache_info renoir_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -250,6 +258,7 @@ static struct kfd_gpu_cache_info renoir_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 1024,
.cache_level = 2,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -262,6 +271,7 @@ static struct kfd_gpu_cache_info vega12_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -271,6 +281,7 @@ static struct kfd_gpu_cache_info vega12_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -280,6 +291,7 @@ static struct kfd_gpu_cache_info vega12_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -289,6 +301,7 @@ static struct kfd_gpu_cache_info vega12_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 2048,
.cache_level = 2,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -301,6 +314,7 @@ static struct kfd_gpu_cache_info vega20_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -310,6 +324,7 @@ static struct kfd_gpu_cache_info vega20_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -319,6 +334,7 @@ static struct kfd_gpu_cache_info vega20_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -328,6 +344,7 @@ static struct kfd_gpu_cache_info vega20_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 8192,
.cache_level = 2,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -340,6 +357,7 @@ static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -349,6 +367,7 @@ static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -358,6 +377,7 @@ static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -367,6 +387,7 @@ static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 8192,
.cache_level = 2,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -379,6 +400,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -388,6 +410,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -397,6 +420,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -406,6 +430,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = {
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -415,6 +440,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 4096,
.cache_level = 2,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -427,6 +453,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -436,6 +463,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -445,6 +473,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -454,6 +483,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = {
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -463,6 +493,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 1024,
.cache_level = 2,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -475,6 +506,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -484,6 +516,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -493,6 +526,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -502,6 +536,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = {
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -511,6 +546,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 2048,
.cache_level = 2,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -523,6 +559,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -532,6 +569,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -541,6 +579,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -550,6 +589,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -559,6 +599,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 4096,
.cache_level = 2,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -568,6 +609,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
/* L3 Data Cache per GPU */
.cache_size = 128*1024,
.cache_level = 3,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -580,6 +622,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -589,6 +632,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -598,6 +642,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -607,6 +652,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -616,6 +662,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 3072,
.cache_level = 2,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -625,6 +672,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
/* L3 Data Cache per GPU */
.cache_size = 96*1024,
.cache_level = 3,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -637,6 +685,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -646,6 +695,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -655,6 +705,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -664,6 +715,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -673,6 +725,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 2048,
.cache_level = 2,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -682,6 +735,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
/* L3 Data Cache per GPU */
.cache_size = 32*1024,
.cache_level = 3,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -694,6 +748,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -703,6 +758,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -712,6 +768,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -721,6 +778,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -730,6 +788,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 1024,
.cache_level = 2,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -739,6 +798,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
/* L3 Data Cache per GPU */
.cache_size = 16*1024,
.cache_level = 3,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -751,6 +811,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -760,6 +821,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -769,6 +831,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -778,6 +841,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -787,6 +851,166 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 2048,
.cache_level = 2,
+ .cache_line_size = 128,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 6,
+ },
+};
+
+static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .cache_line_size = 128,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .cache_line_size = 64,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .cache_line_size = 64,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* GL1 Data Cache per SA */
+ .cache_size = 128,
+ .cache_level = 1,
+ .cache_line_size = 128,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 256,
+ .cache_level = 2,
+ .cache_line_size = 128,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+};
+
+static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .cache_line_size = 128,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .cache_line_size = 64,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .cache_line_size = 64,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* GL1 Data Cache per SA */
+ .cache_size = 128,
+ .cache_level = 1,
+ .cache_line_size = 128,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 256,
+ .cache_level = 2,
+ .cache_line_size = 128,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+};
+
+static struct kfd_gpu_cache_info dummy_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .cache_line_size = 64,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .cache_line_size = 64,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .cache_line_size = 64,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* GL1 Data Cache per SA */
+ .cache_size = 128,
+ .cache_level = 1,
+ .cache_line_size = 64,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 6,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 2048,
+ .cache_level = 2,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -974,8 +1198,12 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache,
props->cachelines_per_tag = cache->lines_per_tag;
props->cache_assoc = cache->associativity;
props->cache_latency = cache->cache_latency;
+
memcpy(props->sibling_map, cache->sibling_map,
- sizeof(props->sibling_map));
+ CRAT_SIBLINGMAP_SIZE);
+
+ /* set the sibling_map_size as 32 for CRAT from ACPI */
+ props->sibling_map_size = CRAT_SIBLINGMAP_SIZE;
if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
props->cache_type |= HSA_CACHE_TYPE_DATA;
@@ -986,7 +1214,6 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache,
if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
props->cache_type |= HSA_CACHE_TYPE_HSACU;
- dev->cache_count++;
dev->node_props.caches_count++;
list_add_tail(&props->list, &dev->cache_props);
@@ -1028,7 +1255,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
props->weight = 20;
else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
- props->weight = 15 * iolink->num_hops_xgmi;
+ props->weight = iolink->weight_xgmi;
else
props->weight = node_distance(id_from, id_to);
@@ -1039,7 +1266,6 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
props->rec_transfer_size =
iolink->recommended_transfer_size;
- dev->io_link_count++;
dev->node_props.io_links_count++;
list_add_tail(&props->list, &dev->io_link_props);
break;
@@ -1055,15 +1281,17 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
* table, add corresponded reversed direction link now.
*/
if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) {
- to_dev = kfd_topology_device_by_proximity_domain(id_to);
+ to_dev = kfd_topology_device_by_proximity_domain_no_lock(id_to);
if (!to_dev)
return -ENODEV;
/* same everything but the other direction */
props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
+ if (!props2)
+ return -ENOMEM;
+
props2->node_from = id_to;
props2->node_to = id_from;
props2->kobj = NULL;
- to_dev->io_link_count++;
to_dev->node_props.io_links_count++;
list_add_tail(&props2->list, &to_dev->io_link_props);
}
@@ -1193,394 +1421,310 @@ err:
return ret;
}
-/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
-static int fill_in_l1_pcache(struct crat_subtype_cache *pcache,
- struct kfd_gpu_cache_info *pcache_info,
- struct kfd_cu_info *cu_info,
- int mem_available,
- int cu_bitmask,
- int cache_type, unsigned int cu_processor_id,
- int cu_block)
-{
- unsigned int cu_sibling_map_mask;
- int first_active_cu;
-
- /* First check if enough memory is available */
- if (sizeof(struct crat_subtype_cache) > mem_available)
- return -ENOMEM;
-
- cu_sibling_map_mask = cu_bitmask;
- cu_sibling_map_mask >>= cu_block;
- cu_sibling_map_mask &=
- ((1 << pcache_info[cache_type].num_cu_shared) - 1);
- first_active_cu = ffs(cu_sibling_map_mask);
- /* CU could be inactive. In case of shared cache find the first active
- * CU. and incase of non-shared cache check if the CU is inactive. If
- * inactive active skip it
- */
- if (first_active_cu) {
- memset(pcache, 0, sizeof(struct crat_subtype_cache));
- pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
- pcache->length = sizeof(struct crat_subtype_cache);
- pcache->flags = pcache_info[cache_type].flags;
- pcache->processor_id_low = cu_processor_id
- + (first_active_cu - 1);
- pcache->cache_level = pcache_info[cache_type].cache_level;
- pcache->cache_size = pcache_info[cache_type].cache_size;
-
- /* Sibling map is w.r.t processor_id_low, so shift out
- * inactive CU
- */
- cu_sibling_map_mask =
- cu_sibling_map_mask >> (first_active_cu - 1);
-
- pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);
- pcache->sibling_map[1] =
- (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
- pcache->sibling_map[2] =
- (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
- pcache->sibling_map[3] =
- (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
- return 0;
+static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
+ bool cache_line_size_missing,
+ struct kfd_gpu_cache_info *pcache_info)
+{
+ struct amdgpu_device *adev = kdev->adev;
+ int i = 0;
+
+ /* TCP L1 Cache per CU */
+ if (adev->gfx.config.gc_tcp_l1_size) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size;
+ pcache_info[i].cache_level = 1;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
+ pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size;
+ if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+ pcache_info[i].cache_line_size = 128;
+ i++;
+ }
+ /* Scalar L1 Instruction Cache per SQC */
+ if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
+ pcache_info[i].cache_size =
+ adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
+ pcache_info[i].cache_level = 1;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
+ pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size;
+ if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+ pcache_info[i].cache_line_size = 128;
+ i++;
}
- return 1;
+ /* Scalar L1 Data Cache per SQC */
+ if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
+ pcache_info[i].cache_level = 1;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
+ pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size;
+ if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+ pcache_info[i].cache_line_size = 64;
+ i++;
+ }
+ /* GL1 Data Cache per SA */
+ if (adev->gfx.config.gc_gl1c_per_sa &&
+ adev->gfx.config.gc_gl1c_size_per_instance) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa *
+ adev->gfx.config.gc_gl1c_size_per_instance;
+ pcache_info[i].cache_level = 1;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ if (cache_line_size_missing)
+ pcache_info[i].cache_line_size = 128;
+ i++;
+ }
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ if (adev->gfx.config.gc_gl2c_per_gpu) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu;
+ pcache_info[i].cache_level = 2;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size;
+ if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+ pcache_info[i].cache_line_size = 128;
+ i++;
+ }
+ /* L3 Data Cache per GPU */
+ if (adev->gmc.mall_size) {
+ pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
+ pcache_info[i].cache_level = 3;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ pcache_info[i].cache_line_size = 64;
+ i++;
+ }
+ return i;
}
-/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
-static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache,
- struct kfd_gpu_cache_info *pcache_info,
- struct kfd_cu_info *cu_info,
- int mem_available,
- int cache_type, unsigned int cu_processor_id)
+static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
+ struct kfd_gpu_cache_info *pcache_info)
{
- unsigned int cu_sibling_map_mask;
- int first_active_cu;
- int i, j, k;
-
- /* First check if enough memory is available */
- if (sizeof(struct crat_subtype_cache) > mem_available)
- return -ENOMEM;
-
- cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
- cu_sibling_map_mask &=
- ((1 << pcache_info[cache_type].num_cu_shared) - 1);
- first_active_cu = ffs(cu_sibling_map_mask);
-
- /* CU could be inactive. In case of shared cache find the first active
- * CU. and incase of non-shared cache check if the CU is inactive. If
- * inactive active skip it
- */
- if (first_active_cu) {
- memset(pcache, 0, sizeof(struct crat_subtype_cache));
- pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
- pcache->length = sizeof(struct crat_subtype_cache);
- pcache->flags = pcache_info[cache_type].flags;
- pcache->processor_id_low = cu_processor_id
- + (first_active_cu - 1);
- pcache->cache_level = pcache_info[cache_type].cache_level;
- pcache->cache_size = pcache_info[cache_type].cache_size;
-
- /* Sibling map is w.r.t processor_id_low, so shift out
- * inactive CU
- */
- cu_sibling_map_mask =
- cu_sibling_map_mask >> (first_active_cu - 1);
- k = 0;
- for (i = 0; i < cu_info->num_shader_engines; i++) {
- for (j = 0; j < cu_info->num_shader_arrays_per_engine;
- j++) {
- pcache->sibling_map[k] =
- (uint8_t)(cu_sibling_map_mask & 0xFF);
- pcache->sibling_map[k+1] =
- (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
- pcache->sibling_map[k+2] =
- (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
- pcache->sibling_map[k+3] =
- (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
- k += 4;
- cu_sibling_map_mask =
- cu_info->cu_bitmap[i % 4][j + i / 4];
- cu_sibling_map_mask &= (
- (1 << pcache_info[cache_type].num_cu_shared)
- - 1);
- }
- }
- return 0;
+ struct amdgpu_device *adev = kdev->adev;
+ int i = 0;
+
+ /* TCP L1 Cache per CU */
+ if (adev->gfx.config.gc_tcp_size_per_cu) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_tcp_size_per_cu;
+ pcache_info[i].cache_level = 1;
+ /* Cacheline size not available in IP discovery for gc943,gc944 */
+ pcache_info[i].cache_line_size = 128;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = 1;
+ i++;
+ }
+ /* Scalar L1 Instruction Cache per SQC */
+ if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
+ pcache_info[i].cache_size =
+ adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
+ pcache_info[i].cache_level = 1;
+ pcache_info[i].cache_line_size = 64;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc;
+ i++;
+ }
+ /* Scalar L1 Data Cache per SQC */
+ if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
+ pcache_info[i].cache_level = 1;
+ pcache_info[i].cache_line_size = 64;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc;
+ i++;
}
- return 1;
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ if (adev->gfx.config.gc_tcc_size) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_tcc_size;
+ pcache_info[i].cache_level = 2;
+ pcache_info[i].cache_line_size = 128;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ i++;
+ }
+ /* L3 Data Cache per GPU */
+ if (adev->gmc.mall_size) {
+ pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
+ pcache_info[i].cache_level = 3;
+ pcache_info[i].cache_line_size = 64;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ i++;
+ }
+ return i;
}
-/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info
- * tables
- *
- * @kdev - [IN] GPU device
- * @gpu_processor_id - [IN] GPU processor ID to which these caches
- * associate
- * @available_size - [IN] Amount of memory available in pcache
- * @cu_info - [IN] Compute Unit info obtained from KGD
- * @pcache - [OUT] memory into which cache data is to be filled in.
- * @size_filled - [OUT] amount of data used up in pcache.
- * @num_of_entries - [OUT] number of caches added
- */
-static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
- int gpu_processor_id,
- int available_size,
- struct kfd_cu_info *cu_info,
- struct crat_subtype_cache *pcache,
- int *size_filled,
- int *num_of_entries)
+int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
{
- struct kfd_gpu_cache_info *pcache_info;
int num_of_cache_types = 0;
- int i, j, k;
- int ct = 0;
- int mem_available = available_size;
- unsigned int cu_processor_id;
- int ret;
- unsigned int num_cu_shared;
-
- switch (kdev->device_info->asic_family) {
+ bool cache_line_size_missing = false;
+
+ switch (kdev->adev->asic_type) {
case CHIP_KAVERI:
- pcache_info = kaveri_cache_info;
+ *pcache_info = kaveri_cache_info;
num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
break;
case CHIP_HAWAII:
- pcache_info = hawaii_cache_info;
+ *pcache_info = hawaii_cache_info;
num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
break;
case CHIP_CARRIZO:
- pcache_info = carrizo_cache_info;
+ *pcache_info = carrizo_cache_info;
num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
break;
case CHIP_TONGA:
- pcache_info = tonga_cache_info;
+ *pcache_info = tonga_cache_info;
num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
break;
case CHIP_FIJI:
- pcache_info = fiji_cache_info;
+ *pcache_info = fiji_cache_info;
num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
break;
case CHIP_POLARIS10:
- pcache_info = polaris10_cache_info;
+ *pcache_info = polaris10_cache_info;
num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
break;
case CHIP_POLARIS11:
- pcache_info = polaris11_cache_info;
+ *pcache_info = polaris11_cache_info;
num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
break;
case CHIP_POLARIS12:
- pcache_info = polaris12_cache_info;
+ *pcache_info = polaris12_cache_info;
num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
break;
case CHIP_VEGAM:
- pcache_info = vegam_cache_info;
+ *pcache_info = vegam_cache_info;
num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
break;
- case CHIP_VEGA10:
- pcache_info = vega10_cache_info;
- num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
- break;
- case CHIP_VEGA12:
- pcache_info = vega12_cache_info;
- num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
- break;
- case CHIP_VEGA20:
- case CHIP_ARCTURUS:
- pcache_info = vega20_cache_info;
- num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
- break;
- case CHIP_ALDEBARAN:
- pcache_info = aldebaran_cache_info;
- num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
- break;
- case CHIP_RAVEN:
- pcache_info = raven_cache_info;
- num_of_cache_types = ARRAY_SIZE(raven_cache_info);
- break;
- case CHIP_RENOIR:
- pcache_info = renoir_cache_info;
- num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
- break;
- case CHIP_NAVI10:
- case CHIP_NAVI12:
- case CHIP_CYAN_SKILLFISH:
- pcache_info = navi10_cache_info;
- num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
- break;
- case CHIP_NAVI14:
- pcache_info = navi14_cache_info;
- num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
- break;
- case CHIP_SIENNA_CICHLID:
- pcache_info = sienna_cichlid_cache_info;
- num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
- break;
- case CHIP_NAVY_FLOUNDER:
- pcache_info = navy_flounder_cache_info;
- num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
- break;
- case CHIP_DIMGREY_CAVEFISH:
- pcache_info = dimgrey_cavefish_cache_info;
- num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
- break;
- case CHIP_VANGOGH:
- pcache_info = vangogh_cache_info;
- num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
- break;
- case CHIP_BEIGE_GOBY:
- pcache_info = beige_goby_cache_info;
- num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
- break;
- case CHIP_YELLOW_CARP:
- pcache_info = yellow_carp_cache_info;
- num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
- break;
default:
- return -EINVAL;
- }
-
- *size_filled = 0;
- *num_of_entries = 0;
-
- /* For each type of cache listed in the kfd_gpu_cache_info table,
- * go through all available Compute Units.
- * The [i,j,k] loop will
- * if kfd_gpu_cache_info.num_cu_shared = 1
- * will parse through all available CU
- * If (kfd_gpu_cache_info.num_cu_shared != 1)
- * then it will consider only one CU from
- * the shared unit
- */
-
- for (ct = 0; ct < num_of_cache_types; ct++) {
- cu_processor_id = gpu_processor_id;
- if (pcache_info[ct].cache_level == 1) {
- for (i = 0; i < cu_info->num_shader_engines; i++) {
- for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
- for (k = 0; k < cu_info->num_cu_per_sh;
- k += pcache_info[ct].num_cu_shared) {
- ret = fill_in_l1_pcache(pcache,
- pcache_info,
- cu_info,
- mem_available,
- cu_info->cu_bitmap[i % 4][j + i / 4],
- ct,
- cu_processor_id,
- k);
-
- if (ret < 0)
+ switch (KFD_GC_VERSION(kdev)) {
+ case IP_VERSION(9, 0, 1):
+ *pcache_info = vega10_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
+ break;
+ case IP_VERSION(9, 2, 1):
+ *pcache_info = vega12_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
+ break;
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 4, 1):
+ *pcache_info = vega20_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
+ break;
+ case IP_VERSION(9, 4, 2):
+ *pcache_info = aldebaran_cache_info;
+ num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
+ break;
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ num_of_cache_types =
+ kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd,
+ *pcache_info);
+ break;
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 2):
+ *pcache_info = raven_cache_info;
+ num_of_cache_types = ARRAY_SIZE(raven_cache_info);
+ break;
+ case IP_VERSION(9, 3, 0):
+ *pcache_info = renoir_cache_info;
+ num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
+ break;
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ *pcache_info = navi10_cache_info;
+ num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
+ break;
+ case IP_VERSION(10, 1, 1):
+ *pcache_info = navi14_cache_info;
+ num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
+ break;
+ case IP_VERSION(10, 3, 0):
+ *pcache_info = sienna_cichlid_cache_info;
+ num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
+ break;
+ case IP_VERSION(10, 3, 2):
+ *pcache_info = navy_flounder_cache_info;
+ num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
+ break;
+ case IP_VERSION(10, 3, 4):
+ *pcache_info = dimgrey_cavefish_cache_info;
+ num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
+ break;
+ case IP_VERSION(10, 3, 1):
+ *pcache_info = vangogh_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
+ break;
+ case IP_VERSION(10, 3, 5):
+ *pcache_info = beige_goby_cache_info;
+ num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
+ break;
+ case IP_VERSION(10, 3, 3):
+ *pcache_info = yellow_carp_cache_info;
+ num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
+ break;
+ case IP_VERSION(10, 3, 6):
+ *pcache_info = gc_10_3_6_cache_info;
+ num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info);
+ break;
+ case IP_VERSION(10, 3, 7):
+ *pcache_info = gfx1037_cache_info;
+ num_of_cache_types = ARRAY_SIZE(gfx1037_cache_info);
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ /* Cacheline size not available in IP discovery for gc11.
+ * kfd_fill_gpu_cache_info_from_gfx_config to hard code it
+ */
+ cache_line_size_missing = true;
+ fallthrough;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ num_of_cache_types =
+ kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd,
+ cache_line_size_missing,
+ *pcache_info);
+ break;
+ default:
+ *pcache_info = dummy_cache_info;
+ num_of_cache_types = ARRAY_SIZE(dummy_cache_info);
+ pr_warn("dummy cache info is used temporarily and real cache info need update later.\n");
break;
-
- if (!ret) {
- pcache++;
- (*num_of_entries)++;
- mem_available -= sizeof(*pcache);
- (*size_filled) += sizeof(*pcache);
- }
-
- /* Move to next CU block */
- num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
- cu_info->num_cu_per_sh) ?
- pcache_info[ct].num_cu_shared :
- (cu_info->num_cu_per_sh - k);
- cu_processor_id += num_cu_shared;
}
- }
- }
- } else {
- ret = fill_in_l2_l3_pcache(pcache,
- pcache_info,
- cu_info,
- mem_available,
- ct,
- cu_processor_id);
-
- if (ret < 0)
- break;
-
- if (!ret) {
- pcache++;
- (*num_of_entries)++;
- mem_available -= sizeof(*pcache);
- (*size_filled) += sizeof(*pcache);
- }
- }
- }
-
- pr_debug("Added [%d] GPU cache entries\n", *num_of_entries);
-
- return 0;
-}
-
-static bool kfd_ignore_crat(void)
-{
- bool ret;
-
- if (ignore_crat)
- return true;
-
-#ifndef KFD_SUPPORT_IOMMU_V2
- ret = true;
-#else
- ret = false;
-#endif
-
- return ret;
-}
-
-/*
- * kfd_create_crat_image_acpi - Allocates memory for CRAT image and
- * copies CRAT from ACPI (if available).
- * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
- *
- * @crat_image: CRAT read from ACPI. If no CRAT in ACPI then
- * crat_image will be NULL
- * @size: [OUT] size of crat_image
- *
- * Return 0 if successful else return error code
- */
-int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
-{
- struct acpi_table_header *crat_table;
- acpi_status status;
- void *pcrat_image;
- int rc = 0;
-
- if (!crat_image)
- return -EINVAL;
-
- *crat_image = NULL;
-
- if (kfd_ignore_crat()) {
- pr_info("CRAT table disabled by module option\n");
- return -ENODATA;
- }
-
- /* Fetch the CRAT table from ACPI */
- status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
- if (status == AE_NOT_FOUND) {
- pr_warn("CRAT table not found\n");
- return -ENODATA;
- } else if (ACPI_FAILURE(status)) {
- const char *err = acpi_format_exception(status);
-
- pr_err("CRAT table error: %s\n", err);
- return -EINVAL;
- }
-
- pcrat_image = kvmalloc(crat_table->length, GFP_KERNEL);
- if (!pcrat_image) {
- rc = -ENOMEM;
- goto out;
}
-
- memcpy(pcrat_image, crat_table, crat_table->length);
- *crat_image = pcrat_image;
- *size = crat_table->length;
-out:
- acpi_put_table(crat_table);
- return rc;
+ return num_of_cache_types;
}
/* Memory required to create Virtual CRAT.
@@ -1835,7 +1979,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
}
static int kfd_fill_gpu_memory_affinity(int *avail_size,
- struct kfd_dev *kdev, uint8_t type, uint64_t size,
+ struct kfd_node *kdev, uint8_t type, uint64_t size,
struct crat_subtype_memory *sub_type_hdr,
uint32_t proximity_domain,
const struct kfd_local_mem_info *local_mem_info)
@@ -1864,13 +2008,13 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
}
#ifdef CONFIG_ACPI_NUMA
-static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
+static void kfd_find_numa_node_in_srat(struct kfd_node *kdev)
{
struct acpi_table_header *table_header = NULL;
struct acpi_subtable_header *sub_header = NULL;
unsigned long table_end, subtable_len;
- u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
- pci_dev_id(kdev->pdev);
+ u32 pci_id = pci_domain_nr(kdev->adev->pdev->bus) << 16 |
+ pci_dev_id(kdev->adev->pdev);
u32 bdf;
acpi_status status;
struct acpi_srat_cpu_affinity *cpu;
@@ -1945,10 +2089,13 @@ static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
numa_node = 0;
if (numa_node != NUMA_NO_NODE)
- set_dev_node(&kdev->pdev->dev, numa_node);
+ set_dev_node(&kdev->adev->pdev->dev, numa_node);
}
#endif
+#define KFD_CRAT_INTRA_SOCKET_WEIGHT 13
+#define KFD_CRAT_XGMI_WEIGHT 15
+
/* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
* to its NUMA node
* @avail_size: Available size in the memory
@@ -1959,12 +2106,10 @@ static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
* Return 0 if successful else return -ve value
*/
static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
- struct kfd_dev *kdev,
+ struct kfd_node *kdev,
struct crat_subtype_iolink *sub_type_hdr,
uint32_t proximity_domain)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
-
*avail_size -= sizeof(struct crat_subtype_iolink);
if (*avail_size < 0)
return -ENOMEM;
@@ -1981,7 +2126,13 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
/* Fill in IOLINK subtype.
* TODO: Fill-in other fields of iolink subtype
*/
- if (adev->gmc.xgmi.connected_to_cpu) {
+ if (kdev->adev->gmc.xgmi.connected_to_cpu ||
+ (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 3) &&
+ kdev->adev->smuio.funcs->get_pkg_type(kdev->adev) ==
+ AMDGPU_PKG_TYPE_APU)) {
+ bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3);
+ int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT :
+ KFD_CRAT_INTRA_SOCKET_WEIGHT;
/*
* with host gpu xgmi link, host can access gpu memory whether
* or not pcie bar type is large, so always create bidirectional
@@ -1989,33 +2140,37 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
*/
sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
- sub_type_hdr->num_hops_xgmi = 1;
- if (adev->asic_type == CHIP_ALDEBARAN) {
- sub_type_hdr->minimum_bandwidth_mbs =
- amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
- kdev->kgd, NULL, true);
- sub_type_hdr->maximum_bandwidth_mbs =
- sub_type_hdr->minimum_bandwidth_mbs;
+ sub_type_hdr->weight_xgmi = weight;
+ if (ext_cpu) {
+ amdgpu_xgmi_get_bandwidth(kdev->adev, NULL,
+ AMDGPU_XGMI_BW_MODE_PER_LINK,
+ AMDGPU_XGMI_BW_UNIT_MBYTES,
+ &sub_type_hdr->minimum_bandwidth_mbs,
+ &sub_type_hdr->maximum_bandwidth_mbs);
+ } else {
+ sub_type_hdr->minimum_bandwidth_mbs = mem_bw;
+ sub_type_hdr->maximum_bandwidth_mbs = mem_bw;
}
} else {
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
sub_type_hdr->minimum_bandwidth_mbs =
- amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->kgd, true);
+ amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true);
sub_type_hdr->maximum_bandwidth_mbs =
- amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->kgd, false);
+ amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false);
}
sub_type_hdr->proximity_domain_from = proximity_domain;
#ifdef CONFIG_ACPI_NUMA
- if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
+ if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE &&
+ num_possible_nodes() > 1)
kfd_find_numa_node_in_srat(kdev);
#endif
#ifdef CONFIG_NUMA
- if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
+ if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE)
sub_type_hdr->proximity_domain_to = 0;
else
- sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
+ sub_type_hdr->proximity_domain_to = kdev->adev->pdev->dev.numa_node;
#else
sub_type_hdr->proximity_domain_to = 0;
#endif
@@ -2023,12 +2178,14 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
}
static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
- struct kfd_dev *kdev,
- struct kfd_dev *peer_kdev,
+ struct kfd_node *kdev,
+ struct kfd_node *peer_kdev,
struct crat_subtype_iolink *sub_type_hdr,
uint32_t proximity_domain_from,
uint32_t proximity_domain_to)
{
+ bool use_ta_info = kdev->kfd->num_nodes == 1;
+
*avail_size -= sizeof(struct crat_subtype_iolink);
if (*avail_size < 0)
return -ENOMEM;
@@ -2043,12 +2200,25 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
sub_type_hdr->proximity_domain_from = proximity_domain_from;
sub_type_hdr->proximity_domain_to = proximity_domain_to;
- sub_type_hdr->num_hops_xgmi =
- amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd);
- sub_type_hdr->maximum_bandwidth_mbs =
- amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->kgd, peer_kdev->kgd, false);
- sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
- amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->kgd, NULL, true) : 0;
+
+ if (use_ta_info) {
+ sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT *
+ amdgpu_xgmi_get_hops_count(kdev->adev, peer_kdev->adev);
+ amdgpu_xgmi_get_bandwidth(kdev->adev, peer_kdev->adev,
+ AMDGPU_XGMI_BW_MODE_PER_PEER,
+ AMDGPU_XGMI_BW_UNIT_MBYTES,
+ &sub_type_hdr->minimum_bandwidth_mbs,
+ &sub_type_hdr->maximum_bandwidth_mbs);
+ } else {
+ bool is_single_hop = kdev->kfd == peer_kdev->kfd;
+ int weight = is_single_hop ? KFD_CRAT_INTRA_SOCKET_WEIGHT :
+ (2 * KFD_CRAT_INTRA_SOCKET_WEIGHT) + KFD_CRAT_XGMI_WEIGHT;
+ int mem_bw = 819200;
+
+ sub_type_hdr->weight_xgmi = weight;
+ sub_type_hdr->maximum_bandwidth_mbs = is_single_hop ? mem_bw : 0;
+ sub_type_hdr->minimum_bandwidth_mbs = is_single_hop ? mem_bw : 0;
+ }
return 0;
}
@@ -2060,19 +2230,18 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
* [OUT] actual size of data filled in crat_image
*/
static int kfd_create_vcrat_image_gpu(void *pcrat_image,
- size_t *size, struct kfd_dev *kdev,
+ size_t *size, struct kfd_node *kdev,
uint32_t proximity_domain)
{
struct crat_header *crat_table = (struct crat_header *)pcrat_image;
+ struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config;
+ struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info;
struct crat_subtype_generic *sub_type_hdr;
struct kfd_local_mem_info local_mem_info;
struct kfd_topology_device *peer_dev;
struct crat_subtype_computeunit *cu;
- struct kfd_cu_info cu_info;
int avail_size = *size;
uint32_t total_num_of_cu;
- int num_of_cache_entries = 0;
- int cache_mem_filled = 0;
uint32_t nid = 0;
int ret = 0;
@@ -2083,9 +2252,6 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
* Modify length and total_entries as subunits are added.
*/
avail_size -= sizeof(struct crat_header);
- if (avail_size < 0)
- return -ENOMEM;
-
memset(crat_table, 0, sizeof(struct crat_header));
memcpy(&crat_table->signature, CRAT_SIGNATURE,
@@ -2099,9 +2265,6 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
* First fill in the sub type header and then sub type data
*/
avail_size -= sizeof(struct crat_subtype_computeunit);
- if (avail_size < 0)
- return -ENOMEM;
-
sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1);
memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));
@@ -2114,29 +2277,23 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
cu->proximity_domain = proximity_domain;
- amdgpu_amdkfd_get_cu_info(kdev->kgd, &cu_info);
- cu->num_simd_per_cu = cu_info.simd_per_cu;
- cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number;
- cu->max_waves_simd = cu_info.max_waves_per_simd;
+ cu->num_simd_per_cu = cu_info->simd_per_cu;
+ cu->num_simd_cores = cu_info->simd_per_cu *
+ (cu_info->number / kdev->kfd->num_nodes);
+ cu->max_waves_simd = cu_info->max_waves_per_simd;
- cu->wave_front_size = cu_info.wave_front_size;
- cu->array_count = cu_info.num_shader_arrays_per_engine *
- cu_info.num_shader_engines;
- total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh);
+ cu->wave_front_size = cu_info->wave_front_size;
+ cu->array_count = gfx_info->max_sh_per_se *
+ gfx_info->max_shader_engines;
+ total_num_of_cu = (cu->array_count * gfx_info->max_cu_per_sh);
cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu);
- cu->num_cu_per_array = cu_info.num_cu_per_sh;
- cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu;
- cu->num_banks = cu_info.num_shader_engines;
- cu->lds_size_in_kb = cu_info.lds_size;
+ cu->num_cu_per_array = gfx_info->max_cu_per_sh;
+ cu->max_slots_scatch_cu = cu_info->max_scratch_slots_per_cu;
+ cu->num_banks = gfx_info->max_shader_engines;
+ cu->lds_size_in_kb = cu_info->lds_size;
cu->hsa_capability = 0;
- /* Check if this node supports IOMMU. During parsing this flag will
- * translate to HSA_CAP_ATS_PRESENT
- */
- if (!kfd_iommu_check_device(kdev))
- cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
-
crat_table->length += sub_type_hdr->length;
crat_table->total_entries++;
@@ -2145,11 +2302,11 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
* report the total FB size (public+private) as a single
* private heap.
*/
- amdgpu_amdkfd_get_local_mem_info(kdev->kgd, &local_mem_info);
+ local_mem_info = kdev->local_mem_info;
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
sub_type_hdr->length);
- if (debug_largebar)
+ if (kdev->adev->debug_largebar)
local_mem_info.local_mem_size_private = 0;
if (local_mem_info.local_mem_size_private == 0)
@@ -2173,31 +2330,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
crat_table->length += sizeof(struct crat_subtype_memory);
crat_table->total_entries++;
- /* TODO: Fill in cache information. This information is NOT readily
- * available in KGD
- */
- sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
- sub_type_hdr->length);
- ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low,
- avail_size,
- &cu_info,
- (struct crat_subtype_cache *)sub_type_hdr,
- &cache_mem_filled,
- &num_of_cache_entries);
-
- if (ret < 0)
- return ret;
-
- crat_table->length += cache_mem_filled;
- crat_table->total_entries += num_of_cache_entries;
- avail_size -= cache_mem_filled;
-
/* Fill in Subtype: IO_LINKS
* Only direct links are added here which is Link from GPU to
- * to its NUMA node. Indirect links are added by userspace.
+ * its NUMA node. Indirect links are added by userspace.
*/
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
- cache_mem_filled);
+ sub_type_hdr->length);
ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
(struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
@@ -2216,12 +2354,14 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
* (from other GPU to this GPU) will be added
* in kfd_parse_subtype_iolink.
*/
- if (kdev->hive_id) {
+ if (kdev->kfd->hive_id) {
for (nid = 0; nid < proximity_domain; ++nid) {
- peer_dev = kfd_topology_device_by_proximity_domain(nid);
+ peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid);
if (!peer_dev->gpu)
continue;
- if (peer_dev->gpu->hive_id != kdev->hive_id)
+ if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id)
+ continue;
+ if (!amdgpu_xgmi_get_is_sharing_enabled(kdev->adev, peer_dev->gpu->adev))
continue;
sub_type_hdr = (typeof(sub_type_hdr))(
(char *)sub_type_hdr +
@@ -2255,12 +2395,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
* (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
* -- this option is not currently implemented.
* The assumption is that all AMD APUs will have CRAT
- * @kdev: Valid kfd_device required if flags contain COMPUTE_UNIT_GPU
+ * @kdev: Valid kfd_node required if flags contain COMPUTE_UNIT_GPU
*
* Return 0 if successful else return -ve value
*/
int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
- int flags, struct kfd_dev *kdev,
+ int flags, struct kfd_node *kdev,
uint32_t proximity_domain)
{
void *pcrat_image = NULL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
index d54ceebd346b..a8ca7ecb6d27 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -28,11 +29,10 @@
#pragma pack(1)
/*
- * 4CC signature values for the CRAT and CDIT ACPI tables
+ * 4CC signature value for the CRAT ACPI table
*/
#define CRAT_SIGNATURE "CRAT"
-#define CDIT_SIGNATURE "CDIT"
/*
* Component Resource Association Table (CRAT)
@@ -42,8 +42,6 @@
#define CRAT_OEMTABLEID_LENGTH 8
#define CRAT_RESERVED_LENGTH 6
-#define CRAT_OEMID_64BIT_MASK ((1ULL << (CRAT_OEMID_LENGTH * 8)) - 1)
-
/* Compute Unit flags */
#define COMPUTE_UNIT_CPU (1 << 0) /* Create Virtual CRAT for CPU */
#define COMPUTE_UNIT_GPU (1 << 1) /* Create Virtual CRAT for GPU */
@@ -79,6 +77,10 @@ struct crat_header {
#define CRAT_SUBTYPE_IOLINK_AFFINITY 5
#define CRAT_SUBTYPE_MAX 6
+/*
+ * Do not change the value of CRAT_SIBLINGMAP_SIZE from 32
+ * as it breaks the ABI.
+ */
#define CRAT_SIBLINGMAP_SIZE 32
/*
@@ -232,7 +234,7 @@ struct crat_subtype_ccompute {
#define CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT (1 << 2)
#define CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT (1 << 3)
#define CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA (1 << 4)
-#define CRAT_IOLINK_FLAGS_BI_DIRECTIONAL (1 << 31)
+#define CRAT_IOLINK_FLAGS_BI_DIRECTIONAL (1 << 31)
#define CRAT_IOLINK_FLAGS_RESERVED_MASK 0x7fffffe0
/*
@@ -275,7 +277,7 @@ struct crat_subtype_iolink {
uint32_t maximum_bandwidth_mbs;
uint32_t recommended_transfer_size;
uint8_t reserved2[CRAT_IOLINK_RESERVED_LENGTH - 1];
- uint8_t num_hops_xgmi;
+ uint8_t weight_xgmi;
};
/*
@@ -291,37 +293,28 @@ struct crat_subtype_generic {
uint32_t flags;
};
-/*
- * Component Locality Distance Information Table (CDIT)
- */
-#define CDIT_OEMID_LENGTH 6
-#define CDIT_OEMTABLEID_LENGTH 8
-
-struct cdit_header {
- uint32_t signature;
- uint32_t length;
- uint8_t revision;
- uint8_t checksum;
- uint8_t oem_id[CDIT_OEMID_LENGTH];
- uint8_t oem_table_id[CDIT_OEMTABLEID_LENGTH];
- uint32_t oem_revision;
- uint32_t creator_id;
- uint32_t creator_revision;
- uint32_t total_entries;
- uint16_t num_domains;
- uint8_t entry[1];
-};
-
#pragma pack()
-struct kfd_dev;
+struct kfd_node;
+
+/* Static table to describe GPU Cache information */
+struct kfd_gpu_cache_info {
+ uint32_t cache_size;
+ uint32_t cache_level;
+ uint32_t cache_line_size;
+ uint32_t flags;
+ /* Indicates how many Compute Units share this cache
+ * within a SA. Value = 1 indicates the cache is not shared
+ */
+ uint32_t num_cu_shared;
+};
+int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info);
-int kfd_create_crat_image_acpi(void **crat_image, size_t *size);
void kfd_destroy_crat_image(void *crat_image);
int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
uint32_t proximity_domain);
int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
- int flags, struct kfd_dev *kdev,
+ int flags, struct kfd_node *kdev,
uint32_t proximity_domain);
#endif /* KFD_CRAT_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
deleted file mode 100644
index 159add0f5aaa..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ /dev/null
@@ -1,845 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/log2.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/mutex.h>
-#include <linux/device.h>
-
-#include "kfd_pm4_headers.h"
-#include "kfd_pm4_headers_diq.h"
-#include "kfd_kernel_queue.h"
-#include "kfd_priv.h"
-#include "kfd_pm4_opcodes.h"
-#include "cik_regs.h"
-#include "kfd_dbgmgr.h"
-#include "kfd_dbgdev.h"
-#include "kfd_device_queue_manager.h"
-
-static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
-{
- dev->kfd2kgd->address_watch_disable(dev->kgd);
-}
-
-static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
- u32 pasid, uint64_t vmid0_address,
- uint32_t *packet_buff, size_t size_in_bytes)
-{
- struct pm4__release_mem *rm_packet;
- struct pm4__indirect_buffer_pasid *ib_packet;
- struct kfd_mem_obj *mem_obj;
- size_t pq_packets_size_in_bytes;
- union ULARGE_INTEGER *largep;
- union ULARGE_INTEGER addr;
- struct kernel_queue *kq;
- uint64_t *rm_state;
- unsigned int *ib_packet_buff;
- int status;
-
- if (WARN_ON(!size_in_bytes))
- return -EINVAL;
-
- kq = dbgdev->kq;
-
- pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
- sizeof(struct pm4__indirect_buffer_pasid);
-
- /*
- * We acquire a buffer from DIQ
- * The receive packet buff will be sitting on the Indirect Buffer
- * and in the PQ we put the IB packet + sync packet(s).
- */
- status = kq_acquire_packet_buffer(kq,
- pq_packets_size_in_bytes / sizeof(uint32_t),
- &ib_packet_buff);
- if (status) {
- pr_err("kq_acquire_packet_buffer failed\n");
- return status;
- }
-
- memset(ib_packet_buff, 0, pq_packets_size_in_bytes);
-
- ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);
-
- ib_packet->header.count = 3;
- ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
- ib_packet->header.type = PM4_TYPE_3;
-
- largep = (union ULARGE_INTEGER *) &vmid0_address;
-
- ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
- ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
-
- ib_packet->control = (1 << 23) | (1 << 31) |
- ((size_in_bytes / 4) & 0xfffff);
-
- ib_packet->bitfields5.pasid = pasid;
-
- /*
- * for now we use release mem for GPU-CPU synchronization
- * Consider WaitRegMem + WriteData as a better alternative
- * we get a GART allocations ( gpu/cpu mapping),
- * for the sync variable, and wait until:
- * (a) Sync with HW
- * (b) Sync var is written by CP to mem.
- */
- rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
- (sizeof(struct pm4__indirect_buffer_pasid) /
- sizeof(unsigned int)));
-
- status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
- &mem_obj);
-
- if (status) {
- pr_err("Failed to allocate GART memory\n");
- kq_rollback_packet(kq);
- return status;
- }
-
- rm_state = (uint64_t *) mem_obj->cpu_ptr;
-
- *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;
-
- rm_packet->header.opcode = IT_RELEASE_MEM;
- rm_packet->header.type = PM4_TYPE_3;
- rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2;
-
- rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
- rm_packet->bitfields2.event_index =
- event_index___release_mem__end_of_pipe;
-
- rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
- rm_packet->bitfields2.atc = 0;
- rm_packet->bitfields2.tc_wb_action_ena = 1;
-
- addr.quad_part = mem_obj->gpu_addr;
-
- rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
- rm_packet->address_hi = addr.u.high_part;
-
- rm_packet->bitfields3.data_sel =
- data_sel___release_mem__send_64_bit_data;
-
- rm_packet->bitfields3.int_sel =
- int_sel___release_mem__send_data_after_write_confirm;
-
- rm_packet->bitfields3.dst_sel =
- dst_sel___release_mem__memory_controller;
-
- rm_packet->data_lo = QUEUESTATE__ACTIVE;
-
- kq_submit_packet(kq);
-
- /* Wait till CP writes sync code: */
- status = amdkfd_fence_wait_timeout(
- rm_state,
- QUEUESTATE__ACTIVE, 1500);
-
- kfd_gtt_sa_free(dbgdev->dev, mem_obj);
-
- return status;
-}
-
-static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
-{
- /*
- * no action is needed in this case,
- * just make sure diq will not be used
- */
-
- dbgdev->kq = NULL;
-
- return 0;
-}
-
-static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
-{
- struct queue_properties properties;
- unsigned int qid;
- struct kernel_queue *kq = NULL;
- int status;
-
- properties.type = KFD_QUEUE_TYPE_DIQ;
-
- status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
- &properties, &qid, NULL);
-
- if (status) {
- pr_err("Failed to create DIQ\n");
- return status;
- }
-
- pr_debug("DIQ Created with queue id: %d\n", qid);
-
- kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
-
- if (!kq) {
- pr_err("Error getting DIQ\n");
- pqm_destroy_queue(dbgdev->pqm, qid);
- return -EFAULT;
- }
-
- dbgdev->kq = kq;
-
- return status;
-}
-
-static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
-{
- /* disable watch address */
- dbgdev_address_watch_disable_nodiq(dbgdev->dev);
- return 0;
-}
-
-static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
-{
- /* todo - disable address watch */
- int status;
-
- status = pqm_destroy_queue(dbgdev->pqm,
- dbgdev->kq->queue->properties.queue_id);
- dbgdev->kq = NULL;
-
- return status;
-}
-
-static void dbgdev_address_watch_set_registers(
- const struct dbg_address_watch_info *adw_info,
- union TCP_WATCH_ADDR_H_BITS *addrHi,
- union TCP_WATCH_ADDR_L_BITS *addrLo,
- union TCP_WATCH_CNTL_BITS *cntl,
- unsigned int index, unsigned int vmid)
-{
- union ULARGE_INTEGER addr;
-
- addr.quad_part = 0;
- addrHi->u32All = 0;
- addrLo->u32All = 0;
- cntl->u32All = 0;
-
- if (adw_info->watch_mask)
- cntl->bitfields.mask =
- (uint32_t) (adw_info->watch_mask[index] &
- ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
- else
- cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
-
- addr.quad_part = (unsigned long long) adw_info->watch_address[index];
-
- addrHi->bitfields.addr = addr.u.high_part &
- ADDRESS_WATCH_REG_ADDHIGH_MASK;
- addrLo->bitfields.addr =
- (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
-
- cntl->bitfields.mode = adw_info->watch_mode[index];
- cntl->bitfields.vmid = (uint32_t) vmid;
- /* for now assume it is an ATC address */
- cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
-
- pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
- pr_debug("\t\t%20s %08x\n", "set reg add high :",
- addrHi->bitfields.addr);
- pr_debug("\t\t%20s %08x\n", "set reg add low :",
- addrLo->bitfields.addr);
-}
-
-static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
- struct dbg_address_watch_info *adw_info)
-{
- union TCP_WATCH_ADDR_H_BITS addrHi;
- union TCP_WATCH_ADDR_L_BITS addrLo;
- union TCP_WATCH_CNTL_BITS cntl;
- struct kfd_process_device *pdd;
- unsigned int i;
-
- /* taking the vmid for that process on the safe way using pdd */
- pdd = kfd_get_process_device_data(dbgdev->dev,
- adw_info->process);
- if (!pdd) {
- pr_err("Failed to get pdd for wave control no DIQ\n");
- return -EFAULT;
- }
-
- addrHi.u32All = 0;
- addrLo.u32All = 0;
- cntl.u32All = 0;
-
- if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
- (adw_info->num_watch_points == 0)) {
- pr_err("num_watch_points is invalid\n");
- return -EINVAL;
- }
-
- if (!adw_info->watch_mode || !adw_info->watch_address) {
- pr_err("adw_info fields are not valid\n");
- return -EINVAL;
- }
-
- for (i = 0; i < adw_info->num_watch_points; i++) {
- dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
- &cntl, i, pdd->qpd.vmid);
-
- pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
- pr_debug("\t\t%20s %08x\n", "register index :", i);
- pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
- pr_debug("\t\t%20s %08x\n", "Address Low is :",
- addrLo.bitfields.addr);
- pr_debug("\t\t%20s %08x\n", "Address high is :",
- addrHi.bitfields.addr);
- pr_debug("\t\t%20s %08x\n", "Address high is :",
- addrHi.bitfields.addr);
- pr_debug("\t\t%20s %08x\n", "Control Mask is :",
- cntl.bitfields.mask);
- pr_debug("\t\t%20s %08x\n", "Control Mode is :",
- cntl.bitfields.mode);
- pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
- cntl.bitfields.vmid);
- pr_debug("\t\t%20s %08x\n", "Control atc is :",
- cntl.bitfields.atc);
- pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
-
- pdd->dev->kfd2kgd->address_watch_execute(
- dbgdev->dev->kgd,
- i,
- cntl.u32All,
- addrHi.u32All,
- addrLo.u32All);
- }
-
- return 0;
-}
-
-static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
- struct dbg_address_watch_info *adw_info)
-{
- struct pm4__set_config_reg *packets_vec;
- union TCP_WATCH_ADDR_H_BITS addrHi;
- union TCP_WATCH_ADDR_L_BITS addrLo;
- union TCP_WATCH_CNTL_BITS cntl;
- struct kfd_mem_obj *mem_obj;
- unsigned int aw_reg_add_dword;
- uint32_t *packet_buff_uint;
- unsigned int i;
- int status;
- size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
- /* we do not control the vmid in DIQ mode, just a place holder */
- unsigned int vmid = 0;
-
- addrHi.u32All = 0;
- addrLo.u32All = 0;
- cntl.u32All = 0;
-
- if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
- (adw_info->num_watch_points == 0)) {
- pr_err("num_watch_points is invalid\n");
- return -EINVAL;
- }
-
- if (!adw_info->watch_mode || !adw_info->watch_address) {
- pr_err("adw_info fields are not valid\n");
- return -EINVAL;
- }
-
- status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
-
- if (status) {
- pr_err("Failed to allocate GART memory\n");
- return status;
- }
-
- packet_buff_uint = mem_obj->cpu_ptr;
-
- memset(packet_buff_uint, 0, ib_size);
-
- packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
-
- packets_vec[0].header.count = 1;
- packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
- packets_vec[0].header.type = PM4_TYPE_3;
- packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
- packets_vec[0].bitfields2.insert_vmid = 1;
- packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
- packets_vec[1].bitfields2.insert_vmid = 0;
- packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
- packets_vec[2].bitfields2.insert_vmid = 0;
- packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
- packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
- packets_vec[3].bitfields2.insert_vmid = 1;
-
- for (i = 0; i < adw_info->num_watch_points; i++) {
- dbgdev_address_watch_set_registers(adw_info,
- &addrHi,
- &addrLo,
- &cntl,
- i,
- vmid);
-
- pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
- pr_debug("\t\t%20s %08x\n", "register index :", i);
- pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
- pr_debug("\t\t%20s %p\n", "Add ptr is :",
- adw_info->watch_address);
- pr_debug("\t\t%20s %08llx\n", "Add is :",
- adw_info->watch_address[i]);
- pr_debug("\t\t%20s %08x\n", "Address Low is :",
- addrLo.bitfields.addr);
- pr_debug("\t\t%20s %08x\n", "Address high is :",
- addrHi.bitfields.addr);
- pr_debug("\t\t%20s %08x\n", "Control Mask is :",
- cntl.bitfields.mask);
- pr_debug("\t\t%20s %08x\n", "Control Mode is :",
- cntl.bitfields.mode);
- pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
- cntl.bitfields.vmid);
- pr_debug("\t\t%20s %08x\n", "Control atc is :",
- cntl.bitfields.atc);
- pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
-
- aw_reg_add_dword =
- dbgdev->dev->kfd2kgd->address_watch_get_offset(
- dbgdev->dev->kgd,
- i,
- ADDRESS_WATCH_REG_CNTL);
-
- packets_vec[0].bitfields2.reg_offset =
- aw_reg_add_dword - AMD_CONFIG_REG_BASE;
-
- packets_vec[0].reg_data[0] = cntl.u32All;
-
- aw_reg_add_dword =
- dbgdev->dev->kfd2kgd->address_watch_get_offset(
- dbgdev->dev->kgd,
- i,
- ADDRESS_WATCH_REG_ADDR_HI);
-
- packets_vec[1].bitfields2.reg_offset =
- aw_reg_add_dword - AMD_CONFIG_REG_BASE;
- packets_vec[1].reg_data[0] = addrHi.u32All;
-
- aw_reg_add_dword =
- dbgdev->dev->kfd2kgd->address_watch_get_offset(
- dbgdev->dev->kgd,
- i,
- ADDRESS_WATCH_REG_ADDR_LO);
-
- packets_vec[2].bitfields2.reg_offset =
- aw_reg_add_dword - AMD_CONFIG_REG_BASE;
- packets_vec[2].reg_data[0] = addrLo.u32All;
-
- /* enable watch flag if address is not zero*/
- if (adw_info->watch_address[i] > 0)
- cntl.bitfields.valid = 1;
- else
- cntl.bitfields.valid = 0;
-
- aw_reg_add_dword =
- dbgdev->dev->kfd2kgd->address_watch_get_offset(
- dbgdev->dev->kgd,
- i,
- ADDRESS_WATCH_REG_CNTL);
-
- packets_vec[3].bitfields2.reg_offset =
- aw_reg_add_dword - AMD_CONFIG_REG_BASE;
- packets_vec[3].reg_data[0] = cntl.u32All;
-
- status = dbgdev_diq_submit_ib(
- dbgdev,
- adw_info->process->pasid,
- mem_obj->gpu_addr,
- packet_buff_uint,
- ib_size);
-
- if (status) {
- pr_err("Failed to submit IB to DIQ\n");
- break;
- }
- }
-
- kfd_gtt_sa_free(dbgdev->dev, mem_obj);
- return status;
-}
-
-static int dbgdev_wave_control_set_registers(
- struct dbg_wave_control_info *wac_info,
- union SQ_CMD_BITS *in_reg_sq_cmd,
- union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
-{
- int status = 0;
- union SQ_CMD_BITS reg_sq_cmd;
- union GRBM_GFX_INDEX_BITS reg_gfx_index;
- struct HsaDbgWaveMsgAMDGen2 *pMsg;
-
- reg_sq_cmd.u32All = 0;
- reg_gfx_index.u32All = 0;
- pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
-
- switch (wac_info->mode) {
- /* Send command to single wave */
- case HSA_DBG_WAVEMODE_SINGLE:
- /*
- * Limit access to the process waves only,
- * by setting vmid check
- */
- reg_sq_cmd.bits.check_vmid = 1;
- reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
- reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
- reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
-
- reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
- reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
- reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
-
- break;
-
- /* Send command to all waves with matching VMID */
- case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
-
- reg_gfx_index.bits.sh_broadcast_writes = 1;
- reg_gfx_index.bits.se_broadcast_writes = 1;
- reg_gfx_index.bits.instance_broadcast_writes = 1;
-
- reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
-
- break;
-
- /* Send command to all CU waves with matching VMID */
- case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
-
- reg_sq_cmd.bits.check_vmid = 1;
- reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
-
- reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
- reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
- reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
-
- break;
-
- default:
- return -EINVAL;
- }
-
- switch (wac_info->operand) {
- case HSA_DBG_WAVEOP_HALT:
- reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
- break;
-
- case HSA_DBG_WAVEOP_RESUME:
- reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
- break;
-
- case HSA_DBG_WAVEOP_KILL:
- reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
- break;
-
- case HSA_DBG_WAVEOP_DEBUG:
- reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
- break;
-
- case HSA_DBG_WAVEOP_TRAP:
- if (wac_info->trapId < MAX_TRAPID) {
- reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
- reg_sq_cmd.bits.trap_id = wac_info->trapId;
- } else {
- status = -EINVAL;
- }
- break;
-
- default:
- status = -EINVAL;
- break;
- }
-
- if (status == 0) {
- *in_reg_sq_cmd = reg_sq_cmd;
- *in_reg_gfx_index = reg_gfx_index;
- }
-
- return status;
-}
-
-static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
- struct dbg_wave_control_info *wac_info)
-{
-
- int status;
- union SQ_CMD_BITS reg_sq_cmd;
- union GRBM_GFX_INDEX_BITS reg_gfx_index;
- struct kfd_mem_obj *mem_obj;
- uint32_t *packet_buff_uint;
- struct pm4__set_config_reg *packets_vec;
- size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
-
- reg_sq_cmd.u32All = 0;
-
- status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
- &reg_gfx_index);
- if (status) {
- pr_err("Failed to set wave control registers\n");
- return status;
- }
-
- /* we do not control the VMID in DIQ, so reset it to a known value */
- reg_sq_cmd.bits.vm_id = 0;
-
- pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
-
- pr_debug("\t\t mode is: %u\n", wac_info->mode);
- pr_debug("\t\t operand is: %u\n", wac_info->operand);
- pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
- pr_debug("\t\t msg value is: %u\n",
- wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
- pr_debug("\t\t vmid is: N/A\n");
-
- pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
- pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
- pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
- pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
- pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
- pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
- pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
-
- pr_debug("\t\t ibw is : %u\n",
- reg_gfx_index.bitfields.instance_broadcast_writes);
- pr_debug("\t\t ii is : %u\n",
- reg_gfx_index.bitfields.instance_index);
- pr_debug("\t\t sebw is : %u\n",
- reg_gfx_index.bitfields.se_broadcast_writes);
- pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
- pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
- pr_debug("\t\t sbw is : %u\n",
- reg_gfx_index.bitfields.sh_broadcast_writes);
-
- pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
-
- status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
-
- if (status != 0) {
- pr_err("Failed to allocate GART memory\n");
- return status;
- }
-
- packet_buff_uint = mem_obj->cpu_ptr;
-
- memset(packet_buff_uint, 0, ib_size);
-
- packets_vec = (struct pm4__set_config_reg *) packet_buff_uint;
- packets_vec[0].header.count = 1;
- packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
- packets_vec[0].header.type = PM4_TYPE_3;
- packets_vec[0].bitfields2.reg_offset =
- GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
-
- packets_vec[0].bitfields2.insert_vmid = 0;
- packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
-
- packets_vec[1].header.count = 1;
- packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
- packets_vec[1].header.type = PM4_TYPE_3;
- packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE;
-
- packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
- packets_vec[1].bitfields2.insert_vmid = 1;
- packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
-
- /* Restore the GRBM_GFX_INDEX register */
-
- reg_gfx_index.u32All = 0;
- reg_gfx_index.bits.sh_broadcast_writes = 1;
- reg_gfx_index.bits.instance_broadcast_writes = 1;
- reg_gfx_index.bits.se_broadcast_writes = 1;
-
-
- packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
- packets_vec[2].bitfields2.reg_offset =
- GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
-
- packets_vec[2].bitfields2.insert_vmid = 0;
- packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
-
- status = dbgdev_diq_submit_ib(
- dbgdev,
- wac_info->process->pasid,
- mem_obj->gpu_addr,
- packet_buff_uint,
- ib_size);
-
- if (status)
- pr_err("Failed to submit IB to DIQ\n");
-
- kfd_gtt_sa_free(dbgdev->dev, mem_obj);
-
- return status;
-}
-
-static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
- struct dbg_wave_control_info *wac_info)
-{
- int status;
- union SQ_CMD_BITS reg_sq_cmd;
- union GRBM_GFX_INDEX_BITS reg_gfx_index;
- struct kfd_process_device *pdd;
-
- reg_sq_cmd.u32All = 0;
-
- /* taking the VMID for that process on the safe way using PDD */
- pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
-
- if (!pdd) {
- pr_err("Failed to get pdd for wave control no DIQ\n");
- return -EFAULT;
- }
- status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
- &reg_gfx_index);
- if (status) {
- pr_err("Failed to set wave control registers\n");
- return status;
- }
-
- /* for non DIQ we need to patch the VMID: */
-
- reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;
-
- pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
-
- pr_debug("\t\t mode is: %u\n", wac_info->mode);
- pr_debug("\t\t operand is: %u\n", wac_info->operand);
- pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
- pr_debug("\t\t msg value is: %u\n",
- wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
- pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid);
-
- pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
- pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
- pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
- pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
- pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
- pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
- pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
-
- pr_debug("\t\t ibw is : %u\n",
- reg_gfx_index.bitfields.instance_broadcast_writes);
- pr_debug("\t\t ii is : %u\n",
- reg_gfx_index.bitfields.instance_index);
- pr_debug("\t\t sebw is : %u\n",
- reg_gfx_index.bitfields.se_broadcast_writes);
- pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
- pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
- pr_debug("\t\t sbw is : %u\n",
- reg_gfx_index.bitfields.sh_broadcast_writes);
-
- pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
-
- return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
- reg_gfx_index.u32All,
- reg_sq_cmd.u32All);
-}
-
-int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
-{
- int status = 0;
- unsigned int vmid;
- uint16_t queried_pasid;
- union SQ_CMD_BITS reg_sq_cmd;
- union GRBM_GFX_INDEX_BITS reg_gfx_index;
- struct kfd_process_device *pdd;
- struct dbg_wave_control_info wac_info;
- int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
- int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
-
- reg_sq_cmd.u32All = 0;
- status = 0;
-
- wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
- wac_info.operand = HSA_DBG_WAVEOP_KILL;
-
- pr_debug("Killing all process wavefronts\n");
-
- /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
- * ATC_VMID15_PASID_MAPPING
- * to check which VMID the current process is mapped to.
- */
-
- for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
- status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
- (dev->kgd, vmid, &queried_pasid);
-
- if (status && queried_pasid == p->pasid) {
- pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
- vmid, p->pasid);
- break;
- }
- }
-
- if (vmid > last_vmid_to_scan) {
- pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
- return -EFAULT;
- }
-
- /* taking the VMID for that process on the safe way using PDD */
- pdd = kfd_get_process_device_data(dev, p);
- if (!pdd)
- return -EFAULT;
-
- status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
- &reg_gfx_index);
- if (status != 0)
- return -EINVAL;
-
- /* for non DIQ we need to patch the VMID: */
- reg_sq_cmd.bits.vm_id = vmid;
-
- dev->kfd2kgd->wave_control_execute(dev->kgd,
- reg_gfx_index.u32All,
- reg_sq_cmd.u32All);
-
- return 0;
-}
-
-void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
- enum DBGDEV_TYPE type)
-{
- pdbgdev->dev = pdev;
- pdbgdev->kq = NULL;
- pdbgdev->type = type;
- pdbgdev->pqm = NULL;
-
- switch (type) {
- case DBGDEV_TYPE_NODIQ:
- pdbgdev->dbgdev_register = dbgdev_register_nodiq;
- pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
- pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
- pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
- break;
- case DBGDEV_TYPE_DIQ:
- default:
- pdbgdev->dbgdev_register = dbgdev_register_diq;
- pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
- pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq;
- pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
- break;
- }
-
-}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h
deleted file mode 100644
index 0619c777b47e..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef KFD_DBGDEV_H_
-#define KFD_DBGDEV_H_
-
-enum {
- SQ_CMD_VMID_OFFSET = 28,
- ADDRESS_WATCH_CNTL_OFFSET = 24
-};
-
-enum {
- PRIV_QUEUE_SYNC_TIME_MS = 200
-};
-
-/* CONTEXT reg space definition */
-enum {
- CONTEXT_REG_BASE = 0xA000,
- CONTEXT_REG_END = 0xA400,
- CONTEXT_REG_SIZE = CONTEXT_REG_END - CONTEXT_REG_BASE
-};
-
-/* USER CONFIG reg space definition */
-enum {
- USERCONFIG_REG_BASE = 0xC000,
- USERCONFIG_REG_END = 0x10000,
- USERCONFIG_REG_SIZE = USERCONFIG_REG_END - USERCONFIG_REG_BASE
-};
-
-/* CONFIG reg space definition */
-enum {
- AMD_CONFIG_REG_BASE = 0x2000, /* in dwords */
- AMD_CONFIG_REG_END = 0x2B00,
- AMD_CONFIG_REG_SIZE = AMD_CONFIG_REG_END - AMD_CONFIG_REG_BASE
-};
-
-/* SH reg space definition */
-enum {
- SH_REG_BASE = 0x2C00,
- SH_REG_END = 0x3000,
- SH_REG_SIZE = SH_REG_END - SH_REG_BASE
-};
-
-/* SQ_CMD definitions */
-#define SQ_CMD 0x8DEC
-
-enum SQ_IND_CMD_CMD {
- SQ_IND_CMD_CMD_NULL = 0x00000000,
- SQ_IND_CMD_CMD_HALT = 0x00000001,
- SQ_IND_CMD_CMD_RESUME = 0x00000002,
- SQ_IND_CMD_CMD_KILL = 0x00000003,
- SQ_IND_CMD_CMD_DEBUG = 0x00000004,
- SQ_IND_CMD_CMD_TRAP = 0x00000005,
-};
-
-enum SQ_IND_CMD_MODE {
- SQ_IND_CMD_MODE_SINGLE = 0x00000000,
- SQ_IND_CMD_MODE_BROADCAST = 0x00000001,
- SQ_IND_CMD_MODE_BROADCAST_QUEUE = 0x00000002,
- SQ_IND_CMD_MODE_BROADCAST_PIPE = 0x00000003,
- SQ_IND_CMD_MODE_BROADCAST_ME = 0x00000004,
-};
-
-union SQ_IND_INDEX_BITS {
- struct {
- uint32_t wave_id:4;
- uint32_t simd_id:2;
- uint32_t thread_id:6;
- uint32_t:1;
- uint32_t force_read:1;
- uint32_t read_timeout:1;
- uint32_t unindexed:1;
- uint32_t index:16;
-
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-union SQ_IND_CMD_BITS {
- struct {
- uint32_t data:32;
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-union SQ_CMD_BITS {
- struct {
- uint32_t cmd:3;
- uint32_t:1;
- uint32_t mode:3;
- uint32_t check_vmid:1;
- uint32_t trap_id:3;
- uint32_t:5;
- uint32_t wave_id:4;
- uint32_t simd_id:2;
- uint32_t:2;
- uint32_t queue_id:3;
- uint32_t:1;
- uint32_t vm_id:4;
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-union SQ_IND_DATA_BITS {
- struct {
- uint32_t data:32;
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-union GRBM_GFX_INDEX_BITS {
- struct {
- uint32_t instance_index:8;
- uint32_t sh_index:8;
- uint32_t se_index:8;
- uint32_t:5;
- uint32_t sh_broadcast_writes:1;
- uint32_t instance_broadcast_writes:1;
- uint32_t se_broadcast_writes:1;
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-union TCP_WATCH_ADDR_H_BITS {
- struct {
- uint32_t addr:16;
- uint32_t:16;
-
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-union TCP_WATCH_ADDR_L_BITS {
- struct {
- uint32_t:6;
- uint32_t addr:26;
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-enum {
- QUEUESTATE__INVALID = 0, /* so by default we'll get invalid state */
- QUEUESTATE__ACTIVE_COMPLETION_PENDING,
- QUEUESTATE__ACTIVE
-};
-
-union ULARGE_INTEGER {
- struct {
- uint32_t low_part;
- uint32_t high_part;
- } u;
- unsigned long long quad_part;
-};
-
-
-#define KFD_CIK_VMID_START_OFFSET (8)
-#define KFD_CIK_VMID_END_OFFSET (KFD_CIK_VMID_START_OFFSET + (8))
-
-
-void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
- enum DBGDEV_TYPE type);
-
-union TCP_WATCH_CNTL_BITS {
- struct {
- uint32_t mask:24;
- uint32_t vmid:4;
- uint32_t atc:1;
- uint32_t mode:2;
- uint32_t valid:1;
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-enum {
- ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL,
- ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF,
- ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000,
- /* extend the mask to 26 bits in order to match the low address field */
- ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6,
- ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF
-};
-
-enum {
- MAX_TRAPID = 8, /* 3 bits in the bitfield. */
- MAX_WATCH_ADDRESSES = 4
-};
-
-enum {
- ADDRESS_WATCH_REG_ADDR_HI = 0,
- ADDRESS_WATCH_REG_ADDR_LO,
- ADDRESS_WATCH_REG_CNTL,
- ADDRESS_WATCH_REG_MAX
-};
-
-#endif /* KFD_DBGDEV_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
deleted file mode 100644
index 9bfa50633654..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/log2.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/device.h>
-
-#include "kfd_priv.h"
-#include "cik_regs.h"
-#include "kfd_pm4_headers.h"
-#include "kfd_pm4_headers_diq.h"
-#include "kfd_dbgmgr.h"
-#include "kfd_dbgdev.h"
-#include "kfd_device_queue_manager.h"
-
-static DEFINE_MUTEX(kfd_dbgmgr_mutex);
-
-struct mutex *kfd_get_dbgmgr_mutex(void)
-{
- return &kfd_dbgmgr_mutex;
-}
-
-
-static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr)
-{
- kfree(pmgr->dbgdev);
-
- pmgr->dbgdev = NULL;
- pmgr->pasid = 0;
- pmgr->dev = NULL;
-}
-
-void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr)
-{
- if (pmgr) {
- kfd_dbgmgr_uninitialize(pmgr);
- kfree(pmgr);
- }
-}
-
-bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
-{
- enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ;
- struct kfd_dbgmgr *new_buff;
-
- if (WARN_ON(!pdev->init_complete))
- return false;
-
- new_buff = kfd_alloc_struct(new_buff);
- if (!new_buff) {
- pr_err("Failed to allocate dbgmgr instance\n");
- return false;
- }
-
- new_buff->pasid = 0;
- new_buff->dev = pdev;
- new_buff->dbgdev = kfd_alloc_struct(new_buff->dbgdev);
- if (!new_buff->dbgdev) {
- pr_err("Failed to allocate dbgdev instance\n");
- kfree(new_buff);
- return false;
- }
-
- /* get actual type of DBGDevice cpsch or not */
- if (pdev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
- type = DBGDEV_TYPE_NODIQ;
-
- kfd_dbgdev_init(new_buff->dbgdev, pdev, type);
- *ppmgr = new_buff;
-
- return true;
-}
-
-long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
-{
- if (pmgr->pasid != 0) {
- pr_debug("H/W debugger is already active using pasid 0x%x\n",
- pmgr->pasid);
- return -EBUSY;
- }
-
- /* remember pasid */
- pmgr->pasid = p->pasid;
-
- /* provide the pqm for diq generation */
- pmgr->dbgdev->pqm = &p->pqm;
-
- /* activate the actual registering */
- pmgr->dbgdev->dbgdev_register(pmgr->dbgdev);
-
- return 0;
-}
-
-long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
-{
- /* Is the requests coming from the already registered process? */
- if (pmgr->pasid != p->pasid) {
- pr_debug("H/W debugger is not registered by calling pasid 0x%x\n",
- p->pasid);
- return -EINVAL;
- }
-
- pmgr->dbgdev->dbgdev_unregister(pmgr->dbgdev);
-
- pmgr->pasid = 0;
-
- return 0;
-}
-
-long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
- struct dbg_wave_control_info *wac_info)
-{
- /* Is the requests coming from the already registered process? */
- if (pmgr->pasid != wac_info->process->pasid) {
- pr_debug("H/W debugger support was not registered for requester pasid 0x%x\n",
- wac_info->process->pasid);
- return -EINVAL;
- }
-
- return (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, wac_info);
-}
-
-long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
- struct dbg_address_watch_info *adw_info)
-{
- /* Is the requests coming from the already registered process? */
- if (pmgr->pasid != adw_info->process->pasid) {
- pr_debug("H/W debugger support was not registered for requester pasid 0x%x\n",
- adw_info->process->pasid);
- return -EINVAL;
- }
-
- return (long) pmgr->dbgdev->dbgdev_address_watch(pmgr->dbgdev,
- adw_info);
-}
-
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
deleted file mode 100644
index f9c6df1fdc5c..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef KFD_DBGMGR_H_
-#define KFD_DBGMGR_H_
-
-#include "kfd_priv.h"
-
-/* must align with hsakmttypes definition */
-#pragma pack(push, 4)
-
-enum HSA_DBG_WAVEOP {
- HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */
- HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */
- HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */
- HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter dbg mode */
- HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take a trap */
- HSA_DBG_NUM_WAVEOP = 5,
- HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF
-};
-
-enum HSA_DBG_WAVEMODE {
- /* send command to a single wave */
- HSA_DBG_WAVEMODE_SINGLE = 0,
- /*
- * Broadcast to all wavefronts of all processes is not
- * supported for HSA user mode
- */
-
- /* send to waves within current process */
- HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2,
- /* send to waves within current process on CU */
- HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3,
- HSA_DBG_NUM_WAVEMODE = 3,
- HSA_DBG_MAX_WAVEMODE = 0xFFFFFFFF
-};
-
-enum HSA_DBG_WAVEMSG_TYPE {
- HSA_DBG_WAVEMSG_AUTO = 0,
- HSA_DBG_WAVEMSG_USER = 1,
- HSA_DBG_WAVEMSG_ERROR = 2,
- HSA_DBG_NUM_WAVEMSG,
- HSA_DBG_MAX_WAVEMSG = 0xFFFFFFFF
-};
-
-enum HSA_DBG_WATCH_MODE {
- HSA_DBG_WATCH_READ = 0, /* Read operations only */
- HSA_DBG_WATCH_NONREAD = 1, /* Write or Atomic operations only */
- HSA_DBG_WATCH_ATOMIC = 2, /* Atomic Operations only */
- HSA_DBG_WATCH_ALL = 3, /* Read, Write or Atomic operations */
- HSA_DBG_WATCH_NUM,
- HSA_DBG_WATCH_SIZE = 0xFFFFFFFF
-};
-
-/* This structure is hardware specific and may change in the future */
-struct HsaDbgWaveMsgAMDGen2 {
- union {
- struct ui32 {
- uint32_t UserData:8; /* user data */
- uint32_t ShaderArray:1; /* Shader array */
- uint32_t Priv:1; /* Privileged */
- uint32_t Reserved0:4; /* Reserved, should be 0 */
- uint32_t WaveId:4; /* wave id */
- uint32_t SIMD:2; /* SIMD id */
- uint32_t HSACU:4; /* Compute unit */
- uint32_t ShaderEngine:2;/* Shader engine */
- uint32_t MessageType:2; /* see HSA_DBG_WAVEMSG_TYPE */
- uint32_t Reserved1:4; /* Reserved, should be 0 */
- } ui32;
- uint32_t Value;
- };
- uint32_t Reserved2;
-};
-
-union HsaDbgWaveMessageAMD {
- struct HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2;
- /* for future HsaDbgWaveMsgAMDGen3; */
-};
-
-struct HsaDbgWaveMessage {
- void *MemoryVA; /* ptr to associated host-accessible data */
- union HsaDbgWaveMessageAMD DbgWaveMsg;
-};
-
-/*
- * TODO: This definitions to be MOVED to kfd_event, once it is implemented.
- *
- * HSA sync primitive, Event and HW Exception notification API definitions.
- * The API functions allow the runtime to define a so-called sync-primitive,
- * a SW object combining a user-mode provided "syncvar" and a scheduler event
- * that can be signaled through a defined GPU interrupt. A syncvar is
- * a process virtual memory location of a certain size that can be accessed
- * by CPU and GPU shader code within the process to set and query the content
- * within that memory. The definition of the content is determined by the HSA
- * runtime and potentially GPU shader code interfacing with the HSA runtime.
- * The syncvar values may be commonly written through an PM4 WRITE_DATA packet
- * in the user mode instruction stream. The OS scheduler event is typically
- * associated and signaled by an interrupt issued by the GPU, but other HSA
- * system interrupt conditions from other HW (e.g. IOMMUv2) may be surfaced
- * by the KFD by this mechanism, too.
- */
-
-/* these are the new definitions for events */
-enum HSA_EVENTTYPE {
- HSA_EVENTTYPE_SIGNAL = 0, /* user-mode generated GPU signal */
- HSA_EVENTTYPE_NODECHANGE = 1, /* HSA node change (attach/detach) */
- HSA_EVENTTYPE_DEVICESTATECHANGE = 2, /* HSA device state change
- * (start/stop)
- */
- HSA_EVENTTYPE_HW_EXCEPTION = 3, /* GPU shader exception event */
- HSA_EVENTTYPE_SYSTEM_EVENT = 4, /* GPU SYSCALL with parameter info */
- HSA_EVENTTYPE_DEBUG_EVENT = 5, /* GPU signal for debugging */
- HSA_EVENTTYPE_PROFILE_EVENT = 6,/* GPU signal for profiling */
- HSA_EVENTTYPE_QUEUE_EVENT = 7, /* GPU signal queue idle state
- * (EOP pm4)
- */
- /* ... */
- HSA_EVENTTYPE_MAXID,
- HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF
-};
-
-/* Sub-definitions for various event types: Syncvar */
-struct HsaSyncVar {
- union SyncVar {
- void *UserData; /* pointer to user mode data */
- uint64_t UserDataPtrValue; /* 64bit compatibility of value */
- } SyncVar;
- uint64_t SyncVarSize;
-};
-
-/* Sub-definitions for various event types: NodeChange */
-
-enum HSA_EVENTTYPE_NODECHANGE_FLAGS {
- HSA_EVENTTYPE_NODECHANGE_ADD = 0,
- HSA_EVENTTYPE_NODECHANGE_REMOVE = 1,
- HSA_EVENTTYPE_NODECHANGE_SIZE = 0xFFFFFFFF
-};
-
-struct HsaNodeChange {
- /* HSA node added/removed on the platform */
- enum HSA_EVENTTYPE_NODECHANGE_FLAGS Flags;
-};
-
-/* Sub-definitions for various event types: DeviceStateChange */
-enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS {
- /* device started (and available) */
- HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0,
- /* device stopped (i.e. unavailable) */
- HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1,
- HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE = 0xFFFFFFFF
-};
-
-enum HSA_DEVICE {
- HSA_DEVICE_CPU = 0,
- HSA_DEVICE_GPU = 1,
- MAX_HSA_DEVICE = 2
-};
-
-struct HsaDeviceStateChange {
- uint32_t NodeId; /* F-NUMA node that contains the device */
- enum HSA_DEVICE Device; /* device type: GPU or CPU */
- enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; /* event flags */
-};
-
-struct HsaEventData {
- enum HSA_EVENTTYPE EventType; /* event type */
- union EventData {
- /*
- * return data associated with HSA_EVENTTYPE_SIGNAL
- * and other events
- */
- struct HsaSyncVar SyncVar;
-
- /* data associated with HSA_EVENTTYPE_NODE_CHANGE */
- struct HsaNodeChange NodeChangeState;
-
- /* data associated with HSA_EVENTTYPE_DEVICE_STATE_CHANGE */
- struct HsaDeviceStateChange DeviceState;
- } EventData;
-
- /* the following data entries are internal to the KFD & thunk itself */
-
- /* internal thunk store for Event data (OsEventHandle) */
- uint64_t HWData1;
- /* internal thunk store for Event data (HWAddress) */
- uint64_t HWData2;
- /* internal thunk store for Event data (HWData) */
- uint32_t HWData3;
-};
-
-struct HsaEventDescriptor {
- /* event type to allocate */
- enum HSA_EVENTTYPE EventType;
- /* H-NUMA node containing GPU device that is event source */
- uint32_t NodeId;
- /* pointer to user mode syncvar data, syncvar->UserDataPtrValue
- * may be NULL
- */
- struct HsaSyncVar SyncVar;
-};
-
-struct HsaEvent {
- uint32_t EventId;
- struct HsaEventData EventData;
-};
-
-#pragma pack(pop)
-
-enum DBGDEV_TYPE {
- DBGDEV_TYPE_ILLEGAL = 0,
- DBGDEV_TYPE_NODIQ = 1,
- DBGDEV_TYPE_DIQ = 2,
- DBGDEV_TYPE_TEST = 3
-};
-
-struct dbg_address_watch_info {
- struct kfd_process *process;
- enum HSA_DBG_WATCH_MODE *watch_mode;
- uint64_t *watch_address;
- uint64_t *watch_mask;
- struct HsaEvent *watch_event;
- uint32_t num_watch_points;
-};
-
-struct dbg_wave_control_info {
- struct kfd_process *process;
- uint32_t trapId;
- enum HSA_DBG_WAVEOP operand;
- enum HSA_DBG_WAVEMODE mode;
- struct HsaDbgWaveMessage dbgWave_msg;
-};
-
-struct kfd_dbgdev {
-
- /* The device that owns this data. */
- struct kfd_dev *dev;
-
- /* kernel queue for DIQ */
- struct kernel_queue *kq;
-
- /* a pointer to the pqm of the calling process */
- struct process_queue_manager *pqm;
-
- /* type of debug device ( DIQ, non DIQ, etc. ) */
- enum DBGDEV_TYPE type;
-
- /* virtualized function pointers to device dbg */
- int (*dbgdev_register)(struct kfd_dbgdev *dbgdev);
- int (*dbgdev_unregister)(struct kfd_dbgdev *dbgdev);
- int (*dbgdev_address_watch)(struct kfd_dbgdev *dbgdev,
- struct dbg_address_watch_info *adw_info);
- int (*dbgdev_wave_control)(struct kfd_dbgdev *dbgdev,
- struct dbg_wave_control_info *wac_info);
-
-};
-
-struct kfd_dbgmgr {
- u32 pasid;
- struct kfd_dev *dev;
- struct kfd_dbgdev *dbgdev;
-};
-
-/* prototypes for debug manager functions */
-struct mutex *kfd_get_dbgmgr_mutex(void);
-void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr);
-bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev);
-long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p);
-long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p);
-long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
- struct dbg_wave_control_info *wac_info);
-long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
- struct dbg_address_watch_info *adw_info);
-#endif /* KFD_DBGMGR_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
new file mode 100644
index 000000000000..ba99e0f258ae
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -0,0 +1,1152 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "kfd_debug.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_topology.h"
+#include <linux/file.h>
+#include <uapi/linux/kfd_ioctl.h>
+#include <uapi/linux/kfd_sysfs.h>
+
+#define MAX_WATCH_ADDRESSES 4
+
+int kfd_dbg_ev_query_debug_event(struct kfd_process *process,
+ unsigned int *queue_id,
+ unsigned int *gpu_id,
+ uint64_t exception_clear_mask,
+ uint64_t *event_status)
+{
+ struct process_queue_manager *pqm;
+ struct process_queue_node *pqn;
+ int i;
+
+ if (!(process && process->debug_trap_enabled))
+ return -ENODATA;
+
+ mutex_lock(&process->event_mutex);
+ *event_status = 0;
+ *queue_id = 0;
+ *gpu_id = 0;
+
+ /* find and report queue events */
+ pqm = &process->pqm;
+ list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
+ uint64_t tmp = process->exception_enable_mask;
+
+ if (!pqn->q)
+ continue;
+
+ tmp &= pqn->q->properties.exception_status;
+
+ if (!tmp)
+ continue;
+
+ *event_status = pqn->q->properties.exception_status;
+ *queue_id = pqn->q->properties.queue_id;
+ *gpu_id = pqn->q->device->id;
+ pqn->q->properties.exception_status &= ~exception_clear_mask;
+ goto out;
+ }
+
+ /* find and report device events */
+ for (i = 0; i < process->n_pdds; i++) {
+ struct kfd_process_device *pdd = process->pdds[i];
+ uint64_t tmp = process->exception_enable_mask
+ & pdd->exception_status;
+
+ if (!tmp)
+ continue;
+
+ *event_status = pdd->exception_status;
+ *gpu_id = pdd->dev->id;
+ pdd->exception_status &= ~exception_clear_mask;
+ goto out;
+ }
+
+ /* report process events */
+ if (process->exception_enable_mask & process->exception_status) {
+ *event_status = process->exception_status;
+ process->exception_status &= ~exception_clear_mask;
+ }
+
+out:
+ mutex_unlock(&process->event_mutex);
+ return *event_status ? 0 : -EAGAIN;
+}
+
+void debug_event_write_work_handler(struct work_struct *work)
+{
+ struct kfd_process *process;
+
+ static const char write_data = '.';
+ loff_t pos = 0;
+
+ process = container_of(work,
+ struct kfd_process,
+ debug_event_workarea);
+
+ if (process->debug_trap_enabled && process->dbg_ev_file)
+ kernel_write(process->dbg_ev_file, &write_data, 1, &pos);
+}
+
+/* update process/device/queue exception status, write to descriptor
+ * only if exception_status is enabled.
+ */
+bool kfd_dbg_ev_raise(uint64_t event_mask,
+ struct kfd_process *process, struct kfd_node *dev,
+ unsigned int source_id, bool use_worker,
+ void *exception_data, size_t exception_data_size)
+{
+ struct process_queue_manager *pqm;
+ struct process_queue_node *pqn;
+ int i;
+ static const char write_data = '.';
+ loff_t pos = 0;
+ bool is_subscribed = true;
+
+ if (!(process && process->debug_trap_enabled))
+ return false;
+
+ mutex_lock(&process->event_mutex);
+
+ if (event_mask & KFD_EC_MASK_DEVICE) {
+ for (i = 0; i < process->n_pdds; i++) {
+ struct kfd_process_device *pdd = process->pdds[i];
+
+ if (pdd->dev != dev)
+ continue;
+
+ pdd->exception_status |= event_mask & KFD_EC_MASK_DEVICE;
+
+ if (event_mask & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
+ if (!pdd->vm_fault_exc_data) {
+ pdd->vm_fault_exc_data = kmemdup(
+ exception_data,
+ exception_data_size,
+ GFP_KERNEL);
+ if (!pdd->vm_fault_exc_data)
+ pr_debug("Failed to allocate exception data memory");
+ } else {
+ pr_debug("Debugger exception data not saved\n");
+ print_hex_dump_bytes("exception data: ",
+ DUMP_PREFIX_OFFSET,
+ exception_data,
+ exception_data_size);
+ }
+ }
+ break;
+ }
+ } else if (event_mask & KFD_EC_MASK_PROCESS) {
+ process->exception_status |= event_mask & KFD_EC_MASK_PROCESS;
+ } else {
+ pqm = &process->pqm;
+ list_for_each_entry(pqn, &pqm->queues,
+ process_queue_list) {
+ int target_id;
+
+ if (!pqn->q)
+ continue;
+
+ target_id = event_mask & KFD_EC_MASK(EC_QUEUE_NEW) ?
+ pqn->q->properties.queue_id :
+ pqn->q->doorbell_id;
+
+ if (pqn->q->device != dev || target_id != source_id)
+ continue;
+
+ pqn->q->properties.exception_status |= event_mask;
+ break;
+ }
+ }
+
+ if (process->exception_enable_mask & event_mask) {
+ if (use_worker)
+ schedule_work(&process->debug_event_workarea);
+ else
+ kernel_write(process->dbg_ev_file,
+ &write_data,
+ 1,
+ &pos);
+ } else {
+ is_subscribed = false;
+ }
+
+ mutex_unlock(&process->event_mutex);
+
+ return is_subscribed;
+}
+
+/* set pending event queue entry from ring entry */
+bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev,
+ unsigned int pasid,
+ uint32_t doorbell_id,
+ uint64_t trap_mask,
+ void *exception_data,
+ size_t exception_data_size)
+{
+ struct kfd_process *p;
+ struct kfd_process_device *pdd = NULL;
+ bool signaled_to_debugger_or_runtime = false;
+
+ p = kfd_lookup_process_by_pasid(pasid, &pdd);
+
+ if (!pdd)
+ return false;
+
+ if (!kfd_dbg_ev_raise(trap_mask, p, dev, doorbell_id, true,
+ exception_data, exception_data_size)) {
+ struct process_queue_manager *pqm;
+ struct process_queue_node *pqn;
+
+ if (!!(trap_mask & KFD_EC_MASK_QUEUE) &&
+ p->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED) {
+ mutex_lock(&p->mutex);
+
+ pqm = &p->pqm;
+ list_for_each_entry(pqn, &pqm->queues,
+ process_queue_list) {
+
+ if (!(pqn->q && pqn->q->device == dev &&
+ pqn->q->doorbell_id == doorbell_id))
+ continue;
+
+ kfd_send_exception_to_runtime(p, pqn->q->properties.queue_id,
+ trap_mask);
+
+ signaled_to_debugger_or_runtime = true;
+
+ break;
+ }
+
+ mutex_unlock(&p->mutex);
+ } else if (trap_mask & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
+ kfd_evict_process_device(pdd);
+ kfd_signal_vm_fault_event(pdd, NULL, exception_data);
+
+ signaled_to_debugger_or_runtime = true;
+ }
+ } else {
+ signaled_to_debugger_or_runtime = true;
+ }
+
+ kfd_unref_process(p);
+
+ return signaled_to_debugger_or_runtime;
+}
+
+int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
+ unsigned int dev_id,
+ unsigned int queue_id,
+ uint64_t error_reason)
+{
+ if (error_reason & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
+ struct kfd_process_device *pdd = NULL;
+ struct kfd_hsa_memory_exception_data *data;
+ int i;
+
+ for (i = 0; i < p->n_pdds; i++) {
+ if (p->pdds[i]->dev->id == dev_id) {
+ pdd = p->pdds[i];
+ break;
+ }
+ }
+
+ if (!pdd)
+ return -ENODEV;
+
+ data = (struct kfd_hsa_memory_exception_data *)
+ pdd->vm_fault_exc_data;
+
+ kfd_evict_process_device(pdd);
+ kfd_signal_vm_fault_event(pdd, NULL, data);
+ error_reason &= ~KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION);
+ }
+
+ if (error_reason & (KFD_EC_MASK(EC_PROCESS_RUNTIME))) {
+ /*
+ * block should only happen after the debugger receives runtime
+ * enable notice.
+ */
+ up(&p->runtime_enable_sema);
+ error_reason &= ~KFD_EC_MASK(EC_PROCESS_RUNTIME);
+ }
+
+ if (error_reason)
+ return kfd_send_exception_to_runtime(p, queue_id, error_reason);
+
+ return 0;
+}
+
+static int kfd_dbg_set_queue_workaround(struct queue *q, bool enable)
+{
+ struct mqd_update_info minfo = {0};
+ int err;
+
+ if (!q)
+ return 0;
+
+ if (!kfd_dbg_has_cwsr_workaround(q->device))
+ return 0;
+
+ if (enable && q->properties.is_user_cu_masked)
+ return -EBUSY;
+
+ minfo.update_flag = enable ? UPDATE_FLAG_DBG_WA_ENABLE : UPDATE_FLAG_DBG_WA_DISABLE;
+
+ q->properties.is_dbg_wa = enable;
+ err = q->device->dqm->ops.update_queue(q->device->dqm, q, &minfo);
+ if (err)
+ q->properties.is_dbg_wa = false;
+
+ return err;
+}
+
+static int kfd_dbg_set_workaround(struct kfd_process *target, bool enable)
+{
+ struct process_queue_manager *pqm = &target->pqm;
+ struct process_queue_node *pqn;
+ int r = 0;
+
+ list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
+ r = kfd_dbg_set_queue_workaround(pqn->q, enable);
+ if (enable && r)
+ goto unwind;
+ }
+
+ return 0;
+
+unwind:
+ list_for_each_entry(pqn, &pqm->queues, process_queue_list)
+ kfd_dbg_set_queue_workaround(pqn->q, false);
+
+ if (enable)
+ target->runtime_info.runtime_state = r == -EBUSY ?
+ DEBUG_RUNTIME_STATE_ENABLED_BUSY :
+ DEBUG_RUNTIME_STATE_ENABLED_ERROR;
+
+ return r;
+}
+
+int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en)
+{
+ uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
+ uint32_t flags = pdd->process->dbg_flags;
+ struct amdgpu_device *adev = pdd->dev->adev;
+ int r;
+
+ if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
+ return 0;
+
+ if (!pdd->proc_ctx_cpu_ptr) {
+ r = amdgpu_amdkfd_alloc_gtt_mem(adev,
+ AMDGPU_MES_PROC_CTX_SIZE,
+ &pdd->proc_ctx_bo,
+ &pdd->proc_ctx_gpu_addr,
+ &pdd->proc_ctx_cpu_ptr,
+ false);
+ if (r) {
+ dev_err(adev->dev,
+ "failed to allocate process context bo\n");
+ return r;
+ }
+ memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+ }
+
+ return amdgpu_mes_set_shader_debugger(pdd->dev->adev, pdd->proc_ctx_gpu_addr, spi_dbg_cntl,
+ pdd->watch_points, flags, sq_trap_en);
+}
+
+#define KFD_DEBUGGER_INVALID_WATCH_POINT_ID -1
+static int kfd_dbg_get_dev_watch_id(struct kfd_process_device *pdd, int *watch_id)
+{
+ int i;
+
+ *watch_id = KFD_DEBUGGER_INVALID_WATCH_POINT_ID;
+
+ spin_lock(&pdd->dev->watch_points_lock);
+
+ for (i = 0; i < MAX_WATCH_ADDRESSES; i++) {
+ /* device watchpoint in use so skip */
+ if ((pdd->dev->alloc_watch_ids >> i) & 0x1)
+ continue;
+
+ pdd->alloc_watch_ids |= 0x1 << i;
+ pdd->dev->alloc_watch_ids |= 0x1 << i;
+ *watch_id = i;
+ spin_unlock(&pdd->dev->watch_points_lock);
+ return 0;
+ }
+
+ spin_unlock(&pdd->dev->watch_points_lock);
+
+ return -ENOMEM;
+}
+
+static void kfd_dbg_clear_dev_watch_id(struct kfd_process_device *pdd, int watch_id)
+{
+ spin_lock(&pdd->dev->watch_points_lock);
+
+ /* process owns device watch point so safe to clear */
+ if ((pdd->alloc_watch_ids >> watch_id) & 0x1) {
+ pdd->alloc_watch_ids &= ~(0x1 << watch_id);
+ pdd->dev->alloc_watch_ids &= ~(0x1 << watch_id);
+ }
+
+ spin_unlock(&pdd->dev->watch_points_lock);
+}
+
+static bool kfd_dbg_owns_dev_watch_id(struct kfd_process_device *pdd, int watch_id)
+{
+ bool owns_watch_id = false;
+
+ spin_lock(&pdd->dev->watch_points_lock);
+ owns_watch_id = watch_id < MAX_WATCH_ADDRESSES &&
+ ((pdd->alloc_watch_ids >> watch_id) & 0x1);
+
+ spin_unlock(&pdd->dev->watch_points_lock);
+
+ return owns_watch_id;
+}
+
+int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
+ uint32_t watch_id)
+{
+ int r;
+
+ if (!kfd_dbg_owns_dev_watch_id(pdd, watch_id))
+ return -EINVAL;
+
+ if (!pdd->dev->kfd->shared_resources.enable_mes) {
+ r = debug_lock_and_unmap(pdd->dev->dqm);
+ if (r)
+ return r;
+ }
+
+ amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+ pdd->watch_points[watch_id] = pdd->dev->kfd2kgd->clear_address_watch(
+ pdd->dev->adev,
+ watch_id);
+ amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+
+ if (!pdd->dev->kfd->shared_resources.enable_mes)
+ r = debug_map_and_unlock(pdd->dev->dqm);
+ else
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
+
+ kfd_dbg_clear_dev_watch_id(pdd, watch_id);
+
+ return r;
+}
+
+/*
+ * kfd_dbg_trap_set_dev_address_watch - allocate and program a watch point
+ * pdd: process-device to set the watch point on
+ * watch_address, watch_address_mask, watch_mode: passed through to the
+ *	set_address_watch() callback
+ * watch_id: out; slot allocated by kfd_dbg_get_dev_watch_id()
+ *
+ * Returns 0 on success. On any failure the slot allocated for this call has
+ * been released again and the error is returned, so the caller is never told
+ * it owns a slot that was already given back.
+ */
+int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
+					uint64_t watch_address,
+					uint32_t watch_address_mask,
+					uint32_t *watch_id,
+					uint32_t watch_mode)
+{
+	int xcc_id, r = kfd_dbg_get_dev_watch_id(pdd, watch_id);
+	uint32_t xcc_mask = pdd->dev->xcc_mask;
+
+	if (r)
+		return r;
+
+	if (!pdd->dev->kfd->shared_resources.enable_mes) {
+		r = debug_lock_and_unmap(pdd->dev->dqm);
+		if (r) {
+			kfd_dbg_clear_dev_watch_id(pdd, *watch_id);
+			return r;
+		}
+	}
+
+	/* GFX off stays disabled while the watch point is programmed */
+	amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+	for_each_inst(xcc_id, xcc_mask)
+		pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch(
+				pdd->dev->adev,
+				watch_address,
+				watch_address_mask,
+				*watch_id,
+				watch_mode,
+				pdd->dev->vm_info.last_vmid_kfd,
+				xcc_id);
+	amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+
+	if (!pdd->dev->kfd->shared_resources.enable_mes)
+		r = debug_map_and_unlock(pdd->dev->dqm);
+	else
+		r = kfd_dbg_set_mes_debug_mode(pdd, true);
+
+	/* HWS is broken so no point in HW rollback but release the watchpoint anyways */
+	if (r)
+		kfd_dbg_clear_dev_watch_id(pdd, *watch_id);
+
+	/* report the failure: the slot in *watch_id was just released */
+	return r;
+}
+
+/*
+ * Try to clear all MAX_WATCH_ADDRESSES watch point slots on every device of
+ * @target. Return values of the per-slot clear are ignored.
+ */
+static void kfd_dbg_clear_process_address_watch(struct kfd_process *target)
+{
+	int dev_idx, slot;
+
+	for (dev_idx = 0; dev_idx < target->n_pdds; dev_idx++) {
+		struct kfd_process_device *pdd = target->pdds[dev_idx];
+
+		for (slot = 0; slot < MAX_WATCH_ADDRESSES; slot++)
+			kfd_dbg_trap_clear_dev_address_watch(pdd, slot);
+	}
+}
+/*
+ * kfd_dbg_trap_set_flags - replace the debug flags of a process
+ * target: process whose dbg_flags are updated
+ * flags: in: requested flags; out: the flags that were in effect before
+ * the call (written on every return path)
+ *
+ * Returns -EACCES if KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP or
+ * KFD_DBG_TRAP_FLAG_SINGLE_ALU_OP is requested while a device lacks the
+ * matching HSA_CAP_TRAP_DEBUG_PRECISE_* capability bit. Otherwise returns 0,
+ * or the first error from refreshing a device, in which case dbg_flags is
+ * restored and the devices already refreshed are refreshed again.
+ */
+int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
+{
+ uint32_t prev_flags = target->dbg_flags;
+ int i, r = 0, rewind_count = 0;
+ /* first pass: reject the request before anything is changed */
+ for (i = 0; i < target->n_pdds; i++) {
+ struct kfd_topology_device *topo_dev =
+ kfd_topology_device_by_id(target->pdds[i]->dev->id);
+ uint32_t caps = topo_dev->node_props.capability;
+ /* NOTE(review): topo_dev is used unchecked - confirm the lookup cannot return NULL here */
+ if (!(caps & HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED) &&
+ (*flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP)) {
+ *flags = prev_flags;
+ return -EACCES;
+ }
+
+ if (!(caps & HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED) &&
+ (*flags & KFD_DBG_TRAP_FLAG_SINGLE_ALU_OP)) {
+ *flags = prev_flags;
+ return -EACCES;
+ }
+ }
+ /* second pass: commit the flags, then refresh each per-VMID capable device */
+ target->dbg_flags = *flags;
+ *flags = prev_flags;
+ for (i = 0; i < target->n_pdds; i++) {
+ struct kfd_process_device *pdd = target->pdds[i];
+
+ if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
+ continue;
+
+ if (!pdd->dev->kfd->shared_resources.enable_mes)
+ r = debug_refresh_runlist(pdd->dev->dqm);
+ else
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
+
+ if (r) {
+ target->dbg_flags = prev_flags;
+ break;
+ }
+ /* counts loop iterations that completed a refresh; skipped devices are not counted */
+ rewind_count++;
+ }
+
+ /* Rewind flags */
+ if (r) {
+ target->dbg_flags = prev_flags;
+
+ for (i = 0; i < rewind_count; i++) {
+ struct kfd_process_device *pdd = target->pdds[i];
+
+ if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
+ continue;
+
+ if (!pdd->dev->kfd->shared_resources.enable_mes)
+ debug_refresh_runlist(pdd->dev->dqm);
+ else
+ kfd_dbg_set_mes_debug_mode(pdd, true);
+ }
+ }
+
+ return r;
+}
+
+/* kfd_dbg_trap_deactivate:
+ * Turn debug trap off again on the devices of a process.
+ *
+ * target: target process
+ * unwind: true if this is unwinding a failed kfd_dbg_trap_activate();
+ * in that case queues, watch points, wave launch mode and
+ * flags are left alone and only the per-device state is undone
+ * unwind_count:
+ * If unwind == true, how far down the pdd list we need
+ * to unwind (devices at index >= unwind_count are skipped)
+ * else: ignored
+ */
+void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind_count)
+{
+ int i;
+
+ if (!unwind) {
+ uint32_t flags = 0;
+ int resume_count = resume_queues(target, 0, NULL);
+
+ if (resume_count)
+ pr_debug("Resumed %d queues\n", resume_count);
+ /* reset process-wide debug state; results of these calls are not checked */
+ cancel_work_sync(&target->debug_event_workarea);
+ kfd_dbg_clear_process_address_watch(target);
+ kfd_dbg_trap_set_wave_launch_mode(target, 0);
+
+ kfd_dbg_trap_set_flags(target, &flags);
+ }
+
+ for (i = 0; i < target->n_pdds; i++) {
+ struct kfd_process_device *pdd = target->pdds[i];
+
+ /* If this is an unwind, and we have unwound the required
+ * enable calls on the pdd list, we need to stop now
+ * otherwise we may mess up another debugger session.
+ */
+ if (unwind && i == unwind_count)
+ break;
+
+ kfd_process_set_trap_debug_flag(&pdd->qpd, false);
+
+ /* GFX off is already disabled by debug activate if not RLC restore supported. */
+ if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
+ amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+ pdd->spi_dbg_override =
+ pdd->dev->kfd2kgd->disable_debug_trap(
+ pdd->dev->adev,
+ target->runtime_info.ttmp_setup,
+ pdd->dev->vm_info.last_vmid_kfd);
+ amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+ /* devices without per-VMID support had a debug VMID reserved at activation */
+ if (!kfd_dbg_is_per_vmid_supported(pdd->dev) &&
+ release_debug_trap_vmid(pdd->dev->dqm, &pdd->qpd))
+ pr_err("Failed to release debug vmid on [%i]\n", pdd->dev->id);
+
+ if (!pdd->dev->kfd->shared_resources.enable_mes)
+ debug_refresh_runlist(pdd->dev->dqm);
+ else
+ kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
+ }
+
+ kfd_dbg_set_workaround(target, false);
+}
+
+/*
+ * Drop all recorded exception state of @target: interrupts are drained per
+ * device, then the exception status kept per device, per queue and per
+ * process is zeroed.
+ */
+static void kfd_dbg_clean_exception_status(struct kfd_process *target)
+{
+	struct process_queue_manager *pqm = &target->pqm;
+	struct process_queue_node *node;
+	int dev_idx;
+
+	for (dev_idx = 0; dev_idx < target->n_pdds; dev_idx++) {
+		struct kfd_process_device *pdd = target->pdds[dev_idx];
+
+		kfd_process_drain_interrupts(pdd);
+		pdd->exception_status = 0;
+	}
+
+	/* nodes without a queue attached have no status to reset */
+	list_for_each_entry(node, &pqm->queues, process_queue_list) {
+		if (node->q)
+			node->q->properties.exception_status = 0;
+	}
+
+	target->exception_status = 0;
+}
+/*
+ * kfd_dbg_trap_disable - end the debug session on a process
+ * target: process being debugged
+ *
+ * Does nothing if debug trap is not enabled. Otherwise deactivates the trap
+ * or resets the runtime state (see below), drops the event file and the
+ * debugger process association, clears the exception state and drops one
+ * process reference through kfd_unref_process().
+ *
+ * Always returns 0.
+ */
+int kfd_dbg_trap_disable(struct kfd_process *target)
+{
+ if (!target->debug_trap_enabled)
+ return 0;
+
+ /*
+ * Defer deactivation to runtime if runtime not enabled otherwise reset
+ * attached running target runtime state to enable for re-attach.
+ */
+ if (target->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED)
+ kfd_dbg_trap_deactivate(target, false, 0);
+ else if (target->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_DISABLED)
+ target->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
+ /* stop the event worker before the event file it uses is released */
+ cancel_work_sync(&target->debug_event_workarea);
+ fput(target->dbg_ev_file);
+ target->dbg_ev_file = NULL;
+
+ if (target->debugger_process) {
+ atomic_dec(&target->debugger_process->debugged_process_count);
+ target->debugger_process = NULL;
+ }
+
+ target->debug_trap_enabled = false;
+ kfd_dbg_clean_exception_status(target);
+ kfd_unref_process(target);
+
+ return 0;
+}
+/*
+ * kfd_dbg_trap_activate - turn on debug trap on every device of a process
+ * target: process to activate
+ *
+ * Returns 0 on success. A failing kfd_dbg_set_workaround() is returned as
+ * is. If a device fails later, runtime_info.runtime_state is set to
+ * DEBUG_RUNTIME_STATE_ENABLED_BUSY or DEBUG_RUNTIME_STATE_ENABLED_ERROR,
+ * the devices before the failing one are deactivated again and the error
+ * is returned.
+ */
+int kfd_dbg_trap_activate(struct kfd_process *target)
+{
+ int i, r = 0;
+
+ r = kfd_dbg_set_workaround(target, true);
+ if (r)
+ return r;
+
+ for (i = 0; i < target->n_pdds; i++) {
+ struct kfd_process_device *pdd = target->pdds[i];
+
+ if (!kfd_dbg_is_per_vmid_supported(pdd->dev)) {
+ r = reserve_debug_trap_vmid(pdd->dev->dqm, &pdd->qpd);
+
+ if (r) {
+ target->runtime_info.runtime_state = (r == -EBUSY) ?
+ DEBUG_RUNTIME_STATE_ENABLED_BUSY :
+ DEBUG_RUNTIME_STATE_ENABLED_ERROR;
+
+ goto unwind_err;
+ }
+ }
+
+ /* Disable GFX OFF to prevent garbage read/writes to debug registers.
+ * If RLC restore of debug registers is not supported and runtime enable
+ * hasn't done so already on ttmp setup request, restore the trap config registers.
+ *
+ * If RLC restore of debug registers is not supported, keep gfx off disabled for
+ * the debug session.
+ */
+ amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+ if (!(kfd_dbg_is_rlc_restore_supported(pdd->dev) ||
+ target->runtime_info.ttmp_setup))
+ pdd->dev->kfd2kgd->enable_debug_trap(pdd->dev->adev, true,
+ pdd->dev->vm_info.last_vmid_kfd);
+
+ pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap(
+ pdd->dev->adev,
+ false,
+ pdd->dev->vm_info.last_vmid_kfd);
+
+ if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
+ amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+
+ /*
+ * Setting the debug flag in the trap handler requires that the TMA has been
+ * allocated, which occurs during CWSR initialization.
+ * In the event that CWSR has not been initialized at this point, setting the
+ * flag will be called again during CWSR initialization if the target process
+ * is still debug enabled.
+ */
+ kfd_process_set_trap_debug_flag(&pdd->qpd, true);
+
+ if (!pdd->dev->kfd->shared_resources.enable_mes)
+ r = debug_refresh_runlist(pdd->dev->dqm);
+ else
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
+
+ if (r) {
+ target->runtime_info.runtime_state =
+ DEBUG_RUNTIME_STATE_ENABLED_ERROR;
+ goto unwind_err;
+ }
+ }
+
+ return 0;
+
+unwind_err:
+ /* Enabling debug failed, we need to disable on
+ * all GPUs so the enable is all or nothing.
+ */
+ kfd_dbg_trap_deactivate(target, true, i);
+ return r;
+}
+/*
+ * kfd_dbg_trap_enable - start a debug session on a process
+ * target: process to debug
+ * fd: file descriptor whose file is stored as target->dbg_ev_file
+ * runtime_info: user buffer that receives a copy of target->runtime_info
+ * runtime_size: in: size of the user buffer; out: sizeof(target->runtime_info)
+ *
+ * Returns -EALREADY if debug trap is already enabled, -ENODEV if a device
+ * is not SOC15, -EBUSY if a device with non-zero qpd.num_gws lacks debug
+ * GWS support or has the CWSR workaround, -EBADF if fd cannot be resolved,
+ * -EFAULT if the copy to user space fails, 0 otherwise.
+ */
+int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
+ void __user *runtime_info, uint32_t *runtime_size)
+{
+ struct file *f;
+ uint32_t copy_size;
+ int i, r = 0;
+
+ if (target->debug_trap_enabled)
+ return -EALREADY;
+
+ /* Enable pre-checks */
+ for (i = 0; i < target->n_pdds; i++) {
+ struct kfd_process_device *pdd = target->pdds[i];
+
+ if (!KFD_IS_SOC15(pdd->dev))
+ return -ENODEV;
+
+ if (pdd->qpd.num_gws && (!kfd_dbg_has_gws_support(pdd->dev) ||
+ kfd_dbg_has_cwsr_workaround(pdd->dev)))
+ return -EBUSY;
+ }
+ /* never copy more than the caller's buffer or the structure holds */
+ copy_size = min((size_t)(*runtime_size), sizeof(target->runtime_info));
+
+ f = fget(fd);
+ if (!f) {
+ pr_err("Failed to get file for (%i)\n", fd);
+ return -EBADF;
+ }
+
+ target->dbg_ev_file = f;
+
+ /* defer activation to runtime if not runtime enabled */
+ if (target->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED)
+ kfd_dbg_trap_activate(target);
+ /* NOTE(review): the result of kfd_dbg_trap_activate() is not checked here - confirm intended */
+ /* We already hold the process reference but hold another one for the
+ * debug session.
+ */
+ kref_get(&target->ref);
+ target->debug_trap_enabled = true;
+
+ if (target->debugger_process)
+ atomic_inc(&target->debugger_process->debugged_process_count);
+ /* NOTE(review): on -EFAULT the reference, dbg_ev_file and debug_trap_enabled set above stay in place - confirm the caller undoes them */
+ if (copy_to_user(runtime_info, (void *)&target->runtime_info, copy_size)) {
+ kfd_dbg_trap_deactivate(target, false, 0);
+ r = -EFAULT;
+ }
+
+ *runtime_size = sizeof(target->runtime_info);
+
+ return r;
+}
+
+/*
+ * Ask every device of @p to validate a trap override request.
+ *
+ * @trap_mask_supported starts out as all ones and is handed to each device's
+ * validate_trap_override_request() callback in turn. The first callback
+ * error is returned as is; -EACCES is returned when @trap_mask_request
+ * contains a bit missing from the resulting supported mask; 0 otherwise.
+ */
+static int kfd_dbg_validate_trap_override_request(struct kfd_process *p,
+						uint32_t trap_override,
+						uint32_t trap_mask_request,
+						uint32_t *trap_mask_supported)
+{
+	int dev_idx, err;
+
+	*trap_mask_supported = 0xffffffff;
+
+	for (dev_idx = 0; dev_idx < p->n_pdds; dev_idx++) {
+		struct kfd_process_device *pdd = p->pdds[dev_idx];
+
+		err = pdd->dev->kfd2kgd->validate_trap_override_request(
+				pdd->dev->adev,
+				trap_override,
+				trap_mask_supported);
+		if (err)
+			return err;
+	}
+
+	/* reject requests that ask for bits outside the supported mask */
+	return (trap_mask_request & ~*trap_mask_supported) ? -EACCES : 0;
+}
+/*
+ * kfd_dbg_trap_set_wave_launch_override - apply a trap override per device
+ * target: process being debugged
+ * trap_override, trap_mask_bits, trap_mask_request: passed through to the
+ * set_wave_launch_trap_override() callback of every device
+ * trap_mask_prev: out; handed to the callback of every device
+ * trap_mask_supported: out; filled in by the validation step
+ *
+ * Returns the validation error if the request is rejected, otherwise 0 or
+ * the first error from refreshing a device. Devices handled before a
+ * failing one are not rolled back here.
+ */
+int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t *trap_mask_supported)
+{
+ int r = 0, i;
+
+ r = kfd_dbg_validate_trap_override_request(target,
+ trap_override,
+ trap_mask_request,
+ trap_mask_supported);
+
+ if (r)
+ return r;
+
+ for (i = 0; i < target->n_pdds; i++) {
+ struct kfd_process_device *pdd = target->pdds[i];
+ /* GFX off stays disabled while the callback runs */
+ amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+ pdd->spi_dbg_override = pdd->dev->kfd2kgd->set_wave_launch_trap_override(
+ pdd->dev->adev,
+ pdd->dev->vm_info.last_vmid_kfd,
+ trap_override,
+ trap_mask_bits,
+ trap_mask_request,
+ trap_mask_prev,
+ pdd->spi_dbg_override);
+ amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+
+ if (!pdd->dev->kfd->shared_resources.enable_mes)
+ r = debug_refresh_runlist(pdd->dev->dqm);
+ else
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
+
+ if (r)
+ break;
+ }
+
+ return r;
+}
+/*
+ * kfd_dbg_trap_set_wave_launch_mode - set the wave launch mode per device
+ * target: process being debugged
+ * wave_launch_mode: one of KFD_DBG_TRAP_WAVE_LAUNCH_MODE_NORMAL, _HALT
+ * or _DEBUG
+ *
+ * Returns -EINVAL for any other mode, otherwise 0 or the first error from
+ * refreshing a device. Devices handled before a failing one are not rolled
+ * back here.
+ */
+int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target,
+ uint8_t wave_launch_mode)
+{
+ int r = 0, i;
+
+ if (wave_launch_mode != KFD_DBG_TRAP_WAVE_LAUNCH_MODE_NORMAL &&
+ wave_launch_mode != KFD_DBG_TRAP_WAVE_LAUNCH_MODE_HALT &&
+ wave_launch_mode != KFD_DBG_TRAP_WAVE_LAUNCH_MODE_DEBUG)
+ return -EINVAL;
+
+ for (i = 0; i < target->n_pdds; i++) {
+ struct kfd_process_device *pdd = target->pdds[i];
+ /* GFX off stays disabled while the callback runs */
+ amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+ pdd->spi_dbg_launch_mode = pdd->dev->kfd2kgd->set_wave_launch_mode(
+ pdd->dev->adev,
+ wave_launch_mode,
+ pdd->dev->vm_info.last_vmid_kfd);
+ amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+
+ if (!pdd->dev->kfd->shared_resources.enable_mes)
+ r = debug_refresh_runlist(pdd->dev->dqm);
+ else
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
+
+ if (r)
+ break;
+ }
+
+ return r;
+}
+/*
+ * kfd_dbg_trap_query_exception_info - fetch data for one raised exception
+ * target: process being debugged
+ * source_id: queue id for queue exceptions, device id for device
+ * exceptions; not used for process exceptions
+ * exception_code: exception to query; its type selects queue, device or
+ * process scope
+ * clear_exception: if true, clear the exception bit after a successful query
+ * info: user buffer for exception data
+ * info_size: in: size of the user buffer; out: size of the data available
+ * (0 for exceptions that carry no data)
+ *
+ * Returns -EINVAL for NULL arguments, an unknown source or an unknown
+ * exception type, -ENODATA if the exception is not raised on the source,
+ * -EFAULT if the copy to user space fails, 0 otherwise. The whole lookup
+ * runs with target->event_mutex held.
+ */
+int kfd_dbg_trap_query_exception_info(struct kfd_process *target,
+ uint32_t source_id,
+ uint32_t exception_code,
+ bool clear_exception,
+ void __user *info,
+ uint32_t *info_size)
+{
+ bool found = false;
+ int r = 0;
+ uint32_t copy_size, actual_info_size = 0;
+ uint64_t *exception_status_ptr = NULL;
+
+ if (!target)
+ return -EINVAL;
+
+ if (!info || !info_size)
+ return -EINVAL;
+
+ mutex_lock(&target->event_mutex);
+
+ if (KFD_DBG_EC_TYPE_IS_QUEUE(exception_code)) {
+ /* Per queue exceptions */
+ struct queue *queue = NULL;
+ int i;
+ /* search the queue lists of all devices for queue_id == source_id */
+ for (i = 0; i < target->n_pdds; i++) {
+ struct kfd_process_device *pdd = target->pdds[i];
+ struct qcm_process_device *qpd = &pdd->qpd;
+
+ list_for_each_entry(queue, &qpd->queues_list, list) {
+ if (!found && queue->properties.queue_id == source_id) {
+ found = true;
+ break;
+ }
+ }
+ if (found)
+ break;
+ }
+ /* queue is only a valid entry when found is set */
+ if (!found) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!(queue->properties.exception_status & KFD_EC_MASK(exception_code))) {
+ r = -ENODATA;
+ goto out;
+ }
+ exception_status_ptr = &queue->properties.exception_status;
+ } else if (KFD_DBG_EC_TYPE_IS_DEVICE(exception_code)) {
+ /* Per device exceptions */
+ struct kfd_process_device *pdd = NULL;
+ int i;
+
+ for (i = 0; i < target->n_pdds; i++) {
+ pdd = target->pdds[i];
+ if (pdd->dev->id == source_id) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!(pdd->exception_status & KFD_EC_MASK(exception_code))) {
+ r = -ENODATA;
+ goto out;
+ }
+ /* memory violations carry extra data; copy at most what the user buffer holds */
+ if (exception_code == EC_DEVICE_MEMORY_VIOLATION) {
+ copy_size = min((size_t)(*info_size), pdd->vm_fault_exc_data_size);
+
+ if (copy_to_user(info, pdd->vm_fault_exc_data, copy_size)) {
+ r = -EFAULT;
+ goto out;
+ }
+ actual_info_size = pdd->vm_fault_exc_data_size;
+ if (clear_exception) {
+ kfree(pdd->vm_fault_exc_data);
+ pdd->vm_fault_exc_data = NULL;
+ pdd->vm_fault_exc_data_size = 0;
+ }
+ }
+ exception_status_ptr = &pdd->exception_status;
+ } else if (KFD_DBG_EC_TYPE_IS_PROCESS(exception_code)) {
+ /* Per process exceptions */
+ if (!(target->exception_status & KFD_EC_MASK(exception_code))) {
+ r = -ENODATA;
+ goto out;
+ }
+
+ if (exception_code == EC_PROCESS_RUNTIME) {
+ copy_size = min((size_t)(*info_size), sizeof(target->runtime_info));
+
+ if (copy_to_user(info, (void *)&target->runtime_info, copy_size)) {
+ r = -EFAULT;
+ goto out;
+ }
+
+ actual_info_size = sizeof(target->runtime_info);
+ }
+
+ exception_status_ptr = &target->exception_status;
+ } else {
+ pr_debug("Bad exception type [%i]\n", exception_code);
+ r = -EINVAL;
+ goto out;
+ }
+ /* success: report the full data size and optionally clear the bit */
+ *info_size = actual_info_size;
+ if (clear_exception)
+ *exception_status_ptr &= ~KFD_EC_MASK(exception_code);
+out:
+ mutex_unlock(&target->event_mutex);
+ return r;
+}
+/*
+ * kfd_dbg_trap_device_snapshot - copy per-device info to user space
+ * target: process being debugged
+ * exception_clear_mask: exception bits cleared on each device after its
+ * status has been captured in the entry
+ * user_info: user buffer that receives an array of entries
+ * number_of_device_infos: in: number of entries requested;
+ * out: number of devices of the process
+ * entry_size: in: size of one entry in the user buffer, used as the stride
+ * between entries; out: number of bytes written per entry
+ *
+ * Returns -EINVAL if an argument is NULL, -EFAULT if a copy fails,
+ * 0 otherwise (including when no entries are requested or present).
+ */
+int kfd_dbg_trap_device_snapshot(struct kfd_process *target,
+ uint64_t exception_clear_mask,
+ void __user *user_info,
+ uint32_t *number_of_device_infos,
+ uint32_t *entry_size)
+{
+ struct kfd_dbg_device_info_entry device_info;
+ uint32_t tmp_entry_size, tmp_num_devices;
+ int i, r = 0;
+
+ if (!(target && user_info && number_of_device_infos && entry_size))
+ return -EINVAL;
+ /* keep the caller's entry size: it is the stride even if less is copied */
+ tmp_entry_size = *entry_size;
+
+ tmp_num_devices = min_t(size_t, *number_of_device_infos, target->n_pdds);
+ *number_of_device_infos = target->n_pdds;
+ *entry_size = min_t(size_t, *entry_size, sizeof(device_info));
+
+ if (!tmp_num_devices)
+ return 0;
+ /* zero the whole entry before it is filled in and copied out */
+ memset(&device_info, 0, sizeof(device_info));
+
+ mutex_lock(&target->event_mutex);
+
+ /* Run over all pdd of the process */
+ for (i = 0; i < tmp_num_devices; i++) {
+ struct kfd_process_device *pdd = target->pdds[i];
+ struct kfd_topology_device *topo_dev = kfd_topology_device_by_id(pdd->dev->id);
+ /* NOTE(review): topo_dev is used unchecked - confirm the lookup cannot return NULL here */
+ device_info.gpu_id = pdd->dev->id;
+ device_info.exception_status = pdd->exception_status;
+ device_info.lds_base = pdd->lds_base;
+ device_info.lds_limit = pdd->lds_limit;
+ device_info.scratch_base = pdd->scratch_base;
+ device_info.scratch_limit = pdd->scratch_limit;
+ device_info.gpuvm_base = pdd->gpuvm_base;
+ device_info.gpuvm_limit = pdd->gpuvm_limit;
+ device_info.location_id = topo_dev->node_props.location_id;
+ device_info.vendor_id = topo_dev->node_props.vendor_id;
+ device_info.device_id = topo_dev->node_props.device_id;
+ device_info.revision_id = pdd->dev->adev->pdev->revision;
+ device_info.subsystem_vendor_id = pdd->dev->adev->pdev->subsystem_vendor;
+ device_info.subsystem_device_id = pdd->dev->adev->pdev->subsystem_device;
+ device_info.fw_version = pdd->dev->kfd->mec_fw_version;
+ device_info.gfx_target_version =
+ topo_dev->node_props.gfx_target_version;
+ device_info.simd_count = topo_dev->node_props.simd_count;
+ device_info.max_waves_per_simd =
+ topo_dev->node_props.max_waves_per_simd;
+ device_info.array_count = topo_dev->node_props.array_count;
+ device_info.simd_arrays_per_engine =
+ topo_dev->node_props.simd_arrays_per_engine;
+ device_info.num_xcc = NUM_XCC(pdd->dev->xcc_mask);
+ device_info.capability = topo_dev->node_props.capability;
+ device_info.debug_prop = topo_dev->node_props.debug_prop;
+ /* the bits are cleared before the copy below, so they stay cleared if it fails */
+ if (exception_clear_mask)
+ pdd->exception_status &= ~exception_clear_mask;
+
+ if (copy_to_user(user_info, &device_info, *entry_size)) {
+ r = -EFAULT;
+ break;
+ }
+
+ user_info += tmp_entry_size;
+ }
+
+ mutex_unlock(&target->event_mutex);
+
+ return r;
+}
+/*
+ * kfd_dbg_set_enabled_debug_exception_mask - set the enabled exception mask
+ * target: process being debugged
+ * exception_set_mask: new value for target->exception_enable_mask
+ *
+ * If any exception in the new mask is already raised on the process, one of
+ * its queues or one of its devices, a single byte is written to
+ * target->dbg_ev_file before the mask is stored. Runs with
+ * target->event_mutex held.
+ */
+void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target,
+ uint64_t exception_set_mask)
+{
+ uint64_t found_mask = 0;
+ struct process_queue_manager *pqm;
+ struct process_queue_node *pqn;
+ static const char write_data = '.';
+ loff_t pos = 0;
+ int i;
+
+ mutex_lock(&target->event_mutex);
+ /* collect every exception currently raised at process, queue and device level */
+ found_mask |= target->exception_status;
+
+ pqm = &target->pqm;
+ list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
+ if (!pqn->q)
+ continue;
+
+ found_mask |= pqn->q->properties.exception_status;
+ }
+
+ for (i = 0; i < target->n_pdds; i++) {
+ struct kfd_process_device *pdd = target->pdds[i];
+
+ found_mask |= pdd->exception_status;
+ }
+ /* the result of kernel_write() is not checked */
+ if (exception_set_mask & found_mask)
+ kernel_write(target->dbg_ev_file, &write_data, 1, &pos);
+
+ target->exception_enable_mask = exception_set_mask;
+
+ mutex_unlock(&target->event_mutex);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
new file mode 100644
index 000000000000..27aa1a5b120f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef KFD_DEBUG_EVENTS_H_INCLUDED
+#define KFD_DEBUG_EVENTS_H_INCLUDED
+
+#include "kfd_priv.h"
+
+void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind_count);
+int kfd_dbg_trap_activate(struct kfd_process *target);
+int kfd_dbg_ev_query_debug_event(struct kfd_process *process,
+ unsigned int *queue_id,
+ unsigned int *gpu_id,
+ uint64_t exception_clear_mask,
+ uint64_t *event_status);
+bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev,
+ unsigned int pasid,
+ uint32_t doorbell_id,
+ uint64_t trap_mask,
+ void *exception_data,
+ size_t exception_data_size);
+bool kfd_dbg_ev_raise(uint64_t event_mask,
+ struct kfd_process *process, struct kfd_node *dev,
+ unsigned int source_id, bool use_worker,
+ void *exception_data,
+ size_t exception_data_size);
+int kfd_dbg_trap_disable(struct kfd_process *target);
+int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
+ void __user *runtime_info,
+ uint32_t *runtime_info_size);
+int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t *trap_mask_supported);
+int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target,
+ uint8_t wave_launch_mode);
+int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
+ uint32_t watch_id);
+int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t *watch_id,
+ uint32_t watch_mode);
+int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags);
+int kfd_dbg_trap_query_exception_info(struct kfd_process *target,
+ uint32_t source_id,
+ uint32_t exception_code,
+ bool clear_exception,
+ void __user *info,
+ uint32_t *info_size);
+int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
+ unsigned int dev_id,
+ unsigned int queue_id,
+ uint64_t error_reason);
+
+/*
+ * True for GC 9.4.2, 9.4.3, 9.4.4 and 9.5.0, and for any GC version of
+ * 11.0.0 or above; false for everything else.
+ */
+static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev)
+{
+	if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
+		return true;
+
+	return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
+	       KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
+	       KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+	       KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0);
+}
+
+void debug_event_write_work_handler(struct work_struct *work);
+int kfd_dbg_trap_device_snapshot(struct kfd_process *target,
+ uint64_t exception_clear_mask,
+ void __user *user_info,
+ uint32_t *number_of_device_infos,
+ uint32_t *entry_size);
+
+void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target,
+ uint64_t exception_set_mask);
+/*
+ * Chips that cannot have their debug registers restored by the RLC keep
+ * GFX off disabled for the whole debug session when GFX off is enabled.
+ * See disable_on_trap_action_entry and enable_on_trap_action_exit for details.
+ *
+ * Returns false for GC 10.1.10 and GC 10.1.1, true for every other version.
+ */
+static inline bool kfd_dbg_is_rlc_restore_supported(struct kfd_node *dev)
+{
+	return KFD_GC_VERSION(dev) != IP_VERSION(10, 1, 10) &&
+	       KFD_GC_VERSION(dev) != IP_VERSION(10, 1, 1);
+}
+
+/* True for GC versions from 11.0.0 up to and including 11.0.3. */
+static inline bool kfd_dbg_has_cwsr_workaround(struct kfd_node *dev)
+{
+	if (KFD_GC_VERSION(dev) < IP_VERSION(11, 0, 0))
+		return false;
+
+	return KFD_GC_VERSION(dev) <= IP_VERSION(11, 0, 3);
+}
+/*
+ * Returns false when the device matches one of these cases, true otherwise:
+ * - GC 9.0.1 with mec2_fw_version below 0x81b6
+ * - GC 9.1.0 through 9.2.2 with mec2_fw_version below 0x1b6
+ * - GC 9.4.0 with mec2_fw_version below 0x1b6
+ * - GC 9.4.1 with mec2_fw_version below 0x30
+ * - any GC 11.x version (>= 11.0.0 and < 12.0.0)
+ */
+static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev)
+{
+ if ((KFD_GC_VERSION(dev) == IP_VERSION(9, 0, 1)
+ && dev->kfd->mec2_fw_version < 0x81b6) ||
+ (KFD_GC_VERSION(dev) >= IP_VERSION(9, 1, 0)
+ && KFD_GC_VERSION(dev) <= IP_VERSION(9, 2, 2)
+ && dev->kfd->mec2_fw_version < 0x1b6) ||
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0)
+ && dev->kfd->mec2_fw_version < 0x1b6) ||
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1)
+ && dev->kfd->mec2_fw_version < 0x30) ||
+ (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
+ KFD_GC_VERSION(dev) < IP_VERSION(12, 0, 0)))
+ return false;
+
+ /* Assume debugging and cooperative launch supported otherwise. */
+ return true;
+}
+
+int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en);
+/*
+ * Returns true when one of these holds, false otherwise:
+ * - GC version below 11.0.0, except GC 9.4.2
+ * - GC 11.x (>= 11.0.0 and < 12.0.0) whose MES scheduler version, masked
+ * with AMDGPU_MES_VERSION_MASK, is at least 70
+ * - GC version 12.0.0 or above
+ */
+static inline bool kfd_dbg_has_ttmps_always_setup(struct kfd_node *dev)
+{
+ return (KFD_GC_VERSION(dev) < IP_VERSION(11, 0, 0) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 2)) ||
+ (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
+ KFD_GC_VERSION(dev) < IP_VERSION(12, 0, 0) &&
+ (dev->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 70) ||
+ (KFD_GC_VERSION(dev) >= IP_VERSION(12, 0, 0));
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
index 673d5e34f213..9bde2c64540f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2016-2017 Advanced Micro Devices, Inc.
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -26,6 +27,16 @@
#include "kfd_priv.h"
static struct dentry *debugfs_root;
+static struct dentry *debugfs_proc;
+static struct list_head procs;
+
+struct debugfs_proc_entry {
+ struct list_head list;
+ struct dentry *proc_dentry;
+ pid_t pid;
+};
+
+#define MAX_DEBUGFS_FILENAME_LEN 32
static int kfd_debugfs_open(struct inode *inode, struct file *file)
{
@@ -35,14 +46,14 @@ static int kfd_debugfs_open(struct inode *inode, struct file *file)
}
static int kfd_debugfs_hang_hws_read(struct seq_file *m, void *data)
{
- seq_printf(m, "echo gpu_id > hang_hws\n");
+ seq_puts(m, "echo gpu_id > hang_hws\n");
return 0;
}
static ssize_t kfd_debugfs_hang_hws_write(struct file *file,
const char __user *user_buf, size_t size, loff_t *ppos)
{
- struct kfd_dev *dev;
+ struct kfd_node *dev;
char tmp[16];
uint32_t gpu_id;
int ret = -EINVAL;
@@ -91,6 +102,8 @@ static const struct file_operations kfd_debugfs_hang_hws_fops = {
void kfd_debugfs_init(void)
{
debugfs_root = debugfs_create_dir("kfd", NULL);
+ debugfs_proc = debugfs_create_dir("proc", debugfs_root);
+ INIT_LIST_HEAD(&procs);
debugfs_create_file("mqds", S_IFREG | 0444, debugfs_root,
kfd_debugfs_mqds_by_process, &kfd_debugfs_fops);
@@ -100,9 +113,75 @@ void kfd_debugfs_init(void)
kfd_debugfs_rls_by_device, &kfd_debugfs_fops);
debugfs_create_file("hang_hws", S_IFREG | 0200, debugfs_root,
kfd_debugfs_hang_hws_read, &kfd_debugfs_hang_hws_fops);
+ debugfs_create_file("mem_limit", S_IFREG | 0200, debugfs_root,
+ kfd_debugfs_kfd_mem_limits, &kfd_debugfs_fops);
}
void kfd_debugfs_fini(void)
{
+ debugfs_remove_recursive(debugfs_proc);
debugfs_remove_recursive(debugfs_root);
}
+
+/*
+ * debugfs read handler: formats the pasid of the process-device stored in
+ * the inode's private data as an unsigned decimal followed by a newline and
+ * hands it to simple_read_from_buffer().
+ */
+static ssize_t kfd_debugfs_pasid_read(struct file *file, char __user *buf,
+				       size_t count, loff_t *ppos)
+{
+	char text[32];
+	struct kfd_process_device *pdd;
+	int text_len;
+
+	pdd = file_inode(file)->i_private;
+	text_len = snprintf(text, sizeof(text), "%u\n", pdd->pasid);
+
+	return simple_read_from_buffer(buf, count, ppos, text, text_len);
+}
+
+static const struct file_operations kfd_debugfs_pasid_fops = {
+ .owner = THIS_MODULE,
+ .read = kfd_debugfs_pasid_read,
+};
+/*
+ * kfd_debugfs_add_process - create the debugfs directory of a process
+ * p: process to expose
+ *
+ * Creates a directory named after the lead thread's pid under the proc
+ * debugfs directory, with one read-only pasid_<gpuid> file per device, and
+ * records it on the procs list. Returns silently if the list entry cannot
+ * be allocated.
+ */
+void kfd_debugfs_add_process(struct kfd_process *p)
+{
+ int i;
+ char name[MAX_DEBUGFS_FILENAME_LEN];
+ struct debugfs_proc_entry *entry;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return;
+ /* NOTE(review): no lock is taken here, while removal walks the list under kfd_processes_mutex - confirm callers hold it */
+ list_add(&entry->list, &procs);
+ entry->pid = p->lead_thread->pid;
+ snprintf(name, MAX_DEBUGFS_FILENAME_LEN, "%d",
+ (int)entry->pid);
+ entry->proc_dentry = debugfs_create_dir(name, debugfs_proc);
+
+ /* Create debugfs files for each GPU:
+ * - proc/<pid>/pasid_<gpuid>
+ */
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ snprintf(name, MAX_DEBUGFS_FILENAME_LEN, "pasid_%u",
+ pdd->dev->id);
+ debugfs_create_file((const char *)name, S_IFREG | 0444,
+ entry->proc_dentry, pdd,
+ &kfd_debugfs_pasid_fops);
+ }
+}
+/*
+ * kfd_debugfs_remove_process - remove the debugfs directory of a process
+ * p: process whose entries are removed
+ *
+ * Walks the procs list under kfd_processes_mutex and, for every entry whose
+ * pid matches the lead thread of p, removes the debugfs directory tree,
+ * unlinks the entry and frees it.
+ */
+void kfd_debugfs_remove_process(struct kfd_process *p)
+{
+ struct debugfs_proc_entry *entry, *next;
+
+ mutex_lock(&kfd_processes_mutex);
+ list_for_each_entry_safe(entry, next, &procs, list) {
+ if (entry->pid != p->lead_thread->pid)
+ continue;
+ /* the _safe iterator allows the current entry to be freed in the loop */
+ debugfs_remove_recursive(entry->proc_dentry);
+ list_del(&entry->list);
+ kfree(entry);
+ }
+ mutex_unlock(&kfd_processes_mutex);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 3b119db16003..e9cfb80bd436 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -28,11 +29,12 @@
#include "kfd_pm4_headers_vi.h"
#include "kfd_pm4_headers_aldebaran.h"
#include "cwsr_trap_handler.h"
-#include "kfd_iommu.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
+#include "kfd_svm.h"
#include "kfd_migrate.h"
#include "amdgpu.h"
+#include "amdgpu_xcp.h"
#define MQD_SIZE_ALIGNED 768
@@ -41,7 +43,7 @@
* once locked, kfd driver will stop any further GPU execution.
* create process (open) will return -EAGAIN.
*/
-static atomic_t kfd_locked = ATOMIC_INIT(0);
+static int kfd_locked;
#ifdef CONFIG_DRM_AMDGPU_CIK
extern const struct kfd2kgd_calls gfx_v7_kfd2kgd;
@@ -50,773 +52,433 @@ extern const struct kfd2kgd_calls gfx_v8_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v9_kfd2kgd;
extern const struct kfd2kgd_calls arcturus_kfd2kgd;
extern const struct kfd2kgd_calls aldebaran_kfd2kgd;
+extern const struct kfd2kgd_calls gc_9_4_3_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd;
-
-#ifdef KFD_SUPPORT_IOMMU_V2
-static const struct kfd_device_info kaveri_device_info = {
- .asic_family = CHIP_KAVERI,
- .asic_name = "kaveri",
- .gfx_target_version = 70000,
- .max_pasid_bits = 16,
- /* max num of queues for KV.TODO should be a dynamic value */
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = false,
- .needs_iommu_device = true,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info carrizo_device_info = {
- .asic_family = CHIP_CARRIZO,
- .asic_name = "carrizo",
- .gfx_target_version = 80001,
- .max_pasid_bits = 16,
- /* max num of queues for CZ.TODO should be a dynamic value */
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = true,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info raven_device_info = {
- .asic_family = CHIP_RAVEN,
- .asic_name = "raven",
- .gfx_target_version = 90002,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = true,
- .needs_pci_atomics = true,
- .num_sdma_engines = 1,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-#endif
-
-#ifdef CONFIG_DRM_AMDGPU_CIK
-static const struct kfd_device_info hawaii_device_info = {
- .asic_family = CHIP_HAWAII,
- .asic_name = "hawaii",
- .gfx_target_version = 70001,
- .max_pasid_bits = 16,
- /* max num of queues for KV.TODO should be a dynamic value */
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = false,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-#endif
-
-static const struct kfd_device_info tonga_device_info = {
- .asic_family = CHIP_TONGA,
- .asic_name = "tonga",
- .gfx_target_version = 80002,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = false,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info fiji_device_info = {
- .asic_family = CHIP_FIJI,
- .asic_name = "fiji",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info fiji_vf_device_info = {
- .asic_family = CHIP_FIJI,
- .asic_name = "fiji",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-
-static const struct kfd_device_info polaris10_device_info = {
- .asic_family = CHIP_POLARIS10,
- .asic_name = "polaris10",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info polaris10_vf_device_info = {
- .asic_family = CHIP_POLARIS10,
- .asic_name = "polaris10",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info polaris11_device_info = {
- .asic_family = CHIP_POLARIS11,
- .asic_name = "polaris11",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info polaris12_device_info = {
- .asic_family = CHIP_POLARIS12,
- .asic_name = "polaris12",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info vegam_device_info = {
- .asic_family = CHIP_VEGAM,
- .asic_name = "vegam",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info vega10_device_info = {
- .asic_family = CHIP_VEGA10,
- .asic_name = "vega10",
- .gfx_target_version = 90000,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info vega10_vf_device_info = {
- .asic_family = CHIP_VEGA10,
- .asic_name = "vega10",
- .gfx_target_version = 90000,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info vega12_device_info = {
- .asic_family = CHIP_VEGA12,
- .asic_name = "vega12",
- .gfx_target_version = 90004,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info vega20_device_info = {
- .asic_family = CHIP_VEGA20,
- .asic_name = "vega20",
- .gfx_target_version = 90006,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info arcturus_device_info = {
- .asic_family = CHIP_ARCTURUS,
- .asic_name = "arcturus",
- .gfx_target_version = 90008,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 6,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info aldebaran_device_info = {
- .asic_family = CHIP_ALDEBARAN,
- .asic_name = "aldebaran",
- .gfx_target_version = 90010,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 3,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info renoir_device_info = {
- .asic_family = CHIP_RENOIR,
- .asic_name = "renoir",
- .gfx_target_version = 90012,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 1,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info navi10_device_info = {
- .asic_family = CHIP_NAVI10,
- .asic_name = "navi10",
- .gfx_target_version = 100100,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 145,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info navi12_device_info = {
- .asic_family = CHIP_NAVI12,
- .asic_name = "navi12",
- .gfx_target_version = 100101,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 145,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info navi14_device_info = {
- .asic_family = CHIP_NAVI14,
- .asic_name = "navi14",
- .gfx_target_version = 100102,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 145,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info sienna_cichlid_device_info = {
- .asic_family = CHIP_SIENNA_CICHLID,
- .asic_name = "sienna_cichlid",
- .gfx_target_version = 100300,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_engines = 4,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info navy_flounder_device_info = {
- .asic_family = CHIP_NAVY_FLOUNDER,
- .asic_name = "navy_flounder",
- .gfx_target_version = 100301,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info vangogh_device_info = {
- .asic_family = CHIP_VANGOGH,
- .asic_name = "vangogh",
- .gfx_target_version = 100303,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_engines = 1,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info dimgrey_cavefish_device_info = {
- .asic_family = CHIP_DIMGREY_CAVEFISH,
- .asic_name = "dimgrey_cavefish",
- .gfx_target_version = 100302,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info beige_goby_device_info = {
- .asic_family = CHIP_BEIGE_GOBY,
- .asic_name = "beige_goby",
- .gfx_target_version = 100304,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_engines = 1,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info yellow_carp_device_info = {
- .asic_family = CHIP_YELLOW_CARP,
- .asic_name = "yellow_carp",
- .gfx_target_version = 100305,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_engines = 1,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info cyan_skillfish_device_info = {
- .asic_family = CHIP_CYAN_SKILLFISH,
- .asic_name = "cyan_skillfish",
- .gfx_target_version = 100103,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
+extern const struct kfd2kgd_calls gfx_v11_kfd2kgd;
+extern const struct kfd2kgd_calls gfx_v12_kfd2kgd;
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
unsigned int chunk_size);
static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
-static int kfd_resume(struct kfd_dev *kfd);
+static int kfd_resume(struct kfd_node *kfd);
-struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf)
+static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
{
- struct kfd_dev *kfd;
- const struct kfd_device_info *device_info;
- const struct kfd2kgd_calls *f2g;
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
- struct pci_dev *pdev = adev->pdev;
+ uint32_t sdma_version = amdgpu_ip_version(kfd->adev, SDMA0_HWIP, 0);
+
+ switch (sdma_version) {
+ case IP_VERSION(4, 0, 0):/* VEGA10 */
+ case IP_VERSION(4, 0, 1):/* VEGA12 */
+ case IP_VERSION(4, 1, 0):/* RAVEN */
+ case IP_VERSION(4, 1, 1):/* RAVEN */
+ case IP_VERSION(4, 1, 2):/* RENOIR */
+ case IP_VERSION(5, 2, 1):/* VANGOGH */
+ case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
+ case IP_VERSION(5, 2, 6):/* GC 10.3.6 */
+ case IP_VERSION(5, 2, 7):/* GC 10.3.7 */
+ kfd->device_info.num_sdma_queues_per_engine = 2;
+ break;
+ case IP_VERSION(4, 2, 0):/* VEGA20 */
+ case IP_VERSION(4, 2, 2):/* ARCTURUS */
+ case IP_VERSION(4, 4, 0):/* ALDEBARAN */
+ case IP_VERSION(4, 4, 2):
+ case IP_VERSION(4, 4, 5):
+ case IP_VERSION(4, 4, 4):
+ case IP_VERSION(5, 0, 0):/* NAVI10 */
+ case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
+ case IP_VERSION(5, 0, 2):/* NAVI14 */
+ case IP_VERSION(5, 0, 5):/* NAVI12 */
+ case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */
+ case IP_VERSION(5, 2, 2):/* NAVY_FLOUNDER */
+ case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
+ case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 1):
+ case IP_VERSION(6, 0, 2):
+ case IP_VERSION(6, 0, 3):
+ case IP_VERSION(6, 1, 0):
+ case IP_VERSION(6, 1, 1):
+ case IP_VERSION(6, 1, 2):
+ case IP_VERSION(6, 1, 3):
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ kfd->device_info.num_sdma_queues_per_engine = 8;
+ break;
+ default:
+ dev_warn(kfd_device,
+ "Default sdma queue per engine(8) is set due to mismatch of sdma ip block(SDMA_HWIP:0x%x).\n",
+ sdma_version);
+ kfd->device_info.num_sdma_queues_per_engine = 8;
+ }
+
+ bitmap_zero(kfd->device_info.reserved_sdma_queues_bitmap, KFD_MAX_SDMA_QUEUES);
+
+ switch (sdma_version) {
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 1):
+ case IP_VERSION(6, 0, 2):
+ case IP_VERSION(6, 0, 3):
+ case IP_VERSION(6, 1, 0):
+ case IP_VERSION(6, 1, 1):
+ case IP_VERSION(6, 1, 2):
+ case IP_VERSION(6, 1, 3):
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ /* Reserve 1 for paging and 1 for gfx */
+ kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
+ /* BIT(0)=engine-0 queue-0; BIT(1)=engine-1 queue-0; BIT(2)=engine-0 queue-1; ... */
+ bitmap_set(kfd->device_info.reserved_sdma_queues_bitmap, 0,
+ kfd->adev->sdma.num_instances *
+ kfd->device_info.num_reserved_sdma_queues_per_engine);
+ break;
+ default:
+ break;
+ }
+}
+
+static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
+{
+ uint32_t gc_version = KFD_GC_VERSION(kfd);
+
+ switch (gc_version) {
+ case IP_VERSION(9, 0, 1): /* VEGA10 */
+ case IP_VERSION(9, 1, 0): /* RAVEN */
+ case IP_VERSION(9, 2, 1): /* VEGA12 */
+ case IP_VERSION(9, 2, 2): /* RAVEN */
+ case IP_VERSION(9, 3, 0): /* RENOIR */
+ case IP_VERSION(9, 4, 0): /* VEGA20 */
+ case IP_VERSION(9, 4, 1): /* ARCTURUS */
+ case IP_VERSION(9, 4, 2): /* ALDEBARAN */
+ kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
+ break;
+ case IP_VERSION(9, 4, 3): /* GC 9.4.3 */
+ case IP_VERSION(9, 4, 4): /* GC 9.4.4 */
+ case IP_VERSION(9, 5, 0): /* GC 9.5.0 */
+ kfd->device_info.event_interrupt_class =
+ &event_interrupt_class_v9_4_3;
+ break;
+ case IP_VERSION(10, 3, 1): /* VANGOGH */
+ case IP_VERSION(10, 3, 3): /* YELLOW_CARP */
+ case IP_VERSION(10, 3, 6): /* GC 10.3.6 */
+ case IP_VERSION(10, 3, 7): /* GC 10.3.7 */
+ case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 1, 10): /* NAVI10 */
+ case IP_VERSION(10, 1, 2): /* NAVI12 */
+ case IP_VERSION(10, 1, 1): /* NAVI14 */
+ case IP_VERSION(10, 3, 0): /* SIENNA_CICHLID */
+ case IP_VERSION(10, 3, 2): /* NAVY_FLOUNDER */
+ case IP_VERSION(10, 3, 4): /* DIMGREY_CAVEFISH */
+ case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */
+ kfd->device_info.event_interrupt_class = &event_interrupt_class_v10;
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ /* GFX12_TODO: Change to v12 version. */
+ kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
+ break;
+ default:
+ dev_warn(kfd_device, "v9 event interrupt handler is set due to "
+ "mismatch of gc ip block(GC_HWIP:0x%x).\n", gc_version);
+ kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
+ }
+}
+
+static void kfd_device_info_init(struct kfd_dev *kfd,
+ bool vf, uint32_t gfx_target_version)
+{
+ uint32_t gc_version = KFD_GC_VERSION(kfd);
+ uint32_t asic_type = kfd->adev->asic_type;
+
+ kfd->device_info.max_pasid_bits = 16;
+ kfd->device_info.max_no_of_hqd = 24;
+ kfd->device_info.num_of_watch_points = 4;
+ kfd->device_info.mqd_size_aligned = MQD_SIZE_ALIGNED;
+ kfd->device_info.gfx_target_version = gfx_target_version;
+
+ if (KFD_IS_SOC15(kfd)) {
+ kfd->device_info.doorbell_size = 8;
+ kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t);
+ kfd->device_info.supports_cwsr = true;
+
+ kfd_device_info_set_sdma_info(kfd);
+
+ kfd_device_info_set_event_interrupt_class(kfd);
+
+ if (gc_version < IP_VERSION(11, 0, 0)) {
+ /* Navi2x+, Navi1x+ */
+ if (gc_version == IP_VERSION(10, 3, 6))
+ kfd->device_info.no_atomic_fw_version = 14;
+ else if (gc_version == IP_VERSION(10, 3, 7))
+ kfd->device_info.no_atomic_fw_version = 3;
+ else if (gc_version >= IP_VERSION(10, 3, 0))
+ kfd->device_info.no_atomic_fw_version = 92;
+ else if (gc_version >= IP_VERSION(10, 1, 1))
+ kfd->device_info.no_atomic_fw_version = 145;
+
+ /* Navi1x+ */
+ if (gc_version >= IP_VERSION(10, 1, 1))
+ kfd->device_info.needs_pci_atomics = true;
+ } else if (gc_version < IP_VERSION(12, 0, 0)) {
+ /*
+ * PCIe atomics support acknowledgment in GFX11 RS64 CPFW requires
+ * MEC version >= 509. Prior RS64 CPFW versions (and all F32) require
+ * PCIe atomics support.
+ */
+ kfd->device_info.needs_pci_atomics = true;
+ kfd->device_info.no_atomic_fw_version = kfd->adev->gfx.rs64_enable ? 509 : 0;
+ } else if (gc_version < IP_VERSION(13, 0, 0)) {
+ kfd->device_info.needs_pci_atomics = true;
+ kfd->device_info.no_atomic_fw_version = 2090;
+ } else {
+ kfd->device_info.needs_pci_atomics = true;
+ }
+ } else {
+ kfd->device_info.doorbell_size = 4;
+ kfd->device_info.ih_ring_entry_size = 4 * sizeof(uint32_t);
+ kfd->device_info.event_interrupt_class = &event_interrupt_class_cik;
+ kfd->device_info.num_sdma_queues_per_engine = 2;
+
+ if (asic_type != CHIP_KAVERI &&
+ asic_type != CHIP_HAWAII &&
+ asic_type != CHIP_TONGA)
+ kfd->device_info.supports_cwsr = true;
+
+ if (asic_type != CHIP_HAWAII && !vf)
+ kfd->device_info.needs_pci_atomics = true;
+ }
+}
+
+struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
+{
+ struct kfd_dev *kfd = NULL;
+ const struct kfd2kgd_calls *f2g = NULL;
+ uint32_t gfx_target_version = 0;
switch (adev->asic_type) {
-#ifdef KFD_SUPPORT_IOMMU_V2
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
- if (vf)
- device_info = NULL;
- else
- device_info = &kaveri_device_info;
- f2g = &gfx_v7_kfd2kgd;
+ gfx_target_version = 70000;
+ if (!vf)
+ f2g = &gfx_v7_kfd2kgd;
break;
#endif
case CHIP_CARRIZO:
- if (vf)
- device_info = NULL;
- else
- device_info = &carrizo_device_info;
- f2g = &gfx_v8_kfd2kgd;
+ gfx_target_version = 80001;
+ if (!vf)
+ f2g = &gfx_v8_kfd2kgd;
break;
-#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_HAWAII:
- if (vf)
- device_info = NULL;
- else
- device_info = &hawaii_device_info;
- f2g = &gfx_v7_kfd2kgd;
+ gfx_target_version = 70001;
+ if (!amdgpu_exp_hw_support)
+ pr_info(
+ "KFD support on Hawaii is experimental. See modparam exp_hw_support\n"
+ );
+ else if (!vf)
+ f2g = &gfx_v7_kfd2kgd;
break;
#endif
case CHIP_TONGA:
- if (vf)
- device_info = NULL;
- else
- device_info = &tonga_device_info;
- f2g = &gfx_v8_kfd2kgd;
+ gfx_target_version = 80002;
+ if (!vf)
+ f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_FIJI:
- if (vf)
- device_info = &fiji_vf_device_info;
- else
- device_info = &fiji_device_info;
- f2g = &gfx_v8_kfd2kgd;
- break;
case CHIP_POLARIS10:
- if (vf)
- device_info = &polaris10_vf_device_info;
- else
- device_info = &polaris10_device_info;
+ gfx_target_version = 80003;
f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_POLARIS11:
- if (vf)
- device_info = NULL;
- else
- device_info = &polaris11_device_info;
- f2g = &gfx_v8_kfd2kgd;
- break;
case CHIP_POLARIS12:
- if (vf)
- device_info = NULL;
- else
- device_info = &polaris12_device_info;
- f2g = &gfx_v8_kfd2kgd;
- break;
case CHIP_VEGAM:
- if (vf)
- device_info = NULL;
- else
- device_info = &vegam_device_info;
- f2g = &gfx_v8_kfd2kgd;
+ gfx_target_version = 80003;
+ if (!vf)
+ f2g = &gfx_v8_kfd2kgd;
break;
default:
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ /* Vega 10 */
case IP_VERSION(9, 0, 1):
- if (vf)
- device_info = &vega10_vf_device_info;
- else
- device_info = &vega10_device_info;
+ gfx_target_version = 90000;
f2g = &gfx_v9_kfd2kgd;
break;
-#ifdef KFD_SUPPORT_IOMMU_V2
+ /* Raven */
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 2):
- if (vf)
- device_info = NULL;
- else
- device_info = &raven_device_info;
- f2g = &gfx_v9_kfd2kgd;
+ gfx_target_version = 90002;
+ if (!vf)
+ f2g = &gfx_v9_kfd2kgd;
break;
-#endif
+ /* Vega12 */
case IP_VERSION(9, 2, 1):
- if (vf)
- device_info = NULL;
- else
- device_info = &vega12_device_info;
- f2g = &gfx_v9_kfd2kgd;
+ gfx_target_version = 90004;
+ if (!vf)
+ f2g = &gfx_v9_kfd2kgd;
break;
+ /* Renoir */
case IP_VERSION(9, 3, 0):
- if (vf)
- device_info = NULL;
- else
- device_info = &renoir_device_info;
- f2g = &gfx_v9_kfd2kgd;
+ gfx_target_version = 90012;
+ if (!vf)
+ f2g = &gfx_v9_kfd2kgd;
break;
+ /* Vega20 */
case IP_VERSION(9, 4, 0):
- if (vf)
- device_info = NULL;
- else
- device_info = &vega20_device_info;
- f2g = &gfx_v9_kfd2kgd;
+ gfx_target_version = 90006;
+ if (!vf)
+ f2g = &gfx_v9_kfd2kgd;
break;
+ /* Arcturus */
case IP_VERSION(9, 4, 1):
- device_info = &arcturus_device_info;
+ gfx_target_version = 90008;
f2g = &arcturus_kfd2kgd;
break;
+ /* Aldebaran */
case IP_VERSION(9, 4, 2):
- device_info = &aldebaran_device_info;
+ gfx_target_version = 90010;
f2g = &aldebaran_kfd2kgd;
break;
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ gfx_target_version = 90402;
+ f2g = &gc_9_4_3_kfd2kgd;
+ break;
+ case IP_VERSION(9, 5, 0):
+ gfx_target_version = 90500;
+ f2g = &gc_9_4_3_kfd2kgd;
+ break;
+ /* Navi10 */
case IP_VERSION(10, 1, 10):
- if (vf)
- device_info = NULL;
- else
- device_info = &navi10_device_info;
- f2g = &gfx_v10_kfd2kgd;
+ gfx_target_version = 100100;
+ if (!vf)
+ f2g = &gfx_v10_kfd2kgd;
break;
+ /* Navi12 */
case IP_VERSION(10, 1, 2):
- device_info = &navi12_device_info;
+ gfx_target_version = 100101;
f2g = &gfx_v10_kfd2kgd;
break;
+ /* Navi14 */
case IP_VERSION(10, 1, 1):
- if (vf)
- device_info = NULL;
- else
- device_info = &navi14_device_info;
- f2g = &gfx_v10_kfd2kgd;
+ gfx_target_version = 100102;
+ if (!vf)
+ f2g = &gfx_v10_kfd2kgd;
break;
+ /* Cyan Skillfish */
case IP_VERSION(10, 1, 3):
- if (vf)
- device_info = NULL;
- else
- device_info = &cyan_skillfish_device_info;
- f2g = &gfx_v10_kfd2kgd;
+ case IP_VERSION(10, 1, 4):
+ gfx_target_version = 100103;
+ if (!vf)
+ f2g = &gfx_v10_kfd2kgd;
break;
+ /* Sienna Cichlid */
case IP_VERSION(10, 3, 0):
- device_info = &sienna_cichlid_device_info;
+ gfx_target_version = 100300;
f2g = &gfx_v10_3_kfd2kgd;
break;
+ /* Navy Flounder */
case IP_VERSION(10, 3, 2):
- device_info = &navy_flounder_device_info;
+ gfx_target_version = 100301;
f2g = &gfx_v10_3_kfd2kgd;
break;
+ /* Van Gogh */
case IP_VERSION(10, 3, 1):
- if (vf)
- device_info = NULL;
- else
- device_info = &vangogh_device_info;
- f2g = &gfx_v10_3_kfd2kgd;
+ gfx_target_version = 100303;
+ if (!vf)
+ f2g = &gfx_v10_3_kfd2kgd;
break;
+ /* Dimgrey Cavefish */
case IP_VERSION(10, 3, 4):
- device_info = &dimgrey_cavefish_device_info;
+ gfx_target_version = 100302;
f2g = &gfx_v10_3_kfd2kgd;
break;
+ /* Beige Goby */
case IP_VERSION(10, 3, 5):
- device_info = &beige_goby_device_info;
+ gfx_target_version = 100304;
f2g = &gfx_v10_3_kfd2kgd;
break;
+ /* Yellow Carp */
case IP_VERSION(10, 3, 3):
- if (vf)
- device_info = NULL;
- else
- device_info = &yellow_carp_device_info;
- f2g = &gfx_v10_3_kfd2kgd;
+ gfx_target_version = 100305;
+ if (!vf)
+ f2g = &gfx_v10_3_kfd2kgd;
+ break;
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ gfx_target_version = 100306;
+ if (!vf)
+ f2g = &gfx_v10_3_kfd2kgd;
+ break;
+ case IP_VERSION(11, 0, 0):
+ gfx_target_version = 110000;
+ f2g = &gfx_v11_kfd2kgd;
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ gfx_target_version = 110003;
+ f2g = &gfx_v11_kfd2kgd;
+ break;
+ case IP_VERSION(11, 0, 2):
+ gfx_target_version = 110002;
+ f2g = &gfx_v11_kfd2kgd;
+ break;
+ case IP_VERSION(11, 0, 3):
+ /* Note: Compiler version is 11.0.1 while HW version is 11.0.3 */
+ gfx_target_version = 110001;
+ f2g = &gfx_v11_kfd2kgd;
+ break;
+ case IP_VERSION(11, 5, 0):
+ gfx_target_version = 110500;
+ f2g = &gfx_v11_kfd2kgd;
+ break;
+ case IP_VERSION(11, 5, 1):
+ gfx_target_version = 110501;
+ f2g = &gfx_v11_kfd2kgd;
+ break;
+ case IP_VERSION(11, 5, 2):
+ gfx_target_version = 110502;
+ f2g = &gfx_v11_kfd2kgd;
+ break;
+ case IP_VERSION(11, 5, 3):
+ gfx_target_version = 110503;
+ f2g = &gfx_v11_kfd2kgd;
+ break;
+ case IP_VERSION(12, 0, 0):
+ gfx_target_version = 120000;
+ f2g = &gfx_v12_kfd2kgd;
+ break;
+ case IP_VERSION(12, 0, 1):
+ gfx_target_version = 120001;
+ f2g = &gfx_v12_kfd2kgd;
break;
default:
- return NULL;
+ break;
}
break;
}
- if (!device_info || !f2g) {
- dev_err(kfd_device, "%s %s not supported in kfd\n",
- amdgpu_asic_name[adev->asic_type], vf ? "VF" : "");
+ if (!f2g) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0))
+ dev_info(kfd_device,
+ "GC IP %06x %s not supported in kfd\n",
+ amdgpu_ip_version(adev, GC_HWIP, 0),
+ vf ? "VF" : "");
+ else
+ dev_info(kfd_device, "%s %s not supported in kfd\n",
+ amdgpu_asic_name[adev->asic_type], vf ? "VF" : "");
return NULL;
}
@@ -824,145 +486,315 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf)
if (!kfd)
return NULL;
- kfd->kgd = kgd;
- kfd->device_info = device_info;
- kfd->pdev = pdev;
+ kfd->adev = adev;
+ kfd_device_info_init(kfd, vf, gfx_target_version);
kfd->init_complete = false;
kfd->kfd2kgd = f2g;
atomic_set(&kfd->compute_profile, 0);
mutex_init(&kfd->doorbell_mutex);
- memset(&kfd->doorbell_available_index, 0,
- sizeof(kfd->doorbell_available_index));
-
- atomic_set(&kfd->sram_ecc_flag, 0);
ida_init(&kfd->doorbell_ida);
+ atomic_set(&kfd->kfd_processes_count, 0);
return kfd;
}
static void kfd_cwsr_init(struct kfd_dev *kfd)
{
- if (cwsr_enable && kfd->device_info->supports_cwsr) {
- if (kfd->device_info->asic_family < CHIP_VEGA10) {
- BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
+ if (cwsr_enable && kfd->device_info.supports_cwsr) {
+ if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex)
+ > KFD_CWSR_TMA_OFFSET);
kfd->cwsr_isa = cwsr_trap_gfx8_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
- } else if (kfd->device_info->asic_family == CHIP_ARCTURUS) {
- BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE);
+ } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex)
+ > KFD_CWSR_TMA_OFFSET);
kfd->cwsr_isa = cwsr_trap_arcturus_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex);
- } else if (kfd->device_info->asic_family == CHIP_ALDEBARAN) {
- BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE);
+ } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex)
+ > KFD_CWSR_TMA_OFFSET);
kfd->cwsr_isa = cwsr_trap_aldebaran_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex);
- } else if (kfd->device_info->asic_family < CHIP_NAVI10) {
- BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
+ } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex)
+ > KFD_CWSR_TMA_OFFSET);
+ kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex);
+ } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 5, 0)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_5_0_hex) > PAGE_SIZE);
+ kfd->cwsr_isa = cwsr_trap_gfx9_5_0_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_5_0_hex);
+ } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex)
+ > KFD_CWSR_TMA_OFFSET);
kfd->cwsr_isa = cwsr_trap_gfx9_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
- } else if (kfd->device_info->asic_family < CHIP_SIENNA_CICHLID) {
- BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
+ } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex)
+ > KFD_CWSR_TMA_OFFSET);
kfd->cwsr_isa = cwsr_trap_nv1x_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
- } else {
- BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
+ } else if (KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex)
+ > KFD_CWSR_TMA_OFFSET);
kfd->cwsr_isa = cwsr_trap_gfx10_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
+ } else if (KFD_GC_VERSION(kfd) < IP_VERSION(12, 0, 0)) {
+ /* The gfx11 cwsr trap handler must fit inside a single
+ page. */
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE);
+ kfd->cwsr_isa = cwsr_trap_gfx11_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex);
+ } else {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx12_hex)
+ > KFD_CWSR_TMA_OFFSET);
+ kfd->cwsr_isa = cwsr_trap_gfx12_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx12_hex);
}
kfd->cwsr_enabled = true;
}
}
-static int kfd_gws_init(struct kfd_dev *kfd)
+static int kfd_gws_init(struct kfd_node *node)
{
int ret = 0;
+ struct kfd_dev *kfd = node->kfd;
+ uint32_t mes_rev = node->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
- if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
+ if (node->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
return 0;
- if (hws_gws_support
- || (kfd->device_info->asic_family == CHIP_VEGA10
- && kfd->mec2_fw_version >= 0x81b3)
- || (kfd->device_info->asic_family >= CHIP_VEGA12
- && kfd->device_info->asic_family <= CHIP_RAVEN
- && kfd->mec2_fw_version >= 0x1b3)
- || (kfd->device_info->asic_family == CHIP_ARCTURUS
- && kfd->mec2_fw_version >= 0x30)
- || (kfd->device_info->asic_family == CHIP_ALDEBARAN
- && kfd->mec2_fw_version >= 0x28))
- ret = amdgpu_amdkfd_alloc_gws(kfd->kgd,
- amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws);
+ if (hws_gws_support || (KFD_IS_SOC15(node) &&
+ ((KFD_GC_VERSION(node) == IP_VERSION(9, 0, 1)
+ && kfd->mec2_fw_version >= 0x81b3) ||
+ (KFD_GC_VERSION(node) <= IP_VERSION(9, 4, 0)
+ && kfd->mec2_fw_version >= 0x1b3) ||
+ (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 1)
+ && kfd->mec2_fw_version >= 0x30) ||
+ (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 2)
+ && kfd->mec2_fw_version >= 0x28) ||
+ (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(node) == IP_VERSION(9, 4, 4)) ||
+ (KFD_GC_VERSION(node) == IP_VERSION(9, 5, 0)) ||
+ (KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0)
+ && KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0)
+ && kfd->mec2_fw_version >= 0x6b) ||
+ (KFD_GC_VERSION(node) >= IP_VERSION(11, 0, 0)
+ && KFD_GC_VERSION(node) < IP_VERSION(12, 0, 0)
+ && mes_rev >= 68) ||
+ (KFD_GC_VERSION(node) >= IP_VERSION(12, 0, 0))))) {
+ if (KFD_GC_VERSION(node) >= IP_VERSION(12, 0, 0))
+ node->adev->gds.gws_size = 64;
+ ret = amdgpu_amdkfd_alloc_gws(node->adev,
+ node->adev->gds.gws_size, &node->gws);
+ }
return ret;
}
-static void kfd_smi_init(struct kfd_dev *dev) {
+static void kfd_smi_init(struct kfd_node *dev)
+{
INIT_LIST_HEAD(&dev->smi_clients);
spin_lock_init(&dev->smi_lock);
}
+static int kfd_init_node(struct kfd_node *node)
+{
+ int err = -1;
+
+ if (kfd_interrupt_init(node)) {
+ dev_err(kfd_device, "Error initializing interrupts\n");
+ goto kfd_interrupt_error;
+ }
+
+ node->dqm = device_queue_manager_init(node);
+ if (!node->dqm) {
+ dev_err(kfd_device, "Error initializing queue manager\n");
+ goto device_queue_manager_error;
+ }
+
+ if (kfd_gws_init(node)) {
+ dev_err(kfd_device, "Could not allocate %d gws\n",
+ node->adev->gds.gws_size);
+ goto gws_error;
+ }
+
+ if (kfd_resume(node))
+ goto kfd_resume_error;
+
+ if (kfd_topology_add_device(node)) {
+ dev_err(kfd_device, "Error adding device to topology\n");
+ goto kfd_topology_add_device_error;
+ }
+
+ kfd_smi_init(node);
+
+ return 0;
+
+kfd_topology_add_device_error:
+kfd_resume_error:
+gws_error:
+ device_queue_manager_uninit(node->dqm);
+device_queue_manager_error:
+ kfd_interrupt_exit(node);
+kfd_interrupt_error:
+ if (node->gws)
+ amdgpu_amdkfd_free_gws(node->adev, node->gws);
+
+ /* Cleanup the node memory here */
+ kfree(node);
+ return err;
+}
+
+static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes)
+{
+ struct kfd_node *knode;
+ unsigned int i;
+
+ /*
+ * flush_workqueue ensures that there are no outstanding
+ * work-queue items that will access interrupt_ring. New work items
+ * can't be created because we stopped interrupt handling above.
+ */
+ flush_workqueue(kfd->ih_wq);
+ destroy_workqueue(kfd->ih_wq);
+
+ for (i = 0; i < num_nodes; i++) {
+ knode = kfd->nodes[i];
+ device_queue_manager_uninit(knode->dqm);
+ kfd_interrupt_exit(knode);
+ kfd_topology_remove_device(knode);
+ if (knode->gws)
+ amdgpu_amdkfd_free_gws(knode->adev, knode->gws);
+ kfree(knode);
+ kfd->nodes[i] = NULL;
+ }
+}
+
+static void kfd_setup_interrupt_bitmap(struct kfd_node *node,
+ unsigned int kfd_node_idx)
+{
+ struct amdgpu_device *adev = node->adev;
+ uint32_t xcc_mask = node->xcc_mask;
+ uint32_t xcc, mapped_xcc;
+ /*
+ * Interrupt bitmap is setup for processing interrupts from
+ * different XCDs and AIDs.
+ * Interrupt bitmap is defined as follows:
+ * 1. Bits 0-15 - correspond to the NodeId field.
+ * Each bit corresponds to NodeId number. For example, if
+ * a KFD node has interrupt bitmap set to 0x7, then this
+ * KFD node will process interrupts with NodeId = 0, 1 and 2
+ * in the IH cookie.
+ * 2. Bits 16-31 - unused.
+ *
+ * Please note that the kfd_node_idx argument passed to this
+ * function is not related to NodeId field received in the
+ * IH cookie.
+ *
+ * In CPX mode, a KFD node will process an interrupt if:
+ * - the Node Id matches the corresponding bit set in
+ * Bits 0-15.
+ * - AND VMID reported in the interrupt lies within the
+ * VMID range of the node.
+ */
+ for_each_inst(xcc, xcc_mask) {
+ mapped_xcc = GET_INST(GC, xcc);
+ node->interrupt_bitmap |= (mapped_xcc % 2 ? 5 : 3) << (4 * (mapped_xcc / 2));
+ }
+ dev_info(kfd_device, "Node: %d, interrupt_bitmap: %x\n", kfd_node_idx,
+ node->interrupt_bitmap);
+}
+
bool kgd2kfd_device_init(struct kfd_dev *kfd,
- struct drm_device *ddev,
const struct kgd2kfd_shared_resources *gpu_resources)
{
- unsigned int size, map_process_packet_size;
-
- kfd->ddev = ddev;
- kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
+ unsigned int size, map_process_packet_size, i;
+ struct kfd_node *node;
+ uint32_t first_vmid_kfd, last_vmid_kfd, vmid_num_kfd;
+ unsigned int max_proc_per_quantum;
+ int partition_mode;
+ int xcp_idx;
+
+ kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
KGD_ENGINE_MEC1);
- kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
+ kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
KGD_ENGINE_MEC2);
- kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
+ kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
KGD_ENGINE_SDMA1);
kfd->shared_resources = *gpu_resources;
- kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
- kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
- kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
- - kfd->vm_info.first_vmid_kfd + 1;
+ kfd->num_nodes = amdgpu_xcp_get_num_xcp(kfd->adev->xcp_mgr);
+
+ if (kfd->num_nodes == 0) {
+ dev_err(kfd_device,
+ "KFD num nodes cannot be 0, num_xcc_in_node: %d\n",
+ kfd->adev->gfx.num_xcc_per_xcp);
+ goto out;
+ }
/* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
* 32 and 64-bit requests are possible and must be
* supported.
*/
- kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->kgd);
+ kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->adev);
if (!kfd->pci_atomic_requested &&
- kfd->device_info->needs_pci_atomics &&
- (!kfd->device_info->no_atomic_fw_version ||
- kfd->mec_fw_version < kfd->device_info->no_atomic_fw_version)) {
+ kfd->device_info.needs_pci_atomics &&
+ (!kfd->device_info.no_atomic_fw_version ||
+ kfd->mec_fw_version < kfd->device_info.no_atomic_fw_version)) {
dev_info(kfd_device,
"skipped device %x:%x, PCI rejects atomics %d<%d\n",
- kfd->pdev->vendor, kfd->pdev->device,
+ kfd->adev->pdev->vendor, kfd->adev->pdev->device,
kfd->mec_fw_version,
- kfd->device_info->no_atomic_fw_version);
+ kfd->device_info.no_atomic_fw_version);
return false;
}
+ first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
+ last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
+ vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1;
+
+ /* For multi-partition capable GPUs, we need special handling for VMIDs
+ * depending on partition mode.
+ * In CPX mode, the VMID range needs to be shared between XCDs.
+ * Additionally, there are 13 VMIDs (3-15) available for KFD. To
+ * divide them equally, we change the starting VMID to 4 and do not
+ * use VMID 3.
+ * If the VMID range changes for multi-partition capable GPUs, then
+ * this code MUST be revisited.
+ */
+ if (kfd->adev->xcp_mgr) {
+ partition_mode = amdgpu_xcp_query_partition_mode(kfd->adev->xcp_mgr,
+ AMDGPU_XCP_FL_LOCKED);
+ if (partition_mode == AMDGPU_CPX_PARTITION_MODE &&
+ kfd->num_nodes != 1) {
+ vmid_num_kfd /= 2;
+ first_vmid_kfd = last_vmid_kfd + 1 - vmid_num_kfd*2;
+ }
+ }
+
/* Verify module parameters regarding mapped process number*/
- if ((hws_max_conc_proc < 0)
- || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
- dev_err(kfd_device,
- "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n",
- hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
- kfd->vm_info.vmid_num_kfd);
- kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
- } else
- kfd->max_proc_per_quantum = hws_max_conc_proc;
+ if (hws_max_conc_proc >= 0)
+ max_proc_per_quantum = min((u32)hws_max_conc_proc, vmid_num_kfd);
+ else
+ max_proc_per_quantum = vmid_num_kfd;
/* calculate max size of mqds needed for queues */
size = max_num_of_queues_per_device *
- kfd->device_info->mqd_size_aligned;
+ kfd->device_info.mqd_size_aligned;
/*
* calculate max size of runlist packet.
* There can be only 2 packets at once
*/
- map_process_packet_size =
- kfd->device_info->asic_family == CHIP_ALDEBARAN ?
+ map_process_packet_size = KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) ?
sizeof(struct pm4_mes_map_process_aldebaran) :
- sizeof(struct pm4_mes_map_process);
+ sizeof(struct pm4_mes_map_process);
size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size +
max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
+ sizeof(struct pm4_mes_runlist)) * 2;
@@ -974,7 +806,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
size += 512 * 1024;
if (amdgpu_amdkfd_alloc_gtt_mem(
- kfd->kgd, size, &kfd->gtt_mem,
+ kfd->adev, size, &kfd->gtt_mem,
&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
false)) {
dev_err(kfd_device, "Could not allocate %d bytes\n", size);
@@ -995,86 +827,118 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto kfd_doorbell_error;
}
- kfd->hive_id = amdgpu_amdkfd_get_hive_id(kfd->kgd);
-
- kfd->noretry = amdgpu_amdkfd_get_noretry(kfd->kgd);
+ if (amdgpu_use_xgmi_p2p)
+ kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;
- if (kfd_interrupt_init(kfd)) {
- dev_err(kfd_device, "Error initializing interrupts\n");
- goto kfd_interrupt_error;
- }
+ /*
+ * For multi-partition capable GPUs, the KFD abstracts all partitions
+ * within a socket as xGMI connected in the topology so assign a unique
+ * hive id per device based on the pci device location if device is in
+ * PCIe mode.
+ */
+ if (!kfd->hive_id && kfd->num_nodes > 1)
+ kfd->hive_id = pci_dev_id(kfd->adev->pdev);
- kfd->dqm = device_queue_manager_init(kfd);
- if (!kfd->dqm) {
- dev_err(kfd_device, "Error initializing queue manager\n");
- goto device_queue_manager_error;
- }
+ kfd->noretry = kfd->adev->gmc.noretry;
- /* If supported on this device, allocate global GWS that is shared
- * by all KFD processes
- */
- if (kfd_gws_init(kfd)) {
- dev_err(kfd_device, "Could not allocate %d gws\n",
- amdgpu_amdkfd_get_num_gws(kfd->kgd));
- goto gws_error;
- }
+ kfd_cwsr_init(kfd);
- /* If CRAT is broken, won't set iommu enabled */
- kfd_double_confirm_iommu_support(kfd);
+ dev_info(kfd_device, "Total number of KFD nodes to be created: %d\n",
+ kfd->num_nodes);
+
+ /* Allocate the KFD nodes */
+ for (i = 0, xcp_idx = 0; i < kfd->num_nodes; i++) {
+ node = kzalloc(sizeof(struct kfd_node), GFP_KERNEL);
+ if (!node)
+ goto node_alloc_error;
+
+ node->node_id = i;
+ node->adev = kfd->adev;
+ node->kfd = kfd;
+ node->kfd2kgd = kfd->kfd2kgd;
+ node->vm_info.vmid_num_kfd = vmid_num_kfd;
+ node->xcp = amdgpu_get_next_xcp(kfd->adev->xcp_mgr, &xcp_idx);
+ /* TODO : Check if error handling is needed */
+ if (node->xcp) {
+ amdgpu_xcp_get_inst_details(node->xcp, AMDGPU_XCP_GFX,
+ &node->xcc_mask);
+ ++xcp_idx;
+ } else {
+ node->xcc_mask =
+ (1U << NUM_XCC(kfd->adev->gfx.xcc_mask)) - 1;
+ }
- if (kfd_iommu_device_init(kfd)) {
- kfd->use_iommu_v2 = false;
- dev_err(kfd_device, "Error initializing iommuv2\n");
- goto device_iommu_error;
- }
+ if (node->xcp) {
+ dev_info(kfd_device, "KFD node %d partition %d size %lldM\n",
+ node->node_id, node->xcp->mem_id,
+ KFD_XCP_MEMORY_SIZE(node->adev, node->node_id) >> 20);
+ }
- kfd_cwsr_init(kfd);
+ if (partition_mode == AMDGPU_CPX_PARTITION_MODE &&
+ kfd->num_nodes != 1) {
+ /* For multi-partition capable GPUs and CPX mode, first
+ * XCD gets VMID range 4-9 and second XCD gets VMID
+ * range 10-15.
+ */
+
+ node->vm_info.first_vmid_kfd = (i%2 == 0) ?
+ first_vmid_kfd :
+ first_vmid_kfd+vmid_num_kfd;
+ node->vm_info.last_vmid_kfd = (i%2 == 0) ?
+ last_vmid_kfd-vmid_num_kfd :
+ last_vmid_kfd;
+ node->compute_vmid_bitmap =
+ ((0x1 << (node->vm_info.last_vmid_kfd + 1)) - 1) -
+ ((0x1 << (node->vm_info.first_vmid_kfd)) - 1);
+ } else {
+ node->vm_info.first_vmid_kfd = first_vmid_kfd;
+ node->vm_info.last_vmid_kfd = last_vmid_kfd;
+ node->compute_vmid_bitmap =
+ gpu_resources->compute_vmid_bitmap;
+ }
+ node->max_proc_per_quantum = max_proc_per_quantum;
+ atomic_set(&node->sram_ecc_flag, 0);
- svm_migrate_init((struct amdgpu_device *)kfd->kgd);
+ amdgpu_amdkfd_get_local_mem_info(kfd->adev,
+ &node->local_mem_info, node->xcp);
- if(kgd2kfd_resume_iommu(kfd))
- goto device_iommu_error;
+ if (kfd->adev->xcp_mgr)
+ kfd_setup_interrupt_bitmap(node, i);
- if (kfd_resume(kfd))
- goto kfd_resume_error;
+ /* Initialize the KFD node */
+ if (kfd_init_node(node)) {
+ dev_err(kfd_device, "Error initializing KFD node\n");
+ goto node_init_error;
+ }
- kfd->dbgmgr = NULL;
+ spin_lock_init(&node->watch_points_lock);
- if (kfd_topology_add_device(kfd)) {
- dev_err(kfd_device, "Error adding device to topology\n");
- goto kfd_topology_add_device_error;
+ kfd->nodes[i] = node;
}
- kfd_smi_init(kfd);
+ svm_range_set_max_pages(kfd->adev);
kfd->init_complete = true;
- dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
- kfd->pdev->device);
+ dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor,
+ kfd->adev->pdev->device);
pr_debug("Starting kfd with the following scheduling policy %d\n",
- kfd->dqm->sched_policy);
+ node->dqm->sched_policy);
goto out;
-kfd_topology_add_device_error:
-kfd_resume_error:
-device_iommu_error:
-gws_error:
- device_queue_manager_uninit(kfd->dqm);
-device_queue_manager_error:
- kfd_interrupt_exit(kfd);
-kfd_interrupt_error:
+node_init_error:
+node_alloc_error:
+ kfd_cleanup_nodes(kfd, i);
kfd_doorbell_fini(kfd);
kfd_doorbell_error:
kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
- amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
+ amdgpu_amdkfd_free_gtt_mem(kfd->adev, &kfd->gtt_mem);
alloc_gtt_mem_failure:
- if (kfd->gws)
- amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
dev_err(kfd_device,
"device %x:%x NOT added due to errors\n",
- kfd->pdev->vendor, kfd->pdev->device);
+ kfd->adev->pdev->vendor, kfd->adev->pdev->device);
out:
return kfd->init_complete;
}
@@ -1082,32 +946,37 @@ out:
void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
if (kfd->init_complete) {
- device_queue_manager_uninit(kfd->dqm);
- kfd_interrupt_exit(kfd);
- kfd_topology_remove_device(kfd);
+ /* Cleanup KFD nodes */
+ kfd_cleanup_nodes(kfd, kfd->num_nodes);
+ /* Cleanup common/shared resources */
kfd_doorbell_fini(kfd);
ida_destroy(&kfd->doorbell_ida);
kfd_gtt_sa_fini(kfd);
- amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
- if (kfd->gws)
- amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
+ amdgpu_amdkfd_free_gtt_mem(kfd->adev, &kfd->gtt_mem);
}
kfree(kfd);
}
-int kgd2kfd_pre_reset(struct kfd_dev *kfd)
+int kgd2kfd_pre_reset(struct kfd_dev *kfd,
+ struct amdgpu_reset_context *reset_context)
{
+ struct kfd_node *node;
+ int i;
+
if (!kfd->init_complete)
return 0;
- kfd_smi_event_update_gpu_reset(kfd, false);
+ for (i = 0; i < kfd->num_nodes; i++) {
+ node = kfd->nodes[i];
+ kfd_smi_event_update_gpu_reset(node, false, reset_context);
+ }
- kfd->dqm->ops.pre_reset(kfd->dqm);
+ kgd2kfd_suspend(kfd, true);
- kgd2kfd_suspend(kfd, false);
+ for (i = 0; i < kfd->num_nodes; i++)
+ kfd_signal_reset_event(kfd->nodes[i]);
- kfd_signal_reset_event(kfd);
return 0;
}
@@ -1120,133 +989,177 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
int kgd2kfd_post_reset(struct kfd_dev *kfd)
{
int ret;
+ struct kfd_node *node;
+ int i;
if (!kfd->init_complete)
return 0;
- ret = kfd_resume(kfd);
- if (ret)
- return ret;
- atomic_dec(&kfd_locked);
+ for (i = 0; i < kfd->num_nodes; i++) {
+ ret = kfd_resume(kfd->nodes[i]);
+ if (ret)
+ return ret;
+ }
- atomic_set(&kfd->sram_ecc_flag, 0);
+ mutex_lock(&kfd_processes_mutex);
+ --kfd_locked;
+ mutex_unlock(&kfd_processes_mutex);
- kfd_smi_event_update_gpu_reset(kfd, true);
+ for (i = 0; i < kfd->num_nodes; i++) {
+ node = kfd->nodes[i];
+ atomic_set(&node->sram_ecc_flag, 0);
+ kfd_smi_event_update_gpu_reset(node, true, NULL);
+ }
return 0;
}
-bool kfd_is_locked(void)
+bool kfd_is_locked(struct kfd_dev *kfd)
{
- return (atomic_read(&kfd_locked) > 0);
+ uint8_t id = 0;
+ struct kfd_node *dev;
+
+ lockdep_assert_held(&kfd_processes_mutex);
+
+ /* check reset/suspend lock */
+ if (kfd_locked > 0)
+ return true;
+
+ if (kfd)
+ return kfd->kfd_dev_lock > 0;
+
+ /* check lock on all cgroup accessible devices */
+ while (kfd_topology_enum_kfd_devices(id++, &dev) == 0) {
+ if (!dev || kfd_devcgroup_check_permission(dev))
+ continue;
+
+ if (dev->kfd->kfd_dev_lock > 0)
+ return true;
+ }
+
+ return false;
}
-void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
+void kgd2kfd_suspend(struct kfd_dev *kfd, bool suspend_proc)
{
+ struct kfd_node *node;
+ int i;
+
if (!kfd->init_complete)
return;
- /* for runtime suspend, skip locking kfd */
- if (!run_pm) {
- /* For first KFD device suspend all the KFD processes */
- if (atomic_inc_return(&kfd_locked) == 1)
- kfd_suspend_all_processes();
- }
+ if (suspend_proc)
+ kgd2kfd_suspend_process(kfd);
- kfd->dqm->ops.stop(kfd->dqm);
- kfd_iommu_suspend(kfd);
+ for (i = 0; i < kfd->num_nodes; i++) {
+ node = kfd->nodes[i];
+ node->dqm->ops.stop(node->dqm);
+ }
}
-int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
+int kgd2kfd_resume(struct kfd_dev *kfd, bool resume_proc)
{
- int ret, count;
+ int ret, i;
if (!kfd->init_complete)
return 0;
- ret = kfd_resume(kfd);
- if (ret)
- return ret;
-
- /* for runtime resume, skip unlocking kfd */
- if (!run_pm) {
- count = atomic_dec_return(&kfd_locked);
- WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
- if (count == 0)
- ret = kfd_resume_all_processes();
+ for (i = 0; i < kfd->num_nodes; i++) {
+ ret = kfd_resume(kfd->nodes[i]);
+ if (ret)
+ return ret;
}
+ if (resume_proc)
+ ret = kgd2kfd_resume_process(kfd);
+
return ret;
}
-int kgd2kfd_resume_iommu(struct kfd_dev *kfd)
+void kgd2kfd_suspend_process(struct kfd_dev *kfd)
{
- int err = 0;
+ if (!kfd->init_complete)
+ return;
- err = kfd_iommu_resume(kfd);
- if (err)
- dev_err(kfd_device,
- "Failed to resume IOMMU for device %x:%x\n",
- kfd->pdev->vendor, kfd->pdev->device);
- return err;
+ mutex_lock(&kfd_processes_mutex);
+ /* For first KFD device suspend all the KFD processes */
+ if (++kfd_locked == 1)
+ kfd_suspend_all_processes();
+ mutex_unlock(&kfd_processes_mutex);
}
-static int kfd_resume(struct kfd_dev *kfd)
+int kgd2kfd_resume_process(struct kfd_dev *kfd)
{
- int err = 0;
+ int ret = 0;
- err = kfd->dqm->ops.start(kfd->dqm);
- if (err)
- dev_err(kfd_device,
- "Error starting queue manager for device %x:%x\n",
- kfd->pdev->vendor, kfd->pdev->device);
+ if (!kfd->init_complete)
+ return 0;
- return err;
+ mutex_lock(&kfd_processes_mutex);
+ if (--kfd_locked == 0)
+ ret = kfd_resume_all_processes();
+ WARN_ONCE(kfd_locked < 0, "KFD suspend / resume ref. error");
+ mutex_unlock(&kfd_processes_mutex);
+
+ return ret;
}
-static inline void kfd_queue_work(struct workqueue_struct *wq,
- struct work_struct *work)
+static int kfd_resume(struct kfd_node *node)
{
- int cpu, new_cpu;
+ int err = 0;
- cpu = new_cpu = smp_processor_id();
- do {
- new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
- if (cpu_to_node(new_cpu) == numa_node_id())
- break;
- } while (cpu != new_cpu);
+ err = node->dqm->ops.start(node->dqm);
+ if (err)
+ dev_err(kfd_device,
+ "Error starting queue manager for device %x:%x\n",
+ node->adev->pdev->vendor, node->adev->pdev->device);
- queue_work_on(new_cpu, wq, work);
+ return err;
}
/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
- uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE];
+ uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE], i;
bool is_patched = false;
unsigned long flags;
+ struct kfd_node *node;
if (!kfd->init_complete)
return;
- if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) {
+ if (kfd->device_info.ih_ring_entry_size > sizeof(patched_ihre)) {
dev_err_once(kfd_device, "Ring entry too small\n");
return;
}
- spin_lock_irqsave(&kfd->interrupt_lock, flags);
-
- if (kfd->interrupts_active
- && interrupt_is_wanted(kfd, ih_ring_entry,
- patched_ihre, &is_patched)
- && enqueue_ih_ring_entry(kfd,
- is_patched ? patched_ihre : ih_ring_entry))
- kfd_queue_work(kfd->ih_wq, &kfd->interrupt_work);
+ for (i = 0; i < kfd->num_nodes; i++) {
+ /* Guard against a race with teardown: between
+ * kfd_cleanup_nodes and kfree(kfd), kfd->nodes[i]
+ * may already have been set to NULL.
+ */
+ if (kfd->nodes[i])
+ node = kfd->nodes[i];
+ else
+ return;
+
+ spin_lock_irqsave(&node->interrupt_lock, flags);
+
+ if (node->interrupts_active
+ && interrupt_is_wanted(node, ih_ring_entry,
+ patched_ihre, &is_patched)
+ && enqueue_ih_ring_entry(node,
+ is_patched ? patched_ihre : ih_ring_entry)) {
+ queue_work(node->kfd->ih_wq, &node->interrupt_work);
+ spin_unlock_irqrestore(&node->interrupt_lock, flags);
+ return;
+ }
+ spin_unlock_irqrestore(&node->interrupt_lock, flags);
+ }
- spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
}
-int kgd2kfd_quiesce_mm(struct mm_struct *mm)
+int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger)
{
struct kfd_process *p;
int r;
@@ -1260,7 +1173,7 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm)
return -ESRCH;
WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
- r = kfd_process_evict_queues(p);
+ r = kfd_process_evict_queues(p, trigger);
kfd_unref_process(p);
return r;
@@ -1338,8 +1251,6 @@ out:
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
unsigned int chunk_size)
{
- unsigned int num_of_longs;
-
if (WARN_ON(buf_size < chunk_size))
return -EINVAL;
if (WARN_ON(buf_size == 0))
@@ -1350,11 +1261,8 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
kfd->gtt_sa_chunk_size = chunk_size;
kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;
- num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) /
- BITS_PER_LONG;
-
- kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL);
-
+ kfd->gtt_sa_bitmap = bitmap_zalloc(kfd->gtt_sa_num_of_chunks,
+ GFP_KERNEL);
if (!kfd->gtt_sa_bitmap)
return -ENOMEM;
@@ -1364,13 +1272,12 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
mutex_init(&kfd->gtt_sa_lock);
return 0;
-
}
static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
{
mutex_destroy(&kfd->gtt_sa_lock);
- kfree(kfd->gtt_sa_bitmap);
+ bitmap_free(kfd->gtt_sa_bitmap);
}
static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
@@ -1387,10 +1294,11 @@ static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr,
return (uint32_t *) ((uint64_t) start_addr + bit_num * chunk_size);
}
-int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
+int kfd_gtt_sa_allocate(struct kfd_node *node, unsigned int size,
struct kfd_mem_obj **mem_obj)
{
unsigned int found, start_search, cur_size;
+ struct kfd_dev *kfd = node->kfd;
if (size == 0)
return -EINVAL;
@@ -1438,7 +1346,7 @@ kfd_gtt_restart_search:
/* If we need only one chunk, mark it as allocated and get out */
if (size <= kfd->gtt_sa_chunk_size) {
pr_debug("Single bit\n");
- set_bit(found, kfd->gtt_sa_bitmap);
+ __set_bit(found, kfd->gtt_sa_bitmap);
goto kfd_gtt_out;
}
@@ -1476,10 +1384,8 @@ kfd_gtt_restart_search:
(*mem_obj)->range_start, (*mem_obj)->range_end);
/* Mark the chunks as allocated */
- for (found = (*mem_obj)->range_start;
- found <= (*mem_obj)->range_end;
- found++)
- set_bit(found, kfd->gtt_sa_bitmap);
+ bitmap_set(kfd->gtt_sa_bitmap, (*mem_obj)->range_start,
+ (*mem_obj)->range_end - (*mem_obj)->range_start + 1);
kfd_gtt_out:
mutex_unlock(&kfd->gtt_sa_lock);
@@ -1492,9 +1398,9 @@ kfd_gtt_no_free_chunk:
return -ENOMEM;
}
-int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
+int kfd_gtt_sa_free(struct kfd_node *node, struct kfd_mem_obj *mem_obj)
{
- unsigned int bit;
+ struct kfd_dev *kfd = node->kfd;
/* Act like kfree when trying to free a NULL object */
if (!mem_obj)
@@ -1506,10 +1412,8 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
mutex_lock(&kfd->gtt_sa_lock);
/* Mark the chunks as free */
- for (bit = mem_obj->range_start;
- bit <= mem_obj->range_end;
- bit++)
- clear_bit(bit, kfd->gtt_sa_bitmap);
+ bitmap_clear(kfd->gtt_sa_bitmap, mem_obj->range_start,
+ mem_obj->range_end - mem_obj->range_start + 1);
mutex_unlock(&kfd->gtt_sa_lock);
@@ -1519,29 +1423,287 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
{
+ /*
+ * TODO: Currently update SRAM ECC flag for first node.
+ * This needs to be updated later when we can
+ * identify SRAM ECC error on other nodes also.
+ */
if (kfd)
- atomic_inc(&kfd->sram_ecc_flag);
+ atomic_inc(&kfd->nodes[0]->sram_ecc_flag);
}
-void kfd_inc_compute_active(struct kfd_dev *kfd)
+void kfd_inc_compute_active(struct kfd_node *node)
{
- if (atomic_inc_return(&kfd->compute_profile) == 1)
- amdgpu_amdkfd_set_compute_idle(kfd->kgd, false);
+ if (atomic_inc_return(&node->kfd->compute_profile) == 1)
+ amdgpu_amdkfd_set_compute_idle(node->adev, false);
}
-void kfd_dec_compute_active(struct kfd_dev *kfd)
+void kfd_dec_compute_active(struct kfd_node *node)
{
- int count = atomic_dec_return(&kfd->compute_profile);
+ int count = atomic_dec_return(&node->kfd->compute_profile);
if (count == 0)
- amdgpu_amdkfd_set_compute_idle(kfd->kgd, true);
+ amdgpu_amdkfd_set_compute_idle(node->adev, true);
WARN_ONCE(count < 0, "Compute profile ref. count error");
}
+static bool kfd_compute_active(struct kfd_node *node)
+{
+ if (atomic_read(&node->kfd->compute_profile))
+ return true;
+ return false;
+}
+
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
{
+ /*
+ * TODO: For now, raise the throttling event only on first node.
+ * This will need to change after we are able to determine
+ * which node raised the throttling event.
+ */
if (kfd && kfd->init_complete)
- kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);
+ kfd_smi_event_update_thermal_throttling(kfd->nodes[0],
+ throttle_bitmask);
+}
+
+/* kfd_get_num_sdma_engines returns the number of PCIe optimized SDMA and
+ * kfd_get_num_xgmi_sdma_engines returns the number of XGMI SDMA.
+ * When the device has more than two engines, we reserve two for PCIe to enable
+ * full-duplex and the rest are used as XGMI.
+ */
+unsigned int kfd_get_num_sdma_engines(struct kfd_node *node)
+{
+ /* If XGMI is not supported, all SDMA engines are PCIe */
+ if (!node->adev->gmc.xgmi.supported)
+ return node->adev->sdma.num_instances/(int)node->kfd->num_nodes;
+
+ return min(node->adev->sdma.num_instances/(int)node->kfd->num_nodes, 2);
+}
+
+unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_node *node)
+{
+ /* After reserving engines for PCIe, the rest of the engines are XGMI */
+ return node->adev->sdma.num_instances/(int)node->kfd->num_nodes -
+ kfd_get_num_sdma_engines(node);
+}
+
+int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd)
+{
+ struct kfd_process *p;
+ int r = 0, temp, idx;
+
+ mutex_lock(&kfd_processes_mutex);
+
+ /* kfd_processes_count is per kfd_dev, return -EBUSY without
+ * further check
+ */
+ if (!!atomic_read(&kfd->kfd_processes_count)) {
+ pr_debug("process_wq_release not finished\n");
+ r = -EBUSY;
+ goto out;
+ }
+
+ if (hash_empty(kfd_processes_table) && !kfd_is_locked(kfd))
+ goto out;
+
+ /* fail during system reset/resume or while the kfd device is partition switching. */
+ if (kfd_is_locked(kfd)) {
+ r = -EBUSY;
+ goto out;
+ }
+
+ /*
+ * ensure all running processes are cgroup excluded from device before mode switch.
+ * i.e. no pdd was created on the process socket.
+ */
+ idx = srcu_read_lock(&kfd_processes_srcu);
+ hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
+ int i;
+
+ for (i = 0; i < p->n_pdds; i++) {
+ if (p->pdds[i]->dev->kfd != kfd)
+ continue;
+
+ r = -EBUSY;
+ goto proc_check_unlock;
+ }
+ }
+
+proc_check_unlock:
+ srcu_read_unlock(&kfd_processes_srcu, idx);
+out:
+ if (!r)
+ ++kfd->kfd_dev_lock;
+ mutex_unlock(&kfd_processes_mutex);
+
+ return r;
+}
+
+void kgd2kfd_unlock_kfd(struct kfd_dev *kfd)
+{
+ mutex_lock(&kfd_processes_mutex);
+ --kfd->kfd_dev_lock;
+ mutex_unlock(&kfd_processes_mutex);
+}
+
+int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id)
+{
+ struct kfd_node *node;
+ int ret;
+
+ if (!kfd->init_complete)
+ return 0;
+
+ if (node_id >= kfd->num_nodes) {
+ dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n",
+ node_id, kfd->num_nodes - 1);
+ return -EINVAL;
+ }
+ node = kfd->nodes[node_id];
+
+ ret = node->dqm->ops.unhalt(node->dqm);
+ if (ret)
+ dev_err(kfd_device, "Error in starting scheduler\n");
+
+ return ret;
+}
+
+int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd)
+{
+ struct kfd_node *node;
+ int i, r;
+
+ if (!kfd->init_complete)
+ return 0;
+
+ for (i = 0; i < kfd->num_nodes; i++) {
+ node = kfd->nodes[i];
+ r = node->dqm->ops.unhalt(node->dqm);
+ if (r) {
+ dev_err(kfd_device, "Error in starting scheduler\n");
+ return r;
+ }
+ }
+ return 0;
+}
+
+int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id)
+{
+ struct kfd_node *node;
+
+ if (!kfd->init_complete)
+ return 0;
+
+ if (node_id >= kfd->num_nodes) {
+ dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n",
+ node_id, kfd->num_nodes - 1);
+ return -EINVAL;
+ }
+
+ node = kfd->nodes[node_id];
+ return node->dqm->ops.halt(node->dqm);
+}
+
+int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd)
+{
+ struct kfd_node *node;
+ int i, r;
+
+ if (!kfd->init_complete)
+ return 0;
+
+ for (i = 0; i < kfd->num_nodes; i++) {
+ node = kfd->nodes[i];
+ r = node->dqm->ops.halt(node->dqm);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id)
+{
+ struct kfd_node *node;
+
+ if (!kfd->init_complete)
+ return false;
+
+ if (node_id >= kfd->num_nodes) {
+ dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n",
+ node_id, kfd->num_nodes - 1);
+ return false;
+ }
+
+ node = kfd->nodes[node_id];
+
+ return kfd_compute_active(node);
+}
+
+/**
+ * kgd2kfd_vmfault_fast_path() - KFD vm page fault interrupt handling fast path for gmc v9
+ * @adev: amdgpu device
+ * @entry: vm fault interrupt vector
+ * @retry_fault: if this is retry fault
+ *
+ * retry fault -
+ * with CAM enabled, adev primary ring
+ * | gmc_v9_0_process_interrupt()
+ * adev soft_ring
+ * | gmc_v9_0_process_interrupt() worker failed to recover page fault
+ * KFD node ih_fifo
+ * | KFD interrupt_wq worker
+ * kfd_signal_vm_fault_event
+ *
+ * without CAM, adev primary ring1
+ * | gmc_v9_0_process_interrupt worker failed to recover page fault
+ * KFD node ih_fifo
+ * | KFD interrupt_wq worker
+ * kfd_signal_vm_fault_event
+ *
+ * no-retry fault -
+ * adev primary ring
+ * | gmc_v9_0_process_interrupt()
+ * KFD node ih_fifo
+ * | KFD interrupt_wq worker
+ * kfd_signal_vm_fault_event
+ *
+ * fast path - After kfd_signal_vm_fault_event, gmc_v9_0_process_interrupt drops the page faults
+ * of the same process and doesn't copy the interrupt to the KFD node ih_fifo.
+ * With the gdb debugger enabled, the retry fault needs to be converted to a no-retry fault for
+ * the debugger, so the fast path cannot be used.
+ *
+ * Return:
+ * true - use the fast path to handle this fault
+ * false - use normal path to handle it
+ */
+bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
+ bool retry_fault)
+{
+ struct kfd_process *p;
+ u32 cam_index;
+
+ if (entry->ih == &adev->irq.ih_soft || entry->ih == &adev->irq.ih1) {
+ p = kfd_lookup_process_by_pasid(entry->pasid, NULL);
+ if (!p)
+ return true;
+
+ if (p->gpu_page_fault && !p->debug_trap_enabled) {
+ if (retry_fault && adev->irq.retry_cam_enabled) {
+ cam_index = entry->src_data[2] & 0x3ff;
+ WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);
+ }
+
+ kfd_unref_process(p);
+ return true;
+ }
+
+ /*
+ * This is the first page fault, set flag and then signal user space
+ */
+ p->gpu_page_fault = true;
+ kfd_unref_process(p);
+ }
+ return false;
}
#if defined(CONFIG_DEBUG_FS)
@@ -1549,13 +1711,18 @@ void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
/* This function will send a package to HIQ to hang the HWS
* which will trigger a GPU reset and bring the HWS back to normal state
*/
-int kfd_debugfs_hang_hws(struct kfd_dev *dev)
+int kfd_debugfs_hang_hws(struct kfd_node *dev)
{
if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) {
pr_err("HWS is not enabled");
return -EINVAL;
}
+ if (dev->kfd->shared_resources.enable_mes) {
+ dev_err(dev->adev->dev, "Inducing MES hang is not supported\n");
+ return -EINVAL;
+ }
+
return dqm_debugfs_hang_hws(dev->dqm);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 93e33dd84dd4..d7a2e7178ea9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -34,20 +35,29 @@
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_sdma.h"
+#include "mes_v11_api_def.h"
+#include "kfd_debug.h"
/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
+/* See unmap_queues_cpsch() */
+#define USE_DEFAULT_GRACE_PERIOD 0xffffffff
static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
u32 pasid, unsigned int vmid);
static int execute_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter,
- uint32_t filter_param);
+ uint32_t filter_param,
+ uint32_t grace_period);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter,
- uint32_t filter_param);
+ uint32_t filter_param,
+ uint32_t grace_period,
+ bool reset);
static int map_queues_cpsch(struct device_queue_manager *dqm);
@@ -58,8 +68,9 @@ static inline void deallocate_hqd(struct device_queue_manager *dqm,
struct queue *q);
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
static int allocate_sdma_queue(struct device_queue_manager *dqm,
- struct queue *q);
-static void kfd_process_hw_exception(struct work_struct *work);
+ struct queue *q, const uint32_t *restore_sdma_id);
+
+static int reset_queues_on_hws_hang(struct device_queue_manager *dqm, bool is_sdma);
static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
@@ -72,95 +83,365 @@ enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
int i;
- int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec
- + pipe) * dqm->dev->shared_resources.num_queue_per_pipe;
+ int pipe_offset = (mec * dqm->dev->kfd->shared_resources.num_pipe_per_mec
+ + pipe) * dqm->dev->kfd->shared_resources.num_queue_per_pipe;
/* queue is available for KFD usage if bit is 1 */
- for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
+ for (i = 0; i < dqm->dev->kfd->shared_resources.num_queue_per_pipe; ++i)
if (test_bit(pipe_offset + i,
- dqm->dev->shared_resources.cp_queue_bitmap))
+ dqm->dev->kfd->shared_resources.cp_queue_bitmap))
return true;
return false;
}
unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
{
- return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap,
- KGD_MAX_QUEUES);
+ return bitmap_weight(dqm->dev->kfd->shared_resources.cp_queue_bitmap,
+ AMDGPU_MAX_QUEUES);
}
unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
- return dqm->dev->shared_resources.num_queue_per_pipe;
+ return dqm->dev->kfd->shared_resources.num_queue_per_pipe;
}
unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
- return dqm->dev->shared_resources.num_pipe_per_mec;
+ return dqm->dev->kfd->shared_resources.num_pipe_per_mec;
}
-static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
+static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
{
- return dqm->dev->device_info->num_sdma_engines;
+ return kfd_get_num_sdma_engines(dqm->dev) +
+ kfd_get_num_xgmi_sdma_engines(dqm->dev);
}
-static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)
+unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
- return dqm->dev->device_info->num_xgmi_sdma_engines;
+ return kfd_get_num_sdma_engines(dqm->dev) *
+ dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
}
-static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
+unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
{
- return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm);
+ return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
+ dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
}
-unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
+static void init_sdma_bitmaps(struct device_queue_manager *dqm)
{
- return dqm->dev->device_info->num_sdma_engines
- * dqm->dev->device_info->num_sdma_queues_per_engine;
-}
+ bitmap_zero(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES);
+ bitmap_set(dqm->sdma_bitmap, 0, get_num_sdma_queues(dqm));
-unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
-{
- return dqm->dev->device_info->num_xgmi_sdma_engines
- * dqm->dev->device_info->num_sdma_queues_per_engine;
+ bitmap_zero(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES);
+ bitmap_set(dqm->xgmi_sdma_bitmap, 0, get_num_xgmi_sdma_queues(dqm));
+
+ /* Mask out the reserved queues */
+ bitmap_andnot(dqm->sdma_bitmap, dqm->sdma_bitmap,
+ dqm->dev->kfd->device_info.reserved_sdma_queues_bitmap,
+ KFD_MAX_SDMA_QUEUES);
}
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
- return dqm->dev->kfd2kgd->program_sh_mem_settings(
- dqm->dev->kgd, qpd->vmid,
- qpd->sh_mem_config,
- qpd->sh_mem_ape1_base,
- qpd->sh_mem_ape1_limit,
- qpd->sh_mem_bases);
+ uint32_t xcc_mask = dqm->dev->xcc_mask;
+ int xcc_id;
+
+ for_each_inst(xcc_id, xcc_mask)
+ dqm->dev->kfd2kgd->program_sh_mem_settings(
+ dqm->dev->adev, qpd->vmid, qpd->sh_mem_config,
+ qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit,
+ qpd->sh_mem_bases, xcc_id);
+}
+
+static void kfd_hws_hang(struct device_queue_manager *dqm)
+{
+ struct device_process_node *cur;
+ struct qcm_process_device *qpd;
+ struct queue *q;
+
+ /* Mark all device queues as reset. */
+ list_for_each_entry(cur, &dqm->queues, list) {
+ qpd = cur->qpd;
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+
+ pdd->has_reset_queue = true;
+ }
+ }
+
+ /*
+ * Issue a GPU reset if HWS is unresponsive
+ */
+ amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
+}
+
+static int convert_to_mes_queue_type(int queue_type)
+{
+ int mes_queue_type;
+
+ switch (queue_type) {
+ case KFD_QUEUE_TYPE_COMPUTE:
+ mes_queue_type = MES_QUEUE_TYPE_COMPUTE;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
+ mes_queue_type = MES_QUEUE_TYPE_SDMA;
+ break;
+ default:
+ WARN(1, "Invalid queue type %d", queue_type);
+ mes_queue_type = -EINVAL;
+ break;
+ }
+
+ return mes_queue_type;
+}
+
+static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+ struct mes_add_queue_input queue_input;
+ int r, queue_type;
+ uint64_t wptr_addr_off;
+
+ if (!dqm->sched_running || dqm->sched_halt)
+ return 0;
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return -EIO;
+
+ memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
+ queue_input.process_id = pdd->pasid;
+ queue_input.page_table_base_addr = qpd->page_table_base;
+ queue_input.process_va_start = 0;
+ queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
+ /* MES unit for quantum is 100ns */
+ queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM; /* Equivalent to 10ms. */
+ queue_input.process_context_addr = pdd->proc_ctx_gpu_addr;
+ queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */
+ queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
+ queue_input.inprocess_gang_priority = q->properties.priority;
+ queue_input.gang_global_priority_level =
+ AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+ queue_input.doorbell_offset = q->properties.doorbell_off;
+ queue_input.mqd_addr = q->gart_mqd_addr;
+ queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
+
+ wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1);
+ queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->properties.wptr_bo) + wptr_addr_off;
+
+ queue_input.is_kfd_process = 1;
+ queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL);
+ queue_input.queue_size = q->properties.queue_size >> 2;
+
+ queue_input.paging = false;
+ queue_input.tba_addr = qpd->tba_addr;
+ queue_input.tma_addr = qpd->tma_addr;
+ queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
+ queue_input.skip_process_ctx_clear =
+ qpd->pqm->process->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED &&
+ (qpd->pqm->process->debug_trap_enabled ||
+ kfd_dbg_has_ttmps_always_setup(q->device));
+
+ queue_type = convert_to_mes_queue_type(q->properties.type);
+ if (queue_type < 0) {
+ dev_err(adev->dev, "Queue type not supported with MES, queue:%d\n",
+ q->properties.type);
+ up_read(&adev->reset_domain->sem);
+ return -EINVAL;
+ }
+ queue_input.queue_type = (uint32_t)queue_type;
+
+ queue_input.exclusively_scheduled = q->properties.is_gws;
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ up_read(&adev->reset_domain->sem);
+ if (r) {
+ dev_err(adev->dev, "failed to add hardware queue to MES, doorbell=0x%x\n",
+ q->properties.doorbell_off);
+ dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
+ kfd_hws_hang(dqm);
+ }
+
+ return r;
+}
+
+static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
+ int r;
+ struct mes_remove_queue_input queue_input;
+
+ if (!dqm->sched_running || dqm->sched_halt)
+ return 0;
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return -EIO;
+
+ memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
+ queue_input.doorbell_offset = q->properties.doorbell_off;
+ queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ up_read(&adev->reset_domain->sem);
+
+ if (r) {
+ dev_err(adev->dev, "failed to remove hardware queue from MES, doorbell=0x%x\n",
+ q->properties.doorbell_off);
+ dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
+ kfd_hws_hang(dqm);
+ }
+
+ return r;
+}
+
+static int remove_all_kfd_queues_mes(struct device_queue_manager *dqm)
+{
+ struct device_process_node *cur;
+ struct device *dev = dqm->dev->adev->dev;
+ struct qcm_process_device *qpd;
+ struct queue *q;
+ int retval = 0;
+
+ list_for_each_entry(cur, &dqm->queues, list) {
+ qpd = cur->qpd;
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (q->properties.is_active) {
+ retval = remove_queue_mes(dqm, q, qpd);
+ if (retval) {
+ dev_err(dev, "%s: Failed to remove queue %d for dev %d",
+ __func__,
+ q->properties.queue_id,
+ dqm->dev->id);
+ return retval;
+ }
+ }
+ }
+ }
+
+ return retval;
+}
+
+static int add_all_kfd_queues_mes(struct device_queue_manager *dqm)
+{
+ struct device_process_node *cur;
+ struct device *dev = dqm->dev->adev->dev;
+ struct qcm_process_device *qpd;
+ struct queue *q;
+ int retval = 0;
+
+ list_for_each_entry(cur, &dqm->queues, list) {
+ qpd = cur->qpd;
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (!q->properties.is_active)
+ continue;
+ retval = add_queue_mes(dqm, q, qpd);
+ if (retval) {
+ dev_err(dev, "%s: Failed to add queue %d for dev %d",
+ __func__,
+ q->properties.queue_id,
+ dqm->dev->id);
+ return retval;
+ }
+ }
+ }
+
+ return retval;
+}
+
+static int suspend_all_queues_mes(struct device_queue_manager *dqm)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
+ int r = 0;
+
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return -EIO;
+
+ r = amdgpu_mes_suspend(adev);
+ up_read(&adev->reset_domain->sem);
+
+ if (r) {
+ dev_err(adev->dev, "failed to suspend gangs from MES\n");
+ dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
+ kfd_hws_hang(dqm);
+ }
+
+ return r;
+}
+
+static int resume_all_queues_mes(struct device_queue_manager *dqm)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
+ int r = 0;
+
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return -EIO;
+
+ r = amdgpu_mes_resume(adev);
+ up_read(&adev->reset_domain->sem);
+
+ if (r) {
+ dev_err(adev->dev, "failed to resume gangs from MES\n");
+ dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
+ kfd_hws_hang(dqm);
+ }
+
+ return r;
}
static void increment_queue_count(struct device_queue_manager *dqm,
- enum kfd_queue_type type)
+ struct qcm_process_device *qpd,
+ struct queue *q)
{
dqm->active_queue_count++;
- if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
+ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+ q->properties.type == KFD_QUEUE_TYPE_DIQ)
dqm->active_cp_queue_count++;
+
+ if (q->properties.is_gws) {
+ dqm->gws_queue_count++;
+ qpd->mapped_gws_queue = true;
+ }
}
static void decrement_queue_count(struct device_queue_manager *dqm,
- enum kfd_queue_type type)
+ struct qcm_process_device *qpd,
+ struct queue *q)
{
dqm->active_queue_count--;
- if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
+ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+ q->properties.type == KFD_QUEUE_TYPE_DIQ)
dqm->active_cp_queue_count--;
+
+ if (q->properties.is_gws) {
+ dqm->gws_queue_count--;
+ qpd->mapped_gws_queue = false;
+ }
}
-static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
+/*
+ * Allocate a doorbell ID to this queue.
+ * If restore_id is passed in, make sure the requested ID is valid, then allocate it.
+ */
+static int allocate_doorbell(struct qcm_process_device *qpd,
+ struct queue *q,
+ uint32_t const *restore_id)
{
- struct kfd_dev *dev = qpd->dqm->dev;
+ struct kfd_node *dev = qpd->dqm->dev;
- if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
+ if (!KFD_IS_SOC15(dev)) {
/* On pre-SOC15 chips we need to use the queue ID to
* preserve the user mode ABI.
*/
+
+ if (restore_id && *restore_id != q->properties.queue_id)
+ return -EINVAL;
+
q->doorbell_id = q->properties.queue_id;
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
@@ -169,30 +450,52 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
* The doobell index distance between RLC (2*i) and (2*i+1)
* for a SDMA engine is 512.
*/
- uint32_t *idx_offset =
- dev->shared_resources.sdma_doorbell_idx;
- q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
- + (q->properties.sdma_queue_id & 1)
- * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
- + (q->properties.sdma_queue_id >> 1);
+ uint32_t *idx_offset = dev->kfd->shared_resources.sdma_doorbell_idx;
+
+ /*
+ * q->properties.sdma_engine_id corresponds to the virtual
+ * sdma engine number. However, for doorbell allocation,
+ * we need the physical sdma engine id in order to get the
+ * correct doorbell offset.
+ */
+ uint32_t valid_id = idx_offset[qpd->dqm->dev->node_id *
+ get_num_all_sdma_engines(qpd->dqm) +
+ q->properties.sdma_engine_id]
+ + (q->properties.sdma_queue_id & 1)
+ * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
+ + (q->properties.sdma_queue_id >> 1);
+
+ if (restore_id && *restore_id != valid_id)
+ return -EINVAL;
+ q->doorbell_id = valid_id;
} else {
- /* For CP queues on SOC15 reserve a free doorbell ID */
- unsigned int found;
-
- found = find_first_zero_bit(qpd->doorbell_bitmap,
- KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
- if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
- pr_debug("No doorbells available");
- return -EBUSY;
+ /* For CP queues on SOC15 */
+ if (restore_id) {
+ /* make sure that ID is free */
+ if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap))
+ return -EINVAL;
+
+ q->doorbell_id = *restore_id;
+ } else {
+ /* or reserve a free doorbell ID */
+ unsigned int found;
+
+ found = find_first_zero_bit(qpd->doorbell_bitmap,
+ KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+ if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
+ pr_debug("No doorbells available");
+ return -EBUSY;
+ }
+ set_bit(found, qpd->doorbell_bitmap);
+ q->doorbell_id = found;
}
- set_bit(found, qpd->doorbell_bitmap);
- q->doorbell_id = found;
}
- q->properties.doorbell_off =
- kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
- q->doorbell_id);
+ q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev,
+ qpd->proc_doorbells,
+ q->doorbell_id,
+ dev->kfd->device_info.doorbell_size);
return 0;
}
@@ -200,9 +503,9 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,
struct queue *q)
{
unsigned int old;
- struct kfd_dev *dev = qpd->dqm->dev;
+ struct kfd_node *dev = qpd->dqm->dev;
- if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
+ if (!KFD_IS_SOC15(dev) ||
q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
return;
@@ -214,16 +517,22 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,
static void program_trap_handler_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
+ uint32_t xcc_mask = dqm->dev->xcc_mask;
+ int xcc_id;
+
if (dqm->dev->kfd2kgd->program_trap_handler_settings)
- dqm->dev->kfd2kgd->program_trap_handler_settings(
- dqm->dev->kgd, qpd->vmid,
- qpd->tba_addr, qpd->tma_addr);
+ for_each_inst(xcc_id, xcc_mask)
+ dqm->dev->kfd2kgd->program_trap_handler_settings(
+ dqm->dev->adev, qpd->vmid, qpd->tba_addr,
+ qpd->tma_addr, xcc_id);
}
static int allocate_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
{
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+ struct device *dev = dqm->dev->adev->dev;
int allocated_vmid = -1, i;
for (i = dqm->dev->vm_info.first_vmid_kfd;
@@ -235,42 +544,41 @@ static int allocate_vmid(struct device_queue_manager *dqm,
}
if (allocated_vmid < 0) {
- pr_err("no more vmid to allocate\n");
+ dev_err(dev, "no more vmid to allocate\n");
return -ENOSPC;
}
pr_debug("vmid allocated: %d\n", allocated_vmid);
- dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
+ dqm->vmid_pasid[allocated_vmid] = pdd->pasid;
- set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
+ set_pasid_vmid_mapping(dqm, pdd->pasid, allocated_vmid);
qpd->vmid = allocated_vmid;
q->properties.vmid = allocated_vmid;
program_sh_mem_settings(dqm, qpd);
- if (dqm->dev->device_info->asic_family >= CHIP_VEGA10 &&
- dqm->dev->cwsr_enabled)
+ if (KFD_IS_SOC15(dqm->dev) && dqm->dev->kfd->cwsr_enabled)
program_trap_handler_settings(dqm, qpd);
/* qpd->page_table_base is set earlier when register_process()
* is called, i.e. when the first queue is created.
*/
- dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
+ dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev,
qpd->vmid,
qpd->page_table_base);
/* invalidate the VM context after pasid and vmid mapping is set up */
kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
if (dqm->dev->kfd2kgd->set_scratch_backing_va)
- dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
+ dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,
qpd->sh_hidden_private_base, qpd->vmid);
return 0;
}
-static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
+static int flush_texture_cache_nocpsch(struct kfd_node *kdev,
struct qcm_process_device *qpd)
{
const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf;
@@ -283,7 +591,7 @@ static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
if (ret)
return ret;
- return amdgpu_amdkfd_submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
+ return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid,
qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
pmf->release_mem_size / sizeof(uint32_t));
}
@@ -292,10 +600,12 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
{
+ struct device *dev = dqm->dev->adev->dev;
+
/* On GFX v7, CP doesn't flush TC at dequeue */
- if (q->device->device_info->asic_family == CHIP_HAWAII)
+ if (q->device->adev->asic_type == CHIP_HAWAII)
if (flush_texture_cache_nocpsch(q->device, qpd))
- pr_err("Failed to flush TC\n");
+ dev_err(dev, "Failed to flush TC\n");
kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
@@ -309,7 +619,9 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
static int create_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q,
- struct qcm_process_device *qpd)
+ struct qcm_process_device *qpd,
+ const struct kfd_criu_queue_priv_data *qd,
+ const void *restore_mqd, const void *restore_ctl_stack)
{
struct mqd_manager *mqd_mgr;
int retval;
@@ -349,13 +661,13 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
q->pipe, q->queue);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
- retval = allocate_sdma_queue(dqm, q);
+ retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
if (retval)
goto deallocate_vmid;
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
}
- retval = allocate_doorbell(qpd, q);
+ retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
if (retval)
goto out_deallocate_hqd;
@@ -368,8 +680,15 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
retval = -ENOMEM;
goto out_deallocate_doorbell;
}
- mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
- &q->gart_mqd_addr, &q->properties);
+
+ if (qd)
+ mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
+ &q->properties, restore_mqd, restore_ctl_stack,
+ qd->ctl_stack_size);
+ else
+ mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
+ &q->gart_mqd_addr, &q->properties);
+
if (q->properties.is_active) {
if (!dqm->sched_running) {
WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
@@ -390,7 +709,7 @@ add_queue_to_list:
list_add(&q->list, &qpd->queues_list);
qpd->queue_count++;
if (q->properties.is_active)
- increment_queue_count(dqm, q->properties.type);
+ increment_queue_count(dqm, qpd, q);
/*
* Unconditionally increment this counter, regardless of the queue's
@@ -459,6 +778,74 @@ static inline void deallocate_hqd(struct device_queue_manager *dqm,
dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}
+#define SQ_IND_CMD_CMD_KILL 0x00000003
+#define SQ_IND_CMD_MODE_BROADCAST 0x00000001
+
+static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process *p)
+{
+ int status = 0;
+ unsigned int vmid;
+ uint16_t queried_pasid;
+ union SQ_CMD_BITS reg_sq_cmd;
+ union GRBM_GFX_INDEX_BITS reg_gfx_index;
+ struct kfd_process_device *pdd;
+ int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
+ int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
+ uint32_t xcc_mask = dev->xcc_mask;
+ int xcc_id;
+
+ reg_sq_cmd.u32All = 0;
+ reg_gfx_index.u32All = 0;
+
+ pr_debug("Killing all process wavefronts\n");
+
+ if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
+ dev_err(dev->adev->dev, "no vmid pasid mapping supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* taking the VMID for that process on the safe way using PDD */
+ pdd = kfd_get_process_device_data(dev, p);
+ if (!pdd)
+ return -EFAULT;
+
+ /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
+ * ATC_VMID15_PASID_MAPPING
+ * to check which VMID the current process is mapped to.
+ */
+
+ for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
+ status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
+ (dev->adev, vmid, &queried_pasid);
+
+ if (status && queried_pasid == pdd->pasid) {
+ pr_debug("Killing wave fronts of vmid %d and process pid %d\n",
+ vmid, p->lead_thread->pid);
+ break;
+ }
+ }
+
+ if (vmid > last_vmid_to_scan) {
+ dev_err(dev->adev->dev, "Didn't find vmid for process pid %d\n",
+ p->lead_thread->pid);
+ return -EFAULT;
+ }
+
+ reg_gfx_index.bits.sh_broadcast_writes = 1;
+ reg_gfx_index.bits.se_broadcast_writes = 1;
+ reg_gfx_index.bits.instance_broadcast_writes = 1;
+ reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
+ reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
+ reg_sq_cmd.bits.vm_id = vmid;
+
+ for_each_inst(xcc_id, xcc_mask)
+ dev->kfd2kgd->wave_control_execute(
+ dev->adev, reg_gfx_index.u32All,
+ reg_sq_cmd.u32All, xcc_id);
+
+ return 0;
+}
+
/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
* to avoid asynchronized access
*/
@@ -515,13 +902,8 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
deallocate_vmid(dqm, qpd, q);
}
qpd->queue_count--;
- if (q->properties.is_active) {
- decrement_queue_count(dqm, q->properties.type);
- if (q->properties.is_gws) {
- dqm->gws_queue_count--;
- qpd->mapped_gws_queue = false;
- }
- }
+ if (q->properties.is_active)
+ decrement_queue_count(dqm, qpd, q);
return retval;
}
@@ -532,6 +914,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
{
int retval;
uint64_t sdma_val = 0;
+ struct device *dev = dqm->dev->adev->dev;
struct kfd_process_device *pdd = qpd_to_pdd(qpd);
struct mqd_manager *mqd_mgr =
dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];
@@ -542,7 +925,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
&sdma_val);
if (retval)
- pr_err("Failed to read SDMA queue counter for queue: %d\n",
+ dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n",
q->properties.queue_id);
}
@@ -561,6 +944,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
struct mqd_update_info *minfo)
{
int retval = 0;
+ struct device *dev = dqm->dev->adev->dev;
struct mqd_manager *mqd_mgr;
struct kfd_process_device *pdd;
bool prev_active = false;
@@ -579,10 +963,20 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
/* Make sure the queue is unmapped before updating the MQD */
if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
- retval = unmap_queues_cpsch(dqm,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ if (!dqm->dev->kfd->shared_resources.enable_mes)
+ retval = unmap_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
+ else if (prev_active)
+ retval = remove_queue_mes(dqm, q, &pdd->qpd);
+
+ /* queue is reset so inaccessable */
+ if (pdd->has_reset_queue) {
+ retval = -EACCES;
+ goto out_unlock;
+ }
+
if (retval) {
- pr_err("unmap queue failed\n");
+ dev_err(dev, "unmap queue failed\n");
goto out_unlock;
}
} else if (prev_active &&
@@ -596,12 +990,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
}
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
- (dqm->dev->cwsr_enabled?
- KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
- KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
+ (dqm->dev->kfd->cwsr_enabled ?
+ KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
+ KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
if (retval) {
- pr_err("destroy mqd failed\n");
+ dev_err(dev, "destroy mqd failed\n");
goto out_unlock;
}
}
@@ -614,12 +1008,11 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
* dqm->active_queue_count to determine whether a new runlist must be
* uploaded.
*/
- if (q->properties.is_active && !prev_active)
- increment_queue_count(dqm, q->properties.type);
- else if (!q->properties.is_active && prev_active)
- decrement_queue_count(dqm, q->properties.type);
-
- if (q->gws && !q->properties.is_gws) {
+ if (q->properties.is_active && !prev_active) {
+ increment_queue_count(dqm, &pdd->qpd, q);
+ } else if (!q->properties.is_active && prev_active) {
+ decrement_queue_count(dqm, &pdd->qpd, q);
+ } else if (q->gws && !q->properties.is_gws) {
if (q->properties.is_active) {
dqm->gws_queue_count++;
pdd->qpd.mapped_gws_queue = true;
@@ -633,9 +1026,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
q->properties.is_gws = false;
}
- if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
- retval = map_queues_cpsch(dqm);
- else if (q->properties.is_active &&
+ if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
+ if (!dqm->dev->kfd->shared_resources.enable_mes)
+ retval = map_queues_cpsch(dqm);
+ else if (q->properties.is_active)
+ retval = add_queue_mes(dqm, q, &pdd->qpd);
+ } else if (q->properties.is_active &&
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
@@ -653,6 +1049,92 @@ out_unlock:
return retval;
}
+/* suspend_single_queue does not lock the dqm like the
+ * evict_process_queues_cpsch or evict_process_queues_nocpsch. You should
+ * lock the dqm before calling, and unlock after calling.
+ *
+ * The reason we don't lock the dqm is because this function may be
+ * called on multiple queues in a loop, so rather than locking/unlocking
+ * multiple times, we will just keep the dqm locked for all of the calls.
+ */
+static int suspend_single_queue(struct device_queue_manager *dqm,
+ struct kfd_process_device *pdd,
+ struct queue *q)
+{
+ bool is_new;
+
+ if (q->properties.is_suspended)
+ return 0;
+
+ pr_debug("Suspending process pid %d queue [%i]\n",
+ pdd->process->lead_thread->pid,
+ q->properties.queue_id);
+
+ is_new = q->properties.exception_status & KFD_EC_MASK(EC_QUEUE_NEW);
+
+ if (is_new || q->properties.is_being_destroyed) {
+ pr_debug("Suspend: skip %s queue id %i\n",
+ is_new ? "new" : "destroyed",
+ q->properties.queue_id);
+ return -EBUSY;
+ }
+
+ q->properties.is_suspended = true;
+ if (q->properties.is_active) {
+ if (dqm->dev->kfd->shared_resources.enable_mes) {
+ int r = remove_queue_mes(dqm, q, &pdd->qpd);
+
+ if (r)
+ return r;
+ }
+
+ decrement_queue_count(dqm, &pdd->qpd, q);
+ q->properties.is_active = false;
+ }
+
+ return 0;
+}
+
+/* resume_single_queue does not lock the dqm like the functions
+ * restore_process_queues_cpsch or restore_process_queues_nocpsch. You should
+ * lock the dqm before calling, and unlock after calling.
+ *
+ * The reason we don't lock the dqm is because this function may be
+ * called on multiple queues in a loop, so rather than locking/unlocking
+ * multiple times, we will just keep the dqm locked for all of the calls.
+ */
+static int resume_single_queue(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ struct queue *q)
+{
+ struct kfd_process_device *pdd;
+
+ if (!q->properties.is_suspended)
+ return 0;
+
+ pdd = qpd_to_pdd(qpd);
+
+ pr_debug("Restoring from suspend process pid %d queue [%i]\n",
+ pdd->process->lead_thread->pid,
+ q->properties.queue_id);
+
+ q->properties.is_suspended = false;
+
+ if (QUEUE_IS_ACTIVE(q->properties)) {
+ if (dqm->dev->kfd->shared_resources.enable_mes) {
+ int r = add_queue_mes(dqm, q, &pdd->qpd);
+
+ if (r)
+ return r;
+ }
+
+ q->properties.is_active = true;
+ increment_queue_count(dqm, qpd, q);
+ }
+
+ return 0;
+}
+
static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
@@ -666,8 +1148,8 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
goto out;
pdd = qpd_to_pdd(qpd);
- pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
- pdd->process->pasid);
+ pr_debug_ratelimited("Evicting process pid %d queues\n",
+ pdd->process->lead_thread->pid);
pdd->last_evict_timestamp = get_jiffies_64();
/* Mark all queues as evicted. Deactivate all active queues on
@@ -681,19 +1163,15 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
q->properties.is_active = false;
- decrement_queue_count(dqm, q->properties.type);
- if (q->properties.is_gws) {
- dqm->gws_queue_count--;
- qpd->mapped_gws_queue = false;
- }
+ decrement_queue_count(dqm, qpd, q);
if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
continue;
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
- (dqm->dev->cwsr_enabled?
- KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
- KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
+ (dqm->dev->kfd->cwsr_enabled ?
+ KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
+ KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
if (retval && !ret)
/* Return the first error, but keep going to
@@ -711,6 +1189,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
struct queue *q;
+ struct device *dev = dqm->dev->adev->dev;
struct kfd_process_device *pdd;
int retval = 0;
@@ -719,8 +1198,25 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
goto out;
pdd = qpd_to_pdd(qpd);
- pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
- pdd->process->pasid);
+
+ /* The debugger creates processes that temporarily have not acquired
+ * all VMs for all devices and has no VMs itself.
+ * Skip queue eviction on process eviction.
+ */
+ if (!pdd->drm_priv)
+ goto out;
+
+ pr_debug_ratelimited("Evicting process pid %d queues\n",
+ pdd->process->lead_thread->pid);
+
+ if (dqm->dev->kfd->shared_resources.enable_mes) {
+ pdd->last_evict_timestamp = get_jiffies_64();
+ retval = suspend_all_queues_mes(dqm);
+ if (retval) {
+ dev_err(dev, "Suspending all queues failed");
+ goto out;
+ }
+ }
/* Mark all queues as evicted. Deactivate all active queues on
* the qpd.
@@ -731,13 +1227,30 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
continue;
q->properties.is_active = false;
- decrement_queue_count(dqm, q->properties.type);
+ decrement_queue_count(dqm, qpd, q);
+
+ if (dqm->dev->kfd->shared_resources.enable_mes) {
+ retval = remove_queue_mes(dqm, q, qpd);
+ if (retval) {
+ dev_err(dev, "Failed to evict queue %d\n",
+ q->properties.queue_id);
+ goto out;
+ }
+ }
+ }
+
+ if (!dqm->dev->kfd->shared_resources.enable_mes) {
+ pdd->last_evict_timestamp = get_jiffies_64();
+ retval = execute_queues_cpsch(dqm,
+ qpd->is_debug ?
+ KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
+ USE_DEFAULT_GRACE_PERIOD);
+ } else {
+ retval = resume_all_queues_mes(dqm);
+ if (retval)
+ dev_err(dev, "Resuming all queues failed");
}
- pdd->last_evict_timestamp = get_jiffies_64();
- retval = execute_queues_cpsch(dqm,
- qpd->is_debug ?
- KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
out:
dqm_unlock(dqm);
@@ -767,8 +1280,8 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
goto out;
}
- pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
- pdd->process->pasid);
+ pr_debug_ratelimited("Restoring process pid %d queues\n",
+ pdd->process->lead_thread->pid);
/* Update PD Base in QPD */
qpd->page_table_base = pd_base;
@@ -776,7 +1289,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
if (!list_empty(&qpd->queues_list)) {
dqm->dev->kfd2kgd->set_vm_context_page_table_base(
- dqm->dev->kgd,
+ dqm->dev->adev,
qpd->vmid,
qpd->page_table_base);
kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
@@ -802,11 +1315,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
q->properties.is_active = true;
- increment_queue_count(dqm, q->properties.type);
- if (q->properties.is_gws) {
- dqm->gws_queue_count++;
- qpd->mapped_gws_queue = true;
- }
+ increment_queue_count(dqm, qpd, q);
if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
continue;
@@ -833,14 +1342,12 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
struct queue *q;
+ struct device *dev = dqm->dev->adev->dev;
struct kfd_process_device *pdd;
- uint64_t pd_base;
uint64_t eviction_duration;
int retval = 0;
pdd = qpd_to_pdd(qpd);
- /* Retrieve PD base */
- pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
dqm_lock(dqm);
if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
@@ -850,12 +1357,19 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
goto out;
}
- pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
- pdd->process->pasid);
+ /* The debugger creates processes that temporarily have not acquired
+ * all VMs for all devices and has no VMs itself.
+ * Skip queue restore on process restore.
+ */
+ if (!pdd->drm_priv)
+ goto vm_not_acquired;
+
+ pr_debug_ratelimited("Restoring process pid %d queues\n",
+ pdd->process->lead_thread->pid);
/* Update PD Base in QPD */
- qpd->page_table_base = pd_base;
- pr_debug("Updated PD address to 0x%llx\n", pd_base);
+ qpd->page_table_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
+ pr_debug("Updated PD address to 0x%llx\n", qpd->page_table_base);
/* activate all active queues on the qpd */
list_for_each_entry(q, &qpd->queues_list, list) {
@@ -864,13 +1378,24 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
continue;
q->properties.is_active = true;
- increment_queue_count(dqm, q->properties.type);
+ increment_queue_count(dqm, &pdd->qpd, q);
+
+ if (dqm->dev->kfd->shared_resources.enable_mes) {
+ retval = add_queue_mes(dqm, q, qpd);
+ if (retval) {
+ dev_err(dev, "Failed to restore queue %d\n",
+ q->properties.queue_id);
+ goto out;
+ }
+ }
}
- retval = execute_queues_cpsch(dqm,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
- qpd->evicted = 0;
+ if (!dqm->dev->kfd->shared_resources.enable_mes)
+ retval = execute_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
atomic64_add(eviction_duration, &pdd->evict_duration_counter);
+vm_not_acquired:
+ qpd->evicted = 0;
out:
dqm_unlock(dqm);
return retval;
@@ -953,17 +1478,32 @@ static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
unsigned int vmid)
{
- return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
- dqm->dev->kgd, pasid, vmid);
+ uint32_t xcc_mask = dqm->dev->xcc_mask;
+ int xcc_id, ret;
+
+ for_each_inst(xcc_id, xcc_mask) {
+ ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
+ dqm->dev->adev, pasid, vmid, xcc_id);
+ if (ret)
+ break;
+ }
+
+ return ret;
}
static void init_interrupts(struct device_queue_manager *dqm)
{
- unsigned int i;
+ uint32_t xcc_mask = dqm->dev->xcc_mask;
+ unsigned int i, xcc_id;
- for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
- if (is_pipe_enabled(dqm, 0, i))
- dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
+ for_each_inst(xcc_id, xcc_mask) {
+ for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) {
+ if (is_pipe_enabled(dqm, 0, i)) {
+ dqm->dev->kfd2kgd->init_interrupts(
+ dqm->dev->adev, i, xcc_id);
+ }
+ }
+ }
}
static int initialize_nocpsch(struct device_queue_manager *dqm)
@@ -988,14 +1528,13 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
if (test_bit(pipe_offset + queue,
- dqm->dev->shared_resources.cp_queue_bitmap))
+ dqm->dev->kfd->shared_resources.cp_queue_bitmap))
dqm->allocated_queues[pipe] |= 1 << queue;
}
memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
- dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
- dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
+ init_sdma_bitmaps(dqm);
return 0;
}
@@ -1014,68 +1553,134 @@ static void uninitialize(struct device_queue_manager *dqm)
static int start_nocpsch(struct device_queue_manager *dqm)
{
+ int r = 0;
+
pr_info("SW scheduler is used");
init_interrupts(dqm);
-
- if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
- return pm_init(&dqm->packet_mgr, dqm);
- dqm->sched_running = true;
- return 0;
+ if (dqm->dev->adev->asic_type == CHIP_HAWAII)
+ r = pm_init(&dqm->packet_mgr, dqm);
+ if (!r)
+ dqm->sched_running = true;
+
+ return r;
}
static int stop_nocpsch(struct device_queue_manager *dqm)
{
- if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
- pm_uninit(&dqm->packet_mgr, false);
+ dqm_lock(dqm);
+ if (!dqm->sched_running) {
+ dqm_unlock(dqm);
+ return 0;
+ }
+
+ if (dqm->dev->adev->asic_type == CHIP_HAWAII)
+ pm_uninit(&dqm->packet_mgr);
dqm->sched_running = false;
+ dqm_unlock(dqm);
return 0;
}
-static void pre_reset(struct device_queue_manager *dqm)
-{
- dqm_lock(dqm);
- dqm->is_resetting = true;
- dqm_unlock(dqm);
-}
-
static int allocate_sdma_queue(struct device_queue_manager *dqm,
- struct queue *q)
+ struct queue *q, const uint32_t *restore_sdma_id)
{
+ struct device *dev = dqm->dev->adev->dev;
int bit;
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
- if (dqm->sdma_bitmap == 0) {
- pr_err("No more SDMA queue to allocate\n");
+ if (bitmap_empty(dqm->sdma_bitmap, get_num_sdma_queues(dqm))) {
+ dev_warn(dev, "No more SDMA queue to allocate (%d total queues)\n",
+ get_num_sdma_queues(dqm));
return -ENOMEM;
}
- bit = __ffs64(dqm->sdma_bitmap);
- dqm->sdma_bitmap &= ~(1ULL << bit);
- q->sdma_id = bit;
- q->properties.sdma_engine_id = q->sdma_id %
- get_num_sdma_engines(dqm);
+ if (restore_sdma_id) {
+ /* Re-use existing sdma_id */
+ if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) {
+ dev_err(dev, "SDMA queue already in use\n");
+ return -EBUSY;
+ }
+ clear_bit(*restore_sdma_id, dqm->sdma_bitmap);
+ q->sdma_id = *restore_sdma_id;
+ } else {
+ /* Find first available sdma_id */
+ bit = find_first_bit(dqm->sdma_bitmap,
+ get_num_sdma_queues(dqm));
+ clear_bit(bit, dqm->sdma_bitmap);
+ q->sdma_id = bit;
+ }
+
+ q->properties.sdma_engine_id =
+ q->sdma_id % kfd_get_num_sdma_engines(dqm->dev);
q->properties.sdma_queue_id = q->sdma_id /
- get_num_sdma_engines(dqm);
+ kfd_get_num_sdma_engines(dqm->dev);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
- if (dqm->xgmi_sdma_bitmap == 0) {
- pr_err("No more XGMI SDMA queue to allocate\n");
+ if (bitmap_empty(dqm->xgmi_sdma_bitmap, get_num_xgmi_sdma_queues(dqm))) {
+ dev_warn(dev, "No more XGMI SDMA queue to allocate (%d total queues)\n",
+ get_num_xgmi_sdma_queues(dqm));
return -ENOMEM;
}
- bit = __ffs64(dqm->xgmi_sdma_bitmap);
- dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
- q->sdma_id = bit;
+ if (restore_sdma_id) {
+ /* Re-use existing sdma_id */
+ if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) {
+ dev_err(dev, "SDMA queue already in use\n");
+ return -EBUSY;
+ }
+ clear_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap);
+ q->sdma_id = *restore_sdma_id;
+ } else {
+ bit = find_first_bit(dqm->xgmi_sdma_bitmap,
+ get_num_xgmi_sdma_queues(dqm));
+ clear_bit(bit, dqm->xgmi_sdma_bitmap);
+ q->sdma_id = bit;
+ }
/* sdma_engine_id is sdma id including
* both PCIe-optimized SDMAs and XGMI-
* optimized SDMAs. The calculation below
* assumes the first N engines are always
* PCIe-optimized ones
*/
- q->properties.sdma_engine_id = get_num_sdma_engines(dqm) +
- q->sdma_id % get_num_xgmi_sdma_engines(dqm);
+ q->properties.sdma_engine_id =
+ kfd_get_num_sdma_engines(dqm->dev) +
+ q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
q->properties.sdma_queue_id = q->sdma_id /
- get_num_xgmi_sdma_engines(dqm);
+ kfd_get_num_xgmi_sdma_engines(dqm->dev);
+ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) {
+ int i, num_queues, num_engines, eng_offset = 0, start_engine;
+ bool free_bit_found = false, is_xgmi = false;
+
+ if (q->properties.sdma_engine_id < kfd_get_num_sdma_engines(dqm->dev)) {
+ num_queues = get_num_sdma_queues(dqm);
+ num_engines = kfd_get_num_sdma_engines(dqm->dev);
+ q->properties.type = KFD_QUEUE_TYPE_SDMA;
+ } else {
+ num_queues = get_num_xgmi_sdma_queues(dqm);
+ num_engines = kfd_get_num_xgmi_sdma_engines(dqm->dev);
+ eng_offset = kfd_get_num_sdma_engines(dqm->dev);
+ q->properties.type = KFD_QUEUE_TYPE_SDMA_XGMI;
+ is_xgmi = true;
+ }
+
+ /* Scan available bit based on target engine ID. */
+ start_engine = q->properties.sdma_engine_id - eng_offset;
+ for (i = start_engine; i < num_queues; i += num_engines) {
+
+ if (!test_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap))
+ continue;
+
+ clear_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap);
+ q->sdma_id = i;
+ q->properties.sdma_queue_id = q->sdma_id / num_engines;
+ free_bit_found = true;
+ break;
+ }
+
+ if (!free_bit_found) {
+ dev_warn(dev, "No more SDMA queue to allocate for target ID %i (%d total queues)\n",
+ q->properties.sdma_engine_id, num_queues);
+ return -ENOMEM;
+ }
}
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
@@ -1090,11 +1695,11 @@ static void deallocate_sdma_queue(struct device_queue_manager *dqm,
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
if (q->sdma_id >= get_num_sdma_queues(dqm))
return;
- dqm->sdma_bitmap |= (1ULL << q->sdma_id);
+ set_bit(q->sdma_id, dqm->sdma_bitmap);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
return;
- dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
+ set_bit(q->sdma_id, dqm->xgmi_sdma_bitmap);
}
}
@@ -1106,15 +1711,16 @@ static int set_sched_resources(struct device_queue_manager *dqm)
{
int i, mec;
struct scheduling_resources res;
+ struct device *dev = dqm->dev->adev->dev;
- res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;
+ res.vmid_mask = dqm->dev->compute_vmid_bitmap;
res.queue_mask = 0;
- for (i = 0; i < KGD_MAX_QUEUES; ++i) {
- mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
- / dqm->dev->shared_resources.num_pipe_per_mec;
+ for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) {
+ mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe)
+ / dqm->dev->kfd->shared_resources.num_pipe_per_mec;
- if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap))
+ if (!test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap))
continue;
/* only acquire queues from the first MEC */
@@ -1126,13 +1732,13 @@ static int set_sched_resources(struct device_queue_manager *dqm)
* definition of res.queue_mask needs updating
*/
if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
- pr_err("Invalid queue enabled by amdgpu: %d\n", i);
+ dev_err(dev, "Invalid queue enabled by amdgpu: %d\n", i);
break;
}
res.queue_mask |= 1ull
<< amdgpu_queue_mask_bit_to_set_resource_bit(
- (struct amdgpu_device *)dqm->dev->kgd, i);
+ dqm->dev->adev, i);
}
res.gws_mask = ~0ull;
res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;
@@ -1147,9 +1753,6 @@ static int set_sched_resources(struct device_queue_manager *dqm)
static int initialize_cpsch(struct device_queue_manager *dqm)
{
- uint64_t num_sdma_queues;
- uint64_t num_xgmi_sdma_queues;
-
pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
mutex_init(&dqm->lock_hidden);
@@ -1158,39 +1761,89 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
dqm->active_cp_queue_count = 0;
dqm->gws_queue_count = 0;
dqm->active_runlist = false;
+ dqm->trap_debug_vmid = 0;
- num_sdma_queues = get_num_sdma_queues(dqm);
- if (num_sdma_queues >= BITS_PER_TYPE(dqm->sdma_bitmap))
- dqm->sdma_bitmap = ULLONG_MAX;
- else
- dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1);
+ init_sdma_bitmaps(dqm);
- num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm);
- if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm->xgmi_sdma_bitmap))
- dqm->xgmi_sdma_bitmap = ULLONG_MAX;
+ update_dqm_wait_times(dqm);
+ return 0;
+}
+
+/* halt_cpsch:
+ * Unmap queues so the schedule doesn't continue remaining jobs in the queue.
+ * Then set dqm->sched_halt so queues don't map to runlist until unhalt_cpsch
+ * is called.
+ */
+static int halt_cpsch(struct device_queue_manager *dqm)
+{
+ int ret = 0;
+
+ dqm_lock(dqm);
+ if (!dqm->sched_running) {
+ dqm_unlock(dqm);
+ return 0;
+ }
+
+ WARN_ONCE(dqm->sched_halt, "Scheduling is already on halt\n");
+
+ if (!dqm->is_hws_hang) {
+ if (!dqm->dev->kfd->shared_resources.enable_mes)
+ ret = unmap_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
+ USE_DEFAULT_GRACE_PERIOD, false);
+ else
+ ret = remove_all_kfd_queues_mes(dqm);
+ }
+ dqm->sched_halt = true;
+ dqm_unlock(dqm);
+
+ return ret;
+}
+
+/* unhalt_cpsch
+ * Unset dqm->sched_halt and map queues back to runlist
+ */
+static int unhalt_cpsch(struct device_queue_manager *dqm)
+{
+ int ret = 0;
+
+ dqm_lock(dqm);
+ if (!dqm->sched_running || !dqm->sched_halt) {
+ WARN_ONCE(!dqm->sched_halt, "Scheduling is not on halt.\n");
+ dqm_unlock(dqm);
+ return 0;
+ }
+ dqm->sched_halt = false;
+ if (!dqm->dev->kfd->shared_resources.enable_mes)
+ ret = execute_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
+ 0, USE_DEFAULT_GRACE_PERIOD);
else
- dqm->xgmi_sdma_bitmap = (BIT_ULL(num_xgmi_sdma_queues) - 1);
+ ret = add_all_kfd_queues_mes(dqm);
- INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
+ dqm_unlock(dqm);
- return 0;
+ return ret;
}
static int start_cpsch(struct device_queue_manager *dqm)
{
- int retval;
+ struct device *dev = dqm->dev->adev->dev;
+ int retval, num_hw_queue_slots;
retval = 0;
dqm_lock(dqm);
- retval = pm_init(&dqm->packet_mgr, dqm);
- if (retval)
- goto fail_packet_manager_init;
- retval = set_sched_resources(dqm);
- if (retval)
- goto fail_set_sched_resources;
+ if (!dqm->dev->kfd->shared_resources.enable_mes) {
+ retval = pm_init(&dqm->packet_mgr, dqm);
+ if (retval)
+ goto fail_packet_manager_init;
+ retval = set_sched_resources(dqm);
+ if (retval)
+ goto fail_set_sched_resources;
+ }
pr_debug("Allocating fence memory\n");
/* allocate fence memory on the gart */
@@ -1206,16 +1859,37 @@ static int start_cpsch(struct device_queue_manager *dqm)
init_interrupts(dqm);
/* clear hang status when driver try to start the hw scheduler */
- dqm->is_hws_hang = false;
- dqm->is_resetting = false;
dqm->sched_running = true;
- execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+
+ if (!dqm->dev->kfd->shared_resources.enable_mes) {
+ if (pm_config_dequeue_wait_counts(&dqm->packet_mgr,
+ KFD_DEQUEUE_WAIT_INIT, 0 /* unused */))
+ dev_err(dev, "Setting optimized dequeue wait failed. Using default values\n");
+ execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
+ }
+
+ /* setup per-queue reset detection buffer */
+ num_hw_queue_slots = dqm->dev->kfd->shared_resources.num_queue_per_pipe *
+ dqm->dev->kfd->shared_resources.num_pipe_per_mec *
+ NUM_XCC(dqm->dev->xcc_mask);
+
+ dqm->detect_hang_info_size = num_hw_queue_slots * sizeof(struct dqm_detect_hang_info);
+ dqm->detect_hang_info = kzalloc(dqm->detect_hang_info_size, GFP_KERNEL);
+
+ if (!dqm->detect_hang_info) {
+ retval = -ENOMEM;
+ goto fail_detect_hang_buffer;
+ }
+
dqm_unlock(dqm);
return 0;
+fail_detect_hang_buffer:
+ kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
fail_allocate_vidmem:
fail_set_sched_resources:
- pm_uninit(&dqm->packet_mgr, false);
+ if (!dqm->dev->kfd->shared_resources.enable_mes)
+ pm_uninit(&dqm->packet_mgr);
fail_packet_manager_init:
dqm_unlock(dqm);
return retval;
@@ -1223,7 +1897,7 @@ fail_packet_manager_init:
static int stop_cpsch(struct device_queue_manager *dqm)
{
- bool hanging;
+ int ret = 0;
dqm_lock(dqm);
if (!dqm->sched_running) {
@@ -1231,18 +1905,25 @@ static int stop_cpsch(struct device_queue_manager *dqm)
return 0;
}
- if (!dqm->is_hws_hang)
- unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
- hanging = dqm->is_hws_hang || dqm->is_resetting;
+ if (!dqm->dev->kfd->shared_resources.enable_mes)
+ ret = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES,
+ 0, USE_DEFAULT_GRACE_PERIOD, false);
+ else
+ ret = remove_all_kfd_queues_mes(dqm);
+
dqm->sched_running = false;
- pm_release_ib(&dqm->packet_mgr);
+ if (!dqm->dev->kfd->shared_resources.enable_mes)
+ pm_release_ib(&dqm->packet_mgr);
kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
- pm_uninit(&dqm->packet_mgr, hanging);
+ if (!dqm->dev->kfd->shared_resources.enable_mes)
+ pm_uninit(&dqm->packet_mgr);
+ kfree(dqm->detect_hang_info);
+ dqm->detect_hang_info = NULL;
dqm_unlock(dqm);
- return 0;
+ return ret;
}
static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
@@ -1266,9 +1947,10 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
dqm->total_queue_count);
list_add(&kq->list, &qpd->priv_queue_list);
- increment_queue_count(dqm, kq->queue->properties.type);
+ increment_queue_count(dqm, qpd, kq->queue);
qpd->is_debug = true;
- execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
+ USE_DEFAULT_GRACE_PERIOD);
dqm_unlock(dqm);
return 0;
@@ -1280,9 +1962,10 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
{
dqm_lock(dqm);
list_del(&kq->list);
- decrement_queue_count(dqm, kq->queue->properties.type);
+ decrement_queue_count(dqm, qpd, kq->queue);
qpd->is_debug = false;
- execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
+ execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
+ USE_DEFAULT_GRACE_PERIOD);
/*
* Unconditionally decrement this counter, regardless of the queue's
* type.
@@ -1294,7 +1977,9 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
}
static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
- struct qcm_process_device *qpd)
+ struct qcm_process_device *qpd,
+ const struct kfd_criu_queue_priv_data *qd,
+ const void *restore_mqd, const void *restore_ctl_stack)
{
int retval;
struct mqd_manager *mqd_mgr;
@@ -1307,15 +1992,16 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
}
if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
- q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) {
dqm_lock(dqm);
- retval = allocate_sdma_queue(dqm, q);
+ retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
dqm_unlock(dqm);
if (retval)
goto out;
}
- retval = allocate_doorbell(qpd, q);
+ retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
if (retval)
goto out_deallocate_sdma_queue;
@@ -1340,17 +2026,30 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
* updates the is_evicted flag but is a no-op otherwise.
*/
q->properties.is_evicted = !!qpd->evicted;
- mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
- &q->gart_mqd_addr, &q->properties);
+ q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled &&
+ kfd_dbg_has_cwsr_workaround(q->device);
+
+ if (qd)
+ mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
+ &q->properties, restore_mqd, restore_ctl_stack,
+ qd->ctl_stack_size);
+ else
+ mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
+ &q->gart_mqd_addr, &q->properties);
list_add(&q->list, &qpd->queues_list);
qpd->queue_count++;
if (q->properties.is_active) {
- increment_queue_count(dqm, q->properties.type);
+ increment_queue_count(dqm, qpd, q);
- execute_queues_cpsch(dqm,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ if (!dqm->dev->kfd->shared_resources.enable_mes)
+ retval = execute_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
+ else
+ retval = add_queue_mes(dqm, q, qpd);
+ if (retval)
+ goto cleanup_queue;
}
/*
@@ -1365,6 +2064,13 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
dqm_unlock(dqm);
return retval;
+cleanup_queue:
+ qpd->queue_count--;
+ list_del(&q->list);
+ if (q->properties.is_active)
+ decrement_queue_count(dqm, qpd, q);
+ mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+ dqm_unlock(dqm);
out_deallocate_doorbell:
deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
@@ -1378,15 +2084,22 @@ out:
return retval;
}
-int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
- uint64_t fence_value,
- unsigned int timeout_ms)
+int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm,
+ uint64_t fence_value,
+ unsigned int timeout_ms)
{
unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
+ struct device *dev = dqm->dev->adev->dev;
+ uint64_t *fence_addr = dqm->fence_addr;
while (*fence_addr != fence_value) {
+ /* Fatal err detected, this response won't come */
+ if (amdgpu_amdkfd_is_fed(dqm->dev->adev) ||
+ amdgpu_in_reset(dqm->dev->adev))
+ return -EIO;
+
if (time_after(jiffies, end_jiffies)) {
- pr_err("qcm fence wait loop timeout expired\n");
+ dev_err(dev, "qcm fence wait loop timeout expired\n");
/* In HWS case, this is used to halt the driver thread
* in order not to mess up CP states before doing
* scandumps for FW debugging.
@@ -1405,9 +2118,10 @@ int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
/* dqm->lock mutex has to be locked before calling this function */
static int map_queues_cpsch(struct device_queue_manager *dqm)
{
+ struct device *dev = dqm->dev->adev->dev;
int retval;
- if (!dqm->sched_running)
+ if (!dqm->sched_running || dqm->sched_halt)
return 0;
if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0)
return 0;
@@ -1417,7 +2131,7 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues);
pr_debug("%s sent runlist\n", __func__);
if (retval) {
- pr_err("failed to execute runlist\n");
+ dev_err(dev, "failed to execute runlist\n");
return retval;
}
dqm->active_runlist = true;
@@ -1425,42 +2139,288 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
return retval;
}
-/* dqm->lock mutex has to be locked before calling this function */
+static void set_queue_as_reset(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd)
+{
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+
+ dev_err(dqm->dev->adev->dev, "queue id 0x%0x at pasid %d is reset\n",
+ q->properties.queue_id, pdd->process->lead_thread->pid);
+
+ pdd->has_reset_queue = true;
+ if (q->properties.is_active) {
+ q->properties.is_active = false;
+ decrement_queue_count(dqm, qpd, q);
+ }
+}
+
+static int detect_queue_hang(struct device_queue_manager *dqm)
+{
+ int i;
+
+ /* detect should be used only in dqm locked queue reset */
+ if (WARN_ON(dqm->detect_hang_count > 0))
+ return 0;
+
+ memset(dqm->detect_hang_info, 0, dqm->detect_hang_info_size);
+
+ for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) {
+ uint32_t mec, pipe, queue;
+ int xcc_id;
+
+ mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe)
+ / dqm->dev->kfd->shared_resources.num_pipe_per_mec;
+
+ if (mec || !test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap))
+ continue;
+
+ amdgpu_queue_mask_bit_to_mec_queue(dqm->dev->adev, i, &mec, &pipe, &queue);
+
+ for_each_inst(xcc_id, dqm->dev->xcc_mask) {
+ uint64_t queue_addr = dqm->dev->kfd2kgd->hqd_get_pq_addr(
+ dqm->dev->adev, pipe, queue, xcc_id);
+ struct dqm_detect_hang_info hang_info;
+
+ if (!queue_addr)
+ continue;
+
+ hang_info.pipe_id = pipe;
+ hang_info.queue_id = queue;
+ hang_info.xcc_id = xcc_id;
+ hang_info.queue_address = queue_addr;
+
+ dqm->detect_hang_info[dqm->detect_hang_count] = hang_info;
+ dqm->detect_hang_count++;
+ }
+ }
+
+ return dqm->detect_hang_count;
+}
+
+static struct queue *find_queue_by_address(struct device_queue_manager *dqm, uint64_t queue_address)
+{
+ struct device_process_node *cur;
+ struct qcm_process_device *qpd;
+ struct queue *q;
+
+ list_for_each_entry(cur, &dqm->queues, list) {
+ qpd = cur->qpd;
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (queue_address == q->properties.queue_address)
+ return q;
+ }
+ }
+
+ return NULL;
+}
+
+static int reset_hung_queues(struct device_queue_manager *dqm)
+{
+ int r = 0, reset_count = 0, i;
+
+ if (!dqm->detect_hang_info || dqm->is_hws_hang)
+ return -EIO;
+
+ /* assume dqm locked. */
+ if (!detect_queue_hang(dqm))
+ return -ENOTRECOVERABLE;
+
+ for (i = 0; i < dqm->detect_hang_count; i++) {
+ struct dqm_detect_hang_info hang_info = dqm->detect_hang_info[i];
+ struct queue *q = find_queue_by_address(dqm, hang_info.queue_address);
+ struct kfd_process_device *pdd;
+ uint64_t queue_addr = 0;
+
+ if (!q) {
+ r = -ENOTRECOVERABLE;
+ goto reset_fail;
+ }
+
+ pdd = kfd_get_process_device_data(dqm->dev, q->process);
+ if (!pdd) {
+ r = -ENOTRECOVERABLE;
+ goto reset_fail;
+ }
+
+ queue_addr = dqm->dev->kfd2kgd->hqd_reset(dqm->dev->adev,
+ hang_info.pipe_id, hang_info.queue_id, hang_info.xcc_id,
+ KFD_UNMAP_LATENCY_MS);
+
+ /* either reset failed or we reset an unexpected queue. */
+ if (queue_addr != q->properties.queue_address) {
+ r = -ENOTRECOVERABLE;
+ goto reset_fail;
+ }
+
+ set_queue_as_reset(dqm, q, &pdd->qpd);
+ reset_count++;
+ }
+
+ if (reset_count == dqm->detect_hang_count)
+ kfd_signal_reset_event(dqm->dev);
+ else
+ r = -ENOTRECOVERABLE;
+
+reset_fail:
+ dqm->detect_hang_count = 0;
+
+ return r;
+}
+
+static bool sdma_has_hang(struct device_queue_manager *dqm)
+{
+ int engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm);
+ int engine_end = engine_start + get_num_all_sdma_engines(dqm);
+ int num_queues_per_eng = dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
+ int i, j;
+
+ for (i = engine_start; i < engine_end; i++) {
+ for (j = 0; j < num_queues_per_eng; j++) {
+ if (!dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j))
+ continue;
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool set_sdma_queue_as_reset(struct device_queue_manager *dqm,
+ uint32_t doorbell_off)
+{
+ struct device_process_node *cur;
+ struct qcm_process_device *qpd;
+ struct queue *q;
+
+ list_for_each_entry(cur, &dqm->queues, list) {
+ qpd = cur->qpd;
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if ((q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) &&
+ q->properties.doorbell_off == doorbell_off) {
+ set_queue_as_reset(dqm, q, qpd);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static int reset_hung_queues_sdma(struct device_queue_manager *dqm)
+{
+ int engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm);
+ int engine_end = engine_start + get_num_all_sdma_engines(dqm);
+ int num_queues_per_eng = dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
+ int r = 0, i, j;
+
+ if (dqm->is_hws_hang)
+ return -EIO;
+
+ /* Scan for hung HW queues and reset engine. */
+ dqm->detect_hang_count = 0;
+ for (i = engine_start; i < engine_end; i++) {
+ for (j = 0; j < num_queues_per_eng; j++) {
+ uint32_t doorbell_off =
+ dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j);
+
+ if (!doorbell_off)
+ continue;
+
+ /* Reset engine and check. */
+ if (amdgpu_sdma_reset_engine(dqm->dev->adev, i, false) ||
+ dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j) ||
+ !set_sdma_queue_as_reset(dqm, doorbell_off)) {
+ r = -ENOTRECOVERABLE;
+ goto reset_fail;
+ }
+
+ /* Should only expect one queue active per engine */
+ dqm->detect_hang_count++;
+ break;
+ }
+ }
+
+ /* Signal process reset */
+ if (dqm->detect_hang_count)
+ kfd_signal_reset_event(dqm->dev);
+ else
+ r = -ENOTRECOVERABLE;
+
+reset_fail:
+ dqm->detect_hang_count = 0;
+
+ return r;
+}
+
+static int reset_queues_on_hws_hang(struct device_queue_manager *dqm, bool is_sdma)
+{
+ struct amdgpu_device *adev = dqm->dev->adev;
+
+ while (halt_if_hws_hang)
+ schedule();
+
+ if (adev->debug_disable_gpu_ring_reset) {
+ dev_info_once(adev->dev,
+ "%s queue hung, but ring reset disabled",
+ is_sdma ? "sdma" : "compute");
+
+ return -EPERM;
+ }
+ if (!amdgpu_gpu_recovery)
+ return -ENOTRECOVERABLE;
+
+ return is_sdma ? reset_hung_queues_sdma(dqm) : reset_hung_queues(dqm);
+}
+
+/* dqm->lock mutex has to be locked before calling this function
+ *
+ * @grace_period: If USE_DEFAULT_GRACE_PERIOD then default wait time
+ * for context switch latency. Lower values are used by debugger
+ * since context switching are triggered at high frequency.
+ * This is configured by setting CP_IQ_WAIT_TIME2.SCH_WAVE
+ *
+ */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter,
- uint32_t filter_param)
+ uint32_t filter_param,
+ uint32_t grace_period,
+ bool reset)
{
- int retval = 0;
+ struct device *dev = dqm->dev->adev->dev;
struct mqd_manager *mqd_mgr;
+ int retval;
if (!dqm->sched_running)
return 0;
- if (dqm->is_hws_hang || dqm->is_resetting)
- return -EIO;
if (!dqm->active_runlist)
- return retval;
+ return 0;
+ if (!down_read_trylock(&dqm->dev->adev->reset_domain->sem))
+ return -EIO;
+
+ if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
+ retval = pm_config_dequeue_wait_counts(&dqm->packet_mgr,
+ KFD_DEQUEUE_WAIT_SET_SCH_WAVE, grace_period);
+ if (retval)
+ goto out;
+ }
- retval = pm_send_unmap_queue(&dqm->packet_mgr, KFD_QUEUE_TYPE_COMPUTE,
- filter, filter_param, false, 0);
+ retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
if (retval)
- return retval;
+ goto out;
*dqm->fence_addr = KFD_FENCE_INIT;
+ mb();
pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
KFD_FENCE_COMPLETED);
/* should be timed out */
- retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
- queue_preemption_timeout_ms);
+ retval = amdkfd_fence_wait_timeout(dqm, KFD_FENCE_COMPLETED,
+ queue_preemption_timeout_ms);
if (retval) {
- pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
- dqm->is_hws_hang = true;
- /* It's possible we're detecting a HWS hang in the
- * middle of a GPU reset. No need to schedule another
- * reset in this case.
- */
- if (!dqm->is_resetting)
- schedule_work(&dqm->hw_exception_work);
- return retval;
+ dev_err(dev, "The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
+ kfd_hws_hang(dqm);
+ goto out;
}
/* In the current MEC firmware implementation, if compute queue
@@ -1472,33 +2432,91 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
* check those fields
*/
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
- if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) {
- pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
- while (halt_if_hws_hang)
- schedule();
- return -ETIME;
+ if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd) &&
+ reset_queues_on_hws_hang(dqm, false))
+ goto reset_fail;
+
+ /* Check for SDMA hang and attempt SDMA reset */
+ if (sdma_has_hang(dqm) && reset_queues_on_hws_hang(dqm, true))
+ goto reset_fail;
+
+ /* We need to reset the grace period value for this device */
+ if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
+ if (pm_config_dequeue_wait_counts(&dqm->packet_mgr,
+ KFD_DEQUEUE_WAIT_RESET, 0 /* unused */))
+ dev_err(dev, "Failed to reset grace period\n");
}
pm_release_ib(&dqm->packet_mgr);
dqm->active_runlist = false;
+out:
+ up_read(&dqm->dev->adev->reset_domain->sem);
+ return retval;
+reset_fail:
+ dqm->is_hws_hang = true;
+ kfd_hws_hang(dqm);
+ up_read(&dqm->dev->adev->reset_domain->sem);
+ return -ETIME;
+}
+
+/* only for compute queue */
+static int reset_queues_cpsch(struct device_queue_manager *dqm, uint16_t pasid)
+{
+ int retval;
+
+ dqm_lock(dqm);
+
+ retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
+ pasid, USE_DEFAULT_GRACE_PERIOD, true);
+
+ dqm_unlock(dqm);
return retval;
}
/* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter,
- uint32_t filter_param)
+ uint32_t filter_param,
+ uint32_t grace_period)
{
int retval;
- if (dqm->is_hws_hang)
+ if (!down_read_trylock(&dqm->dev->adev->reset_domain->sem))
return -EIO;
- retval = unmap_queues_cpsch(dqm, filter, filter_param);
- if (retval)
- return retval;
+ retval = unmap_queues_cpsch(dqm, filter, filter_param, grace_period, false);
+ if (!retval)
+ retval = map_queues_cpsch(dqm);
+ up_read(&dqm->dev->adev->reset_domain->sem);
+ return retval;
+}
+
+static int wait_on_destroy_queue(struct device_queue_manager *dqm,
+ struct queue *q)
+{
+ struct kfd_process_device *pdd = kfd_get_process_device_data(q->device,
+ q->process);
+ int ret = 0;
+
+ if (WARN_ON(!pdd))
+ return ret;
+
+ if (pdd->qpd.is_debug)
+ return ret;
- return map_queues_cpsch(dqm);
+ q->properties.is_being_destroyed = true;
+
+ if (pdd->process->debug_trap_enabled && q->properties.is_suspended) {
+ dqm_unlock(dqm);
+ mutex_unlock(&q->process->mutex);
+ ret = wait_event_interruptible(dqm->destroy_wait,
+ !q->properties.is_suspended);
+
+ mutex_lock(&q->process->mutex);
+ dqm_lock(dqm);
+ }
+
+ return ret;
}
static int destroy_queue_cpsch(struct device_queue_manager *dqm,
@@ -1509,6 +2527,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
struct mqd_manager *mqd_mgr;
uint64_t sdma_val = 0;
struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+ struct device *dev = dqm->dev->adev->dev;
/* Get the SDMA queue stats */
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
@@ -1516,15 +2535,20 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
&sdma_val);
if (retval)
- pr_err("Failed to read SDMA queue counter for queue: %d\n",
+ dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n",
q->properties.queue_id);
}
- retval = 0;
-
/* remove queue from list to prevent rescheduling after preemption */
dqm_lock(dqm);
+ retval = wait_on_destroy_queue(dqm, q);
+
+ if (retval) {
+ dqm_unlock(dqm);
+ return retval;
+ }
+
if (qpd->is_debug) {
/*
* error, currently we do not allow to destroy a queue
@@ -1546,19 +2570,21 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
pdd->sdma_past_activity_counter += sdma_val;
}
- list_del(&q->list);
- qpd->queue_count--;
if (q->properties.is_active) {
- decrement_queue_count(dqm, q->properties.type);
- retval = execute_queues_cpsch(dqm,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
- if (retval == -ETIME)
- qpd->reset_wavefronts = true;
- if (q->properties.is_gws) {
- dqm->gws_queue_count--;
- qpd->mapped_gws_queue = false;
+ decrement_queue_count(dqm, qpd, q);
+ q->properties.is_active = false;
+ if (!dqm->dev->kfd->shared_resources.enable_mes) {
+ retval = execute_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
+ USE_DEFAULT_GRACE_PERIOD);
+ if (retval == -ETIME)
+ qpd->reset_wavefronts = true;
+ } else {
+ retval = remove_queue_mes(dqm, q, qpd);
}
}
+ list_del(&q->list);
+ qpd->queue_count--;
/*
* Unconditionally decrement this counter, regardless of the queue's
@@ -1570,7 +2596,14 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
dqm_unlock(dqm);
- /* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */
+ /*
+ * Do free_mqd and raise delete event after dqm_unlock(dqm) to avoid
+ * circular locking
+ */
+ kfd_dbg_ev_raise(KFD_EC_MASK(EC_DEVICE_QUEUE_DELETE),
+ qpd->pqm->process, q->device,
+ -1, false, NULL, 0);
+
mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
return retval;
@@ -1581,20 +2614,13 @@ failed_try_destroy_debugged_queue:
return retval;
}
-/*
- * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
- * stay in user mode.
- */
-#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
-/* APE1 limit is inclusive and 64K aligned. */
-#define APE1_LIMIT_ALIGNMENT 0xFFFF
-
static bool set_cache_memory_policy(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size)
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
bool retval = true;
@@ -1603,41 +2629,17 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
dqm_lock(dqm);
- if (alternate_aperture_size == 0) {
- /* base > limit disables APE1 */
- qpd->sh_mem_ape1_base = 1;
- qpd->sh_mem_ape1_limit = 0;
- } else {
- /*
- * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
- * SH_MEM_APE1_BASE[31:0], 0x0000 }
- * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
- * SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
- * Verify that the base and size parameters can be
- * represented in this format and convert them.
- * Additionally restrict APE1 to user-mode addresses.
- */
-
- uint64_t base = (uintptr_t)alternate_aperture_base;
- uint64_t limit = base + alternate_aperture_size - 1;
-
- if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
- (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
- retval = false;
- goto out;
- }
-
- qpd->sh_mem_ape1_base = base >> 16;
- qpd->sh_mem_ape1_limit = limit >> 16;
- }
-
retval = dqm->asic_ops.set_cache_memory_policy(
dqm,
qpd,
default_policy,
alternate_policy,
alternate_aperture_base,
- alternate_aperture_size);
+ alternate_aperture_size,
+ misc_process_properties);
+
+ if (retval)
+ goto out;
if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
program_sh_mem_settings(dqm, qpd);
@@ -1712,7 +2714,7 @@ static int get_wave_state(struct device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
- q->properties.is_active || !q->device->cwsr_enabled ||
+ q->properties.is_active || !q->device->kfd->cwsr_enabled ||
!mqd_mgr->get_wave_state) {
dqm_unlock(dqm);
return -EINVAL;
@@ -1725,8 +2727,58 @@ static int get_wave_state(struct device_queue_manager *dqm,
* and the queue should be protected against destruction by the process
* lock.
*/
- return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
- ctl_stack_used_size, save_area_used_size);
+ return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, &q->properties,
+ ctl_stack, ctl_stack_used_size, save_area_used_size);
+}
+
+static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
+ const struct queue *q,
+ u32 *mqd_size,
+ u32 *ctl_stack_size)
+{
+ struct mqd_manager *mqd_mgr;
+ enum KFD_MQD_TYPE mqd_type =
+ get_mqd_type_from_queue_type(q->properties.type);
+
+ dqm_lock(dqm);
+ mqd_mgr = dqm->mqd_mgrs[mqd_type];
+ *mqd_size = mqd_mgr->mqd_size * NUM_XCC(mqd_mgr->dev->xcc_mask);
+ *ctl_stack_size = 0;
+
+ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info)
+ mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);
+
+ dqm_unlock(dqm);
+}
+
+static int checkpoint_mqd(struct device_queue_manager *dqm,
+ const struct queue *q,
+ void *mqd,
+ void *ctl_stack)
+{
+ struct mqd_manager *mqd_mgr;
+ int r = 0;
+ enum KFD_MQD_TYPE mqd_type =
+ get_mqd_type_from_queue_type(q->properties.type);
+
+ dqm_lock(dqm);
+
+ if (q->properties.is_active || !q->device->kfd->cwsr_enabled) {
+ r = -EINVAL;
+ goto dqm_unlock;
+ }
+
+ mqd_mgr = dqm->mqd_mgrs[mqd_type];
+ if (!mqd_mgr->checkpoint_mqd) {
+ r = -EOPNOTSUPP;
+ goto dqm_unlock;
+ }
+
+ mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack);
+
+dqm_unlock:
+ dqm_unlock(dqm);
+ return r;
}
static int process_termination_cpsch(struct device_queue_manager *dqm,
@@ -1734,6 +2786,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
{
int retval;
struct queue *q;
+ struct device *dev = dqm->dev->adev->dev;
struct kernel_queue *kq, *kq_next;
struct mqd_manager *mqd_mgr;
struct device_process_node *cur, *next_dpn;
@@ -1748,7 +2801,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
/* Clean all kernel queues */
list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
list_del(&kq->list);
- decrement_queue_count(dqm, kq->queue->properties.type);
+ decrement_queue_count(dqm, qpd, kq->queue);
qpd->is_debug = false;
dqm->total_queue_count--;
filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
@@ -1762,10 +2815,13 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
deallocate_sdma_queue(dqm, q);
if (q->properties.is_active) {
- decrement_queue_count(dqm, q->properties.type);
- if (q->properties.is_gws) {
- dqm->gws_queue_count--;
- qpd->mapped_gws_queue = false;
+ decrement_queue_count(dqm, qpd, q);
+
+ if (dqm->dev->kfd->shared_resources.enable_mes) {
+ retval = remove_queue_mes(dqm, q, qpd);
+ if (retval)
+ dev_err(dev, "Failed to remove queue %d\n",
+ q->properties.queue_id);
}
}
@@ -1783,11 +2839,15 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
}
}
- retval = execute_queues_cpsch(dqm, filter, 0);
- if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
+ if (!dqm->dev->kfd->shared_resources.enable_mes)
+ retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD);
+
+ if ((retval || qpd->reset_wavefronts) &&
+ down_read_trylock(&dqm->dev->adev->reset_domain->sem)) {
pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
qpd->reset_wavefronts = false;
+ up_read(&dqm->dev->adev->reset_domain->sem);
}
/* Lastly, free mqd resources.
@@ -1817,12 +2877,13 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
static int init_mqd_managers(struct device_queue_manager *dqm)
{
int i, j;
+ struct device *dev = dqm->dev->adev->dev;
struct mqd_manager *mqd_mgr;
for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
if (!mqd_mgr) {
- pr_err("mqd manager [%d] initialization failed\n", i);
+ dev_err(dev, "mqd manager [%d] initialization failed\n", i);
goto out_free;
}
dqm->mqd_mgrs[i] = mqd_mgr;
@@ -1843,21 +2904,22 @@ out_free:
static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
{
int retval;
- struct kfd_dev *dev = dqm->dev;
+ struct kfd_node *dev = dqm->dev;
struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
get_num_all_sdma_engines(dqm) *
- dev->device_info->num_sdma_queues_per_engine +
- dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
+ dev->kfd->device_info.num_sdma_queues_per_engine +
+ (dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
+ NUM_XCC(dqm->dev->xcc_mask));
- retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size,
+ retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
(void *)&(mem_obj->cpu_ptr), false);
return retval;
}
-struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
+struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
{
struct device_queue_manager *dqm;
@@ -1867,7 +2929,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
if (!dqm)
return NULL;
- switch (dev->device_info->asic_family) {
+ switch (dev->adev->asic_type) {
/* HWS is not available on Hawaii. */
case CHIP_HAWAII:
/* HWS depends on CWSR for timely dequeue. CWSR is not
@@ -1892,7 +2954,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.initialize = initialize_cpsch;
dqm->ops.start = start_cpsch;
dqm->ops.stop = stop_cpsch;
- dqm->ops.pre_reset = pre_reset;
+ dqm->ops.halt = halt_cpsch;
+ dqm->ops.unhalt = unhalt_cpsch;
dqm->ops.destroy_queue = destroy_queue_cpsch;
dqm->ops.update_queue = update_queue;
dqm->ops.register_process = register_process;
@@ -1905,12 +2968,14 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.evict_process_queues = evict_process_queues_cpsch;
dqm->ops.restore_process_queues = restore_process_queues_cpsch;
dqm->ops.get_wave_state = get_wave_state;
+ dqm->ops.reset_queues = reset_queues_cpsch;
+ dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
+ dqm->ops.checkpoint_mqd = checkpoint_mqd;
break;
case KFD_SCHED_POLICY_NO_HWS:
/* initialize dqm for no cp scheduling */
dqm->ops.start = start_nocpsch;
dqm->ops.stop = stop_nocpsch;
- dqm->ops.pre_reset = pre_reset;
dqm->ops.create_queue = create_queue_nocpsch;
dqm->ops.destroy_queue = destroy_queue_nocpsch;
dqm->ops.update_queue = update_queue;
@@ -1924,116 +2989,662 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.restore_process_queues =
restore_process_queues_nocpsch;
dqm->ops.get_wave_state = get_wave_state;
+ dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
+ dqm->ops.checkpoint_mqd = checkpoint_mqd;
break;
default:
- pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
+ dev_err(dev->adev->dev, "Invalid scheduling policy %d\n", dqm->sched_policy);
goto out_free;
}
- switch (dev->device_info->asic_family) {
- case CHIP_CARRIZO:
- device_queue_manager_init_vi(&dqm->asic_ops);
- break;
-
+ switch (dev->adev->asic_type) {
case CHIP_KAVERI:
- device_queue_manager_init_cik(&dqm->asic_ops);
- break;
-
case CHIP_HAWAII:
- device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
+ device_queue_manager_init_cik(&dqm->asic_ops);
break;
+ case CHIP_CARRIZO:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
- device_queue_manager_init_vi_tonga(&dqm->asic_ops);
+ device_queue_manager_init_vi(&dqm->asic_ops);
break;
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- case CHIP_RENOIR:
- case CHIP_ARCTURUS:
- case CHIP_ALDEBARAN:
- device_queue_manager_init_v9(&dqm->asic_ops);
- break;
- case CHIP_NAVI10:
- case CHIP_NAVI12:
- case CHIP_NAVI14:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_BEIGE_GOBY:
- case CHIP_YELLOW_CARP:
- case CHIP_CYAN_SKILLFISH:
- device_queue_manager_init_v10_navi10(&dqm->asic_ops);
- break;
default:
- WARN(1, "Unexpected ASIC family %u",
- dev->device_info->asic_family);
- goto out_free;
+ if (KFD_GC_VERSION(dev) >= IP_VERSION(12, 0, 0))
+ device_queue_manager_init_v12(&dqm->asic_ops);
+ else if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
+ device_queue_manager_init_v11(&dqm->asic_ops);
+ else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
+ device_queue_manager_init_v10(&dqm->asic_ops);
+ else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
+ device_queue_manager_init_v9(&dqm->asic_ops);
+ else {
+ WARN(1, "Unexpected ASIC family %u",
+ dev->adev->asic_type);
+ goto out_free;
+ }
}
if (init_mqd_managers(dqm))
goto out_free;
- if (allocate_hiq_sdma_mqd(dqm)) {
- pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
+ if (!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) {
+ dev_err(dev->adev->dev, "Failed to allocate hiq sdma mqd trunk buffer\n");
goto out_free;
}
- if (!dqm->ops.initialize(dqm))
+ if (!dqm->ops.initialize(dqm)) {
+ init_waitqueue_head(&dqm->destroy_wait);
return dqm;
+ }
out_free:
kfree(dqm);
return NULL;
}
-static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev,
+static void deallocate_hiq_sdma_mqd(struct kfd_node *dev,
struct kfd_mem_obj *mqd)
{
WARN(!mqd, "No hiq sdma mqd trunk to free");
- amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem);
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, &mqd->gtt_mem);
}
void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
+ dqm->ops.stop(dqm);
dqm->ops.uninitialize(dqm);
- deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
+ if (!dqm->dev->kfd->shared_resources.enable_mes)
+ deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
kfree(dqm);
}
-int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid)
+int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id)
{
- struct kfd_process_device *pdd;
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process_device *pdd = NULL;
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, &pdd);
+ struct device_queue_manager *dqm = knode->dqm;
+ struct device *dev = dqm->dev->adev->dev;
+ struct qcm_process_device *qpd;
+ struct queue *q = NULL;
int ret = 0;
- if (!p)
+ if (!pdd)
return -EINVAL;
- WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
- pdd = kfd_get_process_device_data(dqm->dev, p);
- if (pdd)
- ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
- kfd_unref_process(p);
+ dqm_lock(dqm);
+
+ if (pdd) {
+ qpd = &pdd->qpd;
+
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (q->doorbell_id == doorbell_id && q->properties.is_active) {
+ ret = suspend_all_queues_mes(dqm);
+ if (ret) {
+ dev_err(dev, "Suspending all queues failed");
+ goto out;
+ }
+
+ q->properties.is_evicted = true;
+ q->properties.is_active = false;
+ decrement_queue_count(dqm, qpd, q);
+
+ ret = remove_queue_mes(dqm, q, qpd);
+ if (ret) {
+ dev_err(dev, "Removing bad queue failed");
+ goto out;
+ }
+
+ ret = resume_all_queues_mes(dqm);
+ if (ret)
+ dev_err(dev, "Resuming all queues failed");
+
+ break;
+ }
+ }
+ }
+
+out:
+ dqm_unlock(dqm);
+ kfd_unref_process(p);
return ret;
}
-static void kfd_process_hw_exception(struct work_struct *work)
+int kfd_evict_process_device(struct kfd_process_device *pdd)
{
- struct device_queue_manager *dqm = container_of(work,
- struct device_queue_manager, hw_exception_work);
- amdgpu_amdkfd_gpu_reset(dqm->dev->kgd);
+ struct device_queue_manager *dqm;
+ struct kfd_process *p;
+
+ p = pdd->process;
+ dqm = pdd->dev->dqm;
+
+ WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
+
+ return dqm->ops.evict_process_queues(dqm, &pdd->qpd);
}
+int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
+ int r;
+ struct device *dev = dqm->dev->adev->dev;
+ int updated_vmid_mask;
+
+ if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+ dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy);
+ return -EINVAL;
+ }
+
+ dqm_lock(dqm);
+
+ if (dqm->trap_debug_vmid != 0) {
+ dev_err(dev, "Trap debug id already reserved\n");
+ r = -EBUSY;
+ goto out_unlock;
+ }
+
+ r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
+ USE_DEFAULT_GRACE_PERIOD, false);
+ if (r)
+ goto out_unlock;
+
+ updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap;
+ updated_vmid_mask &= ~(1 << dqm->dev->vm_info.last_vmid_kfd);
+
+ dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask;
+ dqm->trap_debug_vmid = dqm->dev->vm_info.last_vmid_kfd;
+ r = set_sched_resources(dqm);
+ if (r)
+ goto out_unlock;
+
+ r = map_queues_cpsch(dqm);
+ if (r)
+ goto out_unlock;
+
+ pr_debug("Reserved VMID for trap debug: %i\n", dqm->trap_debug_vmid);
+
+out_unlock:
+ dqm_unlock(dqm);
+ return r;
+}
+
+/*
+ * Releases vmid for the trap debugger
+ */
+int release_debug_trap_vmid(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
+ struct device *dev = dqm->dev->adev->dev;
+ int r;
+ int updated_vmid_mask;
+ uint32_t trap_debug_vmid;
+
+ if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+ dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy);
+ return -EINVAL;
+ }
+
+ dqm_lock(dqm);
+ trap_debug_vmid = dqm->trap_debug_vmid;
+ if (dqm->trap_debug_vmid == 0) {
+ dev_err(dev, "Trap debug id is not reserved\n");
+ r = -EINVAL;
+ goto out_unlock;
+ }
+
+ r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
+ USE_DEFAULT_GRACE_PERIOD, false);
+ if (r)
+ goto out_unlock;
+
+ updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap;
+ updated_vmid_mask |= (1 << dqm->dev->vm_info.last_vmid_kfd);
+
+ dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask;
+ dqm->trap_debug_vmid = 0;
+ r = set_sched_resources(dqm);
+ if (r)
+ goto out_unlock;
+
+ r = map_queues_cpsch(dqm);
+ if (r)
+ goto out_unlock;
+
+ pr_debug("Released VMID for trap debug: %i\n", trap_debug_vmid);
+
+out_unlock:
+ dqm_unlock(dqm);
+ return r;
+}
+
+#define QUEUE_NOT_FOUND -1
+/* invalidate queue operation in array */
+static void q_array_invalidate(uint32_t num_queues, uint32_t *queue_ids)
+{
+ int i;
+
+ for (i = 0; i < num_queues; i++)
+ queue_ids[i] |= KFD_DBG_QUEUE_INVALID_MASK;
+}
+
+/* find queue index in array */
+static int q_array_get_index(unsigned int queue_id,
+ uint32_t num_queues,
+ uint32_t *queue_ids)
+{
+ int i;
+
+ for (i = 0; i < num_queues; i++)
+ if (queue_id == (queue_ids[i] & ~KFD_DBG_QUEUE_INVALID_MASK))
+ return i;
+
+ return QUEUE_NOT_FOUND;
+}
+
+struct copy_context_work_handler_workarea {
+ struct work_struct copy_context_work;
+ struct kfd_process *p;
+};
+
+static void copy_context_work_handler(struct work_struct *work)
+{
+ struct copy_context_work_handler_workarea *workarea;
+ struct mqd_manager *mqd_mgr;
+ struct queue *q;
+ struct mm_struct *mm;
+ struct kfd_process *p;
+ uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size;
+ int i;
+
+ workarea = container_of(work,
+ struct copy_context_work_handler_workarea,
+ copy_context_work);
+
+ p = workarea->p;
+ mm = get_task_mm(p->lead_thread);
+
+ if (!mm)
+ return;
+
+ kthread_use_mm(mm);
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+ struct device_queue_manager *dqm = pdd->dev->dqm;
+ struct qcm_process_device *qpd = &pdd->qpd;
+
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE)
+ continue;
+
+ mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
+
+ /* We ignore the return value from get_wave_state
+ * because
+ * i) right now, it always returns 0, and
+ * ii) if we hit an error, we would continue to the
+ * next queue anyway.
+ */
+ mqd_mgr->get_wave_state(mqd_mgr,
+ q->mqd,
+ &q->properties,
+ (void __user *) q->properties.ctx_save_restore_area_address,
+ &tmp_ctl_stack_used_size,
+ &tmp_save_area_used_size);
+ }
+ }
+ kthread_unuse_mm(mm);
+ mmput(mm);
+}
+
+/*
+ * Duplicate a user-space array of @num_queues queue IDs into kernel memory.
+ *
+ * Returns NULL when no user array was supplied; otherwise returns the result
+ * of memdup_user(), which callers check with IS_ERR().
+ */
+static uint32_t *get_queue_ids(uint32_t num_queues, uint32_t *usr_queue_id_array)
+{
+	size_t nbytes;
+
+	if (usr_queue_id_array == NULL)
+		return NULL;
+
+	nbytes = num_queues * sizeof(uint32_t);
+
+	return memdup_user(usr_queue_id_array, nbytes);
+}
+
+/*
+ * resume_queues() - resume queues of process @p.
+ *
+ * @p: process whose queues are resumed
+ * @num_queues: number of entries in @usr_queue_id_array
+ * @usr_queue_id_array: user-space array of queue IDs, or NULL to attempt a
+ * resume on every queue of the process
+ *
+ * When an array is given it is copied in, every entry is first flagged with
+ * KFD_DBG_QUEUE_INVALID_MASK, and the flag is cleared again for each queue
+ * that resume_single_queue() handled without error. Entries that hit an error
+ * are flagged with KFD_DBG_QUEUE_ERROR_MASK. The updated array is copied back
+ * to user space before returning.
+ *
+ * Return: number of queues counted as resumed, or a negative errno if the
+ * user array could not be copied in.
+ */
+int resume_queues(struct kfd_process *p,
+ uint32_t num_queues,
+ uint32_t *usr_queue_id_array)
+{
+ uint32_t *queue_ids = NULL;
+ int total_resumed = 0;
+ int i;
+
+ if (usr_queue_id_array) {
+ queue_ids = get_queue_ids(num_queues, usr_queue_id_array);
+
+ if (IS_ERR(queue_ids))
+ return PTR_ERR(queue_ids);
+
+ /* mask all queues as invalid. unmask per successful request */
+ q_array_invalidate(num_queues, queue_ids);
+ }
+
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+ struct device_queue_manager *dqm = pdd->dev->dqm;
+ struct device *dev = dqm->dev->adev->dev;
+ struct qcm_process_device *qpd = &pdd->qpd;
+ struct queue *q;
+ int r, per_device_resumed = 0;
+
+ dqm_lock(dqm);
+
+ /* unmask queues that resume or already resumed as valid */
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ int q_idx = QUEUE_NOT_FOUND;
+
+ if (queue_ids)
+ q_idx = q_array_get_index(
+ q->properties.queue_id,
+ num_queues,
+ queue_ids);
+
+ /* without an ID array every queue is a candidate */
+ if (!queue_ids || q_idx != QUEUE_NOT_FOUND) {
+ int err = resume_single_queue(dqm, &pdd->qpd, q);
+
+ if (queue_ids) {
+ if (!err) {
+ queue_ids[q_idx] &=
+ ~KFD_DBG_QUEUE_INVALID_MASK;
+ } else {
+ /* flag the failure and stop walking this device's queues */
+ queue_ids[q_idx] |=
+ KFD_DBG_QUEUE_ERROR_MASK;
+ break;
+ }
+ }
+
+ /*
+ * With MES enabled the queue is counted right away; otherwise it is
+ * only counted once execute_queues_cpsch() below has succeeded.
+ * NOTE(review): without an ID array, per_device_resumed is
+ * incremented even when err is non-zero - confirm this is intended.
+ */
+ if (dqm->dev->kfd->shared_resources.enable_mes) {
+ wake_up_all(&dqm->destroy_wait);
+ if (!err)
+ total_resumed++;
+ } else {
+ per_device_resumed++;
+ }
+ }
+ }
+
+ /* nothing pending for this device: skip the runlist update */
+ if (!per_device_resumed) {
+ dqm_unlock(dqm);
+ continue;
+ }
+
+ r = execute_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
+ 0,
+ USE_DEFAULT_GRACE_PERIOD);
+ if (r) {
+ dev_err(dev, "Failed to resume process queues\n");
+ if (queue_ids) {
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ int q_idx = q_array_get_index(
+ q->properties.queue_id,
+ num_queues,
+ queue_ids);
+
+ /* mask queue as error on resume fail */
+ if (q_idx != QUEUE_NOT_FOUND)
+ queue_ids[q_idx] |=
+ KFD_DBG_QUEUE_ERROR_MASK;
+ }
+ }
+ } else {
+ wake_up_all(&dqm->destroy_wait);
+ total_resumed += per_device_resumed;
+ }
+
+ dqm_unlock(dqm);
+ }
+
+ if (queue_ids) {
+ /* a failed copy-out is only logged; the count is still returned */
+ if (copy_to_user((void __user *)usr_queue_id_array, queue_ids,
+ num_queues * sizeof(uint32_t)))
+ pr_err("copy_to_user failed on queue resume\n");
+
+ kfree(queue_ids);
+ }
+
+ return total_resumed;
+}
+
+/*
+ * suspend_queues() - suspend the queues of process @p listed by user space.
+ *
+ * @p: process whose queues are suspended
+ * @num_queues: number of entries in @usr_queue_id_array
+ * @grace_period: grace period handed to execute_queues_cpsch()
+ * @exception_clear_mask: exception status bits cleared on each queue that was
+ * suspended successfully
+ * @usr_queue_id_array: user-space array of queue IDs to suspend
+ *
+ * The array is copied in and every entry is first flagged with
+ * KFD_DBG_QUEUE_INVALID_MASK. The flag is cleared for each listed queue that
+ * suspend_single_queue() handled without error; entries that fail with
+ * anything other than -EBUSY are flagged with KFD_DBG_QUEUE_ERROR_MASK. If at
+ * least one queue was suspended, the wave state of the process' compute
+ * queues is collected synchronously through copy_context_work_handler(). The
+ * updated array is copied back to user space before returning.
+ *
+ * Return: number of queues suspended, or a negative errno if the user array
+ * could not be copied in (-EFAULT when the array pointer is NULL while
+ * @num_queues is non-zero).
+ */
+int suspend_queues(struct kfd_process *p,
+			uint32_t num_queues,
+			uint32_t grace_period,
+			uint64_t exception_clear_mask,
+			uint32_t *usr_queue_id_array)
+{
+	uint32_t *queue_ids = get_queue_ids(num_queues, usr_queue_id_array);
+	int total_suspended = 0;
+	int i;
+
+	if (IS_ERR(queue_ids))
+		return PTR_ERR(queue_ids);
+
+	/*
+	 * get_queue_ids() returns NULL when user space passed no array. With
+	 * a non-zero queue count the array would be dereferenced below, so
+	 * reject the request instead of writing through a NULL pointer.
+	 */
+	if (!queue_ids && num_queues)
+		return -EFAULT;
+
+	/* mask all queues as invalid. unmask on successful request */
+	q_array_invalidate(num_queues, queue_ids);
+
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+		struct device_queue_manager *dqm = pdd->dev->dqm;
+		struct device *dev = dqm->dev->adev->dev;
+		struct qcm_process_device *qpd = &pdd->qpd;
+		struct queue *q;
+		int r, per_device_suspended = 0;
+
+		mutex_lock(&p->event_mutex);
+		dqm_lock(dqm);
+
+		/* unmask queues that suspend or already suspended */
+		list_for_each_entry(q, &qpd->queues_list, list) {
+			int q_idx = q_array_get_index(q->properties.queue_id,
+						      num_queues,
+						      queue_ids);
+
+			if (q_idx != QUEUE_NOT_FOUND) {
+				int err = suspend_single_queue(dqm, pdd, q);
+				bool is_mes = dqm->dev->kfd->shared_resources.enable_mes;
+
+				if (!err) {
+					queue_ids[q_idx] &= ~KFD_DBG_QUEUE_INVALID_MASK;
+					if (exception_clear_mask && is_mes)
+						q->properties.exception_status &=
+							~exception_clear_mask;
+
+					/*
+					 * With MES the queue counts right
+					 * away; otherwise only after the
+					 * runlist update below succeeds.
+					 */
+					if (is_mes)
+						total_suspended++;
+					else
+						per_device_suspended++;
+				} else if (err != -EBUSY) {
+					/* -EBUSY leaves the entry flagged invalid only */
+					r = err;
+					queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK;
+					break;
+				}
+			}
+		}
+
+		/* nothing pending for this device: skip the runlist update */
+		if (!per_device_suspended) {
+			dqm_unlock(dqm);
+			mutex_unlock(&p->event_mutex);
+			if (total_suspended)
+				amdgpu_amdkfd_debug_mem_fence(dqm->dev->adev);
+			continue;
+		}
+
+		r = execute_queues_cpsch(dqm,
+					 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
+					 grace_period);
+
+		if (r)
+			dev_err(dev, "Failed to suspend process queues.\n");
+		else
+			total_suspended += per_device_suspended;
+
+		list_for_each_entry(q, &qpd->queues_list, list) {
+			int q_idx = q_array_get_index(q->properties.queue_id,
+						      num_queues, queue_ids);
+
+			if (q_idx == QUEUE_NOT_FOUND)
+				continue;
+
+			/* mask queue as error on suspend fail */
+			if (r)
+				queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK;
+			else if (exception_clear_mask)
+				q->properties.exception_status &=
+					~exception_clear_mask;
+		}
+
+		dqm_unlock(dqm);
+		mutex_unlock(&p->event_mutex);
+		amdgpu_device_flush_hdp(dqm->dev->adev, NULL);
+	}
+
+	if (total_suspended) {
+		struct copy_context_work_handler_workarea copy_context_worker;
+
+		INIT_WORK_ONSTACK(
+				&copy_context_worker.copy_context_work,
+				copy_context_work_handler);
+
+		copy_context_worker.p = p;
+
+		schedule_work(&copy_context_worker.copy_context_work);
+
+		/* the work item lives on this stack frame: wait for it to finish */
+		flush_work(&copy_context_worker.copy_context_work);
+		destroy_work_on_stack(&copy_context_worker.copy_context_work);
+	}
+
+	/* a failed copy-out is only logged; the count is still returned */
+	if (copy_to_user((void __user *)usr_queue_id_array, queue_ids,
+			 num_queues * sizeof(uint32_t)))
+		pr_err("copy_to_user failed on queue suspend\n");
+
+	kfree(queue_ids);
+
+	return total_suspended;
+}
+
+/*
+ * Translate a queue's internal type (and, for compute queues, its format)
+ * into the KFD_IOC_QUEUE_TYPE_* value reported to user space. Unknown types
+ * trigger a one-time warning and yield 0xffffffff.
+ */
+static uint32_t set_queue_type_for_user(struct queue_properties *q_props)
+{
+	if (q_props->type == KFD_QUEUE_TYPE_COMPUTE) {
+		if (q_props->format == KFD_QUEUE_FORMAT_PM4)
+			return KFD_IOC_QUEUE_TYPE_COMPUTE;
+
+		return KFD_IOC_QUEUE_TYPE_COMPUTE_AQL;
+	}
+
+	if (q_props->type == KFD_QUEUE_TYPE_SDMA)
+		return KFD_IOC_QUEUE_TYPE_SDMA;
+
+	if (q_props->type == KFD_QUEUE_TYPE_SDMA_XGMI)
+		return KFD_IOC_QUEUE_TYPE_SDMA_XGMI;
+
+	WARN_ONCE(true, "queue type not recognized!");
+	return 0xffffffff;
+}
+
+/*
+ * Fill @qss_entry with a snapshot of queue @q, then clear the bits given by
+ * @exception_clear_mask from the queue's exception status. The snapshot
+ * carries the exception status as it was before the clear.
+ */
+void set_queue_snapshot_entry(struct queue *q,
+			      uint64_t exception_clear_mask,
+			      struct kfd_queue_snapshot_entry *qss_entry)
+{
+	struct queue_properties *props = &q->properties;
+
+	/* identification */
+	qss_entry->queue_id = props->queue_id;
+	qss_entry->gpu_id = q->device->id;
+	qss_entry->queue_type = set_queue_type_for_user(props);
+
+	/* ring buffer description */
+	qss_entry->ring_base_address = props->queue_address;
+	qss_entry->ring_size = (uint32_t)props->queue_size;
+	qss_entry->write_pointer_address = (uint64_t)props->write_ptr;
+	qss_entry->read_pointer_address = (uint64_t)props->read_ptr;
+
+	/* context save/restore area */
+	qss_entry->ctx_save_restore_address =
+		props->ctx_save_restore_area_address;
+	qss_entry->ctx_save_restore_area_size =
+		props->ctx_save_restore_area_size;
+
+	/* report the status first, then clear the requested bits */
+	qss_entry->exception_status = props->exception_status;
+	props->exception_status &= ~exception_clear_mask;
+}
+
+/*
+ * debug_lock_and_unmap() - take the dqm lock and unmap all queues.
+ *
+ * Returns -EINVAL when the scheduling policy is KFD_SCHED_POLICY_NO_HWS, and
+ * 0 without taking the lock when per-VMID debugging is not supported.
+ * Otherwise the dqm lock is taken and all queues are unmapped. On success the
+ * function returns 0 WITH THE LOCK STILL HELD; the lock is released here only
+ * when the unmap fails, in which case that error is returned.
+ */
+int debug_lock_and_unmap(struct device_queue_manager *dqm)
+{
+ struct device *dev = dqm->dev->adev->dev;
+ int r;
+
+ if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+ dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy);
+ return -EINVAL;
+ }
+
+ /* no lock taken and nothing unmapped on this path */
+ if (!kfd_dbg_is_per_vmid_supported(dqm->dev))
+ return 0;
+
+ dqm_lock(dqm);
+
+ r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 0, false);
+ /* only the failure path drops the lock; success leaves it held */
+ if (r)
+ dqm_unlock(dqm);
+
+ return r;
+}
+
+/*
+ * debug_map_and_unlock() - map the queues again and release the dqm lock.
+ *
+ * This function does not take the dqm lock itself; it only releases it.
+ * debug_refresh_runlist() calls it right after a successful
+ * debug_lock_and_unmap(), which leaves the lock held.
+ *
+ * Returns -EINVAL when the scheduling policy is KFD_SCHED_POLICY_NO_HWS, 0
+ * without touching the lock when per-VMID debugging is not supported, and
+ * otherwise the result of map_queues_cpsch().
+ */
+int debug_map_and_unlock(struct device_queue_manager *dqm)
+{
+ struct device *dev = dqm->dev->adev->dev;
+ int r;
+
+ if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+ dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy);
+ return -EINVAL;
+ }
+
+ /* mirrors the lock-free early return of debug_lock_and_unmap() */
+ if (!kfd_dbg_is_per_vmid_supported(dqm->dev))
+ return 0;
+
+ r = map_queues_cpsch(dqm);
+
+ /* the lock is released whether or not the map succeeded */
+ dqm_unlock(dqm);
+
+ return r;
+}
+
+/*
+ * Unmap all queues and map them again. The map step runs only if the unmap
+ * step succeeded; the first error encountered is returned.
+ */
+int debug_refresh_runlist(struct device_queue_manager *dqm)
+{
+	int ret;
+
+	ret = debug_lock_and_unmap(dqm);
+	if (!ret)
+		ret = debug_map_and_unlock(dqm);
+
+	return ret;
+}
+
+/*
+ * Check, under the dqm lock, whether @qpd owns a queue whose doorbell offset
+ * equals @doorbell_off. On a match the queue's format is stored in
+ * @queue_format and true is returned. Returns false when there is no match
+ * or when @queue_format is NULL.
+ */
+bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm,
+				 struct qcm_process_device *qpd,
+				 int doorbell_off, u32 *queue_format)
+{
+	struct queue *cur;
+	bool found = false;
+
+	if (queue_format == NULL)
+		return false;
+
+	dqm_lock(dqm);
+
+	list_for_each_entry(cur, &qpd->queues_list, list) {
+		if (cur->properties.doorbell_off != doorbell_off)
+			continue;
+
+		*queue_format = cur->properties.format;
+		found = true;
+		break;
+	}
+
+	dqm_unlock(dqm);
+
+	return found;
+}
#if defined(CONFIG_DEBUG_FS)
static void seq_reg_dump(struct seq_file *m,
@@ -2060,56 +3671,69 @@ static void seq_reg_dump(struct seq_file *m,
int dqm_debugfs_hqds(struct seq_file *m, void *data)
{
struct device_queue_manager *dqm = data;
+ uint32_t xcc_mask = dqm->dev->xcc_mask;
uint32_t (*dump)[2], n_regs;
int pipe, queue;
- int r = 0;
+ int r = 0, xcc_id;
+ uint32_t sdma_engine_start;
if (!dqm->sched_running) {
- seq_printf(m, " Device is stopped\n");
-
+ seq_puts(m, " Device is stopped\n");
return 0;
}
- r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
- KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
- &dump, &n_regs);
- if (!r) {
- seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n",
- KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
- KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
- KFD_CIK_HIQ_QUEUE);
- seq_reg_dump(m, dump, n_regs);
+ for_each_inst(xcc_id, xcc_mask) {
+ r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
+ KFD_CIK_HIQ_PIPE,
+ KFD_CIK_HIQ_QUEUE, &dump,
+ &n_regs, xcc_id);
+ if (!r) {
+ seq_printf(
+ m,
+ " Inst %d, HIQ on MEC %d Pipe %d Queue %d\n",
+ xcc_id,
+ KFD_CIK_HIQ_PIPE / get_pipes_per_mec(dqm) + 1,
+ KFD_CIK_HIQ_PIPE % get_pipes_per_mec(dqm),
+ KFD_CIK_HIQ_QUEUE);
+ seq_reg_dump(m, dump, n_regs);
- kfree(dump);
- }
+ kfree(dump);
+ }
- for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
- int pipe_offset = pipe * get_queues_per_pipe(dqm);
+ for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
+ int pipe_offset = pipe * get_queues_per_pipe(dqm);
- for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
- if (!test_bit(pipe_offset + queue,
- dqm->dev->shared_resources.cp_queue_bitmap))
- continue;
+ for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
+ if (!test_bit(pipe_offset + queue,
+ dqm->dev->kfd->shared_resources.cp_queue_bitmap))
+ continue;
- r = dqm->dev->kfd2kgd->hqd_dump(
- dqm->dev->kgd, pipe, queue, &dump, &n_regs);
- if (r)
- break;
+ r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
+ pipe, queue,
+ &dump, &n_regs,
+ xcc_id);
+ if (r)
+ break;
- seq_printf(m, " CP Pipe %d, Queue %d\n",
- pipe, queue);
- seq_reg_dump(m, dump, n_regs);
+ seq_printf(m,
+ " Inst %d, CP Pipe %d, Queue %d\n",
+ xcc_id, pipe, queue);
+ seq_reg_dump(m, dump, n_regs);
- kfree(dump);
+ kfree(dump);
+ }
}
}
- for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {
+ sdma_engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm);
+ for (pipe = sdma_engine_start;
+ pipe < (sdma_engine_start + get_num_all_sdma_engines(dqm));
+ pipe++) {
for (queue = 0;
- queue < dqm->dev->device_info->num_sdma_queues_per_engine;
+ queue < dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
queue++) {
r = dqm->dev->kfd2kgd->hqd_sdma_dump(
- dqm->dev->kgd, pipe, queue, &dump, &n_regs);
+ dqm->dev->adev, pipe, queue, &dump, &n_regs);
if (r)
break;
@@ -2135,7 +3759,8 @@ int dqm_debugfs_hang_hws(struct device_queue_manager *dqm)
return r;
}
dqm->active_runlist = true;
- r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
+ r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES,
+ 0, USE_DEFAULT_GRACE_PERIOD);
dqm_unlock(dqm);
return r;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 499fc0ea387f..74a61b5b2f0b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -34,11 +35,49 @@
#define VMID_NUM 16
+#define KFD_MES_PROCESS_QUANTUM 100000
+#define KFD_MES_GANG_QUANTUM 10000
+
struct device_process_node {
struct qcm_process_device *qpd;
struct list_head list;
};
+/*
+ * 32-bit value that can be accessed either through named bitfields (the
+ * field widths, including the unnamed padding fields, add up to 32 bits) or
+ * as a raw unsigned, signed or float word.
+ * NOTE(review): presumably mirrors the SQ_CMD hardware register layout -
+ * confirm against the register specification.
+ */
+union SQ_CMD_BITS {
+ struct {
+ uint32_t cmd:3;
+ uint32_t:1;
+ uint32_t mode:3;
+ uint32_t check_vmid:1;
+ uint32_t trap_id:3;
+ uint32_t:5;
+ uint32_t wave_id:4;
+ uint32_t simd_id:2;
+ uint32_t:2;
+ uint32_t queue_id:3;
+ uint32_t:1;
+ uint32_t vm_id:4;
+ } bitfields, bits;
+ /* raw views of the same 32 bits */
+ uint32_t u32All;
+ signed int i32All;
+ float f32All;
+};
+
+/*
+ * 32-bit value that can be accessed either through named bitfields (three
+ * 8-bit indices, 5 unnamed padding bits and three 1-bit broadcast flags) or
+ * as a raw unsigned, signed or float word.
+ * NOTE(review): presumably mirrors the GRBM_GFX_INDEX hardware register
+ * layout - confirm against the register specification.
+ */
+union GRBM_GFX_INDEX_BITS {
+ struct {
+ uint32_t instance_index:8;
+ uint32_t sh_index:8;
+ uint32_t se_index:8;
+ uint32_t:5;
+ uint32_t sh_broadcast_writes:1;
+ uint32_t instance_broadcast_writes:1;
+ uint32_t se_broadcast_writes:1;
+ } bitfields, bits;
+ /* raw views of the same 32 bits */
+ uint32_t u32All;
+ signed int i32All;
+ float f32All;
+};
+
/**
* struct device_queue_manager_ops
*
@@ -56,7 +95,7 @@ struct device_process_node {
*
* @initialize: Initializes the pipelines and memory module for that device.
*
- * @start: Initializes the resources/modules the the device needs for queues
+ * @start: Initializes the resources/modules the device needs for queues
* execution. This function is called on device initialization and after the
* system woke up after suspension.
*
@@ -66,6 +105,12 @@ struct device_process_node {
* @uninitialize: Destroys all the device queue manager resources allocated in
* initialize routine.
*
+ * @halt: This routine unmaps queues from the runlist and sets the halt status
+ * to true so no more queues will be mapped to the runlist until unhalt.
+ *
+ * @unhalt: This routine resets the halt status to false and maps queues back
+ * to the runlist.
+ *
* @create_kernel_queue: Creates kernel queue. Used for debug queue.
*
* @destroy_kernel_queue: Destroys kernel queue. Used for debug queue.
@@ -77,16 +122,24 @@ struct device_process_node {
*
* @evict_process_queues: Evict all active queues of a process
*
- * @restore_process_queues: Restore all evicted queues queues of a process
+ * @restore_process_queues: Restore all evicted queues of a process
*
* @get_wave_state: Retrieves context save state and optionally copies the
* control stack, if kept in the MQD, to the given userspace address.
+ *
+ * @reset_queues: reset queues which consume RAS poison
+ * @get_queue_checkpoint_info: Retrieves queue size information for CRIU checkpoint.
+ *
+ * @checkpoint_mqd: checkpoint queue MQD contents for CRIU.
*/
struct device_queue_manager_ops {
int (*create_queue)(struct device_queue_manager *dqm,
struct queue *q,
- struct qcm_process_device *qpd);
+ struct qcm_process_device *qpd,
+ const struct kfd_criu_queue_priv_data *qd,
+ const void *restore_mqd,
+ const void *restore_ctl_stack);
int (*destroy_queue)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
@@ -104,8 +157,9 @@ struct device_queue_manager_ops {
int (*initialize)(struct device_queue_manager *dqm);
int (*start)(struct device_queue_manager *dqm);
int (*stop)(struct device_queue_manager *dqm);
- void (*pre_reset)(struct device_queue_manager *dqm);
void (*uninitialize)(struct device_queue_manager *dqm);
+ int (*halt)(struct device_queue_manager *dqm);
+ int (*unhalt)(struct device_queue_manager *dqm);
int (*create_kernel_queue)(struct device_queue_manager *dqm,
struct kernel_queue *kq,
struct qcm_process_device *qpd);
@@ -119,7 +173,8 @@ struct device_queue_manager_ops {
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
int (*process_termination)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
@@ -134,6 +189,17 @@ struct device_queue_manager_ops {
void __user *ctl_stack,
u32 *ctl_stack_used_size,
u32 *save_area_used_size);
+
+ int (*reset_queues)(struct device_queue_manager *dqm,
+ uint16_t pasid);
+ void (*get_queue_checkpoint_info)(struct device_queue_manager *dqm,
+ const struct queue *q, u32 *mqd_size,
+ u32 *ctl_stack_size);
+
+ int (*checkpoint_mqd)(struct device_queue_manager *dqm,
+ const struct queue *q,
+ void *mqd,
+ void *ctl_stack);
};
struct device_queue_manager_asic_ops {
@@ -144,12 +210,20 @@ struct device_queue_manager_asic_ops {
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
void (*init_sdma_vm)(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);
struct mqd_manager * (*mqd_manager_init)(enum KFD_MQD_TYPE type,
- struct kfd_dev *dev);
+ struct kfd_node *dev);
+};
+
+/*
+ * One record identifying a queue by pipe, queue slot, XCC instance and queue
+ * address. struct device_queue_manager keeps an array of these in its
+ * detect_hang_info member, grouped under "per-queue reset support".
+ */
+struct dqm_detect_hang_info {
+ int pipe_id;
+ int queue_id;
+ int xcc_id;
+ uint64_t queue_address;
};
/**
@@ -170,7 +244,7 @@ struct device_queue_manager {
struct mqd_manager *mqd_mgrs[KFD_MQD_TYPE_MAX];
struct packet_manager packet_mgr;
- struct kfd_dev *dev;
+ struct kfd_node *dev;
struct mutex lock_hidden; /* use dqm_lock/unlock(dqm) */
struct list_head queues;
unsigned int saved_flags;
@@ -181,8 +255,8 @@ struct device_queue_manager {
unsigned int total_queue_count;
unsigned int next_pipe_to_allocate;
unsigned int *allocated_queues;
- uint64_t sdma_bitmap;
- uint64_t xgmi_sdma_bitmap;
+ DECLARE_BITMAP(sdma_bitmap, KFD_MAX_SDMA_QUEUES);
+ DECLARE_BITMAP(xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES);
/* the pasid mapping for each kfd vmid */
uint16_t vmid_pasid[VMID_NUM];
uint64_t pipelines_addr;
@@ -191,26 +265,39 @@ struct device_queue_manager {
struct kfd_mem_obj *fence_mem;
bool active_runlist;
int sched_policy;
+ uint32_t trap_debug_vmid;
/* hw exception */
bool is_hws_hang;
bool is_resetting;
- struct work_struct hw_exception_work;
struct kfd_mem_obj hiq_sdma_mqd;
bool sched_running;
+ bool sched_halt;
+
+ /* used for GFX 9.4.3 only */
+ uint32_t current_logical_xcc_start;
+
+ uint32_t wait_times;
+
+ wait_queue_head_t destroy_wait;
+
+ /* for per-queue reset support */
+ struct dqm_detect_hang_info *detect_hang_info;
+ size_t detect_hang_info_size;
+ int detect_hang_count;
};
void device_queue_manager_init_cik(
struct device_queue_manager_asic_ops *asic_ops);
-void device_queue_manager_init_cik_hawaii(
- struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_vi(
struct device_queue_manager_asic_ops *asic_ops);
-void device_queue_manager_init_vi_tonga(
- struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_v9(
struct device_queue_manager_asic_ops *asic_ops);
-void device_queue_manager_init_v10_navi10(
+void device_queue_manager_init_v10(
+ struct device_queue_manager_asic_ops *asic_ops);
+void device_queue_manager_init_v11(
+ struct device_queue_manager_asic_ops *asic_ops);
+void device_queue_manager_init_v12(
struct device_queue_manager_asic_ops *asic_ops);
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
@@ -219,6 +306,27 @@ unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm);
unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm);
+int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd);
+int release_debug_trap_vmid(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd);
+int suspend_queues(struct kfd_process *p,
+ uint32_t num_queues,
+ uint32_t grace_period,
+ uint64_t exception_clear_mask,
+ uint32_t *usr_queue_id_array);
+int resume_queues(struct kfd_process *p,
+ uint32_t num_queues,
+ uint32_t *usr_queue_id_array);
+void set_queue_snapshot_entry(struct queue *q,
+ uint64_t exception_clear_mask,
+ struct kfd_queue_snapshot_entry *qss_entry);
+int debug_lock_and_unmap(struct device_queue_manager *dqm);
+int debug_map_and_unlock(struct device_queue_manager *dqm);
+int debug_refresh_runlist(struct device_queue_manager *dqm);
+bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ int doorbell_off, u32 *queue_format);
static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
{
@@ -248,9 +356,17 @@ static inline void dqm_unlock(struct device_queue_manager *dqm)
static inline int read_sdma_queue_counter(uint64_t __user *q_rptr, uint64_t *val)
{
- /*
- * SDMA activity counter is stored at queue's RPTR + 0x8 location.
- */
+ /* SDMA activity counter is stored at queue's RPTR + 0x8 location. */
return get_user(*val, q_rptr + 1);
}
+
+/*
+ * Refresh dqm->wait_times through the optional get_iq_wait_times() callback,
+ * using the lowest set bit of the node's xcc_mask as the instance index.
+ * Does nothing when the callback is not provided.
+ */
+static inline void update_dqm_wait_times(struct device_queue_manager *dqm)
+{
+	struct kfd_node *node = dqm->dev;
+
+	if (!node->kfd2kgd->get_iq_wait_times)
+		return;
+
+	node->kfd2kgd->get_iq_wait_times(node->adev,
+					 &dqm->wait_times,
+					 ffs(node->xcc_mask) - 1);
+}
+
+
#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index 0d26506798cf..0508ef5a41d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -26,24 +27,29 @@
#include "oss/oss_2_4_sh_mask.h"
#include "gca/gfx_7_2_sh_mask.h"
+/*
+ * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
+ * stay in user mode.
+ */
+#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
+/* APE1 limit is inclusive and 64K aligned. */
+#define APE1_LIMIT_ALIGNMENT 0xFFFF
+
static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
static int update_qpd_cik(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd);
-static int update_qpd_cik_hawaii(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd);
-static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
- struct qcm_process_device *qpd);
-static void init_sdma_vm_hawaii(struct device_queue_manager *dqm,
- struct queue *q,
- struct qcm_process_device *qpd);
+ struct qcm_process_device *qpd);
+static void init_sdma_vm(struct device_queue_manager *dqm,
+ struct queue *q,
+ struct qcm_process_device *qpd);
void device_queue_manager_init_cik(
- struct device_queue_manager_asic_ops *asic_ops)
+ struct device_queue_manager_asic_ops *asic_ops)
{
asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
asic_ops->update_qpd = update_qpd_cik;
@@ -51,15 +57,6 @@ void device_queue_manager_init_cik(
asic_ops->mqd_manager_init = mqd_manager_init_cik;
}
-void device_queue_manager_init_cik_hawaii(
- struct device_queue_manager_asic_ops *asic_ops)
-{
- asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
- asic_ops->update_qpd = update_qpd_cik_hawaii;
- asic_ops->init_sdma_vm = init_sdma_vm_hawaii;
- asic_ops->mqd_manager_init = mqd_manager_init_cik_hawaii;
-}
-
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
{
/* In 64-bit mode, we can only control the top 3 bits of the LDS,
@@ -92,10 +89,41 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size)
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
uint32_t default_mtype;
uint32_t ape1_mtype;
+ unsigned int temp;
+ bool retval = true;
+
+ if (alternate_aperture_size == 0) {
+ /* base > limit disables APE1 */
+ qpd->sh_mem_ape1_base = 1;
+ qpd->sh_mem_ape1_limit = 0;
+ } else {
+ /*
+ * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
+ * SH_MEM_APE1_BASE[31:0], 0x0000 }
+ * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
+ * SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
+ * Verify that the base and size parameters can be
+ * represented in this format and convert them.
+ * Additionally restrict APE1 to user-mode addresses.
+ */
+
+ uint64_t base = (uintptr_t)alternate_aperture_base;
+ uint64_t limit = base + alternate_aperture_size - 1;
+
+ if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
+ (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
+ retval = false;
+ goto out;
+ }
+
+ qpd->sh_mem_ape1_base = base >> 16;
+ qpd->sh_mem_ape1_limit = limit >> 16;
+ }
default_mtype = (default_policy == cache_policy_coherent) ?
MTYPE_NONCACHED :
@@ -109,93 +137,28 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
| ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
| DEFAULT_MTYPE(default_mtype)
| APE1_MTYPE(ape1_mtype);
-
- return true;
-}
-
-static int update_qpd_cik(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
-{
- struct kfd_process_device *pdd;
- unsigned int temp;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
- DEFAULT_MTYPE(MTYPE_NONCACHED) |
- APE1_MTYPE(MTYPE_NONCACHED);
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
-
- if (qpd->pqm->process->is_32bit_user_mode) {
- temp = get_sh_mem_bases_32(pdd);
- qpd->sh_mem_bases = SHARED_BASE(temp);
- qpd->sh_mem_config |= PTR32;
- } else {
- temp = get_sh_mem_bases_nybble_64(pdd);
- qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
- qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__PRIVATE_ATC__SHIFT;
- }
-
- pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
- qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
-
- return 0;
-}
-
-static int update_qpd_cik_hawaii(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
-{
- struct kfd_process_device *pdd;
- unsigned int temp;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
- DEFAULT_MTYPE(MTYPE_NONCACHED) |
- APE1_MTYPE(MTYPE_NONCACHED);
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
-
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.
*/
- temp = get_sh_mem_bases_nybble_64(pdd);
+ temp = get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd));
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
- return 0;
+out:
+ return retval;
}
-static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
- struct qcm_process_device *qpd)
+static int update_qpd_cik(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
{
- uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT);
-
- if (q->process->is_32bit_user_mode)
- value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) |
- get_sh_mem_bases_32(qpd_to_pdd(qpd));
- else
- value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
- SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
- SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
-
- q->properties.sdma_vm_addr = value;
+ return 0;
}
-static void init_sdma_vm_hawaii(struct device_queue_manager *dqm,
- struct queue *q,
- struct qcm_process_device *qpd)
+static void init_sdma_vm(struct device_queue_manager *dqm,
+ struct queue *q,
+ struct qcm_process_device *qpd)
{
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
index ad0593342333..ba6e3d747ccd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2018-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -30,10 +31,18 @@ static int update_qpd_v10(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm_v10(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
-void device_queue_manager_init_v10_navi10(
+void device_queue_manager_init_v10(
struct device_queue_manager_asic_ops *asic_ops)
{
+ asic_ops->set_cache_memory_policy = set_cache_memory_policy_v10;
asic_ops->update_qpd = update_qpd_v10;
asic_ops->init_sdma_vm = init_sdma_vm_v10;
asic_ops->mqd_manager_init = mqd_manager_init_v10;
@@ -48,36 +57,28 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
private_base;
}
-static int update_qpd_v10(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
+static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
- struct kfd_process_device *pdd;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
- SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
- (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
-#if 0
- /* TODO:
- * This shouldn't be an issue with Navi10. Verify.
- */
- if (vega10_noretry)
- qpd->sh_mem_config |=
- 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
-#endif
-
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
-
- qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+ qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+ return true;
+}
+static int update_qpd_v10(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
new file mode 100644
index 000000000000..8b447d04558f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_device_queue_manager.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "soc21_enum.h"
+
+static int update_qpd_v11(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd);
+static void init_sdma_vm_v11(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v11(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
+
+void device_queue_manager_init_v11(
+ struct device_queue_manager_asic_ops *asic_ops)
+{
+ asic_ops->set_cache_memory_policy = set_cache_memory_policy_v11;
+ asic_ops->update_qpd = update_qpd_v11;
+ asic_ops->init_sdma_vm = init_sdma_vm_v11;
+ asic_ops->mqd_manager_init = mqd_manager_init_v11;
+}
+
+static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
+{
+ uint32_t shared_base = pdd->lds_base >> 48;
+ uint32_t private_base = pdd->scratch_base >> 48;
+
+ return (shared_base << SH_MEM_BASES__SHARED_BASE__SHIFT) |
+ private_base;
+}
+
+static bool set_cache_memory_policy_v11(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
+{
+ qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
+
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
+
+ pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+ return true;
+}
+
+static int update_qpd_v11(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
+ return 0;
+}
+
+static void init_sdma_vm_v11(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd)
+{
+ /* Not needed on SDMAv4 onwards any more */
+ q->properties.sdma_vm_addr = 0;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c
new file mode 100644
index 000000000000..3550da3a46f9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_device_queue_manager.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "soc24_enum.h"
+
+static int update_qpd_v12(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd);
+static void init_sdma_vm_v12(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v12(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
+
+void device_queue_manager_init_v12(
+ struct device_queue_manager_asic_ops *asic_ops)
+{
+ asic_ops->set_cache_memory_policy = set_cache_memory_policy_v12;
+ asic_ops->update_qpd = update_qpd_v12;
+ asic_ops->init_sdma_vm = init_sdma_vm_v12;
+ asic_ops->mqd_manager_init = mqd_manager_init_v12;
+}
+
+static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
+{
+ uint32_t shared_base = pdd->lds_base >> 48;
+ uint32_t private_base = pdd->scratch_base >> 48;
+
+ return (shared_base << SH_MEM_BASES__SHARED_BASE__SHIFT) |
+ private_base;
+}
+
+static bool set_cache_memory_policy_v12(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
+{
+ qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
+
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
+
+ pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+ return true;
+}
+
+static int update_qpd_v12(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
+ return 0;
+}
+
+static void init_sdma_vm_v12(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd)
+{
+ /* Not needed on SDMAv4 onwards any more */
+ q->properties.sdma_vm_addr = 0;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index b5c3d13643f1..9fcc8c6e57b7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -23,18 +24,24 @@
#include "kfd_device_queue_manager.h"
#include "vega10_enum.h"
-#include "gc/gc_9_0_offset.h"
-#include "gc/gc_9_0_sh_mask.h"
-#include "sdma0/sdma0_4_0_sh_mask.h"
+#include "gc/gc_9_4_3_sh_mask.h"
static int update_qpd_v9(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v9(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
void device_queue_manager_init_v9(
struct device_queue_manager_asic_ops *asic_ops)
{
+ asic_ops->set_cache_memory_policy = set_cache_memory_policy_v9;
asic_ops->update_qpd = update_qpd_v9;
asic_ops->init_sdma_vm = init_sdma_vm_v9;
asic_ops->mqd_manager_init = mqd_manager_init_v9;
@@ -49,39 +56,73 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
private_base;
}
+static bool set_cache_memory_policy_v9(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
+{
+ qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
+
+ if (dqm->dev->kfd->noretry)
+ qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
+
+ if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4))
+ qpd->sh_mem_config |= (1 << SH_MEM_CONFIG__F8_MODE__SHIFT);
+
+ if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0)) {
+ if (misc_process_properties & KFD_PROC_FLAG_MFMA_HIGH_PRECISION)
+ qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__PRECISION_MODE__SHIFT;
+ }
+
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
+
+ pr_debug("sh_mem_bases 0x%X sh_mem_config 0x%X\n", qpd->sh_mem_bases,
+ qpd->sh_mem_config);
+ return true;
+}
+
static int update_qpd_v9(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
- struct kfd_process_device *pdd;
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
pdd = qpd_to_pdd(qpd);
/* check if sh_mem_config register already configured */
if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
- if (dqm->dev->device_info->asic_family == CHIP_ALDEBARAN) {
- /* Aldebaran can safely support different XNACK modes
- * per process
- */
- if (!pdd->process->xnack_enabled)
- qpd->sh_mem_config |=
- 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
- } else if (dqm->dev->noretry &&
- !dqm->dev->use_iommu_v2) {
+ if (dqm->dev->kfd->noretry)
+ qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
+
+ if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4))
qpd->sh_mem_config |=
- 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
- }
+ (1 << SH_MEM_CONFIG__F8_MODE__SHIFT);
qpd->sh_mem_ape1_limit = 0;
qpd->sh_mem_ape1_base = 0;
}
+ if (KFD_SUPPORT_XNACK_PER_PROCESS(dqm->dev)) {
+ if (!pdd->process->xnack_enabled)
+ qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
+ else
+ qpd->sh_mem_config &= ~(1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT);
+ }
+
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
- pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+ pr_debug("sh_mem_bases 0x%X sh_mem_config 0x%X\n", qpd->sh_mem_bases,
+ qpd->sh_mem_config);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index 3a7cb2f88366..dad83356e976 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -26,30 +27,29 @@
#include "gca/gfx_8_0_sh_mask.h"
#include "oss/oss_3_0_sh_mask.h"
+/*
+ * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
+ * stay in user mode.
+ */
+#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
+/* APE1 limit is inclusive and 64K aligned. */
+#define APE1_LIMIT_ALIGNMENT 0xFFFF
+
static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd,
- enum cache_policy default_policy,
- enum cache_policy alternate_policy,
- void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
-static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd,
- enum cache_policy default_policy,
- enum cache_policy alternate_policy,
- void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
static int update_qpd_vi(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd);
-static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd);
-static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
- struct qcm_process_device *qpd);
-static void init_sdma_vm_tonga(struct device_queue_manager *dqm,
- struct queue *q,
- struct qcm_process_device *qpd);
+ struct qcm_process_device *qpd);
+static void init_sdma_vm(struct device_queue_manager *dqm,
+ struct queue *q,
+ struct qcm_process_device *qpd);
void device_queue_manager_init_vi(
- struct device_queue_manager_asic_ops *asic_ops)
+ struct device_queue_manager_asic_ops *asic_ops)
{
asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi;
asic_ops->update_qpd = update_qpd_vi;
@@ -57,15 +57,6 @@ void device_queue_manager_init_vi(
asic_ops->mqd_manager_init = mqd_manager_init_vi;
}
-void device_queue_manager_init_vi_tonga(
- struct device_queue_manager_asic_ops *asic_ops)
-{
- asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga;
- asic_ops->update_qpd = update_qpd_vi_tonga;
- asic_ops->init_sdma_vm = init_sdma_vm_tonga;
- asic_ops->mqd_manager_init = mqd_manager_init_vi_tonga;
-}
-
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
{
/* In 64-bit mode, we can only control the top 3 bits of the LDS,
@@ -95,43 +86,45 @@ static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
}
static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd,
- enum cache_policy default_policy,
- enum cache_policy alternate_policy,
- void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size)
-{
- uint32_t default_mtype;
- uint32_t ape1_mtype;
-
- default_mtype = (default_policy == cache_policy_coherent) ?
- MTYPE_CC :
- MTYPE_NC;
-
- ape1_mtype = (alternate_policy == cache_policy_coherent) ?
- MTYPE_CC :
- MTYPE_NC;
-
- qpd->sh_mem_config = (qpd->sh_mem_config &
- SH_MEM_CONFIG__ADDRESS_MODE_MASK) |
- SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
- SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
- default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
- ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT |
- SH_MEM_CONFIG__PRIVATE_ATC_MASK;
-
- return true;
-}
-
-static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size)
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
uint32_t default_mtype;
uint32_t ape1_mtype;
+ unsigned int temp;
+ bool retval = true;
+
+ if (alternate_aperture_size == 0) {
+ /* base > limit disables APE1 */
+ qpd->sh_mem_ape1_base = 1;
+ qpd->sh_mem_ape1_limit = 0;
+ } else {
+ /*
+ * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
+ * SH_MEM_APE1_BASE[31:0], 0x0000 }
+ * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
+ * SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
+ * Verify that the base and size parameters can be
+ * represented in this format and convert them.
+ * Additionally restrict APE1 to user-mode addresses.
+ */
+
+ uint64_t base = (uintptr_t)alternate_aperture_base;
+ uint64_t limit = base + alternate_aperture_size - 1;
+
+ if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
+ (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
+ retval = false;
+ goto out;
+ }
+
+ qpd->sh_mem_ape1_base = base >> 16;
+ qpd->sh_mem_ape1_limit = limit >> 16;
+ }
default_mtype = (default_policy == cache_policy_coherent) ?
MTYPE_UC :
@@ -147,103 +140,27 @@ static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
- return true;
-}
-
-static int update_qpd_vi(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
-{
- struct kfd_process_device *pdd;
- unsigned int temp;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
- SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
- MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
- MTYPE_CC << SH_MEM_CONFIG__APE1_MTYPE__SHIFT |
- SH_MEM_CONFIG__PRIVATE_ATC_MASK;
-
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
-
- if (qpd->pqm->process->is_32bit_user_mode) {
- temp = get_sh_mem_bases_32(pdd);
- qpd->sh_mem_bases = temp << SH_MEM_BASES__SHARED_BASE__SHIFT;
- qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA32 <<
- SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
- } else {
- temp = get_sh_mem_bases_nybble_64(pdd);
- qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
- qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 <<
- SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
- qpd->sh_mem_config |= 1 <<
- SH_MEM_CONFIG__PRIVATE_ATC__SHIFT;
- }
-
- pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
- qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
-
- return 0;
-}
-
-static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
-{
- struct kfd_process_device *pdd;
- unsigned int temp;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
- SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
- MTYPE_UC <<
- SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
- MTYPE_UC <<
- SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
-
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
-
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.
*/
- temp = get_sh_mem_bases_nybble_64(pdd);
+ temp = get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd));
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n",
temp, qpd->sh_mem_bases);
-
- return 0;
+out:
+ return retval;
}
-static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
- struct qcm_process_device *qpd)
+static int update_qpd_vi(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
{
- uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT);
-
- if (q->process->is_32bit_user_mode)
- value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) |
- get_sh_mem_bases_32(qpd_to_pdd(qpd));
- else
- value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
- SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
- SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
-
- q->properties.sdma_vm_addr = value;
+ return 0;
}
-static void init_sdma_vm_tonga(struct device_queue_manager *dqm,
- struct queue *q,
- struct qcm_process_device *qpd)
+static void init_sdma_vm(struct device_queue_manager *dqm,
+ struct queue *q,
+ struct qcm_process_device *qpd)
{
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index 768d153acff4..05c74887fd6f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -48,82 +49,61 @@
/* # of doorbell bytes allocated for each process. */
size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
{
- return roundup(kfd->device_info->doorbell_size *
- KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
- PAGE_SIZE);
+ if (!kfd->shared_resources.enable_mes)
+ return roundup(kfd->device_info.doorbell_size *
+ KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+ PAGE_SIZE);
+ else
+ return amdgpu_mes_doorbell_process_slice(
+ (struct amdgpu_device *)kfd->adev);
}
/* Doorbell calculations for device init. */
int kfd_doorbell_init(struct kfd_dev *kfd)
{
- size_t doorbell_start_offset;
- size_t doorbell_aperture_size;
- size_t doorbell_process_limit;
+ int size = PAGE_SIZE;
+ int r;
/*
- * We start with calculations in bytes because the input data might
- * only be byte-aligned.
- * Only after we have done the rounding can we assume any alignment.
+ * Todo: KFD kernel level operations need only one doorbell for
+ * ring test/HWS. So instead of reserving a whole page here for
+ * kernel, reserve and consume a doorbell from existing KGD kernel
+ * doorbell page.
*/
- doorbell_start_offset =
- roundup(kfd->shared_resources.doorbell_start_offset,
- kfd_doorbell_process_slice(kfd));
-
- doorbell_aperture_size =
- rounddown(kfd->shared_resources.doorbell_aperture_size,
- kfd_doorbell_process_slice(kfd));
-
- if (doorbell_aperture_size > doorbell_start_offset)
- doorbell_process_limit =
- (doorbell_aperture_size - doorbell_start_offset) /
- kfd_doorbell_process_slice(kfd);
- else
- return -ENOSPC;
-
- if (!kfd->max_doorbell_slices ||
- doorbell_process_limit < kfd->max_doorbell_slices)
- kfd->max_doorbell_slices = doorbell_process_limit;
-
- kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
- doorbell_start_offset;
-
- kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);
-
- kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
- kfd_doorbell_process_slice(kfd));
-
- if (!kfd->doorbell_kernel_ptr)
+ /* Bitmap to dynamically allocate doorbells from kernel page */
+ kfd->doorbell_bitmap = bitmap_zalloc(size / sizeof(u32), GFP_KERNEL);
+ if (!kfd->doorbell_bitmap) {
+ DRM_ERROR("Failed to allocate kernel doorbell bitmap\n");
return -ENOMEM;
+ }
- pr_debug("Doorbell initialization:\n");
- pr_debug("doorbell base == 0x%08lX\n",
- (uintptr_t)kfd->doorbell_base);
-
- pr_debug("doorbell_base_dw_offset == 0x%08lX\n",
- kfd->doorbell_base_dw_offset);
-
- pr_debug("doorbell_process_limit == 0x%08lX\n",
- doorbell_process_limit);
-
- pr_debug("doorbell_kernel_offset == 0x%08lX\n",
- (uintptr_t)kfd->doorbell_base);
-
- pr_debug("doorbell aperture size == 0x%08lX\n",
- kfd->shared_resources.doorbell_aperture_size);
-
- pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
+ /* Alloc a doorbell page for KFD kernel usages */
+ r = amdgpu_bo_create_kernel(kfd->adev,
+ size,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_DOORBELL,
+ &kfd->doorbells,
+ NULL,
+ (void **)&kfd->doorbell_kernel_ptr);
+ if (r) {
+ pr_err("failed to allocate kernel doorbells\n");
+ bitmap_free(kfd->doorbell_bitmap);
+ return r;
+ }
+ pr_debug("Doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
return 0;
}
void kfd_doorbell_fini(struct kfd_dev *kfd)
{
- if (kfd->doorbell_kernel_ptr)
- iounmap(kfd->doorbell_kernel_ptr);
+ bitmap_free(kfd->doorbell_bitmap);
+ amdgpu_bo_free_kernel(&kfd->doorbells, NULL,
+ (void **)&kfd->doorbell_kernel_ptr);
}
-int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
+int kfd_doorbell_mmap(struct kfd_node *dev, struct kfd_process *process,
struct vm_area_struct *vma)
{
phys_addr_t address;
@@ -133,7 +113,7 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
* For simplicitly we only allow mapping of the entire doorbell
* allocation of a single device & process.
*/
- if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
+ if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev->kfd))
return -EINVAL;
pdd = kfd_get_process_device_data(dev, process);
@@ -142,8 +122,10 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
/* Calculate physical address of doorbell */
address = kfd_get_process_doorbells(pdd);
- vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
- VM_DONTDUMP | VM_PFNMAP;
+ if (!address)
+ return -ENOMEM;
+ vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
+ VM_DONTDUMP | VM_PFNMAP);
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
@@ -153,13 +135,13 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
" vm_flags == 0x%04lX\n"
" size == 0x%04lX\n",
(unsigned long long) vma->vm_start, address, vma->vm_flags,
- kfd_doorbell_process_slice(dev));
+ kfd_doorbell_process_slice(dev->kfd));
return io_remap_pfn_range(vma,
vma->vm_start,
address >> PAGE_SHIFT,
- kfd_doorbell_process_slice(dev),
+ kfd_doorbell_process_slice(dev->kfd),
vma->vm_page_prot);
}
@@ -171,22 +153,19 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
u32 inx;
mutex_lock(&kfd->doorbell_mutex);
- inx = find_first_zero_bit(kfd->doorbell_available_index,
- KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+ inx = find_first_zero_bit(kfd->doorbell_bitmap, PAGE_SIZE / sizeof(u32));
- __set_bit(inx, kfd->doorbell_available_index);
+ __set_bit(inx, kfd->doorbell_bitmap);
mutex_unlock(&kfd->doorbell_mutex);
if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
return NULL;
- inx *= kfd->device_info->doorbell_size / sizeof(u32);
-
- /*
- * Calculating the kernel doorbell offset using the first
- * doorbell page.
- */
- *doorbell_off = kfd->doorbell_base_dw_offset + inx;
+ *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev,
+ kfd->doorbells,
+ inx,
+ kfd->device_info.doorbell_size);
+ inx *= 2;
pr_debug("Get kernel queue doorbell\n"
" doorbell offset == 0x%08X\n"
@@ -200,11 +179,11 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
{
unsigned int inx;
- inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
- * sizeof(u32) / kfd->device_info->doorbell_size;
+ inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
+ inx /= 2;
mutex_lock(&kfd->doorbell_mutex);
- __clear_bit(inx, kfd->doorbell_available_index);
+ __clear_bit(inx, kfd->doorbell_bitmap);
mutex_unlock(&kfd->doorbell_mutex);
}
@@ -226,50 +205,99 @@ void write_kernel_doorbell64(void __iomem *db, u64 value)
}
}
-unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
- struct kfd_process_device *pdd,
- unsigned int doorbell_id)
+static int init_doorbell_bitmap(struct qcm_process_device *qpd,
+ struct kfd_dev *dev)
{
- /*
- * doorbell_base_dw_offset accounts for doorbells taken by KGD.
- * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
- * the process's doorbells. The offset returned is in dword
- * units regardless of the ASIC-dependent doorbell size.
- */
- return kfd->doorbell_base_dw_offset +
- pdd->doorbell_index
- * kfd_doorbell_process_slice(kfd) / sizeof(u32) +
- doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
+ unsigned int i;
+ int range_start = dev->shared_resources.non_cp_doorbells_start;
+ int range_end = dev->shared_resources.non_cp_doorbells_end;
+
+ if (!KFD_IS_SOC15(dev))
+ return 0;
+
+ /* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
+ pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
+ pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
+ range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
+ range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
+
+ for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
+ if (i >= range_start && i <= range_end) {
+ __set_bit(i, qpd->doorbell_bitmap);
+ __set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
+ qpd->doorbell_bitmap);
+ }
+ }
+
+ return 0;
}
-uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
+phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
{
- uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size -
- kfd->shared_resources.doorbell_start_offset) /
- kfd_doorbell_process_slice(kfd) + 1;
+ struct amdgpu_device *adev = pdd->dev->adev;
+ uint32_t first_db_index;
- return num_of_elems;
+ if (!pdd->qpd.proc_doorbells) {
+ if (kfd_alloc_process_doorbells(pdd->dev->kfd, pdd))
+ /* phys_addr_t 0 is error */
+ return 0;
+ }
+ first_db_index = amdgpu_doorbell_index_on_bar(adev,
+ pdd->qpd.proc_doorbells,
+ 0,
+ pdd->dev->kfd->device_info.doorbell_size);
+ return adev->doorbell.base + first_db_index * sizeof(uint32_t);
}
-phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
+int kfd_alloc_process_doorbells(struct kfd_dev *kfd, struct kfd_process_device *pdd)
{
- return pdd->dev->doorbell_base +
- pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev);
-}
+ int r;
+ struct qcm_process_device *qpd = &pdd->qpd;
+
+ /* Allocate bitmap for dynamic doorbell allocation */
+ qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+ GFP_KERNEL);
+ if (!qpd->doorbell_bitmap) {
+ DRM_ERROR("Failed to allocate process doorbell bitmap\n");
+ return -ENOMEM;
+ }
-int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
-{
- int r = ida_simple_get(&kfd->doorbell_ida, 1, kfd->max_doorbell_slices,
- GFP_KERNEL);
- if (r > 0)
- *doorbell_index = r;
+ r = init_doorbell_bitmap(&pdd->qpd, kfd);
+ if (r) {
+ DRM_ERROR("Failed to initialize process doorbells\n");
+ r = -ENOMEM;
+ goto err;
+ }
+
+ /* Allocate doorbells for this process */
+ r = amdgpu_bo_create_kernel(kfd->adev,
+ kfd_doorbell_process_slice(kfd),
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_DOORBELL,
+ &qpd->proc_doorbells,
+ NULL,
+ NULL);
+ if (r) {
+ DRM_ERROR("Failed to allocate process doorbells\n");
+ goto err;
+ }
+ return 0;
+err:
+ bitmap_free(qpd->doorbell_bitmap);
+ qpd->doorbell_bitmap = NULL;
return r;
}
-void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index)
+void kfd_free_process_doorbells(struct kfd_dev *kfd, struct kfd_process_device *pdd)
{
- if (doorbell_index)
- ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
+ struct qcm_process_device *qpd = &pdd->qpd;
+
+ if (qpd->doorbell_bitmap) {
+ bitmap_free(qpd->doorbell_bitmap);
+ qpd->doorbell_bitmap = NULL;
+ }
+
+ amdgpu_bo_free_kernel(&qpd->proc_doorbells, NULL, NULL);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 3eea4edee355..5a190dd6be4e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -30,7 +31,7 @@
#include <linux/memory.h>
#include "kfd_priv.h"
#include "kfd_events.h"
-#include "kfd_iommu.h"
+#include "kfd_device_queue_manager.h"
#include <linux/device.h>
/*
@@ -40,6 +41,7 @@ struct kfd_event_waiter {
wait_queue_entry_t wait;
struct kfd_event *event; /* Event to wait for */
bool activated; /* Becomes true when event is signaled */
+ bool event_age_enabled; /* set to true when last_event_age is non-zero */
};
/*
@@ -55,7 +57,6 @@ struct kfd_signal_page {
bool need_to_free_pages;
};
-
static uint64_t *page_slots(struct kfd_signal_page *page)
{
return page->kernel_address;
@@ -92,7 +93,8 @@ fail_alloc_signal_store:
}
static int allocate_event_notification_slot(struct kfd_process *p,
- struct kfd_event *ev)
+ struct kfd_event *ev,
+ const int *restore_id)
{
int id;
@@ -104,14 +106,19 @@ static int allocate_event_notification_slot(struct kfd_process *p,
p->signal_mapped_size = 256*8;
}
- /*
- * Compatibility with old user mode: Only use signal slots
- * user mode has mapped, may be less than
- * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
- * of the event limit without breaking user mode.
- */
- id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
- GFP_KERNEL);
+ if (restore_id) {
+ id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
+ GFP_KERNEL);
+ } else {
+ /*
+ * Compatibility with old user mode: Only use signal slots
+ * user mode has mapped, may be less than
+ * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
+ * of the event limit without breaking user mode.
+ */
+ id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
+ GFP_KERNEL);
+ }
if (id < 0)
return id;
@@ -122,8 +129,8 @@ static int allocate_event_notification_slot(struct kfd_process *p,
}
/*
- * Assumes that p->event_mutex is held and of course that p is not going
- * away (current or locked).
+ * Assumes that p->event_mutex or rcu_read_lock() is held and of course that
+ * p is not going away.
*/
static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
{
@@ -178,9 +185,8 @@ static struct kfd_event *lookup_signaled_event_by_partial_id(
return ev;
}
-static int create_signal_event(struct file *devkfd,
- struct kfd_process *p,
- struct kfd_event *ev)
+static int create_signal_event(struct file *devkfd, struct kfd_process *p,
+ struct kfd_event *ev, const int *restore_id)
{
int ret;
@@ -193,7 +199,7 @@ static int create_signal_event(struct file *devkfd,
return -ENOSPC;
}
- ret = allocate_event_notification_slot(p, ev);
+ ret = allocate_event_notification_slot(p, ev, restore_id);
if (ret) {
pr_warn("Signal event wasn't created because out of kernel memory\n");
return ret;
@@ -209,16 +215,22 @@ static int create_signal_event(struct file *devkfd,
return 0;
}
-static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
+static int create_other_event(struct kfd_process *p, struct kfd_event *ev, const int *restore_id)
{
- /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
- * intentional integer overflow to -1 without a compiler
- * warning. idr_alloc treats a negative value as "maximum
- * signed integer".
- */
- int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
- (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
- GFP_KERNEL);
+ int id;
+
+ if (restore_id)
+ id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
+ GFP_KERNEL);
+ else
+ /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
+ * intentional integer overflow to -1 without a compiler
+ * warning. idr_alloc treats a negative value as "maximum
+ * signed integer".
+ */
+ id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
+ (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
+ GFP_KERNEL);
if (id < 0)
return id;
@@ -227,12 +239,24 @@ static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
return 0;
}
-void kfd_event_init_process(struct kfd_process *p)
+int kfd_event_init_process(struct kfd_process *p)
{
+ int id;
+
mutex_init(&p->event_mutex);
idr_init(&p->event_idr);
p->signal_page = NULL;
- p->signal_event_count = 0;
+ p->signal_event_count = 1;
+ /* Allocate event ID 0. It is used for a fast path to ignore bogus events
+ * that are sent by the CP without a context ID
+ */
+ id = idr_alloc(&p->event_idr, NULL, 0, 1, GFP_KERNEL);
+ if (id < 0) {
+ idr_destroy(&p->event_idr);
+ mutex_destroy(&p->event_mutex);
+ return id;
+ }
+ return 0;
}
static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
@@ -240,16 +264,18 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
struct kfd_event_waiter *waiter;
/* Wake up pending waiters. They will return failure */
+ spin_lock(&ev->lock);
list_for_each_entry(waiter, &ev->wq.head, wait.entry)
- waiter->event = NULL;
+ WRITE_ONCE(waiter->event, NULL);
wake_up_all(&ev->wq);
+ spin_unlock(&ev->lock);
if (ev->type == KFD_EVENT_TYPE_SIGNAL ||
ev->type == KFD_EVENT_TYPE_DEBUG)
p->signal_event_count--;
idr_remove(&p->event_idr, ev->event_id);
- kfree(ev);
+ kfree_rcu(ev, rcu);
}
static void destroy_events(struct kfd_process *p)
@@ -258,8 +284,10 @@ static void destroy_events(struct kfd_process *p)
uint32_t id;
idr_for_each_entry(&p->event_idr, ev, id)
- destroy_event(p, ev);
+ if (ev)
+ destroy_event(p, ev);
idr_destroy(&p->event_idr);
+ mutex_destroy(&p->event_mutex);
}
/*
@@ -295,8 +323,8 @@ static bool event_can_be_cpu_signaled(const struct kfd_event *ev)
return ev->type == KFD_EVENT_TYPE_SIGNAL;
}
-int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
- uint64_t size)
+static int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
+ uint64_t size, uint64_t user_handle)
{
struct kfd_signal_page *page;
@@ -315,10 +343,56 @@ int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
p->signal_page = page;
p->signal_mapped_size = size;
-
+ p->signal_handle = user_handle;
return 0;
}
+/*
+ * kfd_kmap_event_page - map a buffer object into the kernel and install it
+ * as the signal page of process @p.
+ *
+ * @event_page_offset encodes the GPU id (GET_GPU_ID) and the IDR handle of
+ * the buffer object (GET_IDR_HANDLE). The same value is handed to
+ * kfd_event_page_set() as the user handle of the page.
+ *
+ * Returns 0 on success, -EINVAL if a signal page is already set or the
+ * device or BO cannot be found, or the error reported by the bind, map or
+ * page-set step.
+ */
+int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)
+{
+	struct kfd_process_device *pdd;
+	struct kfd_node *node;
+	void *bo, *kaddr;
+	uint64_t bo_size;
+	int ret;
+
+	if (p->signal_page) {
+		pr_err("Event page is already set\n");
+		return -EINVAL;
+	}
+
+	pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(event_page_offset));
+	if (!pdd) {
+		pr_err("Getting device by id failed in %s\n", __func__);
+		return -EINVAL;
+	}
+	node = pdd->dev;
+
+	pdd = kfd_bind_process_to_device(node, p);
+	if (IS_ERR(pdd))
+		return PTR_ERR(pdd);
+
+	bo = kfd_process_device_translate_handle(pdd,
+					GET_IDR_HANDLE(event_page_offset));
+	if (!bo) {
+		pr_err("Can't find BO, offset is 0x%llx\n", event_page_offset);
+		return -EINVAL;
+	}
+
+	ret = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(bo, &kaddr, &bo_size);
+	if (ret) {
+		pr_err("Failed to map event page to kernel\n");
+		return ret;
+	}
+
+	ret = kfd_event_page_set(p, kaddr, bo_size, event_page_offset);
+	if (!ret)
+		return 0;
+
+	/* The page was not installed, so drop the kernel mapping again. */
+	pr_err("Failed to set event page\n");
+	amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(bo);
+	return ret;
+}
+
int kfd_event_create(struct file *devkfd, struct kfd_process *p,
uint32_t event_type, bool auto_reset, uint32_t node_id,
uint32_t *event_id, uint32_t *event_trigger_data,
@@ -334,6 +408,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
ev->auto_reset = auto_reset;
ev->signaled = false;
+ spin_lock_init(&ev->lock);
init_waitqueue_head(&ev->wq);
*event_page_offset = 0;
@@ -343,20 +418,21 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
switch (event_type) {
case KFD_EVENT_TYPE_SIGNAL:
case KFD_EVENT_TYPE_DEBUG:
- ret = create_signal_event(devkfd, p, ev);
+ ret = create_signal_event(devkfd, p, ev, NULL);
if (!ret) {
*event_page_offset = KFD_MMAP_TYPE_EVENTS;
*event_slot_index = ev->event_id;
}
break;
default:
- ret = create_other_event(p, ev);
+ ret = create_other_event(p, ev, NULL);
break;
}
if (!ret) {
*event_id = ev->event_id;
*event_trigger_data = ev->event_id;
+ ev->event_age = 1;
} else {
kfree(ev);
}
@@ -366,6 +442,166 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
return ret;
}
+/*
+ * kfd_criu_restore_event - recreate one event of process @p from CRIU
+ * private data.
+ *
+ * Reads one struct kfd_criu_event_priv_data from @user_priv_ptr at
+ * *@priv_data_offset and advances the offset past it. If the record carries
+ * a user_handle, the signal page is mapped first via kfd_kmap_event_page().
+ * The event is then registered under its saved event_id while holding
+ * p->event_mutex.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, -EINVAL if the record
+ * would extend past @max_priv_data_size, -EFAULT if the copy from user space
+ * fails, or the error returned while mapping the page or creating the event.
+ */
+int kfd_criu_restore_event(struct file *devkfd,
+			   struct kfd_process *p,
+			   uint8_t __user *user_priv_ptr,
+			   uint64_t *priv_data_offset,
+			   uint64_t max_priv_data_size)
+{
+	struct kfd_criu_event_priv_data *ev_priv;
+	struct kfd_event *ev = NULL;
+	int ret = 0;
+
+	ev_priv = kmalloc(sizeof(*ev_priv), GFP_KERNEL);
+	if (!ev_priv)
+		return -ENOMEM;
+
+	ev = kzalloc(sizeof(*ev), GFP_KERNEL);
+	if (!ev) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	if (*priv_data_offset + sizeof(*ev_priv) > max_priv_data_size) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	ret = copy_from_user(ev_priv, user_priv_ptr + *priv_data_offset, sizeof(*ev_priv));
+	if (ret) {
+		ret = -EFAULT;
+		goto exit;
+	}
+	*priv_data_offset += sizeof(*ev_priv);
+
+	if (ev_priv->user_handle) {
+		ret = kfd_kmap_event_page(p, ev_priv->user_handle);
+		if (ret)
+			goto exit;
+	}
+
+	ev->type = ev_priv->type;
+	ev->auto_reset = ev_priv->auto_reset;
+	ev->signaled = ev_priv->signaled;
+
+	spin_lock_init(&ev->lock);
+	init_waitqueue_head(&ev->wq);
+
+	mutex_lock(&p->event_mutex);
+	switch (ev->type) {
+	case KFD_EVENT_TYPE_SIGNAL:
+	case KFD_EVENT_TYPE_DEBUG:
+		ret = create_signal_event(devkfd, p, ev, &ev_priv->event_id);
+		break;
+	case KFD_EVENT_TYPE_MEMORY:
+		memcpy(&ev->memory_exception_data,
+		       &ev_priv->memory_exception_data,
+		       sizeof(struct kfd_hsa_memory_exception_data));
+
+		ret = create_other_event(p, ev, &ev_priv->event_id);
+		break;
+	case KFD_EVENT_TYPE_HW_EXCEPTION:
+		memcpy(&ev->hw_exception_data,
+		       &ev_priv->hw_exception_data,
+		       sizeof(struct kfd_hsa_hw_exception_data));
+
+		ret = create_other_event(p, ev, &ev_priv->event_id);
+		break;
+	default:
+		/*
+		 * Same as the default branch of kfd_event_create(): any
+		 * other type is a plain non-signal event. Without this
+		 * branch ret stayed 0 and ev was neither registered in the
+		 * IDR nor freed.
+		 */
+		ret = create_other_event(p, ev, &ev_priv->event_id);
+		break;
+	}
+	mutex_unlock(&p->event_mutex);
+
+exit:
+	/* On success the event is owned by p->event_idr; free it only on error. */
+	if (ret)
+		kfree(ev);
+
+	kfree(ev_priv);
+
+	return ret;
+}
+
+/*
+ * kfd_criu_checkpoint_events - save all events of process @p to user space.
+ *
+ * Fills one struct kfd_criu_event_priv_data per entry of p->event_idr and
+ * copies the array to @user_priv_data at *@priv_data_offset. The offset is
+ * advanced by the size of the array, even if the copy fails.
+ *
+ * Returns 0 on success (including when there are no events), -ENOMEM if the
+ * temporary array cannot be allocated, or -EFAULT if the copy to user space
+ * fails.
+ */
+int kfd_criu_checkpoint_events(struct kfd_process *p,
+			 uint8_t __user *user_priv_data,
+			 uint64_t *priv_data_offset)
+{
+	struct kfd_criu_event_priv_data *ev_privs;
+	int i = 0;
+	int ret = 0;
+	struct kfd_event *ev;
+	uint32_t ev_id;
+
+	uint32_t num_events = kfd_get_num_events(p);
+
+	if (!num_events)
+		return 0;
+
+	/* kvcalloc() zeroes the array and checks the count * size product. */
+	ev_privs = kvcalloc(num_events, sizeof(*ev_privs), GFP_KERNEL);
+	if (!ev_privs)
+		return -ENOMEM;
+
+	idr_for_each_entry(&p->event_idr, ev, ev_id) {
+		struct kfd_criu_event_priv_data *ev_priv;
+
+		/*
+		 * Currently, all events have the same size of private_data,
+		 * but the current ioctls and the CRIU plugin support
+		 * private_data of variable sizes.
+		 */
+		ev_priv = &ev_privs[i];
+
+		ev_priv->object_type = KFD_CRIU_OBJECT_TYPE_EVENT;
+
+		/* We store the user_handle with the first event */
+		if (i == 0 && p->signal_page)
+			ev_priv->user_handle = p->signal_handle;
+
+		ev_priv->event_id = ev->event_id;
+		ev_priv->auto_reset = ev->auto_reset;
+		ev_priv->type = ev->type;
+		ev_priv->signaled = ev->signaled;
+
+		if (ev_priv->type == KFD_EVENT_TYPE_MEMORY)
+			memcpy(&ev_priv->memory_exception_data,
+			       &ev->memory_exception_data,
+			       sizeof(struct kfd_hsa_memory_exception_data));
+		else if (ev_priv->type == KFD_EVENT_TYPE_HW_EXCEPTION)
+			memcpy(&ev_priv->hw_exception_data,
+			       &ev->hw_exception_data,
+			       sizeof(struct kfd_hsa_hw_exception_data));
+
+		pr_debug("Checkpointed event[%d] id = 0x%08x auto_reset = %x type = %x signaled = %x\n",
+			  i,
+			  ev_priv->event_id,
+			  ev_priv->auto_reset,
+			  ev_priv->type,
+			  ev_priv->signaled);
+		i++;
+	}
+
+	ret = copy_to_user(user_priv_data + *priv_data_offset,
+			   ev_privs, num_events * sizeof(*ev_privs));
+	if (ret) {
+		pr_err("Failed to copy events priv to user\n");
+		ret = -EFAULT;
+	}
+
+	*priv_data_offset += num_events * sizeof(*ev_privs);
+
+	kvfree(ev_privs);
+	return ret;
+}
+
+/* Return the number of entries idr_for_each_entry() visits in p->event_idr. */
+int kfd_get_num_events(struct kfd_process *p)
+{
+	u32 count = 0;
+	uint32_t id;
+	struct kfd_event *ev;
+
+	idr_for_each_entry(&p->event_idr, ev, id)
+		count++;
+
+	return count;
+}
+
/* Assumes that p is current. */
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id)
{
@@ -391,13 +627,18 @@ static void set_event(struct kfd_event *ev)
/* Auto reset if the list is non-empty and we're waking
* someone. waitqueue_active is safe here because we're
- * protected by the p->event_mutex, which is also held when
+ * protected by the ev->lock, which is also held when
* updating the wait queues in kfd_wait_on_events.
*/
ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq);
+ if (!(++ev->event_age)) {
+ /* Never wrap back to reserved/default event age 0/1 */
+ ev->event_age = 2;
+ WARN_ONCE(1, "event_age wrap back!");
+ }
list_for_each_entry(waiter, &ev->wq.head, wait.entry)
- waiter->activated = true;
+ WRITE_ONCE(waiter->activated, true);
wake_up_all(&ev->wq);
}
@@ -408,16 +649,23 @@ int kfd_set_event(struct kfd_process *p, uint32_t event_id)
int ret = 0;
struct kfd_event *ev;
- mutex_lock(&p->event_mutex);
+ rcu_read_lock();
ev = lookup_event_by_id(p, event_id);
+ if (!ev) {
+ ret = -EINVAL;
+ goto unlock_rcu;
+ }
+ spin_lock(&ev->lock);
- if (ev && event_can_be_cpu_signaled(ev))
+ if (event_can_be_cpu_signaled(ev))
set_event(ev);
else
ret = -EINVAL;
- mutex_unlock(&p->event_mutex);
+ spin_unlock(&ev->lock);
+unlock_rcu:
+ rcu_read_unlock();
return ret;
}
@@ -432,23 +680,30 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
int ret = 0;
struct kfd_event *ev;
- mutex_lock(&p->event_mutex);
+ rcu_read_lock();
ev = lookup_event_by_id(p, event_id);
+ if (!ev) {
+ ret = -EINVAL;
+ goto unlock_rcu;
+ }
+ spin_lock(&ev->lock);
- if (ev && event_can_be_cpu_signaled(ev))
+ if (event_can_be_cpu_signaled(ev))
reset_event(ev);
else
ret = -EINVAL;
- mutex_unlock(&p->event_mutex);
+ spin_unlock(&ev->lock);
+unlock_rcu:
+ rcu_read_unlock();
return ret;
}
static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
{
- page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT;
+ WRITE_ONCE(page_slots(p->signal_page)[ev->event_id], UNSIGNALED_EVENT_SLOT);
}
static void set_event_from_interrupt(struct kfd_process *p,
@@ -456,7 +711,9 @@ static void set_event_from_interrupt(struct kfd_process *p,
{
if (ev && event_can_be_gpu_signaled(ev)) {
acknowledge_signal(p, ev);
+ spin_lock(&ev->lock);
set_event(ev);
+ spin_unlock(&ev->lock);
}
}
@@ -470,12 +727,12 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
* to process context, kfd_process could attempt to exit while we are
* running so the lookup function increments the process ref count.
*/
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
if (!p)
return; /* Presumably process exited. */
- mutex_lock(&p->event_mutex);
+ rcu_read_lock();
if (valid_id_bits)
ev = lookup_signaled_event_by_partial_id(p, partial_id,
@@ -503,7 +760,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
if (id >= KFD_SIGNAL_EVENT_LIMIT)
break;
- if (slots[id] != UNSIGNALED_EVENT_SLOT)
+ if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT)
set_event_from_interrupt(p, ev);
}
} else {
@@ -511,15 +768,15 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
* iterate over the signal slots and lookup
* only signaled events from the IDR.
*/
- for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++)
- if (slots[id] != UNSIGNALED_EVENT_SLOT) {
+ for (id = 1; id < KFD_SIGNAL_EVENT_LIMIT; id++)
+ if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT) {
ev = lookup_event_by_id(p, id);
set_event_from_interrupt(p, ev);
}
}
}
- mutex_unlock(&p->event_mutex);
+ rcu_read_unlock();
kfd_unref_process(p);
}
@@ -528,43 +785,44 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
struct kfd_event_waiter *event_waiters;
uint32_t i;
- event_waiters = kmalloc_array(num_events,
- sizeof(struct kfd_event_waiter),
- GFP_KERNEL);
+ event_waiters = kcalloc(num_events, sizeof(struct kfd_event_waiter),
+ GFP_KERNEL);
+ if (!event_waiters)
+ return NULL;
- for (i = 0; (event_waiters) && (i < num_events) ; i++) {
+ for (i = 0; i < num_events; i++)
init_wait(&event_waiters[i].wait);
- event_waiters[i].activated = false;
- }
return event_waiters;
}
-static int init_event_waiter_get_status(struct kfd_process *p,
+static int init_event_waiter(struct kfd_process *p,
struct kfd_event_waiter *waiter,
- uint32_t event_id)
+ struct kfd_event_data *event_data)
{
- struct kfd_event *ev = lookup_event_by_id(p, event_id);
+ struct kfd_event *ev = lookup_event_by_id(p, event_data->event_id);
if (!ev)
return -EINVAL;
+ spin_lock(&ev->lock);
waiter->event = ev;
waiter->activated = ev->signaled;
ev->signaled = ev->signaled && !ev->auto_reset;
- return 0;
-}
-
-static void init_event_waiter_add_to_waitlist(struct kfd_event_waiter *waiter)
-{
- struct kfd_event *ev = waiter->event;
+ /* last_event_age = 0 reserved for backward compatible */
+ if (waiter->event->type == KFD_EVENT_TYPE_SIGNAL &&
+ event_data->signal_event_data.last_event_age) {
+ waiter->event_age_enabled = true;
+ if (ev->event_age != event_data->signal_event_data.last_event_age)
+ waiter->activated = true;
+ }
- /* Only add to the wait list if we actually need to
- * wait on this event.
- */
if (!waiter->activated)
add_wait_queue(&ev->wq, &waiter->wait);
+ spin_unlock(&ev->lock);
+
+ return 0;
}
/* test_event_condition - Test condition of events being waited for
@@ -584,10 +842,10 @@ static uint32_t test_event_condition(bool all, uint32_t num_events,
uint32_t activated_count = 0;
for (i = 0; i < num_events; i++) {
- if (!event_waiters[i].event)
+ if (!READ_ONCE(event_waiters[i].event))
return KFD_IOC_WAIT_RESULT_FAIL;
- if (event_waiters[i].activated) {
+ if (READ_ONCE(event_waiters[i].activated)) {
if (!all)
return KFD_IOC_WAIT_RESULT_COMPLETE;
@@ -607,30 +865,40 @@ static int copy_signaled_event_data(uint32_t num_events,
struct kfd_event_waiter *event_waiters,
struct kfd_event_data __user *data)
{
- struct kfd_hsa_memory_exception_data *src;
- struct kfd_hsa_memory_exception_data __user *dst;
+ void *src;
+ void __user *dst;
struct kfd_event_waiter *waiter;
struct kfd_event *event;
- uint32_t i;
+ uint32_t i, size = 0;
for (i = 0; i < num_events; i++) {
waiter = &event_waiters[i];
event = waiter->event;
- if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) {
- dst = &data[i].memory_exception_data;
- src = &event->memory_exception_data;
- if (copy_to_user(dst, src,
- sizeof(struct kfd_hsa_memory_exception_data)))
+ if (!event)
+ return -EINVAL; /* event was destroyed */
+ if (waiter->activated) {
+ if (event->type == KFD_EVENT_TYPE_MEMORY) {
+ dst = &data[i].memory_exception_data;
+ src = &event->memory_exception_data;
+ size = sizeof(struct kfd_hsa_memory_exception_data);
+ } else if (event->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
+ dst = &data[i].memory_exception_data;
+ src = &event->hw_exception_data;
+ size = sizeof(struct kfd_hsa_hw_exception_data);
+ } else if (event->type == KFD_EVENT_TYPE_SIGNAL &&
+ waiter->event_age_enabled) {
+ dst = &data[i].signal_event_data.last_event_age;
+ src = &event->event_age;
+ size = sizeof(u64);
+ }
+ if (size && copy_to_user(dst, src, size))
return -EFAULT;
}
}
return 0;
-
}
-
-
static long user_timeout_to_jiffies(uint32_t user_timeout_ms)
{
if (user_timeout_ms == KFD_EVENT_TIMEOUT_IMMEDIATE)
@@ -649,21 +917,28 @@ static long user_timeout_to_jiffies(uint32_t user_timeout_ms)
return msecs_to_jiffies(user_timeout_ms) + 1;
}
-static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
+static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters,
+ bool undo_auto_reset)
{
uint32_t i;
for (i = 0; i < num_events; i++)
- if (waiters[i].event)
+ if (waiters[i].event) {
+ spin_lock(&waiters[i].event->lock);
remove_wait_queue(&waiters[i].event->wq,
&waiters[i].wait);
+ if (undo_auto_reset && waiters[i].activated &&
+ waiters[i].event && waiters[i].event->auto_reset)
+ set_event(waiters[i].event);
+ spin_unlock(&waiters[i].event->lock);
+ }
kfree(waiters);
}
int kfd_wait_on_events(struct kfd_process *p,
uint32_t num_events, void __user *data,
- bool all, uint32_t user_timeout_ms,
+ bool all, uint32_t *user_timeout_ms,
uint32_t *wait_result)
{
struct kfd_event_data __user *events =
@@ -672,7 +947,7 @@ int kfd_wait_on_events(struct kfd_process *p,
int ret = 0;
struct kfd_event_waiter *event_waiters = NULL;
- long timeout = user_timeout_to_jiffies(user_timeout_ms);
+ long timeout = user_timeout_to_jiffies(*user_timeout_ms);
event_waiters = alloc_event_waiters(num_events);
if (!event_waiters) {
@@ -680,6 +955,9 @@ int kfd_wait_on_events(struct kfd_process *p,
goto out;
}
+ /* Use p->event_mutex here to protect against concurrent creation and
+ * destruction of events while we initialize event_waiters.
+ */
mutex_lock(&p->event_mutex);
for (i = 0; i < num_events; i++) {
@@ -691,8 +969,7 @@ int kfd_wait_on_events(struct kfd_process *p,
goto out_unlock;
}
- ret = init_event_waiter_get_status(p, &event_waiters[i],
- event_data.event_id);
+ ret = init_event_waiter(p, &event_waiters[i], &event_data);
if (ret)
goto out_unlock;
}
@@ -710,10 +987,6 @@ int kfd_wait_on_events(struct kfd_process *p,
goto out_unlock;
}
- /* Add to wait lists if we need to wait. */
- for (i = 0; i < num_events; i++)
- init_event_waiter_add_to_waitlist(&event_waiters[i]);
-
mutex_unlock(&p->event_mutex);
while (true) {
@@ -723,15 +996,11 @@ int kfd_wait_on_events(struct kfd_process *p,
}
if (signal_pending(current)) {
- /*
- * This is wrong when a nonzero, non-infinite timeout
- * is specified. We need to use
- * ERESTARTSYS_RESTARTBLOCK, but struct restart_block
- * contains a union with data for each user and it's
- * in generic kernel code that I don't want to
- * touch yet.
- */
ret = -ERESTARTSYS;
+ if (*user_timeout_ms != KFD_EVENT_TIMEOUT_IMMEDIATE &&
+ *user_timeout_ms != KFD_EVENT_TIMEOUT_INFINITE)
+ *user_timeout_ms = jiffies_to_msecs(
+ max(0l, timeout-1));
break;
}
@@ -758,16 +1027,21 @@ int kfd_wait_on_events(struct kfd_process *p,
}
__set_current_state(TASK_RUNNING);
+ mutex_lock(&p->event_mutex);
/* copy_signaled_event_data may sleep. So this has to happen
* after the task state is set back to RUNNING.
+ *
+ * The event may also have been destroyed after signaling. So
+ * copy_signaled_event_data also must confirm that the event
+ * still exists. Therefore this must be under the p->event_mutex
+ * which is also held when events are destroyed.
*/
if (!ret && *wait_result == KFD_IOC_WAIT_RESULT_COMPLETE)
ret = copy_signaled_event_data(num_events,
event_waiters, events);
- mutex_lock(&p->event_mutex);
out_unlock:
- free_waiters(num_events, event_waiters);
+ free_waiters(num_events, event_waiters, ret == -ERESTARTSYS);
mutex_unlock(&p->event_mutex);
out:
if (ret)
@@ -801,8 +1075,8 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
pfn = __pa(page->kernel_address);
pfn >>= PAGE_SHIFT;
- vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE
- | VM_DONTDUMP | VM_PFNMAP;
+ vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE
+ | VM_DONTDUMP | VM_PFNMAP);
pr_debug("Mapping signal page\n");
pr_debug(" start user address == 0x%08lx\n", vma->vm_start);
@@ -824,8 +1098,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
}
/*
- * Assumes that p->event_mutex is held and of course
- * that p is not going away (current or locked).
+ * Assumes that p is not going away.
*/
static void lookup_events_by_type_and_signal(struct kfd_process *p,
int type, void *event_data)
@@ -837,6 +1110,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
+ rcu_read_lock();
+
id = KFD_FIRST_NONSIGNAL_EVENT_ID;
idr_for_each_entry_continue(&p->event_idr, ev, id)
if (ev->type == type) {
@@ -844,15 +1119,17 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
dev_dbg(kfd_device,
"Event found: id %X type %d",
ev->event_id, ev->type);
+ spin_lock(&ev->lock);
set_event(ev);
if (ev->type == KFD_EVENT_TYPE_MEMORY && ev_data)
ev->memory_exception_data = *ev_data;
+ spin_unlock(&ev->lock);
}
if (type == KFD_EVENT_TYPE_MEMORY) {
dev_warn(kfd_device,
- "Sending SIGSEGV to process %d (pasid 0x%x)",
- p->lead_thread->pid, p->pasid);
+ "Sending SIGSEGV to process pid %d",
+ p->lead_thread->pid);
send_sig(SIGSEGV, p->lead_thread, 0);
}
@@ -860,95 +1137,18 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
if (send_signal) {
if (send_sigterm) {
dev_warn(kfd_device,
- "Sending SIGTERM to process %d (pasid 0x%x)",
- p->lead_thread->pid, p->pasid);
+ "Sending SIGTERM to process pid %d",
+ p->lead_thread->pid);
send_sig(SIGTERM, p->lead_thread, 0);
} else {
dev_err(kfd_device,
- "Process %d (pasid 0x%x) got unhandled exception",
- p->lead_thread->pid, p->pasid);
+ "Process pid %d got unhandled exception",
+ p->lead_thread->pid);
}
}
-}
-
-#ifdef KFD_SUPPORT_IOMMU_V2
-void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid,
- unsigned long address, bool is_write_requested,
- bool is_execute_requested)
-{
- struct kfd_hsa_memory_exception_data memory_exception_data;
- struct vm_area_struct *vma;
-
- /*
- * Because we are called from arbitrary context (workqueue) as opposed
- * to process context, kfd_process could attempt to exit while we are
- * running so the lookup function increments the process ref count.
- */
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
- struct mm_struct *mm;
-
- if (!p)
- return; /* Presumably process exited. */
-
- /* Take a safe reference to the mm_struct, which may otherwise
- * disappear even while the kfd_process is still referenced.
- */
- mm = get_task_mm(p->lead_thread);
- if (!mm) {
- kfd_unref_process(p);
- return; /* Process is exiting */
- }
-
- memset(&memory_exception_data, 0, sizeof(memory_exception_data));
-
- mmap_read_lock(mm);
- vma = find_vma(mm, address);
-
- memory_exception_data.gpu_id = dev->id;
- memory_exception_data.va = address;
- /* Set failure reason */
- memory_exception_data.failure.NotPresent = 1;
- memory_exception_data.failure.NoExecute = 0;
- memory_exception_data.failure.ReadOnly = 0;
- if (vma && address >= vma->vm_start) {
- memory_exception_data.failure.NotPresent = 0;
-
- if (is_write_requested && !(vma->vm_flags & VM_WRITE))
- memory_exception_data.failure.ReadOnly = 1;
- else
- memory_exception_data.failure.ReadOnly = 0;
-
- if (is_execute_requested && !(vma->vm_flags & VM_EXEC))
- memory_exception_data.failure.NoExecute = 1;
- else
- memory_exception_data.failure.NoExecute = 0;
- }
-
- mmap_read_unlock(mm);
- mmput(mm);
- pr_debug("notpresent %d, noexecute %d, readonly %d\n",
- memory_exception_data.failure.NotPresent,
- memory_exception_data.failure.NoExecute,
- memory_exception_data.failure.ReadOnly);
-
- /* Workaround on Raven to not kill the process when memory is freed
- * before IOMMU is able to finish processing all the excessive PPRs
- */
- if (dev->device_info->asic_family != CHIP_RAVEN &&
- dev->device_info->asic_family != CHIP_RENOIR) {
- mutex_lock(&p->event_mutex);
-
- /* Lookup events by type and signal them */
- lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY,
- &memory_exception_data);
-
- mutex_unlock(&p->event_mutex);
- }
-
- kfd_unref_process(p);
+ rcu_read_unlock();
}
-#endif /* KFD_SUPPORT_IOMMU_V2 */
void kfd_signal_hw_exception_event(u32 pasid)
{
@@ -957,58 +1157,87 @@ void kfd_signal_hw_exception_event(u32 pasid)
* to process context, kfd_process could attempt to exit while we are
* running so the lookup function increments the process ref count.
*/
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
if (!p)
return; /* Presumably process exited. */
- mutex_lock(&p->event_mutex);
-
- /* Lookup events by type and signal them */
lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL);
-
- mutex_unlock(&p->event_mutex);
kfd_unref_process(p);
}
-void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
- struct kfd_vm_fault_info *info)
+/*
+ * Report a "not present" memory fault at @gpu_va on every device of process
+ * @p: each process device is evicted and then signalled with a memory
+ * exception carrying that device's user GPU id.
+ */
+void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t gpu_va)
+{
+	struct kfd_hsa_memory_exception_data fault;
+	int idx;
+
+	memset(&fault, 0, sizeof(fault));
+	fault.failure.NotPresent = 1;
+	fault.va = gpu_va;
+
+	/* Send VM seg fault to all kfd process device */
+	for (idx = 0; idx < p->n_pdds; idx++) {
+		struct kfd_process_device *pdd = p->pdds[idx];
+
+		fault.gpu_id = pdd->user_gpu_id;
+		kfd_evict_process_device(pdd);
+		kfd_signal_vm_fault_event(pdd, NULL, &fault);
+	}
+}
+
+void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
+ struct kfd_vm_fault_info *info,
+ struct kfd_hsa_memory_exception_data *data)
{
struct kfd_event *ev;
uint32_t id;
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process *p = pdd->process;
struct kfd_hsa_memory_exception_data memory_exception_data;
+ int user_gpu_id;
- if (!p)
- return; /* Presumably process exited. */
- memset(&memory_exception_data, 0, sizeof(memory_exception_data));
- memory_exception_data.gpu_id = dev->id;
- memory_exception_data.failure.imprecise = true;
- /* Set failure reason */
- if (info) {
- memory_exception_data.va = (info->page_addr) << PAGE_SHIFT;
- memory_exception_data.failure.NotPresent =
- info->prot_valid ? 1 : 0;
- memory_exception_data.failure.NoExecute =
- info->prot_exec ? 1 : 0;
- memory_exception_data.failure.ReadOnly =
- info->prot_write ? 1 : 0;
- memory_exception_data.failure.imprecise = 0;
+ user_gpu_id = kfd_process_get_user_gpu_id(p, pdd->dev->id);
+ if (unlikely(user_gpu_id == -EINVAL)) {
+ WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n",
+ pdd->dev->id);
+ return;
}
- mutex_lock(&p->event_mutex);
+
+ /* SoC15 chips and onwards will pass in data from now on. */
+ if (!data) {
+ memset(&memory_exception_data, 0, sizeof(memory_exception_data));
+ memory_exception_data.gpu_id = user_gpu_id;
+ memory_exception_data.failure.imprecise = true;
+
+ /* Set failure reason */
+ if (info) {
+ memory_exception_data.va = (info->page_addr) <<
+ PAGE_SHIFT;
+ memory_exception_data.failure.NotPresent =
+ info->prot_valid ? 1 : 0;
+ memory_exception_data.failure.NoExecute =
+ info->prot_exec ? 1 : 0;
+ memory_exception_data.failure.ReadOnly =
+ info->prot_write ? 1 : 0;
+ memory_exception_data.failure.imprecise = 0;
+ }
+ }
+
+ rcu_read_lock();
id = KFD_FIRST_NONSIGNAL_EVENT_ID;
idr_for_each_entry_continue(&p->event_idr, ev, id)
if (ev->type == KFD_EVENT_TYPE_MEMORY) {
- ev->memory_exception_data = memory_exception_data;
+ spin_lock(&ev->lock);
+ ev->memory_exception_data = data ? *data :
+ memory_exception_data;
set_event(ev);
+ spin_unlock(&ev->lock);
}
- mutex_unlock(&p->event_mutex);
- kfd_unref_process(p);
+ rcu_read_unlock();
}
-void kfd_signal_reset_event(struct kfd_dev *dev)
+void kfd_signal_reset_event(struct kfd_node *dev)
{
struct kfd_hsa_hw_exception_data hw_exception_data;
struct kfd_hsa_memory_exception_data memory_exception_data;
@@ -1022,69 +1251,129 @@ void kfd_signal_reset_event(struct kfd_dev *dev)
/* Whole gpu reset caused by GPU hang and memory is lost */
memset(&hw_exception_data, 0, sizeof(hw_exception_data));
- hw_exception_data.gpu_id = dev->id;
hw_exception_data.memory_lost = 1;
hw_exception_data.reset_cause = reset_cause;
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
memory_exception_data.ErrorType = KFD_MEM_ERR_SRAM_ECC;
- memory_exception_data.gpu_id = dev->id;
memory_exception_data.failure.imprecise = true;
idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- mutex_lock(&p->event_mutex);
+ int user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+ struct kfd_process_device *pdd = kfd_get_process_device_data(dev, p);
+
+ if (unlikely(user_gpu_id == -EINVAL)) {
+ WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+ continue;
+ }
+
+ if (unlikely(!pdd)) {
+ WARN_ONCE(1, "Could not get device data from process pid:%d\n",
+ p->lead_thread->pid);
+ continue;
+ }
+
+ if (dev->dqm->detect_hang_count && !pdd->has_reset_queue)
+ continue;
+
+ if (dev->dqm->detect_hang_count) {
+ struct amdgpu_task_info *ti;
+ struct amdgpu_fpriv *drv_priv;
+
+ if (unlikely(amdgpu_file_to_fpriv(pdd->drm_file, &drv_priv))) {
+ WARN_ONCE(1, "Could not get vm for device %x from pid:%d\n",
+ dev->id, p->lead_thread->pid);
+ continue;
+ }
+
+ ti = amdgpu_vm_get_task_info_vm(&drv_priv->vm);
+ if (ti) {
+ dev_err(dev->adev->dev,
+ "Queues reset on process %s tid %d thread %s pid %d\n",
+ ti->process_name, ti->tgid, ti->task.comm, ti->task.pid);
+ amdgpu_vm_put_task_info(ti);
+ }
+ }
+
+ rcu_read_lock();
+
id = KFD_FIRST_NONSIGNAL_EVENT_ID;
idr_for_each_entry_continue(&p->event_idr, ev, id) {
if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
+ spin_lock(&ev->lock);
ev->hw_exception_data = hw_exception_data;
+ ev->hw_exception_data.gpu_id = user_gpu_id;
set_event(ev);
+ spin_unlock(&ev->lock);
}
if (ev->type == KFD_EVENT_TYPE_MEMORY &&
reset_cause == KFD_HW_EXCEPTION_ECC) {
+ spin_lock(&ev->lock);
ev->memory_exception_data = memory_exception_data;
+ ev->memory_exception_data.gpu_id = user_gpu_id;
set_event(ev);
+ spin_unlock(&ev->lock);
}
}
- mutex_unlock(&p->event_mutex);
+
+ rcu_read_unlock();
}
srcu_read_unlock(&kfd_processes_srcu, idx);
}
-void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid)
+void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid)
{
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
struct kfd_hsa_memory_exception_data memory_exception_data;
struct kfd_hsa_hw_exception_data hw_exception_data;
struct kfd_event *ev;
uint32_t id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+ int user_gpu_id;
- if (!p)
+ if (!p) {
+ dev_warn(dev->adev->dev, "Not find process with pasid:%d\n", pasid);
return; /* Presumably process exited. */
+ }
+
+ user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+ if (unlikely(user_gpu_id == -EINVAL)) {
+ WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+ kfd_unref_process(p);
+ return;
+ }
memset(&hw_exception_data, 0, sizeof(hw_exception_data));
- hw_exception_data.gpu_id = dev->id;
+ hw_exception_data.gpu_id = user_gpu_id;
hw_exception_data.memory_lost = 1;
hw_exception_data.reset_cause = KFD_HW_EXCEPTION_ECC;
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
memory_exception_data.ErrorType = KFD_MEM_ERR_POISON_CONSUMED;
- memory_exception_data.gpu_id = dev->id;
+ memory_exception_data.gpu_id = user_gpu_id;
memory_exception_data.failure.imprecise = true;
- mutex_lock(&p->event_mutex);
+ rcu_read_lock();
+
idr_for_each_entry_continue(&p->event_idr, ev, id) {
if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
+ spin_lock(&ev->lock);
ev->hw_exception_data = hw_exception_data;
set_event(ev);
+ spin_unlock(&ev->lock);
}
if (ev->type == KFD_EVENT_TYPE_MEMORY) {
+ spin_lock(&ev->lock);
ev->memory_exception_data = memory_exception_data;
set_event(ev);
+ spin_unlock(&ev->lock);
}
}
- mutex_unlock(&p->event_mutex);
+
+ dev_warn(dev->adev->dev, "Send SIGBUS to process %s(pasid:%d)\n",
+ p->lead_thread->comm, pasid);
+ rcu_read_unlock();
/* user application will handle SIGBUS signal */
send_sig(SIGBUS, p->lead_thread, 0);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
index c8fe5dbdad55..52ccfd397c2b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -52,12 +53,14 @@ struct signal_page;
struct kfd_event {
u32 event_id;
+ u64 event_age;
bool signaled;
bool auto_reset;
int type;
+ spinlock_t lock;
wait_queue_head_t wq; /* List of event waiters. */
/* Only for signal events. */
@@ -68,6 +71,8 @@ struct kfd_event {
struct kfd_hsa_memory_exception_data memory_exception_data;
struct kfd_hsa_hw_exception_data hw_exception_data;
};
+
+ struct rcu_head rcu; /* for asynchronous kfree_rcu */
};
#define KFD_EVENT_TIMEOUT_IMMEDIATE 0
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index d1388896f9c1..1d170dc50df3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -22,7 +23,6 @@
*/
#include <linux/device.h>
-#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/sched.h>
@@ -34,7 +34,8 @@
#include "kfd_priv.h"
#include <linux/mm.h>
#include <linux/mman.h>
-#include <asm/processor.h>
+#include <linux/processor.h>
+#include "amdgpu_vm.h"
/*
* The primary memory I/O features being added for revisions of gfxip
@@ -321,22 +322,19 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
pdd->lds_base = MAKE_LDS_APP_BASE_VI();
pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
- if (!pdd->dev->use_iommu_v2) {
- /* dGPUs: SVM aperture starting at 0
- * with small reserved space for kernel.
- * Set them to CANONICAL addresses.
- */
- pdd->gpuvm_base = SVM_USER_BASE;
- pdd->gpuvm_limit =
- pdd->dev->shared_resources.gpuvm_size - 1;
- } else {
- /* set them to non CANONICAL addresses, and no SVM is
- * allocated.
- */
- pdd->gpuvm_base = MAKE_GPUVM_APP_BASE_VI(id + 1);
- pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base,
- pdd->dev->shared_resources.gpuvm_size);
- }
+ /* dGPUs: SVM aperture starting at 0
+ * with small reserved space for kernel.
+ * Set them to CANONICAL addresses.
+ */
+ pdd->gpuvm_base = max(SVM_USER_BASE, AMDGPU_VA_RESERVED_BOTTOM);
+ pdd->gpuvm_limit =
+ pdd->dev->kfd->shared_resources.gpuvm_size - 1;
+
+ /* dGPUs: the reserved space for kernel
+ * before SVM
+ */
+ pdd->qpd.cwsr_base = SVM_CWSR_BASE;
+ pdd->qpd.ib_base = SVM_IB_BASE;
pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI();
pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
@@ -347,24 +345,24 @@ static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id)
pdd->lds_base = MAKE_LDS_APP_BASE_V9();
pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
- /* Raven needs SVM to support graphic handle, etc. Leave the small
- * reserved space before SVM on Raven as well, even though we don't
- * have to.
- * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that they
- * are used in Thunk to reserve SVM.
- */
- pdd->gpuvm_base = SVM_USER_BASE;
+ pdd->gpuvm_base = AMDGPU_VA_RESERVED_BOTTOM;
pdd->gpuvm_limit =
- pdd->dev->shared_resources.gpuvm_size - 1;
+ pdd->dev->kfd->shared_resources.gpuvm_size - 1;
pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9();
pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
+
+ /*
+ * Place TBA/TMA on opposite side of VM hole to prevent
+ * stray faults from triggering SVM on these pages.
+ */
+ pdd->qpd.cwsr_base = AMDGPU_VA_RESERVED_TRAP_START(pdd->dev->adev);
}
int kfd_init_apertures(struct kfd_process *process)
{
uint8_t id = 0;
- struct kfd_dev *dev;
+ struct kfd_node *dev;
struct kfd_process_device *pdd;
/*Iterating over all devices*/
@@ -381,7 +379,8 @@ int kfd_init_apertures(struct kfd_process *process)
pdd = kfd_create_process_device_data(dev, process);
if (!pdd) {
- pr_err("Failed to create process device data\n");
+ dev_err(dev->adev->dev,
+ "Failed to create process device data\n");
return -ENOMEM;
}
/*
@@ -394,7 +393,7 @@ int kfd_init_apertures(struct kfd_process *process)
pdd->gpuvm_base = pdd->gpuvm_limit = 0;
pdd->scratch_base = pdd->scratch_limit = 0;
} else {
- switch (dev->device_info->asic_family) {
+ switch (dev->adev->asic_type) {
case CHIP_KAVERI:
case CHIP_HAWAII:
case CHIP_CARRIZO:
@@ -406,37 +405,14 @@ int kfd_init_apertures(struct kfd_process *process)
case CHIP_VEGAM:
kfd_init_apertures_vi(pdd, id);
break;
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- case CHIP_RENOIR:
- case CHIP_ARCTURUS:
- case CHIP_ALDEBARAN:
- case CHIP_NAVI10:
- case CHIP_NAVI12:
- case CHIP_NAVI14:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_BEIGE_GOBY:
- case CHIP_YELLOW_CARP:
- case CHIP_CYAN_SKILLFISH:
- kfd_init_apertures_v9(pdd, id);
- break;
default:
- WARN(1, "Unexpected ASIC family %u",
- dev->device_info->asic_family);
- return -EINVAL;
- }
-
- if (!dev->use_iommu_v2) {
- /* dGPUs: the reserved space for kernel
- * before SVM
- */
- pdd->qpd.cwsr_base = SVM_CWSR_BASE;
- pdd->qpd.ib_base = SVM_IB_BASE;
+ if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
+ kfd_init_apertures_v9(pdd, id);
+ else {
+ WARN(1, "Unexpected ASIC family %u",
+ dev->adev->asic_type);
+ return -EINVAL;
+ }
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
new file mode 100644
index 000000000000..3e1ad8974797
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
@@ -0,0 +1,386 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "kfd_events.h"
+#include "kfd_debug.h"
+#include "soc15_int.h"
+#include "kfd_device_queue_manager.h"
+
+/*
+ * GFX10 SQ Interrupts
+ *
+ * There are 3 encoding types of interrupts sourced from SQ sent as a 44-bit
+ * packet to the Interrupt Handler:
+ * Auto - Generated by the SQG (various cmd overflows, timestamps etc)
+ * Wave - Generated by S_SENDMSG through a shader program
+ * Error - HW generated errors (Illegal instructions, Memviols, EDC etc)
+ *
+ * The 44-bit packet is mapped as {context_id1[7:0],context_id0[31:0]} plus
+ * 4-bits for VMID (SOC15_VMID_FROM_IH_ENTRY) as such:
+ *
+ * - context_id1[7:6]
+ * Encoding type (0 = Auto, 1 = Wave, 2 = Error)
+ *
+ * - context_id0[24]
+ * PRIV bit indicates that Wave S_SEND or error occurred within trap
+ *
+ * - context_id0[22:0]
+ * 23-bit data with the following layout per encoding type:
+ * Auto - only context_id0[8:0] is used, which reports various interrupts
+ * generated by SQG. The rest is 0.
+ * Wave - user data sent from m0 via S_SENDMSG
+ * Error - Error type (context_id0[22:19]), Error Details (rest of bits)
+ *
+ * The other context_id bits show coordinates (SE/SH/CU/SIMD/WGP) for wave
+ * S_SENDMSG and Errors. These are 0 for Auto.
+ */
+
+enum SQ_INTERRUPT_WORD_ENCODING { /* encoding type carried in context_id1[7:6] */
+ SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x0, /* 0: "Auto", generated by the SQG */
+ SQ_INTERRUPT_WORD_ENCODING_INST, /* 1: "Wave", generated by S_SENDMSG from a shader */
+ SQ_INTERRUPT_WORD_ENCODING_ERROR, /* 2: "Error", HW generated error */
+};
+
+enum SQ_INTERRUPT_ERROR_TYPE { /* NOTE(review): presumably the values of the ERR_TYPE field (context_id0[22:19]) of error-encoded interrupts - confirm */
+ SQ_INTERRUPT_ERROR_TYPE_EDC_FUE = 0x0, /* 0 */
+ SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST, /* 1 */
+ SQ_INTERRUPT_ERROR_TYPE_MEMVIOL, /* 2 */
+ SQ_INTERRUPT_ERROR_TYPE_EDC_FED, /* 3 */
+};
+
+/* SQ_INTERRUPT_WORD_AUTO_CTXID */
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE__SHIFT 0
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__WLT__SHIFT 1
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_BUF0_FULL__SHIFT 2
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_BUF1_FULL__SHIFT 3
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_UTC_ERROR__SHIFT 7
+#define SQ_INTERRUPT_WORD_AUTO_CTXID1__SE_ID__SHIFT 4
+#define SQ_INTERRUPT_WORD_AUTO_CTXID1__ENCODING__SHIFT 6
+
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_MASK 0x00000001
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__WLT_MASK 0x00000002
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_BUF0_FULL_MASK 0x00000004
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_BUF1_FULL_MASK 0x00000008
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_UTC_ERROR_MASK 0x00000080
+#define SQ_INTERRUPT_WORD_AUTO_CTXID1__SE_ID_MASK 0x030
+#define SQ_INTERRUPT_WORD_AUTO_CTXID1__ENCODING_MASK 0x0c0
+
+/* SQ_INTERRUPT_WORD_WAVE_CTXID */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__DATA__SHIFT 0
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__SA_ID__SHIFT 23
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV__SHIFT 24
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__WAVE_ID__SHIFT 25
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__SIMD_ID__SHIFT 30
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__WGP_ID__SHIFT 0
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__SE_ID__SHIFT 4
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__ENCODING__SHIFT 6
+
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__DATA_MASK 0x000007fffff /* context_id0[22:0] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__SA_ID_MASK 0x0000800000 /* context_id0[23] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV_MASK 0x00001000000 /* context_id0[24] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__WAVE_ID_MASK 0x0003e000000 /* context_id0[29:25] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__SIMD_ID_MASK 0x000c0000000 /* context_id0[31:30] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__WGP_ID_MASK 0x00f /* context_id1[3:0] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__SE_ID_MASK 0x030 /* context_id1[5:4] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__ENCODING_MASK 0x0c0 /* context_id1[7:6] */
+
+#define KFD_CTXID0__ERR_TYPE_MASK 0x780000
+#define KFD_CTXID0__ERR_TYPE__SHIFT 19
+
+/* GFX10 SQ interrupt ENC type bit (context_id1[7:6]) for wave s_sendmsg */
+#define KFD_CONTEXT_ID1_ENC_TYPE_WAVE_MASK 0x40
+/* GFX10 SQ interrupt PRIV bit (context_id0[24]) for s_sendmsg inside trap */
+#define KFD_CONTEXT_ID0_PRIV_MASK 0x1000000
+/*
+ * The debugger will send user data(m0) with PRIV=1 to indicate it requires
+ * notification from the KFD with the following queue id (DOORBELL_ID) and
+ * trap code (TRAP_CODE).
+ */
+#define KFD_CONTEXT_ID0_DEBUG_DOORBELL_MASK 0x0003ff
+#define KFD_CONTEXT_ID0_DEBUG_TRAP_CODE_SHIFT 10
+#define KFD_CONTEXT_ID0_DEBUG_TRAP_CODE_MASK 0x07fc00
+#define KFD_DEBUG_DOORBELL_ID(ctxid0) ((ctxid0) & \
+ KFD_CONTEXT_ID0_DEBUG_DOORBELL_MASK)
+#define KFD_DEBUG_TRAP_CODE(ctxid0) (((ctxid0) & \
+ KFD_CONTEXT_ID0_DEBUG_TRAP_CODE_MASK) \
+ >> KFD_CONTEXT_ID0_DEBUG_TRAP_CODE_SHIFT)
+#define KFD_DEBUG_CP_BAD_OP_ECODE_MASK 0x3fffc00
+#define KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT 10
+#define KFD_DEBUG_CP_BAD_OP_ECODE(ctxid0) (((ctxid0) & \
+ KFD_DEBUG_CP_BAD_OP_ECODE_MASK) \
+ >> KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT)
+
+/*
+ * event_interrupt_isr_v10 - first-stage filter for GFX10 IH ring entries.
+ * @dev: KFD node that received the interrupt
+ * @ih_ring_entry: raw IH ring entry (its first eight dwords are dumped at
+ *                 debug level)
+ * @patched_ihre: not used by this implementation
+ * @patched_flag: not used by this implementation
+ *
+ * Return: true when the entry has to be forwarded to the work queue
+ * (event_interrupt_wq_v10), false when it can be dropped here.
+ */
+static bool event_interrupt_isr_v10(struct kfd_node *dev,
+				    const uint32_t *ih_ring_entry,
+				    uint32_t *patched_ihre,
+				    bool *patched_flag)
+{
+	uint16_t source_id, client_id, pasid, vmid;
+	const uint32_t *data = ih_ring_entry;
+
+	source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+	client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+
+	/* Only handle interrupts from KFD VMIDs (fence interrupts are exempt) */
+	vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+	if (!KFD_IRQ_IS_FENCE(client_id, source_id) &&
+	    (vmid < dev->vm_info.first_vmid_kfd ||
+	     vmid > dev->vm_info.last_vmid_kfd))
+		return false;
+
+	pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+
+	/* Only handle clients we care about */
+	if (client_id != SOC15_IH_CLIENTID_GRBM_CP &&
+	    client_id != SOC15_IH_CLIENTID_SDMA0 &&
+	    client_id != SOC15_IH_CLIENTID_SDMA1 &&
+	    client_id != SOC15_IH_CLIENTID_SDMA2 &&
+	    client_id != SOC15_IH_CLIENTID_SDMA3 &&
+	    client_id != SOC15_IH_CLIENTID_SDMA4 &&
+	    client_id != SOC15_IH_CLIENTID_SDMA5 &&
+	    client_id != SOC15_IH_CLIENTID_SDMA6 &&
+	    client_id != SOC15_IH_CLIENTID_SDMA7 &&
+	    client_id != SOC15_IH_CLIENTID_VMC &&
+	    client_id != SOC15_IH_CLIENTID_VMC1 &&
+	    client_id != SOC15_IH_CLIENTID_UTCL2 &&
+	    client_id != SOC15_IH_CLIENTID_SE0SH &&
+	    client_id != SOC15_IH_CLIENTID_SE1SH &&
+	    client_id != SOC15_IH_CLIENTID_SE2SH &&
+	    client_id != SOC15_IH_CLIENTID_SE3SH)
+		return false;
+
+	dev_dbg(dev->adev->dev,
+		"client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
+		client_id, source_id, vmid, pasid);
+	dev_dbg(dev->adev->dev, "%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
+		data[0], data[1], data[2], data[3], data[4], data[5], data[6],
+		data[7]);
+
+	/* An entry without a PASID cannot be attributed to a process */
+	if (pasid == 0)
+		return false;
+
+	/* Interrupt types we care about: various signals and faults.
+	 * They will be forwarded to a work queue (see below).
+	 */
+	return source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
+		source_id == SOC15_INTSRC_SDMA_TRAP ||
+		source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
+		source_id == SOC15_INTSRC_CP_BAD_OPCODE ||
+		client_id == SOC15_IH_CLIENTID_VMC ||
+		client_id == SOC15_IH_CLIENTID_VMC1 ||
+		client_id == SOC15_IH_CLIENTID_UTCL2 ||
+		KFD_IRQ_IS_FENCE(client_id, source_id);
+}
+
+/* Log the decoded fields of an auto-encoded SQ interrupt (debug level). */
+static void print_sq_intr_info_auto(struct kfd_node *dev, uint32_t context_id0,
+				    uint32_t context_id1)
+{
+	dev_dbg_ratelimited(
+		dev->adev->dev,
+		"sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf0_full %d, ttrac_buf1_full %d, ttrace_utc_err %d\n",
+		REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_AUTO_CTXID1,
+			      SE_ID),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+			      THREAD_TRACE),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, WLT),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+			      THREAD_TRACE_BUF0_FULL),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+			      THREAD_TRACE_BUF1_FULL),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+			      THREAD_TRACE_UTC_ERROR));
+}
+
+/* Log the decoded fields of a wave (S_SENDMSG) SQ interrupt (debug level). */
+static void print_sq_intr_info_inst(struct kfd_node *dev, uint32_t context_id0,
+				    uint32_t context_id1)
+{
+	dev_dbg_ratelimited(
+		dev->adev->dev,
+		"sq_intr: inst, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
+		REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
+			      SE_ID),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, DATA),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
+			      SA_ID),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
+			      WAVE_ID),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
+			      SIMD_ID),
+		REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
+			      WGP_ID));
+}
+
+/* Log the decoded fields of an error-encoded SQ interrupt (warning level). */
+static void print_sq_intr_info_error(struct kfd_node *dev, uint32_t context_id0,
+				     uint32_t context_id1)
+{
+	uint32_t sq_intr_err_type = REG_GET_FIELD(context_id0, KFD_CTXID0,
+						  ERR_TYPE);
+
+	dev_warn_ratelimited(
+		dev->adev->dev,
+		"sq_intr: error, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d, err_type %d\n",
+		REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
+			      SE_ID),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, DATA),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
+			      SA_ID),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
+			      WAVE_ID),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
+			      SIMD_ID),
+		REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
+			      WGP_ID),
+		sq_intr_err_type);
+}
+
+/*
+ * event_interrupt_wq_v10 - deferred handling of a GFX10 IH ring entry.
+ * @dev: KFD node that received the interrupt
+ * @ih_ring_entry: raw IH ring entry
+ *
+ * Dispatches on the client id of the entry:
+ *  - CP / SE*SH clients: end-of-pipe and SQ interrupts signal KFD events,
+ *    privileged wave messages and CP bad-opcode errors are reported through
+ *    kfd_set_dbg_ev_from_interrupt();
+ *  - SDMA clients: SDMA traps signal KFD events;
+ *  - VMC / UTCL2 clients: the fault is reported as a device memory violation
+ *    through kfd_set_dbg_ev_from_interrupt();
+ *  - fence interrupts: kfd_process_close_interrupt_drain() is called.
+ */
+static void event_interrupt_wq_v10(struct kfd_node *dev,
+				   const uint32_t *ih_ring_entry)
+{
+	uint16_t source_id, client_id, pasid, vmid;
+	uint32_t context_id0, context_id1;
+	uint32_t encoding;
+
+	source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+	client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+	pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+	vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+	context_id0 = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);
+	context_id1 = SOC15_CONTEXT_ID1_FROM_IH_ENTRY(ih_ring_entry);
+
+	if (client_id == SOC15_IH_CLIENTID_GRBM_CP ||
+	    client_id == SOC15_IH_CLIENTID_SE0SH ||
+	    client_id == SOC15_IH_CLIENTID_SE1SH ||
+	    client_id == SOC15_IH_CLIENTID_SE2SH ||
+	    client_id == SOC15_IH_CLIENTID_SE3SH) {
+		if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
+			kfd_signal_event_interrupt(pasid, context_id0, 32);
+		else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) {
+			encoding = REG_GET_FIELD(context_id1,
+					SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING);
+			switch (encoding) {
+			case SQ_INTERRUPT_WORD_ENCODING_AUTO:
+				print_sq_intr_info_auto(dev, context_id0,
+							context_id1);
+				break;
+			case SQ_INTERRUPT_WORD_ENCODING_INST:
+				print_sq_intr_info_inst(dev, context_id0,
+							context_id1);
+				/* PRIV=1 marks a debugger notification request;
+				 * when kfd_set_dbg_ev_from_interrupt() returns
+				 * non-zero the regular event signal below is
+				 * skipped.
+				 */
+				if (context_id0 & SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV_MASK) {
+					if (kfd_set_dbg_ev_from_interrupt(dev, pasid,
+							KFD_DEBUG_DOORBELL_ID(context_id0),
+							KFD_DEBUG_TRAP_CODE(context_id0),
+							NULL, 0))
+						return;
+				}
+				break;
+			case SQ_INTERRUPT_WORD_ENCODING_ERROR:
+				print_sq_intr_info_error(dev, context_id0,
+							 context_id1);
+				break;
+			default:
+				break;
+			}
+			/* context_id0[22:0] is the 23-bit SQ interrupt payload */
+			kfd_signal_event_interrupt(pasid, context_id0 & 0x7fffff, 23);
+		} else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
+			   KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) {
+			kfd_set_dbg_ev_from_interrupt(dev, pasid,
+				KFD_DEBUG_DOORBELL_ID(context_id0),
+				KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)),
+				NULL,
+				0);
+		}
+	} else if (client_id == SOC15_IH_CLIENTID_SDMA0 ||
+		   client_id == SOC15_IH_CLIENTID_SDMA1 ||
+		   client_id == SOC15_IH_CLIENTID_SDMA2 ||
+		   client_id == SOC15_IH_CLIENTID_SDMA3 ||
+		   (client_id == SOC15_IH_CLIENTID_SDMA3_Sienna_Cichlid &&
+		    KFD_GC_VERSION(dev) == IP_VERSION(10, 3, 0)) ||
+		   client_id == SOC15_IH_CLIENTID_SDMA4 ||
+		   client_id == SOC15_IH_CLIENTID_SDMA5 ||
+		   client_id == SOC15_IH_CLIENTID_SDMA6 ||
+		   client_id == SOC15_IH_CLIENTID_SDMA7) {
+		if (source_id == SOC15_INTSRC_SDMA_TRAP) {
+			/* the low 28 bits of context_id0 are passed on */
+			kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28);
+		}
+	} else if (client_id == SOC15_IH_CLIENTID_VMC ||
+		   client_id == SOC15_IH_CLIENTID_VMC1 ||
+		   client_id == SOC15_IH_CLIENTID_UTCL2) {
+		struct kfd_vm_fault_info info = {0};
+		uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
+		struct kfd_hsa_memory_exception_data exception_data;
+
+		info.vmid = vmid;
+		info.mc_id = client_id;
+		/* page address: dword 4 plus the low 4 bits of dword 5 */
+		info.page_addr = ih_ring_entry[4] |
+			(uint64_t)(ih_ring_entry[5] & 0xf) << 32;
+		info.prot_valid = ring_id & 0x08;
+		info.prot_read = ring_id & 0x10;
+		info.prot_write = ring_id & 0x20;
+
+		memset(&exception_data, 0, sizeof(exception_data));
+		exception_data.gpu_id = dev->id;
+		exception_data.va = (info.page_addr) << PAGE_SHIFT;
+		exception_data.failure.NotPresent = info.prot_valid ? 1 : 0;
+		/* info.prot_exec is never assigned above, so it keeps the
+		 * zero from the initializer and NoExecute is always 0 here.
+		 */
+		exception_data.failure.NoExecute = info.prot_exec ? 1 : 0;
+		exception_data.failure.ReadOnly = info.prot_write ? 1 : 0;
+		exception_data.failure.imprecise = 0;
+
+		kfd_set_dbg_ev_from_interrupt(dev,
+					      pasid,
+					      -1,
+					      KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION),
+					      &exception_data,
+					      sizeof(exception_data));
+	} else if (KFD_IRQ_IS_FENCE(client_id, source_id)) {
+		kfd_process_close_interrupt_drain(pasid);
+	}
+}
+
+const struct kfd_event_interrupt_class event_interrupt_class_v10 = { /* interrupt callbacks defined in this file (GFX10) */
+ .interrupt_isr = event_interrupt_isr_v10, /* filter: decides whether an IH entry is forwarded to the work queue */
+ .interrupt_wq = event_interrupt_wq_v10, /* handler for the forwarded entries */
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
new file mode 100644
index 000000000000..2788a52714d1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
@@ -0,0 +1,418 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "kfd_priv.h"
+#include "kfd_events.h"
+#include "soc15_int.h"
+#include "kfd_device_queue_manager.h"
+#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
+#include "kfd_smi_events.h"
+#include "kfd_debug.h"
+
+/*
+ * GFX11 SQ Interrupts
+ *
+ * There are 3 encoding types of interrupts sourced from SQ sent as a 44-bit
+ * packet to the Interrupt Handler:
+ * Auto - Generated by the SQG (various cmd overflows, timestamps etc)
+ * Wave - Generated by S_SENDMSG through a shader program
+ * Error - HW generated errors (Illegal instructions, Memviols, EDC etc)
+ *
+ * The 44-bit packet is mapped as {context_id1[7:0],context_id0[31:0]} plus
+ * 4-bits for VMID (SOC15_VMID_FROM_IH_ENTRY) as such:
+ *
+ * - context_id1[7:6]
+ * Encoding type (0 = Auto, 1 = Wave, 2 = Error)
+ *
+ * - context_id0[26]
+ * PRIV bit indicates that Wave S_SEND or error occurred within trap
+ *
+ * - context_id0[24:0]
+ * 25-bit data with the following layout per encoding type:
+ * Auto - only context_id0[8:0] is used, which reports various interrupts
+ * generated by SQG. The rest is 0.
+ * Wave - user data sent from m0 via S_SENDMSG (context_id0[23:0])
+ * Error - Error Type (context_id0[24:21]), Error Details (context_id0[20:0])
+ *
+ * The other context_id bits show coordinates (SE/SH/CU/SIMD/WGP) for wave
+ * S_SENDMSG and Errors. These are 0 for Auto.
+ */
+
+enum SQ_INTERRUPT_WORD_ENCODING { /* encoding type carried in context_id1[7:6] */
+ SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x0, /* 0: "Auto", generated by the SQG */
+ SQ_INTERRUPT_WORD_ENCODING_INST, /* 1: "Wave", generated by S_SENDMSG from a shader */
+ SQ_INTERRUPT_WORD_ENCODING_ERROR, /* 2: "Error", HW generated error */
+};
+
+enum SQ_INTERRUPT_ERROR_TYPE { /* NOTE(review): presumably the values of the error TYPE field (context_id0[24:21]) of error-encoded interrupts - confirm */
+ SQ_INTERRUPT_ERROR_TYPE_EDC_FUE = 0x0, /* 0 */
+ SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST, /* 1 */
+ SQ_INTERRUPT_ERROR_TYPE_MEMVIOL, /* 2 */
+ SQ_INTERRUPT_ERROR_TYPE_EDC_FED, /* 3 */
+};
+
+/* SQ_INTERRUPT_WORD_AUTO_CTXID */
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE__SHIFT 0
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__WLT__SHIFT 1
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_BUF_FULL__SHIFT 2
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__REG_TIMESTAMP__SHIFT 3
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__CMD_TIMESTAMP__SHIFT 4
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__HOST_CMD_OVERFLOW__SHIFT 5
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__HOST_REG_OVERFLOW__SHIFT 6
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__IMMED_OVERFLOW__SHIFT 7
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_UTC_ERROR__SHIFT 8
+#define SQ_INTERRUPT_WORD_AUTO_CTXID1__ENCODING__SHIFT 6
+
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_MASK 0x00000001
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__WLT_MASK 0x00000002
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_BUF_FULL_MASK 0x00000004
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__REG_TIMESTAMP_MASK 0x00000008
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__CMD_TIMESTAMP_MASK 0x00000010
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__HOST_CMD_OVERFLOW_MASK 0x00000020
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__HOST_REG_OVERFLOW_MASK 0x00000040
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__IMMED_OVERFLOW_MASK 0x00000080
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_UTC_ERROR_MASK 0x00000100
+#define SQ_INTERRUPT_WORD_AUTO_CTXID1__ENCODING_MASK 0x000000c0
+
+/* SQ_INTERRUPT_WORD_WAVE_CTXID */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__DATA__SHIFT 0
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__SH_ID__SHIFT 25
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV__SHIFT 26
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__WAVE_ID__SHIFT 27
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__SIMD_ID__SHIFT 0
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__WGP_ID__SHIFT 2
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__ENCODING__SHIFT 6
+
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__DATA_MASK 0x00ffffff /* [23:0] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__SH_ID_MASK 0x02000000 /* [25] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV_MASK 0x04000000 /* [26] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__WAVE_ID_MASK 0xf8000000 /* [31:27] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__SIMD_ID_MASK 0x00000003 /* [33:32] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__WGP_ID_MASK 0x0000003c /* [37:34] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__ENCODING_MASK 0x000000c0 /* [39:38] */
+
+/* SQ_INTERRUPT_WORD_ERROR_CTXID */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__DETAIL__SHIFT 0
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__TYPE__SHIFT 21
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__SH_ID__SHIFT 25
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__PRIV__SHIFT 26
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__WAVE_ID__SHIFT 27
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__SIMD_ID__SHIFT 0
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__WGP_ID__SHIFT 2
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__ENCODING__SHIFT 6
+
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__DETAIL_MASK 0x001fffff /* [20:0] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__TYPE_MASK 0x01e00000 /* [24:21] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__SH_ID_MASK 0x02000000 /* [25] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__PRIV_MASK 0x04000000 /* [26] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__WAVE_ID_MASK 0xf8000000 /* [31:27] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__SIMD_ID_MASK 0x00000003 /* [33:32] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__WGP_ID_MASK 0x0000003c /* [37:34] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__ENCODING_MASK 0x000000c0 /* [39:38] */
+
+/*
+ * The debugger will send user data(m0) with PRIV=1 to indicate it requires
+ * notification from the KFD with the following queue id (DOORBELL_ID) and
+ * trap code (TRAP_CODE).
+ */
+#define KFD_CTXID0_TRAP_CODE_SHIFT 10
+#define KFD_CTXID0_TRAP_CODE_MASK 0xfffc00
+#define KFD_CTXID0_CP_BAD_OP_ECODE_MASK 0x3ffffff
+#define KFD_CTXID0_DOORBELL_ID_MASK 0x0003ff
+
+#define KFD_CTXID0_TRAP_CODE(ctxid0) (((ctxid0) & \
+ KFD_CTXID0_TRAP_CODE_MASK) >> \
+ KFD_CTXID0_TRAP_CODE_SHIFT)
+#define KFD_CTXID0_CP_BAD_OP_ECODE(ctxid0) (((ctxid0) & \
+ KFD_CTXID0_CP_BAD_OP_ECODE_MASK) >> \
+ KFD_CTXID0_TRAP_CODE_SHIFT)
+#define KFD_CTXID0_DOORBELL_ID(ctxid0) ((ctxid0) & \
+ KFD_CTXID0_DOORBELL_ID_MASK)
+
+static void print_sq_intr_info_auto(struct kfd_node *dev, uint32_t context_id0,
+ uint32_t context_id1)
+{
+ dev_dbg_ratelimited(
+ dev->adev->dev,
+ "sq_intr: auto, ttrace %d, wlt %d, ttrace_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n",
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, WLT),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE_BUF_FULL),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ REG_TIMESTAMP),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ CMD_TIMESTAMP),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ HOST_CMD_OVERFLOW),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ HOST_REG_OVERFLOW),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ IMMED_OVERFLOW),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE_UTC_ERROR));
+}
+
+/*
+ * Emit a rate-limited debug message describing a wave (S_SENDMSG) SQ
+ * interrupt: the payload and SH/PRIV/WAVE_ID come from context_id0, the
+ * SIMD and WGP ids from context_id1.
+ */
+static void print_sq_intr_info_inst(struct kfd_node *dev, uint32_t context_id0,
+				    uint32_t context_id1)
+{
+	const uint32_t data =
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, DATA);
+	const uint32_t sh =
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, SH_ID);
+	const uint32_t priv =
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV);
+	const uint32_t wave_id =
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, WAVE_ID);
+	const uint32_t simd_id =
+		REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, SIMD_ID);
+	const uint32_t wgp_id =
+		REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, WGP_ID);
+
+	dev_dbg_ratelimited(
+		dev->adev->dev,
+		"sq_intr: inst, data 0x%08x, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
+		data, sh, priv, wave_id, simd_id, wgp_id);
+}
+
+/*
+ * Emit a rate-limited warning describing an error-encoded SQ interrupt.
+ *
+ * DETAIL, TYPE, SH_ID, PRIV and WAVE_ID are fields of context_id0; SIMD_ID
+ * and WGP_ID are fields of context_id1 and must be extracted from it.
+ */
+static void print_sq_intr_info_error(struct kfd_node *dev, uint32_t context_id0,
+				     uint32_t context_id1)
+{
+	dev_warn_ratelimited(
+		dev->adev->dev,
+		"sq_intr: error, detail 0x%08x, type %d, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0,
+			      DETAIL),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0,
+			      TYPE),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0,
+			      SH_ID),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0,
+			      PRIV),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0,
+			      WAVE_ID),
+		REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_ERROR_CTXID1,
+			      SIMD_ID),
+		REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_ERROR_CTXID1,
+			      WGP_ID));
+}
+
+/*
+ * event_interrupt_poison_consumption_v11 - react to a RAS poison consumption
+ * interrupt.
+ * @dev: KFD node that received the interrupt
+ * @pasid: PASID of the process that consumed the poison
+ * @source_id: IH source id of the interrupt, selects the recovery path
+ *
+ * Only the first poison interrupt of a process is handled. For SQ interrupts
+ * the process queues are reset through the DQM when it provides a
+ * reset_queues op; if that op is missing or fails, or for any other source, a
+ * mode-2 GPU reset is requested. The process is then notified and the RAS
+ * poison consumption handler is invoked.
+ */
+static void event_interrupt_poison_consumption_v11(struct kfd_node *dev,
+				uint16_t pasid, uint16_t source_id)
+{
+	enum amdgpu_ras_block block = 0;
+	int ret = -EINVAL;
+	uint32_t reset = 0;
+	int old_poison;
+	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
+
+	if (!p)
+		return;
+
+	/* All queues of a process are unmapped at once, so only the first
+	 * poison interrupt per process needs handling. Test and set the flag
+	 * in one atomic step so that concurrent handlers cannot both proceed.
+	 */
+	old_poison = atomic_cmpxchg(&p->poison, 0, 1);
+	kfd_unref_process(p);
+	if (old_poison)
+		return;
+
+	switch (source_id) {
+	case SOC15_INTSRC_SQ_INTERRUPT_MSG:
+		/* ret keeps -EINVAL when the DQM has no reset_queues op */
+		if (dev->dqm->ops.reset_queues)
+			ret = dev->dqm->ops.reset_queues(dev->dqm, pasid);
+		block = AMDGPU_RAS_BLOCK__GFX;
+		if (ret)
+			reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+		break;
+	case SOC21_INTSRC_SDMA_ECC:
+	default:
+		block = AMDGPU_RAS_BLOCK__GFX;
+		reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+		break;
+	}
+
+	kfd_signal_poison_consumed_event(dev, pasid);
+
+	/* If resetting the queues succeeded, do page retirement without a GPU
+	 * reset; if it failed, fall back to the GPU reset solution.
+	 */
+	amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, reset);
+}
+
+/*
+ * ISR-time filter for GFX11: decide whether an IH ring entry should be
+ * forwarded to the KFD interrupt work queue.
+ *
+ * Returns false for entries whose VMID is outside the KFD VMID range (unless
+ * the entry is a fence interrupt), for CP end-of-pipe entries whose
+ * context_id0 carries AMDGPU_FENCE_MES_QUEUE_FLAG, and for entries with a
+ * zero PASID. Otherwise returns true only for the interrupt sources tested
+ * at the bottom of the function. patched_ihre and patched_flag are not used
+ * by this implementation.
+ */
+static bool event_interrupt_isr_v11(struct kfd_node *dev,
+				    const uint32_t *ih_ring_entry,
+				    uint32_t *patched_ihre,
+				    bool *patched_flag)
+{
+	const uint32_t *data = ih_ring_entry;
+	uint16_t source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+	uint16_t client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+	uint16_t vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+	uint16_t pasid;
+	uint32_t ctx0;
+	bool is_fence = KFD_IRQ_IS_FENCE(client_id, source_id);
+	bool kfd_vmid, is_vm_fault;
+
+	/* Only handle interrupts from KFD VMIDs; fences are exempt */
+	kfd_vmid = vmid >= dev->vm_info.first_vmid_kfd &&
+		   vmid <= dev->vm_info.last_vmid_kfd;
+	if (!is_fence && !kfd_vmid)
+		return false;
+
+	pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+	ctx0 = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);
+
+	/* End-of-pipe entries flagged as MES queue fences are not ours */
+	if (source_id == SOC15_INTSRC_CP_END_OF_PIPE &&
+	    (ctx0 & AMDGPU_FENCE_MES_QUEUE_FLAG))
+		return false;
+
+	dev_dbg(dev->adev->dev,
+		"client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
+		client_id, source_id, vmid, pasid);
+	dev_dbg(dev->adev->dev, "%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
+		data[0], data[1], data[2], data[3], data[4], data[5], data[6],
+		data[7]);
+
+	if (!pasid)
+		return false;
+
+	/* Interrupt types we care about: various signals and faults.
+	 * They will be forwarded to a work queue (see below).
+	 */
+	if (source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
+	    source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
+	    source_id == SOC15_INTSRC_CP_BAD_OPCODE ||
+	    source_id == SOC21_INTSRC_SDMA_TRAP)
+		return true;
+
+	if (is_fence)
+		return true;
+
+	/* VM faults are forwarded unless queue eviction on fault is disabled */
+	is_vm_fault = client_id == SOC21_IH_CLIENTID_VMC ||
+		      (client_id == SOC21_IH_CLIENTID_GFX &&
+		       source_id == UTCL2_1_0__SRCID__FAULT);
+
+	return is_vm_fault && !amdgpu_no_queue_eviction_on_vm_fault;
+}
+
+/*
+ * Work-queue handler for one GFX11 IH ring entry.
+ *
+ * Decodes the source/client/ring IDs, PASID, VMID and both context words
+ * from ih_ring_entry and dispatches on them:
+ * - VMC client, or GFX client with a UTCL2 fault source: build a memory
+ * exception record, raise an EC_DEVICE_MEMORY_VIOLATION debug event and
+ * update the SMI VM-fault event.
+ * - GRBM_CP or GFX client: handle the CP end-of-pipe, CP bad opcode,
+ * SDMA trap, SDMA ECC and SQ interrupt message sources.
+ * - Fence interrupts: call kfd_process_close_interrupt_drain().
+ * Any other entry is ignored.
+ */
+static void event_interrupt_wq_v11(struct kfd_node *dev,
+ const uint32_t *ih_ring_entry)
+{
+ uint16_t source_id, client_id, ring_id, pasid, vmid;
+ uint32_t context_id0, context_id1;
+ uint8_t sq_int_enc, sq_int_priv, sq_int_errtype;
+ struct kfd_vm_fault_info info = {0};
+ struct kfd_hsa_memory_exception_data exception_data;
+
+ source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+ client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+ ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
+ pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+ vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+ context_id0 = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);
+ context_id1 = SOC15_CONTEXT_ID1_FROM_IH_ENTRY(ih_ring_entry);
+
+ /* VMC, UTCL2 */
+ if (client_id == SOC21_IH_CLIENTID_VMC ||
+ ((client_id == SOC21_IH_CLIENTID_GFX) &&
+ (source_id == UTCL2_1_0__SRCID__FAULT))) {
+
+ info.vmid = vmid;
+ info.mc_id = client_id;
+ /* page address: all of dword 4 plus the low 4 bits of dword 5 */
+ info.page_addr = ih_ring_entry[4] |
+ (uint64_t)(ih_ring_entry[5] & 0xf) << 32;
+ /* protection flags are taken from bits 3, 4 and 5 of ring_id */
+ info.prot_valid = ring_id & 0x08;
+ info.prot_read = ring_id & 0x10;
+ info.prot_write = ring_id & 0x20;
+
+ memset(&exception_data, 0, sizeof(exception_data));
+ exception_data.gpu_id = dev->id;
+ exception_data.va = (info.page_addr) << PAGE_SHIFT;
+ exception_data.failure.NotPresent = info.prot_valid ? 1 : 0;
+ /* info.prot_exec is never assigned here, so it keeps the 0 from the initializer */
+ exception_data.failure.NoExecute = info.prot_exec ? 1 : 0;
+ exception_data.failure.ReadOnly = info.prot_write ? 1 : 0;
+ exception_data.failure.imprecise = 0;
+
+ kfd_set_dbg_ev_from_interrupt(dev, pasid, -1,
+ KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION),
+ &exception_data, sizeof(exception_data));
+ kfd_smi_event_update_vmfault(dev, pasid);
+
+ /* GRBM, SDMA, SE, PMM */
+ } else if (client_id == SOC21_IH_CLIENTID_GRBM_CP ||
+ client_id == SOC21_IH_CLIENTID_GFX) {
+
+ /* CP */
+ if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
+ kfd_signal_event_interrupt(pasid, context_id0, 32);
+ else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
+ KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) {
+ u32 doorbell_id = KFD_CTXID0_DOORBELL_ID(context_id0);
+
+ /* report the exception for this doorbell, then suspend the offending queue */
+ kfd_set_dbg_ev_from_interrupt(dev, pasid, doorbell_id,
+ KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)),
+ NULL, 0);
+ kfd_dqm_suspend_bad_queue_mes(dev, pasid, doorbell_id);
+ }
+
+ /* SDMA */
+ else if (source_id == SOC21_INTSRC_SDMA_TRAP)
+ kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28);
+ else if (source_id == SOC21_INTSRC_SDMA_ECC) {
+ event_interrupt_poison_consumption_v11(dev, pasid, source_id);
+ return;
+ }
+
+ /* SQ */
+ else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) {
+ /* the encoding field of context_id1 selects how the entry is decoded */
+ sq_int_enc = REG_GET_FIELD(context_id1,
+ SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING);
+ switch (sq_int_enc) {
+ case SQ_INTERRUPT_WORD_ENCODING_AUTO:
+ print_sq_intr_info_auto(dev, context_id0, context_id1);
+ break;
+ case SQ_INTERRUPT_WORD_ENCODING_INST:
+ print_sq_intr_info_inst(dev, context_id0, context_id1);
+ sq_int_priv = REG_GET_FIELD(context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV);
+ /* with PRIV set, a non-zero return from kfd_set_dbg_ev_from_interrupt() skips the event signal below */
+ if (sq_int_priv && (kfd_set_dbg_ev_from_interrupt(dev, pasid,
+ KFD_CTXID0_DOORBELL_ID(context_id0),
+ KFD_CTXID0_TRAP_CODE(context_id0),
+ NULL, 0)))
+ return;
+ break;
+ case SQ_INTERRUPT_WORD_ENCODING_ERROR:
+ print_sq_intr_info_error(dev, context_id0, context_id1);
+ sq_int_errtype = REG_GET_FIELD(context_id0,
+ SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE);
+ /* error types other than illegal instruction and memory violation go to the poison consumption handler */
+ if (sq_int_errtype != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
+ sq_int_errtype != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) {
+ event_interrupt_poison_consumption_v11(
+ dev, pasid, source_id);
+ return;
+ }
+ break;
+ default:
+ break;
+ }
+ /* signal the event using the low 24 bits of context_id0 */
+ kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24);
+ }
+
+ } else if (KFD_IRQ_IS_FENCE(client_id, source_id)) {
+ kfd_process_close_interrupt_drain(pasid);
+ }
+}
+
+/*
+ * GFX11 interrupt class: event_interrupt_isr_v11() is installed as the
+ * interrupt_isr callback and event_interrupt_wq_v11() as the interrupt_wq
+ * callback.
+ */
+const struct kfd_event_interrupt_class event_interrupt_class_v11 = {
+ .interrupt_isr = event_interrupt_isr_v11,
+ .interrupt_wq = event_interrupt_wq_v11,
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index 543e7ea75593..d76fb61869c7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -22,9 +23,41 @@
#include "kfd_priv.h"
#include "kfd_events.h"
+#include "kfd_debug.h"
#include "soc15_int.h"
#include "kfd_device_queue_manager.h"
#include "kfd_smi_events.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_ras_mgr.h"
+
+/*
+ * GFX9 SQ Interrupts
+ *
+ * There are 3 encoding types of interrupts sourced from SQ sent as a 44-bit
+ * packet to the Interrupt Handler:
+ * Auto - Generated by the SQG (various cmd overflows, timestamps etc)
+ * Wave - Generated by S_SENDMSG through a shader program
+ * Error - HW generated errors (Illegal instructions, Memviols, EDC etc)
+ *
+ * The 44-bit packet is mapped as {context_id1[7:0],context_id0[31:0]} plus
+ * 4-bits for VMID (SOC15_VMID_FROM_IH_ENTRY) as such:
+ *
+ * - context_id0[27:26]
+ * Encoding type (0 = Auto, 1 = Wave, 2 = Error)
+ *
+ * - context_id0[13]
+ * PRIV bit indicates that Wave S_SEND or error occurred within trap
+ *
+ * - {context_id1[7:0],context_id0[31:28],context_id0[11:0]}
+ * 24-bit data with the following layout per encoding type:
+ * Auto - only context_id0[8:0] is used, which reports various interrupts
+ * generated by SQG. The rest is 0.
+ * Wave - user data sent from m0 via S_SENDMSG
+ * Error - Error type (context_id1[7:4]), Error Details (rest of bits)
+ *
+ * The other context_id bits show coordinates (SE/SH/CU/SIMD/WAVE) for wave
+ * S_SENDMSG and Errors. These are 0 for Auto.
+ */
enum SQ_INTERRUPT_WORD_ENCODING {
SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x0,
@@ -83,13 +116,152 @@ enum SQ_INTERRUPT_ERROR_TYPE {
#define SQ_INTERRUPT_WORD_WAVE_CTXID__SE_ID_MASK 0x03000000
#define SQ_INTERRUPT_WORD_WAVE_CTXID__ENCODING_MASK 0x0c000000
+/* GFX9 SQ interrupt 24-bit data from context_id<0,1> */
#define KFD_CONTEXT_ID_GET_SQ_INT_DATA(ctx0, ctx1) \
((ctx0 & 0xfff) | ((ctx0 >> 16) & 0xf000) | ((ctx1 << 16) & 0xff0000))
#define KFD_SQ_INT_DATA__ERR_TYPE_MASK 0xF00000
#define KFD_SQ_INT_DATA__ERR_TYPE__SHIFT 20
-static bool event_interrupt_isr_v9(struct kfd_dev *dev,
+/*
+ * The debugger will send user data(m0) with PRIV=1 to indicate it requires
+ * notification from the KFD with the following queue id (DOORBELL_ID) and
+ * trap code (TRAP_CODE).
+ */
+#define KFD_INT_DATA_DEBUG_DOORBELL_MASK 0x0003ff
+#define KFD_INT_DATA_DEBUG_TRAP_CODE_SHIFT 10
+#define KFD_INT_DATA_DEBUG_TRAP_CODE_MASK 0x07fc00
+#define KFD_DEBUG_DOORBELL_ID(sq_int_data) ((sq_int_data) & \
+ KFD_INT_DATA_DEBUG_DOORBELL_MASK)
+#define KFD_DEBUG_TRAP_CODE(sq_int_data) (((sq_int_data) & \
+ KFD_INT_DATA_DEBUG_TRAP_CODE_MASK) \
+ >> KFD_INT_DATA_DEBUG_TRAP_CODE_SHIFT)
+#define KFD_DEBUG_CP_BAD_OP_ECODE_MASK 0x3fffc00
+#define KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT 10
+#define KFD_DEBUG_CP_BAD_OP_ECODE(ctxid0) (((ctxid0) & \
+ KFD_DEBUG_CP_BAD_OP_ECODE_MASK) \
+ >> KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT)
+
+/*
+ * Handle a poison consumption interrupt on GFX9 for the process identified
+ * by pasid.
+ *
+ * The process' poison flag is switched from 0 to 1 with atomic_cmpxchg(); if
+ * it was already set, nothing further is done. The RAS block and GPU reset
+ * mode are then chosen from client_id, the RAS event is marked, the poison
+ * consumed event is signalled, an event id is obtained and logged, and the
+ * request is handed to amdgpu_amdkfd_ras_pasid_poison_consumption_handler().
+ *
+ * Returns early, without signalling, when no process matches pasid, when
+ * client_id is not handled by the switch, or when
+ * amdgpu_ras_mark_ras_event() returns non-zero.
+ */
+static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
+ uint16_t pasid, uint16_t client_id)
+{
+ enum amdgpu_ras_block block = 0;
+ uint32_t reset = 0;
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
+ enum ras_event_type type = RAS_EVENT_TYPE_POISON_CONSUMPTION;
+ u64 event_id;
+ int old_poison, ret;
+
+ if (!p)
+ return;
+
+ /* all queues of a process will be unmapped in one time */
+ old_poison = atomic_cmpxchg(&p->poison, 0, 1);
+ kfd_unref_process(p);
+ if (old_poison)
+ return;
+
+ switch (client_id) {
+ case SOC15_IH_CLIENTID_SE0SH:
+ case SOC15_IH_CLIENTID_SE1SH:
+ case SOC15_IH_CLIENTID_SE2SH:
+ case SOC15_IH_CLIENTID_SE3SH:
+ case SOC15_IH_CLIENTID_UTCL2:
+ block = AMDGPU_RAS_BLOCK__GFX;
+ if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) {
+ /* driver mode-2 for gfx poison is only supported by
+ * pmfw 0x00557300 and onwards */
+ if (dev->adev->pm.fw_version < 0x00557300)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ } else if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) {
+ /* driver mode-2 for gfx poison is only supported by
+ * pmfw 0x05550C00 and onwards */
+ if (dev->adev->pm.fw_version < 0x05550C00)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ } else {
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ }
+ amdgpu_ras_set_err_poison(dev->adev, AMDGPU_RAS_BLOCK__GFX);
+ break;
+ case SOC15_IH_CLIENTID_VMC:
+ case SOC15_IH_CLIENTID_VMC1:
+ /* MMHUB poison always requests a mode-1 reset */
+ block = AMDGPU_RAS_BLOCK__MMHUB;
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ break;
+ case SOC15_IH_CLIENTID_SDMA0:
+ case SOC15_IH_CLIENTID_SDMA1:
+ case SOC15_IH_CLIENTID_SDMA2:
+ case SOC15_IH_CLIENTID_SDMA3:
+ case SOC15_IH_CLIENTID_SDMA4:
+ block = AMDGPU_RAS_BLOCK__SDMA;
+ if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2)) {
+ /* driver mode-2 for gfx poison is only supported by
+ * pmfw 0x00557300 and onwards */
+ if (dev->adev->pm.fw_version < 0x00557300)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ } else if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) {
+ /* driver mode-2 for gfx poison is only supported by
+ * pmfw 0x05550C00 and onwards */
+ if (dev->adev->pm.fw_version < 0x05550C00)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ } else {
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ }
+ amdgpu_ras_set_err_poison(dev->adev, AMDGPU_RAS_BLOCK__SDMA);
+ break;
+ default:
+ /* NOTE(review): p->poison was already set to 1 above and is not cleared on this path - confirm this is intended */
+ dev_warn(dev->adev->dev,
+ "client %d does not support poison consumption\n", client_id);
+ return;
+ }
+
+ /* a non-zero return from amdgpu_ras_mark_ras_event() abandons the handling */
+ ret = amdgpu_ras_mark_ras_event(dev->adev, type);
+ if (ret)
+ return;
+
+ kfd_signal_poison_consumed_event(dev, pasid);
+
+ /* the event id comes from the RAS manager when uniras is enabled, otherwise from amdgpu_ras_acquire_event_id() */
+ if (amdgpu_uniras_enabled(dev->adev))
+ event_id = amdgpu_ras_mgr_gen_ras_event_seqno(dev->adev,
+ RAS_SEQNO_TYPE_POISON_CONSUMPTION);
+ else
+ event_id = amdgpu_ras_acquire_event_id(dev->adev, type);
+
+ RAS_EVENT_LOG(dev->adev, event_id,
+ "poison is consumed by client %d, kick off gpu reset flow\n", client_id);
+
+ amdgpu_amdkfd_ras_pasid_poison_consumption_handler(dev->adev,
+ block, pasid, NULL, NULL, reset);
+}
+
+/*
+ * Report whether this device is expected to deliver a valid (non-zero)
+ * context ID with legitimate events.
+ *
+ * GC 9.0.1 needs MEC firmware 0x817a or newer; GC 9.1.0, 9.2.1, 9.2.2,
+ * 9.3.0 and 9.4.0 need MEC firmware 0x17a or newer. Any other GC version
+ * qualifies only when it is 9.4.1 or later.
+ */
+static bool context_id_expected(struct kfd_dev *dev)
+{
+	const uint32_t gc_version = KFD_GC_VERSION(dev);
+
+	if (gc_version == IP_VERSION(9, 0, 1))
+		return dev->mec_fw_version >= 0x817a;
+
+	if (gc_version == IP_VERSION(9, 1, 0) ||
+	    gc_version == IP_VERSION(9, 2, 1) ||
+	    gc_version == IP_VERSION(9, 2, 2) ||
+	    gc_version == IP_VERSION(9, 3, 0) ||
+	    gc_version == IP_VERSION(9, 4, 0))
+		return dev->mec_fw_version >= 0x17a;
+
+	/* Other GFXv9 and later GPUs always sent valid context IDs
+	 * on legitimate events
+	 */
+	return gc_version >= IP_VERSION(9, 4, 1);
+}
+
+static bool event_interrupt_isr_v9(struct kfd_node *dev,
const uint32_t *ih_ring_entry,
uint32_t *patched_ihre,
bool *patched_flag)
@@ -97,14 +269,16 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
uint16_t source_id, client_id, pasid, vmid;
const uint32_t *data = ih_ring_entry;
+ source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+ client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+
/* Only handle interrupts from KFD VMIDs */
vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
- if (vmid < dev->vm_info.first_vmid_kfd ||
- vmid > dev->vm_info.last_vmid_kfd)
+ if (!KFD_IRQ_IS_FENCE(client_id, source_id) &&
+ (vmid < dev->vm_info.first_vmid_kfd ||
+ vmid > dev->vm_info.last_vmid_kfd))
return false;
- source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
- client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
/* Only handle clients we care about */
@@ -123,7 +297,8 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
client_id != SOC15_IH_CLIENTID_SE0SH &&
client_id != SOC15_IH_CLIENTID_SE1SH &&
client_id != SOC15_IH_CLIENTID_SE2SH &&
- client_id != SOC15_IH_CLIENTID_SE3SH)
+ client_id != SOC15_IH_CLIENTID_SE3SH &&
+ !KFD_IRQ_IS_FENCE(client_id, source_id))
return false;
/* This is a known issue for gfx9. Under non HWS, pasid is not set
@@ -135,7 +310,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
*patched_flag = true;
memcpy(patched_ihre, ih_ring_entry,
- dev->device_info->ih_ring_entry_size);
+ dev->kfd->device_info.ih_ring_entry_size);
pasid = dev->dqm->vmid_pasid[vmid];
@@ -144,30 +319,47 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
& ~pasid_mask) | pasid);
}
- pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
- client_id, source_id, vmid, pasid);
- pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
- data[0], data[1], data[2], data[3],
- data[4], data[5], data[6], data[7]);
+ dev_dbg(dev->adev->dev,
+ "client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
+ client_id, source_id, vmid, pasid);
+ dev_dbg(dev->adev->dev, "%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
+ data[0], data[1], data[2], data[3], data[4], data[5], data[6],
+ data[7]);
/* If there is no valid PASID, it's likely a bug */
if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt"))
return false;
+ /* Workaround CP firmware sending bogus signals with 0 context_id.
+ * Those can be safely ignored on hardware and firmware versions that
+ * include a valid context_id on legitimate signals. This avoids the
+ * slow path in kfd_signal_event_interrupt that scans all event slots
+ * for signaled events.
+ */
+ if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) {
+ uint32_t context_id =
+ SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);
+
+ if (context_id == 0 && context_id_expected(dev->kfd))
+ return false;
+ }
+
/* Interrupt types we care about: various signals and faults.
* They will be forwarded to a work queue (see below).
*/
return source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
source_id == SOC15_INTSRC_SDMA_TRAP ||
+ source_id == SOC15_INTSRC_SDMA_ECC ||
source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
source_id == SOC15_INTSRC_CP_BAD_OPCODE ||
+ KFD_IRQ_IS_FENCE(client_id, source_id) ||
((client_id == SOC15_IH_CLIENTID_VMC ||
client_id == SOC15_IH_CLIENTID_VMC1 ||
client_id == SOC15_IH_CLIENTID_UTCL2) &&
!amdgpu_no_queue_eviction_on_vm_fault);
}
-static void event_interrupt_wq_v9(struct kfd_dev *dev,
+static void event_interrupt_wq_v9(struct kfd_node *dev,
const uint32_t *ih_ring_entry)
{
uint16_t source_id, client_id, pasid, vmid;
@@ -193,54 +385,142 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
encoding = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, ENCODING);
switch (encoding) {
case SQ_INTERRUPT_WORD_ENCODING_AUTO:
- pr_debug(
+ dev_dbg_ratelimited(
+ dev->adev->dev,
"sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n",
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, SE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, WLT),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_BUF_FULL),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, REG_TIMESTAMP),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, CMD_TIMESTAMP),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, HOST_CMD_OVERFLOW),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, HOST_REG_OVERFLOW),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, IMMED_OVERFLOW),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_UTC_ERROR));
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ SE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ THREAD_TRACE),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ WLT),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ THREAD_TRACE_BUF_FULL),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ REG_TIMESTAMP),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ CMD_TIMESTAMP),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ HOST_CMD_OVERFLOW),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ HOST_REG_OVERFLOW),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ IMMED_OVERFLOW),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ THREAD_TRACE_UTC_ERROR));
break;
case SQ_INTERRUPT_WORD_ENCODING_INST:
- pr_debug("sq_intr: inst, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, intr_data 0x%x\n",
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, DATA),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SH_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, PRIV),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, WAVE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SIMD_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, CU_ID),
+ dev_dbg_ratelimited(
+ dev->adev->dev,
+ "sq_intr: inst, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, intr_data 0x%x\n",
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ DATA),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SH_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ PRIV),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ WAVE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SIMD_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ CU_ID),
sq_int_data);
+ if (context_id0 & SQ_INTERRUPT_WORD_WAVE_CTXID__PRIV_MASK) {
+ if (kfd_set_dbg_ev_from_interrupt(dev, pasid,
+ KFD_DEBUG_DOORBELL_ID(sq_int_data),
+ KFD_DEBUG_TRAP_CODE(sq_int_data),
+ NULL, 0))
+ return;
+ }
break;
case SQ_INTERRUPT_WORD_ENCODING_ERROR:
sq_intr_err = REG_GET_FIELD(sq_int_data, KFD_SQ_INT_DATA, ERR_TYPE);
- pr_warn("sq_intr: error, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, err_type %d\n",
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, DATA),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SH_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, PRIV),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, WAVE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SIMD_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, CU_ID),
+ dev_warn_ratelimited(
+ dev->adev->dev,
+ "sq_intr: error, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, err_type %d\n",
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ DATA),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SH_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ PRIV),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ WAVE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SIMD_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ CU_ID),
sq_intr_err);
if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) {
- kfd_signal_poison_consumed_event(dev, pasid);
- amdgpu_amdkfd_ras_poison_consumption_handler(dev->kgd);
+ event_interrupt_poison_consumption_v9(dev, pasid, client_id);
return;
}
break;
default:
break;
}
- kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24);
- } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE)
- kfd_signal_hw_exception_event(pasid);
+ kfd_signal_event_interrupt(pasid, sq_int_data, 24);
+ } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
+ KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) {
+ kfd_set_dbg_ev_from_interrupt(dev, pasid,
+ KFD_DEBUG_DOORBELL_ID(context_id0),
+ KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)),
+ NULL, 0);
+ }
} else if (client_id == SOC15_IH_CLIENTID_SDMA0 ||
client_id == SOC15_IH_CLIENTID_SDMA1 ||
client_id == SOC15_IH_CLIENTID_SDMA2 ||
@@ -252,8 +532,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
if (source_id == SOC15_INTSRC_SDMA_TRAP) {
kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28);
} else if (source_id == SOC15_INTSRC_SDMA_ECC) {
- kfd_signal_poison_consumed_event(dev, pasid);
- amdgpu_amdkfd_ras_poison_consumption_handler(dev->kgd);
+ event_interrupt_poison_consumption_v9(dev, pasid, client_id);
return;
}
} else if (client_id == SOC15_IH_CLIENTID_VMC ||
@@ -261,6 +540,12 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
client_id == SOC15_IH_CLIENTID_UTCL2) {
struct kfd_vm_fault_info info = {0};
uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
+ struct kfd_hsa_memory_exception_data exception_data;
+
+ if (source_id == SOC15_INTSRC_VMC_UTCL2_POISON) {
+ event_interrupt_poison_consumption_v9(dev, pasid, client_id);
+ return;
+ }
info.vmid = vmid;
info.mc_id = client_id;
@@ -270,13 +555,56 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
info.prot_read = ring_id & 0x10;
info.prot_write = ring_id & 0x20;
+ memset(&exception_data, 0, sizeof(exception_data));
+ exception_data.gpu_id = dev->id;
+ exception_data.va = (info.page_addr) << PAGE_SHIFT;
+ exception_data.failure.NotPresent = info.prot_valid ? 1 : 0;
+ exception_data.failure.NoExecute = info.prot_exec ? 1 : 0;
+ exception_data.failure.ReadOnly = info.prot_write ? 1 : 0;
+ exception_data.failure.imprecise = 0;
+
+ kfd_set_dbg_ev_from_interrupt(dev,
+ pasid,
+ -1,
+ KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION),
+ &exception_data,
+ sizeof(exception_data));
kfd_smi_event_update_vmfault(dev, pasid);
- kfd_process_vm_fault(dev->dqm, pasid);
- kfd_signal_vm_fault_event(dev, pasid, &info);
+ } else if (KFD_IRQ_IS_FENCE(client_id, source_id)) {
+ kfd_process_close_interrupt_drain(pasid);
}
}
+/*
+ * ISR-time filter for GFX 9.4.3.
+ *
+ * The entry is first checked with kfd_irq_is_from_node(), using the node ID
+ * and VMID decoded from the IH entry. Entries that do not belong to this
+ * node are rejected; the rest are passed on to event_interrupt_isr_v9().
+ */
+static bool event_interrupt_isr_v9_4_3(struct kfd_node *node,
+				const uint32_t *ih_ring_entry,
+				uint32_t *patched_ihre,
+				bool *patched_flag)
+{
+	/*
+	 * For GFX 9.4.3, process the interrupt if:
+	 * - NodeID field in IH entry matches the corresponding bit
+	 *   set in interrupt_bitmap Bits 0-15.
+	 *   OR
+	 * - If partition mode is CPX and interrupt came from
+	 *   Node_id 0,4,8,12, then check if the Bit (16 + client id)
+	 *   is set in interrupt bitmap Bits 16-31.
+	 */
+	uint16_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry);
+	uint16_t vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+
+	if (!kfd_irq_is_from_node(node, node_id, vmid))
+		return false;
+
+	return event_interrupt_isr_v9(node, ih_ring_entry,
+				      patched_ihre, patched_flag);
+}
+
const struct kfd_event_interrupt_class event_interrupt_class_v9 = {
.interrupt_isr = event_interrupt_isr_v9,
.interrupt_wq = event_interrupt_wq_v9,
};
+
+/*
+ * GFX 9.4.3 interrupt class: uses event_interrupt_isr_v9_4_3() as the
+ * interrupt_isr callback and event_interrupt_wq_v9() as the interrupt_wq
+ * callback.
+ */
+const struct kfd_event_interrupt_class event_interrupt_class_v9_4_3 = {
+ .interrupt_isr = event_interrupt_isr_v9_4_3,
+ .interrupt_wq = event_interrupt_wq_v9,
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index bc47f6a44456..783c2f5a04e4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -45,33 +46,36 @@
#include <linux/kfifo.h>
#include "kfd_priv.h"
-#define KFD_IH_NUM_ENTRIES 8192
+#define KFD_IH_NUM_ENTRIES 16384
static void interrupt_wq(struct work_struct *);
-int kfd_interrupt_init(struct kfd_dev *kfd)
+int kfd_interrupt_init(struct kfd_node *node)
{
int r;
- r = kfifo_alloc(&kfd->ih_fifo,
- KFD_IH_NUM_ENTRIES * kfd->device_info->ih_ring_entry_size,
+ r = kfifo_alloc(&node->ih_fifo,
+ KFD_IH_NUM_ENTRIES * node->kfd->device_info.ih_ring_entry_size,
GFP_KERNEL);
if (r) {
- dev_err(kfd_chardev(), "Failed to allocate IH fifo\n");
+ dev_err(node->adev->dev, "Failed to allocate IH fifo\n");
return r;
}
- kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1);
- if (unlikely(!kfd->ih_wq)) {
- kfifo_free(&kfd->ih_fifo);
- dev_err(kfd_chardev(), "Failed to allocate KFD IH workqueue\n");
- return -ENOMEM;
+ if (!node->kfd->ih_wq) {
+ node->kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI | WQ_UNBOUND,
+ node->kfd->num_nodes);
+ if (unlikely(!node->kfd->ih_wq)) {
+ kfifo_free(&node->ih_fifo);
+ dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n");
+ return -ENOMEM;
+ }
}
- spin_lock_init(&kfd->interrupt_lock);
+ spin_lock_init(&node->interrupt_lock);
- INIT_WORK(&kfd->interrupt_work, interrupt_wq);
+ INIT_WORK(&node->interrupt_work, interrupt_wq);
- kfd->interrupts_active = true;
+ node->interrupts_active = true;
/*
* After this function returns, the interrupt will be enabled. This
@@ -83,7 +87,7 @@ int kfd_interrupt_init(struct kfd_dev *kfd)
return 0;
}
-void kfd_interrupt_exit(struct kfd_dev *kfd)
+void kfd_interrupt_exit(struct kfd_node *node)
{
/*
* Stop the interrupt handler from writing to the ring and scheduling
@@ -92,78 +96,72 @@ void kfd_interrupt_exit(struct kfd_dev *kfd)
*/
unsigned long flags;
- spin_lock_irqsave(&kfd->interrupt_lock, flags);
- kfd->interrupts_active = false;
- spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
-
- /*
- * flush_work ensures that there are no outstanding
- * work-queue items that will access interrupt_ring. New work items
- * can't be created because we stopped interrupt handling above.
- */
- flush_workqueue(kfd->ih_wq);
-
- kfifo_free(&kfd->ih_fifo);
+ spin_lock_irqsave(&node->interrupt_lock, flags);
+ node->interrupts_active = false;
+ spin_unlock_irqrestore(&node->interrupt_lock, flags);
+ kfifo_free(&node->ih_fifo);
}
/*
* Assumption: single reader/writer. This function is not re-entrant
*/
-bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry)
+bool enqueue_ih_ring_entry(struct kfd_node *node, const void *ih_ring_entry)
{
- int count;
-
- count = kfifo_in(&kfd->ih_fifo, ih_ring_entry,
- kfd->device_info->ih_ring_entry_size);
- if (count != kfd->device_info->ih_ring_entry_size) {
- dev_err_ratelimited(kfd_chardev(),
- "Interrupt ring overflow, dropping interrupt %d\n",
- count);
+ if (kfifo_is_full(&node->ih_fifo)) {
+ dev_warn_ratelimited(node->adev->dev, "KFD node %d ih_fifo overflow\n",
+ node->node_id);
return false;
}
+ kfifo_in(&node->ih_fifo, ih_ring_entry, node->kfd->device_info.ih_ring_entry_size);
return true;
}
/*
* Assumption: single reader/writer. This function is not re-entrant
*/
-static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry)
+static bool dequeue_ih_ring_entry(struct kfd_node *node, u32 **ih_ring_entry)
{
int count;
- count = kfifo_out(&kfd->ih_fifo, ih_ring_entry,
- kfd->device_info->ih_ring_entry_size);
-
- WARN_ON(count && count != kfd->device_info->ih_ring_entry_size);
+ if (kfifo_is_empty(&node->ih_fifo))
+ return false;
- return count == kfd->device_info->ih_ring_entry_size;
+ count = kfifo_out_linear_ptr(&node->ih_fifo, ih_ring_entry,
+ node->kfd->device_info.ih_ring_entry_size);
+ WARN_ON(count != node->kfd->device_info.ih_ring_entry_size);
+ return count == node->kfd->device_info.ih_ring_entry_size;
}
static void interrupt_wq(struct work_struct *work)
{
- struct kfd_dev *dev = container_of(work, struct kfd_dev,
- interrupt_work);
- uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
+ struct kfd_node *dev = container_of(work, struct kfd_node, interrupt_work);
+ uint32_t *ih_ring_entry;
+ unsigned long start_jiffies = jiffies;
- if (dev->device_info->ih_ring_entry_size > sizeof(ih_ring_entry)) {
- dev_err_once(kfd_chardev(), "Ring entry too small\n");
- return;
- }
-
- while (dequeue_ih_ring_entry(dev, ih_ring_entry))
- dev->device_info->event_interrupt_class->interrupt_wq(dev,
+ while (dequeue_ih_ring_entry(dev, &ih_ring_entry)) {
+ dev->kfd->device_info.event_interrupt_class->interrupt_wq(dev,
ih_ring_entry);
+ kfifo_skip_count(&dev->ih_fifo, dev->kfd->device_info.ih_ring_entry_size);
+
+ if (time_is_before_jiffies(start_jiffies + HZ)) {
+ /* If we spent more than a second processing signals,
+ * reschedule the worker to avoid soft-lockup warnings
+ */
+ queue_work(dev->kfd->ih_wq, &dev->interrupt_work);
+ break;
+ }
+ }
}
-bool interrupt_is_wanted(struct kfd_dev *dev,
+bool interrupt_is_wanted(struct kfd_node *dev,
const uint32_t *ih_ring_entry,
uint32_t *patched_ihre, bool *flag)
{
/* integer and bitwise OR so there is no boolean short-circuiting */
unsigned int wanted = 0;
- wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev,
+ wanted |= dev->kfd->device_info.event_interrupt_class->interrupt_isr(dev,
ih_ring_entry, patched_ihre, flag);
return wanted != 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
deleted file mode 100644
index 73f2257acc23..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
+++ /dev/null
@@ -1,359 +0,0 @@
-/*
- * Copyright 2018 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/kconfig.h>
-
-#if IS_REACHABLE(CONFIG_AMD_IOMMU_V2)
-
-#include <linux/printk.h>
-#include <linux/device.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/amd-iommu.h>
-#include "kfd_priv.h"
-#include "kfd_dbgmgr.h"
-#include "kfd_topology.h"
-#include "kfd_iommu.h"
-
-static const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
- AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
- AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
-
-/** kfd_iommu_check_device - Check whether IOMMU is available for device
- */
-int kfd_iommu_check_device(struct kfd_dev *kfd)
-{
- struct amd_iommu_device_info iommu_info;
- int err;
-
- if (!kfd->use_iommu_v2)
- return -ENODEV;
-
- iommu_info.flags = 0;
- err = amd_iommu_device_info(kfd->pdev, &iommu_info);
- if (err)
- return err;
-
- if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags)
- return -ENODEV;
-
- return 0;
-}
-
-/** kfd_iommu_device_init - Initialize IOMMU for device
- */
-int kfd_iommu_device_init(struct kfd_dev *kfd)
-{
- struct amd_iommu_device_info iommu_info;
- unsigned int pasid_limit;
- int err;
-
- if (!kfd->use_iommu_v2)
- return 0;
-
- iommu_info.flags = 0;
- err = amd_iommu_device_info(kfd->pdev, &iommu_info);
- if (err < 0) {
- dev_err(kfd_device,
- "error getting iommu info. is the iommu enabled?\n");
- return -ENODEV;
- }
-
- if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) {
- dev_err(kfd_device,
- "error required iommu flags ats %i, pri %i, pasid %i\n",
- (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0,
- (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0,
- (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP)
- != 0);
- return -ENODEV;
- }
-
- pasid_limit = min_t(unsigned int,
- (unsigned int)(1 << kfd->device_info->max_pasid_bits),
- iommu_info.max_pasids);
-
- if (!kfd_set_pasid_limit(pasid_limit)) {
- dev_err(kfd_device, "error setting pasid limit\n");
- return -EBUSY;
- }
-
- return 0;
-}
-
-/** kfd_iommu_bind_process_to_device - Have the IOMMU bind a process
- *
- * Binds the given process to the given device using its PASID. This
- * enables IOMMUv2 address translation for the process on the device.
- *
- * This function assumes that the process mutex is held.
- */
-int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd)
-{
- struct kfd_dev *dev = pdd->dev;
- struct kfd_process *p = pdd->process;
- int err;
-
- if (!dev->use_iommu_v2 || pdd->bound == PDD_BOUND)
- return 0;
-
- if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) {
- pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n");
- return -EINVAL;
- }
-
- err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread);
- if (!err)
- pdd->bound = PDD_BOUND;
-
- return err;
-}
-
-/** kfd_iommu_unbind_process - Unbind process from all devices
- *
- * This removes all IOMMU device bindings of the process. To be used
- * before process termination.
- */
-void kfd_iommu_unbind_process(struct kfd_process *p)
-{
- int i;
-
- for (i = 0; i < p->n_pdds; i++)
- if (p->pdds[i]->bound == PDD_BOUND)
- amd_iommu_unbind_pasid(p->pdds[i]->dev->pdev, p->pasid);
-}
-
-/* Callback for process shutdown invoked by the IOMMU driver */
-static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, u32 pasid)
-{
- struct kfd_dev *dev = kfd_device_by_pci_dev(pdev);
- struct kfd_process *p;
- struct kfd_process_device *pdd;
-
- if (!dev)
- return;
-
- /*
- * Look for the process that matches the pasid. If there is no such
- * process, we either released it in amdkfd's own notifier, or there
- * is a bug. Unfortunately, there is no way to tell...
- */
- p = kfd_lookup_process_by_pasid(pasid);
- if (!p)
- return;
-
- pr_debug("Unbinding process 0x%x from IOMMU\n", pasid);
-
- mutex_lock(kfd_get_dbgmgr_mutex());
-
- if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
- if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
- kfd_dbgmgr_destroy(dev->dbgmgr);
- dev->dbgmgr = NULL;
- }
- }
-
- mutex_unlock(kfd_get_dbgmgr_mutex());
-
- mutex_lock(&p->mutex);
-
- pdd = kfd_get_process_device_data(dev, p);
- if (pdd)
- /* For GPU relying on IOMMU, we need to dequeue here
- * when PASID is still bound.
- */
- kfd_process_dequeue_from_device(pdd);
-
- mutex_unlock(&p->mutex);
-
- kfd_unref_process(p);
-}
-
-/* This function called by IOMMU driver on PPR failure */
-static int iommu_invalid_ppr_cb(struct pci_dev *pdev, u32 pasid,
- unsigned long address, u16 flags)
-{
- struct kfd_dev *dev;
-
- dev_warn_ratelimited(kfd_device,
- "Invalid PPR device %x:%x.%x pasid 0x%x address 0x%lX flags 0x%X",
- pdev->bus->number,
- PCI_SLOT(pdev->devfn),
- PCI_FUNC(pdev->devfn),
- pasid,
- address,
- flags);
-
- dev = kfd_device_by_pci_dev(pdev);
- if (!WARN_ON(!dev))
- kfd_signal_iommu_event(dev, pasid, address,
- flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);
-
- return AMD_IOMMU_INV_PRI_RSP_INVALID;
-}
-
-/*
- * Bind processes do the device that have been temporarily unbound
- * (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device.
- */
-static int kfd_bind_processes_to_device(struct kfd_dev *kfd)
-{
- struct kfd_process_device *pdd;
- struct kfd_process *p;
- unsigned int temp;
- int err = 0;
-
- int idx = srcu_read_lock(&kfd_processes_srcu);
-
- hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- mutex_lock(&p->mutex);
- pdd = kfd_get_process_device_data(kfd, p);
-
- if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) {
- mutex_unlock(&p->mutex);
- continue;
- }
-
- err = amd_iommu_bind_pasid(kfd->pdev, p->pasid,
- p->lead_thread);
- if (err < 0) {
- pr_err("Unexpected pasid 0x%x binding failure\n",
- p->pasid);
- mutex_unlock(&p->mutex);
- break;
- }
-
- pdd->bound = PDD_BOUND;
- mutex_unlock(&p->mutex);
- }
-
- srcu_read_unlock(&kfd_processes_srcu, idx);
-
- return err;
-}
-
-/*
- * Mark currently bound processes as PDD_BOUND_SUSPENDED. These
- * processes will be restored to PDD_BOUND state in
- * kfd_bind_processes_to_device.
- */
-static void kfd_unbind_processes_from_device(struct kfd_dev *kfd)
-{
- struct kfd_process_device *pdd;
- struct kfd_process *p;
- unsigned int temp;
-
- int idx = srcu_read_lock(&kfd_processes_srcu);
-
- hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- mutex_lock(&p->mutex);
- pdd = kfd_get_process_device_data(kfd, p);
-
- if (WARN_ON(!pdd)) {
- mutex_unlock(&p->mutex);
- continue;
- }
-
- if (pdd->bound == PDD_BOUND)
- pdd->bound = PDD_BOUND_SUSPENDED;
- mutex_unlock(&p->mutex);
- }
-
- srcu_read_unlock(&kfd_processes_srcu, idx);
-}
-
-/** kfd_iommu_suspend - Prepare IOMMU for suspend
- *
- * This unbinds processes from the device and disables the IOMMU for
- * the device.
- */
-void kfd_iommu_suspend(struct kfd_dev *kfd)
-{
- if (!kfd->use_iommu_v2)
- return;
-
- kfd_unbind_processes_from_device(kfd);
-
- amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
- amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
- amd_iommu_free_device(kfd->pdev);
-}
-
-/** kfd_iommu_resume - Restore IOMMU after resume
- *
- * This reinitializes the IOMMU for the device and re-binds previously
- * suspended processes to the device.
- */
-int kfd_iommu_resume(struct kfd_dev *kfd)
-{
- unsigned int pasid_limit;
- int err;
-
- if (!kfd->use_iommu_v2)
- return 0;
-
- pasid_limit = kfd_get_pasid_limit();
-
- err = amd_iommu_init_device(kfd->pdev, pasid_limit);
- if (err)
- return -ENXIO;
-
- amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
- iommu_pasid_shutdown_callback);
- amd_iommu_set_invalid_ppr_cb(kfd->pdev,
- iommu_invalid_ppr_cb);
-
- err = kfd_bind_processes_to_device(kfd);
- if (err) {
- amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
- amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
- amd_iommu_free_device(kfd->pdev);
- return err;
- }
-
- return 0;
-}
-
-/** kfd_iommu_add_perf_counters - Add IOMMU performance counters to topology
- */
-int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev)
-{
- struct kfd_perf_properties *props;
-
- if (!(kdev->node_props.capability & HSA_CAP_ATS_PRESENT))
- return 0;
-
- if (!amd_iommu_pc_supported())
- return 0;
-
- props = kfd_alloc_struct(props);
- if (!props)
- return -ENOMEM;
- strcpy(props->block_name, "iommu");
- props->max_concurrent = amd_iommu_pc_get_max_banks(0) *
- amd_iommu_pc_get_max_counters(0); /* assume one iommu */
- list_add_tail(&props->list, &kdev->perf_props);
-
- return 0;
-}
-
-#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h
deleted file mode 100644
index afd420b01a0c..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright 2018 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef __KFD_IOMMU_H__
-#define __KFD_IOMMU_H__
-
-#include <linux/kconfig.h>
-
-#if IS_REACHABLE(CONFIG_AMD_IOMMU_V2)
-
-#define KFD_SUPPORT_IOMMU_V2
-
-int kfd_iommu_check_device(struct kfd_dev *kfd);
-int kfd_iommu_device_init(struct kfd_dev *kfd);
-
-int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd);
-void kfd_iommu_unbind_process(struct kfd_process *p);
-
-void kfd_iommu_suspend(struct kfd_dev *kfd);
-int kfd_iommu_resume(struct kfd_dev *kfd);
-
-int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev);
-
-#else
-
-static inline int kfd_iommu_check_device(struct kfd_dev *kfd)
-{
- return -ENODEV;
-}
-static inline int kfd_iommu_device_init(struct kfd_dev *kfd)
-{
-#if IS_MODULE(CONFIG_AMD_IOMMU_V2)
- WARN_ONCE(1, "iommu_v2 module is not usable by built-in KFD");
-#endif
- return 0;
-}
-
-static inline int kfd_iommu_bind_process_to_device(
- struct kfd_process_device *pdd)
-{
- return 0;
-}
-static inline void kfd_iommu_unbind_process(struct kfd_process *p)
-{
- /* empty */
-}
-
-static inline void kfd_iommu_suspend(struct kfd_dev *kfd)
-{
- /* empty */
-}
-static inline int kfd_iommu_resume(struct kfd_dev *kfd)
-{
- return 0;
-}
-
-static inline int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev)
-{
- return 0;
-}
-
-#endif /* IS_REACHABLE(CONFIG_AMD_IOMMU_V2) */
-
-#endif /* __KFD_IOMMU_H__ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 64b4ac339904..fb3129883a4c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -31,24 +32,21 @@
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers.h"
#include "kfd_pm4_opcodes.h"
+#include "amdgpu_reset.h"
#define PM4_COUNT_ZERO (((1 << 15) - 1) << 16)
/* Initialize a kernel queue, including allocations of GART memory
* needed for the queue.
*/
-static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
+static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
enum kfd_queue_type type, unsigned int queue_size)
{
struct queue_properties prop;
int retval;
union PM4_MES_TYPE_3_HEADER nop;
- if (WARN_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ))
- return false;
-
- pr_debug("Initializing queue type %d size %d\n", KFD_QUEUE_TYPE_HIQ,
- queue_size);
+ pr_debug("Initializing queue type %d size %d\n", type, queue_size);
memset(&prop, 0, sizeof(prop));
memset(&nop, 0, sizeof(nop));
@@ -67,23 +65,25 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
break;
default:
- pr_err("Invalid queue type %d\n", type);
+ WARN(1, "Invalid queue type %d\n", type);
+ dev_err(dev->adev->dev, "Invalid queue type %d\n", type);
return false;
}
if (!kq->mqd_mgr)
return false;
- prop.doorbell_ptr = kfd_get_kernel_doorbell(dev, &prop.doorbell_off);
+ prop.doorbell_ptr = kfd_get_kernel_doorbell(dev->kfd, &prop.doorbell_off);
if (!prop.doorbell_ptr) {
- pr_err("Failed to initialize doorbell");
+ dev_err(dev->adev->dev, "Failed to initialize doorbell");
goto err_get_kernel_doorbell;
}
retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq);
if (retval != 0) {
- pr_err("Failed to init pq queues size %d\n", queue_size);
+ dev_err(dev->adev->dev, "Failed to init pq queues size %d\n",
+ queue_size);
goto err_pq_allocate_vidmem;
}
@@ -91,7 +91,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
kq->pq_gpu_addr = kq->pq->gpu_addr;
/* For CIK family asics, kq->eop_mem is not needed */
- if (dev->device_info->asic_family > CHIP_MULLINS) {
+ if (dev->adev->asic_type > CHIP_MULLINS) {
retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
if (retval != 0)
goto err_eop_allocate_vidmem;
@@ -111,7 +111,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
- retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size,
+ retval = kfd_gtt_sa_allocate(dev, dev->kfd->device_info.doorbell_size,
&kq->wptr_mem);
if (retval != 0)
@@ -122,7 +122,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
memset(kq->pq_kernel_addr, 0, queue_size);
memset(kq->rptr_kernel, 0, sizeof(*kq->rptr_kernel));
- memset(kq->wptr_kernel, 0, sizeof(*kq->wptr_kernel));
+ memset(kq->wptr_kernel, 0, dev->kfd->device_info.doorbell_size);
prop.queue_size = queue_size;
prop.is_interop = false;
@@ -188,22 +188,24 @@ err_rptr_allocate_vidmem:
err_eop_allocate_vidmem:
kfd_gtt_sa_free(dev, kq->pq);
err_pq_allocate_vidmem:
- kfd_release_kernel_doorbell(dev, prop.doorbell_ptr);
+ kfd_release_kernel_doorbell(dev->kfd, prop.doorbell_ptr);
err_get_kernel_doorbell:
return false;
}
/* Uninitialize a kernel queue and free all its memory usages. */
-static void kq_uninitialize(struct kernel_queue *kq, bool hanging)
+static void kq_uninitialize(struct kernel_queue *kq)
{
- if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ && !hanging)
+ if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ && down_read_trylock(&kq->dev->adev->reset_domain->sem)) {
kq->mqd_mgr->destroy_mqd(kq->mqd_mgr,
kq->queue->mqd,
KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
KFD_UNMAP_LATENCY_MS,
kq->queue->pipe,
kq->queue->queue);
+ up_read(&kq->dev->adev->reset_domain->sem);
+ }
else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ)
kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj);
@@ -219,7 +221,7 @@ static void kq_uninitialize(struct kernel_queue *kq, bool hanging)
kfd_gtt_sa_free(kq->dev, kq->eop_mem);
kfd_gtt_sa_free(kq->dev, kq->pq);
- kfd_release_kernel_doorbell(kq->dev,
+ kfd_release_kernel_doorbell(kq->dev->kfd,
kq->queue->properties.doorbell_ptr);
uninit_queue(kq->queue);
}
@@ -285,7 +287,7 @@ err_no_space:
return -ENOMEM;
}
-void kq_submit_packet(struct kernel_queue *kq)
+int kq_submit_packet(struct kernel_queue *kq)
{
#ifdef DEBUG
int i;
@@ -297,20 +299,31 @@ void kq_submit_packet(struct kernel_queue *kq)
}
pr_debug("\n");
#endif
- if (kq->dev->device_info->doorbell_size == 8) {
+ /* Fatal err detected, packet submission won't go through */
+ if (amdgpu_amdkfd_is_fed(kq->dev->adev))
+ return -EIO;
+
+ /* Make sure ring buffer is updated before wptr updated */
+ mb();
+
+ if (kq->dev->kfd->device_info.doorbell_size == 8) {
*kq->wptr64_kernel = kq->pending_wptr64;
+ mb(); /* Make sure wptr updated before ring doorbell */
write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
kq->pending_wptr64);
} else {
*kq->wptr_kernel = kq->pending_wptr;
+ mb(); /* Make sure wptr updated before ring doorbell */
write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
kq->pending_wptr);
}
+
+ return 0;
}
void kq_rollback_packet(struct kernel_queue *kq)
{
- if (kq->dev->device_info->doorbell_size == 8) {
+ if (kq->dev->kfd->device_info.doorbell_size == 8) {
kq->pending_wptr64 = *kq->wptr64_kernel;
kq->pending_wptr = *kq->wptr_kernel %
(kq->queue->properties.queue_size / 4);
@@ -319,7 +332,7 @@ void kq_rollback_packet(struct kernel_queue *kq)
}
}
-struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
+struct kernel_queue *kernel_queue_init(struct kfd_node *dev,
enum kfd_queue_type type)
{
struct kernel_queue *kq;
@@ -331,45 +344,45 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
if (kq_initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE))
return kq;
- pr_err("Failed to init kernel queue\n");
+ dev_err(dev->adev->dev, "Failed to init kernel queue\n");
kfree(kq);
return NULL;
}
-void kernel_queue_uninit(struct kernel_queue *kq, bool hanging)
+void kernel_queue_uninit(struct kernel_queue *kq)
{
- kq_uninitialize(kq, hanging);
+ kq_uninitialize(kq);
kfree(kq);
}
/* FIXME: Can this test be removed? */
-static __attribute__((unused)) void test_kq(struct kfd_dev *dev)
+static __attribute__((unused)) void test_kq(struct kfd_node *dev)
{
struct kernel_queue *kq;
uint32_t *buffer, i;
int retval;
- pr_err("Starting kernel queue test\n");
+ dev_err(dev->adev->dev, "Starting kernel queue test\n");
kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ);
if (unlikely(!kq)) {
- pr_err(" Failed to initialize HIQ\n");
- pr_err("Kernel queue test failed\n");
+ dev_err(dev->adev->dev, " Failed to initialize HIQ\n");
+ dev_err(dev->adev->dev, "Kernel queue test failed\n");
return;
}
retval = kq_acquire_packet_buffer(kq, 5, &buffer);
if (unlikely(retval != 0)) {
- pr_err(" Failed to acquire packet buffer\n");
- pr_err("Kernel queue test failed\n");
+ dev_err(dev->adev->dev, " Failed to acquire packet buffer\n");
+ dev_err(dev->adev->dev, "Kernel queue test failed\n");
return;
}
for (i = 0; i < 5; i++)
buffer[i] = kq->nop_packet;
kq_submit_packet(kq);
- pr_err("Ending kernel queue test\n");
+ dev_err(dev->adev->dev, "Ending kernel queue test\n");
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
index f4cfe9f1871c..e24ee50acdf0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -46,13 +47,13 @@
int kq_acquire_packet_buffer(struct kernel_queue *kq,
size_t packet_size_in_dwords,
unsigned int **buffer_ptr);
-void kq_submit_packet(struct kernel_queue *kq);
+int kq_submit_packet(struct kernel_queue *kq);
void kq_rollback_packet(struct kernel_queue *kq);
struct kernel_queue {
/* data */
- struct kfd_dev *dev;
+ struct kfd_node *dev;
struct mqd_manager *mqd_mgr;
struct queue *queue;
uint64_t pending_wptr64;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 9b9c2b9bf2ef..af53e796ea1b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -21,39 +21,39 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/types.h>
-#include <linux/hmm.h>
#include <linux/dma-direction.h>
#include <linux/dma-mapping.h>
+#include <linux/migrate.h>
#include "amdgpu_sync.h"
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
-#include "amdgpu_mn.h"
#include "amdgpu_res_cursor.h"
#include "kfd_priv.h"
#include "kfd_svm.h"
#include "kfd_migrate.h"
+#include "kfd_smi_events.h"
#ifdef dev_fmt
#undef dev_fmt
#endif
-#define dev_fmt(fmt) "kfd_migrate: %s: " fmt, __func__
+#define dev_fmt(fmt) "kfd_migrate: " fmt
-static uint64_t
-svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
+static u64
+svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, u64 addr)
{
return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
}
static int
-svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
- dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
+svm_migrate_gart_map(struct amdgpu_ring *ring, u64 npages,
+ dma_addr_t *addr, u64 *gart_addr, u64 flags)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_job *job;
unsigned int num_dw, num_bytes;
struct dma_fence *fence;
- uint64_t src_addr, dst_addr;
- uint64_t pte_flags;
+ u64 src_addr, dst_addr;
+ u64 pte_flags;
void *cpu_addr;
int r;
@@ -63,8 +63,12 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
num_bytes = npages * 8;
- r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
- AMDGPU_IB_POOL_DELAYED, &job);
+ r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ num_dw * 4 + num_bytes,
+ AMDGPU_IB_POOL_DELAYED,
+ &job,
+ AMDGPU_KERNEL_JOB_ID_KFD_GART_MAP);
if (r)
return r;
@@ -73,7 +77,7 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
- dst_addr, num_bytes, false);
+ dst_addr, num_bytes, 0);
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
@@ -86,30 +90,19 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
cpu_addr = &job->ibs[0].ptr[num_dw];
- r = amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
- if (r)
- goto error_free;
-
- r = amdgpu_job_submit(job, &adev->mman.entity,
- AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
- if (r)
- goto error_free;
-
+ amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
+ fence = amdgpu_job_submit(job);
dma_fence_put(fence);
return r;
-
-error_free:
- amdgpu_job_free(job);
- return r;
}
/**
* svm_migrate_copy_memory_gart - sdma copy data between ram and vram
*
* @adev: amdgpu device the sdma ring running
- * @src: source page address array
- * @dst: destination page address array
+ * @sys: system DMA pointer to be copied
+ * @vram: vram destination DMA pointer
* @npages: number of pages to copy
* @direction: enum MIGRATION_COPY_DIR
* @mfence: output, sdma fence to signal after sdma is done
@@ -129,15 +122,15 @@ error_free:
static int
svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
- uint64_t *vram, uint64_t npages,
+ u64 *vram, u64 npages,
enum MIGRATION_COPY_DIR direction,
struct dma_fence **mfence)
{
- const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
+ const u64 GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
- uint64_t gart_s, gart_d;
+ u64 gart_s, gart_d;
struct dma_fence *next;
- uint64_t size;
+ u64 size;
int r;
mutex_lock(&adev->mman.gtt_window_lock);
@@ -160,7 +153,7 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
}
r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
- NULL, &next, false, true, false);
+ NULL, &next, false, true, 0);
if (r) {
dev_err(adev->dev, "fail %d to copy memory\n", r);
goto out_unlock;
@@ -213,7 +206,7 @@ svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence)
unsigned long
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr)
{
- return (addr + adev->kfd.dev->pgmap.range.start) >> PAGE_SHIFT;
+ return (addr + adev->kfd.pgmap.range.start) >> PAGE_SHIFT;
}
static void
@@ -224,8 +217,7 @@ svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
page = pfn_to_page(pfn);
svm_range_bo_ref(prange->svm_bo);
page->zone_device_data = prange->svm_bo;
- get_page(page);
- lock_page(page);
+ zone_device_page_init(page, 0);
}
static void
@@ -244,7 +236,7 @@ svm_migrate_addr(struct amdgpu_device *adev, struct page *page)
unsigned long addr;
addr = page_to_pfn(page) << PAGE_SHIFT;
- return (addr - adev->kfd.dev->pgmap.range.start);
+ return (addr - adev->kfd.pgmap.range.start);
}
static struct page *
@@ -270,71 +262,58 @@ static void svm_migrate_put_sys_page(unsigned long addr)
static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate)
{
- unsigned long cpages = 0;
+ unsigned long mpages = 0;
unsigned long i;
for (i = 0; i < migrate->npages; i++) {
- if (migrate->src[i] & MIGRATE_PFN_VALID &&
+ if (migrate->dst[i] & MIGRATE_PFN_VALID &&
migrate->src[i] & MIGRATE_PFN_MIGRATE)
- cpages++;
+ mpages++;
}
- return cpages;
-}
-
-static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)
-{
- unsigned long upages = 0;
- unsigned long i;
-
- for (i = 0; i < migrate->npages; i++) {
- if (migrate->src[i] & MIGRATE_PFN_VALID &&
- !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
- upages++;
- }
- return upages;
+ return mpages;
}
static int
-svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
+svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
struct migrate_vma *migrate, struct dma_fence **mfence,
- dma_addr_t *scratch)
+ dma_addr_t *scratch, u64 ttm_res_offset)
{
- uint64_t npages = migrate->cpages;
+ u64 npages = migrate->npages;
+ struct amdgpu_device *adev = node->adev;
struct device *dev = adev->dev;
struct amdgpu_res_cursor cursor;
+ u64 mpages = 0;
dma_addr_t *src;
- uint64_t *dst;
- uint64_t i, j;
+ u64 *dst;
+ u64 i, j;
int r;
- pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
- prange->last);
+ pr_debug("svms 0x%p [0x%lx 0x%lx 0x%llx]\n", prange->svms, prange->start,
+ prange->last, ttm_res_offset);
src = scratch;
- dst = (uint64_t *)(scratch + npages);
+ dst = (u64 *)(scratch + npages);
- r = svm_range_vram_node_new(adev, prange, true);
- if (r) {
- dev_err(adev->dev, "fail %d to alloc vram\n", r);
- goto out;
- }
-
- amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT,
+ amdgpu_res_first(prange->ttm_res, ttm_res_offset,
npages << PAGE_SHIFT, &cursor);
- for (i = j = 0; i < npages; i++) {
+ for (i = j = 0; (i < npages) && (mpages < migrate->cpages); i++) {
struct page *spage;
- spage = migrate_pfn_to_page(migrate->src[i]);
- if (spage && !is_zone_device_page(spage)) {
+ if (migrate->src[i] & MIGRATE_PFN_MIGRATE) {
dst[i] = cursor.start + (j << PAGE_SHIFT);
migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
svm_migrate_get_vram_page(prange, migrate->dst[i]);
migrate->dst[i] = migrate_pfn(migrate->dst[i]);
+ mpages++;
+ }
+ spage = migrate_pfn_to_page(migrate->src[i]);
+ if (spage && !is_zone_device_page(spage)) {
src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
- DMA_TO_DEVICE);
+ DMA_BIDIRECTIONAL);
r = dma_mapping_error(dev, src[i]);
if (r) {
- dev_err(adev->dev, "fail %d dma_map_page\n", r);
+ dev_err(dev, "%s: fail %d dma_map_page\n",
+ __func__, r);
goto out_free_vram_pages;
}
} else {
@@ -346,7 +325,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
mfence);
if (r)
goto out_free_vram_pages;
- amdgpu_res_next(&cursor, j << PAGE_SHIFT);
+ amdgpu_res_next(&cursor, (j + 1) << PAGE_SHIFT);
j = 0;
} else {
amdgpu_res_next(&cursor, PAGE_SIZE);
@@ -365,7 +344,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
if (r)
goto out_free_vram_pages;
amdgpu_res_next(&cursor, (j + 1) * PAGE_SIZE);
- j= 0;
+ j = 0;
} else {
j++;
}
@@ -377,9 +356,12 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
out_free_vram_pages:
if (r) {
pr_debug("failed %d to copy memory to vram\n", r);
- while (i--) {
+ for (i = 0; i < npages && mpages; i++) {
+ if (!dst[i])
+ continue;
svm_migrate_put_vram_page(adev, dst[i]);
migrate->dst[i] = 0;
+ mpages--;
}
}
@@ -397,22 +379,24 @@ out_free_vram_pages:
migrate->dst[i + 3] = 0;
}
#endif
-out:
+
return r;
}
static long
-svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
- struct vm_area_struct *vma, uint64_t start,
- uint64_t end)
+svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
+ struct vm_area_struct *vma, u64 start,
+ u64 end, uint32_t trigger, u64 ttm_res_offset)
{
- uint64_t npages = (end - start) >> PAGE_SHIFT;
+ struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
+ u64 npages = (end - start) >> PAGE_SHIFT;
+ struct amdgpu_device *adev = node->adev;
struct kfd_process_device *pdd;
struct dma_fence *mfence = NULL;
- struct migrate_vma migrate;
+ struct migrate_vma migrate = { 0 };
unsigned long cpages = 0;
+ unsigned long mpages = 0;
dma_addr_t *scratch;
- size_t size;
void *buf;
int r = -ENOMEM;
@@ -423,9 +407,9 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
migrate.flags = MIGRATE_VMA_SELECT_SYSTEM;
migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
- size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
- size *= npages;
- buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
+ buf = kvcalloc(npages,
+ 2 * sizeof(*migrate.src) + sizeof(u64) + sizeof(dma_addr_t),
+ GFP_KERNEL);
if (!buf)
goto out;
@@ -433,10 +417,15 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
migrate.dst = migrate.src + npages;
scratch = (dma_addr_t *)(migrate.dst + npages);
+ kfd_smi_event_migration_start(node, p->lead_thread->pid,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ 0, node->id, prange->prefetch_loc,
+ prange->preferred_loc, trigger);
+
r = migrate_vma_setup(&migrate);
if (r) {
- dev_err(adev->dev, "vma setup fail %d range [0x%lx 0x%lx]\n", r,
- prange->start, prange->last);
+ dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
+ __func__, r, prange->start, prange->last);
goto out_free;
}
@@ -447,32 +436,35 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
goto out_free;
}
if (cpages != npages)
- pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+ pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
cpages, npages);
else
- pr_debug("0x%lx pages migrated\n", cpages);
+ pr_debug("0x%lx pages collected\n", cpages);
- r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch);
+ r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset);
migrate_vma_pages(&migrate);
- pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
- svm_migrate_successful_pages(&migrate), cpages, migrate.npages);
-
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
- svm_range_dma_unmap(adev->dev, scratch, 0, npages);
- svm_range_free_dma_mappings(prange);
+ mpages = svm_migrate_successful_pages(&migrate);
+ pr_debug("migrated/collected/requested 0x%lx/0x%lx/0x%lx\n",
+ mpages, cpages, migrate.npages);
+
+ svm_range_dma_unmap_dev(adev->dev, scratch, 0, npages);
out_free:
kvfree(buf);
+ kfd_smi_event_migration_end(node, p->lead_thread->pid,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ 0, node->id, trigger, r);
out:
- if (!r && cpages) {
- pdd = svm_range_get_pdd_by_adev(prange, adev);
+ if (!r && mpages) {
+ pdd = svm_range_get_pdd_by_node(prange, node);
if (pdd)
- WRITE_ONCE(pdd->page_in, pdd->page_in + cpages);
+ WRITE_ONCE(pdd->page_in, pdd->page_in + mpages);
- return cpages;
+ return mpages;
}
return r;
}
@@ -481,7 +473,10 @@ out:
* svm_migrate_ram_to_vram - migrate svm range from system to device
* @prange: range structure
* @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
* @mm: the process mm structure
+ * @trigger: reason of migration
*
* Context: Process context, caller hold mmap read lock, svms lock, prange lock
*
@@ -490,88 +485,118 @@ out:
*/
static int
svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
- struct mm_struct *mm)
+ unsigned long start_mgr, unsigned long last_mgr,
+ struct mm_struct *mm, uint32_t trigger)
{
unsigned long addr, start, end;
struct vm_area_struct *vma;
- struct amdgpu_device *adev;
- unsigned long cpages = 0;
+ u64 ttm_res_offset;
+ struct kfd_node *node;
+ unsigned long mpages = 0;
long r = 0;
- if (prange->actual_loc == best_loc) {
- pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
- prange->svms, prange->start, prange->last, best_loc);
- return 0;
+ if (start_mgr < prange->start || last_mgr > prange->last) {
+ pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+ start_mgr, last_mgr, prange->start, prange->last);
+ return -EFAULT;
}
- adev = svm_range_get_adev_by_id(prange, best_loc);
- if (!adev) {
- pr_debug("failed to get device by id 0x%x\n", best_loc);
+ node = svm_range_get_node_by_id(prange, best_loc);
+ if (!node) {
+ pr_debug("failed to get kfd node by id 0x%x\n", best_loc);
return -ENODEV;
}
- pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
- prange->start, prange->last, best_loc);
+ pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n",
+ prange->svms, start_mgr, last_mgr, prange->start, prange->last,
+ best_loc);
- /* FIXME: workaround for page locking bug with invalid pages */
- svm_range_prefault(prange, mm, SVM_ADEV_PGMAP_OWNER(adev));
+ start = start_mgr << PAGE_SHIFT;
+ end = (last_mgr + 1) << PAGE_SHIFT;
- start = prange->start << PAGE_SHIFT;
- end = (prange->last + 1) << PAGE_SHIFT;
+ r = amdgpu_amdkfd_reserve_mem_limit(node->adev,
+ prange->npages * PAGE_SIZE,
+ KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
+ node->xcp ? node->xcp->id : 0);
+ if (r) {
+ dev_dbg(node->adev->dev, "failed to reserve VRAM, r: %ld\n", r);
+ return -ENOSPC;
+ }
+
+ r = svm_range_vram_node_new(node, prange, true);
+ if (r) {
+ dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r);
+ goto out;
+ }
+ ttm_res_offset = (start_mgr - prange->start + prange->offset) << PAGE_SHIFT;
for (addr = start; addr < end;) {
unsigned long next;
- vma = find_vma(mm, addr);
- if (!vma || addr < vma->vm_start)
+ vma = vma_lookup(mm, addr);
+ if (!vma)
break;
next = min(vma->vm_end, end);
- r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next);
+ r = svm_migrate_vma_to_vram(node, prange, vma, addr, next, trigger, ttm_res_offset);
if (r < 0) {
pr_debug("failed %ld to migrate\n", r);
break;
} else {
- cpages += r;
+ mpages += r;
}
+ ttm_res_offset += next - addr;
addr = next;
}
- if (cpages)
+ if (mpages) {
prange->actual_loc = best_loc;
+ prange->vram_pages += mpages;
+ } else if (!prange->actual_loc) {
+ /* If no page was migrated and all pages of prange are in
+ * sys ram, drop the svm_bo obtained from svm_range_vram_node_new
+ */
+ svm_range_vram_node_free(prange);
+ }
+out:
+ amdgpu_amdkfd_unreserve_mem_limit(node->adev,
+ prange->npages * PAGE_SIZE,
+ KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
+ node->xcp ? node->xcp->id : 0);
return r < 0 ? r : 0;
}
-static void svm_migrate_page_free(struct page *page)
+static void svm_migrate_folio_free(struct folio *folio)
{
+ struct page *page = &folio->page;
struct svm_range_bo *svm_bo = page->zone_device_data;
if (svm_bo) {
pr_debug_ratelimited("ref: %d\n", kref_read(&svm_bo->kref));
- svm_range_bo_unref(svm_bo);
+ svm_range_bo_unref_async(svm_bo);
}
}
static int
svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
struct migrate_vma *migrate, struct dma_fence **mfence,
- dma_addr_t *scratch, uint64_t npages)
+ dma_addr_t *scratch, u64 npages)
{
struct device *dev = adev->dev;
- uint64_t *src;
+ u64 *src;
dma_addr_t *dst;
struct page *dpage;
- uint64_t i = 0, j;
- uint64_t addr;
+ u64 i = 0, j;
+ u64 addr;
int r = 0;
pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
prange->last);
- addr = prange->start << PAGE_SHIFT;
+ addr = migrate->start;
- src = (uint64_t *)(scratch + npages);
+ src = (u64 *)(scratch + npages);
dst = scratch;
for (i = 0, j = 0; i < npages; i++, addr += PAGE_SIZE) {
@@ -593,7 +618,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
continue;
}
src[i] = svm_migrate_addr(adev, spage);
- if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
+ if (j > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
r = svm_migrate_copy_memory_gart(adev, dst + i - j,
src + i - j, j,
FROM_VRAM_TO_RAM,
@@ -611,10 +636,10 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
goto out_oom;
}
- dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
r = dma_mapping_error(dev, dst[i]);
if (r) {
- dev_err(adev->dev, "fail %d dma_map_page\n", r);
+ dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r);
goto out_oom;
}
@@ -640,18 +665,37 @@ out_oom:
return r;
}
+/**
+ * svm_migrate_vma_to_ram - migrate range inside one vma from device to system
+ *
+ * @prange: svm range structure
+ * @vma: vm_area_struct that range [start, end) belongs to
+ * @start: range start virtual address in bytes
+ * @end: range end virtual address in bytes (exclusive)
+ * @node: kfd node device to migrate from
+ * @trigger: reason of migration
+ * @fault_page: vmf->page when called from svm_migrate_to_ram(), the CPU page fault callback
+ *
+ * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
+ *
+ * Return:
+ * negative values - indicate error
+ * positive values or zero - number of pages got migrated
+ */
static long
-svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
- struct vm_area_struct *vma, uint64_t start, uint64_t end)
+svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
+ struct vm_area_struct *vma, u64 start, u64 end,
+ uint32_t trigger, struct page *fault_page)
{
- uint64_t npages = (end - start) >> PAGE_SHIFT;
- unsigned long upages = npages;
+ struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
+ u64 npages = (end - start) >> PAGE_SHIFT;
unsigned long cpages = 0;
+ unsigned long mpages = 0;
+ struct amdgpu_device *adev = node->adev;
struct kfd_process_device *pdd;
struct dma_fence *mfence = NULL;
- struct migrate_vma migrate;
+ struct migrate_vma migrate = { 0 };
dma_addr_t *scratch;
- size_t size;
void *buf;
int r = -ENOMEM;
@@ -659,23 +703,32 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
migrate.vma = vma;
migrate.start = start;
migrate.end = end;
- migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
+ if (adev->gmc.xgmi.connected_to_cpu)
+ migrate.flags = MIGRATE_VMA_SELECT_DEVICE_COHERENT;
+ else
+ migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
- size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
- size *= npages;
- buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
+ buf = kvcalloc(npages,
+ 2 * sizeof(*migrate.src) + sizeof(u64) + sizeof(dma_addr_t),
+ GFP_KERNEL);
if (!buf)
goto out;
migrate.src = buf;
migrate.dst = migrate.src + npages;
+ migrate.fault_page = fault_page;
scratch = (dma_addr_t *)(migrate.dst + npages);
+ kfd_smi_event_migration_start(node, p->lead_thread->pid,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ node->id, 0, prange->prefetch_loc,
+ prange->preferred_loc, trigger);
+
r = migrate_vma_setup(&migrate);
if (r) {
- dev_err(adev->dev, "vma setup fail %d range [0x%lx 0x%lx]\n", r,
- prange->start, prange->last);
+ dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
+ __func__, r, prange->start, prange->last);
goto out_free;
}
@@ -683,101 +736,128 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
if (!cpages) {
pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n",
prange->start, prange->last);
- upages = svm_migrate_unsuccessful_pages(&migrate);
goto out_free;
}
if (cpages != npages)
- pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+ pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
cpages, npages);
else
- pr_debug("0x%lx pages migrated\n", cpages);
+ pr_debug("0x%lx pages collected\n", cpages);
r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
scratch, npages);
migrate_vma_pages(&migrate);
- upages = svm_migrate_unsuccessful_pages(&migrate);
- pr_debug("unsuccessful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
- upages, cpages, migrate.npages);
+ mpages = svm_migrate_successful_pages(&migrate);
+ pr_debug("migrated/collected/requested 0x%lx/0x%lx/0x%lx\n",
+ mpages, cpages, migrate.npages);
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
- svm_range_dma_unmap(adev->dev, scratch, 0, npages);
+
+ svm_range_dma_unmap_dev(adev->dev, scratch, 0, npages);
out_free:
kvfree(buf);
+ kfd_smi_event_migration_end(node, p->lead_thread->pid,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ node->id, 0, trigger, r);
out:
- if (!r && cpages) {
- pdd = svm_range_get_pdd_by_adev(prange, adev);
+ if (!r && mpages) {
+ pdd = svm_range_get_pdd_by_node(prange, node);
if (pdd)
- WRITE_ONCE(pdd->page_out, pdd->page_out + cpages);
-
- return upages;
+ WRITE_ONCE(pdd->page_out, pdd->page_out + mpages);
}
- return r ? r : upages;
+
+ return r ? r : mpages;
}
/**
* svm_migrate_vram_to_ram - migrate svm range from device to system
* @prange: range structure
* @mm: process mm, use current->mm if NULL
+ * @start_mgr: first page that needs to be migrated to sys ram
+ * @last_mgr: last page (inclusive) that needs to be migrated to sys ram
+ * @trigger: reason for the migration
+ * @fault_page: vmf->page when called from svm_migrate_to_ram(), the CPU page fault callback
*
- * Context: Process context, caller hold mmap read lock, svms lock, prange lock
+ * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
*
* Return:
* 0 - OK, otherwise error code
*/
-int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
+int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
+ unsigned long start_mgr, unsigned long last_mgr,
+ uint32_t trigger, struct page *fault_page)
{
- struct amdgpu_device *adev;
+ struct kfd_node *node;
struct vm_area_struct *vma;
unsigned long addr;
unsigned long start;
unsigned long end;
- unsigned long upages = 0;
+ unsigned long mpages = 0;
long r = 0;
+ /* this prange has no vram pages to migrate to sys ram */
if (!prange->actual_loc) {
pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
prange->start, prange->last);
return 0;
}
- adev = svm_range_get_adev_by_id(prange, prange->actual_loc);
- if (!adev) {
- pr_debug("failed to get device by id 0x%x\n",
- prange->actual_loc);
- return -ENODEV;
+ if (start_mgr < prange->start || last_mgr > prange->last) {
+ pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+ start_mgr, last_mgr, prange->start, prange->last);
+ return -EFAULT;
}
+ node = svm_range_get_node_by_id(prange, prange->actual_loc);
+ if (!node) {
+ pr_debug("failed to get kfd node by id 0x%x\n", prange->actual_loc);
+ return -ENODEV;
+ }
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n",
- prange->svms, prange, prange->start, prange->last,
+ prange->svms, prange, start_mgr, last_mgr,
prange->actual_loc);
- start = prange->start << PAGE_SHIFT;
- end = (prange->last + 1) << PAGE_SHIFT;
+ start = start_mgr << PAGE_SHIFT;
+ end = (last_mgr + 1) << PAGE_SHIFT;
for (addr = start; addr < end;) {
unsigned long next;
- vma = find_vma(mm, addr);
- if (!vma || addr < vma->vm_start)
+ vma = vma_lookup(mm, addr);
+ if (!vma) {
+ pr_debug("failed to find vma for prange %p\n", prange);
+ r = -EFAULT;
break;
+ }
next = min(vma->vm_end, end);
- r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next);
+ r = svm_migrate_vma_to_ram(node, prange, vma, addr, next, trigger,
+ fault_page);
if (r < 0) {
- pr_debug("failed %ld to migrate\n", r);
+ pr_debug("failed %ld to migrate prange %p\n", r, prange);
break;
} else {
- upages += r;
+ mpages += r;
}
addr = next;
}
- if (!upages) {
- svm_range_vram_node_free(prange);
- prange->actual_loc = 0;
+ if (r >= 0) {
+ WARN_ONCE(prange->vram_pages < mpages,
+ "Recorded vram pages(0x%llx) should not be less than migration pages(0x%lx).",
+ prange->vram_pages, mpages);
+ prange->vram_pages -= mpages;
+
+ /* If prange has no vram pages left, set its actual_loc to system
+ * and drop its svm_bo ref
+ */
+ if (prange->vram_pages == 0 && prange->ttm_res) {
+ prange->actual_loc = 0;
+ svm_range_vram_node_free(prange);
+ }
}
return r < 0 ? r : 0;
@@ -787,16 +867,23 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
* svm_migrate_vram_to_vram - migrate svm range from device to device
* @prange: range structure
* @best_loc: the device to migrate to
+ * @start: first page to migrate to best_loc
+ * @last: last page (inclusive) to migrate to best_loc
* @mm: process mm, use current->mm if NULL
+ * @trigger: reason of migration
*
* Context: Process context, caller hold mmap read lock, svms lock, prange lock
*
+ * migrate all vram pages in prange to sys ram, then migrate
+ * [start, last] pages from sys ram to gpu node best_loc.
+ *
* Return:
* 0 - OK, otherwise error code
*/
static int
svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
- struct mm_struct *mm)
+ unsigned long start, unsigned long last,
+ struct mm_struct *mm, uint32_t trigger)
{
int r, retries = 3;
@@ -808,7 +895,8 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);
do {
- r = svm_migrate_vram_to_ram(prange, mm);
+ r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last,
+ trigger, NULL);
if (r)
return r;
} while (prange->actual_loc && --retries);
@@ -816,17 +904,21 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
if (prange->actual_loc)
return -EDEADLK;
- return svm_migrate_ram_to_vram(prange, best_loc, mm);
+ return svm_migrate_ram_to_vram(prange, best_loc, start, last, mm, trigger);
}
int
svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
- struct mm_struct *mm)
+ unsigned long start, unsigned long last,
+ struct mm_struct *mm, uint32_t trigger)
{
- if (!prange->actual_loc)
- return svm_migrate_ram_to_vram(prange, best_loc, mm);
+ if (!prange->actual_loc || prange->actual_loc == best_loc)
+ return svm_migrate_ram_to_vram(prange, best_loc, start, last,
+ mm, trigger);
+
else
- return svm_migrate_vram_to_vram(prange, best_loc, mm);
+ return svm_migrate_vram_to_vram(prange, best_loc, start, last,
+ mm, trigger);
}
@@ -842,105 +934,103 @@ svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
*/
static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
{
+ unsigned long start, last, size;
unsigned long addr = vmf->address;
- struct vm_area_struct *vma;
- enum svm_work_list_ops op;
- struct svm_range *parent;
+ struct svm_range_bo *svm_bo;
struct svm_range *prange;
struct kfd_process *p;
struct mm_struct *mm;
int r = 0;
- vma = vmf->vma;
- mm = vma->vm_mm;
+ svm_bo = vmf->page->zone_device_data;
+ if (!svm_bo) {
+ pr_debug("failed get device page at addr 0x%lx\n", addr);
+ return VM_FAULT_SIGBUS;
+ }
+ if (!mmget_not_zero(svm_bo->eviction_fence->mm)) {
+ pr_debug("addr 0x%lx of process mm is destroyed\n", addr);
+ return VM_FAULT_SIGBUS;
+ }
- p = kfd_lookup_process_by_mm(vma->vm_mm);
+ mm = svm_bo->eviction_fence->mm;
+ if (mm != vmf->vma->vm_mm)
+ pr_debug("addr 0x%lx is COW mapping in child process\n", addr);
+
+ p = kfd_lookup_process_by_mm(mm);
if (!p) {
pr_debug("failed find process at fault address 0x%lx\n", addr);
- return VM_FAULT_SIGBUS;
+ r = VM_FAULT_SIGBUS;
+ goto out_mmput;
}
if (READ_ONCE(p->svms.faulting_task) == current) {
pr_debug("skipping ram migration\n");
- kfd_unref_process(p);
- return 0;
+ r = 0;
+ goto out_unref_process;
}
- addr >>= PAGE_SHIFT;
+
pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr);
+ addr >>= PAGE_SHIFT;
mutex_lock(&p->svms.lock);
- prange = svm_range_from_addr(&p->svms, addr, &parent);
+ prange = svm_range_from_addr(&p->svms, addr, NULL);
if (!prange) {
- pr_debug("cannot find svm range at 0x%lx\n", addr);
+ pr_debug("failed get range svms 0x%p addr 0x%lx\n", &p->svms, addr);
r = -EFAULT;
- goto out;
+ goto out_unlock_svms;
}
- mutex_lock(&parent->migrate_mutex);
- if (prange != parent)
- mutex_lock_nested(&prange->migrate_mutex, 1);
+ mutex_lock(&prange->migrate_mutex);
if (!prange->actual_loc)
goto out_unlock_prange;
- svm_range_lock(parent);
- if (prange != parent)
- mutex_lock_nested(&prange->lock, 1);
- r = svm_range_split_by_granularity(p, mm, addr, parent, prange);
- if (prange != parent)
- mutex_unlock(&prange->lock);
- svm_range_unlock(parent);
- if (r) {
- pr_debug("failed %d to split range by granularity\n", r);
- goto out_unlock_prange;
- }
+ /* Align migration range start and size to granularity size */
+ size = 1UL << prange->granularity;
+ start = max(ALIGN_DOWN(addr, size), prange->start);
+ last = min(ALIGN(addr + 1, size) - 1, prange->last);
- r = svm_migrate_vram_to_ram(prange, mm);
+ r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm, start, last,
+ KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, vmf->page);
if (r)
- pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r,
- prange, prange->start, prange->last);
-
- /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
- if (p->xnack_enabled && parent == prange)
- op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
- else
- op = SVM_OP_UPDATE_RANGE_NOTIFIER;
- svm_range_add_list_work(&p->svms, parent, mm, op);
- schedule_deferred_list_work(&p->svms);
+ pr_debug("failed %d migrate svms 0x%p range 0x%p [0x%lx 0x%lx]\n",
+ r, prange->svms, prange, start, last);
out_unlock_prange:
- if (prange != parent)
- mutex_unlock(&prange->migrate_mutex);
- mutex_unlock(&parent->migrate_mutex);
-out:
+ mutex_unlock(&prange->migrate_mutex);
+out_unlock_svms:
mutex_unlock(&p->svms.lock);
- kfd_unref_process(p);
-
+out_unref_process:
pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr);
-
+ kfd_unref_process(p);
+out_mmput:
+ mmput(mm);
return r ? VM_FAULT_SIGBUS : 0;
}
static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
- .page_free = svm_migrate_page_free,
+ .folio_free = svm_migrate_folio_free,
.migrate_to_ram = svm_migrate_to_ram,
};
/* Each VRAM page uses sizeof(struct page) on system memory */
#define SVM_HMM_PAGE_STRUCT_SIZE(size) ((size)/PAGE_SIZE * sizeof(struct page))
-int svm_migrate_init(struct amdgpu_device *adev)
+int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
{
- struct kfd_dev *kfddev = adev->kfd.dev;
+ struct amdgpu_kfd_dev *kfddev = &adev->kfd;
struct dev_pagemap *pgmap;
- struct resource *res;
+ struct resource *res = NULL;
unsigned long size;
void *r;
- /* Page migration works on Vega10 or newer */
- if (kfddev->device_info->asic_family < CHIP_VEGA10)
+ /* Page migration works on gfx9 or newer */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 0, 1))
return -EINVAL;
+ if (adev->apu_prefer_gtt)
+ return 0;
+
pgmap = &kfddev->pgmap;
memset(pgmap, 0, sizeof(*pgmap));
@@ -948,28 +1038,33 @@ int svm_migrate_init(struct amdgpu_device *adev)
* should remove reserved size
*/
size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20);
- res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
- if (IS_ERR(res))
- return -ENOMEM;
+ if (adev->gmc.xgmi.connected_to_cpu) {
+ pgmap->range.start = adev->gmc.aper_base;
+ pgmap->range.end = adev->gmc.aper_base + adev->gmc.aper_size - 1;
+ pgmap->type = MEMORY_DEVICE_COHERENT;
+ } else {
+ res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
+ if (IS_ERR(res))
+ return PTR_ERR(res);
+ pgmap->range.start = res->start;
+ pgmap->range.end = res->end;
+ pgmap->type = MEMORY_DEVICE_PRIVATE;
+ }
- pgmap->type = MEMORY_DEVICE_PRIVATE;
pgmap->nr_range = 1;
- pgmap->range.start = res->start;
- pgmap->range.end = res->end;
pgmap->ops = &svm_migrate_pgmap_ops;
pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev);
- pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
-
+ pgmap->flags = 0;
/* Device manager releases device-specific resources, memory region and
* pgmap when driver disconnects from device.
*/
r = devm_memremap_pages(adev->dev, pgmap);
if (IS_ERR(r)) {
pr_err("failed to register HMM device memory\n");
-
+ if (pgmap->type == MEMORY_DEVICE_PRIVATE)
+ devm_release_mem_region(adev->dev, res->start, resource_size(res));
/* Disable SVM support capability */
pgmap->type = 0;
- devm_release_mem_region(adev->dev, res->start, resource_size(res));
return PTR_ERR(r);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
index 2f5b3394c9ed..2b7fd442d29c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
@@ -31,7 +31,6 @@
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/sched/mm.h>
-#include <linux/hmm.h>
#include "kfd_priv.h"
#include "kfd_svm.h"
@@ -41,19 +40,15 @@ enum MIGRATION_COPY_DIR {
};
int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
- struct mm_struct *mm);
-int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm);
-unsigned long
-svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
-
-int svm_migrate_init(struct amdgpu_device *adev);
+ unsigned long start, unsigned long last,
+ struct mm_struct *mm, uint32_t trigger);
-#else
+int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
+ unsigned long start, unsigned long last,
+ uint32_t trigger, struct page *fault_page);
-static inline int svm_migrate_init(struct amdgpu_device *adev)
-{
- return 0;
-}
+unsigned long
+svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 5e90fe642192..33aa23450b3f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -76,8 +77,9 @@ err_ioctl:
static void kfd_exit(void)
{
- kfd_debugfs_fini();
+ kfd_cleanup_processes();
kfd_process_destroy_wq();
+ kfd_debugfs_fini();
kfd_procfs_shutdown();
kfd_topology_shutdown();
kfd_chardev_exit();
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index c021519af810..d9ae854b6908 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -45,9 +46,9 @@ int pipe_priority_map[] = {
KFD_PIPE_PRIORITY_CS_HIGH
};
-struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev, struct queue_properties *q)
+struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_node *dev, struct queue_properties *q)
{
- struct kfd_mem_obj *mqd_mem_obj = NULL;
+ struct kfd_mem_obj *mqd_mem_obj;
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
if (!mqd_mem_obj)
@@ -60,10 +61,10 @@ struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev, struct queue_propertie
return mqd_mem_obj;
}
-struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
+struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_node *dev,
struct queue_properties *q)
{
- struct kfd_mem_obj *mqd_mem_obj = NULL;
+ struct kfd_mem_obj *mqd_mem_obj;
uint64_t offset;
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
@@ -71,11 +72,12 @@ struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
return NULL;
offset = (q->sdma_engine_id *
- dev->device_info->num_sdma_queues_per_engine +
+ dev->kfd->device_info.num_sdma_queues_per_engine +
q->sdma_queue_id) *
dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
- offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
+ offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
+ NUM_XCC(dev->xcc_mask);
mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem
+ offset);
@@ -95,29 +97,42 @@ void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
const uint32_t *cu_mask, uint32_t cu_mask_count,
- uint32_t *se_mask)
+ uint32_t *se_mask, uint32_t inst)
{
- struct kfd_cu_info cu_info;
+ struct amdgpu_cu_info *cu_info = &mm->dev->adev->gfx.cu_info;
+ struct amdgpu_gfx_config *gfx_info = &mm->dev->adev->gfx.config;
uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
- int i, se, sh, cu;
- amdgpu_amdkfd_get_cu_info(mm->dev->kgd, &cu_info);
+ bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0);
+ uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1;
+ int i, se, sh, cu, cu_bitmap_sh_mul, cu_inc = wgp_mode_req ? 2 : 1;
+ uint32_t cu_active_per_node;
+ int inc = cu_inc * NUM_XCC(mm->dev->xcc_mask);
+ int xcc_inst = inst + ffs(mm->dev->xcc_mask) - 1;
- if (cu_mask_count > cu_info.cu_active_number)
- cu_mask_count = cu_info.cu_active_number;
+ cu_active_per_node = cu_info->number / mm->dev->kfd->num_nodes;
+ if (cu_mask_count > cu_active_per_node)
+ cu_mask_count = cu_active_per_node;
/* Exceeding these bounds corrupts the stack and indicates a coding error.
* Returning with no CU's enabled will hang the queue, which should be
* attention grabbing.
*/
- if (cu_info.num_shader_engines > KFD_MAX_NUM_SE) {
- pr_err("Exceeded KFD_MAX_NUM_SE, chip reports %d\n", cu_info.num_shader_engines);
+ if (gfx_info->max_shader_engines > KFD_MAX_NUM_SE) {
+ dev_err(mm->dev->adev->dev,
+ "Exceeded KFD_MAX_NUM_SE, chip reports %d\n",
+ gfx_info->max_shader_engines);
return;
}
- if (cu_info.num_shader_arrays_per_engine > KFD_MAX_NUM_SH_PER_SE) {
- pr_err("Exceeded KFD_MAX_NUM_SH, chip reports %d\n",
- cu_info.num_shader_arrays_per_engine * cu_info.num_shader_engines);
+ if (gfx_info->max_sh_per_se > KFD_MAX_NUM_SH_PER_SE) {
+ dev_err(mm->dev->adev->dev,
+ "Exceeded KFD_MAX_NUM_SH, chip reports %d\n",
+ gfx_info->max_sh_per_se * gfx_info->max_shader_engines);
return;
}
+
+ cu_bitmap_sh_mul = (KFD_GC_VERSION(mm->dev) >= IP_VERSION(11, 0, 0) &&
+ KFD_GC_VERSION(mm->dev) < IP_VERSION(13, 0, 0)) ? 2 : 1;
+
/* Count active CUs per SH.
*
* Some CUs in an SH may be disabled. HW expects disabled CUs to be
@@ -127,10 +142,13 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
* Each half of se_mask must be filled only on bits 0-cu_per_sh[se][sh]-1.
*
* See note on Arcturus cu_bitmap layout in gfx_v9_0_get_cu_info.
+ * See note on GFX11 cu_bitmap layout in gfx_v11_0_get_cu_info.
*/
- for (se = 0; se < cu_info.num_shader_engines; se++)
- for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++)
- cu_per_sh[se][sh] = hweight32(cu_info.cu_bitmap[se % 4][sh + (se / 4)]);
+ for (se = 0; se < gfx_info->max_shader_engines; se++)
+ for (sh = 0; sh < gfx_info->max_sh_per_se; sh++)
+ cu_per_sh[se][sh] = hweight32(
+ cu_info->bitmap[xcc_inst][se % 4][sh + (se / 4) *
+ cu_bitmap_sh_mul]);
/* Symmetrically map cu_mask to all SEs & SHs:
* se_mask programs up to 2 SH in the upper and lower 16 bits.
@@ -153,23 +171,142 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
* cu_mask[0] bit8 -> se_mask[0] bit1 (SE0,SH0,CU1)
* ...
*
+ * For GFX 9.4.3, the following code only looks at a
+ * subset of the cu_mask corresponding to the inst parameter.
+ * If we have n XCCs under one GPU node (XCC0 .. XCCn-1)
+ * cu_mask[0] bit0 -> XCC0 se_mask[0] bit0 (XCC0,SE0,SH0,CU0)
+ * cu_mask[0] bit1 -> XCC1 se_mask[0] bit0 (XCC1,SE0,SH0,CU0)
+ * ..
+ * cu_mask[0] bit n-1 -> XCCn-1 se_mask[0] bit0 (XCCn-1,SE0,SH0,CU0)
+ * cu_mask[0] bit n -> XCC0 se_mask[1] bit0 (XCC0,SE1,SH0,CU0)
+ *
+ * For example, if there are 6 XCCs under 1 KFD node, this code
+ * running for each inst, will look at the bits as:
+ * inst, inst + 6, inst + 12...
+ *
* First ensure all CUs are disabled, then enable user specified CUs.
*/
- for (i = 0; i < cu_info.num_shader_engines; i++)
+ for (i = 0; i < gfx_info->max_shader_engines; i++)
se_mask[i] = 0;
- i = 0;
- for (cu = 0; cu < 16; cu++) {
- for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) {
- for (se = 0; se < cu_info.num_shader_engines; se++) {
+ i = inst;
+ for (cu = 0; cu < 16; cu += cu_inc) {
+ for (sh = 0; sh < gfx_info->max_sh_per_se; sh++) {
+ for (se = 0; se < gfx_info->max_shader_engines; se++) {
if (cu_per_sh[se][sh] > cu) {
- if (cu_mask[i / 32] & (1 << (i % 32)))
- se_mask[se] |= 1 << (cu + sh * 16);
- i++;
- if (i == cu_mask_count)
+ if (cu_mask[i / 32] & (en_mask << (i % 32)))
+ se_mask[se] |= en_mask << (cu + sh * 16);
+ i += inc;
+ if (i >= cu_mask_count)
return;
}
}
}
}
}
+
+int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+ return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd, pipe_id,
+ queue_id, p->doorbell_off, 0);
+}
+
+int kfd_destroy_mqd_cp(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type, unsigned int timeout,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, mqd, type, timeout,
+ pipe_id, queue_id, 0);
+}
+
+void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj)
+{
+ if (mqd_mem_obj->gtt_mem) {
+ amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, &mqd_mem_obj->gtt_mem);
+ kfree(mqd_mem_obj);
+ } else {
+ kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+ }
+}
+
+bool kfd_is_occupied_cp(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->adev, queue_address,
+ pipe_id, queue_id, 0);
+}
+
+int kfd_load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+ return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,
+ (uint32_t __user *)p->write_ptr,
+ mms);
+}
+
+/*
+ * preempt type here is ignored because there is only one way
+ * to preempt sdma queue
+ */
+int kfd_destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type,
+ unsigned int timeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);
+}
+
+bool kfd_is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);
+}
+
+uint64_t kfd_hiq_mqd_stride(struct kfd_node *dev)
+{
+ return dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
+}
+
+void kfd_get_hiq_xcc_mqd(struct kfd_node *dev, struct kfd_mem_obj *mqd_mem_obj,
+ uint32_t virtual_xcc_id)
+{
+ uint64_t offset;
+
+ offset = kfd_hiq_mqd_stride(dev) * virtual_xcc_id;
+
+ mqd_mem_obj->gtt_mem = (virtual_xcc_id == 0) ?
+ dev->dqm->hiq_sdma_mqd.gtt_mem : NULL;
+ mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr + offset;
+ mqd_mem_obj->cpu_ptr = (uint32_t *)((uintptr_t)
+ dev->dqm->hiq_sdma_mqd.cpu_ptr + offset);
+}
+
+uint64_t kfd_mqd_stride(struct mqd_manager *mm,
+ struct queue_properties *q)
+{
+ return mm->mqd_size;
+}
+
+bool kfd_check_hiq_mqd_doorbell_id(struct kfd_node *node, uint32_t doorbell_id,
+ uint32_t inst)
+{
+ if (doorbell_id) {
+ struct device *dev = node->adev->dev;
+
+ if (node->adev->xcp_mgr && node->adev->xcp_mgr->num_xcps > 0)
+ dev_err(dev, "XCC %d: Queue preemption failed for queue with doorbell_id: %x\n",
+ inst, doorbell_id);
+ else
+ dev_err(dev, "Queue preemption failed for queue with doorbell_id: %x\n",
+ doorbell_id);
+ return true;
+ }
+
+ return false;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index 965e17c5dbb4..17cc1f25c8d0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -67,7 +68,7 @@
*/
extern int pipe_priority_map[];
struct mqd_manager {
- struct kfd_mem_obj* (*allocate_mqd)(struct kfd_dev *kfd,
+ struct kfd_mem_obj* (*allocate_mqd)(struct kfd_node *kfd,
struct queue_properties *q);
void (*init_mqd)(struct mqd_manager *mm, void **mqd,
@@ -96,30 +97,107 @@ struct mqd_manager {
uint32_t queue_id);
int (*get_wave_state)(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
void __user *ctl_stack,
u32 *ctl_stack_used_size,
u32 *save_area_used_size);
+ void (*get_checkpoint_info)(struct mqd_manager *mm, void *mqd, uint32_t *ctl_stack_size);
+
+ void (*checkpoint_mqd)(struct mqd_manager *mm,
+ void *mqd,
+ void *mqd_dst,
+ void *ctl_stack_dst);
+
+ void (*restore_mqd)(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *p,
+ const void *mqd_src,
+ const void *ctl_stack_src,
+ const u32 ctl_stack_size);
+
#if defined(CONFIG_DEBUG_FS)
int (*debugfs_show_mqd)(struct seq_file *m, void *data);
#endif
- uint32_t (*read_doorbell_id)(void *mqd);
+ bool (*check_preemption_failed)(struct mqd_manager *mm, void *mqd);
+ uint64_t (*mqd_stride)(struct mqd_manager *mm,
+ struct queue_properties *p);
struct mutex mqd_mutex;
- struct kfd_dev *dev;
+ struct kfd_node *dev;
uint32_t mqd_size;
};
-struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev,
+struct mqd_user_context_save_area_header {
+ /* Byte offset from start of user context
+ * save area to the last saved top (lowest
+ * address) of control stack data. Must be
+ * 4 byte aligned.
+ */
+ uint32_t control_stack_offset;
+
+ /* Byte size of the last saved control stack
+ * data. Must be 4 byte aligned.
+ */
+ uint32_t control_stack_size;
+
+ /* Byte offset from start of user context save
+ * area to the last saved base (lowest address)
+ * of wave state data. Must be 4 byte aligned.
+ */
+ uint32_t wave_state_offset;
+
+ /* Byte size of the last saved wave state data.
+ * Must be 4 byte aligned.
+ */
+ uint32_t wave_state_size;
+};
+
+struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_node *dev,
struct queue_properties *q);
-struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
+struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_node *dev,
struct queue_properties *q);
void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj);
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
const uint32_t *cu_mask, uint32_t cu_mask_count,
- uint32_t *se_mask);
+ uint32_t *se_mask, uint32_t inst);
+
+int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms);
+
+int kfd_destroy_mqd_cp(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type, unsigned int timeout,
+ uint32_t pipe_id, uint32_t queue_id);
+void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj);
+
+bool kfd_is_occupied_cp(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id);
+
+int kfd_load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms);
+
+int kfd_destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type, unsigned int timeout,
+ uint32_t pipe_id, uint32_t queue_id);
+
+bool kfd_is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id);
+
+void kfd_get_hiq_xcc_mqd(struct kfd_node *dev,
+ struct kfd_mem_obj *mqd_mem_obj, uint32_t virtual_xcc_id);
+
+uint64_t kfd_hiq_mqd_stride(struct kfd_node *dev);
+uint64_t kfd_mqd_stride(struct mqd_manager *mm,
+ struct queue_properties *q);
+bool kfd_check_hiq_mqd_doorbell_id(struct kfd_node *node, uint32_t doorbell_id,
+ uint32_t inst);
#endif /* KFD_MQD_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 8128f4d312f1..05f3ac2eaef9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -47,12 +48,11 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
struct cik_mqd *m;
uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
- if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) ||
- !minfo->cu_mask.ptr)
+ if (!minfo || !minfo->cu_mask.ptr)
return;
mqd_symmetrically_map_cu_mask(mm,
- minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
+ minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
m = get_mqd(mqd);
m->compute_static_thread_mgmt_se0 = se_mask[0];
@@ -73,7 +73,7 @@ static void set_priority(struct cik_mqd *m, struct queue_properties *q)
m->cp_hqd_queue_priority = q->priority;
}
-static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
+static struct kfd_mem_obj *allocate_mqd(struct kfd_node *kfd,
struct queue_properties *q)
{
struct kfd_mem_obj *mqd_mem_obj;
@@ -156,13 +156,6 @@ static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
mm->update_mqd(mm, m, q, NULL);
}
-static void free_mqd(struct mqd_manager *mm, void *mqd,
- struct kfd_mem_obj *mqd_mem_obj)
-{
- kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
-}
-
-
static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
uint32_t queue_id, struct queue_properties *p,
struct mm_struct *mms)
@@ -171,18 +164,9 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1);
- return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
+ return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
(uint32_t __user *)p->write_ptr,
- wptr_shift, wptr_mask, mms);
-}
-
-static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
- uint32_t pipe_id, uint32_t queue_id,
- struct queue_properties *p, struct mm_struct *mms)
-{
- return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
- (uint32_t __user *)p->write_ptr,
- mms);
+ wptr_shift, wptr_mask, mms, 0);
}
static void __update_mqd(struct mqd_manager *mm, void *mqd,
@@ -222,23 +206,16 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-static void update_mqd(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q,
- struct mqd_update_info *minfo)
-{
- __update_mqd(mm, mqd, q, minfo, 1);
-}
-
-static uint32_t read_doorbell_id(void *mqd)
+static bool check_preemption_failed(struct mqd_manager *mm, void *mqd)
{
struct cik_mqd *m = (struct cik_mqd *)mqd;
- return m->queue_doorbell_id0;
+ return kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, 0);
}
-static void update_mqd_hawaii(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q,
- struct mqd_update_info *minfo)
+static void update_mqd(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
{
__update_mqd(mm, mqd, q, minfo, 0);
}
@@ -271,42 +248,75 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-static int destroy_mqd(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
{
- return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, mqd, type, timeout,
- pipe_id, queue_id);
+ struct cik_mqd *m;
+
+ m = get_mqd(mqd);
+
+ memcpy(mqd_dst, m, sizeof(struct cik_mqd));
}
-/*
- * preempt type here is ignored because there is only one way
- * to preempt sdma queue
- */
-static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
+static void restore_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, const u32 ctl_stack_size)
{
- return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+ uint64_t addr;
+ struct cik_mqd *m;
+
+ m = (struct cik_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(qp->doorbell_off);
+
+ pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+ m->cp_hqd_pq_doorbell_control);
+
+ qp->is_active = 0;
}
-static bool is_occupied(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
+static void checkpoint_mqd_sdma(struct mqd_manager *mm,
+ void *mqd,
+ void *mqd_dst,
+ void *ctl_stack_dst)
{
+ struct cik_sdma_rlc_registers *m;
- return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->kgd, queue_address,
- pipe_id, queue_id);
+ m = get_sdma_mqd(mqd);
+ memcpy(mqd_dst, m, sizeof(struct cik_sdma_rlc_registers));
}
-static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
+static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, const u32 ctl_stack_size)
{
- return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+ uint64_t addr;
+ struct cik_sdma_rlc_registers *m;
+
+ m = (struct cik_sdma_rlc_registers *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ m->sdma_rlc_doorbell =
+ qp->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ qp->is_active = 0;
}
/*
@@ -370,9 +380,8 @@ static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
#endif
-
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
- struct kfd_dev *dev)
+ struct kfd_node *dev)
{
struct mqd_manager *mqd;
@@ -389,11 +398,13 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
case KFD_MQD_TYPE_CP:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
+ mqd->checkpoint_mqd = checkpoint_mqd;
+ mqd->restore_mqd = restore_mqd;
mqd->mqd_size = sizeof(struct cik_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -405,23 +416,25 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
mqd->free_mqd = free_mqd_hiq_sdma;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct cik_mqd);
+ mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
- mqd->read_doorbell_id = read_doorbell_id;
+ mqd->check_preemption_failed = check_preemption_failed;
break;
case KFD_MQD_TYPE_DIQ:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct cik_mqd);
+ mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
@@ -430,11 +443,14 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
mqd->allocate_mqd = allocate_sdma_mqd;
mqd->init_mqd = init_mqd_sdma;
mqd->free_mqd = free_mqd_hiq_sdma;
- mqd->load_mqd = load_mqd_sdma;
+ mqd->load_mqd = kfd_load_mqd_sdma;
mqd->update_mqd = update_mqd_sdma;
- mqd->destroy_mqd = destroy_mqd_sdma;
- mqd->is_occupied = is_occupied_sdma;
+ mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+ mqd->is_occupied = kfd_is_occupied_sdma;
+ mqd->checkpoint_mqd = checkpoint_mqd_sdma;
+ mqd->restore_mqd = restore_mqd_sdma;
mqd->mqd_size = sizeof(struct cik_sdma_rlc_registers);
+ mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif
@@ -446,16 +462,3 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
return mqd;
}
-
-struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
- struct kfd_dev *dev)
-{
- struct mqd_manager *mqd;
-
- mqd = mqd_manager_init_cik(type, dev);
- if (!mqd)
- return NULL;
- if (type == KFD_MQD_TYPE_CP)
- mqd->update_mqd = update_mqd_hawaii;
- return mqd;
-}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 270160fc401b..1695dd78ede8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2018-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -47,12 +48,11 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
struct v10_compute_mqd *m;
uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
- if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) ||
- !minfo->cu_mask.ptr)
+ if (!minfo || !minfo->cu_mask.ptr)
return;
mqd_symmetrically_map_cu_mask(mm,
- minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
+ minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
m = get_mqd(mqd);
m->compute_static_thread_mgmt_se0 = se_mask[0];
@@ -73,7 +73,7 @@ static void set_priority(struct v10_compute_mqd *m, struct queue_properties *q)
m->cp_hqd_queue_priority = q->priority;
}
-static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
+static struct kfd_mem_obj *allocate_mqd(struct kfd_node *kfd,
struct queue_properties *q)
{
struct kfd_mem_obj *mqd_mem_obj;
@@ -107,6 +107,8 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+ m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
m->cp_mqd_base_addr_lo = lower_32_bits(addr);
@@ -116,12 +118,17 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
+ /* Set cp_hqd_hq_scheduler0 bit 14 to 1 to have the CP set up the
+ * DISPATCH_PTR. This is required for the kfd debugger
+ */
+ m->cp_hqd_hq_scheduler0 = 1 << 14;
+
if (q->format == KFD_QUEUE_FORMAT_AQL) {
m->cp_hqd_aql_control =
1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
}
- if (mm->dev->cwsr_enabled) {
+ if (mm->dev->kfd->cwsr_enabled) {
m->cp_hqd_persistent_state |=
(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
m->cp_hqd_ctx_save_base_addr_lo =
@@ -148,20 +155,12 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
- r = mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
+ r = mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
(uint32_t __user *)p->write_ptr,
- wptr_shift, 0, mms);
+ wptr_shift, 0, mms, 0);
return r;
}
-static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
- uint32_t pipe_id, uint32_t queue_id,
- struct queue_properties *p, struct mm_struct *mms)
-{
- return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id,
- queue_id, p->doorbell_off);
-}
-
static void update_mqd(struct mqd_manager *mm, void *mqd,
struct queue_properties *q,
struct mqd_update_info *minfo)
@@ -170,9 +169,10 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
m = get_mqd(mqd);
- m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK;
m->cp_hqd_pq_control |=
ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+
pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
@@ -213,11 +213,11 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
/* GC 10 removed WPP_CLAMP from PQ Control */
m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
- 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT ;
+ 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT;
m->cp_hqd_pq_doorbell_control |=
1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
}
- if (mm->dev->cwsr_enabled)
+ if (mm->dev->kfd->cwsr_enabled)
m->cp_hqd_ctx_save_control = 0;
update_cu_mask(mm, mqd, minfo);
@@ -226,44 +226,21 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-static uint32_t read_doorbell_id(void *mqd)
+static bool check_preemption_failed(struct mqd_manager *mm, void *mqd)
{
struct v10_compute_mqd *m = (struct v10_compute_mqd *)mqd;
- return m->queue_doorbell_id0;
-}
-
-static int destroy_mqd(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
-{
- return mm->dev->kfd2kgd->hqd_destroy
- (mm->dev->kgd, mqd, type, timeout,
- pipe_id, queue_id);
-}
-
-static void free_mqd(struct mqd_manager *mm, void *mqd,
- struct kfd_mem_obj *mqd_mem_obj)
-{
- kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
-}
-
-static bool is_occupied(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
-{
- return mm->dev->kfd2kgd->hqd_is_occupied(
- mm->dev->kgd, queue_address,
- pipe_id, queue_id);
+ return kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, 0);
}
static int get_wave_state(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
void __user *ctl_stack,
u32 *ctl_stack_used_size,
u32 *save_area_used_size)
{
struct v10_compute_mqd *m;
+ struct kfd_context_save_area_header header;
m = get_mqd(mqd);
@@ -282,9 +259,54 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
* accessible to user mode
*/
+ header.wave_state.control_stack_size = *ctl_stack_used_size;
+ header.wave_state.wave_state_size = *save_area_used_size;
+
+ header.wave_state.wave_state_offset = m->cp_hqd_wg_state_offset;
+ header.wave_state.control_stack_offset = m->cp_hqd_cntl_stack_offset;
+
+ if (copy_to_user(ctl_stack, &header, sizeof(header.wave_state)))
+ return -EFAULT;
+
return 0;
}
+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
+{
+ struct v10_compute_mqd *m;
+
+ m = get_mqd(mqd);
+
+ memcpy(mqd_dst, m, sizeof(struct v10_compute_mqd));
+}
+
+static void restore_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, const u32 ctl_stack_size)
+{
+ uint64_t addr;
+ struct v10_compute_mqd *m;
+
+ m = (struct v10_compute_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ m->cp_hqd_pq_doorbell_control =
+ qp->doorbell_off <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+ m->cp_hqd_pq_doorbell_control);
+
+ qp->is_active = 0;
+}
+
static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -299,6 +321,26 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
}
+static int destroy_hiq_mqd(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type, unsigned int timeout,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ int err;
+ struct v10_compute_mqd *m;
+ u32 doorbell_off;
+
+ m = get_mqd(mqd);
+
+ doorbell_off = m->cp_hqd_pq_doorbell_control >>
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+
+ err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, 0);
+ if (err)
+ pr_debug("Destroy HIQ MQD failed: %d\n", err);
+
+ return err;
+}
+
static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -316,15 +358,6 @@ static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
mm->update_mqd(mm, m, q, NULL);
}
-static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
- uint32_t pipe_id, uint32_t queue_id,
- struct queue_properties *p, struct mm_struct *mms)
-{
- return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
- (uint32_t __user *)p->write_ptr,
- mms);
-}
-
#define SDMA_RLC_DUMMY_DEFAULT 0xf
static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
@@ -354,23 +387,41 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-/*
- * * preempt type here is ignored because there is only one way
- * * to preempt sdma queue
- */
-static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
+static void checkpoint_mqd_sdma(struct mqd_manager *mm,
+ void *mqd,
+ void *mqd_dst,
+ void *ctl_stack_dst)
{
- return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+ struct v10_sdma_mqd *m;
+
+ m = get_sdma_mqd(mqd);
+
+ memcpy(mqd_dst, m, sizeof(struct v10_sdma_mqd));
}
-static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
+static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src,
+ const u32 ctl_stack_size)
{
- return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+ uint64_t addr;
+ struct v10_sdma_mqd *m;
+
+ m = (struct v10_sdma_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ m->sdmax_rlcx_doorbell_offset =
+ qp->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ qp->is_active = 0;
}
#if defined(CONFIG_DEBUG_FS)
@@ -392,7 +443,7 @@ static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
#endif
struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
- struct kfd_dev *dev)
+ struct kfd_node *dev)
{
struct mqd_manager *mqd;
@@ -410,13 +461,16 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
pr_debug("%s@%i\n", __func__, __LINE__);
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v10_compute_mqd);
mqd->get_wave_state = get_wave_state;
+ mqd->checkpoint_mqd = checkpoint_mqd;
+ mqd->restore_mqd = restore_mqd;
+ mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
@@ -427,25 +481,26 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
mqd->allocate_mqd = allocate_hiq_mqd;
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd_hiq_sdma;
- mqd->load_mqd = hiq_load_mqd_kiq;
+ mqd->load_mqd = kfd_hiq_load_mqd_kiq;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = destroy_hiq_mqd;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v10_compute_mqd);
+ mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
- mqd->read_doorbell_id = read_doorbell_id;
+ mqd->check_preemption_failed = check_preemption_failed;
pr_debug("%s@%i\n", __func__, __LINE__);
break;
case KFD_MQD_TYPE_DIQ:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v10_compute_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -456,11 +511,14 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
mqd->allocate_mqd = allocate_sdma_mqd;
mqd->init_mqd = init_mqd_sdma;
mqd->free_mqd = free_mqd_hiq_sdma;
- mqd->load_mqd = load_mqd_sdma;
+ mqd->load_mqd = kfd_load_mqd_sdma;
mqd->update_mqd = update_mqd_sdma;
- mqd->destroy_mqd = destroy_mqd_sdma;
- mqd->is_occupied = is_occupied_sdma;
+ mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+ mqd->is_occupied = kfd_is_occupied_sdma;
+ mqd->checkpoint_mqd = checkpoint_mqd_sdma;
+ mqd->restore_mqd = restore_mqd_sdma;
mqd->mqd_size = sizeof(struct v10_sdma_mqd);
+ mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
new file mode 100644
index 000000000000..3c0ae28c5923
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -0,0 +1,569 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+#include "v11_structs.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "amdgpu_amdkfd.h"
+
+/* Reinterpret an opaque MQD buffer as a GFX v11 compute MQD (no validation). */
+static inline struct v11_compute_mqd *get_mqd(void *mqd)
+{
+	struct v11_compute_mqd *compute_mqd = mqd;
+
+	return compute_mqd;
+}
+
+/* Reinterpret an opaque MQD buffer as a GFX v11 SDMA MQD (no validation). */
+static inline struct v11_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+	struct v11_sdma_mqd *sdma_mqd = mqd;
+
+	return sdma_mqd;
+}
+
+/*
+ * Program the eight per-SE static thread management masks of a compute MQD.
+ *
+ * Nothing is done when @minfo is NULL, or when it carries neither a debug
+ * workaround flag nor a CU mask pointer.  A debug workaround flag takes
+ * precedence: every SE mask is set to 0xffff (enable) or 0xffffffff
+ * (disable) and the user CU mask is not consulted.  Otherwise the user CU
+ * mask is mapped with mqd_symmetrically_map_cu_mask() and written out.
+ */
+static void update_cu_mask(struct mqd_manager *mm, void *mqd,
+			   struct mqd_update_info *minfo)
+{
+	uint32_t per_se[KFD_MAX_NUM_SE] = {0};
+	struct v11_compute_mqd *cmqd;
+	uint32_t wa_bits;
+
+	if (!minfo)
+		return;
+
+	wa_bits = minfo->update_flag &
+		  (UPDATE_FLAG_DBG_WA_ENABLE | UPDATE_FLAG_DBG_WA_DISABLE);
+	if (!wa_bits && !minfo->cu_mask.ptr)
+		return;
+
+	cmqd = get_mqd(mqd);
+
+	if (wa_bits) {
+		uint32_t fill;
+
+		if (minfo->update_flag & UPDATE_FLAG_DBG_WA_ENABLE)
+			fill = 0xffff;
+		else
+			fill = 0xffffffff;
+
+		cmqd->compute_static_thread_mgmt_se0 = fill;
+		cmqd->compute_static_thread_mgmt_se1 = fill;
+		cmqd->compute_static_thread_mgmt_se2 = fill;
+		cmqd->compute_static_thread_mgmt_se3 = fill;
+		cmqd->compute_static_thread_mgmt_se4 = fill;
+		cmqd->compute_static_thread_mgmt_se5 = fill;
+		cmqd->compute_static_thread_mgmt_se6 = fill;
+		cmqd->compute_static_thread_mgmt_se7 = fill;
+		return;
+	}
+
+	mqd_symmetrically_map_cu_mask(mm, minfo->cu_mask.ptr,
+				      minfo->cu_mask.count, per_se, 0);
+
+	cmqd->compute_static_thread_mgmt_se0 = per_se[0];
+	cmqd->compute_static_thread_mgmt_se1 = per_se[1];
+	cmqd->compute_static_thread_mgmt_se2 = per_se[2];
+	cmqd->compute_static_thread_mgmt_se3 = per_se[3];
+	cmqd->compute_static_thread_mgmt_se4 = per_se[4];
+	cmqd->compute_static_thread_mgmt_se5 = per_se[5];
+	cmqd->compute_static_thread_mgmt_se6 = per_se[6];
+	cmqd->compute_static_thread_mgmt_se7 = per_se[7];
+
+	pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
+		 cmqd->compute_static_thread_mgmt_se0,
+		 cmqd->compute_static_thread_mgmt_se1,
+		 cmqd->compute_static_thread_mgmt_se2,
+		 cmqd->compute_static_thread_mgmt_se3,
+		 cmqd->compute_static_thread_mgmt_se4,
+		 cmqd->compute_static_thread_mgmt_se5,
+		 cmqd->compute_static_thread_mgmt_se6,
+		 cmqd->compute_static_thread_mgmt_se7);
+}
+
+/*
+ * Copy the queue priority into the MQD and derive the pipe priority from it
+ * through pipe_priority_map[].  q->priority is used as an index unchecked.
+ */
+static void set_priority(struct v11_compute_mqd *m, struct queue_properties *q)
+{
+	m->cp_hqd_queue_priority = q->priority;
+	m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
+}
+
+/*
+ * Allocate backing memory for one MQD from the node's GTT sub-allocator.
+ *
+ * MES writes to areas beyond the MQD structure, so a full PAGE_SIZE is
+ * allocated if MES is enabled; otherwise only the compute MQD structure
+ * size is requested.  Returns NULL when the allocation fails.
+ */
+static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
+					struct queue_properties *q)
+{
+	struct kfd_mem_obj *mem;
+	int bytes = sizeof(struct v11_compute_mqd);
+
+	if (node->kfd->shared_resources.enable_mes)
+		bytes = PAGE_SIZE;
+
+	return kfd_gtt_sa_allocate(node, bytes, &mem) ? NULL : mem;
+}
+
+/*
+ * init_mqd - fill in a freshly allocated GFX v11 compute MQD.
+ *
+ * Zeroes the memory behind @mqd_mem_obj, programs the static defaults,
+ * returns the CPU pointer through @mqd and, when @gart_addr is non-NULL,
+ * the GPU address through @gart_addr.  It finishes by calling
+ * mm->update_mqd() so the queue-specific settings from @q are applied.
+ */
+static void init_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ uint64_t addr;
+ struct v11_compute_mqd *m;
+ int size;
+ /* Queues flagged is_dbg_wa start with a 0xffff mask on every SE. */
+ uint32_t wa_mask = q->is_dbg_wa ? 0xffff : 0xffffffff;
+
+ m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ /* Clear a whole page under MES, matching what allocate_mqd() requests. */
+ if (mm->dev->kfd->shared_resources.enable_mes)
+ size = PAGE_SIZE;
+ else
+ size = sizeof(struct v11_compute_mqd);
+
+ memset(m, 0, size);
+
+ m->header = 0xC0310800;
+ m->compute_pipelinestat_enable = 1;
+
+ m->compute_static_thread_mgmt_se0 = wa_mask;
+ m->compute_static_thread_mgmt_se1 = wa_mask;
+ m->compute_static_thread_mgmt_se2 = wa_mask;
+ m->compute_static_thread_mgmt_se3 = wa_mask;
+ m->compute_static_thread_mgmt_se4 = wa_mask;
+ m->compute_static_thread_mgmt_se5 = wa_mask;
+ m->compute_static_thread_mgmt_se6 = wa_mask;
+ m->compute_static_thread_mgmt_se7 = wa_mask;
+
+ m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
+ 0x55 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+
+ m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
+ m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
+
+ /* The MQD records its own GPU address. */
+ m->cp_mqd_base_addr_lo = lower_32_bits(addr);
+ m->cp_mqd_base_addr_hi = upper_32_bits(addr);
+
+ m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
+ 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
+ 1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
+
+ /* Set cp_hqd_hq_status0 bit 14 to 1 to have the CP set up the
+ * DISPATCH_PTR. This is required for the kfd debugger
+ */
+ m->cp_hqd_hq_status0 = 1 << 14;
+
+ /*
+ * GFX11 RS64 CPFW version >= 509 supports PCIe atomics support
+ * acknowledgment.
+ */
+ if (amdgpu_amdkfd_have_atomics_support(mm->dev->adev))
+ m->cp_hqd_hq_status0 |= 1 << 29;
+
+ if (q->format == KFD_QUEUE_FORMAT_AQL) {
+ m->cp_hqd_aql_control =
+ 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
+ }
+
+ /* With CWSR enabled, describe the context save area and control stack. */
+ if (mm->dev->kfd->cwsr_enabled) {
+ m->cp_hqd_persistent_state |=
+ (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
+ m->cp_hqd_ctx_save_base_addr_lo =
+ lower_32_bits(q->ctx_save_restore_area_address);
+ m->cp_hqd_ctx_save_base_addr_hi =
+ upper_32_bits(q->ctx_save_restore_area_address);
+ m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
+ m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
+ m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
+ m->cp_hqd_wg_state_offset = q->ctl_stack_size;
+ }
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+ /* Apply the per-queue fields (ring address, doorbell, priority, ...). */
+ mm->update_mqd(mm, m, q, NULL);
+}
+
+/*
+ * Hand the MQD to the kfd2kgd hqd_load() hook for the given pipe/queue and
+ * return that hook's result.
+ */
+static int load_mqd(struct mqd_manager *mm, void *mqd,
+		    uint32_t pipe_id, uint32_t queue_id,
+		    struct queue_properties *p, struct mm_struct *mms)
+{
+	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+	uint32_t wptr_shift = 0;
+
+	if (p->format == KFD_QUEUE_FORMAT_AQL)
+		wptr_shift = 4;
+
+	return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id,
+					  queue_id,
+					  (uint32_t __user *)p->write_ptr,
+					  wptr_shift, 0, mms, 0);
+}
+
+/*
+ * update_mqd - refresh the queue-dependent fields of a compute MQD.
+ *
+ * Rewrites ring size/base, read/write pointer report addresses, doorbell,
+ * EOP buffer and VMID from @q, applies an optional CU mask update from
+ * @minfo (may be NULL), sets the priority fields and finally recomputes
+ * q->is_active with QUEUE_IS_ACTIVE().
+ */
+static void update_mqd(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
+{
+ struct v11_compute_mqd *m;
+
+ m = get_mqd(mqd);
+
+ /*
+ * Queue size field is ffs(size in dwords) - 2; for a power-of-two
+ * size that equals log2(dwords) - 1.
+ */
+ m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK;
+ m->cp_hqd_pq_control |=
+ ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+ pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
+
+ /* The ring base is stored shifted right by 8 bits. */
+ m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
+ m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
+
+ m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+ m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+ m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
+ m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
+
+ /* Doorbell control is rebuilt from scratch on every update. */
+ m->cp_hqd_pq_doorbell_control =
+ q->doorbell_off <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+ m->cp_hqd_pq_doorbell_control);
+
+ m->cp_hqd_ib_control = 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT;
+
+ /*
+ * HW does not clamp this field correctly. Maximum EOP queue size
+ * is constrained by per-SE EOP done signal count, which is 8-bit.
+ * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
+ * more than (EOP entry count - 1) so a queue size of 0x800 dwords
+ * is safe, giving a maximum field value of 0xA.
+ */
+ m->cp_hqd_eop_control = min(0xA,
+ ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1);
+ m->cp_hqd_eop_base_addr_lo =
+ lower_32_bits(q->eop_ring_buffer_address >> 8);
+ m->cp_hqd_eop_base_addr_hi =
+ upper_32_bits(q->eop_ring_buffer_address >> 8);
+
+ m->cp_hqd_iq_timer = 0;
+
+ m->cp_hqd_vmid = q->vmid;
+
+ /* AQL queues get extra PQ control bits and the BIF_DROP doorbell bit. */
+ if (q->format == KFD_QUEUE_FORMAT_AQL) {
+ /* GC 10 removed WPP_CLAMP from PQ Control */
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
+ 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
+ 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT ;
+ m->cp_hqd_pq_doorbell_control |=
+ 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
+ }
+ if (mm->dev->kfd->cwsr_enabled)
+ m->cp_hqd_ctx_save_control = 0;
+
+ update_cu_mask(mm, mqd, minfo);
+ set_priority(m, q);
+
+ q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+/*
+ * Return the result of kfd_check_hiq_mqd_doorbell_id() for the doorbell id
+ * recorded in this MQD (instance 0).
+ */
+static bool check_preemption_failed(struct mqd_manager *mm, void *mqd)
+{
+	struct v11_compute_mqd *cmqd = get_mqd(mqd);
+
+	return kfd_check_hiq_mqd_doorbell_id(mm->dev,
+					     cmqd->queue_doorbell_id0, 0);
+}
+
+/*
+ * Report how much of the control stack and of the wave save area is in use,
+ * and copy a wave-state header describing them to user space.
+ *
+ * Returns 0 on success or -EFAULT when the copy to @ctl_stack fails.
+ */
+static int get_wave_state(struct mqd_manager *mm, void *mqd,
+			  struct queue_properties *q,
+			  void __user *ctl_stack,
+			  u32 *ctl_stack_used_size,
+			  u32 *save_area_used_size)
+{
+	struct kfd_context_save_area_header hdr;
+	struct v11_compute_mqd *cmqd = get_mqd(mqd);
+
+	/*
+	 * The control stack grows backwards and the workgroup context data
+	 * grows forwards; both start from cp_hqd_cntl_stack_size.  The
+	 * current positions are cp_hqd_cntl_stack_offset and
+	 * cp_hqd_wg_state_offset respectively.
+	 */
+	*ctl_stack_used_size = cmqd->cp_hqd_cntl_stack_size -
+			       cmqd->cp_hqd_cntl_stack_offset;
+	*save_area_used_size = cmqd->cp_hqd_wg_state_offset -
+			       cmqd->cp_hqd_cntl_stack_size;
+
+	/*
+	 * On GFXv11 the control stack itself is not copied out: it lives in
+	 * the context save area, which user mode can already access.  Only
+	 * the header describing it is returned.
+	 */
+	hdr.wave_state.control_stack_offset = cmqd->cp_hqd_cntl_stack_offset;
+	hdr.wave_state.control_stack_size = *ctl_stack_used_size;
+	hdr.wave_state.wave_state_offset = cmqd->cp_hqd_wg_state_offset;
+	hdr.wave_state.wave_state_size = *save_area_used_size;
+
+	if (copy_to_user(ctl_stack, &hdr, sizeof(hdr.wave_state)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Copy the compute MQD into @mqd_dst.  @ctl_stack_dst is not written.
+ */
+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
+{
+	memcpy(mqd_dst, get_mqd(mqd), sizeof(struct v11_compute_mqd));
+}
+
+/*
+ * Rebuild a compute MQD from a previously checkpointed image.
+ *
+ * The image in @mqd_src is copied into the memory behind @mqd_mem_obj, the
+ * doorbell control is re-derived from @qp->doorbell_off, and the queue is
+ * left marked inactive.  @ctl_stack_src and @ctl_stack_size are unused.
+ */
+static void restore_mqd(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *qp,
+			const void *mqd_src,
+			const void *ctl_stack_src, const u32 ctl_stack_size)
+{
+	struct v11_compute_mqd *cmqd =
+		(struct v11_compute_mqd *)mqd_mem_obj->cpu_ptr;
+
+	memcpy(cmqd, mqd_src, sizeof(*cmqd));
+
+	/* The saved doorbell offset is replaced by the one in @qp. */
+	cmqd->cp_hqd_pq_doorbell_control =
+		qp->doorbell_off <<
+		CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+		 cmqd->cp_hqd_pq_doorbell_control);
+
+	if (gart_addr)
+		*gart_addr = mqd_mem_obj->gpu_addr;
+	*mqd = cmqd;
+
+	qp->is_active = 0;
+}
+
+
+/*
+ * Initialise an MQD exactly like init_mqd(), then additionally set the
+ * PRIV_STATE and KMD_QUEUE bits in cp_hqd_pq_control.
+ */
+static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+			 struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			 struct queue_properties *q)
+{
+	const uint32_t hiq_bits =
+		1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
+		1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
+	struct v11_compute_mqd *cmqd;
+
+	init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
+
+	cmqd = get_mqd(*mqd);
+	cmqd->cp_hqd_pq_control |= hiq_bits;
+}
+
+/*
+ * Unmap the HIQ through amdgpu_amdkfd_unmap_hiq(), using the doorbell offset
+ * recovered from the MQD.  @type, @timeout, @pipe_id and @queue_id are not
+ * used.  Returns the unmap result; failures are logged at debug level.
+ */
+static int destroy_hiq_mqd(struct mqd_manager *mm, void *mqd,
+			   enum kfd_preempt_type type, unsigned int timeout,
+			   uint32_t pipe_id, uint32_t queue_id)
+{
+	struct v11_compute_mqd *cmqd = get_mqd(mqd);
+	u32 db_off = cmqd->cp_hqd_pq_doorbell_control >>
+		     CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+	int ret = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, db_off, 0);
+
+	if (ret)
+		pr_debug("Destroy HIQ MQD failed: %d\n", ret);
+
+	return ret;
+}
+
+/*
+ * Zero an SDMA MQD (a whole page when MES is enabled, otherwise just the
+ * structure), publish its CPU and GPU addresses, and let mm->update_mqd()
+ * fill in the queue-specific fields.
+ */
+static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+			  struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			  struct queue_properties *q)
+{
+	struct v11_sdma_mqd *smqd =
+		(struct v11_sdma_mqd *)mqd_mem_obj->cpu_ptr;
+	int bytes = sizeof(struct v11_sdma_mqd);
+
+	if (mm->dev->kfd->shared_resources.enable_mes)
+		bytes = PAGE_SIZE;
+
+	memset(smqd, 0, bytes);
+
+	if (gart_addr)
+		*gart_addr = mqd_mem_obj->gpu_addr;
+	*mqd = smqd;
+
+	mm->update_mqd(mm, smqd, q, NULL);
+}
+
+#define SDMA_RLC_DUMMY_DEFAULT 0xf
+
+/*
+ * update_mqd_sdma - refresh an SDMA MQD from the queue properties.
+ *
+ * Rebuilds the ring-buffer control word, ring base, read/write pointer
+ * addresses, doorbell offset, scheduling quantum and engine/queue ids from
+ * @q, then recomputes q->is_active.  @minfo is not used.
+ */
+static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
+{
+ struct v11_sdma_mqd *m;
+
+ m = get_sdma_mqd(mqd);
+ /* RB_SIZE is ffs(size in dwords) - 1; log2(dwords) for power-of-two sizes. */
+ m->sdmax_rlcx_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1)
+ << SDMA0_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
+ q->vmid << SDMA0_QUEUE0_RB_CNTL__RB_VMID__SHIFT |
+ 1 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+ 6 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
+ 1 << SDMA0_QUEUE0_RB_CNTL__F32_WPTR_POLL_ENABLE__SHIFT;
+
+ /* The ring base is stored shifted right by 8 bits. */
+ m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
+ m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+ m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+ m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
+ m->sdmax_rlcx_doorbell_offset =
+ q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ /* The quantum comes from amdgpu_sdma_phase_quantum, masked to the field. */
+ m->sdmax_rlcx_sched_cntl = (amdgpu_sdma_phase_quantum
+ << SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM__SHIFT)
+ & SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM_MASK;
+
+ m->sdma_engine_id = q->sdma_engine_id;
+ m->sdma_queue_id = q->sdma_queue_id;
+ m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
+
+ q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+/* Hex-dump one compute MQD into the seq_file; always returns 0. */
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+	const size_t mqd_bytes = sizeof(struct v11_compute_mqd);
+
+	seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, data, mqd_bytes,
+		     false);
+
+	return 0;
+}
+
+/* Hex-dump one SDMA MQD into the seq_file; always returns 0. */
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+	const size_t mqd_bytes = sizeof(struct v11_sdma_mqd);
+
+	seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, data, mqd_bytes,
+		     false);
+
+	return 0;
+}
+
+#endif
+
+/*
+ * Copy an SDMA MQD into @mqd_dst.  Only sizeof(struct v11_sdma_mqd) bytes
+ * are copied, matching the mqd_size the SDMA manager advertises.
+ * @ctl_stack_dst is not written.
+ */
+static void checkpoint_mqd_sdma(struct mqd_manager *mm, void *mqd,
+				void *mqd_dst, void *ctl_stack_dst)
+{
+	struct v11_sdma_mqd *m = get_sdma_mqd(mqd);
+
+	memcpy(mqd_dst, m, sizeof(struct v11_sdma_mqd));
+}
+
+/*
+ * Rebuild an SDMA MQD from a checkpointed image: copy the image, re-derive
+ * the doorbell offset from @qp, publish the CPU/GPU addresses and leave the
+ * queue marked inactive.  @ctl_stack_src and @ctl_stack_size are unused.
+ */
+static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
+			     struct kfd_mem_obj *mqd_mem_obj,
+			     uint64_t *gart_addr,
+			     struct queue_properties *qp,
+			     const void *mqd_src,
+			     const void *ctl_stack_src,
+			     const u32 ctl_stack_size)
+{
+	struct v11_sdma_mqd *m = (struct v11_sdma_mqd *)mqd_mem_obj->cpu_ptr;
+
+	memcpy(m, mqd_src, sizeof(*m));
+
+	m->sdmax_rlcx_doorbell_offset =
+		qp->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = mqd_mem_obj->gpu_addr;
+
+	qp->is_active = 0;
+}
+
+/*
+ * mqd_manager_init_v11 - build the MQD manager for one queue type on GFX v11.
+ *
+ * Allocates a zeroed struct mqd_manager, binds it to @dev and fills in the
+ * operation table that matches @type.  Returns NULL when @type is out of
+ * range or unhandled, or when the allocation fails.
+ */
+struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
+		struct kfd_node *dev)
+{
+	struct mqd_manager *mqd;
+
+	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+		return NULL;
+
+	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
+	if (!mqd)
+		return NULL;
+
+	mqd->dev = dev;
+
+	switch (type) {
+	case KFD_MQD_TYPE_CP:
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		mqd->allocate_mqd = allocate_mqd;
+		mqd->init_mqd = init_mqd;
+		mqd->free_mqd = kfd_free_mqd_cp;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd;
+		mqd->destroy_mqd = kfd_destroy_mqd_cp;
+		mqd->is_occupied = kfd_is_occupied_cp;
+		mqd->mqd_size = sizeof(struct v11_compute_mqd);
+		mqd->get_wave_state = get_wave_state;
+		mqd->mqd_stride = kfd_mqd_stride;
+		mqd->checkpoint_mqd = checkpoint_mqd;
+		mqd->restore_mqd = restore_mqd;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		break;
+	case KFD_MQD_TYPE_HIQ:
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		mqd->allocate_mqd = allocate_hiq_mqd;
+		mqd->init_mqd = init_mqd_hiq;
+		mqd->free_mqd = free_mqd_hiq_sdma;
+		mqd->load_mqd = kfd_hiq_load_mqd_kiq;
+		mqd->update_mqd = update_mqd;
+		mqd->destroy_mqd = destroy_hiq_mqd;
+		mqd->is_occupied = kfd_is_occupied_cp;
+		mqd->mqd_size = sizeof(struct v11_compute_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		mqd->check_preemption_failed = check_preemption_failed;
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		break;
+	case KFD_MQD_TYPE_DIQ:
+		mqd->allocate_mqd = allocate_mqd;
+		mqd->init_mqd = init_mqd_hiq;
+		mqd->free_mqd = kfd_free_mqd_cp;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd;
+		mqd->destroy_mqd = kfd_destroy_mqd_cp;
+		mqd->is_occupied = kfd_is_occupied_cp;
+		mqd->mqd_size = sizeof(struct v11_compute_mqd);
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		break;
+	case KFD_MQD_TYPE_SDMA:
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		mqd->allocate_mqd = allocate_sdma_mqd;
+		mqd->init_mqd = init_mqd_sdma;
+		mqd->free_mqd = free_mqd_hiq_sdma;
+		mqd->load_mqd = kfd_load_mqd_sdma;
+		mqd->update_mqd = update_mqd_sdma;
+		mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+		mqd->is_occupied = kfd_is_occupied_sdma;
+		/*
+		 * Use the SDMA-layout helpers: the compute variants copy
+		 * sizeof(struct v11_compute_mqd) and patch a compute-only
+		 * field, which does not match the SDMA mqd_size below.
+		 */
+		mqd->checkpoint_mqd = checkpoint_mqd_sdma;
+		mqd->restore_mqd = restore_mqd_sdma;
+		mqd->mqd_size = sizeof(struct v11_sdma_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+#endif
+		/*
+		 * To allocate SDMA MQDs by generic functions
+		 * when MES is enabled.
+		 */
+		if (dev->kfd->shared_resources.enable_mes) {
+			mqd->allocate_mqd = allocate_mqd;
+			mqd->free_mqd = kfd_free_mqd_cp;
+		}
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		break;
+	default:
+		kfree(mqd);
+		return NULL;
+	}
+
+	return mqd;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c
new file mode 100644
index 000000000000..565858b9044d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+#include "v12_structs.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "amdgpu_amdkfd.h"
+
+/* Reinterpret an opaque MQD buffer as a GFX v12 compute MQD (no validation). */
+static inline struct v12_compute_mqd *get_mqd(void *mqd)
+{
+	struct v12_compute_mqd *compute_mqd = mqd;
+
+	return compute_mqd;
+}
+
+/* Reinterpret an opaque MQD buffer as a GFX v12 SDMA MQD (no validation). */
+static inline struct v12_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+	struct v12_sdma_mqd *sdma_mqd = mqd;
+
+	return sdma_mqd;
+}
+
+/*
+ * Map the user CU mask in @minfo onto the eight per-SE static thread
+ * management masks of a compute MQD.  Does nothing when @minfo is NULL or
+ * carries no CU mask pointer.
+ */
+static void update_cu_mask(struct mqd_manager *mm, void *mqd,
+			   struct mqd_update_info *minfo)
+{
+	uint32_t per_se[KFD_MAX_NUM_SE] = {0};
+	struct v12_compute_mqd *cmqd;
+
+	if (!minfo)
+		return;
+	if (!minfo->cu_mask.ptr)
+		return;
+
+	mqd_symmetrically_map_cu_mask(mm, minfo->cu_mask.ptr,
+				      minfo->cu_mask.count, per_se, 0);
+
+	cmqd = get_mqd(mqd);
+	cmqd->compute_static_thread_mgmt_se0 = per_se[0];
+	cmqd->compute_static_thread_mgmt_se1 = per_se[1];
+	cmqd->compute_static_thread_mgmt_se2 = per_se[2];
+	cmqd->compute_static_thread_mgmt_se3 = per_se[3];
+	cmqd->compute_static_thread_mgmt_se4 = per_se[4];
+	cmqd->compute_static_thread_mgmt_se5 = per_se[5];
+	cmqd->compute_static_thread_mgmt_se6 = per_se[6];
+	cmqd->compute_static_thread_mgmt_se7 = per_se[7];
+
+	pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
+		 cmqd->compute_static_thread_mgmt_se0,
+		 cmqd->compute_static_thread_mgmt_se1,
+		 cmqd->compute_static_thread_mgmt_se2,
+		 cmqd->compute_static_thread_mgmt_se3,
+		 cmqd->compute_static_thread_mgmt_se4,
+		 cmqd->compute_static_thread_mgmt_se5,
+		 cmqd->compute_static_thread_mgmt_se6,
+		 cmqd->compute_static_thread_mgmt_se7);
+}
+
+/*
+ * Copy the queue priority into the MQD and derive the pipe priority from it
+ * through pipe_priority_map[].  q->priority is used as an index unchecked.
+ */
+static void set_priority(struct v12_compute_mqd *m, struct queue_properties *q)
+{
+	m->cp_hqd_queue_priority = q->priority;
+	m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
+}
+
+/*
+ * Allocate backing memory for one MQD from the node's GTT sub-allocator.
+ *
+ * A full PAGE_SIZE is always requested because MES writes to areas beyond
+ * the MQD structure itself.  Returns NULL when the allocation fails.
+ */
+static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
+					struct queue_properties *q)
+{
+	struct kfd_mem_obj *mem;
+
+	return kfd_gtt_sa_allocate(node, PAGE_SIZE, &mem) ? NULL : mem;
+}
+
+/*
+ * init_mqd - fill in a freshly allocated GFX v12 compute MQD.
+ *
+ * Zeroes a full page behind @mqd_mem_obj, programs the static defaults,
+ * returns the CPU pointer through @mqd and, when @gart_addr is non-NULL,
+ * the GPU address through @gart_addr.  It finishes by calling
+ * mm->update_mqd() so the queue-specific settings from @q are applied.
+ */
+static void init_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ uint64_t addr;
+ struct v12_compute_mqd *m;
+
+ m = (struct v12_compute_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ /* Clear the whole page, matching the PAGE_SIZE allocate_mqd() requests. */
+ memset(m, 0, PAGE_SIZE);
+
+ m->header = 0xC0310800;
+ m->compute_pipelinestat_enable = 1;
+ /* Start with every bit set in all eight per-SE masks. */
+ m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF;
+
+ m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
+ 0x55 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+
+ m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
+ m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
+
+ /* The MQD records its own GPU address. */
+ m->cp_mqd_base_addr_lo = lower_32_bits(addr);
+ m->cp_mqd_base_addr_hi = upper_32_bits(addr);
+
+ m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
+ 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
+ 1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
+
+ /* Set cp_hqd_hq_status0.c_queue_debug_en to 1 to have the CP set up the
+ * DISPATCH_PTR. This is required for the kfd debugger
+ */
+ m->cp_hqd_hq_status0 = 1 << 14;
+
+ /* Bit 29 is set only when the device reports atomics support. */
+ if (amdgpu_amdkfd_have_atomics_support(mm->dev->adev))
+ m->cp_hqd_hq_status0 |= 1 << 29;
+
+ if (q->format == KFD_QUEUE_FORMAT_AQL) {
+ m->cp_hqd_aql_control =
+ 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
+ }
+
+ /* With CWSR enabled, describe the context save area and control stack. */
+ if (mm->dev->kfd->cwsr_enabled) {
+ m->cp_hqd_persistent_state |=
+ (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
+ m->cp_hqd_ctx_save_base_addr_lo =
+ lower_32_bits(q->ctx_save_restore_area_address);
+ m->cp_hqd_ctx_save_base_addr_hi =
+ upper_32_bits(q->ctx_save_restore_area_address);
+ m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
+ m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
+ m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
+ m->cp_hqd_wg_state_offset = q->ctl_stack_size;
+ }
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+ /* Apply the per-queue fields (ring address, doorbell, priority, ...). */
+ mm->update_mqd(mm, m, q, NULL);
+}
+
+/*
+ * Hand the MQD to the kfd2kgd hqd_load() hook for the given pipe/queue and
+ * return that hook's result.
+ */
+static int load_mqd(struct mqd_manager *mm, void *mqd,
+		    uint32_t pipe_id, uint32_t queue_id,
+		    struct queue_properties *p, struct mm_struct *mms)
+{
+	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+	uint32_t wptr_shift = 0;
+
+	if (p->format == KFD_QUEUE_FORMAT_AQL)
+		wptr_shift = 4;
+
+	return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id,
+					  queue_id,
+					  (uint32_t __user *)p->write_ptr,
+					  wptr_shift, 0, mms, 0);
+}
+
+/*
+ * update_mqd - refresh the queue-dependent fields of a compute MQD.
+ *
+ * Rewrites ring size/base, read/write pointer report addresses, doorbell,
+ * EOP buffer and VMID from @q, applies an optional CU mask update from
+ * @minfo (may be NULL), sets the priority fields and finally recomputes
+ * q->is_active with QUEUE_IS_ACTIVE().
+ */
+static void update_mqd(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
+{
+ struct v12_compute_mqd *m;
+
+ m = get_mqd(mqd);
+
+ /*
+ * Queue size field is ffs(size in dwords) - 2; for a power-of-two
+ * size that equals log2(dwords) - 1.
+ */
+ m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK;
+ m->cp_hqd_pq_control |=
+ ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+ pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
+
+ /* The ring base is stored shifted right by 8 bits. */
+ m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
+ m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
+
+ m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+ m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+ m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
+ m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
+
+ /* Doorbell control is rebuilt from scratch on every update. */
+ m->cp_hqd_pq_doorbell_control =
+ q->doorbell_off <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+ m->cp_hqd_pq_doorbell_control);
+
+ m->cp_hqd_ib_control = 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT;
+
+ /*
+ * HW does not clamp this field correctly. Maximum EOP queue size
+ * is constrained by per-SE EOP done signal count, which is 8-bit.
+ * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
+ * more than (EOP entry count - 1) so a queue size of 0x800 dwords
+ * is safe, giving a maximum field value of 0xA.
+ */
+ m->cp_hqd_eop_control = min(0xA,
+ ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1);
+ m->cp_hqd_eop_base_addr_lo =
+ lower_32_bits(q->eop_ring_buffer_address >> 8);
+ m->cp_hqd_eop_base_addr_hi =
+ upper_32_bits(q->eop_ring_buffer_address >> 8);
+
+ m->cp_hqd_iq_timer = 0;
+
+ m->cp_hqd_vmid = q->vmid;
+
+ /* AQL queues get extra PQ control bits and the BIF_DROP doorbell bit. */
+ if (q->format == KFD_QUEUE_FORMAT_AQL) {
+ /* GC 10 removed WPP_CLAMP from PQ Control */
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
+ 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
+ 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT;
+ m->cp_hqd_pq_doorbell_control |=
+ 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
+ }
+ if (mm->dev->kfd->cwsr_enabled)
+ m->cp_hqd_ctx_save_control = 0;
+
+ update_cu_mask(mm, mqd, minfo);
+ set_priority(m, q);
+
+ q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+/*
+ * Return the result of kfd_check_hiq_mqd_doorbell_id() for the doorbell id
+ * recorded in this MQD (instance 0).
+ */
+static bool check_preemption_failed(struct mqd_manager *mm, void *mqd)
+{
+	struct v12_compute_mqd *cmqd = get_mqd(mqd);
+
+	return kfd_check_hiq_mqd_doorbell_id(mm->dev,
+					     cmqd->queue_doorbell_id0, 0);
+}
+
+/*
+ * Report how much of the control stack and of the wave save area is in use,
+ * and copy a header describing them to user space.
+ *
+ * Returns 0 on success or -EFAULT when the copy to @ctl_stack fails.
+ */
+static int get_wave_state(struct mqd_manager *mm, void *mqd,
+			  struct queue_properties *q,
+			  void __user *ctl_stack,
+			  u32 *ctl_stack_used_size,
+			  u32 *save_area_used_size)
+{
+	struct mqd_user_context_save_area_header hdr;
+	struct v12_compute_mqd *cmqd = get_mqd(mqd);
+
+	/*
+	 * The control stack grows backwards and the workgroup context data
+	 * grows forwards; both start from cp_hqd_cntl_stack_size.  The
+	 * current positions are cp_hqd_cntl_stack_offset and
+	 * cp_hqd_wg_state_offset respectively.
+	 */
+	*ctl_stack_used_size = cmqd->cp_hqd_cntl_stack_size -
+			       cmqd->cp_hqd_cntl_stack_offset;
+	*save_area_used_size = cmqd->cp_hqd_wg_state_offset -
+			       cmqd->cp_hqd_cntl_stack_size;
+
+	/*
+	 * On GFXv12 the control stack itself is not copied out: it lives in
+	 * the context save area, which user mode can already access.  Only
+	 * the header describing it is returned.
+	 */
+	hdr.control_stack_offset = cmqd->cp_hqd_cntl_stack_offset;
+	hdr.control_stack_size = *ctl_stack_used_size;
+	hdr.wave_state_offset = cmqd->cp_hqd_wg_state_offset;
+	hdr.wave_state_size = *save_area_used_size;
+
+	if (copy_to_user(ctl_stack, &hdr, sizeof(hdr)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Initialise an MQD exactly like init_mqd(), then additionally set the
+ * PRIV_STATE and KMD_QUEUE bits in cp_hqd_pq_control.
+ */
+static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+			 struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			 struct queue_properties *q)
+{
+	const uint32_t hiq_bits =
+		1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
+		1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
+	struct v12_compute_mqd *cmqd;
+
+	init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
+
+	cmqd = get_mqd(*mqd);
+	cmqd->cp_hqd_pq_control |= hiq_bits;
+}
+
+/*
+ * Zero an SDMA MQD structure, publish its CPU and GPU addresses, and let
+ * mm->update_mqd() fill in the queue-specific fields.
+ *
+ * NOTE(review): only sizeof(struct v12_sdma_mqd) is cleared here, while the
+ * compute init_mqd() clears a full PAGE_SIZE - confirm this is intended.
+ */
+static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+			  struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			  struct queue_properties *q)
+{
+	struct v12_sdma_mqd *smqd =
+		(struct v12_sdma_mqd *)mqd_mem_obj->cpu_ptr;
+
+	memset(smqd, 0, sizeof(*smqd));
+
+	if (gart_addr)
+		*gart_addr = mqd_mem_obj->gpu_addr;
+	*mqd = smqd;
+
+	mm->update_mqd(mm, smqd, q, NULL);
+}
+
+#define SDMA_RLC_DUMMY_DEFAULT 0xf
+
+/*
+ * update_mqd_sdma - refresh an SDMA MQD from the queue properties.
+ *
+ * Rebuilds the ring-buffer control word, ring base, read/write pointer
+ * addresses, doorbell offset, scheduling quantum and engine/queue ids from
+ * @q, then recomputes q->is_active.  @minfo is not used.
+ */
+static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
+{
+ struct v12_sdma_mqd *m;
+
+ m = get_sdma_mqd(mqd);
+ /* RB_SIZE is ffs(size in dwords) - 1; log2(dwords) for power-of-two sizes. */
+ m->sdmax_rlcx_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1)
+ << SDMA0_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
+ q->vmid << SDMA0_QUEUE0_RB_CNTL__RB_VMID__SHIFT |
+ 1 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+ 6 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
+ 1 << SDMA0_QUEUE0_RB_CNTL__MCU_WPTR_POLL_ENABLE__SHIFT;
+
+ /* The ring base is stored shifted right by 8 bits. */
+ m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
+ m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+ m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+ m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
+ m->sdmax_rlcx_doorbell_offset =
+ q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ /* The quantum comes from amdgpu_sdma_phase_quantum, masked to the field. */
+ m->sdmax_rlcx_sched_cntl = (amdgpu_sdma_phase_quantum
+ << SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM__SHIFT)
+ & SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM_MASK;
+
+ m->sdma_engine_id = q->sdma_engine_id;
+ m->sdma_queue_id = q->sdma_queue_id;
+
+ m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
+
+ q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+/* Hex-dump one compute MQD into the seq_file; always returns 0. */
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+	const size_t mqd_bytes = sizeof(struct v12_compute_mqd);
+
+	seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, data, mqd_bytes,
+		     false);
+
+	return 0;
+}
+
+/* Hex-dump one SDMA MQD into the seq_file; always returns 0. */
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+	const size_t mqd_bytes = sizeof(struct v12_sdma_mqd);
+
+	seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, data, mqd_bytes,
+		     false);
+
+	return 0;
+}
+
+#endif
+
+/*
+ * mqd_manager_init_v12 - build the MQD manager for one queue type on GFX v12.
+ *
+ * Allocates a zeroed struct mqd_manager, binds it to @dev and fills in the
+ * operation table that matches @type.  Returns NULL when @type is out of
+ * range or unhandled, or when the allocation fails.
+ */
+struct mqd_manager *mqd_manager_init_v12(enum KFD_MQD_TYPE type,
+ struct kfd_node *dev)
+{
+ struct mqd_manager *mqd;
+
+ if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+ return NULL;
+
+ mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
+ if (!mqd)
+ return NULL;
+
+ mqd->dev = dev;
+
+ switch (type) {
+ case KFD_MQD_TYPE_CP:
+ /* User compute queues: the only type that exposes get_wave_state. */
+ pr_debug("%s@%i\n", __func__, __LINE__);
+ mqd->allocate_mqd = allocate_mqd;
+ mqd->init_mqd = init_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
+ mqd->mqd_size = sizeof(struct v12_compute_mqd);
+ mqd->get_wave_state = get_wave_state;
+ mqd->mqd_stride = kfd_mqd_stride;
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+ pr_debug("%s@%i\n", __func__, __LINE__);
+ break;
+ case KFD_MQD_TYPE_HIQ:
+ /*
+ * NOTE(review): destroy_mqd is kfd_destroy_mqd_cp here, whereas the
+ * v11 manager uses a dedicated destroy_hiq_mqd - confirm intended.
+ */
+ pr_debug("%s@%i\n", __func__, __LINE__);
+ mqd->allocate_mqd = allocate_hiq_mqd;
+ mqd->init_mqd = init_mqd_hiq;
+ mqd->free_mqd = free_mqd_hiq_sdma;
+ mqd->load_mqd = kfd_hiq_load_mqd_kiq;
+ mqd->update_mqd = update_mqd;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
+ mqd->mqd_size = sizeof(struct v12_compute_mqd);
+ mqd->mqd_stride = kfd_mqd_stride;
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+ mqd->check_preemption_failed = check_preemption_failed;
+ pr_debug("%s@%i\n", __func__, __LINE__);
+ break;
+ case KFD_MQD_TYPE_DIQ:
+ /* DIQ reuses the HIQ init routine; mqd_stride is left unset (NULL). */
+ mqd->allocate_mqd = allocate_mqd;
+ mqd->init_mqd = init_mqd_hiq;
+ mqd->free_mqd = kfd_free_mqd_cp;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
+ mqd->mqd_size = sizeof(struct v12_compute_mqd);
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+ break;
+ case KFD_MQD_TYPE_SDMA:
+ /* SDMA MQDs use the generic (page-sized) allocate/free helpers. */
+ pr_debug("%s@%i\n", __func__, __LINE__);
+ mqd->allocate_mqd = allocate_mqd;
+ mqd->init_mqd = init_mqd_sdma;
+ mqd->free_mqd = kfd_free_mqd_cp;
+ mqd->load_mqd = kfd_load_mqd_sdma;
+ mqd->update_mqd = update_mqd_sdma;
+ mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+ mqd->is_occupied = kfd_is_occupied_sdma;
+ mqd->mqd_size = sizeof(struct v12_sdma_mqd);
+ mqd->mqd_stride = kfd_mqd_stride;
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+#endif
+ pr_debug("%s@%i\n", __func__, __LINE__);
+ break;
+ default:
+ /* Unhandled type: release the manager and report failure. */
+ kfree(mqd);
+ return NULL;
+ }
+
+ return mqd;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 4e5932f54b5a..f2dee320fada 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -31,6 +32,22 @@
#include "gc/gc_9_0_sh_mask.h"
#include "sdma0/sdma0_4_0_sh_mask.h"
#include "amdgpu_amdkfd.h"
+#include "kfd_device_queue_manager.h"
+
+static void update_mqd(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
+ struct mqd_update_info *minfo);
+
+/*
+ * Size in bytes of one MQD instance for queue @q.
+ *
+ * A compute queue on a device with CWSR enabled occupies the page-aligned
+ * control stack plus the page-aligned struct v9_mqd; every other queue
+ * occupies mm->mqd_size.
+ */
+static uint64_t mqd_stride_v9(struct mqd_manager *mm,
+				struct queue_properties *q)
+{
+	/* Anything that is not a CWSR compute queue uses the plain MQD size. */
+	if (!mm->dev->kfd->cwsr_enabled ||
+	    q->type != KFD_QUEUE_TYPE_COMPUTE)
+		return mm->mqd_size;
+
+	return ALIGN(q->ctl_stack_size, PAGE_SIZE) +
+		ALIGN(sizeof(struct v9_mqd), PAGE_SIZE);
+}
static inline struct v9_mqd *get_mqd(void *mqd)
{
@@ -43,37 +60,47 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
}
static void update_cu_mask(struct mqd_manager *mm, void *mqd,
- struct mqd_update_info *minfo)
+ struct mqd_update_info *minfo, uint32_t inst)
{
struct v9_mqd *m;
uint32_t se_mask[KFD_MAX_NUM_SE] = {0};
- if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) ||
- !minfo->cu_mask.ptr)
+ if (!minfo || !minfo->cu_mask.ptr)
return;
mqd_symmetrically_map_cu_mask(mm,
- minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
+ minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, inst);
m = get_mqd(mqd);
+
m->compute_static_thread_mgmt_se0 = se_mask[0];
m->compute_static_thread_mgmt_se1 = se_mask[1];
m->compute_static_thread_mgmt_se2 = se_mask[2];
m->compute_static_thread_mgmt_se3 = se_mask[3];
- m->compute_static_thread_mgmt_se4 = se_mask[4];
- m->compute_static_thread_mgmt_se5 = se_mask[5];
- m->compute_static_thread_mgmt_se6 = se_mask[6];
- m->compute_static_thread_mgmt_se7 = se_mask[7];
-
- pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
- m->compute_static_thread_mgmt_se0,
- m->compute_static_thread_mgmt_se1,
- m->compute_static_thread_mgmt_se2,
- m->compute_static_thread_mgmt_se3,
- m->compute_static_thread_mgmt_se4,
- m->compute_static_thread_mgmt_se5,
- m->compute_static_thread_mgmt_se6,
- m->compute_static_thread_mgmt_se7);
+ if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0)) {
+ m->compute_static_thread_mgmt_se4 = se_mask[4];
+ m->compute_static_thread_mgmt_se5 = se_mask[5];
+ m->compute_static_thread_mgmt_se6 = se_mask[6];
+ m->compute_static_thread_mgmt_se7 = se_mask[7];
+
+ pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
+ m->compute_static_thread_mgmt_se0,
+ m->compute_static_thread_mgmt_se1,
+ m->compute_static_thread_mgmt_se2,
+ m->compute_static_thread_mgmt_se3,
+ m->compute_static_thread_mgmt_se4,
+ m->compute_static_thread_mgmt_se5,
+ m->compute_static_thread_mgmt_se6,
+ m->compute_static_thread_mgmt_se7);
+ } else {
+ pr_debug("inst: %u, update cu mask to %#x %#x %#x %#x\n",
+ inst, m->compute_static_thread_mgmt_se0,
+ m->compute_static_thread_mgmt_se1,
+ m->compute_static_thread_mgmt_se2,
+ m->compute_static_thread_mgmt_se3);
+ }
}
static void set_priority(struct v9_mqd *m, struct queue_properties *q)
@@ -82,7 +109,7 @@ static void set_priority(struct v9_mqd *m, struct queue_properties *q)
m->cp_hqd_queue_priority = q->priority;
}
-static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
+static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
struct queue_properties *q)
{
int retval;
@@ -104,28 +131,30 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
* pass a special bo flag AMDGPU_GEM_CREATE_CP_MQD_GFX9 to instruct
* amdgpu memory functions to do so.
*/
- if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
+ if (node->kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
if (!mqd_mem_obj)
return NULL;
- retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
- ALIGN(q->ctl_stack_size, PAGE_SIZE) +
- ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
+ retval = amdgpu_amdkfd_alloc_gtt_mem(node->adev,
+ (ALIGN(q->ctl_stack_size, PAGE_SIZE) +
+ ALIGN(sizeof(struct v9_mqd), PAGE_SIZE)) *
+ NUM_XCC(node->xcc_mask),
&(mqd_mem_obj->gtt_mem),
&(mqd_mem_obj->gpu_addr),
(void *)&(mqd_mem_obj->cpu_ptr), true);
+
+ if (retval) {
+ kfree(mqd_mem_obj);
+ return NULL;
+ }
} else {
- retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v9_mqd),
+ retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd),
&mqd_mem_obj);
- }
-
- if (retval) {
- kfree(mqd_mem_obj);
- return NULL;
+ if (retval)
+ return NULL;
}
return mqd_mem_obj;
-
}
static void init_mqd(struct mqd_manager *mm, void **mqd,
@@ -154,6 +183,9 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+ m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
+
m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
m->cp_mqd_base_addr_lo = lower_32_bits(addr);
@@ -163,17 +195,21 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
- if (q->format == KFD_QUEUE_FORMAT_AQL) {
+ /* Set cp_hqd_hq_scheduler0 bit 14 to 1 to have the CP set up the
+ * DISPATCH_PTR. This is required for the kfd debugger
+ */
+ m->cp_hqd_hq_status0 = 1 << 14;
+
+ if (q->format == KFD_QUEUE_FORMAT_AQL)
m->cp_hqd_aql_control =
1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
- }
if (q->tba_addr) {
m->compute_pgm_rsrc2 |=
(1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
}
- if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) {
+ if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address) {
m->cp_hqd_persistent_state |=
(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
m->cp_hqd_ctx_save_base_addr_lo =
@@ -189,7 +225,7 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
*mqd = m;
if (gart_addr)
*gart_addr = addr;
- mm->update_mqd(mm, m, q, NULL);
+ update_mqd(mm, m, q, NULL);
}
static int load_mqd(struct mqd_manager *mm, void *mqd,
@@ -199,17 +235,9 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
- return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
+ return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
(uint32_t __user *)p->write_ptr,
- wptr_shift, 0, mms);
-}
-
-static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
- uint32_t pipe_id, uint32_t queue_id,
- struct queue_properties *p, struct mm_struct *mms)
-{
- return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id,
- queue_id, p->doorbell_off);
+ wptr_shift, 0, mms, 0);
}
static void update_mqd(struct mqd_manager *mm, void *mqd,
@@ -220,7 +248,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
m = get_mqd(mqd);
- m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK;
m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
@@ -248,9 +276,14 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
* Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
* more than (EOP entry count - 1) so a queue size of 0x800 dwords
* is safe, giving a maximum field value of 0xA.
+ *
+ * Also, do calculation only if EOP is used (size > 0), otherwise
+ * the order_base_2 calculation provides incorrect result.
+ *
*/
- m->cp_hqd_eop_control = min(0xA,
- order_base_2(q->eop_ring_buffer_size / 4) - 1);
+ m->cp_hqd_eop_control = q->eop_ring_buffer_size ?
+ min(0xA, order_base_2(q->eop_ring_buffer_size / 4) - 1) : 0;
+
m->cp_hqd_eop_base_addr_lo =
lower_32_bits(q->eop_ring_buffer_address >> 8);
m->cp_hqd_eop_base_addr_hi =
@@ -268,61 +301,46 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
m->cp_hqd_pq_doorbell_control |= 1 <<
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
}
- if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address)
+ if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address)
m->cp_hqd_ctx_save_control = 0;
- update_cu_mask(mm, mqd, minfo);
+ if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0))
+ update_cu_mask(mm, mqd, minfo, 0);
set_priority(m, q);
+ if (minfo && KFD_GC_VERSION(mm->dev) >= IP_VERSION(9, 4, 2)) {
+ if (minfo->update_flag & UPDATE_FLAG_IS_GWS)
+ m->compute_resource_limits |=
+ COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK;
+ else
+ m->compute_resource_limits &=
+ ~COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK;
+ }
+
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-static uint32_t read_doorbell_id(void *mqd)
+static bool check_preemption_failed(struct mqd_manager *mm, void *mqd)
{
struct v9_mqd *m = (struct v9_mqd *)mqd;
+ uint32_t doorbell_id = m->queue_doorbell_id0;
- return m->queue_doorbell_id0;
-}
-
-static int destroy_mqd(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
-{
- return mm->dev->kfd2kgd->hqd_destroy
- (mm->dev->kgd, mqd, type, timeout,
- pipe_id, queue_id);
-}
-
-static void free_mqd(struct mqd_manager *mm, void *mqd,
- struct kfd_mem_obj *mqd_mem_obj)
-{
- struct kfd_dev *kfd = mm->dev;
-
- if (mqd_mem_obj->gtt_mem) {
- amdgpu_amdkfd_free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
- kfree(mqd_mem_obj);
- } else {
- kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
- }
-}
+ m->queue_doorbell_id0 = 0;
-static bool is_occupied(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
-{
- return mm->dev->kfd2kgd->hqd_is_occupied(
- mm->dev->kgd, queue_address,
- pipe_id, queue_id);
+ return kfd_check_hiq_mqd_doorbell_id(mm->dev, doorbell_id, 0);
}
static int get_wave_state(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
void __user *ctl_stack,
u32 *ctl_stack_used_size,
u32 *save_area_used_size)
{
struct v9_mqd *m;
+ struct kfd_context_save_area_header header;
/* Control stack is located one page after MQD. */
void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
@@ -334,12 +352,92 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
*save_area_used_size = m->cp_hqd_wg_state_offset -
m->cp_hqd_cntl_stack_size;
- if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
+ header.wave_state.control_stack_size = *ctl_stack_used_size;
+ header.wave_state.wave_state_size = *save_area_used_size;
+
+ header.wave_state.wave_state_offset = m->cp_hqd_wg_state_offset;
+ header.wave_state.control_stack_offset = m->cp_hqd_cntl_stack_offset;
+
+ if (copy_to_user(ctl_stack, &header, sizeof(header.wave_state)))
+ return -EFAULT;
+
+ if (copy_to_user(ctl_stack + m->cp_hqd_cntl_stack_offset,
+ mqd_ctl_stack + m->cp_hqd_cntl_stack_offset,
+ *ctl_stack_used_size))
return -EFAULT;
return 0;
}
+/*
+ * Report the control stack size needed to checkpoint a queue: the
+ * cp_hqd_cntl_stack_size stored in @mqd multiplied by the number of XCCs
+ * in the node's xcc_mask. The result is written to *@ctl_stack_size.
+ */
+static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size)
+{
+	u32 num_xcc = NUM_XCC(mm->dev->xcc_mask);
+
+	*ctl_stack_size = get_mqd(mqd)->cp_hqd_cntl_stack_size * num_xcc;
+}
+
+/*
+ * Copy a single MQD and its control stack into caller-provided buffers.
+ *
+ * @mqd_dst receives sizeof(struct v9_mqd) bytes; @ctl_stack_dst receives
+ * cp_hqd_cntl_stack_size bytes read from the page following the MQD.
+ */
+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
+{
+	struct v9_mqd *src = get_mqd(mqd);
+	/* Control stack is located one page after MQD. */
+	void *src_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
+
+	memcpy(mqd_dst, src, sizeof(*src));
+	memcpy(ctl_stack_dst, src_ctl_stack, src->cp_hqd_cntl_stack_size);
+}
+
+/*
+ * Checkpoint every per-XCC copy of a queue's MQD.
+ *
+ * The copies are read at multiples of the cp_mqd_stride_size stored in the
+ * first MQD. Copy i is written at @mqd_dst + i * sizeof(struct v9_mqd) and
+ * its control stack at @ctl_stack_dst + i * that copy's
+ * cp_hqd_cntl_stack_size.
+ */
+static void checkpoint_mqd_v9_4_3(struct mqd_manager *mm,
+				  void *mqd,
+				  void *mqd_dst,
+				  void *ctl_stack_dst)
+{
+	uint64_t stride = get_mqd(mqd)->cp_mqd_stride_size;
+	uint8_t *mqd_out = (uint8_t *)mqd_dst;
+	uint8_t *ctl_stack_out = (uint8_t *)ctl_stack_dst;
+	struct v9_mqd *xcc_m;
+	int i;
+
+	for (i = 0; i < NUM_XCC(mm->dev->xcc_mask); i++) {
+		xcc_m = get_mqd(mqd + stride * i);
+
+		checkpoint_mqd(mm, xcc_m,
+			       mqd_out + sizeof(*xcc_m) * i,
+			       ctl_stack_out + xcc_m->cp_hqd_cntl_stack_size * i);
+	}
+}
+
+/*
+ * Rebuild a compute MQD from checkpointed data.
+ *
+ * Copies sizeof(struct v9_mqd) bytes from @mqd_src into the memory behind
+ * @mqd_mem_obj and @ctl_stack_size bytes from @ctl_stack_src into the page
+ * that follows it. *@mqd receives the CPU pointer of the MQD and, when
+ * @gart_addr is non-NULL, *@gart_addr receives its GPU address. The queue
+ * is left marked inactive (@qp->is_active = 0).
+ */
+static void restore_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, u32 ctl_stack_size)
+{
+ uint64_t addr;
+ struct v9_mqd *m;
+ void *ctl_stack;
+
+ m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ /* Control stack is located one page after MQD. */
+ ctl_stack = (void *)((uintptr_t)*mqd + PAGE_SIZE);
+ memcpy(ctl_stack, ctl_stack_src, ctl_stack_size);
+
+ /*
+ * Replace the doorbell control word copied from mqd_src with one built
+ * from the doorbell offset in qp. This must stay after the memcpy above.
+ */
+ m->cp_hqd_pq_doorbell_control =
+ qp->doorbell_off <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+ m->cp_hqd_pq_doorbell_control);
+
+ qp->is_active = 0;
+}
+
static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -354,6 +452,25 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
}
+/*
+ * Unmap the HIQ described by @mqd.
+ *
+ * The doorbell offset is extracted from the MQD's doorbell control word
+ * and passed to amdgpu_amdkfd_unmap_hiq() with instance 0. @type,
+ * @timeout, @pipe_id and @queue_id are not used.
+ *
+ * Return: the value returned by amdgpu_amdkfd_unmap_hiq().
+ */
+static int destroy_hiq_mqd(struct mqd_manager *mm, void *mqd,
+			enum kfd_preempt_type type, unsigned int timeout,
+			uint32_t pipe_id, uint32_t queue_id)
+{
+	struct v9_mqd *m = get_mqd(mqd);
+	u32 doorbell_off = m->cp_hqd_pq_doorbell_control >>
+			   CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+	int err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, 0);
+
+	if (err)
+		pr_debug("Destroy HIQ MQD failed: %d\n", err);
+
+	return err;
+}
+
+
static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -371,15 +488,6 @@ static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
mm->update_mqd(mm, m, q, NULL);
}
-static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
- uint32_t pipe_id, uint32_t queue_id,
- struct queue_properties *p, struct mm_struct *mms)
-{
- return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
- (uint32_t __user *)p->write_ptr,
- mms);
-}
-
#define SDMA_RLC_DUMMY_DEFAULT 0xf
static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
@@ -405,27 +513,399 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
m->sdma_engine_id = q->sdma_engine_id;
m->sdma_queue_id = q->sdma_queue_id;
m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
+ /* Allow context switch so we don't cross-process starve with a massive
+ * command buffer of long-running SDMA commands
+ */
+ m->sdmax_rlcx_ib_cntl |= SDMA0_GFX_IB_CNTL__SWITCH_INSIDE_IB_MASK;
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-/*
- * * preempt type here is ignored because there is only one way
- * * to preempt sdma queue
- */
-static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
+static void checkpoint_mqd_sdma(struct mqd_manager *mm,
+ void *mqd,
+ void *mqd_dst,
+ void *ctl_stack_dst)
{
- return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+ struct v9_sdma_mqd *m;
+
+ m = get_sdma_mqd(mqd);
+
+ memcpy(mqd_dst, m, sizeof(struct v9_sdma_mqd));
+}
+
+/*
+ * Rebuild an SDMA MQD from checkpointed data.
+ *
+ * Copies sizeof(struct v9_sdma_mqd) bytes from @mqd_src into the memory
+ * behind @mqd_mem_obj, then overwrites the doorbell offset field with the
+ * offset taken from @qp. *@mqd receives the CPU pointer and, when
+ * @gart_addr is non-NULL, *@gart_addr receives the GPU address. The queue
+ * is left marked inactive. @ctl_stack_src and @ctl_stack_size are not
+ * used here.
+ */
+static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, const u32 ctl_stack_size)
+{
+ uint64_t addr;
+ struct v9_sdma_mqd *m;
+
+ m = (struct v9_sdma_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ /* Must follow the memcpy: replaces the doorbell offset from mqd_src. */
+ m->sdmax_rlcx_doorbell_offset =
+ qp->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ qp->is_active = 0;
+}
+
+/*
+ * Initialise one HIQ MQD per XCC of the node.
+ *
+ * For each XCC the backing memory is looked up with kfd_get_hiq_xcc_mqd()
+ * and initialised through init_mqd(); the HIQ-specific PQ control bits,
+ * the doorbell mode bit (only when amdgpu_sriov_multi_vf_mode() is true)
+ * and the MQD stride are then applied. NO_UPDATE_RPTR is cleared again on
+ * XCC 0 only.
+ *
+ * *@mqd receives the XCC 0 MQD pointer. *@gart_addr receives its GPU
+ * address when @gart_addr is non-NULL, matching the optional handling of
+ * this parameter in init_mqd() and restore_mqd().
+ */
+static void init_mqd_hiq_v9_4_3(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	struct v9_mqd *m;
+	int xcc = 0;
+	struct kfd_mem_obj xcc_mqd_mem_obj;
+	uint64_t xcc_gart_addr = 0;
+
+	memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
+
+	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
+		kfd_get_hiq_xcc_mqd(mm->dev, &xcc_mqd_mem_obj, xcc);
+
+		init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);
+
+		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
+					1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
+					1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
+		if (amdgpu_sriov_multi_vf_mode(mm->dev->adev))
+			m->cp_hqd_pq_doorbell_control |= 1 <<
+				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
+		m->cp_mqd_stride_size = kfd_hiq_mqd_stride(mm->dev);
+		if (xcc == 0) {
+			/* Set no_update_rptr = 0 in Master XCC */
+			m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK;
+
+			/* Set the MQD pointer and gart address to XCC0 MQD */
+			*mqd = m;
+			/* gart_addr is optional; do not dereference NULL */
+			if (gart_addr)
+				*gart_addr = xcc_gart_addr;
+		}
+	}
+}
+
+
+/*
+ * Load the HIQ MQD on every XCC instance enumerated by for_each_inst()
+ * over the node's xcc_mask.
+ *
+ * The per-XCC MQD copies start at @mqd and are kfd_hiq_mqd_stride() bytes
+ * apart. Loading stops at the first instance whose hiq_mqd_load() call
+ * fails.
+ *
+ * Return: 0 when every load succeeded (also when no instance was
+ * enumerated), otherwise the error from the failing hiq_mqd_load() call.
+ */
+static int hiq_load_mqd_kiq_v9_4_3(struct mqd_manager *mm, void *mqd,
+			uint32_t pipe_id, uint32_t queue_id,
+			struct queue_properties *p, struct mm_struct *mms)
+{
+	uint32_t xcc_mask = mm->dev->xcc_mask;
+	/* err starts at 0 so a loop with no iterations returns a defined value */
+	int xcc_id, err = 0, inst = 0;
+	void *xcc_mqd;
+	uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
+
+	for_each_inst(xcc_id, xcc_mask) {
+		xcc_mqd = mqd + hiq_mqd_size * inst;
+		err = mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, xcc_mqd,
+						     pipe_id, queue_id,
+						     p->doorbell_off, xcc_id);
+		if (err) {
+			pr_debug("Failed to load HIQ MQD for XCC: %d\n", inst);
+			break;
+		}
+		++inst;
+	}
+
+	return err;
+}
+
+
+/*
+ * Unmap the HIQ on every XCC instance enumerated by for_each_inst() over
+ * the node's xcc_mask.
+ *
+ * Each per-XCC MQD copy (kfd_hiq_mqd_stride() bytes apart, starting at
+ * @mqd) supplies its own doorbell offset, which is passed to
+ * amdgpu_amdkfd_unmap_hiq() together with the XCC id. Processing stops at
+ * the first failure. @type, @timeout, @pipe_id and @queue_id are not used.
+ *
+ * Return: 0 when every unmap succeeded (also when no instance was
+ * enumerated), otherwise the error from the failing unmap call.
+ */
+static int destroy_hiq_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
+			enum kfd_preempt_type type, unsigned int timeout,
+			uint32_t pipe_id, uint32_t queue_id)
+{
+	uint32_t xcc_mask = mm->dev->xcc_mask;
+	/* err starts at 0 so a loop with no iterations returns a defined value */
+	int xcc_id, err = 0, inst = 0;
+	uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
+	struct v9_mqd *m;
+	u32 doorbell_off;
+
+	for_each_inst(xcc_id, xcc_mask) {
+		m = get_mqd(mqd + hiq_mqd_size * inst);
+
+		doorbell_off = m->cp_hqd_pq_doorbell_control >>
+				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+
+		err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, xcc_id);
+		if (err) {
+			pr_debug("Destroy HIQ MQD failed for xcc: %d\n", inst);
+			break;
+		}
+		++inst;
+	}
+
+	return err;
}
-static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
+static bool check_preemption_failed_v9_4_3(struct mqd_manager *mm, void *mqd)
{
- return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+ uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
+ uint32_t xcc_mask = mm->dev->xcc_mask;
+ int inst = 0, xcc_id;
+ struct v9_mqd *m;
+ bool ret = false;
+
+ for_each_inst(xcc_id, xcc_mask) {
+ m = get_mqd(mqd + hiq_mqd_size * inst);
+ ret |= kfd_check_hiq_mqd_doorbell_id(mm->dev,
+ m->queue_doorbell_id0, inst);
+ m->queue_doorbell_id0 = 0;
+ ++inst;
+ }
+
+ return ret;
+}
+
+/*
+ * Fill @xcc_mqd_mem_obj with a view of @mqd_mem_obj shifted by @offset
+ * bytes: the GPU address and CPU pointer are both advanced by @offset.
+ * The gtt_mem handle is copied only for offset 0 and set to NULL for any
+ * other offset.
+ */
+static void get_xcc_mqd(struct kfd_mem_obj *mqd_mem_obj,
+			struct kfd_mem_obj *xcc_mqd_mem_obj,
+			uint64_t offset)
+{
+	if (offset == 0)
+		xcc_mqd_mem_obj->gtt_mem = mqd_mem_obj->gtt_mem;
+	else
+		xcc_mqd_mem_obj->gtt_mem = NULL;
+
+	xcc_mqd_mem_obj->gpu_addr = mqd_mem_obj->gpu_addr + offset;
+	xcc_mqd_mem_obj->cpu_ptr =
+		(uint32_t *)((uintptr_t)mqd_mem_obj->cpu_ptr + offset);
+}
+
+
+/*
+ * Initialise one compute MQD per XCC of the node.
+ *
+ * The per-XCC copies live in @mqd_mem_obj, mm->mqd_stride() bytes apart.
+ * Each copy is initialised through init_mqd() and then adjusted:
+ *  - the doorbell mode bit is set when amdgpu_sriov_multi_vf_mode() is true;
+ *  - cp_mqd_stride_size records the stride;
+ *  - with CWSR enabled and a save area supplied, the save base address is
+ *    advanced by xcc * q->ctx_save_restore_area_size;
+ *  - AQL queues get a chunk size of 1 and a logical XCC id rotated from
+ *    dqm->current_logical_xcc_start (post-incremented once per call), and
+ *    NO_UPDATE_RPTR is cleared on XCC 0; PM4 queues get logical id 0,
+ *    chunk size 0 and q->pm4_target_xcc.
+ *
+ * *@mqd receives the XCC 0 MQD pointer. *@gart_addr receives its GPU
+ * address when @gart_addr is non-NULL, matching the optional handling of
+ * this parameter in init_mqd() and restore_mqd().
+ */
+static void init_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	struct v9_mqd *m;
+	int xcc = 0;
+	struct kfd_mem_obj xcc_mqd_mem_obj;
+	uint64_t xcc_gart_addr = 0;
+	uint64_t xcc_ctx_save_restore_area_address;
+	uint64_t offset = mm->mqd_stride(mm, q);
+	uint32_t local_xcc_start = mm->dev->dqm->current_logical_xcc_start++;
+
+	memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
+	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
+		get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset*xcc);
+
+		init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);
+		if (amdgpu_sriov_multi_vf_mode(mm->dev->adev))
+			m->cp_hqd_pq_doorbell_control |= 1 <<
+				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
+		m->cp_mqd_stride_size = offset;
+
+		/*
+		 * Update the CWSR address for each XCC if CWSR is enabled
+		 * and CWSR area is allocated in thunk
+		 */
+		if (mm->dev->kfd->cwsr_enabled &&
+		    q->ctx_save_restore_area_address) {
+			xcc_ctx_save_restore_area_address =
+				q->ctx_save_restore_area_address +
+				(xcc * q->ctx_save_restore_area_size);
+
+			m->cp_hqd_ctx_save_base_addr_lo =
+				lower_32_bits(xcc_ctx_save_restore_area_address);
+			m->cp_hqd_ctx_save_base_addr_hi =
+				upper_32_bits(xcc_ctx_save_restore_area_address);
+		}
+
+		if (q->format == KFD_QUEUE_FORMAT_AQL) {
+			m->compute_tg_chunk_size = 1;
+			m->compute_current_logic_xcc_id =
+					(local_xcc_start + xcc) %
+					NUM_XCC(mm->dev->xcc_mask);
+
+			switch (xcc) {
+			case 0:
+				/* Master XCC */
+				m->cp_hqd_pq_control &=
+					~CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK;
+				break;
+			default:
+				break;
+			}
+		} else {
+			/* PM4 Queue */
+			m->compute_current_logic_xcc_id = 0;
+			m->compute_tg_chunk_size = 0;
+			m->pm4_target_xcc_in_xcp = q->pm4_target_xcc;
+		}
+
+		if (xcc == 0) {
+			/* Set the MQD pointer and gart address to XCC0 MQD */
+			*mqd = m;
+			/* gart_addr is optional; do not dereference NULL */
+			if (gart_addr)
+				*gart_addr = xcc_gart_addr;
+		}
+	}
+}
+
+
+/*
+ * Apply a queue property update to every per-XCC copy of the MQD.
+ *
+ * The copies are mm->mqd_stride() bytes apart starting at @mqd. Each copy
+ * first goes through update_mqd(); afterwards the doorbell mode bit is set
+ * when amdgpu_sriov_multi_vf_mode() is true, the CU mask is applied for
+ * instance @xcc, and the AQL- or PM4-specific fields are written.
+ */
+static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q, struct mqd_update_info *minfo)
+{
+ struct v9_mqd *m;
+ int xcc = 0;
+ uint64_t size = mm->mqd_stride(mm, q);
+
+ for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
+ m = get_mqd(mqd + size * xcc);
+ update_mqd(mm, m, q, minfo);
+
+ if (amdgpu_sriov_multi_vf_mode(mm->dev->adev))
+ m->cp_hqd_pq_doorbell_control |= 1 <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
+ /* update_mqd() skips the CU mask on these GC versions; apply it per XCC here */
+ update_cu_mask(mm, m, minfo, xcc);
+
+ if (q->format == KFD_QUEUE_FORMAT_AQL) {
+ switch (xcc) {
+ case 0:
+ /* Master XCC */
+ m->cp_hqd_pq_control &=
+ ~CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK;
+ break;
+ default:
+ break;
+ }
+ m->compute_tg_chunk_size = 1;
+ } else {
+ /* PM4 Queue */
+ m->compute_current_logic_xcc_id = 0;
+ m->compute_tg_chunk_size = 0;
+ m->pm4_target_xcc_in_xcp = q->pm4_target_xcc;
+ }
+ }
+}
+
+
+/*
+ * Rebuild every per-XCC copy of a compute MQD from checkpointed data.
+ *
+ * @mqd_src holds one struct v9_mqd per XCC back to back, and
+ * @ctl_stack_src holds the control stacks, @ctl_stack_size / num_xcc bytes
+ * each. Copy i is restored with restore_mqd() into @mqd_mem_obj at offset
+ * i * mm->mqd_stride(). *@mqd and, when non-NULL, *@gart_addr are set to
+ * the start of @mqd_mem_obj, i.e. the XCC 0 copy.
+ */
+static void restore_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, u32 ctl_stack_size)
+{
+ struct kfd_mem_obj xcc_mqd_mem_obj;
+ u32 mqd_ctl_stack_size;
+ struct v9_mqd *m;
+ u32 num_xcc;
+ int xcc;
+
+ uint64_t offset = mm->mqd_stride(mm, qp);
+
+ /* Advance the logical XCC start counter, as init_mqd_v9_4_3() also does */
+ mm->dev->dqm->current_logical_xcc_start++;
+
+ num_xcc = NUM_XCC(mm->dev->xcc_mask);
+ /* The checkpointed control stack is split evenly across the XCCs */
+ mqd_ctl_stack_size = ctl_stack_size / num_xcc;
+
+ memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
+
+ /* Set the MQD pointer and gart address to XCC0 MQD */
+ *mqd = mqd_mem_obj->cpu_ptr;
+ if (gart_addr)
+ *gart_addr = mqd_mem_obj->gpu_addr;
+
+ for (xcc = 0; xcc < num_xcc; xcc++) {
+ get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset * xcc);
+ /* gart_addr is passed as NULL: the outputs were already set above */
+ restore_mqd(mm, (void **)&m,
+ &xcc_mqd_mem_obj,
+ NULL,
+ qp,
+ (uint8_t *)mqd_src + xcc * sizeof(*m),
+ (uint8_t *)ctl_stack_src + xcc * mqd_ctl_stack_size,
+ mqd_ctl_stack_size);
+ }
+}
+/*
+ * Destroy (preempt) a compute queue on every XCC instance enumerated by
+ * for_each_inst() over the node's xcc_mask.
+ *
+ * The per-XCC MQD copies start at @mqd and are spaced by the
+ * cp_mqd_stride_size stored in the first copy. Processing stops at the
+ * first instance whose hqd_destroy() call fails.
+ *
+ * Return: 0 when every call succeeded (also when no instance was
+ * enumerated), otherwise the error from the failing hqd_destroy() call.
+ */
+static int destroy_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
+		   enum kfd_preempt_type type, unsigned int timeout,
+		   uint32_t pipe_id, uint32_t queue_id)
+{
+	uint32_t xcc_mask = mm->dev->xcc_mask;
+	/* err starts at 0 so a loop with no iterations returns a defined value */
+	int xcc_id, err = 0, inst = 0;
+	void *xcc_mqd;
+	struct v9_mqd *m;
+	uint64_t mqd_offset;
+
+	m = get_mqd(mqd);
+	mqd_offset = m->cp_mqd_stride_size;
+
+	for_each_inst(xcc_id, xcc_mask) {
+		xcc_mqd = mqd + mqd_offset * inst;
+		err = mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, xcc_mqd,
+						    type, timeout, pipe_id,
+						    queue_id, xcc_id);
+		if (err) {
+			pr_debug("Destroy MQD failed for xcc: %d\n", inst);
+			break;
+		}
+		++inst;
+	}
+
+	return err;
+}
+
+
+/*
+ * Load a compute MQD on every XCC instance enumerated by for_each_inst()
+ * over the node's xcc_mask.
+ *
+ * The per-XCC MQD copies start at @mqd and are mm->mqd_stride() bytes
+ * apart. Loading stops at the first instance whose hqd_load() call fails.
+ *
+ * Return: 0 when every load succeeded (also when no instance was
+ * enumerated), otherwise the error from the failing hqd_load() call.
+ */
+static int load_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
+			uint32_t pipe_id, uint32_t queue_id,
+			struct queue_properties *p, struct mm_struct *mms)
+{
+	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
+	uint32_t xcc_mask = mm->dev->xcc_mask;
+	/* err starts at 0 so a loop with no iterations returns a defined value */
+	int xcc_id, err = 0, inst = 0;
+	void *xcc_mqd;
+	uint64_t mqd_stride_size = mm->mqd_stride(mm, p);
+
+	for_each_inst(xcc_id, xcc_mask) {
+		xcc_mqd = mqd + mqd_stride_size * inst;
+		err = mm->dev->kfd2kgd->hqd_load(
+			mm->dev->adev, xcc_mqd, pipe_id, queue_id,
+			(uint32_t __user *)p->write_ptr, wptr_shift, 0, mms,
+			xcc_id);
+		if (err) {
+			pr_debug("Load MQD failed for xcc: %d\n", inst);
+			break;
+		}
+		++inst;
+	}
+
+	return err;
+}
+
+
+/*
+ * Collect the wave state of a queue from every per-XCC MQD copy.
+ *
+ * get_wave_state() is called once per XCC, reading the MQD copy at
+ * xcc * mm->mqd_stride() and writing to the user buffer at
+ * @ctl_stack + xcc * q->ctx_save_restore_area_size. Only the sizes
+ * reported for XCC 0 are returned through @ctl_stack_used_size and
+ * @save_area_used_size.
+ *
+ * Return: 0 on success, otherwise the first error from get_wave_state().
+ */
+static int get_wave_state_v9_4_3(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
+ void __user *ctl_stack,
+ u32 *ctl_stack_used_size,
+ u32 *save_area_used_size)
+{
+ int xcc, err = 0;
+ void *xcc_mqd;
+ void __user *xcc_ctl_stack;
+ uint64_t mqd_stride_size = mm->mqd_stride(mm, q);
+ u32 tmp_ctl_stack_used_size = 0, tmp_save_area_used_size = 0;
+
+ for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
+ xcc_mqd = mqd + mqd_stride_size * xcc;
+ xcc_ctl_stack = (void __user *)((uintptr_t)ctl_stack +
+ q->ctx_save_restore_area_size * xcc);
+
+ err = get_wave_state(mm, xcc_mqd, q, xcc_ctl_stack,
+ &tmp_ctl_stack_used_size,
+ &tmp_save_area_used_size);
+ if (err)
+ break;
+
+ /*
+ * Set the ctl_stack_used_size and save_area_used_size to
+ * ctl_stack_used_size and save_area_used_size of XCC 0 when
+ * passing the info to user-space.
+ * For multi XCC, user-space would have to look at the header
+ * info of each Control stack area to determine the control
+ * stack size and save area used.
+ */
+ if (xcc == 0) {
+ *ctl_stack_used_size = tmp_ctl_stack_used_size;
+ *save_area_used_size = tmp_save_area_used_size;
+ }
+ }
+
+ return err;
}
#if defined(CONFIG_DEBUG_FS)
@@ -447,7 +927,7 @@ static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
#endif
struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
- struct kfd_dev *dev)
+ struct kfd_node *dev)
{
struct mqd_manager *mqd;
@@ -463,40 +943,67 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
switch (type) {
case KFD_MQD_TYPE_CP:
mqd->allocate_mqd = allocate_mqd;
- mqd->init_mqd = init_mqd;
- mqd->free_mqd = free_mqd;
- mqd->load_mqd = load_mqd;
- mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
- mqd->get_wave_state = get_wave_state;
+ mqd->free_mqd = kfd_free_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
+ mqd->get_checkpoint_info = get_checkpoint_info;
mqd->mqd_size = sizeof(struct v9_mqd);
+ mqd->mqd_stride = mqd_stride_v9;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
+ if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) {
+ mqd->init_mqd = init_mqd_v9_4_3;
+ mqd->load_mqd = load_mqd_v9_4_3;
+ mqd->update_mqd = update_mqd_v9_4_3;
+ mqd->destroy_mqd = destroy_mqd_v9_4_3;
+ mqd->get_wave_state = get_wave_state_v9_4_3;
+ mqd->checkpoint_mqd = checkpoint_mqd_v9_4_3;
+ mqd->restore_mqd = restore_mqd_v9_4_3;
+ } else {
+ mqd->init_mqd = init_mqd;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->get_wave_state = get_wave_state;
+ mqd->checkpoint_mqd = checkpoint_mqd;
+ mqd->restore_mqd = restore_mqd;
+ }
break;
case KFD_MQD_TYPE_HIQ:
mqd->allocate_mqd = allocate_hiq_mqd;
- mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd_hiq_sdma;
- mqd->load_mqd = hiq_load_mqd_kiq;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v9_mqd);
+ mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
- mqd->read_doorbell_id = read_doorbell_id;
+ mqd->check_preemption_failed = check_preemption_failed;
+ if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) {
+ mqd->init_mqd = init_mqd_hiq_v9_4_3;
+ mqd->load_mqd = hiq_load_mqd_kiq_v9_4_3;
+ mqd->destroy_mqd = destroy_hiq_mqd_v9_4_3;
+ mqd->check_preemption_failed = check_preemption_failed_v9_4_3;
+ } else {
+ mqd->init_mqd = init_mqd_hiq;
+ mqd->load_mqd = kfd_hiq_load_mqd_kiq;
+ mqd->destroy_mqd = destroy_hiq_mqd;
+ mqd->check_preemption_failed = check_preemption_failed;
+ }
break;
case KFD_MQD_TYPE_DIQ:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -506,11 +1013,14 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
mqd->allocate_mqd = allocate_sdma_mqd;
mqd->init_mqd = init_mqd_sdma;
mqd->free_mqd = free_mqd_hiq_sdma;
- mqd->load_mqd = load_mqd_sdma;
+ mqd->load_mqd = kfd_load_mqd_sdma;
mqd->update_mqd = update_mqd_sdma;
- mqd->destroy_mqd = destroy_mqd_sdma;
- mqd->is_occupied = is_occupied_sdma;
+ mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+ mqd->is_occupied = kfd_is_occupied_sdma;
+ mqd->checkpoint_mqd = checkpoint_mqd_sdma;
+ mqd->restore_mqd = restore_mqd_sdma;
mqd->mqd_size = sizeof(struct v9_sdma_mqd);
+ mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index cd9220eb8a7a..c1fafc502515 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -50,12 +51,11 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
struct vi_mqd *m;
uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
- if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) ||
- !minfo->cu_mask.ptr)
+ if (!minfo || !minfo->cu_mask.ptr)
return;
mqd_symmetrically_map_cu_mask(mm,
- minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
+ minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
m = get_mqd(mqd);
m->compute_static_thread_mgmt_se0 = se_mask[0];
@@ -76,7 +76,7 @@ static void set_priority(struct vi_mqd *m, struct queue_properties *q)
m->cp_hqd_queue_priority = q->priority;
}
-static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
+static struct kfd_mem_obj *allocate_mqd(struct kfd_node *kfd,
struct queue_properties *q)
{
struct kfd_mem_obj *mqd_mem_obj;
@@ -135,7 +135,7 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
(1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
}
- if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) {
+ if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address) {
m->cp_hqd_persistent_state |=
(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
m->cp_hqd_ctx_save_base_addr_lo =
@@ -162,9 +162,9 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1);
- return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
+ return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
(uint32_t __user *)p->write_ptr,
- wptr_shift, wptr_mask, mms);
+ wptr_shift, wptr_mask, mms, 0);
}
static void __update_mqd(struct mqd_manager *mm, void *mqd,
@@ -226,7 +226,7 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd,
2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
}
- if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address)
+ if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address)
m->cp_hqd_ctx_save_control =
atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT |
mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT;
@@ -237,54 +237,22 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-
-static void update_mqd(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q,
- struct mqd_update_info *minfo)
-{
- __update_mqd(mm, mqd, q, minfo, MTYPE_CC, 1);
-}
-
-static uint32_t read_doorbell_id(void *mqd)
+static bool check_preemption_failed(struct mqd_manager *mm, void *mqd)
{
struct vi_mqd *m = (struct vi_mqd *)mqd;
- return m->queue_doorbell_id0;
+ return kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, 0);
}
-static void update_mqd_tonga(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q,
- struct mqd_update_info *minfo)
+static void update_mqd(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
{
__update_mqd(mm, mqd, q, minfo, MTYPE_UC, 0);
}
-static int destroy_mqd(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
-{
- return mm->dev->kfd2kgd->hqd_destroy
- (mm->dev->kgd, mqd, type, timeout,
- pipe_id, queue_id);
-}
-
-static void free_mqd(struct mqd_manager *mm, void *mqd,
- struct kfd_mem_obj *mqd_mem_obj)
-{
- kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
-}
-
-static bool is_occupied(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
-{
- return mm->dev->kfd2kgd->hqd_is_occupied(
- mm->dev->kgd, queue_address,
- pipe_id, queue_id);
-}
-
static int get_wave_state(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
void __user *ctl_stack,
u32 *ctl_stack_used_size,
u32 *save_area_used_size)
@@ -306,11 +274,54 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
return 0;
}
+static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size)
+{
+ /* Control stack is stored in user mode */
+ *ctl_stack_size = 0;
+}
+
+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
+{
+ struct vi_mqd *m;
+
+ m = get_mqd(mqd);
+
+ memcpy(mqd_dst, m, sizeof(struct vi_mqd));
+}
+
+static void restore_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, const u32 ctl_stack_size)
+{
+ uint64_t addr;
+ struct vi_mqd *m;
+
+ m = (struct vi_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ m->cp_hqd_pq_doorbell_control =
+ qp->doorbell_off <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+ m->cp_hqd_pq_doorbell_control);
+
+ qp->is_active = 0;
+}
+
static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
{
struct vi_mqd *m;
+
init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
m = get_mqd(*mqd);
@@ -343,15 +354,6 @@ static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
mm->update_mqd(mm, m, q, NULL);
}
-static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
- uint32_t pipe_id, uint32_t queue_id,
- struct queue_properties *p, struct mm_struct *mms)
-{
- return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
- (uint32_t __user *)p->write_ptr,
- mms);
-}
-
static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
struct queue_properties *q,
struct mqd_update_info *minfo)
@@ -380,27 +382,45 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-/*
- * * preempt type here is ignored because there is only one way
- * * to preempt sdma queue
- */
-static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
+static void checkpoint_mqd_sdma(struct mqd_manager *mm,
+ void *mqd,
+ void *mqd_dst,
+ void *ctl_stack_dst)
{
- return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+ struct vi_sdma_mqd *m;
+
+ m = get_sdma_mqd(mqd);
+
+ memcpy(mqd_dst, m, sizeof(struct vi_sdma_mqd));
}
-static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
+static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, const u32 ctl_stack_size)
{
- return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+ uint64_t addr;
+ struct vi_sdma_mqd *m;
+
+ m = (struct vi_sdma_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ m->sdmax_rlcx_doorbell =
+ qp->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ qp->is_active = 0;
}
#if defined(CONFIG_DEBUG_FS)
+
static int debugfs_show_mqd(struct seq_file *m, void *data)
{
seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
@@ -418,7 +438,7 @@ static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
#endif
struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
- struct kfd_dev *dev)
+ struct kfd_node *dev)
{
struct mqd_manager *mqd;
@@ -435,12 +455,15 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
case KFD_MQD_TYPE_CP:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->get_wave_state = get_wave_state;
+ mqd->get_checkpoint_info = get_checkpoint_info;
+ mqd->checkpoint_mqd = checkpoint_mqd;
+ mqd->restore_mqd = restore_mqd;
mqd->mqd_size = sizeof(struct vi_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -452,23 +475,25 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
mqd->free_mqd = free_mqd_hiq_sdma;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct vi_mqd);
+ mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
- mqd->read_doorbell_id = read_doorbell_id;
+ mqd->check_preemption_failed = check_preemption_failed;
break;
case KFD_MQD_TYPE_DIQ:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct vi_mqd);
+ mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
@@ -477,11 +502,14 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
mqd->allocate_mqd = allocate_sdma_mqd;
mqd->init_mqd = init_mqd_sdma;
mqd->free_mqd = free_mqd_hiq_sdma;
- mqd->load_mqd = load_mqd_sdma;
+ mqd->load_mqd = kfd_load_mqd_sdma;
mqd->update_mqd = update_mqd_sdma;
- mqd->destroy_mqd = destroy_mqd_sdma;
- mqd->is_occupied = is_occupied_sdma;
+ mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+ mqd->is_occupied = kfd_is_occupied_sdma;
+ mqd->checkpoint_mqd = checkpoint_mqd_sdma;
+ mqd->restore_mqd = restore_mqd_sdma;
mqd->mqd_size = sizeof(struct vi_sdma_mqd);
+ mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif
@@ -493,16 +521,3 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
return mqd;
}
-
-struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
- struct kfd_dev *dev)
-{
- struct mqd_manager *mqd;
-
- mqd = mqd_manager_init_vi(type, dev);
- if (!mqd)
- return NULL;
- if (type == KFD_MQD_TYPE_CP)
- mqd->update_mqd = update_mqd_tonga;
- return mqd;
-}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index e547f1f8c49f..b1a6eb349bb3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -27,6 +28,11 @@
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
+#define OVER_SUBSCRIPTION_PROCESS_COUNT (1 << 0)
+#define OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT (1 << 1)
+#define OVER_SUBSCRIPTION_GWS_QUEUE_COUNT (1 << 2)
+#define OVER_SUBSCRIPTION_XNACK_CONFLICT (1 << 3)
+
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
unsigned int buffer_size_bytes)
{
@@ -39,12 +45,14 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
static void pm_calc_rlib_size(struct packet_manager *pm,
unsigned int *rlib_size,
- bool *over_subscription)
+ int *over_subscription,
+ int xnack_conflict)
{
unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
unsigned int map_queue_size;
unsigned int max_proc_per_quantum = 1;
- struct kfd_dev *dev = pm->dqm->dev;
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
process_count = pm->dqm->processes_count;
queue_count = pm->dqm->active_queue_count;
@@ -56,17 +64,22 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
* hws_max_conc_proc has been done in
* kgd2kfd_device_init().
*/
- *over_subscription = false;
+ *over_subscription = 0;
- if (dev->max_proc_per_quantum > 1)
- max_proc_per_quantum = dev->max_proc_per_quantum;
+ if (node->max_proc_per_quantum > 1)
+ max_proc_per_quantum = node->max_proc_per_quantum;
- if ((process_count > max_proc_per_quantum) ||
- compute_queue_count > get_cp_queues_num(pm->dqm) ||
- gws_queue_count > 1) {
- *over_subscription = true;
- pr_debug("Over subscribed runlist\n");
- }
+ if (process_count > max_proc_per_quantum)
+ *over_subscription |= OVER_SUBSCRIPTION_PROCESS_COUNT;
+ if (compute_queue_count > get_cp_queues_num(pm->dqm))
+ *over_subscription |= OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT;
+ if (gws_queue_count > 1)
+ *over_subscription |= OVER_SUBSCRIPTION_GWS_QUEUE_COUNT;
+ if (xnack_conflict && (node->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN))
+ *over_subscription |= OVER_SUBSCRIPTION_XNACK_CONFLICT;
+
+ if (*over_subscription)
+ dev_dbg(dev, "Over subscribed runlist\n");
map_queue_size = pm->pmf->map_queues_size;
/* calculate run list ib allocation size */
@@ -80,29 +93,32 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
if (*over_subscription)
*rlib_size += pm->pmf->runlist_size;
- pr_debug("runlist ib size %d\n", *rlib_size);
+ dev_dbg(dev, "runlist ib size %d\n", *rlib_size);
}
static int pm_allocate_runlist_ib(struct packet_manager *pm,
unsigned int **rl_buffer,
uint64_t *rl_gpu_buffer,
unsigned int *rl_buffer_size,
- bool *is_over_subscription)
+ int *is_over_subscription,
+ int xnack_conflict)
{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
int retval;
if (WARN_ON(pm->allocated))
return -EINVAL;
- pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
+ pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription,
+ xnack_conflict);
mutex_lock(&pm->lock);
- retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
- &pm->ib_buffer_obj);
+ retval = kfd_gtt_sa_allocate(node, *rl_buffer_size, &pm->ib_buffer_obj);
if (retval) {
- pr_err("Failed to allocate runlist IB\n");
+ dev_err(dev, "Failed to allocate runlist IB\n");
goto out;
}
@@ -124,32 +140,54 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
{
unsigned int alloc_size_bytes;
unsigned int *rl_buffer, rl_wptr, i;
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
int retval, processes_mapped;
struct device_process_node *cur;
struct qcm_process_device *qpd;
struct queue *q;
struct kernel_queue *kq;
- bool is_over_subscription;
+ int is_over_subscription;
+ int xnack_enabled = -1;
+ bool xnack_conflict = 0;
rl_wptr = retval = processes_mapped = 0;
+ /* Check if processes set different xnack modes */
+ list_for_each_entry(cur, queues, list) {
+ qpd = cur->qpd;
+ if (xnack_enabled < 0)
+ /* First process */
+ xnack_enabled = qpd->pqm->process->xnack_enabled;
+ else if (qpd->pqm->process->xnack_enabled != xnack_enabled) {
+ /* Found a process with a different xnack mode */
+ xnack_conflict = 1;
+ break;
+ }
+ }
+
retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
- &alloc_size_bytes, &is_over_subscription);
+ &alloc_size_bytes, &is_over_subscription,
+ xnack_conflict);
if (retval)
return retval;
*rl_size_bytes = alloc_size_bytes;
pm->ib_size_bytes = alloc_size_bytes;
- pr_debug("Building runlist ib process count: %d queues count %d\n",
+ dev_dbg(dev, "Building runlist ib process count: %d queues count %d\n",
pm->dqm->processes_count, pm->dqm->active_queue_count);
+build_runlist_ib:
/* build the run list ib packet */
list_for_each_entry(cur, queues, list) {
qpd = cur->qpd;
+ /* group processes with the same xnack mode together */
+ if (qpd->pqm->process->xnack_enabled != xnack_enabled)
+ continue;
/* build map process packet */
if (processes_mapped >= pm->dqm->processes_count) {
- pr_debug("Not enough space left in runlist IB\n");
+ dev_dbg(dev, "Not enough space left in runlist IB\n");
pm_release_ib(pm);
return -ENOMEM;
}
@@ -166,7 +204,8 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
if (!kq->queue->properties.is_active)
continue;
- pr_debug("static_queue, mapping kernel q %d, is debug status %d\n",
+ dev_dbg(dev,
+ "static_queue, mapping kernel q %d, is debug status %d\n",
kq->queue->queue, qpd->is_debug);
retval = pm->pmf->map_queues(pm,
@@ -185,7 +224,8 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
if (!q->properties.is_active)
continue;
- pr_debug("static_queue, mapping user queue %d, is debug status %d\n",
+ dev_dbg(dev,
+ "static_queue, mapping user queue %d, is debug status %d\n",
q->queue, qpd->is_debug);
retval = pm->pmf->map_queues(pm,
@@ -201,18 +241,33 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
alloc_size_bytes);
}
}
+ if (xnack_conflict) {
+ /* pick up processes with the other xnack mode */
+ xnack_enabled = !xnack_enabled;
+ xnack_conflict = 0;
+ goto build_runlist_ib;
+ }
- pr_debug("Finished map process and queues to runlist\n");
+ dev_dbg(dev, "Finished map process and queues to runlist\n");
if (is_over_subscription) {
if (!pm->is_over_subscription)
- pr_warn("Runlist is getting oversubscribed. Expect reduced ROCm performance.\n");
+ dev_warn(dev, "Runlist is getting oversubscribed due to%s%s%s%s. Expect reduced ROCm performance.\n",
+ is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT ?
+ " too many processes" : "",
+ is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT ?
+ " too many queues" : "",
+ is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT ?
+ " multiple processes using cooperative launch" : "",
+ is_over_subscription & OVER_SUBSCRIPTION_XNACK_CONFLICT ?
+ " xnack on/off processes mixed on gfx9" : "");
+
retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
*rl_gpu_addr,
alloc_size_bytes / sizeof(uint32_t),
true);
}
- pm->is_over_subscription = is_over_subscription;
+ pm->is_over_subscription = !!is_over_subscription;
for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
pr_debug("0x%2X ", rl_buffer[i]);
@@ -223,7 +278,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
- switch (dqm->dev->device_info->asic_family) {
+ switch (dqm->dev->adev->asic_type) {
case CHIP_KAVERI:
case CHIP_HAWAII:
/* PM4 packet structures on CIK are the same as on VI */
@@ -236,31 +291,19 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
case CHIP_VEGAM:
pm->pmf = &kfd_vi_pm_funcs;
break;
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- case CHIP_RENOIR:
- case CHIP_ARCTURUS:
- case CHIP_NAVI10:
- case CHIP_NAVI12:
- case CHIP_NAVI14:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_BEIGE_GOBY:
- case CHIP_YELLOW_CARP:
- case CHIP_CYAN_SKILLFISH:
- pm->pmf = &kfd_v9_pm_funcs;
- break;
- case CHIP_ALDEBARAN:
- pm->pmf = &kfd_aldebaran_pm_funcs;
- break;
default:
- WARN(1, "Unexpected ASIC family %u",
- dqm->dev->device_info->asic_family);
- return -EINVAL;
+ if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2) ||
+ KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 5, 0))
+ pm->pmf = &kfd_aldebaran_pm_funcs;
+ else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1))
+ pm->pmf = &kfd_v9_pm_funcs;
+ else {
+ WARN(1, "Unexpected ASIC family %u",
+ dqm->dev->adev->asic_type);
+ return -EINVAL;
+ }
}
pm->dqm = dqm;
@@ -275,16 +318,18 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
return 0;
}
-void pm_uninit(struct packet_manager *pm, bool hanging)
+void pm_uninit(struct packet_manager *pm)
{
mutex_destroy(&pm->lock);
- kernel_queue_uninit(pm->priv_queue, hanging);
+ kernel_queue_uninit(pm->priv_queue);
pm->priv_queue = NULL;
}
int pm_send_set_resources(struct packet_manager *pm,
struct scheduling_resources *res)
{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
uint32_t *buffer, size;
int retval = 0;
@@ -294,14 +339,14 @@ int pm_send_set_resources(struct packet_manager *pm,
size / sizeof(uint32_t),
(unsigned int **)&buffer);
if (!buffer) {
- pr_err("Failed to allocate buffer on kernel queue\n");
+ dev_err(dev, "Failed to allocate buffer on kernel queue\n");
retval = -ENOMEM;
goto out;
}
retval = pm->pmf->set_resources(pm, buffer, res);
if (!retval)
- kq_submit_packet(pm->priv_queue);
+ retval = kq_submit_packet(pm->priv_queue);
else
kq_rollback_packet(pm->priv_queue);
@@ -338,7 +383,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
if (retval)
goto fail_create_runlist;
- kq_submit_packet(pm->priv_queue);
+ retval = kq_submit_packet(pm->priv_queue);
mutex_unlock(&pm->lock);
@@ -356,6 +401,8 @@ fail_create_runlist_ib:
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
uint64_t fence_value)
{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
uint32_t *buffer, size;
int retval = 0;
@@ -367,14 +414,14 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t), (unsigned int **)&buffer);
if (!buffer) {
- pr_err("Failed to allocate buffer on kernel queue\n");
+ dev_err(dev, "Failed to allocate buffer on kernel queue\n");
retval = -ENOMEM;
goto out;
}
retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
if (!retval)
- kq_submit_packet(pm->priv_queue);
+ retval = kq_submit_packet(pm->priv_queue);
else
kq_rollback_packet(pm->priv_queue);
@@ -383,11 +430,71 @@ out:
return retval;
}
-int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
+/* pm_config_dequeue_wait_counts: Configure dequeue timer Wait Counts
+ * by writing to CP_IQ_WAIT_TIME2 registers.
+ *
+ * @cmd: See enum kfd_config_dequeue_wait_counts_cmd definition
+ * @value: Depends on the cmd. This parameter is unused for
+ * KFD_DEQUEUE_WAIT_INIT and KFD_DEQUEUE_WAIT_RESET. For
+ * KFD_DEQUEUE_WAIT_SET_SCH_WAVE it holds value to be set
+ *
+ */
+int pm_config_dequeue_wait_counts(struct packet_manager *pm,
+ enum kfd_config_dequeue_wait_counts_cmd cmd,
+ uint32_t value)
+{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
+ int retval = 0;
+ uint32_t *buffer, size;
+
+ if (!pm->pmf->config_dequeue_wait_counts ||
+ !pm->pmf->config_dequeue_wait_counts_size)
+ return 0;
+
+ if (cmd == KFD_DEQUEUE_WAIT_INIT && (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) ||
+ KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0)))
+ return 0;
+
+ size = pm->pmf->config_dequeue_wait_counts_size;
+
+ mutex_lock(&pm->lock);
+
+ if (size) {
+ kq_acquire_packet_buffer(pm->priv_queue,
+ size / sizeof(uint32_t),
+ (unsigned int **)&buffer);
+
+ if (!buffer) {
+ dev_err(dev,
+ "Failed to allocate buffer on kernel queue\n");
+ retval = -ENOMEM;
+ goto out;
+ }
+
+ retval = pm->pmf->config_dequeue_wait_counts(pm, buffer,
+ cmd, value);
+ if (!retval) {
+ retval = kq_submit_packet(pm->priv_queue);
+
+ /* If default value is modified, cache that in dqm->wait_times */
+ if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT)
+ update_dqm_wait_times(pm->dqm);
+ } else {
+ kq_rollback_packet(pm->priv_queue);
+ }
+ }
+out:
+ mutex_unlock(&pm->lock);
+ return retval;
+}
+
+int pm_send_unmap_queue(struct packet_manager *pm,
enum kfd_unmap_queues_filter filter,
- uint32_t filter_param, bool reset,
- unsigned int sdma_engine)
+ uint32_t filter_param, bool reset)
{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
uint32_t *buffer, size;
int retval = 0;
@@ -396,15 +503,14 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t), (unsigned int **)&buffer);
if (!buffer) {
- pr_err("Failed to allocate buffer on kernel queue\n");
+ dev_err(dev, "Failed to allocate buffer on kernel queue\n");
retval = -ENOMEM;
goto out;
}
- retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param,
- reset, sdma_engine);
+ retval = pm->pmf->unmap_queues(pm, buffer, filter, filter_param, reset);
if (!retval)
- kq_submit_packet(pm->priv_queue);
+ retval = kq_submit_packet(pm->priv_queue);
else
kq_rollback_packet(pm->priv_queue);
@@ -446,6 +552,8 @@ out:
int pm_debugfs_hang_hws(struct packet_manager *pm)
{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
uint32_t *buffer, size;
int r = 0;
@@ -457,16 +565,16 @@ int pm_debugfs_hang_hws(struct packet_manager *pm)
kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t), (unsigned int **)&buffer);
if (!buffer) {
- pr_err("Failed to allocate buffer on kernel queue\n");
+ dev_err(dev, "Failed to allocate buffer on kernel queue\n");
r = -ENOMEM;
goto out;
}
memset(buffer, 0x55, size);
kq_submit_packet(pm->priv_queue);
- pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
- buffer[0], buffer[1], buffer[2], buffer[3],
- buffer[4], buffer[5], buffer[6]);
+ dev_info(dev, "Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
+ buffer[0], buffer[1], buffer[2], buffer[3], buffer[4],
+ buffer[5], buffer[6]);
out:
mutex_unlock(&pm->lock);
return r;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 7ea3f671b325..505036968a77 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -33,14 +34,20 @@ static int pm_map_process_v9(struct packet_manager *pm,
{
struct pm4_mes_map_process *packet;
uint64_t vm_page_table_base_addr = qpd->page_table_base;
+ struct kfd_node *kfd = pm->dqm->dev;
+ struct kfd_process_device *pdd =
+ container_of(qpd, struct kfd_process_device, qpd);
+ struct amdgpu_device *adev = kfd->adev;
packet = (struct pm4_mes_map_process *)buffer;
memset(buffer, 0, sizeof(struct pm4_mes_map_process));
packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
sizeof(struct pm4_mes_map_process));
+ if (adev->enforce_isolation[kfd->node_id] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
+ packet->bitfields2.exec_cleaner_shader = 1;
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
packet->bitfields2.process_quantum = 10;
- packet->bitfields2.pasid = qpd->pqm->process->pasid;
+ packet->bitfields2.pasid = pdd->pasid;
packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0;
@@ -48,6 +55,12 @@ static int pm_map_process_v9(struct packet_manager *pm,
packet->bitfields14.sdma_enable = 1;
packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
+ if (kfd->dqm->trap_debug_vmid && pdd->process->debug_trap_enabled &&
+ pdd->process->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED) {
+ packet->bitfields2.debug_vmid = kfd->dqm->trap_debug_vmid;
+ packet->bitfields2.new_debug = 1;
+ }
+
packet->sh_mem_config = qpd->sh_mem_config;
packet->sh_mem_bases = qpd->sh_mem_bases;
if (qpd->tba_addr) {
@@ -78,25 +91,45 @@ static int pm_map_process_aldebaran(struct packet_manager *pm,
{
struct pm4_mes_map_process_aldebaran *packet;
uint64_t vm_page_table_base_addr = qpd->page_table_base;
+ struct kfd_dev *kfd = pm->dqm->dev->kfd;
+ struct kfd_node *knode = pm->dqm->dev;
+ struct kfd_process_device *pdd =
+ container_of(qpd, struct kfd_process_device, qpd);
+ int i;
+ struct amdgpu_device *adev = kfd->adev;
packet = (struct pm4_mes_map_process_aldebaran *)buffer;
memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran));
packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
sizeof(struct pm4_mes_map_process_aldebaran));
+ if (adev->enforce_isolation[knode->node_id] ==
+ AMDGPU_ENFORCE_ISOLATION_ENABLE)
+ packet->bitfields2.exec_cleaner_shader = 1;
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
packet->bitfields2.process_quantum = 10;
- packet->bitfields2.pasid = qpd->pqm->process->pasid;
+ packet->bitfields2.pasid = pdd->pasid;
packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0;
packet->bitfields14.num_oac = qpd->num_oac;
packet->bitfields14.sdma_enable = 1;
packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
+ packet->spi_gdbg_per_vmid_cntl = pdd->spi_dbg_override |
+ pdd->spi_dbg_launch_mode;
+
+ if (pdd->process->debug_trap_enabled) {
+ for (i = 0; i < kfd->device_info.num_of_watch_points; i++)
+ packet->tcp_watch_cntl[i] = pdd->watch_points[i];
+
+ packet->bitfields2.single_memops =
+ !!(pdd->process->dbg_flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP);
+ }
packet->sh_mem_config = qpd->sh_mem_config;
packet->sh_mem_bases = qpd->sh_mem_bases;
if (qpd->tba_addr) {
packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
+ packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
}
@@ -118,19 +151,24 @@ static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
struct pm4_mes_runlist *packet;
int concurrent_proc_cnt = 0;
- struct kfd_dev *kfd = pm->dqm->dev;
+ struct kfd_node *kfd = pm->dqm->dev;
+ struct amdgpu_device *adev = kfd->adev;
/* Determine the number of processes to map together to HW:
* it can not exceed the number of VMIDs available to the
* scheduler, and it is determined by the smaller of the number
* of processes in the runlist and kfd module parameter
* hws_max_conc_proc.
+ * However, if enforce_isolation is set (toggle LDS/VGPRs/SGPRs
+ * cleaner between process switch), enable single-process mode
+ * in HWS.
* Note: the arbitration between the number of VMIDs and
* hws_max_conc_proc has been done in
* kgd2kfd_device_init().
*/
- concurrent_proc_cnt = min(pm->dqm->processes_count,
- kfd->max_proc_per_quantum);
+ concurrent_proc_cnt = (adev->enforce_isolation[kfd->node_id] ==
+ AMDGPU_ENFORCE_ISOLATION_ENABLE) ?
+ 1 : min(pm->dqm->processes_count, kfd->max_proc_per_quantum);
packet = (struct pm4_mes_runlist *)buffer;
@@ -165,6 +203,8 @@ static int pm_set_resources_v9(struct packet_manager *pm, uint32_t *buffer,
queue_type__mes_set_resources__hsa_interface_queue_hiq;
packet->bitfields2.vmid_mask = res->vmid_mask;
packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
+ if (pm->dqm->dev->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN)
+ packet->bitfields2.enb_xnack_retry_disable_check = 1;
packet->bitfields7.oac_mask = res->oac_mask;
packet->bitfields8.gds_heap_base = res->gds_heap_base;
packet->bitfields8.gds_heap_size = res->gds_heap_size;
@@ -178,11 +218,16 @@ static int pm_set_resources_v9(struct packet_manager *pm, uint32_t *buffer,
return 0;
}
+static inline bool pm_use_ext_eng(struct kfd_dev *dev)
+{
+ return amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) >=
+ IP_VERSION(5, 2, 0);
+}
+
static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
struct queue *q, bool is_static)
{
struct pm4_mes_map_queues *packet;
- bool use_static = is_static;
packet = (struct pm4_mes_map_queues *)buffer;
memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
@@ -195,7 +240,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
packet->bitfields2.engine_sel =
engine_sel__mes_map_queues__compute_vi;
- packet->bitfields2.gws_control_queue = q->gws ? 1 : 0;
+ packet->bitfields2.gws_control_queue = q->properties.is_gws ? 1 : 0;
packet->bitfields2.extended_engine_sel =
extended_engine_sel__mes_map_queues__legacy_engine_sel;
packet->bitfields2.queue_type =
@@ -203,7 +248,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
switch (q->properties.type) {
case KFD_QUEUE_TYPE_COMPUTE:
- if (use_static)
+ if (is_static)
packet->bitfields2.queue_type =
queue_type__mes_map_queues__normal_latency_static_queue_vi;
break;
@@ -213,14 +258,24 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
break;
case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
- use_static = false; /* no static queues under SDMA */
- if (q->properties.sdma_engine_id < 2)
+ if (q->properties.sdma_engine_id < 2 &&
+ !pm_use_ext_eng(q->device->kfd))
packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
engine_sel__mes_map_queues__sdma0_vi;
else {
- packet->bitfields2.extended_engine_sel =
- extended_engine_sel__mes_map_queues__sdma0_to_7_sel;
- packet->bitfields2.engine_sel = q->properties.sdma_engine_id;
+ /*
+ * For GFX9.4.3, SDMA engine id can be greater than 8.
+ * For such cases, set extended_engine_sel to 2 and
+ * ensure engine_sel lies between 0-7.
+ */
+ if (q->properties.sdma_engine_id >= 8)
+ packet->bitfields2.extended_engine_sel =
+ extended_engine_sel__mes_map_queues__sdma8_to_15_sel;
+ else
+ packet->bitfields2.extended_engine_sel =
+ extended_engine_sel__mes_map_queues__sdma0_to_7_sel;
+
+ packet->bitfields2.engine_sel = q->properties.sdma_engine_id % 8;
}
break;
default:
@@ -245,11 +300,100 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
return 0;
}
+static inline void pm_build_dequeue_wait_counts_packet_info(struct packet_manager *pm,
+ uint32_t sch_value, uint32_t que_sleep, uint32_t *reg_offset,
+ uint32_t *reg_data)
+{
+ pm->dqm->dev->kfd2kgd->build_dequeue_wait_counts_packet_info(
+ pm->dqm->dev->adev,
+ pm->dqm->wait_times,
+ sch_value,
+ que_sleep,
+ reg_offset,
+ reg_data);
+}
+
+/* pm_config_dequeue_wait_counts_v9: Builds WRITE_DATA packet with
+ * register/value for configuring dequeue wait counts
+ *
+ * @return: -ve for failure and 0 for success and buffer is
+ * filled in with packet
+ *
+ **/
+static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm,
+ uint32_t *buffer,
+ enum kfd_config_dequeue_wait_counts_cmd cmd,
+ uint32_t value)
+{
+ struct pm4_mec_write_data_mmio *packet;
+ uint32_t reg_offset = 0;
+ uint32_t reg_data = 0;
+
+ switch (cmd) {
+ case KFD_DEQUEUE_WAIT_INIT: {
+ uint32_t sch_wave = 0, que_sleep = 1;
+
+ /* For all gfx9 ASICs > gfx941,
+ * Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40.
+ * On a 1GHz machine this is roughly 1 microsecond, which is
+ * about how long it takes to load data out of memory during
+ * queue connect
+ * QUE_SLEEP: Wait Count for Dequeue Retry.
+ *
+ * Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU
+ */
+ if (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) ||
+ KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0))
+ return -EPERM;
+
+ if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu &&
+ (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
+ sch_wave = 1;
+
+ pm_build_dequeue_wait_counts_packet_info(pm, sch_wave, que_sleep,
+ &reg_offset, &reg_data);
+
+ break;
+ }
+ case KFD_DEQUEUE_WAIT_RESET:
+ /* reg_data would be set to dqm->wait_times */
+ pm_build_dequeue_wait_counts_packet_info(pm, 0, 0, &reg_offset, &reg_data);
+ break;
+
+ case KFD_DEQUEUE_WAIT_SET_SCH_WAVE:
+ /* The CP cannot handle value 0 and it will result in
+ * an infinite grace period being set so set to 1 to prevent this. Also
+ * avoid debugger API breakage as it sets 0 and expects a low value.
+ */
+ if (!value)
+ value = 1;
+ pm_build_dequeue_wait_counts_packet_info(pm, value, 0, &reg_offset, &reg_data);
+ break;
+ default:
+ pr_err("Invalid dequeue wait cmd\n");
+ return -EINVAL;
+ }
+
+ packet = (struct pm4_mec_write_data_mmio *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio));
+
+ packet->header.u32All = pm_build_pm4_header(IT_WRITE_DATA,
+ sizeof(struct pm4_mec_write_data_mmio));
+
+ packet->bitfields2.dst_sel = dst_sel___write_data__mem_mapped_register;
+ packet->bitfields2.addr_incr =
+ addr_incr___write_data__do_not_increment_address;
+
+ packet->bitfields3.dst_mmreg_addr = reg_offset;
+
+ packet->data = reg_data;
+
+ return 0;
+}
+
static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
- enum kfd_queue_type type,
enum kfd_unmap_queues_filter filter,
- uint32_t filter_param, bool reset,
- unsigned int sdma_engine)
+ uint32_t filter_param, bool reset)
{
struct pm4_mes_unmap_queues *packet;
@@ -258,31 +402,14 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
sizeof(struct pm4_mes_unmap_queues));
- switch (type) {
- case KFD_QUEUE_TYPE_COMPUTE:
- case KFD_QUEUE_TYPE_DIQ:
- packet->bitfields2.extended_engine_sel =
- extended_engine_sel__mes_unmap_queues__legacy_engine_sel;
- packet->bitfields2.engine_sel =
- engine_sel__mes_unmap_queues__compute;
- break;
- case KFD_QUEUE_TYPE_SDMA:
- case KFD_QUEUE_TYPE_SDMA_XGMI:
- if (sdma_engine < 2) {
- packet->bitfields2.extended_engine_sel =
- extended_engine_sel__mes_unmap_queues__legacy_engine_sel;
- packet->bitfields2.engine_sel =
- engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
- } else {
- packet->bitfields2.extended_engine_sel =
- extended_engine_sel__mes_unmap_queues__sdma0_to_7_sel;
- packet->bitfields2.engine_sel = sdma_engine;
- }
- break;
- default:
- WARN(1, "queue type %d", type);
- return -EINVAL;
- }
+
+ packet->bitfields2.extended_engine_sel =
+ pm_use_ext_eng(pm->dqm->dev->kfd) ?
+ extended_engine_sel__mes_unmap_queues__sdma0_to_7_sel :
+ extended_engine_sel__mes_unmap_queues__legacy_engine_sel;
+
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_unmap_queues__compute;
if (reset)
packet->bitfields2.action =
@@ -292,12 +419,6 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
action__mes_unmap_queues__preempt_queues;
switch (filter) {
- case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
- packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
- packet->bitfields2.num_queues = 1;
- packet->bitfields3b.doorbell_offset0 = filter_param;
- break;
case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
packet->bitfields2.queue_sel =
queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
@@ -353,6 +474,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
.set_resources = pm_set_resources_v9,
.map_queues = pm_map_queues_v9,
.unmap_queues = pm_unmap_queues_v9,
+ .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9,
.query_status = pm_query_status_v9,
.release_mem = NULL,
.map_process_size = sizeof(struct pm4_mes_map_process),
@@ -360,6 +482,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
+ .config_dequeue_wait_counts_size = sizeof(struct pm4_mec_write_data_mmio),
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = 0,
};
@@ -370,6 +493,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = {
.set_resources = pm_set_resources_v9,
.map_queues = pm_map_queues_v9,
.unmap_queues = pm_unmap_queues_v9,
+ .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9,
.query_status = pm_query_status_v9,
.release_mem = NULL,
.map_process_size = sizeof(struct pm4_mes_map_process_aldebaran),
@@ -377,6 +501,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = {
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
+ .config_dequeue_wait_counts_size = sizeof(struct pm4_mec_write_data_mmio),
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = 0,
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
index 08442e7d9944..a1de5d7e173a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -41,6 +42,7 @@ unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size)
static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer,
struct qcm_process_device *qpd)
{
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
struct pm4_mes_map_process *packet;
packet = (struct pm4_mes_map_process *)buffer;
@@ -51,7 +53,7 @@ static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer,
sizeof(struct pm4_mes_map_process));
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
packet->bitfields2.process_quantum = 10;
- packet->bitfields2.pasid = qpd->pqm->process->pasid;
+ packet->bitfields2.pasid = pdd->pasid;
packet->bitfields3.page_table_base = qpd->page_table_base;
packet->bitfields10.gds_size = qpd->gds_size;
packet->bitfields10.num_gws = qpd->num_gws;
@@ -76,7 +78,7 @@ static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer,
{
struct pm4_mes_runlist *packet;
int concurrent_proc_cnt = 0;
- struct kfd_dev *kfd = pm->dqm->dev;
+ struct kfd_node *kfd = pm->dqm->dev;
if (WARN_ON(!ib))
return -EFAULT;
@@ -110,8 +112,8 @@ static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer,
return 0;
}
-int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
- struct scheduling_resources *res)
+static int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
+ struct scheduling_resources *res)
{
struct pm4_mes_set_resources *packet;
@@ -197,10 +199,8 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
}
static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
- enum kfd_queue_type type,
enum kfd_unmap_queues_filter filter,
- uint32_t filter_param, bool reset,
- unsigned int sdma_engine)
+ uint32_t filter_param, bool reset)
{
struct pm4_mes_unmap_queues *packet;
@@ -209,21 +209,9 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
sizeof(struct pm4_mes_unmap_queues));
- switch (type) {
- case KFD_QUEUE_TYPE_COMPUTE:
- case KFD_QUEUE_TYPE_DIQ:
- packet->bitfields2.engine_sel =
+
+ packet->bitfields2.engine_sel =
engine_sel__mes_unmap_queues__compute;
- break;
- case KFD_QUEUE_TYPE_SDMA:
- case KFD_QUEUE_TYPE_SDMA_XGMI:
- packet->bitfields2.engine_sel =
- engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
- break;
- default:
- WARN(1, "queue type %d", type);
- return -EINVAL;
- }
if (reset)
packet->bitfields2.action =
@@ -233,12 +221,6 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
action__mes_unmap_queues__preempt_queues;
switch (filter) {
- case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
- packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
- packet->bitfields2.num_queues = 1;
- packet->bitfields3b.doorbell_offset0 = filter_param;
- break;
case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
packet->bitfields2.queue_sel =
queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
@@ -322,6 +304,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = {
.set_resources = pm_set_resources_vi,
.map_queues = pm_map_queues_vi,
.unmap_queues = pm_unmap_queues_vi,
+ .config_dequeue_wait_counts = NULL,
.query_status = pm_query_status_vi,
.release_mem = pm_release_mem_vi,
.map_process_size = sizeof(struct pm4_mes_map_process),
@@ -329,6 +312,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = {
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
+ .config_dequeue_wait_counts_size = 0,
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = sizeof(struct pm4_mec_release_mem)
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
deleted file mode 100644
index af5816f51e55..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/types.h>
-#include "kfd_priv.h"
-#include "amdgpu_ids.h"
-
-static unsigned int pasid_bits = 16;
-static bool pasids_allocated; /* = false */
-
-bool kfd_set_pasid_limit(unsigned int new_limit)
-{
- if (new_limit < 2)
- return false;
-
- if (new_limit < (1U << pasid_bits)) {
- if (pasids_allocated)
- /* We've already allocated user PASIDs, too late to
- * change the limit
- */
- return false;
-
- while (new_limit < (1U << pasid_bits))
- pasid_bits--;
- }
-
- return true;
-}
-
-unsigned int kfd_get_pasid_limit(void)
-{
- return 1U << pasid_bits;
-}
-
-u32 kfd_pasid_alloc(void)
-{
- int r = amdgpu_pasid_alloc(pasid_bits);
-
- if (r > 0) {
- pasids_allocated = true;
- return r;
- }
-
- return 0;
-}
-
-void kfd_pasid_free(u32 pasid)
-{
- amdgpu_pasid_free(pasid);
-}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
index e50f73d25de6..7274edfd3f38 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
index 4d7add843746..e356a207d03c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2016 Advanced Micro Devices, Inc.
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -62,7 +63,8 @@ struct pm4_mes_set_resources {
struct {
uint32_t vmid_mask:16;
uint32_t unmap_latency:8;
- uint32_t reserved1:5;
+ uint32_t reserved1:4;
+ uint32_t enb_xnack_retry_disable_check:1;
enum mes_set_resources_queue_type_enum queue_type:3;
} bitfields2;
uint32_t ordinal2;
@@ -144,8 +146,12 @@ struct pm4_mes_map_process {
union {
struct {
- uint32_t pasid:16;
- uint32_t reserved1:8;
+ uint32_t pasid:16; /* 0 - 15 */
+ uint32_t reserved1:1; /* 16 */
+ uint32_t exec_cleaner_shader:1; /* 17 */
+ uint32_t debug_vmid:4;
+ uint32_t new_debug:1;
+ uint32_t reserved2:1;
uint32_t diq_enable:1;
uint32_t process_quantum:7;
} bitfields2;
@@ -262,7 +268,8 @@ enum mes_map_queues_engine_sel_enum {
enum mes_map_queues_extended_engine_sel_enum {
extended_engine_sel__mes_map_queues__legacy_engine_sel = 0,
- extended_engine_sel__mes_map_queues__sdma0_to_7_sel = 1
+ extended_engine_sel__mes_map_queues__sdma0_to_7_sel = 1,
+ extended_engine_sel__mes_map_queues__sdma8_to_15_sel = 2
};
struct pm4_mes_map_queues {
@@ -582,6 +589,71 @@ struct pm4_mec_release_mem {
#endif
+#ifndef PM4_MEC_WRITE_DATA_DEFINED
+#define PM4_MEC_WRITE_DATA_DEFINED
+
+enum WRITE_DATA_dst_sel_enum {
+ dst_sel___write_data__mem_mapped_register = 0,
+ dst_sel___write_data__tc_l2 = 2,
+ dst_sel___write_data__gds = 3,
+ dst_sel___write_data__memory = 5,
+ dst_sel___write_data__memory_mapped_adc_persistent_state = 6,
+};
+
+enum WRITE_DATA_addr_incr_enum {
+ addr_incr___write_data__increment_address = 0,
+ addr_incr___write_data__do_not_increment_address = 1
+};
+
+enum WRITE_DATA_wr_confirm_enum {
+ wr_confirm___write_data__do_not_wait_for_write_confirmation = 0,
+ wr_confirm___write_data__wait_for_write_confirmation = 1
+};
+
+enum WRITE_DATA_cache_policy_enum {
+ cache_policy___write_data__lru = 0,
+ cache_policy___write_data__stream = 1
+};
+
+
+struct pm4_mec_write_data_mmio {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /*header */
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ unsigned int reserved1:8;
+ unsigned int dst_sel:4;
+ unsigned int reserved2:4;
+ unsigned int addr_incr:1;
+ unsigned int reserved3:2;
+ unsigned int resume_vf:1;
+ unsigned int wr_confirm:1;
+ unsigned int reserved4:4;
+ unsigned int cache_policy:2;
+ unsigned int reserved5:5;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ union {
+ struct {
+ unsigned int dst_mmreg_addr:18;
+ unsigned int reserved6:14;
+ } bitfields3;
+ unsigned int ordinal3;
+ };
+
+ uint32_t reserved7;
+
+ uint32_t data;
+
+};
+
+#endif
+
enum {
CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h
index f795ec815e2a..e0ed62c4ade0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2020 Advanced Micro Devices, Inc.
+ * Copyright 2020-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -36,7 +37,7 @@ struct pm4_mes_map_process_aldebaran {
struct {
uint32_t pasid:16; /* 0 - 15 */
uint32_t single_memops:1; /* 16 */
- uint32_t reserved1:1; /* 17 */
+ uint32_t exec_cleaner_shader:1; /* 17 */
uint32_t debug_vmid:4; /* 18 - 21 */
uint32_t new_debug:1; /* 22 */
uint32_t tmz:1; /* 23 */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h
deleted file mode 100644
index a0ff34878163..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef KFD_PM4_HEADERS_DIQ_H_
-#define KFD_PM4_HEADERS_DIQ_H_
-
-/*--------------------_INDIRECT_BUFFER-------------------- */
-
-#ifndef _PM4__INDIRECT_BUFFER_DEFINED
-#define _PM4__INDIRECT_BUFFER_DEFINED
-enum _INDIRECT_BUFFER_cache_policy_enum {
- cache_policy___indirect_buffer__lru = 0,
- cache_policy___indirect_buffer__stream = 1,
- cache_policy___indirect_buffer__bypass = 2
-};
-
-enum {
- IT_INDIRECT_BUFFER_PASID = 0x5C
-};
-
-struct pm4__indirect_buffer_pasid {
- union {
- union PM4_MES_TYPE_3_HEADER header; /* header */
- unsigned int ordinal1;
- };
-
- union {
- struct {
- unsigned int reserved1:2;
- unsigned int ib_base_lo:30;
- } bitfields2;
- unsigned int ordinal2;
- };
-
- union {
- struct {
- unsigned int ib_base_hi:16;
- unsigned int reserved2:16;
- } bitfields3;
- unsigned int ordinal3;
- };
-
- union {
- unsigned int control;
- unsigned int ordinal4;
- };
-
- union {
- struct {
- unsigned int pasid:10;
- unsigned int reserved4:22;
- } bitfields5;
- unsigned int ordinal5;
- };
-
-};
-
-#endif
-
-/*--------------------_RELEASE_MEM-------------------- */
-
-#ifndef _PM4__RELEASE_MEM_DEFINED
-#define _PM4__RELEASE_MEM_DEFINED
-enum _RELEASE_MEM_event_index_enum {
- event_index___release_mem__end_of_pipe = 5,
- event_index___release_mem__shader_done = 6
-};
-
-enum _RELEASE_MEM_cache_policy_enum {
- cache_policy___release_mem__lru = 0,
- cache_policy___release_mem__stream = 1,
- cache_policy___release_mem__bypass = 2
-};
-
-enum _RELEASE_MEM_dst_sel_enum {
- dst_sel___release_mem__memory_controller = 0,
- dst_sel___release_mem__tc_l2 = 1,
- dst_sel___release_mem__queue_write_pointer_register = 2,
- dst_sel___release_mem__queue_write_pointer_poll_mask_bit = 3
-};
-
-enum _RELEASE_MEM_int_sel_enum {
- int_sel___release_mem__none = 0,
- int_sel___release_mem__send_interrupt_only = 1,
- int_sel___release_mem__send_interrupt_after_write_confirm = 2,
- int_sel___release_mem__send_data_after_write_confirm = 3
-};
-
-enum _RELEASE_MEM_data_sel_enum {
- data_sel___release_mem__none = 0,
- data_sel___release_mem__send_32_bit_low = 1,
- data_sel___release_mem__send_64_bit_data = 2,
- data_sel___release_mem__send_gpu_clock_counter = 3,
- data_sel___release_mem__send_cp_perfcounter_hi_lo = 4,
- data_sel___release_mem__store_gds_data_to_memory = 5
-};
-
-struct pm4__release_mem {
- union {
- union PM4_MES_TYPE_3_HEADER header; /*header */
- unsigned int ordinal1;
- };
-
- union {
- struct {
- unsigned int event_type:6;
- unsigned int reserved1:2;
- enum _RELEASE_MEM_event_index_enum event_index:4;
- unsigned int tcl1_vol_action_ena:1;
- unsigned int tc_vol_action_ena:1;
- unsigned int reserved2:1;
- unsigned int tc_wb_action_ena:1;
- unsigned int tcl1_action_ena:1;
- unsigned int tc_action_ena:1;
- unsigned int reserved3:6;
- unsigned int atc:1;
- enum _RELEASE_MEM_cache_policy_enum cache_policy:2;
- unsigned int reserved4:5;
- } bitfields2;
- unsigned int ordinal2;
- };
-
- union {
- struct {
- unsigned int reserved5:16;
- enum _RELEASE_MEM_dst_sel_enum dst_sel:2;
- unsigned int reserved6:6;
- enum _RELEASE_MEM_int_sel_enum int_sel:3;
- unsigned int reserved7:2;
- enum _RELEASE_MEM_data_sel_enum data_sel:3;
- } bitfields3;
- unsigned int ordinal3;
- };
-
- union {
- struct {
- unsigned int reserved8:2;
- unsigned int address_lo_32b:30;
- } bitfields4;
- struct {
- unsigned int reserved9:3;
- unsigned int address_lo_64b:29;
- } bitfields5;
- unsigned int ordinal4;
- };
-
- unsigned int address_hi;
-
- unsigned int data_lo;
-
- unsigned int data_hi;
-
-};
-#endif
-
-
-/*--------------------_SET_CONFIG_REG-------------------- */
-
-#ifndef _PM4__SET_CONFIG_REG_DEFINED
-#define _PM4__SET_CONFIG_REG_DEFINED
-
-struct pm4__set_config_reg {
- union {
- union PM4_MES_TYPE_3_HEADER header; /*header */
- unsigned int ordinal1;
- };
-
- union {
- struct {
- unsigned int reg_offset:16;
- unsigned int reserved1:7;
- unsigned int vmid_shift:5;
- unsigned int insert_vmid:1;
- unsigned int reserved2:3;
- } bitfields2;
- unsigned int ordinal2;
- };
-
- unsigned int reg_data[1]; /*1..N of these fields */
-
-};
-#endif
-
-/*--------------------_WAIT_REG_MEM-------------------- */
-
-#ifndef _PM4__WAIT_REG_MEM_DEFINED
-#define _PM4__WAIT_REG_MEM_DEFINED
-enum _WAIT_REG_MEM_function_enum {
- function___wait_reg_mem__always_pass = 0,
- function___wait_reg_mem__less_than_ref_value = 1,
- function___wait_reg_mem__less_than_equal_to_the_ref_value = 2,
- function___wait_reg_mem__equal_to_the_reference_value = 3,
- function___wait_reg_mem__not_equal_reference_value = 4,
- function___wait_reg_mem__greater_than_or_equal_reference_value = 5,
- function___wait_reg_mem__greater_than_reference_value = 6,
- function___wait_reg_mem__reserved = 7
-};
-
-enum _WAIT_REG_MEM_mem_space_enum {
- mem_space___wait_reg_mem__register_space = 0,
- mem_space___wait_reg_mem__memory_space = 1
-};
-
-enum _WAIT_REG_MEM_operation_enum {
- operation___wait_reg_mem__wait_reg_mem = 0,
- operation___wait_reg_mem__wr_wait_wr_reg = 1
-};
-
-struct pm4__wait_reg_mem {
- union {
- union PM4_MES_TYPE_3_HEADER header; /*header */
- unsigned int ordinal1;
- };
-
- union {
- struct {
- enum _WAIT_REG_MEM_function_enum function:3;
- unsigned int reserved1:1;
- enum _WAIT_REG_MEM_mem_space_enum mem_space:2;
- enum _WAIT_REG_MEM_operation_enum operation:2;
- unsigned int reserved2:24;
- } bitfields2;
- unsigned int ordinal2;
- };
-
- union {
- struct {
- unsigned int reserved3:2;
- unsigned int memory_poll_addr_lo:30;
- } bitfields3;
- struct {
- unsigned int register_poll_addr:16;
- unsigned int reserved4:16;
- } bitfields4;
- struct {
- unsigned int register_write_addr:16;
- unsigned int reserved5:16;
- } bitfields5;
- unsigned int ordinal3;
- };
-
- union {
- struct {
- unsigned int poll_address_hi:16;
- unsigned int reserved6:16;
- } bitfields6;
- struct {
- unsigned int register_write_addr:16;
- unsigned int reserved7:16;
- } bitfields7;
- unsigned int ordinal4;
- };
-
- unsigned int reference;
-
- unsigned int mask;
-
- union {
- struct {
- unsigned int poll_interval:16;
- unsigned int reserved8:16;
- } bitfields8;
- unsigned int ordinal7;
- };
-
-};
-#endif
-
-
-#endif /* KFD_PM4_HEADERS_DIQ_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
index 5466cfe1c3cc..8147395c083b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -101,8 +102,8 @@ struct pm4_mes_set_resources {
struct pm4_mes_runlist {
union {
- union PM4_MES_TYPE_3_HEADER header; /* header */
- uint32_t ordinal1;
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
};
union {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h
index b72fa3b8c2d4..5bfd0f9cbe23 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 8fd48d0ed240..70ef051511bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -25,12 +26,13 @@
#include <linux/hashtable.h>
#include <linux/mmu_notifier.h>
+#include <linux/memremap.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
-#include <linux/kfd_ioctl.h>
+#include <uapi/linux/kfd_ioctl.h>
#include <linux/idr.h>
#include <linux/kfifo.h>
#include <linux/seq_file.h>
@@ -97,11 +99,11 @@
/*
* Size of the per-process TBA+TMA buffer: 2 pages
*
- * The first page is the TBA used for the CWSR ISA code. The second
- * page is used as TMA for user-mode trap handler setup in daisy-chain mode.
+ * The first chunk is the TBA used for the CWSR ISA code. The second
+ * chunk is used as TMA for user-mode trap handler setup in daisy-chain mode.
*/
#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)
-#define KFD_CWSR_TMA_OFFSET PAGE_SIZE
+#define KFD_CWSR_TMA_OFFSET (PAGE_SIZE + 2048)
#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE \
(KFD_MAX_NUM_OF_PROCESSES * \
@@ -109,7 +111,16 @@
#define KFD_KERNEL_QUEUE_SIZE 2048
-#define KFD_UNMAP_LATENCY_MS (4000)
+/* KFD_UNMAP_LATENCY_MS is the timeout CP waiting for SDMA preemption. One XCC
+ * can be associated to 2 SDMA engines. queue_preemption_timeout_ms is the time
+ * driver waiting for CP returning the UNMAP_QUEUE fence. Thus the math is
+ * queue_preemption_timeout_ms = sdma_preemption_time * 2 + cp workload
+ * The format here makes CP workload 10% of total timeout
+ */
+#define KFD_UNMAP_LATENCY_MS \
+ ((queue_preemption_timeout_ms - queue_preemption_timeout_ms / 10) >> 1)
+
+#define KFD_MAX_SDMA_QUEUES 128
/*
* 512 = 0x200
@@ -121,7 +132,26 @@
*/
#define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512
-
+/**
+ * enum kfd_ioctl_flags - KFD ioctl flags
+ * Various flags that can be set in &amdkfd_ioctl_desc.flags to control how
+ * userspace can use a given ioctl.
+ */
+enum kfd_ioctl_flags {
+ /*
+ * @KFD_IOC_FLAG_CHECKPOINT_RESTORE:
+ * Certain KFD ioctls such as AMDKFD_IOC_CRIU_OP can potentially
+ * perform privileged operations and load arbitrary data into MQDs and
+ * eventually HQD registers when the queue is mapped by HWS. In order to
+ * prevent this we should perform additional security checks.
+ *
+ * This is equivalent to callers with the CHECKPOINT_RESTORE capability.
+ *
+ * Note: Since earlier versions of docker do not support CHECKPOINT_RESTORE,
+ * we also allow ioctls with SYS_ADMIN capability.
+ */
+ KFD_IOC_FLAG_CHECKPOINT_RESTORE = BIT(0),
+};
/*
* Kernel module parameter to specify maximum number of supported queues per
* device
@@ -152,12 +182,6 @@ extern int send_sigterm;
*/
extern int debug_largebar;
-/*
- * Ignore CRAT table during KFD initialization, can be used to work around
- * broken CRAT tables on some AMD systems
- */
-extern int ignore_crat;
-
/* Set sh_mem_config.retry_disable on GFX v9 */
extern int amdgpu_noretry;
@@ -178,24 +202,32 @@ extern int amdgpu_no_queue_eviction_on_vm_fault;
/* Enable eviction debug messages */
extern bool debug_evictions;
+extern struct mutex kfd_processes_mutex;
+
enum cache_policy {
cache_policy_coherent,
cache_policy_noncoherent
};
-#define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10)
+#define KFD_GC_VERSION(dev) (amdgpu_ip_version((dev)->adev, GC_HWIP, 0))
+#define KFD_IS_SOC15(dev) ((KFD_GC_VERSION(dev)) >= (IP_VERSION(9, 0, 1)))
+#define KFD_SUPPORT_XNACK_PER_PROCESS(dev)\
+ ((KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) || \
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) || \
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) || \
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)))
+
+struct kfd_node;
struct kfd_event_interrupt_class {
- bool (*interrupt_isr)(struct kfd_dev *dev,
+ bool (*interrupt_isr)(struct kfd_node *dev,
const uint32_t *ih_ring_entry, uint32_t *patched_ihre,
bool *patched_flag);
- void (*interrupt_wq)(struct kfd_dev *dev,
+ void (*interrupt_wq)(struct kfd_node *dev,
const uint32_t *ih_ring_entry);
};
struct kfd_device_info {
- enum amd_asic_type asic_family;
- const char *asic_name;
uint32_t gfx_target_version;
const struct kfd_event_interrupt_class *event_interrupt_class;
unsigned int max_pasid_bits;
@@ -205,14 +237,16 @@ struct kfd_device_info {
uint8_t num_of_watch_points;
uint16_t mqd_size_aligned;
bool supports_cwsr;
- bool needs_iommu_device;
bool needs_pci_atomics;
uint32_t no_atomic_fw_version;
- unsigned int num_sdma_engines;
- unsigned int num_xgmi_sdma_engines;
unsigned int num_sdma_queues_per_engine;
+ unsigned int num_reserved_sdma_queues_per_engine;
+ DECLARE_BITMAP(reserved_sdma_queues_bitmap, KFD_MAX_SDMA_QUEUES);
};
+unsigned int kfd_get_num_sdma_engines(struct kfd_node *kdev);
+unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_node *kdev);
+
struct kfd_mem_obj {
uint32_t range_start;
uint32_t range_end;
@@ -227,35 +261,80 @@ struct kfd_vmid_info {
uint32_t vmid_num_kfd;
};
-struct kfd_dev {
- struct kgd_dev *kgd;
-
- const struct kfd_device_info *device_info;
- struct pci_dev *pdev;
- struct drm_device *ddev;
+#define MAX_KFD_NODES 8
- unsigned int id; /* topology stub index */
+struct kfd_dev;
- phys_addr_t doorbell_base; /* Start of actual doorbells used by
- * KFD. It is aligned for mapping
- * into user mode
- */
- size_t doorbell_base_dw_offset; /* Offset from the start of the PCI
- * doorbell BAR to the first KFD
- * doorbell in dwords. GFX reserves
- * the segment before this offset.
+struct kfd_node {
+ unsigned int node_id;
+ struct amdgpu_device *adev; /* Duplicated here along with keeping
+ * a copy in kfd_dev to save a hop
*/
+ const struct kfd2kgd_calls *kfd2kgd; /* Duplicated here along with
+ * keeping a copy in kfd_dev to
+ * save a hop
+ */
+ struct kfd_vmid_info vm_info;
+ unsigned int id; /* topology stub index */
+ uint32_t xcc_mask; /* Instance mask of XCCs present */
+ struct amdgpu_xcp *xcp;
+
+ /* Interrupts */
+ struct kfifo ih_fifo;
+ struct work_struct interrupt_work;
+ spinlock_t interrupt_lock;
+
+ /*
+ * Interrupts of interest to KFD are copied
+ * from the HW ring into a SW ring.
+ */
+ bool interrupts_active;
+ uint32_t interrupt_bitmap; /* Only used for GFX 9.4.3 */
+
+ /* QCM Device instance */
+ struct device_queue_manager *dqm;
+
+ /* Global GWS resource shared between processes */
+ void *gws;
+
+ /* Clients watching SMI events */
+ struct list_head smi_clients;
+ spinlock_t smi_lock;
+ uint32_t reset_seq_num;
+
+ /* SRAM ECC flag */
+ atomic_t sram_ecc_flag;
+
+ /* spm process id */
+ unsigned int spm_pasid;
+
+ /* Maximum process number mapped to HW scheduler */
+ unsigned int max_proc_per_quantum;
+
+ unsigned int compute_vmid_bitmap;
+
+ struct kfd_local_mem_info local_mem_info;
+
+ struct kfd_dev *kfd;
+
+ /* Track per device allocated watch points */
+ uint32_t alloc_watch_ids;
+ spinlock_t watch_points_lock;
+};
+
+struct kfd_dev {
+ struct amdgpu_device *adev;
+
+ struct kfd_device_info device_info;
+
u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells
* page used by kernel queue
*/
struct kgd2kfd_shared_resources shared_resources;
- struct kfd_vmid_info vm_info;
const struct kfd2kgd_calls *kfd2kgd;
struct mutex doorbell_mutex;
- DECLARE_BITMAP(doorbell_available_index,
- KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
void *gtt_mem;
uint64_t gtt_start_gpu_addr;
@@ -265,33 +344,13 @@ struct kfd_dev {
unsigned int gtt_sa_chunk_size;
unsigned int gtt_sa_num_of_chunks;
- /* Interrupts */
- struct kfifo ih_fifo;
- struct workqueue_struct *ih_wq;
- struct work_struct interrupt_work;
- spinlock_t interrupt_lock;
-
- /* QCM Device instance */
- struct device_queue_manager *dqm;
-
bool init_complete;
- /*
- * Interrupts of interest to KFD are copied
- * from the HW ring into a SW ring.
- */
- bool interrupts_active;
-
- /* Debug manager */
- struct kfd_dbgmgr *dbgmgr;
/* Firmware versions */
uint16_t mec_fw_version;
uint16_t mec2_fw_version;
uint16_t sdma_fw_version;
- /* Maximum process number mapped to HW scheduler */
- unsigned int max_proc_per_quantum;
-
/* CWSR */
bool cwsr_enabled;
const void *cwsr_isa;
@@ -302,31 +361,29 @@ struct kfd_dev {
bool pci_atomic_requested;
- /* Use IOMMU v2 flag */
- bool use_iommu_v2;
-
- /* SRAM ECC flag */
- atomic_t sram_ecc_flag;
-
/* Compute Profile ref. count */
atomic_t compute_profile;
- /* Global GWS resource shared between processes */
- void *gws;
-
- /* Clients watching SMI events */
- struct list_head smi_clients;
- spinlock_t smi_lock;
-
- uint32_t reset_seq_num;
-
struct ida doorbell_ida;
unsigned int max_doorbell_slices;
int noretry;
- /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
- struct dev_pagemap pgmap;
+ struct kfd_node *nodes[MAX_KFD_NODES];
+ unsigned int num_nodes;
+
+ struct workqueue_struct *ih_wq;
+
+ /* Kernel doorbells for KFD device */
+ struct amdgpu_bo *doorbells;
+
+ /* bitmap for dynamic doorbell allocation from doorbell object */
+ unsigned long *doorbell_bitmap;
+
+ /* for dynamic partitioning */
+ int kfd_dev_lock;
+
+ atomic_t kfd_processes_count;
};
enum kfd_mempool {
@@ -338,25 +395,24 @@ enum kfd_mempool {
/* Character device interface */
int kfd_chardev_init(void);
void kfd_chardev_exit(void);
-struct device *kfd_chardev(void);
/**
* enum kfd_unmap_queues_filter - Enum for queue filters.
*
- * @KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: Preempts single queue.
- *
* @KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: Preempts all queues in the
* running queues list.
*
+ * @KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: Preempts all non-static queues
+ * in the run list.
+ *
 * @KFD_UNMAP_QUEUES_FILTER_BY_PASID: Preempts queues that belong to
 * a specific process.
*
*/
enum kfd_unmap_queues_filter {
- KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE,
- KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
- KFD_UNMAP_QUEUES_FILTER_BY_PASID
+ KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES = 1,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES = 2,
+ KFD_UNMAP_QUEUES_FILTER_BY_PASID = 3
};
/**
@@ -371,13 +427,16 @@ enum kfd_unmap_queues_filter {
* @KFD_QUEUE_TYPE_DIQ: DIQ queue type.
*
* @KFD_QUEUE_TYPE_SDMA_XGMI: Special SDMA queue for XGMI interface.
+ *
+ * @KFD_QUEUE_TYPE_SDMA_BY_ENG_ID: SDMA user mode queue with target SDMA engine ID.
*/
enum kfd_queue_type {
KFD_QUEUE_TYPE_COMPUTE,
KFD_QUEUE_TYPE_SDMA,
KFD_QUEUE_TYPE_HIQ,
KFD_QUEUE_TYPE_DIQ,
- KFD_QUEUE_TYPE_SDMA_XGMI
+ KFD_QUEUE_TYPE_SDMA_XGMI,
+ KFD_QUEUE_TYPE_SDMA_BY_ENG_ID
};
enum kfd_queue_format {
@@ -442,6 +501,7 @@ enum KFD_QUEUE_PRIORITY {
* it's user mode or kernel mode queue.
*
*/
+
struct queue_properties {
enum kfd_queue_type type;
enum kfd_queue_format format;
@@ -450,14 +510,19 @@ struct queue_properties {
uint64_t queue_size;
uint32_t priority;
uint32_t queue_percent;
- uint32_t *read_ptr;
- uint32_t *write_ptr;
+ void __user *read_ptr;
+ void __user *write_ptr;
void __iomem *doorbell_ptr;
uint32_t doorbell_off;
bool is_interop;
bool is_evicted;
+ bool is_suspended;
+ bool is_being_destroyed;
bool is_active;
bool is_gws;
+ uint32_t pm4_target_xcc;
+ bool is_dbg_wa;
+ bool is_user_cu_masked;
/* Not relevant for user mode queues in cp scheduling */
unsigned int vmid;
/* Relevant only for sdma queues*/
@@ -472,15 +537,25 @@ struct queue_properties {
uint32_t ctl_stack_size;
uint64_t tba_addr;
uint64_t tma_addr;
+ uint64_t exception_status;
+
+ struct amdgpu_bo *wptr_bo;
+ struct amdgpu_bo *rptr_bo;
+ struct amdgpu_bo *ring_bo;
+ struct amdgpu_bo *eop_buf_bo;
+ struct amdgpu_bo *cwsr_bo;
};
#define QUEUE_IS_ACTIVE(q) ((q).queue_size > 0 && \
(q).queue_address != 0 && \
(q).queue_percent > 0 && \
- !(q).is_evicted)
+ !(q).is_evicted && \
+ !(q).is_suspended)
enum mqd_update_flag {
- UPDATE_FLAG_CU_MASK = 0,
+ UPDATE_FLAG_DBG_WA_ENABLE = 1,
+ UPDATE_FLAG_DBG_WA_DISABLE = 2,
+ UPDATE_FLAG_IS_GWS = 4, /* quirk for gfx9 IP */
};
struct mqd_update_info {
@@ -541,11 +616,17 @@ struct queue {
unsigned int doorbell_id;
struct kfd_process *process;
- struct kfd_dev *device;
+ struct kfd_node *device;
void *gws;
/* procfs */
struct kobject kobj;
+
+ void *gang_ctx_bo;
+ uint64_t gang_ctx_gpu_addr;
+ void *gang_ctx_cpu_ptr;
+
+ struct amdgpu_bo *wptr_bo_gart;
};
enum KFD_MQD_TYPE {
@@ -630,7 +711,10 @@ struct qcm_process_device {
uint64_t ib_base;
void *ib_kaddr;
- /* doorbell resources per process per device */
+ /* doorbells for kfd process */
+ struct amdgpu_bo *proc_doorbells;
+
+ /* bitmap for dynamic doorbell allocation from the bo */
unsigned long *doorbell_bitmap;
};
@@ -669,7 +753,7 @@ enum kfd_pdd_bound {
/* Data that is per-process-per device. */
struct kfd_process_device {
/* The device that owns this data. */
- struct kfd_dev *dev;
+ struct kfd_node *dev;
/* The process that owns this kfd_process_device. */
struct kfd_process *process;
@@ -704,7 +788,7 @@ struct kfd_process_device {
enum kfd_pdd_bound bound;
/* VRAM usage */
- uint64_t vram_usage;
+ atomic64_t vram_usage;
struct attribute attr_vram;
char vram_filename[MAX_SYSFS_FILENAME_LEN];
@@ -719,7 +803,6 @@ struct kfd_process_device {
struct attribute attr_evict;
struct kobject *kobj_stats;
- unsigned int doorbell_index;
/*
* @cu_occupancy: Reports occupancy of Compute Units (CU) of a process
@@ -754,6 +837,33 @@ struct kfd_process_device {
uint64_t faults;
uint64_t page_in;
uint64_t page_out;
+
+ /* Exception code status */
+ uint64_t exception_status;
+ void *vm_fault_exc_data;
+ size_t vm_fault_exc_data_size;
+
+ /* Tracks debug per-vmid request settings */
+ uint32_t spi_dbg_override;
+ uint32_t spi_dbg_launch_mode;
+ uint32_t watch_points[4];
+ uint32_t alloc_watch_ids;
+
+ /*
+ * If this process has been checkpointed before, then the user
+ * application will use the original gpu_id on the
+ * checkpointed node to refer to this device.
+ */
+ uint32_t user_gpu_id;
+
+ void *proc_ctx_bo;
+ uint64_t proc_ctx_gpu_addr;
+ void *proc_ctx_cpu_ptr;
+
+ /* Tracks queue reset status */
+ bool has_reset_queue;
+
+ u32 pasid;
};
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -764,12 +874,21 @@ struct svm_range_list {
struct list_head list;
struct work_struct deferred_list_work;
struct list_head deferred_range_list;
+ struct list_head criu_svm_metadata_list;
spinlock_t deferred_list_lock;
atomic_t evicted_ranges;
atomic_t drain_pagefaults;
struct delayed_work restore_work;
DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
- struct task_struct *faulting_task;
+ struct task_struct *faulting_task;
+ /* checkpoint ts decides if page fault recovery needs to be dropped */
+ uint64_t checkpoint_ts[MAX_GPU_INSTANCE];
+
+ /* Default granularity to use in buffer migration
+ * and restoration of backing memory while handling
+ * recoverable page faults
+ */
+ uint8_t default_granularity;
};
/* Process data */
@@ -804,8 +923,6 @@ struct kfd_process {
/* We want to receive a notification when the mm_struct is destroyed */
struct mmu_notifier mmu_notifier;
- u32 pasid;
-
/*
* Array of kfd_process_device pointers,
* one for each device the process is using.
@@ -835,7 +952,7 @@ struct kfd_process {
* fence will be triggered during eviction and new one will be created
* during restore
*/
- struct dma_fence *ef;
+ struct dma_fence __rcu *ef;
/* Work items for evicting and restoring BOs */
struct delayed_work eviction_work;
@@ -847,15 +964,60 @@ struct kfd_process {
*/
unsigned long last_restore_timestamp;
+ /* Indicates device process is debug attached with reserved vmid. */
+ bool debug_trap_enabled;
+
+ /* per-process-per device debug event fd file */
+ struct file *dbg_ev_file;
+
+ /* If the process is a kfd debugger, we need to know so we can clean
+ * up at exit time. If a process enables debugging on itself, it does
+ * its own clean-up, so we don't set the flag here. We track this by
+ * counting the number of processes this process is debugging.
+ */
+ atomic_t debugged_process_count;
+
+ /* If the process is being debugged, this is the debugger process */
+ struct kfd_process *debugger_process;
+
/* Kobj for our procfs */
struct kobject *kobj;
struct kobject *kobj_queues;
struct attribute attr_pasid;
+ /* Keep track of cwsr init */
+ bool has_cwsr;
+
+ /* Exception code enable mask and status */
+ uint64_t exception_enable_mask;
+ uint64_t exception_status;
+
+ /* Used to drain stale interrupts */
+ wait_queue_head_t wait_irq_drain;
+ bool irq_drain_is_open;
+
/* shared virtual memory registered by this process */
struct svm_range_list svms;
bool xnack_enabled;
+
+ /* Work area for debugger event writer worker. */
+ struct work_struct debug_event_workarea;
+
+ /* Tracks debug per-vmid request for debug flags */
+ u32 dbg_flags;
+
+ atomic_t poison;
+ /* Queues are in a paused state because we are in the process of doing a CRIU checkpoint */
+ bool queues_paused;
+
+ /* Tracks runtime enable status */
+ struct semaphore runtime_enable_sema;
+ bool is_runtime_retry;
+ struct kfd_runtime_info runtime_info;
+
+ /* if gpu page fault sent to KFD */
+ bool gpu_page_fault;
};
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
@@ -881,19 +1043,20 @@ struct amdkfd_ioctl_desc {
unsigned int cmd_drv;
const char *name;
};
-bool kfd_dev_is_large_bar(struct kfd_dev *dev);
+bool kfd_dev_is_large_bar(struct kfd_node *dev);
int kfd_process_create_wq(void);
void kfd_process_destroy_wq(void);
-struct kfd_process *kfd_create_process(struct file *filep);
-struct kfd_process *kfd_get_process(const struct task_struct *);
-struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
+void kfd_cleanup_processes(void);
+struct kfd_process *kfd_create_process(struct task_struct *thread);
+struct kfd_process *kfd_get_process(const struct task_struct *task);
+struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid,
+ struct kfd_process_device **pdd);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
-int kfd_process_gpuid_from_kgd(struct kfd_process *p,
- struct amdgpu_device *adev, uint32_t *gpuid,
- uint32_t *gpuidx);
+int kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node,
+ uint32_t *gpuid, uint32_t *gpuidx);
static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p,
uint32_t gpuidx, uint32_t *gpuid) {
return gpuidx < p->n_pdds ? p->pdds[gpuidx]->dev->id : -EINVAL;
@@ -904,23 +1067,28 @@ static inline struct kfd_process_device *kfd_process_device_from_gpuidx(
}
void kfd_unref_process(struct kfd_process *p);
-int kfd_process_evict_queues(struct kfd_process *p);
+int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger);
int kfd_process_restore_queues(struct kfd_process *p);
void kfd_suspend_all_processes(void);
int kfd_resume_all_processes(void);
+struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *process,
+ uint32_t gpu_id);
+
+int kfd_process_get_user_gpu_id(struct kfd_process *p, uint32_t actual_gpu_id);
+
int kfd_process_device_init_vm(struct kfd_process_device *pdd,
struct file *drm_file);
-struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
+struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev,
struct kfd_process *p);
-struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
+struct kfd_process_device *kfd_get_process_device_data(struct kfd_node *dev,
struct kfd_process *p);
-struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
+struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
struct kfd_process *p);
bool kfd_process_xnack_mode(struct kfd_process *p, bool supported);
-int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
+int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
struct vm_area_struct *vma);
/* KFD process API for creating and translating handles */
@@ -930,12 +1098,11 @@ void *kfd_process_device_translate_handle(struct kfd_process_device *p,
int handle);
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
int handle);
+struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid);
/* PASIDs */
int kfd_pasid_init(void);
void kfd_pasid_exit(void);
-bool kfd_set_pasid_limit(unsigned int new_limit);
-unsigned int kfd_get_pasid_limit(void);
u32 kfd_pasid_alloc(void);
void kfd_pasid_free(u32 pasid);
@@ -943,7 +1110,7 @@ void kfd_pasid_free(u32 pasid);
size_t kfd_doorbell_process_slice(struct kfd_dev *kfd);
int kfd_doorbell_init(struct kfd_dev *kfd);
void kfd_doorbell_fini(struct kfd_dev *kfd);
-int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
+int kfd_doorbell_mmap(struct kfd_node *dev, struct kfd_process *process,
struct vm_area_struct *vma);
void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
unsigned int *doorbell_off);
@@ -956,15 +1123,15 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
unsigned int doorbell_id);
phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd);
int kfd_alloc_process_doorbells(struct kfd_dev *kfd,
- unsigned int *doorbell_index);
+ struct kfd_process_device *pdd);
void kfd_free_process_doorbells(struct kfd_dev *kfd,
- unsigned int doorbell_index);
+ struct kfd_process_device *pdd);
/* GTT Sub-Allocator */
-int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
+int kfd_gtt_sa_allocate(struct kfd_node *node, unsigned int size,
struct kfd_mem_obj **mem_obj);
-int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj);
+int kfd_gtt_sa_free(struct kfd_node *node, struct kfd_mem_obj *mem_obj);
extern struct device *kfd_device;
@@ -977,25 +1144,53 @@ void kfd_procfs_del_queue(struct queue *q);
/* Topology */
int kfd_topology_init(void);
void kfd_topology_shutdown(void);
-int kfd_topology_add_device(struct kfd_dev *gpu);
-int kfd_topology_remove_device(struct kfd_dev *gpu);
+int kfd_topology_add_device(struct kfd_node *gpu);
+int kfd_topology_remove_device(struct kfd_node *gpu);
struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
uint32_t proximity_domain);
+struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock(
+ uint32_t proximity_domain);
struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);
-struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
-struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
-struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd);
-int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
+struct kfd_node *kfd_device_by_id(uint32_t gpu_id);
+static inline bool kfd_irq_is_from_node(struct kfd_node *node, uint32_t node_id,
+ uint32_t vmid)
+{
+ return (node->interrupt_bitmap & (1 << node_id)) != 0 &&
+ (node->compute_vmid_bitmap & (1 << vmid)) != 0;
+}
+static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev,
+ uint32_t node_id, uint32_t vmid) {
+ struct kfd_dev *dev = adev->kfd.dev;
+ uint32_t i;
+
+ if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0))
+ return dev->nodes[0];
+
+ for (i = 0; i < dev->num_nodes; i++)
+ if (kfd_irq_is_from_node(dev->nodes[i], node_id, vmid))
+ return dev->nodes[i];
+
+ return NULL;
+}
+int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_node **kdev);
int kfd_numa_node_to_apic_id(int numa_node_id);
-void kfd_double_confirm_iommu_support(struct kfd_dev *gpu);
/* Interrupts */
-int kfd_interrupt_init(struct kfd_dev *dev);
-void kfd_interrupt_exit(struct kfd_dev *dev);
-bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry);
-bool interrupt_is_wanted(struct kfd_dev *dev,
+#define KFD_IRQ_FENCE_CLIENTID 0xff
+#define KFD_IRQ_FENCE_SOURCEID 0xff
+#define KFD_IRQ_IS_FENCE(client, source) \
+ ((client) == KFD_IRQ_FENCE_CLIENTID && \
+ (source) == KFD_IRQ_FENCE_SOURCEID)
+int kfd_interrupt_init(struct kfd_node *dev);
+void kfd_interrupt_exit(struct kfd_node *dev);
+bool enqueue_ih_ring_entry(struct kfd_node *kfd, const void *ih_ring_entry);
+bool interrupt_is_wanted(struct kfd_node *dev,
const uint32_t *ih_ring_entry,
uint32_t *patched_ihre, bool *flag);
+int kfd_process_drain_interrupts(struct kfd_process_device *pdd);
+void kfd_process_close_interrupt_drain(unsigned int pasid);
/* amdkfd Apertures */
int kfd_init_apertures(struct kfd_process *process);
@@ -1003,31 +1198,156 @@ int kfd_init_apertures(struct kfd_process *process);
void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
uint64_t tba_addr,
uint64_t tma_addr);
+void kfd_process_set_trap_debug_flag(struct qcm_process_device *qpd,
+ bool enabled);
+
+/* CWSR initialization */
+int kfd_process_init_cwsr_apu(struct kfd_process *process, struct file *filep);
+
+/* CRIU */
+/*
+ * Need to increment KFD_CRIU_PRIV_VERSION each time a change is made to any of the CRIU private
+ * structures:
+ * kfd_criu_process_priv_data
+ * kfd_criu_device_priv_data
+ * kfd_criu_bo_priv_data
+ * kfd_criu_queue_priv_data
+ * kfd_criu_event_priv_data
+ * kfd_criu_svm_range_priv_data
+ */
+
+#define KFD_CRIU_PRIV_VERSION 1
+
+struct kfd_criu_process_priv_data {
+ uint32_t version;
+ uint32_t xnack_mode;
+};
+
+struct kfd_criu_device_priv_data {
+ /* For future use */
+ uint64_t reserved;
+};
+
+struct kfd_criu_bo_priv_data {
+ uint64_t user_addr;
+ uint32_t idr_handle;
+ uint32_t mapped_gpuids[MAX_GPU_INSTANCE];
+};
+
+/*
+ * The first 4 bytes of kfd_criu_queue_priv_data, kfd_criu_event_priv_data,
+ * kfd_criu_svm_range_priv_data is the object type
+ */
+enum kfd_criu_object_type {
+ KFD_CRIU_OBJECT_TYPE_QUEUE,
+ KFD_CRIU_OBJECT_TYPE_EVENT,
+ KFD_CRIU_OBJECT_TYPE_SVM_RANGE,
+};
+
+struct kfd_criu_svm_range_priv_data {
+ uint32_t object_type;
+ uint64_t start_addr;
+ uint64_t size;
+ /* Variable length array of attributes */
+ struct kfd_ioctl_svm_attribute attrs[];
+};
+
+struct kfd_criu_queue_priv_data {
+ uint32_t object_type;
+ uint64_t q_address;
+ uint64_t q_size;
+ uint64_t read_ptr_addr;
+ uint64_t write_ptr_addr;
+ uint64_t doorbell_off;
+ uint64_t eop_ring_buffer_address;
+ uint64_t ctx_save_restore_area_address;
+ uint32_t gpu_id;
+ uint32_t type;
+ uint32_t format;
+ uint32_t q_id;
+ uint32_t priority;
+ uint32_t q_percent;
+ uint32_t doorbell_id;
+ uint32_t gws;
+ uint32_t sdma_id;
+ uint32_t eop_ring_buffer_size;
+ uint32_t ctx_save_restore_area_size;
+ uint32_t ctl_stack_size;
+ uint32_t mqd_size;
+};
+
+struct kfd_criu_event_priv_data {
+ uint32_t object_type;
+ uint64_t user_handle;
+ uint32_t event_id;
+ uint32_t auto_reset;
+ uint32_t type;
+ uint32_t signaled;
+
+ union {
+ struct kfd_hsa_memory_exception_data memory_exception_data;
+ struct kfd_hsa_hw_exception_data hw_exception_data;
+ };
+};
+
+int kfd_process_get_queue_info(struct kfd_process *p,
+ uint32_t *num_queues,
+ uint64_t *priv_data_sizes);
+
+int kfd_criu_checkpoint_queues(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_data_offset);
+
+int kfd_criu_restore_queue(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_data_offset,
+ uint64_t max_priv_data_size);
+
+int kfd_criu_checkpoint_events(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_data_offset);
+
+int kfd_criu_restore_event(struct file *devkfd,
+ struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_data_offset,
+ uint64_t max_priv_data_size);
+/* CRIU - End */
/* Queue Context Management */
int init_queue(struct queue **q, const struct queue_properties *properties);
void uninit_queue(struct queue *q);
void print_queue_properties(struct queue_properties *q);
void print_queue(struct queue *q);
+int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo,
+ u64 expected_size);
+void kfd_queue_buffer_put(struct amdgpu_bo **bo);
+int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties);
+int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties);
+void kfd_queue_unref_bo_va(struct amdgpu_vm *vm, struct amdgpu_bo **bo);
+int kfd_queue_unref_bo_vas(struct kfd_process_device *pdd,
+ struct queue_properties *properties);
+void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev);
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
- struct kfd_dev *dev);
-struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
- struct kfd_dev *dev);
+ struct kfd_node *dev);
struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
- struct kfd_dev *dev);
-struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
- struct kfd_dev *dev);
+ struct kfd_node *dev);
struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
- struct kfd_dev *dev);
+ struct kfd_node *dev);
struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
- struct kfd_dev *dev);
-struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
+ struct kfd_node *dev);
+struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
+ struct kfd_node *dev);
+struct mqd_manager *mqd_manager_init_v12(enum KFD_MQD_TYPE type,
+ struct kfd_node *dev);
+struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev);
void device_queue_manager_uninit(struct device_queue_manager *dqm);
-struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
+struct kernel_queue *kernel_queue_init(struct kfd_node *dev,
enum kfd_queue_type type);
-void kernel_queue_uninit(struct kernel_queue *kq, bool hanging);
-int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid);
+void kernel_queue_uninit(struct kernel_queue *kq);
+int kfd_evict_process_device(struct kfd_process_device *pdd);
+int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id);
/* Process Queue Manager */
struct process_queue_node {
@@ -1041,10 +1361,12 @@ void kfd_process_dequeue_from_all_devices(struct kfd_process *p);
int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p);
void pqm_uninit(struct process_queue_manager *pqm);
int pqm_create_queue(struct process_queue_manager *pqm,
- struct kfd_dev *dev,
- struct file *f,
+ struct kfd_node *dev,
struct queue_properties *properties,
unsigned int *qid,
+ const struct kfd_criu_queue_priv_data *q_data,
+ const void *restore_mqd,
+ const void *restore_ctl_stack,
uint32_t *p_doorbell_offset_in_process);
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
int pqm_update_queue_properties(struct process_queue_manager *pqm, unsigned int qid,
@@ -1053,8 +1375,6 @@ int pqm_update_mqd(struct process_queue_manager *pqm, unsigned int qid,
struct mqd_update_info *minfo);
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
void *gws);
-struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
- unsigned int qid);
struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
unsigned int qid);
int pqm_get_wave_state(struct process_queue_manager *pqm,
@@ -1062,16 +1382,43 @@ int pqm_get_wave_state(struct process_queue_manager *pqm,
void __user *ctl_stack,
u32 *ctl_stack_used_size,
u32 *save_area_used_size);
+int pqm_get_queue_snapshot(struct process_queue_manager *pqm,
+ uint64_t exception_clear_mask,
+ void __user *buf,
+ int *num_qss_entries,
+ uint32_t *entry_size);
-int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
+int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm,
uint64_t fence_value,
unsigned int timeout_ms);
+int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
+ unsigned int qid,
+ u32 *mqd_size,
+ u32 *ctl_stack_size);
/* Packet Manager */
#define KFD_FENCE_COMPLETED (100)
#define KFD_FENCE_INIT (10)
+/**
+ * enum kfd_config_dequeue_wait_counts_cmd - Command for configuring
+ * dequeue wait counts.
+ *
+ * @KFD_DEQUEUE_WAIT_INIT: Set optimized dequeue wait counts for
+ * certain ASICs. For these ASICs, this is the default value used by RESET
+ * @KFD_DEQUEUE_WAIT_RESET: Reset dequeue wait counts to the optimized value
+ * for certain ASICs. For others set it to default hardware reset value
+ * @KFD_DEQUEUE_WAIT_SET_SCH_WAVE: Set context switch latency wait
+ *
+ */
+enum kfd_config_dequeue_wait_counts_cmd {
+ KFD_DEQUEUE_WAIT_INIT = 1,
+ KFD_DEQUEUE_WAIT_RESET = 2,
+ KFD_DEQUEUE_WAIT_SET_SCH_WAVE = 3
+};
+
+
struct packet_manager {
struct device_queue_manager *dqm;
struct kernel_queue *priv_queue;
@@ -1095,10 +1442,10 @@ struct packet_manager_funcs {
int (*map_queues)(struct packet_manager *pm, uint32_t *buffer,
struct queue *q, bool is_static);
int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
- enum kfd_queue_type type,
enum kfd_unmap_queues_filter mode,
- uint32_t filter_param, bool reset,
- unsigned int sdma_engine);
+ uint32_t filter_param, bool reset);
+ int (*config_dequeue_wait_counts)(struct packet_manager *pm, uint32_t *buffer,
+ enum kfd_config_dequeue_wait_counts_cmd cmd, uint32_t value);
int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
uint64_t fence_address, uint64_t fence_value);
int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
@@ -1109,6 +1456,7 @@ struct packet_manager_funcs {
int set_resources_size;
int map_queues_size;
int unmap_queues_size;
+ int config_dequeue_wait_counts_size;
int query_status_size;
int release_mem_size;
};
@@ -1118,20 +1466,23 @@ extern const struct packet_manager_funcs kfd_v9_pm_funcs;
extern const struct packet_manager_funcs kfd_aldebaran_pm_funcs;
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
-void pm_uninit(struct packet_manager *pm, bool hanging);
+void pm_uninit(struct packet_manager *pm);
int pm_send_set_resources(struct packet_manager *pm,
struct scheduling_resources *res);
int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
uint64_t fence_value);
-int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
+int pm_send_unmap_queue(struct packet_manager *pm,
enum kfd_unmap_queues_filter mode,
- uint32_t filter_param, bool reset,
- unsigned int sdma_engine);
+ uint32_t filter_param, bool reset);
void pm_release_ib(struct packet_manager *pm);
+int pm_config_dequeue_wait_counts(struct packet_manager *pm,
+ enum kfd_config_dequeue_wait_counts_cmd cmd,
+ uint32_t wait_counts_config);
+
/* Following PM funcs can be shared among VI and AI */
unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
@@ -1140,55 +1491,80 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
/* Events */
extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
extern const struct kfd_event_interrupt_class event_interrupt_class_v9;
+extern const struct kfd_event_interrupt_class event_interrupt_class_v9_4_3;
+extern const struct kfd_event_interrupt_class event_interrupt_class_v10;
+extern const struct kfd_event_interrupt_class event_interrupt_class_v11;
extern const struct kfd_device_global_init_class device_global_init_class_cik;
-void kfd_event_init_process(struct kfd_process *p);
+int kfd_event_init_process(struct kfd_process *p);
void kfd_event_free_process(struct kfd_process *p);
int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma);
int kfd_wait_on_events(struct kfd_process *p,
uint32_t num_events, void __user *data,
- bool all, uint32_t user_timeout_ms,
+ bool all, uint32_t *user_timeout_ms,
uint32_t *wait_result);
void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
uint32_t valid_id_bits);
-void kfd_signal_iommu_event(struct kfd_dev *dev,
- u32 pasid, unsigned long address,
- bool is_write_requested, bool is_execute_requested);
void kfd_signal_hw_exception_event(u32 pasid);
int kfd_set_event(struct kfd_process *p, uint32_t event_id);
int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
-int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
- uint64_t size);
+int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset);
+
int kfd_event_create(struct file *devkfd, struct kfd_process *p,
uint32_t event_type, bool auto_reset, uint32_t node_id,
uint32_t *event_id, uint32_t *event_trigger_data,
uint64_t *event_page_offset, uint32_t *event_slot_index);
+
+int kfd_get_num_events(struct kfd_process *p);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
-void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
- struct kfd_vm_fault_info *info);
+void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t gpu_va);
-void kfd_signal_reset_event(struct kfd_dev *dev);
+void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
+ struct kfd_vm_fault_info *info,
+ struct kfd_hsa_memory_exception_data *data);
-void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid);
+void kfd_signal_reset_event(struct kfd_node *dev);
-void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
+void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid);
-int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
+static inline void kfd_flush_tlb(struct kfd_process_device *pdd,
+ enum TLB_FLUSH_TYPE type)
+{
+ struct amdgpu_device *adev = pdd->dev->adev;
+ struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
+
+ amdgpu_vm_flush_compute_tlb(adev, vm, type, pdd->dev->xcc_mask);
+}
+
+static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
+{
+ return KFD_GC_VERSION(dev) >= IP_VERSION(9, 4, 2) ||
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && dev->sdma_fw_version >= 18) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0);
+}
-bool kfd_is_locked(void);
+int kfd_send_exception_to_runtime(struct kfd_process *p,
+ unsigned int queue_id,
+ uint64_t error_reason);
+bool kfd_is_locked(struct kfd_dev *kfd);
/* Compute profile */
-void kfd_inc_compute_active(struct kfd_dev *dev);
-void kfd_dec_compute_active(struct kfd_dev *dev);
+void kfd_inc_compute_active(struct kfd_node *dev);
+void kfd_dec_compute_active(struct kfd_node *dev);
/* Cgroup Support */
/* Check with device cgroup if @kfd device is accessible */
-static inline int kfd_devcgroup_check_permission(struct kfd_dev *kfd)
+static inline int kfd_devcgroup_check_permission(struct kfd_node *node)
{
#if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF)
- struct drm_device *ddev = kfd->ddev;
+ struct drm_device *ddev;
+
+ if (node->xcp)
+ ddev = node->xcp->ddev;
+ else
+ ddev = adev_to_drm(node->adev);
return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR,
ddev->render->index,
@@ -1198,6 +1574,11 @@ static inline int kfd_devcgroup_check_permission(struct kfd_dev *kfd)
#endif
}
+static inline bool kfd_is_first_node(struct kfd_node *node)
+{
+ return (node == node->kfd->nodes[0]);
+}
+
/* Debugfs */
#if defined(CONFIG_DEBUG_FS)
@@ -1210,14 +1591,19 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data);
int kfd_debugfs_rls_by_device(struct seq_file *m, void *data);
int pm_debugfs_runlist(struct seq_file *m, void *data);
-int kfd_debugfs_hang_hws(struct kfd_dev *dev);
+int kfd_debugfs_hang_hws(struct kfd_node *dev);
int pm_debugfs_hang_hws(struct packet_manager *pm);
int dqm_debugfs_hang_hws(struct device_queue_manager *dqm);
+void kfd_debugfs_add_process(struct kfd_process *p);
+void kfd_debugfs_remove_process(struct kfd_process *p);
+
#else
static inline void kfd_debugfs_init(void) {}
static inline void kfd_debugfs_fini(void) {}
+static inline void kfd_debugfs_add_process(struct kfd_process *p) {}
+static inline void kfd_debugfs_remove_process(struct kfd_process *p) {}
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index b993011cfa64..a085faac9fe1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -27,7 +28,6 @@
#include <linux/sched/task.h>
#include <linux/mmu_context.h>
#include <linux/slab.h>
-#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>
#include <linux/mman.h>
@@ -35,21 +35,22 @@
#include <linux/pm_runtime.h>
#include "amdgpu_amdkfd.h"
#include "amdgpu.h"
+#include "amdgpu_reset.h"
struct mm_struct;
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
-#include "kfd_dbgmgr.h"
-#include "kfd_iommu.h"
#include "kfd_svm.h"
+#include "kfd_smi_events.h"
+#include "kfd_debug.h"
/*
* List of struct kfd_process (field kfd_process).
* Unique/indexed by mm_struct*
*/
DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
-static DEFINE_MUTEX(kfd_processes_mutex);
+DEFINE_MUTEX(kfd_processes_mutex);
DEFINE_SRCU(kfd_processes_srcu);
@@ -64,10 +65,10 @@ static struct workqueue_struct *kfd_process_wq;
*/
static struct workqueue_struct *kfd_restore_wq;
-static struct kfd_process *find_process(const struct task_struct *thread);
+static struct kfd_process *find_process(const struct task_struct *thread,
+ bool ref);
static void kfd_process_ref_release(struct kref *ref);
static struct kfd_process *create_process(const struct task_struct *thread);
-static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);
static void evict_process_worker(struct work_struct *work);
static void restore_process_worker(struct work_struct *work);
@@ -251,14 +252,13 @@ cleanup:
}
/**
- * @kfd_get_cu_occupancy - Collect number of waves in-flight on this device
+ * kfd_get_cu_occupancy - Collect number of waves in-flight on this device
* by current process. Translates acquired wave count into number of compute units
* that are occupied.
*
- * @atr: Handle of attribute that allows reporting of wave count. The attribute
+ * @attr: Handle of attribute that allows reporting of wave count. The attribute
* handle encapsulates GPU device it is associated with, thereby allowing collection
* of waves in flight, etc
- *
* @buffer: Handle of user provided buffer updated with wave count
*
* Return: Number of bytes written to user buffer or an error value
@@ -268,9 +268,12 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
int cu_cnt;
int wave_cnt;
int max_waves_per_cu;
- struct kfd_dev *dev = NULL;
+ struct kfd_node *dev = NULL;
struct kfd_process *proc = NULL;
struct kfd_process_device *pdd = NULL;
+ int i;
+ struct kfd_cu_occupancy *cu_occupancy;
+ u32 queue_format;
pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy);
dev = pdd->dev;
@@ -280,41 +283,64 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
cu_cnt = 0;
proc = pdd->process;
if (pdd->qpd.queue_count == 0) {
- pr_debug("Gpu-Id: %d has no active queues for process %d\n",
- dev->id, proc->pasid);
+ pr_debug("Gpu-Id: %d has no active queues for process pid %d\n",
+ dev->id, (int)proc->lead_thread->pid);
return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
}
/* Collect wave count from device if it supports */
wave_cnt = 0;
max_waves_per_cu = 0;
- dev->kfd2kgd->get_cu_occupancy(dev->kgd, proc->pasid, &wave_cnt,
- &max_waves_per_cu);
+
+ cu_occupancy = kcalloc(AMDGPU_MAX_QUEUES, sizeof(*cu_occupancy), GFP_KERNEL);
+ if (!cu_occupancy)
+ return -ENOMEM;
+
+ /*
+ * For GFX 9.4.3, fetch the CU occupancy from the first XCC in the partition.
+ * For AQL queues, because of cooperative dispatch we multiply the wave count
+ * by number of XCCs in the partition to get the total wave counts across all
+ * XCCs in the partition.
+ * For PM4 queues, there is no cooperative dispatch so wave_cnt stays as it is.
+ */
+ dev->kfd2kgd->get_cu_occupancy(dev->adev, cu_occupancy,
+ &max_waves_per_cu, ffs(dev->xcc_mask) - 1);
+
+ for (i = 0; i < AMDGPU_MAX_QUEUES; i++) {
+ if (cu_occupancy[i].wave_cnt != 0 &&
+ kfd_dqm_is_queue_in_process(dev->dqm, &pdd->qpd,
+ cu_occupancy[i].doorbell_off,
+ &queue_format)) {
+ if (unlikely(queue_format == KFD_QUEUE_FORMAT_PM4))
+ wave_cnt += cu_occupancy[i].wave_cnt;
+ else
+ wave_cnt += (NUM_XCC(dev->xcc_mask) *
+ cu_occupancy[i].wave_cnt);
+ }
+ }
/* Translate wave count to number of compute units */
cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;
+ kfree(cu_occupancy);
return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
}
static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
- if (strcmp(attr->name, "pasid") == 0) {
- struct kfd_process *p = container_of(attr, struct kfd_process,
- attr_pasid);
-
- return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid);
- } else if (strncmp(attr->name, "vram_", 5) == 0) {
+ if (strcmp(attr->name, "pasid") == 0)
+ return snprintf(buffer, PAGE_SIZE, "%d\n", 0);
+ else if (strncmp(attr->name, "vram_", 5) == 0) {
struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
attr_vram);
- return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
+ return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage));
} else if (strncmp(attr->name, "sdma_", 5) == 0) {
struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
attr_sdma);
struct kfd_sdma_activity_handler_workarea sdma_activity_work_handler;
- INIT_WORK(&sdma_activity_work_handler.sdma_activity_work,
- kfd_sdma_activity_worker);
+ INIT_WORK_ONSTACK(&sdma_activity_work_handler.sdma_activity_work,
+ kfd_sdma_activity_worker);
sdma_activity_work_handler.pdd = pdd;
sdma_activity_work_handler.sdma_activity_counter = 0;
@@ -322,6 +348,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
schedule_work(&sdma_activity_work_handler.sdma_activity_work);
flush_work(&sdma_activity_work_handler.sdma_activity_work);
+ destroy_work_on_stack(&sdma_activity_work_handler.sdma_activity_work);
return snprintf(buffer, PAGE_SIZE, "%llu\n",
(sdma_activity_work_handler.sdma_activity_counter)/
@@ -343,7 +370,7 @@ static const struct sysfs_ops kfd_procfs_ops = {
.show = kfd_procfs_show,
};
-static struct kobj_type procfs_type = {
+static const struct kobj_type procfs_type = {
.release = kfd_procfs_kobj_release,
.sysfs_ops = &kfd_procfs_ops,
};
@@ -462,21 +489,22 @@ static struct attribute *procfs_queue_attrs[] = {
&attr_queue_gpuid,
NULL
};
+ATTRIBUTE_GROUPS(procfs_queue);
static const struct sysfs_ops procfs_queue_ops = {
.show = kfd_procfs_queue_show,
};
-static struct kobj_type procfs_queue_type = {
+static const struct kobj_type procfs_queue_type = {
.sysfs_ops = &procfs_queue_ops,
- .default_attrs = procfs_queue_attrs,
+ .default_groups = procfs_queue_groups,
};
static const struct sysfs_ops procfs_stats_ops = {
.show = kfd_procfs_stats_show,
};
-static struct kobj_type procfs_stats_type = {
+static const struct kobj_type procfs_stats_type = {
.sysfs_ops = &procfs_stats_ops,
.release = kfd_procfs_kobj_release,
};
@@ -485,7 +513,7 @@ static const struct sysfs_ops sysfs_counters_ops = {
.show = kfd_sysfs_counters_show,
};
-static struct kobj_type sysfs_counters_type = {
+static const struct kobj_type sysfs_counters_type = {
.sysfs_ops = &sysfs_counters_ops,
.release = kfd_procfs_kobj_release,
};
@@ -664,7 +692,8 @@ int kfd_process_create_wq(void)
if (!kfd_process_wq)
kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
if (!kfd_restore_wq)
- kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);
+ kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq",
+ WQ_FREEZABLE);
if (!kfd_process_wq || !kfd_restore_wq) {
kfd_process_destroy_wq();
@@ -687,17 +716,17 @@ void kfd_process_destroy_wq(void)
}
static void kfd_process_free_gpuvm(struct kgd_mem *mem,
- struct kfd_process_device *pdd, void *kptr)
+ struct kfd_process_device *pdd, void **kptr)
{
- struct kfd_dev *dev = pdd->dev;
+ struct kfd_node *dev = pdd->dev;
- if (kptr) {
- amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->kgd, mem);
- kptr = NULL;
+ if (kptr && *kptr) {
+ amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
+ *kptr = NULL;
}
- amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->drm_priv);
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, pdd->drm_priv,
+ amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->adev, mem, pdd->drm_priv);
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, mem, pdd->drm_priv,
NULL);
}
@@ -711,27 +740,28 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
uint64_t gpu_va, uint32_t size,
uint32_t flags, struct kgd_mem **mem, void **kptr)
{
- struct kfd_dev *kdev = pdd->dev;
+ struct kfd_node *kdev = pdd->dev;
int err;
- err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
- pdd->drm_priv, mem, NULL, flags);
+ err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size,
+ pdd->drm_priv, mem, NULL,
+ flags, false);
if (err)
goto err_alloc_mem;
- err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, *mem,
- pdd->drm_priv, NULL);
+ err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->adev, *mem,
+ pdd->drm_priv);
if (err)
goto err_map_mem;
- err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, *mem, true);
+ err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->adev, *mem, true);
if (err) {
pr_debug("Sync memory failed, wait interrupted by user signal\n");
goto sync_memory_failed;
}
if (kptr) {
- err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
+ err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(
(struct kgd_mem *)*mem, kptr, NULL);
if (err) {
pr_debug("Map GTT BO to kernel failed\n");
@@ -742,10 +772,10 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
return err;
sync_memory_failed:
- amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->kgd, *mem, pdd->drm_priv);
+ amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->adev, *mem, pdd->drm_priv);
err_map_mem:
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, *mem, pdd->drm_priv,
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->adev, *mem, pdd->drm_priv,
NULL);
err_alloc_mem:
*mem = NULL;
@@ -792,21 +822,30 @@ static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd)
if (!qpd->ib_kaddr || !qpd->ib_base)
return;
- kfd_process_free_gpuvm(qpd->ib_mem, pdd, qpd->ib_kaddr);
+ kfd_process_free_gpuvm(qpd->ib_mem, pdd, &qpd->ib_kaddr);
}
-struct kfd_process *kfd_create_process(struct file *filep)
+struct kfd_process *kfd_create_process(struct task_struct *thread)
{
struct kfd_process *process;
- struct task_struct *thread = current;
int ret;
- if (!thread->mm)
+ if (!(thread->mm && mmget_not_zero(thread->mm)))
return ERR_PTR(-EINVAL);
/* Only the pthreads threading model is supported. */
- if (thread->group_leader->mm != thread->mm)
+ if (thread->group_leader->mm != thread->mm) {
+ mmput(thread->mm);
return ERR_PTR(-EINVAL);
+ }
+
+ /* If the process just called exec(3), it is possible that the
+ * cleanup of the kfd_process (following the release of the mm
+ * of the old process image) is still in the cleanup work queue.
+ * Make sure to drain any job before trying to recreate any
+ * resource for this process.
+ */
+ flush_workqueue(kfd_process_wq);
/*
* take kfd processes mutex before starting of process creation
@@ -815,8 +854,16 @@ struct kfd_process *kfd_create_process(struct file *filep)
*/
mutex_lock(&kfd_processes_mutex);
- /* A prior open of /dev/kfd could have already created the process. */
- process = find_process(thread);
+ if (kfd_is_locked(NULL)) {
+ pr_debug("KFD is locked! Cannot create process");
+ process = ERR_PTR(-EINVAL);
+ goto out;
+ }
+
+ /* A prior open of /dev/kfd could have already created the process.
+ * find_process will increase process kref in this case
+ */
+ process = find_process(thread, true);
if (process) {
pr_debug("Process already found\n");
} else {
@@ -824,10 +871,6 @@ struct kfd_process *kfd_create_process(struct file *filep)
if (IS_ERR(process))
goto out;
- ret = kfd_process_init_cwsr_apu(process, filep);
- if (ret)
- goto out_destroy;
-
if (!procfs.kobj)
goto out;
@@ -856,21 +899,16 @@ struct kfd_process *kfd_create_process(struct file *filep)
kfd_procfs_add_sysfs_stats(process);
kfd_procfs_add_sysfs_files(process);
kfd_procfs_add_sysfs_counters(process);
+
+ kfd_debugfs_add_process(process);
+
+ init_waitqueue_head(&process->wait_irq_drain);
}
out:
- if (!IS_ERR(process))
- kref_get(&process->ref);
mutex_unlock(&kfd_processes_mutex);
+ mmput(thread->mm);
return process;
-
-out_destroy:
- hash_del_rcu(&process->kfd_processes);
- mutex_unlock(&kfd_processes_mutex);
- synchronize_srcu(&kfd_processes_srcu);
- /* kfd_process_free_notifier will trigger the cleanup */
- mmu_notifier_put(&process->mmu_notifier);
- return ERR_PTR(ret);
}
struct kfd_process *kfd_get_process(const struct task_struct *thread)
@@ -884,7 +922,7 @@ struct kfd_process *kfd_get_process(const struct task_struct *thread)
if (thread->group_leader->mm != thread->mm)
return ERR_PTR(-EINVAL);
- process = find_process(thread);
+ process = find_process(thread, false);
if (!process)
return ERR_PTR(-EINVAL);
@@ -903,13 +941,16 @@ static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
return NULL;
}
-static struct kfd_process *find_process(const struct task_struct *thread)
+static struct kfd_process *find_process(const struct task_struct *thread,
+ bool ref)
{
struct kfd_process *p;
int idx;
idx = srcu_read_lock(&kfd_processes_srcu);
p = find_process_by_mm(thread->mm);
+ if (p && ref)
+ kref_get(&p->ref);
srcu_read_unlock(&kfd_processes_srcu, idx);
return p;
@@ -920,6 +961,26 @@ void kfd_unref_process(struct kfd_process *p)
kref_put(&p->ref, kfd_process_ref_release);
}
+/* This increments the process->ref counter. */
+struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid)
+{
+ struct task_struct *task = NULL;
+ struct kfd_process *p = NULL;
+
+ if (!pid) {
+ task = current;
+ get_task_struct(task);
+ } else {
+ task = get_pid_task(pid, PIDTYPE_PID);
+ }
+
+ if (task) {
+ p = find_process(task, true);
+ put_task_struct(task);
+ }
+
+ return p;
+}
static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
{
@@ -940,10 +1001,10 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
if (!peer_pdd->drm_priv)
continue;
amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
- peer_pdd->dev->kgd, mem, peer_pdd->drm_priv);
+ peer_pdd->dev->adev, mem, peer_pdd->drm_priv);
}
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem,
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, mem,
pdd->drm_priv, NULL);
kfd_process_device_remove_obj_handle(pdd, id);
}
@@ -956,7 +1017,7 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
static void kfd_process_kunmap_signal_bo(struct kfd_process *p)
{
struct kfd_process_device *pdd;
- struct kfd_dev *kdev;
+ struct kfd_node *kdev;
void *mem;
kdev = kfd_device_by_id(GET_GPU_ID(p->signal_handle));
@@ -974,7 +1035,7 @@ static void kfd_process_kunmap_signal_bo(struct kfd_process *p)
if (!mem)
goto out;
- amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->kgd, mem);
+ amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
out:
mutex_unlock(&p->mutex);
@@ -995,37 +1056,39 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
- pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
- pdd->dev->id, p->pasid);
+ kfd_smi_event_process(pdd, false);
+ pr_debug("Releasing pdd (topology id %d, for pid %d)\n",
+ pdd->dev->id, p->lead_thread->pid);
kfd_process_device_destroy_cwsr_dgpu(pdd);
kfd_process_device_destroy_ib_mem(pdd);
- if (pdd->drm_file) {
- amdgpu_amdkfd_gpuvm_release_process_vm(
- pdd->dev->kgd, pdd->drm_priv);
+ if (pdd->drm_file)
fput(pdd->drm_file);
- }
if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
get_order(KFD_CWSR_TBA_TMA_SIZE));
- kfree(pdd->qpd.doorbell_bitmap);
idr_destroy(&pdd->alloc_idr);
- kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);
+ kfd_free_process_doorbells(pdd->dev->kfd, pdd);
+ if (pdd->dev->kfd->shared_resources.enable_mes &&
+ pdd->proc_ctx_cpu_ptr)
+ amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
+ &pdd->proc_ctx_bo);
/*
* before destroying pdd, make sure to report availability
* for auto suspend
*/
if (pdd->runtime_inuse) {
- pm_runtime_mark_last_busy(pdd->dev->ddev->dev);
- pm_runtime_put_autosuspend(pdd->dev->ddev->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(pdd->dev->adev)->dev);
pdd->runtime_inuse = false;
}
+ atomic_dec(&pdd->dev->kfd->kfd_processes_count);
+
kfree(pdd);
p->pdds[i] = NULL;
}
@@ -1076,6 +1139,17 @@ static void kfd_process_remove_sysfs(struct kfd_process *p)
p->kobj = NULL;
}
+/*
+ * If any GPU is undergoing a reset, wait for the reset to complete.
+ */
+static void kfd_process_wait_gpu_reset_complete(struct kfd_process *p)
+{
+ int i;
+
+ for (i = 0; i < p->n_pdds; i++)
+ flush_workqueue(p->pdds[i]->dev->adev->reset_domain->wq);
+}
+
/* No process locking is needed in this function, because the process
* is not findable any more. We must assume that no other thread is
* using it any more, otherwise we couldn't safely free the process
@@ -1085,20 +1159,34 @@ static void kfd_process_wq_release(struct work_struct *work)
{
struct kfd_process *p = container_of(work, struct kfd_process,
release_work);
+ struct dma_fence *ef;
+
+ /*
+ * If a GPU is in reset, user queues may still be running; wait for reset completion.
+ */
+ kfd_process_wait_gpu_reset_complete(p);
+
+ /* Signal the eviction fence after user mode queues are
+ * destroyed. This allows any BOs to be freed without
+ * triggering pointless evictions or waiting for fences.
+ */
+ synchronize_rcu();
+ ef = rcu_access_pointer(p->ef);
+ if (ef)
+ dma_fence_signal(ef);
kfd_process_remove_sysfs(p);
- kfd_iommu_unbind_process(p);
+ kfd_debugfs_remove_process(p);
kfd_process_kunmap_signal_bo(p);
kfd_process_free_outstanding_kfd_bos(p);
svm_range_list_fini(p);
kfd_process_destroy_pdds(p);
- dma_fence_put(p->ef);
+ dma_fence_put(ef);
kfd_event_free_process(p);
- kfd_pasid_free(p->pasid);
mutex_destroy(&p->mutex);
put_task_struct(p->lead_thread);
@@ -1116,10 +1204,8 @@ static void kfd_process_ref_release(struct kref *ref)
static struct mmu_notifier *kfd_process_alloc_notifier(struct mm_struct *mm)
{
- int idx = srcu_read_lock(&kfd_processes_srcu);
- struct kfd_process *p = find_process_by_mm(mm);
-
- srcu_read_unlock(&kfd_processes_srcu, idx);
+ /* This increments p->ref counter if kfd process p exists */
+ struct kfd_process *p = kfd_lookup_process_by_mm(mm);
return p ? &p->mmu_notifier : ERR_PTR(-ESRCH);
}
@@ -1129,11 +1215,58 @@ static void kfd_process_free_notifier(struct mmu_notifier *mn)
kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
}
+static void kfd_process_notifier_release_internal(struct kfd_process *p)
+{
+ int i;
+
+ cancel_delayed_work_sync(&p->eviction_work);
+ cancel_delayed_work_sync(&p->restore_work);
+
+ /*
+ * Dequeue and destroy user queues, it is not safe for GPU to access
+ * system memory after mmu release notifier callback returns because
+ * exit_mmap frees process memory afterwards.
+ */
+ kfd_process_dequeue_from_all_devices(p);
+ pqm_uninit(&p->pqm);
+
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ /* re-enable GFX OFF since runtime enable with ttmp setup disabled it. */
+ if (!kfd_dbg_is_rlc_restore_supported(pdd->dev) && p->runtime_info.ttmp_setup)
+ amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+ }
+
+ /* Indicate to other users that MM is no longer valid */
+ p->mm = NULL;
+ kfd_dbg_trap_disable(p);
+
+ if (atomic_read(&p->debugged_process_count) > 0) {
+ struct kfd_process *target;
+ unsigned int temp;
+ int idx = srcu_read_lock(&kfd_processes_srcu);
+
+ hash_for_each_rcu(kfd_processes_table, temp, target, kfd_processes) {
+ if (target->debugger_process && target->debugger_process == p) {
+ mutex_lock_nested(&target->mutex, 1);
+ kfd_dbg_trap_disable(target);
+ mutex_unlock(&target->mutex);
+ if (atomic_read(&p->debugged_process_count) == 0)
+ break;
+ }
+ }
+
+ srcu_read_unlock(&kfd_processes_srcu, idx);
+ }
+
+ mmu_notifier_put(&p->mmu_notifier);
+}
+
static void kfd_process_notifier_release(struct mmu_notifier *mn,
struct mm_struct *mm)
{
struct kfd_process *p;
- int i;
/*
* The kfd_process structure can not be free because the
@@ -1144,65 +1277,80 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
return;
mutex_lock(&kfd_processes_mutex);
+ /*
+ * Do early return if table is empty.
+ *
+ * This could potentially happen if this function is called concurrently
+ * by mmu_notifier and by kfd_cleanup_processes.
+ *
+ */
+ if (hash_empty(kfd_processes_table)) {
+ mutex_unlock(&kfd_processes_mutex);
+ return;
+ }
hash_del_rcu(&p->kfd_processes);
mutex_unlock(&kfd_processes_mutex);
synchronize_srcu(&kfd_processes_srcu);
- cancel_delayed_work_sync(&p->eviction_work);
- cancel_delayed_work_sync(&p->restore_work);
- cancel_delayed_work_sync(&p->svms.restore_work);
+ kfd_process_notifier_release_internal(p);
+}
- mutex_lock(&p->mutex);
+static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
+ .release = kfd_process_notifier_release,
+ .alloc_notifier = kfd_process_alloc_notifier,
+ .free_notifier = kfd_process_free_notifier,
+};
- /* Iterate over all process device data structures and if the
- * pdd is in debug mode, we should first force unregistration,
- * then we will be able to destroy the queues
- */
- for (i = 0; i < p->n_pdds; i++) {
- struct kfd_dev *dev = p->pdds[i]->dev;
+/*
+ * This code handles the case when driver is being unloaded before all
+ * mm_struct are released. We need to safely free the kfd_process and
+ * avoid race conditions with mmu_notifier that might try to free them.
+ *
+ */
+void kfd_cleanup_processes(void)
+{
+ struct kfd_process *p;
+ struct hlist_node *p_temp;
+ unsigned int temp;
+ HLIST_HEAD(cleanup_list);
- mutex_lock(kfd_get_dbgmgr_mutex());
- if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
- if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
- kfd_dbgmgr_destroy(dev->dbgmgr);
- dev->dbgmgr = NULL;
- }
- }
- mutex_unlock(kfd_get_dbgmgr_mutex());
+ /*
+ * Move all remaining kfd_process from the process table to a
+ * temp list for processing. Once done, callback from mmu_notifier
+ * release will not see the kfd_process in the table and do early return,
+ * avoiding double free issues.
+ */
+ mutex_lock(&kfd_processes_mutex);
+ hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) {
+ hash_del_rcu(&p->kfd_processes);
+ synchronize_srcu(&kfd_processes_srcu);
+ hlist_add_head(&p->kfd_processes, &cleanup_list);
}
+ mutex_unlock(&kfd_processes_mutex);
- kfd_process_dequeue_from_all_devices(p);
- pqm_uninit(&p->pqm);
+ hlist_for_each_entry_safe(p, p_temp, &cleanup_list, kfd_processes)
+ kfd_process_notifier_release_internal(p);
- /* Indicate to other users that MM is no longer valid */
- p->mm = NULL;
- /* Signal the eviction fence after user mode queues are
- * destroyed. This allows any BOs to be freed without
- * triggering pointless evictions or waiting for fences.
+ /*
+ * Ensures that all outstanding free_notifier get called, triggering
+ * the release of the kfd_process struct.
*/
- dma_fence_signal(p->ef);
-
- mutex_unlock(&p->mutex);
-
- mmu_notifier_put(&p->mmu_notifier);
+ mmu_notifier_synchronize();
}
-static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
- .release = kfd_process_notifier_release,
- .alloc_notifier = kfd_process_alloc_notifier,
- .free_notifier = kfd_process_free_notifier,
-};
-
-static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
+int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
{
unsigned long offset;
int i;
+ if (p->has_cwsr)
+ return 0;
+
for (i = 0; i < p->n_pdds; i++) {
- struct kfd_dev *dev = p->pdds[i]->dev;
+ struct kfd_node *dev = p->pdds[i]->dev;
struct qcm_process_device *qpd = &p->pdds[i]->qpd;
- if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
+ if (!dev->kfd->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
continue;
offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
@@ -1213,25 +1361,30 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
if (IS_ERR_VALUE(qpd->tba_addr)) {
int err = qpd->tba_addr;
- pr_err("Failure to set tba address. error %d.\n", err);
+ dev_err(dev->adev->dev,
+ "Failure to set tba address. error %d.\n", err);
qpd->tba_addr = 0;
qpd->cwsr_kaddr = NULL;
return err;
}
- memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
+ memcpy(qpd->cwsr_kaddr, dev->kfd->cwsr_isa, dev->kfd->cwsr_isa_size);
+
+ kfd_process_set_trap_debug_flag(qpd, p->debug_trap_enabled);
qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
}
+ p->has_cwsr = true;
+
return 0;
}
static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
{
- struct kfd_dev *dev = pdd->dev;
+ struct kfd_node *dev = pdd->dev;
struct qcm_process_device *qpd = &pdd->qpd;
uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
| KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
@@ -1240,7 +1393,7 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
void *kaddr;
int ret;
- if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
+ if (!dev->kfd->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
return 0;
/* cwsr_base is only set for dGPU */
@@ -1253,7 +1406,10 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
qpd->cwsr_kaddr = kaddr;
qpd->tba_addr = qpd->cwsr_base;
- memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
+ memcpy(qpd->cwsr_kaddr, dev->kfd->cwsr_isa, dev->kfd->cwsr_isa_size);
+
+ kfd_process_set_trap_debug_flag(&pdd->qpd,
+ pdd->process->debug_trap_enabled);
qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
@@ -1264,13 +1420,13 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd)
{
- struct kfd_dev *dev = pdd->dev;
+ struct kfd_node *dev = pdd->dev;
struct qcm_process_device *qpd = &pdd->qpd;
- if (!dev->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base)
+ if (!dev->kfd->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base)
return;
- kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, qpd->cwsr_kaddr);
+ kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, &qpd->cwsr_kaddr);
}
void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
@@ -1311,37 +1467,51 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
* support retry.
*/
for (i = 0; i < p->n_pdds; i++) {
- struct kfd_dev *dev = p->pdds[i]->dev;
+ struct kfd_node *dev = p->pdds[i]->dev;
/* Only consider GFXv9 and higher GPUs. Older GPUs don't
* support the SVM APIs and don't need to be considered
* for the XNACK mode selection.
*/
- if (dev->device_info->asic_family < CHIP_VEGA10)
+ if (!KFD_IS_SOC15(dev))
continue;
/* Aldebaran can always support XNACK because it can support
* per-process XNACK mode selection. But let the dev->noretry
* setting still influence the default XNACK mode.
*/
- if (supported &&
- dev->device_info->asic_family == CHIP_ALDEBARAN)
+ if (supported && KFD_SUPPORT_XNACK_PER_PROCESS(dev)) {
+ if (!amdgpu_sriov_xnack_support(dev->kfd->adev)) {
+ pr_debug("SRIOV platform xnack not supported\n");
+ return false;
+ }
continue;
+ }
/* GFXv10 and later GPUs do not support shader preemption
* during page faults. This can lead to poor QoS for queue
* management and memory-manager-related preemptions or
* even deadlocks.
*/
- if (dev->device_info->asic_family >= CHIP_NAVI10)
+ if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
return false;
- if (dev->noretry)
+ if (dev->kfd->noretry)
return false;
}
return true;
}
+void kfd_process_set_trap_debug_flag(struct qcm_process_device *qpd,
+ bool enabled)
+{
+ if (qpd->cwsr_kaddr) {
+ uint64_t *tma =
+ (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
+ tma[2] = enabled;
+ }
+}
+
/*
* On return the kfd_process is fully operational and will be freed when the
* mm is released
@@ -1361,15 +1531,19 @@ static struct kfd_process *create_process(const struct task_struct *thread)
process->mm = thread->mm;
process->lead_thread = thread->group_leader;
process->n_pdds = 0;
+ process->queues_paused = false;
INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
process->last_restore_timestamp = get_jiffies_64();
- kfd_event_init_process(process);
+ err = kfd_event_init_process(process);
+ if (err)
+ goto err_event_init;
process->is_32bit_user_mode = in_compat_syscall();
-
- process->pasid = kfd_pasid_alloc();
- if (process->pasid == 0)
- goto err_alloc_pasid;
+ process->debug_trap_enabled = false;
+ process->debugger_process = NULL;
+ process->exception_enable_mask = 0;
+ atomic_set(&process->debugged_process_count, 0);
+ sema_init(&process->runtime_enable_sema, 0);
err = pqm_init(&process->pqm, process);
if (err != 0)
@@ -1391,6 +1565,11 @@ static struct kfd_process *create_process(const struct task_struct *thread)
hash_add_rcu(kfd_processes_table, &process->kfd_processes,
(uintptr_t)process->mm);
+ /* Avoid free_notifier to start kfd_process_wq_release if
+ * mmu_notifier_get failed because of pending signal.
+ */
+ kref_get(&process->ref);
+
/* MMU notifier registration must be the last call that can fail
* because after this point we cannot unwind the process creation.
* After this point, mmu_notifier_put will trigger the cleanup by
@@ -1403,8 +1582,11 @@ static struct kfd_process *create_process(const struct task_struct *thread)
}
BUG_ON(mn != &process->mmu_notifier);
+ kfd_unref_process(process);
get_task_struct(process->lead_thread);
+ INIT_WORK(&process->debug_event_workarea, debug_event_write_work_handler);
+
return process;
err_register_notifier:
@@ -1416,48 +1598,15 @@ err_init_svm_range_list:
err_init_apertures:
pqm_uninit(&process->pqm);
err_process_pqm_init:
- kfd_pasid_free(process->pasid);
-err_alloc_pasid:
+ kfd_event_free_process(process);
+err_event_init:
mutex_destroy(&process->mutex);
kfree(process);
err_alloc_process:
return ERR_PTR(err);
}
-static int init_doorbell_bitmap(struct qcm_process_device *qpd,
- struct kfd_dev *dev)
-{
- unsigned int i;
- int range_start = dev->shared_resources.non_cp_doorbells_start;
- int range_end = dev->shared_resources.non_cp_doorbells_end;
-
- if (!KFD_IS_SOC15(dev->device_info->asic_family))
- return 0;
-
- qpd->doorbell_bitmap =
- kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
- BITS_PER_BYTE), GFP_KERNEL);
- if (!qpd->doorbell_bitmap)
- return -ENOMEM;
-
- /* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
- pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
- pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
- range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
- range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
-
- for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
- if (i >= range_start && i <= range_end) {
- set_bit(i, qpd->doorbell_bitmap);
- set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
- qpd->doorbell_bitmap);
- }
- }
-
- return 0;
-}
-
-struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
+struct kfd_process_device *kfd_get_process_device_data(struct kfd_node *dev,
struct kfd_process *p)
{
int i;
@@ -1469,7 +1618,7 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
return NULL;
}
-struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
+struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
struct kfd_process *p)
{
struct kfd_process_device *pdd = NULL;
@@ -1480,16 +1629,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
if (!pdd)
return NULL;
- if (kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
- pr_err("Failed to alloc doorbell for pdd\n");
- goto err_free_pdd;
- }
-
- if (init_doorbell_bitmap(&pdd->qpd, dev)) {
- pr_err("Failed to init doorbell for process\n");
- goto err_free_pdd;
- }
-
pdd->dev = dev;
INIT_LIST_HEAD(&pdd->qpd.queues_list);
INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
@@ -1501,19 +1640,24 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
pdd->bound = PDD_UNBOUND;
pdd->already_dequeued = false;
pdd->runtime_inuse = false;
- pdd->vram_usage = 0;
+ atomic64_set(&pdd->vram_usage, 0);
pdd->sdma_past_activity_counter = 0;
+ pdd->user_gpu_id = dev->id;
atomic64_set(&pdd->evict_duration_counter, 0);
+
p->pdds[p->n_pdds++] = pdd;
+ if (kfd_dbg_is_per_vmid_supported(pdd->dev))
+ pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap(
+ pdd->dev->adev,
+ false,
+ 0);
/* Init idr used for memory handle translation */
idr_init(&pdd->alloc_idr);
- return pdd;
+ atomic_inc(&dev->kfd->kfd_processes_count);
-err_free_pdd:
- kfree(pdd);
- return NULL;
+ return pdd;
}
/**
@@ -1533,8 +1677,11 @@ err_free_pdd:
int kfd_process_device_init_vm(struct kfd_process_device *pdd,
struct file *drm_file)
{
+ struct amdgpu_fpriv *drv_priv;
+ struct amdgpu_vm *avm;
struct kfd_process *p;
- struct kfd_dev *dev;
+ struct dma_fence *ef;
+ struct kfd_node *dev;
int ret;
if (!drm_file)
@@ -1543,16 +1690,25 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
if (pdd->drm_priv)
return -EBUSY;
+ ret = amdgpu_file_to_fpriv(drm_file, &drv_priv);
+ if (ret)
+ return ret;
+ avm = &drv_priv->vm;
+
p = pdd->process;
dev = pdd->dev;
- ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
- dev->kgd, drm_file, p->pasid,
- &p->kgd_process_info, &p->ef);
+ ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, avm,
+ &p->kgd_process_info,
+ p->ef ? NULL : &ef);
if (ret) {
- pr_err("Failed to create process VM object\n");
+ dev_err(dev->adev->dev, "Failed to create process VM object\n");
return ret;
}
+
+ if (!p->ef)
+ RCU_INIT_POINTER(p->ef, ef);
+
pdd->drm_priv = drm_file->private_data;
ret = kfd_process_device_reserve_ib_mem(pdd);
@@ -1562,14 +1718,27 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
if (ret)
goto err_init_cwsr;
+ if (unlikely(!avm->pasid)) {
+ dev_warn(pdd->dev->adev->dev, "WARN: vm %p has no pasid associated",
+ avm);
+ ret = -EINVAL;
+ goto err_get_pasid;
+ }
+
+ pdd->pasid = avm->pasid;
pdd->drm_file = drm_file;
+ kfd_smi_event_process(pdd, true);
+
return 0;
+err_get_pasid:
+ kfd_process_device_destroy_cwsr_dgpu(pdd);
err_init_cwsr:
+ kfd_process_device_destroy_ib_mem(pdd);
err_reserve_ib_mem:
- kfd_process_device_free_bos(pdd);
pdd->drm_priv = NULL;
+ amdgpu_amdkfd_gpuvm_destroy_cb(dev->adev, avm);
return ret;
}
@@ -1581,7 +1750,7 @@ err_reserve_ib_mem:
*
* Assumes that the process lock is held.
*/
-struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
+struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev,
struct kfd_process *p)
{
struct kfd_process_device *pdd;
@@ -1589,7 +1758,7 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
pdd = kfd_get_process_device_data(dev, p);
if (!pdd) {
- pr_err("Process device data doesn't exist\n");
+ dev_err(dev->adev->dev, "Process device data doesn't exist\n");
return ERR_PTR(-ENOMEM);
}
@@ -1602,17 +1771,13 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
* pdd is destroyed.
*/
if (!pdd->runtime_inuse) {
- err = pm_runtime_get_sync(dev->ddev->dev);
+ err = pm_runtime_get_sync(adev_to_drm(dev->adev)->dev);
if (err < 0) {
- pm_runtime_put_autosuspend(dev->ddev->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(dev->adev)->dev);
return ERR_PTR(err);
}
}
- err = kfd_iommu_bind_process_to_device(pdd);
- if (err)
- goto out;
-
/*
* make sure that runtime_usage counter is incremented just once
* per pdd
@@ -1620,15 +1785,6 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
pdd->runtime_inuse = true;
return pdd;
-
-out:
- /* balance runpm reference count and exit with error */
- if (!pdd->runtime_inuse) {
- pm_runtime_mark_last_busy(dev->ddev->dev);
- pm_runtime_put_autosuspend(dev->ddev->dev);
- }
-
- return ERR_PTR(err);
}
/* Create specific handle mapped to mem from process local memory idr
@@ -1662,25 +1818,50 @@ void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
idr_remove(&pdd->alloc_idr, handle);
}
-/* This increments the process->ref counter. */
-struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid)
+static struct kfd_process_device *kfd_lookup_process_device_by_pasid(u32 pasid)
{
- struct kfd_process *p, *ret_p = NULL;
+ struct kfd_process_device *ret_p = NULL;
+ struct kfd_process *p;
unsigned int temp;
-
- int idx = srcu_read_lock(&kfd_processes_srcu);
+ int i;
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- if (p->pasid == pasid) {
- kref_get(&p->ref);
- ret_p = p;
- break;
+ for (i = 0; i < p->n_pdds; i++) {
+ if (p->pdds[i]->pasid == pasid) {
+ ret_p = p->pdds[i];
+ break;
+ }
}
+ if (ret_p)
+ break;
+ }
+ return ret_p;
+}
+
+/* This increments the process->ref counter. */
+struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid,
+ struct kfd_process_device **pdd)
+{
+ struct kfd_process_device *ret_p;
+
+ int idx = srcu_read_lock(&kfd_processes_srcu);
+
+ ret_p = kfd_lookup_process_device_by_pasid(pasid);
+ if (ret_p) {
+ if (pdd)
+ *pdd = ret_p;
+ kref_get(&ret_p->process->ref);
+
+ srcu_read_unlock(&kfd_processes_srcu, idx);
+ return ret_p->process;
}
srcu_read_unlock(&kfd_processes_srcu, idx);
- return ret_p;
+ if (pdd)
+ *pdd = NULL;
+
+ return NULL;
}
/* This increments the process->ref counter. */
@@ -1704,7 +1885,7 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
* Eviction is reference-counted per process-device. This means multiple
* evictions from different sources can be nested safely.
*/
-int kfd_process_evict_queues(struct kfd_process *p)
+int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
{
int r = 0;
int i;
@@ -1712,6 +1893,10 @@ int kfd_process_evict_queues(struct kfd_process *p)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
+ struct device *dev = pdd->dev->adev->dev;
+
+ kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid,
+ trigger);
r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
&pdd->qpd);
@@ -1720,10 +1905,12 @@ int kfd_process_evict_queues(struct kfd_process *p)
* them been add back since they actually not be saved right now.
*/
if (r && r != -EIO) {
- pr_err("Failed to evict process queues\n");
+ dev_err(dev, "Failed to evict process queues\n");
goto fail;
}
n_evicted++;
+
+ pdd->dev->dqm->is_hws_hang = false;
}
return r;
@@ -1737,9 +1924,13 @@ fail:
if (n_evicted == 0)
break;
+
+ kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
+
if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
&pdd->qpd))
- pr_err("Failed to restore queues\n");
+ dev_err(pdd->dev->adev->dev,
+ "Failed to restore queues\n");
n_evicted--;
}
@@ -1755,11 +1946,14 @@ int kfd_process_restore_queues(struct kfd_process *p)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
+ struct device *dev = pdd->dev->adev->dev;
+
+ kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
&pdd->qpd);
if (r) {
- pr_err("Failed to restore process queues\n");
+ dev_err(dev, "Failed to restore process queues\n");
if (!ret)
ret = r;
}
@@ -1773,27 +1967,43 @@ int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id)
int i;
for (i = 0; i < p->n_pdds; i++)
- if (p->pdds[i] && gpu_id == p->pdds[i]->dev->id)
+ if (p->pdds[i] && gpu_id == p->pdds[i]->user_gpu_id)
return i;
return -EINVAL;
}
int
-kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev,
- uint32_t *gpuid, uint32_t *gpuidx)
+kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node,
+ uint32_t *gpuid, uint32_t *gpuidx)
{
- struct kgd_dev *kgd = (struct kgd_dev *)adev;
int i;
for (i = 0; i < p->n_pdds; i++)
- if (p->pdds[i] && p->pdds[i]->dev->kgd == kgd) {
- *gpuid = p->pdds[i]->dev->id;
+ if (p->pdds[i] && p->pdds[i]->dev == node) {
+ *gpuid = p->pdds[i]->user_gpu_id;
*gpuidx = i;
return 0;
}
return -EINVAL;
}
+static int signal_eviction_fence(struct kfd_process *p)
+{
+ struct dma_fence *ef;
+ int ret;
+
+ rcu_read_lock();
+ ef = dma_fence_get_rcu_safe(&p->ef);
+ rcu_read_unlock();
+ if (!ef)
+ return -EINVAL;
+
+ ret = dma_fence_signal(ef);
+ dma_fence_put(ef);
+
+ return ret;
+}
+
static void evict_process_worker(struct work_struct *work)
{
int ret;
@@ -1806,29 +2016,45 @@ static void evict_process_worker(struct work_struct *work)
* lifetime of this thread, kfd_process p will be valid
*/
p = container_of(dwork, struct kfd_process, eviction_work);
- WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
- "Eviction fence mismatch\n");
-
- /* Narrow window of overlap between restore and evict work
- * item is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
- * unreserves KFD BOs, it is possible to evicted again. But
- * restore has few more steps of finish. So lets wait for any
- * previous restore work to complete
- */
- flush_delayed_work(&p->restore_work);
- pr_debug("Started evicting pasid 0x%x\n", p->pasid);
- ret = kfd_process_evict_queues(p);
+ pr_debug("Started evicting process pid %d\n", p->lead_thread->pid);
+ ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_TTM);
if (!ret) {
- dma_fence_signal(p->ef);
- dma_fence_put(p->ef);
- p->ef = NULL;
- queue_delayed_work(kfd_restore_wq, &p->restore_work,
- msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
+ /* If another thread already signaled the eviction fence,
+ * they are responsible stopping the queues and scheduling
+ * the restore work.
+ */
+ if (signal_eviction_fence(p) ||
+ mod_delayed_work(kfd_restore_wq, &p->restore_work,
+ msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)))
+ kfd_process_restore_queues(p);
- pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
+ pr_debug("Finished evicting process pid %d\n", p->lead_thread->pid);
} else
- pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
+ pr_err("Failed to evict queues of process pid %d\n", p->lead_thread->pid);
+}
+
+static int restore_process_helper(struct kfd_process *p)
+{
+ int ret = 0;
+
+ /* VMs may not have been acquired yet during debugging. */
+ if (p->kgd_process_info) {
+ ret = amdgpu_amdkfd_gpuvm_restore_process_bos(
+ p->kgd_process_info, &p->ef);
+ if (ret)
+ return ret;
+ }
+
+ ret = kfd_process_restore_queues(p);
+ if (!ret)
+ pr_debug("Finished restoring process pid %d\n",
+ p->lead_thread->pid);
+ else
+ pr_err("Failed to restore queues of process pid %d\n",
+ p->lead_thread->pid);
+
+ return ret;
}
static void restore_process_worker(struct work_struct *work)
@@ -1843,7 +2069,7 @@ static void restore_process_worker(struct work_struct *work)
* lifetime of this thread, kfd_process p will be valid
*/
p = container_of(dwork, struct kfd_process, restore_work);
- pr_debug("Started restoring pasid 0x%x\n", p->pasid);
+ pr_debug("Started restoring process pasid %d\n", (int)p->lead_thread->pid);
/* Setting last_restore_timestamp before successful restoration.
* Otherwise this would have to be set by KGD (restore_process_bos)
@@ -1856,22 +2082,15 @@ static void restore_process_worker(struct work_struct *work)
*/
p->last_restore_timestamp = get_jiffies_64();
- ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
- &p->ef);
+
+ ret = restore_process_helper(p);
if (ret) {
- pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
- p->pasid, PROCESS_BACK_OFF_TIME_MS);
- ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
- msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
- WARN(!ret, "reschedule restore work failed\n");
- return;
+ pr_debug("Failed to restore BOs of process pid %d, retry after %d ms\n",
+ p->lead_thread->pid, PROCESS_BACK_OFF_TIME_MS);
+ if (mod_delayed_work(kfd_restore_wq, &p->restore_work,
+ msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)))
+ kfd_process_restore_queues(p);
}
-
- ret = kfd_process_restore_queues(p);
- if (!ret)
- pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
- else
- pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
}
void kfd_suspend_all_processes(void)
@@ -1882,14 +2101,9 @@ void kfd_suspend_all_processes(void)
WARN(debug_evictions, "Evicting all processes");
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- cancel_delayed_work_sync(&p->eviction_work);
- cancel_delayed_work_sync(&p->restore_work);
-
- if (kfd_process_evict_queues(p))
- pr_err("Failed to suspend process 0x%x\n", p->pasid);
- dma_fence_signal(p->ef);
- dma_fence_put(p->ef);
- p->ef = NULL;
+ if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND))
+ pr_err("Failed to suspend process pid %d\n", p->lead_thread->pid);
+ signal_eviction_fence(p);
}
srcu_read_unlock(&kfd_processes_srcu, idx);
}
@@ -1901,9 +2115,9 @@ int kfd_resume_all_processes(void)
int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
- pr_err("Restore process %d failed during resume\n",
- p->pasid);
+ if (restore_process_helper(p)) {
+ pr_err("Restore process pid %d failed during resume\n",
+ p->lead_thread->pid);
ret = -EFAULT;
}
}
@@ -1911,14 +2125,14 @@ int kfd_resume_all_processes(void)
return ret;
}
-int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
+int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
struct vm_area_struct *vma)
{
struct kfd_process_device *pdd;
struct qcm_process_device *qpd;
if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
- pr_err("Incorrect CWSR mapping size.\n");
+ dev_err(dev->adev->dev, "Incorrect CWSR mapping size.\n");
return -EINVAL;
}
@@ -1930,33 +2144,173 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
get_order(KFD_CWSR_TBA_TMA_SIZE));
if (!qpd->cwsr_kaddr) {
- pr_err("Error allocating per process CWSR buffer.\n");
+ dev_err(dev->adev->dev,
+ "Error allocating per process CWSR buffer.\n");
return -ENOMEM;
}
- vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
- | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
+ vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND
+ | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP);
/* Mapping pages to user process */
return remap_pfn_range(vma, vma->vm_start,
PFN_DOWN(__pa(qpd->cwsr_kaddr)),
KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}
-void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
+/* assumes caller holds process lock. */
+int kfd_process_drain_interrupts(struct kfd_process_device *pdd)
{
- struct kfd_dev *dev = pdd->dev;
+ uint32_t irq_drain_fence[8];
+ uint8_t node_id = 0;
+ int r = 0;
- if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
- /* Nothing to flush until a VMID is assigned, which
- * only happens when the first queue is created.
- */
- if (pdd->qpd.vmid)
- amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
- pdd->qpd.vmid);
- } else {
- amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
- pdd->process->pasid, type);
+ if (!KFD_IS_SOC15(pdd->dev))
+ return 0;
+
+ pdd->process->irq_drain_is_open = true;
+
+ memset(irq_drain_fence, 0, sizeof(irq_drain_fence));
+ irq_drain_fence[0] = (KFD_IRQ_FENCE_SOURCEID << 8) |
+ KFD_IRQ_FENCE_CLIENTID;
+ irq_drain_fence[3] = pdd->pasid;
+
+ /*
+ * For GFX 9.4.3/9.5.0, send the NodeId also in IH cookie DW[3]
+ */
+ if (KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 5, 0)) {
+ node_id = ffs(pdd->dev->interrupt_bitmap) - 1;
+ irq_drain_fence[3] |= node_id << 16;
+ }
+
+ /* ensure stale irqs scheduled KFD interrupts and send drain fence. */
+ if (amdgpu_amdkfd_send_close_event_drain_irq(pdd->dev->adev,
+ irq_drain_fence)) {
+ pdd->process->irq_drain_is_open = false;
+ return 0;
+ }
+
+ r = wait_event_interruptible(pdd->process->wait_irq_drain,
+ !READ_ONCE(pdd->process->irq_drain_is_open));
+ if (r)
+ pdd->process->irq_drain_is_open = false;
+
+ return r;
+}
+
+void kfd_process_close_interrupt_drain(unsigned int pasid)
+{
+ struct kfd_process *p;
+
+ p = kfd_lookup_process_by_pasid(pasid, NULL);
+
+ if (!p)
+ return;
+
+ WRITE_ONCE(p->irq_drain_is_open, false);
+ wake_up_all(&p->wait_irq_drain);
+ kfd_unref_process(p);
+}
+
+struct send_exception_work_handler_workarea {
+ struct work_struct work;
+ struct kfd_process *p;
+ unsigned int queue_id;
+ uint64_t error_reason;
+};
+
+static void send_exception_work_handler(struct work_struct *work)
+{
+ struct send_exception_work_handler_workarea *workarea;
+ struct kfd_process *p;
+ struct queue *q;
+ struct mm_struct *mm;
+ struct kfd_context_save_area_header __user *csa_header;
+ uint64_t __user *err_payload_ptr;
+ uint64_t cur_err;
+ uint32_t ev_id;
+
+ workarea = container_of(work,
+ struct send_exception_work_handler_workarea,
+ work);
+ p = workarea->p;
+
+ mm = get_task_mm(p->lead_thread);
+
+ if (!mm)
+ return;
+
+ kthread_use_mm(mm);
+
+ q = pqm_get_user_queue(&p->pqm, workarea->queue_id);
+
+ if (!q)
+ goto out;
+
+ csa_header = (void __user *)q->properties.ctx_save_restore_area_address;
+
+ get_user(err_payload_ptr, (uint64_t __user **)&csa_header->err_payload_addr);
+ get_user(cur_err, err_payload_ptr);
+ cur_err |= workarea->error_reason;
+ put_user(cur_err, err_payload_ptr);
+ get_user(ev_id, &csa_header->err_event_id);
+
+ kfd_set_event(p, ev_id);
+
+out:
+ kthread_unuse_mm(mm);
+ mmput(mm);
+}
+
+int kfd_send_exception_to_runtime(struct kfd_process *p,
+ unsigned int queue_id,
+ uint64_t error_reason)
+{
+ struct send_exception_work_handler_workarea worker;
+
+ INIT_WORK_ONSTACK(&worker.work, send_exception_work_handler);
+
+ worker.p = p;
+ worker.queue_id = queue_id;
+ worker.error_reason = error_reason;
+
+ schedule_work(&worker.work);
+ flush_work(&worker.work);
+ destroy_work_on_stack(&worker.work);
+
+ return 0;
+}
+
+struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *p, uint32_t gpu_id)
+{
+ int i;
+
+ if (gpu_id) {
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ if (pdd->user_gpu_id == gpu_id)
+ return pdd;
+ }
+ }
+ return NULL;
+}
+
+int kfd_process_get_user_gpu_id(struct kfd_process *p, uint32_t actual_gpu_id)
+{
+ int i;
+
+ if (!actual_gpu_id)
+ return 0;
+
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ if (pdd->dev->id == actual_gpu_id)
+ return pdd->user_gpu_id;
}
+ return -EINVAL;
}
#if defined(CONFIG_DEBUG_FS)
@@ -1970,8 +2324,8 @@ int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
int idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- seq_printf(m, "Process %d PASID 0x%x:\n",
- p->lead_thread->tgid, p->pasid);
+ seq_printf(m, "Process %d PASID %d:\n",
+ p->lead_thread->tgid, p->lead_thread->pid);
mutex_lock(&p->mutex);
r = pqm_debugfs_mqds(m, &p->pqm);
@@ -1987,4 +2341,3 @@ int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
}
#endif
-
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 3627e7ac161b..7fbb5c274ccc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -27,6 +28,7 @@
#include "kfd_priv.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_reset.h"
static inline struct process_queue_node *get_queue_by_qid(
struct process_queue_manager *pqm, unsigned int qid)
@@ -42,6 +44,20 @@ static inline struct process_queue_node *get_queue_by_qid(
return NULL;
}
+static int assign_queue_slot_by_qid(struct process_queue_manager *pqm,
+ unsigned int qid)
+{
+ if (qid >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
+ return -EINVAL;
+
+ if (__test_and_set_bit(qid, pqm->queue_slot_bitmap)) {
+ pr_err("Cannot create new queue because requested qid(%u) is in use\n", qid);
+ return -ENOSPC;
+ }
+
+ return 0;
+}
+
static int find_available_queue_slot(struct process_queue_manager *pqm,
unsigned int *qid)
{
@@ -53,8 +69,8 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
pr_debug("The new slot id %lu\n", found);
if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
- pr_info("Cannot open more queues for process with pasid 0x%x\n",
- pqm->process->pasid);
+ pr_info("Cannot open more queues for process with pid %d\n",
+ pqm->process->lead_thread->pid);
return -ENOMEM;
}
@@ -66,19 +82,29 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
{
- struct kfd_dev *dev = pdd->dev;
+ struct kfd_node *dev = pdd->dev;
if (pdd->already_dequeued)
return;
-
+ /* The MES context flush needs to filter out the case which the
+ * KFD process is created without setting up the MES context and
+ * queue for creating a compute queue.
+ */
dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
+ if (dev->kfd->shared_resources.enable_mes && !!pdd->proc_ctx_gpu_addr &&
+ down_read_trylock(&dev->adev->reset_domain->sem)) {
+ amdgpu_mes_flush_shader_debugger(dev->adev,
+ pdd->proc_ctx_gpu_addr);
+ up_read(&dev->adev->reset_domain->sem);
+ }
pdd->already_dequeued = true;
}
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
void *gws)
{
- struct kfd_dev *dev = NULL;
+ struct mqd_update_info minfo = {0};
+ struct kfd_node *dev = NULL;
struct process_queue_node *pqn;
struct kfd_process_device *pdd;
struct kgd_mem *mem = NULL;
@@ -108,20 +134,34 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
if (!gws && pdd->qpd.num_gws == 0)
return -EINVAL;
- if (gws)
- ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
- gws, &mem);
- else
- ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
- pqn->q->gws);
- if (unlikely(ret))
- return ret;
+ if ((KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0)) &&
+ !dev->kfd->shared_resources.enable_mes) {
+ if (gws)
+ ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
+ gws, &mem);
+ else
+ ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
+ pqn->q->gws);
+ if (unlikely(ret))
+ return ret;
+ pqn->q->gws = mem;
+ } else {
+ /*
+ * Intentionally set GWS to a non-NULL value
+ * for devices that do not use GWS for global wave
+ * synchronization but require the formality
+ * of setting GWS for cooperative groups.
+ */
+ pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL;
+ }
- pqn->q->gws = mem;
- pdd->qpd.num_gws = gws ? amdgpu_amdkfd_get_num_gws(dev->kgd) : 0;
+ pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;
+ minfo.update_flag = gws ? UPDATE_FLAG_IS_GWS : 0;
return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
- pqn->q, NULL);
+ pqn->q, &minfo);
}
void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
@@ -135,9 +175,8 @@ void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
{
INIT_LIST_HEAD(&pqm->queues);
- pqm->queue_slot_bitmap =
- kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
- BITS_PER_BYTE), GFP_KERNEL);
+ pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+ GFP_KERNEL);
if (!pqm->queue_slot_bitmap)
return -ENOMEM;
pqm->process = p;
@@ -145,33 +184,73 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
return 0;
}
+static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
+ struct process_queue_node *pqn)
+{
+ struct kfd_node *dev;
+ struct kfd_process_device *pdd;
+
+ dev = pqn->q->device;
+
+ pdd = kfd_get_process_device_data(dev, pqm->process);
+ if (!pdd) {
+ pr_err("Process device data doesn't exist\n");
+ return;
+ }
+
+ if (pqn->q->gws) {
+ if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 5, 0) &&
+ !dev->kfd->shared_resources.enable_mes)
+ amdgpu_amdkfd_remove_gws_from_process(
+ pqm->process->kgd_process_info, pqn->q->gws);
+ pdd->qpd.num_gws = 0;
+ }
+
+ if (dev->kfd->shared_resources.enable_mes) {
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, &pqn->q->gang_ctx_bo);
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&pqn->q->wptr_bo_gart);
+ }
+}
+
void pqm_uninit(struct process_queue_manager *pqm)
{
struct process_queue_node *pqn, *next;
list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
- if (pqn->q && pqn->q->gws)
- amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
- pqn->q->gws);
+ if (pqn->q) {
+ struct kfd_process_device *pdd = kfd_get_process_device_data(pqn->q->device,
+ pqm->process);
+ if (pdd) {
+ kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
+ kfd_queue_release_buffers(pdd, &pqn->q->properties);
+ } else {
+ WARN_ON(!pdd);
+ }
+ pqm_clean_queue_resource(pqm, pqn);
+ }
+
kfd_procfs_del_queue(pqn->q);
uninit_queue(pqn->q);
list_del(&pqn->process_queue_list);
kfree(pqn);
}
- kfree(pqm->queue_slot_bitmap);
+ bitmap_free(pqm->queue_slot_bitmap);
pqm->queue_slot_bitmap = NULL;
}
static int init_user_queue(struct process_queue_manager *pqm,
- struct kfd_dev *dev, struct queue **q,
+ struct kfd_node *dev, struct queue **q,
struct queue_properties *q_properties,
- struct file *f, unsigned int qid)
+ unsigned int qid)
{
int retval;
/* Doorbell initialized in user space*/
q_properties->doorbell_ptr = NULL;
+ q_properties->exception_status = KFD_EC_MASK(EC_QUEUE_NEW);
/* let DQM handle it*/
q_properties->vmid = 0;
@@ -184,16 +263,54 @@ static int init_user_queue(struct process_queue_manager *pqm,
(*q)->device = dev;
(*q)->process = pqm->process;
+ if (dev->kfd->shared_resources.enable_mes) {
+ retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
+ AMDGPU_MES_GANG_CTX_SIZE,
+ &(*q)->gang_ctx_bo,
+ &(*q)->gang_ctx_gpu_addr,
+ &(*q)->gang_ctx_cpu_ptr,
+ false);
+ if (retval) {
+ pr_err("failed to allocate gang context bo\n");
+ goto cleanup;
+ }
+ memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
+
+ /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
+ * on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
+ */
+ if (dev->adev != amdgpu_ttm_adev(q_properties->wptr_bo->tbo.bdev)) {
+ pr_err("Queue memory allocated to wrong device\n");
+ retval = -EINVAL;
+ goto free_gang_ctx_bo;
+ }
+
+ retval = amdgpu_amdkfd_map_gtt_bo_to_gart(q_properties->wptr_bo,
+ &(*q)->wptr_bo_gart);
+ if (retval) {
+ pr_err("Failed to map wptr bo to GART\n");
+ goto free_gang_ctx_bo;
+ }
+ }
+
pr_debug("PQM After init queue");
+ return 0;
+free_gang_ctx_bo:
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, &(*q)->gang_ctx_bo);
+cleanup:
+ uninit_queue(*q);
+ *q = NULL;
return retval;
}
int pqm_create_queue(struct process_queue_manager *pqm,
- struct kfd_dev *dev,
- struct file *f,
+ struct kfd_node *dev,
struct queue_properties *properties,
unsigned int *qid,
+ const struct kfd_criu_queue_priv_data *q_data,
+ const void *restore_mqd,
+ const void *restore_ctl_stack,
uint32_t *p_doorbell_offset_in_process)
{
int retval;
@@ -204,6 +321,15 @@ int pqm_create_queue(struct process_queue_manager *pqm,
enum kfd_queue_type type = properties->type;
unsigned int max_queues = 127; /* HWS limit */
+ /*
+ * On GFX 9.4.3/9.5.0, increase the number of queues that
+ * can be created to 255. No HWS limit on GFX 9.4.3/9.5.0.
+ */
+ if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0))
+ max_queues = 255;
+
q = NULL;
kq = NULL;
@@ -220,19 +346,40 @@ int pqm_create_queue(struct process_queue_manager *pqm,
* Hence we also check the type as well
*/
if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ))
- max_queues = dev->device_info->max_no_of_hqd/2;
+ max_queues = dev->kfd->device_info.max_no_of_hqd/2;
if (pdd->qpd.queue_count >= max_queues)
return -ENOSPC;
- retval = find_available_queue_slot(pqm, qid);
+ if (q_data) {
+ retval = assign_queue_slot_by_qid(pqm, q_data->q_id);
+ *qid = q_data->q_id;
+ } else
+ retval = find_available_queue_slot(pqm, qid);
+
if (retval != 0)
return retval;
+ /* Register process if this is the first queue */
if (list_empty(&pdd->qpd.queues_list) &&
list_empty(&pdd->qpd.priv_queue_list))
dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
+ /* Allocate proc_ctx_bo only if MES is enabled and this is the first queue */
+ if (!pdd->proc_ctx_cpu_ptr && dev->kfd->shared_resources.enable_mes) {
+ retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
+ AMDGPU_MES_PROC_CTX_SIZE,
+ &pdd->proc_ctx_bo,
+ &pdd->proc_ctx_gpu_addr,
+ &pdd->proc_ctx_cpu_ptr,
+ false);
+ if (retval) {
+ dev_err(dev->adev->dev, "failed to allocate process context bo\n");
+ return retval;
+ }
+ memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+ }
+
pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
if (!pqn) {
retval = -ENOMEM;
@@ -242,18 +389,20 @@ int pqm_create_queue(struct process_queue_manager *pqm,
switch (type) {
case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
+ case KFD_QUEUE_TYPE_SDMA_BY_ENG_ID:
/* SDMA queues are always allocated statically no matter
* which scheduler mode is used. We also do not need to
* check whether a SDMA queue can be allocated here, because
* allocate_sdma_queue() in create_queue() has the
* corresponding check logic.
*/
- retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
+ retval = init_user_queue(pqm, dev, &q, properties, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
pqn->kq = NULL;
- retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd);
+ retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
+ restore_mqd, restore_ctl_stack);
print_queue(q);
break;
@@ -268,12 +417,13 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
- retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
+ retval = init_user_queue(pqm, dev, &q, properties, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
pqn->kq = NULL;
- retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd);
+ retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
+ restore_mqd, restore_ctl_stack);
print_queue(q);
break;
case KFD_QUEUE_TYPE_DIQ:
@@ -285,6 +435,10 @@ int pqm_create_queue(struct process_queue_manager *pqm,
kq->queue->properties.queue_id = *qid;
pqn->kq = kq;
pqn->q = NULL;
+ retval = kfd_process_drain_interrupts(pdd);
+ if (retval)
+ break;
+
retval = dev->dqm->ops.create_kernel_queue(dev->dqm,
kq, &pdd->qpd);
break;
@@ -294,22 +448,33 @@ int pqm_create_queue(struct process_queue_manager *pqm,
}
if (retval != 0) {
- pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n",
- pqm->process->pasid, type, retval);
+ if ((type == KFD_QUEUE_TYPE_SDMA ||
+ type == KFD_QUEUE_TYPE_SDMA_XGMI ||
+ type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) &&
+ retval == -ENOMEM)
+ pr_warn("process pid %d DQM create queue type %d failed. ret %d\n",
+ pqm->process->lead_thread->pid, type, retval);
+ else
+ pr_err("process pid %d DQM create queue type %d failed. ret %d\n",
+ pqm->process->lead_thread->pid, type, retval);
goto err_create_queue;
}
- if (q && p_doorbell_offset_in_process)
+ if (q && p_doorbell_offset_in_process) {
/* Return the doorbell offset within the doorbell page
* to the caller so it can be passed up to user mode
* (in bytes).
- * There are always 1024 doorbells per process, so in case
- * of 8-byte doorbells, there are two doorbell pages per
- * process.
+ * relative doorbell index = Absolute doorbell index -
+ * absolute index of first doorbell in the page.
*/
- *p_doorbell_offset_in_process =
- (q->properties.doorbell_off * sizeof(uint32_t)) &
- (kfd_doorbell_process_slice(dev) - 1);
+ uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev,
+ pdd->qpd.proc_doorbells,
+ 0,
+ pdd->dev->kfd->device_info.doorbell_size);
+
+ *p_doorbell_offset_in_process = (q->properties.doorbell_off
+ - first_db_index) * sizeof(uint32_t);
+ }
pr_debug("PQM After DQM create queue\n");
@@ -326,7 +491,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
err_create_queue:
uninit_queue(q);
if (kq)
- kernel_queue_uninit(kq, false);
+ kernel_queue_uninit(kq);
kfree(pqn);
err_allocate_pqn:
/* check if queues list is empty unregister process from device */
@@ -342,7 +507,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
struct process_queue_node *pqn;
struct kfd_process_device *pdd;
struct device_queue_manager *dqm;
- struct kfd_dev *dev;
+ struct kfd_node *dev;
int retval;
dqm = NULL;
@@ -373,27 +538,26 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
/* destroy kernel queue (DIQ) */
dqm = pqn->kq->dev->dqm;
dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
- kernel_queue_uninit(pqn->kq, false);
+ kernel_queue_uninit(pqn->kq);
}
if (pqn->q) {
- kfd_procfs_del_queue(pqn->q);
+ retval = kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
+ if (retval)
+ goto err_destroy_queue;
+
dqm = pqn->q->device->dqm;
retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
if (retval) {
pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n",
- pqm->process->pasid,
+ pdd->pasid,
pqn->q->properties.queue_id, retval);
- if (retval != -ETIME)
+ if (retval != -ETIME && retval != -EIO)
goto err_destroy_queue;
}
-
- if (pqn->q->gws) {
- amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
- pqn->q->gws);
- pdd->qpd.num_gws = 0;
- }
-
+ kfd_procfs_del_queue(pqn->q);
+ kfd_queue_release_buffers(pdd, &pqn->q->properties);
+ pqm_clean_queue_resource(pqm, pqn);
uninit_queue(pqn->q);
}
@@ -416,15 +580,47 @@ int pqm_update_queue_properties(struct process_queue_manager *pqm,
struct process_queue_node *pqn;
pqn = get_queue_by_qid(pqm, qid);
- if (!pqn) {
+ if (!pqn || !pqn->q) {
pr_debug("No queue %d exists for update operation\n", qid);
return -EFAULT;
}
+	/*
+	 * Update with NULL ring address is used to disable the queue.
+	 * A non-NULL ring must be mapped on the GPU: look it up under the VM
+	 * root reservation, then swap the queue's ring_bo reference over to it.
+	 */
+	if (p->queue_address && p->queue_size) {
+		struct kfd_process_device *pdd;
+		struct amdgpu_vm *vm;
+		struct queue *q = pqn->q;
+		int err;
+
+		pdd = kfd_get_process_device_data(q->device, q->process);
+		if (!pdd)
+			return -ENODEV;
+		vm = drm_priv_to_vm(pdd->drm_priv);
+		err = amdgpu_bo_reserve(vm->root.bo, false);
+		if (err)
+			return err;
+
+		if (kfd_queue_buffer_get(vm, (void *)p->queue_address, &p->ring_bo,
+					 p->queue_size)) {
+			pr_debug("ring buf 0x%llx size 0x%llx not mapped on GPU\n",
+				 p->queue_address, p->queue_size);
+			/* Drop the reservation taken above before bailing out. */
+			amdgpu_bo_unreserve(vm->root.bo);
+			return -EFAULT;
+		}
+
+		/* Release the old ring buffer only once the new one is held. */
+		kfd_queue_unref_bo_va(vm, &pqn->q->properties.ring_bo);
+		kfd_queue_buffer_put(&pqn->q->properties.ring_bo);
+		amdgpu_bo_unreserve(vm->root.bo);
+
+		pqn->q->properties.ring_bo = p->ring_bo;
+	}
+
pqn->q->properties.queue_address = p->queue_address;
pqn->q->properties.queue_size = p->queue_size;
pqn->q->properties.queue_percent = p->queue_percent;
pqn->q->properties.priority = p->priority;
+ pqn->q->properties.pm4_target_xcc = p->pm4_target_xcc;
retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
pqn->q, NULL);
@@ -446,25 +642,34 @@ int pqm_update_mqd(struct process_queue_manager *pqm,
return -EFAULT;
}
+ /* CUs are masked for debugger requirements so deny user mask */
+ if (pqn->q->properties.is_dbg_wa && minfo && minfo->cu_mask.ptr)
+ return -EBUSY;
+
+ /* ASICs that have WGPs must enforce pairwise enabled mask checks. */
+ if (minfo && minfo->cu_mask.ptr &&
+ KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) {
+ int i;
+
+ for (i = 0; i < minfo->cu_mask.count; i += 2) {
+ uint32_t cu_pair = (minfo->cu_mask.ptr[i / 32] >> (i % 32)) & 0x3;
+
+ if (cu_pair && cu_pair != 0x3) {
+ pr_debug("CUs must be adjacent pairwise enabled.\n");
+ return -EINVAL;
+ }
+ }
+ }
+
retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
pqn->q, minfo);
if (retval != 0)
return retval;
- return 0;
-}
-
-struct kernel_queue *pqm_get_kernel_queue(
- struct process_queue_manager *pqm,
- unsigned int qid)
-{
- struct process_queue_node *pqn;
-
- pqn = get_queue_by_qid(pqm, qid);
- if (pqn && pqn->kq)
- return pqn->kq;
+ if (minfo && minfo->cu_mask.ptr)
+ pqn->q->properties.is_user_cu_masked = true;
- return NULL;
+ return 0;
}
struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
@@ -498,6 +703,403 @@ int pqm_get_wave_state(struct process_queue_manager *pqm,
save_area_used_size);
}
+/*
+ * Copy one snapshot entry per user queue of @pqm to the user buffer @buf.
+ *
+ * On entry *num_qss_entries is the number of entries @buf can hold and
+ * *entry_size is the caller's per-entry stride. On return *num_qss_entries
+ * is the number of user queues visited (it keeps counting past the buffer
+ * capacity) and *entry_size is clamped to sizeof(struct
+ * kfd_queue_snapshot_entry).
+ *
+ * Returns 0 on success, -EINVAL for a zero entry size, -EFAULT if a copy
+ * to user space fails.
+ */
+int pqm_get_queue_snapshot(struct process_queue_manager *pqm,
+			   uint64_t exception_clear_mask,
+			   void __user *buf,
+			   int *num_qss_entries,
+			   uint32_t *entry_size)
+{
+	/* Latch the caller's stride and capacity before the outputs are rewritten. */
+	uint32_t user_stride = *entry_size, user_capacity = *num_qss_entries;
+	struct kfd_queue_snapshot_entry entry;
+	struct process_queue_node *node;
+	int status = 0;
+
+	*num_qss_entries = 0;
+	if (*entry_size == 0)
+		return -EINVAL;
+
+	*entry_size = min_t(size_t, *entry_size, sizeof(struct kfd_queue_snapshot_entry));
+	mutex_lock(&pqm->process->event_mutex);
+
+	memset(&entry, 0, sizeof(entry));
+
+	list_for_each_entry(node, &pqm->queues, process_queue_list) {
+		/* Nodes without a user queue are not reported. */
+		if (!node->q)
+			continue;
+
+		if (*num_qss_entries >= user_capacity) {
+			/* Buffer is full: count the queue without copying it. */
+			*num_qss_entries += 1;
+			continue;
+		}
+
+		set_queue_snapshot_entry(node->q, exception_clear_mask, &entry);
+		if (copy_to_user(buf, &entry, *entry_size)) {
+			status = -EFAULT;
+			break;
+		}
+
+		/* Advance by the caller's stride, not by the clamped copy size. */
+		buf += user_stride;
+		*num_qss_entries += 1;
+	}
+
+	mutex_unlock(&pqm->process->event_mutex);
+	return status;
+}
+
+/*
+ * Look up the MQD and control stack sizes of queue @q through the process
+ * queue manager of @pdd's process, logging an error when the lookup fails.
+ * Returns 0 on success or the error from pqm_get_queue_checkpoint_info().
+ */
+static int get_queue_data_sizes(struct kfd_process_device *pdd,
+				struct queue *q,
+				uint32_t *mqd_size,
+				uint32_t *ctl_stack_size)
+{
+	int err = pqm_get_queue_checkpoint_info(&pdd->process->pqm,
+						q->properties.queue_id,
+						mqd_size,
+						ctl_stack_size);
+
+	if (!err)
+		return 0;
+
+	pr_err("Failed to get queue dump info (%d)\n", err);
+	return err;
+}
+
+/*
+ * Count the checkpointable queues of process @p and compute the size of the
+ * private data needed to describe them.
+ *
+ * @num_queues:      out, number of compute/SDMA/SDMA-XGMI queues found
+ * @priv_data_sizes: out, one struct kfd_criu_queue_priv_data per queue plus
+ *                   every queue's MQD and control stack size
+ *
+ * Returns 0 on success, -EOPNOTSUPP if any queue on a device queue list has
+ * another type, or the error from get_queue_data_sizes(). On failure
+ * *priv_data_sizes is not written.
+ */
+int kfd_process_get_queue_info(struct kfd_process *p,
+			       uint32_t *num_queues,
+			       uint64_t *priv_data_sizes)
+{
+	/*
+	 * Accumulate in 64 bits: the result is reported through a 64-bit
+	 * output, so a 32-bit running sum could wrap before being widened.
+	 */
+	uint64_t extra_data_sizes = 0;
+	struct queue *q;
+	int i;
+	int ret;
+
+	*num_queues = 0;
+
+	/* Run over all PDDs of the process */
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+
+		list_for_each_entry(q, &pdd->qpd.queues_list, list) {
+			uint32_t mqd_size, ctl_stack_size;
+
+			if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE &&
+			    q->properties.type != KFD_QUEUE_TYPE_SDMA &&
+			    q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI) {
+				pr_err("Unsupported queue type (%d)\n", q->properties.type);
+				return -EOPNOTSUPP;
+			}
+
+			*num_queues = *num_queues + 1;
+
+			ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
+			if (ret)
+				return ret;
+
+			extra_data_sizes += (uint64_t)mqd_size + ctl_stack_size;
+		}
+	}
+	*priv_data_sizes = extra_data_sizes +
+				(*num_queues * sizeof(struct kfd_criu_queue_priv_data));
+
+	return 0;
+}
+
+/*
+ * Ask the device queue manager to dump the MQD and control stack of user
+ * queue @qid into @mqd and @ctl_stack.
+ *
+ * Returns -EFAULT if @qid does not name a user queue, -EOPNOTSUPP if the
+ * device queue manager has no checkpoint_mqd op, otherwise the op's result.
+ */
+static int pqm_checkpoint_mqd(struct process_queue_manager *pqm,
+			      unsigned int qid,
+			      void *mqd,
+			      void *ctl_stack)
+{
+	struct process_queue_node *pqn;
+
+	pqn = get_queue_by_qid(pqm, qid);
+	/*
+	 * A node created for a kernel queue (DIQ) has q == NULL, so the node
+	 * alone is not enough to dereference pqn->q below.
+	 */
+	if (!pqn || !pqn->q) {
+		pr_debug("amdkfd: No queue %d exists for operation\n", qid);
+		return -EFAULT;
+	}
+
+	if (!pqn->q->device->dqm->ops.checkpoint_mqd) {
+		pr_err("amdkfd: queue dumping not supported on this device\n");
+		return -EOPNOTSUPP;
+	}
+
+	return pqn->q->device->dqm->ops.checkpoint_mqd(pqn->q->device->dqm,
+						       pqn->q, mqd, ctl_stack);
+}
+
+/*
+ * Fill the CRIU private data record @q_data for queue @q and dump the
+ * queue's MQD and control stack into the space that directly follows the
+ * record. q_data->mqd_size must already be set by the caller, because it
+ * locates the control stack behind the MQD.
+ *
+ * Returns 0 on success or the error from pqm_checkpoint_mqd().
+ */
+static int criu_checkpoint_queue(struct kfd_process_device *pdd,
+			   struct queue *q,
+			   struct kfd_criu_queue_priv_data *q_data)
+{
+	const struct queue_properties *props = &q->properties;
+	/* Payload layout: [priv_data][mqd][ctl_stack] */
+	uint8_t *mqd_buf = (void *)(q_data + 1);
+	uint8_t *ctl_stack_buf = mqd_buf + q_data->mqd_size;
+	int err;
+
+	/* Identity of the queue */
+	q_data->gpu_id = pdd->user_gpu_id;
+	q_data->q_id = props->queue_id;
+	q_data->type = props->type;
+	q_data->format = props->format;
+	q_data->doorbell_id = q->doorbell_id;
+	q_data->sdma_id = q->sdma_id;
+	q_data->gws = !!q->gws;
+
+	/* Ring buffer and scheduling parameters */
+	q_data->q_address = props->queue_address;
+	q_data->q_size = props->queue_size;
+	q_data->q_percent = props->queue_percent;
+	q_data->priority = props->priority;
+	q_data->read_ptr_addr = (uint64_t)props->read_ptr;
+	q_data->write_ptr_addr = (uint64_t)props->write_ptr;
+
+	/* EOP buffer and context save/restore area */
+	q_data->eop_ring_buffer_address = props->eop_ring_buffer_address;
+	q_data->eop_ring_buffer_size = props->eop_ring_buffer_size;
+	q_data->ctx_save_restore_area_address = props->ctx_save_restore_area_address;
+	q_data->ctx_save_restore_area_size = props->ctx_save_restore_area_size;
+
+	err = pqm_checkpoint_mqd(&pdd->process->pqm, props->queue_id,
+				 mqd_buf, ctl_stack_buf);
+	if (err) {
+		pr_err("Failed checkpoint queue_mqd (%d)\n", err);
+		return err;
+	}
+
+	pr_debug("Dumping Queue: gpu_id:%x queue_id:%u\n", q_data->gpu_id, q_data->q_id);
+	return 0;
+}
+
+/*
+ * Checkpoint every queue on @pdd's queue list into the user buffer.
+ *
+ * Each queue is serialized into a kernel staging buffer (priv_data record
+ * followed by its MQD and control stack) and then copied to
+ * @user_priv + *@queues_priv_data_offset. On each successful copy the offset
+ * advances by the record size and *@q_index is incremented.
+ *
+ * Returns 0 on success, -EOPNOTSUPP for a queue that is not compute, SDMA or
+ * SDMA-XGMI, -ENOMEM if the staging buffer cannot be allocated, -EFAULT if
+ * the copy to user space fails, or the error from the size/checkpoint helpers.
+ */
+static int criu_checkpoint_queues_device(struct kfd_process_device *pdd,
+ uint8_t __user *user_priv,
+ unsigned int *q_index,
+ uint64_t *queues_priv_data_offset)
+{
+ unsigned int q_private_data_size = 0;
+ uint8_t *q_private_data = NULL; /* Local buffer to store individual queue private data */
+ struct queue *q;
+ int ret = 0;
+
+ list_for_each_entry(q, &pdd->qpd.queues_list, list) {
+ struct kfd_criu_queue_priv_data *q_data;
+ uint64_t q_data_size;
+ uint32_t mqd_size;
+ uint32_t ctl_stack_size;
+
+ if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE &&
+ q->properties.type != KFD_QUEUE_TYPE_SDMA &&
+ q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI) {
+
+ pr_err("Unsupported queue type (%d)\n", q->properties.type);
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
+ if (ret)
+ break;
+
+ q_data_size = sizeof(*q_data) + mqd_size + ctl_stack_size;
+
+ /* Increase local buffer space if needed */
+ /* The buffer only ever grows; a large enough one is reused as is. */
+ if (q_private_data_size < q_data_size) {
+ kfree(q_private_data);
+
+ q_private_data = kzalloc(q_data_size, GFP_KERNEL);
+ if (!q_private_data) {
+ ret = -ENOMEM;
+ break;
+ }
+ q_private_data_size = q_data_size;
+ }
+
+ q_data = (struct kfd_criu_queue_priv_data *)q_private_data;
+
+ /*
+ * data stored in this order:
+ * priv_data, mqd[xcc0], mqd[xcc1],..., ctl_stack[xcc0], ctl_stack[xcc1]...
+ */
+ /* Sizes go in first: criu_checkpoint_queue() uses mqd_size to place the ctl stack. */
+ q_data->mqd_size = mqd_size;
+ q_data->ctl_stack_size = ctl_stack_size;
+
+ ret = criu_checkpoint_queue(pdd, q, q_data);
+ if (ret)
+ break;
+
+ q_data->object_type = KFD_CRIU_OBJECT_TYPE_QUEUE;
+
+ /* copy_to_user() returns the number of bytes NOT copied, hence the remap to -EFAULT. */
+ ret = copy_to_user(user_priv + *queues_priv_data_offset,
+ q_data, q_data_size);
+ if (ret) {
+ ret = -EFAULT;
+ break;
+ }
+ *queues_priv_data_offset += q_data_size;
+ *q_index = *q_index + 1;
+ }
+
+ /* Single release point for every exit from the loop; kfree(NULL) is a no-op. */
+ kfree(q_private_data);
+
+ return ret;
+}
+
+/*
+ * Checkpoint the queues of every device of process @p into the user buffer
+ * @user_priv_data, starting at *@priv_data_offset. The offset is advanced by
+ * criu_checkpoint_queues_device() as records are written.
+ *
+ * Returns 0 on success or the first error reported for any device.
+ */
+int kfd_criu_checkpoint_queues(struct kfd_process *p,
+			 uint8_t __user *user_priv_data,
+			 uint64_t *priv_data_offset)
+{
+	/* Matches the "unsigned int *" parameter it is passed to by address. */
+	unsigned int q_index = 0;
+	int ret = 0, pdd_index;
+
+	for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
+		struct kfd_process_device *pdd = p->pdds[pdd_index];
+
+		/*
+		 * criu_checkpoint_queues_device will copy data to user and update q_index and
+		 * queues_priv_data_offset
+		 */
+		ret = criu_checkpoint_queues_device(pdd, user_priv_data, &q_index,
+					      priv_data_offset);
+
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/*
+ * Populate queue properties @qp from a CRIU private data record @q_data.
+ * For compute queues the recorded control stack size is divided by
+ * @num_xcc; for other queue types it is taken as recorded.
+ */
+static void set_queue_properties_from_criu(struct queue_properties *qp,
+					  struct kfd_criu_queue_priv_data *q_data, uint32_t num_xcc)
+{
+	/* Queue kind */
+	qp->type = q_data->type;
+	qp->format = q_data->format;
+	qp->is_interop = false;
+
+	/* Scheduling */
+	qp->priority = q_data->priority;
+	qp->queue_percent = q_data->q_percent;
+
+	/* Ring buffer and its read/write pointers */
+	qp->queue_address = q_data->q_address;
+	qp->queue_size = q_data->q_size;
+	qp->read_ptr = (uint32_t *) q_data->read_ptr_addr;
+	qp->write_ptr = (uint32_t *) q_data->write_ptr_addr;
+
+	/* EOP buffer and context save/restore area */
+	qp->eop_ring_buffer_address = q_data->eop_ring_buffer_address;
+	qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size;
+	qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address;
+	qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size;
+
+	qp->ctl_stack_size = (q_data->type == KFD_QUEUE_TYPE_COMPUTE) ?
+			     q_data->ctl_stack_size / num_xcc :
+			     q_data->ctl_stack_size;
+}
+
+/*
+ * Restore one queue of process @p from the CRIU private data blob.
+ *
+ * Reads a struct kfd_criu_queue_priv_data followed by its MQD and control
+ * stack from @user_priv_ptr at *@priv_data_offset, advancing the offset past
+ * each part that was read, and recreates the queue via pqm_create_queue().
+ * @max_priv_data_size bounds how far into the blob the reads may reach.
+ *
+ * Returns 0 on success, -EINVAL if the record would run past the blob or
+ * names an unknown gpu_id, -ENOMEM on allocation failure, -EFAULT if a copy
+ * from user space fails, or the error from queue creation / GWS setup.
+ */
+int kfd_criu_restore_queue(struct kfd_process *p,
+ uint8_t __user *user_priv_ptr,
+ uint64_t *priv_data_offset,
+ uint64_t max_priv_data_size)
+{
+ uint8_t *mqd, *ctl_stack, *q_extra_data = NULL;
+ struct kfd_criu_queue_priv_data *q_data;
+ struct kfd_process_device *pdd;
+ uint64_t q_extra_data_size;
+ struct queue_properties qp;
+ unsigned int queue_id;
+ int ret = 0;
+
+ /* The fixed-size header must fit inside the blob before it is read. */
+ if (*priv_data_offset + sizeof(*q_data) > max_priv_data_size)
+ return -EINVAL;
+
+ q_data = kmalloc(sizeof(*q_data), GFP_KERNEL);
+ if (!q_data)
+ return -ENOMEM;
+
+ ret = copy_from_user(q_data, user_priv_ptr + *priv_data_offset, sizeof(*q_data));
+ if (ret) {
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ *priv_data_offset += sizeof(*q_data);
+ /* Widen before adding so the two 32-bit sizes cannot wrap. */
+ q_extra_data_size = (uint64_t)q_data->ctl_stack_size + q_data->mqd_size;
+
+ /* The sizes come from the user-supplied header; bound them by the blob size. */
+ if (*priv_data_offset + q_extra_data_size > max_priv_data_size) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ q_extra_data = kmalloc(q_extra_data_size, GFP_KERNEL);
+ if (!q_extra_data) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ ret = copy_from_user(q_extra_data, user_priv_ptr + *priv_data_offset, q_extra_data_size);
+ if (ret) {
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ *priv_data_offset += q_extra_data_size;
+
+ pdd = kfd_process_device_data_by_id(p, q_data->gpu_id);
+ if (!pdd) {
+ pr_err("Failed to get pdd\n");
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ /*
+ * data stored in this order:
+ * mqd[xcc0], mqd[xcc1],..., ctl_stack[xcc0], ctl_stack[xcc1]...
+ */
+ mqd = q_extra_data;
+ ctl_stack = mqd + q_data->mqd_size;
+
+ memset(&qp, 0, sizeof(qp));
+ set_queue_properties_from_criu(&qp, q_data, NUM_XCC(pdd->dev->adev->gfx.xcc_mask));
+
+ print_queue_properties(&qp);
+
+ /* Passing q_data makes pqm_create_queue() reuse the recorded queue id. */
+ ret = pqm_create_queue(&p->pqm, pdd->dev, &qp, &queue_id, q_data, mqd, ctl_stack, NULL);
+ if (ret) {
+ pr_err("Failed to create new queue err:%d\n", ret);
+ goto exit;
+ }
+
+ if (q_data->gws)
+ ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws);
+
+exit:
+ /* Shared exit: queue_id is only read on the success path, where it has been set. */
+ if (ret)
+ pr_err("Failed to restore queue (%d)\n", ret);
+ else
+ pr_debug("Queue id %d was restored successfully\n", queue_id);
+
+ kfree(q_data);
+ kfree(q_extra_data);
+
+ return ret;
+}
+
+/*
+ * Query the device queue manager for the MQD and control stack sizes of
+ * user queue @qid, storing them in *@mqd_size and *@ctl_stack_size.
+ *
+ * Returns 0 on success, -EFAULT if @qid does not name a user queue, or
+ * -EOPNOTSUPP if the device queue manager has no get_queue_checkpoint_info op.
+ */
+int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
+				  unsigned int qid,
+				  uint32_t *mqd_size,
+				  uint32_t *ctl_stack_size)
+{
+	struct process_queue_node *pqn;
+
+	pqn = get_queue_by_qid(pqm, qid);
+	/*
+	 * A node created for a kernel queue (DIQ) has q == NULL, so the node
+	 * alone is not enough to dereference pqn->q below.
+	 */
+	if (!pqn || !pqn->q) {
+		pr_debug("amdkfd: No queue %d exists for operation\n", qid);
+		return -EFAULT;
+	}
+
+	if (!pqn->q->device->dqm->ops.get_queue_checkpoint_info) {
+		pr_err("amdkfd: queue dumping not supported on this device\n");
+		return -EOPNOTSUPP;
+	}
+
+	pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm,
+						       pqn->q, mqd_size,
+						       ctl_stack_size);
+	return 0;
+}
+
#if defined(CONFIG_DEBUG_FS)
int pqm_debugfs_mqds(struct seq_file *m, void *data)
@@ -507,7 +1109,9 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
struct queue *q;
enum KFD_MQD_TYPE mqd_type;
struct mqd_manager *mqd_mgr;
- int r = 0;
+ int r = 0, xcc, num_xccs = 1;
+ void *mqd;
+ uint64_t size = 0;
list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
if (pqn->q) {
@@ -523,6 +1127,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
seq_printf(m, " Compute queue on device %x\n",
q->device->id);
mqd_type = KFD_MQD_TYPE_CP;
+ num_xccs = NUM_XCC(q->device->xcc_mask);
break;
default:
seq_printf(m,
@@ -531,6 +1136,8 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
continue;
}
mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
+ size = mqd_mgr->mqd_stride(mqd_mgr,
+ &q->properties);
} else if (pqn->kq) {
q = pqn->kq->queue;
mqd_mgr = pqn->kq->mqd_mgr;
@@ -552,9 +1159,12 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
continue;
}
- r = mqd_mgr->debugfs_show_mqd(m, q->mqd);
- if (r != 0)
- break;
+		/* Dump one MQD per XCC; consecutive MQDs are "size" bytes apart. */
+		for (xcc = 0; xcc < num_xccs; xcc++) {
+			mqd = q->mqd + size * xcc;
+			r = mqd_mgr->debugfs_show_mqd(m, mqd);
+			if (r != 0)
+				break;
+		}
+		/*
+		 * The break above only leaves the per-XCC loop. Stop walking
+		 * the queue list as well, otherwise the error in r would be
+		 * overwritten by the next queue.
+		 */
+		if (r != 0)
+			break;
}
return r;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
index 6dcd621e5b71..f1e7583650c4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -23,6 +24,8 @@
#include <linux/slab.h>
#include "kfd_priv.h"
+#include "kfd_topology.h"
+#include "kfd_svm.h"
void print_queue_properties(struct queue_properties *q)
{
@@ -81,3 +84,386 @@ void uninit_queue(struct queue *q)
{
kfree(q);
}
+
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+
+/*
+ * Take a queue reference on the SVM ranges found for the buffer at @addr of
+ * @size bytes in @pdd's process.
+ *
+ * The walk stops at the first range that is missing, not mapped to the GPU,
+ * not accessible (neither access nor access-in-place bit set) from @pdd's
+ * device, or lacks KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED. References are only
+ * taken once the whole buffer has been accounted for.
+ *
+ * Returns 0 on success, -EINVAL otherwise.
+ */
+static int kfd_queue_buffer_svm_get(struct kfd_process_device *pdd, u64 addr, u64 size)
+{
+ struct kfd_process *p = pdd->process;
+ struct list_head update_list;
+ struct svm_range *prange;
+ int ret = -EINVAL;
+
+ INIT_LIST_HEAD(&update_list);
+ /* From here on addr and size are counted in pages, not bytes. */
+ addr >>= PAGE_SHIFT;
+ size >>= PAGE_SHIFT;
+
+ mutex_lock(&p->svms.lock);
+
+ /*
+ * range may split to multiple svm pranges aligned to granularity boundary.
+ */
+ while (size) {
+ uint32_t gpuid, gpuidx;
+ int r;
+
+ /* Every "break" below leaves size non-zero, which is reported as failure after the loop. */
+ prange = svm_range_from_addr(&p->svms, addr, NULL);
+ if (!prange)
+ break;
+
+ if (!prange->mapped_to_gpu)
+ break;
+
+ r = kfd_process_gpuid_from_node(p, pdd->dev, &gpuid, &gpuidx);
+ if (r < 0)
+ break;
+ if (!test_bit(gpuidx, prange->bitmap_access) &&
+ !test_bit(gpuidx, prange->bitmap_aip))
+ break;
+
+ if (!(prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED))
+ break;
+
+ /* Remember the range; its refcount is only bumped once the whole walk has succeeded. */
+ list_add(&prange->update_list, &update_list);
+
+ /* This range is at least as long as what is left: the walk is complete. */
+ if (prange->last - prange->start + 1 >= size) {
+ size = 0;
+ break;
+ }
+
+ size -= prange->last - prange->start + 1;
+ addr += prange->last - prange->start + 1;
+ }
+ if (size) {
+ pr_debug("[0x%llx 0x%llx] not registered\n", addr, addr + size - 1);
+ goto out_unlock;
+ }
+
+ list_for_each_entry(prange, &update_list, update_list)
+ atomic_inc(&prange->queue_refcount);
+ ret = 0;
+
+out_unlock:
+ mutex_unlock(&p->svms.lock);
+ return ret;
+}
+
+/*
+ * Drop the queue reference on every SVM range of @pdd's process that
+ * overlaps the buffer at @addr of @size bytes. Refcounts that are already
+ * zero are left untouched.
+ */
+static void kfd_queue_buffer_svm_put(struct kfd_process_device *pdd, u64 addr, u64 size)
+{
+ struct kfd_process *p = pdd->process;
+ struct svm_range *prange, *pchild;
+ struct interval_tree_node *node;
+ unsigned long last;
+
+ /* Work in page units: [addr, last] is the inclusive page interval of the buffer. */
+ addr >>= PAGE_SHIFT;
+ last = addr + (size >> PAGE_SHIFT) - 1;
+
+ mutex_lock(&p->svms.lock);
+
+ node = interval_tree_iter_first(&p->svms.objects, addr, last);
+ while (node) {
+ struct interval_tree_node *next_node;
+ unsigned long next_start;
+
+ prange = container_of(node, struct svm_range, it_node);
+ /* Fetch the successor with the current addr before addr is advanced below. */
+ next_node = interval_tree_iter_next(node, addr, last);
+ next_start = min(node->last, last) + 1;
+
+ /* Decrement unless already zero; children are only touched when the parent held a reference. */
+ if (atomic_add_unless(&prange->queue_refcount, -1, 0)) {
+ list_for_each_entry(pchild, &prange->child_list, child_list)
+ atomic_add_unless(&pchild->queue_refcount, -1, 0);
+ }
+
+ node = next_node;
+ addr = next_start;
+ }
+
+ mutex_unlock(&p->svms.lock);
+}
+#else
+
+/* Stub used when CONFIG_HSA_AMD_SVM is not enabled: the lookup always fails with -EINVAL. */
+static int kfd_queue_buffer_svm_get(struct kfd_process_device *pdd, u64 addr, u64 size)
+{
+ return -EINVAL;
+}
+
+/* Stub used when CONFIG_HSA_AMD_SVM is not enabled: there is nothing to release. */
+static void kfd_queue_buffer_svm_put(struct kfd_process_device *pdd, u64 addr, u64 size)
+{
+}
+
+#endif
+
+/*
+ * Look up the BO mapped at user address @addr in @vm and take a reference
+ * on it for use as a queue buffer.
+ *
+ * The mapping must start exactly at @addr and, when @expected_size is
+ * non-zero, end exactly at @addr + @expected_size. On success *@pbo holds a
+ * new BO reference and the mapping's bo_va queue_refcount is incremented.
+ * On failure *@pbo is set to NULL.
+ *
+ * Returns 0 on success, -EINVAL otherwise.
+ */
+int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo,
+			 u64 expected_size)
+{
+	/* Both values are in GPU page units from here on. */
+	u64 first_page = (u64)addr >> AMDGPU_GPU_PAGE_SHIFT;
+	u64 num_pages = expected_size >> AMDGPU_GPU_PAGE_SHIFT;
+	struct amdgpu_bo_va_mapping *map;
+
+	map = amdgpu_vm_bo_lookup_mapping(vm, first_page);
+	if (!map) {
+		*pbo = NULL;
+		return -EINVAL;
+	}
+
+	if (first_page != map->start ||
+	    (num_pages != 0 && first_page + num_pages - 1 != map->last)) {
+		pr_debug("expected size 0x%llx not equal to mapping addr 0x%llx size 0x%llx\n",
+			expected_size, map->start << AMDGPU_GPU_PAGE_SHIFT,
+			(map->last - map->start + 1) << AMDGPU_GPU_PAGE_SHIFT);
+		*pbo = NULL;
+		return -EINVAL;
+	}
+
+	*pbo = amdgpu_bo_ref(map->bo_va->base.bo);
+	map->bo_va->queue_refcount++;
+	return 0;
+}
+
+/*
+ * Release a queue buffer BO reference by forwarding @bo to amdgpu_bo_unref().
+ *
+ * FIXME: remove this function, just call amdgpu_bo_unref directly
+ */
+void kfd_queue_buffer_put(struct amdgpu_bo **bo)
+{
+ amdgpu_bo_unref(bo);
+}
+
+/*
+ * Validate and take references on all buffers described by @properties for a
+ * queue on @pdd's device: write pointer, read pointer and ring buffer for
+ * every queue type, plus EOP buffer and CWSR area for compute queues.
+ *
+ * The CWSR area is first looked up as a BO mapping; if that fails it is
+ * looked up as SVM ranges instead.
+ *
+ * Returns 0 on success. On failure the references taken so far are released
+ * again and a negative error code is returned.
+ */
+int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties)
+{
+ struct kfd_topology_device *topo_dev;
+ u64 expected_queue_size;
+ struct amdgpu_vm *vm;
+ u32 total_cwsr_size;
+ int err;
+
+ topo_dev = kfd_topology_device_by_id(pdd->dev->id);
+ if (!topo_dev)
+ return -EINVAL;
+
+ /* AQL queues on GFX7 and GFX8 appear twice their actual size */
+ if (properties->type == KFD_QUEUE_TYPE_COMPUTE &&
+ properties->format == KFD_QUEUE_FORMAT_AQL &&
+ topo_dev->node_props.gfx_target_version >= 70000 &&
+ topo_dev->node_props.gfx_target_version < 90000)
+ expected_queue_size = properties->queue_size / 2;
+ else
+ expected_queue_size = properties->queue_size;
+
+ /* All BO mapping lookups below run with the VM root BO reserved. */
+ vm = drm_priv_to_vm(pdd->drm_priv);
+ err = amdgpu_bo_reserve(vm->root.bo, false);
+ if (err)
+ return err;
+
+ /* Read and write pointer buffers must each be a mapping of exactly PAGE_SIZE. */
+ err = kfd_queue_buffer_get(vm, properties->write_ptr, &properties->wptr_bo, PAGE_SIZE);
+ if (err)
+ goto out_err_unreserve;
+
+ err = kfd_queue_buffer_get(vm, properties->read_ptr, &properties->rptr_bo, PAGE_SIZE);
+ if (err)
+ goto out_err_unreserve;
+
+ err = kfd_queue_buffer_get(vm, (void *)properties->queue_address,
+ &properties->ring_bo, expected_queue_size);
+ if (err)
+ goto out_err_unreserve;
+
+ /* only compute queue requires EOP buffer and CWSR area */
+ if (properties->type != KFD_QUEUE_TYPE_COMPUTE)
+ goto out_unreserve;
+
+ /* EOP buffer is not required for all ASICs */
+ if (properties->eop_ring_buffer_address) {
+ if (properties->eop_ring_buffer_size != topo_dev->node_props.eop_buffer_size) {
+ pr_debug("queue eop bo size 0x%x not equal to node eop buf size 0x%x\n",
+ properties->eop_ring_buffer_size,
+ topo_dev->node_props.eop_buffer_size);
+ err = -EINVAL;
+ goto out_err_unreserve;
+ }
+ err = kfd_queue_buffer_get(vm, (void *)properties->eop_ring_buffer_address,
+ &properties->eop_buf_bo,
+ properties->eop_ring_buffer_size);
+ if (err)
+ goto out_err_unreserve;
+ }
+
+ /* The control stack size must match the node's value exactly ... */
+ if (properties->ctl_stack_size != topo_dev->node_props.ctl_stack_size) {
+ pr_debug("queue ctl stack size 0x%x not equal to node ctl stack size 0x%x\n",
+ properties->ctl_stack_size,
+ topo_dev->node_props.ctl_stack_size);
+ err = -EINVAL;
+ goto out_err_unreserve;
+ }
+
+ /* ... while the save/restore area only has to be at least the node's CWSR size. */
+ if (properties->ctx_save_restore_area_size < topo_dev->node_props.cwsr_size) {
+ pr_debug("queue cwsr size 0x%x not sufficient for node cwsr size 0x%x\n",
+ properties->ctx_save_restore_area_size,
+ topo_dev->node_props.cwsr_size);
+ err = -EINVAL;
+ goto out_err_unreserve;
+ }
+
+ /* One save area plus debug memory per XCC, rounded up to a whole page. */
+ total_cwsr_size = (properties->ctx_save_restore_area_size +
+ topo_dev->node_props.debug_memory_size) * NUM_XCC(pdd->dev->xcc_mask);
+ total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE);
+
+ err = kfd_queue_buffer_get(vm, (void *)properties->ctx_save_restore_area_address,
+ &properties->cwsr_bo, total_cwsr_size);
+ if (!err)
+ goto out_unreserve;
+
+ /* No BO mapping for the CWSR area: drop the reservation and try SVM ranges instead. */
+ amdgpu_bo_unreserve(vm->root.bo);
+
+ err = kfd_queue_buffer_svm_get(pdd, properties->ctx_save_restore_area_address,
+ total_cwsr_size);
+ if (err)
+ goto out_err_release;
+
+ return 0;
+
+out_unreserve:
+ amdgpu_bo_unreserve(vm->root.bo);
+ return 0;
+
+out_err_unreserve:
+ amdgpu_bo_unreserve(vm->root.bo);
+out_err_release:
+ /* FIXME: make a _locked version of this that can be called before
+ * dropping the VM reservation.
+ */
+ kfd_queue_unref_bo_vas(pdd, properties);
+ kfd_queue_release_buffers(pdd, properties);
+ return err;
+}
+
+/*
+ * Release the buffer references held in @properties: the BO references for
+ * the write pointer, read pointer, ring, EOP and CWSR buffers, followed by
+ * the SVM range references covering the CWSR area.
+ *
+ * Returns 0 on success, or -EINVAL if the topology device of @pdd cannot be
+ * found; in that case the BO references have already been dropped.
+ */
+int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties)
+{
+	struct kfd_topology_device *topo;
+	u32 cwsr_bytes;
+
+	/* The BO references go first, whether or not the topology lookup succeeds. */
+	kfd_queue_buffer_put(&properties->wptr_bo);
+	kfd_queue_buffer_put(&properties->rptr_bo);
+	kfd_queue_buffer_put(&properties->ring_bo);
+	kfd_queue_buffer_put(&properties->eop_buf_bo);
+	kfd_queue_buffer_put(&properties->cwsr_bo);
+
+	topo = kfd_topology_device_by_id(pdd->dev->id);
+	if (topo) {
+		/* Same size computation as on the acquire side. */
+		cwsr_bytes = (properties->ctx_save_restore_area_size +
+			      topo->node_props.debug_memory_size) * NUM_XCC(pdd->dev->xcc_mask);
+		cwsr_bytes = ALIGN(cwsr_bytes, PAGE_SIZE);
+
+		kfd_queue_buffer_svm_put(pdd, properties->ctx_save_restore_area_address,
+					 cwsr_bytes);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Drop one queue reference from the bo_va that maps *@bo in @vm.
+ * Does nothing when *@bo is NULL, when no bo_va is found, or when the
+ * queue refcount is already zero.
+ */
+void kfd_queue_unref_bo_va(struct amdgpu_vm *vm, struct amdgpu_bo **bo)
+{
+	struct amdgpu_bo_va *mapping;
+
+	if (!*bo)
+		return;
+
+	mapping = amdgpu_vm_bo_find(vm, *bo);
+	if (!mapping || !mapping->queue_refcount)
+		return;
+
+	mapping->queue_refcount--;
+}
+
+/*
+ * Drop the bo_va queue references of all five queue buffers in
+ * @properties while holding the VM root BO reservation.
+ *
+ * Returns 0 on success or the error from amdgpu_bo_reserve().
+ */
+int kfd_queue_unref_bo_vas(struct kfd_process_device *pdd,
+			   struct queue_properties *properties)
+{
+	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
+	int ret;
+
+	ret = amdgpu_bo_reserve(vm->root.bo, false);
+	if (ret)
+		return ret;
+
+	kfd_queue_unref_bo_va(vm, &properties->wptr_bo);
+	kfd_queue_unref_bo_va(vm, &properties->rptr_bo);
+	kfd_queue_unref_bo_va(vm, &properties->ring_bo);
+	kfd_queue_unref_bo_va(vm, &properties->eop_buf_bo);
+	kfd_queue_unref_bo_va(vm, &properties->cwsr_bo);
+
+	amdgpu_bo_unreserve(vm->root.bo);
+
+	return 0;
+}
+
+#define SGPR_SIZE_PER_CU 0x4000
+#define LDS_SIZE_PER_CU 0x10000
+#define HWREG_SIZE_PER_CU 0x1000
+#define DEBUGGER_BYTES_ALIGN 64
+#define DEBUGGER_BYTES_PER_WAVE 32
+
+/*
+ * Return the VGPR size per CU, in bytes, for gfx target version @gfxv.
+ * Versions not listed below use 0x40000.
+ */
+static u32 kfd_get_vgpr_size_per_cu(u32 gfxv)
+{
+	switch (gfxv) {
+	case 90402:	/* GFX_VERSION_AQUA_VANJARAM */
+	case 90010:	/* GFX_VERSION_ALDEBARAN */
+	case 90008:	/* GFX_VERSION_ARCTURUS */
+	case 90500:
+		return 0x80000;
+	case 110000:	/* GFX_VERSION_PLUM_BONITO */
+	case 110001:	/* GFX_VERSION_WHEAT_NAS */
+	case 120000:	/* GFX_VERSION_GFX1200 */
+	case 120001:	/* GFX_VERSION_GFX1201 */
+		return 0x60000;
+	default:
+		return 0x40000;
+	}
+}
+
+#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props) \
+ (kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\
+ (((gfxv) == 90500) ? (props->lds_size_in_kb << 10) : LDS_SIZE_PER_CU) +\
+ HWREG_SIZE_PER_CU)
+
+#define CNTL_STACK_BYTES_PER_WAVE(gfxv) \
+ ((gfxv) >= 100100 ? 12 : 8) /* GFX_VERSION_NAVI10*/
+
+#define SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER 40
+
+/*
+ * Compute the context save/restore sizing of topology device @dev and store
+ * it in its node properties: ctl_stack_size, debug_memory_size, cwsr_size
+ * and eop_buffer_size. Nothing is written for gfx target versions below
+ * 80001.
+ */
+void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev)
+{
+	struct kfd_node_properties *props = &dev->node_props;
+	u32 gfxv = props->gfx_target_version;
+	u32 stack_bytes;
+	u32 wg_bytes;
+	u32 waves;
+	u32 cus;
+
+	if (gfxv < 80001)	/* GFX_VERSION_CARRIZO */
+		return;
+
+	/* CU count per XCC. */
+	cus = props->simd_count / props->simd_per_cu / NUM_XCC(dev->gpu->xcc_mask);
+
+	if (gfxv < 100100)	/* GFX_VERSION_NAVI10 */
+		waves = min(cus * 40,
+			    props->array_count / props->simd_arrays_per_engine * 512);
+	else
+		waves = cus * 32;
+
+	wg_bytes = ALIGN(cus * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props), PAGE_SIZE);
+
+	stack_bytes = waves * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8;
+	stack_bytes = ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + stack_bytes,
+			    PAGE_SIZE);
+
+	if ((gfxv / 10000 * 10000) == 100000) {
+		/* HW design limits control stack size to 0x7000.
+		 * This is insufficient for theoretical PM4 cases
+		 * but sufficient for AQL, limited by SPI events.
+		 */
+		stack_bytes = min(stack_bytes, 0x7000);
+	}
+
+	props->cwsr_size = stack_bytes + wg_bytes;
+	props->ctl_stack_size = stack_bytes;
+	props->debug_memory_size = ALIGN(waves * DEBUGGER_BYTES_PER_WAVE, DEBUGGER_BYTES_ALIGN);
+
+	/*
+	 * gfxv is at least 80001 here, so every version other than
+	 * 80002 (GFX_VERSION_TONGA) gets a 4096-byte EOP buffer.
+	 */
+	props->eop_buffer_size = (gfxv == 80002) ? 0x8000 : 4096;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index ed4bc5f844ce..a499449fcb06 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2020 Advanced Micro Devices, Inc.
+ * Copyright 2020-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -28,6 +29,7 @@
#include "amdgpu_vm.h"
#include "kfd_priv.h"
#include "kfd_smi_events.h"
+#include "amdgpu_reset.h"
struct kfd_smi_client {
struct list_head list;
@@ -35,11 +37,14 @@ struct kfd_smi_client {
wait_queue_head_t wait_queue;
/* events enabled */
uint64_t events;
- struct kfd_dev *dev;
+ struct kfd_node *dev;
spinlock_t lock;
+ struct rcu_head rcu;
+ pid_t pid;
+ bool suser;
};
-#define MAX_KFIFO_SIZE 1024
+#define KFD_MAX_KFIFO_SIZE 8192
static __poll_t kfd_smi_ev_poll(struct file *, struct poll_table_struct *);
static ssize_t kfd_smi_ev_read(struct file *, char __user *, size_t, loff_t *);
@@ -81,7 +86,8 @@ static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user,
struct kfd_smi_client *client = filep->private_data;
unsigned char *buf;
- buf = kmalloc_array(MAX_KFIFO_SIZE, sizeof(*buf), GFP_KERNEL);
+ size = min_t(size_t, size, KFD_MAX_KFIFO_SIZE);
+ buf = kmalloc(size, GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -95,7 +101,7 @@ static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user,
ret = -EAGAIN;
goto ret_err;
}
- to_copy = min3(size, sizeof(buf), to_copy);
+ to_copy = min(size, to_copy);
ret = kfifo_out(&client->fifo, buf, to_copy);
spin_unlock(&client->lock);
if (ret <= 0) {
@@ -133,32 +139,47 @@ static ssize_t kfd_smi_ev_write(struct file *filep, const char __user *user,
return sizeof(events);
}
+static void kfd_smi_ev_client_free(struct rcu_head *p)
+{ /* RCU callback passed to call_rcu() by kfd_smi_ev_release(): frees the client's fifo and the client itself. */
+ struct kfd_smi_client *ev = container_of(p, struct kfd_smi_client, rcu); /* recover the client from its embedded rcu_head */
+
+ kfifo_free(&ev->fifo);
+ kfree(ev);
+}
+
static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
{
struct kfd_smi_client *client = filep->private_data;
- struct kfd_dev *dev = client->dev;
+ struct kfd_node *dev = client->dev; /* node whose smi_clients list holds this client */
spin_lock(&dev->smi_lock);
list_del_rcu(&client->list);
spin_unlock(&dev->smi_lock);
- synchronize_rcu();
- kfifo_free(&client->fifo);
- kfree(client);
-
+ call_rcu(&client->rcu, kfd_smi_ev_client_free); /* free via RCU callback instead of blocking in synchronize_rcu() */
return 0;
}
-static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
- char *event_msg, int len)
+static bool kfd_smi_ev_enabled(pid_t pid, struct kfd_smi_client *client, /* Decide whether @client should receive @event raised on behalf of @pid. */
+ unsigned int event)
+{
+ uint64_t events = READ_ONCE(client->events); /* snapshot of the client's enabled-event mask */
+
+ if (pid && client->pid != pid && !client->suser) /* a non-zero pid is only delivered to the client with that pid or to one flagged suser; pid 0 skips this filter */
+ return false;
+
+ return events & KFD_SMI_EVENT_MASK_FROM_INDEX(event); /* true only if the client enabled this event */
+}
+
+static void add_event_to_kfifo(pid_t pid, struct kfd_node *dev,
+ unsigned int smi_event, char *event_msg, int len)
{
struct kfd_smi_client *client;
rcu_read_lock();
list_for_each_entry_rcu(client, &dev->smi_clients, list) {
- if (!(READ_ONCE(client->events) &
- KFD_SMI_EVENT_MASK_FROM_INDEX(smi_event)))
+ if (!kfd_smi_ev_enabled(pid, client, smi_event))
continue;
spin_lock(&client->lock);
if (kfifo_avail(&client->fifo) >= len) {
@@ -174,22 +195,31 @@ static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
rcu_read_unlock();
}
-void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
+/*
+ * Format one SMI event as "<event id in hex> <payload>" and pass it to
+ * add_event_to_kfifo() for the SMI clients of @dev.
+ *
+ * @pid:   pid the event is raised for, forwarded to add_event_to_kfifo();
+ *         callers in this file pass 0 for events not tied to one process
+ * @dev:   node whose smi_clients list is checked and notified
+ * @event: KFD_SMI_EVENT_* index, used as message prefix and for filtering
+ * @fmt:   printf-style format of the payload, followed by its arguments
+ *
+ * The message is built in a fixed KFD_SMI_EVENT_MSG_SIZE stack buffer.
+ * scnprintf()/vscnprintf() are used because they return the number of
+ * characters actually stored. snprintf()/vsnprintf() return the length the
+ * untruncated output would have had, so an oversized payload would make len
+ * larger than the buffer, and len is handed on as the message length.
+ * A truncated message is still delivered, without its tail.
+ */
+__printf(4, 5)
+static void kfd_smi_event_add(pid_t pid, struct kfd_node *dev,
+ unsigned int event, char *fmt, ...)
{
- /*
- * GpuReset msg = Reset seq number (incremented for
- * every reset message sent before GPU reset).
- * 1 byte event + 1 byte space + 8 bytes seq num +
- * 1 byte \n + 1 byte \0 = 12
- */
- char fifo_in[12];
+ char fifo_in[KFD_SMI_EVENT_MSG_SIZE];
int len;
- unsigned int event;
+ va_list args;
if (list_empty(&dev->smi_clients))
return;
- memset(fifo_in, 0x0, sizeof(fifo_in));
+ /* Event id prefix; len never exceeds sizeof(fifo_in) - 1 with scnprintf. */
+ len = scnprintf(fifo_in, sizeof(fifo_in), "%x ", event);
+
+ va_start(args, fmt);
+ /* Append the payload, bounded by the space left in the buffer. */
+ len += vscnprintf(fifo_in + len, sizeof(fifo_in) - len, fmt, args);
+ va_end(args);
+
+ add_event_to_kfifo(pid, dev, event, fifo_in, len);
+}
+
+void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset,
+ struct amdgpu_reset_context *reset_context)
+{
+ unsigned int event;
+ char reset_cause[64];
if (post_reset) {
event = KFD_SMI_EVENT_GPU_POST_RESET;
@@ -198,63 +228,144 @@ void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
++(dev->reset_seq_num);
}
- len = snprintf(fifo_in, sizeof(fifo_in), "%x %x\n", event,
- dev->reset_seq_num);
+ memset(reset_cause, 0, sizeof(reset_cause));
+
+ if (reset_context)
+ amdgpu_reset_get_desc(reset_context, reset_cause,
+ sizeof(reset_cause));
- add_event_to_kfifo(dev, event, fifo_in, len);
+ kfd_smi_event_add(0, dev, event, KFD_EVENT_FMT_UPDATE_GPU_RESET(
+ dev->reset_seq_num, reset_cause));
}
-void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
+void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
uint64_t throttle_bitmask)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
- /*
- * ThermalThrottle msg = throttle_bitmask(8):
- * thermal_interrupt_count(16):
- * 1 byte event + 1 byte space + 16 byte throttle_bitmask +
- * 1 byte : + 16 byte thermal_interupt_counter + 1 byte \n +
- * 1 byte \0 = 37
- */
- char fifo_in[37];
- int len;
+ kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, KFD_EVENT_FMT_THERMAL_THROTTLING( /* pid 0: not filtered by client pid */
+ throttle_bitmask,
+ amdgpu_dpm_get_thermal_throttling_counter(dev->adev))); /* payload: bitmask plus the counter read from the device */
+}
- if (list_empty(&dev->smi_clients))
- return;
+void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid)
+{ /* Emit KFD_SMI_EVENT_VMFAULT with the pid and comm of the task looked up from @pasid. */
+ struct amdgpu_task_info *task_info;
+
+ task_info = amdgpu_vm_get_task_info_pasid(dev->adev, pasid); /* may return NULL; released with amdgpu_vm_put_task_info() below */
+ if (task_info) {
+ /* Report VM faults from user applications, not retry from kernel */
+ if (task_info->task.pid)
+ kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, KFD_EVENT_FMT_VMFAULT( /* pid 0: not filtered by client pid */
+ task_info->task.pid, task_info->task.comm));
+ amdgpu_vm_put_task_info(task_info);
+ }
+}
- len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx:%llx\n",
- KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
- atomic64_read(&adev->smu.throttle_int_counter));
+void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid,
+ unsigned long address, bool write_fault,
+ ktime_t ts)
+{ /* Emit KFD_SMI_EVENT_PAGE_FAULT_START for @pid on @node, timestamped with the caller-supplied @ts. */
+ kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_START,
+ KFD_EVENT_FMT_PAGEFAULT_START(ktime_to_ns(ts), pid,
+ address, node->id, write_fault ? 'W' : 'R')); /* 'W' for a write fault, 'R' otherwise */
+}
- add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, fifo_in, len);
+void kfd_smi_event_page_fault_end(struct kfd_node *node, pid_t pid,
+ unsigned long address, bool migration)
+{ /* Emit KFD_SMI_EVENT_PAGE_FAULT_END for @pid on @node, timestamped with the current boottime in ns. */
+ kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_END,
+ KFD_EVENT_FMT_PAGEFAULT_END(ktime_get_boottime_ns(),
+ pid, address, node->id, migration ? 'M' : 'U')); /* 'M' when @migration is set, 'U' otherwise */
}
-void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
+void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
+ unsigned long start, unsigned long end,
+ uint32_t from, uint32_t to,
+ uint32_t prefetch_loc, uint32_t preferred_loc,
+ uint32_t trigger)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
- struct amdgpu_task_info task_info;
- /* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */
- /* 1 byte event + 1 byte space + 25 bytes msg + 1 byte \n +
- * 1 byte \0 = 29
- */
- char fifo_in[29];
- int len;
+ kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_START, /* event is raised for @pid on @node */
+ KFD_EVENT_FMT_MIGRATE_START(
+ ktime_get_boottime_ns(), pid, start, end - start, /* reported as start address plus size (end - start) */
+ from, to, prefetch_loc, preferred_loc, trigger));
+}
- if (list_empty(&dev->smi_clients))
+void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
+ unsigned long start, unsigned long end,
+ uint32_t from, uint32_t to, uint32_t trigger,
+ int error_code)
+{ /* Emit KFD_SMI_EVENT_MIGRATE_END for @pid on @node, including @error_code. */
+ kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_END,
+ KFD_EVENT_FMT_MIGRATE_END(
+ ktime_get_boottime_ns(), pid, start, end - start, /* reported as start address plus size (end - start) */
+ from, to, trigger, error_code));
+}
+
+void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid,
+ uint32_t trigger)
+{ /* Emit KFD_SMI_EVENT_QUEUE_EVICTION for @pid on @node with the given @trigger, timestamped with boottime ns. */
+ kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_EVICTION,
+ KFD_EVENT_FMT_QUEUE_EVICTION(ktime_get_boottime_ns(), pid,
+ node->id, trigger));
+}
+
+void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid)
+{ /* Emit KFD_SMI_EVENT_QUEUE_RESTORE for @pid on @node, timestamped with boottime ns. */
+ kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_RESTORE,
+ KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(), pid,
+ node->id, 0)); /* last field is 0 here; the rescheduled variant passes 'R' */
+}
+
+void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
+{ /* Emit a KFD_SMI_EVENT_QUEUE_RESTORE event marked 'R' on every device of the process that owns @mm. */
+ struct kfd_process *p;
+ int i;
+
+ p = kfd_lookup_process_by_mm(mm); /* reference dropped with kfd_unref_process() below */
+ if (!p)
return;
- memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(adev, pasid, &task_info);
- /* Report VM faults from user applications, not retry from kernel */
- if (!task_info.pid)
+ for (i = 0; i < p->n_pdds; i++) { /* one event per process device */
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ kfd_smi_event_add(p->lead_thread->pid, pdd->dev,
+ KFD_SMI_EVENT_QUEUE_RESTORE,
+ KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(),
+ p->lead_thread->pid, pdd->dev->id, 'R')); /* 'R' where kfd_smi_event_queue_restore() passes 0 */
+ }
+ kfd_unref_process(p);
+}
+
+void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid,
+ unsigned long address, unsigned long last,
+ uint32_t trigger)
+{ /* Emit KFD_SMI_EVENT_UNMAP_FROM_GPU for @pid on @node covering [@address, @last]. */
+ kfd_smi_event_add(pid, node, KFD_SMI_EVENT_UNMAP_FROM_GPU,
+ KFD_EVENT_FMT_UNMAP_FROM_GPU(ktime_get_boottime_ns(),
+ pid, address, last - address + 1, node->id, trigger)); /* @last is inclusive, hence the + 1 for the size */
+}
+
+void kfd_smi_event_process(struct kfd_process_device *pdd, bool start)
+{ /* Emit KFD_SMI_EVENT_PROCESS_START or KFD_SMI_EVENT_PROCESS_END, selected by @start, on @pdd's device. */
+ struct amdgpu_task_info *task_info;
+ struct amdgpu_vm *avm;
+
+ if (!pdd->drm_priv) /* without drm_priv there is no VM to look the task up from */
return;
- len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
- task_info.pid, task_info.task_name);
+ avm = drm_priv_to_vm(pdd->drm_priv);
+ task_info = amdgpu_vm_get_task_info_vm(avm); /* may return NULL; released with amdgpu_vm_put_task_info() below */
- add_event_to_kfifo(dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
+ if (task_info) {
+ kfd_smi_event_add(0, pdd->dev, /* pid 0: not filtered by client pid */
+ start ? KFD_SMI_EVENT_PROCESS_START :
+ KFD_SMI_EVENT_PROCESS_END,
+ KFD_EVENT_FMT_PROCESS(task_info->task.pid,
+ task_info->task.comm));
+ amdgpu_vm_put_task_info(task_info);
+ }
}
-int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
+int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd)
{
struct kfd_smi_client *client;
int ret;
@@ -264,29 +375,37 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
return -ENOMEM;
INIT_LIST_HEAD(&client->list);
- ret = kfifo_alloc(&client->fifo, MAX_KFIFO_SIZE, GFP_KERNEL);
+ ret = kfifo_alloc(&client->fifo, KFD_MAX_KFIFO_SIZE, GFP_KERNEL);
if (ret) {
kfree(client);
return ret;
}
- ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client,
- O_RDWR);
- if (ret < 0) {
- kfifo_free(&client->fifo);
- kfree(client);
- return ret;
- }
- *fd = ret;
-
init_waitqueue_head(&client->wait_queue);
spin_lock_init(&client->lock);
client->events = 0;
client->dev = dev;
+ client->pid = current->tgid;
+ client->suser = capable(CAP_SYS_ADMIN);
spin_lock(&dev->smi_lock);
list_add_rcu(&client->list, &dev->smi_clients);
spin_unlock(&dev->smi_lock);
+ ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client,
+ O_RDWR);
+ if (ret < 0) {
+ spin_lock(&dev->smi_lock);
+ list_del_rcu(&client->list);
+ spin_unlock(&dev->smi_lock);
+
+ synchronize_rcu();
+
+ kfifo_free(&client->fifo);
+ kfree(client);
+ return ret;
+ }
+ *fd = ret;
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index bffd0c32b060..bb4d72b57387 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2020 Advanced Micro Devices, Inc.
+ * Copyright 2020-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -23,10 +24,34 @@
#ifndef KFD_SMI_EVENTS_H_INCLUDED
#define KFD_SMI_EVENTS_H_INCLUDED
-int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd);
-void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
-void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
- uint64_t throttle_bitmask);
-void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
+struct amdgpu_reset_context;
+int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd);
+void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid);
+void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
+ uint64_t throttle_bitmask);
+void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset,
+ struct amdgpu_reset_context *reset_context);
+void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid,
+ unsigned long address, bool write_fault,
+ ktime_t ts);
+void kfd_smi_event_page_fault_end(struct kfd_node *node, pid_t pid,
+ unsigned long address, bool migration);
+void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
+ unsigned long start, unsigned long end,
+ uint32_t from, uint32_t to,
+ uint32_t prefetch_loc, uint32_t preferred_loc,
+ uint32_t trigger);
+void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
+ unsigned long start, unsigned long end,
+ uint32_t from, uint32_t to, uint32_t trigger,
+ int error_code);
+void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid,
+ uint32_t trigger);
+void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid);
+void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm);
+void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid,
+ unsigned long address, unsigned long last,
+ uint32_t trigger);
+void kfd_smi_event_process(struct kfd_process_device *pdd, bool start);
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 3cb4681c5f53..97c2270f278f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -23,15 +23,20 @@
#include <linux/types.h>
#include <linux/sched/task.h>
+#include <linux/dynamic_debug.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/drm_exec.h>
+
#include "amdgpu_sync.h"
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
-#include "amdgpu_mn.h"
+#include "amdgpu_hmm.h"
#include "amdgpu.h"
#include "amdgpu_xgmi.h"
#include "kfd_priv.h"
#include "kfd_svm.h"
#include "kfd_migrate.h"
+#include "kfd_smi_events.h"
#ifdef dev_fmt
#undef dev_fmt
@@ -43,7 +48,25 @@
/* Long enough to ensure no retry fault comes after svm range is restored and
* page table is updated.
*/
-#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING 2000
+#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING (2UL * NSEC_PER_MSEC)
+#if IS_ENABLED(CONFIG_DYNAMIC_DEBUG)
+#define dynamic_svm_range_dump(svms) \
+ _dynamic_func_call_no_desc("svm_range_dump", svm_range_debug_dump, svms)
+#else
+#define dynamic_svm_range_dump(svms) \
+ do { if (0) svm_range_debug_dump(svms); } while (0)
+#endif
+
+/* A giant svm range is split into smaller ranges based on this value. It is
+ * decided using the minimum of 1/32 of the VRAM size over all dGPUs/APUs,
+ * limited to between 2MB and 1GB and aligned to a power of 2MB.
+ */
+static uint64_t max_svm_range_pages;
+
+struct criu_svm_metadata {
+ struct list_head list;
+ struct kfd_criu_svm_range_priv_data data;
+};
static void svm_range_evict_svm_bo_worker(struct work_struct *work);
static bool
@@ -107,7 +130,7 @@ static void svm_range_add_to_svms(struct svm_range *prange)
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
prange, prange->start, prange->last);
- list_add_tail(&prange->list, &prange->svms->list);
+ list_move_tail(&prange->list, &prange->svms->list);
prange->it_node.start = prange->start;
prange->it_node.last = prange->last;
interval_tree_insert(&prange->it_node, &prange->svms->objects);
@@ -144,8 +167,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
int i, r;
if (!addr) {
- addr = kvmalloc_array(prange->npages, sizeof(*addr),
- GFP_KERNEL | __GFP_ZERO);
+ addr = kvcalloc(prange->npages, sizeof(*addr), GFP_KERNEL);
if (!addr)
return -ENOMEM;
prange->dma_addr[gpuidx] = addr;
@@ -158,12 +180,11 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
page = hmm_pfn_to_page(hmm_pfns[i]);
if (is_zone_device_page(page)) {
- struct amdgpu_device *bo_adev =
- amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
+ struct amdgpu_device *bo_adev = prange->svm_bo->node->adev;
addr[i] = (hmm_pfns[i] << PAGE_SHIFT) +
bo_adev->vm_manager.vram_base_offset -
- bo_adev->kfd.dev->pgmap.range.start;
+ bo_adev->kfd.pgmap.range.start;
addr[i] |= SVM_RANGE_VRAM_DOMAIN;
pr_debug_ratelimited("vram address: 0x%llx\n", addr[i]);
continue;
@@ -177,6 +198,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n",
addr[i] >> PAGE_SHIFT, page_to_pfn(page));
}
+
return 0;
}
@@ -193,7 +215,6 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
struct kfd_process_device *pdd;
- struct amdgpu_device *adev;
pr_debug("mapping to gpu idx 0x%x\n", gpuidx);
pdd = kfd_process_device_from_gpuidx(p, gpuidx);
@@ -201,9 +222,8 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
pr_debug("failed to find device idx %d\n", gpuidx);
return -EINVAL;
}
- adev = (struct amdgpu_device *)pdd->dev->kgd;
- r = svm_range_dma_map_dev(adev, prange, offset, npages,
+ r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, npages,
hmm_pfns, gpuidx);
if (r)
break;
@@ -212,7 +232,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
return r;
}
-void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
+void svm_range_dma_unmap_dev(struct device *dev, dma_addr_t *dma_addr,
unsigned long offset, unsigned long npages)
{
enum dma_data_direction dir = DMA_BIDIRECTIONAL;
@@ -230,7 +250,7 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
}
}
-void svm_range_free_dma_mappings(struct svm_range *prange)
+void svm_range_dma_unmap(struct svm_range *prange)
{
struct kfd_process_device *pdd;
dma_addr_t *dma_addr;
@@ -250,39 +270,59 @@ void svm_range_free_dma_mappings(struct svm_range *prange)
pr_debug("failed to find device idx %d\n", gpuidx);
continue;
}
- dev = &pdd->dev->pdev->dev;
- svm_range_dma_unmap(dev, dma_addr, 0, prange->npages);
- kvfree(dma_addr);
- prange->dma_addr[gpuidx] = NULL;
+ dev = &pdd->dev->adev->pdev->dev;
+
+ svm_range_dma_unmap_dev(dev, dma_addr, 0, prange->npages);
}
}
-static void svm_range_free(struct svm_range *prange)
+static void svm_range_free(struct svm_range *prange, bool do_unmap)
{
+ uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT;
+ struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
+ uint32_t gpuidx;
+
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
prange->start, prange->last);
svm_range_vram_node_free(prange);
- svm_range_free_dma_mappings(prange);
+ if (do_unmap)
+ svm_range_dma_unmap(prange);
+
+ if (do_unmap && !p->xnack_enabled) {
+ pr_debug("unreserve prange 0x%p size: 0x%llx\n", prange, size);
+ amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
+ }
+
+ /* free dma_addr array for each gpu */
+ for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) {
+ if (prange->dma_addr[gpuidx]) {
+ kvfree(prange->dma_addr[gpuidx]);
+ prange->dma_addr[gpuidx] = NULL;
+ }
+ }
+
mutex_destroy(&prange->lock);
mutex_destroy(&prange->migrate_mutex);
kfree(prange);
}
static void
-svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc,
- uint8_t *granularity, uint32_t *flags)
+svm_range_set_default_attributes(struct svm_range_list *svms, int32_t *location,
+ int32_t *prefetch_loc, uint8_t *granularity,
+ uint32_t *flags)
{
*location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
*prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
- *granularity = 9;
+ *granularity = svms->default_granularity;
*flags =
KFD_IOCTL_SVM_FLAG_HOST_ACCESS | KFD_IOCTL_SVM_FLAG_COHERENT;
}
static struct
svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
- uint64_t last)
+ uint64_t last, bool update_mem_usage)
{
uint64_t size = last - start + 1;
struct svm_range *prange;
@@ -291,28 +331,35 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
prange = kzalloc(sizeof(*prange), GFP_KERNEL);
if (!prange)
return NULL;
+
+ p = container_of(svms, struct kfd_process, svms);
+ if (!p->xnack_enabled && update_mem_usage &&
+ amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT,
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0)) {
+ pr_info("SVM mapping failed, exceeds resident system memory limit\n");
+ kfree(prange);
+ return NULL;
+ }
prange->npages = size;
prange->svms = svms;
prange->start = start;
prange->last = last;
INIT_LIST_HEAD(&prange->list);
INIT_LIST_HEAD(&prange->update_list);
- INIT_LIST_HEAD(&prange->remove_list);
- INIT_LIST_HEAD(&prange->insert_list);
INIT_LIST_HEAD(&prange->svm_bo_list);
INIT_LIST_HEAD(&prange->deferred_list);
INIT_LIST_HEAD(&prange->child_list);
atomic_set(&prange->invalid, 0);
prange->validate_timestamp = 0;
+ prange->vram_pages = 0;
mutex_init(&prange->migrate_mutex);
mutex_init(&prange->lock);
- p = container_of(svms, struct kfd_process, svms);
if (p->xnack_enabled)
bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
MAX_GPU_INSTANCE);
- svm_range_set_default_attributes(&prange->preferred_loc,
+ svm_range_set_default_attributes(svms, &prange->preferred_loc,
&prange->prefetch_loc,
&prange->granularity, &prange->flags);
@@ -334,6 +381,8 @@ static void svm_range_bo_release(struct kref *kref)
struct svm_range_bo *svm_bo;
svm_bo = container_of(kref, struct svm_range_bo, kref);
+ pr_debug("svm_bo 0x%p\n", svm_bo);
+
spin_lock(&svm_bo->list_lock);
while (!list_empty(&svm_bo->range_list)) {
struct svm_range *prange =
@@ -349,37 +398,74 @@ static void svm_range_bo_release(struct kref *kref)
prange->start, prange->last);
mutex_lock(&prange->lock);
prange->svm_bo = NULL;
+ /* prange should not hold vram page now */
+ WARN_ONCE(prange->actual_loc, "prange should not hold vram page");
mutex_unlock(&prange->lock);
spin_lock(&svm_bo->list_lock);
}
spin_unlock(&svm_bo->list_lock);
- if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) {
- /* We're not in the eviction worker.
- * Signal the fence and synchronize with any
- * pending eviction work.
+
+ if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+ struct kfd_process_device *pdd;
+ struct kfd_process *p;
+ struct mm_struct *mm;
+
+ mm = svm_bo->eviction_fence->mm;
+ /*
+ * The forked child process takes svm_bo device pages ref, svm_bo could be
+ * released after parent process is gone.
*/
- dma_fence_signal(&svm_bo->eviction_fence->base);
- cancel_work_sync(&svm_bo->eviction_work);
+ p = kfd_lookup_process_by_mm(mm);
+ if (p) {
+ pdd = kfd_get_process_device_data(svm_bo->node, p);
+ if (pdd)
+ atomic64_sub(amdgpu_bo_size(svm_bo->bo), &pdd->vram_usage);
+ kfd_unref_process(p);
+ }
+ mmput(mm);
}
+
+ if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
+ /* We're not in the eviction worker. Signal the fence. */
+ dma_fence_signal(&svm_bo->eviction_fence->base);
dma_fence_put(&svm_bo->eviction_fence->base);
amdgpu_bo_unref(&svm_bo->bo);
kfree(svm_bo);
}
-void svm_range_bo_unref(struct svm_range_bo *svm_bo)
+static void svm_range_bo_wq_release(struct work_struct *work)
{
- if (!svm_bo)
- return;
+ struct svm_range_bo *svm_bo;
- kref_put(&svm_bo->kref, svm_range_bo_release);
+ svm_bo = container_of(work, struct svm_range_bo, release_work);
+ svm_range_bo_release(&svm_bo->kref);
}
-static bool
-svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange)
+static void svm_range_bo_release_async(struct kref *kref)
{
- struct amdgpu_device *bo_adev;
+ struct svm_range_bo *svm_bo;
+ svm_bo = container_of(kref, struct svm_range_bo, kref);
+ pr_debug("svm_bo 0x%p\n", svm_bo);
+ INIT_WORK(&svm_bo->release_work, svm_range_bo_wq_release);
+ schedule_work(&svm_bo->release_work);
+}
+
+void svm_range_bo_unref_async(struct svm_range_bo *svm_bo)
+{
+ kref_put(&svm_bo->kref, svm_range_bo_release_async);
+}
+
+static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
+{
+ if (svm_bo)
+ kref_put(&svm_bo->kref, svm_range_bo_release);
+}
+
+static bool
+svm_range_validate_svm_bo(struct kfd_node *node, struct svm_range *prange)
+{
mutex_lock(&prange->lock);
if (!prange->svm_bo) {
mutex_unlock(&prange->lock);
@@ -392,12 +478,11 @@ svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange)
}
if (svm_bo_ref_unless_zero(prange->svm_bo)) {
/*
- * Migrate from GPU to GPU, remove range from source bo_adev
- * svm_bo range list, and return false to allocate svm_bo from
- * destination adev.
+ * Migrate from GPU to GPU, remove range from source svm_bo->node
+ * range list, and return false to allocate svm_bo from destination
+ * node.
*/
- bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
- if (bo_adev != adev) {
+ if (prange->svm_bo->node != node) {
mutex_unlock(&prange->lock);
spin_lock(&prange->svm_bo->list_lock);
@@ -440,11 +525,11 @@ svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange)
/* We need a new svm_bo. Spin-loop to wait for concurrent
* svm_range_bo_release to finish removing this range from
- * its range list. After this, it is safe to reuse the
- * svm_bo pointer and svm_bo_list head.
+ * its range list and set prange->svm_bo to null. After this,
+ * it is safe to reuse the svm_bo pointer and svm_bo_list head.
*/
- while (!list_empty_careful(&prange->svm_bo_list))
- ;
+ while (!list_empty_careful(&prange->svm_bo_list) || prange->svm_bo)
+ cond_resched();
return false;
}
@@ -465,9 +550,10 @@ static struct svm_range_bo *svm_range_bo_new(void)
}
int
-svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
+svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
bool clear)
{
+ struct kfd_process_device *pdd;
struct amdgpu_bo_param bp;
struct svm_range_bo *svm_bo;
struct amdgpu_bo_user *ubo;
@@ -477,10 +563,11 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
int r;
p = container_of(prange->svms, struct kfd_process, svms);
- pr_debug("pasid: %x svms 0x%p [0x%lx 0x%lx]\n", p->pasid, prange->svms,
+ pr_debug("process pid: %d svms 0x%p [0x%lx 0x%lx]\n",
+ p->lead_thread->pid, prange->svms,
prange->start, prange->last);
- if (svm_range_validate_svm_bo(adev, prange))
+ if (svm_range_validate_svm_bo(node, prange))
return 0;
svm_bo = svm_range_bo_new();
@@ -494,7 +581,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
kfree(svm_bo);
return -ESRCH;
}
- svm_bo->svms = prange->svms;
+ svm_bo->node = node;
svm_bo->eviction_fence =
amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
mm,
@@ -508,23 +595,39 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
bp.flags |= clear ? AMDGPU_GEM_CREATE_VRAM_CLEARED : 0;
- bp.flags |= AMDGPU_AMDKFD_CREATE_SVM_BO;
+ bp.flags |= AMDGPU_GEM_CREATE_DISCARDABLE;
bp.type = ttm_bo_type_device;
bp.resv = NULL;
+ if (node->xcp)
+ bp.xcp_id_plus1 = node->xcp->id + 1;
- r = amdgpu_bo_create_user(adev, &bp, &ubo);
+ r = amdgpu_bo_create_user(node->adev, &bp, &ubo);
if (r) {
pr_debug("failed %d to create bo\n", r);
goto create_bo_failed;
}
bo = &ubo->bo;
+
+ pr_debug("alloc bo at offset 0x%lx size 0x%lx on partition %d\n",
+ bo->tbo.resource->start << PAGE_SHIFT, bp.size,
+ bp.xcp_id_plus1 - 1);
+
r = amdgpu_bo_reserve(bo, true);
if (r) {
pr_debug("failed %d to reserve bo\n", r);
goto reserve_bo_failed;
}
- r = dma_resv_reserve_shared(bo->tbo.base.resv, 1);
+ if (clear) {
+ r = amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
+ if (r) {
+ pr_debug("failed %d to sync bo\n", r);
+ amdgpu_bo_unreserve(bo);
+ goto reserve_bo_failed;
+ }
+ }
+
+ r = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
if (r) {
pr_debug("failed %d to reserve bo\n", r);
amdgpu_bo_unreserve(bo);
@@ -543,6 +646,10 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
list_add(&prange->svm_bo_list, &svm_bo->range_list);
spin_unlock(&svm_bo->list_lock);
+ pdd = svm_range_get_pdd_by_node(prange, node);
+ if (pdd)
+ atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage);
+
return 0;
reserve_bo_failed:
@@ -557,49 +664,41 @@ create_bo_failed:
void svm_range_vram_node_free(struct svm_range *prange)
{
- svm_range_bo_unref(prange->svm_bo);
- prange->ttm_res = NULL;
+ /* serialize prange->svm_bo unref */
+ mutex_lock(&prange->lock);
+ /* prange->svm_bo has not been unref */
+ if (prange->ttm_res) {
+ prange->ttm_res = NULL;
+ mutex_unlock(&prange->lock);
+ svm_range_bo_unref(prange->svm_bo);
+ } else
+ mutex_unlock(&prange->lock);
}
-struct amdgpu_device *
-svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id)
+struct kfd_node *
+svm_range_get_node_by_id(struct svm_range *prange, uint32_t gpu_id)
{
- struct kfd_process_device *pdd;
struct kfd_process *p;
- int32_t gpu_idx;
+ struct kfd_process_device *pdd;
p = container_of(prange->svms, struct kfd_process, svms);
-
- gpu_idx = kfd_process_gpuidx_from_gpuid(p, gpu_id);
- if (gpu_idx < 0) {
- pr_debug("failed to get device by id 0x%x\n", gpu_id);
- return NULL;
- }
- pdd = kfd_process_device_from_gpuidx(p, gpu_idx);
+ pdd = kfd_process_device_data_by_id(p, gpu_id);
if (!pdd) {
- pr_debug("failed to get device by idx 0x%x\n", gpu_idx);
+ pr_debug("failed to get kfd process device by id 0x%x\n", gpu_id);
return NULL;
}
- return (struct amdgpu_device *)pdd->dev->kgd;
+ return pdd->dev;
}
struct kfd_process_device *
-svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev)
+svm_range_get_pdd_by_node(struct svm_range *prange, struct kfd_node *node)
{
struct kfd_process *p;
- int32_t gpu_idx, gpuid;
- int r;
p = container_of(prange->svms, struct kfd_process, svms);
- r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpu_idx);
- if (r) {
- pr_debug("failed to get device id by adev %p\n", adev);
- return NULL;
- }
-
- return kfd_process_device_from_gpuidx(p, gpu_idx);
+ return kfd_get_process_device_data(node, p);
}
static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo)
@@ -662,7 +761,8 @@ svm_range_check_attr(struct kfd_process *p,
static void
svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
- uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+ uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
+ bool *update_mapping)
{
uint32_t i;
int gpuidx;
@@ -678,6 +778,9 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
case KFD_IOCTL_SVM_ATTR_ACCESS:
case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+ if (!p->xnack_enabled)
+ *update_mapping = true;
+
gpuidx = kfd_process_gpuidx_from_gpuid(p,
attrs[i].value);
if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
@@ -692,18 +795,75 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
}
break;
case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+ *update_mapping = true;
prange->flags |= attrs[i].value;
break;
case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+ *update_mapping = true;
prange->flags &= ~attrs[i].value;
break;
case KFD_IOCTL_SVM_ATTR_GRANULARITY:
- prange->granularity = attrs[i].value;
+ prange->granularity = min_t(uint32_t, attrs[i].value, 0x3F);
+ break;
+ default:
+ WARN_ONCE(1, "svm_range_check_attrs wasn't called?");
+ }
+ }
+}
+
+static bool
+svm_range_is_same_attrs(struct kfd_process *p, struct svm_range *prange,
+ uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+{ /* Return true only if every one of the @nattr entries in @attrs already matches the state stored in @prange. */
+ uint32_t i;
+ int gpuidx;
+
+ for (i = 0; i < nattr; i++) {
+ switch (attrs[i].type) {
+ case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
+ if (prange->preferred_loc != attrs[i].value)
+ return false;
+ break;
+ case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+ /* Prefetch should always trigger a migration even
+ * if the value of the attribute didn't change.
+ */
+ return false;
+ case KFD_IOCTL_SVM_ATTR_ACCESS:
+ case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
+ case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+ gpuidx = kfd_process_gpuidx_from_gpuid(p, /* attribute value is a gpu id; the bitmaps are indexed by gpuidx */
+ attrs[i].value);
+ if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
+ if (test_bit(gpuidx, prange->bitmap_access) || /* NO_ACCESS matches only when neither access bit is set */
+ test_bit(gpuidx, prange->bitmap_aip))
+ return false;
+ } else if (attrs[i].type == KFD_IOCTL_SVM_ATTR_ACCESS) {
+ if (!test_bit(gpuidx, prange->bitmap_access))
+ return false;
+ } else { /* KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE */
+ if (!test_bit(gpuidx, prange->bitmap_aip))
+ return false;
+ }
+ break;
+ case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+ if ((prange->flags & attrs[i].value) != attrs[i].value) /* all requested flags must already be set */
+ return false;
+ break;
+ case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+ if ((prange->flags & attrs[i].value) != 0) /* all requested flags must already be clear */
+ return false;
+ break;
+ case KFD_IOCTL_SVM_ATTR_GRANULARITY:
+ if (prange->granularity != attrs[i].value) /* NOTE(review): svm_range_apply_attrs() stores min(value, 0x3F), so a larger value never compares equal here */
+ return false;
break;
default:
WARN_ONCE(1, "svm_range_check_attrs wasn't called?");
}
}
+
+ return true;
}
/**
@@ -743,18 +903,56 @@ static void svm_range_debug_dump(struct svm_range_list *svms)
}
}
-static bool
-svm_range_is_same_attrs(struct svm_range *old, struct svm_range *new)
+static void *
+svm_range_copy_array(void *psrc, size_t size, uint64_t num_elements,
+ uint64_t offset, uint64_t *vram_pages)
{
- return (old->prefetch_loc == new->prefetch_loc &&
- old->flags == new->flags &&
- old->granularity == new->granularity);
+ unsigned char *src = (unsigned char *)psrc + offset;
+ unsigned char *dst;
+ uint64_t i;
+
+ dst = kvmalloc_array(num_elements, size, GFP_KERNEL);
+ if (!dst)
+ return NULL;
+
+ if (!vram_pages) {
+ memcpy(dst, src, num_elements * size);
+ return (void *)dst;
+ }
+
+ *vram_pages = 0;
+ for (i = 0; i < num_elements; i++) {
+ dma_addr_t *temp;
+ temp = (dma_addr_t *)dst + i;
+ *temp = *((dma_addr_t *)src + i);
+ if (*temp&SVM_RANGE_VRAM_DOMAIN)
+ (*vram_pages)++;
+ }
+
+ return (void *)dst;
+}
+
+static int
+svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src)
+{
+ int i;
+
+ for (i = 0; i < MAX_GPU_INSTANCE; i++) {
+ if (!src->dma_addr[i])
+ continue;
+ dst->dma_addr[i] = svm_range_copy_array(src->dma_addr[i],
+ sizeof(*src->dma_addr[i]), src->npages, 0, NULL);
+ if (!dst->dma_addr[i])
+ return -ENOMEM;
+ }
+
+ return 0;
}
static int
svm_range_split_array(void *ppnew, void *ppold, size_t size,
uint64_t old_start, uint64_t old_n,
- uint64_t new_start, uint64_t new_n)
+ uint64_t new_start, uint64_t new_n, uint64_t *new_vram_pages)
{
unsigned char *new, *old, *pold;
uint64_t d;
@@ -765,22 +963,17 @@ svm_range_split_array(void *ppnew, void *ppold, size_t size,
if (!pold)
return 0;
- new = kvmalloc_array(new_n, size, GFP_KERNEL);
+ d = (new_start - old_start) * size;
+ /* get dma addr array for new range and calculte its vram page number */
+ new = svm_range_copy_array(pold, size, new_n, d, new_vram_pages);
if (!new)
return -ENOMEM;
-
- d = (new_start - old_start) * size;
- memcpy(new, pold + d, new_n * size);
-
- old = kvmalloc_array(old_n, size, GFP_KERNEL);
+ d = (new_start == old_start) ? new_n * size : 0;
+ old = svm_range_copy_array(pold, size, old_n, d, NULL);
if (!old) {
kvfree(new);
return -ENOMEM;
}
-
- d = (new_start == old_start) ? new_n * size : 0;
- memcpy(old, pold + d, old_n * size);
-
kvfree(pold);
*(void **)ppold = old;
*(void **)ppnew = new;
@@ -798,10 +991,13 @@ svm_range_split_pages(struct svm_range *new, struct svm_range *old,
for (i = 0; i < MAX_GPU_INSTANCE; i++) {
r = svm_range_split_array(&new->dma_addr[i], &old->dma_addr[i],
sizeof(*old->dma_addr[i]), old->start,
- npages, new->start, new->npages);
+ npages, new->start, new->npages,
+ old->actual_loc ? &new->vram_pages : NULL);
if (r)
return r;
}
+ if (old->actual_loc)
+ old->vram_pages -= new->vram_pages;
return 0;
}
@@ -880,8 +1076,10 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
new->prefetch_loc = old->prefetch_loc;
new->actual_loc = old->actual_loc;
new->granularity = old->granularity;
+ new->mapped_to_gpu = old->mapped_to_gpu;
bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
+ atomic_set(&new->queue_refcount, atomic_read(&old->queue_refcount));
return 0;
}
@@ -925,9 +1123,9 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
svms = prange->svms;
if (old_start == start)
- *new = svm_range_new(svms, last + 1, old_last);
+ *new = svm_range_new(svms, last + 1, old_last, false);
else
- *new = svm_range_new(svms, old_start, start - 1);
+ *new = svm_range_new(svms, old_start, start - 1, false);
if (!*new)
return -ENOMEM;
@@ -935,7 +1133,7 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
if (r) {
pr_debug("failed %d split [0x%llx 0x%llx] to [0x%llx 0x%llx]\n",
r, old_start, old_last, start, last);
- svm_range_free(*new);
+ svm_range_free(*new, false);
*new = NULL;
}
@@ -943,126 +1141,80 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
}
static int
-svm_range_split_tail(struct svm_range *prange, struct svm_range *new,
- uint64_t new_last, struct list_head *insert_list)
+svm_range_split_tail(struct svm_range *prange, uint64_t new_last,
+ struct list_head *insert_list, struct list_head *remap_list)
{
- struct svm_range *tail;
+ struct svm_range *tail = NULL;
int r = svm_range_split(prange, prange->start, new_last, &tail);
- if (!r)
- list_add(&tail->insert_list, insert_list);
+ if (!r) {
+ list_add(&tail->list, insert_list);
+ if (!IS_ALIGNED(new_last + 1, 1UL << prange->granularity))
+ list_add(&tail->update_list, remap_list);
+ }
return r;
}
static int
-svm_range_split_head(struct svm_range *prange, struct svm_range *new,
- uint64_t new_start, struct list_head *insert_list)
+svm_range_split_head(struct svm_range *prange, uint64_t new_start,
+ struct list_head *insert_list, struct list_head *remap_list)
{
- struct svm_range *head;
+ struct svm_range *head = NULL;
int r = svm_range_split(prange, new_start, prange->last, &head);
- if (!r)
- list_add(&head->insert_list, insert_list);
+ if (!r) {
+ list_add(&head->list, insert_list);
+ if (!IS_ALIGNED(new_start, 1UL << prange->granularity))
+ list_add(&head->update_list, remap_list);
+ }
return r;
}
static void
-svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,
- struct svm_range *pchild, enum svm_work_list_ops op)
+svm_range_add_child(struct svm_range *prange, struct svm_range *pchild, enum svm_work_list_ops op)
{
pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n",
pchild, pchild->start, pchild->last, prange, op);
- pchild->work_item.mm = mm;
+ pchild->work_item.mm = NULL;
pchild->work_item.op = op;
list_add_tail(&pchild->child_list, &prange->child_list);
}
-/**
- * svm_range_split_by_granularity - collect ranges within granularity boundary
- *
- * @p: the process with svms list
- * @mm: mm structure
- * @addr: the vm fault address in pages, to split the prange
- * @parent: parent range if prange is from child list
- * @prange: prange to split
- *
- * Trims @prange to be a single aligned block of prange->granularity if
- * possible. The head and tail are added to the child_list in @parent.
- *
- * Context: caller must hold mmap_read_lock and prange->lock
- *
- * Return:
- * 0 - OK, otherwise error code
- */
-int
-svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
- unsigned long addr, struct svm_range *parent,
- struct svm_range *prange)
+static bool
+svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b)
{
- struct svm_range *head, *tail;
- unsigned long start, last, size;
- int r;
-
- /* Align splited range start and size to granularity size, then a single
- * PTE will be used for whole range, this reduces the number of PTE
- * updated and the L1 TLB space used for translation.
- */
- size = 1UL << prange->granularity;
- start = ALIGN_DOWN(addr, size);
- last = ALIGN(addr + 1, size) - 1;
-
- pr_debug("svms 0x%p split [0x%lx 0x%lx] to [0x%lx 0x%lx] size 0x%lx\n",
- prange->svms, prange->start, prange->last, start, last, size);
-
- if (start > prange->start) {
- r = svm_range_split(prange, start, prange->last, &head);
- if (r)
- return r;
- svm_range_add_child(parent, mm, head, SVM_OP_ADD_RANGE);
- }
-
- if (last < prange->last) {
- r = svm_range_split(prange, prange->start, last, &tail);
- if (r)
- return r;
- svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
- }
-
- /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
- if (p->xnack_enabled && prange->work_item.op == SVM_OP_ADD_RANGE) {
- prange->work_item.op = SVM_OP_ADD_RANGE_AND_MAP;
- pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n",
- prange, prange->start, prange->last,
- SVM_OP_ADD_RANGE_AND_MAP);
- }
- return 0;
+ return (node_a->adev == node_b->adev ||
+ amdgpu_xgmi_same_hive(node_a->adev, node_b->adev));
}
static uint64_t
-svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
- int domain)
+svm_range_get_pte_flags(struct kfd_node *node, struct amdgpu_vm *vm,
+ struct svm_range *prange, int domain)
{
- struct amdgpu_device *bo_adev;
+ struct kfd_node *bo_node;
uint32_t flags = prange->flags;
uint32_t mapping_flags = 0;
+ uint32_t gc_ip_version = KFD_GC_VERSION(node);
uint64_t pte_flags;
bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
- bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
+ bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | KFD_IOCTL_SVM_FLAG_EXT_COHERENT);
+ bool ext_coherent = flags & KFD_IOCTL_SVM_FLAG_EXT_COHERENT;
+ unsigned int mtype_local;
if (domain == SVM_RANGE_VRAM_DOMAIN)
- bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
+ bo_node = prange->svm_bo->node;
- switch (adev->asic_type) {
- case CHIP_ARCTURUS:
+ switch (gc_ip_version) {
+ case IP_VERSION(9, 4, 1):
if (domain == SVM_RANGE_VRAM_DOMAIN) {
- if (bo_adev == adev) {
+ if (bo_node == node) {
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
} else {
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
- if (amdgpu_xgmi_same_hive(adev, bo_adev))
+ if (svm_nodes_in_same_hive(node, bo_node))
snoop = true;
}
} else {
@@ -1070,17 +1222,17 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
}
break;
- case CHIP_ALDEBARAN:
+ case IP_VERSION(9, 4, 2):
if (domain == SVM_RANGE_VRAM_DOMAIN) {
- if (bo_adev == adev) {
+ if (bo_node == node) {
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
- if (adev->gmc.xgmi.connected_to_cpu)
+ if (node->adev->gmc.xgmi.connected_to_cpu)
snoop = true;
} else {
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
- if (amdgpu_xgmi_same_hive(adev, bo_adev))
+ if (svm_nodes_in_same_hive(node, bo_node))
snoop = true;
}
} else {
@@ -1088,23 +1240,71 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
}
break;
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ if (ext_coherent)
+ mtype_local = AMDGPU_VM_MTYPE_CC;
+ else
+ mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC :
+ amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
+ snoop = true;
+ if (domain == SVM_RANGE_VRAM_DOMAIN) {
+ /* local HBM region close to partition */
+ if (bo_node->adev == node->adev &&
+ (!bo_node->xcp || !node->xcp || bo_node->xcp->mem_id == node->xcp->mem_id))
+ mapping_flags |= mtype_local;
+ /* local HBM region far from partition or remote XGMI GPU
+ * with regular system scope coherence
+ */
+ else if (svm_nodes_in_same_hive(bo_node, node) && !ext_coherent)
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
+ /* PCIe P2P on GPUs pre-9.5.0 */
+ else if (gc_ip_version < IP_VERSION(9, 5, 0) &&
+ !svm_nodes_in_same_hive(bo_node, node))
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ /* Other remote memory */
+ else
+ mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+ /* system memory accessed by the APU */
+ } else if (node->adev->flags & AMD_IS_APU) {
+ /* On NUMA systems, locality is determined per-page
+ * in amdgpu_gmc_override_vm_pte_flags
+ */
+ if (num_possible_nodes() <= 1)
+ mapping_flags |= mtype_local;
+ else
+ mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+ /* system memory accessed by the dGPU */
+ } else {
+ if (gc_ip_version < IP_VERSION(9, 5, 0) || ext_coherent)
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ else
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
+ }
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
+ break;
default:
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
}
- mapping_flags |= AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE;
-
- if (flags & KFD_IOCTL_SVM_FLAG_GPU_RO)
- mapping_flags &= ~AMDGPU_VM_PAGE_WRITEABLE;
if (flags & KFD_IOCTL_SVM_FLAG_GPU_EXEC)
mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
pte_flags = AMDGPU_PTE_VALID;
pte_flags |= (domain == SVM_RANGE_VRAM_DOMAIN) ? 0 : AMDGPU_PTE_SYSTEM;
pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
+ if (gc_ip_version >= IP_VERSION(12, 0, 0))
+ pte_flags |= AMDGPU_PTE_IS_PTE;
- pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags);
+ amdgpu_gmc_get_vm_pte(node->adev, vm, NULL, mapping_flags, &pte_flags);
+ pte_flags |= AMDGPU_PTE_READABLE;
+ if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
+ pte_flags |= AMDGPU_PTE_WRITEABLE;
return pte_flags;
}
@@ -1117,23 +1317,33 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
pr_debug("[0x%llx 0x%llx]\n", start, last);
- return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL,
- start, last, init_pte_value, 0,
- NULL, NULL, fence, NULL);
+ return amdgpu_vm_update_range(adev, vm, false, true, true, false, NULL, start,
+ last, init_pte_value, 0, 0, NULL, NULL,
+ fence);
}
static int
svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
- unsigned long last)
+ unsigned long last, uint32_t trigger)
{
DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
struct kfd_process_device *pdd;
struct dma_fence *fence = NULL;
- struct amdgpu_device *adev;
struct kfd_process *p;
uint32_t gpuidx;
int r = 0;
+ if (!prange->mapped_to_gpu) {
+ pr_debug("prange 0x%p [0x%lx 0x%lx] not mapped to GPU\n",
+ prange, prange->start, prange->last);
+ return 0;
+ }
+
+ if (prange->start == start && prange->last == last) {
+ pr_debug("unmap svms 0x%p prange 0x%p\n", prange->svms, prange);
+ prange->mapped_to_gpu = false;
+ }
+
bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
MAX_GPU_INSTANCE);
p = container_of(prange->svms, struct kfd_process, svms);
@@ -1145,9 +1355,12 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
pr_debug("failed to find device idx %d\n", gpuidx);
return -EINVAL;
}
- adev = (struct amdgpu_device *)pdd->dev->kgd;
- r = svm_range_unmap_from_gpu(adev, drm_priv_to_vm(pdd->drm_priv),
+ kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread->pid,
+ start, last, trigger);
+
+ r = svm_range_unmap_from_gpu(pdd->dev->adev,
+ drm_priv_to_vm(pdd->drm_priv),
start, last, &fence);
if (r)
break;
@@ -1159,21 +1372,20 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
if (r)
break;
}
- amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev,
- p->pasid, TLB_FLUSH_HEAVYWEIGHT);
+ kfd_flush_tlb(pdd, TLB_FLUSH_HEAVYWEIGHT);
}
return r;
}
static int
-svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- struct svm_range *prange, unsigned long offset,
- unsigned long npages, bool readonly, dma_addr_t *dma_addr,
- struct amdgpu_device *bo_adev, struct dma_fence **fence)
+svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
+ unsigned long offset, unsigned long npages, bool readonly,
+ dma_addr_t *dma_addr, struct amdgpu_device *bo_adev,
+ struct dma_fence **fence, bool flush_tlb)
{
- struct amdgpu_bo_va bo_va;
- bool table_freed = false;
+ struct amdgpu_device *adev = pdd->dev->adev;
+ struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
uint64_t pte_flags;
unsigned long last_start;
int last_domain;
@@ -1185,9 +1397,6 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms,
last_start, last_start + npages - 1, readonly);
- if (prange->svm_bo && prange->ttm_res)
- bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
-
for (i = offset; i < offset + npages; i++) {
last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN;
@@ -1202,7 +1411,7 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
last_start, prange->start + i, last_domain ? "GPU" : "CPU");
- pte_flags = svm_range_get_pte_flags(adev, prange, last_domain);
+ pte_flags = svm_range_get_pte_flags(pdd->dev, vm, prange, last_domain);
if (readonly)
pte_flags &= ~AMDGPU_PTE_WRITEABLE;
@@ -1211,13 +1420,16 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
(last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
pte_flags);
- r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
- NULL, last_start,
- prange->start + i, pte_flags,
- last_start - prange->start,
- NULL, dma_addr,
- &vm->last_update,
- &table_freed);
+ /* For dGPU mode, we use same vm_manager to allocate VRAM for
+ * different memory partition based on fpfn/lpfn, we should use
+ * same vm_manager.vram_base_offset regardless memory partition.
+ */
+ r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, true,
+ NULL, last_start, prange->start + i,
+ pte_flags,
+ (last_start - prange->start) << PAGE_SHIFT,
+ bo_adev ? bo_adev->vm_manager.vram_base_offset : 0,
+ NULL, dma_addr, &vm->last_update);
for (j = last_start - prange->start; j <= i; j++)
dma_addr[j] |= last_domain;
@@ -1239,13 +1451,6 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (fence)
*fence = dma_fence_get(vm->last_update);
- if (table_freed) {
- struct kfd_process *p;
-
- p = container_of(prange->svms, struct kfd_process, svms);
- amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev,
- p->pasid, TLB_FLUSH_LEGACY);
- }
out:
return r;
}
@@ -1253,20 +1458,17 @@ out:
static int
svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
unsigned long npages, bool readonly,
- unsigned long *bitmap, bool wait)
+ unsigned long *bitmap, bool wait, bool flush_tlb)
{
struct kfd_process_device *pdd;
- struct amdgpu_device *bo_adev;
- struct amdgpu_device *adev;
+ struct amdgpu_device *bo_adev = NULL;
struct kfd_process *p;
struct dma_fence *fence = NULL;
uint32_t gpuidx;
int r = 0;
if (prange->svm_bo && prange->ttm_res)
- bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
- else
- bo_adev = NULL;
+ bo_adev = prange->svm_bo->node->adev;
p = container_of(prange->svms, struct kfd_process, svms);
for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
@@ -1276,22 +1478,21 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
pr_debug("failed to find device idx %d\n", gpuidx);
return -EINVAL;
}
- adev = (struct amdgpu_device *)pdd->dev->kgd;
pdd = kfd_bind_process_to_device(pdd->dev, p);
if (IS_ERR(pdd))
return -EINVAL;
- if (bo_adev && adev != bo_adev &&
- !amdgpu_xgmi_same_hive(adev, bo_adev)) {
+ if (bo_adev && pdd->dev->adev != bo_adev &&
+ !amdgpu_xgmi_same_hive(pdd->dev->adev, bo_adev)) {
pr_debug("cannot map to device idx %d\n", gpuidx);
continue;
}
- r = svm_range_map_to_gpu(adev, drm_priv_to_vm(pdd->drm_priv),
- prange, offset, npages, readonly,
+ r = svm_range_map_to_gpu(pdd, prange, offset, npages, readonly,
prange->dma_addr[gpuidx],
- bo_adev, wait ? &fence : NULL);
+ bo_adev, wait ? &fence : NULL,
+ flush_tlb);
if (r)
break;
@@ -1304,6 +1505,8 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
break;
}
}
+
+ kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
}
return r;
@@ -1313,40 +1516,35 @@ struct svm_validate_context {
struct kfd_process *process;
struct svm_range *prange;
bool intr;
- unsigned long bitmap[MAX_GPU_INSTANCE];
- struct ttm_validate_buffer tv[MAX_GPU_INSTANCE];
- struct list_head validate_list;
- struct ww_acquire_ctx ticket;
+ DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
+ struct drm_exec exec;
};
-static int svm_range_reserve_bos(struct svm_validate_context *ctx)
+static int svm_range_reserve_bos(struct svm_validate_context *ctx, bool intr)
{
struct kfd_process_device *pdd;
- struct amdgpu_device *adev;
struct amdgpu_vm *vm;
uint32_t gpuidx;
int r;
- INIT_LIST_HEAD(&ctx->validate_list);
- for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
- pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
- if (!pdd) {
- pr_debug("failed to find device idx %d\n", gpuidx);
- return -EINVAL;
- }
- adev = (struct amdgpu_device *)pdd->dev->kgd;
- vm = drm_priv_to_vm(pdd->drm_priv);
-
- ctx->tv[gpuidx].bo = &vm->root.bo->tbo;
- ctx->tv[gpuidx].num_shared = 4;
- list_add(&ctx->tv[gpuidx].head, &ctx->validate_list);
- }
+ drm_exec_init(&ctx->exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT: 0, 0);
+ drm_exec_until_all_locked(&ctx->exec) {
+ for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
+ pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
+ if (!pdd) {
+ pr_debug("failed to find device idx %d\n", gpuidx);
+ r = -EINVAL;
+ goto unreserve_out;
+ }
+ vm = drm_priv_to_vm(pdd->drm_priv);
- r = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->validate_list,
- ctx->intr, NULL);
- if (r) {
- pr_debug("failed %d to reserve bo\n", r);
- return r;
+ r = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
+ drm_exec_retry_on_contention(&ctx->exec);
+ if (unlikely(r)) {
+ pr_debug("failed %d to reserve bo\n", r);
+ goto unreserve_out;
+ }
+ }
}
for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
@@ -1356,10 +1554,10 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx)
r = -EINVAL;
goto unreserve_out;
}
- adev = (struct amdgpu_device *)pdd->dev->kgd;
- r = amdgpu_vm_validate_pt_bos(adev, drm_priv_to_vm(pdd->drm_priv),
- svm_range_bo_validate, NULL);
+ r = amdgpu_vm_validate(pdd->dev->adev,
+ drm_priv_to_vm(pdd->drm_priv), NULL,
+ svm_range_bo_validate, NULL);
if (r) {
pr_debug("failed %d validate pt bos\n", r);
goto unreserve_out;
@@ -1369,24 +1567,24 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx)
return 0;
unreserve_out:
- ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
+ drm_exec_fini(&ctx->exec);
return r;
}
static void svm_range_unreserve_bos(struct svm_validate_context *ctx)
{
- ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
+ drm_exec_fini(&ctx->exec);
}
static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
{
struct kfd_process_device *pdd;
- struct amdgpu_device *adev;
pdd = kfd_process_device_from_gpuidx(p, gpuidx);
- adev = (struct amdgpu_device *)pdd->dev->kgd;
+ if (!pdd)
+ return NULL;
- return SVM_ADEV_PGMAP_OWNER(adev);
+ return SVM_ADEV_PGMAP_OWNER(pdd->dev->adev);
}
/*
@@ -1414,135 +1612,196 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
* 5. Release page table (and SVM BO) reservation
*/
static int svm_range_validate_and_map(struct mm_struct *mm,
- struct svm_range *prange,
- int32_t gpuidx, bool intr, bool wait)
+ unsigned long map_start, unsigned long map_last,
+ struct svm_range *prange, int32_t gpuidx,
+ bool intr, bool wait, bool flush_tlb)
{
- struct svm_validate_context ctx;
+ struct svm_validate_context *ctx;
unsigned long start, end, addr;
struct kfd_process *p;
void *owner;
int32_t idx;
int r = 0;
- ctx.process = container_of(prange->svms, struct kfd_process, svms);
- ctx.prange = prange;
- ctx.intr = intr;
+ ctx = kzalloc(sizeof(struct svm_validate_context), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+ ctx->process = container_of(prange->svms, struct kfd_process, svms);
+ ctx->prange = prange;
+ ctx->intr = intr;
if (gpuidx < MAX_GPU_INSTANCE) {
- bitmap_zero(ctx.bitmap, MAX_GPU_INSTANCE);
- bitmap_set(ctx.bitmap, gpuidx, 1);
- } else if (ctx.process->xnack_enabled) {
- bitmap_copy(ctx.bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
+ bitmap_zero(ctx->bitmap, MAX_GPU_INSTANCE);
+ bitmap_set(ctx->bitmap, gpuidx, 1);
+ } else if (ctx->process->xnack_enabled) {
+ bitmap_copy(ctx->bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
/* If prefetch range to GPU, or GPU retry fault migrate range to
* GPU, which has ACCESS attribute to the range, create mapping
* on that GPU.
*/
if (prange->actual_loc) {
- gpuidx = kfd_process_gpuidx_from_gpuid(ctx.process,
+ gpuidx = kfd_process_gpuidx_from_gpuid(ctx->process,
prange->actual_loc);
if (gpuidx < 0) {
WARN_ONCE(1, "failed get device by id 0x%x\n",
prange->actual_loc);
- return -EINVAL;
+ r = -EINVAL;
+ goto free_ctx;
}
if (test_bit(gpuidx, prange->bitmap_access))
- bitmap_set(ctx.bitmap, gpuidx, 1);
+ bitmap_set(ctx->bitmap, gpuidx, 1);
+ }
+
+ /*
+ * If prange is already mapped or with always mapped flag,
+ * update mapping on GPUs with ACCESS attribute
+ */
+ if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
+ if (prange->mapped_to_gpu ||
+ prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)
+ bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
}
} else {
- bitmap_or(ctx.bitmap, prange->bitmap_access,
+ bitmap_or(ctx->bitmap, prange->bitmap_access,
prange->bitmap_aip, MAX_GPU_INSTANCE);
}
- if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE))
- return 0;
+ if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
+ r = 0;
+ goto free_ctx;
+ }
if (prange->actual_loc && !prange->ttm_res) {
/* This should never happen. actual_loc gets set by
* svm_migrate_ram_to_vram after allocating a BO.
*/
WARN_ONCE(1, "VRAM BO missing during validation\n");
- return -EINVAL;
+ r = -EINVAL;
+ goto free_ctx;
}
- svm_range_reserve_bos(&ctx);
+ r = svm_range_reserve_bos(ctx, intr);
+ if (r)
+ goto free_ctx;
p = container_of(prange->svms, struct kfd_process, svms);
- owner = kfd_svm_page_owner(p, find_first_bit(ctx.bitmap,
+ owner = kfd_svm_page_owner(p, find_first_bit(ctx->bitmap,
MAX_GPU_INSTANCE));
- for_each_set_bit(idx, ctx.bitmap, MAX_GPU_INSTANCE) {
+ for_each_set_bit(idx, ctx->bitmap, MAX_GPU_INSTANCE) {
if (kfd_svm_page_owner(p, idx) != owner) {
owner = NULL;
break;
}
}
- start = prange->start << PAGE_SHIFT;
- end = (prange->last + 1) << PAGE_SHIFT;
- for (addr = start; addr < end && !r; ) {
- struct hmm_range *hmm_range;
+ start = map_start << PAGE_SHIFT;
+ end = (map_last + 1) << PAGE_SHIFT;
+ for (addr = start; !r && addr < end; ) {
+ struct amdgpu_hmm_range *range = NULL;
+ unsigned long map_start_vma;
+ unsigned long map_last_vma;
struct vm_area_struct *vma;
- unsigned long next;
+ unsigned long next = 0;
unsigned long offset;
unsigned long npages;
bool readonly;
- vma = find_vma(mm, addr);
- if (!vma || addr < vma->vm_start) {
- r = -EFAULT;
- goto unreserve_out;
- }
- readonly = !(vma->vm_flags & VM_WRITE);
+ vma = vma_lookup(mm, addr);
+ if (vma) {
+ readonly = !(vma->vm_flags & VM_WRITE);
- next = min(vma->vm_end, end);
- npages = (next - addr) >> PAGE_SHIFT;
- WRITE_ONCE(p->svms.faulting_task, current);
- r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
- addr, npages, &hmm_range,
- readonly, true, owner);
- WRITE_ONCE(p->svms.faulting_task, NULL);
- if (r) {
- pr_debug("failed %d to get svm range pages\n", r);
- goto unreserve_out;
+ next = min(vma->vm_end, end);
+ npages = (next - addr) >> PAGE_SHIFT;
+ /* HMM requires at least READ permissions. If provided with PROT_NONE,
+ * unmap the memory. If it's not already mapped, this is a no-op
+ * If PROT_WRITE is provided without READ, warn first then unmap
+ */
+ if (!(vma->vm_flags & VM_READ)) {
+ unsigned long e, s;
+
+ svm_range_lock(prange);
+ if (vma->vm_flags & VM_WRITE)
+ pr_debug("VM_WRITE without VM_READ is not supported");
+ s = max(start, prange->start);
+ e = min(end, prange->last);
+ if (e >= s)
+ r = svm_range_unmap_from_gpus(prange, s, e,
+ KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU);
+ svm_range_unlock(prange);
+ /* If unmap returns non-zero, we'll bail on the next for loop
+ * iteration, so just leave r and continue
+ */
+ addr = next;
+ continue;
+ }
+
+ WRITE_ONCE(p->svms.faulting_task, current);
+ range = amdgpu_hmm_range_alloc(NULL);
+ if (likely(range))
+ r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
+ readonly, owner, range);
+ else
+ r = -ENOMEM;
+ WRITE_ONCE(p->svms.faulting_task, NULL);
+ if (r)
+ pr_debug("failed %d to get svm range pages\n", r);
+ } else {
+ r = -EFAULT;
}
- offset = (addr - start) >> PAGE_SHIFT;
- r = svm_range_dma_map(prange, ctx.bitmap, offset, npages,
- hmm_range->hmm_pfns);
- if (r) {
- pr_debug("failed %d to dma map range\n", r);
- goto unreserve_out;
+ if (!r) {
+ offset = (addr >> PAGE_SHIFT) - prange->start;
+ r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
+ range->hmm_range.hmm_pfns);
+ if (r)
+ pr_debug("failed %d to dma map range\n", r);
}
svm_range_lock(prange);
- if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
+
+ /* Free backing memory of hmm_range if it was initialized
+ * Override return value to TRY AGAIN only if prior returns
+ * were successful
+ */
+ if (range && !amdgpu_hmm_range_valid(range) && !r) {
pr_debug("hmm update the range, need validate again\n");
r = -EAGAIN;
- goto unlock_out;
}
- if (!list_empty(&prange->child_list)) {
+
+ /* Free the hmm range */
+ amdgpu_hmm_range_free(range);
+
+ if (!r && !list_empty(&prange->child_list)) {
pr_debug("range split by unmap in parallel, validate again\n");
r = -EAGAIN;
- goto unlock_out;
}
- r = svm_range_map_to_gpus(prange, offset, npages, readonly,
- ctx.bitmap, wait);
+ if (!r) {
+ map_start_vma = max(map_start, prange->start + offset);
+ map_last_vma = min(map_last, prange->start + offset + npages - 1);
+ if (map_start_vma <= map_last_vma) {
+ offset = map_start_vma - prange->start;
+ npages = map_last_vma - map_start_vma + 1;
+ r = svm_range_map_to_gpus(prange, offset, npages, readonly,
+ ctx->bitmap, wait, flush_tlb);
+ }
+ }
+
+ if (!r && next == end)
+ prange->mapped_to_gpu = true;
-unlock_out:
svm_range_unlock(prange);
addr = next;
}
- if (addr == end)
- prange->validated_once = true;
-
-unreserve_out:
- svm_range_unreserve_bos(&ctx);
-
+ svm_range_unreserve_bos(ctx);
if (!r)
- prange->validate_timestamp = ktime_to_us(ktime_get());
+ prange->validate_timestamp = ktime_get_boottime();
+
+free_ctx:
+ kfree(ctx);
return r;
}
@@ -1574,6 +1833,7 @@ retry_flush_work:
static void svm_range_restore_work(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
+ struct amdkfd_process_info *process_info;
struct svm_range_list *svms;
struct svm_range *prange;
struct kfd_process *p;
@@ -1589,14 +1849,17 @@ static void svm_range_restore_work(struct work_struct *work)
pr_debug("restore svm ranges\n");
- /* kfd_process_notifier_release destroys this worker thread. So during
- * the lifetime of this thread, kfd_process and mm will be valid.
- */
p = container_of(svms, struct kfd_process, svms);
- mm = p->mm;
- if (!mm)
+ process_info = p->kgd_process_info;
+
+ /* Keep mm reference when svm_range_validate_and_map ranges */
+ mm = get_task_mm(p->lead_thread);
+ if (!mm) {
+ pr_debug("svms 0x%p process mm gone\n", svms);
return;
+ }
+ mutex_lock(&process_info->lock);
svm_range_list_lock_and_flush_work(svms, mm);
mutex_lock(&svms->lock);
@@ -1616,8 +1879,8 @@ static void svm_range_restore_work(struct work_struct *work)
*/
mutex_lock(&prange->migrate_mutex);
- r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
- false, true);
+ r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
+ MAX_GPU_INSTANCE, false, true, false);
if (r)
pr_debug("failed %d to map 0x%lx to gpus\n", r,
prange->start);
@@ -1649,17 +1912,26 @@ static void svm_range_restore_work(struct work_struct *work)
out_reschedule:
mutex_unlock(&svms->lock);
mmap_write_unlock(mm);
+ mutex_unlock(&process_info->lock);
/* If validation failed, reschedule another attempt */
if (evicted_ranges) {
pr_debug("reschedule to restore svm range\n");
- schedule_delayed_work(&svms->restore_work,
+ queue_delayed_work(system_freezable_wq, &svms->restore_work,
msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+
+ kfd_smi_event_queue_restore_rescheduled(mm);
}
+ mmput(mm);
}
/**
* svm_range_evict - evict svm range
+ * @prange: svm range structure
+ * @mm: current process mm_struct
+ * @start: starting process queue number
+ * @last: last process queue number
+ * @event: mmu notifier event when range is evicted or migrated
*
* Stop all queues of the process to ensure GPU doesn't access the memory, then
* return to let CPU evict the buffer and proceed CPU pagetable update.
@@ -1671,7 +1943,8 @@ out_reschedule:
*/
static int
svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
- unsigned long start, unsigned long last)
+ unsigned long start, unsigned long last,
+ enum mmu_notifier_event event)
{
struct svm_range_list *svms = prange->svms;
struct svm_range *pchild;
@@ -1683,10 +1956,15 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
pr_debug("invalidate svms 0x%p prange [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
svms, prange->start, prange->last, start, last);
- if (!p->xnack_enabled) {
+ if (!p->xnack_enabled ||
+ (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) {
int evicted_ranges;
+ bool mapped = prange->mapped_to_gpu;
list_for_each_entry(pchild, &prange->child_list, child_list) {
+ if (!pchild->mapped_to_gpu)
+ continue;
+ mapped = true;
mutex_lock_nested(&pchild->lock, 1);
if (pchild->start <= last && pchild->last >= start) {
pr_debug("increment pchild invalid [0x%lx 0x%lx]\n",
@@ -1696,6 +1974,9 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
mutex_unlock(&pchild->lock);
}
+ if (!mapped)
+ return r;
+
if (prange->start <= last && prange->last >= start)
atomic_inc(&prange->invalid);
@@ -1707,15 +1988,21 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
prange->svms, prange->start, prange->last);
/* First eviction, stop the queues */
- r = kgd2kfd_quiesce_mm(mm);
+ r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_SVM);
if (r)
pr_debug("failed to quiesce KFD\n");
pr_debug("schedule to restore svm %p ranges\n", svms);
- schedule_delayed_work(&svms->restore_work,
+ queue_delayed_work(system_freezable_wq, &svms->restore_work,
msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
} else {
unsigned long s, l;
+ uint32_t trigger;
+
+ if (event == MMU_NOTIFY_MIGRATE)
+ trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE;
+ else
+ trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY;
pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
prange->svms, start, last);
@@ -1724,13 +2011,13 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
s = max(start, pchild->start);
l = min(last, pchild->last);
if (l >= s)
- svm_range_unmap_from_gpus(pchild, s, l);
+ svm_range_unmap_from_gpus(pchild, s, l, trigger);
mutex_unlock(&pchild->lock);
}
s = max(start, prange->start);
l = min(last, prange->last);
if (l >= s)
- svm_range_unmap_from_gpus(prange, s, l);
+ svm_range_unmap_from_gpus(prange, s, l, trigger);
}
return r;
@@ -1740,10 +2027,13 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
{
struct svm_range *new;
- new = svm_range_new(old->svms, old->start, old->last);
+ new = svm_range_new(old->svms, old->start, old->last, false);
if (!new)
return NULL;
-
+ if (svm_range_copy_dma_addrs(new, old)) {
+ svm_range_free(new, false);
+ return NULL;
+ }
if (old->svm_bo) {
new->ttm_res = old->ttm_res;
new->offset = old->offset;
@@ -1757,92 +2047,159 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
new->prefetch_loc = old->prefetch_loc;
new->actual_loc = old->actual_loc;
new->granularity = old->granularity;
+ new->mapped_to_gpu = old->mapped_to_gpu;
+ new->vram_pages = old->vram_pages;
bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
+ atomic_set(&new->queue_refcount, atomic_read(&old->queue_refcount));
return new;
}
+void svm_range_set_max_pages(struct amdgpu_device *adev)
+{
+ uint64_t max_pages;
+ uint64_t pages, _pages;
+ uint64_t min_pages = 0;
+ int i, id;
+
+ for (i = 0; i < adev->kfd.dev->num_nodes; i++) {
+ if (adev->kfd.dev->nodes[i]->xcp)
+ id = adev->kfd.dev->nodes[i]->xcp->id;
+ else
+ id = -1;
+ pages = KFD_XCP_MEMORY_SIZE(adev, id) >> 17;
+ pages = clamp(pages, 1ULL << 9, 1ULL << 18);
+ pages = rounddown_pow_of_two(pages);
+ min_pages = min_not_zero(min_pages, pages);
+ }
+
+ do {
+ max_pages = READ_ONCE(max_svm_range_pages);
+ _pages = min_not_zero(max_pages, min_pages);
+ } while (cmpxchg(&max_svm_range_pages, max_pages, _pages) != max_pages);
+}
+
+static int
+svm_range_split_new(struct svm_range_list *svms, uint64_t start, uint64_t last,
+ uint64_t max_pages, struct list_head *insert_list,
+ struct list_head *update_list)
+{
+ struct svm_range *prange;
+ uint64_t l;
+
+ pr_debug("max_svm_range_pages 0x%llx adding [0x%llx 0x%llx]\n",
+ max_pages, start, last);
+
+ while (last >= start) {
+ l = min(last, ALIGN_DOWN(start + max_pages, max_pages) - 1);
+
+ prange = svm_range_new(svms, start, l, true);
+ if (!prange)
+ return -ENOMEM;
+ list_add(&prange->list, insert_list);
+ list_add(&prange->update_list, update_list);
+
+ start = l + 1;
+ }
+ return 0;
+}
+
/**
- * svm_range_handle_overlap - split overlap ranges
- * @svms: svm range list header
- * @new: range added with this attributes
- * @start: range added start address, in pages
- * @last: range last address, in pages
- * @update_list: output, the ranges attributes are updated. For set_attr, this
- * will do validation and map to GPUs. For unmap, this will be
- * removed and unmap from GPUs
- * @insert_list: output, the ranges will be inserted into svms, attributes are
- * not changes. For set_attr, this will add into svms.
- * @remove_list:output, the ranges will be removed from svms
- * @left: the remaining range after overlap, For set_attr, this will be added
- * as new range.
+ * svm_range_add - add svm range and handle overlap
+ * @p: the process whose svms the range is added to
+ * @start: page size aligned
+ * @size: page size aligned
+ * @nattr: number of attributes
+ * @attrs: array of attributes
+ * @update_list: output, the ranges that need validation and a GPU mapping update
+ * @insert_list: output, the ranges that need to be inserted into svms
+ * @remove_list: output, the ranges that are replaced and need removal from svms
+ * @remap_list: output, remap unaligned svm ranges
*
- * Total have 5 overlap cases.
+ * Check if the virtual address range has overlap with any existing ranges,
+ * split partly overlapping ranges and add new ranges in the gaps. All changes
+ * should be applied to the range_list and interval tree transactionally. If
+ * any range split or allocation fails, the entire update fails. Therefore any
+ * existing overlapping svm_ranges are cloned and the original svm_ranges left
+ * unchanged.
*
- * This function handles overlap of an address interval with existing
- * struct svm_ranges for applying new attributes. This may require
- * splitting existing struct svm_ranges. All changes should be applied to
- * the range_list and interval tree transactionally. If any split operation
- * fails, the entire update fails. Therefore the existing overlapping
- * svm_ranges are cloned and the original svm_ranges left unchanged. If the
- * transaction succeeds, the modified clones are added and the originals
- * freed. Otherwise the clones are removed and the old svm_ranges remain.
+ * If the transaction succeeds, the caller can update and insert clones and
+ * new ranges, then free the originals.
*
- * Context: The caller must hold svms->lock
+ * Otherwise the caller can free the clones and new ranges, while the old
+ * svm_ranges remain unchanged.
+ *
+ * Context: Process context, caller must hold svms->lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
*/
static int
-svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,
- unsigned long start, unsigned long last,
- struct list_head *update_list,
- struct list_head *insert_list,
- struct list_head *remove_list,
- unsigned long *left)
+svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
+ uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
+ struct list_head *update_list, struct list_head *insert_list,
+ struct list_head *remove_list, struct list_head *remap_list)
{
+ unsigned long last = start + size - 1UL;
+ struct svm_range_list *svms = &p->svms;
struct interval_tree_node *node;
struct svm_range *prange;
struct svm_range *tmp;
+ struct list_head new_list;
int r = 0;
+ pr_debug("svms 0x%p [0x%llx 0x%lx]\n", &p->svms, start, last);
+
INIT_LIST_HEAD(update_list);
INIT_LIST_HEAD(insert_list);
INIT_LIST_HEAD(remove_list);
+ INIT_LIST_HEAD(&new_list);
+ INIT_LIST_HEAD(remap_list);
node = interval_tree_iter_first(&svms->objects, start, last);
while (node) {
struct interval_tree_node *next;
- struct svm_range *old;
unsigned long next_start;
pr_debug("found overlap node [0x%lx 0x%lx]\n", node->start,
node->last);
- old = container_of(node, struct svm_range, it_node);
+ prange = container_of(node, struct svm_range, it_node);
next = interval_tree_iter_next(node, start, last);
next_start = min(node->last, last) + 1;
- if (node->start < start || node->last > last) {
- /* node intersects the updated range, clone+split it */
+ if (svm_range_is_same_attrs(p, prange, nattr, attrs) &&
+ prange->mapped_to_gpu) {
+ /* nothing to do */
+ } else if (node->start < start || node->last > last) {
+ /* node intersects the update range and its attributes
+ * will change. Clone and split it, apply updates only
+ * to the overlapping part
+ */
+ struct svm_range *old = prange;
+
prange = svm_range_clone(old);
if (!prange) {
r = -ENOMEM;
goto out;
}
- list_add(&old->remove_list, remove_list);
- list_add(&prange->insert_list, insert_list);
+ list_add(&old->update_list, remove_list);
+ list_add(&prange->list, insert_list);
+ list_add(&prange->update_list, update_list);
if (node->start < start) {
pr_debug("change old range start\n");
- r = svm_range_split_head(prange, new, start,
- insert_list);
+ r = svm_range_split_head(prange, start,
+ insert_list, remap_list);
if (r)
goto out;
}
if (node->last > last) {
pr_debug("change old range last\n");
- r = svm_range_split_tail(prange, new, last,
- insert_list);
+ r = svm_range_split_tail(prange, last,
+ insert_list, remap_list);
if (r)
goto out;
}
@@ -1850,36 +2207,37 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,
/* The node is contained within start..last,
* just update it
*/
- prange = old;
- }
-
- if (!svm_range_is_same_attrs(prange, new))
list_add(&prange->update_list, update_list);
+ }
/* insert a new node if needed */
if (node->start > start) {
- prange = svm_range_new(prange->svms, start,
- node->start - 1);
- if (!prange) {
- r = -ENOMEM;
+ r = svm_range_split_new(svms, start, node->start - 1,
+ READ_ONCE(max_svm_range_pages),
+ &new_list, update_list);
+ if (r)
goto out;
- }
-
- list_add(&prange->insert_list, insert_list);
- list_add(&prange->update_list, update_list);
}
node = next;
start = next_start;
}
- if (left && start <= last)
- *left = last - start + 1;
+ /* add a final range at the end if needed */
+ if (start <= last)
+ r = svm_range_split_new(svms, start, last,
+ READ_ONCE(max_svm_range_pages),
+ &new_list, update_list);
out:
- if (r)
- list_for_each_entry_safe(prange, tmp, insert_list, insert_list)
- svm_range_free(prange);
+ if (r) {
+ list_for_each_entry_safe(prange, tmp, insert_list, list)
+ svm_range_free(prange, false);
+ list_for_each_entry_safe(prange, tmp, &new_list, list)
+ svm_range_free(prange, true);
+ } else {
+ list_splice(&new_list, insert_list);
+ }
return r;
}
@@ -1913,10 +2271,9 @@ svm_range_update_notifier_and_interval_tree(struct mm_struct *mm,
}
static void
-svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
+svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange,
+ struct mm_struct *mm)
{
- struct mm_struct *mm = prange->work_item.mm;
-
switch (prange->work_item.op) {
case SVM_OP_NULL:
pr_debug("NULL OP 0x%p prange 0x%p [0x%lx 0x%lx]\n",
@@ -1927,7 +2284,7 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
svms, prange, prange->start, prange->last);
svm_range_unlink(prange);
svm_range_remove_notifier(prange);
- svm_range_free(prange);
+ svm_range_free(prange, true);
break;
case SVM_OP_UPDATE_RANGE_NOTIFIER:
pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n",
@@ -1962,31 +2319,30 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
static void svm_range_drain_retry_fault(struct svm_range_list *svms)
{
struct kfd_process_device *pdd;
- struct amdgpu_device *adev;
struct kfd_process *p;
- int drain;
uint32_t i;
p = container_of(svms, struct kfd_process, svms);
-restart:
- drain = atomic_read(&svms->drain_pagefaults);
- if (!drain)
- return;
-
for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
pdd = p->pdds[i];
if (!pdd)
continue;
pr_debug("drain retry fault gpu %d svms %p\n", i, svms);
- adev = (struct amdgpu_device *)pdd->dev->kgd;
- amdgpu_ih_wait_on_checkpoint_process(adev, &adev->irq.ih1);
+ amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev,
+ pdd->dev->adev->irq.retry_cam_enabled ?
+ &pdd->dev->adev->irq.ih :
+ &pdd->dev->adev->irq.ih1);
+
+ if (pdd->dev->adev->irq.retry_cam_enabled)
+ amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev,
+ &pdd->dev->adev->irq.ih_soft);
+
+
pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
}
- if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain)
- goto restart;
}
static void svm_range_deferred_list_work(struct work_struct *work)
@@ -1994,40 +2350,35 @@ static void svm_range_deferred_list_work(struct work_struct *work)
struct svm_range_list *svms;
struct svm_range *prange;
struct mm_struct *mm;
- struct kfd_process *p;
svms = container_of(work, struct svm_range_list, deferred_list_work);
pr_debug("enter svms 0x%p\n", svms);
- p = container_of(svms, struct kfd_process, svms);
- /* Avoid mm is gone when inserting mmu notifier */
- mm = get_task_mm(p->lead_thread);
- if (!mm) {
- pr_debug("svms 0x%p process mm gone\n", svms);
- return;
- }
-retry:
- mmap_write_lock(mm);
-
- /* Checking for the need to drain retry faults must be inside
- * mmap write lock to serialize with munmap notifiers.
- */
- if (unlikely(atomic_read(&svms->drain_pagefaults))) {
- mmap_write_unlock(mm);
- svm_range_drain_retry_fault(svms);
- goto retry;
- }
-
spin_lock(&svms->deferred_list_lock);
while (!list_empty(&svms->deferred_range_list)) {
prange = list_first_entry(&svms->deferred_range_list,
struct svm_range, deferred_list);
- list_del_init(&prange->deferred_list);
spin_unlock(&svms->deferred_list_lock);
pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,
prange->start, prange->last, prange->work_item.op);
+ mm = prange->work_item.mm;
+
+ mmap_write_lock(mm);
+
+ /* Removal from deferred_list must happen inside the mmap write
+ * lock, because of two race cases:
+ * 1. unmap_from_cpu may change work_item.op and add the range
+ * to deferred_list again, causing a use-after-free bug.
+ * 2. svm_range_list_lock_and_flush_work may hold the mmap write
+ * lock and continue because deferred_list is empty, while the
+ * deferred_list work is actually waiting for the mmap lock.
+ */
+ spin_lock(&svms->deferred_list_lock);
+ list_del_init(&prange->deferred_list);
+ spin_unlock(&svms->deferred_list_lock);
+
mutex_lock(&svms->lock);
mutex_lock(&prange->migrate_mutex);
while (!list_empty(&prange->child_list)) {
@@ -2038,19 +2389,22 @@ retry:
pr_debug("child prange 0x%p op %d\n", pchild,
pchild->work_item.op);
list_del_init(&pchild->child_list);
- svm_range_handle_list_op(svms, pchild);
+ svm_range_handle_list_op(svms, pchild, mm);
}
mutex_unlock(&prange->migrate_mutex);
- svm_range_handle_list_op(svms, prange);
+ svm_range_handle_list_op(svms, prange, mm);
mutex_unlock(&svms->lock);
+ mmap_write_unlock(mm);
+
+ /* Pairs with mmget in svm_range_add_list_work. If dropping the
+ * last mm refcount, schedule release work to avoid circular locking
+ */
+ mmput_async(mm);
spin_lock(&svms->deferred_list_lock);
}
spin_unlock(&svms->deferred_list_lock);
-
- mmap_write_unlock(mm);
- mmput(mm);
pr_debug("exit svms 0x%p\n", svms);
}
@@ -2067,12 +2421,17 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
prange->work_item.op != SVM_OP_UNMAP_RANGE)
prange->work_item.op = op;
} else {
- prange->work_item.op = op;
- prange->work_item.mm = mm;
- list_add_tail(&prange->deferred_list,
- &prange->svms->deferred_range_list);
- pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n",
- prange, prange->start, prange->last, op);
+ /* Pairs with mmput in deferred_list_work.
+ * If process is exiting and mm is gone, don't update mmu notifier.
+ */
+ if (mmget_not_zero(mm)) {
+ prange->work_item.mm = mm;
+ prange->work_item.op = op;
+ list_add_tail(&prange->deferred_list,
+ &prange->svms->deferred_range_list);
+ pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n",
+ prange, prange->start, prange->last, op);
+ }
}
spin_unlock(&svms->deferred_list_lock);
}
@@ -2086,8 +2445,7 @@ void schedule_deferred_list_work(struct svm_range_list *svms)
}
static void
-svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent,
- struct svm_range *prange, unsigned long start,
+svm_range_unmap_split(struct svm_range *parent, struct svm_range *prange, unsigned long start,
unsigned long last)
{
struct svm_range *head;
@@ -2108,12 +2466,12 @@ svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent,
svm_range_split(tail, last + 1, tail->last, &head);
if (head != prange && tail != prange) {
- svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
- svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
+ svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE);
+ svm_range_add_child(parent, tail, SVM_OP_ADD_RANGE);
} else if (tail != prange) {
- svm_range_add_child(parent, mm, tail, SVM_OP_UNMAP_RANGE);
+ svm_range_add_child(parent, tail, SVM_OP_UNMAP_RANGE);
} else if (head != prange) {
- svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
+ svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE);
} else if (parent != prange) {
prange->work_item.op = SVM_OP_UNMAP_RANGE;
}
@@ -2123,11 +2481,23 @@ static void
svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
unsigned long start, unsigned long last)
{
+ uint32_t trigger = KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU;
struct svm_range_list *svms;
struct svm_range *pchild;
struct kfd_process *p;
unsigned long s, l;
bool unmap_parent;
+ uint32_t i;
+
+ if (atomic_read(&prange->queue_refcount)) {
+ int r;
+
+ pr_warn("Freeing queue vital buffer 0x%lx, queue evicted\n",
+ prange->start << PAGE_SHIFT);
+ r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_SVM);
+ if (r)
+ pr_debug("failed %d to quiesce KFD queues\n", r);
+ }
p = kfd_lookup_process_by_mm(mm);
if (!p)
@@ -2137,11 +2507,38 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,
prange, prange->start, prange->last, start, last);
- /* Make sure pending page faults are drained in the deferred worker
- * before the range is freed to avoid straggler interrupts on
- * unmapped memory causing "phantom faults".
+ /* Calculate the timestamps used to decide which page faults need to be
+ * dropped or handled before unmapping pages from the GPU VM
*/
- atomic_inc(&svms->drain_pagefaults);
+ for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
+ struct kfd_process_device *pdd;
+ struct amdgpu_device *adev;
+ struct amdgpu_ih_ring *ih;
+ uint32_t checkpoint_wptr;
+
+ pdd = p->pdds[i];
+ if (!pdd)
+ continue;
+
+ adev = pdd->dev->adev;
+
+ /* Check and drain ih1 ring if cam not available */
+ if (adev->irq.ih1.ring_size) {
+ ih = &adev->irq.ih1;
+ checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
+ if (ih->rptr != checkpoint_wptr) {
+ svms->checkpoint_ts[i] =
+ amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
+ continue;
+ }
+ }
+
+ /* check if adev->irq.ih_soft is not empty */
+ ih = &adev->irq.ih_soft;
+ checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
+ if (ih->rptr != checkpoint_wptr)
+ svms->checkpoint_ts[i] = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
+ }
unmap_parent = start <= prange->start && last >= prange->last;
@@ -2150,15 +2547,15 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
s = max(start, pchild->start);
l = min(last, pchild->last);
if (l >= s)
- svm_range_unmap_from_gpus(pchild, s, l);
- svm_range_unmap_split(mm, prange, pchild, start, last);
+ svm_range_unmap_from_gpus(pchild, s, l, trigger);
+ svm_range_unmap_split(prange, pchild, start, last);
mutex_unlock(&pchild->lock);
}
s = max(start, prange->start);
l = min(last, prange->last);
if (l >= s)
- svm_range_unmap_from_gpus(prange, s, l);
- svm_range_unmap_split(mm, prange, prange, start, last);
+ svm_range_unmap_from_gpus(prange, s, l, trigger);
+ svm_range_unmap_split(prange, prange, start, last);
if (unmap_parent)
svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE);
@@ -2172,6 +2569,9 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
/**
* svm_range_cpu_invalidate_pagetables - interval notifier callback
+ * @mni: mmu_interval_notifier struct
+ * @range: mmu_notifier_range struct
+ * @cur_seq: value to pass to mmu_interval_set_seq()
*
* If event is MMU_NOTIFY_UNMAP, this is from CPU unmap range, otherwise, it
* is from migration, or CPU page invalidation callback.
@@ -2201,8 +2601,8 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
start = mni->interval_tree.start;
last = mni->interval_tree.last;
- start = (start > range->start ? start : range->start) >> PAGE_SHIFT;
- last = (last < (range->end - 1) ? last : range->end - 1) >> PAGE_SHIFT;
+ start = max(start, range->start) >> PAGE_SHIFT;
+ last = min(last, range->end - 1) >> PAGE_SHIFT;
pr_debug("[0x%lx 0x%lx] range[0x%lx 0x%lx] notifier[0x%lx 0x%lx] %d\n",
start, last, range->start >> PAGE_SHIFT,
(range->end - 1) >> PAGE_SHIFT,
@@ -2219,7 +2619,7 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
svm_range_unmap_from_cpu(mni->mm, prange, start, last);
break;
default:
- svm_range_evict(prange, mni->mm, start, last);
+ svm_range_evict(prange, mni->mm, start, last, range->event);
break;
}
@@ -2294,29 +2694,31 @@ svm_range_from_addr(struct svm_range_list *svms, unsigned long addr,
*/
static int32_t
svm_range_best_restore_location(struct svm_range *prange,
- struct amdgpu_device *adev,
+ struct kfd_node *node,
int32_t *gpuidx)
{
- struct amdgpu_device *bo_adev, *preferred_adev;
+ struct kfd_node *bo_node, *preferred_node;
struct kfd_process *p;
uint32_t gpuid;
int r;
p = container_of(prange->svms, struct kfd_process, svms);
- r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, gpuidx);
+ r = kfd_process_gpuid_from_node(p, node, &gpuid, gpuidx);
if (r < 0) {
pr_debug("failed to get gpuid from kgd\n");
return -1;
}
+ if (node->adev->apu_prefer_gtt)
+ return 0;
+
if (prange->preferred_loc == gpuid ||
prange->preferred_loc == KFD_IOCTL_SVM_LOCATION_SYSMEM) {
return prange->preferred_loc;
} else if (prange->preferred_loc != KFD_IOCTL_SVM_LOCATION_UNDEFINED) {
- preferred_adev = svm_range_get_adev_by_id(prange,
- prange->preferred_loc);
- if (amdgpu_xgmi_same_hive(adev, preferred_adev))
+ preferred_node = svm_range_get_node_by_id(prange, prange->preferred_loc);
+ if (preferred_node && svm_nodes_in_same_hive(node, preferred_node))
return prange->preferred_loc;
/* fall through */
}
@@ -2328,8 +2730,8 @@ svm_range_best_restore_location(struct svm_range *prange,
if (!prange->actual_loc)
return 0;
- bo_adev = svm_range_get_adev_by_id(prange, prange->actual_loc);
- if (amdgpu_xgmi_same_hive(adev, bo_adev))
+ bo_node = svm_range_get_node_by_id(prange, prange->actual_loc);
+ if (bo_node && svm_nodes_in_same_hive(node, bo_node))
return prange->actual_loc;
else
return 0;
@@ -2345,38 +2747,36 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
{
struct vm_area_struct *vma;
struct interval_tree_node *node;
+ struct rb_node *rb_node;
unsigned long start_limit, end_limit;
- vma = find_vma(p->mm, addr << PAGE_SHIFT);
- if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
+ vma = vma_lookup(p->mm, addr << PAGE_SHIFT);
+ if (!vma) {
pr_debug("VMA does not exist in address [0x%llx]\n", addr);
return -EFAULT;
}
- *is_heap_stack = (vma->vm_start <= vma->vm_mm->brk &&
- vma->vm_end >= vma->vm_mm->start_brk) ||
- (vma->vm_start <= vma->vm_mm->start_stack &&
- vma->vm_end >= vma->vm_mm->start_stack);
+ *is_heap_stack = vma_is_initial_heap(vma) || vma_is_initial_stack(vma);
start_limit = max(vma->vm_start >> PAGE_SHIFT,
- (unsigned long)ALIGN_DOWN(addr, 2UL << 8));
+ (unsigned long)ALIGN_DOWN(addr, 1UL << p->svms.default_granularity));
end_limit = min(vma->vm_end >> PAGE_SHIFT,
- (unsigned long)ALIGN(addr + 1, 2UL << 8));
+ (unsigned long)ALIGN(addr + 1, 1UL << p->svms.default_granularity));
+
/* First range that starts after the fault address */
node = interval_tree_iter_first(&p->svms.objects, addr + 1, ULONG_MAX);
if (node) {
end_limit = min(end_limit, node->start);
/* Last range that ends before the fault address */
- node = container_of(rb_prev(&node->rb),
- struct interval_tree_node, rb);
+ rb_node = rb_prev(&node->rb);
} else {
/* Last range must end before addr because
* there was no range after addr
*/
- node = container_of(rb_last(&p->svms.objects.rb_root),
- struct interval_tree_node, rb);
+ rb_node = rb_last(&p->svms.objects.rb_root);
}
- if (node) {
+ if (rb_node) {
+ node = container_of(rb_node, struct interval_tree_node, rb);
if (node->last >= addr) {
WARN(1, "Overlap with prev node and page fault addr\n");
return -EFAULT;
@@ -2446,7 +2846,7 @@ svm_range_check_vm_userptr(struct kfd_process *p, uint64_t start, uint64_t last,
}
static struct
-svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
+svm_range *svm_range_create_unregistered_range(struct kfd_node *node,
struct kfd_process *p,
struct mm_struct *mm,
int64_t addr)
@@ -2476,14 +2876,14 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
last = addr;
}
- prange = svm_range_new(&p->svms, start, last);
+ prange = svm_range_new(&p->svms, start, last, true);
if (!prange) {
pr_debug("Failed to create prange in address [0x%llx]\n", addr);
return NULL;
}
- if (kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx)) {
+ if (kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx)) {
pr_debug("failed to get gpuid from kgd\n");
- svm_range_free(prange);
+ svm_range_free(prange, true);
return NULL;
}
@@ -2535,7 +2935,7 @@ static bool svm_range_skip_recover(struct svm_range *prange)
}
static void
-svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
+svm_range_count_fault(struct kfd_node *node, struct kfd_process *p,
int32_t gpuidx)
{
struct kfd_process_device *pdd;
@@ -2548,7 +2948,7 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
uint32_t gpuid;
int r;
- r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx);
+ r = kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx);
if (r < 0)
return;
}
@@ -2576,44 +2976,63 @@ svm_fault_allowed(struct vm_area_struct *vma, bool write_fault)
int
svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
- uint64_t addr, bool write_fault)
+ uint32_t vmid, uint32_t node_id,
+ uint64_t addr, uint64_t ts, bool write_fault)
{
+ unsigned long start, last, size;
struct mm_struct *mm = NULL;
struct svm_range_list *svms;
struct svm_range *prange;
struct kfd_process *p;
- uint64_t timestamp;
+ ktime_t timestamp = ktime_get_boottime();
+ struct kfd_node *node;
int32_t best_loc;
- int32_t gpuidx = MAX_GPU_INSTANCE;
+ int32_t gpuid, gpuidx = MAX_GPU_INSTANCE;
bool write_locked = false;
struct vm_area_struct *vma;
+ bool migration = false;
int r = 0;
- if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {
+ if (!KFD_IS_SVM_API_SUPPORTED(adev)) {
pr_debug("device does not support SVM\n");
return -EFAULT;
}
- p = kfd_lookup_process_by_pasid(pasid);
+ p = kfd_lookup_process_by_pasid(pasid, NULL);
if (!p) {
pr_debug("kfd process not founded pasid 0x%x\n", pasid);
return 0;
}
- if (!p->xnack_enabled) {
- pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
- r = -EFAULT;
- goto out;
- }
svms = &p->svms;
pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
if (atomic_read(&svms->drain_pagefaults)) {
- pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+ pr_debug("page fault handling disabled, drop fault 0x%llx\n", addr);
r = 0;
goto out;
}
+ node = kfd_node_by_irq_ids(adev, node_id, vmid);
+ if (!node) {
+ pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id,
+ vmid);
+ r = -EFAULT;
+ goto out;
+ }
+
+ if (kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx)) {
+ pr_debug("failed to get gpuid/gpuidex for node_id: %d\n", node_id);
+ r = -EFAULT;
+ goto out;
+ }
+
+ if (!p->xnack_enabled) {
+ pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
+ r = -EFAULT;
+ goto out;
+ }
+
/* p->lead_thread is available as kfd_process_wq_release flush the work
* before releasing task ref.
*/
@@ -2627,6 +3046,23 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
mmap_read_lock(mm);
retry_write_locked:
mutex_lock(&svms->lock);
+
+ /* check if this page fault time stamp is before svms->checkpoint_ts */
+ if (svms->checkpoint_ts[gpuidx] != 0) {
+ if (amdgpu_ih_ts_after_or_equal(ts, svms->checkpoint_ts[gpuidx])) {
+ pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+ if (write_locked)
+ mmap_write_downgrade(mm);
+ r = -EAGAIN;
+ goto out_unlock_svms;
+ } else {
+ /* ts is after svms->checkpoint_ts now; reset svms->checkpoint_ts
+ * to zero so that a later ts wrap-around cannot give a wrong comparison
+ */
+ svms->checkpoint_ts[gpuidx] = 0;
+ }
+ }
+
prange = svm_range_from_addr(svms, addr, NULL);
if (!prange) {
pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
@@ -2642,7 +3078,7 @@ retry_write_locked:
write_locked = true;
goto retry_write_locked;
}
- prange = svm_range_create_unregistered_range(adev, p, mm, addr);
+ prange = svm_range_create_unregistered_range(node, p, mm, addr);
if (!prange) {
pr_debug("failed to create unregistered range svms 0x%p address [0x%llx]\n",
svms, addr);
@@ -2657,14 +3093,14 @@ retry_write_locked:
mutex_lock(&prange->migrate_mutex);
if (svm_range_skip_recover(prange)) {
- amdgpu_gmc_filter_faults_remove(adev, addr, pasid);
+ amdgpu_gmc_filter_faults_remove(node->adev, addr, pasid);
r = 0;
goto out_unlock_range;
}
- timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp;
/* skip duplicate vm fault on different pages of same range */
- if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
+ if (ktime_before(timestamp, ktime_add_ns(prange->validate_timestamp,
+ AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING))) {
pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
svms, prange->start, prange->last);
r = 0;
@@ -2674,8 +3110,8 @@ retry_write_locked:
/* __do_munmap removed VMA, return success as we are handling stale
* retry fault.
*/
- vma = find_vma(mm, addr << PAGE_SHIFT);
- if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
+ vma = vma_lookup(mm, addr << PAGE_SHIFT);
+ if (!vma) {
pr_debug("address 0x%llx VMA is removed\n", addr);
r = 0;
goto out_unlock_range;
@@ -2688,7 +3124,7 @@ retry_write_locked:
goto out_unlock_range;
}
- best_loc = svm_range_best_restore_location(prange, adev, &gpuidx);
+ best_loc = svm_range_best_restore_location(prange, node, &gpuidx);
if (best_loc == -1) {
pr_debug("svms %p failed get best restore loc [0x%lx 0x%lx]\n",
svms, prange->start, prange->last);
@@ -2700,34 +3136,51 @@ retry_write_locked:
svms, prange->start, prange->last, best_loc,
prange->actual_loc);
- if (prange->actual_loc != best_loc) {
+ kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
+ write_fault, timestamp);
+
+ /* Align migration range start and size to granularity size */
+ size = 1UL << prange->granularity;
+ start = max_t(unsigned long, ALIGN_DOWN(addr, size), prange->start);
+ last = min_t(unsigned long, ALIGN(addr + 1, size) - 1, prange->last);
+ if (prange->actual_loc != 0 || best_loc != 0) {
if (best_loc) {
- r = svm_migrate_to_vram(prange, best_loc, mm);
+ r = svm_migrate_to_vram(prange, best_loc, start, last,
+ mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
if (r) {
pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
r, addr);
/* Fallback to system memory if migration to
* VRAM failed
*/
- if (prange->actual_loc)
- r = svm_migrate_vram_to_ram(prange, mm);
+ if (prange->actual_loc && prange->actual_loc != best_loc)
+ r = svm_migrate_vram_to_ram(prange, mm, start, last,
+ KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL);
else
r = 0;
}
} else {
- r = svm_migrate_vram_to_ram(prange, mm);
+ r = svm_migrate_vram_to_ram(prange, mm, start, last,
+ KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL);
}
if (r) {
pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
- r, svms, prange->start, prange->last);
- goto out_unlock_range;
+ r, svms, start, last);
+ goto out_migrate_fail;
+ } else {
+ migration = true;
}
}
- r = svm_range_validate_and_map(mm, prange, gpuidx, false, false);
+ r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false,
+ false, false);
if (r)
pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
- r, svms, prange->start, prange->last);
+ r, svms, start, last);
+
+out_migrate_fail:
+ kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
+ migration);
out_unlock_range:
mutex_unlock(&prange->migrate_mutex);
@@ -2735,7 +3188,8 @@ out_unlock_svms:
mutex_unlock(&svms->lock);
mmap_read_unlock(mm);
- svm_range_count_fault(adev, p, gpuidx);
+ if (r != -EAGAIN)
+ svm_range_count_fault(node, p, gpuidx);
mmput(mm);
out:
@@ -2743,18 +3197,79 @@ out:
if (r == -EAGAIN) {
pr_debug("recover vm fault later\n");
- amdgpu_gmc_filter_faults_remove(adev, addr, pasid);
+ amdgpu_gmc_filter_faults_remove(node->adev, addr, pasid);
r = 0;
}
return r;
}
+int
+svm_range_switch_xnack_reserve_mem(struct kfd_process *p, bool xnack_enabled)
+{
+ struct svm_range *prange, *pchild;
+ uint64_t reserved_size = 0;
+ uint64_t size;
+ int r = 0;
+
+ pr_debug("switching xnack from %d to %d\n", p->xnack_enabled, xnack_enabled);
+
+ mutex_lock(&p->svms.lock);
+
+ list_for_each_entry(prange, &p->svms.list, list) {
+ svm_range_lock(prange);
+ list_for_each_entry(pchild, &prange->child_list, child_list) {
+ size = (pchild->last - pchild->start + 1) << PAGE_SHIFT;
+ if (xnack_enabled) {
+ amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
+ } else {
+ r = amdgpu_amdkfd_reserve_mem_limit(NULL, size,
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
+ if (r)
+ goto out_unlock;
+ reserved_size += size;
+ }
+ }
+
+ size = (prange->last - prange->start + 1) << PAGE_SHIFT;
+ if (xnack_enabled) {
+ amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
+ } else {
+ r = amdgpu_amdkfd_reserve_mem_limit(NULL, size,
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
+ if (r)
+ goto out_unlock;
+ reserved_size += size;
+ }
+out_unlock:
+ svm_range_unlock(prange);
+ if (r)
+ break;
+ }
+
+ if (r)
+ amdgpu_amdkfd_unreserve_mem_limit(NULL, reserved_size,
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
+ else
+ /* The xnack mode must be changed while holding the svms lock, to avoid
+ * racing with svm_range_deferred_list_work unreserving memory in parallel.
+ */
+ p->xnack_enabled = xnack_enabled;
+
+ mutex_unlock(&p->svms.lock);
+ return r;
+}
+
void svm_range_list_fini(struct kfd_process *p)
{
struct svm_range *prange;
struct svm_range *next;
- pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, &p->svms);
+ pr_debug("process pid %d svms 0x%p\n", p->lead_thread->pid,
+ &p->svms);
+
+ cancel_delayed_work_sync(&p->svms.restore_work);
/* Ensure list work is finished before process is destroyed */
flush_work(&p->svms.deferred_list_work);
@@ -2762,20 +3277,21 @@ void svm_range_list_fini(struct kfd_process *p)
/*
* Ensure no retry fault comes in afterwards, as page fault handler will
* not find kfd process and take mm lock to recover fault.
+ * Stop kfd page fault handling, then wait for pending page faults to be drained.
*/
- atomic_inc(&p->svms.drain_pagefaults);
+ atomic_set(&p->svms.drain_pagefaults, 1);
svm_range_drain_retry_fault(&p->svms);
-
list_for_each_entry_safe(prange, next, &p->svms.list, list) {
svm_range_unlink(prange);
svm_range_remove_notifier(prange);
- svm_range_free(prange);
+ svm_range_free(prange, true);
}
mutex_destroy(&p->svms.lock);
- pr_debug("pasid 0x%x svms 0x%p done\n", p->pasid, &p->svms);
+ pr_debug("process pid %d svms 0x%p done\n",
+ p->lead_thread->pid, &p->svms);
}
int svm_range_list_init(struct kfd_process *p)
@@ -2791,12 +3307,19 @@ int svm_range_list_init(struct kfd_process *p)
INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
INIT_LIST_HEAD(&svms->deferred_range_list);
+ INIT_LIST_HEAD(&svms->criu_svm_metadata_list);
spin_lock_init(&svms->deferred_list_lock);
for (i = 0; i < p->n_pdds; i++)
- if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev))
+ if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev->adev))
bitmap_set(svms->bitmap_supported, i, 1);
+ /* Value of default granularity cannot exceed 0x1B, the
+ * number of pages supported by a 4-level paging table
+ */
+ svms->default_granularity = min_t(u8, amdgpu_svm_default_granularity, 0x1B);
+ pr_debug("Default SVM Granularity to use: %d\n", svms->default_granularity);
+
return 0;
}
@@ -2883,9 +3406,8 @@ svm_range_is_valid(struct kfd_process *p, uint64_t start, uint64_t size)
start <<= PAGE_SHIFT;
end = start + (size << PAGE_SHIFT);
do {
- vma = find_vma(p->mm, start);
- if (!vma || start < vma->vm_start ||
- (vma->vm_flags & device_vma))
+ vma = vma_lookup(p->mm, start);
+ if (!vma || (vma->vm_flags & device_vma))
return -EFAULT;
start = min(end, vma->vm_end);
} while (start < end);
@@ -2895,59 +3417,6 @@ svm_range_is_valid(struct kfd_process *p, uint64_t start, uint64_t size)
}
/**
- * svm_range_add - add svm range and handle overlap
- * @p: the range add to this process svms
- * @start: page size aligned
- * @size: page size aligned
- * @nattr: number of attributes
- * @attrs: array of attributes
- * @update_list: output, the ranges need validate and update GPU mapping
- * @insert_list: output, the ranges need insert to svms
- * @remove_list: output, the ranges are replaced and need remove from svms
- *
- * Check if the virtual address range has overlap with the registered ranges,
- * split the overlapped range, copy and adjust pages address and vram nodes in
- * old and new ranges.
- *
- * Context: Process context, caller must hold svms->lock
- *
- * Return:
- * 0 - OK, otherwise error code
- */
-static int
-svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
- uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
- struct list_head *update_list, struct list_head *insert_list,
- struct list_head *remove_list)
-{
- uint64_t last = start + size - 1UL;
- struct svm_range_list *svms;
- struct svm_range new = {0};
- struct svm_range *prange;
- unsigned long left = 0;
- int r = 0;
-
- pr_debug("svms 0x%p [0x%llx 0x%llx]\n", &p->svms, start, last);
-
- svm_range_apply_attrs(p, &new, nattr, attrs);
-
- svms = &p->svms;
-
- r = svm_range_handle_overlap(svms, &new, start, last, update_list,
- insert_list, remove_list, &left);
- if (r)
- return r;
-
- if (left) {
- prange = svm_range_new(svms, last - left + 1, last);
- list_add(&prange->insert_list, insert_list);
- list_add(&prange->update_list, update_list);
- }
-
- return 0;
-}
-
-/**
* svm_range_best_prefetch_location - decide the best prefetch location
* @prange: svm range structure
*
@@ -2979,8 +3448,7 @@ svm_range_best_prefetch_location(struct svm_range *prange)
DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
uint32_t best_loc = prange->prefetch_loc;
struct kfd_process_device *pdd;
- struct amdgpu_device *bo_adev;
- struct amdgpu_device *adev;
+ struct kfd_node *bo_node;
struct kfd_process *p;
uint32_t gpuidx;
@@ -2989,9 +3457,14 @@ svm_range_best_prefetch_location(struct svm_range *prange)
if (!best_loc || best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED)
goto out;
- bo_adev = svm_range_get_adev_by_id(prange, best_loc);
- if (!bo_adev) {
- WARN_ONCE(1, "failed to get device by id 0x%x\n", best_loc);
+ bo_node = svm_range_get_node_by_id(prange, best_loc);
+ if (!bo_node) {
+ WARN_ONCE(1, "failed to get valid kfd node at id%x\n", best_loc);
+ best_loc = 0;
+ goto out;
+ }
+
+ if (bo_node->adev->apu_prefer_gtt) {
best_loc = 0;
goto out;
}
@@ -3008,12 +3481,11 @@ svm_range_best_prefetch_location(struct svm_range *prange)
pr_debug("failed to get device by idx 0x%x\n", gpuidx);
continue;
}
- adev = (struct amdgpu_device *)pdd->dev->kgd;
- if (adev == bo_adev)
+ if (pdd->dev->adev == bo_node->adev)
continue;
- if (!amdgpu_xgmi_same_hive(adev, bo_adev)) {
+ if (!svm_nodes_in_same_hive(pdd->dev, bo_node)) {
best_loc = 0;
break;
}
@@ -3027,28 +3499,6 @@ out:
return best_loc;
}
-/* FIXME: This is a workaround for page locking bug when some pages are
- * invalid during migration to VRAM
- */
-void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm,
- void *owner)
-{
- struct hmm_range *hmm_range;
- int r;
-
- if (prange->validated_once)
- return;
-
- r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
- prange->start << PAGE_SHIFT,
- prange->npages, &hmm_range,
- false, true, owner);
- if (!r) {
- amdgpu_hmm_range_get_pages_done(hmm_range);
- prange->validated_once = true;
- }
-}
-
/* svm_range_trigger_migration - start page migration if prefetch loc changed
* @mm: current process mm_struct
* @prange: svm range structure
@@ -3083,31 +3533,39 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
*migrated = false;
best_loc = svm_range_best_prefetch_location(prange);
- if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
- best_loc == prange->actual_loc)
+ /* When best_loc is a GPU node and the same as prange->actual_loc,
+ * we still need to migrate, because prange->actual_loc != 0 does
+ * not mean all pages in prange are in VRAM. hmm migrate will pick
+ * up the right pages during migration.
+ */
+ if ((best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED) ||
+ (best_loc == 0 && prange->actual_loc == 0))
return 0;
if (!best_loc) {
- r = svm_migrate_vram_to_ram(prange, mm);
+ r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last,
+ KFD_MIGRATE_TRIGGER_PREFETCH, NULL);
*migrated = !r;
return r;
}
- r = svm_migrate_to_vram(prange, best_loc, mm);
+ r = svm_migrate_to_vram(prange, best_loc, prange->start, prange->last,
+ mm, KFD_MIGRATE_TRIGGER_PREFETCH);
*migrated = !r;
- return r;
+ return 0;
}
int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
{
- if (!fence)
- return -EINVAL;
-
- if (dma_fence_is_signaled(&fence->base))
- return 0;
-
- if (fence->svm_bo) {
+ /* Dereferencing fence->svm_bo is safe here because the fence hasn't
+ * signaled yet and we're under the protection of the fence->lock.
+ * After the fence is signaled in svm_range_bo_release, we cannot get
+ * here any more.
+ *
+ * Reference is dropped in svm_range_evict_svm_bo_worker.
+ */
+ if (svm_bo_ref_unless_zero(fence->svm_bo)) {
WRITE_ONCE(fence->svm_bo->evicting, 1);
schedule_work(&fence->svm_bo->eviction_work);
}
@@ -3118,24 +3576,21 @@ int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
static void svm_range_evict_svm_bo_worker(struct work_struct *work)
{
struct svm_range_bo *svm_bo;
- struct kfd_process *p;
struct mm_struct *mm;
+ int r = 0;
svm_bo = container_of(work, struct svm_range_bo, eviction_work);
- if (!svm_bo_ref_unless_zero(svm_bo))
- return; /* svm_bo was freed while eviction was pending */
- /* svm_range_bo_release destroys this worker thread. So during
- * the lifetime of this thread, kfd_process and mm will be valid.
- */
- p = container_of(svm_bo->svms, struct kfd_process, svms);
- mm = p->mm;
- if (!mm)
+ if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+ mm = svm_bo->eviction_fence->mm;
+ } else {
+ svm_range_bo_unref(svm_bo);
return;
+ }
mmap_read_lock(mm);
spin_lock(&svm_bo->list_lock);
- while (!list_empty(&svm_bo->range_list)) {
+ while (!list_empty(&svm_bo->range_list) && !r) {
struct svm_range *prange =
list_first_entry(&svm_bo->range_list,
struct svm_range, svm_bo_list);
@@ -3149,45 +3604,58 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
mutex_lock(&prange->migrate_mutex);
do {
- svm_migrate_vram_to_ram(prange,
- svm_bo->eviction_fence->mm);
- } while (prange->actual_loc && --retries);
- WARN(prange->actual_loc, "Migration failed during eviction");
+ /* migrate all vram pages in this prange to sys ram
+ * after that prange->actual_loc should be zero
+ */
+ r = svm_migrate_vram_to_ram(prange, mm,
+ prange->start, prange->last,
+ KFD_MIGRATE_TRIGGER_TTM_EVICTION, NULL);
+ } while (!r && prange->actual_loc && --retries);
- mutex_lock(&prange->lock);
- prange->svm_bo = NULL;
- mutex_unlock(&prange->lock);
+ if (!r && prange->actual_loc)
+ pr_info_once("Migration failed during eviction");
+ if (!prange->actual_loc) {
+ mutex_lock(&prange->lock);
+ prange->svm_bo = NULL;
+ mutex_unlock(&prange->lock);
+ }
mutex_unlock(&prange->migrate_mutex);
spin_lock(&svm_bo->list_lock);
}
spin_unlock(&svm_bo->list_lock);
mmap_read_unlock(mm);
+ mmput(mm);
dma_fence_signal(&svm_bo->eviction_fence->base);
+
/* This is the last reference to svm_bo, after svm_range_vram_node_free
* has been called in svm_migrate_vram_to_ram
*/
- WARN_ONCE(kref_read(&svm_bo->kref) != 1, "This was not the last reference\n");
+ WARN_ONCE(!r && kref_read(&svm_bo->kref) != 1, "This was not the last reference\n");
svm_range_bo_unref(svm_bo);
}
static int
-svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
- uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
+ uint64_t start, uint64_t size, uint32_t nattr,
+ struct kfd_ioctl_svm_attribute *attrs)
{
- struct mm_struct *mm = current->mm;
+ struct amdkfd_process_info *process_info = p->kgd_process_info;
struct list_head update_list;
struct list_head insert_list;
struct list_head remove_list;
+ struct list_head remap_list;
struct svm_range_list *svms;
struct svm_range *prange;
struct svm_range *next;
- int r = 0;
+ bool update_mapping = false;
+ bool flush_tlb;
+ int r, ret = 0;
- pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
- p->pasid, &p->svms, start, start + size - 1, size);
+ pr_debug("process pid %d svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
+ p->lead_thread->pid, &p->svms, start, start + size - 1, size);
r = svm_range_check_attr(p, nattr, attrs);
if (r)
@@ -3195,6 +3663,8 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
svms = &p->svms;
+ mutex_lock(&process_info->lock);
+
svm_range_list_lock_and_flush_work(svms, mm);
r = svm_range_is_valid(p, start, size);
@@ -3208,29 +3678,30 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
/* Add new range and split existing ranges as needed */
r = svm_range_add(p, start, size, nattr, attrs, &update_list,
- &insert_list, &remove_list);
+ &insert_list, &remove_list, &remap_list);
if (r) {
mutex_unlock(&svms->lock);
mmap_write_unlock(mm);
goto out;
}
/* Apply changes as a transaction */
- list_for_each_entry_safe(prange, next, &insert_list, insert_list) {
+ list_for_each_entry_safe(prange, next, &insert_list, list) {
svm_range_add_to_svms(prange);
svm_range_add_notifier_locked(mm, prange);
}
list_for_each_entry(prange, &update_list, update_list) {
- svm_range_apply_attrs(p, prange, nattr, attrs);
+ svm_range_apply_attrs(p, prange, nattr, attrs, &update_mapping);
/* TODO: unmap ranges from GPU that lost access */
}
- list_for_each_entry_safe(prange, next, &remove_list,
- remove_list) {
+ update_mapping |= !p->xnack_enabled && !list_empty(&remap_list);
+
+ list_for_each_entry_safe(prange, next, &remove_list, update_list) {
pr_debug("unlink old 0x%p prange 0x%p [0x%lx 0x%lx]\n",
prange->svms, prange, prange->start,
prange->last);
svm_range_unlink(prange);
svm_range_remove_notifier(prange);
- svm_range_free(prange);
+ svm_range_free(prange, false);
}
mmap_write_downgrade(mm);
@@ -3248,37 +3719,62 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
if (r)
goto out_unlock_range;
- if (migrated && !p->xnack_enabled) {
+ if (migrated && (!p->xnack_enabled ||
+ (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) &&
+ prange->mapped_to_gpu) {
pr_debug("restore_work will update mappings of GPUs\n");
mutex_unlock(&prange->migrate_mutex);
continue;
}
- r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
- true, true);
+ if (!migrated && !update_mapping) {
+ mutex_unlock(&prange->migrate_mutex);
+ continue;
+ }
+
+ flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;
+
+ r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
+ MAX_GPU_INSTANCE, true, true, flush_tlb);
if (r)
pr_debug("failed %d to map svm range\n", r);
out_unlock_range:
mutex_unlock(&prange->migrate_mutex);
if (r)
- break;
+ ret = r;
}
- svm_range_debug_dump(svms);
+ list_for_each_entry(prange, &remap_list, update_list) {
+ pr_debug("Remapping prange 0x%p [0x%lx 0x%lx]\n",
+ prange, prange->start, prange->last);
+ mutex_lock(&prange->migrate_mutex);
+ r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
+ MAX_GPU_INSTANCE, true, true, prange->mapped_to_gpu);
+ if (r)
+ pr_debug("failed %d on remap svm range\n", r);
+ mutex_unlock(&prange->migrate_mutex);
+ if (r)
+ ret = r;
+ }
+
+ dynamic_svm_range_dump(svms);
mutex_unlock(&svms->lock);
mmap_read_unlock(mm);
out:
- pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid,
- &p->svms, start, start + size - 1, r);
+ mutex_unlock(&process_info->lock);
- return r;
+ pr_debug("process pid %d svms 0x%p [0x%llx 0x%llx] done, r=%d\n",
+ p->lead_thread->pid, &p->svms, start, start + size - 1, r);
+
+ return ret ? ret : r;
}
static int
-svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size,
- uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+svm_range_get_attr(struct kfd_process *p, struct mm_struct *mm,
+ uint64_t start, uint64_t size, uint32_t nattr,
+ struct kfd_ioctl_svm_attribute *attrs)
{
DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
@@ -3288,7 +3784,6 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size,
bool get_accessible = false;
bool get_flags = false;
uint64_t last = start + size - 1UL;
- struct mm_struct *mm = current->mm;
uint8_t granularity = 0xff;
struct interval_tree_node *node;
struct svm_range_list *svms;
@@ -3354,7 +3849,7 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size,
node = interval_tree_iter_first(&svms->objects, start, last);
if (!node) {
pr_debug("range attrs not found return default values\n");
- svm_range_set_default_attributes(&location, &prefetch_loc,
+ svm_range_set_default_attributes(svms, &location, &prefetch_loc,
&granularity, &flags_and);
flags_or = flags_and;
if (p->xnack_enabled)
@@ -3453,10 +3948,316 @@ fill_values:
return 0;
}
+int kfd_criu_resume_svm(struct kfd_process *p)
+{
+ struct kfd_ioctl_svm_attribute *set_attr_new, *set_attr = NULL;
+ int nattr_common = 4, nattr_accessibility = 1;
+ struct criu_svm_metadata *criu_svm_md = NULL;
+ struct svm_range_list *svms = &p->svms;
+ struct criu_svm_metadata *next = NULL;
+ uint32_t set_flags = 0xffffffff;
+ int i, j, num_attrs, ret = 0;
+ uint64_t set_attr_size;
+ struct mm_struct *mm;
+
+ if (list_empty(&svms->criu_svm_metadata_list)) {
+ pr_debug("No SVM data from CRIU restore stage 2\n");
+ return ret;
+ }
+
+ mm = get_task_mm(p->lead_thread);
+ if (!mm) {
+ pr_err("failed to get mm for the target process\n");
+ return -ESRCH;
+ }
+
+ num_attrs = nattr_common + (nattr_accessibility * p->n_pdds);
+
+ i = j = 0;
+ list_for_each_entry(criu_svm_md, &svms->criu_svm_metadata_list, list) {
+ pr_debug("criu_svm_md[%d]\n\tstart: 0x%llx size: 0x%llx (npages)\n",
+ i, criu_svm_md->data.start_addr, criu_svm_md->data.size);
+
+ for (j = 0; j < num_attrs; j++) {
+ pr_debug("\ncriu_svm_md[%d]->attrs[%d].type : 0x%x\ncriu_svm_md[%d]->attrs[%d].value : 0x%x\n",
+ i, j, criu_svm_md->data.attrs[j].type,
+ i, j, criu_svm_md->data.attrs[j].value);
+ switch (criu_svm_md->data.attrs[j].type) {
+ /* During Checkpoint operation, the query for
+ * KFD_IOCTL_SVM_ATTR_PREFETCH_LOC attribute might
+ * return KFD_IOCTL_SVM_LOCATION_UNDEFINED if they were
+ * not used by the range which was checkpointed. Care
+ * must be taken to not restore with an invalid value
+ * otherwise the gpuidx value will be invalid and
+ * set_attr would eventually fail so just replace those
+ * with another dummy attribute such as
+ * KFD_IOCTL_SVM_ATTR_SET_FLAGS.
+ */
+ case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+ if (criu_svm_md->data.attrs[j].value ==
+ KFD_IOCTL_SVM_LOCATION_UNDEFINED) {
+ criu_svm_md->data.attrs[j].type =
+ KFD_IOCTL_SVM_ATTR_SET_FLAGS;
+ criu_svm_md->data.attrs[j].value = 0;
+ }
+ break;
+ case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+ set_flags = criu_svm_md->data.attrs[j].value;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* CLR_FLAGS is not available via get_attr during checkpoint but
+ * it needs to be inserted before restoring the ranges so
+ * allocate extra space for it before calling set_attr
+ */
+ set_attr_size = sizeof(struct kfd_ioctl_svm_attribute) *
+ (num_attrs + 1);
+ set_attr_new = krealloc(set_attr, set_attr_size,
+ GFP_KERNEL);
+ if (!set_attr_new) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+ set_attr = set_attr_new;
+
+ memcpy(set_attr, criu_svm_md->data.attrs, num_attrs *
+ sizeof(struct kfd_ioctl_svm_attribute));
+ set_attr[num_attrs].type = KFD_IOCTL_SVM_ATTR_CLR_FLAGS;
+ set_attr[num_attrs].value = ~set_flags;
+
+ ret = svm_range_set_attr(p, mm, criu_svm_md->data.start_addr,
+ criu_svm_md->data.size, num_attrs + 1,
+ set_attr);
+ if (ret) {
+ pr_err("CRIU: failed to set range attributes\n");
+ goto exit;
+ }
+
+ i++;
+ }
+exit:
+ kfree(set_attr);
+ list_for_each_entry_safe(criu_svm_md, next, &svms->criu_svm_metadata_list, list) {
+ pr_debug("freeing criu_svm_md[]\n\tstart: 0x%llx\n",
+ criu_svm_md->data.start_addr);
+ kfree(criu_svm_md);
+ }
+
+ mmput(mm);
+ return ret;
+
+}
+
+int kfd_criu_restore_svm(struct kfd_process *p,
+ uint8_t __user *user_priv_ptr,
+ uint64_t *priv_data_offset,
+ uint64_t max_priv_data_size)
+{
+ uint64_t svm_priv_data_size, svm_object_md_size, svm_attrs_size;
+ int nattr_common = 4, nattr_accessibility = 1;
+ struct criu_svm_metadata *criu_svm_md = NULL;
+ struct svm_range_list *svms = &p->svms;
+ uint32_t num_devices;
+ int ret = 0;
+
+ num_devices = p->n_pdds;
+ /* Handle one SVM range object at a time. The number of GPUs is
+ * assumed to be the same on the restore node; that check must be done
+ * earlier, while evaluating the topology.
+ */
+
+ svm_attrs_size = sizeof(struct kfd_ioctl_svm_attribute) *
+ (nattr_common + nattr_accessibility * num_devices);
+ svm_object_md_size = sizeof(struct criu_svm_metadata) + svm_attrs_size;
+
+ svm_priv_data_size = sizeof(struct kfd_criu_svm_range_priv_data) +
+ svm_attrs_size;
+
+ criu_svm_md = kzalloc(svm_object_md_size, GFP_KERNEL);
+ if (!criu_svm_md) {
+ pr_err("failed to allocate memory to store svm metadata\n");
+ return -ENOMEM;
+ }
+ if (*priv_data_offset + svm_priv_data_size > max_priv_data_size) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ ret = copy_from_user(&criu_svm_md->data, user_priv_ptr + *priv_data_offset,
+ svm_priv_data_size);
+ if (ret) {
+ ret = -EFAULT;
+ goto exit;
+ }
+ *priv_data_offset += svm_priv_data_size;
+
+ list_add_tail(&criu_svm_md->list, &svms->criu_svm_metadata_list);
+
+ return 0;
+
+
+exit:
+ kfree(criu_svm_md);
+ return ret;
+}
+
+void svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
+ uint64_t *svm_priv_data_size)
+{
+ uint64_t total_size, accessibility_size, common_attr_size;
+ int nattr_common = 4, nattr_accessibility = 1;
+ int num_devices = p->n_pdds;
+ struct svm_range_list *svms;
+ struct svm_range *prange;
+ uint32_t count = 0;
+
+ *svm_priv_data_size = 0;
+
+ svms = &p->svms;
+
+ mutex_lock(&svms->lock);
+ list_for_each_entry(prange, &svms->list, list) {
+ pr_debug("prange: 0x%p start: 0x%lx\t npages: 0x%llx\t end: 0x%llx\n",
+ prange, prange->start, prange->npages,
+ prange->start + prange->npages - 1);
+ count++;
+ }
+ mutex_unlock(&svms->lock);
+
+ *num_svm_ranges = count;
+ /* Only the accessibility attributes need to be queried for all the gpus
+ * individually, remaining ones are spanned across the entire process
+ * regardless of the various gpu nodes. Of the remaining attributes,
+ * KFD_IOCTL_SVM_ATTR_CLR_FLAGS need not be saved.
+ *
+ * KFD_IOCTL_SVM_ATTR_PREFERRED_LOC
+ * KFD_IOCTL_SVM_ATTR_PREFETCH_LOC
+ * KFD_IOCTL_SVM_ATTR_SET_FLAGS
+ * KFD_IOCTL_SVM_ATTR_GRANULARITY
+ *
+ * ** ACCESSIBILITY ATTRIBUTES **
+ * (Considered as one, type is altered during query, value is gpuid)
+ * KFD_IOCTL_SVM_ATTR_ACCESS
+ * KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE
+ * KFD_IOCTL_SVM_ATTR_NO_ACCESS
+ */
+ if (*num_svm_ranges > 0) {
+ common_attr_size = sizeof(struct kfd_ioctl_svm_attribute) *
+ nattr_common;
+ accessibility_size = sizeof(struct kfd_ioctl_svm_attribute) *
+ nattr_accessibility * num_devices;
+
+ total_size = sizeof(struct kfd_criu_svm_range_priv_data) +
+ common_attr_size + accessibility_size;
+
+ *svm_priv_data_size = *num_svm_ranges * total_size;
+ }
+
+ pr_debug("num_svm_ranges %u total_priv_size %llu\n", *num_svm_ranges,
+ *svm_priv_data_size);
+}
+
+int kfd_criu_checkpoint_svm(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_data_offset)
+{
+ struct kfd_criu_svm_range_priv_data *svm_priv = NULL;
+ struct kfd_ioctl_svm_attribute *query_attr = NULL;
+ uint64_t svm_priv_data_size, query_attr_size = 0;
+ int index, nattr_common = 4, ret = 0;
+ struct svm_range_list *svms;
+ int num_devices = p->n_pdds;
+ struct svm_range *prange;
+ struct mm_struct *mm;
+
+ svms = &p->svms;
+
+ mm = get_task_mm(p->lead_thread);
+ if (!mm) {
+ pr_err("failed to get mm for the target process\n");
+ return -ESRCH;
+ }
+
+ query_attr_size = sizeof(struct kfd_ioctl_svm_attribute) *
+ (nattr_common + num_devices);
+
+ query_attr = kzalloc(query_attr_size, GFP_KERNEL);
+ if (!query_attr) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ query_attr[0].type = KFD_IOCTL_SVM_ATTR_PREFERRED_LOC;
+ query_attr[1].type = KFD_IOCTL_SVM_ATTR_PREFETCH_LOC;
+ query_attr[2].type = KFD_IOCTL_SVM_ATTR_SET_FLAGS;
+ query_attr[3].type = KFD_IOCTL_SVM_ATTR_GRANULARITY;
+
+ for (index = 0; index < num_devices; index++) {
+ struct kfd_process_device *pdd = p->pdds[index];
+
+ query_attr[index + nattr_common].type =
+ KFD_IOCTL_SVM_ATTR_ACCESS;
+ query_attr[index + nattr_common].value = pdd->user_gpu_id;
+ }
+
+ svm_priv_data_size = sizeof(*svm_priv) + query_attr_size;
+
+ svm_priv = kzalloc(svm_priv_data_size, GFP_KERNEL);
+ if (!svm_priv) {
+ ret = -ENOMEM;
+ goto exit_query;
+ }
+
+ index = 0;
+ list_for_each_entry(prange, &svms->list, list) {
+
+ svm_priv->object_type = KFD_CRIU_OBJECT_TYPE_SVM_RANGE;
+ svm_priv->start_addr = prange->start;
+ svm_priv->size = prange->npages;
+ memcpy(&svm_priv->attrs, query_attr, query_attr_size);
+ pr_debug("CRIU: prange: 0x%p start: 0x%lx\t npages: 0x%llx end: 0x%llx\t size: 0x%llx\n",
+ prange, prange->start, prange->npages,
+ prange->start + prange->npages - 1,
+ prange->npages * PAGE_SIZE);
+
+ ret = svm_range_get_attr(p, mm, svm_priv->start_addr,
+ svm_priv->size,
+ (nattr_common + num_devices),
+ svm_priv->attrs);
+ if (ret) {
+ pr_err("CRIU: failed to obtain range attributes\n");
+ goto exit_priv;
+ }
+
+ if (copy_to_user(user_priv_data + *priv_data_offset, svm_priv,
+ svm_priv_data_size)) {
+ pr_err("Failed to copy svm priv to user\n");
+ ret = -EFAULT;
+ goto exit_priv;
+ }
+
+ *priv_data_offset += svm_priv_data_size;
+
+ }
+
+
+exit_priv:
+ kfree(svm_priv);
+exit_query:
+ kfree(query_attr);
+exit:
+ mmput(mm);
+ return ret;
+}
+
int
svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
uint64_t size, uint32_t nattrs, struct kfd_ioctl_svm_attribute *attrs)
{
+ struct mm_struct *mm = current->mm;
int r;
start >>= PAGE_SHIFT;
@@ -3464,13 +4265,13 @@ svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
switch (op) {
case KFD_IOCTL_SVM_OP_SET_ATTR:
- r = svm_range_set_attr(p, start, size, nattrs, attrs);
+ r = svm_range_set_attr(p, mm, start, size, nattrs, attrs);
break;
case KFD_IOCTL_SVM_OP_GET_ATTR:
- r = svm_range_get_attr(p, start, size, nattrs, attrs);
+ r = svm_range_get_attr(p, mm, start, size, nattrs, attrs);
break;
default:
- r = EINVAL;
+ r = -EINVAL;
break;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 6dc91c33e80f..a63dfc95b602 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -31,7 +31,6 @@
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/sched/mm.h>
-#include <linux/hmm.h>
#include "amdgpu.h"
#include "kfd_priv.h"
@@ -46,8 +45,9 @@ struct svm_range_bo {
spinlock_t list_lock;
struct amdgpu_amdkfd_fence *eviction_fence;
struct work_struct eviction_work;
- struct svm_range_list *svms;
uint32_t evicting;
+ struct work_struct release_work;
+ struct kfd_node *node;
};
enum svm_work_list_ops {
@@ -75,10 +75,9 @@ struct svm_work_list_item {
* aligned, page size is (last - start + 1)
* @list: link list node, used to scan all ranges of svms
* @update_list:link list node used to add to update_list
- * @remove_list:link list node used to add to remove list
- * @insert_list:link list node used to add to insert list
* @mapping: bo_va mapping structure to create and update GPU page table
* @npages: number of pages
+ * @vram_pages: vram pages number in this svm_range
* @dma_addr: dma mapping address on each GPU for system memory physical page
* @ttm_res: vram ttm resource map
* @offset: range start offset within mm_nodes
@@ -89,7 +88,9 @@ struct svm_work_list_item {
* @flags: flags defined as KFD_IOCTL_SVM_FLAG_*
* @perferred_loc: perferred location, 0 for CPU, or GPU id
* @perfetch_loc: last prefetch location, 0 for CPU, or GPU id
- * @actual_loc: the actual location, 0 for CPU, or GPU id
+ * @actual_loc: this svm_range location. 0: all pages are from sys ram;
+ * GPU id: this svm_range may include vram pages from GPU with
+ * id actual_loc.
* @granularity:migration granularity, log2 num pages
* @invalid: not 0 means cpu page table is invalidated
* @validate_timestamp: system timestamp when range is validated
@@ -112,9 +113,8 @@ struct svm_range {
struct interval_tree_node it_node;
struct list_head list;
struct list_head update_list;
- struct list_head remove_list;
- struct list_head insert_list;
uint64_t npages;
+ uint64_t vram_pages;
dma_addr_t *dma_addr[MAX_GPU_INSTANCE];
struct ttm_resource *ttm_res;
uint64_t offset;
@@ -128,14 +128,15 @@ struct svm_range {
uint32_t actual_loc;
uint8_t granularity;
atomic_t invalid;
- uint64_t validate_timestamp;
+ ktime_t validate_timestamp;
struct mmu_interval_notifier notifier;
struct svm_work_list_item work_item;
struct list_head deferred_list;
struct list_head child_list;
DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
- bool validated_once;
+ bool mapped_to_gpu;
+ atomic_t queue_refcount;
};
static inline void svm_range_lock(struct svm_range *prange)
@@ -166,36 +167,47 @@ int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
struct svm_range *svm_range_from_addr(struct svm_range_list *svms,
unsigned long addr,
struct svm_range **parent);
-struct amdgpu_device *svm_range_get_adev_by_id(struct svm_range *prange,
- uint32_t id);
-int svm_range_vram_node_new(struct amdgpu_device *adev,
- struct svm_range *prange, bool clear);
+struct kfd_node *svm_range_get_node_by_id(struct svm_range *prange,
+ uint32_t gpu_id);
+int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
+ bool clear);
void svm_range_vram_node_free(struct svm_range *prange);
-int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
- unsigned long addr, struct svm_range *parent,
- struct svm_range *prange);
-int svm_range_restore_pages(struct amdgpu_device *adev,
- unsigned int pasid, uint64_t addr, bool write_fault);
+int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
+ uint32_t vmid, uint32_t node_id, uint64_t addr, uint64_t ts,
+ bool write_fault);
int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence);
void svm_range_add_list_work(struct svm_range_list *svms,
struct svm_range *prange, struct mm_struct *mm,
enum svm_work_list_ops op);
void schedule_deferred_list_work(struct svm_range_list *svms);
-void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
+void svm_range_dma_unmap_dev(struct device *dev, dma_addr_t *dma_addr,
unsigned long offset, unsigned long npages);
-void svm_range_free_dma_mappings(struct svm_range *prange);
-void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm,
- void *owner);
+void svm_range_dma_unmap(struct svm_range *prange);
+void svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
+ uint64_t *svm_priv_data_size);
+int kfd_criu_checkpoint_svm(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_offset);
+int kfd_criu_restore_svm(struct kfd_process *p,
+ uint8_t __user *user_priv_ptr,
+ uint64_t *priv_data_offset,
+ uint64_t max_priv_data_size);
+int kfd_criu_resume_svm(struct kfd_process *p);
struct kfd_process_device *
-svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev);
+svm_range_get_pdd_by_node(struct svm_range *prange, struct kfd_node *node);
void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_struct *mm);
/* SVM API and HMM page migration work together, device memory type
* is initialized to not 0 when page migration register device memory.
*/
-#define KFD_IS_SVM_API_SUPPORTED(dev) ((dev)->pgmap.type != 0)
+#define KFD_IS_SVM_API_SUPPORTED(adev) ((adev)->kfd.pgmap.type != 0 ||\
+ ((adev)->apu_prefer_gtt))
+
+void svm_range_bo_unref_async(struct svm_range_bo *svm_bo);
+
+void svm_range_set_max_pages(struct amdgpu_device *adev);
+int svm_range_switch_xnack_reserve_mem(struct kfd_process *p, bool xnack_enabled);
-void svm_range_bo_unref(struct svm_range_bo *svm_bo);
#else
struct kfd_process;
@@ -210,8 +222,9 @@ static inline void svm_range_list_fini(struct kfd_process *p)
}
static inline int svm_range_restore_pages(struct amdgpu_device *adev,
- unsigned int pasid, uint64_t addr,
- bool write_fault)
+ unsigned int pasid,
+ uint32_t client_id, uint32_t node_id,
+ uint64_t addr, uint64_t ts, bool write_fault)
{
return -EFAULT;
}
@@ -223,6 +236,38 @@ static inline int svm_range_schedule_evict_svm_bo(
return -EINVAL;
}
+/*
+ * Stub used on the #else side of IS_ENABLED(CONFIG_HSA_AMD_SVM): reports that
+ * there are no SVM ranges and no SVM private data by zeroing both outputs.
+ */
+static inline void svm_range_get_info(struct kfd_process *p,
+ uint32_t *num_svm_ranges,
+ uint64_t *svm_priv_data_size)
+{
+ *num_svm_ranges = 0;
+ *svm_priv_data_size = 0;
+}
+
+/*
+ * Stub used on the #else side of IS_ENABLED(CONFIG_HSA_AMD_SVM): does nothing
+ * and returns 0; none of the arguments are read or written.
+ */
+static inline int kfd_criu_checkpoint_svm(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_offset)
+{
+ return 0;
+}
+
+/*
+ * Stub used on the #else side of IS_ENABLED(CONFIG_HSA_AMD_SVM): always fails
+ * with -EINVAL, unlike the checkpoint/resume stubs which return 0.
+ */
+static inline int kfd_criu_restore_svm(struct kfd_process *p,
+ uint8_t __user *user_priv_ptr,
+ uint64_t *priv_data_offset,
+ uint64_t max_priv_data_size)
+{
+ return -EINVAL;
+}
+
+/*
+ * Stub used on the #else side of IS_ENABLED(CONFIG_HSA_AMD_SVM): does nothing
+ * and returns 0.
+ */
+static inline int kfd_criu_resume_svm(struct kfd_process *p)
+{
+ return 0;
+}
+
+/* Stub used on the #else side of IS_ENABLED(CONFIG_HSA_AMD_SVM): intentionally empty. */
+static inline void svm_range_set_max_pages(struct amdgpu_device *adev)
+{
+}
+
#define KFD_IS_SVM_API_SUPPORTED(dev) false
#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index dd593ad0614a..811636af14ea 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -30,37 +31,49 @@
#include <linux/log2.h>
#include <linux/dmi.h>
#include <linux/atomic.h>
+#include <linux/crc16.h>
#include "kfd_priv.h"
#include "kfd_crat.h"
#include "kfd_topology.h"
#include "kfd_device_queue_manager.h"
-#include "kfd_iommu.h"
#include "kfd_svm.h"
+#include "kfd_debug.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_ras.h"
+#include "amdgpu.h"
/* topology_device_list - Master list of all topology devices */
static struct list_head topology_device_list;
static struct kfd_system_properties sys_props;
static DECLARE_RWSEM(topology_lock);
-static atomic_t topology_crat_proximity_domain;
+static uint32_t topology_crat_proximity_domain;
-struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
+struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock(
uint32_t proximity_domain)
{
struct kfd_topology_device *top_dev;
struct kfd_topology_device *device = NULL;
- down_read(&topology_lock);
-
list_for_each_entry(top_dev, &topology_device_list, list)
if (top_dev->proximity_domain == proximity_domain) {
device = top_dev;
break;
}
+ return device;
+}
+
+struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
+ uint32_t proximity_domain)
+{
+ struct kfd_topology_device *device = NULL;
+
+ down_read(&topology_lock);
+
+ device = kfd_topology_device_by_proximity_domain_no_lock(
+ proximity_domain);
up_read(&topology_lock);
return device;
@@ -84,7 +97,7 @@ struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id)
return ret;
}
-struct kfd_dev *kfd_device_by_id(uint32_t gpu_id)
+struct kfd_node *kfd_device_by_id(uint32_t gpu_id)
{
struct kfd_topology_device *top_dev;
@@ -95,48 +108,13 @@ struct kfd_dev *kfd_device_by_id(uint32_t gpu_id)
return top_dev->gpu;
}
-struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
-{
- struct kfd_topology_device *top_dev;
- struct kfd_dev *device = NULL;
-
- down_read(&topology_lock);
-
- list_for_each_entry(top_dev, &topology_device_list, list)
- if (top_dev->gpu && top_dev->gpu->pdev == pdev) {
- device = top_dev->gpu;
- break;
- }
-
- up_read(&topology_lock);
-
- return device;
-}
-
-struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd)
-{
- struct kfd_topology_device *top_dev;
- struct kfd_dev *device = NULL;
-
- down_read(&topology_lock);
-
- list_for_each_entry(top_dev, &topology_device_list, list)
- if (top_dev->gpu && top_dev->gpu->kgd == kgd) {
- device = top_dev->gpu;
- break;
- }
-
- up_read(&topology_lock);
-
- return device;
-}
-
/* Called with write topology_lock acquired */
static void kfd_release_topology_device(struct kfd_topology_device *dev)
{
struct kfd_mem_properties *mem;
struct kfd_cache_properties *cache;
struct kfd_iolink_properties *iolink;
+ struct kfd_iolink_properties *p2plink;
struct kfd_perf_properties *perf;
list_del(&dev->list);
@@ -162,6 +140,13 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
kfree(iolink);
}
+ while (dev->p2p_link_props.next != &dev->p2p_link_props) {
+ p2plink = container_of(dev->p2p_link_props.next,
+ struct kfd_iolink_properties, list);
+ list_del(&p2plink->list);
+ kfree(p2plink);
+ }
+
while (dev->perf_props.next != &dev->perf_props) {
perf = container_of(dev->perf_props.next,
struct kfd_perf_properties, list);
@@ -203,6 +188,7 @@ struct kfd_topology_device *kfd_create_topology_device(
INIT_LIST_HEAD(&dev->mem_props);
INIT_LIST_HEAD(&dev->cache_props);
INIT_LIST_HEAD(&dev->io_link_props);
+ INIT_LIST_HEAD(&dev->p2p_link_props);
INIT_LIST_HEAD(&dev->perf_props);
list_add_tail(&dev->list, device_list);
@@ -257,7 +243,7 @@ static const struct sysfs_ops sysprops_ops = {
.show = sysprops_show,
};
-static struct kobj_type sysprops_type = {
+static const struct kobj_type sysprops_type = {
.release = kfd_topology_kobj_release,
.sysfs_ops = &sysprops_ops,
};
@@ -288,6 +274,8 @@ static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr,
iolink->max_bandwidth);
sysfs_show_32bit_prop(buffer, offs, "recommended_transfer_size",
iolink->rec_transfer_size);
+ sysfs_show_32bit_prop(buffer, offs, "recommended_sdma_engine_id_mask",
+ iolink->rec_sdma_eng_id_mask);
sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags);
return offs;
@@ -297,7 +285,7 @@ static const struct sysfs_ops iolink_ops = {
.show = iolink_show,
};
-static struct kobj_type iolink_type = {
+static const struct kobj_type iolink_type = {
.release = kfd_topology_kobj_release,
.sysfs_ops = &iolink_ops,
};
@@ -329,7 +317,7 @@ static const struct sysfs_ops mem_ops = {
.show = mem_show,
};
-static struct kobj_type mem_type = {
+static const struct kobj_type mem_type = {
.release = kfd_topology_kobj_release,
.sysfs_ops = &mem_ops,
};
@@ -343,7 +331,6 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
/* Making sure that the buffer is an empty string */
buffer[0] = 0;
-
cache = container_of(attr, struct kfd_cache_properties, attr);
if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
return -EPERM;
@@ -358,12 +345,13 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc);
sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency);
sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type);
+
offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map ");
- for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
+ for (i = 0; i < cache->sibling_map_size; i++)
for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++)
/* Check each bit */
offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,",
- (cache->sibling_map[i] >> j) & 1);
+ (cache->sibling_map[i] >> j) & 1);
/* Replace the last "," with end of line */
buffer[offs-1] = '\n';
@@ -374,7 +362,7 @@ static const struct sysfs_ops cache_ops = {
.show = kfd_cache_show,
};
-static struct kobj_type cache_type = {
+static const struct kobj_type cache_type = {
.release = kfd_topology_kobj_release,
.sysfs_ops = &cache_ops,
};
@@ -454,6 +442,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.caches_count);
sysfs_show_32bit_prop(buffer, offs, "io_links_count",
dev->node_props.io_links_count);
+ sysfs_show_32bit_prop(buffer, offs, "p2p_links_count",
+ dev->node_props.p2p_links_count);
sysfs_show_32bit_prop(buffer, offs, "cpu_core_id_base",
dev->node_props.cpu_core_id_base);
sysfs_show_32bit_prop(buffer, offs, "simd_id_base",
@@ -469,7 +459,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
sysfs_show_32bit_prop(buffer, offs, "wave_front_size",
dev->node_props.wave_front_size);
sysfs_show_32bit_prop(buffer, offs, "array_count",
- dev->node_props.array_count);
+ dev->gpu ? (dev->node_props.array_count *
+ NUM_XCC(dev->gpu->xcc_mask)) : 0);
sysfs_show_32bit_prop(buffer, offs, "simd_arrays_per_engine",
dev->node_props.simd_arrays_per_engine);
sysfs_show_32bit_prop(buffer, offs, "cu_per_simd_array",
@@ -503,7 +494,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
if (dev->gpu) {
log_max_watch_addr =
- __ilog2_u32(dev->gpu->device_info->num_of_watch_points);
+ __ilog2_u32(dev->gpu->kfd->device_info.num_of_watch_points);
if (log_max_watch_addr) {
dev->node_props.capability |=
@@ -515,24 +506,37 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
HSA_CAP_WATCH_POINTS_TOTALBITS_MASK);
}
- if (dev->gpu->device_info->asic_family == CHIP_TONGA)
+ if (dev->gpu->adev->asic_type == CHIP_TONGA)
dev->node_props.capability |=
HSA_CAP_AQL_QUEUE_DOUBLE_MAP;
+ if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0) &&
+ (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
+ dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED;
+
sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_fcompute",
dev->node_props.max_engine_clk_fcompute);
sysfs_show_64bit_prop(buffer, offs, "local_mem_size", 0ULL);
sysfs_show_32bit_prop(buffer, offs, "fw_version",
- dev->gpu->mec_fw_version);
+ dev->gpu->kfd->mec_fw_version);
sysfs_show_32bit_prop(buffer, offs, "capability",
dev->node_props.capability);
+ sysfs_show_32bit_prop(buffer, offs, "capability2",
+ dev->node_props.capability2);
+ sysfs_show_64bit_prop(buffer, offs, "debug_prop",
+ dev->node_props.debug_prop);
sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version",
- dev->gpu->sdma_fw_version);
+ dev->gpu->kfd->sdma_fw_version);
sysfs_show_64bit_prop(buffer, offs, "unique_id",
- amdgpu_amdkfd_get_unique_id(dev->gpu->kgd));
-
+ dev->gpu->xcp &&
+ (dev->gpu->xcp->xcp_mgr->mode !=
+ AMDGPU_SPX_PARTITION_MODE) ?
+ dev->gpu->xcp->unique_id :
+ dev->gpu->adev->unique_id);
+ sysfs_show_32bit_prop(buffer, offs, "num_xcc",
+ NUM_XCC(dev->gpu->xcc_mask));
}
return sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_ccompute",
@@ -543,7 +547,7 @@ static const struct sysfs_ops node_ops = {
.show = node_show,
};
-static struct kobj_type node_type = {
+static const struct kobj_type node_type = {
.release = kfd_topology_kobj_release,
.sysfs_ops = &node_ops,
};
@@ -557,6 +561,7 @@ static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr)
static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
{
+ struct kfd_iolink_properties *p2plink;
struct kfd_iolink_properties *iolink;
struct kfd_cache_properties *cache;
struct kfd_mem_properties *mem;
@@ -574,6 +579,18 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
dev->kobj_iolink = NULL;
}
+ if (dev->kobj_p2plink) {
+ list_for_each_entry(p2plink, &dev->p2p_link_props, list)
+ if (p2plink->kobj) {
+ kfd_remove_sysfs_file(p2plink->kobj,
+ &p2plink->attr);
+ p2plink->kobj = NULL;
+ }
+ kobject_del(dev->kobj_p2plink);
+ kobject_put(dev->kobj_p2plink);
+ dev->kobj_p2plink = NULL;
+ }
+
if (dev->kobj_cache) {
list_for_each_entry(cache, &dev->cache_props, list)
if (cache->kobj) {
@@ -620,6 +637,7 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
uint32_t id)
{
+ struct kfd_iolink_properties *p2plink;
struct kfd_iolink_properties *iolink;
struct kfd_cache_properties *cache;
struct kfd_mem_properties *mem;
@@ -657,6 +675,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
if (!dev->kobj_iolink)
return -ENOMEM;
+ dev->kobj_p2plink = kobject_create_and_add("p2p_links", dev->kobj_node);
+ if (!dev->kobj_p2plink)
+ return -ENOMEM;
+
dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node);
if (!dev->kobj_perf)
return -ENOMEM;
@@ -746,6 +768,27 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
i++;
}
+ i = 0;
+ list_for_each_entry(p2plink, &dev->p2p_link_props, list) {
+ p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+ if (!p2plink->kobj)
+ return -ENOMEM;
+ ret = kobject_init_and_add(p2plink->kobj, &iolink_type,
+ dev->kobj_p2plink, "%d", i);
+ if (ret < 0) {
+ kobject_put(p2plink->kobj);
+ return ret;
+ }
+
+ p2plink->attr.name = "properties";
+ p2plink->attr.mode = KFD_SYSFS_FILE_MODE;
+ sysfs_attr_init(&p2plink->attr);
+ ret = sysfs_create_file(p2plink->kobj, &p2plink->attr);
+ if (ret < 0)
+ return ret;
+ i++;
+ }
+
/* All hardware blocks have the same number of attributes. */
num_attrs = ARRAY_SIZE(perf_attr_iommu);
list_for_each_entry(perf, &dev->perf_props, list) {
@@ -910,44 +953,31 @@ static void kfd_update_system_properties(void)
dev = list_last_entry(&topology_device_list,
struct kfd_topology_device, list);
if (dev) {
- sys_props.platform_id =
- (*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK;
+ sys_props.platform_id = dev->oem_id64;
sys_props.platform_oem = *((uint64_t *)dev->oem_table_id);
sys_props.platform_rev = dev->oem_revision;
}
up_read(&topology_lock);
}
-static void find_system_memory(const struct dmi_header *dm,
- void *private)
+static void find_system_memory(const struct dmi_header *dm, void *private)
{
+ struct dmi_mem_device *memdev = container_of(dm, struct dmi_mem_device, header);
struct kfd_mem_properties *mem;
- u16 mem_width, mem_clock;
struct kfd_topology_device *kdev =
(struct kfd_topology_device *)private;
- const u8 *dmi_data = (const u8 *)(dm + 1);
-
- if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) {
- mem_width = (u16)(*(const u16 *)(dmi_data + 0x6));
- mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11));
- list_for_each_entry(mem, &kdev->mem_props, list) {
- if (mem_width != 0xFFFF && mem_width != 0)
- mem->width = mem_width;
- if (mem_clock != 0)
- mem->mem_clk_max = mem_clock;
- }
- }
-}
-/*
- * Performance counters information is not part of CRAT but we would like to
- * put them in the sysfs under topology directory for Thunk to get the data.
- * This function is called before updating the sysfs.
- */
-static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev)
-{
- /* These are the only counters supported so far */
- return kfd_iommu_add_perf_counters(kdev);
+ if (memdev->header.type != DMI_ENTRY_MEM_DEVICE)
+ return;
+ if (memdev->header.length < sizeof(struct dmi_mem_device))
+ return;
+
+ list_for_each_entry(mem, &kdev->mem_props, list) {
+ if (memdev->total_width != 0xFFFF && memdev->total_width != 0)
+ mem->width = memdev->total_width;
+ if (memdev->speed != 0)
+ mem->mem_clk_max = memdev->speed;
+ }
}
/* kfd_add_non_crat_information - Add information that is not currently
@@ -964,25 +994,6 @@ static void kfd_add_non_crat_information(struct kfd_topology_device *kdev)
/* TODO: For GPU node, rearrange code from kfd_topology_add_device */
}
-/* kfd_is_acpi_crat_invalid - CRAT from ACPI is valid only for AMD APU devices.
- * Ignore CRAT for all other devices. AMD APU is identified if both CPU
- * and GPU cores are present.
- * @device_list - topology device list created by parsing ACPI CRAT table.
- * @return - TRUE if invalid, FALSE is valid.
- */
-static bool kfd_is_acpi_crat_invalid(struct list_head *device_list)
-{
- struct kfd_topology_device *dev;
-
- list_for_each_entry(dev, device_list, list) {
- if (dev->node_props.cpu_cores_count &&
- dev->node_props.simd_count)
- return false;
- }
- pr_info("Ignoring ACPI CRAT on non-APU system\n");
- return true;
-}
-
int kfd_topology_init(void)
{
void *crat_image = NULL;
@@ -1013,53 +1024,30 @@ int kfd_topology_init(void)
*/
proximity_domain = 0;
- /*
- * Get the CRAT image from the ACPI. If ACPI doesn't have one
- * or if ACPI CRAT is invalid create a virtual CRAT.
- * NOTE: The current implementation expects all AMD APUs to have
- * CRAT. If no CRAT is available, it is assumed to be a CPU
- */
- ret = kfd_create_crat_image_acpi(&crat_image, &image_size);
- if (!ret) {
- ret = kfd_parse_crat_table(crat_image,
- &temp_topology_device_list,
- proximity_domain);
- if (ret ||
- kfd_is_acpi_crat_invalid(&temp_topology_device_list)) {
- kfd_release_topology_device_list(
- &temp_topology_device_list);
- kfd_destroy_crat_image(crat_image);
- crat_image = NULL;
- }
+ ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
+ COMPUTE_UNIT_CPU, NULL,
+ proximity_domain);
+ cpu_only_node = 1;
+ if (ret) {
+ pr_err("Error creating VCRAT table for CPU\n");
+ return ret;
}
- if (!crat_image) {
- ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
- COMPUTE_UNIT_CPU, NULL,
- proximity_domain);
- cpu_only_node = 1;
- if (ret) {
- pr_err("Error creating VCRAT table for CPU\n");
- return ret;
- }
-
- ret = kfd_parse_crat_table(crat_image,
- &temp_topology_device_list,
- proximity_domain);
- if (ret) {
- pr_err("Error parsing VCRAT table for CPU\n");
- goto err;
- }
+ ret = kfd_parse_crat_table(crat_image,
+ &temp_topology_device_list,
+ proximity_domain);
+ if (ret) {
+ pr_err("Error parsing VCRAT table for CPU\n");
+ goto err;
}
kdev = list_first_entry(&temp_topology_device_list,
struct kfd_topology_device, list);
- kfd_add_perf_to_topology(kdev);
down_write(&topology_lock);
kfd_topology_update_device_list(&temp_topology_device_list,
&topology_device_list);
- atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1);
+ topology_crat_proximity_domain = sys_props.num_devices-1;
ret = kfd_topology_update_sysfs();
up_write(&topology_lock);
@@ -1095,56 +1083,79 @@ void kfd_topology_shutdown(void)
up_write(&topology_lock);
}
-static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
+static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu)
{
- uint32_t hashout;
- uint32_t buf[7];
+ uint32_t gpu_id;
+ uint32_t buf[8];
uint64_t local_mem_size;
- int i;
- struct kfd_local_mem_info local_mem_info;
+ struct kfd_topology_device *dev;
+ bool is_unique;
+ uint8_t *crc_buf;
if (!gpu)
return 0;
- amdgpu_amdkfd_get_local_mem_info(gpu->kgd, &local_mem_info);
-
- local_mem_size = local_mem_info.local_mem_size_private +
- local_mem_info.local_mem_size_public;
-
- buf[0] = gpu->pdev->devfn;
- buf[1] = gpu->pdev->subsystem_vendor |
- (gpu->pdev->subsystem_device << 16);
- buf[2] = pci_domain_nr(gpu->pdev->bus);
- buf[3] = gpu->pdev->device;
- buf[4] = gpu->pdev->bus->number;
+ crc_buf = (uint8_t *)&buf;
+ local_mem_size = gpu->local_mem_info.local_mem_size_private +
+ gpu->local_mem_info.local_mem_size_public;
+ buf[0] = gpu->adev->pdev->devfn;
+ buf[1] = gpu->adev->pdev->subsystem_vendor |
+ (gpu->adev->pdev->subsystem_device << 16);
+ buf[2] = pci_domain_nr(gpu->adev->pdev->bus);
+ buf[3] = gpu->adev->pdev->device;
+ buf[4] = gpu->adev->pdev->bus->number;
buf[5] = lower_32_bits(local_mem_size);
buf[6] = upper_32_bits(local_mem_size);
+ buf[7] = (ffs(gpu->xcc_mask) - 1) | (NUM_XCC(gpu->xcc_mask) << 16);
+
+ gpu_id = crc16(0, crc_buf, sizeof(buf)) &
+ ((1 << KFD_GPU_ID_HASH_WIDTH) - 1);
+
+ /* There is a very small possibility when generating a
+ * 16 (KFD_GPU_ID_HASH_WIDTH) bit value from 8 word buffer
+ * that the value could be 0 or non-unique. So, check if
+ * it is unique and non-zero. If not unique increment till
+ * unique one is found. In case of overflow, restart from 1
+ */
- for (i = 0, hashout = 0; i < 7; i++)
- hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH);
+ down_read(&topology_lock);
+ do {
+ is_unique = true;
+ if (!gpu_id)
+ gpu_id = 1;
+ list_for_each_entry(dev, &topology_device_list, list) {
+ if (dev->gpu && dev->gpu_id == gpu_id) {
+ is_unique = false;
+ break;
+ }
+ }
+ if (unlikely(!is_unique))
+ gpu_id = (gpu_id + 1) &
+ ((1 << KFD_GPU_ID_HASH_WIDTH) - 1);
+ } while (!is_unique);
+ up_read(&topology_lock);
- return hashout;
+ return gpu_id;
}
/* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
* the GPU device is not already present in the topology device
* list then return NULL. This means a new topology device has to
* be created for this GPU.
*/
-static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
+static struct kfd_topology_device *kfd_assign_gpu(struct kfd_node *gpu)
{
struct kfd_topology_device *dev;
struct kfd_topology_device *out_dev = NULL;
struct kfd_mem_properties *mem;
struct kfd_cache_properties *cache;
struct kfd_iolink_properties *iolink;
+ struct kfd_iolink_properties *p2plink;
- down_write(&topology_lock);
list_for_each_entry(dev, &topology_device_list, list) {
/* Discrete GPUs need their own topology device list
* entries. Don't assign them to CPU/APU nodes.
*/
- if (!gpu->use_iommu_v2 &&
- dev->node_props.cpu_cores_count)
+ if (dev->node_props.cpu_cores_count)
continue;
if (!dev->gpu && (dev->node_props.simd_count > 0)) {
@@ -1157,10 +1168,11 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
cache->gpu = dev->gpu;
list_for_each_entry(iolink, &dev->io_link_props, list)
iolink->gpu = dev->gpu;
+ list_for_each_entry(p2plink, &dev->p2p_link_props, list)
+ p2plink->gpu = dev->gpu;
break;
}
}
- up_write(&topology_lock);
return out_dev;
}
@@ -1189,7 +1201,8 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
* for APUs - If CRAT from ACPI reports more than one bank, then
* all the banks will report the same mem_clk_max information
*/
- amdgpu_amdkfd_get_local_mem_info(dev->gpu->kgd, &local_mem_info);
+ amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info,
+ dev->gpu->xcp);
list_for_each_entry(mem, &dev->mem_props, list)
mem->mem_clk_max = local_mem_info.mem_clk_max;
@@ -1207,7 +1220,7 @@ static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev,
if (target_gpu_dev) {
uint32_t cap;
- pcie_capability_read_dword(target_gpu_dev->gpu->pdev,
+ pcie_capability_read_dword(target_gpu_dev->gpu->adev->pdev,
PCI_EXP_DEVCAP2, &cap);
if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
@@ -1216,9 +1229,8 @@ static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev,
CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
/* set gpu (dev) flags. */
} else {
- if (!dev->gpu->pci_atomic_requested ||
- dev->gpu->device_info->asic_family ==
- CHIP_HAWAII)
+ if (!dev->gpu->kfd->pci_atomic_requested ||
+ dev->gpu->adev->asic_type == CHIP_HAWAII)
link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
}
@@ -1239,13 +1251,68 @@ static void kfd_set_iolink_non_coherent(struct kfd_topology_device *to_dev,
*/
if (inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS ||
(inbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI &&
- to_dev->gpu->device_info->asic_family == CHIP_VEGA20)) {
+ KFD_GC_VERSION(to_dev->gpu) == IP_VERSION(9, 4, 0))) {
outbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT;
inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT;
}
}
}
+#define REC_SDMA_NUM_GPU 8
+static const int rec_sdma_eng_map[REC_SDMA_NUM_GPU][REC_SDMA_NUM_GPU] = {
+ { -1, 14, 12, 2, 4, 8, 10, 6 },
+ { 14, -1, 2, 10, 8, 4, 6, 12 },
+ { 10, 2, -1, 12, 14, 6, 4, 8 },
+ { 2, 12, 10, -1, 6, 14, 8, 4 },
+ { 4, 8, 14, 6, -1, 10, 12, 2 },
+ { 8, 4, 6, 14, 12, -1, 2, 10 },
+ { 10, 6, 4, 8, 12, 2, -1, 14 },
+ { 6, 12, 8, 4, 2, 10, 14, -1 }};
+
+/*
+ * kfd_set_recommended_sdma_engines - Fill rec_sdma_eng_id_mask on a link pair.
+ * @to_dev: topology device at the far end of @outbound_link
+ * @outbound_link: link whose ->gpu supplies the engine counts and XGMI info
+ * @inbound_link: the matching link in the opposite direction
+ *
+ * Either picks a single engine per direction from rec_sdma_eng_map (when all
+ * of the support_rec_eng conditions hold) or assigns the same mask of all
+ * XGMI / all non-XGMI SDMA engines to both links.
+ *
+ * NOTE(review): outbound_link->gpu is dereferenced unconditionally; this
+ * assumes the caller only passes links that already have ->gpu set - confirm.
+ */
+static void kfd_set_recommended_sdma_engines(struct kfd_topology_device *to_dev,
+ struct kfd_iolink_properties *outbound_link,
+ struct kfd_iolink_properties *inbound_link)
+{
+ struct kfd_node *gpu = outbound_link->gpu;
+ struct amdgpu_device *adev = gpu->adev;
+ unsigned int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes;
+ unsigned int num_xgmi_sdma_engines = kfd_get_num_xgmi_sdma_engines(gpu);
+ unsigned int num_sdma_engines = kfd_get_num_sdma_engines(gpu);
+ /* Low num_sdma_engines bits: the non-XGMI SDMA engines. */
+ uint32_t sdma_eng_id_mask = (1 << num_sdma_engines) - 1;
+ /* Next num_xgmi_sdma_engines bits, directly above the non-XGMI ones. */
+ uint32_t xgmi_sdma_eng_id_mask =
+ ((1 << num_xgmi_sdma_engines) - 1) << num_sdma_engines;
+
+ /*
+ * The per-pair table is only used for: bare metal (not an SR-IOV VF), a
+ * GPU peer, non-zero aid_mask, a single KFD node per device, at least 6
+ * XGMI SDMA engines, not an APU, and exactly 8 XGMI physical nodes
+ * (which matches REC_SDMA_NUM_GPU, the table dimension).
+ */
+ bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu &&
+ adev->aid_mask && num_xgmi_nodes && gpu->kfd->num_nodes == 1 &&
+ num_xgmi_sdma_engines >= 6 && (!(adev->flags & AMD_IS_APU) &&
+ num_xgmi_nodes == 8);
+
+ if (support_rec_eng) {
+ int src_socket_id = adev->gmc.xgmi.physical_node_id;
+ int dst_socket_id = to_dev->gpu->adev->gmc.xgmi.physical_node_id;
+ /* With exactly 6 XGMI engines the table's engine ids are halved. */
+ unsigned int reshift = num_xgmi_sdma_engines == 6 ? 1 : 0;
+
+ /*
+ * NOTE(review): the table diagonal is -1, so src == dst would give a
+ * negative shift count; presumably a link never connects a socket
+ * to itself, and both ids are assumed to be < REC_SDMA_NUM_GPU -
+ * confirm against the XGMI id assignment.
+ */
+ outbound_link->rec_sdma_eng_id_mask =
+ 1 << (rec_sdma_eng_map[src_socket_id][dst_socket_id] >> reshift);
+ inbound_link->rec_sdma_eng_id_mask =
+ 1 << (rec_sdma_eng_map[dst_socket_id][src_socket_id] >> reshift);
+
+ /* If recommended engine is out of range, need to reset the mask */
+ /* "Out of range" here means the bit landed on a non-XGMI engine. */
+ if (outbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask)
+ outbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask;
+ if (inbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask)
+ inbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask;
+
+ } else {
+ /*
+ * Fallback: all XGMI engines for an XGMI link to a GPU peer when any
+ * XGMI engines exist, otherwise all non-XGMI engines.
+ */
+ uint32_t engine_mask = (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI &&
+ num_xgmi_sdma_engines && to_dev->gpu) ? xgmi_sdma_eng_id_mask :
+ sdma_eng_id_mask;
+
+ outbound_link->rec_sdma_eng_id_mask = engine_mask;
+ inbound_link->rec_sdma_eng_id_mask = engine_mask;
+ }
+}
+
static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
{
struct kfd_iolink_properties *link, *inbound_link;
@@ -1264,6 +1331,18 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
if (!peer_dev)
continue;
+ /* Include the CPU peer in GPU hive if connected over xGMI. */
+ if (!peer_dev->gpu &&
+ link->iolink_type == CRAT_IOLINK_TYPE_XGMI) {
+ /*
+ * If the GPU is not part of a GPU hive, use its pci
+ * device location as the hive ID to bind with the CPU.
+ */
+ if (!dev->node_props.hive_id)
+ dev->node_props.hive_id = pci_dev_id(dev->gpu->adev->pdev);
+ peer_dev->node_props.hive_id = dev->node_props.hive_id;
+ }
+
list_for_each_entry(inbound_link, &peer_dev->io_link_props,
list) {
if (inbound_link->node_to != link->node_from)
@@ -1272,46 +1351,700 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED;
kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link);
kfd_set_iolink_non_coherent(peer_dev, link, inbound_link);
+ kfd_set_recommended_sdma_engines(peer_dev, link, inbound_link);
+ }
+ }
+
+ /* Create indirect links so apply flags setting to all */
+ list_for_each_entry(link, &dev->p2p_link_props, list) {
+ link->flags = CRAT_IOLINK_FLAGS_ENABLED;
+ kfd_set_iolink_no_atomics(dev, NULL, link);
+ peer_dev = kfd_topology_device_by_proximity_domain(
+ link->node_to);
+
+ if (!peer_dev)
+ continue;
+
+ list_for_each_entry(inbound_link, &peer_dev->p2p_link_props,
+ list) {
+ if (inbound_link->node_to != link->node_from)
+ continue;
+
+ inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED;
+ kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link);
+ kfd_set_iolink_non_coherent(peer_dev, link, inbound_link);
}
}
}
-int kfd_topology_add_device(struct kfd_dev *gpu)
+/*
+ * kfd_build_p2p_node_entry - Create the sysfs entry for one P2P link.
+ * @dev: topology device whose kobj_p2plink directory receives the entry
+ * @p2plink: link to expose; its ->kobj and ->attr are initialised here
+ *
+ * A new kobject named "<p2p_links_count - 1>" is added under
+ * dev->kobj_p2plink and a "properties" attribute file is created in it, so
+ * the caller must increment dev->node_props.p2p_links_count before calling.
+ *
+ * Return: 0 on success, -ENOMEM if the kobject cannot be allocated, or the
+ * negative error returned by kobject_init_and_add() / sysfs_create_file().
+ */
+static int kfd_build_p2p_node_entry(struct kfd_topology_device *dev,
+ struct kfd_iolink_properties *p2plink)
+{
+ int ret;
+
+ p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+ if (!p2plink->kobj)
+ return -ENOMEM;
+
+ ret = kobject_init_and_add(p2plink->kobj, &iolink_type,
+ dev->kobj_p2plink, "%d", dev->node_props.p2p_links_count - 1);
+ if (ret < 0) {
+ kobject_put(p2plink->kobj);
+ /*
+ * The put above dropped the only reference, so the object must not
+ * be used again. Clear the pointer: the sysfs teardown path treats
+ * any non-NULL p2plink->kobj as a live object.
+ */
+ p2plink->kobj = NULL;
+ return ret;
+ }
+
+ p2plink->attr.name = "properties";
+ p2plink->attr.mode = KFD_SYSFS_FILE_MODE;
+ sysfs_attr_init(&p2plink->attr);
+ /*
+ * On failure the kobject stays registered and referenced by
+ * p2plink->kobj; it is left for the regular sysfs teardown to release.
+ */
+ ret = sysfs_create_file(p2plink->kobj, &p2plink->attr);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+/*
+ * kfd_create_indirect_link_prop - Add indirect (via another CPU) links
+ * between a GPU node and every CPU node it is not directly linked to.
+ * @kdev: topology device of the GPU
+ * @gpu_node: node index used for the GPU end of the new links
+ *
+ * For each such CPU node a combined link is appended to kdev->p2p_link_props
+ * and, if kfd_dev_is_large_bar(kdev->gpu), the reverse link is appended to
+ * the CPU device's p2p_link_props. Each new link also gets a sysfs entry.
+ *
+ * Return: 0 on success, -ENODATA if @kdev has no IO links, -ENOMEM on
+ * allocation failure or when no CPU-to-CPU link is found, or the error from
+ * kfd_build_p2p_node_entry(). Links added before a failure stay on the lists.
+ */
+static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int gpu_node)
+{
+ struct kfd_iolink_properties *gpu_link, *tmp_link, *cpu_link;
+ struct kfd_iolink_properties *props = NULL, *props2 = NULL;
+ struct kfd_topology_device *cpu_dev;
+ int ret = 0;
+ int i, num_cpu;
+
+ /*
+ * Count the entries that precede the first device with ->gpu set; this
+ * assumes CPU-only nodes come first in topology_device_list and that
+ * their proximity domains are 0..num_cpu-1 - TODO confirm.
+ */
+ num_cpu = 0;
+ list_for_each_entry(cpu_dev, &topology_device_list, list) {
+ if (cpu_dev->gpu)
+ break;
+ num_cpu++;
+ }
+
+ if (list_empty(&kdev->io_link_props))
+ return -ENODATA;
+
+ /* Only the first IO link of the GPU is used as its direct link. */
+ gpu_link = list_first_entry(&kdev->io_link_props,
+ struct kfd_iolink_properties, list);
+
+ for (i = 0; i < num_cpu; i++) {
+ /* CPU <--> GPU */
+ /* Node i is the directly attached CPU: no indirect link needed. */
+ if (gpu_link->node_to == i)
+ continue;
+
+ /* find CPU <--> CPU links */
+ /* Look on CPU i for its link to the CPU the GPU is attached to. */
+ cpu_link = NULL;
+ cpu_dev = kfd_topology_device_by_proximity_domain(i);
+ if (cpu_dev) {
+ list_for_each_entry(tmp_link,
+ &cpu_dev->io_link_props, list) {
+ if (tmp_link->node_to == gpu_link->node_to) {
+ cpu_link = tmp_link;
+ break;
+ }
+ }
+ }
+
+ /*
+ * NOTE(review): this is a "link not found" condition, not an
+ * allocation failure; -ENOMEM looks like a placeholder error code.
+ */
+ if (!cpu_link)
+ return -ENOMEM;
+
+ /* CPU <--> CPU <--> GPU, GPU node*/
+ props = kfd_alloc_struct(props);
+ if (!props)
+ return -ENOMEM;
+
+ /*
+ * Start from a copy of the direct link (this also copies its list
+ * and kobj members, which are overwritten by list_add_tail() and
+ * kfd_build_p2p_node_entry() below), then combine both hops:
+ * weights and latencies add up, bandwidth is the slower hop.
+ */
+ memcpy(props, gpu_link, sizeof(struct kfd_iolink_properties));
+ props->weight = gpu_link->weight + cpu_link->weight;
+ props->min_latency = gpu_link->min_latency + cpu_link->min_latency;
+ props->max_latency = gpu_link->max_latency + cpu_link->max_latency;
+ props->min_bandwidth = min(gpu_link->min_bandwidth, cpu_link->min_bandwidth);
+ props->max_bandwidth = min(gpu_link->max_bandwidth, cpu_link->max_bandwidth);
+
+ props->node_from = gpu_node;
+ props->node_to = i;
+ /* Count first: the sysfs entry name is p2p_links_count - 1. */
+ kdev->node_props.p2p_links_count++;
+ list_add_tail(&props->list, &kdev->p2p_link_props);
+ ret = kfd_build_p2p_node_entry(kdev, props);
+ if (ret < 0)
+ return ret;
+
+ /* for small Bar, no CPU --> GPU in-direct links */
+ if (kfd_dev_is_large_bar(kdev->gpu)) {
+ /* CPU <--> CPU <--> GPU, CPU node*/
+ props2 = kfd_alloc_struct(props2);
+ if (!props2)
+ return -ENOMEM;
+
+ /* Same combined metrics, endpoints swapped, owned by the CPU node. */
+ memcpy(props2, props, sizeof(struct kfd_iolink_properties));
+ props2->node_from = i;
+ props2->node_to = gpu_node;
+ props2->kobj = NULL;
+ cpu_dev->node_props.p2p_links_count++;
+ list_add_tail(&props2->list, &cpu_dev->p2p_link_props);
+ ret = kfd_build_p2p_node_entry(cpu_dev, props2);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ return ret;
+}
+
+#if defined(CONFIG_HSA_AMD_P2P)
+/*
+ * kfd_add_peer_prop - Record a P2P link on @peer if @kdev and @peer are peer
+ * accessible.
+ * @kdev: first topology device of the pair (must have ->gpu set)
+ * @peer: device that receives the new entry in its p2p_link_props list
+ * @from: value stored in the new link's node_from
+ * @to: value stored in the new link's node_to
+ *
+ * The new link is built from the first IO link of each device. Weights and
+ * latencies of the hops are summed and the bandwidths are limited by the
+ * slowest hop. When the two first links end on different nodes, the IO link
+ * between those two nodes (looked up on the node @kdev's link points to) is
+ * added as a third hop.
+ *
+ * Return: 0 on success or when the devices are not peer accessible,
+ * -ENODATA if either device has no IO links, -ENOMEM on allocation failure,
+ * or the error from kfd_build_p2p_node_entry().
+ */
+static int kfd_add_peer_prop(struct kfd_topology_device *kdev,
+ struct kfd_topology_device *peer, int from, int to)
+{
+ struct kfd_iolink_properties *props = NULL;
+ struct kfd_iolink_properties *iolink1, *iolink2, *iolink3;
+ struct kfd_topology_device *cpu_dev;
+ int ret = 0;
+
+ /* Not peer accessible is not an error: simply add no link. */
+ if (!amdgpu_device_is_peer_accessible(
+ kdev->gpu->adev,
+ peer->gpu->adev))
+ return ret;
+
+ if (list_empty(&kdev->io_link_props))
+ return -ENODATA;
+
+ iolink1 = list_first_entry(&kdev->io_link_props,
+ struct kfd_iolink_properties, list);
+
+ if (list_empty(&peer->io_link_props))
+ return -ENODATA;
+
+ iolink2 = list_first_entry(&peer->io_link_props,
+ struct kfd_iolink_properties, list);
+
+ props = kfd_alloc_struct(props);
+ if (!props)
+ return -ENOMEM;
+
+ /*
+ * Start from a copy of the first hop; the copied list and kobj members
+ * are overwritten by list_add_tail() and kfd_build_p2p_node_entry().
+ */
+ memcpy(props, iolink1, sizeof(struct kfd_iolink_properties));
+
+ props->weight = iolink1->weight + iolink2->weight;
+ props->min_latency = iolink1->min_latency + iolink2->min_latency;
+ props->max_latency = iolink1->max_latency + iolink2->max_latency;
+ props->min_bandwidth = min(iolink1->min_bandwidth, iolink2->min_bandwidth);
+ /* Both hops limit the bandwidth (was min(iolink2, iolink2) by mistake). */
+ props->max_bandwidth = min(iolink1->max_bandwidth, iolink2->max_bandwidth);
+
+ if (iolink1->node_to != iolink2->node_to) {
+ /* CPU->CPU link*/
+ cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to);
+ if (cpu_dev) {
+ /* Fold in the hop between the two intermediate nodes. */
+ list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) {
+ if (iolink3->node_to != iolink2->node_to)
+ continue;
+
+ props->weight += iolink3->weight;
+ props->min_latency += iolink3->min_latency;
+ props->max_latency += iolink3->max_latency;
+ props->min_bandwidth = min(props->min_bandwidth,
+ iolink3->min_bandwidth);
+ props->max_bandwidth = min(props->max_bandwidth,
+ iolink3->max_bandwidth);
+ break;
+ }
+ } else {
+ WARN(1, "CPU node not found");
+ }
+ }
+
+ props->node_from = from;
+ props->node_to = to;
+ /* Count first: the sysfs entry name is p2p_links_count - 1. */
+ peer->node_props.p2p_links_count++;
+ list_add_tail(&props->list, &peer->p2p_link_props);
+ ret = kfd_build_p2p_node_entry(peer, props);
+
+ return ret;
+}
+#endif
+
+static int kfd_dev_create_p2p_links(void) /* Create indirect and (with CONFIG_HSA_AMD_P2P) peer-to-peer link entries for the last node in topology_device_list; returns 0 or a negative value from the helpers. */
{
- uint32_t gpu_id;
struct kfd_topology_device *dev;
- struct kfd_cu_info cu_info;
- int res = 0;
+ struct kfd_topology_device *new_dev;
+#if defined(CONFIG_HSA_AMD_P2P)
+ uint32_t i;
+#endif
+ uint32_t k;
+ int ret = 0;
+
+ k = 0; /* k first counts the nodes currently in the topology list */
+ list_for_each_entry(dev, &topology_device_list, list)
+ k++;
+ if (k < 2) /* fewer than two nodes: there is nothing to link */
+ return 0;
+
+ new_dev = list_last_entry(&topology_device_list, struct kfd_topology_device, list);
+ if (WARN_ON(!new_dev->gpu)) /* the last list entry is expected to be a GPU node */
+ return 0;
+
+ k--; /* from here on k is the zero-based list position of new_dev */
+
+ /* create in-direct links */
+ ret = kfd_create_indirect_link_prop(new_dev, k);
+ if (ret < 0)
+ goto out;
+
+ /* create p2p links */
+#if defined(CONFIG_HSA_AMD_P2P)
+ i = 0; /* i tracks the list position of dev while walking the nodes ahead of new_dev */
+ list_for_each_entry(dev, &topology_device_list, list) {
+ if (dev == new_dev)
+ break;
+ if (!dev->gpu || !dev->gpu->adev ||
+ (dev->gpu->kfd->hive_id &&
+ dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id &&
+ amdgpu_xgmi_get_is_sharing_enabled(dev->gpu->adev, new_dev->gpu->adev)))
+ goto next; /* skip non-GPU nodes and same-hive peers for which sharing is reported enabled */
+
+ /* check if node(s) is/are peer accessible in one direction or bi-direction */
+ ret = kfd_add_peer_prop(new_dev, dev, i, k);
+ if (ret < 0)
+ goto out;
+
+ ret = kfd_add_peer_prop(dev, new_dev, k, i); /* same check with the two nodes swapped */
+ if (ret < 0)
+ goto out;
+next:
+ i++;
+ }
+#endif
+
+out:
+ return ret;
+}
+
+/* Helper for kfd_fill_cache_non_crat_info. See kfd_fill_gpu_cache_info for
+ * the parameter description.
+ *
+ * Builds one cache entry for the group of CUs that starts at bit cu_block of
+ * cu_bitmask and is pcache_info[cache_type].num_cu_shared bits wide.
+ *
+ * Return: 0 with *props_ext pointing at the newly allocated entry, 1 when no
+ * CU of the group is active (nothing is allocated, *props_ext is untouched),
+ * -ENOMEM when the allocation fails.
+ */
+static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
+		struct kfd_gpu_cache_info *pcache_info,
+		int cu_bitmask,
+		int cache_type, unsigned int cu_processor_id,
+		int cu_block)
+{
+	struct kfd_gpu_cache_info *info = &pcache_info[cache_type];
+	struct kfd_cache_properties *entry = NULL;
+	unsigned int sibling_mask;
+	int first_cu;
+	int byte;
+
+	/* Keep only the CUs of this group that share the cache. */
+	sibling_mask = cu_bitmask;
+	sibling_mask >>= cu_block;
+	sibling_mask &= ((1 << info->num_cu_shared) - 1);
+
+	/* A CU can be inactive. For a shared cache the entry is anchored at
+	 * the first active CU of the group; for a non-shared cache an
+	 * inactive CU simply produces no entry.
+	 */
+	first_cu = ffs(sibling_mask);
+	if (!first_cu)
+		return 1;
+
+	entry = kfd_alloc_struct(entry);
+	if (!entry)
+		return -ENOMEM;
+
+	memset(entry, 0, sizeof(struct kfd_cache_properties));
+	entry->processor_id_low = cu_processor_id + (first_cu - 1);
+	entry->cache_level = info->cache_level;
+	entry->cache_size = info->cache_size;
+	entry->cacheline_size = info->cache_line_size;
+
+	/* Translate the CRAT cache flags into HSA cache type bits. */
+	if (info->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
+		entry->cache_type |= HSA_CACHE_TYPE_DATA;
+	if (info->flags & CRAT_CACHE_FLAGS_INST_CACHE)
+		entry->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
+	if (info->flags & CRAT_CACHE_FLAGS_CPU_CACHE)
+		entry->cache_type |= HSA_CACHE_TYPE_CPU;
+	if (info->flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
+		entry->cache_type |= HSA_CACHE_TYPE_HSACU;
+
+	/* The sibling map is relative to processor_id_low, so drop the
+	 * inactive CUs below the first active one.
+	 */
+	sibling_mask = sibling_mask >> (first_cu - 1);
+
+	/* Store the 32-bit mask least significant byte first. */
+	for (byte = 0; byte < 4; byte++)
+		entry->sibling_map[byte] =
+			(uint8_t)((sibling_mask >> (8 * byte)) & 0xFF);
+
+	entry->sibling_map_size = 4;
+	*props_ext = entry;
+
+	return 0;
+}
+
+/* Helper function. See kfd_fill_gpu_cache_info for parameter description
+ *
+ * Builds a single cache entry for a non-L1 cache type, with a sibling map
+ * that holds four bytes per (XCC, shader engine, shader array) combination
+ * of the node.
+ *
+ * Return: 0 with *props_ext pointing at the newly allocated entry, 1 when the
+ * masked bitmap has no active CU (nothing is allocated), -ENOMEM when the
+ * allocation fails.
+ */
+static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
+ struct kfd_gpu_cache_info *pcache_info,
+ struct amdgpu_cu_info *cu_info,
+ struct amdgpu_gfx_config *gfx_info,
+ int cache_type, unsigned int cu_processor_id,
+ struct kfd_node *knode)
+{
+ unsigned int cu_sibling_map_mask = 0;
+ int first_active_cu;
+ int i, j, k, xcc, start, end;
+ int num_xcc = NUM_XCC(knode->xcc_mask);
+ struct kfd_cache_properties *pcache = NULL;
+ enum amdgpu_memory_partition mode;
+ struct amdgpu_device *adev = knode->adev;
+ bool found = false;
+
+ start = ffs(knode->xcc_mask) - 1; /* index of the lowest set bit in xcc_mask */
+ end = start + num_xcc; /* one past the last XCC index walked below */
+
+ /* To find the bitmap in the first active cu in the first
+ * xcc, it is based on the assumption that every xcc must
+ * have at least one active cu.
+ */
+ for (i = 0; i < gfx_info->max_shader_engines && !found; i++) {
+ for (j = 0; j < gfx_info->max_sh_per_se && !found; j++) {
+ if (cu_info->bitmap[start][i % 4][j % 4]) { /* NOTE(review): indexed [i % 4][j % 4] here but [i % 4][j + i / 4] in the fill loop below - confirm which is intended */
+ cu_sibling_map_mask =
+ cu_info->bitmap[start][i % 4][j % 4];
+ found = true;
+ }
+ }
+ }
+
+ cu_sibling_map_mask &=
+ ((1 << pcache_info[cache_type].num_cu_shared) - 1);
+ first_active_cu = ffs(cu_sibling_map_mask); /* 0 when no CU bit survives the mask */
+
+ /* CU could be inactive. In case of shared cache find the first active
+ * CU. and in case of non-shared cache check if the CU is inactive. If
+ * inactive, skip it
+ */
+ if (first_active_cu) {
+ pcache = kfd_alloc_struct(pcache);
+ if (!pcache)
+ return -ENOMEM;
+
+ memset(pcache, 0, sizeof(struct kfd_cache_properties));
+ pcache->processor_id_low = cu_processor_id
+ + (first_active_cu - 1);
+ pcache->cache_level = pcache_info[cache_type].cache_level;
+ pcache->cacheline_size = pcache_info[cache_type].cache_line_size;
+
+ if (KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(knode) == IP_VERSION(9, 5, 0))
+ mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ else
+ mode = UNKNOWN_MEMORY_PARTITION_MODE;
+
+ pcache->cache_size = pcache_info[cache_type].cache_size;
+ /* Partition mode only affects L3 cache size */
+ if (mode && pcache->cache_level == 3)
+ pcache->cache_size /= mode; /* the numeric value of the partition mode enum is used as the divisor */
+
+ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)
+ pcache->cache_type |= HSA_CACHE_TYPE_DATA;
+ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE)
+ pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
+ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE)
+ pcache->cache_type |= HSA_CACHE_TYPE_CPU;
+ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
+ pcache->cache_type |= HSA_CACHE_TYPE_HSACU;
+
+ /* Sibling map is w.r.t processor_id_low, so shift out
+ * inactive CU
+ */
+ cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1);
+ k = 0; /* k is the byte offset into sibling_map */
+
+ for (xcc = start; xcc < end; xcc++) {
+ for (i = 0; i < gfx_info->max_shader_engines; i++) {
+ for (j = 0; j < gfx_info->max_sh_per_se; j++) {
+ pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF); /* stores the mask prepared before this iteration, least significant byte first */
+ pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
+ pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
+ pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
+ k += 4; /* NOTE(review): k is not checked against the size of sibling_map - confirm the loop bounds keep it in range */
+
+ cu_sibling_map_mask = cu_info->bitmap[xcc][i % 4][j + i / 4]; /* loaded for the next iteration's store; the value loaded on the final iteration is never written */
+ cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
+ }
+ }
+ }
+ pcache->sibling_map_size = k;
+ *props_ext = pcache;
+ return 0;
+ }
+ return 1;
+}
+
+#define KFD_MAX_CACHE_TYPES 6
+
+/* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info
+ * tables
+ *
+ * @dev:  topology device whose cache_props list and node_props.caches_count
+ *        are extended
+ * @kdev: KFD node that supplies the CU bitmap, the gfx configuration and the
+ *        XCC mask
+ *
+ * Every cache type returned by kfd_get_gpu_cache_info() is visited. Level 1
+ * caches get one entry per group of num_cu_shared CUs that has an active CU;
+ * every other level gets at most one entry for the whole node.
+ *
+ * An allocation failure in either helper stops the whole walk. The entries
+ * created so far stay on the list and are still counted.
+ */
+static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_node *kdev)
+{
+	struct kfd_gpu_cache_info *pcache_info = NULL;
+	int i, j, k, xcc, start, end;
+	int ct = 0;
+	unsigned int cu_processor_id;
+	int ret;
+	unsigned int num_cu_shared;
+	struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info;
+	struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config;
+	int gpu_processor_id;
+	struct kfd_cache_properties *props_ext = NULL;
+	int num_of_entries = 0;
+	int num_of_cache_types = 0;
+	struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
+
+	gpu_processor_id = dev->node_props.simd_id_base;
+
+	memset(cache_info, 0, sizeof(cache_info));
+	pcache_info = cache_info;
+	num_of_cache_types = kfd_get_gpu_cache_info(kdev, &pcache_info);
+	if (!num_of_cache_types) {
+		pr_warn("no cache info found\n");
+		return;
+	}
+
+	/* For each type of cache listed in the kfd_gpu_cache_info table,
+	 * go through all available Compute Units.
+	 * The [i,j,k] loop will
+	 *   if kfd_gpu_cache_info.num_cu_shared = 1
+	 *     parse through all available CU
+	 *   if kfd_gpu_cache_info.num_cu_shared != 1
+	 *     consider only one CU from the shared unit
+	 */
+	start = ffs(kdev->xcc_mask) - 1;
+	end = start + NUM_XCC(kdev->xcc_mask);
+
+	for (ct = 0; ct < num_of_cache_types; ct++) {
+		/* Processor ids restart at the GPU base for every cache type. */
+		cu_processor_id = gpu_processor_id;
+		if (pcache_info[ct].cache_level == 1) {
+			for (xcc = start; xcc < end; xcc++) {
+				for (i = 0; i < gfx_info->max_shader_engines; i++) {
+					for (j = 0; j < gfx_info->max_sh_per_se; j++) {
+						for (k = 0; k < gfx_info->max_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
+
+							ret = fill_in_l1_pcache(&props_ext, pcache_info,
+										cu_info->bitmap[xcc][i % 4][j + i / 4], ct,
+										cu_processor_id, k);
+
+							/* A plain break would leave only the
+							 * innermost loop and carry on with a
+							 * cu_processor_id that was not advanced
+							 * for the rest of this SH, so stop the
+							 * walk as the L2/L3 path does.
+							 */
+							if (ret < 0)
+								goto out;
+
+							if (!ret) {
+								num_of_entries++;
+								list_add_tail(&props_ext->list, &dev->cache_props);
+							}
+
+							/* Move to next CU block */
+							num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
+									 gfx_info->max_cu_per_sh) ?
+									 pcache_info[ct].num_cu_shared :
+									 (gfx_info->max_cu_per_sh - k);
+							cu_processor_id += num_cu_shared;
+						}
+					}
+				}
+			}
+		} else {
+			ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
+						   cu_info, gfx_info, ct, cu_processor_id, kdev);
+
+			if (ret < 0)
+				goto out;
+
+			if (!ret) {
+				num_of_entries++;
+				list_add_tail(&props_ext->list, &dev->cache_props);
+			}
+		}
+	}
+out:
+	/* Account for whatever was added, including after an early stop. */
+	dev->node_props.caches_count += num_of_entries;
+	pr_debug("Added [%d] GPU cache entries\n", num_of_entries);
+}
+
+static int kfd_topology_add_device_locked(struct kfd_node *gpu,
+ struct kfd_topology_device **dev) /* Build a virtual CRAT for gpu, merge it into the topology and return the assigned node through *dev; returns 0 or a negative error. */
+{
+ int proximity_domain = ++topology_crat_proximity_domain; /* reserve the next proximity domain; given back on the two CRAT failure paths below */
struct list_head temp_topology_device_list;
void *crat_image = NULL;
size_t image_size = 0;
- int proximity_domain;
- struct amdgpu_device *adev;
+ int res;
+
+ res = kfd_create_crat_image_virtual(&crat_image, &image_size,
+ COMPUTE_UNIT_GPU, gpu,
+ proximity_domain);
+ if (res) {
+ dev_err(gpu->adev->dev, "Error creating VCRAT\n");
+ topology_crat_proximity_domain--;
+ goto err;
+ }
INIT_LIST_HEAD(&temp_topology_device_list);
- gpu_id = kfd_generate_gpu_id(gpu);
+ res = kfd_parse_crat_table(crat_image,
+ &temp_topology_device_list,
+ proximity_domain);
+ if (res) {
+ dev_err(gpu->adev->dev, "Error parsing VCRAT\n");
+ topology_crat_proximity_domain--;
+ goto err;
+ }
- pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
+ kfd_topology_update_device_list(&temp_topology_device_list,
+ &topology_device_list);
- proximity_domain = atomic_inc_return(&topology_crat_proximity_domain);
+ *dev = kfd_assign_gpu(gpu);
+ if (WARN_ON(!*dev)) {
+ res = -ENODEV; /* NOTE(review): the proximity domain counter is not decremented on this path - confirm this is intended */
+ goto err;
+ }
- adev = (struct amdgpu_device *)(gpu->kgd);
+ /* Fill the cache affinity information here for the GPUs
+ * using VCRAT
+ */
+ kfd_fill_cache_non_crat_info(*dev, gpu);
- /* Include the CPU in xGMI hive if xGMI connected by assigning it the hive ID. */
- if (gpu->hive_id && adev->gmc.xgmi.connected_to_cpu) {
- struct kfd_topology_device *top_dev;
+ /* Update the SYSFS tree, since we added another topology
+ * device
+ */
+ res = kfd_topology_update_sysfs();
+ if (!res)
+ sys_props.generation_count++;
+ else
+ dev_err(gpu->adev->dev, "Failed to update GPU to sysfs topology. res=%d\n",
+ res);
- down_read(&topology_lock);
+err: /* also reached on success: the CRAT image is always destroyed before returning */
+ kfd_destroy_crat_image(crat_image);
+ return res;
+}
- list_for_each_entry(top_dev, &topology_device_list, list) {
- if (top_dev->gpu)
- break;
+static void kfd_topology_set_dbg_firmware_support(struct kfd_topology_device *dev) /* Set HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED in dev->node_props.capability when the firmware version checks below pass. */
+{
+ bool firmware_supported = true; /* default for GC versions not listed below */
+
+ if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0) &&
+ KFD_GC_VERSION(dev->gpu) < IP_VERSION(12, 0, 0)) { /* this GC range is judged by the MES scheduler version instead of mec_fw_version */
+ uint32_t mes_api_rev = (dev->gpu->adev->mes.sched_version &
+ AMDGPU_MES_API_VERSION_MASK) >>
+ AMDGPU_MES_API_VERSION_SHIFT;
+ uint32_t mes_rev = dev->gpu->adev->mes.sched_version &
+ AMDGPU_MES_VERSION_MASK;
+
+ firmware_supported = (mes_api_rev >= 14) && (mes_rev >= 64);
+ goto out;
+ }
- top_dev->node_props.hive_id = gpu->hive_id;
- }
+ /*
+ * Note: Any unlisted devices here are assumed to support exception handling.
+ * Add additional checks here as needed.
+ */
+ switch (KFD_GC_VERSION(dev->gpu)) { /* each case compares mec_fw_version with a per-version minimum */
+ case IP_VERSION(9, 0, 1):
+ firmware_supported = dev->gpu->kfd->mec_fw_version >= 459 + 32768;
+ break;
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 0):
+ firmware_supported = dev->gpu->kfd->mec_fw_version >= 459;
+ break;
+ case IP_VERSION(9, 4, 1):
+ firmware_supported = dev->gpu->kfd->mec_fw_version >= 60;
+ break;
+ case IP_VERSION(9, 4, 2):
+ firmware_supported = dev->gpu->kfd->mec_fw_version >= 51;
+ break;
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 1):
+ firmware_supported = dev->gpu->kfd->mec_fw_version >= 144;
+ break;
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ firmware_supported = dev->gpu->kfd->mec_fw_version >= 89;
+ break;
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 3, 3):
+ firmware_supported = false; /* never reported as supported, whatever the firmware version */
+ break;
+ default:
+ break;
+ }
- up_read(&topology_lock);
+out:
+ if (firmware_supported)
+ dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED;
+}
+
+/* Fill in the doorbell, trap-debug and watch-address capability bits of a
+ * topology node, picking the variant that matches its GC IP version, and
+ * finish with the firmware support check.
+ */
+static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
+{
+	struct kfd_node_properties *props = &dev->node_props;
+	const uint32_t gc_ver = KFD_GC_VERSION(dev->gpu);
+
+	props->capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
+			       HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
+			      HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
+
+	props->capability |= HSA_CAP_TRAP_DEBUG_SUPPORT |
+			     HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED |
+			     HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED;
+
+	if (kfd_dbg_has_ttmps_always_setup(dev->gpu))
+		props->debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
+
+	if (gc_ver >= IP_VERSION(10, 0, 0)) {
+		/* GC 10 and newer share one watch address mask layout. */
+		props->debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 |
+				     HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
+
+		if (gc_ver >= IP_VERSION(12, 0, 0))
+			props->capability |=
+				HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED;
+	} else {
+		/* Below GC 10, versions 9.4.3 and 9.4.4 use their own mask bits. */
+		switch (gc_ver) {
+		case IP_VERSION(9, 4, 3):
+		case IP_VERSION(9, 4, 4):
+			props->debug_prop |=
+				HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9_4_3 |
+				HSA_DBG_WATCH_ADDR_MASK_HI_BIT_GFX9_4_3;
+			break;
+		default:
+			props->debug_prop |=
+				HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 |
+				HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
+			break;
+		}
+
+		if (gc_ver >= IP_VERSION(9, 4, 2))
+			props->capability |=
+				HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
+
+		if (!amdgpu_sriov_vf(dev->gpu->adev))
+			props->capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED;
+	}
+
+	kfd_topology_set_dbg_firmware_support(dev);
+}
+
+int kfd_topology_add_device(struct kfd_node *gpu)
+{
+ uint32_t gpu_id;
+ struct kfd_topology_device *dev;
+ int res = 0;
+ int i;
+ const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];
+ struct amdgpu_gfx_config *gfx_info = &gpu->adev->gfx.config;
+ struct amdgpu_cu_info *cu_info = &gpu->adev->gfx.cu_info;
+
+ if (gpu->xcp && !gpu->xcp->ddev) {
+ dev_warn(gpu->adev->dev,
+ "Won't add GPU to topology since it has no drm node assigned.");
+ return 0;
+ } else {
+ dev_dbg(gpu->adev->dev, "Adding new GPU to topology\n");
}
/* Check to see if this gpu device exists in the topology_device_list.
@@ -1320,50 +2053,20 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
* CRAT to create a new topology device. Once created assign the gpu to
* that topology device
*/
+ down_write(&topology_lock);
dev = kfd_assign_gpu(gpu);
- if (!dev) {
- res = kfd_create_crat_image_virtual(&crat_image, &image_size,
- COMPUTE_UNIT_GPU, gpu,
- proximity_domain);
- if (res) {
- pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
- gpu_id);
- return res;
- }
- res = kfd_parse_crat_table(crat_image,
- &temp_topology_device_list,
- proximity_domain);
- if (res) {
- pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
- gpu_id);
- goto err;
- }
-
- down_write(&topology_lock);
- kfd_topology_update_device_list(&temp_topology_device_list,
- &topology_device_list);
-
- /* Update the SYSFS tree, since we added another topology
- * device
- */
- res = kfd_topology_update_sysfs();
- up_write(&topology_lock);
-
- if (!res)
- sys_props.generation_count++;
- else
- pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
- gpu_id, res);
- dev = kfd_assign_gpu(gpu);
- if (WARN_ON(!dev)) {
- res = -ENODEV;
- goto err;
- }
- }
+ if (!dev)
+ res = kfd_topology_add_device_locked(gpu, &dev);
+ up_write(&topology_lock);
+ if (res)
+ return res;
+ gpu_id = kfd_generate_gpu_id(gpu);
dev->gpu_id = gpu_id;
gpu->id = gpu_id;
+ kfd_dev_create_p2p_links();
+
/* TODO: Move the following lines to function
* kfd_add_non_crat_information
*/
@@ -1371,46 +2074,56 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
/* Fill-in additional information that is not available in CRAT but
* needed for the topology
*/
-
- amdgpu_amdkfd_get_cu_info(dev->gpu->kgd, &cu_info);
-
- strncpy(dev->node_props.name, gpu->device_info->asic_name,
- KFD_TOPOLOGY_PUBLIC_NAME_SIZE);
+ for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1; i++) {
+ dev->node_props.name[i] = __tolower(asic_name[i]);
+ if (asic_name[i] == '\0')
+ break;
+ }
+ dev->node_props.name[i] = '\0';
dev->node_props.simd_arrays_per_engine =
- cu_info.num_shader_arrays_per_engine;
+ gfx_info->max_sh_per_se;
- dev->node_props.gfx_target_version = gpu->device_info->gfx_target_version;
- dev->node_props.vendor_id = gpu->pdev->vendor;
- dev->node_props.device_id = gpu->pdev->device;
+ dev->node_props.gfx_target_version =
+ gpu->kfd->device_info.gfx_target_version;
+ dev->node_props.vendor_id = gpu->adev->pdev->vendor;
+ dev->node_props.device_id = gpu->adev->pdev->device;
dev->node_props.capability |=
- ((amdgpu_amdkfd_get_asic_rev_id(dev->gpu->kgd) <<
- HSA_CAP_ASIC_REVISION_SHIFT) &
+ ((dev->gpu->adev->rev_id << HSA_CAP_ASIC_REVISION_SHIFT) &
HSA_CAP_ASIC_REVISION_MASK);
- dev->node_props.location_id = pci_dev_id(gpu->pdev);
- dev->node_props.domain = pci_domain_nr(gpu->pdev->bus);
+
+ dev->node_props.location_id = pci_dev_id(gpu->adev->pdev);
+ if (gpu->kfd->num_nodes > 1)
+ dev->node_props.location_id |= dev->gpu->node_id;
+
+ dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus);
dev->node_props.max_engine_clk_fcompute =
- amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd);
+ amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->adev);
dev->node_props.max_engine_clk_ccompute =
cpufreq_quick_get_max(0) / 1000;
- dev->node_props.drm_render_minor =
- gpu->shared_resources.drm_render_minor;
- dev->node_props.hive_id = gpu->hive_id;
- dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines;
+ if (gpu->xcp)
+ dev->node_props.drm_render_minor = gpu->xcp->ddev->render->index;
+ else
+ dev->node_props.drm_render_minor =
+ gpu->kfd->shared_resources.drm_render_minor;
+
+ dev->node_props.hive_id = gpu->kfd->hive_id;
+ dev->node_props.num_sdma_engines = kfd_get_num_sdma_engines(gpu);
dev->node_props.num_sdma_xgmi_engines =
- gpu->device_info->num_xgmi_sdma_engines;
+ kfd_get_num_xgmi_sdma_engines(gpu);
dev->node_props.num_sdma_queues_per_engine =
- gpu->device_info->num_sdma_queues_per_engine;
+ gpu->kfd->device_info.num_sdma_queues_per_engine -
+ gpu->kfd->device_info.num_reserved_sdma_queues_per_engine;
dev->node_props.num_gws = (dev->gpu->gws &&
dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
- amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
+ dev->gpu->adev->gds.gws_size : 0;
dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm);
kfd_fill_mem_clk_max_info(dev);
kfd_fill_iolink_non_crat_info(dev);
- switch (dev->gpu->device_info->asic_family) {
+ switch (dev->gpu->adev->asic_type) {
case CHIP_KAVERI:
case CHIP_HAWAII:
case CHIP_TONGA:
@@ -1429,94 +2142,146 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
break;
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- case CHIP_RENOIR:
- case CHIP_ARCTURUS:
- case CHIP_ALDEBARAN:
- case CHIP_NAVI10:
- case CHIP_NAVI12:
- case CHIP_NAVI14:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_BEIGE_GOBY:
- case CHIP_YELLOW_CARP:
- case CHIP_CYAN_SKILLFISH:
- dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
- HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
- HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
- break;
default:
- WARN(1, "Unexpected ASIC family %u",
- dev->gpu->device_info->asic_family);
+ if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(9, 0, 1))
+ WARN(1, "Unexpected ASIC family %u",
+ dev->gpu->adev->asic_type);
+ else
+ kfd_topology_set_capabilities(dev);
}
/*
- * Overwrite ATS capability according to needs_iommu_device to fix
- * potential missing corresponding bit in CRAT of BIOS.
- */
- if (dev->gpu->use_iommu_v2)
- dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
- else
- dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT;
+ * Overwrite ATS capability according to needs_iommu_device to fix
+ * potential missing corresponding bit in CRAT of BIOS.
+ */
+ dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT;
/* Fix errors in CZ CRAT.
* simd_count: Carrizo CRAT reports wrong simd_count, probably
* because it doesn't consider masked out CUs
* max_waves_per_simd: Carrizo reports wrong max_waves_per_simd
*/
- if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
+ if (dev->gpu->adev->asic_type == CHIP_CARRIZO) {
dev->node_props.simd_count =
- cu_info.simd_per_cu * cu_info.cu_active_number;
+ cu_info->simd_per_cu * cu_info->number;
dev->node_props.max_waves_per_simd = 10;
}
/* kfd only concerns sram ecc on GFX and HBM ecc on UMC */
dev->node_props.capability |=
- ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
+ ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
HSA_CAP_SRAM_EDCSUPPORTED : 0;
- dev->node_props.capability |= ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
+ dev->node_props.capability |=
+ ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
HSA_CAP_MEM_EDCSUPPORTED : 0;
- if (adev->asic_type != CHIP_VEGA10)
- dev->node_props.capability |= (adev->ras_enabled != 0) ?
+ if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 0, 1))
+ dev->node_props.capability |= (dev->gpu->adev->ras_enabled != 0) ?
HSA_CAP_RASEVENTNOTIFY : 0;
- if (KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev))
+ if (KFD_IS_SVM_API_SUPPORTED(dev->gpu->adev))
dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED;
+ if (dev->gpu->adev->gmc.is_app_apu ||
+ dev->gpu->adev->gmc.xgmi.connected_to_cpu)
+ dev->node_props.capability |= HSA_CAP_FLAGS_COHERENTHOSTACCESS;
+
+ kfd_queue_ctx_save_restore_size(dev);
+
kfd_debug_print_topology();
- if (!res)
- kfd_notify_gpu_change(gpu_id, 1);
-err:
- kfd_destroy_crat_image(crat_image);
- return res;
+ kfd_notify_gpu_change(gpu_id, 1);
+
+ return 0;
}
-int kfd_topology_remove_device(struct kfd_dev *gpu)
+/* Walk one link list of a node: unlink every entry that points at
+ * @proximity_domain (decrementing *@links_count for each) and shift the
+ * node_from/node_to values of the remaining entries that lie above it down
+ * by one. Unlinked entries are only removed from the list here.
+ */
+static void kfd_topology_fixup_links(struct list_head *links,
+				     uint32_t *links_count,
+				     int proximity_domain)
+{
+	struct kfd_iolink_properties *link, *next;
+
+	list_for_each_entry_safe(link, next, links, list) {
+		if (link->node_to == proximity_domain) {
+			list_del(&link->list);
+			(*links_count)--;
+			continue;
+		}
+
+		if (link->node_from > proximity_domain)
+			link->node_from--;
+		if (link->node_to > proximity_domain)
+			link->node_to--;
+	}
+}
+
+/**
+ * kfd_topology_update_io_links() - Update IO links after device removal.
+ * @proximity_domain: Proximity domain value of the dev being removed.
+ *
+ * The topology list is kept in increasing order of proximity domain, so
+ * removing a device has two consequences:
+ * 1. Every IO link and p2p link that targets the removed device has to be
+ *    dropped from the remaining nodes.
+ * 2. Every node behind the removed one moves up by one position, which
+ *    lowers its proximity domain by 1. The node_from/node_to values stored
+ *    in the surviving links are renumbered to match.
+ *
+ * Context: The caller must hold write topology_lock.
+ */
+static void kfd_topology_update_io_links(int proximity_domain)
+{
+	struct kfd_topology_device *node;
+
+	list_for_each_entry(node, &topology_device_list, list) {
+		if (node->proximity_domain > proximity_domain)
+			node->proximity_domain--;
+
+		/* IO links first, then p2p links, same rules for both. */
+		kfd_topology_fixup_links(&node->io_link_props,
+					 &node->node_props.io_links_count,
+					 proximity_domain);
+		kfd_topology_fixup_links(&node->p2p_link_props,
+					 &node->node_props.p2p_links_count,
+					 proximity_domain);
+	}
+}
+
+int kfd_topology_remove_device(struct kfd_node *gpu)
{
struct kfd_topology_device *dev, *tmp;
uint32_t gpu_id;
int res = -ENODEV;
+ int i = 0;
down_write(&topology_lock);
- list_for_each_entry_safe(dev, tmp, &topology_device_list, list)
+ list_for_each_entry_safe(dev, tmp, &topology_device_list, list) {
if (dev->gpu == gpu) {
gpu_id = dev->gpu_id;
kfd_remove_sysfs_node_entry(dev);
kfd_release_topology_device(dev);
sys_props.num_devices--;
+ kfd_topology_update_io_links(i);
+ topology_crat_proximity_domain = sys_props.num_devices-1;
+ sys_props.generation_count++;
res = 0;
if (kfd_topology_update_sysfs() < 0)
kfd_topology_release_sysfs();
break;
}
+ i++;
+ }
up_write(&topology_lock);
@@ -1532,7 +2297,7 @@ int kfd_topology_remove_device(struct kfd_dev *gpu)
* Return - 0: On success (@kdev will be NULL for non GPU nodes)
* -1: If end of list
*/
-int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev)
+int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_node **kdev)
{
struct kfd_topology_device *top_dev;
@@ -1567,7 +2332,7 @@ static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
if (first_cpu_of_numa_node >= nr_cpu_ids)
return -1;
#ifdef CONFIG_X86_64
- return cpu_data(first_cpu_of_numa_node).apicid;
+ return cpu_data(first_cpu_of_numa_node).topo.apicid;
#else
return first_cpu_of_numa_node;
#endif
@@ -1586,29 +2351,6 @@ int kfd_numa_node_to_apic_id(int numa_node_id)
return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
}
-void kfd_double_confirm_iommu_support(struct kfd_dev *gpu)
-{
- struct kfd_topology_device *dev;
-
- gpu->use_iommu_v2 = false;
-
- if (!gpu->device_info->needs_iommu_device)
- return;
-
- down_read(&topology_lock);
-
- /* Only use IOMMUv2 if there is an APU topology node with no GPU
- * assigned yet. This GPU will be assigned to it.
- */
- list_for_each_entry(dev, &topology_device_list, list)
- if (dev->node_props.cpu_cores_count &&
- dev->node_props.simd_count &&
- !dev->gpu)
- gpu->use_iommu_v2 = true;
-
- up_read(&topology_lock);
-}
-
#if defined(CONFIG_DEBUG_FS)
int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index a8db017c9b8e..3de8ec0043bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -23,39 +24,21 @@
#ifndef __KFD_TOPOLOGY_H__
#define __KFD_TOPOLOGY_H__
+#include <linux/dmi.h>
#include <linux/types.h>
#include <linux/list.h>
+#include <linux/kfd_sysfs.h>
#include "kfd_crat.h"
#define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 32
-#define HSA_CAP_HOT_PLUGGABLE 0x00000001
-#define HSA_CAP_ATS_PRESENT 0x00000002
-#define HSA_CAP_SHARED_WITH_GRAPHICS 0x00000004
-#define HSA_CAP_QUEUE_SIZE_POW2 0x00000008
-#define HSA_CAP_QUEUE_SIZE_32BIT 0x00000010
-#define HSA_CAP_QUEUE_IDLE_EVENT 0x00000020
-#define HSA_CAP_VA_LIMIT 0x00000040
-#define HSA_CAP_WATCH_POINTS_SUPPORTED 0x00000080
-#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00
-#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8
-#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK 0x00003000
-#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT 12
-
-#define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0
-#define HSA_CAP_DOORBELL_TYPE_1_0 0x1
-#define HSA_CAP_DOORBELL_TYPE_2_0 0x2
-#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000
-
-#define HSA_CAP_RESERVED_WAS_SRAM_EDCSUPPORTED 0x00080000 /* Old buggy user mode depends on this being 0 */
-#define HSA_CAP_MEM_EDCSUPPORTED 0x00100000
-#define HSA_CAP_RASEVENTNOTIFY 0x00200000
-#define HSA_CAP_ASIC_REVISION_MASK 0x03c00000
-#define HSA_CAP_ASIC_REVISION_SHIFT 22
-#define HSA_CAP_SRAM_EDCSUPPORTED 0x04000000
-#define HSA_CAP_SVMAPI_SUPPORTED 0x08000000
-#define HSA_CAP_FLAGS_COHERENTHOSTACCESS 0x10000000
-#define HSA_CAP_RESERVED 0xe00f8000
+#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 6
+#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9_4_3 7
+#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 7
+#define HSA_DBG_WATCH_ADDR_MASK_HI_BIT \
+ (29 << HSA_DBG_WATCH_ADDR_MASK_HI_BIT_SHIFT)
+#define HSA_DBG_WATCH_ADDR_MASK_HI_BIT_GFX9_4_3 \
+ (30 << HSA_DBG_WATCH_ADDR_MASK_HI_BIT_SHIFT)
struct kfd_node_properties {
uint64_t hive_id;
@@ -64,9 +47,12 @@ struct kfd_node_properties {
uint32_t mem_banks_count;
uint32_t caches_count;
uint32_t io_links_count;
+ uint32_t p2p_links_count;
uint32_t cpu_core_id_base;
uint32_t simd_id_base;
uint32_t capability;
+ uint32_t capability2;
+ uint64_t debug_prop;
uint32_t max_waves_per_simd;
uint32_t lds_size_in_kb;
uint32_t gds_size_in_kb;
@@ -90,20 +76,13 @@ struct kfd_node_properties {
uint32_t num_sdma_xgmi_engines;
uint32_t num_sdma_queues_per_engine;
uint32_t num_cp_queues;
+ uint32_t cwsr_size;
+ uint32_t ctl_stack_size;
+ uint32_t eop_buffer_size;
+ uint32_t debug_memory_size;
char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
};
-#define HSA_MEM_HEAP_TYPE_SYSTEM 0
-#define HSA_MEM_HEAP_TYPE_FB_PUBLIC 1
-#define HSA_MEM_HEAP_TYPE_FB_PRIVATE 2
-#define HSA_MEM_HEAP_TYPE_GPU_GDS 3
-#define HSA_MEM_HEAP_TYPE_GPU_LDS 4
-#define HSA_MEM_HEAP_TYPE_GPU_SCRATCH 5
-
-#define HSA_MEM_FLAGS_HOT_PLUGGABLE 0x00000001
-#define HSA_MEM_FLAGS_NON_VOLATILE 0x00000002
-#define HSA_MEM_FLAGS_RESERVED 0xfffffffc
-
struct kfd_mem_properties {
struct list_head list;
uint32_t heap_type;
@@ -111,16 +90,12 @@ struct kfd_mem_properties {
uint32_t flags;
uint32_t width;
uint32_t mem_clk_max;
- struct kfd_dev *gpu;
+ struct kfd_node *gpu;
struct kobject *kobj;
struct attribute attr;
};
-#define HSA_CACHE_TYPE_DATA 0x00000001
-#define HSA_CACHE_TYPE_INSTRUCTION 0x00000002
-#define HSA_CACHE_TYPE_CPU 0x00000004
-#define HSA_CACHE_TYPE_HSACU 0x00000008
-#define HSA_CACHE_TYPE_RESERVED 0xfffffff0
+#define CACHE_SIBLINGMAP_SIZE 128
struct kfd_cache_properties {
struct list_head list;
@@ -132,10 +107,11 @@ struct kfd_cache_properties {
uint32_t cache_assoc;
uint32_t cache_latency;
uint32_t cache_type;
- uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE];
- struct kfd_dev *gpu;
+ uint8_t sibling_map[CACHE_SIBLINGMAP_SIZE];
+ struct kfd_node *gpu;
struct kobject *kobj;
struct attribute attr;
+ uint32_t sibling_map_size;
};
struct kfd_iolink_properties {
@@ -151,8 +127,9 @@ struct kfd_iolink_properties {
uint32_t min_bandwidth;
uint32_t max_bandwidth;
uint32_t rec_transfer_size;
+ uint32_t rec_sdma_eng_id_mask;
uint32_t flags;
- struct kfd_dev *gpu;
+ struct kfd_node *gpu;
struct kobject *kobj;
struct attribute attr;
};
@@ -170,21 +147,24 @@ struct kfd_topology_device {
uint32_t proximity_domain;
struct kfd_node_properties node_props;
struct list_head mem_props;
- uint32_t cache_count;
struct list_head cache_props;
- uint32_t io_link_count;
struct list_head io_link_props;
+ struct list_head p2p_link_props;
struct list_head perf_props;
- struct kfd_dev *gpu;
+ struct kfd_node *gpu;
struct kobject *kobj_node;
struct kobject *kobj_mem;
struct kobject *kobj_cache;
struct kobject *kobj_iolink;
+ struct kobject *kobj_p2plink;
struct kobject *kobj_perf;
struct attribute attr_gpuid;
struct attribute attr_name;
struct attribute attr_props;
- uint8_t oem_id[CRAT_OEMID_LENGTH];
+ union {
+ uint8_t oem_id[CRAT_OEMID_LENGTH];
+ uint64_t oem_id64;
+ };
uint8_t oem_table_id[CRAT_OEMTABLEID_LENGTH];
uint32_t oem_revision;
};
@@ -201,6 +181,22 @@ struct kfd_system_properties {
struct attribute attr_props;
};
+struct dmi_mem_device {
+ struct dmi_header header;
+ u16 physical_handle;
+ u16 error_handle;
+ u16 total_width;
+ u16 data_width;
+ u16 size;
+ u8 form_factor;
+ u8 device_set;
+ u8 device_locator;
+ u8 bank_locator;
+ u8 memory_type;
+ u16 type_detail;
+ u16 speed;
+} __packed;
+
struct kfd_topology_device *kfd_create_topology_device(
struct list_head *device_list);
void kfd_release_topology_device_list(struct list_head *device_list);
diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
index daf3c44547d3..e5c0205f2618 100644
--- a/drivers/gpu/drm/amd/amdkfd/soc15_int.h
+++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
@@ -29,9 +29,11 @@
#define SOC15_INTSRC_CP_BAD_OPCODE 183
#define SOC15_INTSRC_SQ_INTERRUPT_MSG 239
#define SOC15_INTSRC_VMC_FAULT 0
+#define SOC15_INTSRC_VMC_UTCL2_POISON 1
#define SOC15_INTSRC_SDMA_TRAP 224
#define SOC15_INTSRC_SDMA_ECC 220
-
+#define SOC21_INTSRC_SDMA_TRAP 49
+#define SOC21_INTSRC_SDMA_ECC 62
#define SOC15_CLIENT_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) & 0xff)
#define SOC15_SOURCE_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 8 & 0xff)
@@ -39,6 +41,7 @@
#define SOC15_VMID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 24 & 0xf)
#define SOC15_VMID_TYPE_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 31 & 0x1)
#define SOC15_PASID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[3]) & 0xffff)
+#define SOC15_NODEID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[3]) >> 16 & 0xff)
#define SOC15_CONTEXT_ID0_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[4]))
#define SOC15_CONTEXT_ID1_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[5]))
#define SOC15_CONTEXT_ID2_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[6]))
diff --git a/drivers/gpu/drm/amd/amdxcp/Makefile b/drivers/gpu/drm/amd/amdxcp/Makefile
new file mode 100644
index 000000000000..870501a4bb8c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdxcp/Makefile
@@ -0,0 +1,25 @@
+#
+# Copyright 2023 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+
+amdxcp-y := amdgpu_xcp_drv.o
+
+obj-$(CONFIG_DRM_AMDGPU) += amdxcp.o
diff --git a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c
new file mode 100644
index 000000000000..44009aa8216e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#include <drm/drm_drv.h>
+
+#include "amdgpu_xcp_drv.h"
+
+#define MAX_XCP_PLATFORM_DEVICE 64
+
+struct xcp_device {
+ struct drm_device drm;
+ struct platform_device *pdev;
+};
+
+static const struct drm_driver amdgpu_xcp_driver = {
+ .driver_features = DRIVER_GEM | DRIVER_RENDER,
+ .name = "amdgpu_xcp_drv",
+ .major = 1,
+ .minor = 0,
+};
+
+static int8_t pdev_num;
+static struct xcp_device *xcp_dev[MAX_XCP_PLATFORM_DEVICE];
+static DEFINE_MUTEX(xcp_mutex);
+
+int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev)
+{
+ struct platform_device *pdev;
+ struct xcp_device *pxcp_dev;
+ char dev_name[20];
+ int ret, i;
+
+ guard(mutex)(&xcp_mutex);
+
+ if (pdev_num >= MAX_XCP_PLATFORM_DEVICE)
+ return -ENODEV;
+
+ for (i = 0; i < MAX_XCP_PLATFORM_DEVICE; i++) {
+ if (!xcp_dev[i])
+ break;
+ }
+
+ if (i >= MAX_XCP_PLATFORM_DEVICE)
+ return -ENODEV;
+
+ snprintf(dev_name, sizeof(dev_name), "amdgpu_xcp_%d", i);
+ pdev = platform_device_register_simple(dev_name, -1, NULL, 0);
+ if (IS_ERR(pdev))
+ return PTR_ERR(pdev);
+
+ if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto out_unregister;
+ }
+
+ pxcp_dev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_xcp_driver, struct xcp_device, drm);
+ if (IS_ERR(pxcp_dev)) {
+ ret = PTR_ERR(pxcp_dev);
+ goto out_devres;
+ }
+
+ xcp_dev[i] = pxcp_dev;
+ xcp_dev[i]->pdev = pdev;
+ *ddev = &pxcp_dev->drm;
+ pdev_num++;
+
+ return 0;
+
+out_devres:
+ devres_release_group(&pdev->dev, NULL);
+out_unregister:
+ platform_device_unregister(pdev);
+
+ return ret;
+}
+EXPORT_SYMBOL(amdgpu_xcp_drm_dev_alloc);
+
+static void free_xcp_dev(int8_t index)
+{
+ if ((index < MAX_XCP_PLATFORM_DEVICE) && (xcp_dev[index])) {
+ struct platform_device *pdev = xcp_dev[index]->pdev;
+
+ devres_release_group(&pdev->dev, NULL);
+ platform_device_unregister(pdev);
+
+ xcp_dev[index] = NULL;
+ pdev_num--;
+ }
+}
+
+void amdgpu_xcp_drm_dev_free(struct drm_device *ddev)
+{
+ int8_t i;
+
+ guard(mutex)(&xcp_mutex);
+
+ for (i = 0; i < MAX_XCP_PLATFORM_DEVICE; i++) {
+ if ((xcp_dev[i]) && (&xcp_dev[i]->drm == ddev)) {
+ free_xcp_dev(i);
+ break;
+ }
+ }
+}
+EXPORT_SYMBOL(amdgpu_xcp_drm_dev_free);
+
+void amdgpu_xcp_drv_release(void)
+{
+ int8_t i;
+
+ guard(mutex)(&xcp_mutex);
+
+ for (i = 0; pdev_num && i < MAX_XCP_PLATFORM_DEVICE; i++) {
+ free_xcp_dev(i);
+ }
+}
+EXPORT_SYMBOL(amdgpu_xcp_drv_release);
+
+static void __exit amdgpu_xcp_drv_exit(void)
+{
+ amdgpu_xcp_drv_release();
+}
+
+module_exit(amdgpu_xcp_drv_exit);
+
+MODULE_AUTHOR("AMD linux driver team");
+MODULE_DESCRIPTION("AMD XCP PLATFORM DEVICES");
+MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h
new file mode 100644
index 000000000000..580a1602c8e3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _AMDGPU_XCP_DRV_H_
+#define _AMDGPU_XCP_DRV_H_
+
+int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev);
+void amdgpu_xcp_drm_dev_free(struct drm_device *ddev);
+void amdgpu_xcp_drv_release(void);
+#endif /* _AMDGPU_XCP_DRV_H_ */
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 127667e549c1..abd3b6564373 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -1,33 +1,39 @@
# SPDX-License-Identifier: MIT
+# Copyright © 2019-2024 Advanced Micro Devices, Inc. All rights reserved.
+
menu "Display Engine Configuration"
depends on DRM && DRM_AMDGPU
config DRM_AMD_DC
bool "AMD DC - Enable new display engine"
default y
+ depends on BROKEN || !CC_IS_CLANG || ARM64 || LOONGARCH || RISCV || SPARC64 || X86_64
+ select CEC_CORE
+ select CEC_NOTIFIER
select SND_HDA_COMPONENT if SND_HDA_CORE
- select DRM_AMD_DC_DCN if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
+ # !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752
+ select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && !(CC_IS_CLANG && (ARM64 || LOONGARCH || RISCV))
help
Choose this option if you want to use the new display engine
support for AMDGPU. This adds required support for Vega and
Raven ASICs.
-config DRM_AMD_DC_DCN
- def_bool n
- help
- Raven, Navi, and newer family support for display engine
+ calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 ||
+ ARM64 || LOONGARCH || RISCV) architectures built with Clang (all released
+ versions), whereby the stack frame gets blown up to well over 5k. This
+ would cause an immediate kernel panic on most architectures. We'll revert
+ this when the following bug report has been resolved:
+ https://github.com/llvm/llvm-project/issues/41896.
-config DRM_AMD_DC_HDCP
- bool "Enable HDCP support in DC"
- depends on DRM_AMD_DC
+config DRM_AMD_DC_FP
+ def_bool n
help
- Choose this option if you want to support HDCP authentication.
+ Floating point support, required for DCN-based SoCs
config DRM_AMD_DC_SI
bool "AMD DC support for Southern Islands ASICs"
depends on DRM_AMDGPU_SI
depends on DRM_AMD_DC
- default n
help
Choose this option to enable new AMD DC support for SI asics
by default. This includes Tahiti, Pitcairn, Cape Verde, Oland.
@@ -41,17 +47,13 @@ config DEBUG_KERNEL_DC
Choose this option if you want to hit kdgb_break in assert.
config DRM_AMD_SECURE_DISPLAY
- bool "Enable secure display support"
- default n
- depends on DEBUG_FS
- depends on DRM_AMD_DC_DCN
- help
- Choose this option if you want to
- support secure display
-
- This option enables the calculation
- of crc of specific region via debugfs.
- Cooperate with specific DMCU FW.
+ bool "Enable secure display support"
+ depends on DEBUG_FS
+ depends on DRM_AMD_DC_FP
+ help
+ Choose this option if you want to support secure display
+ This option enables the calculation of crc of specific region via
+ debugfs. Cooperate with specific DMCU FW.
endmenu
diff --git a/drivers/gpu/drm/amd/display/Makefile b/drivers/gpu/drm/amd/display/Makefile
index 2633de77de5e..0084a8d55254 100644
--- a/drivers/gpu/drm/amd/display/Makefile
+++ b/drivers/gpu/drm/amd/display/Makefile
@@ -29,6 +29,22 @@ AMDDALPATH = $(RELATIVE_AMD_DISPLAY_PATH)
subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/inc/
subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/inc/hw
subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/clk_mgr
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/hwss
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/resource
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dsc
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/optc
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dpp
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/hubbub
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dccg
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/hubp
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dio
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dwb
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/hpo
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/mmhubbub
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/mpc
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/opp
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/pg
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/soc_and_ip_translator
subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/inc
subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/freesync
subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/color
@@ -36,18 +52,14 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/info_packet
subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/power
subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dmub/inc
-ifdef CONFIG_DRM_AMD_DC_HDCP
subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/hdcp
-endif
#TODO: remove when Timing Sync feature is complete
subdir-ccflags-y += -DBUILD_FEATURE_TIMING_SYNC=0
DAL_LIBS = amdgpu_dm dc modules/freesync modules/color modules/info_packet modules/power dmub/src
-ifdef CONFIG_DRM_AMD_DC_HDCP
DAL_LIBS += modules/hdcp
-endif
AMD_DAL = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/,$(DAL_LIBS)))
diff --git a/drivers/gpu/drm/amd/display/TODO b/drivers/gpu/drm/amd/display/TODO
deleted file mode 100644
index a8a6c106e8c7..000000000000
--- a/drivers/gpu/drm/amd/display/TODO
+++ /dev/null
@@ -1,110 +0,0 @@
-===============================================================================
-TODOs
-===============================================================================
-
-1. Base this on drm-next - WIP
-
-
-2. Cleanup commit history
-
-
-3. WIP - Drop page flip helper and use DRM's version
-
-
-4. DONE - Flatten all DC objects
- * dc_stream/core_stream/stream should just be dc_stream
- * Same for other DC objects
-
- "Is there any major reason to keep all those abstractions?
-
- Could you collapse everything into struct dc_stream?
-
- I haven't looked recently but I didn't get the impression there was a
- lot of design around what was public/protected, more whatever needed
- to be used by someone else was in public."
- ~ Dave Airlie
-
-
-5. DONE - Rename DC objects to align more with DRM
- * dc_surface -> dc_plane_state
- * dc_stream -> dc_stream_state
-
-
-6. DONE - Per-plane and per-stream validation
-
-
-7. WIP - Per-plane and per-stream commit
-
-
-8. WIP - Split pipe_ctx into plane and stream resource structs
-
-
-9. Attach plane and stream reources to state object instead of validate_context
-
-
-10. Remove dc_edid_caps and drm_helpers_parse_edid_caps
- * Use drm_display_info instead
- * Remove DC's edid quirks and rely on DRM's quirks (add quirks if needed)
-
- "Making sure you use the sink-specific helper libraries and kernel
- subsystems, since there's really no good reason to have 2nd
- implementation of those in the kernel. Looks likes that's done for mst
- and edid parsing. There's still a bit a midlayer feeling to the edid
- parsing side (e.g. dc_edid_caps and dm_helpers_parse_edid_caps, I
- think it'd be much better if you convert that over to reading stuff
- from drm_display_info and if needed, push stuff into the core). Also,
- I can't come up with a good reason why DC needs all this (except to
- reimplement half of our edid quirk table, which really isn't a good
- idea). Might be good if you put this onto the list of things to fix
- long-term, but imo not a blocker. Definitely make sure new stuff
- doesn't slip in (i.e. if you start adding edid quirks to DC instead of
- the drm core, refactoring to use the core edid stuff was pointless)."
- ~ Daniel Vetter
-
-
-11. Remove dc/i2caux. This folder can be somewhat misleading. It's basically an
-overy complicated HW programming function for sendind and receiving i2c/aux
-commands. We can greatly simplify that and move it into dc/dceXYZ like other
-HW blocks.
-
-12. drm_modeset_lock in MST should no longer be needed in recent kernels
- * Adopt appropriate locking scheme
-
-13. get_modes and best_encoder callbacks look a bit funny. Can probably rip out
-a few indirections, and consider removing entirely and using the
-drm_atomic_helper_best_encoder default behaviour.
-
-14. core/dc_debug.c, consider switching to the atomic state debug helpers and
-moving all your driver state printing into the various atomic_print_state
-callbacks. There's also plans to expose this stuff in a standard way across all
-drivers, to make debugging userspace compositors easier across different hw.
-
-15. Move DP/HDMI dual mode adaptors to drm_dp_dual_mode_helper.c. See
-dal_ddc_service_i2c_query_dp_dual_mode_adaptor.
-
-16. Move to core SCDC helpers (I think those are new since initial DC review).
-
-17. There's still a pretty massive layer cake around dp aux and DPCD handling,
-with like 3 levels of abstraction and using your own structures instead of the
-stuff in drm_dp_helper.h. drm_dp_helper.h isn't really great and already has 2
-incompatible styles, just means more reasons not to add a third (or well third
-one gets to do the cleanup refactor).
-
-18. There's a pile of sink handling code, both for DP and HDMI where I didn't
-immediately recognize the standard. I think long term it'd be best for the drm
-subsystem if we try to move as much of that into helpers/core as possible, and
-share it with drivers. But that's a very long term goal, and by far not just an
-issue with DC - other drivers, especially around DP sink handling, are equally
-guilty.
-
-19. DONE - The DC logger is still a rather sore thing, but I know that the
-DRM_DEBUG stuff just isn't up to the challenges either. We need to figure out
-something that integrates better with DRM and linux debug printing, while not
-being useless with filtering output. dynamic debug printing might be an option.
-
-20. Use kernel i2c device to program HDMI retimer. Some boards have an HDMI
-retimer that we need to program to pass PHY compliance. Currently that's
-bypassing the i2c device and goes directly to HW. This should be changed.
-
-21. Remove vector.c from dc/basics. It's used in DDC code which can probably
-be simplified enough to no longer need a vector implementation.
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
index 718e123a3230..8e949fe77312 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
@@ -25,19 +25,28 @@
-AMDGPUDM = amdgpu_dm.o amdgpu_dm_irq.o amdgpu_dm_mst_types.o amdgpu_dm_color.o
-
-ifdef CONFIG_DRM_AMD_DC_DCN
-AMDGPUDM += dc_fpu.o
-endif
-
ifneq ($(CONFIG_DRM_AMD_DC),)
-AMDGPUDM += amdgpu_dm_services.o amdgpu_dm_helpers.o amdgpu_dm_pp_smu.o amdgpu_dm_psr.o
+AMDGPUDM = \
+ amdgpu_dm.o \
+ amdgpu_dm_plane.o \
+ amdgpu_dm_crtc.o \
+ amdgpu_dm_irq.o \
+ amdgpu_dm_mst_types.o \
+ amdgpu_dm_color.o \
+ amdgpu_dm_services.o \
+ amdgpu_dm_helpers.o \
+ amdgpu_dm_pp_smu.o \
+ amdgpu_dm_psr.o \
+ amdgpu_dm_replay.o \
+ amdgpu_dm_quirks.o \
+ amdgpu_dm_wb.o \
+ amdgpu_dm_colorop.o
+
+ifdef CONFIG_DRM_AMD_DC_FP
+AMDGPUDM += dc_fpu.o
endif
-ifdef CONFIG_DRM_AMD_DC_HDCP
AMDGPUDM += amdgpu_dm_hdcp.o
-endif
ifneq ($(CONFIG_DEBUG_FS),)
AMDGPUDM += amdgpu_dm_crc.o amdgpu_dm_debugfs.o
@@ -48,3 +57,4 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc
AMDGPU_DM = $(addprefix $(AMDDALPATH)/amdgpu_dm/,$(AMDGPUDM))
AMD_DISPLAY_FILES += $(AMDGPU_DM)
+endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e727f1dd2a9a..740711ac1037 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
@@ -28,7 +29,6 @@
#include "dm_services_types.h"
#include "dc.h"
-#include "dc_link_dp.h"
#include "link_enc_cfg.h"
#include "dc/inc/core_types.h"
#include "dal_asic_id.h"
@@ -38,19 +38,23 @@
#include "dc/dc_dmub_srv.h"
#include "dc/dc_edid_parser.h"
#include "dc/dc_stat.h"
+#include "dc/dc_state.h"
#include "amdgpu_dm_trace.h"
+#include "link/protocols/link_dpcd.h"
+#include "link_service_types.h"
+#include "link/protocols/link_dp_capability.h"
+#include "link/protocols/link_ddc.h"
-#include "vid.h"
#include "amdgpu.h"
#include "amdgpu_display.h"
#include "amdgpu_ucode.h"
#include "atom.h"
#include "amdgpu_dm.h"
-#ifdef CONFIG_DRM_AMD_DC_HDCP
+#include "amdgpu_dm_plane.h"
+#include "amdgpu_dm_crtc.h"
#include "amdgpu_dm_hdcp.h"
-#include <drm/drm_hdcp.h>
-#endif
-#include "amdgpu_pm.h"
+#include <drm/display/drm_hdcp_helper.h>
+#include "amdgpu_dm_wb.h"
#include "amdgpu_atombios.h"
#include "amd_shared.h"
@@ -61,42 +65,46 @@
#include "amdgpu_dm_debugfs.h"
#endif
#include "amdgpu_dm_psr.h"
+#include "amdgpu_dm_replay.h"
#include "ivsrcid/ivsrcid_vislands30.h"
-#include "i2caux_interface.h"
+#include <linux/backlight.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/pm_runtime.h>
#include <linux/pci.h>
+#include <linux/power_supply.h>
#include <linux/firmware.h>
#include <linux/component.h>
+#include <linux/sort.h>
+#include <drm/drm_privacy_screen_consumer.h>
+#include <drm/display/drm_dp_mst_helper.h>
+#include <drm/display/drm_hdmi_helper.h>
#include <drm/drm_atomic.h>
#include <drm/drm_atomic_uapi.h>
#include <drm/drm_atomic_helper.h>
-#include <drm/drm_dp_mst_helper.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_blend.h>
+#include <drm/drm_fixed.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_edid.h>
+#include <drm/drm_eld.h>
+#include <drm/drm_utils.h>
#include <drm/drm_vblank.h>
#include <drm/drm_audio_component.h>
+#include <drm/drm_gem_atomic_helper.h>
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-#include "ivsrcid/dcn/irqsrcs_dcn_1_0.h"
-
-#include "dcn/dcn_1_0_offset.h"
-#include "dcn/dcn_1_0_sh_mask.h"
-#include "soc15_hw_ip.h"
-#include "vega10_ip_offset.h"
+#include <media/cec-notifier.h>
+#include <acpi/video.h>
-#include "soc15_common.h"
-#endif
+#include "ivsrcid/dcn/irqsrcs_dcn_1_0.h"
#include "modules/inc/mod_freesync.h"
#include "modules/power/power_helpers.h"
-#include "modules/inc/mod_info_packet.h"
+
+static_assert(AMDGPU_DMUB_NOTIFICATION_MAX == DMUB_NOTIFICATION_MAX, "AMDGPU_DMUB_NOTIFICATION_MAX mismatch");
#define FIRMWARE_RENOIR_DMUB "amdgpu/renoir_dmcub.bin"
MODULE_FIRMWARE(FIRMWARE_RENOIR_DMUB);
@@ -114,6 +122,17 @@ MODULE_FIRMWARE(FIRMWARE_DIMGREY_CAVEFISH_DMUB);
MODULE_FIRMWARE(FIRMWARE_BEIGE_GOBY_DMUB);
#define FIRMWARE_YELLOW_CARP_DMUB "amdgpu/yellow_carp_dmcub.bin"
MODULE_FIRMWARE(FIRMWARE_YELLOW_CARP_DMUB);
+#define FIRMWARE_DCN_314_DMUB "amdgpu/dcn_3_1_4_dmcub.bin"
+MODULE_FIRMWARE(FIRMWARE_DCN_314_DMUB);
+#define FIRMWARE_DCN_315_DMUB "amdgpu/dcn_3_1_5_dmcub.bin"
+MODULE_FIRMWARE(FIRMWARE_DCN_315_DMUB);
+#define FIRMWARE_DCN316_DMUB "amdgpu/dcn_3_1_6_dmcub.bin"
+MODULE_FIRMWARE(FIRMWARE_DCN316_DMUB);
+
+#define FIRMWARE_DCN_V3_2_0_DMCUB "amdgpu/dcn_3_2_0_dmcub.bin"
+MODULE_FIRMWARE(FIRMWARE_DCN_V3_2_0_DMCUB);
+#define FIRMWARE_DCN_V3_2_1_DMCUB "amdgpu/dcn_3_2_1_dmcub.bin"
+MODULE_FIRMWARE(FIRMWARE_DCN_V3_2_1_DMCUB);
#define FIRMWARE_RAVEN_DMCU "amdgpu/raven_dmcu.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN_DMCU);
@@ -121,6 +140,18 @@ MODULE_FIRMWARE(FIRMWARE_RAVEN_DMCU);
#define FIRMWARE_NAVI12_DMCU "amdgpu/navi12_dmcu.bin"
MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU);
+#define FIRMWARE_DCN_35_DMUB "amdgpu/dcn_3_5_dmcub.bin"
+MODULE_FIRMWARE(FIRMWARE_DCN_35_DMUB);
+
+#define FIRMWARE_DCN_351_DMUB "amdgpu/dcn_3_5_1_dmcub.bin"
+MODULE_FIRMWARE(FIRMWARE_DCN_351_DMUB);
+
+#define FIRMWARE_DCN_36_DMUB "amdgpu/dcn_3_6_dmcub.bin"
+MODULE_FIRMWARE(FIRMWARE_DCN_36_DMUB);
+
+#define FIRMWARE_DCN_401_DMUB "amdgpu/dcn_4_0_1_dmcub.bin"
+MODULE_FIRMWARE(FIRMWARE_DCN_401_DMUB);
+
/* Number of bytes in PSP header for firmware. */
#define PSP_HEADER_BYTES 0x100
@@ -141,6 +172,9 @@ MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU);
static int amdgpu_dm_init(struct amdgpu_device *adev);
static void amdgpu_dm_fini(struct amdgpu_device *adev);
static bool is_freesync_video_mode(const struct drm_display_mode *mode, struct amdgpu_dm_connector *aconnector);
+static void reset_freesync_config_for_crtc(struct dm_crtc_state *new_crtc_state);
+static struct amdgpu_i2c_adapter *
+create_i2c(struct ddc_service *ddc_service, bool oem);
static enum drm_mode_subconnector get_subconnector_type(struct dc_link *link)
{
@@ -189,16 +223,9 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev);
/* removes and deallocates the drm structures, created by the above function */
static void amdgpu_dm_destroy_drm_device(struct amdgpu_display_manager *dm);
-static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
- struct drm_plane *plane,
- unsigned long possible_crtcs,
- const struct dc_plane_cap *plane_cap);
-static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
- struct drm_plane *plane,
- uint32_t link_index);
static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm,
struct amdgpu_dm_connector *amdgpu_dm_connector,
- uint32_t link_index,
+ u32 link_index,
struct amdgpu_encoder *amdgpu_encoder);
static int amdgpu_dm_encoder_init(struct drm_device *dev,
struct amdgpu_encoder *aencoder,
@@ -206,20 +233,19 @@ static int amdgpu_dm_encoder_init(struct drm_device *dev,
static int amdgpu_dm_connector_get_modes(struct drm_connector *connector);
+static int amdgpu_dm_atomic_setup_commit(struct drm_atomic_state *state);
static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);
static int amdgpu_dm_atomic_check(struct drm_device *dev,
struct drm_atomic_state *state);
-static void handle_cursor_update(struct drm_plane *plane,
- struct drm_plane_state *old_plane_state);
-
-static const struct drm_format_info *
-amd_get_format_info(const struct drm_mode_fb_cmd2 *cmd);
-
static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector);
static void handle_hpd_rx_irq(void *param);
+static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm,
+ int bl_idx,
+ u32 user_brightness);
+
static bool
is_timing_unchanged_for_freesync(struct drm_crtc_state *old_crtc_state,
struct drm_crtc_state *new_crtc_state);
@@ -238,72 +264,77 @@ is_timing_unchanged_for_freesync(struct drm_crtc_state *old_crtc_state,
*/
static u32 dm_vblank_get_counter(struct amdgpu_device *adev, int crtc)
{
+ struct amdgpu_crtc *acrtc = NULL;
+
if (crtc >= adev->mode_info.num_crtc)
return 0;
- else {
- struct amdgpu_crtc *acrtc = adev->mode_info.crtcs[crtc];
- if (acrtc->dm_irq_params.stream == NULL) {
- DRM_ERROR("dc_stream_state is NULL for crtc '%d'!\n",
- crtc);
- return 0;
- }
+ acrtc = adev->mode_info.crtcs[crtc];
- return dc_stream_get_vblank_counter(acrtc->dm_irq_params.stream);
+ if (!acrtc->dm_irq_params.stream) {
+ drm_err(adev_to_drm(adev), "dc_stream_state is NULL for crtc '%d'!\n",
+ crtc);
+ return 0;
}
+
+ return dc_stream_get_vblank_counter(acrtc->dm_irq_params.stream);
}
static int dm_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
u32 *vbl, u32 *position)
{
- uint32_t v_blank_start, v_blank_end, h_position, v_position;
+ u32 v_blank_start = 0, v_blank_end = 0, h_position = 0, v_position = 0;
+ struct amdgpu_crtc *acrtc = NULL;
+ struct dc *dc = adev->dm.dc;
if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc))
return -EINVAL;
- else {
- struct amdgpu_crtc *acrtc = adev->mode_info.crtcs[crtc];
-
- if (acrtc->dm_irq_params.stream == NULL) {
- DRM_ERROR("dc_stream_state is NULL for crtc '%d'!\n",
- crtc);
- return 0;
- }
- /*
- * TODO rework base driver to use values directly.
- * for now parse it back into reg-format
- */
- dc_stream_get_scanoutpos(acrtc->dm_irq_params.stream,
- &v_blank_start,
- &v_blank_end,
- &h_position,
- &v_position);
+ acrtc = adev->mode_info.crtcs[crtc];
- *position = v_position | (h_position << 16);
- *vbl = v_blank_start | (v_blank_end << 16);
+ if (!acrtc->dm_irq_params.stream) {
+ drm_err(adev_to_drm(adev), "dc_stream_state is NULL for crtc '%d'!\n",
+ crtc);
+ return 0;
}
+ if (dc && dc->caps.ips_support && dc->idle_optimizations_allowed)
+ dc_allow_idle_optimizations(dc, false);
+
+ /*
+ * TODO rework base driver to use values directly.
+ * for now parse it back into reg-format
+ */
+ dc_stream_get_scanoutpos(acrtc->dm_irq_params.stream,
+ &v_blank_start,
+ &v_blank_end,
+ &h_position,
+ &v_position);
+
+ *position = v_position | (h_position << 16);
+ *vbl = v_blank_start | (v_blank_end << 16);
+
return 0;
}
-static bool dm_is_idle(void *handle)
+static bool dm_is_idle(struct amdgpu_ip_block *ip_block)
{
/* XXX todo */
return true;
}
-static int dm_wait_for_idle(void *handle)
+static int dm_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* XXX todo */
return 0;
}
-static bool dm_check_soft_reset(void *handle)
+static bool dm_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
return false;
}
-static int dm_soft_reset(void *handle)
+static int dm_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* XXX todo */
return 0;
@@ -330,31 +361,72 @@ get_crtc_by_otg_inst(struct amdgpu_device *adev,
return NULL;
}
-static inline bool amdgpu_dm_vrr_active_irq(struct amdgpu_crtc *acrtc)
-{
- return acrtc->dm_irq_params.freesync_config.state ==
- VRR_STATE_ACTIVE_VARIABLE ||
- acrtc->dm_irq_params.freesync_config.state ==
- VRR_STATE_ACTIVE_FIXED;
-}
-
-static inline bool amdgpu_dm_vrr_active(struct dm_crtc_state *dm_state)
-{
- return dm_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE ||
- dm_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED;
-}
-
static inline bool is_dc_timing_adjust_needed(struct dm_crtc_state *old_state,
struct dm_crtc_state *new_state)
{
+ if (new_state->stream->adjust.timing_adjust_pending)
+ return true;
if (new_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED)
return true;
- else if (amdgpu_dm_vrr_active(old_state) != amdgpu_dm_vrr_active(new_state))
+ else if (amdgpu_dm_crtc_vrr_active(old_state) != amdgpu_dm_crtc_vrr_active(new_state))
return true;
else
return false;
}
+/*
+ * DC will program planes with their z-order determined by their ordering
+ * in the dc_surface_updates array. This comparator is used to sort them
+ * by descending zpos.
+ */
+static int dm_plane_layer_index_cmp(const void *a, const void *b)
+{
+ const struct dc_surface_update *sa = (struct dc_surface_update *)a;
+ const struct dc_surface_update *sb = (struct dc_surface_update *)b;
+
+ /* Sort by descending dc_plane layer_index (i.e. normalized_zpos) */
+ return sb->surface->layer_index - sa->surface->layer_index;
+}
+
+/**
+ * update_planes_and_stream_adapter() - Send planes to be updated in DC
+ *
+ * DC has a generic way to update planes and stream via
+ * dc_update_planes_and_stream function; however, DM might need some
+ * adjustments and preparation before calling it. This function is a wrapper
+ * for the dc_update_planes_and_stream that does any required configuration
+ * before passing control to DC.
+ *
+ * @dc: Display Core control structure
+ * @update_type: specify whether it is FULL/MEDIUM/FAST update
+ * @planes_count: planes count to update
+ * @stream: stream state
+ * @stream_update: stream update
+ * @array_of_surface_update: dc surface update pointer
+ *
+ */
+static inline bool update_planes_and_stream_adapter(struct dc *dc,
+ int update_type,
+ int planes_count,
+ struct dc_stream_state *stream,
+ struct dc_stream_update *stream_update,
+ struct dc_surface_update *array_of_surface_update)
+{
+ sort(array_of_surface_update, planes_count,
+ sizeof(*array_of_surface_update), dm_plane_layer_index_cmp, NULL);
+
+ /*
+ * Previous frame finished and HW is ready for optimization.
+ */
+ dc_post_update_surfaces_to_stream(dc);
+
+ return dc_update_planes_and_stream(dc,
+ array_of_surface_update,
+ planes_count,
+ stream,
+ stream_update);
+}
+
/**
* dm_pflip_high_irq() - Handle pageflip interrupt
* @interrupt_params: ignored
@@ -367,9 +439,10 @@ static void dm_pflip_high_irq(void *interrupt_params)
struct amdgpu_crtc *amdgpu_crtc;
struct common_irq_params *irq_params = interrupt_params;
struct amdgpu_device *adev = irq_params->adev;
+ struct drm_device *dev = adev_to_drm(adev);
unsigned long flags;
struct drm_pending_vblank_event *e;
- uint32_t vpos, hpos, v_blank_start, v_blank_end;
+ u32 vpos, hpos, v_blank_start, v_blank_end;
bool vrr_active;
amdgpu_crtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_PFLIP);
@@ -377,18 +450,17 @@ static void dm_pflip_high_irq(void *interrupt_params)
/* IRQ could occur when in initial stage */
/* TODO work and BO cleanup */
if (amdgpu_crtc == NULL) {
- DC_LOG_PFLIP("CRTC is null, returning.\n");
+ drm_dbg_state(dev, "CRTC is null, returning.\n");
return;
}
spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
- if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){
- DC_LOG_PFLIP("amdgpu_crtc->pflip_status = %d !=AMDGPU_FLIP_SUBMITTED(%d) on crtc:%d[%p] \n",
- amdgpu_crtc->pflip_status,
- AMDGPU_FLIP_SUBMITTED,
- amdgpu_crtc->crtc_id,
- amdgpu_crtc);
+ if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
+ drm_dbg_state(dev,
+ "amdgpu_crtc->pflip_status = %d != AMDGPU_FLIP_SUBMITTED(%d) on crtc:%d[%p]\n",
+ amdgpu_crtc->pflip_status, AMDGPU_FLIP_SUBMITTED,
+ amdgpu_crtc->crtc_id, amdgpu_crtc);
spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
return;
}
@@ -399,7 +471,7 @@ static void dm_pflip_high_irq(void *interrupt_params)
WARN_ON(!e);
- vrr_active = amdgpu_dm_vrr_active_irq(amdgpu_crtc);
+ vrr_active = amdgpu_dm_crtc_vrr_active_irq(amdgpu_crtc);
/* Fixed refresh rate, or VRR scanout position outside front-porch? */
if (!vrr_active ||
@@ -454,9 +526,53 @@ static void dm_pflip_high_irq(void *interrupt_params)
amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE;
spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
- DC_LOG_PFLIP("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_NONE, vrr[%d]-fp %d\n",
- amdgpu_crtc->crtc_id, amdgpu_crtc,
- vrr_active, (int) !e);
+ drm_dbg_state(dev,
+ "crtc:%d[%p], pflip_stat:AMDGPU_FLIP_NONE, vrr[%d]-fp %d\n",
+ amdgpu_crtc->crtc_id, amdgpu_crtc, vrr_active, (int)!e);
+}
+
+static void dm_handle_vmin_vmax_update(struct work_struct *offload_work)
+{
+ struct vupdate_offload_work *work = container_of(offload_work, struct vupdate_offload_work, work);
+ struct amdgpu_device *adev = work->adev;
+ struct dc_stream_state *stream = work->stream;
+ struct dc_crtc_timing_adjust *adjust = work->adjust;
+
+ mutex_lock(&adev->dm.dc_lock);
+ dc_stream_adjust_vmin_vmax(adev->dm.dc, stream, adjust);
+ mutex_unlock(&adev->dm.dc_lock);
+
+ dc_stream_release(stream);
+ kfree(work->adjust);
+ kfree(work);
+}
+
+static void schedule_dc_vmin_vmax(struct amdgpu_device *adev,
+ struct dc_stream_state *stream,
+ struct dc_crtc_timing_adjust *adjust)
+{
+ struct vupdate_offload_work *offload_work = kzalloc(sizeof(*offload_work), GFP_NOWAIT);
+ if (!offload_work) {
+ drm_dbg_driver(adev_to_drm(adev), "Failed to allocate vupdate_offload_work\n");
+ return;
+ }
+
+ struct dc_crtc_timing_adjust *adjust_copy = kzalloc(sizeof(*adjust_copy), GFP_NOWAIT);
+ if (!adjust_copy) {
+ drm_dbg_driver(adev_to_drm(adev), "Failed to allocate adjust_copy\n");
+ kfree(offload_work);
+ return;
+ }
+
+ dc_stream_retain(stream);
+ memcpy(adjust_copy, adjust, sizeof(*adjust_copy));
+
+ INIT_WORK(&offload_work->work, dm_handle_vmin_vmax_update);
+ offload_work->adev = adev;
+ offload_work->stream = stream;
+ offload_work->adjust = adjust_copy;
+
+ queue_work(system_wq, &offload_work->work);
}
static void dm_vupdate_high_irq(void *interrupt_params)
@@ -473,9 +589,9 @@ static void dm_vupdate_high_irq(void *interrupt_params)
acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_VUPDATE);
if (acrtc) {
- vrr_active = amdgpu_dm_vrr_active_irq(acrtc);
+ vrr_active = amdgpu_dm_crtc_vrr_active_irq(acrtc);
drm_dev = acrtc->base.dev;
- vblank = &drm_dev->vblank[acrtc->base.index];
+ vblank = drm_crtc_vblank_crtc(&acrtc->base);
previous_timestamp = atomic64_read(&irq_params->previous_timestamp);
frame_duration_ns = vblank->time - previous_timestamp;
@@ -486,9 +602,9 @@ static void dm_vupdate_high_irq(void *interrupt_params)
atomic64_set(&irq_params->previous_timestamp, vblank->time);
}
- DC_LOG_VBLANK("crtc:%d, vupdate-vrr:%d\n",
- acrtc->crtc_id,
- vrr_active);
+ drm_dbg_vbl(drm_dev,
+ "crtc:%d, vupdate-vrr:%d\n", acrtc->crtc_id,
+ vrr_active);
/* Core vblank handling is done here after end of front-porch in
* vrr mode, as vblank timestamping will give valid results
@@ -496,22 +612,27 @@ static void dm_vupdate_high_irq(void *interrupt_params)
* page-flip completion events that have been queued to us
* if a pageflip happened inside front-porch.
*/
- if (vrr_active) {
- drm_crtc_handle_vblank(&acrtc->base);
+ if (vrr_active && acrtc->dm_irq_params.stream) {
+ bool replay_en = acrtc->dm_irq_params.stream->link->replay_settings.replay_feature_enabled;
+ bool psr_en = acrtc->dm_irq_params.stream->link->psr_settings.psr_feature_enabled;
+ bool fs_active_var_en = acrtc->dm_irq_params.freesync_config.state
+ == VRR_STATE_ACTIVE_VARIABLE;
+
+ amdgpu_dm_crtc_handle_vblank(acrtc);
/* BTR processing for pre-DCE12 ASICs */
- if (acrtc->dm_irq_params.stream &&
- adev->family < AMDGPU_FAMILY_AI) {
+ if (adev->family < AMDGPU_FAMILY_AI) {
spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
mod_freesync_handle_v_update(
adev->dm.freesync_module,
acrtc->dm_irq_params.stream,
&acrtc->dm_irq_params.vrr_params);
- dc_stream_adjust_vmin_vmax(
- adev->dm.dc,
- acrtc->dm_irq_params.stream,
- &acrtc->dm_irq_params.vrr_params.adjust);
+ if (fs_active_var_en || (!fs_active_var_en && !replay_en && !psr_en)) {
+ schedule_dc_vmin_vmax(adev,
+ acrtc->dm_irq_params.stream,
+ &acrtc->dm_irq_params.vrr_params.adjust);
+ }
spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
}
}
@@ -529,6 +650,7 @@ static void dm_crtc_high_irq(void *interrupt_params)
{
struct common_irq_params *irq_params = interrupt_params;
struct amdgpu_device *adev = irq_params->adev;
+ struct drm_writeback_job *job;
struct amdgpu_crtc *acrtc;
unsigned long flags;
int vrr_active;
@@ -537,10 +659,39 @@ static void dm_crtc_high_irq(void *interrupt_params)
if (!acrtc)
return;
- vrr_active = amdgpu_dm_vrr_active_irq(acrtc);
+ if (acrtc->wb_conn) {
+ spin_lock_irqsave(&acrtc->wb_conn->job_lock, flags);
+
+ if (acrtc->wb_pending) {
+ job = list_first_entry_or_null(&acrtc->wb_conn->job_queue,
+ struct drm_writeback_job,
+ list_entry);
+ acrtc->wb_pending = false;
+ spin_unlock_irqrestore(&acrtc->wb_conn->job_lock, flags);
+
+ if (job) {
+ unsigned int v_total, refresh_hz;
+ struct dc_stream_state *stream = acrtc->dm_irq_params.stream;
+
+ v_total = stream->adjust.v_total_max ?
+ stream->adjust.v_total_max : stream->timing.v_total;
+ refresh_hz = div_u64((uint64_t) stream->timing.pix_clk_100hz *
+ 100LL, (v_total * stream->timing.h_total));
+ mdelay(1000 / refresh_hz);
+
+ drm_writeback_signal_completion(acrtc->wb_conn, 0);
+ dc_stream_fc_disable_writeback(adev->dm.dc,
+ acrtc->dm_irq_params.stream, 0);
+ }
+ } else
+ spin_unlock_irqrestore(&acrtc->wb_conn->job_lock, flags);
+ }
+
+ vrr_active = amdgpu_dm_crtc_vrr_active_irq(acrtc);
- DC_LOG_VBLANK("crtc:%d, vupdate-vrr:%d, planes:%d\n", acrtc->crtc_id,
- vrr_active, acrtc->dm_irq_params.active_planes);
+ drm_dbg_vbl(adev_to_drm(adev),
+ "crtc:%d, vupdate-vrr:%d, planes:%d\n", acrtc->crtc_id,
+ vrr_active, acrtc->dm_irq_params.active_planes);
/**
* Core vblank handling at start of front-porch is only possible
@@ -549,7 +700,7 @@ static void dm_crtc_high_irq(void *interrupt_params)
* to dm_vupdate_high_irq after end of front-porch.
*/
if (!vrr_active)
- drm_crtc_handle_vblank(&acrtc->base);
+ amdgpu_dm_crtc_handle_vblank(acrtc);
/**
* Following stuff must happen at start of vblank, for crc
@@ -564,15 +715,20 @@ static void dm_crtc_high_irq(void *interrupt_params)
spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
if (acrtc->dm_irq_params.stream &&
- acrtc->dm_irq_params.vrr_params.supported &&
- acrtc->dm_irq_params.freesync_config.state ==
- VRR_STATE_ACTIVE_VARIABLE) {
+ acrtc->dm_irq_params.vrr_params.supported) {
+ bool replay_en = acrtc->dm_irq_params.stream->link->replay_settings.replay_feature_enabled;
+ bool psr_en = acrtc->dm_irq_params.stream->link->psr_settings.psr_feature_enabled;
+ bool fs_active_var_en = acrtc->dm_irq_params.freesync_config.state == VRR_STATE_ACTIVE_VARIABLE;
+
mod_freesync_handle_v_update(adev->dm.freesync_module,
acrtc->dm_irq_params.stream,
&acrtc->dm_irq_params.vrr_params);
- dc_stream_adjust_vmin_vmax(adev->dm.dc, acrtc->dm_irq_params.stream,
- &acrtc->dm_irq_params.vrr_params.adjust);
+ /* update vmin_vmax only if freesync is enabled, or only if PSR and REPLAY are disabled */
+ if (fs_active_var_en || (!fs_active_var_en && !replay_en && !psr_en)) {
+ schedule_dc_vmin_vmax(adev, acrtc->dm_irq_params.stream,
+ &acrtc->dm_irq_params.vrr_params.adjust);
+ }
}
/*
@@ -599,7 +755,6 @@ static void dm_crtc_high_irq(void *interrupt_params)
spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
/**
* dm_dcn_vertical_interrupt0_high_irq() - Handles OTG Vertical interrupt0 for
@@ -624,7 +779,7 @@ static void dm_dcn_vertical_interrupt0_high_irq(void *interrupt_params)
#endif /* CONFIG_DRM_AMD_SECURE_DISPLAY */
/**
- * dmub_aux_setconfig_reply_callback - Callback for AUX or SET_CONFIG command.
+ * dmub_aux_setconfig_callback - Callback for AUX or SET_CONFIG command.
* @adev: amdgpu_device pointer
* @notify: dmub notification structure
*
@@ -632,7 +787,8 @@ static void dm_dcn_vertical_interrupt0_high_irq(void *interrupt_params)
* Copies dmub notification to DM which is to be read by AUX command.
* issuing thread and also signals the event to wake up the thread.
*/
-void dmub_aux_setconfig_callback(struct amdgpu_device *adev, struct dmub_notification *notify)
+static void dmub_aux_setconfig_callback(struct amdgpu_device *adev,
+ struct dmub_notification *notify)
{
if (adev->dm.dmub_notify)
memcpy(adev->dm.dmub_notify, notify, sizeof(struct dmub_notification));
@@ -640,6 +796,29 @@ void dmub_aux_setconfig_callback(struct amdgpu_device *adev, struct dmub_notific
complete(&adev->dm.dmub_aux_transfer_done);
}
+static void dmub_aux_fused_io_callback(struct amdgpu_device *adev,
+ struct dmub_notification *notify)
+{
+ if (!adev || !notify) {
+ ASSERT(false);
+ return;
+ }
+
+ const struct dmub_cmd_fused_request *req = &notify->fused_request;
+ const uint8_t ddc_line = req->u.aux.ddc_line;
+
+ if (ddc_line >= ARRAY_SIZE(adev->dm.fused_io)) {
+ ASSERT(false);
+ return;
+ }
+
+ struct fused_io_sync *sync = &adev->dm.fused_io[ddc_line];
+
+ static_assert(sizeof(*req) <= sizeof(sync->reply_data), "Size mismatch");
+ memcpy(sync->reply_data, req, sizeof(*req));
+ complete(&sync->replied);
+}
+
/**
* dmub_hpd_callback - DMUB HPD interrupt processing callback.
* @adev: amdgpu_device pointer
@@ -648,37 +827,56 @@ void dmub_aux_setconfig_callback(struct amdgpu_device *adev, struct dmub_notific
* Dmub Hpd interrupt processing callback. Gets displayindex through the
* ink index and calls helper to do the processing.
*/
-void dmub_hpd_callback(struct amdgpu_device *adev, struct dmub_notification *notify)
+static void dmub_hpd_callback(struct amdgpu_device *adev,
+ struct dmub_notification *notify)
{
struct amdgpu_dm_connector *aconnector;
struct amdgpu_dm_connector *hpd_aconnector = NULL;
struct drm_connector *connector;
struct drm_connector_list_iter iter;
struct dc_link *link;
- uint8_t link_index = 0;
- struct drm_device *dev = adev->dm.ddev;
+ u8 link_index = 0;
+ struct drm_device *dev;
if (adev == NULL)
return;
if (notify == NULL) {
- DRM_ERROR("DMUB HPD callback notification was NULL");
+ drm_err(adev_to_drm(adev), "DMUB HPD callback notification was NULL");
return;
}
if (notify->link_index > adev->dm.dc->link_count) {
- DRM_ERROR("DMUB HPD index (%u)is abnormal", notify->link_index);
+ drm_err(adev_to_drm(adev), "DMUB HPD index (%u)is abnormal", notify->link_index);
+ return;
+ }
+
+ /* Skip DMUB HPD IRQ in suspend/resume. We will probe them later. */
+ if (notify->type == DMUB_NOTIFICATION_HPD && adev->in_suspend) {
+ drm_info(adev_to_drm(adev), "Skip DMUB HPD IRQ callback in suspend/resume\n");
return;
}
link_index = notify->link_index;
link = adev->dm.dc->links[link_index];
+ dev = adev->dm.ddev;
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
if (link && aconnector->dc_link == link) {
- DRM_INFO("DMUB HPD callback: link_index=%u\n", link_index);
+ if (notify->type == DMUB_NOTIFICATION_HPD)
+ drm_info(adev_to_drm(adev), "DMUB HPD IRQ callback: link_index=%u\n", link_index);
+ else if (notify->type == DMUB_NOTIFICATION_HPD_IRQ)
+ drm_info(adev_to_drm(adev), "DMUB HPD RX IRQ callback: link_index=%u\n", link_index);
+ else
+ drm_warn(adev_to_drm(adev), "DMUB Unknown HPD callback type %d, link_index=%u\n",
+ notify->type, link_index);
+
hpd_aconnector = aconnector;
break;
}
@@ -686,14 +884,31 @@ void dmub_hpd_callback(struct amdgpu_device *adev, struct dmub_notification *not
drm_connector_list_iter_end(&iter);
if (hpd_aconnector) {
- if (notify->type == DMUB_NOTIFICATION_HPD)
+ if (notify->type == DMUB_NOTIFICATION_HPD) {
+ if (hpd_aconnector->dc_link->hpd_status == (notify->hpd_status == DP_HPD_PLUG))
+ drm_warn(adev_to_drm(adev), "DMUB reported hpd status unchanged. link_index=%u\n", link_index);
handle_hpd_irq_helper(hpd_aconnector);
- else if (notify->type == DMUB_NOTIFICATION_HPD_IRQ)
+ } else if (notify->type == DMUB_NOTIFICATION_HPD_IRQ) {
handle_hpd_rx_irq(hpd_aconnector);
+ }
}
}
/**
+ * dmub_hpd_sense_callback - DMUB HPD sense processing callback.
+ * @adev: amdgpu_device pointer
+ * @notify: dmub notification structure
+ *
+ * HPD sense changes can occur during low power states and need to be
+ * notified from firmware to driver.
+ */
+static void dmub_hpd_sense_callback(struct amdgpu_device *adev,
+ struct dmub_notification *notify)
+{
+ drm_dbg_driver(adev_to_drm(adev), "DMUB HPD SENSE callback.\n");
+}
+
+/**
* register_dmub_notify_callback - Sets callback for DMUB notify
* @adev: amdgpu_device pointer
* @type: Type of dmub notification
@@ -705,8 +920,10 @@ void dmub_hpd_callback(struct amdgpu_device *adev, struct dmub_notification *not
* to dmub interrupt handling thread
* Return: true if successfully registered, false if there is existing registration
*/
-bool register_dmub_notify_callback(struct amdgpu_device *adev, enum dmub_notification_type type,
-dmub_notify_interrupt_callback_t callback, bool dmub_int_thread_offload)
+static bool register_dmub_notify_callback(struct amdgpu_device *adev,
+ enum dmub_notification_type type,
+ dmub_notify_interrupt_callback_t callback,
+ bool dmub_int_thread_offload)
{
if (callback != NULL && type < ARRAY_SIZE(adev->dm.dmub_thread_offload)) {
adev->dm.dmub_callback[type] = callback;
@@ -724,7 +941,7 @@ static void dm_handle_hpd_work(struct work_struct *work)
dmub_hpd_wrk = container_of(work, struct dmub_hpd_work, handle_hpd_work);
if (!dmub_hpd_wrk->dmub_notify) {
- DRM_ERROR("dmub_hpd_wrk dmub_notify is NULL");
+ drm_err(adev_to_drm(dmub_hpd_wrk->adev), "dmub_hpd_wrk dmub_notify is NULL");
return;
}
@@ -738,6 +955,30 @@ static void dm_handle_hpd_work(struct work_struct *work)
}
+static const char *dmub_notification_type_str(enum dmub_notification_type e)
+{
+ switch (e) {
+ case DMUB_NOTIFICATION_NO_DATA:
+ return "NO_DATA";
+ case DMUB_NOTIFICATION_AUX_REPLY:
+ return "AUX_REPLY";
+ case DMUB_NOTIFICATION_HPD:
+ return "HPD";
+ case DMUB_NOTIFICATION_HPD_IRQ:
+ return "HPD_IRQ";
+ case DMUB_NOTIFICATION_SET_CONFIG_REPLY:
+ return "SET_CONFIG_REPLY";
+ case DMUB_NOTIFICATION_DPIA_NOTIFICATION:
+ return "DPIA_NOTIFICATION";
+ case DMUB_NOTIFICATION_HPD_SENSE_NOTIFY:
+ return "HPD_SENSE_NOTIFY";
+ case DMUB_NOTIFICATION_FUSED_IO:
+ return "FUSED_IO";
+ default:
+ return "<unknown>";
+ }
+}
+
#define DMUB_TRACE_MAX_READ 64
/**
* dm_dmub_outbox1_low_irq() - Handles Outbox interrupt
@@ -748,99 +989,87 @@ static void dm_handle_hpd_work(struct work_struct *work)
*/
static void dm_dmub_outbox1_low_irq(void *interrupt_params)
{
- struct dmub_notification notify;
+ struct dmub_notification notify = {0};
struct common_irq_params *irq_params = interrupt_params;
struct amdgpu_device *adev = irq_params->adev;
struct amdgpu_display_manager *dm = &adev->dm;
struct dmcub_trace_buf_entry entry = { 0 };
- uint32_t count = 0;
+ u32 count = 0;
struct dmub_hpd_work *dmub_hpd_wrk;
- struct dc_link *plink = NULL;
+
+ do {
+ if (dc_dmub_srv_get_dmub_outbox0_msg(dm->dc, &entry)) {
+ trace_amdgpu_dmub_trace_high_irq(entry.trace_code, entry.tick_count,
+ entry.param0, entry.param1);
+
+ drm_dbg_driver(adev_to_drm(adev), "trace_code:%u, tick_count:%u, param0:%u, param1:%u\n",
+ entry.trace_code, entry.tick_count, entry.param0, entry.param1);
+ } else
+ break;
+
+ count++;
+
+ } while (count <= DMUB_TRACE_MAX_READ);
+
+ if (count > DMUB_TRACE_MAX_READ)
+ drm_dbg_driver(adev_to_drm(adev), "Warning : count > DMUB_TRACE_MAX_READ");
if (dc_enable_dmub_notifications(adev->dm.dc) &&
irq_params->irq_src == DC_IRQ_SOURCE_DMCUB_OUTBOX) {
do {
dc_stat_get_dmub_notification(adev->dm.dc, &notify);
- if (notify.type > ARRAY_SIZE(dm->dmub_thread_offload)) {
- DRM_ERROR("DM: notify type %d invalid!", notify.type);
+ if (notify.type >= ARRAY_SIZE(dm->dmub_thread_offload)) {
+ drm_err(adev_to_drm(adev), "DM: notify type %d invalid!", notify.type);
continue;
}
if (!dm->dmub_callback[notify.type]) {
- DRM_DEBUG_DRIVER("DMUB notification skipped, no handler: type=%d\n", notify.type);
+ drm_warn(adev_to_drm(adev), "DMUB notification skipped due to no handler: type=%s\n",
+ dmub_notification_type_str(notify.type));
continue;
}
if (dm->dmub_thread_offload[notify.type] == true) {
dmub_hpd_wrk = kzalloc(sizeof(*dmub_hpd_wrk), GFP_ATOMIC);
if (!dmub_hpd_wrk) {
- DRM_ERROR("Failed to allocate dmub_hpd_wrk");
+ drm_err(adev_to_drm(adev), "Failed to allocate dmub_hpd_wrk");
return;
}
- dmub_hpd_wrk->dmub_notify = kzalloc(sizeof(struct dmub_notification), GFP_ATOMIC);
+ dmub_hpd_wrk->dmub_notify = kmemdup(&notify, sizeof(struct dmub_notification),
+ GFP_ATOMIC);
if (!dmub_hpd_wrk->dmub_notify) {
kfree(dmub_hpd_wrk);
- DRM_ERROR("Failed to allocate dmub_hpd_wrk->dmub_notify");
+ drm_err(adev_to_drm(adev), "Failed to allocate dmub_hpd_wrk->dmub_notify");
return;
}
INIT_WORK(&dmub_hpd_wrk->handle_hpd_work, dm_handle_hpd_work);
- if (dmub_hpd_wrk->dmub_notify)
- memcpy(dmub_hpd_wrk->dmub_notify, &notify, sizeof(struct dmub_notification));
dmub_hpd_wrk->adev = adev;
- if (notify.type == DMUB_NOTIFICATION_HPD) {
- plink = adev->dm.dc->links[notify.link_index];
- if (plink) {
- plink->hpd_status =
- notify.hpd_status ==
- DP_HPD_PLUG ? true : false;
- }
- }
queue_work(adev->dm.delayed_hpd_wq, &dmub_hpd_wrk->handle_hpd_work);
} else {
dm->dmub_callback[notify.type](adev, &notify);
}
} while (notify.pending_notification);
}
-
-
- do {
- if (dc_dmub_srv_get_dmub_outbox0_msg(dm->dc, &entry)) {
- trace_amdgpu_dmub_trace_high_irq(entry.trace_code, entry.tick_count,
- entry.param0, entry.param1);
-
- DRM_DEBUG_DRIVER("trace_code:%u, tick_count:%u, param0:%u, param1:%u\n",
- entry.trace_code, entry.tick_count, entry.param0, entry.param1);
- } else
- break;
-
- count++;
-
- } while (count <= DMUB_TRACE_MAX_READ);
-
- if (count > DMUB_TRACE_MAX_READ)
- DRM_DEBUG_DRIVER("Warning : count > DMUB_TRACE_MAX_READ");
}
-#endif /* CONFIG_DRM_AMD_DC_DCN */
-static int dm_set_clockgating_state(void *handle,
+static int dm_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int dm_set_powergating_state(void *handle,
+static int dm_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
/* Prototypes of private functions */
-static int dm_early_init(void* handle);
+static int dm_early_init(struct amdgpu_ip_block *ip_block);
/* Allocate memory for FBC compressed data */
static void amdgpu_dm_fbc_init(struct drm_connector *connector)
{
- struct drm_device *dev = connector->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_device *adev = drm_to_adev(connector->dev);
struct dm_compressor_info *compressor = &adev->dm.compressor;
struct amdgpu_dm_connector *aconn = to_amdgpu_dm_connector(connector);
struct drm_display_mode *mode;
@@ -857,8 +1086,8 @@ static void amdgpu_dm_fbc_init(struct drm_connector *connector)
list_for_each_entry(mode, &connector->modes, head) {
- if (max_size < mode->htotal * mode->vtotal)
- max_size = mode->htotal * mode->vtotal;
+ if (max_size < (unsigned long) mode->htotal * mode->vtotal)
+ max_size = (unsigned long) mode->htotal * mode->vtotal;
}
if (max_size) {
@@ -867,10 +1096,10 @@ static void amdgpu_dm_fbc_init(struct drm_connector *connector)
&compressor->gpu_addr, &compressor->cpu_addr);
if (r)
- DRM_ERROR("DM: Failed to initialize FBC\n");
+ drm_err(adev_to_drm(adev), "DM: Failed to initialize FBC\n");
else {
adev->dm.dc->ctx->fbc_gpu_addr = compressor->gpu_addr;
- DRM_INFO("DM: FBC alloc %lu\n", max_size*4);
+ drm_info(adev_to_drm(adev), "DM: FBC alloc %lu\n", max_size*4);
}
}
@@ -894,13 +1123,19 @@ static int amdgpu_dm_audio_component_get_eld(struct device *kdev, int port,
drm_connector_list_iter_begin(dev, &conn_iter);
drm_for_each_connector_iter(connector, &conn_iter) {
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
if (aconnector->audio_inst != port)
continue;
*enabled = true;
+ mutex_lock(&connector->eld_mutex);
ret = drm_eld_size(connector->eld);
memcpy(buf, connector->eld, min(max_bytes, ret));
+ mutex_unlock(&connector->eld_mutex);
break;
}
@@ -934,8 +1169,7 @@ static int amdgpu_dm_audio_component_bind(struct device *kdev,
static void amdgpu_dm_audio_component_unbind(struct device *kdev,
struct device *hda_kdev, void *data)
{
- struct drm_device *dev = dev_get_drvdata(kdev);
- struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_device *adev = drm_to_adev(dev_get_drvdata(kdev));
struct drm_audio_component *acomp = data;
acomp->ops = NULL;
@@ -1018,43 +1252,47 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
const struct firmware *dmub_fw = adev->dm.dmub_fw;
struct dmcu *dmcu = adev->dm.dc->res_pool->dmcu;
struct abm *abm = adev->dm.dc->res_pool->abm;
+ struct dc_context *ctx = adev->dm.dc->ctx;
struct dmub_srv_hw_params hw_params;
enum dmub_status status;
const unsigned char *fw_inst_const, *fw_bss_data;
- uint32_t i, fw_inst_const_size, fw_bss_data_size;
+ u32 i, fw_inst_const_size, fw_bss_data_size;
bool has_hw_support;
- struct dc *dc = adev->dm.dc;
if (!dmub_srv)
/* DMUB isn't supported on the ASIC. */
return 0;
if (!fb_info) {
- DRM_ERROR("No framebuffer info for DMUB service.\n");
+ drm_err(adev_to_drm(adev), "No framebuffer info for DMUB service.\n");
return -EINVAL;
}
if (!dmub_fw) {
/* Firmware required for DMUB support. */
- DRM_ERROR("No firmware provided for DMUB.\n");
+ drm_err(adev_to_drm(adev), "No firmware provided for DMUB.\n");
return -EINVAL;
}
+ /* initialize register offsets for ASICs with runtime initialization available */
+ if (dmub_srv->hw_funcs.init_reg_offsets)
+ dmub_srv->hw_funcs.init_reg_offsets(dmub_srv, ctx);
+
status = dmub_srv_has_hw_support(dmub_srv, &has_hw_support);
if (status != DMUB_STATUS_OK) {
- DRM_ERROR("Error checking HW support for DMUB: %d\n", status);
+ drm_err(adev_to_drm(adev), "Error checking HW support for DMUB: %d\n", status);
return -EINVAL;
}
if (!has_hw_support) {
- DRM_INFO("DMUB unsupported on ASIC\n");
+ drm_info(adev_to_drm(adev), "DMUB unsupported on ASIC\n");
return 0;
}
/* Reset DMCUB if it was previously running - before we overwrite its memory. */
status = dmub_srv_hw_reset(dmub_srv);
if (status != DMUB_STATUS_OK)
- DRM_WARN("Error resetting DMUB HW: %d\n", status);
+ drm_warn(adev_to_drm(adev), "Error resetting DMUB HW: %d\n", status);
hdr = (const struct dmcub_firmware_header_v1_0 *)dmub_fw->data;
@@ -1100,10 +1338,13 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
memset(fb_info->fb[DMUB_WINDOW_6_FW_STATE].cpu_addr, 0,
fb_info->fb[DMUB_WINDOW_6_FW_STATE].size);
+ memset(fb_info->fb[DMUB_WINDOW_SHARED_STATE].cpu_addr, 0,
+ fb_info->fb[DMUB_WINDOW_SHARED_STATE].size);
+
/* Initialize hardware. */
memset(&hw_params, 0, sizeof(hw_params));
hw_params.fb_base = adev->gmc.fb_start;
- hw_params.fb_offset = adev->gmc.aper_base;
+ hw_params.fb_offset = adev->vm_manager.vram_base_offset;
/* backdoor load firmware and trigger dmub running */
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
@@ -1115,14 +1356,26 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
for (i = 0; i < fb_info->num_fb; ++i)
hw_params.fb[i] = &fb_info->fb[i];
- switch (adev->asic_type) {
- case CHIP_YELLOW_CARP:
- if (dc->ctx->asic_id.hw_internal_rev != YELLOW_CARP_A0) {
- hw_params.dpia_supported = true;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- hw_params.disable_dpia = dc->debug.dpia_debug.bits.disable_dpia;
-#endif
- }
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(3, 1, 3):
+ case IP_VERSION(3, 1, 4):
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 6, 0):
+ case IP_VERSION(4, 0, 1):
+ hw_params.dpia_supported = true;
+ hw_params.disable_dpia = adev->dm.dc->debug.dpia_debug.bits.disable_dpia;
+ break;
+ default:
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 6, 0):
+ hw_params.ips_sequential_ono = adev->external_rev_id > 0x10;
+ hw_params.lower_hbr3_phy_ssc = true;
break;
default:
break;
@@ -1130,14 +1383,14 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
status = dmub_srv_hw_init(dmub_srv, &hw_params);
if (status != DMUB_STATUS_OK) {
- DRM_ERROR("Error initializing DMUB HW: %d\n", status);
+ drm_err(adev_to_drm(adev), "Error initializing DMUB HW: %d\n", status);
return -EINVAL;
}
/* Wait for firmware load to finish. */
status = dmub_srv_wait_for_auto_load(dmub_srv, 100000);
if (status != DMUB_STATUS_OK)
- DRM_WARN("Wait for DMUB auto-load failed: %d\n", status);
+ drm_warn(adev_to_drm(adev), "Wait for DMUB auto-load failed: %d\n", status);
/* Init DMCU and ABM if available. */
if (dmcu && abm) {
@@ -1148,111 +1401,160 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
if (!adev->dm.dc->ctx->dmub_srv)
adev->dm.dc->ctx->dmub_srv = dc_dmub_srv_create(adev->dm.dc, dmub_srv);
if (!adev->dm.dc->ctx->dmub_srv) {
- DRM_ERROR("Couldn't allocate DC DMUB server!\n");
+ drm_err(adev_to_drm(adev), "Couldn't allocate DC DMUB server!\n");
return -ENOMEM;
}
- DRM_INFO("DMUB hardware initialized: version=0x%08X\n",
+ drm_info(adev_to_drm(adev), "DMUB hardware initialized: version=0x%08X\n",
adev->dm.dmcub_fw_version);
+ /* Keeping sanity checks off if
+ * DCN31 >= 4.0.59.0
+ * DCN314 >= 8.0.16.0
+ * Otherwise, turn on sanity checks
+ */
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(3, 1, 2):
+ case IP_VERSION(3, 1, 3):
+ if (adev->dm.dmcub_fw_version &&
+ adev->dm.dmcub_fw_version >= DMUB_FW_VERSION(4, 0, 0) &&
+ adev->dm.dmcub_fw_version < DMUB_FW_VERSION(4, 0, 59))
+ adev->dm.dc->debug.sanity_checks = true;
+ break;
+ case IP_VERSION(3, 1, 4):
+ if (adev->dm.dmcub_fw_version &&
+ adev->dm.dmcub_fw_version >= DMUB_FW_VERSION(4, 0, 0) &&
+ adev->dm.dmcub_fw_version < DMUB_FW_VERSION(8, 0, 16))
+ adev->dm.dc->debug.sanity_checks = true;
+ break;
+ default:
+ break;
+ }
+
return 0;
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
+static void dm_dmub_hw_resume(struct amdgpu_device *adev)
+{
+ struct dmub_srv *dmub_srv = adev->dm.dmub_srv;
+ enum dmub_status status;
+ bool init;
+ int r;
+
+ if (!dmub_srv) {
+ /* DMUB isn't supported on the ASIC. */
+ return;
+ }
+
+ status = dmub_srv_is_hw_init(dmub_srv, &init);
+ if (status != DMUB_STATUS_OK)
+ drm_warn(adev_to_drm(adev), "DMUB hardware init check failed: %d\n", status);
+
+ if (status == DMUB_STATUS_OK && init) {
+ /* Wait for firmware load to finish. */
+ status = dmub_srv_wait_for_auto_load(dmub_srv, 100000);
+ if (status != DMUB_STATUS_OK)
+ drm_warn(adev_to_drm(adev), "Wait for DMUB auto-load failed: %d\n", status);
+ } else {
+ /* Perform the full hardware initialization. */
+ r = dm_dmub_hw_init(adev);
+ if (r)
+ drm_err(adev_to_drm(adev), "DMUB interface failed to initialize: status=%d\n", r);
+ }
+}
+
static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_addr_space_config *pa_config)
{
- uint64_t pt_base;
- uint32_t logical_addr_low;
- uint32_t logical_addr_high;
- uint32_t agp_base, agp_bot, agp_top;
+ u64 pt_base;
+ u32 logical_addr_low;
+ u32 logical_addr_high;
+ u32 agp_base, agp_bot, agp_top;
PHYSICAL_ADDRESS_LOC page_table_start, page_table_end, page_table_base;
memset(pa_config, 0, sizeof(*pa_config));
- logical_addr_low = min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18;
- pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
-
- if (adev->apu_flags & AMD_APU_IS_RAVEN2)
- /*
- * Raven2 has a HW issue that it is unable to use the vram which
- * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
- * workaround that increase system aperture high address (add 1)
- * to get rid of the VM fault and hardware hang.
- */
- logical_addr_high = max((adev->gmc.fb_end >> 18) + 0x1, adev->gmc.agp_end >> 18);
- else
- logical_addr_high = max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18;
-
agp_base = 0;
agp_bot = adev->gmc.agp_start >> 24;
agp_top = adev->gmc.agp_end >> 24;
+ /* AGP aperture is disabled */
+ if (agp_bot > agp_top) {
+ logical_addr_low = adev->gmc.fb_start >> 18;
+ if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+ AMD_APU_IS_RENOIR |
+ AMD_APU_IS_GREEN_SARDINE))
+ /*
+ * Raven2 has a HW issue that it is unable to use the vram which
+ * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
+ * workaround that increase system aperture high address (add 1)
+ * to get rid of the VM fault and hardware hang.
+ */
+ logical_addr_high = (adev->gmc.fb_end >> 18) + 0x1;
+ else
+ logical_addr_high = adev->gmc.fb_end >> 18;
+ } else {
+ logical_addr_low = min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18;
+ if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+ AMD_APU_IS_RENOIR |
+ AMD_APU_IS_GREEN_SARDINE))
+ /*
+ * Raven2 has a HW issue that it is unable to use the vram which
+ * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
+ * workaround that increase system aperture high address (add 1)
+ * to get rid of the VM fault and hardware hang.
+ */
+ logical_addr_high = max((adev->gmc.fb_end >> 18) + 0x1, adev->gmc.agp_end >> 18);
+ else
+ logical_addr_high = max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18;
+ }
+
+ pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
- page_table_start.high_part = (u32)(adev->gmc.gart_start >> 44) & 0xF;
- page_table_start.low_part = (u32)(adev->gmc.gart_start >> 12);
- page_table_end.high_part = (u32)(adev->gmc.gart_end >> 44) & 0xF;
- page_table_end.low_part = (u32)(adev->gmc.gart_end >> 12);
- page_table_base.high_part = upper_32_bits(pt_base) & 0xF;
+ page_table_start.high_part = upper_32_bits(adev->gmc.gart_start >>
+ AMDGPU_GPU_PAGE_SHIFT);
+ page_table_start.low_part = lower_32_bits(adev->gmc.gart_start >>
+ AMDGPU_GPU_PAGE_SHIFT);
+ page_table_end.high_part = upper_32_bits(adev->gmc.gart_end >>
+ AMDGPU_GPU_PAGE_SHIFT);
+ page_table_end.low_part = lower_32_bits(adev->gmc.gart_end >>
+ AMDGPU_GPU_PAGE_SHIFT);
+ page_table_base.high_part = upper_32_bits(pt_base);
page_table_base.low_part = lower_32_bits(pt_base);
pa_config->system_aperture.start_addr = (uint64_t)logical_addr_low << 18;
pa_config->system_aperture.end_addr = (uint64_t)logical_addr_high << 18;
- pa_config->system_aperture.agp_base = (uint64_t)agp_base << 24 ;
+ pa_config->system_aperture.agp_base = (uint64_t)agp_base << 24;
pa_config->system_aperture.agp_bot = (uint64_t)agp_bot << 24;
pa_config->system_aperture.agp_top = (uint64_t)agp_top << 24;
pa_config->system_aperture.fb_base = adev->gmc.fb_start;
- pa_config->system_aperture.fb_offset = adev->gmc.aper_base;
+ pa_config->system_aperture.fb_offset = adev->vm_manager.vram_base_offset;
pa_config->system_aperture.fb_top = adev->gmc.fb_end;
pa_config->gart_config.page_table_start_addr = page_table_start.quad_part << 12;
pa_config->gart_config.page_table_end_addr = page_table_end.quad_part << 12;
pa_config->gart_config.page_table_base_addr = page_table_base.quad_part;
- pa_config->is_hvm_enabled = 0;
+ pa_config->is_hvm_enabled = adev->mode_info.gpu_vm_support;
}
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-static void vblank_control_worker(struct work_struct *work)
-{
- struct vblank_control_work *vblank_work =
- container_of(work, struct vblank_control_work, work);
- struct amdgpu_display_manager *dm = vblank_work->dm;
-
- mutex_lock(&dm->dc_lock);
-
- if (vblank_work->enable)
- dm->active_vblank_irq_count++;
- else if(dm->active_vblank_irq_count)
- dm->active_vblank_irq_count--;
- dc_allow_idle_optimizations(dm->dc, dm->active_vblank_irq_count == 0);
-
- DRM_DEBUG_KMS("Allow idle optimizations (MALL): %d\n", dm->active_vblank_irq_count == 0);
-
- /* Control PSR based on vblank requirements from OS */
- if (vblank_work->stream && vblank_work->stream->link) {
- if (vblank_work->enable) {
- if (vblank_work->stream->link->psr_settings.psr_allow_active)
- amdgpu_dm_psr_disable(vblank_work->stream);
- } else if (vblank_work->stream->link->psr_settings.psr_feature_enabled &&
- !vblank_work->stream->link->psr_settings.psr_allow_active &&
- vblank_work->acrtc->dm_irq_params.allow_psr_entry) {
- amdgpu_dm_psr_enable(vblank_work->stream);
- }
- }
-
- mutex_unlock(&dm->dc_lock);
+static void force_connector_state(
+ struct amdgpu_dm_connector *aconnector,
+ enum drm_connector_force force_state)
+{
+ struct drm_connector *connector = &aconnector->base;
- dc_stream_release(vblank_work->stream);
+ mutex_lock(&connector->dev->mode_config.mutex);
+ aconnector->base.force = force_state;
+ mutex_unlock(&connector->dev->mode_config.mutex);
- kfree(vblank_work);
+ mutex_lock(&aconnector->hpd_lock);
+ drm_kms_helper_connector_hotplug_event(connector);
+ mutex_unlock(&aconnector->hpd_lock);
}
-#endif
-
static void dm_handle_hpd_rx_offload_work(struct work_struct *work)
{
struct hpd_rx_irq_offload_work *offload_work;
@@ -1261,21 +1563,24 @@ static void dm_handle_hpd_rx_offload_work(struct work_struct *work)
struct amdgpu_device *adev;
enum dc_connection_type new_connection_type = dc_connection_none;
unsigned long flags;
+ union test_response test_response;
+
+ memset(&test_response, 0, sizeof(test_response));
offload_work = container_of(work, struct hpd_rx_irq_offload_work, work);
aconnector = offload_work->offload_wq->aconnector;
+ adev = offload_work->adev;
if (!aconnector) {
- DRM_ERROR("Can't retrieve aconnector in hpd_rx_irq_offload_work");
+ drm_err(adev_to_drm(adev), "Can't retrieve aconnector in hpd_rx_irq_offload_work");
goto skip;
}
- adev = drm_to_adev(aconnector->base.dev);
dc_link = aconnector->dc_link;
mutex_lock(&aconnector->hpd_lock);
- if (!dc_link_detect_sink(dc_link, &new_connection_type))
- DRM_ERROR("KMS: Failed to detect connector\n");
+ if (!dc_link_detect_connection_type(dc_link, &new_connection_type))
+ drm_err(adev_to_drm(adev), "KMS: Failed to detect connector\n");
mutex_unlock(&aconnector->hpd_lock);
if (new_connection_type == dc_connection_none)
@@ -1284,16 +1589,58 @@ static void dm_handle_hpd_rx_offload_work(struct work_struct *work)
if (amdgpu_in_reset(adev))
goto skip;
+ if (offload_work->data.bytes.device_service_irq.bits.UP_REQ_MSG_RDY ||
+ offload_work->data.bytes.device_service_irq.bits.DOWN_REP_MSG_RDY) {
+ dm_handle_mst_sideband_msg_ready_event(&aconnector->mst_mgr, DOWN_OR_UP_MSG_RDY_EVENT);
+ spin_lock_irqsave(&offload_work->offload_wq->offload_lock, flags);
+ offload_work->offload_wq->is_handling_mst_msg_rdy_event = false;
+ spin_unlock_irqrestore(&offload_work->offload_wq->offload_lock, flags);
+ goto skip;
+ }
+
mutex_lock(&adev->dm.dc_lock);
- if (offload_work->data.bytes.device_service_irq.bits.AUTOMATED_TEST)
+ if (offload_work->data.bytes.device_service_irq.bits.AUTOMATED_TEST) {
dc_link_dp_handle_automated_test(dc_link);
- else if ((dc_link->connector_signal != SIGNAL_TYPE_EDP) &&
- hpd_rx_irq_check_link_loss_status(dc_link, &offload_work->data) &&
+
+ if (aconnector->timing_changed) {
+ /* force connector disconnect and reconnect */
+ force_connector_state(aconnector, DRM_FORCE_OFF);
+ msleep(100);
+ force_connector_state(aconnector, DRM_FORCE_UNSPECIFIED);
+ }
+
+ test_response.bits.ACK = 1;
+
+ core_link_write_dpcd(
+ dc_link,
+ DP_TEST_RESPONSE,
+ &test_response.raw,
+ sizeof(test_response));
+ } else if ((dc_link->connector_signal != SIGNAL_TYPE_EDP) &&
+ dc_link_check_link_loss_status(dc_link, &offload_work->data) &&
dc_link_dp_allow_hpd_rx_irq(dc_link)) {
- dc_link_dp_handle_link_loss(dc_link);
+ /* offload_work->data is from handle_hpd_rx_irq->
+ * schedule_hpd_rx_offload_work.this is defer handle
+ * for hpd short pulse. upon here, link status may be
+ * changed, need get latest link status from dpcd
+ * registers. if link status is good, skip run link
+ * training again.
+ */
+ union hpd_irq_data irq_data;
+
+ memset(&irq_data, 0, sizeof(irq_data));
+
+ /* before dc_link_dp_handle_link_loss, allow new link lost handle
+ * request be added to work queue if link lost at end of dc_link_
+ * dp_handle_link_loss
+ */
spin_lock_irqsave(&offload_work->offload_wq->offload_lock, flags);
offload_work->offload_wq->is_handling_link_loss = false;
spin_unlock_irqrestore(&offload_work->offload_wq->offload_lock, flags);
+
+ if ((dc_link_dp_read_hpd_rx_irq_data(dc_link, &irq_data) == DC_OK) &&
+ dc_link_check_link_loss_status(dc_link, &irq_data))
+ dc_link_dp_handle_link_loss(dc_link);
}
mutex_unlock(&adev->dm.dc_lock);
@@ -1302,8 +1649,9 @@ skip:
}
-static struct hpd_rx_irq_offload_work_queue *hpd_rx_irq_create_workqueue(struct dc *dc)
+static struct hpd_rx_irq_offload_work_queue *hpd_rx_irq_create_workqueue(struct amdgpu_device *adev)
{
+ struct dc *dc = adev->dm.dc;
int max_caps = dc->caps.max_links;
int i = 0;
struct hpd_rx_irq_offload_work_queue *hpd_rx_offload_wq = NULL;
@@ -1319,14 +1667,22 @@ static struct hpd_rx_irq_offload_work_queue *hpd_rx_irq_create_workqueue(struct
create_singlethread_workqueue("amdgpu_dm_hpd_rx_offload_wq");
if (hpd_rx_offload_wq[i].wq == NULL) {
- DRM_ERROR("create amdgpu_dm_hpd_rx_offload_wq fail!");
- return NULL;
+ drm_err(adev_to_drm(adev), "create amdgpu_dm_hpd_rx_offload_wq fail!");
+ goto out_err;
}
spin_lock_init(&hpd_rx_offload_wq[i].offload_lock);
}
return hpd_rx_offload_wq;
+
+out_err:
+ for (i = 0; i < max_caps; i++) {
+ if (hpd_rx_offload_wq[i].wq)
+ destroy_workqueue(hpd_rx_offload_wq[i].wq);
+ }
+ kfree(hpd_rx_offload_wq);
+ return NULL;
}
struct amdgpu_stutter_quirk {
@@ -1360,12 +1716,169 @@ static bool dm_should_disable_stutter(struct pci_dev *pdev)
return false;
}
+
+void*
+dm_allocate_gpu_mem(
+ struct amdgpu_device *adev,
+ enum dc_gpu_mem_alloc_type type,
+ size_t size,
+ long long *addr)
+{
+ struct dal_allocation *da;
+ u32 domain = (type == DC_MEM_ALLOC_TYPE_GART) ?
+ AMDGPU_GEM_DOMAIN_GTT : AMDGPU_GEM_DOMAIN_VRAM;
+ int ret;
+
+ da = kzalloc(sizeof(struct dal_allocation), GFP_KERNEL);
+ if (!da)
+ return NULL;
+
+ ret = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
+ domain, &da->bo,
+ &da->gpu_addr, &da->cpu_ptr);
+
+ *addr = da->gpu_addr;
+
+ if (ret) {
+ kfree(da);
+ return NULL;
+ }
+
+ /* add da to list in dm */
+ list_add(&da->list, &adev->dm.da_list);
+
+ return da->cpu_ptr;
+}
+
+void
+dm_free_gpu_mem(
+ struct amdgpu_device *adev,
+ enum dc_gpu_mem_alloc_type type,
+ void *pvMem)
+{
+ struct dal_allocation *da;
+
+ /* walk the da list in DM */
+ list_for_each_entry(da, &adev->dm.da_list, list) {
+ if (pvMem == da->cpu_ptr) {
+ amdgpu_bo_free_kernel(&da->bo, &da->gpu_addr, &da->cpu_ptr);
+ list_del(&da->list);
+ kfree(da);
+ break;
+ }
+ }
+
+}
+
+static enum dmub_status
+dm_dmub_send_vbios_gpint_command(struct amdgpu_device *adev,
+ enum dmub_gpint_command command_code,
+ uint16_t param,
+ uint32_t timeout_us)
+{
+ union dmub_gpint_data_register reg, test;
+ uint32_t i;
+
+ /* Assume that VBIOS DMUB is ready to take commands */
+
+ reg.bits.status = 1;
+ reg.bits.command_code = command_code;
+ reg.bits.param = param;
+
+ cgs_write_register(adev->dm.cgs_device, 0x34c0 + 0x01f8, reg.all);
+
+ for (i = 0; i < timeout_us; ++i) {
+ udelay(1);
+
+ /* Check if our GPINT got acked */
+ reg.bits.status = 0;
+ test = (union dmub_gpint_data_register)
+ cgs_read_register(adev->dm.cgs_device, 0x34c0 + 0x01f8);
+
+ if (test.all == reg.all)
+ return DMUB_STATUS_OK;
+ }
+
+ return DMUB_STATUS_TIMEOUT;
+}
+
+static void *dm_dmub_get_vbios_bounding_box(struct amdgpu_device *adev)
+{
+ void *bb;
+ long long addr;
+ unsigned int bb_size;
+ int i = 0;
+ uint16_t chunk;
+ enum dmub_gpint_command send_addrs[] = {
+ DMUB_GPINT__SET_BB_ADDR_WORD0,
+ DMUB_GPINT__SET_BB_ADDR_WORD1,
+ DMUB_GPINT__SET_BB_ADDR_WORD2,
+ DMUB_GPINT__SET_BB_ADDR_WORD3,
+ };
+ enum dmub_status ret;
+
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(4, 0, 1):
+ bb_size = sizeof(struct dml2_soc_bb);
+ break;
+ default:
+ return NULL;
+ }
+
+ bb = dm_allocate_gpu_mem(adev,
+ DC_MEM_ALLOC_TYPE_GART,
+ bb_size,
+ &addr);
+ if (!bb)
+ return NULL;
+
+ for (i = 0; i < 4; i++) {
+ /* Extract 16-bit chunk */
+ chunk = ((uint64_t) addr >> (i * 16)) & 0xFFFF;
+ /* Send the chunk */
+ ret = dm_dmub_send_vbios_gpint_command(adev, send_addrs[i], chunk, 30000);
+ if (ret != DMUB_STATUS_OK)
+ goto free_bb;
+ }
+
+ /* Now ask DMUB to copy the bb */
+ ret = dm_dmub_send_vbios_gpint_command(adev, DMUB_GPINT__BB_COPY, 1, 200000);
+ if (ret != DMUB_STATUS_OK)
+ goto free_bb;
+
+ return bb;
+
+free_bb:
+ dm_free_gpu_mem(adev, DC_MEM_ALLOC_TYPE_GART, (void *) bb);
+ return NULL;
+
+}
+
+static enum dmub_ips_disable_type dm_get_default_ips_mode(
+ struct amdgpu_device *adev)
+{
+ enum dmub_ips_disable_type ret = DMUB_IPS_ENABLE;
+
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 6, 0):
+ case IP_VERSION(3, 5, 1):
+ ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF;
+ break;
+ default:
+ /* ASICs older than DCN35 do not have IPSs */
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) < IP_VERSION(3, 5, 0))
+ ret = DMUB_IPS_DISABLE_ALL;
+ break;
+ }
+
+ return ret;
+}
+
static int amdgpu_dm_init(struct amdgpu_device *adev)
{
struct dc_init_data init_data;
-#ifdef CONFIG_DRM_AMD_DC_HDCP
struct dc_callback_init init_params;
-#endif
int r;
adev->dm.ddev = adev_to_drm(adev);
@@ -1373,18 +1886,14 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
/* Zero all the fields */
memset(&init_data, 0, sizeof(init_data));
-#ifdef CONFIG_DRM_AMD_DC_HDCP
memset(&init_params, 0, sizeof(init_params));
-#endif
+ mutex_init(&adev->dm.dpia_aux_lock);
mutex_init(&adev->dm.dc_lock);
mutex_init(&adev->dm.audio_lock);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- spin_lock_init(&adev->dm.vblank_lock);
-#endif
- if(amdgpu_dm_irq_init(adev)) {
- DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");
+ if (amdgpu_dm_irq_init(adev)) {
+ drm_err(adev_to_drm(adev), "failed to initialize DM IRQ support.\n");
goto error;
}
@@ -1401,52 +1910,52 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
init_data.driver = adev;
- adev->dm.cgs_device = amdgpu_cgs_create_device(adev);
-
- if (!adev->dm.cgs_device) {
- DRM_ERROR("amdgpu: failed to create cgs device.\n");
- goto error;
- }
-
+ /* cgs_device was created in dm_sw_init() */
init_data.cgs_device = adev->dm.cgs_device;
init_data.dce_environment = DCE_ENV_PRODUCTION_DRV;
- switch (adev->asic_type) {
- case CHIP_CARRIZO:
- case CHIP_STONEY:
- init_data.flags.gpu_vm_support = true;
- break;
- default:
- switch (adev->ip_versions[DCE_HWIP][0]) {
- case IP_VERSION(2, 1, 0):
- init_data.flags.gpu_vm_support = true;
- switch (adev->dm.dmcub_fw_version) {
- case 0: /* development */
- case 0x1: /* linux-firmware.git hash 6d9f399 */
- case 0x01000000: /* linux-firmware.git hash 9a0b0f4 */
- init_data.flags.disable_dmcu = false;
- break;
- default:
- init_data.flags.disable_dmcu = true;
- }
- break;
- case IP_VERSION(1, 0, 0):
- case IP_VERSION(1, 0, 1):
- case IP_VERSION(3, 0, 1):
- case IP_VERSION(3, 1, 2):
- case IP_VERSION(3, 1, 3):
- init_data.flags.gpu_vm_support = true;
- break;
- case IP_VERSION(2, 0, 3):
- init_data.flags.disable_dmcu = true;
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(2, 1, 0):
+ switch (adev->dm.dmcub_fw_version) {
+ case 0: /* development */
+ case 0x1: /* linux-firmware.git hash 6d9f399 */
+ case 0x01000000: /* linux-firmware.git hash 9a0b0f4 */
+ init_data.flags.disable_dmcu = false;
break;
default:
- break;
+ init_data.flags.disable_dmcu = true;
}
break;
+ case IP_VERSION(2, 0, 3):
+ init_data.flags.disable_dmcu = true;
+ break;
+ default:
+ break;
}
+ /* APU support S/G display by default except:
+ * ASICs before Carrizo,
+ * RAVEN1 (Users reported stability issue)
+ */
+
+ if (adev->asic_type < CHIP_CARRIZO) {
+ init_data.flags.gpu_vm_support = false;
+ } else if (adev->asic_type == CHIP_RAVEN) {
+ if (adev->apu_flags & AMD_APU_IS_RAVEN)
+ init_data.flags.gpu_vm_support = false;
+ else
+ init_data.flags.gpu_vm_support = (amdgpu_sg_display != 0);
+ } else {
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(2, 0, 3))
+ init_data.flags.gpu_vm_support = (amdgpu_sg_display == 1);
+ else
+ init_data.flags.gpu_vm_support =
+ (amdgpu_sg_display != 0) && (adev->flags & AMD_IS_APU);
+ }
+
+ adev->mode_info.gpu_vm_support = init_data.flags.gpu_vm_support;
+
if (amdgpu_dc_feature_mask & DC_FBC_MASK)
init_data.flags.fbc_support = true;
@@ -1459,16 +1968,63 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
if (amdgpu_dc_feature_mask & DC_EDP_NO_POWER_SEQUENCING)
init_data.flags.edp_no_power_sequencing = true;
- init_data.flags.power_down_display_on_boot = true;
+ if (amdgpu_dc_feature_mask & DC_DISABLE_LTTPR_DP1_4A)
+ init_data.flags.allow_lttpr_non_transparent_mode.bits.DP1_4A = true;
+ if (amdgpu_dc_feature_mask & DC_DISABLE_LTTPR_DP2_0)
+ init_data.flags.allow_lttpr_non_transparent_mode.bits.DP2_0 = true;
+
+ init_data.flags.seamless_boot_edp_requested = false;
+
+ if (amdgpu_device_seamless_boot_supported(adev)) {
+ init_data.flags.seamless_boot_edp_requested = true;
+ init_data.flags.allow_seamless_boot_optimization = true;
+ drm_dbg(adev->dm.ddev, "Seamless boot requested\n");
+ }
+
+ init_data.flags.enable_mipi_converter_optimization = true;
+
+ init_data.dcn_reg_offsets = adev->reg_offset[DCE_HWIP][0];
+ init_data.nbio_reg_offsets = adev->reg_offset[NBIO_HWIP][0];
+ init_data.clk_reg_offsets = adev->reg_offset[CLK_HWIP][0];
+
+ if (amdgpu_dc_debug_mask & DC_DISABLE_IPS)
+ init_data.flags.disable_ips = DMUB_IPS_DISABLE_ALL;
+ else if (amdgpu_dc_debug_mask & DC_DISABLE_IPS_DYNAMIC)
+ init_data.flags.disable_ips = DMUB_IPS_DISABLE_DYNAMIC;
+ else if (amdgpu_dc_debug_mask & DC_DISABLE_IPS2_DYNAMIC)
+ init_data.flags.disable_ips = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF;
+ else if (amdgpu_dc_debug_mask & DC_FORCE_IPS_ENABLE)
+ init_data.flags.disable_ips = DMUB_IPS_ENABLE;
+ else
+ init_data.flags.disable_ips = dm_get_default_ips_mode(adev);
+
+ init_data.flags.disable_ips_in_vpb = 0;
+
+ /* DCN35 and above supports dynamic DTBCLK switch */
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 5, 0))
+ init_data.flags.allow_0_dtb_clk = true;
+
+ /* Enable DWB for tested platforms only */
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0))
+ init_data.num_virtual_links = 1;
+
+ retrieve_dmi_info(&adev->dm);
+ if (adev->dm.edp0_on_dp1_quirk)
+ init_data.flags.support_edp0_on_dp1 = true;
+
+ if (adev->dm.bb_from_dmub)
+ init_data.bb_from_dmub = adev->dm.bb_from_dmub;
+ else
+ init_data.bb_from_dmub = NULL;
- INIT_LIST_HEAD(&adev->dm.da_list);
/* Display Core create. */
adev->dm.dc = dc_create(&init_data);
if (adev->dm.dc) {
- DRM_INFO("Display Core initialized with v%s!\n", DC_VER);
+ drm_info(adev_to_drm(adev), "Display Core v%s initialized on %s\n", DC_VER,
+ dce_version_to_string(adev->dm.dc->ctx->dce_version));
} else {
- DRM_INFO("Display Core failed to initialize with v%s!\n", DC_VER);
+ drm_info(adev_to_drm(adev), "Display Core failed to initialize with v%s!\n", DC_VER);
goto error;
}
@@ -1491,21 +2047,50 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
if (amdgpu_dc_debug_mask & DC_DISABLE_CLOCK_GATING)
adev->dm.dc->debug.disable_clock_gate = true;
+ if (amdgpu_dc_debug_mask & DC_FORCE_SUBVP_MCLK_SWITCH)
+ adev->dm.dc->debug.force_subvp_mclk_switch = true;
+
+ if (amdgpu_dc_debug_mask & DC_DISABLE_SUBVP_FAMS) {
+ adev->dm.dc->debug.force_disable_subvp = true;
+ adev->dm.dc->debug.fams2_config.bits.enable = false;
+ }
+
+ if (amdgpu_dc_debug_mask & DC_ENABLE_DML2) {
+ adev->dm.dc->debug.using_dml2 = true;
+ adev->dm.dc->debug.using_dml21 = true;
+ }
+
+ if (amdgpu_dc_debug_mask & DC_HDCP_LC_FORCE_FW_ENABLE)
+ adev->dm.dc->debug.hdcp_lc_force_fw_enable = true;
+
+ if (amdgpu_dc_debug_mask & DC_HDCP_LC_ENABLE_SW_FALLBACK)
+ adev->dm.dc->debug.hdcp_lc_enable_sw_fallback = true;
+
+ if (amdgpu_dc_debug_mask & DC_SKIP_DETECTION_LT)
+ adev->dm.dc->debug.skip_detection_link_training = true;
+
+ adev->dm.dc->debug.visual_confirm = amdgpu_dc_visual_confirm;
+
+ /* TODO: Remove after DP2 receiver gets proper support of Cable ID feature */
+ adev->dm.dc->debug.ignore_cable_id = true;
+
+ if (adev->dm.dc->caps.dp_hdmi21_pcon_support)
+ drm_info(adev_to_drm(adev), "DP-HDMI FRL PCON supported\n");
+
r = dm_dmub_hw_init(adev);
if (r) {
- DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
+ drm_err(adev_to_drm(adev), "DMUB interface failed to initialize: status=%d\n", r);
goto error;
}
dc_hardware_init(adev->dm.dc);
- adev->dm.hpd_rx_offload_wq = hpd_rx_irq_create_workqueue(adev->dm.dc);
+ adev->dm.hpd_rx_offload_wq = hpd_rx_irq_create_workqueue(adev);
if (!adev->dm.hpd_rx_offload_wq) {
- DRM_ERROR("amdgpu: failed to create hpd rx offload workqueue.\n");
+ drm_err(adev_to_drm(adev), "failed to create hpd rx offload workqueue.\n");
goto error;
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
if ((adev->flags & AMD_IS_APU) && (adev->asic_type >= CHIP_CARRIZO)) {
struct dc_phy_addr_space_config pa_config;
@@ -1514,77 +2099,83 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
// Call the DC init_memory func
dc_setup_system_context(adev->dm.dc, &pa_config);
}
-#endif
adev->dm.freesync_module = mod_freesync_create(adev->dm.dc);
if (!adev->dm.freesync_module) {
- DRM_ERROR(
- "amdgpu: failed to initialize freesync_module.\n");
+ drm_err(adev_to_drm(adev),
+ "failed to initialize freesync_module.\n");
} else
- DRM_DEBUG_DRIVER("amdgpu: freesync_module init done %p.\n",
+ drm_dbg_driver(adev_to_drm(adev), "amdgpu: freesync_module init done %p.\n",
adev->dm.freesync_module);
amdgpu_dm_init_color_mod();
-#if defined(CONFIG_DRM_AMD_DC_DCN)
if (adev->dm.dc->caps.max_links > 0) {
adev->dm.vblank_control_workqueue =
create_singlethread_workqueue("dm_vblank_control_workqueue");
if (!adev->dm.vblank_control_workqueue)
- DRM_ERROR("amdgpu: failed to initialize vblank_workqueue.\n");
+ drm_err(adev_to_drm(adev), "failed to initialize vblank_workqueue.\n");
}
-#endif
-#ifdef CONFIG_DRM_AMD_DC_HDCP
+ if (adev->dm.dc->caps.ips_support &&
+ adev->dm.dc->config.disable_ips != DMUB_IPS_DISABLE_ALL)
+ adev->dm.idle_workqueue = idle_create_workqueue(adev);
+
if (adev->dm.dc->caps.max_links > 0 && adev->family >= AMDGPU_FAMILY_RV) {
adev->dm.hdcp_workqueue = hdcp_create_workqueue(adev, &init_params.cp_psp, adev->dm.dc);
if (!adev->dm.hdcp_workqueue)
- DRM_ERROR("amdgpu: failed to initialize hdcp_workqueue.\n");
+ drm_err(adev_to_drm(adev), "failed to initialize hdcp_workqueue.\n");
else
- DRM_DEBUG_DRIVER("amdgpu: hdcp_workqueue init done %p.\n", adev->dm.hdcp_workqueue);
+ drm_dbg_driver(adev_to_drm(adev), "amdgpu: hdcp_workqueue init done %p.\n", adev->dm.hdcp_workqueue);
dc_init_callbacks(adev->dm.dc, &init_params);
}
-#endif
-#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
- adev->dm.crc_rd_wrk = amdgpu_dm_crtc_secure_display_create_work();
-#endif
- if (dc_enable_dmub_notifications(adev->dm.dc)) {
+ if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
init_completion(&adev->dm.dmub_aux_transfer_done);
adev->dm.dmub_notify = kzalloc(sizeof(struct dmub_notification), GFP_KERNEL);
if (!adev->dm.dmub_notify) {
- DRM_INFO("amdgpu: fail to allocate adev->dm.dmub_notify");
+ drm_info(adev_to_drm(adev), "fail to allocate adev->dm.dmub_notify");
goto error;
}
adev->dm.delayed_hpd_wq = create_singlethread_workqueue("amdgpu_dm_hpd_wq");
if (!adev->dm.delayed_hpd_wq) {
- DRM_ERROR("amdgpu: failed to create hpd offload workqueue.\n");
+ drm_err(adev_to_drm(adev), "failed to create hpd offload workqueue.\n");
goto error;
}
amdgpu_dm_outbox_init(adev);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_AUX_REPLY,
dmub_aux_setconfig_callback, false)) {
- DRM_ERROR("amdgpu: fail to register dmub aux callback");
- goto error;
- }
- if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) {
- DRM_ERROR("amdgpu: fail to register dmub hpd callback");
+ drm_err(adev_to_drm(adev), "fail to register dmub aux callback");
goto error;
}
- if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true)) {
- DRM_ERROR("amdgpu: fail to register dmub hpd callback");
+
+ for (size_t i = 0; i < ARRAY_SIZE(adev->dm.fused_io); i++)
+ init_completion(&adev->dm.fused_io[i].replied);
+
+ if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_FUSED_IO,
+ dmub_aux_fused_io_callback, false)) {
+ drm_err(adev_to_drm(adev), "fail to register dmub fused io callback");
goto error;
}
-#endif /* CONFIG_DRM_AMD_DC_DCN */
+ /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive.
+ * It is expected that DMUB will resend any pending notifications at this point. Note
+ * that hpd and hpd_irq handler registration are deferred to register_hpd_handlers() to
+ * align legacy interface initialization sequence. Connection status will be proactivly
+ * detected once in the amdgpu_dm_initialize_drm_device.
+ */
+ dc_enable_dmub_outbox(adev->dm.dc);
+
+ /* DPIA trace goes to dmesg logs only if outbox is enabled */
+ if (amdgpu_dc_debug_mask & DC_ENABLE_DPIA_TRACE)
+ dc_dmub_srv_enable_dpia_trace(adev->dm.dc);
}
if (amdgpu_dm_initialize_drm_device(adev)) {
- DRM_ERROR(
- "amdgpu: failed to initialize sw for display support.\n");
+ drm_err(adev_to_drm(adev),
+ "failed to initialize sw for display support.\n");
goto error;
}
@@ -1598,13 +2189,22 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
adev_to_drm(adev)->mode_config.cursor_height = adev->dm.dc->caps.max_cursor_size;
if (drm_vblank_init(adev_to_drm(adev), adev->dm.display_indexes_num)) {
- DRM_ERROR(
- "amdgpu: failed to initialize sw for display support.\n");
+ drm_err(adev_to_drm(adev),
+ "failed to initialize vblank for display support.\n");
goto error;
}
+#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
+ amdgpu_dm_crtc_secure_display_create_contexts(adev);
+ if (!adev->dm.secure_display_ctx.crtc_ctx)
+ drm_err(adev_to_drm(adev), "failed to initialize secure display contexts.\n");
+
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(4, 0, 1))
+ adev->dm.secure_display_ctx.support_mul_roi = true;
+
+#endif
- DRM_DEBUG_DRIVER("KMS initialized.\n");
+ drm_dbg_driver(adev_to_drm(adev), "KMS initialized.\n");
return 0;
error:
@@ -1613,9 +2213,9 @@ error:
return -EINVAL;
}
-static int amdgpu_dm_early_fini(void *handle)
+static int amdgpu_dm_early_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_dm_audio_fini(adev);
@@ -1626,43 +2226,49 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
{
int i;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
if (adev->dm.vblank_control_workqueue) {
destroy_workqueue(adev->dm.vblank_control_workqueue);
adev->dm.vblank_control_workqueue = NULL;
}
-#endif
- for (i = 0; i < adev->dm.display_indexes_num; i++) {
- drm_encoder_cleanup(&adev->dm.mst_encoders[i].base);
+ if (adev->dm.idle_workqueue) {
+ if (adev->dm.idle_workqueue->running) {
+ adev->dm.idle_workqueue->enable = false;
+ flush_work(&adev->dm.idle_workqueue->work);
+ }
+
+ kfree(adev->dm.idle_workqueue);
+ adev->dm.idle_workqueue = NULL;
}
amdgpu_dm_destroy_drm_device(&adev->dm);
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
- if (adev->dm.crc_rd_wrk) {
- flush_work(&adev->dm.crc_rd_wrk->notify_ta_work);
- kfree(adev->dm.crc_rd_wrk);
- adev->dm.crc_rd_wrk = NULL;
+ if (adev->dm.secure_display_ctx.crtc_ctx) {
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (adev->dm.secure_display_ctx.crtc_ctx[i].crtc) {
+ flush_work(&adev->dm.secure_display_ctx.crtc_ctx[i].notify_ta_work);
+ flush_work(&adev->dm.secure_display_ctx.crtc_ctx[i].forward_roi_work);
+ }
+ }
+ kfree(adev->dm.secure_display_ctx.crtc_ctx);
+ adev->dm.secure_display_ctx.crtc_ctx = NULL;
}
#endif
-#ifdef CONFIG_DRM_AMD_DC_HDCP
if (adev->dm.hdcp_workqueue) {
hdcp_destroy(&adev->dev->kobj, adev->dm.hdcp_workqueue);
adev->dm.hdcp_workqueue = NULL;
}
- if (adev->dm.dc)
+ if (adev->dm.dc) {
dc_deinit_callbacks(adev->dm.dc);
-#endif
-
- dc_dmub_srv_destroy(&adev->dm.dc->ctx->dmub_srv);
-
- if (dc_enable_dmub_notifications(adev->dm.dc)) {
- kfree(adev->dm.dmub_notify);
- adev->dm.dmub_notify = NULL;
- destroy_workqueue(adev->dm.delayed_hpd_wq);
- adev->dm.delayed_hpd_wq = NULL;
+ dc_dmub_srv_destroy(&adev->dm.dc->ctx->dmub_srv);
+ if (dc_enable_dmub_notifications(adev->dm.dc)) {
+ kfree(adev->dm.dmub_notify);
+ adev->dm.dmub_notify = NULL;
+ destroy_workqueue(adev->dm.delayed_hpd_wq);
+ adev->dm.delayed_hpd_wq = NULL;
+ }
}
if (adev->dm.dmub_bo)
@@ -1670,7 +2276,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
&adev->dm.dmub_bo_gpu_addr,
&adev->dm.dmub_bo_cpu_addr);
- if (adev->dm.hpd_rx_offload_wq) {
+ if (adev->dm.hpd_rx_offload_wq && adev->dm.dc) {
for (i = 0; i < adev->dm.dc->caps.max_links; i++) {
if (adev->dm.hpd_rx_offload_wq[i].wq) {
destroy_workqueue(adev->dm.hpd_rx_offload_wq[i].wq);
@@ -1702,8 +2308,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
mutex_destroy(&adev->dm.audio_lock);
mutex_destroy(&adev->dm.dc_lock);
-
- return;
+ mutex_destroy(&adev->dm.dpia_aux_lock);
}
static int load_dmcu_fw(struct amdgpu_device *adev)
@@ -1712,7 +2317,7 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
int r;
const struct dmcu_firmware_header_v1_0 *hdr;
- switch(adev->asic_type) {
+ switch (adev->asic_type) {
#if defined(CONFIG_DRM_AMD_DC_SI)
case CHIP_TAHITI:
case CHIP_PITCAIRN:
@@ -1748,7 +2353,7 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
return 0;
break;
default:
- switch (adev->ip_versions[DCE_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(2, 0, 2):
case IP_VERSION(2, 0, 3):
case IP_VERSION(2, 0, 0):
@@ -1759,11 +2364,20 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
case IP_VERSION(3, 0, 1):
case IP_VERSION(3, 1, 2):
case IP_VERSION(3, 1, 3):
+ case IP_VERSION(3, 1, 4):
+ case IP_VERSION(3, 1, 5):
+ case IP_VERSION(3, 1, 6):
+ case IP_VERSION(3, 2, 0):
+ case IP_VERSION(3, 2, 1):
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 6, 0):
+ case IP_VERSION(4, 0, 1):
return 0;
default:
break;
}
- DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type);
+ drm_err(adev_to_drm(adev), "Unsupported ASIC type: 0x%X\n", adev->asic_type);
return -EINVAL;
}
@@ -1772,25 +2386,18 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
return 0;
}
- r = request_firmware_direct(&adev->dm.fw_dmcu, fw_name_dmcu, adev->dev);
- if (r == -ENOENT) {
+ r = amdgpu_ucode_request(adev, &adev->dm.fw_dmcu, AMDGPU_UCODE_REQUIRED,
+ "%s", fw_name_dmcu);
+ if (r == -ENODEV) {
/* DMCU firmware is not necessary, so don't raise a fuss if it's missing */
DRM_DEBUG_KMS("dm: DMCU firmware not found\n");
adev->dm.fw_dmcu = NULL;
return 0;
}
if (r) {
- dev_err(adev->dev, "amdgpu_dm: Can't load firmware \"%s\"\n",
- fw_name_dmcu);
- return r;
- }
-
- r = amdgpu_ucode_validate(adev->dm.fw_dmcu);
- if (r) {
- dev_err(adev->dev, "amdgpu_dm: Can't validate firmware \"%s\"\n",
+ drm_err(adev_to_drm(adev), "amdgpu_dm: Can't validate firmware \"%s\"\n",
fw_name_dmcu);
- release_firmware(adev->dm.fw_dmcu);
- adev->dm.fw_dmcu = NULL;
+ amdgpu_ucode_release(&adev->dm.fw_dmcu);
return r;
}
@@ -1832,47 +2439,70 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
struct dmub_srv_create_params create_params;
struct dmub_srv_region_params region_params;
struct dmub_srv_region_info region_info;
- struct dmub_srv_fb_params fb_params;
+ struct dmub_srv_memory_params memory_params;
struct dmub_srv_fb_info *fb_info;
struct dmub_srv *dmub_srv;
const struct dmcub_firmware_header_v1_0 *hdr;
- const char *fw_name_dmub;
enum dmub_asic dmub_asic;
enum dmub_status status;
+ static enum dmub_window_memory_type window_memory_type[DMUB_WINDOW_TOTAL] = {
+ DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_0_INST_CONST
+ DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_1_STACK
+ DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_2_BSS_DATA
+ DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_3_VBIOS
+ DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_4_MAILBOX
+ DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_5_TRACEBUFF
+ DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_6_FW_STATE
+ DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_7_SCRATCH_MEM
+ DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_IB_MEM
+ DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_SHARED_STATE
+ };
int r;
- switch (adev->ip_versions[DCE_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(2, 1, 0):
dmub_asic = DMUB_ASIC_DCN21;
- fw_name_dmub = FIRMWARE_RENOIR_DMUB;
- if (ASICREV_IS_GREEN_SARDINE(adev->external_rev_id))
- fw_name_dmub = FIRMWARE_GREEN_SARDINE_DMUB;
break;
case IP_VERSION(3, 0, 0):
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) {
- dmub_asic = DMUB_ASIC_DCN30;
- fw_name_dmub = FIRMWARE_SIENNA_CICHLID_DMUB;
- } else {
- dmub_asic = DMUB_ASIC_DCN30;
- fw_name_dmub = FIRMWARE_NAVY_FLOUNDER_DMUB;
- }
+ dmub_asic = DMUB_ASIC_DCN30;
break;
case IP_VERSION(3, 0, 1):
dmub_asic = DMUB_ASIC_DCN301;
- fw_name_dmub = FIRMWARE_VANGOGH_DMUB;
break;
case IP_VERSION(3, 0, 2):
dmub_asic = DMUB_ASIC_DCN302;
- fw_name_dmub = FIRMWARE_DIMGREY_CAVEFISH_DMUB;
break;
case IP_VERSION(3, 0, 3):
dmub_asic = DMUB_ASIC_DCN303;
- fw_name_dmub = FIRMWARE_BEIGE_GOBY_DMUB;
break;
case IP_VERSION(3, 1, 2):
case IP_VERSION(3, 1, 3):
dmub_asic = (adev->external_rev_id == YELLOW_CARP_B0) ? DMUB_ASIC_DCN31B : DMUB_ASIC_DCN31;
- fw_name_dmub = FIRMWARE_YELLOW_CARP_DMUB;
+ break;
+ case IP_VERSION(3, 1, 4):
+ dmub_asic = DMUB_ASIC_DCN314;
+ break;
+ case IP_VERSION(3, 1, 5):
+ dmub_asic = DMUB_ASIC_DCN315;
+ break;
+ case IP_VERSION(3, 1, 6):
+ dmub_asic = DMUB_ASIC_DCN316;
+ break;
+ case IP_VERSION(3, 2, 0):
+ dmub_asic = DMUB_ASIC_DCN32;
+ break;
+ case IP_VERSION(3, 2, 1):
+ dmub_asic = DMUB_ASIC_DCN321;
+ break;
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ dmub_asic = DMUB_ASIC_DCN35;
+ break;
+ case IP_VERSION(3, 6, 0):
+ dmub_asic = DMUB_ASIC_DCN36;
+ break;
+ case IP_VERSION(4, 0, 1):
+ dmub_asic = DMUB_ASIC_DCN401;
break;
default:
@@ -1880,18 +2510,6 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
return 0;
}
- r = request_firmware_direct(&adev->dm.dmub_fw, fw_name_dmub, adev->dev);
- if (r) {
- DRM_ERROR("DMUB firmware loading failed: %d\n", r);
- return 0;
- }
-
- r = amdgpu_ucode_validate(adev->dm.dmub_fw);
- if (r) {
- DRM_ERROR("Couldn't validate DMUB firmware: %d\n", r);
- return 0;
- }
-
hdr = (const struct dmcub_firmware_header_v1_0 *)adev->dm.dmub_fw->data;
adev->dm.dmcub_fw_version = le32_to_cpu(hdr->header.ucode_version);
@@ -1903,7 +2521,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(hdr->inst_const_bytes), PAGE_SIZE);
- DRM_INFO("Loading DMUB firmware via PSP: version=0x%08X\n",
+ drm_info(adev_to_drm(adev), "Loading DMUB firmware via PSP: version=0x%08X\n",
adev->dm.dmcub_fw_version);
}
@@ -1912,7 +2530,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
dmub_srv = adev->dm.dmub_srv;
if (!dmub_srv) {
- DRM_ERROR("Failed to allocate DMUB service!\n");
+ drm_err(adev_to_drm(adev), "Failed to allocate DMUB service!\n");
return -ENOMEM;
}
@@ -1925,7 +2543,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
/* Create the DMUB service. */
status = dmub_srv_create(dmub_srv, &create_params);
if (status != DMUB_STATUS_OK) {
- DRM_ERROR("Error creating DMUB service: %d\n", status);
+ drm_err(adev_to_drm(adev), "Error creating DMUB service: %d\n", status);
return -EINVAL;
}
@@ -1944,12 +2562,13 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
adev->dm.dmub_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes) +
PSP_HEADER_BYTES;
+ region_params.window_memory_type = window_memory_type;
status = dmub_srv_calc_region_info(dmub_srv, &region_params,
&region_info);
if (status != DMUB_STATUS_OK) {
- DRM_ERROR("Error calculating DMUB region info: %d\n", status);
+ drm_err(adev_to_drm(adev), "Error calculating DMUB region info: %d\n", status);
return -EINVAL;
}
@@ -1958,42 +2577,57 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
* TODO: Move this into GART.
*/
r = amdgpu_bo_create_kernel(adev, region_info.fb_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM, &adev->dm.dmub_bo,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->dm.dmub_bo,
&adev->dm.dmub_bo_gpu_addr,
&adev->dm.dmub_bo_cpu_addr);
if (r)
return r;
/* Rebase the regions on the framebuffer address. */
- memset(&fb_params, 0, sizeof(fb_params));
- fb_params.cpu_addr = adev->dm.dmub_bo_cpu_addr;
- fb_params.gpu_addr = adev->dm.dmub_bo_gpu_addr;
- fb_params.region_info = &region_info;
+ memset(&memory_params, 0, sizeof(memory_params));
+ memory_params.cpu_fb_addr = adev->dm.dmub_bo_cpu_addr;
+ memory_params.gpu_fb_addr = adev->dm.dmub_bo_gpu_addr;
+ memory_params.region_info = &region_info;
+ memory_params.window_memory_type = window_memory_type;
adev->dm.dmub_fb_info =
kzalloc(sizeof(*adev->dm.dmub_fb_info), GFP_KERNEL);
fb_info = adev->dm.dmub_fb_info;
if (!fb_info) {
- DRM_ERROR(
+ drm_err(adev_to_drm(adev),
"Failed to allocate framebuffer info for DMUB service!\n");
return -ENOMEM;
}
- status = dmub_srv_calc_fb_info(dmub_srv, &fb_params, fb_info);
+ status = dmub_srv_calc_mem_info(dmub_srv, &memory_params, fb_info);
if (status != DMUB_STATUS_OK) {
- DRM_ERROR("Error calculating DMUB FB info: %d\n", status);
+ drm_err(adev_to_drm(adev), "Error calculating DMUB FB info: %d\n", status);
return -EINVAL;
}
+ adev->dm.bb_from_dmub = dm_dmub_get_vbios_bounding_box(adev);
+
return 0;
}
-static int dm_sw_init(void *handle)
+static int dm_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
+ adev->dm.cgs_device = amdgpu_cgs_create_device(adev);
+
+ if (!adev->dm.cgs_device) {
+ drm_err(adev_to_drm(adev), "failed to create cgs device.\n");
+ return -EINVAL;
+ }
+
+ /* Moved from dm init since we need to use allocations for storing bounding box data */
+ INIT_LIST_HEAD(&adev->dm.da_list);
+
r = dm_dmub_sw_init(adev);
if (r)
return r;
@@ -2001,23 +2635,33 @@ static int dm_sw_init(void *handle)
return load_dmcu_fw(adev);
}
-static int dm_sw_fini(void *handle)
+static int dm_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct dal_allocation *da;
+
+ list_for_each_entry(da, &adev->dm.da_list, list) {
+ if (adev->dm.bb_from_dmub == (void *) da->cpu_ptr) {
+ amdgpu_bo_free_kernel(&da->bo, &da->gpu_addr, &da->cpu_ptr);
+ list_del(&da->list);
+ kfree(da);
+ adev->dm.bb_from_dmub = NULL;
+ break;
+ }
+ }
+
kfree(adev->dm.dmub_fb_info);
adev->dm.dmub_fb_info = NULL;
if (adev->dm.dmub_srv) {
dmub_srv_destroy(adev->dm.dmub_srv);
+ kfree(adev->dm.dmub_srv);
adev->dm.dmub_srv = NULL;
}
- release_firmware(adev->dm.dmub_fw);
- adev->dm.dmub_fw = NULL;
-
- release_firmware(adev->dm.fw_dmcu);
- adev->dm.fw_dmcu = NULL;
+ amdgpu_ucode_release(&adev->dm.dmub_fw);
+ amdgpu_ucode_release(&adev->dm.fw_dmcu);
return 0;
}
@@ -2031,18 +2675,24 @@ static int detect_mst_link_for_all_connectors(struct drm_device *dev)
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
if (aconnector->dc_link->type == dc_connection_mst_branch &&
aconnector->mst_mgr.aux) {
- DRM_DEBUG_DRIVER("DM_MST: starting TM on aconnector: %p [id: %d]\n",
+ drm_dbg_kms(dev, "DM_MST: starting TM on aconnector: %p [id: %d]\n",
aconnector,
aconnector->base.base.id);
ret = drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_mgr, true);
if (ret < 0) {
- DRM_ERROR("DM_MST: Failed to start MST\n");
+ drm_err(dev, "DM_MST: Failed to start MST\n");
aconnector->dc_link->type =
dc_connection_single;
+ ret = dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx,
+ aconnector->dc_link);
break;
}
}
@@ -2052,9 +2702,9 @@ static int detect_mst_link_for_all_connectors(struct drm_device *dev)
return ret;
}
-static int dm_late_init(void *handle)
+static int dm_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct dmcu_iram_parameters params;
unsigned int linear_lut[16];
@@ -2078,9 +2728,9 @@ static int dm_late_init(void *handle)
*/
params.min_abm_backlight = 0x28F;
/* In the case where abm is implemented on dmcub,
- * dmcu object will be null.
- * ABM 2.4 and up are implemented on dmcub.
- */
+ * dmcu object will be null.
+ * ABM 2.4 and up are implemented on dmcub.
+ */
if (dmcu) {
if (!dmcu_load_iram(dmcu, params))
return -EINVAL;
@@ -2088,7 +2738,7 @@ static int dm_late_init(void *handle)
struct dc_link *edp_links[MAX_NUM_EDP];
int edp_num;
- get_edp_links(adev->dm.dc, edp_links, &edp_num);
+ dc_get_edp_links(adev->dm.dc, edp_links, &edp_num);
for (i = 0; i < edp_num; i++) {
if (!dmub_init_abm_config(adev->dm.dc->res_pool, params, i))
return -EINVAL;
@@ -2098,20 +2748,115 @@ static int dm_late_init(void *handle)
return detect_mst_link_for_all_connectors(adev_to_drm(adev));
}
+static void resume_mst_branch_status(struct drm_dp_mst_topology_mgr *mgr)
+{
+ u8 buf[UUID_SIZE];
+ guid_t guid;
+ int ret;
+
+ mutex_lock(&mgr->lock);
+ if (!mgr->mst_primary)
+ goto out_fail;
+
+ if (drm_dp_read_dpcd_caps(mgr->aux, mgr->dpcd) < 0) {
+ drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n");
+ goto out_fail;
+ }
+
+ ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL,
+ DP_MST_EN |
+ DP_UP_REQ_EN |
+ DP_UPSTREAM_IS_SRC);
+ if (ret < 0) {
+ drm_dbg_kms(mgr->dev, "mst write failed - undocked during suspend?\n");
+ goto out_fail;
+ }
+
+ /* Some hubs forget their guids after they resume */
+ ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, buf, sizeof(buf));
+ if (ret != sizeof(buf)) {
+ drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n");
+ goto out_fail;
+ }
+
+ import_guid(&guid, buf);
+
+ if (guid_is_null(&guid)) {
+ guid_gen(&guid);
+ export_guid(buf, &guid);
+
+ ret = drm_dp_dpcd_write(mgr->aux, DP_GUID, buf, sizeof(buf));
+
+ if (ret != sizeof(buf)) {
+ drm_dbg_kms(mgr->dev, "check mstb guid failed - undocked during suspend?\n");
+ goto out_fail;
+ }
+ }
+
+ guid_copy(&mgr->mst_primary->guid, &guid);
+
+out_fail:
+ mutex_unlock(&mgr->lock);
+}
+
+void hdmi_cec_unset_edid(struct amdgpu_dm_connector *aconnector)
+{
+ struct cec_notifier *n = aconnector->notifier;
+
+ if (!n)
+ return;
+
+ cec_notifier_phys_addr_invalidate(n);
+}
+
+void hdmi_cec_set_edid(struct amdgpu_dm_connector *aconnector)
+{
+ struct drm_connector *connector = &aconnector->base;
+ struct cec_notifier *n = aconnector->notifier;
+
+ if (!n)
+ return;
+
+ cec_notifier_set_phys_addr(n,
+ connector->display_info.source_physical_address);
+}
+
+static void s3_handle_hdmi_cec(struct drm_device *ddev, bool suspend)
+{
+ struct amdgpu_dm_connector *aconnector;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter conn_iter;
+
+ drm_connector_list_iter_begin(ddev, &conn_iter);
+ drm_for_each_connector_iter(connector, &conn_iter) {
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
+ aconnector = to_amdgpu_dm_connector(connector);
+ if (suspend)
+ hdmi_cec_unset_edid(aconnector);
+ else
+ hdmi_cec_set_edid(aconnector);
+ }
+ drm_connector_list_iter_end(&conn_iter);
+}
+
static void s3_handle_mst(struct drm_device *dev, bool suspend)
{
struct amdgpu_dm_connector *aconnector;
struct drm_connector *connector;
struct drm_connector_list_iter iter;
struct drm_dp_mst_topology_mgr *mgr;
- int ret;
- bool need_hotplug = false;
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
if (aconnector->dc_link->type != dc_connection_mst_branch ||
- aconnector->mst_port)
+ aconnector->mst_root)
continue;
mgr = &aconnector->mst_mgr;
@@ -2119,27 +2864,29 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend)
if (suspend) {
drm_dp_mst_topology_mgr_suspend(mgr);
} else {
- ret = drm_dp_mst_topology_mgr_resume(mgr, true);
- if (ret < 0) {
- drm_dp_mst_topology_mgr_set_mst(mgr, false);
- need_hotplug = true;
- }
+ /* if extended timeout is supported in hardware,
+ * default to LTTPR timeout (3.2ms) first as a W/A for DP link layer
+ * CTS 4.2.1.1 regression introduced by CTS specs requirement update.
+ */
+ try_to_configure_aux_timeout(aconnector->dc_link->ddc, LINK_AUX_DEFAULT_LTTPR_TIMEOUT_PERIOD);
+ if (!dp_is_lttpr_present(aconnector->dc_link))
+ try_to_configure_aux_timeout(aconnector->dc_link->ddc, LINK_AUX_DEFAULT_TIMEOUT_PERIOD);
+
+ /* TODO: move resume_mst_branch_status() back into drm mst resume
+ * once topology probing work is pulled out of mst resume into an mst
+ * resume 2nd step. The mst resume 2nd step should be called after the
+ * old state has been restored (i.e. drm_atomic_helper_resume()).
+ */
+ resume_mst_branch_status(mgr);
}
}
drm_connector_list_iter_end(&iter);
-
- if (need_hotplug)
- drm_kms_helper_hotplug_event(dev);
}
static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev)
{
- struct smu_context *smu = &adev->smu;
int ret = 0;
- if (!is_support_sw_smu(adev))
- return 0;
-
/* This interface is for dGPU Navi1x. The Linux dc-pplib interface depends
* on window driver dc implementation.
* For Navi1x, clock settings of dcn watermarks are fixed. the settings
@@ -2170,7 +2917,7 @@ static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev)
* therefore, this function applies to navi10/12/14 but not Renoir
* *
*/
- switch (adev->ip_versions[DCE_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(2, 0, 2):
case IP_VERSION(2, 0, 0):
break;
@@ -2178,18 +2925,45 @@ static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev)
return 0;
}
- ret = smu_write_watermarks_table(smu);
+ ret = amdgpu_dpm_write_watermarks_table(adev);
if (ret) {
- DRM_ERROR("Failed to update WMTABLE!\n");
+ drm_err(adev_to_drm(adev), "Failed to update WMTABLE!\n");
return ret;
}
return 0;
}
+static int dm_oem_i2c_hw_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_display_manager *dm = &adev->dm;
+ struct amdgpu_i2c_adapter *oem_i2c;
+ struct ddc_service *oem_ddc_service;
+ int r;
+
+ oem_ddc_service = dc_get_oem_i2c_device(adev->dm.dc);
+ if (oem_ddc_service) {
+ oem_i2c = create_i2c(oem_ddc_service, true);
+ if (!oem_i2c) {
+ drm_info(adev_to_drm(adev), "Failed to create oem i2c adapter data\n");
+ return -ENOMEM;
+ }
+
+ r = devm_i2c_add_adapter(adev->dev, &oem_i2c->base);
+ if (r) {
+ drm_info(adev_to_drm(adev), "Failed to register oem i2c\n");
+ kfree(oem_i2c);
+ return r;
+ }
+ dm->oem_i2c = oem_i2c;
+ }
+
+ return 0;
+}
+
/**
* dm_hw_init() - Initialize DC device
- * @handle: The base driver device containing the amdgpu_dm device.
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the &struct amdgpu_display_manager device. This involves calling
* the initializers of each DM component, then populating the struct with them.
@@ -2207,27 +2981,35 @@ static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev)
* - Vblank support
* - Debug FS entries, if enabled
*/
-static int dm_hw_init(void *handle)
+static int dm_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
/* Create DAL display manager */
- amdgpu_dm_init(adev);
+ r = amdgpu_dm_init(adev);
+ if (r)
+ return r;
amdgpu_dm_hpd_init(adev);
+ r = dm_oem_i2c_hw_init(adev);
+ if (r)
+ drm_info(adev_to_drm(adev), "Failed to add OEM i2c bus\n");
+
return 0;
}
/**
* dm_hw_fini() - Teardown DC device
- * @handle: The base driver device containing the amdgpu_dm device.
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Teardown components within &struct amdgpu_display_manager that require
* cleanup. This involves cleaning up the DRM device, DC, and any modules that
* were loaded. Also flush IRQ workqueues and disable them.
*/
-static int dm_hw_fini(void *handle)
+static int dm_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_dm_hpd_fini(adev);
@@ -2237,9 +3019,6 @@ static int dm_hw_fini(void *handle)
}
-static int dm_enable_vblank(struct drm_crtc *crtc);
-static void dm_disable_vblank(struct drm_crtc *crtc);
-
static void dm_gpureset_toggle_interrupts(struct amdgpu_device *adev,
struct dc_state *state, bool enable)
{
@@ -2255,40 +3034,51 @@ static void dm_gpureset_toggle_interrupts(struct amdgpu_device *adev,
if (acrtc && state->stream_status[i].plane_count != 0) {
irq_source = IRQ_TYPE_PFLIP + acrtc->otg_inst;
rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY;
- DRM_DEBUG_VBL("crtc %d - vupdate irq %sabling: r=%d\n",
- acrtc->crtc_id, enable ? "en" : "dis", rc);
if (rc)
- DRM_WARN("Failed to %s pflip interrupts\n",
+ drm_warn(adev_to_drm(adev), "Failed to %s pflip interrupts\n",
enable ? "enable" : "disable");
- if (enable) {
- rc = dm_enable_vblank(&acrtc->base);
+ if (dc_supports_vrr(adev->dm.dc->ctx->dce_version)) {
+ if (enable) {
+ if (amdgpu_dm_crtc_vrr_active(
+ to_dm_crtc_state(acrtc->base.state)))
+ rc = amdgpu_dm_crtc_set_vupdate_irq(
+ &acrtc->base, true);
+ } else
+ rc = amdgpu_dm_crtc_set_vupdate_irq(
+ &acrtc->base, false);
+
if (rc)
- DRM_WARN("Failed to enable vblank interrupts\n");
- } else {
- dm_disable_vblank(&acrtc->base);
+ drm_warn(adev_to_drm(adev), "Failed to %sable vupdate interrupt\n",
+ enable ? "en" : "dis");
}
+ irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst;
+ /* During gpu-reset we disable and then enable vblank irq, so
+ * don't use amdgpu_irq_get/put() to avoid refcount change.
+ */
+ if (!dc_interrupt_set(adev->dm.dc, irq_source, enable))
+ drm_warn(adev_to_drm(adev), "Failed to %sable vblank interrupt\n", enable ? "en" : "dis");
}
}
}
+DEFINE_FREE(state_release, struct dc_state *, if (_T) dc_state_release(_T))
+
static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc)
{
- struct dc_state *context = NULL;
- enum dc_status res = DC_ERROR_UNEXPECTED;
+ struct dc_state *context __free(state_release) = NULL;
int i;
struct dc_stream_state *del_streams[MAX_PIPES];
int del_streams_count = 0;
+ struct dc_commit_streams_params params = {};
memset(del_streams, 0, sizeof(del_streams));
- context = dc_create_state(dc);
+ context = dc_state_create_current_copy(dc);
if (context == NULL)
- goto context_alloc_fail;
-
- dc_resource_state_copy_construct_current(dc, context);
+ return DC_ERROR_UNEXPECTED;
/* First remove from context all streams */
for (i = 0; i < context->stream_count; i++) {
@@ -2299,71 +3089,134 @@ static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc)
/* Remove all planes for removed streams and then remove the streams */
for (i = 0; i < del_streams_count; i++) {
- if (!dc_rem_all_planes_for_stream(dc, del_streams[i], context)) {
- res = DC_FAIL_DETACH_SURFACES;
- goto fail;
- }
+ enum dc_status res;
+
+ if (!dc_state_rem_all_planes_for_stream(dc, del_streams[i], context))
+ return DC_FAIL_DETACH_SURFACES;
- res = dc_remove_stream_from_ctx(dc, context, del_streams[i]);
+ res = dc_state_remove_stream(dc, context, del_streams[i]);
if (res != DC_OK)
- goto fail;
+ return res;
}
+ params.streams = context->streams;
+ params.stream_count = context->stream_count;
- res = dc_validate_global_state(dc, context, false);
+ return dc_commit_streams(dc, &params);
+}
- if (res != DC_OK) {
- DRM_ERROR("%s:resource validation failed, dc_status:%d\n", __func__, res);
- goto fail;
+static void hpd_rx_irq_work_suspend(struct amdgpu_display_manager *dm)
+{
+ int i;
+
+ if (dm->hpd_rx_offload_wq) {
+ for (i = 0; i < dm->dc->caps.max_links; i++)
+ flush_workqueue(dm->hpd_rx_offload_wq[i].wq);
}
+}
- res = dc_commit_state(dc, context);
+static int dm_cache_state(struct amdgpu_device *adev)
+{
+ int r;
-fail:
- dc_release_state(context);
+ adev->dm.cached_state = drm_atomic_helper_suspend(adev_to_drm(adev));
+ if (IS_ERR(adev->dm.cached_state)) {
+ r = PTR_ERR(adev->dm.cached_state);
+ adev->dm.cached_state = NULL;
+ }
-context_alloc_fail:
- return res;
+ return adev->dm.cached_state ? 0 : r;
}
-static void hpd_rx_irq_work_suspend(struct amdgpu_display_manager *dm)
+static void dm_destroy_cached_state(struct amdgpu_device *adev)
{
+ struct amdgpu_display_manager *dm = &adev->dm;
+ struct drm_device *ddev = adev_to_drm(adev);
+ struct dm_plane_state *dm_new_plane_state;
+ struct drm_plane_state *new_plane_state;
+ struct dm_crtc_state *dm_new_crtc_state;
+ struct drm_crtc_state *new_crtc_state;
+ struct drm_plane *plane;
+ struct drm_crtc *crtc;
int i;
- if (dm->hpd_rx_offload_wq) {
- for (i = 0; i < dm->dc->caps.max_links; i++)
- flush_workqueue(dm->hpd_rx_offload_wq[i].wq);
+ if (!dm->cached_state)
+ return;
+
+ /* Force mode set in atomic commit */
+ for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) {
+ new_crtc_state->active_changed = true;
+ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+ reset_freesync_config_for_crtc(dm_new_crtc_state);
+ }
+
+ /*
+ * atomic_check is expected to create the dc states. We need to release
+ * them here, since they were duplicated as part of the suspend
+ * procedure.
+ */
+ for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) {
+ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+ if (dm_new_crtc_state->stream) {
+ WARN_ON(kref_read(&dm_new_crtc_state->stream->refcount) > 1);
+ dc_stream_release(dm_new_crtc_state->stream);
+ dm_new_crtc_state->stream = NULL;
+ }
+ dm_new_crtc_state->base.color_mgmt_changed = true;
}
+
+ for_each_new_plane_in_state(dm->cached_state, plane, new_plane_state, i) {
+ dm_new_plane_state = to_dm_plane_state(new_plane_state);
+ if (dm_new_plane_state->dc_state) {
+ WARN_ON(kref_read(&dm_new_plane_state->dc_state->refcount) > 1);
+ dc_plane_state_release(dm_new_plane_state->dc_state);
+ dm_new_plane_state->dc_state = NULL;
+ }
+ }
+
+ drm_atomic_helper_resume(ddev, dm->cached_state);
+
+ dm->cached_state = NULL;
}
-static int dm_suspend(void *handle)
+static int dm_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_display_manager *dm = &adev->dm;
- int ret = 0;
if (amdgpu_in_reset(adev)) {
+ enum dc_status res;
+
mutex_lock(&dm->dc_lock);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
dc_allow_idle_optimizations(adev->dm.dc, false);
-#endif
- dm->cached_dc_state = dc_copy_state(dm->dc->current_state);
+ dm->cached_dc_state = dc_state_create_copy(dm->dc->current_state);
- dm_gpureset_toggle_interrupts(adev, dm->cached_dc_state, false);
+ if (dm->cached_dc_state)
+ dm_gpureset_toggle_interrupts(adev, dm->cached_dc_state, false);
- amdgpu_dm_commit_zero_streams(dm->dc);
+ res = amdgpu_dm_commit_zero_streams(dm->dc);
+ if (res != DC_OK) {
+ drm_err(adev_to_drm(adev), "Failed to commit zero streams: %d\n", res);
+ return -EINVAL;
+ }
amdgpu_dm_irq_suspend(adev);
hpd_rx_irq_work_suspend(dm);
- return ret;
+ return 0;
}
- WARN_ON(adev->dm.cached_state);
- adev->dm.cached_state = drm_atomic_helper_suspend(adev_to_drm(adev));
+ if (!adev->dm.cached_state) {
+ int r = dm_cache_state(adev);
+
+ if (r)
+ return r;
+ }
+
+ s3_handle_hdmi_cec(adev_to_drm(adev), true);
s3_handle_mst(adev_to_drm(adev), true);
@@ -2373,14 +3226,19 @@ static int dm_suspend(void *handle)
dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3);
+ if (dm->dc->caps.ips_support && adev->in_s0ix)
+ dc_allow_idle_optimizations(dm->dc, true);
+
+ dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3);
+
return 0;
}
-static struct amdgpu_dm_connector *
+struct drm_connector *
amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state,
struct drm_crtc *crtc)
{
- uint32_t i;
+ u32 i;
struct drm_connector_state *new_con_state;
struct drm_connector *connector;
struct drm_crtc *crtc_from_state;
@@ -2389,7 +3247,7 @@ amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state,
crtc_from_state = new_con_state->crtc;
if (crtc_from_state == crtc)
- return to_amdgpu_dm_connector(connector);
+ return connector;
}
return NULL;
@@ -2401,6 +3259,7 @@ static void emulated_link_detect(struct dc_link *link)
struct display_sink_capability sink_caps = { 0 };
enum dc_edid_status edid_status;
struct dc_context *dc_ctx = link->ctx;
+ struct drm_device *dev = adev_to_drm(dc_ctx->driver_context);
struct dc_sink *sink = NULL;
struct dc_sink *prev_sink = NULL;
@@ -2450,7 +3309,7 @@ static void emulated_link_detect(struct dc_link *link)
}
default:
- DC_ERROR("Invalid connector type! signal:%d\n",
+ drm_err(dev, "Invalid connector type! signal:%d\n",
link->connector_signal);
return;
}
@@ -2460,7 +3319,7 @@ static void emulated_link_detect(struct dc_link *link)
sink = dc_sink_create(&sink_init_data);
if (!sink) {
- DC_ERROR("Failed to create sink!\n");
+ drm_err(dev, "Failed to create sink!\n");
return;
}
@@ -2473,7 +3332,7 @@ static void emulated_link_detect(struct dc_link *link)
sink);
if (edid_status != EDID_OK)
- DC_ERROR("Failed to read EDID");
+ drm_err(dev, "Failed to read EDID\n");
}
@@ -2486,83 +3345,131 @@ static void dm_gpureset_commit_state(struct dc_state *dc_state,
struct dc_scaling_info scaling_infos[MAX_SURFACES];
struct dc_flip_addrs flip_addrs[MAX_SURFACES];
struct dc_stream_update stream_update;
- } * bundle;
+ } *bundle __free(kfree);
int k, m;
bundle = kzalloc(sizeof(*bundle), GFP_KERNEL);
if (!bundle) {
- dm_error("Failed to allocate update bundle\n");
- goto cleanup;
+ drm_err(dm->ddev, "Failed to allocate update bundle\n");
+ return;
}
for (k = 0; k < dc_state->stream_count; k++) {
bundle->stream_update.stream = dc_state->streams[k];
- for (m = 0; m < dc_state->stream_status->plane_count; m++) {
+ for (m = 0; m < dc_state->stream_status[k].plane_count; m++) {
bundle->surface_updates[m].surface =
- dc_state->stream_status->plane_states[m];
+ dc_state->stream_status[k].plane_states[m];
bundle->surface_updates[m].surface->force_full_update =
true;
}
- dc_commit_updates_for_stream(
- dm->dc, bundle->surface_updates,
- dc_state->stream_status->plane_count,
- dc_state->streams[k], &bundle->stream_update, dc_state);
+
+ update_planes_and_stream_adapter(dm->dc,
+ UPDATE_TYPE_FULL,
+ dc_state->stream_status[k].plane_count,
+ dc_state->streams[k],
+ &bundle->stream_update,
+ bundle->surface_updates);
}
+}
-cleanup:
- kfree(bundle);
+static void apply_delay_after_dpcd_poweroff(struct amdgpu_device *adev,
+ struct dc_sink *sink)
+{
+ struct dc_panel_patch *ppatch = NULL;
- return;
+ if (!sink)
+ return;
+
+ ppatch = &sink->edid_caps.panel_patch;
+ if (ppatch->wait_after_dpcd_poweroff_ms) {
+ msleep(ppatch->wait_after_dpcd_poweroff_ms);
+ drm_dbg_driver(adev_to_drm(adev),
+ "%s: adding a %ds delay as w/a for panel\n",
+ __func__,
+ ppatch->wait_after_dpcd_poweroff_ms / 1000);
+ }
}
-static void dm_set_dpms_off(struct dc_link *link, struct dm_crtc_state *acrtc_state)
+/**
+ * amdgpu_dm_dump_links_and_sinks - Debug dump of all DC links and their sinks
+ * @adev: amdgpu device pointer
+ *
+ * Iterates through all DC links and dumps information about local and remote
+ * (MST) sinks. Should be called after connector detection is complete to see
+ * the final state of all links.
+ */
+static void amdgpu_dm_dump_links_and_sinks(struct amdgpu_device *adev)
{
- struct dc_stream_state *stream_state;
- struct amdgpu_dm_connector *aconnector = link->priv;
- struct amdgpu_device *adev = drm_to_adev(aconnector->base.dev);
- struct dc_stream_update stream_update;
- bool dpms_off = true;
+ struct dc *dc = adev->dm.dc;
+ struct drm_device *dev = adev_to_drm(adev);
+ int li;
- memset(&stream_update, 0, sizeof(stream_update));
- stream_update.dpms_off = &dpms_off;
+ if (!dc)
+ return;
- mutex_lock(&adev->dm.dc_lock);
- stream_state = dc_stream_find_from_link(link);
+ for (li = 0; li < dc->link_count; li++) {
+ struct dc_link *l = dc->links[li];
+ const char *name = NULL;
+ int rs;
- if (stream_state == NULL) {
- DRM_DEBUG_DRIVER("Error finding stream state associated with link!\n");
- mutex_unlock(&adev->dm.dc_lock);
- return;
+ if (!l)
+ continue;
+ if (l->local_sink && l->local_sink->edid_caps.display_name[0])
+ name = l->local_sink->edid_caps.display_name;
+ else
+ name = "n/a";
+
+ drm_dbg_kms(dev,
+ "LINK_DUMP[%d]: local_sink=%p type=%d sink_signal=%d sink_count=%u edid_name=%s mst_capable=%d mst_alloc_streams=%d\n",
+ li,
+ l->local_sink,
+ l->type,
+ l->local_sink ? l->local_sink->sink_signal : SIGNAL_TYPE_NONE,
+ l->sink_count,
+ name,
+ l->dpcd_caps.is_mst_capable,
+ l->mst_stream_alloc_table.stream_count);
+
+ /* Dump remote (MST) sinks if any */
+ for (rs = 0; rs < l->sink_count; rs++) {
+ struct dc_sink *rsink = l->remote_sinks[rs];
+ const char *rname = NULL;
+
+ if (!rsink)
+ continue;
+ if (rsink->edid_caps.display_name[0])
+ rname = rsink->edid_caps.display_name;
+ else
+ rname = "n/a";
+ drm_dbg_kms(dev,
+ " REMOTE_SINK[%d:%d]: sink=%p signal=%d edid_name=%s\n",
+ li, rs,
+ rsink,
+ rsink->sink_signal,
+ rname);
+ }
}
-
- stream_update.stream = stream_state;
- acrtc_state->force_dpms_off = true;
- dc_commit_updates_for_stream(stream_state->ctx->dc, NULL, 0,
- stream_state, &stream_update,
- stream_state->ctx->dc->current_state);
- mutex_unlock(&adev->dm.dc_lock);
}
-static int dm_resume(void *handle)
+static int dm_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct drm_device *ddev = adev_to_drm(adev);
struct amdgpu_display_manager *dm = &adev->dm;
struct amdgpu_dm_connector *aconnector;
struct drm_connector *connector;
struct drm_connector_list_iter iter;
- struct drm_crtc *crtc;
- struct drm_crtc_state *new_crtc_state;
- struct dm_crtc_state *dm_new_crtc_state;
- struct drm_plane *plane;
- struct drm_plane_state *new_plane_state;
- struct dm_plane_state *dm_new_plane_state;
struct dm_atomic_state *dm_state = to_dm_atomic_state(dm->atomic_obj.state);
enum dc_connection_type new_connection_type = dc_connection_none;
struct dc_state *dc_state;
int i, r, j;
+ struct dc_commit_streams_params commit_params = {};
+
+ if (dm->dc->caps.ips_support) {
+ dc_dmub_srv_apply_idle_power_optimizations(dm->dc, false);
+ }
if (amdgpu_in_reset(adev)) {
dc_state = dm->cached_dc_state;
@@ -2576,19 +3483,23 @@ static int dm_resume(void *handle)
* before the 0 streams commit.
*
* DC expects that link encoder assignments are *not* valid
- * when committing a state, so as a workaround it needs to be
- * cleared here.
+ * when committing a state, so as a workaround we can copy
+ * off of the current state.
+ *
+ * We lose the previous assignments, but we had already
+ * committed 0 streams anyway.
*/
- link_enc_cfg_init(dm->dc, dc_state);
-
- if (dc_enable_dmub_notifications(adev->dm.dc))
- amdgpu_dm_outbox_init(adev);
+ link_enc_cfg_copy(adev->dm.dc->current_state, dc_state);
r = dm_dmub_hw_init(adev);
- if (r)
- DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
+ if (r) {
+ drm_err(adev_to_drm(adev), "DMUB interface failed to initialize: status=%d\n", r);
+ return r;
+ }
+ dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D0);
dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
+
dc_resume(dm->dc);
amdgpu_dm_irq_resume_early(adev);
@@ -2600,47 +3511,52 @@ static int dm_resume(void *handle)
= 0xffffffff;
}
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- /*
- * Resource allocation happens for link encoders for newer ASIC in
- * dc_validate_global_state, so we need to revalidate it.
- *
- * This shouldn't fail (it passed once before), so warn if it does.
- */
- WARN_ON(dc_validate_global_state(dm->dc, dc_state, false) != DC_OK);
-#endif
- WARN_ON(!dc_commit_state(dm->dc, dc_state));
+ if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
+ amdgpu_dm_outbox_init(adev);
+ dc_enable_dmub_outbox(adev->dm.dc);
+ }
+
+ commit_params.streams = dc_state->streams;
+ commit_params.stream_count = dc_state->stream_count;
+ dc_exit_ips_for_hw_access(dm->dc);
+ WARN_ON(!dc_commit_streams(dm->dc, &commit_params));
dm_gpureset_commit_state(dm->cached_dc_state, dm);
dm_gpureset_toggle_interrupts(adev, dm->cached_dc_state, true);
- dc_release_state(dm->cached_dc_state);
+ dc_state_release(dm->cached_dc_state);
dm->cached_dc_state = NULL;
amdgpu_dm_irq_resume_late(adev);
mutex_unlock(&dm->dc_lock);
+ /* set the backlight after a reset */
+ for (i = 0; i < dm->num_of_edps; i++) {
+ if (dm->backlight_dev[i])
+ amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]);
+ }
+
return 0;
}
/* Recreate dc_state - DC invalidates it when setting power state to S3. */
- dc_release_state(dm_state->context);
- dm_state->context = dc_create_state(dm->dc);
+ dc_state_release(dm_state->context);
+ dm_state->context = dc_state_create(dm->dc, NULL);
/* TODO: Remove dc_state->dccg, use dc->dccg directly. */
- dc_resource_state_construct(dm->dc, dm_state->context);
+
+ /* Before powering on DC we need to re-initialize DMUB. */
+ dm_dmub_hw_resume(adev);
/* Re-enable outbox interrupts for DPIA. */
- if (dc_enable_dmub_notifications(adev->dm.dc))
+ if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
amdgpu_dm_outbox_init(adev);
-
- /* Before powering on DC we need to re-initialize DMUB. */
- r = dm_dmub_hw_init(adev);
- if (r)
- DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
+ dc_enable_dmub_outbox(adev->dm.dc);
+ }
/* power on hardware */
+ dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D0);
dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
/* program HPD filter */
@@ -2652,29 +3568,46 @@ static int dm_resume(void *handle)
*/
amdgpu_dm_irq_resume_early(adev);
+ s3_handle_hdmi_cec(ddev, false);
+
/* On resume we need to rewrite the MSTM control bits to enable MST*/
s3_handle_mst(ddev, false);
/* Do detection*/
drm_connector_list_iter_begin(ddev, &iter);
drm_for_each_connector_iter(connector, &iter) {
+ bool ret;
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
+ if (!aconnector->dc_link)
+ continue;
+
/*
- * this is the case when traversing through already created
+ * this is the case when traversing through already created end sink
* MST connectors, should be skipped
*/
- if (aconnector->mst_port)
+ if (aconnector->mst_root)
continue;
- mutex_lock(&aconnector->hpd_lock);
- if (!dc_link_detect_sink(aconnector->dc_link, &new_connection_type))
- DRM_ERROR("KMS: Failed to detect connector\n");
+ guard(mutex)(&aconnector->hpd_lock);
+ if (!dc_link_detect_connection_type(aconnector->dc_link, &new_connection_type))
+ drm_err(adev_to_drm(adev), "KMS: Failed to detect connector\n");
- if (aconnector->base.force && new_connection_type == dc_connection_none)
+ if (aconnector->base.force && new_connection_type == dc_connection_none) {
emulated_link_detect(aconnector->dc_link);
- else
- dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD);
+ } else {
+ guard(mutex)(&dm->dc_lock);
+ dc_exit_ips_for_hw_access(dm->dc);
+ ret = dc_link_detect(aconnector->dc_link, DETECT_REASON_RESUMEFROMS3S4);
+ if (ret) {
+ /* w/a delay for certain panels */
+ apply_delay_after_dpcd_poweroff(adev, aconnector->dc_sink);
+ }
+ }
if (aconnector->fake_enable && aconnector->dc_link->local_sink)
aconnector->fake_enable = false;
@@ -2683,45 +3616,47 @@ static int dm_resume(void *handle)
dc_sink_release(aconnector->dc_sink);
aconnector->dc_sink = NULL;
amdgpu_dm_update_connector_after_detect(aconnector);
- mutex_unlock(&aconnector->hpd_lock);
}
drm_connector_list_iter_end(&iter);
- /* Force mode set in atomic commit */
- for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i)
- new_crtc_state->active_changed = true;
+ dm_destroy_cached_state(adev);
- /*
- * atomic_check is expected to create the dc states. We need to release
- * them here, since they were duplicated as part of the suspend
- * procedure.
- */
- for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) {
- dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
- if (dm_new_crtc_state->stream) {
- WARN_ON(kref_read(&dm_new_crtc_state->stream->refcount) > 1);
- dc_stream_release(dm_new_crtc_state->stream);
- dm_new_crtc_state->stream = NULL;
- }
- }
+ /* Do mst topology probing after resuming cached state*/
+ drm_connector_list_iter_begin(ddev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ bool init = false;
- for_each_new_plane_in_state(dm->cached_state, plane, new_plane_state, i) {
- dm_new_plane_state = to_dm_plane_state(new_plane_state);
- if (dm_new_plane_state->dc_state) {
- WARN_ON(kref_read(&dm_new_plane_state->dc_state->refcount) > 1);
- dc_plane_state_release(dm_new_plane_state->dc_state);
- dm_new_plane_state->dc_state = NULL;
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
+ aconnector = to_amdgpu_dm_connector(connector);
+ if (aconnector->dc_link->type != dc_connection_mst_branch ||
+ aconnector->mst_root)
+ continue;
+
+ scoped_guard(mutex, &aconnector->mst_mgr.lock) {
+ init = !aconnector->mst_mgr.mst_primary;
}
+ if (init)
+ dm_helpers_dp_mst_start_top_mgr(aconnector->dc_link->ctx,
+ aconnector->dc_link, false);
+ else
+ drm_dp_mst_topology_queue_probe(&aconnector->mst_mgr);
}
+ drm_connector_list_iter_end(&iter);
- drm_atomic_helper_resume(ddev, dm->cached_state);
-
- dm->cached_state = NULL;
+ /* Debug dump: list all DC links and their associated sinks after detection
+ * is complete for all connectors. This provides a comprehensive view of the
+ * final state without repeating the dump for each connector.
+ */
+ amdgpu_dm_dump_links_and_sinks(adev);
amdgpu_dm_irq_resume_late(adev);
amdgpu_dm_smu_write_watermarks_table(adev);
+ drm_kms_helper_hotplug_event(ddev);
+
return 0;
}
@@ -2754,8 +3689,7 @@ static const struct amd_ip_funcs amdgpu_dm_funcs = {
.set_powergating_state = dm_set_powergating_state,
};
-const struct amdgpu_ip_block_version dm_ip_block =
-{
+const struct amdgpu_ip_block_version dm_ip_block = {
.type = AMD_IP_BLOCK_TYPE_DCE,
.major = 1,
.minor = 0,
@@ -2772,94 +3706,92 @@ const struct amdgpu_ip_block_version dm_ip_block =
static const struct drm_mode_config_funcs amdgpu_dm_mode_funcs = {
.fb_create = amdgpu_display_user_framebuffer_create,
- .get_format_info = amd_get_format_info,
- .output_poll_changed = drm_fb_helper_output_poll_changed,
+ .get_format_info = amdgpu_dm_plane_get_format_info,
.atomic_check = amdgpu_dm_atomic_check,
.atomic_commit = drm_atomic_helper_commit,
};
static struct drm_mode_config_helper_funcs amdgpu_dm_mode_config_helperfuncs = {
- .atomic_commit_tail = amdgpu_dm_atomic_commit_tail
+ .atomic_commit_tail = amdgpu_dm_atomic_commit_tail,
+ .atomic_commit_setup = amdgpu_dm_atomic_setup_commit,
};
static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
{
- u32 max_cll, min_cll, max, min, q, r;
+ const struct drm_panel_backlight_quirk *panel_backlight_quirk;
struct amdgpu_dm_backlight_caps *caps;
- struct amdgpu_display_manager *dm;
struct drm_connector *conn_base;
struct amdgpu_device *adev;
- struct dc_link *link = NULL;
- static const u8 pre_computed_values[] = {
- 50, 51, 52, 53, 55, 56, 57, 58, 59, 61, 62, 63, 65, 66, 68, 69,
- 71, 72, 74, 75, 77, 79, 81, 82, 84, 86, 88, 90, 92, 94, 96, 98};
- int i;
-
- if (!aconnector || !aconnector->dc_link)
- return;
+ struct drm_luminance_range_info *luminance_range;
+ struct drm_device *drm;
- link = aconnector->dc_link;
- if (link->connector_signal != SIGNAL_TYPE_EDP)
+ if (aconnector->bl_idx == -1 ||
+ aconnector->dc_link->connector_signal != SIGNAL_TYPE_EDP)
return;
conn_base = &aconnector->base;
- adev = drm_to_adev(conn_base->dev);
- dm = &adev->dm;
- for (i = 0; i < dm->num_of_edps; i++) {
- if (link == dm->backlight_link[i])
- break;
- }
- if (i >= dm->num_of_edps)
- return;
- caps = &dm->backlight_caps[i];
+ drm = conn_base->dev;
+ adev = drm_to_adev(drm);
+
+ caps = &adev->dm.backlight_caps[aconnector->bl_idx];
caps->ext_caps = &aconnector->dc_link->dpcd_sink_ext_caps;
caps->aux_support = false;
- max_cll = conn_base->hdr_sink_metadata.hdmi_type1.max_cll;
- min_cll = conn_base->hdr_sink_metadata.hdmi_type1.min_cll;
- if (caps->ext_caps->bits.oled == 1 /*||
- caps->ext_caps->bits.sdr_aux_backlight_control == 1 ||
- caps->ext_caps->bits.hdr_aux_backlight_control == 1*/)
+ if (caps->ext_caps->bits.oled == 1
+ /*
+ * ||
+ * caps->ext_caps->bits.sdr_aux_backlight_control == 1 ||
+ * caps->ext_caps->bits.hdr_aux_backlight_control == 1
+ */)
caps->aux_support = true;
if (amdgpu_backlight == 0)
caps->aux_support = false;
else if (amdgpu_backlight == 1)
caps->aux_support = true;
+ if (caps->aux_support)
+ aconnector->dc_link->backlight_control_type = BACKLIGHT_CONTROL_AMD_AUX;
- /* From the specification (CTA-861-G), for calculating the maximum
- * luminance we need to use:
- * Luminance = 50*2**(CV/32)
- * Where CV is a one-byte value.
- * For calculating this expression we may need float point precision;
- * to avoid this complexity level, we take advantage that CV is divided
- * by a constant. From the Euclids division algorithm, we know that CV
- * can be written as: CV = 32*q + r. Next, we replace CV in the
- * Luminance expression and get 50*(2**q)*(2**(r/32)), hence we just
- * need to pre-compute the value of r/32. For pre-computing the values
- * We just used the following Ruby line:
- * (0...32).each {|cv| puts (50*2**(cv/32.0)).round}
- * The results of the above expressions can be verified at
- * pre_computed_values.
- */
- q = max_cll >> 5;
- r = max_cll % 32;
- max = (1 << q) * pre_computed_values[r];
+ luminance_range = &conn_base->display_info.luminance_range;
- // min luminance: maxLum * (CV/255)^2 / 100
- q = DIV_ROUND_CLOSEST(min_cll, 255);
- min = max * DIV_ROUND_CLOSEST((q * q), 100);
+ if (luminance_range->max_luminance)
+ caps->aux_max_input_signal = luminance_range->max_luminance;
+ else
+ caps->aux_max_input_signal = 512;
+
+ if (luminance_range->min_luminance)
+ caps->aux_min_input_signal = luminance_range->min_luminance;
+ else
+ caps->aux_min_input_signal = 1;
- caps->aux_max_input_signal = max;
- caps->aux_min_input_signal = min;
+ panel_backlight_quirk =
+ drm_get_panel_backlight_quirk(aconnector->drm_edid);
+ if (!IS_ERR_OR_NULL(panel_backlight_quirk)) {
+ if (panel_backlight_quirk->min_brightness) {
+ caps->min_input_signal =
+ panel_backlight_quirk->min_brightness - 1;
+ drm_info(drm,
+ "Applying panel backlight quirk, min_brightness: %d\n",
+ caps->min_input_signal);
+ }
+ if (panel_backlight_quirk->brightness_mask) {
+ drm_info(drm,
+ "Applying panel backlight quirk, brightness_mask: 0x%X\n",
+ panel_backlight_quirk->brightness_mask);
+ caps->brightness_mask =
+ panel_backlight_quirk->brightness_mask;
+ }
+ }
}
+DEFINE_FREE(sink_release, struct dc_sink *, if (_T) dc_sink_release(_T))
+
void amdgpu_dm_update_connector_after_detect(
struct amdgpu_dm_connector *aconnector)
{
struct drm_connector *connector = &aconnector->base;
+ struct dc_sink *sink __free(sink_release) = NULL;
struct drm_device *dev = connector->dev;
- struct dc_sink *sink;
/* MST handled by drm_mst framework */
if (aconnector->mst_mgr.mst_state == true)
@@ -2881,7 +3813,7 @@ void amdgpu_dm_update_connector_after_detect(
* For S3 resume with headless use eml_sink to fake stream
* because on resume connector->sink is set to NULL
*/
- mutex_lock(&dev->mode_config.mutex);
+ guard(mutex)(&dev->mode_config.mutex);
if (sink) {
if (aconnector->dc_sink) {
@@ -2897,7 +3829,7 @@ void amdgpu_dm_update_connector_after_detect(
aconnector->dc_sink = sink;
dc_sink_retain(aconnector->dc_sink);
amdgpu_dm_update_freesync_caps(connector,
- aconnector->edid);
+ aconnector->drm_edid);
} else {
amdgpu_dm_update_freesync_caps(connector, NULL);
if (!aconnector->dc_sink) {
@@ -2906,10 +3838,6 @@ void amdgpu_dm_update_connector_after_detect(
}
}
- mutex_unlock(&dev->mode_config.mutex);
-
- if (sink)
- dc_sink_release(sink);
return;
}
@@ -2917,27 +3845,25 @@ void amdgpu_dm_update_connector_after_detect(
* TODO: temporary guard to look for proper fix
* if this sink is MST sink, we should not do anything
*/
- if (sink && sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
- dc_sink_release(sink);
+ if (sink && sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
return;
- }
if (aconnector->dc_sink == sink) {
/*
* We got a DP short pulse (Link Loss, DP CTS, etc...).
* Do nothing!!
*/
- DRM_DEBUG_DRIVER("DCHPD: connector_id=%d: dc_sink didn't change.\n",
- aconnector->connector_id);
- if (sink)
- dc_sink_release(sink);
+ drm_dbg_kms(dev, "DCHPD: connector_id=%d: dc_sink didn't change.\n",
+ aconnector->connector_id);
return;
}
- DRM_DEBUG_DRIVER("DCHPD: connector_id=%d: Old sink=%p New sink=%p\n",
- aconnector->connector_id, aconnector->dc_sink, sink);
+ drm_dbg_kms(dev, "DCHPD: connector_id=%d: Old sink=%p New sink=%p\n",
+ aconnector->connector_id, aconnector->dc_sink, sink);
- mutex_lock(&dev->mode_config.mutex);
+ /* When polling, DRM has already locked the mutex for us. */
+ if (!drm_kms_helper_is_poll_worker())
+ mutex_lock(&dev->mode_config.mutex);
/*
* 1. Update status of the drm connector
@@ -2956,45 +3882,145 @@ void amdgpu_dm_update_connector_after_detect(
aconnector->dc_sink = sink;
dc_sink_retain(aconnector->dc_sink);
if (sink->dc_edid.length == 0) {
- aconnector->edid = NULL;
+ aconnector->drm_edid = NULL;
+ hdmi_cec_unset_edid(aconnector);
if (aconnector->dc_link->aux_mode) {
- drm_dp_cec_unset_edid(
- &aconnector->dm_dp_aux.aux);
+ drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux);
}
} else {
- aconnector->edid =
- (struct edid *)sink->dc_edid.raw_edid;
+ const struct edid *edid = (const struct edid *)sink->dc_edid.raw_edid;
+
+ aconnector->drm_edid = drm_edid_alloc(edid, sink->dc_edid.length);
+ drm_edid_connector_update(connector, aconnector->drm_edid);
- drm_connector_update_edid_property(connector,
- aconnector->edid);
+ hdmi_cec_set_edid(aconnector);
if (aconnector->dc_link->aux_mode)
- drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux,
- aconnector->edid);
+ drm_dp_cec_attach(&aconnector->dm_dp_aux.aux,
+ connector->display_info.source_physical_address);
+ }
+
+ if (!aconnector->timing_requested) {
+ aconnector->timing_requested =
+ kzalloc(sizeof(struct dc_crtc_timing), GFP_KERNEL);
+ if (!aconnector->timing_requested)
+ drm_err(dev,
+ "failed to create aconnector->requested_timing\n");
}
- amdgpu_dm_update_freesync_caps(connector, aconnector->edid);
+ amdgpu_dm_update_freesync_caps(connector, aconnector->drm_edid);
update_connector_ext_caps(aconnector);
} else {
+ hdmi_cec_unset_edid(aconnector);
drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux);
amdgpu_dm_update_freesync_caps(connector, NULL);
- drm_connector_update_edid_property(connector, NULL);
aconnector->num_modes = 0;
dc_sink_release(aconnector->dc_sink);
aconnector->dc_sink = NULL;
- aconnector->edid = NULL;
-#ifdef CONFIG_DRM_AMD_DC_HDCP
+ drm_edid_free(aconnector->drm_edid);
+ aconnector->drm_edid = NULL;
+ kfree(aconnector->timing_requested);
+ aconnector->timing_requested = NULL;
/* Set CP to DESIRED if it was ENABLED, so we can re-enable it again on hotplug */
if (connector->state->content_protection == DRM_MODE_CONTENT_PROTECTION_ENABLED)
connector->state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED;
-#endif
}
- mutex_unlock(&dev->mode_config.mutex);
-
update_subconnector_property(aconnector);
- if (sink)
- dc_sink_release(sink);
+ /* When polling, the mutex will be unlocked for us by DRM. */
+ if (!drm_kms_helper_is_poll_worker())
+ mutex_unlock(&dev->mode_config.mutex);
+}
+
+static bool are_sinks_equal(const struct dc_sink *sink1, const struct dc_sink *sink2)
+{
+ if (!sink1 || !sink2)
+ return false;
+ if (sink1->sink_signal != sink2->sink_signal)
+ return false;
+
+ if (sink1->dc_edid.length != sink2->dc_edid.length)
+ return false;
+
+ if (memcmp(sink1->dc_edid.raw_edid, sink2->dc_edid.raw_edid,
+ sink1->dc_edid.length) != 0)
+ return false;
+ return true;
+}
+
+
+/**
+ * DOC: hdmi_hpd_debounce_work
+ *
+ * HDMI HPD debounce delay in milliseconds. When an HDMI display toggles HPD
+ * (such as during power save transitions), this delay determines how long to
+ * wait before processing the HPD event. This allows distinguishing between a
+ * physical unplug (>hdmi_hpd_debounce_delay)
+ * and a spontaneous RX HPD toggle (<hdmi_hpd_debounce_delay).
+ *
+ * If the toggle is less than this delay, the driver compares sink capabilities
+ * and permits a hotplug event if they changed.
+ *
+ * The default value of 1500ms was chosen based on experimental testing with
+ * various monitors that exhibit spontaneous HPD toggling behavior.
+ */
+static void hdmi_hpd_debounce_work(struct work_struct *work)
+{
+ struct amdgpu_dm_connector *aconnector =
+ container_of(to_delayed_work(work), struct amdgpu_dm_connector,
+ hdmi_hpd_debounce_work);
+ struct drm_connector *connector = &aconnector->base;
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct dc *dc = aconnector->dc_link->ctx->dc;
+ bool fake_reconnect = false;
+ bool reallow_idle = false;
+ bool ret = false;
+ guard(mutex)(&aconnector->hpd_lock);
+
+ /* Re-detect the display */
+ scoped_guard(mutex, &adev->dm.dc_lock) {
+ if (dc->caps.ips_support && dc->ctx->dmub_srv->idle_allowed) {
+ dc_allow_idle_optimizations(dc, false);
+ reallow_idle = true;
+ }
+ ret = dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD);
+ }
+
+ if (ret) {
+ /* Apply workaround delay for certain panels */
+ apply_delay_after_dpcd_poweroff(adev, aconnector->dc_sink);
+ /* Compare sinks to determine if this was a spontaneous HPD toggle */
+ if (are_sinks_equal(aconnector->dc_link->local_sink, aconnector->hdmi_prev_sink)) {
+ /*
+ * Sinks match - this was a spontaneous HDMI HPD toggle.
+ */
+ drm_dbg_kms(dev, "HDMI HPD: Sink unchanged after debounce, internal re-enable\n");
+ fake_reconnect = true;
+ }
+
+ /* Update connector state */
+ amdgpu_dm_update_connector_after_detect(aconnector);
+
+ drm_modeset_lock_all(dev);
+ dm_restore_drm_connector_state(dev, connector);
+ drm_modeset_unlock_all(dev);
+
+ /* Only notify OS if sink actually changed */
+ if (!fake_reconnect && aconnector->base.force == DRM_FORCE_UNSPECIFIED)
+ drm_kms_helper_hotplug_event(dev);
+ }
+
+ /* Release the cached sink reference */
+ if (aconnector->hdmi_prev_sink) {
+ dc_sink_release(aconnector->hdmi_prev_sink);
+ aconnector->hdmi_prev_sink = NULL;
+ }
+
+ scoped_guard(mutex, &adev->dm.dc_lock) {
+ if (reallow_idle && dc->caps.ips_support)
+ dc_allow_idle_optimizations(dc, true);
+ }
}
static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector)
@@ -3004,32 +4030,38 @@ static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector)
enum dc_connection_type new_connection_type = dc_connection_none;
struct amdgpu_device *adev = drm_to_adev(dev);
struct dm_connector_state *dm_con_state = to_dm_connector_state(connector->state);
- struct dm_crtc_state *dm_crtc_state = NULL;
+ struct dc *dc = aconnector->dc_link->ctx->dc;
+ bool ret = false;
+ bool debounce_required = false;
if (adev->dm.disable_hpd_irq)
return;
- if (dm_con_state->base.state && dm_con_state->base.crtc)
- dm_crtc_state = to_dm_crtc_state(drm_atomic_get_crtc_state(
- dm_con_state->base.state,
- dm_con_state->base.crtc));
/*
* In case of failure or MST no need to update connector status or notify the OS
* since (for MST case) MST does this in its own context.
*/
- mutex_lock(&aconnector->hpd_lock);
+ guard(mutex)(&aconnector->hpd_lock);
-#ifdef CONFIG_DRM_AMD_DC_HDCP
if (adev->dm.hdcp_workqueue) {
hdcp_reset_display(adev->dm.hdcp_workqueue, aconnector->dc_link->link_index);
dm_con_state->update_hdcp = true;
}
-#endif
if (aconnector->fake_enable)
aconnector->fake_enable = false;
- if (!dc_link_detect_sink(aconnector->dc_link, &new_connection_type))
- DRM_ERROR("KMS: Failed to detect connector\n");
+ aconnector->timing_changed = false;
+
+ if (!dc_link_detect_connection_type(aconnector->dc_link, &new_connection_type))
+ drm_err(adev_to_drm(adev), "KMS: Failed to detect connector\n");
+
+ /*
+ * Check for HDMI disconnect with debounce enabled.
+ */
+ debounce_required = (aconnector->hdmi_hpd_debounce_delay_ms > 0 &&
+ dc_is_hdmi_signal(aconnector->dc_link->connector_signal) &&
+ new_connection_type == dc_connection_none &&
+ aconnector->dc_link->local_sink != NULL);
if (aconnector->base.force && new_connection_type == dc_connection_none) {
emulated_link_detect(aconnector->dc_link);
@@ -3039,25 +4071,52 @@ static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector)
drm_modeset_unlock_all(dev);
if (aconnector->base.force == DRM_FORCE_UNSPECIFIED)
- drm_kms_helper_hotplug_event(dev);
+ drm_kms_helper_connector_hotplug_event(connector);
+ } else if (debounce_required) {
+ /*
+ * HDMI disconnect detected - schedule delayed work instead of
+ * processing immediately. This allows us to distinguish spurious
+ * HPD toggles from physical unplugs.
+ */
+ drm_dbg_kms(dev, "HDMI HPD: Disconnect detected, scheduling debounce work (%u ms)\n",
+ aconnector->hdmi_hpd_debounce_delay_ms);
+
+ /* Cache the current sink for later comparison */
+ if (aconnector->hdmi_prev_sink)
+ dc_sink_release(aconnector->hdmi_prev_sink);
+ aconnector->hdmi_prev_sink = aconnector->dc_link->local_sink;
+ if (aconnector->hdmi_prev_sink)
+ dc_sink_retain(aconnector->hdmi_prev_sink);
+
+ /* Schedule delayed detection. */
+ if (mod_delayed_work(system_wq,
+ &aconnector->hdmi_hpd_debounce_work,
+ msecs_to_jiffies(aconnector->hdmi_hpd_debounce_delay_ms)))
+ drm_dbg_kms(dev, "HDMI HPD: Re-scheduled debounce work\n");
- } else if (dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD)) {
- if (new_connection_type == dc_connection_none &&
- aconnector->dc_link->type == dc_connection_none &&
- dm_crtc_state)
- dm_set_dpms_off(aconnector->dc_link, dm_crtc_state);
+ } else {
- amdgpu_dm_update_connector_after_detect(aconnector);
+ /* If the aconnector->hdmi_hpd_debounce_work is scheduled, exit early */
+ if (delayed_work_pending(&aconnector->hdmi_hpd_debounce_work))
+ return;
- drm_modeset_lock_all(dev);
- dm_restore_drm_connector_state(dev, connector);
- drm_modeset_unlock_all(dev);
+ scoped_guard(mutex, &adev->dm.dc_lock) {
+ dc_exit_ips_for_hw_access(dc);
+ ret = dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD);
+ }
+ if (ret) {
+ /* w/a delay for certain panels */
+ apply_delay_after_dpcd_poweroff(adev, aconnector->dc_sink);
+ amdgpu_dm_update_connector_after_detect(aconnector);
- if (aconnector->base.force == DRM_FORCE_UNSPECIFIED)
- drm_kms_helper_hotplug_event(dev);
- }
- mutex_unlock(&aconnector->hpd_lock);
+ drm_modeset_lock_all(dev);
+ dm_restore_drm_connector_state(dev, connector);
+ drm_modeset_unlock_all(dev);
+ if (aconnector->base.force == DRM_FORCE_UNSPECIFIED)
+ drm_kms_helper_connector_hotplug_event(connector);
+ }
+ }
}
static void handle_hpd_irq(void *param)
@@ -3068,98 +4127,21 @@ static void handle_hpd_irq(void *param)
}
-static void dm_handle_mst_sideband_msg(struct amdgpu_dm_connector *aconnector)
-{
- uint8_t esi[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = { 0 };
- uint8_t dret;
- bool new_irq_handled = false;
- int dpcd_addr;
- int dpcd_bytes_to_read;
-
- const int max_process_count = 30;
- int process_count = 0;
-
- const struct dc_link_status *link_status = dc_link_get_status(aconnector->dc_link);
-
- if (link_status->dpcd_caps->dpcd_rev.raw < 0x12) {
- dpcd_bytes_to_read = DP_LANE0_1_STATUS - DP_SINK_COUNT;
- /* DPCD 0x200 - 0x201 for downstream IRQ */
- dpcd_addr = DP_SINK_COUNT;
- } else {
- dpcd_bytes_to_read = DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI;
- /* DPCD 0x2002 - 0x2005 for downstream IRQ */
- dpcd_addr = DP_SINK_COUNT_ESI;
- }
-
- dret = drm_dp_dpcd_read(
- &aconnector->dm_dp_aux.aux,
- dpcd_addr,
- esi,
- dpcd_bytes_to_read);
-
- while (dret == dpcd_bytes_to_read &&
- process_count < max_process_count) {
- uint8_t retry;
- dret = 0;
-
- process_count++;
-
- DRM_DEBUG_DRIVER("ESI %02x %02x %02x\n", esi[0], esi[1], esi[2]);
- /* handle HPD short pulse irq */
- if (aconnector->mst_mgr.mst_state)
- drm_dp_mst_hpd_irq(
- &aconnector->mst_mgr,
- esi,
- &new_irq_handled);
-
- if (new_irq_handled) {
- /* ACK at DPCD to notify down stream */
- const int ack_dpcd_bytes_to_write =
- dpcd_bytes_to_read - 1;
-
- for (retry = 0; retry < 3; retry++) {
- uint8_t wret;
-
- wret = drm_dp_dpcd_write(
- &aconnector->dm_dp_aux.aux,
- dpcd_addr + 1,
- &esi[1],
- ack_dpcd_bytes_to_write);
- if (wret == ack_dpcd_bytes_to_write)
- break;
- }
-
- /* check if there is new irq to be handled */
- dret = drm_dp_dpcd_read(
- &aconnector->dm_dp_aux.aux,
- dpcd_addr,
- esi,
- dpcd_bytes_to_read);
-
- new_irq_handled = false;
- } else {
- break;
- }
- }
-
- if (process_count == max_process_count)
- DRM_DEBUG_DRIVER("Loop exceeded max iterations\n");
-}
-
-static void schedule_hpd_rx_offload_work(struct hpd_rx_irq_offload_work_queue *offload_wq,
+static void schedule_hpd_rx_offload_work(struct amdgpu_device *adev, struct hpd_rx_irq_offload_work_queue *offload_wq,
union hpd_irq_data hpd_irq_data)
{
struct hpd_rx_irq_offload_work *offload_work =
kzalloc(sizeof(*offload_work), GFP_KERNEL);
if (!offload_work) {
- DRM_ERROR("Failed to allocate hpd_rx_irq_offload_work.\n");
+ drm_err(adev_to_drm(adev), "Failed to allocate hpd_rx_irq_offload_work.\n");
return;
}
INIT_WORK(&offload_work->work, dm_handle_hpd_rx_offload_work);
offload_work->data = hpd_irq_data;
offload_work->offload_wq = offload_wq;
+ offload_work->adev = adev;
queue_work(offload_wq->wq, &offload_work->work);
DRM_DEBUG_KMS("queue work to handle hpd_rx offload work");
@@ -3178,8 +4160,9 @@ static void handle_hpd_rx_irq(void *param)
union hpd_irq_data hpd_irq_data;
bool link_loss = false;
bool has_left_work = false;
- int idx = aconnector->base.index;
+ int idx = dc_link->link_index;
struct hpd_rx_irq_offload_work_queue *offload_wq = &adev->dm.hpd_rx_offload_wq[idx];
+ struct dc *dc = aconnector->dc_link->ctx->dc;
memset(&hpd_irq_data, 0, sizeof(hpd_irq_data));
@@ -3200,14 +4183,30 @@ static void handle_hpd_rx_irq(void *param)
goto out;
if (hpd_irq_data.bytes.device_service_irq.bits.AUTOMATED_TEST) {
- schedule_hpd_rx_offload_work(offload_wq, hpd_irq_data);
+ schedule_hpd_rx_offload_work(adev, offload_wq, hpd_irq_data);
goto out;
}
if (dc_link_dp_allow_hpd_rx_irq(dc_link)) {
if (hpd_irq_data.bytes.device_service_irq.bits.UP_REQ_MSG_RDY ||
hpd_irq_data.bytes.device_service_irq.bits.DOWN_REP_MSG_RDY) {
- dm_handle_mst_sideband_msg(aconnector);
+ bool skip = false;
+
+ /*
+ * DOWN_REP_MSG_RDY is also handled by polling method
+ * mgr->cbs->poll_hpd_irq()
+ */
+ spin_lock(&offload_wq->offload_lock);
+ skip = offload_wq->is_handling_mst_msg_rdy_event;
+
+ if (!skip)
+ offload_wq->is_handling_mst_msg_rdy_event = true;
+
+ spin_unlock(&offload_wq->offload_lock);
+
+ if (!skip)
+ schedule_hpd_rx_offload_work(adev, offload_wq, hpd_irq_data);
+
goto out;
}
@@ -3223,7 +4222,7 @@ static void handle_hpd_rx_irq(void *param)
spin_unlock(&offload_wq->offload_lock);
if (!skip)
- schedule_hpd_rx_offload_work(offload_wq, hpd_irq_data);
+ schedule_hpd_rx_offload_work(adev, offload_wq, hpd_irq_data);
goto out;
}
@@ -3232,8 +4231,8 @@ static void handle_hpd_rx_irq(void *param)
out:
if (result && !is_mst_root_connector) {
/* Downstream Port status changed. */
- if (!dc_link_detect_sink(dc_link, &new_connection_type))
- DRM_ERROR("KMS: Failed to detect connector\n");
+ if (!dc_link_detect_connection_type(dc_link, &new_connection_type))
+ drm_err(adev_to_drm(adev), "KMS: Failed to detect connector\n");
if (aconnector->base.force && new_connection_type == dc_connection_none) {
emulated_link_detect(dc_link);
@@ -3248,28 +4247,33 @@ out:
dm_restore_drm_connector_state(dev, connector);
drm_modeset_unlock_all(dev);
- drm_kms_helper_hotplug_event(dev);
- } else if (dc_link_detect(dc_link, DETECT_REASON_HPDRX)) {
+ drm_kms_helper_connector_hotplug_event(connector);
+ } else {
+ bool ret = false;
- if (aconnector->fake_enable)
- aconnector->fake_enable = false;
+ mutex_lock(&adev->dm.dc_lock);
+ dc_exit_ips_for_hw_access(dc);
+ ret = dc_link_detect(dc_link, DETECT_REASON_HPDRX);
+ mutex_unlock(&adev->dm.dc_lock);
- amdgpu_dm_update_connector_after_detect(aconnector);
+ if (ret) {
+ if (aconnector->fake_enable)
+ aconnector->fake_enable = false;
+ amdgpu_dm_update_connector_after_detect(aconnector);
- drm_modeset_lock_all(dev);
- dm_restore_drm_connector_state(dev, connector);
- drm_modeset_unlock_all(dev);
+ drm_modeset_lock_all(dev);
+ dm_restore_drm_connector_state(dev, connector);
+ drm_modeset_unlock_all(dev);
- drm_kms_helper_hotplug_event(dev);
+ drm_kms_helper_connector_hotplug_event(connector);
+ }
}
}
-#ifdef CONFIG_DRM_AMD_DC_HDCP
if (hpd_irq_data.bytes.device_service_irq.bits.CP_IRQ) {
if (adev->dm.hdcp_workqueue)
hdcp_handle_cpirq(adev->dm.hdcp_workqueue, aconnector->base.index);
}
-#endif
if (dc_link->type != dc_connection_mst_branch)
drm_dp_cec_irq(&aconnector->dm_dp_aux.aux);
@@ -3277,7 +4281,7 @@ out:
mutex_unlock(&aconnector->hpd_lock);
}
-static void register_hpd_handlers(struct amdgpu_device *adev)
+static int register_hpd_handlers(struct amdgpu_device *adev)
{
struct drm_device *dev = adev_to_drm(adev);
struct drm_connector *connector;
@@ -3288,36 +4292,70 @@ static void register_hpd_handlers(struct amdgpu_device *adev)
int_params.requested_polarity = INTERRUPT_POLARITY_DEFAULT;
int_params.current_polarity = INTERRUPT_POLARITY_DEFAULT;
+ if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
+ if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD,
+ dmub_hpd_callback, true)) {
+ drm_err(adev_to_drm(adev), "fail to register dmub hpd callback");
+ return -EINVAL;
+ }
+
+ if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ,
+ dmub_hpd_callback, true)) {
+ drm_err(adev_to_drm(adev), "fail to register dmub hpd callback");
+ return -EINVAL;
+ }
+
+ if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_SENSE_NOTIFY,
+ dmub_hpd_sense_callback, true)) {
+ drm_err(adev_to_drm(adev), "fail to register dmub hpd sense callback");
+ return -EINVAL;
+ }
+ }
+
list_for_each_entry(connector,
&dev->mode_config.connector_list, head) {
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
dc_link = aconnector->dc_link;
- if (DC_IRQ_SOURCE_INVALID != dc_link->irq_source_hpd) {
+ if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) {
int_params.int_context = INTERRUPT_LOW_IRQ_CONTEXT;
int_params.irq_source = dc_link->irq_source_hpd;
- amdgpu_dm_irq_register_interrupt(adev, &int_params,
- handle_hpd_irq,
- (void *) aconnector);
+ if (int_params.irq_source == DC_IRQ_SOURCE_INVALID ||
+ int_params.irq_source < DC_IRQ_SOURCE_HPD1 ||
+ int_params.irq_source > DC_IRQ_SOURCE_HPD6) {
+ drm_err(adev_to_drm(adev), "Failed to register hpd irq!\n");
+ return -EINVAL;
+ }
+
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ handle_hpd_irq, (void *) aconnector))
+ return -ENOMEM;
}
- if (DC_IRQ_SOURCE_INVALID != dc_link->irq_source_hpd_rx) {
+ if (dc_link->irq_source_hpd_rx != DC_IRQ_SOURCE_INVALID) {
/* Also register for DP short pulse (hpd_rx). */
int_params.int_context = INTERRUPT_LOW_IRQ_CONTEXT;
int_params.irq_source = dc_link->irq_source_hpd_rx;
- amdgpu_dm_irq_register_interrupt(adev, &int_params,
- handle_hpd_rx_irq,
- (void *) aconnector);
+ if (int_params.irq_source == DC_IRQ_SOURCE_INVALID ||
+ int_params.irq_source < DC_IRQ_SOURCE_HPD1RX ||
+ int_params.irq_source > DC_IRQ_SOURCE_HPD6RX) {
+ drm_err(adev_to_drm(adev), "Failed to register hpd rx irq!\n");
+ return -EINVAL;
+ }
- if (adev->dm.hpd_rx_offload_wq)
- adev->dm.hpd_rx_offload_wq[connector->index].aconnector =
- aconnector;
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ handle_hpd_rx_irq, (void *) aconnector))
+ return -ENOMEM;
}
}
+ return 0;
}
#if defined(CONFIG_DRM_AMD_DC_SI)
@@ -3329,7 +4367,7 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev)
struct dc_interrupt_params int_params = {0};
int r;
int i;
- unsigned client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
+ unsigned int client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
int_params.requested_polarity = INTERRUPT_POLARITY_DEFAULT;
int_params.current_polarity = INTERRUPT_POLARITY_DEFAULT;
@@ -3343,27 +4381,36 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev)
* Base driver will call amdgpu_dm_irq_handler() for ALL interrupts
* coming from DC hardware.
* amdgpu_dm_irq_handler() will re-direct the interrupt to DC
- * for acknowledging and handling. */
+ * for acknowledging and handling.
+ */
/* Use VBLANK interrupt */
for (i = 0; i < adev->mode_info.num_crtc; i++) {
- r = amdgpu_irq_add_id(adev, client_id, i+1 , &adev->crtc_irq);
+ r = amdgpu_irq_add_id(adev, client_id, i + 1, &adev->crtc_irq);
if (r) {
- DRM_ERROR("Failed to add crtc irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add crtc irq id!\n");
return r;
}
int_params.int_context = INTERRUPT_HIGH_IRQ_CONTEXT;
int_params.irq_source =
- dc_interrupt_to_irq_source(dc, i+1 , 0);
+ dc_interrupt_to_irq_source(dc, i + 1, 0);
+
+ if (int_params.irq_source == DC_IRQ_SOURCE_INVALID ||
+ int_params.irq_source < DC_IRQ_SOURCE_VBLANK1 ||
+ int_params.irq_source > DC_IRQ_SOURCE_VBLANK6) {
+ drm_err(adev_to_drm(adev), "Failed to register vblank irq!\n");
+ return -EINVAL;
+ }
c_irq_params = &adev->dm.vblank_params[int_params.irq_source - DC_IRQ_SOURCE_VBLANK1];
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
- amdgpu_dm_irq_register_interrupt(adev, &int_params,
- dm_crtc_high_irq, c_irq_params);
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ dm_crtc_high_irq, c_irq_params))
+ return -ENOMEM;
}
/* Use GRPH_PFLIP interrupt */
@@ -3371,7 +4418,7 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev)
i <= VISLANDS30_IV_SRCID_D6_GRPH_PFLIP; i += 2) {
r = amdgpu_irq_add_id(adev, client_id, i, &adev->pageflip_irq);
if (r) {
- DRM_ERROR("Failed to add page flip irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add page flip irq id!\n");
return r;
}
@@ -3379,27 +4426,34 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev)
int_params.irq_source =
dc_interrupt_to_irq_source(dc, i, 0);
+ if (int_params.irq_source == DC_IRQ_SOURCE_INVALID ||
+ int_params.irq_source < DC_IRQ_SOURCE_PFLIP_FIRST ||
+ int_params.irq_source > DC_IRQ_SOURCE_PFLIP_LAST) {
+ drm_err(adev_to_drm(adev), "Failed to register pflip irq!\n");
+ return -EINVAL;
+ }
+
c_irq_params = &adev->dm.pflip_params[int_params.irq_source - DC_IRQ_SOURCE_PFLIP_FIRST];
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
- amdgpu_dm_irq_register_interrupt(adev, &int_params,
- dm_pflip_high_irq, c_irq_params);
-
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ dm_pflip_high_irq, c_irq_params))
+ return -ENOMEM;
}
/* HPD */
r = amdgpu_irq_add_id(adev, client_id,
VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
if (r) {
- DRM_ERROR("Failed to add hpd irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add hpd irq id!\n");
return r;
}
- register_hpd_handlers(adev);
+ r = register_hpd_handlers(adev);
- return 0;
+ return r;
}
#endif
@@ -3411,7 +4465,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev)
struct dc_interrupt_params int_params = {0};
int r;
int i;
- unsigned client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
+ unsigned int client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
if (adev->family >= AMDGPU_FAMILY_AI)
client_id = SOC15_IH_CLIENTID_DCE;
@@ -3428,13 +4482,14 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev)
* Base driver will call amdgpu_dm_irq_handler() for ALL interrupts
* coming from DC hardware.
* amdgpu_dm_irq_handler() will re-direct the interrupt to DC
- * for acknowledging and handling. */
+ * for acknowledging and handling.
+ */
/* Use VBLANK interrupt */
for (i = VISLANDS30_IV_SRCID_D1_VERTICAL_INTERRUPT0; i <= VISLANDS30_IV_SRCID_D6_VERTICAL_INTERRUPT0; i++) {
r = amdgpu_irq_add_id(adev, client_id, i, &adev->crtc_irq);
if (r) {
- DRM_ERROR("Failed to add crtc irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add crtc irq id!\n");
return r;
}
@@ -3442,20 +4497,28 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev)
int_params.irq_source =
dc_interrupt_to_irq_source(dc, i, 0);
+ if (int_params.irq_source == DC_IRQ_SOURCE_INVALID ||
+ int_params.irq_source < DC_IRQ_SOURCE_VBLANK1 ||
+ int_params.irq_source > DC_IRQ_SOURCE_VBLANK6) {
+ drm_err(adev_to_drm(adev), "Failed to register vblank irq!\n");
+ return -EINVAL;
+ }
+
c_irq_params = &adev->dm.vblank_params[int_params.irq_source - DC_IRQ_SOURCE_VBLANK1];
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
- amdgpu_dm_irq_register_interrupt(adev, &int_params,
- dm_crtc_high_irq, c_irq_params);
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ dm_crtc_high_irq, c_irq_params))
+ return -ENOMEM;
}
/* Use VUPDATE interrupt */
for (i = VISLANDS30_IV_SRCID_D1_V_UPDATE_INT; i <= VISLANDS30_IV_SRCID_D6_V_UPDATE_INT; i += 2) {
r = amdgpu_irq_add_id(adev, client_id, i, &adev->vupdate_irq);
if (r) {
- DRM_ERROR("Failed to add vupdate irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add vupdate irq id!\n");
return r;
}
@@ -3463,13 +4526,21 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev)
int_params.irq_source =
dc_interrupt_to_irq_source(dc, i, 0);
+ if (int_params.irq_source == DC_IRQ_SOURCE_INVALID ||
+ int_params.irq_source < DC_IRQ_SOURCE_VUPDATE1 ||
+ int_params.irq_source > DC_IRQ_SOURCE_VUPDATE6) {
+ drm_err(adev_to_drm(adev), "Failed to register vupdate irq!\n");
+ return -EINVAL;
+ }
+
c_irq_params = &adev->dm.vupdate_params[int_params.irq_source - DC_IRQ_SOURCE_VUPDATE1];
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
- amdgpu_dm_irq_register_interrupt(adev, &int_params,
- dm_vupdate_high_irq, c_irq_params);
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ dm_vupdate_high_irq, c_irq_params))
+ return -ENOMEM;
}
/* Use GRPH_PFLIP interrupt */
@@ -3477,7 +4548,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev)
i <= VISLANDS30_IV_SRCID_D6_GRPH_PFLIP; i += 2) {
r = amdgpu_irq_add_id(adev, client_id, i, &adev->pageflip_irq);
if (r) {
- DRM_ERROR("Failed to add page flip irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add page flip irq id!\n");
return r;
}
@@ -3485,30 +4556,36 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev)
int_params.irq_source =
dc_interrupt_to_irq_source(dc, i, 0);
+ if (int_params.irq_source == DC_IRQ_SOURCE_INVALID ||
+ int_params.irq_source < DC_IRQ_SOURCE_PFLIP_FIRST ||
+ int_params.irq_source > DC_IRQ_SOURCE_PFLIP_LAST) {
+ drm_err(adev_to_drm(adev), "Failed to register pflip irq!\n");
+ return -EINVAL;
+ }
+
c_irq_params = &adev->dm.pflip_params[int_params.irq_source - DC_IRQ_SOURCE_PFLIP_FIRST];
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
- amdgpu_dm_irq_register_interrupt(adev, &int_params,
- dm_pflip_high_irq, c_irq_params);
-
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ dm_pflip_high_irq, c_irq_params))
+ return -ENOMEM;
}
/* HPD */
r = amdgpu_irq_add_id(adev, client_id,
VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
if (r) {
- DRM_ERROR("Failed to add hpd irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add hpd irq id!\n");
return r;
}
- register_hpd_handlers(adev);
+ r = register_hpd_handlers(adev);
- return 0;
+ return r;
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
/* Register IRQ sources and initialize IRQ callbacks */
static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
{
@@ -3550,7 +4627,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->crtc_irq);
if (r) {
- DRM_ERROR("Failed to add crtc irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add crtc irq id!\n");
return r;
}
@@ -3558,13 +4635,21 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
int_params.irq_source =
dc_interrupt_to_irq_source(dc, i, 0);
+ if (int_params.irq_source == DC_IRQ_SOURCE_INVALID ||
+ int_params.irq_source < DC_IRQ_SOURCE_VBLANK1 ||
+ int_params.irq_source > DC_IRQ_SOURCE_VBLANK6) {
+ drm_err(adev_to_drm(adev), "Failed to register vblank irq!\n");
+ return -EINVAL;
+ }
+
c_irq_params = &adev->dm.vblank_params[int_params.irq_source - DC_IRQ_SOURCE_VBLANK1];
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
- amdgpu_dm_irq_register_interrupt(
- adev, &int_params, dm_crtc_high_irq, c_irq_params);
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ dm_crtc_high_irq, c_irq_params))
+ return -ENOMEM;
}
/* Use otg vertical line interrupt */
@@ -3574,7 +4659,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
vrtl_int_srcid[i], &adev->vline0_irq);
if (r) {
- DRM_ERROR("Failed to add vline0 irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add vline0 irq id!\n");
return r;
}
@@ -3582,9 +4667,11 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
int_params.irq_source =
dc_interrupt_to_irq_source(dc, vrtl_int_srcid[i], 0);
- if (int_params.irq_source == DC_IRQ_SOURCE_INVALID) {
- DRM_ERROR("Failed to register vline0 irq %d!\n", vrtl_int_srcid[i]);
- break;
+ if (int_params.irq_source == DC_IRQ_SOURCE_INVALID ||
+ int_params.irq_source < DC_IRQ_SOURCE_DC1_VLINE0 ||
+ int_params.irq_source > DC_IRQ_SOURCE_DC6_VLINE0) {
+ drm_err(adev_to_drm(adev), "Failed to register vline0 irq!\n");
+ return -EINVAL;
}
c_irq_params = &adev->dm.vline0_params[int_params.irq_source
@@ -3593,8 +4680,10 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
- amdgpu_dm_irq_register_interrupt(adev, &int_params,
- dm_dcn_vertical_interrupt0_high_irq, c_irq_params);
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ dm_dcn_vertical_interrupt0_high_irq,
+ c_irq_params))
+ return -ENOMEM;
}
#endif
@@ -3609,7 +4698,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->vupdate_irq);
if (r) {
- DRM_ERROR("Failed to add vupdate irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add vupdate irq id!\n");
return r;
}
@@ -3617,22 +4706,30 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
int_params.irq_source =
dc_interrupt_to_irq_source(dc, i, 0);
+ if (int_params.irq_source == DC_IRQ_SOURCE_INVALID ||
+ int_params.irq_source < DC_IRQ_SOURCE_VUPDATE1 ||
+ int_params.irq_source > DC_IRQ_SOURCE_VUPDATE6) {
+ drm_err(adev_to_drm(adev), "Failed to register vupdate irq!\n");
+ return -EINVAL;
+ }
+
c_irq_params = &adev->dm.vupdate_params[int_params.irq_source - DC_IRQ_SOURCE_VUPDATE1];
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
- amdgpu_dm_irq_register_interrupt(adev, &int_params,
- dm_vupdate_high_irq, c_irq_params);
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ dm_vupdate_high_irq, c_irq_params))
+ return -ENOMEM;
}
/* Use GRPH_PFLIP interrupt */
for (i = DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT;
- i <= DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT + adev->mode_info.num_crtc - 1;
+ i <= DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT + dc->caps.max_otg_num - 1;
i++) {
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->pageflip_irq);
if (r) {
- DRM_ERROR("Failed to add page flip irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add page flip irq id!\n");
return r;
}
@@ -3640,27 +4737,34 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
int_params.irq_source =
dc_interrupt_to_irq_source(dc, i, 0);
+ if (int_params.irq_source == DC_IRQ_SOURCE_INVALID ||
+ int_params.irq_source < DC_IRQ_SOURCE_PFLIP_FIRST ||
+ int_params.irq_source > DC_IRQ_SOURCE_PFLIP_LAST) {
+ drm_err(adev_to_drm(adev), "Failed to register pflip irq!\n");
+ return -EINVAL;
+ }
+
c_irq_params = &adev->dm.pflip_params[int_params.irq_source - DC_IRQ_SOURCE_PFLIP_FIRST];
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
- amdgpu_dm_irq_register_interrupt(adev, &int_params,
- dm_pflip_high_irq, c_irq_params);
-
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ dm_pflip_high_irq, c_irq_params))
+ return -ENOMEM;
}
/* HPD */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, DCN_1_0__SRCID__DC_HPD1_INT,
&adev->hpd_irq);
if (r) {
- DRM_ERROR("Failed to add hpd irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add hpd irq id!\n");
return r;
}
- register_hpd_handlers(adev);
+ r = register_hpd_handlers(adev);
- return 0;
+ return r;
}
/* Register Outbox IRQ sources and initialize IRQ callbacks */
static int register_outbox_irq_handlers(struct amdgpu_device *adev)
@@ -3676,7 +4780,7 @@ static int register_outbox_irq_handlers(struct amdgpu_device *adev)
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, DCN_1_0__SRCID__DMCUB_OUTBOX_LOW_PRIORITY_READY_INT,
&adev->dmub_outbox_irq);
if (r) {
- DRM_ERROR("Failed to add outbox irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add outbox irq id!\n");
return r;
}
@@ -3691,13 +4795,13 @@ static int register_outbox_irq_handlers(struct amdgpu_device *adev)
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
- amdgpu_dm_irq_register_interrupt(adev, &int_params,
- dm_dmub_outbox1_low_irq, c_irq_params);
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ dm_dmub_outbox1_low_irq, c_irq_params))
+ return -ENOMEM;
}
return 0;
}
-#endif
/*
* Acquires the lock for the atomic state object and returns
@@ -3705,8 +4809,8 @@ static int register_outbox_irq_handlers(struct amdgpu_device *adev)
*
* This should only be called during atomic check.
*/
-static int dm_atomic_get_state(struct drm_atomic_state *state,
- struct dm_atomic_state **dm_state)
+int dm_atomic_get_state(struct drm_atomic_state *state,
+ struct dm_atomic_state **dm_state)
{
struct drm_device *dev = state->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
@@ -3757,7 +4861,7 @@ dm_atomic_duplicate_state(struct drm_private_obj *obj)
old_state = to_dm_atomic_state(obj->state);
if (old_state && old_state->context)
- new_state->context = dc_copy_state(old_state->context);
+ new_state->context = dc_state_create_copy(old_state->context);
if (!new_state->context) {
kfree(new_state);
@@ -3773,7 +4877,7 @@ static void dm_atomic_destroy_state(struct drm_private_obj *obj,
struct dm_atomic_state *dm_state = to_dm_atomic_state(state);
if (dm_state && dm_state->context)
- dc_release_state(dm_state->context);
+ dc_state_release(dm_state->context);
kfree(dm_state);
}
@@ -3797,24 +4901,24 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+ if (adev->asic_type == CHIP_HAWAII)
+ /* disable prefer shadow for now due to hibernation issues */
+ adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+ else
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
/* indicates support for immediate flip */
adev_to_drm(adev)->mode_config.async_page_flip = true;
- adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
-
state = kzalloc(sizeof(*state), GFP_KERNEL);
if (!state)
return -ENOMEM;
- state->context = dc_create_state(adev->dm.dc);
+ state->context = dc_state_create_current_copy(adev->dm.dc);
if (!state->context) {
kfree(state);
return -ENOMEM;
}
- dc_resource_state_copy_construct_current(adev->dm.dc, state->context);
-
drm_atomic_private_obj_init(adev_to_drm(adev),
&adev->dm.atomic_obj,
&state->base,
@@ -3822,14 +4926,22 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
r = amdgpu_display_modeset_create_props(adev);
if (r) {
- dc_release_state(state->context);
+ dc_state_release(state->context);
kfree(state);
return r;
}
+#ifdef AMD_PRIVATE_COLOR
+ if (amdgpu_dm_create_color_properties(adev)) {
+ dc_state_release(state->context);
+ kfree(state);
+ return -ENOMEM;
+ }
+#endif
+
r = amdgpu_dm_audio_init(adev);
if (r) {
- dc_release_state(state->context);
+ dc_state_release(state->context);
kfree(state);
return r;
}
@@ -3839,46 +4951,51 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
#define AMDGPU_DM_DEFAULT_MIN_BACKLIGHT 12
#define AMDGPU_DM_DEFAULT_MAX_BACKLIGHT 255
+#define AMDGPU_DM_MIN_SPREAD ((AMDGPU_DM_DEFAULT_MAX_BACKLIGHT - AMDGPU_DM_DEFAULT_MIN_BACKLIGHT) / 2)
#define AUX_BL_DEFAULT_TRANSITION_TIME_MS 50
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\
- defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
-
static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm,
int bl_idx)
{
+ struct amdgpu_dm_backlight_caps *caps = &dm->backlight_caps[bl_idx];
+
+ if (caps->caps_valid)
+ return;
+
#if defined(CONFIG_ACPI)
- struct amdgpu_dm_backlight_caps caps;
+ amdgpu_acpi_get_backlight_caps(caps);
- memset(&caps, 0, sizeof(caps));
+ /* validate the firmware value is sane */
+ if (caps->caps_valid) {
+ int spread = caps->max_input_signal - caps->min_input_signal;
- if (dm->backlight_caps[bl_idx].caps_valid)
- return;
+ if (caps->max_input_signal > AMDGPU_DM_DEFAULT_MAX_BACKLIGHT ||
+ caps->min_input_signal < 0 ||
+ spread > AMDGPU_DM_DEFAULT_MAX_BACKLIGHT ||
+ spread < AMDGPU_DM_MIN_SPREAD) {
+ DRM_DEBUG_KMS("DM: Invalid backlight caps: min=%d, max=%d\n",
+ caps->min_input_signal, caps->max_input_signal);
+ caps->caps_valid = false;
+ }
+ }
- amdgpu_acpi_get_backlight_caps(&caps);
- if (caps.caps_valid) {
- dm->backlight_caps[bl_idx].caps_valid = true;
- if (caps.aux_support)
- return;
- dm->backlight_caps[bl_idx].min_input_signal = caps.min_input_signal;
- dm->backlight_caps[bl_idx].max_input_signal = caps.max_input_signal;
- } else {
- dm->backlight_caps[bl_idx].min_input_signal =
- AMDGPU_DM_DEFAULT_MIN_BACKLIGHT;
- dm->backlight_caps[bl_idx].max_input_signal =
- AMDGPU_DM_DEFAULT_MAX_BACKLIGHT;
+ if (!caps->caps_valid) {
+ caps->min_input_signal = AMDGPU_DM_DEFAULT_MIN_BACKLIGHT;
+ caps->max_input_signal = AMDGPU_DM_DEFAULT_MAX_BACKLIGHT;
+ caps->caps_valid = true;
}
#else
- if (dm->backlight_caps[bl_idx].aux_support)
+ if (caps->aux_support)
return;
- dm->backlight_caps[bl_idx].min_input_signal = AMDGPU_DM_DEFAULT_MIN_BACKLIGHT;
- dm->backlight_caps[bl_idx].max_input_signal = AMDGPU_DM_DEFAULT_MAX_BACKLIGHT;
+ caps->min_input_signal = AMDGPU_DM_DEFAULT_MIN_BACKLIGHT;
+ caps->max_input_signal = AMDGPU_DM_DEFAULT_MAX_BACKLIGHT;
+ caps->caps_valid = true;
#endif
}
static int get_brightness_range(const struct amdgpu_dm_backlight_caps *caps,
- unsigned *min, unsigned *max)
+ unsigned int *min, unsigned int *max)
{
if (!caps)
return 0;
@@ -3895,66 +5012,187 @@ static int get_brightness_range(const struct amdgpu_dm_backlight_caps *caps,
return 1;
}
+/* Rescale from [min..max] to [0..AMDGPU_MAX_BL_LEVEL] */
+static inline u32 scale_input_to_fw(int min, int max, u64 input)
+{
+ return DIV_ROUND_CLOSEST_ULL(input * AMDGPU_MAX_BL_LEVEL, max - min);
+}
+
+/* Rescale from [0..AMDGPU_MAX_BL_LEVEL] to [min..max] */
+static inline u32 scale_fw_to_input(int min, int max, u64 input)
+{
+ return min + DIV_ROUND_CLOSEST_ULL(input * (max - min), AMDGPU_MAX_BL_LEVEL);
+}
+
+static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *caps,
+ unsigned int min, unsigned int max,
+ uint32_t *user_brightness)
+{
+ u32 brightness = scale_input_to_fw(min, max, *user_brightness);
+ u8 lower_signal, upper_signal, upper_lum, lower_lum, lum;
+ int left, right;
+
+ if (amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE)
+ return;
+
+ if (!caps->data_points)
+ return;
+
+ /*
+ * Handle the case where brightness is below the first data point
+ * Interpolate between (0,0) and (first_signal, first_lum)
+ */
+ if (brightness < caps->luminance_data[0].input_signal) {
+ lum = DIV_ROUND_CLOSEST(caps->luminance_data[0].luminance * brightness,
+ caps->luminance_data[0].input_signal);
+ goto scale;
+ }
+
+ left = 0;
+ right = caps->data_points - 1;
+ while (left <= right) {
+ int mid = left + (right - left) / 2;
+ u8 signal = caps->luminance_data[mid].input_signal;
+
+ /* Exact match found */
+ if (signal == brightness) {
+ lum = caps->luminance_data[mid].luminance;
+ goto scale;
+ }
+
+ if (signal < brightness)
+ left = mid + 1;
+ else
+ right = mid - 1;
+ }
+
+ /* verify bound */
+ if (left >= caps->data_points)
+ left = caps->data_points - 1;
+
+ /* At this point, left > right */
+ lower_signal = caps->luminance_data[right].input_signal;
+ upper_signal = caps->luminance_data[left].input_signal;
+ lower_lum = caps->luminance_data[right].luminance;
+ upper_lum = caps->luminance_data[left].luminance;
+
+ /* interpolate */
+ if (right == left || !lower_lum)
+ lum = upper_lum;
+ else
+ lum = lower_lum + DIV_ROUND_CLOSEST((upper_lum - lower_lum) *
+ (brightness - lower_signal),
+ upper_signal - lower_signal);
+scale:
+ *user_brightness = scale_fw_to_input(min, max,
+ DIV_ROUND_CLOSEST(lum * brightness, 101));
+}
+
static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *caps,
uint32_t brightness)
{
- unsigned min, max;
+ unsigned int min, max;
if (!get_brightness_range(caps, &min, &max))
return brightness;
- // Rescale 0..255 to min..max
- return min + DIV_ROUND_CLOSEST((max - min) * brightness,
- AMDGPU_MAX_BL_LEVEL);
+ convert_custom_brightness(caps, min, max, &brightness);
+
+ // Rescale 0..max to min..max
+ return min + DIV_ROUND_CLOSEST_ULL((u64)(max - min) * brightness, max);
}
static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *caps,
uint32_t brightness)
{
- unsigned min, max;
+ unsigned int min, max;
if (!get_brightness_range(caps, &min, &max))
return brightness;
if (brightness < min)
return 0;
- // Rescale min..max to 0..255
- return DIV_ROUND_CLOSEST(AMDGPU_MAX_BL_LEVEL * (brightness - min),
+ // Rescale min..max to 0..max
+ return DIV_ROUND_CLOSEST_ULL((u64)max * (brightness - min),
max - min);
}
-static int amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm,
+static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm,
int bl_idx,
u32 user_brightness)
{
- struct amdgpu_dm_backlight_caps caps;
+ struct amdgpu_dm_backlight_caps *caps;
struct dc_link *link;
u32 brightness;
- bool rc;
+ bool rc, reallow_idle = false;
+ struct drm_connector *connector;
+
+ list_for_each_entry(connector, &dm->ddev->mode_config.connector_list, head) {
+ struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+
+ if (aconnector->bl_idx != bl_idx)
+ continue;
+
+ /* if connector is off, save the brightness for next time it's on */
+ if (!aconnector->base.encoder) {
+ dm->brightness[bl_idx] = user_brightness;
+ dm->actual_brightness[bl_idx] = 0;
+ return;
+ }
+ }
amdgpu_dm_update_backlight_caps(dm, bl_idx);
- caps = dm->backlight_caps[bl_idx];
+ caps = &dm->backlight_caps[bl_idx];
dm->brightness[bl_idx] = user_brightness;
/* update scratch register */
if (bl_idx == 0)
amdgpu_atombios_scratch_regs_set_backlight_level(dm->adev, dm->brightness[bl_idx]);
- brightness = convert_brightness_from_user(&caps, dm->brightness[bl_idx]);
+ brightness = convert_brightness_from_user(caps, dm->brightness[bl_idx]);
link = (struct dc_link *)dm->backlight_link[bl_idx];
+ /* Apply brightness quirk */
+ if (caps->brightness_mask)
+ brightness |= caps->brightness_mask;
+
/* Change brightness based on AUX property */
- if (caps.aux_support) {
+ mutex_lock(&dm->dc_lock);
+ if (dm->dc->caps.ips_support && dm->dc->ctx->dmub_srv->idle_allowed) {
+ dc_allow_idle_optimizations(dm->dc, false);
+ reallow_idle = true;
+ }
+
+ if (trace_amdgpu_dm_brightness_enabled()) {
+ trace_amdgpu_dm_brightness(__builtin_return_address(0),
+ user_brightness,
+ brightness,
+ caps->aux_support,
+ power_supply_is_system_supplied() > 0);
+ }
+
+ if (caps->aux_support) {
rc = dc_link_set_backlight_level_nits(link, true, brightness,
AUX_BL_DEFAULT_TRANSITION_TIME_MS);
if (!rc)
DRM_DEBUG("DM: Failed to update backlight via AUX on eDP[%d]\n", bl_idx);
} else {
- rc = dc_link_set_backlight_level(link, brightness, 0);
+ struct set_backlight_level_params backlight_level_params = { 0 };
+
+ backlight_level_params.backlight_pwm_u16_16 = brightness;
+ backlight_level_params.transition_time_in_ms = 0;
+
+ rc = dc_link_set_backlight_level(link, &backlight_level_params);
if (!rc)
DRM_DEBUG("DM: Failed to update backlight on eDP[%d]\n", bl_idx);
}
- return rc ? 0 : 1;
+ if (dm->dc->caps.ips_support && reallow_idle)
+ dc_allow_idle_optimizations(dm->dc, true);
+
+ mutex_unlock(&dm->dc_lock);
+
+ if (rc)
+ dm->actual_brightness[bl_idx] = user_brightness;
}
static int amdgpu_dm_backlight_update_status(struct backlight_device *bd)
@@ -3976,6 +5214,7 @@ static int amdgpu_dm_backlight_update_status(struct backlight_device *bd)
static u32 amdgpu_dm_backlight_get_level(struct amdgpu_display_manager *dm,
int bl_idx)
{
+ int ret;
struct amdgpu_dm_backlight_caps caps;
struct dc_link *link = (struct dc_link *)dm->backlight_link[bl_idx];
@@ -3984,19 +5223,18 @@ static u32 amdgpu_dm_backlight_get_level(struct amdgpu_display_manager *dm,
if (caps.aux_support) {
u32 avg, peak;
- bool rc;
- rc = dc_link_get_backlight_level_nits(link, &avg, &peak);
- if (!rc)
+ if (!dc_link_get_backlight_level_nits(link, &avg, &peak))
return dm->brightness[bl_idx];
return convert_brightness_to_user(&caps, avg);
- } else {
- int ret = dc_link_get_backlight_level(link);
-
- if (ret == DC_ERROR_UNEXPECTED)
- return dm->brightness[bl_idx];
- return convert_brightness_to_user(&caps, ret);
}
+
+ ret = dc_link_get_backlight_level(link);
+
+ if (ret == DC_ERROR_UNEXPECTED)
+ return dm->brightness[bl_idx];
+
+ return convert_brightness_to_user(&caps, ret);
}
static int amdgpu_dm_backlight_get_brightness(struct backlight_device *bd)
@@ -4020,33 +5258,59 @@ static const struct backlight_ops amdgpu_dm_backlight_ops = {
};
static void
-amdgpu_dm_register_backlight_device(struct amdgpu_display_manager *dm)
+amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector)
{
- char bl_name[16];
+ struct drm_device *drm = aconnector->base.dev;
+ struct amdgpu_display_manager *dm = &drm_to_adev(drm)->dm;
struct backlight_properties props = { 0 };
+ struct amdgpu_dm_backlight_caps *caps;
+ char bl_name[16];
+ int min, max;
- amdgpu_dm_update_backlight_caps(dm, dm->num_of_edps);
- dm->brightness[dm->num_of_edps] = AMDGPU_MAX_BL_LEVEL;
+ if (aconnector->bl_idx == -1)
+ return;
+
+ if (!acpi_video_backlight_use_native()) {
+ drm_info(drm, "Skipping amdgpu DM backlight registration\n");
+ /* Try registering an ACPI video backlight device instead. */
+ acpi_video_register_backlight();
+ return;
+ }
- props.max_brightness = AMDGPU_MAX_BL_LEVEL;
- props.brightness = AMDGPU_MAX_BL_LEVEL;
+ caps = &dm->backlight_caps[aconnector->bl_idx];
+ if (get_brightness_range(caps, &min, &max)) {
+ if (power_supply_is_system_supplied() > 0)
+ props.brightness = DIV_ROUND_CLOSEST((max - min) * caps->ac_level, 100);
+ else
+ props.brightness = DIV_ROUND_CLOSEST((max - min) * caps->dc_level, 100);
+ /* min is zero, so max needs to be adjusted */
+ props.max_brightness = max - min;
+ drm_dbg(drm, "Backlight caps: min: %d, max: %d, ac %d, dc %d\n", min, max,
+ caps->ac_level, caps->dc_level);
+ } else
+ props.brightness = props.max_brightness = MAX_BACKLIGHT_LEVEL;
+
+ if (caps->data_points && !(amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE)) {
+ drm_info(drm, "Using custom brightness curve\n");
+ props.scale = BACKLIGHT_SCALE_NON_LINEAR;
+ } else
+ props.scale = BACKLIGHT_SCALE_LINEAR;
props.type = BACKLIGHT_RAW;
snprintf(bl_name, sizeof(bl_name), "amdgpu_bl%d",
- adev_to_drm(dm->adev)->primary->index + dm->num_of_edps);
+ drm->primary->index + aconnector->bl_idx);
- dm->backlight_dev[dm->num_of_edps] = backlight_device_register(bl_name,
- adev_to_drm(dm->adev)->dev,
- dm,
- &amdgpu_dm_backlight_ops,
- &props);
+ dm->backlight_dev[aconnector->bl_idx] =
+ backlight_device_register(bl_name, aconnector->base.kdev, dm,
+ &amdgpu_dm_backlight_ops, &props);
+ dm->brightness[aconnector->bl_idx] = props.brightness;
- if (IS_ERR(dm->backlight_dev[dm->num_of_edps]))
- DRM_ERROR("DM: Backlight registration failed!\n");
- else
- DRM_DEBUG_DRIVER("DM: Registered Backlight device: %s\n", bl_name);
+ if (IS_ERR(dm->backlight_dev[aconnector->bl_idx])) {
+ drm_err(drm, "DM: Backlight registration failed!\n");
+ dm->backlight_dev[aconnector->bl_idx] = NULL;
+ } else
+ drm_dbg_driver(drm, "DM: Registered Backlight device: %s\n", bl_name);
}
-#endif
static int initialize_plane(struct amdgpu_display_manager *dm,
struct amdgpu_mode_info *mode_info, int plane_id,
@@ -4059,7 +5323,7 @@ static int initialize_plane(struct amdgpu_display_manager *dm,
plane = kzalloc(sizeof(struct drm_plane), GFP_KERNEL);
if (!plane) {
- DRM_ERROR("KMS: Failed to allocate plane\n");
+ drm_err(adev_to_drm(dm->adev), "KMS: Failed to allocate plane\n");
return -ENOMEM;
}
plane->type = plane_type;
@@ -4077,7 +5341,7 @@ static int initialize_plane(struct amdgpu_display_manager *dm,
ret = amdgpu_dm_plane_init(dm, plane, possible_crtcs, plane_cap);
if (ret) {
- DRM_ERROR("KMS: Failed to initialize plane\n");
+ drm_err(adev_to_drm(dm->adev), "KMS: Failed to initialize plane\n");
kfree(plane);
return ret;
}
@@ -4089,30 +5353,39 @@ static int initialize_plane(struct amdgpu_display_manager *dm,
}
-static void register_backlight_device(struct amdgpu_display_manager *dm,
- struct dc_link *link)
+static void setup_backlight_device(struct amdgpu_display_manager *dm,
+ struct amdgpu_dm_connector *aconnector)
{
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\
- defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
+ struct amdgpu_dm_backlight_caps *caps;
+ struct dc_link *link = aconnector->dc_link;
+ int bl_idx = dm->num_of_edps;
- if ((link->connector_signal & (SIGNAL_TYPE_EDP | SIGNAL_TYPE_LVDS)) &&
- link->type != dc_connection_none) {
- /*
- * Event if registration failed, we should continue with
- * DM initialization because not having a backlight control
- * is better then a black screen.
- */
- if (!dm->backlight_dev[dm->num_of_edps])
- amdgpu_dm_register_backlight_device(dm);
+ if (!(link->connector_signal & (SIGNAL_TYPE_EDP | SIGNAL_TYPE_LVDS)) ||
+ link->type == dc_connection_none)
+ return;
- if (dm->backlight_dev[dm->num_of_edps]) {
- dm->backlight_link[dm->num_of_edps] = link;
- dm->num_of_edps++;
- }
+ if (dm->num_of_edps >= AMDGPU_DM_MAX_NUM_EDP) {
+ drm_warn(adev_to_drm(dm->adev), "Too much eDP connections, skipping backlight setup for additional eDPs\n");
+ return;
}
-#endif
+
+ aconnector->bl_idx = bl_idx;
+
+ amdgpu_dm_update_backlight_caps(dm, bl_idx);
+ dm->backlight_link[bl_idx] = link;
+ dm->num_of_edps++;
+
+ update_connector_ext_caps(aconnector);
+ caps = &dm->backlight_caps[aconnector->bl_idx];
+
+ /* Only offer ABM property when non-OLED and user didn't turn off by module parameter */
+ if (!caps->ext_caps->bits.oled && amdgpu_dm_abm_level < 0)
+ drm_object_attach_property(&aconnector->base.base,
+ dm->adev->mode_info.abm_level_property,
+ ABM_SYSFS_CONTROL);
}
+static void amdgpu_set_panel_orientation(struct drm_connector *connector);
/*
* In this architecture, the association
@@ -4125,29 +5398,36 @@ static void register_backlight_device(struct amdgpu_display_manager *dm,
static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
{
struct amdgpu_display_manager *dm = &adev->dm;
- int32_t i;
+ s32 i;
struct amdgpu_dm_connector *aconnector = NULL;
struct amdgpu_encoder *aencoder = NULL;
struct amdgpu_mode_info *mode_info = &adev->mode_info;
- uint32_t link_cnt;
- int32_t primary_planes;
+ u32 link_cnt;
+ s32 primary_planes;
enum dc_connection_type new_connection_type = dc_connection_none;
const struct dc_plane_cap *plane;
bool psr_feature_enabled = false;
+ bool replay_feature_enabled = false;
+ int max_overlay = dm->dc->caps.max_slave_planes;
dm->display_indexes_num = dm->dc->caps.max_streams;
/* Update the actual used number of crtc */
adev->mode_info.num_crtc = adev->dm.display_indexes_num;
+ amdgpu_dm_set_irq_funcs(adev);
+
link_cnt = dm->dc->caps.max_links;
if (amdgpu_dm_mode_config_init(dm->adev)) {
- DRM_ERROR("DM: Failed to initialize mode config\n");
+ drm_err(adev_to_drm(adev), "DM: Failed to initialize mode config\n");
return -EINVAL;
}
/* There is one primary plane per CRTC */
primary_planes = dm->dc->caps.max_streams;
- ASSERT(primary_planes <= AMDGPU_MAX_PLANES);
+ if (primary_planes > AMDGPU_MAX_PLANES) {
+ drm_err(adev_to_drm(adev), "DM: Plane nums out of 6 planes\n");
+ return -EINVAL;
+ }
/*
* Initialize primary planes, implicit planes for legacy IOCTLS.
@@ -4158,7 +5438,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
if (initialize_plane(dm, mode_info, i,
DRM_PLANE_TYPE_PRIMARY, plane)) {
- DRM_ERROR("KMS: Failed to initialize primary plane\n");
+ drm_err(adev_to_drm(adev), "KMS: Failed to initialize primary plane\n");
goto fail;
}
}
@@ -4175,53 +5455,71 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
for (i = 0; i < dm->dc->caps.max_planes; ++i) {
struct dc_plane_cap *plane = &dm->dc->caps.planes[i];
- if (plane->type != DC_PLANE_TYPE_DCN_UNIVERSAL)
- continue;
+ /* Do not create overlay if MPO disabled */
+ if (amdgpu_dc_debug_mask & DC_DISABLE_MPO)
+ break;
- if (!plane->blends_with_above || !plane->blends_with_below)
+ if (plane->type != DC_PLANE_TYPE_DCN_UNIVERSAL)
continue;
if (!plane->pixel_format_support.argb8888)
continue;
+ if (max_overlay-- == 0)
+ break;
+
if (initialize_plane(dm, NULL, primary_planes + i,
DRM_PLANE_TYPE_OVERLAY, plane)) {
- DRM_ERROR("KMS: Failed to initialize overlay plane\n");
+ drm_err(adev_to_drm(adev), "KMS: Failed to initialize overlay plane\n");
goto fail;
}
-
- /* Only create one overlay plane. */
- break;
}
for (i = 0; i < dm->dc->caps.max_streams; i++)
if (amdgpu_dm_crtc_init(dm, mode_info->planes[i], i)) {
- DRM_ERROR("KMS: Failed to initialize crtc\n");
+ drm_err(adev_to_drm(adev), "KMS: Failed to initialize crtc\n");
goto fail;
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
/* Use Outbox interrupt */
- switch (adev->ip_versions[DCE_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(3, 0, 0):
case IP_VERSION(3, 1, 2):
case IP_VERSION(3, 1, 3):
+ case IP_VERSION(3, 1, 4):
+ case IP_VERSION(3, 1, 5):
+ case IP_VERSION(3, 1, 6):
+ case IP_VERSION(3, 2, 0):
+ case IP_VERSION(3, 2, 1):
case IP_VERSION(2, 1, 0):
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 6, 0):
+ case IP_VERSION(4, 0, 1):
if (register_outbox_irq_handlers(dm->adev)) {
- DRM_ERROR("DM: Failed to initialize IRQ\n");
+ drm_err(adev_to_drm(adev), "DM: Failed to initialize IRQ\n");
goto fail;
}
break;
default:
DRM_DEBUG_KMS("Unsupported DCN IP version for outbox: 0x%X\n",
- adev->ip_versions[DCE_HWIP][0]);
+ amdgpu_ip_version(adev, DCE_HWIP, 0));
}
/* Determine whether to enable PSR support by default. */
if (!(amdgpu_dc_debug_mask & DC_DISABLE_PSR)) {
- switch (adev->ip_versions[DCE_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(3, 1, 2):
case IP_VERSION(3, 1, 3):
+ case IP_VERSION(3, 1, 4):
+ case IP_VERSION(3, 1, 5):
+ case IP_VERSION(3, 1, 6):
+ case IP_VERSION(3, 2, 0):
+ case IP_VERSION(3, 2, 1):
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 6, 0):
+ case IP_VERSION(4, 0, 1):
psr_feature_enabled = true;
break;
default:
@@ -4229,16 +5527,55 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
break;
}
}
-#endif
+
+ /* Determine whether to enable Replay support by default. */
+ if (!(amdgpu_dc_debug_mask & DC_DISABLE_REPLAY)) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(3, 1, 4):
+ case IP_VERSION(3, 2, 0):
+ case IP_VERSION(3, 2, 1):
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 6, 0):
+ replay_feature_enabled = true;
+ break;
+
+ default:
+ replay_feature_enabled = amdgpu_dc_feature_mask & DC_REPLAY_MASK;
+ break;
+ }
+ }
+
+ if (link_cnt > MAX_LINKS) {
+ drm_err(adev_to_drm(adev),
+ "KMS: Cannot support more than %d display indexes\n",
+ MAX_LINKS);
+ goto fail;
+ }
/* loops over all connectors on the board */
for (i = 0; i < link_cnt; i++) {
struct dc_link *link = NULL;
- if (i > AMDGPU_DM_MAX_DISPLAY_INDEX) {
- DRM_ERROR(
- "KMS: Cannot support more than %d display indexes\n",
- AMDGPU_DM_MAX_DISPLAY_INDEX);
+ link = dc_get_link_at_index(dm->dc, i);
+
+ if (link->connector_signal == SIGNAL_TYPE_VIRTUAL) {
+ struct amdgpu_dm_wb_connector *wbcon = kzalloc(sizeof(*wbcon), GFP_KERNEL);
+
+ if (!wbcon) {
+ drm_err(adev_to_drm(adev), "KMS: Failed to allocate writeback connector\n");
+ continue;
+ }
+
+ if (amdgpu_dm_wb_connector_init(dm, wbcon, i)) {
+ drm_err(adev_to_drm(adev), "KMS: Failed to initialize writeback connector\n");
+ kfree(wbcon);
+ continue;
+ }
+
+ link->psr_settings.psr_feature_enabled = false;
+ link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED;
+
continue;
}
@@ -4251,36 +5588,62 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
goto fail;
if (amdgpu_dm_encoder_init(dm->ddev, aencoder, i)) {
- DRM_ERROR("KMS: Failed to initialize encoder\n");
+ drm_err(adev_to_drm(adev), "KMS: Failed to initialize encoder\n");
goto fail;
}
if (amdgpu_dm_connector_init(dm, aconnector, i, aencoder)) {
- DRM_ERROR("KMS: Failed to initialize connector\n");
+ drm_err(adev_to_drm(adev), "KMS: Failed to initialize connector\n");
goto fail;
}
- link = dc_get_link_at_index(dm->dc, i);
+ if (dm->hpd_rx_offload_wq)
+ dm->hpd_rx_offload_wq[aconnector->base.index].aconnector =
+ aconnector;
- if (!dc_link_detect_sink(link, &new_connection_type))
- DRM_ERROR("KMS: Failed to detect connector\n");
+ if (!dc_link_detect_connection_type(link, &new_connection_type))
+ drm_err(adev_to_drm(adev), "KMS: Failed to detect connector\n");
if (aconnector->base.force && new_connection_type == dc_connection_none) {
emulated_link_detect(link);
amdgpu_dm_update_connector_after_detect(aconnector);
+ } else {
+ bool ret = false;
- } else if (dc_link_detect(link, DETECT_REASON_BOOT)) {
- amdgpu_dm_update_connector_after_detect(aconnector);
- register_backlight_device(dm, link);
- if (dm->num_of_edps)
- update_connector_ext_caps(aconnector);
- if (psr_feature_enabled)
- amdgpu_dm_set_psr_caps(link);
- }
-
+ mutex_lock(&dm->dc_lock);
+ dc_exit_ips_for_hw_access(dm->dc);
+ ret = dc_link_detect(link, DETECT_REASON_BOOT);
+ mutex_unlock(&dm->dc_lock);
+ if (ret) {
+ amdgpu_dm_update_connector_after_detect(aconnector);
+ setup_backlight_device(dm, aconnector);
+
+ /* Disable PSR if Replay can be enabled */
+ if (replay_feature_enabled)
+ if (amdgpu_dm_set_replay_caps(link, aconnector))
+ psr_feature_enabled = false;
+
+ if (psr_feature_enabled) {
+ amdgpu_dm_set_psr_caps(link);
+ drm_info(adev_to_drm(adev), "PSR support %d, DC PSR ver %d, sink PSR ver %d DPCD caps 0x%x su_y_granularity %d\n",
+ link->psr_settings.psr_feature_enabled,
+ link->psr_settings.psr_version,
+ link->dpcd_caps.psr_info.psr_version,
+ link->dpcd_caps.psr_info.psr_dpcd_caps.raw,
+ link->dpcd_caps.psr_info.psr2_su_y_granularity_cap);
+ }
+ }
+ }
+ amdgpu_set_panel_orientation(&aconnector->base);
}
+ /* Debug dump: list all DC links and their associated sinks after detection
+ * is complete for all connectors. This provides a comprehensive view of the
+ * final state without repeating the dump for each connector.
+ */
+ amdgpu_dm_dump_links_and_sinks(adev);
+
/* Software is initialized. Now we can register interrupt handlers. */
switch (adev->asic_type) {
#if defined(CONFIG_DRM_AMD_DC_SI)
@@ -4289,7 +5652,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
case CHIP_VERDE:
case CHIP_OLAND:
if (dce60_register_irq_handlers(dm->adev)) {
- DRM_ERROR("DM: Failed to initialize IRQ\n");
+ drm_err(adev_to_drm(adev), "DM: Failed to initialize IRQ\n");
goto fail;
}
break;
@@ -4311,13 +5674,12 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
case CHIP_VEGA12:
case CHIP_VEGA20:
if (dce110_register_irq_handlers(dm->adev)) {
- DRM_ERROR("DM: Failed to initialize IRQ\n");
+ drm_err(adev_to_drm(adev), "DM: Failed to initialize IRQ\n");
goto fail;
}
break;
default:
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- switch (adev->ip_versions[DCE_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(1, 0, 0):
case IP_VERSION(1, 0, 1):
case IP_VERSION(2, 0, 2):
@@ -4330,17 +5692,25 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
case IP_VERSION(3, 0, 1):
case IP_VERSION(3, 1, 2):
case IP_VERSION(3, 1, 3):
+ case IP_VERSION(3, 1, 4):
+ case IP_VERSION(3, 1, 5):
+ case IP_VERSION(3, 1, 6):
+ case IP_VERSION(3, 2, 0):
+ case IP_VERSION(3, 2, 1):
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 6, 0):
+ case IP_VERSION(4, 0, 1):
if (dcn10_register_irq_handlers(dm->adev)) {
- DRM_ERROR("DM: Failed to initialize IRQ\n");
+ drm_err(adev_to_drm(adev), "DM: Failed to initialize IRQ\n");
goto fail;
}
break;
default:
- DRM_ERROR("Unsupported DCE IP versions: 0x%X\n",
- adev->ip_versions[DCE_HWIP][0]);
+ drm_err(adev_to_drm(adev), "Unsupported DCE IP versions: 0x%X\n",
+ amdgpu_ip_version(adev, DCE_HWIP, 0));
goto fail;
}
-#endif
break;
}
@@ -4354,8 +5724,8 @@ fail:
static void amdgpu_dm_destroy_drm_device(struct amdgpu_display_manager *dm)
{
- drm_atomic_private_obj_fini(&dm->atomic_obj);
- return;
+ if (dm->atomic_obj.state)
+ drm_atomic_private_obj_fini(&dm->atomic_obj);
}
/******************************************************************************
@@ -4399,15 +5769,20 @@ static ssize_t s3_debug_store(struct device *device,
int s3_state;
struct drm_device *drm_dev = dev_get_drvdata(device);
struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ struct amdgpu_ip_block *ip_block;
+
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE);
+ if (!ip_block)
+ return -EINVAL;
ret = kstrtoint(buf, 0, &s3_state);
if (ret == 0) {
if (s3_state) {
- dm_resume(adev);
+ dm_resume(ip_block);
drm_kms_helper_hotplug_event(adev_to_drm(adev));
} else
- dm_suspend(adev);
+ dm_suspend(ip_block);
}
return ret == 0 ? count : 0;
@@ -4417,9 +5792,86 @@ DEVICE_ATTR_WO(s3_debug);
#endif
-static int dm_early_init(void *handle)
+static int dm_init_microcode(struct amdgpu_device *adev)
+{
+ char *fw_name_dmub;
+ int r;
+
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(2, 1, 0):
+ fw_name_dmub = FIRMWARE_RENOIR_DMUB;
+ if (ASICREV_IS_GREEN_SARDINE(adev->external_rev_id))
+ fw_name_dmub = FIRMWARE_GREEN_SARDINE_DMUB;
+ break;
+ case IP_VERSION(3, 0, 0):
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 0))
+ fw_name_dmub = FIRMWARE_SIENNA_CICHLID_DMUB;
+ else
+ fw_name_dmub = FIRMWARE_NAVY_FLOUNDER_DMUB;
+ break;
+ case IP_VERSION(3, 0, 1):
+ fw_name_dmub = FIRMWARE_VANGOGH_DMUB;
+ break;
+ case IP_VERSION(3, 0, 2):
+ fw_name_dmub = FIRMWARE_DIMGREY_CAVEFISH_DMUB;
+ break;
+ case IP_VERSION(3, 0, 3):
+ fw_name_dmub = FIRMWARE_BEIGE_GOBY_DMUB;
+ break;
+ case IP_VERSION(3, 1, 2):
+ case IP_VERSION(3, 1, 3):
+ fw_name_dmub = FIRMWARE_YELLOW_CARP_DMUB;
+ break;
+ case IP_VERSION(3, 1, 4):
+ fw_name_dmub = FIRMWARE_DCN_314_DMUB;
+ break;
+ case IP_VERSION(3, 1, 5):
+ fw_name_dmub = FIRMWARE_DCN_315_DMUB;
+ break;
+ case IP_VERSION(3, 1, 6):
+ fw_name_dmub = FIRMWARE_DCN316_DMUB;
+ break;
+ case IP_VERSION(3, 2, 0):
+ fw_name_dmub = FIRMWARE_DCN_V3_2_0_DMCUB;
+ break;
+ case IP_VERSION(3, 2, 1):
+ fw_name_dmub = FIRMWARE_DCN_V3_2_1_DMCUB;
+ break;
+ case IP_VERSION(3, 5, 0):
+ fw_name_dmub = FIRMWARE_DCN_35_DMUB;
+ break;
+ case IP_VERSION(3, 5, 1):
+ fw_name_dmub = FIRMWARE_DCN_351_DMUB;
+ break;
+ case IP_VERSION(3, 6, 0):
+ fw_name_dmub = FIRMWARE_DCN_36_DMUB;
+ break;
+ case IP_VERSION(4, 0, 1):
+ fw_name_dmub = FIRMWARE_DCN_401_DMUB;
+ break;
+ default:
+ /* ASIC doesn't support DMUB. */
+ return 0;
+ }
+ r = amdgpu_ucode_request(adev, &adev->dm.dmub_fw, AMDGPU_UCODE_REQUIRED,
+ "%s", fw_name_dmub);
+ return r;
+}
+
+static int dm_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ struct atom_context *ctx = mode_info->atom_context;
+ int index = GetIndexIntoMasterTable(DATA, Object_Header);
+ u16 data_offset;
+
+ /* if there is no object header, skip DM */
+ if (!amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) {
+ adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
+ drm_info(adev_to_drm(adev), "No object header, skipping DM\n");
+ return -ENOENT;
+ }
switch (adev->asic_type) {
#if defined(CONFIG_DRM_AMD_DC_SI)
@@ -4489,8 +5941,8 @@ static int dm_early_init(void *handle)
adev->mode_info.num_dig = 6;
break;
default:
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- switch (adev->ip_versions[DCE_HWIP][0]) {
+
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(2, 0, 2):
case IP_VERSION(3, 0, 0):
adev->mode_info.num_crtc = 6;
@@ -4515,21 +5967,27 @@ static int dm_early_init(void *handle)
case IP_VERSION(2, 1, 0):
case IP_VERSION(3, 1, 2):
case IP_VERSION(3, 1, 3):
+ case IP_VERSION(3, 1, 4):
+ case IP_VERSION(3, 1, 5):
+ case IP_VERSION(3, 1, 6):
+ case IP_VERSION(3, 2, 0):
+ case IP_VERSION(3, 2, 1):
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 6, 0):
+ case IP_VERSION(4, 0, 1):
adev->mode_info.num_crtc = 4;
adev->mode_info.num_hpd = 4;
adev->mode_info.num_dig = 4;
break;
default:
- DRM_ERROR("Unsupported DCE IP versions: 0x%x\n",
- adev->ip_versions[DCE_HWIP][0]);
+ drm_err(adev_to_drm(adev), "Unsupported DCE IP versions: 0x%x\n",
+ amdgpu_ip_version(adev, DCE_HWIP, 0));
return -EINVAL;
}
-#endif
break;
}
- amdgpu_dm_set_irq_funcs(adev);
-
if (adev->mode_info.funcs == NULL)
adev->mode_info.funcs = &dm_display_funcs;
@@ -4543,15 +6001,9 @@ static int dm_early_init(void *handle)
adev_to_drm(adev)->dev,
&dev_attr_s3_debug);
#endif
+ adev->dc_enabled = true;
- return 0;
-}
-
-static bool modeset_required(struct drm_crtc_state *crtc_state,
- struct dc_stream_state *new_stream,
- struct dc_stream_state *old_stream)
-{
- return crtc_state->active && drm_atomic_crtc_needs_modeset(crtc_state);
+ return dm_init_microcode(adev);
}
static bool modereset_required(struct drm_crtc_state *crtc_state)
@@ -4569,811 +6021,6 @@ static const struct drm_encoder_funcs amdgpu_dm_encoder_funcs = {
.destroy = amdgpu_dm_encoder_destroy,
};
-
-static void get_min_max_dc_plane_scaling(struct drm_device *dev,
- struct drm_framebuffer *fb,
- int *min_downscale, int *max_upscale)
-{
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct dc *dc = adev->dm.dc;
- /* Caps for all supported planes are the same on DCE and DCN 1 - 3 */
- struct dc_plane_cap *plane_cap = &dc->caps.planes[0];
-
- switch (fb->format->format) {
- case DRM_FORMAT_P010:
- case DRM_FORMAT_NV12:
- case DRM_FORMAT_NV21:
- *max_upscale = plane_cap->max_upscale_factor.nv12;
- *min_downscale = plane_cap->max_downscale_factor.nv12;
- break;
-
- case DRM_FORMAT_XRGB16161616F:
- case DRM_FORMAT_ARGB16161616F:
- case DRM_FORMAT_XBGR16161616F:
- case DRM_FORMAT_ABGR16161616F:
- *max_upscale = plane_cap->max_upscale_factor.fp16;
- *min_downscale = plane_cap->max_downscale_factor.fp16;
- break;
-
- default:
- *max_upscale = plane_cap->max_upscale_factor.argb8888;
- *min_downscale = plane_cap->max_downscale_factor.argb8888;
- break;
- }
-
- /*
- * A factor of 1 in the plane_cap means to not allow scaling, ie. use a
- * scaling factor of 1.0 == 1000 units.
- */
- if (*max_upscale == 1)
- *max_upscale = 1000;
-
- if (*min_downscale == 1)
- *min_downscale = 1000;
-}
-
-
-static int fill_dc_scaling_info(struct amdgpu_device *adev,
- const struct drm_plane_state *state,
- struct dc_scaling_info *scaling_info)
-{
- int scale_w, scale_h, min_downscale, max_upscale;
-
- memset(scaling_info, 0, sizeof(*scaling_info));
-
- /* Source is fixed 16.16 but we ignore mantissa for now... */
- scaling_info->src_rect.x = state->src_x >> 16;
- scaling_info->src_rect.y = state->src_y >> 16;
-
- /*
- * For reasons we don't (yet) fully understand a non-zero
- * src_y coordinate into an NV12 buffer can cause a
- * system hang on DCN1x.
- * To avoid hangs (and maybe be overly cautious)
- * let's reject both non-zero src_x and src_y.
- *
- * We currently know of only one use-case to reproduce a
- * scenario with non-zero src_x and src_y for NV12, which
- * is to gesture the YouTube Android app into full screen
- * on ChromeOS.
- */
- if (((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) ||
- (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1))) &&
- (state->fb && state->fb->format->format == DRM_FORMAT_NV12 &&
- (scaling_info->src_rect.x != 0 || scaling_info->src_rect.y != 0)))
- return -EINVAL;
-
- scaling_info->src_rect.width = state->src_w >> 16;
- if (scaling_info->src_rect.width == 0)
- return -EINVAL;
-
- scaling_info->src_rect.height = state->src_h >> 16;
- if (scaling_info->src_rect.height == 0)
- return -EINVAL;
-
- scaling_info->dst_rect.x = state->crtc_x;
- scaling_info->dst_rect.y = state->crtc_y;
-
- if (state->crtc_w == 0)
- return -EINVAL;
-
- scaling_info->dst_rect.width = state->crtc_w;
-
- if (state->crtc_h == 0)
- return -EINVAL;
-
- scaling_info->dst_rect.height = state->crtc_h;
-
- /* DRM doesn't specify clipping on destination output. */
- scaling_info->clip_rect = scaling_info->dst_rect;
-
- /* Validate scaling per-format with DC plane caps */
- if (state->plane && state->plane->dev && state->fb) {
- get_min_max_dc_plane_scaling(state->plane->dev, state->fb,
- &min_downscale, &max_upscale);
- } else {
- min_downscale = 250;
- max_upscale = 16000;
- }
-
- scale_w = scaling_info->dst_rect.width * 1000 /
- scaling_info->src_rect.width;
-
- if (scale_w < min_downscale || scale_w > max_upscale)
- return -EINVAL;
-
- scale_h = scaling_info->dst_rect.height * 1000 /
- scaling_info->src_rect.height;
-
- if (scale_h < min_downscale || scale_h > max_upscale)
- return -EINVAL;
-
- /*
- * The "scaling_quality" can be ignored for now, quality = 0 has DC
- * assume reasonable defaults based on the format.
- */
-
- return 0;
-}
-
-static void
-fill_gfx8_tiling_info_from_flags(union dc_tiling_info *tiling_info,
- uint64_t tiling_flags)
-{
- /* Fill GFX8 params */
- if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == DC_ARRAY_2D_TILED_THIN1) {
- unsigned int bankw, bankh, mtaspect, tile_split, num_banks;
-
- bankw = AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
- bankh = AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
- mtaspect = AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
- tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT);
- num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
-
- /* XXX fix me for VI */
- tiling_info->gfx8.num_banks = num_banks;
- tiling_info->gfx8.array_mode =
- DC_ARRAY_2D_TILED_THIN1;
- tiling_info->gfx8.tile_split = tile_split;
- tiling_info->gfx8.bank_width = bankw;
- tiling_info->gfx8.bank_height = bankh;
- tiling_info->gfx8.tile_aspect = mtaspect;
- tiling_info->gfx8.tile_mode =
- DC_ADDR_SURF_MICRO_TILING_DISPLAY;
- } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE)
- == DC_ARRAY_1D_TILED_THIN1) {
- tiling_info->gfx8.array_mode = DC_ARRAY_1D_TILED_THIN1;
- }
-
- tiling_info->gfx8.pipe_config =
- AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
-}
-
-static void
-fill_gfx9_tiling_info_from_device(const struct amdgpu_device *adev,
- union dc_tiling_info *tiling_info)
-{
- tiling_info->gfx9.num_pipes =
- adev->gfx.config.gb_addr_config_fields.num_pipes;
- tiling_info->gfx9.num_banks =
- adev->gfx.config.gb_addr_config_fields.num_banks;
- tiling_info->gfx9.pipe_interleave =
- adev->gfx.config.gb_addr_config_fields.pipe_interleave_size;
- tiling_info->gfx9.num_shader_engines =
- adev->gfx.config.gb_addr_config_fields.num_se;
- tiling_info->gfx9.max_compressed_frags =
- adev->gfx.config.gb_addr_config_fields.max_compress_frags;
- tiling_info->gfx9.num_rb_per_se =
- adev->gfx.config.gb_addr_config_fields.num_rb_per_se;
- tiling_info->gfx9.shaderEnable = 1;
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
- tiling_info->gfx9.num_pkrs = adev->gfx.config.gb_addr_config_fields.num_pkrs;
-}
-
-static int
-validate_dcc(struct amdgpu_device *adev,
- const enum surface_pixel_format format,
- const enum dc_rotation_angle rotation,
- const union dc_tiling_info *tiling_info,
- const struct dc_plane_dcc_param *dcc,
- const struct dc_plane_address *address,
- const struct plane_size *plane_size)
-{
- struct dc *dc = adev->dm.dc;
- struct dc_dcc_surface_param input;
- struct dc_surface_dcc_cap output;
-
- memset(&input, 0, sizeof(input));
- memset(&output, 0, sizeof(output));
-
- if (!dcc->enable)
- return 0;
-
- if (format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN ||
- !dc->cap_funcs.get_dcc_compression_cap)
- return -EINVAL;
-
- input.format = format;
- input.surface_size.width = plane_size->surface_size.width;
- input.surface_size.height = plane_size->surface_size.height;
- input.swizzle_mode = tiling_info->gfx9.swizzle;
-
- if (rotation == ROTATION_ANGLE_0 || rotation == ROTATION_ANGLE_180)
- input.scan = SCAN_DIRECTION_HORIZONTAL;
- else if (rotation == ROTATION_ANGLE_90 || rotation == ROTATION_ANGLE_270)
- input.scan = SCAN_DIRECTION_VERTICAL;
-
- if (!dc->cap_funcs.get_dcc_compression_cap(dc, &input, &output))
- return -EINVAL;
-
- if (!output.capable)
- return -EINVAL;
-
- if (dcc->independent_64b_blks == 0 &&
- output.grph.rgb.independent_64b_blks != 0)
- return -EINVAL;
-
- return 0;
-}
-
-static bool
-modifier_has_dcc(uint64_t modifier)
-{
- return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier);
-}
-
-static unsigned
-modifier_gfx9_swizzle_mode(uint64_t modifier)
-{
- if (modifier == DRM_FORMAT_MOD_LINEAR)
- return 0;
-
- return AMD_FMT_MOD_GET(TILE, modifier);
-}
-
-static const struct drm_format_info *
-amd_get_format_info(const struct drm_mode_fb_cmd2 *cmd)
-{
- return amdgpu_lookup_format_info(cmd->pixel_format, cmd->modifier[0]);
-}
-
-static void
-fill_gfx9_tiling_info_from_modifier(const struct amdgpu_device *adev,
- union dc_tiling_info *tiling_info,
- uint64_t modifier)
-{
- unsigned int mod_bank_xor_bits = AMD_FMT_MOD_GET(BANK_XOR_BITS, modifier);
- unsigned int mod_pipe_xor_bits = AMD_FMT_MOD_GET(PIPE_XOR_BITS, modifier);
- unsigned int pkrs_log2 = AMD_FMT_MOD_GET(PACKERS, modifier);
- unsigned int pipes_log2 = min(4u, mod_pipe_xor_bits);
-
- fill_gfx9_tiling_info_from_device(adev, tiling_info);
-
- if (!IS_AMD_FMT_MOD(modifier))
- return;
-
- tiling_info->gfx9.num_pipes = 1u << pipes_log2;
- tiling_info->gfx9.num_shader_engines = 1u << (mod_pipe_xor_bits - pipes_log2);
-
- if (adev->family >= AMDGPU_FAMILY_NV) {
- tiling_info->gfx9.num_pkrs = 1u << pkrs_log2;
- } else {
- tiling_info->gfx9.num_banks = 1u << mod_bank_xor_bits;
-
- /* for DCC we know it isn't rb aligned, so rb_per_se doesn't matter. */
- }
-}
-
-enum dm_micro_swizzle {
- MICRO_SWIZZLE_Z = 0,
- MICRO_SWIZZLE_S = 1,
- MICRO_SWIZZLE_D = 2,
- MICRO_SWIZZLE_R = 3
-};
-
-static bool dm_plane_format_mod_supported(struct drm_plane *plane,
- uint32_t format,
- uint64_t modifier)
-{
- struct amdgpu_device *adev = drm_to_adev(plane->dev);
- const struct drm_format_info *info = drm_format_info(format);
- int i;
-
- enum dm_micro_swizzle microtile = modifier_gfx9_swizzle_mode(modifier) & 3;
-
- if (!info)
- return false;
-
- /*
- * We always have to allow these modifiers:
- * 1. Core DRM checks for LINEAR support if userspace does not provide modifiers.
- * 2. Not passing any modifiers is the same as explicitly passing INVALID.
- */
- if (modifier == DRM_FORMAT_MOD_LINEAR ||
- modifier == DRM_FORMAT_MOD_INVALID) {
- return true;
- }
-
- /* Check that the modifier is on the list of the plane's supported modifiers. */
- for (i = 0; i < plane->modifier_count; i++) {
- if (modifier == plane->modifiers[i])
- break;
- }
- if (i == plane->modifier_count)
- return false;
-
- /*
- * For D swizzle the canonical modifier depends on the bpp, so check
- * it here.
- */
- if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX9 &&
- adev->family >= AMDGPU_FAMILY_NV) {
- if (microtile == MICRO_SWIZZLE_D && info->cpp[0] == 4)
- return false;
- }
-
- if (adev->family >= AMDGPU_FAMILY_RV && microtile == MICRO_SWIZZLE_D &&
- info->cpp[0] < 8)
- return false;
-
- if (modifier_has_dcc(modifier)) {
- /* Per radeonsi comments 16/64 bpp are more complicated. */
- if (info->cpp[0] != 4)
- return false;
- /* We support multi-planar formats, but not when combined with
- * additional DCC metadata planes. */
- if (info->num_planes > 1)
- return false;
- }
-
- return true;
-}
-
-static void
-add_modifier(uint64_t **mods, uint64_t *size, uint64_t *cap, uint64_t mod)
-{
- if (!*mods)
- return;
-
- if (*cap - *size < 1) {
- uint64_t new_cap = *cap * 2;
- uint64_t *new_mods = kmalloc(new_cap * sizeof(uint64_t), GFP_KERNEL);
-
- if (!new_mods) {
- kfree(*mods);
- *mods = NULL;
- return;
- }
-
- memcpy(new_mods, *mods, sizeof(uint64_t) * *size);
- kfree(*mods);
- *mods = new_mods;
- *cap = new_cap;
- }
-
- (*mods)[*size] = mod;
- *size += 1;
-}
-
-static void
-add_gfx9_modifiers(const struct amdgpu_device *adev,
- uint64_t **mods, uint64_t *size, uint64_t *capacity)
-{
- int pipes = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes);
- int pipe_xor_bits = min(8, pipes +
- ilog2(adev->gfx.config.gb_addr_config_fields.num_se));
- int bank_xor_bits = min(8 - pipe_xor_bits,
- ilog2(adev->gfx.config.gb_addr_config_fields.num_banks));
- int rb = ilog2(adev->gfx.config.gb_addr_config_fields.num_se) +
- ilog2(adev->gfx.config.gb_addr_config_fields.num_rb_per_se);
-
-
- if (adev->family == AMDGPU_FAMILY_RV) {
- /* Raven2 and later */
- bool has_constant_encode = adev->asic_type > CHIP_RAVEN || adev->external_rev_id >= 0x81;
-
- /*
- * No _D DCC swizzles yet because we only allow 32bpp, which
- * doesn't support _D on DCN
- */
-
- if (has_constant_encode) {
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) |
- AMD_FMT_MOD_SET(DCC, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
- AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) |
- AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1));
- }
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) |
- AMD_FMT_MOD_SET(DCC, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
- AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) |
- AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 0));
-
- if (has_constant_encode) {
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) |
- AMD_FMT_MOD_SET(DCC, 1) |
- AMD_FMT_MOD_SET(DCC_RETILE, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
- AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) |
-
- AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
- AMD_FMT_MOD_SET(RB, rb) |
- AMD_FMT_MOD_SET(PIPE, pipes));
- }
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) |
- AMD_FMT_MOD_SET(DCC, 1) |
- AMD_FMT_MOD_SET(DCC_RETILE, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
- AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) |
- AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 0) |
- AMD_FMT_MOD_SET(RB, rb) |
- AMD_FMT_MOD_SET(PIPE, pipes));
- }
-
- /*
- * Only supported for 64bpp on Raven, will be filtered on format in
- * dm_plane_format_mod_supported.
- */
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits));
-
- if (adev->family == AMDGPU_FAMILY_RV) {
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits));
- }
-
- /*
- * Only supported for 64bpp on Raven, will be filtered on format in
- * dm_plane_format_mod_supported.
- */
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
-
- if (adev->family == AMDGPU_FAMILY_RV) {
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
- }
-}
-
-static void
-add_gfx10_1_modifiers(const struct amdgpu_device *adev,
- uint64_t **mods, uint64_t *size, uint64_t *capacity)
-{
- int pipe_xor_bits = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes);
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(DCC, 1) |
- AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
- AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B));
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(DCC, 1) |
- AMD_FMT_MOD_SET(DCC_RETILE, 1) |
- AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
- AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B));
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits));
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits));
-
-
- /* Only supported for 64bpp, will be filtered in dm_plane_format_mod_supported */
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
-}
-
-static void
-add_gfx10_3_modifiers(const struct amdgpu_device *adev,
- uint64_t **mods, uint64_t *size, uint64_t *capacity)
-{
- int pipe_xor_bits = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes);
- int pkrs = ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs);
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(PACKERS, pkrs) |
- AMD_FMT_MOD_SET(DCC, 1) |
- AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
- AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B));
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(PACKERS, pkrs) |
- AMD_FMT_MOD_SET(DCC, 1) |
- AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
- AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B));
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(PACKERS, pkrs) |
- AMD_FMT_MOD_SET(DCC, 1) |
- AMD_FMT_MOD_SET(DCC_RETILE, 1) |
- AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
- AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B));
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(PACKERS, pkrs) |
- AMD_FMT_MOD_SET(DCC, 1) |
- AMD_FMT_MOD_SET(DCC_RETILE, 1) |
- AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
- AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B));
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(PACKERS, pkrs));
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(PACKERS, pkrs));
-
- /* Only supported for 64bpp, will be filtered in dm_plane_format_mod_supported */
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
-}
-
-static int
-get_plane_modifiers(const struct amdgpu_device *adev, unsigned int plane_type, uint64_t **mods)
-{
- uint64_t size = 0, capacity = 128;
- *mods = NULL;
-
- /* We have not hooked up any pre-GFX9 modifiers. */
- if (adev->family < AMDGPU_FAMILY_AI)
- return 0;
-
- *mods = kmalloc(capacity * sizeof(uint64_t), GFP_KERNEL);
-
- if (plane_type == DRM_PLANE_TYPE_CURSOR) {
- add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR);
- add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_INVALID);
- return *mods ? 0 : -ENOMEM;
- }
-
- switch (adev->family) {
- case AMDGPU_FAMILY_AI:
- case AMDGPU_FAMILY_RV:
- add_gfx9_modifiers(adev, mods, &size, &capacity);
- break;
- case AMDGPU_FAMILY_NV:
- case AMDGPU_FAMILY_VGH:
- case AMDGPU_FAMILY_YC:
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
- add_gfx10_3_modifiers(adev, mods, &size, &capacity);
- else
- add_gfx10_1_modifiers(adev, mods, &size, &capacity);
- break;
- }
-
- add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR);
-
- /* INVALID marks the end of the list. */
- add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_INVALID);
-
- if (!*mods)
- return -ENOMEM;
-
- return 0;
-}
-
-static int
-fill_gfx9_plane_attributes_from_modifiers(struct amdgpu_device *adev,
- const struct amdgpu_framebuffer *afb,
- const enum surface_pixel_format format,
- const enum dc_rotation_angle rotation,
- const struct plane_size *plane_size,
- union dc_tiling_info *tiling_info,
- struct dc_plane_dcc_param *dcc,
- struct dc_plane_address *address,
- const bool force_disable_dcc)
-{
- const uint64_t modifier = afb->base.modifier;
- int ret = 0;
-
- fill_gfx9_tiling_info_from_modifier(adev, tiling_info, modifier);
- tiling_info->gfx9.swizzle = modifier_gfx9_swizzle_mode(modifier);
-
- if (modifier_has_dcc(modifier) && !force_disable_dcc) {
- uint64_t dcc_address = afb->address + afb->base.offsets[1];
- bool independent_64b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier);
- bool independent_128b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier);
-
- dcc->enable = 1;
- dcc->meta_pitch = afb->base.pitches[1];
- dcc->independent_64b_blks = independent_64b_blks;
- if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) {
- if (independent_64b_blks && independent_128b_blks)
- dcc->dcc_ind_blk = hubp_ind_block_64b_no_128bcl;
- else if (independent_128b_blks)
- dcc->dcc_ind_blk = hubp_ind_block_128b;
- else if (independent_64b_blks && !independent_128b_blks)
- dcc->dcc_ind_blk = hubp_ind_block_64b;
- else
- dcc->dcc_ind_blk = hubp_ind_block_unconstrained;
- } else {
- if (independent_64b_blks)
- dcc->dcc_ind_blk = hubp_ind_block_64b;
- else
- dcc->dcc_ind_blk = hubp_ind_block_unconstrained;
- }
-
- address->grph.meta_addr.low_part = lower_32_bits(dcc_address);
- address->grph.meta_addr.high_part = upper_32_bits(dcc_address);
- }
-
- ret = validate_dcc(adev, format, rotation, tiling_info, dcc, address, plane_size);
- if (ret)
- drm_dbg_kms(adev_to_drm(adev), "validate_dcc: returned error: %d\n", ret);
-
- return ret;
-}
-
-static int
-fill_plane_buffer_attributes(struct amdgpu_device *adev,
- const struct amdgpu_framebuffer *afb,
- const enum surface_pixel_format format,
- const enum dc_rotation_angle rotation,
- const uint64_t tiling_flags,
- union dc_tiling_info *tiling_info,
- struct plane_size *plane_size,
- struct dc_plane_dcc_param *dcc,
- struct dc_plane_address *address,
- bool tmz_surface,
- bool force_disable_dcc)
-{
- const struct drm_framebuffer *fb = &afb->base;
- int ret;
-
- memset(tiling_info, 0, sizeof(*tiling_info));
- memset(plane_size, 0, sizeof(*plane_size));
- memset(dcc, 0, sizeof(*dcc));
- memset(address, 0, sizeof(*address));
-
- address->tmz_surface = tmz_surface;
-
- if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) {
- uint64_t addr = afb->address + fb->offsets[0];
-
- plane_size->surface_size.x = 0;
- plane_size->surface_size.y = 0;
- plane_size->surface_size.width = fb->width;
- plane_size->surface_size.height = fb->height;
- plane_size->surface_pitch =
- fb->pitches[0] / fb->format->cpp[0];
-
- address->type = PLN_ADDR_TYPE_GRAPHICS;
- address->grph.addr.low_part = lower_32_bits(addr);
- address->grph.addr.high_part = upper_32_bits(addr);
- } else if (format < SURFACE_PIXEL_FORMAT_INVALID) {
- uint64_t luma_addr = afb->address + fb->offsets[0];
- uint64_t chroma_addr = afb->address + fb->offsets[1];
-
- plane_size->surface_size.x = 0;
- plane_size->surface_size.y = 0;
- plane_size->surface_size.width = fb->width;
- plane_size->surface_size.height = fb->height;
- plane_size->surface_pitch =
- fb->pitches[0] / fb->format->cpp[0];
-
- plane_size->chroma_size.x = 0;
- plane_size->chroma_size.y = 0;
- /* TODO: set these based on surface format */
- plane_size->chroma_size.width = fb->width / 2;
- plane_size->chroma_size.height = fb->height / 2;
-
- plane_size->chroma_pitch =
- fb->pitches[1] / fb->format->cpp[1];
-
- address->type = PLN_ADDR_TYPE_VIDEO_PROGRESSIVE;
- address->video_progressive.luma_addr.low_part =
- lower_32_bits(luma_addr);
- address->video_progressive.luma_addr.high_part =
- upper_32_bits(luma_addr);
- address->video_progressive.chroma_addr.low_part =
- lower_32_bits(chroma_addr);
- address->video_progressive.chroma_addr.high_part =
- upper_32_bits(chroma_addr);
- }
-
- if (adev->family >= AMDGPU_FAMILY_AI) {
- ret = fill_gfx9_plane_attributes_from_modifiers(adev, afb, format,
- rotation, plane_size,
- tiling_info, dcc,
- address,
- force_disable_dcc);
- if (ret)
- return ret;
- } else {
- fill_gfx8_tiling_info_from_flags(tiling_info, tiling_flags);
- }
-
- return 0;
-}
-
-static void
-fill_blending_from_plane_state(const struct drm_plane_state *plane_state,
- bool *per_pixel_alpha, bool *global_alpha,
- int *global_alpha_value)
-{
- *per_pixel_alpha = false;
- *global_alpha = false;
- *global_alpha_value = 0xff;
-
- if (plane_state->plane->type != DRM_PLANE_TYPE_OVERLAY)
- return;
-
- if (plane_state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI) {
- static const uint32_t alpha_formats[] = {
- DRM_FORMAT_ARGB8888,
- DRM_FORMAT_RGBA8888,
- DRM_FORMAT_ABGR8888,
- };
- uint32_t format = plane_state->fb->format->format;
- unsigned int i;
-
- for (i = 0; i < ARRAY_SIZE(alpha_formats); ++i) {
- if (format == alpha_formats[i]) {
- *per_pixel_alpha = true;
- break;
- }
- }
- }
-
- if (plane_state->alpha < 0xffff) {
- *global_alpha = true;
- *global_alpha_value = plane_state->alpha >> 8;
- }
-}
-
static int
fill_plane_color_attributes(const struct drm_plane_state *plane_state,
const enum surface_pixel_format format,
@@ -5383,6 +6030,10 @@ fill_plane_color_attributes(const struct drm_plane_state *plane_state,
*color_space = COLOR_SPACE_SRGB;
+ /* Ignore properties when DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE is set */
+ if (plane_state->state && plane_state->state->plane_color_pipeline)
+ return 0;
+
/* DRM color properties only affect non-RGB formats. */
if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN)
return 0;
@@ -5406,9 +6057,9 @@ fill_plane_color_attributes(const struct drm_plane_state *plane_state,
case DRM_COLOR_YCBCR_BT2020:
if (full_range)
- *color_space = COLOR_SPACE_2020_YCBCR;
+ *color_space = COLOR_SPACE_2020_YCBCR_FULL;
else
- return -EINVAL;
+ *color_space = COLOR_SPACE_2020_YCBCR_LIMITED;
break;
default:
@@ -5421,11 +6072,10 @@ fill_plane_color_attributes(const struct drm_plane_state *plane_state,
static int
fill_dc_plane_info_and_addr(struct amdgpu_device *adev,
const struct drm_plane_state *plane_state,
- const uint64_t tiling_flags,
+ const u64 tiling_flags,
struct dc_plane_info *plane_info,
struct dc_plane_address *address,
- bool tmz_surface,
- bool force_disable_dcc)
+ bool tmz_surface)
{
const struct drm_framebuffer *fb = plane_state->fb;
const struct amdgpu_framebuffer *afb =
@@ -5484,7 +6134,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev,
plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616;
break;
default:
- DRM_ERROR(
+ drm_err(adev_to_drm(adev),
"Unsupported screen format %p4cc\n",
&fb->format->format);
return -EINVAL;
@@ -5508,27 +6158,28 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev,
break;
}
+
plane_info->visible = true;
plane_info->stereo_format = PLANE_STEREO_FORMAT_NONE;
- plane_info->layer_index = 0;
+ plane_info->layer_index = plane_state->normalized_zpos;
ret = fill_plane_color_attributes(plane_state, plane_info->format,
&plane_info->color_space);
if (ret)
return ret;
- ret = fill_plane_buffer_attributes(adev, afb, plane_info->format,
+ ret = amdgpu_dm_plane_fill_plane_buffer_attributes(adev, afb, plane_info->format,
plane_info->rotation, tiling_flags,
&plane_info->tiling_info,
&plane_info->plane_size,
- &plane_info->dcc, address, tmz_surface,
- force_disable_dcc);
+ &plane_info->dcc, address,
+ tmz_surface);
if (ret)
return ret;
- fill_blending_from_plane_state(
- plane_state, &plane_info->per_pixel_alpha,
+ amdgpu_dm_plane_fill_blending_from_plane_state(
+ plane_state, &plane_info->per_pixel_alpha, &plane_info->pre_multiplied_alpha,
&plane_info->global_alpha, &plane_info->global_alpha_value);
return 0;
@@ -5544,9 +6195,8 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
struct dc_scaling_info scaling_info;
struct dc_plane_info plane_info;
int ret;
- bool force_disable_dcc = false;
- ret = fill_dc_scaling_info(adev, plane_state, &scaling_info);
+ ret = amdgpu_dm_plane_fill_dc_scaling_info(adev, plane_state, &scaling_info);
if (ret)
return ret;
@@ -5555,13 +6205,11 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
dc_plane_state->clip_rect = scaling_info.clip_rect;
dc_plane_state->scaling_quality = scaling_info.scaling_quality;
- force_disable_dcc = adev->asic_type == CHIP_RAVEN && adev->in_suspend;
ret = fill_dc_plane_info_and_addr(adev, plane_state,
afb->tiling_flags,
&plane_info,
&dc_plane_state->address,
- afb->tmz_surface,
- force_disable_dcc);
+ afb->tmz_surface);
if (ret)
return ret;
@@ -5575,23 +6223,187 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
dc_plane_state->tiling_info = plane_info.tiling_info;
dc_plane_state->visible = plane_info.visible;
dc_plane_state->per_pixel_alpha = plane_info.per_pixel_alpha;
+ dc_plane_state->pre_multiplied_alpha = plane_info.pre_multiplied_alpha;
dc_plane_state->global_alpha = plane_info.global_alpha;
dc_plane_state->global_alpha_value = plane_info.global_alpha_value;
dc_plane_state->dcc = plane_info.dcc;
- dc_plane_state->layer_index = plane_info.layer_index; // Always returns 0
+ dc_plane_state->layer_index = plane_info.layer_index;
dc_plane_state->flip_int_enabled = true;
/*
* Always set input transfer function, since plane state is refreshed
* every time.
*/
- ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state, dc_plane_state);
+ ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state,
+ plane_state,
+ dc_plane_state);
if (ret)
return ret;
return 0;
}
+static inline void fill_dc_dirty_rect(struct drm_plane *plane,
+ struct rect *dirty_rect, int32_t x,
+ s32 y, s32 width, s32 height,
+ int *i, bool ffu)
+{
+ WARN_ON(*i >= DC_MAX_DIRTY_RECTS);
+
+ dirty_rect->x = x;
+ dirty_rect->y = y;
+ dirty_rect->width = width;
+ dirty_rect->height = height;
+
+ if (ffu)
+ drm_dbg(plane->dev,
+ "[PLANE:%d] PSR FFU dirty rect size (%d, %d)\n",
+ plane->base.id, width, height);
+ else
+ drm_dbg(plane->dev,
+ "[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)",
+ plane->base.id, x, y, width, height);
+
+ (*i)++;
+}
+
+/**
+ * fill_dc_dirty_rects() - Fill DC dirty regions for PSR selective updates
+ *
+ * @plane: DRM plane containing dirty regions that need to be flushed to the eDP
+ * remote fb
+ * @old_plane_state: Old state of @plane
+ * @new_plane_state: New state of @plane
+ * @crtc_state: New state of CRTC connected to the @plane
+ * @flip_addrs: DC flip tracking struct, which also tracks dirty rects
+ * @is_psr_su: Flag indicating whether Panel Self Refresh Selective Update (PSR SU) is enabled.
+ * If PSR SU is enabled and damage clips are available, only the regions of the screen
+ * that have changed will be updated. If PSR SU is not enabled,
+ * or if damage clips are not available, the entire screen will be updated.
+ * @dirty_regions_changed: output flag; true only if MPO is requested and the plane moved or was resized
+ *
+ * For PSR SU, DC informs the DMUB uController of dirty rectangle regions
+ * (referred to as "damage clips" in DRM nomenclature) that require updating on
+ * the eDP remote buffer. The responsibility of specifying the dirty regions is
+ * amdgpu_dm's.
+ *
+ * A damage-aware DRM client should fill the FB_DAMAGE_CLIPS property on the
+ * plane with regions that require flushing to the eDP remote buffer. In
+ * addition, certain use cases - such as cursor and multi-plane overlay (MPO) -
+ * implicitly provide damage clips without any client support via the plane
+ * bounds.
+ */
+static void fill_dc_dirty_rects(struct drm_plane *plane,
+ struct drm_plane_state *old_plane_state,
+ struct drm_plane_state *new_plane_state,
+ struct drm_crtc_state *crtc_state,
+ struct dc_flip_addrs *flip_addrs,
+ bool is_psr_su,
+ bool *dirty_regions_changed)
+{
+ struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state);
+ struct rect *dirty_rects = flip_addrs->dirty_rects;
+ u32 num_clips;
+ struct drm_mode_rect *clips;
+ bool bb_changed;
+ bool fb_changed;
+ u32 i = 0;
+ *dirty_regions_changed = false;
+
+ /*
+ * Cursor plane has its own dirty rect update interface. See
+ * dcn10_dmub_update_cursor_data and dmub_cmd_update_cursor_info_data
+ */
+ if (plane->type == DRM_PLANE_TYPE_CURSOR)
+ return;
+
+ if (new_plane_state->rotation != DRM_MODE_ROTATE_0)
+ goto ffu;
+
+ num_clips = drm_plane_get_damage_clips_count(new_plane_state);
+ clips = drm_plane_get_damage_clips(new_plane_state);
+
+ if (num_clips && (!amdgpu_damage_clips || (amdgpu_damage_clips < 0 &&
+ is_psr_su)))
+ goto ffu;
+
+ if (!dm_crtc_state->mpo_requested) {
+ if (!num_clips || num_clips > DC_MAX_DIRTY_RECTS)
+ goto ffu;
+
+ for (; flip_addrs->dirty_rect_count < num_clips; clips++)
+ fill_dc_dirty_rect(new_plane_state->plane,
+ &dirty_rects[flip_addrs->dirty_rect_count],
+ clips->x1, clips->y1,
+ clips->x2 - clips->x1, clips->y2 - clips->y1,
+ &flip_addrs->dirty_rect_count,
+ false);
+ return;
+ }
+
+ /*
+ * MPO is requested. Add entire plane bounding box to dirty rects if
+ * flipped to or damaged.
+ *
+ * If plane is moved or resized, also add old bounding box to dirty
+ * rects.
+ */
+ fb_changed = old_plane_state->fb->base.id !=
+ new_plane_state->fb->base.id;
+ bb_changed = (old_plane_state->crtc_x != new_plane_state->crtc_x ||
+ old_plane_state->crtc_y != new_plane_state->crtc_y ||
+ old_plane_state->crtc_w != new_plane_state->crtc_w ||
+ old_plane_state->crtc_h != new_plane_state->crtc_h);
+
+ drm_dbg(plane->dev,
+ "[PLANE:%d] PSR bb_changed:%d fb_changed:%d num_clips:%d\n",
+ new_plane_state->plane->base.id,
+ bb_changed, fb_changed, num_clips);
+
+ *dirty_regions_changed = bb_changed;
+
+ if ((num_clips + (bb_changed ? 2 : 0)) > DC_MAX_DIRTY_RECTS)
+ goto ffu;
+
+ if (bb_changed) {
+ fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i],
+ new_plane_state->crtc_x,
+ new_plane_state->crtc_y,
+ new_plane_state->crtc_w,
+ new_plane_state->crtc_h, &i, false);
+
+ /* Add old plane bounding-box if plane is moved or resized */
+ fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i],
+ old_plane_state->crtc_x,
+ old_plane_state->crtc_y,
+ old_plane_state->crtc_w,
+ old_plane_state->crtc_h, &i, false);
+ }
+
+ if (num_clips) {
+ for (; i < num_clips; clips++)
+ fill_dc_dirty_rect(new_plane_state->plane,
+ &dirty_rects[i], clips->x1,
+ clips->y1, clips->x2 - clips->x1,
+ clips->y2 - clips->y1, &i, false);
+ } else if (fb_changed && !bb_changed) {
+ fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i],
+ new_plane_state->crtc_x,
+ new_plane_state->crtc_y,
+ new_plane_state->crtc_w,
+ new_plane_state->crtc_h, &i, false);
+ }
+
+ flip_addrs->dirty_rect_count = i;
+ return;
+
+ffu:
+ fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[0], 0, 0,
+ dm_crtc_state->base.mode.crtc_hdisplay,
+ dm_crtc_state->base.mode.crtc_vdisplay,
+ &flip_addrs->dirty_rect_count, true);
+}
+
static void update_stream_scaling_settings(const struct drm_display_mode *mode,
const struct dm_connector_state *dm_state,
struct dc_stream_state *stream)
@@ -5651,7 +6463,7 @@ static enum dc_color_depth
convert_color_depth_from_display_info(const struct drm_connector *connector,
bool is_y420, int requested_bpc)
{
- uint8_t bpc;
+ u8 bpc;
if (is_y420) {
bpc = 8;
@@ -5717,21 +6529,46 @@ get_aspect_ratio(const struct drm_display_mode *mode_in)
}
static enum dc_color_space
-get_output_color_space(const struct dc_crtc_timing *dc_crtc_timing)
+get_output_color_space(const struct dc_crtc_timing *dc_crtc_timing,
+ const struct drm_connector_state *connector_state)
{
enum dc_color_space color_space = COLOR_SPACE_SRGB;
- switch (dc_crtc_timing->pixel_encoding) {
- case PIXEL_ENCODING_YCBCR422:
- case PIXEL_ENCODING_YCBCR444:
- case PIXEL_ENCODING_YCBCR420:
- {
+ switch (connector_state->colorspace) {
+ case DRM_MODE_COLORIMETRY_BT601_YCC:
+ if (dc_crtc_timing->flags.Y_ONLY)
+ color_space = COLOR_SPACE_YCBCR601_LIMITED;
+ else
+ color_space = COLOR_SPACE_YCBCR601;
+ break;
+ case DRM_MODE_COLORIMETRY_BT709_YCC:
+ if (dc_crtc_timing->flags.Y_ONLY)
+ color_space = COLOR_SPACE_YCBCR709_LIMITED;
+ else
+ color_space = COLOR_SPACE_YCBCR709;
+ break;
+ case DRM_MODE_COLORIMETRY_OPRGB:
+ color_space = COLOR_SPACE_ADOBERGB;
+ break;
+ case DRM_MODE_COLORIMETRY_BT2020_RGB:
+ case DRM_MODE_COLORIMETRY_BT2020_YCC:
+ if (dc_crtc_timing->pixel_encoding == PIXEL_ENCODING_RGB)
+ color_space = COLOR_SPACE_2020_RGB_FULLRANGE;
+ else
+ color_space = COLOR_SPACE_2020_YCBCR_LIMITED;
+ break;
+ case DRM_MODE_COLORIMETRY_DEFAULT: // ITU601
+ default:
+ if (dc_crtc_timing->pixel_encoding == PIXEL_ENCODING_RGB) {
+ color_space = COLOR_SPACE_SRGB;
+ if (connector_state->hdmi.broadcast_rgb == DRM_HDMI_BROADCAST_RGB_LIMITED)
+ color_space = COLOR_SPACE_SRGB_LIMITED;
/*
* 27030khz is the separation point between HDTV and SDTV
* according to HDMI spec, we use YCbCr709 and YCbCr601
* respectively
*/
- if (dc_crtc_timing->pix_clk_100hz > 270300) {
+ } else if (dc_crtc_timing->pix_clk_100hz > 270300) {
if (dc_crtc_timing->flags.Y_ONLY)
color_space =
COLOR_SPACE_YCBCR709_LIMITED;
@@ -5744,27 +6581,37 @@ get_output_color_space(const struct dc_crtc_timing *dc_crtc_timing)
else
color_space = COLOR_SPACE_YCBCR601;
}
-
- }
- break;
- case PIXEL_ENCODING_RGB:
- color_space = COLOR_SPACE_SRGB;
- break;
-
- default:
- WARN_ON(1);
break;
}
return color_space;
}
+static enum display_content_type
+get_output_content_type(const struct drm_connector_state *connector_state)
+{
+ switch (connector_state->content_type) {
+ default:
+ case DRM_MODE_CONTENT_TYPE_NO_DATA:
+ return DISPLAY_CONTENT_TYPE_NO_DATA;
+ case DRM_MODE_CONTENT_TYPE_GRAPHICS:
+ return DISPLAY_CONTENT_TYPE_GRAPHICS;
+ case DRM_MODE_CONTENT_TYPE_PHOTO:
+ return DISPLAY_CONTENT_TYPE_PHOTO;
+ case DRM_MODE_CONTENT_TYPE_CINEMA:
+ return DISPLAY_CONTENT_TYPE_CINEMA;
+ case DRM_MODE_CONTENT_TYPE_GAME:
+ return DISPLAY_CONTENT_TYPE_GAME;
+ }
+}
+
static bool adjust_colour_depth_from_display_info(
struct dc_crtc_timing *timing_out,
const struct drm_display_info *info)
{
enum dc_color_depth depth = timing_out->display_color_depth;
int normalized_clk;
+
do {
normalized_clk = timing_out->pix_clk_100hz / 10;
/* YCbCr 4:2:0 requires additional adjustment of 1/2 */
@@ -5805,9 +6652,13 @@ static void fill_stream_properties_from_drm_display_mode(
{
struct dc_crtc_timing *timing_out = &stream->timing;
const struct drm_display_info *info = &connector->display_info;
- struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+ struct amdgpu_dm_connector *aconnector = NULL;
struct hdmi_vendor_infoframe hv_frame;
struct hdmi_avi_infoframe avi_frame;
+ ssize_t err;
+
+ if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
+ aconnector = to_amdgpu_dm_connector(connector);
memset(&hv_frame, 0, sizeof(hv_frame));
memset(&avi_frame, 0, sizeof(avi_frame));
@@ -5821,9 +6672,14 @@ static void fill_stream_properties_from_drm_display_mode(
&& stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420;
else if (drm_mode_is_420_also(info, mode_in)
+ && aconnector
&& aconnector->force_yuv420_output)
timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420;
- else if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCRCB444)
+ else if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCBCR422)
+ && aconnector
+ && aconnector->force_yuv422_output)
+ timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR422;
+ else if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCBCR444)
&& stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR444;
else
@@ -5837,7 +6693,7 @@ static void fill_stream_properties_from_drm_display_mode(
timing_out->scan_type = SCANNING_TYPE_NODATA;
timing_out->hdmi_vic = 0;
- if(old_stream) {
+ if (old_stream) {
timing_out->vic = old_stream->timing.vic;
timing_out->flags.HSYNC_POSITIVE_POLARITY = old_stream->timing.flags.HSYNC_POSITIVE_POLARITY;
timing_out->flags.VSYNC_POSITIVE_POLARITY = old_stream->timing.flags.VSYNC_POSITIVE_POLARITY;
@@ -5850,13 +6706,23 @@ static void fill_stream_properties_from_drm_display_mode(
}
if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) {
- drm_hdmi_avi_infoframe_from_display_mode(&avi_frame, (struct drm_connector *)connector, mode_in);
+ err = drm_hdmi_avi_infoframe_from_display_mode(&avi_frame,
+ (struct drm_connector *)connector,
+ mode_in);
+ if (err < 0)
+ drm_warn_once(connector->dev, "Failed to setup avi infoframe on connector %s: %zd\n",
+ connector->name, err);
timing_out->vic = avi_frame.video_code;
- drm_hdmi_vendor_infoframe_from_display_mode(&hv_frame, (struct drm_connector *)connector, mode_in);
+ err = drm_hdmi_vendor_infoframe_from_display_mode(&hv_frame,
+ (struct drm_connector *)connector,
+ mode_in);
+ if (err < 0)
+ drm_warn_once(connector->dev, "Failed to setup vendor infoframe on connector %s: %zd\n",
+ connector->name, err);
timing_out->hdmi_vic = hv_frame.vic;
}
- if (is_freesync_video_mode(mode_in, aconnector)) {
+ if (aconnector && is_freesync_video_mode(mode_in, aconnector)) {
timing_out->h_addressable = mode_in->hdisplay;
timing_out->h_total = mode_in->htotal;
timing_out->h_sync_width = mode_in->hsync_end - mode_in->hsync_start;
@@ -5880,10 +6746,8 @@ static void fill_stream_properties_from_drm_display_mode(
timing_out->aspect_ratio = get_aspect_ratio(mode_in);
- stream->output_color_space = get_output_color_space(timing_out);
-
- stream->out_transfer_func->type = TF_TYPE_PREDEFINED;
- stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB;
+ stream->out_transfer_func.type = TF_TYPE_PREDEFINED;
+ stream->out_transfer_func.tf = TRANSFER_FUNCTION_SRGB;
if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) {
if (!adjust_colour_depth_from_display_info(timing_out, info) &&
drm_mode_is_420_also(info, mode_in) &&
@@ -5892,6 +6756,9 @@ static void fill_stream_properties_from_drm_display_mode(
adjust_colour_depth_from_display_info(timing_out, info);
}
}
+
+ stream->output_color_space = get_output_color_space(timing_out, connector_state);
+ stream->content_type = get_output_content_type(connector_state);
}
static void fill_audio_info(struct audio_info *audio_info,
@@ -5964,28 +6831,29 @@ decide_crtc_timing_for_drm_display_mode(struct drm_display_mode *drm_mode,
const struct drm_display_mode *native_mode,
bool scale_enabled)
{
- if (scale_enabled) {
- copy_crtc_timing_for_drm_display_mode(native_mode, drm_mode);
- } else if (native_mode->clock == drm_mode->clock &&
- native_mode->htotal == drm_mode->htotal &&
- native_mode->vtotal == drm_mode->vtotal) {
- copy_crtc_timing_for_drm_display_mode(native_mode, drm_mode);
+ if (scale_enabled || (
+ native_mode->clock == drm_mode->clock &&
+ native_mode->htotal == drm_mode->htotal &&
+ native_mode->vtotal == drm_mode->vtotal)) {
+ if (native_mode->crtc_clock)
+ copy_crtc_timing_for_drm_display_mode(native_mode, drm_mode);
} else {
/* no scaling nor amdgpu inserted, no need to patch */
}
}
static struct dc_sink *
-create_fake_sink(struct amdgpu_dm_connector *aconnector)
+create_fake_sink(struct drm_device *dev, struct dc_link *link)
{
struct dc_sink_init_data sink_init_data = { 0 };
struct dc_sink *sink = NULL;
- sink_init_data.link = aconnector->dc_link;
- sink_init_data.sink_signal = aconnector->dc_link->connector_signal;
+
+ sink_init_data.link = link;
+ sink_init_data.sink_signal = link->connector_signal;
sink = dc_sink_create(&sink_init_data);
if (!sink) {
- DRM_ERROR("Failed to create sink!\n");
+ drm_err(dev, "Failed to create sink!\n");
return NULL;
}
sink->sink_signal = SIGNAL_TYPE_VIRTUAL;
@@ -6059,69 +6927,6 @@ static void dm_enable_per_frame_crtc_master_sync(struct dc_state *context)
}
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-static void update_dsc_caps(struct amdgpu_dm_connector *aconnector,
- struct dc_sink *sink, struct dc_stream_state *stream,
- struct dsc_dec_dpcd_caps *dsc_caps)
-{
- stream->timing.flags.DSC = 0;
-
- if (aconnector->dc_link && sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT) {
- dc_dsc_parse_dsc_dpcd(aconnector->dc_link->ctx->dc,
- aconnector->dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.raw,
- aconnector->dc_link->dpcd_caps.dsc_caps.dsc_branch_decoder_caps.raw,
- dsc_caps);
- }
-}
-
-static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector,
- struct dc_sink *sink, struct dc_stream_state *stream,
- struct dsc_dec_dpcd_caps *dsc_caps)
-{
- struct drm_connector *drm_connector = &aconnector->base;
- uint32_t link_bandwidth_kbps;
- uint32_t max_dsc_target_bpp_limit_override = 0;
-
- link_bandwidth_kbps = dc_link_bandwidth_kbps(aconnector->dc_link,
- dc_link_get_link_cap(aconnector->dc_link));
-
- if (stream->link && stream->link->local_sink)
- max_dsc_target_bpp_limit_override =
- stream->link->local_sink->edid_caps.panel_patch.max_dsc_target_bpp_limit;
-
- /* Set DSC policy according to dsc_clock_en */
- dc_dsc_policy_set_enable_dsc_when_not_needed(
- aconnector->dsc_settings.dsc_force_enable == DSC_CLK_FORCE_ENABLE);
-
- if (aconnector->dc_link && sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT) {
-
- if (dc_dsc_compute_config(aconnector->dc_link->ctx->dc->res_pool->dscs[0],
- dsc_caps,
- aconnector->dc_link->ctx->dc->debug.dsc_min_slice_height_override,
- max_dsc_target_bpp_limit_override,
- link_bandwidth_kbps,
- &stream->timing,
- &stream->timing.dsc_cfg)) {
- stream->timing.flags.DSC = 1;
- DRM_DEBUG_DRIVER("%s: [%s] DSC is selected from SST RX\n", __func__, drm_connector->name);
- }
- }
-
- /* Overwrite the stream flag if DSC is enabled through debugfs */
- if (aconnector->dsc_settings.dsc_force_enable == DSC_CLK_FORCE_ENABLE)
- stream->timing.flags.DSC = 1;
-
- if (stream->timing.flags.DSC && aconnector->dsc_settings.dsc_num_slices_h)
- stream->timing.dsc_cfg.num_slices_h = aconnector->dsc_settings.dsc_num_slices_h;
-
- if (stream->timing.flags.DSC && aconnector->dsc_settings.dsc_num_slices_v)
- stream->timing.dsc_cfg.num_slices_v = aconnector->dsc_settings.dsc_num_slices_v;
-
- if (stream->timing.flags.DSC && aconnector->dsc_settings.dsc_bits_per_pixel)
- stream->timing.dsc_cfg.bits_per_pixel = aconnector->dsc_settings.dsc_bits_per_pixel;
-}
-#endif /* CONFIG_DRM_AMD_DC_DCN */
-
/**
* DOC: FreeSync Video
*
@@ -6138,7 +6943,7 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector,
* - Cinema HFR (48 FPS)
* - TV/PAL (50 FPS)
* - Commonly used (60 FPS)
- * - Multiples of 24 (48,72,96,120 FPS)
+ * - Multiples of 24 (48,72,96 FPS)
*
* The list of standards video format is not huge and can be added to the
* connector modeset list beforehand. With that, userspace can leverage
@@ -6154,19 +6959,22 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector,
*/
static struct drm_display_mode *
get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector,
- bool use_probed_modes)
+ bool use_probed_modes)
{
struct drm_display_mode *m, *m_pref = NULL;
u16 current_refresh, highest_refresh;
struct list_head *list_head = use_probed_modes ?
- &aconnector->base.probed_modes :
- &aconnector->base.modes;
+ &aconnector->base.probed_modes :
+ &aconnector->base.modes;
+
+ if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ return NULL;
if (aconnector->freesync_vid_base.clock != 0)
return &aconnector->freesync_vid_base;
/* Find the preferred mode */
- list_for_each_entry (m, list_head, head) {
+ list_for_each_entry(m, list_head, head) {
if (m->type & DRM_MODE_TYPE_PREFERRED) {
m_pref = m;
break;
@@ -6176,9 +6984,9 @@ get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector,
if (!m_pref) {
/* Probably an EDID with no preferred mode. Fallback to first entry */
m_pref = list_first_entry_or_null(
- &aconnector->base.modes, struct drm_display_mode, head);
+ &aconnector->base.modes, struct drm_display_mode, head);
if (!m_pref) {
- DRM_DEBUG_DRIVER("No preferred mode found in EDID\n");
+ drm_dbg_driver(aconnector->base.dev, "No preferred mode found in EDID\n");
return NULL;
}
}
@@ -6190,7 +6998,7 @@ get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector,
* For some monitors, preferred mode is not the mode with highest
* supported refresh rate.
*/
- list_for_each_entry (m, list_head, head) {
+ list_for_each_entry(m, list_head, head) {
current_refresh = drm_mode_vrefresh(m);
if (m->hdisplay == m_pref->hdisplay &&
@@ -6201,12 +7009,12 @@ get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector,
}
}
- aconnector->freesync_vid_base = *m_pref;
+ drm_mode_copy(&aconnector->freesync_vid_base, m_pref);
return m_pref;
}
static bool is_freesync_video_mode(const struct drm_display_mode *mode,
- struct amdgpu_dm_connector *aconnector)
+ struct amdgpu_dm_connector *aconnector)
{
struct drm_display_mode *high_mode;
int timing_diff;
@@ -6232,44 +7040,216 @@ static bool is_freesync_video_mode(const struct drm_display_mode *mode,
return true;
}
+#if defined(CONFIG_DRM_AMD_DC_FP)
+static void update_dsc_caps(struct amdgpu_dm_connector *aconnector,
+ struct dc_sink *sink, struct dc_stream_state *stream,
+ struct dsc_dec_dpcd_caps *dsc_caps)
+{
+ stream->timing.flags.DSC = 0;
+ dsc_caps->is_dsc_supported = false;
+
+ if (aconnector->dc_link && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT ||
+ sink->sink_signal == SIGNAL_TYPE_EDP)) {
+ if (sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_NONE ||
+ sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER)
+ dc_dsc_parse_dsc_dpcd(aconnector->dc_link->ctx->dc,
+ aconnector->dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.raw,
+ aconnector->dc_link->dpcd_caps.dsc_caps.dsc_branch_decoder_caps.raw,
+ dsc_caps);
+ }
+}
+
+static void apply_dsc_policy_for_edp(struct amdgpu_dm_connector *aconnector,
+ struct dc_sink *sink, struct dc_stream_state *stream,
+ struct dsc_dec_dpcd_caps *dsc_caps,
+ uint32_t max_dsc_target_bpp_limit_override)
+{
+ const struct dc_link_settings *verified_link_cap = NULL;
+ u32 link_bw_in_kbps;
+ u32 edp_min_bpp_x16, edp_max_bpp_x16;
+ struct dc *dc = sink->ctx->dc;
+ struct dc_dsc_bw_range bw_range = {0};
+ struct dc_dsc_config dsc_cfg = {0};
+ struct dc_dsc_config_options dsc_options = {0};
+
+ dc_dsc_get_default_config_option(dc, &dsc_options);
+ dsc_options.max_target_bpp_limit_override_x16 = max_dsc_target_bpp_limit_override * 16;
+
+ verified_link_cap = dc_link_get_link_cap(stream->link);
+ link_bw_in_kbps = dc_link_bandwidth_kbps(stream->link, verified_link_cap);
+ edp_min_bpp_x16 = 8 * 16;
+ edp_max_bpp_x16 = 8 * 16;
+
+ if (edp_max_bpp_x16 > dsc_caps->edp_max_bits_per_pixel)
+ edp_max_bpp_x16 = dsc_caps->edp_max_bits_per_pixel;
+
+ if (edp_max_bpp_x16 < edp_min_bpp_x16)
+ edp_min_bpp_x16 = edp_max_bpp_x16;
+
+ if (dc_dsc_compute_bandwidth_range(dc->res_pool->dscs[0],
+ dc->debug.dsc_min_slice_height_override,
+ edp_min_bpp_x16, edp_max_bpp_x16,
+ dsc_caps,
+ &stream->timing,
+ dc_link_get_highest_encoding_format(aconnector->dc_link),
+ &bw_range)) {
+
+ if (bw_range.max_kbps < link_bw_in_kbps) {
+ if (dc_dsc_compute_config(dc->res_pool->dscs[0],
+ dsc_caps,
+ &dsc_options,
+ 0,
+ &stream->timing,
+ dc_link_get_highest_encoding_format(aconnector->dc_link),
+ &dsc_cfg)) {
+ stream->timing.dsc_cfg = dsc_cfg;
+ stream->timing.flags.DSC = 1;
+ stream->timing.dsc_cfg.bits_per_pixel = edp_max_bpp_x16;
+ }
+ return;
+ }
+ }
+
+ if (dc_dsc_compute_config(dc->res_pool->dscs[0],
+ dsc_caps,
+ &dsc_options,
+ link_bw_in_kbps,
+ &stream->timing,
+ dc_link_get_highest_encoding_format(aconnector->dc_link),
+ &dsc_cfg)) {
+ stream->timing.dsc_cfg = dsc_cfg;
+ stream->timing.flags.DSC = 1;
+ }
+}
+
+static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector,
+ struct dc_sink *sink, struct dc_stream_state *stream,
+ struct dsc_dec_dpcd_caps *dsc_caps)
+{
+ struct drm_connector *drm_connector = &aconnector->base;
+ u32 link_bandwidth_kbps;
+ struct dc *dc = sink->ctx->dc;
+ u32 max_supported_bw_in_kbps, timing_bw_in_kbps;
+ u32 dsc_max_supported_bw_in_kbps;
+ u32 max_dsc_target_bpp_limit_override =
+ drm_connector->display_info.max_dsc_bpp;
+ struct dc_dsc_config_options dsc_options = {0};
+
+ dc_dsc_get_default_config_option(dc, &dsc_options);
+ dsc_options.max_target_bpp_limit_override_x16 = max_dsc_target_bpp_limit_override * 16;
+
+ link_bandwidth_kbps = dc_link_bandwidth_kbps(aconnector->dc_link,
+ dc_link_get_link_cap(aconnector->dc_link));
+
+ /* Set DSC policy according to dsc_clock_en */
+ dc_dsc_policy_set_enable_dsc_when_not_needed(
+ aconnector->dsc_settings.dsc_force_enable == DSC_CLK_FORCE_ENABLE);
+
+ if (sink->sink_signal == SIGNAL_TYPE_EDP &&
+ !aconnector->dc_link->panel_config.dsc.disable_dsc_edp &&
+ dc->caps.edp_dsc_support && aconnector->dsc_settings.dsc_force_enable != DSC_CLK_FORCE_DISABLE) {
+
+ apply_dsc_policy_for_edp(aconnector, sink, stream, dsc_caps, max_dsc_target_bpp_limit_override);
+
+ } else if (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT) {
+ if (sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_NONE) {
+ if (dc_dsc_compute_config(aconnector->dc_link->ctx->dc->res_pool->dscs[0],
+ dsc_caps,
+ &dsc_options,
+ link_bandwidth_kbps,
+ &stream->timing,
+ dc_link_get_highest_encoding_format(aconnector->dc_link),
+ &stream->timing.dsc_cfg)) {
+ stream->timing.flags.DSC = 1;
+ drm_dbg_driver(drm_connector->dev, "%s: SST_DSC [%s] DSC is selected from SST RX\n",
+ __func__, drm_connector->name);
+ }
+ } else if (sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) {
+ timing_bw_in_kbps = dc_bandwidth_in_kbps_from_timing(&stream->timing,
+ dc_link_get_highest_encoding_format(aconnector->dc_link));
+ max_supported_bw_in_kbps = link_bandwidth_kbps;
+ dsc_max_supported_bw_in_kbps = link_bandwidth_kbps;
+
+ if (timing_bw_in_kbps > max_supported_bw_in_kbps &&
+ max_supported_bw_in_kbps > 0 &&
+ dsc_max_supported_bw_in_kbps > 0)
+ if (dc_dsc_compute_config(aconnector->dc_link->ctx->dc->res_pool->dscs[0],
+ dsc_caps,
+ &dsc_options,
+ dsc_max_supported_bw_in_kbps,
+ &stream->timing,
+ dc_link_get_highest_encoding_format(aconnector->dc_link),
+ &stream->timing.dsc_cfg)) {
+ stream->timing.flags.DSC = 1;
+ drm_dbg_driver(drm_connector->dev, "%s: SST_DSC [%s] DSC is selected from DP-HDMI PCON\n",
+ __func__, drm_connector->name);
+ }
+ }
+ }
+
+ /* Overwrite the stream flag if DSC is enabled through debugfs */
+ if (aconnector->dsc_settings.dsc_force_enable == DSC_CLK_FORCE_ENABLE)
+ stream->timing.flags.DSC = 1;
+
+ if (stream->timing.flags.DSC && aconnector->dsc_settings.dsc_num_slices_h)
+ stream->timing.dsc_cfg.num_slices_h = aconnector->dsc_settings.dsc_num_slices_h;
+
+ if (stream->timing.flags.DSC && aconnector->dsc_settings.dsc_num_slices_v)
+ stream->timing.dsc_cfg.num_slices_v = aconnector->dsc_settings.dsc_num_slices_v;
+
+ if (stream->timing.flags.DSC && aconnector->dsc_settings.dsc_bits_per_pixel)
+ stream->timing.dsc_cfg.bits_per_pixel = aconnector->dsc_settings.dsc_bits_per_pixel;
+}
+#endif
+
static struct dc_stream_state *
-create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
+create_stream_for_sink(struct drm_connector *connector,
const struct drm_display_mode *drm_mode,
const struct dm_connector_state *dm_state,
const struct dc_stream_state *old_stream,
int requested_bpc)
{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_dm_connector *aconnector = NULL;
struct drm_display_mode *preferred_mode = NULL;
- struct drm_connector *drm_connector;
- const struct drm_connector_state *con_state =
- dm_state ? &dm_state->base : NULL;
+ const struct drm_connector_state *con_state = &dm_state->base;
struct dc_stream_state *stream = NULL;
- struct drm_display_mode mode = *drm_mode;
+ struct drm_display_mode mode;
struct drm_display_mode saved_mode;
struct drm_display_mode *freesync_mode = NULL;
bool native_mode_found = false;
bool recalculate_timing = false;
- bool scale = dm_state ? (dm_state->scaling != RMX_OFF) : false;
+ bool scale = dm_state->scaling != RMX_OFF;
int mode_refresh;
int preferred_refresh = 0;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
+ enum color_transfer_func tf = TRANSFER_FUNC_UNKNOWN;
+#if defined(CONFIG_DRM_AMD_DC_FP)
struct dsc_dec_dpcd_caps dsc_caps;
#endif
+ struct dc_link *link = NULL;
struct dc_sink *sink = NULL;
+ drm_mode_init(&mode, drm_mode);
memset(&saved_mode, 0, sizeof(saved_mode));
- if (aconnector == NULL) {
- DRM_ERROR("aconnector is NULL!\n");
- return stream;
- }
+ if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) {
+ aconnector = NULL;
+ aconnector = to_amdgpu_dm_connector(connector);
+ link = aconnector->dc_link;
+ } else {
+ struct drm_writeback_connector *wbcon = NULL;
+ struct amdgpu_dm_wb_connector *dm_wbcon = NULL;
- drm_connector = &aconnector->base;
+ wbcon = drm_connector_to_writeback(connector);
+ dm_wbcon = to_amdgpu_dm_wb_connector(wbcon);
+ link = dm_wbcon->link;
+ }
- if (!aconnector->dc_sink) {
- sink = create_fake_sink(aconnector);
+ if (!aconnector || !aconnector->dc_sink) {
+ sink = create_fake_sink(dev, link);
if (!sink)
return stream;
+
} else {
sink = aconnector->dc_sink;
dc_sink_retain(sink);
@@ -6278,16 +7258,17 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
stream = dc_create_stream_for_sink(sink);
if (stream == NULL) {
- DRM_ERROR("Failed to create stream for sink!\n");
+ drm_err(dev, "Failed to create stream for sink!\n");
goto finish;
}
+ /* We leave this NULL for writeback connectors */
stream->dm_stream_context = aconnector;
stream->timing.flags.LTE_340MCSC_SCRAMBLE =
- drm_connector->display_info.hdmi.scdc.scrambling.low_rates;
+ connector->display_info.hdmi.scdc.scrambling.low_rates;
- list_for_each_entry(preferred_mode, &aconnector->base.modes, head) {
+ list_for_each_entry(preferred_mode, &connector->modes, head) {
/* Search for preferred mode */
if (preferred_mode->type & DRM_MODE_TYPE_PREFERRED) {
native_mode_found = true;
@@ -6296,7 +7277,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
}
if (!native_mode_found)
preferred_mode = list_first_entry_or_null(
- &aconnector->base.modes,
+ &connector->modes,
struct drm_display_mode,
head);
@@ -6307,19 +7288,21 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
* This may not be an error, the use case is when we have no
* usermode calls to reset and set mode upon hotplug. In this
* case, we call set mode ourselves to restore the previous mode
- * and the modelist may not be filled in in time.
+ * and the modelist may not be filled in time.
*/
- DRM_DEBUG_DRIVER("No preferred mode found\n");
- } else {
+ drm_dbg_driver(dev, "No preferred mode found\n");
+ } else if (aconnector) {
recalculate_timing = amdgpu_freesync_vid_mode &&
is_freesync_video_mode(&mode, aconnector);
if (recalculate_timing) {
freesync_mode = get_highest_refresh_rate_mode(aconnector, false);
- saved_mode = mode;
- mode = *freesync_mode;
+ drm_mode_copy(&saved_mode, &mode);
+ saved_mode.picture_aspect_ratio = mode.picture_aspect_ratio;
+ drm_mode_copy(&mode, freesync_mode);
+ mode.picture_aspect_ratio = saved_mode.picture_aspect_ratio;
} else {
decide_crtc_timing_for_drm_display_mode(
- &mode, preferred_mode, scale);
+ &mode, preferred_mode, scale);
preferred_refresh = drm_mode_vrefresh(preferred_mode);
}
@@ -6327,23 +7310,33 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
if (recalculate_timing)
drm_mode_set_crtcinfo(&saved_mode, 0);
- else if (!dm_state)
- drm_mode_set_crtcinfo(&mode, 0);
- /*
- * If scaling is enabled and refresh rate didn't change
- * we copy the vic and polarities of the old timings
- */
+ /*
+ * If scaling is enabled and refresh rate didn't change
+ * we copy the vic and polarities of the old timings
+ */
if (!scale || mode_refresh != preferred_refresh)
fill_stream_properties_from_drm_display_mode(
- stream, &mode, &aconnector->base, con_state, NULL,
+ stream, &mode, connector, con_state, NULL,
requested_bpc);
else
fill_stream_properties_from_drm_display_mode(
- stream, &mode, &aconnector->base, con_state, old_stream,
+ stream, &mode, connector, con_state, old_stream,
requested_bpc);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
+ /* The rest isn't needed for writeback connectors */
+ if (!aconnector)
+ goto finish;
+
+ if (aconnector->timing_changed) {
+ drm_dbg(aconnector->base.dev,
+ "overriding timing for automated test, bpc %d, changing to %d\n",
+ stream->timing.display_color_depth,
+ aconnector->timing_requested->display_color_depth);
+ stream->timing = *aconnector->timing_requested;
+ }
+
+#if defined(CONFIG_DRM_AMD_DC_FP)
/* SST DSC determination policy */
update_dsc_caps(aconnector, sink, stream, &dsc_caps);
if (aconnector->dsc_settings.dsc_force_enable != DSC_CLK_FORCE_DISABLE && dsc_caps.is_dsc_supported)
@@ -6354,7 +7347,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
fill_audio_info(
&stream->audio_info,
- drm_connector,
+ connector,
sink);
update_stream_signal(stream, sink);
@@ -6362,21 +7355,29 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket);
- if (stream->link->psr_settings.psr_feature_enabled) {
+ if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
+ stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
+ stream->signal == SIGNAL_TYPE_EDP) {
+ const struct dc_edid_caps *edid_caps;
+ unsigned int disable_colorimetry = 0;
+
+ if (aconnector->dc_sink) {
+ edid_caps = &aconnector->dc_sink->edid_caps;
+ disable_colorimetry = edid_caps->panel_patch.disable_colorimetry;
+ }
+
//
// should decide stream support vsc sdp colorimetry capability
// before building vsc info packet
//
- stream->use_vsc_sdp_for_colorimetry = false;
- if (aconnector->dc_sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
- stream->use_vsc_sdp_for_colorimetry =
- aconnector->dc_sink->is_vsc_sdp_colorimetry_supported;
- } else {
- if (stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED)
- stream->use_vsc_sdp_for_colorimetry = true;
- }
- mod_build_vsc_infopacket(stream, &stream->vsc_infopacket);
- aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY;
+ stream->use_vsc_sdp_for_colorimetry = stream->link->dpcd_caps.dpcd_rev.raw >= 0x14 &&
+ stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED &&
+ !disable_colorimetry;
+
+ if (stream->out_transfer_func.tf == TRANSFER_FUNCTION_GAMMA22)
+ tf = TRANSFER_FUNC_GAMMA_22;
+ mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf);
+ aconnector->sr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY;
}
finish:
@@ -6385,208 +7386,117 @@ finish:
return stream;
}
-static void amdgpu_dm_crtc_destroy(struct drm_crtc *crtc)
-{
- drm_crtc_cleanup(crtc);
- kfree(crtc);
-}
-
-static void dm_crtc_destroy_state(struct drm_crtc *crtc,
- struct drm_crtc_state *state)
-{
- struct dm_crtc_state *cur = to_dm_crtc_state(state);
-
- /* TODO Destroy dc_stream objects are stream object is flattened */
- if (cur->stream)
- dc_stream_release(cur->stream);
-
-
- __drm_atomic_helper_crtc_destroy_state(state);
-
-
- kfree(state);
-}
-
-static void dm_crtc_reset_state(struct drm_crtc *crtc)
-{
- struct dm_crtc_state *state;
-
- if (crtc->state)
- dm_crtc_destroy_state(crtc, crtc->state);
-
- state = kzalloc(sizeof(*state), GFP_KERNEL);
- if (WARN_ON(!state))
- return;
-
- __drm_atomic_helper_crtc_reset(crtc, &state->base);
-}
-
-static struct drm_crtc_state *
-dm_crtc_duplicate_state(struct drm_crtc *crtc)
-{
- struct dm_crtc_state *state, *cur;
-
- cur = to_dm_crtc_state(crtc->state);
-
- if (WARN_ON(!crtc->state))
- return NULL;
-
- state = kzalloc(sizeof(*state), GFP_KERNEL);
- if (!state)
- return NULL;
-
- __drm_atomic_helper_crtc_duplicate_state(crtc, &state->base);
-
- if (cur->stream) {
- state->stream = cur->stream;
- dc_stream_retain(state->stream);
- }
-
- state->active_planes = cur->active_planes;
- state->vrr_infopacket = cur->vrr_infopacket;
- state->abm_level = cur->abm_level;
- state->vrr_supported = cur->vrr_supported;
- state->freesync_config = cur->freesync_config;
- state->cm_has_degamma = cur->cm_has_degamma;
- state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb;
- state->force_dpms_off = cur->force_dpms_off;
- /* TODO Duplicate dc_stream after objects are stream object is flattened */
-
- return &state->base;
-}
-
-#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
-static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc)
-{
- crtc_debugfs_init(crtc);
-
- return 0;
-}
-#endif
-
-static inline int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable)
+/**
+ * amdgpu_dm_connector_poll - Poll a connector to see if it's connected to a display
+ * @aconnector: DM connector to poll (owns @base drm_connector and @dc_link)
+ * @force: if true, force polling even when DAC load detection was used
+ *
+ * Used for connectors that don't support HPD (hotplug detection) to
+ * periodically check whether the connector is connected to a display.
+ *
+ * When connection was determined via DAC load detection, we avoid
+ * re-running it on normal polls to prevent visible glitches, unless
+ * @force is set.
+ *
+ * Return: The probed connector status (connected/disconnected/unknown).
+ */
+static enum drm_connector_status
+amdgpu_dm_connector_poll(struct amdgpu_dm_connector *aconnector, bool force)
{
- enum dc_irq_source irq_source;
- struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
- struct amdgpu_device *adev = drm_to_adev(crtc->dev);
- int rc;
+ struct drm_connector *connector = &aconnector->base;
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct dc_link *link = aconnector->dc_link;
+ enum dc_connection_type conn_type = dc_connection_none;
+ enum drm_connector_status status = connector_status_disconnected;
- irq_source = IRQ_TYPE_VUPDATE + acrtc->otg_inst;
+ /* When we determined the connection using DAC load detection,
+ * do NOT poll the connector to detect disconnect because
+ * that would run DAC load detection again which can cause
+ * visible glitches.
+ *
+ * Only allow to poll such a connector again when forcing.
+ */
+ if (!force && link->local_sink && link->type == dc_connection_dac_load)
+ return connector->status;
- rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY;
+ mutex_lock(&aconnector->hpd_lock);
- DRM_DEBUG_VBL("crtc %d - vupdate irq %sabling: r=%d\n",
- acrtc->crtc_id, enable ? "en" : "dis", rc);
- return rc;
-}
+ if (dc_link_detect_connection_type(aconnector->dc_link, &conn_type) &&
+ conn_type != dc_connection_none) {
+ mutex_lock(&adev->dm.dc_lock);
-static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable)
-{
- enum dc_irq_source irq_source;
- struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
- struct amdgpu_device *adev = drm_to_adev(crtc->dev);
- struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- struct amdgpu_display_manager *dm = &adev->dm;
- struct vblank_control_work *work;
-#endif
- int rc = 0;
+ /* Only call full link detection when a sink isn't created yet,
+ * i.e. just when the display is plugged in, otherwise we risk flickering.
+ */
+ if (link->local_sink ||
+ dc_link_detect(link, DETECT_REASON_HPD))
+ status = connector_status_connected;
- if (enable) {
- /* vblank irq on -> Only need vupdate irq in vrr mode */
- if (amdgpu_dm_vrr_active(acrtc_state))
- rc = dm_set_vupdate_irq(crtc, true);
- } else {
- /* vblank irq off -> vupdate irq off */
- rc = dm_set_vupdate_irq(crtc, false);
+ mutex_unlock(&adev->dm.dc_lock);
}
- if (rc)
- return rc;
-
- irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst;
-
- if (!dc_interrupt_set(adev->dm.dc, irq_source, enable))
- return -EBUSY;
-
- if (amdgpu_in_reset(adev))
- return 0;
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dm->vblank_control_workqueue) {
- work = kzalloc(sizeof(*work), GFP_ATOMIC);
- if (!work)
- return -ENOMEM;
-
- INIT_WORK(&work->work, vblank_control_worker);
- work->dm = dm;
- work->acrtc = acrtc;
- work->enable = enable;
+ if (connector->status != status) {
+ if (status == connector_status_disconnected) {
+ if (link->local_sink)
+ dc_sink_release(link->local_sink);
- if (acrtc_state->stream) {
- dc_stream_retain(acrtc_state->stream);
- work->stream = acrtc_state->stream;
+ link->local_sink = NULL;
+ link->dpcd_sink_count = 0;
+ link->type = dc_connection_none;
}
- queue_work(dm->vblank_control_workqueue, &work->work);
+ amdgpu_dm_update_connector_after_detect(aconnector);
}
-#endif
-
- return 0;
-}
-static int dm_enable_vblank(struct drm_crtc *crtc)
-{
- return dm_set_vblank(crtc, true);
-}
-
-static void dm_disable_vblank(struct drm_crtc *crtc)
-{
- dm_set_vblank(crtc, false);
+ mutex_unlock(&aconnector->hpd_lock);
+ return status;
}
-/* Implemented only the options currently availible for the driver */
-static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
- .reset = dm_crtc_reset_state,
- .destroy = amdgpu_dm_crtc_destroy,
- .set_config = drm_atomic_helper_set_config,
- .page_flip = drm_atomic_helper_page_flip,
- .atomic_duplicate_state = dm_crtc_duplicate_state,
- .atomic_destroy_state = dm_crtc_destroy_state,
- .set_crc_source = amdgpu_dm_crtc_set_crc_source,
- .verify_crc_source = amdgpu_dm_crtc_verify_crc_source,
- .get_crc_sources = amdgpu_dm_crtc_get_crc_sources,
- .get_vblank_counter = amdgpu_get_vblank_counter_kms,
- .enable_vblank = dm_enable_vblank,
- .disable_vblank = dm_disable_vblank,
- .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp,
-#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
- .late_register = amdgpu_dm_crtc_late_register,
-#endif
-};
-
+/**
+ * amdgpu_dm_connector_detect() - Detect whether a DRM connector is connected to a display
+ *
+ * A connector is considered connected when it has a sink that is not NULL.
+ * For connectors that support HPD (hotplug detection), the connection is
+ * handled in the HPD interrupt.
+ * For connectors that may not support HPD, such as analog connectors,
+ * DRM will call this function repeatedly to poll them.
+ *
+ * Notes:
+ * 1. This interface is NOT called in context of HPD irq.
+ * 2. This interface *is called* in context of user-mode ioctl. Which
+ * makes it a bad place for *any* MST-related activity.
+ *
+ * @connector: The DRM connector we are checking. We convert it to
+ * amdgpu_dm_connector so we can read the DC link and state.
+ * @force: If true, poll the connector again even in cases where polling
+ * would normally be skipped to avoid visible glitches.
+ *
+ * Return: The connector status (connected, disconnected, or unknown).
+ *
+ */
static enum drm_connector_status
amdgpu_dm_connector_detect(struct drm_connector *connector, bool force)
{
- bool connected;
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
- /*
- * Notes:
- * 1. This interface is NOT called in context of HPD irq.
- * 2. This interface *is called* in context of user-mode ioctl. Which
- * makes it a bad place for *any* MST-related activity.
- */
+ update_subconnector_property(aconnector);
- if (aconnector->base.force == DRM_FORCE_UNSPECIFIED &&
- !aconnector->fake_enable)
- connected = (aconnector->dc_sink != NULL);
- else
- connected = (aconnector->base.force == DRM_FORCE_ON);
+ if (aconnector->base.force == DRM_FORCE_ON ||
+ aconnector->base.force == DRM_FORCE_ON_DIGITAL)
+ return connector_status_connected;
+ else if (aconnector->base.force == DRM_FORCE_OFF)
+ return connector_status_disconnected;
- update_subconnector_property(aconnector);
+ /* Only poll analog connectors, and only when they are either
+ * disconnected or connected to an analog display.
+ */
+ if (drm_kms_helper_is_poll_worker() &&
+ dc_connector_supports_analog(aconnector->dc_link->link_id.id) &&
+ (!aconnector->dc_sink || aconnector->dc_sink->edid_caps.analog))
+ return amdgpu_dm_connector_poll(aconnector, force);
- return (connected ? connector_status_connected :
+ return (aconnector->dc_sink ? connector_status_connected :
connector_status_disconnected);
}
@@ -6638,7 +7548,18 @@ int amdgpu_dm_connector_atomic_set_property(struct drm_connector *connector,
dm_new_state->underscan_enable = val;
ret = 0;
} else if (property == adev->mode_info.abm_level_property) {
- dm_new_state->abm_level = val;
+ switch (val) {
+ case ABM_SYSFS_CONTROL:
+ dm_new_state->abm_sysfs_forbidden = false;
+ break;
+ case ABM_LEVEL_OFF:
+ dm_new_state->abm_sysfs_forbidden = true;
+ dm_new_state->abm_level = ABM_LEVEL_IMMEDIATE_DISABLE;
+ break;
+ default:
+ dm_new_state->abm_sysfs_forbidden = true;
+ dm_new_state->abm_level = val;
+ }
ret = 0;
}
@@ -6683,44 +7604,149 @@ int amdgpu_dm_connector_atomic_get_property(struct drm_connector *connector,
*val = dm_state->underscan_enable;
ret = 0;
} else if (property == adev->mode_info.abm_level_property) {
- *val = dm_state->abm_level;
+ if (!dm_state->abm_sysfs_forbidden)
+ *val = ABM_SYSFS_CONTROL;
+ else
+ *val = (dm_state->abm_level != ABM_LEVEL_IMMEDIATE_DISABLE) ?
+ dm_state->abm_level : 0;
ret = 0;
}
return ret;
}
+/**
+ * DOC: panel power savings
+ *
+ * The display manager allows you to set your desired **panel power savings**
+ * level (between 0-4, with 0 representing off), e.g. using the following::
+ *
+ * # echo 3 > /sys/class/drm/card0-eDP-1/amdgpu/panel_power_savings
+ *
+ * Modifying this value can have implications on color accuracy, so tread
+ * carefully.
+ */
+
+static ssize_t panel_power_savings_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_connector *connector = dev_get_drvdata(device);
+ struct drm_device *dev = connector->dev;
+ u8 val;
+
+ drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
+ val = to_dm_connector_state(connector->state)->abm_level ==
+ ABM_LEVEL_IMMEDIATE_DISABLE ? 0 :
+ to_dm_connector_state(connector->state)->abm_level;
+ drm_modeset_unlock(&dev->mode_config.connection_mutex);
+
+ return sysfs_emit(buf, "%u\n", val);
+}
+
+static ssize_t panel_power_savings_store(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct drm_connector *connector = dev_get_drvdata(device);
+ struct drm_device *dev = connector->dev;
+ long val;
+ int ret;
+
+ ret = kstrtol(buf, 0, &val);
+
+ if (ret)
+ return ret;
+
+ if (val < 0 || val > 4)
+ return -EINVAL;
+
+ drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
+ if (to_dm_connector_state(connector->state)->abm_sysfs_forbidden)
+ ret = -EBUSY;
+ else
+ to_dm_connector_state(connector->state)->abm_level = val ?:
+ ABM_LEVEL_IMMEDIATE_DISABLE;
+ drm_modeset_unlock(&dev->mode_config.connection_mutex);
+
+ if (ret)
+ return ret;
+
+ drm_kms_helper_hotplug_event(dev);
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(panel_power_savings);
+
+static struct attribute *amdgpu_attrs[] = {
+ &dev_attr_panel_power_savings.attr,
+ NULL
+};
+
+static const struct attribute_group amdgpu_group = {
+ .name = "amdgpu",
+ .attrs = amdgpu_attrs
+};
+
+static bool
+amdgpu_dm_should_create_sysfs(struct amdgpu_dm_connector *amdgpu_dm_connector)
+{
+ if (amdgpu_dm_abm_level >= 0)
+ return false;
+
+ if (amdgpu_dm_connector->base.connector_type != DRM_MODE_CONNECTOR_eDP)
+ return false;
+
+ /* check for OLED panels */
+ if (amdgpu_dm_connector->bl_idx >= 0) {
+ struct drm_device *drm = amdgpu_dm_connector->base.dev;
+ struct amdgpu_display_manager *dm = &drm_to_adev(drm)->dm;
+ struct amdgpu_dm_backlight_caps *caps;
+
+ caps = &dm->backlight_caps[amdgpu_dm_connector->bl_idx];
+ if (caps->aux_support)
+ return false;
+ }
+
+ return true;
+}
+
static void amdgpu_dm_connector_unregister(struct drm_connector *connector)
{
struct amdgpu_dm_connector *amdgpu_dm_connector = to_amdgpu_dm_connector(connector);
+ if (amdgpu_dm_should_create_sysfs(amdgpu_dm_connector))
+ sysfs_remove_group(&connector->kdev->kobj, &amdgpu_group);
+
+ cec_notifier_conn_unregister(amdgpu_dm_connector->notifier);
drm_dp_aux_unregister(&amdgpu_dm_connector->dm_dp_aux.aux);
}
static void amdgpu_dm_connector_destroy(struct drm_connector *connector)
{
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
- const struct dc_link *link = aconnector->dc_link;
struct amdgpu_device *adev = drm_to_adev(connector->dev);
struct amdgpu_display_manager *dm = &adev->dm;
- int i;
/*
- * Call only if mst_mgr was iniitalized before since it's not done
+ * Call only if mst_mgr was initialized before since it's not done
* for all connector types.
*/
if (aconnector->mst_mgr.dev)
drm_dp_mst_topology_mgr_destroy(&aconnector->mst_mgr);
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\
- defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
- for (i = 0; i < dm->num_of_edps; i++) {
- if ((link == dm->backlight_link[i]) && dm->backlight_dev[i]) {
- backlight_device_unregister(dm->backlight_dev[i]);
- dm->backlight_dev[i] = NULL;
- }
+ /* Cancel and flush any pending HDMI HPD debounce work */
+ cancel_delayed_work_sync(&aconnector->hdmi_hpd_debounce_work);
+ if (aconnector->hdmi_prev_sink) {
+ dc_sink_release(aconnector->hdmi_prev_sink);
+ aconnector->hdmi_prev_sink = NULL;
+ }
+
+ if (aconnector->bl_idx != -1) {
+ backlight_device_unregister(dm->backlight_dev[aconnector->bl_idx]);
+ dm->backlight_dev[aconnector->bl_idx] = NULL;
}
-#endif
if (aconnector->dc_em_sink)
dc_sink_release(aconnector->dc_em_sink);
@@ -6732,10 +7758,6 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector)
drm_dp_cec_unregister_connector(&aconnector->dm_dp_aux.aux);
drm_connector_unregister(connector);
drm_connector_cleanup(connector);
- if (aconnector->i2c) {
- i2c_del_adapter(&aconnector->i2c->base);
- kfree(aconnector->i2c);
- }
kfree(aconnector->dm_dp_aux.aux.name);
kfree(connector);
@@ -6761,8 +7783,13 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector)
state->base.max_requested_bpc = 8;
state->vcpi_slots = 0;
state->pbn = 0;
- if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
- state->abm_level = amdgpu_dm_abm_level;
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
+ if (amdgpu_dm_abm_level <= 0)
+ state->abm_level = ABM_LEVEL_IMMEDIATE_DISABLE;
+ else
+ state->abm_level = amdgpu_dm_abm_level;
+ }
__drm_atomic_helper_connector_reset(connector, &state->base);
}
@@ -6800,6 +7827,15 @@ amdgpu_dm_connector_late_register(struct drm_connector *connector)
to_amdgpu_dm_connector(connector);
int r;
+ if (amdgpu_dm_should_create_sysfs(amdgpu_dm_connector)) {
+ r = sysfs_create_group(&connector->kdev->kobj,
+ &amdgpu_group);
+ if (r)
+ return r;
+ }
+
+ amdgpu_dm_register_backlight_device(amdgpu_dm_connector);
+
if ((connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) ||
(connector->connector_type == DRM_MODE_CONNECTOR_eDP)) {
amdgpu_dm_connector->dm_dp_aux.aux.dev = connector->kdev;
@@ -6815,6 +7851,43 @@ amdgpu_dm_connector_late_register(struct drm_connector *connector)
return 0;
}
+static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector)
+{
+ struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+ struct dc_link *dc_link = aconnector->dc_link;
+ struct dc_sink *dc_em_sink = aconnector->dc_em_sink;
+ const struct drm_edid *drm_edid;
+ struct i2c_adapter *ddc;
+ struct drm_device *dev = connector->dev;
+
+ if (dc_link && dc_link->aux_mode)
+ ddc = &aconnector->dm_dp_aux.aux.ddc;
+ else
+ ddc = &aconnector->i2c->base;
+
+ drm_edid = drm_edid_read_ddc(connector, ddc);
+ drm_edid_connector_update(connector, drm_edid);
+ if (!drm_edid) {
+ drm_err(dev, "No EDID found on connector: %s.\n", connector->name);
+ return;
+ }
+
+ aconnector->drm_edid = drm_edid;
+ /* Update emulated (virtual) sink's EDID */
+ if (dc_em_sink && dc_link) {
+ // FIXME: Get rid of drm_edid_raw()
+ const struct edid *edid = drm_edid_raw(drm_edid);
+
+ memset(&dc_em_sink->edid_caps, 0, sizeof(struct dc_edid_caps));
+ memmove(dc_em_sink->dc_edid.raw_edid, edid,
+ (edid->extensions + 1) * EDID_LENGTH);
+ dm_helpers_parse_edid_caps(
+ dc_link,
+ &dc_em_sink->dc_edid,
+ &dc_em_sink->edid_caps);
+ }
+}
+
static const struct drm_connector_funcs amdgpu_dm_connector_funcs = {
.reset = amdgpu_dm_connector_funcs_reset,
.detect = amdgpu_dm_connector_detect,
@@ -6825,7 +7898,8 @@ static const struct drm_connector_funcs amdgpu_dm_connector_funcs = {
.atomic_set_property = amdgpu_dm_connector_atomic_set_property,
.atomic_get_property = amdgpu_dm_connector_atomic_get_property,
.late_register = amdgpu_dm_connector_late_register,
- .early_unregister = amdgpu_dm_connector_unregister
+ .early_unregister = amdgpu_dm_connector_unregister,
+ .force = amdgpu_dm_connector_funcs_force
};
static int get_modes(struct drm_connector *connector)
@@ -6835,25 +7909,34 @@ static int get_modes(struct drm_connector *connector)
static void create_eml_sink(struct amdgpu_dm_connector *aconnector)
{
+ struct drm_connector *connector = &aconnector->base;
+ struct dc_link *dc_link = aconnector->dc_link;
struct dc_sink_init_data init_params = {
.link = aconnector->dc_link,
.sink_signal = SIGNAL_TYPE_VIRTUAL
};
- struct edid *edid;
+ const struct drm_edid *drm_edid;
+ const struct edid *edid;
+ struct i2c_adapter *ddc;
- if (!aconnector->base.edid_blob_ptr) {
- DRM_ERROR("No EDID firmware found on connector: %s ,forcing to OFF!\n",
- aconnector->base.name);
+ if (dc_link && dc_link->aux_mode)
+ ddc = &aconnector->dm_dp_aux.aux.ddc;
+ else
+ ddc = &aconnector->i2c->base;
- aconnector->base.force = DRM_FORCE_OFF;
- aconnector->base.override_edid = false;
+ drm_edid = drm_edid_read_ddc(connector, ddc);
+ drm_edid_connector_update(connector, drm_edid);
+ if (!drm_edid) {
+ drm_err(connector->dev, "No EDID found on connector: %s.\n", connector->name);
return;
}
- edid = (struct edid *) aconnector->base.edid_blob_ptr->data;
+ if (connector->display_info.is_hdmi)
+ init_params.sink_signal = SIGNAL_TYPE_HDMI_TYPE_A;
- aconnector->edid = edid;
+ aconnector->drm_edid = drm_edid;
+ edid = drm_edid_raw(drm_edid); // FIXME: Get rid of drm_edid_raw()
aconnector->dc_em_sink = dc_link_add_remote_sink(
aconnector->dc_link,
(uint8_t *)edid,
@@ -6864,7 +7947,8 @@ static void create_eml_sink(struct amdgpu_dm_connector *aconnector)
aconnector->dc_sink = aconnector->dc_link->local_sink ?
aconnector->dc_link->local_sink :
aconnector->dc_em_sink;
- dc_sink_retain(aconnector->dc_sink);
+ if (aconnector->dc_sink)
+ dc_sink_retain(aconnector->dc_sink);
}
}
@@ -6881,41 +7965,125 @@ static void handle_edid_mgmt(struct amdgpu_dm_connector *aconnector)
link->verified_link_cap.link_rate = LINK_RATE_HIGH2;
}
-
- aconnector->base.override_edid = true;
create_eml_sink(aconnector);
}
-static struct dc_stream_state *
-create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector,
+static enum dc_status dm_validate_stream_and_context(struct dc *dc, /* Validate @stream against a throwaway plane state and DC state; returns the resulting dc_status. */
+ struct dc_stream_state *stream)
+{
+ enum dc_status dc_result = DC_ERROR_UNEXPECTED; /* value returned when stream is NULL or either allocation below fails */
+ struct dc_plane_state *dc_plane_state = NULL;
+ struct dc_state *dc_state = NULL;
+
+ if (!stream)
+ goto cleanup;
+
+ dc_plane_state = dc_create_plane_state(dc); /* temporary plane, released at cleanup */
+ if (!dc_plane_state)
+ goto cleanup;
+
+ dc_state = dc_state_create(dc, NULL); /* temporary state, released at cleanup */
+ if (!dc_state)
+ goto cleanup;
+
+ /* populate stream to plane */
+ dc_plane_state->src_rect.height = stream->src.height; /* every rect and size below is taken from stream->src */
+ dc_plane_state->src_rect.width = stream->src.width;
+ dc_plane_state->dst_rect.height = stream->src.height;
+ dc_plane_state->dst_rect.width = stream->src.width;
+ dc_plane_state->clip_rect.height = stream->src.height;
+ dc_plane_state->clip_rect.width = stream->src.width;
+ dc_plane_state->plane_size.surface_pitch = ((stream->src.width + 255) / 256) * 256; /* width rounded up to the next multiple of 256 */
+ dc_plane_state->plane_size.surface_size.height = stream->src.height;
+ dc_plane_state->plane_size.surface_size.width = stream->src.width;
+ dc_plane_state->plane_size.chroma_size.height = stream->src.height;
+ dc_plane_state->plane_size.chroma_size.width = stream->src.width;
+ dc_plane_state->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB8888;
+ dc_plane_state->tiling_info.gfx9.swizzle = DC_SW_UNKNOWN;
+ dc_plane_state->rotation = ROTATION_ANGLE_0;
+ dc_plane_state->is_tiling_rotated = false;
+ dc_plane_state->tiling_info.gfx8.array_mode = DC_ARRAY_LINEAR_GENERAL;
+
+ dc_result = dc_validate_stream(dc, stream); /* from here on, each step runs only if the previous one returned DC_OK */
+ if (dc_result == DC_OK)
+ dc_result = dc_validate_plane(dc, dc_plane_state);
+
+ if (dc_result == DC_OK)
+ dc_result = dc_state_add_stream(dc, dc_state, stream);
+
+ if (dc_result == DC_OK && !dc_state_add_plane( /* a false return from add_plane is reported as DC_FAIL_ATTACH_SURFACES */
+ dc,
+ stream,
+ dc_plane_state,
+ dc_state))
+ dc_result = DC_FAIL_ATTACH_SURFACES;
+
+ if (dc_result == DC_OK)
+ dc_result = dc_validate_global_state(dc, dc_state, DC_VALIDATE_MODE_ONLY); /* final check of the assembled temporary state */
+
+cleanup:
+ if (dc_state) /* release only what was actually created; both pointers start out NULL */
+ dc_state_release(dc_state);
+
+ if (dc_plane_state)
+ dc_plane_state_release(dc_plane_state);
+
+ return dc_result;
+}
+
+struct dc_stream_state *
+create_validate_stream_for_sink(struct drm_connector *connector,
const struct drm_display_mode *drm_mode,
const struct dm_connector_state *dm_state,
const struct dc_stream_state *old_stream)
{
- struct drm_connector *connector = &aconnector->base;
+ struct amdgpu_dm_connector *aconnector = NULL;
struct amdgpu_device *adev = drm_to_adev(connector->dev);
struct dc_stream_state *stream;
const struct drm_connector_state *drm_state = dm_state ? &dm_state->base : NULL;
int requested_bpc = drm_state ? drm_state->max_requested_bpc : 8;
enum dc_status dc_result = DC_OK;
+ uint8_t bpc_limit = 6;
+
+ if (!dm_state)
+ return NULL;
+
+ if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
+ aconnector = to_amdgpu_dm_connector(connector);
+
+ if (aconnector &&
+ (aconnector->dc_link->connector_signal == SIGNAL_TYPE_HDMI_TYPE_A ||
+ aconnector->dc_link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER))
+ bpc_limit = 8;
do {
- stream = create_stream_for_sink(aconnector, drm_mode,
+ drm_dbg_kms(connector->dev, "Trying with %d bpc\n", requested_bpc);
+ stream = create_stream_for_sink(connector, drm_mode,
dm_state, old_stream,
requested_bpc);
if (stream == NULL) {
- DRM_ERROR("Failed to create stream for sink!\n");
+ drm_err(adev_to_drm(adev), "Failed to create stream for sink!\n");
break;
}
dc_result = dc_validate_stream(adev->dm.dc, stream);
+ if (!aconnector) /* writeback connector */
+ return stream;
+
+ if (dc_result == DC_OK && stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
+ dc_result = dm_dp_mst_is_port_support_mode(aconnector, stream);
+
+ if (dc_result == DC_OK)
+ dc_result = dm_validate_stream_and_context(adev->dm.dc, stream);
+
if (dc_result != DC_OK) {
- DRM_DEBUG_KMS("Mode %dx%d (clk %d) failed DC validation with error %d (%s)\n",
+ DRM_DEBUG_KMS("Pruned mode %d x %d (clk %d) %s %s -- %s\n",
drm_mode->hdisplay,
drm_mode->vdisplay,
drm_mode->clock,
- dc_result,
+ dc_pixel_encoding_to_str(stream->timing.pixel_encoding),
+ dc_color_depth_to_str(stream->timing.display_color_depth),
dc_status_to_str(dc_result));
dc_stream_release(stream);
@@ -6923,27 +8091,59 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector,
requested_bpc -= 2; /* lower bpc to retry validation */
}
- } while (stream == NULL && requested_bpc >= 6);
-
- if (dc_result == DC_FAIL_ENC_VALIDATE && !aconnector->force_yuv420_output) {
- DRM_DEBUG_KMS("Retry forcing YCbCr420 encoding\n");
+ } while (stream == NULL && requested_bpc >= bpc_limit);
- aconnector->force_yuv420_output = true;
- stream = create_validate_stream_for_sink(aconnector, drm_mode,
- dm_state, old_stream);
+ switch (dc_result) {
+ /*
+ * If we failed to validate DP bandwidth stream with the requested RGB color depth,
+ * we try to fallback and configure in order:
+ * YUV422 (8bpc, 6bpc)
+ * YUV420 (8bpc, 6bpc)
+ */
+ case DC_FAIL_ENC_VALIDATE:
+ case DC_EXCEED_DONGLE_CAP:
+ case DC_NO_DP_LINK_BANDWIDTH:
+ /* recursively entered twice and already tried both YUV422 and YUV420 */
+ if (aconnector->force_yuv422_output && aconnector->force_yuv420_output)
+ break;
+ /* first failure; try YUV422 */
+ if (!aconnector->force_yuv422_output) {
+ drm_dbg_kms(connector->dev, "%s:%d Validation failed with %d, retrying w/ YUV422\n",
+ __func__, __LINE__, dc_result);
+ aconnector->force_yuv422_output = true;
+ /* recursively entered and YUV422 failed, try YUV420 */
+ } else if (!aconnector->force_yuv420_output) {
+ drm_dbg_kms(connector->dev, "%s:%d Validation failed with %d, retrying w/ YUV420\n",
+ __func__, __LINE__, dc_result);
+ aconnector->force_yuv420_output = true;
+ }
+ stream = create_validate_stream_for_sink(connector, drm_mode,
+ dm_state, old_stream);
+ aconnector->force_yuv422_output = false;
aconnector->force_yuv420_output = false;
+ break;
+ case DC_OK:
+ break;
+ default:
+ drm_dbg_kms(connector->dev, "%s:%d Unhandled validation failure %d\n",
+ __func__, __LINE__, dc_result);
+ break;
}
return stream;
}
enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connector,
- struct drm_display_mode *mode)
+ const struct drm_display_mode *mode)
{
int result = MODE_ERROR;
struct dc_sink *dc_sink;
+ struct drm_display_mode *test_mode;
/* TODO: Unhardcode stream count */
struct dc_stream_state *stream;
+ /* we always have an amdgpu_dm_connector here since we got
+ * here via the amdgpu_dm_connector_helper_funcs
+ */
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
if ((mode->flags & DRM_MODE_FLAG_INTERLACE) ||
@@ -6962,11 +8162,20 @@ enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connec
if (dc_sink == NULL && aconnector->base.force != DRM_FORCE_ON_DIGITAL &&
aconnector->base.force != DRM_FORCE_ON) {
- DRM_ERROR("dc_sink is NULL!\n");
+ drm_err(connector->dev, "dc_sink is NULL!\n");
goto fail;
}
- stream = create_validate_stream_for_sink(aconnector, mode, NULL, NULL);
+ test_mode = drm_mode_duplicate(connector->dev, mode);
+ if (!test_mode)
+ goto fail;
+
+ drm_mode_set_crtcinfo(test_mode, 0);
+
+ stream = create_validate_stream_for_sink(connector, test_mode,
+ to_dm_connector_state(connector->state),
+ NULL);
+ drm_mode_destroy(connector->dev, test_mode);
if (stream) {
dc_stream_release(stream);
result = MODE_OK;
@@ -7046,13 +8255,47 @@ amdgpu_dm_connector_atomic_check(struct drm_connector *conn,
drm_atomic_get_old_connector_state(state, conn);
struct drm_crtc *crtc = new_con_state->crtc;
struct drm_crtc_state *new_crtc_state;
+ struct amdgpu_dm_connector *aconn = to_amdgpu_dm_connector(conn);
int ret;
+ if (WARN_ON(unlikely(!old_con_state || !new_con_state)))
+ return -EINVAL;
+
trace_amdgpu_dm_connector_atomic_check(new_con_state);
+ if (conn->connector_type == DRM_MODE_CONNECTOR_DisplayPort) {
+ ret = drm_dp_mst_root_conn_atomic_check(new_con_state, &aconn->mst_mgr);
+ if (ret < 0)
+ return ret;
+ }
+
if (!crtc)
return 0;
+ if (new_con_state->privacy_screen_sw_state != old_con_state->privacy_screen_sw_state) {
+ new_crtc_state = drm_atomic_get_crtc_state(state, crtc);
+ if (IS_ERR(new_crtc_state))
+ return PTR_ERR(new_crtc_state);
+
+ new_crtc_state->mode_changed = true;
+ }
+
+ if (new_con_state->colorspace != old_con_state->colorspace) {
+ new_crtc_state = drm_atomic_get_crtc_state(state, crtc);
+ if (IS_ERR(new_crtc_state))
+ return PTR_ERR(new_crtc_state);
+
+ new_crtc_state->mode_changed = true;
+ }
+
+ if (new_con_state->content_type != old_con_state->content_type) {
+ new_crtc_state = drm_atomic_get_crtc_state(state, crtc);
+ if (IS_ERR(new_crtc_state))
+ return PTR_ERR(new_crtc_state);
+
+ new_crtc_state->mode_changed = true;
+ }
+
if (!drm_connector_atomic_hdr_metadata_equal(old_con_state, new_con_state)) {
struct dc_info_packet hdr_infopacket;
@@ -7075,7 +8318,7 @@ amdgpu_dm_connector_atomic_check(struct drm_connector *conn,
* set is permissible, however. So only force a
* modeset if we're entering or exiting HDR.
*/
- new_crtc_state->mode_changed =
+ new_crtc_state->mode_changed = new_crtc_state->mode_changed ||
!old_con_state->hdr_output_metadata ||
!new_con_state->hdr_output_metadata;
}
@@ -7096,136 +8339,29 @@ amdgpu_dm_connector_helper_funcs = {
.atomic_check = amdgpu_dm_connector_atomic_check,
};
-static void dm_crtc_helper_disable(struct drm_crtc *crtc)
-{
-}
-
-static int count_crtc_active_planes(struct drm_crtc_state *new_crtc_state)
-{
- struct drm_atomic_state *state = new_crtc_state->state;
- struct drm_plane *plane;
- int num_active = 0;
-
- drm_for_each_plane_mask(plane, state->dev, new_crtc_state->plane_mask) {
- struct drm_plane_state *new_plane_state;
-
- /* Cursor planes are "fake". */
- if (plane->type == DRM_PLANE_TYPE_CURSOR)
- continue;
-
- new_plane_state = drm_atomic_get_new_plane_state(state, plane);
-
- if (!new_plane_state) {
- /*
- * The plane is enable on the CRTC and hasn't changed
- * state. This means that it previously passed
- * validation and is therefore enabled.
- */
- num_active += 1;
- continue;
- }
-
- /* We need a framebuffer to be considered enabled. */
- num_active += (new_plane_state->fb != NULL);
- }
-
- return num_active;
-}
-
-static void dm_update_crtc_active_planes(struct drm_crtc *crtc,
- struct drm_crtc_state *new_crtc_state)
-{
- struct dm_crtc_state *dm_new_crtc_state =
- to_dm_crtc_state(new_crtc_state);
-
- dm_new_crtc_state->active_planes = 0;
-
- if (!dm_new_crtc_state->stream)
- return;
-
- dm_new_crtc_state->active_planes =
- count_crtc_active_planes(new_crtc_state);
-}
-
-static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc,
- struct drm_atomic_state *state)
-{
- struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state,
- crtc);
- struct amdgpu_device *adev = drm_to_adev(crtc->dev);
- struct dc *dc = adev->dm.dc;
- struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state);
- int ret = -EINVAL;
-
- trace_amdgpu_dm_crtc_atomic_check(crtc_state);
-
- dm_update_crtc_active_planes(crtc, crtc_state);
-
- if (WARN_ON(unlikely(!dm_crtc_state->stream &&
- modeset_required(crtc_state, NULL, dm_crtc_state->stream)))) {
- return ret;
- }
-
- /*
- * We require the primary plane to be enabled whenever the CRTC is, otherwise
- * drm_mode_cursor_universal may end up trying to enable the cursor plane while all other
- * planes are disabled, which is not supported by the hardware. And there is legacy
- * userspace which stops using the HW cursor altogether in response to the resulting EINVAL.
- */
- if (crtc_state->enable &&
- !(crtc_state->plane_mask & drm_plane_mask(crtc->primary))) {
- DRM_DEBUG_ATOMIC("Can't enable a CRTC without enabling the primary plane\n");
- return -EINVAL;
- }
-
- /* In some use cases, like reset, no stream is attached */
- if (!dm_crtc_state->stream)
- return 0;
-
- if (dc_validate_stream(dc, dm_crtc_state->stream) == DC_OK)
- return 0;
-
- DRM_DEBUG_ATOMIC("Failed DC stream validation\n");
- return ret;
-}
-
-static bool dm_crtc_helper_mode_fixup(struct drm_crtc *crtc,
- const struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
- return true;
-}
-
-static const struct drm_crtc_helper_funcs amdgpu_dm_crtc_helper_funcs = {
- .disable = dm_crtc_helper_disable,
- .atomic_check = dm_crtc_helper_atomic_check,
- .mode_fixup = dm_crtc_helper_mode_fixup,
- .get_scanout_position = amdgpu_crtc_get_scanout_position,
-};
-
static void dm_encoder_helper_disable(struct drm_encoder *encoder)
{
}
-static int convert_dc_color_depth_into_bpc (enum dc_color_depth display_color_depth)
+int convert_dc_color_depth_into_bpc(enum dc_color_depth display_color_depth)
{
switch (display_color_depth) {
- case COLOR_DEPTH_666:
- return 6;
- case COLOR_DEPTH_888:
- return 8;
- case COLOR_DEPTH_101010:
- return 10;
- case COLOR_DEPTH_121212:
- return 12;
- case COLOR_DEPTH_141414:
- return 14;
- case COLOR_DEPTH_161616:
- return 16;
- default:
- break;
- }
+ case COLOR_DEPTH_666:
+ return 6;
+ case COLOR_DEPTH_888:
+ return 8;
+ case COLOR_DEPTH_101010:
+ return 10;
+ case COLOR_DEPTH_121212:
+ return 12;
+ case COLOR_DEPTH_141414:
+ return 14;
+ case COLOR_DEPTH_161616:
+ return 16;
+ default:
+ break;
+ }
return 0;
}
@@ -7240,35 +8376,59 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder,
const struct drm_display_mode *adjusted_mode = &crtc_state->adjusted_mode;
struct drm_dp_mst_topology_mgr *mst_mgr;
struct drm_dp_mst_port *mst_port;
+ struct drm_dp_mst_topology_state *mst_state;
enum dc_color_depth color_depth;
int clock, bpp = 0;
bool is_y420 = false;
- if (!aconnector->port || !aconnector->dc_sink)
+ if ((connector->connector_type == DRM_MODE_CONNECTOR_eDP) ||
+ (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)) {
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
+ enum drm_mode_status result;
+
+ result = drm_crtc_helper_mode_valid_fixed(encoder->crtc, adjusted_mode, native_mode);
+ if (result != MODE_OK && dm_new_connector_state->scaling == RMX_OFF) {
+ drm_dbg_driver(encoder->dev,
+ "mode %dx%d@%dHz is not native, enabling scaling\n",
+ adjusted_mode->hdisplay, adjusted_mode->vdisplay,
+ drm_mode_vrefresh(adjusted_mode));
+ dm_new_connector_state->scaling = RMX_ASPECT;
+ }
return 0;
+ }
- mst_port = aconnector->port;
- mst_mgr = &aconnector->mst_port->mst_mgr;
+ if (!aconnector->mst_output_port)
+ return 0;
+
+ mst_port = aconnector->mst_output_port;
+ mst_mgr = &aconnector->mst_root->mst_mgr;
if (!crtc_state->connectors_changed && !crtc_state->mode_changed)
return 0;
+ mst_state = drm_atomic_get_mst_topology_state(state, mst_mgr);
+ if (IS_ERR(mst_state))
+ return PTR_ERR(mst_state);
+
+ mst_state->pbn_div.full = dm_mst_get_pbn_divider(aconnector->mst_root->dc_link);
+
if (!state->duplicated) {
int max_bpc = conn_state->max_requested_bpc;
+
is_y420 = drm_mode_is_420_also(&connector->display_info, adjusted_mode) &&
- aconnector->force_yuv420_output;
+ aconnector->force_yuv420_output;
color_depth = convert_color_depth_from_display_info(connector,
is_y420,
max_bpc);
bpp = convert_dc_color_depth_into_bpc(color_depth) * 3;
clock = adjusted_mode->clock;
- dm_new_connector_state->pbn = drm_dp_calc_pbn_mode(clock, bpp, false);
+ dm_new_connector_state->pbn = drm_dp_calc_pbn_mode(clock, bpp << 4);
}
- dm_new_connector_state->vcpi_slots = drm_dp_atomic_find_vcpi_slots(state,
- mst_mgr,
- mst_port,
- dm_new_connector_state->pbn,
- dm_mst_get_pbn_divider(aconnector->dc_link));
+
+ dm_new_connector_state->vcpi_slots =
+ drm_dp_atomic_find_time_slots(state, mst_mgr, mst_port,
+ dm_new_connector_state->pbn);
if (dm_new_connector_state->vcpi_slots < 0) {
DRM_DEBUG_ATOMIC("failed finding vcpi slots: %d\n", (int)dm_new_connector_state->vcpi_slots);
return dm_new_connector_state->vcpi_slots;
@@ -7281,7 +8441,6 @@ const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs = {
.atomic_check = dm_encoder_helper_atomic_check
};
-#if defined(CONFIG_DRM_AMD_DC_DCN)
static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
struct dc_state *dc_state,
struct dsc_mst_fairness_vars *vars)
@@ -7291,14 +8450,17 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
struct drm_connector_state *new_con_state;
struct amdgpu_dm_connector *aconnector;
struct dm_connector_state *dm_conn_state;
- int i, j;
- int vcpi, pbn_div, pbn, slot_num = 0;
+ int i, j, ret;
+ int vcpi, pbn_div, pbn = 0, slot_num = 0;
for_each_new_connector_in_state(state, connector, new_con_state, i) {
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
- if (!aconnector->port)
+ if (!aconnector->mst_output_port)
continue;
if (!new_con_state || !new_con_state->crtc)
@@ -7311,7 +8473,7 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
if (!stream)
continue;
- if ((struct amdgpu_dm_connector*)stream->dm_stream_context == aconnector)
+ if ((struct amdgpu_dm_connector *)stream->dm_stream_context == aconnector)
break;
stream = NULL;
@@ -7329,7 +8491,7 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
}
}
- if (j == dc_state->stream_count)
+ if (j == dc_state->stream_count || pbn_div == 0)
continue;
slot_num = DIV_ROUND_UP(pbn, pbn_div);
@@ -7338,18 +8500,15 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
dm_conn_state->pbn = pbn;
dm_conn_state->vcpi_slots = slot_num;
- drm_dp_mst_atomic_enable_dsc(state,
- aconnector->port,
- dm_conn_state->pbn,
- 0,
- false);
+ ret = drm_dp_mst_atomic_enable_dsc(state, aconnector->mst_output_port,
+ dm_conn_state->pbn, false);
+ if (ret < 0)
+ return ret;
+
continue;
}
- vcpi = drm_dp_mst_atomic_enable_dsc(state,
- aconnector->port,
- pbn, pbn_div,
- true);
+ vcpi = drm_dp_mst_atomic_enable_dsc(state, aconnector->mst_output_port, pbn, true);
if (vcpi < 0)
return vcpi;
@@ -7358,525 +8517,8 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
}
return 0;
}
-#endif
-
-static void dm_drm_plane_reset(struct drm_plane *plane)
-{
- struct dm_plane_state *amdgpu_state = NULL;
-
- if (plane->state)
- plane->funcs->atomic_destroy_state(plane, plane->state);
-
- amdgpu_state = kzalloc(sizeof(*amdgpu_state), GFP_KERNEL);
- WARN_ON(amdgpu_state == NULL);
-
- if (amdgpu_state)
- __drm_atomic_helper_plane_reset(plane, &amdgpu_state->base);
-}
-
-static struct drm_plane_state *
-dm_drm_plane_duplicate_state(struct drm_plane *plane)
-{
- struct dm_plane_state *dm_plane_state, *old_dm_plane_state;
-
- old_dm_plane_state = to_dm_plane_state(plane->state);
- dm_plane_state = kzalloc(sizeof(*dm_plane_state), GFP_KERNEL);
- if (!dm_plane_state)
- return NULL;
-
- __drm_atomic_helper_plane_duplicate_state(plane, &dm_plane_state->base);
-
- if (old_dm_plane_state->dc_state) {
- dm_plane_state->dc_state = old_dm_plane_state->dc_state;
- dc_plane_state_retain(dm_plane_state->dc_state);
- }
-
- return &dm_plane_state->base;
-}
-
-static void dm_drm_plane_destroy_state(struct drm_plane *plane,
- struct drm_plane_state *state)
-{
- struct dm_plane_state *dm_plane_state = to_dm_plane_state(state);
-
- if (dm_plane_state->dc_state)
- dc_plane_state_release(dm_plane_state->dc_state);
-
- drm_atomic_helper_plane_destroy_state(plane, state);
-}
-
-static const struct drm_plane_funcs dm_plane_funcs = {
- .update_plane = drm_atomic_helper_update_plane,
- .disable_plane = drm_atomic_helper_disable_plane,
- .destroy = drm_primary_helper_destroy,
- .reset = dm_drm_plane_reset,
- .atomic_duplicate_state = dm_drm_plane_duplicate_state,
- .atomic_destroy_state = dm_drm_plane_destroy_state,
- .format_mod_supported = dm_plane_format_mod_supported,
-};
-
-static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
- struct drm_plane_state *new_state)
-{
- struct amdgpu_framebuffer *afb;
- struct drm_gem_object *obj;
- struct amdgpu_device *adev;
- struct amdgpu_bo *rbo;
- struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old;
- struct list_head list;
- struct ttm_validate_buffer tv;
- struct ww_acquire_ctx ticket;
- uint32_t domain;
- int r;
-
- if (!new_state->fb) {
- DRM_DEBUG_KMS("No FB bound\n");
- return 0;
- }
-
- afb = to_amdgpu_framebuffer(new_state->fb);
- obj = new_state->fb->obj[0];
- rbo = gem_to_amdgpu_bo(obj);
- adev = amdgpu_ttm_adev(rbo->tbo.bdev);
- INIT_LIST_HEAD(&list);
-
- tv.bo = &rbo->tbo;
- tv.num_shared = 1;
- list_add(&tv.head, &list);
-
- r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
- if (r) {
- dev_err(adev->dev, "fail to reserve bo (%d)\n", r);
- return r;
- }
-
- if (plane->type != DRM_PLANE_TYPE_CURSOR)
- domain = amdgpu_display_supported_domains(adev, rbo->flags);
- else
- domain = AMDGPU_GEM_DOMAIN_VRAM;
-
- r = amdgpu_bo_pin(rbo, domain);
- if (unlikely(r != 0)) {
- if (r != -ERESTARTSYS)
- DRM_ERROR("Failed to pin framebuffer with error %d\n", r);
- ttm_eu_backoff_reservation(&ticket, &list);
- return r;
- }
-
- r = amdgpu_ttm_alloc_gart(&rbo->tbo);
- if (unlikely(r != 0)) {
- amdgpu_bo_unpin(rbo);
- ttm_eu_backoff_reservation(&ticket, &list);
- DRM_ERROR("%p bind failed\n", rbo);
- return r;
- }
-
- ttm_eu_backoff_reservation(&ticket, &list);
-
- afb->address = amdgpu_bo_gpu_offset(rbo);
-
- amdgpu_bo_ref(rbo);
-
- /**
- * We don't do surface updates on planes that have been newly created,
- * but we also don't have the afb->address during atomic check.
- *
- * Fill in buffer attributes depending on the address here, but only on
- * newly created planes since they're not being used by DC yet and this
- * won't modify global state.
- */
- dm_plane_state_old = to_dm_plane_state(plane->state);
- dm_plane_state_new = to_dm_plane_state(new_state);
-
- if (dm_plane_state_new->dc_state &&
- dm_plane_state_old->dc_state != dm_plane_state_new->dc_state) {
- struct dc_plane_state *plane_state =
- dm_plane_state_new->dc_state;
- bool force_disable_dcc = !plane_state->dcc.enable;
-
- fill_plane_buffer_attributes(
- adev, afb, plane_state->format, plane_state->rotation,
- afb->tiling_flags,
- &plane_state->tiling_info, &plane_state->plane_size,
- &plane_state->dcc, &plane_state->address,
- afb->tmz_surface, force_disable_dcc);
- }
-
- return 0;
-}
-static void dm_plane_helper_cleanup_fb(struct drm_plane *plane,
- struct drm_plane_state *old_state)
-{
- struct amdgpu_bo *rbo;
- int r;
-
- if (!old_state->fb)
- return;
-
- rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]);
- r = amdgpu_bo_reserve(rbo, false);
- if (unlikely(r)) {
- DRM_ERROR("failed to reserve rbo before unpin\n");
- return;
- }
-
- amdgpu_bo_unpin(rbo);
- amdgpu_bo_unreserve(rbo);
- amdgpu_bo_unref(&rbo);
-}
-
-static int dm_plane_helper_check_state(struct drm_plane_state *state,
- struct drm_crtc_state *new_crtc_state)
-{
- struct drm_framebuffer *fb = state->fb;
- int min_downscale, max_upscale;
- int min_scale = 0;
- int max_scale = INT_MAX;
-
- /* Plane enabled? Validate viewport and get scaling factors from plane caps. */
- if (fb && state->crtc) {
- /* Validate viewport to cover the case when only the position changes */
- if (state->plane->type != DRM_PLANE_TYPE_CURSOR) {
- int viewport_width = state->crtc_w;
- int viewport_height = state->crtc_h;
-
- if (state->crtc_x < 0)
- viewport_width += state->crtc_x;
- else if (state->crtc_x + state->crtc_w > new_crtc_state->mode.crtc_hdisplay)
- viewport_width = new_crtc_state->mode.crtc_hdisplay - state->crtc_x;
-
- if (state->crtc_y < 0)
- viewport_height += state->crtc_y;
- else if (state->crtc_y + state->crtc_h > new_crtc_state->mode.crtc_vdisplay)
- viewport_height = new_crtc_state->mode.crtc_vdisplay - state->crtc_y;
-
- if (viewport_width < 0 || viewport_height < 0) {
- DRM_DEBUG_ATOMIC("Plane completely outside of screen\n");
- return -EINVAL;
- } else if (viewport_width < MIN_VIEWPORT_SIZE*2) { /* x2 for width is because of pipe-split. */
- DRM_DEBUG_ATOMIC("Viewport width %d smaller than %d\n", viewport_width, MIN_VIEWPORT_SIZE*2);
- return -EINVAL;
- } else if (viewport_height < MIN_VIEWPORT_SIZE) {
- DRM_DEBUG_ATOMIC("Viewport height %d smaller than %d\n", viewport_height, MIN_VIEWPORT_SIZE);
- return -EINVAL;
- }
-
- }
-
- /* Get min/max allowed scaling factors from plane caps. */
- get_min_max_dc_plane_scaling(state->crtc->dev, fb,
- &min_downscale, &max_upscale);
- /*
- * Convert to drm convention: 16.16 fixed point, instead of dc's
- * 1.0 == 1000. Also drm scaling is src/dst instead of dc's
- * dst/src, so min_scale = 1.0 / max_upscale, etc.
- */
- min_scale = (1000 << 16) / max_upscale;
- max_scale = (1000 << 16) / min_downscale;
- }
-
- return drm_atomic_helper_check_plane_state(
- state, new_crtc_state, min_scale, max_scale, true, true);
-}
-
-static int dm_plane_atomic_check(struct drm_plane *plane,
- struct drm_atomic_state *state)
-{
- struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
- plane);
- struct amdgpu_device *adev = drm_to_adev(plane->dev);
- struct dc *dc = adev->dm.dc;
- struct dm_plane_state *dm_plane_state;
- struct dc_scaling_info scaling_info;
- struct drm_crtc_state *new_crtc_state;
- int ret;
-
- trace_amdgpu_dm_plane_atomic_check(new_plane_state);
-
- dm_plane_state = to_dm_plane_state(new_plane_state);
-
- if (!dm_plane_state->dc_state)
- return 0;
-
- new_crtc_state =
- drm_atomic_get_new_crtc_state(state,
- new_plane_state->crtc);
- if (!new_crtc_state)
- return -EINVAL;
-
- ret = dm_plane_helper_check_state(new_plane_state, new_crtc_state);
- if (ret)
- return ret;
-
- ret = fill_dc_scaling_info(adev, new_plane_state, &scaling_info);
- if (ret)
- return ret;
-
- if (dc_validate_plane(dc, dm_plane_state->dc_state) == DC_OK)
- return 0;
-
- return -EINVAL;
-}
-
-static int dm_plane_atomic_async_check(struct drm_plane *plane,
- struct drm_atomic_state *state)
-{
- /* Only support async updates on cursor planes. */
- if (plane->type != DRM_PLANE_TYPE_CURSOR)
- return -EINVAL;
-
- return 0;
-}
-
-static void dm_plane_atomic_async_update(struct drm_plane *plane,
- struct drm_atomic_state *state)
-{
- struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state,
- plane);
- struct drm_plane_state *old_state =
- drm_atomic_get_old_plane_state(state, plane);
-
- trace_amdgpu_dm_atomic_update_cursor(new_state);
-
- swap(plane->state->fb, new_state->fb);
-
- plane->state->src_x = new_state->src_x;
- plane->state->src_y = new_state->src_y;
- plane->state->src_w = new_state->src_w;
- plane->state->src_h = new_state->src_h;
- plane->state->crtc_x = new_state->crtc_x;
- plane->state->crtc_y = new_state->crtc_y;
- plane->state->crtc_w = new_state->crtc_w;
- plane->state->crtc_h = new_state->crtc_h;
-
- handle_cursor_update(plane, old_state);
-}
-
-static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
- .prepare_fb = dm_plane_helper_prepare_fb,
- .cleanup_fb = dm_plane_helper_cleanup_fb,
- .atomic_check = dm_plane_atomic_check,
- .atomic_async_check = dm_plane_atomic_async_check,
- .atomic_async_update = dm_plane_atomic_async_update
-};
-
-/*
- * TODO: these are currently initialized to rgb formats only.
- * For future use cases we should either initialize them dynamically based on
- * plane capabilities, or initialize this array to all formats, so internal drm
- * check will succeed, and let DC implement proper check
- */
-static const uint32_t rgb_formats[] = {
- DRM_FORMAT_XRGB8888,
- DRM_FORMAT_ARGB8888,
- DRM_FORMAT_RGBA8888,
- DRM_FORMAT_XRGB2101010,
- DRM_FORMAT_XBGR2101010,
- DRM_FORMAT_ARGB2101010,
- DRM_FORMAT_ABGR2101010,
- DRM_FORMAT_XRGB16161616,
- DRM_FORMAT_XBGR16161616,
- DRM_FORMAT_ARGB16161616,
- DRM_FORMAT_ABGR16161616,
- DRM_FORMAT_XBGR8888,
- DRM_FORMAT_ABGR8888,
- DRM_FORMAT_RGB565,
-};
-
-static const uint32_t overlay_formats[] = {
- DRM_FORMAT_XRGB8888,
- DRM_FORMAT_ARGB8888,
- DRM_FORMAT_RGBA8888,
- DRM_FORMAT_XBGR8888,
- DRM_FORMAT_ABGR8888,
- DRM_FORMAT_RGB565
-};
-
-static const u32 cursor_formats[] = {
- DRM_FORMAT_ARGB8888
-};
-
-static int get_plane_formats(const struct drm_plane *plane,
- const struct dc_plane_cap *plane_cap,
- uint32_t *formats, int max_formats)
-{
- int i, num_formats = 0;
-
- /*
- * TODO: Query support for each group of formats directly from
- * DC plane caps. This will require adding more formats to the
- * caps list.
- */
-
- switch (plane->type) {
- case DRM_PLANE_TYPE_PRIMARY:
- for (i = 0; i < ARRAY_SIZE(rgb_formats); ++i) {
- if (num_formats >= max_formats)
- break;
-
- formats[num_formats++] = rgb_formats[i];
- }
-
- if (plane_cap && plane_cap->pixel_format_support.nv12)
- formats[num_formats++] = DRM_FORMAT_NV12;
- if (plane_cap && plane_cap->pixel_format_support.p010)
- formats[num_formats++] = DRM_FORMAT_P010;
- if (plane_cap && plane_cap->pixel_format_support.fp16) {
- formats[num_formats++] = DRM_FORMAT_XRGB16161616F;
- formats[num_formats++] = DRM_FORMAT_ARGB16161616F;
- formats[num_formats++] = DRM_FORMAT_XBGR16161616F;
- formats[num_formats++] = DRM_FORMAT_ABGR16161616F;
- }
- break;
-
- case DRM_PLANE_TYPE_OVERLAY:
- for (i = 0; i < ARRAY_SIZE(overlay_formats); ++i) {
- if (num_formats >= max_formats)
- break;
-
- formats[num_formats++] = overlay_formats[i];
- }
- break;
-
- case DRM_PLANE_TYPE_CURSOR:
- for (i = 0; i < ARRAY_SIZE(cursor_formats); ++i) {
- if (num_formats >= max_formats)
- break;
-
- formats[num_formats++] = cursor_formats[i];
- }
- break;
- }
-
- return num_formats;
-}
-
-static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
- struct drm_plane *plane,
- unsigned long possible_crtcs,
- const struct dc_plane_cap *plane_cap)
-{
- uint32_t formats[32];
- int num_formats;
- int res = -EPERM;
- unsigned int supported_rotations;
- uint64_t *modifiers = NULL;
-
- num_formats = get_plane_formats(plane, plane_cap, formats,
- ARRAY_SIZE(formats));
-
- res = get_plane_modifiers(dm->adev, plane->type, &modifiers);
- if (res)
- return res;
-
- res = drm_universal_plane_init(adev_to_drm(dm->adev), plane, possible_crtcs,
- &dm_plane_funcs, formats, num_formats,
- modifiers, plane->type, NULL);
- kfree(modifiers);
- if (res)
- return res;
-
- if (plane->type == DRM_PLANE_TYPE_OVERLAY &&
- plane_cap && plane_cap->per_pixel_alpha) {
- unsigned int blend_caps = BIT(DRM_MODE_BLEND_PIXEL_NONE) |
- BIT(DRM_MODE_BLEND_PREMULTI);
-
- drm_plane_create_alpha_property(plane);
- drm_plane_create_blend_mode_property(plane, blend_caps);
- }
-
- if (plane->type == DRM_PLANE_TYPE_PRIMARY &&
- plane_cap &&
- (plane_cap->pixel_format_support.nv12 ||
- plane_cap->pixel_format_support.p010)) {
- /* This only affects YUV formats. */
- drm_plane_create_color_properties(
- plane,
- BIT(DRM_COLOR_YCBCR_BT601) |
- BIT(DRM_COLOR_YCBCR_BT709) |
- BIT(DRM_COLOR_YCBCR_BT2020),
- BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) |
- BIT(DRM_COLOR_YCBCR_FULL_RANGE),
- DRM_COLOR_YCBCR_BT709, DRM_COLOR_YCBCR_LIMITED_RANGE);
- }
-
- supported_rotations =
- DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 |
- DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270;
-
- if (dm->adev->asic_type >= CHIP_BONAIRE &&
- plane->type != DRM_PLANE_TYPE_CURSOR)
- drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
- supported_rotations);
-
- drm_plane_helper_add(plane, &dm_plane_helper_funcs);
-
- /* Create (reset) the plane state */
- if (plane->funcs->reset)
- plane->funcs->reset(plane);
-
- return 0;
-}
-
-static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
- struct drm_plane *plane,
- uint32_t crtc_index)
-{
- struct amdgpu_crtc *acrtc = NULL;
- struct drm_plane *cursor_plane;
-
- int res = -ENOMEM;
-
- cursor_plane = kzalloc(sizeof(*cursor_plane), GFP_KERNEL);
- if (!cursor_plane)
- goto fail;
-
- cursor_plane->type = DRM_PLANE_TYPE_CURSOR;
- res = amdgpu_dm_plane_init(dm, cursor_plane, 0, NULL);
-
- acrtc = kzalloc(sizeof(struct amdgpu_crtc), GFP_KERNEL);
- if (!acrtc)
- goto fail;
-
- res = drm_crtc_init_with_planes(
- dm->ddev,
- &acrtc->base,
- plane,
- cursor_plane,
- &amdgpu_dm_crtc_funcs, NULL);
-
- if (res)
- goto fail;
-
- drm_crtc_helper_add(&acrtc->base, &amdgpu_dm_crtc_helper_funcs);
-
- /* Create (reset) the plane state */
- if (acrtc->base.funcs->reset)
- acrtc->base.funcs->reset(&acrtc->base);
-
- acrtc->max_cursor_width = dm->adev->dm.dc->caps.max_cursor_size;
- acrtc->max_cursor_height = dm->adev->dm.dc->caps.max_cursor_size;
-
- acrtc->crtc_id = crtc_index;
- acrtc->base.enabled = false;
- acrtc->otg_inst = -1;
-
- dm->adev->mode_info.crtcs[crtc_index] = acrtc;
- drm_crtc_enable_color_mgmt(&acrtc->base, MAX_COLOR_LUT_ENTRIES,
- true, MAX_COLOR_LUT_ENTRIES);
- drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES);
-
- return 0;
-
-fail:
- kfree(acrtc);
- kfree(cursor_plane);
- return res;
-}
-
-
-static int to_drm_connector_type(enum signal_type st)
+static int to_drm_connector_type(enum signal_type st, uint32_t connector_id)
{
switch (st) {
case SIGNAL_TYPE_HDMI_TYPE_A:
@@ -7892,6 +8534,10 @@ static int to_drm_connector_type(enum signal_type st)
return DRM_MODE_CONNECTOR_DisplayPort;
case SIGNAL_TYPE_DVI_DUAL_LINK:
case SIGNAL_TYPE_DVI_SINGLE_LINK:
+ if (connector_id == CONNECTOR_ID_SINGLE_LINK_DVII ||
+ connector_id == CONNECTOR_ID_DUAL_LINK_DVII)
+ return DRM_MODE_CONNECTOR_DVII;
+
return DRM_MODE_CONNECTOR_DVID;
case SIGNAL_TYPE_VIRTUAL:
return DRM_MODE_CONNECTOR_VIRTUAL;
@@ -7943,7 +8589,7 @@ static void amdgpu_dm_get_native_mode(struct drm_connector *connector)
static struct drm_display_mode *
amdgpu_dm_create_common_mode(struct drm_encoder *encoder,
- char *name,
+ const char *name,
int hdisplay, int vdisplay)
{
struct drm_device *dev = encoder->dev;
@@ -7965,6 +8611,24 @@ amdgpu_dm_create_common_mode(struct drm_encoder *encoder,
}
+static const struct amdgpu_dm_mode_size {
+ char name[DRM_DISPLAY_MODE_LEN];
+ int w;
+ int h;
+} common_modes[] = {
+ { "640x480", 640, 480},
+ { "800x600", 800, 600},
+ { "1024x768", 1024, 768},
+ { "1280x720", 1280, 720},
+ { "1280x800", 1280, 800},
+ {"1280x1024", 1280, 1024},
+ { "1440x900", 1440, 900},
+ {"1680x1050", 1680, 1050},
+ {"1600x1200", 1600, 1200},
+ {"1920x1080", 1920, 1080},
+ {"1920x1200", 1920, 1200}
+};
+
static void amdgpu_dm_connector_add_common_modes(struct drm_encoder *encoder,
struct drm_connector *connector)
{
@@ -7975,23 +8639,10 @@ static void amdgpu_dm_connector_add_common_modes(struct drm_encoder *encoder,
to_amdgpu_dm_connector(connector);
int i;
int n;
- struct mode_size {
- char name[DRM_DISPLAY_MODE_LEN];
- int w;
- int h;
- } common_modes[] = {
- { "640x480", 640, 480},
- { "800x600", 800, 600},
- { "1024x768", 1024, 768},
- { "1280x720", 1280, 720},
- { "1280x800", 1280, 800},
- {"1280x1024", 1280, 1024},
- { "1440x900", 1440, 900},
- {"1680x1050", 1680, 1050},
- {"1600x1200", 1600, 1200},
- {"1920x1080", 1920, 1080},
- {"1920x1200", 1920, 1200}
- };
+
+ if ((connector->connector_type != DRM_MODE_CONNECTOR_eDP) &&
+ (connector->connector_type != DRM_MODE_CONNECTOR_LVDS))
+ return;
n = ARRAY_SIZE(common_modes);
@@ -8019,6 +8670,9 @@ static void amdgpu_dm_connector_add_common_modes(struct drm_encoder *encoder,
mode = amdgpu_dm_create_common_mode(encoder,
common_modes[i].name, common_modes[i].w,
common_modes[i].h);
+ if (!mode)
+ continue;
+
drm_mode_probed_add(connector, mode);
amdgpu_dm_connector->num_modes++;
}
@@ -8034,6 +8688,10 @@ static void amdgpu_set_panel_orientation(struct drm_connector *connector)
connector->connector_type != DRM_MODE_CONNECTOR_LVDS)
return;
+ mutex_lock(&connector->dev->mode_config.mutex);
+ amdgpu_dm_connector_get_modes(connector);
+ mutex_unlock(&connector->dev->mode_config.mutex);
+
encoder = amdgpu_dm_connector_to_encoder(connector);
if (!encoder)
return;
@@ -8051,16 +8709,16 @@ static void amdgpu_set_panel_orientation(struct drm_connector *connector)
}
static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector,
- struct edid *edid)
+ const struct drm_edid *drm_edid)
{
struct amdgpu_dm_connector *amdgpu_dm_connector =
to_amdgpu_dm_connector(connector);
- if (edid) {
+ if (drm_edid) {
/* empty probed_modes */
INIT_LIST_HEAD(&connector->probed_modes);
amdgpu_dm_connector->num_modes =
- drm_add_edid_modes(connector, edid);
+ drm_edid_connector_add_modes(connector);
/* sorting the probed modes before calling function
* amdgpu_dm_get_native_mode() since EDID can have
@@ -8074,12 +8732,10 @@ static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector,
amdgpu_dm_get_native_mode(connector);
/* Freesync capabilities are reset by calling
- * drm_add_edid_modes() and need to be
+ * drm_edid_connector_add_modes() and need to be
* restored here.
*/
- amdgpu_dm_update_freesync_caps(connector, edid);
-
- amdgpu_set_panel_orientation(connector);
+ amdgpu_dm_update_freesync_caps(connector, drm_edid);
} else {
amdgpu_dm_connector->num_modes = 0;
}
@@ -8090,7 +8746,7 @@ static bool is_duplicate_mode(struct amdgpu_dm_connector *aconnector,
{
struct drm_display_mode *m;
- list_for_each_entry (m, &aconnector->base.probed_modes, head) {
+ list_for_each_entry(m, &aconnector->base.probed_modes, head) {
if (drm_mode_equal(m, mode))
return true;
}
@@ -8103,21 +8759,21 @@ static uint add_fs_modes(struct amdgpu_dm_connector *aconnector)
const struct drm_display_mode *m;
struct drm_display_mode *new_mode;
uint i;
- uint32_t new_modes_count = 0;
+ u32 new_modes_count = 0;
/* Standard FPS values
*
* 23.976 - TV/NTSC
- * 24 - Cinema
- * 25 - TV/PAL
+ * 24 - Cinema
+ * 25 - TV/PAL
* 29.97 - TV/NTSC
- * 30 - TV/NTSC
- * 48 - Cinema HFR
- * 50 - TV/PAL
- * 60 - Commonly used
+ * 30 - TV/NTSC
+ * 48 - Cinema HFR
+ * 50 - TV/PAL
+ * 60 - Commonly used
* 48,72,96,120 - Multiples of 24
*/
- static const uint32_t common_rates[] = {
+ static const u32 common_rates[] = {
23976, 24000, 25000, 29970, 30000,
48000, 50000, 60000, 72000, 96000, 120000
};
@@ -8133,8 +8789,8 @@ static uint add_fs_modes(struct amdgpu_dm_connector *aconnector)
return 0;
for (i = 0; i < ARRAY_SIZE(common_rates); i++) {
- uint64_t target_vtotal, target_vtotal_diff;
- uint64_t num, den;
+ u64 target_vtotal, target_vtotal_diff;
+ u64 num, den;
if (drm_mode_vrefresh(m) * 1000 < common_rates[i])
continue;
@@ -8175,12 +8831,22 @@ static uint add_fs_modes(struct amdgpu_dm_connector *aconnector)
}
static void amdgpu_dm_connector_add_freesync_modes(struct drm_connector *connector,
- struct edid *edid)
+ const struct drm_edid *drm_edid)
{
struct amdgpu_dm_connector *amdgpu_dm_connector =
to_amdgpu_dm_connector(connector);
- if (!(amdgpu_freesync_vid_mode && edid))
+ if (!(amdgpu_freesync_vid_mode && drm_edid))
+ return;
+
+ if (!amdgpu_dm_connector->dc_sink || !amdgpu_dm_connector->dc_link)
+ return;
+
+ if (!dc_supports_vrr(amdgpu_dm_connector->dc_sink->ctx->dce_version))
+ return;
+
+ if (dc_connector_supports_analog(amdgpu_dm_connector->dc_link->link_id.id) &&
+ amdgpu_dm_connector->dc_sink->edid_caps.analog)
return;
if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10)
@@ -8192,24 +8858,48 @@ static int amdgpu_dm_connector_get_modes(struct drm_connector *connector)
{
struct amdgpu_dm_connector *amdgpu_dm_connector =
to_amdgpu_dm_connector(connector);
+ struct dc_link *dc_link = amdgpu_dm_connector->dc_link;
struct drm_encoder *encoder;
- struct edid *edid = amdgpu_dm_connector->edid;
+ const struct drm_edid *drm_edid = amdgpu_dm_connector->drm_edid;
+ struct dc_link_settings *verified_link_cap = &dc_link->verified_link_cap;
+ const struct dc *dc = dc_link->dc;
encoder = amdgpu_dm_connector_to_encoder(connector);
- if (!drm_edid_is_valid(edid)) {
+ if (!drm_edid) {
amdgpu_dm_connector->num_modes =
drm_add_modes_noedid(connector, 640, 480);
+ if (dc->link_srv->dp_get_encoding_format(verified_link_cap) == DP_128b_132b_ENCODING)
+ amdgpu_dm_connector->num_modes +=
+ drm_add_modes_noedid(connector, 1920, 1080);
+
+ if (amdgpu_dm_connector->dc_sink &&
+ amdgpu_dm_connector->dc_sink->edid_caps.analog &&
+ dc_connector_supports_analog(dc_link->link_id.id)) {
+ /* Analog monitor connected by DAC load detection.
+ * Add common modes. It will be up to the user to select one that works.
+ */
+ for (int i = 0; i < ARRAY_SIZE(common_modes); i++)
+ amdgpu_dm_connector->num_modes += drm_add_modes_noedid(
+ connector, common_modes[i].w, common_modes[i].h);
+ }
} else {
- amdgpu_dm_connector_ddc_get_modes(connector, edid);
- amdgpu_dm_connector_add_common_modes(encoder, connector);
- amdgpu_dm_connector_add_freesync_modes(connector, edid);
+ amdgpu_dm_connector_ddc_get_modes(connector, drm_edid);
+ if (encoder)
+ amdgpu_dm_connector_add_common_modes(encoder, connector);
+ amdgpu_dm_connector_add_freesync_modes(connector, drm_edid);
}
amdgpu_dm_fbc_init(connector);
return amdgpu_dm_connector->num_modes;
}
+static const u32 supported_colorspaces =
+ BIT(DRM_MODE_COLORIMETRY_BT709_YCC) |
+ BIT(DRM_MODE_COLORIMETRY_OPRGB) |
+ BIT(DRM_MODE_COLORIMETRY_BT2020_RGB) |
+ BIT(DRM_MODE_COLORIMETRY_BT2020_YCC);
+
void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
struct amdgpu_dm_connector *aconnector,
int connector_type,
@@ -8226,6 +8916,7 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
aconnector->base.funcs->reset(&aconnector->base);
aconnector->connector_id = link_index;
+ aconnector->bl_idx = -1;
aconnector->dc_link = link;
aconnector->base.interlace_allowed = false;
aconnector->base.doublescan_allowed = false;
@@ -8233,7 +8924,15 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
aconnector->base.dpms = DRM_MODE_DPMS_OFF;
aconnector->hpd.hpd = AMDGPU_HPD_NONE; /* not used */
aconnector->audio_inst = -1;
+ aconnector->pack_sdp_v1_3 = false;
+ aconnector->as_type = ADAPTIVE_SYNC_TYPE_NONE;
+ memset(&aconnector->vsdb_info, 0, sizeof(aconnector->vsdb_info));
mutex_init(&aconnector->hpd_lock);
+ mutex_init(&aconnector->handle_mst_msg_ready);
+
+ aconnector->hdmi_hpd_debounce_delay_ms = AMDGPU_DM_HDMI_HPD_DEBOUNCE_MS;
+ INIT_DELAYED_WORK(&aconnector->hdmi_hpd_debounce_work, hdmi_hpd_debounce_work);
+ aconnector->hdmi_prev_sink = NULL;
/*
* configure support HPD hot plug connector_>polled default value is 0
@@ -8247,15 +8946,8 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
break;
case DRM_MODE_CONNECTOR_DisplayPort:
aconnector->base.polled = DRM_CONNECTOR_POLL_HPD;
- if (link->is_dig_mapping_flexible &&
- link->dc->res_pool->funcs->link_encs_assign) {
- link->link_enc =
- link_enc_cfg_get_link_enc_used_by_link(link->ctx->dc, link);
- if (!link->link_enc)
- link->link_enc =
- link_enc_cfg_get_next_avail_link_enc(link->ctx->dc);
- }
-
+ link->link_enc = link_enc_cfg_get_link_enc(link);
+ ASSERT(link->link_enc);
if (link->link_enc)
aconnector->base.ycbcr_420_allowed =
link->link_enc->features.dp_ycbcr420_supported ? true : false;
@@ -8263,6 +8955,11 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
case DRM_MODE_CONNECTOR_DVID:
aconnector->base.polled = DRM_CONNECTOR_POLL_HPD;
break;
+ case DRM_MODE_CONNECTOR_DVII:
+ case DRM_MODE_CONNECTOR_VGA:
+ aconnector->base.polled =
+ DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT;
+ break;
default:
break;
}
@@ -8271,6 +8968,10 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
dm->ddev->mode_config.scaling_mode_property,
DRM_MODE_SCALE_NONE);
+ if (connector_type == DRM_MODE_CONNECTOR_HDMIA
+ || (connector_type == DRM_MODE_CONNECTOR_DisplayPort && !aconnector->mst_root))
+ drm_connector_attach_broadcast_rgb_property(&aconnector->base);
+
drm_object_attach_property(&aconnector->base.base,
adev->mode_info.underscan_property,
UNDERSCAN_OFF);
@@ -8281,17 +8982,24 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
adev->mode_info.underscan_vborder_property,
0);
- if (!aconnector->mst_port)
+ if (!aconnector->mst_root)
drm_connector_attach_max_bpc_property(&aconnector->base, 8, 16);
- /* This defaults to the max in the range, but we want 8bpc for non-edp. */
- aconnector->base.state->max_bpc = (connector_type == DRM_MODE_CONNECTOR_eDP) ? 16 : 8;
+ aconnector->base.state->max_bpc = 16;
aconnector->base.state->max_requested_bpc = aconnector->base.state->max_bpc;
- if (connector_type == DRM_MODE_CONNECTOR_eDP &&
- (dc_is_dmcu_initialized(adev->dm.dc) || adev->dm.dc->ctx->dmub_srv)) {
- drm_object_attach_property(&aconnector->base.base,
- adev->mode_info.abm_level_property, 0);
+ if (connector_type == DRM_MODE_CONNECTOR_HDMIA) {
+ /* Content Type is currently only implemented for HDMI. */
+ drm_connector_attach_content_type_property(&aconnector->base);
+ }
+
+ if (connector_type == DRM_MODE_CONNECTOR_HDMIA) {
+ if (!drm_mode_create_hdmi_colorspace_property(&aconnector->base, supported_colorspaces))
+ drm_connector_attach_colorspace_property(&aconnector->base);
+ } else if ((connector_type == DRM_MODE_CONNECTOR_DisplayPort && !aconnector->mst_root) ||
+ connector_type == DRM_MODE_CONNECTOR_eDP) {
+ if (!drm_mode_create_dp_colorspace_property(&aconnector->base, supported_colorspaces))
+ drm_connector_attach_colorspace_property(&aconnector->base);
}
if (connector_type == DRM_MODE_CONNECTOR_HDMIA ||
@@ -8299,13 +9007,23 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
connector_type == DRM_MODE_CONNECTOR_eDP) {
drm_connector_attach_hdr_output_metadata_property(&aconnector->base);
- if (!aconnector->mst_port)
+ if (!aconnector->mst_root)
drm_connector_attach_vrr_capable_property(&aconnector->base);
-#ifdef CONFIG_DRM_AMD_DC_HDCP
if (adev->dm.hdcp_workqueue)
drm_connector_attach_content_protection_property(&aconnector->base, true);
-#endif
+ }
+
+ if (connector_type == DRM_MODE_CONNECTOR_eDP) {
+ struct drm_privacy_screen *privacy_screen;
+
+ privacy_screen = drm_privacy_screen_get(adev_to_drm(adev)->dev, NULL);
+ if (!IS_ERR(privacy_screen)) {
+ drm_connector_attach_privacy_screen_provider(&aconnector->base,
+ privacy_screen);
+ } else if (PTR_ERR(privacy_screen) != -ENODEV) {
+ drm_warn(adev_to_drm(adev), "Error getting privacy-screen\n");
+ }
}
}
@@ -8318,6 +9036,9 @@ static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap,
int i;
int result = -EIO;
+ if (!ddc_service->ddc_pin)
+ return result;
+
cmd.payloads = kcalloc(num, sizeof(struct i2c_payload), GFP_KERNEL);
if (!cmd.payloads)
@@ -8334,11 +9055,18 @@ static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap,
cmd.payloads[i].data = msgs[i].buf;
}
- if (dc_submit_i2c(
- ddc_service->ctx->dc,
- ddc_service->ddc_pin->hw_info.ddc_channel,
- &cmd))
- result = num;
+ if (i2c->oem) {
+ if (dc_submit_i2c_oem(
+ ddc_service->ctx->dc,
+ &cmd))
+ result = num;
+ } else {
+ if (dc_submit_i2c(
+ ddc_service->ctx->dc,
+ ddc_service->link->link_index,
+ &cmd))
+ result = num;
+ }
kfree(cmd.payloads);
return result;
@@ -8355,9 +9083,7 @@ static const struct i2c_algorithm amdgpu_dm_i2c_algo = {
};
static struct amdgpu_i2c_adapter *
-create_i2c(struct ddc_service *ddc_service,
- int link_index,
- int *res)
+create_i2c(struct ddc_service *ddc_service, bool oem)
{
struct amdgpu_device *adev = ddc_service->ctx->driver_context;
struct amdgpu_i2c_adapter *i2c;
@@ -8366,18 +9092,41 @@ create_i2c(struct ddc_service *ddc_service,
if (!i2c)
return NULL;
i2c->base.owner = THIS_MODULE;
- i2c->base.class = I2C_CLASS_DDC;
i2c->base.dev.parent = &adev->pdev->dev;
i2c->base.algo = &amdgpu_dm_i2c_algo;
- snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c hw bus %d", link_index);
+ if (oem)
+ snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c OEM bus");
+ else
+ snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c hw bus %d",
+ ddc_service->link->link_index);
i2c_set_adapdata(&i2c->base, i2c);
i2c->ddc_service = ddc_service;
- if (i2c->ddc_service->ddc_pin)
- i2c->ddc_service->ddc_pin->hw_info.ddc_channel = link_index;
+ i2c->oem = oem;
return i2c;
}
+int amdgpu_dm_initialize_hdmi_connector(struct amdgpu_dm_connector *aconnector)
+{
+ struct cec_connector_info conn_info;
+ struct drm_device *ddev = aconnector->base.dev;
+ struct device *hdmi_dev = ddev->dev;
+
+ if (amdgpu_dc_debug_mask & DC_DISABLE_HDMI_CEC) {
+ drm_info(ddev, "HDMI-CEC feature masked\n");
+ return -EINVAL;
+ }
+
+ cec_fill_conn_info_from_drm(&conn_info, &aconnector->base);
+ aconnector->notifier =
+ cec_notifier_conn_register(hdmi_dev, NULL, &conn_info);
+ if (!aconnector->notifier) {
+ drm_err(ddev, "Failed to create cec notifier\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
/*
* Note: this function assumes that dc_link_detect() was called for the
@@ -8385,7 +9134,7 @@ create_i2c(struct ddc_service *ddc_service,
*/
static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm,
struct amdgpu_dm_connector *aconnector,
- uint32_t link_index,
+ u32 link_index,
struct amdgpu_encoder *aencoder)
{
int res = 0;
@@ -8394,25 +9143,25 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm,
struct dc_link *link = dc_get_link_at_index(dc, link_index);
struct amdgpu_i2c_adapter *i2c;
+ /* Not needed for writeback connector */
link->priv = aconnector;
- DRM_DEBUG_DRIVER("%s()\n", __func__);
- i2c = create_i2c(link->ddc, link->link_index, &res);
+ i2c = create_i2c(link->ddc, false);
if (!i2c) {
- DRM_ERROR("Failed to create i2c adapter data\n");
+ drm_err(adev_to_drm(dm->adev), "Failed to create i2c adapter data\n");
return -ENOMEM;
}
aconnector->i2c = i2c;
- res = i2c_add_adapter(&i2c->base);
+ res = devm_i2c_add_adapter(dm->adev->dev, &i2c->base);
if (res) {
- DRM_ERROR("Failed to register hw i2c %d\n", link->link_index);
+ drm_err(adev_to_drm(dm->adev), "Failed to register hw i2c %d\n", link->link_index);
goto out_free;
}
- connector_type = to_drm_connector_type(link->connector_signal);
+ connector_type = to_drm_connector_type(link->connector_signal, link->link_id.id);
res = drm_connector_init_with_ddc(
dm->ddev,
@@ -8422,7 +9171,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm,
&i2c->base);
if (res) {
- DRM_ERROR("connector_init failed\n");
+ drm_err(adev_to_drm(dm->adev), "connector_init failed\n");
aconnector->connector_id = -1;
goto out_free;
}
@@ -8441,6 +9190,10 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm,
drm_connector_attach_encoder(
&aconnector->base, &aencoder->base);
+ if (connector_type == DRM_MODE_CONNECTOR_HDMIA ||
+ connector_type == DRM_MODE_CONNECTOR_HDMIB)
+ amdgpu_dm_initialize_hdmi_connector(aconnector);
+
if (connector_type == DRM_MODE_CONNECTOR_DisplayPort
|| connector_type == DRM_MODE_CONNECTOR_eDP)
amdgpu_dm_initialize_dp_connector(dm, aconnector, link->link_index);
@@ -8498,42 +9251,98 @@ static int amdgpu_dm_encoder_init(struct drm_device *dev,
static void manage_dm_interrupts(struct amdgpu_device *adev,
struct amdgpu_crtc *acrtc,
- bool enable)
-{
- /*
- * We have no guarantee that the frontend index maps to the same
+ struct dm_crtc_state *acrtc_state)
+{ /*
+ * We cannot be sure that the frontend index maps to the same
* backend index - some even map to more than one.
- *
- * TODO: Use a different interrupt or check DC itself for the mapping.
+ * So we have to go through the CRTC to find the right IRQ.
*/
- int irq_type =
- amdgpu_display_crtc_idx_to_irq_type(
+ int irq_type = amdgpu_display_crtc_idx_to_irq_type(
adev,
acrtc->crtc_id);
+ struct drm_device *dev = adev_to_drm(adev);
- if (enable) {
- drm_crtc_vblank_on(&acrtc->base);
- amdgpu_irq_get(
- adev,
- &adev->pageflip_irq,
- irq_type);
+ struct drm_vblank_crtc_config config = {0};
+ struct dc_crtc_timing *timing;
+ int offdelay;
+
+ if (acrtc_state) {
+ timing = &acrtc_state->stream->timing;
+
+ /*
+ * Depending on when the HW latching event of double-buffered
+ * registers happen relative to the PSR SDP deadline, and how
+ * bad the Panel clock has drifted since the last ALPM off
+ * event, there can be up to 3 frames of delay between sending
+ * the PSR exit cmd to DMUB fw, and when the panel starts
+ * displaying live frames.
+ *
+ * We can set:
+ *
+ * 20/100 * offdelay_ms = 3_frames_ms
+ * => offdelay_ms = 5 * 3_frames_ms
+ *
+ * This ensures that `3_frames_ms` will only be experienced as a
+ * 20% delay on top how long the display has been static, and
+ * thus make the delay less perceivable.
+ */
+ if (acrtc_state->stream->link->psr_settings.psr_version <
+ DC_PSR_VERSION_UNSUPPORTED) {
+ offdelay = DIV64_U64_ROUND_UP((u64)5 * 3 * 10 *
+ timing->v_total *
+ timing->h_total,
+ timing->pix_clk_100hz);
+ config.offdelay_ms = offdelay ?: 30;
+ } else if (amdgpu_ip_version(adev, DCE_HWIP, 0) <
+ IP_VERSION(3, 5, 0) ||
+ !(adev->flags & AMD_IS_APU)) {
+ /*
+ * Older HW and DGPU have issues with instant off;
+ * use a 2 frame offdelay.
+ */
+ offdelay = DIV64_U64_ROUND_UP((u64)20 *
+ timing->v_total *
+ timing->h_total,
+ timing->pix_clk_100hz);
+
+ config.offdelay_ms = offdelay ?: 30;
+ } else {
+ /* offdelay_ms = 0 will never disable vblank */
+ config.offdelay_ms = 1;
+ config.disable_immediate = true;
+ }
+
+ drm_crtc_vblank_on_config(&acrtc->base,
+ &config);
+ /* Allow RX6xxx, RX7700, RX7800 GPUs to call amdgpu_irq_get.*/
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 2):
+ case IP_VERSION(3, 0, 3):
+ case IP_VERSION(3, 2, 0):
+ if (amdgpu_irq_get(adev, &adev->pageflip_irq, irq_type))
+ drm_err(dev, "DM_IRQ: Cannot get pageflip irq!\n");
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
- amdgpu_irq_get(
- adev,
- &adev->vline0_irq,
- irq_type);
+ if (amdgpu_irq_get(adev, &adev->vline0_irq, irq_type))
+ drm_err(dev, "DM_IRQ: Cannot get vline0 irq!\n");
#endif
+ }
+
} else {
+ /* Allow RX6xxx, RX7700, RX7800 GPUs to call amdgpu_irq_put.*/
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 2):
+ case IP_VERSION(3, 0, 3):
+ case IP_VERSION(3, 2, 0):
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
- amdgpu_irq_put(
- adev,
- &adev->vline0_irq,
- irq_type);
+ if (amdgpu_irq_put(adev, &adev->vline0_irq, irq_type))
+ drm_err(dev, "DM_IRQ: Cannot put vline0 irq!\n");
#endif
- amdgpu_irq_put(
- adev,
- &adev->pageflip_irq,
- irq_type);
+ if (amdgpu_irq_put(adev, &adev->pageflip_irq, irq_type))
+ drm_err(dev, "DM_IRQ: Cannot put pageflip irq!\n");
+ }
+
drm_crtc_vblank_off(&acrtc->base);
}
}
@@ -8569,28 +9378,55 @@ is_scaling_state_different(const struct dm_connector_state *dm_state,
return false;
}
-#ifdef CONFIG_DRM_AMD_DC_HDCP
-static bool is_content_protection_different(struct drm_connector_state *state,
- const struct drm_connector_state *old_state,
- const struct drm_connector *connector, struct hdcp_workqueue *hdcp_w)
+static bool is_content_protection_different(struct drm_crtc_state *new_crtc_state,
+ struct drm_crtc_state *old_crtc_state,
+ struct drm_connector_state *new_conn_state,
+ struct drm_connector_state *old_conn_state,
+ const struct drm_connector *connector,
+ struct hdcp_workqueue *hdcp_w)
{
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
struct dm_connector_state *dm_con_state = to_dm_connector_state(connector->state);
- /* Handle: Type0/1 change */
- if (old_state->hdcp_content_type != state->hdcp_content_type &&
- state->content_protection != DRM_MODE_CONTENT_PROTECTION_UNDESIRED) {
- state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED;
+ pr_debug("[HDCP_DM] connector->index: %x connect_status: %x dpms: %x\n",
+ connector->index, connector->status, connector->dpms);
+ pr_debug("[HDCP_DM] state protection old: %x new: %x\n",
+ old_conn_state->content_protection, new_conn_state->content_protection);
+
+ if (old_crtc_state)
+ pr_debug("[HDCP_DM] old crtc en: %x a: %x m: %x a-chg: %x c-chg: %x\n",
+ old_crtc_state->enable,
+ old_crtc_state->active,
+ old_crtc_state->mode_changed,
+ old_crtc_state->active_changed,
+ old_crtc_state->connectors_changed);
+
+ if (new_crtc_state)
+ pr_debug("[HDCP_DM] NEW crtc en: %x a: %x m: %x a-chg: %x c-chg: %x\n",
+ new_crtc_state->enable,
+ new_crtc_state->active,
+ new_crtc_state->mode_changed,
+ new_crtc_state->active_changed,
+ new_crtc_state->connectors_changed);
+
+ /* hdcp content type change */
+ if (old_conn_state->hdcp_content_type != new_conn_state->hdcp_content_type &&
+ new_conn_state->content_protection != DRM_MODE_CONTENT_PROTECTION_UNDESIRED) {
+ new_conn_state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED;
+ pr_debug("[HDCP_DM] Type0/1 change %s :true\n", __func__);
return true;
}
- /* CP is being re enabled, ignore this
- *
- * Handles: ENABLED -> DESIRED
- */
- if (old_state->content_protection == DRM_MODE_CONTENT_PROTECTION_ENABLED &&
- state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED) {
- state->content_protection = DRM_MODE_CONTENT_PROTECTION_ENABLED;
+ /* CP is being re enabled, ignore this */
+ if (old_conn_state->content_protection == DRM_MODE_CONTENT_PROTECTION_ENABLED &&
+ new_conn_state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED) {
+ if (new_crtc_state && new_crtc_state->mode_changed) {
+ new_conn_state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED;
+ pr_debug("[HDCP_DM] ENABLED->DESIRED & mode_changed %s :true\n", __func__);
+ return true;
+ }
+ new_conn_state->content_protection = DRM_MODE_CONTENT_PROTECTION_ENABLED;
+ pr_debug("[HDCP_DM] ENABLED -> DESIRED %s :false\n", __func__);
return false;
}
@@ -8598,9 +9434,9 @@ static bool is_content_protection_different(struct drm_connector_state *state,
*
* Handles: UNDESIRED -> ENABLED
*/
- if (old_state->content_protection == DRM_MODE_CONTENT_PROTECTION_UNDESIRED &&
- state->content_protection == DRM_MODE_CONTENT_PROTECTION_ENABLED)
- state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED;
+ if (old_conn_state->content_protection == DRM_MODE_CONTENT_PROTECTION_UNDESIRED &&
+ new_conn_state->content_protection == DRM_MODE_CONTENT_PROTECTION_ENABLED)
+ new_conn_state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED;
/* Stream removed and re-enabled
*
@@ -8610,10 +9446,12 @@ static bool is_content_protection_different(struct drm_connector_state *state,
*
* Handles: DESIRED -> DESIRED (Special case)
*/
- if (!(old_state->crtc && old_state->crtc->enabled) &&
- state->crtc && state->crtc->enabled &&
+ if (!(old_conn_state->crtc && old_conn_state->crtc->enabled) &&
+ new_conn_state->crtc && new_conn_state->crtc->enabled &&
connector->state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED) {
dm_con_state->update_hdcp = false;
+ pr_debug("[HDCP_DM] DESIRED->DESIRED (Stream removed and re-enabled) %s :true\n",
+ __func__);
return true;
}
@@ -8625,35 +9463,41 @@ static bool is_content_protection_different(struct drm_connector_state *state,
*
* Handles: DESIRED -> DESIRED (Special case)
*/
- if (dm_con_state->update_hdcp && state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED &&
- connector->dpms == DRM_MODE_DPMS_ON && aconnector->dc_sink != NULL) {
+ if (dm_con_state->update_hdcp &&
+ new_conn_state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED &&
+ connector->dpms == DRM_MODE_DPMS_ON && aconnector->dc_sink != NULL) {
dm_con_state->update_hdcp = false;
+ pr_debug("[HDCP_DM] DESIRED->DESIRED (Hot-plug, headless s3, dpms) %s :true\n",
+ __func__);
return true;
}
- /*
- * Handles: UNDESIRED -> UNDESIRED
- * DESIRED -> DESIRED
- * ENABLED -> ENABLED
- */
- if (old_state->content_protection == state->content_protection)
+ if (old_conn_state->content_protection == new_conn_state->content_protection) {
+ if (new_conn_state->content_protection >= DRM_MODE_CONTENT_PROTECTION_DESIRED) {
+ if (new_crtc_state && new_crtc_state->mode_changed) {
+ pr_debug("[HDCP_DM] DESIRED->DESIRED or ENABLE->ENABLE mode_change %s :true\n",
+ __func__);
+ return true;
+ }
+ pr_debug("[HDCP_DM] DESIRED->DESIRED & ENABLE->ENABLE %s :false\n",
+ __func__);
+ return false;
+ }
+
+ pr_debug("[HDCP_DM] UNDESIRED->UNDESIRED %s :false\n", __func__);
return false;
+ }
- /*
- * Handles: UNDESIRED -> DESIRED
- * DESIRED -> UNDESIRED
- * ENABLED -> UNDESIRED
- */
- if (state->content_protection != DRM_MODE_CONTENT_PROTECTION_ENABLED)
+ if (new_conn_state->content_protection != DRM_MODE_CONTENT_PROTECTION_ENABLED) {
+ pr_debug("[HDCP_DM] UNDESIRED->DESIRED or DESIRED->UNDESIRED or ENABLED->UNDESIRED %s :true\n",
+ __func__);
return true;
+ }
- /*
- * Handles: DESIRED -> ENABLED
- */
+ pr_debug("[HDCP_DM] DESIRED->ENABLED %s :false\n", __func__);
return false;
}
-#endif
static void remove_stream(struct amdgpu_device *adev,
struct amdgpu_crtc *acrtc,
struct dc_stream_state *stream)
@@ -8664,114 +9508,6 @@ static void remove_stream(struct amdgpu_device *adev,
acrtc->enabled = false;
}
-static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
- struct dc_cursor_position *position)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- int x, y;
- int xorigin = 0, yorigin = 0;
-
- if (!crtc || !plane->state->fb)
- return 0;
-
- if ((plane->state->crtc_w > amdgpu_crtc->max_cursor_width) ||
- (plane->state->crtc_h > amdgpu_crtc->max_cursor_height)) {
- DRM_ERROR("%s: bad cursor width or height %d x %d\n",
- __func__,
- plane->state->crtc_w,
- plane->state->crtc_h);
- return -EINVAL;
- }
-
- x = plane->state->crtc_x;
- y = plane->state->crtc_y;
-
- if (x <= -amdgpu_crtc->max_cursor_width ||
- y <= -amdgpu_crtc->max_cursor_height)
- return 0;
-
- if (x < 0) {
- xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1);
- x = 0;
- }
- if (y < 0) {
- yorigin = min(-y, amdgpu_crtc->max_cursor_height - 1);
- y = 0;
- }
- position->enable = true;
- position->translate_by_source = true;
- position->x = x;
- position->y = y;
- position->x_hotspot = xorigin;
- position->y_hotspot = yorigin;
-
- return 0;
-}
-
-static void handle_cursor_update(struct drm_plane *plane,
- struct drm_plane_state *old_plane_state)
-{
- struct amdgpu_device *adev = drm_to_adev(plane->dev);
- struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);
- struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;
- struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- uint64_t address = afb ? afb->address : 0;
- struct dc_cursor_position position = {0};
- struct dc_cursor_attributes attributes;
- int ret;
-
- if (!plane->state->fb && !old_plane_state->fb)
- return;
-
- DC_LOG_CURSOR("%s: crtc_id=%d with size %d to %d\n",
- __func__,
- amdgpu_crtc->crtc_id,
- plane->state->crtc_w,
- plane->state->crtc_h);
-
- ret = get_cursor_position(plane, crtc, &position);
- if (ret)
- return;
-
- if (!position.enable) {
- /* turn off cursor */
- if (crtc_state && crtc_state->stream) {
- mutex_lock(&adev->dm.dc_lock);
- dc_stream_set_cursor_position(crtc_state->stream,
- &position);
- mutex_unlock(&adev->dm.dc_lock);
- }
- return;
- }
-
- amdgpu_crtc->cursor_width = plane->state->crtc_w;
- amdgpu_crtc->cursor_height = plane->state->crtc_h;
-
- memset(&attributes, 0, sizeof(attributes));
- attributes.address.high_part = upper_32_bits(address);
- attributes.address.low_part = lower_32_bits(address);
- attributes.width = plane->state->crtc_w;
- attributes.height = plane->state->crtc_h;
- attributes.color_format = CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA;
- attributes.rotation_angle = 0;
- attributes.attribute_flags.value = 0;
-
- attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0];
-
- if (crtc_state->stream) {
- mutex_lock(&adev->dm.dc_lock);
- if (!dc_stream_set_cursor_attributes(crtc_state->stream,
- &attributes))
- DRM_ERROR("DC failed to set cursor attributes\n");
-
- if (!dc_stream_set_cursor_position(crtc_state->stream,
- &position))
- DRM_ERROR("DC failed to set cursor position\n");
- mutex_unlock(&adev->dm.dc_lock);
- }
-}
-
static void prepare_flip_isr(struct amdgpu_crtc *acrtc)
{
@@ -8786,8 +9522,9 @@ static void prepare_flip_isr(struct amdgpu_crtc *acrtc)
/* Mark this event as consumed */
acrtc->base.state->event = NULL;
- DC_LOG_PFLIP("crtc:%d, pflip_stat:AMDGPU_FLIP_SUBMITTED\n",
- acrtc->crtc_id);
+ drm_dbg_state(acrtc->base.dev,
+ "crtc:%d, pflip_stat:AMDGPU_FLIP_SUBMITTED\n",
+ acrtc->crtc_id);
}
static void update_freesync_state_on_stream(
@@ -8803,6 +9540,8 @@ static void update_freesync_state_on_stream(
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(new_crtc_state->base.crtc);
unsigned long flags;
bool pack_sdp_v1_3 = false;
+ struct amdgpu_dm_connector *aconn;
+ enum vrr_packet_type packet_type = PACKET_TYPE_VRR;
if (!new_stream)
return;
@@ -8816,7 +9555,7 @@ static void update_freesync_state_on_stream(
return;
spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
- vrr_params = acrtc->dm_irq_params.vrr_params;
+ vrr_params = acrtc->dm_irq_params.vrr_params;
if (surface) {
mod_freesync_handle_preflip(
@@ -8827,7 +9566,7 @@ static void update_freesync_state_on_stream(
&vrr_params);
if (adev->family < AMDGPU_FAMILY_AI &&
- amdgpu_dm_vrr_active(new_crtc_state)) {
+ amdgpu_dm_crtc_vrr_active(new_crtc_state)) {
mod_freesync_handle_v_update(dm->freesync_module,
new_stream, &vrr_params);
@@ -8838,20 +9577,31 @@ static void update_freesync_state_on_stream(
}
}
+ aconn = (struct amdgpu_dm_connector *)new_stream->dm_stream_context;
+
+ if (aconn && (aconn->as_type == FREESYNC_TYPE_PCON_IN_WHITELIST || aconn->vsdb_info.replay_mode)) {
+ pack_sdp_v1_3 = aconn->pack_sdp_v1_3;
+
+ if (aconn->vsdb_info.amd_vsdb_version == 1)
+ packet_type = PACKET_TYPE_FS_V1;
+ else if (aconn->vsdb_info.amd_vsdb_version == 2)
+ packet_type = PACKET_TYPE_FS_V2;
+ else if (aconn->vsdb_info.amd_vsdb_version == 3)
+ packet_type = PACKET_TYPE_FS_V3;
+
+ mod_build_adaptive_sync_infopacket(new_stream, aconn->as_type, NULL,
+ &new_stream->adaptive_sync_infopacket);
+ }
+
mod_freesync_build_vrr_infopacket(
dm->freesync_module,
new_stream,
&vrr_params,
- PACKET_TYPE_VRR,
+ packet_type,
TRANSFER_FUNC_UNKNOWN,
&vrr_infopacket,
pack_sdp_v1_3);
- new_crtc_state->freesync_timing_changed |=
- (memcmp(&acrtc->dm_irq_params.vrr_params.adjust,
- &vrr_params.adjust,
- sizeof(vrr_params.adjust)) != 0);
-
new_crtc_state->freesync_vrr_info_changed |=
(memcmp(&new_crtc_state->vrr_infopacket,
&vrr_infopacket,
@@ -8860,8 +9610,8 @@ static void update_freesync_state_on_stream(
acrtc->dm_irq_params.vrr_params = vrr_params;
new_crtc_state->vrr_infopacket = vrr_infopacket;
- new_stream->adjust = acrtc->dm_irq_params.vrr_params.adjust;
new_stream->vrr_infopacket = vrr_infopacket;
+ new_stream->allow_freesync = mod_freesync_get_freesync_enabled(&vrr_params);
if (new_crtc_state->freesync_vrr_info_changed)
DRM_DEBUG_KMS("VRR packet update: crtc=%u enabled=%d state=%d",
@@ -8923,10 +9673,6 @@ static void update_stream_irq_parameters(
new_stream,
&config, &vrr_params);
- new_crtc_state->freesync_timing_changed |=
- (memcmp(&acrtc->dm_irq_params.vrr_params.adjust,
- &vrr_params.adjust, sizeof(vrr_params.adjust)) != 0);
-
new_crtc_state->freesync_config = config;
/* Copy state for access from DM IRQ handler */
acrtc->dm_irq_params.freesync_config = config;
@@ -8938,8 +9684,8 @@ static void update_stream_irq_parameters(
static void amdgpu_dm_handle_vrr_transition(struct dm_crtc_state *old_state,
struct dm_crtc_state *new_state)
{
- bool old_vrr_active = amdgpu_dm_vrr_active(old_state);
- bool new_vrr_active = amdgpu_dm_vrr_active(new_state);
+ bool old_vrr_active = amdgpu_dm_crtc_vrr_active(old_state);
+ bool new_vrr_active = amdgpu_dm_crtc_vrr_active(new_state);
if (!old_vrr_active && new_vrr_active) {
/* Transition VRR inactive -> active:
@@ -8950,17 +9696,17 @@ static void amdgpu_dm_handle_vrr_transition(struct dm_crtc_state *old_state,
* We also need vupdate irq for the actual core vblank handling
* at end of vblank.
*/
- dm_set_vupdate_irq(new_state->base.crtc, true);
- drm_crtc_vblank_get(new_state->base.crtc);
- DRM_DEBUG_DRIVER("%s: crtc=%u VRR off->on: Get vblank ref\n",
+ WARN_ON(amdgpu_dm_crtc_set_vupdate_irq(new_state->base.crtc, true) != 0);
+ WARN_ON(drm_crtc_vblank_get(new_state->base.crtc) != 0);
+ drm_dbg_driver(new_state->base.crtc->dev, "%s: crtc=%u VRR off->on: Get vblank ref\n",
__func__, new_state->base.crtc->base.id);
} else if (old_vrr_active && !new_vrr_active) {
/* Transition VRR active -> inactive:
* Allow vblank irq disable again for fixed refresh rate.
*/
- dm_set_vupdate_irq(new_state->base.crtc, false);
+ WARN_ON(amdgpu_dm_crtc_set_vupdate_irq(new_state->base.crtc, false) != 0);
drm_crtc_vblank_put(new_state->base.crtc);
- DRM_DEBUG_DRIVER("%s: crtc=%u VRR on->off: Drop vblank ref\n",
+ drm_dbg_driver(new_state->base.crtc->dev, "%s: crtc=%u VRR on->off: Drop vblank ref\n",
__func__, new_state->base.crtc->base.id);
}
}
@@ -8977,18 +9723,148 @@ static void amdgpu_dm_commit_cursors(struct drm_atomic_state *state)
*/
for_each_old_plane_in_state(state, plane, old_plane_state, i)
if (plane->type == DRM_PLANE_TYPE_CURSOR)
- handle_cursor_update(plane, old_plane_state);
+ amdgpu_dm_plane_handle_cursor_update(plane, old_plane_state);
+}
+
+static inline uint32_t get_mem_type(struct drm_framebuffer *fb)
+{
+ struct amdgpu_bo *abo = gem_to_amdgpu_bo(fb->obj[0]);
+
+ return abo->tbo.resource ? abo->tbo.resource->mem_type : 0;
+}
+
+static void amdgpu_dm_update_cursor(struct drm_plane *plane,
+ struct drm_plane_state *old_plane_state,
+ struct dc_stream_update *update)
+{
+ struct amdgpu_device *adev = drm_to_adev(plane->dev);
+ struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);
+ struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;
+ struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ uint64_t address = afb ? afb->address : 0;
+ struct dc_cursor_position position = {0};
+ struct dc_cursor_attributes attributes;
+ int ret;
+
+ if (!plane->state->fb && !old_plane_state->fb)
+ return;
+
+ drm_dbg_atomic(plane->dev, "crtc_id=%d with size %d to %d\n",
+ amdgpu_crtc->crtc_id, plane->state->crtc_w,
+ plane->state->crtc_h);
+
+ ret = amdgpu_dm_plane_get_cursor_position(plane, crtc, &position);
+ if (ret)
+ return;
+
+ if (!position.enable) {
+ /* turn off cursor */
+ if (crtc_state && crtc_state->stream) {
+ dc_stream_set_cursor_position(crtc_state->stream,
+ &position);
+ update->cursor_position = &crtc_state->stream->cursor_position;
+ }
+ return;
+ }
+
+ amdgpu_crtc->cursor_width = plane->state->crtc_w;
+ amdgpu_crtc->cursor_height = plane->state->crtc_h;
+
+ memset(&attributes, 0, sizeof(attributes));
+ attributes.address.high_part = upper_32_bits(address);
+ attributes.address.low_part = lower_32_bits(address);
+ attributes.width = plane->state->crtc_w;
+ attributes.height = plane->state->crtc_h;
+ attributes.color_format = CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA;
+ attributes.rotation_angle = 0;
+ attributes.attribute_flags.value = 0;
+
+ /* Enable cursor degamma ROM on DCN3+ for implicit sRGB degamma in DRM
+ * legacy gamma setup.
+ */
+ if (crtc_state->cm_is_degamma_srgb &&
+ adev->dm.dc->caps.color.dpp.gamma_corr)
+ attributes.attribute_flags.bits.ENABLE_CURSOR_DEGAMMA = 1;
+
+ if (afb)
+ attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0];
+
+ if (crtc_state->stream) {
+ if (!dc_stream_set_cursor_attributes(crtc_state->stream,
+ &attributes))
+ drm_err(adev_to_drm(adev), "DC failed to set cursor attributes\n");
+
+ update->cursor_attributes = &crtc_state->stream->cursor_attributes;
+
+ if (!dc_stream_set_cursor_position(crtc_state->stream,
+ &position))
+ drm_err(adev_to_drm(adev), "DC failed to set cursor position\n");
+
+ update->cursor_position = &crtc_state->stream->cursor_position;
+ }
+}
+
+static void amdgpu_dm_enable_self_refresh(struct amdgpu_crtc *acrtc_attach,
+ const struct dm_crtc_state *acrtc_state,
+ const u64 current_ts)
+{
+ struct psr_settings *psr = &acrtc_state->stream->link->psr_settings;
+ struct replay_settings *pr = &acrtc_state->stream->link->replay_settings;
+ struct amdgpu_dm_connector *aconn =
+ (struct amdgpu_dm_connector *)acrtc_state->stream->dm_stream_context;
+ bool vrr_active = amdgpu_dm_crtc_vrr_active(acrtc_state);
+
+ if (acrtc_state->update_type > UPDATE_TYPE_FAST) {
+ if (pr->config.replay_supported && !pr->replay_feature_enabled)
+ amdgpu_dm_link_setup_replay(acrtc_state->stream->link, aconn);
+ else if (psr->psr_version != DC_PSR_VERSION_UNSUPPORTED &&
+ !psr->psr_feature_enabled)
+ if (!aconn->disallow_edp_enter_psr)
+ amdgpu_dm_link_setup_psr(acrtc_state->stream);
+ }
+
+ /* Decrement skip count when SR is enabled and we're doing fast updates. */
+ if (acrtc_state->update_type == UPDATE_TYPE_FAST &&
+ (psr->psr_feature_enabled || pr->config.replay_supported)) {
+ if (aconn->sr_skip_count > 0)
+ aconn->sr_skip_count--;
+
+ /* Allow SR when skip count is 0. */
+ acrtc_attach->dm_irq_params.allow_sr_entry = !aconn->sr_skip_count;
+
+ /*
+ * If sink supports PSR SU/Panel Replay, there is no need to rely on
+ * a vblank event disable request to enable PSR/RP. PSR SU/RP
+ * can be enabled immediately once OS demonstrates an
+ * adequate number of fast atomic commits to notify KMD
+ * of update events. See `vblank_control_worker()`.
+ */
+ if (!vrr_active &&
+ acrtc_attach->dm_irq_params.allow_sr_entry &&
+#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
+ !amdgpu_dm_crc_window_is_activated(acrtc_state->base.crtc) &&
+#endif
+ (current_ts - psr->psr_dirty_rects_change_timestamp_ns) > 500000000) {
+ if (pr->replay_feature_enabled && !pr->replay_allow_active)
+ amdgpu_dm_replay_enable(acrtc_state->stream, true);
+ if (psr->psr_version == DC_PSR_VERSION_SU_1 &&
+ !psr->psr_allow_active && !aconn->disallow_edp_enter_psr)
+ amdgpu_dm_psr_enable(acrtc_state->stream);
+ }
+ } else {
+ acrtc_attach->dm_irq_params.allow_sr_entry = false;
+ }
}
static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
- struct dc_state *dc_state,
struct drm_device *dev,
struct amdgpu_display_manager *dm,
struct drm_crtc *pcrtc,
bool wait_for_vblank)
{
- uint32_t i;
- uint64_t timestamp_ns;
+ u32 i;
+ u64 timestamp_ns = ktime_get_ns();
struct drm_plane *plane;
struct drm_plane_state *old_plane_state, *new_plane_state;
struct amdgpu_crtc *acrtc_attach = to_amdgpu_crtc(pcrtc);
@@ -8998,12 +9874,13 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
struct dm_crtc_state *dm_old_crtc_state =
to_dm_crtc_state(drm_atomic_get_old_crtc_state(state, pcrtc));
int planes_count = 0, vpos, hpos;
- long r;
unsigned long flags;
- struct amdgpu_bo *abo;
- uint32_t target_vblank, last_flip_vblank;
- bool vrr_active = amdgpu_dm_vrr_active(acrtc_state);
+ u32 target_vblank, last_flip_vblank;
+ bool vrr_active = amdgpu_dm_crtc_vrr_active(acrtc_state);
+ bool cursor_update = false;
bool pflip_present = false;
+ bool dirty_rects_changed = false;
+ bool updated_planes_and_streams = false;
struct {
struct dc_surface_update surface_updates[MAX_SURFACES];
struct dc_plane_info plane_infos[MAX_SURFACES];
@@ -9015,7 +9892,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
bundle = kzalloc(sizeof(*bundle), GFP_KERNEL);
if (!bundle) {
- dm_error("Failed to allocate update bundle\n");
+ drm_err(dev, "Failed to allocate update bundle\n");
goto cleanup;
}
@@ -9023,8 +9900,24 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
* Disable the cursor first if we're disabling all the planes.
* It'll remain on the screen after the planes are re-enabled
* if we don't.
+ *
+ * If the cursor is transitioning from native to overlay mode, the
+ * native cursor needs to be disabled first.
*/
- if (acrtc_state->active_planes == 0)
+ if (acrtc_state->cursor_mode == DM_CURSOR_OVERLAY_MODE &&
+ dm_old_crtc_state->cursor_mode == DM_CURSOR_NATIVE_MODE) {
+ struct dc_cursor_position cursor_position = {0};
+
+ if (!dc_stream_set_cursor_position(acrtc_state->stream,
+ &cursor_position))
+ drm_err(dev, "DC failed to disable native cursor\n");
+
+ bundle->stream_update.cursor_position =
+ &acrtc_state->stream->cursor_position;
+ }
+
+ if (acrtc_state->active_planes == 0 &&
+ dm_old_crtc_state->cursor_mode == DM_CURSOR_NATIVE_MODE)
amdgpu_dm_commit_cursors(state);
/* update planes when needed */
@@ -9038,8 +9931,17 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
struct dm_plane_state *dm_new_plane_state = to_dm_plane_state(new_plane_state);
/* Cursor plane is handled after stream updates */
- if (plane->type == DRM_PLANE_TYPE_CURSOR)
+ if (plane->type == DRM_PLANE_TYPE_CURSOR &&
+ acrtc_state->cursor_mode == DM_CURSOR_NATIVE_MODE) {
+ if ((fb && crtc == pcrtc) ||
+ (old_plane_state->fb && old_plane_state->crtc == pcrtc)) {
+ cursor_update = true;
+ if (amdgpu_ip_version(dm->adev, DCE_HWIP, 0) != 0)
+ amdgpu_dm_update_cursor(plane, old_plane_state, &bundle->stream_update);
+ }
+
continue;
+ }
if (!fb || !crtc || pcrtc != crtc)
continue;
@@ -9049,15 +9951,21 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
continue;
dc_plane = dm_new_plane_state->dc_state;
+ if (!dc_plane)
+ continue;
bundle->surface_updates[planes_count].surface = dc_plane;
if (new_pcrtc_state->color_mgmt_changed) {
- bundle->surface_updates[planes_count].gamma = dc_plane->gamma_correction;
- bundle->surface_updates[planes_count].in_transfer_func = dc_plane->in_transfer_func;
+ bundle->surface_updates[planes_count].gamma = &dc_plane->gamma_correction;
+ bundle->surface_updates[planes_count].in_transfer_func = &dc_plane->in_transfer_func;
bundle->surface_updates[planes_count].gamut_remap_matrix = &dc_plane->gamut_remap_matrix;
+ bundle->surface_updates[planes_count].hdr_mult = dc_plane->hdr_mult;
+ bundle->surface_updates[planes_count].func_shaper = &dc_plane->in_shaper_func;
+ bundle->surface_updates[planes_count].lut3d_func = &dc_plane->lut3d_func;
+ bundle->surface_updates[planes_count].blend_tf = &dc_plane->blend_tf;
}
- fill_dc_scaling_info(dm->adev, new_plane_state,
+ amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state,
&bundle->scaling_infos[planes_count]);
bundle->surface_updates[planes_count].scaling_info =
@@ -9072,39 +9980,69 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
continue;
}
- abo = gem_to_amdgpu_bo(fb->obj[0]);
-
- /*
- * Wait for all fences on this FB. Do limited wait to avoid
- * deadlock during GPU reset when this fence will not signal
- * but we hold reservation lock for the BO.
- */
- r = dma_resv_wait_timeout(abo->tbo.base.resv, true, false,
- msecs_to_jiffies(5000));
- if (unlikely(r <= 0))
- DRM_ERROR("Waiting for fences timed out!");
-
fill_dc_plane_info_and_addr(
dm->adev, new_plane_state,
afb->tiling_flags,
&bundle->plane_infos[planes_count],
&bundle->flip_addrs[planes_count].address,
- afb->tmz_surface, false);
+ afb->tmz_surface);
- DRM_DEBUG_ATOMIC("plane: id=%d dcc_en=%d\n",
+ drm_dbg_state(state->dev, "plane: id=%d dcc_en=%d\n",
new_plane_state->plane->index,
bundle->plane_infos[planes_count].dcc.enable);
bundle->surface_updates[planes_count].plane_info =
&bundle->plane_infos[planes_count];
+ if (acrtc_state->stream->link->psr_settings.psr_feature_enabled ||
+ acrtc_state->stream->link->replay_settings.replay_feature_enabled) {
+ fill_dc_dirty_rects(plane, old_plane_state,
+ new_plane_state, new_crtc_state,
+ &bundle->flip_addrs[planes_count],
+ acrtc_state->stream->link->psr_settings.psr_version ==
+ DC_PSR_VERSION_SU_1,
+ &dirty_rects_changed);
+
+ /*
+ * If the dirty regions changed, PSR-SU need to be disabled temporarily
+ * and enabled it again after dirty regions are stable to avoid video glitch.
+ * PSR-SU will be enabled in vblank_control_worker() if user pause the video
+ * during the PSR-SU was disabled.
+ */
+ if (acrtc_state->stream->link->psr_settings.psr_version >= DC_PSR_VERSION_SU_1 &&
+ acrtc_attach->dm_irq_params.allow_sr_entry &&
+#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
+ !amdgpu_dm_crc_window_is_activated(acrtc_state->base.crtc) &&
+#endif
+ dirty_rects_changed) {
+ mutex_lock(&dm->dc_lock);
+ acrtc_state->stream->link->psr_settings.psr_dirty_rects_change_timestamp_ns =
+ timestamp_ns;
+ if (acrtc_state->stream->link->psr_settings.psr_allow_active)
+ amdgpu_dm_psr_disable(acrtc_state->stream, true);
+ mutex_unlock(&dm->dc_lock);
+ }
+ }
+
/*
* Only allow immediate flips for fast updates that don't
- * change FB pitch, DCC state, rotation or mirroing.
+ * change memory domain, FB pitch, DCC state, rotation or
+ * mirroring.
+ *
+ * dm_crtc_helper_atomic_check() only accepts async flips with
+ * fast updates.
*/
+ if (crtc->state->async_flip &&
+ (acrtc_state->update_type != UPDATE_TYPE_FAST ||
+ get_mem_type(old_plane_state->fb) != get_mem_type(fb)))
+ drm_warn_once(state->dev,
+ "[PLANE:%d:%s] async flip with non-fast update\n",
+ plane->base.id, plane->name);
+
bundle->flip_addrs[planes_count].flip_immediate =
crtc->state->async_flip &&
- acrtc_state->update_type == UPDATE_TYPE_FAST;
+ acrtc_state->update_type == UPDATE_TYPE_FAST &&
+ get_mem_type(old_plane_state->fb) == get_mem_type(fb);
timestamp_ns = ktime_get_ns();
bundle->flip_addrs[planes_count].flip_timestamp_in_us = div_u64(timestamp_ns, 1000);
@@ -9112,7 +10050,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
bundle->surface_updates[planes_count].surface = dc_plane;
if (!bundle->surface_updates[planes_count].surface) {
- DRM_ERROR("No surface for CRTC: id=%d\n",
+ drm_err(dev, "No surface for CRTC: id=%d\n",
acrtc_attach->crtc_id);
continue;
}
@@ -9125,7 +10063,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
dc_plane,
bundle->flip_addrs[planes_count].flip_timestamp_in_us);
- DRM_DEBUG_ATOMIC("%s Flipping to hi: 0x%x, low: 0x%x\n",
+ drm_dbg_state(state->dev, "%s Flipping to hi: 0x%x, low: 0x%x\n",
__func__,
bundle->flip_addrs[planes_count].address.grph.addr.high_part,
bundle->flip_addrs[planes_count].address.grph.addr.low_part);
@@ -9143,8 +10081,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
* DRI3/Present extension with defined target_msc.
*/
last_flip_vblank = amdgpu_get_vblank_counter_kms(pcrtc);
- }
- else {
+ } else {
/* For variable refresh rate mode only:
* Get vblank of last completed flip to avoid > 1 vrr
* flips per video frame by use of throttling, but allow
@@ -9184,8 +10121,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
* and rely on sending it from software.
*/
if (acrtc_attach->base.state->event &&
- acrtc_state->active_planes > 0 &&
- !acrtc_state->force_dpms_off) {
+ acrtc_state->active_planes > 0) {
drm_crtc_vblank_get(pcrtc);
spin_lock_irqsave(&pcrtc->dev->event_lock, flags);
@@ -9201,19 +10137,25 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
bundle->stream_update.vrr_infopacket =
&acrtc_state->stream->vrr_infopacket;
}
+ } else if (cursor_update && acrtc_state->active_planes > 0) {
+ spin_lock_irqsave(&pcrtc->dev->event_lock, flags);
+ if (acrtc_attach->base.state->event) {
+ drm_crtc_vblank_get(pcrtc);
+ acrtc_attach->event = acrtc_attach->base.state->event;
+ acrtc_attach->base.state->event = NULL;
+ }
+ spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags);
}
/* Update the planes if changed or disable if we don't have any. */
if ((planes_count || acrtc_state->active_planes == 0) &&
acrtc_state->stream) {
-#if defined(CONFIG_DRM_AMD_DC_DCN)
/*
* If PSR or idle optimizations are enabled then flush out
* any pending work before hardware programming.
*/
if (dm->vblank_control_workqueue)
flush_workqueue(dm->vblank_control_workqueue);
-#endif
bundle->stream_update.stream = acrtc_state->stream;
if (new_pcrtc_state->mode_changed) {
@@ -9231,13 +10173,26 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
bundle->stream_update.output_csc_transform =
&acrtc_state->stream->csc_color_matrix;
bundle->stream_update.out_transfer_func =
- acrtc_state->stream->out_transfer_func;
+ &acrtc_state->stream->out_transfer_func;
+ bundle->stream_update.lut3d_func =
+ (struct dc_3dlut *) acrtc_state->stream->lut3d_func;
+ bundle->stream_update.func_shaper =
+ (struct dc_transfer_func *) acrtc_state->stream->func_shaper;
}
acrtc_state->stream->abm_level = acrtc_state->abm_level;
if (acrtc_state->abm_level != dm_old_crtc_state->abm_level)
bundle->stream_update.abm_level = &acrtc_state->abm_level;
+ mutex_lock(&dm->dc_lock);
+ if ((acrtc_state->update_type > UPDATE_TYPE_FAST) || vrr_active) {
+ if (acrtc_state->stream->link->replay_settings.replay_allow_active)
+ amdgpu_dm_replay_disable(acrtc_state->stream);
+ if (acrtc_state->stream->link->psr_settings.psr_allow_active)
+ amdgpu_dm_psr_disable(acrtc_state->stream, true);
+ }
+ mutex_unlock(&dm->dc_lock);
+
/*
* If FreeSync state on the stream has changed then we need to
* re-adjust the min/max bounds now that DC doesn't handle this
@@ -9251,16 +10206,13 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags);
}
mutex_lock(&dm->dc_lock);
- if ((acrtc_state->update_type > UPDATE_TYPE_FAST) &&
- acrtc_state->stream->link->psr_settings.psr_allow_active)
- amdgpu_dm_psr_disable(acrtc_state->stream);
-
- dc_commit_updates_for_stream(dm->dc,
- bundle->surface_updates,
- planes_count,
- acrtc_state->stream,
- &bundle->stream_update,
- dc_state);
+ update_planes_and_stream_adapter(dm->dc,
+ acrtc_state->update_type,
+ planes_count,
+ acrtc_state->stream,
+ &bundle->stream_update,
+ bundle->surface_updates);
+ updated_planes_and_streams = true;
/**
* Enable or disable the interrupts on the backend.
@@ -9279,26 +10231,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
dm_update_pflip_irq_state(drm_to_adev(dev),
acrtc_attach);
- if ((acrtc_state->update_type > UPDATE_TYPE_FAST) &&
- acrtc_state->stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED &&
- !acrtc_state->stream->link->psr_settings.psr_feature_enabled)
- amdgpu_dm_link_setup_psr(acrtc_state->stream);
-
- /* Decrement skip count when PSR is enabled and we're doing fast updates. */
- if (acrtc_state->update_type == UPDATE_TYPE_FAST &&
- acrtc_state->stream->link->psr_settings.psr_feature_enabled) {
- struct amdgpu_dm_connector *aconn =
- (struct amdgpu_dm_connector *)acrtc_state->stream->dm_stream_context;
-
- if (aconn->psr_skip_count > 0)
- aconn->psr_skip_count--;
-
- /* Allow PSR when skip count is 0. */
- acrtc_attach->dm_irq_params.allow_psr_entry = !aconn->psr_skip_count;
- } else {
- acrtc_attach->dm_irq_params.allow_psr_entry = false;
- }
-
+ amdgpu_dm_enable_self_refresh(acrtc_attach, acrtc_state, timestamp_ns);
mutex_unlock(&dm->dc_lock);
}
@@ -9307,7 +10240,9 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
* This avoids redundant programming in the case where we're going
* to be disabling a single plane - those pipes are being disabled.
*/
- if (acrtc_state->active_planes)
+ if (acrtc_state->active_planes &&
+ (!updated_planes_and_streams || amdgpu_ip_version(dm->adev, DCE_HWIP, 0) == 0) &&
+ acrtc_state->cursor_mode == DM_CURSOR_NATIVE_MODE)
amdgpu_dm_commit_cursors(state);
cleanup:
@@ -9345,7 +10280,10 @@ static void amdgpu_dm_commit_audio(struct drm_device *dev,
if (!drm_atomic_crtc_needs_modeset(new_crtc_state))
continue;
- notify:
+notify:
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
mutex_lock(&adev->dm.audio_lock);
@@ -9378,6 +10316,9 @@ static void amdgpu_dm_commit_audio(struct drm_device *dev,
if (!status)
continue;
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
mutex_lock(&adev->dm.audio_lock);
@@ -9403,49 +10344,57 @@ static void amdgpu_dm_crtc_copy_transient_flags(struct drm_crtc_state *crtc_stat
stream_state->mode_changed = drm_atomic_crtc_needs_modeset(crtc_state);
}
-/**
- * amdgpu_dm_atomic_commit_tail() - AMDgpu DM's commit tail implementation.
- * @state: The atomic state to commit
- *
- * This will tell DC to commit the constructed DC state from atomic_check,
- * programming the hardware. Any failures here implies a hardware failure, since
- * atomic check should have filtered anything non-kosher.
- */
-static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
+static void dm_clear_writeback(struct amdgpu_display_manager *dm,
+ struct dm_crtc_state *crtc_state)
+{
+ dc_stream_remove_writeback(dm->dc, crtc_state->stream, 0);
+}
+
+static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
+ struct dc_state *dc_state)
{
struct drm_device *dev = state->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_display_manager *dm = &adev->dm;
- struct dm_atomic_state *dm_state;
- struct dc_state *dc_state = NULL, *dc_state_temp = NULL;
- uint32_t i, j;
struct drm_crtc *crtc;
struct drm_crtc_state *old_crtc_state, *new_crtc_state;
- unsigned long flags;
- bool wait_for_vblank = true;
- struct drm_connector *connector;
- struct drm_connector_state *old_con_state, *new_con_state;
struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state;
- int crtc_disable_count = 0;
+ struct drm_connector_state *old_con_state;
+ struct drm_connector *connector;
bool mode_set_reset_required = false;
+ u32 i;
+ struct dc_commit_streams_params params = {dc_state->streams, dc_state->stream_count};
+ bool set_backlight_level = false;
- trace_amdgpu_dm_atomic_commit_tail_begin(state);
+ /* Disable writeback */
+ for_each_old_connector_in_state(state, connector, old_con_state, i) {
+ struct dm_connector_state *dm_old_con_state;
+ struct amdgpu_crtc *acrtc;
- drm_atomic_helper_update_legacy_modeset_state(dev, state);
+ if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
- dm_state = dm_atomic_get_new_state(state);
- if (dm_state && dm_state->context) {
- dc_state = dm_state->context;
- } else {
- /* No state changes, retain current state. */
- dc_state_temp = dc_create_state(dm->dc);
- ASSERT(dc_state_temp);
- dc_state = dc_state_temp;
- dc_resource_state_copy_construct_current(dm->dc, dc_state);
+ old_crtc_state = NULL;
+
+ dm_old_con_state = to_dm_connector_state(old_con_state);
+ if (!dm_old_con_state->base.crtc)
+ continue;
+
+ acrtc = to_amdgpu_crtc(dm_old_con_state->base.crtc);
+ if (acrtc)
+ old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base);
+
+ if (!acrtc || !acrtc->wb_enabled)
+ continue;
+
+ dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
+
+ dm_clear_writeback(dm, dm_old_crtc_state);
+ acrtc->wb_enabled = false;
}
- for_each_oldnew_crtc_in_state (state, crtc, old_crtc_state,
- new_crtc_state, i) {
+ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
+ new_crtc_state, i) {
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
@@ -9453,7 +10402,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
if (old_crtc_state->active &&
(!new_crtc_state->active ||
drm_atomic_crtc_needs_modeset(new_crtc_state))) {
- manage_dm_interrupts(adev, acrtc, false);
+ manage_dm_interrupts(adev, acrtc, NULL);
dc_stream_release(dm_old_crtc_state->stream);
}
}
@@ -9467,10 +10416,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
- DRM_DEBUG_ATOMIC(
- "amdgpu_crtc id:%d crtc_state_flags: enable:%d, active:%d, "
- "planes_changed:%d, mode_changed:%d,active_changed:%d,"
- "connectors_changed:%d\n",
+ drm_dbg_state(state->dev,
+ "amdgpu_crtc id:%d crtc_state_flags: enable:%d, active:%d, planes_changed:%d, mode_changed:%d,active_changed:%d,connectors_changed:%d\n",
acrtc->crtc_id,
new_crtc_state->enable,
new_crtc_state->active,
@@ -9485,7 +10432,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
memset(&position, 0, sizeof(position));
mutex_lock(&dm->dc_lock);
- dc_stream_set_cursor_position(dm_old_crtc_state->stream, &position);
+ dc_exit_ips_for_hw_access(dm->dc);
+ dc_stream_program_cursor_position(dm_old_crtc_state->stream, &position);
mutex_unlock(&dm->dc_lock);
}
@@ -9499,9 +10447,11 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
* aconnector as needed
*/
- if (modeset_required(new_crtc_state, dm_new_crtc_state->stream, dm_old_crtc_state->stream)) {
+ if (amdgpu_dm_crtc_modeset_required(new_crtc_state, dm_new_crtc_state->stream, dm_old_crtc_state->stream)) {
- DRM_DEBUG_ATOMIC("Atomic commit: SET crtc id %d: [%p]\n", acrtc->crtc_id, acrtc);
+ drm_dbg_atomic(dev,
+ "Atomic commit: SET crtc id %d: [%p]\n",
+ acrtc->crtc_id, acrtc);
if (!dm_new_crtc_state->stream) {
/*
@@ -9519,8 +10469,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
* have a sink to keep the pipe running so that
* hw state is consistent with the sw state
*/
- DRM_DEBUG_DRIVER("%s: Failed to create new stream for crtc %d\n",
- __func__, acrtc->base.base.id);
+ drm_dbg_atomic(dev,
+ "Failed to create new stream for crtc %d\n",
+ acrtc->base.base.id);
continue;
}
@@ -9533,8 +10484,11 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
acrtc->hw_mode = new_crtc_state->mode;
crtc->hwmode = new_crtc_state->mode;
mode_set_reset_required = true;
+ set_backlight_level = true;
} else if (modereset_required(new_crtc_state)) {
- DRM_DEBUG_ATOMIC("Atomic commit: RESET. crtc id %d:[%p]\n", acrtc->crtc_id, acrtc);
+ drm_dbg_atomic(dev,
+ "Atomic commit: RESET. crtc id %d:[%p]\n",
+ acrtc->crtc_id, acrtc);
/* i.e. reset mode */
if (dm_old_crtc_state->stream)
remove_stream(adev, acrtc, dm_old_crtc_state->stream);
@@ -9543,27 +10497,25 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
}
} /* for_each_crtc_in_state() */
- if (dc_state) {
- /* if there mode set or reset, disable eDP PSR */
- if (mode_set_reset_required) {
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dm->vblank_control_workqueue)
- flush_workqueue(dm->vblank_control_workqueue);
-#endif
- amdgpu_dm_psr_disable_all(dm);
- }
+ /* if there mode set or reset, disable eDP PSR, Replay */
+ if (mode_set_reset_required) {
+ if (dm->vblank_control_workqueue)
+ flush_workqueue(dm->vblank_control_workqueue);
- dm_enable_per_frame_crtc_master_sync(dc_state);
- mutex_lock(&dm->dc_lock);
- WARN_ON(!dc_commit_state(dm->dc, dc_state));
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- /* Allow idle optimization when vblank count is 0 for display off */
- if (dm->active_vblank_irq_count == 0)
- dc_allow_idle_optimizations(dm->dc,true);
-#endif
- mutex_unlock(&dm->dc_lock);
+ amdgpu_dm_replay_disable_all(dm);
+ amdgpu_dm_psr_disable_all(dm);
}
+ dm_enable_per_frame_crtc_master_sync(dc_state);
+ mutex_lock(&dm->dc_lock);
+ dc_exit_ips_for_hw_access(dm->dc);
+ WARN_ON(!dc_commit_streams(dm->dc, &params));
+
+ /* Allow idle optimization when vblank count is 0 for display off */
+ if ((dm->active_vblank_irq_count == 0) && amdgpu_dm_is_headless(dm->adev))
+ dc_allow_idle_optimizations(dm->dc, true);
+ mutex_unlock(&dm->dc_lock);
+
for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
@@ -9574,24 +10526,192 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
dc_stream_get_status(dm_new_crtc_state->stream);
if (!status)
- status = dc_stream_get_status_from_state(dc_state,
+ status = dc_state_get_stream_status(dc_state,
dm_new_crtc_state->stream);
if (!status)
- DC_ERR("got no status for stream %p on acrtc%p\n", dm_new_crtc_state->stream, acrtc);
+ drm_err(dev,
+ "got no status for stream %p on acrtc%p\n",
+ dm_new_crtc_state->stream, acrtc);
else
acrtc->otg_inst = status->primary_otg_inst;
}
}
-#ifdef CONFIG_DRM_AMD_DC_HDCP
+
+ /* During boot up and resume the DC layer will reset the panel brightness
+ * to fix a flicker issue.
+ * It will cause the dm->actual_brightness is not the current panel brightness
+ * level. (the dm->brightness is the correct panel level)
+ * So we set the backlight level with dm->brightness value after set mode
+ */
+ if (set_backlight_level) {
+ for (i = 0; i < dm->num_of_edps; i++) {
+ if (dm->backlight_dev[i])
+ amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]);
+ }
+ }
+}
+
+static void dm_set_writeback(struct amdgpu_display_manager *dm,
+ struct dm_crtc_state *crtc_state,
+ struct drm_connector *connector,
+ struct drm_connector_state *new_con_state)
+{
+ struct drm_writeback_connector *wb_conn = drm_connector_to_writeback(connector);
+ struct amdgpu_device *adev = dm->adev;
+ struct amdgpu_crtc *acrtc;
+ struct dc_writeback_info *wb_info;
+ struct pipe_ctx *pipe = NULL;
+ struct amdgpu_framebuffer *afb;
+ int i = 0;
+
+ wb_info = kzalloc(sizeof(*wb_info), GFP_KERNEL);
+ if (!wb_info) {
+ drm_err(adev_to_drm(adev), "Failed to allocate wb_info\n");
+ return;
+ }
+
+ acrtc = to_amdgpu_crtc(wb_conn->encoder.crtc);
+ if (!acrtc) {
+ drm_err(adev_to_drm(adev), "no amdgpu_crtc found\n");
+ kfree(wb_info);
+ return;
+ }
+
+ afb = to_amdgpu_framebuffer(new_con_state->writeback_job->fb);
+ if (!afb) {
+ drm_err(adev_to_drm(adev), "No amdgpu_framebuffer found\n");
+ kfree(wb_info);
+ return;
+ }
+
+ for (i = 0; i < MAX_PIPES; i++) {
+ if (dm->dc->current_state->res_ctx.pipe_ctx[i].stream == crtc_state->stream) {
+ pipe = &dm->dc->current_state->res_ctx.pipe_ctx[i];
+ break;
+ }
+ }
+
+ /* fill in wb_info */
+ wb_info->wb_enabled = true;
+
+ wb_info->dwb_pipe_inst = 0;
+ wb_info->dwb_params.dwbscl_black_color = 0;
+ wb_info->dwb_params.hdr_mult = 0x1F000;
+ wb_info->dwb_params.csc_params.gamut_adjust_type = CM_GAMUT_ADJUST_TYPE_BYPASS;
+ wb_info->dwb_params.csc_params.gamut_coef_format = CM_GAMUT_REMAP_COEF_FORMAT_S2_13;
+ wb_info->dwb_params.output_depth = DWB_OUTPUT_PIXEL_DEPTH_10BPC;
+ wb_info->dwb_params.cnv_params.cnv_out_bpc = DWB_CNV_OUT_BPC_10BPC;
+
+ /* width & height from crtc */
+ wb_info->dwb_params.cnv_params.src_width = acrtc->base.mode.crtc_hdisplay;
+ wb_info->dwb_params.cnv_params.src_height = acrtc->base.mode.crtc_vdisplay;
+ wb_info->dwb_params.dest_width = acrtc->base.mode.crtc_hdisplay;
+ wb_info->dwb_params.dest_height = acrtc->base.mode.crtc_vdisplay;
+
+ wb_info->dwb_params.cnv_params.crop_en = false;
+ wb_info->dwb_params.stereo_params.stereo_enabled = false;
+
+ wb_info->dwb_params.cnv_params.out_max_pix_val = 0x3ff; // 10 bits
+ wb_info->dwb_params.cnv_params.out_min_pix_val = 0;
+ wb_info->dwb_params.cnv_params.fc_out_format = DWB_OUT_FORMAT_32BPP_ARGB;
+ wb_info->dwb_params.cnv_params.out_denorm_mode = DWB_OUT_DENORM_BYPASS;
+
+ wb_info->dwb_params.out_format = dwb_scaler_mode_bypass444;
+
+ wb_info->dwb_params.capture_rate = dwb_capture_rate_0;
+
+ wb_info->dwb_params.scaler_taps.h_taps = 4;
+ wb_info->dwb_params.scaler_taps.v_taps = 4;
+ wb_info->dwb_params.scaler_taps.h_taps_c = 2;
+ wb_info->dwb_params.scaler_taps.v_taps_c = 2;
+ wb_info->dwb_params.subsample_position = DWB_INTERSTITIAL_SUBSAMPLING;
+
+ wb_info->mcif_buf_params.luma_pitch = afb->base.pitches[0];
+ wb_info->mcif_buf_params.chroma_pitch = afb->base.pitches[1];
+
+ for (i = 0; i < DWB_MCIF_BUF_COUNT; i++) {
+ wb_info->mcif_buf_params.luma_address[i] = afb->address;
+ wb_info->mcif_buf_params.chroma_address[i] = 0;
+ }
+
+ wb_info->mcif_buf_params.p_vmid = 1;
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0)) {
+ wb_info->mcif_warmup_params.start_address.quad_part = afb->address;
+ wb_info->mcif_warmup_params.region_size =
+ wb_info->mcif_buf_params.luma_pitch * wb_info->dwb_params.dest_height;
+ }
+ wb_info->mcif_warmup_params.p_vmid = 1;
+ wb_info->writeback_source_plane = pipe->plane_state;
+
+ dc_stream_add_writeback(dm->dc, crtc_state->stream, wb_info);
+
+ acrtc->wb_pending = true;
+ acrtc->wb_conn = wb_conn;
+ drm_writeback_queue_job(wb_conn, new_con_state);
+}
+
+static void amdgpu_dm_update_hdcp(struct drm_atomic_state *state)
+{
+ struct drm_connector_state *old_con_state, *new_con_state;
+ struct drm_device *dev = state->dev;
+ struct drm_connector *connector;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int i;
+
+ if (!adev->dm.hdcp_workqueue)
+ return;
+
for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) {
struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state);
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc);
- struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+ struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+ struct dm_crtc_state *dm_new_crtc_state;
+ struct amdgpu_dm_connector *aconnector;
+
+ if (!connector || connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
+ aconnector = to_amdgpu_dm_connector(connector);
+
+ drm_dbg(dev, "[HDCP_DM] -------------- i : %x ----------\n", i);
+
+ drm_dbg(dev, "[HDCP_DM] connector->index: %x connect_status: %x dpms: %x\n",
+ connector->index, connector->status, connector->dpms);
+ drm_dbg(dev, "[HDCP_DM] state protection old: %x new: %x\n",
+ old_con_state->content_protection, new_con_state->content_protection);
+
+ if (aconnector->dc_sink) {
+ if (aconnector->dc_sink->sink_signal != SIGNAL_TYPE_VIRTUAL &&
+ aconnector->dc_sink->sink_signal != SIGNAL_TYPE_NONE) {
+ drm_dbg(dev, "[HDCP_DM] pipe_ctx dispname=%s\n",
+ aconnector->dc_sink->edid_caps.display_name);
+ }
+ }
new_crtc_state = NULL;
+ old_crtc_state = NULL;
- if (acrtc)
+ if (acrtc) {
new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base);
+ old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base);
+ }
+
+ if (old_crtc_state)
+ drm_dbg(dev, "old crtc en: %x a: %x m: %x a-chg: %x c-chg: %x\n",
+ old_crtc_state->enable,
+ old_crtc_state->active,
+ old_crtc_state->mode_changed,
+ old_crtc_state->active_changed,
+ old_crtc_state->connectors_changed);
+
+ if (new_crtc_state)
+ drm_dbg(dev, "NEW crtc en: %x a: %x m: %x a-chg: %x c-chg: %x\n",
+ new_crtc_state->enable,
+ new_crtc_state->active,
+ new_crtc_state->mode_changed,
+ new_crtc_state->active_changed,
+ new_crtc_state->connectors_changed);
+
dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
@@ -9603,26 +10723,130 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
continue;
}
- if (is_content_protection_different(new_con_state, old_con_state, connector, adev->dm.hdcp_workqueue))
- hdcp_update_display(
- adev->dm.hdcp_workqueue, aconnector->dc_link->link_index, aconnector,
- new_con_state->hdcp_content_type,
- new_con_state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED);
+ if (is_content_protection_different(new_crtc_state, old_crtc_state, new_con_state,
+ old_con_state, connector, adev->dm.hdcp_workqueue)) {
+ /* when display is unplugged from mst hub, connctor will
+ * be destroyed within dm_dp_mst_connector_destroy. connector
+ * hdcp perperties, like type, undesired, desired, enabled,
+ * will be lost. So, save hdcp properties into hdcp_work within
+ * amdgpu_dm_atomic_commit_tail. if the same display is
+ * plugged back with same display index, its hdcp properties
+ * will be retrieved from hdcp_work within dm_dp_mst_get_modes
+ */
+
+ bool enable_encryption = false;
+
+ if (new_con_state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED)
+ enable_encryption = true;
+
+ if (aconnector->dc_link && aconnector->dc_sink &&
+ aconnector->dc_link->type == dc_connection_mst_branch) {
+ struct hdcp_workqueue *hdcp_work = adev->dm.hdcp_workqueue;
+ struct hdcp_workqueue *hdcp_w =
+ &hdcp_work[aconnector->dc_link->link_index];
+
+ hdcp_w->hdcp_content_type[connector->index] =
+ new_con_state->hdcp_content_type;
+ hdcp_w->content_protection[connector->index] =
+ new_con_state->content_protection;
+ }
+
+ if (new_crtc_state && new_crtc_state->mode_changed &&
+ new_con_state->content_protection >= DRM_MODE_CONTENT_PROTECTION_DESIRED)
+ enable_encryption = true;
+
+ drm_info(dev, "[HDCP_DM] hdcp_update_display enable_encryption = %x\n", enable_encryption);
+
+ if (aconnector->dc_link)
+ hdcp_update_display(
+ adev->dm.hdcp_workqueue, aconnector->dc_link->link_index, aconnector,
+ new_con_state->hdcp_content_type, enable_encryption);
+ }
}
-#endif
+}
+
+static int amdgpu_dm_atomic_setup_commit(struct drm_atomic_state *state)
+{
+ struct drm_crtc *crtc;
+ struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+ struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state;
+ int i, ret;
+
+ ret = drm_dp_mst_atomic_setup_commit(state);
+ if (ret)
+ return ret;
+
+ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+ dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
+ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+ /*
+ * Color management settings. We also update color properties
+ * when a modeset is needed, to ensure it gets reprogrammed.
+ */
+ if (dm_new_crtc_state->base.active && dm_new_crtc_state->stream &&
+ (dm_new_crtc_state->base.color_mgmt_changed ||
+ dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf ||
+ drm_atomic_crtc_needs_modeset(new_crtc_state))) {
+ ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state);
+ if (ret) {
+ drm_dbg_atomic(state->dev, "Failed to update color state\n");
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_dm_atomic_commit_tail() - AMDgpu DM's commit tail implementation.
+ * @state: The atomic state to commit
+ *
+ * This will tell DC to commit the constructed DC state from atomic_check,
+ * programming the hardware. Any failures here implies a hardware failure, since
+ * atomic check should have filtered anything non-kosher.
+ */
+static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
+{
+ struct drm_device *dev = state->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_display_manager *dm = &adev->dm;
+ struct dm_atomic_state *dm_state;
+ struct dc_state *dc_state = NULL;
+ u32 i, j;
+ struct drm_crtc *crtc;
+ struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+ unsigned long flags;
+ bool wait_for_vblank = true;
+ struct drm_connector *connector;
+ struct drm_connector_state *old_con_state = NULL, *new_con_state = NULL;
+ struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state;
+ int crtc_disable_count = 0;
+
+ trace_amdgpu_dm_atomic_commit_tail_begin(state);
+
+ drm_atomic_helper_update_legacy_modeset_state(dev, state);
+ drm_dp_mst_atomic_wait_for_dependencies(state);
+
+ dm_state = dm_atomic_get_new_state(state);
+ if (dm_state && dm_state->context) {
+ dc_state = dm_state->context;
+ amdgpu_dm_commit_streams(state, dc_state);
+ }
+
+ amdgpu_dm_update_hdcp(state);
/* Handle connector state changes */
for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) {
struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state);
struct dm_connector_state *dm_old_con_state = to_dm_connector_state(old_con_state);
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc);
- struct dc_surface_update dummy_updates[MAX_SURFACES];
+ struct dc_surface_update *dummy_updates;
struct dc_stream_update stream_update;
struct dc_info_packet hdr_packet;
struct dc_stream_status *status = NULL;
- bool abm_changed, hdr_changed, scaling_changed;
+ bool abm_changed, hdr_changed, scaling_changed, output_color_space_changed = false;
- memset(&dummy_updates, 0, sizeof(dummy_updates));
memset(&stream_update, 0, sizeof(stream_update));
if (acrtc) {
@@ -9640,13 +10864,18 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
scaling_changed = is_scaling_state_different(dm_new_con_state,
dm_old_con_state);
+ if ((new_con_state->hdmi.broadcast_rgb != old_con_state->hdmi.broadcast_rgb) &&
+ (dm_old_crtc_state->stream->output_color_space !=
+ get_output_color_space(&dm_new_crtc_state->stream->timing, new_con_state)))
+ output_color_space_changed = true;
+
abm_changed = dm_new_crtc_state->abm_level !=
dm_old_crtc_state->abm_level;
hdr_changed =
!drm_connector_atomic_hdr_metadata_equal(old_con_state, new_con_state);
- if (!scaling_changed && !abm_changed && !hdr_changed)
+ if (!scaling_changed && !abm_changed && !hdr_changed && !output_color_space_changed)
continue;
stream_update.stream = dm_new_crtc_state->stream;
@@ -9658,6 +10887,13 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
stream_update.dst = dm_new_crtc_state->stream->dst;
}
+ if (output_color_space_changed) {
+ dm_new_crtc_state->stream->output_color_space
+ = get_output_color_space(&dm_new_crtc_state->stream->timing, new_con_state);
+
+ stream_update.output_color_space = &dm_new_crtc_state->stream->output_color_space;
+ }
+
if (abm_changed) {
dm_new_crtc_state->stream->abm_level = dm_new_crtc_state->abm_level;
@@ -9681,35 +10917,28 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
* Here we create an empty update on each plane.
* To fix this, DC should permit updating only stream properties.
*/
+ dummy_updates = kzalloc(sizeof(struct dc_surface_update) * MAX_SURFACES, GFP_KERNEL);
+ if (!dummy_updates) {
+ drm_err(adev_to_drm(adev), "Failed to allocate memory for dummy_updates.\n");
+ continue;
+ }
for (j = 0; j < status->plane_count; j++)
dummy_updates[j].surface = status->plane_states[0];
+ sort(dummy_updates, status->plane_count,
+ sizeof(*dummy_updates), dm_plane_layer_index_cmp, NULL);
mutex_lock(&dm->dc_lock);
- dc_commit_updates_for_stream(dm->dc,
- dummy_updates,
- status->plane_count,
- dm_new_crtc_state->stream,
- &stream_update,
- dc_state);
+ dc_exit_ips_for_hw_access(dm->dc);
+ dc_update_planes_and_stream(dm->dc,
+ dummy_updates,
+ status->plane_count,
+ dm_new_crtc_state->stream,
+ &stream_update);
mutex_unlock(&dm->dc_lock);
- }
+ kfree(dummy_updates);
- /* Count number of newly disabled CRTCs for dropping PM refs later. */
- for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
- new_crtc_state, i) {
- if (old_crtc_state->active && !new_crtc_state->active)
- crtc_disable_count++;
-
- dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
- dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
-
- /* For freesync config update on crtc state and params for irq */
- update_stream_irq_parameters(dm, dm_new_crtc_state);
-
- /* Handle vrr on->off / off->on transitions */
- amdgpu_dm_handle_vrr_transition(dm_old_crtc_state,
- dm_new_crtc_state);
+ drm_connector_update_privacy_screen(new_con_state);
}
/**
@@ -9721,52 +10950,68 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
#ifdef CONFIG_DEBUG_FS
- bool configure_crc = false;
enum amdgpu_dm_pipe_crc_source cur_crc_src;
-#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
- struct crc_rd_work *crc_rd_wrk = dm->crc_rd_wrk;
#endif
+ /* Count number of newly disabled CRTCs for dropping PM refs later. */
+ if (old_crtc_state->active && !new_crtc_state->active)
+ crtc_disable_count++;
+
+ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+ dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
+
+ /* For freesync config update on crtc state and params for irq */
+ update_stream_irq_parameters(dm, dm_new_crtc_state);
+
+#ifdef CONFIG_DEBUG_FS
spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
cur_crc_src = acrtc->dm_irq_params.crc_src;
spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
#endif
- dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
if (new_crtc_state->active &&
(!old_crtc_state->active ||
drm_atomic_crtc_needs_modeset(new_crtc_state))) {
dc_stream_retain(dm_new_crtc_state->stream);
acrtc->dm_irq_params.stream = dm_new_crtc_state->stream;
- manage_dm_interrupts(adev, acrtc, true);
+ manage_dm_interrupts(adev, acrtc, dm_new_crtc_state);
+ }
+ /* Handle vrr on->off / off->on transitions */
+ amdgpu_dm_handle_vrr_transition(dm_old_crtc_state, dm_new_crtc_state);
#ifdef CONFIG_DEBUG_FS
+ if (new_crtc_state->active &&
+ (!old_crtc_state->active ||
+ drm_atomic_crtc_needs_modeset(new_crtc_state))) {
/**
* Frontend may have changed so reapply the CRC capture
* settings for the stream.
*/
- dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
-
if (amdgpu_dm_is_valid_crc_source(cur_crc_src)) {
- configure_crc = true;
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
if (amdgpu_dm_crc_window_is_activated(crtc)) {
+ uint8_t cnt;
+
spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
- acrtc->dm_irq_params.crc_window.update_win = true;
- acrtc->dm_irq_params.crc_window.skip_frame_cnt = 2;
- spin_lock_irq(&crc_rd_wrk->crc_rd_work_lock);
- crc_rd_wrk->crtc = crtc;
- spin_unlock_irq(&crc_rd_wrk->crc_rd_work_lock);
+ for (cnt = 0; cnt < MAX_CRC_WINDOW_NUM; cnt++) {
+ if (acrtc->dm_irq_params.window_param[cnt].enable) {
+ acrtc->dm_irq_params.window_param[cnt].update_win = true;
+
+ /**
+ * It takes 2 frames for HW to stably generate CRC when
+ * resuming from suspend, so we set skip_frame_cnt 2.
+ */
+ acrtc->dm_irq_params.window_param[cnt].skip_frame_cnt = 2;
+ }
+ }
spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
}
#endif
- }
-
- if (configure_crc)
if (amdgpu_dm_crtc_configure_crc_source(
crtc, dm_new_crtc_state, cur_crc_src))
- DRM_DEBUG_DRIVER("Failed to configure crc source");
-#endif
+ drm_dbg_atomic(dev, "Failed to configure crc source");
+ }
}
+#endif
}
for_each_new_crtc_in_state(state, crtc, new_crtc_state, j)
@@ -9778,22 +11023,44 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
if (dm_new_crtc_state->stream)
- amdgpu_dm_commit_planes(state, dc_state, dev,
- dm, crtc, wait_for_vblank);
+ amdgpu_dm_commit_planes(state, dev, dm, crtc, wait_for_vblank);
+ }
+
+ /* Enable writeback */
+ for_each_new_connector_in_state(state, connector, new_con_state, i) {
+ struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state);
+ struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc);
+
+ if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
+ if (!new_con_state->writeback_job)
+ continue;
+
+ new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base);
+
+ if (!new_crtc_state)
+ continue;
+
+ if (acrtc->wb_enabled)
+ continue;
+
+ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+
+ dm_set_writeback(dm, dm_new_crtc_state, connector, new_con_state);
+ acrtc->wb_enabled = true;
}
/* Update audio instances for each connector. */
amdgpu_dm_commit_audio(dev, state);
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || \
- defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
/* restore the backlight level */
for (i = 0; i < dm->num_of_edps; i++) {
if (dm->backlight_dev[i] &&
- (amdgpu_dm_backlight_get_level(dm, i) != dm->brightness[i]))
+ (dm->actual_brightness[i] != dm->brightness[i]))
amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]);
}
-#endif
+
/*
* send vblank event on all events not handled in flip and
* mark consumed event for drm_atomic_helper_commit_hw_done
@@ -9816,10 +11083,17 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
drm_atomic_helper_cleanup_planes(dev, state);
- /* return the stolen vga memory back to VRAM */
- if (!adev->mman.keep_stolen_vga_memory)
- amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
- amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
+ /* Don't free the memory if we are hitting this as part of suspend.
+ * This way we don't free any memory during suspend; see
+ * amdgpu_bo_free_kernel(). The memory will be freed in the first
+ * non-suspend modeset or when the driver is torn down.
+ */
+ if (!adev->in_suspend) {
+ /* return the stolen vga memory back to VRAM */
+ if (!adev->mman.keep_stolen_vga_memory)
+ amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
+ }
/*
* Finally, drop a runtime PM reference for each newly disabled CRTC,
@@ -9830,11 +11104,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
pm_runtime_put_autosuspend(dev->dev);
pm_runtime_mark_last_busy(dev->dev);
- if (dc_state_temp)
- dc_release_state(dc_state_temp);
+ trace_amdgpu_dm_atomic_commit_tail_finish(state);
}
-
static int dm_force_atomic_commit(struct drm_connector *connector)
{
int ret = 0;
@@ -9858,16 +11130,20 @@ static int dm_force_atomic_commit(struct drm_connector *connector)
*/
conn_state = drm_atomic_get_connector_state(state, connector);
- ret = PTR_ERR_OR_ZERO(conn_state);
- if (ret)
+ /* Check for error in getting connector state */
+ if (IS_ERR(conn_state)) {
+ ret = PTR_ERR(conn_state);
goto out;
+ }
/* Attach crtc to drm_atomic_state*/
crtc_state = drm_atomic_get_crtc_state(state, &disconnected_acrtc->base);
- ret = PTR_ERR_OR_ZERO(crtc_state);
- if (ret)
+ /* Check for error in getting crtc state */
+ if (IS_ERR(crtc_state)) {
+ ret = PTR_ERR(crtc_state);
goto out;
+ }
/* force a restore */
crtc_state->mode_changed = true;
@@ -9875,9 +11151,11 @@ static int dm_force_atomic_commit(struct drm_connector *connector)
/* Attach plane to drm_atomic_state */
plane_state = drm_atomic_get_plane_state(state, plane);
- ret = PTR_ERR_OR_ZERO(plane_state);
- if (ret)
+ /* Check for error in getting plane state */
+ if (IS_ERR(plane_state)) {
+ ret = PTR_ERR(plane_state);
goto out;
+ }
/* Call commit internally with the state we just constructed */
ret = drm_atomic_commit(state);
@@ -9885,7 +11163,7 @@ static int dm_force_atomic_commit(struct drm_connector *connector)
out:
drm_atomic_state_put(state);
if (ret)
- DRM_ERROR("Restoring old state failed with %i\n", ret);
+ drm_err(ddev, "Restoring old state failed with %i\n", ret);
return ret;
}
@@ -9898,10 +11176,15 @@ out:
void dm_restore_drm_connector_state(struct drm_device *dev,
struct drm_connector *connector)
{
- struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+ struct amdgpu_dm_connector *aconnector;
struct amdgpu_crtc *disconnected_acrtc;
struct dm_crtc_state *acrtc_state;
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ return;
+
+ aconnector = to_amdgpu_dm_connector(connector);
+
if (!aconnector->dc_sink || !connector->state || !connector->encoder)
return;
@@ -9964,8 +11247,8 @@ static int do_aquire_global_lock(struct drm_device *dev,
&commit->flip_done, 10*HZ);
if (ret == 0)
- DRM_ERROR("[CRTC:%d:%s] hw_done or flip_done "
- "timed out\n", crtc->base.id, crtc->name);
+ drm_err(dev, "[CRTC:%d:%s] hw_done or flip_done timed out\n",
+ crtc->base.id, crtc->name);
drm_crtc_commit_put(commit);
}
@@ -9978,12 +11261,16 @@ static void get_freesync_config_for_crtc(
struct dm_connector_state *new_con_state)
{
struct mod_freesync_config config = {0};
- struct amdgpu_dm_connector *aconnector =
- to_amdgpu_dm_connector(new_con_state->base.connector);
+ struct amdgpu_dm_connector *aconnector;
struct drm_display_mode *mode = &new_crtc_state->base.mode;
int vrefresh = drm_mode_vrefresh(mode);
bool fs_vid_mode = false;
+ if (new_con_state->base.connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ return;
+
+ aconnector = to_amdgpu_dm_connector(new_con_state->base.connector);
+
new_crtc_state->vrr_supported = new_con_state->freesync_capable &&
vrefresh >= aconnector->min_vfreq &&
vrefresh <= aconnector->max_vfreq;
@@ -10006,6 +11293,8 @@ static void get_freesync_config_for_crtc(
} else {
config.state = VRR_STATE_INACTIVE;
}
+ } else {
+ config.state = VRR_STATE_UNSUPPORTED;
}
out:
new_crtc_state->freesync_config = config;
@@ -10024,34 +11313,35 @@ static bool
is_timing_unchanged_for_freesync(struct drm_crtc_state *old_crtc_state,
struct drm_crtc_state *new_crtc_state)
{
- struct drm_display_mode old_mode, new_mode;
+ const struct drm_display_mode *old_mode, *new_mode;
if (!old_crtc_state || !new_crtc_state)
return false;
- old_mode = old_crtc_state->mode;
- new_mode = new_crtc_state->mode;
-
- if (old_mode.clock == new_mode.clock &&
- old_mode.hdisplay == new_mode.hdisplay &&
- old_mode.vdisplay == new_mode.vdisplay &&
- old_mode.htotal == new_mode.htotal &&
- old_mode.vtotal != new_mode.vtotal &&
- old_mode.hsync_start == new_mode.hsync_start &&
- old_mode.vsync_start != new_mode.vsync_start &&
- old_mode.hsync_end == new_mode.hsync_end &&
- old_mode.vsync_end != new_mode.vsync_end &&
- old_mode.hskew == new_mode.hskew &&
- old_mode.vscan == new_mode.vscan &&
- (old_mode.vsync_end - old_mode.vsync_start) ==
- (new_mode.vsync_end - new_mode.vsync_start))
+ old_mode = &old_crtc_state->mode;
+ new_mode = &new_crtc_state->mode;
+
+ if (old_mode->clock == new_mode->clock &&
+ old_mode->hdisplay == new_mode->hdisplay &&
+ old_mode->vdisplay == new_mode->vdisplay &&
+ old_mode->htotal == new_mode->htotal &&
+ old_mode->vtotal != new_mode->vtotal &&
+ old_mode->hsync_start == new_mode->hsync_start &&
+ old_mode->vsync_start != new_mode->vsync_start &&
+ old_mode->hsync_end == new_mode->hsync_end &&
+ old_mode->vsync_end != new_mode->vsync_end &&
+ old_mode->hskew == new_mode->hskew &&
+ old_mode->vscan == new_mode->vscan &&
+ (old_mode->vsync_end - old_mode->vsync_start) ==
+ (new_mode->vsync_end - new_mode->vsync_start))
return true;
return false;
}
-static void set_freesync_fixed_config(struct dm_crtc_state *dm_new_crtc_state) {
- uint64_t num, den, res;
+static void set_freesync_fixed_config(struct dm_crtc_state *dm_new_crtc_state)
+{
+ u64 num, den, res;
struct drm_crtc_state *new_crtc_state = &dm_new_crtc_state->base;
dm_new_crtc_state->freesync_config.state = VRR_STATE_ACTIVE_FIXED;
@@ -10065,16 +11355,17 @@ static void set_freesync_fixed_config(struct dm_crtc_state *dm_new_crtc_state) {
}
static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
- struct drm_atomic_state *state,
- struct drm_crtc *crtc,
- struct drm_crtc_state *old_crtc_state,
- struct drm_crtc_state *new_crtc_state,
- bool enable,
- bool *lock_and_validation_needed)
+ struct drm_atomic_state *state,
+ struct drm_crtc *crtc,
+ struct drm_crtc_state *old_crtc_state,
+ struct drm_crtc_state *new_crtc_state,
+ bool enable,
+ bool *lock_and_validation_needed)
{
struct dm_atomic_state *dm_state = NULL;
struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state;
struct dc_stream_state *new_stream;
+ struct amdgpu_device *adev = dm->adev;
int ret = 0;
/*
@@ -10082,6 +11373,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
* update changed items
*/
struct amdgpu_crtc *acrtc = NULL;
+ struct drm_connector *connector = NULL;
struct amdgpu_dm_connector *aconnector = NULL;
struct drm_connector_state *drm_new_conn_state = NULL, *drm_old_conn_state = NULL;
struct dm_connector_state *dm_new_conn_state = NULL, *dm_old_conn_state = NULL;
@@ -10091,18 +11383,20 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
acrtc = to_amdgpu_crtc(crtc);
- aconnector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc);
+ connector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc);
+ if (connector)
+ aconnector = to_amdgpu_dm_connector(connector);
/* TODO This hack should go away */
- if (aconnector && enable) {
+ if (connector && enable) {
/* Make sure fake sink is created in plug-in scenario */
drm_new_conn_state = drm_atomic_get_new_connector_state(state,
- &aconnector->base);
+ connector);
drm_old_conn_state = drm_atomic_get_old_connector_state(state,
- &aconnector->base);
+ connector);
- if (IS_ERR(drm_new_conn_state)) {
- ret = PTR_ERR_OR_ZERO(drm_new_conn_state);
+ if (WARN_ON(!drm_new_conn_state)) {
+ ret = -EINVAL;
goto fail;
}
@@ -10112,7 +11406,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
if (!drm_atomic_crtc_needs_modeset(new_crtc_state))
goto skip_modeset;
- new_stream = create_validate_stream_for_sink(aconnector,
+ new_stream = create_validate_stream_for_sink(connector,
&new_crtc_state->mode,
dm_new_conn_state,
dm_old_crtc_state->stream);
@@ -10125,7 +11419,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
*/
if (!new_stream) {
- DRM_DEBUG_DRIVER("%s: Failed to create new stream for crtc %d\n",
+ drm_dbg_driver(adev_to_drm(adev), "%s: Failed to create new stream for crtc %d\n",
__func__, acrtc->base.base.id);
ret = -ENOMEM;
goto fail;
@@ -10163,7 +11457,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
dc_is_stream_unchanged(new_stream, dm_old_crtc_state->stream) &&
dc_is_stream_scaling_unchanged(new_stream, dm_old_crtc_state->stream)) {
new_crtc_state->mode_changed = false;
- DRM_DEBUG_DRIVER("Mode change not required, setting mode_changed to %d",
+ drm_dbg_driver(adev_to_drm(adev), "Mode change not required, setting mode_changed to %d",
new_crtc_state->mode_changed);
}
}
@@ -10172,10 +11466,8 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
if (!drm_atomic_crtc_needs_modeset(new_crtc_state))
goto skip_modeset;
- DRM_DEBUG_ATOMIC(
- "amdgpu_crtc id:%d crtc_state_flags: enable:%d, active:%d, "
- "planes_changed:%d, mode_changed:%d,active_changed:%d,"
- "connectors_changed:%d\n",
+ drm_dbg_state(state->dev,
+ "amdgpu_crtc id:%d crtc_state_flags: enable:%d, active:%d, planes_changed:%d, mode_changed:%d,active_changed:%d,connectors_changed:%d\n",
acrtc->crtc_id,
new_crtc_state->enable,
new_crtc_state->active,
@@ -10190,13 +11482,21 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
if (!dm_old_crtc_state->stream)
goto skip_modeset;
+ /* Unset freesync video if it was active before */
+ if (dm_old_crtc_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED) {
+ dm_new_crtc_state->freesync_config.state = VRR_STATE_INACTIVE;
+ dm_new_crtc_state->freesync_config.fixed_refresh_in_uhz = 0;
+ }
+
+ /* Now check if we should set freesync video mode */
if (amdgpu_freesync_vid_mode && dm_new_crtc_state->stream &&
+ dc_is_stream_unchanged(new_stream, dm_old_crtc_state->stream) &&
+ dc_is_stream_scaling_unchanged(new_stream, dm_old_crtc_state->stream) &&
is_timing_unchanged_for_freesync(new_crtc_state,
old_crtc_state)) {
new_crtc_state->mode_changed = false;
- DRM_DEBUG_DRIVER(
- "Mode change not required for front porch change, "
- "setting mode_changed to %d",
+ drm_dbg_driver(adev_to_drm(adev),
+ "Mode change not required for front porch change, setting mode_changed to %d",
new_crtc_state->mode_changed);
set_freesync_fixed_config(dm_new_crtc_state);
@@ -10208,20 +11508,19 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
struct drm_display_mode *high_mode;
high_mode = get_highest_refresh_rate_mode(aconnector, false);
- if (!drm_mode_equal(&new_crtc_state->mode, high_mode)) {
+ if (!drm_mode_equal(&new_crtc_state->mode, high_mode))
set_freesync_fixed_config(dm_new_crtc_state);
- }
}
ret = dm_atomic_get_state(state, &dm_state);
if (ret)
goto fail;
- DRM_DEBUG_DRIVER("Disabling DRM crtc: %d\n",
+ drm_dbg_driver(adev_to_drm(adev), "Disabling DRM crtc: %d\n",
crtc->base.id);
/* i.e. reset mode */
- if (dc_remove_stream_from_ctx(
+ if (dc_state_remove_stream(
dm->dc,
dm_state->context,
dm_old_crtc_state->stream) != DC_OK) {
@@ -10242,13 +11541,13 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
* added MST connectors not found in existing crtc_state in the chained mode
* TODO: need to dig out the root cause of that
*/
- if (!aconnector || (!aconnector->dc_sink && aconnector->mst_port))
+ if (!connector)
goto skip_modeset;
if (modereset_required(new_crtc_state))
goto skip_modeset;
- if (modeset_required(new_crtc_state, new_stream,
+ if (amdgpu_dm_crtc_modeset_required(new_crtc_state, new_stream,
dm_old_crtc_state->stream)) {
WARN_ON(dm_new_crtc_state->stream);
@@ -10264,7 +11563,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
DRM_DEBUG_ATOMIC("Enabling DRM crtc: %d\n",
crtc->base.id);
- if (dc_add_stream_to_ctx(
+ if (dc_state_add_stream(
dm->dc,
dm_state->context,
dm_new_crtc_state->stream) != DC_OK) {
@@ -10279,13 +11578,13 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
skip_modeset:
/* Release extra reference */
if (new_stream)
- dc_stream_release(new_stream);
+ dc_stream_release(new_stream);
/*
* We want to do dc stream updates that do not require a
* full modeset below.
*/
- if (!(enable && aconnector && new_crtc_state->active))
+ if (!(enable && connector && new_crtc_state->active))
return 0;
/*
* Given above conditions, the dc state cannot be NULL because:
@@ -10311,8 +11610,9 @@ skip_modeset:
* when a modeset is needed, to ensure it gets reprogrammed.
*/
if (dm_new_crtc_state->base.color_mgmt_changed ||
+ dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf ||
drm_atomic_crtc_needs_modeset(new_crtc_state)) {
- ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state);
+ ret = amdgpu_dm_check_crtc_color_mgmt(dm_new_crtc_state, true);
if (ret)
goto fail;
}
@@ -10336,15 +11636,20 @@ static bool should_reset_plane(struct drm_atomic_state *state,
{
struct drm_plane *other;
struct drm_plane_state *old_other_state, *new_other_state;
- struct drm_crtc_state *new_crtc_state;
+ struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+ struct dm_crtc_state *old_dm_crtc_state, *new_dm_crtc_state;
+ struct amdgpu_device *adev = drm_to_adev(plane->dev);
int i;
/*
- * TODO: Remove this hack once the checks below are sufficient
- * enough to determine when we need to reset all the planes on
- * the stream.
+ * TODO: Remove this hack for all asics once fast updates are proven
+ * to work fine on DCN3.2+.
*/
- if (state->allow_modeset)
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) < IP_VERSION(3, 2, 0) &&
+ state->allow_modeset)
+ return true;
+
+ if (amdgpu_in_reset(adev) && state->allow_modeset)
return true;
/* Exit early if we know that we're adding or removing the plane. */
@@ -10357,14 +11662,38 @@ static bool should_reset_plane(struct drm_atomic_state *state,
new_crtc_state =
drm_atomic_get_new_crtc_state(state, new_plane_state->crtc);
+ old_crtc_state =
+ drm_atomic_get_old_crtc_state(state, old_plane_state->crtc);
if (!new_crtc_state)
return true;
+ /*
+ * A change in cursor mode means a new dc pipe needs to be acquired or
+ * released from the state
+ */
+ old_dm_crtc_state = to_dm_crtc_state(old_crtc_state);
+ new_dm_crtc_state = to_dm_crtc_state(new_crtc_state);
+ if (plane->type == DRM_PLANE_TYPE_CURSOR &&
+ old_dm_crtc_state != NULL &&
+ old_dm_crtc_state->cursor_mode != new_dm_crtc_state->cursor_mode) {
+ return true;
+ }
+
/* CRTC Degamma changes currently require us to recreate planes. */
if (new_crtc_state->color_mgmt_changed)
return true;
+ /*
+ * On zpos change, planes need to be reordered by removing and re-adding
+ * them one by one to the dc state, in order of descending zpos.
+ *
+ * TODO: We can likely skip bandwidth validation if the only thing that
+ * changed about the plane was its z-ordering.
+ */
+ if (old_plane_state->normalized_zpos != new_plane_state->normalized_zpos)
+ return true;
+
if (drm_atomic_crtc_needs_modeset(new_crtc_state))
return true;
@@ -10378,6 +11707,11 @@ static bool should_reset_plane(struct drm_atomic_state *state,
*/
for_each_oldnew_plane_in_state(state, other, old_other_state, new_other_state, i) {
struct amdgpu_framebuffer *old_afb, *new_afb;
+ struct dm_plane_state *dm_new_other_state, *dm_old_other_state;
+
+ dm_new_other_state = to_dm_plane_state(new_other_state);
+ dm_old_other_state = to_dm_plane_state(old_other_state);
+
if (other->type == DRM_PLANE_TYPE_CURSOR)
continue;
@@ -10413,6 +11747,18 @@ static bool should_reset_plane(struct drm_atomic_state *state,
old_other_state->color_encoding != new_other_state->color_encoding)
return true;
+ /* HDR/Transfer Function changes. */
+ if (dm_old_other_state->degamma_tf != dm_new_other_state->degamma_tf ||
+ dm_old_other_state->degamma_lut != dm_new_other_state->degamma_lut ||
+ dm_old_other_state->hdr_mult != dm_new_other_state->hdr_mult ||
+ dm_old_other_state->ctm != dm_new_other_state->ctm ||
+ dm_old_other_state->shaper_lut != dm_new_other_state->shaper_lut ||
+ dm_old_other_state->shaper_tf != dm_new_other_state->shaper_tf ||
+ dm_old_other_state->lut3d != dm_new_other_state->lut3d ||
+ dm_old_other_state->blend_lut != dm_new_other_state->blend_lut ||
+ dm_old_other_state->blend_tf != dm_new_other_state->blend_tf)
+ return true;
+
/* Framebuffer checks fall at the end. */
if (!old_other_state->fb || !new_other_state->fb)
continue;
@@ -10476,14 +11822,17 @@ static int dm_check_cursor_fb(struct amdgpu_crtc *new_acrtc,
}
/* Core DRM takes care of checking FB modifiers, so we only need to
- * check tiling flags when the FB doesn't have a modifier. */
+ * check tiling flags when the FB doesn't have a modifier.
+ */
if (!(fb->flags & DRM_MODE_FB_MODIFIERS)) {
- if (adev->family < AMDGPU_FAMILY_AI) {
+ if (adev->family >= AMDGPU_FAMILY_GC_12_0_0) {
+ linear = AMDGPU_TILING_GET(afb->tiling_flags, GFX12_SWIZZLE_MODE) == 0;
+ } else if (adev->family >= AMDGPU_FAMILY_AI) {
+ linear = AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE) == 0;
+ } else {
linear = AMDGPU_TILING_GET(afb->tiling_flags, ARRAY_MODE) != DC_ARRAY_2D_TILED_THIN1 &&
- AMDGPU_TILING_GET(afb->tiling_flags, ARRAY_MODE) != DC_ARRAY_1D_TILED_THIN1 &&
+ AMDGPU_TILING_GET(afb->tiling_flags, ARRAY_MODE) != DC_ARRAY_1D_TILED_THIN1 &&
AMDGPU_TILING_GET(afb->tiling_flags, MICRO_TILE_MODE) == 0;
- } else {
- linear = AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE) == 0;
}
if (!linear) {
DRM_DEBUG_ATOMIC("Cursor FB not linear");
@@ -10494,13 +11843,76 @@ static int dm_check_cursor_fb(struct amdgpu_crtc *new_acrtc,
return 0;
}
+/*
+ * Validate a cursor plane update that is handled in native cursor mode.
+ *
+ * Nothing is checked when the plane is not being enabled, has no new CRTC, or
+ * is being disabled. Otherwise a cropped source (non-zero src_x/src_y) is
+ * rejected with -EINVAL and, when a framebuffer is attached, the result of
+ * dm_check_cursor_fb() is returned. Returns 0 when every check passes.
+ */
+static int dm_check_native_cursor_state(struct drm_crtc *new_plane_crtc,
+					struct drm_plane *plane,
+					struct drm_plane_state *new_plane_state,
+					bool enable)
+{
+	/* Only a cursor that ends up enabled on a CRTC needs validating. */
+	if (!enable)
+		return 0;
+	if (!new_plane_crtc)
+		return 0;
+	if (drm_atomic_plane_disabling(plane->state, new_plane_state))
+		return 0;
+
+	if (new_plane_state->src_x != 0 || new_plane_state->src_y != 0) {
+		DRM_DEBUG_ATOMIC("Cropping not supported for cursor plane\n");
+		return -EINVAL;
+	}
+
+	/* Without a framebuffer there is nothing further to verify. */
+	if (!new_plane_state->fb)
+		return 0;
+
+	return dm_check_cursor_fb(to_amdgpu_crtc(new_plane_crtc),
+				  new_plane_state, new_plane_state->fb);
+}
+
+/*
+ * Decide whether a plane update should go through the native cursor path.
+ *
+ * The old CRTC (when disabling) or the new CRTC (when enabling) is consulted:
+ * with no such CRTC the answer is true, otherwise it is whether that CRTC's
+ * DM state has cursor_mode == DM_CURSOR_NATIVE_MODE.
+ */
+static bool dm_should_update_native_cursor(struct drm_atomic_state *state,
+					   struct drm_crtc *old_plane_crtc,
+					   struct drm_crtc *new_plane_crtc,
+					   bool enable)
+{
+	struct drm_crtc *plane_crtc = enable ? new_plane_crtc : old_plane_crtc;
+	struct drm_crtc_state *crtc_state;
+
+	if (!plane_crtc)
+		return true;
+
+	/* Enabling looks at the new CRTC state, disabling at the old one. */
+	if (enable)
+		crtc_state = drm_atomic_get_new_crtc_state(state, plane_crtc);
+	else
+		crtc_state = drm_atomic_get_old_crtc_state(state, plane_crtc);
+
+	return to_dm_crtc_state(crtc_state)->cursor_mode == DM_CURSOR_NATIVE_MODE;
+}
+
static int dm_update_plane_state(struct dc *dc,
struct drm_atomic_state *state,
struct drm_plane *plane,
struct drm_plane_state *old_plane_state,
struct drm_plane_state *new_plane_state,
bool enable,
- bool *lock_and_validation_needed)
+ bool *lock_and_validation_needed,
+ bool *is_top_most_overlay)
{
struct dm_atomic_state *dm_state = NULL;
@@ -10508,8 +11920,7 @@ static int dm_update_plane_state(struct dc *dc,
struct drm_crtc_state *old_crtc_state, *new_crtc_state;
struct dm_crtc_state *dm_new_crtc_state, *dm_old_crtc_state;
struct dm_plane_state *dm_new_plane_state, *dm_old_plane_state;
- struct amdgpu_crtc *new_acrtc;
- bool needs_reset;
+ bool needs_reset, update_native_cursor;
int ret = 0;
@@ -10518,24 +11929,16 @@ static int dm_update_plane_state(struct dc *dc,
dm_new_plane_state = to_dm_plane_state(new_plane_state);
dm_old_plane_state = to_dm_plane_state(old_plane_state);
- if (plane->type == DRM_PLANE_TYPE_CURSOR) {
- if (!enable || !new_plane_crtc ||
- drm_atomic_plane_disabling(plane->state, new_plane_state))
- return 0;
-
- new_acrtc = to_amdgpu_crtc(new_plane_crtc);
+ update_native_cursor = dm_should_update_native_cursor(state,
+ old_plane_crtc,
+ new_plane_crtc,
+ enable);
- if (new_plane_state->src_x != 0 || new_plane_state->src_y != 0) {
- DRM_DEBUG_ATOMIC("Cropping not supported for cursor plane\n");
- return -EINVAL;
- }
-
- if (new_plane_state->fb) {
- ret = dm_check_cursor_fb(new_acrtc, new_plane_state,
- new_plane_state->fb);
- if (ret)
- return ret;
- }
+ if (plane->type == DRM_PLANE_TYPE_CURSOR && update_native_cursor) {
+ ret = dm_check_native_cursor_state(new_plane_crtc, plane,
+ new_plane_state, enable);
+ if (ret)
+ return ret;
return 0;
}
@@ -10565,7 +11968,7 @@ static int dm_update_plane_state(struct dc *dc,
if (ret)
return ret;
- if (!dc_remove_plane_from_context(
+ if (!dc_state_remove_plane(
dc,
dm_old_crtc_state->stream,
dm_old_plane_state->dc_state,
@@ -10574,8 +11977,9 @@ static int dm_update_plane_state(struct dc *dc,
return -EINVAL;
}
+ if (dm_old_plane_state->dc_state)
+ dc_plane_state_release(dm_old_plane_state->dc_state);
- dc_plane_state_release(dm_old_plane_state->dc_state);
dm_new_plane_state->dc_state = NULL;
*lock_and_validation_needed = true;
@@ -10598,15 +12002,17 @@ static int dm_update_plane_state(struct dc *dc,
if (!needs_reset)
return 0;
- ret = dm_plane_helper_check_state(new_plane_state, new_crtc_state);
+ ret = amdgpu_dm_plane_helper_check_state(new_plane_state, new_crtc_state);
if (ret)
- return ret;
+ goto out;
WARN_ON(dm_new_plane_state->dc_state);
dc_new_plane_state = dc_create_plane_state(dc);
- if (!dc_new_plane_state)
- return -ENOMEM;
+ if (!dc_new_plane_state) {
+ ret = -ENOMEM;
+ goto out;
+ }
DRM_DEBUG_ATOMIC("Enabling DRM plane: %d on DRM crtc %d\n",
plane->base.id, new_plane_crtc->base.id);
@@ -10618,13 +12024,13 @@ static int dm_update_plane_state(struct dc *dc,
new_crtc_state);
if (ret) {
dc_plane_state_release(dc_new_plane_state);
- return ret;
+ goto out;
}
ret = dm_atomic_get_state(state, &dm_state);
if (ret) {
dc_plane_state_release(dc_new_plane_state);
- return ret;
+ goto out;
}
/*
@@ -10634,18 +12040,21 @@ static int dm_update_plane_state(struct dc *dc,
* state. It'll be released when the atomic state is
* cleaned.
*/
- if (!dc_add_plane_to_context(
+ if (!dc_state_add_plane(
dc,
dm_new_crtc_state->stream,
dc_new_plane_state,
dm_state->context)) {
dc_plane_state_release(dc_new_plane_state);
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
dm_new_plane_state->dc_state = dc_new_plane_state;
+ dm_new_crtc_state->mpo_requested |= (plane->type == DRM_PLANE_TYPE_OVERLAY);
+
/* Tell DC to do a full surface update every time there
* is a plane change. Inefficient, but works for now.
*/
@@ -10654,80 +12063,126 @@ static int dm_update_plane_state(struct dc *dc,
*lock_and_validation_needed = true;
}
+out:
+ /* If enabling cursor overlay failed, attempt fallback to native mode */
+ if (enable && ret == -EINVAL && plane->type == DRM_PLANE_TYPE_CURSOR) {
+ ret = dm_check_native_cursor_state(new_plane_crtc, plane,
+ new_plane_state, enable);
+ if (ret)
+ return ret;
+
+ dm_new_crtc_state->cursor_mode = DM_CURSOR_NATIVE_MODE;
+ }
return ret;
}
-static int dm_check_crtc_cursor(struct drm_atomic_state *state,
- struct drm_crtc *crtc,
- struct drm_crtc_state *new_crtc_state)
+/*
+ * Report the plane's source width and height as src_w/src_h shifted right by
+ * 16, swapping the two when the plane is rotated by 90 or 270 degrees.
+ * Any other rotation value is treated like 0/180 (no swap).
+ */
+static void dm_get_oriented_plane_size(struct drm_plane_state *plane_state,
+ int *src_w, int *src_h)
{
- struct drm_plane *cursor = crtc->cursor, *underlying;
- struct drm_plane_state *new_cursor_state, *new_underlying_state;
- int i;
- int cursor_scale_w, cursor_scale_h, underlying_scale_w, underlying_scale_h;
+ switch (plane_state->rotation & DRM_MODE_ROTATE_MASK) {
+ case DRM_MODE_ROTATE_90:
+ case DRM_MODE_ROTATE_270:
+ *src_w = plane_state->src_h >> 16;
+ *src_h = plane_state->src_w >> 16;
+ break;
+ case DRM_MODE_ROTATE_0:
+ case DRM_MODE_ROTATE_180:
+ default:
+ *src_w = plane_state->src_w >> 16;
+ *src_h = plane_state->src_h >> 16;
+ break;
+ }
+}
- /* On DCE and DCN there is no dedicated hardware cursor plane. We get a
- * cursor per pipe but it's going to inherit the scaling and
- * positioning from the underlying pipe. Check the cursor plane's
- * blending properties match the underlying planes'. */
+/*
+ * Compute the plane's horizontal and vertical scale as
+ * crtc size * 1000 / oriented source size (see dm_get_oriented_plane_size()).
+ * A zero source dimension yields a scale of 0 instead of dividing by zero.
+ */
+static void
+dm_get_plane_scale(struct drm_plane_state *plane_state,
+ int *out_plane_scale_w, int *out_plane_scale_h)
+{
+ int plane_src_w, plane_src_h;
- new_cursor_state = drm_atomic_get_new_plane_state(state, cursor);
- if (!new_cursor_state || !new_cursor_state->fb) {
- return 0;
- }
+ dm_get_oriented_plane_size(plane_state, &plane_src_w, &plane_src_h);
+ *out_plane_scale_w = plane_src_w ? plane_state->crtc_w * 1000 / plane_src_w : 0;
+ *out_plane_scale_h = plane_src_h ? plane_state->crtc_h * 1000 / plane_src_h : 0;
+}
- cursor_scale_w = new_cursor_state->crtc_w * 1000 /
- (new_cursor_state->src_w >> 16);
- cursor_scale_h = new_cursor_state->crtc_h * 1000 /
- (new_cursor_state->src_h >> 16);
+/*
+ * The normalized_zpos value cannot be used by this iterator directly. It's only
+ * calculated for enabled planes, potentially causing normalized_zpos collisions
+ * between enabled/disabled planes in the atomic state. We need a unique value
+ * so that the iterator will not generate the same object twice, or loop
+ * indefinitely.
+ *
+ * Returns the entry of state->planes whose new-state (zpos, object id) pair is
+ * the greatest one strictly below that of @prev. When @prev is NULL the search
+ * starts from the bounds zpos 256 / id UINT_MAX. Returns NULL once no plane
+ * qualifies, which ends the iteration.
+ */
+static inline struct __drm_planes_state *__get_next_zpos(
+ struct drm_atomic_state *state,
+ struct __drm_planes_state *prev)
+{
+ unsigned int highest_zpos = 0, prev_zpos = 256;
+ uint32_t highest_id = 0, prev_id = UINT_MAX;
+ struct drm_plane_state *new_plane_state;
+ struct drm_plane *plane;
+ int i, highest_i = -1;
- for_each_new_plane_in_state_reverse(state, underlying, new_underlying_state, i) {
- /* Narrow down to non-cursor planes on the same CRTC as the cursor */
- if (new_underlying_state->crtc != crtc || underlying == crtc->cursor)
- continue;
+ if (prev != NULL) {
+ prev_zpos = prev->new_state->zpos;
+ prev_id = prev->ptr->base.id;
+ }
- /* Ignore disabled planes */
- if (!new_underlying_state->fb)
+ for_each_new_plane_in_state(state, plane, new_plane_state, i) {
+ /* Skip planes with higher zpos than the previously returned */
+ if (new_plane_state->zpos > prev_zpos ||
+ (new_plane_state->zpos == prev_zpos &&
+ plane->base.id >= prev_id))
continue;
- underlying_scale_w = new_underlying_state->crtc_w * 1000 /
- (new_underlying_state->src_w >> 16);
- underlying_scale_h = new_underlying_state->crtc_h * 1000 /
- (new_underlying_state->src_h >> 16);
-
- if (cursor_scale_w != underlying_scale_w ||
- cursor_scale_h != underlying_scale_h) {
- drm_dbg_atomic(crtc->dev,
- "Cursor [PLANE:%d:%s] scaling doesn't match underlying [PLANE:%d:%s]\n",
- cursor->base.id, cursor->name, underlying->base.id, underlying->name);
- return -EINVAL;
+ /* Save the index of the plane with highest zpos */
+ if (new_plane_state->zpos > highest_zpos ||
+ (new_plane_state->zpos == highest_zpos &&
+ plane->base.id > highest_id)) {
+ highest_zpos = new_plane_state->zpos;
+ highest_id = plane->base.id;
+ highest_i = i;
}
-
- /* If this plane covers the whole CRTC, no need to check planes underneath */
- if (new_underlying_state->crtc_x <= 0 &&
- new_underlying_state->crtc_y <= 0 &&
- new_underlying_state->crtc_x + new_underlying_state->crtc_w >= new_crtc_state->mode.hdisplay &&
- new_underlying_state->crtc_y + new_underlying_state->crtc_h >= new_crtc_state->mode.vdisplay)
- break;
}
- return 0;
+ if (highest_i < 0)
+ return NULL;
+
+ return &state->planes[highest_i];
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
+/*
+ * Use the uniqueness of the plane's (zpos, drm obj ID) combination to iterate
+ * by descending zpos, as read from the new plane state. This is the same
+ * ordering as defined by drm_atomic_normalize_zpos().
+ *
+ * @__state: atomic state whose planes are walked
+ * @plane: assigned each plane in turn
+ * @old_plane_state: assigned that plane's old state
+ * @new_plane_state: assigned that plane's new state
+ *
+ * Every step calls __get_next_zpos(), which rescans all planes in @__state.
+ */
+#define for_each_oldnew_plane_in_descending_zpos(__state, plane, old_plane_state, new_plane_state) \
+ for (struct __drm_planes_state *__i = __get_next_zpos((__state), NULL); \
+ __i != NULL; __i = __get_next_zpos((__state), __i)) \
+ for_each_if(((plane) = __i->ptr, \
+ (void)(plane) /* Only to avoid unused-but-set-variable warning */, \
+ (old_plane_state) = __i->old_state, \
+ (new_plane_state) = __i->new_state, 1))
+
static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm_crtc *crtc)
{
struct drm_connector *connector;
- struct drm_connector_state *conn_state;
+ struct drm_connector_state *conn_state, *old_conn_state;
struct amdgpu_dm_connector *aconnector = NULL;
int i;
- for_each_new_connector_in_state(state, connector, conn_state, i) {
+
+ for_each_oldnew_connector_in_state(state, connector, old_conn_state, conn_state, i) {
+ if (!conn_state->crtc)
+ conn_state = old_conn_state;
+
if (conn_state->crtc != crtc)
continue;
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
- if (!aconnector->port || !aconnector->mst_port)
+ if (!aconnector->mst_output_port || !aconnector->mst_root)
aconnector = NULL;
else
break;
@@ -10736,12 +12191,199 @@ static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm
if (!aconnector)
return 0;
- return drm_dp_mst_add_affected_dsc_crtcs(state, &aconnector->mst_port->mst_mgr);
+ return drm_dp_mst_add_affected_dsc_crtcs(state, &aconnector->mst_root->mst_mgr);
+}
+
+/**
+ * DOC: Cursor Modes - Native vs Overlay
+ *
+ * In native mode, the cursor uses an integrated cursor pipe within each DCN hw
+ * plane. It does not require a dedicated hw plane to enable, but it is
+ * subjected to the same z-order and scaling as the hw plane. It also has format
+ * restrictions, a RGB cursor in native mode cannot be enabled within a non-RGB
+ * hw plane.
+ *
+ * In overlay mode, the cursor uses a separate DCN hw plane, and thus has its
+ * own scaling and z-pos. It also has no blending restrictions. It lends to a
+ * cursor behavior more akin to a DRM client's expectations. However, it does
+ * occupy an extra DCN plane, and therefore will only be used if a DCN plane is
+ * available.
+ */
+
+/**
+ * dm_crtc_get_cursor_mode() - Determine the required cursor mode on crtc
+ * @adev: amdgpu device
+ * @state: DRM atomic state
+ * @dm_crtc_state: amdgpu state for the CRTC containing the cursor
+ * @cursor_mode: Returns the required cursor mode on dm_crtc_state
+ *
+ * Get whether the cursor should be enabled in native mode, or overlay mode, on
+ * the dm_crtc_state.
+ *
+ * The cursor should be enabled in overlay mode if there exists an underlying
+ * plane - on which the cursor may be blended - that is either YUV formatted, or
+ * scaled differently from the cursor in either dimension. Overlay mode is also
+ * selected when the planes below the cursor do not cover the entire CRTC.
+ *
+ * When nothing relevant changed in @state (no plane enable/disable, format or
+ * scale change on this CRTC, and no zpos change), or the cursor is disabled,
+ * @cursor_mode is left at the mode currently stored in @dm_crtc_state.
+ *
+ * Since zpos info is required, drm_atomic_normalize_zpos must be called before
+ * calling this function.
+ *
+ * Return: 0 on success, or an error code if getting the cursor plane state
+ * failed.
+ */
+static int dm_crtc_get_cursor_mode(struct amdgpu_device *adev,
+				   struct drm_atomic_state *state,
+				   struct dm_crtc_state *dm_crtc_state,
+				   enum amdgpu_dm_cursor_mode *cursor_mode)
+{
+	struct drm_plane_state *old_plane_state, *plane_state, *cursor_state;
+	struct drm_crtc_state *crtc_state = &dm_crtc_state->base;
+	struct drm_plane *plane;
+	bool consider_mode_change = false;
+	bool entire_crtc_covered = false;
+	bool cursor_changed = false;
+	int underlying_scale_w, underlying_scale_h;
+	int cursor_scale_w, cursor_scale_h;
+	int i;
+
+	/* Overlay cursor not supported on HW before DCN
+	 * DCN401 does not have the cursor-on-scaled-plane or cursor-on-yuv-plane restrictions
+	 * as previous DCN generations, so enable native mode on DCN401 in addition to DCE
+	 */
+	if (amdgpu_ip_version(adev, DCE_HWIP, 0) == 0 ||
+	    amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(4, 0, 1)) {
+		*cursor_mode = DM_CURSOR_NATIVE_MODE;
+		return 0;
+	}
+
+	/* Init cursor_mode to be the same as current */
+	*cursor_mode = dm_crtc_state->cursor_mode;
+
+	/*
+	 * Cursor mode can change if a plane's format changes, scale changes, is
+	 * enabled/disabled, or z-order changes.
+	 */
+	for_each_oldnew_plane_in_state(state, plane, old_plane_state, plane_state, i) {
+		int new_scale_w, new_scale_h, old_scale_w, old_scale_h;
+
+		/* Only care about planes on this CRTC */
+		if ((drm_plane_mask(plane) & crtc_state->plane_mask) == 0)
+			continue;
+
+		if (plane->type == DRM_PLANE_TYPE_CURSOR)
+			cursor_changed = true;
+
+		if (drm_atomic_plane_enabling(old_plane_state, plane_state) ||
+		    drm_atomic_plane_disabling(old_plane_state, plane_state) ||
+		    old_plane_state->fb->format != plane_state->fb->format) {
+			consider_mode_change = true;
+			break;
+		}
+
+		dm_get_plane_scale(plane_state, &new_scale_w, &new_scale_h);
+		dm_get_plane_scale(old_plane_state, &old_scale_w, &old_scale_h);
+		if (new_scale_w != old_scale_w || new_scale_h != old_scale_h) {
+			consider_mode_change = true;
+			break;
+		}
+	}
+
+	if (!consider_mode_change && !crtc_state->zpos_changed)
+		return 0;
+
+	/*
+	 * If no cursor change on this CRTC, and not enabled on this CRTC, then
+	 * no need to set cursor mode. This avoids needlessly locking the cursor
+	 * state.
+	 */
+	if (!cursor_changed &&
+	    !(drm_plane_mask(crtc_state->crtc->cursor) & crtc_state->plane_mask)) {
+		return 0;
+	}
+
+	cursor_state = drm_atomic_get_plane_state(state,
+						  crtc_state->crtc->cursor);
+	if (IS_ERR(cursor_state))
+		return PTR_ERR(cursor_state);
+
+	/* Cursor is disabled */
+	if (!cursor_state->fb)
+		return 0;
+
+	/* The cursor's own scale does not depend on the underlying plane */
+	dm_get_plane_scale(cursor_state, &cursor_scale_w, &cursor_scale_h);
+
+	/* For all planes in descending z-order (all of which are below cursor
+	 * as per zpos definitions), check their scaling and format
+	 */
+	for_each_oldnew_plane_in_descending_zpos(state, plane, old_plane_state, plane_state) {
+
+		/* Only care about non-cursor planes on this CRTC */
+		if ((drm_plane_mask(plane) & crtc_state->plane_mask) == 0 ||
+		    plane->type == DRM_PLANE_TYPE_CURSOR)
+			continue;
+
+		/* Underlying plane is YUV format - use overlay cursor */
+		if (amdgpu_dm_plane_is_video_format(plane_state->fb->format->format)) {
+			*cursor_mode = DM_CURSOR_OVERLAY_MODE;
+			return 0;
+		}
+
+		dm_get_plane_scale(plane_state,
+				   &underlying_scale_w, &underlying_scale_h);
+
+		/*
+		 * Underlying plane has different scale - use overlay cursor.
+		 * A mismatch in either dimension is enough: the native cursor
+		 * is subject to the same scaling as the hw plane it sits on.
+		 */
+		if (cursor_scale_w != underlying_scale_w ||
+		    cursor_scale_h != underlying_scale_h) {
+			*cursor_mode = DM_CURSOR_OVERLAY_MODE;
+			return 0;
+		}
+
+		/* If this plane covers the whole CRTC, no need to check planes underneath */
+		if (plane_state->crtc_x <= 0 && plane_state->crtc_y <= 0 &&
+		    plane_state->crtc_x + plane_state->crtc_w >= crtc_state->mode.hdisplay &&
+		    plane_state->crtc_y + plane_state->crtc_h >= crtc_state->mode.vdisplay) {
+			entire_crtc_covered = true;
+			break;
+		}
+	}
+
+	/* If planes do not cover the entire CRTC, use overlay mode to enable
+	 * cursor over holes
+	 */
+	if (entire_crtc_covered)
+		*cursor_mode = DM_CURSOR_NATIVE_MODE;
+	else
+		*cursor_mode = DM_CURSOR_OVERLAY_MODE;
+
+	return 0;
+}
+
+/*
+ * Check whether any plane on the CRTC switches framebuffer memory type.
+ *
+ * For every plane in @crtc_state->plane_mask the plane is pulled into @state
+ * and get_mem_type() of the old and new framebuffers is compared. Planes
+ * lacking a framebuffer on either side are ignored.
+ *
+ * Returns true as soon as one plane's memory type differs. Returns false when
+ * none differ, and also when a plane state cannot be obtained (the failure is
+ * logged with drm_err() and otherwise not propagated).
+ */
+static bool amdgpu_dm_crtc_mem_type_changed(struct drm_device *dev,
+					    struct drm_atomic_state *state,
+					    struct drm_crtc_state *crtc_state)
+{
+	struct drm_plane *plane;
+	struct drm_plane_state *new_plane_state, *old_plane_state;
+
+	drm_for_each_plane_mask(plane, dev, crtc_state->plane_mask) {
+		new_plane_state = drm_atomic_get_plane_state(state, plane);
+		if (IS_ERR(new_plane_state)) {
+			drm_err(dev, "Failed to get plane state for plane %s\n", plane->name);
+			return false;
+		}
+
+		/*
+		 * drm_atomic_get_plane_state() always hands back the NEW state,
+		 * so the old one has to be looked up separately; comparing the
+		 * new state against itself could never detect a change.
+		 */
+		old_plane_state = drm_atomic_get_old_plane_state(state, plane);
+
+		if (old_plane_state && old_plane_state->fb && new_plane_state->fb &&
+		    get_mem_type(old_plane_state->fb) != get_mem_type(new_plane_state->fb))
+			return true;
+	}
+
+	return false;
+}
-#endif
/**
* amdgpu_dm_atomic_check() - Atomic check implementation for AMDgpu DM.
+ *
* @dev: The DRM device
* @state: The atomic state to commit
*
@@ -10775,22 +12417,23 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
struct drm_crtc *crtc;
struct drm_crtc_state *old_crtc_state, *new_crtc_state;
struct drm_plane *plane;
- struct drm_plane_state *old_plane_state, *new_plane_state;
+ struct drm_plane_state *old_plane_state, *new_plane_state, *new_cursor_state;
enum dc_status status;
int ret, i;
bool lock_and_validation_needed = false;
- struct dm_crtc_state *dm_old_crtc_state;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- struct dsc_mst_fairness_vars vars[MAX_PIPES];
- struct drm_dp_mst_topology_state *mst_state;
+ bool is_top_most_overlay = true;
+ struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state;
struct drm_dp_mst_topology_mgr *mgr;
-#endif
+ struct drm_dp_mst_topology_state *mst_state;
+ struct dsc_mst_fairness_vars vars[MAX_PIPES] = {0};
trace_amdgpu_dm_atomic_check_begin(state);
ret = drm_atomic_helper_check_modeset(dev, state);
- if (ret)
+ if (ret) {
+ drm_dbg_atomic(dev, "drm_atomic_helper_check_modeset() failed\n");
goto fail;
+ }
/* Check connector changes */
for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) {
@@ -10798,34 +12441,32 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state);
/* Skip connectors that are disabled or part of modeset already. */
- if (!old_con_state->crtc && !new_con_state->crtc)
- continue;
-
if (!new_con_state->crtc)
continue;
new_crtc_state = drm_atomic_get_crtc_state(state, new_con_state->crtc);
if (IS_ERR(new_crtc_state)) {
+ drm_dbg_atomic(dev, "drm_atomic_get_crtc_state() failed\n");
ret = PTR_ERR(new_crtc_state);
goto fail;
}
- if (dm_old_con_state->abm_level !=
- dm_new_con_state->abm_level)
+ if (dm_old_con_state->abm_level != dm_new_con_state->abm_level ||
+ dm_old_con_state->scaling != dm_new_con_state->scaling)
new_crtc_state->connectors_changed = true;
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
if (dc_resource_is_dsc_encoding_supported(dc)) {
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
if (drm_atomic_crtc_needs_modeset(new_crtc_state)) {
ret = add_affected_mst_dsc_crtcs(state, crtc);
- if (ret)
+ if (ret) {
+ drm_dbg_atomic(dev, "add_affected_mst_dsc_crtcs() failed\n");
goto fail;
+ }
}
}
}
-#endif
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
@@ -10836,19 +12477,25 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
continue;
ret = amdgpu_dm_verify_lut_sizes(new_crtc_state);
- if (ret)
+ if (ret) {
+ drm_dbg_atomic(dev, "amdgpu_dm_verify_lut_sizes() failed\n");
goto fail;
+ }
if (!new_crtc_state->enable)
continue;
ret = drm_atomic_add_affected_connectors(state, crtc);
- if (ret)
+ if (ret) {
+ drm_dbg_atomic(dev, "drm_atomic_add_affected_connectors() failed\n");
goto fail;
+ }
ret = drm_atomic_add_affected_planes(state, crtc);
- if (ret)
+ if (ret) {
+ drm_dbg_atomic(dev, "drm_atomic_add_affected_planes() failed\n");
goto fail;
+ }
if (dm_old_crtc_state->dsc_force_changed)
new_crtc_state->mode_changed = true;
@@ -10885,20 +12532,63 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
if (IS_ERR(new_plane_state)) {
ret = PTR_ERR(new_plane_state);
+ drm_dbg_atomic(dev, "new_plane_state is BAD\n");
goto fail;
}
}
}
+ /*
+ * DC consults the zpos (layer_index in DC terminology) to determine the
+ * hw plane on which to enable the hw cursor (see
+ * `dcn10_can_pipe_disable_cursor`). By now, all modified planes are in
+ * atomic state, so call drm helper to normalize zpos.
+ */
+ ret = drm_atomic_normalize_zpos(dev, state);
+ if (ret) {
+ drm_dbg(dev, "drm_atomic_normalize_zpos() failed\n");
+ goto fail;
+ }
+
+ /*
+ * Determine whether cursors on each CRTC should be enabled in native or
+ * overlay mode.
+ */
+ for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
+ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+
+ ret = dm_crtc_get_cursor_mode(adev, state, dm_new_crtc_state,
+ &dm_new_crtc_state->cursor_mode);
+ if (ret) {
+ drm_dbg(dev, "Failed to determine cursor mode\n");
+ goto fail;
+ }
+
+ /*
+ * If overlay cursor is needed, DC cannot go through the
+ * native cursor update path. All enabled planes on the CRTC
+ * need to be added for DC to not disable a plane by mistake
+ */
+ if (dm_new_crtc_state->cursor_mode == DM_CURSOR_OVERLAY_MODE) {
+ ret = drm_atomic_add_affected_planes(state, crtc);
+ if (ret)
+ goto fail;
+ }
+ }
+
/* Remove exiting planes if they are modified */
- for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) {
+ for_each_oldnew_plane_in_descending_zpos(state, plane, old_plane_state, new_plane_state) {
+
ret = dm_update_plane_state(dc, state, plane,
old_plane_state,
new_plane_state,
false,
- &lock_and_validation_needed);
- if (ret)
+ &lock_and_validation_needed,
+ &is_top_most_overlay);
+ if (ret) {
+ drm_dbg_atomic(dev, "dm_update_plane_state() failed\n");
goto fail;
+ }
}
/* Disable all crtcs which require disable */
@@ -10908,8 +12598,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
new_crtc_state,
false,
&lock_and_validation_needed);
- if (ret)
+ if (ret) {
+ drm_dbg_atomic(dev, "DISABLE: dm_update_crtc_state() failed\n");
goto fail;
+ }
}
/* Enable all crtcs which require enable */
@@ -10919,31 +12611,92 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
new_crtc_state,
true,
&lock_and_validation_needed);
- if (ret)
+ if (ret) {
+ drm_dbg_atomic(dev, "ENABLE: dm_update_crtc_state() failed\n");
goto fail;
+ }
}
/* Add new/modified planes */
- for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) {
+ for_each_oldnew_plane_in_descending_zpos(state, plane, old_plane_state, new_plane_state) {
ret = dm_update_plane_state(dc, state, plane,
old_plane_state,
new_plane_state,
true,
- &lock_and_validation_needed);
- if (ret)
+ &lock_and_validation_needed,
+ &is_top_most_overlay);
+ if (ret) {
+ drm_dbg_atomic(dev, "dm_update_plane_state() failed\n");
goto fail;
+ }
}
+#if defined(CONFIG_DRM_AMD_DC_FP)
+ if (dc_resource_is_dsc_encoding_supported(dc)) {
+ ret = pre_validate_dsc(state, &dm_state, vars);
+ if (ret != 0)
+ goto fail;
+ }
+#endif
+
/* Run this here since we want to validate the streams we created */
ret = drm_atomic_helper_check_planes(dev, state);
- if (ret)
+ if (ret) {
+ drm_dbg_atomic(dev, "drm_atomic_helper_check_planes() failed\n");
goto fail;
+ }
- /* Check cursor planes scaling */
for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
- ret = dm_check_crtc_cursor(state, crtc, new_crtc_state);
- if (ret)
+ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+ if (dm_new_crtc_state->mpo_requested)
+ drm_dbg_atomic(dev, "MPO enablement requested on crtc:[%p]\n", crtc);
+ }
+
+ /* Check cursor restrictions */
+ for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
+ enum amdgpu_dm_cursor_mode required_cursor_mode;
+ int is_rotated, is_scaled;
+
+ /* Overlay cursor is not subject to native cursor restrictions */
+ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+ if (dm_new_crtc_state->cursor_mode == DM_CURSOR_OVERLAY_MODE)
+ continue;
+
+ /* Check if rotation or scaling is enabled on DCN401 */
+ if ((drm_plane_mask(crtc->cursor) & new_crtc_state->plane_mask) &&
+ amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(4, 0, 1)) {
+ new_cursor_state = drm_atomic_get_new_plane_state(state, crtc->cursor);
+
+ is_rotated = new_cursor_state &&
+ ((new_cursor_state->rotation & DRM_MODE_ROTATE_MASK) != DRM_MODE_ROTATE_0);
+ is_scaled = new_cursor_state && ((new_cursor_state->src_w >> 16 != new_cursor_state->crtc_w) ||
+ (new_cursor_state->src_h >> 16 != new_cursor_state->crtc_h));
+
+ if (is_rotated || is_scaled) {
+ drm_dbg_driver(
+ crtc->dev,
+ "[CRTC:%d:%s] cannot enable hardware cursor due to rotation/scaling\n",
+ crtc->base.id, crtc->name);
+ ret = -EINVAL;
+ goto fail;
+ }
+ }
+
+ /* If HW can only do native cursor, check restrictions again */
+ ret = dm_crtc_get_cursor_mode(adev, state, dm_new_crtc_state,
+ &required_cursor_mode);
+ if (ret) {
+ drm_dbg_driver(crtc->dev,
+ "[CRTC:%d:%s] Checking cursor mode failed\n",
+ crtc->base.id, crtc->name);
+ goto fail;
+ } else if (required_cursor_mode == DM_CURSOR_OVERLAY_MODE) {
+ drm_dbg_driver(crtc->dev,
+ "[CRTC:%d:%s] Cannot enable native cursor due to scaling or YUV restrictions\n",
+ crtc->base.id, crtc->name);
+ ret = -EINVAL;
goto fail;
+ }
}
if (state->legacy_cursor_update) {
@@ -10988,7 +12741,6 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
lock_and_validation_needed = true;
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
/* set the slot info for each mst_state based on the link encoding format */
for_each_new_mst_mgr_in_state(state, mgr, mst_state, i) {
struct amdgpu_dm_connector *aconnector;
@@ -10996,14 +12748,9 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
struct drm_connector_list_iter iter;
u8 link_coding_cap;
- if (!mgr->mst_state )
- continue;
-
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
- int id = connector->index;
-
- if (id == mst_state->mgr->conn_base_id) {
+ if (connector->index == mst_state->mgr->conn_base_id) {
aconnector = to_amdgpu_dm_connector(connector);
link_coding_cap = dc_link_dp_mst_decide_link_encoding_format(aconnector->dc_link);
drm_dp_mst_update_slots(mst_state, link_coding_cap);
@@ -11012,9 +12759,8 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
}
}
drm_connector_list_iter_end(&iter);
-
}
-#endif
+
/**
* Streams and planes are reset when there are changes that affect
* bandwidth. Anything that affects bandwidth needs to go through
@@ -11030,21 +12776,33 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
*/
if (lock_and_validation_needed) {
ret = dm_atomic_get_state(state, &dm_state);
- if (ret)
+ if (ret) {
+ drm_dbg_atomic(dev, "dm_atomic_get_state() failed\n");
goto fail;
+ }
ret = do_aquire_global_lock(dev, state);
- if (ret)
+ if (ret) {
+ drm_dbg_atomic(dev, "do_aquire_global_lock() failed\n");
goto fail;
+ }
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (!compute_mst_dsc_configs_for_state(state, dm_state->context, vars))
- goto fail;
+#if defined(CONFIG_DRM_AMD_DC_FP)
+ if (dc_resource_is_dsc_encoding_supported(dc)) {
+ ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars);
+ if (ret) {
+ drm_dbg_atomic(dev, "MST_DSC compute_mst_dsc_configs_for_state() failed\n");
+ ret = -EINVAL;
+ goto fail;
+ }
+ }
+#endif
ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context, vars);
- if (ret)
+ if (ret) {
+ drm_dbg_atomic(dev, "dm_update_mst_vcpi_slots_for_dsc() failed\n");
goto fail;
-#endif
+ }
/*
* Perform validation of MST topology in the state:
@@ -11053,12 +12811,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
* to get stuck in an infinite loop and hang eventually.
*/
ret = drm_dp_mst_atomic_check(state);
- if (ret)
+ if (ret) {
+ drm_dbg_atomic(dev, "MST drm_dp_mst_atomic_check() failed\n");
goto fail;
- status = dc_validate_global_state(dc, dm_state->context, false);
+ }
+ status = dc_validate_global_state(dc, dm_state->context, DC_VALIDATE_MODE_ONLY);
if (status != DC_OK) {
- drm_dbg_atomic(dev,
- "DC global validation failure: %s (%d)",
+ drm_dbg_atomic(dev, "DC global validation failure: %s (%d)",
dc_status_to_str(status), status);
ret = -EINVAL;
goto fail;
@@ -11085,7 +12844,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
int j = state->num_private_objs-1;
dm_atomic_destroy_state(obj,
- state->private_objs[i].state);
+ state->private_objs[i].state_to_destroy);
/* If i is not at the end of the array then the
* last element needs to be moved to where i was
@@ -11096,7 +12855,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
state->private_objs[j];
state->private_objs[j].ptr = NULL;
- state->private_objs[j].state = NULL;
+ state->private_objs[j].state_to_destroy = NULL;
state->private_objs[j].old_state = NULL;
state->private_objs[j].new_state = NULL;
@@ -11107,13 +12866,26 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
}
/* Store the overall update type for use later in atomic check. */
- for_each_new_crtc_in_state (state, crtc, new_crtc_state, i) {
+ for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
struct dm_crtc_state *dm_new_crtc_state =
to_dm_crtc_state(new_crtc_state);
+ /*
+ * Only allow async flips for fast updates that don't change
+ * the FB pitch, the DCC state, rotation, mem_type, etc.
+ */
+ if (new_crtc_state->async_flip &&
+ (lock_and_validation_needed ||
+ amdgpu_dm_crtc_mem_type_changed(dev, state, new_crtc_state))) {
+ drm_dbg_atomic(crtc->dev,
+ "[CRTC:%d:%s] async flips are only supported for fast updates\n",
+ crtc->base.id, crtc->name);
+ ret = -EINVAL;
+ goto fail;
+ }
+
dm_new_crtc_state->update_type = lock_and_validation_needed ?
- UPDATE_TYPE_FULL :
- UPDATE_TYPE_FAST;
+ UPDATE_TYPE_FULL : UPDATE_TYPE_FAST;
}
/* Must be success */
@@ -11125,40 +12897,21 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
fail:
if (ret == -EDEADLK)
- DRM_DEBUG_DRIVER("Atomic check stopped to avoid deadlock.\n");
+ drm_dbg_atomic(dev, "Atomic check stopped to avoid deadlock.\n");
else if (ret == -EINTR || ret == -EAGAIN || ret == -ERESTARTSYS)
- DRM_DEBUG_DRIVER("Atomic check stopped due to signal.\n");
+ drm_dbg_atomic(dev, "Atomic check stopped due to signal.\n");
else
- DRM_DEBUG_DRIVER("Atomic check failed with err: %d \n", ret);
+ drm_dbg_atomic(dev, "Atomic check failed with err: %d\n", ret);
trace_amdgpu_dm_atomic_check_finish(state, ret);
return ret;
}
-static bool is_dp_capable_without_timing_msa(struct dc *dc,
- struct amdgpu_dm_connector *amdgpu_dm_connector)
-{
- uint8_t dpcd_data;
- bool capable = false;
-
- if (amdgpu_dm_connector->dc_link &&
- dm_helpers_dp_read_dpcd(
- NULL,
- amdgpu_dm_connector->dc_link,
- DP_DOWN_STREAM_PORT_COUNT,
- &dpcd_data,
- sizeof(dpcd_data))) {
- capable = (dpcd_data & DP_MSA_TIMING_PAR_IGNORED) ? true:false;
- }
-
- return capable;
-}
-
static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm,
unsigned int offset,
unsigned int total_length,
- uint8_t *data,
+ u8 *data,
unsigned int length,
struct amdgpu_hdmi_vsdb_info *vsdb)
{
@@ -11180,12 +12933,12 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm,
sizeof(cmd.edid_cea) - sizeof(cmd.edid_cea.header);
input->offset = offset;
input->length = length;
- input->total_length = total_length;
+ input->cea_total_length = total_length;
memcpy(input->payload, data, length);
- res = dc_dmub_srv_cmd_with_reply_data(dm->dc->ctx->dmub_srv, &cmd);
+ res = dc_wake_and_execute_dmub_cmd(dm->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
if (!res) {
- DRM_ERROR("EDID CEA parser failed\n");
+ drm_err(adev_to_drm(dm->adev), "EDID CEA parser failed\n");
return false;
}
@@ -11193,7 +12946,7 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm,
if (output->type == DMUB_CMD__EDID_CEA_ACK) {
if (!output->ack.success) {
- DRM_ERROR("EDID CEA ack failed at offset %d\n",
+ drm_err(adev_to_drm(dm->adev), "EDID CEA ack failed at offset %d\n",
output->ack.offset);
}
} else if (output->type == DMUB_CMD__EDID_CEA_AMD_VSDB) {
@@ -11205,7 +12958,7 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm,
vsdb->min_refresh_rate_hz = output->amd_vsdb.min_frame_rate;
vsdb->max_refresh_rate_hz = output->amd_vsdb.max_frame_rate;
} else {
- DRM_WARN("Unknown EDID CEA parser results\n");
+ drm_warn(adev_to_drm(dm->adev), "Unknown EDID CEA parser results\n");
return false;
}
@@ -11213,7 +12966,7 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm,
}
static bool parse_edid_cea_dmcu(struct amdgpu_display_manager *dm,
- uint8_t *edid_ext, int len,
+ u8 *edid_ext, int len,
struct amdgpu_hdmi_vsdb_info *vsdb_info)
{
int i;
@@ -11254,7 +13007,7 @@ static bool parse_edid_cea_dmcu(struct amdgpu_display_manager *dm,
}
static bool parse_edid_cea_dmub(struct amdgpu_display_manager *dm,
- uint8_t *edid_ext, int len,
+ u8 *edid_ext, int len,
struct amdgpu_hdmi_vsdb_info *vsdb_info)
{
int i;
@@ -11270,21 +13023,104 @@ static bool parse_edid_cea_dmub(struct amdgpu_display_manager *dm,
}
static bool parse_edid_cea(struct amdgpu_dm_connector *aconnector,
- uint8_t *edid_ext, int len,
+ u8 *edid_ext, int len,
struct amdgpu_hdmi_vsdb_info *vsdb_info)
{
struct amdgpu_device *adev = drm_to_adev(aconnector->base.dev);
+ bool ret;
+ mutex_lock(&adev->dm.dc_lock);
if (adev->dm.dmub_srv)
- return parse_edid_cea_dmub(&adev->dm, edid_ext, len, vsdb_info);
+ ret = parse_edid_cea_dmub(&adev->dm, edid_ext, len, vsdb_info);
else
- return parse_edid_cea_dmcu(&adev->dm, edid_ext, len, vsdb_info);
+ ret = parse_edid_cea_dmcu(&adev->dm, edid_ext, len, vsdb_info);
+ mutex_unlock(&adev->dm.dc_lock);
+ return ret;
+}
+
+static void parse_edid_displayid_vrr(struct drm_connector *connector,
+ const struct edid *edid)
+{
+ u8 *edid_ext = NULL;
+ int i;
+ int j = 0;
+ u16 min_vfreq;
+ u16 max_vfreq;
+
+ if (edid == NULL || edid->extensions == 0)
+ return;
+
+ /* Find DisplayID extension */
+ for (i = 0; i < edid->extensions; i++) {
+ edid_ext = (void *)(edid + (i + 1));
+ if (edid_ext[0] == DISPLAYID_EXT)
+ break;
+ }
+
+ if (edid_ext == NULL)
+ return;
+
+ while (j < EDID_LENGTH) {
+ /* Get dynamic video timing range from DisplayID if available */
+ if (EDID_LENGTH - j > 13 && edid_ext[j] == 0x25 &&
+ (edid_ext[j+1] & 0xFE) == 0 && (edid_ext[j+2] == 9)) {
+ min_vfreq = edid_ext[j+9];
+ if (edid_ext[j+1] & 7)
+ max_vfreq = edid_ext[j+10] + ((edid_ext[j+11] & 3) << 8);
+ else
+ max_vfreq = edid_ext[j+10];
+
+ if (max_vfreq && min_vfreq) {
+ connector->display_info.monitor_range.max_vfreq = max_vfreq;
+ connector->display_info.monitor_range.min_vfreq = min_vfreq;
+
+ return;
+ }
+ }
+ j++;
+ }
+}
+
+static int parse_amd_vsdb(struct amdgpu_dm_connector *aconnector,
+ const struct edid *edid, struct amdgpu_hdmi_vsdb_info *vsdb_info)
+{
+ u8 *edid_ext = NULL;
+ int i;
+ int j = 0;
+
+ if (edid == NULL || edid->extensions == 0)
+ return -ENODEV;
+
+ /* Find DisplayID extension */
+ for (i = 0; i < edid->extensions; i++) {
+ edid_ext = (void *)(edid + (i + 1));
+ if (edid_ext[0] == DISPLAYID_EXT)
+ break;
+ }
+
+ while (j < EDID_LENGTH - sizeof(struct amd_vsdb_block)) {
+ struct amd_vsdb_block *amd_vsdb = (struct amd_vsdb_block *)&edid_ext[j];
+ unsigned int ieeeId = (amd_vsdb->ieee_id[2] << 16) | (amd_vsdb->ieee_id[1] << 8) | (amd_vsdb->ieee_id[0]);
+
+ if (ieeeId == HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID &&
+ amd_vsdb->version == HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_VERSION_3) {
+ vsdb_info->replay_mode = (amd_vsdb->feature_caps & AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE) ? true : false;
+ vsdb_info->amd_vsdb_version = HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_VERSION_3;
+ DRM_DEBUG_KMS("Panel supports Replay Mode: %d\n", vsdb_info->replay_mode);
+
+ return true;
+ }
+ j++;
+ }
+
+ return false;
}
static int parse_hdmi_amd_vsdb(struct amdgpu_dm_connector *aconnector,
- struct edid *edid, struct amdgpu_hdmi_vsdb_info *vsdb_info)
+ const struct edid *edid,
+ struct amdgpu_hdmi_vsdb_info *vsdb_info)
{
- uint8_t *edid_ext = NULL;
+ u8 *edid_ext = NULL;
int i;
bool valid_vsdb_found = false;
@@ -11312,25 +13148,33 @@ static int parse_hdmi_amd_vsdb(struct amdgpu_dm_connector *aconnector,
return valid_vsdb_found ? i : -ENODEV;
}
+/**
+ * amdgpu_dm_update_freesync_caps - Update Freesync capabilities
+ *
+ * @connector: Connector to query.
+ * @drm_edid: DRM EDID from monitor
+ *
+ * Amdgpu supports Freesync in DP and HDMI displays, and it is required to keep
+ * track of some of the display information in the internal data struct used by
+ * amdgpu_dm. This function checks which type of connector we need to set the
+ * FreeSync parameters.
+ */
void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
- struct edid *edid)
+ const struct drm_edid *drm_edid)
{
int i = 0;
- struct detailed_timing *timing;
- struct detailed_non_pixel *data;
- struct detailed_data_monitor_range *range;
struct amdgpu_dm_connector *amdgpu_dm_connector =
to_amdgpu_dm_connector(connector);
struct dm_connector_state *dm_con_state = NULL;
struct dc_sink *sink;
-
- struct drm_device *dev = connector->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- bool freesync_capable = false;
+ struct amdgpu_device *adev = drm_to_adev(connector->dev);
struct amdgpu_hdmi_vsdb_info vsdb_info = {0};
+ const struct edid *edid;
+ bool freesync_capable = false;
+ enum adaptive_sync_type as_type = ADAPTIVE_SYNC_TYPE_NONE;
if (!connector->state) {
- DRM_ERROR("%s - Connector has no state", __func__);
+ drm_err(adev_to_drm(adev), "%s - Connector has no state", __func__);
goto update;
}
@@ -11338,14 +13182,13 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
amdgpu_dm_connector->dc_sink :
amdgpu_dm_connector->dc_em_sink;
- if (!edid || !sink) {
+ drm_edid_connector_update(connector, drm_edid);
+
+ if (!drm_edid || !sink) {
dm_con_state = to_dm_connector_state(connector->state);
amdgpu_dm_connector->min_vfreq = 0;
amdgpu_dm_connector->max_vfreq = 0;
- amdgpu_dm_connector->pixel_clock_mhz = 0;
- connector->display_info.monitor_range.min_vfreq = 0;
- connector->display_info.monitor_range.max_vfreq = 0;
freesync_capable = false;
goto update;
@@ -11353,63 +13196,57 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
dm_con_state = to_dm_connector_state(connector->state);
- if (!adev->dm.freesync_module)
+ if (!adev->dm.freesync_module || !dc_supports_vrr(sink->ctx->dce_version))
goto update;
+ edid = drm_edid_raw(drm_edid); // FIXME: Get rid of drm_edid_raw()
- if (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT
- || sink->sink_signal == SIGNAL_TYPE_EDP) {
- bool edid_check_required = false;
+ /* Some eDP panels only have the refresh rate range info in DisplayID */
+ if ((connector->display_info.monitor_range.min_vfreq == 0 ||
+ connector->display_info.monitor_range.max_vfreq == 0))
+ parse_edid_displayid_vrr(connector, edid);
- if (edid) {
- edid_check_required = is_dp_capable_without_timing_msa(
- adev->dm.dc,
- amdgpu_dm_connector);
+ if (edid && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT ||
+ sink->sink_signal == SIGNAL_TYPE_EDP)) {
+ if (amdgpu_dm_connector->dc_link &&
+ amdgpu_dm_connector->dc_link->dpcd_caps.allow_invalid_MSA_timing_param) {
+ amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq;
+ amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq;
+ if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10)
+ freesync_capable = true;
}
- if (edid_check_required == true && (edid->version > 1 ||
- (edid->version == 1 && edid->revision > 1))) {
- for (i = 0; i < 4; i++) {
+ parse_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info);
- timing = &edid->detailed_timings[i];
- data = &timing->data.other_data;
- range = &data->data.range;
- /*
- * Check if monitor has continuous frequency mode
- */
- if (data->type != EDID_DETAIL_MONITOR_RANGE)
- continue;
- /*
- * Check for flag range limits only. If flag == 1 then
- * no additional timing information provided.
- * Default GTF, GTF Secondary curve and CVT are not
- * supported
- */
- if (range->flags != 1)
- continue;
-
- amdgpu_dm_connector->min_vfreq = range->min_vfreq;
- amdgpu_dm_connector->max_vfreq = range->max_vfreq;
- amdgpu_dm_connector->pixel_clock_mhz =
- range->pixel_clock_mhz * 10;
+ if (vsdb_info.replay_mode) {
+ amdgpu_dm_connector->vsdb_info.replay_mode = vsdb_info.replay_mode;
+ amdgpu_dm_connector->vsdb_info.amd_vsdb_version = vsdb_info.amd_vsdb_version;
+ amdgpu_dm_connector->as_type = ADAPTIVE_SYNC_TYPE_EDP;
+ }
- connector->display_info.monitor_range.min_vfreq = range->min_vfreq;
- connector->display_info.monitor_range.max_vfreq = range->max_vfreq;
+ } else if (drm_edid && sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A) {
+ i = parse_hdmi_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info);
+ if (i >= 0 && vsdb_info.freesync_supported) {
+ amdgpu_dm_connector->min_vfreq = vsdb_info.min_refresh_rate_hz;
+ amdgpu_dm_connector->max_vfreq = vsdb_info.max_refresh_rate_hz;
+ if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10)
+ freesync_capable = true;
- break;
- }
+ connector->display_info.monitor_range.min_vfreq = vsdb_info.min_refresh_rate_hz;
+ connector->display_info.monitor_range.max_vfreq = vsdb_info.max_refresh_rate_hz;
+ }
+ }
- if (amdgpu_dm_connector->max_vfreq -
- amdgpu_dm_connector->min_vfreq > 10) {
+ if (amdgpu_dm_connector->dc_link)
+ as_type = dm_get_adaptive_sync_support_type(amdgpu_dm_connector->dc_link);
- freesync_capable = true;
- }
- }
- } else if (edid && sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A) {
+ if (as_type == FREESYNC_TYPE_PCON_IN_WHITELIST) {
i = parse_hdmi_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info);
- if (i >= 0 && vsdb_info.freesync_supported) {
- timing = &edid->detailed_timings[i];
- data = &timing->data.other_data;
+ if (i >= 0 && vsdb_info.freesync_supported && vsdb_info.amd_vsdb_version > 0) {
+
+ amdgpu_dm_connector->pack_sdp_v1_3 = true;
+ amdgpu_dm_connector->as_type = as_type;
+ amdgpu_dm_connector->vsdb_info = vsdb_info;
amdgpu_dm_connector->min_vfreq = vsdb_info.min_refresh_rate_hz;
amdgpu_dm_connector->max_vfreq = vsdb_info.max_refresh_rate_hz;
@@ -11425,6 +13262,12 @@ update:
if (dm_con_state)
dm_con_state->freesync_capable = freesync_capable;
+ if (connector->state && amdgpu_dm_connector->dc_link && !freesync_capable &&
+ amdgpu_dm_connector->dc_link->replay_settings.config.replay_supported) {
+ amdgpu_dm_connector->dc_link->replay_settings.config.replay_supported = false;
+ amdgpu_dm_connector->dc_link->replay_settings.replay_feature_enabled = false;
+ }
+
if (connector->vrr_capable_property)
drm_connector_set_vrr_capable_property(connector,
freesync_capable);
@@ -11449,15 +13292,24 @@ void amdgpu_dm_trigger_timing_sync(struct drm_device *dev)
mutex_unlock(&adev->dm.dc_lock);
}
+static inline void amdgpu_dm_exit_ips_for_hw_access(struct dc *dc)
+{
+ if (dc->ctx->dmub_srv && !dc->ctx->dmub_srv->idle_exit_counter)
+ dc_exit_ips_for_hw_access(dc);
+}
+
void dm_write_reg_func(const struct dc_context *ctx, uint32_t address,
- uint32_t value, const char *func_name)
+ u32 value, const char *func_name)
{
#ifdef DM_CHECK_ADDR_0
if (address == 0) {
- DC_ERR("invalid register write. address = 0");
+ drm_err(adev_to_drm(ctx->driver_context),
+ "invalid register write. address = 0");
return;
}
#endif
+
+ amdgpu_dm_exit_ips_for_hw_access(ctx->dc);
cgs_write_register(ctx->cgs_device, address, value);
trace_amdgpu_dc_wreg(&ctx->perf_trace->write_count, address, value);
}
@@ -11465,10 +13317,11 @@ void dm_write_reg_func(const struct dc_context *ctx, uint32_t address,
uint32_t dm_read_reg_func(const struct dc_context *ctx, uint32_t address,
const char *func_name)
{
- uint32_t value;
+ u32 value;
#ifdef DM_CHECK_ADDR_0
if (address == 0) {
- DC_ERR("invalid register read; address = 0\n");
+ drm_err(adev_to_drm(ctx->driver_context),
+ "invalid register read; address = 0\n");
return 0;
}
#endif
@@ -11480,6 +13333,8 @@ uint32_t dm_read_reg_func(const struct dc_context *ctx, uint32_t address,
return 0;
}
+ amdgpu_dm_exit_ips_for_hw_access(ctx->dc);
+
value = cgs_read_register(ctx->cgs_device, address);
trace_amdgpu_dc_rreg(&ctx->perf_trace->read_count, address, value);
@@ -11487,75 +13342,176 @@ uint32_t dm_read_reg_func(const struct dc_context *ctx, uint32_t address,
return value;
}
-int amdgpu_dm_set_dmub_async_sync_status(bool is_cmd_aux, struct dc_context *ctx,
- uint8_t status_type, uint32_t *operation_result)
+int amdgpu_dm_process_dmub_aux_transfer_sync(
+ struct dc_context *ctx,
+ unsigned int link_index,
+ struct aux_payload *payload,
+ enum aux_return_code_type *operation_result)
{
struct amdgpu_device *adev = ctx->driver_context;
- int return_status = -1;
struct dmub_notification *p_notify = adev->dm.dmub_notify;
+ int ret = -1;
- if (is_cmd_aux) {
- if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) {
- return_status = p_notify->aux_reply.length;
- *operation_result = p_notify->result;
- } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT) {
- *operation_result = AUX_RET_ERROR_TIMEOUT;
- } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_FAIL) {
- *operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE;
- } else {
- *operation_result = AUX_RET_ERROR_UNKNOWN;
+ mutex_lock(&adev->dm.dpia_aux_lock);
+ if (!dc_process_dmub_aux_transfer_async(ctx->dc, link_index, payload)) {
+ *operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE;
+ goto out;
+ }
+
+ if (!wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) {
+ drm_err(adev_to_drm(adev), "wait_for_completion_timeout timeout!");
+ *operation_result = AUX_RET_ERROR_TIMEOUT;
+ goto out;
+ }
+
+ if (p_notify->result != AUX_RET_SUCCESS) {
+ /*
+ * Transient states before tunneling is enabled could
+ * lead to this error. We can ignore this for now.
+ */
+ if (p_notify->result == AUX_RET_ERROR_PROTOCOL_ERROR) {
+ drm_warn(adev_to_drm(adev), "DPIA AUX failed on 0x%x(%d), error %d\n",
+ payload->address, payload->length,
+ p_notify->result);
}
- } else {
- if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) {
- return_status = 0;
- *operation_result = p_notify->sc_status;
- } else {
- *operation_result = SET_CONFIG_UNKNOWN_ERROR;
+ *operation_result = p_notify->result;
+ goto out;
+ }
+
+ payload->reply[0] = adev->dm.dmub_notify->aux_reply.command & 0xF;
+ if (adev->dm.dmub_notify->aux_reply.command & 0xF0)
+ /* The reply is stored in the top nibble of the command. */
+ payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF;
+
+ /* A write request may also receive a byte indicating the partially written count. */
+ if (p_notify->aux_reply.length)
+ memcpy(payload->data, p_notify->aux_reply.data,
+ p_notify->aux_reply.length);
+
+ /* success */
+ ret = p_notify->aux_reply.length;
+ *operation_result = p_notify->result;
+out:
+ reinit_completion(&adev->dm.dmub_aux_transfer_done);
+ mutex_unlock(&adev->dm.dpia_aux_lock);
+ return ret;
+}
+
+static void abort_fused_io(
+ struct dc_context *ctx,
+ const struct dmub_cmd_fused_request *request
+)
+{
+ union dmub_rb_cmd command = { 0 };
+ struct dmub_rb_cmd_fused_io *io = &command.fused_io;
+
+ io->header.type = DMUB_CMD__FUSED_IO;
+ io->header.sub_type = DMUB_CMD__FUSED_IO_ABORT;
+ io->header.payload_bytes = sizeof(*io) - sizeof(io->header);
+ io->request = *request;
+ dm_execute_dmub_cmd(ctx, &command, DM_DMUB_WAIT_TYPE_NO_WAIT);
+}
+
+static bool execute_fused_io(
+ struct amdgpu_device *dev,
+ struct dc_context *ctx,
+ union dmub_rb_cmd *commands,
+ uint8_t count,
+ uint32_t timeout_us
+)
+{
+ const uint8_t ddc_line = commands[0].fused_io.request.u.aux.ddc_line;
+
+ if (ddc_line >= ARRAY_SIZE(dev->dm.fused_io))
+ return false;
+
+ struct fused_io_sync *sync = &dev->dm.fused_io[ddc_line];
+ struct dmub_rb_cmd_fused_io *first = &commands[0].fused_io;
+ const bool result = dm_execute_dmub_cmd_list(ctx, count, commands, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)
+ && first->header.ret_status
+ && first->request.status == FUSED_REQUEST_STATUS_SUCCESS;
+
+ if (!result)
+ return false;
+
+ while (wait_for_completion_timeout(&sync->replied, usecs_to_jiffies(timeout_us))) {
+ reinit_completion(&sync->replied);
+
+ struct dmub_cmd_fused_request *reply = (struct dmub_cmd_fused_request *) sync->reply_data;
+
+ static_assert(sizeof(*reply) <= sizeof(sync->reply_data), "Size mismatch");
+
+ if (reply->identifier == first->request.identifier) {
+ first->request = *reply;
+ return true;
}
}
- return return_status;
+ reinit_completion(&sync->replied);
+ first->request.status = FUSED_REQUEST_STATUS_TIMEOUT;
+ abort_fused_io(ctx, &first->request);
+ return false;
}
-int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, struct dc_context *ctx,
- unsigned int link_index, void *cmd_payload, void *operation_result)
+bool amdgpu_dm_execute_fused_io(
+ struct amdgpu_device *dev,
+ struct dc_link *link,
+ union dmub_rb_cmd *commands,
+ uint8_t count,
+ uint32_t timeout_us)
+{
+ struct amdgpu_display_manager *dm = &dev->dm;
+
+ mutex_lock(&dm->dpia_aux_lock);
+
+ const bool result = execute_fused_io(dev, link->ctx, commands, count, timeout_us);
+
+ mutex_unlock(&dm->dpia_aux_lock);
+ return result;
+}
+
+int amdgpu_dm_process_dmub_set_config_sync(
+ struct dc_context *ctx,
+ unsigned int link_index,
+ struct set_config_cmd_payload *payload,
+ enum set_config_status *operation_result)
{
struct amdgpu_device *adev = ctx->driver_context;
- int ret = 0;
+ bool is_cmd_complete;
+ int ret;
- if (is_cmd_aux) {
- dc_process_dmub_aux_transfer_async(ctx->dc,
- link_index, (struct aux_payload *)cmd_payload);
- } else if (dc_process_dmub_set_config_async(ctx->dc, link_index,
- (struct set_config_cmd_payload *)cmd_payload,
- adev->dm.dmub_notify)) {
- return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
- ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS,
- (uint32_t *)operation_result);
- }
+ mutex_lock(&adev->dm.dpia_aux_lock);
+ is_cmd_complete = dc_process_dmub_set_config_async(ctx->dc,
+ link_index, payload, adev->dm.dmub_notify);
- ret = wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ);
- if (ret == 0) {
- DRM_ERROR("wait_for_completion_timeout timeout!");
- return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
- ctx, DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT,
- (uint32_t *)operation_result);
+ if (is_cmd_complete || wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) {
+ ret = 0;
+ *operation_result = adev->dm.dmub_notify->sc_status;
+ } else {
+ drm_err(adev_to_drm(adev), "wait_for_completion_timeout timeout!");
+ ret = -1;
+ *operation_result = SET_CONFIG_UNKNOWN_ERROR;
}
- if (is_cmd_aux) {
- if (adev->dm.dmub_notify->result == AUX_RET_SUCCESS) {
- struct aux_payload *payload = (struct aux_payload *)cmd_payload;
+ if (!is_cmd_complete)
+ reinit_completion(&adev->dm.dmub_aux_transfer_done);
+ mutex_unlock(&adev->dm.dpia_aux_lock);
+ return ret;
+}
- payload->reply[0] = adev->dm.dmub_notify->aux_reply.command;
- if (!payload->write && adev->dm.dmub_notify->aux_reply.length &&
- payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK) {
- memcpy(payload->data, adev->dm.dmub_notify->aux_reply.data,
- adev->dm.dmub_notify->aux_reply.length);
- }
- }
- }
+bool dm_execute_dmub_cmd(const struct dc_context *ctx, union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type)
+{
+ return dc_dmub_srv_cmd_run(ctx->dmub_srv, cmd, wait_type);
+}
+
+bool dm_execute_dmub_cmd_list(const struct dc_context *ctx, unsigned int count, union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type)
+{
+ return dc_dmub_srv_cmd_run_list(ctx->dmub_srv, count, cmd, wait_type);
+}
- return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
- ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS,
- (uint32_t *)operation_result);
+void dm_acpi_process_phy_transition_interlock(
+ const struct dc_context *ctx,
+ struct dm_process_phy_transition_init_params process_phy_transition_init_params)
+{
+ // Not yet implemented
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 37e61a88d49e..ef97cede9926 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: MIT */
/*
* Copyright (C) 2015-2020 Advanced Micro Devices, Inc. All rights reserved.
*
@@ -26,11 +27,13 @@
#ifndef __AMDGPU_DM_H__
#define __AMDGPU_DM_H__
+#include <drm/display/drm_dp_mst_helper.h>
#include <drm/drm_atomic.h>
#include <drm/drm_connector.h>
#include <drm/drm_crtc.h>
-#include <drm/drm_dp_mst_helper.h>
#include <drm/drm_plane.h>
+#include "link_service_types.h"
+#include <drm/drm_writeback.h>
/*
* This file contains the definition for amdgpu_display_manager
@@ -48,14 +51,15 @@
#define AMDGPU_DM_MAX_NUM_EDP 2
-#define AMDGPU_DMUB_NOTIFICATION_MAX 5
+#define AMDGPU_DMUB_NOTIFICATION_MAX 8
-/**
- * DMUB Async to Sync Mechanism Status
- **/
-#define DMUB_ASYNC_TO_SYNC_ACCESS_FAIL 1
-#define DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT 2
-#define DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS 3
+#define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID 0x00001A
+#define AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE 0x40
+#define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_VERSION_3 0x3
+
+#define AMDGPU_HDR_MULT_DEFAULT (0x100000000LL)
+
+#define AMDGPU_DM_HDMI_HPD_DEBOUNCE_MS 1500
/*
#include "include/amdgpu_dal_power_if.h"
#include "amdgpu_dm_irq.h"
@@ -64,8 +68,11 @@
#include "irq_types.h"
#include "signal_types.h"
#include "amdgpu_dm_crc.h"
+#include "mod_info_packet.h"
struct aux_payload;
+struct set_config_cmd_payload;
enum aux_return_code_type;
+enum set_config_status;
/* Forward declarations */
struct amdgpu_device;
@@ -76,6 +83,13 @@ struct amdgpu_bo;
struct dmub_srv;
struct dc_plane_state;
struct dmub_notification;
+struct dmub_cmd_fused_request;
+
+struct amd_vsdb_block {
+ unsigned char ieee_id[3];
+ unsigned char version;
+ unsigned char feature_caps;
+};
struct common_irq_params {
struct amdgpu_device *adev;
@@ -127,6 +141,46 @@ struct vblank_control_work {
};
/**
+ * struct idle_workqueue - Work data for periodic action in idle
+ * @work: Kernel work data for the work event
+ * @dm: amdgpu display manager device
+ * @enable: true if idle worker is enabled
+ * @running: true if idle worker is running
+ */
+struct idle_workqueue {
+ struct work_struct work;
+ struct amdgpu_display_manager *dm;
+ bool enable;
+ bool running;
+};
+
+/**
+ * struct vupdate_offload_work - Work data for offloading task from vupdate handler
+ * @work: Kernel work data for the work event
+ * @adev: amdgpu_device back pointer
+ * @stream: DC stream associated with the crtc
+ * @adjust: DC CRTC timing adjust to be applied to the crtc
+ */
+struct vupdate_offload_work {
+ struct work_struct work;
+ struct amdgpu_device *adev;
+ struct dc_stream_state *stream;
+ struct dc_crtc_timing_adjust *adjust;
+};
+
+#define MAX_LUMINANCE_DATA_POINTS 99
+
+/**
+ * struct amdgpu_dm_luminance_data - Custom luminance data
+ * @luminance: Luminance in percent
+ * @input_signal: Input signal in range 0-255
+ */
+struct amdgpu_dm_luminance_data {
+ u8 luminance;
+ u8 input_signal;
+} __packed;
+
+/**
* struct amdgpu_dm_backlight_caps - Information about backlight
*
* Describe the backlight support for ACPI or eDP AUX.
@@ -162,6 +216,27 @@ struct amdgpu_dm_backlight_caps {
* @aux_support: Describes if the display supports AUX backlight.
*/
bool aux_support;
+ /**
+ * @brightness_mask: After deriving brightness, OR it with this mask.
+ * Workaround for panels with issues with certain brightness values.
+ */
+ u32 brightness_mask;
+ /**
+ * @ac_level: the default brightness if booted on AC
+ */
+ u8 ac_level;
+ /**
+ * @dc_level: the default brightness if booted on DC
+ */
+ u8 dc_level;
+ /**
+ * @data_points: the number of custom luminance data points
+ */
+ u8 data_points;
+ /**
+ * @luminance_data: custom luminance data
+ */
+ struct amdgpu_dm_luminance_data luminance_data[MAX_LUMINANCE_DATA_POINTS];
};
/**
@@ -197,6 +272,11 @@ struct hpd_rx_irq_offload_work_queue {
*/
bool is_handling_link_loss;
/**
+ * @is_handling_mst_msg_rdy_event: Used to prevent inserting mst message
+ * ready event when we're already handling mst message ready event
+ */
+ bool is_handling_mst_msg_rdy_event;
+ /**
* @aconnector: The aconnector that this work queue is attached to
*/
struct amdgpu_dm_connector *aconnector;
@@ -218,6 +298,10 @@ struct hpd_rx_irq_offload_work {
* @offload_wq: offload work queue that this work is queued to
*/
struct hpd_rx_irq_offload_work_queue *offload_wq;
+ /**
+ * @adev: amdgpu_device pointer
+ */
+ struct amdgpu_device *adev;
};
/**
@@ -242,6 +326,13 @@ struct hpd_rx_irq_offload_work {
* @force_timing_sync: set via debugfs. When set, indicates that all connected
* displays will be forced to synchronize.
* @dmcub_trace_event_en: enable dmcub trace events
+ * @dmub_outbox_params: DMUB Outbox parameters
+ * @num_of_edps: number of backlight eDPs
+ * @disable_hpd_irq: disables all HPD and HPD RX interrupt handling in the
+ * driver when true
+ * @dmub_aux_transfer_done: struct completion used to indicate when DMUB
+ * transfers are done
+ * @delayed_hpd_wq: work queue used to delay DMUB HPD work
*/
struct amdgpu_display_manager {
@@ -358,15 +449,6 @@ struct amdgpu_display_manager {
*/
struct mutex audio_lock;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- /**
- * @vblank_lock:
- *
- * Guards access to deferred vblank work state.
- */
- spinlock_t vblank_lock;
-#endif
-
/**
* @audio_component:
*
@@ -465,18 +547,21 @@ struct amdgpu_display_manager {
struct amdgpu_dm_backlight_caps backlight_caps[AMDGPU_DM_MAX_NUM_EDP];
struct mod_freesync *freesync_module;
-#ifdef CONFIG_DRM_AMD_DC_HDCP
struct hdcp_workqueue *hdcp_workqueue;
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN)
/**
* @vblank_control_workqueue:
*
* Deferred work for vblank control events.
*/
struct workqueue_struct *vblank_control_workqueue;
-#endif
+
+ /**
+ * @idle_workqueue:
+ *
+ * Periodic work for idle events.
+ */
+ struct idle_workqueue *idle_workqueue;
struct drm_atomic_state *cached_state;
struct dc_state *cached_dc_state;
@@ -493,22 +578,21 @@ struct amdgpu_display_manager {
*/
const struct gpu_info_soc_bounding_box_v1_0 *soc_bounding_box;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
/**
* @active_vblank_irq_count:
*
* number of currently active vblank irqs
*/
uint32_t active_vblank_irq_count;
-#endif
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
/**
- * @crc_rd_wrk:
+ * @secure_display_ctx:
*
- * Work to be executed in a separate thread to communicate with PSP.
+ * Store secure display relevant info, e.g. the ROI information,
+ * the work_struct to command dmub, etc.
*/
- struct crc_rd_work *crc_rd_wrk;
+ struct secure_display_context secure_display_ctx;
#endif
/**
* @hpd_rx_offload_wq:
@@ -540,6 +624,59 @@ struct amdgpu_display_manager {
* cached backlight values.
*/
u32 brightness[AMDGPU_DM_MAX_NUM_EDP];
+ /**
+ * @actual_brightness:
+ *
+ * last successfully applied backlight values.
+ */
+ u32 actual_brightness[AMDGPU_DM_MAX_NUM_EDP];
+
+ /**
+ * @aux_hpd_discon_quirk:
+ *
+ * quirk for hpd discon while aux is on-going.
+ * occurred on certain intel platform
+ */
+ bool aux_hpd_discon_quirk;
+
+ /**
+ * @edp0_on_dp1_quirk:
+ *
+ * quirk for platforms that put edp0 on DP1.
+ */
+ bool edp0_on_dp1_quirk;
+
+ /**
+ * @dpia_aux_lock:
+ *
+ * Guards access to DPIA AUX
+ */
+ struct mutex dpia_aux_lock;
+
+ /**
+ * @bb_from_dmub:
+ *
+ * Bounding box data read from dmub during early initialization for DCN4+
+ * Data is stored as a byte array that should be cast to the appropriate bb struct
+ */
+ void *bb_from_dmub;
+
+ /**
+ * @oem_i2c:
+ *
+ * OEM i2c bus
+ */
+ struct amdgpu_i2c_adapter *oem_i2c;
+
+ /**
+ * @fused_io:
+ *
+ * dmub fused io interface
+ */
+ struct fused_io_sync {
+ struct completion replied;
+ char reply_data[0x40]; // Cannot include dmub_cmd here
+ } fused_io[8];
};
enum dsc_clock_force_state {
@@ -556,14 +693,60 @@ struct dsc_preferred_settings {
bool dsc_force_disable_passthrough;
};
+enum mst_progress_status {
+ MST_STATUS_DEFAULT = 0,
+ MST_PROBE = BIT(0),
+ MST_REMOTE_EDID = BIT(1),
+ MST_ALLOCATE_NEW_PAYLOAD = BIT(2),
+ MST_CLEAR_ALLOCATED_PAYLOAD = BIT(3),
+};
+
+/**
+ * struct amdgpu_hdmi_vsdb_info - Keep track of the VSDB info
+ *
+ * AMDGPU supports FreeSync over HDMI by using the VSDB section, and this
+ * struct is useful to keep track of the display-specific information about
+ * FreeSync.
+ */
+struct amdgpu_hdmi_vsdb_info {
+ /**
+ * @amd_vsdb_version: Vendor Specific Data Block Version, should be
+ * used to determine which Vendor Specific InfoFrame (VSIF) to send.
+ */
+ unsigned int amd_vsdb_version;
+
+ /**
+ * @freesync_supported: FreeSync Supported.
+ */
+ bool freesync_supported;
+
+ /**
+ * @min_refresh_rate_hz: FreeSync Minimum Refresh Rate in Hz.
+ */
+ unsigned int min_refresh_rate_hz;
+
+ /**
+ * @max_refresh_rate_hz: FreeSync Maximum Refresh Rate in Hz
+ */
+ unsigned int max_refresh_rate_hz;
+
+ /**
+ * @replay_mode: Replay supported
+ */
+ bool replay_mode;
+};
+
struct amdgpu_dm_connector {
struct drm_connector base;
uint32_t connector_id;
+ int bl_idx;
+
+ struct cec_notifier *notifier;
/* we need to mind the EDID between detect
and get modes due to analog/digital/tvencoder */
- struct edid *edid;
+ const struct drm_edid *drm_edid;
/* shared with amdgpu */
struct amdgpu_hpd hpd;
@@ -575,22 +758,40 @@ struct amdgpu_dm_connector {
* The 'current' sink is in dc_link->sink. */
struct dc_sink *dc_sink;
struct dc_link *dc_link;
+
+ /**
+ * @dc_em_sink: Reference to the emulated (virtual) sink.
+ */
struct dc_sink *dc_em_sink;
/* DM only */
struct drm_dp_mst_topology_mgr mst_mgr;
struct amdgpu_dm_dp_aux dm_dp_aux;
- struct drm_dp_mst_port *port;
- struct amdgpu_dm_connector *mst_port;
+ struct drm_dp_mst_port *mst_output_port;
+ struct amdgpu_dm_connector *mst_root;
struct drm_dp_aux *dsc_aux;
+ uint32_t mst_local_bw;
+ uint16_t vc_full_pbn;
+ struct mutex handle_mst_msg_ready;
+
+ /* branch device specific data */
+ uint32_t branch_ieee_oui;
/* TODO see if we can merge with ddc_bus or make a dm_connector */
struct amdgpu_i2c_adapter *i2c;
/* Monitor range limits */
- int min_vfreq ;
+ /**
+ * @min_vfreq: Minimal frequency supported by the display in Hz. This
+ * value is set to zero when there is no FreeSync support.
+ */
+ int min_vfreq;
+
+ /**
+ * @max_vfreq: Maximum frequency supported by the display in Hz. This
+ * value is set to zero when there is no FreeSync support.
+ */
int max_vfreq ;
- int pixel_clock_mhz;
/* Audio instance - protected by audio_lock. */
int audio_inst;
@@ -598,25 +799,151 @@ struct amdgpu_dm_connector {
struct mutex hpd_lock;
bool fake_enable;
-#ifdef CONFIG_DEBUG_FS
- uint32_t debugfs_dpcd_address;
- uint32_t debugfs_dpcd_size;
-#endif
bool force_yuv420_output;
+ bool force_yuv422_output;
struct dsc_preferred_settings dsc_settings;
+ union dp_downstream_port_present mst_downstream_port_present;
/* Cached display modes */
struct drm_display_mode freesync_vid_base;
- int psr_skip_count;
+ int sr_skip_count;
+ bool disallow_edp_enter_psr;
+
+ /* Record progress status of mst*/
+ uint8_t mst_status;
+
+ /* Automated testing */
+ bool timing_changed;
+ struct dc_crtc_timing *timing_requested;
+
+ /* Adaptive Sync */
+ bool pack_sdp_v1_3;
+ enum adaptive_sync_type as_type;
+ struct amdgpu_hdmi_vsdb_info vsdb_info;
+
+ /* HDMI HPD debounce support */
+ unsigned int hdmi_hpd_debounce_delay_ms;
+ struct delayed_work hdmi_hpd_debounce_work;
+ struct dc_sink *hdmi_prev_sink;
};
+static inline void amdgpu_dm_set_mst_status(uint8_t *status,
+ uint8_t flags, bool set)
+{
+ if (set)
+ *status |= flags;
+ else
+ *status &= ~flags;
+}
+
#define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base)
+struct amdgpu_dm_wb_connector {
+ struct drm_writeback_connector base;
+ struct dc_link *link;
+};
+
+#define to_amdgpu_dm_wb_connector(x) container_of(x, struct amdgpu_dm_wb_connector, base)
+
extern const struct amdgpu_ip_block_version dm_ip_block;
+/* enum amdgpu_transfer_function: pre-defined transfer function supported by AMD.
+ *
+ * It includes standardized transfer functions and pure power functions. The
+ * transfer function coefficients are available at modules/color/color_gamma.c
+ */
+enum amdgpu_transfer_function {
+ AMDGPU_TRANSFER_FUNCTION_DEFAULT,
+ AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF,
+ AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF,
+ AMDGPU_TRANSFER_FUNCTION_PQ_EOTF,
+ AMDGPU_TRANSFER_FUNCTION_IDENTITY,
+ AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF,
+ AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF,
+ AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF,
+ AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF,
+ AMDGPU_TRANSFER_FUNCTION_BT709_OETF,
+ AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF,
+ AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF,
+ AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF,
+ AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF,
+ AMDGPU_TRANSFER_FUNCTION_COUNT
+};
+
struct dm_plane_state {
struct drm_plane_state base;
struct dc_plane_state *dc_state;
+
+ /* Plane color mgmt */
+ /**
+ * @degamma_lut:
+ *
+ * 1D LUT for mapping framebuffer/plane pixel data before sampling or
+ * blending operations. It's usually applied to linearize input space.
+ * The blob (if not NULL) is an array of &struct drm_color_lut.
+ */
+ struct drm_property_blob *degamma_lut;
+ /**
+ * @degamma_tf:
+ *
+ * Predefined transfer function to tell DC driver the input space to
+ * linearize.
+ */
+ enum amdgpu_transfer_function degamma_tf;
+ /**
+ * @hdr_mult:
+ *
+ * Multiplier to 'gain' the plane. When PQ is decoded using the fixed
+ * func transfer function to the internal FP16 fb, 1.0 -> 80 nits (on
+ * AMD at least). When sRGB is decoded, 1.0 -> 1.0, obviously.
+ * Therefore, 1.0 multiplier = 80 nits for SDR content. So if you
+ * want, 203 nits for SDR content, pass in (203.0 / 80.0). Format is
+ * S31.32 sign-magnitude.
+ *
+ * HDR multiplier can range widely beyond [0.0, 1.0]. This means that PQ
+ * TF is needed for any subsequent linear-to-non-linear transforms.
+ */
+ __u64 hdr_mult;
+ /**
+ * @ctm:
+ *
+ * Color transformation matrix. The blob (if not NULL) is a &struct
+ * drm_color_ctm_3x4.
+ */
+ struct drm_property_blob *ctm;
+ /**
+ * @shaper_lut: shaper lookup table blob. The blob (if not NULL) is an
+ * array of &struct drm_color_lut.
+ */
+ struct drm_property_blob *shaper_lut;
+ /**
+ * @shaper_tf:
+ *
+ * Predefined transfer function to delinearize color space.
+ */
+ enum amdgpu_transfer_function shaper_tf;
+ /**
+ * @lut3d: 3D lookup table blob. The blob (if not NULL) is an array of
+ * &struct drm_color_lut.
+ */
+ struct drm_property_blob *lut3d;
+ /**
+ * @blend_lut: blend lut lookup table blob. The blob (if not NULL) is an
+ * array of &struct drm_color_lut.
+ */
+ struct drm_property_blob *blend_lut;
+ /**
+ * @blend_tf:
+ *
+ * Pre-defined transfer function for converting plane pixel data before
+ * applying blend LUT.
+ */
+ enum amdgpu_transfer_function blend_tf;
+};
+
+enum amdgpu_dm_cursor_mode {
+ DM_CURSOR_NATIVE_MODE = 0,
+ DM_CURSOR_OVERLAY_MODE,
};
struct dm_crtc_state {
@@ -626,22 +953,31 @@ struct dm_crtc_state {
bool cm_has_degamma;
bool cm_is_degamma_srgb;
+ bool mpo_requested;
+
int update_type;
int active_planes;
int crc_skip_count;
- bool freesync_timing_changed;
bool freesync_vrr_info_changed;
bool dsc_force_changed;
bool vrr_supported;
-
- bool force_dpms_off;
struct mod_freesync_config freesync_config;
struct dc_info_packet vrr_infopacket;
int abm_level;
+
+ /**
+ * @regamma_tf:
+ *
+ * Pre-defined transfer function for converting internal FB -> wire
+ * encoding.
+ */
+ enum amdgpu_transfer_function regamma_tf;
+
+ enum amdgpu_dm_cursor_mode cursor_mode;
};
#define to_dm_crtc_state(x) container_of(x, struct dm_crtc_state, base)
@@ -662,22 +998,13 @@ struct dm_connector_state {
uint8_t underscan_hborder;
bool underscan_enable;
bool freesync_capable;
-#ifdef CONFIG_DRM_AMD_DC_HDCP
bool update_hdcp;
-#endif
+ bool abm_sysfs_forbidden;
uint8_t abm_level;
int vcpi_slots;
uint64_t pbn;
};
-struct amdgpu_hdmi_vsdb_info {
- unsigned int amd_vsdb_version; /* VSDB version, should be used to determine which VSIF to send */
- bool freesync_supported; /* FreeSync Supported */
- unsigned int min_refresh_rate_hz; /* FreeSync Minimum Refresh Rate in Hz */
- unsigned int max_refresh_rate_hz; /* FreeSync Maximum Refresh Rate in Hz */
-};
-
-
#define to_dm_connector_state(x)\
container_of((x), struct dm_connector_state, base)
@@ -703,24 +1030,34 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
int link_index);
enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connector,
- struct drm_display_mode *mode);
+ const struct drm_display_mode *mode);
void dm_restore_drm_connector_state(struct drm_device *dev,
struct drm_connector *connector);
void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
- struct edid *edid);
+ const struct drm_edid *drm_edid);
void amdgpu_dm_trigger_timing_sync(struct drm_device *dev);
+/* 3D LUT max size is 17x17x17 (4913 entries) */
+#define MAX_COLOR_3DLUT_SIZE 17
+#define MAX_COLOR_3DLUT_BITDEPTH 12
+int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev,
+ struct drm_plane_state *plane_state);
+/* 1D LUT size */
#define MAX_COLOR_LUT_ENTRIES 4096
/* Legacy gamm LUT users such as X doesn't like large LUT sizes */
#define MAX_COLOR_LEGACY_LUT_ENTRIES 256
void amdgpu_dm_init_color_mod(void);
+int amdgpu_dm_create_color_properties(struct amdgpu_device *adev);
int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state);
int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc);
+int amdgpu_dm_check_crtc_color_mgmt(struct dm_crtc_state *crtc,
+ bool check_only);
int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
+ struct drm_plane_state *plane_state,
struct dc_plane_state *dc_plane_state);
void amdgpu_dm_update_connector_after_detect(
@@ -728,7 +1065,50 @@ void amdgpu_dm_update_connector_after_detect(
extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs;
-int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux,
- struct dc_context *ctx, unsigned int link_index,
- void *payload, void *operation_result);
+int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int link_index,
+ struct aux_payload *payload, enum aux_return_code_type *operation_result);
+
+bool amdgpu_dm_execute_fused_io(
+ struct amdgpu_device *dev,
+ struct dc_link *link,
+ union dmub_rb_cmd *commands,
+ uint8_t count,
+ uint32_t timeout_us
+);
+
+int amdgpu_dm_process_dmub_set_config_sync(struct dc_context *ctx, unsigned int link_index,
+ struct set_config_cmd_payload *payload, enum set_config_status *operation_result);
+
+struct dc_stream_state *
+ create_validate_stream_for_sink(struct drm_connector *connector,
+ const struct drm_display_mode *drm_mode,
+ const struct dm_connector_state *dm_state,
+ const struct dc_stream_state *old_stream);
+
+int dm_atomic_get_state(struct drm_atomic_state *state,
+ struct dm_atomic_state **dm_state);
+
+struct drm_connector *
+amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state,
+ struct drm_crtc *crtc);
+
+int convert_dc_color_depth_into_bpc(enum dc_color_depth display_color_depth);
+struct idle_workqueue *idle_create_workqueue(struct amdgpu_device *adev);
+
+void *dm_allocate_gpu_mem(struct amdgpu_device *adev,
+ enum dc_gpu_mem_alloc_type type,
+ size_t size,
+ long long *addr);
+void dm_free_gpu_mem(struct amdgpu_device *adev,
+ enum dc_gpu_mem_alloc_type type,
+ void *addr);
+
+bool amdgpu_dm_is_headless(struct amdgpu_device *adev);
+
+void hdmi_cec_set_edid(struct amdgpu_dm_connector *aconnector);
+void hdmi_cec_unset_edid(struct amdgpu_dm_connector *aconnector);
+int amdgpu_dm_initialize_hdmi_connector(struct amdgpu_dm_connector *aconnector);
+
+void retrieve_dmi_info(struct amdgpu_display_manager *dm);
+
#endif /* __AMDGPU_DM_H__ */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index a022e5bb30a5..1dcc79b35225 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
@@ -25,11 +26,39 @@
#include "amdgpu.h"
#include "amdgpu_mode.h"
#include "amdgpu_dm.h"
+#include "amdgpu_dm_colorop.h"
#include "dc.h"
#include "modules/color/color_gamma.h"
-#include "basics/conversion.h"
-/*
+/**
+ * DOC: overview
+ *
+ * We have three types of color management in the AMD display driver.
+ * 1. the legacy &drm_crtc DEGAMMA, CTM, and GAMMA properties
+ * 2. AMD driver private color management on &drm_plane and &drm_crtc
+ * 3. AMD plane color pipeline
+ *
+ * The CRTC properties are the original color management. When they were
+ * implemented per-plane color management was not a thing yet. Because
+ * of that we could get away with plumbing the DEGAMMA and CTM
+ * properties to pre-blending HW functions. This is incompatible with
+ * per-plane color management, such as via the AMD private properties or
+ * the new drm_plane color pipeline. The only compatible CRTC property
+ * with per-plane color management is the GAMMA property as it is
+ * applied post-blending.
+ *
+ * The AMD driver private color management properties are only exposed
+ * when the kernel is built explicitly with -DAMD_PRIVATE_COLOR. They
+ * are temporary building blocks on the path to full-fledged &drm_plane
+ * and &drm_crtc color pipelines and lay the driver's groundwork for the
+ * color pipelines.
+ *
+ * The AMD plane color pipeline describes AMD's &drm_colorops via the
+ * &drm_plane's COLOR_PIPELINE property.
+ *
+ * drm_crtc Properties
+ * -------------------
+ *
* The DC interface to HW gives us the following color management blocks
* per pipe (surface):
*
@@ -40,39 +69,97 @@
* - Surface regamma LUT (normalized)
* - Output CSC (normalized)
*
- * But these aren't a direct mapping to DRM color properties. The current DRM
- * interface exposes CRTC degamma, CRTC CTM and CRTC regamma while our hardware
- * is essentially giving:
+ * But these aren't a direct mapping to DRM color properties. The
+ * current DRM interface exposes CRTC degamma, CRTC CTM and CRTC regamma
+ * while our hardware is essentially giving:
*
* Plane CTM -> Plane degamma -> Plane CTM -> Plane regamma -> Plane CTM
*
- * The input gamma LUT block isn't really applicable here since it operates
- * on the actual input data itself rather than the HW fp representation. The
- * input and output CSC blocks are technically available to use as part of
- * the DC interface but are typically used internally by DC for conversions
- * between color spaces. These could be blended together with user
- * adjustments in the future but for now these should remain untouched.
+ * The input gamma LUT block isn't really applicable here since it
+ * operates on the actual input data itself rather than the HW fp
+ * representation. The input and output CSC blocks are technically
+ * available to use as part of the DC interface but are typically used
+ * internally by DC for conversions between color spaces. These could be
+ * blended together with user adjustments in the future but for now
+ * these should remain untouched.
+ *
+ * The pipe blending also happens after these blocks so we don't
+ * actually support any CRTC props with correct blending with multiple
+ * planes - but we can still support CRTC color management properties in
+ * DM in most single plane cases correctly with clever management of the
+ * DC interface in DM.
+ *
+ * As per DRM documentation, blocks should be in hardware bypass when
+ * their respective property is set to NULL. A linear DGM/RGM LUT should
+ * also be considered as putting the respective block into bypass mode.
+ *
+ * This means that the following configuration is assumed to be the
+ * default:
+ *
+ * Plane DGM Bypass -> Plane CTM Bypass -> Plane RGM Bypass -> ... CRTC
+ * DGM Bypass -> CRTC CTM Bypass -> CRTC RGM Bypass
+ *
+ * AMD Private Color Management on drm_plane
+ * -----------------------------------------
+ *
+ * The AMD private color management properties on a &drm_plane are:
+ *
+ * - AMD_PLANE_DEGAMMA_LUT
+ * - AMD_PLANE_DEGAMMA_LUT_SIZE
+ * - AMD_PLANE_DEGAMMA_TF
+ * - AMD_PLANE_HDR_MULT
+ * - AMD_PLANE_CTM
+ * - AMD_PLANE_SHAPER_LUT
+ * - AMD_PLANE_SHAPER_LUT_SIZE
+ * - AMD_PLANE_SHAPER_TF
+ * - AMD_PLANE_LUT3D
+ * - AMD_PLANE_LUT3D_SIZE
+ * - AMD_PLANE_BLEND_LUT
+ * - AMD_PLANE_BLEND_LUT_SIZE
+ * - AMD_PLANE_BLEND_TF
+ *
+ * The AMD private color management property on a &drm_crtc is:
+ *
+ * - AMD_CRTC_REGAMMA_TF
+ *
+ * Use of these properties is discouraged.
*
- * The pipe blending also happens after these blocks so we don't actually
- * support any CRTC props with correct blending with multiple planes - but we
- * can still support CRTC color management properties in DM in most single
- * plane cases correctly with clever management of the DC interface in DM.
+ * AMD plane color pipeline
+ * ------------------------
*
- * As per DRM documentation, blocks should be in hardware bypass when their
- * respective property is set to NULL. A linear DGM/RGM LUT should also
- * considered as putting the respective block into bypass mode.
+ * The AMD &drm_plane color pipeline is advertised for DCN generations
+ * 3.0 and newer. It exposes these elements in this order:
*
- * This means that the following
- * configuration is assumed to be the default:
+ * 1. 1D curve colorop
+ * 2. Multiplier
+ * 3. 3x4 CTM
+ * 4. 1D curve colorop
+ * 5. 1D LUT
+ * 6. 3D LUT
+ * 7. 1D curve colorop
+ * 8. 1D LUT
+ *
+ * The multiplier (#2) is a simple multiplier that is applied to all
+ * channels.
+ *
+ * The 3x4 CTM (#3) is a simple 3x4 matrix.
+ *
+ * #1 and #7 are non-linear to linear curves. #4 is a linear to
+ * non-linear curve. They support sRGB, PQ, and BT.709/BT.2020 EOTFs or
+ * their inverse.
+ *
+ * The 1D LUTs (#5 and #8) are plain 4096 entry LUTs.
+ *
+ * The 3DLUT (#6) is a tetrahedrally interpolated 17 cube LUT.
*
- * Plane DGM Bypass -> Plane CTM Bypass -> Plane RGM Bypass -> ...
- * CRTC DGM Bypass -> CRTC CTM Bypass -> CRTC RGM Bypass
*/
#define MAX_DRM_LUT_VALUE 0xFFFF
+#define MAX_DRM_LUT32_VALUE 0xFFFFFFFF
+#define SDR_WHITE_LEVEL_INIT_VALUE 80
-/*
- * Initialize the color module.
+/**
+ * amdgpu_dm_init_color_mod - Initialize the color module.
*
* We're not using the full color module, only certain components.
* Only call setup functions for components that we need.
@@ -82,7 +169,255 @@ void amdgpu_dm_init_color_mod(void)
setup_x_points_distribution();
}
-/* Extracts the DRM lut and lut size from a blob. */
+static inline struct fixed31_32 amdgpu_dm_fixpt_from_s3132(__u64 x)
+{
+ struct fixed31_32 val;
+
+ /* If negative, convert to 2's complement. */
+ if (x & (1ULL << 63))
+ x = -(x & ~(1ULL << 63));
+
+ val.value = x;
+ return val;
+}
+
+#ifdef AMD_PRIVATE_COLOR
+/* Pre-defined Transfer Functions (TF)
+ *
+ * AMD driver supports pre-defined mathematical functions for transferring
+ * between encoded values and optical/linear space. Depending on HW color caps,
+ * ROMs and curves built by the AMD color module support these transforms.
+ *
+ * The driver-specific color implementation exposes properties for pre-blending
+ * degamma TF, shaper TF (before 3D LUT), and blend(dpp.ogam) TF and
+ * post-blending regamma (mpc.ogam) TF. However, only pre-blending degamma
+ * supports ROM curves. AMD color module uses pre-defined coefficients to build
+ * curves for the other blocks. What can be done by each color block is
+ * described by struct dpp_color_caps and struct mpc_color_caps.
+ *
+ * AMD driver-specific color API exposes the following pre-defined transfer
+ * functions:
+ *
+ * - Identity: linear/identity relationship between pixel value and
+ * luminance value;
+ * - Gamma 2.2, Gamma 2.4, Gamma 2.6: pure power functions;
+ * - sRGB: 2.4: The piece-wise transfer function from IEC 61966-2-1:1999;
+ * - BT.709: has a linear segment in the bottom part and then a power function
+ * with a 0.45 (~1/2.22) gamma for the rest of the range; standardized by
+ * ITU-R BT.709-6;
+ * - PQ (Perceptual Quantizer): used for HDR display, allows luminance range
+ * capability of 0 to 10,000 nits; standardized by SMPTE ST 2084.
+ *
+ * The AMD color model is designed with an assumption that SDR (sRGB, BT.709,
+ * Gamma 2.2, etc.) peak white maps (normalized to 1.0 FP) to 80 nits in the PQ
+ * system. This has the implication that PQ EOTF (non-linear to linear) maps to
+ * [0.0..125.0] where 125.0 = 10,000 nits / 80 nits.
+ *
+ * Non-linear and linear forms are described in the table below:
+ *
+ * ┌───────────┬─────────────────────┬──────────────────────┐
+ * │ │ Non-linear │ Linear │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │ sRGB │ UNORM or [0.0, 1.0] │ [0.0, 1.0] │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │ BT709 │ UNORM or [0.0, 1.0] │ [0.0, 1.0] │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │ Gamma 2.x │ UNORM or [0.0, 1.0] │ [0.0, 1.0] │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │ PQ │ UNORM or FP16 CCCS* │ [0.0, 125.0] │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │ Identity │ UNORM or FP16 CCCS* │ [0.0, 1.0] or CCCS** │
+ * └───────────┴─────────────────────┴──────────────────────┘
+ * * CCCS: Windows canonical composition color space
+ * ** Respectively
+ *
+ * In the driver-specific API, color block names attached to TF properties
+ * suggest the intention regarding non-linear encoding pixel's luminance
+ * values. As some newer encodings don't use gamma curve, we make encoding and
+ * decoding explicit by defining an enum list of transfer functions supported
+ * in terms of EOTF and inverse EOTF, where:
+ *
+ * - EOTF (electro-optical transfer function): is the transfer function to go
+ * from the encoded value to an optical (linear) value. De-gamma functions
+ * traditionally do this.
+ * - Inverse EOTF (simply the inverse of the EOTF): is usually intended to go
+ * from an optical/linear space (which might have been used for blending)
+ * back to the encoded values. Gamma functions traditionally do this.
+ */
+static const char * const
+amdgpu_transfer_function_names[] = {
+ [AMDGPU_TRANSFER_FUNCTION_DEFAULT] = "Default",
+ [AMDGPU_TRANSFER_FUNCTION_IDENTITY] = "Identity",
+ [AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF] = "sRGB EOTF",
+ [AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF] = "BT.709 inv_OETF",
+ [AMDGPU_TRANSFER_FUNCTION_PQ_EOTF] = "PQ EOTF",
+ [AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF] = "Gamma 2.2 EOTF",
+ [AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF] = "Gamma 2.4 EOTF",
+ [AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF] = "Gamma 2.6 EOTF",
+ [AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF] = "sRGB inv_EOTF",
+ [AMDGPU_TRANSFER_FUNCTION_BT709_OETF] = "BT.709 OETF",
+ [AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF] = "PQ inv_EOTF",
+ [AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF] = "Gamma 2.2 inv_EOTF",
+ [AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF] = "Gamma 2.4 inv_EOTF",
+ [AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF] = "Gamma 2.6 inv_EOTF",
+};
+
+static const u32 amdgpu_eotf =
+ BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF) |
+ BIT(AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF) |
+ BIT(AMDGPU_TRANSFER_FUNCTION_PQ_EOTF) |
+ BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF) |
+ BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF) |
+ BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF);
+
+static const u32 amdgpu_inv_eotf =
+ BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF) |
+ BIT(AMDGPU_TRANSFER_FUNCTION_BT709_OETF) |
+ BIT(AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF) |
+ BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF) |
+ BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF) |
+ BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF);
+
+static struct drm_property *
+amdgpu_create_tf_property(struct drm_device *dev,
+ const char *name,
+ u32 supported_tf)
+{
+ u32 transfer_functions = supported_tf |
+ BIT(AMDGPU_TRANSFER_FUNCTION_DEFAULT) |
+ BIT(AMDGPU_TRANSFER_FUNCTION_IDENTITY);
+ struct drm_prop_enum_list enum_list[AMDGPU_TRANSFER_FUNCTION_COUNT];
+ int i, len;
+
+ len = 0;
+ for (i = 0; i < AMDGPU_TRANSFER_FUNCTION_COUNT; i++) {
+ if ((transfer_functions & BIT(i)) == 0)
+ continue;
+
+ enum_list[len].type = i;
+ enum_list[len].name = amdgpu_transfer_function_names[i];
+ len++;
+ }
+
+ return drm_property_create_enum(dev, DRM_MODE_PROP_ENUM,
+ name, enum_list, len);
+}
+
+int
+amdgpu_dm_create_color_properties(struct amdgpu_device *adev)
+{
+ struct drm_property *prop;
+
+ prop = drm_property_create(adev_to_drm(adev),
+ DRM_MODE_PROP_BLOB,
+ "AMD_PLANE_DEGAMMA_LUT", 0);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_degamma_lut_property = prop;
+
+ prop = drm_property_create_range(adev_to_drm(adev),
+ DRM_MODE_PROP_IMMUTABLE,
+ "AMD_PLANE_DEGAMMA_LUT_SIZE",
+ 0, UINT_MAX);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_degamma_lut_size_property = prop;
+
+ prop = amdgpu_create_tf_property(adev_to_drm(adev),
+ "AMD_PLANE_DEGAMMA_TF",
+ amdgpu_eotf);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_degamma_tf_property = prop;
+
+ prop = drm_property_create_range(adev_to_drm(adev),
+ 0, "AMD_PLANE_HDR_MULT", 0, U64_MAX);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_hdr_mult_property = prop;
+
+ prop = drm_property_create(adev_to_drm(adev),
+ DRM_MODE_PROP_BLOB,
+ "AMD_PLANE_CTM", 0);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_ctm_property = prop;
+
+ prop = drm_property_create(adev_to_drm(adev),
+ DRM_MODE_PROP_BLOB,
+ "AMD_PLANE_SHAPER_LUT", 0);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_shaper_lut_property = prop;
+
+ prop = drm_property_create_range(adev_to_drm(adev),
+ DRM_MODE_PROP_IMMUTABLE,
+ "AMD_PLANE_SHAPER_LUT_SIZE", 0, UINT_MAX);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_shaper_lut_size_property = prop;
+
+ prop = amdgpu_create_tf_property(adev_to_drm(adev),
+ "AMD_PLANE_SHAPER_TF",
+ amdgpu_inv_eotf);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_shaper_tf_property = prop;
+
+ prop = drm_property_create(adev_to_drm(adev),
+ DRM_MODE_PROP_BLOB,
+ "AMD_PLANE_LUT3D", 0);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_lut3d_property = prop;
+
+ prop = drm_property_create_range(adev_to_drm(adev),
+ DRM_MODE_PROP_IMMUTABLE,
+ "AMD_PLANE_LUT3D_SIZE", 0, UINT_MAX);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_lut3d_size_property = prop;
+
+ prop = drm_property_create(adev_to_drm(adev),
+ DRM_MODE_PROP_BLOB,
+ "AMD_PLANE_BLEND_LUT", 0);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_blend_lut_property = prop;
+
+ prop = drm_property_create_range(adev_to_drm(adev),
+ DRM_MODE_PROP_IMMUTABLE,
+ "AMD_PLANE_BLEND_LUT_SIZE", 0, UINT_MAX);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_blend_lut_size_property = prop;
+
+ prop = amdgpu_create_tf_property(adev_to_drm(adev),
+ "AMD_PLANE_BLEND_TF",
+ amdgpu_eotf);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_blend_tf_property = prop;
+
+ prop = amdgpu_create_tf_property(adev_to_drm(adev),
+ "AMD_CRTC_REGAMMA_TF",
+ amdgpu_inv_eotf);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.regamma_tf_property = prop;
+
+ return 0;
+}
+#endif
+
+/**
+ * __extract_blob_lut - Extracts the DRM lut and lut size from a blob.
+ * @blob: DRM color mgmt property blob
+ * @size: lut size
+ *
+ * Returns:
+ * DRM LUT or NULL
+ */
static const struct drm_color_lut *
__extract_blob_lut(const struct drm_property_blob *blob, uint32_t *size)
{
@@ -90,13 +425,33 @@ __extract_blob_lut(const struct drm_property_blob *blob, uint32_t *size)
return blob ? (struct drm_color_lut *)blob->data : NULL;
}
-/*
- * Return true if the given lut is a linear mapping of values, i.e. it acts
- * like a bypass LUT.
+/**
+ * __extract_blob_lut32 - Extracts the DRM lut and lut size from a blob.
+ * @blob: DRM color mgmt property blob
+ * @size: lut size
+ *
+ * Returns:
+ * DRM LUT or NULL
+ */
+static const struct drm_color_lut32 *
+__extract_blob_lut32(const struct drm_property_blob *blob, uint32_t *size)
+{
+ *size = blob ? drm_color_lut32_size(blob) : 0;
+ return blob ? (struct drm_color_lut32 *)blob->data : NULL;
+}
+
+/**
+ * __is_lut_linear - check if the given lut is a linear mapping of values
+ * @lut: given lut to check values
+ * @size: lut size
*
* It is considered linear if the lut represents:
- * f(a) = (0xFF00/MAX_COLOR_LUT_ENTRIES-1)a; for integer a in
- * [0, MAX_COLOR_LUT_ENTRIES)
+ * f(a) = (0xFF00/MAX_COLOR_LUT_ENTRIES-1)a; for integer a in [0,
+ * MAX_COLOR_LUT_ENTRIES)
+ *
+ * Returns:
+ * True if the given lut is a linear mapping of values, i.e. it acts like a
+ * bypass LUT. Otherwise, false.
*/
static bool __is_lut_linear(const struct drm_color_lut *lut, uint32_t size)
{
@@ -119,9 +474,13 @@ static bool __is_lut_linear(const struct drm_color_lut *lut, uint32_t size)
return true;
}
-/*
- * Convert the drm_color_lut to dc_gamma. The conversion depends on the size
- * of the lut - whether or not it's legacy.
+/**
+ * __drm_lut_to_dc_gamma - convert the drm_color_lut to dc_gamma.
+ * @lut: DRM lookup table for color conversion
+ * @gamma: DC gamma to set entries
+ * @is_legacy: legacy or atomic gamma
+ *
+ * The conversion depends on the size of the lut - whether or not it's legacy.
*/
static void __drm_lut_to_dc_gamma(const struct drm_color_lut *lut,
struct dc_gamma *gamma, bool is_legacy)
@@ -154,14 +513,34 @@ static void __drm_lut_to_dc_gamma(const struct drm_color_lut *lut,
}
}
-/*
- * Converts a DRM CTM to a DC CSC float matrix.
+/**
+ * __drm_lut32_to_dc_gamma - convert the drm_color_lut32 to dc_gamma.
+ * @lut: DRM lookup table for color conversion
+ * @gamma: DC gamma to set entries
+ *
+ * Each of the MAX_COLOR_LUT_ENTRIES entries is scaled by MAX_DRM_LUT32_VALUE.
+ */
+static void __drm_lut32_to_dc_gamma(const struct drm_color_lut32 *lut, struct dc_gamma *gamma)
+{
+ int i;
+
+ for (i = 0; i < MAX_COLOR_LUT_ENTRIES; i++) {
+ gamma->entries.red[i] = dc_fixpt_from_fraction(lut[i].red, MAX_DRM_LUT32_VALUE);
+ gamma->entries.green[i] = dc_fixpt_from_fraction(lut[i].green, MAX_DRM_LUT32_VALUE);
+ gamma->entries.blue[i] = dc_fixpt_from_fraction(lut[i].blue, MAX_DRM_LUT32_VALUE);
+ }
+}
+
+/**
+ * __drm_ctm_to_dc_matrix - converts a DRM CTM to a DC CSC float matrix
+ * @ctm: DRM color transformation matrix
+ * @matrix: DC CSC float matrix
+ *
* The matrix needs to be a 3x4 (12 entry) matrix.
*/
static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm,
struct fixed31_32 *matrix)
{
- int64_t val;
int i;
/*
@@ -180,16 +559,44 @@ static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm,
}
/* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */
- val = ctm->matrix[i - (i / 4)];
- /* If negative, convert to 2's complement. */
- if (val & (1ULL << 63))
- val = -(val & ~(1ULL << 63));
+ matrix[i] = amdgpu_dm_fixpt_from_s3132(ctm->matrix[i - (i / 4)]);
+ }
+}
+
+/**
+ * __drm_ctm_3x4_to_dc_matrix - converts a DRM CTM 3x4 to a DC CSC float matrix
+ * @ctm: DRM color transformation matrix with 3x4 dimensions
+ * @matrix: DC CSC float matrix
+ *
+ * The matrix needs to be a 3x4 (12 entry) matrix.
+ */
+static void __drm_ctm_3x4_to_dc_matrix(const struct drm_color_ctm_3x4 *ctm,
+ struct fixed31_32 *matrix)
+{
+ int i;
- matrix[i].value = val;
+ /* The format provided is S31.32, using signed-magnitude representation.
+ * Our fixed31_32 is also S31.32, but is using 2's complement. We have
+ * to convert from signed-magnitude to 2's complement.
+ */
+ for (i = 0; i < 12; i++) {
+ /* gamut_remap_matrix[i] = ctm[i] */
+ matrix[i] = amdgpu_dm_fixpt_from_s3132(ctm->matrix[i]);
}
}
-/* Calculates the legacy transfer function - only for sRGB input space. */
+/**
+ * __set_legacy_tf - Calculates the legacy transfer function
+ * @func: transfer function
+ * @lut: lookup table that defines the color space
+ * @lut_size: size of respective lut
+ * @has_rom: if ROM can be used for hardcoded curve
+ *
+ * Only for sRGB input space
+ *
+ * Returns:
+ * 0 in case of success, -ENOMEM if fails
+ */
static int __set_legacy_tf(struct dc_transfer_func *func,
const struct drm_color_lut *lut, uint32_t lut_size,
bool has_rom)
@@ -218,7 +625,16 @@ static int __set_legacy_tf(struct dc_transfer_func *func,
return res ? 0 : -ENOMEM;
}
-/* Calculates the output transfer function based on expected input space. */
+/**
+ * __set_output_tf - calculates the output transfer function based on expected input space.
+ * @func: transfer function
+ * @lut: lookup table that defines the color space
+ * @lut_size: size of respective lut
+ * @has_rom: if ROM can be used for hardcoded curve
+ *
+ * Returns:
+ * 0 in case of success. -ENOMEM if fails.
+ */
static int __set_output_tf(struct dc_transfer_func *func,
const struct drm_color_lut *lut, uint32_t lut_size,
bool has_rom)
@@ -227,16 +643,74 @@ static int __set_output_tf(struct dc_transfer_func *func,
struct calculate_buffer cal_buffer = {0};
bool res;
- ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES);
+ cal_buffer.buffer_index = -1;
+
+ if (lut_size) {
+ ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES);
+
+ gamma = dc_create_gamma();
+ if (!gamma)
+ return -ENOMEM;
+
+ gamma->num_entries = lut_size;
+ __drm_lut_to_dc_gamma(lut, gamma, false);
+ }
+
+ if (func->tf == TRANSFER_FUNCTION_LINEAR) {
+ /*
+ * Color module doesn't like calculating regamma params
+ * on top of a linear input. But degamma params can be used
+ * instead to simulate this.
+ */
+ if (gamma)
+ gamma->type = GAMMA_CUSTOM;
+ res = mod_color_calculate_degamma_params(NULL, func,
+ gamma, gamma != NULL);
+ } else {
+ /*
+ * Assume sRGB. The actual mapping will depend on whether the
+ * input was legacy or not.
+ */
+ if (gamma)
+ gamma->type = GAMMA_CS_TFM_1D;
+ res = mod_color_calculate_regamma_params(func, gamma, gamma != NULL,
+ has_rom, NULL, &cal_buffer);
+ }
+
+ if (gamma)
+ dc_gamma_release(&gamma);
+
+ return res ? 0 : -ENOMEM;
+}
+
+/**
+ * __set_output_tf_32 - calculates the output transfer function based on expected input space.
+ * @func: transfer function
+ * @lut: lookup table that defines the color space
+ * @lut_size: size of respective lut
+ * @has_rom: if ROM can be used for hardcoded curve
+ *
+ * Returns:
+ * 0 in case of success. -ENOMEM if fails.
+ */
+static int __set_output_tf_32(struct dc_transfer_func *func,
+ const struct drm_color_lut32 *lut, uint32_t lut_size,
+ bool has_rom)
+{
+ struct dc_gamma *gamma = NULL;
+ struct calculate_buffer cal_buffer = {0};
+ bool res;
cal_buffer.buffer_index = -1;
- gamma = dc_create_gamma();
- if (!gamma)
- return -ENOMEM;
+ if (lut_size) {
+ gamma = dc_create_gamma();
+ if (!gamma)
+ return -ENOMEM;
- gamma->num_entries = lut_size;
- __drm_lut_to_dc_gamma(lut, gamma, false);
+ gamma->num_entries = lut_size;
+ __drm_lut32_to_dc_gamma(lut, gamma);
+ }
if (func->tf == TRANSFER_FUNCTION_LINEAR) {
/*
@@ -244,50 +718,428 @@ static int __set_output_tf(struct dc_transfer_func *func,
* on top of a linear input. But degamma params can be used
* instead to simulate this.
*/
- gamma->type = GAMMA_CUSTOM;
+ if (gamma)
+ gamma->type = GAMMA_CUSTOM;
res = mod_color_calculate_degamma_params(NULL, func,
- gamma, true);
+ gamma, gamma != NULL);
} else {
/*
* Assume sRGB. The actual mapping will depend on whether the
* input was legacy or not.
*/
- gamma->type = GAMMA_CS_TFM_1D;
- res = mod_color_calculate_regamma_params(func, gamma, false,
+ if (gamma)
+ gamma->type = GAMMA_CS_TFM_1D;
+ res = mod_color_calculate_regamma_params(func, gamma, gamma != NULL,
has_rom, NULL, &cal_buffer);
}
- dc_gamma_release(&gamma);
+ if (gamma)
+ dc_gamma_release(&gamma);
return res ? 0 : -ENOMEM;
}
-/* Caculates the input transfer function based on expected input space. */
-static int __set_input_tf(struct dc_transfer_func *func,
+
+static int amdgpu_dm_set_atomic_regamma(struct dc_transfer_func *out_tf,
+ const struct drm_color_lut *regamma_lut,
+ uint32_t regamma_size, bool has_rom,
+ enum dc_transfer_func_predefined tf)
+{
+ int ret = 0;
+
+ if (regamma_size || tf != TRANSFER_FUNCTION_LINEAR) {
+ /*
+ * CRTC RGM goes into RGM LUT.
+ *
+ * Note: there is no implicit sRGB regamma here. We are using
+ * degamma calculation from color module to calculate the curve
+ * from a linear base if gamma TF is not set. However, if gamma
+ * TF (!= Linear) and LUT are set at the same time, we will use
+ * regamma calculation, and the color module will combine the
+ * pre-defined TF and the custom LUT values into the LUT that's
+ * actually programmed.
+ */
+ out_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
+ out_tf->tf = tf;
+ out_tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+
+ ret = __set_output_tf(out_tf, regamma_lut, regamma_size, has_rom);
+ } else {
+ /*
+ * No CRTC RGM means we can just put the block into bypass
+ * since we don't have any plane level adjustments using it.
+ */
+ out_tf->type = TF_TYPE_BYPASS;
+ out_tf->tf = TRANSFER_FUNCTION_LINEAR;
+ }
+
+ return ret;
+}
+
+/**
+ * __set_input_tf - calculates the input transfer function based on expected
+ * input space.
+ * @caps: dc color capabilities
+ * @func: transfer function
+ * @lut: lookup table that defines the color space
+ * @lut_size: size of respective lut.
+ *
+ * Returns:
+ * 0 in case of success. -ENOMEM if fails.
+ */
+static int __set_input_tf(struct dc_color_caps *caps, struct dc_transfer_func *func,
const struct drm_color_lut *lut, uint32_t lut_size)
{
struct dc_gamma *gamma = NULL;
bool res;
- gamma = dc_create_gamma();
- if (!gamma)
- return -ENOMEM;
+ if (lut_size) {
+ gamma = dc_create_gamma();
+ if (!gamma)
+ return -ENOMEM;
- gamma->type = GAMMA_CUSTOM;
- gamma->num_entries = lut_size;
+ gamma->type = GAMMA_CUSTOM;
+ gamma->num_entries = lut_size;
- __drm_lut_to_dc_gamma(lut, gamma, false);
+ __drm_lut_to_dc_gamma(lut, gamma, false);
+ }
- res = mod_color_calculate_degamma_params(NULL, func, gamma, true);
- dc_gamma_release(&gamma);
+ res = mod_color_calculate_degamma_params(caps, func, gamma, gamma != NULL);
+
+ if (gamma)
+ dc_gamma_release(&gamma);
+
+ return res ? 0 : -ENOMEM;
+}
+
+/**
+ * __set_input_tf_32 - calculates the input transfer function based on expected
+ * input space.
+ * @caps: dc color capabilities
+ * @func: transfer function
+ * @lut: lookup table that defines the color space
+ * @lut_size: size of respective lut.
+ *
+ * Returns:
+ * 0 in case of success. -ENOMEM if fails.
+ */
+static int __set_input_tf_32(struct dc_color_caps *caps, struct dc_transfer_func *func,
+ const struct drm_color_lut32 *lut, uint32_t lut_size)
+{
+ struct dc_gamma *gamma = NULL;
+ bool res;
+
+ if (lut_size) {
+ gamma = dc_create_gamma();
+ if (!gamma)
+ return -ENOMEM;
+
+ gamma->type = GAMMA_CUSTOM;
+ gamma->num_entries = lut_size;
+
+ __drm_lut32_to_dc_gamma(lut, gamma);
+ }
+
+ res = mod_color_calculate_degamma_params(caps, func, gamma, gamma != NULL);
+
+ if (gamma)
+ dc_gamma_release(&gamma);
return res ? 0 : -ENOMEM;
}
+static enum dc_transfer_func_predefined
+amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf)
+{
+ switch (tf) {
+ default:
+ case AMDGPU_TRANSFER_FUNCTION_DEFAULT:
+ case AMDGPU_TRANSFER_FUNCTION_IDENTITY:
+ return TRANSFER_FUNCTION_LINEAR;
+ case AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF:
+ case AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF:
+ return TRANSFER_FUNCTION_SRGB;
+ case AMDGPU_TRANSFER_FUNCTION_BT709_OETF:
+ case AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF:
+ return TRANSFER_FUNCTION_BT709;
+ case AMDGPU_TRANSFER_FUNCTION_PQ_EOTF:
+ case AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF:
+ return TRANSFER_FUNCTION_PQ;
+ case AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF:
+ case AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF:
+ return TRANSFER_FUNCTION_GAMMA22;
+ case AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF:
+ case AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF:
+ return TRANSFER_FUNCTION_GAMMA24;
+ case AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF:
+ case AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF:
+ return TRANSFER_FUNCTION_GAMMA26;
+ }
+}
+
+static enum dc_transfer_func_predefined
+amdgpu_colorop_tf_to_dc_tf(enum drm_colorop_curve_1d_type tf)
+{
+ switch (tf) {
+ case DRM_COLOROP_1D_CURVE_SRGB_EOTF:
+ case DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF:
+ return TRANSFER_FUNCTION_SRGB;
+ case DRM_COLOROP_1D_CURVE_PQ_125_EOTF:
+ case DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF:
+ return TRANSFER_FUNCTION_PQ;
+ case DRM_COLOROP_1D_CURVE_BT2020_INV_OETF:
+ case DRM_COLOROP_1D_CURVE_BT2020_OETF:
+ return TRANSFER_FUNCTION_BT709;
+ case DRM_COLOROP_1D_CURVE_GAMMA22:
+ case DRM_COLOROP_1D_CURVE_GAMMA22_INV:
+ return TRANSFER_FUNCTION_GAMMA22;
+ default:
+ return TRANSFER_FUNCTION_LINEAR;
+ }
+}
+
+static void __to_dc_lut3d_color(struct dc_rgb *rgb,
+ const struct drm_color_lut lut,
+ int bit_precision)
+{
+ rgb->red = drm_color_lut_extract(lut.red, bit_precision);
+ rgb->green = drm_color_lut_extract(lut.green, bit_precision);
+ rgb->blue = drm_color_lut_extract(lut.blue, bit_precision);
+}
+
+static void __drm_3dlut_to_dc_3dlut(const struct drm_color_lut *lut,
+ uint32_t lut3d_size,
+ struct tetrahedral_params *params,
+ bool use_tetrahedral_9,
+ int bit_depth)
+{
+ struct dc_rgb *lut0;
+ struct dc_rgb *lut1;
+ struct dc_rgb *lut2;
+ struct dc_rgb *lut3;
+ int lut_i, i;
+
+
+ if (use_tetrahedral_9) {
+ lut0 = params->tetrahedral_9.lut0;
+ lut1 = params->tetrahedral_9.lut1;
+ lut2 = params->tetrahedral_9.lut2;
+ lut3 = params->tetrahedral_9.lut3;
+ } else {
+ lut0 = params->tetrahedral_17.lut0;
+ lut1 = params->tetrahedral_17.lut1;
+ lut2 = params->tetrahedral_17.lut2;
+ lut3 = params->tetrahedral_17.lut3;
+ }
+
+ for (lut_i = 0, i = 0; i < lut3d_size - 4; lut_i++, i += 4) {
+ /*
+ * The 3D LUT RGB values are distributed across four arrays
+ * lut0-3, where the first holds 1229 entries and the other
+ * three hold 1228 each. The bit depth supported for a 3dlut
+ * channel is 12-bit, but DC also supports 10-bit.
+ *
+ * TODO: improve color pipeline API to enable the userspace set
+ * bit depth and 3D LUT size/stride, as specified by VA-API.
+ */
+ __to_dc_lut3d_color(&lut0[lut_i], lut[i], bit_depth);
+ __to_dc_lut3d_color(&lut1[lut_i], lut[i + 1], bit_depth);
+ __to_dc_lut3d_color(&lut2[lut_i], lut[i + 2], bit_depth);
+ __to_dc_lut3d_color(&lut3[lut_i], lut[i + 3], bit_depth);
+ }
+ /* lut0 has 1229 points (lut_size/4 + 1) */
+ __to_dc_lut3d_color(&lut0[lut_i], lut[i], bit_depth);
+}
+
+static void __to_dc_lut3d_32_color(struct dc_rgb *rgb,
+ const struct drm_color_lut32 lut,
+ int bit_precision)
+{
+ rgb->red = drm_color_lut32_extract(lut.red, bit_precision);
+ rgb->green = drm_color_lut32_extract(lut.green, bit_precision);
+ rgb->blue = drm_color_lut32_extract(lut.blue, bit_precision);
+}
+
+static void __drm_3dlut32_to_dc_3dlut(const struct drm_color_lut32 *lut,
+ uint32_t lut3d_size,
+ struct tetrahedral_params *params,
+ bool use_tetrahedral_9,
+ int bit_depth)
+{
+ struct dc_rgb *lut0;
+ struct dc_rgb *lut1;
+ struct dc_rgb *lut2;
+ struct dc_rgb *lut3;
+ int lut_i, i;
+
+
+ if (use_tetrahedral_9) {
+ lut0 = params->tetrahedral_9.lut0;
+ lut1 = params->tetrahedral_9.lut1;
+ lut2 = params->tetrahedral_9.lut2;
+ lut3 = params->tetrahedral_9.lut3;
+ } else {
+ lut0 = params->tetrahedral_17.lut0;
+ lut1 = params->tetrahedral_17.lut1;
+ lut2 = params->tetrahedral_17.lut2;
+ lut3 = params->tetrahedral_17.lut3;
+ }
+
+ for (lut_i = 0, i = 0; i < lut3d_size - 4; lut_i++, i += 4) {
+ /*
+ * The 3D LUT RGB values are distributed across four arrays
+ * lut0-3, where the first holds 1229 entries and the other
+ * three hold 1228 each. The bit depth supported for a 3dlut
+ * channel is 12-bit, but DC also supports 10-bit.
+ *
+ * TODO: improve color pipeline API to enable the userspace set
+ * bit depth and 3D LUT size/stride, as specified by VA-API.
+ */
+ __to_dc_lut3d_32_color(&lut0[lut_i], lut[i], bit_depth);
+ __to_dc_lut3d_32_color(&lut1[lut_i], lut[i + 1], bit_depth);
+ __to_dc_lut3d_32_color(&lut2[lut_i], lut[i + 2], bit_depth);
+ __to_dc_lut3d_32_color(&lut3[lut_i], lut[i + 3], bit_depth);
+ }
+ /* lut0 has 1229 points (lut_size/4 + 1) */
+ __to_dc_lut3d_32_color(&lut0[lut_i], lut[i], bit_depth);
+}
+
+/* amdgpu_dm_atomic_lut3d - set DRM 3D LUT to DC stream
+ * @drm_lut3d: user 3D LUT
+ * @drm_lut3d_size: size of 3D LUT
+ * @lut: DC 3D LUT
+ *
+ * Map user 3D LUT data to DC 3D LUT and all necessary bits to program it
+ * on DCN accordingly.
+ */
+static void amdgpu_dm_atomic_lut3d(const struct drm_color_lut *drm_lut3d,
+ uint32_t drm_lut3d_size,
+ struct dc_3dlut *lut)
+{
+ if (!drm_lut3d_size) {
+ lut->state.bits.initialized = 0;
+ } else {
+ /* Stride and bit depth are not programmable by API yet.
+ * Therefore, only supports 17x17x17 3D LUT (12-bit).
+ */
+ lut->lut_3d.use_tetrahedral_9 = false;
+ lut->lut_3d.use_12bits = true;
+ lut->state.bits.initialized = 1;
+ __drm_3dlut_to_dc_3dlut(drm_lut3d, drm_lut3d_size, &lut->lut_3d,
+ lut->lut_3d.use_tetrahedral_9,
+ MAX_COLOR_3DLUT_BITDEPTH);
+ }
+}
+
+static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut,
+ bool has_rom,
+ enum dc_transfer_func_predefined tf,
+ uint32_t shaper_size,
+ struct dc_transfer_func *func_shaper)
+{
+ int ret = 0;
+
+ if (shaper_size || tf != TRANSFER_FUNCTION_LINEAR) {
+ /*
+ * If user shaper LUT is set, we assume a linear color space
+ * (linearized by degamma 1D LUT or not).
+ */
+ func_shaper->type = TF_TYPE_DISTRIBUTED_POINTS;
+ func_shaper->tf = tf;
+ func_shaper->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+
+ ret = __set_output_tf(func_shaper, shaper_lut, shaper_size, has_rom);
+ } else {
+ func_shaper->type = TF_TYPE_BYPASS;
+ func_shaper->tf = TRANSFER_FUNCTION_LINEAR;
+ }
+
+ return ret;
+}
+
+static int amdgpu_dm_atomic_blend_lut(const struct drm_color_lut *blend_lut,
+ bool has_rom,
+ enum dc_transfer_func_predefined tf,
+ uint32_t blend_size,
+ struct dc_transfer_func *func_blend)
+{
+ int ret = 0;
+
+ if (blend_size || tf != TRANSFER_FUNCTION_LINEAR) {
+ /*
+ * DRM plane gamma LUT or TF means we are linearizing color
+ * space before blending (similar to degamma programming). As
+ * we don't have hardcoded curve support, we use the AMD color
+ * module to fill the parameters that will be translated to HW
+ * points.
+ */
+ func_blend->type = TF_TYPE_DISTRIBUTED_POINTS;
+ func_blend->tf = tf;
+ func_blend->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+
+ ret = __set_input_tf(NULL, func_blend, blend_lut, blend_size);
+ } else {
+ func_blend->type = TF_TYPE_BYPASS;
+ func_blend->tf = TRANSFER_FUNCTION_LINEAR;
+ }
+
+ return ret;
+}
+
/**
- * Verifies that the Degamma and Gamma LUTs attached to the |crtc_state| are of
- * the expected size.
- * Returns 0 on success.
+ * amdgpu_dm_verify_lut3d_size - verifies if 3D LUT is supported and if user
+ * shaper and 3D LUTs match the hw supported size
+ * @adev: amdgpu device
+ * @plane_state: the DRM plane state
+ *
+ * Verifies if pre-blending (DPP) 3D LUT is supported by the HW (DCN 2.0 or
+ * newer) and if the user shaper and 3D LUTs match the supported size.
+ *
+ * Returns:
+ * 0 on success. -EINVAL if the LUT sizes are invalid.
+ */
+int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev,
+ struct drm_plane_state *plane_state)
+{
+ struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+ const struct drm_color_lut *shaper = NULL, *lut3d = NULL;
+ uint32_t exp_size, size, dim_size = MAX_COLOR_3DLUT_SIZE;
+ bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut || adev->dm.dc->caps.color.mpc.preblend;
+
+ /* shaper LUT is only available if 3D LUT color caps */
+ exp_size = has_3dlut ? MAX_COLOR_LUT_ENTRIES : 0;
+ shaper = __extract_blob_lut(dm_plane_state->shaper_lut, &size);
+
+ if (shaper && size != exp_size) {
+ drm_dbg(&adev->ddev,
+ "Invalid Shaper LUT size. Should be %u but got %u.\n",
+ exp_size, size);
+ return -EINVAL;
+ }
+
+ /* The number of 3D LUT entries is the dimension size cubed */
+ exp_size = has_3dlut ? dim_size * dim_size * dim_size : 0;
+ lut3d = __extract_blob_lut(dm_plane_state->lut3d, &size);
+
+ if (lut3d && size != exp_size) {
+ drm_dbg(&adev->ddev,
+ "Invalid 3D LUT size. Should be %u but got %u.\n",
+ exp_size, size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_dm_verify_lut_sizes - verifies if DRM luts match the hw supported sizes
+ * @crtc_state: the DRM CRTC state
+ *
+ * Verifies that the Degamma and Gamma LUTs attached to the &crtc_state
+ * are of the expected size.
+ *
+ * Returns:
+ * 0 on success. -EINVAL if any lut sizes are invalid.
*/
int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state)
{
@@ -316,38 +1168,42 @@ int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state)
}
/**
- * amdgpu_dm_update_crtc_color_mgmt: Maps DRM color management to DC stream.
+ * amdgpu_dm_check_crtc_color_mgmt: Check if DRM color props are programmable by DC.
* @crtc: amdgpu_dm crtc state
+ * @check_only: only check color state without updating the dc stream
*
- * With no plane level color management properties we're free to use any
- * of the HW blocks as long as the CRTC CTM always comes before the
- * CRTC RGM and after the CRTC DGM.
- *
- * The CRTC RGM block will be placed in the RGM LUT block if it is non-linear.
- * The CRTC DGM block will be placed in the DGM LUT block if it is non-linear.
- * The CRTC CTM will be placed in the gamut remap block if it is non-linear.
+ * This function just verifies CRTC LUT sizes, if there is enough space for
+ * output transfer function and if its parameters can be calculated by AMD
+ * color module. It also adjusts some settings for programming CRTC degamma at
+ * plane stage, using plane DGM block.
*
* The RGM block is typically more fully featured and accurate across
* all ASICs - DCE can't support a custom non-linear CRTC DGM.
*
* For supporting both plane level color management and CRTC level color
- * management at once we have to either restrict the usage of CRTC properties
- * or blend adjustments together.
+ * management at once we have to either restrict the usage of some CRTC
+ * properties or blend adjustments together.
*
- * Returns 0 on success.
+ * Returns:
+ * 0 on success. Error code if validation fails.
*/
-int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
+
+int amdgpu_dm_check_crtc_color_mgmt(struct dm_crtc_state *crtc,
+ bool check_only)
{
struct dc_stream_state *stream = crtc->stream;
struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev);
bool has_rom = adev->asic_type <= CHIP_RAVEN;
- struct drm_color_ctm *ctm = NULL;
+ struct dc_transfer_func *out_tf;
const struct drm_color_lut *degamma_lut, *regamma_lut;
uint32_t degamma_size, regamma_size;
bool has_regamma, has_degamma;
+ enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_LINEAR;
bool is_legacy;
int r;
+ tf = amdgpu_tf_to_dc_tf(crtc->regamma_tf);
+
r = amdgpu_dm_verify_lut_sizes(&crtc->base);
if (r)
return r;
@@ -367,6 +1223,14 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
crtc->cm_has_degamma = false;
crtc->cm_is_degamma_srgb = false;
+ if (check_only) {
+ out_tf = kvzalloc(sizeof(*out_tf), GFP_KERNEL);
+ if (!out_tf)
+ return -ENOMEM;
+ } else {
+ out_tf = &stream->out_transfer_func;
+ }
+
/* Setup regamma and degamma. */
if (is_legacy) {
/*
@@ -381,29 +1245,21 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
* inverse color ramp in legacy userspace.
*/
crtc->cm_is_degamma_srgb = true;
- stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
- stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB;
-
- r = __set_legacy_tf(stream->out_transfer_func, regamma_lut,
- regamma_size, has_rom);
- if (r)
- return r;
- } else if (has_regamma) {
- /* CRTC RGM goes into RGM LUT. */
- stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
- stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
-
- r = __set_output_tf(stream->out_transfer_func, regamma_lut,
- regamma_size, has_rom);
- if (r)
- return r;
- } else {
+ out_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
+ out_tf->tf = TRANSFER_FUNCTION_SRGB;
/*
- * No CRTC RGM means we can just put the block into bypass
- * since we don't have any plane level adjustments using it.
+ * Note: although we pass has_rom as parameter here, we never
+ * actually use ROM because the color module only takes the ROM
+ * path if transfer_func->type == PREDEFINED.
+ *
+ * See more in mod_color_calculate_regamma_params()
*/
- stream->out_transfer_func->type = TF_TYPE_BYPASS;
- stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
+ r = __set_legacy_tf(out_tf, regamma_lut,
+ regamma_size, has_rom);
+ } else {
+ regamma_size = has_regamma ? regamma_size : 0;
+ r = amdgpu_dm_set_atomic_regamma(out_tf, regamma_lut,
+ regamma_size, has_rom, tf);
}
/*
@@ -412,6 +1268,43 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
* have to place the CTM in the OCSC in that case.
*/
crtc->cm_has_degamma = has_degamma;
+ if (check_only)
+ kvfree(out_tf);
+
+ return r;
+}
+
+/**
+ * amdgpu_dm_update_crtc_color_mgmt: Maps DRM color management to DC stream.
+ * @crtc: amdgpu_dm crtc state
+ *
+ * With no plane level color management properties we're free to use any
+ * of the HW blocks as long as the CRTC CTM always comes before the
+ * CRTC RGM and after the CRTC DGM.
+ *
+ * - The CRTC RGM block will be placed in the RGM LUT block if it is non-linear.
+ * - The CRTC DGM block will be placed in the DGM LUT block if it is non-linear.
+ * - The CRTC CTM will be placed in the gamut remap block if it is non-linear.
+ *
+ * The RGM block is typically more fully featured and accurate across
+ * all ASICs - DCE can't support a custom non-linear CRTC DGM.
+ *
+ * For supporting both plane level color management and CRTC level color
+ * management at once we have to either restrict the usage of CRTC properties
+ * or blend adjustments together.
+ *
+ * Returns:
+ * 0 on success. Error code if setup fails.
+ */
+int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
+{
+ struct dc_stream_state *stream = crtc->stream;
+ struct drm_color_ctm *ctm = NULL;
+ int ret;
+
+ ret = amdgpu_dm_check_crtc_color_mgmt(crtc, false);
+ if (ret)
+ return ret;
/* Setup CRTC CTM. */
if (crtc->base.ctm) {
@@ -439,19 +1332,10 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
return 0;
}
-/**
- * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane.
- * @crtc: amdgpu_dm crtc state
- * @dc_plane_state: target DC surface
- *
- * Update the underlying dc_stream_state's input transfer function (ITF) in
- * preparation for hardware commit. The transfer function used depends on
- * the prepartion done on the stream for color management.
- *
- * Returns 0 on success.
- */
-int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
- struct dc_plane_state *dc_plane_state)
+static int
+map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
+ struct dc_plane_state *dc_plane_state,
+ struct dc_color_caps *caps)
{
const struct drm_color_lut *degamma_lut;
enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
@@ -474,8 +1358,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
&degamma_size);
ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES);
- dc_plane_state->in_transfer_func->type =
- TF_TYPE_DISTRIBUTED_POINTS;
+ dc_plane_state->in_transfer_func.type = TF_TYPE_DISTRIBUTED_POINTS;
/*
* This case isn't fully correct, but also fairly
@@ -502,32 +1385,671 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
* map these to the atomic one instead.
*/
if (crtc->cm_is_degamma_srgb)
- dc_plane_state->in_transfer_func->tf = tf;
+ dc_plane_state->in_transfer_func.tf = tf;
else
- dc_plane_state->in_transfer_func->tf =
+ dc_plane_state->in_transfer_func.tf =
TRANSFER_FUNCTION_LINEAR;
- r = __set_input_tf(dc_plane_state->in_transfer_func,
+ r = __set_input_tf(caps, &dc_plane_state->in_transfer_func,
degamma_lut, degamma_size);
if (r)
return r;
- } else if (crtc->cm_is_degamma_srgb) {
+ } else {
/*
* For legacy gamma support we need the regamma input
* in linear space. Assume that the input is sRGB.
*/
- dc_plane_state->in_transfer_func->type = TF_TYPE_PREDEFINED;
- dc_plane_state->in_transfer_func->tf = tf;
+ dc_plane_state->in_transfer_func.type = TF_TYPE_PREDEFINED;
+ dc_plane_state->in_transfer_func.tf = tf;
if (tf != TRANSFER_FUNCTION_SRGB &&
- !mod_color_calculate_degamma_params(NULL,
- dc_plane_state->in_transfer_func, NULL, false))
+ !mod_color_calculate_degamma_params(caps,
+ &dc_plane_state->in_transfer_func,
+ NULL, false))
return -ENOMEM;
- } else {
- /* ...Otherwise we can just bypass the DGM block. */
- dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS;
- dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
}
return 0;
}
+
+static int
+__set_dm_plane_degamma(struct drm_plane_state *plane_state,
+ struct dc_plane_state *dc_plane_state,
+ struct dc_color_caps *color_caps)
+{
+ struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+ const struct drm_color_lut *degamma_lut;
+ enum amdgpu_transfer_function tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+ uint32_t degamma_size;
+ bool has_degamma_lut;
+ int ret;
+
+ degamma_lut = __extract_blob_lut(dm_plane_state->degamma_lut,
+ &degamma_size);
+
+ has_degamma_lut = degamma_lut &&
+ !__is_lut_linear(degamma_lut, degamma_size);
+
+ tf = dm_plane_state->degamma_tf;
+
+ /* If we don't have plane degamma LUT nor TF to set on DC, we have
+ * nothing to do here; report -EINVAL to the caller.
+ */
+ if (!has_degamma_lut && tf == AMDGPU_TRANSFER_FUNCTION_DEFAULT)
+ return -EINVAL;
+
+ dc_plane_state->in_transfer_func.tf = amdgpu_tf_to_dc_tf(tf);
+
+ if (has_degamma_lut) {
+ ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES);
+
+ dc_plane_state->in_transfer_func.type =
+ TF_TYPE_DISTRIBUTED_POINTS;
+
+ ret = __set_input_tf(color_caps, &dc_plane_state->in_transfer_func,
+ degamma_lut, degamma_size);
+ if (ret)
+ return ret;
+ } else {
+ dc_plane_state->in_transfer_func.type =
+ TF_TYPE_PREDEFINED;
+
+ if (!mod_color_calculate_degamma_params(color_caps,
+ &dc_plane_state->in_transfer_func, NULL, false))
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static int
+__set_colorop_in_tf_1d_curve(struct dc_plane_state *dc_plane_state,
+ struct drm_colorop_state *colorop_state)
+{
+ struct dc_transfer_func *tf = &dc_plane_state->in_transfer_func;
+ struct drm_colorop *colorop = colorop_state->colorop;
+ struct drm_device *drm = colorop->dev;
+
+ if (colorop->type != DRM_COLOROP_1D_CURVE)
+ return -EINVAL;
+
+ if (!(BIT(colorop_state->curve_1d_type) & amdgpu_dm_supported_degam_tfs))
+ return -EINVAL;
+
+ if (colorop_state->bypass) {
+ tf->type = TF_TYPE_BYPASS;
+ tf->tf = TRANSFER_FUNCTION_LINEAR;
+ return 0;
+ }
+
+ drm_dbg(drm, "Degamma colorop with ID: %d\n", colorop->base.id);
+
+ tf->type = TF_TYPE_PREDEFINED;
+ tf->tf = amdgpu_colorop_tf_to_dc_tf(colorop_state->curve_1d_type);
+
+ return 0;
+}
+
+static int
+__set_dm_plane_colorop_degamma(struct drm_plane_state *plane_state,
+ struct dc_plane_state *dc_plane_state,
+ struct drm_colorop *colorop)
+{
+ struct drm_colorop *old_colorop;
+ struct drm_colorop_state *colorop_state = NULL, *new_colorop_state;
+ struct drm_atomic_state *state = plane_state->state;
+ int i = 0;
+
+ old_colorop = colorop;
+
+ /* 1st op: 1d curve - degamma */
+ for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) {
+ if (new_colorop_state->colorop == old_colorop &&
+ (BIT(new_colorop_state->curve_1d_type) & amdgpu_dm_supported_degam_tfs)) {
+ colorop_state = new_colorop_state;
+ break;
+ }
+ }
+
+ if (!colorop_state)
+ return -EINVAL;
+
+ return __set_colorop_in_tf_1d_curve(dc_plane_state, colorop_state);
+}
+
+static int
+__set_dm_plane_colorop_3x4_matrix(struct drm_plane_state *plane_state,
+ struct dc_plane_state *dc_plane_state,
+ struct drm_colorop *colorop)
+{
+ struct drm_colorop *old_colorop;
+ struct drm_colorop_state *colorop_state = NULL, *new_colorop_state;
+ struct drm_atomic_state *state = plane_state->state;
+ const struct drm_device *dev = colorop->dev;
+ const struct drm_property_blob *blob;
+ struct drm_color_ctm_3x4 *ctm = NULL;
+ int i = 0;
+
+ /* 3x4 matrix */
+ old_colorop = colorop;
+ for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) {
+ if (new_colorop_state->colorop == old_colorop &&
+ new_colorop_state->colorop->type == DRM_COLOROP_CTM_3X4) {
+ colorop_state = new_colorop_state;
+ break;
+ }
+ }
+
+ if (colorop_state && !colorop_state->bypass && colorop->type == DRM_COLOROP_CTM_3X4) {
+ drm_dbg(dev, "3x4 matrix colorop with ID: %d\n", colorop->base.id);
+ blob = colorop_state->data;
+ if (blob->length == sizeof(struct drm_color_ctm_3x4)) {
+ ctm = (struct drm_color_ctm_3x4 *) blob->data;
+ __drm_ctm_3x4_to_dc_matrix(ctm, dc_plane_state->gamut_remap_matrix.matrix);
+ dc_plane_state->gamut_remap_matrix.enable_remap = true;
+ dc_plane_state->input_csc_color_matrix.enable_adjustment = false;
+ } else {
+ drm_warn(dev, "blob->length (%zu) isn't equal to drm_color_ctm_3x4 (%zu)\n",
+ blob->length, sizeof(struct drm_color_ctm_3x4));
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * __set_dm_plane_colorop_multiplier - apply a multiplier colorop to the plane
+ * @plane_state: DRM plane state whose atomic state is searched
+ * @dc_plane_state: DC plane state whose hdr_mult is written
+ * @colorop: pipeline element expected to be the DRM_COLOROP_MULTIPLIER op
+ *
+ * hdr_mult is only written when @colorop has a new state in this commit,
+ * that state is not bypassed and the colorop is a multiplier.
+ *
+ * Returns:
+ * Always 0.
+ */
+static int
+__set_dm_plane_colorop_multiplier(struct drm_plane_state *plane_state,
+				  struct dc_plane_state *dc_plane_state,
+				  struct drm_colorop *colorop)
+{
+	struct drm_atomic_state *state = plane_state->state;
+	const struct drm_device *dev = colorop->dev;
+	struct drm_colorop *const wanted = colorop;
+	struct drm_colorop_state *match = NULL, *cur;
+	int n = 0;
+
+	/* The iterator reuses @colorop as its cursor; compare against @wanted. */
+	for_each_new_colorop_in_state(state, colorop, cur, n) {
+		if (cur->colorop != wanted)
+			continue;
+		if (cur->colorop->type != DRM_COLOROP_MULTIPLIER)
+			continue;
+
+		match = cur;
+		break;
+	}
+
+	/* Nothing to program: no new state, bypassed, or not a multiplier. */
+	if (!match || match->bypass || colorop->type != DRM_COLOROP_MULTIPLIER)
+		return 0;
+
+	drm_dbg(dev, "Multiplier colorop with ID: %d\n", colorop->base.id);
+	dc_plane_state->hdr_mult = amdgpu_dm_fixpt_from_s3132(match->multiplier);
+
+	return 0;
+}
+
+/*
+ * __set_dm_plane_colorop_shaper - program the shaper from its two colorops
+ * @plane_state: DRM plane state whose atomic state is searched
+ * @dc_plane_state: DC plane state whose in_shaper_func is written
+ * @colorop: the shaper 1D curve colorop; its ->next must be the shaper 1D LUT
+ *
+ * Handles both the shaper TF (1D curve) and the following shaper LUT. The
+ * curve, when active, selects the predefined transfer function; the LUT, when
+ * active and of the advertised size, is applied on top of that function. If
+ * neither ends up enabled the shaper is set to TF_TYPE_BYPASS.
+ *
+ * Returns:
+ * 0 on success, -EINVAL when there is no colorop after the curve, or the
+ * error returned by __set_output_tf()/__set_output_tf_32().
+ */
+static int
+__set_dm_plane_colorop_shaper(struct drm_plane_state *plane_state,
+			      struct dc_plane_state *dc_plane_state,
+			      struct drm_colorop *colorop)
+{
+	struct drm_colorop *old_colorop;
+	struct drm_colorop_state *colorop_state = NULL, *new_colorop_state;
+	struct drm_atomic_state *state = plane_state->state;
+	enum dc_transfer_func_predefined default_tf = TRANSFER_FUNCTION_LINEAR;
+	struct dc_transfer_func *tf = &dc_plane_state->in_shaper_func;
+	const struct drm_color_lut32 *shaper_lut;
+	struct drm_device *dev = colorop->dev;
+	bool enabled = false;
+	u32 shaper_size;
+	int i = 0, ret = 0;
+
+	/* 1D Curve - SHAPER TF */
+	old_colorop = colorop;
+	for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) {
+		if (new_colorop_state->colorop == old_colorop &&
+		    (BIT(new_colorop_state->curve_1d_type) & amdgpu_dm_supported_shaper_tfs)) {
+			colorop_state = new_colorop_state;
+			break;
+		}
+	}
+
+	if (colorop_state && !colorop_state->bypass && colorop->type == DRM_COLOROP_1D_CURVE) {
+		drm_dbg(dev, "Shaper TF colorop with ID: %d\n", colorop->base.id);
+		tf->type = TF_TYPE_DISTRIBUTED_POINTS;
+		/* Remember the curve so the LUT stage below builds on top of it. */
+		tf->tf = default_tf = amdgpu_colorop_tf_to_dc_tf(colorop_state->curve_1d_type);
+		tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+		ret = __set_output_tf(tf, 0, 0, false);
+		if (ret)
+			return ret;
+		enabled = true;
+	}
+
+	/* 1D LUT - SHAPER LUT */
+	colorop = old_colorop->next;
+	if (!colorop) {
+		drm_dbg(dev, "no Shaper LUT colorop found\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Drop the curve's state before searching again: when the LUT has no
+	 * new state in this commit the lookup must yield NULL instead of
+	 * leaving the curve's state to be mistaken for the LUT's.
+	 */
+	colorop_state = NULL;
+	old_colorop = colorop;
+	for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) {
+		if (new_colorop_state->colorop == old_colorop &&
+		    new_colorop_state->colorop->type == DRM_COLOROP_1D_LUT) {
+			colorop_state = new_colorop_state;
+			break;
+		}
+	}
+
+	if (colorop_state && !colorop_state->bypass && colorop->type == DRM_COLOROP_1D_LUT) {
+		drm_dbg(dev, "Shaper LUT colorop with ID: %d\n", colorop->base.id);
+		tf->type = TF_TYPE_DISTRIBUTED_POINTS;
+		tf->tf = default_tf;
+		tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+		shaper_lut = __extract_blob_lut32(colorop_state->data, &shaper_size);
+		shaper_size = shaper_lut != NULL ? shaper_size : 0;
+
+		/* Custom LUT size must be the same as supported size */
+		if (shaper_size == colorop->size) {
+			ret = __set_output_tf_32(tf, shaper_lut, shaper_size, false);
+			if (ret)
+				return ret;
+			enabled = true;
+		}
+	}
+
+	if (!enabled)
+		tf->type = TF_TYPE_BYPASS;
+
+	return 0;
+}
+
+/* __set_colorop_3dlut - set DRM 3D LUT to DC stream
+ * @drm_lut3d: user 3D LUT entries
+ * @drm_lut3d_size: number of entries in @drm_lut3d
+ * @lut: DC 3D LUT to fill
+ *
+ * Converts the user 3D LUT into the DC representation and sets the bits DC
+ * needs to program it. Only the 17x17x17, 12-bit layout is produced. With a
+ * zero size the DC LUT is marked uninitialized instead.
+ *
+ * Returns:
+ * 0 on success. -EINVAL if drm_lut3d_size is zero.
+ */
+static int __set_colorop_3dlut(const struct drm_color_lut32 *drm_lut3d,
+			       uint32_t drm_lut3d_size,
+			       struct dc_3dlut *lut)
+{
+	const bool has_entries = drm_lut3d_size != 0;
+
+	lut->state.bits.initialized = has_entries ? 1 : 0;
+	if (!has_entries)
+		return -EINVAL;
+
+	/* Only supports 17x17x17 3D LUT (12-bit) now */
+	lut->lut_3d.use_12bits = true;
+	lut->lut_3d.use_tetrahedral_9 = false;
+
+	/* use_tetrahedral_9 was just cleared, so the 17-point layout is used */
+	__drm_3dlut32_to_dc_3dlut(drm_lut3d, drm_lut3d_size, &lut->lut_3d,
+				  false, 12);
+
+	return 0;
+}
+
+/*
+ * __set_dm_plane_colorop_3dlut - program the plane 3D LUT from its colorop
+ * @plane_state: DRM plane state whose atomic state is searched
+ * @dc_plane_state: DC plane state whose lut3d_func (and shaper) is written
+ * @colorop: pipeline element expected to be the DRM_COLOROP_3D_LUT op
+ *
+ * Does nothing when @colorop has no new state in this commit, is bypassed or
+ * is not a 3D LUT. Because the 3D LUT needs the shaper, a shaper that is
+ * still in bypass is switched to a linear distributed-points function.
+ *
+ * Returns:
+ * 0 on success or when nothing had to be programmed, -EINVAL when the
+ * hardware has no 3D LUT or the LUT is empty, or the error returned by
+ * __set_output_tf_32().
+ */
+static int
+__set_dm_plane_colorop_3dlut(struct drm_plane_state *plane_state,
+			     struct dc_plane_state *dc_plane_state,
+			     struct drm_colorop *colorop)
+{
+	const struct amdgpu_device *adev = drm_to_adev(colorop->dev);
+	struct dc_transfer_func *shaper = &dc_plane_state->in_shaper_func;
+	struct drm_atomic_state *state = plane_state->state;
+	const struct drm_device *dev = colorop->dev;
+	struct drm_colorop *const wanted = colorop;
+	struct drm_colorop_state *match = NULL, *cur;
+	const struct drm_color_lut32 *entries;
+	uint32_t lut3d_size;
+	int n = 0, err;
+
+	/* 3D LUT */
+	for_each_new_colorop_in_state(state, colorop, cur, n) {
+		if (cur->colorop != wanted || cur->colorop->type != DRM_COLOROP_3D_LUT)
+			continue;
+
+		match = cur;
+		break;
+	}
+
+	if (!match || match->bypass || colorop->type != DRM_COLOROP_3D_LUT)
+		return 0;
+
+	if (!adev->dm.dc->caps.color.dpp.hw_3d_lut) {
+		drm_dbg(dev, "3D LUT is not supported by hardware\n");
+		return -EINVAL;
+	}
+
+	drm_dbg(dev, "3D LUT colorop with ID: %d\n", colorop->base.id);
+	entries = __extract_blob_lut32(match->data, &lut3d_size);
+	if (!entries)
+		lut3d_size = 0;
+
+	err = __set_colorop_3dlut(entries, lut3d_size, &dc_plane_state->lut3d_func);
+	if (err) {
+		drm_dbg(dev, "3D LUT colorop with ID: %d has LUT size = %d\n",
+			colorop->base.id, lut3d_size);
+		return err;
+	}
+
+	/* A shaper that is already active is left exactly as it was set up. */
+	if (shaper->type != TF_TYPE_BYPASS)
+		return 0;
+
+	/* 3D LUT requires shaper. If shaper colorop is bypassed, enable shaper curve
+	 * with TRANSFER_FUNCTION_LINEAR
+	 */
+	shaper->type = TF_TYPE_DISTRIBUTED_POINTS;
+	shaper->tf = TRANSFER_FUNCTION_LINEAR;
+	shaper->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+
+	return __set_output_tf_32(shaper, NULL, 0, false);
+}
+
+/*
+ * __set_dm_plane_colorop_blend - program the blend TF from its two colorops
+ * @plane_state: DRM plane state whose atomic state is searched
+ * @dc_plane_state: DC plane state whose blend_tf is written
+ * @colorop: the blend 1D curve colorop; its ->next must be the blend 1D LUT
+ *
+ * Handles both the BLND TF (1D curve) and the following BLND LUT. The curve,
+ * when active, selects the predefined transfer function; the LUT, when active
+ * and of the advertised size, is applied on top of that function.
+ *
+ * Returns:
+ * 0 on success, -EINVAL when there is no colorop after the curve.
+ */
+static int
+__set_dm_plane_colorop_blend(struct drm_plane_state *plane_state,
+			     struct dc_plane_state *dc_plane_state,
+			     struct drm_colorop *colorop)
+{
+	struct drm_colorop *old_colorop;
+	struct drm_colorop_state *colorop_state = NULL, *new_colorop_state;
+	struct drm_atomic_state *state = plane_state->state;
+	enum dc_transfer_func_predefined default_tf = TRANSFER_FUNCTION_LINEAR;
+	struct dc_transfer_func *tf = &dc_plane_state->blend_tf;
+	const struct drm_color_lut32 *blend_lut = NULL;
+	struct drm_device *dev = colorop->dev;
+	uint32_t blend_size = 0;
+	int i = 0;
+
+	/* 1D Curve - BLND TF */
+	old_colorop = colorop;
+	for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) {
+		if (new_colorop_state->colorop == old_colorop &&
+		    (BIT(new_colorop_state->curve_1d_type) & amdgpu_dm_supported_blnd_tfs)) {
+			colorop_state = new_colorop_state;
+			break;
+		}
+	}
+
+	if (colorop_state && !colorop_state->bypass && colorop->type == DRM_COLOROP_1D_CURVE &&
+	    (BIT(colorop_state->curve_1d_type) & amdgpu_dm_supported_blnd_tfs)) {
+		drm_dbg(dev, "Blend TF colorop with ID: %d\n", colorop->base.id);
+		tf->type = TF_TYPE_DISTRIBUTED_POINTS;
+		/* Remember the curve so the LUT stage below builds on top of it. */
+		tf->tf = default_tf = amdgpu_colorop_tf_to_dc_tf(colorop_state->curve_1d_type);
+		tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+		/* NOTE(review): the return value is not checked here - confirm intent. */
+		__set_input_tf_32(NULL, tf, blend_lut, blend_size);
+	}
+
+	/* 1D Curve - BLND LUT */
+	colorop = old_colorop->next;
+	if (!colorop) {
+		drm_dbg(dev, "no Blend LUT colorop found\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Drop the curve's state before searching again: when the LUT has no
+	 * new state in this commit the lookup must yield NULL instead of
+	 * leaving the curve's state to be mistaken for the LUT's.
+	 */
+	colorop_state = NULL;
+	old_colorop = colorop;
+	for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) {
+		if (new_colorop_state->colorop == old_colorop &&
+		    new_colorop_state->colorop->type == DRM_COLOROP_1D_LUT) {
+			colorop_state = new_colorop_state;
+			break;
+		}
+	}
+
+	/*
+	 * NOTE(review): curve_1d_type is tested on a 1D LUT colorop state here;
+	 * presumably it keeps its default value for LUT colorops - confirm.
+	 */
+	if (colorop_state && !colorop_state->bypass && colorop->type == DRM_COLOROP_1D_LUT &&
+	    (BIT(colorop_state->curve_1d_type) & amdgpu_dm_supported_blnd_tfs)) {
+		drm_dbg(dev, "Blend LUT colorop with ID: %d\n", colorop->base.id);
+		tf->type = TF_TYPE_DISTRIBUTED_POINTS;
+		tf->tf = default_tf;
+		tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+		blend_lut = __extract_blob_lut32(colorop_state->data, &blend_size);
+		blend_size = blend_lut != NULL ? blend_size : 0;
+
+		/* Custom LUT size must be the same as supported size */
+		if (blend_size == colorop->size)
+			__set_input_tf_32(NULL, tf, blend_lut, blend_size);
+	}
+
+	return 0;
+}
+
+/*
+ * amdgpu_dm_plane_set_color_properties - map driver plane color properties
+ * @plane_state: DRM plane state carrying the dm_plane_state properties
+ * @dc_plane_state: DC plane state to program
+ *
+ * Programs the HDR multiplier, 3D LUT, shaper (TF + LUT) and blend (TF + LUT)
+ * of @dc_plane_state from the values stored in the dm_plane_state.
+ *
+ * Returns:
+ * 0 on success, otherwise the error returned by
+ * amdgpu_dm_atomic_shaper_lut() or amdgpu_dm_atomic_blend_lut().
+ */
+static int
+amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state,
+				     struct dc_plane_state *dc_plane_state)
+{
+	struct dm_plane_state *dm_state = to_dm_plane_state(plane_state);
+	const struct drm_color_lut *shaper, *cube, *blend;
+	uint32_t n_shaper, n_cube, n_blend;
+	int err;
+
+	dc_plane_state->hdr_mult = amdgpu_dm_fixpt_from_s3132(dm_state->hdr_mult);
+
+	/* A missing blob is treated as a LUT with zero entries. */
+	shaper = __extract_blob_lut(dm_state->shaper_lut, &n_shaper);
+	if (!shaper)
+		n_shaper = 0;
+
+	cube = __extract_blob_lut(dm_state->lut3d, &n_cube);
+	if (!cube)
+		n_cube = 0;
+
+	amdgpu_dm_atomic_lut3d(cube, n_cube, &dc_plane_state->lut3d_func);
+
+	err = amdgpu_dm_atomic_shaper_lut(shaper, false,
+					  amdgpu_tf_to_dc_tf(dm_state->shaper_tf),
+					  n_shaper,
+					  &dc_plane_state->in_shaper_func);
+	if (err) {
+		drm_dbg_kms(plane_state->plane->dev,
+			    "setting plane %d shaper LUT failed.\n",
+			    plane_state->plane->index);
+
+		return err;
+	}
+
+	blend = __extract_blob_lut(dm_state->blend_lut, &n_blend);
+	if (!blend)
+		n_blend = 0;
+
+	err = amdgpu_dm_atomic_blend_lut(blend, false,
+					 amdgpu_tf_to_dc_tf(dm_state->blend_tf),
+					 n_blend, &dc_plane_state->blend_tf);
+	if (err)
+		drm_dbg_kms(plane_state->plane->dev,
+			    "setting plane %d gamma lut failed.\n",
+			    plane_state->plane->index);
+
+	return err;
+}
+
+/*
+ * amdgpu_dm_plane_set_colorop_properties - program a plane from its pipeline
+ * @plane_state: DRM plane state holding the selected color pipeline
+ * @dc_plane_state: DC plane state to program
+ *
+ * Walks plane_state->color_pipeline in its fixed order: degamma TF,
+ * multiplier, 3x4 matrix, then (only when the hardware has a 3D LUT) shaper
+ * TF, shaper LUT and 3D LUT, and finally blend TF and blend LUT. The LUT that
+ * follows the shaper/blend TF is consumed by the TF's helper, so the walk
+ * just steps over it.
+ *
+ * Returns:
+ * 0 on success, -EINVAL when the pipeline is missing or shorter than
+ * expected, or the error returned by one of the per-colorop helpers.
+ */
+static int
+amdgpu_dm_plane_set_colorop_properties(struct drm_plane_state *plane_state,
+				       struct dc_plane_state *dc_plane_state)
+{
+	struct drm_device *dev = plane_state->plane->dev;
+	struct amdgpu_device *adev = drm_to_adev(dev);
+	struct drm_colorop *op = plane_state->color_pipeline;
+	int err;
+
+	/* 1D Curve - DEGAM TF */
+	if (!op)
+		return -EINVAL;
+
+	err = __set_dm_plane_colorop_degamma(plane_state, dc_plane_state, op);
+	if (err)
+		return err;
+
+	/* Multiplier */
+	op = op->next;
+	if (!op) {
+		drm_dbg(dev, "no multiplier colorop found\n");
+		return -EINVAL;
+	}
+
+	err = __set_dm_plane_colorop_multiplier(plane_state, dc_plane_state, op);
+	if (err)
+		return err;
+
+	/* 3x4 matrix */
+	op = op->next;
+	if (!op) {
+		drm_dbg(dev, "no 3x4 matrix colorop found\n");
+		return -EINVAL;
+	}
+
+	err = __set_dm_plane_colorop_3x4_matrix(plane_state, dc_plane_state, op);
+	if (err)
+		return err;
+
+	if (adev->dm.dc->caps.color.dpp.hw_3d_lut) {
+		/* 1D Curve & LUT - SHAPER TF & LUT */
+		op = op->next;
+		if (!op) {
+			drm_dbg(dev, "no Shaper TF colorop found\n");
+			return -EINVAL;
+		}
+
+		err = __set_dm_plane_colorop_shaper(plane_state, dc_plane_state, op);
+		if (err)
+			return err;
+
+		/* Shaper LUT colorop is already handled, just skip here */
+		op = op->next;
+		if (!op)
+			return -EINVAL;
+
+		/* 3D LUT */
+		op = op->next;
+		if (!op) {
+			drm_dbg(dev, "no 3D LUT colorop found\n");
+			return -EINVAL;
+		}
+
+		err = __set_dm_plane_colorop_3dlut(plane_state, dc_plane_state, op);
+		if (err)
+			return err;
+	}
+
+	/* 1D Curve & LUT - BLND TF & LUT */
+	op = op->next;
+	if (!op) {
+		drm_dbg(dev, "no Blend TF colorop found\n");
+		return -EINVAL;
+	}
+
+	err = __set_dm_plane_colorop_blend(plane_state, dc_plane_state, op);
+	if (err)
+		return err;
+
+	/* BLND LUT colorop is already handled, just skip here */
+	return op->next ? 0 : -EINVAL;
+}
+
+/**
+ * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane.
+ * @crtc: amdgpu_dm crtc state
+ * @plane_state: DRM plane state
+ * @dc_plane_state: target DC surface
+ *
+ * Update the underlying dc_stream_state's input transfer function (ITF) in
+ * preparation for hardware commit. The transfer function used depends on
+ * the preparation done on the stream for color management.
+ *
+ * The steps are, in order: verify the 3D LUT size, reset the plane degamma to
+ * bypass, apply the plane degamma, map the CRTC degamma to the plane when no
+ * plane degamma is set, set up the plane CTM, and finally program the
+ * remaining blocks from the colorop pipeline, falling back to the driver
+ * plane color properties when the colorop path returns an error.
+ *
+ * Returns:
+ * 0 on success. -ENOMEM if mem allocation fails. -EINVAL if both plane and
+ * CRTC degamma are set. Errors from amdgpu_dm_verify_lut3d_size(),
+ * map_crtc_degamma_to_dc_plane() and amdgpu_dm_plane_set_color_properties()
+ * are returned unchanged.
+ */
+int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
+ struct drm_plane_state *plane_state,
+ struct dc_plane_state *dc_plane_state)
+{
+ struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev);
+ struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+ struct drm_color_ctm_3x4 *ctm = NULL;
+ struct dc_color_caps *color_caps = NULL;
+ bool has_crtc_cm_degamma;
+ int ret;
+
+ ret = amdgpu_dm_verify_lut3d_size(adev, plane_state);
+ if (ret) {
+ drm_dbg_driver(&adev->ddev, "amdgpu_dm_verify_lut3d_size() failed\n");
+ return ret;
+ }
+
+ if (dc_plane_state->ctx && dc_plane_state->ctx->dc) /* color_caps stays NULL otherwise */
+ color_caps = &dc_plane_state->ctx->dc->caps.color;
+
+ /* Initially, we can just bypass the DGM block. */
+ dc_plane_state->in_transfer_func.type = TF_TYPE_BYPASS;
+ dc_plane_state->in_transfer_func.tf = TRANSFER_FUNCTION_LINEAR;
+
+ /* After, we start to update values according to color props */
+ has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb);
+
+ ret = __set_dm_plane_degamma(plane_state, dc_plane_state, color_caps);
+ if (ret == -ENOMEM) /* any other result is evaluated by the checks below */
+ return ret;
+
+ /* We only have one degamma block available (pre-blending) for the
+ * whole color correction pipeline, so that we can't actually perform
+ * plane and CRTC degamma at the same time. Explicitly reject atomic
+ * updates when userspace sets both plane and CRTC degamma properties.
+ * Here -EINVAL from __set_dm_plane_degamma() is taken to mean that no
+ * plane degamma is set (see the comment that follows).
+ */
+ if (has_crtc_cm_degamma && ret != -EINVAL) {
+ drm_dbg_kms(crtc->base.crtc->dev,
+ "doesn't support plane and CRTC degamma at the same time\n");
+ return -EINVAL;
+ }
+
+ /* If we are here, it means we don't have plane degamma settings, check
+ * if we have CRTC degamma waiting for mapping to pre-blending degamma
+ * block
+ */
+ if (has_crtc_cm_degamma) {
+ /*
+ * AMD HW doesn't have post-blending degamma caps. When DRM
+ * CRTC atomic degamma is set, we maps it to DPP degamma block
+ * (pre-blending) or, on legacy gamma, we use DPP degamma to
+ * linearize (implicit degamma) from sRGB/BT709 according to
+ * the input space.
+ */
+ ret = map_crtc_degamma_to_dc_plane(crtc, dc_plane_state, color_caps);
+ if (ret)
+ return ret;
+ }
+
+ /* Setup plane CTM (read from dm_plane_state->ctm). */
+ if (dm_plane_state->ctm) {
+ ctm = (struct drm_color_ctm_3x4 *)dm_plane_state->ctm->data;
+ /*
+ * DCN2 and older don't support both pre-blending and
+ * post-blending gamut remap. For this HW family, if we have
+ * the plane and CRTC CTMs simultaneously, CRTC CTM takes
+ * priority, and we discard plane CTM, as implemented in
+ * dcn10_program_gamut_remap(). However, DCN3+ has DPP
+ * (pre-blending) and MPC (post-blending) `gamut remap` blocks;
+ * therefore, we can program plane and CRTC CTMs together by
+ * mapping CRTC CTM to MPC and keeping plane CTM setup at DPP,
+ * as it's done by dcn30_program_gamut_remap().
+ */
+ __drm_ctm_3x4_to_dc_matrix(ctm, dc_plane_state->gamut_remap_matrix.matrix);
+
+ dc_plane_state->gamut_remap_matrix.enable_remap = true;
+ dc_plane_state->input_csc_color_matrix.enable_adjustment = false;
+ } else {
+ /* Bypass CTM. */
+ dc_plane_state->gamut_remap_matrix.enable_remap = false;
+ dc_plane_state->input_csc_color_matrix.enable_adjustment = false;
+ }
+
+ if (!amdgpu_dm_plane_set_colorop_properties(plane_state, dc_plane_state)) /* colorop pipeline applied */
+ return 0;
+
+ return amdgpu_dm_plane_set_color_properties(plane_state, dc_plane_state); /* fallback on any colorop error */
+}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
new file mode 100644
index 000000000000..d585618b8064
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include <drm/drm_print.h>
+#include <drm/drm_plane.h>
+#include <drm/drm_property.h>
+#include <drm/drm_colorop.h>
+
+#include "amdgpu.h"
+#include "amdgpu_dm_colorop.h"
+#include "dc.h"
+
+const u64 amdgpu_dm_supported_degam_tfs = /* bitmask of 1D curve types for the degamma (DEGAM TF) colorop */
+ BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF) |
+ BIT(DRM_COLOROP_1D_CURVE_PQ_125_EOTF) |
+ BIT(DRM_COLOROP_1D_CURVE_BT2020_INV_OETF) |
+ BIT(DRM_COLOROP_1D_CURVE_GAMMA22_INV);
+
+const u64 amdgpu_dm_supported_shaper_tfs = /* bitmask of 1D curve types for the shaper (SHAPER TF) colorop */
+ BIT(DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF) |
+ BIT(DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF) |
+ BIT(DRM_COLOROP_1D_CURVE_BT2020_OETF) |
+ BIT(DRM_COLOROP_1D_CURVE_GAMMA22);
+
+const u64 amdgpu_dm_supported_blnd_tfs = /* bitmask of 1D curve types for the blend (BLND TF) colorop; same set as degamma */
+ BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF) |
+ BIT(DRM_COLOROP_1D_CURVE_PQ_125_EOTF) |
+ BIT(DRM_COLOROP_1D_CURVE_BT2020_INV_OETF) |
+ BIT(DRM_COLOROP_1D_CURVE_GAMMA22_INV);
+
+#define MAX_COLOR_PIPELINE_OPS 10 /* capacity of the ops[] array used while building a pipeline */
+
+#define LUT3D_SIZE 17 /* size passed to drm_plane_colorop_3dlut_init() */
+
+/*
+ * amdgpu_dm_initialize_default_pipeline - build the default plane color pipeline
+ * @plane: plane the colorops are created for
+ * @list: enum entry that receives the id of the first colorop as its type and
+ *        an allocated "Color Pipeline <id>" string as its name
+ *
+ * Creates and chains the colorops in this order: degamma TF, multiplier,
+ * 3x4 matrix, then (only when the hardware has a 3D LUT) shaper TF, shaper
+ * LUT and 3D LUT, and finally blend TF and blend LUT.
+ *
+ * On failure drm_colorop_pipeline_destroy() is called for the device and the
+ * pipeline name allocated here is freed again; @list->name is then NULL.
+ *
+ * Returns:
+ * 0 on success, -ENOMEM when an allocation fails, or the error returned by
+ * one of the drm_plane_colorop_*_init() helpers.
+ */
+int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct drm_prop_enum_list *list)
+{
+	struct drm_colorop *ops[MAX_COLOR_PIPELINE_OPS];
+	struct drm_device *dev = plane->dev;
+	struct amdgpu_device *adev = drm_to_adev(dev);
+	char *name = NULL;
+	int ret;
+	int i = 0;
+
+	memset(ops, 0, sizeof(ops));
+
+	/* 1D curve - DEGAM TF */
+	ops[i] = kzalloc(sizeof(*ops[i]), GFP_KERNEL);
+	if (!ops[i]) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	ret = drm_plane_colorop_curve_1d_init(dev, ops[i], plane,
+					      amdgpu_dm_supported_degam_tfs,
+					      DRM_COLOROP_FLAG_ALLOW_BYPASS);
+	if (ret)
+		goto cleanup;
+
+	/* The pipeline is identified by its first colorop. */
+	name = kasprintf(GFP_KERNEL, "Color Pipeline %d", ops[i]->base.id);
+	if (!name) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	list->type = ops[i]->base.id;
+	list->name = name;
+
+	i++;
+
+	/* Multiplier */
+	ops[i] = kzalloc(sizeof(*ops[i]), GFP_KERNEL);
+	if (!ops[i]) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	ret = drm_plane_colorop_mult_init(dev, ops[i], plane, DRM_COLOROP_FLAG_ALLOW_BYPASS);
+	if (ret)
+		goto cleanup;
+
+	drm_colorop_set_next_property(ops[i - 1], ops[i]);
+
+	i++;
+
+	/* 3x4 matrix */
+	ops[i] = kzalloc(sizeof(*ops[i]), GFP_KERNEL);
+	if (!ops[i]) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	ret = drm_plane_colorop_ctm_3x4_init(dev, ops[i], plane, DRM_COLOROP_FLAG_ALLOW_BYPASS);
+	if (ret)
+		goto cleanup;
+
+	drm_colorop_set_next_property(ops[i - 1], ops[i]);
+
+	i++;
+
+	if (adev->dm.dc->caps.color.dpp.hw_3d_lut) {
+		/* 1D curve - SHAPER TF */
+		ops[i] = kzalloc(sizeof(*ops[i]), GFP_KERNEL);
+		if (!ops[i]) {
+			ret = -ENOMEM;
+			goto cleanup;
+		}
+
+		ret = drm_plane_colorop_curve_1d_init(dev, ops[i], plane,
+						      amdgpu_dm_supported_shaper_tfs,
+						      DRM_COLOROP_FLAG_ALLOW_BYPASS);
+		if (ret)
+			goto cleanup;
+
+		drm_colorop_set_next_property(ops[i - 1], ops[i]);
+
+		i++;
+
+		/* 1D LUT - SHAPER LUT */
+		ops[i] = kzalloc(sizeof(*ops[i]), GFP_KERNEL);
+		if (!ops[i]) {
+			ret = -ENOMEM;
+			goto cleanup;
+		}
+
+		ret = drm_plane_colorop_curve_1d_lut_init(dev, ops[i], plane, MAX_COLOR_LUT_ENTRIES,
+							  DRM_COLOROP_LUT1D_INTERPOLATION_LINEAR,
+							  DRM_COLOROP_FLAG_ALLOW_BYPASS);
+		if (ret)
+			goto cleanup;
+
+		drm_colorop_set_next_property(ops[i - 1], ops[i]);
+
+		i++;
+
+		/* 3D LUT */
+		ops[i] = kzalloc(sizeof(*ops[i]), GFP_KERNEL);
+		if (!ops[i]) {
+			ret = -ENOMEM;
+			goto cleanup;
+		}
+
+		ret = drm_plane_colorop_3dlut_init(dev, ops[i], plane, LUT3D_SIZE,
+						   DRM_COLOROP_LUT3D_INTERPOLATION_TETRAHEDRAL,
+						   DRM_COLOROP_FLAG_ALLOW_BYPASS);
+		if (ret)
+			goto cleanup;
+
+		drm_colorop_set_next_property(ops[i - 1], ops[i]);
+
+		i++;
+	}
+
+	/* 1D curve - BLND TF */
+	ops[i] = kzalloc(sizeof(*ops[i]), GFP_KERNEL);
+	if (!ops[i]) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	ret = drm_plane_colorop_curve_1d_init(dev, ops[i], plane,
+					      amdgpu_dm_supported_blnd_tfs,
+					      DRM_COLOROP_FLAG_ALLOW_BYPASS);
+	if (ret)
+		goto cleanup;
+
+	drm_colorop_set_next_property(ops[i - 1], ops[i]);
+
+	i++;
+
+	/* 1D LUT - BLND LUT */
+	ops[i] = kzalloc(sizeof(*ops[i]), GFP_KERNEL);
+	if (!ops[i]) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	ret = drm_plane_colorop_curve_1d_lut_init(dev, ops[i], plane, MAX_COLOR_LUT_ENTRIES,
+						  DRM_COLOROP_LUT1D_INTERPOLATION_LINEAR,
+						  DRM_COLOROP_FLAG_ALLOW_BYPASS);
+	if (ret)
+		goto cleanup;
+
+	drm_colorop_set_next_property(ops[i - 1], ops[i]);
+	return 0;
+
+cleanup:
+	if (ret == -ENOMEM)
+		drm_err(plane->dev, "KMS: Failed to allocate colorop\n");
+
+	/*
+	 * NOTE(review): a colorop that was allocated but whose init call failed
+	 * is presumably not reachable by drm_colorop_pipeline_destroy() - confirm
+	 * whether it needs an explicit kfree() here.
+	 */
+	drm_colorop_pipeline_destroy(dev);
+
+	/* Do not leave a dangling or leaked name behind in @list. */
+	if (name) {
+		kfree(name);
+		list->name = NULL;
+	}
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.h
new file mode 100644
index 000000000000..2e1617ffc8ee
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __AMDGPU_DM_COLOROP_H__
+#define __AMDGPU_DM_COLOROP_H__
+/* Bitmasks of BIT(DRM_COLOROP_1D_CURVE_*) values, defined in amdgpu_dm_colorop.c. */
+extern const u64 amdgpu_dm_supported_degam_tfs; /* degamma TF colorop */
+extern const u64 amdgpu_dm_supported_shaper_tfs; /* shaper TF colorop */
+extern const u64 amdgpu_dm_supported_blnd_tfs; /* blend TF colorop */
+/*
+ * Create the default chain of colorops for @plane and describe it in @list
+ * (type = id of the first colorop, name = allocated "Color Pipeline <id>").
+ * Returns 0 on success or a negative error code.
+ */
+int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct drm_prop_enum_list *list);
+
+#endif /* __AMDGPU_DM_COLOROP_H__*/
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
index 8a441a22c46e..e20aa7438066 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
@@ -30,6 +31,7 @@
#include "amdgpu_dm.h"
#include "dc.h"
#include "amdgpu_securedisplay.h"
+#include "amdgpu_dm_psr.h"
static const char *const pipe_crc_sources[] = {
"none",
@@ -83,56 +85,383 @@ const char *const *amdgpu_dm_crtc_get_crc_sources(struct drm_crtc *crtc,
}
#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
-static void amdgpu_dm_set_crc_window_default(struct drm_crtc *crtc)
+/*
+ * update_phy_id_mapping - rebuild the secure display PHY id table
+ * @adev: amdgpu device whose connectors are enumerated
+ *
+ * Collects every connected connector, orders them by link encoder HW
+ * instance and, within one MST topology, by tree depth (LCT) and then by
+ * port number / RAD, and stores the result in
+ * dm->secure_display_ctx.phy_id_mapping[]. The index into that table is the
+ * PHY id handed out by get_phy_id().
+ *
+ * phy_mapping_updated is cleared on entry and only set again once the table
+ * has been rebuilt completely; every early return leaves it cleared.
+ */
+static void update_phy_id_mapping(struct amdgpu_device *adev)
+{
+	struct drm_device *ddev = adev_to_drm(adev);
+	struct amdgpu_display_manager *dm = &adev->dm;
+	struct drm_connector *connector;
+	struct amdgpu_dm_connector *aconnector;
+	struct amdgpu_dm_connector *sort_connector[AMDGPU_DM_MAX_CRTC] = {NULL};
+	struct drm_connector_list_iter iter;
+	uint8_t idx = 0, idx_2 = 0, connector_cnt = 0;
+
+	dm->secure_display_ctx.phy_mapping_updated = false;
+
+	mutex_lock(&ddev->mode_config.mutex);
+	drm_connector_list_iter_begin(ddev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
+
+		if (connector->status != connector_status_connected)
+			continue;
+
+		if (idx >= AMDGPU_DM_MAX_CRTC) {
+			DRM_WARN("%s connected connectors exceed max crtc\n", __func__);
+			/* Leaving the walk early still has to release the iterator. */
+			drm_connector_list_iter_end(&iter);
+			mutex_unlock(&ddev->mode_config.mutex);
+			return;
+		}
+
+		aconnector = to_amdgpu_dm_connector(connector);
+
+		sort_connector[idx] = aconnector;
+		idx++;
+		connector_cnt++;
+	}
+	drm_connector_list_iter_end(&iter);
+
+	/* sort connectors by link_enc_hw_instance first */
+	for (idx = connector_cnt; idx > 1 ; idx--) {
+		for (idx_2 = 0; idx_2 < (idx - 1); idx_2++) {
+			if (sort_connector[idx_2]->dc_link->link_enc_hw_inst >
+			    sort_connector[idx_2 + 1]->dc_link->link_enc_hw_inst)
+				swap(sort_connector[idx_2], sort_connector[idx_2 + 1]);
+		}
+	}
+
+	/*
+	 * Sort mst connectors by RAD. mst connectors with the same enc_hw_instance are already
+	 * sorted together above.
+	 */
+	for (idx = 0; idx < connector_cnt; /*Do nothing*/) {
+		if (sort_connector[idx]->mst_root) {
+			uint8_t i, j, k;
+			uint8_t mst_con_cnt = 1;
+
+			/* Count the run of connectors that share this MST root. */
+			for (idx_2 = (idx + 1); idx_2 < connector_cnt; idx_2++) {
+				if (sort_connector[idx_2]->mst_root == sort_connector[idx]->mst_root)
+					mst_con_cnt++;
+				else
+					break;
+			}
+
+			/*
+			 * Bubble sort over [idx, idx + mst_con_cnt). A pass over i
+			 * elements compares the pairs (j, j + 1) up to
+			 * j == idx + i - 2, so that the last element of the run
+			 * takes part in the sort as well.
+			 */
+			for (i = mst_con_cnt; i > 1; i--) {
+				for (j = idx; j < (idx + i - 1); j++) {
+					int mstb_lct = sort_connector[j]->mst_output_port->parent->lct;
+					int next_mstb_lct = sort_connector[j + 1]->mst_output_port->parent->lct;
+					u8 *rad;
+					u8 *next_rad;
+					bool need_swap = false;
+
+					/* Sort by mst tree depth first. Then compare RAD if depth is the same*/
+					if (mstb_lct > next_mstb_lct) {
+						need_swap = true;
+					} else if (mstb_lct == next_mstb_lct) {
+						if (mstb_lct == 1) {
+							if (sort_connector[j]->mst_output_port->port_num > sort_connector[j + 1]->mst_output_port->port_num)
+								need_swap = true;
+						} else if (mstb_lct > 1) {
+							rad = sort_connector[j]->mst_output_port->parent->rad;
+							next_rad = sort_connector[j + 1]->mst_output_port->parent->rad;
+
+							/*
+							 * One RAD nibble per hop, high nibble first.
+							 * NOTE(review): the scan only stops on
+							 * "greater"; a smaller nibble earlier in the
+							 * path does not end it - confirm this is the
+							 * intended ordering.
+							 */
+							for (k = 0; k < mstb_lct - 1; k++) {
+								int shift = (k % 2) ? 0 : 4;
+								int port_num = (rad[k / 2] >> shift) & 0xf;
+								int next_port_num = (next_rad[k / 2] >> shift) & 0xf;
+
+								if (port_num > next_port_num) {
+									need_swap = true;
+									break;
+								}
+							}
+						} else {
+							DRM_ERROR("MST LCT shouldn't be set as < 1");
+							mutex_unlock(&ddev->mode_config.mutex);
+							return;
+						}
+					}
+
+					if (need_swap)
+						swap(sort_connector[j], sort_connector[j + 1]);
+				}
+			}
+
+			idx += mst_con_cnt;
+		} else {
+			idx++;
+		}
+	}
+
+	/* Complete sorting. Assign relevant result to dm->secure_display_ctx.phy_id_mapping[]*/
+	memset(dm->secure_display_ctx.phy_id_mapping, 0, sizeof(dm->secure_display_ctx.phy_id_mapping));
+	for (idx = 0; idx < connector_cnt; idx++) {
+		aconnector = sort_connector[idx];
+
+		dm->secure_display_ctx.phy_id_mapping[idx].assigned = true;
+		dm->secure_display_ctx.phy_id_mapping[idx].is_mst = false;
+		dm->secure_display_ctx.phy_id_mapping[idx].enc_hw_inst = aconnector->dc_link->link_enc_hw_inst;
+
+		if (sort_connector[idx]->mst_root) {
+			dm->secure_display_ctx.phy_id_mapping[idx].is_mst = true;
+			dm->secure_display_ctx.phy_id_mapping[idx].lct = aconnector->mst_output_port->parent->lct;
+			dm->secure_display_ctx.phy_id_mapping[idx].port_num = aconnector->mst_output_port->port_num;
+			memcpy(dm->secure_display_ctx.phy_id_mapping[idx].rad,
+				aconnector->mst_output_port->parent->rad, sizeof(aconnector->mst_output_port->parent->rad));
+		}
+	}
+	mutex_unlock(&ddev->mode_config.mutex);
+
+	dm->secure_display_ctx.phy_id_mapping_cnt = connector_cnt;
+	dm->secure_display_ctx.phy_mapping_updated = true;
+}
+
+/*
+ * get_phy_id - look up the secure display PHY id of a connector
+ * @dm: display manager holding secure_display_ctx.phy_id_mapping[]
+ * @aconnector: connector to look up
+ * @phy_id: receives the index of the matching table entry
+ *
+ * An entry matches when its encoder HW instance equals the connector's and,
+ * for MST entries, LCT, port number and (for LCT > 1) every RAD nibble are
+ * equal as well.
+ *
+ * Returns:
+ * true and *@phy_id set on a match. false when the table has not been
+ * updated, contains an unassigned entry, the connector lacks an MST port
+ * where one is needed, the LCT is below 1, or nothing matches.
+ */
+static bool get_phy_id(struct amdgpu_display_manager *dm,
+			struct amdgpu_dm_connector *aconnector, uint8_t *phy_id)
+{
+	struct drm_dp_mst_port *port;
+	int slot, hop, depth;
+
+	/*
+	 * Assume secure display start after all connectors are probed. The connection
+	 * config is static as well
+	 */
+	if (!dm->secure_display_ctx.phy_mapping_updated) {
+		DRM_WARN("%s Should update the phy id table before get it's value", __func__);
+		return false;
+	}
+
+	for (slot = 0; slot < dm->secure_display_ctx.phy_id_mapping_cnt; slot++) {
+		if (!dm->secure_display_ctx.phy_id_mapping[slot].assigned) {
+			DRM_ERROR("phy_id_mapping[%d] should be assigned", slot);
+			return false;
+		}
+
+		if (aconnector->dc_link->link_enc_hw_inst !=
+		    dm->secure_display_ctx.phy_id_mapping[slot].enc_hw_inst)
+			continue;
+
+		/* For a non-MST entry the encoder instance alone identifies it. */
+		if (!dm->secure_display_ctx.phy_id_mapping[slot].is_mst)
+			goto matched;
+
+		/* Could caused by wrongly pass mst root connector */
+		port = aconnector->mst_output_port;
+		if (!port) {
+			DRM_ERROR("%s Check mst case but connector without a port assigned", __func__);
+			return false;
+		}
+
+		if (aconnector->mst_root &&
+		    aconnector->mst_root->mst_mgr.mst_primary == NULL) {
+			DRM_WARN("%s pass in a stale mst connector", __func__);
+		}
+
+		depth = port->parent->lct;
+		if (depth != dm->secure_display_ctx.phy_id_mapping[slot].lct ||
+		    port->port_num != dm->secure_display_ctx.phy_id_mapping[slot].port_num)
+			continue;
+
+		if (depth < 1) {
+			DRM_ERROR("lCT should be >= 1");
+			return false;
+		}
+
+		if (depth == 1)
+			goto matched;
+
+		/* Check RAD: one nibble per hop, high nibble first */
+		for (hop = 0; hop < depth - 1; hop++) {
+			int shift = (hop % 2) ? 0 : 4;
+			int port_num = (port->parent->rad[hop / 2] >> shift) & 0xf;
+			int port_num2 = (dm->secure_display_ctx.phy_id_mapping[slot].rad[hop / 2] >> shift) & 0xf;
+
+			if (port_num != port_num2)
+				break;
+		}
+
+		/* Only a scan that ran to the end means every hop was equal. */
+		if (hop == depth - 1)
+			goto matched;
+	}
+
+	DRM_WARN("Can't find associated phy ID");
+	return false;
+
+matched:
+	DRM_DEBUG_DRIVER("Associated secure display PHY ID as %d", slot);
+	*phy_id = slot;
+
+	return true;
+}
+
+static void amdgpu_dm_set_crc_window_default(struct drm_crtc *crtc, struct dc_stream_state *stream)
{
struct drm_device *drm_dev = crtc->dev;
+ struct amdgpu_display_manager *dm = &drm_to_adev(drm_dev)->dm;
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_dm_connector *aconnector;
+ bool was_activated;
+ uint8_t phy_id;
+ unsigned long flags;
+ int i;
- spin_lock_irq(&drm_dev->event_lock);
- acrtc->dm_irq_params.crc_window.x_start = 0;
- acrtc->dm_irq_params.crc_window.y_start = 0;
- acrtc->dm_irq_params.crc_window.x_end = 0;
- acrtc->dm_irq_params.crc_window.y_end = 0;
- acrtc->dm_irq_params.crc_window.activated = false;
- acrtc->dm_irq_params.crc_window.update_win = false;
- acrtc->dm_irq_params.crc_window.skip_frame_cnt = 0;
- spin_unlock_irq(&drm_dev->event_lock);
+ spin_lock_irqsave(&drm_dev->event_lock, flags);
+ was_activated = acrtc->dm_irq_params.crc_window_activated;
+ for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) {
+ acrtc->dm_irq_params.window_param[i].x_start = 0;
+ acrtc->dm_irq_params.window_param[i].y_start = 0;
+ acrtc->dm_irq_params.window_param[i].x_end = 0;
+ acrtc->dm_irq_params.window_param[i].y_end = 0;
+ acrtc->dm_irq_params.window_param[i].enable = false;
+ acrtc->dm_irq_params.window_param[i].update_win = false;
+ acrtc->dm_irq_params.window_param[i].skip_frame_cnt = 0;
+ }
+ acrtc->dm_irq_params.crc_window_activated = false;
+ spin_unlock_irqrestore(&drm_dev->event_lock, flags);
+
+ /* Disable secure_display if it was enabled */
+ if (was_activated && dm->secure_display_ctx.op_mode == LEGACY_MODE) {
+ /* stop ROI update on this crtc */
+ flush_work(&dm->secure_display_ctx.crtc_ctx[crtc->index].notify_ta_work);
+ flush_work(&dm->secure_display_ctx.crtc_ctx[crtc->index].forward_roi_work);
+ aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+
+ if (aconnector && get_phy_id(dm, aconnector, &phy_id)) {
+ if (dm->secure_display_ctx.support_mul_roi)
+ dc_stream_forward_multiple_crc_window(stream, NULL, phy_id, true);
+ else
+ dc_stream_forward_crc_window(stream, NULL, phy_id, true);
+ } else {
+ DRM_DEBUG_DRIVER("%s Can't find matching phy id", __func__);
+ }
+ }
}
static void amdgpu_dm_crtc_notify_ta_to_read(struct work_struct *work)
{
- struct crc_rd_work *crc_rd_wrk;
- struct amdgpu_device *adev;
+ struct secure_display_crtc_context *crtc_ctx;
struct psp_context *psp;
- struct securedisplay_cmd *securedisplay_cmd;
+ struct ta_securedisplay_cmd *securedisplay_cmd;
struct drm_crtc *crtc;
- uint8_t phy_id;
+ struct dc_stream_state *stream;
+ struct amdgpu_dm_connector *aconnector;
+ uint8_t phy_inst;
+ struct amdgpu_display_manager *dm;
+ struct crc_data crc_cpy[MAX_CRC_WINDOW_NUM];
+ unsigned long flags;
+ uint8_t roi_idx = 0;
int ret;
+ int i;
- crc_rd_wrk = container_of(work, struct crc_rd_work, notify_ta_work);
- spin_lock_irq(&crc_rd_wrk->crc_rd_work_lock);
- crtc = crc_rd_wrk->crtc;
+ crtc_ctx = container_of(work, struct secure_display_crtc_context, notify_ta_work);
+ crtc = crtc_ctx->crtc;
- if (!crtc) {
- spin_unlock_irq(&crc_rd_wrk->crc_rd_work_lock);
+ if (!crtc)
+ return;
+
+ psp = &drm_to_adev(crtc->dev)->psp;
+
+ if (!psp->securedisplay_context.context.initialized) {
+ DRM_DEBUG_DRIVER("Secure Display fails to notify PSP TA\n");
return;
}
- adev = drm_to_adev(crtc->dev);
- psp = &adev->psp;
- phy_id = crc_rd_wrk->phy_inst;
- spin_unlock_irq(&crc_rd_wrk->crc_rd_work_lock);
-
- psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
- TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
- securedisplay_cmd->securedisplay_in_message.send_roi_crc.phy_id =
- phy_id;
- ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
+ dm = &drm_to_adev(crtc->dev)->dm;
+ stream = to_amdgpu_crtc(crtc)->dm_irq_params.stream;
+ aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+ if (!aconnector)
+ return;
+
+ mutex_lock(&crtc->dev->mode_config.mutex);
+ if (!get_phy_id(dm, aconnector, &phy_inst)) {
+ DRM_WARN("%s Can't find mapping phy id!", __func__);
+ mutex_unlock(&crtc->dev->mode_config.mutex);
+ return;
+ }
+ mutex_unlock(&crtc->dev->mode_config.mutex);
+
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+ memcpy(crc_cpy, crtc_ctx->crc_info.crc, sizeof(struct crc_data) * MAX_CRC_WINDOW_NUM);
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+
+ /* need lock for multiple crtcs to use the command buffer */
+ mutex_lock(&psp->securedisplay_context.mutex);
+ /* PSP TA is expected to finish data transmission over I2C within the current frame,
+ * even when up to 4 crtcs request to send in this frame.
+ */
+ if (dm->secure_display_ctx.support_mul_roi) {
+ psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
+ TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2);
+
+ securedisplay_cmd->securedisplay_in_message.send_roi_crc_v2.phy_id = phy_inst;
+
+ for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) {
+ if (crc_cpy[i].crc_ready)
+ roi_idx |= 1 << i;
+ }
+ securedisplay_cmd->securedisplay_in_message.send_roi_crc_v2.roi_idx = roi_idx;
+
+ ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2);
+ } else {
+ psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
+ TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
+
+ securedisplay_cmd->securedisplay_in_message.send_roi_crc.phy_id = phy_inst;
+
+ ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
+ }
+
if (!ret) {
- if (securedisplay_cmd->status != TA_SECUREDISPLAY_STATUS__SUCCESS) {
+ if (securedisplay_cmd->status != TA_SECUREDISPLAY_STATUS__SUCCESS)
psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status);
- }
}
+
+ mutex_unlock(&psp->securedisplay_context.mutex);
+}
+
+static void
+amdgpu_dm_forward_crc_window(struct work_struct *work)
+{
+ struct secure_display_crtc_context *crtc_ctx;
+ struct amdgpu_display_manager *dm;
+ struct drm_crtc *crtc;
+ struct dc_stream_state *stream;
+ struct amdgpu_dm_connector *aconnector;
+ struct crc_window roi_cpy[MAX_CRC_WINDOW_NUM];
+ unsigned long flags;
+ uint8_t phy_id;
+
+ crtc_ctx = container_of(work, struct secure_display_crtc_context, forward_roi_work);
+ crtc = crtc_ctx->crtc;
+
+ if (!crtc)
+ return;
+
+ dm = &drm_to_adev(crtc->dev)->dm;
+ stream = to_amdgpu_crtc(crtc)->dm_irq_params.stream;
+ aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+
+ if (!aconnector)
+ return;
+
+ mutex_lock(&crtc->dev->mode_config.mutex);
+ if (!get_phy_id(dm, aconnector, &phy_id)) {
+ DRM_WARN("%s Can't find mapping phy id!", __func__);
+ mutex_unlock(&crtc->dev->mode_config.mutex);
+ return;
+ }
+ mutex_unlock(&crtc->dev->mode_config.mutex);
+
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+ memcpy(roi_cpy, crtc_ctx->roi, sizeof(struct crc_window) * MAX_CRC_WINDOW_NUM);
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+
+ mutex_lock(&dm->dc_lock);
+ if (dm->secure_display_ctx.support_mul_roi)
+ dc_stream_forward_multiple_crc_window(stream, roi_cpy,
+ phy_id, false);
+ else
+ dc_stream_forward_crc_window(stream, &roi_cpy[0].rect,
+ phy_id, false);
+ mutex_unlock(&dm->dc_lock);
}
bool amdgpu_dm_crc_window_is_activated(struct drm_crtc *crtc)
@@ -142,7 +471,7 @@ bool amdgpu_dm_crc_window_is_activated(struct drm_crtc *crtc)
bool ret = false;
spin_lock_irq(&drm_dev->event_lock);
- ret = acrtc->dm_irq_params.crc_window.activated;
+ ret = acrtc->dm_irq_params.crc_window_activated;
spin_unlock_irq(&drm_dev->event_lock);
return ret;
@@ -180,24 +509,14 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc,
mutex_lock(&adev->dm.dc_lock);
- /* Enable CRTC CRC generation if necessary. */
+ /* For PSR1, check that the panel has exited PSR */
+ if (stream_state->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1)
+ amdgpu_dm_psr_wait_disable(stream_state);
+
+ /* Enable or disable CRTC CRC generation */
if (dm_is_crc_source_crtc(source) || source == AMDGPU_DM_PIPE_CRC_SOURCE_NONE) {
-#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
- if (!enable) {
- if (adev->dm.crc_rd_wrk) {
- flush_work(&adev->dm.crc_rd_wrk->notify_ta_work);
- spin_lock_irq(&adev->dm.crc_rd_wrk->crc_rd_work_lock);
- if (adev->dm.crc_rd_wrk->crtc == crtc) {
- dc_stream_stop_dmcu_crc_win_update(stream_state->ctx->dc,
- dm_crtc_state->stream);
- adev->dm.crc_rd_wrk->crtc = NULL;
- }
- spin_unlock_irq(&adev->dm.crc_rd_wrk->crc_rd_work_lock);
- }
- }
-#endif
if (!dc_stream_configure_crc(stream_state->ctx->dc,
- stream_state, NULL, enable, enable)) {
+ stream_state, NULL, enable, enable, 0, true)) {
ret = -EINVAL;
goto unlock;
}
@@ -228,6 +547,10 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
struct drm_crtc_commit *commit;
struct dm_crtc_state *crtc_state;
struct drm_device *drm_dev = crtc->dev;
+#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ struct amdgpu_display_manager *dm = &adev->dm;
+#endif
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
struct drm_dp_aux *aux = NULL;
bool enable = false;
@@ -296,6 +619,9 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
if (!connector->state || connector->state->crtc != crtc)
continue;
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconn = to_amdgpu_dm_connector(connector);
break;
}
@@ -307,7 +633,7 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
goto cleanup;
}
- aux = (aconn->port) ? &aconn->port->aux : &aconn->dm_dp_aux.aux;
+ aux = (aconn->mst_output_port) ? &aconn->mst_output_port->aux : &aconn->dm_dp_aux.aux;
if (!aux) {
DRM_DEBUG_DRIVER("No dp aux for amd connector\n");
@@ -324,15 +650,6 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
}
-#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
- amdgpu_dm_set_crc_window_default(crtc);
-#endif
-
- if (amdgpu_dm_crtc_configure_crc_source(crtc, crtc_state, source)) {
- ret = -EINVAL;
- goto cleanup;
- }
-
/*
* Reading the CRC requires the vblank interrupt handler to be
* enabled. Keep a reference until CRC capture stops.
@@ -342,7 +659,19 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
ret = drm_crtc_vblank_get(crtc);
if (ret)
goto cleanup;
+ }
+
+#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
+ /* Reset secure_display when we change crc source from debugfs */
+ amdgpu_dm_set_crc_window_default(crtc, crtc_state->stream);
+#endif
+
+ if (amdgpu_dm_crtc_configure_crc_source(crtc, crtc_state, source)) {
+ ret = -EINVAL;
+ goto cleanup;
+ }
+ if (!enabled && enable) {
if (dm_is_crc_source_dprx(source)) {
if (drm_dp_start_crc(aux, crtc)) {
DRM_DEBUG_DRIVER("dp start crc failed\n");
@@ -368,6 +697,13 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
/* Reset crc_skipped on dm state */
crtc_state->crc_skip_count = 0;
+#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
+ /* Initialize phy id mapping table for secure display*/
+ if (dm->secure_display_ctx.op_mode == LEGACY_MODE &&
+ !dm->secure_display_ctx.phy_mapping_updated)
+ update_phy_id_mapping(adev);
+#endif
+
cleanup:
if (commit)
drm_crtc_commit_put(commit);
@@ -422,7 +758,7 @@ void amdgpu_dm_crtc_handle_crc_irq(struct drm_crtc *crtc)
}
if (dm_is_crc_source_crtc(cur_crc_src)) {
- if (!dc_stream_get_crc(stream_state->ctx->dc, stream_state,
+ if (!dc_stream_get_crc(stream_state->ctx->dc, stream_state, 0,
&crcs[0], &crcs[1], &crcs[2]))
return;
@@ -434,19 +770,21 @@ void amdgpu_dm_crtc_handle_crc_irq(struct drm_crtc *crtc)
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc)
{
- struct dc_stream_state *stream_state;
struct drm_device *drm_dev = NULL;
enum amdgpu_dm_pipe_crc_source cur_crc_src;
struct amdgpu_crtc *acrtc = NULL;
struct amdgpu_device *adev = NULL;
- struct crc_rd_work *crc_rd_wrk = NULL;
- struct crc_params *crc_window = NULL, tmp_window;
- unsigned long flags1, flags2;
- struct crtc_position position;
- uint32_t v_blank;
- uint32_t v_back_porch;
- uint32_t crc_window_latch_up_line;
- struct dc_crtc_timing *timing_out;
+ struct secure_display_crtc_context *crtc_ctx = NULL;
+ bool reset_crc_frame_count[MAX_CRC_WINDOW_NUM] = {false};
+ uint32_t crc_r[MAX_CRC_WINDOW_NUM] = {0};
+ uint32_t crc_g[MAX_CRC_WINDOW_NUM] = {0};
+ uint32_t crc_b[MAX_CRC_WINDOW_NUM] = {0};
+ unsigned long flags1;
+ bool forward_roi_change = false;
+ bool notify_ta = false;
+ bool all_crc_ready = true;
+ struct dc_stream_state *stream_state;
+ int i;
if (crtc == NULL)
return;
@@ -454,97 +792,160 @@ void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc)
acrtc = to_amdgpu_crtc(crtc);
adev = drm_to_adev(crtc->dev);
drm_dev = crtc->dev;
+ stream_state = to_dm_crtc_state(crtc->state)->stream;
spin_lock_irqsave(&drm_dev->event_lock, flags1);
- stream_state = acrtc->dm_irq_params.stream;
cur_crc_src = acrtc->dm_irq_params.crc_src;
- timing_out = &stream_state->timing;
/* Early return if CRC capture is not enabled. */
- if (!amdgpu_dm_is_valid_crc_source(cur_crc_src))
- goto cleanup;
+ if (!amdgpu_dm_is_valid_crc_source(cur_crc_src) ||
+ !dm_is_crc_source_crtc(cur_crc_src)) {
+ spin_unlock_irqrestore(&drm_dev->event_lock, flags1);
+ return;
+ }
- if (dm_is_crc_source_crtc(cur_crc_src)) {
- if (acrtc->dm_irq_params.crc_window.activated) {
- if (acrtc->dm_irq_params.crc_window.update_win) {
- if (acrtc->dm_irq_params.crc_window.skip_frame_cnt) {
- acrtc->dm_irq_params.crc_window.skip_frame_cnt -= 1;
- goto cleanup;
- }
- crc_window = &tmp_window;
-
- tmp_window.windowa_x_start =
- acrtc->dm_irq_params.crc_window.x_start;
- tmp_window.windowa_y_start =
- acrtc->dm_irq_params.crc_window.y_start;
- tmp_window.windowa_x_end =
- acrtc->dm_irq_params.crc_window.x_end;
- tmp_window.windowa_y_end =
- acrtc->dm_irq_params.crc_window.y_end;
- tmp_window.windowb_x_start =
- acrtc->dm_irq_params.crc_window.x_start;
- tmp_window.windowb_y_start =
- acrtc->dm_irq_params.crc_window.y_start;
- tmp_window.windowb_x_end =
- acrtc->dm_irq_params.crc_window.x_end;
- tmp_window.windowb_y_end =
- acrtc->dm_irq_params.crc_window.y_end;
-
- dc_stream_forward_dmcu_crc_window(stream_state->ctx->dc,
- stream_state, crc_window);
-
- acrtc->dm_irq_params.crc_window.update_win = false;
-
- dc_stream_get_crtc_position(stream_state->ctx->dc, &stream_state, 1,
- &position.vertical_count,
- &position.nominal_vcount);
-
- v_blank = timing_out->v_total - timing_out->v_border_top -
- timing_out->v_addressable - timing_out->v_border_bottom;
-
- v_back_porch = v_blank - timing_out->v_front_porch -
- timing_out->v_sync_width;
-
- crc_window_latch_up_line = v_back_porch + timing_out->v_sync_width;
-
- /* take 3 lines margin*/
- if ((position.vertical_count + 3) >= crc_window_latch_up_line)
- acrtc->dm_irq_params.crc_window.skip_frame_cnt = 1;
- else
- acrtc->dm_irq_params.crc_window.skip_frame_cnt = 0;
- } else {
- if (acrtc->dm_irq_params.crc_window.skip_frame_cnt == 0) {
- if (adev->dm.crc_rd_wrk) {
- crc_rd_wrk = adev->dm.crc_rd_wrk;
- spin_lock_irqsave(&crc_rd_wrk->crc_rd_work_lock, flags2);
- crc_rd_wrk->phy_inst =
- stream_state->link->link_enc_hw_inst;
- spin_unlock_irqrestore(&crc_rd_wrk->crc_rd_work_lock, flags2);
- schedule_work(&crc_rd_wrk->notify_ta_work);
- }
- } else {
- acrtc->dm_irq_params.crc_window.skip_frame_cnt -= 1;
- }
- }
+ if (!acrtc->dm_irq_params.crc_window_activated) {
+ spin_unlock_irqrestore(&drm_dev->event_lock, flags1);
+ return;
+ }
+
+ crtc_ctx = &adev->dm.secure_display_ctx.crtc_ctx[acrtc->crtc_id];
+ if (WARN_ON(crtc_ctx->crtc != crtc)) {
+ /* We have set the crtc when creating secure_display_crtc_context,
+ * don't expect it to be changed here.
+ */
+ crtc_ctx->crtc = crtc;
+ }
+
+ for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) {
+ struct crc_params crc_window = {
+ .windowa_x_start = acrtc->dm_irq_params.window_param[i].x_start,
+ .windowa_y_start = acrtc->dm_irq_params.window_param[i].y_start,
+ .windowa_x_end = acrtc->dm_irq_params.window_param[i].x_end,
+ .windowa_y_end = acrtc->dm_irq_params.window_param[i].y_end,
+ .windowb_x_start = acrtc->dm_irq_params.window_param[i].x_start,
+ .windowb_y_start = acrtc->dm_irq_params.window_param[i].y_start,
+ .windowb_x_end = acrtc->dm_irq_params.window_param[i].x_end,
+ .windowb_y_end = acrtc->dm_irq_params.window_param[i].y_end,
+ };
+
+ crtc_ctx->roi[i].enable = acrtc->dm_irq_params.window_param[i].enable;
+
+ if (!acrtc->dm_irq_params.window_param[i].enable) {
+ crtc_ctx->crc_info.crc[i].crc_ready = false;
+ continue;
+ }
+
+ if (acrtc->dm_irq_params.window_param[i].skip_frame_cnt) {
+ acrtc->dm_irq_params.window_param[i].skip_frame_cnt -= 1;
+ crtc_ctx->crc_info.crc[i].crc_ready = false;
+ continue;
+ }
+
+ if (acrtc->dm_irq_params.window_param[i].update_win) {
+ crtc_ctx->roi[i].rect.x = crc_window.windowa_x_start;
+ crtc_ctx->roi[i].rect.y = crc_window.windowa_y_start;
+ crtc_ctx->roi[i].rect.width = crc_window.windowa_x_end -
+ crc_window.windowa_x_start;
+ crtc_ctx->roi[i].rect.height = crc_window.windowa_y_end -
+ crc_window.windowa_y_start;
+
+ if (adev->dm.secure_display_ctx.op_mode == LEGACY_MODE)
+ /* forward task to dmub to update ROI */
+ forward_roi_change = true;
+ else if (adev->dm.secure_display_ctx.op_mode == DISPLAY_CRC_MODE)
+ /* update ROI via dm*/
+ dc_stream_configure_crc(stream_state->ctx->dc, stream_state,
+ &crc_window, true, true, i, false);
+
+ reset_crc_frame_count[i] = true;
+
+ acrtc->dm_irq_params.window_param[i].update_win = false;
+
+ /* Statically skip 1 frame, because we may need to wait for the things below
+ * before sending the ROI to dmub:
+ * 1. We defer the work by using the system workqueue.
+ * 2. We may need to wait for dc_lock before accessing dmub.
+ */
+ acrtc->dm_irq_params.window_param[i].skip_frame_cnt = 1;
+ crtc_ctx->crc_info.crc[i].crc_ready = false;
+ } else {
+ if (!dc_stream_get_crc(stream_state->ctx->dc, stream_state, i,
+ &crc_r[i], &crc_g[i], &crc_b[i]))
+ DRM_ERROR("Secure Display: fail to get crc from engine %d\n", i);
+
+ if (adev->dm.secure_display_ctx.op_mode == LEGACY_MODE)
+ /* forward task to psp to read ROI/CRC and output via I2C */
+ notify_ta = true;
+ else if (adev->dm.secure_display_ctx.op_mode == DISPLAY_CRC_MODE)
+ /* Keep overwriting the ROI window so that it does not get changed. */
+ dc_stream_configure_crc(stream_state->ctx->dc, stream_state,
+ &crc_window, true, true, i, false);
+
+ /* crc ready for psp to read out */
+ crtc_ctx->crc_info.crc[i].crc_ready = true;
}
}
-cleanup:
spin_unlock_irqrestore(&drm_dev->event_lock, flags1);
+
+ if (forward_roi_change)
+ schedule_work(&crtc_ctx->forward_roi_work);
+
+ if (notify_ta)
+ schedule_work(&crtc_ctx->notify_ta_work);
+
+ spin_lock_irqsave(&crtc_ctx->crc_info.lock, flags1);
+ for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) {
+ crtc_ctx->crc_info.crc[i].crc_R = crc_r[i];
+ crtc_ctx->crc_info.crc[i].crc_G = crc_g[i];
+ crtc_ctx->crc_info.crc[i].crc_B = crc_b[i];
+
+ if (!crtc_ctx->roi[i].enable) {
+ crtc_ctx->crc_info.crc[i].frame_count = 0;
+ continue;
+ }
+
+ if (!crtc_ctx->crc_info.crc[i].crc_ready)
+ all_crc_ready = false;
+
+ if (reset_crc_frame_count[i] || crtc_ctx->crc_info.crc[i].frame_count == UINT_MAX)
+ /* Reset the reference frame count after the user updates the ROI
+ * or when it reaches the maximum value.
+ */
+ crtc_ctx->crc_info.crc[i].frame_count = 0;
+ else
+ crtc_ctx->crc_info.crc[i].frame_count += 1;
+ }
+ spin_unlock_irqrestore(&crtc_ctx->crc_info.lock, flags1);
+
+ if (all_crc_ready)
+ complete_all(&crtc_ctx->crc_info.completion);
}
-struct crc_rd_work *amdgpu_dm_crtc_secure_display_create_work(void)
+void amdgpu_dm_crtc_secure_display_create_contexts(struct amdgpu_device *adev)
{
- struct crc_rd_work *crc_rd_wrk = NULL;
+ struct secure_display_crtc_context *crtc_ctx = NULL;
+ int i;
- crc_rd_wrk = kzalloc(sizeof(*crc_rd_wrk), GFP_KERNEL);
+ crtc_ctx = kcalloc(adev->mode_info.num_crtc,
+ sizeof(struct secure_display_crtc_context),
+ GFP_KERNEL);
- if (!crc_rd_wrk)
- return NULL;
+ if (!crtc_ctx) {
+ adev->dm.secure_display_ctx.crtc_ctx = NULL;
+ return;
+ }
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ INIT_WORK(&crtc_ctx[i].forward_roi_work, amdgpu_dm_forward_crc_window);
+ INIT_WORK(&crtc_ctx[i].notify_ta_work, amdgpu_dm_crtc_notify_ta_to_read);
+ crtc_ctx[i].crtc = &adev->mode_info.crtcs[i]->base;
+ spin_lock_init(&crtc_ctx[i].crc_info.lock);
+ }
- spin_lock_init(&crc_rd_wrk->crc_rd_work_lock);
- INIT_WORK(&crc_rd_wrk->notify_ta_work, amdgpu_dm_crtc_notify_ta_to_read);
+ adev->dm.secure_display_ctx.crtc_ctx = crtc_ctx;
- return crc_rd_wrk;
+ adev->dm.secure_display_ctx.op_mode = DISPLAY_CRC_MODE;
}
#endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h
index f07850db60a6..95bdb8699d7f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: MIT */
/*
* Copyright 2019 Advanced Micro Devices, Inc.
*
@@ -40,25 +41,76 @@ enum amdgpu_dm_pipe_crc_source {
};
#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
-struct crc_window_parm {
+#define MAX_CRTC 6
+
+enum secure_display_mode {
+ /* via dmub + psp */
+ LEGACY_MODE = 0,
+ /* driver directly */
+ DISPLAY_CRC_MODE,
+ SECURE_DISPLAY_MODE_MAX,
+};
+
+struct phy_id_mapping {
+ bool assigned;
+ bool is_mst;
+ uint8_t enc_hw_inst;
+ u8 lct;
+ u8 port_num;
+ u8 rad[8];
+};
+
+struct crc_data {
+ uint32_t crc_R;
+ uint32_t crc_G;
+ uint32_t crc_B;
+ uint32_t frame_count;
+ bool crc_ready;
+};
+
+struct crc_info {
+ struct crc_data crc[MAX_CRC_WINDOW_NUM];
+ struct completion completion;
+ spinlock_t lock;
+};
+
+struct crc_window_param {
uint16_t x_start;
uint16_t y_start;
uint16_t x_end;
uint16_t y_end;
- /* CRC windwo is activated or not*/
- bool activated;
+ /* CRC window is activated or not*/
+ bool enable;
/* Update crc window during vertical blank or not */
bool update_win;
/* skip reading/writing for few frames */
int skip_frame_cnt;
};
-struct crc_rd_work {
+struct secure_display_crtc_context {
+ /* work to notify PSP TA*/
struct work_struct notify_ta_work;
- /* To protect crc_rd_work carried fields*/
- spinlock_t crc_rd_work_lock;
+
+ /* work to forward ROI to dmcu/dmub */
+ struct work_struct forward_roi_work;
+
struct drm_crtc *crtc;
- uint8_t phy_inst;
+
+ /* Region of Interest (ROI) */
+ struct crc_window roi[MAX_CRC_WINDOW_NUM];
+
+ struct crc_info crc_info;
+};
+
+struct secure_display_context {
+
+ struct secure_display_crtc_context *crtc_ctx;
+ /* Whether dmub supports multiple ROI settings */
+ bool support_mul_roi;
+ enum secure_display_mode op_mode;
+ bool phy_mapping_updated;
+ int phy_id_mapping_cnt;
+ struct phy_id_mapping phy_id_mapping[MAX_CRTC];
};
#endif
@@ -90,11 +142,11 @@ void amdgpu_dm_crtc_handle_crc_irq(struct drm_crtc *crtc);
#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
bool amdgpu_dm_crc_window_is_activated(struct drm_crtc *crtc);
void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc);
-struct crc_rd_work *amdgpu_dm_crtc_secure_display_create_work(void);
+void amdgpu_dm_crtc_secure_display_create_contexts(struct amdgpu_device *adev);
#else
#define amdgpu_dm_crc_window_is_activated(x)
#define amdgpu_dm_crtc_handle_crc_window_irq(x)
-#define amdgpu_dm_crtc_secure_display_create_work()
+#define amdgpu_dm_crtc_secure_display_create_contexts(x)
#endif
#endif /* AMD_DAL_DEV_AMDGPU_DM_AMDGPU_DM_CRC_H_ */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
new file mode 100644
index 000000000000..697e232acebf
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -0,0 +1,804 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include <drm/drm_vblank.h>
+#include <drm/drm_atomic_helper.h>
+
+#include "dc.h"
+#include "amdgpu.h"
+#include "amdgpu_dm_psr.h"
+#include "amdgpu_dm_replay.h"
+#include "amdgpu_dm_crtc.h"
+#include "amdgpu_dm_plane.h"
+#include "amdgpu_dm_trace.h"
+#include "amdgpu_dm_debugfs.h"
+
+#define HPD_DETECTION_PERIOD_uS 2000000
+#define HPD_DETECTION_TIME_uS 100000
+
+void amdgpu_dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc)
+{
+ struct drm_crtc *crtc = &acrtc->base;
+ struct drm_device *dev = crtc->dev;
+ unsigned long flags;
+
+ drm_crtc_handle_vblank(crtc);
+
+ spin_lock_irqsave(&dev->event_lock, flags);
+
+ /* Send completion event for cursor-only commits */
+ if (acrtc->event && acrtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
+ drm_crtc_send_vblank_event(crtc, acrtc->event);
+ drm_crtc_vblank_put(crtc);
+ acrtc->event = NULL;
+ }
+
+ spin_unlock_irqrestore(&dev->event_lock, flags);
+}
+
+bool amdgpu_dm_crtc_modeset_required(struct drm_crtc_state *crtc_state,
+ struct dc_stream_state *new_stream,
+ struct dc_stream_state *old_stream)
+{
+ return crtc_state->active && drm_atomic_crtc_needs_modeset(crtc_state);
+}
+
+bool amdgpu_dm_crtc_vrr_active_irq(struct amdgpu_crtc *acrtc)
+
+{
+ return acrtc->dm_irq_params.freesync_config.state ==
+ VRR_STATE_ACTIVE_VARIABLE ||
+ acrtc->dm_irq_params.freesync_config.state ==
+ VRR_STATE_ACTIVE_FIXED;
+}
+
+int amdgpu_dm_crtc_set_vupdate_irq(struct drm_crtc *crtc, bool enable)
+{
+ enum dc_irq_source irq_source;
+ struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+ int rc;
+
+ if (acrtc->otg_inst == -1)
+ return 0;
+
+ irq_source = IRQ_TYPE_VUPDATE + acrtc->otg_inst;
+
+ rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY;
+
+ DRM_DEBUG_VBL("crtc %d - vupdate irq %sabling: r=%d\n",
+ acrtc->crtc_id, enable ? "en" : "dis", rc);
+ return rc;
+}
+
+bool amdgpu_dm_crtc_vrr_active(const struct dm_crtc_state *dm_state)
+{
+ return dm_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE ||
+ dm_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED;
+}
+
+/**
+ * amdgpu_dm_crtc_set_panel_sr_feature() - Manage panel self-refresh features.
+ *
+ * @vblank_work: is a pointer to a struct vblank_control_work object.
+ * @vblank_enabled: indicates whether the DRM vblank counter is currently
+ * enabled (true) or disabled (false).
+ * @allow_sr_entry: represents whether entry into the self-refresh mode is
+ * allowed (true) or not allowed (false).
+ *
+ * The DRM vblank counter enable/disable action is used as the trigger to enable
+ * or disable various panel self-refresh features:
+ *
+ * Panel Replay and PSR SU
+ * - Enable when:
+ * - VRR is disabled
+ * - vblank counter is disabled
+ * - entry is allowed (usermode demonstrates an adequate number of fast
+ * commits)
+ * - CRC capture window isn't active
+ * - Keep enabled even when vblank counter gets enabled
+ *
+ * PSR1
+ * - Enable condition same as above
+ * - Disable when vblank counter is enabled
+ */
+static void amdgpu_dm_crtc_set_panel_sr_feature(
+ struct vblank_control_work *vblank_work,
+ bool vblank_enabled, bool allow_sr_entry)
+{
+ struct dc_link *link = vblank_work->stream->link;
+ bool is_sr_active = (link->replay_settings.replay_allow_active ||
+ link->psr_settings.psr_allow_active);
+ bool is_crc_window_active = false;
+ bool vrr_active = amdgpu_dm_crtc_vrr_active_irq(vblank_work->acrtc);
+
+#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
+ is_crc_window_active =
+ amdgpu_dm_crc_window_is_activated(&vblank_work->acrtc->base);
+#endif
+
+ if (link->replay_settings.replay_feature_enabled && !vrr_active &&
+ allow_sr_entry && !is_sr_active && !is_crc_window_active) {
+ amdgpu_dm_replay_enable(vblank_work->stream, true);
+ } else if (vblank_enabled) {
+ if (link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 && is_sr_active)
+ amdgpu_dm_psr_disable(vblank_work->stream, false);
+ } else if (link->psr_settings.psr_feature_enabled && !vrr_active &&
+ allow_sr_entry && !is_sr_active && !is_crc_window_active) {
+
+ struct amdgpu_dm_connector *aconn =
+ (struct amdgpu_dm_connector *) vblank_work->stream->dm_stream_context;
+
+ if (!aconn->disallow_edp_enter_psr) {
+ struct amdgpu_display_manager *dm = vblank_work->dm;
+
+ amdgpu_dm_psr_enable(vblank_work->stream);
+ if (dm->idle_workqueue &&
+ (dm->dc->config.disable_ips == DMUB_IPS_ENABLE) &&
+ dm->dc->idle_optimizations_allowed &&
+ dm->idle_workqueue->enable &&
+ !dm->idle_workqueue->running)
+ schedule_work(&dm->idle_workqueue->work);
+ }
+ }
+}
+
+bool amdgpu_dm_is_headless(struct amdgpu_device *adev)
+{
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct drm_device *dev;
+ bool is_headless = true;
+
+ if (adev == NULL)
+ return true;
+
+ dev = adev->dm.ddev;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
+ if (connector->status == connector_status_connected) {
+ is_headless = false;
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+ return is_headless;
+}
+
+static void amdgpu_dm_idle_worker(struct work_struct *work)
+{
+ struct idle_workqueue *idle_work;
+
+ idle_work = container_of(work, struct idle_workqueue, work);
+ idle_work->dm->idle_workqueue->running = true;
+
+ while (idle_work->enable) {
+ fsleep(HPD_DETECTION_PERIOD_uS);
+ mutex_lock(&idle_work->dm->dc_lock);
+ if (!idle_work->dm->dc->idle_optimizations_allowed) {
+ mutex_unlock(&idle_work->dm->dc_lock);
+ break;
+ }
+ dc_allow_idle_optimizations(idle_work->dm->dc, false);
+
+ mutex_unlock(&idle_work->dm->dc_lock);
+ fsleep(HPD_DETECTION_TIME_uS);
+ mutex_lock(&idle_work->dm->dc_lock);
+
+ if (!amdgpu_dm_is_headless(idle_work->dm->adev) &&
+ !amdgpu_dm_psr_is_active_allowed(idle_work->dm)) {
+ mutex_unlock(&idle_work->dm->dc_lock);
+ break;
+ }
+
+ if (idle_work->enable) {
+ dc_post_update_surfaces_to_stream(idle_work->dm->dc);
+ dc_allow_idle_optimizations(idle_work->dm->dc, true);
+ }
+ mutex_unlock(&idle_work->dm->dc_lock);
+ }
+ idle_work->dm->idle_workqueue->running = false;
+}
+
+struct idle_workqueue *idle_create_workqueue(struct amdgpu_device *adev)
+{
+ struct idle_workqueue *idle_work;
+
+ idle_work = kzalloc(sizeof(*idle_work), GFP_KERNEL);
+ if (ZERO_OR_NULL_PTR(idle_work))
+ return NULL;
+
+ idle_work->dm = &adev->dm;
+ idle_work->enable = false;
+ idle_work->running = false;
+ INIT_WORK(&idle_work->work, amdgpu_dm_idle_worker);
+
+ return idle_work;
+}
+
+static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work)
+{
+ struct vblank_control_work *vblank_work =
+ container_of(work, struct vblank_control_work, work);
+ struct amdgpu_display_manager *dm = vblank_work->dm;
+ struct amdgpu_device *adev = drm_to_adev(dm->ddev);
+ int r;
+
+ mutex_lock(&dm->dc_lock);
+
+ if (vblank_work->enable)
+ dm->active_vblank_irq_count++;
+ else if (dm->active_vblank_irq_count)
+ dm->active_vblank_irq_count--;
+
+ if (dm->active_vblank_irq_count > 0)
+ dc_allow_idle_optimizations(dm->dc, false);
+
+ /*
+ * Control PSR based on vblank requirements from OS
+ *
+ * If panel supports PSR SU, there's no need to disable PSR when OS is
+ * submitting fast atomic commits (we infer this by whether the OS
+ * requests vblank events). Fast atomic commits will simply trigger a
+ * full-frame-update (FFU); a specific case of selective-update (SU)
+ * where the SU region is the full hactive*vactive region. See
+ * fill_dc_dirty_rects().
+ */
+ if (vblank_work->stream && vblank_work->stream->link && vblank_work->acrtc) {
+ amdgpu_dm_crtc_set_panel_sr_feature(
+ vblank_work, vblank_work->enable,
+ vblank_work->acrtc->dm_irq_params.allow_sr_entry);
+ }
+
+ if (dm->active_vblank_irq_count == 0) {
+ dc_post_update_surfaces_to_stream(dm->dc);
+
+ r = amdgpu_dpm_pause_power_profile(adev, true);
+ if (r)
+ dev_warn(adev->dev, "failed to set default power profile mode\n");
+
+ dc_allow_idle_optimizations(dm->dc, true);
+
+ r = amdgpu_dpm_pause_power_profile(adev, false);
+ if (r)
+ dev_warn(adev->dev, "failed to restore the power profile mode\n");
+ }
+
+ mutex_unlock(&dm->dc_lock);
+
+ dc_stream_release(vblank_work->stream);
+
+ kfree(vblank_work);
+}
+
+static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable)
+{
+ struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+ struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state);
+ struct amdgpu_display_manager *dm = &adev->dm;
+ struct vblank_control_work *work;
+ int irq_type;
+ int rc = 0;
+
+ if (enable && !acrtc->base.enabled) {
+ drm_dbg_vbl(crtc->dev,
+ "Reject vblank enable on unconfigured CRTC %d (enabled=%d)\n",
+ acrtc->crtc_id, acrtc->base.enabled);
+ return -EINVAL;
+ }
+
+ irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, acrtc->crtc_id);
+
+ if (enable) {
+ struct dc *dc = adev->dm.dc;
+ struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc);
+ struct psr_settings *psr = &acrtc_state->stream->link->psr_settings;
+ struct replay_settings *pr = &acrtc_state->stream->link->replay_settings;
+ bool sr_supported = (psr->psr_version != DC_PSR_VERSION_UNSUPPORTED) ||
+ pr->config.replay_supported;
+
+ /*
+ * IPS and self-refresh features can cause vblank counter resets between
+ * vblank disable and enable.
+ * This may cause the system to get stuck waiting for the vblank counter.
+ * Call drm_crtc_vblank_restore() to estimate missed vblanks by using timestamps and
+ * update the vblank counter in DRM.
+ */
+ if (dc->caps.ips_support &&
+ dc->config.disable_ips != DMUB_IPS_DISABLE_ALL &&
+ sr_supported && vblank->config.disable_immediate)
+ drm_crtc_vblank_restore(crtc);
+ }
+
+ if (dc_supports_vrr(dm->dc->ctx->dce_version)) {
+ if (enable) {
+ /* vblank irq on -> Only need vupdate irq in vrr mode */
+ if (amdgpu_dm_crtc_vrr_active(acrtc_state))
+ rc = amdgpu_dm_crtc_set_vupdate_irq(crtc, true);
+ } else {
+ /* vblank irq off -> vupdate irq off */
+ rc = amdgpu_dm_crtc_set_vupdate_irq(crtc, false);
+ }
+ }
+
+ if (rc)
+ return rc;
+
+ /* crtc vblank or vstartup interrupt */
+ if (enable) {
+ rc = amdgpu_irq_get(adev, &adev->crtc_irq, irq_type);
+ drm_dbg_vbl(crtc->dev, "Get crtc_irq ret=%d\n", rc);
+ } else {
+ rc = amdgpu_irq_put(adev, &adev->crtc_irq, irq_type);
+ drm_dbg_vbl(crtc->dev, "Put crtc_irq ret=%d\n", rc);
+ }
+
+ if (rc)
+ return rc;
+
+ /*
+ * hubp surface flip interrupt
+ *
+ * We have no guarantee that the frontend index maps to the same
+ * backend index - some even map to more than one.
+ *
+ * TODO: Use a different interrupt or check DC itself for the mapping.
+ */
+ if (enable) {
+ rc = amdgpu_irq_get(adev, &adev->pageflip_irq, irq_type);
+ drm_dbg_vbl(crtc->dev, "Get pageflip_irq ret=%d\n", rc);
+ } else {
+ rc = amdgpu_irq_put(adev, &adev->pageflip_irq, irq_type);
+ drm_dbg_vbl(crtc->dev, "Put pageflip_irq ret=%d\n", rc);
+ }
+
+ if (rc)
+ return rc;
+
+#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
+ /* crtc vline0 interrupt, only available on DCN+ */
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) != 0) {
+ if (enable) {
+ rc = amdgpu_irq_get(adev, &adev->vline0_irq, irq_type);
+ drm_dbg_vbl(crtc->dev, "Get vline0_irq ret=%d\n", rc);
+ } else {
+ rc = amdgpu_irq_put(adev, &adev->vline0_irq, irq_type);
+ drm_dbg_vbl(crtc->dev, "Put vline0_irq ret=%d\n", rc);
+ }
+
+ if (rc)
+ return rc;
+ }
+#endif
+
+ if (amdgpu_in_reset(adev))
+ return 0;
+
+ if (dm->vblank_control_workqueue) {
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return -ENOMEM;
+
+ INIT_WORK(&work->work, amdgpu_dm_crtc_vblank_control_worker);
+ work->dm = dm;
+ work->acrtc = acrtc;
+ work->enable = enable;
+
+ if (acrtc_state->stream) {
+ dc_stream_retain(acrtc_state->stream);
+ work->stream = acrtc_state->stream;
+ }
+
+ queue_work(dm->vblank_control_workqueue, &work->work);
+ }
+
+ return 0;
+}
+
+/* .enable_vblank callback: switch the vblank interrupt path on for @crtc. */
+int amdgpu_dm_crtc_enable_vblank(struct drm_crtc *crtc)
+{
+	int rc = amdgpu_dm_crtc_set_vblank(crtc, true);
+
+	return rc;
+}
+
+/*
+ * .disable_vblank callback: switch the vblank interrupt path off for @crtc.
+ * The callback returns void, so the result of the helper is dropped.
+ */
+void amdgpu_dm_crtc_disable_vblank(struct drm_crtc *crtc)
+{
+	(void)amdgpu_dm_crtc_set_vblank(crtc, false);
+}
+
+/*
+ * Release a DM CRTC state: drop its stream reference (if it holds one), tear
+ * down the embedded DRM CRTC state and free the allocation.
+ */
+static void amdgpu_dm_crtc_destroy_state(struct drm_crtc *crtc,
+					 struct drm_crtc_state *state)
+{
+	struct dm_crtc_state *dm_state = to_dm_crtc_state(state);
+
+	/* TODO: destroy dc_stream objects once the stream object is flattened */
+	if (dm_state->stream != NULL)
+		dc_stream_release(dm_state->stream);
+
+	__drm_atomic_helper_crtc_destroy_state(state);
+	kfree(state);
+}
+
+/*
+ * Allocate a new DM CRTC state that mirrors the CRTC's current one.
+ *
+ * The DRM base state is copied by the atomic helper; the DM-private fields
+ * are copied one by one, and the shared stream gets an extra reference.
+ *
+ * Returns the embedded drm_crtc_state, or NULL when the CRTC has no current
+ * state or the allocation fails.
+ */
+static struct drm_crtc_state *amdgpu_dm_crtc_duplicate_state(struct drm_crtc *crtc)
+{
+	struct dm_crtc_state *src, *dup;
+
+	if (WARN_ON(!crtc->state))
+		return NULL;
+
+	src = to_dm_crtc_state(crtc->state);
+
+	dup = kzalloc(sizeof(*dup), GFP_KERNEL);
+	if (dup == NULL)
+		return NULL;
+
+	__drm_atomic_helper_crtc_duplicate_state(crtc, &dup->base);
+
+	/* Both states point at the same stream, so the copy takes its own ref. */
+	dup->stream = src->stream;
+	if (dup->stream)
+		dc_stream_retain(dup->stream);
+
+	/* TODO: duplicate dc_stream once the stream object is flattened */
+	dup->active_planes = src->active_planes;
+	dup->mpo_requested = src->mpo_requested;
+	dup->cursor_mode = src->cursor_mode;
+	dup->crc_skip_count = src->crc_skip_count;
+	dup->abm_level = src->abm_level;
+
+	/* Variable refresh rate / FreeSync */
+	dup->vrr_supported = src->vrr_supported;
+	dup->vrr_infopacket = src->vrr_infopacket;
+	dup->freesync_config = src->freesync_config;
+
+	/* Color management */
+	dup->cm_has_degamma = src->cm_has_degamma;
+	dup->cm_is_degamma_srgb = src->cm_is_degamma_srgb;
+	dup->regamma_tf = src->regamma_tf;
+
+	return &dup->base;
+}
+
+/*
+ * .destroy callback: clean up the DRM CRTC and free its memory.
+ *
+ * NOTE(review): kfree() is given the drm_crtc pointer itself; this frees the
+ * enclosing amdgpu_crtc allocation only if the drm_crtc is its first member -
+ * confirm against the struct definition.
+ */
+static void amdgpu_dm_crtc_destroy(struct drm_crtc *crtc)
+{
+ drm_crtc_cleanup(crtc);
+ kfree(crtc);
+}
+
+/*
+ * .reset callback: discard any existing CRTC state and install a freshly
+ * zero-allocated dm_crtc_state in its place.
+ *
+ * If the allocation fails the CRTC is left with no state (crtc->state ==
+ * NULL) and a warning is emitted.
+ */
+static void amdgpu_dm_crtc_reset_state(struct drm_crtc *crtc)
+{
+	struct dm_crtc_state *state;
+
+	if (crtc->state) {
+		amdgpu_dm_crtc_destroy_state(crtc, crtc->state);
+		/*
+		 * The old state has just been freed; clear the pointer so it
+		 * never dangles if the allocation below fails.
+		 */
+		crtc->state = NULL;
+	}
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (WARN_ON(!state))
+		return;
+
+	__drm_atomic_helper_crtc_reset(crtc, &state->base);
+}
+
+#ifdef CONFIG_DEBUG_FS
+/* .late_register callback (CONFIG_DEBUG_FS only): set up the CRTC debugfs entries; always returns 0. */
+static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc)
+{
+ crtc_debugfs_init(crtc);
+
+ return 0;
+}
+#endif
+
+#ifdef AMD_PRIVATE_COLOR
+/**
+ * dm_crtc_additional_color_mgmt - enable additional color properties
+ * @crtc: DRM CRTC
+ *
+ * Attaches the driver-private post-blending regamma transfer function
+ * property to @crtc, on top of the standard DRM CRTC gamma LUT. The property
+ * starts at its default value, i.e. a linear transfer function, which matches
+ * the CRTC gamma LUT behaviour without this property. Nothing is attached
+ * when the MPC ogam_ram capability is not reported.
+ */
+static void
+dm_crtc_additional_color_mgmt(struct drm_crtc *crtc)
+{
+	struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+
+	if (!adev->dm.dc->caps.color.mpc.ogam_ram)
+		return;
+
+	drm_object_attach_property(&crtc->base,
+				   adev->mode_info.regamma_tf_property,
+				   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
+}
+
+/*
+ * .atomic_set_property callback for the driver-private CRTC properties.
+ *
+ * Only the regamma transfer function property is handled: a changed value is
+ * stored in the DM CRTC state and flags a color management change. Any other
+ * property is rejected with -EINVAL. Returns 0 otherwise.
+ */
+static int
+amdgpu_dm_atomic_crtc_set_property(struct drm_crtc *crtc,
+				   struct drm_crtc_state *state,
+				   struct drm_property *property,
+				   uint64_t val)
+{
+	struct dm_crtc_state *dm_state = to_dm_crtc_state(state);
+	struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+
+	if (property != adev->mode_info.regamma_tf_property) {
+		drm_dbg_atomic(crtc->dev,
+			       "[CRTC:%d:%s] unknown property [PROP:%d:%s]]\n",
+			       crtc->base.id, crtc->name,
+			       property->base.id, property->name);
+		return -EINVAL;
+	}
+
+	/* Nothing to do when the value is unchanged. */
+	if (dm_state->regamma_tf == val)
+		return 0;
+
+	dm_state->regamma_tf = val;
+	dm_state->base.color_mgmt_changed |= 1;
+
+	return 0;
+}
+
+/*
+ * .atomic_get_property callback for the driver-private CRTC properties.
+ *
+ * Stores the regamma transfer function of @state in @val and returns 0;
+ * any other property yields -EINVAL and leaves @val untouched.
+ */
+static int
+amdgpu_dm_atomic_crtc_get_property(struct drm_crtc *crtc,
+				   const struct drm_crtc_state *state,
+				   struct drm_property *property,
+				   uint64_t *val)
+{
+	struct dm_crtc_state *dm_state = to_dm_crtc_state(state);
+	struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+
+	if (property != adev->mode_info.regamma_tf_property)
+		return -EINVAL;
+
+	*val = dm_state->regamma_tf;
+
+	return 0;
+}
+#endif
+
+/*
+ * DRM core CRTC callbacks for the amdgpu display manager.
+ * Implemented only the options currently available for the driver.
+ *
+ * State handling, CRC source selection and vblank on/off use the driver
+ * functions; set_config, page_flip and the vblank timestamp go through the
+ * generic DRM helpers.
+ */
+static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
+ .reset = amdgpu_dm_crtc_reset_state,
+ .destroy = amdgpu_dm_crtc_destroy,
+ .set_config = drm_atomic_helper_set_config,
+ .page_flip = drm_atomic_helper_page_flip,
+ .atomic_duplicate_state = amdgpu_dm_crtc_duplicate_state,
+ .atomic_destroy_state = amdgpu_dm_crtc_destroy_state,
+ .set_crc_source = amdgpu_dm_crtc_set_crc_source,
+ .verify_crc_source = amdgpu_dm_crtc_verify_crc_source,
+ .get_crc_sources = amdgpu_dm_crtc_get_crc_sources,
+ .get_vblank_counter = amdgpu_get_vblank_counter_kms,
+ .enable_vblank = amdgpu_dm_crtc_enable_vblank,
+ .disable_vblank = amdgpu_dm_crtc_disable_vblank,
+ .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp,
+ /* debugfs registration is only compiled in with CONFIG_DEBUG_FS */
+#if defined(CONFIG_DEBUG_FS)
+ .late_register = amdgpu_dm_crtc_late_register,
+#endif
+ /* driver-private color properties are only compiled in with AMD_PRIVATE_COLOR */
+#ifdef AMD_PRIVATE_COLOR
+ .atomic_set_property = amdgpu_dm_atomic_crtc_set_property,
+ .atomic_get_property = amdgpu_dm_atomic_crtc_get_property,
+#endif
+};
+
+/* Empty .disable callback registered in amdgpu_dm_crtc_helper_funcs; it does nothing. */
+static void amdgpu_dm_crtc_helper_disable(struct drm_crtc *crtc)
+{
+}
+
+/*
+ * Count the non-cursor planes in @new_crtc_state's plane mask that are
+ * considered active.
+ *
+ * A plane counts as active when it either has no new state in this atomic
+ * commit (it is unchanged, so it already passed validation and is enabled)
+ * or its new state carries a framebuffer.
+ */
+static int amdgpu_dm_crtc_count_crtc_active_planes(struct drm_crtc_state *new_crtc_state)
+{
+	struct drm_atomic_state *state = new_crtc_state->state;
+	struct drm_plane *plane;
+	int active = 0;
+
+	drm_for_each_plane_mask(plane, state->dev, new_crtc_state->plane_mask) {
+		struct drm_plane_state *pstate;
+
+		/* Cursor planes are "fake" and never counted. */
+		if (plane->type == DRM_PLANE_TYPE_CURSOR)
+			continue;
+
+		pstate = drm_atomic_get_new_plane_state(state, plane);
+
+		/*
+		 * No new state: the plane is enabled on the CRTC and unchanged.
+		 * Otherwise a framebuffer is required to be considered enabled.
+		 */
+		if (!pstate || pstate->fb != NULL)
+			active++;
+	}
+
+	return active;
+}
+
+/*
+ * Refresh the cached active plane count in the DM CRTC state: zero when no
+ * stream is attached, otherwise the number of active non-cursor planes.
+ */
+static void amdgpu_dm_crtc_update_crtc_active_planes(struct drm_crtc *crtc,
+						      struct drm_crtc_state *new_crtc_state)
+{
+	struct dm_crtc_state *dm_state = to_dm_crtc_state(new_crtc_state);
+
+	if (dm_state->stream)
+		dm_state->active_planes =
+			amdgpu_dm_crtc_count_crtc_active_planes(new_crtc_state);
+	else
+		dm_state->active_planes = 0;
+}
+
+/* .mode_fixup callback: accepts every mode; always returns true and leaves @adjusted_mode untouched. */
+static bool amdgpu_dm_crtc_helper_mode_fixup(struct drm_crtc *crtc,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ return true;
+}
+
+/*
+ * .atomic_check helper callback: validate the new state of @crtc in @state.
+ *
+ * Checks, in order: a required modeset must have a stream; an enabled CRTC
+ * must have its primary plane enabled; async flips are limited to fast
+ * updates; with VRR active the primary plane state is pulled into the commit;
+ * finally the stream (if any) is validated by DC.
+ *
+ * Returns 0 when the state is acceptable, -EINVAL on a failed check, or the
+ * error from acquiring the primary plane state.
+ */
+static int amdgpu_dm_crtc_helper_atomic_check(struct drm_crtc *crtc,
+					      struct drm_atomic_state *state)
+{
+	struct drm_crtc_state *new_state = drm_atomic_get_new_crtc_state(state, crtc);
+	struct dm_crtc_state *dm_state = to_dm_crtc_state(new_state);
+	struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+	struct dc *dc = adev->dm.dc;
+
+	trace_amdgpu_dm_crtc_atomic_check(new_state);
+
+	amdgpu_dm_crtc_update_crtc_active_planes(crtc, new_state);
+
+	/* A required modeset with no stream attached is rejected with a warning. */
+	if (WARN_ON(unlikely(!dm_state->stream &&
+			     amdgpu_dm_crtc_modeset_required(new_state, NULL,
+							     dm_state->stream))))
+		return -EINVAL;
+
+	/*
+	 * The primary plane must be enabled whenever the CRTC is. Otherwise
+	 * drm_mode_cursor_universal may try to enable the cursor plane while
+	 * every other plane is disabled, which the hardware does not support,
+	 * and some legacy userspace reacts to the resulting EINVAL by giving
+	 * up on the HW cursor altogether.
+	 */
+	if (new_state->enable &&
+	    !(new_state->plane_mask & drm_plane_mask(crtc->primary))) {
+		DRM_DEBUG_ATOMIC("Can't enable a CRTC without enabling the primary plane\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Async flips are only allowed for fast updates, i.e. ones that do not
+	 * change the FB pitch, the DCC state, rotation, etc.
+	 */
+	if (new_state->async_flip &&
+	    dm_state->update_type != UPDATE_TYPE_FAST) {
+		drm_dbg_atomic(crtc->dev,
+			       "[CRTC:%d:%s] async flips are only supported for fast updates\n",
+			       crtc->base.id, crtc->name);
+		return -EINVAL;
+	}
+
+	if (!state->legacy_cursor_update && amdgpu_dm_crtc_vrr_active(dm_state)) {
+		/* Pull in the primary plane for correct VRR handling */
+		struct drm_plane_state *primary =
+			drm_atomic_get_plane_state(state, crtc->primary);
+
+		if (IS_ERR(primary))
+			return PTR_ERR(primary);
+	}
+
+	/* In some use cases, like reset, no stream is attached */
+	if (!dm_state->stream)
+		return 0;
+
+	if (dc_validate_stream(dc, dm_state->stream) != DC_OK) {
+		DRM_DEBUG_ATOMIC("Failed DC stream validation\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* CRTC helper callbacks; attached to each CRTC with drm_crtc_helper_add() in amdgpu_dm_crtc_init(). */
+static const struct drm_crtc_helper_funcs amdgpu_dm_crtc_helper_funcs = {
+ .disable = amdgpu_dm_crtc_helper_disable,
+ .atomic_check = amdgpu_dm_crtc_helper_atomic_check,
+ .mode_fixup = amdgpu_dm_crtc_helper_mode_fixup,
+ .get_scanout_position = amdgpu_crtc_get_scanout_position,
+};
+
+/*
+ * amdgpu_dm_crtc_init - create and register one DM CRTC with its cursor plane
+ * @dm: display manager the CRTC belongs to
+ * @plane: primary plane for the new CRTC
+ * @crtc_index: index stored as the CRTC id and used as the slot in
+ *              adev->mode_info.crtcs[]
+ *
+ * Allocates a cursor plane and an amdgpu_crtc, registers the CRTC with DRM,
+ * resets its state, and enables the color management properties.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the error returned
+ * by amdgpu_dm_plane_init() / drm_crtc_init_with_planes().
+ */
+int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
+			struct drm_plane *plane,
+			uint32_t crtc_index)
+{
+	struct amdgpu_crtc *acrtc = NULL;
+	struct drm_plane *cursor_plane;
+	bool has_degamma;
+	int res = -ENOMEM;
+
+	cursor_plane = kzalloc(sizeof(*cursor_plane), GFP_KERNEL);
+	if (!cursor_plane)
+		goto fail;
+
+	cursor_plane->type = DRM_PLANE_TYPE_CURSOR;
+	res = amdgpu_dm_plane_init(dm, cursor_plane, 0, NULL);
+	/* Do not hand a plane that failed to initialise to the CRTC below. */
+	if (res)
+		goto fail;
+
+	acrtc = kzalloc(sizeof(*acrtc), GFP_KERNEL);
+	if (!acrtc) {
+		/* res holds 0 from the plane init; report the real failure. */
+		res = -ENOMEM;
+		goto fail;
+	}
+
+	res = drm_crtc_init_with_planes(
+			dm->ddev,
+			&acrtc->base,
+			plane,
+			cursor_plane,
+			&amdgpu_dm_crtc_funcs, NULL);
+
+	if (res)
+		goto fail;
+
+	drm_crtc_helper_add(&acrtc->base, &amdgpu_dm_crtc_helper_funcs);
+
+	/* Create (reset) the CRTC state */
+	if (acrtc->base.funcs->reset)
+		acrtc->base.funcs->reset(&acrtc->base);
+
+	acrtc->max_cursor_width = dm->adev->dm.dc->caps.max_cursor_size;
+	acrtc->max_cursor_height = dm->adev->dm.dc->caps.max_cursor_size;
+
+	acrtc->crtc_id = crtc_index;
+	acrtc->base.enabled = false;
+	acrtc->otg_inst = -1;
+
+	dm->adev->mode_info.crtcs[crtc_index] = acrtc;
+
+	/* Don't enable DRM CRTC degamma property for
+	 * 1. Degamma is replaced by color pipeline.
+	 * 2. DCE since it doesn't support programmable degamma anywhere.
+	 * 3. DCN401 since pre-blending degamma LUT doesn't apply to cursor.
+	 */
+	if (plane->color_pipeline_property)
+		has_degamma = false;
+	else
+		has_degamma = dm->adev->dm.dc->caps.color.dpp.dcn_arch &&
+			      dm->adev->dm.dc->ctx->dce_version != DCN_VERSION_4_01;
+
+	drm_crtc_enable_color_mgmt(&acrtc->base, has_degamma ? MAX_COLOR_LUT_ENTRIES : 0,
+				   true, MAX_COLOR_LUT_ENTRIES);
+
+	drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES);
+
+#ifdef AMD_PRIVATE_COLOR
+	dm_crtc_additional_color_mgmt(&acrtc->base);
+#endif
+	return 0;
+
+fail:
+	/*
+	 * NOTE(review): if the cursor plane was successfully initialised before
+	 * the failure, it may still be registered with the DRM device when it
+	 * is freed here - confirm whether drm_plane_cleanup() is needed first.
+	 */
+	kfree(acrtc);
+	kfree(cursor_plane);
+	return res;
+}
+
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h
new file mode 100644
index 000000000000..c1212947a77b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __AMDGPU_DM_CRTC_H__
+#define __AMDGPU_DM_CRTC_H__
+
+void amdgpu_dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc);
+
+bool amdgpu_dm_crtc_modeset_required(struct drm_crtc_state *crtc_state,
+				     struct dc_stream_state *new_stream,
+				     struct dc_stream_state *old_stream);
+
+int amdgpu_dm_crtc_set_vupdate_irq(struct drm_crtc *crtc, bool enable);
+
+bool amdgpu_dm_crtc_vrr_active_irq(struct amdgpu_crtc *acrtc);
+
+bool amdgpu_dm_crtc_vrr_active(const struct dm_crtc_state *dm_state);
+
+/* DRM .enable_vblank / .disable_vblank callbacks for DM CRTCs. */
+int amdgpu_dm_crtc_enable_vblank(struct drm_crtc *crtc);
+
+void amdgpu_dm_crtc_disable_vblank(struct drm_crtc *crtc);
+
+/*
+ * Create and register the DM CRTC number @crtc_index with @plane as its
+ * primary plane. Returns 0 on success or a negative errno.
+ */
+int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
+			struct drm_plane *plane,
+			uint32_t crtc_index);
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index 9d43ecb1f692..a9839485f2a2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
@@ -23,19 +24,27 @@
*
*/
+#include <linux/string_helpers.h>
#include <linux/uaccess.h>
+#include <media/cec-notifier.h>
#include "dc.h"
#include "amdgpu.h"
#include "amdgpu_dm.h"
#include "amdgpu_dm_debugfs.h"
+#include "amdgpu_dm_replay.h"
#include "dm_helpers.h"
#include "dmub/dmub_srv.h"
#include "resource.h"
#include "dsc.h"
-#include "dc_link_dp.h"
#include "link_hwss.h"
#include "dc/dc_dmub_srv.h"
+#include "link/protocols/link_dp_capability.h"
+#include "inc/hw/dchubbub.h"
+
+#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
+#include "amdgpu_dm_psr.h"
+#endif
struct dmub_debugfs_trace_header {
uint32_t entry_count;
@@ -49,10 +58,12 @@ struct dmub_debugfs_trace_entry {
uint32_t param1;
};
-static inline const char *yesno(bool v)
-{
- return v ? "yes" : "no";
-}
+static const char *const mst_progress_status[] = {
+ "probe",
+ "remote_edid",
+ "allocate_new_payload",
+ "clear_allocated_payload",
+};
/* parse_write_buffer_into_params - Helper function to parse debugfs write buffer into an array
*
@@ -227,8 +238,10 @@ static ssize_t dp_link_settings_read(struct file *f, char __user *buf,
break;
r = put_user(*(rd_buf + result), buf);
- if (r)
+ if (r) {
+ kfree(rd_buf);
return r; /* r = -EFAULT */
+ }
buf += 1;
size -= 1;
@@ -245,8 +258,9 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf,
{
struct amdgpu_dm_connector *connector = file_inode(f)->i_private;
struct dc_link *link = connector->dc_link;
+ struct amdgpu_device *adev = drm_to_adev(connector->base.dev);
struct dc *dc = (struct dc *)link->dc;
- struct dc_link_settings prefer_link_settings;
+ struct dc_link_settings prefer_link_settings = {0};
char *wr_buf = NULL;
const uint32_t wr_buf_size = 40;
/* 0: lane_count; 1: link_rate */
@@ -292,9 +306,9 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf,
case LINK_RATE_RBR2:
case LINK_RATE_HIGH2:
case LINK_RATE_HIGH3:
-#if defined(CONFIG_DRM_AMD_DC_DCN)
case LINK_RATE_UHBR10:
-#endif
+ case LINK_RATE_UHBR13_5:
+ case LINK_RATE_UHBR20:
break;
default:
valid_input = false;
@@ -304,6 +318,9 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf,
if (!valid_input) {
kfree(wr_buf);
DRM_DEBUG_DRIVER("Invalid Input value No HW will be programmed\n");
+ mutex_lock(&adev->dm.dc_lock);
+ dc_link_set_preferred_training_settings(dc, NULL, NULL, link, false);
+ mutex_unlock(&adev->dm.dc_lock);
return size;
}
@@ -315,7 +332,156 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf,
prefer_link_settings.lane_count = param[0];
prefer_link_settings.link_rate = param[1];
+ mutex_lock(&adev->dm.dc_lock);
+ dc_link_set_preferred_training_settings(dc, &prefer_link_settings, NULL, link, false);
+ mutex_unlock(&adev->dm.dc_lock);
+
+ kfree(wr_buf);
+ return size;
+}
+
+/*
+ * Report whether @aconnector sits on an MST end device, i.e. its output port
+ * has peer device type SST sink or legacy DP converter. Returns false when
+ * the connector has no MST root or the root's topology manager is not in
+ * MST state. The port type is read under the topology manager's base lock.
+ */
+static bool dp_mst_is_end_device(struct amdgpu_dm_connector *aconnector)
+{
+	struct drm_dp_mst_topology_mgr *mgr;
+	struct drm_dp_mst_port *port;
+	bool end_device = false;
+
+	if (!aconnector->mst_root || !aconnector->mst_root->mst_mgr.mst_state)
+		return end_device;
+
+	mgr = &aconnector->mst_root->mst_mgr;
+	port = aconnector->mst_output_port;
+
+	drm_modeset_lock(&mgr->base.lock, NULL);
+	switch (port->pdt) {
+	case DP_PEER_DEVICE_SST_SINK:
+	case DP_PEER_DEVICE_DP_LEGACY_CONV:
+		end_device = true;
+		break;
+	default:
+		break;
+	}
+	drm_modeset_unlock(&mgr->base.lock);
+
+	return end_device;
+}
+
+/* Change MST link setting
+ *
+ * valid lane count value: 1, 2, 4
+ * valid link rate value:
+ * 06h = 1.62Gbps per lane
+ * 0Ah = 2.7Gbps per lane
+ * 0Ch = 3.24Gbps per lane
+ * 14h = 5.4Gbps per lane
+ * 1Eh = 8.1Gbps per lane
+ * 3E8h = 10.0Gbps per lane
+ * 546h = 13.5Gbps per lane
+ * 7D0h = 20.0Gbps per lane
+ *
+ * debugfs is located at /sys/kernel/debug/dri/0/DP-x/mst_link_settings
+ *
+ * for example, to force 2 lanes at 10.0Gbps per lane,
+ * echo 2 0x3e8 > /sys/kernel/debug/dri/0/DP-x/mst_link_settings
+ *
+ * Valid input will trigger hotplug event to get new link setting applied
+ * Invalid input will trigger training setting reset
+ *
+ * The usage can be referred to link_settings entry
+ *
+ */
+static ssize_t dp_mst_link_setting(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private;
+ struct dc_link *link = aconnector->dc_link;
+ struct amdgpu_device *adev = drm_to_adev(aconnector->base.dev);
+ struct dc *dc = (struct dc *)link->dc;
+ struct dc_link_settings prefer_link_settings = {0};
+ char *wr_buf = NULL;
+ const uint32_t wr_buf_size = 40;
+ /* 0: lane_count; 1: link_rate */
+ int max_param_num = 2;
+ uint8_t param_nums = 0;
+ long param[2];
+ bool valid_input = true;
+
+ if (!dp_mst_is_end_device(aconnector))
+ return -EINVAL;
+
+ if (size == 0)
+ return -EINVAL;
+
+ wr_buf = kcalloc(wr_buf_size, sizeof(char), GFP_KERNEL);
+ if (!wr_buf)
+ return -ENOSPC;
+
+ if (parse_write_buffer_into_params(wr_buf, wr_buf_size,
+ (long *)param, buf,
+ max_param_num,
+ &param_nums)) {
+ kfree(wr_buf);
+ return -EINVAL;
+ }
+
+ if (param_nums <= 0) {
+ kfree(wr_buf);
+ DRM_DEBUG_DRIVER("user data not be read\n");
+ return -EINVAL;
+ }
+
+ switch (param[0]) {
+ case LANE_COUNT_ONE:
+ case LANE_COUNT_TWO:
+ case LANE_COUNT_FOUR:
+ break;
+ default:
+ valid_input = false;
+ break;
+ }
+
+ switch (param[1]) {
+ case LINK_RATE_LOW:
+ case LINK_RATE_HIGH:
+ case LINK_RATE_RBR2:
+ case LINK_RATE_HIGH2:
+ case LINK_RATE_HIGH3:
+ case LINK_RATE_UHBR10:
+ case LINK_RATE_UHBR13_5:
+ case LINK_RATE_UHBR20:
+ break;
+ default:
+ valid_input = false;
+ break;
+ }
+
+ if (!valid_input) {
+ kfree(wr_buf);
+ DRM_DEBUG_DRIVER("Invalid Input value No HW will be programmed\n");
+ mutex_lock(&adev->dm.dc_lock);
+ dc_link_set_preferred_training_settings(dc, NULL, NULL, link, false);
+ mutex_unlock(&adev->dm.dc_lock);
+ return -EINVAL;
+ }
+
+ /* save user force lane_count, link_rate to preferred settings
+ * spread spectrum will not be changed
+ */
+ prefer_link_settings.link_spread = link->cur_link_settings.link_spread;
+ prefer_link_settings.use_link_rate_set = false;
+ prefer_link_settings.lane_count = param[0];
+ prefer_link_settings.link_rate = param[1];
+
+ /* skip immediate retrain, and train to new link setting after hotplug event triggered */
+ mutex_lock(&adev->dm.dc_lock);
dc_link_set_preferred_training_settings(dc, &prefer_link_settings, NULL, link, true);
+ mutex_unlock(&adev->dm.dc_lock);
+
+ mutex_lock(&aconnector->base.dev->mode_config.mutex);
+ aconnector->base.force = DRM_FORCE_OFF;
+ mutex_unlock(&aconnector->base.dev->mode_config.mutex);
+ drm_kms_helper_hotplug_event(aconnector->base.dev);
+
+ msleep(100);
+
+ mutex_lock(&aconnector->base.dev->mode_config.mutex);
+ aconnector->base.force = DRM_FORCE_UNSPECIFIED;
+ mutex_unlock(&aconnector->base.dev->mode_config.mutex);
+ drm_kms_helper_hotplug_event(aconnector->base.dev);
kfree(wr_buf);
return size;
@@ -389,8 +555,10 @@ static ssize_t dp_phy_settings_read(struct file *f, char __user *buf,
break;
r = put_user((*(rd_buf + result)), buf);
- if (r)
+ if (r) {
+ kfree(rd_buf);
return r; /* r = -EFAULT */
+ }
buf += 1;
size -= 1;
@@ -402,67 +570,38 @@ static ssize_t dp_phy_settings_read(struct file *f, char __user *buf,
return result;
}
-static int dp_lttpr_status_show(struct seq_file *m, void *d)
+static int dp_lttpr_status_show(struct seq_file *m, void *unused)
{
- char *data;
- struct amdgpu_dm_connector *connector = file_inode(m->file)->i_private;
- struct dc_link *link = connector->dc_link;
- uint32_t read_size = 1;
- uint8_t repeater_count = 0;
+ struct drm_connector *connector = m->private;
+ struct amdgpu_dm_connector *aconnector =
+ to_amdgpu_dm_connector(connector);
+ struct dc_lttpr_caps caps = aconnector->dc_link->dpcd_caps.lttpr_caps;
- data = kzalloc(read_size, GFP_KERNEL);
- if (!data)
- return 0;
+ if (connector->status != connector_status_connected)
+ return -ENODEV;
- dm_helpers_dp_read_dpcd(link->ctx, link, 0xF0002, data, read_size);
+ seq_printf(m, "phy repeater count: %u (raw: 0x%x)\n",
+ dp_parse_lttpr_repeater_count(caps.phy_repeater_cnt),
+ caps.phy_repeater_cnt);
- switch ((uint8_t)*data) {
- case 0x80:
- repeater_count = 1;
- break;
- case 0x40:
- repeater_count = 2;
- break;
- case 0x20:
- repeater_count = 3;
- break;
- case 0x10:
- repeater_count = 4;
- break;
- case 0x8:
- repeater_count = 5;
- break;
- case 0x4:
- repeater_count = 6;
- break;
- case 0x2:
- repeater_count = 7;
+ seq_puts(m, "phy repeater mode: ");
+
+ switch (caps.mode) {
+ case DP_PHY_REPEATER_MODE_TRANSPARENT:
+ seq_puts(m, "transparent");
break;
- case 0x1:
- repeater_count = 8;
+ case DP_PHY_REPEATER_MODE_NON_TRANSPARENT:
+ seq_puts(m, "non-transparent");
break;
- case 0x0:
- repeater_count = 0;
+ case 0x00:
+ seq_puts(m, "non lttpr");
break;
default:
- repeater_count = (uint8_t)*data;
+ seq_printf(m, "read error (raw: 0x%x)", caps.mode);
break;
}
- seq_printf(m, "phy repeater count: %d\n", repeater_count);
-
- dm_helpers_dp_read_dpcd(link->ctx, link, 0xF0003, data, read_size);
-
- if ((uint8_t)*data == 0x55)
- seq_printf(m, "phy repeater mode: transparent\n");
- else if ((uint8_t)*data == 0xAA)
- seq_printf(m, "phy repeater mode: non-transparent\n");
- else if ((uint8_t)*data == 0x00)
- seq_printf(m, "phy repeater mode: non lttpr\n");
- else
- seq_printf(m, "phy repeater mode: read error\n");
-
- kfree(data);
+ seq_puts(m, "\n");
return 0;
}
@@ -476,7 +615,7 @@ static ssize_t dp_phy_settings_write(struct file *f, const char __user *buf,
uint32_t wr_buf_size = 40;
long param[3];
bool use_prefer_link_setting;
- struct link_training_settings link_lane_settings;
+ struct link_training_settings link_lane_settings = {0};
int max_param_num = 3;
uint8_t param_nums = 0;
int r = 0;
@@ -536,11 +675,11 @@ static ssize_t dp_phy_settings_write(struct file *f, const char __user *buf,
/* apply phy settings from user */
for (r = 0; r < link_lane_settings.link_settings.lane_count; r++) {
- link_lane_settings.lane_settings[r].VOLTAGE_SWING =
+ link_lane_settings.hw_lane_settings[r].VOLTAGE_SWING =
(enum dc_voltage_swing) (param[0]);
- link_lane_settings.lane_settings[r].PRE_EMPHASIS =
+ link_lane_settings.hw_lane_settings[r].PRE_EMPHASIS =
(enum dc_pre_emphasis) (param[1]);
- link_lane_settings.lane_settings[r].POST_CURSOR2 =
+ link_lane_settings.hw_lane_settings[r].POST_CURSOR2 =
(enum dc_post_cursor2) (param[2]);
}
@@ -620,6 +759,7 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us
int max_param_num = 11;
enum dp_test_pattern test_pattern = DP_TEST_PATTERN_UNSUPPORTED;
bool disable_hpd = false;
+ bool supports_hpd = link->irq_source_hpd != DC_IRQ_SOURCE_INVALID;
bool valid_test_pattern = false;
uint8_t param_nums = 0;
/* init with default 80bit custom pattern */
@@ -631,7 +771,7 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us
LINK_RATE_UNKNOWN, LINK_SPREAD_DISABLED};
struct dc_link_settings cur_link_settings = {LANE_COUNT_UNKNOWN,
LINK_RATE_UNKNOWN, LINK_SPREAD_DISABLED};
- struct link_training_settings link_training_settings;
+ struct link_training_settings link_training_settings = {0};
int i;
if (size == 0)
@@ -711,7 +851,7 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us
* because it might have been disabled after a test pattern was set.
* AUX depends on HPD * sequence dependent, do not move!
*/
- if (!disable_hpd)
+ if (supports_hpd && !disable_hpd)
dc_link_enable_hpd(link);
prefer_link_settings.lane_count = link->verified_link_cap.lane_count;
@@ -734,9 +874,9 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us
}
for (i = 0; i < (unsigned int)(link_training_settings.link_settings.lane_count); i++)
- link_training_settings.lane_settings[i] = link->cur_lane_setting[i];
+ link_training_settings.hw_lane_settings[i] = link->cur_lane_setting[i];
- dc_link_set_test_pattern(
+ dc_link_dp_set_test_pattern(
link,
test_pattern,
DP_TEST_PATTERN_COLOR_SPACE_RGB,
@@ -749,7 +889,7 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us
* Need disable interrupt to avoid SW driver disable DP output. This is
* done after the test pattern is set.
*/
- if (valid_test_pattern && disable_hpd)
+ if (valid_test_pattern && supports_hpd && disable_hpd)
dc_link_disable_hpd(link);
kfree(wr_buf);
@@ -765,9 +905,10 @@ static int dmub_tracebuffer_show(struct seq_file *m, void *data)
{
struct amdgpu_device *adev = m->private;
struct dmub_srv_fb_info *fb_info = adev->dm.dmub_fb_info;
+ struct dmub_fw_meta_info *fw_meta_info = NULL;
struct dmub_debugfs_trace_entry *entries;
uint8_t *tbuf_base;
- uint32_t tbuf_size, max_entries, num_entries, i;
+ uint32_t tbuf_size, max_entries, num_entries, first_entry, i;
if (!fb_info)
return 0;
@@ -776,20 +917,42 @@ static int dmub_tracebuffer_show(struct seq_file *m, void *data)
if (!tbuf_base)
return 0;
- tbuf_size = fb_info->fb[DMUB_WINDOW_5_TRACEBUFF].size;
+ if (adev->dm.dmub_srv)
+ fw_meta_info = &adev->dm.dmub_srv->meta_info;
+
+ tbuf_size = fw_meta_info ? fw_meta_info->trace_buffer_size :
+ DMUB_TRACE_BUFFER_SIZE;
max_entries = (tbuf_size - sizeof(struct dmub_debugfs_trace_header)) /
sizeof(struct dmub_debugfs_trace_entry);
num_entries =
((struct dmub_debugfs_trace_header *)tbuf_base)->entry_count;
+ /* DMCUB tracebuffer is a ring. If it rolled over, print a hint that
+ * entries are being overwritten.
+ */
+ if (num_entries > max_entries)
+ seq_printf(m, "...\n");
+
+ first_entry = num_entries % max_entries;
num_entries = min(num_entries, max_entries);
entries = (struct dmub_debugfs_trace_entry
*)(tbuf_base +
sizeof(struct dmub_debugfs_trace_header));
- for (i = 0; i < num_entries; ++i) {
+ /* To print entries chronologically, start from the first entry till the
+ * top of buffer, then from base of buffer to first entry.
+ */
+ for (i = first_entry; i < num_entries; ++i) {
+ struct dmub_debugfs_trace_entry *entry = &entries[i];
+
+ seq_printf(m,
+ "trace_code=%u tick_count=%u param0=%u param1=%u\n",
+ entry->trace_code, entry->tick_count, entry->param0,
+ entry->param1);
+ }
+ for (i = 0; i < first_entry; ++i) {
struct dmub_debugfs_trace_entry *entry = &entries[i];
seq_printf(m,
@@ -824,29 +987,113 @@ static int dmub_fw_state_show(struct seq_file *m, void *data)
return seq_write(m, state_base, state_size);
}
-/*
- * Returns the current and maximum output bpc for the connector.
- * Example usage: cat /sys/kernel/debug/dri/0/DP-1/output_bpc
+/* replay_capability_show() - show eDP panel replay capability
+ *
+ * The read function: replay_capability_show
+ * Shows whether the sink and the driver have Replay capability.
+ *
+ * cat /sys/kernel/debug/dri/0/eDP-X/replay_capability
+ *
+ * Expected output:
+ * "Sink support: no\n" - if panel doesn't support Replay
+ * "Sink support: yes\n" - if panel supports Replay
+ * "Driver support: no\n" - if driver doesn't support Replay
+ * "Driver support: yes\n" - if driver supports Replay
*/
-static int output_bpc_show(struct seq_file *m, void *data)
+static int replay_capability_show(struct seq_file *m, void *data)
{
struct drm_connector *connector = m->private;
- struct drm_device *dev = connector->dev;
- struct drm_crtc *crtc = NULL;
+ struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+ struct dc_link *link = aconnector->dc_link;
+ bool sink_support_replay = false;
+ bool driver_support_replay = false;
+
+ if (!link)
+ return -ENODEV;
+
+ if (link->type == dc_connection_none)
+ return -ENODEV;
+
+ if (!(link->connector_signal & SIGNAL_TYPE_EDP))
+ return -ENODEV;
+
+ /* If Replay is already set to support, skip the checks */
+ if (link->replay_settings.config.replay_supported) {
+ sink_support_replay = true;
+ driver_support_replay = true;
+ } else if ((amdgpu_dc_debug_mask & DC_DISABLE_REPLAY)) {
+ sink_support_replay = amdgpu_dm_link_supports_replay(link, aconnector);
+ } else {
+ struct dc *dc = link->ctx->dc;
+
+ sink_support_replay = amdgpu_dm_link_supports_replay(link, aconnector);
+ if (dc->ctx->dmub_srv && dc->ctx->dmub_srv->dmub)
+ driver_support_replay =
+ (bool)dc->ctx->dmub_srv->dmub->feature_caps.replay_supported;
+ }
+
+ seq_printf(m, "Sink support: %s\n", str_yes_no(sink_support_replay));
+ seq_printf(m, "Driver support: %s\n", str_yes_no(driver_support_replay));
+ seq_printf(m, "Config support: %s\n", str_yes_no(link->replay_settings.config.replay_supported));
+
+ return 0;
+}
+
+/* psr_capability_show() - show eDP panel PSR capability
+ *
+ * seq_file show handler reporting whether the sink and the driver support
+ * PSR. When a non-zero PSR version is recorded, it is appended in hex.
+ *
+ *	cat /sys/kernel/debug/dri/0/eDP-X/psr_capability
+ *
+ * Expected output:
+ * "Sink support: no\n" - if panel doesn't support PSR
+ * "Sink support: yes [0x01]\n" - if panel supports PSR1
+ * "Driver support: no\n" - if driver doesn't support PSR
+ * "Driver support: yes [0x01]\n" - if driver supports PSR1
+ *
+ * Return: 0 on success; -ENODEV if the connector has no link, the link
+ * type is dc_connection_none, or the link signal lacks the eDP bit.
+ */
+static int psr_capability_show(struct seq_file *m, void *data)
+{
+	struct drm_connector *connector = m->private;
+	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+	struct dc_link *link = aconnector->dc_link;
+
+	/* Short-circuit order matters: link must be non-NULL before use. */
+	if (!link ||
+	    link->type == dc_connection_none ||
+	    !(link->connector_signal & SIGNAL_TYPE_EDP))
+		return -ENODEV;
+
+	/* Sink side: capability is derived from the DPCD PSR version. */
+	seq_printf(m, "Sink support: %s", str_yes_no(link->dpcd_caps.psr_info.psr_version != 0));
+	if (link->dpcd_caps.psr_info.psr_version != 0)
+		seq_printf(m, " [0x%02x]", link->dpcd_caps.psr_info.psr_version);
+	seq_puts(m, "\n");
+
+	/* Driver side: yes/no follows psr_feature_enabled, version is separate. */
+	seq_printf(m, "Driver support: %s", str_yes_no(link->psr_settings.psr_feature_enabled));
+	if (link->psr_settings.psr_version != 0)
+		seq_printf(m, " [0x%02x]", link->psr_settings.psr_version);
+	seq_puts(m, "\n");
+
+	return 0;
+}
+
+/*
+ * Returns the current bpc for the crtc.
+ * Example usage: cat /sys/kernel/debug/dri/0/crtc-0/amdgpu_current_bpc
+ */
+static int amdgpu_current_bpc_show(struct seq_file *m, void *data)
+{
+ struct drm_crtc *crtc = m->private;
+ struct drm_device *dev = crtc->dev;
struct dm_crtc_state *dm_crtc_state = NULL;
int res = -ENODEV;
unsigned int bpc;
mutex_lock(&dev->mode_config.mutex);
- drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
-
- if (connector->state == NULL)
- goto unlock;
-
- crtc = connector->state->crtc;
- if (crtc == NULL)
- goto unlock;
-
drm_modeset_lock(&crtc->mutex, NULL);
if (crtc->state == NULL)
goto unlock;
@@ -876,18 +1123,70 @@ static int output_bpc_show(struct seq_file *m, void *data)
}
seq_printf(m, "Current: %u\n", bpc);
- seq_printf(m, "Maximum: %u\n", connector->display_info.bpc);
res = 0;
unlock:
- if (crtc)
- drm_modeset_unlock(&crtc->mutex);
+ drm_modeset_unlock(&crtc->mutex);
+ mutex_unlock(&dev->mode_config.mutex);
- drm_modeset_unlock(&dev->mode_config.connection_mutex);
+ return res;
+}
+DEFINE_SHOW_ATTRIBUTE(amdgpu_current_bpc);
+
+/*
+ * Returns the current colorspace for the crtc.
+ * Example usage: cat /sys/kernel/debug/dri/0/crtc-0/amdgpu_current_colorspace
+ */
+static int amdgpu_current_colorspace_show(struct seq_file *m, void *data)
+{
+ struct drm_crtc *crtc = m->private;
+ struct drm_device *dev = crtc->dev;
+ struct dm_crtc_state *dm_crtc_state = NULL;
+ int res = -ENODEV;
+
+ mutex_lock(&dev->mode_config.mutex);
+ drm_modeset_lock(&crtc->mutex, NULL);
+ if (crtc->state == NULL)
+ goto unlock;
+
+ dm_crtc_state = to_dm_crtc_state(crtc->state);
+ if (dm_crtc_state->stream == NULL)
+ goto unlock;
+
+ switch (dm_crtc_state->stream->output_color_space) {
+ case COLOR_SPACE_SRGB:
+ seq_puts(m, "sRGB");
+ break;
+ case COLOR_SPACE_YCBCR601:
+ case COLOR_SPACE_YCBCR601_LIMITED:
+ seq_puts(m, "BT601_YCC");
+ break;
+ case COLOR_SPACE_YCBCR709:
+ case COLOR_SPACE_YCBCR709_LIMITED:
+ seq_puts(m, "BT709_YCC");
+ break;
+ case COLOR_SPACE_ADOBERGB:
+ seq_puts(m, "opRGB");
+ break;
+ case COLOR_SPACE_2020_RGB_FULLRANGE:
+ seq_puts(m, "BT2020_RGB");
+ break;
+ case COLOR_SPACE_2020_YCBCR_LIMITED:
+ seq_puts(m, "BT2020_YCC");
+ break;
+ default:
+ goto unlock;
+ }
+ res = 0;
+
+unlock:
+ drm_modeset_unlock(&crtc->mutex);
mutex_unlock(&dev->mode_config.mutex);
return res;
}
+DEFINE_SHOW_ATTRIBUTE(amdgpu_current_colorspace);
+
/*
* Example usage:
@@ -930,7 +1229,6 @@ static ssize_t dp_dsc_passthrough_set(struct file *f, const char __user *buf,
return 0;
}
-#ifdef CONFIG_DRM_AMD_DC_HDCP
/*
* Returns the HDCP capability of the Display (1.4 for now).
*
@@ -967,7 +1265,6 @@ static int hdcp_sink_capability_show(struct seq_file *m, void *data)
return 0;
}
-#endif
/*
* Returns whether the connected display is internal and not hotpluggable.
@@ -984,6 +1281,36 @@ static int internal_display_show(struct seq_file *m, void *data)
return 0;
}
+/*
+ * Returns the number of segments used if ODM Combine mode is enabled.
+ * Example usage: cat /sys/kernel/debug/dri/0/DP-1/odm_combine_segments
+ *
+ * The printed value stays at -EOPNOTSUPP when no pipe in the current DC
+ * state drives this connector's link, or when the pipe's timing generator
+ * does not provide a get_odm_combine_segments hook.
+ *
+ * Return: 0 on success; -ENODEV if the connector is not connected or has
+ * no dc_link.
+ */
+static int odm_combine_segments_show(struct seq_file *m, void *unused)
+{
+	struct drm_connector *connector = m->private;
+	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+	struct dc_link *link = aconnector->dc_link;
+	struct pipe_ctx *pipe_ctx = NULL;
+	int i, segments = -EOPNOTSUPP;
+
+	/* Validate before walking the pipes: the walk dereferences link. */
+	if (connector->status != connector_status_connected || !link)
+		return -ENODEV;
+
+	for (i = 0; i < MAX_PIPES; i++) {
+		struct pipe_ctx *candidate =
+			&link->dc->current_state->res_ctx.pipe_ctx[i];
+
+		/*
+		 * Only record a pipe that really belongs to this link, so a
+		 * failed search leaves pipe_ctx NULL instead of pointing at
+		 * the last (unrelated) pipe.
+		 */
+		if (candidate->stream && candidate->stream->link == link) {
+			pipe_ctx = candidate;
+			break;
+		}
+	}
+
+	if (pipe_ctx && pipe_ctx->stream_res.tg &&
+	    pipe_ctx->stream_res.tg->funcs->get_odm_combine_segments)
+		pipe_ctx->stream_res.tg->funcs->get_odm_combine_segments(pipe_ctx->stream_res.tg, &segments);
+
+	seq_printf(m, "%d\n", segments);
+	return 0;
+}
+
/* function description
*
* generic SDP message access for testing
@@ -1002,7 +1329,7 @@ static ssize_t dp_sdp_message_debugfs_write(struct file *f, const char __user *b
size_t size, loff_t *pos)
{
int r;
- uint8_t data[36];
+ uint8_t data[36] = {0};
struct amdgpu_dm_connector *connector = file_inode(f)->i_private;
struct dm_crtc_state *acrtc_state;
uint32_t write_size = 36;
@@ -1024,88 +1351,6 @@ static ssize_t dp_sdp_message_debugfs_write(struct file *f, const char __user *b
return write_size;
}
-static ssize_t dp_dpcd_address_write(struct file *f, const char __user *buf,
- size_t size, loff_t *pos)
-{
- int r;
- struct amdgpu_dm_connector *connector = file_inode(f)->i_private;
-
- if (size < sizeof(connector->debugfs_dpcd_address))
- return -EINVAL;
-
- r = copy_from_user(&connector->debugfs_dpcd_address,
- buf, sizeof(connector->debugfs_dpcd_address));
-
- return size - r;
-}
-
-static ssize_t dp_dpcd_size_write(struct file *f, const char __user *buf,
- size_t size, loff_t *pos)
-{
- int r;
- struct amdgpu_dm_connector *connector = file_inode(f)->i_private;
-
- if (size < sizeof(connector->debugfs_dpcd_size))
- return -EINVAL;
-
- r = copy_from_user(&connector->debugfs_dpcd_size,
- buf, sizeof(connector->debugfs_dpcd_size));
-
- if (connector->debugfs_dpcd_size > 256)
- connector->debugfs_dpcd_size = 0;
-
- return size - r;
-}
-
-static ssize_t dp_dpcd_data_write(struct file *f, const char __user *buf,
- size_t size, loff_t *pos)
-{
- int r;
- char *data;
- struct amdgpu_dm_connector *connector = file_inode(f)->i_private;
- struct dc_link *link = connector->dc_link;
- uint32_t write_size = connector->debugfs_dpcd_size;
-
- if (!write_size || size < write_size)
- return -EINVAL;
-
- data = kzalloc(write_size, GFP_KERNEL);
- if (!data)
- return 0;
-
- r = copy_from_user(data, buf, write_size);
-
- dm_helpers_dp_write_dpcd(link->ctx, link,
- connector->debugfs_dpcd_address, data, write_size - r);
- kfree(data);
- return write_size - r;
-}
-
-static ssize_t dp_dpcd_data_read(struct file *f, char __user *buf,
- size_t size, loff_t *pos)
-{
- int r;
- char *data;
- struct amdgpu_dm_connector *connector = file_inode(f)->i_private;
- struct dc_link *link = connector->dc_link;
- uint32_t read_size = connector->debugfs_dpcd_size;
-
- if (!read_size || size < read_size)
- return 0;
-
- data = kzalloc(read_size, GFP_KERNEL);
- if (!data)
- return 0;
-
- dm_helpers_dp_read_dpcd(link->ctx, link,
- connector->debugfs_dpcd_address, data, read_size);
-
- r = copy_to_user(buf, data, read_size);
-
- kfree(data);
- return read_size - r;
-}
-
/* function: Read link's DSC & FEC capabilities
*
*
@@ -1146,7 +1391,7 @@ static int dp_dsc_fec_support_show(struct seq_file *m, void *data)
break;
}
dpcd_caps = aconnector->dc_link->dpcd_caps;
- if (aconnector->port) {
+ if (aconnector->mst_output_port) {
/* aconnector sets dsc_aux during get_modes call
* if MST connector has it means it can either
* enable DSC on the sink device or on MST branch
@@ -1165,8 +1410,8 @@ static int dp_dsc_fec_support_show(struct seq_file *m, void *data)
drm_modeset_drop_locks(&ctx);
drm_modeset_acquire_fini(&ctx);
- seq_printf(m, "FEC_Sink_Support: %s\n", yesno(is_fec_supported));
- seq_printf(m, "DSC_Sink_Support: %s\n", yesno(is_dsc_supported));
+ seq_printf(m, "FEC_Sink_Support: %s\n", str_yes_no(is_fec_supported));
+ seq_printf(m, "DSC_Sink_Support: %s\n", str_yes_no(is_dsc_supported));
return ret;
}
@@ -1193,14 +1438,16 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf,
struct drm_connector *connector = &aconnector->base;
struct dc_link *link = NULL;
struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
enum dc_connection_type new_connection_type = dc_connection_none;
char *wr_buf = NULL;
uint32_t wr_buf_size = 42;
int max_param_num = 1;
long param[1] = {0};
uint8_t param_nums = 0;
+ bool ret = false;
- if (!aconnector || !aconnector->dc_link)
+ if (!aconnector->dc_link)
return -EINVAL;
if (size == 0)
@@ -1221,20 +1468,32 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf,
return -EINVAL;
}
+ kfree(wr_buf);
+
if (param_nums <= 0) {
DRM_DEBUG_DRIVER("user data not be read\n");
- kfree(wr_buf);
+ return -EINVAL;
+ }
+
+ mutex_lock(&aconnector->hpd_lock);
+
+ /* Not supported for MST end devices */
+ if (aconnector->mst_root) {
+ mutex_unlock(&aconnector->hpd_lock);
return -EINVAL;
}
if (param[0] == 1) {
- mutex_lock(&aconnector->hpd_lock);
- if (!dc_link_detect_sink(aconnector->dc_link, &new_connection_type) &&
+ if (!dc_link_detect_connection_type(aconnector->dc_link, &new_connection_type) &&
new_connection_type != dc_connection_none)
goto unlock;
- if (!dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD))
+ mutex_lock(&adev->dm.dc_lock);
+ ret = dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD);
+ mutex_unlock(&adev->dm.dc_lock);
+
+ if (!ret)
goto unlock;
amdgpu_dm_update_connector_after_detect(aconnector);
@@ -1243,7 +1502,7 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf,
dm_restore_drm_connector_state(dev, connector);
drm_modeset_unlock_all(dev);
- drm_kms_helper_hotplug_event(dev);
+ drm_kms_helper_connector_hotplug_event(connector);
} else if (param[0] == 0) {
if (!aconnector->dc_link)
goto unlock;
@@ -1261,17 +1520,20 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf,
amdgpu_dm_update_connector_after_detect(aconnector);
+ /* If the aconnector is the root node in mst topology */
+ if (aconnector->mst_mgr.mst_state == true)
+ dc_link_reset_cur_dp_mst_topology(link);
+
drm_modeset_lock_all(dev);
dm_restore_drm_connector_state(dev, connector);
drm_modeset_unlock_all(dev);
- drm_kms_helper_hotplug_event(dev);
+ drm_kms_helper_connector_hotplug_event(connector);
}
unlock:
mutex_unlock(&aconnector->hpd_lock);
- kfree(wr_buf);
return size;
}
@@ -1294,48 +1556,45 @@ static ssize_t dp_dsc_clock_en_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
char *rd_buf = NULL;
- char *rd_buf_ptr = NULL;
struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private;
struct display_stream_compressor *dsc;
struct dcn_dsc_state dsc_state = {0};
const uint32_t rd_buf_size = 10;
struct pipe_ctx *pipe_ctx;
ssize_t result = 0;
- int i, r, str_len = 30;
+ int i, r, str_len = 10;
rd_buf = kcalloc(rd_buf_size, sizeof(char), GFP_KERNEL);
if (!rd_buf)
return -ENOMEM;
- rd_buf_ptr = rd_buf;
-
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
- if (pipe_ctx && pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
- break;
+ if (pipe_ctx->stream &&
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
+ break;
}
- if (!pipe_ctx)
- return -ENXIO;
-
dsc = pipe_ctx->stream_res.dsc;
if (dsc)
dsc->funcs->dsc_read_state(dsc, &dsc_state);
- snprintf(rd_buf_ptr, str_len,
+ snprintf(rd_buf, str_len,
"%d\n",
dsc_state.dsc_clock_en);
- rd_buf_ptr += str_len;
while (size) {
if (*pos >= rd_buf_size)
break;
r = put_user(*(rd_buf + result), buf);
- if (r)
+ if (r) {
+ kfree(rd_buf);
return r; /* r = -EFAULT */
+ }
buf += 1;
size -= 1;
@@ -1414,12 +1673,14 @@ static ssize_t dp_dsc_clock_en_write(struct file *f, const char __user *buf,
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
- if (pipe_ctx && pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
- break;
+ if (pipe_ctx->stream &&
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
+ break;
}
- if (!pipe_ctx || !pipe_ctx->stream)
+ if (!pipe_ctx->stream)
goto done;
// Get CRTC state
@@ -1481,7 +1742,6 @@ static ssize_t dp_dsc_slice_width_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
char *rd_buf = NULL;
- char *rd_buf_ptr = NULL;
struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private;
struct display_stream_compressor *dsc;
struct dcn_dsc_state dsc_state = {0};
@@ -1495,34 +1755,32 @@ static ssize_t dp_dsc_slice_width_read(struct file *f, char __user *buf,
if (!rd_buf)
return -ENOMEM;
- rd_buf_ptr = rd_buf;
-
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
- if (pipe_ctx && pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
- break;
+ if (pipe_ctx->stream &&
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
+ break;
}
- if (!pipe_ctx)
- return -ENXIO;
-
dsc = pipe_ctx->stream_res.dsc;
if (dsc)
dsc->funcs->dsc_read_state(dsc, &dsc_state);
- snprintf(rd_buf_ptr, str_len,
+ snprintf(rd_buf, str_len,
"%d\n",
dsc_state.dsc_slice_width);
- rd_buf_ptr += str_len;
while (size) {
if (*pos >= rd_buf_size)
break;
r = put_user(*(rd_buf + result), buf);
- if (r)
+ if (r) {
+ kfree(rd_buf);
return r; /* r = -EFAULT */
+ }
buf += 1;
size -= 1;
@@ -1599,12 +1857,14 @@ static ssize_t dp_dsc_slice_width_write(struct file *f, const char __user *buf,
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
- if (pipe_ctx && pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
- break;
+ if (pipe_ctx->stream &&
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
+ break;
}
- if (!pipe_ctx || !pipe_ctx->stream)
+ if (!pipe_ctx->stream)
goto done;
// Safely get CRTC state
@@ -1666,7 +1926,6 @@ static ssize_t dp_dsc_slice_height_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
char *rd_buf = NULL;
- char *rd_buf_ptr = NULL;
struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private;
struct display_stream_compressor *dsc;
struct dcn_dsc_state dsc_state = {0};
@@ -1680,34 +1939,32 @@ static ssize_t dp_dsc_slice_height_read(struct file *f, char __user *buf,
if (!rd_buf)
return -ENOMEM;
- rd_buf_ptr = rd_buf;
-
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
- if (pipe_ctx && pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
- break;
+ if (pipe_ctx->stream &&
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
+ break;
}
- if (!pipe_ctx)
- return -ENXIO;
-
dsc = pipe_ctx->stream_res.dsc;
if (dsc)
dsc->funcs->dsc_read_state(dsc, &dsc_state);
- snprintf(rd_buf_ptr, str_len,
+ snprintf(rd_buf, str_len,
"%d\n",
dsc_state.dsc_slice_height);
- rd_buf_ptr += str_len;
while (size) {
if (*pos >= rd_buf_size)
break;
r = put_user(*(rd_buf + result), buf);
- if (r)
+ if (r) {
+ kfree(rd_buf);
return r; /* r = -EFAULT */
+ }
buf += 1;
size -= 1;
@@ -1784,12 +2041,14 @@ static ssize_t dp_dsc_slice_height_write(struct file *f, const char __user *buf,
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
- if (pipe_ctx && pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
- break;
+ if (pipe_ctx->stream &&
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
+ break;
}
- if (!pipe_ctx || !pipe_ctx->stream)
+ if (!pipe_ctx->stream)
goto done;
// Get CRTC state
@@ -1847,7 +2106,6 @@ static ssize_t dp_dsc_bits_per_pixel_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
char *rd_buf = NULL;
- char *rd_buf_ptr = NULL;
struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private;
struct display_stream_compressor *dsc;
struct dcn_dsc_state dsc_state = {0};
@@ -1861,34 +2119,32 @@ static ssize_t dp_dsc_bits_per_pixel_read(struct file *f, char __user *buf,
if (!rd_buf)
return -ENOMEM;
- rd_buf_ptr = rd_buf;
-
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
- if (pipe_ctx && pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
- break;
+ if (pipe_ctx->stream &&
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
+ break;
}
- if (!pipe_ctx)
- return -ENXIO;
-
dsc = pipe_ctx->stream_res.dsc;
if (dsc)
dsc->funcs->dsc_read_state(dsc, &dsc_state);
- snprintf(rd_buf_ptr, str_len,
+ snprintf(rd_buf, str_len,
"%d\n",
dsc_state.dsc_bits_per_pixel);
- rd_buf_ptr += str_len;
while (size) {
if (*pos >= rd_buf_size)
break;
r = put_user(*(rd_buf + result), buf);
- if (r)
+ if (r) {
+ kfree(rd_buf);
return r; /* r = -EFAULT */
+ }
buf += 1;
size -= 1;
@@ -1962,12 +2218,14 @@ static ssize_t dp_dsc_bits_per_pixel_write(struct file *f, const char __user *bu
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
- if (pipe_ctx && pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
- break;
+ if (pipe_ctx->stream &&
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
+ break;
}
- if (!pipe_ctx || !pipe_ctx->stream)
+ if (!pipe_ctx->stream)
goto done;
// Get CRTC state
@@ -2023,7 +2281,6 @@ static ssize_t dp_dsc_pic_width_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
char *rd_buf = NULL;
- char *rd_buf_ptr = NULL;
struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private;
struct display_stream_compressor *dsc;
struct dcn_dsc_state dsc_state = {0};
@@ -2037,34 +2294,32 @@ static ssize_t dp_dsc_pic_width_read(struct file *f, char __user *buf,
if (!rd_buf)
return -ENOMEM;
- rd_buf_ptr = rd_buf;
-
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
- if (pipe_ctx && pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
- break;
+ if (pipe_ctx->stream &&
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
+ break;
}
- if (!pipe_ctx)
- return -ENXIO;
-
dsc = pipe_ctx->stream_res.dsc;
if (dsc)
dsc->funcs->dsc_read_state(dsc, &dsc_state);
- snprintf(rd_buf_ptr, str_len,
+ snprintf(rd_buf, str_len,
"%d\n",
dsc_state.dsc_pic_width);
- rd_buf_ptr += str_len;
while (size) {
if (*pos >= rd_buf_size)
break;
r = put_user(*(rd_buf + result), buf);
- if (r)
+ if (r) {
+ kfree(rd_buf);
return r; /* r = -EFAULT */
+ }
buf += 1;
size -= 1;
@@ -2080,7 +2335,6 @@ static ssize_t dp_dsc_pic_height_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
char *rd_buf = NULL;
- char *rd_buf_ptr = NULL;
struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private;
struct display_stream_compressor *dsc;
struct dcn_dsc_state dsc_state = {0};
@@ -2094,34 +2348,32 @@ static ssize_t dp_dsc_pic_height_read(struct file *f, char __user *buf,
if (!rd_buf)
return -ENOMEM;
- rd_buf_ptr = rd_buf;
-
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
- if (pipe_ctx && pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
- break;
+ if (pipe_ctx->stream &&
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
+ break;
}
- if (!pipe_ctx)
- return -ENXIO;
-
dsc = pipe_ctx->stream_res.dsc;
if (dsc)
dsc->funcs->dsc_read_state(dsc, &dsc_state);
- snprintf(rd_buf_ptr, str_len,
+ snprintf(rd_buf, str_len,
"%d\n",
dsc_state.dsc_pic_height);
- rd_buf_ptr += str_len;
while (size) {
if (*pos >= rd_buf_size)
break;
r = put_user(*(rd_buf + result), buf);
- if (r)
+ if (r) {
+ kfree(rd_buf);
return r; /* r = -EFAULT */
+ }
buf += 1;
size -= 1;
@@ -2152,7 +2404,6 @@ static ssize_t dp_dsc_chunk_size_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
char *rd_buf = NULL;
- char *rd_buf_ptr = NULL;
struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private;
struct display_stream_compressor *dsc;
struct dcn_dsc_state dsc_state = {0};
@@ -2166,34 +2417,32 @@ static ssize_t dp_dsc_chunk_size_read(struct file *f, char __user *buf,
if (!rd_buf)
return -ENOMEM;
- rd_buf_ptr = rd_buf;
-
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
- if (pipe_ctx && pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
- break;
+ if (pipe_ctx->stream &&
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
+ break;
}
- if (!pipe_ctx)
- return -ENXIO;
-
dsc = pipe_ctx->stream_res.dsc;
if (dsc)
dsc->funcs->dsc_read_state(dsc, &dsc_state);
- snprintf(rd_buf_ptr, str_len,
+ snprintf(rd_buf, str_len,
"%d\n",
dsc_state.dsc_chunk_size);
- rd_buf_ptr += str_len;
while (size) {
if (*pos >= rd_buf_size)
break;
r = put_user(*(rd_buf + result), buf);
- if (r)
+ if (r) {
+ kfree(rd_buf);
return r; /* r = -EFAULT */
+ }
buf += 1;
size -= 1;
@@ -2224,7 +2473,6 @@ static ssize_t dp_dsc_slice_bpg_offset_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
char *rd_buf = NULL;
- char *rd_buf_ptr = NULL;
struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private;
struct display_stream_compressor *dsc;
struct dcn_dsc_state dsc_state = {0};
@@ -2238,34 +2486,32 @@ static ssize_t dp_dsc_slice_bpg_offset_read(struct file *f, char __user *buf,
if (!rd_buf)
return -ENOMEM;
- rd_buf_ptr = rd_buf;
-
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
- if (pipe_ctx && pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
- break;
+ if (pipe_ctx->stream &&
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
+ break;
}
- if (!pipe_ctx)
- return -ENXIO;
-
dsc = pipe_ctx->stream_res.dsc;
if (dsc)
dsc->funcs->dsc_read_state(dsc, &dsc_state);
- snprintf(rd_buf_ptr, str_len,
+ snprintf(rd_buf, str_len,
"%d\n",
dsc_state.dsc_slice_bpg_offset);
- rd_buf_ptr += str_len;
while (size) {
if (*pos >= rd_buf_size)
break;
r = put_user(*(rd_buf + result), buf);
- if (r)
+ if (r) {
+ kfree(rd_buf);
return r; /* r = -EFAULT */
+ }
buf += 1;
size -= 1;
@@ -2421,6 +2667,49 @@ unlock:
}
/*
+ * IPS status. Read only.
+ *
+ * Example usage: cat /sys/kernel/debug/dri/0/amdgpu_dm_ips_status
+ */
+static int ips_status_show(struct seq_file *m, void *unused)
+{
+	struct amdgpu_device *adev = m->private;
+	struct dc *dc = adev->dm.dc;
+	struct dc_dmub_srv *srv;
+	volatile const struct dmub_shared_state_ips_fw *fw_state;
+	uint32_t rcg, ips1, ips2;
+
+	seq_printf(m, "IPS config: %d\n", dc->config.disable_ips);
+	seq_printf(m, "Idle optimization: %d\n", dc->idle_optimizations_allowed);
+
+	/* The idle workqueue lines are only printed when one exists. */
+	if (adev->dm.idle_workqueue) {
+		seq_printf(m, "Idle workqueue - enabled: %d\n", adev->dm.idle_workqueue->enable);
+		seq_printf(m, "Idle workqueue - running: %d\n", adev->dm.idle_workqueue->running);
+	}
+
+	/* Without a DMUB service there are no firmware counters to report. */
+	srv = dc->ctx->dmub_srv;
+	if (!srv || !srv->dmub)
+		return 0;
+
+	fw_state = &srv->dmub->shared_state[DMUB_SHARED_SHARE_FEATURE__IPS_FW].data.ips_fw;
+
+	/* Copy each group of volatile counters into locals before printing. */
+	rcg = fw_state->rcg_entry_count;
+	ips1 = fw_state->ips1_entry_count;
+	ips2 = fw_state->ips2_entry_count;
+	seq_printf(m, "entry counts: rcg=%u ips1=%u ips2=%u\n",
+		   rcg, ips1, ips2);
+
+	rcg = fw_state->rcg_exit_count;
+	ips1 = fw_state->ips1_exit_count;
+	ips2 = fw_state->ips2_exit_count;
+	seq_printf(m, "exit counts: rcg=%u ips1=%u ips2=%u",
+		   rcg, ips1, ips2);
+	seq_puts(m, "\n");
+
+	return 0;
+}
+
+/*
* Backlight at this moment. Read only.
* As written to display, taking ABM and backlight lut into account.
* Ranges from 0x0 to 0x10000 (= 100% PWM)
@@ -2458,15 +2747,185 @@ static int target_backlight_show(struct seq_file *m, void *unused)
return 0;
}
+/*
+ * function description: Report the connector's role in an MST topology
+ *
+ * Prints one of the following, followed by a newline:
+ * - "root" if this connector's own MST manager is in MST state
+ * - "branch" if the connector hangs off an MST root and its output port
+ *   is an MST branching device with mcs set
+ * - "end" if the connector hangs off an MST root but is not a branch
+ * - "no" if the connector is not part of an MST topology
+ * Access it with the following command:
+ *
+ *	cat /sys/kernel/debug/dri/0/DP-X/is_mst_connector
+ *
+ * Always returns 0.
+ */
+static int dp_is_mst_connector_show(struct seq_file *m, void *unused)
+{
+	struct drm_connector *connector = m->private;
+	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+	char *role = "no";
+
+	mutex_lock(&aconnector->hpd_lock);
+
+	if (aconnector->mst_mgr.mst_state) {
+		role = "root";
+	} else if (aconnector->mst_root &&
+		   aconnector->mst_root->mst_mgr.mst_state) {
+		struct drm_dp_mst_topology_mgr *mgr = &aconnector->mst_root->mst_mgr;
+		struct drm_dp_mst_port *port = aconnector->mst_output_port;
+		bool is_branch;
+
+		/* The port fields are sampled under the topology manager's lock. */
+		drm_modeset_lock(&mgr->base.lock, NULL);
+		is_branch = port->pdt == DP_PEER_DEVICE_MST_BRANCHING &&
+			    port->mcs;
+		drm_modeset_unlock(&mgr->base.lock);
+
+		role = is_branch ? "branch" : "end";
+	}
+
+	seq_printf(m, "%s\n", role);
+
+	mutex_unlock(&aconnector->hpd_lock);
+
+	return 0;
+}
+
+/*
+ * function description: Read out the mst progress status
+ *
+ * Prints "disabled" when the connector's mst_status is MST_STATUS_DEFAULT.
+ * Otherwise prints one "<name>:done" or "<name>:not_done" line for every
+ * entry of mst_progress_status, where entry i is "done" when bit i of
+ * mst_status is set.
+ *
+ * Access it with the following command:
+ *
+ *	cat /sys/kernel/debug/dri/0/DP-X/mst_progress_status
+ *
+ * Always returns 0.
+ */
+static int dp_mst_progress_status_show(struct seq_file *m, void *unused)
+{
+	struct drm_connector *connector = m->private;
+	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+	struct amdgpu_device *adev = drm_to_adev(connector->dev);
+	unsigned int i;
+
+	/* Lock order: hpd_lock first, then dc_lock; released in reverse. */
+	mutex_lock(&aconnector->hpd_lock);
+	mutex_lock(&adev->dm.dc_lock);
+
+	if (aconnector->mst_status == MST_STATUS_DEFAULT) {
+		seq_puts(m, "disabled\n");
+	} else {
+		/* ARRAY_SIZE keeps the bound correct if the element type changes. */
+		for (i = 0; i < ARRAY_SIZE(mst_progress_status); i++)
+			seq_printf(m, "%s:%s\n",
+				   mst_progress_status[i],
+				   aconnector->mst_status & BIT(i) ? "done" : "not_done");
+	}
+
+	mutex_unlock(&adev->dm.dc_lock);
+	mutex_unlock(&aconnector->hpd_lock);
+
+	return 0;
+}
+
+/*
+ * Reports whether the connected display is a USB4 DPIA tunneled display.
+ * Prints "yes" for a USB4 DPIA endpoint, "no" for a PHY endpoint and
+ * "unknown" for any other endpoint type.
+ * Example usage: cat /sys/kernel/debug/dri/0/DP-8/is_dpia_link
+ *
+ * Returns 0 on success, -ENODEV if the connector is not connected.
+ */
+static int is_dpia_link_show(struct seq_file *m, void *data)
+{
+	struct drm_connector *connector = m->private;
+	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+	struct dc_link *link = aconnector->dc_link;
+	const char *answer;
+
+	if (connector->status != connector_status_connected)
+		return -ENODEV;
+
+	if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+		answer = "yes";
+	else if (link->ep_type == DISPLAY_ENDPOINT_PHY)
+		answer = "no";
+	else
+		answer = "unknown";
+
+	seq_printf(m, "%s\n", answer);
+
+	return 0;
+}
+
+/**
+ * hdmi_cec_state_show - Read out the HDMI-CEC feature status
+ * @m: sequence file; its private data is the drm connector.
+ * @data: unused.
+ *
+ * Prints the connector name and object id, followed by a status line that
+ * is 1 when the connector has a CEC notifier and 0 otherwise.
+ *
+ * Return 0 on success
+ */
+static int hdmi_cec_state_show(struct seq_file *m, void *data)
+{
+	struct drm_connector *connector = m->private;
+	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+	int cec_enabled = aconnector->notifier != NULL;
+
+	seq_printf(m, "%s:%d\n", connector->name, connector->base.id);
+	seq_printf(m, "HDMI-CEC status: %d\n", cec_enabled);
+
+	return 0;
+}
+
+/**
+ * hdmi_cec_state_write - Enable/Disable HDMI-CEC feature from driver side
+ * @f: file structure.
+ * @buf: userspace buffer. set to '1' to enable; '0' to disable cec feature.
+ * @size: size of buffer from userspace.
+ * @pos: unused.
+ *
+ * Enabling fails with -EINVAL when a CEC notifier is already registered;
+ * disabling fails with -EINVAL when none is registered.
+ *
+ * Return size on success, error code on failure
+ */
+static ssize_t hdmi_cec_state_write(struct file *f, const char __user *buf,
+				    size_t size, loff_t *pos)
+{
+	struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private;
+	struct drm_device *ddev = aconnector->base.dev;
+	bool enable;
+	int ret;
+
+	if (!size)
+		return -EINVAL;
+
+	ret = kstrtobool_from_user(buf, size, &enable);
+	if (ret) {
+		drm_dbg_driver(ddev, "invalid user data !\n");
+		return ret;
+	}
+
+	if (!enable) {
+		/* Disable path: there must be a notifier to tear down. */
+		if (!aconnector->notifier)
+			return -EINVAL;
+
+		cec_notifier_conn_unregister(aconnector->notifier);
+		aconnector->notifier = NULL;
+		return size;
+	}
+
+	/* Enable path: refuse to register a second notifier. */
+	if (aconnector->notifier)
+		return -EINVAL;
+
+	ret = amdgpu_dm_initialize_hdmi_connector(aconnector);
+	if (ret)
+		return ret;
+
+	hdmi_cec_set_edid(aconnector);
+
+	return size;
+}
+
DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support);
DEFINE_SHOW_ATTRIBUTE(dmub_fw_state);
DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer);
-DEFINE_SHOW_ATTRIBUTE(output_bpc);
DEFINE_SHOW_ATTRIBUTE(dp_lttpr_status);
-#ifdef CONFIG_DRM_AMD_DC_HDCP
DEFINE_SHOW_ATTRIBUTE(hdcp_sink_capability);
-#endif
DEFINE_SHOW_ATTRIBUTE(internal_display);
+DEFINE_SHOW_ATTRIBUTE(odm_combine_segments);
+DEFINE_SHOW_ATTRIBUTE(replay_capability);
+DEFINE_SHOW_ATTRIBUTE(psr_capability);
+DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector);
+DEFINE_SHOW_ATTRIBUTE(dp_mst_progress_status);
+DEFINE_SHOW_ATTRIBUTE(is_dpia_link);
+DEFINE_SHOW_STORE_ATTRIBUTE(hdmi_cec_state);
static const struct file_operations dp_dsc_clock_en_debugfs_fops = {
.owner = THIS_MODULE,
@@ -2552,25 +3011,6 @@ static const struct file_operations sdp_message_fops = {
.llseek = default_llseek
};
-static const struct file_operations dp_dpcd_address_debugfs_fops = {
- .owner = THIS_MODULE,
- .write = dp_dpcd_address_write,
- .llseek = default_llseek
-};
-
-static const struct file_operations dp_dpcd_size_debugfs_fops = {
- .owner = THIS_MODULE,
- .write = dp_dpcd_size_write,
- .llseek = default_llseek
-};
-
-static const struct file_operations dp_dpcd_data_debugfs_fops = {
- .owner = THIS_MODULE,
- .read = dp_dpcd_data_read,
- .write = dp_dpcd_data_write,
- .llseek = default_llseek
-};
-
static const struct file_operations dp_max_bpc_debugfs_fops = {
.owner = THIS_MODULE,
.read = dp_max_bpc_read,
@@ -2584,6 +3024,12 @@ static const struct file_operations dp_dsc_disable_passthrough_debugfs_fops = {
.llseek = default_llseek
};
+static const struct file_operations dp_mst_link_settings_debugfs_fops = {
+ .owner = THIS_MODULE,
+ .write = dp_mst_link_setting,
+ .llseek = default_llseek
+};
+
static const struct {
char *name;
const struct file_operations *fops;
@@ -2592,13 +3038,8 @@ static const struct {
{"phy_settings", &dp_phy_settings_debugfs_fop},
{"lttpr_status", &dp_lttpr_status_fops},
{"test_pattern", &dp_phy_test_pattern_fops},
-#ifdef CONFIG_DRM_AMD_DC_HDCP
{"hdcp_sink_capability", &hdcp_sink_capability_fops},
-#endif
{"sdp_message", &sdp_message_fops},
- {"aux_dpcd_address", &dp_dpcd_address_debugfs_fops},
- {"aux_dpcd_size", &dp_dpcd_size_debugfs_fops},
- {"aux_dpcd_data", &dp_dpcd_data_debugfs_fops},
{"dsc_clock_en", &dp_dsc_clock_en_debugfs_fops},
{"dsc_slice_width", &dp_dsc_slice_width_debugfs_fops},
{"dsc_slice_height", &dp_dsc_slice_height_debugfs_fops},
@@ -2610,16 +3051,20 @@ static const struct {
{"dp_dsc_fec_support", &dp_dsc_fec_support_fops},
{"max_bpc", &dp_max_bpc_debugfs_fops},
{"dsc_disable_passthrough", &dp_dsc_disable_passthrough_debugfs_fops},
+ {"is_mst_connector", &dp_is_mst_connector_fops},
+ {"mst_progress_status", &dp_mst_progress_status_fops},
+ {"is_dpia_link", &is_dpia_link_fops},
+ {"mst_link_settings", &dp_mst_link_settings_debugfs_fops}
};
-#ifdef CONFIG_DRM_AMD_DC_HDCP
static const struct {
char *name;
const struct file_operations *fops;
} hdmi_debugfs_entries[] = {
- {"hdcp_sink_capability", &hdcp_sink_capability_fops}
+ {"hdcp_sink_capability", &hdcp_sink_capability_fops},
+ {"hdmi_cec_state", &hdmi_cec_state_fops}
};
-#endif
+
/*
* Force YUV420 output if available from the given mode
*/
@@ -2648,6 +3093,51 @@ DEFINE_DEBUGFS_ATTRIBUTE(force_yuv420_output_fops, force_yuv420_output_get,
force_yuv420_output_set, "%llu\n");
/*
+ * Read Replay state
+ */
+static int replay_get_state(void *data, u64 *val)
+{
+ struct amdgpu_dm_connector *connector = data;
+ struct dc_link *link = connector->dc_link;
+ uint64_t state = REPLAY_STATE_INVALID;
+
+ dc_link_get_replay_state(link, &state);
+
+ *val = state;
+
+ return 0;
+}
+
+/*
+ * Start / Stop capture Replay residency
+ */
+static int replay_set_residency(void *data, u64 val)
+{
+ struct amdgpu_dm_connector *connector = data;
+ struct dc_link *link = connector->dc_link;
+ bool is_start = (val != 0);
+ u32 residency = 0;
+
+ link->dc->link_srv->edp_replay_residency(link, &residency, is_start, PR_RESIDENCY_MODE_PHY);
+ return 0;
+}
+
+/*
+ * Read Replay residency
+ */
+static int replay_get_residency(void *data, u64 *val)
+{
+ struct amdgpu_dm_connector *connector = data;
+ struct dc_link *link = connector->dc_link;
+ u32 residency = 0;
+
+ link->dc->link_srv->edp_replay_residency(link, &residency, false, PR_RESIDENCY_MODE_PHY);
+ *val = (u64)residency;
+
+ return 0;
+}
+
+/*
* Read PSR state
*/
static int psr_get(void *data, u64 *val)
@@ -2664,6 +3154,174 @@ static int psr_get(void *data, u64 *val)
}
/*
+ * Read PSR state residency
+ */
+static int psr_read_residency(void *data, u64 *val)
+{
+ struct amdgpu_dm_connector *connector = data;
+ struct dc_link *link = connector->dc_link;
+ u32 residency = 0;
+
+ link->dc->link_srv->edp_get_psr_residency(link, &residency, PSR_RESIDENCY_MODE_PHY);
+
+ *val = (u64)residency;
+
+ return 0;
+}
+
+/* read allow_edp_hotplug_detection */
+static int allow_edp_hotplug_detection_get(void *data, u64 *val)
+{
+ struct amdgpu_dm_connector *aconnector = data;
+ struct drm_connector *connector = &aconnector->base;
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ *val = adev->dm.dc->config.allow_edp_hotplug_detection;
+
+ return 0;
+}
+
+/* set allow_edp_hotplug_detection */
+static int allow_edp_hotplug_detection_set(void *data, u64 val)
+{
+ struct amdgpu_dm_connector *aconnector = data;
+ struct drm_connector *connector = &aconnector->base;
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ adev->dm.dc->config.allow_edp_hotplug_detection = (uint32_t) val;
+
+ return 0;
+}
+
+/* check if kernel disallow eDP enter psr state
+ * cat /sys/kernel/debug/dri/0/eDP-X/disallow_edp_enter_psr
+ * 0: allow edp enter psr; 1: disallow
+ */
+static int disallow_edp_enter_psr_get(void *data, u64 *val)
+{
+ struct amdgpu_dm_connector *aconnector = data;
+
+ *val = (u64) aconnector->disallow_edp_enter_psr;
+ return 0;
+}
+
+/* set kernel disallow eDP enter psr state
+ * echo 0x0 > /sys/kernel/debug/dri/0/eDP-X/disallow_edp_enter_psr
+ * 0: allow edp enter psr; 1: disallow
+ *
+ * usage: test app read crc from PSR eDP rx.
+ *
+ * during kernel boot up, kernel write dpcd 0x170 = 5.
+ * this notify eDP rx psr enable and let rx check crc.
+ * rx fw will start checking crc for rx internal logic.
+ * crc read count within dpcd 0x246 is not updated and
+ * value is 0. when eDP tx driver wants to read rx crc
+ * from dpcd 0x246, 0x270, read count 0 lead tx driver
+ * timeout.
+ *
+ * to avoid this, we add this debugfs to let test app to disable
+ * rx crc checking for rx internal logic. then test app can read
+ * non-zero crc read count.
+ *
+ * expected app sequence is as below:
+ * 1. disable eDP PHY and notify eDP rx with dpcd 0x600 = 2.
+ * 2. echo 0x1 > /sys/kernel/debug/dri/0/eDP-X/disallow_edp_enter_psr
+ * 3. enable eDP PHY and notify eDP rx with dpcd 0x600 = 1 but
+ * without dpcd 0x170 = 5.
+ * 4. read crc from rx dpcd 0x270, 0x246, etc.
+ * 5. echo 0x0 > /sys/kernel/debug/dri/0/eDP-X/disallow_edp_enter_psr.
+ * this will let eDP back to normal with psr setup dpcd 0x170 = 5.
+ */
+static int disallow_edp_enter_psr_set(void *data, u64 val)
+{
+ struct amdgpu_dm_connector *aconnector = data;
+
+ aconnector->disallow_edp_enter_psr = val ? true : false;
+ return 0;
+}
+
+static int dmub_trace_mask_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = data;
+ struct dmub_srv *srv = adev->dm.dc->ctx->dmub_srv->dmub;
+ enum dmub_gpint_command cmd;
+ u64 mask = 0xffff;
+ u8 shift = 0;
+ u32 res;
+ int i;
+
+ if (!srv->fw_version)
+ return -EINVAL;
+
+ for (i = 0; i < 4; i++) {
+ res = (val & mask) >> shift;
+
+ switch (i) {
+ case 0:
+ cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD0;
+ break;
+ case 1:
+ cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1;
+ break;
+ case 2:
+ cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD2;
+ break;
+ case 3:
+ cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD3;
+ break;
+ }
+
+ if (!dc_wake_and_execute_gpint(adev->dm.dc->ctx, cmd, res, NULL, DM_DMUB_WAIT_TYPE_WAIT))
+ return -EIO;
+
+ usleep_range(100, 1000);
+
+ mask <<= 16;
+ shift += 16;
+ }
+
+ return 0;
+}
+
+static int dmub_trace_mask_show(void *data, u64 *val)
+{
+ enum dmub_gpint_command cmd = DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD0;
+ struct amdgpu_device *adev = data;
+ struct dmub_srv *srv = adev->dm.dc->ctx->dmub_srv->dmub;
+ u8 shift = 0;
+ u64 raw = 0;
+ u64 res = 0;
+ int i = 0;
+
+ if (!srv->fw_version)
+ return -EINVAL;
+
+ while (i < 4) {
+ uint32_t response;
+
+ if (!dc_wake_and_execute_gpint(adev->dm.dc->ctx, cmd, 0, &response, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+ return -EIO;
+
+ raw = response;
+ usleep_range(100, 1000);
+
+ cmd++;
+ res |= (raw << shift);
+ shift += 16;
+ i++;
+ }
+
+ *val = res;
+
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(dmub_trace_mask_fops, dmub_trace_mask_show,
+ dmub_trace_mask_set, "0x%llx\n");
+
+/*
* Set dmcub trace event IRQ enable or disable.
* Usage to enable dmcub trace event IRQ: echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en
* Usage to disable dmcub trace event IRQ: echo 0 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en
@@ -2697,19 +3355,167 @@ static int dmcub_trace_event_state_get(void *data, u64 *val)
DEFINE_DEBUGFS_ATTRIBUTE(dmcub_trace_event_state_fops, dmcub_trace_event_state_get,
dmcub_trace_event_state_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(replay_state_fops, replay_get_state, NULL, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(replay_residency_fops, replay_get_residency, replay_set_residency,
+ "%llu\n");
DEFINE_DEBUGFS_ATTRIBUTE(psr_fops, psr_get, NULL, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(psr_residency_fops, psr_read_residency, NULL,
+ "%llu\n");
+
+DEFINE_DEBUGFS_ATTRIBUTE(allow_edp_hotplug_detection_fops,
+ allow_edp_hotplug_detection_get,
+ allow_edp_hotplug_detection_set, "%llu\n");
+
+DEFINE_DEBUGFS_ATTRIBUTE(disallow_edp_enter_psr_fops,
+ disallow_edp_enter_psr_get,
+ disallow_edp_enter_psr_set, "%llu\n");
DEFINE_SHOW_ATTRIBUTE(current_backlight);
DEFINE_SHOW_ATTRIBUTE(target_backlight);
+DEFINE_SHOW_ATTRIBUTE(ips_status);
static const struct {
char *name;
const struct file_operations *fops;
} connector_debugfs_entries[] = {
{"force_yuv420_output", &force_yuv420_output_fops},
- {"output_bpc", &output_bpc_fops},
{"trigger_hotplug", &trigger_hotplug_debugfs_fops},
- {"internal_display", &internal_display_fops}
+ {"internal_display", &internal_display_fops},
+ {"odm_combine_segments", &odm_combine_segments_fops}
+};
+
+/*
+ * Returns supported customized link rates by this eDP panel.
+ * Example usage: cat /sys/kernel/debug/dri/0/eDP-x/ilr_setting
+ */
+static int edp_ilr_show(struct seq_file *m, void *unused)
+{
+ struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(m->private);
+ struct dc_link *link = aconnector->dc_link;
+ uint8_t supported_link_rates[16];
+ uint32_t link_rate_in_khz;
+ uint32_t entry = 0;
+ uint8_t dpcd_rev;
+
+ memset(supported_link_rates, 0, sizeof(supported_link_rates));
+ dm_helpers_dp_read_dpcd(link->ctx, link, DP_SUPPORTED_LINK_RATES,
+ supported_link_rates, sizeof(supported_link_rates));
+
+ dpcd_rev = link->dpcd_caps.dpcd_rev.raw;
+
+ if (dpcd_rev >= DP_DPCD_REV_13 &&
+ (supported_link_rates[entry+1] != 0 || supported_link_rates[entry] != 0)) {
+
+ for (entry = 0; entry < 16; entry += 2) {
+ link_rate_in_khz = (supported_link_rates[entry+1] * 0x100 +
+ supported_link_rates[entry]) * 200;
+ seq_printf(m, "[%d] %d kHz\n", entry/2, link_rate_in_khz);
+ }
+ } else {
+ seq_puts(m, "ILR is not supported by this eDP panel.\n");
+ }
+
+ return 0;
+}
+
+/*
+ * Set supported customized link rate to eDP panel.
+ *
+ * echo <lane_count> <link_rate option> > ilr_setting
+ *
+ * for example, supported ILR : [0] 1620000 kHz [1] 2160000 kHz [2] 2430000 kHz ...
+ * echo 4 1 > /sys/kernel/debug/dri/0/eDP-x/ilr_setting
+ * to set 4 lanes and 2.16 GHz
+ */
+static ssize_t edp_ilr_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_dm_connector *connector = file_inode(f)->i_private;
+ struct dc_link *link = connector->dc_link;
+ struct amdgpu_device *adev = drm_to_adev(connector->base.dev);
+ struct dc *dc = (struct dc *)link->dc;
+ struct dc_link_settings prefer_link_settings;
+ char *wr_buf = NULL;
+ const uint32_t wr_buf_size = 40;
+ /* 0: lane_count; 1: link_rate */
+ int max_param_num = 2;
+ uint8_t param_nums = 0;
+ long param[2];
+ bool valid_input = true;
+
+ if (size == 0)
+ return -EINVAL;
+
+ wr_buf = kcalloc(wr_buf_size, sizeof(char), GFP_KERNEL);
+ if (!wr_buf)
+ return -ENOMEM;
+
+ if (parse_write_buffer_into_params(wr_buf, wr_buf_size,
+ (long *)param, buf,
+ max_param_num,
+ &param_nums)) {
+ kfree(wr_buf);
+ return -EINVAL;
+ }
+
+ if (param_nums <= 0) {
+ kfree(wr_buf);
+ return -EINVAL;
+ }
+
+ switch (param[0]) {
+ case LANE_COUNT_ONE:
+ case LANE_COUNT_TWO:
+ case LANE_COUNT_FOUR:
+ break;
+ default:
+ valid_input = false;
+ break;
+ }
+
+ if (param[1] >= link->dpcd_caps.edp_supported_link_rates_count)
+ valid_input = false;
+
+ if (!valid_input) {
+ kfree(wr_buf);
+ DRM_DEBUG_DRIVER("Invalid Input value. No HW will be programmed\n");
+ prefer_link_settings.use_link_rate_set = false;
+ mutex_lock(&adev->dm.dc_lock);
+ dc_link_set_preferred_training_settings(dc, NULL, NULL, link, false);
+ mutex_unlock(&adev->dm.dc_lock);
+ return size;
+ }
+
+ /* save user force lane_count, link_rate to preferred settings
+ * spread spectrum will not be changed
+ */
+ prefer_link_settings.link_spread = link->cur_link_settings.link_spread;
+ prefer_link_settings.lane_count = param[0];
+ prefer_link_settings.use_link_rate_set = true;
+ prefer_link_settings.link_rate_set = param[1];
+ prefer_link_settings.link_rate = link->dpcd_caps.edp_supported_link_rates[param[1]];
+
+ mutex_lock(&adev->dm.dc_lock);
+ dc_link_set_preferred_training_settings(dc, &prefer_link_settings,
+ NULL, link, false);
+ mutex_unlock(&adev->dm.dc_lock);
+
+ kfree(wr_buf);
+ return size;
+}
+
+static int edp_ilr_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, edp_ilr_show, inode->i_private);
+}
+
+static const struct file_operations edp_ilr_debugfs_fops = {
+ .owner = THIS_MODULE,
+ .open = edp_ilr_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = edp_ilr_write
};
void connector_debugfs_init(struct amdgpu_dm_connector *connector)
@@ -2726,11 +3532,25 @@ void connector_debugfs_init(struct amdgpu_dm_connector *connector)
}
}
if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP) {
+ debugfs_create_file("replay_capability", 0444, dir, connector,
+ &replay_capability_fops);
+ debugfs_create_file("replay_state", 0444, dir, connector, &replay_state_fops);
+ debugfs_create_file_unsafe("replay_residency", 0444, dir,
+ connector, &replay_residency_fops);
+ debugfs_create_file_unsafe("psr_capability", 0444, dir, connector, &psr_capability_fops);
debugfs_create_file_unsafe("psr_state", 0444, dir, connector, &psr_fops);
+ debugfs_create_file_unsafe("psr_residency", 0444, dir,
+ connector, &psr_residency_fops);
debugfs_create_file("amdgpu_current_backlight_pwm", 0444, dir, connector,
&current_backlight_fops);
debugfs_create_file("amdgpu_target_backlight_pwm", 0444, dir, connector,
&target_backlight_fops);
+ debugfs_create_file("ilr_setting", 0644, dir, connector,
+ &edp_ilr_debugfs_fops);
+ debugfs_create_file("allow_edp_hotplug_detection", 0644, dir, connector,
+ &allow_edp_hotplug_detection_fops);
+ debugfs_create_file("disallow_edp_enter_psr", 0644, dir, connector,
+ &disallow_edp_enter_psr_fops);
}
for (i = 0; i < ARRAY_SIZE(connector_debugfs_entries); i++) {
@@ -2739,10 +3559,6 @@ void connector_debugfs_init(struct amdgpu_dm_connector *connector)
connector_debugfs_entries[i].fops);
}
- connector->debugfs_dpcd_address = 0;
- connector->debugfs_dpcd_size = 0;
-
-#ifdef CONFIG_DRM_AMD_DC_HDCP
if (connector->base.connector_type == DRM_MODE_CONNECTOR_HDMIA) {
for (i = 0; i < ARRAY_SIZE(hdmi_debugfs_entries); i++) {
debugfs_create_file(hdmi_debugfs_entries[i].name,
@@ -2750,7 +3566,6 @@ void connector_debugfs_init(struct amdgpu_dm_connector *connector)
hdmi_debugfs_entries[i].fops);
}
}
-#endif
}
#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
@@ -2764,8 +3579,8 @@ static int crc_win_x_start_set(void *data, u64 val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- acrtc->dm_irq_params.crc_window.x_start = (uint16_t) val;
- acrtc->dm_irq_params.crc_window.update_win = false;
+ acrtc->dm_irq_params.window_param[0].x_start = (uint16_t) val;
+ acrtc->dm_irq_params.window_param[0].update_win = false;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -2781,7 +3596,7 @@ static int crc_win_x_start_get(void *data, u64 *val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- *val = acrtc->dm_irq_params.crc_window.x_start;
+ *val = acrtc->dm_irq_params.window_param[0].x_start;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -2801,8 +3616,8 @@ static int crc_win_y_start_set(void *data, u64 val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- acrtc->dm_irq_params.crc_window.y_start = (uint16_t) val;
- acrtc->dm_irq_params.crc_window.update_win = false;
+ acrtc->dm_irq_params.window_param[0].y_start = (uint16_t) val;
+ acrtc->dm_irq_params.window_param[0].update_win = false;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -2818,7 +3633,7 @@ static int crc_win_y_start_get(void *data, u64 *val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- *val = acrtc->dm_irq_params.crc_window.y_start;
+ *val = acrtc->dm_irq_params.window_param[0].y_start;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -2837,8 +3652,8 @@ static int crc_win_x_end_set(void *data, u64 val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- acrtc->dm_irq_params.crc_window.x_end = (uint16_t) val;
- acrtc->dm_irq_params.crc_window.update_win = false;
+ acrtc->dm_irq_params.window_param[0].x_end = (uint16_t) val;
+ acrtc->dm_irq_params.window_param[0].update_win = false;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -2854,7 +3669,7 @@ static int crc_win_x_end_get(void *data, u64 *val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- *val = acrtc->dm_irq_params.crc_window.x_end;
+ *val = acrtc->dm_irq_params.window_param[0].x_end;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -2873,8 +3688,8 @@ static int crc_win_y_end_set(void *data, u64 val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- acrtc->dm_irq_params.crc_window.y_end = (uint16_t) val;
- acrtc->dm_irq_params.crc_window.update_win = false;
+ acrtc->dm_irq_params.window_param[0].y_end = (uint16_t) val;
+ acrtc->dm_irq_params.window_param[0].update_win = false;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -2890,7 +3705,7 @@ static int crc_win_y_end_get(void *data, u64 *val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- *val = acrtc->dm_irq_params.crc_window.y_end;
+ *val = acrtc->dm_irq_params.window_param[0].y_end;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -2903,38 +3718,27 @@ DEFINE_DEBUGFS_ATTRIBUTE(crc_win_y_end_fops, crc_win_y_end_get,
*/
static int crc_win_update_set(void *data, u64 val)
{
- struct drm_crtc *new_crtc = data;
- struct drm_crtc *old_crtc = NULL;
- struct amdgpu_crtc *new_acrtc, *old_acrtc;
- struct amdgpu_device *adev = drm_to_adev(new_crtc->dev);
- struct crc_rd_work *crc_rd_wrk = adev->dm.crc_rd_wrk;
+ struct drm_crtc *crtc = data;
+ struct amdgpu_crtc *acrtc;
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
if (val) {
+ acrtc = to_amdgpu_crtc(crtc);
+ mutex_lock(&adev->dm.dc_lock);
+ /* PSR may write to OTG CRC window control register,
+ * so close it before starting secure_display.
+ */
+ amdgpu_dm_psr_disable(acrtc->dm_irq_params.stream, true);
+
spin_lock_irq(&adev_to_drm(adev)->event_lock);
- spin_lock_irq(&crc_rd_wrk->crc_rd_work_lock);
- if (crc_rd_wrk && crc_rd_wrk->crtc) {
- old_crtc = crc_rd_wrk->crtc;
- old_acrtc = to_amdgpu_crtc(old_crtc);
- }
- new_acrtc = to_amdgpu_crtc(new_crtc);
- if (old_crtc && old_crtc != new_crtc) {
- old_acrtc->dm_irq_params.crc_window.activated = false;
- old_acrtc->dm_irq_params.crc_window.update_win = false;
- old_acrtc->dm_irq_params.crc_window.skip_frame_cnt = 0;
+ acrtc->dm_irq_params.window_param[0].enable = true;
+ acrtc->dm_irq_params.window_param[0].update_win = true;
+ acrtc->dm_irq_params.window_param[0].skip_frame_cnt = 0;
+ acrtc->dm_irq_params.crc_window_activated = true;
- new_acrtc->dm_irq_params.crc_window.activated = true;
- new_acrtc->dm_irq_params.crc_window.update_win = true;
- new_acrtc->dm_irq_params.crc_window.skip_frame_cnt = 0;
- crc_rd_wrk->crtc = new_crtc;
- } else {
- new_acrtc->dm_irq_params.crc_window.activated = true;
- new_acrtc->dm_irq_params.crc_window.update_win = true;
- new_acrtc->dm_irq_params.crc_window.skip_frame_cnt = 0;
- crc_rd_wrk->crtc = new_crtc;
- }
- spin_unlock_irq(&crc_rd_wrk->crc_rd_work_lock);
spin_unlock_irq(&adev_to_drm(adev)->event_lock);
+ mutex_unlock(&adev->dm.dc_lock);
}
return 0;
@@ -2951,9 +3755,10 @@ static int crc_win_update_get(void *data, u64 *val)
DEFINE_DEBUGFS_ATTRIBUTE(crc_win_update_fops, crc_win_update_get,
crc_win_update_set, "%llu\n");
-
+#endif
void crtc_debugfs_init(struct drm_crtc *crtc)
{
+#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
struct dentry *dir = debugfs_lookup("crc", crtc->debugfs_entry);
if (!dir)
@@ -2969,9 +3774,14 @@ void crtc_debugfs_init(struct drm_crtc *crtc)
&crc_win_y_end_fops);
debugfs_create_file_unsafe("crc_win_update", 0644, dir, crtc,
&crc_win_update_fops);
-
-}
+ dput(dir);
#endif
+ debugfs_create_file("amdgpu_current_bpc", 0644, crtc->debugfs_entry,
+ crtc, &amdgpu_current_bpc_fops);
+ debugfs_create_file("amdgpu_current_colorspace", 0644, crtc->debugfs_entry,
+ crtc, &amdgpu_current_colorspace_fops);
+}
+
/*
* Writes DTN log state to the user supplied buffer.
* Example usage: cat /sys/kernel/debug/dri/0/amdgpu_dm_dtn_log
@@ -3076,6 +3886,7 @@ static int trigger_hpd_mst_set(void *data, u64 val)
struct amdgpu_dm_connector *aconnector;
struct drm_connector *connector;
struct dc_link *link = NULL;
+ int ret;
if (val == 1) {
drm_connector_list_iter_begin(dev, &iter);
@@ -3083,8 +3894,16 @@ static int trigger_hpd_mst_set(void *data, u64 val)
aconnector = to_amdgpu_dm_connector(connector);
if (aconnector->dc_link->type == dc_connection_mst_branch &&
aconnector->mst_mgr.aux) {
- dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD);
- drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_mgr, true);
+ mutex_lock(&adev->dm.dc_lock);
+ ret = dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD);
+ mutex_unlock(&adev->dm.dc_lock);
+
+ if (!ret)
+ DRM_ERROR("DM_MST: Failed to detect dc link!");
+
+ ret = drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_mgr, true);
+ if (ret < 0)
+ DRM_ERROR("DM_MST: Failed to set the device into MST mode!");
}
}
} else if (val == 0) {
@@ -3094,12 +3913,12 @@ static int trigger_hpd_mst_set(void *data, u64 val)
if (!aconnector->dc_link)
continue;
- if (!aconnector->mst_port)
+ if (!aconnector->mst_root)
continue;
link = aconnector->dc_link;
- dp_receiver_power_ctrl(link, false);
- drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_port->mst_mgr, false);
+ dc_link_dp_receiver_power_ctrl(link, false);
+ drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_root->mst_mgr, false);
link->mst_stream_alloc_table.stream_count = 0;
memset(link->mst_stream_alloc_table.stream_allocations, 0,
sizeof(link->mst_stream_alloc_table.stream_allocations));
@@ -3191,6 +4010,84 @@ DEFINE_DEBUGFS_ATTRIBUTE(disable_hpd_ops, disable_hpd_get,
disable_hpd_set, "%llu\n");
/*
+ * Prints hardware capabilities. These are used for IGT testing.
+ */
+static int capabilities_show(struct seq_file *m, void *unused)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct dc *dc = adev->dm.dc;
+ bool mall_supported = dc->caps.mall_size_total;
+ bool subvp_supported = dc->caps.subvp_fw_processing_delay_us;
+ unsigned int mall_in_use = false;
+ unsigned int subvp_in_use = false;
+
+ struct hubbub *hubbub = dc->res_pool->hubbub;
+
+ if (hubbub && hubbub->funcs->get_mall_en)
+ hubbub->funcs->get_mall_en(hubbub, &mall_in_use);
+
+ if (dc->cap_funcs.get_subvp_en)
+ subvp_in_use = dc->cap_funcs.get_subvp_en(dc, dc->current_state);
+
+ seq_printf(m, "mall supported: %s, enabled: %s\n",
+ mall_supported ? "yes" : "no", mall_in_use ? "yes" : "no");
+ seq_printf(m, "sub-viewport supported: %s, enabled: %s\n",
+ subvp_supported ? "yes" : "no", subvp_in_use ? "yes" : "no");
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(capabilities);
+
+/*
+ * Temporary w/a to force sst sequence in M42D DP2 mst receiver
+ * Example usage: echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_dp_set_mst_en_for_sst
+ */
+static int dp_force_sst_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = data;
+
+ adev->dm.dc->debug.set_mst_en_for_sst = val;
+
+ return 0;
+}
+
+static int dp_force_sst_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = data;
+
+ *val = adev->dm.dc->debug.set_mst_en_for_sst;
+
+ return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(dp_set_mst_en_for_sst_ops, dp_force_sst_get,
+ dp_force_sst_set, "%llu\n");
+
+/*
+ * Force DP2 sequence without VESA certified cable.
+ * Example usage: echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_dp_ignore_cable_id
+ */
+static int dp_ignore_cable_id_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = data;
+
+ adev->dm.dc->debug.ignore_cable_id = val;
+
+ return 0;
+}
+
+static int dp_ignore_cable_id_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = data;
+
+ *val = adev->dm.dc->debug.ignore_cable_id;
+
+ return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(dp_ignore_cable_id_ops, dp_ignore_cable_id_get,
+ dp_ignore_cable_id_set, "%llu\n");
+
+/*
* Sets the DC visual confirm debug option from the given string.
* Example usage: echo 1 > /sys/kernel/debug/dri/0/amdgpu_visual_confirm
*/
@@ -3220,6 +4117,40 @@ DEFINE_SHOW_ATTRIBUTE(mst_topo);
DEFINE_DEBUGFS_ATTRIBUTE(visual_confirm_fops, visual_confirm_get,
visual_confirm_set, "%llu\n");
+
+/*
+ * Sets the DC skip_detection_link_training debug option from the given string.
+ * Example usage: echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_skip_detection_link_training
+ */
+static int skip_detection_link_training_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = data;
+
+ if (val == 0)
+ adev->dm.dc->debug.skip_detection_link_training = false;
+ else
+ adev->dm.dc->debug.skip_detection_link_training = true;
+
+ return 0;
+}
+
+/*
+ * Reads the DC skip_detection_link_training debug option value into the given buffer.
+ * Example usage: cat /sys/kernel/debug/dri/0/amdgpu_dm_skip_detection_link_training
+ */
+static int skip_detection_link_training_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = data;
+
+ *val = adev->dm.dc->debug.skip_detection_link_training;
+
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(skip_detection_link_training_fops,
+ skip_detection_link_training_get,
+ skip_detection_link_training_set, "%llu\n");
+
/*
* Dumps the DCC_EN bit for each pipe.
* Example usage: cat /sys/kernel/debug/dri/0/amdgpu_dm_dcc_en
@@ -3252,8 +4183,10 @@ static ssize_t dcc_en_bits_read(
dc->hwss.get_dcc_en_bits(dc, dcc_en_bits);
rd_buf = kcalloc(rd_buf_size, sizeof(char), GFP_KERNEL);
- if (!rd_buf)
+ if (!rd_buf) {
+ kfree(dcc_en_bits);
return -ENOMEM;
+ }
for (i = 0; i < num_pipes; i++)
offset += snprintf(rd_buf + offset, rd_buf_size - offset,
@@ -3266,8 +4199,10 @@ static ssize_t dcc_en_bits_read(
if (*pos >= rd_buf_size)
break;
r = put_user(*(rd_buf + result), buf);
- if (r)
+ if (r) {
+ kfree(rd_buf);
return r; /* r = -EFAULT */
+ }
buf += 1;
size -= 1;
*pos += 1;
@@ -3297,12 +4232,21 @@ void dtn_debugfs_init(struct amdgpu_device *adev)
debugfs_create_file("amdgpu_mst_topology", 0444, root,
adev, &mst_topo_fops);
+ debugfs_create_file("amdgpu_dm_capabilities", 0444, root,
+ adev, &capabilities_fops);
debugfs_create_file("amdgpu_dm_dtn_log", 0644, root, adev,
&dtn_log_fops);
+ debugfs_create_file("amdgpu_dm_dp_set_mst_en_for_sst", 0644, root, adev,
+ &dp_set_mst_en_for_sst_ops);
+ debugfs_create_file("amdgpu_dm_dp_ignore_cable_id", 0644, root, adev,
+ &dp_ignore_cable_id_ops);
debugfs_create_file_unsafe("amdgpu_dm_visual_confirm", 0644, root, adev,
&visual_confirm_fops);
+ debugfs_create_file_unsafe("amdgpu_dm_skip_detection_link_training", 0644, root, adev,
+ &skip_detection_link_training_fops);
+
debugfs_create_file_unsafe("amdgpu_dm_dmub_tracebuffer", 0644, root,
adev, &dmub_tracebuffer_fops);
@@ -3312,6 +4256,9 @@ void dtn_debugfs_init(struct amdgpu_device *adev)
debugfs_create_file_unsafe("amdgpu_dm_force_timing_sync", 0644, root,
adev, &force_timing_sync_ops);
+ debugfs_create_file_unsafe("amdgpu_dm_dmub_trace_mask", 0644, root,
+ adev, &dmub_trace_mask_fops);
+
debugfs_create_file_unsafe("amdgpu_dm_dmcub_trace_event_en", 0644, root,
adev, &dmcub_trace_event_state_fops);
@@ -3324,4 +4271,7 @@ void dtn_debugfs_init(struct amdgpu_device *adev)
debugfs_create_file_unsafe("amdgpu_dm_disable_hpd", 0644, root, adev,
&disable_hpd_ops);
+ if (adev->dm.dc->caps.ips_support)
+ debugfs_create_file_unsafe("amdgpu_dm_ips_status", 0644, root, adev,
+ &ips_status_fops);
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h
index 3366cb644053..122cdc124b3b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: MIT */
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
@@ -31,8 +32,6 @@
void connector_debugfs_init(struct amdgpu_dm_connector *connector);
void dtn_debugfs_init(struct amdgpu_device *adev);
-#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
void crtc_debugfs_init(struct drm_crtc *crtc);
-#endif
#endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
index 5bfdc66b5867..85ce558cefc5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2019 Advanced Micro Devices, Inc.
*
@@ -26,8 +27,9 @@
#include "amdgpu_dm_hdcp.h"
#include "amdgpu.h"
#include "amdgpu_dm.h"
+#include "dc_fused_io.h"
#include "dm_helpers.h"
-#include <drm/drm_hdcp.h>
+#include <drm/display/drm_hdcp_helper.h>
#include "hdcp_psp.h"
/*
@@ -39,10 +41,10 @@
static bool
lp_write_i2c(void *handle, uint32_t address, const uint8_t *data, uint32_t size)
{
-
struct dc_link *link = handle;
struct i2c_payload i2c_payloads[] = {{true, address, size, (void *)data} };
- struct i2c_command cmd = {i2c_payloads, 1, I2C_COMMAND_ENGINE_HW, link->dc->caps.i2c_speed_in_khz};
+ struct i2c_command cmd = {i2c_payloads, 1, I2C_COMMAND_ENGINE_HW,
+ link->dc->caps.i2c_speed_in_khz};
return dm_helpers_submit_i2c(link->ctx, link, &cmd);
}
@@ -52,8 +54,10 @@ lp_read_i2c(void *handle, uint32_t address, uint8_t offset, uint8_t *data, uint3
{
struct dc_link *link = handle;
- struct i2c_payload i2c_payloads[] = {{true, address, 1, &offset}, {false, address, size, data} };
- struct i2c_command cmd = {i2c_payloads, 2, I2C_COMMAND_ENGINE_HW, link->dc->caps.i2c_speed_in_khz};
+ struct i2c_payload i2c_payloads[] = {{true, address, 1, &offset},
+ {false, address, size, data} };
+ struct i2c_command cmd = {i2c_payloads, 2, I2C_COMMAND_ENGINE_HW,
+ link->dc->caps.i2c_speed_in_khz};
return dm_helpers_submit_i2c(link->ctx, link, &cmd);
}
@@ -74,9 +78,36 @@ lp_read_dpcd(void *handle, uint32_t address, uint8_t *data, uint32_t size)
return dm_helpers_dp_read_dpcd(link->ctx, link, address, data, size);
}
-static uint8_t *psp_get_srm(struct psp_context *psp, uint32_t *srm_version, uint32_t *srm_size)
+static bool lp_atomic_write_poll_read_i2c(
+ void *handle,
+ const struct mod_hdcp_atomic_op_i2c *write,
+ const struct mod_hdcp_atomic_op_i2c *poll,
+ struct mod_hdcp_atomic_op_i2c *read,
+ uint32_t poll_timeout_us,
+ uint8_t poll_mask_msb
+)
+{
+ struct dc_link *link = handle;
+
+ return dm_atomic_write_poll_read_i2c(link, write, poll, read, poll_timeout_us, poll_mask_msb);
+}
+
+static bool lp_atomic_write_poll_read_aux(
+ void *handle,
+ const struct mod_hdcp_atomic_op_aux *write,
+ const struct mod_hdcp_atomic_op_aux *poll,
+ struct mod_hdcp_atomic_op_aux *read,
+ uint32_t poll_timeout_us,
+ uint8_t poll_mask_msb
+)
{
+ struct dc_link *link = handle;
+
+ return dm_atomic_write_poll_read_aux(link, write, poll, read, poll_timeout_us, poll_mask_msb);
+}
+static uint8_t *psp_get_srm(struct psp_context *psp, uint32_t *srm_version, uint32_t *srm_size)
+{
struct ta_hdcp_shared_memory *hdcp_cmd;
if (!psp->hdcp_context.context.initialized) {
@@ -96,13 +127,12 @@ static uint8_t *psp_get_srm(struct psp_context *psp, uint32_t *srm_version, uint
*srm_version = hdcp_cmd->out_msg.hdcp_get_srm.srm_version;
*srm_size = hdcp_cmd->out_msg.hdcp_get_srm.srm_buf_size;
-
return hdcp_cmd->out_msg.hdcp_get_srm.srm_buf;
}
-static int psp_set_srm(struct psp_context *psp, uint8_t *srm, uint32_t srm_size, uint32_t *srm_version)
+static int psp_set_srm(struct psp_context *psp,
+ u8 *srm, uint32_t srm_size, uint32_t *srm_version)
{
-
struct ta_hdcp_shared_memory *hdcp_cmd;
if (!psp->hdcp_context.context.initialized) {
@@ -119,7 +149,8 @@ static int psp_set_srm(struct psp_context *psp, uint8_t *srm, uint32_t srm_size,
psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
- if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS || hdcp_cmd->out_msg.hdcp_set_srm.valid_signature != 1 ||
+ if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS ||
+ hdcp_cmd->out_msg.hdcp_set_srm.valid_signature != 1 ||
hdcp_cmd->out_msg.hdcp_set_srm.srm_version == PSP_SRM_VERSION_MAX)
return -EINVAL;
@@ -150,7 +181,6 @@ static void process_output(struct hdcp_workqueue *hdcp_work)
static void link_lock(struct hdcp_workqueue *work, bool lock)
{
-
int i = 0;
for (i = 0; i < work->max_link; i++) {
@@ -160,72 +190,74 @@ static void link_lock(struct hdcp_workqueue *work, bool lock)
mutex_unlock(&work[i].mutex);
}
}
+
void hdcp_update_display(struct hdcp_workqueue *hdcp_work,
unsigned int link_index,
struct amdgpu_dm_connector *aconnector,
- uint8_t content_type,
+ u8 content_type,
bool enable_encryption)
{
struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index];
- struct mod_hdcp_display *display = &hdcp_work[link_index].display;
- struct mod_hdcp_link *link = &hdcp_work[link_index].link;
- struct mod_hdcp_display_query query;
-
- mutex_lock(&hdcp_w->mutex);
- hdcp_w->aconnector = aconnector;
-
- query.display = NULL;
- mod_hdcp_query_display(&hdcp_w->hdcp, aconnector->base.index, &query);
-
- if (query.display != NULL) {
- memcpy(display, query.display, sizeof(struct mod_hdcp_display));
- mod_hdcp_remove_display(&hdcp_w->hdcp, aconnector->base.index, &hdcp_w->output);
-
- hdcp_w->link.adjust.hdcp2.force_type = MOD_HDCP_FORCE_TYPE_0;
-
- if (enable_encryption) {
- /* Explicitly set the saved SRM as sysfs call will be after we already enabled hdcp
- * (s3 resume case)
- */
- if (hdcp_work->srm_size > 0)
- psp_set_srm(hdcp_work->hdcp.config.psp.handle, hdcp_work->srm, hdcp_work->srm_size,
- &hdcp_work->srm_version);
-
- display->adjust.disable = MOD_HDCP_DISPLAY_NOT_DISABLE;
- if (content_type == DRM_MODE_HDCP_CONTENT_TYPE0) {
- hdcp_w->link.adjust.hdcp1.disable = 0;
- hdcp_w->link.adjust.hdcp2.force_type = MOD_HDCP_FORCE_TYPE_0;
- } else if (content_type == DRM_MODE_HDCP_CONTENT_TYPE1) {
- hdcp_w->link.adjust.hdcp1.disable = 1;
- hdcp_w->link.adjust.hdcp2.force_type = MOD_HDCP_FORCE_TYPE_1;
- }
-
- schedule_delayed_work(&hdcp_w->property_validate_dwork,
- msecs_to_jiffies(DRM_HDCP_CHECK_PERIOD_MS));
- } else {
- display->adjust.disable = MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION;
- hdcp_w->encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
- cancel_delayed_work(&hdcp_w->property_validate_dwork);
+ struct mod_hdcp_link_adjustment link_adjust;
+ struct mod_hdcp_display_adjustment display_adjust;
+ unsigned int conn_index = aconnector->base.index;
+ const struct dc *dc = aconnector->dc_link->dc;
+
+ guard(mutex)(&hdcp_w->mutex);
+ drm_connector_get(&aconnector->base);
+ if (hdcp_w->aconnector[conn_index])
+ drm_connector_put(&hdcp_w->aconnector[conn_index]->base);
+ hdcp_w->aconnector[conn_index] = aconnector;
+
+ memset(&link_adjust, 0, sizeof(link_adjust));
+ memset(&display_adjust, 0, sizeof(display_adjust));
+
+ if (enable_encryption) {
+ /* Explicitly set the saved SRM as sysfs call will be after we already enabled hdcp
+ * (s3 resume case)
+ */
+ if (hdcp_work->srm_size > 0)
+ psp_set_srm(hdcp_work->hdcp.config.psp.handle, hdcp_work->srm,
+ hdcp_work->srm_size,
+ &hdcp_work->srm_version);
+
+ display_adjust.disable = MOD_HDCP_DISPLAY_NOT_DISABLE;
+
+ link_adjust.auth_delay = 2;
+ link_adjust.retry_limit = MAX_NUM_OF_ATTEMPTS;
+
+ if (content_type == DRM_MODE_HDCP_CONTENT_TYPE0) {
+ link_adjust.hdcp2.force_type = MOD_HDCP_FORCE_TYPE_0;
+ } else if (content_type == DRM_MODE_HDCP_CONTENT_TYPE1) {
+ link_adjust.hdcp1.disable = 1;
+ link_adjust.hdcp2.force_type = MOD_HDCP_FORCE_TYPE_1;
}
+ link_adjust.hdcp2.use_fw_locality_check =
+ (dc->caps.fused_io_supported || dc->debug.hdcp_lc_force_fw_enable);
+ link_adjust.hdcp2.use_sw_locality_fallback = dc->debug.hdcp_lc_enable_sw_fallback;
- display->state = MOD_HDCP_DISPLAY_ACTIVE;
+ schedule_delayed_work(&hdcp_w->property_validate_dwork,
+ msecs_to_jiffies(DRM_HDCP_CHECK_PERIOD_MS));
+ } else {
+ display_adjust.disable = MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION;
+ hdcp_w->encryption_status[conn_index] = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
+ cancel_delayed_work(&hdcp_w->property_validate_dwork);
}
- mod_hdcp_add_display(&hdcp_w->hdcp, link, display, &hdcp_w->output);
+ mod_hdcp_update_display(&hdcp_w->hdcp, conn_index, &link_adjust, &display_adjust, &hdcp_w->output);
process_output(hdcp_w);
- mutex_unlock(&hdcp_w->mutex);
}
static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work,
- unsigned int link_index,
+ unsigned int link_index,
struct amdgpu_dm_connector *aconnector)
{
struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index];
struct drm_connector_state *conn_state = aconnector->base.state;
+ unsigned int conn_index = aconnector->base.index;
- mutex_lock(&hdcp_w->mutex);
- hdcp_w->aconnector = aconnector;
+ guard(mutex)(&hdcp_w->mutex);
/* the removal of display will invoke auth reset -> hdcp destroy and
* we'd expect the Content Protection (CP) property changed back to
@@ -236,28 +268,39 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work,
conn_state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED;
DRM_DEBUG_DRIVER("[HDCP_DM] display %d, CP 2 -> 1, type %u, DPMS %u\n",
- aconnector->base.index, conn_state->hdcp_content_type, aconnector->base.dpms);
+ aconnector->base.index, conn_state->hdcp_content_type,
+ aconnector->base.dpms);
}
mod_hdcp_remove_display(&hdcp_w->hdcp, aconnector->base.index, &hdcp_w->output);
-
+ if (hdcp_w->aconnector[conn_index]) {
+ drm_connector_put(&hdcp_w->aconnector[conn_index]->base);
+ hdcp_w->aconnector[conn_index] = NULL;
+ }
process_output(hdcp_w);
- mutex_unlock(&hdcp_w->mutex);
}
+
void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_index)
{
struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index];
+ unsigned int conn_index;
- mutex_lock(&hdcp_w->mutex);
+ guard(mutex)(&hdcp_w->mutex);
mod_hdcp_reset_connection(&hdcp_w->hdcp, &hdcp_w->output);
cancel_delayed_work(&hdcp_w->property_validate_dwork);
- hdcp_w->encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
- process_output(hdcp_w);
+ for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX; conn_index++) {
+ hdcp_w->encryption_status[conn_index] =
+ MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
+ if (hdcp_w->aconnector[conn_index]) {
+ drm_connector_put(&hdcp_w->aconnector[conn_index]->base);
+ hdcp_w->aconnector[conn_index] = NULL;
+ }
+ }
- mutex_unlock(&hdcp_w->mutex);
+ process_output(hdcp_w);
}
void hdcp_handle_cpirq(struct hdcp_workqueue *hdcp_work, unsigned int link_index)
@@ -267,17 +310,14 @@ void hdcp_handle_cpirq(struct hdcp_workqueue *hdcp_work, unsigned int link_index
schedule_work(&hdcp_w->cpirq_work);
}
-
-
-
static void event_callback(struct work_struct *work)
{
struct hdcp_workqueue *hdcp_work;
hdcp_work = container_of(to_delayed_work(work), struct hdcp_workqueue,
- callback_dwork);
+ callback_dwork);
- mutex_lock(&hdcp_work->mutex);
+ guard(mutex)(&hdcp_work->mutex);
cancel_delayed_work(&hdcp_work->callback_dwork);
@@ -285,46 +325,76 @@ static void event_callback(struct work_struct *work)
&hdcp_work->output);
process_output(hdcp_work);
-
- mutex_unlock(&hdcp_work->mutex);
-
-
}
+
static void event_property_update(struct work_struct *work)
{
-
- struct hdcp_workqueue *hdcp_work = container_of(work, struct hdcp_workqueue, property_update_work);
- struct amdgpu_dm_connector *aconnector = hdcp_work->aconnector;
- struct drm_device *dev = hdcp_work->aconnector->base.dev;
+ struct hdcp_workqueue *hdcp_work = container_of(work, struct hdcp_workqueue,
+ property_update_work);
+ struct amdgpu_dm_connector *aconnector = NULL;
+ struct drm_device *dev;
long ret;
+ unsigned int conn_index;
+ struct drm_connector *connector;
+ struct drm_connector_state *conn_state;
- drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
- mutex_lock(&hdcp_work->mutex);
+ for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX; conn_index++) {
+ aconnector = hdcp_work->aconnector[conn_index];
+ if (!aconnector)
+ continue;
- if (aconnector->base.state->commit) {
- ret = wait_for_completion_interruptible_timeout(&aconnector->base.state->commit->hw_done, 10 * HZ);
+ connector = &aconnector->base;
- if (ret == 0) {
- DRM_ERROR("HDCP state unknown! Setting it to DESIRED");
- hdcp_work->encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
- }
- }
+ /* check if display connected */
+ if (connector->status != connector_status_connected)
+ continue;
- if (hdcp_work->encryption_status != MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF) {
- if (aconnector->base.state->hdcp_content_type == DRM_MODE_HDCP_CONTENT_TYPE0 &&
- hdcp_work->encryption_status <= MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE0_ON)
- drm_hdcp_update_content_protection(&aconnector->base, DRM_MODE_CONTENT_PROTECTION_ENABLED);
- else if (aconnector->base.state->hdcp_content_type == DRM_MODE_HDCP_CONTENT_TYPE1 &&
- hdcp_work->encryption_status == MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE1_ON)
- drm_hdcp_update_content_protection(&aconnector->base, DRM_MODE_CONTENT_PROTECTION_ENABLED);
- } else {
- drm_hdcp_update_content_protection(&aconnector->base, DRM_MODE_CONTENT_PROTECTION_DESIRED);
- }
+ conn_state = aconnector->base.state;
+ if (!conn_state)
+ continue;
- mutex_unlock(&hdcp_work->mutex);
- drm_modeset_unlock(&dev->mode_config.connection_mutex);
+ dev = connector->dev;
+
+ if (!dev)
+ continue;
+
+ drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
+ guard(mutex)(&hdcp_work->mutex);
+
+ if (conn_state->commit) {
+ ret = wait_for_completion_interruptible_timeout(&conn_state->commit->hw_done,
+ 10 * HZ);
+ if (ret == 0) {
+ DRM_ERROR("HDCP state unknown! Setting it to DESIRED\n");
+ hdcp_work->encryption_status[conn_index] =
+ MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
+ }
+ }
+ if (hdcp_work->encryption_status[conn_index] !=
+ MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF) {
+ if (conn_state->hdcp_content_type ==
+ DRM_MODE_HDCP_CONTENT_TYPE0 &&
+ hdcp_work->encryption_status[conn_index] <=
+ MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE0_ON) {
+ DRM_DEBUG_DRIVER("[HDCP_DM] DRM_MODE_CONTENT_PROTECTION_ENABLED\n");
+ drm_hdcp_update_content_protection(connector,
+ DRM_MODE_CONTENT_PROTECTION_ENABLED);
+ } else if (conn_state->hdcp_content_type ==
+ DRM_MODE_HDCP_CONTENT_TYPE1 &&
+ hdcp_work->encryption_status[conn_index] ==
+ MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE1_ON) {
+ drm_hdcp_update_content_protection(connector,
+ DRM_MODE_CONTENT_PROTECTION_ENABLED);
+ }
+ } else {
+ DRM_DEBUG_DRIVER("[HDCP_DM] DRM_MODE_CONTENT_PROTECTION_DESIRED\n");
+ drm_hdcp_update_content_protection(connector,
+ DRM_MODE_CONTENT_PROTECTION_DESIRED);
+ }
+ drm_modeset_unlock(&dev->mode_config.connection_mutex);
+ }
}
static void event_property_validate(struct work_struct *work)
@@ -332,22 +402,49 @@ static void event_property_validate(struct work_struct *work)
struct hdcp_workqueue *hdcp_work =
container_of(to_delayed_work(work), struct hdcp_workqueue, property_validate_dwork);
struct mod_hdcp_display_query query;
- struct amdgpu_dm_connector *aconnector = hdcp_work->aconnector;
+ struct amdgpu_dm_connector *aconnector;
+ unsigned int conn_index;
- if (!aconnector)
- return;
+ guard(mutex)(&hdcp_work->mutex);
- mutex_lock(&hdcp_work->mutex);
+ for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX;
+ conn_index++) {
+ aconnector = hdcp_work->aconnector[conn_index];
- query.encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
- mod_hdcp_query_display(&hdcp_work->hdcp, aconnector->base.index, &query);
+ if (!aconnector)
+ continue;
- if (query.encryption_status != hdcp_work->encryption_status) {
- hdcp_work->encryption_status = query.encryption_status;
- schedule_work(&hdcp_work->property_update_work);
- }
+ /* check if display connected */
+ if (aconnector->base.status != connector_status_connected)
+ continue;
+
+ if (!aconnector->base.state)
+ continue;
- mutex_unlock(&hdcp_work->mutex);
+ query.encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
+ mod_hdcp_query_display(&hdcp_work->hdcp, aconnector->base.index,
+ &query);
+
+ DRM_DEBUG_DRIVER("[HDCP_DM] disp %d, connector->CP %u, (query, work): (%d, %d)\n",
+ aconnector->base.index,
+ aconnector->base.state->content_protection,
+ query.encryption_status,
+ hdcp_work->encryption_status[conn_index]);
+
+ if (query.encryption_status !=
+ hdcp_work->encryption_status[conn_index]) {
+ DRM_DEBUG_DRIVER("[HDCP_DM] encryption_status change from %x to %x\n",
+ hdcp_work->encryption_status[conn_index],
+ query.encryption_status);
+
+ hdcp_work->encryption_status[conn_index] =
+ query.encryption_status;
+
+ DRM_DEBUG_DRIVER("[HDCP_DM] trigger property_update_work\n");
+
+ schedule_work(&hdcp_work->property_update_work);
+ }
+ }
}
static void event_watchdog_timer(struct work_struct *work)
@@ -355,10 +452,10 @@ static void event_watchdog_timer(struct work_struct *work)
struct hdcp_workqueue *hdcp_work;
hdcp_work = container_of(to_delayed_work(work),
- struct hdcp_workqueue,
+ struct hdcp_workqueue,
watchdog_timer_dwork);
- mutex_lock(&hdcp_work->mutex);
+ guard(mutex)(&hdcp_work->mutex);
cancel_delayed_work(&hdcp_work->watchdog_timer_dwork);
@@ -367,9 +464,6 @@ static void event_watchdog_timer(struct work_struct *work)
&hdcp_work->output);
process_output(hdcp_work);
-
- mutex_unlock(&hdcp_work->mutex);
-
}
static void event_cpirq(struct work_struct *work)
@@ -378,17 +472,13 @@ static void event_cpirq(struct work_struct *work)
hdcp_work = container_of(work, struct hdcp_workqueue, cpirq_work);
- mutex_lock(&hdcp_work->mutex);
+ guard(mutex)(&hdcp_work->mutex);
mod_hdcp_process_event(&hdcp_work->hdcp, MOD_HDCP_EVENT_CPIRQ, &hdcp_work->output);
process_output(hdcp_work);
-
- mutex_unlock(&hdcp_work->mutex);
-
}
-
void hdcp_destroy(struct kobject *kobj, struct hdcp_workqueue *hdcp_work)
{
int i = 0;
@@ -396,6 +486,7 @@ void hdcp_destroy(struct kobject *kobj, struct hdcp_workqueue *hdcp_work)
for (i = 0; i < hdcp_work->max_link; i++) {
cancel_delayed_work_sync(&hdcp_work[i].callback_dwork);
cancel_delayed_work_sync(&hdcp_work[i].watchdog_timer_dwork);
+ cancel_delayed_work_sync(&hdcp_work[i].property_validate_dwork);
}
sysfs_remove_bin_file(kobj, &hdcp_work[0].attr);
@@ -404,15 +495,12 @@ void hdcp_destroy(struct kobject *kobj, struct hdcp_workqueue *hdcp_work)
kfree(hdcp_work);
}
-
static bool enable_assr(void *handle, struct dc_link *link)
{
-
struct hdcp_workqueue *hdcp_work = handle;
struct mod_hdcp hdcp = hdcp_work->hdcp;
struct psp_context *psp = hdcp.config.psp.handle;
struct ta_dtm_shared_memory *dtm_cmd;
- bool res = true;
if (!psp->dtm_context.context.initialized) {
DRM_INFO("Failed to enable ASSR, DTM TA is not initialized.");
@@ -421,23 +509,22 @@ static bool enable_assr(void *handle, struct dc_link *link)
dtm_cmd = (struct ta_dtm_shared_memory *)psp->dtm_context.context.mem_context.shared_buf;
- mutex_lock(&psp->dtm_context.mutex);
+ guard(mutex)(&psp->dtm_context.mutex);
memset(dtm_cmd, 0, sizeof(struct ta_dtm_shared_memory));
dtm_cmd->cmd_id = TA_DTM_COMMAND__TOPOLOGY_ASSR_ENABLE;
- dtm_cmd->dtm_in_message.topology_assr_enable.display_topology_dig_be_index = link->link_enc_hw_inst;
+ dtm_cmd->dtm_in_message.topology_assr_enable.display_topology_dig_be_index =
+ link->link_enc_hw_inst;
dtm_cmd->dtm_status = TA_DTM_STATUS__GENERIC_FAILURE;
psp_dtm_invoke(psp, dtm_cmd->cmd_id);
if (dtm_cmd->dtm_status != TA_DTM_STATUS__SUCCESS) {
DRM_INFO("Failed to enable ASSR");
- res = false;
+ return false;
}
- mutex_unlock(&psp->dtm_context.mutex);
-
- return res;
+ return true;
}
static void update_config(void *handle, struct cp_psp_stream_config *config)
@@ -445,11 +532,13 @@ static void update_config(void *handle, struct cp_psp_stream_config *config)
struct hdcp_workqueue *hdcp_work = handle;
struct amdgpu_dm_connector *aconnector = config->dm_stream_ctx;
int link_index = aconnector->dc_link->link_index;
+ unsigned int conn_index = aconnector->base.index;
struct mod_hdcp_display *display = &hdcp_work[link_index].display;
struct mod_hdcp_link *link = &hdcp_work[link_index].link;
- struct drm_connector_state *conn_state;
+ struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index];
struct dc_sink *sink = NULL;
bool link_is_hdcp14 = false;
+ const struct dc *dc = aconnector->dc_link->dc;
if (config->dpms_off) {
hdcp_remove_display(hdcp_work, link_index, aconnector);
@@ -467,7 +556,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config)
else if (aconnector->dc_em_sink)
sink = aconnector->dc_em_sink;
- if (sink != NULL)
+ if (sink)
link->mode = mod_hdcp_signal_type_to_operation_mode(sink->sink_signal);
display->controller = CONTROLLER_ID_D0 + config->otg_inst;
@@ -476,27 +565,45 @@ static void update_config(void *handle, struct cp_psp_stream_config *config)
link->ddc_line = aconnector->dc_link->ddc_hw_inst + 1;
display->stream_enc_idx = config->stream_enc_idx;
link->link_enc_idx = config->link_enc_idx;
+ link->dio_output_id = config->dio_output_idx;
link->phy_idx = config->phy_idx;
+
if (sink)
link_is_hdcp14 = dc_link_is_hdcp14(aconnector->dc_link, sink->sink_signal);
link->hdcp_supported_informational = link_is_hdcp14;
link->dp.rev = aconnector->dc_link->dpcd_caps.dpcd_rev.raw;
link->dp.assr_enabled = config->assr_enabled;
link->dp.mst_enabled = config->mst_enabled;
+ link->dp.dp2_enabled = config->dp2_enabled;
+ link->dp.usb4_enabled = config->usb4_enabled;
display->adjust.disable = MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION;
- link->adjust.auth_delay = 3;
+ link->adjust.auth_delay = 2;
+ link->adjust.retry_limit = MAX_NUM_OF_ATTEMPTS;
link->adjust.hdcp1.disable = 0;
- conn_state = aconnector->base.state;
+ link->adjust.hdcp2.use_fw_locality_check = (dc->caps.fused_io_supported || dc->debug.hdcp_lc_force_fw_enable);
+ link->adjust.hdcp2.use_sw_locality_fallback = dc->debug.hdcp_lc_enable_sw_fallback;
+ hdcp_w->encryption_status[display->index] = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
DRM_DEBUG_DRIVER("[HDCP_DM] display %d, CP %d, type %d\n", aconnector->base.index,
- (!!aconnector->base.state) ? aconnector->base.state->content_protection : -1,
- (!!aconnector->base.state) ? aconnector->base.state->hdcp_content_type : -1);
+ (!!aconnector->base.state) ?
+ aconnector->base.state->content_protection : -1,
+ (!!aconnector->base.state) ?
+ aconnector->base.state->hdcp_content_type : -1);
- hdcp_update_display(hdcp_work, link_index, aconnector, conn_state->hdcp_content_type, false);
-}
+ guard(mutex)(&hdcp_w->mutex);
+ mod_hdcp_add_display(&hdcp_w->hdcp, link, display, &hdcp_w->output);
+ drm_connector_get(&aconnector->base);
+ if (hdcp_w->aconnector[conn_index])
+ drm_connector_put(&hdcp_w->aconnector[conn_index]->base);
+ hdcp_w->aconnector[conn_index] = aconnector;
+ process_output(hdcp_w);
+}
-/* NOTE: From the usermodes prospective you only need to call write *ONCE*, the kernel
+/**
+ * DOC: Add sysfs interface for set/get srm
+ *
+ * NOTE: From the usermodes prospective you only need to call write *ONCE*, the kernel
* will automatically call once or twice depending on the size
*
* call: "cat file > /sys/class/drm/card0/device/hdcp_srm" from usermode no matter what the size is
@@ -507,23 +614,23 @@ static void update_config(void *handle, struct cp_psp_stream_config *config)
* sysfs interface doesn't tell us the size we will get so we are sending partial SRMs to psp and on
* the last call we will send the full SRM. PSP will fail on every call before the last.
*
- * This means we don't know if the SRM is good until the last call. And because of this limitation we
- * cannot throw errors early as it will stop the kernel from writing to sysfs
+ * This means we don't know if the SRM is good until the last call. And because of this
+ * limitation we cannot throw errors early as it will stop the kernel from writing to sysfs
*
* Example 1:
- * Good SRM size = 5096
- * first call to write 4096 -> PSP fails
- * Second call to write 1000 -> PSP Pass -> SRM is set
+ * Good SRM size = 5096
+ * first call to write 4096 -> PSP fails
+ * Second call to write 1000 -> PSP Pass -> SRM is set
*
* Example 2:
- * Bad SRM size = 4096
- * first call to write 4096 -> PSP fails (This is the same as above, but we don't know if this
- * is the last call)
+ * Bad SRM size = 4096
+ * first call to write 4096 -> PSP fails (This is the same as above, but we don't know if this
+ * is the last call)
*
* Solution?:
- * 1: Parse the SRM? -> It is signed so we don't know the EOF
- * 2: We can have another sysfs that passes the size before calling set. -> simpler solution
- * below
+ * 1: Parse the SRM? -> It is signed so we don't know the EOF
+ * 2: We can have another sysfs that passes the size before calling set. -> simpler solution
+ * below
*
* Easy Solution:
* Always call get after Set to verify if set was successful.
@@ -532,20 +639,21 @@ static void update_config(void *handle, struct cp_psp_stream_config *config)
* +----------------------+
* PSP will only update its srm if its older than the one we are trying to load.
* Always do set first than get.
- * -if we try to "1. SET" a older version PSP will reject it and we can "2. GET" the newer
- * version and save it
+ * -if we try to "1. SET" a older version PSP will reject it and we can "2. GET" the newer
+ * version and save it
*
- * -if we try to "1. SET" a newer version PSP will accept it and we can "2. GET" the
- * same(newer) version back and save it
+ * -if we try to "1. SET" a newer version PSP will accept it and we can "2. GET" the
+ * same(newer) version back and save it
*
- * -if we try to "1. SET" a newer version and PSP rejects it. That means the format is
- * incorrect/corrupted and we should correct our SRM by getting it from PSP
+ * -if we try to "1. SET" a newer version and PSP rejects it. That means the format is
+ * incorrect/corrupted and we should correct our SRM by getting it from PSP
*/
-static ssize_t srm_data_write(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buffer,
+static ssize_t srm_data_write(struct file *filp, struct kobject *kobj,
+ const struct bin_attribute *bin_attr, char *buffer,
loff_t pos, size_t count)
{
struct hdcp_workqueue *work;
- uint32_t srm_version = 0;
+ u32 srm_version = 0;
work = container_of(bin_attr, struct hdcp_workqueue, attr);
link_lock(work, true);
@@ -559,19 +667,19 @@ static ssize_t srm_data_write(struct file *filp, struct kobject *kobj, struct bi
work->srm_version = srm_version;
}
-
link_lock(work, false);
return count;
}
-static ssize_t srm_data_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buffer,
+static ssize_t srm_data_read(struct file *filp, struct kobject *kobj,
+ const struct bin_attribute *bin_attr, char *buffer,
loff_t pos, size_t count)
{
struct hdcp_workqueue *work;
- uint8_t *srm = NULL;
- uint32_t srm_version;
- uint32_t srm_size;
+ u8 *srm = NULL;
+ u32 srm_version;
+ u32 srm_size;
size_t ret = count;
work = container_of(bin_attr, struct hdcp_workqueue, attr);
@@ -604,12 +712,12 @@ ret:
/* From the hdcp spec (5.Renewability) SRM needs to be stored in a non-volatile memory.
*
* For example,
- * if Application "A" sets the SRM (ver 2) and we reboot/suspend and later when Application "B"
- * needs to use HDCP, the version in PSP should be SRM(ver 2). So SRM should be persistent
- * across boot/reboots/suspend/resume/shutdown
+ * if Application "A" sets the SRM (ver 2) and we reboot/suspend and later when Application "B"
+ * needs to use HDCP, the version in PSP should be SRM(ver 2). So SRM should be persistent
+ * across boot/reboots/suspend/resume/shutdown
*
- * Currently when the system goes down (suspend/shutdown) the SRM is cleared from PSP. For HDCP we need
- * to make the SRM persistent.
+ * Currently when the system goes down (suspend/shutdown) the SRM is cleared from PSP. For HDCP
+ * we need to make the SRM persistent.
*
* -PSP owns the checking of SRM but doesn't have the ability to store it in a non-volatile memory.
* -The kernel cannot write to the file systems.
@@ -619,8 +727,8 @@ ret:
*
* Usermode can read/write to/from PSP using the sysfs interface
* For example:
- * to save SRM from PSP to storage : cat /sys/class/drm/card0/device/hdcp_srm > srmfile
- * to load from storage to PSP: cat srmfile > /sys/class/drm/card0/device/hdcp_srm
+ * to save SRM from PSP to storage : cat /sys/class/drm/card0/device/hdcp_srm > srmfile
+ * to load from storage to PSP: cat srmfile > /sys/class/drm/card0/device/hdcp_srm
*/
static const struct bin_attribute data_attr = {
.attr = {.name = "hdcp_srm", .mode = 0664},
@@ -629,10 +737,9 @@ static const struct bin_attribute data_attr = {
.read = srm_data_read,
};
-
-struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, struct cp_psp *cp_psp, struct dc *dc)
+struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev,
+ struct cp_psp *cp_psp, struct dc *dc)
{
-
int max_caps = dc->caps.max_links;
struct hdcp_workqueue *hdcp_work;
int i = 0;
@@ -641,14 +748,16 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, struct
if (ZERO_OR_NULL_PTR(hdcp_work))
return NULL;
- hdcp_work->srm = kcalloc(PSP_HDCP_SRM_FIRST_GEN_MAX_SIZE, sizeof(*hdcp_work->srm), GFP_KERNEL);
+ hdcp_work->srm = kcalloc(PSP_HDCP_SRM_FIRST_GEN_MAX_SIZE,
+ sizeof(*hdcp_work->srm), GFP_KERNEL);
- if (hdcp_work->srm == NULL)
+ if (!hdcp_work->srm)
goto fail_alloc_context;
- hdcp_work->srm_temp = kcalloc(PSP_HDCP_SRM_FIRST_GEN_MAX_SIZE, sizeof(*hdcp_work->srm_temp), GFP_KERNEL);
+ hdcp_work->srm_temp = kcalloc(PSP_HDCP_SRM_FIRST_GEN_MAX_SIZE,
+ sizeof(*hdcp_work->srm_temp), GFP_KERNEL);
- if (hdcp_work->srm_temp == NULL)
+ if (!hdcp_work->srm_temp)
goto fail_alloc_context;
hdcp_work->max_link = max_caps;
@@ -662,14 +771,37 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, struct
INIT_DELAYED_WORK(&hdcp_work[i].watchdog_timer_dwork, event_watchdog_timer);
INIT_DELAYED_WORK(&hdcp_work[i].property_validate_dwork, event_property_validate);
- hdcp_work[i].hdcp.config.psp.handle = &adev->psp;
- if (dc->ctx->dce_version == DCN_VERSION_3_1)
- hdcp_work[i].hdcp.config.psp.caps.dtm_v3_supported = 1;
- hdcp_work[i].hdcp.config.ddc.handle = dc_get_link_at_index(dc, i);
- hdcp_work[i].hdcp.config.ddc.funcs.write_i2c = lp_write_i2c;
- hdcp_work[i].hdcp.config.ddc.funcs.read_i2c = lp_read_i2c;
- hdcp_work[i].hdcp.config.ddc.funcs.write_dpcd = lp_write_dpcd;
- hdcp_work[i].hdcp.config.ddc.funcs.read_dpcd = lp_read_dpcd;
+ struct mod_hdcp_config *config = &hdcp_work[i].hdcp.config;
+ struct mod_hdcp_ddc_funcs *ddc_funcs = &config->ddc.funcs;
+
+ config->psp.handle = &adev->psp;
+ if (dc->ctx->dce_version == DCN_VERSION_3_1 ||
+ dc->ctx->dce_version == DCN_VERSION_3_14 ||
+ dc->ctx->dce_version == DCN_VERSION_3_15 ||
+ dc->ctx->dce_version == DCN_VERSION_3_16 ||
+ dc->ctx->dce_version == DCN_VERSION_3_2 ||
+ dc->ctx->dce_version == DCN_VERSION_3_21 ||
+ dc->ctx->dce_version == DCN_VERSION_3_5 ||
+ dc->ctx->dce_version == DCN_VERSION_3_51 ||
+ dc->ctx->dce_version == DCN_VERSION_3_6 ||
+ dc->ctx->dce_version == DCN_VERSION_4_01)
+ config->psp.caps.dtm_v3_supported = 1;
+
+ config->ddc.handle = dc_get_link_at_index(dc, i);
+
+ ddc_funcs->write_i2c = lp_write_i2c;
+ ddc_funcs->read_i2c = lp_read_i2c;
+ ddc_funcs->write_dpcd = lp_write_dpcd;
+ ddc_funcs->read_dpcd = lp_read_dpcd;
+ ddc_funcs->atomic_write_poll_read_i2c = lp_atomic_write_poll_read_i2c;
+ ddc_funcs->atomic_write_poll_read_aux = lp_atomic_write_poll_read_aux;
+
+ memset(hdcp_work[i].aconnector, 0,
+ sizeof(struct amdgpu_dm_connector *) *
+ AMDGPU_DM_MAX_DISPLAY_INDEX);
+ memset(hdcp_work[i].encryption_status, 0,
+ sizeof(enum mod_hdcp_encryption_status) *
+ AMDGPU_DM_MAX_DISPLAY_INDEX);
}
cp_psp->funcs.update_stream_config = update_config;
@@ -691,10 +823,5 @@ fail_alloc_context:
kfree(hdcp_work);
return NULL;
-
-
-
}
-
-
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h
index 09294ff122fe..4faa344f196e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: MIT */
/*
* Copyright 2019 Advanced Micro Devices, Inc.
*
@@ -43,7 +44,7 @@ struct hdcp_workqueue {
struct delayed_work callback_dwork;
struct delayed_work watchdog_timer_dwork;
struct delayed_work property_validate_dwork;
- struct amdgpu_dm_connector *aconnector;
+ struct amdgpu_dm_connector *aconnector[AMDGPU_DM_MAX_DISPLAY_INDEX];
struct mutex mutex;
struct mod_hdcp hdcp;
@@ -51,7 +52,20 @@ struct hdcp_workqueue {
struct mod_hdcp_display display;
struct mod_hdcp_link link;
- enum mod_hdcp_encryption_status encryption_status;
+ enum mod_hdcp_encryption_status encryption_status[AMDGPU_DM_MAX_DISPLAY_INDEX];
+ /* when display is unplugged from mst hub, connctor will be
+ * destroyed within dm_dp_mst_connector_destroy. connector
+ * hdcp perperties, like type, undesired, desired, enabled,
+ * will be lost. So, save hdcp properties into hdcp_work within
+ * amdgpu_dm_atomic_commit_tail. if the same display is
+ * plugged back with same display index, its hdcp properties
+ * will be retrieved from hdcp_work within dm_dp_mst_get_modes
+ */
+ /* un-desired, desired, enabled */
+ unsigned int content_protection[AMDGPU_DM_MAX_DISPLAY_INDEX];
+ /* hdcp1.x, hdcp2.x */
+ unsigned int hdcp_content_type[AMDGPU_DM_MAX_DISPLAY_INDEX];
+
uint8_t max_link;
uint8_t *srm;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index 8cbeeb7c986d..ac98c746c3de 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
@@ -23,13 +24,17 @@
*
*/
+#include <acpi/video.h>
+
#include <linux/string.h>
#include <linux/acpi.h>
#include <linux/i2c.h>
+#include <drm/drm_atomic.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_edid.h>
+#include <drm/drm_fixed.h>
#include "dm_services.h"
#include "amdgpu.h"
@@ -37,62 +42,78 @@
#include "amdgpu_dm.h"
#include "amdgpu_dm_irq.h"
#include "amdgpu_dm_mst_types.h"
+#include "dpcd_defs.h"
+#include "dc/inc/core_types.h"
#include "dm_helpers.h"
+#include "ddc_service_types.h"
+#include "clk_mgr.h"
-struct monitor_patch_info {
- unsigned int manufacturer_id;
- unsigned int product_id;
- void (*patch_func)(struct dc_edid_caps *edid_caps, unsigned int param);
- unsigned int patch_param;
-};
-static void set_max_dsc_bpp_limit(struct dc_edid_caps *edid_caps, unsigned int param);
-
-static const struct monitor_patch_info monitor_patch_table[] = {
-{0x6D1E, 0x5BBF, set_max_dsc_bpp_limit, 15},
-{0x6D1E, 0x5B9A, set_max_dsc_bpp_limit, 15},
-};
-
-static void set_max_dsc_bpp_limit(struct dc_edid_caps *edid_caps, unsigned int param)
+static u32 edid_extract_panel_id(struct edid *edid)
{
- if (edid_caps)
- edid_caps->panel_patch.max_dsc_target_bpp_limit = param;
+ return (u32)edid->mfg_id[0] << 24 |
+ (u32)edid->mfg_id[1] << 16 |
+ (u32)EDID_PRODUCT_ID(edid);
}
-static int amdgpu_dm_patch_edid_caps(struct dc_edid_caps *edid_caps)
+static void apply_edid_quirks(struct drm_device *dev, struct edid *edid, struct dc_edid_caps *edid_caps)
{
- int i, ret = 0;
-
- for (i = 0; i < ARRAY_SIZE(monitor_patch_table); i++)
- if ((edid_caps->manufacturer_id == monitor_patch_table[i].manufacturer_id)
- && (edid_caps->product_id == monitor_patch_table[i].product_id)) {
- monitor_patch_table[i].patch_func(edid_caps, monitor_patch_table[i].patch_param);
- ret++;
- }
-
- return ret;
+ uint32_t panel_id = edid_extract_panel_id(edid);
+
+ switch (panel_id) {
+ /* Workaround for monitors that need a delay after detecting the link */
+ case drm_edid_encode_panel_id('G', 'B', 'T', 0x3215):
+ drm_dbg_driver(dev, "Add 10s delay for link detection for panel id %X\n", panel_id);
+ edid_caps->panel_patch.wait_after_dpcd_poweroff_ms = 10000;
+ break;
+ /* Workaround for some monitors which do not work well with FAMS */
+ case drm_edid_encode_panel_id('S', 'A', 'M', 0x0E5E):
+ case drm_edid_encode_panel_id('S', 'A', 'M', 0x7053):
+ case drm_edid_encode_panel_id('S', 'A', 'M', 0x71AC):
+ drm_dbg_driver(dev, "Disabling FAMS on monitor with panel id %X\n", panel_id);
+ edid_caps->panel_patch.disable_fams = true;
+ break;
+ /* Workaround for some monitors that do not clear DPCD 0x317 if FreeSync is unsupported */
+ case drm_edid_encode_panel_id('A', 'U', 'O', 0xA7AB):
+ case drm_edid_encode_panel_id('A', 'U', 'O', 0xE69B):
+ case drm_edid_encode_panel_id('B', 'O', 'E', 0x092A):
+ case drm_edid_encode_panel_id('L', 'G', 'D', 0x06D1):
+ case drm_edid_encode_panel_id('M', 'S', 'F', 0x1003):
+ drm_dbg_driver(dev, "Clearing DPCD 0x317 on monitor with panel id %X\n", panel_id);
+ edid_caps->panel_patch.remove_sink_ext_caps = true;
+ break;
+ case drm_edid_encode_panel_id('S', 'D', 'C', 0x4154):
+ case drm_edid_encode_panel_id('S', 'D', 'C', 0x4171):
+ drm_dbg_driver(dev, "Disabling VSC on monitor with panel id %X\n", panel_id);
+ edid_caps->panel_patch.disable_colorimetry = true;
+ break;
+ default:
+ return;
+ }
}
-/* dm_helpers_parse_edid_caps
- *
- * Parse edid caps
+/**
+ * dm_helpers_parse_edid_caps() - Parse edid caps
*
+ * @link: current detected link
* @edid: [in] pointer to edid
- * edid_caps: [in] pointer to edid caps
- * @return
- * void
- * */
+ * @edid_caps: [out] pointer to edid caps, filled in from @edid
+ *
+ * Return: a dc_edid_status code (EDID_OK on success)
+ */
enum dc_edid_status dm_helpers_parse_edid_caps(
- struct dc_context *ctx,
+ struct dc_link *link,
const struct dc_edid *edid,
struct dc_edid_caps *edid_caps)
{
- struct edid *edid_buf = (struct edid *) edid->raw_edid;
+ struct amdgpu_dm_connector *aconnector = link->priv;
+ struct drm_connector *connector = &aconnector->base;
+ struct drm_device *dev = connector->dev;
+ struct edid *edid_buf = edid ? (struct edid *) edid->raw_edid : NULL;
struct cea_sad *sads;
int sad_count = -1;
int sadb_count = -1;
int i = 0;
- int j = 0;
uint8_t *sadb = NULL;
enum dc_edid_status result = EDID_OK;
@@ -110,30 +131,21 @@ enum dc_edid_status dm_helpers_parse_edid_caps(
edid_caps->serial_number = edid_buf->serial;
edid_caps->manufacture_week = edid_buf->mfg_week;
edid_caps->manufacture_year = edid_buf->mfg_year;
+ edid_caps->analog = !(edid_buf->input & DRM_EDID_INPUT_DIGITAL);
- /* One of the four detailed_timings stores the monitor name. It's
- * stored in an array of length 13. */
- for (i = 0; i < 4; i++) {
- if (edid_buf->detailed_timings[i].data.other_data.type == 0xfc) {
- while (j < 13 && edid_buf->detailed_timings[i].data.other_data.data.str.str[j]) {
- if (edid_buf->detailed_timings[i].data.other_data.data.str.str[j] == '\n')
- break;
-
- edid_caps->display_name[j] =
- edid_buf->detailed_timings[i].data.other_data.data.str.str[j];
- j++;
- }
- }
- }
+ drm_edid_get_monitor_name(edid_buf,
+ edid_caps->display_name,
+ AUDIO_INFO_DISPLAY_NAME_SIZE_IN_CHARS);
- edid_caps->edid_hdmi = drm_detect_hdmi_monitor(
- (struct edid *) edid->raw_edid);
+ edid_caps->edid_hdmi = connector->display_info.is_hdmi;
+
+ apply_edid_quirks(dev, edid_buf, edid_caps);
sad_count = drm_edid_to_sad((struct edid *) edid->raw_edid, &sads);
if (sad_count <= 0)
return result;
- edid_caps->audio_mode_count = sad_count < DC_MAX_AUDIO_DESC_COUNT ? sad_count : DC_MAX_AUDIO_DESC_COUNT;
+ edid_caps->audio_mode_count = min(sad_count, DC_MAX_AUDIO_DESC_COUNT);
for (i = 0; i < edid_caps->audio_mode_count; ++i) {
struct cea_sad *sad = &sads[i];
@@ -158,46 +170,58 @@ enum dc_edid_status dm_helpers_parse_edid_caps(
kfree(sads);
kfree(sadb);
- amdgpu_dm_patch_edid_caps(edid_caps);
-
return result;
}
-static void get_payload_table(
- struct amdgpu_dm_connector *aconnector,
- struct dp_mst_stream_allocation_table *proposed_table)
+static void
+fill_dc_mst_payload_table_from_drm(struct dc_link *link,
+ bool enable,
+ struct drm_dp_mst_atomic_payload *target_payload,
+ struct dc_dp_mst_stream_allocation_table *table)
{
- int i;
- struct drm_dp_mst_topology_mgr *mst_mgr =
- &aconnector->mst_port->mst_mgr;
-
- mutex_lock(&mst_mgr->payload_lock);
-
- proposed_table->stream_count = 0;
+ struct dc_dp_mst_stream_allocation_table new_table = { 0 };
+ struct dc_dp_mst_stream_allocation *sa;
+ struct link_mst_stream_allocation_table copy_of_link_table =
+ link->mst_stream_alloc_table;
- /* number of active streams */
- for (i = 0; i < mst_mgr->max_payloads; i++) {
- if (mst_mgr->payloads[i].num_slots == 0)
- break; /* end of vcp_id table */
-
- ASSERT(mst_mgr->payloads[i].payload_state !=
- DP_PAYLOAD_DELETE_LOCAL);
-
- if (mst_mgr->payloads[i].payload_state == DP_PAYLOAD_LOCAL ||
- mst_mgr->payloads[i].payload_state ==
- DP_PAYLOAD_REMOTE) {
+ int i;
+ int current_hw_table_stream_cnt = copy_of_link_table.stream_count;
+ struct link_mst_stream_allocation *dc_alloc;
- struct dp_mst_stream_allocation *sa =
- &proposed_table->stream_allocations[
- proposed_table->stream_count];
+ /* TODO: refactor to set link->mst_stream_alloc_table directly if possible.*/
+ if (enable) {
+ dc_alloc =
+ &copy_of_link_table.stream_allocations[current_hw_table_stream_cnt];
+ dc_alloc->vcp_id = target_payload->vcpi;
+ dc_alloc->slot_count = target_payload->time_slots;
+ } else {
+ for (i = 0; i < copy_of_link_table.stream_count; i++) {
+ dc_alloc =
+ &copy_of_link_table.stream_allocations[i];
+
+ if (dc_alloc->vcp_id == target_payload->vcpi) {
+ dc_alloc->vcp_id = 0;
+ dc_alloc->slot_count = 0;
+ break;
+ }
+ }
+ ASSERT(i != copy_of_link_table.stream_count);
+ }
- sa->slot_count = mst_mgr->payloads[i].num_slots;
- sa->vcp_id = mst_mgr->proposed_vcpis[i]->vcpi;
- proposed_table->stream_count++;
+ /* Fill payload info*/
+ for (i = 0; i < MAX_CONTROLLER_NUM; i++) {
+ dc_alloc =
+ &copy_of_link_table.stream_allocations[i];
+ if (dc_alloc->vcp_id > 0 && dc_alloc->slot_count > 0) {
+ sa = &new_table.stream_allocations[new_table.stream_count];
+ sa->slot_count = dc_alloc->slot_count;
+ sa->vcp_id = dc_alloc->vcp_id;
+ new_table.stream_count++;
}
}
- mutex_unlock(&mst_mgr->payload_lock);
+ /* Overwrite the old table */
+ *table = new_table;
}
void dm_helpers_dp_update_branch_info(
@@ -205,65 +229,86 @@ void dm_helpers_dp_update_branch_info(
const struct dc_link *link)
{}
+static void dm_helpers_construct_old_payload(
+ struct drm_dp_mst_topology_mgr *mgr,
+ struct drm_dp_mst_topology_state *mst_state,
+ struct drm_dp_mst_atomic_payload *new_payload,
+ struct drm_dp_mst_atomic_payload *old_payload)
+{
+ struct drm_dp_mst_atomic_payload *pos;
+ int pbn_per_slot = dfixed_trunc(mst_state->pbn_div);
+ u8 next_payload_vc_start = mgr->next_start_slot;
+ u8 payload_vc_start = new_payload->vc_start_slot;
+ u8 allocated_time_slots;
+
+ *old_payload = *new_payload;
+
+ /* Set correct time_slots/PBN of old payload.
+ * The other fields (delete & dsc_enabled) in
+ * struct drm_dp_mst_atomic_payload are don't-care fields
+ * when calling drm_dp_remove_payload_part2().
+ */
+ list_for_each_entry(pos, &mst_state->payloads, next) {
+ if (pos != new_payload &&
+ pos->vc_start_slot > payload_vc_start &&
+ pos->vc_start_slot < next_payload_vc_start)
+ next_payload_vc_start = pos->vc_start_slot;
+ }
+
+ allocated_time_slots = next_payload_vc_start - payload_vc_start;
+
+ old_payload->time_slots = allocated_time_slots;
+ old_payload->pbn = allocated_time_slots * pbn_per_slot;
+}
+
/*
* Writes payload allocation table in immediate downstream device.
*/
bool dm_helpers_dp_mst_write_payload_allocation_table(
struct dc_context *ctx,
const struct dc_stream_state *stream,
- struct dp_mst_stream_allocation_table *proposed_table,
+ struct dc_dp_mst_stream_allocation_table *proposed_table,
bool enable)
{
struct amdgpu_dm_connector *aconnector;
- struct dm_connector_state *dm_conn_state;
+ struct drm_dp_mst_topology_state *mst_state;
+ struct drm_dp_mst_atomic_payload *target_payload, *new_payload, old_payload;
struct drm_dp_mst_topology_mgr *mst_mgr;
- struct drm_dp_mst_port *mst_port;
- bool ret;
- u8 link_coding_cap = DP_8b_10b_ENCODING;
aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
/* Accessing the connector state is required for vcpi_slots allocation
* and directly relies on behaviour in commit check
* that blocks before commit guaranteeing that the state
- * is not gonna be swapped while still in use in commit tail */
-
- if (!aconnector || !aconnector->mst_port)
- return false;
-
- dm_conn_state = to_dm_connector_state(aconnector->base.state);
-
- mst_mgr = &aconnector->mst_port->mst_mgr;
+ * is not gonna be swapped while still in use in commit tail
+ */
- if (!mst_mgr->mst_state)
+ if (!aconnector || !aconnector->mst_root)
return false;
- mst_port = aconnector->port;
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- link_coding_cap = dc_link_dp_mst_decide_link_encoding_format(aconnector->dc_link);
-#endif
+ mst_mgr = &aconnector->mst_root->mst_mgr;
+ mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state);
+ new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port);
if (enable) {
+ target_payload = new_payload;
- ret = drm_dp_mst_allocate_vcpi(mst_mgr, mst_port,
- dm_conn_state->pbn,
- dm_conn_state->vcpi_slots);
- if (!ret)
- return false;
-
+ /* It's OK for this to fail */
+ drm_dp_add_payload_part1(mst_mgr, mst_state, new_payload);
} else {
- drm_dp_mst_reset_vcpi_slots(mst_mgr, mst_port);
- }
+ /* construct old payload by VCPI*/
+ dm_helpers_construct_old_payload(mst_mgr, mst_state,
+ new_payload, &old_payload);
+ target_payload = &old_payload;
- /* It's OK for this to fail */
- drm_dp_update_payload_part1(mst_mgr, (link_coding_cap == DP_CAP_ANSI_128B132B) ? 0:1);
+ drm_dp_remove_payload_part1(mst_mgr, mst_state, new_payload);
+ }
/* mst_mgr->->payloads are VC payload notify MST branch using DPCD or
* AUX message. The sequence is slot 1-63 allocated sequence for each
* stream. AMD ASIC stream slot allocation should follow the same
- * sequence. copy DRM MST allocation to dc */
-
- get_payload_table(aconnector, proposed_table);
+ * sequence. copy DRM MST allocation to dc
+ */
+ fill_dc_mst_payload_table_from_drm(stream->link, enable, target_payload, proposed_table);
return true;
}
@@ -298,10 +343,10 @@ enum act_return_status dm_helpers_dp_mst_poll_for_allocation_change_trigger(
aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
- if (!aconnector || !aconnector->mst_port)
+ if (!aconnector || !aconnector->mst_root)
return ACT_FAILED;
- mst_mgr = &aconnector->mst_port->mst_mgr;
+ mst_mgr = &aconnector->mst_root->mst_mgr;
if (!mst_mgr->mst_state)
return ACT_FAILED;
@@ -314,35 +359,67 @@ enum act_return_status dm_helpers_dp_mst_poll_for_allocation_change_trigger(
return ACT_SUCCESS;
}
-bool dm_helpers_dp_mst_send_payload_allocation(
+void dm_helpers_dp_mst_send_payload_allocation(
struct dc_context *ctx,
- const struct dc_stream_state *stream,
- bool enable)
+ const struct dc_stream_state *stream)
{
struct amdgpu_dm_connector *aconnector;
+ struct drm_dp_mst_topology_state *mst_state;
struct drm_dp_mst_topology_mgr *mst_mgr;
- struct drm_dp_mst_port *mst_port;
+ struct drm_dp_mst_atomic_payload *new_payload;
+ enum mst_progress_status set_flag = MST_ALLOCATE_NEW_PAYLOAD;
+ enum mst_progress_status clr_flag = MST_CLEAR_ALLOCATED_PAYLOAD;
+ int ret = 0;
aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
- if (!aconnector || !aconnector->mst_port)
- return false;
+ if (!aconnector || !aconnector->mst_root)
+ return;
- mst_port = aconnector->port;
+ mst_mgr = &aconnector->mst_root->mst_mgr;
+ mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state);
+ new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port);
- mst_mgr = &aconnector->mst_port->mst_mgr;
+ ret = drm_dp_add_payload_part2(mst_mgr, new_payload);
- if (!mst_mgr->mst_state)
- return false;
+ if (ret) {
+ amdgpu_dm_set_mst_status(&aconnector->mst_status,
+ set_flag, false);
+ } else {
+ amdgpu_dm_set_mst_status(&aconnector->mst_status,
+ set_flag, true);
+ amdgpu_dm_set_mst_status(&aconnector->mst_status,
+ clr_flag, false);
+ }
+}
- /* It's OK for this to fail */
- drm_dp_update_payload_part2(mst_mgr);
+void dm_helpers_dp_mst_update_mst_mgr_for_deallocation(
+ struct dc_context *ctx,
+ const struct dc_stream_state *stream)
+{
+ struct amdgpu_dm_connector *aconnector;
+ struct drm_dp_mst_topology_state *mst_state;
+ struct drm_dp_mst_topology_mgr *mst_mgr;
+ struct drm_dp_mst_atomic_payload *new_payload, old_payload;
+ enum mst_progress_status set_flag = MST_CLEAR_ALLOCATED_PAYLOAD;
+ enum mst_progress_status clr_flag = MST_ALLOCATE_NEW_PAYLOAD;
- if (!enable)
- drm_dp_mst_deallocate_vcpi(mst_mgr, mst_port);
+ aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
- return true;
-}
+ if (!aconnector || !aconnector->mst_root)
+ return;
+
+ mst_mgr = &aconnector->mst_root->mst_mgr;
+ mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state);
+ new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port);
+ dm_helpers_construct_old_payload(mst_mgr, mst_state,
+ new_payload, &old_payload);
+
+ drm_dp_remove_payload_part2(mst_mgr, mst_state, &old_payload, new_payload);
+
+ amdgpu_dm_set_mst_status(&aconnector->mst_status, set_flag, true);
+ amdgpu_dm_set_mst_status(&aconnector->mst_status, clr_flag, false);
+ }
void dm_dtn_log_begin(struct dc_context *ctx,
struct dc_log_buffer_ctx *log_ctx)
@@ -392,7 +469,7 @@ void dm_dtn_log_append_v(struct dc_context *ctx,
total = log_ctx->pos + n + 1;
if (total > log_ctx->size) {
- char *buf = (char *)kvcalloc(total, sizeof(char), GFP_KERNEL);
+ char *buf = kvcalloc(total, sizeof(char), GFP_KERNEL);
if (buf) {
memcpy(buf, log_ctx->buf, log_ctx->pos);
@@ -438,6 +515,7 @@ bool dm_helpers_dp_mst_start_top_mgr(
bool boot)
{
struct amdgpu_dm_connector *aconnector = link->priv;
+ int ret;
if (!aconnector) {
DRM_ERROR("Failed to find connector for link!");
@@ -453,19 +531,27 @@ bool dm_helpers_dp_mst_start_top_mgr(
DRM_INFO("DM_MST: starting TM on aconnector: %p [id: %d]\n",
aconnector, aconnector->base.base.id);
- return (drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_mgr, true) == 0);
+ ret = drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_mgr, true);
+ if (ret < 0) {
+ DRM_ERROR("DM_MST: Failed to set the device into MST mode!");
+ return false;
+ }
+
+ DRM_INFO("DM_MST: DP%x, %d-lane link detected\n", aconnector->mst_mgr.dpcd[0],
+ aconnector->mst_mgr.dpcd[2] & DP_MAX_LANE_COUNT_MASK);
+
+ return true;
}
-void dm_helpers_dp_mst_stop_top_mgr(
+bool dm_helpers_dp_mst_stop_top_mgr(
struct dc_context *ctx,
struct dc_link *link)
{
struct amdgpu_dm_connector *aconnector = link->priv;
- uint8_t i;
if (!aconnector) {
DRM_ERROR("Failed to find connector for link!");
- return;
+ return false;
}
DRM_INFO("DM_MST: stopping TM on aconnector: %p [id: %d]\n",
@@ -473,23 +559,10 @@ void dm_helpers_dp_mst_stop_top_mgr(
if (aconnector->mst_mgr.mst_state == true) {
drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_mgr, false);
-
- for (i = 0; i < MAX_SINKS_PER_LINK; i++) {
- if (link->remote_sinks[i] == NULL)
- continue;
-
- if (link->remote_sinks[i]->sink_signal ==
- SIGNAL_TYPE_DISPLAY_PORT_MST) {
- dc_link_remove_remote_sink(link, link->remote_sinks[i]);
-
- if (aconnector->dc_sink) {
- dc_sink_release(aconnector->dc_sink);
- aconnector->dc_sink = NULL;
- aconnector->dc_link->cur_link_settings.lane_count = 0;
- }
- }
- }
+ link->cur_link_settings.lane_count = 0;
}
+
+ return false;
}
bool dm_helpers_dp_read_dpcd(
@@ -502,13 +575,11 @@ bool dm_helpers_dp_read_dpcd(
struct amdgpu_dm_connector *aconnector = link->priv;
- if (!aconnector) {
- DC_LOG_DC("Failed to find connector for link!\n");
+ if (!aconnector)
return false;
- }
- return drm_dp_dpcd_read(&aconnector->dm_dp_aux.aux, address,
- data, size) > 0;
+ return drm_dp_dpcd_read(&aconnector->dm_dp_aux.aux, address, data,
+ size) == size;
}
bool dm_helpers_dp_write_dpcd(
@@ -520,10 +591,8 @@ bool dm_helpers_dp_write_dpcd(
{
struct amdgpu_dm_connector *aconnector = link->priv;
- if (!aconnector) {
- DRM_ERROR("Failed to find connector for link!");
+ if (!aconnector)
return false;
- }
return drm_dp_dpcd_write(&aconnector->dm_dp_aux.aux,
address, (uint8_t *)data, size) > 0;
@@ -563,33 +632,285 @@ bool dm_helpers_submit_i2c(
return result;
}
-bool dm_helpers_dp_write_dsc_enable(
+
+bool dm_helpers_execute_fused_io(
struct dc_context *ctx,
+ struct dc_link *link,
+ union dmub_rb_cmd *commands,
+ uint8_t count,
+ uint32_t timeout_us
+)
+{
+ struct amdgpu_device *dev = ctx->driver_context;
+
+ return amdgpu_dm_execute_fused_io(dev, link, commands, count, timeout_us);
+}
+
+static bool execute_synaptics_rc_command(struct drm_dp_aux *aux,
+ bool is_write_cmd,
+ unsigned char cmd,
+ unsigned int length,
+ unsigned int offset,
+ unsigned char *data)
+{
+ bool success = false;
+ unsigned char rc_data[16] = {0};
+ unsigned char rc_offset[4] = {0};
+ unsigned char rc_length[2] = {0};
+ unsigned char rc_cmd = 0;
+ unsigned char rc_result = 0xFF;
+ unsigned char i = 0;
+ int ret;
+
+ if (is_write_cmd) {
+ // write rc data
+ memmove(rc_data, data, length);
+ ret = drm_dp_dpcd_write(aux, SYNAPTICS_RC_DATA, rc_data, sizeof(rc_data));
+ if (ret < 0)
+ goto err;
+ }
+
+ // write rc offset
+ rc_offset[0] = (unsigned char) offset & 0xFF;
+ rc_offset[1] = (unsigned char) (offset >> 8) & 0xFF;
+ rc_offset[2] = (unsigned char) (offset >> 16) & 0xFF;
+ rc_offset[3] = (unsigned char) (offset >> 24) & 0xFF;
+ ret = drm_dp_dpcd_write(aux, SYNAPTICS_RC_OFFSET, rc_offset, sizeof(rc_offset));
+ if (ret < 0)
+ goto err;
+
+ // write rc length
+ rc_length[0] = (unsigned char) length & 0xFF;
+ rc_length[1] = (unsigned char) (length >> 8) & 0xFF;
+ ret = drm_dp_dpcd_write(aux, SYNAPTICS_RC_LENGTH, rc_length, sizeof(rc_length));
+ if (ret < 0)
+ goto err;
+
+ // write rc cmd
+ rc_cmd = cmd | 0x80;
+ ret = drm_dp_dpcd_write(aux, SYNAPTICS_RC_COMMAND, &rc_cmd, sizeof(rc_cmd));
+ if (ret < 0)
+ goto err;
+
+ // poll until active is 0
+ for (i = 0; i < 10; i++) {
+ drm_dp_dpcd_read(aux, SYNAPTICS_RC_COMMAND, &rc_cmd, sizeof(rc_cmd));
+ if (rc_cmd == cmd)
+ // active is 0
+ break;
+ msleep(10);
+ }
+
+ // read rc result
+ drm_dp_dpcd_read(aux, SYNAPTICS_RC_RESULT, &rc_result, sizeof(rc_result));
+ success = (rc_result == 0);
+
+ if (success && !is_write_cmd) {
+ // read rc data
+ drm_dp_dpcd_read(aux, SYNAPTICS_RC_DATA, data, length);
+ }
+
+ drm_dbg_dp(aux->drm_dev, "success = %d\n", success);
+
+ return success;
+
+err:
+ DRM_ERROR("%s: write cmd ..., err = %d\n", __func__, ret);
+ return false;
+}
+
+static void apply_synaptics_fifo_reset_wa(struct drm_dp_aux *aux)
+{
+ unsigned char data[16] = {0};
+
+ drm_dbg_dp(aux->drm_dev, "Start\n");
+
+ // Step 2
+ data[0] = 'P';
+ data[1] = 'R';
+ data[2] = 'I';
+ data[3] = 'U';
+ data[4] = 'S';
+
+ if (!execute_synaptics_rc_command(aux, true, 0x01, 5, 0, data))
+ return;
+
+ // Step 3 and 4
+ if (!execute_synaptics_rc_command(aux, false, 0x31, 4, 0x220998, data))
+ return;
+
+ data[0] &= (~(1 << 1)); // set bit 1 to 0
+ if (!execute_synaptics_rc_command(aux, true, 0x21, 4, 0x220998, data))
+ return;
+
+ if (!execute_synaptics_rc_command(aux, false, 0x31, 4, 0x220D98, data))
+ return;
+
+ data[0] &= (~(1 << 1)); // set bit 1 to 0
+ if (!execute_synaptics_rc_command(aux, true, 0x21, 4, 0x220D98, data))
+ return;
+
+ if (!execute_synaptics_rc_command(aux, false, 0x31, 4, 0x221198, data))
+ return;
+
+ data[0] &= (~(1 << 1)); // set bit 1 to 0
+ if (!execute_synaptics_rc_command(aux, true, 0x21, 4, 0x221198, data))
+ return;
+
+ // Step 3 and 5
+ if (!execute_synaptics_rc_command(aux, false, 0x31, 4, 0x220998, data))
+ return;
+
+ data[0] |= (1 << 1); // set bit 1 to 1
+ if (!execute_synaptics_rc_command(aux, true, 0x21, 4, 0x220998, data))
+ return;
+
+ if (!execute_synaptics_rc_command(aux, false, 0x31, 4, 0x220D98, data))
+ return;
+
+ data[0] |= (1 << 1); // set bit 1 to 1
+
+ if (!execute_synaptics_rc_command(aux, false, 0x31, 4, 0x221198, data))
+ return;
+
+ data[0] |= (1 << 1); // set bit 1 to 1
+ if (!execute_synaptics_rc_command(aux, true, 0x21, 4, 0x221198, data))
+ return;
+
+ // Step 6
+ if (!execute_synaptics_rc_command(aux, true, 0x02, 0, 0, NULL))
+ return;
+
+ drm_dbg_dp(aux->drm_dev, "Done\n");
+}
+
+/* MST Dock */
+static const uint8_t SYNAPTICS_DEVICE_ID[] = "SYNA";
+
+static uint8_t write_dsc_enable_synaptics_non_virtual_dpcd_mst(
+ struct drm_dp_aux *aux,
const struct dc_stream_state *stream,
bool enable)
{
- uint8_t enable_dsc = enable ? 1 : 0;
- struct amdgpu_dm_connector *aconnector;
uint8_t ret = 0;
- if (!stream)
- return false;
+ drm_dbg_dp(aux->drm_dev,
+ "MST_DSC Configure DSC to non-virtual dpcd synaptics\n");
- if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
- aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+ if (enable) {
+ /* When DSC is enabled on previous boot and reboot with the hub,
+ * there is a chance that Synaptics hub gets stuck during reboot sequence.
+ * Applying a workaround to reset Synaptics SDP fifo before enabling the first stream
+ */
+ if (!stream->link->link_status.link_active &&
+ memcmp(stream->link->dpcd_caps.branch_dev_name,
+ (int8_t *)SYNAPTICS_DEVICE_ID, 4) == 0)
+ apply_synaptics_fifo_reset_wa(aux);
+
+ ret = drm_dp_dpcd_write(aux, DP_DSC_ENABLE, &enable, 1);
+ DRM_INFO("MST_DSC Send DSC enable to synaptics\n");
+
+ } else {
+ /* The Synaptics hub does not support virtual DPCD, and the
+ * external monitor shows garbage if DSC is disabled while the link is active.
+ * Disable DSC only when the entire link status has turned to false.
+ */
+ if (!stream->link->link_status.link_active) {
+ ret = drm_dp_dpcd_write(aux, DP_DSC_ENABLE, &enable, 1);
+ DRM_INFO("MST_DSC Send DSC disable to synaptics\n");
+ }
+ }
+
+ return ret;
+}
+bool dm_helpers_dp_write_dsc_enable(
+ struct dc_context *ctx,
+ const struct dc_stream_state *stream,
+ bool enable)
+{
+ static const uint8_t DSC_DISABLE;
+ static const uint8_t DSC_DECODING = 0x01;
+ static const uint8_t DSC_PASSTHROUGH = 0x02;
+
+ struct amdgpu_dm_connector *aconnector =
+ (struct amdgpu_dm_connector *)stream->dm_stream_context;
+ struct drm_device *dev = aconnector->base.dev;
+ struct drm_dp_mst_port *port;
+ uint8_t enable_dsc = enable ? DSC_DECODING : DSC_DISABLE;
+ uint8_t enable_passthrough = enable ? DSC_PASSTHROUGH : DSC_DISABLE;
+ uint8_t ret = 0;
+
+ if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
if (!aconnector->dsc_aux)
return false;
- ret = drm_dp_dpcd_write(aconnector->dsc_aux, DP_DSC_ENABLE, &enable_dsc, 1);
+ // apply w/a to synaptics
+ if (needs_dsc_aux_workaround(aconnector->dc_link) &&
+ (aconnector->mst_downstream_port_present.byte & 0x7) != 0x3)
+ return write_dsc_enable_synaptics_non_virtual_dpcd_mst(
+ aconnector->dsc_aux, stream, enable_dsc);
+
+ port = aconnector->mst_output_port;
+
+ if (enable) {
+ if (port->passthrough_aux) {
+ ret = drm_dp_dpcd_write(port->passthrough_aux,
+ DP_DSC_ENABLE,
+ &enable_passthrough, 1);
+ drm_dbg_dp(dev,
+ "MST_DSC Sent DSC pass-through enable to virtual dpcd port, ret = %u\n",
+ ret);
+ }
+
+ ret = drm_dp_dpcd_write(aconnector->dsc_aux,
+ DP_DSC_ENABLE, &enable_dsc, 1);
+ drm_dbg_dp(dev,
+ "MST_DSC Sent DSC decoding enable to %s port, ret = %u\n",
+ (port->passthrough_aux) ? "remote RX" :
+ "virtual dpcd",
+ ret);
+ } else {
+ ret = drm_dp_dpcd_write(aconnector->dsc_aux,
+ DP_DSC_ENABLE, &enable_dsc, 1);
+ drm_dbg_dp(dev,
+ "MST_DSC Sent DSC decoding disable to %s port, ret = %u\n",
+ (port->passthrough_aux) ? "remote RX" :
+ "virtual dpcd",
+ ret);
+
+ if (port->passthrough_aux) {
+ ret = drm_dp_dpcd_write(port->passthrough_aux,
+ DP_DSC_ENABLE,
+ &enable_passthrough, 1);
+ drm_dbg_dp(dev,
+ "MST_DSC Sent DSC pass-through disable to virtual dpcd port, ret = %u\n",
+ ret);
+ }
+ }
}
- if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT) {
- ret = dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1);
- DC_LOG_DC("Send DSC %s to sst display\n", enable_dsc ? "enable" : "disable");
+ if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT || stream->signal == SIGNAL_TYPE_EDP) {
+ if (stream->sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_NONE) {
+ ret = dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1);
+ drm_dbg_dp(dev,
+ "SST_DSC Send DSC %s to SST RX\n",
+ enable_dsc ? "enable" : "disable");
+ } else if (stream->sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) {
+ ret = dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1);
+ drm_dbg_dp(dev,
+ "SST_DSC Send DSC %s to DP-HDMI PCON\n",
+ enable_dsc ? "enable" : "disable");
+ }
}
- return (ret > 0);
+ return ret;
+}
+
+bool dm_helpers_dp_write_hblank_reduction(struct dc_context *ctx, const struct dc_stream_state *stream)
+{
+ // TODO
+ return false;
}
bool dm_helpers_is_dp_sink_present(struct dc_link *link)
@@ -608,6 +929,67 @@ bool dm_helpers_is_dp_sink_present(struct dc_link *link)
return dp_sink_present;
}
+static int
+dm_helpers_probe_acpi_edid(void *data, u8 *buf, unsigned int block, size_t len)
+{
+ struct drm_connector *connector = data;
+ struct acpi_device *acpidev = ACPI_COMPANION(connector->dev->dev);
+ unsigned short start = block * EDID_LENGTH;
+ struct edid *edid;
+ int r;
+
+ if (!acpidev)
+ return -ENODEV;
+
+ /* fetch the entire edid from BIOS */
+ r = acpi_video_get_edid(acpidev, ACPI_VIDEO_DISPLAY_LCD, -1, (void *)&edid);
+ if (r < 0) {
+ drm_dbg(connector->dev, "Failed to get EDID from ACPI: %d\n", r);
+ return r;
+ }
+ if (len > r || start > r || start + len > r) {
+ r = -EINVAL;
+ goto cleanup;
+ }
+
+ /* sanity check */
+ if (edid->revision < 4 || !(edid->input & DRM_EDID_INPUT_DIGITAL) ||
+ (edid->input & DRM_EDID_DIGITAL_TYPE_MASK) == DRM_EDID_DIGITAL_TYPE_UNDEF) {
+ r = -EINVAL;
+ goto cleanup;
+ }
+
+ memcpy(buf, (void *)edid + start, len);
+ r = 0;
+
+cleanup:
+ kfree(edid);
+
+ return r;
+}
+
+static const struct drm_edid *
+dm_helpers_read_acpi_edid(struct amdgpu_dm_connector *aconnector)
+{
+ struct drm_connector *connector = &aconnector->base;
+
+ if (amdgpu_dc_debug_mask & DC_DISABLE_ACPI_EDID)
+ return NULL;
+
+ switch (connector->connector_type) {
+ case DRM_MODE_CONNECTOR_LVDS:
+ case DRM_MODE_CONNECTOR_eDP:
+ break;
+ default:
+ return NULL;
+ }
+
+ if (connector->force == DRM_FORCE_OFF)
+ return NULL;
+
+ return drm_edid_read_custom(connector, dm_helpers_probe_acpi_edid, connector);
+}
+
enum dc_edid_status dm_helpers_read_local_edid(
struct dc_context *ctx,
struct dc_link *link,
@@ -616,9 +998,10 @@ enum dc_edid_status dm_helpers_read_local_edid(
struct amdgpu_dm_connector *aconnector = link->priv;
struct drm_connector *connector = &aconnector->base;
struct i2c_adapter *ddc;
- int retry = 3;
- enum dc_edid_status edid_status;
- struct edid *edid;
+ int retry = 25;
+ enum dc_edid_status edid_status = EDID_NO_RESPONSE;
+ const struct drm_edid *drm_edid;
+ const struct edid *edid;
if (link->aux_mode)
ddc = &aconnector->dm_dp_aux.aux.ddc;
@@ -629,56 +1012,75 @@ enum dc_edid_status dm_helpers_read_local_edid(
* do check sum and retry to make sure read correct edid.
*/
do {
-
- edid = drm_get_edid(&aconnector->base, ddc);
+ drm_edid = dm_helpers_read_acpi_edid(aconnector);
+ if (drm_edid)
+ drm_info(connector->dev, "Using ACPI provided EDID for %s\n", connector->name);
+ else
+ drm_edid = drm_edid_read_ddc(connector, ddc);
+ drm_edid_connector_update(connector, drm_edid);
/* DP Compliance Test 4.2.2.6 */
if (link->aux_mode && connector->edid_corrupt)
drm_dp_send_real_edid_checksum(&aconnector->dm_dp_aux.aux, connector->real_edid_checksum);
- if (!edid && connector->edid_corrupt) {
+ if (!drm_edid && connector->edid_corrupt) {
connector->edid_corrupt = false;
return EDID_BAD_CHECKSUM;
}
- if (!edid)
- return EDID_NO_RESPONSE;
+ if (!drm_edid)
+ continue;
+
+ edid = drm_edid_raw(drm_edid); // FIXME: Get rid of drm_edid_raw()
+ if (!edid ||
+ edid->extensions >= sizeof(sink->dc_edid.raw_edid) / EDID_LENGTH)
+ return EDID_BAD_INPUT;
sink->dc_edid.length = EDID_LENGTH * (edid->extensions + 1);
memmove(sink->dc_edid.raw_edid, (uint8_t *)edid, sink->dc_edid.length);
/* We don't need the original edid anymore */
- kfree(edid);
-
- /* connector->display_info will be parsed from EDID and saved
- * into drm_connector->display_info from edid by call stack
- * below:
- * drm_parse_ycbcr420_deep_color_info
- * drm_parse_hdmi_forum_vsdb
- * drm_parse_cea_ext
- * drm_add_display_info
- * drm_connector_update_edid_property
- *
- * drm_connector->display_info will be used by amdgpu_dm funcs,
- * like fill_stream_properties_from_drm_display_mode
- */
- amdgpu_dm_update_connector_after_detect(aconnector);
+ drm_edid_free(drm_edid);
edid_status = dm_helpers_parse_edid_caps(
- ctx,
+ link,
&sink->dc_edid,
&sink->edid_caps);
- } while (edid_status == EDID_BAD_CHECKSUM && --retry > 0);
+ } while ((edid_status == EDID_BAD_CHECKSUM || edid_status == EDID_NO_RESPONSE) && --retry > 0);
if (edid_status != EDID_OK)
DRM_ERROR("EDID err: %d, on connector: %s",
edid_status,
aconnector->base.name);
+ if (link->aux_mode) {
+ union test_request test_request = {0};
+ union test_response test_response = {0};
- /* DP Compliance Test 4.2.2.3 */
- if (link->aux_mode)
- drm_dp_send_real_edid_checksum(&aconnector->dm_dp_aux.aux, sink->dc_edid.raw_edid[sink->dc_edid.length-1]);
+ dm_helpers_dp_read_dpcd(ctx,
+ link,
+ DP_TEST_REQUEST,
+ &test_request.raw,
+ sizeof(union test_request));
+
+ if (!test_request.bits.EDID_READ)
+ return edid_status;
+
+ test_response.bits.EDID_CHECKSUM_WRITE = 1;
+
+ dm_helpers_dp_write_dpcd(ctx,
+ link,
+ DP_TEST_EDID_CHECKSUM,
+ &sink->dc_edid.raw_edid[sink->dc_edid.length-1],
+ 1);
+
+ dm_helpers_dp_write_dpcd(ctx,
+ link,
+ DP_TEST_RESPONSE,
+ &test_response.raw,
+ sizeof(test_response));
+
+ }
return edid_status;
}
@@ -688,9 +1090,13 @@ int dm_helper_dmub_aux_transfer_sync(
struct aux_payload *payload,
enum aux_return_code_type *operation_result)
{
- return amdgpu_dm_process_dmub_aux_transfer_sync(true, ctx,
- link->link_index, (void *)payload,
- (void *)operation_result);
+ if (!link->hpd_status) {
+ *operation_result = AUX_RET_ERROR_HPD_DISCON;
+ return -1;
+ }
+
+ return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload,
+ operation_result);
}
int dm_helpers_dmub_set_config_sync(struct dc_context *ctx,
@@ -698,9 +1104,8 @@ int dm_helpers_dmub_set_config_sync(struct dc_context *ctx,
struct set_config_cmd_payload *payload,
enum set_config_status *operation_result)
{
- return amdgpu_dm_process_dmub_aux_transfer_sync(false, ctx,
- link->link_index, (void *)payload,
- (void *)operation_result);
+ return amdgpu_dm_process_dmub_set_config_sync(ctx, link->link_index, payload,
+ operation_result);
}
void dm_set_dcn_clocks(struct dc_context *ctx, struct dc_clocks *clks)
@@ -714,6 +1119,33 @@ void dm_helpers_smu_timeout(struct dc_context *ctx, unsigned int msg_id, unsigne
//amdgpu_device_gpu_recover(dc_context->driver-context, NULL);
}
+void dm_helpers_init_panel_settings(
+ struct dc_context *ctx,
+ struct dc_panel_config *panel_config,
+ struct dc_sink *sink)
+{
+ // Extra Panel Power Sequence
+ panel_config->pps.extra_t3_ms = sink->edid_caps.panel_patch.extra_t3_ms;
+ panel_config->pps.extra_t7_ms = sink->edid_caps.panel_patch.extra_t7_ms;
+ panel_config->pps.extra_delay_backlight_off = sink->edid_caps.panel_patch.extra_delay_backlight_off;
+ panel_config->pps.extra_post_t7_ms = 0;
+ panel_config->pps.extra_pre_t11_ms = 0;
+ panel_config->pps.extra_t12_ms = sink->edid_caps.panel_patch.extra_t12_ms;
+ panel_config->pps.extra_post_OUI_ms = 0;
+ // Feature DSC
+ panel_config->dsc.disable_dsc_edp = false;
+ panel_config->dsc.force_dsc_edp_policy = 0;
+}
+
+void dm_helpers_override_panel_settings(
+ struct dc_context *ctx,
+ struct dc_panel_config *panel_config)
+{
+ // Feature DSC
+ if (amdgpu_dc_debug_mask & DC_DISABLE_DSC)
+ panel_config->dsc.disable_dsc_edp = true;
+}
+
void *dm_helpers_allocate_gpu_mem(
struct dc_context *ctx,
enum dc_gpu_mem_alloc_type type,
@@ -721,30 +1153,8 @@ void *dm_helpers_allocate_gpu_mem(
long long *addr)
{
struct amdgpu_device *adev = ctx->driver_context;
- struct dal_allocation *da;
- u32 domain = (type == DC_MEM_ALLOC_TYPE_GART) ?
- AMDGPU_GEM_DOMAIN_GTT : AMDGPU_GEM_DOMAIN_VRAM;
- int ret;
-
- da = kzalloc(sizeof(struct dal_allocation), GFP_KERNEL);
- if (!da)
- return NULL;
-
- ret = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
- domain, &da->bo,
- &da->gpu_addr, &da->cpu_ptr);
-
- *addr = da->gpu_addr;
-
- if (ret) {
- kfree(da);
- return NULL;
- }
-
- /* add da to list in dm */
- list_add(&da->list, &adev->dm.da_list);
- return da->cpu_ptr;
+ return dm_allocate_gpu_mem(adev, type, size, addr);
}
void dm_helpers_free_gpu_mem(
@@ -753,17 +1163,8 @@ void dm_helpers_free_gpu_mem(
void *pvMem)
{
struct amdgpu_device *adev = ctx->driver_context;
- struct dal_allocation *da;
-
- /* walk the da list in DM */
- list_for_each_entry(da, &adev->dm.da_list, list) {
- if (pvMem == da->cpu_ptr) {
- amdgpu_bo_free_kernel(&da->bo, &da->gpu_addr, &da->cpu_ptr);
- list_del(&da->list);
- kfree(da);
- break;
- }
- }
+
+ dm_free_gpu_mem(adev, type, pvMem);
}
bool dm_helpers_dmub_outbox_interrupt_control(struct dc_context *ctx, bool enable)
@@ -805,16 +1206,211 @@ void dm_helpers_mst_enable_stream_features(const struct dc_stream_state *stream)
sizeof(new_downspread));
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-void dm_set_phyd32clk(struct dc_context *ctx, int freq_khz)
+bool dm_helpers_dp_handle_test_pattern_request(
+ struct dc_context *ctx,
+ const struct dc_link *link,
+ union link_test_pattern dpcd_test_pattern,
+ union test_misc dpcd_test_params)
{
- // FPGA programming for this clock in diags framework that
- // needs to go through dm layer, therefore leave dummy interace here
+ enum dp_test_pattern test_pattern;
+ enum dp_test_pattern_color_space test_pattern_color_space =
+ DP_TEST_PATTERN_COLOR_SPACE_UNDEFINED;
+ enum dc_color_depth requestColorDepth = COLOR_DEPTH_UNDEFINED;
+ enum dc_pixel_encoding requestPixelEncoding = PIXEL_ENCODING_UNDEFINED;
+ struct pipe_ctx *pipes = link->dc->current_state->res_ctx.pipe_ctx;
+ struct pipe_ctx *pipe_ctx = NULL;
+ struct amdgpu_dm_connector *aconnector = link->priv;
+ struct drm_device *dev = aconnector->base.dev;
+ struct dc_state *dc_state = ctx->dc->current_state;
+ struct clk_mgr *clk_mgr = ctx->dc->clk_mgr;
+ int i;
+
+ for (i = 0; i < MAX_PIPES; i++) {
+ if (pipes[i].stream == NULL)
+ continue;
+
+ if (pipes[i].stream->link == link && !pipes[i].top_pipe &&
+ !pipes[i].prev_odm_pipe) {
+ pipe_ctx = &pipes[i];
+ break;
+ }
+ }
+
+ if (pipe_ctx == NULL)
+ return false;
+
+ switch (dpcd_test_pattern.bits.PATTERN) {
+ case LINK_TEST_PATTERN_COLOR_RAMP:
+ test_pattern = DP_TEST_PATTERN_COLOR_RAMP;
+ break;
+ case LINK_TEST_PATTERN_VERTICAL_BARS:
+ test_pattern = DP_TEST_PATTERN_VERTICAL_BARS;
+ break; /* black and white */
+ case LINK_TEST_PATTERN_COLOR_SQUARES:
+ test_pattern = (dpcd_test_params.bits.DYN_RANGE ==
+ TEST_DYN_RANGE_VESA ?
+ DP_TEST_PATTERN_COLOR_SQUARES :
+ DP_TEST_PATTERN_COLOR_SQUARES_CEA);
+ break;
+ default:
+ test_pattern = DP_TEST_PATTERN_VIDEO_MODE;
+ break;
+ }
+
+ if (dpcd_test_params.bits.CLR_FORMAT == 0)
+ test_pattern_color_space = DP_TEST_PATTERN_COLOR_SPACE_RGB;
+ else
+ test_pattern_color_space = dpcd_test_params.bits.YCBCR_COEFS ?
+ DP_TEST_PATTERN_COLOR_SPACE_YCBCR709 :
+ DP_TEST_PATTERN_COLOR_SPACE_YCBCR601;
+
+ switch (dpcd_test_params.bits.BPC) {
+ case 0: // 6 bits
+ requestColorDepth = COLOR_DEPTH_666;
+ break;
+ case 1: // 8 bits
+ requestColorDepth = COLOR_DEPTH_888;
+ break;
+ case 2: // 10 bits
+ requestColorDepth = COLOR_DEPTH_101010;
+ break;
+ case 3: // 12 bits
+ requestColorDepth = COLOR_DEPTH_121212;
+ break;
+ default:
+ break;
+ }
+
+ switch (dpcd_test_params.bits.CLR_FORMAT) {
+ case 0:
+ requestPixelEncoding = PIXEL_ENCODING_RGB;
+ break;
+ case 1:
+ requestPixelEncoding = PIXEL_ENCODING_YCBCR422;
+ break;
+ case 2:
+ requestPixelEncoding = PIXEL_ENCODING_YCBCR444;
+ break;
+ default:
+ requestPixelEncoding = PIXEL_ENCODING_RGB;
+ break;
+ }
+
+ if ((requestColorDepth != COLOR_DEPTH_UNDEFINED
+ && pipe_ctx->stream->timing.display_color_depth != requestColorDepth)
+ || (requestPixelEncoding != PIXEL_ENCODING_UNDEFINED
+ && pipe_ctx->stream->timing.pixel_encoding != requestPixelEncoding)) {
+ drm_dbg(dev,
+ "original bpc %d pix encoding %d, changing to %d %d\n",
+ pipe_ctx->stream->timing.display_color_depth,
+ pipe_ctx->stream->timing.pixel_encoding,
+ requestColorDepth,
+ requestPixelEncoding);
+ pipe_ctx->stream->timing.display_color_depth = requestColorDepth;
+ pipe_ctx->stream->timing.pixel_encoding = requestPixelEncoding;
+
+ dc_link_update_dsc_config(pipe_ctx);
+
+ aconnector->timing_changed = true;
+ /* store current timing */
+ if (aconnector->timing_requested)
+ *aconnector->timing_requested = pipe_ctx->stream->timing;
+ else
+ drm_err(dev, "timing storage failed\n");
+
+ }
+
+ pipe_ctx->stream->test_pattern.type = test_pattern;
+ pipe_ctx->stream->test_pattern.color_space = test_pattern_color_space;
+
+ /* Temp W/A for compliance test failure */
+ dc_state->bw_ctx.bw.dcn.clk.p_state_change_support = false;
+ dc_state->bw_ctx.bw.dcn.clk.dramclk_khz = clk_mgr->dc_mode_softmax_enabled ?
+ clk_mgr->bw_params->dc_mode_softmax_memclk : clk_mgr->bw_params->max_memclk_mhz;
+ dc_state->bw_ctx.bw.dcn.clk.idle_dramclk_khz = dc_state->bw_ctx.bw.dcn.clk.dramclk_khz;
+ ctx->dc->clk_mgr->funcs->update_clocks(
+ ctx->dc->clk_mgr,
+ dc_state,
+ false);
+
+ dc_link_dp_set_test_pattern(
+ (struct dc_link *) link,
+ test_pattern,
+ test_pattern_color_space,
+ NULL,
+ NULL,
+ 0);
+
+ return false;
}
+void dm_set_phyd32clk(struct dc_context *ctx, int freq_khz)
+{
+ // TODO
+}
void dm_helpers_enable_periodic_detection(struct dc_context *ctx, bool enable)
{
- /* TODO: add peridic detection implementation */
+ struct amdgpu_device *adev = ctx->driver_context;
+
+ if (adev->dm.idle_workqueue) {
+ adev->dm.idle_workqueue->enable = enable;
+ if (enable && !adev->dm.idle_workqueue->running && amdgpu_dm_is_headless(adev))
+ schedule_work(&adev->dm.idle_workqueue->work);
+ }
+}
+
+void dm_helpers_dp_mst_update_branch_bandwidth(
+ struct dc_context *ctx,
+ struct dc_link *link)
+{
+ // TODO
+}
+
+static bool dm_is_freesync_pcon_whitelist(const uint32_t branch_dev_id)
+{
+ bool ret_val = false;
+
+ switch (branch_dev_id) {
+ case DP_BRANCH_DEVICE_ID_0060AD:
+ case DP_BRANCH_DEVICE_ID_00E04C:
+ case DP_BRANCH_DEVICE_ID_90CC24:
+ ret_val = true;
+ break;
+ default:
+ break;
+ }
+
+ return ret_val;
+}
+
+enum adaptive_sync_type dm_get_adaptive_sync_support_type(struct dc_link *link)
+{
+ struct dpcd_caps *dpcd_caps = &link->dpcd_caps;
+ enum adaptive_sync_type as_type = ADAPTIVE_SYNC_TYPE_NONE;
+
+ switch (dpcd_caps->dongle_type) {
+ case DISPLAY_DONGLE_DP_HDMI_CONVERTER:
+ if (dpcd_caps->adaptive_sync_caps.dp_adap_sync_caps.bits.ADAPTIVE_SYNC_SDP_SUPPORT == true &&
+ dpcd_caps->allow_invalid_MSA_timing_param == true &&
+ dm_is_freesync_pcon_whitelist(dpcd_caps->branch_dev_id))
+ as_type = FREESYNC_TYPE_PCON_IN_WHITELIST;
+ break;
+ default:
+ break;
+ }
+
+ return as_type;
+}
+
+bool dm_helpers_is_fullscreen(struct dc_context *ctx, struct dc_stream_state *stream)
+{
+ // TODO
+ return false;
+}
+
+bool dm_helpers_is_hdr_on(struct dc_context *ctx, struct dc_stream_state *stream)
+{
+ // TODO
+ return false;
}
-#endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
index 4aba0e8c84f8..0a2a3f233a0e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
@@ -120,7 +121,8 @@ static void dm_irq_work_func(struct work_struct *work)
/* Call a DAL subcomponent which registered for interrupt notification
* at INTERRUPT_LOW_IRQ_CONTEXT.
- * (The most common use is HPD interrupt) */
+ * (The most common use is HPD interrupt)
+ */
}
/*
@@ -172,7 +174,8 @@ static struct list_head *remove_irq_handler(struct amdgpu_device *adev,
if (handler_removed == false) {
/* Not necessarily an error - caller may not
- * know the context. */
+ * know the context.
+ */
return NULL;
}
@@ -261,7 +264,7 @@ validate_irq_registration_params(struct dc_interrupt_params *int_params,
static bool validate_irq_unregistration_params(enum dc_irq_source irq_source,
irq_handler_idx handler_idx)
{
- if (DAL_INVALID_IRQ_HANDLER_IDX == handler_idx) {
+ if (handler_idx == DAL_INVALID_IRQ_HANDLER_IDX) {
DRM_ERROR("DM_IRQ: invalid handler_idx==NULL!\n");
return false;
}
@@ -343,7 +346,8 @@ void *amdgpu_dm_irq_register_interrupt(struct amdgpu_device *adev,
/* This pointer will be stored by code which requested interrupt
* registration.
* The same pointer will be needed in order to unregister the
- * interrupt. */
+ * interrupt.
+ */
DRM_DEBUG_KMS(
"DM_IRQ: added irq handler: %p for: dal_src=%d, irq context=%d\n",
@@ -390,7 +394,8 @@ void amdgpu_dm_irq_unregister_interrupt(struct amdgpu_device *adev,
if (handler_list == NULL) {
/* If we got here, it means we searched all irq contexts
- * for this irq source, but the handler was not found. */
+ * for this irq source, but the handler was not found.
+ */
DRM_ERROR(
"DM_IRQ: failed to find irq handler:%p for irq_source:%d!\n",
ih, irq_source);
@@ -450,7 +455,8 @@ void amdgpu_dm_irq_fini(struct amdgpu_device *adev)
DM_IRQ_TABLE_LOCK(adev, irq_table_flags);
/* The handler was removed from the table,
* it means it is safe to flush all the 'work'
- * (because no code can schedule a new one). */
+ * (because no code can schedule a new one).
+ */
lh = &adev->dm.irq_handler_list_low_tab[src];
DM_IRQ_TABLE_UNLOCK(adev, irq_table_flags);
@@ -468,8 +474,9 @@ void amdgpu_dm_irq_fini(struct amdgpu_device *adev)
unregister_all_irq_handlers(adev);
}
-int amdgpu_dm_irq_suspend(struct amdgpu_device *adev)
+void amdgpu_dm_irq_suspend(struct amdgpu_device *adev)
{
+ struct drm_device *dev = adev_to_drm(adev);
int src;
struct list_head *hnd_list_h;
struct list_head *hnd_list_l;
@@ -494,7 +501,7 @@ int amdgpu_dm_irq_suspend(struct amdgpu_device *adev)
DM_IRQ_TABLE_UNLOCK(adev, irq_table_flags);
if (!list_empty(hnd_list_l)) {
- list_for_each_safe (entry, tmp, hnd_list_l) {
+ list_for_each_safe(entry, tmp, hnd_list_l) {
handler = list_entry(
entry,
struct amdgpu_dm_irq_handler_data,
@@ -506,10 +513,12 @@ int amdgpu_dm_irq_suspend(struct amdgpu_device *adev)
}
DM_IRQ_TABLE_UNLOCK(adev, irq_table_flags);
- return 0;
+
+ if (dev->mode_config.poll_enabled)
+ drm_kms_helper_poll_disable(dev);
}
-int amdgpu_dm_irq_resume_early(struct amdgpu_device *adev)
+void amdgpu_dm_irq_resume_early(struct amdgpu_device *adev)
{
int src;
struct list_head *hnd_list_h, *hnd_list_l;
@@ -517,7 +526,7 @@ int amdgpu_dm_irq_resume_early(struct amdgpu_device *adev)
DM_IRQ_TABLE_LOCK(adev, irq_table_flags);
- DRM_DEBUG_KMS("DM_IRQ: early resume\n");
+ drm_dbg(adev_to_drm(adev), "DM_IRQ: early resume\n");
/* re-enable short pulse interrupts HW interrupt */
for (src = DC_IRQ_SOURCE_HPD1RX; src <= DC_IRQ_SOURCE_HPD6RX; src++) {
@@ -528,19 +537,18 @@ int amdgpu_dm_irq_resume_early(struct amdgpu_device *adev)
}
DM_IRQ_TABLE_UNLOCK(adev, irq_table_flags);
-
- return 0;
}
-int amdgpu_dm_irq_resume_late(struct amdgpu_device *adev)
+void amdgpu_dm_irq_resume_late(struct amdgpu_device *adev)
{
+ struct drm_device *dev = adev_to_drm(adev);
int src;
struct list_head *hnd_list_h, *hnd_list_l;
unsigned long irq_table_flags;
DM_IRQ_TABLE_LOCK(adev, irq_table_flags);
- DRM_DEBUG_KMS("DM_IRQ: resume\n");
+ drm_dbg(adev_to_drm(adev), "DM_IRQ: resume\n");
/**
* Renable HW interrupt for HPD and only since FLIP and VBLANK
@@ -554,7 +562,9 @@ int amdgpu_dm_irq_resume_late(struct amdgpu_device *adev)
}
DM_IRQ_TABLE_UNLOCK(adev, irq_table_flags);
- return 0;
+
+ if (dev->mode_config.poll_enabled)
+ drm_kms_helper_poll_enable(dev);
}
/*
@@ -571,7 +581,7 @@ static void amdgpu_dm_irq_schedule_work(struct amdgpu_device *adev,
if (list_empty(handler_list))
return;
- list_for_each_entry (handler_data, handler_list, list) {
+ list_for_each_entry(handler_data, handler_list, list) {
if (queue_work(system_highpri_wq, &handler_data->work)) {
work_queued = true;
break;
@@ -627,7 +637,8 @@ static void amdgpu_dm_irq_immediate_work(struct amdgpu_device *adev,
&adev->dm.irq_handler_list_high_tab[irq_source],
list) {
/* Call a subcomponent which registered for immediate
- * interrupt notification */
+ * interrupt notification
+ */
handler_data->handler(handler_data->handler_arg);
}
@@ -664,7 +675,7 @@ static int amdgpu_dm_irq_handler(struct amdgpu_device *adev,
return 0;
}
-static enum dc_irq_source amdgpu_dm_hpd_to_dal_irq_source(unsigned type)
+static enum dc_irq_source amdgpu_dm_hpd_to_dal_irq_source(unsigned int type)
{
switch (type) {
case AMDGPU_HPD_1:
@@ -686,7 +697,7 @@ static enum dc_irq_source amdgpu_dm_hpd_to_dal_irq_source(unsigned type)
static int amdgpu_dm_set_hpd_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
enum dc_irq_source src = amdgpu_dm_hpd_to_dal_irq_source(type);
@@ -698,14 +709,14 @@ static int amdgpu_dm_set_hpd_irq_state(struct amdgpu_device *adev,
static inline int dm_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
- unsigned crtc_id,
+ unsigned int crtc_id,
enum amdgpu_interrupt_state state,
const enum irq_type dal_irq_type,
const char *func)
{
bool st;
enum dc_irq_source irq_source;
-
+ struct dc *dc = adev->dm.dc;
struct amdgpu_crtc *acrtc = adev->mode_info.crtcs[crtc_id];
if (!acrtc) {
@@ -723,13 +734,16 @@ static inline int dm_irq_state(struct amdgpu_device *adev,
st = (state == AMDGPU_IRQ_STATE_ENABLE);
+ if (dc && dc->caps.ips_support && dc->idle_optimizations_allowed)
+ dc_allow_idle_optimizations(dc, false);
+
dc_interrupt_set(adev->dm.dc, irq_source, st);
return 0;
}
static int amdgpu_dm_set_pflip_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
- unsigned crtc_id,
+ unsigned int crtc_id,
enum amdgpu_interrupt_state state)
{
return dm_irq_state(
@@ -743,7 +757,7 @@ static int amdgpu_dm_set_pflip_irq_state(struct amdgpu_device *adev,
static int amdgpu_dm_set_crtc_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
- unsigned crtc_id,
+ unsigned int crtc_id,
enum amdgpu_interrupt_state state)
{
return dm_irq_state(
@@ -885,27 +899,68 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev)
struct drm_device *dev = adev_to_drm(adev);
struct drm_connector *connector;
struct drm_connector_list_iter iter;
+ int irq_type;
+ int i;
+ bool use_polling = false;
+
+ /* First, clear all hpd and hpdrx interrupts */
+ for (i = DC_IRQ_SOURCE_HPD1; i <= DC_IRQ_SOURCE_HPD6RX; i++) {
+ if (!dc_interrupt_set(adev->dm.dc, i, false))
+ drm_err(dev, "Failed to clear hpd(rx) source=%d on init\n",
+ i);
+ }
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
- struct amdgpu_dm_connector *amdgpu_dm_connector =
- to_amdgpu_dm_connector(connector);
+ struct amdgpu_dm_connector *amdgpu_dm_connector;
+ const struct dc_link *dc_link;
- const struct dc_link *dc_link = amdgpu_dm_connector->dc_link;
+ use_polling |= connector->polled != DRM_CONNECTOR_POLL_HPD;
- if (DC_IRQ_SOURCE_INVALID != dc_link->irq_source_hpd) {
- dc_interrupt_set(adev->dm.dc,
- dc_link->irq_source_hpd,
- true);
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
+ amdgpu_dm_connector = to_amdgpu_dm_connector(connector);
+
+ dc_link = amdgpu_dm_connector->dc_link;
+
+ /*
+ * Get a base driver irq reference for hpd ints for the lifetime
+ * of dm. Note that only hpd interrupt types are registered with
+ * base driver; hpd_rx types aren't. IOW, amdgpu_irq_get/put on
+ * hpd_rx isn't available. DM currently controls hpd_rx
+ * explicitly with dc_interrupt_set()
+ */
+ if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) {
+ irq_type = dc_link->irq_source_hpd - DC_IRQ_SOURCE_HPD1;
+ /*
+ * TODO: There's a mismatch between mode_info.num_hpd
+ * and what bios reports as the # of connectors with hpd
+ * sources. Since the # of hpd source types registered
+ * with base driver == mode_info.num_hpd, we have to
+ * fallback to dc_interrupt_set for the remaining types.
+ */
+ if (irq_type < adev->mode_info.num_hpd) {
+ if (amdgpu_irq_get(adev, &adev->hpd_irq, irq_type))
+ drm_err(dev, "DM_IRQ: Failed get HPD for source=%d)!\n",
+ dc_link->irq_source_hpd);
+ } else {
+ dc_interrupt_set(adev->dm.dc,
+ dc_link->irq_source_hpd,
+ true);
+ }
}
- if (DC_IRQ_SOURCE_INVALID != dc_link->irq_source_hpd_rx) {
+ if (dc_link->irq_source_hpd_rx != DC_IRQ_SOURCE_INVALID) {
dc_interrupt_set(adev->dm.dc,
dc_link->irq_source_hpd_rx,
true);
}
}
drm_connector_list_iter_end(&iter);
+
+ if (use_polling)
+ drm_kms_helper_poll_init(dev);
}
/**
@@ -921,20 +976,42 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev)
struct drm_device *dev = adev_to_drm(adev);
struct drm_connector *connector;
struct drm_connector_list_iter iter;
+ int irq_type;
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
- struct amdgpu_dm_connector *amdgpu_dm_connector =
- to_amdgpu_dm_connector(connector);
- const struct dc_link *dc_link = amdgpu_dm_connector->dc_link;
+ struct amdgpu_dm_connector *amdgpu_dm_connector;
+ const struct dc_link *dc_link;
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
- dc_interrupt_set(adev->dm.dc, dc_link->irq_source_hpd, false);
+ amdgpu_dm_connector = to_amdgpu_dm_connector(connector);
+ dc_link = amdgpu_dm_connector->dc_link;
+
+ if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) {
+ irq_type = dc_link->irq_source_hpd - DC_IRQ_SOURCE_HPD1;
+
+ /* TODO: See same TODO in amdgpu_dm_hpd_init() */
+ if (irq_type < adev->mode_info.num_hpd) {
+ if (amdgpu_irq_put(adev, &adev->hpd_irq, irq_type))
+ drm_err(dev, "DM_IRQ: Failed put HPD for source=%d!\n",
+ dc_link->irq_source_hpd);
+ } else {
+ dc_interrupt_set(adev->dm.dc,
+ dc_link->irq_source_hpd,
+ false);
+ }
+ }
- if (DC_IRQ_SOURCE_INVALID != dc_link->irq_source_hpd_rx) {
+ if (dc_link->irq_source_hpd_rx != DC_IRQ_SOURCE_INVALID) {
dc_interrupt_set(adev->dm.dc,
dc_link->irq_source_hpd_rx,
false);
}
}
drm_connector_list_iter_end(&iter);
+
+ if (dev->mode_config.poll_enabled)
+ drm_kms_helper_poll_fini(dev);
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h
index 2349238a626b..4f6b58f4f90d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: MIT */
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
@@ -90,14 +91,14 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev);
* amdgpu_dm_irq_suspend - disable ASIC interrupt during suspend.
*
*/
-int amdgpu_dm_irq_suspend(struct amdgpu_device *adev);
+void amdgpu_dm_irq_suspend(struct amdgpu_device *adev);
/**
* amdgpu_dm_irq_resume_early - enable HPDRX ASIC interrupts during resume.
* amdgpu_dm_irq_resume - enable ASIC interrupt during resume.
*
*/
-int amdgpu_dm_irq_resume_early(struct amdgpu_device *adev);
-int amdgpu_dm_irq_resume_late(struct amdgpu_device *adev);
+void amdgpu_dm_irq_resume_early(struct amdgpu_device *adev);
+void amdgpu_dm_irq_resume_late(struct amdgpu_device *adev);
#endif /* __AMDGPU_DM_IRQ_H__ */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h
index 79b5f9999fec..3c9995275cbd 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: MIT */
/*
* Copyright 2020 Advanced Micro Devices, Inc.
*
@@ -33,13 +34,15 @@ struct dm_irq_params {
struct mod_vrr_params vrr_params;
struct dc_stream_state *stream;
int active_planes;
- bool allow_psr_entry;
+ bool allow_sr_entry;
struct mod_freesync_config freesync_config;
#ifdef CONFIG_DEBUG_FS
enum amdgpu_dm_pipe_crc_source crc_src;
#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
- struct crc_window_parm crc_window;
+ struct crc_window_param window_param[MAX_CRC_WINDOW_NUM];
+ /* Whether at least one CRC window is activated */
+ bool crc_window_activated;
#endif
#endif
};
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index cc34a35d0bcb..dbd1da4d85d3 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2012-15 Advanced Micro Devices, Inc.
*
@@ -23,38 +24,46 @@
*
*/
+#include <linux/vmalloc.h>
+#include <drm/display/drm_dp_helper.h>
+#include <drm/display/drm_dp_mst_helper.h>
#include <drm/drm_atomic.h>
#include <drm/drm_atomic_helper.h>
-#include <drm/drm_dp_mst_helper.h>
-#include <drm/drm_dp_helper.h>
+#include <drm/drm_fixed.h>
+#include <drm/drm_edid.h>
#include "dm_services.h"
#include "amdgpu.h"
#include "amdgpu_dm.h"
#include "amdgpu_dm_mst_types.h"
+#include "amdgpu_dm_hdcp.h"
#include "dc.h"
#include "dm_helpers.h"
-#include "dc_link_ddc.h"
#include "ddc_service_types.h"
#include "dpcd_defs.h"
-#include "i2caux_interface.h"
#include "dmub_cmd.h"
#if defined(CONFIG_DEBUG_FS)
#include "amdgpu_dm_debugfs.h"
#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-#include "dc/dcn20/dcn20_resource.h"
-#endif
+#include "dc/resource/dcn20/dcn20_resource.h"
+
+#define PEAK_FACTOR_X1000 1006
+/*
+ * This function handles both native AUX and I2C-Over-AUX transactions.
+ */
static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
struct drm_dp_aux_msg *msg)
{
ssize_t result = 0;
struct aux_payload payload;
enum aux_return_code_type operation_result;
+ struct amdgpu_device *adev;
+ struct ddc_service *ddc;
+ uint8_t copy[16];
if (WARN_ON(msg->size > 16))
return -E2BIG;
@@ -70,13 +79,43 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
(msg->request & DP_AUX_I2C_WRITE_STATUS_UPDATE) != 0;
payload.defer_delay = 0;
+ if (payload.write) {
+ memcpy(copy, msg->buffer, msg->size);
+ payload.data = copy;
+ }
+
result = dc_link_aux_transfer_raw(TO_DM_AUX(aux)->ddc_service, &payload,
&operation_result);
- if (payload.write && result >= 0)
- result = msg->size;
+ /*
+ * w/a for certain Intel platforms where HPD is unexpectedly pulled low during
+ * the 1st sideband message transaction, yielding AUX_RET_ERROR_HPD_DISCON.
+ * The AUX transaction succeeds in such a case, therefore bypass the error.
+ */
+ ddc = TO_DM_AUX(aux)->ddc_service;
+ adev = ddc->ctx->driver_context;
+ if (adev->dm.aux_hpd_discon_quirk) {
+ if (msg->address == DP_SIDEBAND_MSG_DOWN_REQ_BASE &&
+ operation_result == AUX_RET_ERROR_HPD_DISCON) {
+ result = msg->size;
+ operation_result = AUX_RET_SUCCESS;
+ }
+ }
+
+ /*
+ * A result equal to 0 includes the AUX_DEFER/I2C_DEFER cases
+ */
+ if (payload.write && result >= 0) {
+ if (result) {
+ /*one byte indicating partially written bytes*/
+ drm_dbg_dp(adev_to_drm(adev), "AUX partially written\n");
+ result = payload.data[0];
+ } else if (!payload.reply[0])
+ /*I2C_ACK|AUX_ACK*/
+ result = msg->size;
+ }
- if (result < 0)
+ if (result < 0) {
switch (operation_result) {
case AUX_RET_SUCCESS:
break;
@@ -95,6 +134,13 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
break;
}
+ drm_dbg_dp(adev_to_drm(adev), "DP AUX transfer fail:%d\n", operation_result);
+ }
+
+ if (payload.reply[0])
+ drm_dbg_dp(adev_to_drm(adev), "AUX reply command not ACK: 0x%02x.",
+ payload.reply[0]);
+
return result;
}
@@ -110,10 +156,10 @@ dm_dp_mst_connector_destroy(struct drm_connector *connector)
dc_sink_release(aconnector->dc_sink);
}
- kfree(aconnector->edid);
+ drm_edid_free(aconnector->drm_edid);
drm_connector_cleanup(connector);
- drm_dp_mst_put_port_malloc(aconnector->port);
+ drm_dp_mst_put_port_malloc(aconnector->mst_output_port);
kfree(aconnector);
}
@@ -125,7 +171,7 @@ amdgpu_dm_mst_connector_late_register(struct drm_connector *connector)
int r;
r = drm_dp_mst_connector_late_register(connector,
- amdgpu_dm_connector->port);
+ amdgpu_dm_connector->mst_output_port);
if (r < 0)
return r;
@@ -136,14 +182,49 @@ amdgpu_dm_mst_connector_late_register(struct drm_connector *connector)
return 0;
}
+
+static inline void
+amdgpu_dm_mst_reset_mst_connector_setting(struct amdgpu_dm_connector *aconnector)
+{
+ aconnector->drm_edid = NULL;
+ aconnector->dsc_aux = NULL;
+ aconnector->mst_output_port->passthrough_aux = NULL;
+ aconnector->mst_local_bw = 0;
+ aconnector->vc_full_pbn = 0;
+}
+
static void
amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector)
{
- struct amdgpu_dm_connector *amdgpu_dm_connector =
+ struct amdgpu_dm_connector *aconnector =
to_amdgpu_dm_connector(connector);
- struct drm_dp_mst_port *port = amdgpu_dm_connector->port;
+ struct drm_dp_mst_port *port = aconnector->mst_output_port;
+ struct amdgpu_dm_connector *root = aconnector->mst_root;
+ struct dc_link *dc_link = aconnector->dc_link;
+ struct dc_sink *dc_sink = aconnector->dc_sink;
drm_dp_mst_connector_early_unregister(connector, port);
+
+ /*
+ * Release the dc_sink of a connector whose attached port is
+ * no longer in the MST topology
+ */
+ drm_modeset_lock(&root->mst_mgr.base.lock, NULL);
+ if (dc_sink) {
+ if (dc_link->sink_count)
+ dc_link_remove_remote_sink(dc_link, dc_sink);
+
+ drm_dbg_dp(connector->dev,
+ "DM_MST: remove remote sink 0x%p, %d remaining\n",
+ dc_sink, dc_link->sink_count);
+
+ dc_sink_release(dc_sink);
+ aconnector->dc_sink = NULL;
+ amdgpu_dm_mst_reset_mst_connector_setting(aconnector);
+ }
+
+ aconnector->mst_status = MST_STATUS_DEFAULT;
+ drm_modeset_unlock(&root->mst_mgr.base.lock);
}
static const struct drm_connector_funcs dm_dp_mst_connector_funcs = {
@@ -158,8 +239,7 @@ static const struct drm_connector_funcs dm_dp_mst_connector_funcs = {
.early_unregister = amdgpu_dm_mst_connector_early_unregister,
};
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-static bool needs_dsc_aux_workaround(struct dc_link *link)
+bool needs_dsc_aux_workaround(struct dc_link *link)
{
if (link->dpcd_caps.branch_dev_id == DP_BRANCH_DEVICE_ID_90CC24 &&
(link->dpcd_caps.dpcd_rev.raw == DPCD_REV_14 || link->dpcd_caps.dpcd_rev.raw == DPCD_REV_12) &&
@@ -169,10 +249,26 @@ static bool needs_dsc_aux_workaround(struct dc_link *link)
return false;
}
+#if defined(CONFIG_DRM_AMD_DC_FP)
+static bool is_synaptics_cascaded_panamera(struct dc_link *link, struct drm_dp_mst_port *port)
+{
+ u8 branch_vendor_data[4] = { 0 }; // Vendor data 0x50C ~ 0x50F
+
+ if (drm_dp_dpcd_read(port->mgr->aux, DP_BRANCH_VENDOR_SPECIFIC_START, &branch_vendor_data, 4) == 4) {
+ if (link->dpcd_caps.branch_dev_id == DP_BRANCH_DEVICE_ID_90CC24 &&
+ IS_SYNAPTICS_CASCADED_PANAMERA(link->dpcd_caps.branch_dev_name, branch_vendor_data)) {
+ DRM_INFO("Synaptics Cascaded MST hub\n");
+ return true;
+ }
+ }
+
+ return false;
+}
+
static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnector)
{
struct dc_sink *dc_sink = aconnector->dc_sink;
- struct drm_dp_mst_port *port = aconnector->port;
+ struct drm_dp_mst_port *port = aconnector->mst_output_port;
u8 dsc_caps[16] = { 0 };
u8 dsc_branch_dec_caps_raw[3] = { 0 }; // DSC branch decoder caps 0xA0 ~ 0xA2
u8 *dsc_branch_dec_caps = NULL;
@@ -190,7 +286,11 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto
*/
if (!aconnector->dsc_aux && !port->parent->port_parent &&
needs_dsc_aux_workaround(aconnector->dc_link))
- aconnector->dsc_aux = &aconnector->mst_port->dm_dp_aux.aux;
+ aconnector->dsc_aux = &aconnector->mst_root->dm_dp_aux.aux;
+
+ /* synaptics cascaded MST hub case */
+ if (is_synaptics_cascaded_panamera(aconnector->dc_link, port))
+ aconnector->dsc_aux = port->mgr->aux;
if (!aconnector->dsc_aux)
return false;
@@ -211,6 +311,53 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto
}
#endif
+static bool retrieve_downstream_port_device(struct amdgpu_dm_connector *aconnector)
+{
+ union dp_downstream_port_present ds_port_present;
+
+ if (!aconnector->dsc_aux)
+ return false;
+
+ if (drm_dp_dpcd_read(aconnector->dsc_aux, DP_DOWNSTREAMPORT_PRESENT, &ds_port_present, 1) < 0) {
+ DRM_INFO("Failed to read downstream_port_present 0x05 from DFP of branch device\n");
+ return false;
+ }
+
+ aconnector->mst_downstream_port_present = ds_port_present;
+ DRM_INFO("Downstream port present %d, type %d\n",
+ ds_port_present.fields.PORT_PRESENT, ds_port_present.fields.PORT_TYPE);
+
+ return true;
+}
+
+static bool retrieve_branch_specific_data(struct amdgpu_dm_connector *aconnector)
+{
+ struct drm_connector *connector = &aconnector->base;
+ struct drm_dp_mst_port *port = aconnector->mst_output_port;
+ struct drm_dp_mst_port *port_parent;
+ struct drm_dp_aux *immediate_upstream_aux;
+ struct drm_dp_desc branch_desc;
+
+ if (!port->parent)
+ return false;
+
+ port_parent = port->parent->port_parent;
+
+ immediate_upstream_aux = port_parent ? &port_parent->aux : port->mgr->aux;
+
+ if (drm_dp_read_desc(immediate_upstream_aux, &branch_desc, true))
+ return false;
+
+ aconnector->branch_ieee_oui = (branch_desc.ident.oui[0] << 16) +
+ (branch_desc.ident.oui[1] << 8) +
+ (branch_desc.ident.oui[2]);
+
+ drm_dbg_dp(port->aux.drm_dev, "MST branch oui 0x%x detected at %s\n",
+ aconnector->branch_ieee_oui, connector->name);
+
+ return true;
+}
+
static int dm_dp_mst_get_modes(struct drm_connector *connector)
{
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
@@ -219,12 +366,18 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
if (!aconnector)
return drm_add_edid_modes(connector, NULL);
- if (!aconnector->edid) {
- struct edid *edid;
- edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port);
+ if (!aconnector->drm_edid) {
+ const struct drm_edid *drm_edid;
+
+ drm_edid = drm_dp_mst_edid_read(connector,
+ &aconnector->mst_root->mst_mgr,
+ aconnector->mst_output_port);
- if (!edid) {
- drm_connector_update_edid_property(
+ if (!drm_edid) {
+ amdgpu_dm_set_mst_status(&aconnector->mst_status,
+ MST_REMOTE_EDID, false);
+
+ drm_edid_connector_update(
&aconnector->base,
NULL);
@@ -246,6 +399,11 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
return 0;
}
+ drm_dbg_dp(connector->dev,
+ "DM_MST: add remote sink 0x%p, %d remaining\n",
+ dc_sink,
+ aconnector->dc_link->sink_count);
+
dc_sink->priv = aconnector;
aconnector->dc_sink = dc_sink;
}
@@ -253,7 +411,9 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
return ret;
}
- aconnector->edid = edid;
+ aconnector->drm_edid = drm_edid;
+ amdgpu_dm_set_mst_status(&aconnector->mst_status,
+ MST_REMOTE_EDID, true);
}
if (aconnector->dc_sink && aconnector->dc_sink->sink_signal == SIGNAL_TYPE_VIRTUAL) {
@@ -266,10 +426,13 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
struct dc_sink_init_data init_params = {
.link = aconnector->dc_link,
.sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST };
+ const struct edid *edid;
+
+ edid = drm_edid_raw(aconnector->drm_edid); // FIXME: Get rid of drm_edid_raw()
dc_sink = dc_link_add_remote_sink(
aconnector->dc_link,
- (uint8_t *)aconnector->edid,
- (aconnector->edid->extensions + 1) * EDID_LENGTH,
+ (uint8_t *)edid,
+ (edid->extensions + 1) * EDID_LENGTH,
&init_params);
if (!dc_sink) {
@@ -277,26 +440,57 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
return 0;
}
+ drm_dbg_dp(connector->dev,
+ "DM_MST: add remote sink 0x%p, %d remaining\n",
+ dc_sink, aconnector->dc_link->sink_count);
+
dc_sink->priv = aconnector;
/* dc_link_add_remote_sink returns a new reference */
aconnector->dc_sink = dc_sink;
+ /* when display is unplugged from mst hub, connector will be
+ * destroyed within dm_dp_mst_connector_destroy. connector
+ * hdcp properties, like type, undesired, desired, enabled,
+ * will be lost. So, save hdcp properties into hdcp_work within
+ * amdgpu_dm_atomic_commit_tail. if the same display is
+ * plugged back with same display index, its hdcp properties
+ * will be retrieved from hdcp_work within dm_dp_mst_get_modes
+ */
+ if (aconnector->dc_sink && connector->state) {
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ if (adev->dm.hdcp_workqueue) {
+ struct hdcp_workqueue *hdcp_work = adev->dm.hdcp_workqueue;
+ struct hdcp_workqueue *hdcp_w =
+ &hdcp_work[aconnector->dc_link->link_index];
+
+ connector->state->hdcp_content_type =
+ hdcp_w->hdcp_content_type[connector->index];
+ connector->state->content_protection =
+ hdcp_w->content_protection[connector->index];
+ }
+ }
+
if (aconnector->dc_sink) {
amdgpu_dm_update_freesync_caps(
- connector, aconnector->edid);
+ connector, aconnector->drm_edid);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
+#if defined(CONFIG_DRM_AMD_DC_FP)
if (!validate_dsc_caps_on_connector(aconnector))
memset(&aconnector->dc_sink->dsc_caps,
0, sizeof(aconnector->dc_sink->dsc_caps));
#endif
+
+ if (!retrieve_downstream_port_device(aconnector))
+ memset(&aconnector->mst_downstream_port_present,
+ 0, sizeof(aconnector->mst_downstream_port_present));
}
}
- drm_connector_update_edid_property(
- &aconnector->base, aconnector->edid);
+ drm_edid_connector_update(&aconnector->base, aconnector->drm_edid);
- ret = drm_add_edid_modes(connector, aconnector->edid);
+ ret = drm_edid_connector_add_modes(connector);
return ret;
}
@@ -307,8 +501,7 @@ dm_mst_atomic_best_encoder(struct drm_connector *connector,
{
struct drm_connector_state *connector_state = drm_atomic_get_new_connector_state(state,
connector);
- struct drm_device *dev = connector->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_device *adev = drm_to_adev(connector->dev);
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(connector_state->crtc);
return &adev->dm.mst_encoders[acrtc->crtc_id].base;
@@ -319,44 +512,80 @@ dm_dp_mst_detect(struct drm_connector *connector,
struct drm_modeset_acquire_ctx *ctx, bool force)
{
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
- struct amdgpu_dm_connector *master = aconnector->mst_port;
+ struct amdgpu_dm_connector *master = aconnector->mst_root;
+ struct drm_dp_mst_port *port = aconnector->mst_output_port;
+ int connection_status;
if (drm_connector_is_unregistered(connector))
return connector_status_disconnected;
- return drm_dp_mst_detect_port(connector, ctx, &master->mst_mgr,
- aconnector->port);
-}
+ connection_status = drm_dp_mst_detect_port(connector, ctx, &master->mst_mgr,
+ aconnector->mst_output_port);
-static int dm_dp_mst_atomic_check(struct drm_connector *connector,
- struct drm_atomic_state *state)
-{
- struct drm_connector_state *new_conn_state =
- drm_atomic_get_new_connector_state(state, connector);
- struct drm_connector_state *old_conn_state =
- drm_atomic_get_old_connector_state(state, connector);
- struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
- struct drm_crtc_state *new_crtc_state;
- struct drm_dp_mst_topology_mgr *mst_mgr;
- struct drm_dp_mst_port *mst_port;
+ if (port->pdt != DP_PEER_DEVICE_NONE && !port->dpcd_rev) {
+ uint8_t dpcd_rev;
+ int ret;
- mst_port = aconnector->port;
- mst_mgr = &aconnector->mst_port->mst_mgr;
+ ret = drm_dp_dpcd_readb(&port->aux, DP_DP13_DPCD_REV, &dpcd_rev);
- if (!old_conn_state->crtc)
- return 0;
+ if (ret == 1) {
+ port->dpcd_rev = dpcd_rev;
- if (new_conn_state->crtc) {
- new_crtc_state = drm_atomic_get_new_crtc_state(state, new_conn_state->crtc);
- if (!new_crtc_state ||
- !drm_atomic_crtc_needs_modeset(new_crtc_state) ||
- new_crtc_state->enable)
- return 0;
+ /* Could be DP1.2 DP Rx case*/
+ if (!dpcd_rev) {
+ ret = drm_dp_dpcd_readb(&port->aux, DP_DPCD_REV, &dpcd_rev);
+
+ if (ret == 1)
+ port->dpcd_rev = dpcd_rev;
+ }
+
+ if (!dpcd_rev)
+ DRM_DEBUG_KMS("Can't decide DPCD revision number!");
}
- return drm_dp_atomic_release_vcpi_slots(state,
- mst_mgr,
- mst_port);
+ /*
+ * Could be legacy sink, logical port etc on DP1.2.
+ * Will get Nack under these cases when issuing a remote
+ * DPCD read.
+ */
+ if (ret != 1)
+ DRM_DEBUG_KMS("Can't access DPCD");
+ } else if (port->pdt == DP_PEER_DEVICE_NONE) {
+ port->dpcd_rev = 0;
+ }
+
+ /*
+ * Release dc_sink for a connector whose unplug event is notified by a CSN msg
+ */
+ if (connection_status == connector_status_disconnected && aconnector->dc_sink) {
+ if (aconnector->dc_link->sink_count)
+ dc_link_remove_remote_sink(aconnector->dc_link, aconnector->dc_sink);
+
+ drm_dbg_dp(connector->dev,
+ "DM_MST: remove remote sink 0x%p, %d remaining\n",
+ aconnector->dc_link,
+ aconnector->dc_link->sink_count);
+
+ dc_sink_release(aconnector->dc_sink);
+ aconnector->dc_sink = NULL;
+ amdgpu_dm_mst_reset_mst_connector_setting(aconnector);
+
+ amdgpu_dm_set_mst_status(&aconnector->mst_status,
+ MST_REMOTE_EDID | MST_ALLOCATE_NEW_PAYLOAD | MST_CLEAR_ALLOCATED_PAYLOAD,
+ false);
+ }
+
+ return connection_status;
+}
+
+static int dm_dp_mst_atomic_check(struct drm_connector *connector,
+ struct drm_atomic_state *state)
+{
+ struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+ struct drm_dp_mst_topology_mgr *mst_mgr = &aconnector->mst_root->mst_mgr;
+ struct drm_dp_mst_port *mst_port = aconnector->mst_output_port;
+
+ return drm_dp_atomic_release_time_slots(state, mst_mgr, mst_port);
}
static const struct drm_connector_helper_funcs dm_dp_mst_connector_helper_funcs = {
@@ -370,7 +599,6 @@ static const struct drm_connector_helper_funcs dm_dp_mst_connector_helper_funcs
static void amdgpu_dm_encoder_destroy(struct drm_encoder *encoder)
{
drm_encoder_cleanup(encoder);
- kfree(encoder);
}
static const struct drm_encoder_funcs amdgpu_dm_encoder_funcs = {
@@ -416,15 +644,20 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
if (!aconnector)
return NULL;
+ DRM_DEBUG_DRIVER("%s: Create aconnector 0x%p for port 0x%p\n", __func__, aconnector, port);
+
connector = &aconnector->base;
- aconnector->port = port;
- aconnector->mst_port = master;
+ aconnector->mst_output_port = port;
+ aconnector->mst_root = master;
+ amdgpu_dm_set_mst_status(&aconnector->mst_status,
+ MST_PROBE, true);
- if (drm_connector_init(
+ if (drm_connector_dynamic_init(
dev,
connector,
&dm_dp_mst_connector_funcs,
- DRM_MODE_CONNECTOR_DisplayPort)) {
+ DRM_MODE_CONNECTOR_DisplayPort,
+ NULL)) {
kfree(aconnector);
return NULL;
}
@@ -458,9 +691,15 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
&connector->base,
dev->mode_config.tile_property,
0);
+ connector->colorspace_property = master->base.colorspace_property;
+ if (connector->colorspace_property)
+ drm_connector_attach_colorspace_property(connector);
drm_connector_set_path_property(connector, pathprop);
+ if (!retrieve_branch_specific_data(aconnector))
+ aconnector->branch_ieee_oui = 0;
+
/*
* Initialize connector state before adding the connectror to drm and
* framebuffer lists
@@ -472,8 +711,118 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
return connector;
}
+void dm_handle_mst_sideband_msg_ready_event(
+ struct drm_dp_mst_topology_mgr *mgr,
+ enum mst_msg_ready_type msg_rdy_type)
+{
+ uint8_t esi[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = { 0 };
+ uint8_t dret;
+ bool new_irq_handled = false;
+ int dpcd_addr;
+ uint8_t dpcd_bytes_to_read;
+ const uint8_t max_process_count = 30;
+ uint8_t process_count = 0;
+ u8 retry;
+ struct amdgpu_dm_connector *aconnector =
+ container_of(mgr, struct amdgpu_dm_connector, mst_mgr);
+
+
+ const struct dc_link_status *link_status = dc_link_get_status(aconnector->dc_link);
+
+ if (link_status->dpcd_caps->dpcd_rev.raw < 0x12) {
+ dpcd_bytes_to_read = DP_LANE0_1_STATUS - DP_SINK_COUNT;
+ /* DPCD 0x200 - 0x201 for downstream IRQ */
+ dpcd_addr = DP_SINK_COUNT;
+ } else {
+ dpcd_bytes_to_read = DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI;
+ /* DPCD 0x2002 - 0x2005 for downstream IRQ */
+ dpcd_addr = DP_SINK_COUNT_ESI;
+ }
+
+ mutex_lock(&aconnector->handle_mst_msg_ready);
+
+ while (process_count < max_process_count) {
+ u8 ack[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = {};
+
+ process_count++;
+
+ dret = drm_dp_dpcd_read(
+ &aconnector->dm_dp_aux.aux,
+ dpcd_addr,
+ esi,
+ dpcd_bytes_to_read);
+
+ if (dret != dpcd_bytes_to_read) {
+ DRM_DEBUG_KMS("DPCD read and acked number is not as expected!");
+ break;
+ }
+
+ DRM_DEBUG_DRIVER("ESI %02x %02x %02x\n", esi[0], esi[1], esi[2]);
+
+ switch (msg_rdy_type) {
+ case DOWN_REP_MSG_RDY_EVENT:
+ /* Only handle DOWN_REP_MSG_RDY case*/
+ esi[1] &= DP_DOWN_REP_MSG_RDY;
+ break;
+ case UP_REQ_MSG_RDY_EVENT:
+ /* Only handle UP_REQ_MSG_RDY case*/
+ esi[1] &= DP_UP_REQ_MSG_RDY;
+ break;
+ default:
+ /* Handle both cases*/
+ esi[1] &= (DP_DOWN_REP_MSG_RDY | DP_UP_REQ_MSG_RDY);
+ break;
+ }
+
+ if (!esi[1])
+ break;
+
+ /* handle MST irq */
+ if (aconnector->mst_mgr.mst_state)
+ drm_dp_mst_hpd_irq_handle_event(&aconnector->mst_mgr,
+ esi,
+ ack,
+ &new_irq_handled);
+
+ if (new_irq_handled) {
+ /* ACK at DPCD to notify down stream */
+ for (retry = 0; retry < 3; retry++) {
+ ssize_t wret;
+
+ wret = drm_dp_dpcd_writeb(&aconnector->dm_dp_aux.aux,
+ dpcd_addr + 1,
+ ack[1]);
+ if (wret == 1)
+ break;
+ }
+
+ if (retry == 3) {
+ DRM_ERROR("Failed to ack MST event.\n");
+ break;
+ }
+
+ drm_dp_mst_hpd_irq_send_new_request(&aconnector->mst_mgr);
+
+ new_irq_handled = false;
+ } else {
+ break;
+ }
+ }
+
+ mutex_unlock(&aconnector->handle_mst_msg_ready);
+
+ if (process_count == max_process_count)
+ DRM_DEBUG_DRIVER("Loop exceeded max iterations\n");
+}
+
+static void dm_handle_mst_down_rep_msg_ready(struct drm_dp_mst_topology_mgr *mgr)
+{
+ dm_handle_mst_sideband_msg_ready_event(mgr, DOWN_REP_MSG_RDY_EVENT);
+}
+
static const struct drm_dp_mst_topology_cbs dm_mst_cbs = {
.add_connector = dm_dp_add_mst_connector,
+ .poll_hpd_irq = dm_handle_mst_down_rep_msg_ready,
};
void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
@@ -492,35 +841,34 @@ void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
drm_dp_aux_init(&aconnector->dm_dp_aux.aux);
drm_dp_cec_register_connector(&aconnector->dm_dp_aux.aux,
&aconnector->base);
+ drm_dp_dpcd_set_probe(&aconnector->dm_dp_aux.aux, false);
if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_eDP)
return;
dc_link_dp_get_max_link_enc_cap(aconnector->dc_link, &max_link_enc_cap);
aconnector->mst_mgr.cbs = &dm_mst_cbs;
- drm_dp_mst_topology_mgr_init(
- &aconnector->mst_mgr,
- adev_to_drm(dm->adev),
- &aconnector->dm_dp_aux.aux,
- 16,
- 4,
- max_link_enc_cap.lane_count,
- drm_dp_bw_code_to_link_rate(max_link_enc_cap.link_rate),
- aconnector->connector_id);
+ drm_dp_mst_topology_mgr_init(&aconnector->mst_mgr, adev_to_drm(dm->adev),
+ &aconnector->dm_dp_aux.aux, 16, 4, aconnector->connector_id);
drm_connector_attach_dp_subconnector_property(&aconnector->base);
}
-int dm_mst_get_pbn_divider(struct dc_link *link)
+uint32_t dm_mst_get_pbn_divider(struct dc_link *link)
{
+ uint32_t pbn_div_x100;
+ uint64_t dividend, divisor;
+
if (!link)
return 0;
- return dc_link_bandwidth_kbps(link,
- dc_link_get_link_cap(link)) / (8 * 1000 * 54);
-}
+ dividend = (uint64_t)dc_link_bandwidth_kbps(link, dc_link_get_link_cap(link)) * 100;
+ divisor = 8 * 1000 * 54;
+
+ pbn_div_x100 = div64_u64(dividend, divisor);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
+ return dfixed_const(pbn_div_x100) / 100;
+}
struct dsc_mst_fairness_params {
struct dc_crtc_timing *timing;
@@ -535,13 +883,29 @@ struct dsc_mst_fairness_params {
struct amdgpu_dm_connector *aconnector;
};
-static int kbps_to_peak_pbn(int kbps)
+#if defined(CONFIG_DRM_AMD_DC_FP)
+static uint64_t kbps_to_pbn(int kbps, bool is_peak_pbn)
+{
+ uint64_t effective_kbps = (uint64_t)kbps;
+
+ if (is_peak_pbn) { // add 0.6% (1006/1000) overhead into effective kbps
+ effective_kbps *= 1006;
+ effective_kbps = div_u64(effective_kbps, 1000);
+ }
+
+ return (uint64_t) DIV64_U64_ROUND_UP(effective_kbps * 64, (54 * 8 * 1000));
+}
+
+static uint32_t pbn_to_kbps(unsigned int pbn, bool with_margin)
{
- u64 peak_kbps = kbps;
+ uint64_t pbn_effective = (uint64_t)pbn;
+
+ if (with_margin) // deduct 0.6% (994/1000) overhead from effective pbn
+ pbn_effective *= (1000000 / PEAK_FACTOR_X1000);
+ else
+ pbn_effective *= 1000;
- peak_kbps *= 1006;
- peak_kbps = div_u64(peak_kbps, 1000);
- return (int) DIV64_U64_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000));
+ return DIV_U64_ROUND_UP(pbn_effective * 8 * 54, 64);
}
static void set_dsc_configs_from_fairness_vars(struct dsc_mst_fairness_params *params,
@@ -549,17 +913,24 @@ static void set_dsc_configs_from_fairness_vars(struct dsc_mst_fairness_params *p
int count,
int k)
{
+ struct drm_connector *drm_connector;
int i;
+ struct dc_dsc_config_options dsc_options = {0};
for (i = 0; i < count; i++) {
+ drm_connector = &params[i].aconnector->base;
+
+ dc_dsc_get_default_config_option(params[i].sink->ctx->dc, &dsc_options);
+ dsc_options.max_target_bpp_limit_override_x16 = drm_connector->display_info.max_dsc_bpp * 16;
+
memset(&params[i].timing->dsc_cfg, 0, sizeof(params[i].timing->dsc_cfg));
if (vars[i + k].dsc_enabled && dc_dsc_compute_config(
params[i].sink->ctx->dc->res_pool->dscs[0],
&params[i].sink->dsc_caps.dsc_dec_caps,
- params[i].sink->ctx->dc->debug.dsc_min_slice_height_override,
- params[i].sink->edid_caps.panel_patch.max_dsc_target_bpp_limit,
+ &dsc_options,
0,
params[i].timing,
+ dc_link_get_highest_encoding_format(params[i].aconnector->dc_link),
&params[i].timing->dsc_cfg)) {
params[i].timing->flags.DSC = 1;
@@ -576,6 +947,21 @@ static void set_dsc_configs_from_fairness_vars(struct dsc_mst_fairness_params *p
} else {
params[i].timing->flags.DSC = 0;
}
+ params[i].timing->dsc_cfg.mst_pbn = vars[i + k].pbn;
+ }
+
+ for (i = 0; i < count; i++) {
+ if (params[i].sink) {
+ if (params[i].sink->sink_signal != SIGNAL_TYPE_VIRTUAL &&
+ params[i].sink->sink_signal != SIGNAL_TYPE_NONE)
+ DRM_DEBUG_DRIVER("MST_DSC %s i=%d dispname=%s\n", __func__, i,
+ params[i].sink->edid_caps.display_name);
+ }
+
+ DRM_DEBUG_DRIVER("MST_DSC dsc=%d bits_per_pixel=%d pbn=%d\n",
+ params[i].timing->flags.DSC,
+ params[i].timing->dsc_cfg.bits_per_pixel,
+ vars[i + k].pbn);
}
}
@@ -584,23 +970,31 @@ static int bpp_x16_from_pbn(struct dsc_mst_fairness_params param, int pbn)
struct dc_dsc_config dsc_config;
u64 kbps;
- kbps = div_u64((u64)pbn * 994 * 8 * 54, 64);
+ struct drm_connector *drm_connector = &param.aconnector->base;
+ struct dc_dsc_config_options dsc_options = {0};
+
+ dc_dsc_get_default_config_option(param.sink->ctx->dc, &dsc_options);
+ dsc_options.max_target_bpp_limit_override_x16 = drm_connector->display_info.max_dsc_bpp * 16;
+
+ kbps = pbn_to_kbps(pbn, false);
dc_dsc_compute_config(
param.sink->ctx->dc->res_pool->dscs[0],
&param.sink->dsc_caps.dsc_dec_caps,
- param.sink->ctx->dc->debug.dsc_min_slice_height_override,
- param.sink->edid_caps.panel_patch.max_dsc_target_bpp_limit,
- (int) kbps, param.timing, &dsc_config);
+ &dsc_options,
+ (int) kbps, param.timing,
+ dc_link_get_highest_encoding_format(param.aconnector->dc_link),
+ &dsc_config);
return dsc_config.bits_per_pixel;
}
-static void increase_dsc_bpp(struct drm_atomic_state *state,
- struct dc_link *dc_link,
- struct dsc_mst_fairness_params *params,
- struct dsc_mst_fairness_vars *vars,
- int count,
- int k)
+static int increase_dsc_bpp(struct drm_atomic_state *state,
+ struct drm_dp_mst_topology_state *mst_state,
+ struct dc_link *dc_link,
+ struct dsc_mst_fairness_params *params,
+ struct dsc_mst_fairness_vars *vars,
+ int count,
+ int k)
{
int i;
bool bpp_increased[MAX_PIPES];
@@ -608,16 +1002,14 @@ static void increase_dsc_bpp(struct drm_atomic_state *state,
int min_initial_slack;
int next_index;
int remaining_to_increase = 0;
- int pbn_per_timeslot;
int link_timeslots_used;
int fair_pbn_alloc;
-
- pbn_per_timeslot = dm_mst_get_pbn_divider(dc_link);
+ int ret = 0;
for (i = 0; i < count; i++) {
if (vars[i + k].dsc_enabled) {
initial_slack[i] =
- kbps_to_peak_pbn(params[i].bw_range.max_kbps) - vars[i + k].pbn;
+ kbps_to_pbn(params[i].bw_range.max_kbps, false) - vars[i + k].pbn;
bpp_increased[i] = false;
remaining_to_increase += 1;
} else {
@@ -644,61 +1036,67 @@ static void increase_dsc_bpp(struct drm_atomic_state *state,
link_timeslots_used = 0;
for (i = 0; i < count; i++)
- link_timeslots_used += DIV_ROUND_UP(vars[i + k].pbn, pbn_per_timeslot);
+ link_timeslots_used += DIV_ROUND_UP(vars[i + k].pbn, dfixed_trunc(mst_state->pbn_div));
- fair_pbn_alloc = (63 - link_timeslots_used) / remaining_to_increase * pbn_per_timeslot;
+ fair_pbn_alloc =
+ (63 - link_timeslots_used) / remaining_to_increase * dfixed_trunc(mst_state->pbn_div);
if (initial_slack[next_index] > fair_pbn_alloc) {
vars[next_index].pbn += fair_pbn_alloc;
- if (drm_dp_atomic_find_vcpi_slots(state,
- params[next_index].port->mgr,
- params[next_index].port,
- vars[next_index].pbn,
- pbn_per_timeslot) < 0)
- return;
- if (!drm_dp_mst_atomic_check(state)) {
+ ret = drm_dp_atomic_find_time_slots(state,
+ params[next_index].port->mgr,
+ params[next_index].port,
+ vars[next_index].pbn);
+ if (ret < 0)
+ return ret;
+
+ ret = drm_dp_mst_atomic_check(state);
+ if (ret == 0) {
vars[next_index].bpp_x16 = bpp_x16_from_pbn(params[next_index], vars[next_index].pbn);
} else {
vars[next_index].pbn -= fair_pbn_alloc;
- if (drm_dp_atomic_find_vcpi_slots(state,
- params[next_index].port->mgr,
- params[next_index].port,
- vars[next_index].pbn,
- pbn_per_timeslot) < 0)
- return;
+ ret = drm_dp_atomic_find_time_slots(state,
+ params[next_index].port->mgr,
+ params[next_index].port,
+ vars[next_index].pbn);
+ if (ret < 0)
+ return ret;
}
} else {
vars[next_index].pbn += initial_slack[next_index];
- if (drm_dp_atomic_find_vcpi_slots(state,
- params[next_index].port->mgr,
- params[next_index].port,
- vars[next_index].pbn,
- pbn_per_timeslot) < 0)
- return;
- if (!drm_dp_mst_atomic_check(state)) {
+ ret = drm_dp_atomic_find_time_slots(state,
+ params[next_index].port->mgr,
+ params[next_index].port,
+ vars[next_index].pbn);
+ if (ret < 0)
+ return ret;
+
+ ret = drm_dp_mst_atomic_check(state);
+ if (ret == 0) {
vars[next_index].bpp_x16 = params[next_index].bw_range.max_target_bpp_x16;
} else {
vars[next_index].pbn -= initial_slack[next_index];
- if (drm_dp_atomic_find_vcpi_slots(state,
- params[next_index].port->mgr,
- params[next_index].port,
- vars[next_index].pbn,
- pbn_per_timeslot) < 0)
- return;
+ ret = drm_dp_atomic_find_time_slots(state,
+ params[next_index].port->mgr,
+ params[next_index].port,
+ vars[next_index].pbn);
+ if (ret < 0)
+ return ret;
}
}
bpp_increased[next_index] = true;
remaining_to_increase--;
}
+ return 0;
}
-static void try_disable_dsc(struct drm_atomic_state *state,
- struct dc_link *dc_link,
- struct dsc_mst_fairness_params *params,
- struct dsc_mst_fairness_vars *vars,
- int count,
- int k)
+static int try_disable_dsc(struct drm_atomic_state *state,
+ struct dc_link *dc_link,
+ struct dsc_mst_fairness_params *params,
+ struct dsc_mst_fairness_vars *vars,
+ int count,
+ int k)
{
int i;
bool tried[MAX_PIPES];
@@ -706,6 +1104,8 @@ static void try_disable_dsc(struct drm_atomic_state *state,
int max_kbps_increase;
int next_index;
int remaining_to_try = 0;
+ int ret;
+ int var_pbn;
for (i = 0; i < count; i++) {
if (vars[i + k].dsc_enabled
@@ -735,48 +1135,77 @@ static void try_disable_dsc(struct drm_atomic_state *state,
if (next_index == -1)
break;
- vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps);
- if (drm_dp_atomic_find_vcpi_slots(state,
- params[next_index].port->mgr,
- params[next_index].port,
- vars[next_index].pbn,
- dm_mst_get_pbn_divider(dc_link)) < 0)
- return;
+ DRM_DEBUG_DRIVER("MST_DSC index #%d, try no compression\n", next_index);
+ var_pbn = vars[next_index].pbn;
+ vars[next_index].pbn = kbps_to_pbn(params[next_index].bw_range.stream_kbps, true);
+ ret = drm_dp_atomic_find_time_slots(state,
+ params[next_index].port->mgr,
+ params[next_index].port,
+ vars[next_index].pbn);
+ if (ret < 0) {
+ DRM_DEBUG_DRIVER("%s:%d MST_DSC index #%d, failed to set pbn to the state, %d\n",
+ __func__, __LINE__, next_index, ret);
+ vars[next_index].pbn = var_pbn;
+ return ret;
+ }
- if (!drm_dp_mst_atomic_check(state)) {
+ ret = drm_dp_mst_atomic_check(state);
+ if (ret == 0) {
+ DRM_DEBUG_DRIVER("MST_DSC index #%d, greedily disable dsc\n", next_index);
vars[next_index].dsc_enabled = false;
vars[next_index].bpp_x16 = 0;
} else {
- vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.max_kbps);
- if (drm_dp_atomic_find_vcpi_slots(state,
- params[next_index].port->mgr,
- params[next_index].port,
- vars[next_index].pbn,
- dm_mst_get_pbn_divider(dc_link)) < 0)
- return;
+ DRM_DEBUG_DRIVER("MST_DSC index #%d, restore optimized pbn value\n", next_index);
+ vars[next_index].pbn = var_pbn;
+ ret = drm_dp_atomic_find_time_slots(state,
+ params[next_index].port->mgr,
+ params[next_index].port,
+ vars[next_index].pbn);
+ if (ret < 0) {
+ DRM_DEBUG_DRIVER("%s:%d MST_DSC index #%d, failed to set pbn to the state, %d\n",
+ __func__, __LINE__, next_index, ret);
+ return ret;
+ }
}
tried[next_index] = true;
remaining_to_try--;
}
+ return 0;
+}
+
+static void log_dsc_params(int count, struct dsc_mst_fairness_vars *vars, int k)
+{
+ int i;
+
+ for (i = 0; i < count; i++)
+ DRM_DEBUG_DRIVER("MST_DSC DSC params: stream #%d --- dsc_enabled = %d, bpp_x16 = %d, pbn = %d\n",
+ i, vars[i + k].dsc_enabled, vars[i + k].bpp_x16, vars[i + k].pbn);
}
-static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
- struct dc_state *dc_state,
- struct dc_link *dc_link,
- struct dsc_mst_fairness_vars *vars,
- int *link_vars_start_index)
+static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
+ struct dc_state *dc_state,
+ struct dc_link *dc_link,
+ struct dsc_mst_fairness_vars *vars,
+ struct drm_dp_mst_topology_mgr *mgr,
+ int *link_vars_start_index)
{
- int i, k;
struct dc_stream_state *stream;
struct dsc_mst_fairness_params params[MAX_PIPES];
struct amdgpu_dm_connector *aconnector;
+ struct drm_dp_mst_topology_state *mst_state = drm_atomic_get_mst_topology_state(state, mgr);
int count = 0;
+ int i, k, ret;
bool debugfs_overwrite = false;
+ struct drm_connector_state *new_conn_state;
memset(params, 0, sizeof(params));
+ if (IS_ERR(mst_state))
+ return PTR_ERR(mst_state);
+
/* Set up params */
+ DRM_DEBUG_DRIVER("%s: MST_DSC Try to set up params from %d streams\n", __func__, dc_state->stream_count);
for (i = 0; i < dc_state->stream_count; i++) {
struct dc_dsc_policy dsc_policy = {0};
@@ -789,15 +1218,23 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
if (!aconnector)
continue;
- if (!aconnector->port)
+ if (!aconnector->mst_output_port)
+ continue;
+
+ new_conn_state = drm_atomic_get_new_connector_state(state, &aconnector->base);
+
+ if (!new_conn_state) {
+ DRM_DEBUG_DRIVER("%s:%d MST_DSC Skip the stream 0x%p with invalid new_conn_state\n",
+ __func__, __LINE__, stream);
continue;
+ }
stream->timing.flags.DSC = 0;
params[count].timing = &stream->timing;
params[count].sink = stream->sink;
params[count].aconnector = aconnector;
- params[count].port = aconnector->port;
+ params[count].port = aconnector->mst_output_port;
params[count].clock_force_enable = aconnector->dsc_settings.dsc_force_enable;
if (params[count].clock_force_enable == DSC_CLK_FORCE_ENABLE)
debugfs_overwrite = true;
@@ -805,22 +1242,30 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
params[count].num_slices_v = aconnector->dsc_settings.dsc_num_slices_v;
params[count].bpp_overwrite = aconnector->dsc_settings.dsc_bits_per_pixel;
params[count].compression_possible = stream->sink->dsc_caps.dsc_dec_caps.is_dsc_supported;
- dc_dsc_get_policy_for_timing(params[count].timing, 0, &dsc_policy);
+ dc_dsc_get_policy_for_timing(params[count].timing, 0, &dsc_policy, dc_link_get_highest_encoding_format(stream->link));
if (!dc_dsc_compute_bandwidth_range(
stream->sink->ctx->dc->res_pool->dscs[0],
stream->sink->ctx->dc->debug.dsc_min_slice_height_override,
dsc_policy.min_target_bpp * 16,
dsc_policy.max_target_bpp * 16,
&stream->sink->dsc_caps.dsc_dec_caps,
- &stream->timing, &params[count].bw_range))
- params[count].bw_range.stream_kbps = dc_bandwidth_in_kbps_from_timing(&stream->timing);
-
+ &stream->timing,
+ dc_link_get_highest_encoding_format(dc_link),
+ &params[count].bw_range))
+ params[count].bw_range.stream_kbps = dc_bandwidth_in_kbps_from_timing(&stream->timing,
+ dc_link_get_highest_encoding_format(dc_link));
+
+ DRM_DEBUG_DRIVER("MST_DSC #%d stream 0x%p - max_kbps = %u, min_kbps = %u, uncompressed_kbps = %u\n",
+ count, stream, params[count].bw_range.max_kbps, params[count].bw_range.min_kbps,
+ params[count].bw_range.stream_kbps);
count++;
}
+ DRM_DEBUG_DRIVER("%s: MST_DSC Params set up for %d streams\n", __func__, count);
+
if (count == 0) {
ASSERT(0);
- return true;
+ return 0;
}
/* k is start index of vars for current phy link used by mst hub */
@@ -829,58 +1274,76 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
*link_vars_start_index += count;
/* Try no compression */
+ DRM_DEBUG_DRIVER("MST_DSC Try no compression\n");
for (i = 0; i < count; i++) {
vars[i + k].aconnector = params[i].aconnector;
- vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps);
+ vars[i + k].pbn = kbps_to_pbn(params[i].bw_range.stream_kbps, false);
vars[i + k].dsc_enabled = false;
vars[i + k].bpp_x16 = 0;
- if (drm_dp_atomic_find_vcpi_slots(state,
- params[i].port->mgr,
- params[i].port,
- vars[i + k].pbn,
- dm_mst_get_pbn_divider(dc_link)) < 0)
- return false;
+ ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port,
+ vars[i + k].pbn);
+ if (ret < 0)
+ return ret;
}
- if (!drm_dp_mst_atomic_check(state) && !debugfs_overwrite) {
+ ret = drm_dp_mst_atomic_check(state);
+ if (ret == 0 && !debugfs_overwrite) {
set_dsc_configs_from_fairness_vars(params, vars, count, k);
- return true;
+ return 0;
+ } else if (ret != -ENOSPC) {
+ return ret;
}
+ log_dsc_params(count, vars, k);
+
/* Try max compression */
+ DRM_DEBUG_DRIVER("MST_DSC Try max compression\n");
for (i = 0; i < count; i++) {
if (params[i].compression_possible && params[i].clock_force_enable != DSC_CLK_FORCE_DISABLE) {
- vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps);
+ vars[i + k].pbn = kbps_to_pbn(params[i].bw_range.min_kbps, false);
vars[i + k].dsc_enabled = true;
vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16;
- if (drm_dp_atomic_find_vcpi_slots(state,
- params[i].port->mgr,
- params[i].port,
- vars[i + k].pbn,
- dm_mst_get_pbn_divider(dc_link)) < 0)
- return false;
+ ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr,
+ params[i].port, vars[i + k].pbn);
+ if (ret < 0)
+ return ret;
} else {
- vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps);
+ vars[i + k].pbn = kbps_to_pbn(params[i].bw_range.stream_kbps, false);
vars[i + k].dsc_enabled = false;
vars[i + k].bpp_x16 = 0;
- if (drm_dp_atomic_find_vcpi_slots(state,
- params[i].port->mgr,
- params[i].port,
- vars[i + k].pbn,
- dm_mst_get_pbn_divider(dc_link)) < 0)
- return false;
+ ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr,
+ params[i].port, vars[i + k].pbn);
+ if (ret < 0)
+ return ret;
}
}
- if (drm_dp_mst_atomic_check(state))
- return false;
+ ret = drm_dp_mst_atomic_check(state);
+ if (ret != 0)
+ return ret;
+
+ log_dsc_params(count, vars, k);
/* Optimize degree of compression */
- increase_dsc_bpp(state, dc_link, params, vars, count, k);
+ DRM_DEBUG_DRIVER("MST_DSC Try optimize compression\n");
+ ret = increase_dsc_bpp(state, mst_state, dc_link, params, vars, count, k);
+ if (ret < 0) {
+ DRM_DEBUG_DRIVER("MST_DSC Failed to optimize compression\n");
+ return ret;
+ }
- try_disable_dsc(state, dc_link, params, vars, count, k);
+ log_dsc_params(count, vars, k);
+
+ DRM_DEBUG_DRIVER("MST_DSC Try disable compression\n");
+ ret = try_disable_dsc(state, dc_link, params, vars, count, k);
+ if (ret < 0) {
+ DRM_DEBUG_DRIVER("MST_DSC Failed to disable compression\n");
+ return ret;
+ }
+
+ log_dsc_params(count, vars, k);
set_dsc_configs_from_fairness_vars(params, vars, count, k);
- return true;
+ return 0;
}
static bool is_dsc_need_re_compute(
@@ -888,25 +1351,40 @@ static bool is_dsc_need_re_compute(
struct dc_state *dc_state,
struct dc_link *dc_link)
{
- int i;
+ int i, j;
bool is_dsc_need_re_compute = false;
+ struct amdgpu_dm_connector *stream_on_link[MAX_PIPES];
+ int new_stream_on_link_num = 0;
+ struct amdgpu_dm_connector *aconnector;
+ struct dc_stream_state *stream;
+ const struct dc *dc = dc_link->dc;
- /* only check phy used by mst branch */
+ /* only check phy used by dsc mst branch */
if (dc_link->type != dc_connection_mst_branch)
- return false;
+ goto out;
+
+ /* add a check for older MST DSC with no virtual DPCDs */
+ if (needs_dsc_aux_workaround(dc_link) &&
+ (!(dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT ||
+ dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_PASSTHROUGH_SUPPORT)))
+ goto out;
+
+ for (i = 0; i < MAX_PIPES; i++)
+ stream_on_link[i] = NULL;
+
+ DRM_DEBUG_DRIVER("%s: MST_DSC check on %d streams in new dc_state\n", __func__, dc_state->stream_count);
/* check if there is mode change in new request */
for (i = 0; i < dc_state->stream_count; i++) {
- struct amdgpu_dm_connector *aconnector;
- struct dc_stream_state *stream;
struct drm_crtc_state *new_crtc_state;
struct drm_connector_state *new_conn_state;
stream = dc_state->streams[i];
-
if (!stream)
continue;
+ DRM_DEBUG_DRIVER("%s:%d MST_DSC checking #%d stream 0x%p\n", __func__, __LINE__, i, stream);
+
/* check if stream using the same link for mst */
if (stream->link != dc_link)
continue;
@@ -915,10 +1393,15 @@ static bool is_dsc_need_re_compute(
if (!aconnector)
continue;
- new_conn_state = drm_atomic_get_new_connector_state(state, &aconnector->base);
+ stream_on_link[new_stream_on_link_num] = aconnector;
+ new_stream_on_link_num++;
- if (!new_conn_state)
+ new_conn_state = drm_atomic_get_new_connector_state(state, &aconnector->base);
+ if (!new_conn_state) {
+ DRM_DEBUG_DRIVER("%s:%d MST_DSC no new_conn_state for stream 0x%p, aconnector 0x%p\n",
+ __func__, __LINE__, stream, aconnector);
continue;
+ }
if (IS_ERR(new_conn_state))
continue;
@@ -927,45 +1410,103 @@ static bool is_dsc_need_re_compute(
continue;
new_crtc_state = drm_atomic_get_new_crtc_state(state, new_conn_state->crtc);
-
- if (!new_crtc_state)
+ if (!new_crtc_state) {
+ DRM_DEBUG_DRIVER("%s:%d MST_DSC no new_crtc_state for crtc of stream 0x%p, aconnector 0x%p\n",
+ __func__, __LINE__, stream, aconnector);
continue;
+ }
if (IS_ERR(new_crtc_state))
continue;
if (new_crtc_state->enable && new_crtc_state->active) {
if (new_crtc_state->mode_changed || new_crtc_state->active_changed ||
- new_crtc_state->connectors_changed)
+ new_crtc_state->connectors_changed) {
+ DRM_DEBUG_DRIVER("%s:%d MST_DSC dsc recompute required."
+ "stream 0x%p in new dc_state\n",
+ __func__, __LINE__, stream);
is_dsc_need_re_compute = true;
+ goto out;
+ }
+ }
+ }
+
+ if (new_stream_on_link_num == 0) {
+ DRM_DEBUG_DRIVER("%s:%d MST_DSC no mode change request for streams in new dc_state\n",
+ __func__, __LINE__);
+ is_dsc_need_re_compute = false;
+ goto out;
+ }
+
+ DRM_DEBUG_DRIVER("%s: MST_DSC check on %d streams in current dc_state\n",
+ __func__, dc->current_state->stream_count);
+
+ /* check current_state for a stream on this link that is not in
+ * the new request state
+ */
+ for (i = 0; i < dc->current_state->stream_count; i++) {
+ stream = dc->current_state->streams[i];
+ /* only check stream on the mst hub */
+ if (stream->link != dc_link)
+ continue;
+
+ aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+ if (!aconnector)
+ continue;
+
+ for (j = 0; j < new_stream_on_link_num; j++) {
+ if (stream_on_link[j]) {
+ if (aconnector == stream_on_link[j])
+ break;
+ }
+ }
+
+ if (j == new_stream_on_link_num) {
+ /* not in new state */
+ DRM_DEBUG_DRIVER("%s:%d MST_DSC dsc recompute required."
+ "stream 0x%p in current dc_state but not in new dc_state\n",
+ __func__, __LINE__, stream);
+ is_dsc_need_re_compute = true;
+ break;
}
}
+out:
+ DRM_DEBUG_DRIVER("%s: MST_DSC dsc recompute %s\n",
+ __func__, is_dsc_need_re_compute ? "required" : "not required");
+
return is_dsc_need_re_compute;
}
-bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
- struct dc_state *dc_state,
- struct dsc_mst_fairness_vars *vars)
+int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
+ struct dc_state *dc_state,
+ struct dsc_mst_fairness_vars *vars)
{
int i, j;
struct dc_stream_state *stream;
bool computed_streams[MAX_PIPES];
struct amdgpu_dm_connector *aconnector;
+ struct drm_dp_mst_topology_mgr *mst_mgr;
+ struct resource_pool *res_pool;
int link_vars_start_index = 0;
+ int ret = 0;
for (i = 0; i < dc_state->stream_count; i++)
computed_streams[i] = false;
for (i = 0; i < dc_state->stream_count; i++) {
stream = dc_state->streams[i];
+ res_pool = stream->ctx->dc->res_pool;
if (stream->signal != SIGNAL_TYPE_DISPLAY_PORT_MST)
continue;
aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
- if (!aconnector || !aconnector->dc_sink)
+ DRM_DEBUG_DRIVER("%s: MST_DSC compute mst dsc configs for stream 0x%p, aconnector 0x%p\n",
+ __func__, stream, aconnector);
+
+ if (!aconnector || !aconnector->dc_sink || !aconnector->mst_output_port)
continue;
if (!aconnector->dc_sink->dsc_caps.dsc_dec_caps.is_dsc_supported)
@@ -974,19 +1515,18 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
if (computed_streams[i])
continue;
- if (dcn20_remove_stream_from_ctx(stream->ctx->dc, dc_state, stream) != DC_OK)
- return false;
+ if (res_pool->funcs->remove_stream_from_ctx &&
+ res_pool->funcs->remove_stream_from_ctx(stream->ctx->dc, dc_state, stream) != DC_OK)
+ return -EINVAL;
if (!is_dsc_need_re_compute(state, dc_state, stream->link))
continue;
- mutex_lock(&aconnector->mst_mgr.lock);
- if (!compute_mst_dsc_configs_for_link(state, dc_state, stream->link,
- vars, &link_vars_start_index)) {
- mutex_unlock(&aconnector->mst_mgr.lock);
- return false;
- }
- mutex_unlock(&aconnector->mst_mgr.lock);
+ mst_mgr = aconnector->mst_output_port->mgr;
+ ret = compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, mst_mgr,
+ &link_vars_start_index);
+ if (ret != 0)
+ return ret;
for (j = 0; j < dc_state->stream_count; j++) {
if (dc_state->streams[j]->link == stream->link)
@@ -998,11 +1538,437 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
stream = dc_state->streams[i];
if (stream->timing.flags.DSC == 1)
- if (dc_stream_add_dsc_to_resource(stream->ctx->dc, dc_state, stream) != DC_OK)
- return false;
+ if (dc_stream_add_dsc_to_resource(stream->ctx->dc, dc_state, stream) != DC_OK) {
+ DRM_DEBUG_DRIVER("%s:%d MST_DSC Failed to request dsc hw resource for stream 0x%p\n",
+ __func__, __LINE__, stream);
+ return -EINVAL;
+ }
}
+ return ret;
+}
+
+/*
+ * pre_compute_mst_dsc_configs_for_state() - run the per-link MST DSC
+ * computation for each DP-MST stream in @dc_state.
+ *
+ * A stream is skipped when its signal is not SIGNAL_TYPE_DISPLAY_PORT_MST,
+ * when its connector, dc_sink or mst_output_port is missing, when the sink
+ * does not report DSC decode support, when its link was already handled via
+ * an earlier stream, or when is_dsc_need_re_compute() returns false.
+ *
+ * This function itself does not remove streams from the context and does not
+ * request DSC hardware resources; it only calls
+ * compute_mst_dsc_configs_for_link().
+ *
+ * Return: 0 on success, otherwise the first non-zero value returned by
+ * compute_mst_dsc_configs_for_link().
+ */
+static int pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
+ struct dc_state *dc_state,
+ struct dsc_mst_fairness_vars *vars)
+{
+ int i, j;
+ struct dc_stream_state *stream;
+ bool computed_streams[MAX_PIPES];
+ struct amdgpu_dm_connector *aconnector;
+ struct drm_dp_mst_topology_mgr *mst_mgr;
+ int link_vars_start_index = 0;
+ int ret = 0;
+
+ /* only the first stream_count entries are initialized and later read */
+ for (i = 0; i < dc_state->stream_count; i++)
+ computed_streams[i] = false;
+
+ for (i = 0; i < dc_state->stream_count; i++) {
+ stream = dc_state->streams[i];
+
+ if (stream->signal != SIGNAL_TYPE_DISPLAY_PORT_MST)
+ continue;
+
+ aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+
+ DRM_DEBUG_DRIVER("MST_DSC pre compute mst dsc configs for #%d stream 0x%p, aconnector 0x%p\n",
+ i, stream, aconnector);
+
+ if (!aconnector || !aconnector->dc_sink || !aconnector->mst_output_port)
+ continue;
+
+ if (!aconnector->dc_sink->dsc_caps.dsc_dec_caps.is_dsc_supported)
+ continue;
+
+ /* this stream's link was already computed via an earlier stream */
+ if (computed_streams[i])
+ continue;
+
+ if (!is_dsc_need_re_compute(state, dc_state, stream->link))
+ continue;
+
+ mst_mgr = aconnector->mst_output_port->mgr;
+ ret = compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, mst_mgr,
+ &link_vars_start_index);
+ if (ret != 0)
+ return ret;
+
+ /* mark every stream sharing this link so the link is computed once */
+ for (j = 0; j < dc_state->stream_count; j++) {
+ if (dc_state->streams[j]->link == stream->link)
+ computed_streams[j] = true;
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * find_crtc_index_in_state_by_stream() - find the CRTC whose new DM CRTC
+ * state points at @stream.
+ *
+ * Return: the for_each_oldnew_crtc_in_state() index of the first match, or -1
+ * when no CRTC in @state has @stream in its new state.
+ */
+static int find_crtc_index_in_state_by_stream(struct drm_atomic_state *state,
+ struct dc_stream_state *stream)
+{
+ int i;
+ struct drm_crtc *crtc;
+ struct drm_crtc_state *new_state, *old_state;
+
+ /* crtc and old_state are only needed to satisfy the iterator macro */
+ for_each_oldnew_crtc_in_state(state, crtc, old_state, new_state, i) {
+ struct dm_crtc_state *dm_state = to_dm_crtc_state(new_state);
+
+ if (dm_state->stream == stream)
+ return i;
+ }
+ return -1;
+}
+
+/*
+ * is_link_to_dschub() - report whether @dc_link is an MST branch whose DPCD
+ * DSC basic capabilities advertise DSC_SUPPORT or DSC_PASSTHROUGH_SUPPORT.
+ *
+ * Return: true only when both conditions hold, false otherwise.
+ */
+static bool is_link_to_dschub(struct dc_link *dc_link)
+{
+ union dpcd_dsc_basic_capabilities *dsc_caps =
+ &dc_link->dpcd_caps.dsc_caps.dsc_basic_caps;
+
+ /* only check phy used by dsc mst branch */
+ if (dc_link->type != dc_connection_mst_branch)
+ return false;
+
+ /* neither DSC decode nor DSC passthrough is advertised */
+ if (!(dsc_caps->fields.dsc_support.DSC_SUPPORT ||
+ dsc_caps->fields.dsc_support.DSC_PASSTHROUGH_SUPPORT))
+ return false;
+ return true;
+}
+
+/*
+ * is_dsc_precompute_needed() - decide whether pre_validate_dsc() has work to
+ * do for @state.
+ *
+ * Walks the CRTCs in @state. The result becomes true once a CRTC's new stream
+ * sits on a link accepted by is_link_to_dschub(). If any CRTC has no matching
+ * connector the walk stops and the result is false, even if an earlier CRTC
+ * had already set it to true.
+ *
+ * Return: true when precompute is needed, false otherwise.
+ */
+static bool is_dsc_precompute_needed(struct drm_atomic_state *state)
+{
+ int i;
+ struct drm_crtc *crtc;
+ struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+ bool ret = false;
+
+ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+ struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(new_crtc_state);
+
+ /* a CRTC without a connector vetoes the precompute for the whole state */
+ if (!amdgpu_dm_find_first_crtc_matching_connector(state, crtc)) {
+ ret = false;
+ break;
+ }
+ if (dm_crtc_state->stream && dm_crtc_state->stream->link)
+ if (is_link_to_dschub(dm_crtc_state->stream->link))
+ ret = true;
+ }
+ return ret;
+}
+
+/*
+ * pre_validate_dsc() - pre-compute MST DSC configs on a scratch copy of the
+ * DC state and clear mode_changed for CRTCs whose timing does not change.
+ *
+ * @state: atomic state being checked
+ * @dm_state_ptr: filled in through dm_atomic_get_state()
+ * @vars: passed through to pre_compute_mst_dsc_configs_for_state()
+ *
+ * Steps, as implemented below:
+ * 1. return 0 early when is_dsc_precompute_needed() is false;
+ * 2. copy dm_state->context into a vmalloc'ed dc_state and, for each stream
+ *    that has a CRTC and connector in @state, replace the copied pointer
+ *    with a stream from create_validate_stream_for_sink();
+ * 3. run pre_compute_mst_dsc_configs_for_state() on the copy;
+ * 4. for each stream that is not reported as timing-changed, set
+ *    new_state->mode_changed = 0 on its CRTC;
+ * 5. release the streams created in step 2 and free the copy.
+ *
+ * Return: 0 on success, the dm_atomic_get_state() error, -ENOMEM when the
+ * scratch state cannot be allocated, or -EINVAL when stream creation or the
+ * pre-compute step fails.
+ */
+int pre_validate_dsc(struct drm_atomic_state *state,
+ struct dm_atomic_state **dm_state_ptr,
+ struct dsc_mst_fairness_vars *vars)
+{
+ int i;
+ struct dm_atomic_state *dm_state;
+ struct dc_state *local_dc_state = NULL;
+ int ret = 0;
+
+ if (!is_dsc_precompute_needed(state)) {
+ DRM_INFO_ONCE("%s:%d MST_DSC dsc precompute is not needed\n", __func__, __LINE__);
+ return 0;
+ }
+ ret = dm_atomic_get_state(state, dm_state_ptr);
+ if (ret != 0) {
+ DRM_INFO_ONCE("%s:%d MST_DSC dm_atomic_get_state() failed\n", __func__, __LINE__);
+ return ret;
+ }
+ dm_state = *dm_state_ptr;
+
+ /*
+ * Create a local variable for dc_state and copy the content of
+ * dm_state->context into it. Streams that are re-created below get a
+ * pointer that differs from the one in dm_state->context.
+ */
+
+ local_dc_state = vmalloc(sizeof(struct dc_state));
+ if (!local_dc_state)
+ return -ENOMEM;
+ /* shallow copy: streams[] still holds the pointers from dm_state->context */
+ memcpy(local_dc_state, dm_state->context, sizeof(struct dc_state));
+
+ for (i = 0; i < local_dc_state->stream_count; i++) {
+ struct dc_stream_state *stream = dm_state->context->streams[i];
+ int ind = find_crtc_index_in_state_by_stream(state, stream);
+
+ if (ind >= 0) {
+ struct drm_connector *connector;
+ struct drm_connector_state *drm_new_conn_state;
+ struct dm_connector_state *dm_new_conn_state;
+ struct dm_crtc_state *dm_old_crtc_state;
+
+ connector =
+ amdgpu_dm_find_first_crtc_matching_connector(state,
+ state->crtcs[ind].ptr);
+ if (!connector)
+ continue;
+
+ /* NOTE(review): drm_new_conn_state is not NULL-checked before conversion - confirm */
+ drm_new_conn_state =
+ drm_atomic_get_new_connector_state(state,
+ connector);
+ dm_new_conn_state = to_dm_connector_state(drm_new_conn_state);
+ dm_old_crtc_state = to_dm_crtc_state(state->crtcs[ind].old_state);
+
+ local_dc_state->streams[i] =
+ create_validate_stream_for_sink(connector,
+ &state->crtcs[ind].new_state->mode,
+ dm_new_conn_state,
+ dm_old_crtc_state->stream);
+ if (local_dc_state->streams[i] == NULL) {
+ ret = -EINVAL;
+ break;
+ }
+ }
+ }
+
+ if (ret != 0)
+ goto clean_exit;
+
+ ret = pre_compute_mst_dsc_configs_for_state(state, local_dc_state, vars);
+ if (ret != 0) {
+ DRM_INFO_ONCE("%s:%d MST_DSC dsc pre_compute_mst_dsc_configs_for_state() failed\n",
+ __func__, __LINE__);
+ /* the specific error code from the pre-compute step is replaced here */
+ ret = -EINVAL;
+ goto clean_exit;
+ }
+
+ /*
+ * Compare the timing of each local stream with dm_state->context;
+ * if it is the same, set crtc_state->mode_changed = 0.
+ */
+ for (i = 0; i < local_dc_state->stream_count; i++) {
+ struct dc_stream_state *stream = dm_state->context->streams[i];
+
+ if (local_dc_state->streams[i] &&
+ dc_is_timing_changed(stream, local_dc_state->streams[i])) {
+ DRM_INFO_ONCE("%s:%d MST_DSC crtc[%d] needs mode_change\n", __func__, __LINE__, i);
+ } else {
+ int ind = find_crtc_index_in_state_by_stream(state, stream);
+
+ if (ind >= 0) {
+ DRM_INFO_ONCE("%s:%d MST_DSC no mode changed for stream 0x%p\n",
+ __func__, __LINE__, stream);
+ state->crtcs[ind].new_state->mode_changed = 0;
+ }
+ }
+ }
+clean_exit:
+ for (i = 0; i < local_dc_state->stream_count; i++) {
+ struct dc_stream_state *stream = dm_state->context->streams[i];
+
+ /*
+ * Only entries that no longer match the context are released.
+ * NOTE(review): after a failed create the entry is NULL and is
+ * passed to dc_stream_release() - confirm it tolerates NULL.
+ */
+ if (local_dc_state->streams[i] != stream)
+ dc_stream_release(local_dc_state->streams[i]);
+ }
+
+ vfree(local_dc_state);
+
+ return ret;
+}
+
+/*
+ * is_dsc_common_config_possible() - ask DC for the DSC bandwidth range of
+ * @stream and report whether one could be computed.
+ *
+ * The DSC policy for the stream timing supplies min/max target bpp, which are
+ * passed on multiplied by 16. The sink's DSC decoder caps and the first DSC
+ * resource of the pool (dscs[0]) are used for the query.
+ *
+ * @bw_range: output, filled by dc_dsc_compute_bandwidth_range()
+ *
+ * Return: the result of dc_dsc_compute_bandwidth_range().
+ */
+static bool is_dsc_common_config_possible(struct dc_stream_state *stream,
+ struct dc_dsc_bw_range *bw_range)
+{
+ struct dc_dsc_policy dsc_policy = {0};
+ bool is_dsc_possible;
+
+ dc_dsc_get_policy_for_timing(&stream->timing, 0, &dsc_policy, dc_link_get_highest_encoding_format(stream->link));
+ is_dsc_possible = dc_dsc_compute_bandwidth_range(stream->sink->ctx->dc->res_pool->dscs[0],
+ stream->sink->ctx->dc->debug.dsc_min_slice_height_override,
+ dsc_policy.min_target_bpp * 16,
+ dsc_policy.max_target_bpp * 16,
+ &stream->sink->dsc_caps.dsc_dec_caps,
+ &stream->timing, dc_link_get_highest_encoding_format(stream->link), bw_range);
+
+ return is_dsc_possible;
+}
+#endif
+
+#if defined(CONFIG_DRM_AMD_DC_FP)
+/*
+ * dp_get_link_current_set_bw() - derive the currently configured link
+ * bandwidth from the DPCD link configuration read over @aux.
+ *
+ * Reads 16 bytes starting at DP_LINK_BW_SET and picks out the link rate
+ * setting, the lane count and the channel coding. The result is
+ * per-lane rate * lane count / 10000 * data efficiency (x10000).
+ *
+ * @cur_link_bw: output; set to 0 first, so it is 0 on every failure path.
+ * NOTE(review): presumably in kbps, going by the local variable names - confirm.
+ *
+ * Return: true on success; false when the DPCD read is short or the
+ * encoding / 128b/132b link rate value is not recognised.
+ */
+static bool dp_get_link_current_set_bw(struct drm_dp_aux *aux, uint32_t *cur_link_bw)
+{
+ uint32_t total_data_bw_efficiency_x10000 = 0;
+ uint32_t link_rate_per_lane_kbps = 0;
+ enum dc_link_rate link_rate;
+ union lane_count_set lane_count;
+ u8 dp_link_encoding;
+ u8 link_bw_set = 0;
+ u8 data[16] = {0};
+
+ *cur_link_bw = 0;
+
+ if (drm_dp_dpcd_read(aux, DP_LINK_BW_SET, data, 16) != 16)
+ return false;
+
+ /* registers are indexed relative to DP_LINK_BW_SET, the start of the read */
+ dp_link_encoding = data[DP_MAIN_LINK_CHANNEL_CODING_SET - DP_LINK_BW_SET];
+ link_bw_set = data[DP_LINK_BW_SET - DP_LINK_BW_SET];
+ lane_count.raw = data[DP_LANE_COUNT_SET - DP_LINK_BW_SET];
+
+ drm_dbg_dp(aux->drm_dev, "MST_DSC downlink setting: %d, 0x%x x %d\n",
+ dp_link_encoding, link_bw_set, lane_count.bits.LANE_COUNT_SET);
+
+ switch (dp_link_encoding) {
+ case DP_8b_10b_ENCODING:
+ /* the raw register value is used directly as the link rate */
+ link_rate = link_bw_set;
+ link_rate_per_lane_kbps = link_rate * LINK_RATE_REF_FREQ_IN_KHZ * BITS_PER_DP_BYTE;
+ /* combine coding efficiency (x10000) with FEC efficiency (x100) */
+ total_data_bw_efficiency_x10000 = DATA_EFFICIENCY_8b_10b_x10000;
+ total_data_bw_efficiency_x10000 /= 100;
+ total_data_bw_efficiency_x10000 *= DATA_EFFICIENCY_8b_10b_FEC_EFFICIENCY_x100;
+ break;
+ case DP_128b_132b_ENCODING:
+ switch (link_bw_set) {
+ case DP_LINK_BW_10:
+ link_rate = LINK_RATE_UHBR10;
+ break;
+ case DP_LINK_BW_13_5:
+ link_rate = LINK_RATE_UHBR13_5;
+ break;
+ case DP_LINK_BW_20:
+ link_rate = LINK_RATE_UHBR20;
+ break;
+ default:
+ return false;
+ }
+
+ link_rate_per_lane_kbps = link_rate * 10000;
+ total_data_bw_efficiency_x10000 = DATA_EFFICIENCY_128b_132b_x10000;
+ break;
+ default:
+ return false;
+ }
+
+ /* divide by 10000 before applying the x10000 efficiency factor */
+ *cur_link_bw = link_rate_per_lane_kbps * lane_count.bits.LANE_COUNT_SET / 10000 * total_data_bw_efficiency_x10000;
return true;
}
+#endif
+/*
+ * dm_dp_mst_is_port_support_mode() - check whether @stream's timing fits the
+ * MST path of @aconnector, with or without DSC.
+ *
+ * When CONFIG_DRM_AMD_DC_FP is not defined the body is compiled out and the
+ * function returns DC_OK.
+ *
+ * Otherwise:
+ * - if the uncompressed stream bandwidth fits min(root link bw, virtual
+ *   channel bw), return DC_OK without DSC;
+ * - else DSC is required: fail if the connector has no dsc_aux or no common
+ *   DSC config exists; check the minimum DSC bandwidth against the path
+ *   (passthrough case) or against the last link and the upstream virtual
+ *   channel (decode-at-last-link case); compute a DSC config, which sets
+ *   stream->timing.dsc_cfg and stream->timing.flags.DSC; finally check the
+ *   branch overall throughput limit for the pixel encoding.
+ *
+ * Return: DC_OK when the mode is supported, DC_FAIL_BANDWIDTH_VALIDATE
+ * otherwise.
+ */
+enum dc_status dm_dp_mst_is_port_support_mode(
+ struct amdgpu_dm_connector *aconnector,
+ struct dc_stream_state *stream)
+{
+#if defined(CONFIG_DRM_AMD_DC_FP)
+ int branch_max_throughput_mps = 0;
+ struct dc_link_settings cur_link_settings;
+ uint32_t end_to_end_bw_in_kbps = 0;
+ uint32_t root_link_bw_in_kbps = 0;
+ uint32_t virtual_channel_bw_in_kbps = 0;
+ struct dc_dsc_bw_range bw_range = {0};
+ struct dc_dsc_config_options dsc_options = {0};
+ uint32_t stream_kbps;
+
+ /* DSC unnecessary case
+ * Check if timing could be supported within end-to-end BW
+ */
+ stream_kbps =
+ dc_bandwidth_in_kbps_from_timing(&stream->timing,
+ dc_link_get_highest_encoding_format(stream->link));
+ cur_link_settings = stream->link->verified_link_cap;
+ root_link_bw_in_kbps = dc_link_bandwidth_kbps(aconnector->dc_link, &cur_link_settings);
+ virtual_channel_bw_in_kbps = pbn_to_kbps(aconnector->mst_output_port->full_pbn, true);
+
+ /* pick the end to end bw bottleneck */
+ end_to_end_bw_in_kbps = min(root_link_bw_in_kbps, virtual_channel_bw_in_kbps);
+
+ if (stream_kbps <= end_to_end_bw_in_kbps) {
+ DRM_DEBUG_DRIVER("MST_DSC no dsc required. End-to-end bw sufficient\n");
+ return DC_OK;
+ }
+
+ /* DSC necessary case */
+ if (!aconnector->dsc_aux)
+ return DC_FAIL_BANDWIDTH_VALIDATE;
+
+ if (is_dsc_common_config_possible(stream, &bw_range)) {
+
+ /* capable of dsc passthrough. dsc bitstream along the entire path */
+ if (aconnector->mst_output_port->passthrough_aux) {
+ if (bw_range.min_kbps > end_to_end_bw_in_kbps) {
+ /* NOTE(review): the two literals below join as "endpointMax" (no separator) */
+ DRM_DEBUG_DRIVER("MST_DSC dsc passthrough and decode at endpoint"
+ "Max dsc compression bw can't fit into end-to-end bw\n");
+ return DC_FAIL_BANDWIDTH_VALIDATE;
+ }
+ } else {
+ /* dsc bitstream decoded at the dp last link */
+ struct drm_dp_mst_port *immediate_upstream_port = NULL;
+ uint32_t end_link_bw = 0;
+
+ /* Get last DP link BW capability. Mode shall be supported by Legacy peer */
+ if (aconnector->mst_output_port->pdt != DP_PEER_DEVICE_DP_LEGACY_CONV &&
+ aconnector->mst_output_port->pdt != DP_PEER_DEVICE_NONE) {
+ /* re-read the last link bw only when full_pbn changed; otherwise use the cached value */
+ if (aconnector->vc_full_pbn != aconnector->mst_output_port->full_pbn) {
+ /* return value ignored: on failure end_link_bw stays 0 and the check below is skipped */
+ dp_get_link_current_set_bw(&aconnector->mst_output_port->aux, &end_link_bw);
+ aconnector->vc_full_pbn = aconnector->mst_output_port->full_pbn;
+ aconnector->mst_local_bw = end_link_bw;
+ } else {
+ end_link_bw = aconnector->mst_local_bw;
+ }
+
+ /* branch devices with OUI DP_BRANCH_DEVICE_ID_90CC24 are exempt from this check */
+ if (end_link_bw > 0 &&
+ stream_kbps > end_link_bw &&
+ aconnector->branch_ieee_oui != DP_BRANCH_DEVICE_ID_90CC24) {
+ DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link. "
+ "Mode required bw can't fit into last link\n");
+ return DC_FAIL_BANDWIDTH_VALIDATE;
+ }
+ }
+
+ /* Get virtual channel bandwidth between source and the link before the last link */
+ if (aconnector->mst_output_port->parent->port_parent)
+ immediate_upstream_port = aconnector->mst_output_port->parent->port_parent;
+
+ if (immediate_upstream_port) {
+ virtual_channel_bw_in_kbps = pbn_to_kbps(immediate_upstream_port->full_pbn, true);
+ virtual_channel_bw_in_kbps = min(root_link_bw_in_kbps, virtual_channel_bw_in_kbps);
+ } else {
+ /* For topology LCT 1 case - only one mstb */
+ virtual_channel_bw_in_kbps = root_link_bw_in_kbps;
+ }
+
+ if (bw_range.min_kbps > virtual_channel_bw_in_kbps) {
+ /* NOTE(review): the two literals below join as "last link.Max" (no separator) */
+ DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link."
+ "Max dsc compression can't fit into MST available bw\n");
+ return DC_FAIL_BANDWIDTH_VALIDATE;
+ }
+ }
+
+ /* Confirm if we can obtain dsc config */
+ dc_dsc_get_default_config_option(stream->link->dc, &dsc_options);
+ dsc_options.max_target_bpp_limit_override_x16 = aconnector->base.display_info.max_dsc_bpp * 16;
+ /* on success this writes stream->timing.dsc_cfg; the DSC flag is set just below */
+ if (dc_dsc_compute_config(stream->sink->ctx->dc->res_pool->dscs[0],
+ &stream->sink->dsc_caps.dsc_dec_caps,
+ &dsc_options,
+ end_to_end_bw_in_kbps,
+ &stream->timing,
+ dc_link_get_highest_encoding_format(stream->link),
+ &stream->timing.dsc_cfg)) {
+ stream->timing.flags.DSC = 1;
+ DRM_DEBUG_DRIVER("MST_DSC require dsc and dsc config found\n");
+ } else {
+ DRM_DEBUG_DRIVER("MST_DSC require dsc but can't find appropriate dsc config\n");
+ return DC_FAIL_BANDWIDTH_VALIDATE;
+ }
+
+ /* check the stream pixel rate against the branch overall throughput for its pixel encoding */
+ switch (stream->timing.pixel_encoding) {
+ case PIXEL_ENCODING_RGB:
+ case PIXEL_ENCODING_YCBCR444:
+ branch_max_throughput_mps =
+ aconnector->dc_sink->dsc_caps.dsc_dec_caps.branch_overall_throughput_0_mps;
+ break;
+ case PIXEL_ENCODING_YCBCR422:
+ case PIXEL_ENCODING_YCBCR420:
+ branch_max_throughput_mps =
+ aconnector->dc_sink->dsc_caps.dsc_dec_caps.branch_overall_throughput_1_mps;
+ break;
+ default:
+ break;
+ }
+
+ /* a limit of 0 means no throughput check is applied */
+ if (branch_max_throughput_mps != 0 &&
+ ((stream->timing.pix_clk_100hz / 10) > branch_max_throughput_mps * 1000)) {
+ DRM_DEBUG_DRIVER("MST_DSC require dsc but max throughput mps fails\n");
+ return DC_FAIL_BANDWIDTH_VALIDATE;
+ }
+ } else {
+ DRM_DEBUG_DRIVER("MST_DSC require dsc but can't find common dsc config\n");
+ return DC_FAIL_BANDWIDTH_VALIDATE;
+ }
#endif
+ return DC_OK;
+}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
index 900d3f7a8498..6f7ea684b555 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: MIT */
/*
* Copyright 2012-15 Advanced Micro Devices, Inc.
*
@@ -26,10 +27,40 @@
#ifndef __DAL_AMDGPU_DM_MST_TYPES_H__
#define __DAL_AMDGPU_DM_MST_TYPES_H__
+/* Branch device id; compared against aconnector->branch_ieee_oui in amdgpu_dm_mst_types.c */
+#define DP_BRANCH_DEVICE_ID_90CC24 0x90CC24
+
+/* NOTE(review): presumably DPCD addresses of a Synaptics-specific command interface - confirm */
+#define SYNAPTICS_RC_COMMAND 0x4B2
+#define SYNAPTICS_RC_RESULT 0x4B3
+#define SYNAPTICS_RC_LENGTH 0x4B8
+#define SYNAPTICS_RC_OFFSET 0x4BC
+#define SYNAPTICS_RC_DATA 0x4C0
+
+/* Start of the branch device vendor specific field (050Ch), see below */
+#define DP_BRANCH_VENDOR_SPECIFIC_START 0x50C
+
+/**
+ * Panamera MST Hub detection
+ * Offset DPCD 050Eh == 0x5A indicates cascaded MST hub case
+ * Check from beginning of branch device vendor specific field (050Ch)
+ */
+/* Evaluates to 1 when the high nibble of byte 4 of branchDevName is 0x5 */
+#define IS_SYNAPTICS_PANAMERA(branchDevName) (((int)branchDevName[4] & 0xF0) == 0x50 ? 1 : 0)
+#define BRANCH_HW_REVISION_PANAMERA_A2 0x10
+#define SYNAPTICS_CASCADED_HUB_ID 0x5A
+/* Evaluates to 1 for a Panamera device whose data[2] equals SYNAPTICS_CASCADED_HUB_ID */
+#define IS_SYNAPTICS_CASCADED_PANAMERA(devName, data) ((IS_SYNAPTICS_PANAMERA(devName) && ((int)data[2] == SYNAPTICS_CASCADED_HUB_ID)) ? 1 : 0)
+
+/* NOTE(review): looks like a scale factor relative to 1000 (1031 = +3.1%) - confirm at the use site */
+#define PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B 1031
+#define PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B 1000
+
+/*
+ * Kind of MST sideband message-ready event passed to
+ * dm_handle_mst_sideband_msg_ready_event(). The numeric values form a
+ * two-bit mask: DOWN_OR_UP (3) equals DOWN_REP (1) | UP_REQ (2).
+ */
+enum mst_msg_ready_type {
+ NONE_MSG_RDY_EVENT = 0,
+ DOWN_REP_MSG_RDY_EVENT = 1,
+ UP_REQ_MSG_RDY_EVENT = 2,
+ DOWN_OR_UP_MSG_RDY_EVENT = 3
+};
+
struct amdgpu_display_manager;
struct amdgpu_dm_connector;
-int dm_mst_get_pbn_divider(struct dc_link *link);
+uint32_t dm_mst_get_pbn_divider(struct dc_link *link);
void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
struct amdgpu_dm_connector *aconnector,
@@ -38,7 +69,9 @@ void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
void
dm_dp_create_fake_mst_encoders(struct amdgpu_device *adev);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
+void dm_handle_mst_sideband_msg_ready_event(
+ struct drm_dp_mst_topology_mgr *mgr,
+ enum mst_msg_ready_type msg_rdy_type);
struct dsc_mst_fairness_vars {
int pbn;
@@ -47,9 +80,18 @@ struct dsc_mst_fairness_vars {
struct amdgpu_dm_connector *aconnector;
};
-bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
- struct dc_state *dc_state,
- struct dsc_mst_fairness_vars *vars);
-#endif
+int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
+ struct dc_state *dc_state,
+ struct dsc_mst_fairness_vars *vars);
+
+bool needs_dsc_aux_workaround(struct dc_link *link);
+
+int pre_validate_dsc(struct drm_atomic_state *state,
+ struct dm_atomic_state **dm_state_ptr,
+ struct dsc_mst_fairness_vars *vars);
+
+enum dc_status dm_dp_mst_is_port_support_mode(
+ struct amdgpu_dm_connector *aconnector,
+ struct dc_stream_state *stream);
#endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
new file mode 100644
index 000000000000..2e3ee78999d9
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -0,0 +1,1950 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_blend.h>
+#include "drm/drm_framebuffer.h"
+#include <drm/drm_gem_atomic_helper.h>
+#include <drm/drm_plane_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_fourcc.h>
+
+#include "amdgpu.h"
+#include "dal_asic_id.h"
+#include "amdgpu_display.h"
+#include "amdgpu_dm_trace.h"
+#include "amdgpu_dm_plane.h"
+#include "amdgpu_dm_colorop.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+
+/*
+ * TODO: these are currently initialized to rgb formats only.
+ * For future use cases we should either initialize them dynamically based on
+ * plane capabilities, or initialize this array to all formats, so internal drm
+ * check will succeed, and let DC implement proper check
+ */
+static const uint32_t rgb_formats[] = {
+ /* 8-bit, 10-bit (2101010), 16-bit and RGB565 packed RGB fourcc codes */
+ DRM_FORMAT_XRGB8888,
+ DRM_FORMAT_ARGB8888,
+ DRM_FORMAT_RGBA8888,
+ DRM_FORMAT_XRGB2101010,
+ DRM_FORMAT_XBGR2101010,
+ DRM_FORMAT_ARGB2101010,
+ DRM_FORMAT_ABGR2101010,
+ DRM_FORMAT_XRGB16161616,
+ DRM_FORMAT_XBGR16161616,
+ DRM_FORMAT_ARGB16161616,
+ DRM_FORMAT_ABGR16161616,
+ DRM_FORMAT_XBGR8888,
+ DRM_FORMAT_ABGR8888,
+ DRM_FORMAT_RGB565,
+};
+
+/* Fourcc list combining 8-bit/565 RGB entries with the NV21/NV12/P010 entries of video_formats */
+static const uint32_t overlay_formats[] = {
+ DRM_FORMAT_XRGB8888,
+ DRM_FORMAT_ARGB8888,
+ DRM_FORMAT_RGBA8888,
+ DRM_FORMAT_XBGR8888,
+ DRM_FORMAT_ABGR8888,
+ DRM_FORMAT_RGB565,
+ DRM_FORMAT_NV21,
+ DRM_FORMAT_NV12,
+ DRM_FORMAT_P010
+};
+
+/* Fourcc list holding only the NV21, NV12 and P010 entries */
+static const uint32_t video_formats[] = {
+ DRM_FORMAT_NV21,
+ DRM_FORMAT_NV12,
+ DRM_FORMAT_P010
+};
+
+/* Single-entry fourcc list: ARGB8888 only */
+static const u32 cursor_formats[] = {
+ DRM_FORMAT_ARGB8888
+};
+
+/*
+ * Micro swizzle selector values (Z/S/D/R = 0..3).
+ * NOTE(review): not referenced in the part of the file visible here;
+ * presumably compared against bits of a swizzle mode - confirm at the use site.
+ */
+enum dm_micro_swizzle {
+ MICRO_SWIZZLE_Z = 0,
+ MICRO_SWIZZLE_S = 1,
+ MICRO_SWIZZLE_D = 2,
+ MICRO_SWIZZLE_R = 3
+};
+
+/*
+ * amdgpu_dm_plane_get_format_info() - thin wrapper that forwards
+ * @pixel_format and @modifier to amdgpu_lookup_format_info() and returns its
+ * result unchanged.
+ */
+const struct drm_format_info *amdgpu_dm_plane_get_format_info(u32 pixel_format, u64 modifier)
+{
+ return amdgpu_lookup_format_info(pixel_format, modifier);
+}
+
+/*
+ * amdgpu_dm_plane_fill_blending_from_plane_state() - derive blending
+ * parameters from a DRM plane state.
+ *
+ * Outputs (all always written):
+ * @per_pixel_alpha: true when the blend mode is PREMULTI or COVERAGE and the
+ *	framebuffer format is one of the alpha formats listed below.
+ * @pre_multiplied_alpha: true, except when per-pixel alpha is in use with
+ *	the COVERAGE blend mode.
+ * @global_alpha: true when plane_state->alpha is below 0xffff.
+ * @global_alpha_value: plane_state->alpha >> 8 in that case, else 0xff.
+ *
+ * plane_state->fb is dereferenced only for the PREMULTI/COVERAGE blend modes.
+ */
+void amdgpu_dm_plane_fill_blending_from_plane_state(const struct drm_plane_state *plane_state,
+ bool *per_pixel_alpha, bool *pre_multiplied_alpha,
+ bool *global_alpha, int *global_alpha_value)
+{
+ *per_pixel_alpha = false;
+ *pre_multiplied_alpha = true;
+ *global_alpha = false;
+ *global_alpha_value = 0xff;
+
+
+ if (plane_state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI ||
+ plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) {
+ /* formats for which per-pixel alpha is enabled */
+ static const uint32_t alpha_formats[] = {
+ DRM_FORMAT_ARGB8888,
+ DRM_FORMAT_RGBA8888,
+ DRM_FORMAT_ABGR8888,
+ DRM_FORMAT_ARGB2101010,
+ DRM_FORMAT_ABGR2101010,
+ DRM_FORMAT_ARGB16161616,
+ DRM_FORMAT_ABGR16161616,
+ DRM_FORMAT_ARGB16161616F,
+ };
+ uint32_t format = plane_state->fb->format->format;
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(alpha_formats); ++i) {
+ if (format == alpha_formats[i]) {
+ *per_pixel_alpha = true;
+ break;
+ }
+ }
+
+ if (*per_pixel_alpha && plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE)
+ *pre_multiplied_alpha = false;
+ }
+
+ /* reduce the 16-bit plane alpha to its upper 8 bits */
+ if (plane_state->alpha < 0xffff) {
+ *global_alpha = true;
+ *global_alpha_value = plane_state->alpha >> 8;
+ }
+}
+
+/*
+ * amdgpu_dm_plane_add_modifier() - append @mod to a growable modifier array.
+ *
+ * @mods: array pointer; set to NULL (and the old array freed) when growing
+ *	fails. A NULL *mods makes every later call a no-op, so an allocation
+ *	failure is sticky and the caller can check *mods once at the end.
+ * @size: number of entries in use; incremented on success.
+ * @cap: allocated capacity; doubled when the array is full.
+ *
+ * NOTE(review): doubling assumes *cap is non-zero on entry (0 * 2 stays 0) -
+ * confirm against the callers.
+ */
+static void amdgpu_dm_plane_add_modifier(uint64_t **mods, uint64_t *size, uint64_t *cap, uint64_t mod)
+{
+ if (!*mods)
+ return;
+
+ /* array is full: allocate a buffer twice as large and copy the entries over */
+ if (*cap - *size < 1) {
+ uint64_t new_cap = *cap * 2;
+ uint64_t *new_mods = kmalloc_array(new_cap, sizeof(uint64_t), GFP_KERNEL);
+
+ if (!new_mods) {
+ kfree(*mods);
+ *mods = NULL;
+ return;
+ }
+
+ memcpy(new_mods, *mods, sizeof(uint64_t) * *size);
+ kfree(*mods);
+ *mods = new_mods;
+ *cap = new_cap;
+ }
+
+ (*mods)[*size] = mod;
+ *size += 1;
+}
+
+/* Return true when @modifier is an AMD format modifier with its DCC field set. */
+static bool amdgpu_dm_plane_modifier_has_dcc(uint64_t modifier)
+{
+ return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier);
+}
+
+/*
+ * Return the swizzle mode for @modifier: 0 for DRM_FORMAT_MOD_LINEAR,
+ * otherwise the TILE field of the modifier.
+ */
+static unsigned int amdgpu_dm_plane_modifier_gfx9_swizzle_mode(uint64_t modifier)
+{
+ if (modifier == DRM_FORMAT_MOD_LINEAR)
+ return 0;
+
+ return AMD_FMT_MOD_GET(TILE, modifier);
+}
+
+/*
+ * amdgpu_dm_plane_fill_gfx8_tiling_info_from_flags() - decode GFX8 tiling
+ * parameters from @tiling_flags into @tiling_info.
+ *
+ * For DC_ARRAY_2D_TILED_THIN1 all gfx8 bank/tile fields and gfxversion are
+ * filled. For DC_ARRAY_1D_TILED_THIN1 only gfx8.array_mode is set. For any
+ * other array mode neither is touched. gfx8.pipe_config is always written.
+ */
+static void amdgpu_dm_plane_fill_gfx8_tiling_info_from_flags(struct dc_tiling_info *tiling_info,
+ uint64_t tiling_flags)
+{
+ /* Fill GFX8 params */
+ if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == DC_ARRAY_2D_TILED_THIN1) {
+ unsigned int bankw, bankh, mtaspect, tile_split, num_banks;
+
+ bankw = AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
+ bankh = AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
+ mtaspect = AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
+ tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT);
+ num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
+
+ /* gfxversion is only set on this 2D-tiled path */
+ tiling_info->gfxversion = DcGfxVersion8;
+ /* XXX fix me for VI */
+ tiling_info->gfx8.num_banks = num_banks;
+ tiling_info->gfx8.array_mode =
+ DC_ARRAY_2D_TILED_THIN1;
+ tiling_info->gfx8.tile_split = tile_split;
+ tiling_info->gfx8.bank_width = bankw;
+ tiling_info->gfx8.bank_height = bankh;
+ tiling_info->gfx8.tile_aspect = mtaspect;
+ tiling_info->gfx8.tile_mode =
+ DC_ADDR_SURF_MICRO_TILING_DISPLAY;
+ } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE)
+ == DC_ARRAY_1D_TILED_THIN1) {
+ tiling_info->gfx8.array_mode = DC_ARRAY_1D_TILED_THIN1;
+ }
+
+ tiling_info->gfx8.pipe_config =
+ AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
+}
+
+/*
+ * amdgpu_dm_plane_fill_gfx9_tiling_info_from_device() - copy the device's
+ * gb_addr_config fields into @tiling_info->gfx9.
+ *
+ * shaderEnable is always set to 1. num_pkrs is copied only when the GC IP
+ * version is at least 10.3.0; otherwise it is left as the caller provided it.
+ */
+static void amdgpu_dm_plane_fill_gfx9_tiling_info_from_device(const struct amdgpu_device *adev,
+ struct dc_tiling_info *tiling_info)
+{
+ /* Fill GFX9 params */
+ tiling_info->gfx9.num_pipes =
+ adev->gfx.config.gb_addr_config_fields.num_pipes;
+ tiling_info->gfx9.num_banks =
+ adev->gfx.config.gb_addr_config_fields.num_banks;
+ tiling_info->gfx9.pipe_interleave =
+ adev->gfx.config.gb_addr_config_fields.pipe_interleave_size;
+ tiling_info->gfx9.num_shader_engines =
+ adev->gfx.config.gb_addr_config_fields.num_se;
+ tiling_info->gfx9.max_compressed_frags =
+ adev->gfx.config.gb_addr_config_fields.max_compress_frags;
+ tiling_info->gfx9.num_rb_per_se =
+ adev->gfx.config.gb_addr_config_fields.num_rb_per_se;
+ tiling_info->gfx9.shaderEnable = 1;
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
+ tiling_info->gfx9.num_pkrs = adev->gfx.config.gb_addr_config_fields.num_pkrs;
+}
+
+/*
+ * amdgpu_dm_plane_fill_gfx9_tiling_info_from_modifier() - fill GFX9 tiling
+ * info from the device defaults, then override it from @modifier.
+ *
+ * For non-AMD modifiers only the device defaults are applied. For AMD
+ * modifiers num_pipes and num_shader_engines are derived from the
+ * PIPE_XOR_BITS field: up to 5 bits go to pipes, the remainder to shader
+ * engines. Families >= AMDGPU_FAMILY_NV then take num_pkrs from PACKERS;
+ * older families take num_banks from BANK_XOR_BITS.
+ */
+static void amdgpu_dm_plane_fill_gfx9_tiling_info_from_modifier(const struct amdgpu_device *adev,
+ struct dc_tiling_info *tiling_info,
+ uint64_t modifier)
+{
+ /* fields are extracted up front but only used after the IS_AMD_FMT_MOD() check */
+ unsigned int mod_bank_xor_bits = AMD_FMT_MOD_GET(BANK_XOR_BITS, modifier);
+ unsigned int mod_pipe_xor_bits = AMD_FMT_MOD_GET(PIPE_XOR_BITS, modifier);
+ unsigned int pkrs_log2 = AMD_FMT_MOD_GET(PACKERS, modifier);
+ unsigned int pipes_log2;
+
+ pipes_log2 = min(5u, mod_pipe_xor_bits);
+
+ amdgpu_dm_plane_fill_gfx9_tiling_info_from_device(adev, tiling_info);
+
+ if (!IS_AMD_FMT_MOD(modifier))
+ return;
+
+ tiling_info->gfx9.num_pipes = 1u << pipes_log2;
+ /* pipe XOR bits beyond the first 5 are attributed to shader engines */
+ tiling_info->gfx9.num_shader_engines = 1u << (mod_pipe_xor_bits - pipes_log2);
+
+ if (adev->family >= AMDGPU_FAMILY_NV) {
+ tiling_info->gfx9.num_pkrs = 1u << pkrs_log2;
+ } else {
+ tiling_info->gfx9.num_banks = 1u << mod_bank_xor_bits;
+
+ /* for DCC we know it isn't rb aligned, so rb_per_se doesn't matter. */
+ }
+}
+
+/*
+ * amdgpu_dm_plane_validate_dcc() - check that the requested DCC setup is
+ * supported by DC for this surface.
+ *
+ * Return: 0 when DCC is disabled or the configuration is accepted; -EINVAL
+ * when
+ * - a video format is used on a family older than AMDGPU_FAMILY_GC_12_0_0,
+ * - DC has no get_dcc_compression_cap hook or the hook fails,
+ * - DC reports the surface as not DCC capable, or
+ * - DC requires independent 64B blocks but @dcc does not enable them.
+ *
+ * @address is not used by this function; of @tiling_info only gfx9.swizzle
+ * is read.
+ */
+static int amdgpu_dm_plane_validate_dcc(struct amdgpu_device *adev,
+ const enum surface_pixel_format format,
+ const enum dc_rotation_angle rotation,
+ const struct dc_tiling_info *tiling_info,
+ const struct dc_plane_dcc_param *dcc,
+ const struct dc_plane_address *address,
+ const struct plane_size *plane_size)
+{
+ struct dc *dc = adev->dm.dc;
+ struct dc_dcc_surface_param input;
+ struct dc_surface_dcc_cap output;
+
+ memset(&input, 0, sizeof(input));
+ memset(&output, 0, sizeof(output));
+
+ if (!dcc->enable)
+ return 0;
+
+ if (adev->family < AMDGPU_FAMILY_GC_12_0_0 &&
+ format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN)
+ return -EINVAL;
+
+ if (!dc->cap_funcs.get_dcc_compression_cap)
+ return -EINVAL;
+
+ input.format = format;
+ input.surface_size.width = plane_size->surface_size.width;
+ input.surface_size.height = plane_size->surface_size.height;
+ input.swizzle_mode = tiling_info->gfx9.swizzle;
+
+ /* any other rotation value leaves input.scan at its zeroed default */
+ if (rotation == ROTATION_ANGLE_0 || rotation == ROTATION_ANGLE_180)
+ input.scan = SCAN_DIRECTION_HORIZONTAL;
+ else if (rotation == ROTATION_ANGLE_90 || rotation == ROTATION_ANGLE_270)
+ input.scan = SCAN_DIRECTION_VERTICAL;
+
+ if (!dc->cap_funcs.get_dcc_compression_cap(dc, &input, &output))
+ return -EINVAL;
+
+ if (!output.capable)
+ return -EINVAL;
+
+ /* hardware asks for independent 64B blocks but the surface was not set up that way */
+ if (dcc->independent_64b_blks == 0 &&
+ output.grph.rgb.independent_64b_blks != 0)
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers() - fill tiling,
+ * DCC and DCC metadata address for a GFX9-style framebuffer from its format
+ * modifier, then validate the DCC setup.
+ *
+ * @tiling_info: filled from the modifier; gfxversion is set to DcGfxVersion9.
+ * @dcc: filled only when the modifier has DCC; meta_pitch comes from plane 1
+ *	of the framebuffer.
+ * @address: grph.meta_addr is set to afb->address + offsets[1] when the
+ *	modifier has DCC.
+ *
+ * Return: the result of amdgpu_dm_plane_validate_dcc() (0 or a negative
+ * errno).
+ */
+static int amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers(struct amdgpu_device *adev,
+ const struct amdgpu_framebuffer *afb,
+ const enum surface_pixel_format format,
+ const enum dc_rotation_angle rotation,
+ const struct plane_size *plane_size,
+ struct dc_tiling_info *tiling_info,
+ struct dc_plane_dcc_param *dcc,
+ struct dc_plane_address *address)
+{
+ const uint64_t modifier = afb->base.modifier;
+ int ret = 0;
+
+ amdgpu_dm_plane_fill_gfx9_tiling_info_from_modifier(adev, tiling_info, modifier);
+ tiling_info->gfx9.swizzle = amdgpu_dm_plane_modifier_gfx9_swizzle_mode(modifier);
+ tiling_info->gfxversion = DcGfxVersion9;
+
+ if (amdgpu_dm_plane_modifier_has_dcc(modifier)) {
+ /* DCC metadata is addressed via the offset of framebuffer plane 1 */
+ uint64_t dcc_address = afb->address + afb->base.offsets[1];
+ bool independent_64b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier);
+ bool independent_128b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier);
+
+ dcc->enable = 1;
+ dcc->meta_pitch = afb->base.pitches[1];
+ dcc->independent_64b_blks = independent_64b_blks;
+ if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) {
+ if (independent_64b_blks && independent_128b_blks)
+ dcc->dcc_ind_blk = hubp_ind_block_64b_no_128bcl;
+ else if (independent_128b_blks)
+ dcc->dcc_ind_blk = hubp_ind_block_128b;
+ /* the !independent_128b_blks test is already implied by the branches above */
+ else if (independent_64b_blks && !independent_128b_blks)
+ dcc->dcc_ind_blk = hubp_ind_block_64b;
+ else
+ dcc->dcc_ind_blk = hubp_ind_block_unconstrained;
+ } else {
+ /* older tile versions only distinguish 64B-independent from unconstrained */
+ if (independent_64b_blks)
+ dcc->dcc_ind_blk = hubp_ind_block_64b;
+ else
+ dcc->dcc_ind_blk = hubp_ind_block_unconstrained;
+ }
+
+ address->grph.meta_addr.low_part = lower_32_bits(dcc_address);
+ address->grph.meta_addr.high_part = upper_32_bits(dcc_address);
+ }
+
+ ret = amdgpu_dm_plane_validate_dcc(adev, format, rotation, tiling_info, dcc, address, plane_size);
+ if (ret)
+ drm_dbg_kms(adev_to_drm(adev), "amdgpu_dm_plane_validate_dcc: returned error: %d\n", ret);
+
+ return ret;
+}
+
+/*
+ * GFX12 counterpart of the GFX9 modifier parser: the tiling info is taken
+ * from the device, the swizzle mode and DCC block constraints from the
+ * framebuffer modifier, and the result is validated with
+ * amdgpu_dm_plane_validate_dcc().
+ *
+ * Returns 0 on success, or the error reported by the DCC validation.
+ */
+static int amdgpu_dm_plane_fill_gfx12_plane_attributes_from_modifiers(struct amdgpu_device *adev,
+								       const struct amdgpu_framebuffer *afb,
+								       const enum surface_pixel_format format,
+								       const enum dc_rotation_angle rotation,
+								       const struct plane_size *plane_size,
+								       struct dc_tiling_info *tiling_info,
+								       struct dc_plane_dcc_param *dcc,
+								       struct dc_plane_address *address)
+{
+	const uint64_t modifier = afb->base.modifier;
+	int ret;
+
+	/* TODO: Most of this function shouldn't be needed on GFX12. */
+	amdgpu_dm_plane_fill_gfx9_tiling_info_from_device(adev, tiling_info);
+
+	tiling_info->gfx9.swizzle = amdgpu_dm_plane_modifier_gfx9_swizzle_mode(modifier);
+	tiling_info->gfxversion = DcGfxAddr3;
+
+	if (amdgpu_dm_plane_modifier_has_dcc(modifier)) {
+		const int max_block = AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier);
+
+		dcc->enable = 1;
+		dcc->independent_64b_blks = (max_block == 0);
+
+		/* Map the modifier's max compressed block field to the HUBP setting. */
+		switch (max_block) {
+		case 0:
+			dcc->dcc_ind_blk = hubp_ind_block_64b;
+			break;
+		case 1:
+			dcc->dcc_ind_blk = hubp_ind_block_128b;
+			break;
+		default:
+			dcc->dcc_ind_blk = hubp_ind_block_unconstrained;
+			break;
+		}
+	}
+
+	/* TODO: This seems wrong because there is no DCC plane on GFX12. */
+	ret = amdgpu_dm_plane_validate_dcc(adev, format, rotation, tiling_info, dcc, address, plane_size);
+	if (ret)
+		drm_dbg_kms(adev_to_drm(adev), "amdgpu_dm_plane_validate_dcc: returned error: %d\n", ret);
+
+	return ret;
+}
+
+/*
+ * Append the modifiers advertised for GFX10.1 parts to the list, in this
+ * order: R_X with DCC, R_X with DCC and retiling, plain R_X, plain S_X, and
+ * finally the GFX9 64K_D / 64K_S layouts.
+ */
+static void amdgpu_dm_plane_add_gfx10_1_modifiers(const struct amdgpu_device *adev,
+						  uint64_t **mods,
+						  uint64_t *size,
+						  uint64_t *capacity)
+{
+	int pipe_xor_bits = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes);
+	/* Bits shared by every GFX10 tiled modifier below. */
+	uint64_t gfx10_base = AMD_FMT_MOD |
+			      AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) |
+			      AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits);
+	uint64_t mod_r_x = gfx10_base |
+			   AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X);
+	uint64_t mod_s_x = gfx10_base |
+			   AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X);
+	uint64_t mod_r_x_dcc = mod_r_x |
+			       AMD_FMT_MOD_SET(DCC, 1) |
+			       AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
+			       AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
+			       AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B);
+	uint64_t gfx9_base = AMD_FMT_MOD |
+			     AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9);
+
+	amdgpu_dm_plane_add_modifier(mods, size, capacity, mod_r_x_dcc);
+	amdgpu_dm_plane_add_modifier(mods, size, capacity,
+				     mod_r_x_dcc | AMD_FMT_MOD_SET(DCC_RETILE, 1));
+	amdgpu_dm_plane_add_modifier(mods, size, capacity, mod_r_x);
+	amdgpu_dm_plane_add_modifier(mods, size, capacity, mod_s_x);
+
+	/* Only supported for 64bpp, will be filtered in amdgpu_dm_plane_format_mod_supported */
+	amdgpu_dm_plane_add_modifier(mods, size, capacity,
+				     gfx9_base | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D));
+	amdgpu_dm_plane_add_modifier(mods, size, capacity,
+				     gfx9_base | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S));
+}
+
+/*
+ * Append the modifiers advertised for GFX9 parts to the list.
+ *
+ * The DCC variants and the S swizzles are only added when the device family
+ * is AMDGPU_FAMILY_RV; the D swizzles are added for every caller.
+ */
+static void amdgpu_dm_plane_add_gfx9_modifiers(const struct amdgpu_device *adev,
+					       uint64_t **mods,
+					       uint64_t *size,
+					       uint64_t *capacity)
+{
+	int pipes = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes);
+	int pipe_xor_bits = min(8, pipes +
+				ilog2(adev->gfx.config.gb_addr_config_fields.num_se));
+	int bank_xor_bits = min(8 - pipe_xor_bits,
+				ilog2(adev->gfx.config.gb_addr_config_fields.num_banks));
+	int rb = ilog2(adev->gfx.config.gb_addr_config_fields.num_se) +
+		 ilog2(adev->gfx.config.gb_addr_config_fields.num_rb_per_se);
+	bool is_rv = adev->family == AMDGPU_FAMILY_RV;
+	/* Building blocks shared by the modifiers below. */
+	uint64_t gfx9_base = AMD_FMT_MOD |
+			     AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9);
+	uint64_t xor_bits = AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
+			    AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits);
+	uint64_t mod_s_x = gfx9_base | xor_bits |
+			   AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X);
+	uint64_t mod_d_x = gfx9_base | xor_bits |
+			   AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X);
+
+	if (is_rv) {
+		/* Raven2 and later */
+		bool has_constant_encode = adev->asic_type > CHIP_RAVEN || adev->external_rev_id >= 0x81;
+		uint64_t mod_dcc = mod_s_x |
+				   AMD_FMT_MOD_SET(DCC, 1) |
+				   AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
+				   AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B);
+		uint64_t mod_dcc_retile = mod_dcc |
+					  AMD_FMT_MOD_SET(DCC_RETILE, 1) |
+					  AMD_FMT_MOD_SET(RB, rb) |
+					  AMD_FMT_MOD_SET(PIPE, pipes);
+
+		/*
+		 * No _D DCC swizzles yet because we only allow 32bpp, which
+		 * doesn't support _D on DCN
+		 */
+
+		if (has_constant_encode)
+			amdgpu_dm_plane_add_modifier(mods, size, capacity,
+						     mod_dcc | AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1));
+
+		amdgpu_dm_plane_add_modifier(mods, size, capacity,
+					     mod_dcc | AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 0));
+
+		if (has_constant_encode)
+			amdgpu_dm_plane_add_modifier(mods, size, capacity,
+						     mod_dcc_retile | AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1));
+
+		amdgpu_dm_plane_add_modifier(mods, size, capacity,
+					     mod_dcc_retile | AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 0));
+	}
+
+	/*
+	 * Only supported for 64bpp on Raven, will be filtered on format in
+	 * amdgpu_dm_plane_format_mod_supported.
+	 */
+	amdgpu_dm_plane_add_modifier(mods, size, capacity, mod_d_x);
+
+	if (is_rv)
+		amdgpu_dm_plane_add_modifier(mods, size, capacity, mod_s_x);
+
+	/*
+	 * Only supported for 64bpp on Raven, will be filtered on format in
+	 * amdgpu_dm_plane_format_mod_supported.
+	 */
+	amdgpu_dm_plane_add_modifier(mods, size, capacity,
+				     gfx9_base | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D));
+
+	if (is_rv)
+		amdgpu_dm_plane_add_modifier(mods, size, capacity,
+					     gfx9_base | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S));
+}
+
+/*
+ * Append the modifiers advertised for GFX10.3 (RB+) parts to the list, in
+ * this order: the two R_X DCC variants (64B and 128B max compressed block),
+ * the same two with retiling, plain R_X, plain S_X, and finally the GFX9
+ * 64K_D / 64K_S layouts.
+ */
+static void amdgpu_dm_plane_add_gfx10_3_modifiers(const struct amdgpu_device *adev,
+						  uint64_t **mods,
+						  uint64_t *size,
+						  uint64_t *capacity)
+{
+	int pipe_xor_bits = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes);
+	int pkrs = ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs);
+	/* Bits shared by every RB+ tiled modifier below. */
+	uint64_t rbplus_base = AMD_FMT_MOD |
+			       AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) |
+			       AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
+			       AMD_FMT_MOD_SET(PACKERS, pkrs);
+	uint64_t mod_r_x = rbplus_base |
+			   AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X);
+	uint64_t mod_s_x = rbplus_base |
+			   AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X);
+	/* DCC bits common to the 64B and 128B variants. */
+	uint64_t dcc_common = mod_r_x |
+			      AMD_FMT_MOD_SET(DCC, 1) |
+			      AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
+			      AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1);
+	uint64_t dcc_64b = dcc_common |
+			   AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
+			   AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B);
+	uint64_t dcc_128b = dcc_common |
+			    AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B);
+	uint64_t gfx9_base = AMD_FMT_MOD |
+			     AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9);
+
+	amdgpu_dm_plane_add_modifier(mods, size, capacity, dcc_64b);
+	amdgpu_dm_plane_add_modifier(mods, size, capacity, dcc_128b);
+	amdgpu_dm_plane_add_modifier(mods, size, capacity,
+				     dcc_64b | AMD_FMT_MOD_SET(DCC_RETILE, 1));
+	amdgpu_dm_plane_add_modifier(mods, size, capacity,
+				     dcc_128b | AMD_FMT_MOD_SET(DCC_RETILE, 1));
+	amdgpu_dm_plane_add_modifier(mods, size, capacity, mod_r_x);
+	amdgpu_dm_plane_add_modifier(mods, size, capacity, mod_s_x);
+
+	/* Only supported for 64bpp, will be filtered in amdgpu_dm_plane_format_mod_supported */
+	amdgpu_dm_plane_add_modifier(mods, size, capacity,
+				     gfx9_base | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D));
+	amdgpu_dm_plane_add_modifier(mods, size, capacity,
+				     gfx9_base | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S));
+}
+
+/*
+ * Append the modifiers advertised for GFX11 parts to the list.
+ *
+ * For each of the two R_X swizzle modes (preferred one first) five entries
+ * are added: best DCC, 4K-capable DCC, both again with retiling, and the
+ * plain non-DCC modifier. A GFX11 64K_D modifier closes the list.
+ */
+static void amdgpu_dm_plane_add_gfx11_modifiers(struct amdgpu_device *adev,
+		      uint64_t **mods, uint64_t *size, uint64_t *capacity)
+{
+	unsigned int swizzle_order[2];
+	unsigned int idx;
+	u32 gb_addr_config;
+	int num_pkrs, pkrs;
+	int num_pipes, pipe_xor_bits;
+
+	/* TODO: GFX11 IP HW init hasn't finished and we get zero if we read from
+	 * adev->gfx.config.gb_addr_config_fields.num_{pkrs,pipes}
+	 */
+	gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
+	ASSERT(gb_addr_config != 0);
+
+	num_pkrs = 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
+	pkrs = ilog2(num_pkrs);
+	num_pipes = 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PIPES);
+	pipe_xor_bits = ilog2(num_pipes);
+
+	/*
+	 * Insert the best one first.
+	 * R_X swizzle modes are the best for rendering and DCC requires them.
+	 * 256K is listed first when there are more than 16 pipes, 64K otherwise.
+	 */
+	if (num_pipes > 16) {
+		swizzle_order[0] = AMD_FMT_MOD_TILE_GFX11_256K_R_X;
+		swizzle_order[1] = AMD_FMT_MOD_TILE_GFX9_64K_R_X;
+	} else {
+		swizzle_order[0] = AMD_FMT_MOD_TILE_GFX9_64K_R_X;
+		swizzle_order[1] = AMD_FMT_MOD_TILE_GFX11_256K_R_X;
+	}
+
+	for (idx = 0; idx < ARRAY_SIZE(swizzle_order); idx++) {
+		const uint64_t mod_r_x = AMD_FMT_MOD |
+			AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) |
+			AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
+			AMD_FMT_MOD_SET(TILE, swizzle_order[idx]) |
+			AMD_FMT_MOD_SET(PACKERS, pkrs);
+
+		/* DCC_CONSTANT_ENCODE is not set because it can't vary with gfx11 (it's implied to be 1). */
+		const uint64_t mod_dcc_best = mod_r_x | AMD_FMT_MOD_SET(DCC, 1) |
+			AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 0) |
+			AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
+			AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B);
+
+		/* DCC settings for 4K and greater resolutions. (required by display hw) */
+		const uint64_t mod_dcc_4k = mod_r_x | AMD_FMT_MOD_SET(DCC, 1) |
+			AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
+			AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
+			AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B);
+
+		amdgpu_dm_plane_add_modifier(mods, size, capacity, mod_dcc_best);
+		amdgpu_dm_plane_add_modifier(mods, size, capacity, mod_dcc_4k);
+
+		amdgpu_dm_plane_add_modifier(mods, size, capacity,
+					     mod_dcc_best | AMD_FMT_MOD_SET(DCC_RETILE, 1));
+		amdgpu_dm_plane_add_modifier(mods, size, capacity,
+					     mod_dcc_4k | AMD_FMT_MOD_SET(DCC_RETILE, 1));
+
+		amdgpu_dm_plane_add_modifier(mods, size, capacity, mod_r_x);
+	}
+
+	amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+				     AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) |
+				     AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D));
+}
+
+/*
+ * Append the modifiers advertised for GFX12 parts to the list: first every
+ * tiled layout combined with DCC and each max-compressed-block setting
+ * (largest tile first), then the same tiled layouts without DCC, then linear.
+ */
+static void amdgpu_dm_plane_add_gfx12_modifiers(struct amdgpu_device *adev,
+		      uint64_t **mods, uint64_t *size, uint64_t *capacity)
+{
+	static const uint8_t max_comp_block[] = {2, 1, 0};
+	const uint64_t ver = AMD_FMT_MOD | AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX12);
+	const uint64_t dcc = ver | AMD_FMT_MOD_SET(DCC, 1);
+	/* Tiled layouts, best (largest) first. */
+	const uint64_t tiled[] = {
+		ver | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX12_256K_2D),
+		ver | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX12_64K_2D),
+		ver | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX12_4K_2D),
+		ver | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX12_256B_2D),
+	};
+	size_t layout, blk;
+
+	/* With DCC: Best choice should be kept first. Hence, add all 256k modifiers of different
+	 * max compressed blocks first and then move on to the next smaller sized layouts.
+	 * The linear modifier never gets a DCC variant.
+	 */
+	for (layout = 0; layout < ARRAY_SIZE(tiled); layout++)
+		for (blk = 0; blk < ARRAY_SIZE(max_comp_block); blk++)
+			amdgpu_dm_plane_add_modifier(mods, size, capacity,
+				dcc | tiled[layout] |
+				AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, max_comp_block[blk]));
+
+	/* Without DCC. Add all modifiers including linear at the end */
+	for (layout = 0; layout < ARRAY_SIZE(tiled); layout++)
+		amdgpu_dm_plane_add_modifier(mods, size, capacity, tiled[layout]);
+
+	amdgpu_dm_plane_add_modifier(mods, size, capacity, DRM_FORMAT_MOD_LINEAR);
+}
+
+/*
+ * Build the format-modifier list for a plane.
+ *
+ * On return *mods is NULL with a 0 result for pre-GFX9 families (no modifiers
+ * hooked up), NULL with -ENOMEM when the list could not be allocated, or a
+ * kmalloc'ed array terminated by DRM_FORMAT_MOD_INVALID. Cursor planes only
+ * get the linear modifier; other planes get the family-specific list followed
+ * by linear.
+ */
+static int amdgpu_dm_plane_get_plane_modifiers(struct amdgpu_device *adev, unsigned int plane_type, uint64_t **mods)
+{
+	uint64_t size = 0, capacity = 128;
+
+	*mods = NULL;
+
+	/* We have not hooked up any pre-GFX9 modifiers. */
+	if (adev->family < AMDGPU_FAMILY_AI)
+		return 0;
+
+	*mods = kmalloc_array(capacity, sizeof(uint64_t), GFP_KERNEL);
+
+	/* Cursor planes skip the family-specific tiled modifiers entirely. */
+	if (plane_type != DRM_PLANE_TYPE_CURSOR) {
+		switch (adev->family) {
+		case AMDGPU_FAMILY_AI:
+		case AMDGPU_FAMILY_RV:
+			amdgpu_dm_plane_add_gfx9_modifiers(adev, mods, &size, &capacity);
+			break;
+		case AMDGPU_FAMILY_NV:
+		case AMDGPU_FAMILY_VGH:
+		case AMDGPU_FAMILY_YC:
+		case AMDGPU_FAMILY_GC_10_3_6:
+		case AMDGPU_FAMILY_GC_10_3_7:
+			if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
+				amdgpu_dm_plane_add_gfx10_3_modifiers(adev, mods, &size, &capacity);
+			else
+				amdgpu_dm_plane_add_gfx10_1_modifiers(adev, mods, &size, &capacity);
+			break;
+		case AMDGPU_FAMILY_GC_11_0_0:
+		case AMDGPU_FAMILY_GC_11_0_1:
+		case AMDGPU_FAMILY_GC_11_5_0:
+			amdgpu_dm_plane_add_gfx11_modifiers(adev, mods, &size, &capacity);
+			break;
+		case AMDGPU_FAMILY_GC_12_0_0:
+			amdgpu_dm_plane_add_gfx12_modifiers(adev, mods, &size, &capacity);
+			break;
+		}
+	}
+
+	amdgpu_dm_plane_add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR);
+
+	/* INVALID marks the end of the list. */
+	amdgpu_dm_plane_add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_INVALID);
+
+	/* *mods is NULL here if the allocation above failed. */
+	return *mods ? 0 : -ENOMEM;
+}
+
+/* Append @format to @formats unless the array already holds @max_formats entries. */
+static void amdgpu_dm_plane_append_format(uint32_t *formats, int *num_formats,
+					  int max_formats, uint32_t format)
+{
+	if (*num_formats < max_formats)
+		formats[(*num_formats)++] = format;
+}
+
+/*
+ * Fill @formats with the DRM fourcc codes supported by @plane and return the
+ * number of entries written.
+ *
+ * Primary planes, and non-cursor planes whose DC cap type is
+ * DC_PLANE_TYPE_DCN_UNIVERSAL, get the RGB list plus NV12/P010/FP16 when the
+ * plane caps report them. Other overlay and cursor planes get their dedicated
+ * lists. At most @max_formats entries are ever written; formats that do not
+ * fit are dropped. @plane_cap may be NULL.
+ */
+static int amdgpu_dm_plane_get_plane_formats(const struct drm_plane *plane,
+					     const struct dc_plane_cap *plane_cap,
+					     uint32_t *formats, int max_formats)
+{
+	int i, num_formats = 0;
+
+	/*
+	 * TODO: Query support for each group of formats directly from
+	 * DC plane caps. This will require adding more formats to the
+	 * caps list.
+	 */
+
+	if (plane->type == DRM_PLANE_TYPE_PRIMARY ||
+	    (plane_cap && plane_cap->type == DC_PLANE_TYPE_DCN_UNIVERSAL && plane->type != DRM_PLANE_TYPE_CURSOR)) {
+		for (i = 0; i < ARRAY_SIZE(rgb_formats); ++i)
+			amdgpu_dm_plane_append_format(formats, &num_formats,
+						      max_formats, rgb_formats[i]);
+
+		/*
+		 * The optional formats go through the same bounded append as
+		 * the RGB list so that a small @max_formats cannot be overrun.
+		 */
+		if (plane_cap && plane_cap->pixel_format_support.nv12)
+			amdgpu_dm_plane_append_format(formats, &num_formats,
+						      max_formats, DRM_FORMAT_NV12);
+		if (plane_cap && plane_cap->pixel_format_support.p010)
+			amdgpu_dm_plane_append_format(formats, &num_formats,
+						      max_formats, DRM_FORMAT_P010);
+		if (plane_cap && plane_cap->pixel_format_support.fp16) {
+			amdgpu_dm_plane_append_format(formats, &num_formats,
+						      max_formats, DRM_FORMAT_XRGB16161616F);
+			amdgpu_dm_plane_append_format(formats, &num_formats,
+						      max_formats, DRM_FORMAT_ARGB16161616F);
+			amdgpu_dm_plane_append_format(formats, &num_formats,
+						      max_formats, DRM_FORMAT_XBGR16161616F);
+			amdgpu_dm_plane_append_format(formats, &num_formats,
+						      max_formats, DRM_FORMAT_ABGR16161616F);
+		}
+	} else {
+		switch (plane->type) {
+		case DRM_PLANE_TYPE_OVERLAY:
+			for (i = 0; i < ARRAY_SIZE(overlay_formats); ++i)
+				amdgpu_dm_plane_append_format(formats, &num_formats,
+							      max_formats, overlay_formats[i]);
+			break;
+
+		case DRM_PLANE_TYPE_CURSOR:
+			for (i = 0; i < ARRAY_SIZE(cursor_formats); ++i)
+				amdgpu_dm_plane_append_format(formats, &num_formats,
+							      max_formats, cursor_formats[i]);
+			break;
+
+		default:
+			break;
+		}
+	}
+
+	return num_formats;
+}
+
+/*
+ * Fill the DC buffer description of a framebuffer: plane size and pitch,
+ * surface address(es), tiling info and DCC parameters.
+ *
+ * All four output structures are zeroed first. Formats below
+ * SURFACE_PIXEL_FORMAT_VIDEO_BEGIN are described as a single graphics
+ * surface; other formats below SURFACE_PIXEL_FORMAT_INVALID as a progressive
+ * video surface with separate luma and chroma addresses. Tiling/DCC are then
+ * derived from the modifier (GFX9 and newer) or from @tiling_flags (older).
+ *
+ * Returns 0 on success or the error from the modifier-based helpers.
+ */
+int amdgpu_dm_plane_fill_plane_buffer_attributes(struct amdgpu_device *adev,
+						 const struct amdgpu_framebuffer *afb,
+						 const enum surface_pixel_format format,
+						 const enum dc_rotation_angle rotation,
+						 const uint64_t tiling_flags,
+						 struct dc_tiling_info *tiling_info,
+						 struct plane_size *plane_size,
+						 struct dc_plane_dcc_param *dcc,
+						 struct dc_plane_address *address,
+						 bool tmz_surface)
+{
+	const struct drm_framebuffer *fb = &afb->base;
+	const bool is_graphics = format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN;
+	const bool is_video = !is_graphics && format < SURFACE_PIXEL_FORMAT_INVALID;
+	int ret = 0;
+
+	memset(tiling_info, 0, sizeof(*tiling_info));
+	memset(plane_size, 0, sizeof(*plane_size));
+	memset(dcc, 0, sizeof(*dcc));
+	memset(address, 0, sizeof(*address));
+
+	address->tmz_surface = tmz_surface;
+
+	/* The primary (luma or RGB) surface is described the same way for both kinds. */
+	if (is_graphics || is_video) {
+		plane_size->surface_size.x = 0;
+		plane_size->surface_size.y = 0;
+		plane_size->surface_size.width = fb->width;
+		plane_size->surface_size.height = fb->height;
+		/* DRM pitches are in bytes; divide by bytes-per-pixel for DC. */
+		plane_size->surface_pitch = fb->pitches[0] / fb->format->cpp[0];
+	}
+
+	if (is_graphics) {
+		const uint64_t addr = afb->address + fb->offsets[0];
+
+		address->type = PLN_ADDR_TYPE_GRAPHICS;
+		address->grph.addr.low_part = lower_32_bits(addr);
+		address->grph.addr.high_part = upper_32_bits(addr);
+	} else if (is_video) {
+		const uint64_t luma_addr = afb->address + fb->offsets[0];
+		const uint64_t chroma_addr = afb->address + fb->offsets[1];
+
+		plane_size->chroma_size.x = 0;
+		plane_size->chroma_size.y = 0;
+		/* TODO: set these based on surface format */
+		plane_size->chroma_size.width = fb->width / 2;
+		plane_size->chroma_size.height = fb->height / 2;
+		plane_size->chroma_pitch = fb->pitches[1] / fb->format->cpp[1];
+
+		address->type = PLN_ADDR_TYPE_VIDEO_PROGRESSIVE;
+		address->video_progressive.luma_addr.low_part = lower_32_bits(luma_addr);
+		address->video_progressive.luma_addr.high_part = upper_32_bits(luma_addr);
+		address->video_progressive.chroma_addr.low_part = lower_32_bits(chroma_addr);
+		address->video_progressive.chroma_addr.high_part = upper_32_bits(chroma_addr);
+	}
+
+	if (adev->family >= AMDGPU_FAMILY_GC_12_0_0)
+		ret = amdgpu_dm_plane_fill_gfx12_plane_attributes_from_modifiers(adev, afb, format,
+										 rotation, plane_size,
+										 tiling_info, dcc,
+										 address);
+	else if (adev->family >= AMDGPU_FAMILY_AI)
+		ret = amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers(adev, afb, format,
+										rotation, plane_size,
+										tiling_info, dcc,
+										address);
+	else
+		amdgpu_dm_plane_fill_gfx8_tiling_info_from_flags(tiling_info, tiling_flags);
+
+	return ret;
+}
+
+/*
+ * Prepare the framebuffer of @new_state for scanout: reserve its buffer
+ * object, pin it, bind it with amdgpu_ttm_alloc_gart(), run the generic GEM
+ * prepare_fb helper and record the resulting GPU offset in afb->address.
+ * A reference on the buffer object is taken on success.
+ *
+ * Returns 0 on success (including when no framebuffer is bound) or a negative
+ * error code; on failure the buffer object is unpinned (if it was pinned) and
+ * unreserved again.
+ */
+static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane,
+ struct drm_plane_state *new_state)
+{
+ struct amdgpu_framebuffer *afb;
+ struct drm_gem_object *obj;
+ struct amdgpu_device *adev;
+ struct amdgpu_bo *rbo;
+ struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old;
+ uint32_t domain;
+ int r;
+
+ /* Nothing to pin when the plane has no framebuffer. */
+ if (!new_state->fb) {
+ DRM_DEBUG_KMS("No FB bound\n");
+ return 0;
+ }
+
+ afb = to_amdgpu_framebuffer(new_state->fb);
+ obj = drm_gem_fb_get_obj(new_state->fb, 0);
+ if (!obj) {
+ DRM_ERROR("Failed to get obj from framebuffer\n");
+ return -EINVAL;
+ }
+
+ rbo = gem_to_amdgpu_bo(obj);
+ adev = amdgpu_ttm_adev(rbo->tbo.bdev);
+ r = amdgpu_bo_reserve(rbo, true);
+ if (r) {
+ drm_err(adev_to_drm(adev), "fail to reserve bo (%d)\n", r);
+ return r;
+ }
+
+ /* From here on every failure has to drop the reservation again. */
+ r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1);
+ if (r) {
+ drm_err(adev_to_drm(adev), "reserving fence slot failed (%d)\n", r);
+ goto error_unlock;
+ }
+
+ /* Cursor buffers are always pinned to VRAM; other planes ask the display code. */
+ if (plane->type != DRM_PLANE_TYPE_CURSOR)
+ domain = amdgpu_display_supported_domains(adev, rbo->flags);
+ else
+ domain = AMDGPU_GEM_DOMAIN_VRAM;
+
+ rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ r = amdgpu_bo_pin(rbo, domain);
+ if (unlikely(r != 0)) {
+ /* -ERESTARTSYS is not reported as an error in the log. */
+ if (r != -ERESTARTSYS)
+ DRM_ERROR("Failed to pin framebuffer with error %d\n", r);
+ goto error_unlock;
+ }
+
+ /* From here on every failure also has to undo the pin. */
+ r = amdgpu_ttm_alloc_gart(&rbo->tbo);
+ if (unlikely(r != 0)) {
+ DRM_ERROR("%p bind failed\n", rbo);
+ goto error_unpin;
+ }
+
+ r = drm_gem_plane_helper_prepare_fb(plane, new_state);
+ if (unlikely(r != 0))
+ goto error_unpin;
+
+ amdgpu_bo_unreserve(rbo);
+
+ afb->address = amdgpu_bo_gpu_offset(rbo);
+
+ /* Extra reference held for as long as the buffer stays pinned for this plane. */
+ amdgpu_bo_ref(rbo);
+
+ /*
+ * We don't do surface updates on planes that have been newly created,
+ * but we also don't have the afb->address during atomic check.
+ *
+ * Fill in buffer attributes depending on the address here, but only on
+ * newly created planes since they're not being used by DC yet and this
+ * won't modify global state.
+ */
+ dm_plane_state_old = to_dm_plane_state(plane->state);
+ dm_plane_state_new = to_dm_plane_state(new_state);
+
+ if (dm_plane_state_new->dc_state &&
+ dm_plane_state_old->dc_state != dm_plane_state_new->dc_state) {
+ struct dc_plane_state *plane_state =
+ dm_plane_state_new->dc_state;
+
+ /* NOTE(review): the return value of the fill helper is ignored here. */
+ amdgpu_dm_plane_fill_plane_buffer_attributes(
+ adev, afb, plane_state->format, plane_state->rotation,
+ afb->tiling_flags,
+ &plane_state->tiling_info, &plane_state->plane_size,
+ &plane_state->dcc, &plane_state->address,
+ afb->tmz_surface);
+ }
+
+ return 0;
+
+error_unpin:
+ amdgpu_bo_unpin(rbo);
+
+error_unlock:
+ amdgpu_bo_unreserve(rbo);
+ return r;
+}
+
+/*
+ * Release the framebuffer of @old_state after scanout: unpin its first
+ * buffer object and drop one reference on it. If the buffer object cannot be
+ * reserved, an error is logged and nothing is released.
+ */
+static void amdgpu_dm_plane_helper_cleanup_fb(struct drm_plane *plane,
+					      struct drm_plane_state *old_state)
+{
+	struct amdgpu_bo *bo;
+
+	/* Planes without a framebuffer never pinned anything. */
+	if (!old_state->fb)
+		return;
+
+	bo = gem_to_amdgpu_bo(old_state->fb->obj[0]);
+
+	/* Unpinning requires the buffer object to be reserved. */
+	if (unlikely(amdgpu_bo_reserve(bo, false))) {
+		DRM_ERROR("failed to reserve rbo before unpin\n");
+		return;
+	}
+
+	amdgpu_bo_unpin(bo);
+	amdgpu_bo_unreserve(bo);
+	amdgpu_bo_unref(&bo);
+}
+
+/*
+ * Look up the scaling limits for the pixel format of @fb in the caps of DC
+ * plane 0 and return them through @min_downscale and @max_upscale, in DC
+ * units where 1000 means a factor of 1.0.
+ */
+static void amdgpu_dm_plane_get_min_max_dc_plane_scaling(struct drm_device *dev,
+							 struct drm_framebuffer *fb,
+							 int *min_downscale, int *max_upscale)
+{
+	struct amdgpu_device *adev = drm_to_adev(dev);
+	/* Caps for all supported planes are the same on DCE and DCN 1 - 3 */
+	const struct dc_plane_cap *cap = &adev->dm.dc->caps.planes[0];
+	int upscale, downscale;
+
+	switch (fb->format->format) {
+	case DRM_FORMAT_P010:
+	case DRM_FORMAT_NV12:
+	case DRM_FORMAT_NV21:
+		upscale = cap->max_upscale_factor.nv12;
+		downscale = cap->max_downscale_factor.nv12;
+		break;
+
+	case DRM_FORMAT_XRGB16161616F:
+	case DRM_FORMAT_ARGB16161616F:
+	case DRM_FORMAT_XBGR16161616F:
+	case DRM_FORMAT_ABGR16161616F:
+		upscale = cap->max_upscale_factor.fp16;
+		downscale = cap->max_downscale_factor.fp16;
+		break;
+
+	default:
+		upscale = cap->max_upscale_factor.argb8888;
+		downscale = cap->max_downscale_factor.argb8888;
+		break;
+	}
+
+	/*
+	 * A factor of 1 in the plane_cap means to not allow scaling, ie. use a
+	 * scaling factor of 1.0 == 1000 units.
+	 */
+	*max_upscale = (upscale == 1) ? 1000 : upscale;
+	*min_downscale = (downscale == 1) ? 1000 : downscale;
+}
+
+/*
+ * Validate a plane state against its CRTC state.
+ *
+ * For an enabled non-cursor plane the visible viewport must lie at least
+ * partly on screen and be no smaller than MIN_VIEWPORT_SIZE (twice that in
+ * width). The per-format DC scaling limits are then converted to DRM units
+ * and handed to drm_atomic_helper_check_plane_state().
+ *
+ * Returns 0 on success, -EINVAL for a rejected viewport, or the helper's
+ * result.
+ */
+int amdgpu_dm_plane_helper_check_state(struct drm_plane_state *state,
+				       struct drm_crtc_state *new_crtc_state)
+{
+	struct drm_framebuffer *fb = state->fb;
+	int min_downscale, max_upscale;
+	int min_scale, max_scale;
+
+	/* Disabled plane: nothing to validate here and no scaling limits apply. */
+	if (!fb || !state->crtc)
+		return drm_atomic_helper_check_plane_state(
+			state, new_crtc_state, 0, INT_MAX, true, true);
+
+	/* Validate viewport to cover the case when only the position changes */
+	if (state->plane->type != DRM_PLANE_TYPE_CURSOR) {
+		int viewport_width = state->crtc_w;
+		int viewport_height = state->crtc_h;
+
+		/* Clip the viewport against the left/right screen edges. */
+		if (state->crtc_x < 0)
+			viewport_width += state->crtc_x;
+		else if (state->crtc_x + state->crtc_w > new_crtc_state->mode.crtc_hdisplay)
+			viewport_width = new_crtc_state->mode.crtc_hdisplay - state->crtc_x;
+
+		/* Clip the viewport against the top/bottom screen edges. */
+		if (state->crtc_y < 0)
+			viewport_height += state->crtc_y;
+		else if (state->crtc_y + state->crtc_h > new_crtc_state->mode.crtc_vdisplay)
+			viewport_height = new_crtc_state->mode.crtc_vdisplay - state->crtc_y;
+
+		if (viewport_width < 0 || viewport_height < 0) {
+			DRM_DEBUG_ATOMIC("Plane completely outside of screen\n");
+			return -EINVAL;
+		}
+
+		/* x2 for width is because of pipe-split. */
+		if (viewport_width < MIN_VIEWPORT_SIZE*2) {
+			DRM_DEBUG_ATOMIC("Viewport width %d smaller than %d\n", viewport_width, MIN_VIEWPORT_SIZE*2);
+			return -EINVAL;
+		}
+
+		if (viewport_height < MIN_VIEWPORT_SIZE) {
+			DRM_DEBUG_ATOMIC("Viewport height %d smaller than %d\n", viewport_height, MIN_VIEWPORT_SIZE);
+			return -EINVAL;
+		}
+	}
+
+	/* Get min/max allowed scaling factors from plane caps. */
+	amdgpu_dm_plane_get_min_max_dc_plane_scaling(state->crtc->dev, fb,
+						     &min_downscale, &max_upscale);
+	/*
+	 * Convert to drm convention: 16.16 fixed point, instead of dc's
+	 * 1.0 == 1000. Also drm scaling is src/dst instead of dc's
+	 * dst/src, so min_scale = 1.0 / max_upscale, etc.
+	 */
+	min_scale = (1000 << 16) / max_upscale;
+	max_scale = (1000 << 16) / min_downscale;
+
+	return drm_atomic_helper_check_plane_state(
+		state, new_crtc_state, min_scale, max_scale, true, true);
+}
+
+/*
+ * Translate the source/destination rectangles of a DRM plane state into a
+ * dc_scaling_info and check the resulting scale factors against the DC plane
+ * caps for the framebuffer format.
+ *
+ * Returns 0 on success, or -EINVAL for an empty source or destination
+ * rectangle, an out-of-range scale factor, or a non-zero NV12 source offset
+ * on DCE IP versions 1.0.0 / 1.0.1. @scaling_info may be partially filled
+ * when an error is returned.
+ */
+int amdgpu_dm_plane_fill_dc_scaling_info(struct amdgpu_device *adev,
+					 const struct drm_plane_state *state,
+					 struct dc_scaling_info *scaling_info)
+{
+	int min_downscale, max_upscale;
+	int scale_w, scale_h;
+	bool is_dcn1x, is_nv12;
+
+	memset(scaling_info, 0, sizeof(*scaling_info));
+
+	/* Source is fixed 16.16 but we ignore mantissa for now... */
+	scaling_info->src_rect.x = state->src_x >> 16;
+	scaling_info->src_rect.y = state->src_y >> 16;
+
+	/*
+	 * For reasons we don't (yet) fully understand a non-zero
+	 * src_y coordinate into an NV12 buffer can cause a
+	 * system hang on DCN1x.
+	 * To avoid hangs (and maybe be overly cautious)
+	 * let's reject both non-zero src_x and src_y.
+	 *
+	 * We currently know of only one use-case to reproduce a
+	 * scenario with non-zero src_x and src_y for NV12, which
+	 * is to gesture the YouTube Android app into full screen
+	 * on ChromeOS.
+	 */
+	is_dcn1x = (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 0)) ||
+		   (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 1));
+	is_nv12 = state->fb && state->fb->format->format == DRM_FORMAT_NV12;
+	if (is_dcn1x && is_nv12 &&
+	    (scaling_info->src_rect.x != 0 || scaling_info->src_rect.y != 0))
+		return -EINVAL;
+
+	/* Empty source rectangles cannot be scaled. */
+	scaling_info->src_rect.width = state->src_w >> 16;
+	if (scaling_info->src_rect.width == 0)
+		return -EINVAL;
+
+	scaling_info->src_rect.height = state->src_h >> 16;
+	if (scaling_info->src_rect.height == 0)
+		return -EINVAL;
+
+	scaling_info->dst_rect.x = state->crtc_x;
+	scaling_info->dst_rect.y = state->crtc_y;
+
+	/* Neither can empty destination rectangles. */
+	if (state->crtc_w == 0)
+		return -EINVAL;
+
+	scaling_info->dst_rect.width = state->crtc_w;
+
+	if (state->crtc_h == 0)
+		return -EINVAL;
+
+	scaling_info->dst_rect.height = state->crtc_h;
+
+	/* DRM doesn't specify clipping on destination output. */
+	scaling_info->clip_rect = scaling_info->dst_rect;
+
+	/* Validate scaling per-format with DC plane caps */
+	if (state->plane && state->plane->dev && state->fb) {
+		amdgpu_dm_plane_get_min_max_dc_plane_scaling(state->plane->dev, state->fb,
+							     &min_downscale, &max_upscale);
+	} else {
+		/* Fallback limits when the caps cannot be looked up. */
+		min_downscale = 250;
+		max_upscale = 16000;
+	}
+
+	/* Scale factors in DC units: dst/src with 1000 == 1.0. */
+	scale_w = scaling_info->dst_rect.width * 1000 /
+		  scaling_info->src_rect.width;
+	if (scale_w < min_downscale || scale_w > max_upscale)
+		return -EINVAL;
+
+	scale_h = scaling_info->dst_rect.height * 1000 /
+		  scaling_info->src_rect.height;
+	if (scale_h < min_downscale || scale_h > max_upscale)
+		return -EINVAL;
+
+	/*
+	 * The "scaling_quality" can be ignored for now, quality = 0 has DC
+	 * assume reasonable defaults based on the format.
+	 */
+
+	return 0;
+}
+
+/*
+ * Atomic check for a plane: validate viewport and scaling against the new
+ * CRTC state, verify that the scaling info can be derived, and ask DC to
+ * validate the plane.
+ *
+ * Returns 0 when the plane has no DC state or passes every check, the error
+ * of the failing helper, or -EINVAL when the CRTC state is missing or DC
+ * rejects the plane.
+ */
+static int amdgpu_dm_plane_atomic_check(struct drm_plane *plane,
+					struct drm_atomic_state *state)
+{
+	struct drm_plane_state *new_plane_state =
+		drm_atomic_get_new_plane_state(state, plane);
+	struct amdgpu_device *adev = drm_to_adev(plane->dev);
+	struct dm_plane_state *dm_plane_state;
+	struct drm_crtc_state *new_crtc_state;
+	struct dc_scaling_info scaling_info;
+	int ret;
+
+	trace_amdgpu_dm_plane_atomic_check(new_plane_state);
+
+	/* Planes without a DC state have nothing to validate. */
+	dm_plane_state = to_dm_plane_state(new_plane_state);
+	if (!dm_plane_state->dc_state)
+		return 0;
+
+	new_crtc_state = drm_atomic_get_new_crtc_state(state, new_plane_state->crtc);
+	if (!new_crtc_state)
+		return -EINVAL;
+
+	ret = amdgpu_dm_plane_helper_check_state(new_plane_state, new_crtc_state);
+	if (ret)
+		return ret;
+
+	/* Only the return code matters here; the filled scaling_info is discarded. */
+	ret = amdgpu_dm_plane_fill_dc_scaling_info(adev, new_plane_state, &scaling_info);
+	if (ret)
+		return ret;
+
+	return dc_validate_plane(adev->dm.dc, dm_plane_state->dc_state) == DC_OK ? 0 : -EINVAL;
+}
+
+/*
+ * Decide whether an asynchronous update of @plane is allowed.
+ *
+ * Async flips are only accepted on overlay planes; any other async update
+ * is only accepted on cursor planes, and then only while the CRTC is not
+ * in overlay cursor mode.
+ *
+ * Returns 0 when the async update may proceed, -EINVAL otherwise.
+ */
+static int amdgpu_dm_plane_atomic_async_check(struct drm_plane *plane,
+					      struct drm_atomic_state *state, bool flip)
+{
+	enum drm_plane_type allowed_type = flip ? DRM_PLANE_TYPE_OVERLAY :
+						  DRM_PLANE_TYPE_CURSOR;
+	struct drm_plane_state *plane_state;
+	struct drm_crtc_state *crtc_state;
+	struct dm_crtc_state *dm_crtc_state;
+
+	if (plane->type != allowed_type)
+		return -EINVAL;
+
+	plane_state = drm_atomic_get_new_plane_state(state, plane);
+	crtc_state = drm_atomic_get_new_crtc_state(state, plane_state->crtc);
+	dm_crtc_state = to_dm_crtc_state(crtc_state);
+
+	if (flip)
+		return 0;
+
+	/* Reject overlay cursors for now */
+	if (dm_crtc_state->cursor_mode == DM_CURSOR_OVERLAY_MODE)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Translate the current state of a cursor @plane into a DC cursor position.
+ *
+ * @position is only written when the cursor is to be shown; when there is
+ * no CRTC, no framebuffer, or the cursor lies completely off the top/left
+ * edge, the function returns 0 and leaves @position untouched (callers are
+ * expected to pass it in zero-initialised so that ->enable stays false).
+ *
+ * Returns 0 on success, -EINVAL if the cursor is larger than the CRTC's
+ * maximum cursor size.
+ */
+int amdgpu_dm_plane_get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
+					struct dc_cursor_position *position)
+{
+	struct amdgpu_device *adev = drm_to_adev(plane->dev);
+	struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
+	int pos_x, pos_y;
+
+	if (!crtc || !plane->state->fb)
+		return 0;
+
+	if ((plane->state->crtc_w > acrtc->max_cursor_width) ||
+	    (plane->state->crtc_h > acrtc->max_cursor_height)) {
+		DRM_ERROR("%s: bad cursor width or height %d x %d\n",
+			  __func__,
+			  plane->state->crtc_w,
+			  plane->state->crtc_h);
+		return -EINVAL;
+	}
+
+	pos_x = plane->state->crtc_x;
+	pos_y = plane->state->crtc_y;
+
+	/* Entirely off-screen to the top or left: nothing to enable. */
+	if (pos_x <= -acrtc->max_cursor_width ||
+	    pos_y <= -acrtc->max_cursor_height)
+		return 0;
+
+	/*
+	 * A negative coordinate is clamped to 0 and the clipped amount is
+	 * reported through the hotspot instead.
+	 */
+	if (pos_x < 0) {
+		position->x_hotspot = min(-pos_x, acrtc->max_cursor_width - 1);
+		position->x = 0;
+	} else {
+		position->x_hotspot = 0;
+		position->x = pos_x;
+	}
+
+	if (pos_y < 0) {
+		position->y_hotspot = min(-pos_y, acrtc->max_cursor_height - 1);
+		position->y = 0;
+	} else {
+		position->y_hotspot = 0;
+		position->y = pos_y;
+	}
+
+	position->enable = true;
+
+	if (amdgpu_ip_version(adev, DCE_HWIP, 0) < IP_VERSION(4, 0, 1))
+		position->translate_by_source = true;
+
+	return 0;
+}
+
+/*
+ * Program the hardware cursor from the current state of a cursor @plane.
+ *
+ * The CRTC is taken from the current plane state when a framebuffer is
+ * attached, otherwise from @old_plane_state (the cursor is being turned
+ * off). When the computed position is not enabled, only the position is
+ * programmed so that DC disables the cursor; otherwise both the cursor
+ * attributes and the position are programmed on the CRTC's stream under
+ * the DC lock.
+ */
+void amdgpu_dm_plane_handle_cursor_update(struct drm_plane *plane,
+					  struct drm_plane_state *old_plane_state)
+{
+	struct amdgpu_device *adev = drm_to_adev(plane->dev);
+	struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);
+	struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;
+	struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;
+	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+	uint64_t address = afb ? afb->address : 0;
+	struct dc_cursor_position position = {0};
+	struct dc_cursor_attributes attributes;
+	int ret;
+
+	if (!plane->state->fb && !old_plane_state->fb)
+		return;
+
+	/*
+	 * Without a CRTC there is nothing to program, and amdgpu_crtc was
+	 * derived from a NULL pointer and must not be dereferenced below.
+	 */
+	if (!crtc)
+		return;
+
+	drm_dbg_atomic(plane->dev, "crtc_id=%d with size %d to %d\n",
+		       amdgpu_crtc->crtc_id, plane->state->crtc_w,
+		       plane->state->crtc_h);
+
+	ret = amdgpu_dm_plane_get_cursor_position(plane, crtc, &position);
+	if (ret)
+		return;
+
+	if (!position.enable) {
+		/* turn off cursor */
+		if (crtc_state && crtc_state->stream) {
+			mutex_lock(&adev->dm.dc_lock);
+			dc_stream_program_cursor_position(crtc_state->stream,
+							  &position);
+			mutex_unlock(&adev->dm.dc_lock);
+		}
+		return;
+	}
+
+	/*
+	 * The disable path above tolerates a NULL crtc_state; apply the same
+	 * check here before it is dereferenced for the enable path.
+	 */
+	if (!crtc_state)
+		return;
+
+	amdgpu_crtc->cursor_width = plane->state->crtc_w;
+	amdgpu_crtc->cursor_height = plane->state->crtc_h;
+
+	memset(&attributes, 0, sizeof(attributes));
+	attributes.address.high_part = upper_32_bits(address);
+	attributes.address.low_part = lower_32_bits(address);
+	attributes.width = plane->state->crtc_w;
+	attributes.height = plane->state->crtc_h;
+	attributes.color_format = CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA;
+	attributes.rotation_angle = 0;
+	attributes.attribute_flags.value = 0;
+
+	/* Enable cursor degamma ROM on DCN3+ for implicit sRGB degamma in DRM
+	 * legacy gamma setup.
+	 */
+	if (crtc_state->cm_is_degamma_srgb &&
+	    adev->dm.dc->caps.color.dpp.gamma_corr)
+		attributes.attribute_flags.bits.ENABLE_CURSOR_DEGAMMA = 1;
+
+	/* Pitch is expressed in pixels: bytes per line / bytes per pixel. */
+	if (afb)
+		attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0];
+
+	if (crtc_state->stream) {
+		mutex_lock(&adev->dm.dc_lock);
+		if (!dc_stream_program_cursor_attributes(crtc_state->stream,
+							 &attributes))
+			DRM_ERROR("DC failed to set cursor attributes\n");
+
+		if (!dc_stream_program_cursor_position(crtc_state->stream,
+						       &position))
+			DRM_ERROR("DC failed to set cursor position\n");
+		mutex_unlock(&adev->dm.dc_lock);
+	}
+}
+
+/*
+ * Apply an asynchronous plane update: swap the framebuffer into the
+ * plane's current state, copy over the new source/destination rectangle
+ * and then program the cursor from the updated state.
+ */
+static void amdgpu_dm_plane_atomic_async_update(struct drm_plane *plane,
+						struct drm_atomic_state *state)
+{
+	struct drm_plane_state *old_ps = drm_atomic_get_old_plane_state(state, plane);
+	struct drm_plane_state *new_ps = drm_atomic_get_new_plane_state(state, plane);
+	struct drm_plane_state *cur = plane->state;
+
+	trace_amdgpu_dm_atomic_update_cursor(new_ps);
+
+	/* The previous framebuffer ends up in the new state after the swap. */
+	swap(cur->fb, new_ps->fb);
+
+	/* Destination rectangle on the CRTC. */
+	cur->crtc_x = new_ps->crtc_x;
+	cur->crtc_y = new_ps->crtc_y;
+	cur->crtc_w = new_ps->crtc_w;
+	cur->crtc_h = new_ps->crtc_h;
+
+	/* Source rectangle within the framebuffer. */
+	cur->src_x = new_ps->src_x;
+	cur->src_y = new_ps->src_y;
+	cur->src_w = new_ps->src_w;
+	cur->src_h = new_ps->src_h;
+
+	amdgpu_dm_plane_handle_cursor_update(plane, old_ps);
+}
+
+/*
+ * Panic-time flush hook: asks DC to force DCC and tiling off for the
+ * plane's DC state. The boolean passed to DC is true when the current
+ * framebuffer carries a non-zero format modifier.
+ */
+static void amdgpu_dm_plane_panic_flush(struct drm_plane *plane)
+{
+	struct dm_plane_state *dm_state = to_dm_plane_state(plane->state);
+	struct drm_framebuffer *fb = plane->state->fb;
+	bool has_modifier;
+
+	/* Nothing to flush without a DC plane state. */
+	if (!dm_state || !dm_state->dc_state)
+		return;
+
+	has_modifier = fb->modifier != 0;
+
+	dc_plane_force_dcc_and_tiling_disable(dm_state->dc_state, has_modifier);
+}
+
+/* Plane helper callbacks installed on every non-primary DM plane. */
+static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
+	/* Atomic validation and asynchronous updates */
+	.atomic_check = amdgpu_dm_plane_atomic_check,
+	.atomic_async_check = amdgpu_dm_plane_atomic_async_check,
+	.atomic_async_update = amdgpu_dm_plane_atomic_async_update,
+	/* Framebuffer preparation and cleanup */
+	.prepare_fb = amdgpu_dm_plane_helper_prepare_fb,
+	.cleanup_fb = amdgpu_dm_plane_helper_cleanup_fb,
+};
+
+/*
+ * Plane helper callbacks installed on primary DM planes: the common set
+ * plus the scanout-buffer and panic-flush hooks.
+ */
+static const struct drm_plane_helper_funcs dm_primary_plane_helper_funcs = {
+	/* Atomic validation and asynchronous updates */
+	.atomic_check = amdgpu_dm_plane_atomic_check,
+	.atomic_async_check = amdgpu_dm_plane_atomic_async_check,
+	.atomic_async_update = amdgpu_dm_plane_atomic_async_update,
+	/* Framebuffer preparation and cleanup */
+	.prepare_fb = amdgpu_dm_plane_helper_prepare_fb,
+	.cleanup_fb = amdgpu_dm_plane_helper_cleanup_fb,
+	/* Primary-plane-only hooks */
+	.get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+	.panic_flush = amdgpu_dm_plane_panic_flush,
+};
+
+/*
+ * Reset hook: destroy any existing plane state and install a freshly
+ * allocated DM plane state with default colour-management values.
+ *
+ * On allocation failure a warning is emitted and the plane is left
+ * without a state (plane->state == NULL).
+ */
+static void amdgpu_dm_plane_drm_plane_reset(struct drm_plane *plane)
+{
+	struct dm_plane_state *amdgpu_state;
+
+	if (plane->state) {
+		plane->funcs->atomic_destroy_state(plane, plane->state);
+		/*
+		 * The old state has been destroyed; clear the pointer so a
+		 * failed allocation below cannot leave it dangling.
+		 */
+		plane->state = NULL;
+	}
+
+	amdgpu_state = kzalloc(sizeof(*amdgpu_state), GFP_KERNEL);
+	if (WARN_ON(!amdgpu_state))
+		return;
+
+	__drm_atomic_helper_plane_reset(plane, &amdgpu_state->base);
+	amdgpu_state->degamma_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+	amdgpu_state->hdr_mult = AMDGPU_HDR_MULT_DEFAULT;
+	amdgpu_state->shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+	amdgpu_state->blend_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+}
+
+/*
+ * Duplicate the plane's current DM state.
+ *
+ * The base DRM state is copied by the atomic helper; the DC plane state
+ * and every colour-management blob present in the current state are
+ * shared with the copy and get an extra reference. Scalar colour
+ * properties are copied by value.
+ *
+ * Returns the new base state, or NULL if allocation fails.
+ */
+static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct drm_plane *plane)
+{
+	struct dm_plane_state *src = to_dm_plane_state(plane->state);
+	struct dm_plane_state *dup;
+
+	dup = kzalloc(sizeof(*dup), GFP_KERNEL);
+	if (!dup)
+		return NULL;
+
+	__drm_atomic_helper_plane_duplicate_state(plane, &dup->base);
+
+	/* dup is zero-initialised, so absent objects simply stay NULL. */
+	dup->dc_state = src->dc_state;
+	if (dup->dc_state)
+		dc_plane_state_retain(dup->dc_state);
+
+	dup->degamma_lut = src->degamma_lut ?
+		drm_property_blob_get(src->degamma_lut) : NULL;
+	dup->ctm = src->ctm ?
+		drm_property_blob_get(src->ctm) : NULL;
+	dup->shaper_lut = src->shaper_lut ?
+		drm_property_blob_get(src->shaper_lut) : NULL;
+	dup->lut3d = src->lut3d ?
+		drm_property_blob_get(src->lut3d) : NULL;
+	dup->blend_lut = src->blend_lut ?
+		drm_property_blob_get(src->blend_lut) : NULL;
+
+	dup->degamma_tf = src->degamma_tf;
+	dup->hdr_mult = src->hdr_mult;
+	dup->shaper_tf = src->shaper_tf;
+	dup->blend_tf = src->blend_tf;
+
+	return &dup->base;
+}
+
+/*
+ * Report whether @plane can scan out @format combined with @modifier.
+ *
+ * LINEAR and INVALID modifiers are always accepted. Any other modifier
+ * must be on the plane's modifier list and, for tile versions up to
+ * GFX11, must additionally satisfy the swizzle and DCC restrictions below.
+ */
+static bool amdgpu_dm_plane_format_mod_supported(struct drm_plane *plane,
+						 uint32_t format,
+						 uint64_t modifier)
+{
+	struct amdgpu_device *adev = drm_to_adev(plane->dev);
+	const struct drm_format_info *info = drm_format_info(format);
+	enum dm_micro_swizzle microtile;
+	uint64_t tile_ver;
+	unsigned int idx;
+	bool listed = false;
+
+	if (!info)
+		return false;
+
+	/*
+	 * We always have to allow these modifiers:
+	 * 1. Core DRM checks for LINEAR support if userspace does not provide modifiers.
+	 * 2. Not passing any modifiers is the same as explicitly passing INVALID.
+	 */
+	if (modifier == DRM_FORMAT_MOD_LINEAR ||
+	    modifier == DRM_FORMAT_MOD_INVALID)
+		return true;
+
+	/* Check that the modifier is on the list of the plane's supported modifiers. */
+	for (idx = 0; idx < plane->modifier_count && !listed; idx++)
+		listed = (plane->modifiers[idx] == modifier);
+	if (!listed)
+		return false;
+
+	/* GFX12 doesn't have these limitations. */
+	tile_ver = AMD_FMT_MOD_GET(TILE_VERSION, modifier);
+	if (tile_ver > AMD_FMT_MOD_TILE_VER_GFX11)
+		return true;
+
+	microtile = amdgpu_dm_plane_modifier_gfx9_swizzle_mode(modifier) & 3;
+
+	if (microtile == MICRO_SWIZZLE_D) {
+		/*
+		 * For D swizzle the canonical modifier depends on the bpp, so check
+		 * it here.
+		 */
+		if (tile_ver == AMD_FMT_MOD_TILE_VER_GFX9 &&
+		    adev->family >= AMDGPU_FAMILY_NV &&
+		    info->cpp[0] == 4)
+			return false;
+
+		if (adev->family >= AMDGPU_FAMILY_RV && info->cpp[0] < 8)
+			return false;
+	}
+
+	if (!amdgpu_dm_plane_modifier_has_dcc(modifier))
+		return true;
+
+	/*
+	 * With DCC: per radeonsi comments 16/64 bpp are more complicated, so
+	 * only 4 bytes per pixel is accepted. We support multi-planar formats,
+	 * but not when combined with additional DCC metadata planes.
+	 */
+	return info->cpp[0] == 4 && info->num_planes <= 1;
+}
+
+/*
+ * Destroy a DM plane state: drop the references it holds on the
+ * colour-management blobs and on the DC plane state, then let the atomic
+ * helper tear down the base state.
+ */
+static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane,
+						    struct drm_plane_state *state)
+{
+	struct dm_plane_state *dm_state = to_dm_plane_state(state);
+	struct drm_property_blob *held_blobs[] = {
+		dm_state->degamma_lut,
+		dm_state->ctm,
+		dm_state->lut3d,
+		dm_state->shaper_lut,
+		dm_state->blend_lut,
+	};
+	size_t idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(held_blobs); idx++) {
+		if (held_blobs[idx])
+			drm_property_blob_put(held_blobs[idx]);
+	}
+
+	if (dm_state->dc_state)
+		dc_plane_state_release(dm_state->dc_state);
+
+	drm_atomic_helper_plane_destroy_state(plane, state);
+}
+
+#ifdef AMD_PRIVATE_COLOR
+/*
+ * Attach the driver-private plane colour-management properties to @plane,
+ * exposing each group only when the corresponding DC colour capability is
+ * reported.
+ *
+ * The mode info and DPP caps are accessed through pointers rather than
+ * copied by value, which avoids placing whole structures on the stack.
+ */
+static void
+dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm,
+					     struct drm_plane *plane)
+{
+	const struct amdgpu_mode_info *mode_info = &dm->adev->mode_info;
+	const struct dpp_color_caps *dpp_caps = &dm->dc->caps.color.dpp;
+
+	/* Check HW color pipeline capabilities on DPP block (pre-blending)
+	 * before exposing related properties.
+	 */
+	if (dpp_caps->dgam_ram || dpp_caps->gamma_corr) {
+		drm_object_attach_property(&plane->base,
+					   mode_info->plane_degamma_lut_property,
+					   0);
+		drm_object_attach_property(&plane->base,
+					   mode_info->plane_degamma_lut_size_property,
+					   MAX_COLOR_LUT_ENTRIES);
+		drm_object_attach_property(&plane->base,
+					   mode_info->plane_degamma_tf_property,
+					   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
+	}
+	/* HDR MULT is always available */
+	drm_object_attach_property(&plane->base,
+				   mode_info->plane_hdr_mult_property,
+				   AMDGPU_HDR_MULT_DEFAULT);
+
+	/*
+	 * Expose the plane CTM when MPC gamut remap is available.
+	 * NOTE(review): the previous comment required both DPP and MPC gamut
+	 * remap, but only the MPC capability is checked here - confirm intent.
+	 */
+	if (dm->dc->caps.color.mpc.gamut_remap)
+		drm_object_attach_property(&plane->base,
+					   mode_info->plane_ctm_property, 0);
+
+	/* Shaper LUT/TF and 3D LUT are exposed together. */
+	if (dpp_caps->hw_3d_lut || dm->dc->caps.color.mpc.preblend) {
+		drm_object_attach_property(&plane->base,
+					   mode_info->plane_shaper_lut_property, 0);
+		drm_object_attach_property(&plane->base,
+					   mode_info->plane_shaper_lut_size_property,
+					   MAX_COLOR_LUT_ENTRIES);
+		drm_object_attach_property(&plane->base,
+					   mode_info->plane_shaper_tf_property,
+					   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
+		drm_object_attach_property(&plane->base,
+					   mode_info->plane_lut3d_property, 0);
+		drm_object_attach_property(&plane->base,
+					   mode_info->plane_lut3d_size_property,
+					   MAX_COLOR_3DLUT_SIZE);
+	}
+
+	/* Blend LUT/TF depend on the DPP output gamma RAM capability. */
+	if (dpp_caps->ogam_ram) {
+		drm_object_attach_property(&plane->base,
+					   mode_info->plane_blend_lut_property, 0);
+		drm_object_attach_property(&plane->base,
+					   mode_info->plane_blend_lut_size_property,
+					   MAX_COLOR_LUT_ENTRIES);
+		drm_object_attach_property(&plane->base,
+					   mode_info->plane_blend_tf_property,
+					   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
+	}
+}
+
+/*
+ * Set a driver-private colour-management property on a DM plane state.
+ *
+ * Blob-backed properties (degamma/shaper/3D/blend LUTs and the CTM)
+ * replace the blob held by the state and return the result of the blob
+ * lookup; scalar properties (transfer functions, HDR multiplier) are
+ * stored directly. color_mgmt_changed is set on the base state whenever a
+ * value actually changes.
+ *
+ * Returns 0 on success, -EINVAL for a property this plane does not know,
+ * or the error returned by drm_property_replace_blob_from_id().
+ */
+static int
+dm_atomic_plane_set_property(struct drm_plane *plane,
+			     struct drm_plane_state *state,
+			     struct drm_property *property,
+			     uint64_t val)
+{
+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(state);
+	struct amdgpu_device *adev = drm_to_adev(plane->dev);
+	bool replaced = false;
+	int ret;
+
+	if (property == adev->mode_info.plane_degamma_lut_property) {
+		/* LUT blobs: any total size, element size of drm_color_lut. */
+		ret = drm_property_replace_blob_from_id(plane->dev,
+							&dm_plane_state->degamma_lut,
+							val, -1,
+							sizeof(struct drm_color_lut),
+							&replaced);
+		dm_plane_state->base.color_mgmt_changed |= replaced;
+		return ret;
+	} else if (property == adev->mode_info.plane_degamma_tf_property) {
+		if (dm_plane_state->degamma_tf != val) {
+			dm_plane_state->degamma_tf = val;
+			dm_plane_state->base.color_mgmt_changed = 1;
+		}
+	} else if (property == adev->mode_info.plane_hdr_mult_property) {
+		if (dm_plane_state->hdr_mult != val) {
+			dm_plane_state->hdr_mult = val;
+			dm_plane_state->base.color_mgmt_changed = 1;
+		}
+	} else if (property == adev->mode_info.plane_ctm_property) {
+		/* CTM blob: must be exactly one drm_color_ctm_3x4. */
+		ret = drm_property_replace_blob_from_id(plane->dev,
+							&dm_plane_state->ctm,
+							val,
+							sizeof(struct drm_color_ctm_3x4), -1,
+							&replaced);
+		dm_plane_state->base.color_mgmt_changed |= replaced;
+		return ret;
+	} else if (property == adev->mode_info.plane_shaper_lut_property) {
+		ret = drm_property_replace_blob_from_id(plane->dev,
+							&dm_plane_state->shaper_lut,
+							val, -1,
+							sizeof(struct drm_color_lut),
+							&replaced);
+		dm_plane_state->base.color_mgmt_changed |= replaced;
+		return ret;
+	} else if (property == adev->mode_info.plane_shaper_tf_property) {
+		if (dm_plane_state->shaper_tf != val) {
+			dm_plane_state->shaper_tf = val;
+			dm_plane_state->base.color_mgmt_changed = 1;
+		}
+	} else if (property == adev->mode_info.plane_lut3d_property) {
+		ret = drm_property_replace_blob_from_id(plane->dev,
+							&dm_plane_state->lut3d,
+							val, -1,
+							sizeof(struct drm_color_lut),
+							&replaced);
+		dm_plane_state->base.color_mgmt_changed |= replaced;
+		return ret;
+	} else if (property == adev->mode_info.plane_blend_lut_property) {
+		ret = drm_property_replace_blob_from_id(plane->dev,
+							&dm_plane_state->blend_lut,
+							val, -1,
+							sizeof(struct drm_color_lut),
+							&replaced);
+		dm_plane_state->base.color_mgmt_changed |= replaced;
+		return ret;
+	} else if (property == adev->mode_info.plane_blend_tf_property) {
+		if (dm_plane_state->blend_tf != val) {
+			dm_plane_state->blend_tf = val;
+			dm_plane_state->base.color_mgmt_changed = 1;
+		}
+	} else {
+		/* Brackets balanced: the message used to end in "]]". */
+		drm_dbg_atomic(plane->dev,
+			       "[PLANE:%d:%s] unknown property [PROP:%d:%s]\n",
+			       plane->base.id, plane->name,
+			       property->base.id, property->name);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Read a driver-private colour-management property from a DM plane state.
+ *
+ * Blob-backed properties report the blob's object id, or 0 when no blob
+ * is set; scalar properties report the stored value.
+ *
+ * Returns 0 on success, -EINVAL for an unknown property.
+ */
+static int
+dm_atomic_plane_get_property(struct drm_plane *plane,
+			     const struct drm_plane_state *state,
+			     struct drm_property *property,
+			     uint64_t *val)
+{
+	struct dm_plane_state *dm_state = to_dm_plane_state(state);
+	struct amdgpu_device *adev = drm_to_adev(plane->dev);
+
+	if (property == adev->mode_info.plane_degamma_lut_property) {
+		*val = (dm_state->degamma_lut) ?
+			dm_state->degamma_lut->base.id : 0;
+		return 0;
+	}
+
+	if (property == adev->mode_info.plane_degamma_tf_property) {
+		*val = dm_state->degamma_tf;
+		return 0;
+	}
+
+	if (property == adev->mode_info.plane_hdr_mult_property) {
+		*val = dm_state->hdr_mult;
+		return 0;
+	}
+
+	if (property == adev->mode_info.plane_ctm_property) {
+		*val = (dm_state->ctm) ?
+			dm_state->ctm->base.id : 0;
+		return 0;
+	}
+
+	if (property == adev->mode_info.plane_shaper_lut_property) {
+		*val = (dm_state->shaper_lut) ?
+			dm_state->shaper_lut->base.id : 0;
+		return 0;
+	}
+
+	if (property == adev->mode_info.plane_shaper_tf_property) {
+		*val = dm_state->shaper_tf;
+		return 0;
+	}
+
+	if (property == adev->mode_info.plane_lut3d_property) {
+		*val = (dm_state->lut3d) ?
+			dm_state->lut3d->base.id : 0;
+		return 0;
+	}
+
+	if (property == adev->mode_info.plane_blend_lut_property) {
+		*val = (dm_state->blend_lut) ?
+			dm_state->blend_lut->base.id : 0;
+		return 0;
+	}
+
+	if (property == adev->mode_info.plane_blend_tf_property) {
+		*val = dm_state->blend_tf;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+#else
+
+#define MAX_COLOR_PIPELINES 5
+
+/*
+ * Create the colour pipeline for @plane and expose it through the
+ * COLOR_PIPELINE property.
+ *
+ * Cursor planes and hardware older than DCN 3.0 get no pipeline; that is
+ * not an error.
+ *
+ * Returns 0 on success or the error from the pipeline initialisation.
+ */
+static int
+dm_plane_init_colorops(struct drm_plane *plane)
+{
+	struct drm_prop_enum_list pipelines[MAX_COLOR_PIPELINES];
+	struct amdgpu_device *adev = drm_to_adev(plane->dev);
+	struct dc *dc = adev->dm.dc;
+	int count = 0;
+	int err;
+
+	if (plane->type == DRM_PLANE_TYPE_CURSOR)
+		return 0;
+
+	if (dc->ctx->dce_version < DCN_VERSION_3_0)
+		return 0;
+
+	/* initialize pipeline */
+	err = amdgpu_dm_initialize_default_pipeline(plane, &pipelines[count]);
+	if (err) {
+		drm_err(plane->dev, "Failed to create color pipeline for plane %d: %d\n",
+			plane->base.id, err);
+		return err;
+	}
+	count++;
+
+	/* Create COLOR_PIPELINE property and attach */
+	drm_plane_create_color_pipeline_property(plane, pipelines, count);
+
+	return 0;
+}
+#endif
+
+/*
+ * Core plane callbacks for DM planes. Legacy entry points are routed
+ * through the atomic helpers; state handling uses the DM-specific hooks so
+ * that the extra dm_plane_state members are managed correctly.
+ */
+static const struct drm_plane_funcs dm_plane_funcs = {
+	/* DM plane state lifecycle */
+	.reset = amdgpu_dm_plane_drm_plane_reset,
+	.atomic_duplicate_state = amdgpu_dm_plane_drm_plane_duplicate_state,
+	.atomic_destroy_state = amdgpu_dm_plane_drm_plane_destroy_state,
+	/* Generic helpers */
+	.update_plane = drm_atomic_helper_update_plane,
+	.disable_plane = drm_atomic_helper_disable_plane,
+	.destroy = drm_plane_helper_destroy,
+	/* Format/modifier filtering */
+	.format_mod_supported = amdgpu_dm_plane_format_mod_supported,
+#ifdef AMD_PRIVATE_COLOR
+	/* Driver-private colour-management properties */
+	.atomic_set_property = dm_atomic_plane_set_property,
+	.atomic_get_property = dm_atomic_plane_get_property,
+#endif
+};
+
+/*
+ * Register @plane with DRM and create its properties.
+ *
+ * @dm:             display manager owning the plane
+ * @plane:          plane to initialise; plane->type must already be set
+ * @possible_crtcs: bitmask of CRTCs the plane can be attached to
+ * @plane_cap:      DC capabilities of the plane, may be NULL
+ *
+ * Depending on the plane type and capabilities this creates the alpha and
+ * blend-mode, zpos, YCbCr colour, rotation and damage-clip properties,
+ * installs the plane helper callbacks and the colour-management
+ * properties, and finally resets the plane so it has an initial state.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
+			 struct drm_plane *plane,
+			 unsigned long possible_crtcs,
+			 const struct dc_plane_cap *plane_cap)
+{
+	struct drm_device *ddev = adev_to_drm(dm->adev);
+	unsigned int primary_zpos = dm->dc->caps.max_slave_planes;
+	unsigned int supported_rotations;
+	uint64_t *modifiers = NULL;
+	uint32_t formats[32];
+	int num_formats;
+	int res;
+
+	num_formats = amdgpu_dm_plane_get_plane_formats(plane, plane_cap, formats,
+							ARRAY_SIZE(formats));
+
+	res = amdgpu_dm_plane_get_plane_modifiers(dm->adev, plane->type, &modifiers);
+	if (res)
+		return res;
+
+	if (!modifiers)
+		ddev->mode_config.fb_modifiers_not_supported = true;
+
+	res = drm_universal_plane_init(ddev, plane, possible_crtcs,
+				       &dm_plane_funcs, formats, num_formats,
+				       modifiers, plane->type, NULL);
+	/* The modifier list is no longer needed, whatever the outcome. */
+	kfree(modifiers);
+	if (res)
+		return res;
+
+	if (plane->type == DRM_PLANE_TYPE_OVERLAY &&
+	    plane_cap && plane_cap->per_pixel_alpha) {
+		unsigned int blend_caps = BIT(DRM_MODE_BLEND_PIXEL_NONE) |
+					  BIT(DRM_MODE_BLEND_PREMULTI) |
+					  BIT(DRM_MODE_BLEND_COVERAGE);
+
+		drm_plane_create_alpha_property(plane);
+		drm_plane_create_blend_mode_property(plane, blend_caps);
+	}
+
+	switch (plane->type) {
+	case DRM_PLANE_TYPE_PRIMARY:
+		/*
+		 * Allow OVERLAY planes to be used as underlays by assigning an
+		 * immutable zpos = # of OVERLAY planes to the PRIMARY plane.
+		 */
+		drm_plane_create_zpos_immutable_property(plane, primary_zpos);
+		break;
+	case DRM_PLANE_TYPE_OVERLAY:
+		/*
+		 * OVERLAY planes can be below or above the PRIMARY, but cannot
+		 * be above the CURSOR plane.
+		 */
+		drm_plane_create_zpos_property(plane,
+					       primary_zpos + 1 + drm_plane_index(plane),
+					       0, 254);
+		break;
+	case DRM_PLANE_TYPE_CURSOR:
+		drm_plane_create_zpos_immutable_property(plane, 255);
+		break;
+	default:
+		break;
+	}
+
+	if (plane->type == DRM_PLANE_TYPE_PRIMARY &&
+	    plane_cap &&
+	    (plane_cap->pixel_format_support.nv12 ||
+	     plane_cap->pixel_format_support.p010)) {
+		/* This only affects YUV formats. */
+		drm_plane_create_color_properties(
+			plane,
+			BIT(DRM_COLOR_YCBCR_BT601) |
+			BIT(DRM_COLOR_YCBCR_BT709) |
+			BIT(DRM_COLOR_YCBCR_BT2020),
+			BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) |
+			BIT(DRM_COLOR_YCBCR_FULL_RANGE),
+			DRM_COLOR_YCBCR_BT709, DRM_COLOR_YCBCR_LIMITED_RANGE);
+	}
+
+	supported_rotations =
+		DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 |
+		DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270;
+
+	/* Rotation and damage clips are never exposed on cursor planes. */
+	if (plane->type != DRM_PLANE_TYPE_CURSOR) {
+		if (dm->adev->asic_type >= CHIP_BONAIRE)
+			drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
+							   supported_rotations);
+
+		if (amdgpu_ip_version(dm->adev, DCE_HWIP, 0) > IP_VERSION(3, 0, 1))
+			drm_plane_enable_fb_damage_clips(plane);
+	}
+
+	drm_plane_helper_add(plane,
+			     plane->type == DRM_PLANE_TYPE_PRIMARY ?
+			     &dm_primary_plane_helper_funcs :
+			     &dm_plane_helper_funcs);
+
+#ifdef AMD_PRIVATE_COLOR
+	dm_atomic_plane_attach_color_mgmt_properties(dm, plane);
+#else
+	res = dm_plane_init_colorops(plane);
+	if (res)
+		return res;
+#endif
+
+	/* Create (reset) the plane state */
+	if (plane->funcs->reset)
+		plane->funcs->reset(plane);
+
+	return 0;
+}
+
+/* Return true when @format is one of the entries of video_formats[]. */
+bool amdgpu_dm_plane_is_video_format(uint32_t format)
+{
+	size_t remaining = ARRAY_SIZE(video_formats);
+
+	/* Membership test only, so the scan direction does not matter. */
+	while (remaining--) {
+		if (video_formats[remaining] == format)
+			return true;
+	}
+
+	return false;
+}
+
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h
new file mode 100644
index 000000000000..ea2619b507db
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __AMDGPU_DM_PLANE_H__
+#define __AMDGPU_DM_PLANE_H__
+
+#include "dc.h"
+
+/*
+ * Fill @position from the cursor plane's current state. Returns 0 on
+ * success (leaving @position untouched when the cursor is not to be shown)
+ * or -EINVAL when the cursor exceeds the CRTC's maximum cursor size.
+ */
+int amdgpu_dm_plane_get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
+ struct dc_cursor_position *position);
+
+/*
+ * Program cursor attributes and position on the CRTC's stream from the
+ * plane's current state; @old_plane_state supplies the CRTC when the
+ * current state has no framebuffer.
+ */
+void amdgpu_dm_plane_handle_cursor_update(struct drm_plane *plane,
+ struct drm_plane_state *old_plane_state);
+
+/*
+ * Fill @scaling_info from @state; returns -EINVAL when the resulting scale
+ * factors fall outside the supported range.
+ */
+int amdgpu_dm_plane_fill_dc_scaling_info(struct amdgpu_device *adev,
+ const struct drm_plane_state *state,
+ struct dc_scaling_info *scaling_info);
+
+int amdgpu_dm_plane_helper_check_state(struct drm_plane_state *state,
+ struct drm_crtc_state *new_crtc_state);
+
+int amdgpu_dm_plane_fill_plane_buffer_attributes(struct amdgpu_device *adev,
+ const struct amdgpu_framebuffer *afb,
+ const enum surface_pixel_format format,
+ const enum dc_rotation_angle rotation,
+ const uint64_t tiling_flags,
+ struct dc_tiling_info *tiling_info,
+ struct plane_size *plane_size,
+ struct dc_plane_dcc_param *dcc,
+ struct dc_plane_address *address,
+ bool tmz_surface);
+
+/*
+ * Register @plane with DRM, create its properties and give it an initial
+ * state. Returns 0 on success or a negative errno.
+ */
+int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
+ struct drm_plane *plane,
+ unsigned long possible_crtcs,
+ const struct dc_plane_cap *plane_cap);
+
+const struct drm_format_info *amdgpu_dm_plane_get_format_info(u32 pixel_format, u64 modifier);
+
+void amdgpu_dm_plane_fill_blending_from_plane_state(const struct drm_plane_state *plane_state,
+ bool *per_pixel_alpha, bool *pre_multiplied_alpha,
+ bool *global_alpha, int *global_alpha_value);
+
+/* Return true when @format is one of the driver's video formats. */
+bool amdgpu_dm_plane_is_video_format(uint32_t format);
+#endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
index eba270121698..11b2ea6edf95 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
@@ -97,14 +98,12 @@ bool dm_pp_apply_display_requirements(
const struct dm_pp_single_disp_config *dc_cfg =
&pp_display_cfg->disp_configs[i];
adev->pm.pm_display_cfg.displays[i].controller_id = dc_cfg->pipe_idx + 1;
+ adev->pm.pm_display_cfg.displays[i].pixel_clock = dc_cfg->pixel_clock;
}
- if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->display_configuration_change)
- adev->powerplay.pp_funcs->display_configuration_change(
- adev->powerplay.pp_handle,
- &adev->pm.pm_display_cfg);
+ amdgpu_dpm_display_configuration_change(adev, &adev->pm.pm_display_cfg);
- amdgpu_pm_compute_clocks(adev);
+ amdgpu_dpm_compute_clocks(adev);
}
return true;
@@ -298,31 +297,25 @@ bool dm_pp_get_clock_levels_by_type(
struct dm_pp_clock_levels *dc_clks)
{
struct amdgpu_device *adev = ctx->driver_context;
- void *pp_handle = adev->powerplay.pp_handle;
struct amd_pp_clocks pp_clks = { 0 };
struct amd_pp_simple_clock_info validation_clks = { 0 };
uint32_t i;
- if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_clock_by_type) {
- if (adev->powerplay.pp_funcs->get_clock_by_type(pp_handle,
- dc_to_pp_clock_type(clk_type), &pp_clks)) {
- /* Error in pplib. Provide default values. */
- get_default_clock_levels(clk_type, dc_clks);
- return true;
- }
+ if (amdgpu_dpm_get_clock_by_type(adev,
+ dc_to_pp_clock_type(clk_type), &pp_clks)) {
+ /* Error in pplib. Provide default values. */
+ get_default_clock_levels(clk_type, dc_clks);
+ return true;
}
pp_to_dc_clock_levels(&pp_clks, dc_clks, clk_type);
- if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_display_mode_validation_clocks) {
- if (adev->powerplay.pp_funcs->get_display_mode_validation_clocks(
- pp_handle, &validation_clks)) {
- /* Error in pplib. Provide default values. */
- DRM_INFO("DM_PPLIB: Warning: using default validation clocks!\n");
- validation_clks.engine_max_clock = 72000;
- validation_clks.memory_max_clock = 80000;
- validation_clks.level = 0;
- }
+ if (amdgpu_dpm_get_display_mode_validation_clks(adev, &validation_clks)) {
+ /* Error in pplib. Provide default values. */
+ DRM_INFO("DM_PPLIB: Warning: using default validation clocks!\n");
+ validation_clks.engine_max_clock = 72000;
+ validation_clks.memory_max_clock = 80000;
+ validation_clks.level = 0;
}
DRM_INFO("DM_PPLIB: Validation clocks:\n");
@@ -343,7 +336,8 @@ bool dm_pp_get_clock_levels_by_type(
if (dc_clks->clocks_in_khz[i] > validation_clks.engine_max_clock) {
/* This clock is higher the validation clock.
* Than means the previous one is the highest
- * non-boosted one. */
+ * non-boosted one.
+ */
DRM_INFO("DM_PPLIB: reducing engine clock level from %d to %d\n",
dc_clks->num_levels, i);
dc_clks->num_levels = i > 0 ? i : 1;
@@ -370,18 +364,14 @@ bool dm_pp_get_clock_levels_by_type_with_latency(
struct dm_pp_clock_levels_with_latency *clk_level_info)
{
struct amdgpu_device *adev = ctx->driver_context;
- void *pp_handle = adev->powerplay.pp_handle;
struct pp_clock_levels_with_latency pp_clks = { 0 };
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
int ret;
- if (pp_funcs && pp_funcs->get_clock_by_type_with_latency) {
- ret = pp_funcs->get_clock_by_type_with_latency(pp_handle,
- dc_to_pp_clock_type(clk_type),
- &pp_clks);
- if (ret)
- return false;
- }
+ ret = amdgpu_dpm_get_clock_by_type_with_latency(adev,
+ dc_to_pp_clock_type(clk_type),
+ &pp_clks);
+ if (ret)
+ return false;
pp_to_dc_clock_levels_with_latency(&pp_clks, clk_level_info, clk_type);
@@ -394,18 +384,14 @@ bool dm_pp_get_clock_levels_by_type_with_voltage(
struct dm_pp_clock_levels_with_voltage *clk_level_info)
{
struct amdgpu_device *adev = ctx->driver_context;
- void *pp_handle = adev->powerplay.pp_handle;
struct pp_clock_levels_with_voltage pp_clk_info = {0};
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
int ret;
- if (pp_funcs && pp_funcs->get_clock_by_type_with_voltage) {
- ret = pp_funcs->get_clock_by_type_with_voltage(pp_handle,
- dc_to_pp_clock_type(clk_type),
- &pp_clk_info);
- if (ret)
- return false;
- }
+ ret = amdgpu_dpm_get_clock_by_type_with_voltage(adev,
+ dc_to_pp_clock_type(clk_type),
+ &pp_clk_info);
+ if (ret)
+ return false;
pp_to_dc_clock_levels_with_voltage(&pp_clk_info, clk_level_info, clk_type);
@@ -417,19 +403,16 @@ bool dm_pp_notify_wm_clock_changes(
struct dm_pp_wm_sets_with_clock_ranges *wm_with_clock_ranges)
{
struct amdgpu_device *adev = ctx->driver_context;
- void *pp_handle = adev->powerplay.pp_handle;
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
/*
* Limit this watermark setting for Polaris for now
* TODO: expand this to other ASICs
*/
- if ((adev->asic_type >= CHIP_POLARIS10) && (adev->asic_type <= CHIP_VEGAM)
- && pp_funcs && pp_funcs->set_watermarks_for_clocks_ranges) {
- if (!pp_funcs->set_watermarks_for_clocks_ranges(pp_handle,
- (void *)wm_with_clock_ranges))
- return true;
- }
+ if ((adev->asic_type >= CHIP_POLARIS10) &&
+ (adev->asic_type <= CHIP_VEGAM) &&
+ !amdgpu_dpm_set_watermarks_for_clocks_ranges(adev,
+ (void *)wm_with_clock_ranges))
+ return true;
return false;
}
@@ -456,12 +439,10 @@ bool dm_pp_apply_clock_for_voltage_request(
if (!pp_clock_request.clock_type)
return false;
- if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->display_clock_voltage_request)
- ret = adev->powerplay.pp_funcs->display_clock_voltage_request(
- adev->powerplay.pp_handle,
- &pp_clock_request);
- if (ret)
+ ret = amdgpu_dpm_display_clock_voltage_request(adev, &pp_clock_request);
+ if (ret && (ret != -EOPNOTSUPP))
return false;
+
return true;
}
@@ -471,15 +452,8 @@ bool dm_pp_get_static_clocks(
{
struct amdgpu_device *adev = ctx->driver_context;
struct amd_pp_clock_info pp_clk_info = {0};
- int ret = 0;
- if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_current_clocks)
- ret = adev->powerplay.pp_funcs->get_current_clocks(
- adev->powerplay.pp_handle,
- &pp_clk_info);
- else
- return false;
- if (ret)
+ if (amdgpu_dpm_get_current_clocks(adev, &pp_clk_info))
return false;
static_clk_info->max_clocks_state = pp_to_dc_powerlevel_state(pp_clk_info.max_clocks_state);
@@ -494,8 +468,6 @@ static void pp_rv_set_wm_ranges(struct pp_smu *pp,
{
const struct dc_context *ctx = pp->dm;
struct amdgpu_device *adev = ctx->driver_context;
- void *pp_handle = adev->powerplay.pp_handle;
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
struct dm_pp_wm_sets_with_clock_ranges_soc15 wm_with_clock_ranges;
struct dm_pp_clock_range_for_dmif_wm_set_soc15 *wm_dce_clocks = wm_with_clock_ranges.wm_dmif_clocks_ranges;
struct dm_pp_clock_range_for_mcif_wm_set_soc15 *wm_soc_clocks = wm_with_clock_ranges.wm_mcif_clocks_ranges;
@@ -536,72 +508,48 @@ static void pp_rv_set_wm_ranges(struct pp_smu *pp,
ranges->writer_wm_sets[i].min_drain_clk_mhz * 1000;
}
- if (pp_funcs && pp_funcs->set_watermarks_for_clocks_ranges)
- pp_funcs->set_watermarks_for_clocks_ranges(pp_handle,
- &wm_with_clock_ranges);
+ amdgpu_dpm_set_watermarks_for_clocks_ranges(adev,
+ &wm_with_clock_ranges);
}
static void pp_rv_set_pme_wa_enable(struct pp_smu *pp)
{
const struct dc_context *ctx = pp->dm;
struct amdgpu_device *adev = ctx->driver_context;
- void *pp_handle = adev->powerplay.pp_handle;
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
- if (pp_funcs && pp_funcs->notify_smu_enable_pwe)
- pp_funcs->notify_smu_enable_pwe(pp_handle);
+ amdgpu_dpm_notify_smu_enable_pwe(adev);
}
static void pp_rv_set_active_display_count(struct pp_smu *pp, int count)
{
const struct dc_context *ctx = pp->dm;
struct amdgpu_device *adev = ctx->driver_context;
- void *pp_handle = adev->powerplay.pp_handle;
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-
- if (!pp_funcs || !pp_funcs->set_active_display_count)
- return;
- pp_funcs->set_active_display_count(pp_handle, count);
+ amdgpu_dpm_set_active_display_count(adev, count);
}
static void pp_rv_set_min_deep_sleep_dcfclk(struct pp_smu *pp, int clock)
{
const struct dc_context *ctx = pp->dm;
struct amdgpu_device *adev = ctx->driver_context;
- void *pp_handle = adev->powerplay.pp_handle;
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-
- if (!pp_funcs || !pp_funcs->set_min_deep_sleep_dcefclk)
- return;
- pp_funcs->set_min_deep_sleep_dcefclk(pp_handle, clock);
+ amdgpu_dpm_set_min_deep_sleep_dcefclk(adev, clock);
}
static void pp_rv_set_hard_min_dcefclk_by_freq(struct pp_smu *pp, int clock)
{
const struct dc_context *ctx = pp->dm;
struct amdgpu_device *adev = ctx->driver_context;
- void *pp_handle = adev->powerplay.pp_handle;
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
- if (!pp_funcs || !pp_funcs->set_hard_min_dcefclk_by_freq)
- return;
-
- pp_funcs->set_hard_min_dcefclk_by_freq(pp_handle, clock);
+ amdgpu_dpm_set_hard_min_dcefclk_by_freq(adev, clock);
}
static void pp_rv_set_hard_min_fclk_by_freq(struct pp_smu *pp, int mhz)
{
const struct dc_context *ctx = pp->dm;
struct amdgpu_device *adev = ctx->driver_context;
- void *pp_handle = adev->powerplay.pp_handle;
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-
- if (!pp_funcs || !pp_funcs->set_hard_min_fclk_by_freq)
- return;
- pp_funcs->set_hard_min_fclk_by_freq(pp_handle, mhz);
+ amdgpu_dpm_set_hard_min_fclk_by_freq(adev, mhz);
}
static enum pp_smu_status pp_nv_set_wm_ranges(struct pp_smu *pp,
@@ -609,11 +557,8 @@ static enum pp_smu_status pp_nv_set_wm_ranges(struct pp_smu *pp,
{
const struct dc_context *ctx = pp->dm;
struct amdgpu_device *adev = ctx->driver_context;
- void *pp_handle = adev->powerplay.pp_handle;
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
- if (pp_funcs && pp_funcs->set_watermarks_for_clocks_ranges)
- pp_funcs->set_watermarks_for_clocks_ranges(pp_handle, ranges);
+ amdgpu_dpm_set_watermarks_for_clocks_ranges(adev, ranges);
return PP_SMU_RESULT_OK;
}
@@ -622,14 +567,13 @@ static enum pp_smu_status pp_nv_set_display_count(struct pp_smu *pp, int count)
{
const struct dc_context *ctx = pp->dm;
struct amdgpu_device *adev = ctx->driver_context;
- void *pp_handle = adev->powerplay.pp_handle;
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+ int ret = 0;
- if (!pp_funcs || !pp_funcs->set_active_display_count)
+ ret = amdgpu_dpm_set_active_display_count(adev, count);
+ if (ret == -EOPNOTSUPP)
return PP_SMU_RESULT_UNSUPPORTED;
-
- /* 0: successful or smu.ppt_funcs->set_display_count = NULL; 1: fail */
- if (pp_funcs->set_active_display_count(pp_handle, count))
+ else if (ret)
+ /* 0: successful or smu.ppt_funcs->set_display_count = NULL; 1: fail */
return PP_SMU_RESULT_FAIL;
return PP_SMU_RESULT_OK;
@@ -640,14 +584,13 @@ pp_nv_set_min_deep_sleep_dcfclk(struct pp_smu *pp, int mhz)
{
const struct dc_context *ctx = pp->dm;
struct amdgpu_device *adev = ctx->driver_context;
- void *pp_handle = adev->powerplay.pp_handle;
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-
- if (!pp_funcs || !pp_funcs->set_min_deep_sleep_dcefclk)
- return PP_SMU_RESULT_UNSUPPORTED;
+ int ret = 0;
/* 0: successful or smu.ppt_funcs->set_deep_sleep_dcefclk = NULL;1: fail */
- if (pp_funcs->set_min_deep_sleep_dcefclk(pp_handle, mhz))
+ ret = amdgpu_dpm_set_min_deep_sleep_dcefclk(adev, mhz);
+ if (ret == -EOPNOTSUPP)
+ return PP_SMU_RESULT_UNSUPPORTED;
+ else if (ret)
return PP_SMU_RESULT_FAIL;
return PP_SMU_RESULT_OK;
@@ -658,12 +601,8 @@ static enum pp_smu_status pp_nv_set_hard_min_dcefclk_by_freq(
{
const struct dc_context *ctx = pp->dm;
struct amdgpu_device *adev = ctx->driver_context;
- void *pp_handle = adev->powerplay.pp_handle;
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
struct pp_display_clock_request clock_req;
-
- if (!pp_funcs || !pp_funcs->display_clock_voltage_request)
- return PP_SMU_RESULT_UNSUPPORTED;
+ int ret = 0;
clock_req.clock_type = amd_pp_dcef_clock;
clock_req.clock_freq_in_khz = mhz * 1000;
@@ -671,7 +610,10 @@ static enum pp_smu_status pp_nv_set_hard_min_dcefclk_by_freq(
/* 0: successful or smu.ppt_funcs->display_clock_voltage_request = NULL
* 1: fail
*/
- if (pp_funcs->display_clock_voltage_request(pp_handle, &clock_req))
+ ret = amdgpu_dpm_display_clock_voltage_request(adev, &clock_req);
+ if (ret == -EOPNOTSUPP)
+ return PP_SMU_RESULT_UNSUPPORTED;
+ else if (ret)
return PP_SMU_RESULT_FAIL;
return PP_SMU_RESULT_OK;
@@ -682,12 +624,8 @@ pp_nv_set_hard_min_uclk_by_freq(struct pp_smu *pp, int mhz)
{
const struct dc_context *ctx = pp->dm;
struct amdgpu_device *adev = ctx->driver_context;
- void *pp_handle = adev->powerplay.pp_handle;
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
struct pp_display_clock_request clock_req;
-
- if (!pp_funcs || !pp_funcs->display_clock_voltage_request)
- return PP_SMU_RESULT_UNSUPPORTED;
+ int ret = 0;
clock_req.clock_type = amd_pp_mem_clock;
clock_req.clock_freq_in_khz = mhz * 1000;
@@ -695,7 +633,10 @@ pp_nv_set_hard_min_uclk_by_freq(struct pp_smu *pp, int mhz)
/* 0: successful or smu.ppt_funcs->display_clock_voltage_request = NULL
* 1: fail
*/
- if (pp_funcs->display_clock_voltage_request(pp_handle, &clock_req))
+ ret = amdgpu_dpm_display_clock_voltage_request(adev, &clock_req);
+ if (ret == -EOPNOTSUPP)
+ return PP_SMU_RESULT_UNSUPPORTED;
+ else if (ret)
return PP_SMU_RESULT_FAIL;
return PP_SMU_RESULT_OK;
@@ -706,14 +647,10 @@ static enum pp_smu_status pp_nv_set_pstate_handshake_support(
{
const struct dc_context *ctx = pp->dm;
struct amdgpu_device *adev = ctx->driver_context;
- void *pp_handle = adev->powerplay.pp_handle;
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
- if (pp_funcs && pp_funcs->display_disable_memory_clock_switch) {
- if (pp_funcs->display_disable_memory_clock_switch(pp_handle,
- !pstate_handshake_supported))
- return PP_SMU_RESULT_FAIL;
- }
+ if (amdgpu_dpm_display_disable_memory_clock_switch(adev,
+ !pstate_handshake_supported))
+ return PP_SMU_RESULT_FAIL;
return PP_SMU_RESULT_OK;
}
@@ -723,12 +660,8 @@ static enum pp_smu_status pp_nv_set_voltage_by_freq(struct pp_smu *pp,
{
const struct dc_context *ctx = pp->dm;
struct amdgpu_device *adev = ctx->driver_context;
- void *pp_handle = adev->powerplay.pp_handle;
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
struct pp_display_clock_request clock_req;
-
- if (!pp_funcs || !pp_funcs->display_clock_voltage_request)
- return PP_SMU_RESULT_UNSUPPORTED;
+ int ret = 0;
switch (clock_id) {
case PP_SMU_NV_DISPCLK:
@@ -748,7 +681,10 @@ static enum pp_smu_status pp_nv_set_voltage_by_freq(struct pp_smu *pp,
/* 0: successful or smu.ppt_funcs->display_clock_voltage_request = NULL
* 1: fail
*/
- if (pp_funcs->display_clock_voltage_request(pp_handle, &clock_req))
+ ret = amdgpu_dpm_display_clock_voltage_request(adev, &clock_req);
+ if (ret == -EOPNOTSUPP)
+ return PP_SMU_RESULT_UNSUPPORTED;
+ else if (ret)
return PP_SMU_RESULT_FAIL;
return PP_SMU_RESULT_OK;
@@ -759,16 +695,16 @@ static enum pp_smu_status pp_nv_get_maximum_sustainable_clocks(
{
const struct dc_context *ctx = pp->dm;
struct amdgpu_device *adev = ctx->driver_context;
- void *pp_handle = adev->powerplay.pp_handle;
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+ int ret = 0;
- if (!pp_funcs || !pp_funcs->get_max_sustainable_clocks_by_dc)
+ ret = amdgpu_dpm_get_max_sustainable_clocks_by_dc(adev,
+ max_clocks);
+ if (ret == -EOPNOTSUPP)
return PP_SMU_RESULT_UNSUPPORTED;
+ else if (ret)
+ return PP_SMU_RESULT_FAIL;
- if (!pp_funcs->get_max_sustainable_clocks_by_dc(pp_handle, max_clocks))
- return PP_SMU_RESULT_OK;
-
- return PP_SMU_RESULT_FAIL;
+ return PP_SMU_RESULT_OK;
}
static enum pp_smu_status pp_nv_get_uclk_dpm_states(struct pp_smu *pp,
@@ -776,18 +712,17 @@ static enum pp_smu_status pp_nv_get_uclk_dpm_states(struct pp_smu *pp,
{
const struct dc_context *ctx = pp->dm;
struct amdgpu_device *adev = ctx->driver_context;
- void *pp_handle = adev->powerplay.pp_handle;
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+ int ret = 0;
- if (!pp_funcs || !pp_funcs->get_uclk_dpm_states)
+ ret = amdgpu_dpm_get_uclk_dpm_states(adev,
+ clock_values_in_khz,
+ num_states);
+ if (ret == -EOPNOTSUPP)
return PP_SMU_RESULT_UNSUPPORTED;
+ else if (ret)
+ return PP_SMU_RESULT_FAIL;
- if (!pp_funcs->get_uclk_dpm_states(pp_handle,
- clock_values_in_khz,
- num_states))
- return PP_SMU_RESULT_OK;
-
- return PP_SMU_RESULT_FAIL;
+ return PP_SMU_RESULT_OK;
}
static enum pp_smu_status pp_rn_get_dpm_clock_table(
@@ -795,16 +730,15 @@ static enum pp_smu_status pp_rn_get_dpm_clock_table(
{
const struct dc_context *ctx = pp->dm;
struct amdgpu_device *adev = ctx->driver_context;
- void *pp_handle = adev->powerplay.pp_handle;
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+ int ret = 0;
- if (!pp_funcs || !pp_funcs->get_dpm_clock_table)
+ ret = amdgpu_dpm_get_dpm_clock_table(adev, clock_table);
+ if (ret == -EOPNOTSUPP)
return PP_SMU_RESULT_UNSUPPORTED;
+ else if (ret)
+ return PP_SMU_RESULT_FAIL;
- if (!pp_funcs->get_dpm_clock_table(pp_handle, clock_table))
- return PP_SMU_RESULT_OK;
-
- return PP_SMU_RESULT_FAIL;
+ return PP_SMU_RESULT_OK;
}
static enum pp_smu_status pp_rn_set_wm_ranges(struct pp_smu *pp,
@@ -812,11 +746,8 @@ static enum pp_smu_status pp_rn_set_wm_ranges(struct pp_smu *pp,
{
const struct dc_context *ctx = pp->dm;
struct amdgpu_device *adev = ctx->driver_context;
- void *pp_handle = adev->powerplay.pp_handle;
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
- if (pp_funcs && pp_funcs->set_watermarks_for_clocks_ranges)
- pp_funcs->set_watermarks_for_clocks_ranges(pp_handle, ranges);
+ amdgpu_dpm_set_watermarks_for_clocks_ranges(adev, ranges);
return PP_SMU_RESULT_OK;
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
index c022e56f9459..fd491b7a3cd7 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
@@ -24,8 +25,38 @@
*/
#include "amdgpu_dm_psr.h"
+#include "dc_dmub_srv.h"
#include "dc.h"
-#include "dm_helpers.h"
+#include "amdgpu_dm.h"
+#include "modules/power/power_helpers.h"
+
+static bool link_supports_psrsu(struct dc_link *link)
+{
+ struct dc *dc = link->ctx->dc;
+
+ if (!dc->caps.dmcub_support)
+ return false;
+
+ if (dc->ctx->dce_version < DCN_VERSION_3_1)
+ return false;
+
+ if (!is_psr_su_specific_panel(link))
+ return false;
+
+ if (!link->dpcd_caps.alpm_caps.bits.AUX_WAKE_ALPM_CAP ||
+ !link->dpcd_caps.psr_info.psr_dpcd_caps.bits.Y_COORDINATE_REQUIRED)
+ return false;
+
+ if (link->dpcd_caps.psr_info.psr_dpcd_caps.bits.SU_GRANULARITY_REQUIRED &&
+ !link->dpcd_caps.psr_info.psr2_su_y_granularity_cap)
+ return false;
+
+ if (amdgpu_dc_debug_mask & DC_DISABLE_PSR_SU)
+ return false;
+
+ /*
+ * Temporarily disable PSR-SU to avoid glitches. Every path through this
+ * function therefore returns false for now; the capability checks above
+ * only start to matter once this final return is changed.
+ */
+ return false;
+}
/*
* amdgpu_dm_set_psr_caps() - set link psr capabilities
@@ -34,25 +65,27 @@
*/
void amdgpu_dm_set_psr_caps(struct dc_link *link)
{
- uint8_t dpcd_data[EDP_PSR_RECEIVER_CAP_SIZE];
-
- if (!(link->connector_signal & SIGNAL_TYPE_EDP))
+ if (!(link->connector_signal & SIGNAL_TYPE_EDP)) {
+ link->psr_settings.psr_feature_enabled = false;
return;
- if (link->type == dc_connection_none)
+ }
+
+ if (link->type == dc_connection_none) {
+ link->psr_settings.psr_feature_enabled = false;
return;
- if (dm_helpers_dp_read_dpcd(NULL, link, DP_PSR_SUPPORT,
- dpcd_data, sizeof(dpcd_data))) {
- link->dpcd_caps.psr_caps.psr_version = dpcd_data[0];
-
- if (dpcd_data[0] == 0) {
- link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED;
- link->psr_settings.psr_feature_enabled = false;
- } else {
+ }
+
+ if (link->dpcd_caps.psr_info.psr_version == 0) {
+ link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED;
+ link->psr_settings.psr_feature_enabled = false;
+
+ } else {
+ if (link_supports_psrsu(link))
+ link->psr_settings.psr_version = DC_PSR_VERSION_SU_1;
+ else
link->psr_settings.psr_version = DC_PSR_VERSION_1;
- link->psr_settings.psr_feature_enabled = true;
- }
- DRM_INFO("PSR support:%d\n", link->psr_settings.psr_feature_enabled);
+ link->psr_settings.psr_feature_enabled = true;
}
}
@@ -67,21 +100,29 @@ bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream)
struct dc_link *link = NULL;
struct psr_config psr_config = {0};
struct psr_context psr_context = {0};
+ struct dc *dc = NULL;
bool ret = false;
if (stream == NULL)
return false;
link = stream->link;
+ dc = link->ctx->dc;
- psr_config.psr_version = link->dpcd_caps.psr_caps.psr_version;
+ if (link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED) {
+ mod_power_calc_psr_configs(&psr_config, link, stream);
- if (psr_config.psr_version > 0) {
- psr_config.psr_exit_link_training_required = 0x1;
- psr_config.psr_frame_capture_indication_req = 0;
- psr_config.psr_rfb_setup_time = 0x37;
- psr_config.psr_sdp_transmit_line_num_deadline = 0x20;
- psr_config.allow_smu_optimizations = 0x0;
+ /* linux DM specific updating for psr config fields */
+ psr_config.allow_smu_optimizations =
+ (amdgpu_dc_feature_mask & DC_PSR_ALLOW_SMU_OPT) &&
+ mod_power_only_edp(dc->current_state, stream);
+ psr_config.allow_multi_disp_optimizations =
+ (amdgpu_dc_feature_mask & DC_PSR_ALLOW_MULTI_DISP_OPT);
+
+ if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) {
+ if (!psr_su_set_dsc_slice_height(dc, link, stream, &psr_config))
+ return false;
+ }
ret = dc_link_setup_psr(link, stream, &psr_config, &psr_context);
@@ -95,9 +136,8 @@ bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream)
* amdgpu_dm_psr_enable() - enable psr f/w
* @stream: stream state
*
- * Return: true if success
*/
-bool amdgpu_dm_psr_enable(struct dc_stream_state *stream)
+void amdgpu_dm_psr_enable(struct dc_stream_state *stream)
{
struct dc_link *link = stream->link;
unsigned int vsync_rate_hz = 0;
@@ -113,7 +153,7 @@ bool amdgpu_dm_psr_enable(struct dc_stream_state *stream)
DRM_DEBUG_DRIVER("Enabling psr...\n");
vsync_rate_hz = div64_u64(div64_u64((
- stream->timing.pix_clk_100hz * 100),
+ stream->timing.pix_clk_100hz * (uint64_t)100),
stream->timing.v_total),
stream->timing.h_total);
@@ -123,6 +163,7 @@ bool amdgpu_dm_psr_enable(struct dc_stream_state *stream)
*/
if (vsync_rate_hz != 0) {
unsigned int frame_time_microsec = 1000000 / vsync_rate_hz;
+
num_frames_static = (30000 / frame_time_microsec) + 1;
}
@@ -135,9 +176,18 @@ bool amdgpu_dm_psr_enable(struct dc_stream_state *stream)
&stream, 1,
&params);
- power_opt |= psr_power_opt_z10_static_screen;
+ /*
+ * Only enable static-screen optimizations for PSR1. For PSR SU, this
+ * causes vstartup interrupt issues, used by amdgpu_dm to send vblank
+ * events.
+ */
+ if (link->psr_settings.psr_version < DC_PSR_VERSION_SU_1)
+ power_opt |= psr_power_opt_z10_static_screen;
+
+ dc_link_set_psr_allow_active(link, &psr_enable, false, false, &power_opt);
- return dc_link_set_psr_allow_active(link, &psr_enable, false, false, &power_opt);
+ if (link->ctx->dc->caps.ips_support)
+ dc_allow_idle_optimizations(link->ctx->dc, true);
}
/*
@@ -146,18 +196,17 @@ bool amdgpu_dm_psr_enable(struct dc_stream_state *stream)
*
* Return: true if success
*/
-bool amdgpu_dm_psr_disable(struct dc_stream_state *stream)
+bool amdgpu_dm_psr_disable(struct dc_stream_state *stream, bool wait)
{
- unsigned int power_opt = 0;
bool psr_enable = false;
DRM_DEBUG_DRIVER("Disabling psr...\n");
- return dc_link_set_psr_allow_active(stream->link, &psr_enable, true, false, &power_opt);
+ return dc_link_set_psr_allow_active(stream->link, &psr_enable, wait, false, NULL);
}
/*
- * amdgpu_dm_psr_disable() - disable psr f/w
+ * amdgpu_dm_psr_disable_all() - disable psr f/w for all streams
* if psr is enabled on any stream
*
* Return: true if success
@@ -168,3 +217,61 @@ bool amdgpu_dm_psr_disable_all(struct amdgpu_display_manager *dm)
return dc_set_psr_allow_active(dm->dc, false);
}
+/*
+ * amdgpu_dm_psr_is_active_allowed() - check if psr is allowed on any stream
+ * @dm: pointer to amdgpu_display_manager
+ *
+ * Walks the streams of the current DC state and stops at the first one whose
+ * link has PSR both enabled and allowed to be active.
+ *
+ * Return: true if such a stream exists, false otherwise
+ */
+
+bool amdgpu_dm_psr_is_active_allowed(struct amdgpu_display_manager *dm)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < dm->dc->current_state->stream_count; idx++) {
+		struct dc_link *link = dm->dc->current_state->streams[idx]->link;
+
+		/* A stream without a link cannot contribute. */
+		if (!link)
+			continue;
+
+		if (link->psr_settings.psr_feature_enabled &&
+		    link->psr_settings.psr_allow_active)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * amdgpu_dm_psr_wait_disable() - Wait for eDP panel to exit PSR
+ * @stream: stream state attached to the eDP link
+ *
+ * Polls the PSR state up to 1000 times with a 500us delay between polls,
+ * i.e. waits for a max of 500ms for the eDP panel to exit PSR.
+ *
+ * Return: true if panel exited PSR, false if the stream has no link or the
+ * panel was still in PSR after the last poll.
+ */
+bool amdgpu_dm_psr_wait_disable(struct dc_stream_state *stream)
+{
+	enum dc_psr_state psr_state = PSR_STATE0;
+	struct dc_link *link = stream->link;
+	int retry_count;
+
+	if (link == NULL)
+		return false;
+
+	for (retry_count = 0; retry_count < 1000; retry_count++) {
+		dc_link_get_psr_state(link, &psr_state);
+		/*
+		 * Report success from inside the loop so the result never
+		 * depends on the value the counter happens to end on.
+		 */
+		if (psr_state == PSR_STATE0)
+			return true;
+		udelay(500);
+	}
+
+	/* Every poll saw a state other than PSR_STATE0: timed out. */
+	return false;
+}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h
index 6806b3c9c84b..4fb8626913cf 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: MIT */
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
@@ -32,9 +33,11 @@
#define AMDGPU_DM_PSR_ENTRY_DELAY 5
void amdgpu_dm_set_psr_caps(struct dc_link *link);
-bool amdgpu_dm_psr_enable(struct dc_stream_state *stream);
+void amdgpu_dm_psr_enable(struct dc_stream_state *stream);
bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream);
-bool amdgpu_dm_psr_disable(struct dc_stream_state *stream);
+bool amdgpu_dm_psr_disable(struct dc_stream_state *stream, bool wait);
bool amdgpu_dm_psr_disable_all(struct amdgpu_display_manager *dm);
+bool amdgpu_dm_psr_is_active_allowed(struct amdgpu_display_manager *dm);
+bool amdgpu_dm_psr_wait_disable(struct dc_stream_state *stream);
#endif /* AMDGPU_DM_AMDGPU_DM_PSR_H_ */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_quirks.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_quirks.c
new file mode 100644
index 000000000000..1da07ebf9217
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_quirks.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include <linux/dmi.h>
+
+#include "amdgpu.h"
+#include "amdgpu_dm.h"
+
+struct amdgpu_dm_quirks {
+ bool aux_hpd_discon;
+ bool support_edp0_on_dp1;
+};
+
+static struct amdgpu_dm_quirks quirk_entries = {
+ .aux_hpd_discon = false,
+ .support_edp0_on_dp1 = false
+};
+
+static int edp0_on_dp1_callback(const struct dmi_system_id *id)
+{
+ quirk_entries.support_edp0_on_dp1 = true;
+ return 0;
+}
+
+static int aux_hpd_discon_callback(const struct dmi_system_id *id)
+{
+ quirk_entries.aux_hpd_discon = true;
+ return 0;
+}
+
+static const struct dmi_system_id dmi_quirk_table[] = {
+ {
+ .callback = aux_hpd_discon_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3660"),
+ },
+ },
+ {
+ .callback = aux_hpd_discon_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3260"),
+ },
+ },
+ {
+ .callback = aux_hpd_discon_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"),
+ },
+ },
+ {
+ .callback = aux_hpd_discon_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower Plus 7010"),
+ },
+ },
+ {
+ .callback = aux_hpd_discon_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower 7010"),
+ },
+ },
+ {
+ .callback = aux_hpd_discon_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF Plus 7010"),
+ },
+ },
+ {
+ .callback = aux_hpd_discon_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF 7010"),
+ },
+ },
+ {
+ .callback = aux_hpd_discon_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro Plus 7010"),
+ },
+ },
+ {
+ .callback = aux_hpd_discon_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro 7010"),
+ },
+ },
+ {
+ .callback = edp0_on_dp1_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP Elite mt645 G8 Mobile Thin Client"),
+ },
+ },
+ {
+ .callback = edp0_on_dp1_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP EliteBook 645 14 inch G11 Notebook PC"),
+ },
+ },
+ {
+ .callback = edp0_on_dp1_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP EliteBook 665 16 inch G11 Notebook PC"),
+ },
+ },
+ {
+ .callback = edp0_on_dp1_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook 445 14 inch G11 Notebook PC"),
+ },
+ },
+ {
+ .callback = edp0_on_dp1_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook 465 16 inch G11 Notebook PC"),
+ },
+ },
+ {}
+ /* TODO: refactor this from a fixed table to a dynamic option */
+};
+
+/*
+ * retrieve_dmi_info() - apply DMI-based display quirks to the display manager
+ * @dm: display manager whose quirk flags are reset and then populated
+ *
+ * Clears both quirk flags, runs dmi_quirk_table (whose callbacks record their
+ * hits in quirk_entries) and mirrors every recorded quirk into @dm, logging
+ * each one that gets attached.
+ */
+void retrieve_dmi_info(struct amdgpu_display_manager *dm)
+{
+	struct drm_device *dev = dm->ddev;
+
+	dm->aux_hpd_discon_quirk = false;
+	dm->edp0_on_dp1_quirk = false;
+
+	/* A zero result from the table scan leaves both quirks cleared. */
+	if (dmi_check_system(dmi_quirk_table) == 0)
+		return;
+
+	if (quirk_entries.aux_hpd_discon) {
+		dm->aux_hpd_discon_quirk = true;
+		drm_info(dev, "aux_hpd_discon_quirk attached\n");
+	}
+
+	if (quirk_entries.support_edp0_on_dp1) {
+		dm->edp0_on_dp1_quirk = true;
+		drm_info(dev, "support_edp0_on_dp1 attached\n");
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c
new file mode 100644
index 000000000000..da94e3544b65
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "amdgpu_dm_replay.h"
+#include "dc_dmub_srv.h"
+#include "dc.h"
+#include "dm_helpers.h"
+#include "amdgpu_dm.h"
+#include "modules/power/power_helpers.h"
+#include "dmub/inc/dmub_cmd.h"
+#include "dc/inc/link_service.h"
+
+/*
+ * amdgpu_dm_link_supports_replay() - check if the link supports replay
+ * @link: link whose DPCD capabilities are inspected
+ * @aconnector: connector providing the freesync state and VSDB replay mode
+ *
+ * Return: true only when every connector-side and sink-side check below
+ * passes, false as soon as one of them fails
+ */
+bool amdgpu_dm_link_supports_replay(struct dc_link *link, struct amdgpu_dm_connector *aconnector)
+{
+	struct dm_connector_state *conn_state = to_dm_connector_state(aconnector->base.state);
+	struct dpcd_caps *caps = &link->dpcd_caps;
+
+	// Connector side: freesync capable and a replay mode in the VSDB
+	if (!conn_state->freesync_capable || !aconnector->vsdb_info.replay_mode)
+		return false;
+
+	// Sink side: eDP version, ALPM aux wake and adaptive sync SDP support
+	if (caps->edp_rev < EDP_REVISION_13 ||
+	    !caps->alpm_caps.bits.AUX_WAKE_ALPM_CAP ||
+	    !caps->adaptive_sync_caps.dp_adap_sync_caps.bits.ADAPTIVE_SYNC_SDP_SUPPORT)
+		return false;
+
+	// Sink shall populate line deviation information
+	return caps->pr_info.pixel_deviation_per_line != 0 &&
+	       caps->pr_info.max_deviation_line != 0;
+}
+
+/*
+ * amdgpu_dm_set_replay_caps() - setup Replay capabilities
+ * @link: link to evaluate and configure
+ * @aconnector: connector passed on to amdgpu_dm_link_supports_replay()
+ *
+ * Return: true if Replay was already marked supported on the link or all
+ * checks passed and the replay config was initialised, false otherwise
+ */
+bool amdgpu_dm_set_replay_caps(struct dc_link *link, struct amdgpu_dm_connector *aconnector)
+{
+	struct dc *dc = link->ctx->dc;
+	struct replay_config pr_config = { 0 };
+	union replay_debug_flags *flags;
+
+	// If Replay is already set to support, return true to skip checks
+	if (link->replay_settings.config.replay_supported)
+		return true;
+
+	// Link side: embedded signal, not disallowed by panel config, sink capable
+	if (!dc_is_embedded_signal(link->connector_signal) ||
+	    link->panel_config.psr.disallow_replay ||
+	    !amdgpu_dm_link_supports_replay(link, aconnector))
+		return false;
+
+	// Firmware side: DMUB service present and reporting replay support
+	if (!dc->ctx->dmub_srv || !dc->ctx->dmub_srv->dmub ||
+	    !dc->ctx->dmub_srv->dmub->feature_caps.replay_supported)
+		return false;
+
+	// Mark Replay is supported in pr_config
+	pr_config.replay_supported = true;
+
+	flags = (union replay_debug_flags *)&pr_config.debug_flags;
+	flags->u32All = 0;
+	flags->bitfields.visual_confirm =
+		dc->debug.visual_confirm == VISUAL_CONFIRM_REPLAY;
+
+	init_replay_config(link, &pr_config);
+
+	return true;
+}
+
+/*
+ * amdgpu_dm_link_setup_replay() - configure replay link
+ * @link: link whose replay config is filled in
+ * @aconnector: connector supplying the min/max refresh rates
+ *
+ * Return: true once the replay feature has been enabled on the link, false
+ * if either argument is NULL or the link is not marked replay capable
+ */
+bool amdgpu_dm_link_setup_replay(struct dc_link *link, struct amdgpu_dm_connector *aconnector)
+{
+	struct replay_config *cfg;
+
+	if (!link || !aconnector)
+		return false;
+
+	cfg = &link->replay_settings.config;
+	if (!cfg->replay_supported)
+		return false;
+
+	cfg->replay_power_opt_supported = 0x11;
+	cfg->replay_smu_opt_supported = false;
+	cfg->replay_enable_option |= pr_enable_option_static_screen;
+	cfg->replay_support_fast_resync_in_ultra_sleep_mode =
+		aconnector->max_vfreq >= 2 * aconnector->min_vfreq;
+	cfg->replay_timing_sync_supported = false;
+
+	/* Without timing sync (cleared just above) general UI replay is masked out. */
+	if (!cfg->replay_timing_sync_supported)
+		cfg->replay_enable_option &= ~pr_enable_option_general_ui;
+
+	link->replay_settings.replay_feature_enabled = true;
+
+	return true;
+}
+
+/*
+ * amdgpu_dm_replay_enable() - enable replay f/w
+ * @stream: stream state
+ * @wait: forwarded as the wait argument of edp_set_replay_allow_active()
+ *
+ * Return: true if the stream has a link and the enable sequence was issued
+ * (the results of the link service calls are not checked), false otherwise
+ */
+bool amdgpu_dm_replay_enable(struct dc_stream_state *stream, bool wait)
+{
+	bool replay_active = true;
+	struct dc_link *link;
+
+	if (!stream || !stream->link)
+		return false;
+
+	link = stream->link;
+
+	link->dc->link_srv->edp_setup_replay(link, stream);
+	link->dc->link_srv->edp_set_coasting_vtotal(link, stream->timing.v_total, 0);
+	DRM_DEBUG_DRIVER("Enabling replay...\n");
+	link->dc->link_srv->edp_set_replay_allow_active(link, &replay_active, wait, false, NULL);
+
+	return true;
+}
+
+/*
+ * amdgpu_dm_replay_disable() - disable replay f/w
+ * @stream: stream state
+ *
+ * Return: true if the stream has a link and the disable request was issued
+ * (the result of the link service call is not checked), false otherwise
+ */
+bool amdgpu_dm_replay_disable(struct dc_stream_state *stream)
+{
+	bool replay_active = false;
+	struct dc_link *link;
+
+	if (!stream || !stream->link)
+		return false;
+
+	link = stream->link;
+
+	DRM_DEBUG_DRIVER("Disabling replay...\n");
+	link->dc->link_srv->edp_set_replay_allow_active(link, &replay_active, true, false, NULL);
+
+	return true;
+}
+
+/*
+ * amdgpu_dm_replay_disable_all() - disable replay f/w
+ * if replay is enabled on any stream
+ * @dm: pointer to amdgpu_display_manager; only dm->dc is used
+ *
+ * Return: the result of dc_set_replay_allow_active(dm->dc, false)
+ */
+bool amdgpu_dm_replay_disable_all(struct amdgpu_display_manager *dm)
+{
+ DRM_DEBUG_DRIVER("Disabling replay if replay is enabled on any stream\n");
+ return dc_set_replay_allow_active(dm->dc, false);
+}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.h
new file mode 100644
index 000000000000..73b6c67ae5e7
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef AMDGPU_DM_AMDGPU_DM_REPLAY_H_
+#define AMDGPU_DM_AMDGPU_DM_REPLAY_H_
+
+#include "amdgpu.h"
+
+/*
+ * Bit flags describing which replay scenarios are enabled.
+ *
+ * The values are single bits so they can be combined and masked (for example
+ * cleared with "&= ~flag"). The three "_coasting" values are the matching
+ * low-word flags shifted left by 16 bits.
+ * NOTE(review): the exact meaning of each scenario is defined by the replay
+ * firmware interface, not by this header - confirm there.
+ */
+enum replay_enable_option {
+ pr_enable_option_static_screen = 0x1,
+ pr_enable_option_mpo_video = 0x2,
+ pr_enable_option_full_screen_video = 0x4,
+ pr_enable_option_general_ui = 0x8,
+ pr_enable_option_static_screen_coasting = 0x10000,
+ pr_enable_option_mpo_video_coasting = 0x20000,
+ pr_enable_option_full_screen_video_coasting = 0x40000,
+};
+
+bool amdgpu_dm_link_supports_replay(struct dc_link *link, struct amdgpu_dm_connector *aconnector);
+/* The second argument is the "wait" flag of the definition, not an on/off switch. */
+bool amdgpu_dm_replay_enable(struct dc_stream_state *stream, bool wait);
+bool amdgpu_dm_set_replay_caps(struct dc_link *link, struct amdgpu_dm_connector *aconnector);
+bool amdgpu_dm_link_setup_replay(struct dc_link *link, struct amdgpu_dm_connector *aconnector);
+bool amdgpu_dm_replay_disable(struct dc_stream_state *stream);
+bool amdgpu_dm_replay_disable_all(struct amdgpu_display_manager *dm);
+
+#endif /* AMDGPU_DM_AMDGPU_DM_REPLAY_H_ */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c
index d9e33c6bccd9..8550d5e8b753 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
@@ -52,4 +53,12 @@ void dm_perf_trace_timestamp(const char *func_name, unsigned int line, struct dc
func_name, line);
}
+/* Empty stub: accepts the SMU message id, parameter, delay and context and does nothing. */
+void dm_trace_smu_enter(uint32_t msg_id, uint32_t param_in, unsigned int delay, struct dc_context *ctx)
+{
+}
+
+/* Empty stub: accepts the SMU success flag, response and context and does nothing. */
+void dm_trace_smu_exit(bool success, uint32_t response, struct dc_context *ctx)
+{
+}
+
/**** power component interfaces ****/
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
index fdcaea22b456..aa56fd6d56c3 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: MIT */
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
@@ -34,8 +35,10 @@
#include <drm/drm_crtc.h>
#include <drm/drm_plane.h>
#include <drm/drm_fourcc.h>
+#include <drm/drm_framebuffer.h>
#include <drm/drm_encoder.h>
#include <drm/drm_atomic.h>
+#include "dc/inc/hw/optc.h"
#include "dc/inc/core_types.h"
@@ -85,7 +88,7 @@ TRACE_EVENT(amdgpu_dc_performance,
__entry->writes = write_count;
__entry->read_delta = read_count - *last_read;
__entry->write_delta = write_count - *last_write;
- __assign_str(func, func);
+ __assign_str(func);
__entry->line = line;
*last_read = read_count;
*last_write = write_count;
@@ -661,6 +664,95 @@ TRACE_EVENT(dcn_fpu,
)
);
+/*
+ * dcn_optc_lock_unlock_state - snapshot of an OPTC instance at lock/unlock time
+ *
+ * Records the caller (function name and line), the OPTC instance number,
+ * whether this is a lock or an unlock, and the limit/timing fields copied
+ * from @optc_state. @function is stored as a pointer and printed with %s.
+ * NOTE(review): that is only safe for strings that outlive the trace buffer
+ * (e.g. __func__) - confirm at the call sites.
+ */
+TRACE_EVENT(dcn_optc_lock_unlock_state,
+	    TP_PROTO(const struct optc *optc_state, int instance, bool lock, const char *function, const int line),
+	    TP_ARGS(optc_state, instance, lock, function, line),
+
+	    TP_STRUCT__entry(
+			     __field(const char *, function)
+			     __field(int, instance)
+			     __field(bool, lock)
+			     __field(int, line)
+			     __field(int, opp_count)
+			     __field(int, max_h_total)
+			     __field(int, max_v_total)
+			     __field(int, min_h_blank)
+			     __field(int, min_h_sync_width)
+			     __field(int, min_v_sync_width)
+			     __field(int, min_v_blank)
+			     __field(int, min_v_blank_interlace)
+			     __field(int, vstartup_start)
+			     __field(int, vupdate_offset)
+			     __field(int, vupdate_width)
+			     __field(int, vready_offset)
+	    ),
+	    TP_fast_assign(
+			   __entry->function = function;
+			   __entry->instance = instance;
+			   __entry->lock = lock;
+			   __entry->line = line;
+			   __entry->opp_count = optc_state->opp_count;
+			   __entry->max_h_total = optc_state->max_h_total;
+			   __entry->max_v_total = optc_state->max_v_total;
+			   __entry->min_h_blank = optc_state->min_h_blank;
+			   __entry->min_h_sync_width = optc_state->min_h_sync_width;
+			   __entry->min_v_sync_width = optc_state->min_v_sync_width;
+			   __entry->min_v_blank = optc_state->min_v_blank;
+			   __entry->min_v_blank_interlace = optc_state->min_v_blank_interlace;
+			   __entry->vstartup_start = optc_state->vstartup_start;
+			   __entry->vupdate_offset = optc_state->vupdate_offset;
+			   __entry->vupdate_width = optc_state->vupdate_width;
+			   /* Was copied from vupdate_offset, so the trace printed the wrong value. */
+			   __entry->vready_offset = optc_state->vready_offset;
+	    ),
+	    TP_printk("%s: %s()+%d: optc_instance=%d opp_count=%d max_h_total=%d max_v_total=%d "
+		      "min_h_blank=%d min_h_sync_width=%d min_v_sync_width=%d min_v_blank=%d "
+		      "min_v_blank_interlace=%d vstartup_start=%d vupdate_offset=%d vupdate_width=%d "
+		      "vready_offset=%d",
+		      __entry->lock ? "Lock" : "Unlock",
+		      __entry->function,
+		      __entry->line,
+		      __entry->instance,
+		      __entry->opp_count,
+		      __entry->max_h_total,
+		      __entry->max_v_total,
+		      __entry->min_h_blank,
+		      __entry->min_h_sync_width,
+		      __entry->min_v_sync_width,
+		      __entry->min_v_blank,
+		      __entry->min_v_blank_interlace,
+		      __entry->vstartup_start,
+		      __entry->vupdate_offset,
+		      __entry->vupdate_width,
+		      __entry->vready_offset
+	    )
+);
+
+/*
+ * amdgpu_dm_brightness - trace a brightness request
+ *
+ * Records the caller address (@function, printed as a symbol via %ps), the
+ * brightness value as requested and after conversion, whether @aux was set,
+ * and the power source flag (@ac prints as "AC" when true, "DC" otherwise).
+ */
+TRACE_EVENT(amdgpu_dm_brightness,
+ TP_PROTO(void *function, u32 user_brightness, u32 converted_brightness, bool aux, bool ac),
+ TP_ARGS(function, user_brightness, converted_brightness, aux, ac),
+ TP_STRUCT__entry(
+ __field(void *, function)
+ __field(u32, user_brightness)
+ __field(u32, converted_brightness)
+ __field(bool, aux)
+ __field(bool, ac)
+ ),
+ TP_fast_assign(
+ __entry->function = function;
+ __entry->user_brightness = user_brightness;
+ __entry->converted_brightness = converted_brightness;
+ __entry->aux = aux;
+ __entry->ac = ac;
+ ),
+ TP_printk("%ps: brightness requested=%u converted=%u aux=%s power=%s",
+ (void *)__entry->function,
+ (u32)__entry->user_brightness,
+ (u32)__entry->converted_brightness,
+ (__entry->aux) ? "true" : "false",
+ (__entry->ac) ? "AC" : "DC"
+ )
+);
+
#endif /* _AMDGPU_DM_TRACE_H_ */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
new file mode 100644
index 000000000000..d9527c05fc87
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dm_services_types.h"
+
+#include "amdgpu.h"
+#include "amdgpu_dm.h"
+#include "amdgpu_dm_wb.h"
+#include "amdgpu_display.h"
+#include "dc.h"
+
+#include <drm/drm_edid.h>
+#include <drm/drm_atomic_state_helper.h>
+#include <drm/drm_modeset_helper_vtables.h>
+
+/*
+ * Pixel formats accepted for writeback framebuffers. The table is checked in
+ * the encoder atomic_check and handed to drm_writeback_connector_init().
+ */
+static const u32 amdgpu_dm_wb_formats[] = {
+ DRM_FORMAT_XRGB2101010,
+};
+
+/*
+ * amdgpu_dm_wb_encoder_atomic_check() - validate the writeback framebuffer
+ * @encoder: writeback encoder (unused)
+ * @crtc_state: CRTC state providing the mode the framebuffer must match
+ * @conn_state: connector state carrying the optional writeback job
+ *
+ * A state without a writeback job or without a framebuffer is accepted as is.
+ * Otherwise the framebuffer must have exactly the mode's hdisplay x vdisplay
+ * size and one of the formats listed in amdgpu_dm_wb_formats.
+ *
+ * Return: 0 if the state is acceptable, -EINVAL on a size or format mismatch
+ */
+static int amdgpu_dm_wb_encoder_atomic_check(struct drm_encoder *encoder,
+					     struct drm_crtc_state *crtc_state,
+					     struct drm_connector_state *conn_state)
+{
+	const struct drm_display_mode *mode = &crtc_state->mode;
+	struct drm_framebuffer *fb;
+	bool found = false;
+	unsigned int i;
+
+	if (!conn_state->writeback_job || !conn_state->writeback_job->fb)
+		return 0;
+
+	fb = conn_state->writeback_job->fb;
+	if (fb->width != mode->hdisplay || fb->height != mode->vdisplay) {
+		DRM_DEBUG_KMS("Invalid framebuffer size %ux%u\n",
+			      fb->width, fb->height);
+		return -EINVAL;
+	}
+
+	/* Stop at the first match; ARRAY_SIZE keeps the bound tied to the table. */
+	for (i = 0; i < ARRAY_SIZE(amdgpu_dm_wb_formats); i++) {
+		if (fb->format->format == amdgpu_dm_wb_formats[i]) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		DRM_DEBUG_KMS("Invalid pixel format %p4cc\n",
+			      &fb->format->format);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Populate the writeback connector's mode list without an EDID, capped at
+ * 3840x2160. Returns whatever drm_add_modes_noedid() returns.
+ */
+static int amdgpu_dm_wb_connector_get_modes(struct drm_connector *connector)
+{
+ /* Maximum resolution supported by DWB */
+ return drm_add_modes_noedid(connector, 3840, 2160);
+}
+
+/*
+ * amdgpu_dm_wb_prepare_job() - pin the writeback framebuffer's buffer object
+ * @wb_connector: writeback connector (unused)
+ * @job: writeback job whose framebuffer is prepared
+ *
+ * Reserves the BO behind plane 0 of the framebuffer, reserves one fence slot,
+ * pins the BO, binds it through amdgpu_ttm_alloc_gart(), stores its GPU offset
+ * in the amdgpu framebuffer and takes an extra BO reference. On failure the
+ * steps already done are undone through the error labels.
+ *
+ * Return: 0 on success or when no framebuffer is bound, otherwise the error
+ * code of the step that failed
+ */
+static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector,
+ struct drm_writeback_job *job)
+{
+ struct amdgpu_framebuffer *afb;
+ struct drm_gem_object *obj;
+ struct amdgpu_device *adev;
+ struct amdgpu_bo *rbo;
+ uint32_t domain;
+ int r;
+
+ /* Nothing to pin when the job carries no framebuffer. */
+ if (!job->fb) {
+ DRM_DEBUG_KMS("No FB bound\n");
+ return 0;
+ }
+
+ afb = to_amdgpu_framebuffer(job->fb);
+ /* Only the first GEM object of the framebuffer is used. */
+ obj = job->fb->obj[0];
+ rbo = gem_to_amdgpu_bo(obj);
+ adev = amdgpu_ttm_adev(rbo->tbo.bdev);
+
+ r = amdgpu_bo_reserve(rbo, true);
+ if (r) {
+ drm_err(adev_to_drm(adev), "fail to reserve bo (%d)\n", r);
+ return r;
+ }
+
+ r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1);
+ if (r) {
+ drm_err(adev_to_drm(adev), "reserving fence slot failed (%d)\n", r);
+ goto error_unlock;
+ }
+
+ domain = amdgpu_display_supported_domains(adev, rbo->flags);
+
+ /* The contiguous flag is set on the BO before it is pinned. */
+ rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ r = amdgpu_bo_pin(rbo, domain);
+ if (unlikely(r != 0)) {
+ /* -ERESTARTSYS is returned without logging an error. */
+ if (r != -ERESTARTSYS)
+ DRM_ERROR("Failed to pin framebuffer with error %d\n", r);
+ goto error_unlock;
+ }
+
+ r = amdgpu_ttm_alloc_gart(&rbo->tbo);
+ if (unlikely(r != 0)) {
+ DRM_ERROR("%p bind failed\n", rbo);
+ goto error_unpin;
+ }
+
+ amdgpu_bo_unreserve(rbo);
+
+ afb->address = amdgpu_bo_gpu_offset(rbo);
+
+ /* Extra reference; amdgpu_dm_wb_cleanup_job() drops it with amdgpu_bo_unref(). */
+ amdgpu_bo_ref(rbo);
+
+ return 0;
+
+error_unpin:
+ amdgpu_bo_unpin(rbo);
+
+error_unlock:
+ amdgpu_bo_unreserve(rbo);
+ return r;
+}
+
+/*
+ * amdgpu_dm_wb_cleanup_job() - undo amdgpu_dm_wb_prepare_job()
+ * @connector: writeback connector (unused)
+ * @job: writeback job whose framebuffer BO is released
+ *
+ * Reserves the BO behind plane 0 of the framebuffer, unpins it, unreserves it
+ * and drops one BO reference. Does nothing when the job has no framebuffer.
+ */
+static void amdgpu_dm_wb_cleanup_job(struct drm_writeback_connector *connector,
+ struct drm_writeback_job *job)
+{
+ struct amdgpu_bo *rbo;
+ int r;
+
+ if (!job->fb)
+ return;
+
+ rbo = gem_to_amdgpu_bo(job->fb->obj[0]);
+ r = amdgpu_bo_reserve(rbo, false);
+ if (unlikely(r)) {
+ /*
+ * NOTE(review): this path returns without unpinning or dropping the
+ * reference, so both stay held if the reservation fails - confirm
+ * that this is the intended trade-off.
+ */
+ DRM_ERROR("failed to reserve rbo before unpin\n");
+ return;
+ }
+
+ amdgpu_bo_unpin(rbo);
+ amdgpu_bo_unreserve(rbo);
+ amdgpu_bo_unref(&rbo);
+}
+
+/* Encoder helper hooks: only atomic_check is provided. */
+static const struct drm_encoder_helper_funcs amdgpu_dm_wb_encoder_helper_funcs = {
+ .atomic_check = amdgpu_dm_wb_encoder_atomic_check,
+};
+
+/*
+ * Connector hooks: DRM helpers for mode probing, destruction and state
+ * destruction; amdgpu_dm connector callbacks for reset and state duplication.
+ */
+static const struct drm_connector_funcs amdgpu_dm_wb_connector_funcs = {
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .destroy = drm_connector_cleanup,
+ .reset = amdgpu_dm_connector_funcs_reset,
+ .atomic_duplicate_state = amdgpu_dm_connector_atomic_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+/* Connector helper hooks: mode list plus writeback job prepare/cleanup. */
+static const struct drm_connector_helper_funcs amdgpu_dm_wb_conn_helper_funcs = {
+ .get_modes = amdgpu_dm_wb_connector_get_modes,
+ .prepare_writeback_job = amdgpu_dm_wb_prepare_job,
+ .cleanup_writeback_job = amdgpu_dm_wb_cleanup_job,
+};
+
+/*
+ * amdgpu_dm_wb_connector_init() - register a writeback connector
+ * @dm: display manager owning the dc instance and the DRM device
+ * @wbcon: writeback connector to initialise
+ * @link_index: index of the dc link to associate with @wbcon
+ *
+ * Stores the link looked up by @link_index in @wbcon, installs the connector
+ * helper hooks, registers the DRM writeback connector and, when a reset hook
+ * is present, calls it to allocate the initial connector state.
+ *
+ * Return: 0 on success, otherwise the error from drm_writeback_connector_init()
+ */
+int amdgpu_dm_wb_connector_init(struct amdgpu_display_manager *dm,
+				struct amdgpu_dm_wb_connector *wbcon,
+				uint32_t link_index)
+{
+	struct drm_connector *connector = &wbcon->base.base;
+	int ret;
+
+	wbcon->link = dc_get_link_at_index(dm->dc, link_index);
+
+	drm_connector_helper_add(connector, &amdgpu_dm_wb_conn_helper_funcs);
+
+	ret = drm_writeback_connector_init(&dm->adev->ddev, &wbcon->base,
+					   &amdgpu_dm_wb_connector_funcs,
+					   &amdgpu_dm_wb_encoder_helper_funcs,
+					   amdgpu_dm_wb_formats,
+					   ARRAY_SIZE(amdgpu_dm_wb_formats),
+					   amdgpu_dm_get_encoder_crtc_mask(dm->adev));
+	if (ret)
+		return ret;
+
+	/*
+	 * Some of the properties below require access to state, like bpc.
+	 * Allocate some default initial connector state with our reset helper.
+	 */
+	if (connector->funcs->reset)
+		connector->funcs->reset(connector);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h
new file mode 100644
index 000000000000..13d31c857dee
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __AMDGPU_DM_WB_H__
+#define __AMDGPU_DM_WB_H__
+
+#include <drm/drm_writeback.h>
+
+int amdgpu_dm_wb_connector_init(struct amdgpu_display_manager *dm,
+ struct amdgpu_dm_wb_connector *dm_wbcon,
+ uint32_t link_index);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
index ab0c6d191038..e46f8ce41d87 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -26,12 +26,7 @@
#include "dc_trace.h"
-#if defined(CONFIG_X86)
-#include <asm/fpu/api.h>
-#elif defined(CONFIG_PPC64)
-#include <asm/switch_to.h>
-#include <asm/cputable.h>
-#endif
+#include <linux/fpu.h>
/**
* DOC: DC FPU manipulation overview
@@ -56,11 +51,9 @@ static DEFINE_PER_CPU(int, fpu_recursion_depth);
*/
inline void dc_assert_fp_enabled(void)
{
- int *pcpu, depth = 0;
+ int depth;
- pcpu = get_cpu_ptr(&fpu_recursion_depth);
- depth = *pcpu;
- put_cpu_ptr(&fpu_recursion_depth);
+ depth = __this_cpu_read(fpu_recursion_depth);
ASSERT(depth >= 1);
}
@@ -80,30 +73,17 @@ inline void dc_assert_fp_enabled(void)
*/
void dc_fpu_begin(const char *function_name, const int line)
{
- int *pcpu;
+ int depth;
- pcpu = get_cpu_ptr(&fpu_recursion_depth);
- *pcpu += 1;
-
- if (*pcpu == 1) {
-#if defined(CONFIG_X86)
+ WARN_ON_ONCE(!in_task());
+ preempt_disable();
+ depth = __this_cpu_inc_return(fpu_recursion_depth);
+ if (depth == 1) {
+ BUG_ON(!kernel_fpu_available());
kernel_fpu_begin();
-#elif defined(CONFIG_PPC64)
- if (cpu_has_feature(CPU_FTR_VSX_COMP)) {
- preempt_disable();
- enable_kernel_vsx();
- } else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP)) {
- preempt_disable();
- enable_kernel_altivec();
- } else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE)) {
- preempt_disable();
- enable_kernel_fp();
- }
-#endif
}
- TRACE_DCN_FPU(true, function_name, line, *pcpu);
- put_cpu_ptr(&fpu_recursion_depth);
+ TRACE_DCN_FPU(true, function_name, line, depth);
}
/**
@@ -118,27 +98,15 @@ void dc_fpu_begin(const char *function_name, const int line)
*/
void dc_fpu_end(const char *function_name, const int line)
{
- int *pcpu;
+ int depth;
- pcpu = get_cpu_ptr(&fpu_recursion_depth);
- *pcpu -= 1;
- if (*pcpu <= 0) {
-#if defined(CONFIG_X86)
+ depth = __this_cpu_dec_return(fpu_recursion_depth);
+ if (depth == 0) {
kernel_fpu_end();
-#elif defined(CONFIG_PPC64)
- if (cpu_has_feature(CPU_FTR_VSX_COMP)) {
- disable_kernel_vsx();
- preempt_enable();
- } else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP)) {
- disable_kernel_altivec();
- preempt_enable();
- } else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE)) {
- disable_kernel_fp();
- preempt_enable();
- }
-#endif
+ } else {
+ WARN_ON_ONCE(depth < 0);
}
- TRACE_DCN_FPU(false, function_name, line, *pcpu);
- put_cpu_ptr(&fpu_recursion_depth);
+ TRACE_DCN_FPU(false, function_name, line, depth);
+ preempt_enable();
}
diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile
index b1f0d6260226..7277ed21552f 100644
--- a/drivers/gpu/drm/amd/display/dc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/Makefile
@@ -21,59 +21,63 @@
#
#
# Makefile for Display Core (dc) component.
-#
-DC_LIBS = basics bios calcs clk_mgr dce gpio irq virtual
+DC_LIBS = basics bios dml clk_mgr dce gpio hwss irq link virtual dsc resource optc dpp hubbub dccg hubp dio dwb hpo mmhubbub mpc opp pg
+
+ifdef CONFIG_DRM_AMD_DC_FP
-ifdef CONFIG_DRM_AMD_DC_DCN
+KCOV_INSTRUMENT := n
+
+DC_LIBS += dcn10
DC_LIBS += dcn20
-DC_LIBS += dsc
-DC_LIBS += dcn10 dml
DC_LIBS += dcn21
DC_LIBS += dcn201
DC_LIBS += dcn30
DC_LIBS += dcn301
-DC_LIBS += dcn302
-DC_LIBS += dcn303
DC_LIBS += dcn31
+DC_LIBS += dml
+DC_LIBS += dml2_0
+DC_LIBS += soc_and_ip_translator
endif
DC_LIBS += dce120
DC_LIBS += dce112
DC_LIBS += dce110
-DC_LIBS += dce100
DC_LIBS += dce80
ifdef CONFIG_DRM_AMD_DC_SI
DC_LIBS += dce60
endif
-ifdef CONFIG_DRM_AMD_DC_HDCP
DC_LIBS += hdcp
+
+ifdef CONFIG_DRM_AMD_DC_FP
+DC_LIBS += sspl
+AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/, dc_spl_translate.o)
endif
AMD_DC = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/dc/,$(DC_LIBS)))
include $(AMD_DC)
-DISPLAY_CORE = dc.o dc_stat.o dc_link.o dc_resource.o dc_hw_sequencer.o dc_sink.o \
-dc_surface.o dc_link_hwss.o dc_link_dp.o dc_link_ddc.o dc_debug.o dc_stream.o \
-dc_link_enc_cfg.o dc_link_dpia.o dc_link_dpcd.o
-
-ifdef CONFIG_DRM_AMD_DC_DCN
-DISPLAY_CORE += dc_vm_helper.o
-endif
-
-AMD_DISPLAY_CORE = $(addprefix $(AMDDALPATH)/dc/core/,$(DISPLAY_CORE))
-
-AMD_DM_REG_UPDATE = $(addprefix $(AMDDALPATH)/dc/,dc_helper.o)
+FILES =
+FILES += dc_dmub_srv.o
+FILES += dc_edid_parser.o
+FILES += dc_fused_io.o
+FILES += dc_helper.o
+FILES += core/dc.o
+FILES += core/dc_debug.o
+FILES += core/dc_hw_sequencer.o
+FILES += core/dc_link_enc_cfg.o
+FILES += core/dc_link_exports.o
+FILES += core/dc_resource.o
+FILES += core/dc_sink.o
+FILES += core/dc_stat.o
+FILES += core/dc_state.o
+FILES += core/dc_stream.o
+FILES += core/dc_surface.o
+FILES += core/dc_vm_helper.o
-AMD_DISPLAY_FILES += $(AMD_DISPLAY_CORE)
-AMD_DISPLAY_FILES += $(AMD_DM_REG_UPDATE)
+AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/, $(FILES))
-DC_DMUB += dc_dmub_srv.o
-DC_EDID += dc_edid_parser.o
-AMD_DISPLAY_DMUB = $(addprefix $(AMDDALPATH)/dc/,$(DC_DMUB))
-AMD_DISPLAY_EDID = $(addprefix $(AMDDALPATH)/dc/,$(DC_EDID))
-AMD_DISPLAY_FILES += $(AMD_DISPLAY_DMUB) $(AMD_DISPLAY_EDID)
diff --git a/drivers/gpu/drm/amd/display/dc/basics/Makefile b/drivers/gpu/drm/amd/display/dc/basics/Makefile
index 01b99e0d788e..aabcebf69049 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/basics/Makefile
@@ -24,7 +24,14 @@
# It provides the general basic services required by other DAL
# subcomponents.
-BASICS = conversion.o fixpt31_32.o vector.o dc_common.o
+BASICS := \
+ conversion.o \
+ fixpt31_32.o \
+ vector.o \
+ dc_common.o \
+ dce_calcs.o \
+ custom_float.o \
+ bw_fixed.o
AMD_DAL_BASICS = $(addprefix $(AMDDALPATH)/dc/basics/,$(BASICS))
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/bw_fixed.c b/drivers/gpu/drm/amd/display/dc/basics/bw_fixed.c
index 6ca288fb5fb9..c8cb89e0d4d0 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/bw_fixed.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/bw_fixed.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: MIT
/*
- * Copyright 2015 Advanced Micro Devices, Inc.
+ * Copyright 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -25,12 +26,11 @@
#include "dm_services.h"
#include "bw_fixed.h"
+#define MAX_I64 \
+ ((int64_t)((1ULL << 63) - 1))
#define MIN_I64 \
- (int64_t)(-(1LL << 63))
-
-#define MAX_I64 \
- (int64_t)((1ULL << 63) - 1)
+ (-MAX_I64 - 1)
#define FRACTIONAL_PART_MASK \
((1ULL << BW_FIXED_BITS_PER_FRACTIONAL_PART) - 1)
@@ -49,6 +49,7 @@ static uint64_t abs_i64(int64_t arg)
struct bw_fixed bw_int_to_fixed_nonconst(int64_t value)
{
struct bw_fixed res;
+
ASSERT(value < BW_FIXED_MAX_I32 && value > BW_FIXED_MIN_I32);
res.value = value << BW_FIXED_BITS_PER_FRACTIONAL_PART;
return res;
@@ -78,14 +79,12 @@ struct bw_fixed bw_frc_to_fixed(int64_t numerator, int64_t denominator)
{
uint32_t i = BW_FIXED_BITS_PER_FRACTIONAL_PART;
- do
- {
+ do {
remainder <<= 1;
res_value <<= 1;
- if (remainder >= arg2_value)
- {
+ if (remainder >= arg2_value) {
res_value |= 1;
remainder -= arg2_value;
}
@@ -108,9 +107,8 @@ struct bw_fixed bw_frc_to_fixed(int64_t numerator, int64_t denominator)
return res;
}
-struct bw_fixed bw_floor2(
- const struct bw_fixed arg,
- const struct bw_fixed significance)
+struct bw_fixed bw_floor2(const struct bw_fixed arg,
+ const struct bw_fixed significance)
{
struct bw_fixed result;
int64_t multiplicand;
@@ -121,9 +119,8 @@ struct bw_fixed bw_floor2(
return result;
}
-struct bw_fixed bw_ceil2(
- const struct bw_fixed arg,
- const struct bw_fixed significance)
+struct bw_fixed bw_ceil2(const struct bw_fixed arg,
+ const struct bw_fixed significance)
{
struct bw_fixed result;
int64_t multiplicand;
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/calcs_logger.h b/drivers/gpu/drm/amd/display/dc/basics/calcs_logger.h
index 62435bfc274d..62435bfc274d 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/calcs_logger.h
+++ b/drivers/gpu/drm/amd/display/dc/basics/calcs_logger.h
diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.c b/drivers/gpu/drm/amd/display/dc/basics/conversion.c
index 6767fab55c26..bd1f60ecaba4 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/conversion.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.c
@@ -24,7 +24,7 @@
*/
#include "dm_services.h"
-#include "conversion.h"
+#include "basics/conversion.h"
#define DIVIDER 10000
@@ -100,3 +100,59 @@ void convert_float_matrix(
matrix[i] = (uint16_t)reg_value;
}
}
+
+/*
+ * Convert a sign-and-magnitude register value into fixed31_32.
+ *
+ * The sign bit sits directly above the @integer_bits + @fractional_bits
+ * magnitude bits. The magnitude is shifted so that its fractional bits line
+ * up with the fixed31_32 fraction, and the result is negated when the sign
+ * bit is set.
+ */
+static struct fixed31_32 int_frac_to_fixed_point(uint16_t arg,
+						 uint8_t integer_bits,
+						 uint8_t fractional_bits)
+{
+	const uint16_t sign_mask = 1 << (fractional_bits + integer_bits);
+	const uint16_t value_mask = sign_mask - 1;
+	const uint16_t magnitude = arg & value_mask;
+	struct fixed31_32 result;
+
+	result.value = (long long)magnitude <<
+		(FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
+
+	return (arg & sign_mask) ? dc_fixpt_neg(result) : result;
+}
+
+/**
+ * convert_hw_matrix - converts HW values into fixed31_32 matrix.
+ * @matrix: fixed point 31.32 matrix
+ * @reg: array of register values
+ * @buffer_size: size of the array of register values
+ *
+ * Converts HW register spec defined format S2D13 into a fixed-point 31.32
+ * matrix. @matrix must have room for @buffer_size entries.
+ */
+void convert_hw_matrix(struct fixed31_32 *matrix,
+		       uint16_t *reg,
+		       uint32_t buffer_size)
+{
+	uint32_t i;
+
+	/* Index has the same unsigned type as the bound: no signed/unsigned compare. */
+	for (i = 0; i < buffer_size; ++i)
+		matrix[i] = int_frac_to_fixed_point(reg[i], 2, 13);
+}
+
+/* Greatest common divisor by the Euclidean algorithm; find_gcd(x, 0) is x. */
+static uint32_t find_gcd(uint32_t a, uint32_t b)
+{
+	uint32_t remainder;
+
+	while (b != 0) {
+		remainder = a % b;
+		a = b;
+		b = remainder;
+	}
+	return a;
+}
+
+/*
+ * reduce_fraction - reduce num/den to lowest terms
+ * @num: numerator
+ * @den: denominator
+ * @out_num: receives the reduced numerator
+ * @out_den: receives the reduced denominator
+ *
+ * When both @num and @den are zero there is no common divisor to divide by;
+ * the outputs are then set to 0/0 instead of dividing by zero.
+ */
+void reduce_fraction(uint32_t num, uint32_t den,
+		     uint32_t *out_num, uint32_t *out_den)
+{
+	uint32_t gcd = find_gcd(num, den);
+
+	/* gcd is 0 only for 0/0; pass the inputs through unchanged. */
+	if (gcd == 0) {
+		*out_num = num;
+		*out_den = den;
+		return;
+	}
+
+	*out_num = num / gcd;
+	*out_den = den / gcd;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.h b/drivers/gpu/drm/amd/display/dc/basics/conversion.h
index ade785c4fdc7..a433cef78496 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/conversion.h
+++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.h
@@ -38,6 +38,13 @@ void convert_float_matrix(
struct fixed31_32 *flt,
uint32_t buffer_size);
+void reduce_fraction(uint32_t num, uint32_t den,
+ uint32_t *out_num, uint32_t *out_den);
+
+void convert_hw_matrix(struct fixed31_32 *matrix,
+ uint16_t *reg,
+ uint32_t buffer_size);
+
static inline unsigned int log_2(unsigned int num)
{
return ilog2(num);
diff --git a/drivers/gpu/drm/amd/display/dc/basics/custom_float.c b/drivers/gpu/drm/amd/display/dc/basics/custom_float.c
new file mode 100644
index 000000000000..ae05ded9a7f3
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/basics/custom_float.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "dm_services.h"
+#include "custom_float.h"
+
+/*
+ * build_custom_float() - split a fixed31_32 value into sign/mantissa/exponent
+ * @value: value to convert
+ * @format: target format (mantissa_bits, exponenta_bits, sign)
+ * @negative: set to format->sign for negative inputs, false otherwise
+ * @mantissa: receives the mantissa field
+ * @exponenta: receives the exponent field, including the exponent offset
+ *
+ * The magnitude is normalised by shifting it left or right one bit at a time
+ * and the exponent is adjusted by the number of shifts. Values too small for
+ * the exponent range are flushed to mantissa 0 / exponent 0.
+ *
+ * Return: always true
+ */
+static bool build_custom_float(struct fixed31_32 value,
+ const struct custom_float_format *format,
+ bool *negative,
+ uint32_t *mantissa,
+ uint32_t *exponenta)
+{
+ /* Offset added to the exponent: 2^(exponenta_bits - 1) - 1. */
+ uint32_t exp_offset = (1 << (format->exponenta_bits - 1)) - 1;
+
+ /* (2^(mantissa_bits + 1) - 1) / 2^mantissa_bits: upper bound used for normalisation. */
+ const struct fixed31_32 mantissa_constant_plus_max_fraction =
+ dc_fixpt_from_fraction((1LL << (format->mantissa_bits + 1)) - 1,
+ 1LL << format->mantissa_bits);
+
+ struct fixed31_32 mantiss;
+
+ /* Zero maps to all-zero fields. */
+ if (dc_fixpt_eq(value, dc_fixpt_zero)) {
+ *negative = false;
+ *mantissa = 0;
+ *exponenta = 0;
+ return true;
+ }
+
+ /* Continue with the magnitude; the sign is kept only if the format has one. */
+ if (dc_fixpt_lt(value, dc_fixpt_zero)) {
+ *negative = format->sign;
+ value = dc_fixpt_neg(value);
+ } else {
+ *negative = false;
+ }
+
+ if (dc_fixpt_lt(value, dc_fixpt_one)) {
+ uint32_t i = 1;
+
+ /* Shift left until value >= 1; after the --i below, i is the shift count. */
+ do {
+ value = dc_fixpt_shl(value, 1);
+ ++i;
+ } while (dc_fixpt_lt(value, dc_fixpt_one));
+
+ --i;
+
+ /* Exponent would be zero or negative: flush to zero. */
+ if (exp_offset <= i) {
+ *mantissa = 0;
+ *exponenta = 0;
+ return true;
+ }
+
+ *exponenta = exp_offset - i;
+ } else if (dc_fixpt_le(mantissa_constant_plus_max_fraction, value)) {
+ uint32_t i = 1;
+
+ /* Shift right until value is no longer above the bound; i - 1 is the shift count. */
+ do {
+ value = dc_fixpt_shr(value, 1);
+ ++i;
+ } while (dc_fixpt_lt(mantissa_constant_plus_max_fraction, value));
+
+ *exponenta = exp_offset + i - 1;
+ } else {
+ *exponenta = exp_offset;
+ }
+
+ /* Fraction above 1.0, scaled to mantissa_bits; anything outside [0, 1] becomes 0. */
+ mantiss = dc_fixpt_sub(value, dc_fixpt_one);
+
+ if (dc_fixpt_lt(mantiss, dc_fixpt_zero) ||
+ dc_fixpt_lt(dc_fixpt_one, mantiss))
+ mantiss = dc_fixpt_zero;
+ else
+ mantiss = dc_fixpt_shl(mantiss, format->mantissa_bits);
+
+ *mantissa = dc_fixpt_floor(mantiss);
+
+ return true;
+}
+
+/*
+ * setup_custom_float() - pack sign/exponent/mantissa into one register value
+ * @format: target format (mantissa_bits, exponenta_bits, sign)
+ * @negative: sign of the value; only used when the format has a sign bit
+ * @mantissa: mantissa field
+ * @exponenta: exponent field
+ * @result: receives the packed value
+ *
+ * Layout, from bit 0 upwards: mantissa_bits of mantissa, exponenta_bits of
+ * exponent, then the sign bit.
+ *
+ * Return: always true
+ */
+static bool setup_custom_float(const struct custom_float_format *format,
+			       bool negative,
+			       uint32_t mantissa,
+			       uint32_t exponenta,
+			       uint32_t *result)
+{
+	/*
+	 * verification code:
+	 * once calculation is ok we can remove it
+	 */
+	const uint32_t mantissa_mask =
+		(1 << (format->mantissa_bits + 1)) - 1;
+	const uint32_t exponenta_mask =
+		(1 << (format->exponenta_bits + 1)) - 1;
+	uint32_t packed = 0;
+	uint32_t m_bit;
+	uint32_t e_bit;
+
+	if (mantissa & ~mantissa_mask) {
+		BREAK_TO_DEBUGGER();
+		mantissa = mantissa_mask;
+	}
+
+	if (exponenta & ~exponenta_mask) {
+		BREAK_TO_DEBUGGER();
+		exponenta = exponenta_mask;
+	}
+	/* end of verification code */
+
+	/* Copy the mantissa bits into the low end of the result. */
+	for (m_bit = 0; m_bit < format->mantissa_bits; ++m_bit) {
+		const uint32_t mask = 1 << m_bit;
+
+		if (mantissa & mask)
+			packed |= mask;
+	}
+
+	/* Exponent bits follow directly above the mantissa (m_bit == mantissa width here). */
+	for (e_bit = 0; e_bit < format->exponenta_bits; ++e_bit) {
+		const uint32_t mask = 1 << e_bit;
+
+		if (exponenta & mask)
+			packed |= mask << m_bit;
+	}
+
+	/* Sign bit sits above both fields. */
+	if (negative && format->sign)
+		packed |= 1 << (m_bit + e_bit);
+
+	*result = packed;
+
+	return true;
+}
+
+/*
+ * convert_to_custom_float_format() - encode a fixed31_32 value
+ * @value: value to encode
+ * @format: target custom float format
+ * @result: receives the packed encoding
+ *
+ * Return: true when both the decomposition and the packing step succeed
+ */
+bool convert_to_custom_float_format(struct fixed31_32 value,
+				    const struct custom_float_format *format,
+				    uint32_t *result)
+{
+	uint32_t mantissa;
+	uint32_t exponenta;
+	bool negative;
+
+	if (!build_custom_float(value, format, &negative, &mantissa, &exponenta))
+		return false;
+
+	return setup_custom_float(format, negative, mantissa, exponenta, result);
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/basics/dc_common.c b/drivers/gpu/drm/amd/display/dc/basics/dc_common.c
index b2fc4f8e6482..a51c2701da24 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/dc_common.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/dc_common.c
@@ -40,7 +40,8 @@ bool is_rgb_cspace(enum dc_color_space output_color_space)
case COLOR_SPACE_YCBCR709:
case COLOR_SPACE_YCBCR601_LIMITED:
case COLOR_SPACE_YCBCR709_LIMITED:
- case COLOR_SPACE_2020_YCBCR:
+ case COLOR_SPACE_2020_YCBCR_LIMITED:
+ case COLOR_SPACE_2020_YCBCR_FULL:
return false;
default:
/* Add a case to switch */
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c
index ff5bb152ef49..4da5adab799c 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c
@@ -23,8 +23,6 @@
*
*/
-#include <linux/slab.h>
-
#include "resource.h"
#include "dm_services.h"
#include "dce_calcs.h"
@@ -94,7 +92,7 @@ static void calculate_bandwidth(
const uint32_t s_high = 7;
const uint32_t dmif_chunk_buff_margin = 1;
- uint32_t max_chunks_fbc_mode;
+ uint32_t max_chunks_fbc_mode = 0;
int32_t num_cursor_lines;
int32_t i, j, k;
@@ -571,7 +569,7 @@ static void calculate_bandwidth(
break;
}
data->lb_partitions[i] = bw_floor2(bw_div(data->lb_size_per_component[i], data->lb_line_pitch), bw_int_to_fixed(1));
- /*clamp the partitions to the maxium number supported by the lb*/
+ /* clamp the partitions to the maximum number supported by the lb */
if ((surface_type[i] != bw_def_graphics || dceip->graphics_lb_nodownscaling_multi_line_prefetching == 1)) {
data->lb_partitions_max[i] = bw_int_to_fixed(10);
}
@@ -1138,7 +1136,7 @@ static void calculate_bandwidth(
}
}
}
- data->total_dmifmc_urgent_trips = bw_ceil2(bw_div(data->total_requests_for_adjusted_dmif_size, (bw_add(dceip->dmif_request_buffer_size, bw_int_to_fixed(vbios->number_of_request_slots_gmc_reserves_for_dmif_per_channel * data->number_of_dram_channels)))), bw_int_to_fixed(1));
+ data->total_dmifmc_urgent_trips = bw_ceil2(bw_div(data->total_requests_for_adjusted_dmif_size, (bw_add(dceip->dmif_request_buffer_size, bw_int_to_fixed((uint64_t)vbios->number_of_request_slots_gmc_reserves_for_dmif_per_channel * data->number_of_dram_channels)))), bw_int_to_fixed(1));
data->total_dmifmc_urgent_latency = bw_mul(vbios->dmifmc_urgent_latency, data->total_dmifmc_urgent_trips);
data->total_display_reads_required_data = bw_int_to_fixed(0);
data->total_display_reads_required_dram_access_data = bw_int_to_fixed(0);
@@ -1395,7 +1393,7 @@ static void calculate_bandwidth(
if ((bw_mtn(data->dram_speed_change_margin, bw_int_to_fixed(0)) && bw_ltn(data->dram_speed_change_margin, bw_int_to_fixed(9999)))) {
/*determine the minimum dram clock change margin for each set of clock frequencies*/
data->min_dram_speed_change_margin[i][j] = bw_min2(data->min_dram_speed_change_margin[i][j], data->dram_speed_change_margin);
- /*compute the maximum clock frequuency required for the dram clock change at each set of clock frequencies*/
+ /*compute the maximum clock frequency required for the dram clock change at each set of clock frequencies*/
data->dispclk_required_for_dram_speed_change_pipe[i][j] = bw_max2(bw_div(bw_div(bw_mul(data->src_pixels_for_first_output_pixel[k], dceip->display_pipe_throughput_factor), dceip->lb_write_pixels_per_dispclk), (bw_sub(bw_sub(bw_sub(data->maximum_latency_hiding_with_cursor[k], vbios->nbp_state_change_latency), data->dmif_burst_time[i][j]), data->dram_speed_change_line_source_transfer_time[k][i][j]))), bw_div(bw_div(bw_mul(data->src_pixels_for_last_output_pixel[k], dceip->display_pipe_throughput_factor), dceip->lb_write_pixels_per_dispclk), (bw_add(bw_sub(bw_sub(bw_sub(data->maximum_latency_hiding_with_cursor[k], vbios->nbp_state_change_latency), data->dmif_burst_time[i][j]), data->dram_speed_change_line_source_transfer_time[k][i][j]), data->active_time[k]))));
if ((bw_ltn(data->dispclk_required_for_dram_speed_change_pipe[i][j], vbios->high_voltage_max_dispclk))) {
data->display_pstate_change_enable[k] = 1;
@@ -1409,7 +1407,7 @@ static void calculate_bandwidth(
if ((bw_mtn(data->dram_speed_change_margin, bw_int_to_fixed(0)) && bw_ltn(data->dram_speed_change_margin, bw_int_to_fixed(9999)))) {
/*determine the minimum dram clock change margin for each display pipe*/
data->min_dram_speed_change_margin[i][j] = bw_min2(data->min_dram_speed_change_margin[i][j], data->dram_speed_change_margin);
- /*compute the maximum clock frequuency required for the dram clock change at each set of clock frequencies*/
+ /*compute the maximum clock frequency required for the dram clock change at each set of clock frequencies*/
data->dispclk_required_for_dram_speed_change_pipe[i][j] = bw_max2(bw_div(bw_div(bw_mul(data->src_pixels_for_first_output_pixel[k], dceip->display_pipe_throughput_factor), dceip->lb_write_pixels_per_dispclk), (bw_sub(bw_sub(bw_sub(bw_sub(data->maximum_latency_hiding_with_cursor[k], vbios->nbp_state_change_latency), data->dmif_burst_time[i][j]), data->dram_speed_change_line_source_transfer_time[k][i][j]), data->mcifwr_burst_time[i][j]))), bw_div(bw_div(bw_mul(data->src_pixels_for_last_output_pixel[k], dceip->display_pipe_throughput_factor), dceip->lb_write_pixels_per_dispclk), (bw_add(bw_sub(bw_sub(bw_sub(bw_sub(data->maximum_latency_hiding_with_cursor[k], vbios->nbp_state_change_latency), data->dmif_burst_time[i][j]), data->dram_speed_change_line_source_transfer_time[k][i][j]), data->mcifwr_burst_time[i][j]), data->active_time[k]))));
if ((bw_ltn(data->dispclk_required_for_dram_speed_change_pipe[i][j], vbios->high_voltage_max_dispclk))) {
data->display_pstate_change_enable[k] = 1;
@@ -1855,7 +1853,7 @@ static void calculate_bandwidth(
/*compute total time to request one chunk from each active display pipe*/
for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
if (data->enable[i]) {
- data->chunk_request_time = bw_add(data->chunk_request_time, (bw_div((bw_div(bw_int_to_fixed(pixels_per_chunk * data->bytes_per_pixel[i]), data->useful_bytes_per_request[i])), bw_min2(sclk[data->sclk_level], bw_div(data->dispclk, bw_int_to_fixed(2))))));
+ data->chunk_request_time = bw_add(data->chunk_request_time, (bw_div((bw_div(bw_int_to_fixed(pixels_per_chunk * (int64_t)data->bytes_per_pixel[i]), data->useful_bytes_per_request[i])), bw_min2(sclk[data->sclk_level], bw_div(data->dispclk, bw_int_to_fixed(2))))));
}
}
/*compute total time to request cursor data*/
@@ -2033,10 +2031,10 @@ static void calculate_bandwidth(
kfree(surface_type);
free_tiling_mode:
kfree(tiling_mode);
-free_yclk:
- kfree(yclk);
free_sclk:
kfree(sclk);
+free_yclk:
+ kfree(yclk);
}
/*******************************************************************************
@@ -3015,7 +3013,7 @@ static bool all_displays_in_sync(const struct pipe_ctx pipe[],
int i, num_active_pipes = 0;
for (i = 0; i < pipe_count; i++) {
- if (!pipe[i].stream || pipe[i].top_pipe)
+ if (!resource_is_pipe_type(&pipe[i], OPP_HEAD))
continue;
active_pipes[num_active_pipes++] = &pipe[i];
@@ -3411,35 +3409,33 @@ bool bw_calcs(struct dc_context *ctx,
calcs_output->stutter_exit_wm_ns[5].c_mark =
bw_fixed_to_int(bw_mul(data->
stutter_exit_watermark[9], bw_int_to_fixed(1000)));
-
- calcs_output->stutter_entry_wm_ns[0].c_mark =
- bw_fixed_to_int(bw_mul(data->
- stutter_entry_watermark[4], bw_int_to_fixed(1000)));
- calcs_output->stutter_entry_wm_ns[1].c_mark =
- bw_fixed_to_int(bw_mul(data->
- stutter_entry_watermark[5], bw_int_to_fixed(1000)));
- calcs_output->stutter_entry_wm_ns[2].c_mark =
- bw_fixed_to_int(bw_mul(data->
- stutter_entry_watermark[6], bw_int_to_fixed(1000)));
- if (ctx->dc->caps.max_slave_planes) {
- calcs_output->stutter_entry_wm_ns[3].c_mark =
+ calcs_output->stutter_entry_wm_ns[0].c_mark =
bw_fixed_to_int(bw_mul(data->
- stutter_entry_watermark[0], bw_int_to_fixed(1000)));
- calcs_output->stutter_entry_wm_ns[4].c_mark =
+ stutter_entry_watermark[4], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[1].c_mark =
bw_fixed_to_int(bw_mul(data->
- stutter_entry_watermark[1], bw_int_to_fixed(1000)));
- } else {
- calcs_output->stutter_entry_wm_ns[3].c_mark =
+ stutter_entry_watermark[5], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[2].c_mark =
bw_fixed_to_int(bw_mul(data->
- stutter_entry_watermark[7], bw_int_to_fixed(1000)));
- calcs_output->stutter_entry_wm_ns[4].c_mark =
+ stutter_entry_watermark[6], bw_int_to_fixed(1000)));
+ if (ctx->dc->caps.max_slave_planes) {
+ calcs_output->stutter_entry_wm_ns[3].c_mark =
+ bw_fixed_to_int(bw_mul(data->stutter_entry_watermark[0],
+ bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[4].c_mark =
+ bw_fixed_to_int(bw_mul(data->stutter_entry_watermark[1],
+ bw_int_to_fixed(1000)));
+ } else {
+ calcs_output->stutter_entry_wm_ns[3].c_mark =
+ bw_fixed_to_int(bw_mul(data->stutter_entry_watermark[7],
+ bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[4].c_mark =
+ bw_fixed_to_int(bw_mul(data->stutter_entry_watermark[8],
+ bw_int_to_fixed(1000)));
+ }
+ calcs_output->stutter_entry_wm_ns[5].c_mark =
bw_fixed_to_int(bw_mul(data->
- stutter_entry_watermark[8], bw_int_to_fixed(1000)));
- }
- calcs_output->stutter_entry_wm_ns[5].c_mark =
- bw_fixed_to_int(bw_mul(data->
- stutter_entry_watermark[9], bw_int_to_fixed(1000)));
-
+ stutter_entry_watermark[9], bw_int_to_fixed(1000)));
calcs_output->urgent_wm_ns[0].c_mark =
bw_fixed_to_int(bw_mul(data->
urgent_watermark[4], bw_int_to_fixed(1000)));
diff --git a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c
index 1726bdf89bae..6073cadde76c 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c
@@ -51,8 +51,6 @@ static inline unsigned long long complete_integer_division_u64(
{
unsigned long long result;
- ASSERT(divisor);
-
result = div64_u64_rem(dividend, divisor, remainder);
return result;
@@ -140,8 +138,6 @@ struct fixed31_32 dc_fixpt_mul(struct fixed31_32 arg1, struct fixed31_32 arg2)
res.value = arg1_int * arg2_int;
- ASSERT(res.value <= LONG_MAX);
-
res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;
tmp = arg1_int * arg2_fra;
@@ -185,8 +181,6 @@ struct fixed31_32 dc_fixpt_sqr(struct fixed31_32 arg)
res.value = arg_int * arg_int;
- ASSERT(res.value <= LONG_MAX);
-
res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;
tmp = arg_int * arg_fra;
@@ -217,9 +211,6 @@ struct fixed31_32 dc_fixpt_recip(struct fixed31_32 arg)
* @note
* Good idea to use Newton's method
*/
-
- ASSERT(arg.value);
-
return dc_fixpt_from_fraction(
dc_fixpt_one.value,
arg.value);
@@ -293,7 +284,7 @@ struct fixed31_32 dc_fixpt_cos(struct fixed31_32 arg)
dc_fixpt_mul(
square,
res),
- n * (n - 1)));
+ (long long)n * (n - 1)));
n -= 2;
} while (n != 0);
@@ -490,3 +481,30 @@ int dc_fixpt_s4d19(struct fixed31_32 arg)
else
return ux_dy(arg.value, 4, 19);
}
+
+struct fixed31_32 dc_fixpt_from_ux_dy(unsigned int value,
+ unsigned int integer_bits,
+ unsigned int fractional_bits)
+{
+ struct fixed31_32 fixpt_value = dc_fixpt_zero;
+ struct fixed31_32 fixpt_int_value = dc_fixpt_zero;
+ long long frac_mask = ((long long)1 << (long long)integer_bits) - 1;
+
+ fixpt_value.value = (long long)value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
+ frac_mask = frac_mask << fractional_bits;
+ fixpt_int_value.value = value & frac_mask;
+ fixpt_int_value.value <<= (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
+ fixpt_value.value |= fixpt_int_value.value;
+ return fixpt_value;
+}
+
+struct fixed31_32 dc_fixpt_from_int_dy(unsigned int int_value,
+ unsigned int frac_value,
+ unsigned int integer_bits,
+ unsigned int fractional_bits)
+{
+ struct fixed31_32 fixpt_value = dc_fixpt_from_int(int_value);
+
+ fixpt_value.value |= (long long)frac_value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
+ return fixpt_value;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/basics/vector.c b/drivers/gpu/drm/amd/display/dc/basics/vector.c
index 706c803c4d3b..b413a672c2c0 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/vector.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/vector.c
@@ -23,8 +23,6 @@
*
*/
-#include <linux/slab.h>
-
#include "dm_services.h"
#include "include/vector.h"
@@ -52,12 +50,11 @@ bool dal_vector_construct(
return true;
}
-static bool dal_vector_presized_costruct(
- struct vector *vector,
- struct dc_context *ctx,
- uint32_t count,
- void *initial_value,
- uint32_t struct_size)
+static bool dal_vector_presized_costruct(struct vector *vector,
+ struct dc_context *ctx,
+ uint32_t count,
+ void *initial_value,
+ uint32_t struct_size)
{
uint32_t i;
@@ -173,7 +170,7 @@ bool dal_vector_remove_at_index(
memmove(
vector->container + (index * vector->struct_size),
vector->container + ((index + 1) * vector->struct_size),
- (vector->count - index - 1) * vector->struct_size);
+ (size_t)(vector->count - index - 1) * vector->struct_size);
vector->count -= 1;
return true;
@@ -222,7 +219,7 @@ bool dal_vector_insert_at(
memmove(
insert_address + vector->struct_size,
insert_address,
- vector->struct_size * (vector->count - position));
+ (size_t)vector->struct_size * (vector->count - position));
memmove(
insert_address,
@@ -274,7 +271,7 @@ struct vector *dal_vector_clone(
/* copy vector's data */
memmove(vec_cloned->container, vector->container,
- vec_cloned->struct_size * vec_cloned->capacity);
+ (size_t)vec_cloned->struct_size * vec_cloned->capacity);
return vec_cloned;
}
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
index 9b8ea6e9a2b9..d1471f34e419 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
@@ -33,7 +33,6 @@
#include "include/gpio_service_interface.h"
#include "include/grph_object_ctrl_defs.h"
#include "include/bios_parser_interface.h"
-#include "include/i2caux_interface.h"
#include "include/logger_interface.h"
#include "command_table.h"
@@ -45,8 +44,6 @@
#include "bios_parser_common.h"
-#include "dc.h"
-
#define THREE_PERCENT_OF_10000 300
#define LAST_RECORD_TYPE 0xff
@@ -70,7 +67,9 @@ static ATOM_HPD_INT_RECORD *get_hpd_record(struct bios_parser *bp,
ATOM_OBJECT *object);
static struct device_id device_type_from_device_id(uint16_t device_id);
static uint32_t signal_to_ss_id(enum as_signal_type signal);
-static uint32_t get_support_mask_for_device_id(struct device_id device_id);
+static uint32_t get_support_mask_for_device_id(
+ enum dal_device_type device_type,
+ uint32_t enum_id);
static ATOM_ENCODER_CAP_RECORD_V2 *get_encoder_cap_record(
struct bios_parser *bp,
ATOM_OBJECT *object);
@@ -97,7 +96,7 @@ struct dc_bios *bios_parser_create(
struct bp_init_data *init,
enum dce_version dce_version)
{
- struct bios_parser *bp = NULL;
+ struct bios_parser *bp;
bp = kzalloc(sizeof(struct bios_parser), GFP_KERNEL);
if (!bp)
@@ -138,7 +137,9 @@ static uint8_t get_number_of_objects(struct bios_parser *bp, uint32_t offset)
uint32_t object_table_offset = bp->object_info_tbl_offset + offset;
- table = GET_IMAGE(ATOM_OBJECT_TABLE, object_table_offset);
+ table = ((ATOM_OBJECT_TABLE *) bios_get_image(&bp->base,
+ object_table_offset,
+ struct_size(table, asObjects, 1)));
if (!table)
return 0;
@@ -166,19 +167,17 @@ static struct graphics_object_id bios_parser_get_connector_id(
uint32_t connector_table_offset = bp->object_info_tbl_offset
+ le16_to_cpu(bp->object_info_tbl.v1_1->usConnectorObjectTableOffset);
- ATOM_OBJECT_TABLE *tbl =
- GET_IMAGE(ATOM_OBJECT_TABLE, connector_table_offset);
+ ATOM_OBJECT_TABLE *tbl = ((ATOM_OBJECT_TABLE *) bios_get_image(&bp->base,
+ connector_table_offset,
+ struct_size(tbl, asObjects, 1)));
if (!tbl) {
dm_error("Can't get connector table from atom bios.\n");
return object_id;
}
- if (tbl->ucNumberOfObjects <= i) {
- dm_error("Can't find connector id %d in connector table of size %d.\n",
- i, tbl->ucNumberOfObjects);
+ if (tbl->ucNumberOfObjects <= i)
return object_id;
- }
id = le16_to_cpu(tbl->asObjects[i].usObjectID);
object_id = object_id_from_bios_object_id(id);
@@ -444,6 +443,7 @@ static enum bp_result get_firmware_info_v1_4(
le32_to_cpu(firmware_info->ulMinPixelClockPLL_Output) * 10;
info->pll_info.max_output_pxl_clk_pll_frequency =
le32_to_cpu(firmware_info->ulMaxPixelClockPLL_Output) * 10;
+ info->max_pixel_clock = le16_to_cpu(firmware_info->usMaxPixelClock) * 10;
if (firmware_info->usFirmwareCapability.sbfAccess.MemoryClockSS_Support)
/* Since there is no information on the SS, report conservative
@@ -500,6 +500,7 @@ static enum bp_result get_firmware_info_v2_1(
info->external_clock_source_frequency_for_dp =
le16_to_cpu(firmwareInfo->usUniphyDPModeExtClkFreq) * 10;
info->min_allowed_bl_level = firmwareInfo->ucMinAllowedBL_Level;
+ info->max_pixel_clock = le16_to_cpu(firmwareInfo->usMaxPixelClock) * 10;
/* There should be only one entry in the SS info table for Memory Clock
*/
@@ -662,8 +663,12 @@ static enum bp_result get_ss_info_v3_1(
if (!DATA_TABLES(ASIC_InternalSS_Info))
return BP_RESULT_UNSUPPORTED;
- ss_table_header_include = GET_IMAGE(ATOM_ASIC_INTERNAL_SS_INFO_V3,
- DATA_TABLES(ASIC_InternalSS_Info));
+ ss_table_header_include = ((ATOM_ASIC_INTERNAL_SS_INFO_V3 *) bios_get_image(&bp->base,
+ DATA_TABLES(ASIC_InternalSS_Info),
+ struct_size(ss_table_header_include, asSpreadSpectrum, 1)));
+ if (!ss_table_header_include)
+ return BP_RESULT_UNSUPPORTED;
+
table_size =
(le16_to_cpu(ss_table_header_include->sHeader.usStructureSize)
- sizeof(ATOM_COMMON_TABLE_HEADER))
@@ -735,18 +740,94 @@ static enum bp_result bios_parser_transmitter_control(
return bp->cmd_tbl.transmitter_control(bp, cntl);
}
+static enum bp_result bios_parser_select_crtc_source(
+ struct dc_bios *dcb,
+ struct bp_crtc_source_select *bp_params)
+{
+ struct bios_parser *bp = BP_FROM_DCB(dcb);
+
+ if (!bp->cmd_tbl.select_crtc_source)
+ return BP_RESULT_FAILURE;
+
+ return bp->cmd_tbl.select_crtc_source(bp, bp_params);
+}
+
static enum bp_result bios_parser_encoder_control(
struct dc_bios *dcb,
struct bp_encoder_control *cntl)
{
struct bios_parser *bp = BP_FROM_DCB(dcb);
+ if (cntl->engine_id == ENGINE_ID_DACA) {
+ if (!bp->cmd_tbl.dac1_encoder_control)
+ return BP_RESULT_FAILURE;
+
+ return bp->cmd_tbl.dac1_encoder_control(
+ bp, cntl->action == ENCODER_CONTROL_ENABLE,
+ cntl->pixel_clock, ATOM_DAC1_PS2);
+ } else if (cntl->engine_id == ENGINE_ID_DACB) {
+ if (!bp->cmd_tbl.dac2_encoder_control)
+ return BP_RESULT_FAILURE;
+
+ return bp->cmd_tbl.dac2_encoder_control(
+ bp, cntl->action == ENCODER_CONTROL_ENABLE,
+ cntl->pixel_clock, ATOM_DAC1_PS2);
+ }
+
if (!bp->cmd_tbl.dig_encoder_control)
return BP_RESULT_FAILURE;
return bp->cmd_tbl.dig_encoder_control(bp, cntl);
}
+static enum bp_result bios_parser_dac_load_detection(
+ struct dc_bios *dcb,
+ enum engine_id engine_id,
+ enum dal_device_type device_type,
+ uint32_t enum_id)
+{
+ struct bios_parser *bp = BP_FROM_DCB(dcb);
+ struct dc_context *ctx = dcb->ctx;
+ struct bp_load_detection_parameters bp_params = {0};
+ enum bp_result bp_result;
+ uint32_t bios_0_scratch;
+ uint32_t device_id_mask = 0;
+
+ bp_params.engine_id = engine_id;
+ bp_params.device_id = get_support_mask_for_device_id(device_type, enum_id);
+
+ if (engine_id != ENGINE_ID_DACA &&
+ engine_id != ENGINE_ID_DACB)
+ return BP_RESULT_UNSUPPORTED;
+
+ if (!bp->cmd_tbl.dac_load_detection)
+ return BP_RESULT_UNSUPPORTED;
+
+ if (bp_params.device_id == ATOM_DEVICE_CRT1_SUPPORT)
+ device_id_mask = ATOM_S0_CRT1_MASK;
+ else if (bp_params.device_id == ATOM_DEVICE_CRT2_SUPPORT)
+ device_id_mask = ATOM_S0_CRT2_MASK;
+ else
+ return BP_RESULT_UNSUPPORTED;
+
+ /* BIOS will write the detected devices to BIOS_SCRATCH_0, clear corresponding bit */
+ bios_0_scratch = dm_read_reg(ctx, bp->base.regs->BIOS_SCRATCH_0);
+ bios_0_scratch &= ~device_id_mask;
+ dm_write_reg(ctx, bp->base.regs->BIOS_SCRATCH_0, bios_0_scratch);
+
+ bp_result = bp->cmd_tbl.dac_load_detection(bp, &bp_params);
+
+ if (bp_result != BP_RESULT_OK)
+ return bp_result;
+
+ bios_0_scratch = dm_read_reg(ctx, bp->base.regs->BIOS_SCRATCH_0);
+
+ if (bios_0_scratch & device_id_mask)
+ return BP_RESULT_OK;
+
+ return BP_RESULT_FAILURE;
+}
+
static enum bp_result bios_parser_adjust_pixel_clock(
struct dc_bios *dcb,
struct bp_adjust_pixel_clock_parameters *bp_params)
@@ -857,7 +938,7 @@ static bool bios_parser_is_device_id_supported(
{
struct bios_parser *bp = BP_FROM_DCB(dcb);
- uint32_t mask = get_support_mask_for_device_id(id);
+ uint32_t mask = get_support_mask_for_device_id(id.device_type, id.enum_id);
return (le16_to_cpu(bp->object_info_tbl.v1_1->usDeviceSupport) & mask) != 0;
}
@@ -1029,8 +1110,12 @@ static enum bp_result get_ss_info_from_internal_ss_info_tbl_V2_1(
if (!DATA_TABLES(ASIC_InternalSS_Info))
return result;
- header = GET_IMAGE(ATOM_ASIC_INTERNAL_SS_INFO_V2,
- DATA_TABLES(ASIC_InternalSS_Info));
+ header = ((ATOM_ASIC_INTERNAL_SS_INFO_V2 *) bios_get_image(
+ &bp->base,
+ DATA_TABLES(ASIC_InternalSS_Info),
+ struct_size(header, asSpreadSpectrum, 1)));
+ if (!header)
+ return result;
memset(info, 0, sizeof(struct spread_spectrum_info));
@@ -1104,6 +1189,8 @@ static enum bp_result get_ss_info_from_ss_info_table(
get_atom_data_table_revision(header, &revision);
tbl = GET_IMAGE(ATOM_SPREAD_SPECTRUM_INFO, DATA_TABLES(SS_Info));
+ if (!tbl)
+ return result;
if (1 != revision.major || 2 > revision.minor)
return result;
@@ -1631,6 +1718,8 @@ static uint32_t get_ss_entry_number_from_ss_info_tbl(
tbl = GET_IMAGE(ATOM_SPREAD_SPECTRUM_INFO,
DATA_TABLES(SS_Info));
+ if (!tbl)
+ return number;
if (1 != revision.major || 2 > revision.minor)
return number;
@@ -1709,8 +1798,12 @@ static uint32_t get_ss_entry_number_from_internal_ss_info_tbl_v2_1(
if (!DATA_TABLES(ASIC_InternalSS_Info))
return 0;
- header_include = GET_IMAGE(ATOM_ASIC_INTERNAL_SS_INFO_V2,
- DATA_TABLES(ASIC_InternalSS_Info));
+ header_include = ((ATOM_ASIC_INTERNAL_SS_INFO_V2 *) bios_get_image(
+ &bp->base,
+ DATA_TABLES(ASIC_InternalSS_Info),
+ struct_size(header_include, asSpreadSpectrum, 1)));
+ if (!header_include)
+ return 0;
size = (le16_to_cpu(header_include->sHeader.usStructureSize)
- sizeof(ATOM_COMMON_TABLE_HEADER))
@@ -1724,6 +1817,7 @@ static uint32_t get_ss_entry_number_from_internal_ss_info_tbl_v2_1(
return 0;
}
+
/**
* get_ss_entry_number_from_internal_ss_info_tbl_V3_1
* Get Number of SpreadSpectrum Entry from the ASIC_InternalSS_Info table of
@@ -1746,8 +1840,12 @@ static uint32_t get_ss_entry_number_from_internal_ss_info_tbl_V3_1(
if (!DATA_TABLES(ASIC_InternalSS_Info))
return number;
- header_include = GET_IMAGE(ATOM_ASIC_INTERNAL_SS_INFO_V3,
- DATA_TABLES(ASIC_InternalSS_Info));
+ header_include = ((ATOM_ASIC_INTERNAL_SS_INFO_V3 *) bios_get_image(&bp->base,
+ DATA_TABLES(ASIC_InternalSS_Info),
+ struct_size(header_include, asSpreadSpectrum, 1)));
+ if (!header_include)
+ return number;
+
size = (le16_to_cpu(header_include->sHeader.usStructureSize) -
sizeof(ATOM_COMMON_TABLE_HEADER)) /
sizeof(ATOM_ASIC_SS_ASSIGNMENT_V3);
@@ -1789,11 +1887,13 @@ static enum bp_result bios_parser_get_gpio_pin_info(
if (!DATA_TABLES(GPIO_Pin_LUT))
return BP_RESULT_BADBIOSTABLE;
- header = GET_IMAGE(ATOM_GPIO_PIN_LUT, DATA_TABLES(GPIO_Pin_LUT));
+ header = ((ATOM_GPIO_PIN_LUT *) bios_get_image(&bp->base,
+ DATA_TABLES(GPIO_Pin_LUT),
+ struct_size(header, asGPIO_Pin, 1)));
if (!header)
return BP_RESULT_BADBIOSTABLE;
- if (sizeof(ATOM_COMMON_TABLE_HEADER) + sizeof(ATOM_GPIO_PIN_LUT)
+ if (sizeof(ATOM_COMMON_TABLE_HEADER) + struct_size(header, asGPIO_Pin, 1)
> le16_to_cpu(header->sHeader.usStructureSize))
return BP_RESULT_BADBIOSTABLE;
@@ -1978,7 +2078,8 @@ static ATOM_OBJECT *get_bios_object(struct bios_parser *bp,
offset += bp->object_info_tbl_offset;
- tbl = GET_IMAGE(ATOM_OBJECT_TABLE, offset);
+ tbl = ((ATOM_OBJECT_TABLE *) bios_get_image(&bp->base, offset,
+ struct_size(tbl, asObjects, 1)));
if (!tbl)
return NULL;
@@ -2128,11 +2229,10 @@ static uint32_t signal_to_ss_id(enum as_signal_type signal)
return clk_id_ss;
}
-static uint32_t get_support_mask_for_device_id(struct device_id device_id)
+static uint32_t get_support_mask_for_device_id(
+ enum dal_device_type device_type,
+ uint32_t enum_id)
{
- enum dal_device_type device_type = device_id.device_type;
- uint32_t enum_id = device_id.enum_id;
-
switch (device_type) {
case DEVICE_TYPE_LCD:
switch (enum_id) {
@@ -2360,10 +2460,10 @@ static enum bp_result get_integrated_info_v8(
}
/*
- * get_integrated_info_v8
+ * get_integrated_info_v9
*
* @brief
- * Get V8 integrated BIOS information
+ * Get V9 integrated BIOS information
*
* @param
* bios_parser *bp - [in]BIOS parser handler to get master data table
@@ -2541,8 +2641,8 @@ static enum bp_result construct_integrated_info(
/* Sort voltage table from low to high*/
if (result == BP_RESULT_OK) {
- uint32_t i;
- uint32_t j;
+ int32_t i;
+ int32_t j;
for (i = 1; i < NUMBER_OF_DISP_CLK_VOLTAGE; ++i) {
for (j = i; j > 0; --j) {
@@ -2565,7 +2665,7 @@ static struct integrated_info *bios_parser_create_integrated_info(
struct dc_bios *dcb)
{
struct bios_parser *bp = BP_FROM_DCB(dcb);
- struct integrated_info *info = NULL;
+ struct integrated_info *info;
info = kzalloc(sizeof(struct integrated_info), GFP_KERNEL);
@@ -2582,11 +2682,10 @@ static struct integrated_info *bios_parser_create_integrated_info(
return NULL;
}
-static enum bp_result update_slot_layout_info(
- struct dc_bios *dcb,
- unsigned int i,
- struct slot_layout_info *slot_layout_info,
- unsigned int record_offset)
+static enum bp_result update_slot_layout_info(struct dc_bios *dcb,
+ unsigned int i,
+ struct slot_layout_info *slot_layout_info,
+ unsigned int record_offset)
{
unsigned int j;
struct bios_parser *bp;
@@ -2600,8 +2699,7 @@ static enum bp_result update_slot_layout_info(
for (;;) {
- record_header = (ATOM_COMMON_RECORD_HEADER *)
- GET_IMAGE(ATOM_COMMON_RECORD_HEADER, record_offset);
+ record_header = GET_IMAGE(ATOM_COMMON_RECORD_HEADER, record_offset);
if (record_header == NULL) {
result = BP_RESULT_BADBIOSTABLE;
break;
@@ -2615,7 +2713,7 @@ static enum bp_result update_slot_layout_info(
if (record_header->ucRecordType ==
ATOM_BRACKET_LAYOUT_RECORD_TYPE &&
- sizeof(ATOM_BRACKET_LAYOUT_RECORD)
+ struct_size(record, asConnInfo, 1)
<= record_header->ucRecordSize) {
record = (ATOM_BRACKET_LAYOUT_RECORD *)
(record_header);
@@ -2686,10 +2784,9 @@ static enum bp_result update_slot_layout_info(
}
-static enum bp_result get_bracket_layout_record(
- struct dc_bios *dcb,
- unsigned int bracket_layout_id,
- struct slot_layout_info *slot_layout_info)
+static enum bp_result get_bracket_layout_record(struct dc_bios *dcb,
+ unsigned int bracket_layout_id,
+ struct slot_layout_info *slot_layout_info)
{
unsigned int i;
unsigned int record_offset;
@@ -2709,8 +2806,9 @@ static enum bp_result get_bracket_layout_record(
genericTableOffset = bp->object_info_tbl_offset +
bp->object_info_tbl.v1_3->usMiscObjectTableOffset;
- object_table = (ATOM_OBJECT_TABLE *)
- GET_IMAGE(ATOM_OBJECT_TABLE, genericTableOffset);
+ object_table = ((ATOM_OBJECT_TABLE *) bios_get_image(&bp->base,
+ genericTableOffset,
+ struct_size(object_table, asObjects, 1)));
if (!object_table)
return BP_RESULT_FAILURE;
@@ -2737,6 +2835,7 @@ static enum bp_result bios_get_board_layout_info(
struct board_layout_info *board_layout_info)
{
unsigned int i;
+ struct bios_parser *bp;
enum bp_result record_result;
const unsigned int slot_index_to_vbios_id[MAX_BOARD_SLOTS] = {
@@ -2745,6 +2844,8 @@ static enum bp_result bios_get_board_layout_info(
0, 0
};
+ bp = BP_FROM_DCB(dcb);
+
if (board_layout_info == NULL) {
DC_LOG_DETECTION_EDID_PARSER("Invalid board_layout_info\n");
return BP_RESULT_BADINPUT;
@@ -2807,8 +2908,12 @@ static const struct dc_vbios_funcs vbios_funcs = {
.is_device_id_supported = bios_parser_is_device_id_supported,
/* COMMANDS */
+ .select_crtc_source = bios_parser_select_crtc_source,
+
.encoder_control = bios_parser_encoder_control,
+ .dac_load_detection = bios_parser_dac_load_detection,
+
.transmitter_control = bios_parser_transmitter_control,
.enable_crtc = bios_parser_enable_crtc,
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index a4bef4364afd..550a9f1d03f8 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -23,9 +23,8 @@
*
*/
-#include <linux/slab.h>
-
#include "dm_services.h"
+#include "core_types.h"
#include "ObjectID.h"
#include "atomfirmware.h"
@@ -33,7 +32,6 @@
#include "dc_bios_types.h"
#include "include/grph_object_ctrl_defs.h"
#include "include/bios_parser_interface.h"
-#include "include/i2caux_interface.h"
#include "include/logger_interface.h"
#include "command_table2.h"
@@ -46,38 +44,12 @@
#include "bios_parser_common.h"
-/* Temporarily add in defines until ObjectID.h patch is updated in a few days */
-#ifndef GENERIC_OBJECT_ID_BRACKET_LAYOUT
-#define GENERIC_OBJECT_ID_BRACKET_LAYOUT 0x05
-#endif /* GENERIC_OBJECT_ID_BRACKET_LAYOUT */
-
-#ifndef GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID1
-#define GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID1 \
- (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\
- GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
- GENERIC_OBJECT_ID_BRACKET_LAYOUT << OBJECT_ID_SHIFT)
-#endif /* GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID1 */
-
-#ifndef GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID2
-#define GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID2 \
- (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\
- GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
- GENERIC_OBJECT_ID_BRACKET_LAYOUT << OBJECT_ID_SHIFT)
-#endif /* GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID2 */
-
#define DC_LOGGER \
bp->base.ctx->logger
#define LAST_RECORD_TYPE 0xff
#define SMU9_SYSPLL0_ID 0
-struct i2c_id_config_access {
- uint8_t bfI2C_LineMux:4;
- uint8_t bfHW_EngineID:3;
- uint8_t bfHW_Capable:1;
- uint8_t ucAccess;
-};
-
static enum bp_result get_gpio_i2c_info(struct bios_parser *bp,
struct atom_i2c_record *record,
struct graphics_object_i2c_info *info);
@@ -103,6 +75,10 @@ static enum bp_result get_firmware_info_v3_4(
struct bios_parser *bp,
struct dc_firmware_info *info);
+static enum bp_result get_firmware_info_v3_5(
+ struct bios_parser *bp,
+ struct dc_firmware_info *info);
+
static struct atom_hpd_int_record *get_hpd_record(struct bios_parser *bp,
struct atom_display_object_path_v2 *object);
@@ -165,9 +141,21 @@ static uint8_t bios_parser_get_connectors_number(struct dc_bios *dcb)
unsigned int count = 0;
unsigned int i;
- for (i = 0; i < bp->object_info_tbl.v1_4->number_of_path; i++) {
- if (bp->object_info_tbl.v1_4->display_path[i].encoderobjid != 0)
- count++;
+ switch (bp->object_info_tbl.revision.minor) {
+ default:
+ case 4:
+ for (i = 0; i < bp->object_info_tbl.v1_4->number_of_path; i++)
+ if (bp->object_info_tbl.v1_4->display_path[i].encoderobjid != 0)
+ count++;
+
+ break;
+
+ case 5:
+ for (i = 0; i < bp->object_info_tbl.v1_5->number_of_path; i++)
+ if (bp->object_info_tbl.v1_5->display_path[i].encoderobjid != 0)
+ count++;
+
+ break;
}
return count;
}
@@ -182,16 +170,34 @@ static struct graphics_object_id bios_parser_get_connector_id(
struct object_info_table *tbl = &bp->object_info_tbl;
struct display_object_info_table_v1_4 *v1_4 = tbl->v1_4;
- if (v1_4->number_of_path > i) {
- /* If display_objid is generic object id, the encoderObj
- * /extencoderobjId should be 0
- */
- if (v1_4->display_path[i].encoderobjid != 0 &&
- v1_4->display_path[i].display_objid != 0)
- object_id = object_id_from_bios_object_id(
+ struct display_object_info_table_v1_5 *v1_5 = tbl->v1_5;
+
+ switch (bp->object_info_tbl.revision.minor) {
+ default:
+ case 4:
+ if (v1_4->number_of_path > i) {
+ /* If display_objid is generic object id, the encoderObj
+ * /extencoderobjId should be 0
+ */
+ if (v1_4->display_path[i].encoderobjid != 0 &&
+ v1_4->display_path[i].display_objid != 0)
+ object_id = object_id_from_bios_object_id(
v1_4->display_path[i].display_objid);
- }
+ }
+ break;
+ case 5:
+ if (v1_5->number_of_path > i) {
+ /* If display_objid is generic object id, the encoderObjId
+ * should be 0
+ */
+ if (v1_5->display_path[i].encoderobjid != 0 &&
+ v1_5->display_path[i].display_objid != 0)
+ object_id = object_id_from_bios_object_id(
+ v1_5->display_path[i].display_objid);
+ }
+ break;
+ }
return object_id;
}
@@ -201,8 +207,8 @@ static enum bp_result bios_parser_get_src_obj(struct dc_bios *dcb,
{
struct bios_parser *bp = BP_FROM_DCB(dcb);
unsigned int i;
- enum bp_result bp_result = BP_RESULT_BADINPUT;
- struct graphics_object_id obj_id = {0};
+ enum bp_result bp_result = BP_RESULT_BADINPUT;
+ struct graphics_object_id obj_id = { 0 };
struct object_info_table *tbl = &bp->object_info_tbl;
if (!src_object_id)
@@ -217,37 +223,84 @@ static enum bp_result bios_parser_get_src_obj(struct dc_bios *dcb,
* If found in for loop, should break.
* DAL2 implementation may be changed too
*/
- for (i = 0; i < tbl->v1_4->number_of_path; i++) {
- obj_id = object_id_from_bios_object_id(
- tbl->v1_4->display_path[i].encoderobjid);
- if (object_id.type == obj_id.type &&
- object_id.id == obj_id.id &&
- object_id.enum_id ==
- obj_id.enum_id) {
- *src_object_id =
- object_id_from_bios_object_id(0x1100);
- /* break; */
+ switch (bp->object_info_tbl.revision.minor) {
+ default:
+ case 4:
+ for (i = 0; i < tbl->v1_4->number_of_path; i++) {
+ obj_id = object_id_from_bios_object_id(
+ tbl->v1_4->display_path[i].encoderobjid);
+ if (object_id.type == obj_id.type &&
+ object_id.id == obj_id.id &&
+ object_id.enum_id == obj_id.enum_id) {
+ *src_object_id =
+ object_id_from_bios_object_id(
+ 0x1100);
+ /* break; */
+ }
}
+ bp_result = BP_RESULT_OK;
+ break;
+
+ case 5:
+ for (i = 0; i < tbl->v1_5->number_of_path; i++) {
+ obj_id = object_id_from_bios_object_id(
+ tbl->v1_5->display_path[i].encoderobjid);
+ if (object_id.type == obj_id.type &&
+ object_id.id == obj_id.id &&
+ object_id.enum_id == obj_id.enum_id) {
+ *src_object_id =
+ object_id_from_bios_object_id(
+ 0x1100);
+ /* break; */
+ }
+ }
+ bp_result = BP_RESULT_OK;
+ break;
}
- bp_result = BP_RESULT_OK;
break;
case OBJECT_TYPE_CONNECTOR:
- for (i = 0; i < tbl->v1_4->number_of_path; i++) {
- obj_id = object_id_from_bios_object_id(
- tbl->v1_4->display_path[i].display_objid);
-
- if (object_id.type == obj_id.type &&
- object_id.id == obj_id.id &&
- object_id.enum_id == obj_id.enum_id) {
- *src_object_id =
- object_id_from_bios_object_id(
- tbl->v1_4->display_path[i].encoderobjid);
- /* break; */
+ switch (bp->object_info_tbl.revision.minor) {
+ default:
+ case 4:
+ for (i = 0; i < tbl->v1_4->number_of_path; i++) {
+ obj_id = object_id_from_bios_object_id(
+ tbl->v1_4->display_path[i]
+ .display_objid);
+
+ if (object_id.type == obj_id.type &&
+ object_id.id == obj_id.id &&
+ object_id.enum_id == obj_id.enum_id) {
+ *src_object_id =
+ object_id_from_bios_object_id(
+ tbl->v1_4
+ ->display_path[i]
+ .encoderobjid);
+ /* break; */
+ }
}
+ bp_result = BP_RESULT_OK;
+ break;
}
bp_result = BP_RESULT_OK;
break;
+ case 5:
+ for (i = 0; i < tbl->v1_5->number_of_path; i++) {
+ obj_id = object_id_from_bios_object_id(
+ tbl->v1_5->display_path[i].display_objid);
+
+ if (object_id.type == obj_id.type &&
+ object_id.id == obj_id.id &&
+ object_id.enum_id == obj_id.enum_id) {
+ *src_object_id = object_id_from_bios_object_id(
+ tbl->v1_5->display_path[i].encoderobjid);
+ /* break; */
+ }
+ }
+ bp_result = BP_RESULT_OK;
+ break;
+
default:
+ bp_result = BP_RESULT_OK;
break;
}
@@ -290,12 +343,54 @@ static struct atom_display_object_path_v2 *get_bios_object(
}
}
+/* from graphics_object_id, find display path which includes the object_id */
+static struct atom_display_object_path_v3 *get_bios_object_from_path_v3(struct bios_parser *bp,
+ struct graphics_object_id id)
+{
+ unsigned int i;
+ struct graphics_object_id obj_id = {0};
+
+ switch (id.type) {
+ case OBJECT_TYPE_ENCODER:
+ for (i = 0; i < bp->object_info_tbl.v1_5->number_of_path; i++) {
+ obj_id = object_id_from_bios_object_id(
+ bp->object_info_tbl.v1_5->display_path[i].encoderobjid);
+ if (id.type == obj_id.type && id.id == obj_id.id
+ && id.enum_id == obj_id.enum_id)
+ return &bp->object_info_tbl.v1_5->display_path[i];
+ }
+ break;
+
+ case OBJECT_TYPE_CONNECTOR:
+ case OBJECT_TYPE_GENERIC:
+ /* Both Generic and Connector Object ID
+ * will be stored on display_objid
+ */
+ for (i = 0; i < bp->object_info_tbl.v1_5->number_of_path; i++) {
+ obj_id = object_id_from_bios_object_id(
+ bp->object_info_tbl.v1_5->display_path[i].display_objid);
+ if (id.type == obj_id.type && id.id == obj_id.id
+ && id.enum_id == obj_id.enum_id)
+ return &bp->object_info_tbl.v1_5->display_path[i];
+ }
+ break;
+
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
static enum bp_result bios_parser_get_i2c_info(struct dc_bios *dcb,
struct graphics_object_id id,
struct graphics_object_i2c_info *info)
{
uint32_t offset;
struct atom_display_object_path_v2 *object;
+
+ struct atom_display_object_path_v3 *object_path_v3;
+
struct atom_common_record_header *header;
struct atom_i2c_record *record;
struct atom_i2c_record dummy_record = {0};
@@ -313,12 +408,25 @@ static enum bp_result bios_parser_get_i2c_info(struct dc_bios *dcb,
return BP_RESULT_NORECORD;
}
- object = get_bios_object(bp, id);
+ switch (bp->object_info_tbl.revision.minor) {
+ case 4:
+ default:
+ object = get_bios_object(bp, id);
- if (!object)
- return BP_RESULT_BADINPUT;
+ if (!object)
+ return BP_RESULT_BADINPUT;
- offset = object->disp_recordoffset + bp->object_info_tbl_offset;
+ offset = object->disp_recordoffset + bp->object_info_tbl_offset;
+ break;
+ case 5:
+ object_path_v3 = get_bios_object_from_path_v3(bp, id);
+
+ if (!object_path_v3)
+ return BP_RESULT_BADINPUT;
+
+ offset = object_path_v3->disp_recordoffset + bp->object_info_tbl_offset;
+ break;
+ }
for (;;) {
header = GET_IMAGE(struct atom_common_record_header, offset);
@@ -356,6 +464,7 @@ static enum bp_result get_gpio_i2c_info(
uint32_t count = 0;
unsigned int table_index = 0;
bool find_valid = false;
+ struct atom_gpio_pin_assignment *pin;
if (!info)
return BP_RESULT_BADINPUT;
@@ -383,20 +492,17 @@ static enum bp_result get_gpio_i2c_info(
- sizeof(struct atom_common_table_header))
/ sizeof(struct atom_gpio_pin_assignment);
+ pin = (struct atom_gpio_pin_assignment *) header->gpio_pin;
+
for (table_index = 0; table_index < count; table_index++) {
- if (((record->i2c_id & I2C_HW_CAP) == (
- header->gpio_pin[table_index].gpio_id &
- I2C_HW_CAP)) &&
- ((record->i2c_id & I2C_HW_ENGINE_ID_MASK) ==
- (header->gpio_pin[table_index].gpio_id &
- I2C_HW_ENGINE_ID_MASK)) &&
- ((record->i2c_id & I2C_HW_LANE_MUX) ==
- (header->gpio_pin[table_index].gpio_id &
- I2C_HW_LANE_MUX))) {
+ if (((record->i2c_id & I2C_HW_CAP) == (pin->gpio_id & I2C_HW_CAP)) &&
+ ((record->i2c_id & I2C_HW_ENGINE_ID_MASK) == (pin->gpio_id & I2C_HW_ENGINE_ID_MASK)) &&
+ ((record->i2c_id & I2C_HW_LANE_MUX) == (pin->gpio_id & I2C_HW_LANE_MUX))) {
/* still valid */
find_valid = true;
break;
}
+ pin = (struct atom_gpio_pin_assignment *)((uint8_t *)pin + sizeof(struct atom_gpio_pin_assignment));
}
/* If we don't find the entry that we are looking for then
@@ -412,15 +518,46 @@ static enum bp_result get_gpio_i2c_info(
info->i2c_slave_address = record->i2c_slave_addr;
/* TODO: check how to get register offset for en, Y, etc. */
- info->gpio_info.clk_a_register_index =
- le16_to_cpu(
- header->gpio_pin[table_index].data_a_reg_index);
- info->gpio_info.clk_a_shift =
- header->gpio_pin[table_index].gpio_bitshift;
+ info->gpio_info.clk_a_register_index = le16_to_cpu(pin->data_a_reg_index);
+ info->gpio_info.clk_a_shift = pin->gpio_bitshift;
return BP_RESULT_OK;
}
+static struct atom_hpd_int_record *get_hpd_record_for_path_v3(struct bios_parser *bp,
+ struct atom_display_object_path_v3 *object)
+{
+ struct atom_common_record_header *header;
+ uint32_t offset;
+
+ if (!object) {
+ BREAK_TO_DEBUGGER(); /* Invalid object */
+ return NULL;
+ }
+
+ offset = object->disp_recordoffset + bp->object_info_tbl_offset;
+
+ for (;;) {
+ header = GET_IMAGE(struct atom_common_record_header, offset);
+
+ if (!header)
+ return NULL;
+
+ if (header->record_type == ATOM_RECORD_END_TYPE ||
+ !header->record_size)
+ break;
+
+ if (header->record_type == ATOM_HPD_INT_RECORD_TYPE
+ && sizeof(struct atom_hpd_int_record) <=
+ header->record_size)
+ return (struct atom_hpd_int_record *) header;
+
+ offset += header->record_size;
+ }
+
+ return NULL;
+}
+
static enum bp_result bios_parser_get_hpd_info(
struct dc_bios *dcb,
struct graphics_object_id id,
@@ -428,17 +565,31 @@ static enum bp_result bios_parser_get_hpd_info(
{
struct bios_parser *bp = BP_FROM_DCB(dcb);
struct atom_display_object_path_v2 *object;
+ struct atom_display_object_path_v3 *object_path_v3;
struct atom_hpd_int_record *record = NULL;
if (!info)
return BP_RESULT_BADINPUT;
- object = get_bios_object(bp, id);
+ switch (bp->object_info_tbl.revision.minor) {
+ case 4:
+ default:
+ object = get_bios_object(bp, id);
+
+ if (!object)
+ return BP_RESULT_BADINPUT;
- if (!object)
- return BP_RESULT_BADINPUT;
+ record = get_hpd_record(bp, object);
+ break;
+ case 5:
+ object_path_v3 = get_bios_object_from_path_v3(bp, id);
- record = get_hpd_record(bp, object);
+ if (!object_path_v3)
+ return BP_RESULT_BADINPUT;
+
+ record = get_hpd_record_for_path_v3(bp, object_path_v3);
+ break;
+ }
if (record != NULL) {
info->hpd_int_gpio_uid = record->pin_id;
@@ -526,25 +677,9 @@ static enum bp_result bios_parser_get_gpio_pin_info(
return BP_RESULT_UNSUPPORTED;
/* Temporary hard code gpio pin info */
-#if defined(FOR_SIMNOW_BOOT)
- {
- struct atom_gpio_pin_assignment gpio_pin[8] = {
- {0x5db5, 0, 0, 1, 0},
- {0x5db5, 8, 8, 2, 0},
- {0x5db5, 0x10, 0x10, 3, 0},
- {0x5db5, 0x18, 0x14, 4, 0},
- {0x5db5, 0x1A, 0x18, 5, 0},
- {0x5db5, 0x1C, 0x1C, 6, 0},
- };
-
- count = 6;
- memmove(header->gpio_pin, gpio_pin, sizeof(gpio_pin));
- }
-#else
count = (le16_to_cpu(header->table_header.structuresize)
- sizeof(struct atom_common_table_header))
/ sizeof(struct atom_gpio_pin_assignment);
-#endif
for (i = 0; i < count; ++i) {
if (header->gpio_pin[i].gpio_id != gpio_id)
continue;
@@ -633,19 +768,37 @@ static enum bp_result bios_parser_get_device_tag(
struct bios_parser *bp = BP_FROM_DCB(dcb);
struct atom_display_object_path_v2 *object;
+ struct atom_display_object_path_v3 *object_path_v3;
+
+
if (!info)
return BP_RESULT_BADINPUT;
- /* getBiosObject will return MXM object */
- object = get_bios_object(bp, connector_object_id);
+ switch (bp->object_info_tbl.revision.minor) {
+ case 4:
+ default:
+ /* getBiosObject will return MXM object */
+ object = get_bios_object(bp, connector_object_id);
- if (!object) {
- BREAK_TO_DEBUGGER(); /* Invalid object id */
- return BP_RESULT_BADINPUT;
- }
+ if (!object) {
+ BREAK_TO_DEBUGGER(); /* Invalid object id */
+ return BP_RESULT_BADINPUT;
+ }
- info->acpi_device = 0; /* BIOS no longer provides this */
- info->dev_id = device_type_from_device_id(object->device_tag);
+ info->acpi_device = 0; /* BIOS no longer provides this */
+ info->dev_id = device_type_from_device_id(object->device_tag);
+ break;
+ case 5:
+ object_path_v3 = get_bios_object_from_path_v3(bp, connector_object_id);
+
+ if (!object_path_v3) {
+ BREAK_TO_DEBUGGER(); /* Invalid object id */
+ return BP_RESULT_BADINPUT;
+ }
+ info->acpi_device = 0; /* BIOS no longer provides this */
+ info->dev_id = device_type_from_device_id(object_path_v3->device_tag);
+ break;
+ }
return BP_RESULT_OK;
}
@@ -685,6 +838,8 @@ static enum bp_result get_ss_info_v4_1(
disp_cntl_tbl->dvi_ss_rate_10hz * 10;
if (disp_cntl_tbl->dvi_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE)
ss_info->type.CENTER_MODE = true;
+
+ DC_LOG_BIOS("AS_SIGNAL_TYPE_DVI ss_percentage: %d\n", ss_info->spread_spectrum_percentage);
break;
case AS_SIGNAL_TYPE_HDMI:
ss_info->spread_spectrum_percentage =
@@ -693,6 +848,8 @@ static enum bp_result get_ss_info_v4_1(
disp_cntl_tbl->hdmi_ss_rate_10hz * 10;
if (disp_cntl_tbl->hdmi_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE)
ss_info->type.CENTER_MODE = true;
+
+ DC_LOG_BIOS("AS_SIGNAL_TYPE_HDMI ss_percentage: %d\n", ss_info->spread_spectrum_percentage);
break;
/* TODO LVDS not support anymore? */
case AS_SIGNAL_TYPE_DISPLAY_PORT:
@@ -702,6 +859,8 @@ static enum bp_result get_ss_info_v4_1(
disp_cntl_tbl->dp_ss_rate_10hz * 10;
if (disp_cntl_tbl->dp_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE)
ss_info->type.CENTER_MODE = true;
+
+ DC_LOG_BIOS("AS_SIGNAL_TYPE_DISPLAY_PORT ss_percentage: %d\n", ss_info->spread_spectrum_percentage);
break;
case AS_SIGNAL_TYPE_GPU_PLL:
/* atom_firmware: DAL only get data from dce_info table.
@@ -715,13 +874,15 @@ static enum bp_result get_ss_info_v4_1(
DATA_TABLES(smu_info));
if (!smu_info)
return BP_RESULT_BADBIOSTABLE;
-
+ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info->gpuclk_ss_percentage);
ss_info->spread_spectrum_percentage =
smu_info->waflclk_ss_percentage;
ss_info->spread_spectrum_range =
smu_info->gpuclk_ss_rate_10hz * 10;
if (smu_info->waflclk_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE)
ss_info->type.CENTER_MODE = true;
+
+ DC_LOG_BIOS("AS_SIGNAL_TYPE_XGMI ss_percentage: %d\n", ss_info->spread_spectrum_percentage);
break;
default:
result = BP_RESULT_UNSUPPORTED;
@@ -758,6 +919,7 @@ static enum bp_result get_ss_info_v4_2(
if (!smu_info)
return BP_RESULT_BADBIOSTABLE;
+ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info->gpuclk_ss_percentage);
ss_info->type.STEP_AND_DELAY_INFO = false;
ss_info->spread_percentage_divider = 1000;
/* BIOS no longer uses target clock. Always enable for now */
@@ -771,6 +933,8 @@ static enum bp_result get_ss_info_v4_2(
disp_cntl_tbl->dvi_ss_rate_10hz * 10;
if (disp_cntl_tbl->dvi_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE)
ss_info->type.CENTER_MODE = true;
+
+ DC_LOG_BIOS("AS_SIGNAL_TYPE_DVI ss_percentage: %d\n", ss_info->spread_spectrum_percentage);
break;
case AS_SIGNAL_TYPE_HDMI:
ss_info->spread_spectrum_percentage =
@@ -779,6 +943,8 @@ static enum bp_result get_ss_info_v4_2(
disp_cntl_tbl->hdmi_ss_rate_10hz * 10;
if (disp_cntl_tbl->hdmi_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE)
ss_info->type.CENTER_MODE = true;
+
+ DC_LOG_BIOS("AS_SIGNAL_TYPE_HDMI ss_percentage: %d\n", ss_info->spread_spectrum_percentage);
break;
/* TODO LVDS not support anymore? */
case AS_SIGNAL_TYPE_DISPLAY_PORT:
@@ -788,6 +954,8 @@ static enum bp_result get_ss_info_v4_2(
smu_info->gpuclk_ss_rate_10hz * 10;
if (smu_info->gpuclk_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE)
ss_info->type.CENTER_MODE = true;
+
+ DC_LOG_BIOS("AS_SIGNAL_TYPE_DISPLAY_PORT ss_percentage: %d\n", ss_info->spread_spectrum_percentage);
break;
case AS_SIGNAL_TYPE_GPU_PLL:
/* atom_firmware: DAL only get data from dce_info table.
@@ -803,6 +971,84 @@ static enum bp_result get_ss_info_v4_2(
return result;
}
+static enum bp_result get_ss_info_v4_5(
+ struct bios_parser *bp,
+ uint32_t id,
+ uint32_t index,
+ struct spread_spectrum_info *ss_info)
+{
+ enum bp_result result = BP_RESULT_OK;
+ struct atom_display_controller_info_v4_5 *disp_cntl_tbl = NULL;
+
+ if (!ss_info)
+ return BP_RESULT_BADINPUT;
+
+ if (!DATA_TABLES(dce_info))
+ return BP_RESULT_BADBIOSTABLE;
+
+ disp_cntl_tbl = GET_IMAGE(struct atom_display_controller_info_v4_5,
+ DATA_TABLES(dce_info));
+ if (!disp_cntl_tbl)
+ return BP_RESULT_BADBIOSTABLE;
+
+ ss_info->type.STEP_AND_DELAY_INFO = false;
+ ss_info->spread_percentage_divider = 1000;
+ /* BIOS no longer uses target clock. Always enable for now */
+ ss_info->target_clock_range = 0xffffffff;
+
+ switch (id) {
+ case AS_SIGNAL_TYPE_DVI:
+ ss_info->spread_spectrum_percentage =
+ disp_cntl_tbl->dvi_ss_percentage;
+ ss_info->spread_spectrum_range =
+ disp_cntl_tbl->dvi_ss_rate_10hz * 10;
+ if (disp_cntl_tbl->dvi_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE)
+ ss_info->type.CENTER_MODE = true;
+
+ DC_LOG_BIOS("AS_SIGNAL_TYPE_DVI ss_percentage: %d\n", ss_info->spread_spectrum_percentage);
+ break;
+ case AS_SIGNAL_TYPE_HDMI:
+ ss_info->spread_spectrum_percentage =
+ disp_cntl_tbl->hdmi_ss_percentage;
+ ss_info->spread_spectrum_range =
+ disp_cntl_tbl->hdmi_ss_rate_10hz * 10;
+ if (disp_cntl_tbl->hdmi_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE)
+ ss_info->type.CENTER_MODE = true;
+
+ DC_LOG_BIOS("AS_SIGNAL_TYPE_HDMI ss_percentage: %d\n", ss_info->spread_spectrum_percentage);
+ break;
+ case AS_SIGNAL_TYPE_DISPLAY_PORT:
+ if (bp->base.integrated_info) {
+ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", bp->base.integrated_info->gpuclk_ss_percentage);
+ ss_info->spread_spectrum_percentage =
+ bp->base.integrated_info->gpuclk_ss_percentage;
+ ss_info->type.CENTER_MODE =
+ bp->base.integrated_info->gpuclk_ss_type;
+ } else {
+ ss_info->spread_spectrum_percentage =
+ disp_cntl_tbl->dp_ss_percentage;
+ ss_info->spread_spectrum_range =
+ disp_cntl_tbl->dp_ss_rate_10hz * 10;
+ if (disp_cntl_tbl->dp_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE)
+ ss_info->type.CENTER_MODE = true;
+ }
+ DC_LOG_BIOS("AS_SIGNAL_TYPE_DISPLAY_PORT ss_percentage: %d\n", ss_info->spread_spectrum_percentage);
+ break;
+ case AS_SIGNAL_TYPE_GPU_PLL:
+ /* atom_smu_info_v4_0 does not have fields for SS for SMU Display PLL anymore.
+ * SMU Display PLL supposed to be without spread.
+ * Better place for it would be in atom_display_controller_info_v4_5 table.
+ */
+ result = BP_RESULT_UNSUPPORTED;
+ break;
+ default:
+ result = BP_RESULT_UNSUPPORTED;
+ break;
+ }
+
+ return result;
+}
+
/**
* bios_parser_get_spread_spectrum_info
* Get spread spectrum information from the ASIC_InternalSS_Info(ver 2.1 or
@@ -847,6 +1093,9 @@ static enum bp_result bios_parser_get_spread_spectrum_info(
case 3:
case 4:
return get_ss_info_v4_2(bp, signal, index, ss_info);
+ case 5:
+ return get_ss_info_v4_5(bp, signal, index, ss_info);
+
default:
ASSERT(0);
break;
@@ -887,6 +1136,31 @@ static enum bp_result get_soc_bb_info_v4_4(
return result;
}
+static enum bp_result get_soc_bb_info_v4_5(
+ struct bios_parser *bp,
+ struct bp_soc_bb_info *soc_bb_info)
+{
+ enum bp_result result = BP_RESULT_OK;
+ struct atom_display_controller_info_v4_5 *disp_cntl_tbl = NULL;
+
+ if (!soc_bb_info)
+ return BP_RESULT_BADINPUT;
+
+ if (!DATA_TABLES(dce_info))
+ return BP_RESULT_BADBIOSTABLE;
+
+ disp_cntl_tbl = GET_IMAGE(struct atom_display_controller_info_v4_5,
+ DATA_TABLES(dce_info));
+ if (!disp_cntl_tbl)
+ return BP_RESULT_BADBIOSTABLE;
+
+ soc_bb_info->dram_clock_change_latency_100ns = disp_cntl_tbl->max_mclk_chg_lat;
+ soc_bb_info->dram_sr_enter_exit_latency_100ns = disp_cntl_tbl->max_sr_enter_exit_lat;
+ soc_bb_info->dram_sr_exit_latency_100ns = disp_cntl_tbl->max_sr_exit_lat;
+
+ return result;
+}
+
static enum bp_result bios_parser_get_soc_bb_info(
struct dc_bios *dcb,
struct bp_soc_bb_info *soc_bb_info)
@@ -916,6 +1190,9 @@ static enum bp_result bios_parser_get_soc_bb_info(
case 4:
result = get_soc_bb_info_v4_4(bp, soc_bb_info);
break;
+ case 5:
+ result = get_soc_bb_info_v4_5(bp, soc_bb_info);
+ break;
default:
break;
}
@@ -1023,6 +1300,30 @@ static enum bp_result get_disp_caps_v4_4(
return result;
}
+static enum bp_result get_disp_caps_v4_5(
+ struct bios_parser *bp,
+ uint8_t *dce_caps)
+{
+ enum bp_result result = BP_RESULT_OK;
+ struct atom_display_controller_info_v4_5 *disp_cntl_tbl = NULL;
+
+ if (!dce_caps)
+ return BP_RESULT_BADINPUT;
+
+ if (!DATA_TABLES(dce_info))
+ return BP_RESULT_BADBIOSTABLE;
+
+ disp_cntl_tbl = GET_IMAGE(struct atom_display_controller_info_v4_5,
+ DATA_TABLES(dce_info));
+
+ if (!disp_cntl_tbl)
+ return BP_RESULT_BADBIOSTABLE;
+
+ *dce_caps = disp_cntl_tbl->display_caps;
+
+ return result;
+}
+
static enum bp_result bios_parser_get_lttpr_interop(
struct dc_bios *dcb,
uint8_t *dce_caps)
@@ -1057,6 +1358,11 @@ static enum bp_result bios_parser_get_lttpr_interop(
result = get_disp_caps_v4_4(bp, dce_caps);
*dce_caps = !!(*dce_caps & DCE_INFO_CAPS_VBIOS_LTTPR_TRANSPARENT_ENABLE);
break;
+ case 5:
+ result = get_disp_caps_v4_5(bp, dce_caps);
+ *dce_caps = !!(*dce_caps & DCE_INFO_CAPS_VBIOS_LTTPR_TRANSPARENT_ENABLE);
+ break;
+
default:
break;
}
@@ -1064,7 +1370,7 @@ static enum bp_result bios_parser_get_lttpr_interop(
default:
break;
}
-
+ DC_LOG_BIOS("DCE_INFO_CAPS_VBIOS_LTTPR_TRANSPARENT_ENABLE: %d tbl_revision.major = %d tbl_revision.minor = %d\n", *dce_caps, tbl_revision.major, tbl_revision.minor);
return result;
}
@@ -1080,6 +1386,7 @@ static enum bp_result bios_parser_get_lttpr_caps(
if (!DATA_TABLES(dce_info))
return BP_RESULT_UNSUPPORTED;
+ *dce_caps = 0;
header = GET_IMAGE(struct atom_common_table_header,
DATA_TABLES(dce_info));
get_atom_data_table_revision(header, &tbl_revision);
@@ -1102,6 +1409,10 @@ static enum bp_result bios_parser_get_lttpr_caps(
result = get_disp_caps_v4_4(bp, dce_caps);
*dce_caps = !!(*dce_caps & DCE_INFO_CAPS_LTTPR_SUPPORT_ENABLE);
break;
+ case 5:
+ result = get_disp_caps_v4_5(bp, dce_caps);
+ *dce_caps = !!(*dce_caps & DCE_INFO_CAPS_LTTPR_SUPPORT_ENABLE);
+ break;
default:
break;
}
@@ -1109,7 +1420,11 @@ static enum bp_result bios_parser_get_lttpr_caps(
default:
break;
}
-
+ DC_LOG_BIOS("DCE_INFO_CAPS_LTTPR_SUPPORT_ENABLE: %d tbl_revision.major = %d tbl_revision.minor = %d\n", *dce_caps, tbl_revision.major, tbl_revision.minor);
+ if (dcb->ctx->dc->config.force_bios_enable_lttpr && *dce_caps == 0) {
+ *dce_caps = 1;
+ DC_LOG_BIOS("DCE_INFO_CAPS_VBIOS_LTTPR_TRANSPARENT_ENABLE: forced enabled");
+ }
return result;
}
@@ -1165,10 +1480,10 @@ static enum bp_result get_embedded_panel_info_v2_1(
/* not provided by VBIOS */
info->lcd_timing.misc_info.HORIZONTAL_CUT_OFF = 0;
- info->lcd_timing.misc_info.H_SYNC_POLARITY = ~(uint32_t) (lvds->lcd_timing.miscinfo
- & ATOM_HSYNC_POLARITY);
- info->lcd_timing.misc_info.V_SYNC_POLARITY = ~(uint32_t) (lvds->lcd_timing.miscinfo
- & ATOM_VSYNC_POLARITY);
+ info->lcd_timing.misc_info.H_SYNC_POLARITY = !(lvds->lcd_timing.miscinfo &
+ ATOM_HSYNC_POLARITY);
+ info->lcd_timing.misc_info.V_SYNC_POLARITY = !(lvds->lcd_timing.miscinfo &
+ ATOM_VSYNC_POLARITY);
/* not provided by VBIOS */
info->lcd_timing.misc_info.VERTICAL_CUT_OFF = 0;
@@ -1274,8 +1589,15 @@ static bool bios_parser_is_device_id_supported(
uint32_t mask = get_support_mask_for_device_id(id);
- return (le16_to_cpu(bp->object_info_tbl.v1_4->supporteddevices) &
- mask) != 0;
+ switch (bp->object_info_tbl.revision.minor) {
+ case 4:
+ default:
+ return (le16_to_cpu(bp->object_info_tbl.v1_4->supporteddevices) & mask) != 0;
+ break;
+ case 5:
+ return (le16_to_cpu(bp->object_info_tbl.v1_5->supporteddevices) & mask) != 0;
+ break;
+ }
}
static uint32_t bios_parser_get_ss_entry_number(
@@ -1378,14 +1700,15 @@ static enum bp_result bios_parser_enable_disp_power_gating(
static enum bp_result bios_parser_enable_lvtma_control(
struct dc_bios *dcb,
uint8_t uc_pwr_on,
- uint8_t panel_instance)
+ uint8_t pwrseq_instance,
+ uint8_t bypass_panel_control_wait)
{
struct bios_parser *bp = BP_FROM_DCB(dcb);
if (!bp->cmd_tbl.enable_lvtma_control)
return BP_RESULT_FAILURE;
- return bp->cmd_tbl.enable_lvtma_control(bp, uc_pwr_on, panel_instance);
+ return bp->cmd_tbl.enable_lvtma_control(bp, uc_pwr_on, pwrseq_instance, bypass_panel_control_wait);
}
static bool bios_parser_is_accelerated_mode(
@@ -1413,7 +1736,7 @@ static enum bp_result bios_parser_get_firmware_info(
struct dc_firmware_info *info)
{
struct bios_parser *bp = BP_FROM_DCB(dcb);
- enum bp_result result = BP_RESULT_BADBIOSTABLE;
+ static enum bp_result result = BP_RESULT_BADBIOSTABLE;
struct atom_common_table_header *header;
struct atom_data_revision revision;
@@ -1431,10 +1754,13 @@ static enum bp_result bios_parser_get_firmware_info(
case 2:
case 3:
result = get_firmware_info_v3_2(bp, info);
- break;
+ break;
case 4:
result = get_firmware_info_v3_4(bp, info);
break;
+ case 5:
+ result = get_firmware_info_v3_5(bp, info);
+ break;
default:
break;
}
@@ -1452,6 +1778,7 @@ static enum bp_result get_firmware_info_v3_1(
struct dc_firmware_info *info)
{
struct atom_firmware_info_v3_1 *firmware_info;
+ struct atom_firmware_info_v3_2 *firmware_info32;
struct atom_display_controller_info_v4_1 *dce_info = NULL;
if (!info)
@@ -1459,11 +1786,13 @@ static enum bp_result get_firmware_info_v3_1(
firmware_info = GET_IMAGE(struct atom_firmware_info_v3_1,
DATA_TABLES(firmwareinfo));
+ firmware_info32 = GET_IMAGE(struct atom_firmware_info_v3_2,
+ DATA_TABLES(firmwareinfo));
dce_info = GET_IMAGE(struct atom_display_controller_info_v4_1,
DATA_TABLES(dce_info));
- if (!firmware_info || !dce_info)
+ if (!firmware_info || !firmware_info32 || !dce_info)
return BP_RESULT_BADBIOSTABLE;
memset(info, 0, sizeof(*info));
@@ -1491,7 +1820,15 @@ static enum bp_result get_firmware_info_v3_1(
bp->cmd_tbl.get_smu_clock_info(bp, SMU9_SYSPLL0_ID) * 10;
}
- info->oem_i2c_present = false;
+ /* These fields are marked as reserved in v3_1, but they appear to be populated
+ * properly.
+ */
+ if (firmware_info32 && firmware_info32->board_i2c_feature_id == 0x2) {
+ info->oem_i2c_present = true;
+ info->oem_i2c_obj_id = firmware_info32->board_i2c_feature_gpio_id;
+ } else {
+ info->oem_i2c_present = false;
+ }
return BP_RESULT_OK;
}
@@ -1529,19 +1866,21 @@ static enum bp_result get_firmware_info_v3_2(
/* Vega12 */
smu_info_v3_2 = GET_IMAGE(struct atom_smu_info_v3_2,
DATA_TABLES(smu_info));
-
if (!smu_info_v3_2)
return BP_RESULT_BADBIOSTABLE;
+ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_2->gpuclk_ss_percentage);
+
info->default_engine_clk = smu_info_v3_2->bootup_dcefclk_10khz * 10;
} else if (revision.minor == 3) {
/* Vega20 */
smu_info_v3_3 = GET_IMAGE(struct atom_smu_info_v3_3,
DATA_TABLES(smu_info));
-
if (!smu_info_v3_3)
return BP_RESULT_BADBIOSTABLE;
+ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_3->gpuclk_ss_percentage);
+
info->default_engine_clk = smu_info_v3_3->bootup_dcefclk_10khz * 10;
}
@@ -1590,6 +1929,11 @@ static enum bp_result get_firmware_info_v3_4(
struct atom_data_revision revision;
struct atom_display_controller_info_v4_1 *dce_info_v4_1 = NULL;
struct atom_display_controller_info_v4_4 *dce_info_v4_4 = NULL;
+
+ struct atom_smu_info_v3_5 *smu_info_v3_5 = NULL;
+ struct atom_display_controller_info_v4_5 *dce_info_v4_5 = NULL;
+ struct atom_smu_info_v4_0 *smu_info_v4_0 = NULL;
+
if (!info)
return BP_RESULT_BADINPUT;
@@ -1609,6 +1953,22 @@ static enum bp_result get_firmware_info_v3_4(
switch (revision.major) {
case 4:
switch (revision.minor) {
+ case 5:
+ dce_info_v4_5 = GET_IMAGE(struct atom_display_controller_info_v4_5,
+ DATA_TABLES(dce_info));
+
+ if (!dce_info_v4_5)
+ return BP_RESULT_BADBIOSTABLE;
+
+ /* 100MHz expected */
+ info->pll_info.crystal_frequency = dce_info_v4_5->dce_refclk_10khz * 10;
+ info->dp_phy_ref_clk = dce_info_v4_5->dpphy_refclk_10khz * 10;
+ /* 50MHz expected */
+ info->i2c_engine_ref_clk = dce_info_v4_5->i2c_engine_refclk_10khz * 10;
+
+ /* For DCN32/321 Display PLL VCO Frequency from dce_info_v4_5 may not be reliable */
+ break;
+
case 4:
dce_info_v4_4 = GET_IMAGE(struct atom_display_controller_info_v4_4,
DATA_TABLES(dce_info));
@@ -1650,6 +2010,45 @@ static enum bp_result get_firmware_info_v3_4(
DATA_TABLES(smu_info));
get_atom_data_table_revision(header, &revision);
+ switch (revision.major) {
+ case 3:
+ switch (revision.minor) {
+ case 5:
+ smu_info_v3_5 = GET_IMAGE(struct atom_smu_info_v3_5,
+ DATA_TABLES(smu_info));
+
+ if (!smu_info_v3_5)
+ return BP_RESULT_BADBIOSTABLE;
+ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_5->gpuclk_ss_percentage);
+ info->default_engine_clk = smu_info_v3_5->bootup_dcefclk_10khz * 10;
+ break;
+
+ default:
+ break;
+ }
+ break;
+
+ case 4:
+ switch (revision.minor) {
+ case 0:
+ smu_info_v4_0 = GET_IMAGE(struct atom_smu_info_v4_0,
+ DATA_TABLES(smu_info));
+
+ if (!smu_info_v4_0)
+ return BP_RESULT_BADBIOSTABLE;
+
+ /* For DCN32/321 bootup DCFCLK from smu_info_v4_0 may not be reliable */
+ break;
+
+ default:
+ break;
+ }
+ break;
+
+ default:
+ break;
+ }
+
// We need to convert from 10KHz units into KHz units.
info->default_memory_clk = firmware_info->bootup_mclk_in10khz * 10;
@@ -1663,6 +2062,63 @@ static enum bp_result get_firmware_info_v3_4(
return BP_RESULT_OK;
}
+static enum bp_result get_firmware_info_v3_5(
+ struct bios_parser *bp,
+ struct dc_firmware_info *info)
+{
+ struct atom_firmware_info_v3_5 *firmware_info;
+ struct atom_common_table_header *header;
+ struct atom_data_revision revision;
+ struct atom_display_controller_info_v4_5 *dce_info_v4_5 = NULL;
+
+ if (!info)
+ return BP_RESULT_BADINPUT;
+
+ firmware_info = GET_IMAGE(struct atom_firmware_info_v3_5,
+ DATA_TABLES(firmwareinfo));
+
+ if (!firmware_info)
+ return BP_RESULT_BADBIOSTABLE;
+
+ memset(info, 0, sizeof(*info));
+
+ if (firmware_info->board_i2c_feature_id == 0x2) {
+ info->oem_i2c_present = true;
+ info->oem_i2c_obj_id = firmware_info->board_i2c_feature_gpio_id;
+ } else {
+ info->oem_i2c_present = false;
+ }
+
+ header = GET_IMAGE(struct atom_common_table_header,
+ DATA_TABLES(dce_info));
+
+ get_atom_data_table_revision(header, &revision);
+
+ switch (revision.major) {
+ case 4:
+ switch (revision.minor) {
+ case 5:
+ dce_info_v4_5 = GET_IMAGE(struct atom_display_controller_info_v4_5,
+ DATA_TABLES(dce_info));
+
+ if (!dce_info_v4_5)
+ return BP_RESULT_BADBIOSTABLE;
+
+ /* 100MHz expected */
+ info->pll_info.crystal_frequency = dce_info_v4_5->dce_refclk_10khz * 10;
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+
+ return BP_RESULT_OK;
+}
+
static enum bp_result bios_parser_get_encoder_cap_info(
struct dc_bios *dcb,
struct graphics_object_id object_id,
@@ -1675,6 +2131,12 @@ static enum bp_result bios_parser_get_encoder_cap_info(
if (!info)
return BP_RESULT_BADINPUT;
+#if defined(CONFIG_DRM_AMD_DC_FP)
+ /* encoder cap record not available in v1_5 */
+ if (bp->object_info_tbl.revision.minor == 5)
+ return BP_RESULT_NORECORD;
+#endif
+
object = get_bios_object(bp, object_id);
if (!object)
@@ -1683,6 +2145,7 @@ static enum bp_result bios_parser_get_encoder_cap_info(
record = get_encoder_cap_record(bp, object);
if (!record)
return BP_RESULT_NORECORD;
+ DC_LOG_BIOS("record->encodercaps 0x%x for object_id 0x%x", record->encodercaps, object_id.id);
info->DP_HBR2_CAP = (record->encodercaps &
ATOM_ENCODER_CAP_RECORD_HBR2) ? 1 : 0;
@@ -1692,7 +2155,6 @@ static enum bp_result bios_parser_get_encoder_cap_info(
ATOM_ENCODER_CAP_RECORD_HBR3_EN) ? 1 : 0;
info->HDMI_6GB_EN = (record->encodercaps &
ATOM_ENCODER_CAP_RECORD_HDMI6Gbps_EN) ? 1 : 0;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
info->IS_DP2_CAPABLE = (record->encodercaps &
ATOM_ENCODER_CAP_RECORD_DP2) ? 1 : 0;
info->DP_UHBR10_EN = (record->encodercaps &
@@ -1701,9 +2163,9 @@ static enum bp_result bios_parser_get_encoder_cap_info(
ATOM_ENCODER_CAP_RECORD_UHBR13_5_EN) ? 1 : 0;
info->DP_UHBR20_EN = (record->encodercaps &
ATOM_ENCODER_CAP_RECORD_UHBR20_EN) ? 1 : 0;
-#endif
info->DP_IS_USB_C = (record->encodercaps &
ATOM_ENCODER_CAP_RECORD_USB_C_TYPE) ? 1 : 0;
+ DC_LOG_BIOS("\t info->DP_IS_USB_C %d", info->DP_IS_USB_C);
return BP_RESULT_OK;
}
@@ -1783,6 +2245,41 @@ static struct atom_disp_connector_caps_record *get_disp_connector_caps_record(
return NULL;
}
+static struct atom_connector_caps_record *get_connector_caps_record(struct bios_parser *bp,
+ struct atom_display_object_path_v3 *object)
+{
+ struct atom_common_record_header *header;
+ uint32_t offset;
+
+ if (!object) {
+ BREAK_TO_DEBUGGER(); /* Invalid object */
+ return NULL;
+ }
+
+ offset = object->disp_recordoffset + bp->object_info_tbl_offset;
+
+ for (;;) {
+ header = GET_IMAGE(struct atom_common_record_header, offset);
+
+ if (!header)
+ return NULL;
+
+ offset += header->record_size;
+
+ if (header->record_type == ATOM_RECORD_END_TYPE ||
+ !header->record_size)
+ break;
+
+ if (header->record_type != ATOM_CONNECTOR_CAP_RECORD_TYPE)
+ continue;
+
+ if (sizeof(struct atom_connector_caps_record) <= header->record_size)
+ return (struct atom_connector_caps_record *)header;
+ }
+
+ return NULL;
+}
+
static enum bp_result bios_parser_get_disp_connector_caps_info(
struct dc_bios *dcb,
struct graphics_object_id object_id,
@@ -1790,25 +2287,113 @@ static enum bp_result bios_parser_get_disp_connector_caps_info(
{
struct bios_parser *bp = BP_FROM_DCB(dcb);
struct atom_display_object_path_v2 *object;
+ struct atom_display_object_path_v3 *object_path_v3;
+ struct atom_connector_caps_record *record_path_v3;
struct atom_disp_connector_caps_record *record = NULL;
if (!info)
return BP_RESULT_BADINPUT;
- object = get_bios_object(bp, object_id);
+ switch (bp->object_info_tbl.revision.minor) {
+ case 4:
+ default:
+ object = get_bios_object(bp, object_id);
- if (!object)
+ if (!object)
+ return BP_RESULT_BADINPUT;
+
+ record = get_disp_connector_caps_record(bp, object);
+ if (!record)
+ return BP_RESULT_NORECORD;
+
+ info->INTERNAL_DISPLAY =
+ (record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY) ? 1 : 0;
+ info->INTERNAL_DISPLAY_BL =
+ (record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY_BL) ? 1 : 0;
+ break;
+ case 5:
+ object_path_v3 = get_bios_object_from_path_v3(bp, object_id);
+
+ if (!object_path_v3)
+ return BP_RESULT_BADINPUT;
+
+ record_path_v3 = get_connector_caps_record(bp, object_path_v3);
+ if (!record_path_v3)
+ return BP_RESULT_NORECORD;
+
+ info->INTERNAL_DISPLAY = (record_path_v3->connector_caps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY)
+ ? 1 : 0;
+ info->INTERNAL_DISPLAY_BL = (record_path_v3->connector_caps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY_BL)
+ ? 1 : 0;
+ break;
+ }
+
+ return BP_RESULT_OK;
+}
+
+static struct atom_connector_speed_record *get_connector_speed_cap_record(struct bios_parser *bp,
+ struct atom_display_object_path_v3 *object)
+{
+ struct atom_common_record_header *header;
+ uint32_t offset;
+
+ if (!object) {
+ BREAK_TO_DEBUGGER(); /* Invalid object */
+ return NULL;
+ }
+
+ offset = object->disp_recordoffset + bp->object_info_tbl_offset;
+
+ for (;;) {
+ header = GET_IMAGE(struct atom_common_record_header, offset);
+
+ if (!header)
+ return NULL;
+
+ offset += header->record_size;
+
+ if (header->record_type == ATOM_RECORD_END_TYPE ||
+ !header->record_size)
+ break;
+
+ if (header->record_type != ATOM_CONNECTOR_SPEED_UPTO)
+ continue;
+
+ if (sizeof(struct atom_connector_speed_record) <= header->record_size)
+ return (struct atom_connector_speed_record *)header;
+ }
+
+ return NULL;
+}
+
+static enum bp_result bios_parser_get_connector_speed_cap_info(
+ struct dc_bios *dcb,
+ struct graphics_object_id object_id,
+ struct bp_connector_speed_cap_info *info)
+{
+ struct bios_parser *bp = BP_FROM_DCB(dcb);
+ struct atom_display_object_path_v3 *object_path_v3;
+ //struct atom_connector_speed_record *record = NULL;
+ struct atom_connector_speed_record *record;
+
+ if (!info)
+ return BP_RESULT_BADINPUT;
+
+ object_path_v3 = get_bios_object_from_path_v3(bp, object_id);
+
+ if (!object_path_v3)
return BP_RESULT_BADINPUT;
- record = get_disp_connector_caps_record(bp, object);
+ record = get_connector_speed_cap_record(bp, object_path_v3);
if (!record)
return BP_RESULT_NORECORD;
- info->INTERNAL_DISPLAY = (record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY)
- ? 1 : 0;
- info->INTERNAL_DISPLAY_BL = (record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY_BL)
- ? 1 : 0;
-
+ info->DP_HBR2_EN = (record->connector_max_speed >= 5400) ? 1 : 0;
+ info->DP_HBR3_EN = (record->connector_max_speed >= 8100) ? 1 : 0;
+ info->HDMI_6GB_EN = (record->connector_max_speed >= 5940) ? 1 : 0;
+ info->DP_UHBR10_EN = (record->connector_max_speed >= 10000) ? 1 : 0;
+ info->DP_UHBR13_5_EN = (record->connector_max_speed >= 13500) ? 1 : 0;
+ info->DP_UHBR20_EN = (record->connector_max_speed >= 20000) ? 1 : 0;
return BP_RESULT_OK;
}
@@ -1817,7 +2402,7 @@ static enum bp_result get_vram_info_v23(
struct dc_vram_info *info)
{
struct atom_vram_info_header_v2_3 *info_v23;
- enum bp_result result = BP_RESULT_OK;
+ static enum bp_result result = BP_RESULT_OK;
info_v23 = GET_IMAGE(struct atom_vram_info_header_v2_3,
DATA_TABLES(vram_info));
@@ -1836,7 +2421,7 @@ static enum bp_result get_vram_info_v24(
struct dc_vram_info *info)
{
struct atom_vram_info_header_v2_4 *info_v24;
- enum bp_result result = BP_RESULT_OK;
+ static enum bp_result result = BP_RESULT_OK;
info_v24 = GET_IMAGE(struct atom_vram_info_header_v2_4,
DATA_TABLES(vram_info));
@@ -1855,7 +2440,7 @@ static enum bp_result get_vram_info_v25(
struct dc_vram_info *info)
{
struct atom_vram_info_header_v2_5 *info_v25;
- enum bp_result result = BP_RESULT_OK;
+ static enum bp_result result = BP_RESULT_OK;
info_v25 = GET_IMAGE(struct atom_vram_info_header_v2_5,
DATA_TABLES(vram_info));
@@ -1869,6 +2454,44 @@ static enum bp_result get_vram_info_v25(
return result;
}
+/*
+ * get_vram_info_v30
+ *
+ * @brief
+ * Read the vram_info data table as struct atom_vram_info_header_v3_0 and
+ * copy its channel data into info.
+ *
+ * @param
+ * bios_parser *bp - [in] BIOS parser handler (used by the image macros)
+ * dc_vram_info *info - [out] receives num_chans and
+ *                      dram_channel_width_bytes, the latter computed as
+ *                      (1 << channel_width) / 8
+ *
+ * @return
+ * enum bp_result - BP_RESULT_OK if the table image is available,
+ *                  BP_RESULT_BADBIOSTABLE otherwise.
+ */
+static enum bp_result get_vram_info_v30(
+	struct bios_parser *bp,
+	struct dc_vram_info *info)
+{
+	struct atom_vram_info_header_v3_0 *vram_tbl =
+		GET_IMAGE(struct atom_vram_info_header_v3_0,
+			  DATA_TABLES(vram_info));
+
+	if (!vram_tbl)
+		return BP_RESULT_BADBIOSTABLE;
+
+	info->num_chans = vram_tbl->channel_num;
+	info->dram_channel_width_bytes = (1 << vram_tbl->channel_width) / 8;
+
+	return BP_RESULT_OK;
+}
+
+/*
+ * get_vram_info_from_umc_info_v40
+ *
+ * @brief
+ * Read the umc_info data table as struct atom_umc_info_v4_0 and copy its
+ * channel data into info.
+ *
+ * @param
+ * bios_parser *bp - [in] BIOS parser handler (used by the image macros)
+ * dc_vram_info *info - [out] receives num_chans and
+ *                      dram_channel_width_bytes, the latter computed as
+ *                      (1 << channel_width) / 8
+ *
+ * @return
+ * enum bp_result - BP_RESULT_OK if the table image is available,
+ *                  BP_RESULT_BADBIOSTABLE otherwise.
+ */
+static enum bp_result get_vram_info_from_umc_info_v40(
+	struct bios_parser *bp,
+	struct dc_vram_info *info)
+{
+	struct atom_umc_info_v4_0 *umc_tbl =
+		GET_IMAGE(struct atom_umc_info_v4_0,
+			  DATA_TABLES(umc_info));
+
+	if (!umc_tbl)
+		return BP_RESULT_BADBIOSTABLE;
+
+	info->num_chans = umc_tbl->channel_num;
+	info->dram_channel_width_bytes = (1 << umc_tbl->channel_width) / 8;
+
+	return BP_RESULT_OK;
+}
+
/*
* get_integrated_info_v11
*
@@ -1880,7 +2503,7 @@ static enum bp_result get_vram_info_v25(
* integrated_info *info - [out] store and output integrated info
*
* @return
- * enum bp_result - BP_RESULT_OK if information is available,
+ * enum bp_result - BP_RESULT_OK if information is available,
* BP_RESULT_BADBIOSTABLE otherwise.
*/
static enum bp_result get_integrated_info_v11(
@@ -1896,6 +2519,8 @@ static enum bp_result get_integrated_info_v11(
if (info_v11 == NULL)
return BP_RESULT_BADBIOSTABLE;
+ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v11->gpuclk_ss_percentage);
+
info->gpu_cap_info =
le32_to_cpu(info_v11->gpucapinfo);
/*
@@ -2111,6 +2736,8 @@ static enum bp_result get_integrated_info_v2_1(
if (info_v2_1 == NULL)
return BP_RESULT_BADBIOSTABLE;
+ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_1->gpuclk_ss_percentage);
+
info->gpu_cap_info =
le32_to_cpu(info_v2_1->gpucapinfo);
/*
@@ -2271,6 +2898,8 @@ static enum bp_result get_integrated_info_v2_2(
if (info_v2_2 == NULL)
return BP_RESULT_BADBIOSTABLE;
+ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_2->gpuclk_ss_percentage);
+
info->gpu_cap_info =
le32_to_cpu(info_v2_2->gpucapinfo);
/*
@@ -2287,6 +2916,8 @@ static enum bp_result get_integrated_info_v2_2(
info->ma_channel_number = info_v2_2->umachannelnumber;
info->dp_ss_control =
le16_to_cpu(info_v2_2->reserved1);
+ info->gpuclk_ss_percentage = info_v2_2->gpuclk_ss_percentage;
+ info->gpuclk_ss_type = info_v2_2->gpuclk_ss_type;
for (i = 0; i < NUMBER_OF_UCHAR_FOR_GUID; ++i) {
info->ext_disp_conn_info.gu_id[i] =
@@ -2371,19 +3002,23 @@ static enum bp_result get_integrated_info_v2_2(
* integrated_info *info - [out] store and output integrated info
*
* @return
- * enum bp_result - BP_RESULT_OK if information is available,
+ * enum bp_result - BP_RESULT_OK if information is available,
* BP_RESULT_BADBIOSTABLE otherwise.
*/
static enum bp_result construct_integrated_info(
struct bios_parser *bp,
struct integrated_info *info)
{
- enum bp_result result = BP_RESULT_BADBIOSTABLE;
+ static enum bp_result result = BP_RESULT_BADBIOSTABLE;
struct atom_common_table_header *header;
struct atom_data_revision revision;
- uint32_t i;
- uint32_t j;
+
+ int32_t i;
+ int32_t j;
+
+ if (!info)
+ return result;
if (info && DATA_TABLES(integratedsysteminfo)) {
header = GET_IMAGE(struct atom_common_table_header,
@@ -2408,6 +3043,7 @@ static enum bp_result construct_integrated_info(
result = get_integrated_info_v2_1(bp, info);
break;
case 2:
+ case 3:
result = get_integrated_info_v2_2(bp, info);
break;
default:
@@ -2417,21 +3053,76 @@ static enum bp_result construct_integrated_info(
default:
return result;
}
+ if (result == BP_RESULT_OK) {
+
+ DC_LOG_BIOS("edp1:\n"
+ "\tedp_pwr_on_off_delay = %d\n"
+ "\tedp_pwr_on_vary_bl_to_blon = %d\n"
+ "\tedp_pwr_down_bloff_to_vary_bloff = %d\n"
+ "\tedp_bootup_bl_level = %d\n",
+ info->edp1_info.edp_pwr_on_off_delay,
+ info->edp1_info.edp_pwr_on_vary_bl_to_blon,
+ info->edp1_info.edp_pwr_down_bloff_to_vary_bloff,
+ info->edp1_info.edp_bootup_bl_level);
+ DC_LOG_BIOS("edp2:\n"
+ "\tedp_pwr_on_off_delayv = %d\n"
+ "\tedp_pwr_on_vary_bl_to_blon = %d\n"
+ "\tedp_pwr_down_bloff_to_vary_bloff = %d\n"
+ "\tedp_bootup_bl_level = %d\n",
+ info->edp2_info.edp_pwr_on_off_delay,
+ info->edp2_info.edp_pwr_on_vary_bl_to_blon,
+ info->edp2_info.edp_pwr_down_bloff_to_vary_bloff,
+ info->edp2_info.edp_bootup_bl_level);
+ }
}
if (result != BP_RESULT_OK)
return result;
-
+ else {
+ // Log each external path
+ for (i = 0; i < MAX_NUMBER_OF_EXT_DISPLAY_PATH; i++) {
+ if (info->ext_disp_conn_info.path[i].device_tag != 0)
+ DC_LOG_BIOS("integrated_info:For EXTERNAL DISPLAY PATH %d --------------\n"
+ "DEVICE_TAG: 0x%x\n"
+ "DEVICE_ACPI_ENUM: 0x%x\n"
+ "DEVICE_CONNECTOR_ID: 0x%x\n"
+ "EXT_AUX_DDC_LUT_INDEX: %d\n"
+ "EXT_HPD_PIN_LUT_INDEX: %d\n"
+ "EXT_ENCODER_OBJ_ID: 0x%x\n"
+ "Encoder CAPS: 0x%x\n",
+ i,
+ info->ext_disp_conn_info.path[i].device_tag,
+ info->ext_disp_conn_info.path[i].device_acpi_enum,
+ info->ext_disp_conn_info.path[i].device_connector_id.id,
+ info->ext_disp_conn_info.path[i].ext_aux_ddc_lut_index,
+ info->ext_disp_conn_info.path[i].ext_hpd_pin_lut_index,
+ info->ext_disp_conn_info.path[i].ext_encoder_obj_id.id,
+ info->ext_disp_conn_info.path[i].caps
+ );
+ if ((info->ext_disp_conn_info.path[i].caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN)
+ DC_LOG_BIOS("BIOS AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN on path %d\n", i);
+ else if (bp->base.ctx->dc->config.force_bios_fixed_vs) {
+ info->ext_disp_conn_info.path[i].caps &= ~AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK;
+ info->ext_disp_conn_info.path[i].caps |= AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN;
+ DC_LOG_BIOS("driver forced AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN on path %d\n", i);
+ }
+ }
+ // Log the Checksum and Voltage Swing
+ DC_LOG_BIOS("Integrated info table CHECKSUM: %d\n"
+ "Integrated info table FIX_DP_VOLTAGE_SWING: %d\n",
+ info->ext_disp_conn_info.checksum,
+ info->ext_disp_conn_info.fixdpvoltageswing);
+ if (bp->base.ctx->dc->config.force_bios_fixed_vs && info->ext_disp_conn_info.fixdpvoltageswing == 0) {
+ info->ext_disp_conn_info.fixdpvoltageswing = bp->base.ctx->dc->config.force_bios_fixed_vs & 0xF;
+ DC_LOG_BIOS("driver forced fixdpvoltageswing = %d\n", info->ext_disp_conn_info.fixdpvoltageswing);
+ }
+ }
/* Sort voltage table from low to high*/
for (i = 1; i < NUMBER_OF_DISP_CLK_VOLTAGE; ++i) {
for (j = i; j > 0; --j) {
if (info->disp_clk_voltage[j].max_supported_clk <
- info->disp_clk_voltage[j-1].max_supported_clk
- ) {
- /* swap j and j - 1*/
- swap(info->disp_clk_voltage[j - 1],
- info->disp_clk_voltage[j]);
- }
+ info->disp_clk_voltage[j-1].max_supported_clk)
+ swap(info->disp_clk_voltage[j-1], info->disp_clk_voltage[j]);
}
}
@@ -2447,7 +3138,29 @@ static enum bp_result bios_parser_get_vram_info(
struct atom_common_table_header *header;
struct atom_data_revision revision;
- if (info && DATA_TABLES(vram_info)) {
+ // vram info moved to umc_info for DCN4x
+ if (info && DATA_TABLES(umc_info)) {
+ header = GET_IMAGE(struct atom_common_table_header,
+ DATA_TABLES(umc_info));
+
+ get_atom_data_table_revision(header, &revision);
+
+ switch (revision.major) {
+ case 4:
+ switch (revision.minor) {
+ case 0:
+ result = get_vram_info_from_umc_info_v40(bp, info);
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (result != BP_RESULT_OK && info && DATA_TABLES(vram_info)) {
header = GET_IMAGE(struct atom_common_table_header,
DATA_TABLES(vram_info));
@@ -2470,6 +3183,16 @@ static enum bp_result bios_parser_get_vram_info(
}
break;
+ case 3:
+ switch (revision.minor) {
+ case 0:
+ result = get_vram_info_v30(bp, info);
+ break;
+ default:
+ break;
+ }
+ break;
+
default:
return result;
}
@@ -2482,7 +3205,7 @@ static struct integrated_info *bios_parser_create_integrated_info(
struct dc_bios *dcb)
{
struct bios_parser *bp = BP_FROM_DCB(dcb);
- struct integrated_info *info = NULL;
+ struct integrated_info *info;
info = kzalloc(sizeof(struct integrated_info), GFP_KERNEL);
@@ -2509,7 +3232,7 @@ static enum bp_result update_slot_layout_info(
struct atom_display_object_path_v2 *object;
struct atom_bracket_layout_record *record;
struct atom_common_record_header *record_header;
- enum bp_result result;
+ static enum bp_result result;
struct bios_parser *bp;
struct object_info_table *tbl;
struct display_object_info_table_v1_4 *v1_4;
@@ -2615,6 +3338,105 @@ static enum bp_result update_slot_layout_info(
return result;
}
+/*
+ * update_slot_layout_info_v2
+ *
+ * @brief
+ * Fill slot_layout_info for display path i of the v1.5 object info table,
+ * using the first bracket-layout v2 record found in that path's record list.
+ *
+ * @param
+ * struct dc_bios *dcb - [in] BIOS parser base object
+ * unsigned int i - [in] index into v1_5->display_path[]; the same value
+ *                  indexes slot_layout_info->connectors[]
+ * struct slot_layout_info *slot_layout_info - [out] slot and connector data
+ *
+ * @return
+ * enum bp_result - BP_RESULT_OK when a matching record was found and copied,
+ *                  BP_RESULT_NORECORD when the list ends without one,
+ *                  BP_RESULT_BADBIOSTABLE when a record header cannot be
+ *                  read from the image.
+ *
+ * NOTE(review): i is not range-checked here; callers presumably keep it
+ * below number_of_path and within connectors[] - confirm.
+ */
+static enum bp_result update_slot_layout_info_v2(
+	struct dc_bios *dcb,
+	unsigned int i,
+	struct slot_layout_info *slot_layout_info)
+{
+	struct bios_parser *bp = BP_FROM_DCB(dcb);
+	struct display_object_info_table_v1_5 *v1_5 = bp->object_info_tbl.v1_5;
+	struct atom_display_object_path_v3 *object = &v1_5->display_path[i];
+	struct atom_bracket_layout_record_v2 *record = NULL;
+	struct atom_common_record_header *record_header;
+	struct graphics_object_id connector_id;
+	unsigned int record_offset;
+	/*
+	 * Deliberately an automatic variable: it is assigned before every
+	 * read, so static storage would add nothing except shared state
+	 * between concurrent callers.
+	 */
+	enum bp_result result = BP_RESULT_NORECORD;
+
+	record_offset = (unsigned int)object->disp_recordoffset +
+			(unsigned int)bp->object_info_tbl_offset;
+
+	for (;;) {
+		record_header = GET_IMAGE(struct atom_common_record_header,
+					  record_offset);
+		if (record_header == NULL) {
+			result = BP_RESULT_BADBIOSTABLE;
+			break;
+		}
+
+		/* the end of the list (a zero size would never advance) */
+		if (record_header->record_type == ATOM_RECORD_END_TYPE ||
+		    record_header->record_size == 0)
+			break;
+
+		/* accept the record only if it is large enough to be read as v2 */
+		if (record_header->record_type ==
+				ATOM_BRACKET_LAYOUT_V2_RECORD_TYPE &&
+		    sizeof(struct atom_bracket_layout_record_v2) <=
+				record_header->record_size) {
+			record = (struct atom_bracket_layout_record_v2 *)
+					record_header;
+			result = BP_RESULT_OK;
+			break;
+		}
+
+		record_offset += record_header->record_size;
+	}
+
+	/* return if the record not found */
+	if (result != BP_RESULT_OK)
+		return result;
+
+	/* get slot sizes */
+	connector_id = object_id_from_bios_object_id(object->display_objid);
+
+	slot_layout_info->length = record->bracketlen;
+	slot_layout_info->width = record->bracketwidth;
+	slot_layout_info->num_of_connectors = v1_5->number_of_path;
+	slot_layout_info->connectors[i].position = record->conn_num;
+	slot_layout_info->connectors[i].connector_id = connector_id;
+
+	switch (connector_id.id) {
+	case CONNECTOR_ID_SINGLE_LINK_DVID:
+	case CONNECTOR_ID_DUAL_LINK_DVID:
+		slot_layout_info->connectors[i].connector_type = CONNECTOR_LAYOUT_TYPE_DVI_D;
+		slot_layout_info->connectors[i].length = CONNECTOR_SIZE_DVI;
+		break;
+
+	case CONNECTOR_ID_HDMI_TYPE_A:
+		slot_layout_info->connectors[i].connector_type = CONNECTOR_LAYOUT_TYPE_HDMI;
+		slot_layout_info->connectors[i].length = CONNECTOR_SIZE_HDMI;
+		break;
+
+	case CONNECTOR_ID_DISPLAY_PORT:
+	case CONNECTOR_ID_USBC:
+		/* mini_type selects between the full-size and mini DP layout */
+		if (record->mini_type == MINI_TYPE_NORMAL) {
+			slot_layout_info->connectors[i].connector_type = CONNECTOR_LAYOUT_TYPE_DP;
+			slot_layout_info->connectors[i].length = CONNECTOR_SIZE_DP;
+		} else {
+			slot_layout_info->connectors[i].connector_type = CONNECTOR_LAYOUT_TYPE_MINI_DP;
+			slot_layout_info->connectors[i].length = CONNECTOR_SIZE_MINI_DP;
+		}
+		break;
+
+	default:
+		slot_layout_info->connectors[i].connector_type = CONNECTOR_LAYOUT_TYPE_UNKNOWN;
+		slot_layout_info->connectors[i].length = CONNECTOR_SIZE_UNKNOWN;
+		break;
+	}
+	return result;
+}
static enum bp_result get_bracket_layout_record(
struct dc_bios *dcb,
@@ -2623,27 +3445,37 @@ static enum bp_result get_bracket_layout_record(
{
unsigned int i;
struct bios_parser *bp = BP_FROM_DCB(dcb);
- enum bp_result result;
+ static enum bp_result result;
struct object_info_table *tbl;
struct display_object_info_table_v1_4 *v1_4;
+ struct display_object_info_table_v1_5 *v1_5;
if (slot_layout_info == NULL) {
DC_LOG_DETECTION_EDID_PARSER("Invalid slot_layout_info\n");
return BP_RESULT_BADINPUT;
}
+
tbl = &bp->object_info_tbl;
v1_4 = tbl->v1_4;
+ v1_5 = tbl->v1_5;
result = BP_RESULT_NORECORD;
- for (i = 0; i < v1_4->number_of_path; ++i) {
-
- if (bracket_layout_id ==
- v1_4->display_path[i].display_objid) {
- result = update_slot_layout_info(dcb, i,
- slot_layout_info);
- break;
+ switch (bp->object_info_tbl.revision.minor) {
+ case 4:
+ default:
+ for (i = 0; i < v1_4->number_of_path; ++i) {
+ if (bracket_layout_id == v1_4->display_path[i].display_objid) {
+ result = update_slot_layout_info(dcb, i, slot_layout_info);
+ break;
+ }
}
+ break;
+ case 5:
+ for (i = 0; i < v1_5->number_of_path; ++i)
+ result = update_slot_layout_info_v2(dcb, i, slot_layout_info);
+ break;
}
+
return result;
}
@@ -2652,7 +3484,9 @@ static enum bp_result bios_get_board_layout_info(
struct board_layout_info *board_layout_info)
{
unsigned int i;
- enum bp_result record_result;
+ struct bios_parser *bp;
+ static enum bp_result record_result;
+ unsigned int max_slots;
const unsigned int slot_index_to_vbios_id[MAX_BOARD_SLOTS] = {
GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID1,
@@ -2660,14 +3494,22 @@ static enum bp_result bios_get_board_layout_info(
0, 0
};
+ bp = BP_FROM_DCB(dcb);
+
if (board_layout_info == NULL) {
DC_LOG_DETECTION_EDID_PARSER("Invalid board_layout_info\n");
return BP_RESULT_BADINPUT;
}
board_layout_info->num_of_slots = 0;
+ max_slots = MAX_BOARD_SLOTS;
+
+ // Assume single slot on v1_5
+ if (bp->object_info_tbl.revision.minor == 5) {
+ max_slots = 1;
+ }
- for (i = 0; i < MAX_BOARD_SLOTS; ++i) {
+ for (i = 0; i < max_slots; ++i) {
record_result = get_bracket_layout_record(dcb,
slot_index_to_vbios_id[i],
&board_layout_info->slots[i]);
@@ -2694,99 +3536,6 @@ static uint16_t bios_parser_pack_data_tables(
struct dc_bios *dcb,
void *dst)
{
-#ifdef PACK_BIOS_DATA
- struct bios_parser *bp = BP_FROM_DCB(dcb);
- struct atom_rom_header_v2_2 *rom_header = NULL;
- struct atom_rom_header_v2_2 *packed_rom_header = NULL;
- struct atom_common_table_header *data_tbl_header = NULL;
- struct atom_master_list_of_data_tables_v2_1 *data_tbl_list = NULL;
- struct atom_master_data_table_v2_1 *packed_master_data_tbl = NULL;
- struct atom_data_revision tbl_rev = {0};
- uint16_t *rom_header_offset = NULL;
- const uint8_t *bios = bp->base.bios;
- uint8_t *bios_dst = (uint8_t *)dst;
- uint16_t packed_rom_header_offset;
- uint16_t packed_masterdatatable_offset;
- uint16_t packed_data_tbl_offset;
- uint16_t data_tbl_offset;
- unsigned int i;
-
- rom_header_offset =
- GET_IMAGE(uint16_t, OFFSET_TO_ATOM_ROM_HEADER_POINTER);
-
- if (!rom_header_offset)
- return 0;
-
- rom_header = GET_IMAGE(struct atom_rom_header_v2_2, *rom_header_offset);
-
- if (!rom_header)
- return 0;
-
- get_atom_data_table_revision(&rom_header->table_header, &tbl_rev);
- if (!(tbl_rev.major >= 2 && tbl_rev.minor >= 2))
- return 0;
-
- get_atom_data_table_revision(&bp->master_data_tbl->table_header, &tbl_rev);
- if (!(tbl_rev.major >= 2 && tbl_rev.minor >= 1))
- return 0;
-
- packed_rom_header_offset =
- OFFSET_TO_ATOM_ROM_HEADER_POINTER + sizeof(*rom_header_offset);
-
- packed_masterdatatable_offset =
- packed_rom_header_offset + rom_header->table_header.structuresize;
-
- packed_data_tbl_offset =
- packed_masterdatatable_offset +
- bp->master_data_tbl->table_header.structuresize;
-
- packed_rom_header =
- (struct atom_rom_header_v2_2 *)(bios_dst + packed_rom_header_offset);
-
- packed_master_data_tbl =
- (struct atom_master_data_table_v2_1 *)(bios_dst +
- packed_masterdatatable_offset);
-
- memcpy(bios_dst, bios, OFFSET_TO_ATOM_ROM_HEADER_POINTER);
-
- *((uint16_t *)(bios_dst + OFFSET_TO_ATOM_ROM_HEADER_POINTER)) =
- packed_rom_header_offset;
-
- memcpy(bios_dst + packed_rom_header_offset, rom_header,
- rom_header->table_header.structuresize);
-
- packed_rom_header->masterdatatable_offset = packed_masterdatatable_offset;
-
- memcpy(&packed_master_data_tbl->table_header,
- &bp->master_data_tbl->table_header,
- sizeof(bp->master_data_tbl->table_header));
-
- data_tbl_list = &bp->master_data_tbl->listOfdatatables;
-
- /* Each data table offset in data table list is 2 bytes,
- * we can use that to iterate through listOfdatatables
- * without knowing the name of each member.
- */
- for (i = 0; i < sizeof(*data_tbl_list)/sizeof(uint16_t); i++) {
- data_tbl_offset = *((uint16_t *)data_tbl_list + i);
-
- if (data_tbl_offset) {
- data_tbl_header =
- (struct atom_common_table_header *)(bios + data_tbl_offset);
-
- memcpy(bios_dst + packed_data_tbl_offset, data_tbl_header,
- data_tbl_header->structuresize);
-
- *((uint16_t *)&packed_master_data_tbl->listOfdatatables + i) =
- packed_data_tbl_offset;
-
- packed_data_tbl_offset += data_tbl_header->structuresize;
- } else {
- *((uint16_t *)&packed_master_data_tbl->listOfdatatables + i) = 0;
- }
- }
- return packed_data_tbl_offset;
-#endif
// TODO: There is data bytes alignment issue, disable it for now.
return 0;
}
@@ -2816,6 +3565,13 @@ static struct atom_dc_golden_table_v1 *bios_get_golden_table(
dc_golden_offset = DATA_TABLES(dce_info) + disp_cntl_tbl_4_4->dc_golden_table_offset;
*dc_golden_table_ver = disp_cntl_tbl_4_4->dc_golden_table_ver;
break;
+ case 5:
+ default:
+ /* For atom_display_controller_info_v4_5 there is no need to get golden table from
+ * dc_golden_table_offset as all these fields previously in golden table used for AUX
+ * pre-charge settings are now available directly in atom_display_controller_info_v4_5.
+ */
+ break;
}
break;
}
@@ -2931,6 +3687,8 @@ static const struct dc_vbios_funcs vbios_funcs = {
.get_lttpr_caps = bios_parser_get_lttpr_caps,
.get_lttpr_interop = bios_parser_get_lttpr_interop,
+
+ .get_connector_speed_cap_info = bios_parser_get_connector_speed_cap_info,
};
static bool bios_parser2_construct(
@@ -2995,7 +3753,7 @@ static bool bios_parser2_construct(
&bp->object_info_tbl.revision);
if (bp->object_info_tbl.revision.major == 1
- && bp->object_info_tbl.revision.minor >= 4) {
+ && bp->object_info_tbl.revision.minor == 4) {
struct display_object_info_table_v1_4 *tbl_v1_4;
tbl_v1_4 = GET_IMAGE(struct display_object_info_table_v1_4,
@@ -3004,8 +3762,20 @@ static bool bios_parser2_construct(
return false;
bp->object_info_tbl.v1_4 = tbl_v1_4;
- } else
+ } else if (bp->object_info_tbl.revision.major == 1
+ && bp->object_info_tbl.revision.minor == 5) {
+ struct display_object_info_table_v1_5 *tbl_v1_5;
+
+ tbl_v1_5 = GET_IMAGE(struct display_object_info_table_v1_5,
+ bp->object_info_tbl_offset);
+ if (!tbl_v1_5)
+ return false;
+
+ bp->object_info_tbl.v1_5 = tbl_v1_5;
+ } else {
+ ASSERT(0);
return false;
+ }
dal_firmware_parser_init_cmd_tbl(bp);
dal_bios_parser_init_cmd_tbl_helper2(&bp->cmd_helper, dce_version);
@@ -3013,7 +3783,7 @@ static bool bios_parser2_construct(
bp->base.integrated_info = bios_parser_create_integrated_info(&bp->base);
bp->base.fw_info_valid = bios_parser_get_firmware_info(&bp->base, &bp->base.fw_info) == BP_RESULT_OK;
bios_parser_get_vram_info(&bp->base, &bp->base.vram_info);
-
+ bios_parser_get_soc_bb_info(&bp->base, &bp->base.bb_info);
return true;
}
@@ -3021,7 +3791,7 @@ struct dc_bios *firmware_parser_create(
struct bp_init_data *init,
enum dce_version dce_version)
{
- struct bios_parser *bp = NULL;
+ struct bios_parser *bp;
bp = kzalloc(sizeof(struct bios_parser), GFP_KERNEL);
if (!bp)
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_common.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_common.c
index a8cb039d2572..34e3a64f556e 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_common.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_common.c
@@ -213,6 +213,9 @@ static enum connector_id connector_id_from_bios_object_id(
case CONNECTOR_OBJECT_ID_MXM:
id = CONNECTOR_ID_MXM;
break;
+ case CONNECTOR_OBJECT_ID_USBC:
+ id = CONNECTOR_ID_USBC;
+ break;
default:
id = CONNECTOR_ID_UNKNOWN;
break;
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c
index adc710fe4a45..8d2cf95ae739 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c
@@ -78,10 +78,3 @@ void bios_set_scratch_critical_state(
uint32_t critial_state = state ? 1 : 0;
REG_UPDATE(BIOS_SCRATCH_6, S6_CRITICAL_STATE, critial_state);
}
-
-uint32_t bios_get_vga_enabled_displays(
- struct dc_bios *bios)
-{
- return REG_READ(BIOS_SCRATCH_3) & 0XFFFF;
-}
-
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h
index e1b4a40a353d..ab162f2fe577 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h
@@ -34,7 +34,6 @@ uint8_t *bios_get_image(struct dc_bios *bp, uint32_t offset,
bool bios_is_accelerated_mode(struct dc_bios *bios);
void bios_set_scratch_acc_mode_change(struct dc_bios *bios, uint32_t state);
void bios_set_scratch_critical_state(struct dc_bios *bios, bool state);
-uint32_t bios_get_vga_enabled_displays(struct dc_bios *bios);
#define GET_IMAGE(type, offset) ((type *) bios_get_image(&bp->base, offset, sizeof(type)))
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_types_internal2.h b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_types_internal2.h
index bf1f5c86e65c..41d02d473082 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_types_internal2.h
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_types_internal2.h
@@ -40,6 +40,7 @@ struct object_info_table {
struct atom_data_revision revision;
union {
struct display_object_info_table_v1_4 *v1_4;
+ struct display_object_info_table_v1_5 *v1_5;
};
};
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.c b/drivers/gpu/drm/amd/display/dc/bios/command_table.c
index ad13e4e36d77..22457f417e65 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.c
@@ -37,7 +37,7 @@
#define EXEC_BIOS_CMD_TABLE(command, params)\
(amdgpu_atom_execute_table(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \
GetIndexIntoMasterTable(COMMAND, command), \
- (uint32_t *)&params) == 0)
+ (uint32_t *)&params, sizeof(params)) == 0)
#define BIOS_CMD_TABLE_REVISION(command, frev, crev)\
amdgpu_atom_parse_cmd_header(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \
@@ -52,7 +52,9 @@ static void init_transmitter_control(struct bios_parser *bp);
static void init_set_pixel_clock(struct bios_parser *bp);
static void init_enable_spread_spectrum_on_ppll(struct bios_parser *bp);
static void init_adjust_display_pll(struct bios_parser *bp);
+static void init_select_crtc_source(struct bios_parser *bp);
static void init_dac_encoder_control(struct bios_parser *bp);
+static void init_dac_load_detection(struct bios_parser *bp);
static void init_dac_output_control(struct bios_parser *bp);
static void init_set_crtc_timing(struct bios_parser *bp);
static void init_enable_crtc(struct bios_parser *bp);
@@ -69,7 +71,9 @@ void dal_bios_parser_init_cmd_tbl(struct bios_parser *bp)
init_set_pixel_clock(bp);
init_enable_spread_spectrum_on_ppll(bp);
init_adjust_display_pll(bp);
+ init_select_crtc_source(bp);
init_dac_encoder_control(bp);
+ init_dac_load_detection(bp);
init_dac_output_control(bp);
init_set_crtc_timing(bp);
init_enable_crtc(bp);
@@ -399,7 +403,7 @@ static enum bp_result transmitter_control_v1_6(
static void init_transmitter_control(struct bios_parser *bp)
{
uint8_t frev;
- uint8_t crev;
+ uint8_t crev = 0;
if (BIOS_CMD_TABLE_REVISION(UNIPHYTransmitterControl,
frev, crev) == false)
@@ -456,7 +460,7 @@ static enum bp_result transmitter_control_v2(
if ((CONNECTOR_ID_DUAL_LINK_DVII == connector_id) ||
(CONNECTOR_ID_DUAL_LINK_DVID == connector_id))
/* on INIT this bit should be set according to the
- * phisycal connector
+ * physical connector
* Bit0: dual link connector flag
* =0 connector is single link connector
* =1 connector is dual link connector
@@ -468,7 +472,7 @@ static enum bp_result transmitter_control_v2(
cpu_to_le16((uint8_t)cntl->connector_obj_id.id);
break;
case TRANSMITTER_CONTROL_SET_VOLTAGE_AND_PREEMPASIS:
- /* votage swing and pre-emphsis */
+ /* voltage swing and pre-emphasis */
params.asMode.ucLaneSel = (uint8_t)cntl->lane_select;
params.asMode.ucLaneSet = (uint8_t)cntl->lane_settings;
break;
@@ -522,7 +526,8 @@ static enum bp_result transmitter_control_v2(
*/
params.acConfig.ucEncoderSel = 1;
- if (CONNECTOR_ID_DISPLAY_PORT == connector_id)
+ if (CONNECTOR_ID_DISPLAY_PORT == connector_id ||
+ CONNECTOR_ID_USBC == connector_id)
/* Bit4: DP connector flag
* =0 connector is none-DP connector
* =1 connector is DP connector
@@ -992,7 +997,7 @@ static enum bp_result set_pixel_clock_v3(
allocation.sPCLKInput.usFbDiv =
cpu_to_le16((uint16_t)bp_params->feedback_divider);
allocation.sPCLKInput.ucFracFbDiv =
- (uint8_t)bp_params->fractional_feedback_divider;
+ (uint8_t)(bp_params->fractional_feedback_divider / 100000);
allocation.sPCLKInput.ucPostDiv =
(uint8_t)bp_params->pixel_clock_post_divider;
@@ -1611,6 +1616,198 @@ static enum bp_result adjust_display_pll_v3(
/*******************************************************************************
********************************************************************************
**
+ ** SELECT CRTC SOURCE
+ **
+ ********************************************************************************
+ *******************************************************************************/
+
+static enum bp_result select_crtc_source_v1(
+ struct bios_parser *bp,
+ struct bp_crtc_source_select *bp_params);
+static enum bp_result select_crtc_source_v2(
+ struct bios_parser *bp,
+ struct bp_crtc_source_select *bp_params);
+static enum bp_result select_crtc_source_v3(
+ struct bios_parser *bp,
+ struct bp_crtc_source_select *bp_params);
+
+/*
+ * Pick the SelectCRTC_Source implementation that matches the parameter
+ * revision reported for that command table and store it in
+ * bp->cmd_tbl.select_crtc_source. Revisions other than 1, 2 and 3 leave
+ * the hook set to NULL.
+ */
+static void init_select_crtc_source(struct bios_parser *bp)
+{
+	enum bp_result (*handler)(struct bios_parser *bp,
+				  struct bp_crtc_source_select *bp_params) = NULL;
+
+	switch (BIOS_CMD_TABLE_PARA_REVISION(SelectCRTC_Source)) {
+	case 1:
+		handler = select_crtc_source_v1;
+		break;
+	case 2:
+		handler = select_crtc_source_v2;
+		break;
+	case 3:
+		handler = select_crtc_source_v3;
+		break;
+	default:
+		/* unknown revision: keep the NULL hook */
+		break;
+	}
+
+	bp->cmd_tbl.select_crtc_source = handler;
+}
+
+/*
+ * select_crtc_source_v1
+ *
+ * @brief
+ * Execute the SelectCRTC_Source command table with a CRTC index and a CRT
+ * device index derived from bp_params.
+ *
+ * @param
+ * struct bios_parser *bp - [in] BIOS parser handler
+ * struct bp_crtc_source_select *bp_params - [in] controller_id and
+ *                                           engine_id are read
+ *
+ * @return
+ * enum bp_result - BP_RESULT_OK if the command table executed,
+ *                  BP_RESULT_BADINPUT if the controller id cannot be
+ *                  translated or the engine is neither DACA nor DACB,
+ *                  BP_RESULT_FAILURE if execution failed.
+ */
+static enum bp_result select_crtc_source_v1(
+	struct bios_parser *bp,
+	struct bp_crtc_source_select *bp_params)
+{
+	enum bp_result result = BP_RESULT_FAILURE;
+	/*
+	 * Start from an all-zero allocation so that any member not assigned
+	 * below is handed to the command table as 0 rather than as whatever
+	 * happened to be on the stack.
+	 */
+	SELECT_CRTC_SOURCE_PS_ALLOCATION params = {0};
+
+	if (!bp->cmd_helper->controller_id_to_atom(bp_params->controller_id, &params.ucCRTC))
+		return BP_RESULT_BADINPUT;
+
+	switch (bp_params->engine_id) {
+	case ENGINE_ID_DACA:
+		params.ucDevice = ATOM_DEVICE_CRT1_INDEX;
+		break;
+	case ENGINE_ID_DACB:
+		params.ucDevice = ATOM_DEVICE_CRT2_INDEX;
+		break;
+	default:
+		return BP_RESULT_BADINPUT;
+	}
+
+	if (EXEC_BIOS_CMD_TABLE(SelectCRTC_Source, params))
+		result = BP_RESULT_OK;
+
+	return result;
+}
+
+/*
+ * Translate a driver engine_id into the ASIC_INT_*_ENCODER_ID value used by
+ * the SelectCRTC_Source parameters.
+ *
+ * Returns true and writes *out_encoder_id for DIGA..DIGG, DACA and DACB.
+ * Returns false and leaves *out_encoder_id untouched for any other engine.
+ */
+static bool select_crtc_source_v2_encoder_id(
+	enum engine_id engine_id, uint8_t *out_encoder_id)
+{
+	switch (engine_id) {
+	case ENGINE_ID_DIGA:
+		*out_encoder_id = ASIC_INT_DIG1_ENCODER_ID;
+		return true;
+	case ENGINE_ID_DIGB:
+		*out_encoder_id = ASIC_INT_DIG2_ENCODER_ID;
+		return true;
+	case ENGINE_ID_DIGC:
+		*out_encoder_id = ASIC_INT_DIG3_ENCODER_ID;
+		return true;
+	case ENGINE_ID_DIGD:
+		*out_encoder_id = ASIC_INT_DIG4_ENCODER_ID;
+		return true;
+	case ENGINE_ID_DIGE:
+		*out_encoder_id = ASIC_INT_DIG5_ENCODER_ID;
+		return true;
+	case ENGINE_ID_DIGF:
+		*out_encoder_id = ASIC_INT_DIG6_ENCODER_ID;
+		return true;
+	case ENGINE_ID_DIGG:
+		*out_encoder_id = ASIC_INT_DIG7_ENCODER_ID;
+		return true;
+	case ENGINE_ID_DACA:
+		*out_encoder_id = ASIC_INT_DAC1_ENCODER_ID;
+		return true;
+	case ENGINE_ID_DACB:
+		*out_encoder_id = ASIC_INT_DAC2_ENCODER_ID;
+		return true;
+	default:
+		break;
+	}
+
+	/* no mapping for this engine */
+	return false;
+}
+
+/*
+ * Translate a sink signal type into the ATOM_ENCODER_MODE_* value used by
+ * the SelectCRTC_Source parameters.
+ *
+ * Returns true and writes *out_encoder_mode for the DVI, HDMI type A, LVDS,
+ * RGB, DisplayPort, DisplayPort MST and eDP signals (eDP maps to the same
+ * mode as DisplayPort). Returns false and leaves *out_encoder_mode untouched
+ * for any other signal.
+ */
+static bool select_crtc_source_v2_encoder_mode(
+	enum signal_type signal_type, uint8_t *out_encoder_mode)
+{
+	switch (signal_type) {
+	case SIGNAL_TYPE_DVI_SINGLE_LINK:
+	case SIGNAL_TYPE_DVI_DUAL_LINK:
+		*out_encoder_mode = ATOM_ENCODER_MODE_DVI;
+		return true;
+	case SIGNAL_TYPE_HDMI_TYPE_A:
+		*out_encoder_mode = ATOM_ENCODER_MODE_HDMI;
+		return true;
+	case SIGNAL_TYPE_LVDS:
+		*out_encoder_mode = ATOM_ENCODER_MODE_LVDS;
+		return true;
+	case SIGNAL_TYPE_RGB:
+		*out_encoder_mode = ATOM_ENCODER_MODE_CRT;
+		return true;
+	case SIGNAL_TYPE_DISPLAY_PORT:
+	case SIGNAL_TYPE_EDP:
+		*out_encoder_mode = ATOM_ENCODER_MODE_DP;
+		return true;
+	case SIGNAL_TYPE_DISPLAY_PORT_MST:
+		*out_encoder_mode = ATOM_ENCODER_MODE_DP_MST;
+		return true;
+	default:
+		break;
+	}
+
+	/* no mapping for this signal */
+	return false;
+}
+
+/*
+ * select_crtc_source_v2
+ *
+ * @brief
+ * Execute the SelectCRTC_Source command table with a CRTC index, encoder id
+ * and encoder mode derived from bp_params.
+ *
+ * @param
+ * struct bios_parser *bp - [in] BIOS parser handler
+ * struct bp_crtc_source_select *bp_params - [in] controller_id, engine_id
+ *                                           and sink_signal are read
+ *
+ * @return
+ * enum bp_result - BP_RESULT_OK if the command table executed,
+ *                  BP_RESULT_BADINPUT if any of the three inputs cannot be
+ *                  translated, BP_RESULT_FAILURE if execution failed.
+ */
+static enum bp_result select_crtc_source_v2(
+	struct bios_parser *bp,
+	struct bp_crtc_source_select *bp_params)
+{
+	enum bp_result result = BP_RESULT_FAILURE;
+	/*
+	 * This revision never assigns ucDstBpc, so zero the structure up
+	 * front; otherwise that member would reach the command table with an
+	 * indeterminate stack value.
+	 */
+	SELECT_CRTC_SOURCE_PARAMETERS_V3 params = {0};
+
+	if (!bp->cmd_helper->controller_id_to_atom(bp_params->controller_id, &params.ucCRTC))
+		return BP_RESULT_BADINPUT;
+
+	if (!select_crtc_source_v2_encoder_id(
+			bp_params->engine_id,
+			&params.ucEncoderID))
+		return BP_RESULT_BADINPUT;
+	if (!select_crtc_source_v2_encoder_mode(
+			bp_params->sink_signal,
+			&params.ucEncodeMode))
+		return BP_RESULT_BADINPUT;
+
+	if (EXEC_BIOS_CMD_TABLE(SelectCRTC_Source, params))
+		result = BP_RESULT_OK;
+
+	return result;
+}
+
+/*
+ * select_crtc_source_v3
+ *
+ * @brief
+ * Execute the SelectCRTC_Source command table with a CRTC index, encoder id,
+ * encoder mode and destination bit depth derived from bp_params.
+ *
+ * @param
+ * struct bios_parser *bp - [in] BIOS parser handler
+ * struct bp_crtc_source_select *bp_params - [in] controller_id, engine_id,
+ *                                           sink_signal and bit_depth are
+ *                                           read
+ *
+ * @return
+ * enum bp_result - BP_RESULT_OK if the command table executed,
+ *                  BP_RESULT_BADINPUT if the controller, engine or signal
+ *                  cannot be translated, BP_RESULT_FAILURE if execution
+ *                  failed.
+ */
+static enum bp_result select_crtc_source_v3(
+	struct bios_parser *bp,
+	struct bp_crtc_source_select *bp_params)
+{
+	SELECT_CRTC_SOURCE_PARAMETERS_V3 params;
+
+	/* translations run in this order and stop at the first failure */
+	if (!bp->cmd_helper->controller_id_to_atom(bp_params->controller_id, &params.ucCRTC) ||
+	    !select_crtc_source_v2_encoder_id(bp_params->engine_id,
+					      &params.ucEncoderID) ||
+	    !select_crtc_source_v2_encoder_mode(bp_params->sink_signal,
+						&params.ucEncodeMode))
+		return BP_RESULT_BADINPUT;
+
+	params.ucDstBpc = bp_params->bit_depth;
+
+	if (!EXEC_BIOS_CMD_TABLE(SelectCRTC_Source, params))
+		return BP_RESULT_FAILURE;
+
+	return BP_RESULT_OK;
+}
+
+/*******************************************************************************
+ ********************************************************************************
+ **
** DAC ENCODER CONTROL
**
********************************************************************************
@@ -1710,6 +1907,96 @@ static enum bp_result dac2_encoder_control_v1(
/*******************************************************************************
********************************************************************************
**
+ ** DAC LOAD DETECTION
+ **
+ ********************************************************************************
+ *******************************************************************************/
+
+static enum bp_result dac_load_detection_v1(
+ struct bios_parser *bp,
+ struct bp_load_detection_parameters *bp_params);
+
+static enum bp_result dac_load_detection_v3(
+ struct bios_parser *bp,
+ struct bp_load_detection_parameters *bp_params);
+
+/*
+ * Install the DAC load detection handler in bp->cmd_tbl.
+ *
+ * Parameter revisions 1 and 2 of the DAC_LoadDetection table use the v1
+ * handler; revision 3 and every other value use the v3 handler.
+ */
+static void init_dac_load_detection(struct bios_parser *bp)
+{
+	enum bp_result (*handler)(
+		struct bios_parser *bp,
+		struct bp_load_detection_parameters *bp_params) =
+			dac_load_detection_v3;
+
+	switch (BIOS_CMD_TABLE_PARA_REVISION(DAC_LoadDetection)) {
+	case 1:
+	case 2:
+		handler = dac_load_detection_v1;
+		break;
+	default:
+		/* Revision 3 and anything unrecognised keep the v3 handler. */
+		break;
+	}
+
+	bp->cmd_tbl.dac_load_detection = handler;
+}
+
+/*
+ * Fill in the DAC_LoadDetection parameter block.
+ *
+ * @params:    allocation to initialise; fully overwritten
+ * @engine_id: ENGINE_ID_DACB selects ATOM_DAC_B, anything else ATOM_DAC_A
+ * @device_id: device identifier, stored little-endian in usDeviceID
+ * @misc:      value stored in ucMisc
+ */
+static void dac_load_detect_prepare_params(
+	struct _DAC_LOAD_DETECTION_PS_ALLOCATION *params,
+	enum engine_id engine_id,
+	uint16_t device_id,
+	uint8_t misc)
+{
+	/*
+	 * ucDacType is given ATOM_DAC_* values; the default must therefore be
+	 * ATOM_DAC_A rather than an enum engine_id enumerator.
+	 */
+	uint8_t dac_type = ATOM_DAC_A;
+
+	if (engine_id == ENGINE_ID_DACB)
+		dac_type = ATOM_DAC_B;
+
+	/*
+	 * Callers pass an uninitialised stack allocation; clear it so that
+	 * fields not assigned below do not carry indeterminate values.
+	 */
+	memset(params, 0, sizeof(*params));
+
+	params->sDacload.usDeviceID = cpu_to_le16(device_id);
+	params->sDacload.ucDacType = dac_type;
+	params->sDacload.ucMisc = misc;
+}
+
+/*
+ * Execute the DAC_LoadDetection command table, v1 variant.
+ *
+ * Builds the parameter block from @bp_params with ucMisc set to 0.
+ *
+ * Returns BP_RESULT_OK when the command table executes successfully and
+ * BP_RESULT_FAILURE otherwise.
+ */
+static enum bp_result dac_load_detection_v1(
+	struct bios_parser *bp,
+	struct bp_load_detection_parameters *bp_params)
+{
+	DAC_LOAD_DETECTION_PS_ALLOCATION params;
+
+	dac_load_detect_prepare_params(&params,
+				       bp_params->engine_id,
+				       bp_params->device_id,
+				       0);
+
+	if (!EXEC_BIOS_CMD_TABLE(DAC_LoadDetection, params))
+		return BP_RESULT_FAILURE;
+
+	return BP_RESULT_OK;
+}
+
+/*
+ * Execute the DAC_LoadDetection command table, v3 variant.
+ *
+ * Identical to the v1 variant except that ucMisc is DAC_LOAD_MISC_YPrPb
+ * when the device id is ATOM_DEVICE_CV_SUPPORT or ATOM_DEVICE_TV1_SUPPORT,
+ * and 0 for every other device id.
+ *
+ * Returns BP_RESULT_OK when the command table executes successfully and
+ * BP_RESULT_FAILURE otherwise.
+ */
+static enum bp_result dac_load_detection_v3(
+	struct bios_parser *bp,
+	struct bp_load_detection_parameters *bp_params)
+{
+	DAC_LOAD_DETECTION_PS_ALLOCATION params;
+	const bool wants_yprpb =
+		bp_params->device_id == ATOM_DEVICE_CV_SUPPORT ||
+		bp_params->device_id == ATOM_DEVICE_TV1_SUPPORT;
+	const uint8_t misc = wants_yprpb ? DAC_LOAD_MISC_YPrPb : 0;
+
+	dac_load_detect_prepare_params(&params,
+				       bp_params->engine_id,
+				       bp_params->device_id,
+				       misc);
+
+	return EXEC_BIOS_CMD_TABLE(DAC_LoadDetection, params) ?
+		BP_RESULT_OK : BP_RESULT_FAILURE;
+}
+
+/*******************************************************************************
+ ********************************************************************************
+ **
** DAC OUTPUT CONTROL
**
********************************************************************************
@@ -2120,7 +2407,7 @@ static enum bp_result program_clock_v5(
memset(&params, 0, sizeof(params));
if (!bp->cmd_helper->clock_source_id_to_atom(
bp_params->pll_id, &atom_pll_id)) {
- BREAK_TO_DEBUGGER(); /* Invalid Inpute!! */
+ BREAK_TO_DEBUGGER(); /* Invalid Input!! */
return BP_RESULT_BADINPUT;
}
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.h b/drivers/gpu/drm/amd/display/dc/bios/command_table.h
index ad533775e724..e89b1ba0048b 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table.h
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.h
@@ -52,6 +52,9 @@ struct cmd_tbl {
enum bp_result (*adjust_display_pll)(
struct bios_parser *bp,
struct bp_adjust_pixel_clock_parameters *bp_params);
+ enum bp_result (*select_crtc_source)(
+ struct bios_parser *bp,
+ struct bp_crtc_source_select *bp_params);
enum bp_result (*dac1_encoder_control)(
struct bios_parser *bp,
bool enable,
@@ -68,6 +71,9 @@ struct cmd_tbl {
enum bp_result (*dac2_output_control)(
struct bios_parser *bp,
bool enable);
+ enum bp_result (*dac_load_detection)(
+ struct bios_parser *bp,
+ struct bp_load_detection_parameters *bp_params);
enum bp_result (*set_crtc_timing)(
struct bios_parser *bp,
struct bp_hw_crtc_timing_parameters *bp_params);
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
index 9afa5eb2e6d3..f2b1720a6a66 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
@@ -49,7 +49,7 @@
#define EXEC_BIOS_CMD_TABLE(fname, params)\
(amdgpu_atom_execute_table(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \
GET_INDEX_INTO_MASTER_TABLE(command, fname), \
- (uint32_t *)&params) == 0)
+ (uint32_t *)&params, sizeof(params)) == 0)
#define BIOS_CMD_TABLE_REVISION(fname, frev, crev)\
amdgpu_atom_parse_cmd_header(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \
@@ -101,7 +101,6 @@ static void init_dig_encoder_control(struct bios_parser *bp)
bp->cmd_tbl.dig_encoder_control = encoder_control_digx_v1_5;
break;
default:
- dm_output_to_console("Don't have dig_encoder_control for v%d\n", version);
bp->cmd_tbl.dig_encoder_control = encoder_control_fallback;
break;
}
@@ -123,9 +122,7 @@ static void encoder_control_dmcub(
sizeof(cmd.digx_encoder_control.header);
cmd.digx_encoder_control.encoder_control.dig.stream_param = *dig;
- dc_dmub_srv_cmd_queue(dmcub, &cmd);
- dc_dmub_srv_cmd_execute(dmcub);
- dc_dmub_srv_wait_idle(dmcub);
+ dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
static enum bp_result encoder_control_digx_v1_5(
@@ -212,6 +209,7 @@ static enum bp_result encoder_control_fallback(
******************************************************************************
*****************************************************************************/
+
static enum bp_result transmitter_control_v1_6(
struct bios_parser *bp,
struct bp_transmitter_control *cntl);
@@ -227,9 +225,10 @@ static enum bp_result transmitter_control_fallback(
static void init_transmitter_control(struct bios_parser *bp)
{
uint8_t frev;
- uint8_t crev;
+ uint8_t crev = 0;
- BIOS_CMD_TABLE_REVISION(dig1transmittercontrol, frev, crev);
+ if (!BIOS_CMD_TABLE_REVISION(dig1transmittercontrol, frev, crev) && (bp->base.ctx->dc->ctx->dce_version <= DCN_VERSION_2_0))
+ BREAK_TO_DEBUGGER();
switch (crev) {
case 6:
@@ -239,7 +238,6 @@ static void init_transmitter_control(struct bios_parser *bp)
bp->cmd_tbl.transmitter_control = transmitter_control_v1_7;
break;
default:
- dm_output_to_console("Don't have transmitter_control for v%d\n", crev);
bp->cmd_tbl.transmitter_control = transmitter_control_fallback;
break;
}
@@ -261,9 +259,7 @@ static void transmitter_control_dmcub(
sizeof(cmd.dig1_transmitter_control.header);
cmd.dig1_transmitter_control.transmitter_control.dig = *dig;
- dc_dmub_srv_cmd_queue(dmcub, &cmd);
- dc_dmub_srv_cmd_execute(dmcub);
- dc_dmub_srv_wait_idle(dmcub);
+ dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
static enum bp_result transmitter_control_v1_6(
@@ -325,9 +321,22 @@ static void transmitter_control_dmcub_v1_7(
sizeof(cmd.dig1_transmitter_control.header);
cmd.dig1_transmitter_control.transmitter_control.dig_v1_7 = *dig;
- dc_dmub_srv_cmd_queue(dmcub, &cmd);
- dc_dmub_srv_cmd_execute(dmcub);
- dc_dmub_srv_wait_idle(dmcub);
+ dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+/*
+ * Find the link whose link encoder uses the given PHY.
+ *
+ * Compares @phy_id with link_enc->transmitter of every entry in
+ * p_dc->links[] and returns the first match, or NULL when none matches.
+ *
+ * NOTE(review): the visible caller passes dig_v1_7.phyid, which was produced
+ * by phy_id_to_atom(); confirm that this encoding is comparable with
+ * link_enc->transmitter.
+ */
+static struct dc_link *get_link_by_phy_id(struct dc *p_dc, uint32_t phy_id)
+{
+	for (uint8_t link_id = 0; link_id < MAX_LINKS; link_id++) {
+		struct dc_link *candidate = p_dc->links[link_id];
+
+		/*
+		 * The loop covers all MAX_LINKS slots; skip any slot that is
+		 * empty or has no link encoder instead of dereferencing NULL.
+		 */
+		if (!candidate || !candidate->link_enc)
+			continue;
+
+		if (phy_id == candidate->link_enc->transmitter)
+			return candidate;
+	}
+
+	return NULL;
 }
static enum bp_result transmitter_control_v1_7(
@@ -338,12 +347,10 @@ static enum bp_result transmitter_control_v1_7(
const struct command_table_helper *cmd = bp->cmd_helper;
struct dmub_dig_transmitter_control_data_v1_7 dig_v1_7 = {0};
-#if defined(CONFIG_DRM_AMD_DC_DCN)
uint8_t hpo_instance = (uint8_t)cntl->hpo_engine_id - ENGINE_ID_HPO_0;
if (dc_is_dp_signal(cntl->signal))
hpo_instance = (uint8_t)cntl->hpo_engine_id - ENGINE_ID_HPO_DP_0;
-#endif
dig_v1_7.phyid = cmd->phy_id_to_atom(cntl->transmitter);
dig_v1_7.action = (uint8_t)cntl->action;
@@ -358,9 +365,7 @@ static enum bp_result transmitter_control_v1_7(
dig_v1_7.hpdsel = cmd->hpd_sel_to_atom(cntl->hpd_sel);
dig_v1_7.digfe_sel = cmd->dig_encoder_sel_to_atom(cntl->engine_id);
dig_v1_7.connobj_id = (uint8_t)cntl->connector_obj_id.id;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
dig_v1_7.HPO_instance = hpo_instance;
-#endif
dig_v1_7.symclk_units.symclk_10khz = cntl->pixel_clock/10;
if (cntl->action == TRANSMITTER_CONTROL_ENABLE ||
@@ -372,7 +377,38 @@ static enum bp_result transmitter_control_v1_7(
if (bp->base.ctx->dc->ctx->dmub_srv &&
bp->base.ctx->dc->debug.dmub_command_table) {
+ struct dm_process_phy_transition_init_params process_phy_transition_init_params = {0};
+ struct dc_link *link = get_link_by_phy_id(bp->base.ctx->dc, dig_v1_7.phyid);
+ bool is_phy_transition_interlock_allowed = false;
+ uint8_t action = dig_v1_7.action;
+
+ if (link) {
+ if (link->phy_transition_bitmask &&
+ (action == TRANSMITTER_CONTROL_ENABLE || action == TRANSMITTER_CONTROL_DISABLE)) {
+ is_phy_transition_interlock_allowed = true;
+
+ // Prepare input parameters for processing ACPI retimers
+ process_phy_transition_init_params.action = action;
+ process_phy_transition_init_params.display_port_lanes_count = cntl->lanes_number;
+ process_phy_transition_init_params.phy_id = dig_v1_7.phyid;
+ process_phy_transition_init_params.signal = cntl->signal;
+ process_phy_transition_init_params.sym_clock_10khz = dig_v1_7.symclk_units.symclk_10khz;
+ process_phy_transition_init_params.display_port_link_rate = link->cur_link_settings.link_rate;
+ process_phy_transition_init_params.transition_bitmask = link->phy_transition_bitmask;
+ }
+ dig_v1_7.skip_phy_ssc_reduction = link->wa_flags.skip_phy_ssc_reduction;
+ }
+
+ // Handle PRE_OFF_TO_ON: Process ACPI PHY Transition Interlock
+ if (is_phy_transition_interlock_allowed && action == TRANSMITTER_CONTROL_ENABLE)
+ dm_acpi_process_phy_transition_interlock(bp->base.ctx, process_phy_transition_init_params);
+
transmitter_control_dmcub_v1_7(bp->base.ctx->dmub_srv, &dig_v1_7);
+
+ // Handle POST_ON_TO_OFF: Process ACPI PHY Transition Interlock
+ if (is_phy_transition_interlock_allowed && action == TRANSMITTER_CONTROL_DISABLE)
+ dm_acpi_process_phy_transition_interlock(bp->base.ctx, process_phy_transition_init_params);
+
return BP_RESULT_OK;
}
@@ -417,8 +453,6 @@ static void init_set_pixel_clock(struct bios_parser *bp)
bp->cmd_tbl.set_pixel_clock = set_pixel_clock_v7;
break;
default:
- dm_output_to_console("Don't have set_pixel_clock for v%d\n",
- BIOS_CMD_TABLE_PARA_REVISION(setpixelclock));
bp->cmd_tbl.set_pixel_clock = set_pixel_clock_fallback;
break;
}
@@ -439,9 +473,7 @@ static void set_pixel_clock_dmcub(
sizeof(cmd.set_pixel_clock.header);
cmd.set_pixel_clock.pixel_clock.clk = *clk;
- dc_dmub_srv_cmd_queue(dmcub, &cmd);
- dc_dmub_srv_cmd_execute(dmcub);
- dc_dmub_srv_wait_idle(dmcub);
+ dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
static enum bp_result set_pixel_clock_v7(
@@ -565,7 +597,6 @@ static void init_set_crtc_timing(struct bios_parser *bp)
set_crtc_using_dtd_timing_v3;
break;
default:
- dm_output_to_console("Don't have set_crtc_timing for v%d\n", dtd_version);
bp->cmd_tbl.set_crtc_timing = NULL;
break;
}
@@ -682,8 +713,6 @@ static void init_enable_crtc(struct bios_parser *bp)
bp->cmd_tbl.enable_crtc = enable_crtc_v1;
break;
default:
- dm_output_to_console("Don't have enable_crtc for v%d\n",
- BIOS_CMD_TABLE_PARA_REVISION(enablecrtc));
bp->cmd_tbl.enable_crtc = NULL;
break;
}
@@ -808,9 +837,7 @@ static void enable_disp_power_gating_dmcub(
sizeof(cmd.enable_disp_power_gating.header);
cmd.enable_disp_power_gating.power_gating.pwr = *pwr;
- dc_dmub_srv_cmd_queue(dmcub, &cmd);
- dc_dmub_srv_cmd_execute(dmcub);
- dc_dmub_srv_wait_idle(dmcub);
+ dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
static enum bp_result enable_disp_power_gating_v2_1(
@@ -877,8 +904,6 @@ static void init_set_dce_clock(struct bios_parser *bp)
bp->cmd_tbl.set_dce_clock = set_dce_clock_v2_1;
break;
default:
- dm_output_to_console("Don't have set_dce_clock for v%d\n",
- BIOS_CMD_TABLE_PARA_REVISION(setdceclock));
bp->cmd_tbl.set_dce_clock = NULL;
break;
}
@@ -990,7 +1015,8 @@ static unsigned int get_smu_clock_info_v3_1(struct bios_parser *bp, uint8_t id)
static enum bp_result enable_lvtma_control(
struct bios_parser *bp,
uint8_t uc_pwr_on,
- uint8_t panel_instance);
+ uint8_t pwrseq_instance,
+ uint8_t bypass_panel_control_wait);
static void init_enable_lvtma_control(struct bios_parser *bp)
{
@@ -1002,7 +1028,8 @@ static void init_enable_lvtma_control(struct bios_parser *bp)
static void enable_lvtma_control_dmcub(
struct dc_dmub_srv *dmcub,
uint8_t uc_pwr_on,
- uint8_t panel_instance)
+ uint8_t pwrseq_instance,
+ uint8_t bypass_panel_control_wait)
{
union dmub_rb_cmd cmd;
@@ -1014,18 +1041,18 @@ static void enable_lvtma_control_dmcub(
DMUB_CMD__VBIOS_LVTMA_CONTROL;
cmd.lvtma_control.data.uc_pwr_action =
uc_pwr_on;
- cmd.lvtma_control.data.panel_inst =
- panel_instance;
- dc_dmub_srv_cmd_queue(dmcub, &cmd);
- dc_dmub_srv_cmd_execute(dmcub);
- dc_dmub_srv_wait_idle(dmcub);
-
+ cmd.lvtma_control.data.pwrseq_inst =
+ pwrseq_instance;
+ cmd.lvtma_control.data.bypass_panel_control_wait =
+ bypass_panel_control_wait;
+ dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
static enum bp_result enable_lvtma_control(
struct bios_parser *bp,
uint8_t uc_pwr_on,
- uint8_t panel_instance)
+ uint8_t pwrseq_instance,
+ uint8_t bypass_panel_control_wait)
{
enum bp_result result = BP_RESULT_FAILURE;
@@ -1033,7 +1060,8 @@ static enum bp_result enable_lvtma_control(
bp->base.ctx->dc->debug.dmub_command_table) {
enable_lvtma_control_dmcub(bp->base.ctx->dmub_srv,
uc_pwr_on,
- panel_instance);
+ pwrseq_instance,
+ bypass_panel_control_wait);
return BP_RESULT_OK;
}
return result;
@@ -1056,3 +1084,4 @@ void dal_firmware_parser_init_cmd_tbl(struct bios_parser *bp)
init_enable_lvtma_control(bp);
}
+
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.h b/drivers/gpu/drm/amd/display/dc/bios/command_table2.h
index be060b4b87db..41c8c014397f 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.h
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.h
@@ -96,7 +96,8 @@ struct cmd_tbl {
struct bios_parser *bp, uint8_t id);
enum bp_result (*enable_lvtma_control)(struct bios_parser *bp,
uint8_t uc_pwr_on,
- uint8_t panel_instance);
+ uint8_t pwrseq_instance,
+ uint8_t bypass_panel_control_wait);
};
void dal_firmware_parser_init_cmd_tbl(struct bios_parser *bp);
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c
index e317a3615147..91bc8a06e2cf 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c
@@ -293,3 +293,107 @@ uint8_t dal_cmd_table_helper_encoder_id_to_atom(
return ENCODER_OBJECT_ID_NONE;
}
}
+
+/*
+ * Map a transmitter (TRANSMITTER_UNIPHY_A..G) to the matching
+ * ATOM_PHY_ID_UNIPHYx value.
+ *
+ * Any other transmitter value maps to ATOM_PHY_ID_UNIPHYA.
+ */
+uint8_t phy_id_to_atom(enum transmitter t)
+{
+	switch (t) {
+	case TRANSMITTER_UNIPHY_B:
+		return ATOM_PHY_ID_UNIPHYB;
+	case TRANSMITTER_UNIPHY_C:
+		return ATOM_PHY_ID_UNIPHYC;
+	case TRANSMITTER_UNIPHY_D:
+		return ATOM_PHY_ID_UNIPHYD;
+	case TRANSMITTER_UNIPHY_E:
+		return ATOM_PHY_ID_UNIPHYE;
+	case TRANSMITTER_UNIPHY_F:
+		return ATOM_PHY_ID_UNIPHYF;
+	case TRANSMITTER_UNIPHY_G:
+		return ATOM_PHY_ID_UNIPHYG;
+	case TRANSMITTER_UNIPHY_A:
+	default:
+		/* UNIPHY A doubles as the fallback for unknown transmitters. */
+		return ATOM_PHY_ID_UNIPHYA;
+	}
+}
+
+/*
+ * Map a clock source id to an ATOM PHY clock source id.
+ *
+ * The id is first mapped to an ATOM_TRANSMITTER_CONFIG_V5_* constant
+ * (P1PLL for any id not listed), and that constant shifted right by two
+ * bits is returned.
+ */
+uint8_t clock_source_id_to_atom_phy_clk_src_id(
+	enum clock_source_id id)
+{
+	uint8_t v5_config;
+
+	switch (id) {
+	case CLOCK_SOURCE_ID_PLL0:
+		v5_config = ATOM_TRANSMITTER_CONFIG_V5_P0PLL;
+		break;
+	case CLOCK_SOURCE_ID_PLL2:
+		v5_config = ATOM_TRANSMITTER_CONFIG_V5_P2PLL;
+		break;
+	case CLOCK_SOURCE_ID_EXTERNAL:
+		v5_config = ATOM_TRANSMITTER_CONFIG_V5_REFCLK_SRC_EXT;
+		break;
+	case CLOCK_SOURCE_ID_PLL1:
+	default:
+		/* PLL1 is also the fallback for unlisted clock sources. */
+		v5_config = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
+		break;
+	}
+
+	return v5_config >> 2;
+}
+
+/*
+ * Translate an engine id into the matching ASIC_INT_*_ENCODER_ID value.
+ *
+ * @id:             engine to translate (DIGA..DIGG or DACA are recognised)
+ * @atom_engine_id: receives the translated id; left untouched on failure
+ *
+ * Returns true when @atom_engine_id is non-NULL and @id is recognised,
+ * false otherwise.
+ */
+bool engine_bp_to_atom(enum engine_id id, uint32_t *atom_engine_id)
+{
+	uint32_t encoder_id;
+
+	if (atom_engine_id == NULL)
+		return false;
+
+	switch (id) {
+	case ENGINE_ID_DIGA:
+		encoder_id = ASIC_INT_DIG1_ENCODER_ID;
+		break;
+	case ENGINE_ID_DIGB:
+		encoder_id = ASIC_INT_DIG2_ENCODER_ID;
+		break;
+	case ENGINE_ID_DIGC:
+		encoder_id = ASIC_INT_DIG3_ENCODER_ID;
+		break;
+	case ENGINE_ID_DIGD:
+		encoder_id = ASIC_INT_DIG4_ENCODER_ID;
+		break;
+	case ENGINE_ID_DIGE:
+		encoder_id = ASIC_INT_DIG5_ENCODER_ID;
+		break;
+	case ENGINE_ID_DIGF:
+		encoder_id = ASIC_INT_DIG6_ENCODER_ID;
+		break;
+	case ENGINE_ID_DIGG:
+		encoder_id = ASIC_INT_DIG7_ENCODER_ID;
+		break;
+	case ENGINE_ID_DACA:
+		encoder_id = ASIC_INT_DAC1_ENCODER_ID;
+		break;
+	default:
+		/* Unrecognised engine: report failure without writing the output. */
+		return false;
+	}
+
+	*atom_engine_id = encoder_id;
+	return true;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.h b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.h
index dfd30aaf4032..547700e119a6 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.h
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.h
@@ -59,4 +59,12 @@ uint8_t dal_cmd_table_helper_transmitter_bp_to_atom(
uint8_t dal_cmd_table_helper_encoder_id_to_atom(
enum encoder_id id);
+
+uint8_t phy_id_to_atom(enum transmitter t);
+
+uint8_t clock_source_id_to_atom_phy_clk_src_id(
+ enum clock_source_id id);
+
+bool engine_bp_to_atom(enum engine_id id, uint32_t *atom_engine_id);
+
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
index eedc553f340e..268e2414b34f 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
@@ -65,7 +65,6 @@ bool dal_bios_parser_init_cmd_tbl_helper2(
case DCE_VERSION_12_1:
*h = dal_cmd_tbl_helper_dce112_get_table2();
return true;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
case DCN_VERSION_1_0:
case DCN_VERSION_1_01:
case DCN_VERSION_2_0:
@@ -76,12 +75,20 @@ bool dal_bios_parser_init_cmd_tbl_helper2(
case DCN_VERSION_3_02:
case DCN_VERSION_3_03:
case DCN_VERSION_3_1:
+ case DCN_VERSION_3_14:
+ case DCN_VERSION_3_15:
+ case DCN_VERSION_3_16:
+ case DCN_VERSION_3_2:
+ case DCN_VERSION_3_21:
+ case DCN_VERSION_3_5:
+ case DCN_VERSION_3_51:
+ case DCN_VERSION_3_6:
+ case DCN_VERSION_4_01:
*h = dal_cmd_tbl_helper_dce112_get_table2();
return true;
-#endif
+
default:
- /* Unsupported DCE */
- BREAK_TO_DEBUGGER();
+ *h = dal_cmd_tbl_helper_dce112_get_table2();
return false;
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/bios/dce110/command_table_helper_dce110.c b/drivers/gpu/drm/amd/display/dc/bios/dce110/command_table_helper_dce110.c
index 11bf247bb180..3099128223df 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/dce110/command_table_helper_dce110.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/dce110/command_table_helper_dce110.c
@@ -31,39 +31,6 @@
#include "../command_table_helper.h"
-static uint8_t phy_id_to_atom(enum transmitter t)
-{
- uint8_t atom_phy_id;
-
- switch (t) {
- case TRANSMITTER_UNIPHY_A:
- atom_phy_id = ATOM_PHY_ID_UNIPHYA;
- break;
- case TRANSMITTER_UNIPHY_B:
- atom_phy_id = ATOM_PHY_ID_UNIPHYB;
- break;
- case TRANSMITTER_UNIPHY_C:
- atom_phy_id = ATOM_PHY_ID_UNIPHYC;
- break;
- case TRANSMITTER_UNIPHY_D:
- atom_phy_id = ATOM_PHY_ID_UNIPHYD;
- break;
- case TRANSMITTER_UNIPHY_E:
- atom_phy_id = ATOM_PHY_ID_UNIPHYE;
- break;
- case TRANSMITTER_UNIPHY_F:
- atom_phy_id = ATOM_PHY_ID_UNIPHYF;
- break;
- case TRANSMITTER_UNIPHY_G:
- atom_phy_id = ATOM_PHY_ID_UNIPHYG;
- break;
- default:
- atom_phy_id = ATOM_PHY_ID_UNIPHYA;
- break;
- }
- return atom_phy_id;
-}
-
static uint8_t signal_type_to_atom_dig_mode(enum signal_type s)
{
uint8_t atom_dig_mode = ATOM_TRANSMITTER_DIGMODE_V5_DP;
@@ -94,32 +61,6 @@ static uint8_t signal_type_to_atom_dig_mode(enum signal_type s)
return atom_dig_mode;
}
-static uint8_t clock_source_id_to_atom_phy_clk_src_id(
- enum clock_source_id id)
-{
- uint8_t atom_phy_clk_src_id = 0;
-
- switch (id) {
- case CLOCK_SOURCE_ID_PLL0:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P0PLL;
- break;
- case CLOCK_SOURCE_ID_PLL1:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
- break;
- case CLOCK_SOURCE_ID_PLL2:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P2PLL;
- break;
- case CLOCK_SOURCE_ID_EXTERNAL:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_REFCLK_SRC_EXT;
- break;
- default:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
- break;
- }
-
- return atom_phy_clk_src_id >> 2;
-}
-
static uint8_t hpd_sel_to_atom(enum hpd_source_id id)
{
uint8_t atom_hpd_sel = 0;
@@ -207,51 +148,6 @@ static bool clock_source_id_to_atom(
return result;
}
-static bool engine_bp_to_atom(enum engine_id id, uint32_t *atom_engine_id)
-{
- bool result = false;
-
- if (atom_engine_id != NULL)
- switch (id) {
- case ENGINE_ID_DIGA:
- *atom_engine_id = ASIC_INT_DIG1_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGB:
- *atom_engine_id = ASIC_INT_DIG2_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGC:
- *atom_engine_id = ASIC_INT_DIG3_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGD:
- *atom_engine_id = ASIC_INT_DIG4_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGE:
- *atom_engine_id = ASIC_INT_DIG5_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGF:
- *atom_engine_id = ASIC_INT_DIG6_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGG:
- *atom_engine_id = ASIC_INT_DIG7_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DACA:
- *atom_engine_id = ASIC_INT_DAC1_ENCODER_ID;
- result = true;
- break;
- default:
- break;
- }
-
- return result;
-}
-
static uint8_t encoder_action_to_atom(enum bp_encoder_control_action action)
{
uint8_t atom_action = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper2_dce112.c b/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper2_dce112.c
index 755b6e33140a..349f0e5d5856 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper2_dce112.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper2_dce112.c
@@ -29,40 +29,9 @@
#include "include/bios_parser_types.h"
-#include "../command_table_helper2.h"
-
-static uint8_t phy_id_to_atom(enum transmitter t)
-{
- uint8_t atom_phy_id;
+#include "../command_table_helper.h"
- switch (t) {
- case TRANSMITTER_UNIPHY_A:
- atom_phy_id = ATOM_PHY_ID_UNIPHYA;
- break;
- case TRANSMITTER_UNIPHY_B:
- atom_phy_id = ATOM_PHY_ID_UNIPHYB;
- break;
- case TRANSMITTER_UNIPHY_C:
- atom_phy_id = ATOM_PHY_ID_UNIPHYC;
- break;
- case TRANSMITTER_UNIPHY_D:
- atom_phy_id = ATOM_PHY_ID_UNIPHYD;
- break;
- case TRANSMITTER_UNIPHY_E:
- atom_phy_id = ATOM_PHY_ID_UNIPHYE;
- break;
- case TRANSMITTER_UNIPHY_F:
- atom_phy_id = ATOM_PHY_ID_UNIPHYF;
- break;
- case TRANSMITTER_UNIPHY_G:
- atom_phy_id = ATOM_PHY_ID_UNIPHYG;
- break;
- default:
- atom_phy_id = ATOM_PHY_ID_UNIPHYA;
- break;
- }
- return atom_phy_id;
-}
+#include "../command_table_helper2.h"
static uint8_t signal_type_to_atom_dig_mode(enum signal_type s)
{
@@ -91,32 +60,6 @@ static uint8_t signal_type_to_atom_dig_mode(enum signal_type s)
return atom_dig_mode;
}
-static uint8_t clock_source_id_to_atom_phy_clk_src_id(
- enum clock_source_id id)
-{
- uint8_t atom_phy_clk_src_id = 0;
-
- switch (id) {
- case CLOCK_SOURCE_ID_PLL0:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P0PLL;
- break;
- case CLOCK_SOURCE_ID_PLL1:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
- break;
- case CLOCK_SOURCE_ID_PLL2:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P2PLL;
- break;
- case CLOCK_SOURCE_ID_EXTERNAL:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_REFCLK_SRC_EXT;
- break;
- default:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
- break;
- }
-
- return atom_phy_clk_src_id >> 2;
-}
-
static uint8_t hpd_sel_to_atom(enum hpd_source_id id)
{
uint8_t atom_hpd_sel = 0;
@@ -209,51 +152,6 @@ static bool clock_source_id_to_atom(
return result;
}
-static bool engine_bp_to_atom(enum engine_id id, uint32_t *atom_engine_id)
-{
- bool result = false;
-
- if (atom_engine_id != NULL)
- switch (id) {
- case ENGINE_ID_DIGA:
- *atom_engine_id = ASIC_INT_DIG1_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGB:
- *atom_engine_id = ASIC_INT_DIG2_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGC:
- *atom_engine_id = ASIC_INT_DIG3_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGD:
- *atom_engine_id = ASIC_INT_DIG4_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGE:
- *atom_engine_id = ASIC_INT_DIG5_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGF:
- *atom_engine_id = ASIC_INT_DIG6_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGG:
- *atom_engine_id = ASIC_INT_DIG7_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DACA:
- *atom_engine_id = ASIC_INT_DAC1_ENCODER_ID;
- result = true;
- break;
- default:
- break;
- }
-
- return result;
-}
-
static uint8_t encoder_action_to_atom(enum bp_encoder_control_action action)
{
uint8_t atom_action = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper_dce112.c b/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper_dce112.c
index 06b4f7fa4a50..1a5fefcde8af 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper_dce112.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper_dce112.c
@@ -31,39 +31,6 @@
#include "../command_table_helper.h"
-static uint8_t phy_id_to_atom(enum transmitter t)
-{
- uint8_t atom_phy_id;
-
- switch (t) {
- case TRANSMITTER_UNIPHY_A:
- atom_phy_id = ATOM_PHY_ID_UNIPHYA;
- break;
- case TRANSMITTER_UNIPHY_B:
- atom_phy_id = ATOM_PHY_ID_UNIPHYB;
- break;
- case TRANSMITTER_UNIPHY_C:
- atom_phy_id = ATOM_PHY_ID_UNIPHYC;
- break;
- case TRANSMITTER_UNIPHY_D:
- atom_phy_id = ATOM_PHY_ID_UNIPHYD;
- break;
- case TRANSMITTER_UNIPHY_E:
- atom_phy_id = ATOM_PHY_ID_UNIPHYE;
- break;
- case TRANSMITTER_UNIPHY_F:
- atom_phy_id = ATOM_PHY_ID_UNIPHYF;
- break;
- case TRANSMITTER_UNIPHY_G:
- atom_phy_id = ATOM_PHY_ID_UNIPHYG;
- break;
- default:
- atom_phy_id = ATOM_PHY_ID_UNIPHYA;
- break;
- }
- return atom_phy_id;
-}
-
static uint8_t signal_type_to_atom_dig_mode(enum signal_type s)
{
uint8_t atom_dig_mode = ATOM_TRANSMITTER_DIGMODE_V6_DP;
@@ -91,32 +58,6 @@ static uint8_t signal_type_to_atom_dig_mode(enum signal_type s)
return atom_dig_mode;
}
-static uint8_t clock_source_id_to_atom_phy_clk_src_id(
- enum clock_source_id id)
-{
- uint8_t atom_phy_clk_src_id = 0;
-
- switch (id) {
- case CLOCK_SOURCE_ID_PLL0:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P0PLL;
- break;
- case CLOCK_SOURCE_ID_PLL1:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
- break;
- case CLOCK_SOURCE_ID_PLL2:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P2PLL;
- break;
- case CLOCK_SOURCE_ID_EXTERNAL:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_REFCLK_SRC_EXT;
- break;
- default:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
- break;
- }
-
- return atom_phy_clk_src_id >> 2;
-}
-
static uint8_t hpd_sel_to_atom(enum hpd_source_id id)
{
uint8_t atom_hpd_sel = 0;
@@ -209,51 +150,6 @@ static bool clock_source_id_to_atom(
return result;
}
-static bool engine_bp_to_atom(enum engine_id id, uint32_t *atom_engine_id)
-{
- bool result = false;
-
- if (atom_engine_id != NULL)
- switch (id) {
- case ENGINE_ID_DIGA:
- *atom_engine_id = ASIC_INT_DIG1_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGB:
- *atom_engine_id = ASIC_INT_DIG2_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGC:
- *atom_engine_id = ASIC_INT_DIG3_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGD:
- *atom_engine_id = ASIC_INT_DIG4_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGE:
- *atom_engine_id = ASIC_INT_DIG5_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGF:
- *atom_engine_id = ASIC_INT_DIG6_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGG:
- *atom_engine_id = ASIC_INT_DIG7_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DACA:
- *atom_engine_id = ASIC_INT_DAC1_ENCODER_ID;
- result = true;
- break;
- default:
- break;
- }
-
- return result;
-}
-
static uint8_t encoder_action_to_atom(enum bp_encoder_control_action action)
{
uint8_t atom_action = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/bios/dce60/command_table_helper_dce60.c b/drivers/gpu/drm/amd/display/dc/bios/dce60/command_table_helper_dce60.c
index 710221b4f5c5..01ccc803040c 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/dce60/command_table_helper_dce60.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/dce60/command_table_helper_dce60.c
@@ -58,51 +58,6 @@ static uint8_t encoder_action_to_atom(enum bp_encoder_control_action action)
return atom_action;
}
-static bool engine_bp_to_atom(enum engine_id id, uint32_t *atom_engine_id)
-{
- bool result = false;
-
- if (atom_engine_id != NULL)
- switch (id) {
- case ENGINE_ID_DIGA:
- *atom_engine_id = ASIC_INT_DIG1_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGB:
- *atom_engine_id = ASIC_INT_DIG2_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGC:
- *atom_engine_id = ASIC_INT_DIG3_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGD:
- *atom_engine_id = ASIC_INT_DIG4_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGE:
- *atom_engine_id = ASIC_INT_DIG5_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGF:
- *atom_engine_id = ASIC_INT_DIG6_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGG:
- *atom_engine_id = ASIC_INT_DIG7_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DACA:
- *atom_engine_id = ASIC_INT_DAC1_ENCODER_ID;
- result = true;
- break;
- default:
- break;
- }
-
- return result;
-}
-
static bool clock_source_id_to_atom(
enum clock_source_id id,
uint32_t *atom_pll_id)
@@ -149,32 +104,6 @@ static bool clock_source_id_to_atom(
return result;
}
-static uint8_t clock_source_id_to_atom_phy_clk_src_id(
- enum clock_source_id id)
-{
- uint8_t atom_phy_clk_src_id = 0;
-
- switch (id) {
- case CLOCK_SOURCE_ID_PLL0:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P0PLL;
- break;
- case CLOCK_SOURCE_ID_PLL1:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
- break;
- case CLOCK_SOURCE_ID_PLL2:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P2PLL;
- break;
- case CLOCK_SOURCE_ID_EXTERNAL:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_REFCLK_SRC_EXT;
- break;
- default:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
- break;
- }
-
- return atom_phy_clk_src_id >> 2;
-}
-
static uint8_t signal_type_to_atom_dig_mode(enum signal_type s)
{
uint8_t atom_dig_mode = ATOM_TRANSMITTER_DIGMODE_V5_DP;
@@ -270,39 +199,6 @@ static uint8_t dig_encoder_sel_to_atom(enum engine_id id)
return atom_dig_encoder_sel;
}
-static uint8_t phy_id_to_atom(enum transmitter t)
-{
- uint8_t atom_phy_id;
-
- switch (t) {
- case TRANSMITTER_UNIPHY_A:
- atom_phy_id = ATOM_PHY_ID_UNIPHYA;
- break;
- case TRANSMITTER_UNIPHY_B:
- atom_phy_id = ATOM_PHY_ID_UNIPHYB;
- break;
- case TRANSMITTER_UNIPHY_C:
- atom_phy_id = ATOM_PHY_ID_UNIPHYC;
- break;
- case TRANSMITTER_UNIPHY_D:
- atom_phy_id = ATOM_PHY_ID_UNIPHYD;
- break;
- case TRANSMITTER_UNIPHY_E:
- atom_phy_id = ATOM_PHY_ID_UNIPHYE;
- break;
- case TRANSMITTER_UNIPHY_F:
- atom_phy_id = ATOM_PHY_ID_UNIPHYF;
- break;
- case TRANSMITTER_UNIPHY_G:
- atom_phy_id = ATOM_PHY_ID_UNIPHYG;
- break;
- default:
- atom_phy_id = ATOM_PHY_ID_UNIPHYA;
- break;
- }
- return atom_phy_id;
-}
-
static uint8_t disp_power_gating_action_to_atom(
enum bp_pipe_control_action action)
{
diff --git a/drivers/gpu/drm/amd/display/dc/bios/dce80/command_table_helper_dce80.c b/drivers/gpu/drm/amd/display/dc/bios/dce80/command_table_helper_dce80.c
index 8b30b558cf1f..2ec5264536c7 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/dce80/command_table_helper_dce80.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/dce80/command_table_helper_dce80.c
@@ -58,51 +58,6 @@ static uint8_t encoder_action_to_atom(enum bp_encoder_control_action action)
return atom_action;
}
-static bool engine_bp_to_atom(enum engine_id id, uint32_t *atom_engine_id)
-{
- bool result = false;
-
- if (atom_engine_id != NULL)
- switch (id) {
- case ENGINE_ID_DIGA:
- *atom_engine_id = ASIC_INT_DIG1_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGB:
- *atom_engine_id = ASIC_INT_DIG2_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGC:
- *atom_engine_id = ASIC_INT_DIG3_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGD:
- *atom_engine_id = ASIC_INT_DIG4_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGE:
- *atom_engine_id = ASIC_INT_DIG5_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGF:
- *atom_engine_id = ASIC_INT_DIG6_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGG:
- *atom_engine_id = ASIC_INT_DIG7_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DACA:
- *atom_engine_id = ASIC_INT_DAC1_ENCODER_ID;
- result = true;
- break;
- default:
- break;
- }
-
- return result;
-}
-
static bool clock_source_id_to_atom(
enum clock_source_id id,
uint32_t *atom_pll_id)
@@ -149,32 +104,6 @@ static bool clock_source_id_to_atom(
return result;
}
-static uint8_t clock_source_id_to_atom_phy_clk_src_id(
- enum clock_source_id id)
-{
- uint8_t atom_phy_clk_src_id = 0;
-
- switch (id) {
- case CLOCK_SOURCE_ID_PLL0:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P0PLL;
- break;
- case CLOCK_SOURCE_ID_PLL1:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
- break;
- case CLOCK_SOURCE_ID_PLL2:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P2PLL;
- break;
- case CLOCK_SOURCE_ID_EXTERNAL:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_REFCLK_SRC_EXT;
- break;
- default:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
- break;
- }
-
- return atom_phy_clk_src_id >> 2;
-}
-
static uint8_t signal_type_to_atom_dig_mode(enum signal_type s)
{
uint8_t atom_dig_mode = ATOM_TRANSMITTER_DIGMODE_V5_DP;
@@ -270,39 +199,6 @@ static uint8_t dig_encoder_sel_to_atom(enum engine_id id)
return atom_dig_encoder_sel;
}
-static uint8_t phy_id_to_atom(enum transmitter t)
-{
- uint8_t atom_phy_id;
-
- switch (t) {
- case TRANSMITTER_UNIPHY_A:
- atom_phy_id = ATOM_PHY_ID_UNIPHYA;
- break;
- case TRANSMITTER_UNIPHY_B:
- atom_phy_id = ATOM_PHY_ID_UNIPHYB;
- break;
- case TRANSMITTER_UNIPHY_C:
- atom_phy_id = ATOM_PHY_ID_UNIPHYC;
- break;
- case TRANSMITTER_UNIPHY_D:
- atom_phy_id = ATOM_PHY_ID_UNIPHYD;
- break;
- case TRANSMITTER_UNIPHY_E:
- atom_phy_id = ATOM_PHY_ID_UNIPHYE;
- break;
- case TRANSMITTER_UNIPHY_F:
- atom_phy_id = ATOM_PHY_ID_UNIPHYF;
- break;
- case TRANSMITTER_UNIPHY_G:
- atom_phy_id = ATOM_PHY_ID_UNIPHYG;
- break;
- default:
- atom_phy_id = ATOM_PHY_ID_UNIPHYA;
- break;
- }
- return atom_phy_id;
-}
-
static uint8_t disp_power_gating_action_to_atom(
enum bp_pipe_control_action action)
{
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
deleted file mode 100644
index f3c00f479e1c..000000000000
--- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile
+++ /dev/null
@@ -1,68 +0,0 @@
-#
-# Copyright 2017 Advanced Micro Devices, Inc.
-# Copyright 2019 Raptor Engineering, LLC
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-#
-# Makefile for the 'calcs' sub-component of DAL.
-# It calculates Bandwidth and Watermarks values for HW programming
-#
-
-ifdef CONFIG_X86
-calcs_ccflags := -mhard-float -msse
-endif
-
-ifdef CONFIG_PPC64
-calcs_ccflags := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-calcs_ccflags += -mpreferred-stack-boundary=4
-else
-calcs_ccflags += -msse2
-endif
-endif
-
-CFLAGS_$(AMDDALPATH)/dc/calcs/dcn_calcs.o := $(calcs_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/calcs/dcn_calc_auto.o := $(calcs_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/calcs/dcn_calc_math.o := $(calcs_ccflags) -Wno-tautological-compare
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/calcs/dcn_calcs.o := $(calcs_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/calcs/dcn_calc_auto.o := $(calcs_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/calcs/dcn_calc_math.o := $(calcs_rcflags)
-
-BW_CALCS = dce_calcs.o bw_fixed.o custom_float.o
-
-ifdef CONFIG_DRM_AMD_DC_DCN
-BW_CALCS += dcn_calcs.o dcn_calc_math.o dcn_calc_auto.o
-endif
-
-AMD_DAL_BW_CALCS = $(addprefix $(AMDDALPATH)/dc/calcs/,$(BW_CALCS))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_BW_CALCS)
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/custom_float.c b/drivers/gpu/drm/amd/display/dc/calcs/custom_float.c
deleted file mode 100644
index 31d167bc548f..000000000000
--- a/drivers/gpu/drm/amd/display/dc/calcs/custom_float.c
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * Copyright 2017 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-#include "dm_services.h"
-#include "custom_float.h"
-
-
-static bool build_custom_float(
- struct fixed31_32 value,
- const struct custom_float_format *format,
- bool *negative,
- uint32_t *mantissa,
- uint32_t *exponenta)
-{
- uint32_t exp_offset = (1 << (format->exponenta_bits - 1)) - 1;
-
- const struct fixed31_32 mantissa_constant_plus_max_fraction =
- dc_fixpt_from_fraction(
- (1LL << (format->mantissa_bits + 1)) - 1,
- 1LL << format->mantissa_bits);
-
- struct fixed31_32 mantiss;
-
- if (dc_fixpt_eq(
- value,
- dc_fixpt_zero)) {
- *negative = false;
- *mantissa = 0;
- *exponenta = 0;
- return true;
- }
-
- if (dc_fixpt_lt(
- value,
- dc_fixpt_zero)) {
- *negative = format->sign;
- value = dc_fixpt_neg(value);
- } else {
- *negative = false;
- }
-
- if (dc_fixpt_lt(
- value,
- dc_fixpt_one)) {
- uint32_t i = 1;
-
- do {
- value = dc_fixpt_shl(value, 1);
- ++i;
- } while (dc_fixpt_lt(
- value,
- dc_fixpt_one));
-
- --i;
-
- if (exp_offset <= i) {
- *mantissa = 0;
- *exponenta = 0;
- return true;
- }
-
- *exponenta = exp_offset - i;
- } else if (dc_fixpt_le(
- mantissa_constant_plus_max_fraction,
- value)) {
- uint32_t i = 1;
-
- do {
- value = dc_fixpt_shr(value, 1);
- ++i;
- } while (dc_fixpt_lt(
- mantissa_constant_plus_max_fraction,
- value));
-
- *exponenta = exp_offset + i - 1;
- } else {
- *exponenta = exp_offset;
- }
-
- mantiss = dc_fixpt_sub(
- value,
- dc_fixpt_one);
-
- if (dc_fixpt_lt(
- mantiss,
- dc_fixpt_zero) ||
- dc_fixpt_lt(
- dc_fixpt_one,
- mantiss))
- mantiss = dc_fixpt_zero;
- else
- mantiss = dc_fixpt_shl(
- mantiss,
- format->mantissa_bits);
-
- *mantissa = dc_fixpt_floor(mantiss);
-
- return true;
-}
-
-static bool setup_custom_float(
- const struct custom_float_format *format,
- bool negative,
- uint32_t mantissa,
- uint32_t exponenta,
- uint32_t *result)
-{
- uint32_t i = 0;
- uint32_t j = 0;
-
- uint32_t value = 0;
-
- /* verification code:
- * once calculation is ok we can remove it
- */
-
- const uint32_t mantissa_mask =
- (1 << (format->mantissa_bits + 1)) - 1;
-
- const uint32_t exponenta_mask =
- (1 << (format->exponenta_bits + 1)) - 1;
-
- if (mantissa & ~mantissa_mask) {
- BREAK_TO_DEBUGGER();
- mantissa = mantissa_mask;
- }
-
- if (exponenta & ~exponenta_mask) {
- BREAK_TO_DEBUGGER();
- exponenta = exponenta_mask;
- }
-
- /* end of verification code */
-
- while (i < format->mantissa_bits) {
- uint32_t mask = 1 << i;
-
- if (mantissa & mask)
- value |= mask;
-
- ++i;
- }
-
- while (j < format->exponenta_bits) {
- uint32_t mask = 1 << j;
-
- if (exponenta & mask)
- value |= mask << i;
-
- ++j;
- }
-
- if (negative && format->sign)
- value |= 1 << (i + j);
-
- *result = value;
-
- return true;
-}
-
-bool convert_to_custom_float_format(
- struct fixed31_32 value,
- const struct custom_float_format *format,
- uint32_t *result)
-{
- uint32_t mantissa;
- uint32_t exponenta;
- bool negative;
-
- return build_custom_float(
- value, format, &negative, &mantissa, &exponenta) &&
- setup_custom_float(
- format, negative, mantissa, exponenta, result);
-}
-
-
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
index 6bd73e49a6d2..60021671b386 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
@@ -74,7 +74,7 @@ CLK_MGR_DCE120 = dce120_clk_mgr.o
AMD_DAL_CLK_MGR_DCE120 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dce120/,$(CLK_MGR_DCE120))
AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCE120)
-ifdef CONFIG_DRM_AMD_DC_DCN
+ifdef CONFIG_DRM_AMD_DC_FP
###############################################################################
# DCN10
###############################################################################
@@ -83,7 +83,6 @@ CLK_MGR_DCN10 = rv1_clk_mgr.o rv1_clk_mgr_vbios_smu.o rv2_clk_mgr.o
AMD_DAL_CLK_MGR_DCN10 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn10/,$(CLK_MGR_DCN10))
AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN10)
-
###############################################################################
# DCN20
###############################################################################
@@ -107,25 +106,13 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN201)
###############################################################################
CLK_MGR_DCN21 = rn_clk_mgr.o rn_clk_mgr_vbios_smu.o
-# prevent build errors regarding soft-float vs hard-float FP ABI tags
-# this code is currently unused on ppc64, as it applies to Renoir APUs only
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := $(call cc-option,-mno-gnu-attribute)
-endif
-
AMD_DAL_CLK_MGR_DCN21 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn21/,$(CLK_MGR_DCN21))
AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21)
###############################################################################
# DCN30
###############################################################################
-CLK_MGR_DCN30 = dcn30_clk_mgr.o dcn30_clk_mgr_smu_msg.o
-
-# prevent build errors regarding soft-float vs hard-float FP ABI tags
-# this code is currently unused on ppc64, as it applies to VanGogh APUs only
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := $(call cc-option,-mno-gnu-attribute)
-endif
+CLK_MGR_DCN30 = dcn30_clk_mgr.o dcn30_clk_mgr_smu_msg.o dcn30m_clk_mgr.o dcn30m_clk_mgr_smu_msg.o
AMD_DAL_CLK_MGR_DCN30 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn30/,$(CLK_MGR_DCN30))
@@ -135,12 +122,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30)
###############################################################################
CLK_MGR_DCN301 = vg_clk_mgr.o dcn301_smu.o
-# prevent build errors regarding soft-float vs hard-float FP ABI tags
-# this code is currently unused on ppc64, as it applies to VanGogh APUs only
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn301/vg_clk_mgr.o := $(call cc-option,-mno-gnu-attribute)
-endif
-
AMD_DAL_CLK_MGR_DCN301 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn301/,$(CLK_MGR_DCN301))
AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN301)
@@ -153,4 +134,58 @@ CLK_MGR_DCN31 = dcn31_smu.o dcn31_clk_mgr.o
AMD_DAL_CLK_MGR_DCN31 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn31/,$(CLK_MGR_DCN31))
AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN31)
+
+###############################################################################
+# DCN314
+###############################################################################
+CLK_MGR_DCN314 = dcn314_smu.o dcn314_clk_mgr.o
+
+AMD_DAL_CLK_MGR_DCN314 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn314/,$(CLK_MGR_DCN314))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN314)
+
+###############################################################################
+# DCN315
+###############################################################################
+CLK_MGR_DCN315 = dcn315_smu.o dcn315_clk_mgr.o
+
+AMD_DAL_CLK_MGR_DCN315 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn315/,$(CLK_MGR_DCN315))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN315)
+
+###############################################################################
+# DCN316
+###############################################################################
+CLK_MGR_DCN316 = dcn316_smu.o dcn316_clk_mgr.o
+
+AMD_DAL_CLK_MGR_DCN316 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn316/,$(CLK_MGR_DCN316))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN316)
+
+###############################################################################
+# DCN32
+###############################################################################
+CLK_MGR_DCN32 = dcn32_clk_mgr.o dcn32_clk_mgr_smu_msg.o
+
+AMD_DAL_CLK_MGR_DCN32 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn32/,$(CLK_MGR_DCN32))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN32)
+
+###############################################################################
+# DCN35
+###############################################################################
+CLK_MGR_DCN35 = dcn35_smu.o dcn351_clk_mgr.o dcn35_clk_mgr.o
+
+AMD_DAL_CLK_MGR_DCN35 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn35/,$(CLK_MGR_DCN35))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN35)
+
+###############################################################################
+# DCN401
+###############################################################################
+CLK_MGR_DCN401 = dcn401_clk_mgr.o dcn401_clk_mgr_smu_msg.o
+
+AMD_DAL_CLK_MGR_DCN401 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn401/,$(CLK_MGR_DCN401))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN401)
endif
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
index 26f96ee32472..15cf13ec5302 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
@@ -23,12 +23,12 @@
*
*/
-#include <linux/slab.h>
-
#include "dal_asic_id.h"
#include "dc_types.h"
#include "dccg.h"
#include "clk_mgr_internal.h"
+#include "dc_state_priv.h"
+#include "link_service.h"
#include "dce100/dce_clk_mgr.h"
#include "dce110/dce110_clk_mgr.h"
@@ -43,7 +43,12 @@
#include "dcn30/dcn30_clk_mgr.h"
#include "dcn301/vg_clk_mgr.h"
#include "dcn31/dcn31_clk_mgr.h"
-
+#include "dcn314/dcn314_clk_mgr.h"
+#include "dcn315/dcn315_clk_mgr.h"
+#include "dcn316/dcn316_clk_mgr.h"
+#include "dcn32/dcn32_clk_mgr.h"
+#include "dcn35/dcn35_clk_mgr.h"
+#include "dcn401/dcn401_clk_mgr.h"
int clk_mgr_helper_get_active_display_cnt(
struct dc *dc,
@@ -54,14 +59,15 @@ int clk_mgr_helper_get_active_display_cnt(
display_count = 0;
for (i = 0; i < context->stream_count; i++) {
const struct dc_stream_state *stream = context->streams[i];
+ const struct dc_stream_status *stream_status = &context->stream_status[i];
- /*
- * Only notify active stream or virtual stream.
- * Need to notify virtual stream to work around
- * headless case. HPD does not fire when system is in
- * S0i2.
+ /* Don't count SubVP phantom pipes as part of active
+ * display count
*/
- if (!stream->dpms_off || stream->signal == SIGNAL_TYPE_VIRTUAL)
+ if (dc_state_get_stream_subvp_type(context, stream) == SUBVP_PHANTOM)
+ continue;
+
+ if (!stream->dpms_off || dc->is_switch_in_progress_dest || (stream_status && stream_status->plane_count))
display_count++;
}
@@ -94,7 +100,7 @@ void clk_mgr_exit_optimized_pwr_state(const struct dc *dc, struct clk_mgr *clk_m
int edp_num;
unsigned int panel_inst;
- get_edp_links(dc, edp_links, &edp_num);
+ dc_get_edp_links(dc, edp_links, &edp_num);
if (dc->hwss.exit_optimized_pwr_state)
dc->hwss.exit_optimized_pwr_state(dc, dc->current_state);
@@ -106,7 +112,8 @@ void clk_mgr_exit_optimized_pwr_state(const struct dc *dc, struct clk_mgr *clk_m
if (!edp_link->psr_settings.psr_feature_enabled)
continue;
clk_mgr->psr_allow_active_cache = edp_link->psr_settings.psr_allow_active;
- dc_link_set_psr_allow_active(edp_link, &allow_active, false, false, NULL);
+ dc->link_srv->edp_set_psr_allow_active(edp_link, &allow_active, false, false, NULL);
+ dc->link_srv->edp_set_replay_allow_active(edp_link, &allow_active, false, false, NULL);
}
}
@@ -119,13 +126,15 @@ void clk_mgr_optimize_pwr_state(const struct dc *dc, struct clk_mgr *clk_mgr)
int edp_num;
unsigned int panel_inst;
- get_edp_links(dc, edp_links, &edp_num);
+ dc_get_edp_links(dc, edp_links, &edp_num);
if (edp_num) {
for (panel_inst = 0; panel_inst < edp_num; panel_inst++) {
edp_link = edp_links[panel_inst];
if (!edp_link->psr_settings.psr_feature_enabled)
continue;
- dc_link_set_psr_allow_active(edp_link,
+ dc->link_srv->edp_set_psr_allow_active(edp_link,
+ &clk_mgr->psr_allow_active_cache, false, false, NULL);
+ dc->link_srv->edp_set_replay_allow_active(edp_link,
&clk_mgr->psr_allow_active_cache, false, false, NULL);
}
}
@@ -149,7 +158,6 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
return NULL;
}
dce60_clk_mgr_construct(ctx, clk_mgr);
- dce_clk_mgr_construct(ctx, clk_mgr);
return &clk_mgr->base;
}
#endif
@@ -211,7 +219,7 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
dce120_clk_mgr_construct(ctx, clk_mgr);
return &clk_mgr->base;
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
+#if defined(CONFIG_DRM_AMD_DC_FP)
case FAMILY_RV: {
struct clk_mgr_internal *clk_mgr = kzalloc(sizeof(*clk_mgr), GFP_KERNEL);
@@ -259,7 +267,7 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
dcn3_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
return &clk_mgr->base;
}
- if (asic_id.chip_id == DEVICE_ID_NV_13FE) {
+ if (ctx->dce_version == DCN_VERSION_2_01) {
dcn201_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
return &clk_mgr->base;
}
@@ -278,6 +286,7 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
return &clk_mgr->base.base;
}
break;
+
case FAMILY_YELLOW_CARP: {
struct clk_mgr_dcn31 *clk_mgr = kzalloc(sizeof(*clk_mgr), GFP_KERNEL);
@@ -289,8 +298,83 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
dcn31_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
return &clk_mgr->base.base;
}
-#endif
+ break;
+ case AMDGPU_FAMILY_GC_10_3_6: {
+ struct clk_mgr_dcn315 *clk_mgr = kzalloc(sizeof(*clk_mgr), GFP_KERNEL);
+
+ if (clk_mgr == NULL) {
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+
+ dcn315_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
+ return &clk_mgr->base.base;
+ }
+ break;
+ case AMDGPU_FAMILY_GC_10_3_7: {
+ struct clk_mgr_dcn316 *clk_mgr = kzalloc(sizeof(*clk_mgr), GFP_KERNEL);
+
+ if (clk_mgr == NULL) {
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+
+ dcn316_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
+ return &clk_mgr->base.base;
+ }
+ break;
+ case AMDGPU_FAMILY_GC_11_0_0: {
+ struct clk_mgr_internal *clk_mgr = kzalloc(sizeof(*clk_mgr), GFP_KERNEL);
+ if (clk_mgr == NULL) {
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+ dcn32_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
+ return &clk_mgr->base;
+ }
+
+ case AMDGPU_FAMILY_GC_11_0_1: {
+ struct clk_mgr_dcn314 *clk_mgr = kzalloc(sizeof(*clk_mgr), GFP_KERNEL);
+
+ if (clk_mgr == NULL) {
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+
+ dcn314_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
+ return &clk_mgr->base.base;
+ }
+ break;
+
+ case AMDGPU_FAMILY_GC_11_5_0: {
+ struct clk_mgr_dcn35 *clk_mgr = kzalloc(sizeof(*clk_mgr), GFP_KERNEL);
+
+ if (clk_mgr == NULL) {
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+ if (ctx->dce_version == DCN_VERSION_3_51)
+ dcn351_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
+ else
+ dcn35_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
+
+ return &clk_mgr->base.base;
+ }
+ break;
+
+ case AMDGPU_FAMILY_GC_12_0_0: {
+ struct clk_mgr_internal *clk_mgr = dcn401_clk_mgr_construct(ctx, dccg);
+
+ if (clk_mgr == NULL) {
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+
+ return &clk_mgr->base;
+ }
+ break;
+#endif /* CONFIG_DRM_AMD_DC_FP */
default:
ASSERT(0); /* Unknown Asic */
break;
@@ -303,13 +387,12 @@ void dc_destroy_clk_mgr(struct clk_mgr *clk_mgr_base)
{
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
-#ifdef CONFIG_DRM_AMD_DC_DCN
+#ifdef CONFIG_DRM_AMD_DC_FP
switch (clk_mgr_base->ctx->asic_id.chip_family) {
case FAMILY_NV:
if (ASICREV_IS_SIENNA_CICHLID_P(clk_mgr_base->ctx->asic_id.hw_internal_rev)) {
dcn3_clk_mgr_destroy(clk_mgr);
- }
- if (ASICREV_IS_DIMGREY_CAVEFISH_P(clk_mgr_base->ctx->asic_id.hw_internal_rev)) {
+ } else if (ASICREV_IS_DIMGREY_CAVEFISH_P(clk_mgr_base->ctx->asic_id.hw_internal_rev)) {
dcn3_clk_mgr_destroy(clk_mgr);
}
if (ASICREV_IS_BEIGE_GOBY_P(clk_mgr_base->ctx->asic_id.hw_internal_rev)) {
@@ -323,13 +406,36 @@ void dc_destroy_clk_mgr(struct clk_mgr *clk_mgr_base)
break;
case FAMILY_YELLOW_CARP:
- dcn31_clk_mgr_destroy(clk_mgr);
+ dcn31_clk_mgr_destroy(clk_mgr);
+ break;
+
+ case AMDGPU_FAMILY_GC_10_3_6:
+ dcn315_clk_mgr_destroy(clk_mgr);
+ break;
+
+ case AMDGPU_FAMILY_GC_10_3_7:
+ dcn316_clk_mgr_destroy(clk_mgr);
+ break;
+
+ case AMDGPU_FAMILY_GC_11_0_0:
+ dcn32_clk_mgr_destroy(clk_mgr);
+ break;
+
+ case AMDGPU_FAMILY_GC_11_0_1:
+ dcn314_clk_mgr_destroy(clk_mgr);
+ break;
+
+ case AMDGPU_FAMILY_GC_11_5_0:
+ dcn35_clk_mgr_destroy(clk_mgr);
+ break;
+ case AMDGPU_FAMILY_GC_12_0_0:
+ dcn401_clk_mgr_destroy(clk_mgr);
break;
default:
break;
}
-#endif
+#endif /* CONFIG_DRM_AMD_DC_FP */
kfree(clk_mgr);
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c
index b210f8e9d592..6131ede2db7a 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c
@@ -72,9 +72,9 @@ static const struct state_dependent_clocks dce80_max_clks_by_state[] = {
/* ClocksStateLow */
{ .display_clk_khz = 352000, .pixel_clk_khz = 330000},
/* ClocksStateNominal */
-{ .display_clk_khz = 600000, .pixel_clk_khz = 400000 },
+{ .display_clk_khz = 625000, .pixel_clk_khz = 400000 },
/* ClocksStatePerformance */
-{ .display_clk_khz = 600000, .pixel_clk_khz = 400000 } };
+{ .display_clk_khz = 625000, .pixel_clk_khz = 400000 } };
int dentist_get_divider_from_did(int did)
{
@@ -245,6 +245,11 @@ int dce_set_clock(
pxl_clk_params.target_pixel_clock_100hz = requested_clk_khz * 10;
pxl_clk_params.pll_id = CLOCK_SOURCE_ID_DFS;
+ /* DCE 6.0, DCE 6.4: engine clock is the same as PLL0 */
+ if (clk_mgr_base->ctx->dce_version == DCE_VERSION_6_0 ||
+ clk_mgr_base->ctx->dce_version == DCE_VERSION_6_4)
+ pxl_clk_params.pll_id = CLOCK_SOURCE_ID_PLL0;
+
if (clk_mgr_dce->dfs_bypass_active)
pxl_clk_params.flags.SET_DISPCLK_DFS_BYPASS = true;
@@ -374,6 +379,8 @@ void dce_clock_read_ss_info(struct clk_mgr_internal *clk_mgr_dce)
clk_mgr_dce->dprefclk_ss_percentage =
info.spread_spectrum_percentage;
}
+ if (clk_mgr_dce->base.ctx->dc->config.ignore_dpref_ss)
+ clk_mgr_dce->dprefclk_ss_percentage = 0;
}
}
}
@@ -384,8 +391,6 @@ static void dce_pplib_apply_display_requirements(
{
struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg;
- pp_display_cfg->avail_mclk_switch_time_us = dce110_get_min_vblank_time_us(context);
-
dce110_fill_display_configs(context, pp_display_cfg);
if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0)
@@ -398,11 +403,9 @@ static void dce_update_clocks(struct clk_mgr *clk_mgr_base,
{
struct clk_mgr_internal *clk_mgr_dce = TO_CLK_MGR_INTERNAL(clk_mgr_base);
struct dm_pp_power_level_change_request level_change_req;
- int patched_disp_clk = context->bw_ctx.bw.dce.dispclk_khz;
-
- /*TODO: W/A for dal3 linux, investigate why this works */
- if (!clk_mgr_dce->dfs_bypass_active)
- patched_disp_clk = patched_disp_clk * 115 / 100;
+ const int max_disp_clk =
+ clk_mgr_dce->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz;
+ int patched_disp_clk = MIN(max_disp_clk, context->bw_ctx.bw.dce.dispclk_khz);
level_change_req.power_level = dce_get_required_clocks_state(clk_mgr_base, context);
/* get max clock state from PPLIB */
@@ -460,6 +463,9 @@ void dce_clk_mgr_construct(
clk_mgr->max_clks_state = DM_PP_CLOCKS_STATE_NOMINAL;
clk_mgr->cur_min_clks_state = DM_PP_CLOCKS_STATE_INVALID;
+ base->clks.max_supported_dispclk_khz =
+ clk_mgr->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz;
+
dce_clock_read_integrated_info(clk_mgr);
dce_clock_read_ss_info(clk_mgr);
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c
index 78df96882d6e..d50b9440210e 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c
@@ -120,9 +120,15 @@ void dce110_fill_display_configs(
const struct dc_state *context,
struct dm_pp_display_configuration *pp_display_cfg)
{
+ struct dc *dc = context->clk_mgr->ctx->dc;
int j;
int num_cfgs = 0;
+ pp_display_cfg->avail_mclk_switch_time_us = dce110_get_min_vblank_time_us(context);
+ pp_display_cfg->disp_clk_khz = dc->clk_mgr->clks.dispclk_khz;
+ pp_display_cfg->avail_mclk_switch_time_in_disp_active_us = 0;
+ pp_display_cfg->crtc_index = dc->res_pool->res_cap->num_timing_generator;
+
for (j = 0; j < context->stream_count; j++) {
int k;
@@ -158,12 +164,29 @@ void dce110_fill_display_configs(
stream->link->cur_link_settings.link_rate;
cfg->link_settings.link_spread =
stream->link->cur_link_settings.link_spread;
- cfg->sym_clock = stream->phy_pix_clk;
+ cfg->pixel_clock = stream->phy_pix_clk;
/* Round v_refresh*/
cfg->v_refresh = stream->timing.pix_clk_100hz * 100;
cfg->v_refresh /= stream->timing.h_total;
cfg->v_refresh = (cfg->v_refresh + stream->timing.v_total / 2)
/ stream->timing.v_total;
+
+ /* Find first CRTC index and calculate its line time.
+ * This is necessary for DPM on SI GPUs.
+ */
+ if (cfg->pipe_idx < pp_display_cfg->crtc_index) {
+ const struct dc_crtc_timing *timing =
+ &context->streams[0]->timing;
+
+ pp_display_cfg->crtc_index = cfg->pipe_idx;
+ pp_display_cfg->line_time_in_us =
+ timing->h_total * 10000 / timing->pix_clk_100hz;
+ }
+ }
+
+ if (!num_cfgs) {
+ pp_display_cfg->crtc_index = 0;
+ pp_display_cfg->line_time_in_us = 0;
}
pp_display_cfg->display_count = num_cfgs;
@@ -195,7 +218,7 @@ void dce11_pplib_apply_display_requirements(
* , then change minimum memory clock based on real-time bandwidth
* limitation.
*/
- if ((dc->ctx->asic_id.chip_family == FAMILY_AI) &&
+ if (dc->bw_vbios && (dc->ctx->asic_id.chip_family == FAMILY_AI) &&
ASICREV_IS_VEGA20_P(dc->ctx->asic_id.hw_internal_rev) && (context->stream_count >= 2)) {
pp_display_cfg->min_memory_clock_khz = max(pp_display_cfg->min_memory_clock_khz,
(uint32_t) div64_s64(
@@ -223,25 +246,8 @@ void dce11_pplib_apply_display_requirements(
pp_display_cfg->min_engine_clock_deep_sleep_khz
= context->bw_ctx.bw.dce.sclk_deep_sleep_khz;
- pp_display_cfg->avail_mclk_switch_time_us =
- dce110_get_min_vblank_time_us(context);
- /* TODO: dce11.2*/
- pp_display_cfg->avail_mclk_switch_time_in_disp_active_us = 0;
-
- pp_display_cfg->disp_clk_khz = dc->clk_mgr->clks.dispclk_khz;
-
dce110_fill_display_configs(context, pp_display_cfg);
- /* TODO: is this still applicable?*/
- if (pp_display_cfg->display_count == 1) {
- const struct dc_crtc_timing *timing =
- &context->streams[0]->timing;
-
- pp_display_cfg->crtc_index =
- pp_display_cfg->disp_configs[0].pipe_idx;
- pp_display_cfg->line_time_in_us = timing->h_total * 10000 / timing->pix_clk_100hz;
- }
-
if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0)
dm_pp_apply_display_requirements(dc->ctx, pp_display_cfg);
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c
index 934e6423dc1a..1f36ad8a7de4 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c
@@ -111,12 +111,10 @@ int dce112_set_clock(struct clk_mgr *clk_mgr_base, int requested_clk_khz)
bp->funcs->set_dce_clock(bp, &dce_clk_params);
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
- if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) {
- if (clk_mgr_dce->dfs_bypass_disp_clk != actual_clock)
- dmcu->funcs->set_psr_wait_loop(dmcu,
- actual_clock / 1000 / 7);
- }
+ if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) {
+ if (clk_mgr_dce->dfs_bypass_disp_clk != actual_clock)
+ dmcu->funcs->set_psr_wait_loop(dmcu,
+ actual_clock / 1000 / 7);
}
clk_mgr_dce->dfs_bypass_disp_clk = actual_clock;
@@ -153,12 +151,10 @@ int dce112_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_clk_khz)
clk_mgr->cur_min_clks_state = DM_PP_CLOCKS_STATE_NOMINAL;
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
- if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) {
- if (clk_mgr->dfs_bypass_disp_clk != actual_clock)
- dmcu->funcs->set_psr_wait_loop(dmcu,
- actual_clock / 1000 / 7);
- }
+ if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) {
+ if (clk_mgr->dfs_bypass_disp_clk != actual_clock)
+ dmcu->funcs->set_psr_wait_loop(dmcu,
+ actual_clock / 1000 / 7);
}
clk_mgr->dfs_bypass_disp_clk = actual_clock;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce120/dce120_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce120/dce120_clk_mgr.c
index 5399b8cf6b75..c9ba7b3fd2c3 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce120/dce120_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce120/dce120_clk_mgr.c
@@ -30,7 +30,7 @@
#include "dce110/dce110_clk_mgr.h"
#include "dce120_clk_mgr.h"
#include "dce100/dce_clk_mgr.h"
-#include "dce120/dce120_hw_sequencer.h"
+#include "dce120/dce120_hwseq.h"
static const struct state_dependent_clocks dce120_max_clks_by_state[] = {
/*ClocksStateInvalid - should not be used*/
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c
index 0267644717b2..69dd80d9f738 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c
@@ -83,22 +83,13 @@ static const struct state_dependent_clocks dce60_max_clks_by_state[] = {
static int dce60_get_dp_ref_freq_khz(struct clk_mgr *clk_mgr_base)
{
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
- int dprefclk_wdivider;
- int dp_ref_clk_khz;
- int target_div;
+ struct dc_context *ctx = clk_mgr_base->ctx;
+ int dp_ref_clk_khz = 0;
- /* DCE6 has no DPREFCLK_CNTL to read DP Reference Clock source */
-
- /* Read the mmDENTIST_DISPCLK_CNTL to get the currently
- * programmed DID DENTIST_DPREFCLK_WDIVIDER*/
- REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DPREFCLK_WDIVIDER, &dprefclk_wdivider);
-
- /* Convert DENTIST_DPREFCLK_WDIVIDERto actual divider*/
- target_div = dentist_get_divider_from_did(dprefclk_wdivider);
-
- /* Calculate the current DFS clock, in kHz.*/
- dp_ref_clk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
- * clk_mgr->base.dentist_vco_freq_khz) / target_div;
+ if (ASIC_REV_IS_TAHITI_P(ctx->asic_id.hw_internal_rev))
+ dp_ref_clk_khz = ctx->dc_bios->fw_info.default_display_engine_pll_frequency;
+ else
+ dp_ref_clk_khz = clk_mgr_base->clks.dispclk_khz;
return dce_adjust_dp_ref_freq_for_ss(clk_mgr, dp_ref_clk_khz);
}
@@ -109,8 +100,6 @@ static void dce60_pplib_apply_display_requirements(
{
struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg;
- pp_display_cfg->avail_mclk_switch_time_us = dce110_get_min_vblank_time_us(context);
-
dce110_fill_display_configs(context, pp_display_cfg);
if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0)
@@ -123,11 +112,9 @@ static void dce60_update_clocks(struct clk_mgr *clk_mgr_base,
{
struct clk_mgr_internal *clk_mgr_dce = TO_CLK_MGR_INTERNAL(clk_mgr_base);
struct dm_pp_power_level_change_request level_change_req;
- int patched_disp_clk = context->bw_ctx.bw.dce.dispclk_khz;
-
- /*TODO: W/A for dal3 linux, investigate why this works */
- if (!clk_mgr_dce->dfs_bypass_active)
- patched_disp_clk = patched_disp_clk * 115 / 100;
+ const int max_disp_clk =
+ clk_mgr_dce->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz;
+ int patched_disp_clk = MIN(max_disp_clk, context->bw_ctx.bw.dce.dispclk_khz);
level_change_req.power_level = dce_get_required_clocks_state(clk_mgr_base, context);
/* get max clock state from PPLIB */
@@ -160,6 +147,8 @@ void dce60_clk_mgr_construct(
struct dc_context *ctx,
struct clk_mgr_internal *clk_mgr)
{
+ struct clk_mgr *base = &clk_mgr->base;
+
dce_clk_mgr_construct(ctx, clk_mgr);
memcpy(clk_mgr->max_clks_by_state,
@@ -170,5 +159,8 @@ void dce60_clk_mgr_construct(
clk_mgr->clk_mgr_shift = &disp_clk_shift;
clk_mgr->clk_mgr_mask = &disp_clk_mask;
clk_mgr->base.funcs = &dce60_funcs;
+
+ base->clks.max_supported_dispclk_khz =
+ clk_mgr->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz;
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c
index 76ec8ec92efd..369421e46c52 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c
@@ -23,9 +23,6 @@
*
*/
-#include <linux/slab.h>
-
-#include "reg_helper.h"
#include "core_types.h"
#include "clk_mgr_internal.h"
#include "rv1_clk_mgr.h"
@@ -34,7 +31,7 @@
#include "rv1_clk_mgr_vbios_smu.h"
#include "rv1_clk_mgr_clk.h"
-void rv1_init_clocks(struct clk_mgr *clk_mgr)
+static void rv1_init_clocks(struct clk_mgr *clk_mgr)
{
memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_clk.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_clk.c
deleted file mode 100644
index 61dd12198a3c..000000000000
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_clk.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright 2012-16 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include "reg_helper.h"
-#include "clk_mgr_internal.h"
-#include "rv1_clk_mgr_clk.h"
-
-#include "ip/Discovery/hwid.h"
-#include "ip/Discovery/v1/ip_offset_1.h"
-#include "ip/CLK/clk_10_0_default.h"
-#include "ip/CLK/clk_10_0_offset.h"
-#include "ip/CLK/clk_10_0_reg.h"
-#include "ip/CLK/clk_10_0_sh_mask.h"
-
-#include "dce100/dce_clk_mgr.h"
-
-#define CLK_BASE_INNER(inst) \
- CLK_BASE__INST ## inst ## _SEG0
-
-
-#define CLK_REG(reg_name, block, inst)\
- CLK_BASE(mm ## block ## _ ## inst ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## _ ## inst ## _ ## reg_name
-
-#define REG(reg_name) \
- CLK_REG(reg_name, CLK0, 0)
-
-
-/* Only used by testing framework*/
-void rv1_dump_clk_registers(struct clk_state_registers *regs, struct clk_bypass *bypass, struct clk_mgr *clk_mgr_base)
-{
- struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
-
- regs->CLK0_CLK8_CURRENT_CNT = REG_READ(CLK0_CLK8_CURRENT_CNT) / 10; //dcf clk
-
- bypass->dcfclk_bypass = REG_READ(CLK0_CLK8_BYPASS_CNTL) & 0x0007;
- if (bypass->dcfclk_bypass < 0 || bypass->dcfclk_bypass > 4)
- bypass->dcfclk_bypass = 0;
-
-
- regs->CLK0_CLK8_DS_CNTL = REG_READ(CLK0_CLK8_DS_CNTL) / 10; //dcf deep sleep divider
-
- regs->CLK0_CLK8_ALLOW_DS = REG_READ(CLK0_CLK8_ALLOW_DS); //dcf deep sleep allow
-
- regs->CLK0_CLK10_CURRENT_CNT = REG_READ(CLK0_CLK10_CURRENT_CNT) / 10; //dpref clk
-
- bypass->dispclk_pypass = REG_READ(CLK0_CLK10_BYPASS_CNTL) & 0x0007;
- if (bypass->dispclk_pypass < 0 || bypass->dispclk_pypass > 4)
- bypass->dispclk_pypass = 0;
-
- regs->CLK0_CLK11_CURRENT_CNT = REG_READ(CLK0_CLK11_CURRENT_CNT) / 10; //disp clk
-
- bypass->dprefclk_bypass = REG_READ(CLK0_CLK11_BYPASS_CNTL) & 0x0007;
- if (bypass->dprefclk_bypass < 0 || bypass->dprefclk_bypass > 4)
- bypass->dprefclk_bypass = 0;
-
-}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c
index fe18bb9e19aa..d82a52319088 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c
@@ -26,7 +26,8 @@
#include "core_types.h"
#include "clk_mgr_internal.h"
#include "reg_helper.h"
-#include <linux/delay.h>
+
+#include "rv1_clk_mgr_vbios_smu.h"
#define MAX_INSTANCE 5
#define MAX_SEGMENT 5
@@ -99,7 +100,8 @@ static uint32_t rv1_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, unsi
return res_val;
}
-int rv1_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, unsigned int msg_id, unsigned int param)
+static int rv1_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
+ unsigned int msg_id, unsigned int param)
{
uint32_t result;
@@ -132,27 +134,11 @@ int rv1_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_di
VBIOSSMC_MSG_SetDispclkFreq,
khz_to_mhz_ceil(requested_dispclk_khz));
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
- if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) {
- if (clk_mgr->dfs_bypass_disp_clk != actual_dispclk_set_mhz)
- dmcu->funcs->set_psr_wait_loop(dmcu,
- actual_dispclk_set_mhz / 7);
- }
+ if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) {
+ if (clk_mgr->dfs_bypass_disp_clk != actual_dispclk_set_mhz)
+ dmcu->funcs->set_psr_wait_loop(dmcu,
+ actual_dispclk_set_mhz / 7);
}
return actual_dispclk_set_mhz * 1000;
}
-
-int rv1_vbios_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr)
-{
- int actual_dprefclk_set_mhz = -1;
-
- actual_dprefclk_set_mhz = rv1_vbios_smu_send_msg_with_param(
- clk_mgr,
- VBIOSSMC_MSG_SetDprefclkFreq,
- khz_to_mhz_ceil(clk_mgr->base.dprefclk_khz));
-
- /* TODO: add code for programing DP DTO, currently this is down by command table */
-
- return actual_dprefclk_set_mhz * 1000;
-}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.h
index 083cb3158859..81d7c912549c 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.h
@@ -27,6 +27,5 @@
#define DAL_DC_DCN10_RV1_CLK_MGR_VBIOS_SMU_H_
int rv1_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz);
-int rv1_vbios_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr);
#endif /* DAL_DC_DCN10_RV1_CLK_MGR_VBIOS_SMU_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
index 2108bff49d4e..bb4f3bd7532e 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
@@ -38,7 +38,6 @@
#include "clk/clk_11_0_0_offset.h"
#include "clk/clk_11_0_0_sh_mask.h"
-#include "irq/dcn20/irq_service_dcn20.h"
#undef FN
#define FN(reg_name, field_name) \
@@ -127,16 +126,24 @@ void dcn20_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct dc_state *context)
{
- int dpp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
- * clk_mgr->base.dentist_vco_freq_khz / clk_mgr->base.clks.dppclk_khz;
- int disp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
- * clk_mgr->base.dentist_vco_freq_khz / clk_mgr->base.clks.dispclk_khz;
-
- uint32_t dppclk_wdivider = dentist_get_did_from_divider(dpp_divider);
- uint32_t dispclk_wdivider = dentist_get_did_from_divider(disp_divider);
+ int dpp_divider = 0;
+ int disp_divider = 0;
+ uint32_t dppclk_wdivider = 0;
+ uint32_t dispclk_wdivider = 0;
uint32_t current_dispclk_wdivider;
uint32_t i;
+ if (clk_mgr->base.clks.dppclk_khz == 0 || clk_mgr->base.clks.dispclk_khz == 0)
+ return;
+
+ dpp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+ * clk_mgr->base.dentist_vco_freq_khz / clk_mgr->base.clks.dppclk_khz;
+ disp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+ * clk_mgr->base.dentist_vco_freq_khz / clk_mgr->base.clks.dispclk_khz;
+
+ dppclk_wdivider = dentist_get_did_from_divider(dpp_divider);
+ dispclk_wdivider = dentist_get_did_from_divider(disp_divider);
+
REG_GET(DENTIST_DISPCLK_CNTL,
DENTIST_DISPCLK_WDIVIDER, &current_dispclk_wdivider);
@@ -150,7 +157,7 @@ void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct
int32_t N;
int32_t j;
- if (!pipe_ctx->stream)
+ if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER))
continue;
/* Virtual encoders don't have this function */
if (!stream_enc->funcs->get_fifo_cal_average_level)
@@ -172,7 +179,7 @@ void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct
} else if (dispclk_wdivider == 127 && current_dispclk_wdivider != 127) {
REG_UPDATE(DENTIST_DISPCLK_CNTL,
DENTIST_DISPCLK_WDIVIDER, 126);
- REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 100);
+ REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 2000);
for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
struct dccg *dccg = clk_mgr->base.ctx->dc->res_pool->dccg;
@@ -181,7 +188,7 @@ void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct
int32_t N;
int32_t j;
- if (!pipe_ctx->stream)
+ if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER))
continue;
/* Virtual encoders don't have this function */
if (!stream_enc->funcs->get_fifo_cal_average_level)
@@ -199,7 +206,7 @@ void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct
REG_UPDATE(DENTIST_DISPCLK_CNTL,
DENTIST_DISPCLK_WDIVIDER, dispclk_wdivider);
- REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 1000);
+ REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 2000);
REG_UPDATE(DENTIST_DISPCLK_CNTL,
DENTIST_DPPCLK_WDIVIDER, dppclk_wdivider);
REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DPPCLK_CHG_DONE, 1, 5, 100);
@@ -223,8 +230,6 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base,
bool force_reset = false;
bool p_state_change_support;
int total_plane_count;
- int irq_src;
- uint32_t hpd_state;
if (dc->work_arounds.skip_clock_update)
return;
@@ -242,13 +247,7 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base,
if (dc->res_pool->pp_smu)
pp_smu = &dc->res_pool->pp_smu->nv_funcs;
- for (irq_src = DC_IRQ_SOURCE_HPD1; irq_src <= DC_IRQ_SOURCE_HPD6; irq_src++) {
- hpd_state = dc_get_hpd_state_dcn20(dc->res_pool->irqs, irq_src);
- if (hpd_state)
- break;
- }
-
- if (display_count == 0 && !hpd_state)
+ if (display_count == 0)
enter_display_off = true;
if (enter_display_off == safe_to_lower) {
@@ -409,7 +408,7 @@ void dcn2_init_clocks(struct clk_mgr *clk_mgr)
clk_mgr->clks.prev_p_state_change_support = true;
}
-void dcn2_enable_pme_wa(struct clk_mgr *clk_mgr_base)
+static void dcn2_enable_pme_wa(struct clk_mgr *clk_mgr_base)
{
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
struct pp_smu_funcs_nv *pp_smu = NULL;
@@ -445,7 +444,6 @@ void dcn2_read_clocks_from_hw_dentist(struct clk_mgr *clk_mgr_base)
clk_mgr_base->clks.dppclk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
* clk_mgr->base.dentist_vco_freq_khz) / dpp_divider;
}
-
}
void dcn2_get_clock(struct clk_mgr *clk_mgr,
@@ -505,7 +503,7 @@ static void dcn2_notify_link_rate_change(struct clk_mgr *clk_mgr_base, struct dc
clk_mgr->cur_phyclk_req_table[link->link_index] = link->cur_link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ;
- for (i = 0; i < MAX_PIPES * 2; i++) {
+ for (i = 0; i < MAX_LINKS; i++) {
if (clk_mgr->cur_phyclk_req_table[i] > max_phyclk_req)
max_phyclk_req = clk_mgr->cur_phyclk_req_table[i];
}
@@ -533,6 +531,11 @@ void dcn20_clk_mgr_construct(
struct pp_smu_funcs *pp_smu,
struct dccg *dccg)
{
+ int dprefclk_did;
+ int target_div;
+ uint32_t pll_req_reg;
+ struct fixed31_32 pll_req;
+
clk_mgr->base.ctx = ctx;
clk_mgr->pp_smu = pp_smu;
clk_mgr->base.funcs = &dcn2_funcs;
@@ -549,42 +552,34 @@ void dcn20_clk_mgr_construct(
clk_mgr->base.dprefclk_khz = 700000; // 700 MHz planned if VCO is 3.85 GHz, will be retrieved
- if (IS_FPGA_MAXIMUS_DC(ctx->dce_environment)) {
- dcn2_funcs.update_clocks = dcn2_update_clocks_fpga;
- clk_mgr->base.dentist_vco_freq_khz = 3850000;
+ /* DFS Slice 2 should be used for DPREFCLK */
+ dprefclk_did = REG_READ(CLK3_CLK2_DFS_CNTL);
+ /* Convert DPREFCLK DFS Slice DID to actual divider */
+ target_div = dentist_get_divider_from_did(dprefclk_did);
+ /* get FbMult value */
+ pll_req_reg = REG_READ(CLK3_CLK_PLL_REQ);
- } else {
- /* DFS Slice 2 should be used for DPREFCLK */
- int dprefclk_did = REG_READ(CLK3_CLK2_DFS_CNTL);
- /* Convert DPREFCLK DFS Slice DID to actual divider*/
- int target_div = dentist_get_divider_from_did(dprefclk_did);
-
- /* get FbMult value */
- uint32_t pll_req_reg = REG_READ(CLK3_CLK_PLL_REQ);
- struct fixed31_32 pll_req;
-
- /* set up a fixed-point number
- * this works because the int part is on the right edge of the register
- * and the frac part is on the left edge
- */
+ /* set up a fixed-point number
+ * this works because the int part is on the right edge of the register
+ * and the frac part is on the left edge
+ */
- pll_req = dc_fixpt_from_int(pll_req_reg & clk_mgr->clk_mgr_mask->FbMult_int);
- pll_req.value |= pll_req_reg & clk_mgr->clk_mgr_mask->FbMult_frac;
+ pll_req = dc_fixpt_from_int(pll_req_reg & clk_mgr->clk_mgr_mask->FbMult_int);
+ pll_req.value |= pll_req_reg & clk_mgr->clk_mgr_mask->FbMult_frac;
- /* multiply by REFCLK period */
- pll_req = dc_fixpt_mul_int(pll_req, 100000);
+ /* multiply by REFCLK period */
+ pll_req = dc_fixpt_mul_int(pll_req, 100000);
- /* integer part is now VCO frequency in kHz */
- clk_mgr->base.dentist_vco_freq_khz = dc_fixpt_floor(pll_req);
+ /* integer part is now VCO frequency in kHz */
+ clk_mgr->base.dentist_vco_freq_khz = dc_fixpt_floor(pll_req);
- /* in case we don't get a value from the register, use default */
- if (clk_mgr->base.dentist_vco_freq_khz == 0)
- clk_mgr->base.dentist_vco_freq_khz = 3850000;
+ /* in case we don't get a value from the register, use default */
+ if (clk_mgr->base.dentist_vco_freq_khz == 0)
+ clk_mgr->base.dentist_vco_freq_khz = 3850000;
- /* Calculate the DPREFCLK in kHz.*/
- clk_mgr->base.dprefclk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
- * clk_mgr->base.dentist_vco_freq_khz) / target_div;
- }
+ /* Calculate the DPREFCLK in kHz.*/
+ clk_mgr->base.dprefclk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+ * clk_mgr->base.dentist_vco_freq_khz) / target_div;
//Integrated_info table does not exist on dGPU projects so should not be referenced
//anywhere in code for dGPUs.
//Also there is no plan for now that DFS BYPASS will be used on NV10/12/14.
@@ -592,4 +587,3 @@ void dcn20_clk_mgr_construct(
dce_clock_read_ss_info(clk_mgr);
}
-
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c
index db9950244c7b..76c612ecfe3c 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c
@@ -34,8 +34,8 @@
#include "dm_services.h"
#include "cyan_skillfish_ip_offset.h"
-#include "dcn/dcn_2_0_3_offset.h"
-#include "dcn/dcn_2_0_3_sh_mask.h"
+#include "dcn/dcn_2_0_1_offset.h"
+#include "dcn/dcn_2_0_1_sh_mask.h"
#include "clk/clk_11_0_1_offset.h"
#include "clk/clk_11_0_1_sh_mask.h"
@@ -59,8 +59,6 @@
#define CTX \
clk_mgr->base.ctx
-#define DC_LOGGER \
- clk_mgr->base.ctx->logger
static const struct clk_mgr_registers clk_mgr_regs = {
CLK_COMMON_REG_LIST_DCN_201()
@@ -74,42 +72,6 @@ static const struct clk_mgr_mask clk_mgr_mask = {
CLK_COMMON_MASK_SH_LIST_DCN201_BASE(_MASK)
};
-void dcn201_update_clocks_vbios(struct clk_mgr *clk_mgr,
- struct dc_state *context,
- bool safe_to_lower)
-{
- struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
-
- bool update_dppclk = false;
- bool update_dispclk = false;
-
- if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->clks.dppclk_khz)) {
- clk_mgr->clks.dppclk_khz = new_clocks->dppclk_khz;
- update_dppclk = true;
- }
-
- if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr->clks.dispclk_khz)) {
- clk_mgr->clks.dispclk_khz = new_clocks->dispclk_khz;
- update_dispclk = true;
- }
-
- if (update_dppclk || update_dispclk) {
- struct bp_set_dce_clock_parameters dce_clk_params;
- struct dc_bios *bp = clk_mgr->ctx->dc_bios;
-
- if (update_dispclk) {
- memset(&dce_clk_params, 0, sizeof(dce_clk_params));
- dce_clk_params.target_clock_frequency = new_clocks->dispclk_khz;
- dce_clk_params.pll_id = CLOCK_SOURCE_ID_DFS;
- dce_clk_params.clock_type = DCECLOCK_TYPE_DISPLAY_CLOCK;
- bp->funcs->set_dce_clock(bp, &dce_clk_params);
- }
- /* currently there is no DCECLOCK_TYPE_DPPCLK type defined in VBIOS interface.
- * vbios program DPPCLK to the same DispCLK limitation
- */
- }
-}
-
static void dcn201_init_clocks(struct clk_mgr *clk_mgr)
{
memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
@@ -126,10 +88,8 @@ static void dcn201_update_clocks(struct clk_mgr *clk_mgr_base,
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
struct dc *dc = clk_mgr_base->ctx->dc;
- int display_count;
bool update_dppclk = false;
bool update_dispclk = false;
- bool enter_display_off = false;
bool dpp_clock_lowered = false;
bool force_reset = false;
bool p_state_change_support;
@@ -139,17 +99,20 @@ static void dcn201_update_clocks(struct clk_mgr *clk_mgr_base,
return;
if (clk_mgr_base->clks.dispclk_khz == 0 ||
- dc->debug.force_clock_mode & 0x1) {
- force_reset = true;
+ dc->debug.force_clock_mode & 0x1) {
+ /* this is from resume or boot up, if forced_clock cfg option
+ * used, we bypass program dispclk and DPPCLK, but need set them
+ * for S3.
+ */
+
+ force_reset = true;
+ /* force_clock_mode 0x1: force reset the clock even it is the
+ * same clock as long as it is in Passive level.
+ */
dcn2_read_clocks_from_hw_dentist(clk_mgr_base);
}
- display_count = clk_mgr_helper_get_active_display_cnt(dc, context);
-
- if (display_count == 0)
- enter_display_off = true;
-
if (should_set_clock(safe_to_lower, new_clocks->phyclk_khz, clk_mgr_base->clks.phyclk_khz))
clk_mgr_base->clks.phyclk_khz = new_clocks->phyclk_khz;
@@ -193,18 +156,20 @@ static void dcn201_update_clocks(struct clk_mgr *clk_mgr_base,
if (dc->config.forced_clocks == false || (force_reset && safe_to_lower)) {
if (dpp_clock_lowered) {
+ // if clock is being lowered, increase DTO before lowering refclk
dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
dcn20_update_clocks_update_dentist(clk_mgr, context);
} else {
+ // if clock is being raised, increase refclk before lowering DTO
if (update_dppclk || update_dispclk)
dcn20_update_clocks_update_dentist(clk_mgr, context);
- if (new_clocks->dppclk_khz >= dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz)
- dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
+ // always update dtos unless clock is lowered and not safe to lower
+ dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
}
}
}
-struct clk_mgr_funcs dcn201_funcs = {
+static struct clk_mgr_funcs dcn201_funcs = {
.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
.update_clocks = dcn201_update_clocks,
.init_clocks = dcn201_init_clocks,
@@ -232,23 +197,17 @@ void dcn201_clk_mgr_construct(struct dc_context *ctx,
clk_mgr->dprefclk_ss_divider = 1000;
clk_mgr->ss_on_dprefclk = false;
- if (IS_FPGA_MAXIMUS_DC(ctx->dce_environment)) {
- dcn201_funcs.update_clocks = dcn2_update_clocks_fpga;
- clk_mgr->base.dprefclk_khz = 600000;
- clk_mgr->base.dentist_vco_freq_khz = 3000000;
- } else {
- clk_mgr->base.dprefclk_khz = REG_READ(CLK4_CLK2_CURRENT_CNT);
- clk_mgr->base.dprefclk_khz *= 100;
+ clk_mgr->base.dprefclk_khz = REG_READ(CLK4_CLK2_CURRENT_CNT);
+ clk_mgr->base.dprefclk_khz *= 100;
- if (clk_mgr->base.dprefclk_khz == 0)
- clk_mgr->base.dprefclk_khz = 600000;
+ if (clk_mgr->base.dprefclk_khz == 0)
+ clk_mgr->base.dprefclk_khz = 600000;
- REG_GET(CLK4_CLK_PLL_REQ, FbMult_int, &clk_mgr->base.dentist_vco_freq_khz);
- clk_mgr->base.dentist_vco_freq_khz *= 100000;
+ REG_GET(CLK4_CLK_PLL_REQ, FbMult_int, &clk_mgr->base.dentist_vco_freq_khz);
+ clk_mgr->base.dentist_vco_freq_khz *= 100000;
- if (clk_mgr->base.dentist_vco_freq_khz == 0)
- clk_mgr->base.dentist_vco_freq_khz = 3000000;
- }
+ if (clk_mgr->base.dentist_vco_freq_khz == 0)
+ clk_mgr->base.dentist_vco_freq_khz = 3000000;
if (!debug->disable_dfs_bypass && bp->integrated_info)
if (bp->integrated_info->gpu_cap_info & DFS_BYPASS_ENABLE)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
index ac2d4c4f04e4..e18097f82091 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
@@ -24,12 +24,10 @@
*/
#include "dccg.h"
-#include "clk_mgr_internal.h"
-
-
-#include "dcn20/dcn20_clk_mgr.h"
#include "rn_clk_mgr.h"
+#include "dcn20/dcn20_clk_mgr.h"
+#include "dml/dcn20/dcn20_fpu.h"
#include "dce100/dce_clk_mgr.h"
#include "rn_clk_mgr_vbios_smu.h"
@@ -42,11 +40,9 @@
#include "clk/clk_10_0_2_sh_mask.h"
#include "renoir_ip_offset.h"
-#include "irq/dcn21/irq_service_dcn21.h"
/* Constants */
-#define LPDDR_MEM_RETRAIN_LATENCY 4.977 /* Number obtained from LPDDR4 Training Counter Requirement doc */
#define SMU_VER_55_51_0 0x373300 /* SMU Version that is able to set DISPCLK below 100MHz */
/* Macros */
@@ -56,9 +52,7 @@
/* TODO: evaluate how to lower or disable all dcn clocks in screen off case */
-int rn_get_active_display_cnt_wa(
- struct dc *dc,
- struct dc_state *context)
+static int rn_get_active_display_cnt_wa(struct dc *dc, struct dc_state *context)
{
int i, display_count;
bool tmds_present = false;
@@ -77,7 +71,8 @@ int rn_get_active_display_cnt_wa(
const struct dc_link *link = dc->links[i];
/* abusing the fact that the dig and phy are coupled to see if the phy is enabled */
- if (link->link_enc->funcs->is_dig_enabled(link->link_enc))
+ if (link->link_enc->funcs->is_dig_enabled &&
+ link->link_enc->funcs->is_dig_enabled(link->link_enc))
display_count++;
}
@@ -88,13 +83,24 @@ int rn_get_active_display_cnt_wa(
return display_count;
}
-void rn_set_low_power_state(struct clk_mgr *clk_mgr_base)
+static void rn_set_low_power_state(struct clk_mgr *clk_mgr_base)
{
+ int display_count;
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ struct dc *dc = clk_mgr_base->ctx->dc;
+ struct dc_state *context = dc->current_state;
+
+ if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) {
- rn_vbios_smu_set_dcn_low_power_state(clk_mgr, DCN_PWR_STATE_LOW_POWER);
- /* update power state */
- clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
+ display_count = rn_get_active_display_cnt_wa(dc, context);
+
+ /* if we can go lower, go lower */
+ if (display_count == 0) {
+ rn_vbios_smu_set_dcn_low_power_state(clk_mgr, DCN_PWR_STATE_LOW_POWER);
+ /* update power state */
+ clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
+ }
+ }
}
static void rn_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
@@ -113,7 +119,7 @@ static void rn_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
dpp_inst = clk_mgr->base.ctx->dc->res_pool->dpps[i]->inst;
dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz;
- prev_dppclk_khz = clk_mgr->dccg->pipe_dppclk_khz[i];
+ prev_dppclk_khz = clk_mgr->dccg->pipe_dppclk_khz[dpp_inst];
if (safe_to_lower || prev_dppclk_khz < dppclk_khz)
clk_mgr->dccg->funcs->update_dpp_dto(
@@ -122,7 +128,7 @@ static void rn_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
}
-void rn_update_clocks(struct clk_mgr *clk_mgr_base,
+static void rn_update_clocks(struct clk_mgr *clk_mgr_base,
struct dc_state *context,
bool safe_to_lower)
{
@@ -130,11 +136,9 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base,
struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
struct dc *dc = clk_mgr_base->ctx->dc;
int display_count;
- int irq_src;
bool update_dppclk = false;
bool update_dispclk = false;
bool dpp_clock_lowered = false;
- uint32_t hpd_state;
struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu;
@@ -151,14 +155,8 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base,
display_count = rn_get_active_display_cnt_wa(dc, context);
- for (irq_src = DC_IRQ_SOURCE_HPD1; irq_src <= DC_IRQ_SOURCE_HPD5; irq_src++) {
- hpd_state = dc_get_hpd_state_dcn21(dc->res_pool->irqs, irq_src);
- if (hpd_state)
- break;
- }
-
/* if we can go lower, go lower */
- if (display_count == 0 && !hpd_state) {
+ if (display_count == 0) {
rn_vbios_smu_set_dcn_low_power_state(clk_mgr, DCN_PWR_STATE_LOW_POWER);
/* update power state */
clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
@@ -322,16 +320,16 @@ static void rn_dump_clk_registers(struct clk_state_registers_and_bypass *regs_an
regs_and_bypass->dppclk = internal.CLK1_CLK1_CURRENT_CNT / 10;
regs_and_bypass->dppclk_bypass = internal.CLK1_CLK1_BYPASS_CNTL & 0x0007;
- if (regs_and_bypass->dppclk_bypass < 0 || regs_and_bypass->dppclk_bypass > 4)
+ if (regs_and_bypass->dppclk_bypass > 4)
regs_and_bypass->dppclk_bypass = 0;
regs_and_bypass->dcfclk_bypass = internal.CLK1_CLK3_BYPASS_CNTL & 0x0007;
- if (regs_and_bypass->dcfclk_bypass < 0 || regs_and_bypass->dcfclk_bypass > 4)
+ if (regs_and_bypass->dcfclk_bypass > 4)
regs_and_bypass->dcfclk_bypass = 0;
regs_and_bypass->dispclk_bypass = internal.CLK1_CLK0_BYPASS_CNTL & 0x0007;
- if (regs_and_bypass->dispclk_bypass < 0 || regs_and_bypass->dispclk_bypass > 4)
+ if (regs_and_bypass->dispclk_bypass > 4)
regs_and_bypass->dispclk_bypass = 0;
regs_and_bypass->dprefclk_bypass = internal.CLK1_CLK2_BYPASS_CNTL & 0x0007;
- if (regs_and_bypass->dprefclk_bypass < 0 || regs_and_bypass->dprefclk_bypass > 4)
+ if (regs_and_bypass->dprefclk_bypass > 4)
regs_and_bypass->dprefclk_bypass = 0;
if (log_info->enabled) {
@@ -437,25 +435,14 @@ static void rn_dump_clk_registers(struct clk_state_registers_and_bypass *regs_an
}
}
-/* This function produce translated logical clk state values*/
-void rn_get_clk_states(struct clk_mgr *clk_mgr_base, struct clk_states *s)
-{
- struct clk_state_registers_and_bypass sb = { 0 };
- struct clk_log_info log_info = { 0 };
-
- rn_dump_clk_registers(&sb, clk_mgr_base, &log_info);
-
- s->dprefclk_khz = sb.dprefclk * 1000;
-}
-
-void rn_enable_pme_wa(struct clk_mgr *clk_mgr_base)
+static void rn_enable_pme_wa(struct clk_mgr *clk_mgr_base)
{
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
rn_vbios_smu_enable_pme_wa(clk_mgr);
}
-void rn_init_clocks(struct clk_mgr *clk_mgr)
+static void rn_init_clocks(struct clk_mgr *clk_mgr)
{
memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
// Assumption is that boot state always supports pstate
@@ -497,7 +484,8 @@ static void build_watermark_ranges(struct clk_bw_params *bw_params, struct pp_sm
ranges->reader_wm_sets[num_valid_sets].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX;
/* Modify previous watermark range to cover up to max */
- ranges->reader_wm_sets[num_valid_sets - 1].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX;
+ if (num_valid_sets > 0)
+ ranges->reader_wm_sets[num_valid_sets - 1].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX;
}
num_valid_sets++;
}
@@ -561,7 +549,7 @@ static void rn_notify_link_rate_change(struct clk_mgr *clk_mgr_base, struct dc_l
clk_mgr->cur_phyclk_req_table[link->link_index] = link->cur_link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ;
- for (i = 0; i < MAX_PIPES * 2; i++) {
+ for (i = 0; i < MAX_LINKS; i++) {
if (clk_mgr->cur_phyclk_req_table[i] > max_phyclk_req)
max_phyclk_req = clk_mgr->cur_phyclk_req_table[i];
}
@@ -623,228 +611,6 @@ static struct clk_bw_params rn_bw_params = {
};
-static struct wm_table ddr4_wm_table_gs = {
- .entries = {
- {
- .wm_inst = WM_A,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.72,
- .sr_exit_time_us = 7.09,
- .sr_enter_plus_exit_time_us = 8.14,
- .valid = true,
- },
- {
- .wm_inst = WM_B,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.72,
- .sr_exit_time_us = 10.12,
- .sr_enter_plus_exit_time_us = 11.48,
- .valid = true,
- },
- {
- .wm_inst = WM_C,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.72,
- .sr_exit_time_us = 10.12,
- .sr_enter_plus_exit_time_us = 11.48,
- .valid = true,
- },
- {
- .wm_inst = WM_D,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.72,
- .sr_exit_time_us = 10.12,
- .sr_enter_plus_exit_time_us = 11.48,
- .valid = true,
- },
- }
-};
-
-static struct wm_table lpddr4_wm_table_gs = {
- .entries = {
- {
- .wm_inst = WM_A,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.65333,
- .sr_exit_time_us = 5.32,
- .sr_enter_plus_exit_time_us = 6.38,
- .valid = true,
- },
- {
- .wm_inst = WM_B,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.65333,
- .sr_exit_time_us = 9.82,
- .sr_enter_plus_exit_time_us = 11.196,
- .valid = true,
- },
- {
- .wm_inst = WM_C,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.65333,
- .sr_exit_time_us = 9.89,
- .sr_enter_plus_exit_time_us = 11.24,
- .valid = true,
- },
- {
- .wm_inst = WM_D,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.65333,
- .sr_exit_time_us = 9.748,
- .sr_enter_plus_exit_time_us = 11.102,
- .valid = true,
- },
- }
-};
-
-static struct wm_table lpddr4_wm_table_with_disabled_ppt = {
- .entries = {
- {
- .wm_inst = WM_A,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.65333,
- .sr_exit_time_us = 8.32,
- .sr_enter_plus_exit_time_us = 9.38,
- .valid = true,
- },
- {
- .wm_inst = WM_B,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.65333,
- .sr_exit_time_us = 9.82,
- .sr_enter_plus_exit_time_us = 11.196,
- .valid = true,
- },
- {
- .wm_inst = WM_C,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.65333,
- .sr_exit_time_us = 9.89,
- .sr_enter_plus_exit_time_us = 11.24,
- .valid = true,
- },
- {
- .wm_inst = WM_D,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.65333,
- .sr_exit_time_us = 9.748,
- .sr_enter_plus_exit_time_us = 11.102,
- .valid = true,
- },
- }
-};
-
-static struct wm_table ddr4_wm_table_rn = {
- .entries = {
- {
- .wm_inst = WM_A,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.72,
- .sr_exit_time_us = 11.90,
- .sr_enter_plus_exit_time_us = 12.80,
- .valid = true,
- },
- {
- .wm_inst = WM_B,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.72,
- .sr_exit_time_us = 13.18,
- .sr_enter_plus_exit_time_us = 14.30,
- .valid = true,
- },
- {
- .wm_inst = WM_C,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.72,
- .sr_exit_time_us = 13.18,
- .sr_enter_plus_exit_time_us = 14.30,
- .valid = true,
- },
- {
- .wm_inst = WM_D,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.72,
- .sr_exit_time_us = 13.18,
- .sr_enter_plus_exit_time_us = 14.30,
- .valid = true,
- },
- }
-};
-
-static struct wm_table ddr4_1R_wm_table_rn = {
- .entries = {
- {
- .wm_inst = WM_A,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.72,
- .sr_exit_time_us = 13.90,
- .sr_enter_plus_exit_time_us = 14.80,
- .valid = true,
- },
- {
- .wm_inst = WM_B,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.72,
- .sr_exit_time_us = 13.90,
- .sr_enter_plus_exit_time_us = 14.80,
- .valid = true,
- },
- {
- .wm_inst = WM_C,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.72,
- .sr_exit_time_us = 13.90,
- .sr_enter_plus_exit_time_us = 14.80,
- .valid = true,
- },
- {
- .wm_inst = WM_D,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.72,
- .sr_exit_time_us = 13.90,
- .sr_enter_plus_exit_time_us = 14.80,
- .valid = true,
- },
- }
-};
-
-static struct wm_table lpddr4_wm_table_rn = {
- .entries = {
- {
- .wm_inst = WM_A,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.65333,
- .sr_exit_time_us = 7.32,
- .sr_enter_plus_exit_time_us = 8.38,
- .valid = true,
- },
- {
- .wm_inst = WM_B,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.65333,
- .sr_exit_time_us = 9.82,
- .sr_enter_plus_exit_time_us = 11.196,
- .valid = true,
- },
- {
- .wm_inst = WM_C,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.65333,
- .sr_exit_time_us = 9.89,
- .sr_enter_plus_exit_time_us = 11.24,
- .valid = true,
- },
- {
- .wm_inst = WM_D,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.65333,
- .sr_exit_time_us = 9.748,
- .sr_enter_plus_exit_time_us = 11.102,
- .valid = true,
- },
- }
-};
-
static unsigned int find_socclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage)
{
int i;
@@ -877,7 +643,8 @@ static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params
j = -1;
- ASSERT(PP_SMU_NUM_FCLK_DPM_LEVELS <= MAX_NUM_DPM_LVL);
+ static_assert(PP_SMU_NUM_FCLK_DPM_LEVELS <= MAX_NUM_DPM_LVL,
+ "number of reported FCLK DPM levels exceed maximum");
/* Find lowest DPM, FCLK is filled in reverse order*/
@@ -924,12 +691,10 @@ static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params
/*
* WM set D will be re-purposed for memory retraining
*/
- bw_params->wm_table.entries[WM_D].pstate_latency_us = LPDDR_MEM_RETRAIN_LATENCY;
- bw_params->wm_table.entries[WM_D].wm_inst = WM_D;
- bw_params->wm_table.entries[WM_D].wm_type = WM_TYPE_RETRAINING;
- bw_params->wm_table.entries[WM_D].valid = true;
+ DC_FP_START();
+ dcn21_clk_mgr_set_bw_params_wm_table(bw_params);
+ DC_FP_END();
}
-
}
void rn_clk_mgr_construct(
@@ -942,10 +707,9 @@ void rn_clk_mgr_construct(
struct dpm_clocks clock_table = { 0 };
enum pp_smu_status status = 0;
int is_green_sardine = 0;
+ struct clk_log_info log_info = {0};
-#if defined(CONFIG_DRM_AMD_DC_DCN)
is_green_sardine = ASICREV_IS_GREEN_SARDINE(ctx->asic_id.hw_internal_rev);
-#endif
clk_mgr->base.ctx = ctx;
clk_mgr->base.funcs = &dcn21_funcs;
@@ -962,48 +726,41 @@ void rn_clk_mgr_construct(
clk_mgr->smu_ver = rn_vbios_smu_get_smu_version(clk_mgr);
- if (IS_FPGA_MAXIMUS_DC(ctx->dce_environment)) {
- dcn21_funcs.update_clocks = dcn2_update_clocks_fpga;
+ clk_mgr->periodic_retraining_disabled = rn_vbios_smu_is_periodic_retraining_disabled(clk_mgr);
+
+ /* SMU Version 55.51.0 and up no longer have an issue
+ * that needs to limit minimum dispclk */
+ if (clk_mgr->smu_ver >= SMU_VER_55_51_0)
+ debug->min_disp_clk_khz = 0;
+
+ /* TODO: Check we get what we expect during bringup */
+ clk_mgr->base.dentist_vco_freq_khz = get_vco_frequency_from_reg(clk_mgr);
+
+ /* in case we don't get a value from the register, use default */
+ if (clk_mgr->base.dentist_vco_freq_khz == 0)
clk_mgr->base.dentist_vco_freq_khz = 3600000;
- } else {
- struct clk_log_info log_info = {0};
-
- clk_mgr->periodic_retraining_disabled = rn_vbios_smu_is_periodic_retraining_disabled(clk_mgr);
-
- /* SMU Version 55.51.0 and up no longer have an issue
- * that needs to limit minimum dispclk */
- if (clk_mgr->smu_ver >= SMU_VER_55_51_0)
- debug->min_disp_clk_khz = 0;
-
- /* TODO: Check we get what we expect during bringup */
- clk_mgr->base.dentist_vco_freq_khz = get_vco_frequency_from_reg(clk_mgr);
-
- /* in case we don't get a value from the register, use default */
- if (clk_mgr->base.dentist_vco_freq_khz == 0)
- clk_mgr->base.dentist_vco_freq_khz = 3600000;
-
- if (ctx->dc_bios->integrated_info->memory_type == LpDdr4MemType) {
- if (clk_mgr->periodic_retraining_disabled) {
- rn_bw_params.wm_table = lpddr4_wm_table_with_disabled_ppt;
- } else {
- if (is_green_sardine)
- rn_bw_params.wm_table = lpddr4_wm_table_gs;
- else
- rn_bw_params.wm_table = lpddr4_wm_table_rn;
- }
+
+ if (ctx->dc_bios->integrated_info->memory_type == LpDdr4MemType) {
+ if (clk_mgr->periodic_retraining_disabled) {
+ rn_bw_params.wm_table = lpddr4_wm_table_with_disabled_ppt;
} else {
if (is_green_sardine)
- rn_bw_params.wm_table = ddr4_wm_table_gs;
- else {
- if (ctx->dc->config.is_single_rank_dimm)
- rn_bw_params.wm_table = ddr4_1R_wm_table_rn;
- else
- rn_bw_params.wm_table = ddr4_wm_table_rn;
- }
+ rn_bw_params.wm_table = lpddr4_wm_table_gs;
+ else
+ rn_bw_params.wm_table = lpddr4_wm_table_rn;
+ }
+ } else {
+ if (is_green_sardine)
+ rn_bw_params.wm_table = ddr4_wm_table_gs;
+ else {
+ if (ctx->dc->config.is_single_rank_dimm)
+ rn_bw_params.wm_table = ddr4_1R_wm_table_rn;
+ else
+ rn_bw_params.wm_table = ddr4_wm_table_rn;
}
- /* Saved clocks configured at boot for debug purposes */
- rn_dump_clk_registers(&clk_mgr->base.boot_snapshot, &clk_mgr->base, &log_info);
}
+ /* Saved clocks configured at boot for debug purposes */
+ rn_dump_clk_registers(&clk_mgr->base.boot_snapshot, &clk_mgr->base, &log_info);
clk_mgr->base.dprefclk_khz = 600000;
dce_clock_read_ss_info(clk_mgr);
@@ -1015,7 +772,7 @@ void rn_clk_mgr_construct(
status = pp_smu->rn_funcs.get_dpm_clock_table(&pp_smu->rn_funcs.pp_smu, &clock_table);
if (status == PP_SMU_RESULT_OK &&
- ctx->dc_bios && ctx->dc_bios->integrated_info) {
+ ctx->dc_bios->integrated_info) {
rn_clk_mgr_helper_populate_bw_params (clk_mgr->base.bw_params, &clock_table, ctx->dc_bios->integrated_info);
/* treat memory config as single channel if memory is asymmetrics. */
if (ctx->dc->config.is_asymmetric_memory)
@@ -1023,9 +780,8 @@ void rn_clk_mgr_construct(
}
}
- if (!IS_FPGA_MAXIMUS_DC(ctx->dce_environment) && clk_mgr->smu_ver >= 0x00371500) {
- /* enable powerfeatures when displaycount goes to 0 */
+ /* enable powerfeatures when displaycount goes to 0 */
+ if (clk_mgr->smu_ver >= 0x00371500)
rn_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(clk_mgr, !debug->disable_48mhz_pwrdwn);
- }
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h
index e4322fa5475b..f1319957e400 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h
@@ -28,6 +28,14 @@
#include "clk_mgr.h"
#include "dm_pp_smu.h"
+#include "clk_mgr_internal.h"
+
+extern struct wm_table ddr4_wm_table_gs;
+extern struct wm_table lpddr4_wm_table_gs;
+extern struct wm_table lpddr4_wm_table_with_disabled_ppt;
+extern struct wm_table ddr4_wm_table_rn;
+extern struct wm_table ddr4_1R_wm_table_rn;
+extern struct wm_table lpddr4_wm_table_rn;
struct rn_clk_registers {
uint32_t CLK1_CLK0_CURRENT_CNT; /* DPREFCLK */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c
index 9f7eed6688c4..5a633333dbb5 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c
@@ -26,6 +26,10 @@
#include "core_types.h"
#include "clk_mgr_internal.h"
#include "reg_helper.h"
+#include "dm_helpers.h"
+
+#include "rn_clk_mgr_vbios_smu.h"
+
#include <linux/delay.h>
#include "renoir_ip_offset.h"
@@ -39,6 +43,12 @@
#define FN(reg_name, field) \
FD(reg_name##__##field)
+#include "logger_types.h"
+#undef DC_LOGGER
+#define DC_LOGGER \
+ CTX->logger
+#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
+
#define VBIOSSMC_MSG_TestMessage 0x1
#define VBIOSSMC_MSG_GetSmuVersion 0x2
#define VBIOSSMC_MSG_PowerUpGfx 0x3
@@ -86,10 +96,21 @@ static uint32_t rn_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, unsig
}
-int rn_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, unsigned int msg_id, unsigned int param)
+static int rn_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
+ unsigned int msg_id,
+ unsigned int param)
{
uint32_t result;
+ result = rn_smu_wait_for_response(clk_mgr, 10, 200000);
+
+ if (result != VBIOSSMC_Result_OK)
+ smu_print("SMU Response was not OK. SMU response after wait received is: %d\n", result);
+
+ if (result == VBIOSSMC_Status_BUSY) {
+ return -1;
+ }
+
/* First clear response register */
REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Status_BUSY);
@@ -101,7 +122,10 @@ int rn_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, unsigned
result = rn_smu_wait_for_response(clk_mgr, 10, 200000);
- ASSERT(result == VBIOSSMC_Result_OK || result == VBIOSSMC_Result_UnknownCmd);
+ if (IS_SMU_TIMEOUT(result)) {
+ ASSERT(0);
+ dm_helpers_smu_timeout(CTX, msg_id, param, 10 * 200000);
+ }
/* Actual dispclk set is returned in the parameter register */
return REG_READ(MP1_SMN_C2PMSG_83);
@@ -128,35 +152,18 @@ int rn_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dis
VBIOSSMC_MSG_SetDispclkFreq,
khz_to_mhz_ceil(requested_dispclk_khz));
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
- if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) {
- if (clk_mgr->dfs_bypass_disp_clk != actual_dispclk_set_mhz)
- dmcu->funcs->set_psr_wait_loop(dmcu,
- actual_dispclk_set_mhz / 7);
- }
+ if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) {
+ if (clk_mgr->dfs_bypass_disp_clk != actual_dispclk_set_mhz)
+ dmcu->funcs->set_psr_wait_loop(dmcu,
+ actual_dispclk_set_mhz / 7);
}
// pmfw always set clock more than or equal requested clock
- if (!IS_DIAG_DC(dc->ctx->dce_environment))
- ASSERT(actual_dispclk_set_mhz >= khz_to_mhz_ceil(requested_dispclk_khz));
+ ASSERT(actual_dispclk_set_mhz >= khz_to_mhz_ceil(requested_dispclk_khz));
return actual_dispclk_set_mhz * 1000;
}
-int rn_vbios_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr)
-{
- int actual_dprefclk_set_mhz = -1;
-
- actual_dprefclk_set_mhz = rn_vbios_smu_send_msg_with_param(
- clk_mgr,
- VBIOSSMC_MSG_SetDprefclkFreq,
- khz_to_mhz_ceil(clk_mgr->base.dprefclk_khz));
-
- /* TODO: add code for programing DP DTO, currently this is down by command table */
-
- return actual_dprefclk_set_mhz * 1000;
-}
-
int rn_vbios_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_dcfclk_khz)
{
int actual_dcfclk_set_mhz = -1;
@@ -198,15 +205,13 @@ void rn_vbios_smu_set_phyclk(struct clk_mgr_internal *clk_mgr, int requested_phy
int rn_vbios_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_khz)
{
int actual_dppclk_set_mhz = -1;
- struct dc *dc = clk_mgr->base.ctx->dc;
actual_dppclk_set_mhz = rn_vbios_smu_send_msg_with_param(
clk_mgr,
VBIOSSMC_MSG_SetDppclkFreq,
khz_to_mhz_ceil(requested_dpp_khz));
- if (!IS_DIAG_DC(dc->ctx->dce_environment))
- ASSERT(actual_dppclk_set_mhz >= khz_to_mhz_ceil(requested_dpp_khz));
+ ASSERT(actual_dppclk_set_mhz >= khz_to_mhz_ceil(requested_dpp_khz));
return actual_dppclk_set_mhz * 1000;
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h
index 3e5df27aa96f..f76fad87f0e1 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h
@@ -26,14 +26,15 @@
#ifndef DAL_DC_RN_CLK_MGR_VBIOS_SMU_H_
#define DAL_DC_RN_CLK_MGR_VBIOS_SMU_H_
+enum dcn_pwr_state;
+
int rn_vbios_smu_get_smu_version(struct clk_mgr_internal *clk_mgr);
int rn_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz);
-int rn_vbios_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr);
int rn_vbios_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_dcfclk_khz);
int rn_vbios_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_min_ds_dcfclk_khz);
void rn_vbios_smu_set_phyclk(struct clk_mgr_internal *clk_mgr, int requested_phyclk_khz);
int rn_vbios_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_khz);
-void rn_vbios_smu_set_dcn_low_power_state(struct clk_mgr_internal *clk_mgr, int display_count);
+void rn_vbios_smu_set_dcn_low_power_state(struct clk_mgr_internal *clk_mgr, enum dcn_pwr_state);
void rn_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable);
void rn_vbios_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr);
int rn_vbios_smu_is_periodic_retraining_disabled(struct clk_mgr_internal *clk_mgr);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h
index fa09c594fd36..06da34676965 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h
@@ -56,6 +56,7 @@
#define DALSMC_MSG_SetDisplayRefreshFromMall 0xF
#define DALSMC_MSG_SetExternalClientDfCstateAllow 0x10
#define DALSMC_MSG_BacoAudioD3PME 0x11
-#define DALSMC_Message_Count 0x12
+#define DALSMC_MSG_SmartAccess 0x12
+#define DALSMC_Message_Count 0x13
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
index 1861a147a7fa..ef77fcd164ed 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
@@ -25,30 +25,25 @@
#include "dccg.h"
#include "clk_mgr_internal.h"
-
#include "dcn30_clk_mgr_smu_msg.h"
#include "dcn20/dcn20_clk_mgr.h"
#include "dce100/dce_clk_mgr.h"
+#include "dcn30/dcn30_clk_mgr.h"
+#include "dml/dcn30/dcn30_fpu.h"
+#include "dcn30/dcn30m_clk_mgr.h"
#include "reg_helper.h"
#include "core_types.h"
#include "dm_helpers.h"
-
#include "atomfirmware.h"
-
-
#include "sienna_cichlid_ip_offset.h"
#include "dcn/dcn_3_0_0_offset.h"
#include "dcn/dcn_3_0_0_sh_mask.h"
-
#include "nbio/nbio_7_4_offset.h"
-
-#include "dcn/dpcs_3_0_0_offset.h"
-#include "dcn/dpcs_3_0_0_sh_mask.h"
-
+#include "dpcs/dpcs_3_0_0_offset.h"
+#include "dpcs/dpcs_3_0_0_sh_mask.h"
#include "mmhub/mmhub_2_0_0_offset.h"
#include "mmhub/mmhub_2_0_0_sh_mask.h"
-/*we don't have clk folder yet*/
-#include "dcn30/dcn30_clk_mgr.h"
+#include "dcn30_smu11_driver_if.h"
#undef FN
#define FN(reg_name, field_name) \
@@ -83,7 +78,7 @@ static const struct clk_mgr_mask clk_mgr_mask = {
/* Query SMU for all clock states for a particular clock */
-static void dcn3_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e clk, unsigned int *entry_0, unsigned int *num_levels)
+static void dcn3_init_single_clock(struct clk_mgr_internal *clk_mgr, uint32_t clk, unsigned int *entry_0, unsigned int *num_levels)
{
unsigned int i;
char *entry_i = (char *)entry_0;
@@ -104,57 +99,11 @@ static void dcn3_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e clk
}
}
-static noinline void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
+static void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
{
- /* defaults */
- double pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us;
- double sr_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_exit_time_us;
- double sr_enter_plus_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_enter_plus_exit_time_us;
- uint16_t min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz;
-
- /* Set A - Normal - default values*/
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].valid = true;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = 0;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF;
-
- /* Set B - Performance - higher minimum clocks */
-// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].valid = true;
-// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us;
-// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us;
-// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
-// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
-// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = TUNED VALUE;
-// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF;
-// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = TUNED VALUE;
-// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF;
-
- /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dummy_pstate_latency_us;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = 0;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF;
-
- /* Set D - MALL - SR enter and exit times adjusted for MALL */
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = pstate_latency_us;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = 2;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = 4;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = 0;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF;
+ DC_FP_START();
+ dcn3_fpu_build_wm_range_table(&clk_mgr->base);
+ DC_FP_END();
}
void dcn3_init_clocks(struct clk_mgr *clk_mgr_base)
@@ -184,6 +133,7 @@ void dcn3_init_clocks(struct clk_mgr *clk_mgr_base)
dcn3_init_single_clock(clk_mgr, PPCLK_DCEFCLK,
&clk_mgr_base->bw_params->clk_table.entries[0].dcfclk_mhz,
&num_levels);
+ dcn30_smu_set_min_deep_sleep_dcef_clk(clk_mgr, 0);
/* DTBCLK */
dcn3_init_single_clock(clk_mgr, PPCLK_DTBCLK,
@@ -252,11 +202,11 @@ static void dcn3_update_clocks(struct clk_mgr *clk_mgr_base,
bool update_dispclk = false;
bool enter_display_off = false;
bool dpp_clock_lowered = false;
+ bool update_pstate_unsupported_clk = false;
struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu;
bool force_reset = false;
bool update_uclk = false;
bool p_state_change_support;
- int total_plane_count;
if (dc->work_arounds.skip_clock_update || !clk_mgr->smu_present)
return;
@@ -297,15 +247,29 @@ static void dcn3_update_clocks(struct clk_mgr *clk_mgr_base,
clk_mgr_base->clks.socclk_khz = new_clocks->socclk_khz;
clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support;
- total_plane_count = clk_mgr_helper_get_active_plane_cnt(dc, context);
- p_state_change_support = new_clocks->p_state_change_support || (total_plane_count == 0);
- if (should_update_pstate_support(safe_to_lower, p_state_change_support, clk_mgr_base->clks.p_state_change_support)) {
+ p_state_change_support = new_clocks->p_state_change_support;
+
+ // invalidate the current P-State forced min in certain dc_mode_softmax situations
+ if (dc->clk_mgr->dc_mode_softmax_enabled && safe_to_lower && !p_state_change_support) {
+ if ((new_clocks->dramclk_khz <= dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000) !=
+ (clk_mgr_base->clks.dramclk_khz <= dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000))
+ update_pstate_unsupported_clk = true;
+ }
+
+ if (should_update_pstate_support(safe_to_lower, p_state_change_support, clk_mgr_base->clks.p_state_change_support) ||
+ update_pstate_unsupported_clk) {
clk_mgr_base->clks.p_state_change_support = p_state_change_support;
/* to disable P-State switching, set UCLK min = max */
- if (!clk_mgr_base->clks.p_state_change_support)
- dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK,
+ if (!clk_mgr_base->clks.p_state_change_support) {
+ if (dc->clk_mgr->dc_mode_softmax_enabled &&
+ new_clocks->dramclk_khz <= dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000)
+ dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK,
+ dc->clk_mgr->bw_params->dc_mode_softmax_memclk);
+ else
+ dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK,
clk_mgr_base->bw_params->clk_table.entries[clk_mgr_base->bw_params->clk_table.num_entries - 1].memclk_mhz);
+ }
}
/* Always update saved value, even if new value not set due to P-State switching unsupported */
@@ -421,6 +385,24 @@ static void dcn3_set_hard_max_memclk(struct clk_mgr *clk_mgr_base)
clk_mgr_base->bw_params->clk_table.entries[clk_mgr_base->bw_params->clk_table.num_entries - 1].memclk_mhz);
}
+static void dcn3_set_max_memclk(struct clk_mgr *clk_mgr_base, unsigned int memclk_mhz)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+ if (!clk_mgr->smu_present)
+ return;
+
+ dcn30_smu_set_hard_max_by_freq(clk_mgr, PPCLK_UCLK, memclk_mhz);
+}
+static void dcn3_set_min_memclk(struct clk_mgr *clk_mgr_base, unsigned int memclk_mhz)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+ if (!clk_mgr->smu_present)
+ return;
+ dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, memclk_mhz);
+}
+
/* Get current memclk states, update bounding box */
static void dcn3_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base)
{
@@ -436,9 +418,13 @@ static void dcn3_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base)
&num_levels);
clk_mgr_base->bw_params->clk_table.num_entries = num_levels ? num_levels : 1;
+ clk_mgr_base->bw_params->dc_mode_softmax_memclk = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_UCLK);
+
/* Refresh bounding box */
+ DC_FP_START();
clk_mgr_base->ctx->dc->res_pool->funcs->update_bw_bounding_box(
clk_mgr->base.ctx->dc, clk_mgr_base->bw_params);
+ DC_FP_END();
}
static bool dcn3_is_smu_present(struct clk_mgr *clk_mgr_base)
@@ -485,9 +471,11 @@ static void dcn30_notify_link_rate_change(struct clk_mgr *clk_mgr_base, struct d
if (!clk_mgr->smu_present)
return;
+ /* TODO - DP2.0 HW: calculate link 128b/132 link rate in clock manager with new formula */
+
clk_mgr->cur_phyclk_req_table[link->link_index] = link->cur_link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ;
- for (i = 0; i < MAX_PIPES * 2; i++) {
+ for (i = 0; i < MAX_LINKS; i++) {
if (clk_mgr->cur_phyclk_req_table[i] > max_phyclk_req)
max_phyclk_req = clk_mgr->cur_phyclk_req_table[i];
}
@@ -505,11 +493,14 @@ static struct clk_mgr_funcs dcn3_funcs = {
.notify_wm_ranges = dcn3_notify_wm_ranges,
.set_hard_min_memclk = dcn3_set_hard_min_memclk,
.set_hard_max_memclk = dcn3_set_hard_max_memclk,
+ .set_max_memclk = dcn3_set_max_memclk,
+ .set_min_memclk = dcn3_set_min_memclk,
.get_memclk_states_from_smu = dcn3_get_memclk_states_from_smu,
.are_clock_states_equal = dcn3_are_clock_states_equal,
.enable_pme_wa = dcn3_enable_pme_wa,
.notify_link_rate_change = dcn30_notify_link_rate_change,
- .is_smu_present = dcn3_is_smu_present
+ .is_smu_present = dcn3_is_smu_present,
+ .set_smartmux_switch = dcn30m_set_smartmux_switch
};
static void dcn3_init_clocks_fpga(struct clk_mgr *clk_mgr)
@@ -532,6 +523,8 @@ void dcn3_clk_mgr_construct(
struct pp_smu_funcs *pp_smu,
struct dccg *dccg)
{
+ struct clk_state_registers_and_bypass s = { 0 };
+
clk_mgr->base.ctx = ctx;
clk_mgr->base.funcs = &dcn3_funcs;
clk_mgr->regs = &clk_mgr_regs;
@@ -548,27 +541,19 @@ void dcn3_clk_mgr_construct(
clk_mgr->base.dprefclk_khz = 730000; // 700 MHz planned if VCO is 3.85 GHz, will be retrieved
- if (IS_FPGA_MAXIMUS_DC(ctx->dce_environment)) {
- clk_mgr->base.funcs = &dcn3_fpga_funcs;
- clk_mgr->base.dentist_vco_freq_khz = 3650000;
-
- } else {
- struct clk_state_registers_and_bypass s = { 0 };
+ /* integer part is now VCO frequency in kHz */
+ clk_mgr->base.dentist_vco_freq_khz = dcn30_get_vco_frequency_from_reg(clk_mgr);
- /* integer part is now VCO frequency in kHz */
- clk_mgr->base.dentist_vco_freq_khz = dcn30_get_vco_frequency_from_reg(clk_mgr);
+ /* in case we don't get a value from the register, use default */
+ if (clk_mgr->base.dentist_vco_freq_khz == 0)
+ clk_mgr->base.dentist_vco_freq_khz = 3650000;
+ /* Convert dprefclk units from MHz to KHz */
+ /* Value already divided by 10, some resolution lost */
- /* in case we don't get a value from the register, use default */
- if (clk_mgr->base.dentist_vco_freq_khz == 0)
- clk_mgr->base.dentist_vco_freq_khz = 3650000;
- /* Convert dprefclk units from MHz to KHz */
- /* Value already divided by 10, some resolution lost */
-
- /*TODO: uncomment assert once dcn3_dump_clk_registers is implemented */
- //ASSERT(s.dprefclk != 0);
- if (s.dprefclk != 0)
- clk_mgr->base.dprefclk_khz = s.dprefclk * 1000;
- }
+ /*TODO: uncomment assert once dcn3_dump_clk_registers is implemented */
+ //ASSERT(s.dprefclk != 0);
+ if (s.dprefclk != 0)
+ clk_mgr->base.dprefclk_khz = s.dprefclk * 1000;
clk_mgr->dfs_bypass_enabled = false;
@@ -577,11 +562,19 @@ void dcn3_clk_mgr_construct(
dce_clock_read_ss_info(clk_mgr);
clk_mgr->base.bw_params = kzalloc(sizeof(*clk_mgr->base.bw_params), GFP_KERNEL);
+ if (!clk_mgr->base.bw_params) {
+ BREAK_TO_DEBUGGER();
+ return;
+ }
/* need physical address of table to give to PMFW */
clk_mgr->wm_range_table = dm_helpers_allocate_gpu_mem(clk_mgr->base.ctx,
DC_MEM_ALLOC_TYPE_GART, sizeof(WatermarksExternal_t),
&clk_mgr->wm_range_table_addr);
+ if (!clk_mgr->wm_range_table) {
+ BREAK_TO_DEBUGGER();
+ return;
+ }
}
void dcn3_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.h
index dd4a0bd72458..2cd95ec38266 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.h
@@ -26,6 +26,66 @@
#ifndef __DCN30_CLK_MGR_H__
#define __DCN30_CLK_MGR_H__
+//CLK1_CLK_PLL_REQ
+#ifndef CLK11_CLK1_CLK_PLL_REQ__FbMult_int__SHIFT
+#define CLK11_CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0
+#define CLK11_CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc
+#define CLK11_CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10
+#define CLK11_CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL
+#define CLK11_CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L
+#define CLK11_CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L
+//CLK1_CLK0_DFS_CNTL
+#define CLK11_CLK1_CLK0_DFS_CNTL__CLK0_DIVIDER__SHIFT 0x0
+#define CLK11_CLK1_CLK0_DFS_CNTL__CLK0_DIVIDER_MASK 0x0000007FL
+/*DPREF clock related*/
+#define CLK0_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0
+#define CLK0_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL
+#define CLK1_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0
+#define CLK1_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL
+#define CLK2_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0
+#define CLK2_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL
+#define CLK3_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0
+#define CLK3_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL
+
+//CLK3_0_CLK3_CLK_PLL_REQ
+#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_int__SHIFT 0x0
+#define CLK3_0_CLK3_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc
+#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10
+#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL
+#define CLK3_0_CLK3_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L
+#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L
+
+#define mmCLK0_CLK2_DFS_CNTL 0x16C55
+#define mmCLK00_CLK0_CLK2_DFS_CNTL 0x16C55
+#define mmCLK01_CLK0_CLK2_DFS_CNTL 0x16E55
+#define mmCLK02_CLK0_CLK2_DFS_CNTL 0x17055
+
+#define mmCLK0_CLK3_DFS_CNTL 0x16C60
+#define mmCLK00_CLK0_CLK3_DFS_CNTL 0x16C60
+#define mmCLK01_CLK0_CLK3_DFS_CNTL 0x16E60
+#define mmCLK02_CLK0_CLK3_DFS_CNTL 0x17060
+#define mmCLK03_CLK0_CLK3_DFS_CNTL 0x17260
+
+#define mmCLK0_CLK_PLL_REQ 0x16C10
+#define mmCLK00_CLK0_CLK_PLL_REQ 0x16C10
+#define mmCLK01_CLK0_CLK_PLL_REQ 0x16E10
+#define mmCLK02_CLK0_CLK_PLL_REQ 0x17010
+#define mmCLK03_CLK0_CLK_PLL_REQ 0x17210
+
+#define mmCLK1_CLK_PLL_REQ 0x1B00D
+#define mmCLK10_CLK1_CLK_PLL_REQ 0x1B00D
+#define mmCLK11_CLK1_CLK_PLL_REQ 0x1B20D
+#define mmCLK12_CLK1_CLK_PLL_REQ 0x1B40D
+#define mmCLK13_CLK1_CLK_PLL_REQ 0x1B60D
+
+#define mmCLK2_CLK_PLL_REQ 0x17E0D
+
+/*AMCLK*/
+
+#define mmCLK11_CLK1_CLK0_DFS_CNTL 0x1B23F
+#define mmCLK11_CLK1_CLK_PLL_REQ 0x1B20D
+
+#endif
void dcn3_init_clocks(struct clk_mgr *clk_mgr_base);
void dcn3_clk_mgr_construct(struct dc_context *ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
index 8ecc708bcd9e..827bc2431d5d 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
@@ -23,12 +23,14 @@
*
*/
-#include <linux/delay.h>
#include "dcn30_clk_mgr_smu_msg.h"
#include "clk_mgr_internal.h"
#include "reg_helper.h"
+#include "dm_helpers.h"
+
#include "dalsmc.h"
+#include "dcn30_smu11_driver_if.h"
#define mmDAL_MSG_REG 0x1628A
#define mmDAL_ARG_REG 0x16273
@@ -51,6 +53,7 @@
*/
static uint32_t dcn30_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, unsigned int delay_us, unsigned int max_retries)
{
+ const uint32_t initial_max_retries = max_retries;
uint32_t reg = 0;
do {
@@ -66,13 +69,14 @@ static uint32_t dcn30_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, un
/* handle DALSMC_Result_CmdRejectedBusy? */
- /* Log? */
+ TRACE_SMU_MSG_DELAY(0, 0, delay_us * (initial_max_retries - max_retries), clk_mgr->base.ctx);
return reg;
}
static bool dcn30_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uint32_t msg_id, uint32_t param_in, uint32_t *param_out)
{
+ uint32_t result;
/* Wait for response register to be ready */
dcn30_smu_wait_for_response(clk_mgr, 10, 200000);
@@ -85,8 +89,16 @@ static bool dcn30_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uint
/* Trigger the message transaction by writing the message ID */
REG_WRITE(DAL_MSG_REG, msg_id);
+ TRACE_SMU_MSG(msg_id, param_in, clk_mgr->base.ctx);
+
+ result = dcn30_smu_wait_for_response(clk_mgr, 10, 200000);
+
+ if (IS_SMU_TIMEOUT(result)) {
+ dm_helpers_smu_timeout(CTX, msg_id, param_in, 10 * 200000);
+ }
+
/* Wait for response */
- if (dcn30_smu_wait_for_response(clk_mgr, 10, 200000) == DALSMC_Result_OK) {
+ if (result == DALSMC_Result_OK) {
if (param_out)
*param_out = REG_READ(DAL_ARG_REG);
@@ -197,7 +209,7 @@ void dcn30_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr)
}
/* Returns the actual frequency that was set in MHz, 0 on failure */
-unsigned int dcn30_smu_set_hard_min_by_freq(struct clk_mgr_internal *clk_mgr, PPCLK_e clk, uint16_t freq_mhz)
+unsigned int dcn30_smu_set_hard_min_by_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint16_t freq_mhz)
{
uint32_t response = 0;
@@ -215,7 +227,7 @@ unsigned int dcn30_smu_set_hard_min_by_freq(struct clk_mgr_internal *clk_mgr, PP
}
/* Returns the actual frequency that was set in MHz, 0 on failure */
-unsigned int dcn30_smu_set_hard_max_by_freq(struct clk_mgr_internal *clk_mgr, PPCLK_e clk, uint16_t freq_mhz)
+unsigned int dcn30_smu_set_hard_max_by_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint16_t freq_mhz)
{
uint32_t response = 0;
@@ -246,7 +258,7 @@ unsigned int dcn30_smu_set_hard_max_by_freq(struct clk_mgr_internal *clk_mgr, PP
*
* Returns 0 on failure
*/
-unsigned int dcn30_smu_get_dpm_freq_by_index(struct clk_mgr_internal *clk_mgr, PPCLK_e clk, uint8_t dpm_level)
+unsigned int dcn30_smu_get_dpm_freq_by_index(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint8_t dpm_level)
{
uint32_t response = 0;
@@ -264,7 +276,7 @@ unsigned int dcn30_smu_get_dpm_freq_by_index(struct clk_mgr_internal *clk_mgr, P
}
/* Returns the max DPM frequency in DC mode in MHz, 0 on failure */
-unsigned int dcn30_smu_get_dc_mode_max_dpm_freq(struct clk_mgr_internal *clk_mgr, PPCLK_e clk)
+unsigned int dcn30_smu_get_dc_mode_max_dpm_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk)
{
uint32_t response = 0;
@@ -302,6 +314,9 @@ void dcn30_smu_set_display_refresh_from_mall(struct clk_mgr_internal *clk_mgr, b
/* bits 8:7 for cache timer scale, bits 6:1 for cache timer delay, bit 0 = 1 for enable, = 0 for disable */
uint32_t param = (cache_timer_scale << 7) | (cache_timer_delay << 1) | (enable ? 1 : 0);
+ smu_print("SMU Set display refresh from mall: enable = %d, cache_timer_delay = %d, cache_timer_scale = %d\n",
+ enable, cache_timer_delay, cache_timer_scale);
+
dcn30_smu_send_msg_with_param(clk_mgr,
DALSMC_MSG_SetDisplayRefreshFromMall, param, NULL);
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.h
index dd2640a3ce5d..ca9f5296be94 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.h
@@ -28,65 +28,6 @@
#include "core_types.h"
-#define SMU11_DRIVER_IF_VERSION 0x1F
-
-typedef enum {
- PPCLK_GFXCLK = 0,
- PPCLK_SOCCLK,
- PPCLK_UCLK,
- PPCLK_FCLK,
- PPCLK_DCLK_0,
- PPCLK_VCLK_0,
- PPCLK_DCLK_1,
- PPCLK_VCLK_1,
- PPCLK_DCEFCLK,
- PPCLK_DISPCLK,
- PPCLK_PIXCLK,
- PPCLK_PHYCLK,
- PPCLK_DTBCLK,
- PPCLK_COUNT,
-} PPCLK_e;
-
-typedef struct {
- uint16_t MinClock; // This is either DCEFCLK or SOCCLK (in MHz)
- uint16_t MaxClock; // This is either DCEFCLK or SOCCLK (in MHz)
- uint16_t MinUclk;
- uint16_t MaxUclk;
-
- uint8_t WmSetting;
- uint8_t Flags;
- uint8_t Padding[2];
-
-} WatermarkRowGeneric_t;
-
-#define NUM_WM_RANGES 4
-
-typedef enum {
- WM_SOCCLK = 0,
- WM_DCEFCLK,
- WM_COUNT,
-} WM_CLOCK_e;
-
-typedef enum {
- WATERMARKS_CLOCK_RANGE = 0,
- WATERMARKS_DUMMY_PSTATE,
- WATERMARKS_MALL,
- WATERMARKS_COUNT,
-} WATERMARKS_FLAGS_e;
-
-typedef struct {
- // Watermarks
- WatermarkRowGeneric_t WatermarkRow[WM_COUNT][NUM_WM_RANGES];
-} Watermarks_t;
-
-typedef struct {
- Watermarks_t Watermarks;
-
- uint32_t MmHubPadding[8]; // SMU internal use
-} WatermarksExternal_t;
-
-#define TABLE_WATERMARKS 1
-
struct clk_mgr_internal;
bool dcn30_smu_test_message(struct clk_mgr_internal *clk_mgr, uint32_t input);
@@ -97,10 +38,10 @@ void dcn30_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint
void dcn30_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low);
void dcn30_smu_transfer_wm_table_smu_2_dram(struct clk_mgr_internal *clk_mgr);
void dcn30_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr);
-unsigned int dcn30_smu_set_hard_min_by_freq(struct clk_mgr_internal *clk_mgr, PPCLK_e clk, uint16_t freq_mhz);
-unsigned int dcn30_smu_set_hard_max_by_freq(struct clk_mgr_internal *clk_mgr, PPCLK_e clk, uint16_t freq_mhz);
-unsigned int dcn30_smu_get_dpm_freq_by_index(struct clk_mgr_internal *clk_mgr, PPCLK_e clk, uint8_t dpm_level);
-unsigned int dcn30_smu_get_dc_mode_max_dpm_freq(struct clk_mgr_internal *clk_mgr, PPCLK_e clk);
+unsigned int dcn30_smu_set_hard_min_by_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint16_t freq_mhz);
+unsigned int dcn30_smu_set_hard_max_by_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint16_t freq_mhz);
+unsigned int dcn30_smu_get_dpm_freq_by_index(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint8_t dpm_level);
+unsigned int dcn30_smu_get_dc_mode_max_dpm_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk);
void dcn30_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz);
void dcn30_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t num_displays);
void dcn30_smu_set_display_refresh_from_mall(struct clk_mgr_internal *clk_mgr, bool enable, uint8_t cache_timer_delay, uint8_t cache_timer_scale);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h
new file mode 100644
index 000000000000..1bfd6f66f035
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: MIT
+/* Copyright © 2022-2024 Advanced Micro Devices, Inc. All rights reserved. */
+
+#define SMU11_DRIVER_IF_VERSION 0x40
+
+//Only Clks that have DPM descriptors are listed here
+typedef enum {
+ PPCLK_GFXCLK = 0,
+ PPCLK_SOCCLK,
+ PPCLK_UCLK,
+ PPCLK_FCLK,
+ PPCLK_DCLK_0,
+ PPCLK_VCLK_0,
+ PPCLK_DCLK_1,
+ PPCLK_VCLK_1,
+ PPCLK_DCEFCLK,
+ PPCLK_DISPCLK,
+ PPCLK_PIXCLK,
+ PPCLK_PHYCLK,
+ PPCLK_DTBCLK,
+ PPCLK_COUNT,
+} PPCLK_e;
+
+typedef struct {
+ uint16_t MinClock; // This is either DCEFCLK or SOCCLK (in MHz)
+ uint16_t MaxClock; // This is either DCEFCLK or SOCCLK (in MHz)
+ uint16_t MinUclk;
+ uint16_t MaxUclk;
+
+ uint8_t WmSetting;
+ uint8_t Flags;
+ uint8_t Padding[2];
+
+} WatermarkRowGeneric_t;
+
+#define NUM_WM_RANGES 4
+
+typedef enum {
+ WM_SOCCLK = 0,
+ WM_DCEFCLK,
+ WM_COUNT,
+} WM_CLOCK_e;
+
+typedef enum {
+ WATERMARKS_CLOCK_RANGE = 0,
+ WATERMARKS_DUMMY_PSTATE,
+ WATERMARKS_MALL,
+ WATERMARKS_COUNT,
+} WATERMARKS_FLAGS_e;
+
+typedef struct {
+ // Watermarks
+ WatermarkRowGeneric_t WatermarkRow[WM_COUNT][NUM_WM_RANGES];
+} Watermarks_t;
+
+typedef struct {
+ Watermarks_t Watermarks;
+
+ uint32_t MmHubPadding[8]; // SMU internal use
+} WatermarksExternal_t;
+
+// Table types
+#define TABLE_PPTABLE 0
+#define TABLE_WATERMARKS 1
+#define TABLE_AVFS_PSM_DEBUG 2
+#define TABLE_AVFS_FUSE_OVERRIDE 3
+#define TABLE_PMSTATUSLOG 4
+#define TABLE_SMU_METRICS 5
+#define TABLE_DRIVER_SMU_CONFIG 6
+#define TABLE_ACTIVITY_MONITOR_COEFF 7
+#define TABLE_OVERDRIVE 8
+#define TABLE_I2C_COMMANDS 9
+#define TABLE_PACE 10
+#define TABLE_ECCINFO 11
+#define TABLE_COUNT 12
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.c
new file mode 100644
index 000000000000..8e8a11c7437e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "clk_mgr_internal.h"
+#include "dcn30/dcn30m_clk_mgr.h"
+#include "dcn30m_clk_mgr_smu_msg.h"
+
+
+uint32_t dcn30m_set_smartmux_switch(struct clk_mgr *clk_mgr_base, uint32_t pins_to_set)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+ return dcn30m_smu_set_smart_mux_switch(clk_mgr, pins_to_set);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.h
new file mode 100644
index 000000000000..757985b2eadc
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN30M_CLK_MGR_H__
+#define __DCN30M_CLK_MGR_H__
+
+uint32_t dcn30m_set_smartmux_switch(struct clk_mgr *clk_mgr_base, uint32_t pins_to_set);
+
+#endif //__DCN30M_CLK_MGR_H__
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.c
new file mode 100644
index 000000000000..0dd0583ff21e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dcn30m_clk_mgr_smu_msg.h"
+
+#include "clk_mgr_internal.h"
+#include "reg_helper.h"
+#include "dm_helpers.h"
+
+#include "dalsmc.h"
+
+#define mmDAL_MSG_REG 0x1628A
+#define mmDAL_ARG_REG 0x16273
+#define mmDAL_RESP_REG 0x16274
+
+#define REG(reg_name) \
+ mm ## reg_name
+
+#include "logger_types.h"
+#undef DC_LOGGER
+#define DC_LOGGER \
+ CTX->logger
+#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
+
+
+/*
+ * Function to be used instead of REG_WAIT macro because the wait ends when
+ * the register is NOT EQUAL to zero, and because the translation in msg_if.h
+ * won't work with REG_WAIT.
+ */
+static uint32_t dcn30m_smu_wait_for_response(struct clk_mgr_internal *clk_mgr,
+ unsigned int delay_us, unsigned int max_retries)
+{
+ uint32_t reg = 0;
+
+ do {
+ reg = REG_READ(DAL_RESP_REG);
+ if (reg)
+ break;
+
+ if (delay_us >= 1000)
+ msleep(delay_us/1000);
+ else if (delay_us > 0)
+ udelay(delay_us);
+ } while (max_retries--);
+
+ /* handle DALSMC_Result_CmdRejectedBusy? */
+
+ /* Log? */
+
+ return reg;
+}
+
+static bool dcn30m_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
+ uint32_t msg_id, uint32_t param_in, uint32_t *param_out)
+{
+ uint32_t result;
+ /* Wait for response register to be ready */
+ dcn30m_smu_wait_for_response(clk_mgr, 10, 200000);
+
+ /* Clear response register */
+ REG_WRITE(DAL_RESP_REG, 0);
+
+ /* Set the parameter register for the SMU message */
+ REG_WRITE(DAL_ARG_REG, param_in);
+
+ /* Trigger the message transaction by writing the message ID */
+ REG_WRITE(DAL_MSG_REG, msg_id);
+
+ result = dcn30m_smu_wait_for_response(clk_mgr, 10, 200000);
+
+ if (IS_SMU_TIMEOUT(result))
+ dm_helpers_smu_timeout(CTX, msg_id, param_in, 10 * 200000);
+
+ /* Wait for response */
+ if (result == DALSMC_Result_OK) {
+ if (param_out)
+ *param_out = REG_READ(DAL_ARG_REG);
+
+ return true;
+ }
+
+ return false;
+}
+
+uint32_t dcn30m_smu_set_smart_mux_switch(struct clk_mgr_internal *clk_mgr, uint32_t pins_to_set)
+{
+ uint32_t response = 0;
+
+ smu_print("SMU Set SmartMux Switch: switch_dgpu = %d\n", pins_to_set);
+
+ dcn30m_smu_send_msg_with_param(clk_mgr,
+ DALSMC_MSG_SmartAccess, pins_to_set, &response);
+
+ return response;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.h
new file mode 100644
index 000000000000..8a59a473fc5e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef DAL_DC_DCN30M_CLK_MGR_SMU_MSG_H_
+#define DAL_DC_DCN30M_CLK_MGR_SMU_MSG_H_
+
+#include "core_types.h"
+
+struct clk_mgr_internal;
+
+uint32_t dcn30m_smu_set_smart_mux_switch(struct clk_mgr_internal *clk_mgr, uint32_t pins_to_set);
+#endif /* DAL_DC_DCN30M_CLK_MGR_SMU_MSG_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c
index 6ea642615854..b4fb17b7a096 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c
@@ -29,6 +29,7 @@
#include <linux/delay.h>
#include "dcn301_smu.h"
+#include "dm_helpers.h"
#include "vangogh_ip_offset.h"
@@ -41,6 +42,12 @@
#define FN(reg_name, field) \
FD(reg_name##__##field)
+#include "logger_types.h"
+#undef DC_LOGGER
+#define DC_LOGGER \
+ CTX->logger
+#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
+
#define VBIOSSMC_MSG_GetSmuVersion 0x2
#define VBIOSSMC_MSG_SetDispclkFreq 0x4
#define VBIOSSMC_MSG_SetDprefclkFreq 0x5
@@ -88,12 +95,21 @@ static uint32_t dcn301_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, u
return res_val;
}
-int dcn301_smu_send_msg_with_param(
- struct clk_mgr_internal *clk_mgr,
- unsigned int msg_id, unsigned int param)
+static int dcn301_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
+ unsigned int msg_id,
+ unsigned int param)
{
uint32_t result;
+ result = dcn301_smu_wait_for_response(clk_mgr, 10, 200000);
+
+ if (result != VBIOSSMC_Result_OK)
+ smu_print("SMU Response was not OK. SMU response after wait received is: %d\n", result);
+
+ if (result == VBIOSSMC_Status_BUSY) {
+ return -1;
+ }
+
/* First clear response register */
REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Status_BUSY);
@@ -105,7 +121,10 @@ int dcn301_smu_send_msg_with_param(
result = dcn301_smu_wait_for_response(clk_mgr, 10, 200000);
- ASSERT(result == VBIOSSMC_Result_OK);
+ if (IS_SMU_TIMEOUT(result)) {
+ ASSERT(0);
+ dm_helpers_smu_timeout(CTX, msg_id, param, 10 * 200000);
+ }
/* Actual dispclk set is returned in the parameter register */
return REG_READ(MP1_SMN_C2PMSG_83);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
index 3eee32faa208..7aee02d56292 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
@@ -32,6 +32,9 @@
// For dcn20_update_clocks_update_dpp_dto
#include "dcn20/dcn20_clk_mgr.h"
+// For DML FPU code
+#include "dml/dcn20/dcn20_fpu.h"
+
#include "vg_clk_mgr.h"
#include "dcn301_smu.h"
#include "reg_helper.h"
@@ -89,9 +92,9 @@ static int vg_get_active_display_cnt_wa(
return display_count;
}
-void vg_update_clocks(struct clk_mgr *clk_mgr_base,
- struct dc_state *context,
- bool safe_to_lower)
+static void vg_update_clocks(struct clk_mgr *clk_mgr_base,
+ struct dc_state *context,
+ bool safe_to_lower)
{
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
@@ -114,7 +117,7 @@ void vg_update_clocks(struct clk_mgr *clk_mgr_base,
display_count = vg_get_active_display_cnt_wa(dc, context);
/* if we can go lower, go lower */
- if (display_count == 0 && !IS_DIAG_DC(dc->ctx->dce_environment)) {
+ if (display_count == 0) {
union display_idle_optimization_u idle_info = { 0 };
idle_info.idle_info.df_request_disabled = 1;
@@ -148,10 +151,8 @@ void vg_update_clocks(struct clk_mgr *clk_mgr_base,
}
// workaround: Limit dppclk to 100Mhz to avoid lower eDP panel switch to plus 4K monitor underflow.
- if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
- if (new_clocks->dppclk_khz < 100000)
- new_clocks->dppclk_khz = 100000;
- }
+ if (new_clocks->dppclk_khz < 100000)
+ new_clocks->dppclk_khz = 100000;
if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) {
if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz)
@@ -176,8 +177,7 @@ void vg_update_clocks(struct clk_mgr *clk_mgr_base,
if (update_dppclk || update_dispclk)
dcn301_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz);
// always update dtos unless clock is lowered and not safe to lower
- if (new_clocks->dppclk_khz >= dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz)
- dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
+ dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
}
}
@@ -252,16 +252,16 @@ static void vg_dump_clk_registers(struct clk_state_registers_and_bypass *regs_an
regs_and_bypass->dppclk = internal.CLK1_CLK1_CURRENT_CNT / 10;
regs_and_bypass->dppclk_bypass = internal.CLK1_CLK1_BYPASS_CNTL & 0x0007;
- if (regs_and_bypass->dppclk_bypass < 0 || regs_and_bypass->dppclk_bypass > 4)
+ if (regs_and_bypass->dppclk_bypass > 4)
regs_and_bypass->dppclk_bypass = 0;
regs_and_bypass->dcfclk_bypass = internal.CLK1_CLK3_BYPASS_CNTL & 0x0007;
- if (regs_and_bypass->dcfclk_bypass < 0 || regs_and_bypass->dcfclk_bypass > 4)
+ if (regs_and_bypass->dcfclk_bypass > 4)
regs_and_bypass->dcfclk_bypass = 0;
regs_and_bypass->dispclk_bypass = internal.CLK1_CLK0_BYPASS_CNTL & 0x0007;
- if (regs_and_bypass->dispclk_bypass < 0 || regs_and_bypass->dispclk_bypass > 4)
+ if (regs_and_bypass->dispclk_bypass > 4)
regs_and_bypass->dispclk_bypass = 0;
regs_and_bypass->dprefclk_bypass = internal.CLK1_CLK2_BYPASS_CNTL & 0x0007;
- if (regs_and_bypass->dprefclk_bypass < 0 || regs_and_bypass->dprefclk_bypass > 4)
+ if (regs_and_bypass->dprefclk_bypass > 4)
regs_and_bypass->dprefclk_bypass = 0;
if (log_info->enabled) {
@@ -367,18 +367,6 @@ static void vg_dump_clk_registers(struct clk_state_registers_and_bypass *regs_an
}
}
-/* This function produce translated logical clk state values*/
-void vg_get_clk_states(struct clk_mgr *clk_mgr_base, struct clk_states *s)
-{
-
- struct clk_state_registers_and_bypass sb = { 0 };
- struct clk_log_info log_info = { 0 };
-
- vg_dump_clk_registers(&sb, clk_mgr_base, &log_info);
-
- s->dprefclk_khz = sb.dprefclk * 1000;
-}
-
static void vg_enable_pme_wa(struct clk_mgr *clk_mgr_base)
{
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
@@ -386,7 +374,7 @@ static void vg_enable_pme_wa(struct clk_mgr *clk_mgr_base)
dcn301_smu_enable_pme_wa(clk_mgr);
}
-void vg_init_clocks(struct clk_mgr *clk_mgr)
+static void vg_init_clocks(struct clk_mgr *clk_mgr)
{
memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
// Assumption is that boot state always supports pstate
@@ -539,80 +527,18 @@ static struct clk_bw_params vg_bw_params = {
};
-static struct wm_table ddr4_wm_table = {
- .entries = {
- {
- .wm_inst = WM_A,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.72,
- .sr_exit_time_us = 6.09,
- .sr_enter_plus_exit_time_us = 7.14,
- .valid = true,
- },
- {
- .wm_inst = WM_B,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.72,
- .sr_exit_time_us = 10.12,
- .sr_enter_plus_exit_time_us = 11.48,
- .valid = true,
- },
- {
- .wm_inst = WM_C,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.72,
- .sr_exit_time_us = 10.12,
- .sr_enter_plus_exit_time_us = 11.48,
- .valid = true,
- },
- {
- .wm_inst = WM_D,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.72,
- .sr_exit_time_us = 10.12,
- .sr_enter_plus_exit_time_us = 11.48,
- .valid = true,
- },
- }
-};
+static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks)
+{
+ uint32_t max = 0;
+ int i;
-static struct wm_table lpddr5_wm_table = {
- .entries = {
- {
- .wm_inst = WM_A,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.65333,
- .sr_exit_time_us = 7.95,
- .sr_enter_plus_exit_time_us = 9,
- .valid = true,
- },
- {
- .wm_inst = WM_B,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.65333,
- .sr_exit_time_us = 9.82,
- .sr_enter_plus_exit_time_us = 11.196,
- .valid = true,
- },
- {
- .wm_inst = WM_C,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.65333,
- .sr_exit_time_us = 9.89,
- .sr_enter_plus_exit_time_us = 11.24,
- .valid = true,
- },
- {
- .wm_inst = WM_D,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.65333,
- .sr_exit_time_us = 9.748,
- .sr_enter_plus_exit_time_us = 11.102,
- .valid = true,
- },
+ for (i = 0; i < num_clocks; ++i) {
+ if (clocks[i] > max)
+ max = clocks[i];
}
-};
+ return max;
+}
static unsigned int find_dcfclk_for_voltage(const struct vg_dpm_clocks *clock_table,
unsigned int voltage)
@@ -620,6 +546,8 @@ static unsigned int find_dcfclk_for_voltage(const struct vg_dpm_clocks *clock_ta
int i;
for (i = 0; i < VG_NUM_SOC_VOLTAGE_LEVELS; i++) {
+ if (i >= VG_NUM_DCFCLK_DPM_LEVELS)
+ break;
if (clock_table->SocVoltage[i] == voltage)
return clock_table->DcfClocks[i];
}
@@ -635,10 +563,12 @@ static void vg_clk_mgr_helper_populate_bw_params(
{
int i, j;
struct clk_bw_params *bw_params = clk_mgr->base.bw_params;
+ uint32_t max_dispclk = 0, max_dppclk = 0;
j = -1;
- ASSERT(VG_NUM_FCLK_DPM_LEVELS <= MAX_NUM_DPM_LVL);
+ static_assert(VG_NUM_FCLK_DPM_LEVELS <= MAX_NUM_DPM_LVL,
+ "number of reported FCLK DPM levels exceeds maximum");
/* Find lowest DPM, FCLK is filled in reverse order*/
@@ -655,14 +585,33 @@ static void vg_clk_mgr_helper_populate_bw_params(
return;
}
+ /* dispclk and dppclk can be max at any voltage, same number of levels for both */
+ if (clock_table->NumDispClkLevelsEnabled <= VG_NUM_DISPCLK_DPM_LEVELS &&
+ clock_table->NumDispClkLevelsEnabled <= VG_NUM_DPPCLK_DPM_LEVELS) {
+ max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled);
+ max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled);
+ } else {
+ ASSERT(0);
+ }
+
bw_params->clk_table.num_entries = j + 1;
- for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) {
+ for (i = 0; i < bw_params->clk_table.num_entries - 1; i++, j--) {
bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[j].fclk;
bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].memclk;
bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].voltage;
bw_params->clk_table.entries[i].dcfclk_mhz = find_dcfclk_for_voltage(clock_table, clock_table->DfPstateTable[j].voltage);
+
+ /* Now update clocks we do read */
+ bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
+ bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
}
+ bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[j].fclk;
+ bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].memclk;
+ bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].voltage;
+ bw_params->clk_table.entries[i].dcfclk_mhz = find_max_clk_value(clock_table->DcfClocks, VG_NUM_DCFCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].dispclk_mhz = find_max_clk_value(clock_table->DispClocks, VG_NUM_DISPCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].dppclk_mhz = find_max_clk_value(clock_table->DppClocks, VG_NUM_DPPCLK_DPM_LEVELS);
bw_params->vram_type = bios_info->memory_type;
bw_params->num_channels = bios_info->ma_channel_number;
@@ -683,10 +632,9 @@ static void vg_clk_mgr_helper_populate_bw_params(
/*
* WM set D will be re-purposed for memory retraining
*/
- bw_params->wm_table.entries[WM_D].pstate_latency_us = LPDDR_MEM_RETRAIN_LATENCY;
- bw_params->wm_table.entries[WM_D].wm_inst = WM_D;
- bw_params->wm_table.entries[WM_D].wm_type = WM_TYPE_RETRAINING;
- bw_params->wm_table.entries[WM_D].valid = true;
+ DC_FP_START();
+ dcn21_clk_mgr_set_bw_params_wm_table(bw_params);
+ DC_FP_END();
}
}
@@ -733,6 +681,7 @@ void vg_clk_mgr_construct(
struct dccg *dccg)
{
struct smu_dpm_clks smu_dpm_clks = { 0 };
+ struct clk_log_info log_info = {0};
clk_mgr->base.base.ctx = ctx;
clk_mgr->base.base.funcs = &vg_funcs;
@@ -753,7 +702,7 @@ void vg_clk_mgr_construct(
sizeof(struct watermarks),
&clk_mgr->smu_wm_set.mc_address.quad_part);
- if (clk_mgr->smu_wm_set.wm_set == 0) {
+ if (!clk_mgr->smu_wm_set.wm_set) {
clk_mgr->smu_wm_set.wm_set = &dummy_wms;
clk_mgr->smu_wm_set.mc_address.quad_part = 0;
}
@@ -772,32 +721,25 @@ void vg_clk_mgr_construct(
ASSERT(smu_dpm_clks.dpm_clks);
- if (IS_FPGA_MAXIMUS_DC(ctx->dce_environment)) {
- vg_funcs.update_clocks = dcn2_update_clocks_fpga;
- clk_mgr->base.base.dentist_vco_freq_khz = 3600000;
- } else {
- struct clk_log_info log_info = {0};
-
- clk_mgr->base.smu_ver = dcn301_smu_get_smu_version(&clk_mgr->base);
+ clk_mgr->base.smu_ver = dcn301_smu_get_smu_version(&clk_mgr->base);
- if (clk_mgr->base.smu_ver)
- clk_mgr->base.smu_present = true;
+ if (clk_mgr->base.smu_ver)
+ clk_mgr->base.smu_present = true;
- /* TODO: Check we get what we expect during bringup */
- clk_mgr->base.base.dentist_vco_freq_khz = get_vco_frequency_from_reg(&clk_mgr->base);
+ /* TODO: Check we get what we expect during bringup */
+ clk_mgr->base.base.dentist_vco_freq_khz = get_vco_frequency_from_reg(&clk_mgr->base);
- /* in case we don't get a value from the register, use default */
- if (clk_mgr->base.base.dentist_vco_freq_khz == 0)
- clk_mgr->base.base.dentist_vco_freq_khz = 3600000;
+ /* in case we don't get a value from the register, use default */
+ if (clk_mgr->base.base.dentist_vco_freq_khz == 0)
+ clk_mgr->base.base.dentist_vco_freq_khz = 3600000;
- if (ctx->dc_bios->integrated_info->memory_type == LpDdr5MemType) {
- vg_bw_params.wm_table = lpddr5_wm_table;
- } else {
- vg_bw_params.wm_table = ddr4_wm_table;
- }
- /* Saved clocks configured at boot for debug purposes */
- vg_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, &clk_mgr->base.base, &log_info);
+ if (ctx->dc_bios->integrated_info->memory_type == LpDdr5MemType) {
+ vg_bw_params.wm_table = lpddr5_wm_table;
+ } else {
+ vg_bw_params.wm_table = ddr4_wm_table;
}
+ /* Saved clocks configured at boot for debug purposes */
+ vg_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, &clk_mgr->base.base, &log_info);
clk_mgr->base.base.dprefclk_khz = 600000;
dce_clock_read_ss_info(&clk_mgr->base);
@@ -805,7 +747,7 @@ void vg_clk_mgr_construct(
clk_mgr->base.base.bw_params = &vg_bw_params;
vg_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks);
- if (ctx->dc_bios && ctx->dc_bios->integrated_info) {
+ if (ctx->dc_bios->integrated_info) {
vg_clk_mgr_helper_populate_bw_params(
&clk_mgr->base,
ctx->dc_bios->integrated_info,
@@ -815,12 +757,6 @@ void vg_clk_mgr_construct(
if (smu_dpm_clks.dpm_clks && smu_dpm_clks.mc_address.quad_part != 0)
dm_helpers_free_gpu_mem(clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_FRAME_BUFFER,
smu_dpm_clks.dpm_clks);
-/*
- if (!IS_FPGA_MAXIMUS_DC(ctx->dce_environment) && clk_mgr->base.smu_ver) {
- enable powerfeatures when displaycount goes to 0
- dcn301_smu_enable_phy_refclk_pwrdwn(clk_mgr, !debug->disable_48mhz_pwrdwn);
- }
-*/
}
void vg_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.h
index 7255477307f1..75884f572989 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.h
@@ -29,6 +29,9 @@
struct watermarks;
+extern struct wm_table ddr4_wm_table;
+extern struct wm_table lpddr5_wm_table;
+
struct smu_watermark_set {
struct watermarks *wm_set;
union large_integer mc_address;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
index f4c9a458ace8..051052bd10c9 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
@@ -47,8 +47,15 @@
#include "dcn30/dcn30_clk_mgr.h"
#include "dc_dmub_srv.h"
+#include "link_service.h"
+
+#include "logger_types.h"
+
#include "yellow_carp_offset.h"
+#undef DC_LOGGER
+#define DC_LOGGER \
+ clk_mgr->base.base.ctx->logger
#define regCLK1_CLK_PLL_REQ 0x0237
#define regCLK1_CLK_PLL_REQ_BASE_IDX 0
@@ -66,7 +73,7 @@
#define TO_CLK_MGR_DCN31(clk_mgr)\
container_of(clk_mgr, struct clk_mgr_dcn31, base)
-int dcn31_get_active_display_cnt_wa(
+static int dcn31_get_active_display_cnt_wa(
struct dc *dc,
struct dc_state *context)
{
@@ -81,6 +88,11 @@ int dcn31_get_active_display_cnt_wa(
stream->signal == SIGNAL_TYPE_DVI_SINGLE_LINK ||
stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK)
tmds_present = true;
+
+ /* Checking stream / link detection ensuring that PHY is active*/
+ if (dc_is_dp_signal(stream->signal) && !stream->dpms_off)
+ display_count++;
+
}
for (i = 0; i < dc->link_count; i++) {
@@ -99,7 +111,7 @@ int dcn31_get_active_display_cnt_wa(
return display_count;
}
-static void dcn31_disable_otg_wa(struct clk_mgr *clk_mgr_base, bool disable)
+static void dcn31_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable)
{
struct dc *dc = clk_mgr_base->ctx->dc;
int i;
@@ -110,15 +122,16 @@ static void dcn31_disable_otg_wa(struct clk_mgr *clk_mgr_base, bool disable)
if (pipe->top_pipe || pipe->prev_odm_pipe)
continue;
if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) {
- if (disable)
+ if (disable) {
pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
- else
+ reset_sync_context_for_pipe(dc, context, i);
+ } else
pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
}
}
}
-static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
+void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
struct dc_state *context,
bool safe_to_lower)
{
@@ -139,9 +152,9 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
* also if safe to lower is false, we just go in the higher state
*/
if (safe_to_lower) {
- if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_ALLOW &&
+ if (new_clocks->zstate_support != DCN_ZSTATE_SUPPORT_DISALLOW &&
new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) {
- dcn31_smu_set_Z9_support(clk_mgr, true);
+ dcn31_smu_set_zstate_support(clk_mgr, new_clocks->zstate_support);
dm_helpers_enable_periodic_detection(clk_mgr_base->ctx, true);
clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
}
@@ -158,6 +171,7 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
union display_idle_optimization_u idle_info = { 0 };
idle_info.idle_info.df_request_disabled = 1;
idle_info.idle_info.phy_ref_clk_off = 1;
+ idle_info.idle_info.s0i2_rdy = 1;
dcn31_smu_set_display_idle_optimization(clk_mgr, idle_info.data);
/* update power state */
clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
@@ -166,7 +180,7 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
} else {
if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_DISALLOW &&
new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) {
- dcn31_smu_set_Z9_support(clk_mgr, false);
+ dcn31_smu_set_zstate_support(clk_mgr, DCN_ZSTATE_SUPPORT_DISALLOW);
dm_helpers_enable_periodic_detection(clk_mgr_base->ctx, false);
clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
}
@@ -197,10 +211,8 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
}
// workaround: Limit dppclk to 100Mhz to avoid lower eDP panel switch to plus 4K monitor underflow.
- if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
- if (new_clocks->dppclk_khz < 100000)
- new_clocks->dppclk_khz = 100000;
- }
+ if (new_clocks->dppclk_khz < 100000)
+ new_clocks->dppclk_khz = 100000;
if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) {
if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz)
@@ -210,11 +222,11 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
}
if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
- dcn31_disable_otg_wa(clk_mgr_base, true);
+ dcn31_disable_otg_wa(clk_mgr_base, context, true);
clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
dcn31_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
- dcn31_disable_otg_wa(clk_mgr_base, false);
+ dcn31_disable_otg_wa(clk_mgr_base, context, false);
update_dispclk = true;
}
@@ -242,9 +254,7 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
- dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd);
- dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv);
- dc_dmub_srv_wait_idle(dc->ctx->dmub_srv);
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
@@ -284,17 +294,20 @@ static void dcn31_enable_pme_wa(struct clk_mgr *clk_mgr_base)
dcn31_smu_enable_pme_wa(clk_mgr);
}
-static void dcn31_init_clocks(struct clk_mgr *clk_mgr)
+void dcn31_init_clocks(struct clk_mgr *clk_mgr)
{
+ uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz;
+
memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
// Assumption is that boot state always supports pstate
+ clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk; // restore ref_dtbclk
clk_mgr->clks.p_state_change_support = true;
clk_mgr->clks.prev_p_state_change_support = true;
clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN;
clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN;
}
-static bool dcn31_are_clock_states_equal(struct dc_clocks *a,
+bool dcn31_are_clock_states_equal(struct dc_clocks *a,
struct dc_clocks *b)
{
if (a->dispclk_khz != b->dispclk_khz)
@@ -328,38 +341,38 @@ static struct clk_bw_params dcn31_bw_params = {
};
-static struct wm_table ddr4_wm_table = {
+static struct wm_table ddr5_wm_table = {
.entries = {
{
.wm_inst = WM_A,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
- .sr_exit_time_us = 6.09,
- .sr_enter_plus_exit_time_us = 7.14,
+ .sr_exit_time_us = 9,
+ .sr_enter_plus_exit_time_us = 11,
.valid = true,
},
{
.wm_inst = WM_B,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
- .sr_exit_time_us = 10.12,
- .sr_enter_plus_exit_time_us = 11.48,
+ .sr_exit_time_us = 9,
+ .sr_enter_plus_exit_time_us = 11,
.valid = true,
},
{
.wm_inst = WM_C,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
- .sr_exit_time_us = 10.12,
- .sr_enter_plus_exit_time_us = 11.48,
+ .sr_exit_time_us = 9,
+ .sr_enter_plus_exit_time_us = 11,
.valid = true,
},
{
.wm_inst = WM_D,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
- .sr_exit_time_us = 10.12,
- .sr_enter_plus_exit_time_us = 11.48,
+ .sr_exit_time_us = 9,
+ .sr_enter_plus_exit_time_us = 11,
.valid = true,
},
}
@@ -540,10 +553,9 @@ static unsigned int find_clk_for_voltage(
return clock;
}
-void dcn31_clk_mgr_helper_populate_bw_params(
- struct clk_mgr_internal *clk_mgr,
- struct integrated_info *bios_info,
- const DpmClocks_t *clock_table)
+static void dcn31_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk_mgr,
+ struct integrated_info *bios_info,
+ const DpmClocks_t *clock_table)
{
int i, j;
struct clk_bw_params *bw_params = clk_mgr->base.bw_params;
@@ -551,7 +563,8 @@ void dcn31_clk_mgr_helper_populate_bw_params(
j = -1;
- ASSERT(NUM_DF_PSTATE_LEVELS <= MAX_NUM_DPM_LVL);
+ static_assert(NUM_DF_PSTATE_LEVELS <= MAX_NUM_DPM_LVL,
+ "number of reported pstate levels exceeds maximum");
/* Find lowest DPM, FCLK is filled in reverse order*/
@@ -600,8 +613,10 @@ void dcn31_clk_mgr_helper_populate_bw_params(
}
bw_params->vram_type = bios_info->memory_type;
- bw_params->num_channels = bios_info->ma_channel_number;
+ bw_params->dram_channel_width_bytes = bios_info->memory_type == 0x22 ? 8 : 4;
+ //bw_params->dram_channel_width_bytes = dc->ctx->asic_id.vram_width;
+ bw_params->num_channels = bios_info->ma_channel_number ? bios_info->ma_channel_number : 4;
for (i = 0; i < WM_SET_COUNT; i++) {
bw_params->wm_table.entries[i].wm_inst = i;
@@ -615,13 +630,43 @@ void dcn31_clk_mgr_helper_populate_bw_params(
}
}
+static void dcn31_set_low_power_state(struct clk_mgr *clk_mgr_base)
+{
+ int display_count;
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ struct dc *dc = clk_mgr_base->ctx->dc;
+ struct dc_state *context = dc->current_state;
+
+ if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) {
+ display_count = dcn31_get_active_display_cnt_wa(dc, context);
+ /* if we can go lower, go lower */
+ if (display_count == 0) {
+ union display_idle_optimization_u idle_info = { 0 };
+
+ idle_info.idle_info.df_request_disabled = 1;
+ idle_info.idle_info.phy_ref_clk_off = 1;
+ idle_info.idle_info.s0i2_rdy = 1;
+ dcn31_smu_set_display_idle_optimization(clk_mgr, idle_info.data);
+ /* update power state */
+ clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
+ }
+ }
+}
+
+int dcn31_get_dtb_ref_freq_khz(struct clk_mgr *clk_mgr_base)
+{
+ return clk_mgr_base->clks.ref_dtbclk_khz;
+}
+
static struct clk_mgr_funcs dcn31_funcs = {
.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
+ .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
.update_clocks = dcn31_update_clocks,
.init_clocks = dcn31_init_clocks,
.enable_pme_wa = dcn31_enable_pme_wa,
.are_clock_states_equal = dcn31_are_clock_states_equal,
- .notify_wm_ranges = dcn31_notify_wm_ranges
+ .notify_wm_ranges = dcn31_notify_wm_ranges,
+ .set_low_power_state = dcn31_set_low_power_state
};
extern struct clk_mgr_funcs dcn3_fpga_funcs;
@@ -632,6 +677,7 @@ void dcn31_clk_mgr_construct(
struct dccg *dccg)
{
struct dcn31_smu_dpm_clks smu_dpm_clks = { 0 };
+ struct clk_log_info log_info = {0};
clk_mgr->base.base.ctx = ctx;
clk_mgr->base.base.funcs = &dcn31_funcs;
@@ -671,39 +717,76 @@ void dcn31_clk_mgr_construct(
ASSERT(smu_dpm_clks.dpm_clks);
- if (IS_FPGA_MAXIMUS_DC(ctx->dce_environment)) {
- clk_mgr->base.base.funcs = &dcn3_fpga_funcs;
- } else {
- struct clk_log_info log_info = {0};
-
- clk_mgr->base.smu_ver = dcn31_smu_get_smu_version(&clk_mgr->base);
+ clk_mgr->base.smu_ver = dcn31_smu_get_smu_version(&clk_mgr->base);
- if (clk_mgr->base.smu_ver)
- clk_mgr->base.smu_present = true;
+ if (clk_mgr->base.smu_ver)
+ clk_mgr->base.smu_present = true;
- /* TODO: Check we get what we expect during bringup */
- clk_mgr->base.base.dentist_vco_freq_khz = get_vco_frequency_from_reg(&clk_mgr->base);
-
- if (ctx->dc_bios->integrated_info->memory_type == LpDdr5MemType) {
- dcn31_bw_params.wm_table = lpddr5_wm_table;
- } else {
- dcn31_bw_params.wm_table = ddr4_wm_table;
- }
- /* Saved clocks configured at boot for debug purposes */
- dcn31_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, &clk_mgr->base.base, &log_info);
+ /* TODO: Check we get what we expect during bringup */
+ clk_mgr->base.base.dentist_vco_freq_khz = get_vco_frequency_from_reg(&clk_mgr->base);
+ if (ctx->dc_bios->integrated_info->memory_type == LpDdr5MemType) {
+ dcn31_bw_params.wm_table = lpddr5_wm_table;
+ } else {
+ dcn31_bw_params.wm_table = ddr5_wm_table;
}
+ /* Saved clocks configured at boot for debug purposes */
+ dcn31_dump_clk_registers(&clk_mgr->base.base.boot_snapshot,
+ &clk_mgr->base.base, &log_info);
clk_mgr->base.base.dprefclk_khz = 600000;
- clk_mgr->base.dccg->ref_dtbclk_khz = 600000;
+ clk_mgr->base.base.clks.ref_dtbclk_khz = 600000;
dce_clock_read_ss_info(&clk_mgr->base);
+ /*if bios enabled SS, driver needs to adjust dtb clock, only enable with correct bios*/
+ //clk_mgr->base.dccg->ref_dtbclk_khz = dce_adjust_dp_ref_freq_for_ss(clk_mgr_internal, clk_mgr->base.base.dprefclk_khz);
clk_mgr->base.base.bw_params = &dcn31_bw_params;
if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
+ int i;
+
dcn31_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks);
- if (ctx->dc_bios && ctx->dc_bios->integrated_info) {
+ DC_LOG_SMU("NumDcfClkLevelsEnabled: %d\n"
+ "NumDispClkLevelsEnabled: %d\n"
+ "NumSocClkLevelsEnabled: %d\n"
+ "VcnClkLevelsEnabled: %d\n"
+ "NumDfPst atesEnabled: %d\n"
+ "MinGfxClk: %d\n"
+ "MaxGfxClk: %d\n",
+ smu_dpm_clks.dpm_clks->NumDcfClkLevelsEnabled,
+ smu_dpm_clks.dpm_clks->NumDispClkLevelsEnabled,
+ smu_dpm_clks.dpm_clks->NumSocClkLevelsEnabled,
+ smu_dpm_clks.dpm_clks->VcnClkLevelsEnabled,
+ smu_dpm_clks.dpm_clks->NumDfPstatesEnabled,
+ smu_dpm_clks.dpm_clks->MinGfxClk,
+ smu_dpm_clks.dpm_clks->MaxGfxClk);
+ for (i = 0; i < smu_dpm_clks.dpm_clks->NumDcfClkLevelsEnabled; i++) {
+ DC_LOG_SMU("smu_dpm_clks.dpm_clks->DcfClocks[%d] = %d\n",
+ i,
+ smu_dpm_clks.dpm_clks->DcfClocks[i]);
+ }
+ for (i = 0; i < smu_dpm_clks.dpm_clks->NumDispClkLevelsEnabled; i++) {
+ DC_LOG_SMU("smu_dpm_clks.dpm_clks->DispClocks[%d] = %d\n",
+ i, smu_dpm_clks.dpm_clks->DispClocks[i]);
+ }
+ for (i = 0; i < smu_dpm_clks.dpm_clks->NumSocClkLevelsEnabled; i++) {
+ DC_LOG_SMU("smu_dpm_clks.dpm_clks->SocClocks[%d] = %d\n",
+ i, smu_dpm_clks.dpm_clks->SocClocks[i]);
+ }
+ for (i = 0; i < NUM_SOC_VOLTAGE_LEVELS; i++)
+ DC_LOG_SMU("smu_dpm_clks.dpm_clks->SocVoltage[%d] = %d\n",
+ i, smu_dpm_clks.dpm_clks->SocVoltage[i]);
+
+ for (i = 0; i < NUM_DF_PSTATE_LEVELS; i++) {
+ DC_LOG_SMU("smu_dpm_clks.dpm_clks.DfPstateTable[%d].FClk = %d\n"
+ "smu_dpm_clks.dpm_clks->DfPstateTable[%d].MemClk= %d\n"
+ "smu_dpm_clks.dpm_clks->DfPstateTable[%d].Voltage = %d\n",
+ i, smu_dpm_clks.dpm_clks->DfPstateTable[i].FClk,
+ i, smu_dpm_clks.dpm_clks->DfPstateTable[i].MemClk,
+ i, smu_dpm_clks.dpm_clks->DfPstateTable[i].Voltage);
+ }
+ if (ctx->dc_bios->integrated_info) {
dcn31_clk_mgr_helper_populate_bw_params(
&clk_mgr->base,
ctx->dc_bios->integrated_info,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h
index f8f100535526..be06fdbd0c22 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h
@@ -39,11 +39,20 @@ struct clk_mgr_dcn31 {
struct dcn31_smu_watermark_set smu_wm_set;
};
+bool dcn31_are_clock_states_equal(struct dc_clocks *a,
+ struct dc_clocks *b);
+void dcn31_init_clocks(struct clk_mgr *clk_mgr);
+void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
+ struct dc_state *context,
+ bool safe_to_lower);
+
void dcn31_clk_mgr_construct(struct dc_context *ctx,
struct clk_mgr_dcn31 *clk_mgr,
struct pp_smu_funcs *pp_smu,
struct dccg *dccg);
+int dcn31_get_dtb_ref_freq_khz(struct clk_mgr *clk_mgr_base);
+
void dcn31_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int);
#endif //__DCN31_CLK_MGR_H__
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
index 8c2b77eb9459..f201628e4e98 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
@@ -23,7 +23,6 @@
*
*/
-#include <linux/delay.h>
#include "core_types.h"
#include "clk_mgr_internal.h"
#include "reg_helper.h"
@@ -40,6 +39,12 @@
#define FN(reg_name, field) \
FD(reg_name##__##field)
+#include "logger_types.h"
+#undef DC_LOGGER
+#define DC_LOGGER \
+ CTX->logger
+#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
+
#define VBIOSSMC_MSG_TestMessage 0x1
#define VBIOSSMC_MSG_GetSmuVersion 0x2
#define VBIOSSMC_MSG_PowerUpGfx 0x3
@@ -95,14 +100,16 @@ static uint32_t dcn31_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, un
return res_val;
}
-int dcn31_smu_send_msg_with_param(
- struct clk_mgr_internal *clk_mgr,
- unsigned int msg_id, unsigned int param)
+static int dcn31_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
+ unsigned int msg_id,
+ unsigned int param)
{
uint32_t result;
result = dcn31_smu_wait_for_response(clk_mgr, 10, 200000);
- ASSERT(result == VBIOSSMC_Result_OK);
+
+ if (result != VBIOSSMC_Result_OK)
+ smu_print("SMU Response was not OK. SMU response after wait received is: %d\n", result);
if (result == VBIOSSMC_Status_BUSY) {
return -1;
@@ -119,6 +126,16 @@ int dcn31_smu_send_msg_with_param(
result = dcn31_smu_wait_for_response(clk_mgr, 10, 200000);
+ if (result == VBIOSSMC_Result_Failed) {
+ if (msg_id == VBIOSSMC_MSG_TransferTableDram2Smu &&
+ param == TABLE_WATERMARKS)
+ DC_LOG_DEBUG("Watermarks table not configured properly by SMU");
+ else
+ ASSERT(0);
+ REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Result_OK);
+ return -1;
+ }
+
if (IS_SMU_TIMEOUT(result)) {
ASSERT(0);
dm_helpers_smu_timeout(CTX, msg_id, param, 10 * 200000);
@@ -300,23 +317,32 @@ void dcn31_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr)
VBIOSSMC_MSG_TransferTableDram2Smu, TABLE_WATERMARKS);
}
-void dcn31_smu_set_Z9_support(struct clk_mgr_internal *clk_mgr, bool support)
+void dcn31_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zstate_support_state support)
{
- //TODO: Work with smu team to define optimization options.
- unsigned int msg_id;
+ unsigned int msg_id, param;
if (!clk_mgr->smu_present)
return;
- if (support)
- msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
+ if (!clk_mgr->base.ctx->dc->debug.enable_z9_disable_interface &&
+ (support == DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY))
+ support = DCN_ZSTATE_SUPPORT_DISALLOW;
+
+ if (support == DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY ||
+ support == DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY)
+ param = 1;
else
+ param = 0;
+
+ if (support == DCN_ZSTATE_SUPPORT_DISALLOW)
msg_id = VBIOSSMC_MSG_DisallowZstatesEntry;
+ else
+ msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
dcn31_smu_send_msg_with_param(
clk_mgr,
msg_id,
- 0);
+ param);
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.h
index cd0b7e1e685f..dfa25a76a6d1 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.h
@@ -265,7 +265,7 @@ void dcn31_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr
void dcn31_smu_transfer_dpm_table_smu_2_dram(struct clk_mgr_internal *clk_mgr);
void dcn31_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr);
-void dcn31_smu_set_Z9_support(struct clk_mgr_internal *clk_mgr, bool support);
+void dcn31_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zstate_support_state support);
void dcn31_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable);
#endif /* DAL_DC_31_SMU_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
new file mode 100644
index 000000000000..db687a13174d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
@@ -0,0 +1,1051 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+
+#include "dcn314_clk_mgr.h"
+
+#include "dccg.h"
+#include "clk_mgr_internal.h"
+
+// For dce12_get_dp_ref_freq_khz
+#include "dce100/dce_clk_mgr.h"
+
+// For dcn20_update_clocks_update_dpp_dto
+#include "dcn20/dcn20_clk_mgr.h"
+
+
+
+#include "reg_helper.h"
+#include "core_types.h"
+#include "dm_helpers.h"
+
+/* TODO: remove this include once we ported over remaining clk mgr functions*/
+#include "dcn30/dcn30_clk_mgr.h"
+#include "dcn31/dcn31_clk_mgr.h"
+
+#include "dc_dmub_srv.h"
+#include "link_service.h"
+#include "dcn314_smu.h"
+
+
+#include "logger_types.h"
+
+
+#define MAX_INSTANCE 7
+#define MAX_SEGMENT 8
+
+struct IP_BASE_INSTANCE {
+ unsigned int segment[MAX_SEGMENT];
+};
+
+struct IP_BASE {
+ struct IP_BASE_INSTANCE instance[MAX_INSTANCE];
+};
+
+static const struct IP_BASE CLK_BASE = { { { { 0x00016C00, 0x02401800, 0, 0, 0, 0, 0, 0 } },
+ { { 0x00016E00, 0x02401C00, 0, 0, 0, 0, 0, 0 } },
+ { { 0x00017000, 0x02402000, 0, 0, 0, 0, 0, 0 } },
+ { { 0x00017200, 0x02402400, 0, 0, 0, 0, 0, 0 } },
+ { { 0x0001B000, 0x0242D800, 0, 0, 0, 0, 0, 0 } },
+ { { 0x0001B200, 0x0242DC00, 0, 0, 0, 0, 0, 0 } },
+ { { 0x0001B400, 0x0242E000, 0, 0, 0, 0, 0, 0 } } } };
+
+#undef DC_LOGGER
+#define DC_LOGGER \
+ clk_mgr->base.base.ctx->logger
+
+#define regCLK1_CLK_PLL_REQ 0x0237
+#define regCLK1_CLK_PLL_REQ_BASE_IDX 0
+
+#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0
+#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc
+#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10
+#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL
+#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L
+#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L
+
+#define regCLK1_CLK0_DFS_CNTL 0x0269
+#define regCLK1_CLK0_DFS_CNTL_BASE_IDX 0
+#define regCLK1_CLK1_DFS_CNTL 0x026c
+#define regCLK1_CLK1_DFS_CNTL_BASE_IDX 0
+#define regCLK1_CLK2_DFS_CNTL 0x026f
+#define regCLK1_CLK2_DFS_CNTL_BASE_IDX 0
+#define regCLK1_CLK3_DFS_CNTL 0x0272
+#define regCLK1_CLK3_DFS_CNTL_BASE_IDX 0
+#define regCLK1_CLK4_DFS_CNTL 0x0275
+#define regCLK1_CLK4_DFS_CNTL_BASE_IDX 0
+#define regCLK1_CLK5_DFS_CNTL 0x0278
+#define regCLK1_CLK5_DFS_CNTL_BASE_IDX 0
+
+#define regCLK1_CLK0_CURRENT_CNT 0x02fb
+#define regCLK1_CLK0_CURRENT_CNT_BASE_IDX 0
+#define regCLK1_CLK1_CURRENT_CNT 0x02fc
+#define regCLK1_CLK1_CURRENT_CNT_BASE_IDX 0
+#define regCLK1_CLK2_CURRENT_CNT 0x02fd
+#define regCLK1_CLK2_CURRENT_CNT_BASE_IDX 0
+#define regCLK1_CLK3_CURRENT_CNT 0x02fe
+#define regCLK1_CLK3_CURRENT_CNT_BASE_IDX 0
+#define regCLK1_CLK4_CURRENT_CNT 0x02ff
+#define regCLK1_CLK4_CURRENT_CNT_BASE_IDX 0
+#define regCLK1_CLK5_CURRENT_CNT 0x0300
+#define regCLK1_CLK5_CURRENT_CNT_BASE_IDX 0
+
+#define regCLK1_CLK0_BYPASS_CNTL 0x028a
+#define regCLK1_CLK0_BYPASS_CNTL_BASE_IDX 0
+#define regCLK1_CLK1_BYPASS_CNTL 0x0293
+#define regCLK1_CLK1_BYPASS_CNTL_BASE_IDX 0
+#define regCLK1_CLK2_BYPASS_CNTL 0x029c
+#define regCLK1_CLK2_BYPASS_CNTL_BASE_IDX 0
+#define regCLK1_CLK3_BYPASS_CNTL 0x02a5
+#define regCLK1_CLK3_BYPASS_CNTL_BASE_IDX 0
+#define regCLK1_CLK4_BYPASS_CNTL 0x02ae
+#define regCLK1_CLK4_BYPASS_CNTL_BASE_IDX 0
+#define regCLK1_CLK5_BYPASS_CNTL 0x02b7
+#define regCLK1_CLK5_BYPASS_CNTL_BASE_IDX 0
+
+#define regCLK1_CLK0_DS_CNTL 0x0283
+#define regCLK1_CLK0_DS_CNTL_BASE_IDX 0
+#define regCLK1_CLK1_DS_CNTL 0x028c
+#define regCLK1_CLK1_DS_CNTL_BASE_IDX 0
+#define regCLK1_CLK2_DS_CNTL 0x0295
+#define regCLK1_CLK2_DS_CNTL_BASE_IDX 0
+#define regCLK1_CLK3_DS_CNTL 0x029e
+#define regCLK1_CLK3_DS_CNTL_BASE_IDX 0
+#define regCLK1_CLK4_DS_CNTL 0x02a7
+#define regCLK1_CLK4_DS_CNTL_BASE_IDX 0
+#define regCLK1_CLK5_DS_CNTL 0x02b0
+#define regCLK1_CLK5_DS_CNTL_BASE_IDX 0
+
+#define regCLK1_CLK0_ALLOW_DS 0x0284
+#define regCLK1_CLK0_ALLOW_DS_BASE_IDX 0
+#define regCLK1_CLK1_ALLOW_DS 0x028d
+#define regCLK1_CLK1_ALLOW_DS_BASE_IDX 0
+#define regCLK1_CLK2_ALLOW_DS 0x0296
+#define regCLK1_CLK2_ALLOW_DS_BASE_IDX 0
+#define regCLK1_CLK3_ALLOW_DS 0x029f
+#define regCLK1_CLK3_ALLOW_DS_BASE_IDX 0
+#define regCLK1_CLK4_ALLOW_DS 0x02a8
+#define regCLK1_CLK4_ALLOW_DS_BASE_IDX 0
+#define regCLK1_CLK5_ALLOW_DS 0x02b1
+#define regCLK1_CLK5_ALLOW_DS_BASE_IDX 0
+
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL__SHIFT 0x0
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV__SHIFT 0x10
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L
+
+#define regCLK6_0_CLK6_spll_field_8 0x464b
+#define regCLK6_0_CLK6_spll_field_8_BASE_IDX 0
+
+#define CLK6_0_CLK6_spll_field_8__spll_ssc_en__SHIFT 0xd
+#define CLK6_0_CLK6_spll_field_8__spll_ssc_en_MASK 0x00002000L
+
+#define REG(reg_name) \
+ (CLK_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name)
+
+#define TO_CLK_MGR_DCN314(clk_mgr)\
+ container_of(clk_mgr, struct clk_mgr_dcn314, base)
+
+static int dcn314_get_active_display_cnt_wa(
+ struct dc *dc,
+ struct dc_state *context)
+{
+ int i, display_count;
+ bool tmds_present = false;
+
+ display_count = 0;
+ for (i = 0; i < context->stream_count; i++) {
+ const struct dc_stream_state *stream = context->streams[i];
+
+ if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A ||
+ stream->signal == SIGNAL_TYPE_DVI_SINGLE_LINK ||
+ stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK)
+ tmds_present = true;
+
+ /* Checking stream / link detection ensuring that PHY is active*/
+ if (dc_is_dp_signal(stream->signal) && !stream->dpms_off)
+ display_count++;
+
+ }
+
+ for (i = 0; i < dc->link_count; i++) {
+ const struct dc_link *link = dc->links[i];
+
+ /* abusing the fact that the dig and phy are coupled to see if the phy is enabled */
+ if (link->link_enc && link->link_enc->funcs->is_dig_enabled &&
+ link->link_enc->funcs->is_dig_enabled(link->link_enc))
+ display_count++;
+ }
+
+ /* WA for hang on HDMI after display off back on*/
+ if (display_count == 0 && tmds_present)
+ display_count = 1;
+
+ return display_count;
+}
+
+static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context,
+ bool safe_to_lower, bool disable)
+{
+ struct dc *dc = clk_mgr_base->ctx->dc;
+ int i;
+
+ for (i = 0; i < dc->res_pool->pipe_count; ++i) {
+ struct pipe_ctx *pipe = safe_to_lower
+ ? &context->res_ctx.pipe_ctx[i]
+ : &dc->current_state->res_ctx.pipe_ctx[i];
+
+ if (pipe->top_pipe || pipe->prev_odm_pipe)
+ continue;
+ if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) {
+ if (disable) {
+ if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc)
+ pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
+
+ reset_sync_context_for_pipe(dc, context, i);
+ } else {
+ pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
+ }
+ }
+ }
+}
+
+bool dcn314_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ uint32_t ssc_enable;
+
+ REG_GET(CLK6_0_CLK6_spll_field_8, spll_ssc_en, &ssc_enable);
+
+ return ssc_enable == 1;
+}
+
+void dcn314_init_clocks(struct clk_mgr *clk_mgr)
+{
+ struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
+ uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz;
+ struct clk_mgr_dcn314 *clk_mgr_dcn314 = TO_CLK_MGR_DCN314(clk_mgr_int);
+ struct clk_log_info log_info = {0};
+
+ memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
+ // Assumption is that boot state always supports pstate
+ clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk; // restore ref_dtbclk
+ clk_mgr->clks.p_state_change_support = true;
+ clk_mgr->clks.prev_p_state_change_support = true;
+ clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN;
+ clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN;
+
+ // to adjust dp_dto reference clock if ssc is enable otherwise to apply dprefclk
+ if (dcn314_is_spll_ssc_enabled(clk_mgr))
+ clk_mgr->dp_dto_source_clock_in_khz =
+ dce_adjust_dp_ref_freq_for_ss(clk_mgr_int, clk_mgr->dprefclk_khz);
+ else
+ clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz;
+
+ dcn314_dump_clk_registers(&clk_mgr->boot_snapshot, &clk_mgr_dcn314->base.base, &log_info);
+ clk_mgr->clks.dispclk_khz = clk_mgr->boot_snapshot.dispclk * 1000;
+}
+/*
+ * Apply the clocks requested in @context and mirror them in the cached state.
+ *
+ * @clk_mgr_base:  clock manager; its clks member caches the last applied values
+ * @context:       state whose bw_ctx.bw.dcn.clk holds the requested clocks
+ * @safe_to_lower: true selects the power-saving branch (z-state allow, dtbclk
+ *                 off, idle optimisation with no active display); false selects
+ *                 the power-raising branch (z-state disallow, dtbclk on,
+ *                 mission mode)
+ *
+ * Returns immediately when dc->work_arounds.skip_clock_update is set.
+ * After the power-state handling, dcfclk, deep-sleep dcfclk, dppclk and
+ * dispclk are each updated only when should_set_clock() agrees, the DPP DTOs
+ * are reprogrammed, and the cached clocks are sent to DMCUB.
+ *
+ * Side effect on @context: a requested dppclk below 100000 kHz is raised to
+ * 100000 kHz in place.
+ */
+void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
+ struct dc_state *context,
+ bool safe_to_lower)
+{
+ union dmub_rb_cmd cmd;
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
+ struct dc *dc = clk_mgr_base->ctx->dc;
+ int display_count;
+ bool update_dppclk = false; /* set when the cached dppclk was changed */
+ bool update_dispclk = false; /* set when dispclk was reprogrammed */
+ bool dpp_clock_lowered = false; /* set when the new dppclk is below the cached one */
+
+ if (dc->work_arounds.skip_clock_update)
+ return;
+
+ display_count = dcn314_get_active_display_cnt_wa(dc, context);
+
+ /*
+ * if it is safe to lower, but we are already in the lower state, we don't have to do anything
+ * also if safe to lower is false, we just go in the higher state
+ */
+ if (safe_to_lower) {
+ if (new_clocks->zstate_support != DCN_ZSTATE_SUPPORT_DISALLOW &&
+ new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) {
+ dcn314_smu_set_zstate_support(clk_mgr, new_clocks->zstate_support);
+ dm_helpers_enable_periodic_detection(clk_mgr_base->ctx, true);
+ clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
+ }
+
+ if (clk_mgr_base->clks.dtbclk_en && !new_clocks->dtbclk_en) { /* dtbclk is only switched off on this branch */
+ dcn314_smu_set_dtbclk(clk_mgr, false);
+ clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
+ }
+ /* check that we're not already in lower */
+ if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) {
+ /* if we can go lower, go lower */
+ if (display_count == 0) {
+ union display_idle_optimization_u idle_info = { 0 };
+ idle_info.idle_info.df_request_disabled = 1;
+ idle_info.idle_info.phy_ref_clk_off = 1;
+ idle_info.idle_info.s0i2_rdy = 1;
+ dcn314_smu_set_display_idle_optimization(clk_mgr, idle_info.data);
+ /* update power state */
+ clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
+ }
+ }
+ } else {
+ if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_DISALLOW &&
+ new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) {
+ dcn314_smu_set_zstate_support(clk_mgr, DCN_ZSTATE_SUPPORT_DISALLOW);
+ dm_helpers_enable_periodic_detection(clk_mgr_base->ctx, false);
+ clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
+ }
+
+ if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) { /* dtbclk is only switched on on this branch */
+ dcn314_smu_set_dtbclk(clk_mgr, true);
+ clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
+ }
+
+ /* check that we're not already in D0 */
+ if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_MISSION_MODE) {
+ union display_idle_optimization_u idle_info = { 0 }; /* all-zero: no idle optimisation bits set */
+
+ dcn314_smu_set_display_idle_optimization(clk_mgr, idle_info.data);
+ /* update power state */
+ clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_MISSION_MODE;
+ }
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr_base->clks.dcfclk_khz)) {
+ clk_mgr_base->clks.dcfclk_khz = new_clocks->dcfclk_khz;
+ dcn314_smu_set_hard_min_dcfclk(clk_mgr, clk_mgr_base->clks.dcfclk_khz);
+ }
+
+ if (should_set_clock(safe_to_lower,
+ new_clocks->dcfclk_deep_sleep_khz, clk_mgr_base->clks.dcfclk_deep_sleep_khz)) {
+ clk_mgr_base->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz;
+ dcn314_smu_set_min_deep_sleep_dcfclk(clk_mgr, clk_mgr_base->clks.dcfclk_deep_sleep_khz);
+ }
+
+ // workaround: Limit dppclk to 100Mhz to avoid lower eDP panel switch to plus 4K monitor underflow.
+ if (new_clocks->dppclk_khz < 100000)
+ new_clocks->dppclk_khz = 100000; /* NOTE: the clamp is written back into @context */
+
+ if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) {
+ if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz)
+ dpp_clock_lowered = true;
+ clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz;
+ update_dppclk = true;
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) &&
+ (new_clocks->dispclk_khz > 0 || (safe_to_lower && display_count == 0))) {
+ int requested_dispclk_khz = new_clocks->dispclk_khz;
+
+ dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); /* presumably last arg true = disable, false = re-enable - confirm */
+
+ /* Clamp the requested clock to PMFW based on their limit. */
+ if (dc->debug.min_disp_clk_khz > 0 && requested_dispclk_khz < dc->debug.min_disp_clk_khz)
+ requested_dispclk_khz = dc->debug.min_disp_clk_khz;
+
+ dcn314_smu_set_dispclk(clk_mgr, requested_dispclk_khz);
+ clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; /* the cache keeps the unclamped request */
+
+ dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false);
+
+ update_dispclk = true;
+ }
+
+ if (dpp_clock_lowered) {
+ // increase per DPP DTO before lowering global dppclk
+ dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
+ dcn314_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz);
+ } else {
+ // increase global DPPCLK before lowering per DPP DTO
+ if (update_dppclk || update_dispclk)
+ dcn314_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz);
+ // always update dtos unless clock is lowered and not safe to lower
+ if (new_clocks->dppclk_khz >= dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz)
+ dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
+ }
+
+ // notify DMCUB of latest clocks
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.notify_clocks.header.type = DMUB_CMD__CLK_MGR;
+ cmd.notify_clocks.header.sub_type = DMUB_CMD__CLK_MGR_NOTIFY_CLOCKS;
+ cmd.notify_clocks.clocks.dcfclk_khz = clk_mgr_base->clks.dcfclk_khz;
+ cmd.notify_clocks.clocks.dcfclk_deep_sleep_khz =
+ clk_mgr_base->clks.dcfclk_deep_sleep_khz;
+ cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
+ cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
+
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+/*
+ * Compute the VCO frequency in kHz from the FbMult_int / FbMult_frac fields
+ * of CLK1_CLK_PLL_REQ multiplied by clk_mgr->dfs_ref_freq_khz.
+ * The result is floored to an integer.
+ */
+static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
+{
+ /* get FbMult value */
+ struct fixed31_32 pll_req;
+ unsigned int fbmult_frac_val = 0;
+ unsigned int fbmult_int_val = 0;
+
+ /*
+ * Register value of fbmult is in 8.16 format, we are converting to 31.32
+ * to leverage the fixed-point operations available in the driver
+ */
+
+ REG_GET(CLK1_CLK_PLL_REQ, FbMult_frac, &fbmult_frac_val); /* 16 bit fractional part*/
+ REG_GET(CLK1_CLK_PLL_REQ, FbMult_int, &fbmult_int_val); /* 8 bit integer part */
+
+ pll_req = dc_fixpt_from_int(fbmult_int_val);
+
+ /*
+ * since fractional part is only 16 bit in register definition but is 32 bit
+ * in our fixed-point definition, need to shift left by 16 to obtain correct value
+ */
+ pll_req.value |= fbmult_frac_val << 16;
+
+ /* multiply by the reference frequency (dfs_ref_freq_khz) */
+ pll_req = dc_fixpt_mul_int(pll_req, clk_mgr->dfs_ref_freq_khz);
+
+ /* integer part is now VCO frequency in kHz */
+ return dc_fixpt_floor(pll_req);
+}
+
+/* enable_pme_wa hook of dcn314_funcs: forward the PME workaround request to the SMU. */
+static void dcn314_enable_pme_wa(struct clk_mgr *clk_mgr_base)
+{
+	dcn314_smu_enable_pme_wa(TO_CLK_MGR_INTERNAL(clk_mgr_base));
+}
+
+/*
+ * Compare the fields of two clock states that this clock manager tracks:
+ * dispclk, dppclk, dcfclk, deep-sleep dcfclk, z-state support and dtbclk
+ * enable. Returns true only when all six match.
+ */
+bool dcn314_are_clock_states_equal(struct dc_clocks *a,
+		struct dc_clocks *b)
+{
+	return a->dispclk_khz == b->dispclk_khz &&
+	       a->dppclk_khz == b->dppclk_khz &&
+	       a->dcfclk_khz == b->dcfclk_khz &&
+	       a->dcfclk_deep_sleep_khz == b->dcfclk_deep_sleep_khz &&
+	       a->zstate_support == b->zstate_support &&
+	       a->dtbclk_en == b->dtbclk_en;
+}
+
+/*
+ * Read the raw CLK1 CURRENT_CNT and BYPASS_CNTL registers for clocks 0-4,
+ * plus the CLK3 deep-sleep registers, into @internal. Values are stored
+ * unmodified; dcn314_dump_clk_registers() does the scaling and masking.
+ */
+static void dcn314_dump_clk_registers_internal(struct dcn35_clk_internal *internal, struct clk_mgr *clk_mgr_base)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); /* NOTE(review): not referenced directly; presumably consumed by REG_READ - confirm */
+
+ // read dtbclk
+ internal->CLK1_CLK4_CURRENT_CNT = REG_READ(CLK1_CLK4_CURRENT_CNT);
+ internal->CLK1_CLK4_BYPASS_CNTL = REG_READ(CLK1_CLK4_BYPASS_CNTL);
+
+ // read dcfclk
+ internal->CLK1_CLK3_CURRENT_CNT = REG_READ(CLK1_CLK3_CURRENT_CNT);
+ internal->CLK1_CLK3_BYPASS_CNTL = REG_READ(CLK1_CLK3_BYPASS_CNTL);
+
+ // read dcf deep sleep divider
+ internal->CLK1_CLK3_DS_CNTL = REG_READ(CLK1_CLK3_DS_CNTL);
+ internal->CLK1_CLK3_ALLOW_DS = REG_READ(CLK1_CLK3_ALLOW_DS);
+
+ // read dppclk
+ internal->CLK1_CLK1_CURRENT_CNT = REG_READ(CLK1_CLK1_CURRENT_CNT);
+ internal->CLK1_CLK1_BYPASS_CNTL = REG_READ(CLK1_CLK1_BYPASS_CNTL);
+
+ // read dprefclk
+ internal->CLK1_CLK2_CURRENT_CNT = REG_READ(CLK1_CLK2_CURRENT_CNT);
+ internal->CLK1_CLK2_BYPASS_CNTL = REG_READ(CLK1_CLK2_BYPASS_CNTL);
+
+ // read dispclk
+ internal->CLK1_CLK0_CURRENT_CNT = REG_READ(CLK1_CLK0_CURRENT_CNT);
+ internal->CLK1_CLK0_BYPASS_CNTL = REG_READ(CLK1_CLK0_BYPASS_CNTL);
+}
+/*
+ * Snapshot the current clock registers into @regs_and_bypass.
+ *
+ * @regs_and_bypass: receives the scaled clock counters and bypass selectors
+ * @clk_mgr_base:    clock manager whose registers are read
+ * @log_info:        not used by this function
+ *
+ * Counter values are divided by 10 before being stored (presumably a unit
+ * conversion - TODO confirm the register unit). Each bypass selector is the
+ * low three bits of the BYPASS_CNTL register; values above 4 are stored as 0.
+ */
+void dcn314_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
+ struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info)
+{
+
+ struct dcn35_clk_internal internal = {0};
+
+ dcn314_dump_clk_registers_internal(&internal, clk_mgr_base);
+
+ regs_and_bypass->dcfclk = internal.CLK1_CLK3_CURRENT_CNT / 10;
+ regs_and_bypass->dcf_deep_sleep_divider = internal.CLK1_CLK3_DS_CNTL / 10;
+ regs_and_bypass->dcf_deep_sleep_allow = internal.CLK1_CLK3_ALLOW_DS; /* stored raw, no scaling */
+ regs_and_bypass->dprefclk = internal.CLK1_CLK2_CURRENT_CNT / 10;
+ regs_and_bypass->dispclk = internal.CLK1_CLK0_CURRENT_CNT / 10;
+ regs_and_bypass->dppclk = internal.CLK1_CLK1_CURRENT_CNT / 10;
+ regs_and_bypass->dtbclk = internal.CLK1_CLK4_CURRENT_CNT / 10;
+
+ regs_and_bypass->dppclk_bypass = internal.CLK1_CLK1_BYPASS_CNTL & 0x0007; /* low 3 bits = bypass selector */
+ if (regs_and_bypass->dppclk_bypass > 4)
+ regs_and_bypass->dppclk_bypass = 0;
+ regs_and_bypass->dcfclk_bypass = internal.CLK1_CLK3_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dcfclk_bypass > 4)
+ regs_and_bypass->dcfclk_bypass = 0;
+ regs_and_bypass->dispclk_bypass = internal.CLK1_CLK0_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dispclk_bypass > 4)
+ regs_and_bypass->dispclk_bypass = 0;
+ regs_and_bypass->dprefclk_bypass = internal.CLK1_CLK2_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dprefclk_bypass > 4)
+ regs_and_bypass->dprefclk_bypass = 0;
+
+}
+/*
+ * Default bandwidth parameters. dcn314_clk_mgr_construct() copies a
+ * watermark table into wm_table and installs this object as bw_params;
+ * dcn314_clk_mgr_helper_populate_bw_params() may then overwrite the
+ * clock table, vram_type and num_channels. The object is file-static, so
+ * it is shared by every clock manager constructed from this file.
+ */
+static struct clk_bw_params dcn314_bw_params = {
+ .vram_type = Ddr4MemType,
+ .num_channels = 1,
+ .clk_table = {
+ .num_entries = 4,
+ },
+
+};
+/*
+ * Watermark table that dcn314_clk_mgr_construct() selects whenever the
+ * memory type is not LpDdr5MemType. All four sets (WM_A..WM_D) carry the
+ * same p-state-change latencies.
+ */
+static struct wm_table ddr5_wm_table = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 12.5,
+ .sr_enter_plus_exit_time_us = 14.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 12.5,
+ .sr_enter_plus_exit_time_us = 14.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 12.5,
+ .sr_enter_plus_exit_time_us = 14.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 12.5,
+ .sr_enter_plus_exit_time_us = 14.5,
+ .valid = true,
+ },
+ }
+};
+/*
+ * Watermark table that dcn314_clk_mgr_construct() selects when the memory
+ * type is LpDdr5MemType. All four sets (WM_A..WM_D) carry the same values;
+ * the self-refresh times are longer than in ddr5_wm_table.
+ */
+static struct wm_table lpddr5_wm_table = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 30.0,
+ .sr_enter_plus_exit_time_us = 32.0,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 30.0,
+ .sr_enter_plus_exit_time_us = 32.0,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 30.0,
+ .sr_enter_plus_exit_time_us = 32.0,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 30.0,
+ .sr_enter_plus_exit_time_us = 32.0,
+ .valid = true,
+ },
+ }
+};
+/*
+ * Fallback buffers and the spread-spectrum lookup table.
+ * dummy_clocks / dummy_wms stand in for the DPM and watermark tables when
+ * dcn314_clk_mgr_construct() fails to allocate GPU memory for them.
+ */
+static DpmClocks314_t dummy_clocks;
+
+static struct dcn314_watermarks dummy_wms = { 0 };
+
+static struct dcn314_ss_info_table ss_info_table = {
+ .ss_divider = 1000,
+ .ss_percentage = {0, 0, 375, 375, 375} /* indexed by the CLK2_BYPASS_SEL value in dcn314_read_ss_info_from_lut() */
+};
+
+/*
+ * Fill the SMU watermark table @table from the valid entries of
+ * @bw_params->wm_table.
+ *
+ * @bw_params: source of the watermark sets (wm_table) and the dcfclk levels
+ *             (clk_table) that bound each set's Mclk range
+ * @table:     destination; valid sets are packed into WatermarkRow[WM_DCFCLK]
+ *             without holes, and one unconstrained WM_A row is written to
+ *             WatermarkRow[WM_SOCCLK][0]
+ *
+ * The "previous row" and "last row" fix-ups are only applied when at least
+ * one row has been emitted, so the table is never indexed with -1.
+ */
+static void dcn314_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn314_watermarks *table)
+{
+	int i, num_valid_sets;
+
+	num_valid_sets = 0;
+
+	for (i = 0; i < WM_SET_COUNT; i++) {
+		/* skip empty entries, the smu array has no holes */
+		if (!bw_params->wm_table.entries[i].valid)
+			continue;
+
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets].WmSetting = bw_params->wm_table.entries[i].wm_inst;
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets].WmType = bw_params->wm_table.entries[i].wm_type;
+		/* We will not select WM based on fclk, so leave it as unconstrained */
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets].MinClock = 0;
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets].MaxClock = 0xFFFF;
+
+		if (table->WatermarkRow[WM_DCFCLK][num_valid_sets].WmType == WM_TYPE_PSTATE_CHG) {
+			if (i == 0)
+				table->WatermarkRow[WM_DCFCLK][num_valid_sets].MinMclk = 0;
+			else {
+				/* add 1 to make it non-overlapping with next lvl */
+				table->WatermarkRow[WM_DCFCLK][num_valid_sets].MinMclk =
+						bw_params->clk_table.entries[i - 1].dcfclk_mhz + 1;
+			}
+			table->WatermarkRow[WM_DCFCLK][num_valid_sets].MaxMclk =
+					bw_params->clk_table.entries[i].dcfclk_mhz;
+
+		} else {
+			/* unconstrained for memory retraining */
+			table->WatermarkRow[WM_DCFCLK][num_valid_sets].MinClock = 0;
+			table->WatermarkRow[WM_DCFCLK][num_valid_sets].MaxClock = 0xFFFF;
+
+			/*
+			 * Modify previous watermark range to cover up to max.
+			 * There is no previous row while this is the first valid set.
+			 */
+			if (num_valid_sets > 0)
+				table->WatermarkRow[WM_DCFCLK][num_valid_sets - 1].MaxClock = 0xFFFF;
+		}
+		num_valid_sets++;
+	}
+
+	ASSERT(num_valid_sets != 0); /* Must have at least one set of valid watermarks */
+
+	/* modify the min and max to make sure we cover the whole range */
+	table->WatermarkRow[WM_DCFCLK][0].MinMclk = 0;
+	table->WatermarkRow[WM_DCFCLK][0].MinClock = 0;
+	/* ASSERT may be compiled out, so keep the last-row index in bounds */
+	if (num_valid_sets > 0) {
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets - 1].MaxMclk = 0xFFFF;
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets - 1].MaxClock = 0xFFFF;
+	}
+
+	/* This is for writeback only, does not matter currently as no writeback support */
+	table->WatermarkRow[WM_SOCCLK][0].WmSetting = WM_A;
+	table->WatermarkRow[WM_SOCCLK][0].MinClock = 0;
+	table->WatermarkRow[WM_SOCCLK][0].MaxClock = 0xFFFF;
+	table->WatermarkRow[WM_SOCCLK][0].MinMclk = 0;
+	table->WatermarkRow[WM_SOCCLK][0].MaxMclk = 0xFFFF;
+}
+
+/*
+ * notify_wm_ranges hook of dcn314_funcs: rebuild the watermark table from
+ * bw_params and transfer it to the SMU.
+ *
+ * Nothing happens when no SMU version was read, or when the watermark
+ * buffer is missing or has no GPU address (quad_part == 0).
+ */
+static void dcn314_notify_wm_ranges(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	struct clk_mgr_dcn314 *clk_mgr_dcn314 = TO_CLK_MGR_DCN314(clk_mgr);
+	struct dcn314_smu_watermark_set *wm = &clk_mgr_dcn314->smu_wm_set;
+
+	if (!clk_mgr->smu_ver || !wm->wm_set || wm->mc_address.quad_part == 0)
+		return;
+
+	/* start from a clean table before filling in the ranges */
+	memset(wm->wm_set, 0, sizeof(*wm->wm_set));
+	dcn314_build_watermark_ranges(clk_mgr_base->bw_params, wm->wm_set);
+
+	/* tell the SMU where the table lives, then trigger the transfer */
+	dcn314_smu_set_dram_addr_high(clk_mgr, wm->mc_address.high_part);
+	dcn314_smu_set_dram_addr_low(clk_mgr, wm->mc_address.low_part);
+	dcn314_smu_transfer_wm_table_dram_2_smu(clk_mgr);
+}
+
+/*
+ * Ask the SMU to copy its DPM clock table into smu_dpm_clks->dpm_clks.
+ *
+ * Nothing happens when no SMU version was read, or when the destination
+ * buffer is missing or has no GPU address (quad_part == 0). Otherwise the
+ * buffer is zeroed before the transfer is requested.
+ */
+static void dcn314_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr,
+		struct dcn314_smu_dpm_clks *smu_dpm_clks)
+{
+	DpmClocks314_t *dpm_table = smu_dpm_clks->dpm_clks;
+
+	if (!clk_mgr->smu_ver || !dpm_table || smu_dpm_clks->mc_address.quad_part == 0)
+		return;
+
+	memset(dpm_table, 0, sizeof(*dpm_table));
+
+	/* tell the SMU where to write, then trigger the transfer */
+	dcn314_smu_set_dram_addr_high(clk_mgr, smu_dpm_clks->mc_address.high_part);
+	dcn314_smu_set_dram_addr_low(clk_mgr, smu_dpm_clks->mc_address.low_part);
+	dcn314_smu_transfer_dpm_table_smu_2_dram(clk_mgr);
+}
+
+/* A clock value is accepted when it lies strictly between 1 and 100000. */
+static inline bool is_valid_clock_value(uint32_t clock_value)
+{
+	if (clock_value <= 1)
+		return false;
+
+	return clock_value < 100000;
+}
+
+/*
+ * Translate a WCK_RATIO_* code into its numeric ratio:
+ * WCK_RATIO_1_2 -> 2, WCK_RATIO_1_4 -> 4, anything else -> 1.
+ */
+static unsigned int convert_wck_ratio(uint8_t wck_ratio)
+{
+	if (wck_ratio == WCK_RATIO_1_2)
+		return 2;
+
+	if (wck_ratio == WCK_RATIO_1_4)
+		return 4;
+
+	return 1;
+}
+
+/*
+ * Return the largest of the first @num_clocks values in @clocks, or 0 when
+ * @num_clocks is 0 (or every value is 0).
+ */
+static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks)
+{
+	uint32_t max = 0;
+	uint32_t i; /* unsigned to match num_clocks; avoids a signed/unsigned comparison */
+
+	for (i = 0; i < num_clocks; ++i) {
+		if (clocks[i] > max)
+			max = clocks[i];
+	}
+
+	return max;
+}
+/*
+ * Rebuild clk_mgr->base.bw_params from the PMFW DPM table @clock_table and
+ * the BIOS data in @bios_info.
+ *
+ * Steps, in order:
+ *  - find the highest valid FClk p-state (max_fclk / max_pstate);
+ *  - take the maximum dispclk and dppclk over the enabled display levels;
+ *  - write one clk_table entry per enabled dcfclk level, using the lowest
+ *    qualifying FClk p-state and default phyclk/dtbclk values;
+ *  - if needed, append one entry at the highest p-state, then force the
+ *    overall maxima into the last entry;
+ *  - replace any clock left at 0 with the value from def_max;
+ *  - copy memory type / channel info from @bios_info and mark the
+ *    wm_table entries below num_entries as valid p-state-change sets.
+ *
+ * @bios_info is dereferenced unconditionally; the caller must pass non-NULL.
+ */
+static void dcn314_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk_mgr,
+ struct integrated_info *bios_info,
+ const DpmClocks314_t *clock_table)
+{
+ struct clk_bw_params *bw_params = clk_mgr->base.bw_params;
+ struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1]; /* copy of the last default entry, taken before the table is overwritten */
+ uint32_t max_pstate = 0, max_fclk = 0, min_pstate = 0, max_dispclk = 0, max_dppclk = 0;
+ int i;
+
+ /* Find highest valid fclk pstate */
+ for (i = 0; i < clock_table->NumDfPstatesEnabled; i++) {
+ if (is_valid_clock_value(clock_table->DfPstateTable[i].FClk) &&
+ clock_table->DfPstateTable[i].FClk > max_fclk) {
+ max_fclk = clock_table->DfPstateTable[i].FClk;
+ max_pstate = i;
+ }
+ }
+
+ /* We expect the table to contain at least one valid fclk entry. */
+ ASSERT(is_valid_clock_value(max_fclk));
+
+ /* Dispclk and dppclk can be max at any voltage, same number of levels for both */
+ if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS &&
+ clock_table->NumDispClkLevelsEnabled <= NUM_DPPCLK_DPM_LEVELS) {
+ max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled);
+ max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled);
+ } else {
+ /* Invalid number of entries in the table from PMFW. */
+ ASSERT(0);
+ }
+
+ /* Base the clock table on dcfclk, need at least one entry regardless of pmfw table */
+ for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) { /* NOTE(review): i is not bounded by MAX_NUM_DPM_LVL here - confirm PMFW guarantees it */
+ uint32_t min_fclk = clock_table->DfPstateTable[0].FClk; /* NOTE(review): entry 0 seeds the minimum without an is_valid_clock_value() check */
+ int j;
+
+ for (j = 1; j < clock_table->NumDfPstatesEnabled; j++) {
+ if (is_valid_clock_value(clock_table->DfPstateTable[j].FClk) &&
+ clock_table->DfPstateTable[j].FClk < min_fclk &&
+ clock_table->DfPstateTable[j].Voltage <= clock_table->SocVoltage[i]) {
+ min_fclk = clock_table->DfPstateTable[j].FClk;
+ min_pstate = j; /* NOTE(review): min_pstate is not reset per dcfclk level, so it can carry over from an earlier i - confirm intended */
+ }
+ }
+
+ /* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */
+ for (j = bw_params->clk_table.num_entries - 1; j > 0; j--)
+ if (bw_params->clk_table.entries[j].dcfclk_mhz <= clock_table->DcfClocks[i])
+ break;
+
+ bw_params->clk_table.entries[i].phyclk_mhz = bw_params->clk_table.entries[j].phyclk_mhz;
+ bw_params->clk_table.entries[i].phyclk_d18_mhz = bw_params->clk_table.entries[j].phyclk_d18_mhz;
+ bw_params->clk_table.entries[i].dtbclk_mhz = bw_params->clk_table.entries[j].dtbclk_mhz;
+
+ /* Now update clocks we do read */
+ bw_params->clk_table.entries[i].fclk_mhz = min_fclk;
+ bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[min_pstate].MemClk;
+ bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[min_pstate].Voltage;
+ bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i];
+ bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i];
+ bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
+ bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
+ bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio(
+ clock_table->DfPstateTable[min_pstate].WckRatio);
+ }
+
+ /* Make sure to include at least one entry at highest pstate */
+ if (max_pstate != min_pstate || i == 0) {
+ if (i > MAX_NUM_DPM_LVL - 1)
+ i = MAX_NUM_DPM_LVL - 1; /* table full: reuse the last slot instead of appending */
+
+ bw_params->clk_table.entries[i].fclk_mhz = max_fclk;
+ bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[max_pstate].MemClk;
+ bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[max_pstate].Voltage;
+ bw_params->clk_table.entries[i].dcfclk_mhz = find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].socclk_mhz = find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
+ bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
+ bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio(
+ clock_table->DfPstateTable[max_pstate].WckRatio);
+ i++;
+ }
+ bw_params->clk_table.num_entries = i--; /* num_entries = i, then i steps back to index the last entry */
+
+ /* Make sure all highest clocks are included*/
+ bw_params->clk_table.entries[i].socclk_mhz = find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].dispclk_mhz = find_max_clk_value(clock_table->DispClocks, NUM_DISPCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].dppclk_mhz = find_max_clk_value(clock_table->DppClocks, NUM_DPPCLK_DPM_LEVELS);
+ ASSERT(clock_table->DcfClocks[i] == find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS));
+ bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz;
+ bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz;
+ bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz;
+
+ /*
+ * Set any 0 clocks to max default setting. Not an issue for
+ * power since we aren't doing switching in such case anyway
+ */
+ for (i = 0; i < bw_params->clk_table.num_entries; i++) {
+ if (!bw_params->clk_table.entries[i].fclk_mhz) {
+ bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz;
+ bw_params->clk_table.entries[i].memclk_mhz = def_max.memclk_mhz;
+ bw_params->clk_table.entries[i].voltage = def_max.voltage;
+ }
+ if (!bw_params->clk_table.entries[i].dcfclk_mhz)
+ bw_params->clk_table.entries[i].dcfclk_mhz = def_max.dcfclk_mhz;
+ if (!bw_params->clk_table.entries[i].socclk_mhz)
+ bw_params->clk_table.entries[i].socclk_mhz = def_max.socclk_mhz;
+ if (!bw_params->clk_table.entries[i].dispclk_mhz)
+ bw_params->clk_table.entries[i].dispclk_mhz = def_max.dispclk_mhz;
+ if (!bw_params->clk_table.entries[i].dppclk_mhz)
+ bw_params->clk_table.entries[i].dppclk_mhz = def_max.dppclk_mhz;
+ if (!bw_params->clk_table.entries[i].phyclk_mhz)
+ bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz;
+ if (!bw_params->clk_table.entries[i].phyclk_d18_mhz)
+ bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz;
+ if (!bw_params->clk_table.entries[i].dtbclk_mhz)
+ bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz;
+ }
+ ASSERT(bw_params->clk_table.entries[i-1].dcfclk_mhz); /* i == num_entries here, so this checks the last entry */
+ bw_params->vram_type = bios_info->memory_type;
+
+ bw_params->dram_channel_width_bytes = bios_info->memory_type == 0x22 ? 8 : 4; /* NOTE(review): 0x22 is an unnamed memory_type value - TODO confirm which type it denotes */
+ bw_params->num_channels = bios_info->ma_channel_number ? bios_info->ma_channel_number : 4; /* falls back to 4 when the BIOS reports 0 */
+
+ for (i = 0; i < WM_SET_COUNT; i++) {
+ bw_params->wm_table.entries[i].wm_inst = i;
+
+ if (i >= bw_params->clk_table.num_entries) {
+ bw_params->wm_table.entries[i].valid = false;
+ continue;
+ }
+
+ bw_params->wm_table.entries[i].wm_type = WM_TYPE_PSTATE_CHG;
+ bw_params->wm_table.entries[i].valid = true;
+ }
+}
+/*
+ * Clock manager operations for DCN3.14, installed by
+ * dcn314_clk_mgr_construct(). The DP and DTB reference clock getters are
+ * reused from the dce12 and dcn31 implementations.
+ */
+static struct clk_mgr_funcs dcn314_funcs = {
+ .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
+ .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
+ .update_clocks = dcn314_update_clocks,
+ .init_clocks = dcn314_init_clocks,
+ .enable_pme_wa = dcn314_enable_pme_wa,
+ .are_clock_states_equal = dcn314_are_clock_states_equal,
+ .notify_wm_ranges = dcn314_notify_wm_ranges
+};
+extern struct clk_mgr_funcs dcn3_fpga_funcs; /* NOTE(review): not referenced in the visible code - confirm still needed */
+
+/*
+ * Look up the dprefclk spread-spectrum percentage in ss_info_table, using
+ * the CLK2_BYPASS_SEL field of CLK1_CLK2_BYPASS_CNTL as the index.
+ *
+ * @clk_mgr is left untouched when SSC is disabled on the SPLL or the
+ * selector is outside the table. ss_on_dprefclk and dprefclk_ss_divider are
+ * only set when the looked-up percentage is non-zero.
+ */
+static void dcn314_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr)
+{
+	uint32_t bypass_sel;
+
+	REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &bypass_sel);
+
+	if (!dcn314_is_spll_ssc_enabled(&clk_mgr->base))
+		return;
+	if (bypass_sel >= ARRAY_SIZE(ss_info_table.ss_percentage))
+		return;
+
+	clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[bypass_sel];
+	if (clk_mgr->dprefclk_ss_percentage == 0)
+		return;
+
+	clk_mgr->ss_on_dprefclk = true;
+	clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider;
+}
+
+/*
+ * Initialise a DCN3.14 clock manager.
+ *
+ * @ctx:     display core context; also supplies dc_bios and debug options
+ * @clk_mgr: object to initialise
+ * @pp_smu:  stored in clk_mgr->base.pp_smu
+ * @dccg:    stored in clk_mgr->base.dccg
+ *
+ * Allocates GPU buffers for the SMU watermark table (kept until
+ * dcn314_clk_mgr_destroy()) and for the DPM clock table (freed before
+ * returning). If either allocation fails, a static dummy buffer with a zero
+ * GPU address is used instead, which makes the SMU transfers no-ops.
+ *
+ * The watermark table is chosen from the BIOS memory type. integrated_info
+ * may be NULL, in which case the DDR5 table is used.
+ *
+ * When debug.pstate_enabled is set, the DPM table is fetched from the SMU,
+ * logged, and (unless config.use_default_clock_table is set or there is no
+ * integrated_info) used to rebuild bw_params.
+ */
+void dcn314_clk_mgr_construct(
+		struct dc_context *ctx,
+		struct clk_mgr_dcn314 *clk_mgr,
+		struct pp_smu_funcs *pp_smu,
+		struct dccg *dccg)
+{
+	struct dcn314_smu_dpm_clks smu_dpm_clks = { 0 };
+	struct clk_log_info log_info = {0};
+
+	clk_mgr->base.base.ctx = ctx;
+	clk_mgr->base.base.funcs = &dcn314_funcs;
+
+	clk_mgr->base.pp_smu = pp_smu;
+
+	clk_mgr->base.dccg = dccg;
+	clk_mgr->base.dfs_bypass_disp_clk = 0;
+
+	clk_mgr->base.dprefclk_ss_percentage = 0;
+	clk_mgr->base.dprefclk_ss_divider = 1000;
+	clk_mgr->base.ss_on_dprefclk = false;
+	clk_mgr->base.dfs_ref_freq_khz = 48000;
+
+	clk_mgr->smu_wm_set.wm_set = (struct dcn314_watermarks *)dm_helpers_allocate_gpu_mem(
+				clk_mgr->base.base.ctx,
+				DC_MEM_ALLOC_TYPE_FRAME_BUFFER,
+				sizeof(struct dcn314_watermarks),
+				&clk_mgr->smu_wm_set.mc_address.quad_part);
+
+	/* fall back to a static buffer; address 0 marks it as not SMU-visible */
+	if (!clk_mgr->smu_wm_set.wm_set) {
+		clk_mgr->smu_wm_set.wm_set = &dummy_wms;
+		clk_mgr->smu_wm_set.mc_address.quad_part = 0;
+	}
+	ASSERT(clk_mgr->smu_wm_set.wm_set);
+
+	smu_dpm_clks.dpm_clks = (DpmClocks314_t *)dm_helpers_allocate_gpu_mem(
+				clk_mgr->base.base.ctx,
+				DC_MEM_ALLOC_TYPE_FRAME_BUFFER,
+				sizeof(DpmClocks314_t),
+				&smu_dpm_clks.mc_address.quad_part);
+
+	if (smu_dpm_clks.dpm_clks == NULL) {
+		smu_dpm_clks.dpm_clks = &dummy_clocks;
+		smu_dpm_clks.mc_address.quad_part = 0;
+	}
+
+	ASSERT(smu_dpm_clks.dpm_clks);
+
+	clk_mgr->base.smu_ver = dcn314_smu_get_smu_version(&clk_mgr->base);
+
+	if (clk_mgr->base.smu_ver)
+		clk_mgr->base.smu_present = true;
+
+	/* TODO: Check we get what we expect during bringup */
+	clk_mgr->base.base.dentist_vco_freq_khz = get_vco_frequency_from_reg(&clk_mgr->base);
+
+	/*
+	 * integrated_info is treated as optional further down, so it must not
+	 * be dereferenced blindly here; without it, use the DDR5 table.
+	 */
+	if (ctx->dc_bios->integrated_info &&
+	    ctx->dc_bios->integrated_info->memory_type == LpDdr5MemType)
+		dcn314_bw_params.wm_table = lpddr5_wm_table;
+	else
+		dcn314_bw_params.wm_table = ddr5_wm_table;
+
+	/* Saved clocks configured at boot for debug purposes */
+	dcn314_dump_clk_registers(&clk_mgr->base.base.boot_snapshot,
+				  &clk_mgr->base.base, &log_info);
+
+	clk_mgr->base.base.dprefclk_khz = 600000;
+	clk_mgr->base.base.clks.ref_dtbclk_khz = 600000;
+	dce_clock_read_ss_info(&clk_mgr->base);
+	dcn314_read_ss_info_from_lut(&clk_mgr->base);
+	/*if bios enabled SS, driver needs to adjust dtb clock, only enable with correct bios*/
+
+	clk_mgr->base.base.bw_params = &dcn314_bw_params;
+
+	if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
+		int i;
+
+		dcn314_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks);
+		DC_LOG_SMU("NumDcfClkLevelsEnabled: %d\n"
+			   "NumDispClkLevelsEnabled: %d\n"
+			   "NumSocClkLevelsEnabled: %d\n"
+			   "VcnClkLevelsEnabled: %d\n"
+			   "NumDfPstatesEnabled: %d\n"
+			   "MinGfxClk: %d\n"
+			   "MaxGfxClk: %d\n",
+			   smu_dpm_clks.dpm_clks->NumDcfClkLevelsEnabled,
+			   smu_dpm_clks.dpm_clks->NumDispClkLevelsEnabled,
+			   smu_dpm_clks.dpm_clks->NumSocClkLevelsEnabled,
+			   smu_dpm_clks.dpm_clks->VcnClkLevelsEnabled,
+			   smu_dpm_clks.dpm_clks->NumDfPstatesEnabled,
+			   smu_dpm_clks.dpm_clks->MinGfxClk,
+			   smu_dpm_clks.dpm_clks->MaxGfxClk);
+		for (i = 0; i < smu_dpm_clks.dpm_clks->NumDcfClkLevelsEnabled; i++) {
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks->DcfClocks[%d] = %d\n",
+				   i,
+				   smu_dpm_clks.dpm_clks->DcfClocks[i]);
+		}
+		for (i = 0; i < smu_dpm_clks.dpm_clks->NumDispClkLevelsEnabled; i++) {
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks->DispClocks[%d] = %d\n",
+				   i, smu_dpm_clks.dpm_clks->DispClocks[i]);
+		}
+		for (i = 0; i < smu_dpm_clks.dpm_clks->NumSocClkLevelsEnabled; i++) {
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks->SocClocks[%d] = %d\n",
+				   i, smu_dpm_clks.dpm_clks->SocClocks[i]);
+		}
+		for (i = 0; i < NUM_SOC_VOLTAGE_LEVELS; i++)
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks->SocVoltage[%d] = %d\n",
+				   i, smu_dpm_clks.dpm_clks->SocVoltage[i]);
+
+		for (i = 0; i < NUM_DF_PSTATE_LEVELS; i++) {
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks.DfPstateTable[%d].FClk = %d\n"
+				   "smu_dpm_clks.dpm_clks->DfPstateTable[%d].MemClk= %d\n"
+				   "smu_dpm_clks.dpm_clks->DfPstateTable[%d].Voltage = %d\n",
+				   i, smu_dpm_clks.dpm_clks->DfPstateTable[i].FClk,
+				   i, smu_dpm_clks.dpm_clks->DfPstateTable[i].MemClk,
+				   i, smu_dpm_clks.dpm_clks->DfPstateTable[i].Voltage);
+		}
+
+		if (ctx->dc_bios->integrated_info && ctx->dc->config.use_default_clock_table == false) {
+			dcn314_clk_mgr_helper_populate_bw_params(
+					&clk_mgr->base,
+					ctx->dc_bios->integrated_info,
+					smu_dpm_clks.dpm_clks);
+		}
+	}
+
+	/* only a real GPU allocation (non-zero address) must be released */
+	if (smu_dpm_clks.dpm_clks && smu_dpm_clks.mc_address.quad_part != 0)
+		dm_helpers_free_gpu_mem(clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_FRAME_BUFFER,
+				smu_dpm_clks.dpm_clks);
+}
+
+/*
+ * Release the watermark table buffer owned by the clock manager.
+ * A buffer with a zero GPU address was not allocated from GPU memory
+ * and is left alone.
+ */
+void dcn314_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int)
+{
+	struct clk_mgr_dcn314 *clk_mgr = TO_CLK_MGR_DCN314(clk_mgr_int);
+
+	if (!clk_mgr->smu_wm_set.wm_set)
+		return;
+	if (clk_mgr->smu_wm_set.mc_address.quad_part == 0)
+		return;
+
+	dm_helpers_free_gpu_mem(clk_mgr_int->base.ctx, DC_MEM_ALLOC_TYPE_FRAME_BUFFER,
+			clk_mgr->smu_wm_set.wm_set);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
new file mode 100644
index 000000000000..0577eb527bc3
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN314_CLK_MGR_H__
+#define __DCN314_CLK_MGR_H__
+#include "clk_mgr_internal.h"
+
+#define DCN314_NUM_CLOCK_SOURCES 5
+
+struct dcn314_watermarks;
+
+struct dcn314_smu_watermark_set { /* watermark table buffer plus the address given to the SMU */
+ struct dcn314_watermarks *wm_set; /* table contents; GPU allocation, or a static dummy if allocation failed */
+ union large_integer mc_address; /* address of wm_set passed to the SMU; quad_part == 0 means no GPU buffer */
+};
+
+struct clk_mgr_dcn314 { /* DCN3.14 clock manager: generic internal manager plus SMU watermark state */
+ struct clk_mgr_internal base; /* embedded base object; TO_CLK_MGR_DCN314() is applied to pointers to it */
+ struct dcn314_smu_watermark_set smu_wm_set; /* released in dcn314_clk_mgr_destroy() */
+};
+
+struct dcn314_ss_info_table { /* spread-spectrum lookup table, one percentage per clock source */
+ uint32_t ss_divider; /* copied to dprefclk_ss_divider when a non-zero percentage is selected */
+ uint32_t ss_percentage[DCN314_NUM_CLOCK_SOURCES]; /* indexed by the CLK2 bypass selector */
+};
+
+bool dcn314_are_clock_states_equal(struct dc_clocks *a,
+ struct dc_clocks *b);
+
+bool dcn314_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base);
+
+void dcn314_init_clocks(struct clk_mgr *clk_mgr);
+
+void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
+ struct dc_state *context,
+ bool safe_to_lower);
+
+void dcn314_clk_mgr_construct(struct dc_context *ctx,
+ struct clk_mgr_dcn314 *clk_mgr,
+ struct pp_smu_funcs *pp_smu,
+ struct dccg *dccg);
+
+void dcn314_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int);
+
+
+void dcn314_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
+ struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info);
+
+
+#endif //__DCN314_CLK_MGR_H__
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c
new file mode 100644
index 000000000000..c4af406146b7
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c
@@ -0,0 +1,398 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+
+#include "core_types.h"
+#include "clk_mgr_internal.h"
+#include "reg_helper.h"
+#include "dm_helpers.h"
+#include "dcn314_smu.h"
+
+#include "mp/mp_13_0_5_offset.h"
+
+/* TODO: Use the real headers when they're correct */
+#define MP1_BASE__INST0_SEG0 0x00016000
+#define MP1_BASE__INST0_SEG1 0x0243FC00
+#define MP1_BASE__INST0_SEG2 0x00DC0000
+#define MP1_BASE__INST0_SEG3 0x00E00000
+#define MP1_BASE__INST0_SEG4 0x00E40000
+#define MP1_BASE__INST0_SEG5 0
+
+#ifdef BASE_INNER
+#undef BASE_INNER
+#endif
+
+#define BASE_INNER(seg) MP1_BASE__INST0_SEG ## seg
+
+#define BASE(seg) BASE_INNER(seg)
+
+#define REG(reg_name) (BASE(reg##reg_name##_BASE_IDX) + reg##reg_name)
+
+#define FN(reg_name, field) \
+ FD(reg_name##__##field)
+
+#include "logger_types.h"
+#undef DC_LOGGER
+#define DC_LOGGER \
+ CTX->logger
+#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
+
+#define VBIOSSMC_MSG_TestMessage 0x1
+#define VBIOSSMC_MSG_GetSmuVersion 0x2
+#define VBIOSSMC_MSG_PowerUpGfx 0x3
+#define VBIOSSMC_MSG_SetDispclkFreq 0x4
+#define VBIOSSMC_MSG_SetDprefclkFreq 0x5 //Not used. DPRef is constant
+#define VBIOSSMC_MSG_SetDppclkFreq 0x6
+#define VBIOSSMC_MSG_SetHardMinDcfclkByFreq 0x7
+#define VBIOSSMC_MSG_SetMinDeepSleepDcfclk 0x8
+#define VBIOSSMC_MSG_SetPhyclkVoltageByFreq 0x9 //Keep it in case VMIN does not support phy clk
+#define VBIOSSMC_MSG_GetFclkFrequency 0xA
+#define VBIOSSMC_MSG_SetDisplayCount 0xB //Not used anymore
+#define VBIOSSMC_MSG_EnableTmdp48MHzRefclkPwrDown 0xC //Not used anymore
+#define VBIOSSMC_MSG_UpdatePmeRestore 0xD
+#define VBIOSSMC_MSG_SetVbiosDramAddrHigh 0xE //Used for WM table txfr
+#define VBIOSSMC_MSG_SetVbiosDramAddrLow 0xF
+#define VBIOSSMC_MSG_TransferTableSmu2Dram 0x10
+#define VBIOSSMC_MSG_TransferTableDram2Smu 0x11
+#define VBIOSSMC_MSG_SetDisplayIdleOptimizations 0x12
+#define VBIOSSMC_MSG_GetDprefclkFreq 0x13
+#define VBIOSSMC_MSG_GetDtbclkFreq 0x14
+#define VBIOSSMC_MSG_AllowZstatesEntry 0x15
+#define VBIOSSMC_MSG_DisallowZstatesEntry 0x16
+#define VBIOSSMC_MSG_SetDtbClk 0x17
+#define VBIOSSMC_Message_Count 0x18
+
+#define VBIOSSMC_Status_BUSY 0x0
+#define VBIOSSMC_Result_OK 0x1
+#define VBIOSSMC_Result_Failed 0xFF
+#define VBIOSSMC_Result_UnknownCmd 0xFE
+#define VBIOSSMC_Result_CmdRejectedPrereq 0xFD
+#define VBIOSSMC_Result_CmdRejectedBusy 0xFC
+
+/*
+ * Function to be used instead of REG_WAIT macro because the wait ends when
+ * the register is NOT EQUAL to zero, and because the translation in msg_if.h
+ * won't work with REG_WAIT.
+ */
+static uint32_t dcn314_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, unsigned int delay_us, unsigned int max_retries)
+{
+ uint32_t res_val = VBIOSSMC_Status_BUSY;
+
+ do {
+ res_val = REG_READ(MP1_SMN_C2PMSG_91);
+ if (res_val != VBIOSSMC_Status_BUSY)
+ break;
+
+ if (delay_us >= 1000)
+ msleep(delay_us/1000);
+ else if (delay_us > 0)
+ udelay(delay_us);
+ } while (max_retries--);
+
+ return res_val;
+}
+
+static int dcn314_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
+ unsigned int msg_id,
+ unsigned int param)
+{
+ uint32_t result;
+
+ result = dcn314_smu_wait_for_response(clk_mgr, 10, 200000);
+
+ if (result != VBIOSSMC_Result_OK)
+ smu_print("SMU Response was not OK. SMU response after wait received is: %d\n",
+ result);
+
+ if (result == VBIOSSMC_Status_BUSY)
+ return -1;
+
+ /* First clear response register */
+ REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Status_BUSY);
+
+ /* Set the parameter register for the SMU message, unit is Mhz */
+ REG_WRITE(MP1_SMN_C2PMSG_83, param);
+
+ /* Trigger the message transaction by writing the message ID */
+ REG_WRITE(MP1_SMN_C2PMSG_67, msg_id);
+
+ result = dcn314_smu_wait_for_response(clk_mgr, 10, 200000);
+
+ if (result == VBIOSSMC_Result_Failed) {
+ if (msg_id == VBIOSSMC_MSG_TransferTableDram2Smu &&
+ param == TABLE_WATERMARKS)
+ DC_LOG_DEBUG("Watermarks table not configured properly by SMU");
+ else if (msg_id == VBIOSSMC_MSG_SetHardMinDcfclkByFreq ||
+ msg_id == VBIOSSMC_MSG_SetMinDeepSleepDcfclk)
+ DC_LOG_WARNING("DCFCLK_DPM is not enabled by BIOS");
+ else
+ ASSERT(0);
+ REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Result_OK);
+ return -1;
+ }
+
+ if (IS_SMU_TIMEOUT(result)) {
+ ASSERT(0);
+ dm_helpers_smu_timeout(CTX, msg_id, param, 10 * 200000);
+ }
+
+ return REG_READ(MP1_SMN_C2PMSG_83);
+}
+
+int dcn314_smu_get_smu_version(struct clk_mgr_internal *clk_mgr)
+{
+ return dcn314_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_GetSmuVersion,
+ 0);
+}
+
+
+int dcn314_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz)
+{
+ int actual_dispclk_set_mhz = -1;
+
+ if (!clk_mgr->smu_present)
+ return requested_dispclk_khz;
+
+ /* Unit of SMU msg parameter is Mhz */
+ actual_dispclk_set_mhz = dcn314_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_SetDispclkFreq,
+ khz_to_mhz_ceil(requested_dispclk_khz));
+
+ return actual_dispclk_set_mhz * 1000;
+}
+
+int dcn314_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr)
+{
+ int actual_dprefclk_set_mhz = -1;
+
+ if (!clk_mgr->smu_present)
+ return clk_mgr->base.dprefclk_khz;
+
+ actual_dprefclk_set_mhz = dcn314_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_SetDprefclkFreq,
+ khz_to_mhz_ceil(clk_mgr->base.dprefclk_khz));
+
+ /* TODO: add code for programming DP DTO, currently this is done by command table */
+
+ return actual_dprefclk_set_mhz * 1000;
+}
+
+int dcn314_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_dcfclk_khz)
+{
+ int actual_dcfclk_set_mhz = -1;
+
+ if (!clk_mgr->base.ctx->dc->debug.pstate_enabled)
+ return -1;
+
+ if (!clk_mgr->smu_present)
+ return requested_dcfclk_khz;
+
+ actual_dcfclk_set_mhz = dcn314_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_SetHardMinDcfclkByFreq,
+ khz_to_mhz_ceil(requested_dcfclk_khz));
+
+ return actual_dcfclk_set_mhz * 1000;
+}
+
+int dcn314_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_min_ds_dcfclk_khz)
+{
+ int actual_min_ds_dcfclk_mhz = -1;
+
+ if (!clk_mgr->base.ctx->dc->debug.pstate_enabled)
+ return -1;
+
+ if (!clk_mgr->smu_present)
+ return requested_min_ds_dcfclk_khz;
+
+ actual_min_ds_dcfclk_mhz = dcn314_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_SetMinDeepSleepDcfclk,
+ khz_to_mhz_ceil(requested_min_ds_dcfclk_khz));
+
+ return actual_min_ds_dcfclk_mhz * 1000;
+}
+
+int dcn314_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_khz)
+{
+ int actual_dppclk_set_mhz = -1;
+
+ if (!clk_mgr->smu_present)
+ return requested_dpp_khz;
+
+ actual_dppclk_set_mhz = dcn314_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_SetDppclkFreq,
+ khz_to_mhz_ceil(requested_dpp_khz));
+
+ return actual_dppclk_set_mhz * 1000;
+}
+
+void dcn314_smu_set_display_idle_optimization(struct clk_mgr_internal *clk_mgr, uint32_t idle_info)
+{
+ if (!clk_mgr->base.ctx->dc->debug.pstate_enabled)
+ return;
+
+ if (!clk_mgr->smu_present)
+ return;
+
+ //TODO: Work with smu team to define optimization options.
+ dcn314_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_SetDisplayIdleOptimizations,
+ idle_info);
+}
+
+void dcn314_smu_enable_phy_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable)
+{
+ union display_idle_optimization_u idle_info = { 0 };
+
+ if (!clk_mgr->smu_present)
+ return;
+
+ if (enable) {
+ idle_info.idle_info.df_request_disabled = 1;
+ idle_info.idle_info.phy_ref_clk_off = 1;
+ }
+
+ dcn314_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_SetDisplayIdleOptimizations,
+ idle_info.data);
+}
+
+void dcn314_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr)
+{
+ if (!clk_mgr->smu_present)
+ return;
+
+ dcn314_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_UpdatePmeRestore,
+ 0);
+}
+
+void dcn314_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high)
+{
+ if (!clk_mgr->smu_present)
+ return;
+
+ dcn314_smu_send_msg_with_param(clk_mgr,
+ VBIOSSMC_MSG_SetVbiosDramAddrHigh, addr_high);
+}
+
+void dcn314_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low)
+{
+ if (!clk_mgr->smu_present)
+ return;
+
+ dcn314_smu_send_msg_with_param(clk_mgr,
+ VBIOSSMC_MSG_SetVbiosDramAddrLow, addr_low);
+}
+
+void dcn314_smu_transfer_dpm_table_smu_2_dram(struct clk_mgr_internal *clk_mgr)
+{
+ if (!clk_mgr->smu_present)
+ return;
+
+ dcn314_smu_send_msg_with_param(clk_mgr,
+ VBIOSSMC_MSG_TransferTableSmu2Dram, TABLE_DPMCLOCKS);
+}
+
+void dcn314_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr)
+{
+ if (!clk_mgr->smu_present)
+ return;
+
+ dcn314_smu_send_msg_with_param(clk_mgr,
+ VBIOSSMC_MSG_TransferTableDram2Smu, TABLE_WATERMARKS);
+}
+
+void dcn314_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zstate_support_state support)
+{
+ unsigned int msg_id, param;
+
+ if (!clk_mgr->smu_present)
+ return;
+
+ switch (support) {
+
+ case DCN_ZSTATE_SUPPORT_ALLOW:
+ msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
+ param = (1 << 10) | (1 << 9) | (1 << 8);
+ break;
+
+ case DCN_ZSTATE_SUPPORT_DISALLOW:
+ msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
+ param = 0;
+ break;
+
+
+ case DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY:
+ msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
+ param = (1 << 10);
+ break;
+
+ case DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY:
+ msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
+ param = (1 << 10) | (1 << 8);
+ break;
+
+ case DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY:
+ msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
+ param = (1 << 8);
+ break;
+
+ default: //DCN_ZSTATE_SUPPORT_UNKNOWN
+ msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
+ param = 0;
+ break;
+ }
+
+
+ dcn314_smu_send_msg_with_param(
+ clk_mgr,
+ msg_id,
+ param);
+
+}
+
+/* Arg = 1: Turn DTB on; 0: Turn DTB CLK OFF. when it is on, it is 600MHZ */
+void dcn314_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable)
+{
+ if (!clk_mgr->smu_present)
+ return;
+
+ dcn314_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_SetDtbClk,
+ enable);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h
new file mode 100644
index 000000000000..78ca1e5c5e9e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef DAL_DC_314_SMU_H_
+#define DAL_DC_314_SMU_H_
+
+#include "smu13_driver_if_v13_0_4.h"
+
+typedef enum {
+ WCK_RATIO_1_1 = 0, // DDR5, Wck:ck is always 1:1;
+ WCK_RATIO_1_2,
+ WCK_RATIO_1_4,
+ WCK_RATIO_MAX
+} WCK_RATIO_e;
+
+typedef struct {
+ uint32_t FClk;
+ uint32_t MemClk;
+ uint32_t Voltage;
+ uint8_t WckRatio;
+ uint8_t Spare[3];
+} DfPstateTable314_t;
+
+//Freq in MHz
+//Voltage in milli volts with 2 fractional bits
+typedef struct {
+ uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS];
+ uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS];
+ uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS];
+ uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS];
+ uint32_t VClocks[NUM_VCN_DPM_LEVELS];
+ uint32_t DClocks[NUM_VCN_DPM_LEVELS];
+ uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS];
+ DfPstateTable314_t DfPstateTable[NUM_DF_PSTATE_LEVELS];
+
+ uint8_t NumDcfClkLevelsEnabled;
+ uint8_t NumDispClkLevelsEnabled; //Applies to both Dispclk and Dppclk
+ uint8_t NumSocClkLevelsEnabled;
+ uint8_t VcnClkLevelsEnabled; //Applies to both Vclk and Dclk
+ uint8_t NumDfPstatesEnabled;
+ uint8_t spare[3];
+
+ uint32_t MinGfxClk;
+ uint32_t MaxGfxClk;
+} DpmClocks314_t;
+
+struct dcn314_watermarks {
+ // Watermarks
+ WatermarkRowGeneric_t WatermarkRow[WM_COUNT][NUM_WM_RANGES];
+ uint32_t MmHubPadding[7]; // SMU internal use
+};
+
+struct dcn314_smu_dpm_clks {
+ DpmClocks314_t *dpm_clks;
+ union large_integer mc_address;
+};
+
+struct display_idle_optimization {
+ unsigned int df_request_disabled : 1;
+ unsigned int phy_ref_clk_off : 1;
+ unsigned int s0i2_rdy : 1;
+ unsigned int reserved : 29;
+};
+
+union display_idle_optimization_u {
+ struct display_idle_optimization idle_info;
+ uint32_t data;
+};
+
+int dcn314_smu_get_smu_version(struct clk_mgr_internal *clk_mgr);
+int dcn314_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz);
+int dcn314_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr);
+int dcn314_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_dcfclk_khz);
+int dcn314_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_min_ds_dcfclk_khz);
+int dcn314_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_khz);
+void dcn314_smu_set_display_idle_optimization(struct clk_mgr_internal *clk_mgr, uint32_t idle_info);
+void dcn314_smu_enable_phy_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable);
+void dcn314_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr);
+void dcn314_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high);
+void dcn314_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low);
+void dcn314_smu_transfer_dpm_table_smu_2_dram(struct clk_mgr_internal *clk_mgr);
+void dcn314_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr);
+
+void dcn314_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zstate_support_state support);
+void dcn314_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable);
+
+#endif /* DAL_DC_314_SMU_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
new file mode 100644
index 000000000000..3a881451e9da
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
@@ -0,0 +1,820 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+
+#include "dccg.h"
+#include "clk_mgr_internal.h"
+
+// For dce12_get_dp_ref_freq_khz
+#include "dce100/dce_clk_mgr.h"
+// For dcn20_update_clocks_update_dpp_dto
+#include "dcn20/dcn20_clk_mgr.h"
+#include "dcn31/dcn31_clk_mgr.h"
+#include "dcn315_clk_mgr.h"
+
+#include "core_types.h"
+#include "dcn315_smu.h"
+#include "dm_helpers.h"
+
+#include "dc_dmub_srv.h"
+#include "reg_helper.h"
+#include "logger_types.h"
+#undef DC_LOGGER
+#define DC_LOGGER \
+ clk_mgr->base.base.ctx->logger
+
+#include "link_service.h"
+
+#define MAX_INSTANCE 7
+#define MAX_SEGMENT 8
+
+struct IP_BASE_INSTANCE {
+ unsigned int segment[MAX_SEGMENT];
+};
+
+struct IP_BASE {
+ struct IP_BASE_INSTANCE instance[MAX_INSTANCE];
+};
+
+static const struct IP_BASE CLK_BASE = { { { { 0x00016C00, 0x02401800, 0, 0, 0, 0, 0, 0 } },
+ { { 0x00016E00, 0x02401C00, 0, 0, 0, 0, 0, 0 } },
+ { { 0x00017000, 0x02402000, 0, 0, 0, 0, 0, 0 } },
+ { { 0x00017200, 0x02402400, 0, 0, 0, 0, 0, 0 } },
+ { { 0x0001B000, 0x0242D800, 0, 0, 0, 0, 0, 0 } },
+ { { 0x0001B200, 0x0242DC00, 0, 0, 0, 0, 0, 0 } } } };
+
+#define regCLK1_CLK0_CURRENT_CNT 0x0314
+#define regCLK1_CLK0_CURRENT_CNT_BASE_IDX 0
+#define regCLK1_CLK1_CURRENT_CNT 0x0315
+#define regCLK1_CLK1_CURRENT_CNT_BASE_IDX 0
+#define regCLK1_CLK2_CURRENT_CNT 0x0316
+#define regCLK1_CLK2_CURRENT_CNT_BASE_IDX 0
+#define regCLK1_CLK3_CURRENT_CNT 0x0317
+#define regCLK1_CLK3_CURRENT_CNT_BASE_IDX 0
+#define regCLK1_CLK4_CURRENT_CNT 0x0318
+#define regCLK1_CLK4_CURRENT_CNT_BASE_IDX 0
+#define regCLK1_CLK5_CURRENT_CNT 0x0319
+#define regCLK1_CLK5_CURRENT_CNT_BASE_IDX 0
+
+#define TO_CLK_MGR_DCN315(clk_mgr)\
+ container_of(clk_mgr, struct clk_mgr_dcn315, base)
+
+#define REG(reg_name) \
+ (CLK_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name)
+
+#define UNSUPPORTED_DCFCLK 10000000
+#define MIN_DPP_DISP_CLK 100000
+
+static int dcn315_get_active_display_cnt_wa(
+ struct dc *dc,
+ struct dc_state *context)
+{
+ int i, display_count;
+ bool tmds_present = false;
+
+ display_count = 0;
+ for (i = 0; i < context->stream_count; i++) {
+ const struct dc_stream_state *stream = context->streams[i];
+
+ if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A ||
+ stream->signal == SIGNAL_TYPE_DVI_SINGLE_LINK ||
+ stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK)
+ tmds_present = true;
+ }
+
+ for (i = 0; i < dc->link_count; i++) {
+ const struct dc_link *link = dc->links[i];
+
+ /* abusing the fact that the dig and phy are coupled to see if the phy is enabled */
+ if (link->link_enc && link->link_enc->funcs->is_dig_enabled &&
+ link->link_enc->funcs->is_dig_enabled(link->link_enc))
+ display_count++;
+ }
+
+ /* WA for hang on HDMI after the display is turned off and back on */
+ if (display_count == 0 && tmds_present)
+ display_count = 1;
+
+ return display_count;
+}
+
+static bool should_disable_otg(struct pipe_ctx *pipe)
+{
+ bool ret = true;
+
+ if (pipe->stream->link->link_enc && pipe->stream->link->link_enc->funcs->is_dig_enabled &&
+ pipe->stream->link->link_enc->funcs->is_dig_enabled(pipe->stream->link->link_enc))
+ ret = false;
+ return ret;
+}
+
+static void dcn315_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable)
+{
+ struct dc *dc = clk_mgr_base->ctx->dc;
+ int i;
+
+ for (i = 0; i < dc->res_pool->pipe_count; ++i) {
+ struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ if (pipe->top_pipe || pipe->prev_odm_pipe)
+ continue;
+ if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL ||
+ dc_is_virtual_signal(pipe->stream->signal))) {
+
+ /* This w/a should not trigger when we have a dig active */
+ if (should_disable_otg(pipe)) {
+ if (disable) {
+ pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
+ reset_sync_context_for_pipe(dc, context, i);
+ } else
+ pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
+ }
+ }
+ }
+}
+
+/* Apply the clocks requested in @context via the dcn315_smu_* helpers, then notify DMCUB. */
+static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base,
+ struct dc_state *context,
+ bool safe_to_lower)
+{
+ union dmub_rb_cmd cmd;
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
+ struct dc *dc = clk_mgr_base->ctx->dc;
+ int display_count = 0;
+ bool update_dppclk = false;
+ bool update_dispclk = false;
+ bool dpp_clock_lowered = false;
+
+ if (dc->work_arounds.skip_clock_update)
+ return;
+
+ clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
+ /*
+ * if it is safe to lower, but we are already in the lower state, we don't have to do anything
+ * also if safe to lower is false, we just go in the higher state
+ */
+ if (safe_to_lower) {
+ if (clk_mgr_base->clks.dtbclk_en && !new_clocks->dtbclk_en) {
+ dcn315_smu_set_dtbclk(clk_mgr, false);
+ clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
+ }
+ /* check that we're not already in lower */
+ if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) {
+ display_count = dcn315_get_active_display_cnt_wa(dc, context);
+ /* if we can go lower, go lower */
+ if (display_count == 0) {
+ union display_idle_optimization_u idle_info = { 0 };
+ idle_info.idle_info.df_request_disabled = 1;
+ idle_info.idle_info.phy_ref_clk_off = 1;
+ idle_info.idle_info.s0i2_rdy = 1;
+ dcn315_smu_set_display_idle_optimization(clk_mgr, idle_info.data);
+ /* update power state */
+ clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
+ }
+ }
+ } else {
+ if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) {
+ dcn315_smu_set_dtbclk(clk_mgr, true);
+ clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
+ }
+ /* check that we're not already in D0 */
+ if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_MISSION_MODE) {
+ union display_idle_optimization_u idle_info = { 0 };
+ dcn315_smu_set_display_idle_optimization(clk_mgr, idle_info.data);
+ /* update power state */
+ clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_MISSION_MODE;
+ }
+ }
+
+ /* Lock pstate by requesting unsupported dcfclk if change is unsupported */
+ if (!new_clocks->p_state_change_support)
+ new_clocks->dcfclk_khz = UNSUPPORTED_DCFCLK;
+ if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr_base->clks.dcfclk_khz)) {
+ clk_mgr_base->clks.dcfclk_khz = new_clocks->dcfclk_khz;
+ dcn315_smu_set_hard_min_dcfclk(clk_mgr, clk_mgr_base->clks.dcfclk_khz);
+ }
+
+ if (should_set_clock(safe_to_lower,
+ new_clocks->dcfclk_deep_sleep_khz, clk_mgr_base->clks.dcfclk_deep_sleep_khz)) {
+ clk_mgr_base->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz;
+ dcn315_smu_set_min_deep_sleep_dcfclk(clk_mgr, clk_mgr_base->clks.dcfclk_deep_sleep_khz);
+ }
+
+ // workaround: Limit dppclk to 100Mhz to avoid lower eDP panel switch to plus 4K monitor underflow.
+ if (new_clocks->dppclk_khz < MIN_DPP_DISP_CLK)
+ new_clocks->dppclk_khz = MIN_DPP_DISP_CLK;
+
+ if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) {
+ if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz)
+ dpp_clock_lowered = true;
+ clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz;
+ update_dppclk = true;
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) &&
+ (new_clocks->dispclk_khz > 0 || (safe_to_lower && display_count == 0))) {
+ int requested_dispclk_khz = new_clocks->dispclk_khz;
+
+ dcn315_disable_otg_wa(clk_mgr_base, context, true);
+
+ /* Clamp the requested clock to PMFW based on their limit. */
+ if (dc->debug.min_disp_clk_khz > 0 && requested_dispclk_khz < dc->debug.min_disp_clk_khz)
+ requested_dispclk_khz = dc->debug.min_disp_clk_khz;
+
+ dcn315_smu_set_dispclk(clk_mgr, requested_dispclk_khz);
+ clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
+ dcn315_disable_otg_wa(clk_mgr_base, context, false);
+
+ update_dispclk = true;
+ }
+
+ if (dpp_clock_lowered) {
+ // increase per DPP DTO before lowering global dppclk
+ dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
+ dcn315_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz);
+ } else {
+ // increase global DPPCLK before lowering per DPP DTO
+ if (update_dppclk || update_dispclk)
+ dcn315_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz);
+ // always update dtos unless clock is lowered and not safe to lower
+ if (new_clocks->dppclk_khz >= dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz)
+ dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
+ }
+
+ // notify DMCUB of latest clocks
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.notify_clocks.header.type = DMUB_CMD__CLK_MGR;
+ cmd.notify_clocks.header.sub_type = DMUB_CMD__CLK_MGR_NOTIFY_CLOCKS;
+ cmd.notify_clocks.clocks.dcfclk_khz = clk_mgr_base->clks.dcfclk_khz;
+ cmd.notify_clocks.clocks.dcfclk_deep_sleep_khz =
+ clk_mgr_base->clks.dcfclk_deep_sleep_khz;
+ cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
+ cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
+
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+static void dcn315_dump_clk_registers_internal(struct dcn35_clk_internal *internal, struct clk_mgr *clk_mgr_base)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+ // read dtbclk
+ internal->CLK1_CLK4_CURRENT_CNT = REG_READ(CLK1_CLK4_CURRENT_CNT);
+
+ // read dcfclk
+ internal->CLK1_CLK3_CURRENT_CNT = REG_READ(CLK1_CLK3_CURRENT_CNT);
+
+ // read dppclk
+ internal->CLK1_CLK1_CURRENT_CNT = REG_READ(CLK1_CLK1_CURRENT_CNT);
+
+ // read dprefclk
+ internal->CLK1_CLK2_CURRENT_CNT = REG_READ(CLK1_CLK2_CURRENT_CNT);
+
+ // read dispclk
+ internal->CLK1_CLK0_CURRENT_CNT = REG_READ(CLK1_CLK0_CURRENT_CNT);
+}
+
+/* Read the CLK1 CURRENT_CNT registers and store each value divided by 10 in @regs_and_bypass. */
+static void dcn315_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
+ struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info)
+{
+ struct dcn35_clk_internal internal = {0};
+
+ dcn315_dump_clk_registers_internal(&internal, clk_mgr_base);
+
+ regs_and_bypass->dcfclk = internal.CLK1_CLK3_CURRENT_CNT / 10;
+ regs_and_bypass->dprefclk = internal.CLK1_CLK2_CURRENT_CNT / 10;
+ regs_and_bypass->dispclk = internal.CLK1_CLK0_CURRENT_CNT / 10;
+ regs_and_bypass->dppclk = internal.CLK1_CLK1_CURRENT_CNT / 10;
+ regs_and_bypass->dtbclk = internal.CLK1_CLK4_CURRENT_CNT / 10;
+}
+
+static struct clk_bw_params dcn315_bw_params = {
+ .vram_type = Ddr4MemType,
+ .num_channels = 2,
+ .clk_table = {
+ .entries = {
+ {
+ .voltage = 0,
+ .dispclk_mhz = 640,
+ .dppclk_mhz = 640,
+ .phyclk_mhz = 810,
+ .phyclk_d18_mhz = 667,
+ .dtbclk_mhz = 600,
+ },
+ {
+ .voltage = 1,
+ .dispclk_mhz = 739,
+ .dppclk_mhz = 739,
+ .phyclk_mhz = 810,
+ .phyclk_d18_mhz = 667,
+ .dtbclk_mhz = 600,
+ },
+ {
+ .voltage = 2,
+ .dispclk_mhz = 960,
+ .dppclk_mhz = 960,
+ .phyclk_mhz = 810,
+ .phyclk_d18_mhz = 667,
+ .dtbclk_mhz = 600,
+ },
+ {
+ .voltage = 3,
+ .dispclk_mhz = 1200,
+ .dppclk_mhz = 1200,
+ .phyclk_mhz = 810,
+ .phyclk_d18_mhz = 667,
+ .dtbclk_mhz = 600,
+ },
+ {
+ .voltage = 4,
+ .dispclk_mhz = 1372,
+ .dppclk_mhz = 1372,
+ .phyclk_mhz = 810,
+ .phyclk_d18_mhz = 667,
+ .dtbclk_mhz = 600,
+ },
+ },
+ .num_entries = 5,
+ },
+
+};
+
+static struct wm_table ddr5_wm_table = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 129.0,
+ .sr_exit_time_us = 11.5,
+ .sr_enter_plus_exit_time_us = 14.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 129.0,
+ .sr_exit_time_us = 11.5,
+ .sr_enter_plus_exit_time_us = 14.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 129.0,
+ .sr_exit_time_us = 11.5,
+ .sr_enter_plus_exit_time_us = 14.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 129.0,
+ .sr_exit_time_us = 11.5,
+ .sr_enter_plus_exit_time_us = 14.5,
+ .valid = true,
+ },
+ }
+};
+
+static struct wm_table lpddr5_wm_table = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 129.0,
+ .sr_exit_time_us = 11.5,
+ .sr_enter_plus_exit_time_us = 14.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 129.0,
+ .sr_exit_time_us = 11.5,
+ .sr_enter_plus_exit_time_us = 14.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 129.0,
+ .sr_exit_time_us = 11.5,
+ .sr_enter_plus_exit_time_us = 14.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 129.0,
+ .sr_exit_time_us = 11.5,
+ .sr_enter_plus_exit_time_us = 14.5,
+ .valid = true,
+ },
+ }
+};
+
+/* Temporary Place holder until we can get them from fuse */
+static DpmClocks_315_t dummy_clocks = { 0 };
+static struct dcn315_watermarks dummy_wms = { 0 };
+
+/* Fill @table's WM_DCFCLK rows from the valid wm_table entries; never indexes row -1 if none are valid. */
+static void dcn315_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn315_watermarks *table)
+{
+ int i, num_valid_sets = 0;
+
+ for (i = 0; i < WM_SET_COUNT; i++) {
+ /* skip empty entries, the smu array has no holes*/
+ if (!bw_params->wm_table.entries[i].valid)
+ continue;
+
+ table->WatermarkRow[WM_DCFCLK][num_valid_sets].WmSetting = bw_params->wm_table.entries[i].wm_inst;
+ table->WatermarkRow[WM_DCFCLK][num_valid_sets].WmType = bw_params->wm_table.entries[i].wm_type;
+ /* We will not select WM based on fclk, so leave it as unconstrained */
+ table->WatermarkRow[WM_DCFCLK][num_valid_sets].MinClock = 0;
+ table->WatermarkRow[WM_DCFCLK][num_valid_sets].MaxClock = 0xFFFF;
+
+ if (table->WatermarkRow[WM_DCFCLK][num_valid_sets].WmType == WM_TYPE_PSTATE_CHG) {
+ if (i == 0)
+ table->WatermarkRow[WM_DCFCLK][num_valid_sets].MinMclk = 0;
+ else {
+ /* add 1 to make it non-overlapping with next lvl */
+ table->WatermarkRow[WM_DCFCLK][num_valid_sets].MinMclk =
+ bw_params->clk_table.entries[i - 1].dcfclk_mhz + 1;
+ }
+ table->WatermarkRow[WM_DCFCLK][num_valid_sets].MaxMclk =
+ bw_params->clk_table.entries[i].dcfclk_mhz;
+ } else {
+ /* unconstrained for memory retraining */
+ table->WatermarkRow[WM_DCFCLK][num_valid_sets].MinClock = 0;
+ table->WatermarkRow[WM_DCFCLK][num_valid_sets].MaxClock = 0xFFFF;
+ /* Modify previous watermark range, if there is one, to cover up to max */
+ if (num_valid_sets > 0)
+ table->WatermarkRow[WM_DCFCLK][num_valid_sets - 1].MaxClock = 0xFFFF;
+ }
+ num_valid_sets++;
+ }
+
+ ASSERT(num_valid_sets != 0); /* Must have at least one set of valid watermarks */
+
+ /* modify the min and max to make sure we cover the whole range*/
+ table->WatermarkRow[WM_DCFCLK][0].MinMclk = 0;
+ table->WatermarkRow[WM_DCFCLK][0].MinClock = 0;
+ if (num_valid_sets > 0) {
+ table->WatermarkRow[WM_DCFCLK][num_valid_sets - 1].MaxMclk = 0xFFFF;
+ table->WatermarkRow[WM_DCFCLK][num_valid_sets - 1].MaxClock = 0xFFFF;
+ }
+
+ /* This is for writeback only, does not matter currently as no writeback support*/
+ table->WatermarkRow[WM_SOCCLK][0].WmSetting = WM_A;
+ table->WatermarkRow[WM_SOCCLK][0].MinClock = 0;
+ table->WatermarkRow[WM_SOCCLK][0].MaxClock = 0xFFFF;
+ table->WatermarkRow[WM_SOCCLK][0].MinMclk = 0;
+ table->WatermarkRow[WM_SOCCLK][0].MaxMclk = 0xFFFF;
+}
+
+/*
+ * Rebuild the watermark range table from the current bw_params and ask the
+ * SMU to pull it in from DRAM.
+ *
+ * Does nothing when no SMU version was read, or when the watermark buffer has
+ * no address that can be handed to the SMU (mc_address == 0).
+ */
+static void dcn315_notify_wm_ranges(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	struct clk_mgr_dcn315 *clk_mgr_dcn315 = TO_CLK_MGR_DCN315(clk_mgr);
+	struct dcn315_smu_watermark_set *wm = &clk_mgr_dcn315->smu_wm_set;
+
+	if (!clk_mgr->smu_ver || !wm->wm_set || wm->mc_address.quad_part == 0)
+		return;
+
+	/* start from a clean table; the builder only fills the rows it uses */
+	memset(wm->wm_set, 0, sizeof(*wm->wm_set));
+	dcn315_build_watermark_ranges(clk_mgr_base->bw_params, wm->wm_set);
+
+	/* tell the SMU where the table lives, then trigger the transfer */
+	dcn315_smu_set_dram_addr_high(clk_mgr, wm->mc_address.high_part);
+	dcn315_smu_set_dram_addr_low(clk_mgr, wm->mc_address.low_part);
+	dcn315_smu_transfer_wm_table_dram_2_smu(clk_mgr);
+}
+
+/*
+ * Ask the SMU to copy its DPM clock table into the DRAM buffer described by
+ * smu_dpm_clks.
+ *
+ * Nothing is done when the buffer has no SMU-visible address
+ * (mc_address == 0, e.g. the fallback table used when allocation failed).
+ * Otherwise the buffer is zeroed first, even if no SMU version was read:
+ * dcn315_clk_mgr_construct() logs and parses the table right after this call
+ * whether or not the SMU filled it, so it must never be left with whatever
+ * the allocator returned.
+ */
+static void dcn315_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr,
+		struct dcn315_smu_dpm_clks *smu_dpm_clks)
+{
+	DpmClocks_315_t *table = smu_dpm_clks->dpm_clks;
+
+	if (!table || smu_dpm_clks->mc_address.quad_part == 0)
+		return;
+
+	memset(table, 0, sizeof(*table));
+
+	/* without SMU firmware the table simply stays all-zero */
+	if (!clk_mgr->smu_ver)
+		return;
+
+	dcn315_smu_set_dram_addr_high(clk_mgr,
+			smu_dpm_clks->mc_address.high_part);
+	dcn315_smu_set_dram_addr_low(clk_mgr,
+			smu_dpm_clks->mc_address.low_part);
+	dcn315_smu_transfer_dpm_table_smu_2_dram(clk_mgr);
+}
+
+/*
+ * Rebuild bw_params->clk_table and bw_params->wm_table from the DPM clock
+ * table reported by the SMU and from the VBIOS integrated info.
+ *
+ * One clk_table entry is produced per enabled DCFCLK level (at least one
+ * entry is always produced). Clocks that are not read from the SMU table are
+ * copied from the closest default entry, and any clock that ends up 0 is
+ * replaced by the value of the highest default entry.
+ *
+ * The level counts come from firmware-written memory, so they are clamped to
+ * the sizes of the arrays they index before being used.
+ * NOTE(review): the default table in bw_params is assumed to hold at least
+ * one entry on entry to this function - confirm against dcn315_bw_params.
+ */
+static void dcn315_clk_mgr_helper_populate_bw_params(
+		struct clk_mgr_internal *clk_mgr,
+		struct integrated_info *bios_info,
+		const DpmClocks_315_t *clock_table)
+{
+	int i;
+	struct clk_bw_params *bw_params = clk_mgr->base.bw_params;
+	const int num_dcfclk = clock_table->NumDcfClkLevelsEnabled > NUM_DCFCLK_DPM_LEVELS ?
+			NUM_DCFCLK_DPM_LEVELS : clock_table->NumDcfClkLevelsEnabled;
+	const int num_socclk = clock_table->NumSocClkLevelsEnabled > NUM_SOCCLK_DPM_LEVELS ?
+			NUM_SOCCLK_DPM_LEVELS : clock_table->NumSocClkLevelsEnabled;
+	const int num_dispclk = clock_table->NumDispClkLevelsEnabled > NUM_DISPCLK_DPM_LEVELS ?
+			NUM_DISPCLK_DPM_LEVELS : clock_table->NumDispClkLevelsEnabled;
+	const int num_pstates = clock_table->NumDfPstatesEnabled > NUM_DF_PSTATE_LEVELS ?
+			NUM_DF_PSTATE_LEVELS : clock_table->NumDfPstatesEnabled;
+	/* with no pstate reported, "count - 1" would wrap to 0xFFFFFFFF */
+	uint32_t max_pstate = num_pstates > 0 ? num_pstates - 1 : 0;
+	/* snapshot of the highest default entry, taken before the table is overwritten */
+	struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1];
+
+	/* For 315 we want to base clock table on dcfclk, need at least one entry regardless of pmfw table */
+	for (i = 0; i < num_dcfclk; i++) {
+		int j;
+
+		/* DF table is sorted with clocks decreasing */
+		for (j = num_pstates - 2; j >= 0; j--) {
+			if (clock_table->DfPstateTable[j].Voltage <= clock_table->SocVoltage[i])
+				max_pstate = j;
+		}
+		/* Max DCFCLK should match up with max pstate */
+		if (i == num_dcfclk - 1)
+			max_pstate = 0;
+
+		/* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */
+		for (j = bw_params->clk_table.num_entries - 1; j > 0; j--)
+			if (bw_params->clk_table.entries[j].dcfclk_mhz <= clock_table->DcfClocks[i])
+				break;
+		bw_params->clk_table.entries[i].phyclk_mhz = bw_params->clk_table.entries[j].phyclk_mhz;
+		bw_params->clk_table.entries[i].phyclk_d18_mhz = bw_params->clk_table.entries[j].phyclk_d18_mhz;
+		bw_params->clk_table.entries[i].dtbclk_mhz = bw_params->clk_table.entries[j].dtbclk_mhz;
+
+		/* Now update clocks we do read */
+		bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[max_pstate].FClk;
+		bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[max_pstate].MemClk;
+		bw_params->clk_table.entries[i].voltage = clock_table->SocVoltage[i];
+		bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i];
+		bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i];
+		bw_params->clk_table.entries[i].dispclk_mhz = clock_table->DispClocks[i];
+		bw_params->clk_table.entries[i].dppclk_mhz = clock_table->DppClocks[i];
+		bw_params->clk_table.entries[i].wck_ratio = 1;
+	}
+
+	/* Make sure to include at least one entry */
+	if (i == 0) {
+		bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[0].FClk;
+		bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[0].MemClk;
+		bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[0].Voltage;
+		bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[0];
+		bw_params->clk_table.entries[i].wck_ratio = 1;
+		i++;
+	} else if (num_dcfclk != num_socclk) {
+		/*
+		 * Level counts differ: give the top entry the highest SOC and
+		 * display levels. A count of 0 has no "last" level to read, so
+		 * the values copied in the loop above are kept in that case.
+		 */
+		if (num_socclk > 0) {
+			bw_params->clk_table.entries[i-1].voltage = clock_table->SocVoltage[num_socclk - 1];
+			bw_params->clk_table.entries[i-1].socclk_mhz = clock_table->SocClocks[num_socclk - 1];
+		}
+		if (num_dispclk > 0) {
+			bw_params->clk_table.entries[i-1].dispclk_mhz = clock_table->DispClocks[num_dispclk - 1];
+			bw_params->clk_table.entries[i-1].dppclk_mhz = clock_table->DppClocks[num_dispclk - 1];
+		}
+	}
+	bw_params->clk_table.num_entries = i;
+
+	/* Set any 0 clocks to max default setting. Not an issue for
+	 * power since we aren't doing switching in such case anyway
+	 */
+	for (i = 0; i < bw_params->clk_table.num_entries; i++) {
+		if (!bw_params->clk_table.entries[i].fclk_mhz) {
+			bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz;
+			bw_params->clk_table.entries[i].memclk_mhz = def_max.memclk_mhz;
+			bw_params->clk_table.entries[i].voltage = def_max.voltage;
+		}
+		if (!bw_params->clk_table.entries[i].dcfclk_mhz)
+			bw_params->clk_table.entries[i].dcfclk_mhz = def_max.dcfclk_mhz;
+		if (!bw_params->clk_table.entries[i].socclk_mhz)
+			bw_params->clk_table.entries[i].socclk_mhz = def_max.socclk_mhz;
+		if (!bw_params->clk_table.entries[i].dispclk_mhz)
+			bw_params->clk_table.entries[i].dispclk_mhz = def_max.dispclk_mhz;
+		if (!bw_params->clk_table.entries[i].dppclk_mhz)
+			bw_params->clk_table.entries[i].dppclk_mhz = def_max.dppclk_mhz;
+		if (!bw_params->clk_table.entries[i].phyclk_mhz)
+			bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz;
+		if (!bw_params->clk_table.entries[i].phyclk_d18_mhz)
+			bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz;
+		if (!bw_params->clk_table.entries[i].dtbclk_mhz)
+			bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz;
+	}
+
+	/* Make sure all highest default clocks are included*/
+	ASSERT(bw_params->clk_table.entries[i-1].phyclk_mhz == def_max.phyclk_mhz);
+	ASSERT(bw_params->clk_table.entries[i-1].phyclk_d18_mhz == def_max.phyclk_d18_mhz);
+	ASSERT(bw_params->clk_table.entries[i-1].dtbclk_mhz == def_max.dtbclk_mhz);
+	ASSERT(bw_params->clk_table.entries[i-1].dcfclk_mhz);
+	bw_params->vram_type = bios_info->memory_type;
+	bw_params->num_channels = bios_info->ma_channel_number;
+	bw_params->dram_channel_width_bytes = bios_info->memory_type == 0x22 ? 8 : 4;
+
+	/* one pstate-change watermark set per clock table entry, the rest are marked invalid */
+	for (i = 0; i < WM_SET_COUNT; i++) {
+		bw_params->wm_table.entries[i].wm_inst = i;
+
+		if (i >= bw_params->clk_table.num_entries) {
+			bw_params->wm_table.entries[i].valid = false;
+			continue;
+		}
+
+		bw_params->wm_table.entries[i].wm_type = WM_TYPE_PSTATE_CHG;
+		bw_params->wm_table.entries[i].valid = true;
+	}
+}
+
+/* clk_mgr_funcs.enable_pme_wa hook: forwards to the SMU message helper. */
+static void dcn315_enable_pme_wa(struct clk_mgr *clk_mgr_base)
+{
+	dcn315_smu_enable_pme_wa(TO_CLK_MGR_INTERNAL(clk_mgr_base));
+}
+
+/* Callback table installed on the clock manager by dcn315_clk_mgr_construct(). */
+static struct clk_mgr_funcs dcn315_funcs = {
+	/* clock state management */
+	.init_clocks = dcn315_init_clocks,
+	.update_clocks = dcn315_update_clocks,
+	.are_clock_states_equal = dcn31_are_clock_states_equal,
+	/* reference clock queries */
+	.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
+	.get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
+	/* SMU notifications */
+	.notify_wm_ranges = dcn315_notify_wm_ranges,
+	.enable_pme_wa = dcn315_enable_pme_wa,
+};
+extern struct clk_mgr_funcs dcn3_fpga_funcs;
+
+/*
+ * Reset the cached clock state to its boot-time defaults.
+ *
+ * Everything in clk_mgr->clks is cleared except ref_dtbclk_khz, which is
+ * carried over. The boot-time register snapshot is then re-read and used to
+ * seed dispclk_khz.
+ */
+void dcn315_init_clocks(struct clk_mgr *clk_mgr)
+{
+	struct clk_log_info log_info = {0};
+	struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
+	struct clk_mgr_dcn315 *clk_mgr_dcn315 = TO_CLK_MGR_DCN315(clk_mgr_int);
+	/* saved up front because the memset below wipes it */
+	const uint32_t saved_ref_dtbclk_khz = clk_mgr->clks.ref_dtbclk_khz;
+
+	memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
+
+	clk_mgr->clks.ref_dtbclk_khz = saved_ref_dtbclk_khz;
+	/* Assumption is that boot state always supports pstate */
+	clk_mgr->clks.p_state_change_support = true;
+	clk_mgr->clks.prev_p_state_change_support = true;
+	clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN;
+	clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN;
+
+	dcn315_dump_clk_registers(&clk_mgr->boot_snapshot, &clk_mgr_dcn315->base.base, &log_info);
+	/* snapshot value is scaled by 1000 to get kHz */
+	clk_mgr->clks.dispclk_khz = clk_mgr->boot_snapshot.dispclk * 1000;
+}
+
+/*
+ * Initialise a DCN3.15 clock manager.
+ *
+ * Steps, in order:
+ *  - install the callback table and the fixed reference-clock defaults;
+ *  - allocate the watermark table shared with the SMU (falling back to a
+ *    static dummy table with mc_address 0 when allocation fails);
+ *  - allocate a temporary buffer for the SMU DPM clock table (same fallback);
+ *  - read the SMU version and pick the watermark defaults by memory type;
+ *  - snapshot the boot clocks and work out dprefclk / ref_dtbclk;
+ *  - when pstate is enabled, fetch and log the DPM table and rebuild
+ *    bw_params from it;
+ *  - free the temporary DPM buffer if it was really allocated.
+ */
+void dcn315_clk_mgr_construct(
+		struct dc_context *ctx,
+		struct clk_mgr_dcn315 *clk_mgr,
+		struct pp_smu_funcs *pp_smu,
+		struct dccg *dccg)
+{
+	struct dcn315_smu_dpm_clks smu_dpm_clks = { 0 };
+	struct clk_log_info log_info = {0};
+	int smu_dprefclk_khz;
+
+	clk_mgr->base.base.ctx = ctx;
+	clk_mgr->base.base.funcs = &dcn315_funcs;
+
+	clk_mgr->base.pp_smu = pp_smu;
+
+	clk_mgr->base.dccg = dccg;
+	clk_mgr->base.dfs_bypass_disp_clk = 0;
+
+	clk_mgr->base.dprefclk_ss_percentage = 0;
+	clk_mgr->base.dprefclk_ss_divider = 1000;
+	clk_mgr->base.ss_on_dprefclk = false;
+	clk_mgr->base.dfs_ref_freq_khz = 48000;
+
+	clk_mgr->smu_wm_set.wm_set = (struct dcn315_watermarks *)dm_helpers_allocate_gpu_mem(
+				clk_mgr->base.base.ctx,
+				DC_MEM_ALLOC_TYPE_FRAME_BUFFER,
+				sizeof(struct dcn315_watermarks),
+				&clk_mgr->smu_wm_set.mc_address.quad_part);
+
+	if (!clk_mgr->smu_wm_set.wm_set) {
+		/* mc_address 0 marks the table as "not transferable to the SMU" */
+		clk_mgr->smu_wm_set.wm_set = &dummy_wms;
+		clk_mgr->smu_wm_set.mc_address.quad_part = 0;
+	}
+	ASSERT(clk_mgr->smu_wm_set.wm_set);
+
+	smu_dpm_clks.dpm_clks = (DpmClocks_315_t *)dm_helpers_allocate_gpu_mem(
+				clk_mgr->base.base.ctx,
+				DC_MEM_ALLOC_TYPE_FRAME_BUFFER,
+				sizeof(DpmClocks_315_t),
+				&smu_dpm_clks.mc_address.quad_part);
+
+	if (smu_dpm_clks.dpm_clks == NULL) {
+		smu_dpm_clks.dpm_clks = &dummy_clocks;
+		smu_dpm_clks.mc_address.quad_part = 0;
+	}
+
+	ASSERT(smu_dpm_clks.dpm_clks);
+
+	clk_mgr->base.smu_ver = dcn315_smu_get_smu_version(&clk_mgr->base);
+
+	if (clk_mgr->base.smu_ver > 0)
+		clk_mgr->base.smu_present = true;
+
+	/*
+	 * integrated_info may be NULL (it is checked further down before
+	 * populating bw_params), so it must be checked here as well. Without
+	 * it the DDR5 defaults are used.
+	 */
+	if (ctx->dc_bios->integrated_info &&
+			ctx->dc_bios->integrated_info->memory_type == LpDdr5MemType) {
+		dcn315_bw_params.wm_table = lpddr5_wm_table;
+	} else {
+		dcn315_bw_params.wm_table = ddr5_wm_table;
+	}
+	/* Saved clocks configured at boot for debug purposes */
+	dcn315_dump_clk_registers(&clk_mgr->base.base.boot_snapshot,
+				  &clk_mgr->base.base, &log_info);
+	clk_mgr->base.base.clks.dispclk_khz = clk_mgr->base.base.boot_snapshot.dispclk * 1000;
+
+	/*
+	 * 600 MHz is the default. dcn315_smu_get_dpref_clk() returns -1000
+	 * when the SMU is absent or did not answer, so only a positive reading
+	 * may replace the default.
+	 */
+	clk_mgr->base.base.dprefclk_khz = 600000;
+	smu_dprefclk_khz = dcn315_smu_get_dpref_clk(&clk_mgr->base);
+	if (smu_dprefclk_khz > 0)
+		clk_mgr->base.base.dprefclk_khz = smu_dprefclk_khz;
+	clk_mgr->base.base.clks.ref_dtbclk_khz = clk_mgr->base.base.dprefclk_khz;
+	dce_clock_read_ss_info(&clk_mgr->base);
+	clk_mgr->base.base.clks.ref_dtbclk_khz = dce_adjust_dp_ref_freq_for_ss(&clk_mgr->base, clk_mgr->base.base.dprefclk_khz);
+
+	clk_mgr->base.base.bw_params = &dcn315_bw_params;
+
+	if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
+		int i;
+
+		dcn315_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks);
+		DC_LOG_SMU("NumDcfClkLevelsEnabled: %d\n"
+			   "NumDispClkLevelsEnabled: %d\n"
+			   "NumSocClkLevelsEnabled: %d\n"
+			   "VcnClkLevelsEnabled: %d\n"
+			   "NumDfPstatesEnabled: %d\n"
+			   "MinGfxClk: %d\n"
+			   "MaxGfxClk: %d\n",
+			   smu_dpm_clks.dpm_clks->NumDcfClkLevelsEnabled,
+			   smu_dpm_clks.dpm_clks->NumDispClkLevelsEnabled,
+			   smu_dpm_clks.dpm_clks->NumSocClkLevelsEnabled,
+			   smu_dpm_clks.dpm_clks->VcnClkLevelsEnabled,
+			   smu_dpm_clks.dpm_clks->NumDfPstatesEnabled,
+			   smu_dpm_clks.dpm_clks->MinGfxClk,
+			   smu_dpm_clks.dpm_clks->MaxGfxClk);
+		for (i = 0; i < smu_dpm_clks.dpm_clks->NumDcfClkLevelsEnabled; i++) {
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks->DcfClocks[%d] = %d\n",
+				   i,
+				   smu_dpm_clks.dpm_clks->DcfClocks[i]);
+		}
+		for (i = 0; i < smu_dpm_clks.dpm_clks->NumDispClkLevelsEnabled; i++) {
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks->DispClocks[%d] = %d\n",
+				   i, smu_dpm_clks.dpm_clks->DispClocks[i]);
+		}
+		for (i = 0; i < smu_dpm_clks.dpm_clks->NumSocClkLevelsEnabled; i++) {
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks->SocClocks[%d] = %d\n",
+				   i, smu_dpm_clks.dpm_clks->SocClocks[i]);
+		}
+		for (i = 0; i < NUM_SOC_VOLTAGE_LEVELS; i++)
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks->SocVoltage[%d] = %d\n",
+				   i, smu_dpm_clks.dpm_clks->SocVoltage[i]);
+
+		for (i = 0; i < NUM_DF_PSTATE_LEVELS; i++) {
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks->DfPstateTable[%d].FClk = %d\n"
+				   "smu_dpm_clks.dpm_clks->DfPstateTable[%d].MemClk= %d\n"
+				   "smu_dpm_clks.dpm_clks->DfPstateTable[%d].Voltage = %d\n",
+				   i, smu_dpm_clks.dpm_clks->DfPstateTable[i].FClk,
+				   i, smu_dpm_clks.dpm_clks->DfPstateTable[i].MemClk,
+				   i, smu_dpm_clks.dpm_clks->DfPstateTable[i].Voltage);
+		}
+
+		if (ctx->dc_bios->integrated_info) {
+			dcn315_clk_mgr_helper_populate_bw_params(
+					&clk_mgr->base,
+					ctx->dc_bios->integrated_info,
+					smu_dpm_clks.dpm_clks);
+		}
+	}
+
+	/* the dummy fallback (mc_address 0) is static and must not be freed */
+	if (smu_dpm_clks.dpm_clks && smu_dpm_clks.mc_address.quad_part != 0)
+		dm_helpers_free_gpu_mem(clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_FRAME_BUFFER,
+				smu_dpm_clks.dpm_clks);
+}
+
+/*
+ * Release the watermark table allocated by dcn315_clk_mgr_construct().
+ * A table with mc_address 0 is the static fallback and is left alone.
+ */
+void dcn315_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int)
+{
+	struct clk_mgr_dcn315 *clk_mgr = TO_CLK_MGR_DCN315(clk_mgr_int);
+	struct dcn315_smu_watermark_set *wm = &clk_mgr->smu_wm_set;
+
+	if (!wm->wm_set || wm->mc_address.quad_part == 0)
+		return;
+
+	dm_helpers_free_gpu_mem(clk_mgr_int->base.ctx, DC_MEM_ALLOC_TYPE_FRAME_BUFFER,
+			wm->wm_set);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.h
new file mode 100644
index 000000000000..642ae3d4a790
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN315_CLK_MGR_H__
+#define __DCN315_CLK_MGR_H__
+#include "clk_mgr_internal.h"
+
+struct dcn315_watermarks;
+
+struct dcn315_smu_watermark_set { /* watermark table plus the address used to hand it to the SMU */
+ struct dcn315_watermarks *wm_set; /* CPU pointer to the table; set up in dcn315_clk_mgr_construct() */
+ union large_integer mc_address; /* sent to the SMU as high/low parts; 0 means the table cannot be transferred */
+};
+
+struct clk_mgr_dcn315 { /* DCN3.15 clock manager: the generic internal state plus the SMU watermark buffer */
+ struct clk_mgr_internal base; /* generic state; TO_CLK_MGR_DCN315() is applied to a pointer to this member */
+ struct dcn315_smu_watermark_set smu_wm_set; /* freed in dcn315_clk_mgr_destroy() when mc_address is non-zero */
+};
+
+void dcn315_clk_mgr_construct(struct dc_context *ctx,
+ struct clk_mgr_dcn315 *clk_mgr,
+ struct pp_smu_funcs *pp_smu,
+ struct dccg *dccg);
+
+void dcn315_init_clocks(struct clk_mgr *clk_mgr);
+void dcn315_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int);
+
+#endif //__DCN315_CLK_MGR_H__
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c
new file mode 100644
index 000000000000..478b4d6a3544
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "core_types.h"
+#include "clk_mgr_internal.h"
+#include "reg_helper.h"
+#include "dm_helpers.h"
+#include "dcn315_smu.h"
+#include "mp/mp_13_0_5_offset.h"
+#include "logger_types.h"
+
+#define MAX_INSTANCE 6
+#define MAX_SEGMENT 6
+#define SMU_REGISTER_WRITE_RETRY_COUNT 5
+
+struct IP_BASE_INSTANCE { /* segment base values for one IP instance */
+ unsigned int segment[MAX_SEGMENT]; /* indexed by a register's *_BASE_IDX in REG() */
+};
+
+struct IP_BASE { /* segment bases for every instance of an IP block */
+ struct IP_BASE_INSTANCE instance[MAX_INSTANCE]; /* REG() in this file only reads instance[0] */
+};
+
+static const struct IP_BASE MP0_BASE = { { { { 0x00016000, 0x00DC0000, 0x00E00000, 0x00E40000, 0x0243FC00, 0 } }, /* instance 0: the only one populated; REG() adds a register offset to one of these segments */
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+
+#define CTX clk_mgr->base.ctx
+#define IND_REG(offset) offset
+
+#define regBIF_BX_PF2_RSMU_INDEX 0x0000
+#define regBIF_BX_PF2_RSMU_INDEX_BASE_IDX 1
+#define regBIF_BX_PF2_RSMU_DATA 0x0001
+#define regBIF_BX_PF2_RSMU_DATA_BASE_IDX 1
+
+#define REG(reg_name) \
+ (MP0_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name)
+
+#define FN(reg_name, field) \
+ FD(reg_name##__##field)
+
+#undef DC_LOGGER
+#define DC_LOGGER \
+ CTX->logger
+#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
+
+#define mmMP1_C2PMSG_3 0x3B1050C
+
+#define reg__MP1_C2PMSG_3_MASK (0xFFFFFFFF)
+#define reg__MP1_C2PMSG_3__SHIFT (0)
+
+
+#define data_reg_name__MP1_C2PMSG_3_MASK (0xFFFFFFFF)
+#define data_reg_name__MP1_C2PMSG_3__SHIFT (0)
+
+#define VBIOSSMC_MSG_TestMessage 0x01 ///< To check if PMFW is alive and responding. Requirement specified by PMFW team
+#define VBIOSSMC_MSG_GetPmfwVersion 0x02 ///< Get PMFW version
+#define VBIOSSMC_MSG_Spare0 0x03 ///< Spare0
+#define VBIOSSMC_MSG_SetDispclkFreq 0x04 ///< Set display clock frequency in MHz
+#define VBIOSSMC_MSG_Spare1 0x05 ///< Spare1
+#define VBIOSSMC_MSG_SetDppclkFreq 0x06 ///< Set DPP clock frequency in MHz
+#define VBIOSSMC_MSG_SetHardMinDcfclkByFreq 0x07 ///< Set DCF clock frequency hard min in MHz
+#define VBIOSSMC_MSG_SetMinDeepSleepDcfclk 0x08 ///< Set DCF clock minimum frequency in deep sleep in MHz
+#define VBIOSSMC_MSG_GetDtbclkFreq 0x09 ///< Get display dtb clock frequency in MHz in case VMIN does not support phy frequency
+#define VBIOSSMC_MSG_SetDtbClk 0x0A ///< Set dtb clock frequency, return frequency in MHz
+#define VBIOSSMC_MSG_SetDisplayCount 0x0B ///< Inform PMFW of the number of displays connected
+#define VBIOSSMC_MSG_EnableTmdp48MHzRefclkPwrDown 0x0C ///< To ask PMFW to turn off TMDP 48MHz refclk during display off to save power
+#define VBIOSSMC_MSG_UpdatePmeRestore 0x0D ///< To ask PMFW to write into Azalia for PME wake up event
+#define VBIOSSMC_MSG_SetVbiosDramAddrHigh 0x0E ///< Set DRAM address high 32 bits for WM table transfer
+#define VBIOSSMC_MSG_SetVbiosDramAddrLow 0x0F ///< Set DRAM address low 32 bits for WM table transfer
+#define VBIOSSMC_MSG_TransferTableSmu2Dram 0x10 ///< Transfer table from PMFW SRAM to system DRAM
+#define VBIOSSMC_MSG_TransferTableDram2Smu 0x11 ///< Transfer table from system DRAM to PMFW
+#define VBIOSSMC_MSG_SetDisplayIdleOptimizations 0x12 ///< Set Idle state optimization for display off
+#define VBIOSSMC_MSG_GetDprefclkFreq 0x13 ///< Get DPREF clock frequency. Return in MHz
+#define VBIOSSMC_Message_Count 0x14 ///< Total number of VBIOS and DAL messages
+
+#define VBIOSSMC_Status_BUSY 0x0
+#define VBIOSSMC_Result_OK 0x01 ///< Message Response OK
+#define VBIOSSMC_Result_Failed 0xFF ///< Message Response Failed
+#define VBIOSSMC_Result_UnknownCmd 0xFE ///< Message Response Unknown Command
+#define VBIOSSMC_Result_CmdRejectedPrereq 0xFD ///< Message Response Command Failed Prerequisite
+#define VBIOSSMC_Result_CmdRejectedBusy 0xFC ///< Message Response Command Rejected due to PMFW is busy. Sender should retry sending this message
+
+/*
+ * Poll the SMU response register until it leaves the BUSY (zero) state.
+ *
+ * Used instead of the REG_WAIT macro because the wait ends when the register
+ * is NOT EQUAL to zero, and because the translation in msg_if.h won't work
+ * with REG_WAIT.
+ *
+ * The register is read up to max_retries + 1 times, with delay_us between
+ * reads (msleep for delays of 1 ms or more, udelay otherwise). Returns the
+ * last value read, which is VBIOSSMC_Status_BUSY on timeout.
+ */
+static uint32_t dcn315_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, unsigned int delay_us, unsigned int max_retries)
+{
+	uint32_t response;
+
+	for (;;) {
+		response = REG_READ(MP1_SMN_C2PMSG_38);
+		if (response != VBIOSSMC_Status_BUSY)
+			return response;
+
+		if (delay_us >= 1000)
+			msleep(delay_us/1000);
+		else if (delay_us > 0)
+			udelay(delay_us);
+
+		/* retry budget exhausted: report the BUSY value to the caller */
+		if (max_retries-- == 0)
+			break;
+	}
+
+	return response;
+}
+
+/*
+ * Send one message with a 32-bit parameter to the SMU and return its reply.
+ *
+ * Sequence: wait for any earlier message to complete, clear the response
+ * register, write the parameter, write the message ID (re-tried until the ID
+ * reads back correctly or the retry budget is used up), then wait for the
+ * response.
+ *
+ * Returns -1 if the SMU was still busy before the message could be sent;
+ * otherwise the content of the parameter register after the exchange.
+ */
+static int dcn315_smu_send_msg_with_param(
+		struct clk_mgr_internal *clk_mgr,
+		unsigned int msg_id, unsigned int param)
+{
+	uint32_t response;
+	uint32_t attempt;
+	uint32_t echoed_id;
+
+	response = dcn315_smu_wait_for_response(clk_mgr, 10, 200000);
+
+	if (response != VBIOSSMC_Result_OK)
+		smu_print("SMU Response was not OK. SMU response after wait received is: %d\n", response);
+
+	if (response == VBIOSSMC_Status_BUSY) {
+		return -1;
+	}
+
+	/* First clear response register */
+	REG_WRITE(MP1_SMN_C2PMSG_38, VBIOSSMC_Status_BUSY);
+
+	/* Set the parameter register for the SMU message, unit is Mhz */
+	REG_WRITE(MP1_SMN_C2PMSG_37, param);
+
+	attempt = 0;
+	while (attempt < SMU_REGISTER_WRITE_RETRY_COUNT) {
+		/* Trigger the message transaction by writing the message ID */
+		IX_REG_SET_SYNC(mmMP1_C2PMSG_3, 0,
+				MP1_C2PMSG_3, msg_id);
+		IX_REG_GET_SYNC(mmMP1_C2PMSG_3,
+				MP1_C2PMSG_3, &echoed_id);
+		if (echoed_id == msg_id)
+			break;
+		udelay(2);
+		attempt++;
+		smu_print("SMU msg id write fail %x times. \n", attempt);
+	}
+
+	response = dcn315_smu_wait_for_response(clk_mgr, 10, 200000);
+
+	if (response == VBIOSSMC_Status_BUSY) {
+		ASSERT(0);
+		dm_helpers_smu_timeout(CTX, msg_id, param, 10 * 200000);
+	}
+
+	return REG_READ(MP1_SMN_C2PMSG_37);
+}
+
+/* Query the PMFW version; returns whatever the message helper returns. */
+int dcn315_smu_get_smu_version(struct clk_mgr_internal *clk_mgr)
+{
+	const unsigned int no_param = 0;
+
+	return dcn315_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_GetPmfwVersion, no_param);
+}
+
+
+/*
+ * Request a display clock from the SMU.
+ *
+ * The request is rounded up to whole MHz; the SMU reply (MHz) is returned
+ * converted back to kHz. Without an SMU the request is echoed back unchanged.
+ */
+int dcn315_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz)
+{
+	int granted_mhz;
+
+	if (!clk_mgr->smu_present)
+		return requested_dispclk_khz;
+
+	/* Unit of SMU msg parameter is Mhz */
+	granted_mhz = dcn315_smu_send_msg_with_param(clk_mgr,
+			VBIOSSMC_MSG_SetDispclkFreq,
+			khz_to_mhz_ceil(requested_dispclk_khz));
+
+	return granted_mhz * 1000;
+}
+
+/*
+ * Set the hard minimum DCFCLK.
+ *
+ * Returns -1 when pstate support is disabled through the debug options, the
+ * unchanged request when there is no SMU, and otherwise the SMU reply
+ * converted from MHz to kHz.
+ */
+int dcn315_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_dcfclk_khz)
+{
+	int granted_mhz;
+
+	if (!clk_mgr->base.ctx->dc->debug.pstate_enabled)
+		return -1;
+
+	if (!clk_mgr->smu_present)
+		return requested_dcfclk_khz;
+
+	granted_mhz = dcn315_smu_send_msg_with_param(clk_mgr,
+			VBIOSSMC_MSG_SetHardMinDcfclkByFreq,
+			khz_to_mhz_ceil(requested_dcfclk_khz));
+
+	return granted_mhz * 1000;
+}
+
+/*
+ * Set the minimum DCFCLK used in deep sleep.
+ *
+ * Returns -1 when pstate support is disabled through the debug options, the
+ * unchanged request when there is no SMU, and otherwise the SMU reply
+ * converted from MHz to kHz.
+ */
+int dcn315_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_min_ds_dcfclk_khz)
+{
+	int granted_mhz;
+
+	if (!clk_mgr->base.ctx->dc->debug.pstate_enabled)
+		return -1;
+
+	if (!clk_mgr->smu_present)
+		return requested_min_ds_dcfclk_khz;
+
+	granted_mhz = dcn315_smu_send_msg_with_param(clk_mgr,
+			VBIOSSMC_MSG_SetMinDeepSleepDcfclk,
+			khz_to_mhz_ceil(requested_min_ds_dcfclk_khz));
+
+	return granted_mhz * 1000;
+}
+
+/*
+ * Request a DPP clock from the SMU; the reply (MHz) is returned in kHz.
+ * Without an SMU the request is echoed back unchanged.
+ */
+int dcn315_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_khz)
+{
+	int granted_mhz;
+
+	if (!clk_mgr->smu_present)
+		return requested_dpp_khz;
+
+	granted_mhz = dcn315_smu_send_msg_with_param(clk_mgr,
+			VBIOSSMC_MSG_SetDppclkFreq,
+			khz_to_mhz_ceil(requested_dpp_khz));
+
+	return granted_mhz * 1000;
+}
+
+/*
+ * Forward the raw idle-optimization word to the SMU. Skipped when pstate
+ * support is disabled through the debug options or when there is no SMU.
+ */
+void dcn315_smu_set_display_idle_optimization(struct clk_mgr_internal *clk_mgr, uint32_t idle_info)
+{
+	if (!clk_mgr->base.ctx->dc->debug.pstate_enabled || !clk_mgr->smu_present)
+		return;
+
+	//TODO: Work with smu team to define optimization options.
+	dcn315_smu_send_msg_with_param(clk_mgr,
+			VBIOSSMC_MSG_SetDisplayIdleOptimizations, idle_info);
+}
+
+/*
+ * Send the idle-optimization message with the df_request_disabled and
+ * phy_ref_clk_off bits both set (enable) or both clear (disable).
+ * No-op without an SMU.
+ */
+void dcn315_smu_enable_phy_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable)
+{
+	union display_idle_optimization_u idle_info = { 0 };
+
+	if (!clk_mgr->smu_present)
+		return;
+
+	idle_info.idle_info.df_request_disabled = enable ? 1 : 0;
+	idle_info.idle_info.phy_ref_clk_off = enable ? 1 : 0;
+
+	dcn315_smu_send_msg_with_param(clk_mgr,
+			VBIOSSMC_MSG_SetDisplayIdleOptimizations, idle_info.data);
+}
+
+/* Send VBIOSSMC_MSG_UpdatePmeRestore (parameter 0) when an SMU is present. */
+void dcn315_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr)
+{
+	if (clk_mgr->smu_present)
+		dcn315_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_UpdatePmeRestore, 0);
+}
+/* Give the SMU the upper 32 bits of the DRAM address used for table transfers. */
+void dcn315_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high)
+{
+	if (clk_mgr->smu_present)
+		dcn315_smu_send_msg_with_param(clk_mgr,
+				VBIOSSMC_MSG_SetVbiosDramAddrHigh, addr_high);
+}
+
+/* Give the SMU the lower 32 bits of the DRAM address used for table transfers. */
+void dcn315_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low)
+{
+	if (clk_mgr->smu_present)
+		dcn315_smu_send_msg_with_param(clk_mgr,
+				VBIOSSMC_MSG_SetVbiosDramAddrLow, addr_low);
+}
+
+/* Ask the SMU to write its DPM clock table (TABLE_DPMCLOCKS) to DRAM. */
+void dcn315_smu_transfer_dpm_table_smu_2_dram(struct clk_mgr_internal *clk_mgr)
+{
+	if (clk_mgr->smu_present)
+		dcn315_smu_send_msg_with_param(clk_mgr,
+				VBIOSSMC_MSG_TransferTableSmu2Dram, TABLE_DPMCLOCKS);
+}
+
+/* Ask the SMU to read the watermark table (TABLE_WATERMARKS) from DRAM. */
+void dcn315_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr)
+{
+	if (clk_mgr->smu_present)
+		dcn315_smu_send_msg_with_param(clk_mgr,
+				VBIOSSMC_MSG_TransferTableDram2Smu, TABLE_WATERMARKS);
+}
+
+/*
+ * Read the DPREF clock from the SMU and return it in kHz.
+ * Returns -1000 when there is no SMU (the "-1 MHz" placeholder scaled to kHz).
+ */
+int dcn315_smu_get_dpref_clk(struct clk_mgr_internal *clk_mgr)
+{
+	int dprefclk_mhz = -1;
+
+	if (!clk_mgr->smu_present)
+		return dprefclk_mhz * 1000;
+
+	dprefclk_mhz = dcn315_smu_send_msg_with_param(clk_mgr,
+			VBIOSSMC_MSG_GetDprefclkFreq, 0);
+
+	return dprefclk_mhz * 1000;
+}
+
+/*
+ * Read the DTB clock from the SMU and return it in kHz.
+ * Returns -1000 when there is no SMU (the "-1 MHz" placeholder scaled to kHz).
+ */
+int dcn315_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr)
+{
+	int dtbclk_mhz = -1;
+
+	if (!clk_mgr->smu_present)
+		return dtbclk_mhz * 1000;
+
+	dtbclk_mhz = dcn315_smu_send_msg_with_param(clk_mgr,
+			VBIOSSMC_MSG_GetDtbclkFreq, 0);
+
+	return dtbclk_mhz * 1000;
+}
+
+/* Send VBIOSSMC_MSG_SetDtbClk with `enable` as the parameter; no-op without an SMU. */
+void dcn315_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable)
+{
+	if (clk_mgr->smu_present)
+		dcn315_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_SetDtbClk, enable);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h
new file mode 100644
index 000000000000..5aa3275ac7d8
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef DAL_DC_315_SMU_H_
+#define DAL_DC_315_SMU_H_
+#include "os_types.h"
+
+#define PMFW_DRIVER_IF_VERSION 4
+
+#define NUM_DCFCLK_DPM_LEVELS 4
+#define NUM_DISPCLK_DPM_LEVELS 4
+#define NUM_DPPCLK_DPM_LEVELS 4
+#define NUM_SOCCLK_DPM_LEVELS 4
+#define NUM_VCN_DPM_LEVELS 4
+#define NUM_SOC_VOLTAGE_LEVELS 4
+#define NUM_DF_PSTATE_LEVELS 4
+
+
+typedef struct { // One watermark range row of the table transferred to the SMU
+ uint16_t MinClock; // This is either DCFCLK or SOCCLK (in MHz)
+ uint16_t MaxClock; // This is either DCFCLK or SOCCLK (in MHz)
+ uint16_t MinMclk; // Lower bound of the second range; the driver fills it from dcfclk_mhz values
+ uint16_t MaxMclk; // Upper bound of the second range; 0xFFFF is used for "unconstrained"
+ uint8_t WmSetting; // Watermark set instance (the driver writes wm_inst / WM_A here)
+ uint8_t WmType; // Used for normal pstate change or memory retraining
+ uint8_t Padding[2];
+} WatermarkRowGeneric_t;
+
+#define NUM_WM_RANGES 4
+#define WM_PSTATE_CHG 0
+#define WM_RETRAINING 1
+
+typedef enum { // First index of dcn315_watermarks.WatermarkRow
+ WM_SOCCLK = 0,
+ WM_DCFCLK,
+ WM_COUNT, // Number of row sets; used as the array dimension
+} WM_CLOCK_e;
+
+typedef struct { // One DF pstate entry of DpmClocks_315_t.DfPstateTable
+ uint32_t FClk; // copied into clk_table fclk_mhz by the driver
+ uint32_t MemClk; // copied into clk_table memclk_mhz by the driver
+ uint32_t Voltage; // compared against SocVoltage[] when picking a pstate; presumably same units - TODO confirm
+} DfPstateTable_t;
+
+//Freq in MHz
+//Voltage in milli volts with 2 fractional bits
+typedef struct { // DPM clock table the SMU writes to DRAM (TABLE_DPMCLOCKS)
+ uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS];
+ uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS];
+ uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS];
+ uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS];
+ uint32_t VClocks[NUM_VCN_DPM_LEVELS];
+ uint32_t DClocks[NUM_VCN_DPM_LEVELS];
+ uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS];
+ DfPstateTable_t DfPstateTable[NUM_DF_PSTATE_LEVELS];
+ uint8_t NumDcfClkLevelsEnabled; // Number of DcfClocks[] entries in use
+ uint8_t NumDispClkLevelsEnabled; //Applies to both Dispclk and Dppclk
+ uint8_t NumSocClkLevelsEnabled; // Number of SocClocks[] entries in use
+ uint8_t VcnClkLevelsEnabled; //Applies to both Vclk and Dclk
+ uint8_t NumDfPstatesEnabled; // Number of DfPstateTable[] entries in use
+ uint8_t spare[3];
+ uint32_t MinGfxClk;
+ uint32_t MaxGfxClk;
+} DpmClocks_315_t;
+
+struct dcn315_watermarks { // Watermark table transferred to the SMU (TABLE_WATERMARKS)
+ // Watermarks
+ WatermarkRowGeneric_t WatermarkRow[WM_COUNT][NUM_WM_RANGES]; // [clock kind (WM_CLOCK_e)][range]
+ uint32_t MmHubPadding[7]; // SMU internal use
+};
+
+struct dcn315_smu_dpm_clks { // DPM table buffer plus the address handed to the SMU
+ DpmClocks_315_t *dpm_clks; // CPU pointer to the table
+ union large_integer mc_address; // sent to the SMU as high/low parts; 0 means "no transferable buffer"
+};
+
+#define TABLE_WATERMARKS 1 // Called by DAL through VBIOS
+#define TABLE_DPMCLOCKS 4 // Called by Driver and VBIOS
+
+struct display_idle_optimization { // Bit layout of the SetDisplayIdleOptimizations message parameter
+ unsigned int df_request_disabled : 1; // set together with phy_ref_clk_off by dcn315_smu_enable_phy_refclk_pwrdwn()
+ unsigned int phy_ref_clk_off : 1;
+ unsigned int s0i2_rdy : 1; // not written by dcn315_smu.c
+ unsigned int reserved : 29;
+};
+
+union display_idle_optimization_u { // Bitfield view and raw view of the same 32-bit parameter
+ struct display_idle_optimization idle_info; // per-flag access
+ uint32_t data; // raw word passed as the SMU message parameter
+};
+
+int dcn315_smu_get_smu_version(struct clk_mgr_internal *clk_mgr);
+int dcn315_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz); /* returns kHz */
+int dcn315_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_dcfclk_khz); /* returns kHz, or -1 when pstate is disabled */
+int dcn315_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_min_ds_dcfclk_khz); /* returns kHz, or -1 when pstate is disabled */
+int dcn315_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_khz); /* returns kHz */
+void dcn315_smu_set_display_idle_optimization(struct clk_mgr_internal *clk_mgr, uint32_t idle_info);
+void dcn315_smu_enable_phy_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable);
+void dcn315_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high);
+void dcn315_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low);
+void dcn315_smu_transfer_dpm_table_smu_2_dram(struct clk_mgr_internal *clk_mgr);
+void dcn315_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr);
+void dcn315_smu_request_voltage_via_phyclk(struct clk_mgr_internal *clk_mgr, int requested_phyclk_khz); /* NOTE(review): no definition in the dcn315_smu.c added by this patch - confirm whether this declaration is stale */
+void dcn315_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr);
+int dcn315_smu_get_dpref_clk(struct clk_mgr_internal *clk_mgr); /* returns kHz; -1000 when no SMU is present */
+int dcn315_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr); /* returns kHz; -1000 when no SMU is present */
+void dcn315_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable);
+#endif /* DAL_DC_315_SMU_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
new file mode 100644
index 000000000000..1769b1f26e75
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
@@ -0,0 +1,680 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+
+#include "dccg.h"
+#include "clk_mgr_internal.h"
+
+// For dce12_get_dp_ref_freq_khz
+#include "dce100/dce_clk_mgr.h"
+// For dcn20_update_clocks_update_dpp_dto
+#include "dcn20/dcn20_clk_mgr.h"
+#include "dcn31/dcn31_clk_mgr.h"
+#include "dcn316_clk_mgr.h"
+#include "reg_helper.h"
+#include "core_types.h"
+#include "dcn316_smu.h"
+#include "dm_helpers.h"
+#include "dc_dmub_srv.h"
+#include "link_service.h"
+
+// DCN316 this is CLK1 instance
+#define MAX_INSTANCE 7
+#define MAX_SEGMENT 6
+
+/*
+ * One IP instance: MAX_SEGMENT unsigned values.
+ * NOTE(review): presumably register segment base offsets, mirroring the
+ * identically named struct in dcn316_smu.c - confirm; nothing in the visible
+ * part of this file reads these types.
+ */
+struct IP_BASE_INSTANCE {
+ unsigned int segment[MAX_SEGMENT];
+};
+
+/* Table of MAX_INSTANCE instances of the per-instance segment array above. */
+struct IP_BASE {
+ struct IP_BASE_INSTANCE instance[MAX_INSTANCE];
+};
+
+#define regCLK1_CLK_PLL_REQ 0x0237
+#define regCLK1_CLK_PLL_REQ_BASE_IDX 0
+
+#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0
+#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc
+#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10
+#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL
+#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L
+#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L
+
+#define TO_CLK_MGR_DCN316(clk_mgr)\
+ container_of(clk_mgr, struct clk_mgr_dcn316, base)
+
+/*
+ * Count the links whose DIG (and therefore PHY) is currently enabled.
+ *
+ * Workaround: if no link is enabled but the new state carries an HDMI or DVI
+ * stream, report one display anyway to avoid a hang on HDMI after the
+ * display is turned off and back on.
+ */
+static int dcn316_get_active_display_cnt_wa(
+		struct dc *dc,
+		struct dc_state *context)
+{
+	bool has_tmds_stream = false;
+	int enabled_links = 0;
+	int idx;
+
+	/* Any HDMI / single-link DVI / dual-link DVI stream counts as TMDS. */
+	for (idx = 0; idx < context->stream_count; idx++) {
+		switch (context->streams[idx]->signal) {
+		case SIGNAL_TYPE_HDMI_TYPE_A:
+		case SIGNAL_TYPE_DVI_SINGLE_LINK:
+		case SIGNAL_TYPE_DVI_DUAL_LINK:
+			has_tmds_stream = true;
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* The dig and phy are coupled, so an enabled dig implies an enabled phy. */
+	for (idx = 0; idx < dc->link_count; idx++) {
+		const struct dc_link *link = dc->links[idx];
+
+		if (!link->link_enc || !link->link_enc->funcs->is_dig_enabled)
+			continue;
+
+		if (link->link_enc->funcs->is_dig_enabled(link->link_enc))
+			enabled_links++;
+	}
+
+	if (enabled_links == 0 && has_tmds_stream)
+		return 1;
+
+	return enabled_links;
+}
+
+/*
+ * OTG workaround applied around a DISPCLK change.
+ *
+ * Walks every pipe of the resource pool (taken from @context when
+ * @safe_to_lower is true, otherwise from dc->current_state) and, for each
+ * head pipe whose stream is DPMS-off, virtual, or has no link encoder:
+ *   - @disable == true:  immediately disables the CRTC (when the timing
+ *     generator provides the hook) and resets the sync context of the pipe;
+ *   - @disable == false: re-enables the CRTC.
+ *
+ * Secondary pipes (those with a top_pipe or prev_odm_pipe) are skipped.
+ */
+static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context,
+		bool safe_to_lower, bool disable)
+{
+	struct dc *dc = clk_mgr_base->ctx->dc;
+	int i;
+
+	for (i = 0; i < dc->res_pool->pipe_count; ++i) {
+		struct pipe_ctx *pipe = safe_to_lower
+			? &context->res_ctx.pipe_ctx[i]
+			: &dc->current_state->res_ctx.pipe_ctx[i];
+
+		if (pipe->top_pipe || pipe->prev_odm_pipe)
+			continue;
+		if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) ||
+				     !pipe->stream->link_enc)) {
+			if (disable) {
+				if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc)
+					pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
+
+				reset_sync_context_for_pipe(dc, context, i);
+			} else if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->enable_crtc) {
+				/*
+				 * Same guards as the disable path: a pipe without a timing
+				 * generator (or without the hook) must not be dereferenced.
+				 */
+				pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
+			}
+		}
+	}
+}
+
+/* clk_mgr_funcs hook: forwards the PME workaround request to the SMU layer. */
+static void dcn316_enable_pme_wa(struct clk_mgr *clk_mgr_base)
+{
+	dcn316_smu_enable_pme_wa(TO_CLK_MGR_INTERNAL(clk_mgr_base));
+}
+
+/*
+ * Apply the clocks requested in context->bw_ctx.bw.dcn.clk through SMU
+ * messages, then report the resulting clocks to DMCUB.
+ *
+ * @clk_mgr_base:  clock manager whose cached clks are compared and updated
+ * @context:       state carrying the requested clocks
+ * @safe_to_lower: when true the power state, DTBCLK and clocks may be
+ *                 reduced; when false only the raise-side handling (DTBCLK
+ *                 enable, leaving the idle-optimized state) is performed.
+ *                 Each individual clock change is gated by should_set_clock().
+ *
+ * Does nothing when dc->work_arounds.skip_clock_update is set.
+ */
+static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base,
+ struct dc_state *context,
+ bool safe_to_lower)
+{
+ union dmub_rb_cmd cmd;
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
+ struct dc *dc = clk_mgr_base->ctx->dc;
+ int display_count = 0;
+ bool update_dppclk = false;
+ bool update_dispclk = false;
+ bool dpp_clock_lowered = false;
+
+ if (dc->work_arounds.skip_clock_update)
+ return;
+
+ /*
+ * if it is safe to lower, but we are already in the lower state, we don't have to do anything
+ * also if safe to lower is false, we just go in the higher state
+ */
+ clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
+ if (safe_to_lower) {
+ /* DTBCLK is only turned off on the lowering pass */
+ if (clk_mgr_base->clks.dtbclk_en && !new_clocks->dtbclk_en) {
+ dcn316_smu_set_dtbclk(clk_mgr, false);
+ clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
+ }
+ /* check that we're not already in lower */
+ if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) {
+ display_count = dcn316_get_active_display_cnt_wa(dc, context);
+ /* if we can go lower, go lower */
+ if (display_count == 0) {
+ /* request all three idle optimizations at once */
+ union display_idle_optimization_u idle_info = { 0 };
+ idle_info.idle_info.df_request_disabled = 1;
+ idle_info.idle_info.phy_ref_clk_off = 1;
+ idle_info.idle_info.s0i2_rdy = 1;
+ dcn316_smu_set_display_idle_optimization(clk_mgr, idle_info.data);
+ /* update power state */
+ clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
+ }
+ }
+ } else {
+ /* DTBCLK is only turned on on the raising pass */
+ if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) {
+ dcn316_smu_set_dtbclk(clk_mgr, true);
+ clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
+ }
+
+ /* check that we're not already in D0 */
+ if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_MISSION_MODE) {
+ /* all-zero flags: clear every idle optimization */
+ union display_idle_optimization_u idle_info = { 0 };
+ dcn316_smu_set_display_idle_optimization(clk_mgr, idle_info.data);
+ /* update power state */
+ clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_MISSION_MODE;
+ }
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr_base->clks.dcfclk_khz)) {
+ clk_mgr_base->clks.dcfclk_khz = new_clocks->dcfclk_khz;
+ dcn316_smu_set_hard_min_dcfclk(clk_mgr, clk_mgr_base->clks.dcfclk_khz);
+ }
+
+ if (should_set_clock(safe_to_lower,
+ new_clocks->dcfclk_deep_sleep_khz, clk_mgr_base->clks.dcfclk_deep_sleep_khz)) {
+ clk_mgr_base->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz;
+ dcn316_smu_set_min_deep_sleep_dcfclk(clk_mgr, clk_mgr_base->clks.dcfclk_deep_sleep_khz);
+ }
+
+ // workaround: Limit dppclk to 100Mhz to avoid lower eDP panel switch to plus 4K monitor underflow.
+ // Note: this writes the clamped value back into the caller's context (new_clocks points into it).
+ if (new_clocks->dppclk_khz < 100000)
+ new_clocks->dppclk_khz = 100000;
+
+ if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) {
+ if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz)
+ dpp_clock_lowered = true;
+ clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz;
+ update_dppclk = true;
+ }
+
+ /*
+ * NOTE(review): display_count is only computed above when safe_to_lower is
+ * set and the power state was not already LOW_POWER; in every other case
+ * it still holds its initial 0 here - confirm that is intended.
+ */
+ if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) &&
+ (new_clocks->dispclk_khz > 0 || (safe_to_lower && display_count == 0))) {
+ int requested_dispclk_khz = new_clocks->dispclk_khz;
+
+ /* OTG workaround brackets the DISPCLK change: disable before, re-enable after */
+ dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true);
+
+ /* Clamp the requested clock to PMFW based on their limit. */
+ if (dc->debug.min_disp_clk_khz > 0 && requested_dispclk_khz < dc->debug.min_disp_clk_khz)
+ requested_dispclk_khz = dc->debug.min_disp_clk_khz;
+
+ dcn316_smu_set_dispclk(clk_mgr, requested_dispclk_khz);
+ /* the cached value is the unclamped request, not requested_dispclk_khz */
+ clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
+ dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false);
+
+ update_dispclk = true;
+ }
+
+ if (dpp_clock_lowered) {
+ // increase per DPP DTO before lowering global dppclk
+ dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
+ dcn316_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz);
+ } else {
+ // increase global DPPCLK before lowering per DPP DTO
+ if (update_dppclk || update_dispclk)
+ dcn316_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz);
+ // always update dtos unless clock is lowered and not safe to lower
+ if (new_clocks->dppclk_khz >= dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz)
+ dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
+ }
+
+ // notify DMCUB of latest clocks
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.notify_clocks.header.type = DMUB_CMD__CLK_MGR;
+ cmd.notify_clocks.header.sub_type = DMUB_CMD__CLK_MGR_NOTIFY_CLOCKS;
+ cmd.notify_clocks.clocks.dcfclk_khz = clk_mgr_base->clks.dcfclk_khz;
+ cmd.notify_clocks.clocks.dcfclk_deep_sleep_khz =
+ clk_mgr_base->clks.dcfclk_deep_sleep_khz;
+ cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
+ cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
+
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+/*
+ * Placeholder: no clock registers are captured on DCN316. All three
+ * arguments are ignored and nothing is written to them.
+ */
+static void dcn316_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
+		struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info)
+{
+}
+
+/*
+ * Default bandwidth parameters installed as clk_mgr->base.base.bw_params by
+ * dcn316_clk_mgr_construct(). The constructor copies one of the watermark
+ * tables below into .wm_table, and the populate helper may overwrite the
+ * clock table and memory fields with SMU/BIOS data.
+ * This is a single file-scope object, so every constructed clk_mgr points at
+ * the same instance.
+ */
+static struct clk_bw_params dcn316_bw_params = {
+ .vram_type = Ddr4MemType,
+ .num_channels = 1,
+ .clk_table = {
+ .num_entries = 5,
+ },
+
+};
+
+/*
+ * Watermark sets A-D used by dcn316_clk_mgr_construct() when the BIOS
+ * memory type is not LpDdr5MemType. All four sets are p-state change
+ * watermarks; latencies are in microseconds (per the _us field names).
+ * Set A has shorter self-refresh times than sets B-D, which are identical.
+ */
+static struct wm_table ddr4_wm_table = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 6.09,
+ .sr_enter_plus_exit_time_us = 7.14,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ }
+};
+
+/*
+ * Watermark sets A-D used by dcn316_clk_mgr_construct() when the BIOS
+ * memory type is LpDdr5MemType. All four sets carry identical p-state change
+ * latencies; values are in microseconds (per the _us field names).
+ */
+static struct wm_table lpddr5_wm_table = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 11.5,
+ .sr_enter_plus_exit_time_us = 14.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 11.5,
+ .sr_enter_plus_exit_time_us = 14.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 11.5,
+ .sr_enter_plus_exit_time_us = 14.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 11.5,
+ .sr_enter_plus_exit_time_us = 14.5,
+ .valid = true,
+ },
+ }
+};
+
+/*
+ * CPU-only fallback buffers: dcn316_clk_mgr_construct() points at these when
+ * dm_helpers_allocate_gpu_mem() fails, and records an mc_address of 0 so the
+ * SMU transfer and free paths skip them.
+ */
+static DpmClocks_316_t dummy_clocks;
+
+static struct dcn316_watermarks dummy_wms = { 0 };
+
+/*
+ * Fill the SMU watermark table from the driver's watermark and clock tables.
+ *
+ * @bw_params: source of wm_table.entries[] (validity, set, type) and of
+ *             clk_table.entries[].dcfclk_mhz used for the Mclk range bounds
+ * @table:     destination table; the caller is expected to have zeroed it
+ *
+ * Valid entries are packed without holes into WatermarkRow[WM_DCFCLK][]. The
+ * first and last packed rows are then widened so the rows together cover the
+ * whole 0..0xFFFF range. WatermarkRow[WM_SOCCLK][0] gets a single
+ * unconstrained WM_A row (writeback only).
+ *
+ * If no entry is valid the ASSERT fires and no "last row" fix-up is done;
+ * the original code indexed row [-1] in that case.
+ */
+static void dcn316_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn316_watermarks *table)
+{
+	int i, num_valid_sets;
+
+	num_valid_sets = 0;
+
+	for (i = 0; i < WM_SET_COUNT; i++) {
+		/* skip empty entries, the smu array has no holes */
+		if (!bw_params->wm_table.entries[i].valid)
+			continue;
+
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets].WmSetting = bw_params->wm_table.entries[i].wm_inst;
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets].WmType = bw_params->wm_table.entries[i].wm_type;
+		/* We will not select WM based on fclk, so leave it as unconstrained */
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets].MinClock = 0;
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets].MaxClock = 0xFFFF;
+
+		if (table->WatermarkRow[WM_DCFCLK][num_valid_sets].WmType == WM_TYPE_PSTATE_CHG) {
+			if (i == 0)
+				table->WatermarkRow[WM_DCFCLK][num_valid_sets].MinMclk = 0;
+			else {
+				/* add 1 to make it non-overlapping with next lvl */
+				table->WatermarkRow[WM_DCFCLK][num_valid_sets].MinMclk =
+						bw_params->clk_table.entries[i - 1].dcfclk_mhz + 1;
+			}
+			table->WatermarkRow[WM_DCFCLK][num_valid_sets].MaxMclk =
+					bw_params->clk_table.entries[i].dcfclk_mhz;
+
+		} else {
+			/* unconstrained for memory retraining */
+			table->WatermarkRow[WM_DCFCLK][num_valid_sets].MinClock = 0;
+			table->WatermarkRow[WM_DCFCLK][num_valid_sets].MaxClock = 0xFFFF;
+
+			/*
+			 * Modify previous watermark range to cover up to max.
+			 * There is no previous row when this is the first valid
+			 * set, so guard against writing to index -1.
+			 */
+			if (num_valid_sets > 0)
+				table->WatermarkRow[WM_DCFCLK][num_valid_sets - 1].MaxClock = 0xFFFF;
+		}
+		num_valid_sets++;
+	}
+
+	ASSERT(num_valid_sets != 0); /* Must have at least one set of valid watermarks */
+
+	/* modify the min and max to make sure we cover the whole range */
+	table->WatermarkRow[WM_DCFCLK][0].MinMclk = 0;
+	table->WatermarkRow[WM_DCFCLK][0].MinClock = 0;
+	if (num_valid_sets > 0) {
+		/* only touch the last row when one exists (avoids index -1) */
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets - 1].MaxMclk = 0xFFFF;
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets - 1].MaxClock = 0xFFFF;
+	}
+
+	/* This is for writeback only, does not matter currently as no writeback support */
+	table->WatermarkRow[WM_SOCCLK][0].WmSetting = WM_A;
+	table->WatermarkRow[WM_SOCCLK][0].MinClock = 0;
+	table->WatermarkRow[WM_SOCCLK][0].MaxClock = 0xFFFF;
+	table->WatermarkRow[WM_SOCCLK][0].MinMclk = 0;
+	table->WatermarkRow[WM_SOCCLK][0].MaxMclk = 0xFFFF;
+}
+
+/*
+ * clk_mgr_funcs hook: rebuild the watermark table from bw_params and hand it
+ * to the SMU (DRAM address high word, low word, then the transfer message).
+ *
+ * Nothing is sent when no SMU version was read, when there is no table, or
+ * when the table has no GPU address (mc_address == 0).
+ */
+static void dcn316_notify_wm_ranges(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	struct clk_mgr_dcn316 *clk_mgr_dcn316 = TO_CLK_MGR_DCN316(clk_mgr);
+	struct dcn316_watermarks *wm_buf = clk_mgr_dcn316->smu_wm_set.wm_set;
+
+	if (!clk_mgr->smu_ver || !wm_buf ||
+	    clk_mgr_dcn316->smu_wm_set.mc_address.quad_part == 0)
+		return;
+
+	/* start from a clean table, then fill in the ranges */
+	memset(wm_buf, 0, sizeof(*wm_buf));
+	dcn316_build_watermark_ranges(clk_mgr_base->bw_params, wm_buf);
+
+	/* tell the SMU where the table lives, then ask it to pull the table in */
+	dcn316_smu_set_dram_addr_high(clk_mgr, clk_mgr_dcn316->smu_wm_set.mc_address.high_part);
+	dcn316_smu_set_dram_addr_low(clk_mgr, clk_mgr_dcn316->smu_wm_set.mc_address.low_part);
+	dcn316_smu_transfer_wm_table_dram_2_smu(clk_mgr);
+}
+
+/*
+ * Ask the SMU to copy its DPM clock table into smu_dpm_clks->dpm_clks.
+ *
+ * The buffer is zeroed first, then the SMU is given the buffer address
+ * (high word, low word) and the transfer is triggered. Nothing happens when
+ * no SMU version was read, when the buffer pointer is NULL, or when the
+ * buffer has no GPU address (mc_address == 0).
+ */
+static void dcn316_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr,
+		struct dcn316_smu_dpm_clks *smu_dpm_clks)
+{
+	DpmClocks_316_t *dpm_buf = smu_dpm_clks->dpm_clks;
+
+	if (!clk_mgr->smu_ver || !dpm_buf || smu_dpm_clks->mc_address.quad_part == 0)
+		return;
+
+	memset(dpm_buf, 0, sizeof(*dpm_buf));
+
+	dcn316_smu_set_dram_addr_high(clk_mgr, smu_dpm_clks->mc_address.high_part);
+	dcn316_smu_set_dram_addr_low(clk_mgr, smu_dpm_clks->mc_address.low_part);
+	dcn316_smu_transfer_dpm_table_smu_2_dram(clk_mgr);
+}
+
+/*
+ * Return the largest of the first @num_clocks values in @clocks, or 0 when
+ * @num_clocks is 0.
+ *
+ * The index has the same unsigned type as the count, so the loop condition
+ * is not a signed/unsigned comparison.
+ */
+static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks)
+{
+	uint32_t max = 0;
+	uint32_t i;
+
+	for (i = 0; i < num_clocks; ++i) {
+		if (clocks[i] > max)
+			max = clocks[i];
+	}
+
+	return max;
+}
+
+/*
+ * Look up the clock that corresponds to @voltage.
+ *
+ * @clocks is indexed like clock_table->SocVoltage[]. A level whose voltage
+ * equals @voltage is returned immediately. Otherwise the result is the clock
+ * of the highest-voltage level that is still below @voltage (the later index
+ * wins ties). Asserts, and returns 0, when no level qualifies.
+ */
+static unsigned int find_clk_for_voltage(
+		const DpmClocks_316_t *clock_table,
+		const uint32_t clocks[],
+		unsigned int voltage)
+{
+	int best_voltage = 0;
+	int best_clock = 0;
+	int level;
+
+	for (level = 0; level < NUM_SOC_VOLTAGE_LEVELS; level++) {
+		/* exact match wins outright */
+		if (clock_table->SocVoltage[level] == voltage)
+			return clocks[level];
+
+		/* otherwise track the closest level below the requested voltage */
+		if (clock_table->SocVoltage[level] >= best_voltage &&
+		    clock_table->SocVoltage[level] < voltage) {
+			best_voltage = clock_table->SocVoltage[level];
+			best_clock = clocks[level];
+		}
+	}
+
+	ASSERT(best_clock);
+	return best_clock;
+}
+
+/*
+ * Overwrite clk_mgr->base.bw_params with data read from the SMU DPM clock
+ * table and the BIOS integrated-info table.
+ *
+ * @clk_mgr:     clock manager whose base.bw_params is updated in place
+ * @bios_info:   source of memory type and channel count
+ * @clock_table: DPM clock table (as fetched from the SMU)
+ *
+ * If every DfPstateTable[].FClk is 0 the function asserts and returns
+ * without touching bw_params, so the hard-coded defaults stay in effect.
+ */
+static void dcn316_clk_mgr_helper_populate_bw_params(
+ struct clk_mgr_internal *clk_mgr,
+ struct integrated_info *bios_info,
+ const DpmClocks_316_t *clock_table)
+{
+ int i, j;
+ struct clk_bw_params *bw_params = clk_mgr->base.bw_params;
+ uint32_t max_dispclk = 0, max_dppclk = 0;
+
+ /* j becomes the highest DfPstateTable index with a non-zero FClk; -1 = none */
+ j = -1;
+
+ static_assert(NUM_DF_PSTATE_LEVELS <= MAX_NUM_DPM_LVL,
+ "number of reported pstate levels exceeds maximum");
+
+ /* Find lowest DPM, FCLK is filled in reverse order*/
+
+ for (i = NUM_DF_PSTATE_LEVELS - 1; i >= 0; i--) {
+ if (clock_table->DfPstateTable[i].FClk != 0) {
+ j = i;
+ break;
+ }
+ }
+
+ if (j == -1) {
+ /* clock table is all 0s, just use our own hardcode */
+ ASSERT(0);
+ return;
+ }
+
+ bw_params->clk_table.num_entries = j + 1;
+
+ /* dispclk and dppclk can be max at any voltage, same number of levels for both */
+ if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS &&
+ clock_table->NumDispClkLevelsEnabled <= NUM_DPPCLK_DPM_LEVELS) {
+ max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled);
+ max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled);
+ } else {
+ /* level count out of range: max_dispclk/max_dppclk stay 0 */
+ ASSERT(0);
+ }
+
+ /* i walks bw_params entries upwards while j walks DfPstateTable downwards */
+ for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) {
+ int temp;
+
+ bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[j].FClk;
+ bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].MemClk;
+ bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].Voltage;
+ /* translate the WckRatio enum into a plain multiplier; unknown values map to 1 */
+ switch (clock_table->DfPstateTable[j].WckRatio) {
+ case WCK_RATIO_1_2:
+ bw_params->clk_table.entries[i].wck_ratio = 2;
+ break;
+ case WCK_RATIO_1_4:
+ bw_params->clk_table.entries[i].wck_ratio = 4;
+ break;
+ default:
+ bw_params->clk_table.entries[i].wck_ratio = 1;
+ }
+ /* a 0 result means no matching level was found; keep the existing value then */
+ temp = find_clk_for_voltage(clock_table, clock_table->DcfClocks, clock_table->DfPstateTable[j].Voltage);
+ if (temp)
+ bw_params->clk_table.entries[i].dcfclk_mhz = temp;
+ temp = find_clk_for_voltage(clock_table, clock_table->SocClocks, clock_table->DfPstateTable[j].Voltage);
+ if (temp)
+ bw_params->clk_table.entries[i].socclk_mhz = temp;
+ bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
+ bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
+ }
+
+ bw_params->vram_type = bios_info->memory_type;
+ bw_params->num_channels = bios_info->ma_channel_number;
+ /* NOTE(review): 0x22 is a raw memory_type code with no named constant here - confirm which type gets the 8-byte width */
+ bw_params->dram_channel_width_bytes = bios_info->memory_type == 0x22 ? 8 : 4;
+
+ /* one p-state watermark set per clock table entry; the remaining sets are marked invalid */
+ for (i = 0; i < WM_SET_COUNT; i++) {
+ bw_params->wm_table.entries[i].wm_inst = i;
+
+ if (i >= bw_params->clk_table.num_entries) {
+ bw_params->wm_table.entries[i].valid = false;
+ continue;
+ }
+
+ bw_params->wm_table.entries[i].wm_type = WM_TYPE_PSTATE_CHG;
+ bw_params->wm_table.entries[i].valid = true;
+ }
+}
+
+
+
+/*
+ * Clock manager operations for DCN316, installed by
+ * dcn316_clk_mgr_construct(). DCN316-specific hooks are defined in this
+ * file; the dce12_/dcn31_ entries reuse implementations declared in the
+ * headers included above.
+ */
+static struct clk_mgr_funcs dcn316_funcs = {
+ .enable_pme_wa = dcn316_enable_pme_wa,
+ .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
+ .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
+ .update_clocks = dcn316_update_clocks,
+ .init_clocks = dcn31_init_clocks,
+ .are_clock_states_equal = dcn31_are_clock_states_equal,
+ .notify_wm_ranges = dcn316_notify_wm_ranges
+};
+/* Defined elsewhere; not referenced in the visible part of this file. */
+extern struct clk_mgr_funcs dcn3_fpga_funcs;
+
+/*
+ * Initialise a DCN316 clock manager.
+ *
+ * @ctx:     DC context; also supplies dc_bios->integrated_info
+ * @clk_mgr: object to initialise
+ * @pp_smu:  stored in clk_mgr->base.pp_smu
+ * @dccg:    stored in clk_mgr->base.dccg
+ *
+ * Steps: install the function table and defaults, allocate the watermark
+ * table and a temporary DPM table in GPU memory (falling back to static
+ * dummies with mc_address 0 on failure), query the SMU version and DPREF
+ * clock, pick the watermark table for the memory type, and - when p-state
+ * support is enabled - populate bw_params from the SMU DPM table. The
+ * temporary DPM table is freed before returning; the watermark table is
+ * freed by dcn316_clk_mgr_destroy().
+ *
+ * Changes relative to the original:
+ *  - integrated_info is NULL-checked before its memory_type is read (it was
+ *    already NULL-checked further down, so NULL is an expected value);
+ *  - the 600000 kHz DPREF default is kept when the SMU query yields a
+ *    non-positive value (the query returns -1000 when no SMU is present).
+ */
+void dcn316_clk_mgr_construct(
+		struct dc_context *ctx,
+		struct clk_mgr_dcn316 *clk_mgr,
+		struct pp_smu_funcs *pp_smu,
+		struct dccg *dccg)
+{
+	struct dcn316_smu_dpm_clks smu_dpm_clks = { 0 };
+	struct clk_log_info log_info = {0};
+	int smu_dprefclk_khz;
+
+	clk_mgr->base.base.ctx = ctx;
+	clk_mgr->base.base.funcs = &dcn316_funcs;
+
+	clk_mgr->base.pp_smu = pp_smu;
+
+	clk_mgr->base.dccg = dccg;
+	clk_mgr->base.dfs_bypass_disp_clk = 0;
+
+	clk_mgr->base.dprefclk_ss_percentage = 0;
+	clk_mgr->base.dprefclk_ss_divider = 1000;
+	clk_mgr->base.ss_on_dprefclk = false;
+	clk_mgr->base.dfs_ref_freq_khz = 48000;
+
+	/* Watermark table shared with the SMU; lives until destroy. */
+	clk_mgr->smu_wm_set.wm_set = (struct dcn316_watermarks *)dm_helpers_allocate_gpu_mem(
+				clk_mgr->base.base.ctx,
+				DC_MEM_ALLOC_TYPE_FRAME_BUFFER,
+				sizeof(struct dcn316_watermarks),
+				&clk_mgr->smu_wm_set.mc_address.quad_part);
+
+	if (!clk_mgr->smu_wm_set.wm_set) {
+		/* mc_address 0 marks the dummy so it is never sent or freed */
+		clk_mgr->smu_wm_set.wm_set = &dummy_wms;
+		clk_mgr->smu_wm_set.mc_address.quad_part = 0;
+	}
+	ASSERT(clk_mgr->smu_wm_set.wm_set);
+
+	/* Temporary buffer for the SMU DPM table; freed at the end of this function. */
+	smu_dpm_clks.dpm_clks = (DpmClocks_316_t *)dm_helpers_allocate_gpu_mem(
+				clk_mgr->base.base.ctx,
+				DC_MEM_ALLOC_TYPE_FRAME_BUFFER,
+				sizeof(DpmClocks_316_t),
+				&smu_dpm_clks.mc_address.quad_part);
+
+	if (smu_dpm_clks.dpm_clks == NULL) {
+		smu_dpm_clks.dpm_clks = &dummy_clocks;
+		smu_dpm_clks.mc_address.quad_part = 0;
+	}
+
+	ASSERT(smu_dpm_clks.dpm_clks);
+
+	clk_mgr->base.smu_ver = dcn316_smu_get_smu_version(&clk_mgr->base);
+
+	if (clk_mgr->base.smu_ver > 0)
+		clk_mgr->base.smu_present = true;
+
+	// Skip this for now as it did not work on DCN315, renable during bring up
+	//clk_mgr->base.base.dentist_vco_freq_khz = get_vco_frequency_from_reg(&clk_mgr->base);
+	clk_mgr->base.base.dentist_vco_freq_khz = 2500000;
+
+	/* in case we don't get a value from the register, use default */
+	if (clk_mgr->base.base.dentist_vco_freq_khz == 0)
+		clk_mgr->base.base.dentist_vco_freq_khz = 2500000; /* 2500MHz */
+
+	/* integrated_info may be NULL (it is checked again below), so guard the read */
+	if (ctx->dc_bios->integrated_info &&
+	    ctx->dc_bios->integrated_info->memory_type == LpDdr5MemType)
+		dcn316_bw_params.wm_table = lpddr5_wm_table;
+	else
+		dcn316_bw_params.wm_table = ddr4_wm_table;
+
+	/* Saved clocks configured at boot for debug purposes */
+	dcn316_dump_clk_registers(&clk_mgr->base.base.boot_snapshot,
+				  &clk_mgr->base.base, &log_info);
+
+	/* Default DPREF clock; replaced only by a usable value from the SMU. */
+	clk_mgr->base.base.dprefclk_khz = 600000;
+	smu_dprefclk_khz = dcn316_smu_get_dpref_clk(&clk_mgr->base);
+	if (smu_dprefclk_khz > 0)
+		clk_mgr->base.base.dprefclk_khz = smu_dprefclk_khz;
+	clk_mgr->base.base.clks.ref_dtbclk_khz = clk_mgr->base.base.dprefclk_khz;
+	dce_clock_read_ss_info(&clk_mgr->base);
+	/*clk_mgr->base.dccg->ref_dtbclk_khz =
+	dce_adjust_dp_ref_freq_for_ss(&clk_mgr->base, clk_mgr->base.base.dprefclk_khz);*/
+
+	clk_mgr->base.base.bw_params = &dcn316_bw_params;
+
+	if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
+		dcn316_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks);
+
+		if (ctx->dc_bios->integrated_info) {
+			dcn316_clk_mgr_helper_populate_bw_params(
+					&clk_mgr->base,
+					ctx->dc_bios->integrated_info,
+					smu_dpm_clks.dpm_clks);
+		}
+	}
+
+	/* Only a real GPU allocation (non-zero mc_address) is freed, never the dummy. */
+	if (smu_dpm_clks.dpm_clks && smu_dpm_clks.mc_address.quad_part != 0)
+		dm_helpers_free_gpu_mem(clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_FRAME_BUFFER,
+				smu_dpm_clks.dpm_clks);
+}
+
+/*
+ * Release the watermark table allocated by dcn316_clk_mgr_construct().
+ * A table with mc_address 0 is the static dummy fallback and is not freed.
+ */
+void dcn316_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int)
+{
+	struct clk_mgr_dcn316 *clk_mgr = TO_CLK_MGR_DCN316(clk_mgr_int);
+
+	if (!clk_mgr->smu_wm_set.wm_set || clk_mgr->smu_wm_set.mc_address.quad_part == 0)
+		return;
+
+	dm_helpers_free_gpu_mem(clk_mgr_int->base.ctx, DC_MEM_ALLOC_TYPE_FRAME_BUFFER,
+			clk_mgr->smu_wm_set.wm_set);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.h
new file mode 100644
index 000000000000..864d1f6cef26
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN316_CLK_MGR_H__
+#define __DCN316_CLK_MGR_H__
+#include "clk_mgr_internal.h"
+
+struct dcn316_watermarks;
+
+/*
+ * Watermark table shared with the SMU.
+ * wm_set is the CPU pointer to the table; mc_address is the address whose
+ * high and low parts are sent to the SMU before a table transfer. In
+ * dcn316_clk_mgr.c an mc_address of 0 marks a CPU-only dummy table that is
+ * neither transferred nor freed.
+ */
+struct dcn316_smu_watermark_set {
+ struct dcn316_watermarks *wm_set;
+ union large_integer mc_address;
+};
+
+/*
+ * DCN316 clock manager: the generic internal clock manager plus the SMU
+ * watermark table. TO_CLK_MGR_DCN316() in dcn316_clk_mgr.c recovers this
+ * struct from a pointer to its base member via container_of().
+ */
+struct clk_mgr_dcn316 {
+ struct clk_mgr_internal base;
+ struct dcn316_smu_watermark_set smu_wm_set;
+};
+
+void dcn316_clk_mgr_construct(struct dc_context *ctx,
+ struct clk_mgr_dcn316 *clk_mgr,
+ struct pp_smu_funcs *pp_smu,
+ struct dccg *dccg);
+
+void dcn316_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int);
+
+#endif //__DCN316_CLK_MGR_H__
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_smu.c
new file mode 100644
index 000000000000..8b82092b91cd
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_smu.c
@@ -0,0 +1,344 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "core_types.h"
+#include "clk_mgr_internal.h"
+#include "reg_helper.h"
+#include "dm_helpers.h"
+#include "dcn316_smu.h"
+#include "mp/mp_13_0_8_offset.h"
+#include "mp/mp_13_0_8_sh_mask.h"
+
+#define MAX_INSTANCE 7
+#define MAX_SEGMENT 6
+
+/* Segment base offsets of one IP instance; indexed by a register's _BASE_IDX in REG(). */
+struct IP_BASE_INSTANCE {
+ unsigned int segment[MAX_SEGMENT];
+};
+
+/* Segment bases for up to MAX_INSTANCE instances of an IP block. */
+struct IP_BASE {
+ struct IP_BASE_INSTANCE instance[MAX_INSTANCE];
+};
+
+/*
+ * Segment base offsets used by REG() to form register addresses. Only
+ * instance 0 is populated (five segments); REG() reads instance 0 only.
+ */
+static const struct IP_BASE MP0_BASE = { { { { 0x00016000, 0x00DC0000, 0x00E00000, 0x00E40000, 0x0243FC00, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+
+/*
+ * Address of register reg<reg_name>: the MP0 instance-0 segment base selected
+ * by reg<reg_name>_BASE_IDX plus the register's own offset.
+ */
+#define REG(reg_name) \
+ (MP0_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name)
+
+/* Pastes reg_name and field into reg_name__field and hands the result to FD(). */
+#define FN(reg_name, field) \
+ FD(reg_name##__##field)
+
+#include "logger_types.h"
+#undef DC_LOGGER
+#define DC_LOGGER \
+ CTX->logger
+#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
+
+#define VBIOSSMC_MSG_TestMessage 0x01 ///< To check if PMFW is alive and responding. Requirement specified by PMFW team
+#define VBIOSSMC_MSG_GetPmfwVersion 0x02 ///< Get PMFW version
+#define VBIOSSMC_MSG_Spare0 0x03 ///< Spare0
+#define VBIOSSMC_MSG_SetDispclkFreq 0x04 ///< Set display clock frequency in MHZ
+#define VBIOSSMC_MSG_Spare1 0x05 ///< Spare1
+#define VBIOSSMC_MSG_SetDppclkFreq 0x06 ///< Set DPP clock frequency in MHZ
+#define VBIOSSMC_MSG_SetHardMinDcfclkByFreq 0x07 ///< Set DCF clock frequency hard min in MHZ
+#define VBIOSSMC_MSG_SetMinDeepSleepDcfclk 0x08 ///< Set DCF clock minimum frequency in deep sleep in MHZ
+#define VBIOSSMC_MSG_SetPhyclkVoltageByFreq 0x09 ///< Set display phy clock frequency in MHZ in case VMIN does not support phy frequency
+#define VBIOSSMC_MSG_GetFclkFrequency 0x0A ///< Get FCLK frequency, return frequency in MHZ
+#define VBIOSSMC_MSG_SetDisplayCount 0x0B ///< Inform PMFW of number of display connected
+#define VBIOSSMC_MSG_SPARE 0x0C ///< SPARE
+#define VBIOSSMC_MSG_UpdatePmeRestore 0x0D ///< To ask PMFW to write into Azalia for PME wake up event
+#define VBIOSSMC_MSG_SetVbiosDramAddrHigh 0x0E ///< Set DRAM address high 32 bits for WM table transfer
+#define VBIOSSMC_MSG_SetVbiosDramAddrLow 0x0F ///< Set DRAM address low 32 bits for WM table transfer
+#define VBIOSSMC_MSG_TransferTableSmu2Dram 0x10 ///< Transfer table from PMFW SRAM to system DRAM
+#define VBIOSSMC_MSG_TransferTableDram2Smu 0x11 ///< Transfer table from system DRAM to PMFW
+#define VBIOSSMC_MSG_SetDisplayIdleOptimizations 0x12 ///< Set Idle state optimization for display off
+#define VBIOSSMC_MSG_GetDprefclkFreq 0x13 ///< Get DPREF clock frequency. Return in MHZ
+#define VBIOSSMC_MSG_GetDtbclkFreq 0x14 ///< Get DTB clock frequency. Return in MHZ
+#define VBIOSSMC_MSG_SetDtbclkFreq 0x15 ///< Inform PMFW to turn on/off DTB clock arg = 1, turn DTB clock on 600MHz/ arg = 0 turn DTB clock off
+#define VBIOSSMC_Message_Count 0x16 ///< Total number of VBIOS and DAL messages
+
+#define VBIOSSMC_Status_BUSY 0x0 ///< Response register value while no response is available; the driver writes it before sending a message
+#define VBIOSSMC_Result_OK 0x01 ///< Message Response OK
+#define VBIOSSMC_Result_Failed 0xFF ///< Message Response Failed
+#define VBIOSSMC_Result_UnknownCmd 0xFE ///< Message Response Unknown Command
+#define VBIOSSMC_Result_CmdRejectedPrereq 0xFD ///< Message Response Command Failed Prerequisite
+#define VBIOSSMC_Result_CmdRejectedBusy 0xFC ///< Message Response Command Rejected due to PMFW is busy. Sender should retry sending this message
+
+/*
+ * Poll the SMU response register (MP1_SMN_C2PMSG_91) until it holds anything
+ * other than VBIOSSMC_Status_BUSY, or until the retries run out.
+ *
+ * This is used instead of the REG_WAIT macro because the wait ends when the
+ * register is NOT EQUAL to zero, and because the translation in msg_if.h
+ * won't work with REG_WAIT.
+ *
+ * The register is read up to max_retries + 1 times, with a delay of
+ * delay_us after each busy read (msleep for delays of 1000 us or more,
+ * udelay below that, no delay for 0). Returns the last value read, which is
+ * VBIOSSMC_Status_BUSY on timeout.
+ */
+static uint32_t dcn316_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, unsigned int delay_us, unsigned int max_retries)
+{
+	uint32_t response;
+
+	for (;;) {
+		response = REG_READ(MP1_SMN_C2PMSG_91);
+		if (response != VBIOSSMC_Status_BUSY)
+			return response;
+
+		if (delay_us >= 1000)
+			msleep(delay_us / 1000);
+		else if (delay_us > 0)
+			udelay(delay_us);
+
+		/* post-decrement: a budget of N allows N further reads */
+		if (max_retries-- == 0)
+			return response;
+	}
+}
+
+/*
+ * Send one message with a 32-bit parameter to the SMU and return the value
+ * the SMU left in the parameter register (MP1_SMN_C2PMSG_83).
+ *
+ * Sequence: wait for the previous message to complete, clear the response
+ * register, write the parameter, write the message id (which triggers the
+ * transaction), then wait for the response.
+ *
+ * Returns -1 without sending anything if the mailbox is still busy after the
+ * initial wait. If the SMU does not answer the new message in time, the
+ * timeout is reported through dm_helpers_smu_timeout() and the parameter
+ * register is returned anyway.
+ */
+static int dcn316_smu_send_msg_with_param(
+		struct clk_mgr_internal *clk_mgr,
+		unsigned int msg_id, unsigned int param)
+{
+	const unsigned int poll_delay_us = 10;
+	const unsigned int poll_retries = 200000;
+	uint32_t response;
+
+	response = dcn316_smu_wait_for_response(clk_mgr, poll_delay_us, poll_retries);
+
+	if (response != VBIOSSMC_Result_OK)
+		smu_print("SMU Response was not OK. SMU response after wait received is: %d\n", response);
+
+	if (response == VBIOSSMC_Status_BUSY)
+		return -1;
+
+	/* First clear response register */
+	REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Status_BUSY);
+
+	/* Set the parameter register for the SMU message, unit is Mhz */
+	REG_WRITE(MP1_SMN_C2PMSG_83, param);
+
+	/* Trigger the message transaction by writing the message ID */
+	REG_WRITE(MP1_SMN_C2PMSG_67, msg_id);
+
+	response = dcn316_smu_wait_for_response(clk_mgr, poll_delay_us, poll_retries);
+
+	if (response == VBIOSSMC_Status_BUSY) {
+		ASSERT(0);
+		dm_helpers_smu_timeout(CTX, msg_id, param, poll_delay_us * poll_retries);
+	}
+
+	return REG_READ(MP1_SMN_C2PMSG_83);
+}
+
+/* Query the PMFW version; returns whatever the GetPmfwVersion message yields. */
+int dcn316_smu_get_smu_version(struct clk_mgr_internal *clk_mgr)
+{
+	const unsigned int no_param = 0;
+
+	return dcn316_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_GetPmfwVersion, no_param);
+}
+
+
+/*
+ * Request a display clock from the SMU.
+ *
+ * The request is rounded up to whole MHz (the unit of the SMU parameter).
+ * Returns the SMU reply converted back to kHz, or the request unchanged when
+ * no SMU is present.
+ */
+int dcn316_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz)
+{
+	int reply_mhz;
+
+	if (clk_mgr->smu_present) {
+		reply_mhz = dcn316_smu_send_msg_with_param(
+				clk_mgr,
+				VBIOSSMC_MSG_SetDispclkFreq,
+				khz_to_mhz_ceil(requested_dispclk_khz));
+		return reply_mhz * 1000;
+	}
+
+	return requested_dispclk_khz;
+}
+
+/*
+ * Set the hard minimum DCFCLK through the SMU.
+ *
+ * Returns -1 when p-state support is disabled in the debug options, the
+ * request unchanged when no SMU is present, and otherwise the SMU reply
+ * converted from MHz to kHz. The request is rounded up to whole MHz.
+ */
+int dcn316_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_dcfclk_khz)
+{
+	int reply_mhz;
+
+	if (!clk_mgr->base.ctx->dc->debug.pstate_enabled)
+		return -1;
+
+	if (clk_mgr->smu_present) {
+		reply_mhz = dcn316_smu_send_msg_with_param(
+				clk_mgr,
+				VBIOSSMC_MSG_SetHardMinDcfclkByFreq,
+				khz_to_mhz_ceil(requested_dcfclk_khz));
+		return reply_mhz * 1000;
+	}
+
+	return requested_dcfclk_khz;
+}
+
+/*
+ * Set the minimum deep-sleep DCFCLK through the SMU.
+ *
+ * Returns -1 when p-state support is disabled in the debug options, the
+ * request unchanged when no SMU is present, and otherwise the SMU reply
+ * converted from MHz to kHz. The request is rounded up to whole MHz.
+ */
+int dcn316_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_min_ds_dcfclk_khz)
+{
+	int reply_mhz;
+
+	if (!clk_mgr->base.ctx->dc->debug.pstate_enabled)
+		return -1;
+
+	if (clk_mgr->smu_present) {
+		reply_mhz = dcn316_smu_send_msg_with_param(
+				clk_mgr,
+				VBIOSSMC_MSG_SetMinDeepSleepDcfclk,
+				khz_to_mhz_ceil(requested_min_ds_dcfclk_khz));
+		return reply_mhz * 1000;
+	}
+
+	return requested_min_ds_dcfclk_khz;
+}
+
+/*
+ * Request a DPP clock from the SMU.
+ *
+ * The request is rounded up to whole MHz. Returns the SMU reply converted
+ * back to kHz, or the request unchanged when no SMU is present.
+ */
+int dcn316_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_khz)
+{
+	int reply_mhz;
+
+	if (clk_mgr->smu_present) {
+		reply_mhz = dcn316_smu_send_msg_with_param(
+				clk_mgr,
+				VBIOSSMC_MSG_SetDppclkFreq,
+				khz_to_mhz_ceil(requested_dpp_khz));
+		return reply_mhz * 1000;
+	}
+
+	return requested_dpp_khz;
+}
+
+/*
+ * Send the raw idle optimization flags (union display_idle_optimization_u
+ * .data) to the SMU. Skipped when p-state support is disabled in the debug
+ * options or when no SMU is present.
+ */
+void dcn316_smu_set_display_idle_optimization(struct clk_mgr_internal *clk_mgr, uint32_t idle_info)
+{
+	if (!clk_mgr->base.ctx->dc->debug.pstate_enabled || !clk_mgr->smu_present)
+		return;
+
+	//TODO: Work with smu team to define optimization options.
+	dcn316_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_SetDisplayIdleOptimizations, idle_info);
+}
+
+/*
+ * Enable or disable PHY reference clock power-down through the idle
+ * optimization message: enabling sets df_request_disabled and
+ * phy_ref_clk_off, disabling sends all flags cleared. Skipped when no SMU is
+ * present.
+ */
+void dcn316_smu_enable_phy_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable)
+{
+	union display_idle_optimization_u flags = { 0 };
+
+	if (!clk_mgr->smu_present)
+		return;
+
+	flags.idle_info.df_request_disabled = enable ? 1 : 0;
+	flags.idle_info.phy_ref_clk_off = enable ? 1 : 0;
+
+	dcn316_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_SetDisplayIdleOptimizations, flags.data);
+}
+
+/* Give the SMU the high 32 bits of the DRAM table address; skipped when no SMU is present. */
+void dcn316_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high)
+{
+	if (clk_mgr->smu_present)
+		dcn316_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_SetVbiosDramAddrHigh, addr_high);
+}
+
+/* Give the SMU the low 32 bits of the DRAM table address; skipped when no SMU is present. */
+void dcn316_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low)
+{
+	if (clk_mgr->smu_present)
+		dcn316_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_SetVbiosDramAddrLow, addr_low);
+}
+
+/*
+ * Ask the SMU to copy its DPM clocks table (TABLE_DPMCLOCKS) to the DRAM
+ * address set beforehand; skipped when no SMU is present.
+ */
+void dcn316_smu_transfer_dpm_table_smu_2_dram(struct clk_mgr_internal *clk_mgr)
+{
+	if (clk_mgr->smu_present)
+		dcn316_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_TransferTableSmu2Dram, TABLE_DPMCLOCKS);
+}
+
+/*
+ * Ask the SMU to load the watermark table (TABLE_WATERMARKS) from the DRAM
+ * address set beforehand; skipped when no SMU is present.
+ */
+void dcn316_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr)
+{
+	if (clk_mgr->smu_present)
+		dcn316_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_TransferTableDram2Smu, TABLE_WATERMARKS);
+}
+
+/* Send the UpdatePmeRestore message (parameter 0); skipped when no SMU is present. */
+void dcn316_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr)
+{
+	if (clk_mgr->smu_present)
+		dcn316_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_UpdatePmeRestore, 0);
+}
+
+/*
+ * Turn the DTB clock on (enable == true, message argument 1) or off
+ * (enable == false, argument 0). When on, the clock runs at 600MHz.
+ * Skipped when no SMU is present.
+ */
+void dcn316_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable)
+{
+	if (clk_mgr->smu_present)
+		dcn316_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_SetDtbclkFreq, enable);
+}
+
+/*
+ * Read the DPREF clock from the SMU and return it in kHz.
+ * Without an SMU the result is -1000 (the -1 "unknown" MHz marker scaled to
+ * kHz), so callers must treat non-positive values as "not available".
+ */
+int dcn316_smu_get_dpref_clk(struct clk_mgr_internal *clk_mgr)
+{
+	int reply_mhz = -1;
+
+	if (!clk_mgr->smu_present)
+		return reply_mhz * 1000;
+
+	reply_mhz = dcn316_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_GetDprefclkFreq, 0);
+
+	return reply_mhz * 1000;
+}
+
+/*
+ * Read the FCLK frequency from the SMU and return it in kHz.
+ * Without an SMU the result is -1000 (the -1 "unknown" MHz marker scaled to
+ * kHz).
+ */
+int dcn316_smu_get_smu_fclk(struct clk_mgr_internal *clk_mgr)
+{
+	int reply_mhz = -1;
+
+	if (!clk_mgr->smu_present)
+		return reply_mhz * 1000;
+
+	reply_mhz = dcn316_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_GetFclkFrequency, 0);
+
+	return reply_mhz * 1000;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_smu.h
new file mode 100644
index 000000000000..2a7293f66515
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_smu.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef DAL_DC_316_SMU_H_
+#define DAL_DC_316_SMU_H_
+#include "os_types.h"
+
+#define PMFW_DRIVER_IF_VERSION 4
+
+#define NUM_DCFCLK_DPM_LEVELS 8
+#define NUM_DISPCLK_DPM_LEVELS 8
+#define NUM_DPPCLK_DPM_LEVELS 8
+#define NUM_SOCCLK_DPM_LEVELS 8
+#define NUM_VCN_DPM_LEVELS 8
+#define NUM_SOC_VOLTAGE_LEVELS 8
+#define NUM_DF_PSTATE_LEVELS 4
+
+typedef struct { // One watermark row; element type of dcn316_watermarks.WatermarkRow
+ uint16_t MinClock; // This is either DCFCLK or SOCCLK (in MHz)
+ uint16_t MaxClock; // This is either DCFCLK or SOCCLK (in MHz)
+ uint16_t MinMclk; // presumably MHz like MinClock/MaxClock - TODO confirm
+ uint16_t MaxMclk;
+ uint8_t WmSetting;
+ uint8_t WmType; // Used for normal pstate change or memory retraining (presumably WM_PSTATE_CHG / WM_RETRAINING - TODO confirm)
+ uint8_t Padding[2]; // explicit padding: 4*2 + 2*1 + 2 = 12 bytes of fields
+} WatermarkRowGeneric_t;
+
+#define NUM_WM_RANGES 4
+#define WM_PSTATE_CHG 0
+#define WM_RETRAINING 1
+
+typedef enum { // WM_COUNT sizes the first dimension of dcn316_watermarks.WatermarkRow
+ WM_SOCCLK = 0,
+ WM_DCFCLK,
+ WM_COUNT, // number of enumerators above (2)
+} WM_CLOCK_e;
+
+typedef enum{ // Wck:ck ratio values
+ WCK_RATIO_1_1 = 0, // DDR5, Wck:ck is always 1:1;
+ WCK_RATIO_1_2,
+ WCK_RATIO_1_4,
+ WCK_RATIO_MAX // one past the last ratio value (3)
+} WCK_RATIO_e;
+
+typedef struct { // One DF P-state entry; element type of DpmClocks_316_t.DfPstateTable
+ uint32_t FClk;
+ uint32_t MemClk;
+ uint32_t Voltage;
+ uint8_t WckRatio; // presumably a WCK_RATIO_e value - TODO confirm
+ uint8_t Spare[3]; // with WckRatio fills one 32-bit word: 3*4 + 1 + 3 = 16 bytes of fields
+} DfPstateTable_t;
+
+//Freq in MHz
+//Voltage in milli volts with 2 fractional bits
+typedef struct { // DPM clock table; pointed to by dcn316_smu_dpm_clks.dpm_clks
+ uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS];
+ uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS];
+ uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS];
+ uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS];
+ uint32_t VClocks[NUM_VCN_DPM_LEVELS]; // VClocks and DClocks are both sized by NUM_VCN_DPM_LEVELS
+ uint32_t DClocks[NUM_VCN_DPM_LEVELS];
+ uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS];
+ DfPstateTable_t DfPstateTable[NUM_DF_PSTATE_LEVELS];
+ uint8_t NumDcfClkLevelsEnabled; // presumably how many leading DcfClocks[] entries are valid - TODO confirm
+ uint8_t NumDispClkLevelsEnabled; //Applies to both Dispclk and Dppclk
+ uint8_t NumSocClkLevelsEnabled;
+ uint8_t VcnClkLevelsEnabled; //Applies to both Vclk and Dclk
+ uint8_t NumDfPstatesEnabled;
+ uint8_t spare[3]; // 5 count bytes + 3 spare bytes = 8, so the uint32_t fields below stay 4-byte aligned
+ uint32_t MinGfxClk;
+ uint32_t MaxGfxClk;
+} DpmClocks_316_t;
+
+struct dcn316_watermarks { // presumably the layout of the table identified by TABLE_WATERMARKS - TODO confirm
+ // Watermarks: WM_COUNT x NUM_WM_RANGES rows, first index is a WM_CLOCK_e value
+ WatermarkRowGeneric_t WatermarkRow[WM_COUNT][NUM_WM_RANGES];
+ uint32_t MmHubPadding[7]; // SMU internal use
+};
+
+struct dcn316_smu_dpm_clks { // Pairs a DPM clock table pointer with an address
+ DpmClocks_316_t *dpm_clks; // presumably the driver's mapping of the table buffer - TODO confirm
+ union large_integer mc_address; // presumably the address of the same buffer as given to the SMU - TODO confirm
+};
+
+#define TABLE_WATERMARKS 1 // Called by DAL through VBIOS
+#define TABLE_DPMCLOCKS 4 // Called by Driver and VBIOS
+
+struct display_idle_optimization { // Bit-field view of a 32-bit word (1 + 1 + 1 + 29 bits)
+ unsigned int df_request_disabled : 1;
+ unsigned int phy_ref_clk_off : 1;
+ unsigned int s0i2_rdy : 1;
+ unsigned int reserved : 29; // remaining bits of the 32-bit word
+};
+
+union display_idle_optimization_u { // Same 32 bits, either as named flags or as one raw word
+ struct display_idle_optimization idle_info; // per-flag access
+ uint32_t data; // raw value; presumably what dcn316_smu_set_display_idle_optimization() takes as idle_info - TODO confirm
+};
+
+int dcn316_smu_get_smu_version(struct clk_mgr_internal *clk_mgr);
+int dcn316_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz);
+int dcn316_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_dcfclk_khz);
+int dcn316_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_min_ds_dcfclk_khz);
+int dcn316_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_khz);
+void dcn316_smu_set_display_idle_optimization(struct clk_mgr_internal *clk_mgr, uint32_t idle_info);
+void dcn316_smu_enable_phy_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable);
+void dcn316_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high);
+void dcn316_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low);
+void dcn316_smu_transfer_dpm_table_smu_2_dram(struct clk_mgr_internal *clk_mgr);
+void dcn316_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr);
+void dcn316_smu_request_voltage_via_phyclk(struct clk_mgr_internal *clk_mgr, int requested_phyclk_khz);
+void dcn316_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr);
+void dcn316_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable);
+int dcn316_smu_get_dpref_clk(struct clk_mgr_internal *clk_mgr);
+int dcn316_smu_get_smu_fclk(struct clk_mgr_internal *clk_mgr);
+
+#endif /* DAL_DC_316_SMU_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dalsmc.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dalsmc.h
new file mode 100644
index 000000000000..724a508b0adb
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dalsmc.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#ifndef DALSMC_H
+#define DALSMC_H
+
+#define DALSMC_VERSION 0x1
+
+// SMU Response Codes:
+#define DALSMC_Result_OK 0x1
+#define DALSMC_Result_Failed 0xFF
+#define DALSMC_Result_UnknownCmd 0xFE
+#define DALSMC_Result_CmdRejectedPrereq 0xFD
+#define DALSMC_Result_CmdRejectedBusy 0xFC
+
+// Message Definitions:
+#define DALSMC_MSG_TestMessage 0x1
+#define DALSMC_MSG_GetSmuVersion 0x2
+#define DALSMC_MSG_GetDriverIfVersion 0x3
+#define DALSMC_MSG_GetMsgHeaderVersion 0x4
+#define DALSMC_MSG_SetDalDramAddrHigh 0x5
+#define DALSMC_MSG_SetDalDramAddrLow 0x6
+#define DALSMC_MSG_TransferTableSmu2Dram 0x7
+#define DALSMC_MSG_TransferTableDram2Smu 0x8
+#define DALSMC_MSG_SetHardMinByFreq 0x9
+#define DALSMC_MSG_SetHardMaxByFreq 0xA
+#define DALSMC_MSG_GetDpmFreqByIndex 0xB
+#define DALSMC_MSG_GetDcModeMaxDpmFreq 0xC
+#define DALSMC_MSG_SetMinDeepSleepDcfclk 0xD
+#define DALSMC_MSG_NumOfDisplays 0xE
+#define DALSMC_MSG_SetExternalClientDfCstateAllow 0xF
+#define DALSMC_MSG_BacoAudioD3PME 0x10
+#define DALSMC_MSG_SetFclkSwitchAllow 0x11
+#define DALSMC_MSG_SetCabForUclkPstate 0x12
+#define DALSMC_MSG_SetWorstCaseUclkLatency 0x13
+#define DALSMC_MSG_SetAlwaysWaitDmcubResp 0x14
+#define DALSMC_MSG_ReturnHardMinStatus 0x15
+#define DALSMC_Message_Count 0x16
+
+#define CHECK_HARD_MIN_CLK_DISPCLK 0x1
+#define CHECK_HARD_MIN_CLK_DPPCLK 0x2
+#define CHECK_HARD_MIN_CLK_DPREFCLK 0x4
+#define CHECK_HARD_MIN_CLK_DCFCLK 0x8
+#define CHECK_HARD_MIN_CLK_DTBCLK 0x10
+#define CHECK_HARD_MIN_CLK_UCLK 0x20
+
+typedef enum { /* presumably the argument of DALSMC_MSG_SetFclkSwitchAllow - TODO confirm */
+ FCLK_SWITCH_DISALLOW, /* = 0 */
+ FCLK_SWITCH_ALLOW, /* = 1 */
+} FclkSwitchAllow_e;
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
new file mode 100644
index 000000000000..7da7b41bd092
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
@@ -0,0 +1,1233 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dccg.h"
+#include "clk_mgr_internal.h"
+#include "dcn32/dcn32_clk_mgr_smu_msg.h"
+#include "dcn20/dcn20_clk_mgr.h"
+#include "dce100/dce_clk_mgr.h"
+#include "dcn31/dcn31_clk_mgr.h"
+#include "dcn32/dcn32_clk_mgr.h"
+#include "reg_helper.h"
+#include "core_types.h"
+#include "dm_helpers.h"
+#include "link_service.h"
+#include "dc_state_priv.h"
+#include "atomfirmware.h"
+#include "dcn32_smu13_driver_if.h"
+
+#include "dcn/dcn_3_2_0_offset.h"
+#include "dcn/dcn_3_2_0_sh_mask.h"
+
+#include "dml/dcn32/dcn32_fpu.h"
+
+#define DCN_BASE__INST0_SEG1 0x000000C0
+
+#define mmCLK1_CLK_PLL_REQ 0x16E37
+#define mmCLK1_CLK0_DFS_CNTL 0x16E69
+#define mmCLK1_CLK1_DFS_CNTL 0x16E6C
+#define mmCLK1_CLK2_DFS_CNTL 0x16E6F
+#define mmCLK1_CLK3_DFS_CNTL 0x16E72
+#define mmCLK1_CLK4_DFS_CNTL 0x16E75
+
+#define mmCLK1_CLK0_CURRENT_CNT 0x16EE7
+#define mmCLK1_CLK1_CURRENT_CNT 0x16EE8
+#define mmCLK1_CLK2_CURRENT_CNT 0x16EE9
+#define mmCLK1_CLK3_CURRENT_CNT 0x16EEA
+#define mmCLK1_CLK4_CURRENT_CNT 0x16EEB
+
+#define mmCLK4_CLK0_CURRENT_CNT 0x1B0C9
+
+#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001ffUL
+#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000f000UL
+#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xffff0000UL
+#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x00000000
+#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0x0000000c
+#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x00000010
+
+#define mmCLK01_CLK0_CLK_PLL_REQ 0x16E37
+#define mmCLK01_CLK0_CLK0_DFS_CNTL 0x16E64
+#define mmCLK01_CLK0_CLK1_DFS_CNTL 0x16E67
+#define mmCLK01_CLK0_CLK2_DFS_CNTL 0x16E6A
+#define mmCLK01_CLK0_CLK3_DFS_CNTL 0x16E6D
+#define mmCLK01_CLK0_CLK4_DFS_CNTL 0x16E70
+
+#define CLK0_CLK_PLL_REQ__FbMult_int_MASK 0x000001ffL
+#define CLK0_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000f000L
+#define CLK0_CLK_PLL_REQ__FbMult_frac_MASK 0xffff0000L
+#define CLK0_CLK_PLL_REQ__FbMult_int__SHIFT 0x00000000
+#define CLK0_CLK_PLL_REQ__PllSpineDiv__SHIFT 0x0000000c
+#define CLK0_CLK_PLL_REQ__FbMult_frac__SHIFT 0x00000010
+
+#undef FN
+#define FN(reg_name, field_name) \
+ clk_mgr->clk_mgr_shift->field_name, clk_mgr->clk_mgr_mask->field_name
+
+#define REG(reg) \
+ (clk_mgr->regs->reg)
+
+#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
+
+#define BASE(seg) BASE_INNER(seg)
+
+#define SR(reg_name)\
+ .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name
+
+#define CLK_SR_DCN32(reg_name)\
+ .reg_name = mm ## reg_name
+
+static const struct clk_mgr_registers clk_mgr_regs_dcn32 = { /* register table, filled by CLK_REG_LIST_DCN32() */
+ CLK_REG_LIST_DCN32()
+};
+
+static const struct clk_mgr_shift clk_mgr_shift_dcn32 = { /* field list instantiated with the __SHIFT suffix */
+ CLK_COMMON_MASK_SH_LIST_DCN32(__SHIFT)
+};
+
+static const struct clk_mgr_mask clk_mgr_mask_dcn32 = { /* same field list instantiated with the _MASK suffix */
+ CLK_COMMON_MASK_SH_LIST_DCN32(_MASK)
+};
+
+
+#define CLK_SR_DCN321(reg_name, block, inst)\
+ .reg_name = mm ## block ## _ ## reg_name
+
+static const struct clk_mgr_registers clk_mgr_regs_dcn321 = { /* register table, filled by CLK_REG_LIST_DCN321() */
+ CLK_REG_LIST_DCN321()
+};
+
+static const struct clk_mgr_shift clk_mgr_shift_dcn321 = { /* field list instantiated with the __SHIFT suffix */
+ CLK_COMMON_MASK_SH_LIST_DCN321(__SHIFT)
+};
+
+static const struct clk_mgr_mask clk_mgr_mask_dcn321 = { /* same field list instantiated with the _MASK suffix */
+ CLK_COMMON_MASK_SH_LIST_DCN321(_MASK)
+};
+
+
+/*
+ * Query SMU for all clock states for a particular clock.
+ * entry_0 points at the clock's field inside clk_table.entries[0]; level i is stored
+ * one table entry (sizeof(entries[0]) bytes) further on. *num_levels receives the count.
+ */
+static void dcn32_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e clk, unsigned int *entry_0,
+		unsigned int *num_levels)
+{
+	unsigned int i;
+	char *entry_i = (char *)entry_0;
+	uint32_t ret = dcn30_smu_get_dpm_freq_by_index(clk_mgr, clk, 0xFF);
+
+	/* bit 31 set: fine-grained, only min and max; else discrete, count in low byte (0 on failure) */
+	*num_levels = (ret & (1U << 31)) ? 2 : (ret & 0xFF);
+	/* the low byte can report up to 255 levels: never walk past the end of the table */
+	if (*num_levels > ARRAY_SIZE(clk_mgr->base.bw_params->clk_table.entries))
+		*num_levels = ARRAY_SIZE(clk_mgr->base.bw_params->clk_table.entries);
+
+	for (i = 0; i < *num_levels; i++) {
+		*((unsigned int *)entry_i) = (dcn30_smu_get_dpm_freq_by_index(clk_mgr, clk, i) & 0xFFFF);
+		entry_i += sizeof(clk_mgr->base.bw_params->clk_table.entries[0]);
+	}
+}
+
+static void dcn32_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
+{ /* Thin wrapper: runs the _fpu variant between DC_FP_START() and DC_FP_END(). */
+ DC_FP_START();
+ dcn32_build_wm_range_table_fpu(clk_mgr);
+ DC_FP_END();
+}
+
+/* Reset the cached clock state, then (if an SMU responds) fill the bw_params clock
+ * table and dc_mode_limit values from it. Does nothing when bw_params is NULL.
+ */
+void dcn32_init_clocks(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	struct clk_limit_num_entries *num_entries_per_clk;
+	unsigned int i;
+
+	if (!clk_mgr_base->bw_params)
+		return;
+
+	num_entries_per_clk = &clk_mgr_base->bw_params->clk_table.num_entries_per_clk;
+
+	memset(&(clk_mgr_base->clks), 0, sizeof(struct dc_clocks));
+	clk_mgr_base->clks.p_state_change_support = true;
+	clk_mgr_base->clks.prev_p_state_change_support = true;
+	clk_mgr_base->clks.fclk_prev_p_state_change_support = true;
+	clk_mgr->smu_present = false;
+	clk_mgr->dpm_present = false;
+
+	if (!clk_mgr_base->force_smu_not_present && dcn30_smu_get_smu_version(clk_mgr, &clk_mgr->smu_ver))
+		clk_mgr->smu_present = true;
+
+	if (!clk_mgr->smu_present)
+		return;
+
+	dcn30_smu_check_driver_if_version(clk_mgr);
+	dcn30_smu_check_msg_header_version(clk_mgr);
+
+	/* DCFCLK */
+	dcn32_init_single_clock(clk_mgr, PPCLK_DCFCLK,
+			&clk_mgr_base->bw_params->clk_table.entries[0].dcfclk_mhz,
+			&num_entries_per_clk->num_dcfclk_levels);
+	clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DCFCLK);
+
+	/* SOCCLK */
+	dcn32_init_single_clock(clk_mgr, PPCLK_SOCCLK,
+			&clk_mgr_base->bw_params->clk_table.entries[0].socclk_mhz,
+			&num_entries_per_clk->num_socclk_levels);
+	clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_SOCCLK);
+
+	/* DTBCLK */
+	if (!clk_mgr->base.ctx->dc->debug.disable_dtb_ref_clk_switch) {
+		dcn32_init_single_clock(clk_mgr, PPCLK_DTBCLK,
+				&clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz,
+				&num_entries_per_clk->num_dtbclk_levels);
+		clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz =
+				dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DTBCLK);
+	}
+
+	/* DISPCLK */
+	dcn32_init_single_clock(clk_mgr, PPCLK_DISPCLK,
+			&clk_mgr_base->bw_params->clk_table.entries[0].dispclk_mhz,
+			&num_entries_per_clk->num_dispclk_levels);
+	clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DISPCLK);
+	//HW recommends limit of 1950 MHz in display clock for all DCN3.2.x
+	if (clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz > 1950)
+		clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz = 1950;
+
+	/* DPPCLK */
+	dcn32_init_single_clock(clk_mgr, PPCLK_DPPCLK,
+			&clk_mgr_base->bw_params->clk_table.entries[0].dppclk_mhz,
+			&num_entries_per_clk->num_dppclk_levels);
+	clk_mgr_base->bw_params->dc_mode_limit.dppclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DPPCLK);
+	//HW recommends limit of 1950 MHz in display clock for all DCN3.2.x
+	if (clk_mgr_base->bw_params->dc_mode_limit.dppclk_mhz > 1950)
+		clk_mgr_base->bw_params->dc_mode_limit.dppclk_mhz = 1950;
+
+	if (num_entries_per_clk->num_dcfclk_levels &&
+			num_entries_per_clk->num_dtbclk_levels &&
+			num_entries_per_clk->num_dispclk_levels)
+		clk_mgr->dpm_present = true;
+	/* Below, each table column is floored/clamped over that clock's own level count. */
+	if (clk_mgr_base->ctx->dc->debug.min_disp_clk_khz) {
+		for (i = 0; i < num_entries_per_clk->num_dispclk_levels; i++)
+			if (clk_mgr_base->bw_params->clk_table.entries[i].dispclk_mhz
+					< khz_to_mhz_ceil(clk_mgr_base->ctx->dc->debug.min_disp_clk_khz))
+				clk_mgr_base->bw_params->clk_table.entries[i].dispclk_mhz
+					= khz_to_mhz_ceil(clk_mgr_base->ctx->dc->debug.min_disp_clk_khz);
+	}
+	for (i = 0; i < num_entries_per_clk->num_dispclk_levels; i++)
+		if (clk_mgr_base->bw_params->clk_table.entries[i].dispclk_mhz > 1950)
+			clk_mgr_base->bw_params->clk_table.entries[i].dispclk_mhz = 1950;
+
+	if (clk_mgr_base->ctx->dc->debug.min_dpp_clk_khz) {
+		for (i = 0; i < num_entries_per_clk->num_dppclk_levels; i++)
+			if (clk_mgr_base->bw_params->clk_table.entries[i].dppclk_mhz
+					< khz_to_mhz_ceil(clk_mgr_base->ctx->dc->debug.min_dpp_clk_khz))
+				clk_mgr_base->bw_params->clk_table.entries[i].dppclk_mhz
+					= khz_to_mhz_ceil(clk_mgr_base->ctx->dc->debug.min_dpp_clk_khz);
+	}
+
+	for (i = 0; i < num_entries_per_clk->num_dppclk_levels; i++)
+		if (clk_mgr_base->bw_params->clk_table.entries[i].dppclk_mhz > 1950)
+			clk_mgr_base->bw_params->clk_table.entries[i].dppclk_mhz = 1950;
+
+	/* Get UCLK, update bounding box */
+	clk_mgr_base->funcs->get_memclk_states_from_smu(clk_mgr_base);
+
+	/* WM range table */
+	dcn32_build_wm_range_table(clk_mgr);
+}
+
+/*
+ * Program the DTBCLK DTO of every timing generator referenced by @context
+ * with @ref_dtbclk_khz. Pipes sharing a TG instance are programmed once.
+ */
+static void dcn32_update_clocks_update_dtb_dto(struct clk_mgr_internal *clk_mgr,
+		struct dc_state *context,
+		int ref_dtbclk_khz)
+{
+	struct dccg *dccg = clk_mgr->dccg;
+	uint32_t seen_tgs = 0;	/* bit n is set once TG instance n has been programmed */
+	int pipe_idx;
+
+	for (pipe_idx = 0; pipe_idx < clk_mgr->base.ctx->dc->res_pool->pipe_count; pipe_idx++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[pipe_idx];
+		struct dtbclk_dto_params dto_params = {0};
+
+		if (!pipe->stream_res.tg || (seen_tgs & (1 << pipe->stream_res.tg->inst)))
+			continue;
+
+		seen_tgs |= (1 << pipe->stream_res.tg->inst);
+		dto_params.otg_inst = pipe->stream_res.tg->inst;
+		dto_params.ref_dtbclk_khz = ref_dtbclk_khz;
+		dccg->funcs->set_dtbclk_dto(clk_mgr->dccg, &dto_params);
+	}
+}
+
+/*
+ * The DPPCLK request to PMFW has to be exact because of DPP DTO programming.
+ * Snap dppclk_khz and dispclk_khz in @new_clocks to the frequency obtained
+ * from the integer divider, so that rounding cannot make the DPP refclk and
+ * the DPP DTO disagree.
+ */
+static void dcn32_update_dppclk_dispclk_freq(struct clk_mgr_internal *clk_mgr, struct dc_clocks *new_clocks)
+{
+	if (new_clocks->dispclk_khz > 0) {
+		int divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+				* clk_mgr->base.dentist_vco_freq_khz / new_clocks->dispclk_khz;
+
+		new_clocks->dispclk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR * clk_mgr->base.dentist_vco_freq_khz) / divider;
+	}
+	if (new_clocks->dppclk_khz) {	/* any non-zero value, unlike the "> 0" test used for DISPCLK */
+		int divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+				* clk_mgr->base.dentist_vco_freq_khz / new_clocks->dppclk_khz;
+
+		new_clocks->dppclk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR * clk_mgr->base.dentist_vco_freq_khz) / divider;
+	}
+}
+
+/*
+ * Record the current DPPCLK as the DCCG reference clock and update the DPP
+ * DTO of each pipe in @context. A pipe's DTO is reprogrammed when
+ * @safe_to_lower is set or its request exceeds dccg->pipe_dppclk_khz[i].
+ */
+void dcn32_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
+		struct dc_state *context, bool safe_to_lower)
+{
+	struct dccg *dccg = clk_mgr->dccg;
+	int i;
+
+	dccg->ref_dppclk = clk_mgr->base.clks.dppclk_khz;
+	for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+		int wanted_khz = pipe->plane_res.bw.dppclk_khz;
+		int dpp_inst = 0;
+
+		if (pipe->plane_res.dpp) {
+			dpp_inst = pipe->plane_res.dpp->inst;
+		} else if (wanted_khz == 0) {
+			/* No DPP and no clock request is valid (pipe harvesting): skip. */
+			continue;
+		} else if (wanted_khz > 0) {
+			/* A clock request without a DPP resource is invalid software state. */
+			ASSERT(false);
+			continue;
+		}
+
+		/* Lowering a pipe's DPPCLK is only done when the caller allows it. */
+		if (safe_to_lower || dccg->pipe_dppclk_khz[i] < wanted_khz)
+			dccg->funcs->update_dpp_dto(dccg, dpp_inst, wanted_khz);
+	}
+}
+
+static void dcn32_update_clocks_update_dentist(
+ struct clk_mgr_internal *clk_mgr,
+ struct dc_state *context)
+{ /* Moves DISPCLK to clks.dispclk_khz: SMU hard-min request plus, if debug.override_dispclk_programming is set, a direct WDIVIDER write. */
+ uint32_t new_disp_divider = 0;
+ uint32_t new_dispclk_wdivider = 0;
+ uint32_t old_dispclk_wdivider = 0;
+ uint32_t i;
+ uint32_t dentist_dispclk_wdivider_readback = 0;
+ struct dc *dc = clk_mgr->base.ctx->dc;
+
+ if (clk_mgr->base.clks.dispclk_khz == 0)
+ return; /* nothing to program; also keeps the division below away from zero */
+
+ new_disp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+ * clk_mgr->base.dentist_vco_freq_khz / clk_mgr->base.clks.dispclk_khz;
+
+ new_dispclk_wdivider = dentist_get_did_from_divider(new_disp_divider);
+ REG_GET(DENTIST_DISPCLK_CNTL,
+ DENTIST_DISPCLK_WDIVIDER, &old_dispclk_wdivider);
+
+ /* When changing divider to or from 127, some extra programming is required to prevent corruption */
+ if (old_dispclk_wdivider == 127 && new_dispclk_wdivider != 127) {
+ for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+ uint32_t fifo_level;
+ struct dccg *dccg = clk_mgr->base.ctx->dc->res_pool->dccg;
+ struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc;
+ int32_t N;
+ int32_t j;
+
+ if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER))
+ continue;
+ /* Virtual encoders don't have this function */
+ if (!stream_enc->funcs->get_fifo_cal_average_level)
+ continue;
+ fifo_level = stream_enc->funcs->get_fifo_cal_average_level(
+ stream_enc);
+ N = fifo_level / 4;
+ dccg->funcs->set_fifo_errdet_ovr_en(
+ dccg,
+ true);
+ for (j = 0; j < N - 4; j++) /* drops N - 4 pixels; none when N <= 4 */
+ dccg->funcs->otg_drop_pixel(
+ dccg,
+ pipe_ctx->stream_res.tg->inst);
+ dccg->funcs->set_fifo_errdet_ovr_en(
+ dccg,
+ false);
+ }
+ } else if (new_dispclk_wdivider == 127 && old_dispclk_wdivider != 127) {
+ /* request clock with 126 divider first */
+ uint32_t temp_disp_divider = dentist_get_divider_from_did(126);
+ uint32_t temp_dispclk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR * clk_mgr->base.dentist_vco_freq_khz) / temp_disp_divider;
+
+ if (clk_mgr->smu_present)
+ /*
+ * SMU uses discrete dispclk presets. We applied
+ * the same formula to increase our dppclk_khz
+ * to the next matching discrete value. By
+ * contract, we should use the preset dispclk
+ * floored in Mhz to describe the intended clock.
+ */
+ dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DISPCLK,
+ khz_to_mhz_floor(temp_dispclk_khz));
+
+ if (dc->debug.override_dispclk_programming) {
+ REG_GET(DENTIST_DISPCLK_CNTL,
+ DENTIST_DISPCLK_WDIVIDER, &dentist_dispclk_wdivider_readback);
+
+ if (dentist_dispclk_wdivider_readback != 126) {
+ REG_UPDATE(DENTIST_DISPCLK_CNTL,
+ DENTIST_DISPCLK_WDIVIDER, 126);
+ REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 2000);
+ }
+ }
+
+ for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+ struct dccg *dccg = clk_mgr->base.ctx->dc->res_pool->dccg;
+ struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc;
+ uint32_t fifo_level;
+ int32_t N;
+ int32_t j;
+
+ if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER))
+ continue;
+ /* Virtual encoders don't have this function */
+ if (!stream_enc->funcs->get_fifo_cal_average_level)
+ continue;
+ fifo_level = stream_enc->funcs->get_fifo_cal_average_level(
+ stream_enc);
+ N = fifo_level / 4;
+ dccg->funcs->set_fifo_errdet_ovr_en(dccg, true);
+ for (j = 0; j < 12 - N; j++) /* adds 12 - N pixels; none when N >= 12 */
+ dccg->funcs->otg_add_pixel(dccg,
+ pipe_ctx->stream_res.tg->inst);
+ dccg->funcs->set_fifo_errdet_ovr_en(dccg, false);
+ }
+ }
+
+ /* do requested DISPCLK updates*/
+ if (clk_mgr->smu_present)
+ /*
+ * SMU uses discrete dispclk presets. We applied
+ * the same formula to increase our dppclk_khz
+ * to the next matching discrete value. By
+ * contract, we should use the preset dispclk
+ * floored in Mhz to describe the intended clock.
+ */
+ dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DISPCLK,
+ khz_to_mhz_floor(clk_mgr->base.clks.dispclk_khz));
+
+ if (dc->debug.override_dispclk_programming) {
+ REG_GET(DENTIST_DISPCLK_CNTL,
+ DENTIST_DISPCLK_WDIVIDER, &dentist_dispclk_wdivider_readback);
+
+ if (dentist_dispclk_wdivider_readback > new_dispclk_wdivider) { /* write only while the register still holds a larger DID than requested */
+ REG_UPDATE(DENTIST_DISPCLK_CNTL,
+ DENTIST_DISPCLK_WDIVIDER, new_dispclk_wdivider);
+ REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 2000);
+ }
+ }
+
+}
+
+/* Read DENTIST_DISPCLK_WDIVIDER and convert it to a DISPCLK frequency; 0 if the divider decodes to 0. */
+static int dcn32_get_dispclk_from_dentist(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	uint32_t did;
+	int divider;
+
+	REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, &did);
+	divider = dentist_get_divider_from_did(did);
+	if (!divider)
+		return 0;
+
+	/* Return DISPCLK freq in Khz */
+	return (DENTIST_DIVIDER_RANGE_SCALE_FACTOR * clk_mgr->base.dentist_vco_freq_khz) / divider;
+}
+
+/*
+ * True when the plane of @pipe is shown unscaled: its source rect matches the
+ * stream's addressable width/height and its destination rect has the same size.
+ */
+static bool dcn32_check_native_scaling(struct pipe_ctx *pipe)
+{
+	int src_w = pipe->plane_state->src_rect.width;
+	int src_h = pipe->plane_state->src_rect.height;
+
+	return pipe->stream->timing.h_addressable == src_w &&
+	       pipe->stream->timing.v_addressable == src_h &&
+	       pipe->plane_state->dst_rect.width == src_w &&
+	       pipe->plane_state->dst_rect.height == src_h;
+}
+
+/* Emit one "AutoDPMTest:" log line: the requested clocks from @new_clocks, the CLK*_CURRENT_CNT
+ * register readings, and timing details of the first four pipes that have a non-phantom stream.
+ */
+static void dcn32_auto_dpm_test_log(
+		struct dc_clocks *new_clocks,
+		struct clk_mgr_internal *clk_mgr,
+		struct dc_state *context)
+{
+	unsigned int dispclk_khz_reg, dppclk_khz_reg, dprefclk_khz_reg, dcfclk_khz_reg, dtbclk_khz_reg,
+			fclk_khz_reg, mall_ss_size_bytes;
+	int dramclk_khz_override, fclk_khz_override, num_fclk_levels;
+
+	struct pipe_ctx *pipe_ctx_list[MAX_PIPES];
+	int active_pipe_count = 0;
+
+	for (int i = 0; i < MAX_PIPES; i++) {
+		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+		if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) {
+			pipe_ctx_list[active_pipe_count] = pipe_ctx;
+			active_pipe_count++;
+		}
+	}
+
+	msleep(5);
+
+	mall_ss_size_bytes = context->bw_ctx.bw.dcn.mall_ss_size_bytes;
+
+	dispclk_khz_reg = REG_READ(CLK1_CLK0_CURRENT_CNT); // DISPCLK
+	dppclk_khz_reg = REG_READ(CLK1_CLK1_CURRENT_CNT); // DPPCLK
+	dprefclk_khz_reg = REG_READ(CLK1_CLK2_CURRENT_CNT); // DPREFCLK
+	dcfclk_khz_reg = REG_READ(CLK1_CLK3_CURRENT_CNT); // DCFCLK
+	dtbclk_khz_reg = REG_READ(CLK1_CLK4_CURRENT_CNT); // DTBCLK
+	fclk_khz_reg = REG_READ(CLK4_CLK0_CURRENT_CNT); // FCLK
+
+	// Overrides for these clocks in case there is no p_state change support
+	dramclk_khz_override = new_clocks->dramclk_khz;
+	fclk_khz_override = new_clocks->fclk_khz;
+
+	num_fclk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_fclk_levels - 1; // index of the last FCLK level, -1 if none
+
+	if (!new_clocks->p_state_change_support)
+		dramclk_khz_override = clk_mgr->base.bw_params->max_memclk_mhz * 1000;
+	if (!new_clocks->fclk_p_state_change_support && num_fclk_levels >= 0) // an empty table has no last entry to read
+		fclk_khz_override = clk_mgr->base.bw_params->clk_table.entries[num_fclk_levels].fclk_mhz * 1000;
+
+	////////////////////////////////////////////////////////////////////////////
+	// IMPORTANT: When adding more clocks to these logs, do NOT put a newline
+	// anywhere other than at the very end of the string.
+	//
+	// Formatting example (make sure to have " - " between each entry):
+	//
+	// AutoDPMTest: clk1:%d - clk2:%d - clk3:%d - clk4:%d\n"
+	////////////////////////////////////////////////////////////////////////////
+	if (active_pipe_count > 0 &&
+		new_clocks->dramclk_khz > 0 &&
+		new_clocks->fclk_khz > 0 &&
+		new_clocks->dcfclk_khz > 0 &&
+		new_clocks->dppclk_khz > 0) {
+		uint32_t pix_clk_list[MAX_PIPES] = {0};
+		int p_state_list[MAX_PIPES] = {0};
+		int disp_src_width_list[MAX_PIPES] = {0};
+		int disp_src_height_list[MAX_PIPES] = {0};
+		uint64_t disp_src_refresh_list[MAX_PIPES] = {0};
+		bool is_scaled_list[MAX_PIPES] = {0};
+
+		for (int i = 0; i < active_pipe_count; i++) {
+			struct pipe_ctx *curr_pipe_ctx = pipe_ctx_list[i];
+			uint64_t refresh_rate;
+
+			pix_clk_list[i] = curr_pipe_ctx->stream->timing.pix_clk_100hz;
+			p_state_list[i] = curr_pipe_ctx->p_state_type;
+
+			refresh_rate = (curr_pipe_ctx->stream->timing.pix_clk_100hz * (uint64_t)100 +
+				curr_pipe_ctx->stream->timing.v_total * (uint64_t)curr_pipe_ctx->stream->timing.h_total - (uint64_t)1);
+			refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.v_total);
+			refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.h_total);
+			disp_src_refresh_list[i] = refresh_rate;
+
+			if (curr_pipe_ctx->plane_state) {
+				is_scaled_list[i] = !(dcn32_check_native_scaling(curr_pipe_ctx));
+				disp_src_width_list[i] = curr_pipe_ctx->plane_state->src_rect.width;
+				disp_src_height_list[i] = curr_pipe_ctx->plane_state->src_rect.height;
+			}
+		}
+
+		DC_LOG_AUTO_DPM_TEST("AutoDPMTest: dramclk:%d - fclk:%d - "
+			"dcfclk:%d - dppclk:%d - dispclk_hw:%d - "
+			"dppclk_hw:%d - dprefclk_hw:%d - dcfclk_hw:%d - "
+			"dtbclk_hw:%d - fclk_hw:%d - pix_clk_0:%d - pix_clk_1:%d - "
+			"pix_clk_2:%d - pix_clk_3:%d - mall_ss_size:%d - p_state_type_0:%d - "
+			"p_state_type_1:%d - p_state_type_2:%d - p_state_type_3:%d - "
+			"pix_width_0:%d - pix_height_0:%d - refresh_rate_0:%lld - is_scaled_0:%d - "
+			"pix_width_1:%d - pix_height_1:%d - refresh_rate_1:%lld - is_scaled_1:%d - "
+			"pix_width_2:%d - pix_height_2:%d - refresh_rate_2:%lld - is_scaled_2:%d - "
+			"pix_width_3:%d - pix_height_3:%d - refresh_rate_3:%lld - is_scaled_3:%d - LOG_END\n",
+			dramclk_khz_override,
+			fclk_khz_override,
+			new_clocks->dcfclk_khz,
+			new_clocks->dppclk_khz,
+			dispclk_khz_reg,
+			dppclk_khz_reg,
+			dprefclk_khz_reg,
+			dcfclk_khz_reg,
+			dtbclk_khz_reg,
+			fclk_khz_reg,
+			pix_clk_list[0], pix_clk_list[1], pix_clk_list[2], pix_clk_list[3],
+			mall_ss_size_bytes,
+			p_state_list[0], p_state_list[1], p_state_list[2], p_state_list[3],
+			disp_src_width_list[0], disp_src_height_list[0], disp_src_refresh_list[0], is_scaled_list[0],
+			disp_src_width_list[1], disp_src_height_list[1], disp_src_refresh_list[1], is_scaled_list[1],
+			disp_src_width_list[2], disp_src_height_list[2], disp_src_refresh_list[2], is_scaled_list[2],
+			disp_src_width_list[3], disp_src_height_list[3], disp_src_refresh_list[3], is_scaled_list[3]);
+	}
+}
+
+ /*
+ * dcn32_update_clocks - apply the clock state requested by a DC state.
+ *
+ * @clk_mgr_base: clock manager; its cached clks are compared against the
+ * request and updated as each clock is accepted.
+ * @context: DC state; the requested clocks are context->bw_ctx.bw.dcn.clk.
+ * @safe_to_lower: forwarded to should_set_clock() and
+ * should_update_pstate_support() for every requested-vs-cached comparison.
+ *
+ * Sequence:
+ * 1. If dispclk is not cached yet, or force_clock_mode bit 0 is set, re-read
+ * the clocks from the DENTIST hardware and remember that a forced reset
+ * is in effect.
+ * 2. When an SMU is present, send the PMFW requests: display count, FCLK
+ * P-state, DCFCLK, deep-sleep DCFCLK, CAB ways and UCLK.
+ * 3. Update dppclk, dispclk and the DTB reference clock in the cache.
+ * 4. Program the DPP DTO and DENTIST dividers, ordered by whether dppclk
+ * went down or not.
+ * 5. Refresh the DMCU PSR wait loop and, if enabled, emit the auto-DPM log.
+ */
+ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
+ struct dc_state *context,
+ bool safe_to_lower)
+ {
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
+ struct dc *dc = clk_mgr_base->ctx->dc;
+ int display_count;
+ bool update_dppclk = false;
+ bool update_dispclk = false;
+ bool enter_display_off = false;
+ bool dpp_clock_lowered = false;
+ struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu;
+ bool force_reset = false;
+ bool update_uclk = false, update_fclk = false;
+ bool p_state_change_support;
+ bool fclk_p_state_change_support;
+
+ if (clk_mgr_base->clks.dispclk_khz == 0 ||
+ (dc->debug.force_clock_mode & 0x1)) {
+ /* This is from resume or boot up, if forced_clock cfg option used,
+ * we bypass program dispclk and DPPCLK, but need set them for S3.
+ */
+ force_reset = true;
+
+ dcn2_read_clocks_from_hw_dentist(clk_mgr_base);
+
+ /* Force_clock_mode 0x1: force reset the clock even it is the same clock
+ * as long as it is in Passive level.
+ */
+ }
+ display_count = clk_mgr_helper_get_active_display_cnt(dc, context);
+
+ if (display_count == 0)
+ enter_display_off = true;
+
+ /* Everything inside this block is a PMFW request and is skipped without an SMU */
+ if (clk_mgr->smu_present) {
+ /* The display count is only sent when "all displays off" and safe_to_lower agree */
+ if (enter_display_off == safe_to_lower)
+ dcn30_smu_set_num_of_displays(clk_mgr, display_count);
+
+ /* Remember the previous FCLK P-state so the "not supported" message below can detect a change */
+ clk_mgr_base->clks.fclk_prev_p_state_change_support = clk_mgr_base->clks.fclk_p_state_change_support;
+
+ fclk_p_state_change_support = new_clocks->fclk_p_state_change_support;
+
+ if (should_update_pstate_support(safe_to_lower, fclk_p_state_change_support, clk_mgr_base->clks.fclk_p_state_change_support) &&
+ !dc->work_arounds.clock_update_disable_mask.fclk) {
+ clk_mgr_base->clks.fclk_p_state_change_support = fclk_p_state_change_support;
+
+ /* To enable FCLK P-state switching, send FCLK_PSTATE_SUPPORTED message to PMFW */
+ if (clk_mgr_base->ctx->dce_version != DCN_VERSION_3_21 && clk_mgr_base->clks.fclk_p_state_change_support) {
+ /* Handle the code for sending a message to PMFW that FCLK P-state change is supported */
+ dcn32_smu_send_fclk_pstate_message(clk_mgr, FCLK_PSTATE_SUPPORTED);
+ }
+ }
+
+ /* Debug override: raise the requested DCFCLK to at least force_min_dcfclk_mhz */
+ if (dc->debug.force_min_dcfclk_mhz > 0)
+ new_clocks->dcfclk_khz = (new_clocks->dcfclk_khz > (dc->debug.force_min_dcfclk_mhz * 1000)) ?
+ new_clocks->dcfclk_khz : (dc->debug.force_min_dcfclk_mhz * 1000);
+
+ if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr_base->clks.dcfclk_khz) &&
+ !dc->work_arounds.clock_update_disable_mask.dcfclk) {
+ clk_mgr_base->clks.dcfclk_khz = new_clocks->dcfclk_khz;
+ dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DCFCLK, khz_to_mhz_ceil(clk_mgr_base->clks.dcfclk_khz));
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->dcfclk_deep_sleep_khz, clk_mgr_base->clks.dcfclk_deep_sleep_khz) &&
+ !dc->work_arounds.clock_update_disable_mask.dcfclk_ds) {
+ clk_mgr_base->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz;
+ dcn30_smu_set_min_deep_sleep_dcef_clk(clk_mgr, khz_to_mhz_ceil(clk_mgr_base->clks.dcfclk_deep_sleep_khz));
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->socclk_khz, clk_mgr_base->clks.socclk_khz))
+ /* We don't actually care about socclk, don't notify SMU of hard min */
+ clk_mgr_base->clks.socclk_khz = new_clocks->socclk_khz;
+
+ clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support;
+ clk_mgr_base->clks.prev_num_ways = clk_mgr_base->clks.num_ways;
+
+ /* An increase in CAB ways is sent here, before the UCLK requests; a decrease is sent after them */
+ if (clk_mgr_base->clks.num_ways != new_clocks->num_ways &&
+ clk_mgr_base->clks.num_ways < new_clocks->num_ways) {
+ clk_mgr_base->clks.num_ways = new_clocks->num_ways;
+ dcn32_smu_send_cab_for_uclk_message(clk_mgr, clk_mgr_base->clks.num_ways);
+ }
+
+ p_state_change_support = new_clocks->p_state_change_support;
+ if (should_update_pstate_support(safe_to_lower, p_state_change_support, clk_mgr_base->clks.p_state_change_support) &&
+ !dc->work_arounds.clock_update_disable_mask.uclk) {
+ clk_mgr_base->clks.p_state_change_support = p_state_change_support;
+
+ /* to disable P-State switching, set UCLK min = max */
+ if (!clk_mgr_base->clks.p_state_change_support) {
+ if (dc->clk_mgr->dc_mode_softmax_enabled) {
+ /* On DCN32x we will never have the functional UCLK min above the softmax
+ * since we calculate mode support based on softmax being the max UCLK
+ * frequency.
+ */
+ if (dc->debug.disable_dc_mode_overwrite) {
+ dcn30_smu_set_hard_max_by_freq(clk_mgr, PPCLK_UCLK, dc->clk_mgr->bw_params->max_memclk_mhz);
+ dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, dc->clk_mgr->bw_params->max_memclk_mhz);
+ } else
+ dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK,
+ dc->clk_mgr->bw_params->dc_mode_softmax_memclk);
+ } else {
+ dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, dc->clk_mgr->bw_params->max_memclk_mhz);
+ }
+ }
+ }
+
+ /* Tell PMFW whether to wait for the DMCUB ack, mirroring fw_based_mclk_switching */
+ if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching)
+ dcn32_smu_wait_for_dmub_ack_mclk(clk_mgr, true);
+ else
+ dcn32_smu_wait_for_dmub_ack_mclk(clk_mgr, false);
+
+ /* Always update saved value, even if new value not set due to P-State switching unsupported. Also check safe_to_lower for FCLK */
+ if (safe_to_lower && (clk_mgr_base->clks.fclk_p_state_change_support != clk_mgr_base->clks.fclk_prev_p_state_change_support)) {
+ update_fclk = true;
+ }
+
+ if (clk_mgr_base->ctx->dce_version != DCN_VERSION_3_21 && !clk_mgr_base->clks.fclk_p_state_change_support && update_fclk &&
+ !dc->work_arounds.clock_update_disable_mask.fclk) {
+ /* Handle code for sending a message to PMFW that FCLK P-state change is not supported */
+ dcn32_smu_send_fclk_pstate_message(clk_mgr, FCLK_PSTATE_NOTSUPPORTED);
+ }
+
+ /* Always update saved value, even if new value not set due to P-State switching unsupported */
+ if (should_set_clock(safe_to_lower, new_clocks->dramclk_khz, clk_mgr_base->clks.dramclk_khz) &&
+ !dc->work_arounds.clock_update_disable_mask.uclk) {
+ clk_mgr_base->clks.dramclk_khz = new_clocks->dramclk_khz;
+ update_uclk = true;
+ }
+
+ /* set UCLK to requested value if P-State switching is supported, or to re-enable P-State switching */
+ if (clk_mgr_base->clks.p_state_change_support &&
+ (update_uclk || !clk_mgr_base->clks.prev_p_state_change_support) &&
+ !dc->work_arounds.clock_update_disable_mask.uclk) {
+ if (dc->clk_mgr->dc_mode_softmax_enabled && dc->debug.disable_dc_mode_overwrite)
+ dcn30_smu_set_hard_max_by_freq(clk_mgr, PPCLK_UCLK,
+ max((int)dc->clk_mgr->bw_params->dc_mode_softmax_memclk, khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz)));
+
+ dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz));
+ }
+
+ /* A decrease in CAB ways is sent only after the UCLK requests above */
+ if (clk_mgr_base->clks.num_ways != new_clocks->num_ways &&
+ clk_mgr_base->clks.num_ways > new_clocks->num_ways) {
+ clk_mgr_base->clks.num_ways = new_clocks->num_ways;
+ dcn32_smu_send_cab_for_uclk_message(clk_mgr, clk_mgr_base->clks.num_ways);
+ }
+ }
+
+ dcn32_update_dppclk_dispclk_freq(clk_mgr, new_clocks);
+ if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr_base->clks.dppclk_khz)) {
+ if (clk_mgr_base->clks.dppclk_khz > new_clocks->dppclk_khz)
+ dpp_clock_lowered = true;
+
+ clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz;
+
+ /* When dppclk is lowered, the SMU request is deferred to the DTO/DENTIST sequence below */
+ if (clk_mgr->smu_present && !dpp_clock_lowered)
+ /*
+ * SMU uses discrete dppclk presets. We applied
+ * the same formula to increase our dppclk_khz
+ * to the next matching discrete value. By
+ * contract, we should use the preset dppclk
+ * floored in Mhz to describe the intended clock.
+ */
+ dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DPPCLK,
+ khz_to_mhz_floor(clk_mgr_base->clks.dppclk_khz));
+
+ update_dppclk = true;
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
+ clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
+
+ update_dispclk = true;
+ }
+
+ /* With DTBCLK disabled, fall back to the first clock-table entry's dtbclk */
+ if (!new_clocks->dtbclk_en) {
+ new_clocks->ref_dtbclk_khz = clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz * 1000;
+ }
+
+ /* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */
+ if (!dc->debug.disable_dtb_ref_clk_switch &&
+ should_set_clock(safe_to_lower, new_clocks->ref_dtbclk_khz / 1000, clk_mgr_base->clks.ref_dtbclk_khz / 1000)) {
+ /* DCCG requires KHz precision for DTBCLK */
+ /* NOTE(review): unlike the requests above, this SMU call is not guarded by smu_present - confirm intended */
+ clk_mgr_base->clks.ref_dtbclk_khz =
+ dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DTBCLK, khz_to_mhz_ceil(new_clocks->ref_dtbclk_khz));
+
+ dcn32_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz);
+ }
+
+ if (dc->config.forced_clocks == false || (force_reset && safe_to_lower)) {
+ if (dpp_clock_lowered) {
+ /* if clock is being lowered, increase DTO before lowering refclk */
+ dcn32_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
+ dcn32_update_clocks_update_dentist(clk_mgr, context);
+ if (clk_mgr->smu_present)
+ /*
+ * SMU uses discrete dppclk presets. We applied
+ * the same formula to increase our dppclk_khz
+ * to the next matching discrete value. By
+ * contract, we should use the preset dppclk
+ * floored in Mhz to describe the intended clock.
+ */
+ dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DPPCLK,
+ khz_to_mhz_floor(clk_mgr_base->clks.dppclk_khz));
+ } else {
+ /* if clock is being raised, increase refclk before lowering DTO */
+ if (update_dppclk || update_dispclk)
+ dcn32_update_clocks_update_dentist(clk_mgr, context);
+ /* There is a check inside dcn20_update_clocks_update_dpp_dto which ensures
+ * that we do not lower dto when it is not safe to lower. We do not need to
+ * compare the current and new dppclk before calling this function.
+ */
+ dcn32_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
+ }
+ }
+
+ if (update_dispclk && dmcu && dmcu->funcs->is_dmcu_initialized(dmcu))
+ /*update dmcu for wait_loop count*/
+ dmcu->funcs->set_psr_wait_loop(dmcu,
+ clk_mgr_base->clks.dispclk_khz / 1000 / 7);
+
+ if (dc->config.enable_auto_dpm_test_logs) {
+ dcn32_auto_dpm_test_log(new_clocks, clk_mgr, context);
+ }
+ }
+
+ /*
+  * Compute the VCO frequency in kHz from the PLL request register.
+  *
+  * The register is read from CLK0 on GC 11.0.2 parts and from CLK1 otherwise.
+  * Its FbMult_int and FbMult_frac fields are combined into one fixed31_32
+  * value, multiplied by dfs_ref_freq_khz, and the integer part is returned.
+  */
+ static uint32_t dcn32_get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
+ {
+ 	uint32_t fb_mult_reg;
+ 	struct fixed31_32 fb_mult;
+
+ 	/* get FbMult value */
+ 	fb_mult_reg = ASICREV_IS_GC_11_0_2(clk_mgr->base.ctx->asic_id.hw_internal_rev) ?
+ 			REG_READ(CLK0_CLK_PLL_REQ) : REG_READ(CLK1_CLK_PLL_REQ);
+
+ 	/*
+ 	 * The integer field is masked out and converted to fixed point; the
+ 	 * fractional field is then OR-ed straight into the raw value. This
+ 	 * relies on the int part sitting on the right edge of the register
+ 	 * and the frac part on the left edge.
+ 	 */
+ 	fb_mult = dc_fixpt_from_int(fb_mult_reg & clk_mgr->clk_mgr_mask->FbMult_int);
+ 	fb_mult.value |= fb_mult_reg & clk_mgr->clk_mgr_mask->FbMult_frac;
+
+ 	/* scale by the DFS reference frequency and drop the fraction */
+ 	return dc_fixpt_floor(dc_fixpt_mul_int(fb_mult, clk_mgr->dfs_ref_freq_khz));
+ }
+
+ /*
+  * Snapshot the display clocks by reading each DFS slice divider ID (DID) and
+  * converting it to kHz against the cached DENTIST VCO frequency.
+  *
+  * @regs_and_bypass: receives dispclk, dppclk, dprefclk, dcfclk and dtbclk
+  * @clk_mgr_base: clock manager; supplies register access and
+  *                dentist_vco_freq_khz
+  * @log_info: not used by this implementation
+  *
+  * GC 11.0.2 parts expose the DFS slices under CLK0, all others under CLK1.
+  * Slice assignment: 0 = DISPCLK, 1 = DPPCLK, 2 = DPREFCLK, 3 = DCFCLK,
+  * 4 = DTBCLK.
+  */
+ static void dcn32_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
+ 		struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info)
+ {
+ 	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ 	const bool use_clk0 = ASICREV_IS_GC_11_0_2(clk_mgr->base.ctx->asic_id.hw_internal_rev);
+ 	uint32_t slice0_did, slice1_did, slice2_did, slice3_did, slice4_did;
+ 	uint32_t divider;
+
+ 	/* Read all five slice DIDs first, in slice order */
+ 	slice0_did = use_clk0 ? REG_READ(CLK0_CLK0_DFS_CNTL) : REG_READ(CLK1_CLK0_DFS_CNTL);
+ 	slice1_did = use_clk0 ? REG_READ(CLK0_CLK1_DFS_CNTL) : REG_READ(CLK1_CLK1_DFS_CNTL);
+ 	slice2_did = use_clk0 ? REG_READ(CLK0_CLK2_DFS_CNTL) : REG_READ(CLK1_CLK2_DFS_CNTL);
+ 	slice3_did = use_clk0 ? REG_READ(CLK0_CLK3_DFS_CNTL) : REG_READ(CLK1_CLK3_DFS_CNTL);
+ 	slice4_did = use_clk0 ? REG_READ(CLK0_CLK4_DFS_CNTL) : REG_READ(CLK1_CLK4_DFS_CNTL);
+
+ 	/* DISPCLK in kHz */
+ 	divider = dentist_get_divider_from_did(slice0_did);
+ 	regs_and_bypass->dispclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+ 			* clk_mgr->base.dentist_vco_freq_khz) / divider;
+
+ 	/* DPPCLK in kHz */
+ 	divider = dentist_get_divider_from_did(slice1_did);
+ 	regs_and_bypass->dppclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+ 			* clk_mgr->base.dentist_vco_freq_khz) / divider;
+
+ 	/* DPREFCLK in kHz */
+ 	divider = dentist_get_divider_from_did(slice2_did);
+ 	regs_and_bypass->dprefclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+ 			* clk_mgr->base.dentist_vco_freq_khz) / divider;
+
+ 	/* DCFCLK in kHz */
+ 	divider = dentist_get_divider_from_did(slice3_did);
+ 	regs_and_bypass->dcfclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+ 			* clk_mgr->base.dentist_vco_freq_khz) / divider;
+
+ 	/* DTBCLK in kHz */
+ 	divider = dentist_get_divider_from_did(slice4_did);
+ 	regs_and_bypass->dtbclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+ 			* clk_mgr->base.dentist_vco_freq_khz) / divider;
+ }
+
+ /*
+  * Read the GPU PLL spread-spectrum entry from the VBIOS and record it for
+  * DPREFCLK.
+  *
+  * Nothing is changed unless the BIOS reports at least one entry, entry 0 is
+  * fetched successfully and its percentage is non-zero. In that case
+  * ss_on_dprefclk and dprefclk_ss_divider are set; dprefclk_ss_percentage is
+  * set only when the entry is not centre mode.
+  */
+ static void dcn32_clock_read_ss_info(struct clk_mgr_internal *clk_mgr)
+ {
+ 	struct dc_bios *bios = clk_mgr->base.ctx->dc_bios;
+ 	struct spread_spectrum_info ss = { { 0 } };
+ 	enum bp_result status;
+
+ 	if (!bios->funcs->get_ss_entry_number(bios, AS_SIGNAL_TYPE_GPU_PLL))
+ 		return;
+
+ 	status = bios->funcs->get_spread_spectrum_info(
+ 			bios, AS_SIGNAL_TYPE_GPU_PLL, 0, &ss);
+
+ 	/* A non-zero spreadSpectrumPercentage is the sign that SS is enabled */
+ 	if (status != BP_RESULT_OK || ss.spread_spectrum_percentage == 0)
+ 		return;
+
+ 	clk_mgr->ss_on_dprefclk = true;
+ 	clk_mgr->dprefclk_ss_divider = ss.spread_percentage_divider;
+
+ 	/*
+ 	 * Currently for DP Reference clock we need only the SS percentage
+ 	 * for downspread.
+ 	 */
+ 	if (ss.type.CENTER_MODE == 0)
+ 		clk_mgr->dprefclk_ss_percentage = ss.spread_spectrum_percentage;
+ }
+ /*
+  * Rebuild the watermark range table and hand it to PMFW.
+  *
+  * Does nothing when no SMU is present or the table was never allocated.
+  * Otherwise the table is zeroed, every valid nv_entries[] row is copied in
+  * (set index and wm_type), the table address is sent as high/low 32-bit
+  * halves, and the DRAM-to-SMU transfer is triggered.
+  */
+ static void dcn32_notify_wm_ranges(struct clk_mgr *clk_mgr_base)
+ {
+ 	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ 	WatermarksExternal_t *wm_table = (WatermarksExternal_t *) clk_mgr->wm_range_table;
+ 	unsigned int set;
+
+ 	if (!clk_mgr->smu_present || !wm_table)
+ 		return;
+
+ 	memset(wm_table, 0, sizeof(*wm_table));
+
+ 	/* collect valid ranges, place in pmfw table */
+ 	for (set = 0; set < WM_SET_COUNT; set++) {
+ 		if (!clk_mgr->base.bw_params->wm_table.nv_entries[set].valid)
+ 			continue;
+
+ 		wm_table->Watermarks.WatermarkRow[set].WmSetting = set;
+ 		wm_table->Watermarks.WatermarkRow[set].Flags = clk_mgr->base.bw_params->wm_table.nv_entries[set].pmfw_breakdown.wm_type;
+ 	}
+
+ 	dcn30_smu_set_dram_addr_high(clk_mgr, clk_mgr->wm_range_table_addr >> 32);
+ 	dcn30_smu_set_dram_addr_low(clk_mgr, clk_mgr->wm_range_table_addr & 0xFFFFFFFF);
+ 	dcn32_smu_transfer_wm_table_dram_2_smu(clk_mgr);
+ }
+
+ /*
+  * Set the UCLK hard minimum (no-op without an SMU).
+  *
+  * @current_mode false: use the first clock-table entry's memclk.
+  * @current_mode true:  use the cached dramclk (rounded up to MHz) when
+  *                      P-state change is supported, otherwise max_memclk_mhz.
+  */
+ static void dcn32_set_hard_min_memclk(struct clk_mgr *clk_mgr_base, bool current_mode)
+ {
+ 	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ 	unsigned int min_memclk_mhz;
+
+ 	if (!clk_mgr->smu_present)
+ 		return;
+
+ 	if (!current_mode)
+ 		min_memclk_mhz = clk_mgr_base->bw_params->clk_table.entries[0].memclk_mhz;
+ 	else if (clk_mgr_base->clks.p_state_change_support)
+ 		min_memclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz);
+ 	else
+ 		min_memclk_mhz = clk_mgr_base->bw_params->max_memclk_mhz;
+
+ 	dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, min_memclk_mhz);
+ }
+
+ /* Set the UCLK hard maximum to bw_params->max_memclk_mhz; no-op without an SMU */
+ static void dcn32_set_hard_max_memclk(struct clk_mgr *clk_mgr_base)
+ {
+ 	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+ 	if (clk_mgr->smu_present)
+ 		dcn30_smu_set_hard_max_by_freq(clk_mgr, PPCLK_UCLK,
+ 				clk_mgr_base->bw_params->max_memclk_mhz);
+ }
+
+ /*
+  * Re-read the UCLK and FCLK tables and DC-mode limits from the SMU, then
+  * refresh the bandwidth bounding box. No-op without an SMU.
+  *
+  * The memclk level count is forced to at least one. The clock table entry
+  * count becomes the larger of the memclk and fclk level counts, and
+  * max_memclk_mhz is taken from the last memclk level. If DPM turns out not
+  * to be present, the DPM table is patched before the bounding box update.
+  */
+ static void dcn32_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base)
+ {
+ 	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ 	struct clk_limit_num_entries *levels = &clk_mgr_base->bw_params->clk_table.num_entries_per_clk;
+ 	unsigned int table_entries;
+
+ 	if (!clk_mgr->smu_present)
+ 		return;
+
+ 	/* Refresh memclk states and the DC-mode memclk limit */
+ 	dcn32_init_single_clock(clk_mgr, PPCLK_UCLK,
+ 			&clk_mgr_base->bw_params->clk_table.entries[0].memclk_mhz,
+ 			&levels->num_memclk_levels);
+ 	clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_UCLK);
+ 	clk_mgr_base->bw_params->dc_mode_softmax_memclk = clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz;
+
+ 	/* memclk must have at least one level */
+ 	if (!levels->num_memclk_levels)
+ 		levels->num_memclk_levels = 1;
+
+ 	/* Refresh fclk states and the DC-mode fclk limit */
+ 	dcn32_init_single_clock(clk_mgr, PPCLK_FCLK,
+ 			&clk_mgr_base->bw_params->clk_table.entries[0].fclk_mhz,
+ 			&levels->num_fclk_levels);
+ 	clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_FCLK);
+
+ 	/* The table needs as many entries as the deeper of the two clocks */
+ 	if (levels->num_memclk_levels >= levels->num_fclk_levels)
+ 		table_entries = levels->num_memclk_levels;
+ 	else
+ 		table_entries = levels->num_fclk_levels;
+
+ 	clk_mgr_base->bw_params->max_memclk_mhz =
+ 			clk_mgr_base->bw_params->clk_table.entries[levels->num_memclk_levels - 1].memclk_mhz;
+
+ 	if (table_entries)
+ 		clk_mgr_base->bw_params->clk_table.num_entries = table_entries;
+ 	else
+ 		clk_mgr_base->bw_params->clk_table.num_entries = 1;
+
+ 	if (clk_mgr->dpm_present && !table_entries)
+ 		clk_mgr->dpm_present = false;
+
+ 	if (!clk_mgr->dpm_present)
+ 		dcn32_patch_dpm_table(clk_mgr_base->bw_params);
+
+ 	/* Refresh bounding box inside the DC_FP_START/DC_FP_END bracket */
+ 	DC_FP_START();
+ 	clk_mgr_base->ctx->dc->res_pool->funcs->update_bw_bounding_box(
+ 			clk_mgr->base.ctx->dc, clk_mgr_base->bw_params);
+ 	DC_FP_END();
+ }
+
+ /*
+  * Compare two clock states on the fields this clock manager acts on:
+  * dispclk, dppclk, dcfclk, deep-sleep dcfclk, dramclk and the two P-state
+  * support flags. Returns true only when all of them match.
+  */
+ static bool dcn32_are_clock_states_equal(struct dc_clocks *a,
+ 		struct dc_clocks *b)
+ {
+ 	return a->dispclk_khz == b->dispclk_khz &&
+ 	       a->dppclk_khz == b->dppclk_khz &&
+ 	       a->dcfclk_khz == b->dcfclk_khz &&
+ 	       a->dcfclk_deep_sleep_khz == b->dcfclk_deep_sleep_khz &&
+ 	       a->dramclk_khz == b->dramclk_khz &&
+ 	       a->p_state_change_support == b->p_state_change_support &&
+ 	       a->fclk_p_state_change_support == b->fclk_p_state_change_support;
+ }
+
+ /* Ask the SMU to apply the PME workaround; no-op without an SMU */
+ static void dcn32_enable_pme_wa(struct clk_mgr *clk_mgr_base)
+ {
+ 	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+ 	if (clk_mgr->smu_present)
+ 		dcn32_smu_set_pme_workaround(clk_mgr);
+ }
+
+ /* Report the clock manager's smu_present flag */
+ static bool dcn32_is_smu_present(struct clk_mgr *clk_mgr_base)
+ {
+ 	return TO_CLK_MGR_INTERNAL(clk_mgr_base)->smu_present;
+ }
+
+ /* Set the UCLK hard maximum to @memclk_mhz; no-op without an SMU */
+ static void dcn32_set_max_memclk(struct clk_mgr *clk_mgr_base, unsigned int memclk_mhz)
+ {
+ 	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+ 	if (clk_mgr->smu_present)
+ 		dcn30_smu_set_hard_max_by_freq(clk_mgr, PPCLK_UCLK, memclk_mhz);
+ }
+
+ /* Set the UCLK hard minimum to @memclk_mhz; no-op without an SMU */
+ static void dcn32_set_min_memclk(struct clk_mgr *clk_mgr_base, unsigned int memclk_mhz)
+ {
+ 	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+ 	if (clk_mgr->smu_present)
+ 		dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, memclk_mhz);
+ }
+
+ /*
+ * Clock manager callback table installed by dcn32_clk_mgr_construct().
+ *
+ * The two reference-clock getters are the dce12 and dcn31 implementations;
+ * every other entry is a dcn32_* function.
+ */
+ static struct clk_mgr_funcs dcn32_funcs = {
+ .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
+ .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
+ .update_clocks = dcn32_update_clocks,
+ .dump_clk_registers = dcn32_dump_clk_registers,
+ .init_clocks = dcn32_init_clocks,
+ .notify_wm_ranges = dcn32_notify_wm_ranges,
+ .set_hard_min_memclk = dcn32_set_hard_min_memclk,
+ .set_hard_max_memclk = dcn32_set_hard_max_memclk,
+ .set_max_memclk = dcn32_set_max_memclk,
+ .set_min_memclk = dcn32_set_min_memclk,
+ .get_memclk_states_from_smu = dcn32_get_memclk_states_from_smu,
+ .are_clock_states_equal = dcn32_are_clock_states_equal,
+ .enable_pme_wa = dcn32_enable_pme_wa,
+ .is_smu_present = dcn32_is_smu_present,
+ .get_dispclk_from_dentist = dcn32_get_dispclk_from_dentist,
+ };
+
+ /*
+ * dcn32_clk_mgr_construct - initialise a DCN32 clock manager instance.
+ *
+ * @ctx: DC context stored in the clock manager and used for register access.
+ * @clk_mgr: instance to initialise.
+ * @pp_smu: not used by this function.
+ * @dccg: DCCG block stored in the clock manager.
+ *
+ * Selects the dcn321 or dcn32 register/shift/mask tables by ASIC revision,
+ * seeds default clock values, reads the VCO frequency and a boot snapshot
+ * of the clocks from hardware, reads spread-spectrum info, then allocates
+ * bw_params and the watermark range table.
+ *
+ * The function returns void. On allocation failure it hits
+ * BREAK_TO_DEBUGGER() and returns early, leaving the corresponding pointer
+ * NULL.
+ */
+ void dcn32_clk_mgr_construct(
+ struct dc_context *ctx,
+ struct clk_mgr_internal *clk_mgr,
+ struct pp_smu_funcs *pp_smu,
+ struct dccg *dccg)
+ {
+ struct clk_log_info log_info = {0};
+
+ clk_mgr->base.ctx = ctx;
+ clk_mgr->base.funcs = &dcn32_funcs;
+ if (ASICREV_IS_GC_11_0_2(clk_mgr->base.ctx->asic_id.hw_internal_rev)) {
+ clk_mgr->regs = &clk_mgr_regs_dcn321;
+ clk_mgr->clk_mgr_shift = &clk_mgr_shift_dcn321;
+ clk_mgr->clk_mgr_mask = &clk_mgr_mask_dcn321;
+ } else {
+ clk_mgr->regs = &clk_mgr_regs_dcn32;
+ clk_mgr->clk_mgr_shift = &clk_mgr_shift_dcn32;
+ clk_mgr->clk_mgr_mask = &clk_mgr_mask_dcn32;
+ }
+
+ clk_mgr->dccg = dccg;
+ clk_mgr->dfs_bypass_disp_clk = 0;
+
+ /* Spread-spectrum defaults; dcn32_clock_read_ss_info() below may override them */
+ clk_mgr->dprefclk_ss_percentage = 0;
+ clk_mgr->dprefclk_ss_divider = 1000;
+ clk_mgr->ss_on_dprefclk = false;
+ /* Must be set before dcn32_get_vco_frequency_from_reg(), which multiplies by it */
+ clk_mgr->dfs_ref_freq_khz = 100000;
+
+ /* Changed from DCN3.2_clock_frequency doc to match
+ * dcn32_dump_clk_registers from 4 * dentist_vco_freq_khz /
+ * dprefclk DID divider
+ */
+ clk_mgr->base.dprefclk_khz = 716666;
+ if (ctx->dc->debug.disable_dtb_ref_clk_switch) {
+ //initialize DTB ref clock value if DPM disabled
+ if (ctx->dce_version == DCN_VERSION_3_21)
+ clk_mgr->base.clks.ref_dtbclk_khz = 477800;
+ else
+ clk_mgr->base.clks.ref_dtbclk_khz = 268750;
+ }
+
+
+ /* integer part is now VCO frequency in kHz */
+ clk_mgr->base.dentist_vco_freq_khz = dcn32_get_vco_frequency_from_reg(clk_mgr);
+
+ /* in case we don't get a value from the register, use default */
+ if (clk_mgr->base.dentist_vco_freq_khz == 0)
+ clk_mgr->base.dentist_vco_freq_khz = 4300000; /* Updated as per HW docs */
+
+ /* The snapshot conversion uses dentist_vco_freq_khz, so it must follow the VCO setup above */
+ dcn32_dump_clk_registers(&clk_mgr->base.boot_snapshot, &clk_mgr->base, &log_info);
+
+ /* With DTB ref clock switching disabled, the boot-time DTBCLK replaces the hard-coded default */
+ if (ctx->dc->debug.disable_dtb_ref_clk_switch &&
+ clk_mgr->base.clks.ref_dtbclk_khz != clk_mgr->base.boot_snapshot.dtbclk) {
+ clk_mgr->base.clks.ref_dtbclk_khz = clk_mgr->base.boot_snapshot.dtbclk;
+ }
+
+ /* Likewise prefer the boot-time DPREFCLK when the snapshot produced a non-zero value */
+ if (clk_mgr->base.boot_snapshot.dprefclk != 0) {
+ clk_mgr->base.dprefclk_khz = clk_mgr->base.boot_snapshot.dprefclk;
+ }
+ dcn32_clock_read_ss_info(clk_mgr);
+
+ clk_mgr->dfs_bypass_enabled = false;
+
+ clk_mgr->smu_present = false;
+
+ clk_mgr->base.bw_params = kzalloc(sizeof(*clk_mgr->base.bw_params), GFP_KERNEL);
+ if (!clk_mgr->base.bw_params) {
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
+ /* need physical address of table to give to PMFW */
+ clk_mgr->wm_range_table = dm_helpers_allocate_gpu_mem(clk_mgr->base.ctx,
+ DC_MEM_ALLOC_TYPE_GART, sizeof(WatermarksExternal_t),
+ &clk_mgr->wm_range_table_addr);
+ if (!clk_mgr->wm_range_table) {
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+ }
+
+ /*
+  * Release what dcn32_clk_mgr_construct() allocated: bw_params and, if it
+  * was allocated, the GART-backed watermark range table.
+  */
+ void dcn32_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr)
+ {
+ 	kfree(clk_mgr->base.bw_params);
+
+ 	if (!clk_mgr->wm_range_table)
+ 		return;
+
+ 	dm_helpers_free_gpu_mem(clk_mgr->base.ctx, DC_MEM_ALLOC_TYPE_GART,
+ 			clk_mgr->wm_range_table);
+ }
+
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.h
new file mode 100644
index 000000000000..186daada7b03
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#ifndef __DCN32_CLK_MGR_H_
+#define __DCN32_CLK_MGR_H_
+
+void dcn32_init_clocks(struct clk_mgr *clk_mgr_base);
+
+void dcn32_clk_mgr_construct(struct dc_context *ctx,
+ struct clk_mgr_internal *clk_mgr,
+ struct pp_smu_funcs *pp_smu,
+ struct dccg *dccg);
+
+void dcn32_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
+ struct dc_state *context, bool safe_to_lower);
+
+void dcn32_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr);
+
+
+
+#endif /* __DCN32_CLK_MGR_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c
new file mode 100644
index 000000000000..5d80fdf63ffc
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c
@@ -0,0 +1,308 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dcn32_clk_mgr_smu_msg.h"
+
+#include "clk_mgr_internal.h"
+#include "reg_helper.h"
+#include "dalsmc.h"
+#include "dcn32_smu13_driver_if.h"
+
+#define mmDAL_MSG_REG 0x1628A
+#define mmDAL_ARG_REG 0x16273
+#define mmDAL_RESP_REG 0x16274
+
+#define REG(reg_name) \
+ mm ## reg_name
+
+#include "logger_types.h"
+
+#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
+
+
+/*
+ * Function to be used instead of REG_WAIT macro because the wait ends when
+ * the register is NOT EQUAL to zero, and because the translation in msg_if.h
+ * won't work with REG_WAIT.
+ */
+static uint32_t dcn32_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, unsigned int delay_us, unsigned int max_retries)
+{
+ const uint32_t initial_max_retries = max_retries;
+ uint32_t reg = 0;
+
+ do {
+ reg = REG_READ(DAL_RESP_REG);
+ if (reg)
+ break;
+
+ if (delay_us >= 1000)
+ msleep(delay_us/1000);
+ else if (delay_us > 0)
+ udelay(delay_us);
+ } while (max_retries--);
+
+ TRACE_SMU_MSG_DELAY(0, 0, delay_us * (initial_max_retries - max_retries), clk_mgr->base.ctx);
+
+
+ return reg;
+}
+
+ /*
+  * Send one message to the SMU through the DAL mailbox registers and wait
+  * for the reply.
+  *
+  * @msg_id: message ID written to DAL_MSG_REG
+  * @param_in: argument written to DAL_ARG_REG
+  * @param_out: if non-NULL and the SMU answers DALSMC_Result_OK, receives the
+  *             value read back from DAL_ARG_REG
+  *
+  * Returns true when the response equals DALSMC_Result_OK, false otherwise
+  * (including when no response arrived within the retries).
+  */
+ static bool dcn32_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uint32_t msg_id, uint32_t param_in, uint32_t *param_out)
+ {
+ 	uint32_t result;
+
+ 	/* Wait for response register to be ready */
+ 	dcn32_smu_wait_for_response(clk_mgr, 10, 200000);
+
+ 	/*
+ 	 * Handshake: clear the response, load the argument, then write the
+ 	 * message ID, which triggers the transaction.
+ 	 */
+ 	REG_WRITE(DAL_RESP_REG, 0);
+ 	REG_WRITE(DAL_ARG_REG, param_in);
+ 	REG_WRITE(DAL_MSG_REG, msg_id);
+
+ 	TRACE_SMU_MSG(msg_id, param_in, clk_mgr->base.ctx);
+
+ 	/* Wait for response */
+ 	result = dcn32_smu_wait_for_response(clk_mgr, 10, 200000);
+ 	if (result != DALSMC_Result_OK)
+ 		return false;
+
+ 	if (param_out)
+ 		*param_out = REG_READ(DAL_ARG_REG);
+
+ 	return true;
+ }
+
+/*
+ * Use these functions to return back delay information so we can aggregate the total
+ * delay when requesting hardmin clk
+ *
+ * dcn32_smu_wait_for_response_delay
+ * dcn32_smu_send_msg_with_param_delay
+ *
+ */
+static uint32_t dcn32_smu_wait_for_response_delay(struct clk_mgr_internal *clk_mgr, unsigned int delay_us, unsigned int max_retries, unsigned int *total_delay_us)
+{
+ uint32_t reg = 0;
+ *total_delay_us = 0;
+
+ do {
+ reg = REG_READ(DAL_RESP_REG);
+ if (reg)
+ break;
+
+ if (delay_us >= 1000)
+ msleep(delay_us/1000);
+ else if (delay_us > 0)
+ udelay(delay_us);
+ *total_delay_us += delay_us;
+ } while (max_retries--);
+
+ TRACE_SMU_MSG_DELAY(0, 0, *total_delay_us, clk_mgr->base.ctx);
+
+ return reg;
+}
+
+ /*
+  * Same mailbox transaction as dcn32_smu_send_msg_with_param(), additionally
+  * reporting how long it waited.
+  *
+  * @total_delay_us: on success, the delay spent waiting before the send plus
+  *                  the delay spent waiting for the reply; on failure, the
+  *                  pre-send delay plus a fixed 2000000 us.
+  *
+  * Returns true when the SMU answers DALSMC_Result_OK.
+  */
+ static bool dcn32_smu_send_msg_with_param_delay(struct clk_mgr_internal *clk_mgr, uint32_t msg_id, uint32_t param_in, uint32_t *param_out, unsigned int *total_delay_us)
+ {
+ 	unsigned int ready_wait_us, reply_wait_us;
+ 	uint32_t result;
+
+ 	*total_delay_us = 0;
+
+ 	/* Wait for response register to be ready */
+ 	dcn32_smu_wait_for_response_delay(clk_mgr, 10, 200000, &ready_wait_us);
+
+ 	/* Clear response, load the argument, then trigger with the message ID */
+ 	REG_WRITE(DAL_RESP_REG, 0);
+ 	REG_WRITE(DAL_ARG_REG, param_in);
+ 	REG_WRITE(DAL_MSG_REG, msg_id);
+
+ 	TRACE_SMU_MSG(msg_id, param_in, clk_mgr->base.ctx);
+
+ 	/* Wait for response */
+ 	result = dcn32_smu_wait_for_response_delay(clk_mgr, 10, 200000, &reply_wait_us);
+ 	if (result != DALSMC_Result_OK) {
+ 		*total_delay_us = ready_wait_us + 2000000;
+ 		return false;
+ 	}
+
+ 	if (param_out)
+ 		*param_out = REG_READ(DAL_ARG_REG);
+
+ 	*total_delay_us = ready_wait_us + reply_wait_us;
+ 	return true;
+ }
+
+ /*
+  * Tell the SMU whether FCLK P-state switching is allowed by sending
+  * DALSMC_MSG_SetFclkSwitchAllow with FCLK_PSTATE_SUPPORTED or
+  * FCLK_PSTATE_NOTSUPPORTED.
+  */
+ void dcn32_smu_send_fclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool enable)
+ {
+ 	uint32_t pstate_arg;
+
+ 	smu_print("FCLK P-state support value is : %d\n", enable);
+
+ 	if (enable)
+ 		pstate_arg = FCLK_PSTATE_SUPPORTED;
+ 	else
+ 		pstate_arg = FCLK_PSTATE_NOTSUPPORTED;
+
+ 	dcn32_smu_send_msg_with_param(clk_mgr,
+ 			DALSMC_MSG_SetFclkSwitchAllow, pstate_arg, NULL);
+ }
+
+ /*
+  * Send the CAB allocation for UCLK P-state to the SMU.
+  *
+  * The argument carries @num_ways shifted left by one, with bit 0 set
+  * whenever @num_ways is non-zero.
+  */
+ void dcn32_smu_send_cab_for_uclk_message(struct clk_mgr_internal *clk_mgr, unsigned int num_ways)
+ {
+ 	uint32_t cab_arg = num_ways << 1;
+
+ 	if (num_ways > 0)
+ 		cab_arg |= 1;
+
+ 	dcn32_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_SetCabForUclkPstate, cab_arg, NULL);
+ 	smu_print("Numways for SubVP : %d\n", num_ways);
+ }
+
+ /* Send DALSMC_MSG_TransferTableDram2Smu for the TABLE_WATERMARKS table */
+ void dcn32_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr)
+ {
+ 	const uint32_t table_id = TABLE_WATERMARKS;
+
+ 	smu_print("SMU Transfer WM table DRAM 2 SMU\n");
+
+ 	dcn32_smu_send_msg_with_param(clk_mgr,
+ 			DALSMC_MSG_TransferTableDram2Smu, table_id, NULL);
+ }
+
+ /* Send DALSMC_MSG_BacoAudioD3PME with a zero argument; the reply is ignored */
+ void dcn32_smu_set_pme_workaround(struct clk_mgr_internal *clk_mgr)
+ {
+ 	const uint32_t no_arg = 0;
+
+ 	smu_print("SMU Set PME workaround\n");
+
+ 	dcn32_smu_send_msg_with_param(clk_mgr,
+ 			DALSMC_MSG_BacoAudioD3PME, no_arg, NULL);
+ }
+
+ /*
+  * Check whether the PMFW version supports the ReturnHardMinStatus message.
+  *
+  * The minimum smu_ver depends on the ASIC revision: 0x4e6a00 for GC 11.0.0,
+  * 0x524e00 for GC 11.0.2, and 0x503900 for everything else (GC 11.0.3).
+  */
+ static bool dcn32_get_hard_min_status_supported(struct clk_mgr_internal *clk_mgr)
+ {
+ 	if (ASICREV_IS_GC_11_0_0(clk_mgr->base.ctx->asic_id.hw_internal_rev))
+ 		return clk_mgr->smu_ver >= 0x4e6a00;
+
+ 	if (ASICREV_IS_GC_11_0_2(clk_mgr->base.ctx->asic_id.hw_internal_rev))
+ 		return clk_mgr->smu_ver >= 0x524e00;
+
+ 	/* ASICREV_IS_GC_11_0_3 */
+ 	return clk_mgr->smu_ver >= 0x503900;
+ }
+
+ /*
+  * Returns the clocks which were fulfilled by the DAL hard min arbiter in PMFW.
+  *
+  * Sends DALSMC_MSG_ReturnHardMinStatus with a zero argument.
+  *
+  * @no_timeout: set to the result of the message send (true when the SMU
+  *              answered DALSMC_Result_OK)
+  * @total_delay_us: set to the delay reported by the message send
+  *
+  * The returned value is the SMU's reply, or 0 if the send did not succeed.
+  */
+ static unsigned int dcn32_smu_get_hard_min_status(struct clk_mgr_internal *clk_mgr, bool *no_timeout, unsigned int *total_delay_us)
+ {
+ 	uint32_t clk_bits = 0;
+
+ 	*no_timeout = dcn32_smu_send_msg_with_param_delay(clk_mgr,
+ 			DALSMC_MSG_ReturnHardMinStatus, 0, &clk_bits, total_delay_us);
+
+ 	smu_print("SMU Get hard min status: no_timeout %d delay %d us clk bits %x\n",
+ 		*no_timeout, *total_delay_us, clk_bits);
+
+ 	return clk_bits;
+ }
+
+ /*
+  * Poll the SMU until it reports that the hard min for @clk was applied.
+  *
+  * The status bits waited for are CHECK_HARD_MIN_CLK_DPREFCLK plus the bit
+  * matching @clk. If @clk adds no bit beyond DPREFCLK, the function returns
+  * false immediately without polling.
+  *
+  * Polling stops once the accumulated delay (message delays plus 1 ms sleeps
+  * between polls) reaches 2000000 us. The longest wait seen and the number
+  * of timeouts are kept in function-local statics for logging only.
+  *
+  * Returns true when all required bits were reported, false otherwise.
+  */
+ static bool dcn32_smu_wait_get_hard_min_status(struct clk_mgr_internal *clk_mgr,
+ 	uint32_t clk)
+ {
+ 	static unsigned int longest_wait_us;
+ 	static unsigned int timeout_count;
+
+ 	int status_bits, wanted_bits;
+ 	unsigned int elapsed_us = 0;
+ 	unsigned int poll_delay_us;
+ 	bool no_timeout;
+ 	bool hard_min_done = false;
+
+ 	wanted_bits = CHECK_HARD_MIN_CLK_DPREFCLK;
+ 	switch (clk) {
+ 	case PPCLK_DISPCLK:
+ 		wanted_bits |= CHECK_HARD_MIN_CLK_DISPCLK;
+ 		break;
+ 	case PPCLK_DPPCLK:
+ 		wanted_bits |= CHECK_HARD_MIN_CLK_DPPCLK;
+ 		break;
+ 	case PPCLK_DCFCLK:
+ 		wanted_bits |= CHECK_HARD_MIN_CLK_DCFCLK;
+ 		break;
+ 	case PPCLK_DTBCLK:
+ 		wanted_bits |= CHECK_HARD_MIN_CLK_DTBCLK;
+ 		break;
+ 	case PPCLK_UCLK:
+ 		wanted_bits |= CHECK_HARD_MIN_CLK_UCLK;
+ 		break;
+ 	default:
+ 		break;
+ 	}
+
+ 	/* Nothing beyond DPREFCLK to wait for */
+ 	if (wanted_bits == CHECK_HARD_MIN_CLK_DPREFCLK)
+ 		return false;
+
+ 	for (;;) {
+ 		status_bits = dcn32_smu_get_hard_min_status(clk_mgr, &no_timeout, &poll_delay_us);
+ 		elapsed_us += poll_delay_us;
+
+ 		hard_min_done = (wanted_bits == (status_bits & wanted_bits));
+ 		if (hard_min_done)
+ 			break;
+
+ 		if (elapsed_us >= 2000000) {
+ 			timeout_count++;
+ 			smu_print("SMU Wait get hard min status: %d timeouts\n", timeout_count);
+ 			break;
+ 		}
+
+ 		msleep(1);
+ 		elapsed_us += 1000;
+ 	}
+
+ 	if (elapsed_us > longest_wait_us)
+ 		longest_wait_us = elapsed_us;
+
+ 	smu_print("SMU Wait get hard min status: no_timeout %d, delay %d us, max %d us, read %x, check %x\n",
+ 		no_timeout, elapsed_us, longest_wait_us, status_bits, wanted_bits);
+
+ 	return hard_min_done;
+ }
+
+ /*
+  * Request a hard minimum of @freq_mhz for clock @clk from the SMU.
+  *
+  * The message argument packs the clock type in bits 23:16 and the frequency
+  * in MHz in the lower 16 bits. When the PMFW supports ReturnHardMinStatus,
+  * the function also waits for the hard min to be reported as applied; the
+  * outcome of that wait is only logged.
+  *
+  * Returns the SMU's reply for the set frequency, 0 on failure.
+  * NOTE(review): the previous comment called the reply MHz while the log
+  * strings below label it KHz - confirm the unit against the PMFW interface.
+  */
+ unsigned int dcn32_smu_set_hard_min_by_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint16_t freq_mhz)
+ {
+ 	const uint32_t packed_arg = (clk << 16) | freq_mhz;
+ 	uint32_t actual_freq = 0;
+ 	bool hard_min_done = false;
+
+ 	smu_print("SMU Set hard min by freq: clk = %d, freq_mhz = %d MHz\n", clk, freq_mhz);
+
+ 	dcn32_smu_send_msg_with_param(clk_mgr,
+ 			DALSMC_MSG_SetHardMinByFreq, packed_arg, &actual_freq);
+
+ 	if (!dcn32_get_hard_min_status_supported(clk_mgr)) {
+ 		smu_print("SMU Frequency set = %d KHz\n", actual_freq);
+ 		return actual_freq;
+ 	}
+
+ 	hard_min_done = dcn32_smu_wait_get_hard_min_status(clk_mgr, clk);
+ 	smu_print("SMU Frequency set = %d KHz hard_min_done %d\n", actual_freq, hard_min_done);
+
+ 	return actual_freq;
+ }
+
+ /*
+  * Tell PMFW whether to wait for the DMCUB ack for MCLK.
+  *
+  * Sends message ID 0x14 (written as a raw value; this file has no named
+  * constant for it) with argument 1 when @enable is true and 0 otherwise.
+  */
+ void dcn32_smu_wait_for_dmub_ack_mclk(struct clk_mgr_internal *clk_mgr, bool enable)
+ {
+ 	const uint32_t wait_for_ack = enable ? 1 : 0;
+
+ 	smu_print("PMFW to wait for DMCUB ack for MCLK : %d\n", enable);
+
+ 	dcn32_smu_send_msg_with_param(clk_mgr, 0x14, wait_for_ack, NULL);
+ }
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.h
new file mode 100644
index 000000000000..5c44ab0e8667
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN32_CLK_MGR_SMU_MSG_H_
+#define __DCN32_CLK_MGR_SMU_MSG_H_
+
+#include "core_types.h"
+#include "dcn30/dcn30_clk_mgr_smu_msg.h"
+
+/*
+ * FCLK P-state support values.
+ * NOTE(review): presumably the argument sent by
+ * dcn32_smu_send_fclk_pstate_message() - confirm against the .c file.
+ */
+#define FCLK_PSTATE_NOTSUPPORTED 0x00
+#define FCLK_PSTATE_SUPPORTED 0x01
+
+/* TODO Remove this MSG ID define after it becomes available in dalsmc */
+#define DALSMC_MSG_SetCabForUclkPstate 0x12
+#define DALSMC_Result_OK 0x1
+
+/* SMU messaging helpers implemented in dcn32_clk_mgr_smu_msg.c */
+void dcn32_smu_send_fclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool enable);
+void dcn32_smu_send_cab_for_uclk_message(struct clk_mgr_internal *clk_mgr, unsigned int num_ways);
+void dcn32_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr);
+void dcn32_smu_set_pme_workaround(struct clk_mgr_internal *clk_mgr);
+/* Returns the actual frequency that was set in MHz, 0 on failure */
+unsigned int dcn32_smu_set_hard_min_by_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint16_t freq_mhz);
+/* Sends SMU message 0x14 with argument 1 when enable is true, 0 otherwise */
+void dcn32_smu_wait_for_dmub_ack_mclk(struct clk_mgr_internal *clk_mgr, bool enable);
+
+#endif /* __DCN32_CLK_MGR_SMU_MSG_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h
new file mode 100644
index 000000000000..8d54865bbd5d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright © 2022-2024 Advanced Micro Devices, Inc. All rights reserved. */
+
+#define SMU13_DRIVER_IF_VERSION 0x18
+
+//Only Clks that have DPM descriptors are listed here
+// Values are consecutive starting at 0, so PPCLK_COUNT equals the number of
+// clocks listed.
+// NOTE(review): presumably the clock-type value placed above bit 16 of the
+// SetHardMinByFreq message argument - confirm against the callers.
+typedef enum {
+ PPCLK_GFXCLK = 0,
+ PPCLK_SOCCLK,
+ PPCLK_UCLK,
+ PPCLK_FCLK,
+ PPCLK_DCLK_0,
+ PPCLK_VCLK_0,
+ PPCLK_DCLK_1,
+ PPCLK_VCLK_1,
+ PPCLK_DISPCLK,
+ PPCLK_DPPCLK,
+ PPCLK_DPREFCLK,
+ PPCLK_DCFCLK,
+ PPCLK_DTBCLK,
+ PPCLK_COUNT,
+} PPCLK_e;
+
+// One watermark row: two single-byte fields followed by two padding bytes.
+// NOTE(review): Flags presumably carries WATERMARKS_FLAGS_e values - confirm
+// against the code that fills the table.
+typedef struct {
+ uint8_t WmSetting;
+ uint8_t Flags;
+ uint8_t Padding[2];
+
+} WatermarkRowGeneric_t;
+
+// Number of rows in Watermarks_t
+#define NUM_WM_RANGES 4
+
+// Consecutive values from 0; WATERMARKS_COUNT is the number of entries.
+typedef enum {
+ WATERMARKS_CLOCK_RANGE = 0,
+ WATERMARKS_DUMMY_PSTATE,
+ WATERMARKS_MALL,
+ WATERMARKS_COUNT,
+} WATERMARKS_FLAGS_e;
+
+// Watermark table: NUM_WM_RANGES rows of WatermarkRowGeneric_t
+typedef struct {
+ // Watermarks
+ WatermarkRowGeneric_t WatermarkRow[NUM_WM_RANGES];
+} Watermarks_t;
+
+// Watermark table followed by 16 spare words and 8 words of padding.
+// NOTE(review): presumably the layout transferred by
+// dcn32_smu_transfer_wm_table_dram_2_smu() - confirm.
+typedef struct {
+ Watermarks_t Watermarks;
+ uint32_t Spare[16];
+
+ uint32_t MmHubPadding[8]; // SMU internal use
+} WatermarksExternal_t;
+
+// Table types
+// IDs are consecutive from 0; TABLE_COUNT is one more than the highest ID.
+#define TABLE_PMFW_PPTABLE 0
+#define TABLE_COMBO_PPTABLE 1
+#define TABLE_WATERMARKS 2
+#define TABLE_AVFS_PSM_DEBUG 3
+#define TABLE_PMSTATUSLOG 4
+#define TABLE_SMU_METRICS 5
+#define TABLE_DRIVER_SMU_CONFIG 6
+#define TABLE_ACTIVITY_MONITOR_COEFF 7
+#define TABLE_OVERDRIVE 8
+#define TABLE_I2C_COMMANDS 9
+#define TABLE_DRIVER_INFO 10
+#define TABLE_COUNT 11
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c
new file mode 100644
index 000000000000..4607eff07253
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "core_types.h"
+#include "dcn35_clk_mgr.h"
+
+/*
+ * Register offsets, shifts and masks used by the DCN 3.5.1 clock manager.
+ *
+ * Compared with the list in dcn35_clk_mgr.c, the six
+ * mmCLK1_CLKx_CURRENT_CNT offsets are each one higher here and
+ * mmCLK5_spll_field_8 is 0x1B04B instead of 0x1B24B; the other mmCLK1_*
+ * offsets are the same in both files.
+ */
+#define DCN_BASE__INST0_SEG1 0x000000C0
+#define mmCLK1_CLK_PLL_REQ 0x16E37
+
+#define mmCLK1_CLK0_DFS_CNTL 0x16E69
+#define mmCLK1_CLK1_DFS_CNTL 0x16E6C
+#define mmCLK1_CLK2_DFS_CNTL 0x16E6F
+#define mmCLK1_CLK3_DFS_CNTL 0x16E72
+#define mmCLK1_CLK4_DFS_CNTL 0x16E75
+#define mmCLK1_CLK5_DFS_CNTL 0x16E78
+
+#define mmCLK1_CLK0_CURRENT_CNT 0x16EFC
+#define mmCLK1_CLK1_CURRENT_CNT 0x16EFD
+#define mmCLK1_CLK2_CURRENT_CNT 0x16EFE
+#define mmCLK1_CLK3_CURRENT_CNT 0x16EFF
+#define mmCLK1_CLK4_CURRENT_CNT 0x16F00
+#define mmCLK1_CLK5_CURRENT_CNT 0x16F01
+
+#define mmCLK1_CLK0_BYPASS_CNTL 0x16E8A
+#define mmCLK1_CLK1_BYPASS_CNTL 0x16E93
+#define mmCLK1_CLK2_BYPASS_CNTL 0x16E9C
+#define mmCLK1_CLK3_BYPASS_CNTL 0x16EA5
+#define mmCLK1_CLK4_BYPASS_CNTL 0x16EAE
+#define mmCLK1_CLK5_BYPASS_CNTL 0x16EB7
+
+#define mmCLK1_CLK0_DS_CNTL 0x16E83
+#define mmCLK1_CLK1_DS_CNTL 0x16E8C
+#define mmCLK1_CLK2_DS_CNTL 0x16E95
+#define mmCLK1_CLK3_DS_CNTL 0x16E9E
+#define mmCLK1_CLK4_DS_CNTL 0x16EA7
+#define mmCLK1_CLK5_DS_CNTL 0x16EB0
+
+#define mmCLK1_CLK0_ALLOW_DS 0x16E84
+#define mmCLK1_CLK1_ALLOW_DS 0x16E8D
+#define mmCLK1_CLK2_ALLOW_DS 0x16E96
+#define mmCLK1_CLK3_ALLOW_DS 0x16E9F
+#define mmCLK1_CLK4_ALLOW_DS 0x16EA8
+#define mmCLK1_CLK5_ALLOW_DS 0x16EB1
+
+#define mmCLK5_spll_field_8 0x1B04B
+#define mmCLK6_spll_field_8 0x1B24B
+#define mmDENTIST_DISPCLK_CNTL 0x0124
+#define regDENTIST_DISPCLK_CNTL 0x0064
+#define regDENTIST_DISPCLK_CNTL_BASE_IDX 1
+
+/* CLK1_CLK_PLL_REQ fields: FbMult_int [8:0], PllSpineDiv [15:12], FbMult_frac [31:16] */
+#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0
+#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc
+#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10
+#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL
+#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L
+#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L
+
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L
+
+// DENTIST_DISPCLK_CNTL
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER__SHIFT 0x0
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER__SHIFT 0x8
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE__SHIFT 0x13
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE__SHIFT 0x14
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER__SHIFT 0x18
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER_MASK 0x0000007FL
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER_MASK 0x00007F00L
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE_MASK 0x00080000L
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE_MASK 0x00100000L
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER_MASK 0x7F000000L
+
+#define CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L
+
+/* Look up a register offset; needs a variable named clk_mgr in scope */
+#define REG(reg) \
+ (clk_mgr->regs->reg)
+
+/* Two-level paste so that seg is macro-expanded before being appended */
+#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
+
+#define BASE(seg) BASE_INNER(seg)
+
+/*
+ * Designated initializer for a register given as segment-relative
+ * reg<name> plus the base selected by reg<name>_BASE_IDX.
+ */
+#define SR(reg_name)\
+ .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name
+
+/* Designated initializer for a register given directly as mm<name> */
+#define CLK_SR_DCN35(reg_name)\
+ .reg_name = mm ## reg_name
+
+/*
+ * Register, shift and mask tables for DCN 3.5.1, expanded from list macros
+ * defined elsewhere using the offsets and masks defined in this file.
+ * dcn351_clk_mgr_construct() installs them on the clock manager.
+ */
+static const struct clk_mgr_registers clk_mgr_regs_dcn351 = {
+ CLK_REG_LIST_DCN35()
+};
+
+static const struct clk_mgr_shift clk_mgr_shift_dcn351 = {
+ CLK_COMMON_MASK_SH_LIST_DCN32(__SHIFT)
+};
+
+static const struct clk_mgr_mask clk_mgr_mask_dcn351 = {
+ CLK_COMMON_MASK_SH_LIST_DCN32(_MASK)
+};
+
+/* Recover the enclosing struct clk_mgr_dcn35 from a pointer to its base member */
+#define TO_CLK_MGR_DCN35(clk_mgr)\
+ container_of(clk_mgr, struct clk_mgr_dcn35, base)
+
+
+/*
+ * Construct a DCN 3.5.1 clock manager.
+ *
+ * Points clk_mgr->base at the DCN 3.5.1 register, shift and mask tables
+ * defined in this file, then hands off to dcn35_clk_mgr_construct() with
+ * the same arguments.
+ * NOTE(review): this relies on dcn35_clk_mgr_construct() not replacing the
+ * three table pointers set here - confirm.
+ */
+void dcn351_clk_mgr_construct(
+ struct dc_context *ctx,
+ struct clk_mgr_dcn35 *clk_mgr,
+ struct pp_smu_funcs *pp_smu,
+ struct dccg *dccg)
+{
+ /*register offset changed*/
+ clk_mgr->base.regs = &clk_mgr_regs_dcn351;
+ clk_mgr->base.clk_mgr_shift = &clk_mgr_shift_dcn351;
+ clk_mgr->base.clk_mgr_mask = &clk_mgr_mask_dcn351;
+
+ dcn35_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
+
+}
+
+
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
new file mode 100644
index 000000000000..dfd0c9505af0
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -0,0 +1,1588 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+#include "dcn35_clk_mgr.h"
+
+#include "dccg.h"
+#include "clk_mgr_internal.h"
+
+// For dce12_get_dp_ref_freq_khz
+#include "dce100/dce_clk_mgr.h"
+
+// For dcn20_update_clocks_update_dpp_dto
+#include "dcn20/dcn20_clk_mgr.h"
+
+
+#include "reg_helper.h"
+#include "core_types.h"
+#include "dcn35_smu.h"
+#include "dm_helpers.h"
+
+#include "dcn31/dcn31_clk_mgr.h"
+
+#include "dc_dmub_srv.h"
+#include "link_service.h"
+#include "logger_types.h"
+
+/*
+ * Logger used by the DC_LOG_* macros in this file. Only usable where
+ * clk_mgr names an object whose base.base.ctx is reachable (a
+ * struct clk_mgr_dcn35 pointer in dcn35_save_clk_registers()).
+ */
+#undef DC_LOGGER
+#define DC_LOGGER \
+ clk_mgr->base.base.ctx->logger
+
+/* Register offsets, shifts and masks used by the DCN 3.5 clock manager */
+#define DCN_BASE__INST0_SEG1 0x000000C0
+#define mmCLK1_CLK_PLL_REQ 0x16E37
+
+#define mmCLK1_CLK0_DFS_CNTL 0x16E69
+#define mmCLK1_CLK1_DFS_CNTL 0x16E6C
+#define mmCLK1_CLK2_DFS_CNTL 0x16E6F
+#define mmCLK1_CLK3_DFS_CNTL 0x16E72
+#define mmCLK1_CLK4_DFS_CNTL 0x16E75
+#define mmCLK1_CLK5_DFS_CNTL 0x16E78
+
+#define mmCLK1_CLK0_CURRENT_CNT 0x16EFB
+#define mmCLK1_CLK1_CURRENT_CNT 0x16EFC
+#define mmCLK1_CLK2_CURRENT_CNT 0x16EFD
+#define mmCLK1_CLK3_CURRENT_CNT 0x16EFE
+#define mmCLK1_CLK4_CURRENT_CNT 0x16EFF
+#define mmCLK1_CLK5_CURRENT_CNT 0x16F00
+
+#define mmCLK1_CLK0_BYPASS_CNTL 0x16E8A
+#define mmCLK1_CLK1_BYPASS_CNTL 0x16E93
+#define mmCLK1_CLK2_BYPASS_CNTL 0x16E9C
+#define mmCLK1_CLK3_BYPASS_CNTL 0x16EA5
+#define mmCLK1_CLK4_BYPASS_CNTL 0x16EAE
+#define mmCLK1_CLK5_BYPASS_CNTL 0x16EB7
+
+#define mmCLK1_CLK0_DS_CNTL 0x16E83
+#define mmCLK1_CLK1_DS_CNTL 0x16E8C
+#define mmCLK1_CLK2_DS_CNTL 0x16E95
+#define mmCLK1_CLK3_DS_CNTL 0x16E9E
+#define mmCLK1_CLK4_DS_CNTL 0x16EA7
+#define mmCLK1_CLK5_DS_CNTL 0x16EB0
+
+#define mmCLK1_CLK0_ALLOW_DS 0x16E84
+#define mmCLK1_CLK1_ALLOW_DS 0x16E8D
+#define mmCLK1_CLK2_ALLOW_DS 0x16E96
+#define mmCLK1_CLK3_ALLOW_DS 0x16E9F
+#define mmCLK1_CLK4_ALLOW_DS 0x16EA8
+#define mmCLK1_CLK5_ALLOW_DS 0x16EB1
+
+/*
+ * NOTE(review): mmCLK5_spll_field_8 and mmCLK6_spll_field_8 are given the
+ * same offset here, whereas dcn351_clk_mgr.c uses 0x1B04B for CLK5 - confirm
+ * this is intended.
+ */
+#define mmCLK5_spll_field_8 0x1B24B
+#define mmCLK6_spll_field_8 0x1B24B
+#define mmDENTIST_DISPCLK_CNTL 0x0124
+#define regDENTIST_DISPCLK_CNTL 0x0064
+#define regDENTIST_DISPCLK_CNTL_BASE_IDX 1
+
+/* CLK1_CLK_PLL_REQ fields: FbMult_int [8:0], PllSpineDiv [15:12], FbMult_frac [31:16] */
+#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0
+#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc
+#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10
+#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL
+#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L
+#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L
+
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L
+// DENTIST_DISPCLK_CNTL
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER__SHIFT 0x0
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER__SHIFT 0x8
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE__SHIFT 0x13
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE__SHIFT 0x14
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER__SHIFT 0x18
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER_MASK 0x0000007FL
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER_MASK 0x00007F00L
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE_MASK 0x00080000L
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE_MASK 0x00100000L
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER_MASK 0x7F000000L
+
+#define CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L
+#define CLK6_spll_field_8__spll_ssc_en_MASK 0x00002000L
+
+/* Version bytes 0x5D.0x4A.0x00 correspond to 93.74.0 */
+#define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0
+/*
+ * Expands to the "shift, mask" argument pair for field_name, taken from the
+ * clk_mgr's shift and mask tables; reg_name is not used in the expansion.
+ * Needs a variable named clk_mgr in scope.
+ */
+#undef FN
+#define FN(reg_name, field_name) \
+ clk_mgr->clk_mgr_shift->field_name, clk_mgr->clk_mgr_mask->field_name
+
+/* Look up a register offset; needs a variable named clk_mgr in scope */
+#define REG(reg) \
+ (clk_mgr->regs->reg)
+
+/* Two-level paste so that seg is macro-expanded before being appended */
+#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
+
+#define BASE(seg) BASE_INNER(seg)
+
+/*
+ * Designated initializer for a register given as segment-relative
+ * reg<name> plus the base selected by reg<name>_BASE_IDX.
+ */
+#define SR(reg_name)\
+ .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name
+
+/* Designated initializer for a register given directly as mm<name> */
+#define CLK_SR_DCN35(reg_name)\
+ .reg_name = mm ## reg_name
+
+/*
+ * Register, shift and mask tables for DCN 3.5, expanded from list macros
+ * defined elsewhere using the offsets and masks defined in this file.
+ */
+static const struct clk_mgr_registers clk_mgr_regs_dcn35 = {
+ CLK_REG_LIST_DCN35()
+};
+
+static const struct clk_mgr_shift clk_mgr_shift_dcn35 = {
+ CLK_COMMON_MASK_SH_LIST_DCN32(__SHIFT)
+};
+
+static const struct clk_mgr_mask clk_mgr_mask_dcn35 = {
+ CLK_COMMON_MASK_SH_LIST_DCN32(_MASK)
+};
+
+/* Recover the enclosing struct clk_mgr_dcn35 from a pointer to its base member */
+#define TO_CLK_MGR_DCN35(clk_mgr)\
+ container_of(clk_mgr, struct clk_mgr_dcn35, base)
+
+/*
+ * Count the links whose link encoder reports its DIG as enabled.
+ *
+ * The raw count is stored through @all_active_disps when that pointer is
+ * not NULL. The return value is the same count, except that it is forced
+ * to 1 when no DIG is enabled but @context still has an HDMI or DVI stream
+ * (workaround for a hang on HDMI after display off and back on).
+ */
+static int dcn35_get_active_display_cnt_wa(
+		struct dc *dc,
+		struct dc_state *context,
+		int *all_active_disps)
+{
+	bool has_tmds_stream = false;
+	int enabled_digs = 0;
+	int idx;
+
+	for (idx = 0; idx < context->stream_count; idx++) {
+		switch (context->streams[idx]->signal) {
+		case SIGNAL_TYPE_HDMI_TYPE_A:
+		case SIGNAL_TYPE_DVI_SINGLE_LINK:
+		case SIGNAL_TYPE_DVI_DUAL_LINK:
+			has_tmds_stream = true;
+			break;
+		default:
+			break;
+		}
+	}
+
+	for (idx = 0; idx < dc->link_count; idx++) {
+		struct link_encoder *enc = dc->links[idx]->link_enc;
+
+		/* abusing the fact that the dig and phy are coupled to see if the phy is enabled */
+		if (!enc || !enc->funcs->is_dig_enabled)
+			continue;
+
+		if (enc->funcs->is_dig_enabled(enc))
+			enabled_digs++;
+	}
+
+	if (all_active_disps)
+		*all_active_disps = enabled_digs;
+
+	/* WA for hang on HDMI after display off back on */
+	if (enabled_digs == 0 && has_tmds_stream)
+		return 1;
+
+	return enabled_digs;
+}
+/*
+ * OTG workaround applied around a DISPCLK change.
+ *
+ * dcn35_update_clocks() calls this with @disable true before programming
+ * DISPCLK and with @disable false afterwards. For every pipe that heads its
+ * tree (no top_pipe and no prev_odm_pipe) and whose stream has DPMS off, is
+ * a virtual signal or has no link encoder, the timing generator is
+ * disabled immediately (and the pipe's sync context reset) or re-enabled.
+ * Pipes are taken from @context when @safe_to_lower is true and from the
+ * current state otherwise.
+ *
+ * The workaround is skipped for a pipe when a DIG is active on it: the
+ * stream changed while the OTG, DIG and stream encoder FIFO stay enabled,
+ * or the pipe carries a 128b/132b DP signal. Nothing is done in the DIAG
+ * environment.
+ */
+static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context,
+		bool safe_to_lower, bool disable)
+{
+	struct dc *dc = clk_mgr_base->ctx->dc;
+	struct clk_mgr_internal *clk_mgr_internal;
+	struct dccg *dccg;
+	int i;
+
+	if (dc->ctx->dce_environment == DCE_ENV_DIAG)
+		return;
+
+	/* loop-invariant, so look it up once */
+	clk_mgr_internal = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	dccg = clk_mgr_internal->dccg;
+
+	for (i = 0; i < dc->res_pool->pipe_count; ++i) {
+		struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+		struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
+		struct pipe_ctx *pipe = safe_to_lower
+			? &context->res_ctx.pipe_ctx[i]
+			: &dc->current_state->res_ctx.pipe_ctx[i];
+		struct link_encoder *new_pipe_link_enc = new_pipe->link_res.dio_link_enc;
+		struct link_encoder *pipe_link_enc = pipe->link_res.dio_link_enc;
+		bool stream_changed_otg_dig_on = false;
+		bool has_active_hpo = false;
+
+		if (pipe->top_pipe || pipe->prev_odm_pipe)
+			continue;
+
+		/* without unified assignment the encoder lives on the stream */
+		if (!dc->config.unify_link_enc_assignment) {
+			if (new_pipe->stream)
+				new_pipe_link_enc = new_pipe->stream->link_enc;
+			if (pipe->stream)
+				pipe_link_enc = pipe->stream->link_enc;
+		}
+
+		stream_changed_otg_dig_on = old_pipe->stream && new_pipe->stream &&
+		old_pipe->stream != new_pipe->stream &&
+		old_pipe->stream_res.tg == new_pipe->stream_res.tg &&
+		new_pipe_link_enc && !new_pipe->stream->dpms_off &&
+		new_pipe_link_enc->funcs->is_dig_enabled &&
+		new_pipe_link_enc->funcs->is_dig_enabled(
+		new_pipe_link_enc) &&
+		new_pipe->stream_res.stream_enc &&
+		new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled &&
+		new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled(new_pipe->stream_res.stream_enc);
+
+		if (old_pipe->stream && new_pipe->stream && old_pipe->stream == new_pipe->stream) {
+			has_active_hpo = dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(old_pipe) &&
+			dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(new_pipe);
+
+		}
+
+		if (!has_active_hpo && !stream_changed_otg_dig_on && pipe->stream &&
+		    (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || !pipe_link_enc) &&
+		    !dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(pipe)) {
+			/* This w/a should not trigger when we have a dig active */
+			if (disable) {
+				if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc)
+					pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
+
+				reset_sync_context_for_pipe(dc, context, i);
+			} else if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->enable_crtc) {
+				/*
+				 * Guard the re-enable the same way as the disable
+				 * above: the disable path tolerates a NULL tg, so
+				 * the enable path must not dereference it blindly.
+				 */
+				pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
+			}
+		}
+	}
+}
+
+/*
+ * Program the DTBCLK DTO of every timing generator referenced by @context
+ * with @ref_dtbclk_khz. A bitmask of timing generator instances ensures
+ * each one is programmed only once even when several pipes share it.
+ */
+static void dcn35_update_clocks_update_dtb_dto(struct clk_mgr_internal *clk_mgr,
+		struct dc_state *context,
+		int ref_dtbclk_khz)
+{
+	uint32_t programmed_tgs = 0;
+	int pipe_idx;
+
+	for (pipe_idx = 0; pipe_idx < clk_mgr->base.ctx->dc->res_pool->pipe_count; pipe_idx++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[pipe_idx];
+		struct dtbclk_dto_params dto_params = {0};
+
+		/* nothing to program without a timing generator */
+		if (!pipe->stream_res.tg)
+			continue;
+
+		/* this timing generator was already handled via another pipe */
+		if (programmed_tgs & (1 << pipe->stream_res.tg->inst))
+			continue;
+
+		programmed_tgs |= (1 << pipe->stream_res.tg->inst);
+
+		dto_params.otg_inst = pipe->stream_res.tg->inst;
+		dto_params.ref_dtbclk_khz = ref_dtbclk_khz;
+
+		/* the audio DTBCLK DTO is intentionally not programmed here */
+		clk_mgr->dccg->funcs->set_dtbclk_dto(clk_mgr->dccg, &dto_params);
+	}
+}
+
+/*
+ * Update the per-DPP DTOs to the dppclk requested for each pipe in @context.
+ *
+ * A pipe's DTO is written when @safe_to_lower is set or when the request is
+ * higher than the value recorded in the DCCG for that pipe. When
+ * @safe_to_lower is set, DPPs used by the current state but not by
+ * @context additionally get their DTO set to 0.
+ */
+static void dcn35_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
+		struct dc_state *context, bool safe_to_lower)
+{
+	struct dccg *dccg = clk_mgr->dccg;
+	bool inst_in_use[MAX_PIPES] = {0};
+	int pipe_idx;
+
+	dccg->ref_dppclk = clk_mgr->base.clks.dppclk_khz;
+
+	for (pipe_idx = 0; pipe_idx < clk_mgr->base.ctx->dc->res_pool->pipe_count; pipe_idx++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[pipe_idx];
+		int requested_khz = pipe->plane_res.bw.dppclk_khz;
+		int recorded_khz;
+		int inst = 0;
+
+		if (pipe->plane_res.dpp) {
+			inst = pipe->plane_res.dpp->inst;
+		} else if (requested_khz == 0) {
+			/* no DPP and no clock request is valid: the pipe is harvested */
+			continue;
+		} else if (requested_khz > 0) {
+			/* a clock request without a DPP is invalid software state */
+			ASSERT(false);
+			continue;
+		}
+
+		recorded_khz = dccg->pipe_dppclk_khz[pipe_idx];
+
+		if (safe_to_lower || recorded_khz < requested_khz)
+			dccg->funcs->update_dpp_dto(dccg, inst, requested_khz);
+
+		inst_in_use[inst] = true;
+	}
+
+	if (!safe_to_lower)
+		return;
+
+	/* zero the DTO of DPPs that only the outgoing state was using */
+	for (pipe_idx = 0; pipe_idx < clk_mgr->base.ctx->dc->res_pool->pipe_count; pipe_idx++) {
+		struct dpp *prev_dpp = clk_mgr->base.ctx->dc->current_state->res_ctx.pipe_ctx[pipe_idx].plane_res.dpp;
+
+		if (!prev_dpp || inst_in_use[prev_dpp->inst])
+			continue;
+
+		dccg->funcs->update_dpp_dto(dccg, prev_dpp->inst, 0);
+	}
+}
+
+/*
+ * Return the smallest link_index among the USB4 DPIA links found in the
+ * first MAX_PIPES * 2 entries of the dc's link array, or 0xFF when there is
+ * none.
+ */
+static uint8_t get_lowest_dpia_index(const struct dc_link *link)
+{
+	const struct dc *dc_struct = link->dc;
+	uint8_t lowest = 0xFF;
+	int slot;
+
+	for (slot = 0; slot < MAX_PIPES * 2; ++slot) {
+		const struct dc_link *candidate = dc_struct->links[slot];
+
+		if (candidate && candidate->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
+		    lowest > candidate->link_index)
+			lowest = candidate->link_index;
+	}
+
+	return lowest;
+}
+
+/*
+ * Sum the stream bandwidth per host router and report changes to the SMU.
+ *
+ * Streams whose link index is below the lowest DPIA link index are
+ * ignored; the remaining links map to a host router in pairs
+ * ((link_index - lowest) / 2). The totals are stored in the new clock
+ * state, and a router whose total passes should_set_clock() is recorded in
+ * the clock manager and sent to the SMU.
+ */
+static void dcn35_notify_host_router_bw(struct clk_mgr *clk_mgr_base, struct dc_state *context,
+		bool safe_to_lower)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
+	uint32_t bw_per_router_kbps[MAX_HOST_ROUTERS_NUM] = { 0 };
+	int i;
+
+	for (i = 0; i < context->stream_count; ++i) {
+		const struct dc_stream_state *stream = context->streams[i];
+		const struct dc_link *link = stream->link;
+		uint8_t first_dpia = 0;
+		unsigned int router = 0;
+
+		if (!link)
+			continue;
+
+		first_dpia = get_lowest_dpia_index(link);
+		if (link->link_index < first_dpia)
+			continue;
+
+		router = (link->link_index - first_dpia) / 2;
+		if (router < MAX_HOST_ROUTERS_NUM)
+			bw_per_router_kbps[router] += dc_bandwidth_in_kbps_from_timing(
+				&stream->timing, dc_link_get_highest_encoding_format(link));
+	}
+
+	for (i = 0; i < MAX_HOST_ROUTERS_NUM; ++i) {
+		new_clocks->host_router_bw_kbps[i] = bw_per_router_kbps[i];
+
+		if (!should_set_clock(safe_to_lower, new_clocks->host_router_bw_kbps[i], clk_mgr_base->clks.host_router_bw_kbps[i]))
+			continue;
+
+		clk_mgr_base->clks.host_router_bw_kbps[i] = new_clocks->host_router_bw_kbps[i];
+		dcn35_smu_notify_host_router_bw(clk_mgr, i, new_clocks->host_router_bw_kbps[i]);
+	}
+}
+
+/*
+ * Apply the clocks requested in @context to the SMU, the DCCG and DMCUB.
+ *
+ * Returns immediately when dc->work_arounds.skip_clock_update is set.
+ *
+ * With @safe_to_lower true this handles the power-reducing transitions
+ * (z-state support other than DISALLOW, DTBCLK off, low-power state when
+ * no display is counted); with it false it handles the opposite ones
+ * (z-state DISALLOW, DTBCLK on, mission mode). Each individual clock is
+ * then gated by should_set_clock(). Finally the recorded dcfclk,
+ * deep-sleep dcfclk, dispclk and dppclk are sent to DMCUB.
+ *
+ * The requested clock state inside @context is modified: ref_dtbclk_khz,
+ * dcfclk_khz and dppclk_khz may be adjusted before being applied.
+ */
+void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
+ struct dc_state *context,
+ bool safe_to_lower)
+{
+ union dmub_rb_cmd cmd;
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
+ struct dc *dc = clk_mgr_base->ctx->dc;
+ int display_count = 0;
+ bool update_dppclk = false;
+ bool update_dispclk = false;
+ bool dpp_clock_lowered = false;
+ int all_active_disps = 0;
+
+ if (dc->work_arounds.skip_clock_update)
+ return;
+
+ display_count = dcn35_get_active_display_cnt_wa(dc, context, &all_active_disps);
+ /*
+ * Default the DTBCLK reference to 600000 kHz when DTBCLK is requested
+ * without one; clear a reference above 590000 kHz when it is not requested.
+ */
+ if (new_clocks->dtbclk_en && !new_clocks->ref_dtbclk_khz)
+ new_clocks->ref_dtbclk_khz = 600000;
+ else if (!new_clocks->dtbclk_en && new_clocks->ref_dtbclk_khz > 590000)
+ new_clocks->ref_dtbclk_khz = 0;
+
+ /*
+ * if it is safe to lower, but we are already in the lower state, we don't have to do anything
+ * also if safe to lower is false, we just go in the higher state
+ */
+ if (safe_to_lower) {
+ if (new_clocks->zstate_support != DCN_ZSTATE_SUPPORT_DISALLOW &&
+ new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) {
+ dcn35_smu_set_zstate_support(clk_mgr, new_clocks->zstate_support);
+ dm_helpers_enable_periodic_detection(clk_mgr_base->ctx, true);
+ clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
+ }
+
+ if (clk_mgr_base->clks.dtbclk_en && !new_clocks->dtbclk_en) {
+ /* the SMU is only told to turn DTBCLK off when allow_0_dtb_clk is set */
+ if (clk_mgr->base.ctx->dc->config.allow_0_dtb_clk)
+ dcn35_smu_set_dtbclk(clk_mgr, false);
+
+ clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
+ }
+ /* check that we're not already in lower */
+ if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) {
+ /* if we can go lower, go lower */
+ if (display_count == 0)
+ clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
+ }
+ } else {
+ if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_DISALLOW &&
+ new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) {
+ dcn35_smu_set_zstate_support(clk_mgr, DCN_ZSTATE_SUPPORT_DISALLOW);
+ dm_helpers_enable_periodic_detection(clk_mgr_base->ctx, false);
+ clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
+ }
+
+ if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) {
+ int actual_dtbclk = 0;
+
+ dcn35_update_clocks_update_dtb_dto(clk_mgr, context, new_clocks->ref_dtbclk_khz);
+ dcn35_smu_set_dtbclk(clk_mgr, true);
+
+ actual_dtbclk = REG_READ(CLK1_CLK4_CURRENT_CNT);
+
+ /* only record DTBCLK as enabled when the readback exceeds 590000 */
+ if (actual_dtbclk > 590000) {
+ clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz;
+ clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
+ }
+ }
+
+ /* check that we're not already in D0 */
+ if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_MISSION_MODE) {
+ union display_idle_optimization_u idle_info = { 0 };
+
+ dcn35_smu_set_display_idle_optimization(clk_mgr, idle_info.data);
+ /* update power state */
+ clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_MISSION_MODE;
+ }
+ }
+ /* debug override: raise the requested dcfclk to at least force_min_dcfclk_mhz */
+ if (dc->debug.force_min_dcfclk_mhz > 0)
+ new_clocks->dcfclk_khz = (new_clocks->dcfclk_khz > (dc->debug.force_min_dcfclk_mhz * 1000)) ?
+ new_clocks->dcfclk_khz : (dc->debug.force_min_dcfclk_mhz * 1000);
+
+ if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr_base->clks.dcfclk_khz)) {
+ clk_mgr_base->clks.dcfclk_khz = new_clocks->dcfclk_khz;
+ dcn35_smu_set_hard_min_dcfclk(clk_mgr, clk_mgr_base->clks.dcfclk_khz);
+ }
+
+ if (should_set_clock(safe_to_lower,
+ new_clocks->dcfclk_deep_sleep_khz, clk_mgr_base->clks.dcfclk_deep_sleep_khz)) {
+ clk_mgr_base->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz;
+ dcn35_smu_set_min_deep_sleep_dcfclk(clk_mgr, clk_mgr_base->clks.dcfclk_deep_sleep_khz);
+ }
+
+ // workaround: Limit dppclk to 100Mhz to avoid lower eDP panel switch to plus 4K monitor underflow.
+ if (new_clocks->dppclk_khz < 100000)
+ new_clocks->dppclk_khz = 100000;
+
+ if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) {
+ if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz)
+ dpp_clock_lowered = true;
+ clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz;
+ update_dppclk = true;
+ }
+
+ /* a dispclk of 0 is only applied when lowering with no display counted */
+ if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) &&
+ (new_clocks->dispclk_khz > 0 || (safe_to_lower && display_count == 0))) {
+ int requested_dispclk_khz = new_clocks->dispclk_khz;
+
+ dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true);
+
+ /* Clamp the requested clock to PMFW based on their limit. */
+ if (dc->debug.min_disp_clk_khz > 0 && requested_dispclk_khz < dc->debug.min_disp_clk_khz)
+ requested_dispclk_khz = dc->debug.min_disp_clk_khz;
+
+ dcn35_smu_set_dispclk(clk_mgr, requested_dispclk_khz);
+ /* the recorded value is the unclamped request, not what was sent to the SMU */
+ clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
+
+ dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false);
+
+ update_dispclk = true;
+ }
+
+ /* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */
+ if (!dc->debug.disable_dtb_ref_clk_switch &&
+ should_set_clock(safe_to_lower, new_clocks->ref_dtbclk_khz / 1000,
+ clk_mgr_base->clks.ref_dtbclk_khz / 1000)) {
+ dcn35_update_clocks_update_dtb_dto(clk_mgr, context, new_clocks->ref_dtbclk_khz);
+ clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz;
+ }
+
+ if (dpp_clock_lowered) {
+ // increase per DPP DTO before lowering global dppclk
+ dcn35_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
+ dcn35_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz);
+ } else {
+ // increase global DPPCLK before lowering per DPP DTO
+ if (update_dppclk || update_dispclk)
+ dcn35_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz);
+ dcn35_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
+ }
+
+ // notify PMFW of bandwidth per DPIA tunnel
+ if (dc->debug.notify_dpia_hr_bw)
+ dcn35_notify_host_router_bw(clk_mgr_base, context, safe_to_lower);
+
+ // notify DMCUB of latest clocks
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.notify_clocks.header.type = DMUB_CMD__CLK_MGR;
+ cmd.notify_clocks.header.sub_type = DMUB_CMD__CLK_MGR_NOTIFY_CLOCKS;
+ cmd.notify_clocks.clocks.dcfclk_khz = clk_mgr_base->clks.dcfclk_khz;
+ cmd.notify_clocks.clocks.dcfclk_deep_sleep_khz =
+ clk_mgr_base->clks.dcfclk_deep_sleep_khz;
+ cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
+ cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
+
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+/*
+ * Compute the VCO frequency in kHz from the feedback multiplier in
+ * CLK1_CLK_PLL_REQ: (FbMult_int + FbMult_frac / 2^16) multiplied by
+ * clk_mgr->dfs_ref_freq_khz, rounded down to an integer.
+ */
+static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
+{
+ /* get FbMult value */
+ struct fixed31_32 pll_req;
+ unsigned int fbmult_frac_val = 0;
+ unsigned int fbmult_int_val = 0;
+
+ /*
+ * Register value of fbmult is in 8.16 format, we are converting to 31.32
+ * to leverage the fix point operations available in driver
+ *
+ * NOTE(review): CLK1_CLK_PLL_REQ__FbMult_int_MASK in this file is
+ * 0x000001FF, i.e. 9 bits, not the 8 bits stated here - confirm.
+ */
+
+ REG_GET(CLK1_CLK_PLL_REQ, FbMult_frac, &fbmult_frac_val); /* 16 bit fractional part*/
+ REG_GET(CLK1_CLK_PLL_REQ, FbMult_int, &fbmult_int_val); /* 8 bit integer part */
+
+ pll_req = dc_fixpt_from_int(fbmult_int_val);
+
+ /*
+ * since fractional part is only 16 bit in register definition but is 32 bit
+ * in our fix point definition, need to shift left by 16 to obtain correct value
+ */
+ pll_req.value |= fbmult_frac_val << 16;
+
+ /* multiply by the DFS reference clock frequency in kHz */
+ pll_req = dc_fixpt_mul_int(pll_req, clk_mgr->dfs_ref_freq_khz);
+
+ /* integer part is now VCO frequency in kHz */
+ return dc_fixpt_floor(pll_req);
+}
+
+/*
+ * Adapter taking the generic clk_mgr: converts it to the internal clock
+ * manager and forwards to dcn35_smu_enable_pme_wa().
+ */
+static void dcn35_enable_pme_wa(struct clk_mgr *clk_mgr_base)
+{
+	dcn35_smu_enable_pme_wa(TO_CLK_MGR_INTERNAL(clk_mgr_base));
+}
+
+
+/*
+ * Compare two clock states. Only dispclk, dppclk, dcfclk, deep-sleep
+ * dcfclk, z-state support and the DTBCLK enable flag take part in the
+ * comparison. Returns true when all six match.
+ */
+bool dcn35_are_clock_states_equal(struct dc_clocks *a,
+		struct dc_clocks *b)
+{
+	return a->dispclk_khz == b->dispclk_khz &&
+	       a->dppclk_khz == b->dppclk_khz &&
+	       a->dcfclk_khz == b->dcfclk_khz &&
+	       a->dcfclk_deep_sleep_khz == b->dcfclk_deep_sleep_khz &&
+	       a->zstate_support == b->zstate_support &&
+	       a->dtbclk_en == b->dtbclk_en;
+}
+
+/*
+ * Snapshot the CLK1 registers into @internal: CURRENT_CNT and BYPASS_CNTL
+ * for CLK0 through CLK4, plus DS_CNTL and ALLOW_DS for CLK3. Per the
+ * comments below, CLK0 is dispclk, CLK1 dppclk, CLK2 dprefclk, CLK3 dcfclk
+ * and CLK4 dtbclk.
+ */
+static void dcn35_save_clk_registers_internal(struct dcn35_clk_internal *internal, struct clk_mgr *clk_mgr_base)
+{
+ /*
+ * REG() in this file expands to clk_mgr->regs->..., so this local has to
+ * be called clk_mgr (presumably REG_READ goes through REG() - confirm).
+ */
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+ // read dtbclk
+ internal->CLK1_CLK4_CURRENT_CNT = REG_READ(CLK1_CLK4_CURRENT_CNT);
+ internal->CLK1_CLK4_BYPASS_CNTL = REG_READ(CLK1_CLK4_BYPASS_CNTL);
+
+ // read dcfclk
+ internal->CLK1_CLK3_CURRENT_CNT = REG_READ(CLK1_CLK3_CURRENT_CNT);
+ internal->CLK1_CLK3_BYPASS_CNTL = REG_READ(CLK1_CLK3_BYPASS_CNTL);
+
+ // read dcf deep sleep divider
+ internal->CLK1_CLK3_DS_CNTL = REG_READ(CLK1_CLK3_DS_CNTL);
+ internal->CLK1_CLK3_ALLOW_DS = REG_READ(CLK1_CLK3_ALLOW_DS);
+
+ // read dppclk
+ internal->CLK1_CLK1_CURRENT_CNT = REG_READ(CLK1_CLK1_CURRENT_CNT);
+ internal->CLK1_CLK1_BYPASS_CNTL = REG_READ(CLK1_CLK1_BYPASS_CNTL);
+
+ // read dprefclk
+ internal->CLK1_CLK2_CURRENT_CNT = REG_READ(CLK1_CLK2_CURRENT_CNT);
+ internal->CLK1_CLK2_BYPASS_CNTL = REG_READ(CLK1_CLK2_BYPASS_CNTL);
+
+ // read dispclk
+ internal->CLK1_CLK0_CURRENT_CNT = REG_READ(CLK1_CLK0_CURRENT_CNT);
+ internal->CLK1_CLK0_BYPASS_CNTL = REG_READ(CLK1_CLK0_BYPASS_CNTL);
+}
+/*
+ * Capture the current clock registers and decode them into @regs_and_bypass.
+ *
+ * Every CURRENT_CNT value and the CLK3 deep-sleep divider register are divided
+ * by 10 before being stored; CLK3_ALLOW_DS is stored unchanged. Each bypass
+ * selector is the low three bits of the matching BYPASS_CNTL register. When
+ * debug.pstate_enabled is set, the decoded values and then the raw registers
+ * are emitted through DC_LOG_SMU as comma-separated rows.
+ */
+static void dcn35_save_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
+ struct clk_mgr_dcn35 *clk_mgr)
+{
+ struct dcn35_clk_internal internal = {0};
+ char *bypass_clks[5] = {"0x0 DFS", "0x1 REFCLK", "0x2 ERROR", "0x3 400 FCH", "0x4 600 FCH"}; /* log labels, indexed by bypass selector 0-4 */
+
+ dcn35_save_clk_registers_internal(&internal, &clk_mgr->base.base);
+
+ regs_and_bypass->dcfclk = internal.CLK1_CLK3_CURRENT_CNT / 10;
+ regs_and_bypass->dcf_deep_sleep_divider = internal.CLK1_CLK3_DS_CNTL / 10;
+ regs_and_bypass->dcf_deep_sleep_allow = internal.CLK1_CLK3_ALLOW_DS;
+ regs_and_bypass->dprefclk = internal.CLK1_CLK2_CURRENT_CNT / 10;
+ regs_and_bypass->dispclk = internal.CLK1_CLK0_CURRENT_CNT / 10;
+ regs_and_bypass->dppclk = internal.CLK1_CLK1_CURRENT_CNT / 10;
+ regs_and_bypass->dtbclk = internal.CLK1_CLK4_CURRENT_CNT / 10;
+
+ regs_and_bypass->dppclk_bypass = internal.CLK1_CLK1_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dppclk_bypass > 4) /* the 3-bit field can hold 5-7 but bypass_clks has only 5 labels; fall back to 0 */
+ regs_and_bypass->dppclk_bypass = 0;
+ regs_and_bypass->dcfclk_bypass = internal.CLK1_CLK3_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dcfclk_bypass > 4)
+ regs_and_bypass->dcfclk_bypass = 0;
+ regs_and_bypass->dispclk_bypass = internal.CLK1_CLK0_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dispclk_bypass > 4)
+ regs_and_bypass->dispclk_bypass = 0;
+ regs_and_bypass->dprefclk_bypass = internal.CLK1_CLK2_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dprefclk_bypass > 4)
+ regs_and_bypass->dprefclk_bypass = 0;
+
+ if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
+ DC_LOG_SMU("clk_type,clk_value,deepsleep_cntl,deepsleep_allow,bypass\n");
+
+ DC_LOG_SMU("dcfclk,%d,%d,%d,%s\n",
+ regs_and_bypass->dcfclk,
+ regs_and_bypass->dcf_deep_sleep_divider,
+ regs_and_bypass->dcf_deep_sleep_allow,
+ bypass_clks[(int) regs_and_bypass->dcfclk_bypass]);
+
+ DC_LOG_SMU("dprefclk,%d,N/A,N/A,%s\n",
+ regs_and_bypass->dprefclk,
+ bypass_clks[(int) regs_and_bypass->dprefclk_bypass]);
+
+ DC_LOG_SMU("dispclk,%d,N/A,N/A,%s\n",
+ regs_and_bypass->dispclk,
+ bypass_clks[(int) regs_and_bypass->dispclk_bypass]);
+
+ // REGISTER VALUES (raw, before the divide-by-10 and masking above)
+ DC_LOG_SMU("reg_name,value,clk_type");
+
+ DC_LOG_SMU("CLK1_CLK3_CURRENT_CNT,%d,dcfclk",
+ internal.CLK1_CLK3_CURRENT_CNT);
+
+ DC_LOG_SMU("CLK1_CLK4_CURRENT_CNT,%d,dtbclk",
+ internal.CLK1_CLK4_CURRENT_CNT);
+
+ DC_LOG_SMU("CLK1_CLK3_DS_CNTL,%d,dcf_deep_sleep_divider",
+ internal.CLK1_CLK3_DS_CNTL);
+
+ DC_LOG_SMU("CLK1_CLK3_ALLOW_DS,%d,dcf_deep_sleep_allow",
+ internal.CLK1_CLK3_ALLOW_DS);
+
+ DC_LOG_SMU("CLK1_CLK2_CURRENT_CNT,%d,dprefclk",
+ internal.CLK1_CLK2_CURRENT_CNT);
+
+ DC_LOG_SMU("CLK1_CLK0_CURRENT_CNT,%d,dispclk",
+ internal.CLK1_CLK0_CURRENT_CNT);
+
+ DC_LOG_SMU("CLK1_CLK1_CURRENT_CNT,%d,dppclk",
+ internal.CLK1_CLK1_CURRENT_CNT);
+
+ DC_LOG_SMU("CLK1_CLK3_BYPASS_CNTL,%d,dcfclk_bypass",
+ internal.CLK1_CLK3_BYPASS_CNTL);
+
+ DC_LOG_SMU("CLK1_CLK2_BYPASS_CNTL,%d,dprefclk_bypass",
+ internal.CLK1_CLK2_BYPASS_CNTL);
+
+ DC_LOG_SMU("CLK1_CLK0_BYPASS_CNTL,%d,dispclk_bypass",
+ internal.CLK1_CLK0_BYPASS_CNTL);
+
+ DC_LOG_SMU("CLK1_CLK1_BYPASS_CNTL,%d,dppclk_bypass",
+ internal.CLK1_CLK1_BYPASS_CNTL);
+
+ }
+}
+
+/*
+ * Report whether the SPLL spread-spectrum enable bit is set.
+ *
+ * DCN 3.51 is checked through CLK6_spll_field_8; every other version reaching
+ * this function is checked through CLK5_spll_field_8.
+ */
+static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base)
+{
+	/* Name kept as in the original; REG_READ() presumably relies on it. */
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	const uint32_t ssc_enable =
+		(clk_mgr_base->ctx->dce_version == DCN_VERSION_3_51) ?
+		(REG_READ(CLK6_spll_field_8) & CLK6_spll_field_8__spll_ssc_en_MASK) :
+		(REG_READ(CLK5_spll_field_8) & CLK5_spll_field_8__spll_ssc_en_MASK);
+
+	return ssc_enable != 0;
+}
+
+/*
+ * Reset the cached clock state to its defaults.
+ *
+ * The whole dc_clocks structure is zeroed, so the DTBCLK reference frequency
+ * is stashed beforehand and written back afterwards.
+ */
+static void init_clk_states(struct clk_mgr *clk_mgr)
+{
+	const uint32_t saved_ref_dtbclk_khz = clk_mgr->clks.ref_dtbclk_khz;
+
+	memset(&clk_mgr->clks, 0, sizeof(struct dc_clocks));
+
+	/* Fields whose default is not zero. */
+	clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN;
+	clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN;
+	clk_mgr->clks.prev_p_state_change_support = true;
+	clk_mgr->clks.p_state_change_support = true;
+	clk_mgr->clks.ref_dtbclk_khz = saved_ref_dtbclk_khz;
+}
+
+/*
+ * Bring the clock manager to its initial state: reset the cached clocks,
+ * choose the DP DTO source clock, snapshot the clock registers, and derive the
+ * DTBCLK reference and enable flag from that snapshot.
+ */
+void dcn35_init_clocks(struct clk_mgr *clk_mgr)
+{
+	struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
+	struct clk_mgr_dcn35 *clk_mgr_dcn35 = TO_CLK_MGR_DCN35(clk_mgr_int);
+
+	init_clk_states(clk_mgr);
+
+	/* With SPLL SSC on, use dprefclk adjusted for spread spectrum; otherwise dprefclk as is. */
+	clk_mgr->dp_dto_source_clock_in_khz = dcn35_is_spll_ssc_enabled(clk_mgr) ?
+		dce_adjust_dp_ref_freq_for_ss(clk_mgr_int, clk_mgr->dprefclk_khz) :
+		clk_mgr->dprefclk_khz;
+
+	dcn35_save_clk_registers(&clk_mgr->boot_snapshot, clk_mgr_dcn35);
+
+	/* The snapshot holds CURRENT_CNT / 10, so scale it back up for the kHz field. */
+	clk_mgr->clks.ref_dtbclk_khz = clk_mgr->boot_snapshot.dtbclk * 10;
+
+	/* DTBCLK counts as enabled once the snapshot value is above 59000. */
+	if (clk_mgr->boot_snapshot.dtbclk > 59000)
+		clk_mgr->clks.dtbclk_en = true;
+}
+static struct clk_bw_params dcn35_bw_params = { /* built-in defaults; dcn35_clk_mgr_construct() fills in wm_table and installs this as bw_params */
+ .vram_type = Ddr4MemType,
+ .num_channels = 1,
+ .clk_table = {
+ .num_entries = 4, /* the entries themselves are not initialised here */
+ },
+
+};
+/*
+ * Watermark table that dcn35_clk_mgr_construct() copies into dcn35_bw_params
+ * when the BIOS-reported memory type is not LpDdr5MemType. All four sets
+ * (WM_A..WM_D) carry identical latencies.
+ */
+static struct wm_table ddr5_wm_table = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 28.0,
+ .sr_enter_plus_exit_time_us = 30.0,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 28.0,
+ .sr_enter_plus_exit_time_us = 30.0,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 28.0,
+ .sr_enter_plus_exit_time_us = 30.0,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 28.0,
+ .sr_enter_plus_exit_time_us = 30.0,
+ .valid = true,
+ },
+ }
+};
+/*
+ * Watermark table that dcn35_clk_mgr_construct() copies into dcn35_bw_params
+ * when the BIOS-reported memory type is LpDdr5MemType. It differs from
+ * ddr5_wm_table only in pstate_latency_us.
+ */
+static struct wm_table lpddr5_wm_table = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 28.0,
+ .sr_enter_plus_exit_time_us = 30.0,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 28.0,
+ .sr_enter_plus_exit_time_us = 30.0,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 28.0,
+ .sr_enter_plus_exit_time_us = 30.0,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 28.0,
+ .sr_enter_plus_exit_time_us = 30.0,
+ .valid = true,
+ },
+ }
+};
+
+static DpmClocks_t_dcn35 dummy_clocks; /* stand-in DPM table used by dcn35_clk_mgr_construct() when the GPU memory allocation fails */
+static DpmClocks_t_dcn351 dummy_clocks_dcn351; /* same fallback for the DCN 3.51 table layout */
+
+static struct dcn35_watermarks dummy_wms = { 0 }; /* stand-in watermark table when the GPU memory allocation fails */
+
+static struct dcn35_ss_info_table ss_info_table = { /* spread-spectrum lookup read by dcn35_read_ss_info_from_lut() */
+ .ss_divider = 1000,
+ .ss_percentage = {0, 0, 375, 375, 375} /* indexed by the CLK2 bypass selector value */
+};
+
+/*
+ * Look up the DPREFCLK spread-spectrum percentage for the currently selected
+ * CLK2 bypass source and, when it is non-zero, mark SS as active on dprefclk.
+ *
+ * Nothing is written when SPLL SSC is off or the selector falls outside
+ * ss_info_table.ss_percentage.
+ */
+static void dcn35_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr)
+{
+	/* In DFS mode the selector reads as 0. */
+	const uint32_t bypass_sel =
+		REG_READ(CLK1_CLK2_BYPASS_CNTL) & CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK;
+
+	if (!dcn35_is_spll_ssc_enabled(&clk_mgr->base))
+		return;
+
+	if (bypass_sel >= ARRAY_SIZE(ss_info_table.ss_percentage))
+		return;
+
+	clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[bypass_sel];
+	if (clk_mgr->dprefclk_ss_percentage == 0)
+		return;
+
+	clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider;
+	clk_mgr->ss_on_dprefclk = true;
+}
+
+/*
+ * Fill @table with the watermark ranges that are handed to the SMU.
+ *
+ * Valid entries of bw_params->wm_table are packed into consecutive WM_DCFCLK
+ * rows, because the SMU array must not contain holes. P-state-change sets get
+ * a memory-clock window derived from the DCFCLK levels in clk_table; any other
+ * set type widens the previously emitted row instead. Afterwards the first and
+ * last rows are stretched so the whole range is covered, and one SOCCLK row is
+ * written for writeback.
+ *
+ * Rows are only addressed as [num_valid_sets - 1] once at least one row
+ * exists, so an empty or leading non-P-state table cannot write before the
+ * start of the array.
+ */
+static void dcn35_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn35_watermarks *table)
+{
+	int i, num_valid_sets;
+
+	num_valid_sets = 0;
+
+	for (i = 0; i < WM_SET_COUNT; i++) {
+		/* skip empty entries, the smu array has no holes */
+		if (!bw_params->wm_table.entries[i].valid)
+			continue;
+
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets].WmSetting = bw_params->wm_table.entries[i].wm_inst;
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets].WmType = bw_params->wm_table.entries[i].wm_type;
+		/* We will not select WM based on fclk, so leave it as unconstrained */
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets].MinClock = 0;
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets].MaxClock = 0xFFFF;
+
+		if (table->WatermarkRow[WM_DCFCLK][num_valid_sets].WmType == WM_TYPE_PSTATE_CHG) {
+			if (i == 0)
+				table->WatermarkRow[WM_DCFCLK][num_valid_sets].MinMclk = 0;
+			else {
+				/* add 1 to make it non-overlapping with next lvl */
+				table->WatermarkRow[WM_DCFCLK][num_valid_sets].MinMclk =
+					bw_params->clk_table.entries[i - 1].dcfclk_mhz + 1;
+			}
+			table->WatermarkRow[WM_DCFCLK][num_valid_sets].MaxMclk =
+				bw_params->clk_table.entries[i].dcfclk_mhz;
+
+		} else if (num_valid_sets > 0) {
+			/*
+			 * Unconstrained for memory retraining (MinClock/MaxClock were
+			 * already set above). Modify the previous watermark range to
+			 * cover up to max; there is no previous row for the first set.
+			 */
+			table->WatermarkRow[WM_DCFCLK][num_valid_sets - 1].MaxClock = 0xFFFF;
+		}
+		num_valid_sets++;
+	}
+
+	ASSERT(num_valid_sets != 0); /* Must have at least one set of valid watermarks */
+
+	/* modify the min and max to make sure we cover the whole range */
+	table->WatermarkRow[WM_DCFCLK][0].MinMclk = 0;
+	table->WatermarkRow[WM_DCFCLK][0].MinClock = 0;
+	if (num_valid_sets > 0) {
+		/* ASSERT does not stop execution, so keep the index in bounds here. */
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets - 1].MaxMclk = 0xFFFF;
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets - 1].MaxClock = 0xFFFF;
+	}
+
+	/* This is for writeback only, does not matter currently as no writeback support */
+	table->WatermarkRow[WM_SOCCLK][0].WmSetting = WM_A;
+	table->WatermarkRow[WM_SOCCLK][0].MinClock = 0;
+	table->WatermarkRow[WM_SOCCLK][0].MaxClock = 0xFFFF;
+	table->WatermarkRow[WM_SOCCLK][0].MinMclk = 0;
+	table->WatermarkRow[WM_SOCCLK][0].MaxMclk = 0xFFFF;
+}
+
+/*
+ * Rebuild the watermark table in the shared buffer and tell the SMU to fetch it.
+ *
+ * Does nothing when no SMU version was detected, or when the buffer has no
+ * usable address (NULL table or a zero mc_address).
+ */
+static void dcn35_notify_wm_ranges(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	struct clk_mgr_dcn35 *clk_mgr_dcn35 = TO_CLK_MGR_DCN35(clk_mgr);
+	struct dcn35_smu_watermark_set *wm = &clk_mgr_dcn35->smu_wm_set;
+
+	if (!clk_mgr->smu_ver || !wm->wm_set || wm->mc_address.quad_part == 0)
+		return;
+
+	memset(wm->wm_set, 0, sizeof(*wm->wm_set));
+	dcn35_build_watermark_ranges(clk_mgr_base->bw_params, wm->wm_set);
+
+	/* Hand the buffer address to the SMU (high part, then low part), then start the transfer. */
+	dcn35_smu_set_dram_addr_high(clk_mgr, wm->mc_address.high_part);
+	dcn35_smu_set_dram_addr_low(clk_mgr, wm->mc_address.low_part);
+	dcn35_smu_transfer_wm_table_dram_2_smu(clk_mgr);
+}
+
+/*
+ * Ask the SMU to write its DPM clock table into the buffer described by
+ * @smu_dpm_clks. The buffer is zeroed first.
+ *
+ * Does nothing when no SMU version was detected, or when the buffer has no
+ * usable address (NULL table or a zero mc_address).
+ */
+static void dcn35_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr,
+		struct dcn35_smu_dpm_clks *smu_dpm_clks)
+{
+	DpmClocks_t_dcn35 *dpm = smu_dpm_clks->dpm_clks;
+
+	if (!clk_mgr->smu_ver || !dpm || smu_dpm_clks->mc_address.quad_part == 0)
+		return;
+
+	memset(dpm, 0, sizeof(*dpm));
+
+	dcn35_smu_set_dram_addr_high(clk_mgr, smu_dpm_clks->mc_address.high_part);
+	dcn35_smu_set_dram_addr_low(clk_mgr, smu_dpm_clks->mc_address.low_part);
+	dcn35_smu_transfer_dpm_table_smu_2_dram(clk_mgr);
+}
+
+/*
+ * DCN 3.51 counterpart of dcn35_get_dpm_table_from_smu(): zero the
+ * DpmClocks_t_dcn351 buffer and ask the SMU to fill it.
+ *
+ * Does nothing when no SMU version was detected, or when the buffer has no
+ * usable address (NULL table or a zero mc_address).
+ */
+static void dcn351_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr,
+		struct dcn351_smu_dpm_clks *smu_dpm_clks)
+{
+	DpmClocks_t_dcn351 *dpm = smu_dpm_clks->dpm_clks;
+
+	if (!clk_mgr->smu_ver || !dpm || smu_dpm_clks->mc_address.quad_part == 0)
+		return;
+
+	memset(dpm, 0, sizeof(*dpm));
+
+	dcn35_smu_set_dram_addr_high(clk_mgr, smu_dpm_clks->mc_address.high_part);
+	dcn35_smu_set_dram_addr_low(clk_mgr, smu_dpm_clks->mc_address.low_part);
+	dcn35_smu_transfer_dpm_table_smu_2_dram(clk_mgr);
+}
+/*
+ * Return the largest of the first @num_clocks values in @clocks, or 0 when
+ * @num_clocks is 0.
+ */
+static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks)
+{
+	uint32_t remaining = num_clocks;
+	uint32_t highest = 0;
+
+	/* Walk from the back; the maximum does not depend on the visiting order. */
+	while (remaining > 0) {
+		remaining--;
+		if (clocks[remaining] > highest)
+			highest = clocks[remaining];
+	}
+
+	return highest;
+}
+
+/* A clock value is accepted when it lies strictly between 1 and 100000. */
+static inline bool is_valid_clock_value(uint32_t clock_value)
+{
+	if (clock_value <= 1)
+		return false;
+
+	return clock_value < 100000;
+}
+
+/*
+ * Translate a WCK ratio code into its multiplier: WCK_RATIO_1_2 gives 2,
+ * WCK_RATIO_1_4 gives 4, and any other code gives 1.
+ */
+static unsigned int convert_wck_ratio(uint8_t wck_ratio)
+{
+	if (wck_ratio == WCK_RATIO_1_2)
+		return 2;
+
+	if (wck_ratio == WCK_RATIO_1_4)
+		return 4;
+
+	return 1;
+}
+/*
+ * Compute the DRAM speed figure for one memory P-state entry as
+ * UClk * WCK multiplier * 2 (presumably in MT/s, going by the name).
+ */
+static inline uint32_t calc_dram_speed_mts(const MemPstateTable_t *entry)
+{
+ return entry->UClk * convert_wck_ratio(entry->WckRatio) * 2;
+}
+/*
+ * Rebuild clk_mgr->base.bw_params from the DPM table reported by PMFW.
+ *
+ * Steps, in order:
+ *  1. find the memory P-states with the highest and the lowest valid DRAM speed;
+ *  2. take the maximum DISPCLK, DPPCLK and FCLK from the table;
+ *  3. write one clk_table entry per DCFCLK level, using the lowest-speed
+ *     P-state for the memory fields;
+ *  4. append one more entry at the highest-speed P-state when it differs from
+ *     the lowest one (or when step 3 produced nothing);
+ *  5. force the last entry to the maximum of every clock;
+ *  6. replace any clock that is still zero with the value from the last
+ *     default entry;
+ *  7. copy memory information from @bios_info and mark one watermark set valid
+ *     per clk_table entry.
+ */
+static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk_mgr,
+ struct integrated_info *bios_info,
+ DpmClocks_t_dcn35 *clock_table)
+{
+ struct clk_bw_params *bw_params = clk_mgr->base.bw_params;
+ struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1]; /* copy of the last default entry, taken before the table is overwritten */
+ uint32_t max_fclk = 0, min_pstate = 0, max_dispclk = 0, max_dppclk = 0;
+ uint32_t max_pstate = 0, max_dram_speed_mts = 0, min_dram_speed_mts = 0;
+ uint32_t num_memps, num_fclk, num_dcfclk;
+ int i;
+
+ /* Determine min/max p-state values. */
+ num_memps = (clock_table->NumMemPstatesEnabled > NUM_MEM_PSTATE_LEVELS) ? NUM_MEM_PSTATE_LEVELS :
+ clock_table->NumMemPstatesEnabled;
+ for (i = 0; i < num_memps; i++) {
+ uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]);
+
+ if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts > max_dram_speed_mts) {
+ max_dram_speed_mts = dram_speed_mts;
+ max_pstate = i;
+ }
+ }
+
+ min_dram_speed_mts = max_dram_speed_mts; /* start the minimum search from the maximum just found */
+ min_pstate = max_pstate;
+
+ for (i = 0; i < num_memps; i++) {
+ uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]);
+
+ if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts < min_dram_speed_mts) {
+ min_dram_speed_mts = dram_speed_mts;
+ min_pstate = i;
+ }
+ }
+
+ /* We expect the table to contain at least one valid P-state entry. */
+ ASSERT(clock_table->NumMemPstatesEnabled &&
+ is_valid_clock_value(max_dram_speed_mts) &&
+ is_valid_clock_value(min_dram_speed_mts));
+
+ /* dispclk and dppclk can be max at any voltage, same number of levels for both */
+ if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS &&
+ clock_table->NumDispClkLevelsEnabled <= NUM_DPPCLK_DPM_LEVELS) {
+ max_dispclk = find_max_clk_value(clock_table->DispClocks,
+ clock_table->NumDispClkLevelsEnabled);
+ max_dppclk = find_max_clk_value(clock_table->DppClocks,
+ clock_table->NumDispClkLevelsEnabled);
+ } else {
+ /* Invalid number of entries in the table from PMFW. */
+ ASSERT(0);
+ }
+
+ /* Base the clock table on dcfclk, need at least one entry regardless of pmfw table */
+ ASSERT(clock_table->NumDcfClkLevelsEnabled > 0);
+
+ num_fclk = (clock_table->NumFclkLevelsEnabled > NUM_FCLK_DPM_LEVELS) ? NUM_FCLK_DPM_LEVELS :
+ clock_table->NumFclkLevelsEnabled;
+ max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, num_fclk);
+
+ num_dcfclk = (clock_table->NumDcfClkLevelsEnabled > NUM_DCFCLK_DPM_LEVELS) ? NUM_DCFCLK_DPM_LEVELS :
+ clock_table->NumDcfClkLevelsEnabled;
+ for (i = 0; i < num_dcfclk; i++) {
+ int j;
+
+ /* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */
+ for (j = bw_params->clk_table.num_entries - 1; j > 0; j--) /* ends with j == 0 when no entry matches */
+ if (bw_params->clk_table.entries[j].dcfclk_mhz <= clock_table->DcfClocks[i])
+ break;
+
+ bw_params->clk_table.entries[i].phyclk_mhz = bw_params->clk_table.entries[j].phyclk_mhz;
+ bw_params->clk_table.entries[i].phyclk_d18_mhz = bw_params->clk_table.entries[j].phyclk_d18_mhz;
+ bw_params->clk_table.entries[i].dtbclk_mhz = bw_params->clk_table.entries[j].dtbclk_mhz;
+
+ /* Now update clocks we do read */
+ bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemPstateTable[min_pstate].MemClk;
+ bw_params->clk_table.entries[i].voltage = clock_table->MemPstateTable[min_pstate].Voltage;
+ bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i];
+ bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i];
+ bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
+ bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
+ bw_params->clk_table.entries[i].wck_ratio =
+ convert_wck_ratio(clock_table->MemPstateTable[min_pstate].WckRatio);
+
+ /* Dcfclk and Fclk are tied, but at a different ratio */
+ bw_params->clk_table.entries[i].fclk_mhz = min(max_fclk, 2 * clock_table->DcfClocks[i]);
+ }
+
+ /* Make sure to include at least one entry at highest pstate */
+ if (max_pstate != min_pstate || i == 0) {
+ if (i > MAX_NUM_DPM_LVL - 1) /* table already full: reuse the last slot instead of writing past it */
+ i = MAX_NUM_DPM_LVL - 1;
+
+ bw_params->clk_table.entries[i].fclk_mhz = max_fclk;
+ bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemPstateTable[max_pstate].MemClk;
+ bw_params->clk_table.entries[i].voltage = clock_table->MemPstateTable[max_pstate].Voltage;
+ bw_params->clk_table.entries[i].dcfclk_mhz =
+ find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].socclk_mhz =
+ find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
+ bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
+ bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio(
+ clock_table->MemPstateTable[max_pstate].WckRatio);
+ i++;
+ }
+ bw_params->clk_table.num_entries = i--; /* num_entries receives the count; i is left pointing at the last entry */
+
+ /* Make sure all highest clocks are included*/
+ bw_params->clk_table.entries[i].socclk_mhz =
+ find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].dispclk_mhz =
+ find_max_clk_value(clock_table->DispClocks, NUM_DISPCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].dppclk_mhz =
+ find_max_clk_value(clock_table->DppClocks, NUM_DPPCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].fclk_mhz =
+ find_max_clk_value(clock_table->FclkClocks_Freq, NUM_FCLK_DPM_LEVELS);
+ ASSERT(clock_table->DcfClocks[i] == find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS));
+ bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz;
+ bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz;
+ bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz;
+ bw_params->clk_table.num_entries_per_clk.num_dcfclk_levels = clock_table->NumDcfClkLevelsEnabled;
+ bw_params->clk_table.num_entries_per_clk.num_dispclk_levels = clock_table->NumDispClkLevelsEnabled;
+ bw_params->clk_table.num_entries_per_clk.num_dppclk_levels = clock_table->NumDispClkLevelsEnabled; /* DPPCLK reuses the DISPCLK level count */
+ bw_params->clk_table.num_entries_per_clk.num_fclk_levels = clock_table->NumFclkLevelsEnabled;
+ bw_params->clk_table.num_entries_per_clk.num_memclk_levels = clock_table->NumMemPstatesEnabled;
+ bw_params->clk_table.num_entries_per_clk.num_socclk_levels = clock_table->NumSocClkLevelsEnabled;
+
+ /*
+ * Set any 0 clocks to max default setting. Not an issue for
+ * power since we aren't doing switching in such case anyway
+ */
+ for (i = 0; i < bw_params->clk_table.num_entries; i++) {
+ if (!bw_params->clk_table.entries[i].fclk_mhz) {
+ bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz;
+ bw_params->clk_table.entries[i].memclk_mhz = def_max.memclk_mhz;
+ bw_params->clk_table.entries[i].voltage = def_max.voltage;
+ }
+ if (!bw_params->clk_table.entries[i].dcfclk_mhz)
+ bw_params->clk_table.entries[i].dcfclk_mhz = def_max.dcfclk_mhz;
+ if (!bw_params->clk_table.entries[i].socclk_mhz)
+ bw_params->clk_table.entries[i].socclk_mhz = def_max.socclk_mhz;
+ if (!bw_params->clk_table.entries[i].dispclk_mhz)
+ bw_params->clk_table.entries[i].dispclk_mhz = def_max.dispclk_mhz;
+ if (!bw_params->clk_table.entries[i].dppclk_mhz)
+ bw_params->clk_table.entries[i].dppclk_mhz = def_max.dppclk_mhz;
+ if (!bw_params->clk_table.entries[i].fclk_mhz) /* second fclk check; the first one at the top of this loop body already assigned def_max.fclk_mhz */
+ bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz;
+ if (!bw_params->clk_table.entries[i].phyclk_mhz)
+ bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz;
+ if (!bw_params->clk_table.entries[i].phyclk_d18_mhz)
+ bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz;
+ if (!bw_params->clk_table.entries[i].dtbclk_mhz)
+ bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz;
+ }
+ ASSERT(bw_params->clk_table.entries[i-1].dcfclk_mhz); /* i == num_entries here, so this checks the last entry */
+ bw_params->vram_type = bios_info->memory_type;
+ bw_params->dram_channel_width_bytes = bios_info->memory_type == 0x22 ? 8 : 4; /* NOTE(review): 0x22 is presumably one of the memory_type enum values - confirm which */
+ bw_params->num_channels = bios_info->ma_channel_number ? bios_info->ma_channel_number : 4;
+
+ for (i = 0; i < WM_SET_COUNT; i++) {
+ bw_params->wm_table.entries[i].wm_inst = i;
+
+ if (i >= bw_params->clk_table.num_entries) {
+ bw_params->wm_table.entries[i].valid = false;
+ continue;
+ }
+
+ bw_params->wm_table.entries[i].wm_type = WM_TYPE_PSTATE_CHG;
+ bw_params->wm_table.entries[i].valid = true;
+ }
+}
+
+/*
+ * Switch the cached power state to DCN_PWR_STATE_LOW_POWER when no display is
+ * active. Nothing happens if the state is already low power.
+ */
+static void dcn35_set_low_power_state(struct clk_mgr *clk_mgr_base)
+{
+	struct dc *dc = clk_mgr_base->ctx->dc;
+	struct dc_state *context = dc->current_state;
+
+	if (clk_mgr_base->clks.pwr_state == DCN_PWR_STATE_LOW_POWER)
+		return;
+
+	/* if we can go lower, go lower */
+	if (dcn35_get_active_display_cnt_wa(dc, context, NULL) == 0)
+		clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
+}
+
+/* Forward the low-power exit request to the SMU, which performs its optimization as part of it. */
+static void dcn35_exit_low_power_state(struct clk_mgr *clk_mgr_base)
+{
+	dcn35_smu_exit_low_power_state(TO_CLK_MGR_INTERNAL(clk_mgr_base));
+}
+
+/* Ask the SMU whether IPS is supported and return the answer as a bool. */
+static bool dcn35_is_ips_supported(struct clk_mgr *clk_mgr_base)
+{
+	return dcn35_smu_get_ips_supported(TO_CLK_MGR_INTERNAL(clk_mgr_base)) != 0;
+}
+
+static void dcn35_init_clocks_fpga(struct clk_mgr *clk_mgr) /* FPGA init path: only resets the cached clock state */
+{
+ init_clk_states(clk_mgr);
+
+/* TODO: Implement the functions and remove the ifndef guard. NOTE(review): no ifndef guard is visible around this function - confirm whether this note is still current. */
+}
+/*
+ * FPGA version of update_clocks.
+ *
+ * Overrides dcfclk, socclk and fclk in the requested state with fixed values,
+ * copies each requested clock into the cached state when should_set_clock()
+ * allows it, levels fclk and dppclk to the higher of the two, and finally
+ * pushes the result out through dm_set_dcn_clocks() and the DPP/DTB DTO
+ * update helpers. Note that the fixed values are written into @context itself.
+ */
+static void dcn35_update_clocks_fpga(struct clk_mgr *clk_mgr,
+ struct dc_state *context,
+ bool safe_to_lower)
+{
+ struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
+ struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
+ int fclk_adj = new_clocks->fclk_khz; /* captured before fclk_khz is overwritten with 4320000 below - NOTE(review): confirm that is intended */
+
+ /* TODO: remove this after correctly set by DML */
+ new_clocks->dcfclk_khz = 400000;
+ new_clocks->socclk_khz = 400000;
+
+ /* Min fclk = 1.2GHz since all the extra scemi logic seems to run off of it */
+ //int fclk_adj = new_clocks->fclk_khz > 1200000 ? new_clocks->fclk_khz : 1200000;
+ new_clocks->fclk_khz = 4320000;
+
+ if (should_set_clock(safe_to_lower, new_clocks->phyclk_khz, clk_mgr->clks.phyclk_khz)) {
+ clk_mgr->clks.phyclk_khz = new_clocks->phyclk_khz;
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr->clks.dcfclk_khz)) {
+ clk_mgr->clks.dcfclk_khz = new_clocks->dcfclk_khz;
+ }
+
+ if (should_set_clock(safe_to_lower,
+ new_clocks->dcfclk_deep_sleep_khz, clk_mgr->clks.dcfclk_deep_sleep_khz)) {
+ clk_mgr->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz;
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->socclk_khz, clk_mgr->clks.socclk_khz)) {
+ clk_mgr->clks.socclk_khz = new_clocks->socclk_khz;
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->dramclk_khz, clk_mgr->clks.dramclk_khz)) {
+ clk_mgr->clks.dramclk_khz = new_clocks->dramclk_khz;
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->clks.dppclk_khz)) {
+ clk_mgr->clks.dppclk_khz = new_clocks->dppclk_khz;
+ }
+
+ if (should_set_clock(safe_to_lower, fclk_adj, clk_mgr->clks.fclk_khz)) {
+ clk_mgr->clks.fclk_khz = fclk_adj;
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr->clks.dispclk_khz)) {
+ clk_mgr->clks.dispclk_khz = new_clocks->dispclk_khz;
+ }
+
+ /* Both fclk and ref_dppclk run on the same scemi clock.
+ * So take the higher value since the DPP DTO is typically programmed
+ * such that max dppclk is 1:1 with ref_dppclk.
+ * After the two checks below, fclk_khz and dppclk_khz hold the same value.
+ */
+ if (clk_mgr->clks.fclk_khz > clk_mgr->clks.dppclk_khz)
+ clk_mgr->clks.dppclk_khz = clk_mgr->clks.fclk_khz;
+ if (clk_mgr->clks.dppclk_khz > clk_mgr->clks.fclk_khz)
+ clk_mgr->clks.fclk_khz = clk_mgr->clks.dppclk_khz;
+
+ // Both fclk and ref_dppclk run on the same scemi clock.
+ clk_mgr_int->dccg->ref_dppclk = clk_mgr->clks.fclk_khz;
+
+ /* TODO: set dtbclk in correct place */
+ clk_mgr->clks.dtbclk_en = true;
+ dm_set_dcn_clocks(clk_mgr->ctx, &clk_mgr->clks);
+ dcn35_update_clocks_update_dpp_dto(clk_mgr_int, context, safe_to_lower);
+
+ dcn35_update_clocks_update_dtb_dto(clk_mgr_int, context, clk_mgr->clks.ref_dtbclk_khz);
+}
+/*
+ * Return the maximum frequency for @clk_type. Only DISPCLK, DPPCLK and DSCCLK
+ * are handled; any other type yields 0.
+ *
+ * The value comes from clk_table entry [num_levels - 1], multiplied by 1000 to
+ * go from MHz to kHz. DSCCLK is reported as one third of the DISPCLK figure.
+ * When the level count is zero, the boot snapshot is returned instead.
+ * NOTE(review): the boot snapshot holds CURRENT_CNT / 10 and is returned here
+ * without rescaling - confirm its units match the kHz result of the table path.
+ */
+static unsigned int dcn35_get_max_clock_khz(struct clk_mgr *clk_mgr_base, enum clk_type clk_type)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+ unsigned int num_clk_levels;
+
+ switch (clk_type) {
+ case CLK_TYPE_DISPCLK:
+ num_clk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dispclk_levels;
+ return num_clk_levels ?
+ clk_mgr->base.bw_params->clk_table.entries[num_clk_levels - 1].dispclk_mhz * 1000 :
+ clk_mgr->base.boot_snapshot.dispclk;
+ case CLK_TYPE_DPPCLK:
+ num_clk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dppclk_levels;
+ return num_clk_levels ?
+ clk_mgr->base.bw_params->clk_table.entries[num_clk_levels - 1].dppclk_mhz * 1000 :
+ clk_mgr->base.boot_snapshot.dppclk;
+ case CLK_TYPE_DSCCLK: /* derived from the DISPCLK levels */
+ num_clk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dispclk_levels;
+ return num_clk_levels ?
+ clk_mgr->base.bw_params->clk_table.entries[num_clk_levels - 1].dispclk_mhz * 1000 / 3 :
+ clk_mgr->base.boot_snapshot.dispclk / 3;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static struct clk_mgr_funcs dcn35_funcs = { /* callback table installed by dcn35_clk_mgr_construct() */
+ .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
+ .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
+ .update_clocks = dcn35_update_clocks,
+ .init_clocks = dcn35_init_clocks,
+ .enable_pme_wa = dcn35_enable_pme_wa,
+ .are_clock_states_equal = dcn35_are_clock_states_equal,
+ .notify_wm_ranges = dcn35_notify_wm_ranges,
+ .set_low_power_state = dcn35_set_low_power_state,
+ .exit_low_power_state = dcn35_exit_low_power_state,
+ .is_ips_supported = dcn35_is_ips_supported,
+ .get_max_clock_khz = dcn35_get_max_clock_khz,
+};
+
+struct clk_mgr_funcs dcn35_fpga_funcs = { /* reduced callback table that uses the FPGA update/init paths; not static, and not referenced in the visible part of this file */
+ .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
+ .update_clocks = dcn35_update_clocks_fpga,
+ .init_clocks = dcn35_init_clocks_fpga,
+ .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
+};
+
+/*
+ * Copy the clock tables the driver needs from a DCN 3.51 DPM table
+ * (@smu_dpm_clks_a) into a DCN 3.5 DPM table (@smu_dpm_clks_b).
+ *
+ * Only the fields listed below are transferred; every other field of the
+ * destination is left as it was. Returns without doing anything when either
+ * wrapper or either table pointer is NULL.
+ */
+static void translate_to_DpmClocks_t_dcn35(struct dcn351_smu_dpm_clks *smu_dpm_clks_a,
+		struct dcn35_smu_dpm_clks *smu_dpm_clks_b)
+{
+	DpmClocks_t_dcn351 *src;
+	DpmClocks_t_dcn35 *dst;
+	uint8_t lvl;
+
+	if (!smu_dpm_clks_a || !smu_dpm_clks_b)
+		return;
+
+	src = smu_dpm_clks_a->dpm_clks;
+	dst = smu_dpm_clks_b->dpm_clks;
+	if (!src || !dst)
+		return;
+
+	/* Per-level clock arrays. */
+	for (lvl = 0; lvl < NUM_DCFCLK_DPM_LEVELS; lvl++)
+		dst->DcfClocks[lvl] = src->DcfClocks[lvl];
+
+	for (lvl = 0; lvl < NUM_DISPCLK_DPM_LEVELS; lvl++)
+		dst->DispClocks[lvl] = src->DispClocks[lvl];
+
+	for (lvl = 0; lvl < NUM_DPPCLK_DPM_LEVELS; lvl++)
+		dst->DppClocks[lvl] = src->DppClocks[lvl];
+
+	for (lvl = 0; lvl < NUM_FCLK_DPM_LEVELS; lvl++) {
+		dst->FclkClocks_Freq[lvl] = src->FclkClocks_Freq[lvl];
+		dst->FclkClocks_Voltage[lvl] = src->FclkClocks_Voltage[lvl];
+	}
+
+	/* Memory P-states are copied field by field. */
+	for (lvl = 0; lvl < NUM_MEM_PSTATE_LEVELS; lvl++) {
+		dst->MemPstateTable[lvl].MemClk = src->MemPstateTable[lvl].MemClk;
+		dst->MemPstateTable[lvl].UClk = src->MemPstateTable[lvl].UClk;
+		dst->MemPstateTable[lvl].Voltage = src->MemPstateTable[lvl].Voltage;
+		dst->MemPstateTable[lvl].WckRatio = src->MemPstateTable[lvl].WckRatio;
+	}
+
+	/* Scalar limits and level counts. */
+	dst->MaxGfxClk = src->MaxGfxClk;
+	dst->MinGfxClk = src->MinGfxClk;
+	dst->NumDcfClkLevelsEnabled = src->NumDcfClkLevelsEnabled;
+	dst->NumDispClkLevelsEnabled = src->NumDispClkLevelsEnabled;
+	dst->NumFclkLevelsEnabled = src->NumFclkLevelsEnabled;
+	dst->NumMemPstatesEnabled = src->NumMemPstatesEnabled;
+	dst->NumSocClkLevelsEnabled = src->NumSocClkLevelsEnabled;
+
+	for (lvl = 0; lvl < NUM_SOC_VOLTAGE_LEVELS; lvl++) {
+		dst->SocClocks[lvl] = src->SocClocks[lvl];
+		dst->SocVoltage[lvl] = src->SocVoltage[lvl];
+	}
+}
+/*
+ * Construct the clock manager used for DCN 3.5 and DCN 3.51.
+ *
+ * @ctx:     display core context
+ * @clk_mgr: clock manager instance to initialise
+ * @pp_smu:  stored in the base clock manager
+ * @dccg:    stored in the base clock manager
+ *
+ * The function fills in the base fields, allocates the watermark buffer that
+ * is shared with the SMU (falling back to a static dummy when the allocation
+ * fails), queries the SMU version, selects the watermark table from the BIOS
+ * memory type, snapshots the boot clocks and reads spread-spectrum info. When
+ * debug.pstate_enabled is set it also fetches the DPM table from the SMU, logs
+ * it and, if allowed, rebuilds bw_params from it. The temporary DPM buffers
+ * are freed before returning; the watermark buffer stays allocated. Finally
+ * the IPS configuration is adjusted according to what the SMU reports.
+ *
+ * The register, shift and mask tables are only installed here for versions
+ * other than DCN 3.51.
+ */
+void dcn35_clk_mgr_construct(
+		struct dc_context *ctx,
+		struct clk_mgr_dcn35 *clk_mgr,
+		struct pp_smu_funcs *pp_smu,
+		struct dccg *dccg)
+{
+	struct dcn35_smu_dpm_clks smu_dpm_clks = { 0 };
+	struct dcn351_smu_dpm_clks smu_dpm_clks_dcn351 = { 0 };
+	clk_mgr->base.base.ctx = ctx;
+	clk_mgr->base.base.funcs = &dcn35_funcs;
+
+	clk_mgr->base.pp_smu = pp_smu;
+
+	clk_mgr->base.dccg = dccg;
+	clk_mgr->base.dfs_bypass_disp_clk = 0;
+
+	clk_mgr->base.dprefclk_ss_percentage = 0;
+	clk_mgr->base.dprefclk_ss_divider = 1000;
+	clk_mgr->base.ss_on_dprefclk = false;
+	clk_mgr->base.dfs_ref_freq_khz = 48000;
+	if (ctx->dce_version != DCN_VERSION_3_51) {
+		clk_mgr->base.regs = &clk_mgr_regs_dcn35;
+		clk_mgr->base.clk_mgr_shift = &clk_mgr_shift_dcn35;
+		clk_mgr->base.clk_mgr_mask = &clk_mgr_mask_dcn35;
+	}
+
+
+	clk_mgr->smu_wm_set.wm_set = (struct dcn35_watermarks *)dm_helpers_allocate_gpu_mem(
+				clk_mgr->base.base.ctx,
+				DC_MEM_ALLOC_TYPE_GART,
+				sizeof(struct dcn35_watermarks),
+				&clk_mgr->smu_wm_set.mc_address.quad_part);
+
+	if (!clk_mgr->smu_wm_set.wm_set) {
+		/* A zero mc_address marks the dummy so it is never sent to the SMU or freed. */
+		clk_mgr->smu_wm_set.wm_set = &dummy_wms;
+		clk_mgr->smu_wm_set.mc_address.quad_part = 0;
+	}
+	ASSERT(clk_mgr->smu_wm_set.wm_set);
+
+	smu_dpm_clks.dpm_clks = (DpmClocks_t_dcn35 *)dm_helpers_allocate_gpu_mem(
+				clk_mgr->base.base.ctx,
+				DC_MEM_ALLOC_TYPE_GART,
+				sizeof(DpmClocks_t_dcn35),
+				&smu_dpm_clks.mc_address.quad_part);
+	if (smu_dpm_clks.dpm_clks == NULL) {
+		smu_dpm_clks.dpm_clks = &dummy_clocks;
+		smu_dpm_clks.mc_address.quad_part = 0;
+	}
+	ASSERT(smu_dpm_clks.dpm_clks);
+
+	if (ctx->dce_version == DCN_VERSION_3_51) {
+		smu_dpm_clks_dcn351.dpm_clks = (DpmClocks_t_dcn351 *)dm_helpers_allocate_gpu_mem(
+					clk_mgr->base.base.ctx,
+					DC_MEM_ALLOC_TYPE_GART,
+					sizeof(DpmClocks_t_dcn351),
+					&smu_dpm_clks_dcn351.mc_address.quad_part);
+		if (smu_dpm_clks_dcn351.dpm_clks == NULL) {
+			smu_dpm_clks_dcn351.dpm_clks = &dummy_clocks_dcn351;
+			smu_dpm_clks_dcn351.mc_address.quad_part = 0;
+		}
+	}
+
+	clk_mgr->base.smu_ver = dcn35_smu_get_smu_version(&clk_mgr->base);
+
+	if (clk_mgr->base.smu_ver)
+		clk_mgr->base.smu_present = true;
+
+	/* TODO: Check we get what we expect during bringup */
+	clk_mgr->base.base.dentist_vco_freq_khz = get_vco_frequency_from_reg(&clk_mgr->base);
+
+	/*
+	 * integrated_info may be NULL (it is NULL-checked further down before
+	 * populating bw_params), so guard the dereference and fall back to the
+	 * DDR5 table in that case.
+	 */
+	if (ctx->dc_bios->integrated_info &&
+	    ctx->dc_bios->integrated_info->memory_type == LpDdr5MemType) {
+		dcn35_bw_params.wm_table = lpddr5_wm_table;
+	} else {
+		dcn35_bw_params.wm_table = ddr5_wm_table;
+	}
+	/* Saved clocks configured at boot for debug purposes */
+	dcn35_save_clk_registers(&clk_mgr->base.base.boot_snapshot, clk_mgr);
+
+	clk_mgr->base.base.dprefclk_khz = dcn35_smu_get_dprefclk(&clk_mgr->base);
+	clk_mgr->base.base.clks.ref_dtbclk_khz = 600000;
+
+	dce_clock_read_ss_info(&clk_mgr->base);
+	/*when clk src is from FCH, it could have ss, same clock src as DPREF clk*/
+
+	dcn35_read_ss_info_from_lut(&clk_mgr->base);
+
+	clk_mgr->base.base.bw_params = &dcn35_bw_params;
+
+	if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
+		int i;
+		if (ctx->dce_version == DCN_VERSION_3_51) {
+			/* Fetch in the 3.51 layout, then copy the needed fields into the 3.5 layout used below. */
+			dcn351_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks_dcn351);
+			translate_to_DpmClocks_t_dcn35(&smu_dpm_clks_dcn351, &smu_dpm_clks);
+		} else
+			dcn35_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks);
+		DC_LOG_SMU("NumDcfClkLevelsEnabled: %d\n"
+			   "NumDispClkLevelsEnabled: %d\n"
+			   "NumSocClkLevelsEnabled: %d\n"
+			   "VcnClkLevelsEnabled: %d\n"
+			   "FClkLevelsEnabled: %d\n"
+			   "NumMemPstatesEnabled: %d\n"
+			   "MinGfxClk: %d\n"
+			   "MaxGfxClk: %d\n",
+			   smu_dpm_clks.dpm_clks->NumDcfClkLevelsEnabled,
+			   smu_dpm_clks.dpm_clks->NumDispClkLevelsEnabled,
+			   smu_dpm_clks.dpm_clks->NumSocClkLevelsEnabled,
+			   smu_dpm_clks.dpm_clks->VcnClkLevelsEnabled,
+			   smu_dpm_clks.dpm_clks->NumFclkLevelsEnabled,
+			   smu_dpm_clks.dpm_clks->NumMemPstatesEnabled,
+			   smu_dpm_clks.dpm_clks->MinGfxClk,
+			   smu_dpm_clks.dpm_clks->MaxGfxClk);
+		for (i = 0; i < smu_dpm_clks.dpm_clks->NumDcfClkLevelsEnabled; i++) {
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks->DcfClocks[%d] = %d\n",
+				   i,
+				   smu_dpm_clks.dpm_clks->DcfClocks[i]);
+		}
+		for (i = 0; i < smu_dpm_clks.dpm_clks->NumDispClkLevelsEnabled; i++) {
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks->DispClocks[%d] = %d\n",
+				   i, smu_dpm_clks.dpm_clks->DispClocks[i]);
+		}
+		for (i = 0; i < smu_dpm_clks.dpm_clks->NumSocClkLevelsEnabled; i++) {
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks->SocClocks[%d] = %d\n",
+				   i, smu_dpm_clks.dpm_clks->SocClocks[i]);
+		}
+		for (i = 0; i < smu_dpm_clks.dpm_clks->NumFclkLevelsEnabled; i++) {
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks->FclkClocks_Freq[%d] = %d\n",
+				   i, smu_dpm_clks.dpm_clks->FclkClocks_Freq[i]);
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks->FclkClocks_Voltage[%d] = %d\n",
+				   i, smu_dpm_clks.dpm_clks->FclkClocks_Voltage[i]);
+		}
+		for (i = 0; i < smu_dpm_clks.dpm_clks->NumSocClkLevelsEnabled; i++)
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks->SocVoltage[%d] = %d\n",
+				   i, smu_dpm_clks.dpm_clks->SocVoltage[i]);
+
+		for (i = 0; i < smu_dpm_clks.dpm_clks->NumMemPstatesEnabled; i++) {
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks.MemPstateTable[%d].UClk = %d\n"
+				   "smu_dpm_clks.dpm_clks->MemPstateTable[%d].MemClk= %d\n"
+				   "smu_dpm_clks.dpm_clks->MemPstateTable[%d].Voltage = %d\n",
+				   i, smu_dpm_clks.dpm_clks->MemPstateTable[i].UClk,
+				   i, smu_dpm_clks.dpm_clks->MemPstateTable[i].MemClk,
+				   i, smu_dpm_clks.dpm_clks->MemPstateTable[i].Voltage);
+		}
+
+		if (ctx->dc_bios->integrated_info && ctx->dc->config.use_default_clock_table == false) {
+			dcn35_clk_mgr_helper_populate_bw_params(
+					&clk_mgr->base,
+					ctx->dc_bios->integrated_info,
+					smu_dpm_clks.dpm_clks);
+		}
+	}
+
+	/* Only real GPU allocations are freed; the static dummies have a zero mc_address. */
+	if (smu_dpm_clks.dpm_clks && smu_dpm_clks.mc_address.quad_part != 0)
+		dm_helpers_free_gpu_mem(clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_GART,
+				smu_dpm_clks.dpm_clks);
+
+	if (smu_dpm_clks_dcn351.dpm_clks && smu_dpm_clks_dcn351.mc_address.quad_part != 0)
+		dm_helpers_free_gpu_mem(clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_GART,
+				smu_dpm_clks_dcn351.dpm_clks);
+
+	if (ctx->dc->config.disable_ips != DMUB_IPS_DISABLE_ALL) {
+		bool ips_support = false;
+
+		/*avoid call pmfw at init*/
+		ips_support = dcn35_smu_get_ips_supported(&clk_mgr->base);
+		if (ips_support) {
+			ctx->dc->debug.ignore_pg = false;
+			ctx->dc->debug.disable_dpp_power_gate = false;
+			ctx->dc->debug.disable_hubp_power_gate = false;
+			ctx->dc->debug.disable_dsc_power_gate = false;
+
+			/* Disable dynamic IPS2 in older PMFW (93.12) for Z8 interop. */
+			if (ctx->dc->config.disable_ips == DMUB_IPS_ENABLE &&
+			    ctx->dce_version != DCN_VERSION_3_51 &&
+			    ((clk_mgr->base.smu_ver & 0x00FFFFFF) <= 0x005d0c00))
+				ctx->dc->config.disable_ips = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF;
+		} else {
+			/*let's reset the config control flag*/
+			ctx->dc->config.disable_ips = DMUB_IPS_DISABLE_ALL; /*pmfw not support it, disable it all*/
+		}
+	}
+}
+
+/*
+ * Release the watermark buffer owned by the clock manager.
+ *
+ * The buffer is freed only when it has a non-zero mc_address; the static dummy
+ * installed after a failed allocation has a zero address and is skipped. The
+ * free uses DC_MEM_ALLOC_TYPE_GART to match the type the buffer was allocated
+ * with in dcn35_clk_mgr_construct().
+ */
+void dcn35_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int)
+{
+	struct clk_mgr_dcn35 *clk_mgr = TO_CLK_MGR_DCN35(clk_mgr_int);
+
+	if (clk_mgr->smu_wm_set.wm_set && clk_mgr->smu_wm_set.mc_address.quad_part != 0)
+		dm_helpers_free_gpu_mem(clk_mgr_int->base.ctx, DC_MEM_ALLOC_TYPE_GART,
+				clk_mgr->smu_wm_set.wm_set);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h
new file mode 100644
index 000000000000..a12a9bf90806
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN35_CLK_MGR_H__
+#define __DCN35_CLK_MGR_H__
+#include "clk_mgr_internal.h"
+
+#define NUM_CLOCK_SOURCES 5
+
+struct dcn35_watermarks;
+
+/*
+ * Watermark table handle: a CPU pointer to the table together with its
+ * mc_address. dcn35_clk_mgr_destroy() frees wm_set when both are non-zero.
+ */
+struct dcn35_smu_watermark_set {
+ struct dcn35_watermarks *wm_set;
+ union large_integer mc_address;
+};
+
+/*
+ * One divider plus one percentage entry per clock source
+ * (NUM_CLOCK_SOURCES entries).
+ * NOTE(review): "ss" presumably stands for spread spectrum - confirm
+ * against the users of this table.
+ */
+struct dcn35_ss_info_table {
+ uint32_t ss_divider;
+ uint32_t ss_percentage[NUM_CLOCK_SOURCES];
+};
+
+/*
+ * DCN3.5 clock manager: the generic clk_mgr_internal state followed by the
+ * DCN3.5 watermark table handle.
+ * NOTE(review): TO_CLK_MGR_DCN35() is used to get from a clk_mgr_internal
+ * pointer back to this struct; presumably that relies on "base" being
+ * embedded here - confirm against the macro definition.
+ */
+struct clk_mgr_dcn35 {
+ struct clk_mgr_internal base;
+ struct dcn35_smu_watermark_set smu_wm_set;
+};
+
+bool dcn35_are_clock_states_equal(struct dc_clocks *a,
+ struct dc_clocks *b);
+void dcn35_init_clocks(struct clk_mgr *clk_mgr);
+void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
+ struct dc_state *context,
+ bool safe_to_lower);
+
+void dcn35_clk_mgr_construct(struct dc_context *ctx,
+ struct clk_mgr_dcn35 *clk_mgr,
+ struct pp_smu_funcs *pp_smu,
+ struct dccg *dccg);
+
+void dcn35_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int);
+
+void dcn351_clk_mgr_construct(struct dc_context *ctx,
+ struct clk_mgr_dcn35 *clk_mgr,
+ struct pp_smu_funcs *pp_smu,
+ struct dccg *dccg);
+#endif //__DCN35_CLK_MGR_H__
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c
new file mode 100644
index 000000000000..604d256cb47a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c
@@ -0,0 +1,508 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+
+#include "core_types.h"
+#include "clk_mgr_internal.h"
+#include "reg_helper.h"
+#include "dm_helpers.h"
+#include "dcn35_smu.h"
+
+#include "mp/mp_14_0_0_offset.h"
+#include "mp/mp_14_0_0_sh_mask.h"
+
+/* TODO: Use the real headers when they're correct */
+#define MP1_BASE__INST0_SEG0 0x00016000
+#define MP1_BASE__INST0_SEG1 0x0243FC00
+#define MP1_BASE__INST0_SEG2 0x00DC0000
+#define MP1_BASE__INST0_SEG3 0x00E00000
+#define MP1_BASE__INST0_SEG4 0x00E40000
+#define MP1_BASE__INST0_SEG5 0
+
+#ifdef BASE_INNER
+#undef BASE_INNER
+#endif
+
+#define BASE_INNER(seg) MP1_BASE__INST0_SEG ## seg
+
+#define BASE(seg) BASE_INNER(seg)
+
+#define REG(reg_name) (BASE(reg##reg_name##_BASE_IDX) + reg##reg_name)
+
+#define FN(reg_name, field) \
+ FD(reg_name##__##field)
+
+#include "logger_types.h"
+#undef DC_LOGGER
+#define DC_LOGGER \
+ CTX->logger
+#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
+
+#define VBIOSSMC_MSG_TestMessage 0x1
+#define VBIOSSMC_MSG_GetSmuVersion 0x2
+#define VBIOSSMC_MSG_PowerUpGfx 0x3
+#define VBIOSSMC_MSG_SetDispclkFreq 0x4
+#define VBIOSSMC_MSG_SetDprefclkFreq 0x5 //Not used. DPRef is constant
+#define VBIOSSMC_MSG_SetDppclkFreq 0x6
+#define VBIOSSMC_MSG_SetHardMinDcfclkByFreq 0x7
+#define VBIOSSMC_MSG_SetMinDeepSleepDcfclk 0x8
+#define VBIOSSMC_MSG_SetPhyclkVoltageByFreq 0x9 //Keep it in case VMIN does not support phy clk
+#define VBIOSSMC_MSG_GetFclkFrequency 0xA
+#define VBIOSSMC_MSG_SetDisplayCount 0xB //Not used anymore
+#define VBIOSSMC_MSG_EnableTmdp48MHzRefclkPwrDown 0xC //To ask PMFW turn off TMDP 48MHz refclk during display off to save power
+#define VBIOSSMC_MSG_UpdatePmeRestore 0xD
+#define VBIOSSMC_MSG_SetVbiosDramAddrHigh 0xE //Used for WM table txfr
+#define VBIOSSMC_MSG_SetVbiosDramAddrLow 0xF
+#define VBIOSSMC_MSG_TransferTableSmu2Dram 0x10
+#define VBIOSSMC_MSG_TransferTableDram2Smu 0x11
+#define VBIOSSMC_MSG_SetDisplayIdleOptimizations 0x12
+#define VBIOSSMC_MSG_GetDprefclkFreq 0x13
+#define VBIOSSMC_MSG_GetDtbclkFreq 0x14
+#define VBIOSSMC_MSG_AllowZstatesEntry 0x15
+#define VBIOSSMC_MSG_DisallowZstatesEntry 0x16
+#define VBIOSSMC_MSG_SetDtbClk 0x17
+#define VBIOSSMC_MSG_DispIPS2Entry 0x18 ///< Display IPS2 entry, DMU
+#define VBIOSSMC_MSG_DispIPS2Exit 0x19 ///< Display IPS2 exit, DMU
+#define VBIOSSMC_MSG_DisableLSdma 0x1A ///< Disable LSDMA; only sent by VBIOS
+#define VBIOSSMC_MSG_DpControllerPhyStatus 0x1B ///< Inform PMFW about the pre conditions for turning SLDO2 on/off . bit[0]==1 precondition is met, bit[1-2] are for DPPHY number
+#define VBIOSSMC_MSG_QueryIPS2Support 0x1C ///< Return 1: support; else not supported
+#define VBIOSSMC_MSG_NotifyHostRouterBW 0x1D
+#define VBIOSSMC_Message_Count 0x1E
+
+#define VBIOSSMC_Status_BUSY 0x0
+#define VBIOSSMC_Result_OK 0x1
+#define VBIOSSMC_Result_Failed 0xFF
+#define VBIOSSMC_Result_UnknownCmd 0xFE
+#define VBIOSSMC_Result_CmdRejectedPrereq 0xFD
+#define VBIOSSMC_Result_CmdRejectedBusy 0xFC
+
+/*
+ * Argument of VBIOSSMC_MSG_NotifyHostRouterBW: a 16-bit host router ID and a
+ * 16-bit bandwidth in Mbps packed into one 32-bit word. "all" is the value
+ * that is actually sent as the message parameter.
+ */
+union dcn35_dpia_host_router_bw {
+ struct {
+ uint32_t hr_id : 16;
+ uint32_t bw_mbps : 16;
+ } bits;
+ uint32_t all;
+};
+
+/*
+ * Poll the SMU response register (MP1_SMN_C2PMSG_91) until it leaves the
+ * BUSY state or the retries run out.
+ *
+ * Function to be used instead of REG_WAIT macro because the wait ends when
+ * the register is NOT EQUAL to zero, and because the translation in msg_if.h
+ * won't work with REG_WAIT.
+ *
+ * delay_us is the pause between two reads; max_retries is the number of
+ * additional reads allowed after the first one. Returns the last value read,
+ * which is still VBIOSSMC_Status_BUSY when the retries were exhausted.
+ */
+static uint32_t dcn35_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, unsigned int delay_us, unsigned int max_retries)
+{
+ uint32_t res_val = VBIOSSMC_Status_BUSY;
+
+ do {
+ res_val = REG_READ(MP1_SMN_C2PMSG_91);
+ if (res_val != VBIOSSMC_Status_BUSY)
+ break;
+
+ /* Sleep for millisecond-scale delays, busy-wait for shorter ones. */
+ if (delay_us >= 1000)
+ msleep(delay_us/1000);
+ else if (delay_us > 0)
+ udelay(delay_us);
+
+ /* With timeouts disabled, undo the decrement below so the loop never expires. */
+ if (clk_mgr->base.ctx->dc->debug.disable_timeout)
+ max_retries++;
+ } while (max_retries--);
+
+ return res_val;
+}
+
+/*
+ * Send one message to the SMU through the C2PMSG mailbox and return its reply.
+ *
+ * Sequence: wait for any previous transaction to finish, clear the response
+ * register (C2PMSG_91), write the argument (C2PMSG_83), write the message ID
+ * (C2PMSG_67) to start the transaction, then wait for the response.
+ *
+ * Returns -1 when the mailbox is still busy before sending or when the SMU
+ * answers VBIOSSMC_Result_Failed; otherwise returns the value read back from
+ * the argument register (C2PMSG_83).
+ */
+static int dcn35_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
+ unsigned int msg_id,
+ unsigned int param)
+{
+ uint32_t result;
+
+ result = dcn35_smu_wait_for_response(clk_mgr, 10, 2000000);
+ ASSERT(result == VBIOSSMC_Result_OK);
+
+ if (result != VBIOSSMC_Result_OK) {
+ DC_LOG_WARNING("SMU response after wait: %d, msg id = %d\n", result, msg_id);
+
+ /* Only a still-busy mailbox aborts; any other leftover status is just logged and the message is sent anyway. */
+ if (result == VBIOSSMC_Status_BUSY)
+ return -1;
+ }
+
+ /* First clear response register */
+ REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Status_BUSY);
+
+ /* Set the parameter register for the SMU message (MHz for the clock requests) */
+ REG_WRITE(MP1_SMN_C2PMSG_83, param);
+
+ /* Trigger the message transaction by writing the message ID */
+ REG_WRITE(MP1_SMN_C2PMSG_67, msg_id);
+
+ result = dcn35_smu_wait_for_response(clk_mgr, 10, 2000000);
+
+ if (result == VBIOSSMC_Result_Failed) {
+ /* A rejected watermark table transfer is only warned about; every other failure asserts. */
+ if (msg_id == VBIOSSMC_MSG_TransferTableDram2Smu &&
+ param == TABLE_WATERMARKS)
+ DC_LOG_WARNING("Watermarks table not configured properly by SMU");
+ else
+ ASSERT(0);
+ /* Put the response register back to OK so the pre-send check of the next call reads OK. */
+ REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Result_OK);
+ DC_LOG_WARNING("SMU response after wait: %d, msg id = %d\n", result, msg_id);
+ return -1;
+ }
+
+ /*
+ * IS_SMU_TIMEOUT() is defined outside this file. On timeout: assert, wait
+ * once more, log, and then fall through to return the argument register
+ * whatever the second wait produced.
+ */
+ if (IS_SMU_TIMEOUT(result)) {
+ ASSERT(0);
+ result = dcn35_smu_wait_for_response(clk_mgr, 10, 2000000);
+ //dm_helpers_smu_timeout(CTX, msg_id, param, 10 * 200000);
+ DC_LOG_WARNING("SMU response after wait: %d, msg id = %d\n", result, msg_id);
+ }
+
+ return REG_READ(MP1_SMN_C2PMSG_83);
+}
+
+/* Query the SMU firmware version; returns the raw reply of GetSmuVersion. */
+int dcn35_smu_get_smu_version(struct clk_mgr_internal *clk_mgr)
+{
+	int version = dcn35_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_GetSmuVersion, 0);
+
+	return version;
+}
+
+
+/*
+ * Request a DISPCLK frequency from the SMU.
+ *
+ * The request is rounded up to whole MHz before it is sent. Returns the
+ * SMU's reply converted back to kHz (-1000 when the message failed), or the
+ * requested value unchanged when no SMU is present.
+ */
+int dcn35_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz)
+{
+	int granted_mhz;
+
+	if (clk_mgr->smu_present) {
+		/* The message parameter is expressed in MHz. */
+		granted_mhz = dcn35_smu_send_msg_with_param(clk_mgr,
+				VBIOSSMC_MSG_SetDispclkFreq,
+				khz_to_mhz_ceil(requested_dispclk_khz));
+
+		smu_print("requested_dispclk_khz = %d, actual_dispclk_set_mhz: %d\n", requested_dispclk_khz, granted_mhz);
+		return granted_mhz * 1000;
+	}
+
+	return requested_dispclk_khz;
+}
+
+/*
+ * Send the clock manager's current DPREFCLK (rounded up to MHz) to the SMU.
+ *
+ * Returns the SMU's reply converted to kHz (-1000 when the message failed),
+ * or base.dprefclk_khz unchanged when no SMU is present.
+ */
+int dcn35_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr)
+{
+	int granted_mhz;
+
+	if (clk_mgr->smu_present) {
+		granted_mhz = dcn35_smu_send_msg_with_param(clk_mgr,
+				VBIOSSMC_MSG_SetDprefclkFreq,
+				khz_to_mhz_ceil(clk_mgr->base.dprefclk_khz));
+
+		/* TODO: add code for programming DP DTO, currently this is done by command table */
+		return granted_mhz * 1000;
+	}
+
+	return clk_mgr->base.dprefclk_khz;
+}
+
+/*
+ * Request a hard minimum DCFCLK from the SMU.
+ *
+ * The request is rounded up to whole MHz. Returns the SMU's reply in kHz
+ * (-1000 when the message failed), or the requested value when no SMU is
+ * present.
+ */
+int dcn35_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_dcfclk_khz)
+{
+	int granted_mhz;
+
+	if (clk_mgr->smu_present) {
+		granted_mhz = dcn35_smu_send_msg_with_param(clk_mgr,
+				VBIOSSMC_MSG_SetHardMinDcfclkByFreq,
+				khz_to_mhz_ceil(requested_dcfclk_khz));
+
+		smu_print("requested_dcfclk_khz = %d, actual_dcfclk_set_mhz: %d\n", requested_dcfclk_khz, granted_mhz);
+		return granted_mhz * 1000;
+	}
+
+	return requested_dcfclk_khz;
+}
+
+/*
+ * Request a minimum deep-sleep DCFCLK from the SMU.
+ *
+ * The request is rounded up to whole MHz. Returns the SMU's reply in kHz
+ * (-1000 when the message failed), or the requested value when no SMU is
+ * present.
+ */
+int dcn35_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_min_ds_dcfclk_khz)
+{
+	int granted_mhz;
+
+	if (clk_mgr->smu_present) {
+		granted_mhz = dcn35_smu_send_msg_with_param(clk_mgr,
+				VBIOSSMC_MSG_SetMinDeepSleepDcfclk,
+				khz_to_mhz_ceil(requested_min_ds_dcfclk_khz));
+
+		smu_print("requested_min_ds_dcfclk_khz = %d, actual_min_ds_dcfclk_mhz: %d\n", requested_min_ds_dcfclk_khz, granted_mhz);
+		return granted_mhz * 1000;
+	}
+
+	return requested_min_ds_dcfclk_khz;
+}
+
+/*
+ * Request a DPPCLK frequency from the SMU.
+ *
+ * The request is rounded up to whole MHz. Returns the SMU's reply in kHz
+ * (-1000 when the message failed), or the requested value when no SMU is
+ * present.
+ */
+int dcn35_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_khz)
+{
+	int granted_mhz;
+
+	if (clk_mgr->smu_present) {
+		granted_mhz = dcn35_smu_send_msg_with_param(clk_mgr,
+				VBIOSSMC_MSG_SetDppclkFreq,
+				khz_to_mhz_ceil(requested_dpp_khz));
+
+		smu_print("requested_dpp_khz = %d, actual_dppclk_set_mhz: %d\n", requested_dpp_khz, granted_mhz);
+		return granted_mhz * 1000;
+	}
+
+	return requested_dpp_khz;
+}
+
+/*
+ * Forward a raw display idle optimization word to the SMU.
+ *
+ * Does nothing unless debug.pstate_enabled is set and an SMU is present.
+ */
+void dcn35_smu_set_display_idle_optimization(struct clk_mgr_internal *clk_mgr, uint32_t idle_info)
+{
+	if (!clk_mgr->base.ctx->dc->debug.pstate_enabled || !clk_mgr->smu_present)
+		return;
+
+	//TODO: Work with smu team to define optimization options.
+	dcn35_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_SetDisplayIdleOptimizations, idle_info);
+	smu_print("%s: VBIOSSMC_MSG_SetDisplayIdleOptimizations idle_info = %x\n", __func__, idle_info);
+}
+
+/*
+ * Enable or disable PHY reference clock power-down.
+ *
+ * Enabling sets both df_request_disabled and phy_ref_clk_off in the idle
+ * optimization word; disabling sends an all-zero word. No-op without an SMU.
+ */
+void dcn35_smu_enable_phy_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable)
+{
+	union display_idle_optimization_u opt = { 0 };
+
+	if (clk_mgr->smu_present) {
+		if (enable) {
+			opt.idle_info.phy_ref_clk_off = 1;
+			opt.idle_info.df_request_disabled = 1;
+		}
+
+		dcn35_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_SetDisplayIdleOptimizations, opt.data);
+		smu_print("%s smu_enable_phy_refclk_pwrdwn = %d\n", __func__, enable ? 1 : 0);
+	}
+}
+
+/* Send VBIOSSMC_MSG_UpdatePmeRestore with a zero argument; no-op without an SMU. */
+void dcn35_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr)
+{
+	if (clk_mgr->smu_present) {
+		dcn35_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_UpdatePmeRestore, 0);
+		smu_print("%s: SMC_MSG_UpdatePmeRestore\n", __func__);
+	}
+}
+
+/* Send addr_high via VBIOSSMC_MSG_SetVbiosDramAddrHigh; no-op without an SMU. */
+void dcn35_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high)
+{
+	if (clk_mgr->smu_present)
+		dcn35_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_SetVbiosDramAddrHigh, addr_high);
+}
+
+/* Send addr_low via VBIOSSMC_MSG_SetVbiosDramAddrLow; no-op without an SMU. */
+void dcn35_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low)
+{
+	if (clk_mgr->smu_present)
+		dcn35_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_SetVbiosDramAddrLow, addr_low);
+}
+
+/* Send TransferTableSmu2Dram for TABLE_DPMCLOCKS; no-op without an SMU. */
+void dcn35_smu_transfer_dpm_table_smu_2_dram(struct clk_mgr_internal *clk_mgr)
+{
+	if (clk_mgr->smu_present)
+		dcn35_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_TransferTableSmu2Dram, TABLE_DPMCLOCKS);
+}
+
+/* Send TransferTableDram2Smu for TABLE_WATERMARKS; no-op without an SMU. */
+void dcn35_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr)
+{
+	if (clk_mgr->smu_present)
+		dcn35_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_TransferTableDram2Smu, TABLE_WATERMARKS);
+}
+
+/*
+ * Tell the SMU which Z-states the display allows.
+ *
+ * Every support level is sent with VBIOSSMC_MSG_AllowZstatesEntry; only the
+ * parameter differs. Bits 8, 9 and 10 are set for ALLOW, bit 10 for Z10
+ * only, bit 8 for Z8 only, bits 8 and 10 for Z8+Z10, and no bits for
+ * DISALLOW or any unrecognized state. No-op without an SMU.
+ */
+void dcn35_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zstate_support_state support)
+{
+	const unsigned int msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
+	unsigned int param = 0;
+	unsigned int reply;
+
+	if (!clk_mgr->smu_present)
+		return;
+
+	switch (support) {
+	case DCN_ZSTATE_SUPPORT_ALLOW:
+		param = (1 << 8) | (1 << 9) | (1 << 10);
+		smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW, param = 0x%x\n", __func__, param);
+		break;
+	case DCN_ZSTATE_SUPPORT_DISALLOW:
+		smu_print("%s: SMC_MSG_AllowZstatesEntry msg_id = DISALLOW, param = 0x%x\n", __func__, param);
+		break;
+	case DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY:
+		param = (1 << 10);
+		smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW_Z10_ONLY, param = 0x%x\n", __func__, param);
+		break;
+	case DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY:
+		param = (1 << 8) | (1 << 10);
+		smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW_Z8_Z10_ONLY, param = 0x%x\n", __func__, param);
+		break;
+	case DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY:
+		param = (1 << 8);
+		smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW_Z8_ONLY, param = 0x%x\n", __func__, param);
+		break;
+	default: //DCN_ZSTATE_SUPPORT_UNKNOWN
+		/* Unknown states are sent with no bits set and without a per-state log line. */
+		break;
+	}
+
+	reply = dcn35_smu_send_msg_with_param(clk_mgr, msg_id, param);
+	smu_print("%s: msg_id = %d, param = 0x%x, return = 0x%x\n", __func__, msg_id, param, reply);
+}
+
+/*
+ * Read DPREFCLK from the SMU.
+ *
+ * Returns the reply (MHz) converted to kHz, -1000 when the message failed,
+ * or 0 when no SMU is present.
+ */
+int dcn35_smu_get_dprefclk(struct clk_mgr_internal *clk_mgr)
+{
+	int reply_mhz;
+
+	if (clk_mgr->smu_present) {
+		reply_mhz = dcn35_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_GetDprefclkFreq, 0);
+
+		smu_print("%s: SMU DPREF clk = %d mhz\n", __func__, reply_mhz);
+		return reply_mhz * 1000;
+	}
+
+	return 0;
+}
+
+/*
+ * Read DTBCLK from the SMU.
+ *
+ * Returns the reply (MHz) converted to kHz, -1000 when the message failed,
+ * or 0 when no SMU is present.
+ */
+int dcn35_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr)
+{
+	int reply_mhz;
+
+	if (clk_mgr->smu_present) {
+		reply_mhz = dcn35_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_GetDtbclkFreq, 0);
+
+		smu_print("%s: get_dtbclk = %dmhz\n", __func__, reply_mhz);
+		return reply_mhz * 1000;
+	}
+
+	return 0;
+}
+/*
+ * Switch DTBCLK on (enable = true) or off (enable = false) through the SMU.
+ * When it is on, it runs at 600 MHz. No-op without an SMU.
+ */
+void dcn35_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable)
+{
+	if (clk_mgr->smu_present) {
+		dcn35_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_SetDtbClk, enable);
+		smu_print("%s: smu_set_dtbclk = %d\n", __func__, enable ? 1 : 0);
+	}
+}
+
+/*
+ * Allow or forbid the SMU to power down the TMDP 48MHz reference clock.
+ * No-op without an SMU.
+ */
+void dcn35_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable)
+{
+	if (clk_mgr->smu_present) {
+		dcn35_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_EnableTmdp48MHzRefclkPwrDown, enable);
+		smu_print("%s: smu_enable_48mhz_tmdp_refclk_pwrdwn = %d\n", __func__, enable ? 1 : 0);
+	}
+}
+
+/*
+ * Send VBIOSSMC_MSG_DispIPS2Exit to the SMU.
+ *
+ * Returns the raw reply of the message (-1 when it failed), or 0 when no SMU
+ * is present.
+ */
+int dcn35_smu_exit_low_power_state(struct clk_mgr_internal *clk_mgr)
+{
+	int reply = 0;
+
+	if (clk_mgr->smu_present) {
+		reply = dcn35_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_DispIPS2Exit, 0);
+		smu_print("%s: smu_exit_low_power_state return = %d\n", __func__, reply);
+	}
+
+	return reply;
+}
+
+/*
+ * Ask the SMU whether IPS2 is supported (VBIOSSMC_MSG_QueryIPS2Support).
+ *
+ * Returns the SMU's reply (1 means supported), or 0 when no SMU is present
+ * or the query itself failed.
+ */
+int dcn35_smu_get_ips_supported(struct clk_mgr_internal *clk_mgr)
+{
+	int retv;
+
+	if (!clk_mgr->smu_present)
+		return 0;
+
+	retv = dcn35_smu_send_msg_with_param(
+			clk_mgr,
+			VBIOSSMC_MSG_QueryIPS2Support,
+			0);
+
+	//smu_print("%s: VBIOSSMC_MSG_QueryIPS2Support return = %x\n", __func__, retv);
+
+	/*
+	 * The send helper returns -1 when the transaction fails. Callers store
+	 * this result in a bool, so an unfiltered -1 would read as "supported";
+	 * report a failed query as unsupported instead.
+	 */
+	if (retv < 0)
+		return 0;
+
+	return retv;
+}
+
+/*
+ * Report the bandwidth of a DPIA host router to the SMU.
+ *
+ * hr_id and the bandwidth (converted from kbps to Mbps) are packed into the
+ * two 16-bit fields of the message parameter, so each value is truncated to
+ * 16 bits. No-op without an SMU, like every other message wrapper in this
+ * file.
+ */
+void dcn35_smu_notify_host_router_bw(struct clk_mgr_internal *clk_mgr, uint32_t hr_id, uint32_t bw_kbps)
+{
+	union dcn35_dpia_host_router_bw msg_data = { 0 };
+
+	/* Never touch the mailbox registers when there is no SMU behind them. */
+	if (!clk_mgr->smu_present)
+		return;
+
+	msg_data.bits.hr_id = hr_id;
+	msg_data.bits.bw_mbps = bw_kbps / 1000;
+
+	dcn35_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_NotifyHostRouterBW, msg_data.all);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.h
new file mode 100644
index 000000000000..ab9d21ba0c43
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.h
@@ -0,0 +1,220 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef DAL_DC_35_SMU_H_
+#define DAL_DC_35_SMU_H_
+
+#include "os_types.h"
+
+#ifndef PMFW_DRIVER_IF_H
+#define PMFW_DRIVER_IF_H
+#define PMFW_DRIVER_IF_VERSION 4
+
+typedef enum {
+ DSPCLK_DCFCLK = 0,
+ DSPCLK_DISPCLK,
+ DSPCLK_PIXCLK,
+ DSPCLK_PHYCLK,
+ DSPCLK_COUNT,
+} DSPCLK_e;
+
+typedef struct {
+ uint16_t Freq; // in MHz
+ uint16_t Vid; // min voltage in SVI3 VID
+} DisplayClockTable_t;
+
+typedef struct {
+ uint16_t MinClock; // This is either DCFCLK or SOCCLK (in MHz)
+ uint16_t MaxClock; // This is either DCFCLK or SOCCLK (in MHz)
+ uint16_t MinMclk;
+ uint16_t MaxMclk;
+
+ uint8_t WmSetting;
+ uint8_t WmType; // Used for normal pstate change or memory retraining
+ uint8_t Padding[2];
+} WatermarkRowGeneric_t;
+
+#define NUM_WM_RANGES 4
+#define WM_PSTATE_CHG 0
+#define WM_RETRAINING 1
+
+typedef enum {
+ WM_SOCCLK = 0,
+ WM_DCFCLK,
+ WM_COUNT,
+} WM_CLOCK_e;
+
+typedef struct {
+ // Watermarks
+ WatermarkRowGeneric_t WatermarkRow[WM_COUNT][NUM_WM_RANGES];
+
+ uint32_t MmHubPadding[7]; // SMU internal use
+} Watermarks_t;
+
+#define NUM_DCFCLK_DPM_LEVELS 8
+#define NUM_DISPCLK_DPM_LEVELS 8
+#define NUM_DPPCLK_DPM_LEVELS 8
+#define NUM_SOCCLK_DPM_LEVELS 8
+#define NUM_VCN_DPM_LEVELS 8
+#define NUM_SOC_VOLTAGE_LEVELS 8
+#define NUM_VPE_DPM_LEVELS 8
+#define NUM_FCLK_DPM_LEVELS 8
+#define NUM_MEM_PSTATE_LEVELS 4
+
+typedef enum{
+ WCK_RATIO_1_1 = 0, // DDR5, Wck:ck is always 1:1;
+ WCK_RATIO_1_2,
+ WCK_RATIO_1_4,
+ WCK_RATIO_MAX
+} WCK_RATIO_e;
+
+typedef struct {
+ uint32_t UClk;
+ uint32_t MemClk;
+ uint32_t Voltage;
+ uint8_t WckRatio;
+ uint8_t Spare[3];
+} MemPstateTable_t;
+
+//Freq in MHz
+//Voltage in milli volts with 2 fractional bits
+typedef struct {
+ uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS];
+ uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS];
+ uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS];
+ uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS];
+ uint32_t VClocks[NUM_VCN_DPM_LEVELS];
+ uint32_t DClocks[NUM_VCN_DPM_LEVELS];
+ uint32_t VPEClocks[NUM_VPE_DPM_LEVELS];
+ uint32_t FclkClocks_Freq[NUM_FCLK_DPM_LEVELS];
+ uint32_t FclkClocks_Voltage[NUM_FCLK_DPM_LEVELS];
+ uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS];
+ MemPstateTable_t MemPstateTable[NUM_MEM_PSTATE_LEVELS];
+
+ uint8_t NumDcfClkLevelsEnabled;
+ uint8_t NumDispClkLevelsEnabled; //Applies to both Dispclk and Dppclk
+ uint8_t NumSocClkLevelsEnabled;
+ uint8_t VcnClkLevelsEnabled; //Applies to both Vclk and Dclk
+ uint8_t VpeClkLevelsEnabled;
+ uint8_t NumMemPstatesEnabled;
+ uint8_t NumFclkLevelsEnabled;
+ uint8_t spare[2];
+
+ uint32_t MinGfxClk;
+ uint32_t MaxGfxClk;
+} DpmClocks_t_dcn35;
+
+typedef struct {
+ uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS];
+ uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS];
+ uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS];
+ uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS];
+ uint32_t VClocks0[NUM_VCN_DPM_LEVELS];
+ uint32_t VClocks1[NUM_VCN_DPM_LEVELS];
+ uint32_t DClocks0[NUM_VCN_DPM_LEVELS];
+ uint32_t DClocks1[NUM_VCN_DPM_LEVELS];
+ uint32_t VPEClocks[NUM_VPE_DPM_LEVELS];
+ uint32_t FclkClocks_Freq[NUM_FCLK_DPM_LEVELS];
+ uint32_t FclkClocks_Voltage[NUM_FCLK_DPM_LEVELS];
+ uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS];
+ MemPstateTable_t MemPstateTable[NUM_MEM_PSTATE_LEVELS];
+ uint8_t NumDcfClkLevelsEnabled;
+ uint8_t NumDispClkLevelsEnabled; // Applies to both Dispclk and Dppclk
+ uint8_t NumSocClkLevelsEnabled;
+ uint8_t Vcn0ClkLevelsEnabled; // Applies to both Vclk0 and Dclk0
+ uint8_t Vcn1ClkLevelsEnabled; // Applies to both Vclk1 and Dclk1
+ uint8_t VpeClkLevelsEnabled;
+ uint8_t NumMemPstatesEnabled;
+ uint8_t NumFclkLevelsEnabled;
+ uint32_t MinGfxClk;
+ uint32_t MaxGfxClk;
+} DpmClocks_t_dcn351;
+
+#define TABLE_BIOS_IF 0 // Called by BIOS
+#define TABLE_WATERMARKS 1 // Called by DAL through VBIOS
+#define TABLE_CUSTOM_DPM 2 // Called by Driver
+#define TABLE_SPARE1 3
+#define TABLE_DPMCLOCKS 4 // Called by Driver
+#define TABLE_MOMENTARY_PM 5 // Called by Tools
+#define TABLE_MODERN_STDBY 6 // Called by Tools for Modern Standby Log
+#define TABLE_SMU_METRICS 7 // Called by Driver
+#define TABLE_COUNT 8
+
+#endif
+
+struct dcn35_watermarks {
+ // Watermarks
+ WatermarkRowGeneric_t WatermarkRow[WM_COUNT][NUM_WM_RANGES];
+
+ uint32_t MmHubPadding[7]; // SMU internal use
+};
+
+/* DCN3.5 DPM clock table handle: CPU pointer to the table plus its mc_address. */
+struct dcn35_smu_dpm_clks {
+ DpmClocks_t_dcn35 *dpm_clks;
+ union large_integer mc_address;
+};
+
+/* Same handle as dcn35_smu_dpm_clks, but for the DCN3.5.1 table layout (DpmClocks_t_dcn351). */
+struct dcn351_smu_dpm_clks {
+ DpmClocks_t_dcn351 *dpm_clks;
+ union large_integer mc_address;
+};
+/*
+ * Flag bits of the display idle optimization word: three 1-bit flags and 29
+ * reserved bits, 32 bits in total.
+ * TODO: taken from vgh, may not be correct
+ */
+struct display_idle_optimization {
+ unsigned int df_request_disabled : 1;
+ unsigned int phy_ref_clk_off : 1;
+ unsigned int s0i2_rdy : 1;
+ unsigned int reserved : 29;
+};
+
+/*
+ * Overlays the idle optimization flag bits with a raw 32-bit word; "data" is
+ * what gets passed as the parameter of an SMU message.
+ */
+union display_idle_optimization_u {
+ struct display_idle_optimization idle_info;
+ uint32_t data;
+};
+
+int dcn35_smu_get_smu_version(struct clk_mgr_internal *clk_mgr);
+int dcn35_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz);
+int dcn35_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr);
+int dcn35_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_dcfclk_khz);
+int dcn35_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_min_ds_dcfclk_khz);
+int dcn35_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_khz);
+void dcn35_smu_set_display_idle_optimization(struct clk_mgr_internal *clk_mgr, uint32_t idle_info);
+void dcn35_smu_enable_phy_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable);
+void dcn35_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr);
+void dcn35_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high);
+void dcn35_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low);
+void dcn35_smu_transfer_dpm_table_smu_2_dram(struct clk_mgr_internal *clk_mgr);
+void dcn35_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr);
+
+void dcn35_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zstate_support_state support);
+void dcn35_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable);
+void dcn35_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable);
+
+int dcn35_smu_exit_low_power_state(struct clk_mgr_internal *clk_mgr);
+int dcn35_smu_get_ips_supported(struct clk_mgr_internal *clk_mgr);
+int dcn35_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr);
+int dcn35_smu_get_dprefclk(struct clk_mgr_internal *clk_mgr);
+void dcn35_smu_notify_host_router_bw(struct clk_mgr_internal *clk_mgr, uint32_t hr_id, uint32_t bw_kbps);
+
+#endif /* DAL_DC_35_SMU_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h
new file mode 100644
index 000000000000..2e0d34fd7512
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef DALSMC_H
+#define DALSMC_H
+
+#define DALSMC_VERSION 0x1
+
+// SMU Response Codes:
+#define DALSMC_Result_OK 0x1
+#define DALSMC_Result_Failed 0xFF
+#define DALSMC_Result_UnknownCmd 0xFE
+#define DALSMC_Result_CmdRejectedPrereq 0xFD
+#define DALSMC_Result_CmdRejectedBusy 0xFC
+
+
+
+// Message Definitions:
+#define DALSMC_MSG_TestMessage 0x1
+#define DALSMC_MSG_GetSmuVersion 0x2
+#define DALSMC_MSG_GetDriverIfVersion 0x3
+#define DALSMC_MSG_GetMsgHeaderVersion 0x4
+#define DALSMC_MSG_SetDalDramAddrHigh 0x5
+#define DALSMC_MSG_SetDalDramAddrLow 0x6
+#define DALSMC_MSG_TransferTableSmu2Dram 0x7
+#define DALSMC_MSG_TransferTableDram2Smu 0x8
+#define DALSMC_MSG_SetHardMinByFreq 0x9
+#define DALSMC_MSG_SetHardMaxByFreq 0xA
+#define DALSMC_MSG_GetDpmFreqByIndex 0xB
+#define DALSMC_MSG_GetDcModeMaxDpmFreq 0xC
+#define DALSMC_MSG_SetMinDeepSleepDcfclk 0xD
+#define DALSMC_MSG_NumOfDisplays 0xE
+#define DALSMC_MSG_SetExternalClientDfCstateAllow 0xF
+#define DALSMC_MSG_BacoAudioD3PME 0x10
+#define DALSMC_MSG_SetFclkSwitchAllow 0x11
+#define DALSMC_MSG_SetCabForUclkPstate 0x12
+#define DALSMC_MSG_SetWorstCaseUclkLatency 0x13
+#define DALSMC_MSG_DcnExitReset 0x14
+#define DALSMC_MSG_ReturnHardMinStatus 0x15
+#define DALSMC_MSG_SetAlwaysWaitDmcubResp 0x16
+#define DALSMC_MSG_IndicateDrrStatus 0x17 // PMFW 15811
+#define DALSMC_MSG_ActiveUclkFclk 0x18
+#define DALSMC_MSG_IdleUclkFclk 0x19
+#define DALSMC_MSG_SetUclkPstateAllow 0x1A
+#define DALSMC_MSG_SubvpUclkFclk 0x1B
+#define DALSMC_MSG_GetNumUmcChannels 0x1C
+#define DALSMC_Message_Count 0x1D
+
+/*
+ * Two-state allow/disallow value (DISALLOW = 0, ALLOW = 1).
+ * NOTE(review): presumably the argument of DALSMC_MSG_SetFclkSwitchAllow -
+ * confirm against the sender.
+ */
+typedef enum {
+ FCLK_SWITCH_DISALLOW,
+ FCLK_SWITCH_ALLOW,
+} FclkSwitchAllow_e;
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
new file mode 100644
index 000000000000..306016c1f109
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
@@ -0,0 +1,1631 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dccg.h"
+#include "clk_mgr_internal.h"
+#include "dcn401/dcn401_clk_mgr_smu_msg.h"
+#include "dcn20/dcn20_clk_mgr.h"
+#include "dce100/dce_clk_mgr.h"
+#include "dcn31/dcn31_clk_mgr.h"
+#include "dcn32/dcn32_clk_mgr.h"
+#include "dcn401/dcn401_clk_mgr.h"
+#include "reg_helper.h"
+#include "core_types.h"
+#include "dm_helpers.h"
+#include "link_service.h"
+#include "dc_state_priv.h"
+#include "atomfirmware.h"
+
+#include "dcn401_smu14_driver_if.h"
+
+#include "dcn/dcn_4_1_0_offset.h"
+#include "dcn/dcn_4_1_0_sh_mask.h"
+
+/* Value that BASE(1) resolves to (BASE_INNER pastes DCN_BASE__INST0_SEG ## seg). */
+#define DCN_BASE__INST0_SEG1 0x000000C0
+
+/*
+ * Hard-coded CLK register offsets. CLK_SR_DCN401(reg_name, block, inst)
+ * expands to mm<block>_<reg_name>, i.e. to one of the names below.
+ */
+#define mmCLK01_CLK0_CLK_PLL_REQ 0x16E37
+#define mmCLK01_CLK0_CLK0_DFS_CNTL 0x16E69
+#define mmCLK01_CLK0_CLK1_DFS_CNTL 0x16E6C
+#define mmCLK01_CLK0_CLK2_DFS_CNTL 0x16E6F
+#define mmCLK01_CLK0_CLK3_DFS_CNTL 0x16E72
+#define mmCLK01_CLK0_CLK4_DFS_CNTL 0x16E75
+#define mmCLK20_CLK2_CLK2_DFS_CNTL 0x1B051
+
+/*
+ * CLK0_CLK_PLL_REQ field layout, as given by the masks/shifts:
+ * FbMult_int = bits 8:0, PllSpineDiv = bits 15:12, FbMult_frac = bits 31:16.
+ */
+#define CLK0_CLK_PLL_REQ__FbMult_int_MASK 0x000001ffUL
+#define CLK0_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000f000UL
+#define CLK0_CLK_PLL_REQ__FbMult_frac_MASK 0xffff0000UL
+#define CLK0_CLK_PLL_REQ__FbMult_int__SHIFT 0x00000000
+#define CLK0_CLK_PLL_REQ__PllSpineDiv__SHIFT 0x0000000c
+#define CLK0_CLK_PLL_REQ__FbMult_frac__SHIFT 0x00000010
+
+/*
+ * FN(reg_name, field_name) expands to two comma-separated expressions: the
+ * shift and the mask of field_name, looked up in clk_mgr->clk_mgr_shift and
+ * clk_mgr->clk_mgr_mask. reg_name is not used in the expansion. A variable
+ * named clk_mgr must be in scope at the point of use.
+ */
+#undef FN
+#define FN(reg_name, field_name) \
+ clk_mgr->clk_mgr_shift->field_name, clk_mgr->clk_mgr_mask->field_name
+
+/* REG(reg) reads the register address stored in clk_mgr->regs. */
+#define REG(reg) \
+ (clk_mgr->regs->reg)
+
+/* Two-level expansion so that a macro argument is expanded before pasting. */
+#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
+
+#define BASE(seg) BASE_INNER(seg)
+
+/*
+ * SR(reg_name): designated initializer ".reg_name = segment base + offset",
+ * built from the reg<reg_name>_BASE_IDX and reg<reg_name> symbols.
+ */
+#define SR(reg_name)\
+ .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name
+
+/*
+ * CLK_SR_DCN401(reg_name, block, inst): designated initializer that takes
+ * the address from the mm<block>_<reg_name> define; inst is not used.
+ */
+#define CLK_SR_DCN401(reg_name, block, inst)\
+ .reg_name = mm ## block ## _ ## reg_name
+
+/* Register address table, filled in by the CLK_REG_LIST_DCN401() list macro. */
+static const struct clk_mgr_registers clk_mgr_regs_dcn401 = {
+ CLK_REG_LIST_DCN401()
+};
+
+/* Field shift table: the common field list instantiated with the __SHIFT suffix. */
+static const struct clk_mgr_shift clk_mgr_shift_dcn401 = {
+ CLK_COMMON_MASK_SH_LIST_DCN401(__SHIFT)
+};
+
+/* Field mask table: the same field list instantiated with the _MASK suffix. */
+static const struct clk_mgr_mask clk_mgr_mask_dcn401 = {
+ CLK_COMMON_MASK_SH_LIST_DCN401(_MASK)
+};
+
+/*
+ * Recover the enclosing struct dcn401_clk_mgr from a pointer to its "base"
+ * member.
+ */
+#define TO_DCN401_CLK_MGR(clk_mgr)\
+ container_of(clk_mgr, struct dcn401_clk_mgr, base)
+
+/*
+ * Report whether DPM is usable for the given clock.
+ *
+ * A clock counts as DPM-enabled when the SMU is present and the clock table
+ * holds more than one level for it. PPCLK_DPREFCLK and any clock not listed
+ * below are never DPM-enabled.
+ */
+static bool dcn401_is_ppclk_dpm_enabled(struct clk_mgr_internal *clk_mgr, PPCLK_e clk)
+{
+	const struct clk_limit_num_entries *levels =
+			&clk_mgr->base.bw_params->clk_table.num_entries_per_clk;
+	unsigned int level_count;
+
+	switch (clk) {
+	case PPCLK_SOCCLK:
+		level_count = levels->num_socclk_levels;
+		break;
+	case PPCLK_UCLK:
+		level_count = levels->num_memclk_levels;
+		break;
+	case PPCLK_FCLK:
+		level_count = levels->num_fclk_levels;
+		break;
+	case PPCLK_DISPCLK:
+		level_count = levels->num_dispclk_levels;
+		break;
+	case PPCLK_DPPCLK:
+		level_count = levels->num_dppclk_levels;
+		break;
+	case PPCLK_DCFCLK:
+		level_count = levels->num_dcfclk_levels;
+		break;
+	case PPCLK_DTBCLK:
+		level_count = levels->num_dtbclk_levels;
+		break;
+	case PPCLK_DPREFCLK:
+	default:
+		return false;
+	}
+
+	/* a single level leaves nothing to switch between */
+	return clk_mgr->smu_present && level_count > 1;
+}
+
+/*
+ * Report whether idle DPM is usable for the given clock.
+ *
+ * Only PPCLK_UCLK and PPCLK_FCLK can qualify, and only when the SMU is
+ * present and its firmware version meets the minimum for the ASIC revision
+ * (0x681800 for GC 12.0.0 A0, 0x661300 for GC 12.0.1 A0).
+ */
+static bool dcn401_is_ppclk_idle_dpm_enabled(struct clk_mgr_internal *clk_mgr, PPCLK_e clk)
+{
+	bool fw_has_idle_dpm;
+
+	if (clk != PPCLK_UCLK && clk != PPCLK_FCLK)
+		return false;
+
+	fw_has_idle_dpm =
+		(ASICREV_IS_GC_12_0_0_A0(clk_mgr->base.ctx->asic_id.hw_internal_rev) &&
+		 clk_mgr->smu_ver >= 0x681800) ||
+		(ASICREV_IS_GC_12_0_1_A0(clk_mgr->base.ctx->asic_id.hw_internal_rev) &&
+		 clk_mgr->smu_ver >= 0x661300);
+
+	return fw_has_idle_dpm && clk_mgr->smu_present;
+}
+
+/*
+ * Report whether the DF throttle optimization may be used.
+ *
+ * Requires a GC 12.0.1 A0 part with SMU firmware 0x663500 or newer, the SMU
+ * to be present, and the force_subvp_df_throttle debug option to be off.
+ */
+static bool dcn401_is_df_throttle_opt_enabled(struct clk_mgr_internal *clk_mgr)
+{
+	if (!ASICREV_IS_GC_12_0_1_A0(clk_mgr->base.ctx->asic_id.hw_internal_rev) ||
+			clk_mgr->smu_ver < 0x663500)
+		return false;
+
+	return !clk_mgr->base.ctx->dc->debug.force_subvp_df_throttle &&
+			clk_mgr->smu_present;
+}
+
+/* Query SMU for all clock states for a particular clock */
+/*
+ * Fill one clock's column of the clock table from the SMU.
+ *
+ * @clk_mgr:    clock manager used to talk to the SMU
+ * @clk:        clock to query
+ * @entry_0:    address of this clock's field inside clk_table.entries[0];
+ *              the same field of following entries is reached by stepping
+ *              one whole table entry at a time
+ * @num_levels: out, number of levels stored (0 when the initial query fails)
+ *
+ * Index 0xFF asks the SMU for the level description. If bit 31 of the reply
+ * is set the clock is fine-grained and only min and max are stored,
+ * otherwise the low byte is the number of discrete levels.
+ */
+static void dcn401_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e clk, unsigned int *entry_0,
+		unsigned int *num_levels)
+{
+	unsigned int i;
+	char *entry_i = (char *)entry_0;
+
+	uint32_t ret = dcn401_smu_get_dpm_freq_by_index(clk_mgr, clk, 0xFF);
+
+	/* unsigned constant: shifting a signed 1 into bit 31 is undefined */
+	if (ret & (1U << 31))
+		/* fine-grained, only min and max */
+		*num_levels = 2;
+	else
+		/* discrete, a number of fixed states */
+		/* will set num_levels to 0 on failure */
+		*num_levels = ret & 0xFF;
+
+	/*
+	 * Never report more levels than the table can hold: callers index
+	 * entries[num_levels - 1], and only that many entries are filled.
+	 */
+	if (*num_levels > ARRAY_SIZE(clk_mgr->base.bw_params->clk_table.entries))
+		*num_levels = ARRAY_SIZE(clk_mgr->base.bw_params->clk_table.entries);
+
+	/* if the initial message failed, num_levels will be 0 */
+	for (i = 0; i < *num_levels; i++) {
+		/* the low 16 bits of the reply carry the frequency */
+		*((unsigned int *)entry_i) = (dcn401_smu_get_dpm_freq_by_index(clk_mgr, clk, i) & 0xFFFF);
+		entry_i += sizeof(clk_mgr->base.bw_params->clk_table.entries[0]);
+	}
+}
+
+/*
+ * Populate the watermark range table in bw_params.
+ *
+ * Set A always covers the full range starting at the lowest DCFCLK/UCLK
+ * level of the clock table. Set 1A describes the same range for the dummy
+ * p-state case unless that is disabled through bb_overrides. Sets B and 1B
+ * are marked invalid.
+ */
+static void dcn401_build_wm_range_table(struct clk_mgr *clk_mgr)
+{
+	/* Range minimums come from the first clock table entry */
+	uint16_t min_dcfclk_mhz = clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz;
+	uint16_t min_uclk_mhz = clk_mgr->bw_params->clk_table.entries[0].memclk_mhz;
+
+	/* Set A - Normal - default values */
+	clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
+	clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz;
+	clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF;
+	clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz;
+	clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF;
+	clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid = true;
+
+	/* Set B - Unused on dcn4 */
+	clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid = false;
+
+	/* Set 1A - Dummy P-State - P-State latency set to "dummy p-state" value */
+	/* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set 1A for dummy p-state */
+	if (clk_mgr->ctx->dc->bb_overrides.dummy_clock_change_latency_ns == 0x7FFFFFFF) {
+		clk_mgr->bw_params->wm_table.nv_entries[WM_1A].valid = false;
+	} else {
+		clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE;
+		clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.min_uclk = min_uclk_mhz;
+		clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.max_uclk = 0xFFFF;
+		clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz;
+		clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.max_dcfclk = 0xFFFF;
+		clk_mgr->bw_params->wm_table.nv_entries[WM_1A].valid = true;
+	}
+
+	/* Set 1B - Unused on dcn4 */
+	clk_mgr->bw_params->wm_table.nv_entries[WM_1B].valid = false;
+}
+
+/*
+ * Reset the cached clock state and rebuild the clock table from the SMU.
+ *
+ * Does nothing when bw_params is NULL. Otherwise it clears clks, sets the
+ * p-state support flags to true, and detects the SMU. If the SMU is absent
+ * (or forced absent) it stops there with smu_present and dpm_present false.
+ * With an SMU it queries the DPM levels of DCFCLK, SOCCLK, DTBCLK, DISPCLK
+ * and DPPCLK, records the DC-mode limits, applies the debug minimum clocks,
+ * fetches the memory clock states and builds the watermark range table.
+ */
+void dcn401_init_clocks(struct clk_mgr *clk_mgr_base)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ struct clk_limit_num_entries *num_entries_per_clk;
+ unsigned int i;
+
+ if (!clk_mgr_base->bw_params)
+ return;
+
+ num_entries_per_clk = &clk_mgr_base->bw_params->clk_table.num_entries_per_clk;
+
+ memset(&(clk_mgr_base->clks), 0, sizeof(struct dc_clocks));
+ clk_mgr_base->clks.p_state_change_support = true;
+ clk_mgr_base->clks.prev_p_state_change_support = true;
+ clk_mgr_base->clks.fclk_prev_p_state_change_support = true;
+ /* assume no SMU / no DPM until the queries below prove otherwise */
+ clk_mgr->smu_present = false;
+ clk_mgr->dpm_present = false;
+
+ /* the version query doubles as the presence test; it also fills smu_ver */
+ if (!clk_mgr_base->force_smu_not_present && dcn401_smu_get_smu_version(clk_mgr, &clk_mgr->smu_ver))
+ clk_mgr->smu_present = true;
+
+ if (!clk_mgr->smu_present)
+ return;
+
+ dcn401_smu_check_driver_if_version(clk_mgr);
+ dcn401_smu_check_msg_header_version(clk_mgr);
+
+ /*
+ * For each clock below: read the DPM levels into the table, read the
+ * DC-mode maximum, and store 0 as the DC-mode limit when it equals the
+ * highest table level (dcn401_is_dc_mode_present() treats 0 as no limit).
+ */
+
+ /* DCFCLK */
+ dcn401_init_single_clock(clk_mgr, PPCLK_DCFCLK,
+ &clk_mgr_base->bw_params->clk_table.entries[0].dcfclk_mhz,
+ &num_entries_per_clk->num_dcfclk_levels);
+ clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DCFCLK);
+ if (num_entries_per_clk->num_dcfclk_levels && clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz ==
+ clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_dcfclk_levels - 1].dcfclk_mhz)
+ clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz = 0;
+
+ /* SOCCLK */
+ dcn401_init_single_clock(clk_mgr, PPCLK_SOCCLK,
+ &clk_mgr_base->bw_params->clk_table.entries[0].socclk_mhz,
+ &num_entries_per_clk->num_socclk_levels);
+ clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_SOCCLK);
+ if (num_entries_per_clk->num_socclk_levels && clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz ==
+ clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_socclk_levels - 1].socclk_mhz)
+ clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz = 0;
+
+ /* DTBCLK */
+ /* not queried at all when the disable_dtb_ref_clk_switch debug option is set */
+ if (!clk_mgr->base.ctx->dc->debug.disable_dtb_ref_clk_switch) {
+ dcn401_init_single_clock(clk_mgr, PPCLK_DTBCLK,
+ &clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz,
+ &num_entries_per_clk->num_dtbclk_levels);
+ clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DTBCLK);
+ if (num_entries_per_clk->num_dtbclk_levels && clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz ==
+ clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_dtbclk_levels - 1].dtbclk_mhz)
+ clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz = 0;
+ }
+
+ /* DISPCLK */
+ dcn401_init_single_clock(clk_mgr, PPCLK_DISPCLK,
+ &clk_mgr_base->bw_params->clk_table.entries[0].dispclk_mhz,
+ &num_entries_per_clk->num_dispclk_levels);
+ clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DISPCLK);
+ if (num_entries_per_clk->num_dispclk_levels && clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz ==
+ clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_dispclk_levels - 1].dispclk_mhz)
+ clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz = 0;
+
+ /* DPPCLK */
+ /* levels only; no DC-mode limit is read for DPPCLK */
+ dcn401_init_single_clock(clk_mgr, PPCLK_DPPCLK,
+ &clk_mgr_base->bw_params->clk_table.entries[0].dppclk_mhz,
+ &num_entries_per_clk->num_dppclk_levels);
+
+ /* DPM counts as present only if DCFCLK, DTBCLK and DISPCLK all reported levels */
+ if (num_entries_per_clk->num_dcfclk_levels &&
+ num_entries_per_clk->num_dtbclk_levels &&
+ num_entries_per_clk->num_dispclk_levels)
+ clk_mgr->dpm_present = true;
+
+ /* debug override: raise every DISPCLK level that is below the requested minimum */
+ if (clk_mgr_base->ctx->dc->debug.min_disp_clk_khz) {
+ for (i = 0; i < num_entries_per_clk->num_dispclk_levels; i++)
+ if (clk_mgr_base->bw_params->clk_table.entries[i].dispclk_mhz
+ < khz_to_mhz_ceil(clk_mgr_base->ctx->dc->debug.min_disp_clk_khz))
+ clk_mgr_base->bw_params->clk_table.entries[i].dispclk_mhz
+ = khz_to_mhz_ceil(clk_mgr_base->ctx->dc->debug.min_disp_clk_khz);
+ }
+
+ /* debug override: same treatment for the DPPCLK levels */
+ if (clk_mgr_base->ctx->dc->debug.min_dpp_clk_khz) {
+ for (i = 0; i < num_entries_per_clk->num_dppclk_levels; i++)
+ if (clk_mgr_base->bw_params->clk_table.entries[i].dppclk_mhz
+ < khz_to_mhz_ceil(clk_mgr_base->ctx->dc->debug.min_dpp_clk_khz))
+ clk_mgr_base->bw_params->clk_table.entries[i].dppclk_mhz
+ = khz_to_mhz_ceil(clk_mgr_base->ctx->dc->debug.min_dpp_clk_khz);
+ }
+
+ /* Get UCLK, update bounding box */
+ clk_mgr_base->funcs->get_memclk_states_from_smu(clk_mgr_base);
+
+ /* WM range table */
+ dcn401_build_wm_range_table(clk_mgr_base);
+}
+
+/*
+ * Report whether any clock carries a DC-mode limit.
+ *
+ * True only when both the SMU and DPM are present and at least one of
+ * DCFCLK, DISPCLK, DTBCLK, FCLK, UCLK or SOCCLK has a non-empty level table
+ * together with a non-zero dc_mode_limit value.
+ */
+bool dcn401_is_dc_mode_present(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	const struct clk_limit_num_entries *levels;
+
+	if (!clk_mgr->smu_present || !clk_mgr->dpm_present)
+		return false;
+
+	levels = &clk_mgr_base->bw_params->clk_table.num_entries_per_clk;
+
+	if (levels->num_dcfclk_levels && clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz)
+		return true;
+	if (levels->num_dispclk_levels && clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz)
+		return true;
+	if (levels->num_dtbclk_levels && clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz)
+		return true;
+	if (levels->num_fclk_levels && clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz)
+		return true;
+	if (levels->num_memclk_levels && clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz)
+		return true;
+
+	return levels->num_socclk_levels && clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz;
+}
+
+/*
+ * Read the DFS control registers and derive the current clocks from them.
+ *
+ * @regs_and_bypass: out, receives dispclk, dppclk, dprefclk, dcfclk, dtbclk
+ *                   and fclk; no other member is written here
+ * @clk_mgr_base:    clock manager whose registers are read
+ * @log_info:        not used by this function
+ *
+ * Each clock is computed as
+ * DENTIST_DIVIDER_RANGE_SCALE_FACTOR * dentist_vco_freq_khz / divider,
+ * where the divider is derived from the value read from that clock's DFS
+ * control register.
+ */
+static void dcn401_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
+ struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ uint32_t dprefclk_did = 0;
+ uint32_t dcfclk_did = 0;
+ uint32_t dtbclk_did = 0;
+ uint32_t dispclk_did = 0;
+ uint32_t dppclk_did = 0;
+ uint32_t fclk_did = 0;
+ uint32_t target_div = 0;
+
+ /* DFS Slice 0 is used for DISPCLK */
+ dispclk_did = REG_READ(CLK0_CLK0_DFS_CNTL);
+ /* DFS Slice 1 is used for DPPCLK */
+ dppclk_did = REG_READ(CLK0_CLK1_DFS_CNTL);
+ /* DFS Slice 2 is used for DPREFCLK */
+ dprefclk_did = REG_READ(CLK0_CLK2_DFS_CNTL);
+ /* DFS Slice 3 is used for DCFCLK */
+ dcfclk_did = REG_READ(CLK0_CLK3_DFS_CNTL);
+ /* DFS Slice 4 is used for DTBCLK */
+ dtbclk_did = REG_READ(CLK0_CLK4_DFS_CNTL);
+ /* FCLK is read from the CLK2_CLK2 DFS control register */
+ fclk_did = REG_READ(CLK2_CLK2_DFS_CNTL);
+
+ /* Convert DISPCLK DFS Slice DID to divider*/
+ target_div = dentist_get_divider_from_did(dispclk_did);
+ //Get dispclk in khz
+ regs_and_bypass->dispclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+ * clk_mgr->base.dentist_vco_freq_khz) / target_div;
+
+ /* Convert DPPCLK DFS Slice DID to divider*/
+ target_div = dentist_get_divider_from_did(dppclk_did);
+ //Get dppclk in khz
+ regs_and_bypass->dppclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+ * clk_mgr->base.dentist_vco_freq_khz) / target_div;
+
+ /* Convert DPREFCLK DFS Slice DID to divider*/
+ target_div = dentist_get_divider_from_did(dprefclk_did);
+ //Get dprefclk in khz
+ regs_and_bypass->dprefclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+ * clk_mgr->base.dentist_vco_freq_khz) / target_div;
+
+ /* Convert DCFCLK DFS Slice DID to divider*/
+ target_div = dentist_get_divider_from_did(dcfclk_did);
+ //Get dcfclk in khz
+ regs_and_bypass->dcfclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+ * clk_mgr->base.dentist_vco_freq_khz) / target_div;
+
+ /* Convert DTBCLK DFS Slice DID to divider*/
+ target_div = dentist_get_divider_from_did(dtbclk_did);
+ //Get dtbclk in khz
+ regs_and_bypass->dtbclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+ * clk_mgr->base.dentist_vco_freq_khz) / target_div;
+
+ /* Convert FCLK DFS Slice DID to divider*/
+ target_div = dentist_get_divider_from_did(fclk_did);
+ //Get fclk in khz
+ regs_and_bypass->fclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+ * clk_mgr->base.dentist_vco_freq_khz) / target_div;
+}
+
+/*
+ * Report whether the pipe shows its plane unscaled: the plane's source
+ * rectangle must match both the stream's addressable size and the plane's
+ * destination rectangle. The caller must ensure pipe->plane_state and
+ * pipe->stream are non-NULL.
+ */
+static bool dcn401_check_native_scaling(struct pipe_ctx *pipe)
+{
+	int src_w = pipe->plane_state->src_rect.width;
+	int src_h = pipe->plane_state->src_rect.height;
+
+	if (pipe->stream->timing.h_addressable != src_w)
+		return false;
+
+	if (pipe->stream->timing.v_addressable != src_h)
+		return false;
+
+	return pipe->plane_state->dst_rect.width == src_w &&
+			pipe->plane_state->dst_rect.height == src_h;
+}
+
+/*
+ * Emit one "AutoDPMTest" log line describing requested and actual clocks.
+ *
+ * @new_clocks: clocks being requested
+ * @clk_mgr:    clock manager; its registers are read for the *_hw values
+ * @context:    state whose pipes are described in the log line
+ *
+ * Pipes that have a stream and are not SubVP phantom pipes are collected,
+ * the function sleeps 5 ms, reads back the clock registers and, if there is
+ * at least one such pipe and dramclk/fclk/dcfclk/dppclk are all non-zero,
+ * logs the clocks plus per-pipe pixel clock, p-state type, source size,
+ * refresh rate and scaling flag for the first four collected pipes.
+ */
+static void dcn401_auto_dpm_test_log(
+		struct dc_clocks *new_clocks,
+		struct clk_mgr_internal *clk_mgr,
+		struct dc_state *context)
+{
+	unsigned int mall_ss_size_bytes;
+	unsigned int num_fclk_levels;
+	int dramclk_khz_override, fclk_khz_override;
+	struct pipe_ctx *pipe_ctx_list[MAX_PIPES];
+	int active_pipe_count = 0;
+	struct clk_log_info log_info = {0};
+	struct clk_state_registers_and_bypass clk_register_dump;
+
+	for (int i = 0; i < MAX_PIPES; i++) {
+		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+		if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) {
+			pipe_ctx_list[active_pipe_count] = pipe_ctx;
+			active_pipe_count++;
+		}
+	}
+
+	/* NOTE(review): presumably lets the clocks settle before the readback - confirm */
+	msleep(5);
+
+	mall_ss_size_bytes = context->bw_ctx.bw.dcn.mall_ss_size_bytes;
+
+	dcn401_dump_clk_registers(&clk_register_dump, &clk_mgr->base, &log_info);
+
+	// Overrides for these clocks in case there is no p_state change support
+	dramclk_khz_override = new_clocks->dramclk_khz;
+	fclk_khz_override = new_clocks->fclk_khz;
+
+	num_fclk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_fclk_levels;
+
+	if (!new_clocks->p_state_change_support)
+		dramclk_khz_override = clk_mgr->base.bw_params->max_memclk_mhz * 1000;
+
+	/*
+	 * Without FCLK p-state support report the highest FCLK level. With an
+	 * empty FCLK table there is no highest level, so keep the requested
+	 * value rather than index entries[-1].
+	 */
+	if (!new_clocks->fclk_p_state_change_support && num_fclk_levels > 0)
+		fclk_khz_override =
+			clk_mgr->base.bw_params->clk_table.entries[num_fclk_levels - 1].fclk_mhz * 1000;
+
+	////////////////////////////////////////////////////////////////////////////
+	//	IMPORTANT: 	When adding more clocks to these logs, do NOT put a newline
+	//	 			anywhere other than at the very end of the string.
+	//
+	//	Formatting example (make sure to have " - " between each entry):
+	//
+	//				AutoDPMTest: clk1:%d - clk2:%d - clk3:%d - clk4:%d\n"
+	////////////////////////////////////////////////////////////////////////////
+	if (active_pipe_count > 0 &&
+		new_clocks->dramclk_khz > 0 &&
+		new_clocks->fclk_khz > 0 &&
+		new_clocks->dcfclk_khz > 0 &&
+		new_clocks->dppclk_khz > 0) {
+
+		uint32_t pix_clk_list[MAX_PIPES] = {0};
+		int p_state_list[MAX_PIPES] = {0};
+		int disp_src_width_list[MAX_PIPES] = {0};
+		int disp_src_height_list[MAX_PIPES] = {0};
+		uint64_t disp_src_refresh_list[MAX_PIPES] = {0};
+		bool is_scaled_list[MAX_PIPES] = {0};
+
+		for (int i = 0; i < active_pipe_count; i++) {
+			struct pipe_ctx *curr_pipe_ctx = pipe_ctx_list[i];
+			uint64_t refresh_rate;
+
+			pix_clk_list[i] = curr_pipe_ctx->stream->timing.pix_clk_100hz;
+			p_state_list[i] = curr_pipe_ctx->p_state_type;
+
+			/* pixel clock in Hz divided by (v_total * h_total), rounded up */
+			refresh_rate = (curr_pipe_ctx->stream->timing.pix_clk_100hz * (uint64_t)100 +
+				curr_pipe_ctx->stream->timing.v_total
+				* (uint64_t) curr_pipe_ctx->stream->timing.h_total - (uint64_t)1);
+			refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.v_total);
+			refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.h_total);
+			disp_src_refresh_list[i] = refresh_rate;
+
+			if (curr_pipe_ctx->plane_state) {
+				is_scaled_list[i] = !(dcn401_check_native_scaling(curr_pipe_ctx));
+				disp_src_width_list[i] = curr_pipe_ctx->plane_state->src_rect.width;
+				disp_src_height_list[i] = curr_pipe_ctx->plane_state->src_rect.height;
+			}
+		}
+
+		DC_LOG_AUTO_DPM_TEST("AutoDPMTest: dramclk:%d - fclk:%d - "
+			"dcfclk:%d - dppclk:%d - dispclk_hw:%d - "
+			"dppclk_hw:%d - dprefclk_hw:%d - dcfclk_hw:%d - "
+			"dtbclk_hw:%d - fclk_hw:%d - pix_clk_0:%d - pix_clk_1:%d - "
+			"pix_clk_2:%d - pix_clk_3:%d - mall_ss_size:%d - p_state_type_0:%d - "
+			"p_state_type_1:%d - p_state_type_2:%d - p_state_type_3:%d - "
+			"pix_width_0:%d - pix_height_0:%d - refresh_rate_0:%lld - is_scaled_0:%d - "
+			"pix_width_1:%d - pix_height_1:%d - refresh_rate_1:%lld - is_scaled_1:%d - "
+			"pix_width_2:%d - pix_height_2:%d - refresh_rate_2:%lld - is_scaled_2:%d - "
+			"pix_width_3:%d - pix_height_3:%d - refresh_rate_3:%lld - is_scaled_3:%d - LOG_END\n",
+			dramclk_khz_override,
+			fclk_khz_override,
+			new_clocks->dcfclk_khz,
+			new_clocks->dppclk_khz,
+			clk_register_dump.dispclk,
+			clk_register_dump.dppclk,
+			clk_register_dump.dprefclk,
+			clk_register_dump.dcfclk,
+			clk_register_dump.dtbclk,
+			clk_register_dump.fclk,
+			/* arguments follow the pix_clk_0..3 order of the format string */
+			pix_clk_list[0], pix_clk_list[1], pix_clk_list[2], pix_clk_list[3],
+			mall_ss_size_bytes,
+			p_state_list[0], p_state_list[1], p_state_list[2], p_state_list[3],
+			disp_src_width_list[0], disp_src_height_list[0], disp_src_refresh_list[0], is_scaled_list[0],
+			disp_src_width_list[1], disp_src_height_list[1], disp_src_refresh_list[1], is_scaled_list[1],
+			disp_src_width_list[2], disp_src_height_list[2], disp_src_refresh_list[2], is_scaled_list[2],
+			disp_src_width_list[3], disp_src_height_list[3], disp_src_refresh_list[3], is_scaled_list[3]);
+	}
+}
+
+/*
+ * Reprogram the pixel clock of every stream that uses 128b/132b encoding.
+ *
+ * @clk_mgr:        clock manager providing the dccg (and through it the dc)
+ * @context:        state whose streams are walked
+ * @ref_dtbclk_khz: not used by this function
+ *
+ * For each stream the OTG master pipe is looked up. Streams for which
+ * dp_is_128b_132b_signal() is false are skipped. Streams whose OTG master or
+ * clock source is missing are also skipped after asserting, because the
+ * assertion alone does not stop execution and the pointers are dereferenced
+ * right afterwards.
+ */
+static void dcn401_update_clocks_update_dtb_dto(struct clk_mgr_internal *clk_mgr,
+		struct dc_state *context,
+		int ref_dtbclk_khz)
+{
+	int i;
+	struct dccg *dccg = clk_mgr->dccg;
+	struct pipe_ctx *otg_master;
+	bool use_hpo_encoder;
+
+	for (i = 0; i < context->stream_count; i++) {
+		otg_master = resource_get_otg_master_for_stream(
+				&context->res_ctx, context->streams[i]);
+		ASSERT(otg_master);
+		if (!otg_master)
+			continue;
+
+		ASSERT(otg_master->clock_source);
+		if (!otg_master->clock_source)
+			continue;
+
+		ASSERT(otg_master->clock_source->funcs->program_pix_clk);
+		ASSERT(otg_master->stream_res.pix_clk_params.controller_id >= CONTROLLER_ID_D0);
+
+		use_hpo_encoder = dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(otg_master);
+		if (!use_hpo_encoder)
+			continue;
+
+		if (otg_master->stream_res.pix_clk_params.controller_id > CONTROLLER_ID_UNDEFINED)
+			otg_master->clock_source->funcs->program_pix_clk(
+					otg_master->clock_source,
+					&otg_master->stream_res.pix_clk_params,
+					dccg->ctx->dc->link_srv->dp_get_encoding_format(
+							&otg_master->link_config.dp_link_settings),
+					&otg_master->pll_settings);
+	}
+}
+
+/*
+ * Record the reference DPPCLK in the dccg and update the per-pipe DPP DTOs.
+ *
+ * @clk_mgr:        clock manager owning the dccg
+ * @context:        state holding the per-pipe dppclk_khz requests
+ * @safe_to_lower:  when true every pipe is updated; when false only pipes
+ *                  whose request is above the dccg's recorded pipe clock
+ * @ref_dppclk_khz: value stored in dccg->ref_dppclk
+ */
+static void dcn401_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
+		struct dc_state *context, bool safe_to_lower, int ref_dppclk_khz)
+{
+	struct dccg *dccg = clk_mgr->dccg;
+	int pipe_idx;
+
+	dccg->ref_dppclk = ref_dppclk_khz;
+
+	for (pipe_idx = 0; pipe_idx < clk_mgr->base.ctx->dc->res_pool->pipe_count; pipe_idx++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[pipe_idx];
+		int requested_khz = pipe->plane_res.bw.dppclk_khz;
+		int recorded_khz;
+		int dpp_inst = 0;
+
+		if (pipe->plane_res.dpp) {
+			dpp_inst = pipe->plane_res.dpp->inst;
+		} else if (requested_khz == 0) {
+			/* no dpp and no clock request is legitimate (pipe harvesting) */
+			continue;
+		} else if (requested_khz > 0) {
+			/* a clock request without a dpp resource is invalid software state */
+			ASSERT(false);
+			continue;
+		}
+
+		recorded_khz = dccg->pipe_dppclk_khz[pipe_idx];
+
+		if (safe_to_lower || recorded_khz < requested_khz)
+			dccg->funcs->update_dpp_dto(dccg, dpp_inst, requested_khz);
+	}
+}
+
+/*
+ * Set a hard minimum for a clock without overshooting more than needed.
+ *
+ * The SMU takes the request in MHz and reports the configured clock in kHz.
+ * Rounding the request down may land below what was asked for, rounding up
+ * may select a higher DPM level than necessary. So the floor is tried first
+ * and the ceiling is sent only if the floor result falls short.
+ *
+ * Returns the clock reported by the last SMU call, or 0 when the SMU is
+ * absent or DPM is not enabled for the clock.
+ */
+static int dcn401_set_hard_min_by_freq_optimized(struct clk_mgr_internal *clk_mgr, PPCLK_e clk, int requested_clk_khz)
+{
+	int granted_khz = 0;
+
+	if (clk_mgr->smu_present && dcn401_is_ppclk_dpm_enabled(clk_mgr, clk)) {
+		granted_khz = dcn401_smu_set_hard_min_by_freq(clk_mgr, clk,
+				khz_to_mhz_floor(requested_clk_khz));
+
+		if (granted_khz < requested_clk_khz)
+			granted_khz = dcn401_smu_set_hard_min_by_freq(clk_mgr, clk,
+					khz_to_mhz_ceil(requested_clk_khz));
+	}
+
+	return granted_khz;
+}
+
+/*
+ * Program the DISPCLK write divider when the override debug option is set.
+ *
+ * Nothing happens when the cached dispclk is 0 or when
+ * debug.override_dispclk_programming is off. Otherwise the divider for the
+ * cached dispclk is computed and written only if the value currently in
+ * DENTIST_DISPCLK_CNTL is larger; the function then waits for
+ * DENTIST_DISPCLK_CHG_DONE. @context is not used.
+ */
+static void dcn401_update_clocks_update_dentist(
+		struct clk_mgr_internal *clk_mgr,
+		struct dc_state *context)
+{
+	struct dc *dc = clk_mgr->base.ctx->dc;
+	uint32_t target_divider;
+	uint32_t target_wdivider;
+	uint32_t current_wdivider = 0;
+
+	if (clk_mgr->base.clks.dispclk_khz == 0)
+		return;
+
+	target_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+			* clk_mgr->base.dentist_vco_freq_khz / clk_mgr->base.clks.dispclk_khz;
+	target_wdivider = dentist_get_did_from_divider(target_divider);
+
+	if (!dc->debug.override_dispclk_programming)
+		return;
+
+	REG_GET(DENTIST_DISPCLK_CNTL,
+			DENTIST_DISPCLK_WDIVIDER, &current_wdivider);
+
+	if (current_wdivider <= target_wdivider)
+		return;
+
+	REG_UPDATE(DENTIST_DISPCLK_CNTL,
+			DENTIST_DISPCLK_WDIVIDER, target_wdivider);
+	REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 2000);
+}
+
+/*
+ * Run the first num_steps entries of the clock manager's block_sequence.
+ *
+ * @clk_mgr_base: clock manager whose dcn401 block_sequence array is executed
+ * @num_steps:    number of entries to run, starting at index 0
+ *
+ * Each entry names a function (func) and carries its arguments in a params
+ * union; the switch below dispatches on func and reads the matching union
+ * member. Entries run strictly in array order. An unknown func breaks to
+ * the debugger and is otherwise skipped.
+ */
+static void dcn401_execute_block_sequence(struct clk_mgr *clk_mgr_base, unsigned int num_steps)
+{
+ struct clk_mgr_internal *clk_mgr_internal = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ struct dcn401_clk_mgr *clk_mgr401 = TO_DCN401_CLK_MGR(clk_mgr_internal);
+
+ unsigned int i;
+ union dcn401_clk_mgr_block_sequence_params *params;
+
+ /* execute sequence */
+ for (i = 0; i < num_steps; i++) {
+ params = &clk_mgr401->block_sequence[i].params;
+
+ switch (clk_mgr401->block_sequence[i].func) {
+ case CLK_MGR401_READ_CLOCKS_FROM_DENTIST:
+ dcn2_read_clocks_from_hw_dentist(clk_mgr_base);
+ break;
+ case CLK_MGR401_UPDATE_NUM_DISPLAYS:
+ dcn401_smu_set_num_of_displays(clk_mgr_internal,
+ params->update_num_displays_params.num_displays);
+ break;
+ case CLK_MGR401_UPDATE_HARDMIN_PPCLK:
+ /* response is optional: the SMU result is stored only when a pointer was supplied */
+ if (params->update_hardmin_params.response)
+ *params->update_hardmin_params.response = dcn401_smu_set_hard_min_by_freq(
+ clk_mgr_internal,
+ params->update_hardmin_params.ppclk,
+ params->update_hardmin_params.freq_mhz);
+ else
+ dcn401_smu_set_hard_min_by_freq(clk_mgr_internal,
+ params->update_hardmin_params.ppclk,
+ params->update_hardmin_params.freq_mhz);
+ break;
+ case CLK_MGR401_UPDATE_HARDMIN_PPCLK_OPTIMIZED:
+ /* same as above, but the request is in kHz and goes through the floor/ceil helper */
+ if (params->update_hardmin_optimized_params.response)
+ *params->update_hardmin_optimized_params.response = dcn401_set_hard_min_by_freq_optimized(
+ clk_mgr_internal,
+ params->update_hardmin_optimized_params.ppclk,
+ params->update_hardmin_optimized_params.freq_khz);
+ else
+ dcn401_set_hard_min_by_freq_optimized(clk_mgr_internal,
+ params->update_hardmin_optimized_params.ppclk,
+ params->update_hardmin_optimized_params.freq_khz);
+ break;
+ /*
+ * The active, idle and SubVP hard-min steps all read their uclk/fclk
+ * arguments from the update_idle_hardmin_params union member.
+ */
+ case CLK_MGR401_UPDATE_ACTIVE_HARDMINS:
+ dcn401_smu_set_active_uclk_fclk_hardmin(
+ clk_mgr_internal,
+ params->update_idle_hardmin_params.uclk_mhz,
+ params->update_idle_hardmin_params.fclk_mhz);
+ break;
+ case CLK_MGR401_UPDATE_IDLE_HARDMINS:
+ dcn401_smu_set_idle_uclk_fclk_hardmin(
+ clk_mgr_internal,
+ params->update_idle_hardmin_params.uclk_mhz,
+ params->update_idle_hardmin_params.fclk_mhz);
+ break;
+ case CLK_MGR401_UPDATE_SUBVP_HARDMINS:
+ dcn401_smu_set_subvp_uclk_fclk_hardmin(
+ clk_mgr_internal,
+ params->update_idle_hardmin_params.uclk_mhz,
+ params->update_idle_hardmin_params.fclk_mhz);
+ break;
+ case CLK_MGR401_UPDATE_DEEP_SLEEP_DCFCLK:
+ dcn401_smu_set_min_deep_sleep_dcef_clk(
+ clk_mgr_internal,
+ params->update_deep_sleep_dcfclk_params.freq_mhz);
+ break;
+ /* the FCLK and UCLK p-state steps share update_pstate_support_params */
+ case CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT:
+ dcn401_smu_send_fclk_pstate_message(
+ clk_mgr_internal,
+ params->update_pstate_support_params.support);
+ break;
+ case CLK_MGR401_UPDATE_UCLK_PSTATE_SUPPORT:
+ dcn401_smu_send_uclk_pstate_message(
+ clk_mgr_internal,
+ params->update_pstate_support_params.support);
+ break;
+ case CLK_MGR401_UPDATE_CAB_FOR_UCLK:
+ dcn401_smu_send_cab_for_uclk_message(
+ clk_mgr_internal,
+ params->update_cab_for_uclk_params.num_ways);
+ break;
+ case CLK_MGR401_UPDATE_WAIT_FOR_DMUB_ACK:
+ dcn401_smu_wait_for_dmub_ack_mclk(
+ clk_mgr_internal,
+ params->update_wait_for_dmub_ack_params.enable);
+ break;
+ case CLK_MGR401_INDICATE_DRR_STATUS:
+ dcn401_smu_indicate_drr_status(
+ clk_mgr_internal,
+ params->indicate_drr_status_params.mod_drr_for_pstate);
+ break;
+ case CLK_MGR401_UPDATE_DPPCLK_DTO:
+ /* ref_dppclk_khz is a pointer, dereferenced when the step runs */
+ dcn401_update_clocks_update_dpp_dto(
+ clk_mgr_internal,
+ params->update_dppclk_dto_params.context,
+ params->update_dppclk_dto_params.safe_to_lower,
+ *params->update_dppclk_dto_params.ref_dppclk_khz);
+ break;
+ case CLK_MGR401_UPDATE_DTBCLK_DTO:
+ /* ref_dtbclk_khz is likewise dereferenced at execution time */
+ dcn401_update_clocks_update_dtb_dto(
+ clk_mgr_internal,
+ params->update_dtbclk_dto_params.context,
+ *params->update_dtbclk_dto_params.ref_dtbclk_khz);
+ break;
+ case CLK_MGR401_UPDATE_DENTIST:
+ dcn401_update_clocks_update_dentist(
+ clk_mgr_internal,
+ params->update_dentist_params.context);
+ break;
+ case CLK_MGR401_UPDATE_PSR_WAIT_LOOP:
+ /* called through the dmcu's function table; dmcu must be non-NULL */
+ params->update_psr_wait_loop_params.dmcu->funcs->set_psr_wait_loop(
+ params->update_psr_wait_loop_params.dmcu,
+ params->update_psr_wait_loop_params.wait);
+ break;
+ default:
+ /* this should never happen */
+ BREAK_TO_DEBUGGER();
+ break;
+ }
+ }
+}
+
+static unsigned int dcn401_build_update_bandwidth_clocks_sequence(
+ struct clk_mgr *clk_mgr_base,
+ struct dc_state *context,
+ struct dc_clocks *new_clocks,
+ bool safe_to_lower)
+{
+ struct clk_mgr_internal *clk_mgr_internal = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ struct dcn401_clk_mgr *clk_mgr401 = TO_DCN401_CLK_MGR(clk_mgr_internal);
+ struct dc *dc = clk_mgr_base->ctx->dc;
+ struct dcn401_clk_mgr_block_sequence *block_sequence = clk_mgr401->block_sequence;
+ bool enter_display_off = false;
+ bool update_active_fclk = false;
+ bool update_active_uclk = false;
+ bool update_idle_fclk = false;
+ bool update_idle_uclk = false;
+ bool update_subvp_prefetch_dramclk = false;
+ bool update_subvp_prefetch_fclk = false;
+ bool is_idle_dpm_enabled = dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_UCLK) &&
+ dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK) &&
+ dcn401_is_ppclk_idle_dpm_enabled(clk_mgr_internal, PPCLK_UCLK) &&
+ dcn401_is_ppclk_idle_dpm_enabled(clk_mgr_internal, PPCLK_FCLK);
+ bool is_df_throttle_opt_enabled = is_idle_dpm_enabled &&
+ dcn401_is_df_throttle_opt_enabled(clk_mgr_internal);
+ int total_plane_count = clk_mgr_helper_get_active_plane_cnt(dc, context);
+ int active_uclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz);
+ int active_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.fclk_khz);
+ int idle_uclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.idle_dramclk_khz);
+ int idle_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.idle_fclk_khz);
+ int subvp_prefetch_dramclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_dramclk_khz);
+ int subvp_prefetch_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_fclk_khz);
+
+ unsigned int num_steps = 0;
+
+ int display_count;
+ bool fclk_p_state_change_support, uclk_p_state_change_support;
+
+ /* CLK_MGR401_UPDATE_NUM_DISPLAYS */
+ if (clk_mgr_internal->smu_present) {
+ display_count = clk_mgr_helper_get_active_display_cnt(dc, context);
+
+ if (display_count == 0)
+ enter_display_off = true;
+
+ if (enter_display_off == safe_to_lower) {
+ block_sequence[num_steps].params.update_num_displays_params.num_displays = display_count;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_NUM_DISPLAYS;
+ num_steps++;
+ }
+ }
+
+ /* CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT */
+ clk_mgr_base->clks.fclk_prev_p_state_change_support = clk_mgr_base->clks.fclk_p_state_change_support;
+ fclk_p_state_change_support = new_clocks->fclk_p_state_change_support || (total_plane_count == 0);
+ if (should_update_pstate_support(safe_to_lower, fclk_p_state_change_support, clk_mgr_base->clks.fclk_prev_p_state_change_support)) {
+ clk_mgr_base->clks.fclk_p_state_change_support = fclk_p_state_change_support;
+ update_active_fclk = true;
+ update_idle_fclk = true;
+
+ /* To enable FCLK P-state switching, send PSTATE_SUPPORTED message to PMFW (message not supported on DCN401)*/
+ // if (clk_mgr_base->clks.fclk_p_state_change_support) {
+ // /* Handle the code for sending a message to PMFW that FCLK P-state change is supported */
+ // if (dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) {
+ // block_sequence[num_steps].params.update_pstate_support_params.support = true;
+ // block_sequence[num_steps].func = CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT;
+ // num_steps++;
+ // }
+ // }
+ }
+
+ if (!clk_mgr_base->clks.fclk_p_state_change_support && dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) {
+ /* when P-State switching disabled, set UCLK min = max */
+ idle_fclk_mhz =
+ clk_mgr_base->bw_params->clk_table.entries[clk_mgr_base->bw_params->clk_table.num_entries_per_clk.num_fclk_levels - 1].fclk_mhz;
+ active_fclk_mhz = idle_fclk_mhz;
+ }
+
+ /* UPDATE DCFCLK */
+ if (dc->debug.force_min_dcfclk_mhz > 0)
+ new_clocks->dcfclk_khz = (new_clocks->dcfclk_khz > (dc->debug.force_min_dcfclk_mhz * 1000)) ?
+ new_clocks->dcfclk_khz : (dc->debug.force_min_dcfclk_mhz * 1000);
+
+ if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr_base->clks.dcfclk_khz)) {
+ clk_mgr_base->clks.dcfclk_khz = new_clocks->dcfclk_khz;
+ if (dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_DCFCLK)) {
+ block_sequence[num_steps].params.update_hardmin_params.ppclk = PPCLK_DCFCLK;
+ block_sequence[num_steps].params.update_hardmin_params.freq_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.dcfclk_khz);
+ block_sequence[num_steps].params.update_hardmin_params.response = NULL;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_HARDMIN_PPCLK;
+ num_steps++;
+ }
+ }
+
+ /* CLK_MGR401_UPDATE_DEEP_SLEEP_DCFCLK */
+ if (should_set_clock(safe_to_lower, new_clocks->dcfclk_deep_sleep_khz, clk_mgr_base->clks.dcfclk_deep_sleep_khz)) {
+ clk_mgr_base->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz;
+ if (dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_DCFCLK)) {
+ block_sequence[num_steps].params.update_deep_sleep_dcfclk_params.freq_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.dcfclk_deep_sleep_khz);
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_DEEP_SLEEP_DCFCLK;
+ num_steps++;
+ }
+ }
+
+ /* SOCCLK */
+ if (should_set_clock(safe_to_lower, new_clocks->socclk_khz, clk_mgr_base->clks.socclk_khz))
+ /* We don't actually care about socclk, don't notify SMU of hard min */
+ clk_mgr_base->clks.socclk_khz = new_clocks->socclk_khz;
+
+ /* UCLK */
+ if (new_clocks->fw_based_mclk_switching != clk_mgr_base->clks.fw_based_mclk_switching &&
+ new_clocks->fw_based_mclk_switching) {
+ /* enable FAMS features */
+ clk_mgr_base->clks.fw_based_mclk_switching = new_clocks->fw_based_mclk_switching;
+
+ block_sequence[num_steps].params.update_wait_for_dmub_ack_params.enable = clk_mgr_base->clks.fw_based_mclk_switching;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_WAIT_FOR_DMUB_ACK;
+ num_steps++;
+
+ block_sequence[num_steps].params.indicate_drr_status_params.mod_drr_for_pstate = clk_mgr_base->clks.fw_based_mclk_switching;
+ block_sequence[num_steps].func = CLK_MGR401_INDICATE_DRR_STATUS;
+ num_steps++;
+ }
+
+ /* CLK_MGR401_UPDATE_CAB_FOR_UCLK */
+ clk_mgr_base->clks.prev_num_ways = clk_mgr_base->clks.num_ways;
+ if (clk_mgr_base->clks.num_ways != new_clocks->num_ways &&
+ clk_mgr_base->clks.num_ways < new_clocks->num_ways) {
+ /* increase num ways for subvp */
+ clk_mgr_base->clks.num_ways = new_clocks->num_ways;
+ if (dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_UCLK)) {
+ block_sequence[num_steps].params.update_cab_for_uclk_params.num_ways = clk_mgr_base->clks.num_ways;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_CAB_FOR_UCLK;
+ num_steps++;
+ }
+ }
+
+ clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support;
+ uclk_p_state_change_support = new_clocks->p_state_change_support || (total_plane_count == 0);
+ if (should_update_pstate_support(safe_to_lower, uclk_p_state_change_support, clk_mgr_base->clks.prev_p_state_change_support)) {
+ clk_mgr_base->clks.p_state_change_support = uclk_p_state_change_support;
+ update_active_uclk = true;
+ update_idle_uclk = true;
+
+ if (clk_mgr_base->clks.p_state_change_support) {
+ /* enable UCLK switching */
+ if (dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_UCLK)) {
+ block_sequence[num_steps].params.update_pstate_support_params.support = true;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_UCLK_PSTATE_SUPPORT;
+ num_steps++;
+ }
+ }
+ }
+
+ if (!clk_mgr_base->clks.p_state_change_support && dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_UCLK)) {
+ /* when P-State switching disabled, set UCLK min = max */
+ if (dc->clk_mgr->dc_mode_softmax_enabled) {
+ /* will never have the functional UCLK min above the softmax
+ * since we calculate mode support based on softmax being the max UCLK
+ * frequency.
+ */
+ active_uclk_mhz = clk_mgr_base->bw_params->dc_mode_softmax_memclk;
+ } else {
+ active_uclk_mhz = clk_mgr_base->bw_params->max_memclk_mhz;
+ }
+ idle_uclk_mhz = active_uclk_mhz;
+ }
+
+ /* Always update saved value, even if new value not set due to P-State switching unsupported */
+ if (should_set_clock(safe_to_lower, new_clocks->dramclk_khz, clk_mgr_base->clks.dramclk_khz)) {
+ clk_mgr_base->clks.dramclk_khz = new_clocks->dramclk_khz;
+
+ if (clk_mgr_base->clks.p_state_change_support) {
+ update_active_uclk = true;
+ active_uclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz);
+ }
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->idle_dramclk_khz, clk_mgr_base->clks.idle_dramclk_khz)) {
+ clk_mgr_base->clks.idle_dramclk_khz = new_clocks->idle_dramclk_khz;
+
+ if (clk_mgr_base->clks.p_state_change_support) {
+ update_idle_uclk = true;
+ idle_uclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.idle_dramclk_khz);
+ }
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->subvp_prefetch_dramclk_khz, clk_mgr_base->clks.subvp_prefetch_dramclk_khz)) {
+ clk_mgr_base->clks.subvp_prefetch_dramclk_khz = new_clocks->subvp_prefetch_dramclk_khz;
+ update_subvp_prefetch_dramclk = true;
+ subvp_prefetch_dramclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_dramclk_khz);
+ }
+
+ /* FCLK */
+ /* Always update saved value, even if new value not set due to P-State switching unsupported */
+ if (should_set_clock(safe_to_lower, new_clocks->fclk_khz, clk_mgr_base->clks.fclk_khz)) {
+ clk_mgr_base->clks.fclk_khz = new_clocks->fclk_khz;
+
+ if (clk_mgr_base->clks.fclk_p_state_change_support) {
+ update_active_fclk = true;
+ active_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.fclk_khz);
+ }
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->idle_fclk_khz, clk_mgr_base->clks.idle_fclk_khz)) {
+ clk_mgr_base->clks.idle_fclk_khz = new_clocks->idle_fclk_khz;
+
+ if (clk_mgr_base->clks.fclk_p_state_change_support) {
+ update_idle_fclk = true;
+ idle_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.idle_fclk_khz);
+ }
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->subvp_prefetch_fclk_khz, clk_mgr_base->clks.subvp_prefetch_fclk_khz)) {
+ clk_mgr_base->clks.subvp_prefetch_fclk_khz = new_clocks->subvp_prefetch_fclk_khz;
+ update_subvp_prefetch_fclk = true;
+ subvp_prefetch_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_fclk_khz);
+ }
+
+ /* When idle DPM is enabled, need to send active and idle hardmins separately */
+ /* CLK_MGR401_UPDATE_ACTIVE_HARDMINS */
+ if ((update_active_uclk || update_active_fclk) && is_idle_dpm_enabled) {
+ block_sequence[num_steps].params.update_idle_hardmin_params.uclk_mhz = active_uclk_mhz;
+ block_sequence[num_steps].params.update_idle_hardmin_params.fclk_mhz = active_fclk_mhz;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_ACTIVE_HARDMINS;
+ num_steps++;
+ }
+
+ /* CLK_MGR401_UPDATE_IDLE_HARDMINS */
+ if ((update_idle_uclk || update_idle_fclk) && is_idle_dpm_enabled) {
+ block_sequence[num_steps].params.update_idle_hardmin_params.uclk_mhz = idle_uclk_mhz;
+ block_sequence[num_steps].params.update_idle_hardmin_params.fclk_mhz = idle_fclk_mhz;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_IDLE_HARDMINS;
+ num_steps++;
+ }
+
+ /* CLK_MGR401_UPDATE_SUBVP_HARDMINS */
+ if ((update_subvp_prefetch_dramclk || update_subvp_prefetch_fclk) && is_df_throttle_opt_enabled) {
+ block_sequence[num_steps].params.update_idle_hardmin_params.uclk_mhz = subvp_prefetch_dramclk_mhz;
+ block_sequence[num_steps].params.update_idle_hardmin_params.fclk_mhz = subvp_prefetch_fclk_mhz;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_SUBVP_HARDMINS;
+ num_steps++;
+ }
+
+ /* set UCLK to requested value if P-State switching is supported, or to re-enable P-State switching */
+ if (update_active_uclk || update_idle_uclk) {
+ if (!is_idle_dpm_enabled) {
+ block_sequence[num_steps].params.update_hardmin_params.ppclk = PPCLK_UCLK;
+ block_sequence[num_steps].params.update_hardmin_params.freq_mhz = active_uclk_mhz;
+ block_sequence[num_steps].params.update_hardmin_params.response = NULL;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_HARDMIN_PPCLK;
+ num_steps++;
+ }
+
+ /* disable UCLK P-State support if needed */
+ if (!uclk_p_state_change_support &&
+ should_update_pstate_support(safe_to_lower, uclk_p_state_change_support, clk_mgr_base->clks.prev_p_state_change_support) &&
+ dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_UCLK)) {
+ block_sequence[num_steps].params.update_pstate_support_params.support = false;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_UCLK_PSTATE_SUPPORT;
+ num_steps++;
+ }
+ }
+
+ /* set FCLK to requested value if P-State switching is supported, or to re-enable P-State switching */
+ if (update_active_fclk || update_idle_fclk) {
+ /* No need to send active FCLK hardmin, automatically set based on DCFCLK */
+ // if (!is_idle_dpm_enabled) {
+ // block_sequence[*num_steps].update_hardmin_params.clk_mgr = clk_mgr;
+ // block_sequence[*num_steps].update_hardmin_params.ppclk = PPCLK_FCLK;
+ // block_sequence[*num_steps].update_hardmin_params.freq_mhz = active_fclk_mhz;
+ // block_sequence[*num_steps].update_hardmin_params.response = NULL;
+ // block_sequence[*num_steps].func = CLK_MGR401_UPDATE_HARDMIN_PPCLK;
+ // (*num_steps)++;
+ // }
+
+ /* disable FCLK P-State support if needed (message not supported on DCN401)*/
+ // if (!fclk_p_state_change_support &&
+ // should_update_pstate_support(safe_to_lower, fclk_p_state_change_support, clk_mgr_base->clks.fclk_prev_p_state_change_support) &&
+ // dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) {
+ // block_sequence[num_steps].params.update_pstate_support_params.support = false;
+ // block_sequence[num_steps].func = CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT;
+ // num_steps++;
+ // }
+ }
+
+ if (new_clocks->fw_based_mclk_switching != clk_mgr_base->clks.fw_based_mclk_switching &&
+ safe_to_lower && !new_clocks->fw_based_mclk_switching) {
+ /* disable FAMS features */
+ clk_mgr_base->clks.fw_based_mclk_switching = new_clocks->fw_based_mclk_switching;
+
+ block_sequence[num_steps].params.update_wait_for_dmub_ack_params.enable = clk_mgr_base->clks.fw_based_mclk_switching;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_WAIT_FOR_DMUB_ACK;
+ num_steps++;
+
+ block_sequence[num_steps].params.indicate_drr_status_params.mod_drr_for_pstate = clk_mgr_base->clks.fw_based_mclk_switching;
+ block_sequence[num_steps].func = CLK_MGR401_INDICATE_DRR_STATUS;
+ num_steps++;
+ }
+
+ /* CLK_MGR401_UPDATE_CAB_FOR_UCLK */
+ if (clk_mgr_base->clks.num_ways != new_clocks->num_ways &&
+ safe_to_lower && clk_mgr_base->clks.num_ways > new_clocks->num_ways) {
+ /* decrease num ways for subvp */
+ clk_mgr_base->clks.num_ways = new_clocks->num_ways;
+ if (dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_UCLK)) {
+ block_sequence[num_steps].params.update_cab_for_uclk_params.num_ways = clk_mgr_base->clks.num_ways;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_CAB_FOR_UCLK;
+ num_steps++;
+ }
+ }
+
+ return num_steps;
+}
+/*
+ * Fill clk_mgr401->block_sequence with the steps needed to move the display
+ * clocks (ref DTBCLK, DPPCLK, DISPCLK) from the values cached in
+ * clk_mgr_base->clks towards those requested in new_clocks, and return the
+ * number of steps written. Steps are only recorded here; this function does
+ * not execute them.
+ *
+ * Side effects visible in this function: the cached dispclk/dppclk values in
+ * clk_mgr_base->clks are updated while the steps are queued, and
+ * new_clocks->ref_dtbclk_khz is overwritten from clk_table entry 0 when
+ * new_clocks->dtbclk_en is not set and DTBCLK DPM is enabled.
+ *
+ * safe_to_lower is forwarded to should_set_clock() and to the DPPCLK DTO
+ * steps.
+ */
+static unsigned int dcn401_build_update_display_clocks_sequence(
+ struct clk_mgr *clk_mgr_base,
+ struct dc_state *context,
+ struct dc_clocks *new_clocks,
+ bool safe_to_lower)
+{
+ struct clk_mgr_internal *clk_mgr_internal = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ struct dcn401_clk_mgr *clk_mgr401 = TO_DCN401_CLK_MGR(clk_mgr_internal);
+ struct dc *dc = clk_mgr_base->ctx->dc;
+ struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu;
+ struct dcn401_clk_mgr_block_sequence *block_sequence = clk_mgr401->block_sequence;
+ bool force_reset = false;
+ bool update_dispclk = false;
+ bool update_dppclk = false;
+ bool dppclk_lowered = false;
+
+ unsigned int num_steps = 0;
+
+ /* CLK_MGR401_READ_CLOCKS_FROM_DENTIST */
+ if (clk_mgr_base->clks.dispclk_khz == 0 ||
+ (dc->debug.force_clock_mode & 0x1)) {
+ /* This is from resume or boot up, if forced_clock cfg option used,
+ * we bypass program dispclk and DPPCLK, but need set them for S3.
+ * Force_clock_mode 0x1: force reset the clock even it is the same clock
+ * as long as it is in Passive level.
+ */
+ force_reset = true;
+
+ clk_mgr_base->clks.dispclk_khz = clk_mgr_base->boot_snapshot.dispclk;
+ clk_mgr_base->clks.actual_dispclk_khz = clk_mgr_base->clks.dispclk_khz;
+
+ clk_mgr_base->clks.dppclk_khz = clk_mgr_base->boot_snapshot.dppclk;
+ clk_mgr_base->clks.actual_dppclk_khz = clk_mgr_base->clks.dppclk_khz;
+ }
+
+ /* DTBCLK */
+ if (!new_clocks->dtbclk_en && dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_DTBCLK)) {
+ new_clocks->ref_dtbclk_khz = clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz * 1000;
+ }
+
+ /* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */
+ if (!dc->debug.disable_dtb_ref_clk_switch &&
+ should_set_clock(safe_to_lower, new_clocks->ref_dtbclk_khz / 1000, clk_mgr_base->clks.ref_dtbclk_khz / 1000) && //TODO these should be ceiled
+ dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_DTBCLK)) {
+ /* DCCG requires KHz precision for DTBCLK */
+ block_sequence[num_steps].params.update_hardmin_params.ppclk = PPCLK_DTBCLK;
+ block_sequence[num_steps].params.update_hardmin_params.freq_mhz = khz_to_mhz_ceil(new_clocks->ref_dtbclk_khz);
+ block_sequence[num_steps].params.update_hardmin_params.response = &clk_mgr_base->clks.ref_dtbclk_khz;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_HARDMIN_PPCLK;
+ num_steps++;
+
+ /* Update DTO in DCCG */
+ block_sequence[num_steps].params.update_dtbclk_dto_params.context = context;
+ block_sequence[num_steps].params.update_dtbclk_dto_params.ref_dtbclk_khz = &clk_mgr_base->clks.ref_dtbclk_khz;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_DTBCLK_DTO;
+ num_steps++;
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr_base->clks.dppclk_khz)) {
+ if (clk_mgr_base->clks.dppclk_khz > new_clocks->dppclk_khz)
+ dppclk_lowered = true;
+
+ clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz;
+ clk_mgr_base->clks.actual_dppclk_khz = new_clocks->dppclk_khz;
+
+ update_dppclk = true;
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
+ clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
+
+ block_sequence[num_steps].params.update_hardmin_optimized_params.ppclk = PPCLK_DISPCLK;
+ block_sequence[num_steps].params.update_hardmin_optimized_params.freq_khz = clk_mgr_base->clks.dispclk_khz;
+ block_sequence[num_steps].params.update_hardmin_optimized_params.response = &clk_mgr_base->clks.actual_dispclk_khz;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_HARDMIN_PPCLK_OPTIMIZED;
+ num_steps++;
+
+ update_dispclk = true;
+ }
+
+ if (dc->config.forced_clocks == false || (force_reset && safe_to_lower)) {
+ if (dppclk_lowered) {
+ /* if clock is being lowered, increase DTO before lowering refclk */
+ block_sequence[num_steps].params.update_dppclk_dto_params.context = context;
+ block_sequence[num_steps].params.update_dppclk_dto_params.ref_dppclk_khz = &clk_mgr_base->clks.dppclk_khz;
+ block_sequence[num_steps].params.update_dppclk_dto_params.safe_to_lower = safe_to_lower;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_DPPCLK_DTO;
+ num_steps++;
+
+ block_sequence[num_steps].params.update_dentist_params.context = context;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_DENTIST;
+ num_steps++;
+
+ if (dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_DPPCLK)) {
+ block_sequence[num_steps].params.update_hardmin_optimized_params.ppclk = PPCLK_DPPCLK;
+ block_sequence[num_steps].params.update_hardmin_optimized_params.freq_khz = clk_mgr_base->clks.dppclk_khz;
+ block_sequence[num_steps].params.update_hardmin_optimized_params.response = &clk_mgr_base->clks.actual_dppclk_khz;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_HARDMIN_PPCLK_OPTIMIZED;
+ num_steps++;
+
+ block_sequence[num_steps].params.update_dppclk_dto_params.context = context;
+ block_sequence[num_steps].params.update_dppclk_dto_params.ref_dppclk_khz = &clk_mgr_base->clks.actual_dppclk_khz;
+ block_sequence[num_steps].params.update_dppclk_dto_params.safe_to_lower = safe_to_lower;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_DPPCLK_DTO;
+ num_steps++;
+ }
+ } else {
+ /* if clock is being raised, increase refclk before lowering DTO */
+ if (update_dppclk && dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_DPPCLK)) {
+ block_sequence[num_steps].params.update_hardmin_optimized_params.ppclk = PPCLK_DPPCLK;
+ block_sequence[num_steps].params.update_hardmin_optimized_params.freq_khz = clk_mgr_base->clks.dppclk_khz;
+ block_sequence[num_steps].params.update_hardmin_optimized_params.response = &clk_mgr_base->clks.actual_dppclk_khz;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_HARDMIN_PPCLK_OPTIMIZED;
+ num_steps++;
+ }
+
+ if (update_dppclk || update_dispclk) {
+ block_sequence[num_steps].params.update_dentist_params.context = context;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_DENTIST;
+ num_steps++;
+ }
+
+ block_sequence[num_steps].params.update_dppclk_dto_params.context = context;
+ block_sequence[num_steps].params.update_dppclk_dto_params.ref_dppclk_khz = &clk_mgr_base->clks.actual_dppclk_khz;
+ block_sequence[num_steps].params.update_dppclk_dto_params.safe_to_lower = safe_to_lower;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_DPPCLK_DTO;
+ num_steps++;
+ }
+ }
+
+ if (update_dispclk && dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) {
+ /*update dmcu for wait_loop count*/
+ block_sequence[num_steps].params.update_psr_wait_loop_params.dmcu = dmcu;
+ block_sequence[num_steps].params.update_psr_wait_loop_params.wait = clk_mgr_base->clks.dispclk_khz / 1000 / 7;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_PSR_WAIT_LOOP;
+ num_steps++;
+ }
+
+ return num_steps;
+}
+
+/*
+ * Apply the clocks requested in context->bw_ctx.bw.dcn.clk in two passes:
+ * the bandwidth clock sequence is built and executed first, then the display
+ * clock sequence. Optionally emits the auto DPM test log afterwards.
+ */
+static void dcn401_update_clocks(struct clk_mgr *clk_mgr_base,
+ struct dc_state *context,
+ bool safe_to_lower)
+{
+ struct dc *dc = clk_mgr_base->ctx->dc;
+ struct dc_clocks *requested = &context->bw_ctx.bw.dcn.clk;
+ unsigned int step_count;
+
+ /* pass 1: bandwidth related clocks */
+ step_count = dcn401_build_update_bandwidth_clocks_sequence(clk_mgr_base,
+ context,
+ requested,
+ safe_to_lower);
+ dcn401_execute_block_sequence(clk_mgr_base, step_count);
+
+ /* pass 2: display related clocks */
+ step_count = dcn401_build_update_display_clocks_sequence(clk_mgr_base,
+ context,
+ requested,
+ safe_to_lower);
+ dcn401_execute_block_sequence(clk_mgr_base, step_count);
+
+ /* logging is opt-in through the dc config */
+ if (dc->config.enable_auto_dpm_test_logs)
+ dcn401_auto_dpm_test_log(requested, TO_CLK_MGR_INTERNAL(clk_mgr_base), context);
+}
+
+/*
+ * Compute the VCO frequency from the FbMult fields of CLK0_CLK_PLL_REQ.
+ *
+ * The register value is turned into a fixed31_32 number, multiplied by
+ * clk_mgr->dfs_ref_freq_khz, and the floor of the product is returned.
+ */
+static uint32_t dcn401_get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
+{
+ struct fixed31_32 pll_req;
+ uint32_t pll_req_reg = 0;
+
+ /* get FbMult value */
+ pll_req_reg = REG_READ(CLK0_CLK_PLL_REQ);
+
+ /* set up a fixed-point number
+ * this works because the int part is on the right edge of the register
+ * and the frac part is on the left edge
+ */
+ pll_req = dc_fixpt_from_int(pll_req_reg & clk_mgr->clk_mgr_mask->FbMult_int);
+ pll_req.value |= pll_req_reg & clk_mgr->clk_mgr_mask->FbMult_frac;
+
+ /* multiply by the reference clock frequency (dfs_ref_freq_khz), not a period */
+ pll_req = dc_fixpt_mul_int(pll_req, clk_mgr->dfs_ref_freq_khz);
+
+ return dc_fixpt_floor(pll_req);
+}
+
+/*
+ * Read GPU PLL spread spectrum info from the BIOS parser and record it in
+ * clk_mgr. Leaves clk_mgr untouched when no entry exists, the query fails,
+ * or the reported percentage is zero.
+ */
+static void dcn401_clock_read_ss_info(struct clk_mgr_internal *clk_mgr)
+{
+ struct dc_bios *bios = clk_mgr->base.ctx->dc_bios;
+ struct spread_spectrum_info ss = { { 0 } };
+
+ /* nothing to read when the BIOS reports no GPU PLL entries */
+ if (!bios->funcs->get_ss_entry_number(bios, AS_SIGNAL_TYPE_GPU_PLL))
+ return;
+
+ if (bios->funcs->get_spread_spectrum_info(bios, AS_SIGNAL_TYPE_GPU_PLL, 0, &ss) != BP_RESULT_OK)
+ return;
+
+ /* a non-zero percentage is the sign that SS is enabled */
+ if (ss.spread_spectrum_percentage == 0)
+ return;
+
+ clk_mgr->ss_on_dprefclk = true;
+ clk_mgr->dprefclk_ss_divider = ss.spread_percentage_divider;
+
+ /* for the DP reference clock only the downspread percentage is needed */
+ if (ss.type.CENTER_MODE == 0)
+ clk_mgr->dprefclk_ss_percentage = ss.spread_spectrum_percentage;
+}
+/*
+ * Rebuild the watermark range table from bw_params and hand it to PMFW.
+ * Does nothing without an SMU or without an allocated table.
+ */
+static void dcn401_notify_wm_ranges(struct clk_mgr *clk_mgr_base)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ WatermarksExternal_t *wm_table = (WatermarksExternal_t *) clk_mgr->wm_range_table;
+ unsigned int set;
+
+ if (!clk_mgr->smu_present || !wm_table)
+ return;
+
+ memset(wm_table, 0, sizeof(*wm_table));
+
+ /* copy the setting index and type of every valid entry into the pmfw table */
+ for (set = 0; set < WM_SET_COUNT; set++) {
+ if (!clk_mgr->base.bw_params->wm_table.nv_entries[set].valid)
+ continue;
+
+ wm_table->Watermarks.WatermarkRow[set].WmSetting = set;
+ wm_table->Watermarks.WatermarkRow[set].Flags = clk_mgr->base.bw_params->wm_table.nv_entries[set].pmfw_breakdown.wm_type;
+ }
+
+ /* pass the table address as two 32-bit halves, then request the transfer */
+ dcn401_smu_set_dram_addr_high(clk_mgr, clk_mgr->wm_range_table_addr >> 32);
+ dcn401_smu_set_dram_addr_low(clk_mgr, clk_mgr->wm_range_table_addr & 0xFFFFFFFF);
+ dcn401_smu_transfer_wm_table_dram_2_smu(clk_mgr);
+}
+
+/* Set min memclk to minimum, either constrained by the current mode or DPM0
+ *
+ * Starts from a copy of the cached clocks and overrides dramclk, idle dramclk
+ * and UCLK P-state support: taken from dc->current_state when current_mode is
+ * true, otherwise from clk_table entry 0 with P-state support set to true.
+ * The bandwidth clock sequence is then built with safe_to_lower = true and
+ * executed. Does nothing without an SMU or when UCLK DPM is not enabled.
+ */
+static void dcn401_set_hard_min_memclk(struct clk_mgr *clk_mgr_base, bool current_mode)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ const struct dc *dc = clk_mgr->base.ctx->dc;
+ struct dc_state *context = dc->current_state;
+ struct dc_clocks new_clocks;
+ /* unsigned, matching the sequence builder's return type and dcn401_update_clocks() */
+ unsigned int num_steps;
+
+ if (!clk_mgr->smu_present || !dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK))
+ return;
+
+ /* build clock update */
+ memcpy(&new_clocks, &clk_mgr_base->clks, sizeof(struct dc_clocks));
+
+ if (current_mode) {
+ new_clocks.dramclk_khz = context->bw_ctx.bw.dcn.clk.dramclk_khz;
+ new_clocks.idle_dramclk_khz = context->bw_ctx.bw.dcn.clk.idle_dramclk_khz;
+ new_clocks.p_state_change_support = context->bw_ctx.bw.dcn.clk.p_state_change_support;
+ } else {
+ new_clocks.dramclk_khz = clk_mgr_base->bw_params->clk_table.entries[0].memclk_mhz * 1000;
+ new_clocks.idle_dramclk_khz = new_clocks.dramclk_khz;
+ new_clocks.p_state_change_support = true;
+ }
+
+ num_steps = dcn401_build_update_bandwidth_clocks_sequence(clk_mgr_base,
+ context,
+ &new_clocks,
+ true);
+
+ /* execute sequence */
+ dcn401_execute_block_sequence(clk_mgr_base, num_steps);
+}
+
+/* Report dramclk_khz from the clock set of the current DC state. */
+static int dcn401_get_hard_min_memclk(struct clk_mgr *clk_mgr_base)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ const struct dc_state *state = clk_mgr->base.ctx->dc->current_state;
+
+ return state->bw_ctx.bw.dcn.clk.dramclk_khz;
+}
+
+/* Report fclk_khz from the clock set of the current DC state. */
+static int dcn401_get_hard_min_fclk(struct clk_mgr *clk_mgr_base)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ const struct dc_state *state = clk_mgr->base.ctx->dc->current_state;
+
+ return state->bw_ctx.bw.dcn.clk.fclk_khz;
+}
+
+/* Get current memclk states, update bounding box
+ *
+ * Does nothing when no SMU is present. Otherwise the UCLK and FCLK level
+ * tables are re-read through dcn401_init_single_clock(), the DC mode limits
+ * and channel information in bw_params are refreshed, and the resource pool's
+ * update_bw_bounding_box() hook is called with the updated bw_params.
+ */
+static void dcn401_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ struct clk_limit_num_entries *num_entries_per_clk = &clk_mgr_base->bw_params->clk_table.num_entries_per_clk;
+ unsigned int num_levels;
+
+ if (!clk_mgr->smu_present)
+ return;
+
+ /* Refresh memclk and fclk states */
+ dcn401_init_single_clock(clk_mgr, PPCLK_UCLK,
+ &clk_mgr_base->bw_params->clk_table.entries[0].memclk_mhz,
+ &num_entries_per_clk->num_memclk_levels);
+ if (num_entries_per_clk->num_memclk_levels) {
+ clk_mgr_base->bw_params->max_memclk_mhz =
+ clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_memclk_levels - 1].memclk_mhz;
+ }
+
+ clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_UCLK);
+ if (num_entries_per_clk->num_memclk_levels && clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz ==
+ clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_memclk_levels - 1].memclk_mhz)
+ clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz = 0; /* limit equals the last table entry: stored as 0 */
+ clk_mgr_base->bw_params->dc_mode_softmax_memclk = clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz;
+
+ dcn401_init_single_clock(clk_mgr, PPCLK_FCLK,
+ &clk_mgr_base->bw_params->clk_table.entries[0].fclk_mhz,
+ &num_entries_per_clk->num_fclk_levels);
+ clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_FCLK);
+ if (num_entries_per_clk->num_fclk_levels && clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz ==
+ clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_fclk_levels - 1].fclk_mhz)
+ clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz = 0; /* same rule as for memclk above */
+
+ num_levels = max(num_entries_per_clk->num_memclk_levels, num_entries_per_clk->num_fclk_levels);
+
+ clk_mgr_base->bw_params->clk_table.num_entries = num_levels ? num_levels : 1; /* never report fewer than one entry */
+
+ if (clk_mgr->dpm_present && !num_levels)
+ clk_mgr->dpm_present = false; /* neither clock reported any level */
+
+ clk_mgr_base->bw_params->num_channels = dcn401_smu_get_num_of_umc_channels(clk_mgr);
+ if (clk_mgr_base->ctx->dc_bios) {
+ /* use BIOS values if none provided by PMFW */
+ if (clk_mgr_base->bw_params->num_channels == 0) {
+ clk_mgr_base->bw_params->num_channels = clk_mgr_base->ctx->dc_bios->vram_info.num_chans;
+ }
+ clk_mgr_base->bw_params->dram_channel_width_bytes = clk_mgr_base->ctx->dc_bios->vram_info.dram_channel_width_bytes;
+ }
+
+ /* Refresh bounding box */
+ clk_mgr_base->ctx->dc->res_pool->funcs->update_bw_bounding_box(
+ clk_mgr->base.ctx->dc, clk_mgr_base->bw_params);
+}
+
+/*
+ * Two clock states compare equal when all clocks and P-state support flags
+ * checked below match; other fields of struct dc_clocks are not compared.
+ */
+static bool dcn401_are_clock_states_equal(struct dc_clocks *a,
+ struct dc_clocks *b)
+{
+ return a->dispclk_khz == b->dispclk_khz &&
+ a->dppclk_khz == b->dppclk_khz &&
+ a->dcfclk_khz == b->dcfclk_khz &&
+ a->dcfclk_deep_sleep_khz == b->dcfclk_deep_sleep_khz &&
+ a->dramclk_khz == b->dramclk_khz &&
+ a->p_state_change_support == b->p_state_change_support &&
+ a->fclk_p_state_change_support == b->fclk_p_state_change_support;
+}
+
+/* Ask the SMU to apply the PME workaround; skipped when no SMU is present. */
+static void dcn401_enable_pme_wa(struct clk_mgr *clk_mgr_base)
+{
+ struct clk_mgr_internal *internal = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+ if (internal->smu_present)
+ dcn401_smu_set_pme_workaround(internal);
+}
+
+/* Expose the smu_present flag of the internal clock manager. */
+static bool dcn401_is_smu_present(struct clk_mgr *clk_mgr_base)
+{
+ struct clk_mgr_internal *internal = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+ return internal->smu_present;
+}
+
+
+/*
+ * Return the DTB reference clock in kHz: the currently cached value when an
+ * SMU is present and DTBCLK DPM is enabled, otherwise the boot snapshot.
+ */
+static int dcn401_get_dtb_ref_freq_khz(struct clk_mgr *clk_mgr_base)
+{
+ struct clk_mgr_internal *internal = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+ /* DPM disabled, so use boot snapshot */
+ if (!internal->smu_present || !dcn401_is_ppclk_dpm_enabled(internal, PPCLK_DTBCLK))
+ return clk_mgr_base->boot_snapshot.dtbclk;
+
+ /* DPM enabled, use currently set value */
+ return clk_mgr_base->clks.ref_dtbclk_khz;
+}
+
+/*
+ * Derive DISPCLK in kHz from the DENTIST write divider and the cached VCO
+ * frequency. Returns 0 when the divider ID maps to a zero divider.
+ */
+static int dcn401_get_dispclk_from_dentist(struct clk_mgr *clk_mgr_base)
+{
+ /* the register macros below rely on a local named clk_mgr */
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ uint32_t wdivider_id;
+ int divider;
+
+ REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, &wdivider_id);
+ divider = dentist_get_divider_from_did(wdivider_id);
+
+ /* avoid dividing by zero */
+ if (!divider)
+ return 0;
+
+ /* Return DISPCLK freq in Khz */
+ return (DENTIST_DIVIDER_RANGE_SCALE_FACTOR * clk_mgr->base.dentist_vco_freq_khz) / divider;
+}
+
+/*
+ * Return the maximum frequency in kHz for the given clock type.
+ *
+ * With DPM enabled for the backing PPCLK the last clk_table entry is used,
+ * otherwise the boot snapshot. DSCCLK is reported as DISPCLK divided by 3.
+ * Unhandled clock types yield 0.
+ */
+unsigned int dcn401_get_max_clock_khz(struct clk_mgr *clk_mgr_base, enum clk_type clk_type)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ const struct clk_limit_num_entries *levels = &clk_mgr->base.bw_params->clk_table.num_entries_per_clk;
+ unsigned int level_count;
+
+ switch (clk_type) {
+ case CLK_TYPE_DISPCLK:
+ level_count = levels->num_dispclk_levels;
+ if (!dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DISPCLK))
+ return clk_mgr->base.boot_snapshot.dispclk;
+ return clk_mgr->base.bw_params->clk_table.entries[level_count - 1].dispclk_mhz * 1000;
+ case CLK_TYPE_DPPCLK:
+ level_count = levels->num_dppclk_levels;
+ if (!dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DPPCLK))
+ return clk_mgr->base.boot_snapshot.dppclk;
+ return clk_mgr->base.bw_params->clk_table.entries[level_count - 1].dppclk_mhz * 1000;
+ case CLK_TYPE_DSCCLK:
+ /* derived from the DISPCLK levels */
+ level_count = levels->num_dispclk_levels;
+ if (!dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DISPCLK))
+ return clk_mgr->base.boot_snapshot.dispclk / 3;
+ return clk_mgr->base.bw_params->clk_table.entries[level_count - 1].dispclk_mhz * 1000 / 3;
+ default:
+ break;
+ }
+
+ return 0;
+}
+/* clk_mgr callback table for DCN401, installed by dcn401_clk_mgr_construct() */
+static struct clk_mgr_funcs dcn401_funcs = {
+ .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
+ .get_dtb_ref_clk_frequency = dcn401_get_dtb_ref_freq_khz,
+ .update_clocks = dcn401_update_clocks,
+ .dump_clk_registers = dcn401_dump_clk_registers,
+ .init_clocks = dcn401_init_clocks,
+ .notify_wm_ranges = dcn401_notify_wm_ranges,
+ .set_hard_min_memclk = dcn401_set_hard_min_memclk,
+ .get_memclk_states_from_smu = dcn401_get_memclk_states_from_smu,
+ .are_clock_states_equal = dcn401_are_clock_states_equal,
+ .enable_pme_wa = dcn401_enable_pme_wa,
+ .is_smu_present = dcn401_is_smu_present,
+ .get_dispclk_from_dentist = dcn401_get_dispclk_from_dentist,
+ .get_hard_min_memclk = dcn401_get_hard_min_memclk,
+ .get_hard_min_fclk = dcn401_get_hard_min_fclk,
+ .is_dc_mode_present = dcn401_is_dc_mode_present,
+ .get_max_clock_khz = dcn401_get_max_clock_khz,
+};
+/*
+ * Allocate and initialise a DCN401 clock manager.
+ *
+ * Sets the register/shift/mask tables and default reference clock values,
+ * reads the VCO frequency from the PLL register (falling back to a default
+ * when it reads as 0), captures the boot clock snapshot, reads spread
+ * spectrum info, and allocates bw_params plus the GPU-visible watermark
+ * range table.
+ *
+ * Returns the embedded clk_mgr_internal on success, or NULL when any
+ * allocation fails; allocations made earlier in this function are freed
+ * before returning NULL.
+ */
+struct clk_mgr_internal *dcn401_clk_mgr_construct(
+ struct dc_context *ctx,
+ struct dccg *dccg)
+{
+ struct clk_log_info log_info = {0};
+ struct dcn401_clk_mgr *clk_mgr401 = kzalloc(sizeof(struct dcn401_clk_mgr), GFP_KERNEL);
+ struct clk_mgr_internal *clk_mgr;
+
+ if (!clk_mgr401)
+ return NULL;
+
+ clk_mgr = &clk_mgr401->base;
+ clk_mgr->base.ctx = ctx;
+ clk_mgr->base.funcs = &dcn401_funcs;
+ clk_mgr->regs = &clk_mgr_regs_dcn401;
+ clk_mgr->clk_mgr_shift = &clk_mgr_shift_dcn401;
+ clk_mgr->clk_mgr_mask = &clk_mgr_mask_dcn401;
+
+ clk_mgr->dccg = dccg;
+ clk_mgr->dfs_bypass_disp_clk = 0;
+
+ clk_mgr->dprefclk_ss_percentage = 0;
+ clk_mgr->dprefclk_ss_divider = 1000;
+ clk_mgr->ss_on_dprefclk = false;
+ clk_mgr->dfs_ref_freq_khz = 100000;
+
+ /* Changed from DCN3.2_clock_frequency doc to match
+ * dcn401_dump_clk_registers from 4 * dentist_vco_freq_khz /
+ * dprefclk DID divider
+ */
+ clk_mgr->base.dprefclk_khz = 720000; //TODO update from VBIOS
+
+ /* integer part is now VCO frequency in kHz */
+ clk_mgr->base.dentist_vco_freq_khz = dcn401_get_vco_frequency_from_reg(clk_mgr);
+
+ /* in case we don't get a value from the register, use default */
+ if (clk_mgr->base.dentist_vco_freq_khz == 0)
+ clk_mgr->base.dentist_vco_freq_khz = 4500000; //TODO Update from VBIOS
+
+ dcn401_dump_clk_registers(&clk_mgr->base.boot_snapshot, &clk_mgr->base, &log_info);
+
+ if (ctx->dc->debug.disable_dtb_ref_clk_switch &&
+ clk_mgr->base.clks.ref_dtbclk_khz != clk_mgr->base.boot_snapshot.dtbclk) {
+ clk_mgr->base.clks.ref_dtbclk_khz = clk_mgr->base.boot_snapshot.dtbclk;
+ }
+
+ if (clk_mgr->base.boot_snapshot.dprefclk != 0) {
+ clk_mgr->base.dprefclk_khz = clk_mgr->base.boot_snapshot.dprefclk; /* snapshot value replaces the 720000 default */
+ }
+ dcn401_clock_read_ss_info(clk_mgr);
+
+ clk_mgr->dfs_bypass_enabled = false;
+
+ clk_mgr->smu_present = false;
+
+ clk_mgr->base.bw_params = kzalloc(sizeof(*clk_mgr->base.bw_params), GFP_KERNEL);
+ if (!clk_mgr->base.bw_params) {
+ BREAK_TO_DEBUGGER();
+ kfree(clk_mgr401);
+ return NULL;
+ }
+
+ /* need physical address of table to give to PMFW */
+ clk_mgr->wm_range_table = dm_helpers_allocate_gpu_mem(clk_mgr->base.ctx,
+ DC_MEM_ALLOC_TYPE_GART, sizeof(WatermarksExternal_t),
+ &clk_mgr->wm_range_table_addr);
+ if (!clk_mgr->wm_range_table) {
+ BREAK_TO_DEBUGGER();
+ kfree(clk_mgr->base.bw_params);
+ kfree(clk_mgr401);
+ return NULL;
+ }
+
+ return &clk_mgr401->base;
+}
+/*
+ * Release the resources owned by the clock manager: bw_params and, when
+ * allocated, the GPU-visible watermark range table. The clk_mgr structure
+ * itself is not freed here.
+ */
+void dcn401_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr)
+{
+ kfree(clk_mgr->base.bw_params);
+
+ if (clk_mgr->wm_range_table)
+ dm_helpers_free_gpu_mem(clk_mgr->base.ctx, DC_MEM_ALLOC_TYPE_GART,
+ clk_mgr->wm_range_table);
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h
new file mode 100644
index 000000000000..97a1ce1e8a9e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DCN401_CLK_MGR_H_
+#define __DCN401_CLK_MGR_H_
+
+#define DCN401_CLK_MGR_MAX_SEQUENCE_SIZE 30
+
+union dcn401_clk_mgr_block_sequence_params { /* argument storage for one sequence step; paired with a func id in struct dcn401_clk_mgr_block_sequence */
+ struct {
+ /* inputs */
+ uint32_t num_displays;
+ } update_num_displays_params;
+ struct {
+ /* inputs: clock selector and requested frequency (MHz, per the field name) */
+ uint32_t ppclk;
+ uint16_t freq_mhz;
+ /* outputs: caller-provided location for the step's result */
+ uint32_t *response;
+ } update_hardmin_params;
+ struct {
+ /* inputs: same shape as update_hardmin_params, but the frequency is carried in kHz (per the field name) */
+ uint32_t ppclk;
+ int freq_khz;
+ /* outputs: caller-provided location for the step's result */
+ uint32_t *response;
+ } update_hardmin_optimized_params;
+ struct {
+ /* inputs: uclk/fclk pair, both in MHz (per the field names) */
+ uint16_t uclk_mhz;
+ uint16_t fclk_mhz;
+ } update_idle_hardmin_params;
+ struct {
+ /* inputs */
+ uint16_t freq_mhz;
+ } update_deep_sleep_dcfclk_params;
+ struct {
+ /* inputs */
+ bool support;
+ } update_pstate_support_params;
+ struct {
+ /* inputs */
+ unsigned int num_ways;
+ } update_cab_for_uclk_params;
+ struct {
+ /* inputs */
+ bool enable;
+ } update_wait_for_dmub_ack_params;
+ struct {
+ /* inputs */
+ bool mod_drr_for_pstate;
+ } indicate_drr_status_params;
+ struct {
+ /* inputs (NOTE(review): ref_dppclk_khz is a pointer, so the step may also write through it - confirm against the consumer) */
+ struct dc_state *context;
+ int *ref_dppclk_khz;
+ bool safe_to_lower;
+ } update_dppclk_dto_params;
+ struct {
+ /* inputs (NOTE(review): ref_dtbclk_khz is a pointer, so the step may also write through it - confirm against the consumer) */
+ struct dc_state *context;
+ int *ref_dtbclk_khz;
+ } update_dtbclk_dto_params;
+ struct {
+ /* inputs */
+ struct dc_state *context;
+ } update_dentist_params;
+ struct {
+ /* inputs */
+ struct dmcu *dmcu;
+ unsigned int wait;
+ } update_psr_wait_loop_params;
+};
+
+enum dcn401_clk_mgr_block_sequence_func { /* step identifier stored in dcn401_clk_mgr_block_sequence.func next to the step's params */
+ CLK_MGR401_READ_CLOCKS_FROM_DENTIST,
+ CLK_MGR401_UPDATE_NUM_DISPLAYS,
+ CLK_MGR401_UPDATE_HARDMIN_PPCLK,
+ CLK_MGR401_UPDATE_HARDMIN_PPCLK_OPTIMIZED,
+ CLK_MGR401_UPDATE_ACTIVE_HARDMINS,
+ CLK_MGR401_UPDATE_IDLE_HARDMINS,
+ CLK_MGR401_UPDATE_DEEP_SLEEP_DCFCLK,
+ CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT,
+ CLK_MGR401_UPDATE_UCLK_PSTATE_SUPPORT,
+ CLK_MGR401_UPDATE_CAB_FOR_UCLK,
+ CLK_MGR401_UPDATE_WAIT_FOR_DMUB_ACK,
+ CLK_MGR401_INDICATE_DRR_STATUS,
+ CLK_MGR401_UPDATE_DPPCLK_DTO,
+ CLK_MGR401_UPDATE_DTBCLK_DTO,
+ CLK_MGR401_UPDATE_DENTIST,
+ CLK_MGR401_UPDATE_PSR_WAIT_LOOP,
+ CLK_MGR401_UPDATE_SUBVP_HARDMINS, /* NOTE(review): no matching member is visible in the params union - presumably reuses an existing hardmin member; confirm */
+};
+
+struct dcn401_clk_mgr_block_sequence { /* one step of a clock-update sequence: what to do plus its arguments */
+ union dcn401_clk_mgr_block_sequence_params params; /* arguments for the step; which member is valid depends on func */
+ enum dcn401_clk_mgr_block_sequence_func func; /* which operation this step performs */
+};
+
+struct dcn401_clk_mgr { /* DCN401 clock manager: generic clk_mgr state plus a fixed-size step buffer */
+ struct clk_mgr_internal base; /* generic clock manager state; the constructor hands out a pointer to this member */
+
+ struct dcn401_clk_mgr_block_sequence block_sequence[DCN401_CLK_MGR_MAX_SEQUENCE_SIZE]; /* storage for at most DCN401_CLK_MGR_MAX_SEQUENCE_SIZE (30) steps */
+};
+
+void dcn401_init_clocks(struct clk_mgr *clk_mgr_base);
+bool dcn401_is_dc_mode_present(struct clk_mgr *clk_mgr_base);
+
+struct clk_mgr_internal *dcn401_clk_mgr_construct(struct dc_context *ctx,
+ struct dccg *dccg);
+
+void dcn401_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr);
+
+unsigned int dcn401_get_max_clock_khz(struct clk_mgr *clk_mgr_base, enum clk_type clk_type);
+
+#endif /* __DCN401_CLK_MGR_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c
new file mode 100644
index 000000000000..3a263840893e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c
@@ -0,0 +1,472 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dcn401_clk_mgr_smu_msg.h"
+
+#include "clk_mgr_internal.h"
+#include "reg_helper.h"
+
+#include "dalsmc.h"
+#include "dcn401_smu14_driver_if.h"
+
+#define mmDAL_MSG_REG 0x1628A
+#define mmDAL_ARG_REG 0x16273
+#define mmDAL_RESP_REG 0x16274
+
+#define REG(reg_name) \
+ mm ## reg_name
+
+#include "logger_types.h"
+
+#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
+
+/* temporary define */
+#ifndef DALSMC_MSG_SubvpUclkFclk
+#define DALSMC_MSG_SubvpUclkFclk 0x1B
+#endif
+#ifndef DALSMC_MSG_GetNumUmcChannels
+#define DALSMC_MSG_GetNumUmcChannels 0x1C
+#endif
+
+/*
+ * Poll DAL_RESP_REG until it reads back non-zero or the retry budget is
+ * exhausted.
+ *
+ * Used instead of the REG_WAIT macro because this wait ends when the
+ * register is NOT EQUAL to zero, and because the translation in msg_if.h
+ * won't work with REG_WAIT.
+ *
+ * @delay_us:    pause between polls; values of 1000 or more sleep in whole
+ *               milliseconds, smaller non-zero values busy-wait
+ * @max_retries: number of additional polls allowed after the first one
+ *
+ * Returns the last value read from the response register, which is zero
+ * when every poll saw zero.
+ */
+static uint32_t dcn401_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, unsigned int delay_us, unsigned int max_retries)
+{
+	uint32_t resp;
+
+	for (;;) {
+		resp = REG_READ(DAL_RESP_REG);
+		if (resp != 0)
+			return resp;
+
+		if (delay_us >= 1000)
+			msleep(delay_us/1000);
+		else if (delay_us > 0)
+			udelay(delay_us);
+
+		/* the delay above still runs once on the final, failed poll */
+		if (max_retries-- == 0)
+			return resp;
+	}
+}
+
+/*
+ * Send one message to the SMU through the DAL mailbox registers and wait for
+ * its completion.
+ *
+ * @msg_id:    message identifier written to DAL_MSG_REG
+ * @param_in:  argument written to DAL_ARG_REG before the message is triggered
+ * @param_out: optional; on success receives the value read back from
+ *             DAL_ARG_REG. Left untouched on failure.
+ *
+ * Returns true only when the response register reports DALSMC_Result_OK.
+ */
+static bool dcn401_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uint32_t msg_id, uint32_t param_in, uint32_t *param_out)
+{
+	uint32_t result;
+
+	/* Let any message still in flight finish before reusing the mailbox */
+	dcn401_smu_wait_for_response(clk_mgr, 10, 200000);
+
+	TRACE_SMU_MSG_ENTER(msg_id, param_in, clk_mgr->base.ctx);
+
+	/* Order matters: clear the response, load the argument, then trigger */
+	REG_WRITE(DAL_RESP_REG, 0);
+	REG_WRITE(DAL_ARG_REG, param_in);
+	REG_WRITE(DAL_MSG_REG, msg_id);
+
+	result = dcn401_smu_wait_for_response(clk_mgr, 10, 200000);
+	if (result != DALSMC_Result_OK) {
+		TRACE_SMU_MSG_EXIT(false, 0, clk_mgr->base.ctx);
+		return false;
+	}
+
+	if (param_out)
+		*param_out = REG_READ(DAL_ARG_REG);
+
+	TRACE_SMU_MSG_EXIT(true, param_out ? *param_out : 0, clk_mgr->base.ctx);
+	return true;
+}
+
+/*
+ * Delay-reporting variants of the wait/send helpers, so callers can
+ * aggregate the total delay when requesting a hardmin clk:
+ *
+ * dcn401_smu_wait_for_response_delay
+ * dcn401_smu_send_msg_with_param_delay
+ *
+ * This one polls DAL_RESP_REG like dcn401_smu_wait_for_response() and
+ * additionally stores in *total_delay_us the sum of the per-poll delays that
+ * were requested (delay_us for every poll that read zero).
+ */
+static uint32_t dcn401_smu_wait_for_response_delay(struct clk_mgr_internal *clk_mgr, unsigned int delay_us, unsigned int max_retries, unsigned int *total_delay_us)
+{
+	uint32_t resp;
+
+	*total_delay_us = 0;
+
+	for (;;) {
+		resp = REG_READ(DAL_RESP_REG);
+		if (resp != 0)
+			return resp;
+
+		if (delay_us >= 1000)
+			msleep(delay_us/1000);
+		else if (delay_us > 0)
+			udelay(delay_us);
+
+		/* accounted as requested, not as measured */
+		*total_delay_us += delay_us;
+
+		if (max_retries-- == 0)
+			return resp;
+	}
+}
+
+/*
+ * Same mailbox transaction as dcn401_smu_send_msg_with_param(), but also
+ * reports how long the two waits took.
+ *
+ * On success *total_delay_us is the sum of both waits. On failure it is the
+ * first wait plus the full polling budget of the second wait
+ * (10us x 200000 polls = 2000000us), regardless of how long the second wait
+ * actually took.
+ *
+ * Returns true only when the response register reports DALSMC_Result_OK.
+ */
+static bool dcn401_smu_send_msg_with_param_delay(struct clk_mgr_internal *clk_mgr, uint32_t msg_id, uint32_t param_in, uint32_t *param_out, unsigned int *total_delay_us)
+{
+	const unsigned int poll_delay_us = 10;
+	const unsigned int max_polls = 200000;
+	unsigned int pre_wait_us, post_wait_us;
+	uint32_t result;
+
+	*total_delay_us = 0;
+
+	/* Let any message still in flight finish before reusing the mailbox */
+	dcn401_smu_wait_for_response_delay(clk_mgr, poll_delay_us, max_polls, &pre_wait_us);
+
+	TRACE_SMU_MSG_ENTER(msg_id, param_in, clk_mgr->base.ctx);
+
+	/* Order matters: clear the response, load the argument, then trigger */
+	REG_WRITE(DAL_RESP_REG, 0);
+	REG_WRITE(DAL_ARG_REG, param_in);
+	REG_WRITE(DAL_MSG_REG, msg_id);
+
+	result = dcn401_smu_wait_for_response_delay(clk_mgr, poll_delay_us, max_polls, &post_wait_us);
+	if (result != DALSMC_Result_OK) {
+		/* charge the whole polling budget on failure */
+		*total_delay_us = pre_wait_us + poll_delay_us * max_polls;
+		TRACE_SMU_MSG_EXIT(false, 0, clk_mgr->base.ctx);
+		return false;
+	}
+
+	if (param_out)
+		*param_out = REG_READ(DAL_ARG_REG);
+
+	*total_delay_us = pre_wait_us + post_wait_us;
+	TRACE_SMU_MSG_EXIT(true, param_out ? *param_out : 0, clk_mgr->base.ctx);
+	return true;
+}
+
+/*
+ * Query the SMU firmware version.
+ *
+ * @version: optional; receives the version reported by the SMU when the
+ *           message succeeds. Left untouched on failure.
+ *
+ * Returns true when the SMU acknowledged the message, false otherwise.
+ */
+bool dcn401_smu_get_smu_version(struct clk_mgr_internal *clk_mgr, unsigned int *version)
+{
+	/*
+	 * Read into a local so the log line below never dereferences a NULL
+	 * @version; the send helper itself accepts a NULL output pointer.
+	 */
+	uint32_t smu_version = 0;
+
+	smu_print("SMU Get SMU version\n");
+
+	if (!dcn401_smu_send_msg_with_param(clk_mgr,
+			DALSMC_MSG_GetSmuVersion, 0, &smu_version))
+		return false;
+
+	smu_print("SMU version: %d\n", smu_version);
+
+	if (version)
+		*version = smu_version;
+
+	return true;
+}
+
+/*
+ * Ask the SMU for its driver interface version and compare it with
+ * SMU14_DRIVER_IF_VERSION.
+ *
+ * Returns true only when the message succeeded and the versions match.
+ */
+bool dcn401_smu_check_driver_if_version(struct clk_mgr_internal *clk_mgr)
+{
+	uint32_t if_version = 0;
+
+	smu_print("SMU Check driver if version\n");
+
+	if (!dcn401_smu_send_msg_with_param(clk_mgr,
+			DALSMC_MSG_GetDriverIfVersion, 0, &if_version))
+		return false;
+
+	smu_print("SMU driver if version: %d\n", if_version);
+
+	return if_version == SMU14_DRIVER_IF_VERSION;
+}
+
+/*
+ * Ask the SMU for its message header version and compare it with
+ * DALSMC_VERSION (from dalsmc.h).
+ *
+ * Returns true only when the message succeeded and the versions match.
+ */
+bool dcn401_smu_check_msg_header_version(struct clk_mgr_internal *clk_mgr)
+{
+	uint32_t header_version = 0;
+
+	smu_print("SMU Check msg header version\n");
+
+	if (!dcn401_smu_send_msg_with_param(clk_mgr,
+			DALSMC_MSG_GetMsgHeaderVersion, 0, &header_version))
+		return false;
+
+	smu_print("SMU msg header version: %d\n", header_version);
+
+	return header_version == DALSMC_VERSION;
+}
+
+/*
+ * Send DALSMC_MSG_SetFclkSwitchAllow with @support (0 or 1) as the argument.
+ * The SMU's reply is not checked.
+ */
+void dcn401_smu_send_fclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool support)
+{
+	const uint32_t allow = support ? 1 : 0;
+
+	smu_print("FCLK P-state support value is : %d\n", support);
+
+	dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_SetFclkSwitchAllow,
+			allow, NULL);
+}
+
+/*
+ * Send DALSMC_MSG_SetUclkPstateAllow with @support (0 or 1) as the argument.
+ * The SMU's reply is not checked.
+ */
+void dcn401_smu_send_uclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool support)
+{
+	const uint32_t allow = support ? 1 : 0;
+
+	smu_print("UCLK P-state support value is : %d\n", support);
+
+	dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_SetUclkPstateAllow,
+			allow, NULL);
+}
+
+/*
+ * Send DALSMC_MSG_SetCabForUclkPstate. The argument carries @num_ways
+ * shifted left by one, with bit 0 set whenever @num_ways is non-zero.
+ * The SMU's reply is not checked.
+ */
+void dcn401_smu_send_cab_for_uclk_message(struct clk_mgr_internal *clk_mgr, unsigned int num_ways)
+{
+	const uint32_t nonzero_flag = (num_ways > 0) ? 1 : 0;
+	const uint32_t arg = (num_ways << 1) | nonzero_flag;
+
+	dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_SetCabForUclkPstate, arg, NULL);
+	smu_print("Numways for SubVP : %d\n", num_ways);
+}
+
+/*
+ * Send DALSMC_MSG_SetDalDramAddrHigh with @addr_high as the argument.
+ * The SMU's reply is not checked.
+ */
+void dcn401_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high)
+{
+	const uint32_t msg = DALSMC_MSG_SetDalDramAddrHigh;
+
+	smu_print("SMU Set DRAM addr high: %d\n", addr_high);
+
+	dcn401_smu_send_msg_with_param(clk_mgr, msg, addr_high, NULL);
+}
+
+/*
+ * Send DALSMC_MSG_SetDalDramAddrLow with @addr_low as the argument.
+ * The SMU's reply is not checked.
+ */
+void dcn401_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low)
+{
+	const uint32_t msg = DALSMC_MSG_SetDalDramAddrLow;
+
+	smu_print("SMU Set DRAM addr low: %d\n", addr_low);
+
+	dcn401_smu_send_msg_with_param(clk_mgr, msg, addr_low, NULL);
+}
+
+/*
+ * Send DALSMC_MSG_TransferTableDram2Smu with TABLE_WATERMARKS as the table
+ * selector. The SMU's reply is not checked.
+ */
+void dcn401_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr)
+{
+	const uint32_t table_id = TABLE_WATERMARKS;
+
+	smu_print("SMU Transfer WM table DRAM 2 SMU\n");
+
+	dcn401_smu_send_msg_with_param(clk_mgr,
+			DALSMC_MSG_TransferTableDram2Smu, table_id, NULL);
+}
+
+/*
+ * Send DALSMC_MSG_BacoAudioD3PME with a zero argument.
+ * The SMU's reply is not checked.
+ */
+void dcn401_smu_set_pme_workaround(struct clk_mgr_internal *clk_mgr)
+{
+	const uint32_t no_arg = 0;
+
+	smu_print("SMU Set PME workaround\n");
+
+	dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_BacoAudioD3PME,
+			no_arg, NULL);
+}
+
+/*
+ * Read the SMU hard-min status word via DALSMC_MSG_ReturnHardMinStatus.
+ *
+ * @no_timeout:     set to the success flag of the message exchange
+ * @total_delay_us: set to the delay reported by the delay-tracking send helper
+ *
+ * Returns the status word from the SMU (0 when the exchange failed, since
+ * the local response is never written in that case). Callers test
+ * individual bits of it.
+ */
+static unsigned int dcn401_smu_get_hard_min_status(struct clk_mgr_internal *clk_mgr, bool *no_timeout, unsigned int *total_delay_us)
+{
+	uint32_t status_bits = 0;
+	bool ok;
+
+	/* the message is always sent with a zero argument */
+	ok = dcn401_smu_send_msg_with_param_delay(clk_mgr,
+			DALSMC_MSG_ReturnHardMinStatus, 0, &status_bits, total_delay_us);
+	*no_timeout = ok;
+
+	smu_print("SMU Get hard min status: no_timeout %d delay %d us clk bits %x\n",
+		*no_timeout, *total_delay_us, status_bits);
+
+	return status_bits;
+}
+
+/*
+ * Poll the SMU hard-min status word until the bit for @ppclk is set.
+ *
+ * @ppclk is used as a bit index into the word returned by
+ * dcn401_smu_get_hard_min_status(). Polling stops as soon as that bit is
+ * set or once the accumulated delay reaches one second.
+ *
+ * Every query after the first is preceded by a 500us pause. The pause is
+ * keyed on "not the first query" rather than on the accumulated delay being
+ * non-zero: if the SMU answers each status read without any measured delay,
+ * the accumulated delay would otherwise stay at zero, the pause would never
+ * run and the loop would spin without ever reaching its timeout.
+ *
+ * Returns true if the status bit was observed set, false on timeout.
+ */
+static bool dcn401_smu_wait_hard_min_status(struct clk_mgr_internal *clk_mgr, uint32_t ppclk)
+{
+	const unsigned int max_delay_us = 1000000;
+	const unsigned int retry_delay_us = 500;
+
+	unsigned int hardmin_status_mask = (1U << ppclk);
+	unsigned int total_delay_us = 0;
+	bool hardmin_done = false;
+	bool first_query = true;
+
+	while (!hardmin_done && total_delay_us < max_delay_us) {
+		unsigned int hardmin_status;
+		unsigned int read_total_delay_us;
+		bool no_timeout;
+
+		if (!first_query) {
+			/* hardmin not yet fulfilled, wait 500us and retry */
+			udelay(retry_delay_us);
+			total_delay_us += retry_delay_us;
+
+			smu_print("SMU Wait hard min status for %d us\n", total_delay_us);
+		}
+		first_query = false;
+
+		hardmin_status = dcn401_smu_get_hard_min_status(clk_mgr, &no_timeout, &read_total_delay_us);
+		total_delay_us += read_total_delay_us;
+		hardmin_done = (hardmin_status & hardmin_status_mask) != 0;
+	}
+
+	return hardmin_done;
+}
+
+/*
+ * Request a hard minimum frequency for clock @clk and wait for the SMU to
+ * report the hardmin as applied.
+ *
+ * @clk:      clock selector, placed in bits 23:16 of the message argument and
+ *            also used as the bit index polled in the hard-min status word
+ * @freq_mhz: requested frequency in MHz, placed in the lower 16 bits
+ *
+ * Returns the actual frequency that was set in MHz, 0 on failure. The
+ * outcome of the acknowledge wait is only logged, not returned.
+ */
+unsigned int dcn401_smu_set_hard_min_by_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint16_t freq_mhz)
+{
+	uint32_t response = 0;
+	bool hard_min_done = false;
+
+	/* bits 23:16 for clock type, lower 16 bits for frequency in MHz */
+	uint32_t param = (clk << 16) | freq_mhz;
+
+	smu_print("SMU Set hard min by freq: clk = %d, freq_mhz = %d MHz\n", clk, freq_mhz);
+
+	dcn401_smu_send_msg_with_param(clk_mgr,
+			DALSMC_MSG_SetHardMinByFreq, param, &response);
+
+	/* wait until hardmin acknowledged */
+	hard_min_done = dcn401_smu_wait_hard_min_status(clk_mgr, clk);
+
+	/* the response is in MHz, so label it as such in the log */
+	smu_print("SMU Frequency set = %d MHz hard_min_done %d\n", response, hard_min_done);
+
+	return response;
+}
+
+/*
+ * Send DALSMC_MSG_SetAlwaysWaitDmcubResp with 1 when @enable is true and 0
+ * otherwise. The SMU's reply is not checked.
+ */
+void dcn401_smu_wait_for_dmub_ack_mclk(struct clk_mgr_internal *clk_mgr, bool enable)
+{
+	uint32_t wait_flag = 0;
+
+	if (enable)
+		wait_flag = 1;
+
+	smu_print("SMU to wait for DMCUB ack for MCLK : %d\n", enable);
+
+	dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_SetAlwaysWaitDmcubResp, wait_flag, NULL);
+}
+
+/*
+ * Send DALSMC_MSG_IndicateDrrStatus with 1 when @mod_drr_for_pstate is true
+ * and 0 otherwise. The SMU's reply is not checked.
+ */
+void dcn401_smu_indicate_drr_status(struct clk_mgr_internal *clk_mgr, bool mod_drr_for_pstate)
+{
+	uint32_t drr_flag = 0;
+
+	if (mod_drr_for_pstate)
+		drr_flag = 1;
+
+	smu_print("SMU Set indicate drr status = %d\n", mod_drr_for_pstate);
+
+	dcn401_smu_send_msg_with_param(clk_mgr,
+			DALSMC_MSG_IndicateDrrStatus, drr_flag, NULL);
+}
+
+/*
+ * Send DALSMC_MSG_IdleUclkFclk with the given UCLK/FCLK pair and wait for
+ * the PPCLK_UCLK bit of the hard-min status word.
+ *
+ * @uclk_freq_mhz: placed in bits 15:0 of the message argument
+ * @fclk_freq_mhz: placed in bits 31:16 of the message argument
+ *
+ * Returns true only when both the message and the status wait succeeded.
+ * The status wait is performed even if the message failed.
+ */
+bool dcn401_smu_set_idle_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
+		uint16_t uclk_freq_mhz,
+		uint16_t fclk_freq_mhz)
+{
+	uint32_t response = 0;
+	bool success;
+
+	/*
+	 * 15:0 for uclk, 31:16 for fclk. Widen before shifting: a uint16_t is
+	 * promoted to signed int, and shifting a set bit 15 into the sign bit
+	 * is undefined.
+	 */
+	uint32_t param = ((uint32_t)fclk_freq_mhz << 16) | uclk_freq_mhz;
+
+	smu_print("SMU Set idle hardmin by freq: uclk_freq_mhz = %d MHz, fclk_freq_mhz = %d MHz\n", uclk_freq_mhz, fclk_freq_mhz);
+
+	success = dcn401_smu_send_msg_with_param(clk_mgr,
+			DALSMC_MSG_IdleUclkFclk, param, &response);
+
+	/* wait until hardmin acknowledged */
+	success &= dcn401_smu_wait_hard_min_status(clk_mgr, PPCLK_UCLK);
+	smu_print("SMU hard_min_done %d\n", success);
+
+	return success;
+}
+
+/*
+ * Send DALSMC_MSG_ActiveUclkFclk with the given UCLK/FCLK pair and wait for
+ * the PPCLK_UCLK bit of the hard-min status word.
+ *
+ * @uclk_freq_mhz: placed in bits 15:0 of the message argument
+ * @fclk_freq_mhz: placed in bits 31:16 of the message argument
+ *
+ * Returns true only when both the message and the status wait succeeded.
+ * The status wait is performed even if the message failed.
+ */
+bool dcn401_smu_set_active_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
+		uint16_t uclk_freq_mhz,
+		uint16_t fclk_freq_mhz)
+{
+	uint32_t response = 0;
+	bool success;
+
+	/*
+	 * 15:0 for uclk, 31:16 for fclk. Widen before shifting: a uint16_t is
+	 * promoted to signed int, and shifting a set bit 15 into the sign bit
+	 * is undefined.
+	 */
+	uint32_t param = ((uint32_t)fclk_freq_mhz << 16) | uclk_freq_mhz;
+
+	smu_print("SMU Set active hardmin by freq: uclk_freq_mhz = %d MHz, fclk_freq_mhz = %d MHz\n", uclk_freq_mhz, fclk_freq_mhz);
+
+	success = dcn401_smu_send_msg_with_param(clk_mgr,
+			DALSMC_MSG_ActiveUclkFclk, param, &response);
+
+	/* wait until hardmin acknowledged */
+	success &= dcn401_smu_wait_hard_min_status(clk_mgr, PPCLK_UCLK);
+	smu_print("SMU hard_min_done %d\n", success);
+
+	return success;
+}
+
+/*
+ * Send DALSMC_MSG_SubvpUclkFclk with the given UCLK/FCLK pair. Unlike the
+ * idle/active variants, no hard-min status wait follows.
+ *
+ * @uclk_freq_mhz: placed in bits 15:0 of the message argument
+ * @fclk_freq_mhz: placed in bits 31:16 of the message argument
+ *
+ * Returns true when the SMU acknowledged the message.
+ */
+bool dcn401_smu_set_subvp_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
+		uint16_t uclk_freq_mhz,
+		uint16_t fclk_freq_mhz)
+{
+	uint32_t response = 0;
+	bool success;
+
+	/*
+	 * 15:0 for uclk, 31:16 for fclk. Widen before shifting: a uint16_t is
+	 * promoted to signed int, and shifting a set bit 15 into the sign bit
+	 * is undefined.
+	 */
+	uint32_t param = ((uint32_t)fclk_freq_mhz << 16) | uclk_freq_mhz;
+
+	/* log as "subvp" so this is distinguishable from the active hardmin call */
+	smu_print("SMU Set subvp hardmin by freq: uclk_freq_mhz = %d MHz, fclk_freq_mhz = %d MHz\n", uclk_freq_mhz, fclk_freq_mhz);
+
+	success = dcn401_smu_send_msg_with_param(clk_mgr,
+			DALSMC_MSG_SubvpUclkFclk, param, &response);
+
+	return success;
+}
+
+/*
+ * Send DALSMC_MSG_SetMinDeepSleepDcfclk with @freq_mhz as the argument.
+ * The SMU's reply is not checked.
+ */
+void dcn401_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz)
+{
+	const uint32_t msg = DALSMC_MSG_SetMinDeepSleepDcfclk;
+
+	smu_print("SMU Set min deep sleep dcef clk: freq_mhz = %d MHz\n", freq_mhz);
+
+	dcn401_smu_send_msg_with_param(clk_mgr, msg, freq_mhz, NULL);
+}
+
+/*
+ * Send DALSMC_MSG_NumOfDisplays with @num_displays as the argument.
+ * The SMU's reply is not checked.
+ */
+void dcn401_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t num_displays)
+{
+	const uint32_t msg = DALSMC_MSG_NumOfDisplays;
+
+	smu_print("SMU Set num of displays: num_displays = %d\n", num_displays);
+
+	dcn401_smu_send_msg_with_param(clk_mgr, msg, num_displays, NULL);
+}
+
+/*
+ * Query the SMU with DALSMC_MSG_GetNumUmcChannels.
+ *
+ * Returns the value reported by the SMU, or 0 when the message failed (the
+ * local result is never written in that case).
+ */
+unsigned int dcn401_smu_get_num_of_umc_channels(struct clk_mgr_internal *clk_mgr)
+{
+	unsigned int num_channels = 0;
+
+	dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_GetNumUmcChannels,
+			0, &num_channels);
+
+	smu_print("SMU Get Num UMC Channels: num_umc_channels = %d\n", num_channels);
+
+	return num_channels;
+}
+
+/*
+ * Query one DPM level of clock @clk via DALSMC_MSG_GetDpmFreqByIndex.
+ *
+ * For a valid DPM level the frequency in MHz is returned in the lower
+ * 16 bits.
+ *
+ * Call with dpm_level = 0xFF to query features, return value will be:
+ * Bits 7:0 - number of DPM levels
+ * Bit 28 - 1 = auto DPM on
+ * Bit 29 - 1 = sweep DPM on
+ * Bit 30 - 1 = forced DPM on
+ * Bit 31 - 0 = discrete, 1 = fine-grained
+ *
+ * With fine-grained DPM, only min and max frequencies will be reported
+ *
+ * Returns 0 on failure
+ */
+unsigned int dcn401_smu_get_dpm_freq_by_index(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint8_t dpm_level)
+{
+	uint32_t freq = 0;
+	uint32_t arg;
+
+	/* bits 23:16 for clock type, lower 8 bits for DPM level */
+	arg = clk << 16;
+	arg |= dpm_level;
+
+	smu_print("SMU Get dpm freq by index: clk = %d, dpm_level = %d\n", clk, dpm_level);
+
+	dcn401_smu_send_msg_with_param(clk_mgr,
+			DALSMC_MSG_GetDpmFreqByIndex, arg, &freq);
+
+	smu_print("SMU dpm freq: %d MHz\n", freq);
+
+	return freq;
+}
+
+/*
+ * Query the maximum DPM frequency in DC mode for clock @clk via
+ * DALSMC_MSG_GetDcModeMaxDpmFreq.
+ *
+ * Returns the frequency in MHz, 0 on failure.
+ */
+unsigned int dcn401_smu_get_dc_mode_max_dpm_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk)
+{
+	uint32_t max_freq = 0;
+
+	/* bits 23:16 for clock type */
+	const uint32_t arg = clk << 16;
+
+	smu_print("SMU Get DC mode max DPM freq: clk = %d\n", clk);
+
+	dcn401_smu_send_msg_with_param(clk_mgr,
+			DALSMC_MSG_GetDcModeMaxDpmFreq, arg, &max_freq);
+
+	smu_print("SMU DC mode max DMP freq: %d MHz\n", max_freq);
+
+	return max_freq;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h
new file mode 100644
index 000000000000..4f5ac603e822
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DCN401_CLK_MGR_SMU_MSG_H_
+#define __DCN401_CLK_MGR_SMU_MSG_H_
+
+#include "os_types.h"
+#include "core_types.h"
+
+struct clk_mgr_internal;
+
+bool dcn401_smu_get_smu_version(struct clk_mgr_internal *clk_mgr, unsigned int *version);
+bool dcn401_smu_check_driver_if_version(struct clk_mgr_internal *clk_mgr);
+bool dcn401_smu_check_msg_header_version(struct clk_mgr_internal *clk_mgr);
+void dcn401_smu_send_fclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool support);
+void dcn401_smu_send_uclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool support);
+void dcn401_smu_send_cab_for_uclk_message(struct clk_mgr_internal *clk_mgr, unsigned int num_ways);
+void dcn401_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high);
+void dcn401_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low);
+void dcn401_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr);
+void dcn401_smu_set_pme_workaround(struct clk_mgr_internal *clk_mgr);
+unsigned int dcn401_smu_set_hard_min_by_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint16_t freq_mhz);
+void dcn401_smu_wait_for_dmub_ack_mclk(struct clk_mgr_internal *clk_mgr, bool enable);
+void dcn401_smu_indicate_drr_status(struct clk_mgr_internal *clk_mgr, bool mod_drr_for_pstate);
+bool dcn401_smu_set_idle_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
+ uint16_t uclk_freq_mhz,
+ uint16_t fclk_freq_mhz);
+bool dcn401_smu_set_active_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
+ uint16_t uclk_freq_mhz,
+ uint16_t fclk_freq_mhz);
+bool dcn401_smu_set_subvp_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
+ uint16_t uclk_freq_mhz,
+ uint16_t fclk_freq_mhz);
+void dcn401_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz);
+void dcn401_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t num_displays);
+unsigned int dcn401_smu_get_num_of_umc_channels(struct clk_mgr_internal *clk_mgr);
+unsigned int dcn401_smu_get_dc_mode_max_dpm_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk);
+unsigned int dcn401_smu_get_dpm_freq_by_index(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint8_t dpm_level);
+
+#endif /* __DCN401_CLK_MGR_SMU_MSG_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_smu14_driver_if.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_smu14_driver_if.h
new file mode 100644
index 000000000000..36034b32870c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_smu14_driver_if.h
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+//
+// This is a stripped-down version of the smu13_driver_if.h file for the relevant DAL interfaces.
+
+#define SMU14_DRIVER_IF_VERSION 0x1
+
+//Only Clks that have DPM descriptors are listed here; values are sent as the clock selector in SMU message arguments
+typedef enum {
+ PPCLK_GFXCLK = 0,
+ PPCLK_SOCCLK,
+ PPCLK_UCLK, /* also the hard-min status bit polled after the idle/active UCLK/FCLK hardmin messages */
+ PPCLK_FCLK,
+ PPCLK_DCLK_0,
+ PPCLK_VCLK_0,
+ PPCLK_DISPCLK,
+ PPCLK_DPPCLK,
+ PPCLK_DPREFCLK,
+ PPCLK_DCFCLK,
+ PPCLK_DTBCLK,
+ PPCLK_COUNT, /* number of entries above, not a clock */
+} PPCLK_e;
+
+typedef struct {
+ uint8_t WmSetting;
+ uint8_t Flags;
+ uint8_t Padding[2];
+
+} WatermarkRowGeneric_t;
+
+#define NUM_WM_RANGES 4
+
+typedef enum {
+ WATERMARKS_CLOCK_RANGE = 0,
+ WATERMARKS_DUMMY_PSTATE,
+ WATERMARKS_MALL,
+ WATERMARKS_COUNT,
+} WATERMARKS_FLAGS_e;
+
+typedef struct {
+ // Watermarks
+ WatermarkRowGeneric_t WatermarkRow[NUM_WM_RANGES];
+} Watermarks_t;
+
+typedef struct { /* the clock manager constructor allocates one of these in GART memory as its watermark range table */
+ Watermarks_t Watermarks;
+ uint32_t Spare[16];
+
+ uint32_t MmHubPadding[8]; // SMU internal use
+} WatermarksExternal_t;
+
+// Table types
+#define TABLE_PMFW_PPTABLE 0
+#define TABLE_COMBO_PPTABLE 1
+#define TABLE_WATERMARKS 2
+#define TABLE_AVFS_PSM_DEBUG 3
+#define TABLE_PMSTATUSLOG 4
+#define TABLE_SMU_METRICS 5
+#define TABLE_DRIVER_SMU_CONFIG 6
+#define TABLE_ACTIVITY_MONITOR_COEFF 7
+#define TABLE_OVERDRIVE 8
+#define TABLE_I2C_COMMANDS 9
+#define TABLE_DRIVER_INFO 10
+#define TABLE_ECCINFO 11
+#define TABLE_COUNT 12
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 0ded4decee05..8be9cbd43e18 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -22,11 +22,10 @@
* Authors: AMD
*/
-#include <linux/slab.h>
-#include <linux/mm.h>
-
#include "dm_services.h"
+#include "amdgpu.h"
+
#include "dc.h"
#include "core_status.h"
@@ -35,7 +34,13 @@
#include "dce/dce_hwseq.h"
#include "resource.h"
+#include "dc_state.h"
+#include "dc_state_priv.h"
+#include "dc_plane.h"
+#include "dc_plane_priv.h"
+#include "dc_stream_priv.h"
+#include "gpio_service_interface.h"
#include "clk_mgr.h"
#include "clock_source.h"
#include "dc_bios_types.h"
@@ -55,12 +60,10 @@
#include "link_encoder.h"
#include "link_enc_cfg.h"
-#include "dc_link.h"
-#include "dc_link_ddc.h"
+#include "link_service.h"
#include "dm_helpers.h"
#include "mem_input.h"
-#include "dc_link_dp.h"
#include "dc_dmub_srv.h"
#include "dsc.h"
@@ -71,11 +74,21 @@
#include "dmub/dmub_srv.h"
-#include "i2caux_interface.h"
+#include "dce/dmub_psr.h"
+
#include "dce/dmub_hw_lock_mgr.h"
#include "dc_trace.h"
+#include "hw_sequencer_private.h"
+
+#if defined(CONFIG_DRM_AMD_DC_FP)
+#include "dml2_0/dml2_internal_types.h"
+#include "soc_and_ip_translator.h"
+#endif
+
+#include "dce/dmub_outbox.h"
+
#define CTX \
dc->ctx
@@ -133,14 +146,18 @@ static const char DC_BUILD_ID[] = "production-build";
* one or two (in the pipe-split case).
*/
-/*******************************************************************************
- * Private functions
- ******************************************************************************/
+/* Private functions */
-static inline void elevate_update_type(enum surface_update_type *original, enum surface_update_type new)
+static inline void elevate_update_type(
+ struct surface_update_descriptor *descriptor,
+ enum surface_update_type new_type,
+ enum dc_lock_descriptor new_locks
+)
{
- if (new > *original)
- *original = new;
+ if (new_type > descriptor->update_type)
+ descriptor->update_type = new_type;
+
+ descriptor->lock_descriptor |= new_locks;
}
static void destroy_links(struct dc *dc)
@@ -149,7 +166,7 @@ static void destroy_links(struct dc *dc)
for (i = 0; i < dc->link_count; i++) {
if (NULL != dc->links[i])
- link_destroy(&dc->links[i]);
+ dc->link_srv->destroy_link(&dc->links[i]);
}
}
@@ -207,10 +224,24 @@ static bool create_links(
connectors_num,
num_virtual_links);
- for (i = 0; i < connectors_num; i++) {
+ /* When getting the number of connectors, the VBIOS reports the number of valid indices,
+ * but it doesn't say which indices are valid, and not every index has an actual connector.
+ * So, if we don't find a connector on an index, that is not an error.
+ *
+ * - There is no guarantee that the first N indices will be valid
+ * - VBIOS may report a higher amount of valid indices than there are actual connectors
+ * - Some VBIOS have valid configurations for more connectors than there actually are
+ * on the card. This may be because the manufacturer used the same VBIOS for different
+ * variants of the same card.
+ */
+ for (i = 0; dc->link_count < connectors_num && i < MAX_LINKS; i++) {
+ struct graphics_object_id connector_id = bios->funcs->get_connector_id(bios, i);
struct link_init_data link_init_params = {0};
struct dc_link *link;
+ if (connector_id.id == CONNECTOR_ID_UNKNOWN)
+ continue;
+
DC_LOG_DC("BIOS object table - printing link object info for connector number: %d, link_index: %d", i, dc->link_count);
link_init_params.ctx = dc->ctx;
@@ -218,18 +249,19 @@ static bool create_links(
link_init_params.connector_index = i;
link_init_params.link_index = dc->link_count;
link_init_params.dc = dc;
- link = link_create(&link_init_params);
+ link = dc->link_srv->create_link(&link_init_params);
if (link) {
- dc->links[dc->link_count] = link;
- link->dc = dc;
- ++dc->link_count;
+ dc->links[dc->link_count] = link;
+ link->dc = dc;
+ ++dc->link_count;
}
}
DC_LOG_DC("BIOS object table - end");
/* Create a link for each usb4 dpia port */
+ dc->lowest_dpia_link_index = MAX_LINKS;
for (i = 0; i < dc->res_pool->usb4_dpia_count; i++) {
struct link_init_data link_init_params = {0};
struct dc_link *link;
@@ -240,8 +272,11 @@ static bool create_links(
link_init_params.dc = dc;
link_init_params.is_dpia_link = true;
- link = link_create(&link_init_params);
+ link = dc->link_srv->create_link(&link_init_params);
if (link) {
+ if (dc->lowest_dpia_link_index > dc->link_count)
+ dc->lowest_dpia_link_index = dc->link_count;
+
dc->links[dc->link_count] = link;
link->dc = dc;
++dc->link_count;
@@ -267,6 +302,8 @@ static bool create_links(
link->link_id.type = OBJECT_TYPE_CONNECTOR;
link->link_id.id = CONNECTOR_ID_VIRTUAL;
link->link_id.enum_id = ENUM_ID_1;
+ link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED;
+ link->replay_settings.config.replay_version = DC_REPLAY_VERSION_UNSUPPORTED;
link->link_enc = kzalloc(sizeof(*link->link_enc), GFP_KERNEL);
if (!link->link_enc) {
@@ -274,24 +311,6 @@ static bool create_links(
goto failed_alloc;
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment) &&
- dc->caps.dp_hpo &&
- link->dc->res_pool->res_cap->num_hpo_dp_link_encoder > 0) {
- /* FPGA case - Allocate HPO DP link encoder */
- if (i < link->dc->res_pool->res_cap->num_hpo_dp_link_encoder) {
- link->hpo_dp_link_enc = link->dc->res_pool->hpo_dp_link_enc[i];
-
- if (link->hpo_dp_link_enc == NULL) {
- BREAK_TO_DEBUGGER();
- goto failed_alloc;
- }
- link->hpo_dp_link_enc->hpd_source = link->link_enc->hpd_source;
- link->hpo_dp_link_enc->transmitter = link->link_enc->transmitter;
- }
- }
-#endif
-
link->link_status.dpcd_caps = &link->dpcd_caps;
enc_init.ctx = dc->ctx;
@@ -360,10 +379,16 @@ static bool create_link_encoders(struct dc *dc)
*/
static void destroy_link_encoders(struct dc *dc)
{
- unsigned int num_usb4_dpia = dc->res_pool->res_cap->num_usb4_dpia;
- unsigned int num_dig_link_enc = dc->res_pool->res_cap->num_dig_link_enc;
+ unsigned int num_usb4_dpia;
+ unsigned int num_dig_link_enc;
int i;
+ if (!dc->res_pool)
+ return;
+
+ num_usb4_dpia = dc->res_pool->res_cap->num_usb4_dpia;
+ num_dig_link_enc = dc->res_pool->res_cap->num_dig_link_enc;
+
/* A platform without USB4 DPIA endpoints has a fixed mapping between DIG
* link encoders and physical display endpoints and does not require
* additional link encoder objects.
@@ -393,29 +418,79 @@ static void dc_perf_trace_destroy(struct dc_perf_trace **perf_trace)
*perf_trace = NULL;
}
+/*
+ * Find the first pipe in the current state that drives @stream and has a
+ * timing generator, and apply the long-vtotal range from @adjust through
+ * the hwss set_long_vtotal hook when that hook is provided.
+ *
+ * Returns true when such a pipe was found (even if the hook is absent),
+ * false when an argument or the current state is NULL or no pipe matches.
+ */
+static bool set_long_vtotal(struct dc *dc, struct dc_stream_state *stream, struct dc_crtc_timing_adjust *adjust)
+{
+	struct pipe_ctx *pipe;
+	int idx;
+
+	if (!dc || !stream || !adjust || !dc->current_state)
+		return false;
+
+	for (idx = 0; idx < MAX_PIPES; idx++) {
+		pipe = &dc->current_state->res_ctx.pipe_ctx[idx];
+
+		if (pipe->stream != stream || !pipe->stream_res.tg)
+			continue;
+
+		if (dc->hwss.set_long_vtotal)
+			dc->hwss.set_long_vtotal(&pipe, 1, adjust->v_total_min, adjust->v_total_max);
+
+		return true;
+	}
+
+	return false;
+}
+
/**
- * dc_stream_adjust_vmin_vmax:
+ * dc_stream_adjust_vmin_vmax - look up pipe context & update parts of DRR
+ * @dc: dc reference
+ * @stream: Initial dc stream state
+ * @adjust: Updated parameters for vertical_total_min and vertical_total_max
*
* Looks up the pipe context of dc_stream_state and updates the
* vertical_total_min and vertical_total_max of the DRR, Dynamic Refresh
* Rate, which is a power-saving feature that targets reducing panel
* refresh rate while the screen is static
*
- * @dc: dc reference
- * @stream: Initial dc stream state
- * @adjust: Updated parameters for vertical_total_min and vertical_total_max
+ * Return: %true if the pipe context is found and adjusted;
+ * %false if the pipe context is not found.
*/
bool dc_stream_adjust_vmin_vmax(struct dc *dc,
struct dc_stream_state *stream,
struct dc_crtc_timing_adjust *adjust)
{
int i;
- bool ret = false;
+
+ /*
+ * Don't adjust DRR while there's bandwidth optimizations pending to
+ * avoid conflicting with firmware updates.
+ */
+ if (dc->ctx->dce_version > DCE_VERSION_MAX) {
+ if (dc->optimized_required &&
+ (stream->adjust.v_total_max != adjust->v_total_max ||
+ stream->adjust.v_total_min != adjust->v_total_min)) {
+ stream->adjust.timing_adjust_pending = true;
+ return false;
+ }
+ }
+
+ dc_exit_ips_for_hw_access(dc);
stream->adjust.v_total_max = adjust->v_total_max;
stream->adjust.v_total_mid = adjust->v_total_mid;
stream->adjust.v_total_mid_frame_num = adjust->v_total_mid_frame_num;
stream->adjust.v_total_min = adjust->v_total_min;
+ stream->adjust.allow_otg_v_count_halt = adjust->allow_otg_v_count_halt;
+
+ if (dc->caps.max_v_total != 0 &&
+ (adjust->v_total_max > dc->caps.max_v_total || adjust->v_total_min > dc->caps.max_v_total)) {
+ stream->adjust.timing_adjust_pending = false;
+ if (adjust->allow_otg_v_count_halt)
+ return set_long_vtotal(dc, stream, adjust);
+ else
+ return false;
+ }
for (i = 0; i < MAX_PIPES; i++) {
struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
@@ -424,26 +499,30 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc,
dc->hwss.set_drr(&pipe,
1,
*adjust);
+ stream->adjust.timing_adjust_pending = false;
- ret = true;
+ if (dc->hwss.notify_cursor_offload_drr_update)
+ dc->hwss.notify_cursor_offload_drr_update(dc, dc->current_state, stream);
+
+ return true;
}
}
- return ret;
+
+ return false;
}
/**
- *****************************************************************************
- * Function: dc_stream_get_last_vrr_vtotal
- *
- * @brief
- * Looks up the pipe context of dc_stream_state and gets the
- * last VTOTAL used by DRR (Dynamic Refresh Rate)
- *
- * @param [in] dc: dc reference
- * @param [in] stream: Initial dc stream state
- * @param [in] adjust: Updated parameters for vertical_total_min and
- * vertical_total_max
- *****************************************************************************
+ * dc_stream_get_last_used_drr_vtotal - Looks up the pipe context of
+ * dc_stream_state and gets the last VTOTAL used by DRR (Dynamic Refresh Rate)
+ *
+ * @dc: [in] dc reference
+ * @stream: [in] Initial dc stream state
+ * @refresh_rate: [in] new refresh_rate
+ *
+ * Return: %true if the pipe context is found and there is an associated
+ * timing_generator for the DC;
+ * %false if the pipe context is not found or there is no
+ * timing_generator for the DC.
*/
bool dc_stream_get_last_used_drr_vtotal(struct dc *dc,
struct dc_stream_state *stream,
@@ -453,6 +532,8 @@ bool dc_stream_get_last_used_drr_vtotal(struct dc *dc,
int i = 0;
+ dc_exit_ips_for_hw_access(dc);
+
for (i = 0; i < MAX_PIPES; i++) {
struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
@@ -473,142 +554,176 @@ bool dc_stream_get_last_used_drr_vtotal(struct dc *dc,
return status;
}
-bool dc_stream_get_crtc_position(struct dc *dc,
- struct dc_stream_state **streams, int num_streams,
- unsigned int *v_pos, unsigned int *nom_v_pos)
+#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
+static inline void
+dc_stream_forward_dmub_crc_window(struct dc_dmub_srv *dmub_srv,
+ struct rect *rect, struct otg_phy_mux *mux_mapping, bool is_stop)
{
- /* TODO: Support multiple streams */
- const struct dc_stream_state *stream = streams[0];
- int i;
- bool ret = false;
- struct crtc_position position;
-
- for (i = 0; i < MAX_PIPES; i++) {
- struct pipe_ctx *pipe =
- &dc->current_state->res_ctx.pipe_ctx[i];
+ union dmub_rb_cmd cmd = {0};
- if (pipe->stream == stream && pipe->stream_res.stream_enc) {
- dc->hwss.get_position(&pipe, 1, &position);
+ cmd.secure_display.roi_info.phy_id = mux_mapping->phy_output_num;
+ cmd.secure_display.roi_info.otg_id = mux_mapping->otg_output_num;
- *v_pos = position.vertical_count;
- *nom_v_pos = position.nominal_vcount;
- ret = true;
- }
+ if (is_stop) {
+ cmd.secure_display.header.type = DMUB_CMD__SECURE_DISPLAY;
+ cmd.secure_display.header.sub_type = DMUB_CMD__SECURE_DISPLAY_CRC_STOP_UPDATE;
+ } else {
+ cmd.secure_display.header.type = DMUB_CMD__SECURE_DISPLAY;
+ cmd.secure_display.header.sub_type = DMUB_CMD__SECURE_DISPLAY_CRC_WIN_NOTIFY;
+ cmd.secure_display.roi_info.x_start = rect->x;
+ cmd.secure_display.roi_info.y_start = rect->y;
+ cmd.secure_display.roi_info.x_end = rect->x + rect->width;
+ cmd.secure_display.roi_info.y_end = rect->y + rect->height;
}
- return ret;
+
+ dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
}
-#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
-bool dc_stream_forward_dmcu_crc_window(struct dc *dc, struct dc_stream_state *stream,
- struct crc_params *crc_window)
+static inline void
+dc_stream_forward_dmcu_crc_window(struct dmcu *dmcu,
+ struct rect *rect, struct otg_phy_mux *mux_mapping, bool is_stop)
{
- int i;
- struct dmcu *dmcu = dc->res_pool->dmcu;
+ if (is_stop)
+ dmcu->funcs->stop_crc_win_update(dmcu, mux_mapping);
+ else
+ dmcu->funcs->forward_crc_window(dmcu, rect, mux_mapping);
+}
+
+bool
+dc_stream_forward_crc_window(struct dc_stream_state *stream,
+ struct rect *rect, uint8_t phy_id, bool is_stop)
+{
+ struct dmcu *dmcu;
+ struct dc_dmub_srv *dmub_srv;
+ struct otg_phy_mux mux_mapping;
struct pipe_ctx *pipe;
- struct crc_region tmp_win, *crc_win;
- struct otg_phy_mux mapping_tmp, *mux_mapping;
+ int i;
+ struct dc *dc = stream->ctx->dc;
+
+ for (i = 0; i < MAX_PIPES; i++) {
+ pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ if (pipe->stream == stream && !pipe->top_pipe && !pipe->prev_odm_pipe)
+ break;
+ }
- /*crc window can't be null*/
- if (!crc_window)
+ /* Stream not found */
+ if (i == MAX_PIPES)
return false;
- if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu))) {
- crc_win = &tmp_win;
- mux_mapping = &mapping_tmp;
- /*set crc window*/
- tmp_win.x_start = crc_window->windowa_x_start;
- tmp_win.y_start = crc_window->windowa_y_start;
- tmp_win.x_end = crc_window->windowa_x_end;
- tmp_win.y_end = crc_window->windowa_y_end;
+ mux_mapping.phy_output_num = phy_id;
+ mux_mapping.otg_output_num = pipe->stream_res.tg->inst;
- for (i = 0; i < MAX_PIPES; i++) {
- pipe = &dc->current_state->res_ctx.pipe_ctx[i];
- if (pipe->stream == stream && !pipe->top_pipe && !pipe->prev_odm_pipe)
- break;
- }
+ dmcu = dc->res_pool->dmcu;
+ dmub_srv = dc->ctx->dmub_srv;
- /* Stream not found */
- if (i == MAX_PIPES)
- return false;
+ /* forward to dmub */
+ if (dmub_srv)
+ dc_stream_forward_dmub_crc_window(dmub_srv, rect, &mux_mapping, is_stop);
+ /* forward to dmcu */
+ else if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu))
+ dc_stream_forward_dmcu_crc_window(dmcu, rect, &mux_mapping, is_stop);
+ else
+ return false;
+
+ return true;
+}
+static void
+dc_stream_forward_dmub_multiple_crc_window(struct dc_dmub_srv *dmub_srv,
+ struct crc_window *window, struct otg_phy_mux *mux_mapping, bool stop)
+{
+ int i;
+ union dmub_rb_cmd cmd = {0};
- /*set mux routing info*/
- mapping_tmp.phy_output_num = stream->link->link_enc_hw_inst;
- mapping_tmp.otg_output_num = pipe->stream_res.tg->inst;
+ cmd.secure_display.mul_roi_ctl.phy_id = mux_mapping->phy_output_num;
+ cmd.secure_display.mul_roi_ctl.otg_id = mux_mapping->otg_output_num;
- dmcu->funcs->forward_crc_window(dmcu, crc_win, mux_mapping);
+ cmd.secure_display.header.type = DMUB_CMD__SECURE_DISPLAY;
+
+ if (stop) {
+ cmd.secure_display.header.sub_type = DMUB_CMD__SECURE_DISPLAY_MULTIPLE_CRC_STOP_UPDATE;
} else {
- DC_LOG_DC("dmcu is not initialized");
- return false;
+ cmd.secure_display.header.sub_type = DMUB_CMD__SECURE_DISPLAY_MULTIPLE_CRC_WIN_NOTIFY;
+ for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) {
+ cmd.secure_display.mul_roi_ctl.roi_ctl[i].x_start = window[i].rect.x;
+ cmd.secure_display.mul_roi_ctl.roi_ctl[i].y_start = window[i].rect.y;
+ cmd.secure_display.mul_roi_ctl.roi_ctl[i].x_end = window[i].rect.x + window[i].rect.width;
+ cmd.secure_display.mul_roi_ctl.roi_ctl[i].y_end = window[i].rect.y + window[i].rect.height;
+ cmd.secure_display.mul_roi_ctl.roi_ctl[i].enable = window[i].enable;
+ }
}
- return true;
+ dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
}
-bool dc_stream_stop_dmcu_crc_win_update(struct dc *dc, struct dc_stream_state *stream)
+bool
+dc_stream_forward_multiple_crc_window(struct dc_stream_state *stream,
+ struct crc_window *window, uint8_t phy_id, bool stop)
{
- int i;
- struct dmcu *dmcu = dc->res_pool->dmcu;
+ struct dc_dmub_srv *dmub_srv;
+ struct otg_phy_mux mux_mapping;
struct pipe_ctx *pipe;
- struct otg_phy_mux mapping_tmp, *mux_mapping;
-
- if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu))) {
- mux_mapping = &mapping_tmp;
+ int i;
+ struct dc *dc = stream->ctx->dc;
- for (i = 0; i < MAX_PIPES; i++) {
- pipe = &dc->current_state->res_ctx.pipe_ctx[i];
- if (pipe->stream == stream && !pipe->top_pipe && !pipe->prev_odm_pipe)
- break;
- }
+ for (i = 0; i < MAX_PIPES; i++) {
+ pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ if (pipe->stream == stream && !pipe->top_pipe && !pipe->prev_odm_pipe)
+ break;
+ }
- /* Stream not found */
- if (i == MAX_PIPES)
- return false;
+ /* Stream not found */
+ if (i == MAX_PIPES)
+ return false;
+ mux_mapping.phy_output_num = phy_id;
+ mux_mapping.otg_output_num = pipe->stream_res.tg->inst;
- /*set mux routing info*/
- mapping_tmp.phy_output_num = stream->link->link_enc_hw_inst;
- mapping_tmp.otg_output_num = pipe->stream_res.tg->inst;
+ dmub_srv = dc->ctx->dmub_srv;
- dmcu->funcs->stop_crc_win_update(dmcu, mux_mapping);
- } else {
- DC_LOG_DC("dmcu is not initialized");
+ /* Forward to DMUB only; there is no DMCU support for multiple CRC windows. */
+ if (dmub_srv)
+ dc_stream_forward_dmub_multiple_crc_window(dmub_srv, window, &mux_mapping, stop);
+ else
return false;
- }
return true;
}
-#endif
+#endif /* CONFIG_DRM_AMD_SECURE_DISPLAY */
/**
* dc_stream_configure_crc() - Configure CRC capture for the given stream.
* @dc: DC Object
* @stream: The stream to configure CRC on.
- * @enable: Enable CRC if true, disable otherwise.
* @crc_window: CRC window (x/y start/end) information
+ * @enable: Enable CRC if true, disable otherwise.
* @continuous: Capture CRC on every frame if true. Otherwise, only capture
* once.
+ * @idx: Index of the CRC engine instance to capture the CRC on
+ * @reset: Reset CRC engine before the configuration
+ *
+ * By default, the entire frame is used to calculate the CRC.
*
- * By default, only CRC0 is configured, and the entire frame is used to
- * calculate the crc.
+ * Return: %false if the stream is not found or CRC capture is not supported;
+ * %true if the stream has been configured.
*/
bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream,
- struct crc_params *crc_window, bool enable, bool continuous)
+ struct crc_params *crc_window, bool enable, bool continuous,
+ uint8_t idx, bool reset)
{
- int i;
struct pipe_ctx *pipe;
struct crc_params param;
struct timing_generator *tg;
- for (i = 0; i < MAX_PIPES; i++) {
- pipe = &dc->current_state->res_ctx.pipe_ctx[i];
- if (pipe->stream == stream && !pipe->top_pipe && !pipe->prev_odm_pipe)
- break;
- }
+ pipe = resource_get_otg_master_for_stream(
+ &dc->current_state->res_ctx, stream);
+
/* Stream not found */
- if (i == MAX_PIPES)
+ if (pipe == NULL)
return false;
+ dc_exit_ips_for_hw_access(dc);
+
/* By default, capture the full frame */
param.windowa_x_start = 0;
param.windowa_y_start = 0;
@@ -638,6 +753,9 @@ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream,
param.continuous_mode = continuous;
param.enable = enable;
+ param.crc_eng_inst = idx;
+ param.reset = reset;
+
tg = pipe->stream_res.tg;
/* Only call if supported */
@@ -649,22 +767,28 @@ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream,
/**
* dc_stream_get_crc() - Get CRC values for the given stream.
- * @dc: DC object
+ *
+ * @dc: DC object.
* @stream: The DC stream state of the stream to get CRCs from.
- * @r_cr: CRC value for the first of the 3 channels stored here.
- * @g_y: CRC value for the second of the 3 channels stored here.
- * @b_cb: CRC value for the third of the 3 channels stored here.
+ * @idx: index of crc engine to get CRC from
+ * @r_cr: CRC value for the red component.
+ * @g_y: CRC value for the green component.
+ * @b_cb: CRC value for the blue component.
*
* dc_stream_configure_crc needs to be called beforehand to enable CRCs.
- * Return false if stream is not found, or if CRCs are not enabled.
+ *
+ * Return:
+ * %false if stream is not found, or if CRCs are not enabled.
*/
-bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream,
+bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream, uint8_t idx,
uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb)
{
int i;
struct pipe_ctx *pipe;
struct timing_generator *tg;
+ dc_exit_ips_for_hw_access(dc);
+
for (i = 0; i < MAX_PIPES; i++) {
pipe = &dc->current_state->res_ctx.pipe_ctx[i];
if (pipe->stream == stream)
@@ -677,7 +801,7 @@ bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream,
tg = pipe->stream_res.tg;
if (tg->funcs->get_crc)
- return tg->funcs->get_crc(tg, r_cr, g_y, b_cb);
+ return tg->funcs->get_crc(tg, idx, r_cr, g_y, b_cb);
DC_LOG_WARNING("CRC capture not supported.");
return false;
}
@@ -689,6 +813,8 @@ void dc_stream_set_dyn_expansion(struct dc *dc, struct dc_stream_state *stream,
int i;
struct pipe_ctx *pipe_ctx;
+ dc_exit_ips_for_hw_access(dc);
+
for (i = 0; i < MAX_PIPES; i++) {
if (dc->current_state->res_ctx.pipe_ctx[i].stream
== stream) {
@@ -724,6 +850,8 @@ void dc_stream_set_dither_option(struct dc_stream_state *stream,
if (option > DITHER_OPTION_MAX)
return;
+ dc_exit_ips_for_hw_access(stream->ctx->dc);
+
stream->dither_option = option;
memset(&params, 0, sizeof(params));
@@ -748,6 +876,8 @@ bool dc_stream_set_gamut_remap(struct dc *dc, const struct dc_stream_state *stre
bool ret = false;
struct pipe_ctx *pipes;
+ dc_exit_ips_for_hw_access(dc);
+
for (i = 0; i < MAX_PIPES; i++) {
if (dc->current_state->res_ctx.pipe_ctx[i].stream == stream) {
pipes = &dc->current_state->res_ctx.pipe_ctx[i];
@@ -765,6 +895,8 @@ bool dc_stream_program_csc_matrix(struct dc *dc, struct dc_stream_state *stream)
bool ret = false;
struct pipe_ctx *pipes;
+ dc_exit_ips_for_hw_access(dc);
+
for (i = 0; i < MAX_PIPES; i++) {
if (dc->current_state->res_ctx.pipe_ctx[i].stream
== stream) {
@@ -791,6 +923,8 @@ void dc_stream_set_static_screen_params(struct dc *dc,
struct pipe_ctx *pipes_affected[MAX_PIPES];
int num_pipes_affected = 0;
+ dc_exit_ips_for_hw_access(dc);
+
for (i = 0; i < num_streams; i++) {
struct dc_stream_state *stream = streams[i];
@@ -808,8 +942,13 @@ void dc_stream_set_static_screen_params(struct dc *dc,
static void dc_destruct(struct dc *dc)
{
+ // reset link encoder assignment table on destruct
+ if (dc->res_pool && dc->res_pool->funcs->link_encs_assign &&
+ !dc->config.unify_link_enc_assignment)
+ link_enc_cfg_init(dc, dc->current_state);
+
if (dc->current_state) {
- dc_release_state(dc->current_state);
+ dc_state_release(dc->current_state);
dc->current_state = NULL;
}
@@ -823,17 +962,24 @@ static void dc_destruct(struct dc *dc)
}
dc_destroy_resource_pool(dc);
+#ifdef CONFIG_DRM_AMD_DC_FP
+ dc_destroy_soc_and_ip_translator(&dc->soc_and_ip_translator);
+#endif
+ if (dc->link_srv)
+ link_destroy_link_service(&dc->link_srv);
- if (dc->ctx->gpio_service)
- dal_gpio_service_destroy(&dc->ctx->gpio_service);
-
- if (dc->ctx->created_bios)
- dal_bios_parser_destroy(&dc->ctx->dc_bios);
+ if (dc->ctx) {
+ if (dc->ctx->gpio_service)
+ dal_gpio_service_destroy(&dc->ctx->gpio_service);
- dc_perf_trace_destroy(&dc->ctx->perf_trace);
+ if (dc->ctx->created_bios)
+ dal_bios_parser_destroy(&dc->ctx->dc_bios);
+ kfree(dc->ctx->logger);
+ dc_perf_trace_destroy(&dc->ctx->perf_trace);
- kfree(dc->ctx);
- dc->ctx = NULL;
+ kfree(dc->ctx);
+ dc->ctx = NULL;
+ }
kfree(dc->bw_vbios);
dc->bw_vbios = NULL;
@@ -841,14 +987,12 @@ static void dc_destruct(struct dc *dc)
kfree(dc->bw_dceip);
dc->bw_dceip = NULL;
-#ifdef CONFIG_DRM_AMD_DC_DCN
kfree(dc->dcn_soc);
dc->dcn_soc = NULL;
kfree(dc->dcn_ip);
dc->dcn_ip = NULL;
-#endif
kfree(dc->vm_helper);
dc->vm_helper = NULL;
@@ -858,12 +1002,13 @@ static bool dc_construct_ctx(struct dc *dc,
const struct dc_init_data *init_params)
{
struct dc_context *dc_ctx;
- enum dce_version dc_version = DCE_VERSION_UNKNOWN;
dc_ctx = kzalloc(sizeof(*dc_ctx), GFP_KERNEL);
if (!dc_ctx)
return false;
+ dc_stream_init_rmcm_3dlut(dc);
+
dc_ctx->cgs_device = init_params->cgs_device;
dc_ctx->driver_context = init_params->driver;
dc_ctx->dc = dc;
@@ -871,20 +1016,36 @@ static bool dc_construct_ctx(struct dc *dc,
dc_ctx->dc_sink_id_count = 0;
dc_ctx->dc_stream_id_count = 0;
dc_ctx->dce_environment = init_params->dce_environment;
+ dc_ctx->dcn_reg_offsets = init_params->dcn_reg_offsets;
+ dc_ctx->nbio_reg_offsets = init_params->nbio_reg_offsets;
+ dc_ctx->clk_reg_offsets = init_params->clk_reg_offsets;
/* Create logger */
+ dc_ctx->logger = kmalloc(sizeof(*dc_ctx->logger), GFP_KERNEL);
- dc_version = resource_parse_asic_id(init_params->asic_id);
- dc_ctx->dce_version = dc_version;
+ if (!dc_ctx->logger) {
+ kfree(dc_ctx);
+ return false;
+ }
+
+ dc_ctx->logger->dev = adev_to_drm(init_params->driver);
+ dc->dml.logger = dc_ctx->logger;
+
+ dc_ctx->dce_version = resource_parse_asic_id(init_params->asic_id);
dc_ctx->perf_trace = dc_perf_trace_create();
if (!dc_ctx->perf_trace) {
+ kfree(dc_ctx);
ASSERT_CRITICAL(false);
return false;
}
dc->ctx = dc_ctx;
+ dc->link_srv = link_create_link_service();
+ if (!dc->link_srv)
+ return false;
+
return true;
}
@@ -894,10 +1055,8 @@ static bool dc_construct(struct dc *dc,
struct dc_context *dc_ctx;
struct bw_calcs_dceip *dc_dceip;
struct bw_calcs_vbios *dc_vbios;
-#ifdef CONFIG_DRM_AMD_DC_DCN
struct dcn_soc_bounding_box *dcn_soc;
struct dcn_ip_params *dcn_ip;
-#endif
dc->config = init_params->flags;
@@ -925,7 +1084,6 @@ static bool dc_construct(struct dc *dc,
}
dc->bw_vbios = dc_vbios;
-#ifdef CONFIG_DRM_AMD_DC_DCN
dcn_soc = kzalloc(sizeof(*dcn_soc), GFP_KERNEL);
if (!dcn_soc) {
dm_error("%s: failed to create dcn_soc\n", __func__);
@@ -941,14 +1099,18 @@ static bool dc_construct(struct dc *dc,
}
dc->dcn_ip = dcn_ip;
-#endif
+
+ if (init_params->bb_from_dmub)
+ dc->dml2_options.bb_from_dmub = init_params->bb_from_dmub;
+ else
+ dc->dml2_options.bb_from_dmub = NULL;
if (!dc_construct_ctx(dc, init_params)) {
dm_error("%s: failed to create ctx\n", __func__);
goto fail;
}
- dc_ctx = dc->ctx;
+ dc_ctx = dc->ctx;
/* Resource should construct all asic specific resources.
* This should be the only place where we need to parse the asic id
@@ -993,30 +1155,23 @@ static bool dc_construct(struct dc *dc,
/* set i2c speed if not done by the respective dcnxxx__resource.c */
if (dc->caps.i2c_speed_in_khz_hdcp == 0)
dc->caps.i2c_speed_in_khz_hdcp = dc->caps.i2c_speed_in_khz;
-
+ if (dc->check_config.max_optimizable_video_width == 0)
+ dc->check_config.max_optimizable_video_width = 5120;
dc->clk_mgr = dc_clk_mgr_create(dc->ctx, dc->res_pool->pp_smu, dc->res_pool->dccg);
if (!dc->clk_mgr)
goto fail;
-#ifdef CONFIG_DRM_AMD_DC_DCN
+#ifdef CONFIG_DRM_AMD_DC_FP
dc->clk_mgr->force_smu_not_present = init_params->force_smu_not_present;
-#endif
- if (dc->res_pool->funcs->update_bw_bounding_box)
+ if (dc->res_pool->funcs->update_bw_bounding_box) {
+ DC_FP_START();
dc->res_pool->funcs->update_bw_bounding_box(dc, dc->clk_mgr->bw_params);
-
- /* Creation of current_state must occur after dc->dml
- * is initialized in dc_create_resource_pool because
- * on creation it copies the contents of dc->dml
- */
-
- dc->current_state = dc_create_state(dc);
-
- if (!dc->current_state) {
- dm_error("%s: failed to create validate ctx\n", __func__);
- goto fail;
+ DC_FP_END();
}
-
- dc_resource_state_construct(dc, dc->current_state);
+ dc->soc_and_ip_translator = dc_create_soc_and_ip_translator(dc_ctx->dce_version);
+ if (!dc->soc_and_ip_translator)
+ goto fail;
+#endif
if (!create_links(dc, init_params->num_virtual_links))
goto fail;
@@ -1027,8 +1182,16 @@ static bool dc_construct(struct dc *dc,
if (!create_link_encoders(dc))
goto fail;
- /* Initialise DIG link encoder resource tracking variables. */
- link_enc_cfg_init(dc, dc->current_state);
+ /* Creation of current_state must occur after dc->dml
+ * is initialized in dc_create_resource_pool because
+ * on creation it copies the contents of dc->dml
+ */
+ dc->current_state = dc_state_create(dc, NULL);
+
+ if (!dc->current_state) {
+ dm_error("%s: failed to create validate ctx\n", __func__);
+ goto fail;
+ }
return true;
@@ -1047,8 +1210,10 @@ static void disable_all_writeback_pipes_for_stream(
stream->writeback_info[i].wb_enabled = false;
}
-static void apply_ctx_interdependent_lock(struct dc *dc, struct dc_state *context,
- struct dc_stream_state *stream, bool lock)
+static void apply_ctx_interdependent_lock(struct dc *dc,
+ struct dc_state *context,
+ struct dc_stream_state *stream,
+ bool lock)
{
int i;
@@ -1062,7 +1227,7 @@ static void apply_ctx_interdependent_lock(struct dc *dc, struct dc_state *contex
// Copied conditions that were previously in dce110_apply_ctx_for_surface
if (stream == pipe_ctx->stream) {
- if (!pipe_ctx->top_pipe &&
+ if (resource_is_pipe_type(pipe_ctx, OPP_HEAD) &&
(pipe_ctx->plane_state || old_pipe_ctx->plane_state))
dc->hwss.pipe_control_lock(dc, pipe_ctx, lock);
}
@@ -1070,23 +1235,116 @@ static void apply_ctx_interdependent_lock(struct dc *dc, struct dc_state *contex
}
}
+static void dc_update_visual_confirm_color(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx)
+{
+ if (dc->debug.visual_confirm & VISUAL_CONFIRM_EXPLICIT) {
+ memcpy(&pipe_ctx->visual_confirm_color, &pipe_ctx->plane_state->visual_confirm_color,
+ sizeof(pipe_ctx->visual_confirm_color));
+ return;
+ }
+
+ if (dc->ctx->dce_version >= DCN_VERSION_1_0) {
+ memset(&pipe_ctx->visual_confirm_color, 0, sizeof(struct tg_color));
+
+ if (dc->debug.visual_confirm == VISUAL_CONFIRM_HDR)
+ get_hdr_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
+ else if (dc->debug.visual_confirm == VISUAL_CONFIRM_SURFACE)
+ get_surface_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
+ else if (dc->debug.visual_confirm == VISUAL_CONFIRM_SWIZZLE)
+ get_surface_tile_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
+ else if (dc->debug.visual_confirm == VISUAL_CONFIRM_HW_CURSOR)
+ get_cursor_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
+ else if (dc->debug.visual_confirm == VISUAL_CONFIRM_DCC)
+ get_dcc_visual_confirm_color(dc, pipe_ctx, &(pipe_ctx->visual_confirm_color));
+ else {
+ if (dc->ctx->dce_version < DCN_VERSION_2_0)
+ color_space_to_black_color(
+ dc, pipe_ctx->stream->output_color_space, &(pipe_ctx->visual_confirm_color));
+ }
+ if (dc->ctx->dce_version >= DCN_VERSION_2_0) {
+ if (dc->debug.visual_confirm == VISUAL_CONFIRM_MPCTREE)
+ get_mpctree_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
+ else if (dc->debug.visual_confirm == VISUAL_CONFIRM_SUBVP)
+ get_subvp_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
+ else if (dc->debug.visual_confirm == VISUAL_CONFIRM_MCLK_SWITCH)
+ get_mclk_switch_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
+ else if (dc->debug.visual_confirm == VISUAL_CONFIRM_FAMS2)
+ get_fams2_visual_confirm_color(dc, context, pipe_ctx, &(pipe_ctx->visual_confirm_color));
+ else if (dc->debug.visual_confirm == VISUAL_CONFIRM_VABC)
+ get_vabc_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
+ }
+ }
+}
+
+void dc_get_visual_confirm_for_stream(
+ struct dc *dc,
+ struct dc_stream_state *stream_state,
+ struct tg_color *color)
+{
+ struct dc_stream_status *stream_status = dc_stream_get_status(stream_state);
+ struct pipe_ctx *pipe_ctx;
+ int i;
+ struct dc_plane_state *plane_state = NULL;
+
+ if (!stream_status)
+ return;
+
+ switch (dc->debug.visual_confirm) {
+ case VISUAL_CONFIRM_DISABLE:
+ return;
+ case VISUAL_CONFIRM_PSR:
+ case VISUAL_CONFIRM_FAMS:
+ pipe_ctx = dc_stream_get_pipe_ctx(stream_state);
+ if (!pipe_ctx)
+ return;
+ dc_dmub_srv_get_visual_confirm_color_cmd(dc, pipe_ctx);
+ memcpy(color, &dc->ctx->dmub_srv->dmub->visual_confirm_color, sizeof(struct tg_color));
+ return;
+
+ default:
+ /* find plane with highest layer_index */
+ for (i = 0; i < stream_status->plane_count; i++) {
+ if (stream_status->plane_states[i]->visible)
+ plane_state = stream_status->plane_states[i];
+ }
+ if (!plane_state)
+ return;
+ /* find pipe that contains plane with highest layer index */
+ for (i = 0; i < MAX_PIPES; i++) {
+ struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ if (pipe->plane_state == plane_state) {
+ memcpy(color, &pipe->visual_confirm_color, sizeof(struct tg_color));
+ return;
+ }
+ }
+ }
+}
+
static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
{
int i, j;
- struct dc_state *dangling_context = dc_create_state(dc);
+ struct dc_state *dangling_context = dc_state_create_current_copy(dc);
struct dc_state *current_ctx;
+ struct pipe_ctx *pipe;
+ struct timing_generator *tg;
if (dangling_context == NULL)
return;
- dc_resource_state_copy_construct(dc->current_state, dangling_context);
-
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct dc_stream_state *old_stream =
dc->current_state->res_ctx.pipe_ctx[i].stream;
bool should_disable = true;
- bool pipe_split_change =
- context->res_ctx.pipe_ctx[i].top_pipe != dc->current_state->res_ctx.pipe_ctx[i].top_pipe;
+ bool pipe_split_change = false;
+
+ if ((context->res_ctx.pipe_ctx[i].top_pipe) &&
+ (dc->current_state->res_ctx.pipe_ctx[i].top_pipe))
+ pipe_split_change = context->res_ctx.pipe_ctx[i].top_pipe->pipe_idx !=
+ dc->current_state->res_ctx.pipe_ctx[i].top_pipe->pipe_idx;
+ else
+ pipe_split_change = context->res_ctx.pipe_ctx[i].top_pipe !=
+ dc->current_state->res_ctx.pipe_ctx[i].top_pipe;
for (j = 0; j < context->stream_count; j++) {
if (old_stream == context->streams[j]) {
@@ -1094,31 +1352,81 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
break;
}
}
- if (!should_disable && pipe_split_change)
+ if (!should_disable && pipe_split_change &&
+ dc->current_state->stream_count != context->stream_count)
should_disable = true;
+ if (old_stream && !dc->current_state->res_ctx.pipe_ctx[i].top_pipe &&
+ !dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe) {
+ struct pipe_ctx *old_pipe, *new_pipe;
+
+ old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ new_pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (old_pipe->plane_state && !new_pipe->plane_state)
+ should_disable = true;
+ }
+
if (should_disable && old_stream) {
- dc_rem_all_planes_for_stream(dc, old_stream, dangling_context);
+ bool is_phantom = dc_state_get_stream_subvp_type(dc->current_state, old_stream) == SUBVP_PHANTOM;
+ pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ tg = pipe->stream_res.tg;
+ /* When disabling a plane for a phantom pipe, we must turn on the
+ * phantom OTG so the disable programming gets the double buffer
+ * update. Otherwise the pipe will be left in a partially disabled
+ * state that can result in underflow or a hang when enabling it
+ * again for a different use.
+ */
+ if (is_phantom) {
+ if (tg->funcs->enable_crtc) {
+ if (dc->hwseq->funcs.blank_pixel_data)
+ dc->hwseq->funcs.blank_pixel_data(dc, pipe, true);
+ tg->funcs->enable_crtc(tg);
+ }
+ }
+
+ if (is_phantom)
+ dc_state_rem_all_phantom_planes_for_stream(dc, old_stream, dangling_context, true);
+ else
+ dc_state_rem_all_planes_for_stream(dc, old_stream, dangling_context);
disable_all_writeback_pipes_for_stream(dc, old_stream, dangling_context);
+ if (pipe->stream && pipe->plane_state) {
+ if (!dc->debug.using_dml2)
+ set_p_state_switch_method(dc, context, pipe);
+ dc_update_visual_confirm_color(dc, context, pipe);
+ }
+
if (dc->hwss.apply_ctx_for_surface) {
apply_ctx_interdependent_lock(dc, dc->current_state, old_stream, true);
dc->hwss.apply_ctx_for_surface(dc, old_stream, 0, dangling_context);
apply_ctx_interdependent_lock(dc, dc->current_state, old_stream, false);
dc->hwss.post_unlock_program_front_end(dc, dangling_context);
}
+
+ if (dc->res_pool->funcs->prepare_mcache_programming)
+ dc->res_pool->funcs->prepare_mcache_programming(dc, dangling_context);
if (dc->hwss.program_front_end_for_ctx) {
dc->hwss.interdependent_update_lock(dc, dc->current_state, true);
dc->hwss.program_front_end_for_ctx(dc, dangling_context);
dc->hwss.interdependent_update_lock(dc, dc->current_state, false);
dc->hwss.post_unlock_program_front_end(dc, dangling_context);
}
+ /* We need to put the phantom OTG back into its default (disabled) state or we
+ * can get corruption when transitioning from one SubVP config to a different one.
+ * The OTG is set to disable on the falling edge of VUPDATE so the plane disable
+ * will still get its double buffer update.
+ */
+ if (is_phantom) {
+ if (tg->funcs->disable_phantom_crtc)
+ tg->funcs->disable_phantom_crtc(tg);
+ }
}
}
current_ctx = dc->current_state;
dc->current_state = dangling_context;
- dc_release_state(current_ctx);
+ dc_state_release(current_ctx);
}
static void disable_vbios_mode_if_required(
@@ -1138,6 +1446,9 @@ static void disable_vbios_mode_if_required(
if (stream == NULL)
continue;
+ if (stream->apply_seamless_boot_optimization)
+ continue;
+
// only looking for first odm pipe
if (pipe->prev_odm_pipe)
continue;
@@ -1149,7 +1460,7 @@ static void disable_vbios_mode_if_required(
if (link != NULL && link->link_enc->funcs->is_dig_enabled(link->link_enc)) {
unsigned int enc_inst, tg_inst = 0;
- unsigned int pix_clk_100hz;
+ unsigned int pix_clk_100hz = 0;
enc_inst = link->link_enc->funcs->get_dig_frontend(link->link_enc);
if (enc_inst != ENGINE_ID_UNKNOWN) {
@@ -1170,7 +1481,7 @@ static void disable_vbios_mode_if_required(
pipe->stream_res.pix_clk_params.requested_pix_clk_100hz;
if (pix_clk_100hz != requested_pix_clk_100hz) {
- core_link_disable_stream(pipe);
+ dc->link_srv->set_dpms_off(pipe);
pipe->stream->dpms_off = false;
}
}
@@ -1179,35 +1490,7 @@ static void disable_vbios_mode_if_required(
}
}
-static void wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context)
-{
- int i;
- PERF_TRACE();
- for (i = 0; i < MAX_PIPES; i++) {
- int count = 0;
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
- if (!pipe->plane_state)
- continue;
-
- /* Timeout 100 ms */
- while (count < 100000) {
- /* Must set to false to start with, due to OR in update function */
- pipe->plane_state->status.is_flip_pending = false;
- dc->hwss.update_pending_status(pipe);
- if (!pipe->plane_state->status.is_flip_pending)
- break;
- udelay(1);
- count++;
- }
- ASSERT(!pipe->plane_state->status.is_flip_pending);
- }
- PERF_TRACE();
-}
-
-/*******************************************************************************
- * Public functions
- ******************************************************************************/
+/* Public functions */
struct dc *dc_create(const struct dc_init_data *init_params)
{
@@ -1218,6 +1501,7 @@ struct dc *dc_create(const struct dc_init_data *init_params)
return NULL;
if (init_params->dce_environment == DCE_ENV_VIRTUAL_HW) {
+ dc->caps.linear_pitch_alignment = 64;
if (!dc_construct_ctx(dc, init_params))
goto destruct_dc;
} else {
@@ -1237,10 +1521,16 @@ struct dc *dc_create(const struct dc_init_data *init_params)
dc->caps.max_dp_protocol_version = DP_VERSION_1_4;
+ dc->caps.max_otg_num = dc->res_pool->res_cap->num_timing_generator;
+
if (dc->res_pool->dmcu != NULL)
dc->versions.dmcu_version = dc->res_pool->dmcu->dmcu_version;
}
+ dc->dcn_reg_offsets = init_params->dcn_reg_offsets;
+ dc->nbio_reg_offsets = init_params->nbio_reg_offsets;
+ dc->clk_reg_offsets = init_params->clk_reg_offsets;
+
/* Populate versioning information */
dc->versions.dc_ver = DC_VER;
@@ -1248,8 +1538,6 @@ struct dc *dc_create(const struct dc_init_data *init_params)
DC_LOG_DC("Display Core initialized\n");
-
-
return dc;
destruct_dc:
@@ -1266,7 +1554,7 @@ static void detect_edp_presence(struct dc *dc)
int i;
int edp_num;
- get_edp_links(dc, edp_links, &edp_num);
+ dc_get_edp_links(dc, edp_links, &edp_num);
if (!edp_num)
return;
@@ -1275,7 +1563,7 @@ static void detect_edp_presence(struct dc *dc)
if (dc->config.edp_not_connected) {
edp_link->edp_sink_present = false;
} else {
- dc_link_detect_sink(edp_link, &type);
+ dc_link_detect_connection_type(edp_link, &type);
edp_link->edp_sink_present = (type != dc_connection_none);
}
}
@@ -1287,21 +1575,18 @@ void dc_hardware_init(struct dc *dc)
detect_edp_presence(dc);
if (dc->ctx->dce_environment != DCE_ENV_VIRTUAL_HW)
dc->hwss.init_hw(dc);
+ dc_dmub_srv_notify_fw_dc_power_state(dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D0);
}
void dc_init_callbacks(struct dc *dc,
const struct dc_callback_init *init_params)
{
-#ifdef CONFIG_DRM_AMD_DC_HDCP
dc->ctx->cp_psp = init_params->cp_psp;
-#endif
}
void dc_deinit_callbacks(struct dc *dc)
{
-#ifdef CONFIG_DRM_AMD_DC_HDCP
memset(&dc->ctx->cp_psp, 0, sizeof(dc->ctx->cp_psp));
-#endif
}
void dc_destroy(struct dc **dc)
@@ -1346,7 +1631,9 @@ static void program_timing_sync(
struct pipe_ctx *unsynced_pipes[MAX_PIPES] = { NULL };
for (i = 0; i < pipe_count; i++) {
- if (!ctx->res_ctx.pipe_ctx[i].stream || ctx->res_ctx.pipe_ctx[i].top_pipe)
+ if (!ctx->res_ctx.pipe_ctx[i].stream
+ || ctx->res_ctx.pipe_ctx[i].top_pipe
+ || ctx->res_ctx.pipe_ctx[i].prev_odm_pipe)
continue;
unsynced_pipes[i] = &ctx->res_ctx.pipe_ctx[i];
@@ -1411,7 +1698,10 @@ static void program_timing_sync(
}
for (k = 0; k < group_size; k++) {
- struct dc_stream_status *status = dc_stream_get_status_from_state(ctx, pipe_set[k]->stream);
+ struct dc_stream_status *status = dc_state_get_stream_status(ctx, pipe_set[k]->stream);
+
+ if (!status)
+ continue;
status->timing_sync_info.group_id = num_group;
status->timing_sync_info.group_size = group_size;
@@ -1421,27 +1711,42 @@ static void program_timing_sync(
status->timing_sync_info.master = false;
}
+
/* remove any other unblanked pipes as they have already been synced */
- for (j = j + 1; j < group_size; j++) {
- bool is_blanked;
+ if (dc->config.use_pipe_ctx_sync_logic) {
+ /* check pipe's syncd to decide which pipe to be removed */
+ for (j = 1; j < group_size; j++) {
+ if (pipe_set[j]->pipe_idx_syncd == pipe_set[0]->pipe_idx_syncd) {
+ group_size--;
+ pipe_set[j] = pipe_set[group_size];
+ j--;
+ } else
+ /* link slave pipe's syncd with master pipe */
+ pipe_set[j]->pipe_idx_syncd = pipe_set[0]->pipe_idx_syncd;
+ }
+ } else {
+ /* remove any other pipes by checking valid plane */
+ for (j = j + 1; j < group_size; j++) {
+ bool is_blanked;
- if (pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked)
- is_blanked =
- pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked(pipe_set[j]->stream_res.opp);
- else
- is_blanked =
- pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg);
- if (!is_blanked) {
- group_size--;
- pipe_set[j] = pipe_set[group_size];
- j--;
+ if (pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked)
+ is_blanked =
+ pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked(pipe_set[j]->stream_res.opp);
+ else
+ is_blanked =
+ pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg);
+ if (!is_blanked) {
+ group_size--;
+ pipe_set[j] = pipe_set[group_size];
+ j--;
+ }
}
}
if (group_size > 1) {
if (sync_type == TIMING_SYNCHRONIZABLE) {
dc->hwss.enable_timing_synchronization(
- dc, group_index, group_size, pipe_set);
+ dc, ctx, group_index, group_size, pipe_set);
} else
if (sync_type == VBLANK_SYNCHRONIZABLE) {
dc->hwss.enable_vblanks_synchronization(
@@ -1453,24 +1758,26 @@ static void program_timing_sync(
}
}
-static bool context_changed(
- struct dc *dc,
- struct dc_state *context)
+static bool streams_changed(struct dc *dc,
+ struct dc_stream_state *streams[],
+ uint8_t stream_count)
{
uint8_t i;
- if (context->stream_count != dc->current_state->stream_count)
+ if (stream_count != dc->current_state->stream_count)
return true;
for (i = 0; i < dc->current_state->stream_count; i++) {
- if (dc->current_state->streams[i] != context->streams[i])
+ if (dc->current_state->streams[i] != streams[i])
+ return true;
+ if (!streams[i]->link->link_state_valid)
return true;
}
return false;
}
-bool dc_validate_seamless_boot_timing(const struct dc *dc,
+bool dc_validate_boot_timing(const struct dc *dc,
const struct dc_sink *sink,
struct dc_crtc_timing *crtc_timing)
{
@@ -1487,14 +1794,23 @@ bool dc_validate_seamless_boot_timing(const struct dc *dc,
return false;
}
+ if (dc->debug.force_odm_combine) {
+ DC_LOG_DEBUG("boot timing validation failed due to force_odm_combine\n");
+ return false;
+ }
+
/* Check for enabled DIG to identify enabled display */
- if (!link->link_enc->funcs->is_dig_enabled(link->link_enc))
+ if (!link->link_enc->funcs->is_dig_enabled(link->link_enc)) {
+ DC_LOG_DEBUG("boot timing validation failed due to disabled DIG\n");
return false;
+ }
enc_inst = link->link_enc->funcs->get_dig_frontend(link->link_enc);
- if (enc_inst == ENGINE_ID_UNKNOWN)
+ if (enc_inst == ENGINE_ID_UNKNOWN) {
+ DC_LOG_DEBUG("boot timing validation failed due to unknown DIG engine ID\n");
return false;
+ }
for (i = 0; i < dc->res_pool->stream_enc_count; i++) {
if (dc->res_pool->stream_enc[i]->id == enc_inst) {
@@ -1508,91 +1824,170 @@ bool dc_validate_seamless_boot_timing(const struct dc *dc,
}
// tg_inst not found
- if (i == dc->res_pool->stream_enc_count)
+ if (i == dc->res_pool->stream_enc_count) {
+ DC_LOG_DEBUG("boot timing validation failed due to timing generator instance not found\n");
return false;
+ }
- if (tg_inst >= dc->res_pool->timing_generator_count)
+ if (tg_inst >= dc->res_pool->timing_generator_count) {
+ DC_LOG_DEBUG("boot timing validation failed due to invalid timing generator count\n");
return false;
+ }
+
+ if (tg_inst != link->link_enc->preferred_engine) {
+ DC_LOG_DEBUG("boot timing validation failed due to non-preferred timing generator\n");
+ return false;
+ }
tg = dc->res_pool->timing_generators[tg_inst];
- if (!tg->funcs->get_hw_timing)
+ if (!tg->funcs->get_hw_timing) {
+ DC_LOG_DEBUG("boot timing validation failed due to missing get_hw_timing callback\n");
return false;
+ }
- if (!tg->funcs->get_hw_timing(tg, &hw_crtc_timing))
+ if (!tg->funcs->get_hw_timing(tg, &hw_crtc_timing)) {
+ DC_LOG_DEBUG("boot timing validation failed due to failed get_hw_timing return\n");
return false;
+ }
- if (crtc_timing->h_total != hw_crtc_timing.h_total)
+ if (crtc_timing->h_total != hw_crtc_timing.h_total) {
+ DC_LOG_DEBUG("boot timing validation failed due to h_total mismatch\n");
return false;
+ }
- if (crtc_timing->h_border_left != hw_crtc_timing.h_border_left)
+ if (crtc_timing->h_border_left != hw_crtc_timing.h_border_left) {
+ DC_LOG_DEBUG("boot timing validation failed due to h_border_left mismatch\n");
return false;
+ }
- if (crtc_timing->h_addressable != hw_crtc_timing.h_addressable)
+ if (crtc_timing->h_addressable != hw_crtc_timing.h_addressable) {
+ DC_LOG_DEBUG("boot timing validation failed due to h_addressable mismatch\n");
return false;
+ }
- if (crtc_timing->h_border_right != hw_crtc_timing.h_border_right)
+ if (crtc_timing->h_border_right != hw_crtc_timing.h_border_right) {
+ DC_LOG_DEBUG("boot timing validation failed due to h_border_right mismatch\n");
return false;
+ }
- if (crtc_timing->h_front_porch != hw_crtc_timing.h_front_porch)
+ if (crtc_timing->h_front_porch != hw_crtc_timing.h_front_porch) {
+ DC_LOG_DEBUG("boot timing validation failed due to h_front_porch mismatch\n");
return false;
+ }
- if (crtc_timing->h_sync_width != hw_crtc_timing.h_sync_width)
+ if (crtc_timing->h_sync_width != hw_crtc_timing.h_sync_width) {
+ DC_LOG_DEBUG("boot timing validation failed due to h_sync_width mismatch\n");
return false;
+ }
- if (crtc_timing->v_total != hw_crtc_timing.v_total)
+ if (crtc_timing->v_total != hw_crtc_timing.v_total) {
+ DC_LOG_DEBUG("boot timing validation failed due to v_total mismatch\n");
return false;
+ }
- if (crtc_timing->v_border_top != hw_crtc_timing.v_border_top)
+ if (crtc_timing->v_border_top != hw_crtc_timing.v_border_top) {
+ DC_LOG_DEBUG("boot timing validation failed due to v_border_top mismatch\n");
return false;
+ }
- if (crtc_timing->v_addressable != hw_crtc_timing.v_addressable)
+ if (crtc_timing->v_addressable != hw_crtc_timing.v_addressable) {
+ DC_LOG_DEBUG("boot timing validation failed due to v_addressable mismatch\n");
return false;
+ }
- if (crtc_timing->v_border_bottom != hw_crtc_timing.v_border_bottom)
+ if (crtc_timing->v_border_bottom != hw_crtc_timing.v_border_bottom) {
+ DC_LOG_DEBUG("boot timing validation failed due to v_border_bottom mismatch\n");
return false;
+ }
- if (crtc_timing->v_front_porch != hw_crtc_timing.v_front_porch)
+ if (crtc_timing->v_front_porch != hw_crtc_timing.v_front_porch) {
+ DC_LOG_DEBUG("boot timing validation failed due to v_front_porch mismatch\n");
return false;
+ }
- if (crtc_timing->v_sync_width != hw_crtc_timing.v_sync_width)
+ if (crtc_timing->v_sync_width != hw_crtc_timing.v_sync_width) {
+ DC_LOG_DEBUG("boot timing validation failed due to v_sync_width mismatch\n");
return false;
+ }
/* block DSC for now, as VBIOS does not currently support DSC timings */
- if (crtc_timing->flags.DSC)
+ if (crtc_timing->flags.DSC) {
+ DC_LOG_DEBUG("boot timing validation failed due to DSC\n");
return false;
+ }
if (dc_is_dp_signal(link->connector_signal)) {
- unsigned int pix_clk_100hz;
+ unsigned int pix_clk_100hz = 0;
+ uint32_t numOdmPipes = 1;
+ uint32_t id_src[4] = {0};
dc->res_pool->dp_clock_source->funcs->get_pixel_clk_frequency_100hz(
dc->res_pool->dp_clock_source,
tg_inst, &pix_clk_100hz);
- if (crtc_timing->pix_clk_100hz != pix_clk_100hz)
+ if (tg->funcs->get_optc_source)
+ tg->funcs->get_optc_source(tg,
+ &numOdmPipes, &id_src[0], &id_src[1]);
+
+ if (numOdmPipes == 2) {
+ pix_clk_100hz *= 2;
+ } else if (numOdmPipes == 4) {
+ pix_clk_100hz *= 4;
+ } else if (se && se->funcs->get_pixels_per_cycle) {
+ uint32_t pixels_per_cycle = se->funcs->get_pixels_per_cycle(se);
+
+ if (pixels_per_cycle != 1 && !dc->debug.enable_dp_dig_pixel_rate_div_policy) {
+ DC_LOG_DEBUG("boot timing validation failed due to pixels_per_cycle\n");
+ return false;
+ }
+
+ pix_clk_100hz *= pixels_per_cycle;
+ }
+
+ // Note: In rare cases, HW pixclk may differ from crtc's pixclk
+ // slightly due to rounding issues in 10 kHz units.
+ if (crtc_timing->pix_clk_100hz != pix_clk_100hz) {
+ DC_LOG_DEBUG("boot timing validation failed due to pix_clk_100hz mismatch\n");
return false;
+ }
- if (!se->funcs->dp_get_pixel_format)
+ if (!se || !se->funcs->dp_get_pixel_format) {
+ DC_LOG_DEBUG("boot timing validation failed due to missing dp_get_pixel_format\n");
return false;
+ }
if (!se->funcs->dp_get_pixel_format(
se,
&hw_crtc_timing.pixel_encoding,
- &hw_crtc_timing.display_color_depth))
+ &hw_crtc_timing.display_color_depth)) {
+ DC_LOG_DEBUG("boot timing validation failed due to dp_get_pixel_format failure\n");
return false;
+ }
- if (hw_crtc_timing.display_color_depth != crtc_timing->display_color_depth)
+ if (hw_crtc_timing.display_color_depth != crtc_timing->display_color_depth) {
+ DC_LOG_DEBUG("boot timing validation failed due to display_color_depth mismatch\n");
return false;
+ }
- if (hw_crtc_timing.pixel_encoding != crtc_timing->pixel_encoding)
+ if (hw_crtc_timing.pixel_encoding != crtc_timing->pixel_encoding) {
+ DC_LOG_DEBUG("boot timing validation failed due to pixel_encoding mismatch\n");
return false;
+ }
}
+
if (link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED) {
+ DC_LOG_DEBUG("boot timing validation failed due to VSC SDP colorimetry\n");
+ return false;
+ }
+
+ if (link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) {
+ DC_LOG_DEBUG("boot timing validation failed due to DP 128b/132b\n");
return false;
}
- if (is_edp_ilr_optimization_required(link, crtc_timing)) {
+ if (dc->link_srv->edp_is_ilr_optimization_required(link, crtc_timing)) {
DC_LOG_EVENT_LINK_TRAINING("Seamless boot disabled to optimize eDP link rate\n");
return false;
}
@@ -1625,6 +2020,8 @@ void dc_enable_stereo(
int i, j;
struct pipe_ctx *pipe;
+ dc_exit_ips_for_hw_access(dc);
+
for (i = 0; i < MAX_PIPES; i++) {
if (context != NULL) {
pipe = &context->res_ctx.pipe_ctx[i];
@@ -1644,6 +2041,8 @@ void dc_enable_stereo(
void dc_trigger_sync(struct dc *dc, struct dc_state *context)
{
if (context->stream_count > 1 && !dc->debug.disable_timing_sync) {
+ dc_exit_ips_for_hw_access(dc);
+
enable_timing_multisync(dc, context);
program_timing_sync(dc, context);
}
@@ -1662,7 +2061,6 @@ static uint8_t get_stream_mask(struct dc *dc, struct dc_state *context)
return stream_mask;
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
void dc_z10_restore(const struct dc *dc)
{
if (dc->hwss.z10_restore)
@@ -1674,10 +2072,52 @@ void dc_z10_save_init(struct dc *dc)
if (dc->hwss.z10_save_init)
dc->hwss.z10_save_init(dc);
}
-#endif
-/*
- * Applies given context to HW and copy it into current context.
+
+/* Sets a pipe unlock order based on the change in DET allocation and stores it in dc scratch memory
+ * Prevents over allocation of DET during unlock process
+ * e.g. 2 pipe config with different streams with a max of 20 DET segments
+ * Before: After:
+ * - Pipe0: 10 DET segments - Pipe0: 12 DET segments
+ * - Pipe1: 10 DET segments - Pipe1: 8 DET segments
+ * If Pipe0 gets updated first, 22 DET segments will be allocated
+ */
+static void determine_pipe_unlock_order(struct dc *dc, struct dc_state *context)
+{
+ unsigned int i = 0;
+ struct pipe_ctx *pipe = NULL;
+ struct timing_generator *tg = NULL;
+
+ if (!dc->config.set_pipe_unlock_order)
+ return;
+
+ memset(dc->scratch.pipes_to_unlock_first, 0, sizeof(dc->scratch.pipes_to_unlock_first));
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ tg = pipe->stream_res.tg;
+
+ if (!resource_is_pipe_type(pipe, OTG_MASTER) ||
+ !tg->funcs->is_tg_enabled(tg) ||
+ dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
+ continue;
+ }
+
+ if (resource_calculate_det_for_stream(context, pipe) <
+ resource_calculate_det_for_stream(dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i])) {
+ dc->scratch.pipes_to_unlock_first[i] = true;
+ }
+ }
+}
+
+/**
+ * dc_commit_state_no_check - Apply context to the hardware
+ *
+ * @dc: DC object with the current status to be updated
+ * @context: New state that will become the current status at the end of this function
+ *
+ * Applies the given context to the hardware and copies it into the current context.
* It's up to the user to release the src context afterwards.
+ *
+ * Return: an enum dc_status result code for the operation
*/
static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *context)
{
@@ -1686,11 +2126,20 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
struct pipe_ctx *pipe;
int i, k, l;
struct dc_stream_state *dc_streams[MAX_STREAMS] = {0};
+ struct dc_state *old_state;
+ bool subvp_prev_use = false;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
dc_z10_restore(dc);
dc_allow_idle_optimizations(dc, false);
-#endif
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ /* Check old context for SubVP */
+ subvp_prev_use |= (dc_state_get_pipe_subvp_type(dc->current_state, old_pipe) == SUBVP_PHANTOM);
+ if (subvp_prev_use)
+ break;
+ }
for (i = 0; i < context->stream_count; i++)
dc_streams[i] = context->streams[i];
@@ -1698,12 +2147,43 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
if (!dcb->funcs->is_accelerated_mode(dcb)) {
disable_vbios_mode_if_required(dc, context);
dc->hwss.enable_accelerated_mode(dc, context);
+ } else if (get_seamless_boot_stream_count(dc->current_state) > 0) {
+ /* If the previous Stream still retains the apply seamless boot flag,
+ * it means the OS has not actually performed a flip yet.
+ * At this point, if we receive dc_commit_streams again, we should
+ * once more check whether the actual HW timing matches what the OS
+ * has provided
+ */
+ disable_vbios_mode_if_required(dc, context);
+ }
+
+ if (dc->hwseq->funcs.wait_for_pipe_update_if_needed) {
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ //Only delay otg master for a given config
+ if (resource_is_pipe_type(pipe, OTG_MASTER)) {
+ //dc_commit_state_no_check is always a full update
+ dc->hwseq->funcs.wait_for_pipe_update_if_needed(dc, pipe, false);
+ break;
+ }
+ }
}
if (context->stream_count > get_seamless_boot_stream_count(context) ||
context->stream_count == 0)
dc->hwss.prepare_bandwidth(dc, context);
+ /* When SubVP is active, all HW programming must be done while
+ * SubVP lock is acquired
+ */
+ if (dc->hwss.subvp_pipe_control_lock)
+ dc->hwss.subvp_pipe_control_lock(dc, context, true, true, NULL, subvp_prev_use);
+ if (dc->hwss.dmub_hw_control_lock)
+ dc->hwss.dmub_hw_control_lock(dc, context, true);
+
+ if (dc->hwss.update_dsc_pg)
+ dc->hwss.update_dsc_pg(dc, context, false);
+
disable_dangling_plane(dc, context);
/* re-program planes for existing stream, in case we need to
* free up plane resource for later use
@@ -1728,20 +2208,56 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
dc->hwss.wait_for_mpcc_disconnect(dc, dc->res_pool, pipe);
}
+ for (i = 0; i < dc->current_state->stream_count; i++)
+ dc_dmub_srv_control_cursor_offload(dc, dc->current_state, dc->current_state->streams[i], false);
+
result = dc->hwss.apply_ctx_to_hw(dc, context);
- if (result != DC_OK)
+ for (i = 0; i < context->stream_count; i++)
+ dc_dmub_srv_control_cursor_offload(dc, context, context->streams[i], true);
+
+ if (result != DC_OK) {
+ /* Application of dc_state to hardware stopped. */
+ dc->current_state->res_ctx.link_enc_cfg_ctx.mode = LINK_ENC_CFG_STEADY;
return result;
+ }
dc_trigger_sync(dc, context);
+ /* Full update should unconditionally be triggered when dc_commit_state_no_check is called */
+ for (i = 0; i < context->stream_count; i++) {
+ uint32_t prev_dsc_changed = context->streams[i]->update_flags.bits.dsc_changed;
+
+ context->streams[i]->update_flags.raw = 0xFFFFFFFF;
+ context->streams[i]->update_flags.bits.dsc_changed = prev_dsc_changed;
+ }
+
+ determine_pipe_unlock_order(dc, context);
/* Program all planes within new context*/
+ if (dc->res_pool->funcs->prepare_mcache_programming)
+ dc->res_pool->funcs->prepare_mcache_programming(dc, context);
if (dc->hwss.program_front_end_for_ctx) {
dc->hwss.interdependent_update_lock(dc, context, true);
dc->hwss.program_front_end_for_ctx(dc, context);
+
+ if (dc->hwseq->funcs.set_wait_for_update_needed_for_pipe) {
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ dc->hwseq->funcs.set_wait_for_update_needed_for_pipe(dc, pipe);
+ }
+ }
+
dc->hwss.interdependent_update_lock(dc, context, false);
dc->hwss.post_unlock_program_front_end(dc, context);
}
+
+ if (dc->hwss.commit_subvp_config)
+ dc->hwss.commit_subvp_config(dc, context);
+ if (dc->hwss.subvp_pipe_control_lock)
+ dc->hwss.subvp_pipe_control_lock(dc, context, false, true, NULL, subvp_prev_use);
+ if (dc->hwss.dmub_hw_control_lock)
+ dc->hwss.dmub_hw_control_lock(dc, context, false);
+
for (i = 0; i < context->stream_count; i++) {
const struct dc_link *link = context->streams[i]->link;
@@ -1783,14 +2299,26 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
dc_enable_stereo(dc, context, dc_streams, context->stream_count);
- if (context->stream_count > get_seamless_boot_stream_count(context) ||
+ if (get_seamless_boot_stream_count(context) == 0 ||
context->stream_count == 0) {
/* Must wait for no flips to be pending before doing optimize bw */
- wait_for_no_pipes_pending(dc, context);
+ hwss_wait_for_no_pipes_pending(dc, context);
+ /*
+ * optimized dispclk depends on ODM setup. Need to wait for ODM
+ * update pending complete before optimizing bandwidth.
+ */
+ hwss_wait_for_odm_update_pending_complete(dc, context);
/* pplib is notified if disp_num changed */
dc->hwss.optimize_bandwidth(dc, context);
+ /* Need to do otg sync again as otg could be out of sync due to otg
+ * workaround applied during clock update
+ */
+ dc_trigger_sync(dc, context);
}
+ if (dc->hwss.update_dsc_pg)
+ dc->hwss.update_dsc_pg(dc, context, true);
+
if (dc->ctx->dce_version >= DCE_VERSION_MAX)
TRACE_DCN_CLOCK_STATE(&context->bw_ctx.bw.dcn.clk);
else
@@ -1804,38 +2332,162 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
for (i = 0; i < context->stream_count; i++)
context->streams[i]->mode_changed = false;
- dc_release_state(dc->current_state);
+ /* Clear update flags that were set earlier to avoid redundant programming */
+ for (i = 0; i < context->stream_count; i++) {
+ context->streams[i]->update_flags.raw = 0x0;
+ }
+ old_state = dc->current_state;
dc->current_state = context;
- dc_retain_state(dc->current_state);
+ dc_state_release(old_state);
+
+ dc_state_retain(dc->current_state);
return result;
}
-bool dc_commit_state(struct dc *dc, struct dc_state *context)
+static bool commit_minimal_transition_state(struct dc *dc,
+ struct dc_state *transition_base_context);
+
+/**
+ * dc_commit_streams - Commit current stream state
+ *
+ * @dc: DC object with the commit state to be configured in the hardware
+ * @params: Parameters for the commit, including the streams to be committed
+ *
+ * Function responsible for committing stream changes to the hardware.
+ *
+ * Return:
+ * Return DC_OK if everything works as expected; otherwise, return a dc_status
+ * code.
+ */
+enum dc_status dc_commit_streams(struct dc *dc, struct dc_commit_streams_params *params)
{
- enum dc_status result = DC_ERROR_UNEXPECTED;
- int i;
+ int i, j;
+ struct dc_state *context;
+ enum dc_status res = DC_OK;
+ struct dc_validation_set set[MAX_STREAMS] = {0};
+ struct pipe_ctx *pipe;
+ bool handle_exit_odm2to1 = false;
+
+ if (!params)
+ return DC_ERROR_UNEXPECTED;
- if (!context_changed(dc, context))
- return DC_OK;
+ if (dc->ctx->dce_environment == DCE_ENV_VIRTUAL_HW)
+ return res;
+
+ if (!streams_changed(dc, params->streams, params->stream_count) &&
+ dc->current_state->power_source == params->power_source)
+ return res;
- DC_LOG_DC("%s: %d streams\n",
- __func__, context->stream_count);
+ dc_exit_ips_for_hw_access(dc);
+
+ DC_LOG_DC("%s: %d streams\n", __func__, params->stream_count);
+
+ for (i = 0; i < params->stream_count; i++) {
+ struct dc_stream_state *stream = params->streams[i];
+ struct dc_stream_status *status = dc_stream_get_status(stream);
+ struct dc_sink *sink = stream->sink;
+
+ /* revalidate streams */
+ if (!dc_is_virtual_signal(sink->sink_signal)) {
+ res = dc_validate_stream(dc, stream);
+ if (res != DC_OK)
+ return res;
+ }
- for (i = 0; i < context->stream_count; i++) {
- struct dc_stream_state *stream = context->streams[i];
dc_stream_log(dc, stream);
+
+ set[i].stream = stream;
+
+ if (status) {
+ set[i].plane_count = status->plane_count;
+ for (j = 0; j < status->plane_count; j++)
+ set[i].plane_states[j] = status->plane_states[j];
+ }
+ }
+
+ /* ODM Combine 2:1 power optimization is only applied for single stream
+ * scenario, it uses extra pipes than needed to reduce power consumption
+ * We need to switch off this feature to make room for new streams.
+ */
+ if (params->stream_count > dc->current_state->stream_count &&
+ dc->current_state->stream_count == 1) {
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ if (pipe->next_odm_pipe)
+ handle_exit_odm2to1 = true;
+ }
+ }
+
+ if (handle_exit_odm2to1)
+ res = commit_minimal_transition_state(dc, dc->current_state);
+
+ context = dc_state_create_current_copy(dc);
+ if (!context)
+ goto context_alloc_fail;
+
+ context->power_source = params->power_source;
+
+ res = dc_validate_with_context(dc, set, params->stream_count, context, DC_VALIDATE_MODE_AND_PROGRAMMING);
+
+ /*
+ * Only update link encoder to stream assignment after bandwidth validation passed.
+ */
+ if (res == DC_OK && dc->res_pool->funcs->link_encs_assign && !dc->config.unify_link_enc_assignment)
+ dc->res_pool->funcs->link_encs_assign(
+ dc, context, context->streams, context->stream_count);
+
+ if (res != DC_OK) {
+ BREAK_TO_DEBUGGER();
+ goto fail;
}
- result = dc_commit_state_no_check(dc, context);
+ /*
+ * If not already seamless, make transition seamless by inserting intermediate minimal transition
+ */
+ if (dc->hwss.is_pipe_topology_transition_seamless &&
+ !dc->hwss.is_pipe_topology_transition_seamless(dc, dc->current_state, context)) {
+ res = commit_minimal_transition_state(dc, context);
+ if (res != DC_OK) {
+ BREAK_TO_DEBUGGER();
+ goto fail;
+ }
+ }
+
+ res = dc_commit_state_no_check(dc, context);
+
+ for (i = 0; i < params->stream_count; i++) {
+ for (j = 0; j < context->stream_count; j++) {
+ if (params->streams[i]->stream_id == context->streams[j]->stream_id)
+ params->streams[i]->out.otg_offset = context->stream_status[j].primary_otg_inst;
+
+ if (dc_is_embedded_signal(params->streams[i]->signal)) {
+ struct dc_stream_status *status = dc_state_get_stream_status(context, params->streams[i]);
+
+ if (!status)
+ continue;
+
+ if (dc->hwss.is_abm_supported)
+ status->is_abm_supported = dc->hwss.is_abm_supported(dc, context, params->streams[i]);
+ else
+ status->is_abm_supported = true;
+ }
+ }
+ }
- return (result == DC_OK);
+fail:
+ dc_state_release(context);
+
+context_alloc_fail:
+
+ DC_LOG_DC("%s Finished.\n", __func__);
+
+ return res;
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
bool dc_acquire_release_mpc_3dlut(
struct dc *dc, bool acquire,
struct dc_stream_state *stream,
@@ -1871,7 +2523,7 @@ bool dc_acquire_release_mpc_3dlut(
}
return ret;
}
-#endif
+
static bool is_flip_pending_in_pipes(struct dc *dc, struct dc_state *context)
{
int i;
@@ -1880,7 +2532,8 @@ static bool is_flip_pending_in_pipes(struct dc *dc, struct dc_state *context)
for (i = 0; i < MAX_PIPES; i++) {
pipe = &context->res_ctx.pipe_ctx[i];
- if (!pipe->plane_state)
+ // Don't check flip pending on phantom pipes
+ if (!pipe->plane_state || (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM))
continue;
/* Must set to false to start with, due to OR in update function */
@@ -1892,7 +2545,6 @@ static bool is_flip_pending_in_pipes(struct dc *dc, struct dc_state *context)
return false;
}
-#ifdef CONFIG_DRM_AMD_DC_DCN
/* Perform updates here which need to be deferred until next vupdate
*
* i.e. blnd lut, 3dlut, and shaper lut bypass regs are double buffered
@@ -1911,7 +2563,6 @@ static void process_deferred_updates(struct dc *dc)
dc->res_pool->dpps[i]->funcs->dpp_deferred_update(dc->res_pool->dpps[i]);
}
}
-#endif /* CONFIG_DRM_AMD_DC_DCN */
void dc_post_update_surfaces_to_stream(struct dc *dc)
{
@@ -1923,110 +2574,35 @@ void dc_post_update_surfaces_to_stream(struct dc *dc)
post_surface_trace(dc);
- if (dc->ctx->dce_version >= DCE_VERSION_MAX)
- TRACE_DCN_CLOCK_STATE(&context->bw_ctx.bw.dcn.clk);
- else
- TRACE_DCE_CLOCK_STATE(&context->bw_ctx.bw.dce);
-
- if (is_flip_pending_in_pipes(dc, context))
- return;
-
- for (i = 0; i < dc->res_pool->pipe_count; i++)
- if (context->res_ctx.pipe_ctx[i].stream == NULL ||
- context->res_ctx.pipe_ctx[i].plane_state == NULL) {
- context->res_ctx.pipe_ctx[i].pipe_idx = i;
- dc->hwss.disable_plane(dc, &context->res_ctx.pipe_ctx[i]);
- }
-
-#ifdef CONFIG_DRM_AMD_DC_DCN
- process_deferred_updates(dc);
-#endif
-
- dc->hwss.optimize_bandwidth(dc, context);
-
- dc->optimized_required = false;
- dc->wm_optimized_required = false;
-}
-
-static void init_state(struct dc *dc, struct dc_state *context)
-{
- /* Each context must have their own instance of VBA and in order to
- * initialize and obtain IP and SOC the base DML instance from DC is
- * initially copied into every context
+ /*
+ * Only relevant for DCN behavior where we can guarantee the optimization
+ * is safe to apply - retain the legacy behavior for DCE.
*/
-#ifdef CONFIG_DRM_AMD_DC_DCN
- memcpy(&context->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib));
-#endif
-}
-struct dc_state *dc_create_state(struct dc *dc)
-{
- struct dc_state *context = kvzalloc(sizeof(struct dc_state),
- GFP_KERNEL);
-
- if (!context)
- return NULL;
-
- init_state(dc, context);
-
- kref_init(&context->refcount);
-
- return context;
-}
-
-struct dc_state *dc_copy_state(struct dc_state *src_ctx)
-{
- int i, j;
- struct dc_state *new_ctx = kvmalloc(sizeof(struct dc_state), GFP_KERNEL);
-
- if (!new_ctx)
- return NULL;
- memcpy(new_ctx, src_ctx, sizeof(struct dc_state));
-
- for (i = 0; i < MAX_PIPES; i++) {
- struct pipe_ctx *cur_pipe = &new_ctx->res_ctx.pipe_ctx[i];
-
- if (cur_pipe->top_pipe)
- cur_pipe->top_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx];
+ if (dc->ctx->dce_version < DCE_VERSION_MAX)
+ TRACE_DCE_CLOCK_STATE(&context->bw_ctx.bw.dce);
+ else {
+ TRACE_DCN_CLOCK_STATE(&context->bw_ctx.bw.dcn.clk);
- if (cur_pipe->bottom_pipe)
- cur_pipe->bottom_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx];
+ if (is_flip_pending_in_pipes(dc, context))
+ return;
- if (cur_pipe->prev_odm_pipe)
- cur_pipe->prev_odm_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx];
+ for (i = 0; i < dc->res_pool->pipe_count; i++)
+ if (context->res_ctx.pipe_ctx[i].stream == NULL ||
+ context->res_ctx.pipe_ctx[i].plane_state == NULL) {
+ context->res_ctx.pipe_ctx[i].pipe_idx = i;
+ dc->hwss.disable_plane(dc, context, &context->res_ctx.pipe_ctx[i]);
+ }
- if (cur_pipe->next_odm_pipe)
- cur_pipe->next_odm_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx];
+ process_deferred_updates(dc);
- }
+ dc->hwss.optimize_bandwidth(dc, context);
- for (i = 0; i < new_ctx->stream_count; i++) {
- dc_stream_retain(new_ctx->streams[i]);
- for (j = 0; j < new_ctx->stream_status[i].plane_count; j++)
- dc_plane_state_retain(
- new_ctx->stream_status[i].plane_states[j]);
+ if (dc->hwss.update_dsc_pg)
+ dc->hwss.update_dsc_pg(dc, context, true);
}
- kref_init(&new_ctx->refcount);
-
- return new_ctx;
-}
-
-void dc_retain_state(struct dc_state *context)
-{
- kref_get(&context->refcount);
-}
-
-static void dc_state_free(struct kref *kref)
-{
- struct dc_state *context = container_of(kref, struct dc_state, refcount);
- dc_resource_state_destruct(context);
- kvfree(context);
-}
-
-void dc_release_state(struct dc_state *context)
-{
- kref_put(&context->refcount, dc_state_free);
+ dc->optimized_required = false;
}
bool dc_set_generic_gpio_for_stereo(bool enable,
@@ -2095,47 +2671,50 @@ static bool is_surface_in_context(
return false;
}
-static enum surface_update_type get_plane_info_update_type(const struct dc_surface_update *u)
+static struct surface_update_descriptor get_plane_info_update_type(const struct dc_surface_update *u)
{
union surface_update_flags *update_flags = &u->surface->update_flags;
- enum surface_update_type update_type = UPDATE_TYPE_FAST;
+ struct surface_update_descriptor update_type = { UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_NONE };
if (!u->plane_info)
- return UPDATE_TYPE_FAST;
+ return update_type;
+
+ // `plane_info` present means at least `STREAM` lock is required
+ elevate_update_type(&update_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM);
if (u->plane_info->color_space != u->surface->color_space) {
update_flags->bits.color_space_change = 1;
- elevate_update_type(&update_type, UPDATE_TYPE_MED);
+ elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STREAM);
}
if (u->plane_info->horizontal_mirror != u->surface->horizontal_mirror) {
update_flags->bits.horizontal_mirror_change = 1;
- elevate_update_type(&update_type, UPDATE_TYPE_MED);
+ elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STREAM);
}
if (u->plane_info->rotation != u->surface->rotation) {
update_flags->bits.rotation_change = 1;
- elevate_update_type(&update_type, UPDATE_TYPE_FULL);
+ elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL);
}
if (u->plane_info->format != u->surface->format) {
update_flags->bits.pixel_format_change = 1;
- elevate_update_type(&update_type, UPDATE_TYPE_FULL);
+ elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL);
}
if (u->plane_info->stereo_format != u->surface->stereo_format) {
update_flags->bits.stereo_format_change = 1;
- elevate_update_type(&update_type, UPDATE_TYPE_FULL);
+ elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL);
}
if (u->plane_info->per_pixel_alpha != u->surface->per_pixel_alpha) {
update_flags->bits.per_pixel_alpha_change = 1;
- elevate_update_type(&update_type, UPDATE_TYPE_MED);
+ elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STREAM);
}
if (u->plane_info->global_alpha_value != u->surface->global_alpha_value) {
update_flags->bits.global_alpha_change = 1;
- elevate_update_type(&update_type, UPDATE_TYPE_MED);
+ elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STREAM);
}
if (u->plane_info->dcc.enable != u->surface->dcc.enable
@@ -2147,7 +2726,7 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa
* recalculate stutter period.
*/
update_flags->bits.dcc_change = 1;
- elevate_update_type(&update_type, UPDATE_TYPE_FULL);
+ elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL);
}
if (resource_pixel_format_to_bpp(u->plane_info->format) !=
@@ -2156,30 +2735,41 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa
* and DML calculation
*/
update_flags->bits.bpp_change = 1;
- elevate_update_type(&update_type, UPDATE_TYPE_FULL);
+ elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL);
}
if (u->plane_info->plane_size.surface_pitch != u->surface->plane_size.surface_pitch
|| u->plane_info->plane_size.chroma_pitch != u->surface->plane_size.chroma_pitch) {
update_flags->bits.plane_size_change = 1;
- elevate_update_type(&update_type, UPDATE_TYPE_MED);
+ elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STREAM);
}
+ const struct dc_tiling_info *tiling = &u->plane_info->tiling_info;
- if (memcmp(&u->plane_info->tiling_info, &u->surface->tiling_info,
- sizeof(union dc_tiling_info)) != 0) {
+ if (memcmp(tiling, &u->surface->tiling_info, sizeof(*tiling)) != 0) {
update_flags->bits.swizzle_change = 1;
- elevate_update_type(&update_type, UPDATE_TYPE_MED);
-
- /* todo: below are HW dependent, we should add a hook to
- * DCE/N resource and validated there.
- */
- if (u->plane_info->tiling_info.gfx9.swizzle != DC_SW_LINEAR) {
- /* swizzled mode requires RQ to be setup properly,
- * thus need to run DML to calculate RQ settings
- */
- update_flags->bits.bandwidth_change = 1;
- elevate_update_type(&update_type, UPDATE_TYPE_FULL);
+ elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STREAM);
+
+ switch (tiling->gfxversion) {
+ case DcGfxVersion9:
+ case DcGfxVersion10:
+ case DcGfxVersion11:
+ if (tiling->gfx9.swizzle != DC_SW_LINEAR) {
+ update_flags->bits.bandwidth_change = 1;
+ elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL);
+ }
+ break;
+ case DcGfxAddr3:
+ if (tiling->gfx_addr3.swizzle != DC_ADDR3_SW_LINEAR) {
+ update_flags->bits.bandwidth_change = 1;
+ elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL);
+ }
+ break;
+ case DcGfxVersion7:
+ case DcGfxVersion8:
+ case DcGfxVersionUnknown:
+ default:
+ break;
}
}
@@ -2187,22 +2777,34 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa
return update_type;
}
-static enum surface_update_type get_scaling_info_update_type(
+static struct surface_update_descriptor get_scaling_info_update_type(
+ const struct dc_check_config *check_config,
const struct dc_surface_update *u)
{
union surface_update_flags *update_flags = &u->surface->update_flags;
+ struct surface_update_descriptor update_type = { UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_NONE };
if (!u->scaling_info)
- return UPDATE_TYPE_FAST;
+ return update_type;
- if (u->scaling_info->clip_rect.width != u->surface->clip_rect.width
- || u->scaling_info->clip_rect.height != u->surface->clip_rect.height
+ // `scaling_info` present means at least `STREAM` lock is required
+ elevate_update_type(&update_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM);
+
+ if (u->scaling_info->src_rect.width != u->surface->src_rect.width
+ || u->scaling_info->src_rect.height != u->surface->src_rect.height
|| u->scaling_info->dst_rect.width != u->surface->dst_rect.width
|| u->scaling_info->dst_rect.height != u->surface->dst_rect.height
+ || u->scaling_info->clip_rect.width != u->surface->clip_rect.width
+ || u->scaling_info->clip_rect.height != u->surface->clip_rect.height
|| u->scaling_info->scaling_quality.integer_scaling !=
- u->surface->scaling_quality.integer_scaling
- ) {
+ u->surface->scaling_quality.integer_scaling) {
update_flags->bits.scaling_change = 1;
+ elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL);
+
+ if (u->scaling_info->src_rect.width > u->surface->src_rect.width
+ || u->scaling_info->src_rect.height > u->surface->src_rect.height)
+ /* Making src rect bigger requires a bandwidth change */
+ update_flags->bits.clock_change = 1;
if ((u->scaling_info->dst_rect.width < u->surface->dst_rect.width
|| u->scaling_info->dst_rect.height < u->surface->dst_rect.height)
@@ -2210,16 +2812,12 @@ static enum surface_update_type get_scaling_info_update_type(
|| u->scaling_info->dst_rect.height < u->surface->src_rect.height))
/* Making dst rect smaller requires a bandwidth change */
update_flags->bits.bandwidth_change = 1;
- }
- if (u->scaling_info->src_rect.width != u->surface->src_rect.width
- || u->scaling_info->src_rect.height != u->surface->src_rect.height) {
-
- update_flags->bits.scaling_change = 1;
- if (u->scaling_info->src_rect.width > u->surface->src_rect.width
- || u->scaling_info->src_rect.height > u->surface->src_rect.height)
- /* Making src rect bigger requires a bandwidth change */
- update_flags->bits.clock_change = 1;
+ if (u->scaling_info->src_rect.width > check_config->max_optimizable_video_width &&
+ (u->scaling_info->clip_rect.width > u->surface->clip_rect.width ||
+ u->scaling_info->clip_rect.height > u->surface->clip_rect.height))
+ /* Changing clip size of a large surface may result in MPC slice count change */
+ update_flags->bits.bandwidth_change = 1;
}
if (u->scaling_info->src_rect.x != u->surface->src_rect.x
@@ -2227,121 +2825,156 @@ static enum surface_update_type get_scaling_info_update_type(
|| u->scaling_info->clip_rect.x != u->surface->clip_rect.x
|| u->scaling_info->clip_rect.y != u->surface->clip_rect.y
|| u->scaling_info->dst_rect.x != u->surface->dst_rect.x
- || u->scaling_info->dst_rect.y != u->surface->dst_rect.y)
+ || u->scaling_info->dst_rect.y != u->surface->dst_rect.y) {
+ elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STREAM);
update_flags->bits.position_change = 1;
+ }
- if (update_flags->bits.clock_change
- || update_flags->bits.bandwidth_change
- || update_flags->bits.scaling_change)
- return UPDATE_TYPE_FULL;
-
- if (update_flags->bits.position_change)
- return UPDATE_TYPE_MED;
-
- return UPDATE_TYPE_FAST;
+ return update_type;
}
-static enum surface_update_type det_surface_update(const struct dc *dc,
- const struct dc_surface_update *u)
+static struct surface_update_descriptor det_surface_update(
+ const struct dc_check_config *check_config,
+ struct dc_surface_update *u)
{
- const struct dc_state *context = dc->current_state;
- enum surface_update_type type;
- enum surface_update_type overall_type = UPDATE_TYPE_FAST;
+ struct surface_update_descriptor overall_type = { UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_NONE };
union surface_update_flags *update_flags = &u->surface->update_flags;
- if (u->flip_addr)
- update_flags->bits.addr_update = 1;
-
- if (!is_surface_in_context(context, u->surface) || u->surface->force_full_update) {
+ if (u->surface->force_full_update) {
update_flags->raw = 0xFFFFFFFF;
- return UPDATE_TYPE_FULL;
+ elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL);
+ return overall_type;
}
update_flags->raw = 0; // Reset all flags
- type = get_plane_info_update_type(u);
- elevate_update_type(&overall_type, type);
+ struct surface_update_descriptor inner_type = get_plane_info_update_type(u);
+
+ elevate_update_type(&overall_type, inner_type.update_type, inner_type.lock_descriptor);
- type = get_scaling_info_update_type(u);
- elevate_update_type(&overall_type, type);
+ inner_type = get_scaling_info_update_type(check_config, u);
+ elevate_update_type(&overall_type, inner_type.update_type, inner_type.lock_descriptor);
- if (u->flip_addr)
+ if (u->flip_addr) {
update_flags->bits.addr_update = 1;
+ elevate_update_type(&overall_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM);
- if (u->in_transfer_func)
+ if (u->flip_addr->address.tmz_surface != u->surface->address.tmz_surface) {
+ update_flags->bits.tmz_changed = 1;
+ elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL);
+ }
+ }
+ if (u->in_transfer_func) {
update_flags->bits.in_transfer_func_change = 1;
+ elevate_update_type(&overall_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STREAM);
+ }
- if (u->input_csc_color_matrix)
+ if (u->input_csc_color_matrix) {
update_flags->bits.input_csc_change = 1;
+ elevate_update_type(&overall_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM);
+ }
- if (u->coeff_reduction_factor)
+ if (u->coeff_reduction_factor) {
update_flags->bits.coeff_reduction_change = 1;
+ elevate_update_type(&overall_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM);
+ }
- if (u->gamut_remap_matrix)
+ if (u->gamut_remap_matrix) {
update_flags->bits.gamut_remap_change = 1;
+ elevate_update_type(&overall_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM);
+ }
- if (u->gamma) {
- enum surface_pixel_format format = SURFACE_PIXEL_FORMAT_GRPH_BEGIN;
-
- if (u->plane_info)
- format = u->plane_info->format;
- else if (u->surface)
- format = u->surface->format;
-
- if (dce_use_lut(format))
- update_flags->bits.gamma_change = 1;
+ if (u->blend_tf || (u->gamma && dce_use_lut(u->plane_info ? u->plane_info->format : u->surface->format))) {
+ update_flags->bits.gamma_change = 1;
+ elevate_update_type(&overall_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM);
}
- if (u->lut3d_func || u->func_shaper)
+ if (u->lut3d_func || u->func_shaper) {
update_flags->bits.lut_3d = 1;
+ elevate_update_type(&overall_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM);
+ }
if (u->hdr_mult.value)
if (u->hdr_mult.value != u->surface->hdr_mult.value) {
+ // TODO: Should be fast?
update_flags->bits.hdr_mult = 1;
- elevate_update_type(&overall_type, UPDATE_TYPE_MED);
+ elevate_update_type(&overall_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STREAM);
}
- if (update_flags->bits.in_transfer_func_change) {
- type = UPDATE_TYPE_MED;
- elevate_update_type(&overall_type, type);
+ if (u->sdr_white_level_nits)
+ if (u->sdr_white_level_nits != u->surface->sdr_white_level_nits) {
+ // TODO: Should be fast?
+ update_flags->bits.sdr_white_level_nits = 1;
+ elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL);
+ }
+
+ if (u->cm2_params) {
+ if (u->cm2_params->component_settings.shaper_3dlut_setting != u->surface->mcm_shaper_3dlut_setting
+ || u->cm2_params->component_settings.lut1d_enable != u->surface->mcm_lut1d_enable
+ || u->cm2_params->cm2_luts.lut3d_data.lut3d_src != u->surface->mcm_luts.lut3d_data.lut3d_src) {
+ update_flags->bits.mcm_transfer_function_enable_change = 1;
+ elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL);
+ }
}
- if (update_flags->bits.input_csc_change
- || update_flags->bits.coeff_reduction_change
- || update_flags->bits.lut_3d
- || update_flags->bits.gamma_change
- || update_flags->bits.gamut_remap_change) {
- type = UPDATE_TYPE_FULL;
- elevate_update_type(&overall_type, type);
+ if (update_flags->bits.lut_3d &&
+ u->surface->mcm_luts.lut3d_data.lut3d_src != DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM) {
+ elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL);
}
+ if (check_config->enable_legacy_fast_update &&
+ (update_flags->bits.gamma_change ||
+ update_flags->bits.gamut_remap_change ||
+ update_flags->bits.input_csc_change ||
+ update_flags->bits.coeff_reduction_change)) {
+ elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL);
+ }
return overall_type;
}
-static enum surface_update_type check_update_surfaces_for_stream(
- struct dc *dc,
- struct dc_surface_update *updates,
- int surface_count,
- struct dc_stream_update *stream_update,
- const struct dc_stream_status *stream_status)
+/* May need to flip the desktop plane in cases where MPO plane receives a flip but desktop plane doesn't
+ * while both planes are flip_immediate
+ */
+static void force_immediate_gsl_plane_flip(struct dc *dc, struct dc_surface_update *updates, int surface_count)
{
+ bool has_flip_immediate_plane = false;
int i;
- enum surface_update_type overall_type = UPDATE_TYPE_FAST;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dc->idle_optimizations_allowed)
- overall_type = UPDATE_TYPE_FULL;
+ for (i = 0; i < surface_count; i++) {
+ if (updates[i].surface->flip_immediate) {
+ has_flip_immediate_plane = true;
+ break;
+ }
+ }
-#endif
- if (stream_status == NULL || stream_status->plane_count != surface_count)
- overall_type = UPDATE_TYPE_FULL;
+ if (has_flip_immediate_plane && surface_count > 1) {
+ for (i = 0; i < surface_count; i++) {
+ if (updates[i].surface->flip_immediate)
+ updates[i].surface->update_flags.bits.addr_update = 1;
+ }
+ }
+}
+
+static struct surface_update_descriptor check_update_surfaces_for_stream(
+ const struct dc_check_config *check_config,
+ struct dc_surface_update *updates,
+ int surface_count,
+ struct dc_stream_update *stream_update)
+{
+ struct surface_update_descriptor overall_type = { UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_NONE };
if (stream_update && stream_update->pending_test_pattern) {
- overall_type = UPDATE_TYPE_FULL;
+ elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL);
+ }
+
+ if (stream_update && stream_update->hw_cursor_req) {
+ elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL);
}
/* some stream updates require passive update */
if (stream_update) {
+ elevate_update_type(&overall_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM);
+
union stream_update_flags *su_flags = &stream_update->stream->update_flags;
if ((stream_update->src.height != 0 && stream_update->src.width != 0) ||
@@ -2349,14 +2982,16 @@ static enum surface_update_type check_update_surfaces_for_stream(
stream_update->integer_scaling_update)
su_flags->bits.scaling = 1;
- if (stream_update->out_transfer_func)
+ if (check_config->enable_legacy_fast_update && stream_update->out_transfer_func)
su_flags->bits.out_tf = 1;
if (stream_update->abm_level)
su_flags->bits.abm_level = 1;
- if (stream_update->dpms_off)
+ if (stream_update->dpms_off) {
su_flags->bits.dpms_off = 1;
+ elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL | LOCK_DESCRIPTOR_LINK);
+ }
if (stream_update->gamut_remap)
su_flags->bits.gamut_remap = 1;
@@ -2367,23 +3002,44 @@ static enum surface_update_type check_update_surfaces_for_stream(
if (stream_update->dsc_config)
su_flags->bits.dsc_changed = 1;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
if (stream_update->mst_bw_update)
su_flags->bits.mst_bw = 1;
-#endif
- if (su_flags->raw != 0)
- overall_type = UPDATE_TYPE_FULL;
+ if (stream_update->stream->freesync_on_desktop &&
+ (stream_update->vrr_infopacket || stream_update->allow_freesync ||
+ stream_update->vrr_active_variable || stream_update->vrr_active_fixed))
+ su_flags->bits.fams_changed = 1;
+
+ if (stream_update->scaler_sharpener_update)
+ su_flags->bits.scaler_sharpener = 1;
+
+ if (stream_update->sharpening_required)
+ su_flags->bits.sharpening_required = 1;
+
+ if (stream_update->output_color_space)
+ su_flags->bits.out_csc = 1;
+
+ // TODO: Make each elevation explicit, so as not to override fast stream in crtc_timing_adjust
+ if (su_flags->raw)
+ elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL);
- if (stream_update->output_csc_transform || stream_update->output_color_space)
+ // Non-global cases
+ if (stream_update->output_csc_transform) {
su_flags->bits.out_csc = 1;
+ elevate_update_type(&overall_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM);
+ }
+
+ if (!check_config->enable_legacy_fast_update && stream_update->out_transfer_func) {
+ su_flags->bits.out_tf = 1;
+ elevate_update_type(&overall_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM);
+ }
}
- for (i = 0 ; i < surface_count; i++) {
- enum surface_update_type type =
- det_surface_update(dc, &updates[i]);
+ for (int i = 0 ; i < surface_count; i++) {
+ struct surface_update_descriptor inner_type =
+ det_surface_update(check_config, &updates[i]);
- elevate_update_type(&overall_type, type);
+ elevate_update_type(&overall_type, inner_type.update_type, inner_type.lock_descriptor);
}
return overall_type;
@@ -2394,46 +3050,18 @@ static enum surface_update_type check_update_surfaces_for_stream(
*
* See :c:type:`enum surface_update_type <surface_update_type>` for explanation of update types
*/
-enum surface_update_type dc_check_update_surfaces_for_stream(
- struct dc *dc,
+struct surface_update_descriptor dc_check_update_surfaces_for_stream(
+ const struct dc_check_config *check_config,
struct dc_surface_update *updates,
int surface_count,
- struct dc_stream_update *stream_update,
- const struct dc_stream_status *stream_status)
+ struct dc_stream_update *stream_update)
{
- int i;
- enum surface_update_type type;
-
if (stream_update)
stream_update->stream->update_flags.raw = 0;
- for (i = 0; i < surface_count; i++)
+ for (size_t i = 0; i < surface_count; i++)
updates[i].surface->update_flags.raw = 0;
- type = check_update_surfaces_for_stream(dc, updates, surface_count, stream_update, stream_status);
- if (type == UPDATE_TYPE_FULL) {
- if (stream_update) {
- uint32_t dsc_changed = stream_update->stream->update_flags.bits.dsc_changed;
- stream_update->stream->update_flags.raw = 0xFFFFFFFF;
- stream_update->stream->update_flags.bits.dsc_changed = dsc_changed;
- }
- for (i = 0; i < surface_count; i++)
- updates[i].surface->update_flags.raw = 0xFFFFFFFF;
- }
-
- if (type == UPDATE_TYPE_FAST) {
- // If there's an available clock comparator, we use that.
- if (dc->clk_mgr->funcs->are_clock_states_equal) {
- if (!dc->clk_mgr->funcs->are_clock_states_equal(&dc->clk_mgr->clks, &dc->current_state->bw_ctx.bw.dcn.clk))
- dc->optimized_required = true;
- // Else we fallback to mem compare.
- } else if (memcmp(&dc->current_state->bw_ctx.bw.dcn.clk, &dc->clk_mgr->clks, offsetof(struct dc_clocks, prev_p_state_change_support)) != 0) {
- dc->optimized_required = true;
- }
-
- dc->optimized_required |= dc->wm_optimized_required;
- }
-
- return type;
+ return check_update_surfaces_for_stream(check_config, updates, surface_count, stream_update);
}
static struct dc_stream_status *stream_get_status(
@@ -2513,55 +3141,66 @@ static void copy_surface_update_to_plane(
srf_update->plane_info->layer_index;
}
- if (srf_update->gamma &&
- (surface->gamma_correction !=
- srf_update->gamma)) {
- memcpy(&surface->gamma_correction->entries,
+ if (srf_update->gamma) {
+ memcpy(&surface->gamma_correction.entries,
&srf_update->gamma->entries,
sizeof(struct dc_gamma_entries));
- surface->gamma_correction->is_identity =
+ surface->gamma_correction.is_identity =
srf_update->gamma->is_identity;
- surface->gamma_correction->num_entries =
+ surface->gamma_correction.num_entries =
srf_update->gamma->num_entries;
- surface->gamma_correction->type =
+ surface->gamma_correction.type =
srf_update->gamma->type;
}
- if (srf_update->in_transfer_func &&
- (surface->in_transfer_func !=
- srf_update->in_transfer_func)) {
- surface->in_transfer_func->sdr_ref_white_level =
+ if (srf_update->in_transfer_func) {
+ surface->in_transfer_func.sdr_ref_white_level =
srf_update->in_transfer_func->sdr_ref_white_level;
- surface->in_transfer_func->tf =
+ surface->in_transfer_func.tf =
srf_update->in_transfer_func->tf;
- surface->in_transfer_func->type =
+ surface->in_transfer_func.type =
srf_update->in_transfer_func->type;
- memcpy(&surface->in_transfer_func->tf_pts,
+ memcpy(&surface->in_transfer_func.tf_pts,
&srf_update->in_transfer_func->tf_pts,
sizeof(struct dc_transfer_func_distributed_points));
}
- if (srf_update->func_shaper &&
- (surface->in_shaper_func !=
- srf_update->func_shaper))
- memcpy(surface->in_shaper_func, srf_update->func_shaper,
- sizeof(*surface->in_shaper_func));
+ if (srf_update->cm2_params) {
+ surface->mcm_shaper_3dlut_setting = srf_update->cm2_params->component_settings.shaper_3dlut_setting;
+ surface->mcm_lut1d_enable = srf_update->cm2_params->component_settings.lut1d_enable;
+ surface->mcm_luts = srf_update->cm2_params->cm2_luts;
+ }
+
+ if (srf_update->func_shaper) {
+ memcpy(&surface->in_shaper_func, srf_update->func_shaper,
+ sizeof(surface->in_shaper_func));
+
+ if (surface->mcm_shaper_3dlut_setting >= DC_CM2_SHAPER_3DLUT_SETTING_ENABLE_SHAPER)
+ surface->mcm_luts.shaper = &surface->in_shaper_func;
+ }
- if (srf_update->lut3d_func &&
- (surface->lut3d_func !=
- srf_update->lut3d_func))
- memcpy(surface->lut3d_func, srf_update->lut3d_func,
- sizeof(*surface->lut3d_func));
+ if (srf_update->lut3d_func)
+ memcpy(&surface->lut3d_func, srf_update->lut3d_func,
+ sizeof(surface->lut3d_func));
if (srf_update->hdr_mult.value)
surface->hdr_mult =
srf_update->hdr_mult;
- if (srf_update->blend_tf &&
- (surface->blend_tf !=
- srf_update->blend_tf))
- memcpy(surface->blend_tf, srf_update->blend_tf,
- sizeof(*surface->blend_tf));
+ if (srf_update->sdr_white_level_nits)
+ surface->sdr_white_level_nits =
+ srf_update->sdr_white_level_nits;
+
+ if (srf_update->blend_tf) {
+ memcpy(&surface->blend_tf, srf_update->blend_tf,
+ sizeof(surface->blend_tf));
+
+ if (surface->mcm_lut1d_enable)
+ surface->mcm_luts.lut1d_func = &surface->blend_tf;
+ }
+
+ if (srf_update->cm2_params || srf_update->blend_tf)
+ surface->lut_bank_a = !surface->lut_bank_a;
if (srf_update->input_csc_color_matrix)
surface->input_csc_color_matrix =
@@ -2574,6 +3213,14 @@ static void copy_surface_update_to_plane(
if (srf_update->gamut_remap_matrix)
surface->gamut_remap_matrix =
*srf_update->gamut_remap_matrix;
+
+ if (srf_update->cursor_csc_color_matrix)
+ surface->cursor_csc_color_matrix =
+ *srf_update->cursor_csc_color_matrix;
+
+ if (srf_update->bias_and_scale.bias_and_scale_valid)
+ surface->bias_and_scale =
+ srf_update->bias_and_scale;
}
static void copy_stream_update_to_stream(struct dc *dc,
@@ -2592,14 +3239,13 @@ static void copy_stream_update_to_stream(struct dc *dc,
if (update->dst.height && update->dst.width)
stream->dst = update->dst;
- if (update->out_transfer_func &&
- stream->out_transfer_func != update->out_transfer_func) {
- stream->out_transfer_func->sdr_ref_white_level =
+ if (update->out_transfer_func) {
+ stream->out_transfer_func.sdr_ref_white_level =
update->out_transfer_func->sdr_ref_white_level;
- stream->out_transfer_func->tf = update->out_transfer_func->tf;
- stream->out_transfer_func->type =
+ stream->out_transfer_func.tf = update->out_transfer_func->tf;
+ stream->out_transfer_func.type =
update->out_transfer_func->type;
- memcpy(&stream->out_transfer_func->tf_pts,
+ memcpy(&stream->out_transfer_func.tf_pts,
&update->out_transfer_func->tf_pts,
sizeof(struct dc_transfer_func_distributed_points));
}
@@ -2610,11 +3256,8 @@ static void copy_stream_update_to_stream(struct dc *dc,
if (update->abm_level)
stream->abm_level = *update->abm_level;
- if (update->periodic_interrupt0)
- stream->periodic_interrupt0 = *update->periodic_interrupt0;
-
- if (update->periodic_interrupt1)
- stream->periodic_interrupt1 = *update->periodic_interrupt1;
+ if (update->periodic_interrupt)
+ stream->periodic_interrupt = *update->periodic_interrupt;
if (update->gamut_remap)
stream->gamut_remap_matrix = *update->gamut_remap;
@@ -2632,15 +3275,48 @@ static void copy_stream_update_to_stream(struct dc *dc,
if (update->vrr_infopacket)
stream->vrr_infopacket = *update->vrr_infopacket;
+ if (update->hw_cursor_req)
+ stream->hw_cursor_req = *update->hw_cursor_req;
+
+ if (update->allow_freesync)
+ stream->allow_freesync = *update->allow_freesync;
+
+ if (update->vrr_active_variable)
+ stream->vrr_active_variable = *update->vrr_active_variable;
+
+ if (update->vrr_active_fixed)
+ stream->vrr_active_fixed = *update->vrr_active_fixed;
+
+ if (update->crtc_timing_adjust) {
+ if (stream->adjust.v_total_min != update->crtc_timing_adjust->v_total_min ||
+ stream->adjust.v_total_max != update->crtc_timing_adjust->v_total_max ||
+ stream->adjust.timing_adjust_pending)
+ update->crtc_timing_adjust->timing_adjust_pending = true;
+ stream->adjust = *update->crtc_timing_adjust;
+ update->crtc_timing_adjust->timing_adjust_pending = false;
+ }
+
if (update->dpms_off)
stream->dpms_off = *update->dpms_off;
+ if (update->hfvsif_infopacket)
+ stream->hfvsif_infopacket = *update->hfvsif_infopacket;
+
+ if (update->vtem_infopacket)
+ stream->vtem_infopacket = *update->vtem_infopacket;
+
if (update->vsc_infopacket)
stream->vsc_infopacket = *update->vsc_infopacket;
if (update->vsp_infopacket)
stream->vsp_infopacket = *update->vsp_infopacket;
+ if (update->adaptive_sync_infopacket)
+ stream->adaptive_sync_infopacket = *update->adaptive_sync_infopacket;
+
+ if (update->avi_infopacket)
+ stream->avi_infopacket = *update->avi_infopacket;
+
if (update->dither_option)
stream->dither_option = *update->dither_option;
@@ -2663,25 +3339,277 @@ static void copy_stream_update_to_stream(struct dc *dc,
update->dsc_config->num_slices_v != 0);
/* Use temporarry context for validating new DSC config */
- struct dc_state *dsc_validate_context = dc_create_state(dc);
+ struct dc_state *dsc_validate_context = dc_state_create_copy(dc->current_state);
if (dsc_validate_context) {
- dc_resource_state_copy_construct(dc->current_state, dsc_validate_context);
-
stream->timing.dsc_cfg = *update->dsc_config;
stream->timing.flags.DSC = enable_dsc;
- if (!dc->res_pool->funcs->validate_bandwidth(dc, dsc_validate_context, true)) {
+ if (dc->res_pool->funcs->validate_bandwidth(dc, dsc_validate_context,
+ DC_VALIDATE_MODE_ONLY) != DC_OK) {
stream->timing.dsc_cfg = old_dsc_cfg;
stream->timing.flags.DSC = old_dsc_enabled;
update->dsc_config = NULL;
}
- dc_release_state(dsc_validate_context);
+ dc_state_release(dsc_validate_context);
} else {
DC_ERROR("Failed to allocate new validate context for DSC change\n");
update->dsc_config = NULL;
}
}
+ if (update->scaler_sharpener_update)
+ stream->scaler_sharpener_update = *update->scaler_sharpener_update;
+ if (update->sharpening_required)
+ stream->sharpening_required = *update->sharpening_required;
+}
+
+static void backup_planes_and_stream_state(
+ struct dc_scratch_space *scratch,
+ struct dc_stream_state *stream)
+{
+ int i;
+ struct dc_stream_status *status = dc_stream_get_status(stream);
+
+ if (!status)
+ return;
+
+ for (i = 0; i < status->plane_count; i++) {
+ dc_plane_copy_config(&scratch->plane_states[i], status->plane_states[i]);
+ }
+ scratch->stream_state = *stream;
+}
+
+static void restore_planes_and_stream_state(
+ struct dc_scratch_space *scratch,
+ struct dc_stream_state *stream)
+{
+ int i;
+ struct dc_stream_status *status = dc_stream_get_status(stream);
+
+ if (!status)
+ return;
+
+ for (i = 0; i < status->plane_count; i++) {
+ dc_plane_copy_config(status->plane_states[i], &scratch->plane_states[i]);
+ }
+
+ // refcount is persistent
+ struct kref temp_refcount = stream->refcount;
+ *stream = scratch->stream_state;
+ stream->refcount = temp_refcount;
+}
+
+/**
+ * update_seamless_boot_flags() - Helper function for updating seamless boot flags
+ *
+ * @dc: Current DC state
+ * @context: New DC state to be programmed
+ * @surface_count: Number of surfaces that have an update
+ * @stream: Corresponding stream to be updated in the current flip
+ *
+ * Updating seamless boot flags does not need to be part of the commit sequence. This
+ * helper function will update the seamless boot flags on each flip (if required)
+ * outside of the HW commit sequence (fast or slow).
+ *
+ * Return: void
+ */
+static void update_seamless_boot_flags(struct dc *dc,
+ struct dc_state *context,
+ int surface_count,
+ struct dc_stream_state *stream)
+{
+ if (get_seamless_boot_stream_count(context) > 0 && (surface_count > 0 || stream->dpms_off)) {
+ /* Optimize seamless boot flag keeps clocks and watermarks high until
+ * first flip. After first flip, optimization is required to lower
+ * bandwidth. Important to note that it is expected UEFI will
+ * only light up a single display on POST, therefore we only expect
+ * one stream with seamless boot flag set.
+ */
+ if (stream->apply_seamless_boot_optimization) {
+ stream->apply_seamless_boot_optimization = false;
+
+ if (get_seamless_boot_stream_count(context) == 0)
+ dc->optimized_required = true;
+ }
+ }
+}
+
+static bool full_update_required_weak(
+ const struct dc *dc,
+ const struct dc_surface_update *srf_updates,
+ int surface_count,
+ const struct dc_stream_update *stream_update,
+ const struct dc_stream_state *stream);
+
+/**
+ * update_planes_and_stream_state() - The function takes planes and stream
+ * updates as inputs and determines the appropriate update type. If update type
+ * is FULL, the function allocates a new context, populates and validates it.
+ * Otherwise, it updates current dc context. The function will return both
+ * new_context and new_update_type back to the caller. The function also backs
+ * up both current and new contexts into corresponding dc state scratch memory.
+ * TODO: The function does too many things, and even conditionally allocates dc
+ * context memory implicitly. We should consider breaking it down.
+ *
+ * @dc: Current DC state
+ * @srf_updates: an array of surface updates
+ * @surface_count: surface update count
+ * @stream: Corresponding stream to be updated
+ * @stream_update: stream update
+ * @new_update_type: [out] determined update type by the function
+ * @new_context: [out] new context allocated and validated if update type is
+ * FULL, reference to current context if update type is less than FULL.
+ *
+ * Return: true if a valid update is populated into new_context, false
+ * otherwise.
+ */
+static bool update_planes_and_stream_state(struct dc *dc,
+ struct dc_surface_update *srf_updates, int surface_count,
+ struct dc_stream_state *stream,
+ struct dc_stream_update *stream_update,
+ enum surface_update_type *new_update_type,
+ struct dc_state **new_context)
+{
+ struct dc_state *context;
+ int i, j;
+ enum surface_update_type update_type;
+ const struct dc_stream_status *stream_status;
+ struct dc_context *dc_ctx = dc->ctx;
+
+ stream_status = dc_stream_get_status(stream);
+
+ if (!stream_status) {
+ if (surface_count) /* Only an error condition if surf_count non-zero*/
+ ASSERT(false);
+
+ return false; /* Cannot commit surface to stream that is not committed */
+ }
+
+ context = dc->current_state;
+ update_type = dc_check_update_surfaces_for_stream(
+ &dc->check_config, srf_updates, surface_count, stream_update).update_type;
+ if (full_update_required_weak(dc, srf_updates, surface_count, stream_update, stream))
+ update_type = UPDATE_TYPE_FULL;
+
+ /* It is possible to receive a flip for one plane while there are multiple flip_immediate planes in the same stream.
+ * E.g. Desktop and MPO plane are flip_immediate but only the MPO plane received a flip
+ * Force the other flip_immediate planes to flip so GSL doesn't wait for a flip that won't come.
+ */
+ force_immediate_gsl_plane_flip(dc, srf_updates, surface_count);
+ if (update_type == UPDATE_TYPE_FULL)
+ backup_planes_and_stream_state(&dc->scratch.current_state, stream);
+
+ /* update current stream with the new updates */
+ copy_stream_update_to_stream(dc, context, stream, stream_update);
+
+ /* do not perform surface update if surface has invalid dimensions
+ * (all zero) and no scaling_info is provided
+ */
+ if (surface_count > 0) {
+ for (i = 0; i < surface_count; i++) {
+ if ((srf_updates[i].surface->src_rect.width == 0 ||
+ srf_updates[i].surface->src_rect.height == 0 ||
+ srf_updates[i].surface->dst_rect.width == 0 ||
+ srf_updates[i].surface->dst_rect.height == 0) &&
+ (!srf_updates[i].scaling_info ||
+ srf_updates[i].scaling_info->src_rect.width == 0 ||
+ srf_updates[i].scaling_info->src_rect.height == 0 ||
+ srf_updates[i].scaling_info->dst_rect.width == 0 ||
+ srf_updates[i].scaling_info->dst_rect.height == 0)) {
+ DC_ERROR("Invalid src/dst rects in surface update!\n");
+ return false;
+ }
+ }
+ }
+
+ if (update_type == UPDATE_TYPE_FULL) {
+ if (stream_update) {
+ uint32_t dsc_changed = stream_update->stream->update_flags.bits.dsc_changed;
+ stream_update->stream->update_flags.raw = 0xFFFFFFFF;
+ stream_update->stream->update_flags.bits.dsc_changed = dsc_changed;
+ }
+ for (i = 0; i < surface_count; i++)
+ srf_updates[i].surface->update_flags.raw = 0xFFFFFFFF;
+ }
+
+ if (update_type >= update_surface_trace_level)
+ update_surface_trace(dc, srf_updates, surface_count);
+
+ for (i = 0; i < surface_count; i++)
+ copy_surface_update_to_plane(srf_updates[i].surface, &srf_updates[i]);
+
+ if (update_type >= UPDATE_TYPE_FULL) {
+ struct dc_plane_state *new_planes[MAX_SURFACES] = {0};
+
+ for (i = 0; i < surface_count; i++)
+ new_planes[i] = srf_updates[i].surface;
+
+ /* initialize scratch memory for building context */
+ context = dc_state_create_copy(dc->current_state);
+ if (context == NULL) {
+ DC_ERROR("Failed to allocate new validate context!\n");
+ return false;
+ }
+
+ /* For each full update, remove all existing phantom pipes first.
+ * Ensures that we have enough pipes for newly added MPO planes
+ */
+ dc_state_remove_phantom_streams_and_planes(dc, context);
+ dc_state_release_phantom_streams_and_planes(dc, context);
+
+ /*remove old surfaces from context */
+ if (!dc_state_rem_all_planes_for_stream(dc, stream, context)) {
+
+ BREAK_TO_DEBUGGER();
+ goto fail;
+ }
+
+ /* add surface to context */
+ if (!dc_state_add_all_planes_for_stream(dc, stream, new_planes, surface_count, context)) {
+
+ BREAK_TO_DEBUGGER();
+ goto fail;
+ }
+ }
+
+ /* save update parameters into surface */
+ for (i = 0; i < surface_count; i++) {
+ struct dc_plane_state *surface = srf_updates[i].surface;
+
+ if (update_type != UPDATE_TYPE_MED)
+ continue;
+ if (surface->update_flags.bits.position_change) {
+ for (j = 0; j < dc->res_pool->pipe_count; j++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j];
+
+ if (pipe_ctx->plane_state != surface)
+ continue;
+
+ resource_build_scaling_params(pipe_ctx);
+ }
+ }
+ }
+
+ if (update_type == UPDATE_TYPE_FULL) {
+ if (dc->res_pool->funcs->validate_bandwidth(dc, context, DC_VALIDATE_MODE_AND_PROGRAMMING) != DC_OK) {
+ BREAK_TO_DEBUGGER();
+ goto fail;
+ }
+ }
+ update_seamless_boot_flags(dc, context, surface_count, stream);
+
+ *new_context = context;
+ *new_update_type = update_type;
+ if (update_type == UPDATE_TYPE_FULL)
+ backup_planes_and_stream_state(&dc->scratch.new_state, stream);
+
+ return true;
+
+fail:
+ dc_state_release(context);
+
+ return false;
+
}
static void commit_planes_do_stream_update(struct dc *dc,
@@ -2696,22 +3624,26 @@ static void commit_planes_do_stream_update(struct dc *dc,
for (j = 0; j < dc->res_pool->pipe_count; j++) {
struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j];
- if (!pipe_ctx->top_pipe && !pipe_ctx->prev_odm_pipe && pipe_ctx->stream == stream) {
-
- if (stream_update->periodic_interrupt0 &&
- dc->hwss.setup_periodic_interrupt)
- dc->hwss.setup_periodic_interrupt(dc, pipe_ctx, VLINE0);
+ if (resource_is_pipe_type(pipe_ctx, OTG_MASTER) && pipe_ctx->stream == stream) {
- if (stream_update->periodic_interrupt1 &&
- dc->hwss.setup_periodic_interrupt)
- dc->hwss.setup_periodic_interrupt(dc, pipe_ctx, VLINE1);
+ if (stream_update->periodic_interrupt && dc->hwss.setup_periodic_interrupt)
+ dc->hwss.setup_periodic_interrupt(dc, pipe_ctx);
if ((stream_update->hdr_static_metadata && !stream->use_dynamic_meta) ||
stream_update->vrr_infopacket ||
stream_update->vsc_infopacket ||
- stream_update->vsp_infopacket) {
+ stream_update->vsp_infopacket ||
+ stream_update->hfvsif_infopacket ||
+ stream_update->adaptive_sync_infopacket ||
+ stream_update->vtem_infopacket ||
+ stream_update->avi_infopacket) {
resource_build_info_frame(pipe_ctx);
dc->hwss.update_info_frame(pipe_ctx);
+
+ if (dc_is_dp_signal(pipe_ctx->stream->signal))
+ dc->link_srv->dp_trace_source_sequence(
+ pipe_ctx->stream->link,
+ DPCD_SOURCE_SEQ_AFTER_UPDATE_INFO_FRAME);
}
if (stream_update->hdr_static_metadata &&
@@ -2741,35 +3673,54 @@ static void commit_planes_do_stream_update(struct dc *dc,
}
}
+ if (stream_update->cursor_attributes)
+ program_cursor_attributes(dc, stream);
+
+ if (stream_update->cursor_position)
+ program_cursor_position(dc, stream);
/* Full fe update*/
if (update_type == UPDATE_TYPE_FAST)
continue;
if (stream_update->dsc_config)
- dp_update_dsc_config(pipe_ctx);
+ dc->link_srv->update_dsc_config(pipe_ctx);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
if (stream_update->mst_bw_update) {
if (stream_update->mst_bw_update->is_increase)
- dc_link_increase_mst_payload(pipe_ctx, stream_update->mst_bw_update->mst_stream_bw);
+ dc->link_srv->increase_mst_payload(pipe_ctx,
+ stream_update->mst_bw_update->mst_stream_bw);
else
- dc_link_reduce_mst_payload(pipe_ctx, stream_update->mst_bw_update->mst_stream_bw);
+ dc->link_srv->reduce_mst_payload(pipe_ctx,
+ stream_update->mst_bw_update->mst_stream_bw);
}
-#endif
if (stream_update->pending_test_pattern) {
- dc_link_dp_set_test_pattern(stream->link,
+ /*
+ * test pattern params depend on ODM topology
+ * changes that we could be applying to front
+ * end. Since at the current stage front end
+ * changes are not yet applied, we can only
+ * apply test pattern in hw based on current
+ * state and populate the final test pattern
+ * params in new state. If current and new test
+ * pattern params are different as a result of
+ * different ODM topology being used, it will be
+ * detected and handled during front end
+ * programming update.
+ */
+ dc->link_srv->dp_set_test_pattern(stream->link,
stream->test_pattern.type,
stream->test_pattern.color_space,
stream->test_pattern.p_link_settings,
stream->test_pattern.p_custom_pattern,
stream->test_pattern.cust_pattern_size);
+ resource_build_test_pattern_params(&context->res_ctx, pipe_ctx);
}
if (stream_update->dpms_off) {
if (*stream_update->dpms_off) {
- core_link_disable_stream(pipe_ctx);
+ dc->link_srv->set_dpms_off(pipe_ctx);
/* for dpms, keep acquired resources*/
if (pipe_ctx->stream_res.audio && !dc->debug.az_endpoint_mute_only)
pipe_ctx->stream_res.audio->funcs->az_disable(pipe_ctx->stream_res.audio);
@@ -2779,9 +3730,15 @@ static void commit_planes_do_stream_update(struct dc *dc,
} else {
if (get_seamless_boot_stream_count(context) == 0)
dc->hwss.prepare_bandwidth(dc, dc->current_state);
-
- core_link_enable_stream(dc->current_state, pipe_ctx);
+ dc->link_srv->set_dpms_on(dc->current_state, pipe_ctx);
}
+ } else if (pipe_ctx->stream->link->wa_flags.blank_stream_on_ocs_change && stream_update->output_color_space
+ && !stream->dpms_off && dc_is_dp_signal(pipe_ctx->stream->signal)) {
+ /*
+ * Workaround for firmware issue in some receivers where they don't pick up
+ * correct output color space unless DP link is disabled/re-enabled
+ */
+ dc->link_srv->set_dpms_on(dc->current_state, pipe_ctx);
}
if (stream_update->abm_level && pipe_ctx->stream_res.abm) {
@@ -2805,6 +3762,329 @@ static void commit_planes_do_stream_update(struct dc *dc,
}
}
+static bool dc_dmub_should_send_dirty_rect_cmd(struct dc *dc, struct dc_stream_state *stream)
+{
+ if ((stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1
+ || stream->link->psr_settings.psr_version == DC_PSR_VERSION_1)
+ && stream->ctx->dce_version >= DCN_VERSION_3_1)
+ return true;
+
+ if (stream->link->replay_settings.config.replay_supported)
+ return true;
+
+ if (stream->ctx->dce_version >= DCN_VERSION_3_5 && stream->abm_level)
+ return true;
+
+ return false;
+}
+
+void dc_dmub_update_dirty_rect(struct dc *dc,
+ int surface_count,
+ struct dc_stream_state *stream,
+ struct dc_surface_update *srf_updates,
+ struct dc_state *context)
+{
+ union dmub_rb_cmd cmd;
+ struct dmub_cmd_update_dirty_rect_data *update_dirty_rect;
+ unsigned int i, j;
+ unsigned int panel_inst = 0;
+
+ if (!dc_dmub_should_send_dirty_rect_cmd(dc, stream))
+ return;
+
+ if (!dc_get_edp_link_panel_inst(dc, stream->link, &panel_inst))
+ return;
+
+ memset(&cmd, 0x0, sizeof(cmd));
+ cmd.update_dirty_rect.header.type = DMUB_CMD__UPDATE_DIRTY_RECT;
+ cmd.update_dirty_rect.header.sub_type = 0;
+ cmd.update_dirty_rect.header.payload_bytes =
+ sizeof(cmd.update_dirty_rect) -
+ sizeof(cmd.update_dirty_rect.header);
+ update_dirty_rect = &cmd.update_dirty_rect.update_dirty_rect_data;
+ for (i = 0; i < surface_count; i++) {
+ struct dc_plane_state *plane_state = srf_updates[i].surface;
+ const struct dc_flip_addrs *flip_addr = srf_updates[i].flip_addr;
+
+ if (!srf_updates[i].surface || !flip_addr)
+ continue;
+ /* Do not send in immediate flip mode */
+ if (srf_updates[i].surface->flip_immediate)
+ continue;
+
+ update_dirty_rect->cmd_version = DMUB_CMD_PSR_CONTROL_VERSION_1;
+ update_dirty_rect->dirty_rect_count = flip_addr->dirty_rect_count;
+ memcpy(update_dirty_rect->src_dirty_rects, flip_addr->dirty_rects,
+ sizeof(flip_addr->dirty_rects));
+ for (j = 0; j < dc->res_pool->pipe_count; j++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j];
+
+ if (pipe_ctx->stream != stream)
+ continue;
+ if (pipe_ctx->plane_state != plane_state)
+ continue;
+
+ update_dirty_rect->panel_inst = panel_inst;
+ update_dirty_rect->pipe_idx = j;
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+ }
+ }
+}
+
+static void build_dmub_update_dirty_rect(
+ struct dc *dc,
+ int surface_count,
+ struct dc_stream_state *stream,
+ struct dc_surface_update *srf_updates,
+ struct dc_state *context,
+ struct dc_dmub_cmd dc_dmub_cmd[],
+ unsigned int *dmub_cmd_count)
+{
+ union dmub_rb_cmd cmd;
+ struct dmub_cmd_update_dirty_rect_data *update_dirty_rect;
+ unsigned int i, j;
+ unsigned int panel_inst = 0;
+
+ if (!dc_dmub_should_send_dirty_rect_cmd(dc, stream))
+ return;
+
+ if (!dc_get_edp_link_panel_inst(dc, stream->link, &panel_inst))
+ return;
+
+ memset(&cmd, 0x0, sizeof(cmd));
+ cmd.update_dirty_rect.header.type = DMUB_CMD__UPDATE_DIRTY_RECT;
+ cmd.update_dirty_rect.header.sub_type = 0;
+ cmd.update_dirty_rect.header.payload_bytes =
+ sizeof(cmd.update_dirty_rect) -
+ sizeof(cmd.update_dirty_rect.header);
+ update_dirty_rect = &cmd.update_dirty_rect.update_dirty_rect_data;
+ for (i = 0; i < surface_count; i++) {
+ struct dc_plane_state *plane_state = srf_updates[i].surface;
+ const struct dc_flip_addrs *flip_addr = srf_updates[i].flip_addr;
+
+ if (!srf_updates[i].surface || !flip_addr)
+ continue;
+ /* Do not send in immediate flip mode */
+ if (srf_updates[i].surface->flip_immediate)
+ continue;
+ update_dirty_rect->cmd_version = DMUB_CMD_PSR_CONTROL_VERSION_1;
+ update_dirty_rect->dirty_rect_count = flip_addr->dirty_rect_count;
+ memcpy(update_dirty_rect->src_dirty_rects, flip_addr->dirty_rects,
+ sizeof(flip_addr->dirty_rects));
+ for (j = 0; j < dc->res_pool->pipe_count; j++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j];
+
+ if (pipe_ctx->stream != stream)
+ continue;
+ if (pipe_ctx->plane_state != plane_state)
+ continue;
+ update_dirty_rect->panel_inst = panel_inst;
+ update_dirty_rect->pipe_idx = j;
+ dc_dmub_cmd[*dmub_cmd_count].dmub_cmd = cmd;
+ dc_dmub_cmd[*dmub_cmd_count].wait_type = DM_DMUB_WAIT_TYPE_NO_WAIT;
+ (*dmub_cmd_count)++;
+ }
+ }
+}
+
+static bool check_address_only_update(union surface_update_flags update_flags)
+{
+ union surface_update_flags addr_only_update_flags;
+ addr_only_update_flags.raw = 0;
+ addr_only_update_flags.bits.addr_update = 1;
+
+ return update_flags.bits.addr_update &&
+ !(update_flags.raw & ~addr_only_update_flags.raw);
+}
+
+/**
+ * build_dmub_cmd_list() - Build an array of DMCUB commands to be sent to DMCUB
+ *
+ * @dc: Current DC state
+ * @srf_updates: Array of surface updates
+ * @surface_count: Number of surfaces that have an update
+ * @stream: Corresponding stream to be updated in the current flip
+ * @context: New DC state to be programmed
+ *
+ * @dc_dmub_cmd: Array of DMCUB commands to be sent to DMCUB
+ * @dmub_cmd_count: Count indicating the number of DMCUB commands in dc_dmub_cmd array
+ *
+ * This function builds an array of DMCUB commands to be sent to DMCUB. This function is required
+ * to build an array of commands and have them sent while the OTG lock is acquired.
+ *
+ * Return: void
+ */
+static void build_dmub_cmd_list(struct dc *dc,
+ struct dc_surface_update *srf_updates,
+ int surface_count,
+ struct dc_stream_state *stream,
+ struct dc_state *context,
+ struct dc_dmub_cmd dc_dmub_cmd[],
+ unsigned int *dmub_cmd_count)
+{
+ // Initialize cmd count to 0
+ *dmub_cmd_count = 0;
+ build_dmub_update_dirty_rect(dc, surface_count, stream, srf_updates, context, dc_dmub_cmd, dmub_cmd_count);
+}
+
+static void commit_plane_for_stream_offload_fams2_flip(struct dc *dc,
+ struct dc_surface_update *srf_updates,
+ int surface_count,
+ struct dc_stream_state *stream,
+ struct dc_state *context)
+{
+ int i, j;
+
+ /* update dirty rect for PSR */
+ dc_dmub_update_dirty_rect(dc, surface_count, stream,
+ srf_updates, context);
+
+ /* Perform requested Updates */
+ for (i = 0; i < surface_count; i++) {
+ struct dc_plane_state *plane_state = srf_updates[i].surface;
+
+ for (j = 0; j < dc->res_pool->pipe_count; j++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j];
+
+ if (!should_update_pipe_for_stream(context, pipe_ctx, stream))
+ continue;
+
+ if (!should_update_pipe_for_plane(context, pipe_ctx, plane_state))
+ continue;
+
+ /* update pipe context for plane */
+ if (pipe_ctx->plane_state->update_flags.bits.addr_update)
+ dc->hwss.update_plane_addr(dc, pipe_ctx);
+ }
+ }
+
+ /* Send commands to DMCUB */
+ dc_dmub_srv_fams2_passthrough_flip(dc,
+ context,
+ stream,
+ srf_updates,
+ surface_count);
+}
+
+static void commit_planes_for_stream_fast(struct dc *dc,
+ struct dc_surface_update *srf_updates,
+ int surface_count,
+ struct dc_stream_state *stream,
+ struct dc_stream_update *stream_update,
+ enum surface_update_type update_type,
+ struct dc_state *context)
+{
+ int i, j;
+ struct pipe_ctx *top_pipe_to_program = NULL;
+ struct dc_stream_status *stream_status = NULL;
+ bool should_offload_fams2_flip = false;
+ bool should_lock_all_pipes = (update_type != UPDATE_TYPE_FAST);
+
+ if (should_lock_all_pipes)
+ determine_pipe_unlock_order(dc, context);
+
+ if (dc->debug.fams2_config.bits.enable &&
+ dc->debug.fams2_config.bits.enable_offload_flip &&
+ dc_state_is_fams2_in_use(dc, context)) {
+ /* if not offloading to HWFQ, offload to FAMS2 if needed */
+ should_offload_fams2_flip = true;
+ for (i = 0; i < surface_count; i++) {
+ if (srf_updates[i].surface &&
+ srf_updates[i].surface->update_flags.raw &&
+ !check_address_only_update(srf_updates[i].surface->update_flags)) {
+ /* more than address update, need to acquire FAMS2 lock */
+ should_offload_fams2_flip = false;
+ break;
+ }
+ }
+ if (stream_update) {
+ /* more than address update, need to acquire FAMS2 lock */
+ should_offload_fams2_flip = false;
+ }
+ }
+
+ dc_exit_ips_for_hw_access(dc);
+
+ dc_z10_restore(dc);
+
+ top_pipe_to_program = resource_get_otg_master_for_stream(
+ &context->res_ctx,
+ stream);
+
+ if (!top_pipe_to_program)
+ return;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe->stream && pipe->plane_state) {
+ if (!dc->debug.using_dml2)
+ set_p_state_switch_method(dc, context, pipe);
+
+ if (dc->debug.visual_confirm)
+ dc_update_visual_confirm_color(dc, context, pipe);
+ }
+ }
+
+ for (i = 0; i < surface_count; i++) {
+ struct dc_plane_state *plane_state = srf_updates[i].surface;
+ /*set logical flag for lock/unlock use*/
+ for (j = 0; j < dc->res_pool->pipe_count; j++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j];
+
+ if (!pipe_ctx->plane_state)
+ continue;
+ if (!should_update_pipe_for_plane(context, pipe_ctx, plane_state))
+ continue;
+
+ pipe_ctx->plane_state->triplebuffer_flips = false;
+ if (update_type == UPDATE_TYPE_FAST &&
+ dc->hwss.program_triplebuffer != NULL &&
+ !pipe_ctx->plane_state->flip_immediate && dc->debug.enable_tri_buf) {
+ /*triple buffer for VUpdate only*/
+ pipe_ctx->plane_state->triplebuffer_flips = true;
+ }
+ }
+ }
+
+ stream_status = dc_state_get_stream_status(context, stream);
+
+ if (should_offload_fams2_flip) {
+ commit_plane_for_stream_offload_fams2_flip(dc,
+ srf_updates,
+ surface_count,
+ stream,
+ context);
+ } else if (stream_status) {
+ build_dmub_cmd_list(dc,
+ srf_updates,
+ surface_count,
+ stream,
+ context,
+ context->dc_dmub_cmd,
+ &(context->dmub_cmd_count));
+ hwss_build_fast_sequence(dc,
+ context->dc_dmub_cmd,
+ context->dmub_cmd_count,
+ context->block_sequence,
+ &(context->block_sequence_steps),
+ top_pipe_to_program,
+ stream_status,
+ context);
+ hwss_execute_sequence(dc,
+ context->block_sequence,
+ context->block_sequence_steps);
+ }
+
+ /* Clear update flags so next flip doesn't have redundant programming
+ * (if there's no stream update, the update flags are not cleared).
+ * Surface updates are cleared unconditionally at the beginning of each flip,
+ * so no need to clear here.
+ */
+ if (top_pipe_to_program->stream)
+ top_pipe_to_program->stream->update_flags.raw = 0;
+}
+
static void commit_planes_for_stream(struct dc *dc,
struct dc_surface_update *srf_updates,
int surface_count,
@@ -2816,49 +4096,75 @@ static void commit_planes_for_stream(struct dc *dc,
int i, j;
struct pipe_ctx *top_pipe_to_program = NULL;
bool should_lock_all_pipes = (update_type != UPDATE_TYPE_FAST);
+ bool subvp_prev_use = false;
+ bool subvp_curr_use = false;
+ uint8_t current_stream_mask = 0;
+
+ if (should_lock_all_pipes)
+ determine_pipe_unlock_order(dc, context);
+ // Once we apply the new subvp context to hardware it won't be in the
+ // dc->current_state anymore, so we have to cache it before we apply
+ // the new SubVP context
+ subvp_prev_use = false;
+ dc_exit_ips_for_hw_access(dc);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
dc_z10_restore(dc);
-#endif
+ if (update_type == UPDATE_TYPE_FULL && dc->optimized_required)
+ hwss_process_outstanding_hw_updates(dc, dc->current_state);
- if (get_seamless_boot_stream_count(context) > 0 && surface_count > 0) {
- /* Optimize seamless boot flag keeps clocks and watermarks high until
- * first flip. After first flip, optimization is required to lower
- * bandwidth. Important to note that it is expected UEFI will
- * only light up a single display on POST, therefore we only expect
- * one stream with seamless boot flag set.
- */
- if (stream->apply_seamless_boot_optimization) {
- stream->apply_seamless_boot_optimization = false;
+ if (update_type != UPDATE_TYPE_FAST && dc->res_pool->funcs->prepare_mcache_programming)
+ dc->res_pool->funcs->prepare_mcache_programming(dc, context);
- if (get_seamless_boot_stream_count(context) == 0)
- dc->optimized_required = true;
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe->stream && pipe->plane_state) {
+ if (!dc->debug.using_dml2)
+ set_p_state_switch_method(dc, context, pipe);
+
+ if (dc->debug.visual_confirm)
+ dc_update_visual_confirm_color(dc, context, pipe);
}
}
if (update_type == UPDATE_TYPE_FULL) {
-#if defined(CONFIG_DRM_AMD_DC_DCN)
dc_allow_idle_optimizations(dc, false);
-#endif
if (get_seamless_boot_stream_count(context) == 0)
dc->hwss.prepare_bandwidth(dc, context);
+ if (dc->hwss.update_dsc_pg)
+ dc->hwss.update_dsc_pg(dc, context, false);
+
context_clock_trace(dc, context);
}
- for (j = 0; j < dc->res_pool->pipe_count; j++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j];
+ if (update_type == UPDATE_TYPE_FULL)
+ hwss_wait_for_outstanding_hw_updates(dc, dc->current_state);
- if (!pipe_ctx->top_pipe &&
- !pipe_ctx->prev_odm_pipe &&
- pipe_ctx->stream &&
- pipe_ctx->stream == stream) {
- top_pipe_to_program = pipe_ctx;
+ top_pipe_to_program = resource_get_otg_master_for_stream(
+ &context->res_ctx,
+ stream);
+ ASSERT(top_pipe_to_program != NULL);
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ // Check old context for SubVP
+ subvp_prev_use |= (dc_state_get_pipe_subvp_type(dc->current_state, old_pipe) == SUBVP_PHANTOM);
+ if (subvp_prev_use)
+ break;
+ }
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
+ subvp_curr_use = true;
+ break;
}
}
-#ifdef CONFIG_DRM_AMD_DC_DCN
if (stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE) {
struct pipe_ctx *mpcc_pipe;
struct pipe_ctx *odm_pipe;
@@ -2867,11 +4173,11 @@ static void commit_planes_for_stream(struct dc *dc,
for (odm_pipe = mpcc_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
odm_pipe->ttu_regs.min_ttu_vblank = MAX_TTU;
}
-#endif
if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed)
- if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {
- if (should_use_dmub_lock(stream->link)) {
+ if (top_pipe_to_program &&
+ top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {
+ if (should_use_dmub_inbox1_lock(dc, stream->link)) {
union dmub_hw_lock_flags hw_locks = { 0 };
struct dmub_hw_lock_inst_flags inst_flags = { 0 };
@@ -2887,14 +4193,36 @@ static void commit_planes_for_stream(struct dc *dc,
top_pipe_to_program->stream_res.tg);
}
- if (should_lock_all_pipes && dc->hwss.interdependent_update_lock)
+ if (dc->hwss.wait_for_dcc_meta_propagation) {
+ dc->hwss.wait_for_dcc_meta_propagation(dc, top_pipe_to_program);
+ }
+
+ if (dc->hwseq->funcs.wait_for_pipe_update_if_needed)
+ dc->hwseq->funcs.wait_for_pipe_update_if_needed(dc, top_pipe_to_program, update_type < UPDATE_TYPE_FULL);
+
+ if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
+ if (dc->hwss.subvp_pipe_control_lock)
+ dc->hwss.subvp_pipe_control_lock(dc, context, true, should_lock_all_pipes, NULL, subvp_prev_use);
+
+ if (dc->hwss.dmub_hw_control_lock)
+ dc->hwss.dmub_hw_control_lock(dc, context, true);
+
dc->hwss.interdependent_update_lock(dc, context, true);
- else
+ } else {
+ if (dc->hwss.subvp_pipe_control_lock)
+ dc->hwss.subvp_pipe_control_lock(dc, context, true, should_lock_all_pipes, top_pipe_to_program, subvp_prev_use);
+
+ if (dc->hwss.dmub_hw_control_lock)
+ dc->hwss.dmub_hw_control_lock(dc, context, true);
+
/* Lock the top pipe while updating plane addrs, since freesync requires
* plane addr update event triggers to be synchronized.
* top_pipe_to_program is expected to never be NULL
*/
dc->hwss.pipe_control_lock(dc, top_pipe_to_program, true);
+ }
+
+ dc_dmub_update_dirty_rect(dc, surface_count, stream, srf_updates, context);
// Stream updates
if (stream_update)
@@ -2910,37 +4238,69 @@ static void commit_planes_for_stream(struct dc *dc,
if (dc->hwss.program_front_end_for_ctx)
dc->hwss.program_front_end_for_ctx(dc, context);
- if (should_lock_all_pipes && dc->hwss.interdependent_update_lock)
+ if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
dc->hwss.interdependent_update_lock(dc, context, false);
- else
+ } else {
dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false);
+ }
dc->hwss.post_unlock_program_front_end(dc, context);
+
+ if (update_type != UPDATE_TYPE_FAST)
+ if (dc->hwss.commit_subvp_config)
+ dc->hwss.commit_subvp_config(dc, context);
+
+ /* Since phantom pipe programming is moved to post_unlock_program_front_end,
+ * move the SubVP lock to after the phantom pipes have been setup
+ */
+ if (dc->hwss.subvp_pipe_control_lock)
+ dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes,
+ NULL, subvp_prev_use);
+
+ if (dc->hwss.dmub_hw_control_lock)
+ dc->hwss.dmub_hw_control_lock(dc, context, false);
return;
}
- if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
- for (i = 0; i < surface_count; i++) {
- struct dc_plane_state *plane_state = srf_updates[i].surface;
- /*set logical flag for lock/unlock use*/
- for (j = 0; j < dc->res_pool->pipe_count; j++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j];
- if (!pipe_ctx->plane_state)
- continue;
- if (should_update_pipe_for_plane(context, pipe_ctx, plane_state))
- continue;
- pipe_ctx->plane_state->triplebuffer_flips = false;
- if (update_type == UPDATE_TYPE_FAST &&
+ if (update_type != UPDATE_TYPE_FAST) {
+ for (j = 0; j < dc->res_pool->pipe_count; j++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j];
+
+ if ((dc->debug.visual_confirm == VISUAL_CONFIRM_SUBVP ||
+ dc->debug.visual_confirm == VISUAL_CONFIRM_MCLK_SWITCH) &&
+ pipe_ctx->stream && pipe_ctx->plane_state) {
+ /* Only update visual confirm for SUBVP and Mclk switching here.
+ * The bar appears on all pipes, so we need to update the bar on all displays,
+ * so the information doesn't get stale.
+ */
+ dc->hwss.update_visual_confirm_color(dc, pipe_ctx,
+ pipe_ctx->plane_res.hubp->inst);
+ }
+ }
+ }
+
+ for (i = 0; i < surface_count; i++) {
+ struct dc_plane_state *plane_state = srf_updates[i].surface;
+
+ /*set logical flag for lock/unlock use*/
+ for (j = 0; j < dc->res_pool->pipe_count; j++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j];
+ if (!pipe_ctx->plane_state)
+ continue;
+ if (!should_update_pipe_for_plane(context, pipe_ctx, plane_state))
+ continue;
+ pipe_ctx->plane_state->triplebuffer_flips = false;
+ if (update_type == UPDATE_TYPE_FAST &&
dc->hwss.program_triplebuffer != NULL &&
!pipe_ctx->plane_state->flip_immediate && dc->debug.enable_tri_buf) {
- /*triple buffer for VUpdate only*/
- pipe_ctx->plane_state->triplebuffer_flips = true;
- }
- }
- if (update_type == UPDATE_TYPE_FULL) {
- /* force vsync flip when reconfiguring pipes to prevent underflow */
- plane_state->flip_immediate = false;
+ /*triple buffer for VUpdate only*/
+ pipe_ctx->plane_state->triplebuffer_flips = true;
}
}
+ if (update_type == UPDATE_TYPE_FULL) {
+ /* force vsync flip when reconfiguring pipes to prevent underflow */
+ plane_state->flip_immediate = false;
+ plane_state->triplebuffer_flips = false;
+ }
}
// Update Type FULL, Surface updates
@@ -2959,38 +4319,58 @@ static void commit_planes_for_stream(struct dc *dc,
if (update_type == UPDATE_TYPE_FAST)
continue;
- ASSERT(!pipe_ctx->plane_state->triplebuffer_flips);
-
- if (dc->hwss.program_triplebuffer != NULL && dc->debug.enable_tri_buf) {
- /*turn off triple buffer for full update*/
- dc->hwss.program_triplebuffer(
- dc, pipe_ctx, pipe_ctx->plane_state->triplebuffer_flips);
- }
stream_status =
stream_get_status(context, pipe_ctx->stream);
- if (dc->hwss.apply_ctx_for_surface)
+ if (dc->hwss.apply_ctx_for_surface && stream_status)
dc->hwss.apply_ctx_for_surface(
dc, pipe_ctx->stream, stream_status->plane_count, context);
}
}
+
+ for (j = 0; j < dc->res_pool->pipe_count; j++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j];
+
+ if (!pipe_ctx->plane_state)
+ continue;
+
+ /* Full fe update*/
+ if (update_type == UPDATE_TYPE_FAST)
+ continue;
+
+ ASSERT(!pipe_ctx->plane_state->triplebuffer_flips);
+ if (dc->hwss.program_triplebuffer != NULL && dc->debug.enable_tri_buf) {
+ /*turn off triple buffer for full update*/
+ dc->hwss.program_triplebuffer(
+ dc, pipe_ctx, pipe_ctx->plane_state->triplebuffer_flips);
+ }
+ }
+
if (dc->hwss.program_front_end_for_ctx && update_type != UPDATE_TYPE_FAST) {
dc->hwss.program_front_end_for_ctx(dc, context);
-#ifdef CONFIG_DRM_AMD_DC_DCN
+
+ //Pipe busy until some frame and line #
+ if (dc->hwseq->funcs.set_wait_for_update_needed_for_pipe && update_type == UPDATE_TYPE_FULL) {
+ for (j = 0; j < dc->res_pool->pipe_count; j++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j];
+
+ dc->hwseq->funcs.set_wait_for_update_needed_for_pipe(dc, pipe_ctx);
+ }
+ }
+
if (dc->debug.validate_dml_output) {
for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx cur_pipe = context->res_ctx.pipe_ctx[i];
- if (cur_pipe.stream == NULL)
+ struct pipe_ctx *cur_pipe = &context->res_ctx.pipe_ctx[i];
+ if (cur_pipe->stream == NULL)
continue;
- cur_pipe.plane_res.hubp->funcs->validate_dml_output(
- cur_pipe.plane_res.hubp, dc->ctx,
+ cur_pipe->plane_res.hubp->funcs->validate_dml_output(
+ cur_pipe->plane_res.hubp, dc->ctx,
&context->res_ctx.pipe_ctx[i].rq_regs,
&context->res_ctx.pipe_ctx[i].dlg_regs,
&context->res_ctx.pipe_ctx[i].ttu_regs);
}
}
-#endif
}
// Update Type FAST, Surface updates
@@ -3027,9 +4407,17 @@ static void commit_planes_for_stream(struct dc *dc,
if (!should_update_pipe_for_plane(context, pipe_ctx, plane_state))
continue;
+ if (srf_updates[i].cm2_params &&
+ srf_updates[i].cm2_params->cm2_luts.lut3d_data.lut3d_src ==
+ DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM &&
+ srf_updates[i].cm2_params->component_settings.shaper_3dlut_setting ==
+ DC_CM2_SHAPER_3DLUT_SETTING_ENABLE_SHAPER_3DLUT &&
+ dc->hwss.trigger_3dlut_dma_load)
+ dc->hwss.trigger_3dlut_dma_load(dc, pipe_ctx);
+
/*program triple buffer after lock based on flip type*/
if (dc->hwss.program_triplebuffer != NULL && dc->debug.enable_tri_buf) {
- /*only enable triplebuffer for fast_update*/
+ /*only enable triplebuffer for fast_update*/
dc->hwss.program_triplebuffer(
dc, pipe_ctx, pipe_ctx->plane_state->triplebuffer_flips);
}
@@ -3037,27 +4425,28 @@ static void commit_planes_for_stream(struct dc *dc,
dc->hwss.update_plane_addr(dc, pipe_ctx);
}
}
-
}
- if (should_lock_all_pipes && dc->hwss.interdependent_update_lock)
+ if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
dc->hwss.interdependent_update_lock(dc, context, false);
- else
+ } else {
dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false);
+ }
if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed)
- if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {
+ if (top_pipe_to_program &&
+ top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {
top_pipe_to_program->stream_res.tg->funcs->wait_for_state(
- top_pipe_to_program->stream_res.tg,
- CRTC_STATE_VACTIVE);
+ top_pipe_to_program->stream_res.tg,
+ CRTC_STATE_VACTIVE);
top_pipe_to_program->stream_res.tg->funcs->wait_for_state(
- top_pipe_to_program->stream_res.tg,
- CRTC_STATE_VBLANK);
+ top_pipe_to_program->stream_res.tg,
+ CRTC_STATE_VBLANK);
top_pipe_to_program->stream_res.tg->funcs->wait_for_state(
- top_pipe_to_program->stream_res.tg,
- CRTC_STATE_VACTIVE);
+ top_pipe_to_program->stream_res.tg,
+ CRTC_STATE_VACTIVE);
- if (stream && should_use_dmub_lock(stream->link)) {
+ if (should_use_dmub_inbox1_lock(dc, stream->link)) {
union dmub_hw_lock_flags hw_locks = { 0 };
struct dmub_hw_lock_inst_flags inst_flags = { 0 };
@@ -3073,9 +4462,47 @@ static void commit_planes_for_stream(struct dc *dc,
top_pipe_to_program->stream_res.tg);
}
+ if (subvp_curr_use) {
+ /* If enabling subvp or transitioning from subvp->subvp, enable the
+ * phantom streams before we program front end for the phantom pipes.
+ */
+ if (update_type != UPDATE_TYPE_FAST) {
+ if (dc->hwss.enable_phantom_streams)
+ dc->hwss.enable_phantom_streams(dc, context);
+ }
+ }
+
if (update_type != UPDATE_TYPE_FAST)
dc->hwss.post_unlock_program_front_end(dc, context);
+ if (subvp_prev_use && !subvp_curr_use) {
+ /* If disabling subvp, disable phantom streams after front end
+ * programming has completed (we turn on phantom OTG in order
+ * to complete the plane disable for phantom pipes).
+ */
+
+ if (dc->hwss.disable_phantom_streams)
+ dc->hwss.disable_phantom_streams(dc, context);
+ }
+
+ if (update_type != UPDATE_TYPE_FAST)
+ if (dc->hwss.commit_subvp_config)
+ dc->hwss.commit_subvp_config(dc, context);
+ /* Since phantom pipe programming is moved to post_unlock_program_front_end,
+ * move the SubVP lock to after the phantom pipes have been setup
+ */
+ if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
+ if (dc->hwss.subvp_pipe_control_lock)
+ dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, NULL, subvp_prev_use);
+ if (dc->hwss.dmub_hw_control_lock)
+ dc->hwss.dmub_hw_control_lock(dc, context, false);
+ } else {
+ if (dc->hwss.subvp_pipe_control_lock)
+ dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, top_pipe_to_program, subvp_prev_use);
+ if (dc->hwss.dmub_hw_control_lock)
+ dc->hwss.dmub_hw_control_lock(dc, context, false);
+ }
+
// Fire manual trigger only when bottom plane is flipped
for (j = 0; j < dc->res_pool->pipe_count; j++) {
struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j];
@@ -3089,94 +4516,732 @@ static void commit_planes_for_stream(struct dc *dc,
pipe_ctx->plane_state->skip_manual_trigger)
continue;
+ if (dc->hwss.program_cursor_offload_now)
+ dc->hwss.program_cursor_offload_now(dc, pipe_ctx);
if (pipe_ctx->stream_res.tg->funcs->program_manual_trigger)
pipe_ctx->stream_res.tg->funcs->program_manual_trigger(pipe_ctx->stream_res.tg);
}
+
+ current_stream_mask = get_stream_mask(dc, context);
+ if (current_stream_mask != context->stream_mask) {
+ context->stream_mask = current_stream_mask;
+ dc_dmub_srv_notify_stream_mask(dc->ctx->dmub_srv, current_stream_mask);
+ }
}
-void dc_commit_updates_for_stream(struct dc *dc,
+/**
+ * could_mpcc_tree_change_for_active_pipes - Check if an OPP associated with MPCC might change
+ *
+ * @dc: Used to get the current state status
+ * @stream: Target stream whose attached planes we want to remove
+ * @srf_updates: Array of surface updates
+ * @surface_count: Number of surface updates
+ * @is_plane_addition: [out] Set to true if it is a plane addition case
+ *
+ * DCN32x and newer support a feature named Dynamic ODM which can conflict with
+ * the MPO if used simultaneously in some specific configurations (e.g.,
+ * 4k@144). This function checks if the incoming context requires applying a
+ * transition state with unnecessary pipe splitting and ODM disabled to
+ * circumvent our hardware limitations to prevent this edge case. If the OPP
+ * associated with an MPCC might change due to plane additions, this function
+ * returns true.
+ *
+ * Return:
+ * Return true if OPP and MPCC might change, otherwise, return false.
+ */
+static bool could_mpcc_tree_change_for_active_pipes(struct dc *dc,
+ struct dc_stream_state *stream,
struct dc_surface_update *srf_updates,
int surface_count,
- struct dc_stream_state *stream,
- struct dc_stream_update *stream_update,
- struct dc_state *state)
+ bool *is_plane_addition)
{
- const struct dc_stream_status *stream_status;
- enum surface_update_type update_type;
- struct dc_state *context;
- struct dc_context *dc_ctx = dc->ctx;
- int i, j;
- stream_status = dc_stream_get_status(stream);
- context = dc->current_state;
+ struct dc_stream_status *cur_stream_status = stream_get_status(dc->current_state, stream);
+ bool force_minimal_pipe_splitting = false;
+ bool subvp_active = false;
+ uint32_t i;
- update_type = dc_check_update_surfaces_for_stream(
- dc, srf_updates, surface_count, stream_update, stream_status);
+ *is_plane_addition = false;
+
+ if (cur_stream_status &&
+ dc->current_state->stream_count > 0 &&
+ dc->debug.pipe_split_policy != MPC_SPLIT_AVOID) {
+ /* determine if minimal transition is required due to MPC*/
+ if (surface_count > 0) {
+ if (cur_stream_status->plane_count > surface_count) {
+ force_minimal_pipe_splitting = true;
+ } else if (cur_stream_status->plane_count < surface_count) {
+ force_minimal_pipe_splitting = true;
+ *is_plane_addition = true;
+ }
+ }
+ }
- if (update_type >= update_surface_trace_level)
- update_surface_trace(dc, srf_updates, surface_count);
+ if (cur_stream_status &&
+ dc->current_state->stream_count == 1 &&
+ dc->debug.enable_single_display_2to1_odm_policy) {
+ /* determine if minimal transition is required due to dynamic ODM*/
+ if (surface_count > 0) {
+ if (cur_stream_status->plane_count > 2 && cur_stream_status->plane_count > surface_count) {
+ force_minimal_pipe_splitting = true;
+ } else if (surface_count > 2 && cur_stream_status->plane_count < surface_count) {
+ force_minimal_pipe_splitting = true;
+ *is_plane_addition = true;
+ }
+ }
+ }
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
- if (update_type >= UPDATE_TYPE_FULL) {
+ if (dc_state_get_pipe_subvp_type(dc->current_state, pipe) != SUBVP_NONE) {
+ subvp_active = true;
+ break;
+ }
+ }
- /* initialize scratch memory for building context */
- context = dc_create_state(dc);
- if (context == NULL) {
- DC_ERROR("Failed to allocate new validate context!\n");
- return;
+ /* For SubVP when adding or removing planes we need to add a minimal transition
+ * (even when disabling all planes). Whenever disabling a phantom pipe, we
+ * must use the minimal transition path to disable the pipe correctly.
+ *
+ * We want to use the minimal transition whenever subvp is active, not only if
+ * a plane is being added / removed from a subvp stream (MPO plane can be added
+ * to a DRR pipe of SubVP + DRR config, in which case we still want to run through
+ * a min transition to disable subvp).
+ */
+ if (cur_stream_status && subvp_active) {
+ /* determine if minimal transition is required due to SubVP*/
+ if (cur_stream_status->plane_count > surface_count) {
+ force_minimal_pipe_splitting = true;
+ } else if (cur_stream_status->plane_count < surface_count) {
+ force_minimal_pipe_splitting = true;
+ *is_plane_addition = true;
}
+ }
- dc_resource_state_copy_construct(state, context);
+ return force_minimal_pipe_splitting;
+}
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
- struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+/*
+ * Saved copies of the settings that
+ * backup_and_set_minimal_pipe_split_policy() overrides, kept so that
+ * restore_minimal_pipe_split_policy() can write them back.
+ */
+struct pipe_split_policy_backup {
+ bool dynamic_odm_policy; /* dc->debug.enable_single_display_2to1_odm_policy */
+ bool subvp_policy; /* dc->debug.force_disable_subvp */
+ enum pipe_split_policy mpc_policy; /* dc->debug.pipe_split_policy */
+ char force_odm[MAX_PIPES]; /* per-stream debug.force_odm_combine_segments, indexed by stream position */
+};
- if (new_pipe->plane_state && new_pipe->plane_state != old_pipe->plane_state)
- new_pipe->plane_state->force_full_update = true;
- }
- } else if (update_type == UPDATE_TYPE_FAST && dc_ctx->dce_version >= DCE_VERSION_MAX) {
+/*
+ * Save the current pipe split related debug settings into @policy and then
+ * override them with their "minimal" values: MPC split avoided, single display
+ * 2to1 ODM disabled, SubVP force-disabled. For every stream in @context the
+ * forced ODM segment count is saved as well, and cleared when the stream
+ * allows a transition for forced ODM.
+ */
+static void backup_and_set_minimal_pipe_split_policy(struct dc *dc,
+ struct dc_state *context,
+ struct pipe_split_policy_backup *policy)
+{
+ int i;
+
+ /*
+ * On vmin-only ASICs the MPC policy is neither saved nor changed, so
+ * policy->mpc_policy is left unwritten; the restore side skips it
+ * under the same condition.
+ */
+ if (!dc->config.is_vmin_only_asic) {
+ policy->mpc_policy = dc->debug.pipe_split_policy;
+ dc->debug.pipe_split_policy = MPC_SPLIT_AVOID;
+ }
+ policy->dynamic_odm_policy = dc->debug.enable_single_display_2to1_odm_policy;
+ dc->debug.enable_single_display_2to1_odm_policy = false;
+ policy->subvp_policy = dc->debug.force_disable_subvp;
+ dc->debug.force_disable_subvp = true;
+ for (i = 0; i < context->stream_count; i++) {
+ /* always saved, but only cleared when the stream opts in */
+ policy->force_odm[i] = context->streams[i]->debug.force_odm_combine_segments;
+ if (context->streams[i]->debug.allow_transition_for_forced_odm)
+ context->streams[i]->debug.force_odm_combine_segments = 0;
+ }
+}
+
+/*
+ * Undo backup_and_set_minimal_pipe_split_policy(): write the values saved in
+ * @policy back into dc->debug and into each stream's
+ * debug.force_odm_combine_segments. policy->force_odm[] is indexed by stream
+ * position, so @context is expected to be the context used for the backup.
+ */
+static void restore_minimal_pipe_split_policy(struct dc *dc,
+ struct dc_state *context,
+ struct pipe_split_policy_backup *policy)
+{
+ uint8_t i;
+
+ /* mirrors the backup side: MPC policy is untouched on vmin-only ASICs */
+ if (!dc->config.is_vmin_only_asic)
+ dc->debug.pipe_split_policy = policy->mpc_policy;
+ dc->debug.enable_single_display_2to1_odm_policy =
+ policy->dynamic_odm_policy;
+ dc->debug.force_disable_subvp = policy->subvp_policy;
+ for (i = 0; i < context->stream_count; i++)
+ context->streams[i]->debug.force_odm_combine_segments = policy->force_odm[i];
+}
+
+/*
+ * Tear down a minimal transition state: restore the policy saved in @policy
+ * (walking the streams of @base_context) and drop one reference on
+ * @minimal_transition_context through dc_state_release().
+ */
+static void release_minimal_transition_state(struct dc *dc,
+ struct dc_state *minimal_transition_context,
+ struct dc_state *base_context,
+ struct pipe_split_policy_backup *policy)
+{
+ restore_minimal_pipe_split_policy(dc, base_context, policy);
+ dc_state_release(minimal_transition_context);
+}
+
+/*
+ * Clear flip_immediate on every plane state referenced by the stream_status
+ * entries of @context. The plane state objects themselves are modified.
+ */
+static void force_vsync_flip_in_minimal_transition_context(struct dc_state *context)
+{
+ uint8_t i;
+ int j;
+ struct dc_stream_status *stream_status;
+
+ for (i = 0; i < context->stream_count; i++) {
+ stream_status = &context->stream_status[i];
+
+ for (j = 0; j < stream_status->plane_count; j++)
+ stream_status->plane_states[j]->flip_immediate = false;
+ }
+}
+
+static struct dc_state *create_minimal_transition_state(struct dc *dc,
+ struct dc_state *base_context, struct pipe_split_policy_backup *policy)
+{
+ struct dc_state *minimal_transition_context = NULL;
+
+ minimal_transition_context = dc_state_create_copy(base_context);
+ if (!minimal_transition_context)
+ return NULL;
+
+ backup_and_set_minimal_pipe_split_policy(dc, base_context, policy);
+ /* commit minimal state */
+ if (dc->res_pool->funcs->validate_bandwidth(dc, minimal_transition_context,
+ DC_VALIDATE_MODE_AND_PROGRAMMING) == DC_OK) {
+ /* prevent underflow and corruption when reconfiguring pipes */
+ force_vsync_flip_in_minimal_transition_context(minimal_transition_context);
+ } else {
/*
- * Previous frame finished and HW is ready for optimization.
- *
- * Only relevant for DCN behavior where we can guarantee the optimization
- * is safe to apply - retain the legacy behavior for DCE.
+ * This should never happen, minimal transition state should
+ * always be validated first before adding pipe split features.
*/
- dc_post_update_surfaces_to_stream(dc);
+ release_minimal_transition_state(dc, minimal_transition_context, base_context, policy);
+ BREAK_TO_DEBUGGER();
+ minimal_transition_context = NULL;
}
+ return minimal_transition_context;
+}
+/*
+ * Return true only when both hops, @initial_state -> @intermediate_state and
+ * @intermediate_state -> @final_state, are reported seamless by the
+ * dc->hwss.is_pipe_topology_transition_seamless hook. The second hop is not
+ * evaluated when the first one fails.
+ *
+ * NOTE(review): the hook is called without a NULL check here - confirm every
+ * caller only reaches this on hardware that provides it.
+ */
+static bool is_pipe_topology_transition_seamless_with_intermediate_step(
+ struct dc *dc,
+ struct dc_state *initial_state,
+ struct dc_state *intermediate_state,
+ struct dc_state *final_state)
+{
+ return dc->hwss.is_pipe_topology_transition_seamless(dc, initial_state,
+ intermediate_state) &&
+ dc->hwss.is_pipe_topology_transition_seamless(dc,
+ intermediate_state, final_state);
+}
- for (i = 0; i < surface_count; i++) {
- struct dc_plane_state *surface = srf_updates[i].surface;
+static void swap_and_release_current_context(struct dc *dc,
+ struct dc_state *new_context, struct dc_stream_state *stream)
+{
- copy_surface_update_to_plane(surface, &srf_updates[i]);
+ int i;
+ struct dc_state *old = dc->current_state;
+ struct pipe_ctx *pipe_ctx;
- if (update_type >= UPDATE_TYPE_MED) {
- for (j = 0; j < dc->res_pool->pipe_count; j++) {
- struct pipe_ctx *pipe_ctx =
- &context->res_ctx.pipe_ctx[j];
+ /* Since memory free requires elevated IRQ, an interrupt
+ * request is generated by mem free. If this happens
+ * between freeing and reassigning the context, our vsync
+ * interrupt will call into dc and cause a memory
+ * corruption. Hence, we first reassign the context,
+ * then free the old context.
+ */
+ dc->current_state = new_context;
+ dc_state_release(old);
- if (pipe_ctx->plane_state != surface)
- continue;
+ // clear any forced full updates
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe_ctx = &new_context->res_ctx.pipe_ctx[i];
- resource_build_scaling_params(pipe_ctx);
- }
+ if (pipe_ctx->plane_state && pipe_ctx->stream == stream)
+ pipe_ctx->plane_state->force_full_update = false;
+ }
+}
+
+/*
+ * Point srf_updates[i].surface at each plane state currently attached to
+ * @stream, as reported by dc_stream_get_status(). No other field of the
+ * updates is written.
+ *
+ * Returns the number of entries filled in (the stream's plane_count), or 0
+ * when the stream has no status. @srf_updates must have room for plane_count
+ * entries; no bound is checked here.
+ */
+static int initialize_empty_surface_updates(
+ struct dc_stream_state *stream,
+ struct dc_surface_update *srf_updates)
+{
+ struct dc_stream_status *status = dc_stream_get_status(stream);
+ int i;
+
+ if (!status)
+ return 0;
+
+ for (i = 0; i < status->plane_count; i++)
+ srf_updates[i].surface = status->plane_states[i];
+
+ return status->plane_count;
+}
+
+/*
+ * Try to bridge dc->current_state and @new_context with a minimal transition
+ * state derived from @new_context. The intermediate state is committed, and
+ * becomes dc->current_state, only when both hops (current -> intermediate
+ * and intermediate -> new) are seamless.
+ *
+ * Returns true if the intermediate state was committed, false otherwise
+ * (including when the intermediate state could not be created).
+ */
+static bool commit_minimal_transition_based_on_new_context(struct dc *dc,
+ struct dc_state *new_context,
+ struct dc_stream_state *stream,
+ struct dc_surface_update *srf_updates,
+ int surface_count)
+{
+ bool success = false;
+ struct pipe_split_policy_backup policy;
+ struct dc_state *intermediate_context =
+ create_minimal_transition_state(dc, new_context,
+ &policy);
+
+ if (intermediate_context) {
+ if (is_pipe_topology_transition_seamless_with_intermediate_step(
+ dc,
+ dc->current_state,
+ intermediate_context,
+ new_context)) {
+ DC_LOG_DC("commit minimal transition state: base = new state\n");
+ commit_planes_for_stream(dc, srf_updates,
+ surface_count, stream, NULL,
+ UPDATE_TYPE_FULL, intermediate_context);
+ swap_and_release_current_context(
+ dc, intermediate_context, stream);
+ /*
+ * dc->current_state is now intermediate_context; take
+ * an extra reference so the release below does not
+ * drop the one held by dc->current_state.
+ */
+ dc_state_retain(dc->current_state);
+ success = true;
}
+ /* always restore the policy and drop our reference */
+ release_minimal_transition_state(
+ dc, intermediate_context, new_context, &policy);
}
+ return success;
+}
- copy_stream_update_to_stream(dc, context, stream, stream_update);
+static bool commit_minimal_transition_based_on_current_context(struct dc *dc,
+ struct dc_state *new_context, struct dc_stream_state *stream)
+{
+ bool success = false;
+ struct pipe_split_policy_backup policy;
+ struct dc_state *intermediate_context;
+ struct dc_state *old_current_state = dc->current_state;
+ struct dc_surface_update srf_updates[MAX_SURFACES] = {0};
+ int surface_count;
- if (update_type >= UPDATE_TYPE_FULL) {
- if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) {
- DC_ERROR("Mode validation failed for stream update!\n");
- dc_release_state(context);
- return;
+ /*
+ * Both current and new contexts share the same stream and plane state
+ * pointers. When new context is validated, stream and planes get
+ * populated with new updates such as new plane addresses. This makes
+ * the current context no longer valid because stream and planes are
+ * modified from the original. We backup current stream and plane states
+ * into scratch space whenever we are populating new context. So we can
+ * restore the original values back by calling the restore function now.
+ * This restores back the original stream and plane states associated
+ * with the current state.
+ */
+ restore_planes_and_stream_state(&dc->scratch.current_state, stream);
+ dc_state_retain(old_current_state);
+ intermediate_context = create_minimal_transition_state(dc,
+ old_current_state, &policy);
+
+ if (intermediate_context) {
+ if (is_pipe_topology_transition_seamless_with_intermediate_step(
+ dc,
+ dc->current_state,
+ intermediate_context,
+ new_context)) {
+ DC_LOG_DC("commit minimal transition state: base = current state\n");
+ surface_count = initialize_empty_surface_updates(
+ stream, srf_updates);
+ commit_planes_for_stream(dc, srf_updates,
+ surface_count, stream, NULL,
+ UPDATE_TYPE_FULL, intermediate_context);
+ swap_and_release_current_context(
+ dc, intermediate_context, stream);
+ dc_state_retain(dc->current_state);
+ success = true;
}
+ release_minimal_transition_state(dc, intermediate_context,
+ old_current_state, &policy);
}
+ dc_state_release(old_current_state);
+ /*
+ * Restore stream and plane states back to the values associated with
+ * new context.
+ */
+ restore_planes_and_stream_state(&dc->scratch.new_state, stream);
+ return success;
+}
- TRACE_DC_PIPE_STATE(pipe_ctx, i, MAX_PIPES);
+/**
+ * commit_minimal_transition_state_in_dc_update - Commit a minimal state based
+ * on current or new context
+ *
+ * @dc: DC structure, used to get the current state
+ * @new_context: New context
+ * @stream: Stream getting the update for the flip
+ * @srf_updates: Surface updates
+ * @surface_count: Number of surfaces
+ *
+ * The function takes in the current state and the new state and determines a
+ * minimal transition state as the intermediate step which could make the
+ * transition between current and new states seamless. A minimal transition
+ * state derived from the new context is tried first; if that one cannot be
+ * committed, one derived from the current context is tried. If a suitable
+ * state is found, it is committed and becomes the current state.
+ *
+ * Return:
+ * Return true if the minimal transition succeeded, false otherwise
+ */
+static bool commit_minimal_transition_state_in_dc_update(struct dc *dc,
+ struct dc_state *new_context,
+ struct dc_stream_state *stream,
+ struct dc_surface_update *srf_updates,
+ int surface_count)
+{
+ bool success = commit_minimal_transition_based_on_new_context(
+ dc, new_context, stream, srf_updates,
+ surface_count);
+ /* fall back to a minimal state built from the current context */
+ if (!success)
+ success = commit_minimal_transition_based_on_current_context(dc,
+ new_context, stream);
+ /* failure is only logged here; the caller decides what to do with it */
+ if (!success)
+ DC_LOG_ERROR("Fail to commit a seamless minimal transition state between current and new states.\nThis pipe topology update is non-seamless!\n");
+ return success;
+}
- commit_planes_for_stream(
+/**
+ * commit_minimal_transition_state - Create a transition pipe split state
+ *
+ * @dc: Used to get the current state status
+ * @transition_base_context: State the minimal transition state is derived
+ * from (logged as "current" when it is dc->current_state, "new" otherwise)
+ *
+ * In some specific configurations, such as pipe split on multi-display with
+ * MPO and/or Dynamic ODM, removing a plane may cause unsupported pipe
+ * programming when moving to new planes. To mitigate those types of problems,
+ * this function adds a transition state that minimizes pipe usage before
+ * programming the new configuration. When adding a new plane, the current
+ * state requires the least pipes, so it is applied without splitting. When
+ * removing a plane, the new state requires the least pipes, so it is applied
+ * without splitting.
+ *
+ * Return:
+ * Return true if no minimal transition was needed or if it was committed
+ * successfully; return false if the transition state could not be created or
+ * committed.
+ */
+static bool commit_minimal_transition_state(struct dc *dc,
+ struct dc_state *transition_base_context)
+{
+ struct dc_state *transition_context;
+ struct pipe_split_policy_backup policy;
+ enum dc_status ret = DC_ERROR_UNEXPECTED;
+ unsigned int i, j;
+ unsigned int pipe_in_use = 0;
+ bool subvp_in_use = false;
+ bool odm_in_use = false;
+
+ /* count the pipes of the base context that have a plane attached */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &transition_base_context->res_ctx.pipe_ctx[i];
+
+ if (pipe->plane_state)
+ pipe_in_use++;
+ }
+
+ /* If SubVP is enabled and we are adding or removing planes from any main subvp
+ * pipe, we must use the minimal transition. SubVP is detected here by
+ * the presence of a phantom pipe in the current state.
+ */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ if (pipe->stream && dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_PHANTOM) {
+ subvp_in_use = true;
+ break;
+ }
+ }
+
+ /* If ODM is enabled and we are adding or removing planes from any ODM
+ * pipe, we must use the minimal transition. Only the first OTG master
+ * pipe found in the base context is inspected.
+ */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &transition_base_context->res_ctx.pipe_ctx[i];
+
+ if (resource_is_pipe_type(pipe, OTG_MASTER)) {
+ odm_in_use = resource_get_odm_slice_count(pipe) > 1;
+ break;
+ }
+ }
+
+ /* When the OS adds a new surface while all pipes are already in use by
+ * ODM combine and MPC split, commit_minimal_transition_state is needed to
+ * transition safely. After the OS exits MPO, ODM and MPC split will use
+ * all pipes again, so it must be called again. Otherwise return true to
+ * skip.
+ *
+ * This reduces the scenarios that use dc_commit_state_no_check during a
+ * flip, especially entering/exiting MPO when DCN still has enough
+ * resources.
+ */
+ if (pipe_in_use != dc->res_pool->pipe_count && !subvp_in_use && !odm_in_use)
+ return true;
+
+ DC_LOG_DC("%s base = %s state, reason = %s\n", __func__,
+ dc->current_state == transition_base_context ? "current" : "new",
+ subvp_in_use ? "Subvp In Use" :
+ odm_in_use ? "ODM in Use" :
+ dc->debug.pipe_split_policy != MPC_SPLIT_AVOID ? "MPC in Use" :
+ "Unknown");
+
+ /* hold a reference on the base context across the commit below */
+ dc_state_retain(transition_base_context);
+ transition_context = create_minimal_transition_state(dc,
+ transition_base_context, &policy);
+ if (transition_context) {
+ ret = dc_commit_state_no_check(dc, transition_context);
+ release_minimal_transition_state(dc, transition_context, transition_base_context, &policy);
+ }
+ dc_state_release(transition_base_context);
+
+ /* ret is still DC_ERROR_UNEXPECTED when no transition state was created */
+ if (ret != DC_OK) {
+ /* this should never happen */
+ BREAK_TO_DEBUGGER();
+ return false;
+ }
+
+ /* force full surface update */
+ for (i = 0; i < dc->current_state->stream_count; i++) {
+ for (j = 0; j < dc->current_state->stream_status[i].plane_count; j++) {
+ dc->current_state->stream_status[i].plane_states[j]->update_flags.raw = 0xFFFFFFFF;
+ }
+ }
+
+ return true;
+}
+
+/*
+ * Copy the fields that can be handled as a fast update out of @stream_update
+ * and @srf_updates into @fast_update.
+ *
+ * The stream-level fields (out_transfer_func, output_csc_transform) are stored
+ * in fast_update[0] only, and are set to NULL when @stream_update is NULL.
+ * The per-surface fields are stored in fast_update[i] for each of the
+ * @surface_count surface updates. fast_update[0] is written even when
+ * @surface_count is 0, so @fast_update needs at least one entry.
+ */
+void populate_fast_updates(struct dc_fast_update *fast_update,
+ struct dc_surface_update *srf_updates,
+ int surface_count,
+ struct dc_stream_update *stream_update)
+{
+ int i = 0;
+
+ if (stream_update) {
+ fast_update[0].out_transfer_func = stream_update->out_transfer_func;
+ fast_update[0].output_csc_transform = stream_update->output_csc_transform;
+ } else {
+ fast_update[0].out_transfer_func = NULL;
+ fast_update[0].output_csc_transform = NULL;
+ }
+
+ for (i = 0; i < surface_count; i++) {
+ fast_update[i].flip_addr = srf_updates[i].flip_addr;
+ fast_update[i].gamma = srf_updates[i].gamma;
+ fast_update[i].gamut_remap_matrix = srf_updates[i].gamut_remap_matrix;
+ fast_update[i].input_csc_color_matrix = srf_updates[i].input_csc_color_matrix;
+ fast_update[i].coeff_reduction_factor = srf_updates[i].coeff_reduction_factor;
+ fast_update[i].cursor_csc_color_matrix = srf_updates[i].cursor_csc_color_matrix;
+ }
+}
+
+/*
+ * Return true if @fast_update carries at least one fast update: a stream-level
+ * field in entry 0, or any per-surface field (including flip_addr) in one of
+ * the first @surface_count entries.
+ */
+static bool fast_updates_exist(const struct dc_fast_update *fast_update, int surface_count)
+{
+ int i;
+
+ /* stream-level fields live in entry 0 only */
+ if (fast_update[0].out_transfer_func ||
+ fast_update[0].output_csc_transform)
+ return true;
+
+ for (i = 0; i < surface_count; i++) {
+ if (fast_update[i].flip_addr ||
+ fast_update[i].gamma ||
+ fast_update[i].gamut_remap_matrix ||
+ fast_update[i].input_csc_color_matrix ||
+ fast_update[i].cursor_csc_color_matrix ||
+ fast_update[i].coeff_reduction_factor)
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Same check as fast_updates_exist() except that flip_addr is ignored: return
+ * true if @fast_update carries any fast update other than an address flip.
+ */
+bool fast_nonaddr_updates_exist(struct dc_fast_update *fast_update, int surface_count)
+{
+ int i;
+
+ /* stream-level fields live in entry 0 only */
+ if (fast_update[0].out_transfer_func ||
+ fast_update[0].output_csc_transform)
+ return true;
+
+ for (i = 0; i < surface_count; i++) {
+ if (fast_update[i].input_csc_color_matrix ||
+ fast_update[i].gamma ||
+ fast_update[i].gamut_remap_matrix ||
+ fast_update[i].coeff_reduction_factor ||
+ fast_update[i].cursor_csc_color_matrix)
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Checks for a full update that do not look at the contents of the individual
+ * updates. Returns true when any of the following holds:
+ * - a surface in @srf_updates is not part of dc->current_state,
+ * - @stream has no status, or its plane count differs from @surface_count,
+ * - idle optimizations are currently allowed,
+ * - dc_can_clear_cursor_limit() reports true.
+ *
+ * @stream_update is accepted for signature symmetry with
+ * full_update_required() but is not read here.
+ */
+static bool full_update_required_weak(
+ const struct dc *dc,
+ const struct dc_surface_update *srf_updates,
+ int surface_count,
+ const struct dc_stream_update *stream_update,
+ const struct dc_stream_state *stream)
+{
+ const struct dc_state *context = dc->current_state;
+ if (srf_updates)
+ for (int i = 0; i < surface_count; i++)
+ if (!is_surface_in_context(context, srf_updates[i].surface))
+ return true;
+
+ if (stream) {
+ const struct dc_stream_status *stream_status = dc_stream_get_status_const(stream);
+ if (stream_status == NULL || stream_status->plane_count != surface_count)
+ return true;
+ }
+ if (dc->idle_optimizations_allowed)
+ return true;
+
+ if (dc_can_clear_cursor_limit(dc))
+ return true;
+
+ return false;
+}
+
+/*
+ * Decide whether the given surface/stream updates need the full update path.
+ *
+ * Returns true when full_update_required_weak() does, when any surface update
+ * carries a field that cannot be applied as a fast update (plane or scaling
+ * info, a changed hdr_mult or sdr_white_level_nits, transfer/shaper/3D LUT
+ * functions, a forced full update, a TMZ change on flip, or changed cm2
+ * component settings), or when @stream_update carries any of the stream-level
+ * fields tested below. Returns false otherwise.
+ *
+ * @srf_updates may be NULL, in which case no per-surface check is made.
+ */
+static bool full_update_required(
+ const struct dc *dc,
+ const struct dc_surface_update *srf_updates,
+ int surface_count,
+ const struct dc_stream_update *stream_update,
+ const struct dc_stream_state *stream)
+{
+ if (full_update_required_weak(dc, srf_updates, surface_count, stream_update, stream))
+ return true;
+
+ for (int i = 0; i < surface_count; i++) {
+ /*
+ * hdr_mult and sdr_white_level_nits are compared against the
+ * surface of the same update entry (srf_updates[i].surface), not
+ * against the first entry's surface.
+ */
+ if (srf_updates &&
+ (srf_updates[i].plane_info ||
+ srf_updates[i].scaling_info ||
+ (srf_updates[i].hdr_mult.value &&
+ srf_updates[i].hdr_mult.value != srf_updates[i].surface->hdr_mult.value) ||
+ (srf_updates[i].sdr_white_level_nits &&
+ srf_updates[i].sdr_white_level_nits != srf_updates[i].surface->sdr_white_level_nits) ||
+ srf_updates[i].in_transfer_func ||
+ srf_updates[i].func_shaper ||
+ srf_updates[i].lut3d_func ||
+ srf_updates[i].surface->force_full_update ||
+ (srf_updates[i].flip_addr &&
+ srf_updates[i].flip_addr->address.tmz_surface != srf_updates[i].surface->address.tmz_surface) ||
+ (srf_updates[i].cm2_params &&
+ (srf_updates[i].cm2_params->component_settings.shaper_3dlut_setting != srf_updates[i].surface->mcm_shaper_3dlut_setting ||
+ srf_updates[i].cm2_params->component_settings.lut1d_enable != srf_updates[i].surface->mcm_lut1d_enable))))
+ return true;
+ }
+
+ /* any of these stream-level fields forces the full path */
+ if (stream_update &&
+ (((stream_update->src.height != 0 && stream_update->src.width != 0) ||
+ (stream_update->dst.height != 0 && stream_update->dst.width != 0) ||
+ stream_update->integer_scaling_update) ||
+ stream_update->hdr_static_metadata ||
+ stream_update->abm_level ||
+ stream_update->periodic_interrupt ||
+ stream_update->vrr_infopacket ||
+ stream_update->vsc_infopacket ||
+ stream_update->vsp_infopacket ||
+ stream_update->hfvsif_infopacket ||
+ stream_update->vtem_infopacket ||
+ stream_update->adaptive_sync_infopacket ||
+ stream_update->avi_infopacket ||
+ stream_update->dpms_off ||
+ stream_update->allow_freesync ||
+ stream_update->vrr_active_variable ||
+ stream_update->vrr_active_fixed ||
+ stream_update->gamut_remap ||
+ stream_update->output_color_space ||
+ stream_update->dither_option ||
+ stream_update->wb_update ||
+ stream_update->dsc_config ||
+ stream_update->mst_bw_update ||
+ stream_update->func_shaper ||
+ stream_update->lut3d_func ||
+ stream_update->pending_test_pattern ||
+ stream_update->crtc_timing_adjust ||
+ stream_update->scaler_sharpener_update ||
+ stream_update->hw_cursor_req))
+ return true;
+
+ return false;
+}
+
+/*
+ * Return true when the update contains at least one fast update and nothing
+ * in it requires a full update.
+ */
+static bool fast_update_only(
+ const struct dc *dc,
+ const struct dc_fast_update *fast_update,
+ const struct dc_surface_update *srf_updates,
+ int surface_count,
+ const struct dc_stream_update *stream_update,
+ const struct dc_stream_state *stream)
+{
+ return fast_updates_exist(fast_update, surface_count)
+ && !full_update_required(dc, srf_updates, surface_count, stream_update, stream);
+}
+
+static bool update_planes_and_stream_v2(struct dc *dc,
+ struct dc_surface_update *srf_updates, int surface_count,
+ struct dc_stream_state *stream,
+ struct dc_stream_update *stream_update)
+{
+ struct dc_state *context;
+ enum surface_update_type update_type;
+ struct dc_fast_update fast_update[MAX_SURFACES] = {0};
+
+ /* In cases where MPO and split or ODM are used transitions can
+ * cause underflow. Apply stream configuration with minimal pipe
+ * split first to avoid unsupported transitions for active pipes.
+ */
+ bool force_minimal_pipe_splitting = 0;
+ bool is_plane_addition = 0;
+ bool is_fast_update_only;
+
+ populate_fast_updates(fast_update, srf_updates, surface_count, stream_update);
+ is_fast_update_only = fast_update_only(dc, fast_update, srf_updates,
+ surface_count, stream_update, stream);
+ force_minimal_pipe_splitting = could_mpcc_tree_change_for_active_pipes(
+ dc,
+ stream,
+ srf_updates,
+ surface_count,
+ &is_plane_addition);
+
+ /* on plane addition, minimal state is the current one */
+ if (force_minimal_pipe_splitting && is_plane_addition &&
+ !commit_minimal_transition_state(dc, dc->current_state))
+ return false;
+
+ if (!update_planes_and_stream_state(
+ dc,
+ srf_updates,
+ surface_count,
+ stream,
+ stream_update,
+ &update_type,
+ &context))
+ return false;
+
+ /* on plane removal, minimal state is the new one */
+ if (force_minimal_pipe_splitting && !is_plane_addition) {
+ if (!commit_minimal_transition_state(dc, context)) {
+ dc_state_release(context);
+ return false;
+ }
+ update_type = UPDATE_TYPE_FULL;
+ }
+
+ if (dc->hwss.is_pipe_topology_transition_seamless &&
+ !dc->hwss.is_pipe_topology_transition_seamless(
+ dc, dc->current_state, context))
+ commit_minimal_transition_state_in_dc_update(dc, context, stream,
+ srf_updates, surface_count);
+
+ if (is_fast_update_only && !dc->check_config.enable_legacy_fast_update) {
+ commit_planes_for_stream_fast(dc,
+ srf_updates,
+ surface_count,
+ stream,
+ stream_update,
+ update_type,
+ context);
+ } else {
+ if (!stream_update &&
+ dc->hwss.is_pipe_topology_transition_seamless &&
+ !dc->hwss.is_pipe_topology_transition_seamless(
+ dc, dc->current_state, context)) {
+ DC_LOG_ERROR("performing non-seamless pipe topology transition with surface only update!\n");
+ BREAK_TO_DEBUGGER();
+ }
+ commit_planes_for_stream(
dc,
srf_updates,
surface_count,
@@ -3184,30 +5249,190 @@ void dc_commit_updates_for_stream(struct dc *dc,
stream_update,
update_type,
context);
- /*update current_State*/
- if (dc->current_state != context) {
+ }
+ if (dc->current_state != context)
+ swap_and_release_current_context(dc, context, stream);
+ return true;
+}
+
+/*
+ * Commit a non-FULL update directly on dc->current_state. The fast commit
+ * sequence is used when the update consists of fast updates only and the
+ * legacy fast update path is not enabled in dc->check_config; otherwise the
+ * regular commit_planes_for_stream() sequence is used.
+ */
+static void commit_planes_and_stream_update_on_current_context(struct dc *dc,
+ struct dc_surface_update *srf_updates, int surface_count,
+ struct dc_stream_state *stream,
+ struct dc_stream_update *stream_update,
+ enum surface_update_type update_type)
+{
+ struct dc_fast_update fast_update[MAX_SURFACES] = {0};
+
+ /* FULL updates are expected to go through the new-context path */
+ ASSERT(update_type < UPDATE_TYPE_FULL);
+ populate_fast_updates(fast_update, srf_updates, surface_count,
+ stream_update);
+ if (fast_update_only(dc, fast_update, srf_updates, surface_count,
+ stream_update, stream) &&
+ !dc->check_config.enable_legacy_fast_update)
+ commit_planes_for_stream_fast(dc,
+ srf_updates,
+ surface_count,
+ stream,
+ stream_update,
+ update_type,
+ dc->current_state);
+ else
+ commit_planes_for_stream(
+ dc,
+ srf_updates,
+ surface_count,
+ stream,
+ stream_update,
+ update_type,
+ dc->current_state);
+}
+
+static void commit_planes_and_stream_update_with_new_context(struct dc *dc,
+ struct dc_surface_update *srf_updates, int surface_count,
+ struct dc_stream_state *stream,
+ struct dc_stream_update *stream_update,
+ enum surface_update_type update_type,
+ struct dc_state *new_context)
+{
+ ASSERT(update_type >= UPDATE_TYPE_FULL);
+ if (!dc->hwss.is_pipe_topology_transition_seamless(dc,
+ dc->current_state, new_context))
+ /*
+ * It is required by the feature design that all pipe topologies
+ * using extra free pipes for power saving purposes such as
+ * dynamic ODM or SubVp shall only be enabled when it can be
+ * transitioned seamlessly to AND from its minimal transition
+ * state. A minimal transition state is defined as the same dc
+ * state but with all power saving features disabled. So it uses
+ * the minimum pipe topology. When we can't seamlessly
+ * transition from state A to state B, we will insert the
+ * minimal transition state A' or B' in between so seamless
+ * transition between A and B can be made possible.
+ */
+ commit_minimal_transition_state_in_dc_update(dc, new_context,
+ stream, srf_updates, surface_count);
- struct dc_state *old = dc->current_state;
+ commit_planes_for_stream(
+ dc,
+ srf_updates,
+ surface_count,
+ stream,
+ stream_update,
+ update_type,
+ new_context);
+}
- dc->current_state = context;
- dc_release_state(old);
+static bool update_planes_and_stream_v3(struct dc *dc,
+ struct dc_surface_update *srf_updates, int surface_count,
+ struct dc_stream_state *stream,
+ struct dc_stream_update *stream_update)
+{
+ struct dc_state *new_context;
+ enum surface_update_type update_type;
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+ /*
+ * When this function returns true and new_context is not equal to
+ * current state, the function allocates and validates a new dc state
+ * and assigns it to new_context. The function expects that the caller
+ * is responsible to free this memory when new_context is no longer
+ * used. We swap current with new context and free current instead. So
+ * new_context's memory will live until the next full update after it is
+ * replaced by a newer context. Refer to the use of
+ * swap_and_release_current_context below.
+ */
+ if (!update_planes_and_stream_state(dc, srf_updates, surface_count,
+ stream, stream_update, &update_type,
+ &new_context))
+ return false;
- if (pipe_ctx->plane_state && pipe_ctx->stream == stream)
- pipe_ctx->plane_state->force_full_update = false;
- }
+ if (new_context == dc->current_state) {
+ commit_planes_and_stream_update_on_current_context(dc,
+ srf_updates, surface_count, stream,
+ stream_update, update_type);
+ } else {
+ commit_planes_and_stream_update_with_new_context(dc,
+ srf_updates, surface_count, stream,
+ stream_update, update_type, new_context);
+ swap_and_release_current_context(dc, new_context, stream);
}
- /* Legacy optimization path for DCE. */
- if (update_type >= UPDATE_TYPE_FULL && dc_ctx->dce_version < DCE_VERSION_MAX) {
- dc_post_update_surfaces_to_stream(dc);
- TRACE_DCE_CLOCK_STATE(&context->bw_ctx.bw.dce);
- }
+ return true;
+}
- return;
+/*
+ * Reset update_flags.raw to 0 on @stream (when non-NULL) and on every non-NULL
+ * surface referenced by the first @surface_count entries of @srf_updates.
+ */
+static void clear_update_flags(struct dc_surface_update *srf_updates,
+ int surface_count, struct dc_stream_state *stream)
+{
+ int i;
+ if (stream)
+ stream->update_flags.raw = 0;
+
+ for (i = 0; i < surface_count; i++)
+ if (srf_updates[i].surface)
+ srf_updates[i].surface->update_flags.raw = 0;
+}
+
+/*
+ * Apply the given surface and stream updates to @stream, selecting the v3
+ * or v2 update sequence from the DCN version. Returns the result of the
+ * selected sequence. On success, update flags of the stream and surfaces
+ * are cleared for DCN 3.2 and newer and for DCN 3.01.
+ */
+bool dc_update_planes_and_stream(struct dc *dc,
+ struct dc_surface_update *srf_updates, int surface_count,
+ struct dc_stream_state *stream,
+ struct dc_stream_update *stream_update)
+{
+ bool ret = false;
+
+ dc_exit_ips_for_hw_access(dc);
+ /*
+ * update planes and stream version 3 separates FULL and FAST updates
+ * to their own sequences. It aims to clean up frequent checks for
+ * update type resulting unnecessary branching in logic flow. It also
+ * adds a new commit minimal transition sequence, which detects the need
+ * for minimal transition based on the actual comparison of current and
+ * new states instead of "predicting" it based on per feature software
+ * policy, i.e. could_mpcc_tree_change_for_active_pipes. The new commit
+ * minimal transition sequence is made universal to any power saving
+ * features that would use extra free pipes such as Dynamic ODM/MPC
+ * Combine, MPO or SubVp. Therefore there is no longer a need to
+ * specially handle compatibility problems with transitions among those
+ * features as they are now transparent to the new sequence.
+ */
+ if (dc->ctx->dce_version >= DCN_VERSION_4_01 || dc->ctx->dce_version == DCN_VERSION_3_2 ||
+ dc->ctx->dce_version == DCN_VERSION_3_21)
+ ret = update_planes_and_stream_v3(dc, srf_updates,
+ surface_count, stream, stream_update);
+ else
+ ret = update_planes_and_stream_v2(dc, srf_updates,
+ surface_count, stream, stream_update);
+ if (ret && (dc->ctx->dce_version >= DCN_VERSION_3_2 ||
+ dc->ctx->dce_version == DCN_VERSION_3_01))
+ clear_update_flags(srf_updates, surface_count, stream);
+
+ return ret;
+}
+
+/*
+ * Apply the given surface and stream updates to @stream, using the v3 update
+ * sequence on DCN 4.01 and newer and the v2 sequence otherwise. On success,
+ * update flags are cleared for DCN 3.2 and newer. The result of the update
+ * is not reported to the caller.
+ *
+ * @state is not referenced in this function body.
+ */
+void dc_commit_updates_for_stream(struct dc *dc,
+ struct dc_surface_update *srf_updates,
+ int surface_count,
+ struct dc_stream_state *stream,
+ struct dc_stream_update *stream_update,
+ struct dc_state *state)
+{
+ bool ret = false;
+
+ dc_exit_ips_for_hw_access(dc);
+ /* TODO: Since change commit sequence can have a huge impact,
+ * we decided to only enable it for DCN3x. However, as soon as
+ * we get more confident about this change we'll need to enable
+ * the new sequence for all ASICs.
+ *
+ * NOTE(review): the check below selects the v3 sequence for
+ * DCN_VERSION_4_01 and newer, which does not match the "DCN3x"
+ * wording above - confirm which one is intended.
+ */
+ if (dc->ctx->dce_version >= DCN_VERSION_4_01) {
+ ret = update_planes_and_stream_v3(dc, srf_updates, surface_count,
+ stream, stream_update);
+ } else {
+ ret = update_planes_and_stream_v2(dc, srf_updates, surface_count,
+ stream, stream_update);
+ }
+
+ if (ret && dc->ctx->dce_version >= DCN_VERSION_3_2)
+ clear_update_flags(srf_updates, surface_count, stream);
}
uint8_t dc_get_current_stream_count(struct dc *dc)
@@ -3222,19 +5447,6 @@ struct dc_stream_state *dc_get_stream_at_index(struct dc *dc, uint8_t i)
return NULL;
}
-struct dc_stream_state *dc_stream_find_from_link(const struct dc_link *link)
-{
- uint8_t i;
- struct dc_context *ctx = link->ctx;
-
- for (i = 0; i < ctx->dc->current_state->stream_count; i++) {
- if (ctx->dc->current_state->streams[i]->link == link)
- return ctx->dc->current_state->streams[i];
- }
-
- return NULL;
-}
-
enum dc_irq_source dc_interrupt_to_irq_source(
struct dc *dc,
uint32_t src_id,
@@ -3263,29 +5475,27 @@ void dc_interrupt_ack(struct dc *dc, enum dc_irq_source src)
void dc_power_down_on_boot(struct dc *dc)
{
if (dc->ctx->dce_environment != DCE_ENV_VIRTUAL_HW &&
- dc->hwss.power_down_on_boot)
+ dc->hwss.power_down_on_boot) {
+ if (dc->caps.ips_support)
+ dc_exit_ips_for_hw_access(dc);
dc->hwss.power_down_on_boot(dc);
+ }
}
-void dc_set_power_state(
- struct dc *dc,
- enum dc_acpi_cm_power_state power_state)
+void dc_set_power_state(struct dc *dc, enum dc_acpi_cm_power_state power_state)
{
- struct kref refcount;
- struct display_mode_lib *dml;
-
if (!dc->current_state)
return;
switch (power_state) {
case DC_ACPI_CM_POWER_STATE_D0:
- dc_resource_state_construct(dc, dc->current_state);
+ dc_state_construct(dc, dc->current_state);
+
+ dc_exit_ips_for_hw_access(dc);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
dc_z10_restore(dc);
-#endif
- if (dc->ctx->dmub_srv)
- dc_dmub_srv_wait_phy_init(dc->ctx->dmub_srv);
+
+ dc_dmub_srv_notify_fw_dc_power_state(dc->ctx->dmub_srv, power_state);
dc->hwss.init_hw(dc);
@@ -3293,34 +5503,21 @@ void dc_set_power_state(
dc->vm_pa_config.valid) {
dc->hwss.init_sys_ctx(dc->hwseq, dc, &dc->vm_pa_config);
}
+ break;
+ case DC_ACPI_CM_POWER_STATE_D3:
+ if (dc->caps.ips_support)
+ dc_dmub_srv_notify_fw_dc_power_state(dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3);
+ if (dc->caps.ips_v2_support) {
+ if (dc->clk_mgr->funcs->set_low_power_state)
+ dc->clk_mgr->funcs->set_low_power_state(dc->clk_mgr);
+ }
break;
default:
ASSERT(dc->current_state->stream_count == 0);
- /* Zero out the current context so that on resume we start with
- * clean state, and dc hw programming optimizations will not
- * cause any trouble.
- */
- dml = kzalloc(sizeof(struct display_mode_lib),
- GFP_KERNEL);
-
- ASSERT(dml);
- if (!dml)
- return;
+ dc_dmub_srv_notify_fw_dc_power_state(dc->ctx->dmub_srv, power_state);
- /* Preserve refcount */
- refcount = dc->current_state->refcount;
- /* Preserve display mode lib */
- memcpy(dml, &dc->current_state->bw_ctx.dml, sizeof(struct display_mode_lib));
-
- dc_resource_state_destruct(dc->current_state);
- memset(dc->current_state, 0,
- sizeof(*dc->current_state));
-
- dc->current_state->refcount = refcount;
- dc->current_state->bw_ctx.dml = *dml;
-
- kfree(dml);
+ dc_state_destruct(dc->current_state);
break;
}
@@ -3331,7 +5528,7 @@ void dc_resume(struct dc *dc)
uint32_t i;
for (i = 0; i < dc->link_count; i++)
- core_link_resume(dc->links[i]);
+ dc->link_srv->resume(dc->links[i]);
}
bool dc_is_dmcu_initialized(struct dc *dc)
@@ -3343,153 +5540,6 @@ bool dc_is_dmcu_initialized(struct dc *dc)
return false;
}
-bool dc_submit_i2c(
- struct dc *dc,
- uint32_t link_index,
- struct i2c_command *cmd)
-{
-
- struct dc_link *link = dc->links[link_index];
- struct ddc_service *ddc = link->ddc;
- return dce_i2c_submit_command(
- dc->res_pool,
- ddc->ddc_pin,
- cmd);
-}
-
-bool dc_submit_i2c_oem(
- struct dc *dc,
- struct i2c_command *cmd)
-{
- struct ddc_service *ddc = dc->res_pool->oem_device;
- return dce_i2c_submit_command(
- dc->res_pool,
- ddc->ddc_pin,
- cmd);
-}
-
-static bool link_add_remote_sink_helper(struct dc_link *dc_link, struct dc_sink *sink)
-{
- if (dc_link->sink_count >= MAX_SINKS_PER_LINK) {
- BREAK_TO_DEBUGGER();
- return false;
- }
-
- dc_sink_retain(sink);
-
- dc_link->remote_sinks[dc_link->sink_count] = sink;
- dc_link->sink_count++;
-
- return true;
-}
-
-/*
- * dc_link_add_remote_sink() - Create a sink and attach it to an existing link
- *
- * EDID length is in bytes
- */
-struct dc_sink *dc_link_add_remote_sink(
- struct dc_link *link,
- const uint8_t *edid,
- int len,
- struct dc_sink_init_data *init_data)
-{
- struct dc_sink *dc_sink;
- enum dc_edid_status edid_status;
-
- if (len > DC_MAX_EDID_BUFFER_SIZE) {
- dm_error("Max EDID buffer size breached!\n");
- return NULL;
- }
-
- if (!init_data) {
- BREAK_TO_DEBUGGER();
- return NULL;
- }
-
- if (!init_data->link) {
- BREAK_TO_DEBUGGER();
- return NULL;
- }
-
- dc_sink = dc_sink_create(init_data);
-
- if (!dc_sink)
- return NULL;
-
- memmove(dc_sink->dc_edid.raw_edid, edid, len);
- dc_sink->dc_edid.length = len;
-
- if (!link_add_remote_sink_helper(
- link,
- dc_sink))
- goto fail_add_sink;
-
- edid_status = dm_helpers_parse_edid_caps(
- link->ctx,
- &dc_sink->dc_edid,
- &dc_sink->edid_caps);
-
- /*
- * Treat device as no EDID device if EDID
- * parsing fails
- */
- if (edid_status != EDID_OK) {
- dc_sink->dc_edid.length = 0;
- dm_error("Bad EDID, status%d!\n", edid_status);
- }
-
- return dc_sink;
-
-fail_add_sink:
- dc_sink_release(dc_sink);
- return NULL;
-}
-
-/*
- * dc_link_remove_remote_sink() - Remove a remote sink from a dc_link
- *
- * Note that this just removes the struct dc_sink - it doesn't
- * program hardware or alter other members of dc_link
- */
-void dc_link_remove_remote_sink(struct dc_link *link, struct dc_sink *sink)
-{
- int i;
-
- if (!link->sink_count) {
- BREAK_TO_DEBUGGER();
- return;
- }
-
- for (i = 0; i < link->sink_count; i++) {
- if (link->remote_sinks[i] == sink) {
- dc_sink_release(sink);
- link->remote_sinks[i] = NULL;
-
- /* shrink array to remove empty place */
- while (i < link->sink_count - 1) {
- link->remote_sinks[i] = link->remote_sinks[i+1];
- i++;
- }
- link->remote_sinks[i] = NULL;
- link->sink_count--;
- return;
- }
- }
-}
-
-void get_clock_requirements_for_state(struct dc_state *state, struct AsicStateEx *info)
-{
- info->displayClock = (unsigned int)state->bw_ctx.bw.dcn.clk.dispclk_khz;
- info->engineClock = (unsigned int)state->bw_ctx.bw.dcn.clk.dcfclk_khz;
- info->memoryClock = (unsigned int)state->bw_ctx.bw.dcn.clk.dramclk_khz;
- info->maxSupportedDppClock = (unsigned int)state->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz;
- info->dppClock = (unsigned int)state->bw_ctx.bw.dcn.clk.dppclk_khz;
- info->socClock = (unsigned int)state->bw_ctx.bw.dcn.clk.socclk_khz;
- info->dcfClockDeepSleep = (unsigned int)state->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz;
- info->fClock = (unsigned int)state->bw_ctx.bw.dcn.clk.fclk_khz;
- info->phyClock = (unsigned int)state->bw_ctx.bw.dcn.clk.phyclk_khz;
-}
enum dc_status dc_set_clock(struct dc *dc, enum dc_clock_type clock_type, uint32_t clk_khz, uint32_t stepping)
{
if (dc->hwss.set_clock)
@@ -3532,11 +5582,65 @@ bool dc_set_psr_allow_active(struct dc *dc, bool enable)
return true;
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
+/* enable/disable eDP Replay without specifying a stream for eDP */
+bool dc_set_replay_allow_active(struct dc *dc, bool active)
+{
+ int i;
+ bool allow_active;
-void dc_allow_idle_optimizations(struct dc *dc, bool allow)
+ for (i = 0; i < dc->current_state->stream_count; i++) {
+ struct dc_link *link;
+ struct dc_stream_state *stream = dc->current_state->streams[i];
+
+ link = stream->link;
+ if (!link)
+ continue;
+
+ if (link->replay_settings.replay_feature_enabled) {
+ if (active && !link->replay_settings.replay_allow_active) {
+ allow_active = true;
+ if (!dc_link_set_replay_allow_active(link, &allow_active,
+ false, false, NULL))
+ return false;
+ } else if (!active && link->replay_settings.replay_allow_active) {
+ allow_active = false;
+ if (!dc_link_set_replay_allow_active(link, &allow_active,
+ true, false, NULL))
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+/* set IPS disable state */
+bool dc_set_ips_disable(struct dc *dc, unsigned int disable_ips)
{
- if (dc->debug.disable_idle_power_optimizations)
+ dc_exit_ips_for_hw_access(dc);
+
+ dc->config.disable_ips = disable_ips;
+
+ return true;
+}
+
+void dc_allow_idle_optimizations_internal(struct dc *dc, bool allow, char const *caller_name)
+{
+ int idle_fclk_khz = 0, idle_dramclk_khz = 0, i = 0;
+ enum mall_stream_type subvp_pipe_type[MAX_PIPES] = {0};
+ struct pipe_ctx *pipe = NULL;
+ struct dc_state *context = dc->current_state;
+
+ if (dc->debug.disable_idle_power_optimizations) {
+ DC_LOG_DEBUG("%s: disabled\n", __func__);
+ return;
+ }
+
+ if (allow != dc->idle_optimizations_allowed)
+ DC_LOG_IPS("%s: allow_idle old=%d new=%d (caller=%s)\n", __func__,
+ dc->idle_optimizations_allowed, allow, caller_name);
+
+ if (dc->caps.ips_support && (dc->config.disable_ips == DMUB_IPS_DISABLE_ALL))
return;
if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->is_smu_present)
@@ -3546,47 +5650,177 @@ void dc_allow_idle_optimizations(struct dc *dc, bool allow)
if (allow == dc->idle_optimizations_allowed)
return;
- if (dc->hwss.apply_idle_power_optimizations && dc->hwss.apply_idle_power_optimizations(dc, allow))
+ if (dc->hwss.apply_idle_power_optimizations && dc->clk_mgr != NULL &&
+ dc->hwss.apply_idle_power_optimizations(dc, allow)) {
dc->idle_optimizations_allowed = allow;
+ DC_LOG_DEBUG("%s: %s\n", __func__, allow ? "enabled" : "disabled");
+ }
+
+ // log idle clocks and sub vp pipe types at idle optimization time
+ if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->get_hard_min_fclk)
+ idle_fclk_khz = dc->clk_mgr->funcs->get_hard_min_fclk(dc->clk_mgr);
+
+ if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->get_hard_min_memclk)
+ idle_dramclk_khz = dc->clk_mgr->funcs->get_hard_min_memclk(dc->clk_mgr);
+
+ if (dc->res_pool && context) {
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ subvp_pipe_type[i] = dc_state_get_pipe_subvp_type(context, pipe);
+ }
+ }
+ if (!dc->caps.is_apu)
+ DC_LOG_DC("%s: allow_idle=%d\n HardMinUClk_Khz=%d HardMinDramclk_Khz=%d\n Pipe_0=%d Pipe_1=%d Pipe_2=%d Pipe_3=%d Pipe_4=%d Pipe_5=%d (caller=%s)\n",
+ __func__, allow, idle_fclk_khz, idle_dramclk_khz, subvp_pipe_type[0], subvp_pipe_type[1], subvp_pipe_type[2],
+ subvp_pipe_type[3], subvp_pipe_type[4], subvp_pipe_type[5], caller_name);
+
}
-/*
- * blank all streams, and set min and max memory clock to
- * lowest and highest DPM level, respectively
- */
-void dc_unlock_memory_clock_frequency(struct dc *dc)
+void dc_exit_ips_for_hw_access_internal(struct dc *dc, const char *caller_name)
{
- unsigned int i;
+ if (dc->caps.ips_support)
+ dc_allow_idle_optimizations_internal(dc, false, caller_name);
+}
+
+bool dc_dmub_is_ips_idle_state(struct dc *dc)
+{
+ if (dc->debug.disable_idle_power_optimizations)
+ return false;
+
+ if (!dc->caps.ips_support || (dc->config.disable_ips == DMUB_IPS_DISABLE_ALL))
+ return false;
- for (i = 0; i < MAX_PIPES; i++)
- if (dc->current_state->res_ctx.pipe_ctx[i].plane_state)
- core_link_disable_stream(&dc->current_state->res_ctx.pipe_ctx[i]);
+ if (!dc->ctx->dmub_srv)
+ return false;
- dc->clk_mgr->funcs->set_hard_min_memclk(dc->clk_mgr, false);
- dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr);
+ return dc->ctx->dmub_srv->idle_allowed;
}
-/*
- * set min memory clock to the min required for current mode,
- * max to maxDPM, and unblank streams
- */
+/* set min and max memory clock to lowest and highest DPM level, respectively */
+void dc_unlock_memory_clock_frequency(struct dc *dc)
+{
+ if (dc->clk_mgr->funcs->set_hard_min_memclk)
+ dc->clk_mgr->funcs->set_hard_min_memclk(dc->clk_mgr, false);
+
+ if (dc->clk_mgr->funcs->set_hard_max_memclk)
+ dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr);
+}
+
+/* set min memory clock to the min required for current mode, max to maxDPM */
void dc_lock_memory_clock_frequency(struct dc *dc)
{
- unsigned int i;
+ if (dc->clk_mgr->funcs->get_memclk_states_from_smu)
+ dc->clk_mgr->funcs->get_memclk_states_from_smu(dc->clk_mgr);
- dc->clk_mgr->funcs->get_memclk_states_from_smu(dc->clk_mgr);
- dc->clk_mgr->funcs->set_hard_min_memclk(dc->clk_mgr, true);
- dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr);
+ if (dc->clk_mgr->funcs->set_hard_min_memclk)
+ dc->clk_mgr->funcs->set_hard_min_memclk(dc->clk_mgr, true);
- for (i = 0; i < MAX_PIPES; i++)
- if (dc->current_state->res_ctx.pipe_ctx[i].plane_state)
- core_link_enable_stream(dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]);
+ if (dc->clk_mgr->funcs->set_hard_max_memclk)
+ dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr);
}
-bool dc_is_plane_eligible_for_idle_optimizations(struct dc *dc, struct dc_plane_state *plane,
+static void blank_and_force_memclk(struct dc *dc, bool apply, unsigned int memclk_mhz)
+{
+ struct dc_state *context = dc->current_state;
+ struct hubp *hubp;
+ struct pipe_ctx *pipe;
+ int i;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe->stream != NULL) {
+ dc->hwss.disable_pixel_data(dc, pipe, true);
+
+ // wait for double buffer
+ pipe->stream_res.tg->funcs->wait_for_state(pipe->stream_res.tg, CRTC_STATE_VACTIVE);
+ pipe->stream_res.tg->funcs->wait_for_state(pipe->stream_res.tg, CRTC_STATE_VBLANK);
+ pipe->stream_res.tg->funcs->wait_for_state(pipe->stream_res.tg, CRTC_STATE_VACTIVE);
+
+ hubp = pipe->plane_res.hubp;
+ hubp->funcs->set_blank_regs(hubp, true);
+ }
+ }
+ if (dc->clk_mgr->funcs->set_max_memclk)
+ dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, memclk_mhz);
+ if (dc->clk_mgr->funcs->set_min_memclk)
+ dc->clk_mgr->funcs->set_min_memclk(dc->clk_mgr, memclk_mhz);
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe->stream != NULL) {
+ dc->hwss.disable_pixel_data(dc, pipe, false);
+
+ hubp = pipe->plane_res.hubp;
+ hubp->funcs->set_blank_regs(hubp, false);
+ }
+ }
+}
+
+
+/**
+ * dc_enable_dcmode_clk_limit() - lower clocks in dc (battery) mode
+ * @dc: pointer to dc of the dm calling this
+ * @enable: True = transition to DC mode, false = transition back to AC mode
+ *
+ * Some SoCs define additional clock limits when in DC mode, DM should
+ * invoke this function when the platform undergoes a power source transition
+ * so DC can apply/unapply the limit. This interface may be disruptive to
+ * the onscreen content.
+ *
+ * Context: Triggered by OS through DM interface, or manually by escape calls.
+ * Need to hold a dclock when doing so.
+ *
+ * Return: none (void function)
+ *
+ */
+void dc_enable_dcmode_clk_limit(struct dc *dc, bool enable)
+{
+ unsigned int softMax = 0, maxDPM = 0, funcMin = 0, i;
+ bool p_state_change_support;
+
+ if (!dc->config.dc_mode_clk_limit_support)
+ return;
+
+ softMax = dc->clk_mgr->bw_params->dc_mode_softmax_memclk;
+ for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries; i++) {
+ if (dc->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz > maxDPM)
+ maxDPM = dc->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz;
+ }
+ funcMin = (dc->clk_mgr->clks.dramclk_khz + 999) / 1000;
+ p_state_change_support = dc->clk_mgr->clks.p_state_change_support;
+
+ if (enable && !dc->clk_mgr->dc_mode_softmax_enabled) {
+ if (p_state_change_support) {
+ if (funcMin <= softMax && dc->clk_mgr->funcs->set_max_memclk)
+ dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, softMax);
+ // else: No-Op
+ } else {
+ if (funcMin <= softMax)
+ blank_and_force_memclk(dc, true, softMax);
+ // else: No-Op
+ }
+ } else if (!enable && dc->clk_mgr->dc_mode_softmax_enabled) {
+ if (p_state_change_support) {
+ if (funcMin <= softMax && dc->clk_mgr->funcs->set_max_memclk)
+ dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, maxDPM);
+ // else: No-Op
+ } else {
+ if (funcMin <= softMax)
+ blank_and_force_memclk(dc, true, maxDPM);
+ // else: No-Op
+ }
+ }
+ dc->clk_mgr->dc_mode_softmax_enabled = enable;
+}
+bool dc_is_plane_eligible_for_idle_optimizations(struct dc *dc,
+ unsigned int pitch,
+ unsigned int height,
+ enum surface_pixel_format format,
struct dc_cursor_attributes *cursor_attr)
{
- if (dc->hwss.does_plane_fit_in_mall && dc->hwss.does_plane_fit_in_mall(dc, plane, cursor_attr))
+ if (dc->hwss.does_plane_fit_in_mall && dc->hwss.does_plane_fit_in_mall(dc, pitch, height, format, cursor_attr))
return true;
return false;
}
@@ -3594,28 +5828,89 @@ bool dc_is_plane_eligible_for_idle_optimizations(struct dc *dc, struct dc_plane_
/* cleanup on driver unload */
void dc_hardware_release(struct dc *dc)
{
+ dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(dc);
+
if (dc->hwss.hardware_release)
dc->hwss.hardware_release(dc);
}
-#endif
+
+void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc)
+{
+ if (dc->current_state)
+ dc->current_state->bw_ctx.bw.dcn.clk.fw_based_mclk_switching_shut_down = true;
+}
/**
- * dc_enable_dmub_notifications - Returns whether dmub notification can be enabled
- * @dc: dc structure
+ * dc_is_dmub_outbox_supported - Check if DMUB firmware supports outbox notifications
+ *
+ * @dc: [in] dc structure
*
- * Returns: True to enable dmub notifications, False otherwise
+ * Checks whether DMUB FW supports outbox notifications. If supported, DM
+ * should register the outbox interrupt prior to actually enabling interrupts
+ * via dc_enable_dmub_outbox().
+ *
+ * Return:
+ * True if DMUB FW supports outbox notifications, False otherwise
*/
-bool dc_enable_dmub_notifications(struct dc *dc)
+bool dc_is_dmub_outbox_supported(struct dc *dc)
{
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- /* YELLOW_CARP B0 USB4 DPIA needs dmub notifications for interrupts */
- if (dc->ctx->asic_id.chip_family == FAMILY_YELLOW_CARP &&
- dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0 &&
- !dc->debug.dpia_debug.bits.disable_dpia)
- return true;
-#endif
+ if (!dc->caps.dmcub_support)
+ return false;
+
+ switch (dc->ctx->asic_id.chip_family) {
+
+ case FAMILY_YELLOW_CARP:
+ /* DCN31 B0 USB4 DPIA needs dmub notifications for interrupts */
+ if (dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0 &&
+ !dc->debug.dpia_debug.bits.disable_dpia)
+ return true;
+ break;
+
+ case AMDGPU_FAMILY_GC_11_0_1:
+ case AMDGPU_FAMILY_GC_11_5_0:
+ if (!dc->debug.dpia_debug.bits.disable_dpia)
+ return true;
+ break;
+
+ default:
+ break;
+ }
+
/* dmub aux needs dmub notifications to be enabled */
return dc->debug.enable_dmub_aux_for_legacy_ddc;
+
+}
+
+/**
+ * dc_enable_dmub_notifications - Check if dmub fw supports outbox
+ *
+ * @dc: [in] dc structure
+ *
+ * Calls dc_is_dmub_outbox_supported to check if dmub fw supports outbox
+ * notifications. All DMs shall switch to dc_is_dmub_outbox_supported. This
+ * API shall be removed after switching.
+ *
+ * Return:
+ * True if DMUB FW supports outbox notifications, False otherwise
+ */
+bool dc_enable_dmub_notifications(struct dc *dc)
+{
+ return dc_is_dmub_outbox_supported(dc);
+}
+
+/**
+ * dc_enable_dmub_outbox - Enables DMUB unsolicited notification
+ *
+ * @dc: [in] dc structure
+ *
+ * Enables DMUB unsolicited notifications to x86 via outbox.
+ */
+void dc_enable_dmub_outbox(struct dc *dc)
+{
+ struct dc_context *dc_ctx = dc->ctx;
+
+ dmub_enable_outbox_notification(dc_ctx->dmub_srv);
+ DC_LOG_DC("%s: dmub outbox notifications enabled\n", __func__);
}
/**
@@ -3633,7 +5928,6 @@ bool dc_process_dmub_aux_transfer_async(struct dc *dc,
{
uint8_t action;
union dmub_rb_cmd cmd = {0};
- struct dc_dmub_srv *dmub_srv = dc->ctx->dmub_srv;
ASSERT(payload->length <= 16);
@@ -3681,13 +5975,106 @@ bool dc_process_dmub_aux_transfer_async(struct dc *dc,
);
}
- dc_dmub_srv_cmd_queue(dmub_srv, &cmd);
- dc_dmub_srv_cmd_execute(dmub_srv);
- dc_dmub_srv_wait_idle(dmub_srv);
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
return true;
}
+bool dc_smart_power_oled_enable(const struct dc_link *link, bool enable, uint16_t peak_nits,
+ uint8_t debug_control, uint16_t fixed_CLL, uint32_t triggerline)
+{
+ bool status = false;
+ struct dc *dc = link->ctx->dc;
+ union dmub_rb_cmd cmd;
+ uint8_t otg_inst = 0;
+ unsigned int panel_inst = 0;
+ struct pipe_ctx *pipe_ctx = NULL;
+ struct resource_context *res_ctx = &link->ctx->dc->current_state->res_ctx;
+ int i = 0;
+
+ // get panel_inst
+ if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst))
+ return status;
+
+ // get otg_inst
+ for (i = 0; i < MAX_PIPES; i++) {
+ if (res_ctx &&
+ res_ctx->pipe_ctx[i].stream &&
+ res_ctx->pipe_ctx[i].stream->link &&
+ res_ctx->pipe_ctx[i].stream->link == link &&
+ res_ctx->pipe_ctx[i].stream->link->connector_signal == SIGNAL_TYPE_EDP) {
+ pipe_ctx = &res_ctx->pipe_ctx[i];
+ //TODO: refactor for multi edp support
+ break;
+ }
+ }
+
+ if (pipe_ctx)
+ otg_inst = pipe_ctx->stream_res.tg->inst;
+
+ // before enabling smart power OLED, we need to call set pipe for DMUB to set ABM config
+ if (enable) {
+ if (dc->hwss.set_pipe && pipe_ctx)
+ dc->hwss.set_pipe(pipe_ctx);
+ }
+
+ // fill in cmd
+ memset(&cmd, 0, sizeof(cmd));
+
+ cmd.smart_power_oled_enable.header.type = DMUB_CMD__SMART_POWER_OLED;
+ cmd.smart_power_oled_enable.header.sub_type = DMUB_CMD__SMART_POWER_OLED_ENABLE;
+ cmd.smart_power_oled_enable.header.payload_bytes =
+ sizeof(struct dmub_rb_cmd_smart_power_oled_enable_data) - sizeof(struct dmub_cmd_header);
+ cmd.smart_power_oled_enable.header.ret_status = 1;
+ cmd.smart_power_oled_enable.data.enable = enable;
+ cmd.smart_power_oled_enable.data.panel_inst = panel_inst;
+ cmd.smart_power_oled_enable.data.peak_nits = peak_nits;
+ cmd.smart_power_oled_enable.data.otg_inst = otg_inst;
+ cmd.smart_power_oled_enable.data.digfe_inst = link->link_enc->preferred_engine;
+ cmd.smart_power_oled_enable.data.digbe_inst = link->link_enc->transmitter;
+
+ cmd.smart_power_oled_enable.data.debugcontrol = debug_control;
+ cmd.smart_power_oled_enable.data.triggerline = triggerline;
+ cmd.smart_power_oled_enable.data.fixed_max_cll = fixed_CLL;
+
+ // send cmd
+ status = dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+
+ return status;
+}
+
+bool dc_smart_power_oled_get_max_cll(const struct dc_link *link, unsigned int *pCurrent_MaxCLL)
+{
+ struct dc *dc = link->ctx->dc;
+ union dmub_rb_cmd cmd;
+ bool status = false;
+ unsigned int panel_inst = 0;
+
+ // get panel_inst
+ if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst))
+ return status;
+
+ // fill in cmd
+ memset(&cmd, 0, sizeof(cmd));
+
+ cmd.smart_power_oled_getmaxcll.header.type = DMUB_CMD__SMART_POWER_OLED;
+ cmd.smart_power_oled_getmaxcll.header.sub_type = DMUB_CMD__SMART_POWER_OLED_GETMAXCLL;
+ cmd.smart_power_oled_getmaxcll.header.payload_bytes = sizeof(cmd.smart_power_oled_getmaxcll.data);
+ cmd.smart_power_oled_getmaxcll.header.ret_status = 1;
+
+ cmd.smart_power_oled_getmaxcll.data.input.panel_inst = panel_inst;
+
+ // send cmd and wait for reply
+ status = dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
+
+ if (status)
+ *pCurrent_MaxCLL = cmd.smart_power_oled_getmaxcll.data.output.current_max_cll;
+ else
+ *pCurrent_MaxCLL = 0;
+
+ return status;
+}
+
uint8_t get_link_index_from_dpia_port_index(const struct dc *dc,
uint8_t dpia_port_index)
{
@@ -3709,21 +6096,17 @@ uint8_t get_link_index_from_dpia_port_index(const struct dc *dc,
}
/**
- *****************************************************************************
- * Function: dc_process_dmub_set_config_async
+ * dc_process_dmub_set_config_async - Submits set_config command
*
- * @brief
- * Submits set_config command to dmub via inbox message
+ * @dc: [in] dc structure
+ * @link_index: [in] link index
+ * @payload: [in] aux payload
+ * @notify: [out] set_config immediate reply
*
- * @param
- * [in] dc: dc structure
- * [in] link_index: link index
- * [in] payload: aux payload
- * [out] notify: set_config immediate reply
+ * Submits set_config command to dmub via inbox message.
*
- * @return
- * True if successful, False if failure
- *****************************************************************************
+ * Return:
+ * True if successful, False if failure
*/
bool dc_process_dmub_set_config_async(struct dc *dc,
uint32_t link_index,
@@ -3731,7 +6114,6 @@ bool dc_process_dmub_set_config_async(struct dc *dc,
struct dmub_notification *notify)
{
union dmub_rb_cmd cmd = {0};
- struct dc_dmub_srv *dmub_srv = dc->ctx->dmub_srv;
bool is_cmd_complete = true;
/* prepare SET_CONFIG command */
@@ -3742,7 +6124,7 @@ bool dc_process_dmub_set_config_async(struct dc *dc,
cmd.set_config_access.set_config_control.cmd_pkt.msg_type = payload->msg_type;
cmd.set_config_access.set_config_control.cmd_pkt.msg_data = payload->msg_data;
- if (!dc_dmub_srv_cmd_with_reply_data(dmub_srv, &cmd)) {
+ if (!dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) {
/* command is not processed by dmub */
notify->sc_status = SET_CONFIG_UNKNOWN_ERROR;
return is_cmd_complete;
@@ -3759,21 +6141,17 @@ bool dc_process_dmub_set_config_async(struct dc *dc,
}
/**
- *****************************************************************************
- * Function: dc_process_dmub_set_mst_slots
+ * dc_process_dmub_set_mst_slots - Submits MST slot allocation
*
- * @brief
- * Submits mst slot allocation command to dmub via inbox message
+ * @dc: [in] dc structure
+ * @link_index: [in] link index
+ * @mst_alloc_slots: [in] mst slots to be allotted
+ * @mst_slots_in_use: [out] mst slots in use returned in failure case
*
- * @param
- * [in] dc: dc structure
- * [in] link_index: link index
- * [in] mst_alloc_slots: mst slots to be allotted
- * [out] mst_slots_in_use: mst slots in use returned in failure case
+ * Submits mst slot allocation command to dmub via inbox message
*
- * @return
- * DC_OK if successful, DC_ERROR if failure
- *****************************************************************************
+ * Return:
+ * DC_OK if successful, DC_ERROR if failure
*/
enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc,
uint32_t link_index,
@@ -3781,7 +6159,6 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc,
uint8_t *mst_slots_in_use)
{
union dmub_rb_cmd cmd = {0};
- struct dc_dmub_srv *dmub_srv = dc->ctx->dmub_srv;
/* prepare MST_ALLOC_SLOTS command */
cmd.set_mst_alloc_slots.header.type = DMUB_CMD__DPIA;
@@ -3790,7 +6167,7 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc,
cmd.set_mst_alloc_slots.mst_slots_control.instance = dc->links[link_index]->ddc_hw_inst;
cmd.set_mst_alloc_slots.mst_slots_control.mst_alloc_slots = mst_alloc_slots;
- if (!dc_dmub_srv_cmd_with_reply_data(dmub_srv, &cmd))
+ if (!dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
/* command is not processed by dmub */
return DC_ERROR_UNEXPECTED;
@@ -3813,6 +6190,60 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc,
}
/**
+ * dc_process_dmub_dpia_set_tps_notification - Submits tps notification
+ *
+ * @dc: [in] dc structure
+ * @link_index: [in] link index
+ * @tps: [in] request tps
+ *
+ * Submits set_tps_notification command to dmub via inbox message
+ */
+void dc_process_dmub_dpia_set_tps_notification(const struct dc *dc, uint32_t link_index, uint8_t tps)
+{
+ union dmub_rb_cmd cmd = {0};
+
+ cmd.set_tps_notification.header.type = DMUB_CMD__DPIA;
+ cmd.set_tps_notification.header.sub_type = DMUB_CMD__DPIA_SET_TPS_NOTIFICATION;
+ cmd.set_tps_notification.tps_notification.instance = dc->links[link_index]->ddc_hw_inst;
+ cmd.set_tps_notification.tps_notification.tps = tps;
+
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+/**
+ * dc_process_dmub_dpia_hpd_int_enable - Submits DPIA HPD interrupt enable
+ *
+ * @dc: [in] dc structure
+ * @hpd_int_enable: [in] 1 for hpd int enable, 0 to disable
+ *
+ * Submits dpia hpd int enable command to dmub via inbox message
+ */
+void dc_process_dmub_dpia_hpd_int_enable(const struct dc *dc,
+ uint32_t hpd_int_enable)
+{
+ union dmub_rb_cmd cmd = {0};
+
+ cmd.dpia_hpd_int_enable.header.type = DMUB_CMD__DPIA_HPD_INT_ENABLE;
+ cmd.dpia_hpd_int_enable.enable = hpd_int_enable;
+
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+
+ DC_LOG_DEBUG("%s: hpd_int_enable(%d)\n", __func__, hpd_int_enable);
+}
+
+/**
+ * dc_print_dmub_diagnostic_data - Print DMUB diagnostic data for debugging
+ *
+ * @dc: [in] dc structure
+ *
+ *
+ */
+void dc_print_dmub_diagnostic_data(const struct dc *dc)
+{
+ dc_dmub_srv_log_diagnostic_data(dc->ctx->dmub_srv);
+}
+
+/**
* dc_disable_accelerated_mode - disable accelerated mode
* @dc: dc structure
*/
@@ -3823,16 +6254,13 @@ void dc_disable_accelerated_mode(struct dc *dc)
/**
- *****************************************************************************
- * dc_notify_vsync_int_state() - notifies vsync enable/disable state
+ * dc_notify_vsync_int_state - notifies vsync enable/disable state
* @dc: dc structure
- * @stream: stream where vsync int state changed
- * @enable: whether vsync is enabled or disabled
- *
- * Called when vsync is enabled/disabled
- * Will notify DMUB to start/stop ABM interrupts after steady state is reached
+ * @stream: stream where vsync int state changed
+ * @enable: whether vsync is enabled or disabled
*
- *****************************************************************************
+ * Called when vsync is enabled/disabled. Will notify DMUB to start/stop ABM
+ * interrupts after steady state is reached.
*/
void dc_notify_vsync_int_state(struct dc *dc, struct dc_stream_state *stream, bool enable)
{
@@ -3846,6 +6274,9 @@ void dc_notify_vsync_int_state(struct dc *dc, struct dc_stream_state *stream, bo
if (link->psr_settings.psr_feature_enabled)
return;
+ if (link->replay_settings.replay_feature_enabled)
+ return;
+
/*find primary pipe associated with stream*/
for (i = 0; i < MAX_PIPES; i++) {
pipe = &dc->current_state->res_ctx.pipe_ctx[i];
@@ -3859,7 +6290,7 @@ void dc_notify_vsync_int_state(struct dc *dc, struct dc_stream_state *stream, bo
return;
}
- get_edp_links(dc, edp_links, &edp_num);
+ dc_get_edp_links(dc, edp_links, &edp_num);
/* Determine panel inst */
for (i = 0; i < edp_num; i++) {
@@ -3874,3 +6305,789 @@ void dc_notify_vsync_int_state(struct dc *dc, struct dc_stream_state *stream, bo
if (pipe->stream_res.abm && pipe->stream_res.abm->funcs->set_abm_pause)
pipe->stream_res.abm->funcs->set_abm_pause(pipe->stream_res.abm, !enable, i, pipe->stream_res.tg->inst);
}
+
+/**
+ * dc_abm_save_restore() - Interface to DC for save+pause and restore+un-pause
+ *                         ABM
+ * @dc: dc structure
+ * @stream: stream used to select the pipe and the eDP link
+ * @pData: abm hw states
+ *
+ * Return: the result of the ABM save_restore hook. Returns false when replay
+ * is enabled on the stream's link, when no pipe with a timing generator
+ * drives @stream, when the link is not among the eDP links, or when the pipe
+ * has no ABM or no save_restore hook.
+ */
+bool dc_abm_save_restore(
+		struct dc *dc,
+		struct dc_stream_state *stream,
+		struct abm_save_restore *pData)
+{
+	struct dc_link *edp_links[MAX_NUM_EDP];
+	struct dc_link *link = stream->sink->link;
+	struct pipe_ctx *pipe = NULL;
+	int panel_inst;
+	int pipe_idx;
+	int edp_num;
+
+	if (link->replay_settings.replay_feature_enabled)
+		return false;
+
+	/* Locate the first pipe that drives this stream and has a timing generator. */
+	for (pipe_idx = 0; pipe_idx < MAX_PIPES; pipe_idx++) {
+		pipe = &dc->current_state->res_ctx.pipe_ctx[pipe_idx];
+
+		if (pipe->stream == stream && pipe->stream_res.tg)
+			break;
+	}
+
+	if (pipe_idx == MAX_PIPES) {
+		ASSERT(0);
+		return false;
+	}
+
+	dc_get_edp_links(dc, edp_links, &edp_num);
+
+	/* The panel instance is the position of the link within edp_links. */
+	panel_inst = 0;
+	while (panel_inst < edp_num && edp_links[panel_inst] != link)
+		panel_inst++;
+
+	if (panel_inst == edp_num)
+		return false;
+
+	if (!pipe->stream_res.abm || !pipe->stream_res.abm->funcs->save_restore)
+		return false;
+
+	return pipe->stream_res.abm->funcs->save_restore(
+			pipe->stream_res.abm,
+			panel_inst,
+			pData);
+}
+
+/*
+ * dc_query_current_properties() - fill @properties from the current state.
+ *
+ * cursor_size_limit starts at dc->caps.max_cursor_size. When the
+ * allow_sw_cursor_fallback debug option is set and the resource pool provides
+ * get_max_hw_cursor_size, the limit is lowered to the smallest value that hook
+ * reports across the streams of dc->current_state.
+ */
+void dc_query_current_properties(struct dc *dc, struct dc_current_properties *properties)
+{
+	unsigned int limit = dc->caps.max_cursor_size;
+	unsigned int candidate;
+	unsigned int idx;
+
+	/* Without the fallback option or the hook, the cap is reported as-is. */
+	if (!dc->debug.allow_sw_cursor_fallback || !dc->res_pool->funcs->get_max_hw_cursor_size) {
+		properties->cursor_size_limit = limit;
+		return;
+	}
+
+	for (idx = 0; idx < dc->current_state->stream_count; idx++) {
+		candidate = dc->res_pool->funcs->get_max_hw_cursor_size(dc,
+				dc->current_state,
+				dc->current_state->streams[idx]);
+
+		if (candidate < limit)
+			limit = candidate;
+	}
+
+	properties->cursor_size_limit = limit;
+}
+
+/**
+ * dc_set_edp_power() - DM controls eDP power to be ON/OFF
+ *
+ * Called when DM wants to power on/off eDP.
+ * Only acts on links whose connector signal is eDP and whose
+ * skip_implict_edp_power_control flag is set; otherwise does nothing.
+ *
+ * @dc: Current DC state (not referenced by the body; the link's own dc
+ *      pointer is used)
+ * @edp_link: a link with eDP connector signal type
+ * @powerOn: power on/off eDP
+ *
+ * Return: void
+ */
+void dc_set_edp_power(const struct dc *dc, struct dc_link *edp_link,
+		bool powerOn)
+{
+	if (edp_link->connector_signal == SIGNAL_TYPE_EDP &&
+	    edp_link->skip_implict_edp_power_control != false)
+		edp_link->dc->link_srv->edp_set_panel_power(edp_link, powerOn);
+}
+
+/**
+ * dc_get_power_profile_for_dc_state() - extracts power profile from dc state
+ *
+ * Called when DM wants to make power policy decisions based on dc_state
+ *
+ * @context: Pointer to the dc_state from which the power profile is extracted.
+ *
+ * The power level defaults to the negation of p_state_change_support. If the
+ * state's clk_mgr, its ctx and the owning dc are all reachable and the
+ * resource pool provides get_power_profile, that hook's value is used instead.
+ *
+ * Return: The power profile structure containing the power level information.
+ */
+struct dc_power_profile dc_get_power_profile_for_dc_state(const struct dc_state *context)
+{
+	struct dc_power_profile profile = { 0 };
+	struct dc *dc;	/* declared up front: no declaration after statements */
+
+	/* Fallback value, kept when the hook cannot be reached. */
+	profile.power_level = !context->bw_ctx.bw.dcn.clk.p_state_change_support;
+	if (!context->clk_mgr || !context->clk_mgr->ctx || !context->clk_mgr->ctx->dc)
+		return profile;
+
+	dc = context->clk_mgr->ctx->dc;
+
+	if (dc->res_pool->funcs->get_power_profile)
+		profile.power_level = dc->res_pool->funcs->get_power_profile(context);
+	return profile;
+}
+
+/**
+ * dc_get_det_buffer_size_from_state() - extracts detile buffer size from dc state
+ *
+ * This function is called to log the detile buffer size from the dc_state.
+ *
+ * @context: a pointer to the dc_state from which the detile buffer size is extracted.
+ *
+ * Return: the size of the detile buffer, or 0 if not available (no clk_mgr,
+ * ctx or dc reachable from @context, or no get_det_buffer_size hook).
+ */
+unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context)
+{
+	struct dc *dc;
+
+	/*
+	 * Same reachability guard as dc_get_power_profile_for_dc_state(): a
+	 * state without a usable clk_mgr chain reports "not available"
+	 * instead of dereferencing a NULL pointer.
+	 */
+	if (!context->clk_mgr || !context->clk_mgr->ctx || !context->clk_mgr->ctx->dc)
+		return 0;
+
+	dc = context->clk_mgr->ctx->dc;
+
+	if (!dc->res_pool->funcs->get_det_buffer_size)
+		return 0;
+
+	return dc->res_pool->funcs->get_det_buffer_size(context);
+}
+
+/**
+ * dc_get_host_router_index - Get index of host router from a dpia link
+ *
+ * Computes the host router index of the target link when the target link is
+ * a dpia link.
+ *
+ * @link: Pointer to the target link (input)
+ * @host_router_index: Pointer to store the host router index of the target link (output).
+ *                     It is written once the index has been computed, even if
+ *                     the index then turns out to be out of range.
+ *
+ * Return: true if the host router index is found and valid. false if an
+ * argument is NULL, the link is not a USB4 DPIA endpoint, the link index is
+ * below the lowest DPIA link index, the number of DPIAs per host router is
+ * zero, or the computed index is not below the number of host routers.
+ */
+bool dc_get_host_router_index(const struct dc_link *link, unsigned int *host_router_index)
+{
+	struct dc *dc;
+
+	if (!link || !host_router_index || link->ep_type != DISPLAY_ENDPOINT_USB4_DPIA)
+		return false;
+
+	dc = link->ctx->dc;
+
+	if (link->link_index < dc->lowest_dpia_link_index)
+		return false;
+
+	/* Guard the division below against a zero divisor. */
+	if (!dc->caps.num_of_dpias_per_host_router)
+		return false;
+
+	*host_router_index = (link->link_index - dc->lowest_dpia_link_index) /
+			dc->caps.num_of_dpias_per_host_router;
+
+	return *host_router_index < dc->caps.num_of_host_routers;
+}
+
+/*
+ * dc_is_cursor_limit_pending() - true if dc_stream_is_cursor_limit_pending()
+ * reports true for at least one stream of dc->current_state.
+ */
+bool dc_is_cursor_limit_pending(struct dc *dc)
+{
+	uint32_t idx = 0;
+
+	while (idx < dc->current_state->stream_count) {
+		if (dc_stream_is_cursor_limit_pending(dc, dc->current_state->streams[idx]))
+			return true;
+		idx++;
+	}
+
+	return false;
+}
+
+/*
+ * dc_can_clear_cursor_limit() - true if
+ * dc_state_can_clear_stream_cursor_subvp_limit() reports true for at least
+ * one stream of dc->current_state.
+ */
+bool dc_can_clear_cursor_limit(const struct dc *dc)
+{
+	bool can_clear = false;
+	uint32_t idx;
+
+	/* Stop at the first stream for which the helper reports true. */
+	for (idx = 0; !can_clear && idx < dc->current_state->stream_count; idx++) {
+		if (dc_state_can_clear_stream_cursor_subvp_limit(dc->current_state->streams[idx], dc->current_state))
+			can_clear = true;
+	}
+
+	return can_clear;
+}
+
+/*
+ * dc_get_underflow_debug_data_for_otg() - collect underflow debug data for
+ * the timing generator whose instance equals @primary_otg_inst.
+ *
+ * The first matching entry of dc->res_pool->timing_generators is handed to
+ * the hwss get_underflow_debug_data hook (when present) together with
+ * @out_data. dc_exit_ips_for_hw_access() is called before the hook check.
+ */
+void dc_get_underflow_debug_data_for_otg(struct dc *dc, int primary_otg_inst,
+		struct dc_underflow_debug_data *out_data)
+{
+	struct timing_generator *tg = NULL;
+	int idx;
+
+	for (idx = 0; idx < MAX_PIPES && !tg; idx++) {
+		struct timing_generator *candidate = dc->res_pool->timing_generators[idx];
+
+		if (candidate && candidate->inst == primary_otg_inst)
+			tg = candidate;
+	}
+
+	dc_exit_ips_for_hw_access(dc);
+
+	/* NOTE(review): tg is still NULL here when no instance matched; confirm the hook tolerates that. */
+	if (dc->hwss.get_underflow_debug_data)
+		dc->hwss.get_underflow_debug_data(dc, tg, out_data);
+}
+
+/*
+ * dc_get_power_feature_status() - copy power feature flags of the current
+ * state into @out_data.
+ *
+ * uclk_p_state comes from the current state's clk_mgr clocks and fams from
+ * the state's DCN bandwidth context. @primary_otg_inst is not referenced by
+ * the body.
+ */
+void dc_get_power_feature_status(struct dc *dc, int primary_otg_inst,
+		struct power_features *out_data)
+{
+	struct dc_state *cur = dc->current_state;
+
+	out_data->uclk_p_state = cur->clk_mgr->clks.p_state_change_support;
+	out_data->fams = cur->bw_ctx.bw.dcn.clk.fw_based_mclk_switching;
+}
+
+/**
+ * dc_capture_register_software_state - snapshot software programming state
+ *
+ * @dc: dc structure; dc->current_state is the source of the snapshot
+ * @state: output structure; zeroed first, then filled section by section
+ *
+ * Fills the hubp, hubbub, dpp, dccg, dsc, mpc, opp and optc sections of
+ * @state from dc->current_state, dc->res_pool and dc->clk_mgr. A number of
+ * fields are assigned fixed constants rather than values read from the
+ * state (see the inline "Typical"/"Default" comments and the TODOs).
+ *
+ * Return: false if @dc, dc->current_state or @state is NULL (state_valid is
+ * cleared when @state is non-NULL); otherwise true with state_valid set.
+ */
+bool dc_capture_register_software_state(struct dc *dc, struct dc_register_software_state *state)
+{
+ struct dc_state *context;
+ struct resource_context *res_ctx;
+ int i;
+
+ if (!dc || !dc->current_state || !state) {
+ if (state)
+ state->state_valid = false;
+ return false;
+ }
+
+ /* Initialize the state structure */
+ memset(state, 0, sizeof(struct dc_register_software_state));
+
+ context = dc->current_state;
+ res_ctx = &context->res_ctx;
+
+ /* Count active pipes and streams */
+ state->active_pipe_count = 0;
+ state->active_stream_count = context->stream_count;
+
+ /* NOTE(review): unlike the later per-pipe loops, this one is not also bounded by MAX_PIPES. */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ if (res_ctx->pipe_ctx[i].stream)
+ state->active_pipe_count++;
+ }
+
+ /* Capture HUBP programming state for each pipe */
+ for (i = 0; i < MAX_PIPES && i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
+
+ state->hubp[i].valid_stream = false;
+ if (!pipe_ctx->stream)
+ continue;
+
+ state->hubp[i].valid_stream = true;
+
+ /* HUBP register programming variables */
+ if (pipe_ctx->stream_res.tg)
+ state->hubp[i].vtg_sel = pipe_ctx->stream_res.tg->inst;
+
+ state->hubp[i].hubp_clock_enable = (pipe_ctx->plane_res.hubp != NULL) ? 1 : 0;
+
+ state->hubp[i].valid_plane_state = false;
+ if (pipe_ctx->plane_state) {
+ state->hubp[i].valid_plane_state = true;
+ state->hubp[i].surface_pixel_format = pipe_ctx->plane_state->format;
+ state->hubp[i].rotation_angle = pipe_ctx->plane_state->rotation;
+ state->hubp[i].h_mirror_en = pipe_ctx->plane_state->horizontal_mirror ? 1 : 0;
+
+ /* Surface size */
+ if (pipe_ctx->plane_state->plane_size.surface_size.width > 0) {
+ state->hubp[i].surface_size_width = pipe_ctx->plane_state->plane_size.surface_size.width;
+ state->hubp[i].surface_size_height = pipe_ctx->plane_state->plane_size.surface_size.height;
+ }
+
+ /* Viewport dimensions from scaler data */
+ if (pipe_ctx->plane_state->src_rect.width > 0) {
+ state->hubp[i].pri_viewport_width = pipe_ctx->plane_state->src_rect.width;
+ state->hubp[i].pri_viewport_height = pipe_ctx->plane_state->src_rect.height;
+ state->hubp[i].pri_viewport_x_start = pipe_ctx->plane_state->src_rect.x;
+ state->hubp[i].pri_viewport_y_start = pipe_ctx->plane_state->src_rect.y;
+ }
+
+ /* DCC settings */
+ state->hubp[i].surface_dcc_en = (pipe_ctx->plane_state->dcc.enable) ? 1 : 0;
+ state->hubp[i].surface_dcc_ind_64b_blk = pipe_ctx->plane_state->dcc.independent_64b_blks;
+ state->hubp[i].surface_dcc_ind_128b_blk = pipe_ctx->plane_state->dcc.dcc_ind_blk;
+
+ /* Surface pitch */
+ state->hubp[i].surface_pitch = pipe_ctx->plane_state->plane_size.surface_pitch;
+ state->hubp[i].meta_pitch = pipe_ctx->plane_state->dcc.meta_pitch;
+ state->hubp[i].chroma_pitch = pipe_ctx->plane_state->plane_size.chroma_pitch;
+ state->hubp[i].meta_pitch_c = pipe_ctx->plane_state->dcc.meta_pitch_c;
+
+ /* Surface addresses - primary */
+ state->hubp[i].primary_surface_address_low = pipe_ctx->plane_state->address.grph.addr.low_part;
+ state->hubp[i].primary_surface_address_high = pipe_ctx->plane_state->address.grph.addr.high_part;
+ state->hubp[i].primary_meta_surface_address_low = pipe_ctx->plane_state->address.grph.meta_addr.low_part;
+ state->hubp[i].primary_meta_surface_address_high = pipe_ctx->plane_state->address.grph.meta_addr.high_part;
+
+ /* TMZ settings */
+ /* Both TMZ fields are filled from the same address.tmz_surface flag. */
+ state->hubp[i].primary_surface_tmz = pipe_ctx->plane_state->address.tmz_surface;
+ state->hubp[i].primary_meta_surface_tmz = pipe_ctx->plane_state->address.tmz_surface;
+
+ /* Tiling configuration */
+ state->hubp[i].min_dc_gfx_version9 = false;
+ if (pipe_ctx->plane_state->tiling_info.gfxversion >= DcGfxVersion9) {
+ state->hubp[i].min_dc_gfx_version9 = true;
+ state->hubp[i].sw_mode = pipe_ctx->plane_state->tiling_info.gfx9.swizzle;
+ state->hubp[i].num_pipes = pipe_ctx->plane_state->tiling_info.gfx9.num_pipes;
+ state->hubp[i].num_banks = pipe_ctx->plane_state->tiling_info.gfx9.num_banks;
+ state->hubp[i].pipe_interleave = pipe_ctx->plane_state->tiling_info.gfx9.pipe_interleave;
+ state->hubp[i].num_shader_engines = pipe_ctx->plane_state->tiling_info.gfx9.num_shader_engines;
+ state->hubp[i].num_rb_per_se = pipe_ctx->plane_state->tiling_info.gfx9.num_rb_per_se;
+ state->hubp[i].num_pkrs = pipe_ctx->plane_state->tiling_info.gfx9.num_pkrs;
+ }
+ }
+
+ /* DML Request Size Configuration */
+ if (pipe_ctx->rq_regs.rq_regs_l.chunk_size > 0) {
+ state->hubp[i].rq_chunk_size = pipe_ctx->rq_regs.rq_regs_l.chunk_size;
+ state->hubp[i].rq_min_chunk_size = pipe_ctx->rq_regs.rq_regs_l.min_chunk_size;
+ state->hubp[i].rq_meta_chunk_size = pipe_ctx->rq_regs.rq_regs_l.meta_chunk_size;
+ state->hubp[i].rq_min_meta_chunk_size = pipe_ctx->rq_regs.rq_regs_l.min_meta_chunk_size;
+ state->hubp[i].rq_dpte_group_size = pipe_ctx->rq_regs.rq_regs_l.dpte_group_size;
+ state->hubp[i].rq_mpte_group_size = pipe_ctx->rq_regs.rq_regs_l.mpte_group_size;
+ state->hubp[i].rq_swath_height_l = pipe_ctx->rq_regs.rq_regs_l.swath_height;
+ state->hubp[i].rq_pte_row_height_l = pipe_ctx->rq_regs.rq_regs_l.pte_row_height_linear;
+ }
+
+ /* Chroma request size configuration */
+ if (pipe_ctx->rq_regs.rq_regs_c.chunk_size > 0) {
+ state->hubp[i].rq_chunk_size_c = pipe_ctx->rq_regs.rq_regs_c.chunk_size;
+ state->hubp[i].rq_min_chunk_size_c = pipe_ctx->rq_regs.rq_regs_c.min_chunk_size;
+ state->hubp[i].rq_meta_chunk_size_c = pipe_ctx->rq_regs.rq_regs_c.meta_chunk_size;
+ state->hubp[i].rq_min_meta_chunk_size_c = pipe_ctx->rq_regs.rq_regs_c.min_meta_chunk_size;
+ state->hubp[i].rq_dpte_group_size_c = pipe_ctx->rq_regs.rq_regs_c.dpte_group_size;
+ state->hubp[i].rq_mpte_group_size_c = pipe_ctx->rq_regs.rq_regs_c.mpte_group_size;
+ state->hubp[i].rq_swath_height_c = pipe_ctx->rq_regs.rq_regs_c.swath_height;
+ state->hubp[i].rq_pte_row_height_c = pipe_ctx->rq_regs.rq_regs_c.pte_row_height_linear;
+ }
+
+ /* DML expansion modes */
+ state->hubp[i].drq_expansion_mode = pipe_ctx->rq_regs.drq_expansion_mode;
+ state->hubp[i].prq_expansion_mode = pipe_ctx->rq_regs.prq_expansion_mode;
+ state->hubp[i].mrq_expansion_mode = pipe_ctx->rq_regs.mrq_expansion_mode;
+ state->hubp[i].crq_expansion_mode = pipe_ctx->rq_regs.crq_expansion_mode;
+
+ /* DML DLG parameters - nominal */
+ state->hubp[i].dst_y_per_vm_vblank = pipe_ctx->dlg_regs.dst_y_per_vm_vblank;
+ state->hubp[i].dst_y_per_row_vblank = pipe_ctx->dlg_regs.dst_y_per_row_vblank;
+ state->hubp[i].dst_y_per_vm_flip = pipe_ctx->dlg_regs.dst_y_per_vm_flip;
+ state->hubp[i].dst_y_per_row_flip = pipe_ctx->dlg_regs.dst_y_per_row_flip;
+
+ /* DML prefetch settings */
+ state->hubp[i].dst_y_prefetch = pipe_ctx->dlg_regs.dst_y_prefetch;
+ state->hubp[i].vratio_prefetch = pipe_ctx->dlg_regs.vratio_prefetch;
+ state->hubp[i].vratio_prefetch_c = pipe_ctx->dlg_regs.vratio_prefetch_c;
+
+ /* TTU parameters */
+ state->hubp[i].qos_level_low_wm = pipe_ctx->ttu_regs.qos_level_low_wm;
+ state->hubp[i].qos_level_high_wm = pipe_ctx->ttu_regs.qos_level_high_wm;
+ state->hubp[i].qos_level_flip = pipe_ctx->ttu_regs.qos_level_flip;
+ state->hubp[i].min_ttu_vblank = pipe_ctx->ttu_regs.min_ttu_vblank;
+ }
+
+ /* Capture HUBBUB programming state */
+ if (dc->res_pool->hubbub) {
+ /* Individual DET buffer sizes - software state variables that program DET registers */
+ /* Only the first four pipes are recorded (det0_size..det3_size). */
+ for (i = 0; i < 4 && i < dc->res_pool->pipe_count; i++) {
+ uint32_t det_size = res_ctx->pipe_ctx[i].det_buffer_size_kb;
+ switch (i) {
+ case 0:
+ state->hubbub.det0_size = det_size;
+ break;
+ case 1:
+ state->hubbub.det1_size = det_size;
+ break;
+ case 2:
+ state->hubbub.det2_size = det_size;
+ break;
+ case 3:
+ state->hubbub.det3_size = det_size;
+ break;
+ }
+ }
+
+ /* Compression buffer configuration - software state that programs COMPBUF_SIZE register */
+ // TODO: Handle logic for legacy DCN pre-DCN401
+ state->hubbub.compbuf_size = context->bw_ctx.bw.dcn.arb_regs.compbuf_size;
+ }
+
+ /* Capture DPP programming state for each pipe */
+ for (i = 0; i < MAX_PIPES && i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
+
+ if (!pipe_ctx->stream)
+ continue;
+
+ state->dpp[i].dpp_clock_enable = (pipe_ctx->plane_res.dpp != NULL) ? 1 : 0;
+
+ if (pipe_ctx->plane_state && pipe_ctx->plane_res.scl_data.recout.width > 0) {
+ /* Access dscl_prog_data directly - this contains the actual software state used for register programming */
+ struct dscl_prog_data *dscl_data = &pipe_ctx->plane_res.scl_data.dscl_prog_data;
+
+ /* Recout (Rectangle of Interest) configuration - software state that programs RECOUT registers */
+ state->dpp[i].recout_start_x = dscl_data->recout.x;
+ state->dpp[i].recout_start_y = dscl_data->recout.y;
+ state->dpp[i].recout_width = dscl_data->recout.width;
+ state->dpp[i].recout_height = dscl_data->recout.height;
+
+ /* MPC (Multiple Pipe/Plane Combiner) size - software state that programs MPC_SIZE registers */
+ state->dpp[i].mpc_width = dscl_data->mpc_size.width;
+ state->dpp[i].mpc_height = dscl_data->mpc_size.height;
+
+ /* DSCL mode - software state that programs SCL_MODE registers */
+ state->dpp[i].dscl_mode = dscl_data->dscl_mode;
+
+ /* Scaler ratios - software state that programs scale ratio registers (use actual programmed ratios) */
+ state->dpp[i].horz_ratio_int = dscl_data->ratios.h_scale_ratio >> 19; // Extract integer part from programmed ratio
+ state->dpp[i].vert_ratio_int = dscl_data->ratios.v_scale_ratio >> 19; // Extract integer part from programmed ratio
+
+ /* Basic scaler taps - software state that programs tap control registers (use actual programmed taps) */
+ state->dpp[i].h_taps = dscl_data->taps.h_taps + 1; // dscl_prog_data.taps stores (taps - 1), so add 1 back
+ state->dpp[i].v_taps = dscl_data->taps.v_taps + 1; // dscl_prog_data.taps stores (taps - 1), so add 1 back
+ }
+ }
+
+ /* Capture essential clock state for underflow analysis */
+ if (dc->clk_mgr && dc->clk_mgr->clks.dispclk_khz > 0) {
+ /* Core display clocks affecting bandwidth and timing */
+ state->dccg.dispclk_khz = dc->clk_mgr->clks.dispclk_khz;
+
+ /* Per-pipe clock configuration - only capture what's essential */
+ for (i = 0; i < MAX_PIPES && i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
+ if (pipe_ctx->stream) {
+ /* Essential clocks that directly affect underflow risk */
+ state->dccg.dppclk_khz[i] = dc->clk_mgr->clks.dppclk_khz;
+ state->dccg.pixclk_khz[i] = pipe_ctx->stream->timing.pix_clk_100hz / 10;
+ state->dccg.dppclk_enable[i] = 1;
+
+ /* DP stream clock only for DP signals */
+ /*
+ * NOTE(review): the inactive-pipe branch below only writes
+ * dpstreamclk_enable[i] when i < 4; the writes here have no such
+ * guard. Confirm the array length against the struct definition.
+ */
+ if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
+ pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
+ state->dccg.dpstreamclk_enable[i] = 1;
+ } else {
+ state->dccg.dpstreamclk_enable[i] = 0;
+ }
+ } else {
+ /* Inactive pipe - no clocks */
+ state->dccg.dppclk_khz[i] = 0;
+ state->dccg.pixclk_khz[i] = 0;
+ state->dccg.dppclk_enable[i] = 0;
+ if (i < 4) {
+ state->dccg.dpstreamclk_enable[i] = 0;
+ }
+ }
+ }
+
+ /* DSC clock state - only when actually using DSC */
+ /* The value written is a fixed constant, not a clock read from state. */
+ for (i = 0; i < MAX_PIPES; i++) {
+ struct pipe_ctx *pipe_ctx = (i < dc->res_pool->pipe_count) ? &res_ctx->pipe_ctx[i] : NULL;
+ if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->timing.dsc_cfg.num_slices_h > 0) {
+ state->dccg.dscclk_khz[i] = 400000; /* Typical DSC clock frequency */
+ } else {
+ state->dccg.dscclk_khz[i] = 0;
+ }
+ }
+
+ /* SYMCLK32 LE Control - only the essential HPO state for underflow analysis */
+ for (i = 0; i < 2; i++) {
+ state->dccg.symclk32_le_enable[i] = 0; /* Default: disabled */
+ }
+
+ }
+
+ /* Capture essential DSC configuration for underflow analysis */
+ for (i = 0; i < MAX_PIPES && i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
+
+ if (pipe_ctx->stream && pipe_ctx->stream->timing.dsc_cfg.num_slices_h > 0) {
+ /* DSC is enabled - capture essential configuration */
+ state->dsc[i].dsc_clock_enable = 1;
+
+ /* DSC configuration affecting bandwidth and timing */
+ struct dc_dsc_config *dsc_cfg = &pipe_ctx->stream->timing.dsc_cfg;
+ state->dsc[i].dsc_num_slices_h = dsc_cfg->num_slices_h;
+ state->dsc[i].dsc_num_slices_v = dsc_cfg->num_slices_v;
+ state->dsc[i].dsc_bits_per_pixel = dsc_cfg->bits_per_pixel;
+
+ /* OPP pipe source for DSC forwarding */
+ if (pipe_ctx->stream_res.opp) {
+ state->dsc[i].dscrm_dsc_forward_enable = 1;
+ state->dsc[i].dscrm_dsc_opp_pipe_source = pipe_ctx->stream_res.opp->inst;
+ } else {
+ state->dsc[i].dscrm_dsc_forward_enable = 0;
+ state->dsc[i].dscrm_dsc_opp_pipe_source = 0;
+ }
+ } else {
+ /* DSC not enabled - clear all fields */
+ memset(&state->dsc[i], 0, sizeof(state->dsc[i]));
+ }
+ }
+
+ /* Capture MPC programming state - comprehensive register field coverage */
+ for (i = 0; i < MAX_PIPES && i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
+
+ if (pipe_ctx->plane_state && pipe_ctx->stream) {
+ struct dc_plane_state *plane_state = pipe_ctx->plane_state;
+
+ /* MPCC blending tree and mode control - capture actual blend configuration */
+ state->mpc.mpcc_mode[i] = (plane_state->blend_tf.type != TF_TYPE_BYPASS) ? 1 : 0;
+ state->mpc.mpcc_alpha_blend_mode[i] = plane_state->per_pixel_alpha ? 1 : 0;
+ state->mpc.mpcc_alpha_multiplied_mode[i] = plane_state->pre_multiplied_alpha ? 1 : 0;
+ state->mpc.mpcc_blnd_active_overlap_only[i] = 0; /* Default - no overlap restriction */
+ state->mpc.mpcc_global_alpha[i] = plane_state->global_alpha_value;
+ state->mpc.mpcc_global_gain[i] = plane_state->global_alpha ? 255 : 0;
+ state->mpc.mpcc_bg_bpc[i] = 8; /* Standard 8-bit background */
+ state->mpc.mpcc_bot_gain_mode[i] = 0; /* Standard gain mode */
+
+ /* MPCC blending tree connections - capture tree topology */
+ if (pipe_ctx->bottom_pipe) {
+ state->mpc.mpcc_bot_sel[i] = pipe_ctx->bottom_pipe->pipe_idx;
+ } else {
+ state->mpc.mpcc_bot_sel[i] = 0xF; /* No bottom connection */
+ }
+ state->mpc.mpcc_top_sel[i] = pipe_ctx->pipe_idx; /* This pipe's DPP ID */
+
+ /* MPCC output gamma control - capture gamma programming */
+ if (plane_state->gamma_correction.type != GAMMA_CS_TFM_1D && plane_state->gamma_correction.num_entries > 0) {
+ state->mpc.mpcc_ogam_mode[i] = 1; /* Gamma enabled */
+ state->mpc.mpcc_ogam_select[i] = 0; /* Bank A selection */
+ state->mpc.mpcc_ogam_pwl_disable[i] = 0; /* PWL enabled */
+ } else {
+ state->mpc.mpcc_ogam_mode[i] = 0; /* Bypass mode */
+ state->mpc.mpcc_ogam_select[i] = 0;
+ state->mpc.mpcc_ogam_pwl_disable[i] = 1; /* PWL disabled */
+ }
+
+ /* MPCC pipe assignment and operational status */
+ if (pipe_ctx->stream_res.opp) {
+ state->mpc.mpcc_opp_id[i] = pipe_ctx->stream_res.opp->inst;
+ } else {
+ state->mpc.mpcc_opp_id[i] = 0xF; /* No OPP assignment */
+ }
+
+ /* MPCC status indicators - active pipe state */
+ state->mpc.mpcc_idle[i] = 0; /* Active pipe - not idle */
+ state->mpc.mpcc_busy[i] = 1; /* Active pipe - busy processing */
+
+ } else {
+ /* Pipe not active - set disabled/idle state for all fields */
+ state->mpc.mpcc_mode[i] = 0;
+ state->mpc.mpcc_alpha_blend_mode[i] = 0;
+ state->mpc.mpcc_alpha_multiplied_mode[i] = 0;
+ state->mpc.mpcc_blnd_active_overlap_only[i] = 0;
+ state->mpc.mpcc_global_alpha[i] = 0;
+ state->mpc.mpcc_global_gain[i] = 0;
+ state->mpc.mpcc_bg_bpc[i] = 0;
+ state->mpc.mpcc_bot_gain_mode[i] = 0;
+ state->mpc.mpcc_bot_sel[i] = 0xF; /* No bottom connection */
+ state->mpc.mpcc_top_sel[i] = 0xF; /* No top connection */
+ state->mpc.mpcc_ogam_mode[i] = 0; /* Bypass */
+ state->mpc.mpcc_ogam_select[i] = 0;
+ state->mpc.mpcc_ogam_pwl_disable[i] = 1; /* PWL disabled */
+ state->mpc.mpcc_opp_id[i] = 0xF; /* No OPP assignment */
+ state->mpc.mpcc_idle[i] = 1; /* Idle */
+ state->mpc.mpcc_busy[i] = 0; /* Not busy */
+ }
+ }
+
+ /* Capture OPP programming state for each pipe - comprehensive register field coverage */
+ for (i = 0; i < MAX_PIPES && i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
+
+ if (!pipe_ctx->stream)
+ continue;
+
+ if (pipe_ctx->stream_res.opp) {
+ struct dc_crtc_timing *timing = &pipe_ctx->stream->timing;
+
+ /* OPP Pipe Control */
+ state->opp[i].opp_pipe_clock_enable = 1; /* Active pipe has clock enabled */
+
+ /* Display Pattern Generator (DPG) Control - 19 fields */
+ if (pipe_ctx->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE) {
+ state->opp[i].dpg_enable = 1;
+ } else {
+ /* Video mode - DPG disabled */
+ state->opp[i].dpg_enable = 0;
+ }
+
+ /* Format Control (FMT) - 18 fields */
+ state->opp[i].fmt_pixel_encoding = timing->pixel_encoding;
+
+ /* Chroma subsampling mode based on pixel encoding */
+ if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) {
+ state->opp[i].fmt_subsampling_mode = 1; /* 4:2:0 subsampling */
+ } else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) {
+ state->opp[i].fmt_subsampling_mode = 2; /* 4:2:2 subsampling */
+ } else {
+ state->opp[i].fmt_subsampling_mode = 0; /* No subsampling (4:4:4) */
+ }
+
+ state->opp[i].fmt_cbcr_bit_reduction_bypass = (timing->pixel_encoding == PIXEL_ENCODING_RGB) ? 1 : 0;
+ state->opp[i].fmt_stereosync_override = (timing->timing_3d_format != TIMING_3D_FORMAT_NONE) ? 1 : 0;
+
+ /* Dithering control based on bit depth */
+ if (timing->display_color_depth < COLOR_DEPTH_121212) {
+ state->opp[i].fmt_spatial_dither_frame_counter_max = 15; /* Typical frame counter max */
+ state->opp[i].fmt_spatial_dither_frame_counter_bit_swap = 0; /* No bit swapping */
+ state->opp[i].fmt_spatial_dither_enable = 1;
+ state->opp[i].fmt_spatial_dither_mode = 0; /* Spatial dithering mode */
+ state->opp[i].fmt_spatial_dither_depth = timing->display_color_depth;
+ state->opp[i].fmt_temporal_dither_enable = 0; /* Spatial dithering preferred */
+ } else {
+ state->opp[i].fmt_spatial_dither_frame_counter_max = 0;
+ state->opp[i].fmt_spatial_dither_frame_counter_bit_swap = 0;
+ state->opp[i].fmt_spatial_dither_enable = 0;
+ state->opp[i].fmt_spatial_dither_mode = 0;
+ state->opp[i].fmt_spatial_dither_depth = 0;
+ state->opp[i].fmt_temporal_dither_enable = 0;
+ }
+
+ /* Truncation control for bit depth reduction */
+ if (timing->display_color_depth < COLOR_DEPTH_121212) {
+ state->opp[i].fmt_truncate_enable = 1;
+ state->opp[i].fmt_truncate_depth = timing->display_color_depth;
+ state->opp[i].fmt_truncate_mode = 0; /* Round mode */
+ } else {
+ state->opp[i].fmt_truncate_enable = 0;
+ state->opp[i].fmt_truncate_depth = 0;
+ state->opp[i].fmt_truncate_mode = 0;
+ }
+
+ /* Data clamping control */
+ state->opp[i].fmt_clamp_data_enable = 1; /* Clamping typically enabled */
+ state->opp[i].fmt_clamp_color_format = timing->pixel_encoding;
+
+ /* Dynamic expansion for limited range content */
+ if (timing->pixel_encoding != PIXEL_ENCODING_RGB) {
+ state->opp[i].fmt_dynamic_exp_enable = 1; /* YCbCr typically needs expansion */
+ state->opp[i].fmt_dynamic_exp_mode = 0; /* Standard expansion */
+ } else {
+ state->opp[i].fmt_dynamic_exp_enable = 0; /* RGB typically full range */
+ state->opp[i].fmt_dynamic_exp_mode = 0;
+ }
+
+ /* Legacy field for compatibility */
+ state->opp[i].fmt_bit_depth_control = timing->display_color_depth;
+
+ /* Output Buffer (OPPBUF) Control - 6 fields */
+ state->opp[i].oppbuf_active_width = timing->h_addressable;
+ state->opp[i].oppbuf_pixel_repetition = 0; /* No pixel repetition by default */
+
+ /* Multi-Stream Output (MSO) / ODM segmentation */
+ if (pipe_ctx->next_odm_pipe) {
+ state->opp[i].oppbuf_display_segmentation = 1; /* Segmented display */
+ state->opp[i].oppbuf_overlap_pixel_num = 0; /* ODM overlap pixels */
+ } else {
+ state->opp[i].oppbuf_display_segmentation = 0; /* Single segment */
+ state->opp[i].oppbuf_overlap_pixel_num = 0;
+ }
+
+ /* 3D/Stereo control */
+ if (timing->timing_3d_format != TIMING_3D_FORMAT_NONE) {
+ state->opp[i].oppbuf_3d_vact_space1_size = 30; /* Typical stereo blanking */
+ state->opp[i].oppbuf_3d_vact_space2_size = 30;
+ } else {
+ state->opp[i].oppbuf_3d_vact_space1_size = 0;
+ state->opp[i].oppbuf_3d_vact_space2_size = 0;
+ }
+
+ /* DSC Forward Config - 3 fields */
+ if (timing->dsc_cfg.num_slices_h > 0) {
+ state->opp[i].dscrm_dsc_forward_enable = 1;
+ state->opp[i].dscrm_dsc_opp_pipe_source = pipe_ctx->stream_res.opp->inst;
+ state->opp[i].dscrm_dsc_forward_enable_status = 1; /* Status follows enable */
+ } else {
+ state->opp[i].dscrm_dsc_forward_enable = 0;
+ state->opp[i].dscrm_dsc_opp_pipe_source = 0;
+ state->opp[i].dscrm_dsc_forward_enable_status = 0;
+ }
+ } else {
+ /* No OPP resource - set all fields to disabled state */
+ memset(&state->opp[i], 0, sizeof(state->opp[i]));
+ }
+ }
+
+ /* Capture OPTC programming state for each pipe - comprehensive register field coverage */
+ for (i = 0; i < MAX_PIPES && i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
+
+ if (!pipe_ctx->stream)
+ continue;
+
+ if (pipe_ctx->stream_res.tg) {
+ struct dc_crtc_timing *timing = &pipe_ctx->stream->timing;
+
+ state->optc[i].otg_master_inst = pipe_ctx->stream_res.tg->inst;
+
+ /* OTG_CONTROL register - 5 fields */
+ state->optc[i].otg_master_enable = 1; /* Active stream */
+ state->optc[i].otg_disable_point_cntl = 0; /* Normal operation */
+ state->optc[i].otg_start_point_cntl = 0; /* Normal start */
+ state->optc[i].otg_field_number_cntl = (timing->flags.INTERLACE) ? 1 : 0;
+ state->optc[i].otg_out_mux = 0; /* Direct output */
+
+ /* OTG Horizontal Timing - 7 fields */
+ state->optc[i].otg_h_total = timing->h_total;
+ state->optc[i].otg_h_blank_start = timing->h_addressable;
+ state->optc[i].otg_h_blank_end = timing->h_total - timing->h_front_porch;
+ state->optc[i].otg_h_sync_start = timing->h_addressable + timing->h_front_porch;
+ state->optc[i].otg_h_sync_end = timing->h_addressable + timing->h_front_porch + timing->h_sync_width;
+ state->optc[i].otg_h_sync_polarity = timing->flags.HSYNC_POSITIVE_POLARITY ? 0 : 1;
+ state->optc[i].otg_h_timing_div_mode = (pipe_ctx->next_odm_pipe) ? 1 : 0; /* ODM divide mode */
+
+ /* OTG Vertical Timing - 7 fields */
+ state->optc[i].otg_v_total = timing->v_total;
+ state->optc[i].otg_v_blank_start = timing->v_addressable;
+ state->optc[i].otg_v_blank_end = timing->v_total - timing->v_front_porch;
+ state->optc[i].otg_v_sync_start = timing->v_addressable + timing->v_front_porch;
+ state->optc[i].otg_v_sync_end = timing->v_addressable + timing->v_front_porch + timing->v_sync_width;
+ state->optc[i].otg_v_sync_polarity = timing->flags.VSYNC_POSITIVE_POLARITY ? 0 : 1;
+ state->optc[i].otg_v_sync_mode = 0; /* Normal sync mode */
+
+ /* Initialize remaining core fields with appropriate defaults */
+ // TODO: Update logic for accurate vtotal min/max
+ /* NOTE(review): the - 50 below is unchecked; confirm v_total cannot be smaller than 50 or that the fields are signed. */
+ state->optc[i].otg_v_total_max = timing->v_total + 100; /* Typical DRR range */
+ state->optc[i].otg_v_total_min = timing->v_total - 50;
+ state->optc[i].otg_v_total_mid = timing->v_total;
+
+ /* ODM configuration */
+ // TODO: Update logic to have complete ODM mappings (e.g. 3:1 and 4:1) stored in single pipe
+ if (pipe_ctx->next_odm_pipe) {
+ state->optc[i].optc_seg0_src_sel = pipe_ctx->stream_res.opp ? pipe_ctx->stream_res.opp->inst : 0;
+ state->optc[i].optc_seg1_src_sel = pipe_ctx->next_odm_pipe->stream_res.opp ? pipe_ctx->next_odm_pipe->stream_res.opp->inst : 0;
+ state->optc[i].optc_num_of_input_segment = 1; /* 2 segments - 1 */
+ } else {
+ state->optc[i].optc_seg0_src_sel = pipe_ctx->stream_res.opp ? pipe_ctx->stream_res.opp->inst : 0;
+ state->optc[i].optc_seg1_src_sel = 0;
+ state->optc[i].optc_num_of_input_segment = 0; /* Single segment */
+ }
+
+ /* DSC configuration */
+ /* The num_slices_h > 0 test also keeps the slice-width division below from dividing by zero. */
+ if (timing->dsc_cfg.num_slices_h > 0) {
+ state->optc[i].optc_dsc_mode = 1; /* DSC enabled */
+ state->optc[i].optc_dsc_bytes_per_pixel = timing->dsc_cfg.bits_per_pixel / 16; /* Convert to bytes */
+ state->optc[i].optc_dsc_slice_width = timing->h_addressable / timing->dsc_cfg.num_slices_h;
+ } else {
+ state->optc[i].optc_dsc_mode = 0;
+ state->optc[i].optc_dsc_bytes_per_pixel = 0;
+ state->optc[i].optc_dsc_slice_width = 0;
+ }
+
+ /* Essential control fields */
+ state->optc[i].otg_stereo_enable = (timing->timing_3d_format != TIMING_3D_FORMAT_NONE) ? 1 : 0;
+ state->optc[i].otg_interlace_enable = timing->flags.INTERLACE ? 1 : 0;
+ state->optc[i].otg_clock_enable = 1; /* OTG clock enabled */
+ state->optc[i].vtg0_enable = 1; /* VTG enabled for timing generation */
+
+ /* Initialize other key fields to defaults */
+ state->optc[i].optc_input_pix_clk_en = 1;
+ state->optc[i].optc_segment_width = (pipe_ctx->next_odm_pipe) ? (timing->h_addressable / 2) : timing->h_addressable;
+ state->optc[i].otg_vready_offset = 1;
+ state->optc[i].otg_vstartup_start = timing->v_addressable + 10;
+ state->optc[i].otg_vupdate_offset = 0;
+ state->optc[i].otg_vupdate_width = 5;
+ } else {
+ /* No timing generator resource - initialize all fields to 0 */
+ memset(&state->optc[i], 0, sizeof(state->optc[i]));
+ }
+ }
+
+ state->state_valid = true;
+ return true;
+}
+
+/*
+ * dc_log_preos_dmcub_info() - thin wrapper that forwards dc->ctx->dmub_srv
+ * to dc_dmub_srv_log_preos_dmcub_info().
+ */
+void dc_log_preos_dmcub_info(const struct dc *dc)
+{
+ dc_dmub_srv_log_preos_dmcub_info(dc->ctx->dmub_srv);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
index 21be2a684393..bbce751b485f 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
@@ -36,6 +36,8 @@
#include "resource.h"
+#define DC_LOGGER \
+ dc->ctx->logger
#define DC_LOGGER_INIT(logger)
@@ -44,136 +46,11 @@
DC_LOG_IF_TRACE(__VA_ARGS__); \
} while (0)
-#define TIMING_TRACE(...) do {\
- if (dc->debug.timing_trace) \
- DC_LOG_SYNC(__VA_ARGS__); \
-} while (0)
-
#define CLOCK_TRACE(...) do {\
if (dc->debug.clock_trace) \
DC_LOG_BANDWIDTH_CALCS(__VA_ARGS__); \
} while (0)
-void pre_surface_trace(
- struct dc *dc,
- const struct dc_plane_state *const *plane_states,
- int surface_count)
-{
- int i;
- DC_LOGGER_INIT(dc->ctx->logger);
-
- for (i = 0; i < surface_count; i++) {
- const struct dc_plane_state *plane_state = plane_states[i];
-
- SURFACE_TRACE("Planes %d:\n", i);
-
- SURFACE_TRACE(
- "plane_state->visible = %d;\n"
- "plane_state->flip_immediate = %d;\n"
- "plane_state->address.type = %d;\n"
- "plane_state->address.grph.addr.quad_part = 0x%llX;\n"
- "plane_state->address.grph.meta_addr.quad_part = 0x%llX;\n"
- "plane_state->scaling_quality.h_taps = %d;\n"
- "plane_state->scaling_quality.v_taps = %d;\n"
- "plane_state->scaling_quality.h_taps_c = %d;\n"
- "plane_state->scaling_quality.v_taps_c = %d;\n",
- plane_state->visible,
- plane_state->flip_immediate,
- plane_state->address.type,
- plane_state->address.grph.addr.quad_part,
- plane_state->address.grph.meta_addr.quad_part,
- plane_state->scaling_quality.h_taps,
- plane_state->scaling_quality.v_taps,
- plane_state->scaling_quality.h_taps_c,
- plane_state->scaling_quality.v_taps_c);
-
- SURFACE_TRACE(
- "plane_state->src_rect.x = %d;\n"
- "plane_state->src_rect.y = %d;\n"
- "plane_state->src_rect.width = %d;\n"
- "plane_state->src_rect.height = %d;\n"
- "plane_state->dst_rect.x = %d;\n"
- "plane_state->dst_rect.y = %d;\n"
- "plane_state->dst_rect.width = %d;\n"
- "plane_state->dst_rect.height = %d;\n"
- "plane_state->clip_rect.x = %d;\n"
- "plane_state->clip_rect.y = %d;\n"
- "plane_state->clip_rect.width = %d;\n"
- "plane_state->clip_rect.height = %d;\n",
- plane_state->src_rect.x,
- plane_state->src_rect.y,
- plane_state->src_rect.width,
- plane_state->src_rect.height,
- plane_state->dst_rect.x,
- plane_state->dst_rect.y,
- plane_state->dst_rect.width,
- plane_state->dst_rect.height,
- plane_state->clip_rect.x,
- plane_state->clip_rect.y,
- plane_state->clip_rect.width,
- plane_state->clip_rect.height);
-
- SURFACE_TRACE(
- "plane_state->plane_size.surface_size.x = %d;\n"
- "plane_state->plane_size.surface_size.y = %d;\n"
- "plane_state->plane_size.surface_size.width = %d;\n"
- "plane_state->plane_size.surface_size.height = %d;\n"
- "plane_state->plane_size.surface_pitch = %d;\n",
- plane_state->plane_size.surface_size.x,
- plane_state->plane_size.surface_size.y,
- plane_state->plane_size.surface_size.width,
- plane_state->plane_size.surface_size.height,
- plane_state->plane_size.surface_pitch);
-
-
- SURFACE_TRACE(
- "plane_state->tiling_info.gfx8.num_banks = %d;\n"
- "plane_state->tiling_info.gfx8.bank_width = %d;\n"
- "plane_state->tiling_info.gfx8.bank_width_c = %d;\n"
- "plane_state->tiling_info.gfx8.bank_height = %d;\n"
- "plane_state->tiling_info.gfx8.bank_height_c = %d;\n"
- "plane_state->tiling_info.gfx8.tile_aspect = %d;\n"
- "plane_state->tiling_info.gfx8.tile_aspect_c = %d;\n"
- "plane_state->tiling_info.gfx8.tile_split = %d;\n"
- "plane_state->tiling_info.gfx8.tile_split_c = %d;\n"
- "plane_state->tiling_info.gfx8.tile_mode = %d;\n"
- "plane_state->tiling_info.gfx8.tile_mode_c = %d;\n",
- plane_state->tiling_info.gfx8.num_banks,
- plane_state->tiling_info.gfx8.bank_width,
- plane_state->tiling_info.gfx8.bank_width_c,
- plane_state->tiling_info.gfx8.bank_height,
- plane_state->tiling_info.gfx8.bank_height_c,
- plane_state->tiling_info.gfx8.tile_aspect,
- plane_state->tiling_info.gfx8.tile_aspect_c,
- plane_state->tiling_info.gfx8.tile_split,
- plane_state->tiling_info.gfx8.tile_split_c,
- plane_state->tiling_info.gfx8.tile_mode,
- plane_state->tiling_info.gfx8.tile_mode_c);
-
- SURFACE_TRACE(
- "plane_state->tiling_info.gfx8.pipe_config = %d;\n"
- "plane_state->tiling_info.gfx8.array_mode = %d;\n"
- "plane_state->color_space = %d;\n"
- "plane_state->dcc.enable = %d;\n"
- "plane_state->format = %d;\n"
- "plane_state->rotation = %d;\n"
- "plane_state->stereo_format = %d;\n",
- plane_state->tiling_info.gfx8.pipe_config,
- plane_state->tiling_info.gfx8.array_mode,
- plane_state->color_space,
- plane_state->dcc.enable,
- plane_state->format,
- plane_state->rotation,
- plane_state->stereo_format);
-
- SURFACE_TRACE("plane_state->tiling_info.gfx9.swizzle = %d;\n",
- plane_state->tiling_info.gfx9.swizzle);
-
- SURFACE_TRACE("\n");
- }
- SURFACE_TRACE("\n");
-}
-
void update_surface_trace(
struct dc *dc,
const struct dc_surface_update *updates,
@@ -304,48 +181,10 @@ void post_surface_trace(struct dc *dc)
}
-void context_timing_trace(
- struct dc *dc,
- struct resource_context *res_ctx)
-{
- int i;
- int h_pos[MAX_PIPES] = {0}, v_pos[MAX_PIPES] = {0};
- struct crtc_position position;
- unsigned int underlay_idx = dc->res_pool->underlay_pipe_index;
- DC_LOGGER_INIT(dc->ctx->logger);
-
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
- /* get_position() returns CRTC vertical/horizontal counter
- * hence not applicable for underlay pipe
- */
- if (pipe_ctx->stream == NULL || pipe_ctx->pipe_idx == underlay_idx)
- continue;
-
- pipe_ctx->stream_res.tg->funcs->get_position(pipe_ctx->stream_res.tg, &position);
- h_pos[i] = position.horizontal_count;
- v_pos[i] = position.vertical_count;
- }
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
-
- if (pipe_ctx->stream == NULL || pipe_ctx->pipe_idx == underlay_idx)
- continue;
-
- TIMING_TRACE("OTG_%d H_tot:%d V_tot:%d H_pos:%d V_pos:%d\n",
- pipe_ctx->stream_res.tg->inst,
- pipe_ctx->stream->timing.h_total,
- pipe_ctx->stream->timing.v_total,
- h_pos[i], v_pos[i]);
- }
-}
-
void context_clock_trace(
struct dc *dc,
struct dc_state *context)
{
-#if defined(CONFIG_DRM_AMD_DC_DCN)
DC_LOGGER_INIT(dc->ctx->logger);
CLOCK_TRACE("Current: dispclk_khz:%d max_dppclk_khz:%d dcfclk_khz:%d\n"
"dcfclk_deep_sleep_khz:%d fclk_khz:%d socclk_khz:%d\n",
@@ -363,7 +202,6 @@ void context_clock_trace(
context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz,
context->bw_ctx.bw.dcn.clk.fclk_khz,
context->bw_ctx.bw.dcn.clk.socclk_khz);
-#endif
}
/**
@@ -422,9 +260,59 @@ char *dc_status_to_str(enum dc_status status)
return "The operation is not supported.";
case DC_UNSUPPORTED_VALUE:
return "The value specified is not supported.";
+ case DC_NO_LINK_ENC_RESOURCE:
+ return "No link encoder resource";
+ case DC_FAIL_DP_PAYLOAD_ALLOCATION:
+ return "Fail dp payload allocation";
+ case DC_FAIL_DP_LINK_BANDWIDTH:
+ return "Insufficient DP link bandwidth";
+ case DC_FAIL_HW_CURSOR_SUPPORT:
+ return "HW Cursor not supported";
+ case DC_FAIL_DP_TUNNEL_BW_VALIDATE:
+ return "Fail DP Tunnel BW validation";
case DC_ERROR_UNEXPECTED:
return "Unexpected error";
}
return "Unexpected status error";
}
+
+/*
+ * dc_pixel_encoding_to_str - printable name for a pixel encoding
+ * @pixel_encoding: encoding to translate
+ *
+ * Return: a constant string naming the encoding; any value without an
+ * explicit case below yields "Unknown".
+ */
+char *dc_pixel_encoding_to_str(enum dc_pixel_encoding pixel_encoding)
+{
+	char *name = "Unknown";
+
+	switch (pixel_encoding) {
+	case PIXEL_ENCODING_RGB:
+		name = "RGB";
+		break;
+	case PIXEL_ENCODING_YCBCR422:
+		name = "YUV422";
+		break;
+	case PIXEL_ENCODING_YCBCR444:
+		name = "YUV444";
+		break;
+	case PIXEL_ENCODING_YCBCR420:
+		name = "YUV420";
+		break;
+	default:
+		break;
+	}
+
+	return name;
+}
+
+/*
+ * dc_color_depth_to_str - printable name for a color depth
+ * @color_depth: color depth to translate
+ *
+ * Return: a constant "<n>-bpc" string for the depths handled below, or
+ * "Unknown" for any other value.
+ */
+char *dc_color_depth_to_str(enum dc_color_depth color_depth)
+{
+	char *name = "Unknown";
+
+	switch (color_depth) {
+	case COLOR_DEPTH_666:
+		name = "6-bpc";
+		break;
+	case COLOR_DEPTH_888:
+		name = "8-bpc";
+		break;
+	case COLOR_DEPTH_101010:
+		name = "10-bpc";
+		break;
+	case COLOR_DEPTH_121212:
+		name = "12-bpc";
+		break;
+	case COLOR_DEPTH_141414:
+		name = "14-bpc";
+		break;
+	case COLOR_DEPTH_161616:
+		name = "16-bpc";
+		break;
+	case COLOR_DEPTH_999:
+		name = "9-bpc";
+		break;
+	case COLOR_DEPTH_111111:
+		name = "11-bpc";
+		break;
+	default:
+		break;
+	}
+
+	return name;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
index 9039fb134db5..e2763b60482a 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
@@ -23,14 +23,25 @@
*
*/
-#include <linux/delay.h>
-
#include "dm_services.h"
#include "core_types.h"
#include "timing_generator.h"
#include "hw_sequencer.h"
+#include "hw_sequencer_private.h"
+#include "basics/dc_common.h"
+#include "resource.h"
+#include "dc_dmub_srv.h"
+#include "dc_state_priv.h"
+#include "opp.h"
+#include "dsc.h"
+#include "dchubbub.h"
+#include "dccg.h"
+#include "abm.h"
+#include "dcn10/dcn10_hubbub.h"
+#include "dce/dmub_hw_lock_mgr.h"
#define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0]))
+#define MAX_NUM_MCACHE 8
/* used as index in array of black_color_format */
enum black_color_format {
@@ -75,28 +86,38 @@ struct out_csc_color_matrix_type {
static const struct out_csc_color_matrix_type output_csc_matrix[] = {
{ COLOR_SPACE_RGB_TYPE,
- { 0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} },
+ { 0x2000, 0, 0, 0,
+ 0, 0x2000, 0, 0,
+ 0, 0, 0x2000, 0} },
{ COLOR_SPACE_RGB_LIMITED_TYPE,
- { 0x1B67, 0, 0, 0x201, 0, 0x1B67, 0, 0x201, 0, 0, 0x1B67, 0x201} },
+ { 0x1B67, 0, 0, 0x201,
+ 0, 0x1B67, 0, 0x201,
+ 0, 0, 0x1B67, 0x201} },
{ COLOR_SPACE_YCBCR601_TYPE,
- { 0xE04, 0xF444, 0xFDB9, 0x1004, 0x831, 0x1016, 0x320, 0x201, 0xFB45,
- 0xF6B7, 0xE04, 0x1004} },
+ { 0xE04, 0xF444, 0xFDB9, 0x1004,
+ 0x831, 0x1016, 0x320, 0x201,
+ 0xFB45, 0xF6B7, 0xE04, 0x1004} },
{ COLOR_SPACE_YCBCR709_TYPE,
- { 0xE04, 0xF345, 0xFEB7, 0x1004, 0x5D3, 0x1399, 0x1FA,
- 0x201, 0xFCCA, 0xF533, 0xE04, 0x1004} },
+ { 0xE04, 0xF345, 0xFEB7, 0x1004,
+ 0x5D3, 0x1399, 0x1FA, 0x201,
+ 0xFCCA, 0xF533, 0xE04, 0x1004} },
/* TODO: correct values below */
{ COLOR_SPACE_YCBCR601_LIMITED_TYPE,
- { 0xE00, 0xF447, 0xFDB9, 0x1000, 0x991,
- 0x12C9, 0x3A6, 0x200, 0xFB47, 0xF6B9, 0xE00, 0x1000} },
+ { 0xE00, 0xF447, 0xFDB9, 0x1000,
+ 0x991, 0x12C9, 0x3A6, 0x200,
+ 0xFB47, 0xF6B9, 0xE00, 0x1000} },
{ COLOR_SPACE_YCBCR709_LIMITED_TYPE,
- { 0xE00, 0xF349, 0xFEB7, 0x1000, 0x6CE, 0x16E3,
- 0x24F, 0x200, 0xFCCB, 0xF535, 0xE00, 0x1000} },
+ { 0xE00, 0xF349, 0xFEB7, 0x1000,
+ 0x6CE, 0x16E3, 0x24F, 0x200,
+ 0xFCCB, 0xF535, 0xE00, 0x1000} },
{ COLOR_SPACE_YCBCR2020_TYPE,
- { 0x1000, 0xF149, 0xFEB7, 0x0000, 0x0868, 0x15B2,
- 0x01E6, 0x0000, 0xFB88, 0xF478, 0x1000, 0x0000} },
+ { 0x1000, 0xF149, 0xFEB7, 0x1004,
+ 0x0868, 0x15B2, 0x01E6, 0x201,
+ 0xFB88, 0xF478, 0x1000, 0x1004} },
{ COLOR_SPACE_YCBCR709_BLACK_TYPE,
- { 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x0000,
- 0x0000, 0x0200, 0x0000, 0x0000, 0x0000, 0x1000} },
+ { 0x0000, 0x0000, 0x0000, 0x1000,
+ 0x0000, 0x0000, 0x0000, 0x0200,
+ 0x0000, 0x0000, 0x0000, 0x1000} },
};
static bool is_rgb_type(
@@ -163,7 +184,7 @@ static bool is_ycbcr2020_type(
{
bool ret = false;
- if (color_space == COLOR_SPACE_2020_YCBCR)
+ if (color_space == COLOR_SPACE_2020_YCBCR_LIMITED || color_space == COLOR_SPACE_2020_YCBCR_FULL)
ret = true;
return ret;
}
@@ -177,6 +198,7 @@ static bool is_ycbcr709_limited_type(
ret = true;
return ret;
}
+
static enum dc_color_space_type get_color_space_type(enum dc_color_space color_space)
{
enum dc_color_space_type type = COLOR_SPACE_RGB_TYPE;
@@ -233,7 +255,8 @@ void color_space_to_black_color(
case COLOR_SPACE_YCBCR709_BLACK:
case COLOR_SPACE_YCBCR601_LIMITED:
case COLOR_SPACE_YCBCR709_LIMITED:
- case COLOR_SPACE_2020_YCBCR:
+ case COLOR_SPACE_2020_YCBCR_LIMITED:
+ case COLOR_SPACE_2020_YCBCR_FULL:
*black_color = black_color_format[BLACK_COLOR_FORMAT_YUV_CV];
break;
@@ -242,7 +265,7 @@ void color_space_to_black_color(
black_color_format[BLACK_COLOR_FORMAT_RGB_LIMITED];
break;
- /**
+ /*
* Remove default and add case for all color space
* so when we forget to add new color space
* compiler will give a warning
@@ -298,11 +321,11 @@ void get_mpctree_visual_confirm_color(
{
const struct tg_color pipe_colors[6] = {
{MAX_TG_COLOR_VALUE, 0, 0}, /* red */
- {MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE / 4, 0}, /* orange */
{MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE, 0}, /* yellow */
{0, MAX_TG_COLOR_VALUE, 0}, /* green */
+ {0, MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE}, /* cyan */
{0, 0, MAX_TG_COLOR_VALUE}, /* blue */
- {MAX_TG_COLOR_VALUE / 2, 0, MAX_TG_COLOR_VALUE / 2}, /* purple */
+ {MAX_TG_COLOR_VALUE, 0, MAX_TG_COLOR_VALUE}, /* magenta */
};
struct pipe_ctx *top_pipe = pipe_ctx;
@@ -368,6 +391,7 @@ void get_hdr_visual_confirm_color(
struct tg_color *color)
{
uint32_t color_value = MAX_TG_COLOR_VALUE;
+ bool is_sdr = false;
/* Determine the overscan color based on the top-most (desktop) plane's context */
struct pipe_ctx *top_pipe_ctx = pipe_ctx;
@@ -377,33 +401,1657 @@ void get_hdr_visual_confirm_color(
switch (top_pipe_ctx->plane_res.scl_data.format) {
case PIXEL_FORMAT_ARGB2101010:
- if (top_pipe_ctx->stream->out_transfer_func->tf == TRANSFER_FUNCTION_PQ) {
+ if (top_pipe_ctx->stream->out_transfer_func.tf == TRANSFER_FUNCTION_PQ) {
/* HDR10, ARGB2101010 - set border color to red */
color->color_r_cr = color_value;
- } else if (top_pipe_ctx->stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) {
+ } else if (top_pipe_ctx->stream->out_transfer_func.tf == TRANSFER_FUNCTION_GAMMA22) {
/* FreeSync 2 ARGB2101010 - set border color to pink */
color->color_r_cr = color_value;
color->color_b_cb = color_value;
- }
+ } else
+ is_sdr = true;
break;
case PIXEL_FORMAT_FP16:
- if (top_pipe_ctx->stream->out_transfer_func->tf == TRANSFER_FUNCTION_PQ) {
+ if (top_pipe_ctx->stream->out_transfer_func.tf == TRANSFER_FUNCTION_PQ) {
/* HDR10, FP16 - set border color to blue */
color->color_b_cb = color_value;
- } else if (top_pipe_ctx->stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) {
+ } else if (top_pipe_ctx->stream->out_transfer_func.tf == TRANSFER_FUNCTION_GAMMA22) {
/* FreeSync 2 HDR - set border color to green */
color->color_g_y = color_value;
- }
+ } else
+ is_sdr = true;
break;
default:
+ is_sdr = true;
+ break;
+ }
+
+ if (is_sdr) {
/* SDR - set border color to Gray */
color->color_r_cr = color_value/2;
color->color_b_cb = color_value/2;
color->color_g_y = color_value/2;
- break;
}
}
+/*
+ * Visual Confirm color definition for Smart Mux
+ *
+ * @dc: display core instance; dc->caps.is_apu and
+ *      dc->config.smart_mux_version are read
+ * @color: output color, always written
+ *
+ * When the APU drives the eDP the color is looked up by smart mux version;
+ * otherwise (dGPU driving the eDP) the color is red.
+ */
+void get_smartmux_visual_confirm_color(
+	struct dc *dc,
+	struct tg_color *color)
+{
+	uint32_t color_value = MAX_TG_COLOR_VALUE;
+	unsigned int version;
+
+	const struct tg_color sm_ver_colors[5] = {
+		{0, 0, 0},					/* SMUX_MUXCONTROL_UNSUPPORTED - Black */
+		{0, MAX_TG_COLOR_VALUE, 0},			/* SMUX_MUXCONTROL_v10 - Green */
+		{0, MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE},	/* SMUX_MUXCONTROL_v15 - Cyan */
+		{MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE, 0},	/* SMUX_MUXCONTROL_MDM - Yellow */
+		{MAX_TG_COLOR_VALUE, 0, MAX_TG_COLOR_VALUE},	/* SMUX_MUXCONTROL_vUNKNOWN - Magenta */
+	};
+
+	if (dc->caps.is_apu) {
+		/* APU driving the eDP */
+		version = dc->config.smart_mux_version;
+
+		/*
+		 * Never index past the table: a version without an entry is
+		 * shown with the last (unknown) color instead.
+		 */
+		if (version >= NUM_ELEMENTS(sm_ver_colors))
+			version = NUM_ELEMENTS(sm_ver_colors) - 1;
+
+		*color = sm_ver_colors[version];
+	} else {
+		/* dGPU driving the eDP - red */
+		color->color_r_cr = color_value;
+		color->color_g_y = 0;
+		color->color_b_cb = 0;
+	}
+}
+
+/*
+ * Visual Confirm color definition for VABC
+ *
+ * @pipe_ctx: pipe whose stream link is inspected; may be NULL
+ * @color: output color
+ *
+ * For an eDP link the color encodes the backlight control type:
+ * PWM - red, AMD AUX - green, VESA AUX - blue. Any other backlight control
+ * type leaves @color untouched. Without an eDP link the color is black.
+ */
+void get_vabc_visual_confirm_color(
+	struct pipe_ctx *pipe_ctx,
+	struct tg_color *color)
+{
+	const uint32_t full = MAX_TG_COLOR_VALUE;
+	uint32_t red = 0, green = 0, blue = 0;
+	struct dc_link *link = NULL;
+
+	if (pipe_ctx && pipe_ctx->stream)
+		link = pipe_ctx->stream->link;
+
+	if (link && link->connector_signal == SIGNAL_TYPE_EDP) {
+		switch (link->backlight_control_type) {
+		case BACKLIGHT_CONTROL_PWM:
+			red = full;
+			break;
+		case BACKLIGHT_CONTROL_AMD_AUX:
+			green = full;
+			break;
+		case BACKLIGHT_CONTROL_VESA_AUX:
+			blue = full;
+			break;
+		default:
+			/* no color defined for this type: keep the caller's value */
+			return;
+		}
+	}
+
+	color->color_r_cr = red;
+	color->color_g_y = green;
+	color->color_b_cb = blue;
+}
+
+/*
+ * get_subvp_visual_confirm_color - visual confirm color for SubVP p-states
+ * @pipe_ctx: pipe whose p_state_type is read; may be NULL
+ * @color: output color
+ *
+ * P_STATE_SUB_VP - red, P_STATE_DRR_SUB_VP - green,
+ * P_STATE_V_BLANK_SUB_VP - blue. For a NULL pipe or any other p-state
+ * @color is left untouched.
+ */
+void get_subvp_visual_confirm_color(
+	struct pipe_ctx *pipe_ctx,
+	struct tg_color *color)
+{
+	const uint32_t full = MAX_TG_COLOR_VALUE;
+	uint32_t red = 0, green = 0, blue = 0;
+
+	if (!pipe_ctx)
+		return;
+
+	switch (pipe_ctx->p_state_type) {
+	case P_STATE_SUB_VP:
+		red = full;
+		break;
+	case P_STATE_DRR_SUB_VP:
+		green = full;
+		break;
+	case P_STATE_V_BLANK_SUB_VP:
+		blue = full;
+		break;
+	default:
+		return;
+	}
+
+	color->color_r_cr = red;
+	color->color_g_y = green;
+	color->color_b_cb = blue;
+}
+
+/*
+ * get_mclk_switch_visual_confirm_color - visual confirm color per p-state
+ * @pipe_ctx: pipe whose p_state_type is read; may be NULL
+ * @color: output color
+ *
+ * Each handled p-state sets all three channels of @color to either 0 or
+ * MAX_TG_COLOR_VALUE. For a NULL pipe or an unhandled p-state @color is
+ * left untouched.
+ */
+void get_mclk_switch_visual_confirm_color(
+	struct pipe_ctx *pipe_ctx,
+	struct tg_color *color)
+{
+	const uint32_t full = MAX_TG_COLOR_VALUE;
+	uint32_t red = 0, green = 0, blue = 0;
+
+	if (!pipe_ctx)
+		return;
+
+	switch (pipe_ctx->p_state_type) {
+	case P_STATE_V_BLANK:
+		/* red + green */
+		red = full;
+		green = full;
+		break;
+	case P_STATE_FPO:
+		/* green + blue */
+		green = full;
+		blue = full;
+		break;
+	case P_STATE_V_ACTIVE:
+		/* red + blue */
+		red = full;
+		blue = full;
+		break;
+	case P_STATE_SUB_VP:
+		red = full;
+		break;
+	case P_STATE_DRR_SUB_VP:
+		green = full;
+		break;
+	case P_STATE_V_BLANK_SUB_VP:
+		blue = full;
+		break;
+	default:
+		return;
+	}
+
+	color->color_r_cr = red;
+	color->color_g_y = green;
+	color->color_b_cb = blue;
+}
+
+/*
+ * get_cursor_visual_confirm_color - visual confirm color for cursor state
+ * @pipe_ctx: pipe whose stream cursor position is inspected
+ * @color: output color, always written
+ *
+ * Red when the pipe has a stream with its cursor position enabled,
+ * blue otherwise.
+ */
+void get_cursor_visual_confirm_color(
+	struct pipe_ctx *pipe_ctx,
+	struct tg_color *color)
+{
+	const uint32_t full = MAX_TG_COLOR_VALUE;
+	const bool cursor_on = pipe_ctx->stream &&
+			       pipe_ctx->stream->cursor_position.enable;
+
+	color->color_r_cr = cursor_on ? full : 0;
+	color->color_g_y = 0;
+	color->color_b_cb = cursor_on ? 0 : full;
+}
+
+/*
+ * get_dcc_visual_confirm_color - visual confirm color for DCC / mcache state
+ * @dc: display core instance; dc->ctx->dce_version is read
+ * @pipe_ctx: pipe whose plane DCC state and mcache ids are read
+ * @color: output color
+ *
+ * Black when DCC is disabled on the plane. Red when DCC is enabled on a
+ * version older than DCN_VERSION_4_01. Otherwise grey when two different
+ * mcache ids are assigned, or a per-id color when exactly one is usable.
+ * @color is left untouched when no mcache id is assigned or when the assigned
+ * id has no entry in the color table.
+ */
+void get_dcc_visual_confirm_color(
+	struct dc *dc,
+	struct pipe_ctx *pipe_ctx,
+	struct tg_color *color)
+{
+	const uint32_t MCACHE_ID_UNASSIGNED = 0xF;
+	/* one color per mcache id, used when a single id is assigned */
+	static const struct tg_color id_colors[MAX_NUM_MCACHE] = {
+		{0, MAX_TG_COLOR_VALUE, 0},					/* green */
+		{0, 0, MAX_TG_COLOR_VALUE},					/* blue */
+		{MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE, 0},			/* yellow */
+		{MAX_TG_COLOR_VALUE, 0, MAX_TG_COLOR_VALUE},			/* magenta */
+		{0, MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE},			/* cyan */
+		{MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE},	/* white */
+		{MAX_TG_COLOR_VALUE/2, 0, 0},					/* dark red */
+		{0, MAX_TG_COLOR_VALUE/2, 0},					/* dark green */
+	};
+	uint32_t first_id;
+	uint32_t second_id;
+	uint32_t assigned_id;
+
+	if (!pipe_ctx->plane_state->dcc.enable) {
+		color->color_r_cr = 0; /* black - DCC disabled */
+		color->color_g_y = 0;
+		color->color_b_cb = 0;
+		return;
+	}
+
+	if (dc->ctx->dce_version < DCN_VERSION_4_01) {
+		color->color_r_cr = MAX_TG_COLOR_VALUE; /* red - DCC enabled */
+		color->color_g_y = 0;
+		color->color_b_cb = 0;
+		return;
+	}
+
+	first_id = pipe_ctx->mcache_regs.main.p0.mcache_id_first;
+	second_id = pipe_ctx->mcache_regs.main.p0.mcache_id_second;
+
+	if (first_id != MCACHE_ID_UNASSIGNED && second_id != MCACHE_ID_UNASSIGNED && first_id != second_id) {
+		color->color_r_cr = MAX_TG_COLOR_VALUE/2; /* grey - 2 mcache */
+		color->color_g_y = MAX_TG_COLOR_VALUE/2;
+		color->color_b_cb = MAX_TG_COLOR_VALUE/2;
+	} else if (first_id != MCACHE_ID_UNASSIGNED || second_id != MCACHE_ID_UNASSIGNED) {
+		assigned_id = (first_id != MCACHE_ID_UNASSIGNED) ? first_id : second_id;
+
+		/*
+		 * An id can be any value below the unassigned sentinel, but
+		 * the table only has MAX_NUM_MCACHE entries; do not read past
+		 * its end.
+		 */
+		if (assigned_id < MAX_NUM_MCACHE)
+			*color = id_colors[assigned_id];
+	}
+}
+
+/*
+ * set_p_state_switch_method - record which p-state switch method a pipe uses
+ * @dc: display core instance
+ * @context: state whose DML vba data, clocks and pipes are examined
+ * @pipe_ctx: pipe whose p_state_type is updated; may be NULL
+ *
+ * Does nothing when dc->ctx, dc->ctx->dmub_srv or @pipe_ctx is NULL.
+ * Otherwise p_state_type is reset to P_STATE_UNKNOWN and, when DRAM clock
+ * change is supported, set according to vactive margin, firmware based MCLK
+ * switching and SubVP usage.
+ */
+void set_p_state_switch_method(
+		struct dc *dc,
+		struct dc_state *context,
+		struct pipe_ctx *pipe_ctx)
+{
+	/* address of a member of *context, so it can never be NULL */
+	struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+	bool enable_subvp = false;
+
+	if (!dc->ctx || !dc->ctx->dmub_srv || !pipe_ctx)
+		return;
+
+	pipe_ctx->p_state_type = P_STATE_UNKNOWN;
+	if (vba->DRAMClockChangeSupport[vba->VoltageLevel][vba->maxMpcComb] ==
+			dm_dram_clock_change_unsupported)
+		return;
+
+	/* MCLK switching is supported */
+	if (!pipe_ctx->has_vactive_margin) {
+		/* In Vblank - yellow */
+		pipe_ctx->p_state_type = P_STATE_V_BLANK;
+
+		if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
+			/* FPO + Vblank - cyan */
+			pipe_ctx->p_state_type = P_STATE_FPO;
+		}
+	} else {
+		/* In Vactive - pink */
+		pipe_ctx->p_state_type = P_STATE_V_ACTIVE;
+	}
+
+	/* SubVP: look for the first pipe that is a SubVP main with a paired stream */
+	for (int i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+		if (pipe->stream && dc_state_get_paired_subvp_stream(context, pipe->stream) &&
+				dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
+			/* SubVP enable - red */
+			pipe_ctx->p_state_type = P_STATE_SUB_VP;
+			enable_subvp = true;
+
+			if (pipe_ctx->stream == pipe->stream)
+				return;
+			break;
+		}
+	}
+
+	/* pipe_ctx->stream is dereferenced below, so make sure it exists first */
+	if (enable_subvp && pipe_ctx->stream &&
+			dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_NONE) {
+		if (pipe_ctx->stream->allow_freesync == 1) {
+			/* SubVP enable and DRR on - green */
+			pipe_ctx->p_state_type = P_STATE_DRR_SUB_VP;
+		} else {
+			/* SubVP enable and No DRR - blue */
+			pipe_ctx->p_state_type = P_STATE_V_BLANK_SUB_VP;
+		}
+	}
+}
+
+/*
+ * set_drr_and_clear_adjust_pending - program DRR and clear the pending flag
+ * @pipe_ctx: pipe whose timing generator receives the DRR params; may be NULL
+ * @stream: stream whose adjust.timing_adjust_pending is cleared; may be NULL
+ * @params: DRR parameters handed to the timing generator; may be NULL
+ *
+ * The set_drr hook is called only when the pipe has a timing generator that
+ * implements it. The pending flag is cleared whenever @stream is non-NULL,
+ * whether or not the hook was called.
+ */
+void set_drr_and_clear_adjust_pending(
+		struct pipe_ctx *pipe_ctx,
+		struct dc_stream_state *stream,
+		struct drr_params *params)
+{
+	struct timing_generator *tg = pipe_ctx ? pipe_ctx->stream_res.tg : NULL;
+
+	/* params can be null. */
+	if (tg && tg->funcs->set_drr)
+		tg->funcs->set_drr(tg, params);
+
+	if (stream)
+		stream->adjust.timing_adjust_pending = false;
+}
+
+/*
+ * get_fams2_visual_confirm_color - visual confirm color for FAMS2
+ * @dc: display core instance
+ * @context: state checked for FAMS2 usage; may be NULL
+ * @pipe_ctx: pipe being colored; may be NULL
+ * @color: output color
+ *
+ * Writes grey to @color when the FAMS2 debug config is enabled but FAMS2 is
+ * not in use for @context. In every other case @color is left untouched.
+ */
+void get_fams2_visual_confirm_color(
+		struct dc *dc,
+		struct dc_state *context,
+		struct pipe_ctx *pipe_ctx,
+		struct tg_color *color)
+{
+	const uint32_t half = MAX_TG_COLOR_VALUE / 2;
+
+	if (!dc->ctx || !dc->ctx->dmub_srv)
+		return;
+
+	if (!pipe_ctx || !context)
+		return;
+
+	if (!dc->debug.fams2_config.bits.enable)
+		return;
+
+	/* driver only handles visual confirm when FAMS2 is disabled */
+	if (dc_state_is_fams2_in_use(dc, context))
+		return;
+
+	/* when FAMS2 is disabled, all pipes are grey */
+	color->color_r_cr = half;
+	color->color_g_y = half;
+	color->color_b_cb = half;
+}
+
+/*
+ * hwss_build_fast_sequence - build the block sequence for a fast update
+ * @dc: display core instance; its hwss/hwseq hooks decide which steps are added
+ * @dc_dmub_cmd: DMUB commands to queue between the lock and the programming steps
+ * @dmub_cmd_count: number of entries in @dc_dmub_cmd
+ * @block_sequence: output array the steps are appended to
+ * @num_steps: output; set to the number of steps written to @block_sequence
+ * @pipe_ctx: top pipe to program; its ODM and MPC chains are walked
+ * @stream_status: status of the stream, used for the SubVP main checks
+ * @context: state used to decide whether FAMS2 is in use
+ *
+ * Only fills @block_sequence; nothing is executed here. Steps are emitted in
+ * this order: DCC meta wait, locks, DMUB commands, per-pipe programming,
+ * unlocks, manual trigger. If the top pipe has no plane or no stream,
+ * *@num_steps is 0 and nothing is added.
+ */
+void hwss_build_fast_sequence(struct dc *dc,
+		struct dc_dmub_cmd *dc_dmub_cmd,
+		unsigned int dmub_cmd_count,
+		struct block_sequence block_sequence[MAX_HWSS_BLOCK_SEQUENCE_SIZE],
+		unsigned int *num_steps,
+		struct pipe_ctx *pipe_ctx,
+		struct dc_stream_status *stream_status,
+		struct dc_state *context)
+{
+	struct dc_plane_state *plane = pipe_ctx->plane_state;
+	struct dc_stream_state *stream = pipe_ctx->stream;
+	struct dce_hwseq *hws = dc->hwseq;
+	struct pipe_ctx *current_pipe = NULL;
+	struct pipe_ctx *current_mpc_pipe = NULL;
+	/* decided once at lock time and reused at unlock time so both steps agree */
+	bool hw_lock_required = false;
+	unsigned int i = 0;
+
+	*num_steps = 0; // Initialize to 0
+
+	if (!plane || !stream)
+		return;
+
+	if (dc->hwss.wait_for_dcc_meta_propagation) {
+		block_sequence[*num_steps].params.wait_for_dcc_meta_propagation_params.dc = dc;
+		block_sequence[*num_steps].params.wait_for_dcc_meta_propagation_params.top_pipe_to_program = pipe_ctx;
+		block_sequence[*num_steps].func = HUBP_WAIT_FOR_DCC_META_PROP;
+		(*num_steps)++;
+	}
+
+	/* Take the locks before any programming step is queued */
+	if (dc->hwss.subvp_pipe_control_lock_fast) {
+		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.dc = dc;
+		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.lock = true;
+		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.subvp_immediate_flip =
+				plane->flip_immediate && stream_status->mall_stream_config.type == SUBVP_MAIN;
+		block_sequence[*num_steps].func = DMUB_SUBVP_PIPE_CONTROL_LOCK_FAST;
+		(*num_steps)++;
+	}
+	if (dc->hwss.dmub_hw_control_lock_fast) {
+		hw_lock_required = dc_state_is_fams2_in_use(dc, context) ||
+				dmub_hw_lock_mgr_does_link_require_lock(dc, stream->link);
+
+		block_sequence[*num_steps].params.dmub_hw_control_lock_fast_params.dc = dc;
+		block_sequence[*num_steps].params.dmub_hw_control_lock_fast_params.lock = true;
+		block_sequence[*num_steps].params.dmub_hw_control_lock_fast_params.is_required = hw_lock_required;
+		block_sequence[*num_steps].func = DMUB_HW_CONTROL_LOCK_FAST;
+		(*num_steps)++;
+	}
+	if (dc->hwss.pipe_control_lock) {
+		block_sequence[*num_steps].params.pipe_control_lock_params.dc = dc;
+		block_sequence[*num_steps].params.pipe_control_lock_params.lock = true;
+		block_sequence[*num_steps].params.pipe_control_lock_params.pipe_ctx = pipe_ctx;
+		block_sequence[*num_steps].func = OPTC_PIPE_CONTROL_LOCK;
+		(*num_steps)++;
+	}
+
+	for (i = 0; i < dmub_cmd_count; i++) {
+		block_sequence[*num_steps].params.send_dmcub_cmd_params.ctx = dc->ctx;
+		block_sequence[*num_steps].params.send_dmcub_cmd_params.cmd = &(dc_dmub_cmd[i].dmub_cmd);
+		block_sequence[*num_steps].params.send_dmcub_cmd_params.wait_type = dc_dmub_cmd[i].wait_type;
+		block_sequence[*num_steps].func = DMUB_SEND_DMCUB_CMD;
+		(*num_steps)++;
+	}
+
+	/* Outer loop walks the ODM chain, inner loop walks each MPC chain */
+	current_pipe = pipe_ctx;
+	while (current_pipe) {
+		current_mpc_pipe = current_pipe;
+		while (current_mpc_pipe) {
+			if (current_mpc_pipe->plane_state) {
+				if (dc->hwss.set_flip_control_gsl && current_mpc_pipe->plane_state->update_flags.raw) {
+					block_sequence[*num_steps].params.set_flip_control_gsl_params.hubp = current_mpc_pipe->plane_res.hubp;
+					block_sequence[*num_steps].params.set_flip_control_gsl_params.flip_immediate = current_mpc_pipe->plane_state->flip_immediate;
+					block_sequence[*num_steps].func = HUBP_SET_FLIP_CONTROL_GSL;
+					(*num_steps)++;
+				}
+				if (dc->hwss.program_triplebuffer && dc->debug.enable_tri_buf && current_mpc_pipe->plane_state->update_flags.raw) {
+					block_sequence[*num_steps].params.program_triplebuffer_params.dc = dc;
+					block_sequence[*num_steps].params.program_triplebuffer_params.pipe_ctx = current_mpc_pipe;
+					block_sequence[*num_steps].params.program_triplebuffer_params.enableTripleBuffer = current_mpc_pipe->plane_state->triplebuffer_flips;
+					block_sequence[*num_steps].func = HUBP_PROGRAM_TRIPLEBUFFER;
+					(*num_steps)++;
+				}
+				if (dc->hwss.update_plane_addr && current_mpc_pipe->plane_state->update_flags.bits.addr_update) {
+					/* for a SubVP main OTG master, save the surface address before the address update */
+					if (resource_is_pipe_type(current_mpc_pipe, OTG_MASTER) &&
+							stream_status->mall_stream_config.type == SUBVP_MAIN) {
+						block_sequence[*num_steps].params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv;
+						block_sequence[*num_steps].params.subvp_save_surf_addr.addr = &current_mpc_pipe->plane_state->address;
+						block_sequence[*num_steps].params.subvp_save_surf_addr.subvp_index = current_mpc_pipe->subvp_index;
+						block_sequence[*num_steps].func = DMUB_SUBVP_SAVE_SURF_ADDR;
+						(*num_steps)++;
+					}
+
+					block_sequence[*num_steps].params.update_plane_addr_params.dc = dc;
+					block_sequence[*num_steps].params.update_plane_addr_params.pipe_ctx = current_mpc_pipe;
+					block_sequence[*num_steps].func = HUBP_UPDATE_PLANE_ADDR;
+					(*num_steps)++;
+				}
+
+				if (hws->funcs.set_input_transfer_func && current_mpc_pipe->plane_state->update_flags.bits.gamma_change) {
+					block_sequence[*num_steps].params.set_input_transfer_func_params.dc = dc;
+					block_sequence[*num_steps].params.set_input_transfer_func_params.pipe_ctx = current_mpc_pipe;
+					block_sequence[*num_steps].params.set_input_transfer_func_params.plane_state = current_mpc_pipe->plane_state;
+					block_sequence[*num_steps].func = DPP_SET_INPUT_TRANSFER_FUNC;
+					(*num_steps)++;
+				}
+
+				if (dc->hwss.program_gamut_remap && current_mpc_pipe->plane_state->update_flags.bits.gamut_remap_change) {
+					block_sequence[*num_steps].params.program_gamut_remap_params.pipe_ctx = current_mpc_pipe;
+					block_sequence[*num_steps].func = DPP_PROGRAM_GAMUT_REMAP;
+					(*num_steps)++;
+				}
+				if (current_mpc_pipe->plane_state->update_flags.bits.input_csc_change) {
+					block_sequence[*num_steps].params.setup_dpp_params.pipe_ctx = current_mpc_pipe;
+					block_sequence[*num_steps].func = DPP_SETUP_DPP;
+					(*num_steps)++;
+				}
+				if (current_mpc_pipe->plane_state->update_flags.bits.coeff_reduction_change) {
+					block_sequence[*num_steps].params.program_bias_and_scale_params.pipe_ctx = current_mpc_pipe;
+					block_sequence[*num_steps].func = DPP_PROGRAM_BIAS_AND_SCALE;
+					(*num_steps)++;
+				}
+			}
+			if (hws->funcs.set_output_transfer_func && current_mpc_pipe->stream->update_flags.bits.out_tf) {
+				block_sequence[*num_steps].params.set_output_transfer_func_params.dc = dc;
+				block_sequence[*num_steps].params.set_output_transfer_func_params.pipe_ctx = current_mpc_pipe;
+				block_sequence[*num_steps].params.set_output_transfer_func_params.stream = current_mpc_pipe->stream;
+				block_sequence[*num_steps].func = DPP_SET_OUTPUT_TRANSFER_FUNC;
+				(*num_steps)++;
+			}
+			if (dc->debug.visual_confirm != VISUAL_CONFIRM_DISABLE &&
+				dc->hwss.update_visual_confirm_color) {
+				block_sequence[*num_steps].params.update_visual_confirm_params.dc = dc;
+				block_sequence[*num_steps].params.update_visual_confirm_params.pipe_ctx = current_mpc_pipe;
+				block_sequence[*num_steps].params.update_visual_confirm_params.mpcc_id = current_mpc_pipe->plane_res.hubp->inst;
+				block_sequence[*num_steps].func = MPC_UPDATE_VISUAL_CONFIRM;
+				(*num_steps)++;
+			}
+			if (current_mpc_pipe->stream->update_flags.bits.out_csc) {
+				block_sequence[*num_steps].params.power_on_mpc_mem_pwr_params.mpc = dc->res_pool->mpc;
+				block_sequence[*num_steps].params.power_on_mpc_mem_pwr_params.mpcc_id = current_mpc_pipe->plane_res.hubp->inst;
+				block_sequence[*num_steps].params.power_on_mpc_mem_pwr_params.power_on = true;
+				block_sequence[*num_steps].func = MPC_POWER_ON_MPC_MEM_PWR;
+				(*num_steps)++;
+
+				/* use the stream's matrix when adjustment is enabled, the color space default otherwise */
+				if (current_mpc_pipe->stream->csc_color_matrix.enable_adjustment == true) {
+					block_sequence[*num_steps].params.set_output_csc_params.mpc = dc->res_pool->mpc;
+					block_sequence[*num_steps].params.set_output_csc_params.opp_id = current_mpc_pipe->stream_res.opp->inst;
+					block_sequence[*num_steps].params.set_output_csc_params.regval = current_mpc_pipe->stream->csc_color_matrix.matrix;
+					block_sequence[*num_steps].params.set_output_csc_params.ocsc_mode = MPC_OUTPUT_CSC_COEF_A;
+					block_sequence[*num_steps].func = MPC_SET_OUTPUT_CSC;
+					(*num_steps)++;
+				} else {
+					block_sequence[*num_steps].params.set_ocsc_default_params.mpc = dc->res_pool->mpc;
+					block_sequence[*num_steps].params.set_ocsc_default_params.opp_id = current_mpc_pipe->stream_res.opp->inst;
+					block_sequence[*num_steps].params.set_ocsc_default_params.color_space = current_mpc_pipe->stream->output_color_space;
+					block_sequence[*num_steps].params.set_ocsc_default_params.ocsc_mode = MPC_OUTPUT_CSC_COEF_A;
+					block_sequence[*num_steps].func = MPC_SET_OCSC_DEFAULT;
+					(*num_steps)++;
+				}
+			}
+			current_mpc_pipe = current_mpc_pipe->bottom_pipe;
+		}
+		current_pipe = current_pipe->next_odm_pipe;
+	}
+
+	/* Queue the matching unlock steps */
+	if (dc->hwss.pipe_control_lock) {
+		block_sequence[*num_steps].params.pipe_control_lock_params.dc = dc;
+		block_sequence[*num_steps].params.pipe_control_lock_params.lock = false;
+		block_sequence[*num_steps].params.pipe_control_lock_params.pipe_ctx = pipe_ctx;
+		block_sequence[*num_steps].func = OPTC_PIPE_CONTROL_LOCK;
+		(*num_steps)++;
+	}
+	if (dc->hwss.subvp_pipe_control_lock_fast) {
+		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.dc = dc;
+		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.lock = false;
+		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.subvp_immediate_flip =
+				plane->flip_immediate && stream_status->mall_stream_config.type == SUBVP_MAIN;
+		block_sequence[*num_steps].func = DMUB_SUBVP_PIPE_CONTROL_LOCK_FAST;
+		(*num_steps)++;
+	}
+	if (dc->hwss.dmub_hw_control_lock_fast) {
+		block_sequence[*num_steps].params.dmub_hw_control_lock_fast_params.dc = dc;
+		block_sequence[*num_steps].params.dmub_hw_control_lock_fast_params.lock = false;
+		/*
+		 * Use the same requirement as the lock step above. Checking
+		 * only FAMS2 here would mark the unlock as not required for a
+		 * lock that was required because of the link alone.
+		 */
+		block_sequence[*num_steps].params.dmub_hw_control_lock_fast_params.is_required = hw_lock_required;
+		block_sequence[*num_steps].func = DMUB_HW_CONTROL_LOCK_FAST;
+		(*num_steps)++;
+	}
+
+	/* Second walk: only the last pipe of the chains gets the manual trigger */
+	current_pipe = pipe_ctx;
+	while (current_pipe) {
+		current_mpc_pipe = current_pipe;
+
+		while (current_mpc_pipe) {
+			if (!current_mpc_pipe->bottom_pipe && !current_mpc_pipe->next_odm_pipe &&
+					current_mpc_pipe->stream && current_mpc_pipe->plane_state &&
+					current_mpc_pipe->plane_state->update_flags.bits.addr_update &&
+					!current_mpc_pipe->plane_state->skip_manual_trigger) {
+				if (dc->hwss.program_cursor_offload_now) {
+					block_sequence[*num_steps].params.program_cursor_update_now_params.dc = dc;
+					block_sequence[*num_steps].params.program_cursor_update_now_params.pipe_ctx = current_mpc_pipe;
+					block_sequence[*num_steps].func = PROGRAM_CURSOR_UPDATE_NOW;
+					(*num_steps)++;
+				}
+
+				block_sequence[*num_steps].params.program_manual_trigger_params.pipe_ctx = current_mpc_pipe;
+				block_sequence[*num_steps].func = OPTC_PROGRAM_MANUAL_TRIGGER;
+				(*num_steps)++;
+			}
+			current_mpc_pipe = current_mpc_pipe->bottom_pipe;
+		}
+		current_pipe = current_pipe->next_odm_pipe;
+	}
+}
+
+/*
+ * Run the first num_steps entries of block_sequence in order.
+ *
+ * Each entry's func selects either a dc->hwss / hws->funcs / hubp->funcs hook
+ * or a local hwss_* helper, which is called with that entry's params.
+ * An unrecognised func trips ASSERT(false) and execution moves on to the
+ * next entry.
+ *
+ * A negative num_steps executes nothing; a num_steps larger than
+ * MAX_HWSS_BLOCK_SEQUENCE_SIZE trips ASSERT(false) and is clamped.
+ */
+void hwss_execute_sequence(struct dc *dc,
+		struct block_sequence block_sequence[MAX_HWSS_BLOCK_SEQUENCE_SIZE],
+		int num_steps)
+{
+	/*
+	 * Signed on purpose: comparing an unsigned index against the signed
+	 * num_steps would turn a negative count into a huge loop bound.
+	 */
+	int i;
+	union block_sequence_params *params;
+	struct dce_hwseq *hws = dc->hwseq;
+
+	/* The array is declared with MAX_HWSS_BLOCK_SEQUENCE_SIZE entries. */
+	if (num_steps > MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+		ASSERT(false);
+		num_steps = MAX_HWSS_BLOCK_SEQUENCE_SIZE;
+	}
+
+	for (i = 0; i < num_steps; i++) {
+		params = &(block_sequence[i].params);
+		switch (block_sequence[i].func) {
+
+		case DMUB_SUBVP_PIPE_CONTROL_LOCK_FAST:
+			dc->hwss.subvp_pipe_control_lock_fast(params);
+			break;
+		case OPTC_PIPE_CONTROL_LOCK:
+			dc->hwss.pipe_control_lock(params->pipe_control_lock_params.dc,
+					params->pipe_control_lock_params.pipe_ctx,
+					params->pipe_control_lock_params.lock);
+			break;
+		case HUBP_SET_FLIP_CONTROL_GSL:
+			params->set_flip_control_gsl_params.hubp->funcs->hubp_set_flip_control_surface_gsl(
+					params->set_flip_control_gsl_params.hubp,
+					params->set_flip_control_gsl_params.flip_immediate);
+			break;
+		case HUBP_PROGRAM_TRIPLEBUFFER:
+			dc->hwss.program_triplebuffer(params->program_triplebuffer_params.dc,
+					params->program_triplebuffer_params.pipe_ctx,
+					params->program_triplebuffer_params.enableTripleBuffer);
+			break;
+		case HUBP_UPDATE_PLANE_ADDR:
+			dc->hwss.update_plane_addr(params->update_plane_addr_params.dc,
+					params->update_plane_addr_params.pipe_ctx);
+			break;
+		case DPP_SET_INPUT_TRANSFER_FUNC:
+			hws->funcs.set_input_transfer_func(params->set_input_transfer_func_params.dc,
+					params->set_input_transfer_func_params.pipe_ctx,
+					params->set_input_transfer_func_params.plane_state);
+			break;
+		case DPP_PROGRAM_GAMUT_REMAP:
+			dc->hwss.program_gamut_remap(params->program_gamut_remap_params.pipe_ctx);
+			break;
+		case DPP_SETUP_DPP:
+			hwss_setup_dpp(params);
+			break;
+		case DPP_PROGRAM_BIAS_AND_SCALE:
+			hwss_program_bias_and_scale(params);
+			break;
+		case OPTC_PROGRAM_MANUAL_TRIGGER:
+			hwss_program_manual_trigger(params);
+			break;
+		case DPP_SET_OUTPUT_TRANSFER_FUNC:
+			hws->funcs.set_output_transfer_func(params->set_output_transfer_func_params.dc,
+					params->set_output_transfer_func_params.pipe_ctx,
+					params->set_output_transfer_func_params.stream);
+			break;
+		case MPC_UPDATE_VISUAL_CONFIRM:
+			dc->hwss.update_visual_confirm_color(params->update_visual_confirm_params.dc,
+					params->update_visual_confirm_params.pipe_ctx,
+					params->update_visual_confirm_params.mpcc_id);
+			break;
+		case MPC_POWER_ON_MPC_MEM_PWR:
+			hwss_power_on_mpc_mem_pwr(params);
+			break;
+		case MPC_SET_OUTPUT_CSC:
+			hwss_set_output_csc(params);
+			break;
+		case MPC_SET_OCSC_DEFAULT:
+			hwss_set_ocsc_default(params);
+			break;
+		case DMUB_SEND_DMCUB_CMD:
+			hwss_send_dmcub_cmd(params);
+			break;
+		case DMUB_SUBVP_SAVE_SURF_ADDR:
+			hwss_subvp_save_surf_addr(params);
+			break;
+		case HUBP_WAIT_FOR_DCC_META_PROP:
+			dc->hwss.wait_for_dcc_meta_propagation(
+					params->wait_for_dcc_meta_propagation_params.dc,
+					params->wait_for_dcc_meta_propagation_params.top_pipe_to_program);
+			break;
+		case DMUB_HW_CONTROL_LOCK_FAST:
+			dc->hwss.dmub_hw_control_lock_fast(params);
+			break;
+		case HUBP_PROGRAM_SURFACE_CONFIG:
+			hwss_program_surface_config(params);
+			break;
+		case HUBP_PROGRAM_MCACHE_ID:
+			hwss_program_mcache_id_and_split_coordinate(params);
+			break;
+		case PROGRAM_CURSOR_UPDATE_NOW:
+			dc->hwss.program_cursor_offload_now(
+					params->program_cursor_update_now_params.dc,
+					params->program_cursor_update_now_params.pipe_ctx);
+			break;
+		case HUBP_WAIT_PIPE_READ_START:
+			params->hubp_wait_pipe_read_start_params.hubp->funcs->hubp_wait_pipe_read_start(
+					params->hubp_wait_pipe_read_start_params.hubp);
+			break;
+		case HWS_APPLY_UPDATE_FLAGS_FOR_PHANTOM:
+			dc->hwss.apply_update_flags_for_phantom(params->apply_update_flags_for_phantom_params.pipe_ctx);
+			break;
+		case HWS_UPDATE_PHANTOM_VP_POSITION:
+			dc->hwss.update_phantom_vp_position(params->update_phantom_vp_position_params.dc,
+					params->update_phantom_vp_position_params.context,
+					params->update_phantom_vp_position_params.pipe_ctx);
+			break;
+		case OPTC_SET_ODM_COMBINE:
+			hwss_set_odm_combine(params);
+			break;
+		case OPTC_SET_ODM_BYPASS:
+			hwss_set_odm_bypass(params);
+			break;
+		case OPP_PIPE_CLOCK_CONTROL:
+			hwss_opp_pipe_clock_control(params);
+			break;
+		case OPP_PROGRAM_LEFT_EDGE_EXTRA_PIXEL:
+			hwss_opp_program_left_edge_extra_pixel(params);
+			break;
+		case DCCG_SET_DTO_DSCCLK:
+			hwss_dccg_set_dto_dscclk(params);
+			break;
+		case DSC_SET_CONFIG:
+			hwss_dsc_set_config(params);
+			break;
+		case DSC_ENABLE:
+			hwss_dsc_enable(params);
+			break;
+		case TG_SET_DSC_CONFIG:
+			hwss_tg_set_dsc_config(params);
+			break;
+		case DSC_DISCONNECT:
+			hwss_dsc_disconnect(params);
+			break;
+		case DSC_READ_STATE:
+			hwss_dsc_read_state(params);
+			break;
+		case DSC_CALCULATE_AND_SET_CONFIG:
+			hwss_dsc_calculate_and_set_config(params);
+			break;
+		case DSC_ENABLE_WITH_OPP:
+			hwss_dsc_enable_with_opp(params);
+			break;
+		case TG_PROGRAM_GLOBAL_SYNC:
+			hwss_tg_program_global_sync(params);
+			break;
+		case TG_WAIT_FOR_STATE:
+			hwss_tg_wait_for_state(params);
+			break;
+		case TG_SET_VTG_PARAMS:
+			hwss_tg_set_vtg_params(params);
+			break;
+		case TG_SETUP_VERTICAL_INTERRUPT2:
+			hwss_tg_setup_vertical_interrupt2(params);
+			break;
+		case DPP_SET_HDR_MULTIPLIER:
+			hwss_dpp_set_hdr_multiplier(params);
+			break;
+		case HUBP_PROGRAM_DET_SIZE:
+			hwss_program_det_size(params);
+			break;
+		case HUBP_PROGRAM_DET_SEGMENTS:
+			hwss_program_det_segments(params);
+			break;
+		case OPP_SET_DYN_EXPANSION:
+			hwss_opp_set_dyn_expansion(params);
+			break;
+		case OPP_PROGRAM_FMT:
+			hwss_opp_program_fmt(params);
+			break;
+		case OPP_PROGRAM_BIT_DEPTH_REDUCTION:
+			hwss_opp_program_bit_depth_reduction(params);
+			break;
+		case OPP_SET_DISP_PATTERN_GENERATOR:
+			hwss_opp_set_disp_pattern_generator(params);
+			break;
+		case ABM_SET_PIPE:
+			hwss_set_abm_pipe(params);
+			break;
+		case ABM_SET_LEVEL:
+			hwss_set_abm_level(params);
+			break;
+		case ABM_SET_IMMEDIATE_DISABLE:
+			hwss_set_abm_immediate_disable(params);
+			break;
+		case MPC_REMOVE_MPCC:
+			hwss_mpc_remove_mpcc(params);
+			break;
+		case OPP_SET_MPCC_DISCONNECT_PENDING:
+			hwss_opp_set_mpcc_disconnect_pending(params);
+			break;
+		case DC_SET_OPTIMIZED_REQUIRED:
+			hwss_dc_set_optimized_required(params);
+			break;
+		case HUBP_DISCONNECT:
+			hwss_hubp_disconnect(params);
+			break;
+		case HUBBUB_FORCE_PSTATE_CHANGE_CONTROL:
+			hwss_hubbub_force_pstate_change_control(params);
+			break;
+		case TG_ENABLE_CRTC:
+			hwss_tg_enable_crtc(params);
+			break;
+		case TG_SET_GSL:
+			hwss_tg_set_gsl(params);
+			break;
+		case TG_SET_GSL_SOURCE_SELECT:
+			hwss_tg_set_gsl_source_select(params);
+			break;
+		case HUBP_WAIT_FLIP_PENDING:
+			hwss_hubp_wait_flip_pending(params);
+			break;
+		case TG_WAIT_DOUBLE_BUFFER_PENDING:
+			hwss_tg_wait_double_buffer_pending(params);
+			break;
+		case UPDATE_FORCE_PSTATE:
+			hwss_update_force_pstate(params);
+			break;
+		case HUBBUB_APPLY_DEDCN21_147_WA:
+			hwss_hubbub_apply_dedcn21_147_wa(params);
+			break;
+		case HUBBUB_ALLOW_SELF_REFRESH_CONTROL:
+			hwss_hubbub_allow_self_refresh_control(params);
+			break;
+		case TG_GET_FRAME_COUNT:
+			hwss_tg_get_frame_count(params);
+			break;
+		case MPC_SET_DWB_MUX:
+			hwss_mpc_set_dwb_mux(params);
+			break;
+		case MPC_DISABLE_DWB_MUX:
+			hwss_mpc_disable_dwb_mux(params);
+			break;
+		case MCIF_WB_CONFIG_BUF:
+			hwss_mcif_wb_config_buf(params);
+			break;
+		case MCIF_WB_CONFIG_ARB:
+			hwss_mcif_wb_config_arb(params);
+			break;
+		case MCIF_WB_ENABLE:
+			hwss_mcif_wb_enable(params);
+			break;
+		case MCIF_WB_DISABLE:
+			hwss_mcif_wb_disable(params);
+			break;
+		case DWBC_ENABLE:
+			hwss_dwbc_enable(params);
+			break;
+		case DWBC_DISABLE:
+			hwss_dwbc_disable(params);
+			break;
+		case DWBC_UPDATE:
+			hwss_dwbc_update(params);
+			break;
+		case HUBP_UPDATE_MALL_SEL:
+			hwss_hubp_update_mall_sel(params);
+			break;
+		case HUBP_PREPARE_SUBVP_BUFFERING:
+			hwss_hubp_prepare_subvp_buffering(params);
+			break;
+		case HUBP_SET_BLANK_EN:
+			hwss_hubp_set_blank_en(params);
+			break;
+		case HUBP_DISABLE_CONTROL:
+			hwss_hubp_disable_control(params);
+			break;
+		case HUBBUB_SOFT_RESET:
+			hwss_hubbub_soft_reset(params);
+			break;
+		case HUBP_CLK_CNTL:
+			hwss_hubp_clk_cntl(params);
+			break;
+		case HUBP_INIT:
+			hwss_hubp_init(params);
+			break;
+		case HUBP_SET_VM_SYSTEM_APERTURE_SETTINGS:
+			hwss_hubp_set_vm_system_aperture_settings(params);
+			break;
+		case HUBP_SET_FLIP_INT:
+			hwss_hubp_set_flip_int(params);
+			break;
+		case DPP_DPPCLK_CONTROL:
+			hwss_dpp_dppclk_control(params);
+			break;
+		case DISABLE_PHANTOM_CRTC:
+			hwss_disable_phantom_crtc(params);
+			break;
+		case DSC_PG_STATUS:
+			hwss_dsc_pg_status(params);
+			break;
+		case DSC_WAIT_DISCONNECT_PENDING_CLEAR:
+			hwss_dsc_wait_disconnect_pending_clear(params);
+			break;
+		case DSC_DISABLE:
+			hwss_dsc_disable(params);
+			break;
+		case DCCG_SET_REF_DSCCLK:
+			hwss_dccg_set_ref_dscclk(params);
+			break;
+		case DPP_PG_CONTROL:
+			hwss_dpp_pg_control(params);
+			break;
+		case HUBP_PG_CONTROL:
+			hwss_hubp_pg_control(params);
+			break;
+		case HUBP_RESET:
+			hwss_hubp_reset(params);
+			break;
+		case DPP_RESET:
+			hwss_dpp_reset(params);
+			break;
+		case DPP_ROOT_CLOCK_CONTROL:
+			hwss_dpp_root_clock_control(params);
+			break;
+		case DC_IP_REQUEST_CNTL:
+			hwss_dc_ip_request_cntl(params);
+			break;
+		case DCCG_UPDATE_DPP_DTO:
+			hwss_dccg_update_dpp_dto(params);
+			break;
+		case HUBP_VTG_SEL:
+			hwss_hubp_vtg_sel(params);
+			break;
+		case HUBP_SETUP2:
+			hwss_hubp_setup2(params);
+			break;
+		case HUBP_SETUP:
+			hwss_hubp_setup(params);
+			break;
+		case HUBP_SET_UNBOUNDED_REQUESTING:
+			hwss_hubp_set_unbounded_requesting(params);
+			break;
+		case HUBP_SETUP_INTERDEPENDENT2:
+			hwss_hubp_setup_interdependent2(params);
+			break;
+		case HUBP_SETUP_INTERDEPENDENT:
+			hwss_hubp_setup_interdependent(params);
+			break;
+		case DPP_SET_CURSOR_MATRIX:
+			hwss_dpp_set_cursor_matrix(params);
+			break;
+		case MPC_UPDATE_BLENDING:
+			hwss_mpc_update_blending(params);
+			break;
+		case MPC_ASSERT_IDLE_MPCC:
+			hwss_mpc_assert_idle_mpcc(params);
+			break;
+		case MPC_INSERT_PLANE:
+			hwss_mpc_insert_plane(params);
+			break;
+		case DPP_SET_SCALER:
+			hwss_dpp_set_scaler(params);
+			break;
+		case HUBP_MEM_PROGRAM_VIEWPORT:
+			hwss_hubp_mem_program_viewport(params);
+			break;
+		case ABORT_CURSOR_OFFLOAD_UPDATE:
+			hwss_abort_cursor_offload_update(params);
+			break;
+		case SET_CURSOR_ATTRIBUTE:
+			hwss_set_cursor_attribute(params);
+			break;
+		case SET_CURSOR_POSITION:
+			hwss_set_cursor_position(params);
+			break;
+		case SET_CURSOR_SDR_WHITE_LEVEL:
+			hwss_set_cursor_sdr_white_level(params);
+			break;
+		case PROGRAM_OUTPUT_CSC:
+			hwss_program_output_csc(params);
+			break;
+		case HUBP_SET_BLANK:
+			hwss_hubp_set_blank(params);
+			break;
+		case PHANTOM_HUBP_POST_ENABLE:
+			hwss_phantom_hubp_post_enable(params);
+			break;
+		default:
+			/* Unknown step type: flag it and carry on with the rest. */
+			ASSERT(false);
+			break;
+		}
+	}
+}
+
+/*
+ * Append an OPTC_PIPE_CONTROL_LOCK step (dc, pipe_ctx, lock) to the sequence.
+ * Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are queued.
+ */
+void hwss_add_optc_pipe_control_lock(struct block_sequence_state *seq_state,
+		struct dc *dc,
+		struct pipe_ctx *pipe_ctx,
+		bool lock)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = OPTC_PIPE_CONTROL_LOCK;
+	step->params.pipe_control_lock_params.dc = dc;
+	step->params.pipe_control_lock_params.pipe_ctx = pipe_ctx;
+	step->params.pipe_control_lock_params.lock = lock;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a HUBP_SET_FLIP_CONTROL_GSL step (hubp, flip_immediate) to the
+ * sequence. Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are
+ * queued.
+ */
+void hwss_add_hubp_set_flip_control_gsl(struct block_sequence_state *seq_state,
+		struct hubp *hubp,
+		bool flip_immediate)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = HUBP_SET_FLIP_CONTROL_GSL;
+	step->params.set_flip_control_gsl_params.hubp = hubp;
+	step->params.set_flip_control_gsl_params.flip_immediate = flip_immediate;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a HUBP_PROGRAM_TRIPLEBUFFER step (dc, pipe_ctx, enableTripleBuffer)
+ * to the sequence. Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps
+ * are queued.
+ */
+void hwss_add_hubp_program_triplebuffer(struct block_sequence_state *seq_state,
+		struct dc *dc,
+		struct pipe_ctx *pipe_ctx,
+		bool enableTripleBuffer)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = HUBP_PROGRAM_TRIPLEBUFFER;
+	step->params.program_triplebuffer_params.dc = dc;
+	step->params.program_triplebuffer_params.pipe_ctx = pipe_ctx;
+	step->params.program_triplebuffer_params.enableTripleBuffer = enableTripleBuffer;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a HUBP_UPDATE_PLANE_ADDR step (dc, pipe_ctx) to the sequence.
+ * Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are queued.
+ */
+void hwss_add_hubp_update_plane_addr(struct block_sequence_state *seq_state,
+		struct dc *dc,
+		struct pipe_ctx *pipe_ctx)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = HUBP_UPDATE_PLANE_ADDR;
+	step->params.update_plane_addr_params.dc = dc;
+	step->params.update_plane_addr_params.pipe_ctx = pipe_ctx;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a DPP_SET_INPUT_TRANSFER_FUNC step (dc, pipe_ctx, plane_state) to the
+ * sequence. Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are
+ * queued.
+ */
+void hwss_add_dpp_set_input_transfer_func(struct block_sequence_state *seq_state,
+		struct dc *dc,
+		struct pipe_ctx *pipe_ctx,
+		struct dc_plane_state *plane_state)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = DPP_SET_INPUT_TRANSFER_FUNC;
+	step->params.set_input_transfer_func_params.dc = dc;
+	step->params.set_input_transfer_func_params.pipe_ctx = pipe_ctx;
+	step->params.set_input_transfer_func_params.plane_state = plane_state;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a DPP_PROGRAM_GAMUT_REMAP step for pipe_ctx to the sequence.
+ * Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are queued.
+ */
+void hwss_add_dpp_program_gamut_remap(struct block_sequence_state *seq_state,
+		struct pipe_ctx *pipe_ctx)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = DPP_PROGRAM_GAMUT_REMAP;
+	step->params.program_gamut_remap_params.pipe_ctx = pipe_ctx;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a DPP_PROGRAM_BIAS_AND_SCALE step for pipe_ctx to the sequence.
+ * Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are queued.
+ */
+void hwss_add_dpp_program_bias_and_scale(struct block_sequence_state *seq_state, struct pipe_ctx *pipe_ctx)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = DPP_PROGRAM_BIAS_AND_SCALE;
+	step->params.program_bias_and_scale_params.pipe_ctx = pipe_ctx;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an OPTC_PROGRAM_MANUAL_TRIGGER step for pipe_ctx to the sequence.
+ * Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are queued.
+ */
+void hwss_add_optc_program_manual_trigger(struct block_sequence_state *seq_state,
+		struct pipe_ctx *pipe_ctx)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = OPTC_PROGRAM_MANUAL_TRIGGER;
+	step->params.program_manual_trigger_params.pipe_ctx = pipe_ctx;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a DPP_SET_OUTPUT_TRANSFER_FUNC step (dc, pipe_ctx, stream) to the
+ * sequence. Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are
+ * queued.
+ */
+void hwss_add_dpp_set_output_transfer_func(struct block_sequence_state *seq_state,
+		struct dc *dc,
+		struct pipe_ctx *pipe_ctx,
+		struct dc_stream_state *stream)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = DPP_SET_OUTPUT_TRANSFER_FUNC;
+	step->params.set_output_transfer_func_params.dc = dc;
+	step->params.set_output_transfer_func_params.pipe_ctx = pipe_ctx;
+	step->params.set_output_transfer_func_params.stream = stream;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an MPC_UPDATE_VISUAL_CONFIRM step (dc, pipe_ctx, mpcc_id) to the
+ * sequence. Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are
+ * queued.
+ */
+void hwss_add_mpc_update_visual_confirm(struct block_sequence_state *seq_state,
+		struct dc *dc,
+		struct pipe_ctx *pipe_ctx,
+		int mpcc_id)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = MPC_UPDATE_VISUAL_CONFIRM;
+	step->params.update_visual_confirm_params.dc = dc;
+	step->params.update_visual_confirm_params.pipe_ctx = pipe_ctx;
+	step->params.update_visual_confirm_params.mpcc_id = mpcc_id;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an MPC_POWER_ON_MPC_MEM_PWR step (mpc, mpcc_id, power_on) to the
+ * sequence. Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are
+ * queued.
+ */
+void hwss_add_mpc_power_on_mpc_mem_pwr(struct block_sequence_state *seq_state,
+		struct mpc *mpc,
+		int mpcc_id,
+		bool power_on)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = MPC_POWER_ON_MPC_MEM_PWR;
+	step->params.power_on_mpc_mem_pwr_params.mpc = mpc;
+	step->params.power_on_mpc_mem_pwr_params.mpcc_id = mpcc_id;
+	step->params.power_on_mpc_mem_pwr_params.power_on = power_on;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an MPC_SET_OUTPUT_CSC step (mpc, opp_id, regval, ocsc_mode) to the
+ * sequence. Only the regval pointer is stored, not the data it points to.
+ * Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are queued.
+ */
+void hwss_add_mpc_set_output_csc(struct block_sequence_state *seq_state,
+		struct mpc *mpc,
+		int opp_id,
+		const uint16_t *regval,
+		enum mpc_output_csc_mode ocsc_mode)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = MPC_SET_OUTPUT_CSC;
+	step->params.set_output_csc_params.mpc = mpc;
+	step->params.set_output_csc_params.opp_id = opp_id;
+	step->params.set_output_csc_params.regval = regval;
+	step->params.set_output_csc_params.ocsc_mode = ocsc_mode;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an MPC_SET_OCSC_DEFAULT step (mpc, opp_id, colorspace, ocsc_mode) to
+ * the sequence. Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps
+ * are queued.
+ */
+void hwss_add_mpc_set_ocsc_default(struct block_sequence_state *seq_state,
+		struct mpc *mpc,
+		int opp_id,
+		enum dc_color_space colorspace,
+		enum mpc_output_csc_mode ocsc_mode)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = MPC_SET_OCSC_DEFAULT;
+	step->params.set_ocsc_default_params.mpc = mpc;
+	step->params.set_ocsc_default_params.opp_id = opp_id;
+	step->params.set_ocsc_default_params.color_space = colorspace;
+	step->params.set_ocsc_default_params.ocsc_mode = ocsc_mode;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a DMUB_SEND_DMCUB_CMD step (ctx, cmd, wait_type) to the sequence.
+ * Only the cmd pointer is stored, not a copy of the command.
+ * Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are queued.
+ */
+void hwss_add_dmub_send_dmcub_cmd(struct block_sequence_state *seq_state,
+		struct dc_context *ctx,
+		union dmub_rb_cmd *cmd,
+		enum dm_dmub_wait_type wait_type)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = DMUB_SEND_DMCUB_CMD;
+	step->params.send_dmcub_cmd_params.ctx = ctx;
+	step->params.send_dmcub_cmd_params.cmd = cmd;
+	step->params.send_dmcub_cmd_params.wait_type = wait_type;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a DMUB_SUBVP_SAVE_SURF_ADDR step (dc_dmub_srv, addr, subvp_index) to
+ * the sequence. Only the addr pointer is stored.
+ * Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are queued.
+ */
+void hwss_add_dmub_subvp_save_surf_addr(struct block_sequence_state *seq_state,
+		struct dc_dmub_srv *dc_dmub_srv,
+		struct dc_plane_address *addr,
+		uint8_t subvp_index)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = DMUB_SUBVP_SAVE_SURF_ADDR;
+	step->params.subvp_save_surf_addr.dc_dmub_srv = dc_dmub_srv;
+	step->params.subvp_save_surf_addr.addr = addr;
+	step->params.subvp_save_surf_addr.subvp_index = subvp_index;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a HUBP_WAIT_FOR_DCC_META_PROP step (dc, top_pipe_to_program) to the
+ * sequence. Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are
+ * queued.
+ */
+void hwss_add_hubp_wait_for_dcc_meta_prop(struct block_sequence_state *seq_state,
+		struct dc *dc,
+		struct pipe_ctx *top_pipe_to_program)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = HUBP_WAIT_FOR_DCC_META_PROP;
+	step->params.wait_for_dcc_meta_propagation_params.dc = dc;
+	step->params.wait_for_dcc_meta_propagation_params.top_pipe_to_program = top_pipe_to_program;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a HUBP_WAIT_PIPE_READ_START step for hubp to the sequence.
+ * Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are queued.
+ */
+void hwss_add_hubp_wait_pipe_read_start(struct block_sequence_state *seq_state,
+		struct hubp *hubp)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = HUBP_WAIT_PIPE_READ_START;
+	step->params.hubp_wait_pipe_read_start_params.hubp = hubp;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an HWS_APPLY_UPDATE_FLAGS_FOR_PHANTOM step for pipe_ctx to the
+ * sequence. Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are
+ * queued.
+ */
+void hwss_add_hws_apply_update_flags_for_phantom(struct block_sequence_state *seq_state,
+		struct pipe_ctx *pipe_ctx)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = HWS_APPLY_UPDATE_FLAGS_FOR_PHANTOM;
+	step->params.apply_update_flags_for_phantom_params.pipe_ctx = pipe_ctx;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an HWS_UPDATE_PHANTOM_VP_POSITION step (dc, context, pipe_ctx) to the
+ * sequence. Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are
+ * queued.
+ */
+void hwss_add_hws_update_phantom_vp_position(struct block_sequence_state *seq_state,
+		struct dc *dc,
+		struct dc_state *context,
+		struct pipe_ctx *pipe_ctx)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = HWS_UPDATE_PHANTOM_VP_POSITION;
+	step->params.update_phantom_vp_position_params.dc = dc;
+	step->params.update_phantom_vp_position_params.context = context;
+	step->params.update_phantom_vp_position_params.pipe_ctx = pipe_ctx;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an OPTC_SET_ODM_COMBINE step to the sequence.
+ * The MAX_PIPES entries of opp_inst are copied into the step; the remaining
+ * arguments are stored as given.
+ * Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are queued.
+ */
+void hwss_add_optc_set_odm_combine(struct block_sequence_state *seq_state,
+		struct timing_generator *tg, int opp_inst[MAX_PIPES], int opp_head_count,
+		int odm_slice_width, int last_odm_slice_width)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = OPTC_SET_ODM_COMBINE;
+	step->params.set_odm_combine_params.tg = tg;
+	/* Snapshot the instance list instead of keeping the caller's pointer. */
+	memcpy(step->params.set_odm_combine_params.opp_inst, opp_inst, sizeof(int) * MAX_PIPES);
+	step->params.set_odm_combine_params.opp_head_count = opp_head_count;
+	step->params.set_odm_combine_params.odm_slice_width = odm_slice_width;
+	step->params.set_odm_combine_params.last_odm_slice_width = last_odm_slice_width;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an OPTC_SET_ODM_BYPASS step (tg, timing) to the sequence.
+ * Only the timing pointer is stored.
+ * Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are queued.
+ */
+void hwss_add_optc_set_odm_bypass(struct block_sequence_state *seq_state,
+		struct timing_generator *tg, struct dc_crtc_timing *timing)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = OPTC_SET_ODM_BYPASS;
+	step->params.set_odm_bypass_params.tg = tg;
+	step->params.set_odm_bypass_params.timing = timing;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Execute a queued DMCUB command step: forwards the stored ctx, cmd and
+ * wait_type to dc_wake_and_execute_dmub_cmd(). Its result is not inspected.
+ */
+void hwss_send_dmcub_cmd(union block_sequence_params *params)
+{
+	dc_wake_and_execute_dmub_cmd(params->send_dmcub_cmd_params.ctx,
+			params->send_dmcub_cmd_params.cmd,
+			params->send_dmcub_cmd_params.wait_type);
+}
+
+/*
+ * Append a TG_PROGRAM_GLOBAL_SYNC step to the sequence, storing tg and the
+ * five timing values exactly as passed in.
+ * Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are queued.
+ */
+void hwss_add_tg_program_global_sync(struct block_sequence_state *seq_state,
+		struct timing_generator *tg,
+		int vready_offset,
+		unsigned int vstartup_lines,
+		unsigned int vupdate_offset_pixels,
+		unsigned int vupdate_vupdate_width_pixels,
+		unsigned int pstate_keepout_start_lines)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = TG_PROGRAM_GLOBAL_SYNC;
+	step->params.tg_program_global_sync_params.tg = tg;
+	step->params.tg_program_global_sync_params.vready_offset = vready_offset;
+	step->params.tg_program_global_sync_params.vstartup_lines = vstartup_lines;
+	step->params.tg_program_global_sync_params.vupdate_offset_pixels = vupdate_offset_pixels;
+	step->params.tg_program_global_sync_params.vupdate_vupdate_width_pixels = vupdate_vupdate_width_pixels;
+	step->params.tg_program_global_sync_params.pstate_keepout_start_lines = pstate_keepout_start_lines;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a TG_WAIT_FOR_STATE step (tg, state) to the sequence.
+ * Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are queued.
+ */
+void hwss_add_tg_wait_for_state(struct block_sequence_state *seq_state,
+		struct timing_generator *tg,
+		enum crtc_state state)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = TG_WAIT_FOR_STATE;
+	step->params.tg_wait_for_state_params.tg = tg;
+	step->params.tg_wait_for_state_params.state = state;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a TG_SET_VTG_PARAMS step (tg, dc_crtc_timing, program_fp2) to the
+ * sequence. Only the timing pointer is stored.
+ * Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are queued.
+ */
+void hwss_add_tg_set_vtg_params(struct block_sequence_state *seq_state,
+		struct timing_generator *tg,
+		struct dc_crtc_timing *dc_crtc_timing,
+		bool program_fp2)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = TG_SET_VTG_PARAMS;
+	step->params.tg_set_vtg_params_params.tg = tg;
+	step->params.tg_set_vtg_params_params.timing = dc_crtc_timing;
+	step->params.tg_set_vtg_params_params.program_fp2 = program_fp2;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a TG_SETUP_VERTICAL_INTERRUPT2 step (tg, start_line) to the sequence.
+ * Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are queued.
+ */
+void hwss_add_tg_setup_vertical_interrupt2(struct block_sequence_state *seq_state,
+		struct timing_generator *tg, int start_line)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = TG_SETUP_VERTICAL_INTERRUPT2;
+	step->params.tg_setup_vertical_interrupt2_params.tg = tg;
+	step->params.tg_setup_vertical_interrupt2_params.start_line = start_line;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a DPP_SET_HDR_MULTIPLIER step (dpp, hw_mult) to the sequence.
+ * Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are queued.
+ */
+void hwss_add_dpp_set_hdr_multiplier(struct block_sequence_state *seq_state,
+		struct dpp *dpp, uint32_t hw_mult)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = DPP_SET_HDR_MULTIPLIER;
+	step->params.dpp_set_hdr_multiplier_params.dpp = dpp;
+	step->params.dpp_set_hdr_multiplier_params.hw_mult = hw_mult;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a HUBP_PROGRAM_DET_SIZE step (hubbub, hubp_inst, det_buffer_size_kb)
+ * to the sequence. Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps
+ * are queued.
+ */
+void hwss_add_hubp_program_det_size(struct block_sequence_state *seq_state,
+		struct hubbub *hubbub,
+		unsigned int hubp_inst,
+		unsigned int det_buffer_size_kb)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = HUBP_PROGRAM_DET_SIZE;
+	step->params.program_det_size_params.hubbub = hubbub;
+	step->params.program_det_size_params.hubp_inst = hubp_inst;
+	step->params.program_det_size_params.det_buffer_size_kb = det_buffer_size_kb;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a HUBP_PROGRAM_MCACHE_ID step (hubp, mcache_regs) to the sequence.
+ * Only the mcache_regs pointer is stored.
+ * Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are queued.
+ */
+void hwss_add_hubp_program_mcache_id(struct block_sequence_state *seq_state,
+		struct hubp *hubp,
+		struct dml2_hubp_pipe_mcache_regs *mcache_regs)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = HUBP_PROGRAM_MCACHE_ID;
+	step->params.program_mcache_id_and_split_coordinate.hubp = hubp;
+	step->params.program_mcache_id_and_split_coordinate.mcache_regs = mcache_regs;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a HUBBUB_FORCE_PSTATE_CHANGE_CONTROL step (hubbub, enable, wait) to
+ * the sequence. Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps
+ * are queued.
+ */
+void hwss_add_hubbub_force_pstate_change_control(struct block_sequence_state *seq_state,
+		struct hubbub *hubbub,
+		bool enable,
+		bool wait)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = HUBBUB_FORCE_PSTATE_CHANGE_CONTROL;
+	step->params.hubbub_force_pstate_change_control_params.hubbub = hubbub;
+	step->params.hubbub_force_pstate_change_control_params.enable = enable;
+	step->params.hubbub_force_pstate_change_control_params.wait = wait;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a HUBP_PROGRAM_DET_SEGMENTS step (hubbub, hubp_inst, det_size) to the
+ * sequence. Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are
+ * queued.
+ */
+void hwss_add_hubp_program_det_segments(struct block_sequence_state *seq_state,
+		struct hubbub *hubbub,
+		unsigned int hubp_inst,
+		unsigned int det_size)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = HUBP_PROGRAM_DET_SEGMENTS;
+	step->params.program_det_segments_params.hubbub = hubbub;
+	step->params.program_det_segments_params.hubp_inst = hubp_inst;
+	step->params.program_det_segments_params.det_size = det_size;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an OPP_SET_DYN_EXPANSION step (opp, color_space, color_depth, signal)
+ * to the sequence. Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps
+ * are queued.
+ */
+void hwss_add_opp_set_dyn_expansion(struct block_sequence_state *seq_state,
+		struct output_pixel_processor *opp,
+		enum dc_color_space color_space,
+		enum dc_color_depth color_depth,
+		enum signal_type signal)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = OPP_SET_DYN_EXPANSION;
+	step->params.opp_set_dyn_expansion_params.opp = opp;
+	step->params.opp_set_dyn_expansion_params.color_space = color_space;
+	step->params.opp_set_dyn_expansion_params.color_depth = color_depth;
+	step->params.opp_set_dyn_expansion_params.signal = signal;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an OPP_PROGRAM_FMT step (opp, fmt_bit_depth, clamping) to the
+ * sequence. Only the fmt_bit_depth and clamping pointers are stored.
+ * Nothing is recorded once MAX_HWSS_BLOCK_SEQUENCE_SIZE steps are queued.
+ */
+void hwss_add_opp_program_fmt(struct block_sequence_state *seq_state,
+		struct output_pixel_processor *opp,
+		struct bit_depth_reduction_params *fmt_bit_depth,
+		struct clamping_and_pixel_encoding_params *clamping)
+{
+	struct block_sequence *step;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step = &seq_state->steps[*seq_state->num_steps];
+	step->func = OPP_PROGRAM_FMT;
+	step->params.opp_program_fmt_params.opp = opp;
+	step->params.opp_program_fmt_params.fmt_bit_depth = fmt_bit_depth;
+	step->params.opp_program_fmt_params.clamping = clamping;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Queue an OPP_PROGRAM_LEFT_EDGE_EXTRA_PIXEL step on the block sequence.
+ *
+ * Stores @opp, @pixel_encoding and @is_otg_master in the next free step and
+ * advances the step count. Nothing is recorded when the sequence already holds
+ * MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_opp_program_left_edge_extra_pixel(struct block_sequence_state *seq_state,
+		struct output_pixel_processor *opp,
+		enum dc_pixel_encoding pixel_encoding,
+		bool is_otg_master)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->opp_program_left_edge_extra_pixel_params.opp = opp;
+	step_params->opp_program_left_edge_extra_pixel_params.pixel_encoding = pixel_encoding;
+	step_params->opp_program_left_edge_extra_pixel_params.is_otg_master = is_otg_master;
+
+	/* Tag the step and consume the slot in one go. */
+	seq_state->steps[(*seq_state->num_steps)++].func = OPP_PROGRAM_LEFT_EDGE_EXTRA_PIXEL;
+}
+
+/*
+ * Queue an ABM_SET_PIPE step on the block sequence.
+ *
+ * Stores @dc and @pipe_ctx in the next free step and advances the step count.
+ * Nothing is recorded when the sequence already holds
+ * MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_abm_set_pipe(struct block_sequence_state *seq_state,
+		struct dc *dc,
+		struct pipe_ctx *pipe_ctx)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->set_abm_pipe_params.dc = dc;
+	step_params->set_abm_pipe_params.pipe_ctx = pipe_ctx;
+
+	/* Tag the step and consume the slot in one go. */
+	seq_state->steps[(*seq_state->num_steps)++].func = ABM_SET_PIPE;
+}
+
+/*
+ * Queue an ABM_SET_LEVEL step on the block sequence.
+ *
+ * Stores @abm and @abm_level in the next free step and advances the step count.
+ * Nothing is recorded when the sequence already holds
+ * MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_abm_set_level(struct block_sequence_state *seq_state,
+		struct abm *abm,
+		uint32_t abm_level)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->set_abm_level_params.abm = abm;
+	step_params->set_abm_level_params.abm_level = abm_level;
+
+	/* Tag the step and consume the slot in one go. */
+	seq_state->steps[(*seq_state->num_steps)++].func = ABM_SET_LEVEL;
+}
+
+/*
+ * Queue a TG_ENABLE_CRTC step on the block sequence.
+ *
+ * Stores @tg in the next free step and advances the step count. Nothing is
+ * recorded when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_tg_enable_crtc(struct block_sequence_state *seq_state,
+		struct timing_generator *tg)
+{
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].params.tg_enable_crtc_params.tg = tg;
+
+	/* Tag the step and consume the slot in one go. */
+	seq_state->steps[(*seq_state->num_steps)++].func = TG_ENABLE_CRTC;
+}
+
+/*
+ * Queue a HUBP_WAIT_FLIP_PENDING step on the block sequence.
+ *
+ * Stores @hubp, @timeout_us and @polling_interval_us in the next free step and
+ * advances the step count. Nothing is recorded when the sequence already holds
+ * MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_hubp_wait_flip_pending(struct block_sequence_state *seq_state,
+		struct hubp *hubp,
+		unsigned int timeout_us,
+		unsigned int polling_interval_us)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->hubp_wait_flip_pending_params.hubp = hubp;
+	step_params->hubp_wait_flip_pending_params.timeout_us = timeout_us;
+	step_params->hubp_wait_flip_pending_params.polling_interval_us = polling_interval_us;
+
+	/* Tag the step and consume the slot in one go. */
+	seq_state->steps[(*seq_state->num_steps)++].func = HUBP_WAIT_FLIP_PENDING;
+}
+
+/*
+ * Queue a TG_WAIT_DOUBLE_BUFFER_PENDING step on the block sequence.
+ *
+ * Stores @tg, @timeout_us and @polling_interval_us in the next free step and
+ * advances the step count. Nothing is recorded when the sequence already holds
+ * MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_tg_wait_double_buffer_pending(struct block_sequence_state *seq_state,
+		struct timing_generator *tg,
+		unsigned int timeout_us,
+		unsigned int polling_interval_us)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->tg_wait_double_buffer_pending_params.tg = tg;
+	step_params->tg_wait_double_buffer_pending_params.timeout_us = timeout_us;
+	step_params->tg_wait_double_buffer_pending_params.polling_interval_us = polling_interval_us;
+
+	/* Tag the step and consume the slot in one go. */
+	seq_state->steps[(*seq_state->num_steps)++].func = TG_WAIT_DOUBLE_BUFFER_PENDING;
+}
+
+/*
+ * Invoke the program_manual_trigger hook of the pipe's timing generator, when
+ * the hook is implemented.
+ */
+void hwss_program_manual_trigger(union block_sequence_params *params)
+{
+	struct timing_generator *tg =
+		params->program_manual_trigger_params.pipe_ctx->stream_res.tg;
+
+	if (!tg->funcs->program_manual_trigger)
+		return;
+
+	tg->funcs->program_manual_trigger(tg);
+}
+
+/*
+ * Call the DPP's dpp_setup hook with the plane's format, input CSC matrix and
+ * color space.
+ *
+ * Does nothing when the pipe has no plane state, no DPP, or the DPP does not
+ * implement dpp_setup.
+ */
+void hwss_setup_dpp(union block_sequence_params *params)
+{
+	struct pipe_ctx *pipe = params->setup_dpp_params.pipe_ctx;
+	struct dpp *dpp = pipe->plane_res.dpp;
+	struct dc_plane_state *plane = pipe->plane_state;
+
+	if (!plane || !dpp || !dpp->funcs->dpp_setup)
+		return;
+
+	/* program the input csc */
+	dpp->funcs->dpp_setup(dpp, plane->format, EXPANSION_MODE_ZERO,
+			plane->input_csc_color_matrix, plane->color_space, NULL);
+}
+
+/*
+ * Call the DPP's dpp_program_bias_and_scale hook with the plane's bias and
+ * scale settings.
+ *
+ * Does nothing when the pipe has no plane state, no DPP, or the DPP does not
+ * implement the hook. The plane state is checked before it is read, matching
+ * the guard in hwss_setup_dpp(); previously a pipe without a plane state was
+ * dereferenced unconditionally.
+ */
+void hwss_program_bias_and_scale(union block_sequence_params *params)
+{
+	struct pipe_ctx *pipe_ctx = params->program_bias_and_scale_params.pipe_ctx;
+	struct dpp *dpp = pipe_ctx->plane_res.dpp;
+	struct dc_plane_state *plane_state = pipe_ctx->plane_state;
+	struct dc_bias_and_scale bns_params;
+
+	if (!plane_state || !dpp)
+		return;
+
+	//TODO :for CNVC set scale and bias registers if necessary
+	if (!dpp->funcs->dpp_program_bias_and_scale)
+		return;
+
+	/* The hook receives a private copy, never the plane state's own member. */
+	bns_params = plane_state->bias_and_scale;
+	dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params);
+}
+
+/*
+ * Invoke the MPC power_on_mpc_mem_pwr hook for the given MPCC, when the hook
+ * is implemented.
+ */
+void hwss_power_on_mpc_mem_pwr(union block_sequence_params *params)
+{
+	struct mpc *mpc = params->power_on_mpc_mem_pwr_params.mpc;
+	int mpcc = params->power_on_mpc_mem_pwr_params.mpcc_id;
+	bool on = params->power_on_mpc_mem_pwr_params.power_on;
+
+	if (!mpc->funcs->power_on_mpc_mem_pwr)
+		return;
+
+	mpc->funcs->power_on_mpc_mem_pwr(mpc, mpcc, on);
+}
+
+/*
+ * Invoke the MPC set_output_csc hook with the queued OPP id, register values
+ * and output CSC mode, when the hook is implemented.
+ */
+void hwss_set_output_csc(union block_sequence_params *params)
+{
+	struct mpc *mpc = params->set_output_csc_params.mpc;
+	int opp = params->set_output_csc_params.opp_id;
+	const uint16_t *regval = params->set_output_csc_params.regval;
+	enum mpc_output_csc_mode mode = params->set_output_csc_params.ocsc_mode;
+
+	if (!mpc->funcs->set_output_csc)
+		return;
+
+	mpc->funcs->set_output_csc(mpc, opp, regval, mode);
+}
+
+/*
+ * Invoke the MPC set_ocsc_default hook with the queued OPP id, color space and
+ * output CSC mode, when the hook is implemented.
+ */
+void hwss_set_ocsc_default(union block_sequence_params *params)
+{
+	struct mpc *mpc = params->set_ocsc_default_params.mpc;
+	int opp = params->set_ocsc_default_params.opp_id;
+	enum dc_color_space cs = params->set_ocsc_default_params.color_space;
+	enum mpc_output_csc_mode mode = params->set_ocsc_default_params.ocsc_mode;
+
+	if (!mpc->funcs->set_ocsc_default)
+		return;
+
+	mpc->funcs->set_ocsc_default(mpc, opp, cs, mode);
+}
+
+/*
+ * Forward the queued surface address and SubVP index to
+ * dc_dmub_srv_subvp_save_surf_addr().
+ */
+void hwss_subvp_save_surf_addr(union block_sequence_params *params)
+{
+	uint8_t index = params->subvp_save_surf_addr.subvp_index;
+
+	dc_dmub_srv_subvp_save_surf_addr(params->subvp_save_surf_addr.dc_dmub_srv,
+			params->subvp_save_surf_addr.addr,
+			index);
+}
+
+/*
+ * Call the HUBP hubp_program_surface_config hook with the queued surface
+ * parameters, then clear the HUBP's power_gated flag.
+ *
+ * The hook is called unconditionally (no NULL check on the function pointer).
+ */
+void hwss_program_surface_config(union block_sequence_params *params)
+{
+	struct hubp *hubp = params->program_surface_config_params.hubp;
+	enum surface_pixel_format fmt = params->program_surface_config_params.format;
+	enum dc_rotation_angle rot = params->program_surface_config_params.rotation;
+	bool mirror = params->program_surface_config_params.horizontal_mirror;
+	int compat = params->program_surface_config_params.compat_level;
+	/* The hook gets a pointer to this local copy, not to the queued value. */
+	struct plane_size size = params->program_surface_config_params.plane_size;
+
+	hubp->funcs->hubp_program_surface_config(hubp, fmt,
+			params->program_surface_config_params.tiling_info,
+			&size, rot,
+			params->program_surface_config_params.dcc,
+			mirror, compat);
+
+	hubp->power_gated = false;
+}
+
+/*
+ * Call the HUBP hubp_program_mcache_id_and_split_coordinate hook with the
+ * queued mcache registers. The hook is called unconditionally.
+ */
+void hwss_program_mcache_id_and_split_coordinate(union block_sequence_params *params)
+{
+	struct hubp *hubp = params->program_mcache_id_and_split_coordinate.hubp;
+
+	hubp->funcs->hubp_program_mcache_id_and_split_coordinate(hubp,
+			params->program_mcache_id_and_split_coordinate.mcache_regs);
+}
+
void get_surface_tile_visual_confirm_color(
struct pipe_ctx *pipe_ctx,
struct tg_color *color)
@@ -424,3 +2072,2000 @@ void get_surface_tile_visual_confirm_color(
break;
}
}
+
+/**
+ * hwss_wait_for_all_blank_complete - wait for all active OPPs to finish pending blank
+ * pattern updates
+ *
+ * @dc: [in] dc reference
+ * @context: [in] hardware context in use
+ *
+ * Walks every pipe in @context and calls the hwseq wait_for_blank_complete hook
+ * on the OPP of each OPP_HEAD pipe that is not a SubVP phantom pipe. Returns
+ * immediately when the hook is not implemented.
+ */
+void hwss_wait_for_all_blank_complete(struct dc *dc,
+		struct dc_state *context)
+{
+	struct dce_hwseq *hws = dc->hwseq;
+	struct pipe_ctx *pipe;
+	int idx;
+
+	if (!hws->funcs.wait_for_blank_complete)
+		return;
+
+	for (idx = 0; idx < MAX_PIPES; idx++) {
+		pipe = &context->res_ctx.pipe_ctx[idx];
+
+		if (resource_is_pipe_type(pipe, OPP_HEAD) &&
+				dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM)
+			hws->funcs.wait_for_blank_complete(pipe->stream_res.opp);
+	}
+}
+
+/*
+ * For every OTG_MASTER pipe that is not a SubVP phantom pipe, call the timing
+ * generator's wait_odm_doublebuffer_pending_clear and wait_otg_disable hooks
+ * (each only when implemented), then wait for blank completion on all OPPs.
+ */
+void hwss_wait_for_odm_update_pending_complete(struct dc *dc, struct dc_state *context)
+{
+	int idx;
+
+	for (idx = 0; idx < MAX_PIPES; idx++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[idx];
+		struct timing_generator *tg;
+
+		if (resource_is_pipe_type(pipe, OTG_MASTER) &&
+				dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) {
+			tg = pipe->stream_res.tg;
+
+			if (tg->funcs->wait_odm_doublebuffer_pending_clear)
+				tg->funcs->wait_odm_doublebuffer_pending_clear(tg);
+
+			if (tg->funcs->wait_otg_disable)
+				tg->funcs->wait_otg_disable(tg);
+		}
+	}
+
+	/* ODM update may require to reprogram blank pattern for each OPP */
+	hwss_wait_for_all_blank_complete(dc, context);
+}
+
+/*
+ * Poll each pipe that has a plane state (SubVP phantom pipes excluded) until
+ * its flip is no longer pending, giving up after 100000 polls spaced 1 us
+ * apart. Asserts that no flip is pending once polling ends.
+ */
+void hwss_wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context)
+{
+	int idx;
+
+	for (idx = 0; idx < MAX_PIPES; idx++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[idx];
+		int polls;
+
+		if (!pipe->plane_state)
+			continue;
+		if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM)
+			continue;
+
+		/* Timeout 100 ms */
+		for (polls = 0; polls < 100000; polls++) {
+			/* Must set to false to start with, due to OR in update function */
+			pipe->plane_state->status.is_flip_pending = false;
+			dc->hwss.update_pending_status(pipe);
+			if (!pipe->plane_state->status.is_flip_pending)
+				break;
+			udelay(1);
+		}
+		ASSERT(!pipe->plane_state->status.is_flip_pending);
+	}
+}
+
+/*
+ * Wait for any potentially double buffered operations to complete.
+ *
+ * Per the original author's note, this should be invoked as a pre-amble prior
+ * to full update programming, before asserting any HW locks.
+ *
+ * For each pipe with a stream: waits for all pending updates on OTG_MASTER
+ * pipes (when the hwss hook exists), then, if the pipe has a HUBP, waits for
+ * MPC idle on the first OPP that still has an MPCC disconnect pending for that
+ * HUBP's instance and clears the pending flag. Finishes by waiting for ODM
+ * updates to complete.
+ */
+void hwss_wait_for_outstanding_hw_updates(struct dc *dc, struct dc_state *dc_context)
+{
+	const int opp_count = dc->res_pool->res_cap->num_opp;
+	const struct pipe_ctx *pipe_ctx;
+	struct hubp *hubp;
+	int pipe_idx, opp_inst, mpcc_inst;
+
+	for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) {
+		pipe_ctx = &dc_context->res_ctx.pipe_ctx[pipe_idx];
+		if (!pipe_ctx->stream)
+			continue;
+
+		/*
+		 * For full update we must wait for all double buffer updates, not
+		 * just DRR updates. This is particularly important for minimal
+		 * transitions. Only OTG_MASTER pipes are checked, as non-OTG master
+		 * pipes share the same OTG as their OTG master.
+		 */
+		if (resource_is_pipe_type(pipe_ctx, OTG_MASTER) && dc->hwss.wait_for_all_pending_updates)
+			dc->hwss.wait_for_all_pending_updates(pipe_ctx);
+
+		hubp = pipe_ctx->plane_res.hubp;
+		if (!hubp)
+			continue;
+
+		/* MPCC inst is equal to pipe index in practice */
+		mpcc_inst = hubp->inst;
+		for (opp_inst = 0; opp_inst < opp_count; opp_inst++) {
+			struct output_pixel_processor *opp = dc->res_pool->opps[opp_inst];
+
+			if (!opp || !opp->mpcc_disconnect_pending[mpcc_inst])
+				continue;
+
+			/* Only the first OPP with a pending disconnect is handled. */
+			dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst);
+			opp->mpcc_disconnect_pending[mpcc_inst] = false;
+			break;
+		}
+	}
+
+	hwss_wait_for_odm_update_pending_complete(dc, dc_context);
+}
+
+/*
+ * Wait for outstanding HW updates, then run the hwss
+ * program_outstanding_updates hook when it is implemented.
+ */
+void hwss_process_outstanding_hw_updates(struct dc *dc, struct dc_state *dc_context)
+{
+	/* wait for outstanding updates */
+	hwss_wait_for_outstanding_hw_updates(dc, dc_context);
+
+	if (!dc->hwss.program_outstanding_updates)
+		return;
+
+	/* perform outstanding post update programming */
+	dc->hwss.program_outstanding_updates(dc, dc_context);
+}
+
+/*
+ * Invoke the timing generator's set_odm_combine hook with the queued OPP
+ * instances and slice widths. Does nothing when the timing generator is NULL
+ * or does not implement the hook.
+ */
+void hwss_set_odm_combine(union block_sequence_params *params)
+{
+	struct timing_generator *tg = params->set_odm_combine_params.tg;
+	int *opp_insts = params->set_odm_combine_params.opp_inst;
+	int head_count = params->set_odm_combine_params.opp_head_count;
+	int slice_width = params->set_odm_combine_params.odm_slice_width;
+	int last_slice_width = params->set_odm_combine_params.last_odm_slice_width;
+
+	if (!tg || !tg->funcs->set_odm_combine)
+		return;
+
+	tg->funcs->set_odm_combine(tg, opp_insts, head_count,
+			slice_width, last_slice_width);
+}
+
+/*
+ * Invoke the timing generator's set_odm_bypass hook with the queued timing.
+ * Does nothing when the timing generator is NULL or lacks the hook.
+ */
+void hwss_set_odm_bypass(union block_sequence_params *params)
+{
+	struct timing_generator *tg = params->set_odm_bypass_params.tg;
+	const struct dc_crtc_timing *crtc_timing = params->set_odm_bypass_params.timing;
+
+	if (!tg || !tg->funcs->set_odm_bypass)
+		return;
+
+	tg->funcs->set_odm_bypass(tg, crtc_timing);
+}
+
+/*
+ * Invoke the OPP's opp_pipe_clock_control hook with the queued enable flag.
+ * Does nothing when the OPP is NULL or lacks the hook.
+ */
+void hwss_opp_pipe_clock_control(union block_sequence_params *params)
+{
+	struct output_pixel_processor *opp = params->opp_pipe_clock_control_params.opp;
+	bool on = params->opp_pipe_clock_control_params.enable;
+
+	if (!opp || !opp->funcs->opp_pipe_clock_control)
+		return;
+
+	opp->funcs->opp_pipe_clock_control(opp, on);
+}
+
+/*
+ * Invoke the OPP's opp_program_left_edge_extra_pixel hook with the queued
+ * pixel encoding and OTG-master flag. Does nothing when the OPP is NULL or
+ * lacks the hook.
+ */
+void hwss_opp_program_left_edge_extra_pixel(union block_sequence_params *params)
+{
+	struct output_pixel_processor *opp = params->opp_program_left_edge_extra_pixel_params.opp;
+	enum dc_pixel_encoding encoding = params->opp_program_left_edge_extra_pixel_params.pixel_encoding;
+	bool otg_master = params->opp_program_left_edge_extra_pixel_params.is_otg_master;
+
+	if (!opp || !opp->funcs->opp_program_left_edge_extra_pixel)
+		return;
+
+	opp->funcs->opp_program_left_edge_extra_pixel(opp, encoding, otg_master);
+}
+
+/*
+ * Invoke the DCCG's set_dto_dscclk hook with the queued instance and
+ * horizontal slice count. Does nothing when the DCCG is NULL or lacks the hook.
+ */
+void hwss_dccg_set_dto_dscclk(union block_sequence_params *params)
+{
+	struct dccg *dccg = params->dccg_set_dto_dscclk_params.dccg;
+	int dsc_inst = params->dccg_set_dto_dscclk_params.inst;
+	int slices_h = params->dccg_set_dto_dscclk_params.num_slices_h;
+
+	if (!dccg || !dccg->funcs->set_dto_dscclk)
+		return;
+
+	dccg->funcs->set_dto_dscclk(dccg, dsc_inst, slices_h);
+}
+
+/*
+ * Invoke the DSC's dsc_set_config hook with the queued DSC and OPTC config
+ * pointers. Does nothing when the DSC is NULL or lacks the hook.
+ */
+void hwss_dsc_set_config(union block_sequence_params *params)
+{
+	struct display_stream_compressor *dsc = params->dsc_set_config_params.dsc;
+
+	if (!dsc || !dsc->funcs->dsc_set_config)
+		return;
+
+	dsc->funcs->dsc_set_config(dsc,
+			params->dsc_set_config_params.dsc_cfg,
+			params->dsc_set_config_params.dsc_optc_cfg);
+}
+
+/*
+ * Invoke the DSC's dsc_enable hook for the queued OPP instance. Does nothing
+ * when the DSC is NULL or lacks the hook.
+ */
+void hwss_dsc_enable(union block_sequence_params *params)
+{
+	struct display_stream_compressor *dsc = params->dsc_enable_params.dsc;
+	int opp = params->dsc_enable_params.opp_inst;
+
+	if (!dsc || !dsc->funcs->dsc_enable)
+		return;
+
+	dsc->funcs->dsc_enable(dsc, opp);
+}
+
+/*
+ * Invoke the timing generator's set_dsc_config hook.
+ *
+ * When the step's enable flag is set and a DSC OPTC config is supplied, the
+ * mode, bytes per pixel and slice width come from that config; otherwise the
+ * hook is called with OPTC_DSC_DISABLED and zeroes. Does nothing when the
+ * timing generator is NULL or lacks the hook.
+ */
+void hwss_tg_set_dsc_config(union block_sequence_params *params)
+{
+	struct timing_generator *tg = params->tg_set_dsc_config_params.tg;
+	struct dsc_optc_config *cfg = NULL;
+	enum optc_dsc_mode mode = OPTC_DSC_DISABLED;
+	uint32_t bpp = 0;
+	uint32_t width = 0;
+
+	if (params->tg_set_dsc_config_params.enable)
+		cfg = params->tg_set_dsc_config_params.dsc_optc_cfg;
+
+	if (cfg) {
+		bpp = cfg->bytes_per_pixel;
+		width = cfg->slice_width;
+		if (cfg->is_pixel_format_444)
+			mode = OPTC_DSC_ENABLED_444;
+		else
+			mode = OPTC_DSC_ENABLED_NATIVE_SUBSAMPLED;
+	}
+
+	if (!tg || !tg->funcs->set_dsc_config)
+		return;
+
+	tg->funcs->set_dsc_config(tg, mode, bpp, width);
+}
+
+/*
+ * Invoke the DSC's dsc_disconnect hook. Does nothing when the DSC is NULL or
+ * lacks the hook.
+ */
+void hwss_dsc_disconnect(union block_sequence_params *params)
+{
+	struct display_stream_compressor *dsc = params->dsc_disconnect_params.dsc;
+
+	if (!dsc || !dsc->funcs->dsc_disconnect)
+		return;
+
+	dsc->funcs->dsc_disconnect(dsc);
+}
+
+/*
+ * Invoke the DSC's dsc_read_state hook with the queued state pointer. Does
+ * nothing when the DSC is NULL or lacks the hook.
+ */
+void hwss_dsc_read_state(union block_sequence_params *params)
+{
+	struct display_stream_compressor *dsc = params->dsc_read_state_params.dsc;
+
+	if (!dsc || !dsc->funcs->dsc_read_state)
+		return;
+
+	dsc->funcs->dsc_read_state(dsc, params->dsc_read_state_params.dsc_state);
+}
+
+/*
+ * Build a DSC configuration from the pipe's stream timing and hand it to the
+ * DSC's dsc_set_config hook.
+ *
+ * Picture width and the horizontal slice count are divided by the step's
+ * opp_cnt; padding and the is_odm flag are taken from the head of the ODM
+ * chain (reached by following prev_odm_pipe). The OPTC config produced by the
+ * hook is written into the step's own dsc_optc_cfg member.
+ *
+ * Does nothing when the pipe has no DSC, the step's enable flag is clear, the
+ * pipe has no stream, or opp_cnt is not positive (it is used as a divisor).
+ */
+void hwss_dsc_calculate_and_set_config(union block_sequence_params *params)
+{
+	struct pipe_ctx *pipe_ctx = params->dsc_calculate_and_set_config_params.pipe_ctx;
+	struct pipe_ctx *top_pipe = pipe_ctx;
+	bool enable = params->dsc_calculate_and_set_config_params.enable;
+	int opp_cnt = params->dsc_calculate_and_set_config_params.opp_cnt;
+	struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
+	struct dc_stream_state *stream = pipe_ctx->stream;
+	/* Zero-initialised so fields not set below never carry stack garbage. */
+	struct dsc_config dsc_cfg = {0};
+
+	if (!dsc || !enable)
+		return;
+
+	/* opp_cnt divides both pic_width and num_slices_h below. */
+	if (!stream || opp_cnt <= 0)
+		return;
+
+	/* Calculate DSC configuration - extracted from dcn32_update_dsc_on_stream */
+	while (top_pipe->prev_odm_pipe)
+		top_pipe = top_pipe->prev_odm_pipe;
+
+	dsc_cfg.pic_width = (stream->timing.h_addressable + top_pipe->dsc_padding_params.dsc_hactive_padding +
+			stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt;
+	dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom;
+	dsc_cfg.pixel_encoding = stream->timing.pixel_encoding;
+	dsc_cfg.color_depth = stream->timing.display_color_depth;
+	dsc_cfg.is_odm = top_pipe->next_odm_pipe ? true : false;
+	dsc_cfg.dc_dsc_cfg = stream->timing.dsc_cfg;
+	dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt;
+	dsc_cfg.dsc_padding = top_pipe->dsc_padding_params.dsc_hactive_padding;
+
+	/* Set DSC configuration */
+	if (dsc->funcs->dsc_set_config)
+		dsc->funcs->dsc_set_config(dsc, &dsc_cfg,
+				&params->dsc_calculate_and_set_config_params.dsc_optc_cfg);
+}
+
+/*
+ * Invoke the pipe DSC's dsc_enable hook with the instance number of the pipe's
+ * OPP. Does nothing when the pipe has no DSC or the DSC lacks the hook.
+ */
+void hwss_dsc_enable_with_opp(union block_sequence_params *params)
+{
+	struct pipe_ctx *pipe = params->dsc_enable_with_opp_params.pipe_ctx;
+	struct display_stream_compressor *dsc = pipe->stream_res.dsc;
+
+	if (!dsc || !dsc->funcs->dsc_enable)
+		return;
+
+	dsc->funcs->dsc_enable(dsc, pipe->stream_res.opp->inst);
+}
+
+/*
+ * Invoke the timing generator's program_global_sync hook with the queued
+ * vready/vstartup/vupdate/pstate-keepout values, when the hook is implemented.
+ */
+void hwss_tg_program_global_sync(union block_sequence_params *params)
+{
+	struct timing_generator *tg = params->tg_program_global_sync_params.tg;
+	int vready = params->tg_program_global_sync_params.vready_offset;
+	unsigned int vstartup = params->tg_program_global_sync_params.vstartup_lines;
+	unsigned int vupdate_offset = params->tg_program_global_sync_params.vupdate_offset_pixels;
+	unsigned int vupdate_width = params->tg_program_global_sync_params.vupdate_vupdate_width_pixels;
+	unsigned int keepout_start = params->tg_program_global_sync_params.pstate_keepout_start_lines;
+
+	if (!tg->funcs->program_global_sync)
+		return;
+
+	tg->funcs->program_global_sync(tg, vready, vstartup,
+			vupdate_offset, vupdate_width, keepout_start);
+}
+
+/*
+ * Invoke the timing generator's wait_for_state hook with the queued CRTC
+ * state, when the hook is implemented.
+ */
+void hwss_tg_wait_for_state(union block_sequence_params *params)
+{
+	struct timing_generator *tg = params->tg_wait_for_state_params.tg;
+	enum crtc_state wanted = params->tg_wait_for_state_params.state;
+
+	if (!tg->funcs->wait_for_state)
+		return;
+
+	tg->funcs->wait_for_state(tg, wanted);
+}
+
+/*
+ * Invoke the timing generator's set_vtg_params hook with the queued timing and
+ * program_fp2 flag, when the hook is implemented.
+ */
+void hwss_tg_set_vtg_params(union block_sequence_params *params)
+{
+	struct timing_generator *tg = params->tg_set_vtg_params_params.tg;
+	struct dc_crtc_timing *crtc_timing = params->tg_set_vtg_params_params.timing;
+	bool fp2 = params->tg_set_vtg_params_params.program_fp2;
+
+	if (!tg->funcs->set_vtg_params)
+		return;
+
+	tg->funcs->set_vtg_params(tg, crtc_timing, fp2);
+}
+
+/*
+ * Invoke the timing generator's setup_vertical_interrupt2 hook with the queued
+ * start line, when the hook is implemented.
+ */
+void hwss_tg_setup_vertical_interrupt2(union block_sequence_params *params)
+{
+	struct timing_generator *tg = params->tg_setup_vertical_interrupt2_params.tg;
+	int line = params->tg_setup_vertical_interrupt2_params.start_line;
+
+	if (!tg->funcs->setup_vertical_interrupt2)
+		return;
+
+	tg->funcs->setup_vertical_interrupt2(tg, line);
+}
+
+/*
+ * Invoke the DPP's dpp_set_hdr_multiplier hook with the queued hardware
+ * multiplier, when the hook is implemented.
+ */
+void hwss_dpp_set_hdr_multiplier(union block_sequence_params *params)
+{
+	struct dpp *dpp = params->dpp_set_hdr_multiplier_params.dpp;
+	uint32_t mult = params->dpp_set_hdr_multiplier_params.hw_mult;
+
+	if (!dpp->funcs->dpp_set_hdr_multiplier)
+		return;
+
+	dpp->funcs->dpp_set_hdr_multiplier(dpp, mult);
+}
+
+/*
+ * Invoke the HUBBUB's program_det_size hook with the queued HUBP instance and
+ * DET buffer size, when the hook is implemented.
+ */
+void hwss_program_det_size(union block_sequence_params *params)
+{
+	struct hubbub *hubbub = params->program_det_size_params.hubbub;
+	unsigned int inst = params->program_det_size_params.hubp_inst;
+	unsigned int size_kb = params->program_det_size_params.det_buffer_size_kb;
+
+	if (!hubbub->funcs->program_det_size)
+		return;
+
+	hubbub->funcs->program_det_size(hubbub, inst, size_kb);
+}
+
+/*
+ * Invoke the HUBBUB's program_det_segments hook with the queued HUBP instance
+ * and DET size, when the hook is implemented.
+ */
+void hwss_program_det_segments(union block_sequence_params *params)
+{
+	struct hubbub *hubbub = params->program_det_segments_params.hubbub;
+	unsigned int inst = params->program_det_segments_params.hubp_inst;
+	unsigned int size = params->program_det_segments_params.det_size;
+
+	if (!hubbub->funcs->program_det_segments)
+		return;
+
+	hubbub->funcs->program_det_segments(hubbub, inst, size);
+}
+
+/*
+ * Invoke the OPP's opp_set_dyn_expansion hook with the queued color space,
+ * color depth and signal type, when the hook is implemented.
+ */
+void hwss_opp_set_dyn_expansion(union block_sequence_params *params)
+{
+	struct output_pixel_processor *opp = params->opp_set_dyn_expansion_params.opp;
+	enum dc_color_space cs = params->opp_set_dyn_expansion_params.color_space;
+	enum dc_color_depth depth = params->opp_set_dyn_expansion_params.color_depth;
+	enum signal_type sig = params->opp_set_dyn_expansion_params.signal;
+
+	if (!opp->funcs->opp_set_dyn_expansion)
+		return;
+
+	opp->funcs->opp_set_dyn_expansion(opp, cs, depth, sig);
+}
+
+/*
+ * Invoke the OPP's opp_program_fmt hook with the queued bit-depth-reduction
+ * and clamping parameter pointers, when the hook is implemented.
+ */
+void hwss_opp_program_fmt(union block_sequence_params *params)
+{
+	struct output_pixel_processor *opp = params->opp_program_fmt_params.opp;
+
+	if (!opp->funcs->opp_program_fmt)
+		return;
+
+	opp->funcs->opp_program_fmt(opp,
+			params->opp_program_fmt_params.fmt_bit_depth,
+			params->opp_program_fmt_params.clamping);
+}
+
+/*
+ * Invoke the OPP's opp_program_bit_depth_reduction hook.
+ *
+ * The parameters are all-zero when the step requests defaults; otherwise they
+ * are built from the pipe's stream by
+ * resource_build_bit_depth_reduction_params(). They are prepared even when the
+ * hook turns out not to be implemented.
+ */
+void hwss_opp_program_bit_depth_reduction(union block_sequence_params *params)
+{
+	struct output_pixel_processor *opp = params->opp_program_bit_depth_reduction_params.opp;
+	bool defaults = params->opp_program_bit_depth_reduction_params.use_default_params;
+	struct pipe_ctx *pipe = params->opp_program_bit_depth_reduction_params.pipe_ctx;
+	struct bit_depth_reduction_params bdr;
+
+	if (!defaults)
+		resource_build_bit_depth_reduction_params(pipe->stream, &bdr);
+	else
+		memset(&bdr, 0, sizeof(bdr));
+
+	if (!opp->funcs->opp_program_bit_depth_reduction)
+		return;
+
+	opp->funcs->opp_program_bit_depth_reduction(opp, &bdr);
+}
+
+/*
+ * Invoke the OPP's opp_set_disp_pattern_generator hook with the queued test
+ * pattern settings.
+ *
+ * The solid color argument points at the step's own solid_color member when
+ * use_solid_color is set and is NULL otherwise. Does nothing when the OPP is
+ * NULL or lacks the hook.
+ */
+void hwss_opp_set_disp_pattern_generator(union block_sequence_params *params)
+{
+	struct output_pixel_processor *opp = params->opp_set_disp_pattern_generator_params.opp;
+	enum controller_dp_test_pattern pattern = params->opp_set_disp_pattern_generator_params.test_pattern;
+	enum controller_dp_color_space cs = params->opp_set_disp_pattern_generator_params.color_space;
+	enum dc_color_depth depth = params->opp_set_disp_pattern_generator_params.color_depth;
+	struct tg_color *solid = NULL;
+	int w = params->opp_set_disp_pattern_generator_params.width;
+	int h = params->opp_set_disp_pattern_generator_params.height;
+	int off = params->opp_set_disp_pattern_generator_params.offset;
+
+	if (params->opp_set_disp_pattern_generator_params.use_solid_color)
+		solid = &params->opp_set_disp_pattern_generator_params.solid_color;
+
+	if (!opp || !opp->funcs->opp_set_disp_pattern_generator)
+		return;
+
+	opp->funcs->opp_set_disp_pattern_generator(opp, pattern, cs,
+			depth, solid, w, h, off);
+}
+
+/*
+ * Invoke the hwss set_pipe hook for the queued pipe.
+ *
+ * Does nothing when @dc is NULL or the hook is not implemented, mirroring the
+ * guard used by hwss_set_abm_immediate_disable(); previously the hook was
+ * called unconditionally.
+ */
+void hwss_set_abm_pipe(union block_sequence_params *params)
+{
+	struct dc *dc = params->set_abm_pipe_params.dc;
+	struct pipe_ctx *pipe_ctx = params->set_abm_pipe_params.pipe_ctx;
+
+	if (dc && dc->hwss.set_pipe)
+		dc->hwss.set_pipe(pipe_ctx);
+}
+
+/*
+ * Invoke the ABM's set_abm_level hook with the queued level, when the hook is
+ * implemented.
+ */
+void hwss_set_abm_level(union block_sequence_params *params)
+{
+	struct abm *abm = params->set_abm_level_params.abm;
+	unsigned int level = params->set_abm_level_params.abm_level;
+
+	if (!abm->funcs->set_abm_level)
+		return;
+
+	abm->funcs->set_abm_level(abm, level);
+}
+
+/*
+ * Invoke the hwss set_abm_immediate_disable hook for the queued pipe. Does
+ * nothing when the dc pointer is NULL or the hook is not implemented.
+ */
+void hwss_set_abm_immediate_disable(union block_sequence_params *params)
+{
+	struct dc *dc = params->set_abm_immediate_disable_params.dc;
+	struct pipe_ctx *pipe = params->set_abm_immediate_disable_params.pipe_ctx;
+
+	if (!dc || !dc->hwss.set_abm_immediate_disable)
+		return;
+
+	dc->hwss.set_abm_immediate_disable(pipe);
+}
+
+/*
+ * Call the MPC's remove_mpcc hook with the queued MPC tree and MPCC. The hook
+ * is called unconditionally (no NULL check on the function pointer).
+ */
+void hwss_mpc_remove_mpcc(union block_sequence_params *params)
+{
+	struct mpc *mpc = params->mpc_remove_mpcc_params.mpc;
+
+	mpc->funcs->remove_mpcc(mpc,
+			params->mpc_remove_mpcc_params.mpc_tree_params,
+			params->mpc_remove_mpcc_params.mpcc_to_remove);
+}
+
+/*
+ * Store the queued pending flag in the OPP's mpcc_disconnect_pending slot for
+ * the queued MPCC instance. No hardware hook is called.
+ */
+void hwss_opp_set_mpcc_disconnect_pending(union block_sequence_params *params)
+{
+	struct output_pixel_processor *opp = params->opp_set_mpcc_disconnect_pending_params.opp;
+	int inst = params->opp_set_mpcc_disconnect_pending_params.mpcc_inst;
+	bool is_pending = params->opp_set_mpcc_disconnect_pending_params.pending;
+
+	opp->mpcc_disconnect_pending[inst] = is_pending;
+}
+
+/*
+ * Store the queued value in dc->optimized_required. No hardware hook is
+ * called.
+ */
+void hwss_dc_set_optimized_required(union block_sequence_params *params)
+{
+	bool required = params->dc_set_optimized_required_params.optimized_required;
+
+	params->dc_set_optimized_required_params.dc->optimized_required = required;
+}
+
+/*
+ * Invoke the HUBP's hubp_disconnect hook, when the hook is implemented.
+ */
+void hwss_hubp_disconnect(union block_sequence_params *params)
+{
+	struct hubp *hubp = params->hubp_disconnect_params.hubp;
+
+	if (!hubp->funcs->hubp_disconnect)
+		return;
+
+	hubp->funcs->hubp_disconnect(hubp);
+}
+
+/*
+ * Invoke the HUBBUB's force_pstate_change_control hook with the queued enable
+ * and wait flags, then delay 500 us if the call was an enable. Does nothing
+ * (no delay either) when the hook is not implemented.
+ */
+void hwss_hubbub_force_pstate_change_control(union block_sequence_params *params)
+{
+	struct hubbub *hubbub = params->hubbub_force_pstate_change_control_params.hubbub;
+	bool force_on = params->hubbub_force_pstate_change_control_params.enable;
+	bool do_wait = params->hubbub_force_pstate_change_control_params.wait;
+
+	if (!hubbub->funcs->force_pstate_change_control)
+		return;
+
+	hubbub->funcs->force_pstate_change_control(hubbub, force_on, do_wait);
+
+	/* Add delay when enabling pstate change control */
+	if (force_on)
+		udelay(500);
+}
+
+/*
+ * Invoke the timing generator's enable_crtc hook, when the hook is
+ * implemented.
+ */
+void hwss_tg_enable_crtc(union block_sequence_params *params)
+{
+	struct timing_generator *tg = params->tg_enable_crtc_params.tg;
+
+	if (!tg->funcs->enable_crtc)
+		return;
+
+	tg->funcs->enable_crtc(tg);
+}
+
+/*
+ * Invoke the timing generator's set_gsl hook with a pointer to the GSL
+ * parameters embedded in the step, when the hook is implemented.
+ */
+void hwss_tg_set_gsl(union block_sequence_params *params)
+{
+	struct timing_generator *tg = params->tg_set_gsl_params.tg;
+
+	if (!tg->funcs->set_gsl)
+		return;
+
+	tg->funcs->set_gsl(tg, &params->tg_set_gsl_params.gsl);
+}
+
+/*
+ * Invoke the timing generator's set_gsl_source_select hook with the queued
+ * group index and GSL ready signal, when the hook is implemented.
+ */
+void hwss_tg_set_gsl_source_select(union block_sequence_params *params)
+{
+	struct timing_generator *tg = params->tg_set_gsl_source_select_params.tg;
+	int group = params->tg_set_gsl_source_select_params.group_idx;
+	uint32_t ready_signal = params->tg_set_gsl_source_select_params.gsl_ready_signal;
+
+	if (!tg->funcs->set_gsl_source_select)
+		return;
+
+	tg->funcs->set_gsl_source_select(tg, group, ready_signal);
+}
+
+/*
+ * Poll the HUBP's hubp_is_flip_pending hook until it reports no pending flip
+ * or the timeout budget is used up.
+ *
+ * At most timeout_us / polling_interval_us polls are made, with a
+ * polling_interval_us delay after each poll that still reports pending.
+ *
+ * A zero polling interval is treated as 1 us so the poll-count division cannot
+ * divide by zero. Does nothing when the hook is not implemented, matching the
+ * check in hwss_tg_wait_double_buffer_pending().
+ */
+void hwss_hubp_wait_flip_pending(union block_sequence_params *params)
+{
+	struct hubp *hubp = params->hubp_wait_flip_pending_params.hubp;
+	unsigned int timeout_us = params->hubp_wait_flip_pending_params.timeout_us;
+	unsigned int polling_interval_us = params->hubp_wait_flip_pending_params.polling_interval_us;
+	unsigned int max_polls;
+	unsigned int j;
+
+	if (!hubp->funcs->hubp_is_flip_pending)
+		return;
+
+	if (!polling_interval_us)
+		polling_interval_us = 1;
+
+	/* Loop-invariant bound; unsigned to match the counter. */
+	max_polls = timeout_us / polling_interval_us;
+
+	for (j = 0; j < max_polls && hubp->funcs->hubp_is_flip_pending(hubp); j++)
+		udelay(polling_interval_us);
+}
+
+/*
+ * Poll the timing generator's get_optc_double_buffer_pending hook until it
+ * reports nothing pending or the timeout budget is used up.
+ *
+ * At most timeout_us / polling_interval_us polls are made, with a
+ * polling_interval_us delay after each poll that still reports pending.
+ *
+ * A zero polling interval is treated as 1 us so the poll-count division cannot
+ * divide by zero. Does nothing when the hook is not implemented.
+ */
+void hwss_tg_wait_double_buffer_pending(union block_sequence_params *params)
+{
+	struct timing_generator *tg = params->tg_wait_double_buffer_pending_params.tg;
+	unsigned int timeout_us = params->tg_wait_double_buffer_pending_params.timeout_us;
+	unsigned int polling_interval_us = params->tg_wait_double_buffer_pending_params.polling_interval_us;
+	unsigned int max_polls;
+	unsigned int j;
+
+	if (!tg->funcs->get_optc_double_buffer_pending)
+		return;
+
+	if (!polling_interval_us)
+		polling_interval_us = 1;
+
+	/* Loop-invariant bound; unsigned to match the counter. */
+	max_polls = timeout_us / polling_interval_us;
+
+	for (j = 0; j < max_polls && tg->funcs->get_optc_double_buffer_pending(tg); j++)
+		udelay(polling_interval_us);
+}
+
+/*
+ * Invoke the hwseq update_force_pstate hook with the queued dc and context,
+ * when the hook is implemented.
+ */
+void hwss_update_force_pstate(union block_sequence_params *params)
+{
+	struct dc *dc = params->update_force_pstate_params.dc;
+	struct dc_state *state = params->update_force_pstate_params.context;
+	struct dce_hwseq *hws = dc->hwseq;
+
+	if (!hws->funcs.update_force_pstate)
+		return;
+
+	hws->funcs.update_force_pstate(dc, state);
+}
+
+/*
+ * Call the HUBBUB's apply_DEDCN21_147_wa hook. The hook is called
+ * unconditionally (no NULL check on the function pointer).
+ */
+void hwss_hubbub_apply_dedcn21_147_wa(union block_sequence_params *params)
+{
+	struct hubbub *hb = params->hubbub_apply_dedcn21_147_wa_params.hubbub;
+
+	hb->funcs->apply_DEDCN21_147_wa(hb);
+}
+
+/*
+ * Call the HUBBUB's allow_self_refresh_control hook with the queued allow
+ * flag (unconditionally, no NULL check on the function pointer).
+ *
+ * When self refresh was disallowed and the step carries a non-NULL
+ * disallow_self_refresh_applied pointer, the pointed-to flag is set to true.
+ */
+void hwss_hubbub_allow_self_refresh_control(union block_sequence_params *params)
+{
+	struct hubbub *hb = params->hubbub_allow_self_refresh_control_params.hubbub;
+	bool allowed = params->hubbub_allow_self_refresh_control_params.allow;
+
+	hb->funcs->allow_self_refresh_control(hb, allowed);
+
+	if (allowed)
+		return;
+
+	if (params->hubbub_allow_self_refresh_control_params.disallow_self_refresh_applied)
+		*params->hubbub_allow_self_refresh_control_params.disallow_self_refresh_applied = true;
+}
+
+/*
+ * Store the result of the timing generator's get_frame_count hook through the
+ * queued frame_count pointer. Neither the hook nor the pointer is checked for
+ * NULL.
+ */
+void hwss_tg_get_frame_count(union block_sequence_params *params)
+{
+	struct timing_generator *tg = params->tg_get_frame_count_params.tg;
+	unsigned int *out = params->tg_get_frame_count_params.frame_count;
+	unsigned int frames;
+
+	frames = tg->funcs->get_frame_count(tg);
+	*out = frames;
+}
+
+/*
+ * Invoke the MPC's set_dwb_mux hook with the queued DWB and MPCC ids, when the
+ * hook is implemented.
+ */
+void hwss_mpc_set_dwb_mux(union block_sequence_params *params)
+{
+	struct mpc *mpc = params->mpc_set_dwb_mux_params.mpc;
+	int dwb = params->mpc_set_dwb_mux_params.dwb_id;
+	int mpcc = params->mpc_set_dwb_mux_params.mpcc_id;
+
+	if (!mpc->funcs->set_dwb_mux)
+		return;
+
+	mpc->funcs->set_dwb_mux(mpc, dwb, mpcc);
+}
+
+/*
+ * Invoke the MPC's disable_dwb_mux hook with the queued DWB id, when the hook
+ * is implemented.
+ */
+void hwss_mpc_disable_dwb_mux(union block_sequence_params *params)
+{
+	struct mpc *mpc = params->mpc_disable_dwb_mux_params.mpc;
+	unsigned int dwb = params->mpc_disable_dwb_mux_params.dwb_id;
+
+	if (!mpc->funcs->disable_dwb_mux)
+		return;
+
+	mpc->funcs->disable_dwb_mux(mpc, dwb);
+}
+
+/*
+ * Invoke the MCIF WB's config_mcif_buf hook with the queued buffer parameters
+ * and destination height, when the hook is implemented.
+ */
+void hwss_mcif_wb_config_buf(union block_sequence_params *params)
+{
+	struct mcif_wb *wb = params->mcif_wb_config_buf_params.mcif_wb;
+	unsigned int height = params->mcif_wb_config_buf_params.dest_height;
+
+	if (!wb->funcs->config_mcif_buf)
+		return;
+
+	wb->funcs->config_mcif_buf(wb,
+			params->mcif_wb_config_buf_params.mcif_buf_params,
+			height);
+}
+
+/*
+ * Invoke the MCIF WB's config_mcif_arb hook with the queued arbitration
+ * parameters, when the hook is implemented.
+ */
+void hwss_mcif_wb_config_arb(union block_sequence_params *params)
+{
+	struct mcif_wb *wb = params->mcif_wb_config_arb_params.mcif_wb;
+
+	if (!wb->funcs->config_mcif_arb)
+		return;
+
+	wb->funcs->config_mcif_arb(wb, params->mcif_wb_config_arb_params.mcif_arb_params);
+}
+
+/*
+ * Invoke the MCIF WB's enable_mcif hook, when the hook is implemented.
+ */
+void hwss_mcif_wb_enable(union block_sequence_params *params)
+{
+	struct mcif_wb *wb = params->mcif_wb_enable_params.mcif_wb;
+
+	if (!wb->funcs->enable_mcif)
+		return;
+
+	wb->funcs->enable_mcif(wb);
+}
+
+/*
+ * Invoke the MCIF WB's disable_mcif hook, when the hook is implemented.
+ */
+void hwss_mcif_wb_disable(union block_sequence_params *params)
+{
+	struct mcif_wb *wb = params->mcif_wb_disable_params.mcif_wb;
+
+	if (!wb->funcs->disable_mcif)
+		return;
+
+	wb->funcs->disable_mcif(wb);
+}
+
+/*
+ * Invoke the DWB's enable hook with the queued DWB parameters, when the hook
+ * is implemented.
+ */
+void hwss_dwbc_enable(union block_sequence_params *params)
+{
+	struct dwbc *dwbc = params->dwbc_enable_params.dwb;
+
+	if (!dwbc->funcs->enable)
+		return;
+
+	dwbc->funcs->enable(dwbc, params->dwbc_enable_params.dwb_params);
+}
+
+/*
+ * Invoke the DWB's disable hook, when the hook is implemented.
+ */
+void hwss_dwbc_disable(union block_sequence_params *params)
+{
+	struct dwbc *dwbc = params->dwbc_disable_params.dwb;
+
+	if (!dwbc->funcs->disable)
+		return;
+
+	dwbc->funcs->disable(dwbc);
+}
+
+/* Sequence step: call the DWBC update hook with the queued writeback params, if the hook is set. */
+void hwss_dwbc_update(union block_sequence_params *params)
+{
+	struct dwbc *wb = params->dwbc_update_params.dwb;
+	struct dc_dwb_params *wb_cfg = params->dwbc_update_params.dwb_params;
+
+	if (!wb->funcs->update)
+		return;
+
+	wb->funcs->update(wb, wb_cfg);
+}
+
+/* Sequence step: forward mall_sel and cache_cursor to the HUBP hook; skipped for a NULL hubp or missing hook. */
+void hwss_hubp_update_mall_sel(union block_sequence_params *params)
+{
+	struct hubp *pipe = params->hubp_update_mall_sel_params.hubp;
+	uint32_t sel = params->hubp_update_mall_sel_params.mall_sel;
+	bool cursor_cached = params->hubp_update_mall_sel_params.cache_cursor;
+
+	if (!pipe || !pipe->funcs->hubp_update_mall_sel)
+		return;
+
+	pipe->funcs->hubp_update_mall_sel(pipe, sel, cursor_cached);
+}
+
+/* Sequence step: forward the enable flag to hubp_prepare_subvp_buffering; skipped for a NULL hubp or missing hook. */
+void hwss_hubp_prepare_subvp_buffering(union block_sequence_params *params)
+{
+	struct hubp *pipe = params->hubp_prepare_subvp_buffering_params.hubp;
+	bool on = params->hubp_prepare_subvp_buffering_params.enable;
+
+	if (!pipe || !pipe->funcs->hubp_prepare_subvp_buffering)
+		return;
+
+	pipe->funcs->hubp_prepare_subvp_buffering(pipe, on);
+}
+
+/* Sequence step: forward the enable flag to set_hubp_blank_en; skipped for a NULL hubp or missing hook. */
+void hwss_hubp_set_blank_en(union block_sequence_params *params)
+{
+	struct hubp *pipe = params->hubp_set_blank_en_params.hubp;
+	bool on = params->hubp_set_blank_en_params.enable;
+
+	if (!pipe || !pipe->funcs->set_hubp_blank_en)
+		return;
+
+	pipe->funcs->set_hubp_blank_en(pipe, on);
+}
+
+/* Sequence step: forward the disable flag to hubp_disable_control; skipped for a NULL hubp or missing hook. */
+void hwss_hubp_disable_control(union block_sequence_params *params)
+{
+	struct hubp *pipe = params->hubp_disable_control_params.hubp;
+	bool off = params->hubp_disable_control_params.disable;
+
+	if (!pipe || !pipe->funcs->hubp_disable_control)
+		return;
+
+	pipe->funcs->hubp_disable_control(pipe, off);
+}
+
+/*
+ * Sequence step: invoke the soft-reset callback stored in the step with the
+ * queued hubbub and reset flag.
+ * Skipped when either the hubbub pointer or the callback pointer is NULL.
+ */
+void hwss_hubbub_soft_reset(union block_sequence_params *params)
+{
+	struct hubbub *hubbub = params->hubbub_soft_reset_params.hubbub;
+	bool reset = params->hubbub_soft_reset_params.reset;
+
+	/*
+	 * The callback is passed in by whoever queued the step (see
+	 * hwss_add_hubbub_soft_reset), so check it before calling, the same
+	 * way the other steps check their optional hooks.
+	 */
+	if (hubbub && params->hubbub_soft_reset_params.hubbub_soft_reset)
+		params->hubbub_soft_reset_params.hubbub_soft_reset(hubbub, reset);
+}
+
+/*
+ * Sequence step: forward the enable flag to hubp_clk_cntl and record the
+ * result in hubp->power_gated. Skipped for a NULL hubp or missing hook.
+ */
+void hwss_hubp_clk_cntl(union block_sequence_params *params)
+{
+	struct hubp *pipe = params->hubp_clk_cntl_params.hubp;
+	bool clk_on = params->hubp_clk_cntl_params.enable;
+
+	if (!pipe || !pipe->funcs->hubp_clk_cntl)
+		return;
+
+	pipe->funcs->hubp_clk_cntl(pipe, clk_on);
+	/* power_gated is kept as the inverse of the requested clock state. */
+	pipe->power_gated = !clk_on;
+}
+
+/* Sequence step: call the hubp_init hook; skipped for a NULL hubp or missing hook. */
+void hwss_hubp_init(union block_sequence_params *params)
+{
+	struct hubp *pipe = params->hubp_init_params.hubp;
+
+	if (!pipe || !pipe->funcs->hubp_init)
+		return;
+
+	pipe->funcs->hubp_init(pipe);
+}
+
+/*
+ * Sequence step: gather the queued sys_low/sys_high/sys_default values into
+ * a local aperture struct and pass it to the HUBP hook.
+ * Skipped for a NULL hubp or missing hook.
+ */
+void hwss_hubp_set_vm_system_aperture_settings(union block_sequence_params *params)
+{
+	struct hubp *pipe = params->hubp_set_vm_system_aperture_settings_params.hubp;
+	struct vm_system_aperture_param aperture;
+
+	/* Only these three fields are filled in before the struct is handed over. */
+	aperture.sys_low = params->hubp_set_vm_system_aperture_settings_params.sys_low;
+	aperture.sys_high = params->hubp_set_vm_system_aperture_settings_params.sys_high;
+	aperture.sys_default = params->hubp_set_vm_system_aperture_settings_params.sys_default;
+
+	if (!pipe || !pipe->funcs->hubp_set_vm_system_aperture_settings)
+		return;
+
+	pipe->funcs->hubp_set_vm_system_aperture_settings(pipe, &aperture);
+}
+
+/* Sequence step: call the hubp_set_flip_int hook; skipped for a NULL hubp or missing hook. */
+void hwss_hubp_set_flip_int(union block_sequence_params *params)
+{
+	struct hubp *pipe = params->hubp_set_flip_int_params.hubp;
+
+	if (!pipe || !pipe->funcs->hubp_set_flip_int)
+		return;
+
+	pipe->funcs->hubp_set_flip_int(pipe);
+}
+
+/* Sequence step: forward dppclk_div and enable to dpp_dppclk_control; skipped for a NULL dpp or missing hook. */
+void hwss_dpp_dppclk_control(union block_sequence_params *params)
+{
+	struct dpp *dpp_blk = params->dpp_dppclk_control_params.dpp;
+	bool div = params->dpp_dppclk_control_params.dppclk_div;
+	bool on = params->dpp_dppclk_control_params.enable;
+
+	if (!dpp_blk || !dpp_blk->funcs->dpp_dppclk_control)
+		return;
+
+	dpp_blk->funcs->dpp_dppclk_control(dpp_blk, div, on);
+}
+
+/* Sequence step: call the timing generator's disable_phantom_crtc hook; skipped for a NULL tg or missing hook. */
+void hwss_disable_phantom_crtc(union block_sequence_params *params)
+{
+	struct timing_generator *otg = params->disable_phantom_crtc_params.tg;
+
+	if (!otg || !otg->funcs->disable_phantom_crtc)
+		return;
+
+	otg->funcs->disable_phantom_crtc(otg);
+}
+
+/*
+ * Sequence step: query the hwseq dsc_pg_status hook for the queued DSC
+ * instance and store its return value in this step's is_ungated field.
+ * The field is left untouched when hws is NULL or the hook is missing.
+ */
+void hwss_dsc_pg_status(union block_sequence_params *params)
+{
+	struct dce_hwseq *hwseq = params->dsc_pg_status_params.hws;
+	int inst = params->dsc_pg_status_params.dsc_inst;
+
+	if (!hwseq || !hwseq->funcs.dsc_pg_status)
+		return;
+
+	params->dsc_pg_status_params.is_ungated = hwseq->funcs.dsc_pg_status(hwseq, inst);
+}
+
+/*
+ * Sequence step: call the DSC dsc_wait_disconnect_pending_clear hook.
+ * Runs only when the is_ungated pointer is non-NULL and points at a true
+ * value, and the dsc and its hook are both present.
+ */
+void hwss_dsc_wait_disconnect_pending_clear(union block_sequence_params *params)
+{
+	struct display_stream_compressor *dsc_blk = params->dsc_wait_disconnect_pending_clear_params.dsc;
+
+	/* No status pointer, or status says "not ungated": nothing to wait for. */
+	if (!params->dsc_wait_disconnect_pending_clear_params.is_ungated ||
+	    !*params->dsc_wait_disconnect_pending_clear_params.is_ungated)
+		return;
+
+	if (!dsc_blk || !dsc_blk->funcs->dsc_wait_disconnect_pending_clear)
+		return;
+
+	dsc_blk->funcs->dsc_wait_disconnect_pending_clear(dsc_blk);
+}
+
+/*
+ * Sequence step: call the DSC dsc_disable hook.
+ * Runs only when the is_ungated pointer is non-NULL and points at a true
+ * value, and the dsc and its hook are both present.
+ */
+void hwss_dsc_disable(union block_sequence_params *params)
+{
+	struct display_stream_compressor *dsc_blk = params->dsc_disable_params.dsc;
+
+	/* No status pointer, or status says "not ungated": skip the disable. */
+	if (!params->dsc_disable_params.is_ungated ||
+	    !*params->dsc_disable_params.is_ungated)
+		return;
+
+	if (!dsc_blk || !dsc_blk->funcs->dsc_disable)
+		return;
+
+	dsc_blk->funcs->dsc_disable(dsc_blk);
+}
+
+/*
+ * Sequence step: call the DCCG set_ref_dscclk hook for the queued DSC
+ * instance. Runs only when the is_ungated pointer is non-NULL and points
+ * at a true value, and the dccg and its hook are both present.
+ */
+void hwss_dccg_set_ref_dscclk(union block_sequence_params *params)
+{
+	struct dccg *clk_gen = params->dccg_set_ref_dscclk_params.dccg;
+	int inst = params->dccg_set_ref_dscclk_params.dsc_inst;
+
+	/* No status pointer, or status says "not ungated": skip the clock update. */
+	if (!params->dccg_set_ref_dscclk_params.is_ungated ||
+	    !*params->dccg_set_ref_dscclk_params.is_ungated)
+		return;
+
+	if (!clk_gen || !clk_gen->funcs->set_ref_dscclk)
+		return;
+
+	clk_gen->funcs->set_ref_dscclk(clk_gen, inst);
+}
+
+/* Sequence step: forward the DPP instance and power_on flag to the hwseq dpp_pg_control hook, if set. */
+void hwss_dpp_pg_control(union block_sequence_params *params)
+{
+	struct dce_hwseq *hwseq = params->dpp_pg_control_params.hws;
+	unsigned int inst = params->dpp_pg_control_params.dpp_inst;
+	bool on = params->dpp_pg_control_params.power_on;
+
+	if (!hwseq->funcs.dpp_pg_control)
+		return;
+
+	hwseq->funcs.dpp_pg_control(hwseq, inst, on);
+}
+
+/* Sequence step: forward the HUBP instance and power_on flag to the hwseq hubp_pg_control hook, if set. */
+void hwss_hubp_pg_control(union block_sequence_params *params)
+{
+	struct dce_hwseq *hwseq = params->hubp_pg_control_params.hws;
+	unsigned int inst = params->hubp_pg_control_params.hubp_inst;
+	bool on = params->hubp_pg_control_params.power_on;
+
+	if (!hwseq->funcs.hubp_pg_control)
+		return;
+
+	hwseq->funcs.hubp_pg_control(hwseq, inst, on);
+}
+
+/* Sequence step: call the hubp_reset hook; skipped for a NULL hubp or missing hook. */
+void hwss_hubp_reset(union block_sequence_params *params)
+{
+	struct hubp *pipe = params->hubp_reset_params.hubp;
+
+	if (!pipe || !pipe->funcs->hubp_reset)
+		return;
+
+	pipe->funcs->hubp_reset(pipe);
+}
+
+/* Sequence step: call the dpp_reset hook; skipped for a NULL dpp or missing hook. */
+void hwss_dpp_reset(union block_sequence_params *params)
+{
+	struct dpp *dpp_blk = params->dpp_reset_params.dpp;
+
+	if (!dpp_blk || !dpp_blk->funcs->dpp_reset)
+		return;
+
+	dpp_blk->funcs->dpp_reset(dpp_blk);
+}
+
+/* Sequence step: forward the DPP instance and clock_on flag to the hwseq dpp_root_clock_control hook, if set. */
+void hwss_dpp_root_clock_control(union block_sequence_params *params)
+{
+	struct dce_hwseq *hwseq = params->dpp_root_clock_control_params.hws;
+	unsigned int inst = params->dpp_root_clock_control_params.dpp_inst;
+	bool on = params->dpp_root_clock_control_params.clock_on;
+
+	if (!hwseq->funcs.dpp_root_clock_control)
+		return;
+
+	hwseq->funcs.dpp_root_clock_control(hwseq, inst, on);
+}
+
+/* Sequence step: forward the enable flag to the dc_ip_request_cntl hook of dc->hwseq, if set. */
+void hwss_dc_ip_request_cntl(union block_sequence_params *params)
+{
+	struct dc *dc_ctx = params->dc_ip_request_cntl_params.dc;
+	bool on = params->dc_ip_request_cntl_params.enable;
+	struct dce_hwseq *hwseq = dc_ctx->hwseq;
+
+	if (!hwseq->funcs.dc_ip_request_cntl)
+		return;
+
+	hwseq->funcs.dc_ip_request_cntl(dc_ctx, on);
+}
+
+/* Sequence step: forward the DPP instance and dppclk_khz to update_dpp_dto; skipped for a NULL dccg or missing hook. */
+void hwss_dccg_update_dpp_dto(union block_sequence_params *params)
+{
+	struct dccg *clk_gen = params->dccg_update_dpp_dto_params.dccg;
+	int inst = params->dccg_update_dpp_dto_params.dpp_inst;
+	int clk_khz = params->dccg_update_dpp_dto_params.dppclk_khz;
+
+	if (!clk_gen || !clk_gen->funcs->update_dpp_dto)
+		return;
+
+	clk_gen->funcs->update_dpp_dto(clk_gen, inst, clk_khz);
+}
+
+/* Sequence step: forward the OTG instance to hubp_vtg_sel; skipped for a NULL hubp or missing hook. */
+void hwss_hubp_vtg_sel(union block_sequence_params *params)
+{
+	struct hubp *pipe = params->hubp_vtg_sel_params.hubp;
+	uint32_t otg = params->hubp_vtg_sel_params.otg_inst;
+
+	if (!pipe || !pipe->funcs->hubp_vtg_sel)
+		return;
+
+	pipe->funcs->hubp_vtg_sel(pipe, otg);
+}
+
+/*
+ * Sequence step: pass the queued register set, global sync programming and
+ * timing to hubp_setup2. Skipped for a NULL hubp or missing hook.
+ */
+void hwss_hubp_setup2(union block_sequence_params *params)
+{
+	struct hubp *pipe = params->hubp_setup2_params.hubp;
+	struct dml2_dchub_per_pipe_register_set *regs = params->hubp_setup2_params.hubp_regs;
+	union dml2_global_sync_programming *sync = params->hubp_setup2_params.global_sync;
+	struct dc_crtc_timing *crtc_timing = params->hubp_setup2_params.timing;
+
+	if (!pipe || !pipe->funcs->hubp_setup2)
+		return;
+
+	pipe->funcs->hubp_setup2(pipe, regs, sync, crtc_timing);
+}
+
+/*
+ * Sequence step: pass the queued DLG, TTU and RQ register sets plus the
+ * pipe destination params to hubp_setup.
+ * Skipped for a NULL hubp or missing hook.
+ */
+void hwss_hubp_setup(union block_sequence_params *params)
+{
+	struct hubp *pipe = params->hubp_setup_params.hubp;
+	struct _vcs_dpi_display_dlg_regs_st *dlg = params->hubp_setup_params.dlg_regs;
+	struct _vcs_dpi_display_ttu_regs_st *ttu = params->hubp_setup_params.ttu_regs;
+	struct _vcs_dpi_display_rq_regs_st *rq = params->hubp_setup_params.rq_regs;
+	struct _vcs_dpi_display_pipe_dest_params_st *dest = params->hubp_setup_params.pipe_dest;
+
+	if (!pipe || !pipe->funcs->hubp_setup)
+		return;
+
+	pipe->funcs->hubp_setup(pipe, dlg, ttu, rq, dest);
+}
+
+/* Sequence step: forward unbounded_req to set_unbounded_requesting; skipped for a NULL hubp or missing hook. */
+void hwss_hubp_set_unbounded_requesting(union block_sequence_params *params)
+{
+	struct hubp *pipe = params->hubp_set_unbounded_requesting_params.hubp;
+	bool unbounded = params->hubp_set_unbounded_requesting_params.unbounded_req;
+
+	if (!pipe || !pipe->funcs->set_unbounded_requesting)
+		return;
+
+	pipe->funcs->set_unbounded_requesting(pipe, unbounded);
+}
+
+/* Sequence step: pass the queued register set to hubp_setup_interdependent2; skipped for a NULL hubp or missing hook. */
+void hwss_hubp_setup_interdependent2(union block_sequence_params *params)
+{
+	struct hubp *pipe = params->hubp_setup_interdependent2_params.hubp;
+	struct dml2_dchub_per_pipe_register_set *regs = params->hubp_setup_interdependent2_params.hubp_regs;
+
+	if (!pipe || !pipe->funcs->hubp_setup_interdependent2)
+		return;
+
+	pipe->funcs->hubp_setup_interdependent2(pipe, regs);
+}
+
+/* Sequence step: pass the queued DLG and TTU register sets to hubp_setup_interdependent; skipped for a NULL hubp or missing hook. */
+void hwss_hubp_setup_interdependent(union block_sequence_params *params)
+{
+	struct hubp *pipe = params->hubp_setup_interdependent_params.hubp;
+	struct _vcs_dpi_display_dlg_regs_st *dlg = params->hubp_setup_interdependent_params.dlg_regs;
+	struct _vcs_dpi_display_ttu_regs_st *ttu = params->hubp_setup_interdependent_params.ttu_regs;
+
+	if (!pipe || !pipe->funcs->hubp_setup_interdependent)
+		return;
+
+	pipe->funcs->hubp_setup_interdependent(pipe, dlg, ttu);
+}
+
+/*
+ * Sequence step: pass the queued color space and a by-value copy of the
+ * cursor CSC matrix to set_cursor_matrix.
+ * Skipped for a NULL dpp or missing hook.
+ */
+void hwss_dpp_set_cursor_matrix(union block_sequence_params *params)
+{
+	struct dpp *dpp_blk = params->dpp_set_cursor_matrix_params.dpp;
+	enum dc_color_space cs = params->dpp_set_cursor_matrix_params.color_space;
+	struct dc_csc_transform *csc = params->dpp_set_cursor_matrix_params.cursor_csc_color_matrix;
+
+	if (!dpp_blk || !dpp_blk->funcs->set_cursor_matrix)
+		return;
+
+	/* NOTE(review): csc is dereferenced without a NULL check - confirm callers never queue NULL. */
+	dpp_blk->funcs->set_cursor_matrix(dpp_blk, cs, *csc);
+}
+
+/* Sequence step: call the update_mpcc hook of dc->hwseq with the queued dc and pipe context, if the hook is set. */
+void hwss_mpc_update_mpcc(union block_sequence_params *params)
+{
+	struct dc *dc_ctx = params->mpc_update_mpcc_params.dc;
+	struct pipe_ctx *pipe = params->mpc_update_mpcc_params.pipe_ctx;
+	struct dce_hwseq *hwseq = dc_ctx->hwseq;
+
+	if (!hwseq->funcs.update_mpcc)
+		return;
+
+	hwseq->funcs.update_mpcc(dc_ctx, pipe);
+}
+
+/*
+ * Sequence step: call the MPC update_blending hook with a pointer to the
+ * blend config stored inside the step and the queued MPCC id.
+ * Skipped for a NULL mpc or missing hook.
+ */
+void hwss_mpc_update_blending(union block_sequence_params *params)
+{
+	struct mpc *mpc_blk = params->mpc_update_blending_params.mpc;
+	struct mpcc_blnd_cfg *blend = &params->mpc_update_blending_params.blnd_cfg;
+	int mpcc = params->mpc_update_blending_params.mpcc_id;
+
+	if (!mpc_blk || !mpc_blk->funcs->update_blending)
+		return;
+
+	mpc_blk->funcs->update_blending(mpc_blk, blend, mpcc);
+}
+
+/* Sequence step: call the MPC wait_for_idle hook for the queued MPCC id; skipped for a NULL mpc or missing hook. */
+void hwss_mpc_assert_idle_mpcc(union block_sequence_params *params)
+{
+	struct mpc *mpc_blk = params->mpc_assert_idle_mpcc_params.mpc;
+	int mpcc = params->mpc_assert_idle_mpcc_params.mpcc_id;
+
+	if (!mpc_blk || !mpc_blk->funcs->wait_for_idle)
+		return;
+
+	mpc_blk->funcs->wait_for_idle(mpc_blk, mpcc);
+}
+
+/*
+ * Sequence step: call the MPC insert_plane hook with the queued tree, the
+ * blend config stored inside the step, the SM config, the MPCC to insert
+ * above, and the DPP and MPCC ids (passed in that order).
+ * Skipped for a NULL mpc or missing hook.
+ */
+void hwss_mpc_insert_plane(union block_sequence_params *params)
+{
+	struct mpc *mpc_blk = params->mpc_insert_plane_params.mpc;
+	struct mpc_tree *mpc_tree = params->mpc_insert_plane_params.mpc_tree_params;
+	struct mpcc_blnd_cfg *blend = &params->mpc_insert_plane_params.blnd_cfg;
+	struct mpcc_sm_cfg *stereo = params->mpc_insert_plane_params.sm_cfg;
+	struct mpcc *above = params->mpc_insert_plane_params.insert_above_mpcc;
+	int dpp = params->mpc_insert_plane_params.dpp_id;
+	int mpcc = params->mpc_insert_plane_params.mpcc_id;
+
+	if (!mpc_blk || !mpc_blk->funcs->insert_plane)
+		return;
+
+	mpc_blk->funcs->insert_plane(mpc_blk, mpc_tree, blend, stereo, above, dpp, mpcc);
+}
+
+/* Sequence step: pass the queued scaler data to dpp_set_scaler; skipped for a NULL dpp or missing hook. */
+void hwss_dpp_set_scaler(union block_sequence_params *params)
+{
+	struct dpp *dpp_blk = params->dpp_set_scaler_params.dpp;
+	const struct scaler_data *scaler = params->dpp_set_scaler_params.scl_data;
+
+	if (!dpp_blk || !dpp_blk->funcs->dpp_set_scaler)
+		return;
+
+	dpp_blk->funcs->dpp_set_scaler(dpp_blk, scaler);
+}
+
+/* Sequence step: pass the queued viewport and viewport_c rectangles to mem_program_viewport; skipped for a NULL hubp or missing hook. */
+void hwss_hubp_mem_program_viewport(union block_sequence_params *params)
+{
+	struct hubp *pipe = params->hubp_mem_program_viewport_params.hubp;
+	const struct rect *vp = params->hubp_mem_program_viewport_params.viewport;
+	const struct rect *vp_c = params->hubp_mem_program_viewport_params.viewport_c;
+
+	if (!pipe || !pipe->funcs->mem_program_viewport)
+		return;
+
+	pipe->funcs->mem_program_viewport(pipe, vp, vp_c);
+}
+
+/* Sequence step: call dc->hwss.abort_cursor_offload_update for the queued pipe; skipped for a NULL dc or missing hook. */
+void hwss_abort_cursor_offload_update(union block_sequence_params *params)
+{
+	struct dc *dc_ctx = params->abort_cursor_offload_update_params.dc;
+	struct pipe_ctx *pipe = params->abort_cursor_offload_update_params.pipe_ctx;
+
+	if (!dc_ctx || !dc_ctx->hwss.abort_cursor_offload_update)
+		return;
+
+	dc_ctx->hwss.abort_cursor_offload_update(dc_ctx, pipe);
+}
+
+/* Sequence step: call dc->hwss.set_cursor_attribute for the queued pipe; skipped for a NULL dc or missing hook. */
+void hwss_set_cursor_attribute(union block_sequence_params *params)
+{
+	struct dc *dc_ctx = params->set_cursor_attribute_params.dc;
+	struct pipe_ctx *pipe = params->set_cursor_attribute_params.pipe_ctx;
+
+	if (!dc_ctx || !dc_ctx->hwss.set_cursor_attribute)
+		return;
+
+	/* The hook takes only the pipe context; dc is used just to find the hook. */
+	dc_ctx->hwss.set_cursor_attribute(pipe);
+}
+
+/* Sequence step: call dc->hwss.set_cursor_position for the queued pipe; skipped for a NULL dc or missing hook. */
+void hwss_set_cursor_position(union block_sequence_params *params)
+{
+	struct dc *dc_ctx = params->set_cursor_position_params.dc;
+	struct pipe_ctx *pipe = params->set_cursor_position_params.pipe_ctx;
+
+	if (!dc_ctx || !dc_ctx->hwss.set_cursor_position)
+		return;
+
+	/* The hook takes only the pipe context; dc is used just to find the hook. */
+	dc_ctx->hwss.set_cursor_position(pipe);
+}
+
+/* Sequence step: call dc->hwss.set_cursor_sdr_white_level for the queued pipe; skipped for a NULL dc or missing hook. */
+void hwss_set_cursor_sdr_white_level(union block_sequence_params *params)
+{
+	struct dc *dc_ctx = params->set_cursor_sdr_white_level_params.dc;
+	struct pipe_ctx *pipe = params->set_cursor_sdr_white_level_params.pipe_ctx;
+
+	if (!dc_ctx || !dc_ctx->hwss.set_cursor_sdr_white_level)
+		return;
+
+	/* The hook takes only the pipe context; dc is used just to find the hook. */
+	dc_ctx->hwss.set_cursor_sdr_white_level(pipe);
+}
+
+/*
+ * Sequence step: call dc->hwss.program_output_csc with the queued pipe,
+ * color space, matrix and OPP id. Skipped for a NULL dc or missing hook.
+ */
+void hwss_program_output_csc(union block_sequence_params *params)
+{
+	struct dc *dc_ctx = params->program_output_csc_params.dc;
+	struct pipe_ctx *pipe = params->program_output_csc_params.pipe_ctx;
+	enum dc_color_space cs = params->program_output_csc_params.colorspace;
+	uint16_t *csc_matrix = params->program_output_csc_params.matrix;
+	int opp = params->program_output_csc_params.opp_id;
+
+	if (!dc_ctx || !dc_ctx->hwss.program_output_csc)
+		return;
+
+	dc_ctx->hwss.program_output_csc(dc_ctx, pipe, cs, csc_matrix, opp);
+}
+
+/* Sequence step: forward the blank flag to the HUBP set_blank hook; skipped for a NULL hubp or missing hook. */
+void hwss_hubp_set_blank(union block_sequence_params *params)
+{
+	struct hubp *pipe = params->hubp_set_blank_params.hubp;
+	bool do_blank = params->hubp_set_blank_params.blank;
+
+	if (!pipe || !pipe->funcs->set_blank)
+		return;
+
+	pipe->funcs->set_blank(pipe, do_blank);
+}
+
+/* Sequence step: call the phantom_hubp_post_enable hook; skipped for a NULL hubp or missing hook. */
+void hwss_phantom_hubp_post_enable(union block_sequence_params *params)
+{
+	struct hubp *pipe = params->phantom_hubp_post_enable_params.hubp;
+
+	if (!pipe || !pipe->funcs->phantom_hubp_post_enable)
+		return;
+
+	pipe->funcs->phantom_hubp_post_enable(pipe);
+}
+
+/*
+ * Append a DCCG_SET_DTO_DSCCLK step carrying dccg, inst and num_slices_h.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_dccg_set_dto_dscclk(struct block_sequence_state *seq_state,
+		struct dccg *dccg, int inst, int num_slices_h)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = DCCG_SET_DTO_DSCCLK;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->dccg_set_dto_dscclk_params.dccg = dccg;
+	step_params->dccg_set_dto_dscclk_params.inst = inst;
+	step_params->dccg_set_dto_dscclk_params.num_slices_h = num_slices_h;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a DSC_CALCULATE_AND_SET_CONFIG step carrying pipe_ctx, enable and opp_cnt.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_dsc_calculate_and_set_config(struct block_sequence_state *seq_state,
+		struct pipe_ctx *pipe_ctx, bool enable, int opp_cnt)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = DSC_CALCULATE_AND_SET_CONFIG;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->dsc_calculate_and_set_config_params.pipe_ctx = pipe_ctx;
+	step_params->dsc_calculate_and_set_config_params.enable = enable;
+	step_params->dsc_calculate_and_set_config_params.opp_cnt = opp_cnt;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an MPC_REMOVE_MPCC step carrying mpc, mpc_tree_params and mpcc_to_remove.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_mpc_remove_mpcc(struct block_sequence_state *seq_state,
+		struct mpc *mpc, struct mpc_tree *mpc_tree_params, struct mpcc *mpcc_to_remove)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = MPC_REMOVE_MPCC;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->mpc_remove_mpcc_params.mpc = mpc;
+	step_params->mpc_remove_mpcc_params.mpc_tree_params = mpc_tree_params;
+	step_params->mpc_remove_mpcc_params.mpcc_to_remove = mpcc_to_remove;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an OPP_SET_MPCC_DISCONNECT_PENDING step carrying opp, mpcc_inst and pending.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_opp_set_mpcc_disconnect_pending(struct block_sequence_state *seq_state,
+		struct output_pixel_processor *opp, int mpcc_inst, bool pending)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = OPP_SET_MPCC_DISCONNECT_PENDING;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->opp_set_mpcc_disconnect_pending_params.opp = opp;
+	step_params->opp_set_mpcc_disconnect_pending_params.mpcc_inst = mpcc_inst;
+	step_params->opp_set_mpcc_disconnect_pending_params.pending = pending;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a HUBP_DISCONNECT step carrying hubp.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_hubp_disconnect(struct block_sequence_state *seq_state,
+		struct hubp *hubp)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = HUBP_DISCONNECT;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->hubp_disconnect_params.hubp = hubp;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a DSC_ENABLE_WITH_OPP step carrying pipe_ctx.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_dsc_enable_with_opp(struct block_sequence_state *seq_state,
+		struct pipe_ctx *pipe_ctx)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = DSC_ENABLE_WITH_OPP;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->dsc_enable_with_opp_params.pipe_ctx = pipe_ctx;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a TG_SET_DSC_CONFIG step carrying tg, dsc_optc_cfg and enable.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_tg_set_dsc_config(struct block_sequence_state *seq_state,
+		struct timing_generator *tg, struct dsc_optc_config *dsc_optc_cfg, bool enable)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = TG_SET_DSC_CONFIG;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->tg_set_dsc_config_params.tg = tg;
+	step_params->tg_set_dsc_config_params.dsc_optc_cfg = dsc_optc_cfg;
+	step_params->tg_set_dsc_config_params.enable = enable;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a DSC_DISCONNECT step carrying dsc.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_dsc_disconnect(struct block_sequence_state *seq_state,
+		struct display_stream_compressor *dsc)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = DSC_DISCONNECT;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->dsc_disconnect_params.dsc = dsc;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a DC_SET_OPTIMIZED_REQUIRED step carrying dc and optimized_required.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_dc_set_optimized_required(struct block_sequence_state *seq_state,
+		struct dc *dc, bool optimized_required)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = DC_SET_OPTIMIZED_REQUIRED;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->dc_set_optimized_required_params.dc = dc;
+	step_params->dc_set_optimized_required_params.optimized_required = optimized_required;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an ABM_SET_IMMEDIATE_DISABLE step carrying dc and pipe_ctx.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_abm_set_immediate_disable(struct block_sequence_state *seq_state,
+		struct dc *dc, struct pipe_ctx *pipe_ctx)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = ABM_SET_IMMEDIATE_DISABLE;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	/* The union member is named set_abm_immediate_disable_params, unlike the function name. */
+	step_params->set_abm_immediate_disable_params.dc = dc;
+	step_params->set_abm_immediate_disable_params.pipe_ctx = pipe_ctx;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an OPP_SET_DISP_PATTERN_GENERATOR step carrying the OPP and the
+ * full pattern description (pattern, color space/depth, solid color,
+ * use_solid_color, width, height, offset). solid_color is copied by value.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_opp_set_disp_pattern_generator(struct block_sequence_state *seq_state,
+		struct output_pixel_processor *opp,
+		enum controller_dp_test_pattern test_pattern,
+		enum controller_dp_color_space color_space,
+		enum dc_color_depth color_depth,
+		struct tg_color solid_color,
+		bool use_solid_color,
+		int width,
+		int height,
+		int offset)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = OPP_SET_DISP_PATTERN_GENERATOR;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->opp_set_disp_pattern_generator_params.opp = opp;
+	step_params->opp_set_disp_pattern_generator_params.test_pattern = test_pattern;
+	step_params->opp_set_disp_pattern_generator_params.color_space = color_space;
+	step_params->opp_set_disp_pattern_generator_params.color_depth = color_depth;
+	step_params->opp_set_disp_pattern_generator_params.solid_color = solid_color;
+	step_params->opp_set_disp_pattern_generator_params.use_solid_color = use_solid_color;
+	step_params->opp_set_disp_pattern_generator_params.width = width;
+	step_params->opp_set_disp_pattern_generator_params.height = height;
+	step_params->opp_set_disp_pattern_generator_params.offset = offset;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an MPC_UPDATE_BLENDING step carrying mpc, a by-value copy of
+ * blnd_cfg, and mpcc_id.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_mpc_update_blending(struct block_sequence_state *seq_state,
+		struct mpc *mpc,
+		struct mpcc_blnd_cfg blnd_cfg,
+		int mpcc_id)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = MPC_UPDATE_BLENDING;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->mpc_update_blending_params.mpc = mpc;
+	step_params->mpc_update_blending_params.blnd_cfg = blnd_cfg;
+	step_params->mpc_update_blending_params.mpcc_id = mpcc_id;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an MPC_INSERT_PLANE step carrying mpc, the tree, a by-value copy
+ * of blnd_cfg, sm_cfg, insert_above_mpcc, dpp_id and mpcc_id.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_mpc_insert_plane(struct block_sequence_state *seq_state,
+		struct mpc *mpc,
+		struct mpc_tree *mpc_tree_params,
+		struct mpcc_blnd_cfg blnd_cfg,
+		struct mpcc_sm_cfg *sm_cfg,
+		struct mpcc *insert_above_mpcc,
+		int dpp_id,
+		int mpcc_id)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = MPC_INSERT_PLANE;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->mpc_insert_plane_params.mpc = mpc;
+	step_params->mpc_insert_plane_params.mpc_tree_params = mpc_tree_params;
+	step_params->mpc_insert_plane_params.blnd_cfg = blnd_cfg;
+	step_params->mpc_insert_plane_params.sm_cfg = sm_cfg;
+	step_params->mpc_insert_plane_params.insert_above_mpcc = insert_above_mpcc;
+	step_params->mpc_insert_plane_params.dpp_id = dpp_id;
+	step_params->mpc_insert_plane_params.mpcc_id = mpcc_id;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an MPC_ASSERT_IDLE_MPCC step carrying mpc and mpcc_id.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_mpc_assert_idle_mpcc(struct block_sequence_state *seq_state,
+		struct mpc *mpc,
+		int mpcc_id)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = MPC_ASSERT_IDLE_MPCC;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->mpc_assert_idle_mpcc_params.mpc = mpc;
+	step_params->mpc_assert_idle_mpcc_params.mpcc_id = mpcc_id;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a HUBP_SET_BLANK step carrying hubp and blank.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_hubp_set_blank(struct block_sequence_state *seq_state,
+		struct hubp *hubp,
+		bool blank)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = HUBP_SET_BLANK;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->hubp_set_blank_params.hubp = hubp;
+	step_params->hubp_set_blank_params.blank = blank;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an OPP_PROGRAM_BIT_DEPTH_REDUCTION step carrying opp,
+ * use_default_params and pipe_ctx.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_opp_program_bit_depth_reduction(struct block_sequence_state *seq_state,
+		struct output_pixel_processor *opp,
+		bool use_default_params,
+		struct pipe_ctx *pipe_ctx)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = OPP_PROGRAM_BIT_DEPTH_REDUCTION;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->opp_program_bit_depth_reduction_params.opp = opp;
+	step_params->opp_program_bit_depth_reduction_params.use_default_params = use_default_params;
+	step_params->opp_program_bit_depth_reduction_params.pipe_ctx = pipe_ctx;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a DC_IP_REQUEST_CNTL step carrying dc and enable.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_dc_ip_request_cntl(struct block_sequence_state *seq_state,
+		struct dc *dc,
+		bool enable)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = DC_IP_REQUEST_CNTL;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->dc_ip_request_cntl_params.dc = dc;
+	step_params->dc_ip_request_cntl_params.enable = enable;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a DWBC_UPDATE step carrying dwb and dwb_params.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_dwbc_update(struct block_sequence_state *seq_state,
+		struct dwbc *dwb,
+		struct dc_dwb_params *dwb_params)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = DWBC_UPDATE;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->dwbc_update_params.dwb = dwb;
+	step_params->dwbc_update_params.dwb_params = dwb_params;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an MCIF_WB_CONFIG_BUF step carrying mcif_wb, mcif_buf_params and dest_height.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_mcif_wb_config_buf(struct block_sequence_state *seq_state,
+		struct mcif_wb *mcif_wb,
+		struct mcif_buf_params *mcif_buf_params,
+		unsigned int dest_height)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = MCIF_WB_CONFIG_BUF;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->mcif_wb_config_buf_params.mcif_wb = mcif_wb;
+	step_params->mcif_wb_config_buf_params.mcif_buf_params = mcif_buf_params;
+	step_params->mcif_wb_config_buf_params.dest_height = dest_height;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an MCIF_WB_CONFIG_ARB step carrying mcif_wb and mcif_arb_params.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_mcif_wb_config_arb(struct block_sequence_state *seq_state,
+		struct mcif_wb *mcif_wb,
+		struct mcif_arb_params *mcif_arb_params)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = MCIF_WB_CONFIG_ARB;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->mcif_wb_config_arb_params.mcif_wb = mcif_wb;
+	step_params->mcif_wb_config_arb_params.mcif_arb_params = mcif_arb_params;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an MCIF_WB_ENABLE step carrying mcif_wb.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_mcif_wb_enable(struct block_sequence_state *seq_state,
+		struct mcif_wb *mcif_wb)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = MCIF_WB_ENABLE;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->mcif_wb_enable_params.mcif_wb = mcif_wb;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an MCIF_WB_DISABLE step carrying mcif_wb.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_mcif_wb_disable(struct block_sequence_state *seq_state,
+		struct mcif_wb *mcif_wb)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = MCIF_WB_DISABLE;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->mcif_wb_disable_params.mcif_wb = mcif_wb;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an MPC_SET_DWB_MUX step carrying mpc, dwb_id and mpcc_id.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_mpc_set_dwb_mux(struct block_sequence_state *seq_state,
+		struct mpc *mpc,
+		int dwb_id,
+		int mpcc_id)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = MPC_SET_DWB_MUX;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->mpc_set_dwb_mux_params.mpc = mpc;
+	step_params->mpc_set_dwb_mux_params.dwb_id = dwb_id;
+	step_params->mpc_set_dwb_mux_params.mpcc_id = mpcc_id;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append an MPC_DISABLE_DWB_MUX step carrying mpc and dwb_id.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_mpc_disable_dwb_mux(struct block_sequence_state *seq_state,
+		struct mpc *mpc,
+		unsigned int dwb_id)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = MPC_DISABLE_DWB_MUX;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->mpc_disable_dwb_mux_params.mpc = mpc;
+	step_params->mpc_disable_dwb_mux_params.dwb_id = dwb_id;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a DWBC_ENABLE step carrying dwb and dwb_params.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_dwbc_enable(struct block_sequence_state *seq_state,
+		struct dwbc *dwb,
+		struct dc_dwb_params *dwb_params)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = DWBC_ENABLE;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->dwbc_enable_params.dwb = dwb;
+	step_params->dwbc_enable_params.dwb_params = dwb_params;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a DWBC_DISABLE step carrying dwb.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_dwbc_disable(struct block_sequence_state *seq_state,
+		struct dwbc *dwb)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = DWBC_DISABLE;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->dwbc_disable_params.dwb = dwb;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a TG_SET_GSL step carrying tg and a by-value copy of gsl.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_tg_set_gsl(struct block_sequence_state *seq_state,
+		struct timing_generator *tg,
+		struct gsl_params gsl)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = TG_SET_GSL;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->tg_set_gsl_params.tg = tg;
+	step_params->tg_set_gsl_params.gsl = gsl;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a TG_SET_GSL_SOURCE_SELECT step carrying tg, group_idx and gsl_ready_signal.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_tg_set_gsl_source_select(struct block_sequence_state *seq_state,
+		struct timing_generator *tg,
+		int group_idx,
+		uint32_t gsl_ready_signal)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = TG_SET_GSL_SOURCE_SELECT;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->tg_set_gsl_source_select_params.tg = tg;
+	step_params->tg_set_gsl_source_select_params.group_idx = group_idx;
+	step_params->tg_set_gsl_source_select_params.gsl_ready_signal = gsl_ready_signal;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a HUBP_UPDATE_MALL_SEL step carrying hubp, mall_sel and cache_cursor.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_hubp_update_mall_sel(struct block_sequence_state *seq_state,
+		struct hubp *hubp,
+		uint32_t mall_sel,
+		bool cache_cursor)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = HUBP_UPDATE_MALL_SEL;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->hubp_update_mall_sel_params.hubp = hubp;
+	step_params->hubp_update_mall_sel_params.mall_sel = mall_sel;
+	step_params->hubp_update_mall_sel_params.cache_cursor = cache_cursor;
+	(*seq_state->num_steps)++;
+}
+
+/*
+ * Append a HUBP_PREPARE_SUBVP_BUFFERING step carrying hubp and enable.
+ * Does nothing when the sequence already holds MAX_HWSS_BLOCK_SEQUENCE_SIZE steps.
+ */
+void hwss_add_hubp_prepare_subvp_buffering(struct block_sequence_state *seq_state,
+		struct hubp *hubp,
+		bool enable)
+{
+	union block_sequence_params *step_params;
+
+	if (*seq_state->num_steps >= MAX_HWSS_BLOCK_SEQUENCE_SIZE)
+		return;
+
+	seq_state->steps[*seq_state->num_steps].func = HUBP_PREPARE_SUBVP_BUFFERING;
+	step_params = &seq_state->steps[*seq_state->num_steps].params;
+	step_params->hubp_prepare_subvp_buffering_params.hubp = hubp;
+	step_params->hubp_prepare_subvp_buffering_params.enable = enable;
+	(*seq_state->num_steps)++;
+}
+
+void hwss_add_hubp_set_blank_en(struct block_sequence_state *seq_state,
+ struct hubp *hubp,
+ bool enable)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = HUBP_SET_BLANK_EN;
+ seq_state->steps[*seq_state->num_steps].params.hubp_set_blank_en_params.hubp = hubp;
+ seq_state->steps[*seq_state->num_steps].params.hubp_set_blank_en_params.enable = enable;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_hubp_disable_control(struct block_sequence_state *seq_state,
+ struct hubp *hubp,
+ bool disable)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = HUBP_DISABLE_CONTROL;
+ seq_state->steps[*seq_state->num_steps].params.hubp_disable_control_params.hubp = hubp;
+ seq_state->steps[*seq_state->num_steps].params.hubp_disable_control_params.disable = disable;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_hubbub_soft_reset(struct block_sequence_state *seq_state,
+ struct hubbub *hubbub,
+ void (*hubbub_soft_reset)(struct hubbub *hubbub, bool reset),
+ bool reset)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = HUBBUB_SOFT_RESET;
+ seq_state->steps[*seq_state->num_steps].params.hubbub_soft_reset_params.hubbub = hubbub;
+ seq_state->steps[*seq_state->num_steps].params.hubbub_soft_reset_params.hubbub_soft_reset = hubbub_soft_reset;
+ seq_state->steps[*seq_state->num_steps].params.hubbub_soft_reset_params.reset = reset;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_hubp_clk_cntl(struct block_sequence_state *seq_state,
+ struct hubp *hubp,
+ bool enable)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = HUBP_CLK_CNTL;
+ seq_state->steps[*seq_state->num_steps].params.hubp_clk_cntl_params.hubp = hubp;
+ seq_state->steps[*seq_state->num_steps].params.hubp_clk_cntl_params.enable = enable;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_dpp_dppclk_control(struct block_sequence_state *seq_state,
+ struct dpp *dpp,
+ bool dppclk_div,
+ bool enable)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = DPP_DPPCLK_CONTROL;
+ seq_state->steps[*seq_state->num_steps].params.dpp_dppclk_control_params.dpp = dpp;
+ seq_state->steps[*seq_state->num_steps].params.dpp_dppclk_control_params.dppclk_div = dppclk_div;
+ seq_state->steps[*seq_state->num_steps].params.dpp_dppclk_control_params.enable = enable;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_disable_phantom_crtc(struct block_sequence_state *seq_state,
+ struct timing_generator *tg)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = DISABLE_PHANTOM_CRTC;
+ seq_state->steps[*seq_state->num_steps].params.disable_phantom_crtc_params.tg = tg;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_dsc_pg_status(struct block_sequence_state *seq_state,
+ struct dce_hwseq *hws,
+ int dsc_inst,
+ bool is_ungated)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = DSC_PG_STATUS;
+ seq_state->steps[*seq_state->num_steps].params.dsc_pg_status_params.hws = hws;
+ seq_state->steps[*seq_state->num_steps].params.dsc_pg_status_params.dsc_inst = dsc_inst;
+ seq_state->steps[*seq_state->num_steps].params.dsc_pg_status_params.is_ungated = is_ungated;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_dsc_wait_disconnect_pending_clear(struct block_sequence_state *seq_state,
+ struct display_stream_compressor *dsc,
+ bool *is_ungated)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = DSC_WAIT_DISCONNECT_PENDING_CLEAR;
+ seq_state->steps[*seq_state->num_steps].params.dsc_wait_disconnect_pending_clear_params.dsc = dsc;
+ seq_state->steps[*seq_state->num_steps].params.dsc_wait_disconnect_pending_clear_params.is_ungated = is_ungated;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_dsc_disable(struct block_sequence_state *seq_state,
+ struct display_stream_compressor *dsc,
+ bool *is_ungated)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = DSC_DISABLE;
+ seq_state->steps[*seq_state->num_steps].params.dsc_disable_params.dsc = dsc;
+ seq_state->steps[*seq_state->num_steps].params.dsc_disable_params.is_ungated = is_ungated;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_dccg_set_ref_dscclk(struct block_sequence_state *seq_state,
+ struct dccg *dccg,
+ int dsc_inst,
+ bool *is_ungated)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = DCCG_SET_REF_DSCCLK;
+ seq_state->steps[*seq_state->num_steps].params.dccg_set_ref_dscclk_params.dccg = dccg;
+ seq_state->steps[*seq_state->num_steps].params.dccg_set_ref_dscclk_params.dsc_inst = dsc_inst;
+ seq_state->steps[*seq_state->num_steps].params.dccg_set_ref_dscclk_params.is_ungated = is_ungated;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_dpp_root_clock_control(struct block_sequence_state *seq_state,
+ struct dce_hwseq *hws,
+ unsigned int dpp_inst,
+ bool clock_on)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = DPP_ROOT_CLOCK_CONTROL;
+ seq_state->steps[*seq_state->num_steps].params.dpp_root_clock_control_params.hws = hws;
+ seq_state->steps[*seq_state->num_steps].params.dpp_root_clock_control_params.dpp_inst = dpp_inst;
+ seq_state->steps[*seq_state->num_steps].params.dpp_root_clock_control_params.clock_on = clock_on;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_dpp_pg_control(struct block_sequence_state *seq_state,
+ struct dce_hwseq *hws,
+ unsigned int dpp_inst,
+ bool power_on)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = DPP_PG_CONTROL;
+ seq_state->steps[*seq_state->num_steps].params.dpp_pg_control_params.hws = hws;
+ seq_state->steps[*seq_state->num_steps].params.dpp_pg_control_params.dpp_inst = dpp_inst;
+ seq_state->steps[*seq_state->num_steps].params.dpp_pg_control_params.power_on = power_on;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_hubp_pg_control(struct block_sequence_state *seq_state,
+ struct dce_hwseq *hws,
+ unsigned int hubp_inst,
+ bool power_on)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = HUBP_PG_CONTROL;
+ seq_state->steps[*seq_state->num_steps].params.hubp_pg_control_params.hws = hws;
+ seq_state->steps[*seq_state->num_steps].params.hubp_pg_control_params.hubp_inst = hubp_inst;
+ seq_state->steps[*seq_state->num_steps].params.hubp_pg_control_params.power_on = power_on;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_hubp_init(struct block_sequence_state *seq_state,
+ struct hubp *hubp)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = HUBP_INIT;
+ seq_state->steps[*seq_state->num_steps].params.hubp_init_params.hubp = hubp;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_hubp_reset(struct block_sequence_state *seq_state,
+ struct hubp *hubp)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = HUBP_RESET;
+ seq_state->steps[*seq_state->num_steps].params.hubp_reset_params.hubp = hubp;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_dpp_reset(struct block_sequence_state *seq_state,
+ struct dpp *dpp)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = DPP_RESET;
+ seq_state->steps[*seq_state->num_steps].params.dpp_reset_params.dpp = dpp;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_opp_pipe_clock_control(struct block_sequence_state *seq_state,
+ struct output_pixel_processor *opp,
+ bool enable)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = OPP_PIPE_CLOCK_CONTROL;
+ seq_state->steps[*seq_state->num_steps].params.opp_pipe_clock_control_params.opp = opp;
+ seq_state->steps[*seq_state->num_steps].params.opp_pipe_clock_control_params.enable = enable;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_hubp_set_vm_system_aperture_settings(struct block_sequence_state *seq_state,
+ struct hubp *hubp,
+ uint64_t sys_default,
+ uint64_t sys_low,
+ uint64_t sys_high)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = HUBP_SET_VM_SYSTEM_APERTURE_SETTINGS;
+ seq_state->steps[*seq_state->num_steps].params.hubp_set_vm_system_aperture_settings_params.hubp = hubp;
+ seq_state->steps[*seq_state->num_steps].params.hubp_set_vm_system_aperture_settings_params.sys_default.quad_part = sys_default;
+ seq_state->steps[*seq_state->num_steps].params.hubp_set_vm_system_aperture_settings_params.sys_low.quad_part = sys_low;
+ seq_state->steps[*seq_state->num_steps].params.hubp_set_vm_system_aperture_settings_params.sys_high.quad_part = sys_high;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_hubp_set_flip_int(struct block_sequence_state *seq_state,
+ struct hubp *hubp)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = HUBP_SET_FLIP_INT;
+ seq_state->steps[*seq_state->num_steps].params.hubp_set_flip_int_params.hubp = hubp;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_dccg_update_dpp_dto(struct block_sequence_state *seq_state,
+ struct dccg *dccg,
+ int dpp_inst,
+ int dppclk_khz)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = DCCG_UPDATE_DPP_DTO;
+ seq_state->steps[*seq_state->num_steps].params.dccg_update_dpp_dto_params.dccg = dccg;
+ seq_state->steps[*seq_state->num_steps].params.dccg_update_dpp_dto_params.dpp_inst = dpp_inst;
+ seq_state->steps[*seq_state->num_steps].params.dccg_update_dpp_dto_params.dppclk_khz = dppclk_khz;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_hubp_vtg_sel(struct block_sequence_state *seq_state,
+ struct hubp *hubp,
+ uint32_t otg_inst)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = HUBP_VTG_SEL;
+ seq_state->steps[*seq_state->num_steps].params.hubp_vtg_sel_params.hubp = hubp;
+ seq_state->steps[*seq_state->num_steps].params.hubp_vtg_sel_params.otg_inst = otg_inst;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_hubp_setup2(struct block_sequence_state *seq_state,
+ struct hubp *hubp,
+ struct dml2_dchub_per_pipe_register_set *hubp_regs,
+ union dml2_global_sync_programming *global_sync,
+ struct dc_crtc_timing *timing)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = HUBP_SETUP2;
+ seq_state->steps[*seq_state->num_steps].params.hubp_setup2_params.hubp = hubp;
+ seq_state->steps[*seq_state->num_steps].params.hubp_setup2_params.hubp_regs = hubp_regs;
+ seq_state->steps[*seq_state->num_steps].params.hubp_setup2_params.global_sync = global_sync;
+ seq_state->steps[*seq_state->num_steps].params.hubp_setup2_params.timing = timing;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_hubp_setup(struct block_sequence_state *seq_state,
+ struct hubp *hubp,
+ struct _vcs_dpi_display_dlg_regs_st *dlg_regs,
+ struct _vcs_dpi_display_ttu_regs_st *ttu_regs,
+ struct _vcs_dpi_display_rq_regs_st *rq_regs,
+ struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = HUBP_SETUP;
+ seq_state->steps[*seq_state->num_steps].params.hubp_setup_params.hubp = hubp;
+ seq_state->steps[*seq_state->num_steps].params.hubp_setup_params.dlg_regs = dlg_regs;
+ seq_state->steps[*seq_state->num_steps].params.hubp_setup_params.ttu_regs = ttu_regs;
+ seq_state->steps[*seq_state->num_steps].params.hubp_setup_params.rq_regs = rq_regs;
+ seq_state->steps[*seq_state->num_steps].params.hubp_setup_params.pipe_dest = pipe_dest;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_hubp_set_unbounded_requesting(struct block_sequence_state *seq_state,
+ struct hubp *hubp,
+ bool unbounded_req)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = HUBP_SET_UNBOUNDED_REQUESTING;
+ seq_state->steps[*seq_state->num_steps].params.hubp_set_unbounded_requesting_params.hubp = hubp;
+ seq_state->steps[*seq_state->num_steps].params.hubp_set_unbounded_requesting_params.unbounded_req = unbounded_req;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_hubp_setup_interdependent2(struct block_sequence_state *seq_state,
+ struct hubp *hubp,
+ struct dml2_dchub_per_pipe_register_set *hubp_regs)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = HUBP_SETUP_INTERDEPENDENT2;
+ seq_state->steps[*seq_state->num_steps].params.hubp_setup_interdependent2_params.hubp = hubp;
+ seq_state->steps[*seq_state->num_steps].params.hubp_setup_interdependent2_params.hubp_regs = hubp_regs;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_hubp_setup_interdependent(struct block_sequence_state *seq_state,
+ struct hubp *hubp,
+ struct _vcs_dpi_display_dlg_regs_st *dlg_regs,
+ struct _vcs_dpi_display_ttu_regs_st *ttu_regs)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = HUBP_SETUP_INTERDEPENDENT;
+ seq_state->steps[*seq_state->num_steps].params.hubp_setup_interdependent_params.hubp = hubp;
+ seq_state->steps[*seq_state->num_steps].params.hubp_setup_interdependent_params.dlg_regs = dlg_regs;
+ seq_state->steps[*seq_state->num_steps].params.hubp_setup_interdependent_params.ttu_regs = ttu_regs;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_hubp_program_surface_config(struct block_sequence_state *seq_state,
+ struct hubp *hubp,
+ enum surface_pixel_format format,
+ struct dc_tiling_info *tiling_info,
+ struct plane_size plane_size,
+ enum dc_rotation_angle rotation,
+ struct dc_plane_dcc_param *dcc,
+ bool horizontal_mirror,
+ int compat_level)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = HUBP_PROGRAM_SURFACE_CONFIG;
+ seq_state->steps[*seq_state->num_steps].params.program_surface_config_params.hubp = hubp;
+ seq_state->steps[*seq_state->num_steps].params.program_surface_config_params.format = format;
+ seq_state->steps[*seq_state->num_steps].params.program_surface_config_params.tiling_info = tiling_info;
+ seq_state->steps[*seq_state->num_steps].params.program_surface_config_params.plane_size = plane_size;
+ seq_state->steps[*seq_state->num_steps].params.program_surface_config_params.rotation = rotation;
+ seq_state->steps[*seq_state->num_steps].params.program_surface_config_params.dcc = dcc;
+ seq_state->steps[*seq_state->num_steps].params.program_surface_config_params.horizontal_mirror = horizontal_mirror;
+ seq_state->steps[*seq_state->num_steps].params.program_surface_config_params.compat_level = compat_level;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_dpp_setup_dpp(struct block_sequence_state *seq_state,
+ struct pipe_ctx *pipe_ctx)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = DPP_SETUP_DPP;
+ seq_state->steps[*seq_state->num_steps].params.setup_dpp_params.pipe_ctx = pipe_ctx;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_dpp_set_cursor_matrix(struct block_sequence_state *seq_state,
+ struct dpp *dpp,
+ enum dc_color_space color_space,
+ struct dc_csc_transform *cursor_csc_color_matrix)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = DPP_SET_CURSOR_MATRIX;
+ seq_state->steps[*seq_state->num_steps].params.dpp_set_cursor_matrix_params.dpp = dpp;
+ seq_state->steps[*seq_state->num_steps].params.dpp_set_cursor_matrix_params.color_space = color_space;
+ seq_state->steps[*seq_state->num_steps].params.dpp_set_cursor_matrix_params.cursor_csc_color_matrix = cursor_csc_color_matrix;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_dpp_set_scaler(struct block_sequence_state *seq_state,
+ struct dpp *dpp,
+ const struct scaler_data *scl_data)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = DPP_SET_SCALER;
+ seq_state->steps[*seq_state->num_steps].params.dpp_set_scaler_params.dpp = dpp;
+ seq_state->steps[*seq_state->num_steps].params.dpp_set_scaler_params.scl_data = scl_data;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_hubp_mem_program_viewport(struct block_sequence_state *seq_state,
+ struct hubp *hubp,
+ const struct rect *viewport,
+ const struct rect *viewport_c)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = HUBP_MEM_PROGRAM_VIEWPORT;
+ seq_state->steps[*seq_state->num_steps].params.hubp_mem_program_viewport_params.hubp = hubp;
+ seq_state->steps[*seq_state->num_steps].params.hubp_mem_program_viewport_params.viewport = viewport;
+ seq_state->steps[*seq_state->num_steps].params.hubp_mem_program_viewport_params.viewport_c = viewport_c;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_abort_cursor_offload_update(struct block_sequence_state *seq_state,
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = ABORT_CURSOR_OFFLOAD_UPDATE;
+ seq_state->steps[*seq_state->num_steps].params.abort_cursor_offload_update_params.dc = dc;
+ seq_state->steps[*seq_state->num_steps].params.abort_cursor_offload_update_params.pipe_ctx = pipe_ctx;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_set_cursor_attribute(struct block_sequence_state *seq_state,
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = SET_CURSOR_ATTRIBUTE;
+ seq_state->steps[*seq_state->num_steps].params.set_cursor_attribute_params.dc = dc;
+ seq_state->steps[*seq_state->num_steps].params.set_cursor_attribute_params.pipe_ctx = pipe_ctx;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_set_cursor_position(struct block_sequence_state *seq_state,
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = SET_CURSOR_POSITION;
+ seq_state->steps[*seq_state->num_steps].params.set_cursor_position_params.dc = dc;
+ seq_state->steps[*seq_state->num_steps].params.set_cursor_position_params.pipe_ctx = pipe_ctx;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_set_cursor_sdr_white_level(struct block_sequence_state *seq_state,
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = SET_CURSOR_SDR_WHITE_LEVEL;
+ seq_state->steps[*seq_state->num_steps].params.set_cursor_sdr_white_level_params.dc = dc;
+ seq_state->steps[*seq_state->num_steps].params.set_cursor_sdr_white_level_params.pipe_ctx = pipe_ctx;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_program_output_csc(struct block_sequence_state *seq_state,
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ enum dc_color_space colorspace,
+ uint16_t *matrix,
+ int opp_id)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = PROGRAM_OUTPUT_CSC;
+ seq_state->steps[*seq_state->num_steps].params.program_output_csc_params.dc = dc;
+ seq_state->steps[*seq_state->num_steps].params.program_output_csc_params.pipe_ctx = pipe_ctx;
+ seq_state->steps[*seq_state->num_steps].params.program_output_csc_params.colorspace = colorspace;
+ seq_state->steps[*seq_state->num_steps].params.program_output_csc_params.matrix = matrix;
+ seq_state->steps[*seq_state->num_steps].params.program_output_csc_params.opp_id = opp_id;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_phantom_hubp_post_enable(struct block_sequence_state *seq_state,
+ struct hubp *hubp)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = PHANTOM_HUBP_POST_ENABLE;
+ seq_state->steps[*seq_state->num_steps].params.phantom_hubp_post_enable_params.hubp = hubp;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_update_force_pstate(struct block_sequence_state *seq_state,
+ struct dc *dc,
+ struct dc_state *context)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = UPDATE_FORCE_PSTATE;
+ seq_state->steps[*seq_state->num_steps].params.update_force_pstate_params.dc = dc;
+ seq_state->steps[*seq_state->num_steps].params.update_force_pstate_params.context = context;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_hubbub_apply_dedcn21_147_wa(struct block_sequence_state *seq_state,
+ struct hubbub *hubbub)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = HUBBUB_APPLY_DEDCN21_147_WA;
+ seq_state->steps[*seq_state->num_steps].params.hubbub_apply_dedcn21_147_wa_params.hubbub = hubbub;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_hubbub_allow_self_refresh_control(struct block_sequence_state *seq_state,
+ struct hubbub *hubbub,
+ bool allow,
+ bool *disallow_self_refresh_applied)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = HUBBUB_ALLOW_SELF_REFRESH_CONTROL;
+ seq_state->steps[*seq_state->num_steps].params.hubbub_allow_self_refresh_control_params.hubbub = hubbub;
+ seq_state->steps[*seq_state->num_steps].params.hubbub_allow_self_refresh_control_params.allow = allow;
+ seq_state->steps[*seq_state->num_steps].params.hubbub_allow_self_refresh_control_params.disallow_self_refresh_applied = disallow_self_refresh_applied;
+ (*seq_state->num_steps)++;
+ }
+}
+
+void hwss_add_tg_get_frame_count(struct block_sequence_state *seq_state,
+ struct timing_generator *tg,
+ unsigned int *frame_count)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = TG_GET_FRAME_COUNT;
+ seq_state->steps[*seq_state->num_steps].params.tg_get_frame_count_params.tg = tg;
+ seq_state->steps[*seq_state->num_steps].params.tg_get_frame_count_params.frame_count = frame_count;
+ (*seq_state->num_steps)++;
+ }
+}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
deleted file mode 100644
index c8457babfdea..000000000000
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ /dev/null
@@ -1,4830 +0,0 @@
-/*
- * Copyright 2012-15 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include <linux/slab.h>
-
-#include "dm_services.h"
-#include "atomfirmware.h"
-#include "dm_helpers.h"
-#include "dc.h"
-#include "grph_object_id.h"
-#include "gpio_service_interface.h"
-#include "core_status.h"
-#include "dc_link_dp.h"
-#include "dc_link_ddc.h"
-#include "link_hwss.h"
-#include "opp.h"
-
-#include "link_encoder.h"
-#include "hw_sequencer.h"
-#include "resource.h"
-#include "abm.h"
-#include "fixed31_32.h"
-#include "dpcd_defs.h"
-#include "dmcu.h"
-#include "hw/clk_mgr.h"
-#include "dce/dmub_psr.h"
-#include "dmub/dmub_srv.h"
-#include "inc/hw/panel_cntl.h"
-#include "inc/link_enc_cfg.h"
-#include "inc/link_dpcd.h"
-
-#include "dc/dcn30/dcn30_vpg.h"
-
-#define DC_LOGGER_INIT(logger)
-
-#define LINK_INFO(...) \
- DC_LOG_HW_HOTPLUG( \
- __VA_ARGS__)
-
-#define RETIMER_REDRIVER_INFO(...) \
- DC_LOG_RETIMER_REDRIVER( \
- __VA_ARGS__)
-
-/*******************************************************************************
- * Private functions
- ******************************************************************************/
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-static bool add_dp_hpo_link_encoder_to_link(struct dc_link *link)
-{
- struct hpo_dp_link_encoder *enc = resource_get_unused_hpo_dp_link_encoder(
- link->dc->res_pool);
-
- if (!link->hpo_dp_link_enc && enc) {
- link->hpo_dp_link_enc = enc;
- link->hpo_dp_link_enc->transmitter = link->link_enc->transmitter;
- link->hpo_dp_link_enc->hpd_source = link->link_enc->hpd_source;
- }
-
- return (link->hpo_dp_link_enc != NULL);
-}
-
-static void remove_dp_hpo_link_encoder_from_link(struct dc_link *link)
-{
- if (link->hpo_dp_link_enc) {
- link->hpo_dp_link_enc->hpd_source = HPD_SOURCEID_UNKNOWN;
- link->hpo_dp_link_enc->transmitter = TRANSMITTER_UNKNOWN;
- link->hpo_dp_link_enc = NULL;
- }
-}
-#endif
-
-static void dc_link_destruct(struct dc_link *link)
-{
- int i;
-
- if (link->hpd_gpio) {
- dal_gpio_destroy_irq(&link->hpd_gpio);
- link->hpd_gpio = NULL;
- }
-
- if (link->ddc)
- dal_ddc_service_destroy(&link->ddc);
-
- if (link->panel_cntl)
- link->panel_cntl->funcs->destroy(&link->panel_cntl);
-
- if (link->link_enc) {
- /* Update link encoder resource tracking variables. These are used for
- * the dynamic assignment of link encoders to streams. Virtual links
- * are not assigned encoder resources on creation.
- */
- if (link->link_id.id != CONNECTOR_ID_VIRTUAL) {
- link->dc->res_pool->link_encoders[link->eng_id - ENGINE_ID_DIGA] = NULL;
- link->dc->res_pool->dig_link_enc_count--;
- }
- link->link_enc->funcs->destroy(&link->link_enc);
- }
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (link->hpo_dp_link_enc) {
- remove_dp_hpo_link_encoder_from_link(link);
- }
-#endif
-
- if (link->local_sink)
- dc_sink_release(link->local_sink);
-
- for (i = 0; i < link->sink_count; ++i)
- dc_sink_release(link->remote_sinks[i]);
-}
-
-struct gpio *get_hpd_gpio(struct dc_bios *dcb,
- struct graphics_object_id link_id,
- struct gpio_service *gpio_service)
-{
- enum bp_result bp_result;
- struct graphics_object_hpd_info hpd_info;
- struct gpio_pin_info pin_info;
-
- if (dcb->funcs->get_hpd_info(dcb, link_id, &hpd_info) != BP_RESULT_OK)
- return NULL;
-
- bp_result = dcb->funcs->get_gpio_pin_info(dcb,
- hpd_info.hpd_int_gpio_uid, &pin_info);
-
- if (bp_result != BP_RESULT_OK) {
- ASSERT(bp_result == BP_RESULT_NORECORD);
- return NULL;
- }
-
- return dal_gpio_service_create_irq(gpio_service,
- pin_info.offset,
- pin_info.mask);
-}
-
-/*
- * Function: program_hpd_filter
- *
- * @brief
- * Programs HPD filter on associated HPD line
- *
- * @param [in] delay_on_connect_in_ms: Connect filter timeout
- * @param [in] delay_on_disconnect_in_ms: Disconnect filter timeout
- *
- * @return
- * true on success, false otherwise
- */
-static bool program_hpd_filter(const struct dc_link *link)
-{
- bool result = false;
- struct gpio *hpd;
- int delay_on_connect_in_ms = 0;
- int delay_on_disconnect_in_ms = 0;
-
- if (link->is_hpd_filter_disabled)
- return false;
- /* Verify feature is supported */
- switch (link->connector_signal) {
- case SIGNAL_TYPE_DVI_SINGLE_LINK:
- case SIGNAL_TYPE_DVI_DUAL_LINK:
- case SIGNAL_TYPE_HDMI_TYPE_A:
- /* Program hpd filter */
- delay_on_connect_in_ms = 500;
- delay_on_disconnect_in_ms = 100;
- break;
- case SIGNAL_TYPE_DISPLAY_PORT:
- case SIGNAL_TYPE_DISPLAY_PORT_MST:
- /* Program hpd filter to allow DP signal to settle */
- /* 500: not able to detect MST <-> SST switch as HPD is low for
- * only 100ms on DELL U2413
- * 0: some passive dongle still show aux mode instead of i2c
- * 20-50: not enough to hide bouncing HPD with passive dongle.
- * also see intermittent i2c read issues.
- */
- delay_on_connect_in_ms = 80;
- delay_on_disconnect_in_ms = 0;
- break;
- case SIGNAL_TYPE_LVDS:
- case SIGNAL_TYPE_EDP:
- default:
- /* Don't program hpd filter */
- return false;
- }
-
- /* Obtain HPD handle */
- hpd = get_hpd_gpio(link->ctx->dc_bios, link->link_id,
- link->ctx->gpio_service);
-
- if (!hpd)
- return result;
-
- /* Setup HPD filtering */
- if (dal_gpio_open(hpd, GPIO_MODE_INTERRUPT) == GPIO_RESULT_OK) {
- struct gpio_hpd_config config;
-
- config.delay_on_connect = delay_on_connect_in_ms;
- config.delay_on_disconnect = delay_on_disconnect_in_ms;
-
- dal_irq_setup_hpd_filter(hpd, &config);
-
- dal_gpio_close(hpd);
-
- result = true;
- } else {
- ASSERT_CRITICAL(false);
- }
-
- /* Release HPD handle */
- dal_gpio_destroy_irq(&hpd);
-
- return result;
-}
-
-bool dc_link_wait_for_t12(struct dc_link *link)
-{
- if (link->connector_signal == SIGNAL_TYPE_EDP && link->dc->hwss.edp_wait_for_T12) {
- link->dc->hwss.edp_wait_for_T12(link);
-
- return true;
- }
-
- return false;
-}
-
-/**
- * dc_link_detect_sink() - Determine if there is a sink connected
- *
- * @link: pointer to the dc link
- * @type: Returned connection type
- * Does not detect downstream devices, such as MST sinks
- * or display connected through active dongles
- */
-bool dc_link_detect_sink(struct dc_link *link, enum dc_connection_type *type)
-{
- uint32_t is_hpd_high = 0;
- struct gpio *hpd_pin;
-
- if (link->connector_signal == SIGNAL_TYPE_LVDS) {
- *type = dc_connection_single;
- return true;
- }
-
- if (link->connector_signal == SIGNAL_TYPE_EDP) {
- /*in case it is not on*/
- link->dc->hwss.edp_power_control(link, true);
- link->dc->hwss.edp_wait_for_hpd_ready(link, true);
- }
-
- /* Link may not have physical HPD pin. */
- if (link->ep_type != DISPLAY_ENDPOINT_PHY) {
- if (link->hpd_status)
- *type = dc_connection_single;
- else
- *type = dc_connection_none;
-
- return true;
- }
-
- /* todo: may need to lock gpio access */
- hpd_pin = get_hpd_gpio(link->ctx->dc_bios, link->link_id,
- link->ctx->gpio_service);
- if (!hpd_pin)
- goto hpd_gpio_failure;
-
- dal_gpio_open(hpd_pin, GPIO_MODE_INTERRUPT);
- dal_gpio_get_value(hpd_pin, &is_hpd_high);
- dal_gpio_close(hpd_pin);
- dal_gpio_destroy_irq(&hpd_pin);
-
- if (is_hpd_high) {
- *type = dc_connection_single;
- /* TODO: need to do the actual detection */
- } else {
- *type = dc_connection_none;
- }
-
- return true;
-
-hpd_gpio_failure:
- return false;
-}
-
-static enum ddc_transaction_type get_ddc_transaction_type(enum signal_type sink_signal)
-{
- enum ddc_transaction_type transaction_type = DDC_TRANSACTION_TYPE_NONE;
-
- switch (sink_signal) {
- case SIGNAL_TYPE_DVI_SINGLE_LINK:
- case SIGNAL_TYPE_DVI_DUAL_LINK:
- case SIGNAL_TYPE_HDMI_TYPE_A:
- case SIGNAL_TYPE_LVDS:
- case SIGNAL_TYPE_RGB:
- transaction_type = DDC_TRANSACTION_TYPE_I2C;
- break;
-
- case SIGNAL_TYPE_DISPLAY_PORT:
- case SIGNAL_TYPE_EDP:
- transaction_type = DDC_TRANSACTION_TYPE_I2C_OVER_AUX;
- break;
-
- case SIGNAL_TYPE_DISPLAY_PORT_MST:
- /* MST does not use I2COverAux, but there is the
- * SPECIAL use case for "immediate dwnstrm device
- * access" (EPR#370830).
- */
- transaction_type = DDC_TRANSACTION_TYPE_I2C_OVER_AUX;
- break;
-
- default:
- break;
- }
-
- return transaction_type;
-}
-
-static enum signal_type get_basic_signal_type(struct graphics_object_id encoder,
- struct graphics_object_id downstream)
-{
- if (downstream.type == OBJECT_TYPE_CONNECTOR) {
- switch (downstream.id) {
- case CONNECTOR_ID_SINGLE_LINK_DVII:
- switch (encoder.id) {
- case ENCODER_ID_INTERNAL_DAC1:
- case ENCODER_ID_INTERNAL_KLDSCP_DAC1:
- case ENCODER_ID_INTERNAL_DAC2:
- case ENCODER_ID_INTERNAL_KLDSCP_DAC2:
- return SIGNAL_TYPE_RGB;
- default:
- return SIGNAL_TYPE_DVI_SINGLE_LINK;
- }
- break;
- case CONNECTOR_ID_DUAL_LINK_DVII:
- {
- switch (encoder.id) {
- case ENCODER_ID_INTERNAL_DAC1:
- case ENCODER_ID_INTERNAL_KLDSCP_DAC1:
- case ENCODER_ID_INTERNAL_DAC2:
- case ENCODER_ID_INTERNAL_KLDSCP_DAC2:
- return SIGNAL_TYPE_RGB;
- default:
- return SIGNAL_TYPE_DVI_DUAL_LINK;
- }
- }
- break;
- case CONNECTOR_ID_SINGLE_LINK_DVID:
- return SIGNAL_TYPE_DVI_SINGLE_LINK;
- case CONNECTOR_ID_DUAL_LINK_DVID:
- return SIGNAL_TYPE_DVI_DUAL_LINK;
- case CONNECTOR_ID_VGA:
- return SIGNAL_TYPE_RGB;
- case CONNECTOR_ID_HDMI_TYPE_A:
- return SIGNAL_TYPE_HDMI_TYPE_A;
- case CONNECTOR_ID_LVDS:
- return SIGNAL_TYPE_LVDS;
- case CONNECTOR_ID_DISPLAY_PORT:
- return SIGNAL_TYPE_DISPLAY_PORT;
- case CONNECTOR_ID_EDP:
- return SIGNAL_TYPE_EDP;
- default:
- return SIGNAL_TYPE_NONE;
- }
- } else if (downstream.type == OBJECT_TYPE_ENCODER) {
- switch (downstream.id) {
- case ENCODER_ID_EXTERNAL_NUTMEG:
- case ENCODER_ID_EXTERNAL_TRAVIS:
- return SIGNAL_TYPE_DISPLAY_PORT;
- default:
- return SIGNAL_TYPE_NONE;
- }
- }
-
- return SIGNAL_TYPE_NONE;
-}
-
-/*
- * dc_link_is_dp_sink_present() - Check if there is a native DP
- * or passive DP-HDMI dongle connected
- */
-bool dc_link_is_dp_sink_present(struct dc_link *link)
-{
- enum gpio_result gpio_result;
- uint32_t clock_pin = 0;
- uint8_t retry = 0;
- struct ddc *ddc;
-
- enum connector_id connector_id =
- dal_graphics_object_id_get_connector_id(link->link_id);
-
- bool present =
- ((connector_id == CONNECTOR_ID_DISPLAY_PORT) ||
- (connector_id == CONNECTOR_ID_EDP));
-
- ddc = dal_ddc_service_get_ddc_pin(link->ddc);
-
- if (!ddc) {
- BREAK_TO_DEBUGGER();
- return present;
- }
-
- /* Open GPIO and set it to I2C mode */
- /* Note: this GpioMode_Input will be converted
- * to GpioConfigType_I2cAuxDualMode in GPIO component,
- * which indicates we need additional delay
- */
-
- if (dal_ddc_open(ddc, GPIO_MODE_INPUT,
- GPIO_DDC_CONFIG_TYPE_MODE_I2C) != GPIO_RESULT_OK) {
- dal_ddc_close(ddc);
-
- return present;
- }
-
- /*
- * Read GPIO: DP sink is present if both clock and data pins are zero
- *
- * [W/A] plug-unplug DP cable, sometimes customer board has
- * one short pulse on clk_pin(1V, < 1ms). DP will be config to HDMI/DVI
- * then monitor can't br light up. Add retry 3 times
- * But in real passive dongle, it need additional 3ms to detect
- */
- do {
- gpio_result = dal_gpio_get_value(ddc->pin_clock, &clock_pin);
- ASSERT(gpio_result == GPIO_RESULT_OK);
- if (clock_pin)
- udelay(1000);
- else
- break;
- } while (retry++ < 3);
-
- present = (gpio_result == GPIO_RESULT_OK) && !clock_pin;
-
- dal_ddc_close(ddc);
-
- return present;
-}
-
-/*
- * @brief
- * Detect output sink type
- */
-static enum signal_type link_detect_sink(struct dc_link *link,
- enum dc_detect_reason reason)
-{
- enum signal_type result;
- struct graphics_object_id enc_id;
-
- if (link->is_dig_mapping_flexible)
- enc_id = (struct graphics_object_id){.id = ENCODER_ID_UNKNOWN};
- else
- enc_id = link->link_enc->id;
- result = get_basic_signal_type(enc_id, link->link_id);
-
- /* Use basic signal type for link without physical connector. */
- if (link->ep_type != DISPLAY_ENDPOINT_PHY)
- return result;
-
- /* Internal digital encoder will detect only dongles
- * that require digital signal
- */
-
- /* Detection mechanism is different
- * for different native connectors.
- * LVDS connector supports only LVDS signal;
- * PCIE is a bus slot, the actual connector needs to be detected first;
- * eDP connector supports only eDP signal;
- * HDMI should check straps for audio
- */
-
- /* PCIE detects the actual connector on add-on board */
- if (link->link_id.id == CONNECTOR_ID_PCIE) {
- /* ZAZTODO implement PCIE add-on card detection */
- }
-
- switch (link->link_id.id) {
- case CONNECTOR_ID_HDMI_TYPE_A: {
- /* check audio support:
- * if native HDMI is not supported, switch to DVI
- */
- struct audio_support *aud_support =
- &link->dc->res_pool->audio_support;
-
- if (!aud_support->hdmi_audio_native)
- if (link->link_id.id == CONNECTOR_ID_HDMI_TYPE_A)
- result = SIGNAL_TYPE_DVI_SINGLE_LINK;
- }
- break;
- case CONNECTOR_ID_DISPLAY_PORT: {
- /* DP HPD short pulse. Passive DP dongle will not
- * have short pulse
- */
- if (reason != DETECT_REASON_HPDRX) {
- /* Check whether DP signal detected: if not -
- * we assume signal is DVI; it could be corrected
- * to HDMI after dongle detection
- */
- if (!dm_helpers_is_dp_sink_present(link))
- result = SIGNAL_TYPE_DVI_SINGLE_LINK;
- }
- }
- break;
- default:
- break;
- }
-
- return result;
-}
-
-static enum signal_type decide_signal_from_strap_and_dongle_type(enum display_dongle_type dongle_type,
- struct audio_support *audio_support)
-{
- enum signal_type signal = SIGNAL_TYPE_NONE;
-
- switch (dongle_type) {
- case DISPLAY_DONGLE_DP_HDMI_DONGLE:
- if (audio_support->hdmi_audio_on_dongle)
- signal = SIGNAL_TYPE_HDMI_TYPE_A;
- else
- signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
- break;
- case DISPLAY_DONGLE_DP_DVI_DONGLE:
- signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
- break;
- case DISPLAY_DONGLE_DP_HDMI_MISMATCHED_DONGLE:
- if (audio_support->hdmi_audio_native)
- signal = SIGNAL_TYPE_HDMI_TYPE_A;
- else
- signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
- break;
- default:
- signal = SIGNAL_TYPE_NONE;
- break;
- }
-
- return signal;
-}
-
-static enum signal_type dp_passive_dongle_detection(struct ddc_service *ddc,
- struct display_sink_capability *sink_cap,
- struct audio_support *audio_support)
-{
- dal_ddc_service_i2c_query_dp_dual_mode_adaptor(ddc, sink_cap);
-
- return decide_signal_from_strap_and_dongle_type(sink_cap->dongle_type,
- audio_support);
-}
-
-static void link_disconnect_sink(struct dc_link *link)
-{
- if (link->local_sink) {
- dc_sink_release(link->local_sink);
- link->local_sink = NULL;
- }
-
- link->dpcd_sink_count = 0;
- //link->dpcd_caps.dpcd_rev.raw = 0;
-}
-
-static void link_disconnect_remap(struct dc_sink *prev_sink, struct dc_link *link)
-{
- dc_sink_release(link->local_sink);
- link->local_sink = prev_sink;
-}
-
-#if defined(CONFIG_DRM_AMD_DC_HDCP)
-bool dc_link_is_hdcp14(struct dc_link *link, enum signal_type signal)
-{
- bool ret = false;
-
- switch (signal) {
- case SIGNAL_TYPE_DISPLAY_PORT:
- case SIGNAL_TYPE_DISPLAY_PORT_MST:
- ret = link->hdcp_caps.bcaps.bits.HDCP_CAPABLE;
- break;
- case SIGNAL_TYPE_DVI_SINGLE_LINK:
- case SIGNAL_TYPE_DVI_DUAL_LINK:
- case SIGNAL_TYPE_HDMI_TYPE_A:
- /* HDMI doesn't tell us its HDCP(1.4) capability, so assume to always be capable,
- * we can poll for bksv but some displays have an issue with this. Since its so rare
- * for a display to not be 1.4 capable, this assumtion is ok
- */
- ret = true;
- break;
- default:
- break;
- }
- return ret;
-}
-
-bool dc_link_is_hdcp22(struct dc_link *link, enum signal_type signal)
-{
- bool ret = false;
-
- switch (signal) {
- case SIGNAL_TYPE_DISPLAY_PORT:
- case SIGNAL_TYPE_DISPLAY_PORT_MST:
- ret = (link->hdcp_caps.bcaps.bits.HDCP_CAPABLE &&
- link->hdcp_caps.rx_caps.fields.byte0.hdcp_capable &&
- (link->hdcp_caps.rx_caps.fields.version == 0x2)) ? 1 : 0;
- break;
- case SIGNAL_TYPE_DVI_SINGLE_LINK:
- case SIGNAL_TYPE_DVI_DUAL_LINK:
- case SIGNAL_TYPE_HDMI_TYPE_A:
- ret = (link->hdcp_caps.rx_caps.fields.version == 0x4) ? 1:0;
- break;
- default:
- break;
- }
-
- return ret;
-}
-
-static void query_hdcp_capability(enum signal_type signal, struct dc_link *link)
-{
- struct hdcp_protection_message msg22;
- struct hdcp_protection_message msg14;
-
- memset(&msg22, 0, sizeof(struct hdcp_protection_message));
- memset(&msg14, 0, sizeof(struct hdcp_protection_message));
- memset(link->hdcp_caps.rx_caps.raw, 0,
- sizeof(link->hdcp_caps.rx_caps.raw));
-
- if ((link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT &&
- link->ddc->transaction_type ==
- DDC_TRANSACTION_TYPE_I2C_OVER_AUX) ||
- link->connector_signal == SIGNAL_TYPE_EDP) {
- msg22.data = link->hdcp_caps.rx_caps.raw;
- msg22.length = sizeof(link->hdcp_caps.rx_caps.raw);
- msg22.msg_id = HDCP_MESSAGE_ID_RX_CAPS;
- } else {
- msg22.data = &link->hdcp_caps.rx_caps.fields.version;
- msg22.length = sizeof(link->hdcp_caps.rx_caps.fields.version);
- msg22.msg_id = HDCP_MESSAGE_ID_HDCP2VERSION;
- }
- msg22.version = HDCP_VERSION_22;
- msg22.link = HDCP_LINK_PRIMARY;
- msg22.max_retries = 5;
- dc_process_hdcp_msg(signal, link, &msg22);
-
- if (signal == SIGNAL_TYPE_DISPLAY_PORT || signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
- msg14.data = &link->hdcp_caps.bcaps.raw;
- msg14.length = sizeof(link->hdcp_caps.bcaps.raw);
- msg14.msg_id = HDCP_MESSAGE_ID_READ_BCAPS;
- msg14.version = HDCP_VERSION_14;
- msg14.link = HDCP_LINK_PRIMARY;
- msg14.max_retries = 5;
-
- dc_process_hdcp_msg(signal, link, &msg14);
- }
-
-}
-#endif
-
-static void read_current_link_settings_on_detect(struct dc_link *link)
-{
- union lane_count_set lane_count_set = {0};
- uint8_t link_bw_set;
- uint8_t link_rate_set;
- uint32_t read_dpcd_retry_cnt = 10;
- enum dc_status status = DC_ERROR_UNEXPECTED;
- int i;
- union max_down_spread max_down_spread = {0};
-
- // Read DPCD 00101h to find out the number of lanes currently set
- for (i = 0; i < read_dpcd_retry_cnt; i++) {
- status = core_link_read_dpcd(link,
- DP_LANE_COUNT_SET,
- &lane_count_set.raw,
- sizeof(lane_count_set));
- /* First DPCD read after VDD ON can fail if the particular board
- * does not have HPD pin wired correctly. So if DPCD read fails,
- * which it should never happen, retry a few times. Target worst
- * case scenario of 80 ms.
- */
- if (status == DC_OK) {
- link->cur_link_settings.lane_count =
- lane_count_set.bits.LANE_COUNT_SET;
- break;
- }
-
- msleep(8);
- }
-
- // Read DPCD 00100h to find if standard link rates are set
- core_link_read_dpcd(link, DP_LINK_BW_SET,
- &link_bw_set, sizeof(link_bw_set));
-
- if (link_bw_set == 0) {
- if (link->connector_signal == SIGNAL_TYPE_EDP) {
- /* If standard link rates are not being used,
- * Read DPCD 00115h to find the edp link rate set used
- */
- core_link_read_dpcd(link, DP_LINK_RATE_SET,
- &link_rate_set, sizeof(link_rate_set));
-
- // edp_supported_link_rates_count = 0 for DP
- if (link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) {
- link->cur_link_settings.link_rate =
- link->dpcd_caps.edp_supported_link_rates[link_rate_set];
- link->cur_link_settings.link_rate_set = link_rate_set;
- link->cur_link_settings.use_link_rate_set = true;
- }
- } else {
- // Link Rate not found. Seamless boot may not work.
- ASSERT(false);
- }
- } else {
- link->cur_link_settings.link_rate = link_bw_set;
- link->cur_link_settings.use_link_rate_set = false;
- }
- // Read DPCD 00003h to find the max down spread.
- core_link_read_dpcd(link, DP_MAX_DOWNSPREAD,
- &max_down_spread.raw, sizeof(max_down_spread));
- link->cur_link_settings.link_spread =
- max_down_spread.bits.MAX_DOWN_SPREAD ?
- LINK_SPREAD_05_DOWNSPREAD_30KHZ : LINK_SPREAD_DISABLED;
-}
-
-static bool detect_dp(struct dc_link *link,
- struct display_sink_capability *sink_caps,
- enum dc_detect_reason reason)
-{
- struct audio_support *audio_support = &link->dc->res_pool->audio_support;
-
- sink_caps->signal = link_detect_sink(link, reason);
- sink_caps->transaction_type =
- get_ddc_transaction_type(sink_caps->signal);
-
- if (sink_caps->transaction_type == DDC_TRANSACTION_TYPE_I2C_OVER_AUX) {
- sink_caps->signal = SIGNAL_TYPE_DISPLAY_PORT;
- if (!detect_dp_sink_caps(link))
- return false;
- if (is_mst_supported(link)) {
- sink_caps->signal = SIGNAL_TYPE_DISPLAY_PORT_MST;
- link->type = dc_connection_mst_branch;
-
- dal_ddc_service_set_transaction_type(link->ddc,
- sink_caps->transaction_type);
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- /* Apply work around for tunneled MST on certain USB4 docks. Always use DSC if dock
- * reports DSC support.
- */
- if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
- link->type == dc_connection_mst_branch &&
- link->dpcd_caps.branch_dev_id == DP_BRANCH_DEVICE_ID_90CC24 &&
- link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT &&
- !link->dc->debug.dpia_debug.bits.disable_mst_dsc_work_around)
- link->wa_flags.dpia_mst_dsc_always_on = true;
-#endif
-
-#if defined(CONFIG_DRM_AMD_DC_HDCP)
- /* In case of fallback to SST when topology discovery below fails
- * HDCP caps will be querried again later by the upper layer (caller
- * of this function). */
- query_hdcp_capability(SIGNAL_TYPE_DISPLAY_PORT_MST, link);
-#endif
- }
-
- if (link->type != dc_connection_mst_branch &&
- is_dp_branch_device(link))
- /* DP SST branch */
- link->type = dc_connection_sst_branch;
- } else {
- /* DP passive dongles */
- sink_caps->signal = dp_passive_dongle_detection(link->ddc,
- sink_caps,
- audio_support);
- link->dpcd_caps.dongle_type = sink_caps->dongle_type;
- link->dpcd_caps.dpcd_rev.raw = 0;
- }
-
- return true;
-}
-
-static bool is_same_edid(struct dc_edid *old_edid, struct dc_edid *new_edid)
-{
- if (old_edid->length != new_edid->length)
- return false;
-
- if (new_edid->length == 0)
- return false;
-
- return (memcmp(old_edid->raw_edid,
- new_edid->raw_edid, new_edid->length) == 0);
-}
-
-static bool wait_for_entering_dp_alt_mode(struct dc_link *link)
-{
- /**
- * something is terribly wrong if time out is > 200ms. (5Hz)
- * 500 microseconds * 400 tries us 200 ms
- **/
- unsigned int sleep_time_in_microseconds = 500;
- unsigned int tries_allowed = 400;
- bool is_in_alt_mode;
- unsigned long long enter_timestamp;
- unsigned long long finish_timestamp;
- unsigned long long time_taken_in_ns;
- int tries_taken;
-
- DC_LOGGER_INIT(link->ctx->logger);
-
- if (!link->link_enc->funcs->is_in_alt_mode)
- return true;
-
- is_in_alt_mode = link->link_enc->funcs->is_in_alt_mode(link->link_enc);
- DC_LOG_WARNING("DP Alt mode state on HPD: %d\n", is_in_alt_mode);
-
- if (is_in_alt_mode)
- return true;
-
- enter_timestamp = dm_get_timestamp(link->ctx);
-
- for (tries_taken = 0; tries_taken < tries_allowed; tries_taken++) {
- udelay(sleep_time_in_microseconds);
- /* ask the link if alt mode is enabled, if so return ok */
- if (link->link_enc->funcs->is_in_alt_mode(link->link_enc)) {
- finish_timestamp = dm_get_timestamp(link->ctx);
- time_taken_in_ns =
- dm_get_elapse_time_in_ns(link->ctx,
- finish_timestamp,
- enter_timestamp);
- DC_LOG_WARNING("Alt mode entered finished after %llu ms\n",
- div_u64(time_taken_in_ns, 1000000));
- return true;
- }
- }
- finish_timestamp = dm_get_timestamp(link->ctx);
- time_taken_in_ns = dm_get_elapse_time_in_ns(link->ctx, finish_timestamp,
- enter_timestamp);
- DC_LOG_WARNING("Alt mode has timed out after %llu ms\n",
- div_u64(time_taken_in_ns, 1000000));
- return false;
-}
-
-/*
- * dc_link_detect() - Detect if a sink is attached to a given link
- *
- * link->local_sink is created or destroyed as needed.
- *
- * This does not create remote sinks but will trigger DM
- * to start MST detection if a branch is detected.
- */
-static bool dc_link_detect_helper(struct dc_link *link,
- enum dc_detect_reason reason)
-{
- struct dc_sink_init_data sink_init_data = { 0 };
- struct display_sink_capability sink_caps = { 0 };
- uint32_t i;
- bool converter_disable_audio = false;
- struct audio_support *aud_support = &link->dc->res_pool->audio_support;
- bool same_edid = false;
- enum dc_edid_status edid_status;
- struct dc_context *dc_ctx = link->ctx;
- struct dc_sink *sink = NULL;
- struct dc_sink *prev_sink = NULL;
- struct dpcd_caps prev_dpcd_caps;
- enum dc_connection_type new_connection_type = dc_connection_none;
- enum dc_connection_type pre_connection_type = dc_connection_none;
- bool perform_dp_seamless_boot = false;
- const uint32_t post_oui_delay = 30; // 30ms
-
- DC_LOGGER_INIT(link->ctx->logger);
-
- if (dc_is_virtual_signal(link->connector_signal))
- return false;
-
- if (((link->connector_signal == SIGNAL_TYPE_LVDS ||
- link->connector_signal == SIGNAL_TYPE_EDP) &&
- (!link->dc->config.allow_edp_hotplug_detection)) &&
- link->local_sink) {
- // need to re-write OUI and brightness in resume case
- if (link->connector_signal == SIGNAL_TYPE_EDP) {
- dpcd_set_source_specific_data(link);
- msleep(post_oui_delay);
- dc_link_set_default_brightness_aux(link);
- //TODO: use cached
- }
-
- return true;
- }
-
- if (!dc_link_detect_sink(link, &new_connection_type)) {
- BREAK_TO_DEBUGGER();
- return false;
- }
-
- prev_sink = link->local_sink;
- if (prev_sink) {
- dc_sink_retain(prev_sink);
- memcpy(&prev_dpcd_caps, &link->dpcd_caps, sizeof(struct dpcd_caps));
- }
-
- link_disconnect_sink(link);
- if (new_connection_type != dc_connection_none) {
- pre_connection_type = link->type;
- link->type = new_connection_type;
- link->link_state_valid = false;
-
- /* From Disconnected-to-Connected. */
- switch (link->connector_signal) {
- case SIGNAL_TYPE_HDMI_TYPE_A: {
- sink_caps.transaction_type = DDC_TRANSACTION_TYPE_I2C;
- if (aud_support->hdmi_audio_native)
- sink_caps.signal = SIGNAL_TYPE_HDMI_TYPE_A;
- else
- sink_caps.signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
- break;
- }
-
- case SIGNAL_TYPE_DVI_SINGLE_LINK: {
- sink_caps.transaction_type = DDC_TRANSACTION_TYPE_I2C;
- sink_caps.signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
- break;
- }
-
- case SIGNAL_TYPE_DVI_DUAL_LINK: {
- sink_caps.transaction_type = DDC_TRANSACTION_TYPE_I2C;
- sink_caps.signal = SIGNAL_TYPE_DVI_DUAL_LINK;
- break;
- }
-
- case SIGNAL_TYPE_LVDS: {
- sink_caps.transaction_type = DDC_TRANSACTION_TYPE_I2C;
- sink_caps.signal = SIGNAL_TYPE_LVDS;
- break;
- }
-
- case SIGNAL_TYPE_EDP: {
- read_current_link_settings_on_detect(link);
-
- detect_edp_sink_caps(link);
- read_current_link_settings_on_detect(link);
- sink_caps.transaction_type = DDC_TRANSACTION_TYPE_I2C_OVER_AUX;
- sink_caps.signal = SIGNAL_TYPE_EDP;
- break;
- }
-
- case SIGNAL_TYPE_DISPLAY_PORT: {
- /* wa HPD high coming too early*/
- if (link->ep_type == DISPLAY_ENDPOINT_PHY &&
- link->link_enc->features.flags.bits.DP_IS_USB_C == 1) {
- /* if alt mode times out, return false */
- if (!wait_for_entering_dp_alt_mode(link))
- return false;
- }
-
- if (!detect_dp(link, &sink_caps, reason)) {
- if (prev_sink)
- dc_sink_release(prev_sink);
- return false;
- }
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dp_get_link_encoding_format(&link->reported_link_cap) == DP_128b_132b_ENCODING)
- add_dp_hpo_link_encoder_to_link(link);
-#endif
-
- if (link->type == dc_connection_mst_branch) {
- LINK_INFO("link=%d, mst branch is now Connected\n",
- link->link_index);
- /* Need to setup mst link_cap struct here
- * otherwise dc_link_detect() will leave mst link_cap
- * empty which leads to allocate_mst_payload() has "0"
- * pbn_per_slot value leading to exception on dc_fixpt_div()
- */
- dp_verify_mst_link_cap(link);
-
- /*
- * This call will initiate MST topology discovery. Which
- * will detect MST ports and add new DRM connector DRM
- * framework. Then read EDID via remote i2c over aux. In
- * the end, will notify DRM detect result and save EDID
- * into DRM framework.
- *
- * .detect is called by .fill_modes.
- * .fill_modes is called by user mode ioctl
- * DRM_IOCTL_MODE_GETCONNECTOR.
- *
- * .get_modes is called by .fill_modes.
- *
- * call .get_modes, AMDGPU DM implementation will create
- * new dc_sink and add to dc_link. For long HPD plug
- * in/out, MST has its own handle.
- *
- * Therefore, just after dc_create, link->sink is not
- * created for MST until user mode app calls
- * DRM_IOCTL_MODE_GETCONNECTOR.
- *
- * Need check ->sink usages in case ->sink = NULL
- * TODO: s3 resume check
- */
-
- dm_helpers_dp_update_branch_info(link->ctx, link);
- if (dm_helpers_dp_mst_start_top_mgr(link->ctx,
- link, reason == DETECT_REASON_BOOT)) {
- if (prev_sink)
- dc_sink_release(prev_sink);
- return false;
- } else {
- link->type = dc_connection_sst_branch;
- sink_caps.signal = SIGNAL_TYPE_DISPLAY_PORT;
- }
- }
-
- /* Active SST downstream branch device unplug*/
- if (link->type == dc_connection_sst_branch &&
- link->dpcd_caps.sink_count.bits.SINK_COUNT == 0) {
- if (prev_sink)
- /* Downstream unplug */
- dc_sink_release(prev_sink);
- return true;
- }
-
- /* disable audio for non DP to HDMI active sst converter */
- if (link->type == dc_connection_sst_branch &&
- is_dp_active_dongle(link) &&
- (link->dpcd_caps.dongle_type !=
- DISPLAY_DONGLE_DP_HDMI_CONVERTER))
- converter_disable_audio = true;
-
- // link switch from MST to non-MST stop topology manager
- if (pre_connection_type == dc_connection_mst_branch &&
- link->type != dc_connection_mst_branch)
- dm_helpers_dp_mst_stop_top_mgr(link->ctx, link);
-
-
- // For seamless boot, to skip verify link cap, we read UEFI settings and set them as verified.
- if (reason == DETECT_REASON_BOOT &&
- !dc_ctx->dc->config.power_down_display_on_boot &&
- link->link_status.link_active)
- perform_dp_seamless_boot = true;
-
- if (perform_dp_seamless_boot) {
- read_current_link_settings_on_detect(link);
- link->verified_link_cap = link->reported_link_cap;
- }
-
- break;
- }
-
- default:
- DC_ERROR("Invalid connector type! signal:%d\n",
- link->connector_signal);
- if (prev_sink)
- dc_sink_release(prev_sink);
- return false;
- } /* switch() */
-
- if (link->dpcd_caps.sink_count.bits.SINK_COUNT)
- link->dpcd_sink_count =
- link->dpcd_caps.sink_count.bits.SINK_COUNT;
- else
- link->dpcd_sink_count = 1;
-
- dal_ddc_service_set_transaction_type(link->ddc,
- sink_caps.transaction_type);
-
- link->aux_mode =
- dal_ddc_service_is_in_aux_transaction_mode(link->ddc);
-
- sink_init_data.link = link;
- sink_init_data.sink_signal = sink_caps.signal;
-
- sink = dc_sink_create(&sink_init_data);
- if (!sink) {
- DC_ERROR("Failed to create sink!\n");
- if (prev_sink)
- dc_sink_release(prev_sink);
- return false;
- }
-
- sink->link->dongle_max_pix_clk = sink_caps.max_hdmi_pixel_clock;
- sink->converter_disable_audio = converter_disable_audio;
-
- /* dc_sink_create returns a new reference */
- link->local_sink = sink;
-
- edid_status = dm_helpers_read_local_edid(link->ctx,
- link, sink);
-
- switch (edid_status) {
- case EDID_BAD_CHECKSUM:
- DC_LOG_ERROR("EDID checksum invalid.\n");
- break;
- case EDID_NO_RESPONSE:
- DC_LOG_ERROR("No EDID read.\n");
- /*
- * Abort detection for non-DP connectors if we have
- * no EDID
- *
- * DP needs to report as connected if HDP is high
- * even if we have no EDID in order to go to
- * fail-safe mode
- */
- if (dc_is_hdmi_signal(link->connector_signal) ||
- dc_is_dvi_signal(link->connector_signal)) {
- if (prev_sink)
- dc_sink_release(prev_sink);
-
- return false;
- }
- break;
- default:
- break;
- }
-
- // Check if edid is the same
- if ((prev_sink) &&
- (edid_status == EDID_THE_SAME || edid_status == EDID_OK))
- same_edid = is_same_edid(&prev_sink->dc_edid,
- &sink->dc_edid);
-
- if (sink->edid_caps.panel_patch.skip_scdc_overwrite)
- link->ctx->dc->debug.hdmi20_disable = true;
-
- if (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT &&
- sink_caps.transaction_type ==
- DDC_TRANSACTION_TYPE_I2C_OVER_AUX) {
- /*
- * TODO debug why Dell 2413 doesn't like
- * two link trainings
- */
-#if defined(CONFIG_DRM_AMD_DC_HDCP)
- query_hdcp_capability(sink->sink_signal, link);
-#endif
-
- // verify link cap for SST non-seamless boot
- if (!perform_dp_seamless_boot)
- dp_verify_link_cap_with_retries(link,
- &link->reported_link_cap,
- LINK_TRAINING_MAX_VERIFY_RETRY);
- } else {
- // If edid is the same, then discard new sink and revert back to original sink
- if (same_edid) {
- link_disconnect_remap(prev_sink, link);
- sink = prev_sink;
- prev_sink = NULL;
- }
-#if defined(CONFIG_DRM_AMD_DC_HDCP)
- query_hdcp_capability(sink->sink_signal, link);
-#endif
- }
-
- /* HDMI-DVI Dongle */
- if (sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A &&
- !sink->edid_caps.edid_hdmi)
- sink->sink_signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
-
- /* Connectivity log: detection */
- for (i = 0; i < sink->dc_edid.length / DC_EDID_BLOCK_SIZE; i++) {
- CONN_DATA_DETECT(link,
- &sink->dc_edid.raw_edid[i * DC_EDID_BLOCK_SIZE],
- DC_EDID_BLOCK_SIZE,
- "%s: [Block %d] ", sink->edid_caps.display_name, i);
- }
-
- DC_LOG_DETECTION_EDID_PARSER("%s: "
- "manufacturer_id = %X, "
- "product_id = %X, "
- "serial_number = %X, "
- "manufacture_week = %d, "
- "manufacture_year = %d, "
- "display_name = %s, "
- "speaker_flag = %d, "
- "audio_mode_count = %d\n",
- __func__,
- sink->edid_caps.manufacturer_id,
- sink->edid_caps.product_id,
- sink->edid_caps.serial_number,
- sink->edid_caps.manufacture_week,
- sink->edid_caps.manufacture_year,
- sink->edid_caps.display_name,
- sink->edid_caps.speaker_flags,
- sink->edid_caps.audio_mode_count);
-
- for (i = 0; i < sink->edid_caps.audio_mode_count; i++) {
- DC_LOG_DETECTION_EDID_PARSER("%s: mode number = %d, "
- "format_code = %d, "
- "channel_count = %d, "
- "sample_rate = %d, "
- "sample_size = %d\n",
- __func__,
- i,
- sink->edid_caps.audio_modes[i].format_code,
- sink->edid_caps.audio_modes[i].channel_count,
- sink->edid_caps.audio_modes[i].sample_rate,
- sink->edid_caps.audio_modes[i].sample_size);
- }
- } else {
- /* From Connected-to-Disconnected. */
- if (link->type == dc_connection_mst_branch) {
- LINK_INFO("link=%d, mst branch is now Disconnected\n",
- link->link_index);
-
- /* Disable work around which keeps DSC on for tunneled MST on certain USB4 docks. */
- if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
- link->wa_flags.dpia_mst_dsc_always_on = false;
-
- dm_helpers_dp_mst_stop_top_mgr(link->ctx, link);
-
- link->mst_stream_alloc_table.stream_count = 0;
- memset(link->mst_stream_alloc_table.stream_allocations,
- 0,
- sizeof(link->mst_stream_alloc_table.stream_allocations));
- }
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dp_get_link_encoding_format(&link->cur_link_settings) == DP_128b_132b_ENCODING)
- reset_dp_hpo_stream_encoders_for_link(link);
-#endif
-
- link->type = dc_connection_none;
- sink_caps.signal = SIGNAL_TYPE_NONE;
- /* When we unplug a passive DP-HDMI dongle connection, dongle_max_pix_clk
- * is not cleared. If we emulate a DP signal on this connection, it thinks
- * the dongle is still there and limits the number of modes we can emulate.
- * Clear dongle_max_pix_clk on disconnect to fix this
- */
- link->dongle_max_pix_clk = 0;
- }
-
- LINK_INFO("link=%d, dc_sink_in=%p is now %s prev_sink=%p edid same=%d\n",
- link->link_index, sink,
- (sink_caps.signal ==
- SIGNAL_TYPE_NONE ? "Disconnected" : "Connected"),
- prev_sink, same_edid);
-
- if (prev_sink)
- dc_sink_release(prev_sink);
-
- return true;
-}
-
-bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
-{
- const struct dc *dc = link->dc;
- bool ret;
- bool can_apply_seamless_boot = false;
- int i;
-
- for (i = 0; i < dc->current_state->stream_count; i++) {
- if (dc->current_state->streams[i]->apply_seamless_boot_optimization) {
- can_apply_seamless_boot = true;
- break;
- }
- }
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- dc_z10_restore(dc);
-#endif
-
- /* get out of low power state */
- if (!can_apply_seamless_boot && reason != DETECT_REASON_BOOT)
- clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr);
-
- ret = dc_link_detect_helper(link, reason);
-
- /* Go back to power optimized state */
- if (!can_apply_seamless_boot && reason != DETECT_REASON_BOOT)
- clk_mgr_optimize_pwr_state(dc, dc->clk_mgr);
-
- return ret;
-}
-
-bool dc_link_get_hpd_state(struct dc_link *dc_link)
-{
- uint32_t state;
-
- dal_gpio_lock_pin(dc_link->hpd_gpio);
- dal_gpio_get_value(dc_link->hpd_gpio, &state);
- dal_gpio_unlock_pin(dc_link->hpd_gpio);
-
- return state;
-}
-
-static enum hpd_source_id get_hpd_line(struct dc_link *link)
-{
- struct gpio *hpd;
- enum hpd_source_id hpd_id = HPD_SOURCEID_UNKNOWN;
-
- hpd = get_hpd_gpio(link->ctx->dc_bios, link->link_id,
- link->ctx->gpio_service);
-
- if (hpd) {
- switch (dal_irq_get_source(hpd)) {
- case DC_IRQ_SOURCE_HPD1:
- hpd_id = HPD_SOURCEID1;
- break;
- case DC_IRQ_SOURCE_HPD2:
- hpd_id = HPD_SOURCEID2;
- break;
- case DC_IRQ_SOURCE_HPD3:
- hpd_id = HPD_SOURCEID3;
- break;
- case DC_IRQ_SOURCE_HPD4:
- hpd_id = HPD_SOURCEID4;
- break;
- case DC_IRQ_SOURCE_HPD5:
- hpd_id = HPD_SOURCEID5;
- break;
- case DC_IRQ_SOURCE_HPD6:
- hpd_id = HPD_SOURCEID6;
- break;
- default:
- BREAK_TO_DEBUGGER();
- break;
- }
-
- dal_gpio_destroy_irq(&hpd);
- }
-
- return hpd_id;
-}
-
-static enum channel_id get_ddc_line(struct dc_link *link)
-{
- struct ddc *ddc;
- enum channel_id channel = CHANNEL_ID_UNKNOWN;
-
- ddc = dal_ddc_service_get_ddc_pin(link->ddc);
-
- if (ddc) {
- switch (dal_ddc_get_line(ddc)) {
- case GPIO_DDC_LINE_DDC1:
- channel = CHANNEL_ID_DDC1;
- break;
- case GPIO_DDC_LINE_DDC2:
- channel = CHANNEL_ID_DDC2;
- break;
- case GPIO_DDC_LINE_DDC3:
- channel = CHANNEL_ID_DDC3;
- break;
- case GPIO_DDC_LINE_DDC4:
- channel = CHANNEL_ID_DDC4;
- break;
- case GPIO_DDC_LINE_DDC5:
- channel = CHANNEL_ID_DDC5;
- break;
- case GPIO_DDC_LINE_DDC6:
- channel = CHANNEL_ID_DDC6;
- break;
- case GPIO_DDC_LINE_DDC_VGA:
- channel = CHANNEL_ID_DDC_VGA;
- break;
- case GPIO_DDC_LINE_I2C_PAD:
- channel = CHANNEL_ID_I2C_PAD;
- break;
- default:
- BREAK_TO_DEBUGGER();
- break;
- }
- }
-
- return channel;
-}
-
-static enum transmitter translate_encoder_to_transmitter(struct graphics_object_id encoder)
-{
- switch (encoder.id) {
- case ENCODER_ID_INTERNAL_UNIPHY:
- switch (encoder.enum_id) {
- case ENUM_ID_1:
- return TRANSMITTER_UNIPHY_A;
- case ENUM_ID_2:
- return TRANSMITTER_UNIPHY_B;
- default:
- return TRANSMITTER_UNKNOWN;
- }
- break;
- case ENCODER_ID_INTERNAL_UNIPHY1:
- switch (encoder.enum_id) {
- case ENUM_ID_1:
- return TRANSMITTER_UNIPHY_C;
- case ENUM_ID_2:
- return TRANSMITTER_UNIPHY_D;
- default:
- return TRANSMITTER_UNKNOWN;
- }
- break;
- case ENCODER_ID_INTERNAL_UNIPHY2:
- switch (encoder.enum_id) {
- case ENUM_ID_1:
- return TRANSMITTER_UNIPHY_E;
- case ENUM_ID_2:
- return TRANSMITTER_UNIPHY_F;
- default:
- return TRANSMITTER_UNKNOWN;
- }
- break;
- case ENCODER_ID_INTERNAL_UNIPHY3:
- switch (encoder.enum_id) {
- case ENUM_ID_1:
- return TRANSMITTER_UNIPHY_G;
- default:
- return TRANSMITTER_UNKNOWN;
- }
- break;
- case ENCODER_ID_EXTERNAL_NUTMEG:
- switch (encoder.enum_id) {
- case ENUM_ID_1:
- return TRANSMITTER_NUTMEG_CRT;
- default:
- return TRANSMITTER_UNKNOWN;
- }
- break;
- case ENCODER_ID_EXTERNAL_TRAVIS:
- switch (encoder.enum_id) {
- case ENUM_ID_1:
- return TRANSMITTER_TRAVIS_CRT;
- case ENUM_ID_2:
- return TRANSMITTER_TRAVIS_LCD;
- default:
- return TRANSMITTER_UNKNOWN;
- }
- break;
- default:
- return TRANSMITTER_UNKNOWN;
- }
-}
-
-static bool dc_link_construct_legacy(struct dc_link *link,
- const struct link_init_data *init_params)
-{
- uint8_t i;
- struct ddc_service_init_data ddc_service_init_data = { { 0 } };
- struct dc_context *dc_ctx = init_params->ctx;
- struct encoder_init_data enc_init_data = { 0 };
- struct panel_cntl_init_data panel_cntl_init_data = { 0 };
- struct integrated_info *info;
- struct dc_bios *bios = init_params->dc->ctx->dc_bios;
- const struct dc_vbios_funcs *bp_funcs = bios->funcs;
- struct bp_disp_connector_caps_info disp_connect_caps_info = { 0 };
-
- DC_LOGGER_INIT(dc_ctx->logger);
-
- info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (!info)
- goto create_fail;
-
- link->irq_source_hpd = DC_IRQ_SOURCE_INVALID;
- link->irq_source_hpd_rx = DC_IRQ_SOURCE_INVALID;
-
- link->link_status.dpcd_caps = &link->dpcd_caps;
-
- link->dc = init_params->dc;
- link->ctx = dc_ctx;
- link->link_index = init_params->link_index;
-
- memset(&link->preferred_training_settings, 0,
- sizeof(struct dc_link_training_overrides));
- memset(&link->preferred_link_setting, 0,
- sizeof(struct dc_link_settings));
-
- link->link_id =
- bios->funcs->get_connector_id(bios, init_params->connector_index);
-
- link->ep_type = DISPLAY_ENDPOINT_PHY;
-
- DC_LOG_DC("BIOS object table - link_id: %d", link->link_id.id);
-
- if (bios->funcs->get_disp_connector_caps_info) {
- bios->funcs->get_disp_connector_caps_info(bios, link->link_id, &disp_connect_caps_info);
- link->is_internal_display = disp_connect_caps_info.INTERNAL_DISPLAY;
- DC_LOG_DC("BIOS object table - is_internal_display: %d", link->is_internal_display);
- }
-
- if (link->link_id.type != OBJECT_TYPE_CONNECTOR) {
- dm_output_to_console("%s: Invalid Connector ObjectID from Adapter Service for connector index:%d! type %d expected %d\n",
- __func__, init_params->connector_index,
- link->link_id.type, OBJECT_TYPE_CONNECTOR);
- goto create_fail;
- }
-
- if (link->dc->res_pool->funcs->link_init)
- link->dc->res_pool->funcs->link_init(link);
-
- link->hpd_gpio = get_hpd_gpio(link->ctx->dc_bios, link->link_id,
- link->ctx->gpio_service);
-
- if (link->hpd_gpio) {
- dal_gpio_open(link->hpd_gpio, GPIO_MODE_INTERRUPT);
- dal_gpio_unlock_pin(link->hpd_gpio);
- link->irq_source_hpd = dal_irq_get_source(link->hpd_gpio);
-
- DC_LOG_DC("BIOS object table - hpd_gpio id: %d", link->hpd_gpio->id);
- DC_LOG_DC("BIOS object table - hpd_gpio en: %d", link->hpd_gpio->en);
- }
-
- switch (link->link_id.id) {
- case CONNECTOR_ID_HDMI_TYPE_A:
- link->connector_signal = SIGNAL_TYPE_HDMI_TYPE_A;
-
- break;
- case CONNECTOR_ID_SINGLE_LINK_DVID:
- case CONNECTOR_ID_SINGLE_LINK_DVII:
- link->connector_signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
- break;
- case CONNECTOR_ID_DUAL_LINK_DVID:
- case CONNECTOR_ID_DUAL_LINK_DVII:
- link->connector_signal = SIGNAL_TYPE_DVI_DUAL_LINK;
- break;
- case CONNECTOR_ID_DISPLAY_PORT:
- link->connector_signal = SIGNAL_TYPE_DISPLAY_PORT;
-
- if (link->hpd_gpio)
- link->irq_source_hpd_rx =
- dal_irq_get_rx_source(link->hpd_gpio);
-
- break;
- case CONNECTOR_ID_EDP:
- link->connector_signal = SIGNAL_TYPE_EDP;
-
- if (link->hpd_gpio) {
- if (!link->dc->config.allow_edp_hotplug_detection)
- link->irq_source_hpd = DC_IRQ_SOURCE_INVALID;
- link->irq_source_hpd_rx =
- dal_irq_get_rx_source(link->hpd_gpio);
- }
-
- break;
- case CONNECTOR_ID_LVDS:
- link->connector_signal = SIGNAL_TYPE_LVDS;
- break;
- default:
- DC_LOG_WARNING("Unsupported Connector type:%d!\n",
- link->link_id.id);
- goto create_fail;
- }
-
- /* TODO: #DAL3 Implement id to str function.*/
- LINK_INFO("Connector[%d] description:"
- "signal %d\n",
- init_params->connector_index,
- link->connector_signal);
-
- ddc_service_init_data.ctx = link->ctx;
- ddc_service_init_data.id = link->link_id;
- ddc_service_init_data.link = link;
- link->ddc = dal_ddc_service_create(&ddc_service_init_data);
-
- if (!link->ddc) {
- DC_ERROR("Failed to create ddc_service!\n");
- goto ddc_create_fail;
- }
-
- if (!link->ddc->ddc_pin) {
- DC_ERROR("Failed to get I2C info for connector!\n");
- goto ddc_create_fail;
- }
-
- link->ddc_hw_inst =
- dal_ddc_get_line(dal_ddc_service_get_ddc_pin(link->ddc));
-
-
- if (link->dc->res_pool->funcs->panel_cntl_create &&
- (link->link_id.id == CONNECTOR_ID_EDP ||
- link->link_id.id == CONNECTOR_ID_LVDS)) {
- panel_cntl_init_data.ctx = dc_ctx;
- panel_cntl_init_data.inst =
- panel_cntl_init_data.ctx->dc_edp_id_count;
- link->panel_cntl =
- link->dc->res_pool->funcs->panel_cntl_create(
- &panel_cntl_init_data);
- panel_cntl_init_data.ctx->dc_edp_id_count++;
-
- if (link->panel_cntl == NULL) {
- DC_ERROR("Failed to create link panel_cntl!\n");
- goto panel_cntl_create_fail;
- }
- }
-
- enc_init_data.ctx = dc_ctx;
- bp_funcs->get_src_obj(dc_ctx->dc_bios, link->link_id, 0,
- &enc_init_data.encoder);
- enc_init_data.connector = link->link_id;
- enc_init_data.channel = get_ddc_line(link);
- enc_init_data.hpd_source = get_hpd_line(link);
-
- link->hpd_src = enc_init_data.hpd_source;
-
- enc_init_data.transmitter =
- translate_encoder_to_transmitter(enc_init_data.encoder);
- link->link_enc =
- link->dc->res_pool->funcs->link_enc_create(&enc_init_data);
-
- if (!link->link_enc) {
- DC_ERROR("Failed to create link encoder!\n");
- goto link_enc_create_fail;
- }
-
- DC_LOG_DC("BIOS object table - DP_IS_USB_C: %d", link->link_enc->features.flags.bits.DP_IS_USB_C);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- DC_LOG_DC("BIOS object table - IS_DP2_CAPABLE: %d", link->link_enc->features.flags.bits.IS_DP2_CAPABLE);
-#endif
-
- /* Update link encoder tracking variables. These are used for the dynamic
- * assignment of link encoders to streams.
- */
- link->eng_id = link->link_enc->preferred_engine;
- link->dc->res_pool->link_encoders[link->eng_id - ENGINE_ID_DIGA] = link->link_enc;
- link->dc->res_pool->dig_link_enc_count++;
-
- link->link_enc_hw_inst = link->link_enc->transmitter;
-
- for (i = 0; i < 4; i++) {
- if (bp_funcs->get_device_tag(dc_ctx->dc_bios,
- link->link_id, i,
- &link->device_tag) != BP_RESULT_OK) {
- DC_ERROR("Failed to find device tag!\n");
- goto device_tag_fail;
- }
-
- /* Look for device tag that matches connector signal,
- * CRT for rgb, LCD for other supported signal tyes
- */
- if (!bp_funcs->is_device_id_supported(dc_ctx->dc_bios,
- link->device_tag.dev_id))
- continue;
- if (link->device_tag.dev_id.device_type == DEVICE_TYPE_CRT &&
- link->connector_signal != SIGNAL_TYPE_RGB)
- continue;
- if (link->device_tag.dev_id.device_type == DEVICE_TYPE_LCD &&
- link->connector_signal == SIGNAL_TYPE_RGB)
- continue;
-
- DC_LOG_DC("BIOS object table - device_tag.acpi_device: %d", link->device_tag.acpi_device);
- DC_LOG_DC("BIOS object table - device_tag.dev_id.device_type: %d", link->device_tag.dev_id.device_type);
- DC_LOG_DC("BIOS object table - device_tag.dev_id.enum_id: %d", link->device_tag.dev_id.enum_id);
- break;
- }
-
- if (bios->integrated_info)
- memcpy(info, bios->integrated_info, sizeof(*info));
-
- /* Look for channel mapping corresponding to connector and device tag */
- for (i = 0; i < MAX_NUMBER_OF_EXT_DISPLAY_PATH; i++) {
- struct external_display_path *path =
- &info->ext_disp_conn_info.path[i];
-
- if (path->device_connector_id.enum_id == link->link_id.enum_id &&
- path->device_connector_id.id == link->link_id.id &&
- path->device_connector_id.type == link->link_id.type) {
- if (link->device_tag.acpi_device != 0 &&
- path->device_acpi_enum == link->device_tag.acpi_device) {
- link->ddi_channel_mapping = path->channel_mapping;
- link->chip_caps = path->caps;
- DC_LOG_DC("BIOS object table - ddi_channel_mapping: 0x%04X", link->ddi_channel_mapping.raw);
- DC_LOG_DC("BIOS object table - chip_caps: %d", link->chip_caps);
- } else if (path->device_tag ==
- link->device_tag.dev_id.raw_device_tag) {
- link->ddi_channel_mapping = path->channel_mapping;
- link->chip_caps = path->caps;
- DC_LOG_DC("BIOS object table - ddi_channel_mapping: 0x%04X", link->ddi_channel_mapping.raw);
- DC_LOG_DC("BIOS object table - chip_caps: %d", link->chip_caps);
- }
-
- if (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) {
- link->bios_forced_drive_settings.VOLTAGE_SWING =
- (info->ext_disp_conn_info.fixdpvoltageswing & 0x3);
- link->bios_forced_drive_settings.PRE_EMPHASIS =
- ((info->ext_disp_conn_info.fixdpvoltageswing >> 2) & 0x3);
- }
-
- break;
- }
- }
-
- if (bios->funcs->get_atom_dc_golden_table)
- bios->funcs->get_atom_dc_golden_table(bios);
-
- /*
- * TODO check if GPIO programmed correctly
- *
- * If GPIO isn't programmed correctly HPD might not rise or drain
- * fast enough, leading to bounces.
- */
- program_hpd_filter(link);
-
- link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED;
-
- DC_LOG_DC("BIOS object table - %s finished successfully.\n", __func__);
- kfree(info);
- return true;
-device_tag_fail:
- link->link_enc->funcs->destroy(&link->link_enc);
-link_enc_create_fail:
- if (link->panel_cntl != NULL)
- link->panel_cntl->funcs->destroy(&link->panel_cntl);
-panel_cntl_create_fail:
- dal_ddc_service_destroy(&link->ddc);
-ddc_create_fail:
-create_fail:
-
- if (link->hpd_gpio) {
- dal_gpio_destroy_irq(&link->hpd_gpio);
- link->hpd_gpio = NULL;
- }
-
- DC_LOG_DC("BIOS object table - %s failed.\n", __func__);
- kfree(info);
-
- return false;
-}
-
-static bool dc_link_construct_dpia(struct dc_link *link,
- const struct link_init_data *init_params)
-{
- struct ddc_service_init_data ddc_service_init_data = { { 0 } };
- struct dc_context *dc_ctx = init_params->ctx;
-
- DC_LOGGER_INIT(dc_ctx->logger);
-
- /* Initialized irq source for hpd and hpd rx */
- link->irq_source_hpd = DC_IRQ_SOURCE_INVALID;
- link->irq_source_hpd_rx = DC_IRQ_SOURCE_INVALID;
- link->link_status.dpcd_caps = &link->dpcd_caps;
-
- link->dc = init_params->dc;
- link->ctx = dc_ctx;
- link->link_index = init_params->link_index;
-
- memset(&link->preferred_training_settings, 0,
- sizeof(struct dc_link_training_overrides));
- memset(&link->preferred_link_setting, 0,
- sizeof(struct dc_link_settings));
-
- /* Dummy Init for linkid */
- link->link_id.type = OBJECT_TYPE_CONNECTOR;
- link->link_id.id = CONNECTOR_ID_DISPLAY_PORT;
- link->link_id.enum_id = ENUM_ID_1 + init_params->connector_index;
- link->is_internal_display = false;
- link->connector_signal = SIGNAL_TYPE_DISPLAY_PORT;
- LINK_INFO("Connector[%d] description:signal %d\n",
- init_params->connector_index,
- link->connector_signal);
-
- link->ep_type = DISPLAY_ENDPOINT_USB4_DPIA;
- link->is_dig_mapping_flexible = true;
-
- /* TODO: Initialize link : funcs->link_init */
-
- ddc_service_init_data.ctx = link->ctx;
- ddc_service_init_data.id = link->link_id;
- ddc_service_init_data.link = link;
- /* Set indicator for dpia link so that ddc won't be created */
- ddc_service_init_data.is_dpia_link = true;
-
- link->ddc = dal_ddc_service_create(&ddc_service_init_data);
- if (!link->ddc) {
- DC_ERROR("Failed to create ddc_service!\n");
- goto ddc_create_fail;
- }
-
- /* Set dpia port index : 0 to number of dpia ports */
- link->ddc_hw_inst = init_params->connector_index;
-
- /* TODO: Create link encoder */
-
- link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED;
-
- /* Some docks seem to NAK I2C writes to segment pointer with mot=0. */
- link->wa_flags.dp_mot_reset_segment = true;
-
- return true;
-
-ddc_create_fail:
- return false;
-}
-
-static bool dc_link_construct(struct dc_link *link,
- const struct link_init_data *init_params)
-{
- /* Handle dpia case */
- if (init_params->is_dpia_link)
- return dc_link_construct_dpia(link, init_params);
- else
- return dc_link_construct_legacy(link, init_params);
-}
-/*******************************************************************************
- * Public functions
- ******************************************************************************/
-struct dc_link *link_create(const struct link_init_data *init_params)
-{
- struct dc_link *link =
- kzalloc(sizeof(*link), GFP_KERNEL);
-
- if (NULL == link)
- goto alloc_fail;
-
- if (false == dc_link_construct(link, init_params))
- goto construct_fail;
-
- /*
- * Must use preferred_link_setting, not reported_link_cap or verified_link_cap,
- * since struct preferred_link_setting won't be reset after S3.
- */
- link->preferred_link_setting.dpcd_source_device_specific_field_support = true;
-
- return link;
-
-construct_fail:
- kfree(link);
-
-alloc_fail:
- return NULL;
-}
-
-void link_destroy(struct dc_link **link)
-{
- dc_link_destruct(*link);
- kfree(*link);
- *link = NULL;
-}
-
-static void enable_stream_features(struct pipe_ctx *pipe_ctx)
-{
- struct dc_stream_state *stream = pipe_ctx->stream;
-
- if (pipe_ctx->stream->signal != SIGNAL_TYPE_DISPLAY_PORT_MST) {
- struct dc_link *link = stream->link;
- union down_spread_ctrl old_downspread;
- union down_spread_ctrl new_downspread;
-
- core_link_read_dpcd(link, DP_DOWNSPREAD_CTRL,
- &old_downspread.raw, sizeof(old_downspread));
-
- new_downspread.raw = old_downspread.raw;
-
- new_downspread.bits.IGNORE_MSA_TIMING_PARAM =
- (stream->ignore_msa_timing_param) ? 1 : 0;
-
- if (new_downspread.raw != old_downspread.raw) {
- core_link_write_dpcd(link, DP_DOWNSPREAD_CTRL,
- &new_downspread.raw, sizeof(new_downspread));
- }
-
- } else {
- dm_helpers_mst_enable_stream_features(stream);
- }
-}
-
-static enum dc_status enable_link_dp(struct dc_state *state,
- struct pipe_ctx *pipe_ctx)
-{
- struct dc_stream_state *stream = pipe_ctx->stream;
- enum dc_status status;
- bool skip_video_pattern;
- struct dc_link *link = stream->link;
- struct dc_link_settings link_settings = {0};
- bool fec_enable;
- int i;
- bool apply_seamless_boot_optimization = false;
- uint32_t bl_oled_enable_delay = 50; // in ms
- const uint32_t post_oui_delay = 30; // 30ms
- /* Reduce link bandwidth between failed link training attempts. */
- bool do_fallback = false;
-
- // check for seamless boot
- for (i = 0; i < state->stream_count; i++) {
- if (state->streams[i]->apply_seamless_boot_optimization) {
- apply_seamless_boot_optimization = true;
- break;
- }
- }
-
- /* get link settings for video mode timing */
- decide_link_settings(stream, &link_settings);
-
- /* Train with fallback when enabling DPIA link. Conventional links are
- * trained with fallback during sink detection.
- */
- if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
- do_fallback = true;
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- /*
- * Temporary w/a to get DP2.0 link rates to work with SST.
- * TODO DP2.0 - Workaround: Remove w/a if and when the issue is resolved.
- */
- if (dp_get_link_encoding_format(&link_settings) == DP_128b_132b_ENCODING &&
- pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT &&
- link->dc->debug.set_mst_en_for_sst) {
- dp_enable_mst_on_sink(link, true);
- }
-#endif
-
- if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP) {
- /*in case it is not on*/
- link->dc->hwss.edp_power_control(link, true);
- link->dc->hwss.edp_wait_for_hpd_ready(link, true);
- }
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dp_get_link_encoding_format(&link_settings) == DP_128b_132b_ENCODING) {
- /* TODO - DP2.0 HW: calculate 32 symbol clock for HPO encoder */
- } else {
- pipe_ctx->stream_res.pix_clk_params.requested_sym_clk =
- link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ;
- if (state->clk_mgr && !apply_seamless_boot_optimization)
- state->clk_mgr->funcs->update_clocks(state->clk_mgr,
- state, false);
- }
-#else
- pipe_ctx->stream_res.pix_clk_params.requested_sym_clk =
- link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ;
- if (state->clk_mgr && !apply_seamless_boot_optimization)
- state->clk_mgr->funcs->update_clocks(state->clk_mgr,
- state, false);
-#endif
-
- // during mode switch we do DP_SET_POWER off then on, and OUI is lost
- dpcd_set_source_specific_data(link);
- if (link->dpcd_sink_ext_caps.raw != 0)
- msleep(post_oui_delay);
-
- skip_video_pattern = true;
-
- if (link_settings.link_rate == LINK_RATE_LOW)
- skip_video_pattern = false;
-
- if (perform_link_training_with_retries(&link_settings,
- skip_video_pattern,
- LINK_TRAINING_ATTEMPTS,
- pipe_ctx,
- pipe_ctx->stream->signal,
- do_fallback)) {
- link->cur_link_settings = link_settings;
- status = DC_OK;
- } else {
- status = DC_FAIL_DP_LINK_TRAINING;
- }
-
- if (link->preferred_training_settings.fec_enable)
- fec_enable = *link->preferred_training_settings.fec_enable;
- else
- fec_enable = true;
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dp_get_link_encoding_format(&link_settings) == DP_8b_10b_ENCODING)
- dp_set_fec_enable(link, fec_enable);
-#else
- dp_set_fec_enable(link, fec_enable);
-#endif
-
- // during mode set we do DP_SET_POWER off then on, aux writes are lost
- if (link->dpcd_sink_ext_caps.bits.oled == 1 ||
- link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1 ||
- link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1) {
- dc_link_set_default_brightness_aux(link); // TODO: use cached if known
- if (link->dpcd_sink_ext_caps.bits.oled == 1)
- msleep(bl_oled_enable_delay);
- dc_link_backlight_enable_aux(link, true);
- }
-
- return status;
-}
-
-static enum dc_status enable_link_edp(
- struct dc_state *state,
- struct pipe_ctx *pipe_ctx)
-{
- enum dc_status status;
-
- status = enable_link_dp(state, pipe_ctx);
-
- return status;
-}
-
-static enum dc_status enable_link_dp_mst(
- struct dc_state *state,
- struct pipe_ctx *pipe_ctx)
-{
- struct dc_link *link = pipe_ctx->stream->link;
-
- /* sink signal type after MST branch is MST. Multiple MST sinks
- * share one link. Link DP PHY is enable or training only once.
- */
- if (link->link_status.link_active)
- return DC_OK;
-
- /* clear payload table */
- dm_helpers_dp_mst_clear_payload_allocation_table(link->ctx, link);
-
- /* to make sure the pending down rep can be processed
- * before enabling the link
- */
- dm_helpers_dp_mst_poll_pending_down_reply(link->ctx, link);
-
- /* set the sink to MST mode before enabling the link */
- dp_enable_mst_on_sink(link, true);
-
- return enable_link_dp(state, pipe_ctx);
-}
-
-static bool get_ext_hdmi_settings(struct pipe_ctx *pipe_ctx,
- enum engine_id eng_id,
- struct ext_hdmi_settings *settings)
-{
- bool result = false;
- int i = 0;
- struct integrated_info *integrated_info =
- pipe_ctx->stream->ctx->dc_bios->integrated_info;
-
- if (integrated_info == NULL)
- return false;
-
- /*
- * Get retimer settings from sbios for passing SI eye test for DCE11
- * The setting values are varied based on board revision and port id
- * Therefore the setting values of each ports is passed by sbios.
- */
-
- // Check if current bios contains ext Hdmi settings
- if (integrated_info->gpu_cap_info & 0x20) {
- switch (eng_id) {
- case ENGINE_ID_DIGA:
- settings->slv_addr = integrated_info->dp0_ext_hdmi_slv_addr;
- settings->reg_num = integrated_info->dp0_ext_hdmi_6g_reg_num;
- settings->reg_num_6g = integrated_info->dp0_ext_hdmi_6g_reg_num;
- memmove(settings->reg_settings,
- integrated_info->dp0_ext_hdmi_reg_settings,
- sizeof(integrated_info->dp0_ext_hdmi_reg_settings));
- memmove(settings->reg_settings_6g,
- integrated_info->dp0_ext_hdmi_6g_reg_settings,
- sizeof(integrated_info->dp0_ext_hdmi_6g_reg_settings));
- result = true;
- break;
- case ENGINE_ID_DIGB:
- settings->slv_addr = integrated_info->dp1_ext_hdmi_slv_addr;
- settings->reg_num = integrated_info->dp1_ext_hdmi_6g_reg_num;
- settings->reg_num_6g = integrated_info->dp1_ext_hdmi_6g_reg_num;
- memmove(settings->reg_settings,
- integrated_info->dp1_ext_hdmi_reg_settings,
- sizeof(integrated_info->dp1_ext_hdmi_reg_settings));
- memmove(settings->reg_settings_6g,
- integrated_info->dp1_ext_hdmi_6g_reg_settings,
- sizeof(integrated_info->dp1_ext_hdmi_6g_reg_settings));
- result = true;
- break;
- case ENGINE_ID_DIGC:
- settings->slv_addr = integrated_info->dp2_ext_hdmi_slv_addr;
- settings->reg_num = integrated_info->dp2_ext_hdmi_6g_reg_num;
- settings->reg_num_6g = integrated_info->dp2_ext_hdmi_6g_reg_num;
- memmove(settings->reg_settings,
- integrated_info->dp2_ext_hdmi_reg_settings,
- sizeof(integrated_info->dp2_ext_hdmi_reg_settings));
- memmove(settings->reg_settings_6g,
- integrated_info->dp2_ext_hdmi_6g_reg_settings,
- sizeof(integrated_info->dp2_ext_hdmi_6g_reg_settings));
- result = true;
- break;
- case ENGINE_ID_DIGD:
- settings->slv_addr = integrated_info->dp3_ext_hdmi_slv_addr;
- settings->reg_num = integrated_info->dp3_ext_hdmi_6g_reg_num;
- settings->reg_num_6g = integrated_info->dp3_ext_hdmi_6g_reg_num;
- memmove(settings->reg_settings,
- integrated_info->dp3_ext_hdmi_reg_settings,
- sizeof(integrated_info->dp3_ext_hdmi_reg_settings));
- memmove(settings->reg_settings_6g,
- integrated_info->dp3_ext_hdmi_6g_reg_settings,
- sizeof(integrated_info->dp3_ext_hdmi_6g_reg_settings));
- result = true;
- break;
- default:
- break;
- }
-
- if (result == true) {
- // Validate settings from bios integrated info table
- if (settings->slv_addr == 0)
- return false;
- if (settings->reg_num > 9)
- return false;
- if (settings->reg_num_6g > 3)
- return false;
-
- for (i = 0; i < settings->reg_num; i++) {
- if (settings->reg_settings[i].i2c_reg_index > 0x20)
- return false;
- }
-
- for (i = 0; i < settings->reg_num_6g; i++) {
- if (settings->reg_settings_6g[i].i2c_reg_index > 0x20)
- return false;
- }
- }
- }
-
- return result;
-}
-
-static bool i2c_write(struct pipe_ctx *pipe_ctx,
- uint8_t address, uint8_t *buffer, uint32_t length)
-{
- struct i2c_command cmd = {0};
- struct i2c_payload payload = {0};
-
- memset(&payload, 0, sizeof(payload));
- memset(&cmd, 0, sizeof(cmd));
-
- cmd.number_of_payloads = 1;
- cmd.engine = I2C_COMMAND_ENGINE_DEFAULT;
- cmd.speed = pipe_ctx->stream->ctx->dc->caps.i2c_speed_in_khz;
-
- payload.address = address;
- payload.data = buffer;
- payload.length = length;
- payload.write = true;
- cmd.payloads = &payload;
-
- if (dm_helpers_submit_i2c(pipe_ctx->stream->ctx,
- pipe_ctx->stream->link, &cmd))
- return true;
-
- return false;
-}
-
-static void write_i2c_retimer_setting(
- struct pipe_ctx *pipe_ctx,
- bool is_vga_mode,
- bool is_over_340mhz,
- struct ext_hdmi_settings *settings)
-{
- uint8_t slave_address = (settings->slv_addr >> 1);
- uint8_t buffer[2];
- const uint8_t apply_rx_tx_change = 0x4;
- uint8_t offset = 0xA;
- uint8_t value = 0;
- int i = 0;
- bool i2c_success = false;
- DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger);
-
- memset(&buffer, 0, sizeof(buffer));
-
- /* Start Ext-Hdmi programming*/
-
- for (i = 0; i < settings->reg_num; i++) {
- /* Apply 3G settings */
- if (settings->reg_settings[i].i2c_reg_index <= 0x20) {
-
- buffer[0] = settings->reg_settings[i].i2c_reg_index;
- buffer[1] = settings->reg_settings[i].i2c_reg_val;
- i2c_success = i2c_write(pipe_ctx, slave_address,
- buffer, sizeof(buffer));
- RETIMER_REDRIVER_INFO("retimer write to slave_address = 0x%x,\
- offset = 0x%x, reg_val= 0x%x, i2c_success = %d\n",
- slave_address, buffer[0], buffer[1], i2c_success?1:0);
-
- if (!i2c_success)
- goto i2c_write_fail;
-
- /* Based on DP159 specs, APPLY_RX_TX_CHANGE bit in 0x0A
- * needs to be set to 1 on every 0xA-0xC write.
- */
- if (settings->reg_settings[i].i2c_reg_index == 0xA ||
- settings->reg_settings[i].i2c_reg_index == 0xB ||
- settings->reg_settings[i].i2c_reg_index == 0xC) {
-
- /* Query current value from offset 0xA */
- if (settings->reg_settings[i].i2c_reg_index == 0xA)
- value = settings->reg_settings[i].i2c_reg_val;
- else {
- i2c_success =
- dal_ddc_service_query_ddc_data(
- pipe_ctx->stream->link->ddc,
- slave_address, &offset, 1, &value, 1);
- if (!i2c_success)
- goto i2c_write_fail;
- }
-
- buffer[0] = offset;
- /* Set APPLY_RX_TX_CHANGE bit to 1 */
- buffer[1] = value | apply_rx_tx_change;
- i2c_success = i2c_write(pipe_ctx, slave_address,
- buffer, sizeof(buffer));
- RETIMER_REDRIVER_INFO("retimer write to slave_address = 0x%x,\
- offset = 0x%x, reg_val = 0x%x, i2c_success = %d\n",
- slave_address, buffer[0], buffer[1], i2c_success?1:0);
- if (!i2c_success)
- goto i2c_write_fail;
- }
- }
- }
-
- /* Apply 3G settings */
- if (is_over_340mhz) {
- for (i = 0; i < settings->reg_num_6g; i++) {
- /* Apply 3G settings */
- if (settings->reg_settings[i].i2c_reg_index <= 0x20) {
-
- buffer[0] = settings->reg_settings_6g[i].i2c_reg_index;
- buffer[1] = settings->reg_settings_6g[i].i2c_reg_val;
- i2c_success = i2c_write(pipe_ctx, slave_address,
- buffer, sizeof(buffer));
- RETIMER_REDRIVER_INFO("above 340Mhz: retimer write to slave_address = 0x%x,\
- offset = 0x%x, reg_val = 0x%x, i2c_success = %d\n",
- slave_address, buffer[0], buffer[1], i2c_success?1:0);
-
- if (!i2c_success)
- goto i2c_write_fail;
-
- /* Based on DP159 specs, APPLY_RX_TX_CHANGE bit in 0x0A
- * needs to be set to 1 on every 0xA-0xC write.
- */
- if (settings->reg_settings_6g[i].i2c_reg_index == 0xA ||
- settings->reg_settings_6g[i].i2c_reg_index == 0xB ||
- settings->reg_settings_6g[i].i2c_reg_index == 0xC) {
-
- /* Query current value from offset 0xA */
- if (settings->reg_settings_6g[i].i2c_reg_index == 0xA)
- value = settings->reg_settings_6g[i].i2c_reg_val;
- else {
- i2c_success =
- dal_ddc_service_query_ddc_data(
- pipe_ctx->stream->link->ddc,
- slave_address, &offset, 1, &value, 1);
- if (!i2c_success)
- goto i2c_write_fail;
- }
-
- buffer[0] = offset;
- /* Set APPLY_RX_TX_CHANGE bit to 1 */
- buffer[1] = value | apply_rx_tx_change;
- i2c_success = i2c_write(pipe_ctx, slave_address,
- buffer, sizeof(buffer));
- RETIMER_REDRIVER_INFO("retimer write to slave_address = 0x%x,\
- offset = 0x%x, reg_val = 0x%x, i2c_success = %d\n",
- slave_address, buffer[0], buffer[1], i2c_success?1:0);
- if (!i2c_success)
- goto i2c_write_fail;
- }
- }
- }
- }
-
- if (is_vga_mode) {
- /* Program additional settings if using 640x480 resolution */
-
- /* Write offset 0xFF to 0x01 */
- buffer[0] = 0xff;
- buffer[1] = 0x01;
- i2c_success = i2c_write(pipe_ctx, slave_address,
- buffer, sizeof(buffer));
- RETIMER_REDRIVER_INFO("retimer write to slave_address = 0x%x,\
- offset = 0x%x, reg_val = 0x%x, i2c_success = %d\n",
- slave_address, buffer[0], buffer[1], i2c_success?1:0);
- if (!i2c_success)
- goto i2c_write_fail;
-
- /* Write offset 0x00 to 0x23 */
- buffer[0] = 0x00;
- buffer[1] = 0x23;
- i2c_success = i2c_write(pipe_ctx, slave_address,
- buffer, sizeof(buffer));
- RETIMER_REDRIVER_INFO("retimer write to slave_address = 0x%x,\
- offset = 0x%x, reg_val = 0x%x, i2c_success = %d\n",
- slave_address, buffer[0], buffer[1], i2c_success?1:0);
- if (!i2c_success)
- goto i2c_write_fail;
-
- /* Write offset 0xff to 0x00 */
- buffer[0] = 0xff;
- buffer[1] = 0x00;
- i2c_success = i2c_write(pipe_ctx, slave_address,
- buffer, sizeof(buffer));
- RETIMER_REDRIVER_INFO("retimer write to slave_address = 0x%x,\
- offset = 0x%x, reg_val = 0x%x, i2c_success = %d\n",
- slave_address, buffer[0], buffer[1], i2c_success?1:0);
- if (!i2c_success)
- goto i2c_write_fail;
-
- }
-
- return;
-
-i2c_write_fail:
- DC_LOG_DEBUG("Set retimer failed");
-}
-
-static void write_i2c_default_retimer_setting(
- struct pipe_ctx *pipe_ctx,
- bool is_vga_mode,
- bool is_over_340mhz)
-{
- uint8_t slave_address = (0xBA >> 1);
- uint8_t buffer[2];
- bool i2c_success = false;
- DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger);
-
- memset(&buffer, 0, sizeof(buffer));
-
- /* Program Slave Address for tuning single integrity */
- /* Write offset 0x0A to 0x13 */
- buffer[0] = 0x0A;
- buffer[1] = 0x13;
- i2c_success = i2c_write(pipe_ctx, slave_address,
- buffer, sizeof(buffer));
- RETIMER_REDRIVER_INFO("retimer writes default setting to slave_address = 0x%x,\
- offset = 0x%x, reg_val = 0x%x, i2c_success = %d\n",
- slave_address, buffer[0], buffer[1], i2c_success?1:0);
- if (!i2c_success)
- goto i2c_write_fail;
-
- /* Write offset 0x0A to 0x17 */
- buffer[0] = 0x0A;
- buffer[1] = 0x17;
- i2c_success = i2c_write(pipe_ctx, slave_address,
- buffer, sizeof(buffer));
- RETIMER_REDRIVER_INFO("retimer write to slave_addr = 0x%x,\
- offset = 0x%x, reg_val = 0x%x, i2c_success = %d\n",
- slave_address, buffer[0], buffer[1], i2c_success?1:0);
- if (!i2c_success)
- goto i2c_write_fail;
-
- /* Write offset 0x0B to 0xDA or 0xD8 */
- buffer[0] = 0x0B;
- buffer[1] = is_over_340mhz ? 0xDA : 0xD8;
- i2c_success = i2c_write(pipe_ctx, slave_address,
- buffer, sizeof(buffer));
- RETIMER_REDRIVER_INFO("retimer write to slave_addr = 0x%x,\
- offset = 0x%x, reg_val = 0x%x, i2c_success = %d\n",
- slave_address, buffer[0], buffer[1], i2c_success?1:0);
- if (!i2c_success)
- goto i2c_write_fail;
-
- /* Write offset 0x0A to 0x17 */
- buffer[0] = 0x0A;
- buffer[1] = 0x17;
- i2c_success = i2c_write(pipe_ctx, slave_address,
- buffer, sizeof(buffer));
- RETIMER_REDRIVER_INFO("retimer write to slave_addr = 0x%x,\
- offset = 0x%x, reg_val= 0x%x, i2c_success = %d\n",
- slave_address, buffer[0], buffer[1], i2c_success?1:0);
- if (!i2c_success)
- goto i2c_write_fail;
-
- /* Write offset 0x0C to 0x1D or 0x91 */
- buffer[0] = 0x0C;
- buffer[1] = is_over_340mhz ? 0x1D : 0x91;
- i2c_success = i2c_write(pipe_ctx, slave_address,
- buffer, sizeof(buffer));
- RETIMER_REDRIVER_INFO("retimer write to slave_addr = 0x%x,\
- offset = 0x%x, reg_val = 0x%x, i2c_success = %d\n",
- slave_address, buffer[0], buffer[1], i2c_success?1:0);
- if (!i2c_success)
- goto i2c_write_fail;
-
- /* Write offset 0x0A to 0x17 */
- buffer[0] = 0x0A;
- buffer[1] = 0x17;
- i2c_success = i2c_write(pipe_ctx, slave_address,
- buffer, sizeof(buffer));
- RETIMER_REDRIVER_INFO("retimer write to slave_addr = 0x%x,\
- offset = 0x%x, reg_val = 0x%x, i2c_success = %d\n",
- slave_address, buffer[0], buffer[1], i2c_success?1:0);
- if (!i2c_success)
- goto i2c_write_fail;
-
-
- if (is_vga_mode) {
- /* Program additional settings if using 640x480 resolution */
-
- /* Write offset 0xFF to 0x01 */
- buffer[0] = 0xff;
- buffer[1] = 0x01;
- i2c_success = i2c_write(pipe_ctx, slave_address,
- buffer, sizeof(buffer));
- RETIMER_REDRIVER_INFO("retimer write to slave_addr = 0x%x,\
- offset = 0x%x, reg_val = 0x%x, i2c_success = %d\n",
- slave_address, buffer[0], buffer[1], i2c_success?1:0);
- if (!i2c_success)
- goto i2c_write_fail;
-
- /* Write offset 0x00 to 0x23 */
- buffer[0] = 0x00;
- buffer[1] = 0x23;
- i2c_success = i2c_write(pipe_ctx, slave_address,
- buffer, sizeof(buffer));
- RETIMER_REDRIVER_INFO("retimer write to slave_addr = 0x%x,\
- offset = 0x%x, reg_val= 0x%x, i2c_success = %d\n",
- slave_address, buffer[0], buffer[1], i2c_success?1:0);
- if (!i2c_success)
- goto i2c_write_fail;
-
- /* Write offset 0xff to 0x00 */
- buffer[0] = 0xff;
- buffer[1] = 0x00;
- i2c_success = i2c_write(pipe_ctx, slave_address,
- buffer, sizeof(buffer));
- RETIMER_REDRIVER_INFO("retimer write default setting to slave_addr = 0x%x,\
- offset = 0x%x, reg_val= 0x%x, i2c_success = %d end here\n",
- slave_address, buffer[0], buffer[1], i2c_success?1:0);
- if (!i2c_success)
- goto i2c_write_fail;
- }
-
- return;
-
-i2c_write_fail:
- DC_LOG_DEBUG("Set default retimer failed");
-}
-
-static void write_i2c_redriver_setting(
- struct pipe_ctx *pipe_ctx,
- bool is_over_340mhz)
-{
- uint8_t slave_address = (0xF0 >> 1);
- uint8_t buffer[16];
- bool i2c_success = false;
- DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger);
-
- memset(&buffer, 0, sizeof(buffer));
-
- // Program Slave Address for tuning single integrity
- buffer[3] = 0x4E;
- buffer[4] = 0x4E;
- buffer[5] = 0x4E;
- buffer[6] = is_over_340mhz ? 0x4E : 0x4A;
-
- i2c_success = i2c_write(pipe_ctx, slave_address,
- buffer, sizeof(buffer));
- RETIMER_REDRIVER_INFO("redriver write 0 to all 16 reg offset expect following:\n\
- \t slave_addr = 0x%x, offset[3] = 0x%x, offset[4] = 0x%x,\
- offset[5] = 0x%x,offset[6] is_over_340mhz = 0x%x,\
- i2c_success = %d\n",
- slave_address, buffer[3], buffer[4], buffer[5], buffer[6], i2c_success?1:0);
-
- if (!i2c_success)
- DC_LOG_DEBUG("Set redriver failed");
-}
-
-static void disable_link(struct dc_link *link, enum signal_type signal)
-{
- /*
- * TODO: implement call for dp_set_hw_test_pattern
- * it is needed for compliance testing
- */
-
- /* Here we need to specify that encoder output settings
- * need to be calculated as for the set mode,
- * it will lead to querying dynamic link capabilities
- * which should be done before enable output
- */
-
- if (dc_is_dp_signal(signal)) {
- /* SST DP, eDP */
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- struct dc_link_settings link_settings = link->cur_link_settings;
-#endif
- if (dc_is_dp_sst_signal(signal))
- dp_disable_link_phy(link, signal);
- else
- dp_disable_link_phy_mst(link, signal);
-
- if (dc_is_dp_sst_signal(signal) ||
- link->mst_stream_alloc_table.stream_count == 0) {
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dp_get_link_encoding_format(&link_settings) == DP_8b_10b_ENCODING) {
- dp_set_fec_enable(link, false);
- dp_set_fec_ready(link, false);
- }
-#else
- dp_set_fec_enable(link, false);
- dp_set_fec_ready(link, false);
-#endif
- }
- } else {
- if (signal != SIGNAL_TYPE_VIRTUAL)
- link->link_enc->funcs->disable_output(link->link_enc, signal);
- }
-
- if (signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
- /* MST disable link only when no stream use the link */
- if (link->mst_stream_alloc_table.stream_count <= 0)
- link->link_status.link_active = false;
- } else {
- link->link_status.link_active = false;
- }
-}
-
-static void enable_link_hdmi(struct pipe_ctx *pipe_ctx)
-{
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct dc_link *link = stream->link;
- enum dc_color_depth display_color_depth;
- enum engine_id eng_id;
- struct ext_hdmi_settings settings = {0};
- bool is_over_340mhz = false;
- bool is_vga_mode = (stream->timing.h_addressable == 640)
- && (stream->timing.v_addressable == 480);
-
- if (stream->phy_pix_clk == 0)
- stream->phy_pix_clk = stream->timing.pix_clk_100hz / 10;
- if (stream->phy_pix_clk > 340000)
- is_over_340mhz = true;
-
- if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) {
- unsigned short masked_chip_caps = pipe_ctx->stream->link->chip_caps &
- EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK;
- if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) {
- /* DP159, Retimer settings */
- eng_id = pipe_ctx->stream_res.stream_enc->id;
-
- if (get_ext_hdmi_settings(pipe_ctx, eng_id, &settings)) {
- write_i2c_retimer_setting(pipe_ctx,
- is_vga_mode, is_over_340mhz, &settings);
- } else {
- write_i2c_default_retimer_setting(pipe_ctx,
- is_vga_mode, is_over_340mhz);
- }
- } else if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) {
- /* PI3EQX1204, Redriver settings */
- write_i2c_redriver_setting(pipe_ctx, is_over_340mhz);
- }
- }
-
- if (dc_is_hdmi_signal(pipe_ctx->stream->signal))
- dal_ddc_service_write_scdc_data(
- stream->link->ddc,
- stream->phy_pix_clk,
- stream->timing.flags.LTE_340MCSC_SCRAMBLE);
-
- memset(&stream->link->cur_link_settings, 0,
- sizeof(struct dc_link_settings));
-
- display_color_depth = stream->timing.display_color_depth;
- if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422)
- display_color_depth = COLOR_DEPTH_888;
-
- link->link_enc->funcs->enable_tmds_output(
- link->link_enc,
- pipe_ctx->clock_source->id,
- display_color_depth,
- pipe_ctx->stream->signal,
- stream->phy_pix_clk);
-
- if (dc_is_hdmi_signal(pipe_ctx->stream->signal))
- dal_ddc_service_read_scdc_data(link->ddc);
-}
-
-static void enable_link_lvds(struct pipe_ctx *pipe_ctx)
-{
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct dc_link *link = stream->link;
-
- if (stream->phy_pix_clk == 0)
- stream->phy_pix_clk = stream->timing.pix_clk_100hz / 10;
-
- memset(&stream->link->cur_link_settings, 0,
- sizeof(struct dc_link_settings));
-
- link->link_enc->funcs->enable_lvds_output(
- link->link_enc,
- pipe_ctx->clock_source->id,
- stream->phy_pix_clk);
-
-}
-
-/****************************enable_link***********************************/
-static enum dc_status enable_link(
- struct dc_state *state,
- struct pipe_ctx *pipe_ctx)
-{
- enum dc_status status = DC_ERROR_UNEXPECTED;
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct dc_link *link = stream->link;
-
- /* There's some scenarios where driver is unloaded with display
- * still enabled. When driver is reloaded, it may cause a display
- * to not light up if there is a mismatch between old and new
- * link settings. Need to call disable first before enabling at
- * new link settings.
- */
- if (link->link_status.link_active) {
- disable_link(link, pipe_ctx->stream->signal);
- }
-
- switch (pipe_ctx->stream->signal) {
- case SIGNAL_TYPE_DISPLAY_PORT:
- status = enable_link_dp(state, pipe_ctx);
- break;
- case SIGNAL_TYPE_EDP:
- status = enable_link_edp(state, pipe_ctx);
- break;
- case SIGNAL_TYPE_DISPLAY_PORT_MST:
- status = enable_link_dp_mst(state, pipe_ctx);
- msleep(200);
- break;
- case SIGNAL_TYPE_DVI_SINGLE_LINK:
- case SIGNAL_TYPE_DVI_DUAL_LINK:
- case SIGNAL_TYPE_HDMI_TYPE_A:
- enable_link_hdmi(pipe_ctx);
- status = DC_OK;
- break;
- case SIGNAL_TYPE_LVDS:
- enable_link_lvds(pipe_ctx);
- status = DC_OK;
- break;
- case SIGNAL_TYPE_VIRTUAL:
- status = DC_OK;
- break;
- default:
- break;
- }
-
- if (status == DC_OK)
- pipe_ctx->stream->link->link_status.link_active = true;
-
- return status;
-}
-
-static uint32_t get_timing_pixel_clock_100hz(const struct dc_crtc_timing *timing)
-{
-
- uint32_t pxl_clk = timing->pix_clk_100hz;
-
- if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420)
- pxl_clk /= 2;
- else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422)
- pxl_clk = pxl_clk * 2 / 3;
-
- if (timing->display_color_depth == COLOR_DEPTH_101010)
- pxl_clk = pxl_clk * 10 / 8;
- else if (timing->display_color_depth == COLOR_DEPTH_121212)
- pxl_clk = pxl_clk * 12 / 8;
-
- return pxl_clk;
-}
-
-static bool dp_active_dongle_validate_timing(
- const struct dc_crtc_timing *timing,
- const struct dpcd_caps *dpcd_caps)
-{
- const struct dc_dongle_caps *dongle_caps = &dpcd_caps->dongle_caps;
-
- switch (dpcd_caps->dongle_type) {
- case DISPLAY_DONGLE_DP_VGA_CONVERTER:
- case DISPLAY_DONGLE_DP_DVI_CONVERTER:
- case DISPLAY_DONGLE_DP_DVI_DONGLE:
- if (timing->pixel_encoding == PIXEL_ENCODING_RGB)
- return true;
- else
- return false;
- default:
- break;
- }
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dpcd_caps->dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER &&
- dongle_caps->extendedCapValid == true) {
-#else
- if (dpcd_caps->dongle_type != DISPLAY_DONGLE_DP_HDMI_CONVERTER ||
- dongle_caps->extendedCapValid == false)
- return true;
-#endif
-
- /* Check Pixel Encoding */
- switch (timing->pixel_encoding) {
- case PIXEL_ENCODING_RGB:
- case PIXEL_ENCODING_YCBCR444:
- break;
- case PIXEL_ENCODING_YCBCR422:
- if (!dongle_caps->is_dp_hdmi_ycbcr422_pass_through)
- return false;
- break;
- case PIXEL_ENCODING_YCBCR420:
- if (!dongle_caps->is_dp_hdmi_ycbcr420_pass_through)
- return false;
- break;
- default:
- /* Invalid Pixel Encoding*/
- return false;
- }
-
- switch (timing->display_color_depth) {
- case COLOR_DEPTH_666:
- case COLOR_DEPTH_888:
- /*888 and 666 should always be supported*/
- break;
- case COLOR_DEPTH_101010:
- if (dongle_caps->dp_hdmi_max_bpc < 10)
- return false;
- break;
- case COLOR_DEPTH_121212:
- if (dongle_caps->dp_hdmi_max_bpc < 12)
- return false;
- break;
- case COLOR_DEPTH_141414:
- case COLOR_DEPTH_161616:
- default:
- /* These color depths are currently not supported */
- return false;
- }
-
- if (get_timing_pixel_clock_100hz(timing) > (dongle_caps->dp_hdmi_max_pixel_clk_in_khz * 10))
- return false;
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- }
-
- if (dpcd_caps->channel_coding_cap.bits.DP_128b_132b_SUPPORTED == 0 &&
- dpcd_caps->dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_PASSTHROUGH_SUPPORT == 0 &&
- dongle_caps->dfp_cap_ext.supported) {
-
- if (dongle_caps->dfp_cap_ext.max_pixel_rate_in_mps < (timing->pix_clk_100hz / 10000))
- return false;
-
- if (dongle_caps->dfp_cap_ext.max_video_h_active_width < timing->h_addressable)
- return false;
-
- if (dongle_caps->dfp_cap_ext.max_video_v_active_height < timing->v_addressable)
- return false;
-
- if (timing->pixel_encoding == PIXEL_ENCODING_RGB) {
- if (!dongle_caps->dfp_cap_ext.encoding_format_caps.support_rgb)
- return false;
- if (timing->display_color_depth == COLOR_DEPTH_666 &&
- !dongle_caps->dfp_cap_ext.rgb_color_depth_caps.support_6bpc)
- return false;
- else if (timing->display_color_depth == COLOR_DEPTH_888 &&
- !dongle_caps->dfp_cap_ext.rgb_color_depth_caps.support_8bpc)
- return false;
- else if (timing->display_color_depth == COLOR_DEPTH_101010 &&
- !dongle_caps->dfp_cap_ext.rgb_color_depth_caps.support_10bpc)
- return false;
- else if (timing->display_color_depth == COLOR_DEPTH_121212 &&
- !dongle_caps->dfp_cap_ext.rgb_color_depth_caps.support_12bpc)
- return false;
- else if (timing->display_color_depth == COLOR_DEPTH_161616 &&
- !dongle_caps->dfp_cap_ext.rgb_color_depth_caps.support_16bpc)
- return false;
- } else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR444) {
- if (!dongle_caps->dfp_cap_ext.encoding_format_caps.support_rgb)
- return false;
- if (timing->display_color_depth == COLOR_DEPTH_888 &&
- !dongle_caps->dfp_cap_ext.ycbcr444_color_depth_caps.support_8bpc)
- return false;
- else if (timing->display_color_depth == COLOR_DEPTH_101010 &&
- !dongle_caps->dfp_cap_ext.ycbcr444_color_depth_caps.support_10bpc)
- return false;
- else if (timing->display_color_depth == COLOR_DEPTH_121212 &&
- !dongle_caps->dfp_cap_ext.ycbcr444_color_depth_caps.support_12bpc)
- return false;
- else if (timing->display_color_depth == COLOR_DEPTH_161616 &&
- !dongle_caps->dfp_cap_ext.ycbcr444_color_depth_caps.support_16bpc)
- return false;
- } else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) {
- if (!dongle_caps->dfp_cap_ext.encoding_format_caps.support_rgb)
- return false;
- if (timing->display_color_depth == COLOR_DEPTH_888 &&
- !dongle_caps->dfp_cap_ext.ycbcr422_color_depth_caps.support_8bpc)
- return false;
- else if (timing->display_color_depth == COLOR_DEPTH_101010 &&
- !dongle_caps->dfp_cap_ext.ycbcr422_color_depth_caps.support_10bpc)
- return false;
- else if (timing->display_color_depth == COLOR_DEPTH_121212 &&
- !dongle_caps->dfp_cap_ext.ycbcr422_color_depth_caps.support_12bpc)
- return false;
- else if (timing->display_color_depth == COLOR_DEPTH_161616 &&
- !dongle_caps->dfp_cap_ext.ycbcr422_color_depth_caps.support_16bpc)
- return false;
- } else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) {
- if (!dongle_caps->dfp_cap_ext.encoding_format_caps.support_rgb)
- return false;
- if (timing->display_color_depth == COLOR_DEPTH_888 &&
- !dongle_caps->dfp_cap_ext.ycbcr420_color_depth_caps.support_8bpc)
- return false;
- else if (timing->display_color_depth == COLOR_DEPTH_101010 &&
- !dongle_caps->dfp_cap_ext.ycbcr420_color_depth_caps.support_10bpc)
- return false;
- else if (timing->display_color_depth == COLOR_DEPTH_121212 &&
- !dongle_caps->dfp_cap_ext.ycbcr420_color_depth_caps.support_12bpc)
- return false;
- else if (timing->display_color_depth == COLOR_DEPTH_161616 &&
- !dongle_caps->dfp_cap_ext.ycbcr420_color_depth_caps.support_16bpc)
- return false;
- }
- }
-#endif
-
- return true;
-}
-
-enum dc_status dc_link_validate_mode_timing(
- const struct dc_stream_state *stream,
- struct dc_link *link,
- const struct dc_crtc_timing *timing)
-{
- uint32_t max_pix_clk = stream->link->dongle_max_pix_clk * 10;
- struct dpcd_caps *dpcd_caps = &link->dpcd_caps;
-
- /* A hack to avoid failing any modes for EDID override feature on
- * topology change such as lower quality cable for DP or different dongle
- */
- if (link->remote_sinks[0] && link->remote_sinks[0]->sink_signal == SIGNAL_TYPE_VIRTUAL)
- return DC_OK;
-
- /* Passive Dongle */
- if (max_pix_clk != 0 && get_timing_pixel_clock_100hz(timing) > max_pix_clk)
- return DC_EXCEED_DONGLE_CAP;
-
- /* Active Dongle*/
- if (!dp_active_dongle_validate_timing(timing, dpcd_caps))
- return DC_EXCEED_DONGLE_CAP;
-
- switch (stream->signal) {
- case SIGNAL_TYPE_EDP:
- case SIGNAL_TYPE_DISPLAY_PORT:
- if (!dp_validate_mode_timing(
- link,
- timing))
- return DC_NO_DP_LINK_BANDWIDTH;
- break;
-
- default:
- break;
- }
-
- return DC_OK;
-}
-
-static struct abm *get_abm_from_stream_res(const struct dc_link *link)
-{
- int i;
- struct dc *dc = NULL;
- struct abm *abm = NULL;
-
- if (!link || !link->ctx)
- return NULL;
-
- dc = link->ctx->dc;
-
- for (i = 0; i < MAX_PIPES; i++) {
- struct pipe_ctx pipe_ctx = dc->current_state->res_ctx.pipe_ctx[i];
- struct dc_stream_state *stream = pipe_ctx.stream;
-
- if (stream && stream->link == link) {
- abm = pipe_ctx.stream_res.abm;
- break;
- }
- }
- return abm;
-}
-
-int dc_link_get_backlight_level(const struct dc_link *link)
-{
- struct abm *abm = get_abm_from_stream_res(link);
- struct panel_cntl *panel_cntl = link->panel_cntl;
- struct dc *dc = link->ctx->dc;
- struct dmcu *dmcu = dc->res_pool->dmcu;
- bool fw_set_brightness = true;
-
- if (dmcu)
- fw_set_brightness = dmcu->funcs->is_dmcu_initialized(dmcu);
-
- if (!fw_set_brightness && panel_cntl->funcs->get_current_backlight)
- return panel_cntl->funcs->get_current_backlight(panel_cntl);
- else if (abm != NULL && abm->funcs->get_current_backlight != NULL)
- return (int) abm->funcs->get_current_backlight(abm);
- else
- return DC_ERROR_UNEXPECTED;
-}
-
-int dc_link_get_target_backlight_pwm(const struct dc_link *link)
-{
- struct abm *abm = get_abm_from_stream_res(link);
-
- if (abm == NULL || abm->funcs->get_target_backlight == NULL)
- return DC_ERROR_UNEXPECTED;
-
- return (int) abm->funcs->get_target_backlight(abm);
-}
-
-static struct pipe_ctx *get_pipe_from_link(const struct dc_link *link)
-{
- int i;
- struct dc *dc = link->ctx->dc;
- struct pipe_ctx *pipe_ctx = NULL;
-
- for (i = 0; i < MAX_PIPES; i++) {
- if (dc->current_state->res_ctx.pipe_ctx[i].stream) {
- if (dc->current_state->res_ctx.pipe_ctx[i].stream->link == link) {
- pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
- break;
- }
- }
- }
-
- return pipe_ctx;
-}
-
-bool dc_link_set_backlight_level(const struct dc_link *link,
- uint32_t backlight_pwm_u16_16,
- uint32_t frame_ramp)
-{
- struct dc *dc = link->ctx->dc;
-
- DC_LOGGER_INIT(link->ctx->logger);
- DC_LOG_BACKLIGHT("New Backlight level: %d (0x%X)\n",
- backlight_pwm_u16_16, backlight_pwm_u16_16);
-
- if (dc_is_embedded_signal(link->connector_signal)) {
- struct pipe_ctx *pipe_ctx = get_pipe_from_link(link);
-
- if (pipe_ctx) {
- /* Disable brightness ramping when the display is blanked
- * as it can hang the DMCU
- */
- if (pipe_ctx->plane_state == NULL)
- frame_ramp = 0;
- } else {
- return false;
- }
-
- dc->hwss.set_backlight_level(
- pipe_ctx,
- backlight_pwm_u16_16,
- frame_ramp);
- }
- return true;
-}
-
-bool dc_link_set_psr_allow_active(struct dc_link *link, const bool *allow_active,
- bool wait, bool force_static, const unsigned int *power_opts)
-{
- struct dc *dc = link->ctx->dc;
- struct dmcu *dmcu = dc->res_pool->dmcu;
- struct dmub_psr *psr = dc->res_pool->psr;
- unsigned int panel_inst;
-
- if (psr == NULL && force_static)
- return false;
-
- if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst))
- return false;
-
- /* Set power optimization flag */
- if (power_opts && link->psr_settings.psr_power_opt != *power_opts) {
- link->psr_settings.psr_power_opt = *power_opts;
-
- if (psr != NULL && link->psr_settings.psr_feature_enabled && psr->funcs->psr_set_power_opt)
- psr->funcs->psr_set_power_opt(psr, link->psr_settings.psr_power_opt);
- }
-
- /* Enable or Disable PSR */
- if (allow_active && link->psr_settings.psr_allow_active != *allow_active) {
- link->psr_settings.psr_allow_active = *allow_active;
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (!link->psr_settings.psr_allow_active)
- dc_z10_restore(dc);
-#endif
-
- if (psr != NULL && link->psr_settings.psr_feature_enabled) {
- if (force_static && psr->funcs->psr_force_static)
- psr->funcs->psr_force_static(psr, panel_inst);
- psr->funcs->psr_enable(psr, link->psr_settings.psr_allow_active, wait, panel_inst);
- } else if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu)) &&
- link->psr_settings.psr_feature_enabled)
- dmcu->funcs->set_psr_enable(dmcu, link->psr_settings.psr_allow_active, wait);
- else
- return false;
- }
-
- return true;
-}
-
-bool dc_link_get_psr_state(const struct dc_link *link, enum dc_psr_state *state)
-{
- struct dc *dc = link->ctx->dc;
- struct dmcu *dmcu = dc->res_pool->dmcu;
- struct dmub_psr *psr = dc->res_pool->psr;
- unsigned int panel_inst;
-
- if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst))
- return false;
-
- if (psr != NULL && link->psr_settings.psr_feature_enabled)
- psr->funcs->psr_get_state(psr, state, panel_inst);
- else if (dmcu != NULL && link->psr_settings.psr_feature_enabled)
- dmcu->funcs->get_psr_state(dmcu, state);
-
- return true;
-}
-
-static inline enum physical_phy_id
-transmitter_to_phy_id(enum transmitter transmitter_value)
-{
- switch (transmitter_value) {
- case TRANSMITTER_UNIPHY_A:
- return PHYLD_0;
- case TRANSMITTER_UNIPHY_B:
- return PHYLD_1;
- case TRANSMITTER_UNIPHY_C:
- return PHYLD_2;
- case TRANSMITTER_UNIPHY_D:
- return PHYLD_3;
- case TRANSMITTER_UNIPHY_E:
- return PHYLD_4;
- case TRANSMITTER_UNIPHY_F:
- return PHYLD_5;
- case TRANSMITTER_NUTMEG_CRT:
- return PHYLD_6;
- case TRANSMITTER_TRAVIS_CRT:
- return PHYLD_7;
- case TRANSMITTER_TRAVIS_LCD:
- return PHYLD_8;
- case TRANSMITTER_UNIPHY_G:
- return PHYLD_9;
- case TRANSMITTER_COUNT:
- return PHYLD_COUNT;
- case TRANSMITTER_UNKNOWN:
- return PHYLD_UNKNOWN;
- default:
- WARN_ONCE(1, "Unknown transmitter value %d\n",
- transmitter_value);
- return PHYLD_UNKNOWN;
- }
-}
-
-bool dc_link_setup_psr(struct dc_link *link,
- const struct dc_stream_state *stream, struct psr_config *psr_config,
- struct psr_context *psr_context)
-{
- struct dc *dc;
- struct dmcu *dmcu;
- struct dmub_psr *psr;
- int i;
- unsigned int panel_inst;
- /* updateSinkPsrDpcdConfig*/
- union dpcd_psr_configuration psr_configuration;
-
- psr_context->controllerId = CONTROLLER_ID_UNDEFINED;
-
- if (!link)
- return false;
-
- dc = link->ctx->dc;
- dmcu = dc->res_pool->dmcu;
- psr = dc->res_pool->psr;
-
- if (!dmcu && !psr)
- return false;
-
- if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst))
- return false;
-
-
- memset(&psr_configuration, 0, sizeof(psr_configuration));
-
- psr_configuration.bits.ENABLE = 1;
- psr_configuration.bits.CRC_VERIFICATION = 1;
- psr_configuration.bits.FRAME_CAPTURE_INDICATION =
- psr_config->psr_frame_capture_indication_req;
-
- /* Check for PSR v2*/
- if (psr_config->psr_version == 0x2) {
- /* For PSR v2 selective update.
- * Indicates whether sink should start capturing
- * immediately following active scan line,
- * or starting with the 2nd active scan line.
- */
- psr_configuration.bits.LINE_CAPTURE_INDICATION = 0;
- /*For PSR v2, determines whether Sink should generate
- * IRQ_HPD when CRC mismatch is detected.
- */
- psr_configuration.bits.IRQ_HPD_WITH_CRC_ERROR = 1;
- }
-
- dm_helpers_dp_write_dpcd(
- link->ctx,
- link,
- 368,
- &psr_configuration.raw,
- sizeof(psr_configuration.raw));
-
- psr_context->channel = link->ddc->ddc_pin->hw_info.ddc_channel;
- psr_context->transmitterId = link->link_enc->transmitter;
- psr_context->engineId = link->link_enc->preferred_engine;
-
- for (i = 0; i < MAX_PIPES; i++) {
- if (dc->current_state->res_ctx.pipe_ctx[i].stream
- == stream) {
- /* dmcu -1 for all controller id values,
- * therefore +1 here
- */
- psr_context->controllerId =
- dc->current_state->res_ctx.
- pipe_ctx[i].stream_res.tg->inst + 1;
- break;
- }
- }
-
- /* Hardcoded for now. Can be Pcie or Uniphy (or Unknown)*/
- psr_context->phyType = PHY_TYPE_UNIPHY;
- /*PhyId is associated with the transmitter id*/
- psr_context->smuPhyId =
- transmitter_to_phy_id(link->link_enc->transmitter);
-
- psr_context->crtcTimingVerticalTotal = stream->timing.v_total;
- psr_context->vsync_rate_hz = div64_u64(div64_u64((stream->
- timing.pix_clk_100hz * 100),
- stream->timing.v_total),
- stream->timing.h_total);
-
- psr_context->psrSupportedDisplayConfig = true;
- psr_context->psrExitLinkTrainingRequired =
- psr_config->psr_exit_link_training_required;
- psr_context->sdpTransmitLineNumDeadline =
- psr_config->psr_sdp_transmit_line_num_deadline;
- psr_context->psrFrameCaptureIndicationReq =
- psr_config->psr_frame_capture_indication_req;
-
- psr_context->skipPsrWaitForPllLock = 0; /* only = 1 in KV */
-
- psr_context->numberOfControllers =
- link->dc->res_pool->timing_generator_count;
-
- psr_context->rfb_update_auto_en = true;
-
- /* 2 frames before enter PSR. */
- psr_context->timehyst_frames = 2;
- /* half a frame
- * (units in 100 lines, i.e. a value of 1 represents 100 lines)
- */
- psr_context->hyst_lines = stream->timing.v_total / 2 / 100;
- psr_context->aux_repeats = 10;
-
- psr_context->psr_level.u32all = 0;
-
- /*skip power down the single pipe since it blocks the cstate*/
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (link->ctx->asic_id.chip_family >= FAMILY_RV) {
- psr_context->psr_level.bits.SKIP_CRTC_DISABLE = true;
- if (link->ctx->asic_id.chip_family == FAMILY_YELLOW_CARP && !dc->debug.disable_z10)
- psr_context->psr_level.bits.SKIP_CRTC_DISABLE = false;
- }
-#else
- if (link->ctx->asic_id.chip_family >= FAMILY_RV)
- psr_context->psr_level.bits.SKIP_CRTC_DISABLE = true;
-#endif
-
- /* SMU will perform additional powerdown sequence.
- * For unsupported ASICs, set psr_level flag to skip PSR
- * static screen notification to SMU.
- * (Always set for DAL2, did not check ASIC)
- */
- psr_context->allow_smu_optimizations = psr_config->allow_smu_optimizations;
- psr_context->allow_multi_disp_optimizations = psr_config->allow_multi_disp_optimizations;
-
- /* Complete PSR entry before aborting to prevent intermittent
- * freezes on certain eDPs
- */
- psr_context->psr_level.bits.DISABLE_PSR_ENTRY_ABORT = 1;
-
- /* Controls additional delay after remote frame capture before
- * continuing power down, default = 0
- */
- psr_context->frame_delay = 0;
-
- if (psr)
- link->psr_settings.psr_feature_enabled = psr->funcs->psr_copy_settings(psr,
- link, psr_context, panel_inst);
- else
- link->psr_settings.psr_feature_enabled = dmcu->funcs->setup_psr(dmcu, link, psr_context);
-
- /* psr_enabled == 0 indicates setup_psr did not succeed, but this
- * should not happen since firmware should be running at this point
- */
- if (link->psr_settings.psr_feature_enabled == 0)
- ASSERT(0);
-
- return true;
-
-}
-
-void dc_link_get_psr_residency(const struct dc_link *link, uint32_t *residency)
-{
- struct dc *dc = link->ctx->dc;
- struct dmub_psr *psr = dc->res_pool->psr;
- unsigned int panel_inst;
-
- if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst))
- return;
-
- /* PSR residency measurements only supported on DMCUB */
- if (psr != NULL && link->psr_settings.psr_feature_enabled)
- psr->funcs->psr_get_residency(psr, residency, panel_inst);
- else
- *residency = 0;
-}
-
-const struct dc_link_status *dc_link_get_status(const struct dc_link *link)
-{
- return &link->link_status;
-}
-
-void core_link_resume(struct dc_link *link)
-{
- if (link->connector_signal != SIGNAL_TYPE_VIRTUAL)
- program_hpd_filter(link);
-}
-
-static struct fixed31_32 get_pbn_per_slot(struct dc_stream_state *stream)
-{
- struct fixed31_32 mbytes_per_sec;
- uint32_t link_rate_in_mbytes_per_sec = dc_link_bandwidth_kbps(stream->link,
- &stream->link->cur_link_settings);
- link_rate_in_mbytes_per_sec /= 8000; /* Kbits to MBytes */
-
- mbytes_per_sec = dc_fixpt_from_int(link_rate_in_mbytes_per_sec);
-
- return dc_fixpt_div_int(mbytes_per_sec, 54);
-}
-
-static struct fixed31_32 get_pbn_from_bw_in_kbps(uint64_t kbps)
-{
- struct fixed31_32 peak_kbps;
- uint32_t numerator = 0;
- uint32_t denominator = 1;
-
- /*
- * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006
- * The unit of 54/64Mbytes/sec is an arbitrary unit chosen based on
- * common multiplier to render an integer PBN for all link rate/lane
- * counts combinations
- * calculate
- * peak_kbps *= (1006/1000)
- * peak_kbps *= (64/54)
- * peak_kbps *= 8 convert to bytes
- */
-
- numerator = 64 * PEAK_FACTOR_X1000;
- denominator = 54 * 8 * 1000 * 1000;
- kbps *= numerator;
- peak_kbps = dc_fixpt_from_fraction(kbps, denominator);
-
- return peak_kbps;
-}
-
-static struct fixed31_32 get_pbn_from_timing(struct pipe_ctx *pipe_ctx)
-{
- uint64_t kbps;
-
- kbps = dc_bandwidth_in_kbps_from_timing(&pipe_ctx->stream->timing);
- return get_pbn_from_bw_in_kbps(kbps);
-}
-
-static void update_mst_stream_alloc_table(
- struct dc_link *link,
- struct stream_encoder *stream_enc,
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- struct hpo_dp_stream_encoder *hpo_dp_stream_enc, // TODO: Rename stream_enc to dio_stream_enc?
-#endif
- const struct dp_mst_stream_allocation_table *proposed_table)
-{
- struct link_mst_stream_allocation work_table[MAX_CONTROLLER_NUM] = { 0 };
- struct link_mst_stream_allocation *dc_alloc;
-
- int i;
- int j;
-
- /* if DRM proposed_table has more than one new payload */
- ASSERT(proposed_table->stream_count -
- link->mst_stream_alloc_table.stream_count < 2);
-
- /* copy proposed_table to link, add stream encoder */
- for (i = 0; i < proposed_table->stream_count; i++) {
-
- for (j = 0; j < link->mst_stream_alloc_table.stream_count; j++) {
- dc_alloc =
- &link->mst_stream_alloc_table.stream_allocations[j];
-
- if (dc_alloc->vcp_id ==
- proposed_table->stream_allocations[i].vcp_id) {
-
- work_table[i] = *dc_alloc;
- work_table[i].slot_count = proposed_table->stream_allocations[i].slot_count;
- break; /* exit j loop */
- }
- }
-
- /* new vcp_id */
- if (j == link->mst_stream_alloc_table.stream_count) {
- work_table[i].vcp_id =
- proposed_table->stream_allocations[i].vcp_id;
- work_table[i].slot_count =
- proposed_table->stream_allocations[i].slot_count;
- work_table[i].stream_enc = stream_enc;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- work_table[i].hpo_dp_stream_enc = hpo_dp_stream_enc;
-#endif
- }
- }
-
- /* update link->mst_stream_alloc_table with work_table */
- link->mst_stream_alloc_table.stream_count =
- proposed_table->stream_count;
- for (i = 0; i < MAX_CONTROLLER_NUM; i++)
- link->mst_stream_alloc_table.stream_allocations[i] =
- work_table[i];
-}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-static void dc_log_vcp_x_y(const struct dc_link *link, struct fixed31_32 avg_time_slots_per_mtp)
-{
- const uint32_t VCP_Y_PRECISION = 1000;
- uint64_t vcp_x, vcp_y;
-
- // Add 0.5*(1/VCP_Y_PRECISION) to round up to decimal precision
- avg_time_slots_per_mtp = dc_fixpt_add(
- avg_time_slots_per_mtp, dc_fixpt_from_fraction(1, 2 * VCP_Y_PRECISION));
-
- vcp_x = dc_fixpt_floor(avg_time_slots_per_mtp);
- vcp_y = dc_fixpt_floor(
- dc_fixpt_mul_int(
- dc_fixpt_sub_int(avg_time_slots_per_mtp, dc_fixpt_floor(avg_time_slots_per_mtp)),
- VCP_Y_PRECISION));
-
- if (link->type == dc_connection_mst_branch)
- DC_LOG_DP2("MST Update Payload: set_throttled_vcp_size slot X.Y for MST stream "
- "X: %lld Y: %lld/%d", vcp_x, vcp_y, VCP_Y_PRECISION);
- else
- DC_LOG_DP2("SST Update Payload: set_throttled_vcp_size slot X.Y for SST stream "
- "X: %lld Y: %lld/%d", vcp_x, vcp_y, VCP_Y_PRECISION);
-}
-
-/*
- * Payload allocation/deallocation for SST introduced in DP2.0
- */
-enum dc_status dc_link_update_sst_payload(struct pipe_ctx *pipe_ctx, bool allocate)
-{
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct dc_link *link = stream->link;
- struct hpo_dp_link_encoder *hpo_dp_link_encoder = link->hpo_dp_link_enc;
- struct hpo_dp_stream_encoder *hpo_dp_stream_encoder = pipe_ctx->stream_res.hpo_dp_stream_enc;
- struct link_mst_stream_allocation_table proposed_table = {0};
- struct fixed31_32 avg_time_slots_per_mtp;
- DC_LOGGER_INIT(link->ctx->logger);
-
- /* slot X.Y for SST payload deallocate */
- if (!allocate) {
- avg_time_slots_per_mtp = dc_fixpt_from_int(0);
-
- dc_log_vcp_x_y(link, avg_time_slots_per_mtp);
-
- hpo_dp_link_encoder->funcs->set_throttled_vcp_size(
- hpo_dp_link_encoder,
- hpo_dp_stream_encoder->inst,
- avg_time_slots_per_mtp);
- }
-
- /* calculate VC payload and update branch with new payload allocation table*/
- if (!dpcd_write_128b_132b_sst_payload_allocation_table(
- stream,
- link,
- &proposed_table,
- allocate)) {
- DC_LOG_ERROR("SST Update Payload: Failed to update "
- "allocation table for "
- "pipe idx: %d\n",
- pipe_ctx->pipe_idx);
- }
-
- proposed_table.stream_allocations[0].hpo_dp_stream_enc = hpo_dp_stream_encoder;
-
- ASSERT(proposed_table.stream_count == 1);
-
- //TODO - DP2.0 Logging: Instead of hpo_dp_stream_enc pointer, log instance id
- DC_LOG_DP2("SST Update Payload: hpo_dp_stream_enc: %p "
- "vcp_id: %d "
- "slot_count: %d\n",
- (void *) proposed_table.stream_allocations[0].hpo_dp_stream_enc,
- proposed_table.stream_allocations[0].vcp_id,
- proposed_table.stream_allocations[0].slot_count);
-
- /* program DP source TX for payload */
- hpo_dp_link_encoder->funcs->update_stream_allocation_table(
- hpo_dp_link_encoder,
- &proposed_table);
-
- /* poll for ACT handled */
- if (!dpcd_poll_for_allocation_change_trigger(link)) {
- // Failures will result in blackscreen and errors logged
- BREAK_TO_DEBUGGER();
- }
-
- /* slot X.Y for SST payload allocate */
- if (allocate) {
- avg_time_slots_per_mtp = calculate_sst_avg_time_slots_per_mtp(stream, link);
-
- dc_log_vcp_x_y(link, avg_time_slots_per_mtp);
-
- hpo_dp_link_encoder->funcs->set_throttled_vcp_size(
- hpo_dp_link_encoder,
- hpo_dp_stream_encoder->inst,
- avg_time_slots_per_mtp);
- }
-
- /* Always return DC_OK.
- * If part of sequence fails, log failure(s) and show blackscreen
- */
- return DC_OK;
-}
-#endif
-
-/* convert link_mst_stream_alloc_table to dm dp_mst_stream_alloc_table
- * because stream_encoder is not exposed to dm
- */
-enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx)
-{
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct dc_link *link = stream->link;
- struct link_encoder *link_encoder = NULL;
- struct stream_encoder *stream_encoder = pipe_ctx->stream_res.stream_enc;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- struct hpo_dp_link_encoder *hpo_dp_link_encoder = link->hpo_dp_link_enc;
- struct hpo_dp_stream_encoder *hpo_dp_stream_encoder = pipe_ctx->stream_res.hpo_dp_stream_enc;
-#endif
- struct dp_mst_stream_allocation_table proposed_table = {0};
- struct fixed31_32 avg_time_slots_per_mtp;
- struct fixed31_32 pbn;
- struct fixed31_32 pbn_per_slot;
- int i;
- enum act_return_status ret;
- DC_LOGGER_INIT(link->ctx->logger);
-
- /* Link encoder may have been dynamically assigned to non-physical display endpoint. */
- if (link->ep_type == DISPLAY_ENDPOINT_PHY)
- link_encoder = link->link_enc;
- else if (link->dc->res_pool->funcs->link_encs_assign)
- link_encoder = link_enc_cfg_get_link_enc_used_by_stream(pipe_ctx->stream->ctx->dc, stream);
- ASSERT(link_encoder);
-
- /* enable_link_dp_mst already check link->enabled_stream_count
- * and stream is in link->stream[]. This is called during set mode,
- * stream_enc is available.
- */
-
- /* get calculate VC payload for stream: stream_alloc */
- if (dm_helpers_dp_mst_write_payload_allocation_table(
- stream->ctx,
- stream,
- &proposed_table,
- true)) {
- update_mst_stream_alloc_table(
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- link,
- pipe_ctx->stream_res.stream_enc,
- pipe_ctx->stream_res.hpo_dp_stream_enc,
- &proposed_table);
-#else
- link, pipe_ctx->stream_res.stream_enc, &proposed_table);
-#endif
- }
- else
- DC_LOG_WARNING("Failed to update"
- "MST allocation table for"
- "pipe idx:%d\n",
- pipe_ctx->pipe_idx);
-
- DC_LOG_MST("%s "
- "stream_count: %d: \n ",
- __func__,
- link->mst_stream_alloc_table.stream_count);
-
- for (i = 0; i < MAX_CONTROLLER_NUM; i++) {
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- DC_LOG_MST("stream_enc[%d]: %p "
- "stream[%d].hpo_dp_stream_enc: %p "
- "stream[%d].vcp_id: %d "
- "stream[%d].slot_count: %d\n",
- i,
- (void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc,
- i,
- (void *) link->mst_stream_alloc_table.stream_allocations[i].hpo_dp_stream_enc,
- i,
- link->mst_stream_alloc_table.stream_allocations[i].vcp_id,
- i,
- link->mst_stream_alloc_table.stream_allocations[i].slot_count);
-#else
- DC_LOG_MST("stream_enc[%d]: %p "
- "stream[%d].vcp_id: %d "
- "stream[%d].slot_count: %d\n",
- i,
- (void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc,
- i,
- link->mst_stream_alloc_table.stream_allocations[i].vcp_id,
- i,
- link->mst_stream_alloc_table.stream_allocations[i].slot_count);
-#endif
- }
-
- ASSERT(proposed_table.stream_count > 0);
-
- if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
- static enum dc_status status;
- uint8_t mst_alloc_slots = 0, prev_mst_slots_in_use = 0xFF;
-
- for (i = 0; i < link->mst_stream_alloc_table.stream_count; i++)
- mst_alloc_slots += link->mst_stream_alloc_table.stream_allocations[i].slot_count;
-
- status = dc_process_dmub_set_mst_slots(link->dc, link->link_index,
- mst_alloc_slots, &prev_mst_slots_in_use);
- ASSERT(status == DC_OK);
- DC_LOG_MST("dpia : status[%d]: alloc_slots[%d]: used_slots[%d]\n",
- status, mst_alloc_slots, prev_mst_slots_in_use);
- }
-
- /* program DP source TX for payload */
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- switch (dp_get_link_encoding_format(&link->cur_link_settings)) {
- case DP_8b_10b_ENCODING:
- link_encoder->funcs->update_mst_stream_allocation_table(
- link_encoder,
- &link->mst_stream_alloc_table);
- break;
- case DP_128b_132b_ENCODING:
- hpo_dp_link_encoder->funcs->update_stream_allocation_table(
- hpo_dp_link_encoder,
- &link->mst_stream_alloc_table);
- break;
- case DP_UNKNOWN_ENCODING:
- DC_LOG_ERROR("Failure: unknown encoding format\n");
- return DC_ERROR_UNEXPECTED;
- }
-#else
- link_encoder->funcs->update_mst_stream_allocation_table(
- link_encoder,
- &link->mst_stream_alloc_table);
-#endif
-
- /* send down message */
- ret = dm_helpers_dp_mst_poll_for_allocation_change_trigger(
- stream->ctx,
- stream);
-
- if (ret != ACT_LINK_LOST) {
- dm_helpers_dp_mst_send_payload_allocation(
- stream->ctx,
- stream,
- true);
- }
-
- /* slot X.Y for only current stream */
- pbn_per_slot = get_pbn_per_slot(stream);
- if (pbn_per_slot.value == 0) {
- DC_LOG_ERROR("Failure: pbn_per_slot==0 not allowed. Cannot continue, returning DC_UNSUPPORTED_VALUE.\n");
- return DC_UNSUPPORTED_VALUE;
- }
- pbn = get_pbn_from_timing(pipe_ctx);
- avg_time_slots_per_mtp = dc_fixpt_div(pbn, pbn_per_slot);
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- switch (dp_get_link_encoding_format(&link->cur_link_settings)) {
- case DP_8b_10b_ENCODING:
- stream_encoder->funcs->set_throttled_vcp_size(
- stream_encoder,
- avg_time_slots_per_mtp);
- break;
- case DP_128b_132b_ENCODING:
- hpo_dp_link_encoder->funcs->set_throttled_vcp_size(
- hpo_dp_link_encoder,
- hpo_dp_stream_encoder->inst,
- avg_time_slots_per_mtp);
- break;
- case DP_UNKNOWN_ENCODING:
- DC_LOG_ERROR("Failure: unknown encoding format\n");
- return DC_ERROR_UNEXPECTED;
- }
-#else
- stream_encoder->funcs->set_throttled_vcp_size(
- stream_encoder,
- avg_time_slots_per_mtp);
-#endif
-
- return DC_OK;
-
-}
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-enum dc_status dc_link_reduce_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t bw_in_kbps)
-{
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct dc_link *link = stream->link;
- struct fixed31_32 avg_time_slots_per_mtp;
- struct fixed31_32 pbn;
- struct fixed31_32 pbn_per_slot;
- struct link_encoder *link_encoder = link->link_enc;
- struct stream_encoder *stream_encoder = pipe_ctx->stream_res.stream_enc;
- struct dp_mst_stream_allocation_table proposed_table = {0};
- uint8_t i;
- enum act_return_status ret;
- DC_LOGGER_INIT(link->ctx->logger);
-
- /* decrease throttled vcp size */
- pbn_per_slot = get_pbn_per_slot(stream);
- pbn = get_pbn_from_bw_in_kbps(bw_in_kbps);
- avg_time_slots_per_mtp = dc_fixpt_div(pbn, pbn_per_slot);
-
- stream_encoder->funcs->set_throttled_vcp_size(
- stream_encoder,
- avg_time_slots_per_mtp);
-
- /* send ALLOCATE_PAYLOAD sideband message with updated pbn */
- dm_helpers_dp_mst_send_payload_allocation(
- stream->ctx,
- stream,
- true);
-
- /* notify immediate branch device table update */
- if (dm_helpers_dp_mst_write_payload_allocation_table(
- stream->ctx,
- stream,
- &proposed_table,
- true)) {
- /* update mst stream allocation table software state */
- update_mst_stream_alloc_table(
- link,
- pipe_ctx->stream_res.stream_enc,
- pipe_ctx->stream_res.hpo_dp_stream_enc,
- &proposed_table);
- } else {
- DC_LOG_WARNING("Failed to update"
- "MST allocation table for"
- "pipe idx:%d\n",
- pipe_ctx->pipe_idx);
- }
-
- DC_LOG_MST("%s "
- "stream_count: %d: \n ",
- __func__,
- link->mst_stream_alloc_table.stream_count);
-
- for (i = 0; i < MAX_CONTROLLER_NUM; i++) {
- DC_LOG_MST("stream_enc[%d]: %p "
- "stream[%d].vcp_id: %d "
- "stream[%d].slot_count: %d\n",
- i,
- (void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc,
- i,
- link->mst_stream_alloc_table.stream_allocations[i].vcp_id,
- i,
- link->mst_stream_alloc_table.stream_allocations[i].slot_count);
- }
-
- ASSERT(proposed_table.stream_count > 0);
-
- /* update mst stream allocation table hardware state */
- link_encoder->funcs->update_mst_stream_allocation_table(
- link_encoder,
- &link->mst_stream_alloc_table);
-
- /* poll for immediate branch device ACT handled */
- ret = dm_helpers_dp_mst_poll_for_allocation_change_trigger(
- stream->ctx,
- stream);
-
- return DC_OK;
-}
-
-enum dc_status dc_link_increase_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t bw_in_kbps)
-{
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct dc_link *link = stream->link;
- struct fixed31_32 avg_time_slots_per_mtp;
- struct fixed31_32 pbn;
- struct fixed31_32 pbn_per_slot;
- struct link_encoder *link_encoder = link->link_enc;
- struct stream_encoder *stream_encoder = pipe_ctx->stream_res.stream_enc;
- struct dp_mst_stream_allocation_table proposed_table = {0};
- uint8_t i;
- enum act_return_status ret;
- DC_LOGGER_INIT(link->ctx->logger);
-
- /* notify immediate branch device table update */
- if (dm_helpers_dp_mst_write_payload_allocation_table(
- stream->ctx,
- stream,
- &proposed_table,
- true)) {
- /* update mst stream allocation table software state */
- update_mst_stream_alloc_table(
- link,
- pipe_ctx->stream_res.stream_enc,
- pipe_ctx->stream_res.hpo_dp_stream_enc,
- &proposed_table);
- }
-
- DC_LOG_MST("%s "
- "stream_count: %d: \n ",
- __func__,
- link->mst_stream_alloc_table.stream_count);
-
- for (i = 0; i < MAX_CONTROLLER_NUM; i++) {
- DC_LOG_MST("stream_enc[%d]: %p "
- "stream[%d].vcp_id: %d "
- "stream[%d].slot_count: %d\n",
- i,
- (void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc,
- i,
- link->mst_stream_alloc_table.stream_allocations[i].vcp_id,
- i,
- link->mst_stream_alloc_table.stream_allocations[i].slot_count);
- }
-
- ASSERT(proposed_table.stream_count > 0);
-
- /* update mst stream allocation table hardware state */
- link_encoder->funcs->update_mst_stream_allocation_table(
- link_encoder,
- &link->mst_stream_alloc_table);
-
- /* poll for immediate branch device ACT handled */
- ret = dm_helpers_dp_mst_poll_for_allocation_change_trigger(
- stream->ctx,
- stream);
-
- if (ret != ACT_LINK_LOST) {
- /* send ALLOCATE_PAYLOAD sideband message with updated pbn */
- dm_helpers_dp_mst_send_payload_allocation(
- stream->ctx,
- stream,
- true);
- }
-
- /* increase throttled vcp size */
- pbn = get_pbn_from_bw_in_kbps(bw_in_kbps);
- pbn_per_slot = get_pbn_per_slot(stream);
- avg_time_slots_per_mtp = dc_fixpt_div(pbn, pbn_per_slot);
-
- stream_encoder->funcs->set_throttled_vcp_size(
- stream_encoder,
- avg_time_slots_per_mtp);
-
- return DC_OK;
-}
-#endif
-
-static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx)
-{
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct dc_link *link = stream->link;
- struct link_encoder *link_encoder = NULL;
- struct stream_encoder *stream_encoder = pipe_ctx->stream_res.stream_enc;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- struct hpo_dp_link_encoder *hpo_dp_link_encoder = link->hpo_dp_link_enc;
- struct hpo_dp_stream_encoder *hpo_dp_stream_encoder = pipe_ctx->stream_res.hpo_dp_stream_enc;
-#endif
- struct dp_mst_stream_allocation_table proposed_table = {0};
- struct fixed31_32 avg_time_slots_per_mtp = dc_fixpt_from_int(0);
- int i;
- bool mst_mode = (link->type == dc_connection_mst_branch);
- DC_LOGGER_INIT(link->ctx->logger);
-
- /* Link encoder may have been dynamically assigned to non-physical display endpoint. */
- if (link->ep_type == DISPLAY_ENDPOINT_PHY)
- link_encoder = link->link_enc;
- else if (link->dc->res_pool->funcs->link_encs_assign)
- link_encoder = link_enc_cfg_get_link_enc_used_by_stream(pipe_ctx->stream->ctx->dc, stream);
- ASSERT(link_encoder);
-
- /* deallocate_mst_payload is called before disable link. When mode or
- * disable/enable monitor, new stream is created which is not in link
- * stream[] yet. For this, payload is not allocated yet, so de-alloc
- * should not done. For new mode set, map_resources will get engine
- * for new stream, so stream_enc->id should be validated until here.
- */
-
- /* slot X.Y */
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- switch (dp_get_link_encoding_format(&link->cur_link_settings)) {
- case DP_8b_10b_ENCODING:
- stream_encoder->funcs->set_throttled_vcp_size(
- stream_encoder,
- avg_time_slots_per_mtp);
- break;
- case DP_128b_132b_ENCODING:
- hpo_dp_link_encoder->funcs->set_throttled_vcp_size(
- hpo_dp_link_encoder,
- hpo_dp_stream_encoder->inst,
- avg_time_slots_per_mtp);
- break;
- case DP_UNKNOWN_ENCODING:
- DC_LOG_ERROR("Failure: unknown encoding format\n");
- return DC_ERROR_UNEXPECTED;
- }
-#else
- stream_encoder->funcs->set_throttled_vcp_size(
- stream_encoder,
- avg_time_slots_per_mtp);
-#endif
-
- /* TODO: which component is responsible for remove payload table? */
- if (mst_mode) {
- if (dm_helpers_dp_mst_write_payload_allocation_table(
- stream->ctx,
- stream,
- &proposed_table,
- false)) {
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- update_mst_stream_alloc_table(
- link,
- pipe_ctx->stream_res.stream_enc,
- pipe_ctx->stream_res.hpo_dp_stream_enc,
- &proposed_table);
-#else
- update_mst_stream_alloc_table(
- link, pipe_ctx->stream_res.stream_enc, &proposed_table);
-#endif
- }
- else {
- DC_LOG_WARNING("Failed to update"
- "MST allocation table for"
- "pipe idx:%d\n",
- pipe_ctx->pipe_idx);
- }
- }
-
- DC_LOG_MST("%s"
- "stream_count: %d: ",
- __func__,
- link->mst_stream_alloc_table.stream_count);
-
- for (i = 0; i < MAX_CONTROLLER_NUM; i++) {
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- DC_LOG_MST("stream_enc[%d]: %p "
- "stream[%d].hpo_dp_stream_enc: %p "
- "stream[%d].vcp_id: %d "
- "stream[%d].slot_count: %d\n",
- i,
- (void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc,
- i,
- (void *) link->mst_stream_alloc_table.stream_allocations[i].hpo_dp_stream_enc,
- i,
- link->mst_stream_alloc_table.stream_allocations[i].vcp_id,
- i,
- link->mst_stream_alloc_table.stream_allocations[i].slot_count);
-#else
- DC_LOG_MST("stream_enc[%d]: %p "
- "stream[%d].vcp_id: %d "
- "stream[%d].slot_count: %d\n",
- i,
- (void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc,
- i,
- link->mst_stream_alloc_table.stream_allocations[i].vcp_id,
- i,
- link->mst_stream_alloc_table.stream_allocations[i].slot_count);
-#endif
- }
-
- if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
- enum dc_status status;
- uint8_t mst_alloc_slots = 0, prev_mst_slots_in_use = 0xFF;
-
- for (i = 0; i < link->mst_stream_alloc_table.stream_count; i++)
- mst_alloc_slots += link->mst_stream_alloc_table.stream_allocations[i].slot_count;
-
- status = dc_process_dmub_set_mst_slots(link->dc, link->link_index,
- mst_alloc_slots, &prev_mst_slots_in_use);
- ASSERT(status != DC_NOT_SUPPORTED);
- DC_LOG_MST("dpia : status[%d]: alloc_slots[%d]: used_slots[%d]\n",
- status, mst_alloc_slots, prev_mst_slots_in_use);
- }
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- switch (dp_get_link_encoding_format(&link->cur_link_settings)) {
- case DP_8b_10b_ENCODING:
- link_encoder->funcs->update_mst_stream_allocation_table(
- link_encoder,
- &link->mst_stream_alloc_table);
- break;
- case DP_128b_132b_ENCODING:
- hpo_dp_link_encoder->funcs->update_stream_allocation_table(
- hpo_dp_link_encoder,
- &link->mst_stream_alloc_table);
- break;
- case DP_UNKNOWN_ENCODING:
- DC_LOG_ERROR("Failure: unknown encoding format\n");
- return DC_ERROR_UNEXPECTED;
- }
-#else
- link_encoder->funcs->update_mst_stream_allocation_table(
- link_encoder,
- &link->mst_stream_alloc_table);
-#endif
-
- if (mst_mode) {
- dm_helpers_dp_mst_poll_for_allocation_change_trigger(
- stream->ctx,
- stream);
-
- dm_helpers_dp_mst_send_payload_allocation(
- stream->ctx,
- stream,
- false);
- }
-
- return DC_OK;
-}
-
-
-#if defined(CONFIG_DRM_AMD_DC_HDCP)
-static void update_psp_stream_config(struct pipe_ctx *pipe_ctx, bool dpms_off)
-{
- struct cp_psp *cp_psp = &pipe_ctx->stream->ctx->cp_psp;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- struct link_encoder *link_enc = NULL;
- struct dc_state *state = pipe_ctx->stream->ctx->dc->current_state;
- struct link_enc_assignment link_enc_assign;
- int i;
-#endif
-
- if (cp_psp && cp_psp->funcs.update_stream_config) {
- struct cp_psp_stream_config config = {0};
- enum dp_panel_mode panel_mode =
- dp_get_panel_mode(pipe_ctx->stream->link);
-
- config.otg_inst = (uint8_t) pipe_ctx->stream_res.tg->inst;
- /*stream_enc_inst*/
- config.dig_fe = (uint8_t) pipe_ctx->stream_res.stream_enc->stream_enc_inst;
- config.dig_be = pipe_ctx->stream->link->link_enc_hw_inst;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- config.stream_enc_idx = pipe_ctx->stream_res.stream_enc->id - ENGINE_ID_DIGA;
-
- if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_PHY ||
- pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
- link_enc = pipe_ctx->stream->link->link_enc;
- config.dio_output_type = pipe_ctx->stream->link->ep_type;
- config.dio_output_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A;
- if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_PHY)
- link_enc = pipe_ctx->stream->link->link_enc;
- else if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
- if (pipe_ctx->stream->link->dc->res_pool->funcs->link_encs_assign) {
- link_enc = link_enc_cfg_get_link_enc_used_by_stream(
- pipe_ctx->stream->ctx->dc,
- pipe_ctx->stream);
- }
- // Initialize PHY ID with ABCDE - 01234 mapping except when it is B0
- config.phy_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A;
-
- //look up the link_enc_assignment for the current pipe_ctx
- for (i = 0; i < state->stream_count; i++) {
- if (pipe_ctx->stream == state->streams[i]) {
- link_enc_assign = state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i];
- }
- }
- // Add flag to guard new A0 DIG mapping
- if (pipe_ctx->stream->ctx->dc->enable_c20_dtm_b0 == true) {
- config.dig_be = link_enc_assign.eng_id;
- config.dio_output_type = pipe_ctx->stream->link->ep_type;
- config.dio_output_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A;
- } else {
- config.dio_output_type = 0;
- config.dio_output_idx = 0;
- }
-
- // Add flag to guard B0 implementation
- if (pipe_ctx->stream->ctx->dc->enable_c20_dtm_b0 == true &&
- link_enc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) {
- if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
- link_enc = link_enc_assign.stream->link_enc;
-
- // enum ID 1-4 maps to DPIA PHY ID 0-3
- config.phy_idx = link_enc_assign.ep_id.link_id.enum_id - ENUM_ID_1;
- } else { // for non DPIA mode over B0, ABCDE maps to 01564
-
- switch (link_enc->transmitter) {
- case TRANSMITTER_UNIPHY_A:
- config.phy_idx = 0;
- break;
- case TRANSMITTER_UNIPHY_B:
- config.phy_idx = 1;
- break;
- case TRANSMITTER_UNIPHY_C:
- config.phy_idx = 5;
- break;
- case TRANSMITTER_UNIPHY_D:
- config.phy_idx = 6;
- break;
- case TRANSMITTER_UNIPHY_E:
- config.phy_idx = 4;
- break;
- default:
- config.phy_idx = 0;
- break;
- }
-
- }
- }
- } else if (pipe_ctx->stream->link->dc->res_pool->funcs->link_encs_assign) {
- link_enc = link_enc_cfg_get_link_enc_used_by_stream(
- pipe_ctx->stream->ctx->dc,
- pipe_ctx->stream);
- config.phy_idx = 0; /* Clear phy_idx for non-physical display endpoints. */
- }
- ASSERT(link_enc);
- if (link_enc)
- config.link_enc_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A;
- if (is_dp_128b_132b_signal(pipe_ctx)) {
- config.stream_enc_idx = pipe_ctx->stream_res.hpo_dp_stream_enc->id - ENGINE_ID_HPO_DP_0;
- config.link_enc_idx = pipe_ctx->stream->link->hpo_dp_link_enc->inst;
- config.dp2_enabled = 1;
- }
-#endif
- config.dpms_off = dpms_off;
- config.dm_stream_ctx = pipe_ctx->stream->dm_stream_context;
- config.assr_enabled = (panel_mode == DP_PANEL_MODE_EDP);
- config.mst_enabled = (pipe_ctx->stream->signal ==
- SIGNAL_TYPE_DISPLAY_PORT_MST);
- cp_psp->funcs.update_stream_config(cp_psp->handle, &config);
- }
-}
-#endif
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-static void fpga_dp_hpo_enable_link_and_stream(struct dc_state *state, struct pipe_ctx *pipe_ctx)
-{
- struct dc *dc = pipe_ctx->stream->ctx->dc;
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct link_mst_stream_allocation_table proposed_table = {0};
- struct fixed31_32 avg_time_slots_per_mtp;
- uint8_t req_slot_count = 0;
- uint8_t vc_id = 1; /// VC ID always 1 for SST
-
- struct dc_link_settings link_settings = {0};
- DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger);
-
- decide_link_settings(stream, &link_settings);
- stream->link->cur_link_settings = link_settings;
-
- /* Enable clock, Configure lane count, and Enable Link Encoder*/
- enable_dp_hpo_output(stream->link, &stream->link->cur_link_settings);
-
-#ifdef DIAGS_BUILD
- /* Workaround for FPGA HPO capture DP link data:
- * HPO capture will set link to active mode
- * This workaround is required to get a capture from start of frame
- */
- if (!dc->debug.fpga_hpo_capture_en) {
- struct encoder_set_dp_phy_pattern_param params = {0};
- params.dp_phy_pattern = DP_TEST_PATTERN_VIDEO_MODE;
-
- /* Set link active */
- stream->link->hpo_dp_link_enc->funcs->set_link_test_pattern(
- stream->link->hpo_dp_link_enc,
- &params);
- }
-#endif
-
- /* Enable DP_STREAM_ENC */
- dc->hwss.enable_stream(pipe_ctx);
-
- /* Set DPS PPS SDP (AKA "info frames") */
- if (pipe_ctx->stream->timing.flags.DSC) {
- dp_set_dsc_pps_sdp(pipe_ctx, true, true);
- }
-
- /* Allocate Payload */
- if ((stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) && (state->stream_count > 1)) {
- // MST case
- uint8_t i;
-
- proposed_table.stream_count = state->stream_count;
- for (i = 0; i < state->stream_count; i++) {
- avg_time_slots_per_mtp = calculate_sst_avg_time_slots_per_mtp(state->streams[i], state->streams[i]->link);
- req_slot_count = dc_fixpt_ceil(avg_time_slots_per_mtp);
- proposed_table.stream_allocations[i].slot_count = req_slot_count;
- proposed_table.stream_allocations[i].vcp_id = i+1;
- /* NOTE: This makes assumption that pipe_ctx index is same as stream index */
- proposed_table.stream_allocations[i].hpo_dp_stream_enc = state->res_ctx.pipe_ctx[i].stream_res.hpo_dp_stream_enc;
- }
- } else {
- // SST case
- avg_time_slots_per_mtp = calculate_sst_avg_time_slots_per_mtp(stream, stream->link);
- req_slot_count = dc_fixpt_ceil(avg_time_slots_per_mtp);
- proposed_table.stream_count = 1; /// Always 1 stream for SST
- proposed_table.stream_allocations[0].slot_count = req_slot_count;
- proposed_table.stream_allocations[0].vcp_id = vc_id;
- proposed_table.stream_allocations[0].hpo_dp_stream_enc = pipe_ctx->stream_res.hpo_dp_stream_enc;
- }
-
- stream->link->hpo_dp_link_enc->funcs->update_stream_allocation_table(
- stream->link->hpo_dp_link_enc,
- &proposed_table);
-
- stream->link->hpo_dp_link_enc->funcs->set_throttled_vcp_size(
- stream->link->hpo_dp_link_enc,
- pipe_ctx->stream_res.hpo_dp_stream_enc->inst,
- avg_time_slots_per_mtp);
-
-
-
- dc->hwss.unblank_stream(pipe_ctx, &stream->link->cur_link_settings);
-}
-#endif
-
-void core_link_enable_stream(
- struct dc_state *state,
- struct pipe_ctx *pipe_ctx)
-{
- struct dc *dc = pipe_ctx->stream->ctx->dc;
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct dc_link *link = stream->sink->link;
- enum dc_status status;
- struct link_encoder *link_enc;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- enum otg_out_mux_dest otg_out_dest = OUT_MUX_DIO;
- struct vpg *vpg = pipe_ctx->stream_res.stream_enc->vpg;
-
- if (is_dp_128b_132b_signal(pipe_ctx))
- vpg = pipe_ctx->stream_res.hpo_dp_stream_enc->vpg;
-#endif
- DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger);
-
- if (!IS_DIAG_DC(dc->ctx->dce_environment) &&
- dc_is_virtual_signal(pipe_ctx->stream->signal))
- return;
-
- if (dc->res_pool->funcs->link_encs_assign && stream->link->ep_type != DISPLAY_ENDPOINT_PHY)
- link_enc = link_enc_cfg_get_link_enc_used_by_stream(dc, stream);
- else
- link_enc = stream->link->link_enc;
- ASSERT(link_enc);
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (!dc_is_virtual_signal(pipe_ctx->stream->signal)
- && !is_dp_128b_132b_signal(pipe_ctx)) {
-#else
- if (!dc_is_virtual_signal(pipe_ctx->stream->signal)) {
-#endif
- if (link_enc)
- link_enc->funcs->setup(
- link_enc,
- pipe_ctx->stream->signal);
- pipe_ctx->stream_res.stream_enc->funcs->setup_stereo_sync(
- pipe_ctx->stream_res.stream_enc,
- pipe_ctx->stream_res.tg->inst,
- stream->timing.timing_3d_format != TIMING_3D_FORMAT_NONE);
- }
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (is_dp_128b_132b_signal(pipe_ctx)) {
- pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->set_stream_attribute(
- pipe_ctx->stream_res.hpo_dp_stream_enc,
- &stream->timing,
- stream->output_color_space,
- stream->use_vsc_sdp_for_colorimetry,
- stream->timing.flags.DSC,
- false);
- otg_out_dest = OUT_MUX_HPO_DP;
- } else if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
- pipe_ctx->stream_res.stream_enc->funcs->dp_set_stream_attribute(
- pipe_ctx->stream_res.stream_enc,
- &stream->timing,
- stream->output_color_space,
- stream->use_vsc_sdp_for_colorimetry,
- stream->link->dpcd_caps.dprx_feature.bits.SST_SPLIT_SDP_CAP);
- }
-#else
- pipe_ctx->stream_res.stream_enc->funcs->dp_set_stream_attribute(
- pipe_ctx->stream_res.stream_enc,
- &stream->timing,
- stream->output_color_space,
- stream->use_vsc_sdp_for_colorimetry,
- stream->link->dpcd_caps.dprx_feature.bits.SST_SPLIT_SDP_CAP);
-#endif
-
- if (dc_is_dp_signal(pipe_ctx->stream->signal))
- dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_DP_STREAM_ATTR);
-
- if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal))
- pipe_ctx->stream_res.stream_enc->funcs->hdmi_set_stream_attribute(
- pipe_ctx->stream_res.stream_enc,
- &stream->timing,
- stream->phy_pix_clk,
- pipe_ctx->stream_res.audio != NULL);
-
- pipe_ctx->stream->link->link_state_valid = true;
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (pipe_ctx->stream_res.tg->funcs->set_out_mux)
- pipe_ctx->stream_res.tg->funcs->set_out_mux(pipe_ctx->stream_res.tg, otg_out_dest);
-#endif
-
- if (dc_is_dvi_signal(pipe_ctx->stream->signal))
- pipe_ctx->stream_res.stream_enc->funcs->dvi_set_stream_attribute(
- pipe_ctx->stream_res.stream_enc,
- &stream->timing,
- (pipe_ctx->stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK) ?
- true : false);
-
- if (dc_is_lvds_signal(pipe_ctx->stream->signal))
- pipe_ctx->stream_res.stream_enc->funcs->lvds_set_stream_attribute(
- pipe_ctx->stream_res.stream_enc,
- &stream->timing);
-
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
- bool apply_edp_fast_boot_optimization =
- pipe_ctx->stream->apply_edp_fast_boot_optimization;
-
- pipe_ctx->stream->apply_edp_fast_boot_optimization = false;
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- // Enable VPG before building infoframe
- if (vpg && vpg->funcs->vpg_poweron)
- vpg->funcs->vpg_poweron(vpg);
-#endif
-
- resource_build_info_frame(pipe_ctx);
- dc->hwss.update_info_frame(pipe_ctx);
-
- if (dc_is_dp_signal(pipe_ctx->stream->signal))
- dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_UPDATE_INFO_FRAME);
-
- /* Do not touch link on seamless boot optimization. */
- if (pipe_ctx->stream->apply_seamless_boot_optimization) {
- pipe_ctx->stream->dpms_off = false;
-
- /* Still enable stream features & audio on seamless boot for DP external displays */
- if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT) {
- enable_stream_features(pipe_ctx);
- if (pipe_ctx->stream_res.audio != NULL) {
- pipe_ctx->stream_res.stream_enc->funcs->dp_audio_enable(pipe_ctx->stream_res.stream_enc);
- dc->hwss.enable_audio_stream(pipe_ctx);
- }
- }
-
-#if defined(CONFIG_DRM_AMD_DC_HDCP)
- update_psp_stream_config(pipe_ctx, false);
-#endif
- return;
- }
-
- /* eDP lit up by bios already, no need to enable again. */
- if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP &&
- apply_edp_fast_boot_optimization &&
- !pipe_ctx->stream->timing.flags.DSC) {
- pipe_ctx->stream->dpms_off = false;
-#if defined(CONFIG_DRM_AMD_DC_HDCP)
- update_psp_stream_config(pipe_ctx, false);
-#endif
- return;
- }
-
- if (pipe_ctx->stream->dpms_off)
- return;
-
- /* Have to setup DSC before DIG FE and BE are connected (which happens before the
- * link training). This is to make sure the bandwidth sent to DIG BE won't be
- * bigger than what the link and/or DIG BE can handle. VBID[6]/CompressedStream_flag
- * will be automatically set at a later time when the video is enabled
- * (DP_VID_STREAM_EN = 1).
- */
- if (pipe_ctx->stream->timing.flags.DSC) {
- if (dc_is_dp_signal(pipe_ctx->stream->signal) ||
- dc_is_virtual_signal(pipe_ctx->stream->signal))
- dp_set_dsc_enable(pipe_ctx, true);
- }
-
- status = enable_link(state, pipe_ctx);
-
- if (status != DC_OK) {
- DC_LOG_WARNING("enabling link %u failed: %d\n",
- pipe_ctx->stream->link->link_index,
- status);
-
- /* Abort stream enable *unless* the failure was due to
- * DP link training - some DP monitors will recover and
- * show the stream anyway. But MST displays can't proceed
- * without link training.
- */
- if (status != DC_FAIL_DP_LINK_TRAINING ||
- pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
- if (false == stream->link->link_status.link_active)
- disable_link(stream->link, pipe_ctx->stream->signal);
- BREAK_TO_DEBUGGER();
- return;
- }
- }
-
- /* turn off otg test pattern if enable */
- if (pipe_ctx->stream_res.tg->funcs->set_test_pattern)
- pipe_ctx->stream_res.tg->funcs->set_test_pattern(pipe_ctx->stream_res.tg,
- CONTROLLER_DP_TEST_PATTERN_VIDEOMODE,
- COLOR_DEPTH_UNDEFINED);
-
- /* This second call is needed to reconfigure the DIG
- * as a workaround for the incorrect value being applied
- * from transmitter control.
- */
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (!(dc_is_virtual_signal(pipe_ctx->stream->signal) ||
- is_dp_128b_132b_signal(pipe_ctx)))
-#else
- if (!dc_is_virtual_signal(pipe_ctx->stream->signal))
-#endif
- if (link_enc)
- link_enc->funcs->setup(
- link_enc,
- pipe_ctx->stream->signal);
-
- dc->hwss.enable_stream(pipe_ctx);
-
- /* Set DPS PPS SDP (AKA "info frames") */
- if (pipe_ctx->stream->timing.flags.DSC) {
- if (dc_is_dp_signal(pipe_ctx->stream->signal) ||
- dc_is_virtual_signal(pipe_ctx->stream->signal)) {
- dp_set_dsc_on_rx(pipe_ctx, true);
- dp_set_dsc_pps_sdp(pipe_ctx, true, true);
- }
- }
-
- if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
- dc_link_allocate_mst_payload(pipe_ctx);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT &&
- is_dp_128b_132b_signal(pipe_ctx))
- dc_link_update_sst_payload(pipe_ctx, true);
-#endif
-
- dc->hwss.unblank_stream(pipe_ctx,
- &pipe_ctx->stream->link->cur_link_settings);
-
- if (stream->sink_patches.delay_ignore_msa > 0)
- msleep(stream->sink_patches.delay_ignore_msa);
-
- if (dc_is_dp_signal(pipe_ctx->stream->signal))
- enable_stream_features(pipe_ctx);
-#if defined(CONFIG_DRM_AMD_DC_HDCP)
- update_psp_stream_config(pipe_ctx, false);
-#endif
-
- dc->hwss.enable_audio_stream(pipe_ctx);
-
- } else { // if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (is_dp_128b_132b_signal(pipe_ctx)) {
- fpga_dp_hpo_enable_link_and_stream(state, pipe_ctx);
- }
-#endif
- if (dc_is_dp_signal(pipe_ctx->stream->signal) ||
- dc_is_virtual_signal(pipe_ctx->stream->signal))
- dp_set_dsc_enable(pipe_ctx, true);
-
- }
-
- if (pipe_ctx->stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) {
- core_link_set_avmute(pipe_ctx, false);
- }
-}
-
-void core_link_disable_stream(struct pipe_ctx *pipe_ctx)
-{
- struct dc *dc = pipe_ctx->stream->ctx->dc;
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct dc_link *link = stream->sink->link;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- struct vpg *vpg = pipe_ctx->stream_res.stream_enc->vpg;
-
- if (is_dp_128b_132b_signal(pipe_ctx))
- vpg = pipe_ctx->stream_res.hpo_dp_stream_enc->vpg;
-#endif
-
- if (!IS_DIAG_DC(dc->ctx->dce_environment) &&
- dc_is_virtual_signal(pipe_ctx->stream->signal))
- return;
-
- if (!pipe_ctx->stream->sink->edid_caps.panel_patch.skip_avmute) {
- if (dc_is_hdmi_signal(pipe_ctx->stream->signal))
- core_link_set_avmute(pipe_ctx, true);
- }
-
- dc->hwss.disable_audio_stream(pipe_ctx);
-
-#if defined(CONFIG_DRM_AMD_DC_HDCP)
- update_psp_stream_config(pipe_ctx, true);
-#endif
- dc->hwss.blank_stream(pipe_ctx);
-
- if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
- deallocate_mst_payload(pipe_ctx);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT &&
- is_dp_128b_132b_signal(pipe_ctx))
- dc_link_update_sst_payload(pipe_ctx, false);
-#endif
-
- if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) {
- struct ext_hdmi_settings settings = {0};
- enum engine_id eng_id = pipe_ctx->stream_res.stream_enc->id;
-
- unsigned short masked_chip_caps = link->chip_caps &
- EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK;
- //Need to inform that sink is going to use legacy HDMI mode.
- dal_ddc_service_write_scdc_data(
- link->ddc,
- 165000,//vbios only handles 165Mhz.
- false);
- if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) {
- /* DP159, Retimer settings */
- if (get_ext_hdmi_settings(pipe_ctx, eng_id, &settings))
- write_i2c_retimer_setting(pipe_ctx,
- false, false, &settings);
- else
- write_i2c_default_retimer_setting(pipe_ctx,
- false, false);
- } else if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) {
- /* PI3EQX1204, Redriver settings */
- write_i2c_redriver_setting(pipe_ctx, false);
- }
- }
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT &&
- !is_dp_128b_132b_signal(pipe_ctx)) {
-
- /* In DP1.x SST mode, our encoder will go to TPS1
- * when link is on but stream is off.
- * Disabling link before stream will avoid exposing TPS1 pattern
- * during the disable sequence as it will confuse some receivers
- * state machine.
- * In DP2 or MST mode, our encoder will stay video active
- */
- disable_link(pipe_ctx->stream->link, pipe_ctx->stream->signal);
- dc->hwss.disable_stream(pipe_ctx);
- } else {
- dc->hwss.disable_stream(pipe_ctx);
- disable_link(pipe_ctx->stream->link, pipe_ctx->stream->signal);
- }
-#else
- disable_link(pipe_ctx->stream->link, pipe_ctx->stream->signal);
-
- dc->hwss.disable_stream(pipe_ctx);
-#endif
-
- if (pipe_ctx->stream->timing.flags.DSC) {
- if (dc_is_dp_signal(pipe_ctx->stream->signal))
- dp_set_dsc_enable(pipe_ctx, false);
- }
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (is_dp_128b_132b_signal(pipe_ctx)) {
- if (pipe_ctx->stream_res.tg->funcs->set_out_mux)
- pipe_ctx->stream_res.tg->funcs->set_out_mux(pipe_ctx->stream_res.tg, OUT_MUX_DIO);
- }
-#endif
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (vpg && vpg->funcs->vpg_powerdown)
- vpg->funcs->vpg_powerdown(vpg);
-#endif
-}
-
-void core_link_set_avmute(struct pipe_ctx *pipe_ctx, bool enable)
-{
- struct dc *dc = pipe_ctx->stream->ctx->dc;
-
- if (!dc_is_hdmi_signal(pipe_ctx->stream->signal))
- return;
-
- dc->hwss.set_avmute(pipe_ctx, enable);
-}
-
-/**
- * dc_link_enable_hpd_filter:
- * If enable is true, programs HPD filter on associated HPD line using
- * delay_on_disconnect/delay_on_connect values dependent on
- * link->connector_signal
- *
- * If enable is false, programs HPD filter on associated HPD line with no
- * delays on connect or disconnect
- *
- * @link: pointer to the dc link
- * @enable: boolean specifying whether to enable hbd
- */
-void dc_link_enable_hpd_filter(struct dc_link *link, bool enable)
-{
- struct gpio *hpd;
-
- if (enable) {
- link->is_hpd_filter_disabled = false;
- program_hpd_filter(link);
- } else {
- link->is_hpd_filter_disabled = true;
- /* Obtain HPD handle */
- hpd = get_hpd_gpio(link->ctx->dc_bios, link->link_id, link->ctx->gpio_service);
-
- if (!hpd)
- return;
-
- /* Setup HPD filtering */
- if (dal_gpio_open(hpd, GPIO_MODE_INTERRUPT) == GPIO_RESULT_OK) {
- struct gpio_hpd_config config;
-
- config.delay_on_connect = 0;
- config.delay_on_disconnect = 0;
-
- dal_irq_setup_hpd_filter(hpd, &config);
-
- dal_gpio_close(hpd);
- } else {
- ASSERT_CRITICAL(false);
- }
- /* Release HPD handle */
- dal_gpio_destroy_irq(&hpd);
- }
-}
-
-void dc_link_set_drive_settings(struct dc *dc,
- struct link_training_settings *lt_settings,
- const struct dc_link *link)
-{
-
- int i;
-
- for (i = 0; i < dc->link_count; i++) {
- if (dc->links[i] == link)
- break;
- }
-
- if (i >= dc->link_count)
- ASSERT_CRITICAL(false);
-
- dc_link_dp_set_drive_settings(dc->links[i], lt_settings);
-}
-
-void dc_link_set_preferred_link_settings(struct dc *dc,
- struct dc_link_settings *link_setting,
- struct dc_link *link)
-{
- int i;
- struct pipe_ctx *pipe;
- struct dc_stream_state *link_stream;
- struct dc_link_settings store_settings = *link_setting;
-
- link->preferred_link_setting = store_settings;
-
- /* Retrain with preferred link settings only relevant for
- * DP signal type
- * Check for non-DP signal or if passive dongle present
- */
- if (!dc_is_dp_signal(link->connector_signal) ||
- link->dongle_max_pix_clk > 0)
- return;
-
- for (i = 0; i < MAX_PIPES; i++) {
- pipe = &dc->current_state->res_ctx.pipe_ctx[i];
- if (pipe->stream && pipe->stream->link) {
- if (pipe->stream->link == link) {
- link_stream = pipe->stream;
- break;
- }
- }
- }
-
- /* Stream not found */
- if (i == MAX_PIPES)
- return;
-
- /* Cannot retrain link if backend is off */
- if (link_stream->dpms_off)
- return;
-
- decide_link_settings(link_stream, &store_settings);
-
- if ((store_settings.lane_count != LANE_COUNT_UNKNOWN) &&
- (store_settings.link_rate != LINK_RATE_UNKNOWN))
- dp_retrain_link_dp_test(link, &store_settings, false);
-}
-
-void dc_link_set_preferred_training_settings(struct dc *dc,
- struct dc_link_settings *link_setting,
- struct dc_link_training_overrides *lt_overrides,
- struct dc_link *link,
- bool skip_immediate_retrain)
-{
- if (lt_overrides != NULL)
- link->preferred_training_settings = *lt_overrides;
- else
- memset(&link->preferred_training_settings, 0, sizeof(link->preferred_training_settings));
-
- if (link_setting != NULL) {
- link->preferred_link_setting = *link_setting;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dp_get_link_encoding_format(link_setting) ==
- DP_128b_132b_ENCODING && !link->hpo_dp_link_enc) {
- if (!add_dp_hpo_link_encoder_to_link(link))
- memset(&link->preferred_link_setting, 0, sizeof(link->preferred_link_setting));
- }
-#endif
- } else {
- link->preferred_link_setting.lane_count = LANE_COUNT_UNKNOWN;
- link->preferred_link_setting.link_rate = LINK_RATE_UNKNOWN;
- }
-
- /* Retrain now, or wait until next stream update to apply */
- if (skip_immediate_retrain == false)
- dc_link_set_preferred_link_settings(dc, &link->preferred_link_setting, link);
-}
-
-void dc_link_enable_hpd(const struct dc_link *link)
-{
- dc_link_dp_enable_hpd(link);
-}
-
-void dc_link_disable_hpd(const struct dc_link *link)
-{
- dc_link_dp_disable_hpd(link);
-}
-
-void dc_link_set_test_pattern(struct dc_link *link,
- enum dp_test_pattern test_pattern,
- enum dp_test_pattern_color_space test_pattern_color_space,
- const struct link_training_settings *p_link_settings,
- const unsigned char *p_custom_pattern,
- unsigned int cust_pattern_size)
-{
- if (link != NULL)
- dc_link_dp_set_test_pattern(
- link,
- test_pattern,
- test_pattern_color_space,
- p_link_settings,
- p_custom_pattern,
- cust_pattern_size);
-}
-
-uint32_t dc_link_bandwidth_kbps(
- const struct dc_link *link,
- const struct dc_link_settings *link_setting)
-{
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- uint32_t total_data_bw_efficiency_x10000 = 0;
- uint32_t link_rate_per_lane_kbps = 0;
-
- switch (dp_get_link_encoding_format(link_setting)) {
- case DP_8b_10b_ENCODING:
- /* For 8b/10b encoding:
- * link rate is defined in the unit of LINK_RATE_REF_FREQ_IN_KHZ per DP byte per lane.
- * data bandwidth efficiency is 80% with additional 3% overhead if FEC is supported.
- */
- link_rate_per_lane_kbps = link_setting->link_rate * LINK_RATE_REF_FREQ_IN_KHZ * BITS_PER_DP_BYTE;
- total_data_bw_efficiency_x10000 = DATA_EFFICIENCY_8b_10b_x10000;
- if (dc_link_should_enable_fec(link)) {
- total_data_bw_efficiency_x10000 /= 100;
- total_data_bw_efficiency_x10000 *= DATA_EFFICIENCY_8b_10b_FEC_EFFICIENCY_x100;
- }
- break;
- case DP_128b_132b_ENCODING:
- /* For 128b/132b encoding:
- * link rate is defined in the unit of 10mbps per lane.
- * total data bandwidth efficiency is always 96.71%.
- */
- link_rate_per_lane_kbps = link_setting->link_rate * 10000;
- total_data_bw_efficiency_x10000 = DATA_EFFICIENCY_128b_132b_x10000;
- break;
- default:
- break;
- }
-
- /* overall effective link bandwidth = link rate per lane * lane count * total data bandwidth efficiency */
- return link_rate_per_lane_kbps * link_setting->lane_count / 10000 * total_data_bw_efficiency_x10000;
-#else
- uint32_t link_bw_kbps =
- link_setting->link_rate * LINK_RATE_REF_FREQ_IN_KHZ; /* bytes per sec */
-
- link_bw_kbps *= 8; /* 8 bits per byte*/
- link_bw_kbps *= link_setting->lane_count;
-
- if (dc_link_should_enable_fec(link)) {
- /* Account for FEC overhead.
- * We have to do it based on caps,
- * and not based on FEC being set ready,
- * because FEC is set ready too late in
- * the process to correctly be picked up
- * by mode enumeration.
- *
- * There's enough zeros at the end of 'kbps'
- * that make the below operation 100% precise
- * for our purposes.
- * 'long long' makes it work even for HDMI 2.1
- * max bandwidth (and much, much bigger bandwidths
- * than that, actually).
- *
- * NOTE: Reducing link BW by 3% may not be precise
- * because it may be a stream BT that increases by 3%, and so
- * 1/1.03 = 0.970873 factor should have been used instead,
- * but the difference is minimal and is in a safe direction,
- * which all works well around potential ambiguity of DP 1.4a spec.
- */
- long long fec_link_bw_kbps = link_bw_kbps * 970LL;
- link_bw_kbps = (uint32_t)(div64_s64(fec_link_bw_kbps, 1000LL));
- }
- return link_bw_kbps;
-
-#endif
-}
-
-const struct dc_link_settings *dc_link_get_link_cap(
- const struct dc_link *link)
-{
- if (link->preferred_link_setting.lane_count != LANE_COUNT_UNKNOWN &&
- link->preferred_link_setting.link_rate != LINK_RATE_UNKNOWN)
- return &link->preferred_link_setting;
- return &link->verified_link_cap;
-}
-
-void dc_link_overwrite_extended_receiver_cap(
- struct dc_link *link)
-{
- dp_overwrite_extended_receiver_cap(link);
-}
-
-bool dc_link_is_fec_supported(const struct dc_link *link)
-{
- struct link_encoder *link_enc = NULL;
-
- /* Links supporting dynamically assigned link encoder will be assigned next
- * available encoder if one not already assigned.
- */
- if (link->is_dig_mapping_flexible &&
- link->dc->res_pool->funcs->link_encs_assign) {
- link_enc = link_enc_cfg_get_link_enc_used_by_link(link->ctx->dc, link);
- if (link_enc == NULL)
- link_enc = link_enc_cfg_get_next_avail_link_enc(link->ctx->dc);
- } else
- link_enc = link->link_enc;
- ASSERT(link_enc);
-
- return (dc_is_dp_signal(link->connector_signal) && link_enc &&
- link_enc->features.fec_supported &&
- link->dpcd_caps.fec_cap.bits.FEC_CAPABLE &&
- !IS_FPGA_MAXIMUS_DC(link->ctx->dce_environment));
-}
-
-bool dc_link_should_enable_fec(const struct dc_link *link)
-{
- bool is_fec_disable = false;
- bool ret = false;
-
- if ((link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT_MST &&
- link->local_sink &&
- link->local_sink->edid_caps.panel_patch.disable_fec) ||
- (link->connector_signal == SIGNAL_TYPE_EDP
- ))
- is_fec_disable = true;
-
- if (dc_link_is_fec_supported(link) && !link->dc->debug.disable_fec && !is_fec_disable)
- ret = true;
-
- return ret;
-}
-
-uint32_t dc_bandwidth_in_kbps_from_timing(
- const struct dc_crtc_timing *timing)
-{
- uint32_t bits_per_channel = 0;
- uint32_t kbps;
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (timing->flags.DSC)
- return dc_dsc_stream_bandwidth_in_kbps(timing,
- timing->dsc_cfg.bits_per_pixel,
- timing->dsc_cfg.num_slices_h,
- timing->dsc_cfg.is_dp);
-#endif /* CONFIG_DRM_AMD_DC_DCN */
-
- switch (timing->display_color_depth) {
- case COLOR_DEPTH_666:
- bits_per_channel = 6;
- break;
- case COLOR_DEPTH_888:
- bits_per_channel = 8;
- break;
- case COLOR_DEPTH_101010:
- bits_per_channel = 10;
- break;
- case COLOR_DEPTH_121212:
- bits_per_channel = 12;
- break;
- case COLOR_DEPTH_141414:
- bits_per_channel = 14;
- break;
- case COLOR_DEPTH_161616:
- bits_per_channel = 16;
- break;
- default:
- ASSERT(bits_per_channel != 0);
- bits_per_channel = 8;
- break;
- }
-
- kbps = timing->pix_clk_100hz / 10;
- kbps *= bits_per_channel;
-
- if (timing->flags.Y_ONLY != 1) {
- /*Only YOnly make reduce bandwidth by 1/3 compares to RGB*/
- kbps *= 3;
- if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420)
- kbps /= 2;
- else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422)
- kbps = kbps * 2 / 3;
- }
-
- return kbps;
-
-}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
deleted file mode 100644
index 60539b1f2a80..000000000000
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
+++ /dev/null
@@ -1,780 +0,0 @@
-/*
- * Copyright 2012-15 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include <linux/slab.h>
-
-#include "dm_services.h"
-#include "dm_helpers.h"
-#include "gpio_service_interface.h"
-#include "include/ddc_service_types.h"
-#include "include/grph_object_id.h"
-#include "include/dpcd_defs.h"
-#include "include/logger_interface.h"
-#include "include/vector.h"
-#include "core_types.h"
-#include "dc_link_ddc.h"
-#include "dce/dce_aux.h"
-#include "dmub/inc/dmub_cmd.h"
-
-#define DC_LOGGER_INIT(logger)
-
-static const uint8_t DP_VGA_DONGLE_BRANCH_DEV_NAME[] = "DpVga";
-/* DP to Dual link DVI converter */
-static const uint8_t DP_DVI_CONVERTER_ID_4[] = "m2DVIa";
-static const uint8_t DP_DVI_CONVERTER_ID_5[] = "3393N2";
-
-#define AUX_POWER_UP_WA_DELAY 500
-#define I2C_OVER_AUX_DEFER_WA_DELAY 70
-#define DPVGA_DONGLE_AUX_DEFER_WA_DELAY 40
-#define I2C_OVER_AUX_DEFER_WA_DELAY_1MS 1
-
-/* CV smart dongle slave address for retrieving supported HDTV modes*/
-#define CV_SMART_DONGLE_ADDRESS 0x20
-/* DVI-HDMI dongle slave address for retrieving dongle signature*/
-#define DVI_HDMI_DONGLE_ADDRESS 0x68
-struct dvi_hdmi_dongle_signature_data {
- int8_t vendor[3];/* "AMD" */
- uint8_t version[2];
- uint8_t size;
- int8_t id[11];/* "6140063500G"*/
-};
-/* DP-HDMI dongle slave address for retrieving dongle signature*/
-#define DP_HDMI_DONGLE_ADDRESS 0x40
-static const uint8_t dp_hdmi_dongle_signature_str[] = "DP-HDMI ADAPTOR";
-#define DP_HDMI_DONGLE_SIGNATURE_EOT 0x04
-
-struct dp_hdmi_dongle_signature_data {
- int8_t id[15];/* "DP-HDMI ADAPTOR"*/
- uint8_t eot;/* end of transmition '\x4' */
-};
-
-/* SCDC Address defines (HDMI 2.0)*/
-#define HDMI_SCDC_WRITE_UPDATE_0_ARRAY 3
-#define HDMI_SCDC_ADDRESS 0x54
-#define HDMI_SCDC_SINK_VERSION 0x01
-#define HDMI_SCDC_SOURCE_VERSION 0x02
-#define HDMI_SCDC_UPDATE_0 0x10
-#define HDMI_SCDC_TMDS_CONFIG 0x20
-#define HDMI_SCDC_SCRAMBLER_STATUS 0x21
-#define HDMI_SCDC_CONFIG_0 0x30
-#define HDMI_SCDC_STATUS_FLAGS 0x40
-#define HDMI_SCDC_ERR_DETECT 0x50
-#define HDMI_SCDC_TEST_CONFIG 0xC0
-
-union hdmi_scdc_update_read_data {
- uint8_t byte[2];
- struct {
- uint8_t STATUS_UPDATE:1;
- uint8_t CED_UPDATE:1;
- uint8_t RR_TEST:1;
- uint8_t RESERVED:5;
- uint8_t RESERVED2:8;
- } fields;
-};
-
-union hdmi_scdc_status_flags_data {
- uint8_t byte[2];
- struct {
- uint8_t CLOCK_DETECTED:1;
- uint8_t CH0_LOCKED:1;
- uint8_t CH1_LOCKED:1;
- uint8_t CH2_LOCKED:1;
- uint8_t RESERVED:4;
- uint8_t RESERVED2:8;
- uint8_t RESERVED3:8;
-
- } fields;
-};
-
-union hdmi_scdc_ced_data {
- uint8_t byte[7];
- struct {
- uint8_t CH0_8LOW:8;
- uint8_t CH0_7HIGH:7;
- uint8_t CH0_VALID:1;
- uint8_t CH1_8LOW:8;
- uint8_t CH1_7HIGH:7;
- uint8_t CH1_VALID:1;
- uint8_t CH2_8LOW:8;
- uint8_t CH2_7HIGH:7;
- uint8_t CH2_VALID:1;
- uint8_t CHECKSUM:8;
- uint8_t RESERVED:8;
- uint8_t RESERVED2:8;
- uint8_t RESERVED3:8;
- uint8_t RESERVED4:4;
- } fields;
-};
-
-struct i2c_payloads {
- struct vector payloads;
-};
-
-struct aux_payloads {
- struct vector payloads;
-};
-
-static bool dal_ddc_i2c_payloads_create(
- struct dc_context *ctx,
- struct i2c_payloads *payloads,
- uint32_t count)
-{
- if (dal_vector_construct(
- &payloads->payloads, ctx, count, sizeof(struct i2c_payload)))
- return true;
-
- return false;
-}
-
-static struct i2c_payload *dal_ddc_i2c_payloads_get(struct i2c_payloads *p)
-{
- return (struct i2c_payload *)p->payloads.container;
-}
-
-static uint32_t dal_ddc_i2c_payloads_get_count(struct i2c_payloads *p)
-{
- return p->payloads.count;
-}
-
-#define DDC_MIN(a, b) (((a) < (b)) ? (a) : (b))
-
-void dal_ddc_i2c_payloads_add(
- struct i2c_payloads *payloads,
- uint32_t address,
- uint32_t len,
- uint8_t *data,
- bool write)
-{
- uint32_t payload_size = EDID_SEGMENT_SIZE;
- uint32_t pos;
-
- for (pos = 0; pos < len; pos += payload_size) {
- struct i2c_payload payload = {
- .write = write,
- .address = address,
- .length = DDC_MIN(payload_size, len - pos),
- .data = data + pos };
- dal_vector_append(&payloads->payloads, &payload);
- }
-
-}
-
-static void ddc_service_construct(
- struct ddc_service *ddc_service,
- struct ddc_service_init_data *init_data)
-{
- enum connector_id connector_id =
- dal_graphics_object_id_get_connector_id(init_data->id);
-
- struct gpio_service *gpio_service = init_data->ctx->gpio_service;
- struct graphics_object_i2c_info i2c_info;
- struct gpio_ddc_hw_info hw_info;
- struct dc_bios *dcb = init_data->ctx->dc_bios;
-
- ddc_service->link = init_data->link;
- ddc_service->ctx = init_data->ctx;
-
- if (init_data->is_dpia_link ||
- dcb->funcs->get_i2c_info(dcb, init_data->id, &i2c_info) != BP_RESULT_OK) {
- ddc_service->ddc_pin = NULL;
- } else {
- DC_LOGGER_INIT(ddc_service->ctx->logger);
- DC_LOG_DC("BIOS object table - i2c_line: %d", i2c_info.i2c_line);
- DC_LOG_DC("BIOS object table - i2c_engine_id: %d", i2c_info.i2c_engine_id);
-
- hw_info.ddc_channel = i2c_info.i2c_line;
- if (ddc_service->link != NULL)
- hw_info.hw_supported = i2c_info.i2c_hw_assist;
- else
- hw_info.hw_supported = false;
-
- ddc_service->ddc_pin = dal_gpio_create_ddc(
- gpio_service,
- i2c_info.gpio_info.clk_a_register_index,
- 1 << i2c_info.gpio_info.clk_a_shift,
- &hw_info);
- }
-
- ddc_service->flags.EDID_QUERY_DONE_ONCE = false;
- ddc_service->flags.FORCE_READ_REPEATED_START = false;
- ddc_service->flags.EDID_STRESS_READ = false;
-
- ddc_service->flags.IS_INTERNAL_DISPLAY =
- connector_id == CONNECTOR_ID_EDP ||
- connector_id == CONNECTOR_ID_LVDS;
-
- ddc_service->wa.raw = 0;
-}
-
-struct ddc_service *dal_ddc_service_create(
- struct ddc_service_init_data *init_data)
-{
- struct ddc_service *ddc_service;
-
- ddc_service = kzalloc(sizeof(struct ddc_service), GFP_KERNEL);
-
- if (!ddc_service)
- return NULL;
-
- ddc_service_construct(ddc_service, init_data);
- return ddc_service;
-}
-
-static void ddc_service_destruct(struct ddc_service *ddc)
-{
- if (ddc->ddc_pin)
- dal_gpio_destroy_ddc(&ddc->ddc_pin);
-}
-
-void dal_ddc_service_destroy(struct ddc_service **ddc)
-{
- if (!ddc || !*ddc) {
- BREAK_TO_DEBUGGER();
- return;
- }
- ddc_service_destruct(*ddc);
- kfree(*ddc);
- *ddc = NULL;
-}
-
-enum ddc_service_type dal_ddc_service_get_type(struct ddc_service *ddc)
-{
- return DDC_SERVICE_TYPE_CONNECTOR;
-}
-
-void dal_ddc_service_set_transaction_type(
- struct ddc_service *ddc,
- enum ddc_transaction_type type)
-{
- ddc->transaction_type = type;
-}
-
-bool dal_ddc_service_is_in_aux_transaction_mode(struct ddc_service *ddc)
-{
- switch (ddc->transaction_type) {
- case DDC_TRANSACTION_TYPE_I2C_OVER_AUX:
- case DDC_TRANSACTION_TYPE_I2C_OVER_AUX_WITH_DEFER:
- case DDC_TRANSACTION_TYPE_I2C_OVER_AUX_RETRY_DEFER:
- return true;
- default:
- break;
- }
- return false;
-}
-
-void ddc_service_set_dongle_type(struct ddc_service *ddc,
- enum display_dongle_type dongle_type)
-{
- ddc->dongle_type = dongle_type;
-}
-
-static uint32_t defer_delay_converter_wa(
- struct ddc_service *ddc,
- uint32_t defer_delay)
-{
- struct dc_link *link = ddc->link;
-
- if (link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_VGA_CONVERTER &&
- link->dpcd_caps.branch_dev_id == DP_BRANCH_DEVICE_ID_0080E1 &&
- !memcmp(link->dpcd_caps.branch_dev_name,
- DP_VGA_DONGLE_BRANCH_DEV_NAME,
- sizeof(link->dpcd_caps.branch_dev_name)))
-
- return defer_delay > DPVGA_DONGLE_AUX_DEFER_WA_DELAY ?
- defer_delay : DPVGA_DONGLE_AUX_DEFER_WA_DELAY;
-
- if (link->dpcd_caps.branch_dev_id == DP_BRANCH_DEVICE_ID_0080E1 &&
- !memcmp(link->dpcd_caps.branch_dev_name,
- DP_DVI_CONVERTER_ID_4,
- sizeof(link->dpcd_caps.branch_dev_name)))
- return defer_delay > I2C_OVER_AUX_DEFER_WA_DELAY ?
- defer_delay : I2C_OVER_AUX_DEFER_WA_DELAY;
- if (link->dpcd_caps.branch_dev_id == DP_BRANCH_DEVICE_ID_006037 &&
- !memcmp(link->dpcd_caps.branch_dev_name,
- DP_DVI_CONVERTER_ID_5,
- sizeof(link->dpcd_caps.branch_dev_name)))
- return defer_delay > I2C_OVER_AUX_DEFER_WA_DELAY_1MS ?
- I2C_OVER_AUX_DEFER_WA_DELAY_1MS : defer_delay;
-
- return defer_delay;
-}
-
-#define DP_TRANSLATOR_DELAY 5
-
-uint32_t get_defer_delay(struct ddc_service *ddc)
-{
- uint32_t defer_delay = 0;
-
- switch (ddc->transaction_type) {
- case DDC_TRANSACTION_TYPE_I2C_OVER_AUX:
- if ((DISPLAY_DONGLE_DP_VGA_CONVERTER == ddc->dongle_type) ||
- (DISPLAY_DONGLE_DP_DVI_CONVERTER == ddc->dongle_type) ||
- (DISPLAY_DONGLE_DP_HDMI_CONVERTER ==
- ddc->dongle_type)) {
-
- defer_delay = DP_TRANSLATOR_DELAY;
-
- defer_delay =
- defer_delay_converter_wa(ddc, defer_delay);
-
- } else /*sink has a delay different from an Active Converter*/
- defer_delay = 0;
- break;
- case DDC_TRANSACTION_TYPE_I2C_OVER_AUX_WITH_DEFER:
- defer_delay = DP_TRANSLATOR_DELAY;
- break;
- default:
- break;
- }
- return defer_delay;
-}
-
-static bool i2c_read(
- struct ddc_service *ddc,
- uint32_t address,
- uint8_t *buffer,
- uint32_t len)
-{
- uint8_t offs_data = 0;
- struct i2c_payload payloads[2] = {
- {
- .write = true,
- .address = address,
- .length = 1,
- .data = &offs_data },
- {
- .write = false,
- .address = address,
- .length = len,
- .data = buffer } };
-
- struct i2c_command command = {
- .payloads = payloads,
- .number_of_payloads = 2,
- .engine = DDC_I2C_COMMAND_ENGINE,
- .speed = ddc->ctx->dc->caps.i2c_speed_in_khz };
-
- return dm_helpers_submit_i2c(
- ddc->ctx,
- ddc->link,
- &command);
-}
-
-void dal_ddc_service_i2c_query_dp_dual_mode_adaptor(
- struct ddc_service *ddc,
- struct display_sink_capability *sink_cap)
-{
- uint8_t i;
- bool is_valid_hdmi_signature;
- enum display_dongle_type *dongle = &sink_cap->dongle_type;
- uint8_t type2_dongle_buf[DP_ADAPTOR_TYPE2_SIZE];
- bool is_type2_dongle = false;
- int retry_count = 2;
- struct dp_hdmi_dongle_signature_data *dongle_signature;
-
- /* Assume we have no valid DP passive dongle connected */
- *dongle = DISPLAY_DONGLE_NONE;
- sink_cap->max_hdmi_pixel_clock = DP_ADAPTOR_HDMI_SAFE_MAX_TMDS_CLK;
-
- /* Read DP-HDMI dongle I2c (no response interpreted as DP-DVI dongle)*/
- if (!i2c_read(
- ddc,
- DP_HDMI_DONGLE_ADDRESS,
- type2_dongle_buf,
- sizeof(type2_dongle_buf))) {
- /* Passive HDMI dongles can sometimes fail here without retrying*/
- while (retry_count > 0) {
- if (i2c_read(ddc,
- DP_HDMI_DONGLE_ADDRESS,
- type2_dongle_buf,
- sizeof(type2_dongle_buf)))
- break;
- retry_count--;
- }
- if (retry_count == 0) {
- *dongle = DISPLAY_DONGLE_DP_DVI_DONGLE;
- sink_cap->max_hdmi_pixel_clock = DP_ADAPTOR_DVI_MAX_TMDS_CLK;
-
- CONN_DATA_DETECT(ddc->link, type2_dongle_buf, sizeof(type2_dongle_buf),
- "DP-DVI passive dongle %dMhz: ",
- DP_ADAPTOR_DVI_MAX_TMDS_CLK / 1000);
- return;
- }
- }
-
- /* Check if Type 2 dongle.*/
- if (type2_dongle_buf[DP_ADAPTOR_TYPE2_REG_ID] == DP_ADAPTOR_TYPE2_ID)
- is_type2_dongle = true;
-
- dongle_signature =
- (struct dp_hdmi_dongle_signature_data *)type2_dongle_buf;
-
- is_valid_hdmi_signature = true;
-
- /* Check EOT */
- if (dongle_signature->eot != DP_HDMI_DONGLE_SIGNATURE_EOT) {
- is_valid_hdmi_signature = false;
- }
-
- /* Check signature */
- for (i = 0; i < sizeof(dongle_signature->id); ++i) {
- /* If its not the right signature,
- * skip mismatch in subversion byte.*/
- if (dongle_signature->id[i] !=
- dp_hdmi_dongle_signature_str[i] && i != 3) {
-
- if (is_type2_dongle) {
- is_valid_hdmi_signature = false;
- break;
- }
-
- }
- }
-
- if (is_type2_dongle) {
- uint32_t max_tmds_clk =
- type2_dongle_buf[DP_ADAPTOR_TYPE2_REG_MAX_TMDS_CLK];
-
- max_tmds_clk = max_tmds_clk * 2 + max_tmds_clk / 2;
-
- if (0 == max_tmds_clk ||
- max_tmds_clk < DP_ADAPTOR_TYPE2_MIN_TMDS_CLK ||
- max_tmds_clk > DP_ADAPTOR_TYPE2_MAX_TMDS_CLK) {
- *dongle = DISPLAY_DONGLE_DP_DVI_DONGLE;
-
- CONN_DATA_DETECT(ddc->link, type2_dongle_buf,
- sizeof(type2_dongle_buf),
- "DP-DVI passive dongle %dMhz: ",
- DP_ADAPTOR_DVI_MAX_TMDS_CLK / 1000);
- } else {
- if (is_valid_hdmi_signature == true) {
- *dongle = DISPLAY_DONGLE_DP_HDMI_DONGLE;
-
- CONN_DATA_DETECT(ddc->link, type2_dongle_buf,
- sizeof(type2_dongle_buf),
- "Type 2 DP-HDMI passive dongle %dMhz: ",
- max_tmds_clk);
- } else {
- *dongle = DISPLAY_DONGLE_DP_HDMI_MISMATCHED_DONGLE;
-
- CONN_DATA_DETECT(ddc->link, type2_dongle_buf,
- sizeof(type2_dongle_buf),
- "Type 2 DP-HDMI passive dongle (no signature) %dMhz: ",
- max_tmds_clk);
-
- }
-
- /* Multiply by 1000 to convert to kHz. */
- sink_cap->max_hdmi_pixel_clock =
- max_tmds_clk * 1000;
- }
-
- } else {
- if (is_valid_hdmi_signature == true) {
- *dongle = DISPLAY_DONGLE_DP_HDMI_DONGLE;
-
- CONN_DATA_DETECT(ddc->link, type2_dongle_buf,
- sizeof(type2_dongle_buf),
- "Type 1 DP-HDMI passive dongle %dMhz: ",
- sink_cap->max_hdmi_pixel_clock / 1000);
- } else {
- *dongle = DISPLAY_DONGLE_DP_HDMI_MISMATCHED_DONGLE;
-
- CONN_DATA_DETECT(ddc->link, type2_dongle_buf,
- sizeof(type2_dongle_buf),
- "Type 1 DP-HDMI passive dongle (no signature) %dMhz: ",
- sink_cap->max_hdmi_pixel_clock / 1000);
- }
- }
-
- return;
-}
-
-enum {
- DP_SINK_CAP_SIZE =
- DP_EDP_CONFIGURATION_CAP - DP_DPCD_REV + 1
-};
-
-bool dal_ddc_service_query_ddc_data(
- struct ddc_service *ddc,
- uint32_t address,
- uint8_t *write_buf,
- uint32_t write_size,
- uint8_t *read_buf,
- uint32_t read_size)
-{
- bool success = true;
- uint32_t payload_size =
- dal_ddc_service_is_in_aux_transaction_mode(ddc) ?
- DEFAULT_AUX_MAX_DATA_SIZE : EDID_SEGMENT_SIZE;
-
- uint32_t write_payloads =
- (write_size + payload_size - 1) / payload_size;
-
- uint32_t read_payloads =
- (read_size + payload_size - 1) / payload_size;
-
- uint32_t payloads_num = write_payloads + read_payloads;
-
-
- if (write_size > EDID_SEGMENT_SIZE || read_size > EDID_SEGMENT_SIZE)
- return false;
-
- if (!payloads_num)
- return false;
-
- /*TODO: len of payload data for i2c and aux is uint8!!!!,
- * but we want to read 256 over i2c!!!!*/
- if (dal_ddc_service_is_in_aux_transaction_mode(ddc)) {
- struct aux_payload payload;
-
- payload.i2c_over_aux = true;
- payload.address = address;
- payload.reply = NULL;
- payload.defer_delay = get_defer_delay(ddc);
- payload.write_status_update = false;
-
- if (write_size != 0) {
- payload.write = true;
- /* should not set mot (middle of transaction) to 0
- * if there are pending read payloads
- */
- payload.mot = !(read_size == 0);
- payload.length = write_size;
- payload.data = write_buf;
-
- success = dal_ddc_submit_aux_command(ddc, &payload);
- }
-
- if (read_size != 0 && success) {
- payload.write = false;
- /* should set mot (middle of transaction) to 0
- * since it is the last payload to send
- */
- payload.mot = false;
- payload.length = read_size;
- payload.data = read_buf;
-
- success = dal_ddc_submit_aux_command(ddc, &payload);
- }
- } else {
- struct i2c_command command = {0};
- struct i2c_payloads payloads;
-
- if (!dal_ddc_i2c_payloads_create(ddc->ctx, &payloads, payloads_num))
- return false;
-
- command.payloads = dal_ddc_i2c_payloads_get(&payloads);
- command.number_of_payloads = 0;
- command.engine = DDC_I2C_COMMAND_ENGINE;
- command.speed = ddc->ctx->dc->caps.i2c_speed_in_khz;
-
- dal_ddc_i2c_payloads_add(
- &payloads, address, write_size, write_buf, true);
-
- dal_ddc_i2c_payloads_add(
- &payloads, address, read_size, read_buf, false);
-
- command.number_of_payloads =
- dal_ddc_i2c_payloads_get_count(&payloads);
-
- success = dm_helpers_submit_i2c(
- ddc->ctx,
- ddc->link,
- &command);
-
- dal_vector_destruct(&payloads.payloads);
- }
-
- return success;
-}
-
-bool dal_ddc_submit_aux_command(struct ddc_service *ddc,
- struct aux_payload *payload)
-{
- uint32_t retrieved = 0;
- bool ret = false;
-
- if (!ddc)
- return false;
-
- if (!payload)
- return false;
-
- do {
- struct aux_payload current_payload;
- bool is_end_of_payload = (retrieved + DEFAULT_AUX_MAX_DATA_SIZE) >=
- payload->length ? true : false;
- uint32_t payload_length = is_end_of_payload ?
- payload->length - retrieved : DEFAULT_AUX_MAX_DATA_SIZE;
-
- current_payload.address = payload->address;
- current_payload.data = &payload->data[retrieved];
- current_payload.defer_delay = payload->defer_delay;
- current_payload.i2c_over_aux = payload->i2c_over_aux;
- current_payload.length = payload_length;
- /* set mot (middle of transaction) to false if it is the last payload */
- current_payload.mot = is_end_of_payload ? payload->mot:true;
- current_payload.write_status_update = false;
- current_payload.reply = payload->reply;
- current_payload.write = payload->write;
-
- ret = dc_link_aux_transfer_with_retries(ddc, &current_payload);
-
- retrieved += payload_length;
- } while (retrieved < payload->length && ret == true);
-
- return ret;
-}
-
-/* dc_link_aux_transfer_raw() - Attempt to transfer
- * the given aux payload. This function does not perform
- * retries or handle error states. The reply is returned
- * in the payload->reply and the result through
- * *operation_result. Returns the number of bytes transferred,
- * or -1 on a failure.
- */
-int dc_link_aux_transfer_raw(struct ddc_service *ddc,
- struct aux_payload *payload,
- enum aux_return_code_type *operation_result)
-{
- if (ddc->ctx->dc->debug.enable_dmub_aux_for_legacy_ddc ||
- !ddc->ddc_pin) {
- return dce_aux_transfer_dmub_raw(ddc, payload, operation_result);
- } else {
- return dce_aux_transfer_raw(ddc, payload, operation_result);
- }
-}
-
-/* dc_link_aux_transfer_with_retries() - Attempt to submit an
- * aux payload, retrying on timeouts, defers, and busy states
- * as outlined in the DP spec. Returns true if the request
- * was successful.
- *
- * Unless you want to implement your own retry semantics, this
- * is probably the one you want.
- */
-bool dc_link_aux_transfer_with_retries(struct ddc_service *ddc,
- struct aux_payload *payload)
-{
- return dce_aux_transfer_with_retries(ddc, payload);
-}
-
-
-bool dc_link_aux_try_to_configure_timeout(struct ddc_service *ddc,
- uint32_t timeout)
-{
- bool result = false;
- struct ddc *ddc_pin = ddc->ddc_pin;
-
- /* Do not try to access nonexistent DDC pin. */
- if (ddc->link->ep_type != DISPLAY_ENDPOINT_PHY)
- return true;
-
- if (ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]->funcs->configure_timeout) {
- ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]->funcs->configure_timeout(ddc, timeout);
- result = true;
- }
- return result;
-}
-
-/*test only function*/
-void dal_ddc_service_set_ddc_pin(
- struct ddc_service *ddc_service,
- struct ddc *ddc)
-{
- ddc_service->ddc_pin = ddc;
-}
-
-struct ddc *dal_ddc_service_get_ddc_pin(struct ddc_service *ddc_service)
-{
- return ddc_service->ddc_pin;
-}
-
-void dal_ddc_service_write_scdc_data(struct ddc_service *ddc_service,
- uint32_t pix_clk,
- bool lte_340_scramble)
-{
- bool over_340_mhz = pix_clk > 340000 ? 1 : 0;
- uint8_t slave_address = HDMI_SCDC_ADDRESS;
- uint8_t offset = HDMI_SCDC_SINK_VERSION;
- uint8_t sink_version = 0;
- uint8_t write_buffer[2] = {0};
- /*Lower than 340 Scramble bit from SCDC caps*/
-
- if (ddc_service->link->local_sink &&
- ddc_service->link->local_sink->edid_caps.panel_patch.skip_scdc_overwrite)
- return;
-
- dal_ddc_service_query_ddc_data(ddc_service, slave_address, &offset,
- sizeof(offset), &sink_version, sizeof(sink_version));
- if (sink_version == 1) {
- /*Source Version = 1*/
- write_buffer[0] = HDMI_SCDC_SOURCE_VERSION;
- write_buffer[1] = 1;
- dal_ddc_service_query_ddc_data(ddc_service, slave_address,
- write_buffer, sizeof(write_buffer), NULL, 0);
- /*Read Request from SCDC caps*/
- }
- write_buffer[0] = HDMI_SCDC_TMDS_CONFIG;
-
- if (over_340_mhz) {
- write_buffer[1] = 3;
- } else if (lte_340_scramble) {
- write_buffer[1] = 1;
- } else {
- write_buffer[1] = 0;
- }
- dal_ddc_service_query_ddc_data(ddc_service, slave_address, write_buffer,
- sizeof(write_buffer), NULL, 0);
-}
-
-void dal_ddc_service_read_scdc_data(struct ddc_service *ddc_service)
-{
- uint8_t slave_address = HDMI_SCDC_ADDRESS;
- uint8_t offset = HDMI_SCDC_TMDS_CONFIG;
- uint8_t tmds_config = 0;
-
- if (ddc_service->link->local_sink &&
- ddc_service->link->local_sink->edid_caps.panel_patch.skip_scdc_overwrite)
- return;
-
- dal_ddc_service_query_ddc_data(ddc_service, slave_address, &offset,
- sizeof(offset), &tmds_config, sizeof(tmds_config));
- if (tmds_config & 0x1) {
- union hdmi_scdc_status_flags_data status_data = {0};
- uint8_t scramble_status = 0;
-
- offset = HDMI_SCDC_SCRAMBLER_STATUS;
- dal_ddc_service_query_ddc_data(ddc_service, slave_address,
- &offset, sizeof(offset), &scramble_status,
- sizeof(scramble_status));
- offset = HDMI_SCDC_STATUS_FLAGS;
- dal_ddc_service_query_ddc_data(ddc_service, slave_address,
- &offset, sizeof(offset), status_data.byte,
- sizeof(status_data.byte));
- }
-}
-
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
deleted file mode 100644
index 13bc69d6b679..000000000000
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ /dev/null
@@ -1,6128 +0,0 @@
-/*
- * Copyright 2015 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- */
-#include "dm_services.h"
-#include "dc.h"
-#include "dc_link_dp.h"
-#include "dm_helpers.h"
-#include "opp.h"
-#include "dsc.h"
-#include "resource.h"
-
-#include "inc/core_types.h"
-#include "link_hwss.h"
-#include "dc_link_ddc.h"
-#include "core_status.h"
-#include "dpcd_defs.h"
-#include "dc_dmub_srv.h"
-#include "dce/dmub_hw_lock_mgr.h"
-#include "inc/dc_link_dpia.h"
-#include "inc/link_enc_cfg.h"
-
-/*Travis*/
-static const uint8_t DP_VGA_LVDS_CONVERTER_ID_2[] = "sivarT";
-/*Nutmeg*/
-static const uint8_t DP_VGA_LVDS_CONVERTER_ID_3[] = "dnomlA";
-
-#define DC_LOGGER \
- link->ctx->logger
-#define DC_TRACE_LEVEL_MESSAGE(...) /* do nothing */
-
-#include "link_dpcd.h"
-
- /* maximum pre emphasis level allowed for each voltage swing level*/
- static const enum dc_pre_emphasis
- voltage_swing_to_pre_emphasis[] = { PRE_EMPHASIS_LEVEL3,
- PRE_EMPHASIS_LEVEL2,
- PRE_EMPHASIS_LEVEL1,
- PRE_EMPHASIS_DISABLED };
-
-enum {
- POST_LT_ADJ_REQ_LIMIT = 6,
- POST_LT_ADJ_REQ_TIMEOUT = 200
-};
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-struct dp_lt_fallback_entry {
- enum dc_lane_count lane_count;
- enum dc_link_rate link_rate;
-};
-
-static const struct dp_lt_fallback_entry dp_lt_fallbacks[] = {
- /* This link training fallback array is ordered by
- * link bandwidth from highest to lowest.
- * DP specs makes it a normative policy to always
- * choose the next highest link bandwidth during
- * link training fallback.
- */
- {LANE_COUNT_FOUR, LINK_RATE_UHBR20},
- {LANE_COUNT_FOUR, LINK_RATE_UHBR13_5},
- {LANE_COUNT_TWO, LINK_RATE_UHBR20},
- {LANE_COUNT_FOUR, LINK_RATE_UHBR10},
- {LANE_COUNT_TWO, LINK_RATE_UHBR13_5},
- {LANE_COUNT_FOUR, LINK_RATE_HIGH3},
- {LANE_COUNT_ONE, LINK_RATE_UHBR20},
- {LANE_COUNT_TWO, LINK_RATE_UHBR10},
- {LANE_COUNT_FOUR, LINK_RATE_HIGH2},
- {LANE_COUNT_ONE, LINK_RATE_UHBR13_5},
- {LANE_COUNT_TWO, LINK_RATE_HIGH3},
- {LANE_COUNT_ONE, LINK_RATE_UHBR10},
- {LANE_COUNT_TWO, LINK_RATE_HIGH2},
- {LANE_COUNT_FOUR, LINK_RATE_HIGH},
- {LANE_COUNT_ONE, LINK_RATE_HIGH3},
- {LANE_COUNT_FOUR, LINK_RATE_LOW},
- {LANE_COUNT_ONE, LINK_RATE_HIGH2},
- {LANE_COUNT_TWO, LINK_RATE_HIGH},
- {LANE_COUNT_TWO, LINK_RATE_LOW},
- {LANE_COUNT_ONE, LINK_RATE_HIGH},
- {LANE_COUNT_ONE, LINK_RATE_LOW},
-};
-#endif
-
-static bool decide_fallback_link_setting(
- struct dc_link_settings initial_link_settings,
- struct dc_link_settings *current_link_setting,
- enum link_training_result training_result);
-static struct dc_link_settings get_common_supported_link_settings(
- struct dc_link_settings link_setting_a,
- struct dc_link_settings link_setting_b);
-static void maximize_lane_settings(const struct link_training_settings *lt_settings,
- struct dc_lane_settings lane_settings[LANE_COUNT_DP_MAX]);
-static void override_lane_settings(const struct link_training_settings *lt_settings,
- struct dc_lane_settings lane_settings[LANE_COUNT_DP_MAX]);
-
-static uint32_t get_cr_training_aux_rd_interval(struct dc_link *link,
- const struct dc_link_settings *link_settings)
-{
- union training_aux_rd_interval training_rd_interval;
- uint32_t wait_in_micro_secs = 100;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- memset(&training_rd_interval, 0, sizeof(training_rd_interval));
- if (dp_get_link_encoding_format(link_settings) == DP_8b_10b_ENCODING &&
- link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) {
- core_link_read_dpcd(
- link,
- DP_TRAINING_AUX_RD_INTERVAL,
- (uint8_t *)&training_rd_interval,
- sizeof(training_rd_interval));
- if (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL)
- wait_in_micro_secs = training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL * 4000;
- }
-#else
- core_link_read_dpcd(
- link,
- DP_TRAINING_AUX_RD_INTERVAL,
- (uint8_t *)&training_rd_interval,
- sizeof(training_rd_interval));
- if (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL)
- wait_in_micro_secs = training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL * 4000;
-#endif
- return wait_in_micro_secs;
-}
-
-static uint32_t get_eq_training_aux_rd_interval(
- struct dc_link *link,
- const struct dc_link_settings *link_settings)
-{
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- union training_aux_rd_interval training_rd_interval;
-
- memset(&training_rd_interval, 0, sizeof(training_rd_interval));
- if (dp_get_link_encoding_format(link_settings) == DP_128b_132b_ENCODING) {
- core_link_read_dpcd(
- link,
- DP_128b_132b_TRAINING_AUX_RD_INTERVAL,
- (uint8_t *)&training_rd_interval,
- sizeof(training_rd_interval));
- } else if (dp_get_link_encoding_format(link_settings) == DP_8b_10b_ENCODING &&
- link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) {
- core_link_read_dpcd(
- link,
- DP_TRAINING_AUX_RD_INTERVAL,
- (uint8_t *)&training_rd_interval,
- sizeof(training_rd_interval));
- }
-
- switch (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL) {
- case 0: return 400;
- case 1: return 4000;
- case 2: return 8000;
- case 3: return 12000;
- case 4: return 16000;
- case 5: return 32000;
- case 6: return 64000;
- default: return 400;
- }
-#else
- union training_aux_rd_interval training_rd_interval;
- uint32_t wait_in_micro_secs = 400;
-
- memset(&training_rd_interval, 0, sizeof(training_rd_interval));
- /* overwrite the delay if rev > 1.1*/
- if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) {
- /* DP 1.2 or later - retrieve delay through
- * "DPCD_ADDR_TRAINING_AUX_RD_INTERVAL" register */
- core_link_read_dpcd(
- link,
- DP_TRAINING_AUX_RD_INTERVAL,
- (uint8_t *)&training_rd_interval,
- sizeof(training_rd_interval));
-
- if (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL)
- wait_in_micro_secs = training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL * 4000;
- }
-
- return wait_in_micro_secs;
-#endif
-}
-
-void dp_wait_for_training_aux_rd_interval(
- struct dc_link *link,
- uint32_t wait_in_micro_secs)
-{
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (wait_in_micro_secs > 16000)
- msleep(wait_in_micro_secs/1000);
- else
- udelay(wait_in_micro_secs);
-#else
- udelay(wait_in_micro_secs);
-#endif
-
- DC_LOG_HW_LINK_TRAINING("%s:\n wait = %d\n",
- __func__,
- wait_in_micro_secs);
-}
-
-enum dpcd_training_patterns
- dc_dp_training_pattern_to_dpcd_training_pattern(
- struct dc_link *link,
- enum dc_dp_training_pattern pattern)
-{
- enum dpcd_training_patterns dpcd_tr_pattern =
- DPCD_TRAINING_PATTERN_VIDEOIDLE;
-
- switch (pattern) {
- case DP_TRAINING_PATTERN_SEQUENCE_1:
- dpcd_tr_pattern = DPCD_TRAINING_PATTERN_1;
- break;
- case DP_TRAINING_PATTERN_SEQUENCE_2:
- dpcd_tr_pattern = DPCD_TRAINING_PATTERN_2;
- break;
- case DP_TRAINING_PATTERN_SEQUENCE_3:
- dpcd_tr_pattern = DPCD_TRAINING_PATTERN_3;
- break;
- case DP_TRAINING_PATTERN_SEQUENCE_4:
- dpcd_tr_pattern = DPCD_TRAINING_PATTERN_4;
- break;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- case DP_128b_132b_TPS1:
- dpcd_tr_pattern = DPCD_128b_132b_TPS1;
- break;
- case DP_128b_132b_TPS2:
- dpcd_tr_pattern = DPCD_128b_132b_TPS2;
- break;
- case DP_128b_132b_TPS2_CDS:
- dpcd_tr_pattern = DPCD_128b_132b_TPS2_CDS;
- break;
-#endif
- case DP_TRAINING_PATTERN_VIDEOIDLE:
- dpcd_tr_pattern = DPCD_TRAINING_PATTERN_VIDEOIDLE;
- break;
- default:
- ASSERT(0);
- DC_LOG_HW_LINK_TRAINING("%s: Invalid HW Training pattern: %d\n",
- __func__, pattern);
- break;
- }
-
- return dpcd_tr_pattern;
-}
-
-static void dpcd_set_training_pattern(
- struct dc_link *link,
- enum dc_dp_training_pattern training_pattern)
-{
- union dpcd_training_pattern dpcd_pattern = {0};
-
- dpcd_pattern.v1_4.TRAINING_PATTERN_SET =
- dc_dp_training_pattern_to_dpcd_training_pattern(
- link, training_pattern);
-
- core_link_write_dpcd(
- link,
- DP_TRAINING_PATTERN_SET,
- &dpcd_pattern.raw,
- 1);
-
- DC_LOG_HW_LINK_TRAINING("%s\n %x pattern = %x\n",
- __func__,
- DP_TRAINING_PATTERN_SET,
- dpcd_pattern.v1_4.TRAINING_PATTERN_SET);
-}
-
-static enum dc_dp_training_pattern decide_cr_training_pattern(
- const struct dc_link_settings *link_settings)
-{
- switch (dp_get_link_encoding_format(link_settings)) {
- case DP_8b_10b_ENCODING:
- default:
- return DP_TRAINING_PATTERN_SEQUENCE_1;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- case DP_128b_132b_ENCODING:
- return DP_128b_132b_TPS1;
-#endif
- }
-}
-
-static enum dc_dp_training_pattern decide_eq_training_pattern(struct dc_link *link,
- const struct dc_link_settings *link_settings)
-{
- struct link_encoder *link_enc;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- struct encoder_feature_support *enc_caps;
- struct dpcd_caps *rx_caps = &link->dpcd_caps;
- enum dc_dp_training_pattern pattern = DP_TRAINING_PATTERN_SEQUENCE_2;
-
- /* Access link encoder capability based on whether it is statically
- * or dynamically assigned to a link.
- */
- if (link->is_dig_mapping_flexible &&
- link->dc->res_pool->funcs->link_encs_assign)
- link_enc = link_enc_cfg_get_link_enc_used_by_link(link->ctx->dc, link);
- else
- link_enc = link->link_enc;
- ASSERT(link_enc);
- enc_caps = &link_enc->features;
-
- switch (dp_get_link_encoding_format(link_settings)) {
- case DP_8b_10b_ENCODING:
- if (enc_caps->flags.bits.IS_TPS4_CAPABLE &&
- rx_caps->max_down_spread.bits.TPS4_SUPPORTED)
- pattern = DP_TRAINING_PATTERN_SEQUENCE_4;
- else if (enc_caps->flags.bits.IS_TPS3_CAPABLE &&
- rx_caps->max_ln_count.bits.TPS3_SUPPORTED)
- pattern = DP_TRAINING_PATTERN_SEQUENCE_3;
- else
- pattern = DP_TRAINING_PATTERN_SEQUENCE_2;
- break;
- case DP_128b_132b_ENCODING:
- pattern = DP_128b_132b_TPS2;
- break;
- default:
- pattern = DP_TRAINING_PATTERN_SEQUENCE_2;
- break;
- }
- return pattern;
-#else
- enum dc_dp_training_pattern highest_tp = DP_TRAINING_PATTERN_SEQUENCE_2;
- struct encoder_feature_support *features;
- struct dpcd_caps *dpcd_caps = &link->dpcd_caps;
-
- /* Access link encoder capability based on whether it is statically
- * or dynamically assigned to a link.
- */
- if (link->is_dig_mapping_flexible &&
- link->dc->res_pool->funcs->link_encs_assign)
- link_enc = link_enc_cfg_get_link_enc_used_by_link(link->ctx->dc, link);
- else
- link_enc = link->link_enc;
- ASSERT(link_enc);
- features = &link_enc->features;
-
- if (features->flags.bits.IS_TPS3_CAPABLE)
- highest_tp = DP_TRAINING_PATTERN_SEQUENCE_3;
-
- if (features->flags.bits.IS_TPS4_CAPABLE)
- highest_tp = DP_TRAINING_PATTERN_SEQUENCE_4;
-
- if (dpcd_caps->max_down_spread.bits.TPS4_SUPPORTED &&
- highest_tp >= DP_TRAINING_PATTERN_SEQUENCE_4)
- return DP_TRAINING_PATTERN_SEQUENCE_4;
-
- if (dpcd_caps->max_ln_count.bits.TPS3_SUPPORTED &&
- highest_tp >= DP_TRAINING_PATTERN_SEQUENCE_3)
- return DP_TRAINING_PATTERN_SEQUENCE_3;
-
- return DP_TRAINING_PATTERN_SEQUENCE_2;
-#endif
-}
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-static uint8_t get_dpcd_link_rate(const struct dc_link_settings *link_settings)
-{
- uint8_t link_rate = 0;
- enum dp_link_encoding encoding = dp_get_link_encoding_format(link_settings);
-
- if (encoding == DP_128b_132b_ENCODING)
- switch (link_settings->link_rate) {
- case LINK_RATE_UHBR10:
- link_rate = 0x1;
- break;
- case LINK_RATE_UHBR20:
- link_rate = 0x2;
- break;
- case LINK_RATE_UHBR13_5:
- link_rate = 0x4;
- break;
- default:
- link_rate = 0;
- break;
- }
- else if (encoding == DP_8b_10b_ENCODING)
- link_rate = (uint8_t) link_settings->link_rate;
- else
- link_rate = 0;
-
- return link_rate;
-}
-#endif
-
-enum dc_status dpcd_set_link_settings(
- struct dc_link *link,
- const struct link_training_settings *lt_settings)
-{
- uint8_t rate;
- enum dc_status status;
-
- union down_spread_ctrl downspread = {0};
- union lane_count_set lane_count_set = {0};
-
- downspread.raw = (uint8_t)
- (lt_settings->link_settings.link_spread);
-
- lane_count_set.bits.LANE_COUNT_SET =
- lt_settings->link_settings.lane_count;
-
- lane_count_set.bits.ENHANCED_FRAMING = lt_settings->enhanced_framing;
- lane_count_set.bits.POST_LT_ADJ_REQ_GRANTED = 0;
-
-
- if (link->ep_type == DISPLAY_ENDPOINT_PHY &&
- lt_settings->pattern_for_eq < DP_TRAINING_PATTERN_SEQUENCE_4) {
- lane_count_set.bits.POST_LT_ADJ_REQ_GRANTED =
- link->dpcd_caps.max_ln_count.bits.POST_LT_ADJ_REQ_SUPPORTED;
- }
-
- status = core_link_write_dpcd(link, DP_DOWNSPREAD_CTRL,
- &downspread.raw, sizeof(downspread));
-
- status = core_link_write_dpcd(link, DP_LANE_COUNT_SET,
- &lane_count_set.raw, 1);
-
- if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_14 &&
- lt_settings->link_settings.use_link_rate_set == true) {
- rate = 0;
- /* WA for some MUX chips that will power down with eDP and lose supported
- * link rate set for eDP 1.4. Source reads DPCD 0x010 again to ensure
- * MUX chip gets link rate set back before link training.
- */
- if (link->connector_signal == SIGNAL_TYPE_EDP) {
- uint8_t supported_link_rates[16];
-
- core_link_read_dpcd(link, DP_SUPPORTED_LINK_RATES,
- supported_link_rates, sizeof(supported_link_rates));
- }
- status = core_link_write_dpcd(link, DP_LINK_BW_SET, &rate, 1);
- status = core_link_write_dpcd(link, DP_LINK_RATE_SET,
- &lt_settings->link_settings.link_rate_set, 1);
- } else {
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- rate = get_dpcd_link_rate(&lt_settings->link_settings);
-#else
- rate = (uint8_t) (lt_settings->link_settings.link_rate);
-#endif
- status = core_link_write_dpcd(link, DP_LINK_BW_SET, &rate, 1);
- }
-
- if (rate) {
- DC_LOG_HW_LINK_TRAINING("%s\n %x rate = %x\n %x lane = %x framing = %x\n %x spread = %x\n",
- __func__,
- DP_LINK_BW_SET,
- lt_settings->link_settings.link_rate,
- DP_LANE_COUNT_SET,
- lt_settings->link_settings.lane_count,
- lt_settings->enhanced_framing,
- DP_DOWNSPREAD_CTRL,
- lt_settings->link_settings.link_spread);
- } else {
- DC_LOG_HW_LINK_TRAINING("%s\n %x rate set = %x\n %x lane = %x framing = %x\n %x spread = %x\n",
- __func__,
- DP_LINK_RATE_SET,
- lt_settings->link_settings.link_rate_set,
- DP_LANE_COUNT_SET,
- lt_settings->link_settings.lane_count,
- lt_settings->enhanced_framing,
- DP_DOWNSPREAD_CTRL,
- lt_settings->link_settings.link_spread);
- }
-
- return status;
-}
-
-uint8_t dc_dp_initialize_scrambling_data_symbols(
- struct dc_link *link,
- enum dc_dp_training_pattern pattern)
-{
- uint8_t disable_scrabled_data_symbols = 0;
-
- switch (pattern) {
- case DP_TRAINING_PATTERN_SEQUENCE_1:
- case DP_TRAINING_PATTERN_SEQUENCE_2:
- case DP_TRAINING_PATTERN_SEQUENCE_3:
- disable_scrabled_data_symbols = 1;
- break;
- case DP_TRAINING_PATTERN_SEQUENCE_4:
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- case DP_128b_132b_TPS1:
- case DP_128b_132b_TPS2:
-#endif
- disable_scrabled_data_symbols = 0;
- break;
- default:
- ASSERT(0);
- DC_LOG_HW_LINK_TRAINING("%s: Invalid HW Training pattern: %d\n",
- __func__, pattern);
- break;
- }
- return disable_scrabled_data_symbols;
-}
-
-static inline bool is_repeater(struct dc_link *link, uint32_t offset)
-{
- return (link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) && (offset != 0);
-}
-
-static void dpcd_set_lt_pattern_and_lane_settings(
- struct dc_link *link,
- const struct link_training_settings *lt_settings,
- enum dc_dp_training_pattern pattern,
- uint32_t offset)
-{
- uint32_t dpcd_base_lt_offset;
-
- uint8_t dpcd_lt_buffer[5] = {0};
- union dpcd_training_pattern dpcd_pattern = { 0 };
- uint32_t size_in_bytes;
- bool edp_workaround = false; /* TODO link_prop.INTERNAL */
- dpcd_base_lt_offset = DP_TRAINING_PATTERN_SET;
-
- if (is_repeater(link, offset))
- dpcd_base_lt_offset = DP_TRAINING_PATTERN_SET_PHY_REPEATER1 +
- ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1));
-
- /*****************************************************************
- * DpcdAddress_TrainingPatternSet
- *****************************************************************/
- dpcd_pattern.v1_4.TRAINING_PATTERN_SET =
- dc_dp_training_pattern_to_dpcd_training_pattern(link, pattern);
-
- dpcd_pattern.v1_4.SCRAMBLING_DISABLE =
- dc_dp_initialize_scrambling_data_symbols(link, pattern);
-
- dpcd_lt_buffer[DP_TRAINING_PATTERN_SET - DP_TRAINING_PATTERN_SET]
- = dpcd_pattern.raw;
-
- if (is_repeater(link, offset)) {
- DC_LOG_HW_LINK_TRAINING("%s\n LTTPR Repeater ID: %d\n 0x%X pattern = %x\n",
- __func__,
- offset,
- dpcd_base_lt_offset,
- dpcd_pattern.v1_4.TRAINING_PATTERN_SET);
- } else {
- DC_LOG_HW_LINK_TRAINING("%s\n 0x%X pattern = %x\n",
- __func__,
- dpcd_base_lt_offset,
- dpcd_pattern.v1_4.TRAINING_PATTERN_SET);
- }
-
- /* concatenate everything into one buffer*/
- size_in_bytes = lt_settings->link_settings.lane_count *
- sizeof(lt_settings->dpcd_lane_settings[0]);
-
- // 0x00103 - 0x00102
- memmove(
- &dpcd_lt_buffer[DP_TRAINING_LANE0_SET - DP_TRAINING_PATTERN_SET],
- lt_settings->dpcd_lane_settings,
- size_in_bytes);
-
- if (is_repeater(link, offset)) {
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dp_get_link_encoding_format(&lt_settings->link_settings) ==
- DP_128b_132b_ENCODING)
- DC_LOG_HW_LINK_TRAINING("%s:\n LTTPR Repeater ID: %d\n"
- " 0x%X TX_FFE_PRESET_VALUE = %x\n",
- __func__,
- offset,
- dpcd_base_lt_offset,
- lt_settings->dpcd_lane_settings[0].tx_ffe.PRESET_VALUE);
- else if (dp_get_link_encoding_format(&lt_settings->link_settings) ==
- DP_8b_10b_ENCODING)
-#endif
- DC_LOG_HW_LINK_TRAINING("%s:\n LTTPR Repeater ID: %d\n"
- " 0x%X VS set = %x PE set = %x max VS Reached = %x max PE Reached = %x\n",
- __func__,
- offset,
- dpcd_base_lt_offset,
- lt_settings->dpcd_lane_settings[0].bits.VOLTAGE_SWING_SET,
- lt_settings->dpcd_lane_settings[0].bits.PRE_EMPHASIS_SET,
- lt_settings->dpcd_lane_settings[0].bits.MAX_SWING_REACHED,
- lt_settings->dpcd_lane_settings[0].bits.MAX_PRE_EMPHASIS_REACHED);
- } else {
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dp_get_link_encoding_format(&lt_settings->link_settings) ==
- DP_128b_132b_ENCODING)
- DC_LOG_HW_LINK_TRAINING("%s:\n 0x%X TX_FFE_PRESET_VALUE = %x\n",
- __func__,
- dpcd_base_lt_offset,
- lt_settings->dpcd_lane_settings[0].tx_ffe.PRESET_VALUE);
- else if (dp_get_link_encoding_format(&lt_settings->link_settings) ==
- DP_8b_10b_ENCODING)
-#endif
- DC_LOG_HW_LINK_TRAINING("%s:\n 0x%X VS set = %x PE set = %x max VS Reached = %x max PE Reached = %x\n",
- __func__,
- dpcd_base_lt_offset,
- lt_settings->dpcd_lane_settings[0].bits.VOLTAGE_SWING_SET,
- lt_settings->dpcd_lane_settings[0].bits.PRE_EMPHASIS_SET,
- lt_settings->dpcd_lane_settings[0].bits.MAX_SWING_REACHED,
- lt_settings->dpcd_lane_settings[0].bits.MAX_PRE_EMPHASIS_REACHED);
- }
- if (edp_workaround) {
- /* for eDP write in 2 parts because the 5-byte burst is
- * causing issues on some eDP panels (EPR#366724)
- */
- core_link_write_dpcd(
- link,
- DP_TRAINING_PATTERN_SET,
- &dpcd_pattern.raw,
- sizeof(dpcd_pattern.raw));
-
- core_link_write_dpcd(
- link,
- DP_TRAINING_LANE0_SET,
- (uint8_t *)(lt_settings->dpcd_lane_settings),
- size_in_bytes);
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- } else if (dp_get_link_encoding_format(&lt_settings->link_settings) ==
- DP_128b_132b_ENCODING) {
- core_link_write_dpcd(
- link,
- dpcd_base_lt_offset,
- dpcd_lt_buffer,
- sizeof(dpcd_lt_buffer));
-#endif
- } else
- /* write it all in (1 + number-of-lanes)-byte burst*/
- core_link_write_dpcd(
- link,
- dpcd_base_lt_offset,
- dpcd_lt_buffer,
- size_in_bytes + sizeof(dpcd_pattern.raw));
-}
-
-bool dp_is_cr_done(enum dc_lane_count ln_count,
- union lane_status *dpcd_lane_status)
-{
- uint32_t lane;
- /*LANEx_CR_DONE bits All 1's?*/
- for (lane = 0; lane < (uint32_t)(ln_count); lane++) {
- if (!dpcd_lane_status[lane].bits.CR_DONE_0)
- return false;
- }
- return true;
-}
-
-bool dp_is_ch_eq_done(enum dc_lane_count ln_count,
- union lane_status *dpcd_lane_status)
-{
- bool done = true;
- uint32_t lane;
- for (lane = 0; lane < (uint32_t)(ln_count); lane++)
- if (!dpcd_lane_status[lane].bits.CHANNEL_EQ_DONE_0)
- done = false;
- return done;
-}
-
-bool dp_is_symbol_locked(enum dc_lane_count ln_count,
- union lane_status *dpcd_lane_status)
-{
- bool locked = true;
- uint32_t lane;
- for (lane = 0; lane < (uint32_t)(ln_count); lane++)
- if (!dpcd_lane_status[lane].bits.SYMBOL_LOCKED_0)
- locked = false;
- return locked;
-}
-
-bool dp_is_interlane_aligned(union lane_align_status_updated align_status)
-{
- return align_status.bits.INTERLANE_ALIGN_DONE == 1;
-}
-
-void dp_hw_to_dpcd_lane_settings(
- const struct link_training_settings *lt_settings,
- const struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX],
- union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX])
-{
- uint8_t lane = 0;
-
- for (lane = 0; lane < LANE_COUNT_DP_MAX; lane++) {
- if (dp_get_link_encoding_format(&lt_settings->link_settings) ==
- DP_8b_10b_ENCODING) {
- dpcd_lane_settings[lane].bits.VOLTAGE_SWING_SET =
- (uint8_t)(hw_lane_settings[lane].VOLTAGE_SWING);
- dpcd_lane_settings[lane].bits.PRE_EMPHASIS_SET =
- (uint8_t)(hw_lane_settings[lane].PRE_EMPHASIS);
- dpcd_lane_settings[lane].bits.MAX_SWING_REACHED =
- (hw_lane_settings[lane].VOLTAGE_SWING ==
- VOLTAGE_SWING_MAX_LEVEL ? 1 : 0);
- dpcd_lane_settings[lane].bits.MAX_PRE_EMPHASIS_REACHED =
- (hw_lane_settings[lane].PRE_EMPHASIS ==
- PRE_EMPHASIS_MAX_LEVEL ? 1 : 0);
- }
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- else if (dp_get_link_encoding_format(&lt_settings->link_settings) ==
- DP_128b_132b_ENCODING) {
- dpcd_lane_settings[lane].tx_ffe.PRESET_VALUE =
- hw_lane_settings[lane].FFE_PRESET.settings.level;
- }
-#endif
- }
-}
-
-void dp_decide_lane_settings(
- const struct link_training_settings *lt_settings,
- const union lane_adjust ln_adjust[LANE_COUNT_DP_MAX],
- struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX],
- union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX])
-{
- uint32_t lane;
-
- for (lane = 0; lane < LANE_COUNT_DP_MAX; lane++) {
- if (dp_get_link_encoding_format(&lt_settings->link_settings) ==
- DP_8b_10b_ENCODING) {
- hw_lane_settings[lane].VOLTAGE_SWING =
- (enum dc_voltage_swing)(ln_adjust[lane].bits.
- VOLTAGE_SWING_LANE);
- hw_lane_settings[lane].PRE_EMPHASIS =
- (enum dc_pre_emphasis)(ln_adjust[lane].bits.
- PRE_EMPHASIS_LANE);
- }
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- else if (dp_get_link_encoding_format(&lt_settings->link_settings) ==
- DP_128b_132b_ENCODING) {
- hw_lane_settings[lane].FFE_PRESET.raw =
- ln_adjust[lane].tx_ffe.PRESET_VALUE;
- }
-#endif
- }
- dp_hw_to_dpcd_lane_settings(lt_settings, hw_lane_settings, dpcd_lane_settings);
-
- if (lt_settings->disallow_per_lane_settings) {
- /* we find the maximum of the requested settings across all lanes*/
- /* and set this maximum for all lanes*/
- maximize_lane_settings(lt_settings, hw_lane_settings);
- override_lane_settings(lt_settings, hw_lane_settings);
-
- if (lt_settings->always_match_dpcd_with_hw_lane_settings)
- dp_hw_to_dpcd_lane_settings(lt_settings, hw_lane_settings, dpcd_lane_settings);
- }
-
-}
-
-static uint8_t get_nibble_at_index(const uint8_t *buf,
- uint32_t index)
-{
- uint8_t nibble;
- nibble = buf[index / 2];
-
- if (index % 2)
- nibble >>= 4;
- else
- nibble &= 0x0F;
-
- return nibble;
-}
-
-static enum dc_pre_emphasis get_max_pre_emphasis_for_voltage_swing(
- enum dc_voltage_swing voltage)
-{
- enum dc_pre_emphasis pre_emphasis;
- pre_emphasis = PRE_EMPHASIS_MAX_LEVEL;
-
- if (voltage <= VOLTAGE_SWING_MAX_LEVEL)
- pre_emphasis = voltage_swing_to_pre_emphasis[voltage];
-
- return pre_emphasis;
-
-}
-
-static void maximize_lane_settings(const struct link_training_settings *lt_settings,
- struct dc_lane_settings lane_settings[LANE_COUNT_DP_MAX])
-{
- uint32_t lane;
- struct dc_lane_settings max_requested;
-
- max_requested.VOLTAGE_SWING = lane_settings[0].VOLTAGE_SWING;
- max_requested.PRE_EMPHASIS = lane_settings[0].PRE_EMPHASIS;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- max_requested.FFE_PRESET = lane_settings[0].FFE_PRESET;
-#endif
-
- /* Determine what the maximum of the requested settings are*/
- for (lane = 1; lane < lt_settings->link_settings.lane_count; lane++) {
- if (lane_settings[lane].VOLTAGE_SWING > max_requested.VOLTAGE_SWING)
- max_requested.VOLTAGE_SWING = lane_settings[lane].VOLTAGE_SWING;
-
- if (lane_settings[lane].PRE_EMPHASIS > max_requested.PRE_EMPHASIS)
- max_requested.PRE_EMPHASIS = lane_settings[lane].PRE_EMPHASIS;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (lane_settings[lane].FFE_PRESET.settings.level >
- max_requested.FFE_PRESET.settings.level)
- max_requested.FFE_PRESET.settings.level =
- lane_settings[lane].FFE_PRESET.settings.level;
-#endif
- }
-
- /* make sure the requested settings are
- * not higher than maximum settings*/
- if (max_requested.VOLTAGE_SWING > VOLTAGE_SWING_MAX_LEVEL)
- max_requested.VOLTAGE_SWING = VOLTAGE_SWING_MAX_LEVEL;
-
- if (max_requested.PRE_EMPHASIS > PRE_EMPHASIS_MAX_LEVEL)
- max_requested.PRE_EMPHASIS = PRE_EMPHASIS_MAX_LEVEL;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (max_requested.FFE_PRESET.settings.level > DP_FFE_PRESET_MAX_LEVEL)
- max_requested.FFE_PRESET.settings.level = DP_FFE_PRESET_MAX_LEVEL;
-#endif
-
- /* make sure the pre-emphasis matches the voltage swing*/
- if (max_requested.PRE_EMPHASIS >
- get_max_pre_emphasis_for_voltage_swing(
- max_requested.VOLTAGE_SWING))
- max_requested.PRE_EMPHASIS =
- get_max_pre_emphasis_for_voltage_swing(
- max_requested.VOLTAGE_SWING);
-
- for (lane = 0; lane < LANE_COUNT_DP_MAX; lane++) {
- lane_settings[lane].VOLTAGE_SWING = max_requested.VOLTAGE_SWING;
- lane_settings[lane].PRE_EMPHASIS = max_requested.PRE_EMPHASIS;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- lane_settings[lane].FFE_PRESET = max_requested.FFE_PRESET;
-#endif
- }
-}
-
-static void override_lane_settings(const struct link_training_settings *lt_settings,
- struct dc_lane_settings lane_settings[LANE_COUNT_DP_MAX])
-{
- uint32_t lane;
-
- if (lt_settings->voltage_swing == NULL &&
- lt_settings->pre_emphasis == NULL &&
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- lt_settings->ffe_preset == NULL &&
-#endif
- lt_settings->post_cursor2 == NULL)
-
- return;
-
- for (lane = 1; lane < LANE_COUNT_DP_MAX; lane++) {
- if (lt_settings->voltage_swing)
- lane_settings[lane].VOLTAGE_SWING = *lt_settings->voltage_swing;
- if (lt_settings->pre_emphasis)
- lane_settings[lane].PRE_EMPHASIS = *lt_settings->pre_emphasis;
- if (lt_settings->post_cursor2)
- lane_settings[lane].POST_CURSOR2 = *lt_settings->post_cursor2;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (lt_settings->ffe_preset)
- lane_settings[lane].FFE_PRESET = *lt_settings->ffe_preset;
-#endif
- }
-}
-
-enum dc_status dp_get_lane_status_and_lane_adjust(
- struct dc_link *link,
- const struct link_training_settings *link_training_setting,
- union lane_status ln_status[LANE_COUNT_DP_MAX],
- union lane_align_status_updated *ln_align,
- union lane_adjust ln_adjust[LANE_COUNT_DP_MAX],
- uint32_t offset)
-{
- unsigned int lane01_status_address = DP_LANE0_1_STATUS;
- uint8_t lane_adjust_offset = 4;
- unsigned int lane01_adjust_address;
- uint8_t dpcd_buf[6] = {0};
- uint32_t lane;
- enum dc_status status;
-
- if (is_repeater(link, offset)) {
- lane01_status_address =
- DP_LANE0_1_STATUS_PHY_REPEATER1 +
- ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1));
- lane_adjust_offset = 3;
- }
-
- status = core_link_read_dpcd(
- link,
- lane01_status_address,
- (uint8_t *)(dpcd_buf),
- sizeof(dpcd_buf));
-
- for (lane = 0; lane <
- (uint32_t)(link_training_setting->link_settings.lane_count);
- lane++) {
-
- ln_status[lane].raw =
- get_nibble_at_index(&dpcd_buf[0], lane);
- ln_adjust[lane].raw =
- get_nibble_at_index(&dpcd_buf[lane_adjust_offset], lane);
- }
-
- ln_align->raw = dpcd_buf[2];
-
- if (is_repeater(link, offset)) {
- DC_LOG_HW_LINK_TRAINING("%s:\n LTTPR Repeater ID: %d\n"
- " 0x%X Lane01Status = %x\n 0x%X Lane23Status = %x\n ",
- __func__,
- offset,
- lane01_status_address, dpcd_buf[0],
- lane01_status_address + 1, dpcd_buf[1]);
- } else {
- DC_LOG_HW_LINK_TRAINING("%s:\n 0x%X Lane01Status = %x\n 0x%X Lane23Status = %x\n ",
- __func__,
- lane01_status_address, dpcd_buf[0],
- lane01_status_address + 1, dpcd_buf[1]);
- }
- lane01_adjust_address = DP_ADJUST_REQUEST_LANE0_1;
-
- if (is_repeater(link, offset))
- lane01_adjust_address = DP_ADJUST_REQUEST_LANE0_1_PHY_REPEATER1 +
- ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1));
-
- if (is_repeater(link, offset)) {
- DC_LOG_HW_LINK_TRAINING("%s:\n LTTPR Repeater ID: %d\n"
- " 0x%X Lane01AdjustRequest = %x\n 0x%X Lane23AdjustRequest = %x\n",
- __func__,
- offset,
- lane01_adjust_address,
- dpcd_buf[lane_adjust_offset],
- lane01_adjust_address + 1,
- dpcd_buf[lane_adjust_offset + 1]);
- } else {
- DC_LOG_HW_LINK_TRAINING("%s:\n 0x%X Lane01AdjustRequest = %x\n 0x%X Lane23AdjustRequest = %x\n",
- __func__,
- lane01_adjust_address,
- dpcd_buf[lane_adjust_offset],
- lane01_adjust_address + 1,
- dpcd_buf[lane_adjust_offset + 1]);
- }
-
- return status;
-}
-
-enum dc_status dpcd_set_lane_settings(
- struct dc_link *link,
- const struct link_training_settings *link_training_setting,
- uint32_t offset)
-{
- unsigned int lane0_set_address;
- enum dc_status status;
-
- lane0_set_address = DP_TRAINING_LANE0_SET;
-
- if (is_repeater(link, offset))
- lane0_set_address = DP_TRAINING_LANE0_SET_PHY_REPEATER1 +
- ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1));
-
- status = core_link_write_dpcd(link,
- lane0_set_address,
- (uint8_t *)(link_training_setting->dpcd_lane_settings),
- link_training_setting->link_settings.lane_count);
-
- if (is_repeater(link, offset)) {
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dp_get_link_encoding_format(&link_training_setting->link_settings) ==
- DP_128b_132b_ENCODING)
- DC_LOG_HW_LINK_TRAINING("%s:\n LTTPR Repeater ID: %d\n"
- " 0x%X TX_FFE_PRESET_VALUE = %x\n",
- __func__,
- offset,
- lane0_set_address,
- link_training_setting->dpcd_lane_settings[0].tx_ffe.PRESET_VALUE);
- else if (dp_get_link_encoding_format(&link_training_setting->link_settings) ==
- DP_8b_10b_ENCODING)
-#endif
- DC_LOG_HW_LINK_TRAINING("%s\n LTTPR Repeater ID: %d\n"
- " 0x%X VS set = %x PE set = %x max VS Reached = %x max PE Reached = %x\n",
- __func__,
- offset,
- lane0_set_address,
- link_training_setting->dpcd_lane_settings[0].bits.VOLTAGE_SWING_SET,
- link_training_setting->dpcd_lane_settings[0].bits.PRE_EMPHASIS_SET,
- link_training_setting->dpcd_lane_settings[0].bits.MAX_SWING_REACHED,
- link_training_setting->dpcd_lane_settings[0].bits.MAX_PRE_EMPHASIS_REACHED);
-
- } else {
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dp_get_link_encoding_format(&link_training_setting->link_settings) ==
- DP_128b_132b_ENCODING)
- DC_LOG_HW_LINK_TRAINING("%s:\n 0x%X TX_FFE_PRESET_VALUE = %x\n",
- __func__,
- lane0_set_address,
- link_training_setting->dpcd_lane_settings[0].tx_ffe.PRESET_VALUE);
- else if (dp_get_link_encoding_format(&link_training_setting->link_settings) ==
- DP_8b_10b_ENCODING)
-#endif
- DC_LOG_HW_LINK_TRAINING("%s\n 0x%X VS set = %x PE set = %x max VS Reached = %x max PE Reached = %x\n",
- __func__,
- lane0_set_address,
- link_training_setting->dpcd_lane_settings[0].bits.VOLTAGE_SWING_SET,
- link_training_setting->dpcd_lane_settings[0].bits.PRE_EMPHASIS_SET,
- link_training_setting->dpcd_lane_settings[0].bits.MAX_SWING_REACHED,
- link_training_setting->dpcd_lane_settings[0].bits.MAX_PRE_EMPHASIS_REACHED);
- }
-
- return status;
-}
-
-bool dp_is_max_vs_reached(
- const struct link_training_settings *lt_settings)
-{
- uint32_t lane;
- for (lane = 0; lane <
- (uint32_t)(lt_settings->link_settings.lane_count);
- lane++) {
- if (lt_settings->dpcd_lane_settings[lane].bits.VOLTAGE_SWING_SET
- == VOLTAGE_SWING_MAX_LEVEL)
- return true;
- }
- return false;
-
-}
-
-static bool perform_post_lt_adj_req_sequence(
- struct dc_link *link,
- struct link_training_settings *lt_settings)
-{
- enum dc_lane_count lane_count =
- lt_settings->link_settings.lane_count;
-
- uint32_t adj_req_count;
- uint32_t adj_req_timer;
- bool req_drv_setting_changed;
- uint32_t lane;
-
- req_drv_setting_changed = false;
- for (adj_req_count = 0; adj_req_count < POST_LT_ADJ_REQ_LIMIT;
- adj_req_count++) {
-
- req_drv_setting_changed = false;
-
- for (adj_req_timer = 0;
- adj_req_timer < POST_LT_ADJ_REQ_TIMEOUT;
- adj_req_timer++) {
-
- union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX];
- union lane_align_status_updated
- dpcd_lane_status_updated;
- union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX] = { { {0} } };
-
- dp_get_lane_status_and_lane_adjust(
- link,
- lt_settings,
- dpcd_lane_status,
- &dpcd_lane_status_updated,
- dpcd_lane_adjust,
- DPRX);
-
- if (dpcd_lane_status_updated.bits.
- POST_LT_ADJ_REQ_IN_PROGRESS == 0)
- return true;
-
- if (!dp_is_cr_done(lane_count, dpcd_lane_status))
- return false;
-
- if (!dp_is_ch_eq_done(lane_count, dpcd_lane_status) ||
- !dp_is_symbol_locked(lane_count, dpcd_lane_status) ||
- !dp_is_interlane_aligned(dpcd_lane_status_updated))
- return false;
-
- for (lane = 0; lane < (uint32_t)(lane_count); lane++) {
-
- if (lt_settings->
- dpcd_lane_settings[lane].bits.VOLTAGE_SWING_SET !=
- dpcd_lane_adjust[lane].bits.VOLTAGE_SWING_LANE ||
- lt_settings->dpcd_lane_settings[lane].bits.PRE_EMPHASIS_SET !=
- dpcd_lane_adjust[lane].bits.PRE_EMPHASIS_LANE) {
-
- req_drv_setting_changed = true;
- break;
- }
- }
-
- if (req_drv_setting_changed) {
- dp_decide_lane_settings(lt_settings, dpcd_lane_adjust,
- lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings);
-
- dc_link_dp_set_drive_settings(link,
- lt_settings);
- break;
- }
-
- msleep(1);
- }
-
- if (!req_drv_setting_changed) {
- DC_LOG_WARNING("%s: Post Link Training Adjust Request Timed out\n",
- __func__);
-
- ASSERT(0);
- return true;
- }
- }
- DC_LOG_WARNING("%s: Post Link Training Adjust Request limit reached\n",
- __func__);
-
- ASSERT(0);
- return true;
-
-}
-
-/* Only used for channel equalization */
-uint32_t dp_translate_training_aux_read_interval(uint32_t dpcd_aux_read_interval)
-{
- unsigned int aux_rd_interval_us = 400;
-
- switch (dpcd_aux_read_interval) {
- case 0x01:
- aux_rd_interval_us = 4000;
- break;
- case 0x02:
- aux_rd_interval_us = 8000;
- break;
- case 0x03:
- aux_rd_interval_us = 12000;
- break;
- case 0x04:
- aux_rd_interval_us = 16000;
- break;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- case 0x05:
- aux_rd_interval_us = 32000;
- break;
- case 0x06:
- aux_rd_interval_us = 64000;
- break;
-#endif
- default:
- break;
- }
-
- return aux_rd_interval_us;
-}
-
-enum link_training_result dp_get_cr_failure(enum dc_lane_count ln_count,
- union lane_status *dpcd_lane_status)
-{
- enum link_training_result result = LINK_TRAINING_SUCCESS;
-
- if (ln_count >= LANE_COUNT_ONE && !dpcd_lane_status[0].bits.CR_DONE_0)
- result = LINK_TRAINING_CR_FAIL_LANE0;
- else if (ln_count >= LANE_COUNT_TWO && !dpcd_lane_status[1].bits.CR_DONE_0)
- result = LINK_TRAINING_CR_FAIL_LANE1;
- else if (ln_count >= LANE_COUNT_FOUR && !dpcd_lane_status[2].bits.CR_DONE_0)
- result = LINK_TRAINING_CR_FAIL_LANE23;
- else if (ln_count >= LANE_COUNT_FOUR && !dpcd_lane_status[3].bits.CR_DONE_0)
- result = LINK_TRAINING_CR_FAIL_LANE23;
- return result;
-}
-
-static enum link_training_result perform_channel_equalization_sequence(
- struct dc_link *link,
- struct link_training_settings *lt_settings,
- uint32_t offset)
-{
- enum dc_dp_training_pattern tr_pattern;
- uint32_t retries_ch_eq;
- uint32_t wait_time_microsec;
- enum dc_lane_count lane_count = lt_settings->link_settings.lane_count;
- union lane_align_status_updated dpcd_lane_status_updated = {0};
- union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {0};
- union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX] = {0};
-
- /* Note: also check that TPS4 is a supported feature*/
- tr_pattern = lt_settings->pattern_for_eq;
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (is_repeater(link, offset) && dp_get_link_encoding_format(&lt_settings->link_settings) == DP_8b_10b_ENCODING)
- tr_pattern = DP_TRAINING_PATTERN_SEQUENCE_4;
-#else
- if (is_repeater(link, offset))
- tr_pattern = DP_TRAINING_PATTERN_SEQUENCE_4;
-#endif
-
- dp_set_hw_training_pattern(link, tr_pattern, offset);
-
- for (retries_ch_eq = 0; retries_ch_eq <= LINK_TRAINING_MAX_RETRY_COUNT;
- retries_ch_eq++) {
-
- dp_set_hw_lane_settings(link, lt_settings, offset);
-
- /* 2. update DPCD*/
- if (!retries_ch_eq)
- /* EPR #361076 - write as a 5-byte burst,
- * but only for the 1-st iteration
- */
-
- dpcd_set_lt_pattern_and_lane_settings(
- link,
- lt_settings,
- tr_pattern, offset);
- else
- dpcd_set_lane_settings(link, lt_settings, offset);
-
- /* 3. wait for receiver to lock-on*/
- wait_time_microsec = lt_settings->eq_pattern_time;
-
- if (is_repeater(link, offset))
- wait_time_microsec =
- dp_translate_training_aux_read_interval(
- link->dpcd_caps.lttpr_caps.aux_rd_interval[offset - 1]);
-
- dp_wait_for_training_aux_rd_interval(
- link,
- wait_time_microsec);
-
- /* 4. Read lane status and requested
- * drive settings as set by the sink*/
-
- dp_get_lane_status_and_lane_adjust(
- link,
- lt_settings,
- dpcd_lane_status,
- &dpcd_lane_status_updated,
- dpcd_lane_adjust,
- offset);
-
- /* 5. check CR done*/
- if (!dp_is_cr_done(lane_count, dpcd_lane_status))
- return LINK_TRAINING_EQ_FAIL_CR;
-
- /* 6. check CHEQ done*/
- if (dp_is_ch_eq_done(lane_count, dpcd_lane_status) &&
- dp_is_symbol_locked(lane_count, dpcd_lane_status) &&
- dp_is_interlane_aligned(dpcd_lane_status_updated))
- return LINK_TRAINING_SUCCESS;
-
- /* 7. update VS/PE/PC2 in lt_settings*/
- dp_decide_lane_settings(lt_settings, dpcd_lane_adjust,
- lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings);
- }
-
- return LINK_TRAINING_EQ_FAIL_EQ;
-
-}
-
-static void start_clock_recovery_pattern_early(struct dc_link *link,
- struct link_training_settings *lt_settings,
- uint32_t offset)
-{
- DC_LOG_HW_LINK_TRAINING("%s\n GPU sends TPS1. Wait 400us.\n",
- __func__);
- dp_set_hw_training_pattern(link, lt_settings->pattern_for_cr, offset);
- dp_set_hw_lane_settings(link, lt_settings, offset);
- udelay(400);
-}
-
-static enum link_training_result perform_clock_recovery_sequence(
- struct dc_link *link,
- struct link_training_settings *lt_settings,
- uint32_t offset)
-{
- uint32_t retries_cr;
- uint32_t retry_count;
- uint32_t wait_time_microsec;
- enum dc_lane_count lane_count = lt_settings->link_settings.lane_count;
- union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX];
- union lane_align_status_updated dpcd_lane_status_updated;
- union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX] = { { {0} } };
-
- retries_cr = 0;
- retry_count = 0;
-
- if (!link->ctx->dc->work_arounds.lt_early_cr_pattern)
- dp_set_hw_training_pattern(link, lt_settings->pattern_for_cr, offset);
-
- /* najeeb - The synaptics MST hub can put the LT in
- * infinite loop by switching the VS
- */
- /* between level 0 and level 1 continuously, here
- * we try for CR lock for LinkTrainingMaxCRRetry count*/
- while ((retries_cr < LINK_TRAINING_MAX_RETRY_COUNT) &&
- (retry_count < LINK_TRAINING_MAX_CR_RETRY)) {
-
- memset(&dpcd_lane_status, '\0', sizeof(dpcd_lane_status));
- memset(&dpcd_lane_status_updated, '\0',
- sizeof(dpcd_lane_status_updated));
-
- /* 1. call HWSS to set lane settings*/
- dp_set_hw_lane_settings(
- link,
- lt_settings,
- offset);
-
- /* 2. update DPCD of the receiver*/
- if (!retry_count)
- /* EPR #361076 - write as a 5-byte burst,
- * but only for the 1-st iteration.*/
- dpcd_set_lt_pattern_and_lane_settings(
- link,
- lt_settings,
- lt_settings->pattern_for_cr,
- offset);
- else
- dpcd_set_lane_settings(
- link,
- lt_settings,
- offset);
-
- /* 3. wait receiver to lock-on*/
- wait_time_microsec = lt_settings->cr_pattern_time;
-
- if (link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT)
- wait_time_microsec = TRAINING_AUX_RD_INTERVAL;
-
- dp_wait_for_training_aux_rd_interval(
- link,
- wait_time_microsec);
-
- /* 4. Read lane status and requested drive
- * settings as set by the sink
- */
- dp_get_lane_status_and_lane_adjust(
- link,
- lt_settings,
- dpcd_lane_status,
- &dpcd_lane_status_updated,
- dpcd_lane_adjust,
- offset);
-
- /* 5. check CR done*/
- if (dp_is_cr_done(lane_count, dpcd_lane_status))
- return LINK_TRAINING_SUCCESS;
-
- /* 6. max VS reached*/
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if ((dp_get_link_encoding_format(&lt_settings->link_settings) ==
- DP_8b_10b_ENCODING) &&
- dp_is_max_vs_reached(lt_settings))
- break;
-#else
- if (dp_is_max_vs_reached(lt_settings))
- break;
-#endif
-
- /* 7. same lane settings*/
- /* Note: settings are the same for all lanes,
- * so comparing first lane is sufficient*/
- if ((dp_get_link_encoding_format(&lt_settings->link_settings) == DP_8b_10b_ENCODING) &&
- lt_settings->dpcd_lane_settings[0].bits.VOLTAGE_SWING_SET ==
- dpcd_lane_adjust[0].bits.VOLTAGE_SWING_LANE)
- retries_cr++;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- else if ((dp_get_link_encoding_format(&lt_settings->link_settings) == DP_128b_132b_ENCODING) &&
- lt_settings->dpcd_lane_settings[0].tx_ffe.PRESET_VALUE ==
- dpcd_lane_adjust[0].tx_ffe.PRESET_VALUE)
- retries_cr++;
-#endif
- else
- retries_cr = 0;
-
- /* 8. update VS/PE/PC2 in lt_settings*/
- dp_decide_lane_settings(lt_settings, dpcd_lane_adjust,
- lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings);
- retry_count++;
- }
-
- if (retry_count >= LINK_TRAINING_MAX_CR_RETRY) {
- ASSERT(0);
- DC_LOG_ERROR("%s: Link Training Error, could not get CR after %d tries. Possibly voltage swing issue",
- __func__,
- LINK_TRAINING_MAX_CR_RETRY);
-
- }
-
- return dp_get_cr_failure(lane_count, dpcd_lane_status);
-}
-
-static inline enum link_training_result dp_transition_to_video_idle(
- struct dc_link *link,
- struct link_training_settings *lt_settings,
- enum link_training_result status)
-{
- union lane_count_set lane_count_set = {0};
-
- /* 4. mainlink output idle pattern*/
- dp_set_hw_test_pattern(link, DP_TEST_PATTERN_VIDEO_MODE, NULL, 0);
-
- /*
- * 5. post training adjust if required
- * If the upstream DPTX and downstream DPRX both support TPS4,
- * TPS4 must be used instead of POST_LT_ADJ_REQ.
- */
- if (link->dpcd_caps.max_ln_count.bits.POST_LT_ADJ_REQ_SUPPORTED != 1 ||
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- lt_settings->pattern_for_eq >= DP_TRAINING_PATTERN_SEQUENCE_4) {
-#else
- lt_settings->pattern_for_eq == DP_TRAINING_PATTERN_SEQUENCE_4) {
-#endif
- /* delay 5ms after Main Link output idle pattern and then check
- * DPCD 0202h.
- */
- if (link->connector_signal != SIGNAL_TYPE_EDP && status == LINK_TRAINING_SUCCESS) {
- msleep(5);
- status = dp_check_link_loss_status(link, lt_settings);
- }
- return status;
- }
-
- if (status == LINK_TRAINING_SUCCESS &&
- perform_post_lt_adj_req_sequence(link, lt_settings) == false)
- status = LINK_TRAINING_LQA_FAIL;
-
- lane_count_set.bits.LANE_COUNT_SET = lt_settings->link_settings.lane_count;
- lane_count_set.bits.ENHANCED_FRAMING = lt_settings->enhanced_framing;
- lane_count_set.bits.POST_LT_ADJ_REQ_GRANTED = 0;
-
- core_link_write_dpcd(
- link,
- DP_LANE_COUNT_SET,
- &lane_count_set.raw,
- sizeof(lane_count_set));
-
- return status;
-}
-
-enum link_training_result dp_check_link_loss_status(
- struct dc_link *link,
- const struct link_training_settings *link_training_setting)
-{
- enum link_training_result status = LINK_TRAINING_SUCCESS;
- union lane_status lane_status;
- uint8_t dpcd_buf[6] = {0};
- uint32_t lane;
-
- core_link_read_dpcd(
- link,
- DP_SINK_COUNT,
- (uint8_t *)(dpcd_buf),
- sizeof(dpcd_buf));
-
- /*parse lane status*/
- for (lane = 0; lane < link->cur_link_settings.lane_count; lane++) {
- /*
- * check lanes status
- */
- lane_status.raw = get_nibble_at_index(&dpcd_buf[2], lane);
-
- if (!lane_status.bits.CHANNEL_EQ_DONE_0 ||
- !lane_status.bits.CR_DONE_0 ||
- !lane_status.bits.SYMBOL_LOCKED_0) {
- /* if one of the channel equalization, clock
- * recovery or symbol lock is dropped
- * consider it as (link has been
- * dropped) dp sink status has changed
- */
- status = LINK_TRAINING_LINK_LOSS;
- break;
- }
- }
-
- return status;
-}
-
-static inline void decide_8b_10b_training_settings(
- struct dc_link *link,
- const struct dc_link_settings *link_setting,
- struct link_training_settings *lt_settings)
-{
- memset(lt_settings, '\0', sizeof(struct link_training_settings));
-
- /* Initialize link settings */
- lt_settings->link_settings.use_link_rate_set = link_setting->use_link_rate_set;
- lt_settings->link_settings.link_rate_set = link_setting->link_rate_set;
- lt_settings->link_settings.link_rate = link_setting->link_rate;
- lt_settings->link_settings.lane_count = link_setting->lane_count;
- /* TODO hard coded to SS for now
- * lt_settings.link_settings.link_spread =
- * dal_display_path_is_ss_supported(
- * path_mode->display_path) ?
- * LINK_SPREAD_05_DOWNSPREAD_30KHZ :
- * LINK_SPREAD_DISABLED;
- */
- lt_settings->link_settings.link_spread = link->dp_ss_off ?
- LINK_SPREAD_DISABLED : LINK_SPREAD_05_DOWNSPREAD_30KHZ;
- lt_settings->lttpr_mode = link->lttpr_mode;
- lt_settings->cr_pattern_time = get_cr_training_aux_rd_interval(link, link_setting);
- lt_settings->eq_pattern_time = get_eq_training_aux_rd_interval(link, link_setting);
- lt_settings->pattern_for_cr = decide_cr_training_pattern(link_setting);
- lt_settings->pattern_for_eq = decide_eq_training_pattern(link, link_setting);
- lt_settings->enhanced_framing = 1;
- lt_settings->should_set_fec_ready = true;
- lt_settings->disallow_per_lane_settings = true;
- lt_settings->always_match_dpcd_with_hw_lane_settings = true;
- dp_hw_to_dpcd_lane_settings(lt_settings, lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings);
-}
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-static inline void decide_128b_132b_training_settings(struct dc_link *link,
- const struct dc_link_settings *link_settings,
- struct link_training_settings *lt_settings)
-{
- memset(lt_settings, 0, sizeof(*lt_settings));
-
- lt_settings->link_settings = *link_settings;
- /* TODO: should decide link spread when populating link_settings */
- lt_settings->link_settings.link_spread = link->dp_ss_off ? LINK_SPREAD_DISABLED :
- LINK_SPREAD_05_DOWNSPREAD_30KHZ;
-
- lt_settings->pattern_for_cr = decide_cr_training_pattern(link_settings);
- lt_settings->pattern_for_eq = decide_eq_training_pattern(link, link_settings);
- lt_settings->eq_pattern_time = 2500;
- lt_settings->eq_wait_time_limit = 400000;
- lt_settings->eq_loop_count_limit = 20;
- lt_settings->pattern_for_cds = DP_128b_132b_TPS2_CDS;
- lt_settings->cds_pattern_time = 2500;
- lt_settings->cds_wait_time_limit = (dp_convert_to_count(
- link->dpcd_caps.lttpr_caps.phy_repeater_cnt) + 1) * 20000;
- lt_settings->lttpr_mode = dp_convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) ?
- LTTPR_MODE_NON_TRANSPARENT : LTTPR_MODE_TRANSPARENT;
- lt_settings->disallow_per_lane_settings = true;
- dp_hw_to_dpcd_lane_settings(lt_settings,
- lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings);
-}
-#endif
-
-void dp_decide_training_settings(
- struct dc_link *link,
- const struct dc_link_settings *link_settings,
- struct link_training_settings *lt_settings)
-{
- if (dp_get_link_encoding_format(link_settings) == DP_8b_10b_ENCODING)
- decide_8b_10b_training_settings(link, link_settings, lt_settings);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- else if (dp_get_link_encoding_format(link_settings) == DP_128b_132b_ENCODING)
- decide_128b_132b_training_settings(link, link_settings, lt_settings);
-#endif
-}
-
-static void override_training_settings(
- struct dc_link *link,
- const struct dc_link_training_overrides *overrides,
- struct link_training_settings *lt_settings)
-{
- uint32_t lane;
-
- /* Override link spread */
- if (!link->dp_ss_off && overrides->downspread != NULL)
- lt_settings->link_settings.link_spread = *overrides->downspread ?
- LINK_SPREAD_05_DOWNSPREAD_30KHZ
- : LINK_SPREAD_DISABLED;
-
- /* Override lane settings */
- if (overrides->voltage_swing != NULL)
- lt_settings->voltage_swing = overrides->voltage_swing;
- if (overrides->pre_emphasis != NULL)
- lt_settings->pre_emphasis = overrides->pre_emphasis;
- if (overrides->post_cursor2 != NULL)
- lt_settings->post_cursor2 = overrides->post_cursor2;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (overrides->ffe_preset != NULL)
- lt_settings->ffe_preset = overrides->ffe_preset;
-#endif
- /* Override HW lane settings with BIOS forced values if present */
- if (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN &&
- link->lttpr_mode == LTTPR_MODE_TRANSPARENT) {
- lt_settings->voltage_swing = &link->bios_forced_drive_settings.VOLTAGE_SWING;
- lt_settings->pre_emphasis = &link->bios_forced_drive_settings.PRE_EMPHASIS;
- lt_settings->always_match_dpcd_with_hw_lane_settings = false;
- }
- for (lane = 0; lane < LANE_COUNT_DP_MAX; lane++) {
- lt_settings->lane_settings[lane].VOLTAGE_SWING =
- lt_settings->voltage_swing != NULL ?
- *lt_settings->voltage_swing :
- VOLTAGE_SWING_LEVEL0;
- lt_settings->lane_settings[lane].PRE_EMPHASIS =
- lt_settings->pre_emphasis != NULL ?
- *lt_settings->pre_emphasis
- : PRE_EMPHASIS_DISABLED;
- lt_settings->lane_settings[lane].POST_CURSOR2 =
- lt_settings->post_cursor2 != NULL ?
- *lt_settings->post_cursor2
- : POST_CURSOR2_DISABLED;
- }
-
- dp_hw_to_dpcd_lane_settings(lt_settings,
- lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings);
-
- /* Initialize training timings */
- if (overrides->cr_pattern_time != NULL)
- lt_settings->cr_pattern_time = *overrides->cr_pattern_time;
-
- if (overrides->eq_pattern_time != NULL)
- lt_settings->eq_pattern_time = *overrides->eq_pattern_time;
-
- if (overrides->pattern_for_cr != NULL)
- lt_settings->pattern_for_cr = *overrides->pattern_for_cr;
- if (overrides->pattern_for_eq != NULL)
- lt_settings->pattern_for_eq = *overrides->pattern_for_eq;
-
- if (overrides->enhanced_framing != NULL)
- lt_settings->enhanced_framing = *overrides->enhanced_framing;
-
- if (link->preferred_training_settings.fec_enable != NULL)
- lt_settings->should_set_fec_ready = *link->preferred_training_settings.fec_enable;
-}
-
-uint8_t dp_convert_to_count(uint8_t lttpr_repeater_count)
-{
- switch (lttpr_repeater_count) {
- case 0x80: // 1 lttpr repeater
- return 1;
- case 0x40: // 2 lttpr repeaters
- return 2;
- case 0x20: // 3 lttpr repeaters
- return 3;
- case 0x10: // 4 lttpr repeaters
- return 4;
- case 0x08: // 5 lttpr repeaters
- return 5;
- case 0x04: // 6 lttpr repeaters
- return 6;
- case 0x02: // 7 lttpr repeaters
- return 7;
- case 0x01: // 8 lttpr repeaters
- return 8;
- default:
- break;
- }
- return 0; // invalid value
-}
-
-static enum dc_status configure_lttpr_mode_transparent(struct dc_link *link)
-{
- uint8_t repeater_mode = DP_PHY_REPEATER_MODE_TRANSPARENT;
-
- DC_LOG_HW_LINK_TRAINING("%s\n Set LTTPR to Transparent Mode\n", __func__);
- return core_link_write_dpcd(link,
- DP_PHY_REPEATER_MODE,
- (uint8_t *)&repeater_mode,
- sizeof(repeater_mode));
-}
-
-static enum dc_status configure_lttpr_mode_non_transparent(
- struct dc_link *link,
- const struct link_training_settings *lt_settings)
-{
- /* aux timeout is already set to extended */
- /* RESET/SET lttpr mode to enable non transparent mode */
- uint8_t repeater_cnt;
- uint32_t aux_interval_address;
- uint8_t repeater_id;
- enum dc_status result = DC_ERROR_UNEXPECTED;
- uint8_t repeater_mode = DP_PHY_REPEATER_MODE_TRANSPARENT;
-
- enum dp_link_encoding encoding = dp_get_link_encoding_format(&lt_settings->link_settings);
-
- if (encoding == DP_8b_10b_ENCODING) {
- DC_LOG_HW_LINK_TRAINING("%s\n Set LTTPR to Transparent Mode\n", __func__);
- result = core_link_write_dpcd(link,
- DP_PHY_REPEATER_MODE,
- (uint8_t *)&repeater_mode,
- sizeof(repeater_mode));
-
- }
-
- if (result == DC_OK) {
- link->dpcd_caps.lttpr_caps.mode = repeater_mode;
- }
-
- if (link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) {
-
- DC_LOG_HW_LINK_TRAINING("%s\n Set LTTPR to Non Transparent Mode\n", __func__);
-
- repeater_mode = DP_PHY_REPEATER_MODE_NON_TRANSPARENT;
- result = core_link_write_dpcd(link,
- DP_PHY_REPEATER_MODE,
- (uint8_t *)&repeater_mode,
- sizeof(repeater_mode));
-
- if (result == DC_OK) {
- link->dpcd_caps.lttpr_caps.mode = repeater_mode;
- }
-
- if (encoding == DP_8b_10b_ENCODING) {
- repeater_cnt = dp_convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
-
- /* Driver does not need to train the first hop. Skip DPCD read and clear
- * AUX_RD_INTERVAL for DPTX-to-DPIA hop.
- */
- if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
- link->dpcd_caps.lttpr_caps.aux_rd_interval[--repeater_cnt] = 0;
-
- for (repeater_id = repeater_cnt; repeater_id > 0; repeater_id--) {
- aux_interval_address = DP_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1 +
- ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (repeater_id - 1));
- core_link_read_dpcd(
- link,
- aux_interval_address,
- (uint8_t *)&link->dpcd_caps.lttpr_caps.aux_rd_interval[repeater_id - 1],
- sizeof(link->dpcd_caps.lttpr_caps.aux_rd_interval[repeater_id - 1]));
- link->dpcd_caps.lttpr_caps.aux_rd_interval[repeater_id - 1] &= 0x7F;
- }
- }
- }
-
- return result;
-}
-
-static void repeater_training_done(struct dc_link *link, uint32_t offset)
-{
- union dpcd_training_pattern dpcd_pattern = {0};
-
- const uint32_t dpcd_base_lt_offset =
- DP_TRAINING_PATTERN_SET_PHY_REPEATER1 +
- ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1));
- /* Set training not in progress*/
- dpcd_pattern.v1_4.TRAINING_PATTERN_SET = DPCD_TRAINING_PATTERN_VIDEOIDLE;
-
- core_link_write_dpcd(
- link,
- dpcd_base_lt_offset,
- &dpcd_pattern.raw,
- 1);
-
- DC_LOG_HW_LINK_TRAINING("%s\n LTTPR Id: %d 0x%X pattern = %x\n",
- __func__,
- offset,
- dpcd_base_lt_offset,
- dpcd_pattern.v1_4.TRAINING_PATTERN_SET);
-}
-
-static void print_status_message(
- struct dc_link *link,
- const struct link_training_settings *lt_settings,
- enum link_training_result status)
-{
- char *link_rate = "Unknown";
- char *lt_result = "Unknown";
- char *lt_spread = "Disabled";
-
- switch (lt_settings->link_settings.link_rate) {
- case LINK_RATE_LOW:
- link_rate = "RBR";
- break;
- case LINK_RATE_RATE_2:
- link_rate = "R2";
- break;
- case LINK_RATE_RATE_3:
- link_rate = "R3";
- break;
- case LINK_RATE_HIGH:
- link_rate = "HBR";
- break;
- case LINK_RATE_RBR2:
- link_rate = "RBR2";
- break;
- case LINK_RATE_RATE_6:
- link_rate = "R6";
- break;
- case LINK_RATE_HIGH2:
- link_rate = "HBR2";
- break;
- case LINK_RATE_HIGH3:
- link_rate = "HBR3";
- break;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- case LINK_RATE_UHBR10:
- link_rate = "UHBR10";
- break;
- case LINK_RATE_UHBR13_5:
- link_rate = "UHBR13.5";
- break;
- case LINK_RATE_UHBR20:
- link_rate = "UHBR20";
- break;
-#endif
- default:
- break;
- }
-
- switch (status) {
- case LINK_TRAINING_SUCCESS:
- lt_result = "pass";
- break;
- case LINK_TRAINING_CR_FAIL_LANE0:
- lt_result = "CR failed lane0";
- break;
- case LINK_TRAINING_CR_FAIL_LANE1:
- lt_result = "CR failed lane1";
- break;
- case LINK_TRAINING_CR_FAIL_LANE23:
- lt_result = "CR failed lane23";
- break;
- case LINK_TRAINING_EQ_FAIL_CR:
- lt_result = "CR failed in EQ";
- break;
- case LINK_TRAINING_EQ_FAIL_EQ:
- lt_result = "EQ failed";
- break;
- case LINK_TRAINING_LQA_FAIL:
- lt_result = "LQA failed";
- break;
- case LINK_TRAINING_LINK_LOSS:
- lt_result = "Link loss";
- break;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- case DP_128b_132b_LT_FAILED:
- lt_result = "LT_FAILED received";
- break;
- case DP_128b_132b_MAX_LOOP_COUNT_REACHED:
- lt_result = "max loop count reached";
- break;
- case DP_128b_132b_CHANNEL_EQ_DONE_TIMEOUT:
- lt_result = "channel EQ timeout";
- break;
- case DP_128b_132b_CDS_DONE_TIMEOUT:
- lt_result = "CDS timeout";
- break;
-#endif
- default:
- break;
- }
-
- switch (lt_settings->link_settings.link_spread) {
- case LINK_SPREAD_DISABLED:
- lt_spread = "Disabled";
- break;
- case LINK_SPREAD_05_DOWNSPREAD_30KHZ:
- lt_spread = "0.5% 30KHz";
- break;
- case LINK_SPREAD_05_DOWNSPREAD_33KHZ:
- lt_spread = "0.5% 33KHz";
- break;
- default:
- break;
- }
-
- /* Connectivity log: link training */
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- /* TODO - DP2.0 Log: add connectivity log for FFE PRESET */
-#endif
- CONN_MSG_LT(link, "%sx%d %s VS=%d, PE=%d, DS=%s",
- link_rate,
- lt_settings->link_settings.lane_count,
- lt_result,
- lt_settings->lane_settings[0].VOLTAGE_SWING,
- lt_settings->lane_settings[0].PRE_EMPHASIS,
- lt_spread);
-}
-
-void dc_link_dp_set_drive_settings(
- struct dc_link *link,
- struct link_training_settings *lt_settings)
-{
- /* program ASIC PHY settings*/
- dp_set_hw_lane_settings(link, lt_settings, DPRX);
-
- dp_hw_to_dpcd_lane_settings(lt_settings,
- lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings);
-
- /* Notify DP sink the PHY settings from source */
- dpcd_set_lane_settings(link, lt_settings, DPRX);
-}
-
-bool dc_link_dp_perform_link_training_skip_aux(
- struct dc_link *link,
- const struct dc_link_settings *link_setting)
-{
- struct link_training_settings lt_settings = {0};
-
- dp_decide_training_settings(
- link,
- link_setting,
- &lt_settings);
- override_training_settings(
- link,
- &link->preferred_training_settings,
- &lt_settings);
-
- /* 1. Perform_clock_recovery_sequence. */
-
- /* transmit training pattern for clock recovery */
- dp_set_hw_training_pattern(link, lt_settings.pattern_for_cr, DPRX);
-
- /* call HWSS to set lane settings*/
- dp_set_hw_lane_settings(link, &lt_settings, DPRX);
-
- /* wait receiver to lock-on*/
- dp_wait_for_training_aux_rd_interval(link, lt_settings.cr_pattern_time);
-
- /* 2. Perform_channel_equalization_sequence. */
-
- /* transmit training pattern for channel equalization. */
- dp_set_hw_training_pattern(link, lt_settings.pattern_for_eq, DPRX);
-
- /* call HWSS to set lane settings*/
- dp_set_hw_lane_settings(link, &lt_settings, DPRX);
-
- /* wait receiver to lock-on. */
- dp_wait_for_training_aux_rd_interval(link, lt_settings.eq_pattern_time);
-
- /* 3. Perform_link_training_int. */
-
- /* Mainlink output idle pattern. */
- dp_set_hw_test_pattern(link, DP_TEST_PATTERN_VIDEO_MODE, NULL, 0);
-
- print_status_message(link, &lt_settings, LINK_TRAINING_SUCCESS);
-
- return true;
-}
-
-enum dc_status dpcd_configure_lttpr_mode(struct dc_link *link, struct link_training_settings *lt_settings)
-{
- enum dc_status status = DC_OK;
-
- if (lt_settings->lttpr_mode == LTTPR_MODE_TRANSPARENT)
- status = configure_lttpr_mode_transparent(link);
-
- else if (lt_settings->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT)
- status = configure_lttpr_mode_non_transparent(link, lt_settings);
-
- return status;
-}
-
-static void dpcd_exit_training_mode(struct dc_link *link)
-{
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- uint8_t sink_status = 0;
- uint8_t i;
-#endif
-
- /* clear training pattern set */
- dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE);
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- /* poll for intra-hop disable */
- for (i = 0; i < 10; i++) {
- if ((core_link_read_dpcd(link, DP_SINK_STATUS, &sink_status, 1) == DC_OK) &&
- (sink_status & DP_INTRA_HOP_AUX_REPLY_INDICATION) == 0)
- break;
- udelay(1000);
- }
-#endif
-}
-
-enum dc_status dpcd_configure_channel_coding(struct dc_link *link,
- struct link_training_settings *lt_settings)
-{
- enum dp_link_encoding encoding =
- dp_get_link_encoding_format(
- &lt_settings->link_settings);
- enum dc_status status;
-
- status = core_link_write_dpcd(
- link,
- DP_MAIN_LINK_CHANNEL_CODING_SET,
- (uint8_t *) &encoding,
- 1);
- DC_LOG_HW_LINK_TRAINING("%s:\n 0x%X MAIN_LINK_CHANNEL_CODING_SET = %x\n",
- __func__,
- DP_MAIN_LINK_CHANNEL_CODING_SET,
- encoding);
-
- return status;
-}
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-static void dpcd_128b_132b_get_aux_rd_interval(struct dc_link *link,
- uint32_t *interval_in_us)
-{
- union dp_128b_132b_training_aux_rd_interval dpcd_interval;
- uint32_t interval_unit = 0;
-
- dpcd_interval.raw = 0;
- core_link_read_dpcd(link, DP_128b_132b_TRAINING_AUX_RD_INTERVAL,
- &dpcd_interval.raw, sizeof(dpcd_interval.raw));
- interval_unit = dpcd_interval.bits.UNIT ? 1 : 2; /* 0b = 2 ms, 1b = 1 ms */
- /* (128b/132b_TRAINING_AUX_RD_INTERVAL value + 1) *
- * INTERVAL_UNIT. The maximum is 256 ms
- */
- *interval_in_us = (dpcd_interval.bits.VALUE + 1) * interval_unit * 1000;
-}
-
-static enum link_training_result dp_perform_128b_132b_channel_eq_done_sequence(
- struct dc_link *link,
- struct link_training_settings *lt_settings)
-{
- uint8_t loop_count;
- uint32_t aux_rd_interval = 0;
- uint32_t wait_time = 0;
- union lane_align_status_updated dpcd_lane_status_updated = {0};
- union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {0};
- enum link_training_result status = LINK_TRAINING_SUCCESS;
- union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX] = {0};
-
- /* Transmit 128b/132b_TPS1 over Main-Link */
- dp_set_hw_training_pattern(link, lt_settings->pattern_for_cr, DPRX);
- /* Set TRAINING_PATTERN_SET to 01h */
- dpcd_set_training_pattern(link, lt_settings->pattern_for_cr);
-
- /* Adjust TX_FFE_PRESET_VALUE and Transmit 128b/132b_TPS2 over Main-Link */
- dpcd_128b_132b_get_aux_rd_interval(link, &aux_rd_interval);
- dp_get_lane_status_and_lane_adjust(link, lt_settings, dpcd_lane_status,
- &dpcd_lane_status_updated, dpcd_lane_adjust, DPRX);
- dp_decide_lane_settings(lt_settings, dpcd_lane_adjust,
- lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings);
- dp_set_hw_lane_settings(link, lt_settings, DPRX);
- dp_set_hw_training_pattern(link, lt_settings->pattern_for_eq, DPRX);
-
- /* Set loop counter to start from 1 */
- loop_count = 1;
-
- /* Set TRAINING_PATTERN_SET to 02h and TX_FFE_PRESET_VALUE in one AUX transaction */
- dpcd_set_lt_pattern_and_lane_settings(link, lt_settings,
- lt_settings->pattern_for_eq, DPRX);
-
- /* poll for channel EQ done */
- while (status == LINK_TRAINING_SUCCESS) {
- dp_wait_for_training_aux_rd_interval(link, aux_rd_interval);
- wait_time += aux_rd_interval;
- dp_get_lane_status_and_lane_adjust(link, lt_settings, dpcd_lane_status,
- &dpcd_lane_status_updated, dpcd_lane_adjust, DPRX);
- dp_decide_lane_settings(lt_settings, dpcd_lane_adjust,
- lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings);
- dpcd_128b_132b_get_aux_rd_interval(link, &aux_rd_interval);
- if (dp_is_ch_eq_done(lt_settings->link_settings.lane_count,
- dpcd_lane_status)) {
- /* pass */
- break;
- } else if (loop_count >= lt_settings->eq_loop_count_limit) {
- status = DP_128b_132b_MAX_LOOP_COUNT_REACHED;
- } else if (dpcd_lane_status_updated.bits.LT_FAILED_128b_132b) {
- status = DP_128b_132b_LT_FAILED;
- } else {
- dp_set_hw_lane_settings(link, lt_settings, DPRX);
- dpcd_set_lane_settings(link, lt_settings, DPRX);
- }
- loop_count++;
- }
-
- /* poll for EQ interlane align done */
- while (status == LINK_TRAINING_SUCCESS) {
- if (dpcd_lane_status_updated.bits.EQ_INTERLANE_ALIGN_DONE_128b_132b) {
- /* pass */
- break;
- } else if (wait_time >= lt_settings->eq_wait_time_limit) {
- status = DP_128b_132b_CHANNEL_EQ_DONE_TIMEOUT;
- } else if (dpcd_lane_status_updated.bits.LT_FAILED_128b_132b) {
- status = DP_128b_132b_LT_FAILED;
- } else {
- dp_wait_for_training_aux_rd_interval(link,
- lt_settings->eq_pattern_time);
- wait_time += lt_settings->eq_pattern_time;
- dp_get_lane_status_and_lane_adjust(link, lt_settings, dpcd_lane_status,
- &dpcd_lane_status_updated, dpcd_lane_adjust, DPRX);
- }
- }
-
- return status;
-}
-
-static enum link_training_result dp_perform_128b_132b_cds_done_sequence(
- struct dc_link *link,
- struct link_training_settings *lt_settings)
-{
- /* Assumption: assume hardware has transmitted eq pattern */
- enum link_training_result status = LINK_TRAINING_SUCCESS;
- union lane_align_status_updated dpcd_lane_status_updated = {0};
- union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {0};
- union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX] = { { {0} } };
- uint32_t wait_time = 0;
-
- /* initiate CDS done sequence */
- dpcd_set_training_pattern(link, lt_settings->pattern_for_cds);
-
- /* poll for CDS interlane align done and symbol lock */
- while (status == LINK_TRAINING_SUCCESS) {
- dp_wait_for_training_aux_rd_interval(link,
- lt_settings->cds_pattern_time);
- wait_time += lt_settings->cds_pattern_time;
- dp_get_lane_status_and_lane_adjust(link, lt_settings, dpcd_lane_status,
- &dpcd_lane_status_updated, dpcd_lane_adjust, DPRX);
- if (dp_is_symbol_locked(lt_settings->link_settings.lane_count, dpcd_lane_status) &&
- dpcd_lane_status_updated.bits.CDS_INTERLANE_ALIGN_DONE_128b_132b) {
- /* pass */
- break;
- } else if (dpcd_lane_status_updated.bits.LT_FAILED_128b_132b) {
- status = DP_128b_132b_LT_FAILED;
- } else if (wait_time >= lt_settings->cds_wait_time_limit) {
- status = DP_128b_132b_CDS_DONE_TIMEOUT;
- }
- }
-
- return status;
-}
-#endif
-
-static enum link_training_result dp_perform_8b_10b_link_training(
- struct dc_link *link,
- struct link_training_settings *lt_settings)
-{
- enum link_training_result status = LINK_TRAINING_SUCCESS;
-
- uint8_t repeater_cnt;
- uint8_t repeater_id;
- uint8_t lane = 0;
-
- if (link->ctx->dc->work_arounds.lt_early_cr_pattern)
- start_clock_recovery_pattern_early(link, lt_settings, DPRX);
-
- /* 1. set link rate, lane count and spread. */
- dpcd_set_link_settings(link, lt_settings);
-
- if (link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) {
-
- /* 2. perform link training (set link training done
- * to false is done as well)
- */
- repeater_cnt = dp_convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
-
- for (repeater_id = repeater_cnt; (repeater_id > 0 && status == LINK_TRAINING_SUCCESS);
- repeater_id--) {
- status = perform_clock_recovery_sequence(link, lt_settings, repeater_id);
-
- if (status != LINK_TRAINING_SUCCESS)
- break;
-
- status = perform_channel_equalization_sequence(link,
- lt_settings,
- repeater_id);
-
- if (status != LINK_TRAINING_SUCCESS)
- break;
-
- repeater_training_done(link, repeater_id);
- }
-
- for (lane = 0; lane < (uint8_t)lt_settings->link_settings.lane_count; lane++)
- lt_settings->dpcd_lane_settings[lane].raw = 0;
- }
-
- if (status == LINK_TRAINING_SUCCESS) {
- status = perform_clock_recovery_sequence(link, lt_settings, DPRX);
- if (status == LINK_TRAINING_SUCCESS) {
- status = perform_channel_equalization_sequence(link,
- lt_settings,
- DPRX);
- }
- }
-
- return status;
-}
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-static enum link_training_result dp_perform_128b_132b_link_training(
- struct dc_link *link,
- struct link_training_settings *lt_settings)
-{
- enum link_training_result result = LINK_TRAINING_SUCCESS;
-
- /* TODO - DP2.0 Link: remove legacy_dp2_lt logic */
- if (link->dc->debug.legacy_dp2_lt) {
- struct link_training_settings legacy_settings;
-
- decide_8b_10b_training_settings(link,
- &lt_settings->link_settings,
- &legacy_settings);
- return dp_perform_8b_10b_link_training(link, &legacy_settings);
- }
-
- dpcd_set_link_settings(link, lt_settings);
-
- if (result == LINK_TRAINING_SUCCESS)
- result = dp_perform_128b_132b_channel_eq_done_sequence(link, lt_settings);
-
- if (result == LINK_TRAINING_SUCCESS)
- result = dp_perform_128b_132b_cds_done_sequence(link, lt_settings);
-
- return result;
-}
-#endif
-
-enum link_training_result dc_link_dp_perform_link_training(
- struct dc_link *link,
- const struct dc_link_settings *link_settings,
- bool skip_video_pattern)
-{
- enum link_training_result status = LINK_TRAINING_SUCCESS;
- struct link_training_settings lt_settings = {0};
- enum dp_link_encoding encoding =
- dp_get_link_encoding_format(link_settings);
-
- /* decide training settings */
- dp_decide_training_settings(
- link,
- link_settings,
- &lt_settings);
- override_training_settings(
- link,
- &link->preferred_training_settings,
- &lt_settings);
-
- /* reset previous training states */
- dpcd_exit_training_mode(link);
-
- /* configure link prior to entering training mode */
- dpcd_configure_lttpr_mode(link, &lt_settings);
- dp_set_fec_ready(link, lt_settings.should_set_fec_ready);
- dpcd_configure_channel_coding(link, &lt_settings);
-
- /* enter training mode:
- * Per DP specs starting from here, DPTX device shall not issue
- * Non-LT AUX transactions inside training mode.
- */
- if (encoding == DP_8b_10b_ENCODING)
- status = dp_perform_8b_10b_link_training(link, &lt_settings);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- else if (encoding == DP_128b_132b_ENCODING)
- status = dp_perform_128b_132b_link_training(link, &lt_settings);
-#endif
- else
- ASSERT(0);
-
- /* exit training mode and switch to video idle */
- dpcd_exit_training_mode(link);
- if ((status == LINK_TRAINING_SUCCESS) || !skip_video_pattern)
- status = dp_transition_to_video_idle(link,
- &lt_settings,
- status);
-
- /* dump debug data */
- print_status_message(link, &lt_settings, status);
- if (status != LINK_TRAINING_SUCCESS)
- link->ctx->dc->debug_data.ltFailCount++;
- return status;
-}
-
-bool perform_link_training_with_retries(
- const struct dc_link_settings *link_setting,
- bool skip_video_pattern,
- int attempts,
- struct pipe_ctx *pipe_ctx,
- enum signal_type signal,
- bool do_fallback)
-{
- int j;
- uint8_t delay_between_attempts = LINK_TRAINING_RETRY_DELAY;
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct dc_link *link = stream->link;
- enum dp_panel_mode panel_mode = dp_get_panel_mode(link);
- struct link_encoder *link_enc;
- enum link_training_result status = LINK_TRAINING_CR_FAIL_LANE0;
- struct dc_link_settings current_setting = *link_setting;
-
- /* Dynamically assigned link encoders associated with stream rather than
- * link.
- */
- if (link->is_dig_mapping_flexible && link->dc->res_pool->funcs->link_encs_assign)
- link_enc = link_enc_cfg_get_link_enc_used_by_stream(link->ctx->dc, pipe_ctx->stream);
- else
- link_enc = link->link_enc;
-
- /* We need to do this before the link training to ensure the idle pattern in SST
- * mode will be sent right after the link training
- */
- if (dp_get_link_encoding_format(&current_setting) == DP_8b_10b_ENCODING) {
- link_enc->funcs->connect_dig_be_to_fe(link_enc,
- pipe_ctx->stream_res.stream_enc->id, true);
- dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_CONNECT_DIG_FE_BE);
- }
-
- for (j = 0; j < attempts; ++j) {
-
- DC_LOG_HW_LINK_TRAINING("%s: Beginning link training attempt %u of %d\n",
- __func__, (unsigned int)j + 1, attempts);
-
- dp_enable_link_phy(
- link,
- signal,
- pipe_ctx->clock_source->id,
- &current_setting);
-
- if (stream->sink_patches.dppowerup_delay > 0) {
- int delay_dp_power_up_in_ms = stream->sink_patches.dppowerup_delay;
-
- msleep(delay_dp_power_up_in_ms);
- }
-
-#ifdef CONFIG_DRM_AMD_DC_HDCP
- if (panel_mode == DP_PANEL_MODE_EDP) {
- struct cp_psp *cp_psp = &stream->ctx->cp_psp;
-
- if (cp_psp && cp_psp->funcs.enable_assr)
- /* ASSR is bound to fail with unsigned PSP
- * verstage used during devlopment phase.
- * Report and continue with eDP panel mode to
- * perform eDP link training with right settings
- */
- cp_psp->funcs.enable_assr(cp_psp->handle, link);
- }
-#endif
-
- dp_set_panel_mode(link, panel_mode);
-
- if (link->aux_access_disabled) {
- dc_link_dp_perform_link_training_skip_aux(link, &current_setting);
- return true;
- } else {
- /** @todo Consolidate USB4 DP and DPx.x training. */
- if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
- status = dc_link_dpia_perform_link_training(link,
- &current_setting,
- skip_video_pattern);
-
- /* Transmit idle pattern once training successful. */
- if (status == LINK_TRAINING_SUCCESS)
- dp_set_hw_test_pattern(link, DP_TEST_PATTERN_VIDEO_MODE,
- NULL, 0);
- } else {
- status = dc_link_dp_perform_link_training(link,
- &current_setting,
- skip_video_pattern);
- }
-
- if (status == LINK_TRAINING_SUCCESS)
- return true;
- }
-
- /* latest link training still fail, skip delay and keep PHY on
- */
- if (j == (attempts - 1) && link->ep_type == DISPLAY_ENDPOINT_PHY)
- break;
-
- DC_LOG_WARNING("%s: Link training attempt %u of %d failed\n",
- __func__, (unsigned int)j + 1, attempts);
-
- dp_disable_link_phy(link, signal);
-
- /* Abort link training if failure due to sink being unplugged. */
- if (status == LINK_TRAINING_ABORT) {
- enum dc_connection_type type = dc_connection_none;
-
- dc_link_detect_sink(link, &type);
- if (type == dc_connection_none)
- break;
- } else if (do_fallback) {
- uint32_t req_bw;
- uint32_t link_bw;
-
- decide_fallback_link_setting(*link_setting, &current_setting, status);
- /* Fail link training if reduced link bandwidth no longer meets
- * stream requirements.
- */
- req_bw = dc_bandwidth_in_kbps_from_timing(&stream->timing);
- link_bw = dc_link_bandwidth_kbps(link, &current_setting);
- if (req_bw > link_bw)
- break;
- }
-
- msleep(delay_between_attempts);
-
- delay_between_attempts += LINK_TRAINING_RETRY_DELAY;
- }
-
- return false;
-}
-
-static enum clock_source_id get_clock_source_id(struct dc_link *link)
-{
- enum clock_source_id dp_cs_id = CLOCK_SOURCE_ID_UNDEFINED;
- struct clock_source *dp_cs = link->dc->res_pool->dp_clock_source;
-
- if (dp_cs != NULL) {
- dp_cs_id = dp_cs->id;
- } else {
- /*
- * dp clock source is not initialized for some reason.
- * Should not happen, CLOCK_SOURCE_ID_EXTERNAL will be used
- */
- ASSERT(dp_cs);
- }
-
- return dp_cs_id;
-}
-
-static void set_dp_mst_mode(struct dc_link *link, bool mst_enable)
-{
- if (mst_enable == false &&
- link->type == dc_connection_mst_branch) {
- /* Disable MST on link. Use only local sink. */
- dp_disable_link_phy_mst(link, link->connector_signal);
-
- link->type = dc_connection_single;
- link->local_sink = link->remote_sinks[0];
- link->local_sink->sink_signal = SIGNAL_TYPE_DISPLAY_PORT;
- dc_sink_retain(link->local_sink);
- dm_helpers_dp_mst_stop_top_mgr(link->ctx, link);
- } else if (mst_enable == true &&
- link->type == dc_connection_single &&
- link->remote_sinks[0] != NULL) {
- /* Re-enable MST on link. */
- dp_disable_link_phy(link, link->connector_signal);
- dp_enable_mst_on_sink(link, true);
-
- link->type = dc_connection_mst_branch;
- link->local_sink->sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST;
- }
-}
-
-bool dc_link_dp_sync_lt_begin(struct dc_link *link)
-{
- /* Begin Sync LT. During this time,
- * DPCD:600h must not be powered down.
- */
- link->sync_lt_in_progress = true;
-
- /*Clear any existing preferred settings.*/
- memset(&link->preferred_training_settings, 0,
- sizeof(struct dc_link_training_overrides));
- memset(&link->preferred_link_setting, 0,
- sizeof(struct dc_link_settings));
-
- return true;
-}
-
-enum link_training_result dc_link_dp_sync_lt_attempt(
- struct dc_link *link,
- struct dc_link_settings *link_settings,
- struct dc_link_training_overrides *lt_overrides)
-{
- struct link_training_settings lt_settings = {0};
- enum link_training_result lt_status = LINK_TRAINING_SUCCESS;
- enum dp_panel_mode panel_mode = DP_PANEL_MODE_DEFAULT;
- enum clock_source_id dp_cs_id = CLOCK_SOURCE_ID_EXTERNAL;
- bool fec_enable = false;
-
- dp_decide_training_settings(
- link,
- link_settings,
- &lt_settings);
- override_training_settings(
- link,
- lt_overrides,
- &lt_settings);
- /* Setup MST Mode */
- if (lt_overrides->mst_enable)
- set_dp_mst_mode(link, *lt_overrides->mst_enable);
-
- /* Disable link */
- dp_disable_link_phy(link, link->connector_signal);
-
- /* Enable link */
- dp_cs_id = get_clock_source_id(link);
- dp_enable_link_phy(link, link->connector_signal,
- dp_cs_id, link_settings);
-
- /* Set FEC enable */
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dp_get_link_encoding_format(link_settings) == DP_8b_10b_ENCODING) {
-#endif
- fec_enable = lt_overrides->fec_enable && *lt_overrides->fec_enable;
- dp_set_fec_ready(link, fec_enable);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- }
-#endif
-
- if (lt_overrides->alternate_scrambler_reset) {
- if (*lt_overrides->alternate_scrambler_reset)
- panel_mode = DP_PANEL_MODE_EDP;
- else
- panel_mode = DP_PANEL_MODE_DEFAULT;
- } else
- panel_mode = dp_get_panel_mode(link);
-
- dp_set_panel_mode(link, panel_mode);
-
- /* Attempt to train with given link training settings */
- if (link->ctx->dc->work_arounds.lt_early_cr_pattern)
- start_clock_recovery_pattern_early(link, &lt_settings, DPRX);
-
- /* Set link rate, lane count and spread. */
- dpcd_set_link_settings(link, &lt_settings);
-
- /* 2. perform link training (set link training done
- * to false is done as well)
- */
- lt_status = perform_clock_recovery_sequence(link, &lt_settings, DPRX);
- if (lt_status == LINK_TRAINING_SUCCESS) {
- lt_status = perform_channel_equalization_sequence(link,
- &lt_settings,
- DPRX);
- }
-
- /* 3. Sync LT must skip TRAINING_PATTERN_SET:0 (video pattern)*/
- /* 4. print status message*/
- print_status_message(link, &lt_settings, lt_status);
-
- return lt_status;
-}
-
-bool dc_link_dp_sync_lt_end(struct dc_link *link, bool link_down)
-{
- /* If input parameter is set, shut down phy.
- * Still shouldn't turn off dp_receiver (DPCD:600h)
- */
- if (link_down == true) {
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- struct dc_link_settings link_settings = link->cur_link_settings;
-#endif
- dp_disable_link_phy(link, link->connector_signal);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dp_get_link_encoding_format(&link_settings) == DP_8b_10b_ENCODING)
-#endif
- dp_set_fec_ready(link, false);
- }
-
- link->sync_lt_in_progress = false;
- return true;
-}
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-static enum dc_link_rate get_lttpr_max_link_rate(struct dc_link *link)
-{
- enum dc_link_rate lttpr_max_link_rate = link->dpcd_caps.lttpr_caps.max_link_rate;
-
- if (link->dpcd_caps.lttpr_caps.supported_128b_132b_rates.bits.UHBR20)
- lttpr_max_link_rate = LINK_RATE_UHBR20;
- else if (link->dpcd_caps.lttpr_caps.supported_128b_132b_rates.bits.UHBR13_5)
- lttpr_max_link_rate = LINK_RATE_UHBR13_5;
- else if (link->dpcd_caps.lttpr_caps.supported_128b_132b_rates.bits.UHBR10)
- lttpr_max_link_rate = LINK_RATE_UHBR10;
-
- return lttpr_max_link_rate;
-}
-#endif
-
-bool dc_link_dp_get_max_link_enc_cap(const struct dc_link *link, struct dc_link_settings *max_link_enc_cap)
-{
- struct link_encoder *link_enc = NULL;
-
- if (!max_link_enc_cap) {
- DC_LOG_ERROR("%s: Could not return max link encoder caps", __func__);
- return false;
- }
-
- /* Links supporting dynamically assigned link encoder will be assigned next
- * available encoder if one not already assigned.
- */
- if (link->is_dig_mapping_flexible &&
- link->dc->res_pool->funcs->link_encs_assign) {
- link_enc = link_enc_cfg_get_link_enc_used_by_link(link->ctx->dc, link);
- if (link_enc == NULL)
- link_enc = link_enc_cfg_get_next_avail_link_enc(link->ctx->dc);
- } else
- link_enc = link->link_enc;
- ASSERT(link_enc);
-
- if (link_enc && link_enc->funcs->get_max_link_cap) {
- link_enc->funcs->get_max_link_cap(link_enc, max_link_enc_cap);
- return true;
- }
-
- DC_LOG_ERROR("%s: Max link encoder caps unknown", __func__);
- max_link_enc_cap->lane_count = 1;
- max_link_enc_cap->link_rate = 6;
- return false;
-}
-
-static struct dc_link_settings get_max_link_cap(struct dc_link *link)
-{
- struct dc_link_settings max_link_cap = {0};
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- enum dc_link_rate lttpr_max_link_rate;
-#endif
- struct link_encoder *link_enc = NULL;
-
- /* Links supporting dynamically assigned link encoder will be assigned next
- * available encoder if one not already assigned.
- */
- if (link->is_dig_mapping_flexible &&
- link->dc->res_pool->funcs->link_encs_assign) {
- link_enc = link_enc_cfg_get_link_enc_used_by_link(link->ctx->dc, link);
- if (link_enc == NULL)
- link_enc = link_enc_cfg_get_next_avail_link_enc(link->ctx->dc);
- } else
- link_enc = link->link_enc;
- ASSERT(link_enc);
-
- /* get max link encoder capability */
- if (link_enc)
- link_enc->funcs->get_max_link_cap(link_enc, &max_link_cap);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (max_link_cap.link_rate >= LINK_RATE_UHBR10 &&
- !link->hpo_dp_link_enc)
- max_link_cap.link_rate = LINK_RATE_HIGH3;
-#endif
-
- /* Lower link settings based on sink's link cap */
- if (link->reported_link_cap.lane_count < max_link_cap.lane_count)
- max_link_cap.lane_count =
- link->reported_link_cap.lane_count;
- if (link->reported_link_cap.link_rate < max_link_cap.link_rate)
- max_link_cap.link_rate =
- link->reported_link_cap.link_rate;
- if (link->reported_link_cap.link_spread <
- max_link_cap.link_spread)
- max_link_cap.link_spread =
- link->reported_link_cap.link_spread;
- /*
- * account for lttpr repeaters cap
- * notes: repeaters do not snoop in the DPRX Capabilities addresses (3.6.3).
- */
- if (link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) {
- if (link->dpcd_caps.lttpr_caps.max_lane_count < max_link_cap.lane_count)
- max_link_cap.lane_count = link->dpcd_caps.lttpr_caps.max_lane_count;
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- lttpr_max_link_rate = get_lttpr_max_link_rate(link);
-
- if (lttpr_max_link_rate < max_link_cap.link_rate)
- max_link_cap.link_rate = lttpr_max_link_rate;
-#else
- if (link->dpcd_caps.lttpr_caps.max_link_rate < max_link_cap.link_rate)
- max_link_cap.link_rate = link->dpcd_caps.lttpr_caps.max_link_rate;
-#endif
-
- DC_LOG_HW_LINK_TRAINING("%s\n Training with LTTPR, max_lane count %d max_link rate %d \n",
- __func__,
- max_link_cap.lane_count,
- max_link_cap.link_rate);
- }
- return max_link_cap;
-}
-
-static enum dc_status read_hpd_rx_irq_data(
- struct dc_link *link,
- union hpd_irq_data *irq_data)
-{
- static enum dc_status retval;
-
- /* The HW reads 16 bytes from 200h on HPD,
- * but if we get an AUX_DEFER, the HW cannot retry
- * and this causes the CTS tests 4.3.2.1 - 3.2.4 to
- * fail, so we now explicitly read 6 bytes which is
- * the req from the above mentioned test cases.
- *
- * For DP 1.4 we need to read those from 2002h range.
- */
- if (link->dpcd_caps.dpcd_rev.raw < DPCD_REV_14)
- retval = core_link_read_dpcd(
- link,
- DP_SINK_COUNT,
- irq_data->raw,
- sizeof(union hpd_irq_data));
- else {
- /* Read 14 bytes in a single read and then copy only the required fields.
- * This is more efficient than doing it in two separate AUX reads. */
-
- uint8_t tmp[DP_SINK_STATUS_ESI - DP_SINK_COUNT_ESI + 1];
-
- retval = core_link_read_dpcd(
- link,
- DP_SINK_COUNT_ESI,
- tmp,
- sizeof(tmp));
-
- if (retval != DC_OK)
- return retval;
-
- irq_data->bytes.sink_cnt.raw = tmp[DP_SINK_COUNT_ESI - DP_SINK_COUNT_ESI];
- irq_data->bytes.device_service_irq.raw = tmp[DP_DEVICE_SERVICE_IRQ_VECTOR_ESI0 - DP_SINK_COUNT_ESI];
- irq_data->bytes.lane01_status.raw = tmp[DP_LANE0_1_STATUS_ESI - DP_SINK_COUNT_ESI];
- irq_data->bytes.lane23_status.raw = tmp[DP_LANE2_3_STATUS_ESI - DP_SINK_COUNT_ESI];
- irq_data->bytes.lane_status_updated.raw = tmp[DP_LANE_ALIGN_STATUS_UPDATED_ESI - DP_SINK_COUNT_ESI];
- irq_data->bytes.sink_status.raw = tmp[DP_SINK_STATUS_ESI - DP_SINK_COUNT_ESI];
- }
-
- return retval;
-}
-
-bool hpd_rx_irq_check_link_loss_status(
- struct dc_link *link,
- union hpd_irq_data *hpd_irq_dpcd_data)
-{
- uint8_t irq_reg_rx_power_state = 0;
- enum dc_status dpcd_result = DC_ERROR_UNEXPECTED;
- union lane_status lane_status;
- uint32_t lane;
- bool sink_status_changed;
- bool return_code;
-
- sink_status_changed = false;
- return_code = false;
-
- if (link->cur_link_settings.lane_count == 0)
- return return_code;
-
- /*1. Check that Link Status changed, before re-training.*/
-
- /*parse lane status*/
- for (lane = 0; lane < link->cur_link_settings.lane_count; lane++) {
- /* check status of lanes 0,1
- * changed DpcdAddress_Lane01Status (0x202)
- */
- lane_status.raw = get_nibble_at_index(
- &hpd_irq_dpcd_data->bytes.lane01_status.raw,
- lane);
-
- if (!lane_status.bits.CHANNEL_EQ_DONE_0 ||
- !lane_status.bits.CR_DONE_0 ||
- !lane_status.bits.SYMBOL_LOCKED_0) {
- /* if one of the channel equalization, clock
- * recovery or symbol lock is dropped
- * consider it as (link has been
- * dropped) dp sink status has changed
- */
- sink_status_changed = true;
- break;
- }
- }
-
- /* Check interlane align.*/
- if (sink_status_changed ||
- !hpd_irq_dpcd_data->bytes.lane_status_updated.bits.INTERLANE_ALIGN_DONE) {
-
- DC_LOG_HW_HPD_IRQ("%s: Link Status changed.\n", __func__);
-
- return_code = true;
-
- /*2. Check that we can handle interrupt: Not in FS DOS,
- * Not in "Display Timeout" state, Link is trained.
- */
- dpcd_result = core_link_read_dpcd(link,
- DP_SET_POWER,
- &irq_reg_rx_power_state,
- sizeof(irq_reg_rx_power_state));
-
- if (dpcd_result != DC_OK) {
- DC_LOG_HW_HPD_IRQ("%s: DPCD read failed to obtain power state.\n",
- __func__);
- } else {
- if (irq_reg_rx_power_state != DP_SET_POWER_D0)
- return_code = false;
- }
- }
-
- return return_code;
-}
-
-bool dp_verify_link_cap(
- struct dc_link *link,
- struct dc_link_settings *known_limit_link_setting,
- int *fail_count)
-{
- struct dc_link_settings max_link_cap = {0};
- struct dc_link_settings cur_link_setting = {0};
- struct dc_link_settings *cur = &cur_link_setting;
- struct dc_link_settings initial_link_settings = {0};
- bool success;
- bool skip_link_training;
- bool skip_video_pattern;
- enum clock_source_id dp_cs_id = CLOCK_SOURCE_ID_EXTERNAL;
- enum link_training_result status;
- union hpd_irq_data irq_data;
-
- /* link training starts with the maximum common settings
- * supported by both sink and ASIC.
- */
- max_link_cap = get_max_link_cap(link);
- initial_link_settings = get_common_supported_link_settings(
- *known_limit_link_setting,
- max_link_cap);
-
- /* Accept reported capabilities if link supports flexible encoder mapping or encoder already in use. */
- if (link->dc->debug.skip_detection_link_training ||
- link->is_dig_mapping_flexible) {
- /* TODO - should we check link encoder's max link caps here?
- * How do we know which link encoder to check from?
- */
- link->verified_link_cap = *known_limit_link_setting;
- return true;
- } else if (link->link_enc && link->dc->res_pool->funcs->link_encs_assign &&
- !link_enc_cfg_is_link_enc_avail(link->ctx->dc, link->link_enc->preferred_engine, link)) {
- link->verified_link_cap = initial_link_settings;
- return true;
- }
-
- memset(&irq_data, 0, sizeof(irq_data));
- success = false;
- skip_link_training = false;
-
- /* Grant extended timeout request */
- if ((link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) && (link->dpcd_caps.lttpr_caps.max_ext_timeout > 0)) {
- uint8_t grant = link->dpcd_caps.lttpr_caps.max_ext_timeout & 0x80;
-
- core_link_write_dpcd(link, DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT, &grant, sizeof(grant));
- }
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dp_get_link_encoding_format(&link->cur_link_settings) == DP_128b_132b_ENCODING)
- reset_dp_hpo_stream_encoders_for_link(link);
-#endif
- /* TODO implement override and monitor patch later */
-
- /* try to train the link from high to low to
- * find the physical link capability
- */
- /* disable PHY done possible by BIOS, will be done by driver itself */
- dp_disable_link_phy(link, link->connector_signal);
-
- dp_cs_id = get_clock_source_id(link);
-
- cur_link_setting = initial_link_settings;
-
- /* Temporary Renoir-specific workaround for SWDEV-215184;
- * PHY will sometimes be in bad state on hotplugging display from certain USB-C dongle,
- * so add extra cycle of enabling and disabling the PHY before first link training.
- */
- if (link->link_enc && link->link_enc->features.flags.bits.DP_IS_USB_C &&
- link->dc->debug.usbc_combo_phy_reset_wa) {
- dp_enable_link_phy(link, link->connector_signal, dp_cs_id, cur);
- dp_disable_link_phy(link, link->connector_signal);
- }
-
- do {
- skip_video_pattern = true;
-
- if (cur->link_rate == LINK_RATE_LOW)
- skip_video_pattern = false;
-
- dp_enable_link_phy(
- link,
- link->connector_signal,
- dp_cs_id,
- cur);
-
-
- if (skip_link_training)
- success = true;
- else {
- status = dc_link_dp_perform_link_training(
- link,
- cur,
- skip_video_pattern);
- if (status == LINK_TRAINING_SUCCESS)
- success = true;
- else
- (*fail_count)++;
- }
-
- if (success) {
- link->verified_link_cap = *cur;
- udelay(1000);
- if (read_hpd_rx_irq_data(link, &irq_data) == DC_OK)
- if (hpd_rx_irq_check_link_loss_status(
- link,
- &irq_data))
- (*fail_count)++;
- }
- /* always disable the link before trying another
- * setting or before returning we'll enable it later
- * based on the actual mode we're driving
- */
- dp_disable_link_phy(link, link->connector_signal);
- } while (!success && decide_fallback_link_setting(
- initial_link_settings, cur, status));
-
- /* Link Training failed for all Link Settings
- * (Lane Count is still unknown)
- */
- if (!success) {
- /* If all LT fails for all settings,
- * set verified = failed safe (1 lane low)
- */
- link->verified_link_cap.lane_count = LANE_COUNT_ONE;
- link->verified_link_cap.link_rate = LINK_RATE_LOW;
-
- link->verified_link_cap.link_spread =
- LINK_SPREAD_DISABLED;
- }
-
-
- return success;
-}
-
-bool dp_verify_link_cap_with_retries(
- struct dc_link *link,
- struct dc_link_settings *known_limit_link_setting,
- int attempts)
-{
- int i = 0;
- bool success = false;
-
- for (i = 0; i < attempts; i++) {
- int fail_count = 0;
- enum dc_connection_type type = dc_connection_none;
-
- memset(&link->verified_link_cap, 0,
- sizeof(struct dc_link_settings));
- if (!dc_link_detect_sink(link, &type) || type == dc_connection_none) {
- link->verified_link_cap.lane_count = LANE_COUNT_ONE;
- link->verified_link_cap.link_rate = LINK_RATE_LOW;
- link->verified_link_cap.link_spread = LINK_SPREAD_DISABLED;
- break;
- } else if (dp_verify_link_cap(link,
- known_limit_link_setting,
- &fail_count) && fail_count == 0) {
- success = true;
- break;
- }
- msleep(10);
- }
- return success;
-}
-
-bool dp_verify_mst_link_cap(
- struct dc_link *link)
-{
- struct dc_link_settings max_link_cap = {0};
-
- if (dp_get_link_encoding_format(&link->reported_link_cap) ==
- DP_8b_10b_ENCODING) {
- max_link_cap = get_max_link_cap(link);
- link->verified_link_cap = get_common_supported_link_settings(
- link->reported_link_cap,
- max_link_cap);
- }
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- else if (dp_get_link_encoding_format(&link->reported_link_cap) ==
- DP_128b_132b_ENCODING) {
- dp_verify_link_cap_with_retries(link,
- &link->reported_link_cap,
- LINK_TRAINING_MAX_VERIFY_RETRY);
- }
-#endif
- return true;
-}
-
-static struct dc_link_settings get_common_supported_link_settings(
- struct dc_link_settings link_setting_a,
- struct dc_link_settings link_setting_b)
-{
- struct dc_link_settings link_settings = {0};
-
- link_settings.lane_count =
- (link_setting_a.lane_count <=
- link_setting_b.lane_count) ?
- link_setting_a.lane_count :
- link_setting_b.lane_count;
- link_settings.link_rate =
- (link_setting_a.link_rate <=
- link_setting_b.link_rate) ?
- link_setting_a.link_rate :
- link_setting_b.link_rate;
- link_settings.link_spread = LINK_SPREAD_DISABLED;
-
- /* in DP compliance test, DPR-120 may have
- * a random value in its MAX_LINK_BW dpcd field.
- * We map it to the maximum supported link rate that
- * is smaller than MAX_LINK_BW in this case.
- */
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (link_settings.link_rate > LINK_RATE_UHBR20) {
- link_settings.link_rate = LINK_RATE_UHBR20;
- } else if (link_settings.link_rate < LINK_RATE_UHBR20 &&
- link_settings.link_rate > LINK_RATE_UHBR13_5) {
- link_settings.link_rate = LINK_RATE_UHBR13_5;
- } else if (link_settings.link_rate < LINK_RATE_UHBR10 &&
- link_settings.link_rate > LINK_RATE_HIGH3) {
-#else
- if (link_settings.link_rate > LINK_RATE_HIGH3) {
-#endif
- link_settings.link_rate = LINK_RATE_HIGH3;
- } else if (link_settings.link_rate < LINK_RATE_HIGH3
- && link_settings.link_rate > LINK_RATE_HIGH2) {
- link_settings.link_rate = LINK_RATE_HIGH2;
- } else if (link_settings.link_rate < LINK_RATE_HIGH2
- && link_settings.link_rate > LINK_RATE_HIGH) {
- link_settings.link_rate = LINK_RATE_HIGH;
- } else if (link_settings.link_rate < LINK_RATE_HIGH
- && link_settings.link_rate > LINK_RATE_LOW) {
- link_settings.link_rate = LINK_RATE_LOW;
- } else if (link_settings.link_rate < LINK_RATE_LOW) {
- link_settings.link_rate = LINK_RATE_UNKNOWN;
- }
-
- return link_settings;
-}
-
-static inline bool reached_minimum_lane_count(enum dc_lane_count lane_count)
-{
- return lane_count <= LANE_COUNT_ONE;
-}
-
-static inline bool reached_minimum_link_rate(enum dc_link_rate link_rate)
-{
- return link_rate <= LINK_RATE_LOW;
-}
-
-static enum dc_lane_count reduce_lane_count(enum dc_lane_count lane_count)
-{
- switch (lane_count) {
- case LANE_COUNT_FOUR:
- return LANE_COUNT_TWO;
- case LANE_COUNT_TWO:
- return LANE_COUNT_ONE;
- case LANE_COUNT_ONE:
- return LANE_COUNT_UNKNOWN;
- default:
- return LANE_COUNT_UNKNOWN;
- }
-}
-
-static enum dc_link_rate reduce_link_rate(enum dc_link_rate link_rate)
-{
- switch (link_rate) {
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- case LINK_RATE_UHBR20:
- return LINK_RATE_UHBR13_5;
- case LINK_RATE_UHBR13_5:
- return LINK_RATE_UHBR10;
- case LINK_RATE_UHBR10:
- return LINK_RATE_HIGH3;
-#endif
- case LINK_RATE_HIGH3:
- return LINK_RATE_HIGH2;
- case LINK_RATE_HIGH2:
- return LINK_RATE_HIGH;
- case LINK_RATE_HIGH:
- return LINK_RATE_LOW;
- case LINK_RATE_LOW:
- return LINK_RATE_UNKNOWN;
- default:
- return LINK_RATE_UNKNOWN;
- }
-}
-
-static enum dc_lane_count increase_lane_count(enum dc_lane_count lane_count)
-{
- switch (lane_count) {
- case LANE_COUNT_ONE:
- return LANE_COUNT_TWO;
- case LANE_COUNT_TWO:
- return LANE_COUNT_FOUR;
- default:
- return LANE_COUNT_UNKNOWN;
- }
-}
-
-static enum dc_link_rate increase_link_rate(enum dc_link_rate link_rate)
-{
- switch (link_rate) {
- case LINK_RATE_LOW:
- return LINK_RATE_HIGH;
- case LINK_RATE_HIGH:
- return LINK_RATE_HIGH2;
- case LINK_RATE_HIGH2:
- return LINK_RATE_HIGH3;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- case LINK_RATE_HIGH3:
- return LINK_RATE_UHBR10;
- case LINK_RATE_UHBR10:
- return LINK_RATE_UHBR13_5;
- case LINK_RATE_UHBR13_5:
- return LINK_RATE_UHBR20;
-#endif
- default:
- return LINK_RATE_UNKNOWN;
- }
-}
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-static bool decide_fallback_link_setting_max_bw_policy(
- const struct dc_link_settings *max,
- struct dc_link_settings *cur)
-{
- uint8_t cur_idx = 0, next_idx;
- bool found = false;
-
- while (cur_idx < ARRAY_SIZE(dp_lt_fallbacks))
- /* find current index */
- if (dp_lt_fallbacks[cur_idx].lane_count == cur->lane_count &&
- dp_lt_fallbacks[cur_idx].link_rate == cur->link_rate)
- break;
- else
- cur_idx++;
-
- next_idx = cur_idx + 1;
-
- while (next_idx < ARRAY_SIZE(dp_lt_fallbacks))
- /* find next index */
- if (dp_lt_fallbacks[next_idx].lane_count <= max->lane_count &&
- dp_lt_fallbacks[next_idx].link_rate <= max->link_rate)
- break;
- else
- next_idx++;
-
- if (next_idx < ARRAY_SIZE(dp_lt_fallbacks)) {
- cur->lane_count = dp_lt_fallbacks[next_idx].lane_count;
- cur->link_rate = dp_lt_fallbacks[next_idx].link_rate;
- found = true;
- }
-
- return found;
-}
-#endif
-
-/*
- * function: set link rate and lane count fallback based
- * on current link setting and last link training result
- *
- * initial_link_settings: settings the training sequence started from. Its
- *   link_rate is copied back into *current_link_setting when a CR/LQA
- *   failure forces a lane-count change, and (with CONFIG_DRM_AMD_DC_DCN) its
- *   encoding format selects the 128b/132b max-bandwidth policy instead of
- *   the switch below.
- * current_link_setting: in/out, updated in place with the next setting to
- *   try. A NULL pointer yields false.
- * training_result: failure reported by the last training attempt. Results
- *   not listed in the switch yield false.
- *
- * return value:
- * true - link setting could be set
- * false - has reached minimum setting
- * and no further fallback could be done
- */
-static bool decide_fallback_link_setting(
- struct dc_link_settings initial_link_settings,
- struct dc_link_settings *current_link_setting,
- enum link_training_result training_result)
-{
- if (!current_link_setting)
- return false;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dp_get_link_encoding_format(&initial_link_settings) == DP_128b_132b_ENCODING)
- return decide_fallback_link_setting_max_bw_policy(&initial_link_settings,
- current_link_setting);
-#endif /* CONFIG_DRM_AMD_DC_DCN */
-
- switch (training_result) {
- case LINK_TRAINING_CR_FAIL_LANE0:
- case LINK_TRAINING_CR_FAIL_LANE1:
- case LINK_TRAINING_CR_FAIL_LANE23:
- case LINK_TRAINING_LQA_FAIL:
- { /* clock-recovery / LQA failure: lower the rate first, then the lane count */
- if (!reached_minimum_link_rate
- (current_link_setting->link_rate)) {
- current_link_setting->link_rate =
- reduce_link_rate(
- current_link_setting->link_rate);
- } else if (!reached_minimum_lane_count
- (current_link_setting->lane_count)) {
- current_link_setting->link_rate =
- initial_link_settings.link_rate; /* restart from the initial rate with a different lane count */
- if (training_result == LINK_TRAINING_CR_FAIL_LANE0)
- return false; /* NOTE: link_rate has already been reset above when this returns */
- else if (training_result == LINK_TRAINING_CR_FAIL_LANE1)
- current_link_setting->lane_count =
- LANE_COUNT_ONE;
- else if (training_result ==
- LINK_TRAINING_CR_FAIL_LANE23)
- current_link_setting->lane_count =
- LANE_COUNT_TWO;
- else /* LINK_TRAINING_LQA_FAIL: step the lane count down by one level */
- current_link_setting->lane_count =
- reduce_lane_count(
- current_link_setting->lane_count);
- } else {
- return false;
- }
- break;
- }
- case LINK_TRAINING_EQ_FAIL_EQ:
- { /* equalization failure: lower the lane count first, then the rate */
- if (!reached_minimum_lane_count
- (current_link_setting->lane_count)) {
- current_link_setting->lane_count =
- reduce_lane_count(
- current_link_setting->lane_count);
- } else if (!reached_minimum_link_rate
- (current_link_setting->link_rate)) {
- current_link_setting->link_rate =
- reduce_link_rate(
- current_link_setting->link_rate);
- } else {
- return false;
- }
- break;
- }
- case LINK_TRAINING_EQ_FAIL_CR:
- { /* only the link rate is lowered here; the lane count is left untouched */
- if (!reached_minimum_link_rate
- (current_link_setting->link_rate)) {
- current_link_setting->link_rate =
- reduce_link_rate(
- current_link_setting->link_rate);
- } else {
- return false;
- }
- break;
- }
- default:
- return false;
- }
- return true;
-}
-
-/*
- * Report whether @timing can be driven over @link.
- *
- * Returns false for YCbCr 4:2:0 timings when the sink lacks VSC SDP
- * colorimetry support (virtual connectors are exempt), true for the
- * 640x480 @ 25.175 MHz fail-safe mode, and otherwise true only when the
- * bandwidth the timing needs does not exceed what the link capability
- * returned by dc_link_get_link_cap() provides.
- */
-bool dp_validate_mode_timing(
-	struct dc_link *link,
-	const struct dc_crtc_timing *timing)
-{
-	const struct dc_link_settings *link_cap;
-	uint32_t required_kbps;
-	uint32_t available_kbps;
-
-	/* According to spec, VSC SDP should be used if pixel format is YCbCr420 */
-	if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420 &&
-	    !link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED &&
-	    dal_graphics_object_id_get_connector_id(link->link_id) != CONNECTOR_ID_VIRTUAL)
-		return false;
-
-	/* the DP fail-safe mode is always accepted, whatever the link offers */
-	if ((timing->pix_clk_100hz / 10) == (uint32_t) 25175 &&
-	    timing->h_addressable == (uint32_t) 640 &&
-	    timing->v_addressable == (uint32_t) 480)
-		return true;
-
-	/*
-	 * TODO: DYNAMIC_VALIDATION needs to be implemented. Until then the
-	 * check always runs against the reported link cap, never against
-	 * verified_link_cap, and the biggest accepted mode is not recorded.
-	 */
-	link_cap = dc_link_get_link_cap(link);
-
-	required_kbps = dc_bandwidth_in_kbps_from_timing(timing);
-	available_kbps = dc_link_bandwidth_kbps(link, link_cap);
-
-	return required_kbps <= available_kbps;
-}
-
-/*
- * Find the lowest DP link setting that carries @req_bw.
- *
- * Candidates are walked from one lane at LINK_RATE_LOW upwards: the lane
- * count is raised until it reaches the verified lane count, then the link
- * rate is raised and the lane count starts over. On success the chosen
- * setting is stored in *link_setting and true is returned. false means
- * @req_bw exceeds even the verified link capability, or that no candidate
- * within the verified link rate was sufficient.
- */
-static bool decide_dp_link_settings(struct dc_link *link, struct dc_link_settings *link_setting, uint32_t req_bw)
-{
-	const struct dc_link_settings lowest = {
-			LANE_COUNT_ONE, LINK_RATE_LOW, LINK_SPREAD_DISABLED, false, 0};
-	struct dc_link_settings candidate = lowest;
-
-	/* nothing to search if even the verified capability is too small */
-	if (req_bw > dc_link_bandwidth_kbps(link, &link->verified_link_cap))
-		return false;
-
-	while (candidate.link_rate <= link->verified_link_cap.link_rate) {
-		if (req_bw <= dc_link_bandwidth_kbps(link, &candidate)) {
-			*link_setting = candidate;
-			return true;
-		}
-
-		if (candidate.lane_count >= link->verified_link_cap.lane_count) {
-			/* lanes exhausted at this rate: go up one rate, restart lanes */
-			candidate.link_rate = increase_link_rate(candidate.link_rate);
-			candidate.lane_count = lowest.lane_count;
-			continue;
-		}
-
-		candidate.lane_count = increase_lane_count(candidate.lane_count);
-	}
-
-	return false;
-}
-
-/*
- * Find the lowest eDP link setting that carries @req_bw.
- *
- * For panels older than DPCD rev 1.3, or panels that report no supported
- * link-rate table, the verified link capability is used as is. Otherwise
- * the panel's edp_supported_link_rates[] table is walked from entry 0
- * upwards, trying every lane count up to the verified lane count for each
- * rate.
- *
- * Returns true with *link_setting filled in on success, false when no
- * table entry within the verified link rate provides enough bandwidth.
- */
-bool decide_edp_link_settings(struct dc_link *link, struct dc_link_settings *link_setting, uint32_t req_bw)
-{
-	struct dc_link_settings initial_link_setting;
-	struct dc_link_settings current_link_setting;
-	uint32_t link_bw;
-
-	/*
-	 * edp_supported_link_rates_count is only valid for eDP v1.4 or higher.
-	 * Per VESA eDP spec, "The DPCD revision for eDP v1.4 is 13h"
-	 */
-	if (link->dpcd_caps.dpcd_rev.raw < DPCD_REV_13 ||
-			link->dpcd_caps.edp_supported_link_rates_count == 0) {
-		*link_setting = link->verified_link_cap;
-		return true;
-	}
-
-	memset(&initial_link_setting, 0, sizeof(initial_link_setting));
-	initial_link_setting.lane_count = LANE_COUNT_ONE;
-	initial_link_setting.link_rate = link->dpcd_caps.edp_supported_link_rates[0];
-	initial_link_setting.link_spread = LINK_SPREAD_DISABLED;
-	initial_link_setting.use_link_rate_set = true;
-	initial_link_setting.link_rate_set = 0;
-	current_link_setting = initial_link_setting;
-
-	/* search for the minimum link setting that:
-	 * 1. is supported according to the link training result
-	 * 2. could support the b/w requested by the timing
-	 */
-	while (current_link_setting.link_rate <=
-			link->verified_link_cap.link_rate) {
-		link_bw = dc_link_bandwidth_kbps(
-				link,
-				&current_link_setting);
-		if (req_bw <= link_bw) {
-			*link_setting = current_link_setting;
-			return true;
-		}
-
-		if (current_link_setting.lane_count <
-				link->verified_link_cap.lane_count) {
-			current_link_setting.lane_count =
-					increase_lane_count(
-							current_link_setting.lane_count);
-		} else if (current_link_setting.link_rate_set + 1 <
-				link->dpcd_caps.edp_supported_link_rates_count) {
-			/*
-			 * Advance only while a next table entry exists. Testing
-			 * link_rate_set itself against the count would step to
-			 * index == count and read one element past the valid
-			 * entries of edp_supported_link_rates[].
-			 */
-			current_link_setting.link_rate_set++;
-			current_link_setting.link_rate =
-				link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set];
-			current_link_setting.lane_count =
-					initial_link_setting.lane_count;
-		} else {
-			/* every supported rate has been tried at every lane count */
-			break;
-		}
-	}
-	return false;
-}
-
-/*
- * MST streams always run at the verified link capability: copy it into
- * *link_setting and report success. This function never returns false.
- */
-static bool decide_mst_link_settings(const struct dc_link *link, struct dc_link_settings *link_setting)
-{
-	const struct dc_link_settings *verified = &link->verified_link_cap;
-
-	*link_setting = *verified;
-	return true;
-}
-
-/*
- * Choose the link setting used to drive @stream and store it in
- * *link_setting.
- *
- * A preferred setting on the link (both lane count and link rate known)
- * is taken as is; no bandwidth check is made against it here. Otherwise
- * the MST, eDP or plain DP decision helper is consulted depending on the
- * stream and connector signal. If the helper fails, the debugger
- * break/assert fires and the verified link capability is used.
- */
-void decide_link_settings(struct dc_stream_state *stream,
-	struct dc_link_settings *link_setting)
-{
-	uint32_t req_bw = dc_bandwidth_in_kbps_from_timing(&stream->timing);
-	struct dc_link *link = stream->link;
-	bool decided;
-
-	/* preferred setting specified through AMDDP wins over any search */
-	if (link->preferred_link_setting.lane_count != LANE_COUNT_UNKNOWN &&
-	    link->preferred_link_setting.link_rate != LINK_RATE_UNKNOWN) {
-		*link_setting = link->preferred_link_setting;
-		return;
-	}
-
-	/* MST doesn't perform link training for now
-	 * TODO: add MST specific link training routine
-	 */
-	if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
-		decided = decide_mst_link_settings(link, link_setting);
-	else if (link->connector_signal == SIGNAL_TYPE_EDP)
-		decided = decide_edp_link_settings(link, link_setting, req_bw);
-	else
-		decided = decide_dp_link_settings(link, link_setting, req_bw);
-
-	if (decided)
-		return;
-
-	/* no helper produced a setting: flag it and fall back to verified cap */
-	BREAK_TO_DEBUGGER();
-	ASSERT(link->verified_link_cap.lane_count != LANE_COUNT_UNKNOWN);
-
-	*link_setting = link->verified_link_cap;
-}
-
-/*************************Short Pulse IRQ***************************/
-/*
- * Decide whether a short-pulse (RX) HPD interrupt should be serviced.
- *
- * It is serviced when either of these holds:
- * 1) the link is established (current lane count is not unknown)
- * 2) the link is to a branch device, SST or MST
- */
-bool dc_link_dp_allow_hpd_rx_irq(const struct dc_link *link)
-{
-	return link->cur_link_settings.lane_count != LANE_COUNT_UNKNOWN ||
-	       is_dp_branch_device(link);
-}
-
-/*
- * Service the PSR part of a short-pulse HPD interrupt.
- *
- * Does nothing (returns false) when PSR is not enabled on the link or the
- * sink's PSR configuration does not have ENABLE set. Otherwise reads the
- * three bytes starting at DPCD 2006h:
- *  - on a link CRC, RFB storage or VSC SDP error, writes the error byte
- *    back to 2006h to acknowledge it, toggles PSR off and on again, and
- *    returns true;
- *  - if there is no error and the sink reports it is displaying from its
- *    remote frame buffer, returns true so the caller skips the
- *    loss-of-sync handling;
- *  - otherwise returns false.
- */
-static bool handle_hpd_irq_psr_sink(struct dc_link *link)
-{
-	union dpcd_psr_configuration psr_configuration;
-
-	if (!link->psr_settings.psr_feature_enabled)
-		return false;
-
-	/*
-	 * The read result is not checked, so start from zero: if the read
-	 * leaves the buffer untouched, ENABLE reads as 0 instead of stack
-	 * garbage.
-	 */
-	memset(&psr_configuration, 0, sizeof(psr_configuration));
-
-	dm_helpers_dp_read_dpcd(
-		link->ctx,
-		link,
-		0x170,/*DpcdAddress_PSR_Enable_Cfg*/
-		&psr_configuration.raw,
-		sizeof(psr_configuration.raw));
-
-	if (psr_configuration.bits.ENABLE) {
-		unsigned char dpcdbuf[3] = {0};
-		union psr_error_status psr_error_status;
-		union psr_sink_psr_status psr_sink_psr_status;
-
-		dm_helpers_dp_read_dpcd(
-			link->ctx,
-			link,
-			0x2006, /*DpcdAddress_PSR_Error_Status*/
-			(unsigned char *) dpcdbuf,
-			sizeof(dpcdbuf));
-
-		/*DPCD 2006h ERROR STATUS*/
-		psr_error_status.raw = dpcdbuf[0];
-		/*DPCD 2008h SINK PANEL SELF REFRESH STATUS*/
-		psr_sink_psr_status.raw = dpcdbuf[2];
-
-		if (psr_error_status.bits.LINK_CRC_ERROR ||
-				psr_error_status.bits.RFB_STORAGE_ERROR ||
-				psr_error_status.bits.VSC_SDP_ERROR) {
-			bool allow_active;
-
-			/* Acknowledge and clear error bits (same register as read above) */
-			dm_helpers_dp_write_dpcd(
-				link->ctx,
-				link,
-				0x2006,/*DpcdAddress_PSR_Error_Status*/
-				&psr_error_status.raw,
-				sizeof(psr_error_status.raw));
-
-			/* PSR error, disable and re-enable PSR */
-			allow_active = false;
-			dc_link_set_psr_allow_active(link, &allow_active, true, false, NULL);
-			allow_active = true;
-			dc_link_set_psr_allow_active(link, &allow_active, true, false, NULL);
-
-			return true;
-		} else if (psr_sink_psr_status.bits.SINK_SELF_REFRESH_STATUS ==
-				PSR_SINK_STATE_ACTIVE_DISPLAY_FROM_SINK_RFB){
-			/* No error is detected, PSR is active.
-			 * We should return with IRQ_HPD handled without
-			 * checking for loss of sync since PSR would have
-			 * powered down main link.
-			 */
-			return true;
-		}
-	}
-	return false;
-}
-
-/*
- * Service an automated-test link training request: read the requested lane
- * count and link rate from DP_TEST_LANE_COUNT / DP_TEST_LINK_RATE, record
- * them as the link's verified capability, and retrain with them.
- */
-static void dp_test_send_link_training(struct dc_link *link)
-{
-	struct dc_link_settings link_settings = {0};
-	unsigned char test_lane_count = 0;
-	unsigned char test_link_rate = 0;
-
-	/*
-	 * Each register is one byte. Read into byte-sized locals and assign,
-	 * rather than pointing the read at the first byte of a wider field,
-	 * which only lands in the low-order byte on little-endian hosts.
-	 */
-	core_link_read_dpcd(
-			link,
-			DP_TEST_LANE_COUNT,
-			&test_lane_count,
-			1);
-	core_link_read_dpcd(
-			link,
-			DP_TEST_LINK_RATE,
-			&test_link_rate,
-			1);
-
-	link_settings.lane_count = test_lane_count;
-	link_settings.link_rate = test_link_rate;
-
-	/* The requested values overwrite the verified link capability */
-	link->verified_link_cap.lane_count = link_settings.lane_count;
-	link->verified_link_cap.link_rate = link_settings.link_rate;
-
-	dp_retrain_link_dp_test(link, &link_settings, false);
-}
-
-/* TODO Raven hbr2 compliance eye output is unstable
- * (toggling on and off) with debugger break
- * This causes intermittent PHY automation failure
- * Need to look into the root cause
- *
- * Service an automated-test PHY test pattern request:
- *  1. read the requested pattern (DP_PHY_TEST_PATTERN), the per-lane
- *     adjust requests (DP_ADJUST_REQUEST_LANE0_1 onwards) and the
- *     post-cursor2 request from the receiver;
- *  2. translate the DPCD pattern code into enum dp_test_pattern;
- *  3. for custom / square-pulse patterns, read the pattern payload;
- *  4. build per-lane drive settings for the current lane count;
- *  5. hand everything to dc_link_dp_set_test_pattern().
- */
-static void dp_test_send_phy_test_pattern(struct dc_link *link)
-{
- union phy_test_pattern dpcd_test_pattern;
- union lane_adjust dpcd_lane_adjustment[2];
- unsigned char dpcd_post_cursor_2_adjustment = 0;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- unsigned char test_pattern_buffer[
- (DP_TEST_264BIT_CUSTOM_PATTERN_263_256 -
- DP_TEST_264BIT_CUSTOM_PATTERN_7_0)+1] = {0}; /* sized for the 264-bit custom pattern */
-#else
- unsigned char test_pattern_buffer[
- (DP_TEST_80BIT_CUSTOM_PATTERN_79_72 -
- DP_TEST_80BIT_CUSTOM_PATTERN_7_0)+1] = {0}; /* sized for the 80-bit custom pattern */
-#endif
- unsigned int test_pattern_size = 0; /* stays 0 for patterns that carry no payload */
- enum dp_test_pattern test_pattern;
- union lane_adjust dpcd_lane_adjust;
- unsigned int lane;
- struct link_training_settings link_training_settings;
-
- dpcd_test_pattern.raw = 0;
- memset(dpcd_lane_adjustment, 0, sizeof(dpcd_lane_adjustment));
- memset(&link_training_settings, 0, sizeof(link_training_settings));
-
- /* get phy test pattern and pattern parameters from DP receiver */
- core_link_read_dpcd(
- link,
- DP_PHY_TEST_PATTERN,
- &dpcd_test_pattern.raw,
- sizeof(dpcd_test_pattern));
- core_link_read_dpcd(
- link,
- DP_ADJUST_REQUEST_LANE0_1,
- &dpcd_lane_adjustment[0].raw,
- sizeof(dpcd_lane_adjustment)); /* reads both array elements in one transfer */
-
- /*get post cursor 2 parameters
- * For DP 1.1a or earlier, this DPCD register's value is 0
- * For DP 1.2 or later:
- * Bits 1:0 = POST_CURSOR2_LANE0; Bits 3:2 = POST_CURSOR2_LANE1
- * Bits 5:4 = POST_CURSOR2_LANE2; Bits 7:6 = POST_CURSOR2_LANE3
- */
- core_link_read_dpcd(
- link,
- DP_ADJUST_REQUEST_POST_CURSOR2,
- &dpcd_post_cursor_2_adjustment,
- sizeof(dpcd_post_cursor_2_adjustment));
-
- /* translate request */
- switch (dpcd_test_pattern.bits.PATTERN) {
- case PHY_TEST_PATTERN_D10_2:
- test_pattern = DP_TEST_PATTERN_D102;
- break;
- case PHY_TEST_PATTERN_SYMBOL_ERROR:
- test_pattern = DP_TEST_PATTERN_SYMBOL_ERROR;
- break;
- case PHY_TEST_PATTERN_PRBS7:
- test_pattern = DP_TEST_PATTERN_PRBS7;
- break;
- case PHY_TEST_PATTERN_80BIT_CUSTOM:
- test_pattern = DP_TEST_PATTERN_80BIT_CUSTOM;
- break;
- case PHY_TEST_PATTERN_CP2520_1:
- /* CP2520 pattern is unstable, temporarily use TPS4 instead */
- test_pattern = (link->dc->caps.force_dp_tps4_for_cp2520 == 1) ?
- DP_TEST_PATTERN_TRAINING_PATTERN4 :
- DP_TEST_PATTERN_HBR2_COMPLIANCE_EYE;
- break;
- case PHY_TEST_PATTERN_CP2520_2:
- /* CP2520 pattern is unstable, temporarily use TPS4 instead */
- test_pattern = (link->dc->caps.force_dp_tps4_for_cp2520 == 1) ?
- DP_TEST_PATTERN_TRAINING_PATTERN4 :
- DP_TEST_PATTERN_HBR2_COMPLIANCE_EYE;
- break;
- case PHY_TEST_PATTERN_CP2520_3:
- test_pattern = DP_TEST_PATTERN_TRAINING_PATTERN4;
- break;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- case PHY_TEST_PATTERN_128b_132b_TPS1:
- test_pattern = DP_TEST_PATTERN_128b_132b_TPS1;
- break;
- case PHY_TEST_PATTERN_128b_132b_TPS2:
- test_pattern = DP_TEST_PATTERN_128b_132b_TPS2;
- break;
- case PHY_TEST_PATTERN_PRBS9:
- test_pattern = DP_TEST_PATTERN_PRBS9;
- break;
- case PHY_TEST_PATTERN_PRBS11:
- test_pattern = DP_TEST_PATTERN_PRBS11;
- break;
- case PHY_TEST_PATTERN_PRBS15:
- test_pattern = DP_TEST_PATTERN_PRBS15;
- break;
- case PHY_TEST_PATTERN_PRBS23:
- test_pattern = DP_TEST_PATTERN_PRBS23;
- break;
- case PHY_TEST_PATTERN_PRBS31:
- test_pattern = DP_TEST_PATTERN_PRBS31;
- break;
- case PHY_TEST_PATTERN_264BIT_CUSTOM:
- test_pattern = DP_TEST_PATTERN_264BIT_CUSTOM;
- break;
- case PHY_TEST_PATTERN_SQUARE_PULSE:
- test_pattern = DP_TEST_PATTERN_SQUARE_PULSE;
- break;
-#endif
- default: /* any unrecognized request falls back to normal video */
- test_pattern = DP_TEST_PATTERN_VIDEO_MODE;
- break;
- }
-
- if (test_pattern == DP_TEST_PATTERN_80BIT_CUSTOM) {
- test_pattern_size = (DP_TEST_80BIT_CUSTOM_PATTERN_79_72 -
- DP_TEST_80BIT_CUSTOM_PATTERN_7_0) + 1;
- core_link_read_dpcd(
- link,
- DP_TEST_80BIT_CUSTOM_PATTERN_7_0,
- test_pattern_buffer,
- test_pattern_size);
- }
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (test_pattern == DP_TEST_PATTERN_SQUARE_PULSE) {
- test_pattern_size = 1; // Square pattern data is 1 byte (DP spec)
- core_link_read_dpcd(
- link,
- DP_PHY_SQUARE_PATTERN,
- test_pattern_buffer,
- test_pattern_size);
- }
-
- if (test_pattern == DP_TEST_PATTERN_264BIT_CUSTOM) {
- test_pattern_size = (DP_TEST_264BIT_CUSTOM_PATTERN_263_256-
- DP_TEST_264BIT_CUSTOM_PATTERN_7_0) + 1;
- core_link_read_dpcd(
- link,
- DP_TEST_264BIT_CUSTOM_PATTERN_7_0,
- test_pattern_buffer,
- test_pattern_size);
- }
-#endif
-
- /* prepare link training settings */
- link_training_settings.link_settings = link->cur_link_settings;
-
- for (lane = 0; lane <
- (unsigned int)(link->cur_link_settings.lane_count);
- lane++) {
- dpcd_lane_adjust.raw =
- get_nibble_at_index(&dpcd_lane_adjustment[0].raw, lane); /* one nibble of adjust request per lane */
- if (dp_get_link_encoding_format(&link->cur_link_settings) ==
- DP_8b_10b_ENCODING) {
- link_training_settings.hw_lane_settings[lane].VOLTAGE_SWING =
- (enum dc_voltage_swing)
- (dpcd_lane_adjust.bits.VOLTAGE_SWING_LANE);
- link_training_settings.hw_lane_settings[lane].PRE_EMPHASIS =
- (enum dc_pre_emphasis)
- (dpcd_lane_adjust.bits.PRE_EMPHASIS_LANE);
- link_training_settings.hw_lane_settings[lane].POST_CURSOR2 =
- (enum dc_post_cursor2)
- ((dpcd_post_cursor_2_adjustment >> (lane * 2)) & 0x03); /* two bits per lane, lane 0 in bits 1:0 */
- }
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- else if (dp_get_link_encoding_format(&link->cur_link_settings) ==
- DP_128b_132b_ENCODING) {
- link_training_settings.hw_lane_settings[lane].FFE_PRESET.raw =
- dpcd_lane_adjust.tx_ffe.PRESET_VALUE;
- }
-#endif
- }
-
- dp_hw_to_dpcd_lane_settings(&link_training_settings,
- link_training_settings.hw_lane_settings,
- link_training_settings.dpcd_lane_settings);
- /*Usage: Measure DP physical lane signal
- * by DP SI test equipment automatically.
- * PHY test pattern request is generated by equipment via HPD interrupt.
- * HPD needs to be active all the time. HPD should be active
- * all the time. Do not touch it.
- * forward request to DS
- */
- dc_link_dp_set_test_pattern(
- link,
- test_pattern,
- DP_TEST_PATTERN_COLOR_SPACE_UNDEFINED,
- &link_training_settings,
- test_pattern_buffer,
- test_pattern_size);
-}
-
-static void dp_test_send_link_test_pattern(struct dc_link *link)
-{ /*
- * Service an automated-test link (video) test pattern request.
- *
- * Finds the first pipe in the current state whose stream is on this link
- * and which has neither a top_pipe nor a prev_odm_pipe, and returns
- * without doing anything if there is none. Then reads DP_TEST_PATTERN and
- * DP_TEST_MISC0, overwrites that stream's pixel encoding and (when it
- * differs) display color depth with the requested values, refreshes the
- * DSC configuration, and programs the pattern through
- * dc_link_dp_set_test_pattern().
- */
- union link_test_pattern dpcd_test_pattern;
- union test_misc dpcd_test_params;
- enum dp_test_pattern test_pattern;
- enum dp_test_pattern_color_space test_pattern_color_space =
- DP_TEST_PATTERN_COLOR_SPACE_UNDEFINED;
- enum dc_color_depth requestColorDepth = COLOR_DEPTH_UNDEFINED;
- struct pipe_ctx *pipes = link->dc->current_state->res_ctx.pipe_ctx;
- struct pipe_ctx *pipe_ctx = NULL;
- int i;
-
- memset(&dpcd_test_pattern, 0, sizeof(dpcd_test_pattern));
- memset(&dpcd_test_params, 0, sizeof(dpcd_test_params));
-
- for (i = 0; i < MAX_PIPES; i++) {
- if (pipes[i].stream == NULL)
- continue;
-
- if (pipes[i].stream->link == link && !pipes[i].top_pipe && !pipes[i].prev_odm_pipe) {
- pipe_ctx = &pipes[i];
- break;
- }
- }
-
- if (pipe_ctx == NULL)
- return; /* no pipe drives this link: nothing to apply the pattern to */
-
- /* get link test pattern and pattern parameters */
- core_link_read_dpcd(
- link,
- DP_TEST_PATTERN,
- &dpcd_test_pattern.raw,
- sizeof(dpcd_test_pattern));
- core_link_read_dpcd(
- link,
- DP_TEST_MISC0,
- &dpcd_test_params.raw,
- sizeof(dpcd_test_params));
-
- switch (dpcd_test_pattern.bits.PATTERN) {
- case LINK_TEST_PATTERN_COLOR_RAMP:
- test_pattern = DP_TEST_PATTERN_COLOR_RAMP;
- break;
- case LINK_TEST_PATTERN_VERTICAL_BARS:
- test_pattern = DP_TEST_PATTERN_VERTICAL_BARS;
- break; /* black and white */
- case LINK_TEST_PATTERN_COLOR_SQUARES:
- test_pattern = (dpcd_test_params.bits.DYN_RANGE ==
- TEST_DYN_RANGE_VESA ?
- DP_TEST_PATTERN_COLOR_SQUARES :
- DP_TEST_PATTERN_COLOR_SQUARES_CEA);
- break;
- default: /* any unrecognized request falls back to normal video */
- test_pattern = DP_TEST_PATTERN_VIDEO_MODE;
- break;
- }
-
- if (dpcd_test_params.bits.CLR_FORMAT == 0)
- test_pattern_color_space = DP_TEST_PATTERN_COLOR_SPACE_RGB;
- else
- test_pattern_color_space = dpcd_test_params.bits.YCBCR_COEFS ?
- DP_TEST_PATTERN_COLOR_SPACE_YCBCR709 :
- DP_TEST_PATTERN_COLOR_SPACE_YCBCR601;
-
- switch (dpcd_test_params.bits.BPC) {
- case 0: // 6 bits
- requestColorDepth = COLOR_DEPTH_666;
- break;
- case 1: // 8 bits
- requestColorDepth = COLOR_DEPTH_888;
- break;
- case 2: // 10 bits
- requestColorDepth = COLOR_DEPTH_101010;
- break;
- case 3: // 12 bits
- requestColorDepth = COLOR_DEPTH_121212;
- break;
- default: /* other codes leave requestColorDepth UNDEFINED, so the depth is not changed below */
- break;
- }
-
- switch (dpcd_test_params.bits.CLR_FORMAT) {
- case 0:
- pipe_ctx->stream->timing.pixel_encoding = PIXEL_ENCODING_RGB;
- break;
- case 1:
- pipe_ctx->stream->timing.pixel_encoding = PIXEL_ENCODING_YCBCR422;
- break;
- case 2:
- pipe_ctx->stream->timing.pixel_encoding = PIXEL_ENCODING_YCBCR444;
- break;
- default:
- pipe_ctx->stream->timing.pixel_encoding = PIXEL_ENCODING_RGB;
- break;
- }
-
-
- if (requestColorDepth != COLOR_DEPTH_UNDEFINED
- && pipe_ctx->stream->timing.display_color_depth != requestColorDepth) {
- DC_LOG_DEBUG("%s: original bpc %d, changing to %d\n",
- __func__,
- pipe_ctx->stream->timing.display_color_depth,
- requestColorDepth);
- pipe_ctx->stream->timing.display_color_depth = requestColorDepth;
- }
-
- dp_update_dsc_config(pipe_ctx); /* runs after the stream timing was modified above */
-
- dc_link_dp_set_test_pattern(
- link,
- test_pattern,
- test_pattern_color_space,
- NULL,
- NULL,
- 0);
-}
-
-/*
- * Service an automated-test audio pattern request.
- *
- * Reads the requested audio test mode and pattern type from the sink,
- * reads the per-channel pattern periods for sawtooth and operator-defined
- * patterns, translates the sampling-rate code to Hz, and records the
- * result in link->audio_test_data for later use.
- *
- * @disable_video is stored verbatim in audio_test_data.flags.
- */
-static void dp_test_get_audio_test_data(struct dc_link *link, bool disable_video)
-{
-	union audio_test_mode dpcd_test_mode = {0};
-	struct audio_test_pattern_type dpcd_pattern_type = {0};
-	union audio_test_pattern_period dpcd_pattern_period[AUDIO_CHANNELS_COUNT] = {0};
-	enum dp_test_pattern test_pattern = DP_TEST_PATTERN_AUDIO_OPERATOR_DEFINED;
-
-	struct pipe_ctx *pipes = link->dc->current_state->res_ctx.pipe_ctx;
-	struct pipe_ctx *pipe_ctx = &pipes[0];
-	unsigned int channel_count;
-	unsigned int period_count;
-	unsigned int channel = 0;
-	unsigned int modes = 0;
-	unsigned int sampling_rate_in_hz = 0;
-
-	// get audio test mode and test pattern parameters
-	core_link_read_dpcd(
-		link,
-		DP_TEST_AUDIO_MODE,
-		&dpcd_test_mode.raw,
-		sizeof(dpcd_test_mode));
-
-	core_link_read_dpcd(
-		link,
-		DP_TEST_AUDIO_PATTERN_TYPE,
-		&dpcd_pattern_type.value,
-		sizeof(dpcd_pattern_type));
-
-	channel_count = dpcd_test_mode.bits.channel_count + 1;
-
-	/*
-	 * channel_count comes straight from the sink. Never let it index past
-	 * the local period array, whatever value the sink reports.
-	 */
-	period_count = channel_count;
-	if (period_count > ARRAY_SIZE(dpcd_pattern_period))
-		period_count = ARRAY_SIZE(dpcd_pattern_period);
-
-	// read pattern periods for requested channels when sawTooth pattern is requested
-	if (dpcd_pattern_type.value == AUDIO_TEST_PATTERN_SAWTOOTH ||
-			dpcd_pattern_type.value == AUDIO_TEST_PATTERN_OPERATOR_DEFINED) {
-
-		test_pattern = (dpcd_pattern_type.value == AUDIO_TEST_PATTERN_SAWTOOTH) ?
-				DP_TEST_PATTERN_AUDIO_SAWTOOTH : DP_TEST_PATTERN_AUDIO_OPERATOR_DEFINED;
-		// read period for each channel
-		for (channel = 0; channel < period_count; channel++) {
-			core_link_read_dpcd(
-							link,
-							DP_TEST_AUDIO_PERIOD_CH1 + channel,
-							&dpcd_pattern_period[channel].raw,
-							sizeof(dpcd_pattern_period[channel]));
-		}
-	}
-
-	// translate sampling rate
-	switch (dpcd_test_mode.bits.sampling_rate) {
-	case AUDIO_SAMPLING_RATE_32KHZ:
-		sampling_rate_in_hz = 32000;
-		break;
-	case AUDIO_SAMPLING_RATE_44_1KHZ:
-		sampling_rate_in_hz = 44100;
-		break;
-	case AUDIO_SAMPLING_RATE_48KHZ:
-		sampling_rate_in_hz = 48000;
-		break;
-	case AUDIO_SAMPLING_RATE_88_2KHZ:
-		sampling_rate_in_hz = 88200;
-		break;
-	case AUDIO_SAMPLING_RATE_96KHZ:
-		sampling_rate_in_hz = 96000;
-		break;
-	case AUDIO_SAMPLING_RATE_176_4KHZ:
-		sampling_rate_in_hz = 176400;
-		break;
-	case AUDIO_SAMPLING_RATE_192KHZ:
-		sampling_rate_in_hz = 192000;
-		break;
-	default:
-		sampling_rate_in_hz = 0;
-		break;
-	}
-
-	link->audio_test_data.flags.test_requested = 1;
-	link->audio_test_data.flags.disable_video = disable_video;
-	link->audio_test_data.sampling_rate = sampling_rate_in_hz;
-	link->audio_test_data.channel_count = channel_count;
-	link->audio_test_data.pattern_type = test_pattern;
-
-	/*
-	 * The copy count is taken from the audio mode count of the stream on
-	 * pipe 0. Skip the copy when that pipe has no stream, and stop at the
-	 * end of the local period array.
-	 * NOTE(review): assumes audio_test_data.pattern_period[] holds at
-	 * least AUDIO_CHANNELS_COUNT entries - confirm against its declaration.
-	 */
-	if (test_pattern == DP_TEST_PATTERN_AUDIO_SAWTOOTH && pipe_ctx->stream) {
-		for (modes = 0;
-		     modes < pipe_ctx->stream->audio_info.mode_count &&
-		     modes < ARRAY_SIZE(dpcd_pattern_period);
-		     modes++) {
-			link->audio_test_data.pattern_period[modes] = dpcd_pattern_period[modes].bits.pattern_period;
-		}
-	}
-}
-
-void dc_link_dp_handle_automated_test(struct dc_link *link)
-{ /*
- * Read DP_TEST_REQUEST from the sink and run every test it asks for.
- *
- * A link training request is acknowledged in DP_TEST_RESPONSE before the
- * training runs, and its ACK bit is then cleared again. Link test pattern,
- * audio test pattern and PHY test pattern requests each set ACK after
- * being serviced, and one response write at the end covers all of them.
- */
- union test_request test_request;
- union test_response test_response;
-
- memset(&test_request, 0, sizeof(test_request));
- memset(&test_response, 0, sizeof(test_response));
-
- core_link_read_dpcd(
- link,
- DP_TEST_REQUEST,
- &test_request.raw,
- sizeof(union test_request));
- if (test_request.bits.LINK_TRAINING) {
- /* ACK first to let DP RX test box monitor LT sequence */
- test_response.bits.ACK = 1;
- core_link_write_dpcd(
- link,
- DP_TEST_RESPONSE,
- &test_response.raw,
- sizeof(test_response));
- dp_test_send_link_training(link);
- /* no acknowledge request is needed again */
- test_response.bits.ACK = 0;
- }
- if (test_request.bits.LINK_TEST_PATTRN) {
- dp_test_send_link_test_pattern(link);
- test_response.bits.ACK = 1;
- }
-
- if (test_request.bits.AUDIO_TEST_PATTERN) {
- dp_test_get_audio_test_data(link, test_request.bits.TEST_AUDIO_DISABLED_VIDEO);
- test_response.bits.ACK = 1;
- }
-
- if (test_request.bits.PHY_TEST_PATTERN) {
- dp_test_send_phy_test_pattern(link);
- test_response.bits.ACK = 1;
- }
-
- /* send request acknowledgment */
- if (test_response.bits.ACK) /* not set when only link training was requested: that was ACKed above */
- core_link_write_dpcd(
- link,
- DP_TEST_RESPONSE,
- &test_response.raw,
- sizeof(test_response));
-}
-
-/*
- * Recover from a reported link loss by disabling and then re-enabling
- * every stream that is driven over @link.
- *
- * Only pipes whose stream is on this link, is not DPMS-off and has no
- * prev_odm_pipe are touched. All matching streams are disabled first and
- * only then re-enabled. Does nothing when no pipe in the current state
- * carries a stream on this link.
- */
-void dc_link_dp_handle_link_loss(struct dc_link *link)
-{
-	int i;
-	struct pipe_ctx *pipe_ctx;
-	bool link_in_use = false;
-
-	/*
-	 * pipe_ctx below is the address of an array element and can never be
-	 * NULL, so "was a pipe found" is tracked with an explicit flag rather
-	 * than by inspecting whichever pipe the loop happened to stop on.
-	 */
-	for (i = 0; i < MAX_PIPES; i++) {
-		pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i];
-		if (pipe_ctx->stream && pipe_ctx->stream->link == link) {
-			link_in_use = true;
-			break;
-		}
-	}
-
-	if (!link_in_use)
-		return;
-
-	for (i = 0; i < MAX_PIPES; i++) {
-		pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i];
-		if (pipe_ctx->stream && !pipe_ctx->stream->dpms_off &&
-				pipe_ctx->stream->link == link && !pipe_ctx->prev_odm_pipe) {
-			core_link_disable_stream(pipe_ctx);
-		}
-	}
-
-	for (i = 0; i < MAX_PIPES; i++) {
-		pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i];
-		if (pipe_ctx->stream && !pipe_ctx->stream->dpms_off &&
-				pipe_ctx->stream->link == link && !pipe_ctx->prev_odm_pipe) {
-			core_link_enable_stream(link->dc->current_state, pipe_ctx);
-		}
-	}
-}
-
-bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd_irq_dpcd_data, bool *out_link_loss,
- bool defer_handling, bool *has_left_work)
-{ /*
- * Service a short-pulse (RX) HPD interrupt on @link.
- *
- * out_hpd_irq_dpcd_data: optional; receives the IRQ data read from the
- *   sink (written even when the read failed).
- * out_link_loss: optional; set to true when link loss was detected,
- *   false otherwise.
- * defer_handling / has_left_work: when defer_handling is true and
- *   has_left_work is non-NULL, automated-test and link-loss handling are
- *   not run here; *has_left_work is set to true instead. It is also set
- *   for pending UP_REQ / DOWN_REP sideband messages.
- *
- * Returns true when a PSR event was handled, when UP_REQ_MSG_RDY is set,
- * or when an SST branch device reports a sink count different from
- * link->dpcd_sink_count; false in every other case, including read
- * failure, automated test requests and link loss.
- */
- union hpd_irq_data hpd_irq_dpcd_data = {0};
- union device_service_irq device_service_clear = {0};
- enum dc_status result;
- bool status = false;
-
- if (out_link_loss)
- *out_link_loss = false;
-
- if (has_left_work)
- *has_left_work = false;
- /* For use cases related to down stream connection status change,
- * PSR and device auto test, refer to function handle_sst_hpd_irq
- * in DAL2.1*/
-
- DC_LOG_HW_HPD_IRQ("%s: Got short pulse HPD on link %d\n",
- __func__, link->link_index);
-
-
- /* All the "handle_hpd_irq_xxx()" methods
- * should be called only after
- * dal_dpsst_ls_read_hpd_irq_data
- * Order of calls is important too
- */
- result = read_hpd_rx_irq_data(link, &hpd_irq_dpcd_data);
- if (out_hpd_irq_dpcd_data)
- *out_hpd_irq_dpcd_data = hpd_irq_dpcd_data;
-
- if (result != DC_OK) {
- DC_LOG_HW_HPD_IRQ("%s: DPCD read failed to obtain irq data\n",
- __func__);
- return false;
- }
-
- if (hpd_irq_dpcd_data.bytes.device_service_irq.bits.AUTOMATED_TEST) {
- device_service_clear.bits.AUTOMATED_TEST = 1;
- core_link_write_dpcd(
- link,
- DP_DEVICE_SERVICE_IRQ_VECTOR,
- &device_service_clear.raw,
- sizeof(device_service_clear.raw)); /* write the bit back to the IRQ vector before running the test */
- device_service_clear.raw = 0;
- if (defer_handling && has_left_work)
- *has_left_work = true;
- else
- dc_link_dp_handle_automated_test(link);
- return false;
- }
-
- if (!dc_link_dp_allow_hpd_rx_irq(link)) {
- DC_LOG_HW_HPD_IRQ("%s: skipping HPD handling on %d\n",
- __func__, link->link_index);
- return false;
- }
-
- if (handle_hpd_irq_psr_sink(link))
- /* PSR-related error was detected and handled */
- return true;
-
- /* If PSR-related error handled, Main link may be off,
- * so do not handle as a normal sink status change interrupt.
- */
-
- if (hpd_irq_dpcd_data.bytes.device_service_irq.bits.UP_REQ_MSG_RDY) {
- if (defer_handling && has_left_work)
- *has_left_work = true;
- return true;
- }
-
- /* check if we have MST msg and return since we poll for it */
- if (hpd_irq_dpcd_data.bytes.device_service_irq.bits.DOWN_REP_MSG_RDY) {
- if (defer_handling && has_left_work)
- *has_left_work = true;
- return false;
- }
-
- /* For now we only handle 'Downstream port status' case.
- * If we got sink count changed it means
- * Downstream port status changed,
- * then DM should call DC to do the detection.
- * NOTE: Do not handle link loss on eDP since it is internal link*/
- if ((link->connector_signal != SIGNAL_TYPE_EDP) &&
- hpd_rx_irq_check_link_loss_status(
- link,
- &hpd_irq_dpcd_data)) {
- /* Connectivity log: link loss */
- CONN_DATA_LINK_LOSS(link,
- hpd_irq_dpcd_data.raw,
- sizeof(hpd_irq_dpcd_data),
- "Status: ");
-
- if (defer_handling && has_left_work)
- *has_left_work = true;
- else
- dc_link_dp_handle_link_loss(link);
-
- status = false;
- if (out_link_loss)
- *out_link_loss = true;
- }
-
- if (link->type == dc_connection_sst_branch &&
- hpd_irq_dpcd_data.bytes.sink_cnt.bits.SINK_COUNT
- != link->dpcd_sink_count)
- status = true; /* may override the false set by the link-loss branch above */
-
- /* reasons for HPD RX:
- * 1. Link Loss - ie Re-train the Link
- * 2. MST sideband message
- * 3. Automated Test - ie. Internal Commit
- * 4. CP (copy protection) - (not interesting for DM???)
- * 5. DRR
- * 6. Downstream Port status changed
- * -ie. Detect - this the only one
- * which is interesting for DM because
- * it must call dc_link_detect.
- */
- return status;
-}
-
-/*
- * Query the DPCD revision and MST capability registers.
- *
- * Returns false without touching the sink when the preferred training
- * settings explicitly disable MST. Otherwise returns true only if both
- * DPCD reads succeed, the DPCD revision is at least 1.2 and MST_CAP is 1.
- */
-bool is_mst_supported(struct dc_link *link)
-{
-	union dpcd_rev rev;
-	union mstm_cap cap;
-
-	if (link->preferred_training_settings.mst_enable &&
-	    !*link->preferred_training_settings.mst_enable)
-		return false;
-
-	rev.raw = 0;
-	cap.raw = 0;
-
-	if (core_link_read_dpcd(link, DP_DPCD_REV, &rev.raw,
-			sizeof(rev)) != DC_OK)
-		return false;
-
-	/* the MST capability register is only read for DPCD rev 1.2 and up */
-	if (rev.raw < DPCD_REV_12)
-		return false;
-
-	if (core_link_read_dpcd(link, DP_MSTM_CAP,
-			&cap.raw, sizeof(cap)) != DC_OK)
-		return false;
-
-	return cap.bits.MST_CAP == 1;
-}
-
-/*
- * True when the dongle type cached in the link's DPCD caps lies in the
- * range DISPLAY_DONGLE_DP_VGA_CONVERTER .. DISPLAY_DONGLE_DP_HDMI_CONVERTER
- * (both inclusive).
- */
-bool is_dp_active_dongle(const struct dc_link *link)
-{
-	if (link->dpcd_caps.dongle_type < DISPLAY_DONGLE_DP_VGA_CONVERTER)
-		return false;
-
-	return link->dpcd_caps.dongle_type <= DISPLAY_DONGLE_DP_HDMI_CONVERTER;
-}
-
-/* Report the branch-device flag cached in the link's DPCD caps. */
-bool is_dp_branch_device(const struct dc_link *link)
-{
-	bool is_branch = link->dpcd_caps.is_branch_dev;
-
-	return is_branch;
-}
-
-/*
- * Convert a DPCD downstream-port max-bpc code into bits per color
- * component (8, 10, 12 or 16). Any other code yields -1.
- */
-static int translate_dpcd_max_bpc(enum dpcd_downstream_port_max_bpc bpc)
-{
-	int bits_per_component = -1;
-
-	switch (bpc) {
-	case DOWN_STREAM_MAX_8BPC:
-		bits_per_component = 8;
-		break;
-	case DOWN_STREAM_MAX_10BPC:
-		bits_per_component = 10;
-		break;
-	case DOWN_STREAM_MAX_12BPC:
-		bits_per_component = 12;
-		break;
-	case DOWN_STREAM_MAX_16BPC:
-		bits_per_component = 16;
-		break;
-	default:
-		break;
-	}
-
-	return bits_per_component;
-}
-
-/*
- * Read the branch device's IEEE OUI and device id string from DPCD
- * (starting at DP_BRANCH_OUI) and cache them in link->dpcd_caps as
- * branch_dev_id (the three OUI bytes packed big-endian into one value)
- * and branch_dev_name.
- */
-static void read_dp_device_vendor_id(struct dc_link *link)
-{
-	struct dp_device_vendor_id dp_id;
-
-	/*
-	 * The read result is not checked, so start from zero: a failed read
-	 * then caches an all-zero id and name rather than stack garbage.
-	 */
-	memset(&dp_id, 0, sizeof(dp_id));
-
-	/* read IEEE branch device id */
-	core_link_read_dpcd(
-		link,
-		DP_BRANCH_OUI,
-		(uint8_t *)&dp_id,
-		sizeof(dp_id));
-
-	link->dpcd_caps.branch_dev_id =
-		(dp_id.ieee_oui[0] << 16) +
-		(dp_id.ieee_oui[1] << 8) +
-		dp_id.ieee_oui[2];
-
-	memmove(
-		link->dpcd_caps.branch_dev_name,
-		dp_id.ieee_device_id,
-		sizeof(dp_id.ieee_device_id));
-}
-
-
-
-static void get_active_converter_info(
- uint8_t data, struct dc_link *link)
-{
- union dp_downstream_port_present ds_port = { .byte = data };
- memset(&link->dpcd_caps.dongle_caps, 0, sizeof(link->dpcd_caps.dongle_caps));
-
- /* decode converter info*/
- if (!ds_port.fields.PORT_PRESENT) {
- link->dpcd_caps.dongle_type = DISPLAY_DONGLE_NONE;
- ddc_service_set_dongle_type(link->ddc,
- link->dpcd_caps.dongle_type);
- link->dpcd_caps.is_branch_dev = false;
- return;
- }
-
- /* DPCD 0x5 bit 0 = 1, it indicate it's branch device */
- link->dpcd_caps.is_branch_dev = ds_port.fields.PORT_PRESENT;
-
- switch (ds_port.fields.PORT_TYPE) {
- case DOWNSTREAM_VGA:
- link->dpcd_caps.dongle_type = DISPLAY_DONGLE_DP_VGA_CONVERTER;
- break;
- case DOWNSTREAM_DVI_HDMI_DP_PLUS_PLUS:
- /* At this point we don't know is it DVI or HDMI or DP++,
- * assume DVI.*/
- link->dpcd_caps.dongle_type = DISPLAY_DONGLE_DP_DVI_CONVERTER;
- break;
- default:
- link->dpcd_caps.dongle_type = DISPLAY_DONGLE_NONE;
- break;
- }
-
- if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_11) {
- uint8_t det_caps[16]; /* CTS 4.2.2.7 expects source to read Detailed Capabilities Info : 00080h-0008F.*/
- union dwnstream_port_caps_byte0 *port_caps =
- (union dwnstream_port_caps_byte0 *)det_caps;
- if (core_link_read_dpcd(link, DP_DOWNSTREAM_PORT_0,
- det_caps, sizeof(det_caps)) == DC_OK) {
-
- switch (port_caps->bits.DWN_STRM_PORTX_TYPE) {
- /*Handle DP case as DONGLE_NONE*/
- case DOWN_STREAM_DETAILED_DP:
- link->dpcd_caps.dongle_type = DISPLAY_DONGLE_NONE;
- break;
- case DOWN_STREAM_DETAILED_VGA:
- link->dpcd_caps.dongle_type =
- DISPLAY_DONGLE_DP_VGA_CONVERTER;
- break;
- case DOWN_STREAM_DETAILED_DVI:
- link->dpcd_caps.dongle_type =
- DISPLAY_DONGLE_DP_DVI_CONVERTER;
- break;
- case DOWN_STREAM_DETAILED_HDMI:
- case DOWN_STREAM_DETAILED_DP_PLUS_PLUS:
- /*Handle DP++ active converter case, process DP++ case as HDMI case according DP1.4 spec*/
- link->dpcd_caps.dongle_type =
- DISPLAY_DONGLE_DP_HDMI_CONVERTER;
-
- link->dpcd_caps.dongle_caps.dongle_type = link->dpcd_caps.dongle_type;
- if (ds_port.fields.DETAILED_CAPS) {
-
- union dwnstream_port_caps_byte3_hdmi
- hdmi_caps = {.raw = det_caps[3] };
- union dwnstream_port_caps_byte2
- hdmi_color_caps = {.raw = det_caps[2] };
- link->dpcd_caps.dongle_caps.dp_hdmi_max_pixel_clk_in_khz =
- det_caps[1] * 2500;
-
- link->dpcd_caps.dongle_caps.is_dp_hdmi_s3d_converter =
- hdmi_caps.bits.FRAME_SEQ_TO_FRAME_PACK;
- /*YCBCR capability only for HDMI case*/
- if (port_caps->bits.DWN_STRM_PORTX_TYPE
- == DOWN_STREAM_DETAILED_HDMI) {
- link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr422_pass_through =
- hdmi_caps.bits.YCrCr422_PASS_THROUGH;
- link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr420_pass_through =
- hdmi_caps.bits.YCrCr420_PASS_THROUGH;
- link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr422_converter =
- hdmi_caps.bits.YCrCr422_CONVERSION;
- link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr420_converter =
- hdmi_caps.bits.YCrCr420_CONVERSION;
- }
-
- link->dpcd_caps.dongle_caps.dp_hdmi_max_bpc =
- translate_dpcd_max_bpc(
- hdmi_color_caps.bits.MAX_BITS_PER_COLOR_COMPONENT);
-
- if (link->dpcd_caps.dongle_caps.dp_hdmi_max_pixel_clk_in_khz != 0)
- link->dpcd_caps.dongle_caps.extendedCapValid = true;
- }
-
- break;
- }
- }
- }
-
- ddc_service_set_dongle_type(link->ddc, link->dpcd_caps.dongle_type);
-
- {
- struct dp_sink_hw_fw_revision dp_hw_fw_revision;
-
- core_link_read_dpcd(
- link,
- DP_BRANCH_REVISION_START,
- (uint8_t *)&dp_hw_fw_revision,
- sizeof(dp_hw_fw_revision));
-
- link->dpcd_caps.branch_hw_revision =
- dp_hw_fw_revision.ieee_hw_rev;
-
- memmove(
- link->dpcd_caps.branch_fw_revision,
- dp_hw_fw_revision.ieee_fw_rev,
- sizeof(dp_hw_fw_revision.ieee_fw_rev));
- }
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_14 &&
- link->dpcd_caps.dongle_type != DISPLAY_DONGLE_NONE) {
- union dp_dfp_cap_ext dfp_cap_ext;
- memset(&dfp_cap_ext, '\0', sizeof (dfp_cap_ext));
- core_link_read_dpcd(
- link,
- DP_DFP_CAPABILITY_EXTENSION_SUPPORT,
- dfp_cap_ext.raw,
- sizeof(dfp_cap_ext.raw));
- link->dpcd_caps.dongle_caps.dfp_cap_ext.supported = dfp_cap_ext.fields.supported;
- link->dpcd_caps.dongle_caps.dfp_cap_ext.max_pixel_rate_in_mps =
- dfp_cap_ext.fields.max_pixel_rate_in_mps[0] +
- (dfp_cap_ext.fields.max_pixel_rate_in_mps[1] << 8);
- link->dpcd_caps.dongle_caps.dfp_cap_ext.max_video_h_active_width =
- dfp_cap_ext.fields.max_video_h_active_width[0] +
- (dfp_cap_ext.fields.max_video_h_active_width[1] << 8);
- link->dpcd_caps.dongle_caps.dfp_cap_ext.max_video_v_active_height =
- dfp_cap_ext.fields.max_video_v_active_height[0] +
- (dfp_cap_ext.fields.max_video_v_active_height[1] << 8);
- link->dpcd_caps.dongle_caps.dfp_cap_ext.encoding_format_caps =
- dfp_cap_ext.fields.encoding_format_caps;
- link->dpcd_caps.dongle_caps.dfp_cap_ext.rgb_color_depth_caps =
- dfp_cap_ext.fields.rgb_color_depth_caps;
- link->dpcd_caps.dongle_caps.dfp_cap_ext.ycbcr444_color_depth_caps =
- dfp_cap_ext.fields.ycbcr444_color_depth_caps;
- link->dpcd_caps.dongle_caps.dfp_cap_ext.ycbcr422_color_depth_caps =
- dfp_cap_ext.fields.ycbcr422_color_depth_caps;
- link->dpcd_caps.dongle_caps.dfp_cap_ext.ycbcr420_color_depth_caps =
- dfp_cap_ext.fields.ycbcr420_color_depth_caps;
- DC_LOG_DP2("DFP capability extension is read at link %d", link->link_index);
- DC_LOG_DP2("\tdfp_cap_ext.supported = %s", link->dpcd_caps.dongle_caps.dfp_cap_ext.supported ? "true" : "false");
- DC_LOG_DP2("\tdfp_cap_ext.max_pixel_rate_in_mps = %d", link->dpcd_caps.dongle_caps.dfp_cap_ext.max_pixel_rate_in_mps);
- DC_LOG_DP2("\tdfp_cap_ext.max_video_h_active_width = %d", link->dpcd_caps.dongle_caps.dfp_cap_ext.max_video_h_active_width);
- DC_LOG_DP2("\tdfp_cap_ext.max_video_v_active_height = %d", link->dpcd_caps.dongle_caps.dfp_cap_ext.max_video_v_active_height);
- }
-#endif
-}
-
-/*
- * Apply the power-up / keep-powered workaround for a link.
- *
- * When no DPCD revision has been recorded for the link yet, the receiver is
- * powered up and the block at DP_DPCD_REV is re-read into @dpcd_data, at most
- * five times, until a non-zero revision byte is seen. Afterwards the
- * dp_keep_receiver_powered workaround flag is set only for DP-to-VGA
- * converters whose branch device id is on the list below, and cleared in
- * every other case.
- *
- * @link: link whose dpcd_caps.dpcd_rev and wa_flags are updated
- * @dpcd_data: caller buffer receiving the data read at DP_DPCD_REV
- * @length: number of bytes to read into @dpcd_data
- */
-static void dp_wa_power_up_0010FA(struct dc_link *link, uint8_t *dpcd_data,
-		int length)
-{
-	bool keep_rx_powered = false;
-	int attempt;
-
-	if (link->dpcd_caps.dpcd_rev.raw == 0) {
-		/* One initial try plus four retries. */
-		for (attempt = 0; attempt < 5; attempt++) {
-			dp_receiver_power_ctrl(link, true);
-			core_link_read_dpcd(link, DP_DPCD_REV,
-					dpcd_data, length);
-			link->dpcd_caps.dpcd_rev.raw =
-				dpcd_data[DP_DPCD_REV - DP_DPCD_REV];
-			if (link->dpcd_caps.dpcd_rev.raw != 0)
-				break;
-		}
-	}
-
-	if (link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_VGA_CONVERTER) {
-		switch (link->dpcd_caps.branch_dev_id) {
-		/* 0010FA active dongles (DP-VGA, DP-DLDVI converters) power
-		 * down all internal circuits including AUX communication,
-		 * preventing reading of the DPCD table and EDID (spec
-		 * violation). The encoder will skip DP RX power down on
-		 * disable_output to keep the receiver powered all the time.
-		 */
-		case DP_BRANCH_DEVICE_ID_0010FA:
-		case DP_BRANCH_DEVICE_ID_0080E1:
-		case DP_BRANCH_DEVICE_ID_00E04C:
-			keep_rx_powered = true;
-			break;
-
-		/* TODO: May need work around for other dongles. */
-		default:
-			break;
-		}
-	}
-
-	link->wa_flags.dp_keep_receiver_powered = keep_rx_powered;
-}
-
-/*
- * Read the additional sink capabilities kept in the source specific DPCD
- * area. Only the single byte at DP_SOURCE_SINK_CAP is read at present.
- *
- * Returns true and stores the byte in link->dpcd_sink_ext_caps.raw on
- * success; returns false, leaving the link untouched, when @link is NULL or
- * the DPCD read does not return DC_OK.
- */
-static bool dpcd_read_sink_ext_caps(struct dc_link *link)
-{
-	uint8_t sink_cap_byte;
-
-	if (link == NULL ||
-	    core_link_read_dpcd(link, DP_SOURCE_SINK_CAP,
-				&sink_cap_byte, 1) != DC_OK)
-		return false;
-
-	link->dpcd_sink_ext_caps.raw = sink_cap_byte;
-
-	return true;
-}
-
-/*
- * Choose the LTTPR operating mode for @link and, unless that mode is
- * non-LTTPR, read and cache the repeater capability block.
- *
- * Returns true only when the capability bytes read back pass the validity
- * checks at the end of this function. Returns false when the selected mode
- * is non-LTTPR, when the DPCD read fails, or when validation fails (in the
- * latter case the link is put back into LTTPR_MODE_NON_LTTPR).
- */
-bool dp_retrieve_lttpr_cap(struct dc_link *link)
-{
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-	uint8_t lttpr_dpcd_data[8];
-#else
-	uint8_t lttpr_dpcd_data[6];
-#endif
-	const bool vbios_enable = link->dc->caps.vbios_lttpr_enable;
-	const bool vbios_interop = link->dc->caps.vbios_lttpr_aware;
-	bool allow_non_transparent;
-	bool is_lttpr_present;
-	enum dc_status status;
-
-	memset(lttpr_dpcd_data, '\0', sizeof(lttpr_dpcd_data));
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-	/* The config knob is split per channel coding: DP2_0 applies when the
-	 * sink reports 128b/132b support, DP1_4A when it does not.
-	 */
-	if (link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED)
-		allow_non_transparent =
-			link->dc->config.allow_lttpr_non_transparent_mode.bits.DP2_0 != 0;
-	else
-		allow_non_transparent =
-			link->dc->config.allow_lttpr_non_transparent_mode.bits.DP1_4A != 0;
-#else
-	allow_non_transparent =
-		link->dc->config.allow_lttpr_non_transparent_mode ? true : false;
-#endif
-
-	/*
-	 * Logic to determine LTTPR mode
-	 */
-	if (vbios_interop)
-		link->lttpr_mode = (vbios_enable || allow_non_transparent) ?
-			LTTPR_MODE_NON_TRANSPARENT : LTTPR_MODE_TRANSPARENT;
-	else if (!vbios_enable && allow_non_transparent &&
-		 link->dc->caps.extended_aux_timeout_support)
-		link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT;
-	else
-		link->lttpr_mode = LTTPR_MODE_NON_LTTPR;
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-	/* Check DP tunnel LTTPR mode debug option. */
-	if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
-	    link->dc->debug.dpia_debug.bits.force_non_lttpr)
-		link->lttpr_mode = LTTPR_MODE_NON_LTTPR;
-#endif
-
-	if (link->lttpr_mode != LTTPR_MODE_NON_TRANSPARENT &&
-	    link->lttpr_mode != LTTPR_MODE_TRANSPARENT)
-		return false;
-
-	/* By reading LTTPR capability, RX assumes that we will enable
-	 * LTTPR extended aux timeout if LTTPR is present.
-	 */
-	status = core_link_read_dpcd(
-			link,
-			DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV,
-			lttpr_dpcd_data,
-			sizeof(lttpr_dpcd_data));
-	if (status != DC_OK) {
-		dm_error("%s: Read LTTPR caps data failed.\n", __func__);
-		return false;
-	}
-
-	/* Each field sits at (register address - block start) in the buffer. */
-	link->dpcd_caps.lttpr_caps.revision.raw =
-		lttpr_dpcd_data[DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV -
-				DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
-	link->dpcd_caps.lttpr_caps.max_link_rate =
-		lttpr_dpcd_data[DP_MAX_LINK_RATE_PHY_REPEATER -
-				DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
-	link->dpcd_caps.lttpr_caps.phy_repeater_cnt =
-		lttpr_dpcd_data[DP_PHY_REPEATER_CNT -
-				DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
-	link->dpcd_caps.lttpr_caps.max_lane_count =
-		lttpr_dpcd_data[DP_MAX_LANE_COUNT_PHY_REPEATER -
-				DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
-	link->dpcd_caps.lttpr_caps.mode =
-		lttpr_dpcd_data[DP_PHY_REPEATER_MODE -
-				DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
-	link->dpcd_caps.lttpr_caps.max_ext_timeout =
-		lttpr_dpcd_data[DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT -
-				DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-	link->dpcd_caps.lttpr_caps.main_link_channel_coding.raw =
-		lttpr_dpcd_data[DP_MAIN_LINK_CHANNEL_CODING_PHY_REPEATER -
-				DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
-	link->dpcd_caps.lttpr_caps.supported_128b_132b_rates.raw =
-		lttpr_dpcd_data[DP_PHY_REPEATER_128b_132b_RATES -
-				DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
-#endif
-
-	/* Treat the repeater as present only if the converted repeater count
-	 * is non-zero, the raw count is below 0xff, the lane count is 1..4
-	 * and the reported revision is at least 0x14.
-	 */
-	is_lttpr_present =
-		dp_convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) != 0 &&
-		link->dpcd_caps.lttpr_caps.phy_repeater_cnt < 0xff &&
-		link->dpcd_caps.lttpr_caps.max_lane_count > 0 &&
-		link->dpcd_caps.lttpr_caps.max_lane_count <= 4 &&
-		link->dpcd_caps.lttpr_caps.revision.raw >= 0x14;
-
-	if (!is_lttpr_present) {
-		link->lttpr_mode = LTTPR_MODE_NON_LTTPR;
-		return false;
-	}
-
-	CONN_DATA_DETECT(link, lttpr_dpcd_data, sizeof(lttpr_dpcd_data), "LTTPR Caps: ");
-	configure_lttpr_mode_transparent(link);
-
-	return true;
-}
-
-/*
- * Read the receiver capability block and a set of related DPCD fields from
- * the sink and cache them in link->dpcd_caps / link->reported_link_cap.
- *
- * Returns false when the base capability read at DP_DPCD_REV fails after
- * all retries, or when the sink reports a max lane count of 0; returns true
- * otherwise. The reads after the base capability block are best effort:
- * their status is not propagated to the caller.
- */
-static bool retrieve_link_cap(struct dc_link *link)
-{
-	/* DP_ADAPTER_CAP - DP_DPCD_REV + 1 == 16 and also DP_DSC_BITS_PER_PIXEL_INC - DP_DSC_SUPPORT + 1 == 16,
-	 * which means size 16 will be good for both of those DPCD register block reads
-	 */
-	uint8_t dpcd_data[16];
-	/*Only need to read 1 byte starting from DP_DPRX_FEATURE_ENUMERATION_LIST.
-	 */
-	uint8_t dpcd_dprx_data = '\0';
-	uint8_t dpcd_power_state = '\0';
-
-	struct dp_device_vendor_id sink_id;
-	union down_stream_port_count down_strm_port_count;
-	union edp_configuration_cap edp_config_cap;
-	union dp_downstream_port_present ds_port = { 0 };
-	enum dc_status status = DC_ERROR_UNEXPECTED;
-	uint32_t read_dpcd_retry_cnt = 3;
-	int i;
-	struct dp_sink_hw_fw_revision dp_hw_fw_revision;
-	const uint32_t post_oui_delay = 30; // 30ms
-	bool is_lttpr_present = false;
-
-	memset(dpcd_data, '\0', sizeof(dpcd_data));
-	memset(&down_strm_port_count,
-		'\0', sizeof(union down_stream_port_count));
-	memset(&edp_config_cap, '\0',
-		sizeof(union edp_configuration_cap));
-	/* The sink OUI and HW/FW revision reads below are best effort and
-	 * their results are parsed unconditionally, so start from zeroes
-	 * rather than from whatever happens to be on the stack.
-	 */
-	memset(&sink_id, '\0', sizeof(sink_id));
-	memset(&dp_hw_fw_revision, '\0', sizeof(dp_hw_fw_revision));
-
-	/* if extended timeout is supported in hardware,
-	 * default to LTTPR timeout (3.2ms) first as a W/A for DP link layer
-	 * CTS 4.2.1.1 regression introduced by CTS specs requirement update.
-	 */
-	dc_link_aux_try_to_configure_timeout(link->ddc,
-			LINK_AUX_DEFAULT_LTTPR_TIMEOUT_PERIOD);
-
-	is_lttpr_present = dp_retrieve_lttpr_cap(link);
-	/* Read DP tunneling information. */
-	status = dpcd_get_tunneling_device_data(link);
-
-	status = core_link_read_dpcd(link, DP_SET_POWER,
-			&dpcd_power_state, sizeof(dpcd_power_state));
-
-	/* Delay 1 ms if AUX CH is in power down state. Based on spec
-	 * section 2.3.1.2, if AUX CH may be powered down due to
-	 * write to DPCD 600h = 2. Sink AUX CH is monitoring differential
-	 * signal and may need up to 1 ms before being able to reply.
-	 */
-	if (status != DC_OK || dpcd_power_state == DP_SET_POWER_D3)
-		udelay(1000);
-
-	dpcd_set_source_specific_data(link);
-	/* Sink may need to configure internals based on vendor, so allow some
-	 * time before proceeding with possibly vendor specific transactions
-	 */
-	msleep(post_oui_delay);
-
-	for (i = 0; i < read_dpcd_retry_cnt; i++) {
-		status = core_link_read_dpcd(
-				link,
-				DP_DPCD_REV,
-				dpcd_data,
-				sizeof(dpcd_data));
-		if (status == DC_OK)
-			break;
-	}
-
-	if (status != DC_OK) {
-		dm_error("%s: Read receiver caps dpcd data failed.\n", __func__);
-		return false;
-	}
-
-	if (!is_lttpr_present)
-		dc_link_aux_try_to_configure_timeout(link->ddc, LINK_AUX_DEFAULT_TIMEOUT_PERIOD);
-
-	{
-		union training_aux_rd_interval aux_rd_interval;
-
-		/* dpcd_data starts at DP_DPCD_REV, so index relative to it
-		 * like every other field parsed from this buffer.
-		 */
-		aux_rd_interval.raw =
-			dpcd_data[DP_TRAINING_AUX_RD_INTERVAL - DP_DPCD_REV];
-
-		link->dpcd_caps.ext_receiver_cap_field_present =
-				aux_rd_interval.bits.EXT_RECEIVER_CAP_FIELD_PRESENT == 1;
-
-		if (aux_rd_interval.bits.EXT_RECEIVER_CAP_FIELD_PRESENT == 1) {
-			uint8_t ext_cap_data[16];
-
-			memset(ext_cap_data, '\0', sizeof(ext_cap_data));
-			for (i = 0; i < read_dpcd_retry_cnt; i++) {
-				status = core_link_read_dpcd(
-						link,
-						DP_DP13_DPCD_REV,
-						ext_cap_data,
-						sizeof(ext_cap_data));
-				if (status == DC_OK) {
-					/* Extended caps replace the base caps. */
-					memcpy(dpcd_data, ext_cap_data, sizeof(dpcd_data));
-					break;
-				}
-			}
-			if (status != DC_OK)
-				dm_error("%s: Read extend caps data failed, use cap from dpcd 0.\n", __func__);
-		}
-	}
-
-	link->dpcd_caps.dpcd_rev.raw =
-			dpcd_data[DP_DPCD_REV - DP_DPCD_REV];
-
-	if (link->dpcd_caps.ext_receiver_cap_field_present) {
-		for (i = 0; i < read_dpcd_retry_cnt; i++) {
-			status = core_link_read_dpcd(
-					link,
-					DP_DPRX_FEATURE_ENUMERATION_LIST,
-					&dpcd_dprx_data,
-					sizeof(dpcd_dprx_data));
-			if (status == DC_OK)
-				break;
-		}
-
-		link->dpcd_caps.dprx_feature.raw = dpcd_dprx_data;
-
-		if (status != DC_OK)
-			dm_error("%s: Read DPRX caps data failed.\n", __func__);
-	} else {
-		link->dpcd_caps.dprx_feature.raw = 0;
-	}
-
-	/* Error condition checking...
-	 * It is impossible for Sink to report Max Lane Count = 0.
-	 * It is possible for Sink to report Max Link Rate = 0, if it is
-	 * an eDP device that is reporting specialized link rates in the
-	 * SUPPORTED_LINK_RATE table.
-	 */
-	if (dpcd_data[DP_MAX_LANE_COUNT - DP_DPCD_REV] == 0)
-		return false;
-
-	ds_port.byte = dpcd_data[DP_DOWNSTREAMPORT_PRESENT -
-				 DP_DPCD_REV];
-
-	read_dp_device_vendor_id(link);
-
-	get_active_converter_info(ds_port.byte, link);
-
-	dp_wa_power_up_0010FA(link, dpcd_data, sizeof(dpcd_data));
-
-	down_strm_port_count.raw = dpcd_data[DP_DOWN_STREAM_PORT_COUNT -
-				 DP_DPCD_REV];
-
-	link->dpcd_caps.allow_invalid_MSA_timing_param =
-		down_strm_port_count.bits.IGNORE_MSA_TIMING_PARAM;
-
-	link->dpcd_caps.max_ln_count.raw = dpcd_data[
-		DP_MAX_LANE_COUNT - DP_DPCD_REV];
-
-	link->dpcd_caps.max_down_spread.raw = dpcd_data[
-		DP_MAX_DOWNSPREAD - DP_DPCD_REV];
-
-	link->reported_link_cap.lane_count =
-		link->dpcd_caps.max_ln_count.bits.MAX_LANE_COUNT;
-	link->reported_link_cap.link_rate = dpcd_data[
-		DP_MAX_LINK_RATE - DP_DPCD_REV];
-	link->reported_link_cap.link_spread =
-		link->dpcd_caps.max_down_spread.bits.MAX_DOWN_SPREAD ?
-		LINK_SPREAD_05_DOWNSPREAD_30KHZ : LINK_SPREAD_DISABLED;
-
-	edp_config_cap.raw = dpcd_data[
-		DP_EDP_CONFIGURATION_CAP - DP_DPCD_REV];
-	link->dpcd_caps.panel_mode_edp =
-		edp_config_cap.bits.ALT_SCRAMBLER_RESET;
-	link->dpcd_caps.dpcd_display_control_capable =
-		edp_config_cap.bits.DPCD_DISPLAY_CONTROL_CAPABLE;
-
-	link->test_pattern_enabled = false;
-	link->compliance_test_state.raw = 0;
-
-	/* read sink count */
-	core_link_read_dpcd(link,
-			DP_SINK_COUNT,
-			&link->dpcd_caps.sink_count.raw,
-			sizeof(link->dpcd_caps.sink_count.raw));
-
-	/* read sink ieee oui */
-	core_link_read_dpcd(link,
-			DP_SINK_OUI,
-			(uint8_t *)(&sink_id),
-			sizeof(sink_id));
-
-	link->dpcd_caps.sink_dev_id =
-			(sink_id.ieee_oui[0] << 16) +
-			(sink_id.ieee_oui[1] << 8) +
-			(sink_id.ieee_oui[2]);
-
-	memmove(
-		link->dpcd_caps.sink_dev_id_str,
-		sink_id.ieee_device_id,
-		sizeof(sink_id.ieee_device_id));
-
-	/* Quirk Apple MBP 2017 15" Retina panel: Wrong DP_MAX_LINK_RATE */
-	{
-		uint8_t str_mbp_2017[] = { 101, 68, 21, 101, 98, 97 };
-
-		if ((link->dpcd_caps.sink_dev_id == 0x0010fa) &&
-		    !memcmp(link->dpcd_caps.sink_dev_id_str, str_mbp_2017,
-			    sizeof(str_mbp_2017))) {
-			link->reported_link_cap.link_rate = 0x0c;
-		}
-	}
-
-	core_link_read_dpcd(
-		link,
-		DP_SINK_HW_REVISION_START,
-		(uint8_t *)&dp_hw_fw_revision,
-		sizeof(dp_hw_fw_revision));
-
-	link->dpcd_caps.sink_hw_revision =
-		dp_hw_fw_revision.ieee_hw_rev;
-
-	memmove(
-		link->dpcd_caps.sink_fw_revision,
-		dp_hw_fw_revision.ieee_fw_rev,
-		sizeof(dp_hw_fw_revision.ieee_fw_rev));
-
-	memset(&link->dpcd_caps.dsc_caps, '\0',
-			sizeof(link->dpcd_caps.dsc_caps));
-	memset(&link->dpcd_caps.fec_cap, '\0', sizeof(link->dpcd_caps.fec_cap));
-	/* Read DSC and FEC sink capabilities if DP revision is 1.4 and up */
-	if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_14) {
-		status = core_link_read_dpcd(
-				link,
-				DP_FEC_CAPABILITY,
-				&link->dpcd_caps.fec_cap.raw,
-				sizeof(link->dpcd_caps.fec_cap.raw));
-		status = core_link_read_dpcd(
-				link,
-				DP_DSC_SUPPORT,
-				link->dpcd_caps.dsc_caps.dsc_basic_caps.raw,
-				sizeof(link->dpcd_caps.dsc_caps.dsc_basic_caps.raw));
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-		if (link->dpcd_caps.dongle_type != DISPLAY_DONGLE_NONE) {
-			status = core_link_read_dpcd(
-					link,
-					DP_DSC_BRANCH_OVERALL_THROUGHPUT_0,
-					link->dpcd_caps.dsc_caps.dsc_branch_decoder_caps.raw,
-					sizeof(link->dpcd_caps.dsc_caps.dsc_branch_decoder_caps.raw));
-			DC_LOG_DSC("DSC branch decoder capability is read at link %d", link->link_index);
-			DC_LOG_DSC("\tBRANCH_OVERALL_THROUGHPUT_0 = 0x%02x",
-					link->dpcd_caps.dsc_caps.dsc_branch_decoder_caps.fields.BRANCH_OVERALL_THROUGHPUT_0);
-			DC_LOG_DSC("\tBRANCH_OVERALL_THROUGHPUT_1 = 0x%02x",
-					link->dpcd_caps.dsc_caps.dsc_branch_decoder_caps.fields.BRANCH_OVERALL_THROUGHPUT_1);
-			DC_LOG_DSC("\tBRANCH_MAX_LINE_WIDTH 0x%02x",
-					link->dpcd_caps.dsc_caps.dsc_branch_decoder_caps.fields.BRANCH_MAX_LINE_WIDTH);
-		}
-#else
-		status = core_link_read_dpcd(
-				link,
-				DP_DSC_BRANCH_OVERALL_THROUGHPUT_0,
-				link->dpcd_caps.dsc_caps.dsc_branch_decoder_caps.raw,
-				sizeof(link->dpcd_caps.dsc_caps.dsc_branch_decoder_caps.raw));
-#endif
-	}
-
-	if (!dpcd_read_sink_ext_caps(link))
-		link->dpcd_sink_ext_caps.raw = 0;
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-	link->dpcd_caps.channel_coding_cap.raw = dpcd_data[DP_MAIN_LINK_CHANNEL_CODING_CAP - DP_DPCD_REV];
-
-	if (link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) {
-		DC_LOG_DP2("128b/132b encoding is supported at link %d", link->link_index);
-
-		core_link_read_dpcd(link,
-				DP_128b_132b_SUPPORTED_LINK_RATES,
-				&link->dpcd_caps.dp_128b_132b_supported_link_rates.raw,
-				sizeof(link->dpcd_caps.dp_128b_132b_supported_link_rates.raw));
-		/* Report the highest UHBR rate the sink advertises. */
-		if (link->dpcd_caps.dp_128b_132b_supported_link_rates.bits.UHBR20)
-			link->reported_link_cap.link_rate = LINK_RATE_UHBR20;
-		else if (link->dpcd_caps.dp_128b_132b_supported_link_rates.bits.UHBR13_5)
-			link->reported_link_cap.link_rate = LINK_RATE_UHBR13_5;
-		else if (link->dpcd_caps.dp_128b_132b_supported_link_rates.bits.UHBR10)
-			link->reported_link_cap.link_rate = LINK_RATE_UHBR10;
-		else
-			dm_error("%s: Invalid RX 128b_132b_supported_link_rates\n", __func__);
-		DC_LOG_DP2("128b/132b supported link rates is read at link %d", link->link_index);
-		DC_LOG_DP2("\tmax 128b/132b link rate support is %d.%d GHz",
-				link->reported_link_cap.link_rate / 100,
-				link->reported_link_cap.link_rate % 100);
-
-		core_link_read_dpcd(link,
-				DP_SINK_VIDEO_FALLBACK_FORMATS,
-				&link->dpcd_caps.fallback_formats.raw,
-				sizeof(link->dpcd_caps.fallback_formats.raw));
-		DC_LOG_DP2("sink video fallback format is read at link %d", link->link_index);
-		if (link->dpcd_caps.fallback_formats.bits.dp_1920x1080_60Hz_24bpp_support)
-			DC_LOG_DP2("\t1920x1080@60Hz 24bpp fallback format supported");
-		if (link->dpcd_caps.fallback_formats.bits.dp_1280x720_60Hz_24bpp_support)
-			DC_LOG_DP2("\t1280x720@60Hz 24bpp fallback format supported");
-		if (link->dpcd_caps.fallback_formats.bits.dp_1024x768_60Hz_24bpp_support)
-			DC_LOG_DP2("\t1024x768@60Hz 24bpp fallback format supported");
-		if (link->dpcd_caps.fallback_formats.raw == 0) {
-			DC_LOG_DP2("\tno supported fallback formats, assume 1920x1080@60Hz 24bpp is supported");
-			link->dpcd_caps.fallback_formats.bits.dp_1920x1080_60Hz_24bpp_support = 1;
-		}
-
-		core_link_read_dpcd(link,
-				DP_FEC_CAPABILITY_1,
-				&link->dpcd_caps.fec_cap1.raw,
-				sizeof(link->dpcd_caps.fec_cap1.raw));
-		DC_LOG_DP2("FEC CAPABILITY 1 is read at link %d", link->link_index);
-		if (link->dpcd_caps.fec_cap1.bits.AGGREGATED_ERROR_COUNTERS_CAPABLE)
-			DC_LOG_DP2("\tFEC aggregated error counters are supported");
-	}
-#endif
-
-	/* Connectivity log: detection */
-	CONN_DATA_DETECT(link, dpcd_data, sizeof(dpcd_data), "Rx Caps: ");
-
-	return true;
-}
-
-/*
- * Re-read the base receiver capability block at DP_DPCD_REV and overwrite
- * the cached link capabilities derived from it (DPCD revision, converter
- * info, lane count, link rate, downspread and eDP configuration bits).
- *
- * Returns false, without touching the cached caps, when the DPCD read fails
- * on every retry. Also returns false when the sink reports a max lane count
- * of 0. Returns true once the caps have been overwritten.
- */
-bool dp_overwrite_extended_receiver_cap(struct dc_link *link)
-{
-	uint8_t dpcd_data[16];
-	uint32_t read_dpcd_retry_cnt = 3;
-	enum dc_status status = DC_ERROR_UNEXPECTED;
-	union dp_downstream_port_present ds_port = { 0 };
-	union down_stream_port_count down_strm_port_count;
-	union edp_configuration_cap edp_config_cap;
-
-	int i;
-
-	memset(dpcd_data, '\0', sizeof(dpcd_data));
-
-	for (i = 0; i < read_dpcd_retry_cnt; i++) {
-		status = core_link_read_dpcd(
-				link,
-				DP_DPCD_REV,
-				dpcd_data,
-				sizeof(dpcd_data));
-		if (status == DC_OK)
-			break;
-	}
-
-	/* Never parse a buffer the sink did not actually fill in. */
-	if (status != DC_OK) {
-		dm_error("%s: Read receiver caps dpcd data failed.\n", __func__);
-		return false;
-	}
-
-	link->dpcd_caps.dpcd_rev.raw =
-		dpcd_data[DP_DPCD_REV - DP_DPCD_REV];
-
-	/* A sink cannot legitimately report a max lane count of 0. */
-	if (dpcd_data[DP_MAX_LANE_COUNT - DP_DPCD_REV] == 0)
-		return false;
-
-	ds_port.byte = dpcd_data[DP_DOWNSTREAMPORT_PRESENT -
-			DP_DPCD_REV];
-
-	get_active_converter_info(ds_port.byte, link);
-
-	down_strm_port_count.raw = dpcd_data[DP_DOWN_STREAM_PORT_COUNT -
-			DP_DPCD_REV];
-
-	link->dpcd_caps.allow_invalid_MSA_timing_param =
-		down_strm_port_count.bits.IGNORE_MSA_TIMING_PARAM;
-
-	link->dpcd_caps.max_ln_count.raw = dpcd_data[
-		DP_MAX_LANE_COUNT - DP_DPCD_REV];
-
-	link->dpcd_caps.max_down_spread.raw = dpcd_data[
-		DP_MAX_DOWNSPREAD - DP_DPCD_REV];
-
-	link->reported_link_cap.lane_count =
-		link->dpcd_caps.max_ln_count.bits.MAX_LANE_COUNT;
-	link->reported_link_cap.link_rate = dpcd_data[
-		DP_MAX_LINK_RATE - DP_DPCD_REV];
-	link->reported_link_cap.link_spread =
-		link->dpcd_caps.max_down_spread.bits.MAX_DOWN_SPREAD ?
-		LINK_SPREAD_05_DOWNSPREAD_30KHZ : LINK_SPREAD_DISABLED;
-
-	edp_config_cap.raw = dpcd_data[
-		DP_EDP_CONFIGURATION_CAP - DP_DPCD_REV];
-	link->dpcd_caps.panel_mode_edp =
-		edp_config_cap.bits.ALT_SCRAMBLER_RESET;
-	link->dpcd_caps.dpcd_display_control_capable =
-		edp_config_cap.bits.DPCD_DISPLAY_CONTROL_CAPABLE;
-
-	return true;
-}
-
-/*
- * Detect the capabilities of a DP sink; thin wrapper that forwards the
- * result of retrieve_link_cap().
- *
- * dc init_hw has already powered the encoder using the default signal for
- * the connector. For native DP there is no need to power up the encoder
- * again. If not native DP, hw_init may need to check the signal or power up
- * the encoder here.
- */
-bool detect_dp_sink_caps(struct dc_link *link)
-{
-	/* TODO save sink caps in link->sink */
-	bool caps_ok = retrieve_link_cap(link);
-
-	return caps_ok;
-}
-
-/*
- * Translate a per-lane link rate given in kHz into the matching
- * enum dc_link_rate value. LinkRate is normally stored as a multiplier of
- * 0.27 Gbps per lane. Any rate not listed below yields LINK_RATE_UNKNOWN.
- */
-static enum dc_link_rate linkRateInKHzToLinkRateMultiplier(uint32_t link_rate_in_khz)
-{
-	switch (link_rate_in_khz) {
-	case 1620000:
-		return LINK_RATE_LOW;		/* Rate_1 (RBR)  - 1.62 Gbps/Lane */
-	case 2160000:
-		return LINK_RATE_RATE_2;	/* Rate_2        - 2.16 Gbps/Lane */
-	case 2430000:
-		return LINK_RATE_RATE_3;	/* Rate_3        - 2.43 Gbps/Lane */
-	case 2700000:
-		return LINK_RATE_HIGH;		/* Rate_4 (HBR)  - 2.70 Gbps/Lane */
-	case 3240000:
-		return LINK_RATE_RBR2;		/* Rate_5 (RBR2) - 3.24 Gbps/Lane */
-	case 4320000:
-		return LINK_RATE_RATE_6;	/* Rate_6        - 4.32 Gbps/Lane */
-	case 5400000:
-		return LINK_RATE_HIGH2;		/* Rate_7 (HBR2) - 5.40 Gbps/Lane */
-	case 8100000:
-		return LINK_RATE_HIGH3;		/* Rate_8 (HBR3) - 8.10 Gbps/Lane */
-	default:
-		return LINK_RATE_UNKNOWN;
-	}
-}
-
-/*
- * Detect eDP sink capabilities.
- *
- * Runs the common capability read, then (for DPCD revision >= DPCD_REV_13,
- * when link rate optimisation is enabled or the reported rate is unknown)
- * parses the DP_SUPPORTED_LINK_RATES table, records each non-zero entry and
- * raises the reported link rate to the highest one found. Finally copies the
- * reported caps to verified_link_cap, reads the backlight adjustment
- * capability and applies the default brightness over AUX.
- */
-void detect_edp_sink_caps(struct dc_link *link)
-{
-	uint8_t supported_link_rates[16];
-	uint32_t entry;
-	uint32_t link_rate_in_khz;
-	enum dc_link_rate link_rate = LINK_RATE_UNKNOWN;
-	/* Zero so that a failed DPCD read below reports "not capable"
-	 * instead of testing an uninitialised stack byte.
-	 */
-	uint8_t backlight_adj_cap = 0;
-
-	retrieve_link_cap(link);
-	link->dpcd_caps.edp_supported_link_rates_count = 0;
-	memset(supported_link_rates, 0, sizeof(supported_link_rates));
-
-	/*
-	 * edp_supported_link_rates_count is only valid for eDP v1.4 or higher.
-	 * Per VESA eDP spec, "The DPCD revision for eDP v1.4 is 13h"
-	 */
-	if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_13 &&
-			(link->dc->debug.optimize_edp_link_rate ||
-			link->reported_link_cap.link_rate == LINK_RATE_UNKNOWN)) {
-		// Read DPCD 00010h - 0001Fh 16 bytes at one shot
-		core_link_read_dpcd(link, DP_SUPPORTED_LINK_RATES,
-				supported_link_rates, sizeof(supported_link_rates));
-
-		/* Each table entry is a little-endian 16-bit value. */
-		for (entry = 0; entry < sizeof(supported_link_rates); entry += 2) {
-			// DPCD register reports per-lane link rate = 16-bit link rate capability
-			// value X 200 kHz. Need multiplier to find link rate in kHz.
-			link_rate_in_khz = (supported_link_rates[entry+1] * 0x100 +
-					supported_link_rates[entry]) * 200;
-
-			if (link_rate_in_khz != 0) {
-				link_rate = linkRateInKHzToLinkRateMultiplier(link_rate_in_khz);
-				link->dpcd_caps.edp_supported_link_rates[link->dpcd_caps.edp_supported_link_rates_count] = link_rate;
-				link->dpcd_caps.edp_supported_link_rates_count++;
-
-				if (link->reported_link_cap.link_rate < link_rate)
-					link->reported_link_cap.link_rate = link_rate;
-			}
-		}
-	}
-	link->verified_link_cap = link->reported_link_cap;
-
-	core_link_read_dpcd(link, DP_EDP_BACKLIGHT_ADJUSTMENT_CAP,
-			&backlight_adj_cap, sizeof(backlight_adj_cap));
-
-	link->dpcd_caps.dynamic_backlight_capable_edp =
-			(backlight_adj_cap & DP_EDP_DYNAMIC_BACKLIGHT_CAP) ? true:false;
-
-	dc_link_set_default_brightness_aux(link);
-}
-
-/*
- * Invoke the link encoder's enable_hpd hook, if the link has an encoder and
- * the encoder provides that hook; otherwise do nothing.
- */
-void dc_link_dp_enable_hpd(const struct dc_link *link)
-{
-	struct link_encoder *enc = link->link_enc;
-
-	if (enc == NULL || enc->funcs->enable_hpd == NULL)
-		return;
-
-	enc->funcs->enable_hpd(enc);
-}
-
-/*
- * Invoke the link encoder's disable_hpd hook, if the link has an encoder and
- * the encoder provides that hook; otherwise do nothing.
- */
-void dc_link_dp_disable_hpd(const struct dc_link *link)
-{
-	struct link_encoder *encoder = link->link_enc;
-
-	/* Guard the hook that is actually called: testing enable_hpd here
-	 * would let a NULL disable_hpd pointer be dereferenced.
-	 */
-	if (encoder != NULL && encoder->funcs->disable_hpd != NULL)
-		encoder->funcs->disable_hpd(encoder);
-}
-
-/*
- * Tell whether @test_pattern is handled on the PHY test pattern path: true
- * for any value within [DP_TEST_PATTERN_PHY_PATTERN_BEGIN,
- * DP_TEST_PATTERN_PHY_PATTERN_END] and for DP_TEST_PATTERN_VIDEO_MODE.
- */
-static bool is_dp_phy_pattern(enum dp_test_pattern test_pattern)
-{
-	const bool in_phy_range =
-		DP_TEST_PATTERN_PHY_PATTERN_BEGIN <= test_pattern &&
-		test_pattern <= DP_TEST_PATTERN_PHY_PATTERN_END;
-
-	return in_phy_range || test_pattern == DP_TEST_PATTERN_VIDEO_MODE;
-}
-
-/*
- * Program the controller-side (CRTC / display pattern generator) test
- * pattern for @pipe_ctx and every ODM pipe chained behind it.
- *
- * For the colour test patterns, bit depth reduction is disabled and the
- * pattern is programmed either through the timing generator's
- * set_test_pattern hook or, if that hook is absent, through the hwss
- * set_disp_pattern_generator hook, one horizontal slice per ODM pipe.
- * For DP_TEST_PATTERN_VIDEO_MODE, bit depth reduction is restored and the
- * generators are put back in video mode. Other patterns are ignored.
- *
- * @link: link the pipe is driving
- * @pipe_ctx: head pipe for the stream
- * @test_pattern: requested DP test pattern
- * @test_pattern_color_space: colour space for the pattern generator path
- */
-static void set_crtc_test_pattern(struct dc_link *link,
-				struct pipe_ctx *pipe_ctx,
-				enum dp_test_pattern test_pattern,
-				enum dp_test_pattern_color_space test_pattern_color_space)
-{
-	enum controller_dp_test_pattern controller_test_pattern;
-	enum dc_color_depth color_depth = pipe_ctx->
-		stream->timing.display_color_depth;
-	struct bit_depth_reduction_params params;
-	struct output_pixel_processor *opp = pipe_ctx->stream_res.opp;
-	int width = pipe_ctx->stream->timing.h_addressable +
-		pipe_ctx->stream->timing.h_border_left +
-		pipe_ctx->stream->timing.h_border_right;
-	int height = pipe_ctx->stream->timing.v_addressable +
-		pipe_ctx->stream->timing.v_border_bottom +
-		pipe_ctx->stream->timing.v_border_top;
-
-	memset(&params, 0, sizeof(params));
-
-	switch (test_pattern) {
-	case DP_TEST_PATTERN_COLOR_SQUARES:
-		controller_test_pattern =
-				CONTROLLER_DP_TEST_PATTERN_COLORSQUARES;
-	break;
-	case DP_TEST_PATTERN_COLOR_SQUARES_CEA:
-		controller_test_pattern =
-				CONTROLLER_DP_TEST_PATTERN_COLORSQUARES_CEA;
-	break;
-	case DP_TEST_PATTERN_VERTICAL_BARS:
-		controller_test_pattern =
-				CONTROLLER_DP_TEST_PATTERN_VERTICALBARS;
-	break;
-	case DP_TEST_PATTERN_HORIZONTAL_BARS:
-		controller_test_pattern =
-				CONTROLLER_DP_TEST_PATTERN_HORIZONTALBARS;
-	break;
-	case DP_TEST_PATTERN_COLOR_RAMP:
-		controller_test_pattern =
-				CONTROLLER_DP_TEST_PATTERN_COLORRAMP;
-	break;
-	default:
-		controller_test_pattern =
-				CONTROLLER_DP_TEST_PATTERN_VIDEOMODE;
-	break;
-	}
-
-	switch (test_pattern) {
-	case DP_TEST_PATTERN_COLOR_SQUARES:
-	case DP_TEST_PATTERN_COLOR_SQUARES_CEA:
-	case DP_TEST_PATTERN_VERTICAL_BARS:
-	case DP_TEST_PATTERN_HORIZONTAL_BARS:
-	case DP_TEST_PATTERN_COLOR_RAMP:
-	{
-		/* disable bit depth reduction */
-		pipe_ctx->stream->bit_depth_params = params;
-		opp->funcs->opp_program_bit_depth_reduction(opp, &params);
-		if (pipe_ctx->stream_res.tg->funcs->set_test_pattern)
-			pipe_ctx->stream_res.tg->funcs->set_test_pattern(pipe_ctx->stream_res.tg,
-				controller_test_pattern, color_depth);
-		else if (link->dc->hwss.set_disp_pattern_generator) {
-			struct pipe_ctx *odm_pipe;
-			enum controller_dp_color_space controller_color_space;
-			int opp_cnt = 1;
-			int offset = 0;
-			int dpg_width = width;
-
-			switch (test_pattern_color_space) {
-			case DP_TEST_PATTERN_COLOR_SPACE_RGB:
-				controller_color_space = CONTROLLER_DP_COLOR_SPACE_RGB;
-				break;
-			case DP_TEST_PATTERN_COLOR_SPACE_YCBCR601:
-				controller_color_space = CONTROLLER_DP_COLOR_SPACE_YCBCR601;
-				break;
-			case DP_TEST_PATTERN_COLOR_SPACE_YCBCR709:
-				controller_color_space = CONTROLLER_DP_COLOR_SPACE_YCBCR709;
-				break;
-			case DP_TEST_PATTERN_COLOR_SPACE_UNDEFINED:
-			default:
-				controller_color_space = CONTROLLER_DP_COLOR_SPACE_UDEFINED;
-				DC_LOG_ERROR("%s: Color space must be defined for test pattern", __func__);
-				ASSERT(0);
-				break;
-			}
-
-			/* Split the active width evenly across all ODM pipes. */
-			for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
-				opp_cnt++;
-			dpg_width = width / opp_cnt;
-			offset = dpg_width;
-
-			link->dc->hwss.set_disp_pattern_generator(link->dc,
-					pipe_ctx,
-					controller_test_pattern,
-					controller_color_space,
-					color_depth,
-					NULL,
-					dpg_width,
-					height,
-					0);
-
-			for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
-				struct output_pixel_processor *odm_opp = odm_pipe->stream_res.opp;
-
-				odm_opp->funcs->opp_program_bit_depth_reduction(odm_opp, &params);
-				link->dc->hwss.set_disp_pattern_generator(link->dc,
-						odm_pipe,
-						controller_test_pattern,
-						controller_color_space,
-						color_depth,
-						NULL,
-						dpg_width,
-						height,
-						offset);
-				/* Step by one slice width per pipe; doubling
-				 * the offset is only right for a single
-				 * extra ODM pipe.
-				 */
-				offset += dpg_width;
-			}
-		}
-	}
-	break;
-	case DP_TEST_PATTERN_VIDEO_MODE:
-	{
-		/* restore bitdepth reduction */
-		resource_build_bit_depth_reduction_params(pipe_ctx->stream, &params);
-		pipe_ctx->stream->bit_depth_params = params;
-		opp->funcs->opp_program_bit_depth_reduction(opp, &params);
-		if (pipe_ctx->stream_res.tg->funcs->set_test_pattern)
-			pipe_ctx->stream_res.tg->funcs->set_test_pattern(pipe_ctx->stream_res.tg,
-				CONTROLLER_DP_TEST_PATTERN_VIDEOMODE,
-				color_depth);
-		else if (link->dc->hwss.set_disp_pattern_generator) {
-			struct pipe_ctx *odm_pipe;
-			int opp_cnt = 1;
-			int dpg_width = width;
-
-			for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
-				opp_cnt++;
-
-			dpg_width = width / opp_cnt;
-			/* ODM pipes are programmed first, then the head pipe. */
-			for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
-				struct output_pixel_processor *odm_opp = odm_pipe->stream_res.opp;
-
-				odm_opp->funcs->opp_program_bit_depth_reduction(odm_opp, &params);
-				link->dc->hwss.set_disp_pattern_generator(link->dc,
-						odm_pipe,
-						CONTROLLER_DP_TEST_PATTERN_VIDEOMODE,
-						CONTROLLER_DP_COLOR_SPACE_UDEFINED,
-						color_depth,
-						NULL,
-						dpg_width,
-						height,
-						0);
-			}
-			link->dc->hwss.set_disp_pattern_generator(link->dc,
-					pipe_ctx,
-					CONTROLLER_DP_TEST_PATTERN_VIDEOMODE,
-					CONTROLLER_DP_COLOR_SPACE_UDEFINED,
-					color_depth,
-					NULL,
-					dpg_width,
-					height,
-					0);
-		}
-	}
-	break;
-
-	default:
-	break;
-	}
-}
-
-bool dc_link_dp_set_test_pattern(
- struct dc_link *link,
- enum dp_test_pattern test_pattern,
- enum dp_test_pattern_color_space test_pattern_color_space,
- const struct link_training_settings *p_link_settings,
- const unsigned char *p_custom_pattern,
- unsigned int cust_pattern_size)
-{
- struct pipe_ctx *pipes = link->dc->current_state->res_ctx.pipe_ctx;
- struct pipe_ctx *pipe_ctx = NULL;
- unsigned int lane;
- unsigned int i;
- unsigned char link_qual_pattern[LANE_COUNT_DP_MAX] = {0};
- union dpcd_training_pattern training_pattern;
- enum dpcd_phy_test_patterns pattern;
-
- memset(&training_pattern, 0, sizeof(training_pattern));
-
- for (i = 0; i < MAX_PIPES; i++) {
- if (pipes[i].stream == NULL)
- continue;
-
- if (pipes[i].stream->link == link && !pipes[i].top_pipe && !pipes[i].prev_odm_pipe) {
- pipe_ctx = &pipes[i];
- break;
- }
- }
-
- if (pipe_ctx == NULL)
- return false;
-
- /* Reset CRTC Test Pattern if it is currently running and request is VideoMode */
- if (link->test_pattern_enabled && test_pattern ==
- DP_TEST_PATTERN_VIDEO_MODE) {
- /* Set CRTC Test Pattern */
- set_crtc_test_pattern(link, pipe_ctx, test_pattern, test_pattern_color_space);
- dp_set_hw_test_pattern(link, test_pattern,
- (uint8_t *)p_custom_pattern,
- (uint32_t)cust_pattern_size);
-
- /* Unblank Stream */
- link->dc->hwss.unblank_stream(
- pipe_ctx,
- &link->verified_link_cap);
- /* TODO:m_pHwss->MuteAudioEndpoint
- * (pPathMode->pDisplayPath, false);
- */
-
- /* Reset Test Pattern state */
- link->test_pattern_enabled = false;
-
- return true;
- }
-
- /* Check for PHY Test Patterns */
- if (is_dp_phy_pattern(test_pattern)) {
- /* Set DPCD Lane Settings before running test pattern */
- if (p_link_settings != NULL) {
- dp_set_hw_lane_settings(link, p_link_settings, DPRX);
- dpcd_set_lane_settings(link, p_link_settings, DPRX);
- }
-
- /* Blank stream if running test pattern */
- if (test_pattern != DP_TEST_PATTERN_VIDEO_MODE) {
- /*TODO:
- * m_pHwss->
- * MuteAudioEndpoint(pPathMode->pDisplayPath, true);
- */
- /* Blank stream */
- pipes->stream_res.stream_enc->funcs->dp_blank(link, pipe_ctx->stream_res.stream_enc);
- }
-
- dp_set_hw_test_pattern(link, test_pattern,
- (uint8_t *)p_custom_pattern,
- (uint32_t)cust_pattern_size);
-
- if (test_pattern != DP_TEST_PATTERN_VIDEO_MODE) {
- /* Set Test Pattern state */
- link->test_pattern_enabled = true;
- if (p_link_settings != NULL)
- dpcd_set_link_settings(link,
- p_link_settings);
- }
-
- switch (test_pattern) {
- case DP_TEST_PATTERN_VIDEO_MODE:
- pattern = PHY_TEST_PATTERN_NONE;
- break;
- case DP_TEST_PATTERN_D102:
- pattern = PHY_TEST_PATTERN_D10_2;
- break;
- case DP_TEST_PATTERN_SYMBOL_ERROR:
- pattern = PHY_TEST_PATTERN_SYMBOL_ERROR;
- break;
- case DP_TEST_PATTERN_PRBS7:
- pattern = PHY_TEST_PATTERN_PRBS7;
- break;
- case DP_TEST_PATTERN_80BIT_CUSTOM:
- pattern = PHY_TEST_PATTERN_80BIT_CUSTOM;
- break;
- case DP_TEST_PATTERN_CP2520_1:
- pattern = PHY_TEST_PATTERN_CP2520_1;
- break;
- case DP_TEST_PATTERN_CP2520_2:
- pattern = PHY_TEST_PATTERN_CP2520_2;
- break;
- case DP_TEST_PATTERN_CP2520_3:
- pattern = PHY_TEST_PATTERN_CP2520_3;
- break;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- case DP_TEST_PATTERN_128b_132b_TPS1:
- pattern = PHY_TEST_PATTERN_128b_132b_TPS1;
- break;
- case DP_TEST_PATTERN_128b_132b_TPS2:
- pattern = PHY_TEST_PATTERN_128b_132b_TPS2;
- break;
- case DP_TEST_PATTERN_PRBS9:
- pattern = PHY_TEST_PATTERN_PRBS9;
- break;
- case DP_TEST_PATTERN_PRBS11:
- pattern = PHY_TEST_PATTERN_PRBS11;
- break;
- case DP_TEST_PATTERN_PRBS15:
- pattern = PHY_TEST_PATTERN_PRBS15;
- break;
- case DP_TEST_PATTERN_PRBS23:
- pattern = PHY_TEST_PATTERN_PRBS23;
- break;
- case DP_TEST_PATTERN_PRBS31:
- pattern = PHY_TEST_PATTERN_PRBS31;
- break;
- case DP_TEST_PATTERN_264BIT_CUSTOM:
- pattern = PHY_TEST_PATTERN_264BIT_CUSTOM;
- break;
- case DP_TEST_PATTERN_SQUARE_PULSE:
- pattern = PHY_TEST_PATTERN_SQUARE_PULSE;
- break;
-#endif
- default:
- return false;
- }
-
- if (test_pattern == DP_TEST_PATTERN_VIDEO_MODE
- /*TODO:&& !pPathMode->pDisplayPath->IsTargetPoweredOn()*/)
- return false;
-
- if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) {
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (test_pattern == DP_TEST_PATTERN_SQUARE_PULSE)
- core_link_write_dpcd(link,
- DP_LINK_SQUARE_PATTERN,
- p_custom_pattern,
- 1);
-
-#endif
- /* tell receiver that we are sending qualification
- * pattern DP 1.2 or later - DP receiver's link quality
- * pattern is set using DPCD LINK_QUAL_LANEx_SET
- * register (0x10B~0x10E)\
- */
- for (lane = 0; lane < LANE_COUNT_DP_MAX; lane++)
- link_qual_pattern[lane] =
- (unsigned char)(pattern);
-
- core_link_write_dpcd(link,
- DP_LINK_QUAL_LANE0_SET,
- link_qual_pattern,
- sizeof(link_qual_pattern));
- } else if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_10 ||
- link->dpcd_caps.dpcd_rev.raw == 0) {
- /* tell receiver that we are sending qualification
- * pattern DP 1.1a or earlier - DP receiver's link
- * quality pattern is set using
- * DPCD TRAINING_PATTERN_SET -> LINK_QUAL_PATTERN_SET
- * register (0x102). We will use v_1.3 when we are
- * setting test pattern for DP 1.1.
- */
- core_link_read_dpcd(link, DP_TRAINING_PATTERN_SET,
- &training_pattern.raw,
- sizeof(training_pattern));
- training_pattern.v1_3.LINK_QUAL_PATTERN_SET = pattern;
- core_link_write_dpcd(link, DP_TRAINING_PATTERN_SET,
- &training_pattern.raw,
- sizeof(training_pattern));
- }
- } else {
- enum dc_color_space color_space = COLOR_SPACE_UNKNOWN;
-
- switch (test_pattern_color_space) {
- case DP_TEST_PATTERN_COLOR_SPACE_RGB:
- color_space = COLOR_SPACE_SRGB;
- if (test_pattern == DP_TEST_PATTERN_COLOR_SQUARES_CEA)
- color_space = COLOR_SPACE_SRGB_LIMITED;
- break;
-
- case DP_TEST_PATTERN_COLOR_SPACE_YCBCR601:
- color_space = COLOR_SPACE_YCBCR601;
- if (test_pattern == DP_TEST_PATTERN_COLOR_SQUARES_CEA)
- color_space = COLOR_SPACE_YCBCR601_LIMITED;
- break;
- case DP_TEST_PATTERN_COLOR_SPACE_YCBCR709:
- color_space = COLOR_SPACE_YCBCR709;
- if (test_pattern == DP_TEST_PATTERN_COLOR_SQUARES_CEA)
- color_space = COLOR_SPACE_YCBCR709_LIMITED;
- break;
- default:
- break;
- }
-
- if (pipe_ctx->stream_res.tg->funcs->lock_doublebuffer_enable) {
- if (pipe_ctx->stream && should_use_dmub_lock(pipe_ctx->stream->link)) {
- union dmub_hw_lock_flags hw_locks = { 0 };
- struct dmub_hw_lock_inst_flags inst_flags = { 0 };
-
- hw_locks.bits.lock_dig = 1;
- inst_flags.dig_inst = pipe_ctx->stream_res.tg->inst;
-
- dmub_hw_lock_mgr_cmd(link->ctx->dmub_srv,
- true,
- &hw_locks,
- &inst_flags);
- } else
- pipe_ctx->stream_res.tg->funcs->lock_doublebuffer_enable(
- pipe_ctx->stream_res.tg);
- }
-
- pipe_ctx->stream_res.tg->funcs->lock(pipe_ctx->stream_res.tg);
- /* update MSA to requested color space */
- pipe_ctx->stream_res.stream_enc->funcs->dp_set_stream_attribute(pipe_ctx->stream_res.stream_enc,
- &pipe_ctx->stream->timing,
- color_space,
- pipe_ctx->stream->use_vsc_sdp_for_colorimetry,
- link->dpcd_caps.dprx_feature.bits.SST_SPLIT_SDP_CAP);
-
- if (pipe_ctx->stream->use_vsc_sdp_for_colorimetry) {
- if (test_pattern == DP_TEST_PATTERN_COLOR_SQUARES_CEA)
- pipe_ctx->stream->vsc_infopacket.sb[17] |= (1 << 7); // sb17 bit 7 Dynamic Range: 0 = VESA range, 1 = CTA range
- else
- pipe_ctx->stream->vsc_infopacket.sb[17] &= ~(1 << 7);
- resource_build_info_frame(pipe_ctx);
- link->dc->hwss.update_info_frame(pipe_ctx);
- }
-
- /* CRTC Patterns */
- set_crtc_test_pattern(link, pipe_ctx, test_pattern, test_pattern_color_space);
- pipe_ctx->stream_res.tg->funcs->unlock(pipe_ctx->stream_res.tg);
- pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg,
- CRTC_STATE_VACTIVE);
- pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg,
- CRTC_STATE_VBLANK);
- pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg,
- CRTC_STATE_VACTIVE);
-
- if (pipe_ctx->stream_res.tg->funcs->lock_doublebuffer_disable) {
- if (pipe_ctx->stream && should_use_dmub_lock(pipe_ctx->stream->link)) {
- union dmub_hw_lock_flags hw_locks = { 0 };
- struct dmub_hw_lock_inst_flags inst_flags = { 0 };
-
- hw_locks.bits.lock_dig = 1;
- inst_flags.dig_inst = pipe_ctx->stream_res.tg->inst;
-
- dmub_hw_lock_mgr_cmd(link->ctx->dmub_srv,
- false,
- &hw_locks,
- &inst_flags);
- } else
- pipe_ctx->stream_res.tg->funcs->lock_doublebuffer_disable(
- pipe_ctx->stream_res.tg);
- }
-
- /* Set Test Pattern state */
- link->test_pattern_enabled = true;
- }
-
- return true;
-}
-
-void dp_enable_mst_on_sink(struct dc_link *link, bool enable)
-{
- unsigned char mstmCntl;
-
- core_link_read_dpcd(link, DP_MSTM_CTRL, &mstmCntl, 1);
- if (enable)
- mstmCntl |= DP_MST_EN;
- else
- mstmCntl &= (~DP_MST_EN);
-
- core_link_write_dpcd(link, DP_MSTM_CTRL, &mstmCntl, 1);
-}
-
-void dp_set_panel_mode(struct dc_link *link, enum dp_panel_mode panel_mode)
-{
- union dpcd_edp_config edp_config_set;
- bool panel_mode_edp = false;
-
- memset(&edp_config_set, '\0', sizeof(union dpcd_edp_config));
-
- if (panel_mode != DP_PANEL_MODE_DEFAULT) {
-
- switch (panel_mode) {
- case DP_PANEL_MODE_EDP:
- case DP_PANEL_MODE_SPECIAL:
- panel_mode_edp = true;
- break;
-
- default:
- break;
- }
-
- /*set edp panel mode in receiver*/
- core_link_read_dpcd(
- link,
- DP_EDP_CONFIGURATION_SET,
- &edp_config_set.raw,
- sizeof(edp_config_set.raw));
-
- if (edp_config_set.bits.PANEL_MODE_EDP
- != panel_mode_edp) {
- enum dc_status result;
-
- edp_config_set.bits.PANEL_MODE_EDP =
- panel_mode_edp;
- result = core_link_write_dpcd(
- link,
- DP_EDP_CONFIGURATION_SET,
- &edp_config_set.raw,
- sizeof(edp_config_set.raw));
-
- ASSERT(result == DC_OK);
- }
- }
- DC_LOG_DETECTION_DP_CAPS("Link: %d eDP panel mode supported: %d "
- "eDP panel mode enabled: %d \n",
- link->link_index,
- link->dpcd_caps.panel_mode_edp,
- panel_mode_edp);
-}
-
-enum dp_panel_mode dp_get_panel_mode(struct dc_link *link)
-{
- /* We need to explicitly check that connector
- * is not DP. Some Travis_VGA get reported
- * by video bios as DP.
- */
- if (link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT) {
-
- switch (link->dpcd_caps.branch_dev_id) {
- case DP_BRANCH_DEVICE_ID_0022B9:
- /* alternate scrambler reset is required for Travis
- * for the case when external chip does not
- * provide sink device id, alternate scrambler
- * scheme will be overriden later by querying
- * Encoder features
- */
- if (strncmp(
- link->dpcd_caps.branch_dev_name,
- DP_VGA_LVDS_CONVERTER_ID_2,
- sizeof(
- link->dpcd_caps.
- branch_dev_name)) == 0) {
- return DP_PANEL_MODE_SPECIAL;
- }
- break;
- case DP_BRANCH_DEVICE_ID_00001A:
- /* alternate scrambler reset is required for Travis
- * for the case when external chip does not provide
- * sink device id, alternate scrambler scheme will
- * be overriden later by querying Encoder feature
- */
- if (strncmp(link->dpcd_caps.branch_dev_name,
- DP_VGA_LVDS_CONVERTER_ID_3,
- sizeof(
- link->dpcd_caps.
- branch_dev_name)) == 0) {
- return DP_PANEL_MODE_SPECIAL;
- }
- break;
- default:
- break;
- }
- }
-
- if (link->dpcd_caps.panel_mode_edp &&
- (link->connector_signal == SIGNAL_TYPE_EDP ||
- (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT &&
- link->is_internal_display))) {
- return DP_PANEL_MODE_EDP;
- }
-
- return DP_PANEL_MODE_DEFAULT;
-}
-
-enum dc_status dp_set_fec_ready(struct dc_link *link, bool ready)
-{
- /* FEC has to be "set ready" before the link training.
- * The policy is to always train with FEC
- * if the sink supports it and leave it enabled on link.
- * If FEC is not supported, disable it.
- */
- struct link_encoder *link_enc = NULL;
- enum dc_status status = DC_OK;
- uint8_t fec_config = 0;
-
- /* Access link encoder based on whether it is statically
- * or dynamically assigned to a link.
- */
- if (link->is_dig_mapping_flexible &&
- link->dc->res_pool->funcs->link_encs_assign)
- link_enc = link_enc_cfg_get_link_enc_used_by_link(link->ctx->dc, link);
- else
- link_enc = link->link_enc;
- ASSERT(link_enc);
-
- if (!dc_link_should_enable_fec(link))
- return status;
-
- if (link_enc->funcs->fec_set_ready &&
- link->dpcd_caps.fec_cap.bits.FEC_CAPABLE) {
- if (ready) {
- fec_config = 1;
- status = core_link_write_dpcd(link,
- DP_FEC_CONFIGURATION,
- &fec_config,
- sizeof(fec_config));
- if (status == DC_OK) {
- link_enc->funcs->fec_set_ready(link_enc, true);
- link->fec_state = dc_link_fec_ready;
- } else {
- link_enc->funcs->fec_set_ready(link_enc, false);
- link->fec_state = dc_link_fec_not_ready;
- dm_error("dpcd write failed to set fec_ready");
- }
- } else if (link->fec_state == dc_link_fec_ready) {
- fec_config = 0;
- status = core_link_write_dpcd(link,
- DP_FEC_CONFIGURATION,
- &fec_config,
- sizeof(fec_config));
- link_enc->funcs->fec_set_ready(link_enc, false);
- link->fec_state = dc_link_fec_not_ready;
- }
- }
-
- return status;
-}
-
-void dp_set_fec_enable(struct dc_link *link, bool enable)
-{
- struct link_encoder *link_enc = NULL;
-
- /* Access link encoder based on whether it is statically
- * or dynamically assigned to a link.
- */
- if (link->is_dig_mapping_flexible &&
- link->dc->res_pool->funcs->link_encs_assign)
- link_enc = link_enc_cfg_get_link_enc_used_by_link(link->ctx->dc, link);
- else
- link_enc = link->link_enc;
- ASSERT(link_enc);
-
- if (!dc_link_should_enable_fec(link))
- return;
-
- if (link_enc->funcs->fec_set_enable &&
- link->dpcd_caps.fec_cap.bits.FEC_CAPABLE) {
- if (link->fec_state == dc_link_fec_ready && enable) {
- /* Accord to DP spec, FEC enable sequence can first
- * be transmitted anytime after 1000 LL codes have
- * been transmitted on the link after link training
- * completion. Using 1 lane RBR should have the maximum
- * time for transmitting 1000 LL codes which is 6.173 us.
- * So use 7 microseconds delay instead.
- */
- udelay(7);
- link_enc->funcs->fec_set_enable(link_enc, true);
- link->fec_state = dc_link_fec_enabled;
- } else if (link->fec_state == dc_link_fec_enabled && !enable) {
- link_enc->funcs->fec_set_enable(link_enc, false);
- link->fec_state = dc_link_fec_ready;
- }
- }
-}
-
-void dpcd_set_source_specific_data(struct dc_link *link)
-{
- if (!link->dc->vendor_signature.is_valid) {
- enum dc_status __maybe_unused result_write_min_hblank = DC_NOT_SUPPORTED;
- struct dpcd_amd_signature amd_signature = {0};
- struct dpcd_amd_device_id amd_device_id = {0};
-
- amd_device_id.device_id_byte1 =
- (uint8_t)(link->ctx->asic_id.chip_id);
- amd_device_id.device_id_byte2 =
- (uint8_t)(link->ctx->asic_id.chip_id >> 8);
- amd_device_id.dce_version =
- (uint8_t)(link->ctx->dce_version);
- amd_device_id.dal_version_byte1 = 0x0; // needed? where to get?
- amd_device_id.dal_version_byte2 = 0x0; // needed? where to get?
-
- core_link_read_dpcd(link, DP_SOURCE_OUI,
- (uint8_t *)(&amd_signature),
- sizeof(amd_signature));
-
- if (!((amd_signature.AMD_IEEE_TxSignature_byte1 == 0x0) &&
- (amd_signature.AMD_IEEE_TxSignature_byte2 == 0x0) &&
- (amd_signature.AMD_IEEE_TxSignature_byte3 == 0x1A))) {
-
- amd_signature.AMD_IEEE_TxSignature_byte1 = 0x0;
- amd_signature.AMD_IEEE_TxSignature_byte2 = 0x0;
- amd_signature.AMD_IEEE_TxSignature_byte3 = 0x1A;
-
- core_link_write_dpcd(link, DP_SOURCE_OUI,
- (uint8_t *)(&amd_signature),
- sizeof(amd_signature));
- }
-
- core_link_write_dpcd(link, DP_SOURCE_OUI+0x03,
- (uint8_t *)(&amd_device_id),
- sizeof(amd_device_id));
-
- if (link->ctx->dce_version >= DCN_VERSION_2_0 &&
- link->dc->caps.min_horizontal_blanking_period != 0) {
-
- uint8_t hblank_size = (uint8_t)link->dc->caps.min_horizontal_blanking_period;
-
- if (link->preferred_link_setting.dpcd_source_device_specific_field_support) {
- result_write_min_hblank = core_link_write_dpcd(link,
- DP_SOURCE_MINIMUM_HBLANK_SUPPORTED, (uint8_t *)(&hblank_size),
- sizeof(hblank_size));
-
- if (result_write_min_hblank == DC_ERROR_UNEXPECTED)
- link->preferred_link_setting.dpcd_source_device_specific_field_support = false;
- } else {
- DC_LOG_DC("Sink device does not support 00340h DPCD write. Skipping on purpose.\n");
- }
- }
-
- DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION,
- WPP_BIT_FLAG_DC_DETECTION_DP_CAPS,
- "result=%u link_index=%u enum dce_version=%d DPCD=0x%04X min_hblank=%u branch_dev_id=0x%x branch_dev_name='%c%c%c%c%c%c'",
- result_write_min_hblank,
- link->link_index,
- link->ctx->dce_version,
- DP_SOURCE_MINIMUM_HBLANK_SUPPORTED,
- link->dc->caps.min_horizontal_blanking_period,
- link->dpcd_caps.branch_dev_id,
- link->dpcd_caps.branch_dev_name[0],
- link->dpcd_caps.branch_dev_name[1],
- link->dpcd_caps.branch_dev_name[2],
- link->dpcd_caps.branch_dev_name[3],
- link->dpcd_caps.branch_dev_name[4],
- link->dpcd_caps.branch_dev_name[5]);
- } else {
- core_link_write_dpcd(link, DP_SOURCE_OUI,
- link->dc->vendor_signature.data.raw,
- sizeof(link->dc->vendor_signature.data.raw));
- }
-}
-
-bool dc_link_set_backlight_level_nits(struct dc_link *link,
- bool isHDR,
- uint32_t backlight_millinits,
- uint32_t transition_time_in_ms)
-{
- struct dpcd_source_backlight_set dpcd_backlight_set;
- uint8_t backlight_control = isHDR ? 1 : 0;
-
- if (!link || (link->connector_signal != SIGNAL_TYPE_EDP &&
- link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT))
- return false;
-
- // OLEDs have no PWM, they can only use AUX
- if (link->dpcd_sink_ext_caps.bits.oled == 1)
- backlight_control = 1;
-
- *(uint32_t *)&dpcd_backlight_set.backlight_level_millinits = backlight_millinits;
- *(uint16_t *)&dpcd_backlight_set.backlight_transition_time_ms = (uint16_t)transition_time_in_ms;
-
-
- if (core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_LEVEL,
- (uint8_t *)(&dpcd_backlight_set),
- sizeof(dpcd_backlight_set)) != DC_OK)
- return false;
-
- if (core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_CONTROL,
- &backlight_control, 1) != DC_OK)
- return false;
-
- return true;
-}
-
-bool dc_link_get_backlight_level_nits(struct dc_link *link,
- uint32_t *backlight_millinits_avg,
- uint32_t *backlight_millinits_peak)
-{
- union dpcd_source_backlight_get dpcd_backlight_get;
-
- memset(&dpcd_backlight_get, 0, sizeof(union dpcd_source_backlight_get));
-
- if (!link || (link->connector_signal != SIGNAL_TYPE_EDP &&
- link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT))
- return false;
-
- if (core_link_read_dpcd(link, DP_SOURCE_BACKLIGHT_CURRENT_PEAK,
- dpcd_backlight_get.raw,
- sizeof(union dpcd_source_backlight_get)) != DC_OK)
- return false;
-
- *backlight_millinits_avg =
- dpcd_backlight_get.bytes.backlight_millinits_avg;
- *backlight_millinits_peak =
- dpcd_backlight_get.bytes.backlight_millinits_peak;
-
- /* On non-supported panels dpcd_read usually succeeds with 0 returned */
- if (*backlight_millinits_avg == 0 ||
- *backlight_millinits_avg > *backlight_millinits_peak)
- return false;
-
- return true;
-}
-
-bool dc_link_backlight_enable_aux(struct dc_link *link, bool enable)
-{
- uint8_t backlight_enable = enable ? 1 : 0;
-
- if (!link || (link->connector_signal != SIGNAL_TYPE_EDP &&
- link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT))
- return false;
-
- if (core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_ENABLE,
- &backlight_enable, 1) != DC_OK)
- return false;
-
- return true;
-}
-
-// we read default from 0x320 because we expect BIOS wrote it there
-// regular get_backlight_nit reads from panel set at 0x326
-bool dc_link_read_default_bl_aux(struct dc_link *link, uint32_t *backlight_millinits)
-{
- if (!link || (link->connector_signal != SIGNAL_TYPE_EDP &&
- link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT))
- return false;
-
- if (core_link_read_dpcd(link, DP_SOURCE_BACKLIGHT_LEVEL,
- (uint8_t *) backlight_millinits,
- sizeof(uint32_t)) != DC_OK)
- return false;
-
- return true;
-}
-
-bool dc_link_set_default_brightness_aux(struct dc_link *link)
-{
- uint32_t default_backlight;
-
- if (link && link->dpcd_sink_ext_caps.bits.oled == 1) {
- if (!dc_link_read_default_bl_aux(link, &default_backlight))
- default_backlight = 150000;
- // if < 5 nits or > 5000, it might be wrong readback
- if (default_backlight < 5000 || default_backlight > 5000000)
- default_backlight = 150000; //
-
- return dc_link_set_backlight_level_nits(link, true,
- default_backlight, 0);
- }
- return false;
-}
-
-bool is_edp_ilr_optimization_required(struct dc_link *link, struct dc_crtc_timing *crtc_timing)
-{
- struct dc_link_settings link_setting;
- uint8_t link_bw_set;
- uint8_t link_rate_set;
- uint32_t req_bw;
- union lane_count_set lane_count_set = {0};
-
- ASSERT(link || crtc_timing); // invalid input
-
- if (link->dpcd_caps.edp_supported_link_rates_count == 0 ||
- !link->dc->debug.optimize_edp_link_rate)
- return false;
-
-
- // Read DPCD 00100h to find if standard link rates are set
- core_link_read_dpcd(link, DP_LINK_BW_SET,
- &link_bw_set, sizeof(link_bw_set));
-
- if (link_bw_set) {
- DC_LOG_EVENT_LINK_TRAINING("eDP ILR: Optimization required, VBIOS used link_bw_set\n");
- return true;
- }
-
- // Read DPCD 00115h to find the edp link rate set used
- core_link_read_dpcd(link, DP_LINK_RATE_SET,
- &link_rate_set, sizeof(link_rate_set));
-
- // Read DPCD 00101h to find out the number of lanes currently set
- core_link_read_dpcd(link, DP_LANE_COUNT_SET,
- &lane_count_set.raw, sizeof(lane_count_set));
-
- req_bw = dc_bandwidth_in_kbps_from_timing(crtc_timing);
-
- decide_edp_link_settings(link, &link_setting, req_bw);
-
- if (link->dpcd_caps.edp_supported_link_rates[link_rate_set] != link_setting.link_rate ||
- lane_count_set.bits.LANE_COUNT_SET != link_setting.lane_count) {
- DC_LOG_EVENT_LINK_TRAINING("eDP ILR: Optimization required, VBIOS link_rate_set not optimal\n");
- return true;
- }
-
- DC_LOG_EVENT_LINK_TRAINING("eDP ILR: No optimization required, VBIOS set optimal link_rate_set\n");
- return false;
-}
-
-enum dp_link_encoding dp_get_link_encoding_format(const struct dc_link_settings *link_settings)
-{
- if ((link_settings->link_rate >= LINK_RATE_LOW) &&
- (link_settings->link_rate <= LINK_RATE_HIGH3))
- return DP_8b_10b_ENCODING;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- else if ((link_settings->link_rate >= LINK_RATE_UHBR10) &&
- (link_settings->link_rate <= LINK_RATE_UHBR20))
- return DP_128b_132b_ENCODING;
-#endif
- return DP_UNKNOWN_ENCODING;
-}
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-enum dp_link_encoding dc_link_dp_mst_decide_link_encoding_format(const struct dc_link *link)
-{
- struct dc_link_settings link_settings = {0};
-
- if (!dc_is_dp_signal(link->connector_signal))
- return DP_UNKNOWN_ENCODING;
-
- if (link->preferred_link_setting.lane_count !=
- LANE_COUNT_UNKNOWN &&
- link->preferred_link_setting.link_rate !=
- LINK_RATE_UNKNOWN) {
- link_settings = link->preferred_link_setting;
- } else {
- decide_mst_link_settings(link, &link_settings);
- }
-
- return dp_get_link_encoding_format(&link_settings);
-}
-
-// TODO - DP2.0 Link: Fix get_lane_status to handle LTTPR offset (SST and MST)
-static void get_lane_status(
- struct dc_link *link,
- uint32_t lane_count,
- union lane_status *status,
- union lane_align_status_updated *status_updated)
-{
- unsigned int lane;
- uint8_t dpcd_buf[3] = {0};
-
- if (status == NULL || status_updated == NULL) {
- return;
- }
-
- core_link_read_dpcd(
- link,
- DP_LANE0_1_STATUS,
- dpcd_buf,
- sizeof(dpcd_buf));
-
- for (lane = 0; lane < lane_count; lane++) {
- status[lane].raw = get_nibble_at_index(&dpcd_buf[0], lane);
- }
-
- status_updated->raw = dpcd_buf[2];
-}
-
-bool dpcd_write_128b_132b_sst_payload_allocation_table(
- const struct dc_stream_state *stream,
- struct dc_link *link,
- struct link_mst_stream_allocation_table *proposed_table,
- bool allocate)
-{
- const uint8_t vc_id = 1; /// VC ID always 1 for SST
- const uint8_t start_time_slot = 0; /// Always start at time slot 0 for SST
- bool result = false;
- uint8_t req_slot_count = 0;
- struct fixed31_32 avg_time_slots_per_mtp = { 0 };
- union payload_table_update_status update_status = { 0 };
- const uint32_t max_retries = 30;
- uint32_t retries = 0;
-
- if (allocate) {
- avg_time_slots_per_mtp = calculate_sst_avg_time_slots_per_mtp(stream, link);
- req_slot_count = dc_fixpt_ceil(avg_time_slots_per_mtp);
- } else {
- /// Leave req_slot_count = 0 if allocate is false.
- }
-
- /// Write DPCD 2C0 = 1 to start updating
- update_status.bits.VC_PAYLOAD_TABLE_UPDATED = 1;
- core_link_write_dpcd(
- link,
- DP_PAYLOAD_TABLE_UPDATE_STATUS,
- &update_status.raw,
- 1);
-
- /// Program the changes in DPCD 1C0 - 1C2
- ASSERT(vc_id == 1);
- core_link_write_dpcd(
- link,
- DP_PAYLOAD_ALLOCATE_SET,
- &vc_id,
- 1);
-
- ASSERT(start_time_slot == 0);
- core_link_write_dpcd(
- link,
- DP_PAYLOAD_ALLOCATE_START_TIME_SLOT,
- &start_time_slot,
- 1);
-
- ASSERT(req_slot_count <= MAX_MTP_SLOT_COUNT); /// Validation should filter out modes that exceed link BW
- core_link_write_dpcd(
- link,
- DP_PAYLOAD_ALLOCATE_TIME_SLOT_COUNT,
- &req_slot_count,
- 1);
-
- /// Poll till DPCD 2C0 read 1
- /// Try for at least 150ms (30 retries, with 5ms delay after each attempt)
-
- while (retries < max_retries) {
- if (core_link_read_dpcd(
- link,
- DP_PAYLOAD_TABLE_UPDATE_STATUS,
- &update_status.raw,
- 1) == DC_OK) {
- if (update_status.bits.VC_PAYLOAD_TABLE_UPDATED == 1) {
- DC_LOG_DP2("SST Update Payload: downstream payload table updated.");
- result = true;
- break;
- }
- } else {
- union dpcd_rev dpcdRev;
-
- if (core_link_read_dpcd(
- link,
- DP_DPCD_REV,
- &dpcdRev.raw,
- 1) != DC_OK) {
- DC_LOG_ERROR("SST Update Payload: Unable to read DPCD revision "
- "of sink while polling payload table "
- "updated status bit.");
- break;
- }
- }
- retries++;
- udelay(5000);
- }
-
- if (!result && retries == max_retries) {
- DC_LOG_ERROR("SST Update Payload: Payload table not updated after retries, "
- "continue on. Something is wrong with the branch.");
- // TODO - DP2.0 Payload: Read and log the payload table from downstream branch
- }
-
- proposed_table->stream_count = 1; /// Always 1 stream for SST
- proposed_table->stream_allocations[0].slot_count = req_slot_count;
- proposed_table->stream_allocations[0].vcp_id = vc_id;
-
- return result;
-}
-
-bool dpcd_poll_for_allocation_change_trigger(struct dc_link *link)
-{
- /*
- * wait for ACT handled
- */
- int i;
- const int act_retries = 30;
- enum act_return_status result = ACT_FAILED;
- union payload_table_update_status update_status = {0};
- union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX];
- union lane_align_status_updated lane_status_updated;
-
- for (i = 0; i < act_retries; i++) {
- get_lane_status(link, link->cur_link_settings.lane_count, dpcd_lane_status, &lane_status_updated);
-
- if (!dp_is_cr_done(link->cur_link_settings.lane_count, dpcd_lane_status) ||
- !dp_is_ch_eq_done(link->cur_link_settings.lane_count, dpcd_lane_status) ||
- !dp_is_symbol_locked(link->cur_link_settings.lane_count, dpcd_lane_status) ||
- !dp_is_interlane_aligned(lane_status_updated)) {
- DC_LOG_ERROR("SST Update Payload: Link loss occurred while "
- "polling for ACT handled.");
- result = ACT_LINK_LOST;
- break;
- }
- core_link_read_dpcd(
- link,
- DP_PAYLOAD_TABLE_UPDATE_STATUS,
- &update_status.raw,
- 1);
-
- if (update_status.bits.ACT_HANDLED == 1) {
- DC_LOG_DP2("SST Update Payload: ACT handled by downstream.");
- result = ACT_SUCCESS;
- break;
- }
-
- udelay(5000);
- }
-
- if (result == ACT_FAILED) {
- DC_LOG_ERROR("SST Update Payload: ACT still not handled after retries, "
- "continue on. Something is wrong with the branch.");
- }
-
- return (result == ACT_SUCCESS);
-}
-
-struct fixed31_32 calculate_sst_avg_time_slots_per_mtp(
- const struct dc_stream_state *stream,
- const struct dc_link *link)
-{
- struct fixed31_32 link_bw_effective =
- dc_fixpt_from_int(
- dc_link_bandwidth_kbps(link, &link->cur_link_settings));
- struct fixed31_32 timeslot_bw_effective =
- dc_fixpt_div_int(link_bw_effective, MAX_MTP_SLOT_COUNT);
- struct fixed31_32 timing_bw =
- dc_fixpt_from_int(
- dc_bandwidth_in_kbps_from_timing(&stream->timing));
- struct fixed31_32 avg_time_slots_per_mtp =
- dc_fixpt_div(timing_bw, timeslot_bw_effective);
-
- return avg_time_slots_per_mtp;
-}
-
-bool is_dp_128b_132b_signal(struct pipe_ctx *pipe_ctx)
-{
- return (pipe_ctx->stream_res.hpo_dp_stream_enc &&
- pipe_ctx->stream->link->hpo_dp_link_enc &&
- dc_is_dp_signal(pipe_ctx->stream->signal));
-}
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c
deleted file mode 100644
index 7f25c11f4248..000000000000
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- * Copyright 2021 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include <inc/core_status.h>
-#include <dc_link.h>
-#include <inc/link_hwss.h>
-#include <inc/link_dpcd.h>
-#include "drm/drm_dp_helper.h"
-#include <dc_dp_types.h>
-#include "dm_helpers.h"
-
-#define END_ADDRESS(start, size) (start + size - 1)
-#define ADDRESS_RANGE_SIZE(start, end) (end - start + 1)
-struct dpcd_address_range {
- uint32_t start;
- uint32_t end;
-};
-
-static enum dc_status internal_link_read_dpcd(
- struct dc_link *link,
- uint32_t address,
- uint8_t *data,
- uint32_t size)
-{
- if (!link->aux_access_disabled &&
- !dm_helpers_dp_read_dpcd(link->ctx,
- link, address, data, size)) {
- return DC_ERROR_UNEXPECTED;
- }
-
- return DC_OK;
-}
-
-static enum dc_status internal_link_write_dpcd(
- struct dc_link *link,
- uint32_t address,
- const uint8_t *data,
- uint32_t size)
-{
- if (!link->aux_access_disabled &&
- !dm_helpers_dp_write_dpcd(link->ctx,
- link, address, data, size)) {
- return DC_ERROR_UNEXPECTED;
- }
-
- return DC_OK;
-}
-
-/*
- * Partition the entire DPCD address space
- * XXX: This partitioning must cover the entire DPCD address space,
- * and must contain no gaps or overlapping address ranges.
- */
-static const struct dpcd_address_range mandatory_dpcd_partitions[] = {
- { 0, DP_TRAINING_PATTERN_SET_PHY_REPEATER(DP_PHY_LTTPR1) - 1},
- { DP_TRAINING_PATTERN_SET_PHY_REPEATER(DP_PHY_LTTPR1), DP_TRAINING_PATTERN_SET_PHY_REPEATER(DP_PHY_LTTPR2) - 1 },
- { DP_TRAINING_PATTERN_SET_PHY_REPEATER(DP_PHY_LTTPR2), DP_TRAINING_PATTERN_SET_PHY_REPEATER(DP_PHY_LTTPR3) - 1 },
- { DP_TRAINING_PATTERN_SET_PHY_REPEATER(DP_PHY_LTTPR3), DP_TRAINING_PATTERN_SET_PHY_REPEATER(DP_PHY_LTTPR4) - 1 },
- { DP_TRAINING_PATTERN_SET_PHY_REPEATER(DP_PHY_LTTPR4), DP_TRAINING_PATTERN_SET_PHY_REPEATER(DP_PHY_LTTPR5) - 1 },
- { DP_TRAINING_PATTERN_SET_PHY_REPEATER(DP_PHY_LTTPR5), DP_TRAINING_PATTERN_SET_PHY_REPEATER(DP_PHY_LTTPR6) - 1 },
- { DP_TRAINING_PATTERN_SET_PHY_REPEATER(DP_PHY_LTTPR6), DP_TRAINING_PATTERN_SET_PHY_REPEATER(DP_PHY_LTTPR7) - 1 },
- { DP_TRAINING_PATTERN_SET_PHY_REPEATER(DP_PHY_LTTPR7), DP_TRAINING_PATTERN_SET_PHY_REPEATER(DP_PHY_LTTPR8) - 1 },
- { DP_TRAINING_PATTERN_SET_PHY_REPEATER(DP_PHY_LTTPR8), DP_FEC_STATUS_PHY_REPEATER(DP_PHY_LTTPR1) - 1 },
- /*
- * The FEC registers are contiguous
- */
- { DP_FEC_STATUS_PHY_REPEATER(DP_PHY_LTTPR1), DP_FEC_STATUS_PHY_REPEATER(DP_PHY_LTTPR1) - 1 },
- { DP_FEC_STATUS_PHY_REPEATER(DP_PHY_LTTPR2), DP_FEC_STATUS_PHY_REPEATER(DP_PHY_LTTPR2) - 1 },
- { DP_FEC_STATUS_PHY_REPEATER(DP_PHY_LTTPR3), DP_FEC_STATUS_PHY_REPEATER(DP_PHY_LTTPR3) - 1 },
- { DP_FEC_STATUS_PHY_REPEATER(DP_PHY_LTTPR4), DP_FEC_STATUS_PHY_REPEATER(DP_PHY_LTTPR4) - 1 },
- { DP_FEC_STATUS_PHY_REPEATER(DP_PHY_LTTPR5), DP_FEC_STATUS_PHY_REPEATER(DP_PHY_LTTPR5) - 1 },
- { DP_FEC_STATUS_PHY_REPEATER(DP_PHY_LTTPR6), DP_FEC_STATUS_PHY_REPEATER(DP_PHY_LTTPR6) - 1 },
- { DP_FEC_STATUS_PHY_REPEATER(DP_PHY_LTTPR7), DP_FEC_STATUS_PHY_REPEATER(DP_PHY_LTTPR7) - 1 },
- { DP_FEC_STATUS_PHY_REPEATER(DP_PHY_LTTPR8), DP_LTTPR_MAX_ADD },
- /* all remaining DPCD addresses */
- { DP_LTTPR_MAX_ADD + 1, DP_DPCD_MAX_ADD } };
-
-static inline bool do_addresses_intersect_with_range(
- const struct dpcd_address_range *range,
- const uint32_t start_address,
- const uint32_t end_address)
-{
- return start_address <= range->end && end_address >= range->start;
-}
-
-static uint32_t dpcd_get_next_partition_size(const uint32_t address, const uint32_t size)
-{
- const uint32_t end_address = END_ADDRESS(address, size);
- uint32_t partition_iterator = 0;
-
- /*
- * find current partition
- * this loop spins forever if partition map above is not surjective
- */
- while (!do_addresses_intersect_with_range(&mandatory_dpcd_partitions[partition_iterator],
- address, end_address))
- partition_iterator++;
- if (end_address < mandatory_dpcd_partitions[partition_iterator].end)
- return size;
- return ADDRESS_RANGE_SIZE(address, mandatory_dpcd_partitions[partition_iterator].end);
-}
-
-/*
- * Ranges of DPCD addresses that must be read in a single transaction
- * XXX: Do not allow any two address ranges in this array to overlap
- */
-static const struct dpcd_address_range mandatory_dpcd_blocks[] = {
- { DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV, DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT }};
-
-/*
- * extend addresses to read all mandatory blocks together
- */
-static void dpcd_extend_address_range(
- const uint32_t in_address,
- uint8_t * const in_data,
- const uint32_t in_size,
- uint32_t *out_address,
- uint8_t **out_data,
- uint32_t *out_size)
-{
- const uint32_t end_address = END_ADDRESS(in_address, in_size);
- const struct dpcd_address_range *addr_range;
- struct dpcd_address_range new_addr_range;
- uint32_t i;
-
- new_addr_range.start = in_address;
- new_addr_range.end = end_address;
- for (i = 0; i < ARRAY_SIZE(mandatory_dpcd_blocks); i++) {
- addr_range = &mandatory_dpcd_blocks[i];
- if (addr_range->start <= in_address && addr_range->end >= in_address)
- new_addr_range.start = addr_range->start;
-
- if (addr_range->start <= end_address && addr_range->end >= end_address)
- new_addr_range.end = addr_range->end;
- }
- *out_address = in_address;
- *out_size = in_size;
- *out_data = in_data;
- if (new_addr_range.start != in_address || new_addr_range.end != end_address) {
- *out_address = new_addr_range.start;
- *out_size = ADDRESS_RANGE_SIZE(new_addr_range.start, new_addr_range.end);
- *out_data = kzalloc(*out_size * sizeof(**out_data), GFP_KERNEL);
- }
-}
-
-/*
- * Reduce the AUX reply down to the values the caller requested
- */
-static void dpcd_reduce_address_range(
- const uint32_t extended_address,
- uint8_t * const extended_data,
- const uint32_t extended_size,
- const uint32_t reduced_address,
- uint8_t * const reduced_data,
- const uint32_t reduced_size)
-{
- const uint32_t offset = reduced_address - extended_address;
-
- /*
- * If the address is same, address was not extended.
- * So we do not need to free any memory.
- * The data is in original buffer(reduced_data).
- */
- if (extended_data == reduced_data)
- return;
-
- memcpy(&extended_data[offset], reduced_data, reduced_size);
- kfree(extended_data);
-}
-
-enum dc_status core_link_read_dpcd(
- struct dc_link *link,
- uint32_t address,
- uint8_t *data,
- uint32_t size)
-{
- uint32_t extended_address;
- uint32_t partitioned_address;
- uint8_t *extended_data;
- uint32_t extended_size;
- /* size of the remaining partitioned address space */
- uint32_t size_left_to_read;
- enum dc_status status;
- /* size of the next partition to be read from */
- uint32_t partition_size;
- uint32_t data_index = 0;
-
- dpcd_extend_address_range(address, data, size, &extended_address, &extended_data, &extended_size);
- partitioned_address = extended_address;
- size_left_to_read = extended_size;
- while (size_left_to_read) {
- partition_size = dpcd_get_next_partition_size(partitioned_address, size_left_to_read);
- status = internal_link_read_dpcd(link, partitioned_address, &extended_data[data_index], partition_size);
- if (status != DC_OK)
- break;
- partitioned_address += partition_size;
- data_index += partition_size;
- size_left_to_read -= partition_size;
- }
- dpcd_reduce_address_range(extended_address, extended_data, extended_size, address, data, size);
- return status;
-}
-
-enum dc_status core_link_write_dpcd(
- struct dc_link *link,
- uint32_t address,
- const uint8_t *data,
- uint32_t size)
-{
- uint32_t partition_size;
- uint32_t data_index = 0;
- enum dc_status status;
-
- while (size) {
- partition_size = dpcd_get_next_partition_size(address, size);
- status = internal_link_write_dpcd(link, address, &data[data_index], partition_size);
- if (status != DC_OK)
- break;
- address += partition_size;
- data_index += partition_size;
- size -= partition_size;
- }
- return status;
-}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
deleted file mode 100644
index b1c9f77d6bf4..000000000000
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
+++ /dev/null
@@ -1,962 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright 2021 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include "dc.h"
-#include "dc_link_dpia.h"
-#include "inc/core_status.h"
-#include "dc_link.h"
-#include "dc_link_dp.h"
-#include "dpcd_defs.h"
-#include "link_hwss.h"
-#include "dm_helpers.h"
-#include "dmub/inc/dmub_cmd.h"
-#include "inc/link_dpcd.h"
-
-#define DC_LOGGER \
- link->ctx->logger
-
-enum dc_status dpcd_get_tunneling_device_data(struct dc_link *link)
-{
- enum dc_status status = DC_OK;
- uint8_t dpcd_dp_tun_data[3] = {0};
- uint8_t dpcd_topology_data[DPCD_USB4_TOPOLOGY_ID_LEN] = {0};
- uint8_t i = 0;
-
- status = core_link_read_dpcd(link,
- DP_TUNNELING_CAPABILITIES_SUPPORT,
- dpcd_dp_tun_data,
- sizeof(dpcd_dp_tun_data));
-
- status = core_link_read_dpcd(link,
- DP_USB4_ROUTER_TOPOLOGY_ID,
- dpcd_topology_data,
- sizeof(dpcd_topology_data));
-
- link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.raw =
- dpcd_dp_tun_data[DP_TUNNELING_CAPABILITIES_SUPPORT -
- DP_TUNNELING_CAPABILITIES_SUPPORT];
- link->dpcd_caps.usb4_dp_tun_info.dpia_info.raw =
- dpcd_dp_tun_data[DP_IN_ADAPTER_INFO - DP_TUNNELING_CAPABILITIES_SUPPORT];
- link->dpcd_caps.usb4_dp_tun_info.usb4_driver_id =
- dpcd_dp_tun_data[DP_USB4_DRIVER_ID - DP_TUNNELING_CAPABILITIES_SUPPORT];
-
- for (i = 0; i < DPCD_USB4_TOPOLOGY_ID_LEN; i++)
- link->dpcd_caps.usb4_dp_tun_info.usb4_topology_id[i] = dpcd_topology_data[i];
-
- return status;
-}
-
-/* Configure link as prescribed in link_setting; set LTTPR mode; and
- * Initialize link training settings.
- * Abort link training if sink unplug detected.
- *
- * @param link DPIA link being trained.
- * @param[in] link_setting Lane count, link rate and downspread control.
- * @param[out] lt_settings Link settings and drive settings (voltage swing and pre-emphasis).
- */
-static enum link_training_result dpia_configure_link(struct dc_link *link,
- const struct dc_link_settings *link_setting,
- struct link_training_settings *lt_settings)
-{
- enum dc_status status;
- bool fec_enable;
-
- DC_LOG_HW_LINK_TRAINING("%s\n DPIA(%d) configuring\n - LTTPR mode(%d)\n",
- __func__,
- link->link_id.enum_id - ENUM_ID_1,
- link->lttpr_mode);
-
- dp_decide_training_settings(link,
- link_setting,
- lt_settings);
-
- status = dpcd_configure_channel_coding(link, lt_settings);
- if (status != DC_OK && !link->hpd_status)
- return LINK_TRAINING_ABORT;
-
- /* Configure lttpr mode */
- status = dpcd_configure_lttpr_mode(link, lt_settings);
- if (status != DC_OK && !link->hpd_status)
- return LINK_TRAINING_ABORT;
-
- /* Set link rate, lane count and spread. */
- status = dpcd_set_link_settings(link, lt_settings);
- if (status != DC_OK && !link->hpd_status)
- return LINK_TRAINING_ABORT;
-
- if (link->preferred_training_settings.fec_enable)
- fec_enable = *link->preferred_training_settings.fec_enable;
- else
- fec_enable = true;
- status = dp_set_fec_ready(link, fec_enable);
- if (status != DC_OK && !link->hpd_status)
- return LINK_TRAINING_ABORT;
-
- return LINK_TRAINING_SUCCESS;
-}
-
-static enum dc_status core_link_send_set_config(struct dc_link *link,
- uint8_t msg_type,
- uint8_t msg_data)
-{
- struct set_config_cmd_payload payload;
- enum set_config_status set_config_result = SET_CONFIG_PENDING;
-
- /* prepare set_config payload */
- payload.msg_type = msg_type;
- payload.msg_data = msg_data;
-
- if (!link->ddc->ddc_pin && !link->aux_access_disabled &&
- (dm_helpers_dmub_set_config_sync(link->ctx, link,
- &payload, &set_config_result) == -1)) {
- return DC_ERROR_UNEXPECTED;
- }
-
- /* set_config should return ACK if successful */
- return (set_config_result == SET_CONFIG_ACK_RECEIVED) ? DC_OK : DC_ERROR_UNEXPECTED;
-}
-
-/* Build SET_CONFIG message data payload for specified message type. */
-static uint8_t dpia_build_set_config_data(enum dpia_set_config_type type,
- struct dc_link *link,
- struct link_training_settings *lt_settings)
-{
- union dpia_set_config_data data;
-
- data.raw = 0;
-
- switch (type) {
- case DPIA_SET_CFG_SET_LINK:
- data.set_link.mode = link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT ? 1 : 0;
- break;
- case DPIA_SET_CFG_SET_PHY_TEST_MODE:
- break;
- case DPIA_SET_CFG_SET_VSPE:
- /* Assume all lanes have same drive settings. */
- data.set_vspe.swing = lt_settings->lane_settings[0].VOLTAGE_SWING;
- data.set_vspe.pre_emph = lt_settings->lane_settings[0].PRE_EMPHASIS;
- data.set_vspe.max_swing_reached =
- lt_settings->lane_settings[0].VOLTAGE_SWING ==
- VOLTAGE_SWING_MAX_LEVEL ? 1 : 0;
- data.set_vspe.max_pre_emph_reached =
- lt_settings->lane_settings[0].PRE_EMPHASIS ==
- PRE_EMPHASIS_MAX_LEVEL ? 1 : 0;
- break;
- default:
- ASSERT(false); /* Message type not supported by helper function. */
- break;
- }
-
- return data.raw;
-}
-
-/* Convert DC training pattern to DPIA training stage. */
-static enum dpia_set_config_ts convert_trng_ptn_to_trng_stg(enum dc_dp_training_pattern tps)
-{
- enum dpia_set_config_ts ts;
-
- switch (tps) {
- case DP_TRAINING_PATTERN_SEQUENCE_1:
- ts = DPIA_TS_TPS1;
- break;
- case DP_TRAINING_PATTERN_SEQUENCE_2:
- ts = DPIA_TS_TPS2;
- break;
- case DP_TRAINING_PATTERN_SEQUENCE_3:
- ts = DPIA_TS_TPS3;
- break;
- case DP_TRAINING_PATTERN_SEQUENCE_4:
- ts = DPIA_TS_TPS4;
- break;
- default:
- ts = DPIA_TS_DPRX_DONE;
- ASSERT(false); /* TPS not supported by helper function. */
- break;
- }
-
- return ts;
-}
-
-/* Write training pattern to DPCD. */
-static enum dc_status dpcd_set_lt_pattern(struct dc_link *link,
- enum dc_dp_training_pattern pattern,
- uint32_t hop)
-{
- union dpcd_training_pattern dpcd_pattern = { {0} };
- uint32_t dpcd_tps_offset = DP_TRAINING_PATTERN_SET;
- enum dc_status status;
-
- if (hop != DPRX)
- dpcd_tps_offset = DP_TRAINING_PATTERN_SET_PHY_REPEATER1 +
- ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (hop - 1));
-
- /* DpcdAddress_TrainingPatternSet */
- dpcd_pattern.v1_4.TRAINING_PATTERN_SET =
- dc_dp_training_pattern_to_dpcd_training_pattern(link, pattern);
-
- dpcd_pattern.v1_4.SCRAMBLING_DISABLE =
- dc_dp_initialize_scrambling_data_symbols(link, pattern);
-
- if (hop != DPRX) {
- DC_LOG_HW_LINK_TRAINING("%s\n LTTPR Repeater ID: %d\n 0x%X pattern = %x\n",
- __func__,
- hop,
- dpcd_tps_offset,
- dpcd_pattern.v1_4.TRAINING_PATTERN_SET);
- } else {
- DC_LOG_HW_LINK_TRAINING("%s\n 0x%X pattern = %x\n",
- __func__,
- dpcd_tps_offset,
- dpcd_pattern.v1_4.TRAINING_PATTERN_SET);
- }
-
- status = core_link_write_dpcd(link,
- dpcd_tps_offset,
- &dpcd_pattern.raw,
- sizeof(dpcd_pattern.raw));
-
- return status;
-}
-
-/* Execute clock recovery phase of link training for specified hop in display
- * path.in non-transparent mode:
- * - Driver issues both DPCD and SET_CONFIG transactions.
- * - TPS1 is transmitted for any hops downstream of DPOA.
- * - Drive (VS/PE) only transmitted for the hop immediately downstream of DPOA.
- * - CR for the first hop (DPTX-to-DPIA) is assumed to be successful.
- *
- * @param link DPIA link being trained.
- * @param lt_settings link_setting and drive settings (voltage swing and pre-emphasis).
- * @param hop The Hop in display path. DPRX = 0.
- */
-static enum link_training_result dpia_training_cr_non_transparent(struct dc_link *link,
- struct link_training_settings *lt_settings,
- uint32_t hop)
-{
- enum link_training_result result = LINK_TRAINING_CR_FAIL_LANE0;
- uint8_t repeater_cnt = 0; /* Number of hops/repeaters in display path. */
- enum dc_status status;
- uint32_t retries_cr = 0; /* Number of consecutive attempts with same VS or PE. */
- uint32_t retry_count = 0;
- /* From DP spec, CR read interval is always 100us. */
- uint32_t wait_time_microsec = TRAINING_AUX_RD_INTERVAL;
- enum dc_lane_count lane_count = lt_settings->link_settings.lane_count;
- union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = { { {0} } };
- union lane_align_status_updated dpcd_lane_status_updated = { {0} };
- union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX] = { { {0} } };
- uint8_t set_cfg_data;
- enum dpia_set_config_ts ts;
-
- repeater_cnt = dp_convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
-
- /* Cap of LINK_TRAINING_MAX_CR_RETRY attempts at clock recovery.
- * Fix inherited from perform_clock_recovery_sequence() -
- * the DP equivalent of this function:
- * Required for Synaptics MST hub which can put the LT in
- * infinite loop by switching the VS between level 0 and level 1
- * continuously.
- */
- while ((retries_cr < LINK_TRAINING_MAX_RETRY_COUNT) &&
- (retry_count < LINK_TRAINING_MAX_CR_RETRY)) {
- /* DPTX-to-DPIA */
- if (hop == repeater_cnt) {
- /* Send SET_CONFIG(SET_LINK:LC,LR,LTTPR) to notify DPOA that
- * non-transparent link training has started.
- * This also enables the transmission of clk_sync packets.
- */
- set_cfg_data = dpia_build_set_config_data(DPIA_SET_CFG_SET_LINK,
- link,
- lt_settings);
- status = core_link_send_set_config(link,
- DPIA_SET_CFG_SET_LINK,
- set_cfg_data);
- /* CR for this hop is considered successful as long as
- * SET_CONFIG message is acknowledged by DPOA.
- */
- if (status == DC_OK)
- result = LINK_TRAINING_SUCCESS;
- else
- result = LINK_TRAINING_ABORT;
- break;
- }
-
- /* DPOA-to-x */
- /* Instruct DPOA to transmit TPS1 then update DPCD. */
- if (retry_count == 0) {
- ts = convert_trng_ptn_to_trng_stg(lt_settings->pattern_for_cr);
- status = core_link_send_set_config(link,
- DPIA_SET_CFG_SET_TRAINING,
- ts);
- if (status != DC_OK) {
- result = LINK_TRAINING_ABORT;
- break;
- }
- status = dpcd_set_lt_pattern(link, lt_settings->pattern_for_cr, hop);
- if (status != DC_OK) {
- result = LINK_TRAINING_ABORT;
- break;
- }
- }
-
- /* Update DPOA drive settings then DPCD. DPOA does only adjusts
- * drive settings for hops immediately downstream.
- */
- if (hop == repeater_cnt - 1) {
- set_cfg_data = dpia_build_set_config_data(DPIA_SET_CFG_SET_VSPE,
- link,
- lt_settings);
- status = core_link_send_set_config(link,
- DPIA_SET_CFG_SET_VSPE,
- set_cfg_data);
- if (status != DC_OK) {
- result = LINK_TRAINING_ABORT;
- break;
- }
- }
- status = dpcd_set_lane_settings(link, lt_settings, hop);
- if (status != DC_OK) {
- result = LINK_TRAINING_ABORT;
- break;
- }
-
- dp_wait_for_training_aux_rd_interval(link, wait_time_microsec);
-
- /* Read status and adjustment requests from DPCD. */
- status = dp_get_lane_status_and_lane_adjust(
- link,
- lt_settings,
- dpcd_lane_status,
- &dpcd_lane_status_updated,
- dpcd_lane_adjust,
- hop);
- if (status != DC_OK) {
- result = LINK_TRAINING_ABORT;
- break;
- }
-
- /* Check if clock recovery successful. */
- if (dp_is_cr_done(lane_count, dpcd_lane_status)) {
- result = LINK_TRAINING_SUCCESS;
- break;
- }
-
- result = dp_get_cr_failure(lane_count, dpcd_lane_status);
-
- if (dp_is_max_vs_reached(lt_settings))
- break;
-
- /* Count number of attempts with same drive settings.
- * Note: settings are the same for all lanes,
- * so comparing first lane is sufficient.
- */
- if ((lt_settings->dpcd_lane_settings[0].bits.VOLTAGE_SWING_SET ==
- dpcd_lane_adjust[0].bits.VOLTAGE_SWING_LANE)
- && (lt_settings->dpcd_lane_settings[0].bits.PRE_EMPHASIS_SET ==
- dpcd_lane_adjust[0].bits.PRE_EMPHASIS_LANE))
- retries_cr++;
- else
- retries_cr = 0;
-
- /* Update VS/PE. */
- dp_decide_lane_settings(lt_settings, dpcd_lane_adjust,
- lt_settings->lane_settings,
- lt_settings->dpcd_lane_settings);
- retry_count++;
- }
-
- /* Abort link training if clock recovery failed due to HPD unplug. */
- if (!link->hpd_status)
- result = LINK_TRAINING_ABORT;
-
- DC_LOG_HW_LINK_TRAINING("%s\n DPIA(%d) clock recovery\n"
- " -hop(%d)\n - result(%d)\n - retries(%d)\n",
- __func__,
- link->link_id.enum_id - ENUM_ID_1,
- hop,
- result,
- retry_count);
-
- return result;
-}
-
-/* Execute clock recovery phase of link training in transparent LTTPR mode:
- * - Driver only issues DPCD transactions and leaves USB4 tunneling (SET_CONFIG) messages to DPIA.
- * - Driver writes TPS1 to DPCD to kick off training.
- * - Clock recovery (CR) for link is handled by DPOA, which reports result to DPIA on completion.
- * - DPIA communicates result to driver by updating CR status when driver reads DPCD.
- *
- * @param link DPIA link being trained.
- * @param lt_settings link_setting and drive settings (voltage swing and pre-emphasis).
- */
-static enum link_training_result dpia_training_cr_transparent(struct dc_link *link,
- struct link_training_settings *lt_settings)
-{
- enum link_training_result result = LINK_TRAINING_CR_FAIL_LANE0;
- enum dc_status status;
- uint32_t retries_cr = 0; /* Number of consecutive attempts with same VS or PE. */
- uint32_t retry_count = 0;
- uint32_t wait_time_microsec = lt_settings->cr_pattern_time;
- enum dc_lane_count lane_count = lt_settings->link_settings.lane_count;
- union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = { { {0} } };
- union lane_align_status_updated dpcd_lane_status_updated = { {0} };
- union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX] = { { {0} } };
-
- /* Cap of LINK_TRAINING_MAX_CR_RETRY attempts at clock recovery.
- * Fix inherited from perform_clock_recovery_sequence() -
- * the DP equivalent of this function:
- * Required for Synaptics MST hub which can put the LT in
- * infinite loop by switching the VS between level 0 and level 1
- * continuously.
- */
- while ((retries_cr < LINK_TRAINING_MAX_RETRY_COUNT) &&
- (retry_count < LINK_TRAINING_MAX_CR_RETRY)) {
- /* Write TPS1 (not VS or PE) to DPCD to start CR phase.
- * DPIA sends SET_CONFIG(SET_LINK) to notify DPOA to
- * start link training.
- */
- if (retry_count == 0) {
- status = dpcd_set_lt_pattern(link, lt_settings->pattern_for_cr, DPRX);
- if (status != DC_OK) {
- result = LINK_TRAINING_ABORT;
- break;
- }
- }
-
- dp_wait_for_training_aux_rd_interval(link, wait_time_microsec);
-
- /* Read status and adjustment requests from DPCD. */
- status = dp_get_lane_status_and_lane_adjust(
- link,
- lt_settings,
- dpcd_lane_status,
- &dpcd_lane_status_updated,
- dpcd_lane_adjust,
- DPRX);
- if (status != DC_OK) {
- result = LINK_TRAINING_ABORT;
- break;
- }
-
- /* Check if clock recovery successful. */
- if (dp_is_cr_done(lane_count, dpcd_lane_status)) {
- result = LINK_TRAINING_SUCCESS;
- break;
- }
-
- result = dp_get_cr_failure(lane_count, dpcd_lane_status);
-
- if (dp_is_max_vs_reached(lt_settings))
- break;
-
- /* Count number of attempts with same drive settings.
- * Note: settings are the same for all lanes,
- * so comparing first lane is sufficient.
- */
- if ((lt_settings->dpcd_lane_settings[0].bits.VOLTAGE_SWING_SET ==
- dpcd_lane_adjust[0].bits.VOLTAGE_SWING_LANE)
- && (lt_settings->dpcd_lane_settings[0].bits.PRE_EMPHASIS_SET ==
- dpcd_lane_adjust[0].bits.PRE_EMPHASIS_LANE))
- retries_cr++;
- else
- retries_cr = 0;
-
- /* Update VS/PE. */
- dp_decide_lane_settings(lt_settings, dpcd_lane_adjust,
- lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings);
- retry_count++;
- }
-
- /* Abort link training if clock recovery failed due to HPD unplug. */
- if (!link->hpd_status)
- result = LINK_TRAINING_ABORT;
-
- DC_LOG_HW_LINK_TRAINING("%s\n DPIA(%d) clock recovery\n"
- " -hop(%d)\n - result(%d)\n - retries(%d)\n",
- __func__,
- link->link_id.enum_id - ENUM_ID_1,
- DPRX,
- result,
- retry_count);
-
- return result;
-}
-
-/* Execute clock recovery phase of link training for specified hop in display
- * path.
- *
- * @param link DPIA link being trained.
- * @param lt_settings link_setting and drive settings (voltage swing and pre-emphasis).
- * @param hop The Hop in display path. DPRX = 0.
- */
-static enum link_training_result dpia_training_cr_phase(struct dc_link *link,
- struct link_training_settings *lt_settings,
- uint32_t hop)
-{
- enum link_training_result result = LINK_TRAINING_CR_FAIL_LANE0;
-
- if (link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT)
- result = dpia_training_cr_non_transparent(link, lt_settings, hop);
- else
- result = dpia_training_cr_transparent(link, lt_settings);
-
- return result;
-}
-
-/* Return status read interval during equalization phase. */
-static uint32_t dpia_get_eq_aux_rd_interval(const struct dc_link *link,
- const struct link_training_settings *lt_settings,
- uint32_t hop)
-{
- uint32_t wait_time_microsec;
-
- if (hop == DPRX)
- wait_time_microsec = lt_settings->eq_pattern_time;
- else
- wait_time_microsec =
- dp_translate_training_aux_read_interval(
- link->dpcd_caps.lttpr_caps.aux_rd_interval[hop - 1]);
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- /* Check debug option for extending aux read interval. */
- if (link->dc->debug.dpia_debug.bits.extend_aux_rd_interval)
- wait_time_microsec = DPIA_DEBUG_EXTENDED_AUX_RD_INTERVAL_US;
-#endif
-
- return wait_time_microsec;
-}
-
-/* Execute equalization phase of link training for specified hop in display
- * path in non-transparent mode:
- * - driver issues both DPCD and SET_CONFIG transactions.
- * - TPSx is transmitted for any hops downstream of DPOA.
- * - Drive (VS/PE) only transmitted for the hop immediately downstream of DPOA.
- * - EQ for the first hop (DPTX-to-DPIA) is assumed to be successful.
- * - DPRX EQ only reported successful when both DPRX and DPIA requirements
- * (clk sync packets sent) fulfilled.
- *
- * @param link DPIA link being trained.
- * @param lt_settings link_setting and drive settings (voltage swing and pre-emphasis).
- * @param hop The Hop in display path. DPRX = 0.
- */
-static enum link_training_result dpia_training_eq_non_transparent(struct dc_link *link,
- struct link_training_settings *lt_settings,
- uint32_t hop)
-{
- enum link_training_result result = LINK_TRAINING_EQ_FAIL_EQ;
- uint8_t repeater_cnt = 0; /* Number of hops/repeaters in display path. */
- uint32_t retries_eq = 0;
- enum dc_status status;
- enum dc_dp_training_pattern tr_pattern;
- uint32_t wait_time_microsec;
- enum dc_lane_count lane_count = lt_settings->link_settings.lane_count;
- union lane_align_status_updated dpcd_lane_status_updated = { {0} };
- union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = { { {0} } };
- union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX] = { { {0} } };
- uint8_t set_cfg_data;
- enum dpia_set_config_ts ts;
-
- /* Training pattern is TPS4 for repeater;
- * TPS2/3/4 for DPRX depending on what it supports.
- */
- if (hop == DPRX)
- tr_pattern = lt_settings->pattern_for_eq;
- else
- tr_pattern = DP_TRAINING_PATTERN_SEQUENCE_4;
-
- repeater_cnt = dp_convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
-
- for (retries_eq = 0; retries_eq < LINK_TRAINING_MAX_RETRY_COUNT; retries_eq++) {
- /* DPTX-to-DPIA equalization always successful. */
- if (hop == repeater_cnt) {
- result = LINK_TRAINING_SUCCESS;
- break;
- }
-
- /* Instruct DPOA to transmit TPSn then update DPCD. */
- if (retries_eq == 0) {
- ts = convert_trng_ptn_to_trng_stg(tr_pattern);
- status = core_link_send_set_config(link,
- DPIA_SET_CFG_SET_TRAINING,
- ts);
- if (status != DC_OK) {
- result = LINK_TRAINING_ABORT;
- break;
- }
- status = dpcd_set_lt_pattern(link, tr_pattern, hop);
- if (status != DC_OK) {
- result = LINK_TRAINING_ABORT;
- break;
- }
- }
-
- /* Update DPOA drive settings then DPCD. DPOA only adjusts
- * drive settings for hop immediately downstream.
- */
- if (hop == repeater_cnt - 1) {
- set_cfg_data = dpia_build_set_config_data(DPIA_SET_CFG_SET_VSPE,
- link,
- lt_settings);
- status = core_link_send_set_config(link,
- DPIA_SET_CFG_SET_VSPE,
- set_cfg_data);
- if (status != DC_OK) {
- result = LINK_TRAINING_ABORT;
- break;
- }
- }
- status = dpcd_set_lane_settings(link, lt_settings, hop);
- if (status != DC_OK) {
- result = LINK_TRAINING_ABORT;
- break;
- }
-
- /* Extend wait time on second equalisation attempt on final hop to
- * ensure clock sync packets have been sent.
- */
- if (hop == DPRX && retries_eq == 1)
- wait_time_microsec = max(wait_time_microsec, (uint32_t)DPIA_CLK_SYNC_DELAY);
- else
- wait_time_microsec = dpia_get_eq_aux_rd_interval(link, lt_settings, hop);
-
- dp_wait_for_training_aux_rd_interval(link, wait_time_microsec);
-
- /* Read status and adjustment requests from DPCD. */
- status = dp_get_lane_status_and_lane_adjust(
- link,
- lt_settings,
- dpcd_lane_status,
- &dpcd_lane_status_updated,
- dpcd_lane_adjust,
- hop);
- if (status != DC_OK) {
- result = LINK_TRAINING_ABORT;
- break;
- }
-
- /* CR can still fail during EQ phase. Fail training if CR fails. */
- if (!dp_is_cr_done(lane_count, dpcd_lane_status)) {
- result = LINK_TRAINING_EQ_FAIL_CR;
- break;
- }
-
- if (dp_is_ch_eq_done(lane_count, dpcd_lane_status) &&
- dp_is_symbol_locked(link->cur_link_settings.lane_count, dpcd_lane_status) &&
- dp_is_interlane_aligned(dpcd_lane_status_updated)) {
- result = LINK_TRAINING_SUCCESS;
- break;
- }
-
- /* Update VS/PE. */
- dp_decide_lane_settings(lt_settings, dpcd_lane_adjust,
- lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings);
- }
-
- /* Abort link training if equalization failed due to HPD unplug. */
- if (!link->hpd_status)
- result = LINK_TRAINING_ABORT;
-
- DC_LOG_HW_LINK_TRAINING("%s\n DPIA(%d) equalization\n"
- " - hop(%d)\n - result(%d)\n - retries(%d)\n",
- __func__,
- link->link_id.enum_id - ENUM_ID_1,
- hop,
- result,
- retries_eq);
-
- return result;
-}
-
-/* Execute equalization phase of link training for specified hop in display
- * path in transparent LTTPR mode:
- * - driver only issues DPCD transactions leaves USB4 tunneling (SET_CONFIG) messages to DPIA.
- * - driver writes TPSx to DPCD to notify DPIA that is in equalization phase.
- * - equalization (EQ) for link is handled by DPOA, which reports result to DPIA on completion.
- * - DPIA communicates result to driver by updating EQ status when driver reads DPCD.
- *
- * @param link DPIA link being trained.
- * @param lt_settings link_setting and drive settings (voltage swing and pre-emphasis).
- * @param hop The Hop in display path. DPRX = 0.
- */
-static enum link_training_result dpia_training_eq_transparent(struct dc_link *link,
- struct link_training_settings *lt_settings)
-{
- enum link_training_result result = LINK_TRAINING_EQ_FAIL_EQ;
- uint32_t retries_eq = 0;
- enum dc_status status;
- enum dc_dp_training_pattern tr_pattern = lt_settings->pattern_for_eq;
- uint32_t wait_time_microsec;
- enum dc_lane_count lane_count = lt_settings->link_settings.lane_count;
- union lane_align_status_updated dpcd_lane_status_updated = { {0} };
- union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = { { {0} } };
- union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX] = { { {0} } };
-
- wait_time_microsec = dpia_get_eq_aux_rd_interval(link, lt_settings, DPRX);
-
- for (retries_eq = 0; retries_eq < LINK_TRAINING_MAX_RETRY_COUNT; retries_eq++) {
- if (retries_eq == 0) {
- status = dpcd_set_lt_pattern(link, tr_pattern, DPRX);
- if (status != DC_OK) {
- result = LINK_TRAINING_ABORT;
- break;
- }
- }
-
- dp_wait_for_training_aux_rd_interval(link, wait_time_microsec);
-
- /* Read status and adjustment requests from DPCD. */
- status = dp_get_lane_status_and_lane_adjust(
- link,
- lt_settings,
- dpcd_lane_status,
- &dpcd_lane_status_updated,
- dpcd_lane_adjust,
- DPRX);
- if (status != DC_OK) {
- result = LINK_TRAINING_ABORT;
- break;
- }
-
- /* CR can still fail during EQ phase. Fail training if CR fails. */
- if (!dp_is_cr_done(lane_count, dpcd_lane_status)) {
- result = LINK_TRAINING_EQ_FAIL_CR;
- break;
- }
-
- if (dp_is_ch_eq_done(lane_count, dpcd_lane_status) &&
- dp_is_symbol_locked(link->cur_link_settings.lane_count, dpcd_lane_status) &&
- dp_is_interlane_aligned(dpcd_lane_status_updated)) {
- result = LINK_TRAINING_SUCCESS;
- break;
- }
-
- /* Update VS/PE. */
- dp_decide_lane_settings(lt_settings, dpcd_lane_adjust,
- lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings);
- }
-
- /* Abort link training if equalization failed due to HPD unplug. */
- if (!link->hpd_status)
- result = LINK_TRAINING_ABORT;
-
- DC_LOG_HW_LINK_TRAINING("%s\n DPIA(%d) equalization\n"
- " - hop(%d)\n - result(%d)\n - retries(%d)\n",
- __func__,
- link->link_id.enum_id - ENUM_ID_1,
- DPRX,
- result,
- retries_eq);
-
- return result;
-}
-
-/* Execute equalization phase of link training for specified hop in display
- * path.
- *
- * @param link DPIA link being trained.
- * @param lt_settings link_setting and drive settings (voltage swing and pre-emphasis).
- * @param hop The Hop in display path. DPRX = 0.
- */
-static enum link_training_result dpia_training_eq_phase(struct dc_link *link,
- struct link_training_settings *lt_settings,
- uint32_t hop)
-{
- enum link_training_result result;
-
- if (link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT)
- result = dpia_training_eq_non_transparent(link, lt_settings, hop);
- else
- result = dpia_training_eq_transparent(link, lt_settings);
-
- return result;
-}
-
-/* End training of specified hop in display path. */
-static enum dc_status dpcd_clear_lt_pattern(struct dc_link *link, uint32_t hop)
-{
- union dpcd_training_pattern dpcd_pattern = { {0} };
- uint32_t dpcd_tps_offset = DP_TRAINING_PATTERN_SET;
- enum dc_status status;
-
- if (hop != DPRX)
- dpcd_tps_offset = DP_TRAINING_PATTERN_SET_PHY_REPEATER1 +
- ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (hop - 1));
-
- status = core_link_write_dpcd(link,
- dpcd_tps_offset,
- &dpcd_pattern.raw,
- sizeof(dpcd_pattern.raw));
-
- return status;
-}
-
-/* End training of specified hop in display path.
- *
- * In transparent LTTPR mode:
- * - driver clears training pattern for the specified hop in DPCD.
- * In non-transparent LTTPR mode:
- * - in addition to clearing training pattern, driver issues USB4 tunneling
- * (SET_CONFIG) messages to notify DPOA when training is done for first hop
- * (DPTX-to-DPIA) and last hop (DPRX).
- *
- * @param link DPIA link being trained.
- * @param hop The Hop in display path. DPRX = 0.
- */
-static enum link_training_result dpia_training_end(struct dc_link *link,
- uint32_t hop)
-{
- enum link_training_result result = LINK_TRAINING_SUCCESS;
- uint8_t repeater_cnt = 0; /* Number of hops/repeaters in display path. */
- enum dc_status status;
-
- if (link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) {
- repeater_cnt = dp_convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
-
- if (hop == repeater_cnt) { /* DPTX-to-DPIA */
- /* Send SET_CONFIG(SET_TRAINING:0xff) to notify DPOA that
- * DPTX-to-DPIA hop trained. No DPCD write needed for first hop.
- */
- status = core_link_send_set_config(link,
- DPIA_SET_CFG_SET_TRAINING,
- DPIA_TS_UFP_DONE);
- if (status != DC_OK)
- result = LINK_TRAINING_ABORT;
- } else { /* DPOA-to-x */
- /* Write 0x0 to TRAINING_PATTERN_SET */
- status = dpcd_clear_lt_pattern(link, hop);
- if (status != DC_OK)
- result = LINK_TRAINING_ABORT;
- }
-
- /* Notify DPOA that non-transparent link training of DPRX done. */
- if (hop == DPRX && result != LINK_TRAINING_ABORT) {
- status = core_link_send_set_config(link,
- DPIA_SET_CFG_SET_TRAINING,
- DPIA_TS_DPRX_DONE);
- if (status != DC_OK)
- result = LINK_TRAINING_ABORT;
- }
-
- } else { /* non-LTTPR or transparent LTTPR. */
- /* Write 0x0 to TRAINING_PATTERN_SET */
- status = dpcd_clear_lt_pattern(link, hop);
- if (status != DC_OK)
- result = LINK_TRAINING_ABORT;
- }
-
- DC_LOG_HW_LINK_TRAINING("%s\n DPIA(%d) end\n - hop(%d)\n - result(%d)\n - LTTPR mode(%d)\n",
- __func__,
- link->link_id.enum_id - ENUM_ID_1,
- hop,
- result,
- link->lttpr_mode);
-
- return result;
-}
-
-/* When aborting training of specified hop in display path, clean up by:
- * - Attempting to clear DPCD TRAINING_PATTERN_SET, LINK_BW_SET and LANE_COUNT_SET.
- * - Sending SET_CONFIG(SET_LINK) with lane count and link rate set to 0.
- *
- * @param link DPIA link being trained.
- * @param hop The Hop in display path. DPRX = 0.
- */
-static void dpia_training_abort(struct dc_link *link, uint32_t hop)
-{
- uint8_t data = 0;
- uint32_t dpcd_tps_offset = DP_TRAINING_PATTERN_SET;
-
- DC_LOG_HW_LINK_TRAINING("%s\n DPIA(%d) aborting\n - LTTPR mode(%d)\n - HPD(%d)\n",
- __func__,
- link->link_id.enum_id - ENUM_ID_1,
- link->lttpr_mode,
- link->hpd_status);
-
- /* Abandon clean-up if sink unplugged. */
- if (!link->hpd_status)
- return;
-
- if (hop != DPRX)
- dpcd_tps_offset = DP_TRAINING_PATTERN_SET_PHY_REPEATER1 +
- ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (hop - 1));
-
- core_link_write_dpcd(link, dpcd_tps_offset, &data, 1);
- core_link_write_dpcd(link, DP_LINK_BW_SET, &data, 1);
- core_link_write_dpcd(link, DP_LANE_COUNT_SET, &data, 1);
- core_link_send_set_config(link, DPIA_SET_CFG_SET_LINK, data);
-}
-
-enum link_training_result dc_link_dpia_perform_link_training(struct dc_link *link,
- const struct dc_link_settings *link_setting,
- bool skip_video_pattern)
-{
- enum link_training_result result;
- struct link_training_settings lt_settings;
- uint8_t repeater_cnt = 0; /* Number of hops/repeaters in display path. */
- int8_t repeater_id; /* Current hop. */
-
- /* Configure link as prescribed in link_setting and set LTTPR mode. */
- result = dpia_configure_link(link, link_setting, &lt_settings);
- if (result != LINK_TRAINING_SUCCESS)
- return result;
-
- if (link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT)
- repeater_cnt = dp_convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
-
- /* Train each hop in turn starting with the one closest to DPTX.
- * In transparent or non-LTTPR mode, train only the final hop (DPRX).
- */
- for (repeater_id = repeater_cnt; repeater_id >= 0; repeater_id--) {
- /* Clock recovery. */
- result = dpia_training_cr_phase(link, &lt_settings, repeater_id);
- if (result != LINK_TRAINING_SUCCESS)
- break;
-
- /* Equalization. */
- result = dpia_training_eq_phase(link, &lt_settings, repeater_id);
- if (result != LINK_TRAINING_SUCCESS)
- break;
-
- /* Stop training hop. */
- result = dpia_training_end(link, repeater_id);
- if (result != LINK_TRAINING_SUCCESS)
- break;
- }
-
- /* Double-check link status if training successful; gracefully abort
- * training of current hop if training failed due to message tunneling
- * failure; end training of hop if training ended conventionally and
- * falling back to lower bandwidth settings possible.
- */
- if (result == LINK_TRAINING_SUCCESS) {
- msleep(5);
- result = dp_check_link_loss_status(link, &lt_settings);
- } else if (result == LINK_TRAINING_ABORT) {
- dpia_training_abort(link, repeater_id);
- } else {
- dpia_training_end(link, repeater_id);
- }
- return result;
-}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
index 25e48a8cbb78..deb23d20bca6 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
@@ -24,7 +24,9 @@
#include "link_enc_cfg.h"
#include "resource.h"
-#include "dc_link_dp.h"
+#include "link_service.h"
+
+#define DC_LOGGER dc->ctx->logger
/* Check whether stream is supported by DIG link encoders. */
static bool is_dig_link_enc_stream(struct dc_stream_state *stream)
@@ -42,20 +44,8 @@ static bool is_dig_link_enc_stream(struct dc_stream_state *stream)
* yet match.
*/
if (link_enc && ((uint32_t)stream->link->connector_signal & link_enc->output_signals)) {
- if (dc_is_dp_signal(stream->signal)) {
- /* DIGs do not support DP2.0 streams with 128b/132b encoding. */
- struct dc_link_settings link_settings = {0};
-
- decide_link_settings(stream, &link_settings);
- if ((link_settings.link_rate >= LINK_RATE_LOW) &&
- link_settings.link_rate <= LINK_RATE_HIGH3) {
- is_dig_stream = true;
- break;
- }
- } else {
- is_dig_stream = true;
- break;
- }
+ is_dig_stream = true;
+ break;
}
}
}
@@ -118,7 +108,11 @@ static void remove_link_enc_assignment(
*/
if (get_stream_using_link_enc(state, eng_id) == NULL)
state->res_ctx.link_enc_cfg_ctx.link_enc_avail[eng_idx] = eng_id;
+
stream->link_enc = NULL;
+ state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].eng_id = ENGINE_ID_UNKNOWN;
+ state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].stream = NULL;
+ dc_stream_release(stream);
break;
}
}
@@ -148,6 +142,7 @@ static void add_link_enc_assignment(
.ep_type = stream->link->ep_type},
.eng_id = eng_id,
.stream = stream};
+ dc_stream_retain(stream);
state->res_ctx.link_enc_cfg_ctx.link_enc_avail[eng_idx] = ENGINE_ID_UNKNOWN;
stream->link_enc = stream->ctx->dc->res_pool->link_encoders[eng_idx];
break;
@@ -162,11 +157,23 @@ static void add_link_enc_assignment(
/* Return first available DIG link encoder. */
static enum engine_id find_first_avail_link_enc(
const struct dc_context *ctx,
- const struct dc_state *state)
+ const struct dc_state *state,
+ enum engine_id eng_id_requested)
{
enum engine_id eng_id = ENGINE_ID_UNKNOWN;
int i;
+ if (eng_id_requested != ENGINE_ID_UNKNOWN) {
+
+ for (i = 0; i < ctx->dc->res_pool->res_cap->num_dig_link_enc; i++) {
+ eng_id = state->res_ctx.link_enc_cfg_ctx.link_enc_avail[i];
+ if (eng_id == eng_id_requested)
+ return eng_id;
+ }
+ }
+
+ eng_id = ENGINE_ID_UNKNOWN;
+
for (i = 0; i < ctx->dc->res_pool->res_cap->num_dig_link_enc; i++) {
eng_id = state->res_ctx.link_enc_cfg_ctx.link_enc_avail[i];
if (eng_id != ENGINE_ID_UNKNOWN)
@@ -227,8 +234,10 @@ static struct link_encoder *get_link_enc_used_by_link(
.link_id = link->link_id,
.ep_type = link->ep_type};
- for (i = 0; i < state->stream_count; i++) {
+ for (i = 0; i < MAX_PIPES; i++) {
struct link_enc_assignment assignment = state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i];
+ if (assignment.eng_id == ENGINE_ID_UNKNOWN)
+ continue;
if (assignment.valid == true && are_ep_ids_equal(&assignment.ep_id, &ep_id))
link_enc = link->dc->res_pool->link_encoders[assignment.eng_id - ENGINE_ID_DIGA];
@@ -237,28 +246,18 @@ static struct link_encoder *get_link_enc_used_by_link(
return link_enc;
}
/* Clear all link encoder assignments. */
-static void clear_enc_assignments(struct dc_state *state)
+static void clear_enc_assignments(const struct dc *dc, struct dc_state *state)
{
int i;
- enum engine_id eng_id;
- struct dc_stream_state *stream;
for (i = 0; i < MAX_PIPES; i++) {
state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].valid = false;
- eng_id = state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].eng_id;
- stream = state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].stream;
- if (eng_id != ENGINE_ID_UNKNOWN)
- state->res_ctx.link_enc_cfg_ctx.link_enc_avail[eng_id - ENGINE_ID_DIGA] = eng_id;
- if (stream)
- stream->link_enc = NULL;
+ state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].eng_id = ENGINE_ID_UNKNOWN;
+ if (state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].stream != NULL) {
+ dc_stream_release(state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].stream);
+ state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].stream = NULL;
+ }
}
-}
-
-void link_enc_cfg_init(
- struct dc *dc,
- struct dc_state *state)
-{
- int i;
for (i = 0; i < dc->res_pool->res_cap->num_dig_link_enc; i++) {
if (dc->res_pool->link_encoders[i])
@@ -266,30 +265,40 @@ void link_enc_cfg_init(
else
state->res_ctx.link_enc_cfg_ctx.link_enc_avail[i] = ENGINE_ID_UNKNOWN;
}
+}
- clear_enc_assignments(state);
+void link_enc_cfg_init(
+ const struct dc *dc,
+ struct dc_state *state)
+{
+ clear_enc_assignments(dc, state);
state->res_ctx.link_enc_cfg_ctx.mode = LINK_ENC_CFG_STEADY;
}
+void link_enc_cfg_copy(const struct dc_state *src_ctx, struct dc_state *dst_ctx)
+{
+ memcpy(&dst_ctx->res_ctx.link_enc_cfg_ctx,
+ &src_ctx->res_ctx.link_enc_cfg_ctx,
+ sizeof(dst_ctx->res_ctx.link_enc_cfg_ctx));
+}
+
void link_enc_cfg_link_encs_assign(
struct dc *dc,
struct dc_state *state,
struct dc_stream_state *streams[],
uint8_t stream_count)
{
- enum engine_id eng_id = ENGINE_ID_UNKNOWN;
+ enum engine_id eng_id = ENGINE_ID_UNKNOWN, eng_id_req = ENGINE_ID_UNKNOWN;
int i;
int j;
ASSERT(state->stream_count == stream_count);
-
- if (stream_count == 0)
- clear_enc_assignments(state);
+ ASSERT(dc->current_state->res_ctx.link_enc_cfg_ctx.mode == LINK_ENC_CFG_STEADY);
/* Release DIG link encoder resources before running assignment algorithm. */
- for (i = 0; i < stream_count; i++)
- dc->res_pool->funcs->link_enc_unassign(state, streams[i]);
+ for (i = 0; i < dc->current_state->stream_count; i++)
+ dc->res_pool->funcs->link_enc_unassign(state, dc->current_state->streams[i]);
for (i = 0; i < MAX_PIPES; i++)
ASSERT(state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].valid == false);
@@ -298,15 +307,17 @@ void link_enc_cfg_link_encs_assign(
for (i = 0; i < stream_count; i++) {
struct dc_stream_state *stream = streams[i];
+ /* skip it if the link is mappable endpoint. */
+ if (stream->link->is_dig_mapping_flexible)
+ continue;
+
/* Skip stream if not supported by DIG link encoder. */
if (!is_dig_link_enc_stream(stream))
continue;
/* Physical endpoints have a fixed mapping to DIG link encoders. */
- if (!stream->link->is_dig_mapping_flexible) {
- eng_id = stream->link->eng_id;
- add_link_enc_assignment(state, stream, eng_id);
- }
+ eng_id = stream->link->eng_id;
+ add_link_enc_assignment(state, stream, eng_id);
}
/* (b) Retain previous assignments for mappable endpoints if encoders still available. */
@@ -318,11 +329,12 @@ void link_enc_cfg_link_encs_assign(
for (i = 0; i < stream_count; i++) {
struct dc_stream_state *stream = state->streams[i];
- /* Skip stream if not supported by DIG link encoder. */
- if (!is_dig_link_enc_stream(stream))
+ /* Skip it if the link is NOT mappable endpoint. */
+ if (!stream->link->is_dig_mapping_flexible)
continue;
- if (!stream->link->is_dig_mapping_flexible)
+ /* Skip stream if not supported by DIG link encoder. */
+ if (!is_dig_link_enc_stream(stream))
continue;
for (j = 0; j < prev_state->stream_count; j++) {
@@ -331,6 +343,7 @@ void link_enc_cfg_link_encs_assign(
if (stream == prev_stream && stream->link == prev_stream->link &&
prev_state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[j].valid) {
eng_id = prev_state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[j].eng_id;
+
if (is_avail_link_enc(state, eng_id, stream))
add_link_enc_assignment(state, stream, eng_id);
}
@@ -343,6 +356,15 @@ void link_enc_cfg_link_encs_assign(
for (i = 0; i < stream_count; i++) {
struct dc_stream_state *stream = streams[i];
+ struct link_encoder *link_enc = NULL;
+
+ /* Skip it if the link is NOT mappable endpoint. */
+ if (!stream->link->is_dig_mapping_flexible)
+ continue;
+
+ /* Skip if encoder assignment retained in step (b) above. */
+ if (stream->link_enc)
+ continue;
/* Skip stream if not supported by DIG link encoder. */
if (!is_dig_link_enc_stream(stream)) {
@@ -351,24 +373,24 @@ void link_enc_cfg_link_encs_assign(
}
/* Mappable endpoints have a flexible mapping to DIG link encoders. */
- if (stream->link->is_dig_mapping_flexible) {
- struct link_encoder *link_enc = NULL;
- /* Skip if encoder assignment retained in step (b) above. */
- if (stream->link_enc)
- continue;
+ /* For MST, multiple streams will share the same link / display
+ * endpoint. These streams should use the same link encoder
+ * assigned to that endpoint.
+ */
+ link_enc = get_link_enc_used_by_link(state, stream->link);
+ if (link_enc == NULL) {
- /* For MST, multiple streams will share the same link / display
- * endpoint. These streams should use the same link encoder
- * assigned to that endpoint.
- */
- link_enc = get_link_enc_used_by_link(state, stream->link);
- if (link_enc == NULL)
- eng_id = find_first_avail_link_enc(stream->ctx, state);
- else
- eng_id = link_enc->preferred_engine;
- add_link_enc_assignment(state, stream, eng_id);
+ if (stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
+ stream->link->dpia_preferred_eng_id != ENGINE_ID_UNKNOWN)
+ eng_id_req = stream->link->dpia_preferred_eng_id;
+
+ eng_id = find_first_avail_link_enc(stream->ctx, state, eng_id_req);
}
+ else
+ eng_id = link_enc->preferred_engine;
+
+ add_link_enc_assignment(state, stream, eng_id);
}
link_enc_cfg_validate(dc, state);
@@ -379,6 +401,34 @@ void link_enc_cfg_link_encs_assign(
state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i];
}
+ /* Log encoder assignments. */
+ for (i = 0; i < MAX_PIPES; i++) {
+ struct link_enc_assignment assignment =
+ dc->current_state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i];
+
+ if (assignment.valid)
+ DC_LOG_DEBUG("%s: CUR %s(%d) - enc_id(%d)\n",
+ __func__,
+ assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ? "PHY" : "DPIA",
+ assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ?
+ assignment.ep_id.link_id.enum_id :
+ assignment.ep_id.link_id.enum_id - 1,
+ assignment.eng_id);
+ }
+ for (i = 0; i < MAX_PIPES; i++) {
+ struct link_enc_assignment assignment =
+ state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i];
+
+ if (assignment.valid)
+ DC_LOG_DEBUG("%s: NEW %s(%d) - enc_id(%d)\n",
+ __func__,
+ assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ? "PHY" : "DPIA",
+ assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ?
+ assignment.ep_id.link_id.enum_id :
+ assignment.ep_id.link_id.enum_id - 1,
+ assignment.eng_id);
+ }
+
/* Current state mode will be set to steady once this state committed. */
state->res_ctx.link_enc_cfg_ctx.mode = LINK_ENC_CFG_STEADY;
}
@@ -389,10 +439,6 @@ void link_enc_cfg_link_enc_unassign(
{
enum engine_id eng_id = ENGINE_ID_UNKNOWN;
- /* Only DIG link encoders. */
- if (!is_dig_link_enc_stream(stream))
- return;
-
if (stream->link_enc)
eng_id = stream->link_enc->preferred_engine;
@@ -444,7 +490,6 @@ struct dc_link *link_enc_cfg_get_link_using_link_enc(
if (stream)
link = stream->link;
- // dm_output_to_console("%s: No link using DIG(%d).\n", __func__, eng_id);
return link;
}
@@ -462,6 +507,8 @@ struct link_encoder *link_enc_cfg_get_link_enc_used_by_link(
for (i = 0; i < MAX_PIPES; i++) {
struct link_enc_assignment assignment = get_assignment(dc, i);
+ if (assignment.eng_id == ENGINE_ID_UNKNOWN)
+ continue;
if (assignment.valid == true && are_ep_ids_equal(&assignment.ep_id, &ep_id)) {
link_enc = link->dc->res_pool->link_encoders[assignment.eng_id - ENGINE_ID_DIGA];
@@ -475,22 +522,23 @@ struct link_encoder *link_enc_cfg_get_link_enc_used_by_link(
struct link_encoder *link_enc_cfg_get_next_avail_link_enc(struct dc *dc)
{
struct link_encoder *link_enc = NULL;
- enum engine_id encs_assigned[MAX_DIG_LINK_ENCODERS];
+ enum engine_id encs_assigned[MAX_LINK_ENCODERS];
int i;
- for (i = 0; i < MAX_DIG_LINK_ENCODERS; i++)
+ for (i = 0; i < MAX_LINK_ENCODERS; i++)
encs_assigned[i] = ENGINE_ID_UNKNOWN;
/* Add assigned encoders to list. */
for (i = 0; i < MAX_PIPES; i++) {
struct link_enc_assignment assignment = get_assignment(dc, i);
- if (assignment.valid)
+ if (assignment.valid && assignment.eng_id != ENGINE_ID_UNKNOWN)
encs_assigned[assignment.eng_id - ENGINE_ID_DIGA] = assignment.eng_id;
}
for (i = 0; i < dc->res_pool->res_cap->num_dig_link_enc; i++) {
- if (encs_assigned[i] == ENGINE_ID_UNKNOWN) {
+ if (encs_assigned[i] == ENGINE_ID_UNKNOWN &&
+ dc->res_pool->link_encoders[i] != NULL) {
link_enc = dc->res_pool->link_encoders[i];
break;
}
@@ -499,13 +547,50 @@ struct link_encoder *link_enc_cfg_get_next_avail_link_enc(struct dc *dc)
return link_enc;
}
-struct link_encoder *link_enc_cfg_get_link_enc_used_by_stream(
+struct link_encoder *link_enc_cfg_get_link_enc(
+ const struct dc_link *link)
+{
+ struct link_encoder *link_enc = NULL;
+
+ /* Links supporting dynamically assigned link encoder will be assigned next
+ * available encoder if one not already assigned.
+ */
+ if (link->is_dig_mapping_flexible &&
+ link->dc->res_pool->funcs->link_encs_assign) {
+ link_enc = link_enc_cfg_get_link_enc_used_by_link(link->ctx->dc, link);
+ if (link_enc == NULL)
+ link_enc = link_enc_cfg_get_next_avail_link_enc(
+ link->ctx->dc);
+ } else
+ link_enc = link->link_enc;
+
+ return link_enc;
+}
+
+struct link_encoder *link_enc_cfg_get_link_enc_used_by_stream_current(
struct dc *dc,
const struct dc_stream_state *stream)
{
- struct link_encoder *link_enc;
+ struct link_encoder *link_enc = NULL;
+ struct display_endpoint_id ep_id;
+ int i;
+
+ ep_id = (struct display_endpoint_id) {
+ .link_id = stream->link->link_id,
+ .ep_type = stream->link->ep_type};
+
+ for (i = 0; i < MAX_PIPES; i++) {
+ struct link_enc_assignment assignment =
+ dc->current_state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i];
+
+ if (assignment.eng_id == ENGINE_ID_UNKNOWN)
+ continue;
- link_enc = link_enc_cfg_get_link_enc_used_by_link(dc, stream->link);
+ if (assignment.valid == true && are_ep_ids_equal(&assignment.ep_id, &ep_id)) {
+ link_enc = stream->link->dc->res_pool->link_encoders[assignment.eng_id - ENGINE_ID_DIGA];
+ break;
+ }
+ }
return link_enc;
}
@@ -542,8 +627,9 @@ bool link_enc_cfg_validate(struct dc *dc, struct dc_state *state)
int i, j;
uint8_t valid_count = 0;
uint8_t dig_stream_count = 0;
- int matching_stream_ptrs = 0;
int eng_ids_per_ep_id[MAX_PIPES] = {0};
+ int ep_ids_per_eng_id[MAX_PIPES] = {0};
+ int valid_bitmap = 0;
/* (1) No. valid entries same as stream count. */
for (i = 0; i < MAX_PIPES; i++) {
@@ -563,9 +649,7 @@ bool link_enc_cfg_validate(struct dc *dc, struct dc_state *state)
struct link_enc_assignment assignment = state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i];
if (assignment.valid) {
- if (assignment.stream == state->streams[i])
- matching_stream_ptrs++;
- else
+ if (assignment.stream != state->streams[i])
valid_stream_ptrs = false;
}
}
@@ -578,6 +662,7 @@ bool link_enc_cfg_validate(struct dc *dc, struct dc_state *state)
struct display_endpoint_id ep_id_i = assignment_i.ep_id;
eng_ids_per_ep_id[i]++;
+ ep_ids_per_eng_id[i]++;
for (j = 0; j < MAX_PIPES; j++) {
struct link_enc_assignment assignment_j =
state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[j];
@@ -592,6 +677,10 @@ bool link_enc_cfg_validate(struct dc *dc, struct dc_state *state)
assignment_i.eng_id != assignment_j.eng_id) {
valid_uniqueness = false;
eng_ids_per_ep_id[i]++;
+ } else if (!are_ep_ids_equal(&ep_id_i, &ep_id_j) &&
+ assignment_i.eng_id == assignment_j.eng_id) {
+ valid_uniqueness = false;
+ ep_ids_per_eng_id[i]++;
}
}
}
@@ -625,5 +714,32 @@ bool link_enc_cfg_validate(struct dc *dc, struct dc_state *state)
is_valid = valid_entries && valid_stream_ptrs && valid_uniqueness && valid_avail && valid_streams;
ASSERT(is_valid);
+ if (is_valid == false) {
+ valid_bitmap =
+ (valid_entries & 0x1) |
+ ((valid_stream_ptrs & 0x1) << 1) |
+ ((valid_uniqueness & 0x1) << 2) |
+ ((valid_avail & 0x1) << 3) |
+ ((valid_streams & 0x1) << 4);
+ DC_LOG_ERROR("%s: Invalid link encoder assignments - 0x%x\n", __func__, valid_bitmap);
+ }
+
return is_valid;
}
+
+void link_enc_cfg_set_transient_mode(struct dc *dc, struct dc_state *current_state, struct dc_state *new_state)
+{
+ int i = 0;
+ int num_transient_assignments = 0;
+
+ for (i = 0; i < MAX_PIPES; i++) {
+ if (current_state->res_ctx.link_enc_cfg_ctx.transient_assignments[i].valid)
+ num_transient_assignments++;
+ }
+
+ /* Only enter transient mode if the new encoder assignments are valid. */
+ if (new_state->stream_count == num_transient_assignments) {
+ current_state->res_ctx.link_enc_cfg_ctx.mode = LINK_ENC_CFG_TRANSIENT;
+ DC_LOG_DEBUG("%s: current_state(%p) mode(%d)\n", __func__, current_state, LINK_ENC_CFG_TRANSIENT);
+ }
+}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
new file mode 100644
index 000000000000..9acd30019717
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
@@ -0,0 +1,530 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+/* FILE POLICY AND INTENDED USAGE:
+ * This file provides single entrance to link functionality declared in dc
+ * public headers. The file is intended to be used as a thin translation layer
+ * that directly calls link internal functions without adding new functional
+ * behavior.
+ *
+ * When exporting a new link related dc function, add function declaration in
+ * dc.h with detail interface documentation, then add function implementation
+ * in this file which calls link functions.
+ */
+#include "link_service.h"
+#include "dce/dce_i2c.h"
+
+struct dc_link *dc_get_link_at_index(struct dc *dc, uint32_t link_index)
+{
+ if (link_index >= MAX_LINKS)
+ return NULL;
+
+ return dc->links[link_index];
+}
+
+void dc_get_edp_links(const struct dc *dc,
+ struct dc_link **edp_links,
+ int *edp_num)
+{
+ int i;
+
+ *edp_num = 0;
+ for (i = 0; i < dc->link_count; i++) {
+ // report any eDP links, even unconnected DDI's
+ if (!dc->links[i])
+ continue;
+ if (dc->links[i]->connector_signal == SIGNAL_TYPE_EDP) {
+ edp_links[*edp_num] = dc->links[i];
+ if (++(*edp_num) == MAX_NUM_EDP)
+ return;
+ }
+ }
+}
+
+bool dc_get_edp_link_panel_inst(const struct dc *dc,
+ const struct dc_link *link,
+ unsigned int *inst_out)
+{
+ struct dc_link *edp_links[MAX_NUM_EDP];
+ int edp_num, i;
+
+ *inst_out = 0;
+ if (link->connector_signal != SIGNAL_TYPE_EDP)
+ return false;
+ dc_get_edp_links(dc, edp_links, &edp_num);
+ for (i = 0; i < edp_num; i++) {
+ if (link == edp_links[i])
+ break;
+ (*inst_out)++;
+ }
+ return true;
+}
+
+bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
+{
+ return link->dc->link_srv->detect_link(link, reason);
+}
+
+bool dc_link_detect_connection_type(struct dc_link *link,
+ enum dc_connection_type *type)
+{
+ return link->dc->link_srv->detect_connection_type(link, type);
+}
+
+const struct dc_link_status *dc_link_get_status(const struct dc_link *link)
+{
+ return link->dc->link_srv->get_status(link);
+}
+
+/* return true if the connected receiver supports the hdcp version */
+bool dc_link_is_hdcp14(struct dc_link *link, enum signal_type signal)
+{
+ return link->dc->link_srv->is_hdcp1x_supported(link, signal);
+}
+
+bool dc_link_is_hdcp22(struct dc_link *link, enum signal_type signal)
+{
+ return link->dc->link_srv->is_hdcp2x_supported(link, signal);
+}
+
+void dc_link_clear_dprx_states(struct dc_link *link)
+{
+ link->dc->link_srv->clear_dprx_states(link);
+}
+
+bool dc_link_reset_cur_dp_mst_topology(struct dc_link *link)
+{
+ return link->dc->link_srv->reset_cur_dp_mst_topology(link);
+}
+
+uint32_t dc_link_bandwidth_kbps(
+ const struct dc_link *link,
+ const struct dc_link_settings *link_settings)
+{
+ return link->dc->link_srv->dp_link_bandwidth_kbps(link, link_settings);
+}
+
+uint32_t dc_link_required_hblank_size_bytes(
+ const struct dc_link *link,
+ struct dp_audio_bandwidth_params *audio_params)
+{
+ return link->dc->link_srv->dp_required_hblank_size_bytes(link,
+ audio_params);
+}
+
+void dc_get_cur_link_res_map(const struct dc *dc, uint32_t *map)
+{
+ dc->link_srv->get_cur_res_map(dc, map);
+}
+
+void dc_restore_link_res_map(const struct dc *dc, uint32_t *map)
+{
+ dc->link_srv->restore_res_map(dc, map);
+}
+
+bool dc_link_update_dsc_config(struct pipe_ctx *pipe_ctx)
+{
+ struct dc_link *link = pipe_ctx->stream->link;
+
+ return link->dc->link_srv->update_dsc_config(pipe_ctx);
+}
+
+struct ddc_service *
+dc_get_oem_i2c_device(struct dc *dc)
+{
+ return dc->res_pool->oem_device;
+}
+
+bool dc_is_oem_i2c_device_present(
+ struct dc *dc,
+ size_t slave_address)
+{
+ if (dc->res_pool->oem_device)
+ return dce_i2c_oem_device_present(
+ dc->res_pool,
+ dc->res_pool->oem_device,
+ slave_address);
+
+ return false;
+}
+
+bool dc_submit_i2c(
+ struct dc *dc,
+ uint32_t link_index,
+ struct i2c_command *cmd)
+{
+
+ struct dc_link *link = dc->links[link_index];
+ struct ddc_service *ddc = link->ddc;
+
+ return dce_i2c_submit_command(
+ dc->res_pool,
+ ddc->ddc_pin,
+ cmd);
+}
+
+bool dc_submit_i2c_oem(
+ struct dc *dc,
+ struct i2c_command *cmd)
+{
+ struct ddc_service *ddc = dc->res_pool->oem_device;
+
+ if (ddc)
+ return dce_i2c_submit_command(
+ dc->res_pool,
+ ddc->ddc_pin,
+ cmd);
+
+ return false;
+}
+
+void dc_link_dp_handle_automated_test(struct dc_link *link)
+{
+ link->dc->link_srv->dp_handle_automated_test(link);
+}
+
+bool dc_link_dp_set_test_pattern(
+ struct dc_link *link,
+ enum dp_test_pattern test_pattern,
+ enum dp_test_pattern_color_space test_pattern_color_space,
+ const struct link_training_settings *p_link_settings,
+ const unsigned char *p_custom_pattern,
+ unsigned int cust_pattern_size)
+{
+ return link->dc->link_srv->dp_set_test_pattern(link, test_pattern,
+ test_pattern_color_space, p_link_settings,
+ p_custom_pattern, cust_pattern_size);
+}
+
+void dc_link_set_drive_settings(struct dc *dc,
+ struct link_training_settings *lt_settings,
+ struct dc_link *link)
+{
+ struct link_resource link_res;
+
+ dc->link_srv->get_cur_link_res(link, &link_res);
+ dc->link_srv->dp_set_drive_settings(link, &link_res, lt_settings);
+}
+
+void dc_link_set_preferred_link_settings(struct dc *dc,
+ struct dc_link_settings *link_setting,
+ struct dc_link *link)
+{
+ dc->link_srv->dp_set_preferred_link_settings(dc, link_setting, link);
+}
+
+void dc_link_set_preferred_training_settings(struct dc *dc,
+ struct dc_link_settings *link_setting,
+ struct dc_link_training_overrides *lt_overrides,
+ struct dc_link *link,
+ bool skip_immediate_retrain)
+{
+ dc->link_srv->dp_set_preferred_training_settings(dc, link_setting,
+ lt_overrides, link, skip_immediate_retrain);
+}
+
+bool dc_dp_trace_is_initialized(struct dc_link *link)
+{
+ return link->dc->link_srv->dp_trace_is_initialized(link);
+}
+
+void dc_dp_trace_set_is_logged_flag(struct dc_link *link,
+ bool in_detection,
+ bool is_logged)
+{
+ link->dc->link_srv->dp_trace_set_is_logged_flag(link, in_detection, is_logged);
+}
+
+bool dc_dp_trace_is_logged(struct dc_link *link, bool in_detection)
+{
+ return link->dc->link_srv->dp_trace_is_logged(link, in_detection);
+}
+
+unsigned long long dc_dp_trace_get_lt_end_timestamp(struct dc_link *link,
+ bool in_detection)
+{
+ return link->dc->link_srv->dp_trace_get_lt_end_timestamp(link, in_detection);
+}
+
+const struct dp_trace_lt_counts *dc_dp_trace_get_lt_counts(struct dc_link *link,
+ bool in_detection)
+{
+ return link->dc->link_srv->dp_trace_get_lt_counts(link, in_detection);
+}
+
+unsigned int dc_dp_trace_get_link_loss_count(struct dc_link *link)
+{
+ return link->dc->link_srv->dp_trace_get_link_loss_count(link);
+}
+
+struct dc_sink *dc_link_add_remote_sink(
+ struct dc_link *link,
+ const uint8_t *edid,
+ int len,
+ struct dc_sink_init_data *init_data)
+{
+ return link->dc->link_srv->add_remote_sink(link, edid, len, init_data);
+}
+
+void dc_link_remove_remote_sink(struct dc_link *link, struct dc_sink *sink)
+{
+ link->dc->link_srv->remove_remote_sink(link, sink);
+}
+
+int dc_link_aux_transfer_raw(struct ddc_service *ddc,
+ struct aux_payload *payload,
+ enum aux_return_code_type *operation_result)
+{
+ const struct dc *dc = ddc->link->dc;
+
+ return dc->link_srv->aux_transfer_raw(
+ ddc, payload, operation_result);
+}
+
+uint32_t dc_link_bw_kbps_from_raw_frl_link_rate_data(const struct dc *dc, uint8_t bw)
+{
+ return dc->link_srv->bw_kbps_from_raw_frl_link_rate_data(bw);
+}
+
+bool dc_link_decide_edp_link_settings(struct dc_link *link,
+ struct dc_link_settings *link_setting, uint32_t req_bw)
+{
+ return link->dc->link_srv->edp_decide_link_settings(link, link_setting, req_bw);
+}
+
+
+bool dc_link_dp_get_max_link_enc_cap(const struct dc_link *link,
+ struct dc_link_settings *max_link_enc_cap)
+{
+ return link->dc->link_srv->dp_get_max_link_enc_cap(link, max_link_enc_cap);
+}
+
+enum dp_link_encoding dc_link_dp_mst_decide_link_encoding_format(
+ const struct dc_link *link)
+{
+ return link->dc->link_srv->mst_decide_link_encoding_format(link);
+}
+
+const struct dc_link_settings *dc_link_get_link_cap(const struct dc_link *link)
+{
+ return link->dc->link_srv->dp_get_verified_link_cap(link);
+}
+
+enum dc_link_encoding_format dc_link_get_highest_encoding_format(const struct dc_link *link)
+{
+ if (dc_is_dp_signal(link->connector_signal)) {
+ if (link->dpcd_caps.dongle_type >= DISPLAY_DONGLE_DP_DVI_DONGLE &&
+ link->dpcd_caps.dongle_type <= DISPLAY_DONGLE_DP_HDMI_MISMATCHED_DONGLE)
+ return DC_LINK_ENCODING_HDMI_TMDS;
+ else if (link->dc->link_srv->dp_get_encoding_format(&link->verified_link_cap) ==
+ DP_8b_10b_ENCODING)
+ return DC_LINK_ENCODING_DP_8b_10b;
+ else if (link->dc->link_srv->dp_get_encoding_format(&link->verified_link_cap) ==
+ DP_128b_132b_ENCODING)
+ return DC_LINK_ENCODING_DP_128b_132b;
+ } else if (dc_is_hdmi_signal(link->connector_signal)) {
+ }
+
+ return DC_LINK_ENCODING_UNSPECIFIED;
+}
+
+bool dc_link_is_dp_sink_present(struct dc_link *link)
+{
+ return link->dc->link_srv->dp_is_sink_present(link);
+}
+
+bool dc_link_is_fec_supported(const struct dc_link *link)
+{
+ return link->dc->link_srv->dp_is_fec_supported(link);
+}
+
+void dc_link_overwrite_extended_receiver_cap(
+ struct dc_link *link)
+{
+ link->dc->link_srv->dp_overwrite_extended_receiver_cap(link);
+}
+
+bool dc_link_should_enable_fec(const struct dc_link *link)
+{
+ return link->dc->link_srv->dp_should_enable_fec(link);
+}
+
+void dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link(
+ struct dc_link *link, int peak_bw)
+{
+ link->dc->link_srv->dpia_handle_usb4_bandwidth_allocation_for_link(link, peak_bw);
+}
+
+bool dc_link_check_link_loss_status(
+ struct dc_link *link,
+ union hpd_irq_data *hpd_irq_dpcd_data)
+{
+ return link->dc->link_srv->dp_parse_link_loss_status(link, hpd_irq_dpcd_data);
+}
+
+bool dc_link_dp_allow_hpd_rx_irq(const struct dc_link *link)
+{
+ return link->dc->link_srv->dp_should_allow_hpd_rx_irq(link);
+}
+
+void dc_link_dp_handle_link_loss(struct dc_link *link)
+{
+ link->dc->link_srv->dp_handle_link_loss(link);
+}
+
+enum dc_status dc_link_dp_read_hpd_rx_irq_data(
+ struct dc_link *link,
+ union hpd_irq_data *irq_data)
+{
+ return link->dc->link_srv->dp_read_hpd_rx_irq_data(link, irq_data);
+}
+
+bool dc_link_handle_hpd_rx_irq(struct dc_link *link,
+ union hpd_irq_data *out_hpd_irq_dpcd_data, bool *out_link_loss,
+ bool defer_handling, bool *has_left_work)
+{
+ return link->dc->link_srv->dp_handle_hpd_rx_irq(link, out_hpd_irq_dpcd_data,
+ out_link_loss, defer_handling, has_left_work);
+}
+
+void dc_link_dp_receiver_power_ctrl(struct dc_link *link, bool on)
+{
+ link->dc->link_srv->dpcd_write_rx_power_ctrl(link, on);
+}
+
+enum lttpr_mode dc_link_decide_lttpr_mode(struct dc_link *link,
+ struct dc_link_settings *link_setting)
+{
+ return link->dc->link_srv->dp_decide_lttpr_mode(link, link_setting);
+}
+
+void dc_link_edp_panel_backlight_power_on(struct dc_link *link, bool wait_for_hpd)
+{
+ link->dc->link_srv->edp_panel_backlight_power_on(link, wait_for_hpd);
+}
+
+int dc_link_get_backlight_level(const struct dc_link *link)
+{
+ return link->dc->link_srv->edp_get_backlight_level(link);
+}
+
+bool dc_link_get_backlight_level_nits(struct dc_link *link,
+ uint32_t *backlight_millinits_avg,
+ uint32_t *backlight_millinits_peak)
+{
+ return link->dc->link_srv->edp_get_backlight_level_nits(link,
+ backlight_millinits_avg,
+ backlight_millinits_peak);
+}
+
+bool dc_link_set_backlight_level(const struct dc_link *link,
+ struct set_backlight_level_params *backlight_level_params)
+{
+ return link->dc->link_srv->edp_set_backlight_level(link,
+ backlight_level_params);
+}
+
+bool dc_link_set_backlight_level_nits(struct dc_link *link,
+ bool isHDR,
+ uint32_t backlight_millinits,
+ uint32_t transition_time_in_ms)
+{
+ return link->dc->link_srv->edp_set_backlight_level_nits(link, isHDR,
+ backlight_millinits, transition_time_in_ms);
+}
+
+int dc_link_get_target_backlight_pwm(const struct dc_link *link)
+{
+ return link->dc->link_srv->edp_get_target_backlight_pwm(link);
+}
+
+bool dc_link_get_psr_state(const struct dc_link *link, enum dc_psr_state *state)
+{
+ return link->dc->link_srv->edp_get_psr_state(link, state);
+}
+
+bool dc_link_set_psr_allow_active(struct dc_link *link, const bool *allow_active,
+ bool wait, bool force_static, const unsigned int *power_opts)
+{
+ return link->dc->link_srv->edp_set_psr_allow_active(link, allow_active, wait,
+ force_static, power_opts);
+}
+
+bool dc_link_setup_psr(struct dc_link *link,
+ const struct dc_stream_state *stream, struct psr_config *psr_config,
+ struct psr_context *psr_context)
+{
+ return link->dc->link_srv->edp_setup_psr(link, stream, psr_config, psr_context);
+}
+
+bool dc_link_set_replay_allow_active(struct dc_link *link, const bool *allow_active,
+ bool wait, bool force_static, const unsigned int *power_opts)
+{
+ return link->dc->link_srv->edp_set_replay_allow_active(link, allow_active, wait,
+ force_static, power_opts);
+}
+
+bool dc_link_get_replay_state(const struct dc_link *link, uint64_t *state)
+{
+ return link->dc->link_srv->edp_get_replay_state(link, state);
+}
+
+bool dc_link_wait_for_t12(struct dc_link *link)
+{
+ return link->dc->link_srv->edp_wait_for_t12(link);
+}
+
+bool dc_link_get_hpd_state(struct dc_link *link)
+{
+ return link->dc->link_srv->get_hpd_state(link);
+}
+
+void dc_link_enable_hpd(const struct dc_link *link)
+{
+ link->dc->link_srv->enable_hpd(link);
+}
+
+void dc_link_disable_hpd(const struct dc_link *link)
+{
+ link->dc->link_srv->disable_hpd(link);
+}
+
+void dc_link_enable_hpd_filter(struct dc_link *link, bool enable)
+{
+ link->dc->link_srv->enable_hpd_filter(link, enable);
+}
+
+enum dc_status dc_link_validate_dp_tunneling_bandwidth(const struct dc *dc, const struct dc_state *new_ctx)
+{
+ return dc->link_srv->validate_dp_tunnel_bandwidth(dc, new_ctx);
+}
+
+void dc_link_get_alpm_support(struct dc_link *link,
+ bool *auxless_support,
+ bool *auxwake_support)
+{
+ link->dc->link_srv->edp_get_alpm_support(link, auxless_support, auxwake_support);
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
deleted file mode 100644
index 368e834c6809..000000000000
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
+++ /dev/null
@@ -1,906 +0,0 @@
-/* Copyright 2015 Advanced Micro Devices, Inc. */
-
-
-#include "dm_services.h"
-#include "dc.h"
-#include "inc/core_types.h"
-#include "include/ddc_service_types.h"
-#include "include/i2caux_interface.h"
-#include "link_hwss.h"
-#include "hw_sequencer.h"
-#include "dc_link_dp.h"
-#include "dc_link_ddc.h"
-#include "dm_helpers.h"
-#include "dpcd_defs.h"
-#include "dsc.h"
-#include "resource.h"
-#include "link_enc_cfg.h"
-#include "clk_mgr.h"
-#include "inc/link_dpcd.h"
-#include "dccg.h"
-
-static uint8_t convert_to_count(uint8_t lttpr_repeater_count)
-{
- switch (lttpr_repeater_count) {
- case 0x80: // 1 lttpr repeater
- return 1;
- case 0x40: // 2 lttpr repeaters
- return 2;
- case 0x20: // 3 lttpr repeaters
- return 3;
- case 0x10: // 4 lttpr repeaters
- return 4;
- case 0x08: // 5 lttpr repeaters
- return 5;
- case 0x04: // 6 lttpr repeaters
- return 6;
- case 0x02: // 7 lttpr repeaters
- return 7;
- case 0x01: // 8 lttpr repeaters
- return 8;
- default:
- break;
- }
- return 0; // invalid value
-}
-
-static inline bool is_immediate_downstream(struct dc_link *link, uint32_t offset)
-{
- return (convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) == offset);
-}
-
-void dp_receiver_power_ctrl(struct dc_link *link, bool on)
-{
- uint8_t state;
-
- state = on ? DP_POWER_STATE_D0 : DP_POWER_STATE_D3;
-
- if (link->sync_lt_in_progress)
- return;
-
- core_link_write_dpcd(link, DP_SET_POWER, &state,
- sizeof(state));
-}
-
-void dp_source_sequence_trace(struct dc_link *link, uint8_t dp_test_mode)
-{
- if (link != NULL && link->dc->debug.enable_driver_sequence_debug)
- core_link_write_dpcd(link, DP_SOURCE_SEQUENCE,
- &dp_test_mode, sizeof(dp_test_mode));
-}
-
-void dp_enable_link_phy(
- struct dc_link *link,
- enum signal_type signal,
- enum clock_source_id clock_source,
- const struct dc_link_settings *link_settings)
-{
- struct link_encoder *link_enc;
- struct dc *dc = link->ctx->dc;
- struct dmcu *dmcu = dc->res_pool->dmcu;
-
- struct pipe_ctx *pipes =
- link->dc->current_state->res_ctx.pipe_ctx;
- struct clock_source *dp_cs =
- link->dc->res_pool->dp_clock_source;
- unsigned int i;
-
- /* Link should always be assigned encoder when en-/disabling. */
- if (link->is_dig_mapping_flexible && dc->res_pool->funcs->link_encs_assign)
- link_enc = link_enc_cfg_get_link_enc_used_by_link(dc, link);
- else
- link_enc = link->link_enc;
- ASSERT(link_enc);
-
- if (link->connector_signal == SIGNAL_TYPE_EDP) {
- link->dc->hwss.edp_power_control(link, true);
- link->dc->hwss.edp_wait_for_hpd_ready(link, true);
- }
-
- /* If the current pixel clock source is not DTO(happens after
- * switching from HDMI passive dongle to DP on the same connector),
- * switch the pixel clock source to DTO.
- */
- for (i = 0; i < MAX_PIPES; i++) {
- if (pipes[i].stream != NULL &&
- pipes[i].stream->link == link) {
- if (pipes[i].clock_source != NULL &&
- pipes[i].clock_source->id != CLOCK_SOURCE_ID_DP_DTO) {
- pipes[i].clock_source = dp_cs;
- pipes[i].stream_res.pix_clk_params.requested_pix_clk_100hz =
- pipes[i].stream->timing.pix_clk_100hz;
- pipes[i].clock_source->funcs->program_pix_clk(
- pipes[i].clock_source,
- &pipes[i].stream_res.pix_clk_params,
- &pipes[i].pll_settings);
- }
- }
- }
-
- link->cur_link_settings = *link_settings;
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dp_get_link_encoding_format(link_settings) == DP_128b_132b_ENCODING) {
- /* TODO - DP2.0 HW: notify link rate change here */
- } else if (dp_get_link_encoding_format(link_settings) == DP_8b_10b_ENCODING) {
- if (dc->clk_mgr->funcs->notify_link_rate_change)
- dc->clk_mgr->funcs->notify_link_rate_change(dc->clk_mgr, link);
- }
-#else
- if (dc->clk_mgr->funcs->notify_link_rate_change)
- dc->clk_mgr->funcs->notify_link_rate_change(dc->clk_mgr, link);
-#endif
- if (dmcu != NULL && dmcu->funcs->lock_phy)
- dmcu->funcs->lock_phy(dmcu);
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dp_get_link_encoding_format(link_settings) == DP_128b_132b_ENCODING) {
- enable_dp_hpo_output(link, link_settings);
- } else if (dp_get_link_encoding_format(link_settings) == DP_8b_10b_ENCODING) {
- if (dc_is_dp_sst_signal(signal)) {
- link_enc->funcs->enable_dp_output(
- link_enc,
- link_settings,
- clock_source);
- } else {
- link_enc->funcs->enable_dp_mst_output(
- link_enc,
- link_settings,
- clock_source);
- }
- }
-#else
- if (dc_is_dp_sst_signal(signal)) {
- link_enc->funcs->enable_dp_output(
- link_enc,
- link_settings,
- clock_source);
- } else {
- link_enc->funcs->enable_dp_mst_output(
- link_enc,
- link_settings,
- clock_source);
- }
-#endif
- if (dmcu != NULL && dmcu->funcs->unlock_phy)
- dmcu->funcs->unlock_phy(dmcu);
-
- dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_ENABLE_LINK_PHY);
- dp_receiver_power_ctrl(link, true);
-}
-
-void edp_add_delay_for_T9(struct dc_link *link)
-{
- if (link->local_sink &&
- link->local_sink->edid_caps.panel_patch.extra_delay_backlight_off > 0)
- udelay(link->local_sink->edid_caps.panel_patch.extra_delay_backlight_off * 1000);
-}
-
-bool edp_receiver_ready_T9(struct dc_link *link)
-{
- unsigned int tries = 0;
- unsigned char sinkstatus = 0;
- unsigned char edpRev = 0;
- enum dc_status result;
-
- result = core_link_read_dpcd(link, DP_EDP_DPCD_REV, &edpRev, sizeof(edpRev));
-
- /* start from eDP version 1.2, SINK_STAUS indicate the sink is ready.*/
- if (result == DC_OK && edpRev >= DP_EDP_12) {
- do {
- sinkstatus = 1;
- result = core_link_read_dpcd(link, DP_SINK_STATUS, &sinkstatus, sizeof(sinkstatus));
- if (sinkstatus == 0)
- break;
- if (result != DC_OK)
- break;
- udelay(100); //MAx T9
- } while (++tries < 50);
- }
-
- return result;
-}
-bool edp_receiver_ready_T7(struct dc_link *link)
-{
- unsigned char sinkstatus = 0;
- unsigned char edpRev = 0;
- enum dc_status result;
-
- /* use absolute time stamp to constrain max T7*/
- unsigned long long enter_timestamp = 0;
- unsigned long long finish_timestamp = 0;
- unsigned long long time_taken_in_ns = 0;
-
- result = core_link_read_dpcd(link, DP_EDP_DPCD_REV, &edpRev, sizeof(edpRev));
-
- if (result == DC_OK && edpRev >= DP_EDP_12) {
- /* start from eDP version 1.2, SINK_STAUS indicate the sink is ready.*/
- enter_timestamp = dm_get_timestamp(link->ctx);
- do {
- sinkstatus = 0;
- result = core_link_read_dpcd(link, DP_SINK_STATUS, &sinkstatus, sizeof(sinkstatus));
- if (sinkstatus == 1)
- break;
- if (result != DC_OK)
- break;
- udelay(25);
- finish_timestamp = dm_get_timestamp(link->ctx);
- time_taken_in_ns = dm_get_elapse_time_in_ns(link->ctx, finish_timestamp, enter_timestamp);
- } while (time_taken_in_ns < 50 * 1000000); //MAx T7 is 50ms
- }
-
- if (link->local_sink &&
- link->local_sink->edid_caps.panel_patch.extra_t7_ms > 0)
- udelay(link->local_sink->edid_caps.panel_patch.extra_t7_ms * 1000);
-
- return result;
-}
-
-void dp_disable_link_phy(struct dc_link *link, enum signal_type signal)
-{
- struct dc *dc = link->ctx->dc;
- struct dmcu *dmcu = dc->res_pool->dmcu;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- struct hpo_dp_link_encoder *hpo_link_enc = link->hpo_dp_link_enc;
-#endif
- struct link_encoder *link_enc;
-
- /* Link should always be assigned encoder when en-/disabling. */
- if (link->is_dig_mapping_flexible && dc->res_pool->funcs->link_encs_assign)
- link_enc = link_enc_cfg_get_link_enc_used_by_link(dc, link);
- else
- link_enc = link->link_enc;
- ASSERT(link_enc);
-
- if (!link->wa_flags.dp_keep_receiver_powered)
- dp_receiver_power_ctrl(link, false);
-
- if (signal == SIGNAL_TYPE_EDP) {
- if (link->dc->hwss.edp_backlight_control)
- link->dc->hwss.edp_backlight_control(link, false);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dp_get_link_encoding_format(&link->cur_link_settings) == DP_128b_132b_ENCODING)
- disable_dp_hpo_output(link, signal);
- else
- link_enc->funcs->disable_output(link_enc, signal);
-#else
- link_enc->funcs->disable_output(link_enc, signal);
-#endif
- link->dc->hwss.edp_power_control(link, false);
- } else {
- if (dmcu != NULL && dmcu->funcs->lock_phy)
- dmcu->funcs->lock_phy(dmcu);
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dp_get_link_encoding_format(&link->cur_link_settings) == DP_128b_132b_ENCODING &&
- hpo_link_enc)
- disable_dp_hpo_output(link, signal);
- else
- link_enc->funcs->disable_output(link_enc, signal);
-#else
- link_enc->funcs->disable_output(link_enc, signal);
-#endif
- if (dmcu != NULL && dmcu->funcs->unlock_phy)
- dmcu->funcs->unlock_phy(dmcu);
- }
-
- dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_DISABLE_LINK_PHY);
-
- /* Clear current link setting.*/
- memset(&link->cur_link_settings, 0,
- sizeof(link->cur_link_settings));
-
- if (dc->clk_mgr->funcs->notify_link_rate_change)
- dc->clk_mgr->funcs->notify_link_rate_change(dc->clk_mgr, link);
-}
-
-void dp_disable_link_phy_mst(struct dc_link *link, enum signal_type signal)
-{
- /* MST disable link only when no stream use the link */
- if (link->mst_stream_alloc_table.stream_count > 0)
- return;
-
- dp_disable_link_phy(link, signal);
-
- /* set the sink to SST mode after disabling the link */
- dp_enable_mst_on_sink(link, false);
-}
-
-bool dp_set_hw_training_pattern(
- struct dc_link *link,
- enum dc_dp_training_pattern pattern,
- uint32_t offset)
-{
- enum dp_test_pattern test_pattern = DP_TEST_PATTERN_UNSUPPORTED;
-
- switch (pattern) {
- case DP_TRAINING_PATTERN_SEQUENCE_1:
- test_pattern = DP_TEST_PATTERN_TRAINING_PATTERN1;
- break;
- case DP_TRAINING_PATTERN_SEQUENCE_2:
- test_pattern = DP_TEST_PATTERN_TRAINING_PATTERN2;
- break;
- case DP_TRAINING_PATTERN_SEQUENCE_3:
- test_pattern = DP_TEST_PATTERN_TRAINING_PATTERN3;
- break;
- case DP_TRAINING_PATTERN_SEQUENCE_4:
- test_pattern = DP_TEST_PATTERN_TRAINING_PATTERN4;
- break;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- case DP_128b_132b_TPS1:
- test_pattern = DP_TEST_PATTERN_128b_132b_TPS1_TRAINING_MODE;
- break;
- case DP_128b_132b_TPS2:
- test_pattern = DP_TEST_PATTERN_128b_132b_TPS2_TRAINING_MODE;
- break;
-#endif
- default:
- break;
- }
-
- dp_set_hw_test_pattern(link, test_pattern, NULL, 0);
-
- return true;
-}
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-#define DC_LOGGER \
- link->ctx->logger
-#endif
-void dp_set_hw_lane_settings(
- struct dc_link *link,
- const struct link_training_settings *link_settings,
- uint32_t offset)
-{
- struct link_encoder *encoder = link->link_enc;
-
- if ((link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) && !is_immediate_downstream(link, offset))
- return;
-
- /* call Encoder to set lane settings */
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dp_get_link_encoding_format(&link_settings->link_settings) ==
- DP_128b_132b_ENCODING) {
- link->hpo_dp_link_enc->funcs->set_ffe(
- link->hpo_dp_link_enc,
- &link_settings->link_settings,
- link_settings->lane_settings[0].FFE_PRESET.raw);
- } else if (dp_get_link_encoding_format(&link_settings->link_settings)
- == DP_8b_10b_ENCODING) {
- encoder->funcs->dp_set_lane_settings(encoder, link_settings);
- }
-#else
- encoder->funcs->dp_set_lane_settings(encoder, link_settings);
-#endif
- memmove(link->cur_lane_setting,
- link_settings->lane_settings,
- sizeof(link->cur_lane_setting));
-}
-
-void dp_set_hw_test_pattern(
- struct dc_link *link,
- enum dp_test_pattern test_pattern,
- uint8_t *custom_pattern,
- uint32_t custom_pattern_size)
-{
- struct encoder_set_dp_phy_pattern_param pattern_param = {0};
- struct link_encoder *encoder;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- enum dp_link_encoding link_encoding_format = dp_get_link_encoding_format(&link->cur_link_settings);
-#endif
-
- /* Access link encoder based on whether it is statically
- * or dynamically assigned to a link.
- */
- if (link->is_dig_mapping_flexible &&
- link->dc->res_pool->funcs->link_encs_assign)
- encoder = link_enc_cfg_get_link_enc_used_by_link(link->ctx->dc, link);
- else
- encoder = link->link_enc;
-
- pattern_param.dp_phy_pattern = test_pattern;
- pattern_param.custom_pattern = custom_pattern;
- pattern_param.custom_pattern_size = custom_pattern_size;
- pattern_param.dp_panel_mode = dp_get_panel_mode(link);
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- switch (link_encoding_format) {
- case DP_128b_132b_ENCODING:
- link->hpo_dp_link_enc->funcs->set_link_test_pattern(
- link->hpo_dp_link_enc, &pattern_param);
- break;
- case DP_8b_10b_ENCODING:
- ASSERT(encoder);
- encoder->funcs->dp_set_phy_pattern(encoder, &pattern_param);
- break;
- default:
- DC_LOG_ERROR("%s: Unknown link encoding format.", __func__);
- break;
- }
-#else
- encoder->funcs->dp_set_phy_pattern(encoder, &pattern_param);
-#endif
- dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_SET_SOURCE_PATTERN);
-}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-#undef DC_LOGGER
-#endif
-
-void dp_retrain_link_dp_test(struct dc_link *link,
- struct dc_link_settings *link_setting,
- bool skip_video_pattern)
-{
- struct pipe_ctx *pipes =
- &link->dc->current_state->res_ctx.pipe_ctx[0];
- unsigned int i;
-
- for (i = 0; i < MAX_PIPES; i++) {
- if (pipes[i].stream != NULL &&
- !pipes[i].top_pipe && !pipes[i].prev_odm_pipe &&
- pipes[i].stream->link != NULL &&
- pipes[i].stream_res.stream_enc != NULL &&
- pipes[i].stream->link == link) {
- udelay(100);
-
- pipes[i].stream_res.stream_enc->funcs->dp_blank(link,
- pipes[i].stream_res.stream_enc);
-
- /* disable any test pattern that might be active */
- dp_set_hw_test_pattern(link,
- DP_TEST_PATTERN_VIDEO_MODE, NULL, 0);
-
- dp_receiver_power_ctrl(link, false);
-
- link->dc->hwss.disable_stream(&pipes[i]);
- if ((&pipes[i])->stream_res.audio && !link->dc->debug.az_endpoint_mute_only)
- (&pipes[i])->stream_res.audio->funcs->az_disable((&pipes[i])->stream_res.audio);
-
- if (link->link_enc)
- link->link_enc->funcs->disable_output(
- link->link_enc,
- SIGNAL_TYPE_DISPLAY_PORT);
-
- /* Clear current link setting. */
- memset(&link->cur_link_settings, 0,
- sizeof(link->cur_link_settings));
-
- perform_link_training_with_retries(
- link_setting,
- skip_video_pattern,
- LINK_TRAINING_ATTEMPTS,
- &pipes[i],
- SIGNAL_TYPE_DISPLAY_PORT,
- false);
-
- link->dc->hwss.enable_stream(&pipes[i]);
-
- link->dc->hwss.unblank_stream(&pipes[i],
- link_setting);
-
- if (pipes[i].stream_res.audio) {
- /* notify audio driver for
- * audio modes of monitor */
- pipes[i].stream_res.audio->funcs->az_enable(
- pipes[i].stream_res.audio);
-
- /* un-mute audio */
- /* TODO: audio should be per stream rather than
- * per link */
- pipes[i].stream_res.stream_enc->funcs->
- audio_mute_control(
- pipes[i].stream_res.stream_enc, false);
- }
- }
- }
-}
-
-#define DC_LOGGER \
- dsc->ctx->logger
-static void dsc_optc_config_log(struct display_stream_compressor *dsc,
- struct dsc_optc_config *config)
-{
- uint32_t precision = 1 << 28;
- uint32_t bytes_per_pixel_int = config->bytes_per_pixel / precision;
- uint32_t bytes_per_pixel_mod = config->bytes_per_pixel % precision;
- uint64_t ll_bytes_per_pix_fraq = bytes_per_pixel_mod;
-
- /* 7 fractional digits decimal precision for bytes per pixel is enough because DSC
- * bits per pixel precision is 1/16th of a pixel, which means bytes per pixel precision is
- * 1/16/8 = 1/128 of a byte, or 0.0078125 decimal
- */
- ll_bytes_per_pix_fraq *= 10000000;
- ll_bytes_per_pix_fraq /= precision;
-
- DC_LOG_DSC("\tbytes_per_pixel 0x%08x (%d.%07d)",
- config->bytes_per_pixel, bytes_per_pixel_int, (uint32_t)ll_bytes_per_pix_fraq);
- DC_LOG_DSC("\tis_pixel_format_444 %d", config->is_pixel_format_444);
- DC_LOG_DSC("\tslice_width %d", config->slice_width);
-}
-
-bool dp_set_dsc_on_rx(struct pipe_ctx *pipe_ctx, bool enable)
-{
- struct dc *dc = pipe_ctx->stream->ctx->dc;
- struct dc_stream_state *stream = pipe_ctx->stream;
- bool result = false;
-
- if (dc_is_virtual_signal(stream->signal) || IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
- result = true;
- else
- result = dm_helpers_dp_write_dsc_enable(dc->ctx, stream, enable);
- return result;
-}
-
-/* The stream with these settings can be sent (unblanked) only after DSC was enabled on RX first,
- * i.e. after dp_enable_dsc_on_rx() had been called
- */
-void dp_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
-{
- struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
- struct dc *dc = pipe_ctx->stream->ctx->dc;
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct pipe_ctx *odm_pipe;
- int opp_cnt = 1;
-
- for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
- opp_cnt++;
-
- if (enable) {
- struct dsc_config dsc_cfg;
- struct dsc_optc_config dsc_optc_cfg;
- enum optc_dsc_mode optc_dsc_mode;
-
- /* Enable DSC hw block */
- dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt;
- dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom;
- dsc_cfg.pixel_encoding = stream->timing.pixel_encoding;
- dsc_cfg.color_depth = stream->timing.display_color_depth;
- dsc_cfg.is_odm = pipe_ctx->next_odm_pipe ? true : false;
- dsc_cfg.dc_dsc_cfg = stream->timing.dsc_cfg;
- ASSERT(dsc_cfg.dc_dsc_cfg.num_slices_h % opp_cnt == 0);
- dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt;
-
- dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg);
- dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst);
- for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
- struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc;
-
- odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg);
- odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst);
- }
- dsc_cfg.dc_dsc_cfg.num_slices_h *= opp_cnt;
- dsc_cfg.pic_width *= opp_cnt;
-
- optc_dsc_mode = dsc_optc_cfg.is_pixel_format_444 ? OPTC_DSC_ENABLED_444 : OPTC_DSC_ENABLED_NATIVE_SUBSAMPLED;
-
- /* Enable DSC in encoder */
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dc_is_dp_signal(stream->signal) && !IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)
- && !is_dp_128b_132b_signal(pipe_ctx)) {
-#else
- if (dc_is_dp_signal(stream->signal) && !IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
-#endif
- DC_LOG_DSC("Setting stream encoder DSC config for engine %d:", (int)pipe_ctx->stream_res.stream_enc->id);
- dsc_optc_config_log(dsc, &dsc_optc_cfg);
- pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_config(pipe_ctx->stream_res.stream_enc,
- optc_dsc_mode,
- dsc_optc_cfg.bytes_per_pixel,
- dsc_optc_cfg.slice_width);
-
- /* PPS SDP is set elsewhere because it has to be done after DIG FE is connected to DIG BE */
- }
-
- /* Enable DSC in OPTC */
- DC_LOG_DSC("Setting optc DSC config for tg instance %d:", pipe_ctx->stream_res.tg->inst);
- dsc_optc_config_log(dsc, &dsc_optc_cfg);
- pipe_ctx->stream_res.tg->funcs->set_dsc_config(pipe_ctx->stream_res.tg,
- optc_dsc_mode,
- dsc_optc_cfg.bytes_per_pixel,
- dsc_optc_cfg.slice_width);
- } else {
- /* disable DSC in OPTC */
- pipe_ctx->stream_res.tg->funcs->set_dsc_config(
- pipe_ctx->stream_res.tg,
- OPTC_DSC_DISABLED, 0, 0);
-
- /* disable DSC in stream encoder */
- if (dc_is_dp_signal(stream->signal)) {
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (is_dp_128b_132b_signal(pipe_ctx))
- pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->dp_set_dsc_pps_info_packet(
- pipe_ctx->stream_res.hpo_dp_stream_enc,
- false,
- NULL,
- true);
- else
-#endif
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
- pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_config(
- pipe_ctx->stream_res.stream_enc,
- OPTC_DSC_DISABLED, 0, 0);
- pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_pps_info_packet(
- pipe_ctx->stream_res.stream_enc, false, NULL, true);
- }
- }
-
- /* disable DSC block */
- pipe_ctx->stream_res.dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc);
- for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
- odm_pipe->stream_res.dsc->funcs->dsc_disable(odm_pipe->stream_res.dsc);
- }
-}
-
-bool dp_set_dsc_enable(struct pipe_ctx *pipe_ctx, bool enable)
-{
- struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
- bool result = false;
-
- if (!pipe_ctx->stream->timing.flags.DSC)
- goto out;
- if (!dsc)
- goto out;
-
- if (enable) {
- {
- dp_set_dsc_on_stream(pipe_ctx, true);
- result = true;
- }
- } else {
- dp_set_dsc_on_rx(pipe_ctx, false);
- dp_set_dsc_on_stream(pipe_ctx, false);
- result = true;
- }
-out:
- return result;
-}
-
-/*
- * For dynamic bpp change case, dsc is programmed with MASTER_UPDATE_LOCK enabled;
- * hence PPS info packet update need to use frame update instead of immediate update.
- * Added parameter immediate_update for this purpose.
- * The decision to use frame update is hard-coded in function dp_update_dsc_config(),
- * which is the only place where a "false" would be passed in for param immediate_update.
- *
- * immediate_update is only applicable when DSC is enabled.
- */
-bool dp_set_dsc_pps_sdp(struct pipe_ctx *pipe_ctx, bool enable, bool immediate_update)
-{
- struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
- struct dc_stream_state *stream = pipe_ctx->stream;
-
- if (!pipe_ctx->stream->timing.flags.DSC || !dsc)
- return false;
-
- if (enable) {
- struct dsc_config dsc_cfg;
- uint8_t dsc_packed_pps[128];
-
- memset(&dsc_cfg, 0, sizeof(dsc_cfg));
- memset(dsc_packed_pps, 0, 128);
-
- /* Enable DSC hw block */
- dsc_cfg.pic_width = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right;
- dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom;
- dsc_cfg.pixel_encoding = stream->timing.pixel_encoding;
- dsc_cfg.color_depth = stream->timing.display_color_depth;
- dsc_cfg.is_odm = pipe_ctx->next_odm_pipe ? true : false;
- dsc_cfg.dc_dsc_cfg = stream->timing.dsc_cfg;
-
- DC_LOG_DSC(" ");
- dsc->funcs->dsc_get_packed_pps(dsc, &dsc_cfg, &dsc_packed_pps[0]);
- if (dc_is_dp_signal(stream->signal)) {
- DC_LOG_DSC("Setting stream encoder DSC PPS SDP for engine %d\n", (int)pipe_ctx->stream_res.stream_enc->id);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (is_dp_128b_132b_signal(pipe_ctx))
- pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->dp_set_dsc_pps_info_packet(
- pipe_ctx->stream_res.hpo_dp_stream_enc,
- true,
- &dsc_packed_pps[0],
- immediate_update);
- else
-#endif
- pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_pps_info_packet(
- pipe_ctx->stream_res.stream_enc,
- true,
- &dsc_packed_pps[0],
- immediate_update);
- }
- } else {
- /* disable DSC PPS in stream encoder */
- if (dc_is_dp_signal(stream->signal)) {
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (is_dp_128b_132b_signal(pipe_ctx))
- pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->dp_set_dsc_pps_info_packet(
- pipe_ctx->stream_res.hpo_dp_stream_enc,
- false,
- NULL,
- true);
- else
-#endif
- pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_pps_info_packet(
- pipe_ctx->stream_res.stream_enc, false, NULL, true);
- }
- }
-
- return true;
-}
-
-
-bool dp_update_dsc_config(struct pipe_ctx *pipe_ctx)
-{
- struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
-
- if (!pipe_ctx->stream->timing.flags.DSC)
- return false;
- if (!dsc)
- return false;
-
- dp_set_dsc_on_stream(pipe_ctx, true);
- dp_set_dsc_pps_sdp(pipe_ctx, true, false);
- return true;
-}
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-#undef DC_LOGGER
-#define DC_LOGGER \
- link->ctx->logger
-
-static enum phyd32clk_clock_source get_phyd32clk_src(struct dc_link *link)
-{
- switch (link->link_enc->transmitter) {
- case TRANSMITTER_UNIPHY_A:
- return PHYD32CLKA;
- case TRANSMITTER_UNIPHY_B:
- return PHYD32CLKB;
- case TRANSMITTER_UNIPHY_C:
- return PHYD32CLKC;
- case TRANSMITTER_UNIPHY_D:
- return PHYD32CLKD;
- case TRANSMITTER_UNIPHY_E:
- return PHYD32CLKE;
- default:
- return PHYD32CLKA;
- }
-}
-
-void enable_dp_hpo_output(struct dc_link *link, const struct dc_link_settings *link_settings)
-{
- const struct dc *dc = link->dc;
- enum phyd32clk_clock_source phyd32clk;
-
- /* Enable PHY PLL at target bit rate
- * UHBR10 = 10Gbps (SYMCLK32 = 312.5MHz)
- * UBR13.5 = 13.5Gbps (SYMCLK32 = 421.875MHz)
- * UHBR20 = 20Gbps (SYMCLK32 = 625MHz)
- */
- if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
- switch (link_settings->link_rate) {
- case LINK_RATE_UHBR10:
- dm_set_phyd32clk(dc->ctx, 312500);
- break;
- case LINK_RATE_UHBR13_5:
- dm_set_phyd32clk(dc->ctx, 412875);
- break;
- case LINK_RATE_UHBR20:
- dm_set_phyd32clk(dc->ctx, 625000);
- break;
- default:
- return;
- }
- } else {
- /* DP2.0 HW: call transmitter control to enable PHY */
- link->hpo_dp_link_enc->funcs->enable_link_phy(
- link->hpo_dp_link_enc,
- link_settings,
- link->link_enc->transmitter);
- }
-
- /* DCCG muxing and DTBCLK DTO */
- if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
- dc->res_pool->dccg->funcs->set_physymclk(
- dc->res_pool->dccg,
- link->link_enc_hw_inst,
- PHYSYMCLK_FORCE_SRC_PHYD32CLK,
- true);
-
- phyd32clk = get_phyd32clk_src(link);
- dc->res_pool->dccg->funcs->enable_symclk32_le(
- dc->res_pool->dccg,
- link->hpo_dp_link_enc->inst,
- phyd32clk);
- link->hpo_dp_link_enc->funcs->link_enable(
- link->hpo_dp_link_enc,
- link_settings->lane_count);
- }
-}
-
-void disable_dp_hpo_output(struct dc_link *link, enum signal_type signal)
-{
- const struct dc *dc = link->dc;
-
- link->hpo_dp_link_enc->funcs->link_disable(link->hpo_dp_link_enc);
-
- if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
- dc->res_pool->dccg->funcs->disable_symclk32_le(
- dc->res_pool->dccg,
- link->hpo_dp_link_enc->inst);
-
- dc->res_pool->dccg->funcs->set_physymclk(
- dc->res_pool->dccg,
- link->link_enc_hw_inst,
- PHYSYMCLK_FORCE_SRC_SYMCLK,
- false);
-
- dm_set_phyd32clk(dc->ctx, 0);
- } else {
- /* DP2.0 HW: call transmitter control to disable PHY */
- link->hpo_dp_link_enc->funcs->disable_link_phy(
- link->hpo_dp_link_enc,
- signal);
- }
-}
-
-void setup_dp_hpo_stream(struct pipe_ctx *pipe_ctx, bool enable)
-{
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct dc *dc = pipe_ctx->stream->ctx->dc;
- struct pipe_ctx *odm_pipe;
- int odm_combine_num_segments = 1;
- enum phyd32clk_clock_source phyd32clk;
-
- if (enable) {
- for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
- odm_combine_num_segments++;
-
- dc->res_pool->dccg->funcs->set_dpstreamclk(
- dc->res_pool->dccg,
- DTBCLK0,
- pipe_ctx->stream_res.tg->inst);
-
- phyd32clk = get_phyd32clk_src(stream->link);
- dc->res_pool->dccg->funcs->enable_symclk32_se(
- dc->res_pool->dccg,
- pipe_ctx->stream_res.hpo_dp_stream_enc->inst,
- phyd32clk);
-
- dc->res_pool->dccg->funcs->set_dtbclk_dto(
- dc->res_pool->dccg,
- pipe_ctx->stream_res.tg->inst,
- stream->phy_pix_clk,
- odm_combine_num_segments,
- &stream->timing);
- } else {
- dc->res_pool->dccg->funcs->set_dtbclk_dto(
- dc->res_pool->dccg,
- pipe_ctx->stream_res.tg->inst,
- 0,
- 0,
- &stream->timing);
- dc->res_pool->dccg->funcs->disable_symclk32_se(
- dc->res_pool->dccg,
- pipe_ctx->stream_res.hpo_dp_stream_enc->inst);
- dc->res_pool->dccg->funcs->set_dpstreamclk(
- dc->res_pool->dccg,
- REFCLK,
- pipe_ctx->stream_res.tg->inst);
- }
-}
-
-void reset_dp_hpo_stream_encoders_for_link(struct dc_link *link)
-{
- const struct dc *dc = link->dc;
- struct dc_state *state = dc->current_state;
- uint8_t i;
-
- for (i = 0; i < MAX_PIPES; i++) {
- if (state->res_ctx.pipe_ctx[i].stream_res.hpo_dp_stream_enc &&
- state->res_ctx.pipe_ctx[i].stream &&
- state->res_ctx.pipe_ctx[i].stream->link == link &&
- !state->res_ctx.pipe_ctx[i].stream->dpms_off) {
- setup_dp_hpo_stream(&state->res_ctx.pipe_ctx[i], false);
- }
- }
-}
-
-#undef DC_LOGGER
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index e2d9a46d0e1a..848c267ef11e 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -23,8 +23,6 @@
*
*/
-#include <linux/slab.h>
-
#include "dm_services.h"
#include "resource.h"
@@ -42,7 +40,17 @@
#include "virtual/virtual_stream_encoder.h"
#include "dpcd_defs.h"
#include "link_enc_cfg.h"
-#include "dc_link_dp.h"
+#include "link_service.h"
+#include "clk_mgr.h"
+#include "dc_state_priv.h"
+#include "dc_stream_priv.h"
+
+#include "virtual/virtual_link_hwss.h"
+#include "link/hwss/link_hwss_dio.h"
+#include "link/hwss/link_hwss_dpia.h"
+#include "link/hwss/link_hwss_hpo_dp.h"
+#include "link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h"
+#include "link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h"
#if defined(CONFIG_DRM_AMD_DC_SI)
#include "dce60/dce60_resource.h"
@@ -52,7 +60,6 @@
#include "dce110/dce110_resource.h"
#include "dce112/dce112_resource.h"
#include "dce120/dce120_resource.h"
-#if defined(CONFIG_DRM_AMD_DC_DCN)
#include "dcn10/dcn10_resource.h"
#include "dcn20/dcn20_resource.h"
#include "dcn21/dcn21_resource.h"
@@ -62,13 +69,74 @@
#include "dcn302/dcn302_resource.h"
#include "dcn303/dcn303_resource.h"
#include "dcn31/dcn31_resource.h"
+#include "dcn314/dcn314_resource.h"
+#include "dcn315/dcn315_resource.h"
+#include "dcn316/dcn316_resource.h"
+#include "dcn32/dcn32_resource.h"
+#include "dcn321/dcn321_resource.h"
+#include "dcn35/dcn35_resource.h"
+#include "dcn351/dcn351_resource.h"
+#include "dcn36/dcn36_resource.h"
+#include "dcn401/dcn401_resource.h"
+#if defined(CONFIG_DRM_AMD_DC_FP)
+#include "dc_spl_translate.h"
#endif
+#define VISUAL_CONFIRM_BASE_DEFAULT 3
+#define VISUAL_CONFIRM_BASE_MIN 1
+#define VISUAL_CONFIRM_BASE_MAX 10
+/* we choose 240 because it is a common denominator of common v addressable
+ * such as 2160, 1440, 1200, 960. So we take 1/240 portion of v addressable as
+ * the visual confirm dpp offset height. So visual confirm height can stay
+ * relatively the same independent from timing used.
+ */
+#define VISUAL_CONFIRM_DPP_OFFSET_DENO 240
+
+#define DC_LOGGER \
+ dc->ctx->logger
#define DC_LOGGER_INIT(logger)
+#include "dml2_0/dml2_wrapper.h"
+
+#define UNABLE_TO_SPLIT -1
+
+static void capture_pipe_topology_data(struct dc *dc, int plane_idx, int slice_idx, int stream_idx,
+ int dpp_inst, int opp_inst, int tg_inst, bool is_phantom_pipe)
+{
+ struct pipe_topology_snapshot *current_snapshot = &dc->debug_data.topology_history.snapshots[dc->debug_data.topology_history.current_snapshot_index];
+
+ if (current_snapshot->line_count >= MAX_PIPES)
+ return;
+
+ current_snapshot->pipe_log_lines[current_snapshot->line_count].is_phantom_pipe = is_phantom_pipe;
+ current_snapshot->pipe_log_lines[current_snapshot->line_count].plane_idx = plane_idx;
+ current_snapshot->pipe_log_lines[current_snapshot->line_count].slice_idx = slice_idx;
+ current_snapshot->pipe_log_lines[current_snapshot->line_count].stream_idx = stream_idx;
+ current_snapshot->pipe_log_lines[current_snapshot->line_count].dpp_inst = dpp_inst;
+ current_snapshot->pipe_log_lines[current_snapshot->line_count].opp_inst = opp_inst;
+ current_snapshot->pipe_log_lines[current_snapshot->line_count].tg_inst = tg_inst;
+
+ current_snapshot->line_count++;
+}
+
+static void start_new_topology_snapshot(struct dc *dc, struct dc_state *state)
+{
+ // Move to next snapshot slot (circular buffer)
+ dc->debug_data.topology_history.current_snapshot_index = (dc->debug_data.topology_history.current_snapshot_index + 1) % MAX_TOPOLOGY_SNAPSHOTS;
+
+ // Clear the new snapshot
+ struct pipe_topology_snapshot *current_snapshot = &dc->debug_data.topology_history.snapshots[dc->debug_data.topology_history.current_snapshot_index];
+ memset(current_snapshot, 0, sizeof(*current_snapshot));
+
+ // Set metadata
+ current_snapshot->timestamp_us = dm_get_timestamp(dc->ctx);
+ current_snapshot->stream_count = state->stream_count;
+ current_snapshot->phantom_stream_count = state->phantom_stream_count;
+}
enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id)
{
enum dce_version dc_version = DCE_VERSION_UNKNOWN;
+
switch (asic_id.chip_family) {
#if defined(CONFIG_DRM_AMD_DC_SI)
@@ -118,7 +186,6 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id)
else
dc_version = DCE_VERSION_12_0;
break;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
case FAMILY_RV:
dc_version = DCN_VERSION_1_0;
if (ASICREV_IS_RAVEN2(asic_id.hw_internal_rev))
@@ -131,7 +198,13 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id)
case FAMILY_NV:
dc_version = DCN_VERSION_2_0;
- if (asic_id.chip_id == DEVICE_ID_NV_13FE) {
+ if (asic_id.chip_id == DEVICE_ID_NV_13FE ||
+ asic_id.chip_id == DEVICE_ID_NV_143F ||
+ asic_id.chip_id == DEVICE_ID_NV_13F9 ||
+ asic_id.chip_id == DEVICE_ID_NV_13FA ||
+ asic_id.chip_id == DEVICE_ID_NV_13FB ||
+ asic_id.chip_id == DEVICE_ID_NV_13FC ||
+ asic_id.chip_id == DEVICE_ID_NV_13DB) {
dc_version = DCN_VERSION_2_01;
break;
}
@@ -151,8 +224,34 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id)
if (ASICREV_IS_YELLOW_CARP(asic_id.hw_internal_rev))
dc_version = DCN_VERSION_3_1;
break;
-#endif
-
+ case AMDGPU_FAMILY_GC_10_3_6:
+ if (ASICREV_IS_GC_10_3_6(asic_id.hw_internal_rev))
+ dc_version = DCN_VERSION_3_15;
+ break;
+ case AMDGPU_FAMILY_GC_10_3_7:
+ if (ASICREV_IS_GC_10_3_7(asic_id.hw_internal_rev))
+ dc_version = DCN_VERSION_3_16;
+ break;
+ case AMDGPU_FAMILY_GC_11_0_0:
+ dc_version = DCN_VERSION_3_2;
+ if (ASICREV_IS_GC_11_0_2(asic_id.hw_internal_rev))
+ dc_version = DCN_VERSION_3_21;
+ break;
+ case AMDGPU_FAMILY_GC_11_0_1:
+ dc_version = DCN_VERSION_3_14;
+ break;
+ case AMDGPU_FAMILY_GC_11_5_0:
+ dc_version = DCN_VERSION_3_5;
+ if (ASICREV_IS_GC_11_0_4(asic_id.hw_internal_rev))
+ dc_version = DCN_VERSION_3_51;
+ if (ASICREV_IS_DCN36(asic_id.hw_internal_rev))
+ dc_version = DCN_VERSION_3_6;
+ break;
+ case AMDGPU_FAMILY_GC_12_0_0:
+ if (ASICREV_IS_GC_12_0_1_A0(asic_id.hw_internal_rev) ||
+ ASICREV_IS_GC_12_0_0_A0(asic_id.hw_internal_rev))
+ dc_version = DCN_VERSION_4_01;
+ break;
default:
dc_version = DCE_VERSION_UNKNOWN;
break;
@@ -213,7 +312,7 @@ struct resource_pool *dc_create_resource_pool(struct dc *dc,
init_data->num_virtual_links, dc);
break;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
+#if defined(CONFIG_DRM_AMD_DC_FP)
case DCN_VERSION_1_0:
case DCN_VERSION_1_01:
res_pool = dcn10_create_resource_pool(init_data, dc);
@@ -242,7 +341,34 @@ struct resource_pool *dc_create_resource_pool(struct dc *dc,
case DCN_VERSION_3_1:
res_pool = dcn31_create_resource_pool(init_data, dc);
break;
-#endif
+ case DCN_VERSION_3_14:
+ res_pool = dcn314_create_resource_pool(init_data, dc);
+ break;
+ case DCN_VERSION_3_15:
+ res_pool = dcn315_create_resource_pool(init_data, dc);
+ break;
+ case DCN_VERSION_3_16:
+ res_pool = dcn316_create_resource_pool(init_data, dc);
+ break;
+ case DCN_VERSION_3_2:
+ res_pool = dcn32_create_resource_pool(init_data, dc);
+ break;
+ case DCN_VERSION_3_21:
+ res_pool = dcn321_create_resource_pool(init_data, dc);
+ break;
+ case DCN_VERSION_3_5:
+ res_pool = dcn35_create_resource_pool(init_data, dc);
+ break;
+ case DCN_VERSION_3_51:
+ res_pool = dcn351_create_resource_pool(init_data, dc);
+ break;
+ case DCN_VERSION_3_6:
+ res_pool = dcn36_create_resource_pool(init_data, dc);
+ break;
+ case DCN_VERSION_4_01:
+ res_pool = dcn401_create_resource_pool(init_data, dc);
+ break;
+#endif /* CONFIG_DRM_AMD_DC_FP */
default:
break;
}
@@ -268,7 +394,7 @@ struct resource_pool *dc_create_resource_pool(struct dc *dc,
return res_pool;
}
-void dc_destroy_resource_pool(struct dc *dc)
+void dc_destroy_resource_pool(struct dc *dc)
{
if (dc) {
if (dc->res_pool)
@@ -354,9 +480,16 @@ bool resource_construct(
DC_ERR("DC: failed to create stream_encoder!\n");
pool->stream_enc_count++;
}
+
+ for (i = 0; i < caps->num_analog_stream_encoder; i++) {
+ pool->stream_enc[caps->num_stream_encoder + i] =
+ create_funcs->create_stream_encoder(ENGINE_ID_DACA + i, ctx);
+ if (pool->stream_enc[caps->num_stream_encoder + i] == NULL)
+ DC_ERR("DC: failed to create analog stream_encoder %d!\n", i);
+ pool->stream_enc_count++;
+ }
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
pool->hpo_dp_stream_enc_count = 0;
if (create_funcs->create_hpo_dp_stream_encoder) {
for (i = 0; i < caps->num_hpo_dp_stream_encoder; i++) {
@@ -377,9 +510,7 @@ bool resource_construct(
pool->hpo_dp_link_enc_count++;
}
}
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN)
for (i = 0; i < caps->num_mpc_3dlut; i++) {
pool->mpc_lut[i] = dc_create_3dlut_func();
if (pool->mpc_lut[i] == NULL)
@@ -388,7 +519,7 @@ bool resource_construct(
if (pool->mpc_shaper[i] == NULL)
DC_ERR("DC: failed to create MPC shaper!\n");
}
-#endif
+
dc->caps.dynamic_audio = false;
if (pool->audio_count < pool->stream_enc_count) {
dc->caps.dynamic_audio = true;
@@ -687,155 +818,363 @@ static inline void get_vp_scan_direction(
*flip_horz_scan_dir = !*flip_horz_scan_dir;
}
-int get_num_mpc_splits(struct pipe_ctx *pipe)
+static struct rect intersect_rec(const struct rect *r0, const struct rect *r1)
+{
+ struct rect rec;
+ int r0_x_end = r0->x + r0->width;
+ int r1_x_end = r1->x + r1->width;
+ int r0_y_end = r0->y + r0->height;
+ int r1_y_end = r1->y + r1->height;
+
+ rec.x = r0->x > r1->x ? r0->x : r1->x;
+ rec.width = r0_x_end > r1_x_end ? r1_x_end - rec.x : r0_x_end - rec.x;
+ rec.y = r0->y > r1->y ? r0->y : r1->y;
+ rec.height = r0_y_end > r1_y_end ? r1_y_end - rec.y : r0_y_end - rec.y;
+
+ /* in case that there is no intersection */
+ if (rec.width < 0 || rec.height < 0)
+ memset(&rec, 0, sizeof(rec));
+
+ return rec;
+}
+
+static struct rect shift_rec(const struct rect *rec_in, int x, int y)
{
- int mpc_split_count = 0;
- struct pipe_ctx *other_pipe = pipe->bottom_pipe;
+ struct rect rec_out = *rec_in;
- while (other_pipe && other_pipe->plane_state == pipe->plane_state) {
- mpc_split_count++;
- other_pipe = other_pipe->bottom_pipe;
- }
- other_pipe = pipe->top_pipe;
- while (other_pipe && other_pipe->plane_state == pipe->plane_state) {
- mpc_split_count++;
- other_pipe = other_pipe->top_pipe;
- }
+ rec_out.x += x;
+ rec_out.y += y;
- return mpc_split_count;
+ return rec_out;
}
-int get_num_odm_splits(struct pipe_ctx *pipe)
+static struct rect calculate_plane_rec_in_timing_active(
+ struct pipe_ctx *pipe_ctx,
+ const struct rect *rec_in)
{
- int odm_split_count = 0;
- struct pipe_ctx *next_pipe = pipe->next_odm_pipe;
- while (next_pipe) {
- odm_split_count++;
- next_pipe = next_pipe->next_odm_pipe;
+ /*
+ * The following diagram shows an example where we map a 1920x1200
+ * desktop to a 2560x1440 timing with a plane rect in the middle
+ * of the screen. To map a plane rect from Stream Source to Timing
+ * Active space, we first multiply stream scaling ratios (i.e 2304/1920
+ * horizontal and 1440/1200 vertical) to the plane's x and y, then
+ * we add stream destination offsets (i.e 128 horizontal, 0 vertical).
+ * This will give us a plane rect's position in Timing Active. However
+ * we have to remove the fractional part. The rule is that we find left/right
+ * and top/bottom positions and round the value to the adjacent integer.
+ *
+ * Stream Source Space
+ * ------------
+ * __________________________________________________
+ * |Stream Source (1920 x 1200) ^ |
+ * | y |
+ * | <------- w --------|> |
+ * | __________________V |
+ * |<-- x -->|Plane//////////////| ^ |
+ * | |(pre scale)////////| | |
+ * | |///////////////////| | |
+ * | |///////////////////| h |
+ * | |///////////////////| | |
+ * | |///////////////////| | |
+ * | |///////////////////| V |
+ * | |
+ * | |
+ * |__________________________________________________|
+ *
+ *
+ * Timing Active Space
+ * ---------------------------------
+ *
+ * Timing Active (2560 x 1440)
+ * __________________________________________________
+ * |*****| Stream Destination (2304 x 1440) |*****|
+ * |*****| |*****|
+ * |<128>| |*****|
+ * |*****| __________________ |*****|
+ * |*****| |Plane/////////////| |*****|
+ * |*****| |(post scale)//////| |*****|
+ * |*****| |//////////////////| |*****|
+ * |*****| |//////////////////| |*****|
+ * |*****| |//////////////////| |*****|
+ * |*****| |//////////////////| |*****|
+ * |*****| |*****|
+ * |*****| |*****|
+ * |*****| |*****|
+ * |*****|______________________________________|*****|
+ *
+ * So the resulting formulas are shown below:
+ *
+ * recout_x = 128 + round(plane_x * 2304 / 1920)
+ * recout_w = 128 + round((plane_x + plane_w) * 2304 / 1920) - recout_x
+ * recout_y = 0 + round(plane_y * 1440 / 1200)
+ * recout_h = 0 + round((plane_y + plane_h) * 1440 / 1200) - recout_y
+ *
+ * NOTE: fixed point division is not error free. To reduce errors
+ * introduced by fixed point division, we divide only after
+ * multiplication is complete.
+ */
+ const struct dc_stream_state *stream = pipe_ctx->stream;
+ struct rect rec_out = {0};
+ struct fixed31_32 temp;
+
+ temp = dc_fixpt_from_fraction(rec_in->x * (long long)stream->dst.width,
+ stream->src.width);
+ rec_out.x = stream->dst.x + dc_fixpt_round(temp);
+
+ temp = dc_fixpt_from_fraction(
+ (rec_in->x + rec_in->width) * (long long)stream->dst.width,
+ stream->src.width);
+ rec_out.width = stream->dst.x + dc_fixpt_round(temp) - rec_out.x;
+
+ temp = dc_fixpt_from_fraction(rec_in->y * (long long)stream->dst.height,
+ stream->src.height);
+ rec_out.y = stream->dst.y + dc_fixpt_round(temp);
+
+ temp = dc_fixpt_from_fraction(
+ (rec_in->y + rec_in->height) * (long long)stream->dst.height,
+ stream->src.height);
+ rec_out.height = stream->dst.y + dc_fixpt_round(temp) - rec_out.y;
+
+ return rec_out;
+}
+
+static struct rect calculate_mpc_slice_in_timing_active(
+ struct pipe_ctx *pipe_ctx,
+ struct rect *plane_clip_rec)
+{
+ const struct dc_stream_state *stream = pipe_ctx->stream;
+ int mpc_slice_count = resource_get_mpc_slice_count(pipe_ctx);
+ int mpc_slice_idx = resource_get_mpc_slice_index(pipe_ctx);
+ int epimo = mpc_slice_count - plane_clip_rec->width % mpc_slice_count - 1;
+ struct rect mpc_rec;
+
+ mpc_rec.width = plane_clip_rec->width / mpc_slice_count;
+ mpc_rec.x = plane_clip_rec->x + mpc_rec.width * mpc_slice_idx;
+ mpc_rec.height = plane_clip_rec->height;
+ mpc_rec.y = plane_clip_rec->y;
+ ASSERT(mpc_slice_count == 1 ||
+ stream->view_format != VIEW_3D_FORMAT_SIDE_BY_SIDE ||
+ mpc_rec.width % 2 == 0);
+
+ if (stream->view_format == VIEW_3D_FORMAT_SIDE_BY_SIDE)
+ mpc_rec.x -= (mpc_rec.width * mpc_slice_idx);
+
+ /* Extra pixels in the division remainder need to go to the pipes after
+ * the extra pixel index minus one (epimo), which is computed above.
+ */
+ if (mpc_slice_idx > epimo) {
+ mpc_rec.x += mpc_slice_idx - epimo - 1;
+ mpc_rec.width += 1;
}
- pipe = pipe->prev_odm_pipe;
- while (pipe) {
- odm_split_count++;
- pipe = pipe->prev_odm_pipe;
+
+ if (stream->view_format == VIEW_3D_FORMAT_TOP_AND_BOTTOM) {
+ ASSERT(mpc_rec.height % 2 == 0);
+ mpc_rec.height /= 2;
}
- return odm_split_count;
+ return mpc_rec;
}
-static void calculate_split_count_and_index(struct pipe_ctx *pipe_ctx, int *split_count, int *split_idx)
+static void calculate_adjust_recout_for_visual_confirm(struct pipe_ctx *pipe_ctx,
+ int *base_offset, int *dpp_offset)
{
- *split_count = get_num_odm_splits(pipe_ctx);
- *split_idx = 0;
- if (*split_count == 0) {
- /*Check for mpc split*/
- struct pipe_ctx *split_pipe = pipe_ctx->top_pipe;
+ struct dc *dc = pipe_ctx->stream->ctx->dc;
+ *base_offset = 0;
+ *dpp_offset = 0;
- *split_count = get_num_mpc_splits(pipe_ctx);
- while (split_pipe && split_pipe->plane_state == pipe_ctx->plane_state) {
- (*split_idx)++;
- split_pipe = split_pipe->top_pipe;
- }
- } else {
- /*Get odm split index*/
- struct pipe_ctx *split_pipe = pipe_ctx->prev_odm_pipe;
+ if (dc->debug.visual_confirm == VISUAL_CONFIRM_DISABLE || !pipe_ctx->plane_res.dpp)
+ return;
- while (split_pipe) {
- (*split_idx)++;
- split_pipe = split_pipe->prev_odm_pipe;
- }
- }
+ *dpp_offset = pipe_ctx->stream->timing.v_addressable / VISUAL_CONFIRM_DPP_OFFSET_DENO;
+ *dpp_offset *= pipe_ctx->plane_res.dpp->inst;
+
+ if ((dc->debug.visual_confirm_rect_height >= VISUAL_CONFIRM_BASE_MIN) &&
+ dc->debug.visual_confirm_rect_height <= VISUAL_CONFIRM_BASE_MAX)
+ *base_offset = dc->debug.visual_confirm_rect_height;
+ else
+ *base_offset = VISUAL_CONFIRM_BASE_DEFAULT;
}
-/*
- * This is a preliminary vp size calculation to allow us to check taps support.
- * The result is completely overridden afterwards.
- */
-static void calculate_viewport_size(struct pipe_ctx *pipe_ctx)
+static void reverse_adjust_recout_for_visual_confirm(struct rect *recout,
+ struct pipe_ctx *pipe_ctx)
{
- struct scaler_data *data = &pipe_ctx->plane_res.scl_data;
+ int dpp_offset, base_offset;
- data->viewport.width = dc_fixpt_ceil(dc_fixpt_mul_int(data->ratios.horz, data->recout.width));
- data->viewport.height = dc_fixpt_ceil(dc_fixpt_mul_int(data->ratios.vert, data->recout.height));
- data->viewport_c.width = dc_fixpt_ceil(dc_fixpt_mul_int(data->ratios.horz_c, data->recout.width));
- data->viewport_c.height = dc_fixpt_ceil(dc_fixpt_mul_int(data->ratios.vert_c, data->recout.height));
- if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_90 ||
- pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270) {
- swap(data->viewport.width, data->viewport.height);
- swap(data->viewport_c.width, data->viewport_c.height);
- }
+ calculate_adjust_recout_for_visual_confirm(pipe_ctx, &base_offset,
+ &dpp_offset);
+ recout->height += base_offset;
+ recout->height += dpp_offset;
}
-static void calculate_recout(struct pipe_ctx *pipe_ctx)
+static void adjust_recout_for_visual_confirm(struct rect *recout,
+ struct pipe_ctx *pipe_ctx)
{
- const struct dc_plane_state *plane_state = pipe_ctx->plane_state;
- const struct dc_stream_state *stream = pipe_ctx->stream;
- struct scaler_data *data = &pipe_ctx->plane_res.scl_data;
- struct rect surf_clip = plane_state->clip_rect;
- bool split_tb = stream->view_format == VIEW_3D_FORMAT_TOP_AND_BOTTOM;
- int split_count, split_idx;
+ int dpp_offset, base_offset;
- calculate_split_count_and_index(pipe_ctx, &split_count, &split_idx);
- if (stream->view_format == VIEW_3D_FORMAT_SIDE_BY_SIDE)
- split_idx = 0;
+ calculate_adjust_recout_for_visual_confirm(pipe_ctx, &base_offset,
+ &dpp_offset);
+ recout->height -= base_offset;
+ recout->height -= dpp_offset;
+}
+/*
+ * The function maps a plane clip from Stream Source Space to ODM Slice Space
+ * and calculates the rec of the overlapping area of MPC slice of the plane
+ * clip, ODM slice associated with the pipe context and stream destination rec.
+ */
+static void calculate_recout(struct pipe_ctx *pipe_ctx)
+{
/*
- * Only the leftmost ODM pipe should be offset by a nonzero distance
+ * A plane clip represents the desired plane size and position in Stream
+ * Source Space. Stream Source is the destination where all planes are
+ * blended (i.e. positioned, scaled and overlaid). It is a canvas where
+ * all planes associated with the current stream are drawn together.
+ * After Stream Source is completed, we will further scale and
+ * reposition the entire canvas of the stream source to Stream
+ * Destination in Timing Active Space. This could be due to display
+ * overscan adjustment where we will need to rescale and reposition all
+ * the planes so they can fit into a TV with overscan, or due to
+ * downscale/upscale features such as GPU scaling or VSR.
+ *
+ * This two step blending is a virtual procedure in software. In
+ * hardware there is no such thing as Stream Source. All planes are
+ * blended once in Timing Active Space. Software virtualizes a Stream
+ * Source space to decouple the math complexity so scaling param
+ * calculation focuses on one step at a time.
+ *
+ * In the following two diagrams, user applied 10% overscan adjustment
+ * so the Stream Source needs to be scaled down a little before mapping
+ * to Timing Active Space. As a result the Plane Clip is also scaled
+ * down by the same ratio, Plane Clip position (i.e. x and y) with
+ * respect to Stream Source is also scaled down. To map it in Timing
+ * Active Space additional x and y offsets from Stream Destination are
+ * added to Plane Clip as well.
+ *
+ * Stream Source Space
+ * ------------
+ * __________________________________________________
+ * |Stream Source (3840 x 2160) ^ |
+ * | y |
+ * | | |
+ * | __________________V |
+ * |<-- x -->|Plane Clip/////////| |
+ * | |(pre scale)////////| |
+ * | |///////////////////| |
+ * | |///////////////////| |
+ * | |///////////////////| |
+ * | |///////////////////| |
+ * | |///////////////////| |
+ * | |
+ * | |
+ * |__________________________________________________|
+ *
+ *
+ * Timing Active Space (3840 x 2160)
+ * ---------------------------------
+ *
+ * Timing Active
+ * __________________________________________________
+ * | y_____________________________________________ |
+ * |x |Stream Destination (3456 x 1944) | |
+ * | | | |
+ * | | __________________ | |
+ * | | |Plane Clip////////| | |
+ * | | |(post scale)//////| | |
+ * | | |//////////////////| | |
+ * | | |//////////////////| | |
+ * | | |//////////////////| | |
+ * | | |//////////////////| | |
+ * | | | |
+ * | | | |
+ * | |____________________________________________| |
+ * |__________________________________________________|
+ *
+ *
+ * In Timing Active Space a plane clip could be further sliced into
+ * pieces called MPC slices. Each Pipe Context is responsible for
+ * processing only one MPC slice so the plane processing workload can be
+ * distributed to multiple DPP Pipes. MPC slices could be blended
+ * together to a single ODM slice. Each ODM slice is responsible for
+ * processing a portion of Timing Active divided horizontally so the
+ * output pixel processing workload can be distributed to multiple OPP
+ * pipes. All ODM slices are mapped together in the ODM block so MPC
+ * slices belonging to different ODM slices can be pieced together to
+ * form a single image in Timing Active. Each MPC slice must belong to
+ * a single ODM slice. If an MPC slice crosses an ODM slice boundary, it
+ * needs to be divided into two MPC slices, one for each ODM slice.
+ *
+ * In the following diagram the output pixel processing workload is
+ * divided horizontally into two ODM slices one for each OPP blend tree.
+ * OPP0 blend tree is responsible for processing left half of Timing
+ * Active, while OPP2 blend tree is responsible for processing right
+ * half.
+ *
+ * The plane has two MPC slices. However since the right MPC slice goes
+ * across ODM boundary, two DPP pipes are needed one for each OPP blend
+ * tree. (i.e. DPP1 for OPP0 blend tree and DPP2 for OPP2 blend tree).
+ *
+ * Assuming that we have a Pipe Context associated with OPP0 and DPP1
+ * working on processing the plane in the diagram. We want to know the
+ * width and height of the shaded rectangle and its relative position
+ * with respect to the ODM slice0. This is called the recout of the pipe
+ * context.
+ *
+ * Planes can be at arbitrary size and position and there could be an
+ * arbitrary number of MPC and ODM slices. The algorithm needs to take
+ * all scenarios into account.
+ *
+ * Timing Active Space (3840 x 2160)
+ * ---------------------------------
+ *
+ * Timing Active
+ * __________________________________________________
+ * |OPP0(ODM slice0)^ |OPP2(ODM slice1) |
+ * | y | |
+ * | | <- w -> |
+ * | _____V________|____ |
+ * | |DPP0 ^ |DPP1 |DPP2| |
+ * |<------ x |-----|->|/////| | |
+ * | | | |/////| | |
+ * | | h |/////| | |
+ * | | | |/////| | |
+ * | |_____V__|/////|____| |
+ * | | |
+ * | | |
+ * | | |
+ * |_________________________|________________________|
+ *
+ *
*/
- if (!pipe_ctx->prev_odm_pipe || split_idx == split_count) {
- data->recout.x = stream->dst.x;
- if (stream->src.x < surf_clip.x)
- data->recout.x += (surf_clip.x - stream->src.x) * stream->dst.width
- / stream->src.width;
- } else
- data->recout.x = 0;
-
- if (stream->src.x > surf_clip.x)
- surf_clip.width -= stream->src.x - surf_clip.x;
- data->recout.width = surf_clip.width * stream->dst.width / stream->src.width;
- if (data->recout.width + data->recout.x > stream->dst.x + stream->dst.width)
- data->recout.width = stream->dst.x + stream->dst.width - data->recout.x;
-
- data->recout.y = stream->dst.y;
- if (stream->src.y < surf_clip.y)
- data->recout.y += (surf_clip.y - stream->src.y) * stream->dst.height
- / stream->src.height;
- else if (stream->src.y > surf_clip.y)
- surf_clip.height -= stream->src.y - surf_clip.y;
-
- data->recout.height = surf_clip.height * stream->dst.height / stream->src.height;
- if (data->recout.height + data->recout.y > stream->dst.y + stream->dst.height)
- data->recout.height = stream->dst.y + stream->dst.height - data->recout.y;
-
- /* Handle h & v split */
- if (split_tb) {
- ASSERT(data->recout.height % 2 == 0);
- data->recout.height /= 2;
- } else if (split_count) {
- if (!pipe_ctx->next_odm_pipe && !pipe_ctx->prev_odm_pipe) {
- /* extra pixels in the division remainder need to go to pipes after
- * the extra pixel index minus one(epimo) defined here as:
- */
- int epimo = split_count - data->recout.width % (split_count + 1);
-
- data->recout.x += (data->recout.width / (split_count + 1)) * split_idx;
- if (split_idx > epimo)
- data->recout.x += split_idx - epimo - 1;
- ASSERT(stream->view_format != VIEW_3D_FORMAT_SIDE_BY_SIDE || data->recout.width % 2 == 0);
- data->recout.width = data->recout.width / (split_count + 1) + (split_idx > epimo ? 1 : 0);
- } else {
- /* odm */
- if (split_idx == split_count) {
- /* rightmost pipe is the remainder recout */
- data->recout.width -= data->h_active * split_count - data->recout.x;
-
- /* ODM combine cases with MPO we can get negative widths */
- if (data->recout.width < 0)
- data->recout.width = 0;
-
- data->recout.x = 0;
- } else
- data->recout.width = data->h_active - data->recout.x;
- }
+ struct rect plane_clip;
+ struct rect mpc_slice_of_plane_clip;
+ struct rect odm_slice_src;
+ struct rect overlapping_area;
+
+ plane_clip = calculate_plane_rec_in_timing_active(pipe_ctx,
+ &pipe_ctx->plane_state->clip_rect);
+ /* guard plane clip from drawing beyond stream dst here */
+ plane_clip = intersect_rec(&plane_clip,
+ &pipe_ctx->stream->dst);
+ mpc_slice_of_plane_clip = calculate_mpc_slice_in_timing_active(
+ pipe_ctx, &plane_clip);
+ odm_slice_src = resource_get_odm_slice_src_rect(pipe_ctx);
+ overlapping_area = intersect_rec(&mpc_slice_of_plane_clip, &odm_slice_src);
+ if (overlapping_area.height > 0 &&
+ overlapping_area.width > 0) {
+ /* shift the overlapping area so it is with respect to current
+ * ODM slice source's position
+ */
+ pipe_ctx->plane_res.scl_data.recout = shift_rec(
+ &overlapping_area,
+ -odm_slice_src.x, -odm_slice_src.y);
+ adjust_recout_for_visual_confirm(
+ &pipe_ctx->plane_res.scl_data.recout,
+ pipe_ctx);
+ } else {
+ /* if there is no overlap, zero recout */
+ memset(&pipe_ctx->plane_res.scl_data.recout, 0,
+ sizeof(struct rect));
}
+
}
static void calculate_scaling_ratios(struct pipe_ctx *pipe_ctx)
@@ -957,31 +1296,30 @@ static void calculate_init_and_vp(
static void calculate_inits_and_viewports(struct pipe_ctx *pipe_ctx)
{
const struct dc_plane_state *plane_state = pipe_ctx->plane_state;
- const struct dc_stream_state *stream = pipe_ctx->stream;
struct scaler_data *data = &pipe_ctx->plane_res.scl_data;
struct rect src = plane_state->src_rect;
+ struct rect recout_dst_in_active_timing;
+ struct rect recout_clip_in_active_timing;
+ struct rect recout_clip_in_recout_dst;
+ struct rect overlap_in_active_timing;
+ struct rect odm_slice_src = resource_get_odm_slice_src_rect(pipe_ctx);
int vpc_div = (data->format == PIXEL_FORMAT_420BPP8
|| data->format == PIXEL_FORMAT_420BPP10) ? 2 : 1;
- int split_count, split_idx, ro_lb, ro_tb, recout_full_x, recout_full_y;
bool orthogonal_rotation, flip_vert_scan_dir, flip_horz_scan_dir;
- calculate_split_count_and_index(pipe_ctx, &split_count, &split_idx);
- /*
- * recout full is what the recout would have been if we didnt clip
- * the source plane at all. We only care about left(ro_lb) and top(ro_tb)
- * offsets of recout within recout full because those are the directions
- * we scan from and therefore the only ones that affect inits.
- */
- recout_full_x = stream->dst.x + (plane_state->dst_rect.x - stream->src.x)
- * stream->dst.width / stream->src.width;
- recout_full_y = stream->dst.y + (plane_state->dst_rect.y - stream->src.y)
- * stream->dst.height / stream->src.height;
- if (pipe_ctx->prev_odm_pipe && split_idx)
- ro_lb = data->h_active * split_idx - recout_full_x;
+ recout_clip_in_active_timing = shift_rec(
+ &data->recout, odm_slice_src.x, odm_slice_src.y);
+ recout_dst_in_active_timing = calculate_plane_rec_in_timing_active(
+ pipe_ctx, &plane_state->dst_rect);
+ overlap_in_active_timing = intersect_rec(&recout_clip_in_active_timing,
+ &recout_dst_in_active_timing);
+ if (overlap_in_active_timing.width > 0 &&
+ overlap_in_active_timing.height > 0)
+ recout_clip_in_recout_dst = shift_rec(&overlap_in_active_timing,
+ -recout_dst_in_active_timing.x,
+ -recout_dst_in_active_timing.y);
else
- ro_lb = data->recout.x - recout_full_x;
- ro_tb = data->recout.y - recout_full_y;
- ASSERT(ro_lb >= 0 && ro_tb >= 0);
+ memset(&recout_clip_in_recout_dst, 0, sizeof(struct rect));
/*
* Work in recout rotation since that requires less transformations
@@ -1000,7 +1338,7 @@ static void calculate_inits_and_viewports(struct pipe_ctx *pipe_ctx)
calculate_init_and_vp(
flip_horz_scan_dir,
- ro_lb,
+ recout_clip_in_recout_dst.x,
data->recout.width,
src.width,
data->taps.h_taps,
@@ -1010,7 +1348,7 @@ static void calculate_inits_and_viewports(struct pipe_ctx *pipe_ctx)
&data->viewport.width);
calculate_init_and_vp(
flip_horz_scan_dir,
- ro_lb,
+ recout_clip_in_recout_dst.x,
data->recout.width,
src.width / vpc_div,
data->taps.h_taps_c,
@@ -1020,7 +1358,7 @@ static void calculate_inits_and_viewports(struct pipe_ctx *pipe_ctx)
&data->viewport_c.width);
calculate_init_and_vp(
flip_vert_scan_dir,
- ro_tb,
+ recout_clip_in_recout_dst.y,
data->recout.height,
src.height,
data->taps.v_taps,
@@ -1030,7 +1368,7 @@ static void calculate_inits_and_viewports(struct pipe_ctx *pipe_ctx)
&data->viewport.height);
calculate_init_and_vp(
flip_vert_scan_dir,
- ro_tb,
+ recout_clip_in_recout_dst.y,
data->recout.height,
src.height / vpc_div,
data->taps.v_taps_c,
@@ -1051,15 +1389,118 @@ static void calculate_inits_and_viewports(struct pipe_ctx *pipe_ctx)
data->viewport_c.y += src.y / vpc_div;
}
+static enum controller_dp_test_pattern convert_dp_to_controller_test_pattern(
+ enum dp_test_pattern test_pattern)
+{
+ enum controller_dp_test_pattern controller_test_pattern;
+
+ switch (test_pattern) {
+ case DP_TEST_PATTERN_COLOR_SQUARES:
+ controller_test_pattern =
+ CONTROLLER_DP_TEST_PATTERN_COLORSQUARES;
+ break;
+ case DP_TEST_PATTERN_COLOR_SQUARES_CEA:
+ controller_test_pattern =
+ CONTROLLER_DP_TEST_PATTERN_COLORSQUARES_CEA;
+ break;
+ case DP_TEST_PATTERN_VERTICAL_BARS:
+ controller_test_pattern =
+ CONTROLLER_DP_TEST_PATTERN_VERTICALBARS;
+ break;
+ case DP_TEST_PATTERN_HORIZONTAL_BARS:
+ controller_test_pattern =
+ CONTROLLER_DP_TEST_PATTERN_HORIZONTALBARS;
+ break;
+ case DP_TEST_PATTERN_COLOR_RAMP:
+ controller_test_pattern =
+ CONTROLLER_DP_TEST_PATTERN_COLORRAMP;
+ break;
+ default:
+ controller_test_pattern =
+ CONTROLLER_DP_TEST_PATTERN_VIDEOMODE;
+ break;
+ }
+
+ return controller_test_pattern;
+}
+
+static enum controller_dp_color_space convert_dp_to_controller_color_space(
+ enum dp_test_pattern_color_space color_space)
+{
+ enum controller_dp_color_space controller_color_space;
+
+ switch (color_space) {
+ case DP_TEST_PATTERN_COLOR_SPACE_RGB:
+ controller_color_space = CONTROLLER_DP_COLOR_SPACE_RGB;
+ break;
+ case DP_TEST_PATTERN_COLOR_SPACE_YCBCR601:
+ controller_color_space = CONTROLLER_DP_COLOR_SPACE_YCBCR601;
+ break;
+ case DP_TEST_PATTERN_COLOR_SPACE_YCBCR709:
+ controller_color_space = CONTROLLER_DP_COLOR_SPACE_YCBCR709;
+ break;
+ case DP_TEST_PATTERN_COLOR_SPACE_UNDEFINED:
+ default:
+ controller_color_space = CONTROLLER_DP_COLOR_SPACE_UDEFINED;
+ break;
+ }
+
+ return controller_color_space;
+}
+
+void resource_build_test_pattern_params(struct resource_context *res_ctx,
+ struct pipe_ctx *otg_master)
+{
+ struct pipe_ctx *opp_heads[MAX_PIPES];
+ struct test_pattern_params *params;
+ int odm_cnt;
+ enum controller_dp_test_pattern controller_test_pattern;
+ enum controller_dp_color_space controller_color_space;
+ enum dc_color_depth color_depth = otg_master->stream->timing.display_color_depth;
+ struct rect odm_slice_src;
+ int i;
+
+ controller_test_pattern = convert_dp_to_controller_test_pattern(
+ otg_master->stream->test_pattern.type);
+ controller_color_space = convert_dp_to_controller_color_space(
+ otg_master->stream->test_pattern.color_space);
+
+ if (controller_test_pattern == CONTROLLER_DP_TEST_PATTERN_VIDEOMODE)
+ return;
+
+ odm_cnt = resource_get_opp_heads_for_otg_master(otg_master, res_ctx, opp_heads);
+
+ for (i = 0; i < odm_cnt; i++) {
+ odm_slice_src = resource_get_odm_slice_src_rect(opp_heads[i]);
+ params = &opp_heads[i]->stream_res.test_pattern_params;
+ params->test_pattern = controller_test_pattern;
+ params->color_space = controller_color_space;
+ params->color_depth = color_depth;
+ params->height = odm_slice_src.height;
+ params->offset = odm_slice_src.x;
+ params->width = odm_slice_src.width;
+ }
+}
+
bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
{
const struct dc_plane_state *plane_state = pipe_ctx->plane_state;
struct dc_crtc_timing *timing = &pipe_ctx->stream->timing;
+ const struct rect odm_slice_src = resource_get_odm_slice_src_rect(pipe_ctx);
+ struct scaling_taps temp = {0};
bool res = false;
+
DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger);
- pipe_ctx->plane_res.scl_data.format = convert_pixel_format_to_dalsurface(
- pipe_ctx->plane_state->format);
+ /* Invalid input */
+ if (!plane_state ||
+ !plane_state->dst_rect.width ||
+ !plane_state->dst_rect.height ||
+ !plane_state->src_rect.width ||
+ !plane_state->src_rect.height) {
+ ASSERT(0);
+ return false;
+ }
/* Timing borders are part of vactive that we are also supposed to skip in addition
* to any stream dst offset. Since dm logic assumes dst is in addressable
@@ -1070,24 +1511,45 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
pipe_ctx->stream->dst.y += timing->v_border_top;
/* Calculate H and V active size */
- pipe_ctx->plane_res.scl_data.h_active = timing->h_addressable +
- timing->h_border_left + timing->h_border_right;
- pipe_ctx->plane_res.scl_data.v_active = timing->v_addressable +
- timing->v_border_top + timing->v_border_bottom;
- if (pipe_ctx->next_odm_pipe || pipe_ctx->prev_odm_pipe)
- pipe_ctx->plane_res.scl_data.h_active /= get_num_odm_splits(pipe_ctx) + 1;
+ pipe_ctx->plane_res.scl_data.h_active = odm_slice_src.width;
+ pipe_ctx->plane_res.scl_data.v_active = odm_slice_src.height;
+ pipe_ctx->plane_res.scl_data.format = convert_pixel_format_to_dalsurface(
+ pipe_ctx->plane_state->format);
+
+#if defined(CONFIG_DRM_AMD_DC_FP)
+ if ((pipe_ctx->stream->ctx->dc->config.use_spl) && (!pipe_ctx->stream->ctx->dc->debug.disable_spl)) {
+ struct spl_in *spl_in = &pipe_ctx->plane_res.spl_in;
+ struct spl_out *spl_out = &pipe_ctx->plane_res.spl_out;
+ if (plane_state->ctx->dce_version > DCE_VERSION_MAX)
+ pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_36BPP;
+ else
+ pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP;
+
+ pipe_ctx->plane_res.scl_data.lb_params.alpha_en = plane_state->per_pixel_alpha;
+
+ // Convert pipe_ctx to respective input params for SPL
+ translate_SPL_in_params_from_pipe_ctx(pipe_ctx, spl_in);
+ /* Pass visual confirm debug information */
+ calculate_adjust_recout_for_visual_confirm(pipe_ctx,
+ &spl_in->debug.visual_confirm_base_offset,
+ &spl_in->debug.visual_confirm_dpp_offset);
+ // Set SPL output parameters to dscl_prog_data to be used for hw registers
+ spl_out->dscl_prog_data = resource_get_dscl_prog_data(pipe_ctx);
+ // Calculate scaler parameters from SPL
+ res = spl_calculate_scaler_params(spl_in, spl_out);
+ // Convert respective out params from SPL to scaler data
+ translate_SPL_out_params_to_pipe_ctx(pipe_ctx, spl_out);
+
+ /* Ignore scaler failure if pipe context plane is phantom plane */
+ if (!res && plane_state->is_phantom)
+ res = true;
+ } else {
+#endif
/* depends on h_active */
calculate_recout(pipe_ctx);
/* depends on pixel format */
calculate_scaling_ratios(pipe_ctx);
- /* depends on scaling ratios and recout, does not calculate offset yet */
- calculate_viewport_size(pipe_ctx);
-
- /* Stopgap for validation of ODM + MPO on one side of screen case */
- if (pipe_ctx->plane_res.scl_data.viewport.height < 1 ||
- pipe_ctx->plane_res.scl_data.viewport.width < 1)
- return false;
/*
* LB calculations depend on vp size, h/v_active and scaling ratios
@@ -1095,18 +1557,37 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
* on certain displays, such as the Sharp 4k. 36bpp is needed
* to support SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 and
* SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616 with actual > 10 bpc
- * precision on at least DCN display engines. However, at least
- * Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth,
- * so use only 30 bpp on DCE_VERSION_11_0. Testing with DCE 11.2 and 8.3
- * did not show such problems, so this seems to be the exception.
+ * precision on DCN display engines, but apparently not for DCE, as
+ * far as testing on DCE-11.2 and DCE-8 showed. Various DCE parts have
+ * problems: Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth,
+ * neither does DCE-8 at 4k resolution, nor DCE-11.2 (broken identity pixel
+ * passthrough). Therefore only use 36 bpp on DCN where it is actually needed.
*/
- if (plane_state->ctx->dce_version > DCE_VERSION_11_0)
+ if (plane_state->ctx->dce_version > DCE_VERSION_MAX)
pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_36BPP;
else
pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP;
pipe_ctx->plane_res.scl_data.lb_params.alpha_en = plane_state->per_pixel_alpha;
+ // Get the tap values using 100x100 dummy viewport data for max scaling quality;
+ // override them if a different scaling quality is required
+ pipe_ctx->plane_res.scl_data.viewport.width = 100;
+ pipe_ctx->plane_res.scl_data.viewport.height = 100;
+ pipe_ctx->plane_res.scl_data.viewport_c.width = 100;
+ pipe_ctx->plane_res.scl_data.viewport_c.height = 100;
+ if (pipe_ctx->plane_res.xfm != NULL)
+ res = pipe_ctx->plane_res.xfm->funcs->transform_get_optimal_number_of_taps(
+ pipe_ctx->plane_res.xfm, &pipe_ctx->plane_res.scl_data, &plane_state->scaling_quality);
+
+ if (pipe_ctx->plane_res.dpp != NULL)
+ res = pipe_ctx->plane_res.dpp->funcs->dpp_get_optimal_number_of_taps(
+ pipe_ctx->plane_res.dpp, &pipe_ctx->plane_res.scl_data, &plane_state->scaling_quality);
+
+ temp = pipe_ctx->plane_res.scl_data.taps;
+
+ calculate_inits_and_viewports(pipe_ctx);
+
if (pipe_ctx->plane_res.xfm != NULL)
res = pipe_ctx->plane_res.xfm->funcs->transform_get_optimal_number_of_taps(
pipe_ctx->plane_res.xfm, &pipe_ctx->plane_res.scl_data, &plane_state->scaling_quality);
@@ -1133,11 +1614,14 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
&plane_state->scaling_quality);
}
- /*
- * Depends on recout, scaling ratios, h_active and taps
- * May need to re-check lb size after this in some obscure scenario
- */
- if (res)
+ /* Ignore scaler failure if pipe context plane is phantom plane */
+ if (!res && plane_state->is_phantom)
+ res = true;
+
+ if (res && (pipe_ctx->plane_res.scl_data.taps.v_taps != temp.v_taps ||
+ pipe_ctx->plane_res.scl_data.taps.h_taps != temp.h_taps ||
+ pipe_ctx->plane_res.scl_data.taps.v_taps_c != temp.v_taps_c ||
+ pipe_ctx->plane_res.scl_data.taps.h_taps_c != temp.h_taps_c))
calculate_inits_and_viewports(pipe_ctx);
/*
@@ -1155,18 +1639,14 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
pipe_ctx->plane_res.scl_data.recout.x += pipe_ctx->plane_res.scl_data.recout.width;
}
- if (!pipe_ctx->stream->ctx->dc->config.enable_windowed_mpo_odm) {
- if (pipe_ctx->plane_res.scl_data.viewport.height < MIN_VIEWPORT_SIZE ||
- pipe_ctx->plane_res.scl_data.viewport.width < MIN_VIEWPORT_SIZE)
- res = false;
- } else {
- /* Clamp minimum viewport size */
- if (pipe_ctx->plane_res.scl_data.viewport.height < MIN_VIEWPORT_SIZE)
- pipe_ctx->plane_res.scl_data.viewport.height = MIN_VIEWPORT_SIZE;
- if (pipe_ctx->plane_res.scl_data.viewport.width < MIN_VIEWPORT_SIZE)
- pipe_ctx->plane_res.scl_data.viewport.width = MIN_VIEWPORT_SIZE;
+ /* Clamp minimum viewport size */
+ if (pipe_ctx->plane_res.scl_data.viewport.height < MIN_VIEWPORT_SIZE)
+ pipe_ctx->plane_res.scl_data.viewport.height = MIN_VIEWPORT_SIZE;
+ if (pipe_ctx->plane_res.scl_data.viewport.width < MIN_VIEWPORT_SIZE)
+ pipe_ctx->plane_res.scl_data.viewport.width = MIN_VIEWPORT_SIZE;
+#ifdef CONFIG_DRM_AMD_DC_FP
}
-
+#endif
DC_LOG_SCALER("%s pipe %d:\nViewport: height:%d width:%d x:%d y:%d Recout: height:%d width:%d x:%d y:%d HACTIVE:%d VACTIVE:%d\n"
"src_rect: height:%d width:%d x:%d y:%d dst_rect: height:%d width:%d x:%d y:%d clip_rect: height:%d width:%d x:%d y:%d\n",
__func__,
@@ -1200,6 +1680,62 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
return res;
}
+/*
+ * Decide whether the cursor on @pipe_ctx can be disabled.
+ *
+ * Walks the top_pipe chain above @pipe_ctx and returns true as soon as a
+ * visible plane on a different layer has a recout that fully contains this
+ * pipe's recout. Every recout is first passed through
+ * reverse_adjust_recout_for_visual_confirm() before it is compared.
+ * Returns false when no such plane is found.
+ *
+ * The caller must pass a pipe with a non-NULL plane_state; it is
+ * dereferenced unconditionally to read the layer index.
+ */
+bool resource_can_pipe_disable_cursor(struct pipe_ctx *pipe_ctx)
+{
+	struct pipe_ctx *test_pipe, *split_pipe;
+	struct rect r1 = pipe_ctx->plane_res.scl_data.recout;
+	int r1_right, r1_bottom;
+	int cur_layer = pipe_ctx->plane_state->layer_index;
+
+	reverse_adjust_recout_for_visual_confirm(&r1, pipe_ctx);
+	r1_right = r1.x + r1.width;
+	r1_bottom = r1.y + r1.height;
+
+	/**
+	 * Disable the cursor if there's another pipe above this with a
+	 * plane that contains this pipe's viewport to prevent double cursor
+	 * and incorrect scaling artifacts.
+	 */
+	for (test_pipe = pipe_ctx->top_pipe; test_pipe;
+	     test_pipe = test_pipe->top_pipe) {
+		struct rect r2;
+		int r2_right, r2_bottom;
+		// Skip invisible layer and pipe-split plane on same layer
+		if (!test_pipe->plane_state ||
+		    !test_pipe->plane_state->visible ||
+		    test_pipe->plane_state->layer_index == cur_layer)
+			continue;
+
+		r2 = test_pipe->plane_res.scl_data.recout;
+		reverse_adjust_recout_for_visual_confirm(&r2, test_pipe);
+		r2_right = r2.x + r2.width;
+		r2_bottom = r2.y + r2.height;
+
+		/**
+		 * There is another half plane on same layer because of
+		 * pipe-split, merge together per same height.
+		 *
+		 * NOTE(review): this walk starts at pipe_ctx->top_pipe, so the
+		 * first match may be test_pipe itself - confirm whether merging
+		 * a recout with itself is intended.
+		 */
+		for (split_pipe = pipe_ctx->top_pipe; split_pipe;
+		     split_pipe = split_pipe->top_pipe) {
+			struct rect r2_half;
+
+			/*
+			 * This is the same chain the outer loop walks, where a
+			 * NULL plane_state is tolerated; skip such pipes here
+			 * too instead of dereferencing a NULL pointer.
+			 */
+			if (!split_pipe->plane_state)
+				continue;
+			if (split_pipe->plane_state->layer_index !=
+			    test_pipe->plane_state->layer_index)
+				continue;
+
+			r2_half = split_pipe->plane_res.scl_data.recout;
+			reverse_adjust_recout_for_visual_confirm(&r2_half, split_pipe);
+			r2.x = min(r2_half.x, r2.x);
+			r2.width = r2.width + r2_half.width;
+			r2_right = r2.x + r2.width;
+			r2_bottom = min(r2_bottom, r2_half.y + r2_half.height);
+			break;
+		}
+
+		if (r1.x >= r2.x && r1.y >= r2.y && r1_right <= r2_right && r1_bottom <= r2_bottom)
+			return true;
+	}
+
+	return false;
+}
+
enum dc_status resource_build_scaling_params_for_context(
const struct dc *dc,
@@ -1217,7 +1753,7 @@ enum dc_status resource_build_scaling_params_for_context(
return DC_OK;
}
-struct pipe_ctx *find_idle_secondary_pipe(
+struct pipe_ctx *resource_find_free_secondary_pipe_legacy(
struct resource_context *res_ctx,
const struct resource_pool *pool,
const struct pipe_ctx *primary_pipe)
@@ -1277,28 +1813,715 @@ struct pipe_ctx *find_idle_secondary_pipe(
return secondary_pipe;
}
-struct pipe_ctx *resource_get_head_pipe_for_stream(
+/*
+ * Walk the next_odm_pipe chain that follows @cur_otg_master and return the
+ * index of the first pipe on it that is a FREE_PIPE in @new_res_ctx.
+ * Returns FREE_PIPE_INDEX_NOT_FOUND when the chain holds no such pipe.
+ * @cur_res_ctx is not read by this function.
+ */
+int resource_find_free_pipe_used_as_sec_opp_head_by_cur_otg_master(
+		const struct resource_context *cur_res_ctx,
+		struct resource_context *new_res_ctx,
+		const struct pipe_ctx *cur_otg_master)
+{
+	const struct pipe_ctx *odm_pipe;
+
+	for (odm_pipe = cur_otg_master->next_odm_pipe; odm_pipe;
+	     odm_pipe = odm_pipe->next_odm_pipe) {
+		int idx = odm_pipe->pipe_idx;
+
+		if (resource_is_pipe_type(&new_res_ctx->pipe_ctx[idx], FREE_PIPE))
+			return idx;
+	}
+
+	return FREE_PIPE_INDEX_NOT_FOUND;
+}
+
+/*
+ * Walk the bottom_pipe chain below @cur_opp_head and return the index of the
+ * first pipe on it that is a FREE_PIPE in @new_res_ctx.
+ * Returns FREE_PIPE_INDEX_NOT_FOUND when none qualifies.
+ * @cur_res_ctx is not read by this function.
+ */
+int resource_find_free_pipe_used_in_cur_mpc_blending_tree(
+		const struct resource_context *cur_res_ctx,
+		struct resource_context *new_res_ctx,
+		const struct pipe_ctx *cur_opp_head)
+{
+	const struct pipe_ctx *blend_pipe;
+
+	/*
+	 * Prefer a free pipe already used in the current opp blend tree, to
+	 * avoid an MPO pipe switching to a different opp blending tree.
+	 */
+	for (blend_pipe = cur_opp_head->bottom_pipe; blend_pipe;
+	     blend_pipe = blend_pipe->bottom_pipe) {
+		int idx = blend_pipe->pipe_idx;
+
+		if (resource_is_pipe_type(&new_res_ctx->pipe_ctx[idx], FREE_PIPE))
+			return idx;
+	}
+
+	return FREE_PIPE_INDEX_NOT_FOUND;
+}
+
+/*
+ * Return the lowest pipe index below pool->pipe_count that is a FREE_PIPE in
+ * both @cur_res_ctx and @new_res_ctx, or FREE_PIPE_INDEX_NOT_FOUND.
+ */
+int recource_find_free_pipe_not_used_in_cur_res_ctx(
+		const struct resource_context *cur_res_ctx,
+		struct resource_context *new_res_ctx,
+		const struct resource_pool *pool)
+{
+	int idx;
+
+	for (idx = 0; idx < pool->pipe_count; idx++) {
+		const struct pipe_ctx *old_pipe = &cur_res_ctx->pipe_ctx[idx];
+		const struct pipe_ctx *next_pipe = &new_res_ctx->pipe_ctx[idx];
+
+		if (!resource_is_pipe_type(old_pipe, FREE_PIPE))
+			continue;
+		if (resource_is_pipe_type(next_pipe, FREE_PIPE))
+			return idx;
+	}
+
+	return FREE_PIPE_INDEX_NOT_FOUND;
+}
+
+/*
+ * Return the lowest pipe index below pool->pipe_count that is an OTG_MASTER
+ * in @cur_res_ctx and a FREE_PIPE in @new_res_ctx, or
+ * FREE_PIPE_INDEX_NOT_FOUND.
+ */
+int recource_find_free_pipe_used_as_otg_master_in_cur_res_ctx(
+		const struct resource_context *cur_res_ctx,
+		struct resource_context *new_res_ctx,
+		const struct resource_pool *pool)
+{
+	int idx;
+
+	for (idx = 0; idx < pool->pipe_count; idx++) {
+		const struct pipe_ctx *old_pipe = &cur_res_ctx->pipe_ctx[idx];
+		const struct pipe_ctx *next_pipe = &new_res_ctx->pipe_ctx[idx];
+
+		if (!resource_is_pipe_type(old_pipe, OTG_MASTER))
+			continue;
+		if (resource_is_pipe_type(next_pipe, FREE_PIPE))
+			return idx;
+	}
+
+	return FREE_PIPE_INDEX_NOT_FOUND;
+}
+
+/*
+ * Return the lowest pipe index below pool->pipe_count whose pipe is a
+ * DPP_PIPE but not an OPP_HEAD in @cur_res_ctx and a FREE_PIPE in
+ * @new_res_ctx, or FREE_PIPE_INDEX_NOT_FOUND.
+ */
+int resource_find_free_pipe_used_as_cur_sec_dpp(
+		const struct resource_context *cur_res_ctx,
+		struct resource_context *new_res_ctx,
+		const struct resource_pool *pool)
+{
+	int idx;
+
+	for (idx = 0; idx < pool->pipe_count; idx++) {
+		const struct pipe_ctx *old_pipe = &cur_res_ctx->pipe_ctx[idx];
+		const struct pipe_ctx *next_pipe = &new_res_ctx->pipe_ctx[idx];
+
+		if (!resource_is_pipe_type(old_pipe, DPP_PIPE))
+			continue;
+		if (resource_is_pipe_type(old_pipe, OPP_HEAD))
+			continue;
+		if (resource_is_pipe_type(next_pipe, FREE_PIPE))
+			return idx;
+	}
+
+	return FREE_PIPE_INDEX_NOT_FOUND;
+}
+
+/*
+ * Return the lowest pipe index below pool->pipe_count whose pipe, in
+ * @cur_res_ctx, is a DPP_PIPE, is not an OPP_HEAD and has an MPC slice index
+ * greater than zero, and which is a FREE_PIPE in @new_res_ctx.
+ * Returns FREE_PIPE_INDEX_NOT_FOUND when there is none.
+ */
+int resource_find_free_pipe_used_as_cur_sec_dpp_in_mpcc_combine(
+		const struct resource_context *cur_res_ctx,
+		struct resource_context *new_res_ctx,
+		const struct resource_pool *pool)
+{
+	int idx;
+
+	for (idx = 0; idx < pool->pipe_count; idx++) {
+		const struct pipe_ctx *old_pipe = &cur_res_ctx->pipe_ctx[idx];
+		const struct pipe_ctx *next_pipe = &new_res_ctx->pipe_ctx[idx];
+
+		if (!resource_is_pipe_type(old_pipe, DPP_PIPE))
+			continue;
+		if (resource_is_pipe_type(old_pipe, OPP_HEAD))
+			continue;
+		if (resource_get_mpc_slice_index(old_pipe) <= 0)
+			continue;
+		if (resource_is_pipe_type(next_pipe, FREE_PIPE))
+			return idx;
+	}
+
+	return FREE_PIPE_INDEX_NOT_FOUND;
+}
+
+/*
+ * Return the lowest pipe index below pool->pipe_count that is a FREE_PIPE in
+ * @new_res_ctx, or FREE_PIPE_INDEX_NOT_FOUND when every pipe is in use.
+ */
+int resource_find_any_free_pipe(struct resource_context *new_res_ctx,
+		const struct resource_pool *pool)
+{
+	int idx;
+
+	for (idx = 0; idx < pool->pipe_count; idx++) {
+		const struct pipe_ctx *candidate = &new_res_ctx->pipe_ctx[idx];
+
+		if (resource_is_pipe_type(candidate, FREE_PIPE))
+			return idx;
+	}
+
+	return FREE_PIPE_INDEX_NOT_FOUND;
+}
+
+/*
+ * Test whether @pipe_ctx matches @type, based only on which of its links are
+ * set:
+ *   OTG_MASTER - has a stream, no top_pipe and no prev_odm_pipe
+ *   OPP_HEAD   - has a stream and no top_pipe
+ *   DPP_PIPE   - has both a stream and a plane_state
+ *   FREE_PIPE  - has neither a stream nor a plane_state
+ * Any other @type value yields false.
+ */
+bool resource_is_pipe_type(const struct pipe_ctx *pipe_ctx, enum pipe_type type)
+{
+	bool has_stream = pipe_ctx->stream != NULL;
+	bool has_plane = pipe_ctx->plane_state != NULL;
+	bool has_top = pipe_ctx->top_pipe != NULL;
+
+	if (type == OTG_MASTER)
+		return has_stream && !has_top && !pipe_ctx->prev_odm_pipe;
+	if (type == OPP_HEAD)
+		return has_stream && !has_top;
+	if (type == DPP_PIPE)
+		return has_stream && has_plane;
+	if (type == FREE_PIPE)
+		return !has_stream && !has_plane;
+
+	return false;
+}
+
+struct pipe_ctx *resource_get_otg_master_for_stream(
struct resource_context *res_ctx,
- struct dc_stream_state *stream)
+ const struct dc_stream_state *stream)
{
int i;
for (i = 0; i < MAX_PIPES; i++) {
- if (res_ctx->pipe_ctx[i].stream == stream
- && !res_ctx->pipe_ctx[i].top_pipe
- && !res_ctx->pipe_ctx[i].prev_odm_pipe)
+ if (res_ctx->pipe_ctx[i].stream == stream &&
+ resource_is_pipe_type(&res_ctx->pipe_ctx[i], OTG_MASTER))
return &res_ctx->pipe_ctx[i];
}
return NULL;
}
-static struct pipe_ctx *resource_get_tail_pipe(
+int resource_get_opp_heads_for_otg_master(const struct pipe_ctx *otg_master,
struct resource_context *res_ctx,
- struct pipe_ctx *head_pipe)
+ struct pipe_ctx *opp_heads[MAX_PIPES])
{
- struct pipe_ctx *tail_pipe;
+ struct pipe_ctx *opp_head = &res_ctx->pipe_ctx[otg_master->pipe_idx];
+ struct dc *dc = otg_master->stream->ctx->dc;
+ int i = 0;
+
+ DC_LOGGER_INIT(dc->ctx->logger);
- tail_pipe = head_pipe->bottom_pipe;
+ if (!resource_is_pipe_type(otg_master, OTG_MASTER)) {
+ DC_LOG_WARNING("%s called from a non OTG master, something "
+ "is wrong in the pipe configuration",
+ __func__);
+ ASSERT(0);
+ return 0;
+ }
+ while (opp_head) {
+ ASSERT(i < MAX_PIPES);
+ opp_heads[i++] = opp_head;
+ opp_head = opp_head->next_odm_pipe;
+ }
+ return i;
+}
+
+/*
+ * Fill @dpp_pipes with the DPP pipes under @opp_head, taken from @res_ctx.
+ *
+ * Starts at the pipe in @res_ctx that has @opp_head's pipe index and follows
+ * bottom_pipe for as long as the pipe is a DPP_PIPE. Returns the number of
+ * entries written. Asserts and returns 0 if @opp_head is not an OPP_HEAD.
+ */
+int resource_get_dpp_pipes_for_opp_head(const struct pipe_ctx *opp_head,
+		struct resource_context *res_ctx,
+		struct pipe_ctx *dpp_pipes[MAX_PIPES])
+{
+	struct pipe_ctx *cur;
+	int count = 0;
+
+	if (!resource_is_pipe_type(opp_head, OPP_HEAD)) {
+		ASSERT(0);
+		return 0;
+	}
+
+	for (cur = &res_ctx->pipe_ctx[opp_head->pipe_idx];
+	     cur && resource_is_pipe_type(cur, DPP_PIPE);
+	     cur = cur->bottom_pipe) {
+		ASSERT(count < MAX_PIPES);
+		dpp_pipes[count++] = cur;
+	}
+
+	return count;
+}
+
+/*
+ * Fill @dpp_pipes with the pipes in @res_ctx that carry @plane.
+ *
+ * The first pipe is the lowest-indexed pipe that shows @plane, has no
+ * prev_odm_pipe, and is either an OPP head, has no top pipe, or has a top
+ * pipe showing a different plane. From there the function follows the
+ * next_odm_pipe chain if the first pipe has one, otherwise the bottom_pipe
+ * chain for as long as the pipes keep showing @plane.
+ *
+ * Returns the number of entries written, 0 if no pipe carries @plane.
+ */
+int resource_get_dpp_pipes_for_plane(const struct dc_plane_state *plane,
+		struct resource_context *res_ctx,
+		struct pipe_ctx *dpp_pipes[MAX_PIPES])
+{
+	int i = 0, j;
+	struct pipe_ctx *pipe;
+
+	for (j = 0; j < MAX_PIPES; j++) {
+		pipe = &res_ctx->pipe_ctx[j];
+		if (pipe->plane_state != plane || pipe->prev_odm_pipe)
+			continue;
+		/*
+		 * A pipe that is not an OPP head may still have no top pipe
+		 * (for example when it has no stream); check the pointer
+		 * before looking at the plane above.
+		 */
+		if (resource_is_pipe_type(pipe, OPP_HEAD) ||
+		    !pipe->top_pipe ||
+		    pipe->top_pipe->plane_state != plane)
+			break;
+	}
+
+	if (j == MAX_PIPES)
+		return 0;
+
+	if (pipe->next_odm_pipe) {
+		while (pipe) {
+			/* same bound the sibling collectors assert on */
+			ASSERT(i < MAX_PIPES);
+			dpp_pipes[i++] = pipe;
+			pipe = pipe->next_odm_pipe;
+		}
+	} else {
+		while (pipe && pipe->plane_state == plane) {
+			ASSERT(i < MAX_PIPES);
+			dpp_pipes[i++] = pipe;
+			pipe = pipe->bottom_pipe;
+		}
+	}
+
+	return i;
+}
+
+/*
+ * Return the pipe reached from @pipe_ctx by first taking its OPP head (see
+ * resource_get_opp_head()) and then following prev_odm_pipe to the start of
+ * that chain.
+ */
+struct pipe_ctx *resource_get_otg_master(const struct pipe_ctx *pipe_ctx)
+{
+	struct pipe_ctx *pipe = resource_get_opp_head(pipe_ctx);
+
+	for (;;) {
+		struct pipe_ctx *prev = pipe->prev_odm_pipe;
+
+		if (!prev)
+			return pipe;
+		pipe = prev;
+	}
+}
+
+/*
+ * Return the pipe at the top of @pipe_ctx's top_pipe chain (@pipe_ctx itself
+ * if it has no top pipe). Asserts that @pipe_ctx is not a FREE_PIPE. The
+ * const qualifier of the argument is cast away for the return value.
+ */
+struct pipe_ctx *resource_get_opp_head(const struct pipe_ctx *pipe_ctx)
+{
+	struct pipe_ctx *pipe = (struct pipe_ctx *) pipe_ctx;
+
+	ASSERT(!resource_is_pipe_type(pipe, FREE_PIPE));
+
+	for (;;) {
+		struct pipe_ctx *above = pipe->top_pipe;
+
+		if (!above)
+			return pipe;
+		pipe = above;
+	}
+}
+
+/*
+ * Return the primary DPP pipe for @dpp_pipe: follow prev_odm_pipe to the
+ * start of that chain, then climb top_pipe for as long as the pipe above
+ * shows the same plane_state. Asserts that @dpp_pipe is a DPP_PIPE.
+ */
+struct pipe_ctx *resource_get_primary_dpp_pipe(const struct pipe_ctx *dpp_pipe)
+{
+	struct pipe_ctx *pipe = (struct pipe_ctx *) dpp_pipe;
+
+	ASSERT(resource_is_pipe_type(dpp_pipe, DPP_PIPE));
+
+	/* step 1: rewind to the first pipe of the ODM chain */
+	for (; pipe->prev_odm_pipe; pipe = pipe->prev_odm_pipe)
+		;
+
+	/* step 2: climb while the pipe above still shows the same plane */
+	for (;;) {
+		struct pipe_ctx *above = pipe->top_pipe;
+
+		if (!above || above->plane_state != pipe->plane_state)
+			break;
+		pipe = above;
+	}
+
+	return pipe;
+}
+
+
+/*
+ * Count how many consecutive pipes directly above @pipe_ctx (via top_pipe)
+ * show the same plane_state as @pipe_ctx; that count is the slice index.
+ */
+int resource_get_mpc_slice_index(const struct pipe_ctx *pipe_ctx)
+{
+	const struct pipe_ctx *above;
+	int slice_idx = 0;
+
+	for (above = pipe_ctx->top_pipe;
+	     above && above->plane_state == pipe_ctx->plane_state;
+	     above = above->top_pipe)
+		slice_idx++;
+
+	return slice_idx;
+}
+
+/*
+ * Count @pipe plus every consecutive neighbour, below via bottom_pipe and
+ * above via top_pipe, that shows the same plane_state as @pipe.
+ */
+int resource_get_mpc_slice_count(const struct pipe_ctx *pipe)
+{
+	const struct pipe_ctx *neighbour;
+	int count = 1;
+
+	for (neighbour = pipe->bottom_pipe;
+	     neighbour && neighbour->plane_state == pipe->plane_state;
+	     neighbour = neighbour->bottom_pipe)
+		count++;
+
+	for (neighbour = pipe->top_pipe;
+	     neighbour && neighbour->plane_state == pipe->plane_state;
+	     neighbour = neighbour->top_pipe)
+		count++;
+
+	return count;
+}
+
+/*
+ * Return the length of the next_odm_pipe chain that starts at the OTG master
+ * of @pipe, counting the OTG master itself.
+ */
+int resource_get_odm_slice_count(const struct pipe_ctx *pipe)
+{
+	const struct pipe_ctx *slice;
+	int count = 1;
+
+	for (slice = resource_get_otg_master(pipe); slice->next_odm_pipe;
+	     slice = slice->next_odm_pipe)
+		count++;
+
+	return count;
+}
+
+/*
+ * Return how many prev_odm_pipe links precede the OPP head of @pipe_ctx.
+ * Returns 0 if resource_get_opp_head() yields NULL.
+ */
+int resource_get_odm_slice_index(const struct pipe_ctx *pipe_ctx)
+{
+	const struct pipe_ctx *slice = resource_get_opp_head(pipe_ctx);
+	int slice_idx = 0;
+
+	if (!slice)
+		return 0;
+
+	for (; slice->prev_odm_pipe; slice = slice->prev_odm_pipe)
+		slice_idx++;
+
+	return slice_idx;
+}
+
+/*
+ * Compute the destination width of one ODM slice of @otg_master's stream.
+ *
+ * The horizontal active width (addressable + left/right borders + DSC
+ * hactive padding) is divided by the ODM slice count. An odd result is
+ * rounded up by one when two-pixel alignment is required. For the last
+ * segment the function returns whatever is left of the active width after
+ * the other slices. Returns 0 if @otg_master or its stream is NULL.
+ */
+int resource_get_odm_slice_dst_width(struct pipe_ctx *otg_master,
+		bool is_last_segment)
+{
+	const struct dc_crtc_timing *timing;
+	int slice_count, h_active, slice_width;
+	bool needs_two_pixel_alignment = false;
+
+	if (!otg_master || !otg_master->stream)
+		return 0;
+
+	timing = &otg_master->stream->timing;
+	slice_count = resource_get_odm_slice_count(otg_master);
+	h_active = timing->h_addressable +
+			timing->h_border_left +
+			timing->h_border_right +
+			otg_master->dsc_padding_params.dsc_hactive_padding;
+	slice_width = h_active / slice_count;
+
+	if (otg_master->stream_res.tg) {
+		/*
+		 * 422 is sub-sampled horizontally: one set of chromas (Cb/Cr)
+		 * is shared by two lumas. So even though 422 is still one
+		 * pixel per container, the ODM segment width must be two-pixel
+		 * aligned.
+		 */
+		needs_two_pixel_alignment =
+			otg_master->stream_res.tg->funcs->is_two_pixels_per_container(timing) ||
+			timing->pixel_encoding == PIXEL_ENCODING_YCBCR422;
+	}
+
+	if (needs_two_pixel_alignment && (slice_width % 2))
+		slice_width++;
+
+	if (!is_last_segment)
+		return slice_width;
+
+	return h_active - slice_width * (slice_count - 1);
+}
+
+/*
+ * Build the destination rectangle of the ODM slice @pipe_ctx belongs to.
+ *
+ * x is the slice index times the non-last slice width, width comes from
+ * resource_get_odm_slice_dst_width() (treated as the last segment when
+ * @pipe_ctx has no next_odm_pipe), y is 0 and height is the stream's
+ * vertical addressable lines plus top and bottom borders.
+ */
+struct rect resource_get_odm_slice_dst_rect(struct pipe_ctx *pipe_ctx)
+{
+	const struct dc_crtc_timing *timing = &pipe_ctx->stream->timing;
+	bool last_slice = !pipe_ctx->next_odm_pipe;
+	struct pipe_ctx *otg_master = resource_get_otg_master(pipe_ctx);
+	int slice_idx = resource_get_odm_slice_index(pipe_ctx);
+	int slice_stride = resource_get_odm_slice_dst_width(otg_master, false);
+	struct rect dst;
+
+	dst.x = slice_stride * slice_idx;
+	dst.y = 0;
+	dst.width = resource_get_odm_slice_dst_width(otg_master, last_slice);
+	dst.height = timing->v_addressable +
+			timing->v_border_bottom +
+			timing->v_border_top;
+
+	return dst;
+}
+
+/*
+ * Build the source rectangle of the ODM slice @pipe_ctx belongs to.
+ *
+ * Starts from the destination rectangle of the slice's OPP head and extends
+ * it to the left by the count reported by the OPP's
+ * opp_get_left_edge_extra_pixel_count hook. The extension is zero when the
+ * OPP or the hook is absent.
+ */
+struct rect resource_get_odm_slice_src_rect(struct pipe_ctx *pipe_ctx)
+{
+	struct pipe_ctx *opp_head = resource_get_opp_head(pipe_ctx);
+	struct output_pixel_processor *opp = opp_head->stream_res.opp;
+	struct rect src = resource_get_odm_slice_dst_rect(opp_head);
+	uint32_t extra_pixels = 0;
+
+	if (opp && opp->funcs->opp_get_left_edge_extra_pixel_count)
+		extra_pixels = opp->funcs->opp_get_left_edge_extra_pixel_count(
+				opp, pipe_ctx->stream->timing.pixel_encoding,
+				resource_is_pipe_type(opp_head, OTG_MASTER));
+
+	src.x -= extra_pixels;
+	src.width += extra_pixels;
+
+	return src;
+}
+
+/*
+ * Compare the pipe topology of two states, pipe index by pipe index.
+ *
+ * Returns true if the stream counts differ, or if for any pipe index the two
+ * states disagree on: having a stream, having a plane, having a bottom pipe,
+ * the bottom pipe's index, whether the bottom pipe shows the same plane as
+ * the pipe itself, having a next ODM pipe, or that ODM pipe's index.
+ * Returns false otherwise.
+ */
+bool resource_is_pipe_topology_changed(const struct dc_state *state_a,
+		const struct dc_state *state_b)
+{
+	int idx;
+
+	if (state_a->stream_count != state_b->stream_count)
+		return true;
+
+	for (idx = 0; idx < MAX_PIPES; idx++) {
+		const struct pipe_ctx *a = &state_a->res_ctx.pipe_ctx[idx];
+		const struct pipe_ctx *b = &state_b->res_ctx.pipe_ctx[idx];
+
+		/* presence of stream / plane must match */
+		if (!a->stream != !b->stream)
+			return true;
+		if (!a->plane_state != !b->plane_state)
+			return true;
+
+		/* presence of a bottom pipe must match */
+		if (!a->bottom_pipe != !b->bottom_pipe)
+			return true;
+		if (a->bottom_pipe) {
+			bool a_same_plane = a->bottom_pipe->plane_state == a->plane_state;
+			bool b_same_plane = b->bottom_pipe->plane_state == b->plane_state;
+
+			if (a->bottom_pipe->pipe_idx != b->bottom_pipe->pipe_idx)
+				return true;
+			if (a_same_plane != b_same_plane)
+				return true;
+		}
+
+		/* presence and index of the next ODM pipe must match */
+		if (!a->next_odm_pipe != !b->next_odm_pipe)
+			return true;
+		if (a->next_odm_pipe &&
+		    a->next_odm_pipe->pipe_idx != b->next_odm_pipe->pipe_idx)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Compare the ODM chains that start at two OTG masters.
+ *
+ * Returns true if either argument is not an OTG_MASTER, if any pair of pipes
+ * at the same chain position uses a different OPP, or if one chain ends
+ * before the other. Returns false when both chains match.
+ */
+bool resource_is_odm_topology_changed(const struct pipe_ctx *otg_master_a,
+		const struct pipe_ctx *otg_master_b)
+{
+	const struct pipe_ctx *a, *b;
+
+	if (!(resource_is_pipe_type(otg_master_a, OTG_MASTER) &&
+	      resource_is_pipe_type(otg_master_b, OTG_MASTER)))
+		return true;
+
+	for (a = otg_master_a, b = otg_master_b; a && b;
+	     a = a->next_odm_pipe, b = b->next_odm_pipe) {
+		if (a->stream_res.opp != b->stream_res.opp)
+			return true;
+		/* exactly one of the two chains continues */
+		if (!a->next_odm_pipe != !b->next_odm_pipe)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Sample log:
+ * pipe topology update
+ * ________________________
+ * | plane0 slice0 stream0|
+ * |DPP0----OPP0----OTG0----| <--- case 0 (OTG master pipe with plane)
+ * | plane1 | | |
+ * |DPP1----| | | <--- case 5 (DPP pipe not in last slice)
+ * | plane0 slice1 | |
+ * |DPP2----OPP2----| | <--- case 2 (OPP head pipe with plane)
+ * | plane1 | |
+ * |DPP3----| | <--- case 4 (DPP pipe in last slice)
+ * | slice0 stream1|
+ * |DPG4----OPP4----OTG4----| <--- case 1 (OTG master pipe without plane)
+ * | slice1 | |
+ * |DPG5----OPP5----| | <--- case 3 (OPP head pipe without plane)
+ * |________________________|
+ */
+
+/*
+ * Log one pipe of the topology diagram and record it through
+ * capture_pipe_topology_data().
+ *
+ * The branch taken (cases 0-5, matching the sample log above) is chosen from
+ * @slice_idx, @plane_idx (-1 means the pipe has no plane), @is_primary and
+ * whether @slice_idx is the last of @slice_count slices. In the no-plane
+ * cases 0xF is recorded as the plane index.
+ *
+ * NOTE(review): every branch reads pipe->plane_res.dpp->inst,
+ * pipe->stream_res.opp->inst and pipe->stream_res.tg->inst without NULL
+ * checks, including the no-plane cases - confirm these resources are always
+ * assigned for such pipes.
+ */
+static void resource_log_pipe(struct dc *dc, struct pipe_ctx *pipe,
+ int stream_idx, int slice_idx, int plane_idx, int slice_count,
+ bool is_primary, bool is_phantom_pipe)
+{
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ // new format for logging: bit storing code
+ if (slice_idx == 0 && plane_idx == 0 && is_primary) {
+ /* case 0 (OTG master pipe with plane) */
+ DC_LOG_DC(" | plane%d slice%d stream%d|",
+ plane_idx, slice_idx, stream_idx);
+ DC_LOG_DC(" |DPP%d----OPP%d----OTG%d----|",
+ pipe->plane_res.dpp->inst,
+ pipe->stream_res.opp->inst,
+ pipe->stream_res.tg->inst);
+ capture_pipe_topology_data(dc, plane_idx, slice_idx, stream_idx,
+ pipe->plane_res.dpp->inst,
+ pipe->stream_res.opp->inst,
+ pipe->stream_res.tg->inst, is_phantom_pipe);
+ } else if (slice_idx == 0 && plane_idx == -1) {
+ /* case 1 (OTG master pipe without plane) */
+ /* the DPG number printed here is the OPP instance */
+ DC_LOG_DC(" | slice%d stream%d|",
+ slice_idx, stream_idx);
+ DC_LOG_DC(" |DPG%d----OPP%d----OTG%d----|",
+ pipe->stream_res.opp->inst,
+ pipe->stream_res.opp->inst,
+ pipe->stream_res.tg->inst);
+ capture_pipe_topology_data(dc, 0xF, slice_idx, stream_idx,
+ pipe->plane_res.dpp->inst,
+ pipe->stream_res.opp->inst,
+ pipe->stream_res.tg->inst, is_phantom_pipe);
+ } else if (slice_idx != 0 && plane_idx == 0 && is_primary) {
+ /* case 2 (OPP head pipe with plane) */
+ DC_LOG_DC(" | plane%d slice%d | |",
+ plane_idx, slice_idx);
+ DC_LOG_DC(" |DPP%d----OPP%d----| |",
+ pipe->plane_res.dpp->inst,
+ pipe->stream_res.opp->inst);
+ capture_pipe_topology_data(dc, plane_idx, slice_idx, stream_idx,
+ pipe->plane_res.dpp->inst,
+ pipe->stream_res.opp->inst,
+ pipe->stream_res.tg->inst, is_phantom_pipe);
+ } else if (slice_idx != 0 && plane_idx == -1) {
+ /* case 3 (OPP head pipe without plane) */
+ /*
+ * NOTE(review): the DPG number printed here is the DPP instance,
+ * whereas case 1 prints the OPP instance - confirm which is intended.
+ */
+ DC_LOG_DC(" | slice%d | |", slice_idx);
+ DC_LOG_DC(" |DPG%d----OPP%d----| |",
+ pipe->plane_res.dpp->inst,
+ pipe->stream_res.opp->inst);
+ capture_pipe_topology_data(dc, 0xF, slice_idx, stream_idx,
+ pipe->plane_res.dpp->inst,
+ pipe->stream_res.opp->inst,
+ pipe->stream_res.tg->inst, is_phantom_pipe);
+ } else if (slice_idx == slice_count - 1) {
+ /* case 4 (DPP pipe in last slice) */
+ DC_LOG_DC(" | plane%d | |", plane_idx);
+ DC_LOG_DC(" |DPP%d----| |",
+ pipe->plane_res.dpp->inst);
+ capture_pipe_topology_data(dc, plane_idx, slice_idx, stream_idx,
+ pipe->plane_res.dpp->inst,
+ pipe->stream_res.opp->inst,
+ pipe->stream_res.tg->inst, is_phantom_pipe);
+ } else {
+ /* case 5 (DPP pipe not in last slice) */
+ DC_LOG_DC(" | plane%d | | |", plane_idx);
+ DC_LOG_DC(" |DPP%d----| | |",
+ pipe->plane_res.dpp->inst);
+ capture_pipe_topology_data(dc, plane_idx, slice_idx, stream_idx,
+ pipe->plane_res.dpp->inst,
+ pipe->stream_res.opp->inst,
+ pipe->stream_res.tg->inst, is_phantom_pipe);
+ }
+}
+
+/*
+ * Log every pipe that belongs to the stream driven by @otg_master.
+ *
+ * For each OPP head (one per ODM slice): if the head has a plane, log each of
+ * its DPP pipes, bumping the plane index whenever a pipe is the first one for
+ * its plane; otherwise log the head itself with plane index -1.
+ */
+static void resource_log_pipe_for_stream(struct dc *dc, struct dc_state *state,
+		struct pipe_ctx *otg_master, int stream_idx, bool is_phantom_pipe)
+{
+	struct pipe_ctx *opp_heads[MAX_PIPES];
+	struct pipe_ctx *dpp_pipes[MAX_PIPES];
+	int slice_idx, slice_count;
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	slice_count = resource_get_opp_heads_for_otg_master(otg_master,
+			&state->res_ctx, opp_heads);
+
+	for (slice_idx = 0; slice_idx < slice_count; slice_idx++) {
+		struct pipe_ctx *opp_head = opp_heads[slice_idx];
+		int plane_idx = -1;
+		int dpp_count, dpp_idx;
+
+		if (!opp_head->plane_state) {
+			resource_log_pipe(dc, opp_head,
+					stream_idx, slice_idx, plane_idx,
+					slice_count, true, is_phantom_pipe);
+			continue;
+		}
+
+		dpp_count = resource_get_dpp_pipes_for_opp_head(opp_head,
+				&state->res_ctx, dpp_pipes);
+
+		for (dpp_idx = 0; dpp_idx < dpp_count; dpp_idx++) {
+			struct pipe_ctx *dpp_pipe = dpp_pipes[dpp_idx];
+			/* primary: no pipe above, or the pipe above shows another plane */
+			bool is_primary = !dpp_pipe->top_pipe ||
+				dpp_pipe->top_pipe->plane_state != dpp_pipe->plane_state;
+
+			if (is_primary)
+				plane_idx++;
+
+			resource_log_pipe(dc, dpp_pipe,
+					stream_idx, slice_idx,
+					plane_idx, slice_count,
+					is_primary, is_phantom_pipe);
+		}
+	}
+}
+
+/*
+ * Return the position of @stream in state->streams[]. If the stream is not
+ * present, assert and return 0 so the caller never gets a negative array
+ * index.
+ */
+static int resource_stream_to_stream_idx(struct dc_state *state,
+		struct dc_stream_state *stream)
+{
+	int idx;
+
+	for (idx = 0; idx < state->stream_count; idx++) {
+		if (state->streams[idx] == stream)
+			return idx;
+	}
+
+	/* never return negative array index */
+	ASSERT(0);
+	return 0;
+}
+
+/*
+ * Log the pipe topology of @state as an ASCII diagram.
+ *
+ * Starts a new topology snapshot, then logs every non-phantom stream that
+ * has an OTG master. If the state has phantom streams, a second section logs
+ * the phantom stream paired with each SUBVP_MAIN stream.
+ */
+void resource_log_pipe_topology_update(struct dc *dc, struct dc_state *state)
+{
+ struct pipe_ctx *otg_master;
+ int stream_idx, phantom_stream_idx;
+ DC_LOGGER_INIT(dc->ctx->logger);
+ bool is_phantom_pipe = false;
+
+ // Start a new snapshot for this topology update
+ start_new_topology_snapshot(dc, state);
+
+ DC_LOG_DC(" pipe topology update");
+ DC_LOG_DC(" ________________________");
+ for (stream_idx = 0; stream_idx < state->stream_count; stream_idx++) {
+ /* phantom streams are logged in the separate section below */
+ if (state->streams[stream_idx]->is_phantom)
+ continue;
+
+ otg_master = resource_get_otg_master_for_stream(
+ &state->res_ctx, state->streams[stream_idx]);
+
+ if (!otg_master)
+ continue;
+
+ resource_log_pipe_for_stream(dc, state, otg_master, stream_idx, is_phantom_pipe);
+ }
+ if (state->phantom_stream_count > 0) {
+ is_phantom_pipe = true;
+ DC_LOG_DC(" | (phantom pipes) |");
+ for (stream_idx = 0; stream_idx < state->stream_count; stream_idx++) {
+ if (state->stream_status[stream_idx].mall_stream_config.type != SUBVP_MAIN)
+ continue;
+
+ phantom_stream_idx = resource_stream_to_stream_idx(state,
+ state->stream_status[stream_idx].mall_stream_config.paired_stream);
+ otg_master = resource_get_otg_master_for_stream(
+ &state->res_ctx, state->streams[phantom_stream_idx]);
+ if (!otg_master)
+ continue;
+
+ /* the phantom pipes are logged under the main stream's index */
+ resource_log_pipe_for_stream(dc, state, otg_master, stream_idx, is_phantom_pipe);
+ }
+ }
+ DC_LOG_DC(" |________________________|\n");
+}
+
+static struct pipe_ctx *get_tail_pipe(
+ struct pipe_ctx *head_pipe)
+{
+ struct pipe_ctx *tail_pipe = head_pipe->bottom_pipe;
while (tail_pipe) {
head_pipe = tail_pipe;
@@ -1308,41 +2531,65 @@ static struct pipe_ctx *resource_get_tail_pipe(
return head_pipe;
}
-/*
- * A free_pipe for a stream is defined here as a pipe
- * that has no surface attached yet
- */
-static struct pipe_ctx *acquire_free_pipe_for_head(
+/*
+ * Return the last pipe on the next_odm_pipe chain that starts at @opp_head.
+ * Asserts that @opp_head is an OPP_HEAD.
+ */
+static struct pipe_ctx *get_last_opp_head(
+		struct pipe_ctx *opp_head)
+{
+	struct pipe_ctx *last = opp_head;
+
+	ASSERT(resource_is_pipe_type(opp_head, OPP_HEAD));
+
+	for (; last->next_odm_pipe; last = last->next_odm_pipe)
+		;
+
+	return last;
+}
+
+/*
+ * Starting at @dpp_pipe, follow bottom_pipe for as long as the pipe below
+ * shows the same plane_state, and return the last such pipe.
+ * Asserts that @dpp_pipe is a DPP_PIPE.
+ */
+static struct pipe_ctx *get_last_dpp_pipe_in_mpcc_combine(
+		struct pipe_ctx *dpp_pipe)
+{
+	struct pipe_ctx *last = dpp_pipe;
+
+	ASSERT(resource_is_pipe_type(dpp_pipe, DPP_PIPE));
+
+	for (;;) {
+		struct pipe_ctx *below = last->bottom_pipe;
+
+		if (!below || below->plane_state != last->plane_state)
+			return last;
+		last = below;
+	}
+}
+
+static bool update_pipe_params_after_odm_slice_count_change(
+ struct pipe_ctx *otg_master,
struct dc_state *context,
- const struct resource_pool *pool,
- struct pipe_ctx *head_pipe)
+ const struct resource_pool *pool)
{
int i;
- struct resource_context *res_ctx = &context->res_ctx;
+ struct pipe_ctx *pipe;
+ bool result = true;
- if (!head_pipe->plane_state)
- return head_pipe;
-
- /* Re-use pipe already acquired for this stream if available*/
- for (i = pool->pipe_count - 1; i >= 0; i--) {
- if (res_ctx->pipe_ctx[i].stream == head_pipe->stream &&
- !res_ctx->pipe_ctx[i].plane_state) {
- return &res_ctx->pipe_ctx[i];
- }
+ for (i = 0; i < pool->pipe_count && result; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ if (pipe->stream == otg_master->stream && pipe->plane_state)
+ result = resource_build_scaling_params(pipe);
}
- /*
- * At this point we have no re-useable pipe for this stream and we need
- * to acquire an idle one to satisfy the request
- */
+ if (pool->funcs->build_pipe_pix_clk_params)
+ pool->funcs->build_pipe_pix_clk_params(otg_master);
- if (!pool->funcs->acquire_idle_pipe_for_layer)
- return NULL;
+ resource_build_test_pattern_params(&context->res_ctx, otg_master);
- return pool->funcs->acquire_idle_pipe_for_layer(context, pool, head_pipe->stream);
+ return result;
+}
+
+/*
+ * Rebuild the scaling parameters of every pipe in @context that shows
+ * @plane. Stops at the first pipe for which resource_build_scaling_params()
+ * fails and returns false; returns true if all succeed or no pipe matches.
+ */
+static bool update_pipe_params_after_mpc_slice_count_change(
+		const struct dc_plane_state *plane,
+		struct dc_state *context,
+		const struct resource_pool *pool)
+{
+	int idx;
+
+	for (idx = 0; idx < pool->pipe_count; idx++) {
+		struct pipe_ctx *cur = &context->res_ctx.pipe_ctx[idx];
+
+		if (cur->plane_state != plane)
+			continue;
+		if (!resource_build_scaling_params(cur))
+			return false;
+	}
+
+	return true;
+}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
static int acquire_first_split_pipe(
struct resource_context *res_ctx,
const struct resource_pool *pool,
@@ -1375,117 +2622,529 @@ static int acquire_first_split_pipe(
return i;
}
}
+ return FREE_PIPE_INDEX_NOT_FOUND;
+}
+
+/*
+ * Record in @res_ctx whether @stream_enc is acquired: every pool slot that
+ * holds @stream_enc gets its is_stream_enc_acquired flag set to @acquired.
+ */
+static void update_stream_engine_usage(
+		struct resource_context *res_ctx,
+		const struct resource_pool *pool,
+		struct stream_encoder *stream_enc,
+		bool acquired)
+{
+	int slot;
+
+	for (slot = 0; slot < pool->stream_enc_count; slot++) {
+		if (pool->stream_enc[slot] != stream_enc)
+			continue;
+		res_ctx->is_stream_enc_acquired[slot] = acquired;
+	}
+}
+
+/*
+ * Record in @res_ctx whether @hpo_dp_stream_enc is acquired: every pool slot
+ * that holds it gets its is_hpo_dp_stream_enc_acquired flag set to @acquired.
+ */
+static void update_hpo_dp_stream_engine_usage(
+		struct resource_context *res_ctx,
+		const struct resource_pool *pool,
+		struct hpo_dp_stream_encoder *hpo_dp_stream_enc,
+		bool acquired)
+{
+	int slot;
+
+	for (slot = 0; slot < pool->hpo_dp_stream_enc_count; slot++) {
+		if (pool->hpo_dp_stream_enc[slot] != hpo_dp_stream_enc)
+			continue;
+		res_ctx->is_hpo_dp_stream_enc_acquired[slot] = acquired;
+	}
+}
+
+/*
+ * Return the index of the HPO DP link encoder slot in @res_ctx that has a
+ * positive reference count and is mapped to @link's link_index, or -1 if no
+ * slot matches.
+ */
+static inline int find_acquired_hpo_dp_link_enc_for_link(
+ const struct resource_context *res_ctx,
+ const struct dc_link *link)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(res_ctx->hpo_dp_link_enc_to_link_idx); i++)
+ if (res_ctx->hpo_dp_link_enc_ref_cnts[i] > 0 &&
+ res_ctx->hpo_dp_link_enc_to_link_idx[i] == link->link_index)
+ return i;
+
return -1;
}
-#endif
-bool dc_add_plane_to_context(
- const struct dc *dc,
- struct dc_stream_state *stream,
- struct dc_plane_state *plane_state,
- struct dc_state *context)
+static inline int find_free_hpo_dp_link_enc(const struct resource_context *res_ctx,
+ const struct resource_pool *pool)
{
int i;
- struct resource_pool *pool = dc->res_pool;
- struct pipe_ctx *head_pipe, *tail_pipe, *free_pipe;
- struct dc_stream_status *stream_status = NULL;
- for (i = 0; i < context->stream_count; i++)
- if (context->streams[i] == stream) {
- stream_status = &context->stream_status[i];
+ for (i = 0; i < ARRAY_SIZE(res_ctx->hpo_dp_link_enc_ref_cnts); i++)
+ if (res_ctx->hpo_dp_link_enc_ref_cnts[i] == 0)
break;
- }
- if (stream_status == NULL) {
- dm_error("Existing stream not found; failed to attach surface!\n");
- return false;
- }
+ return (i < ARRAY_SIZE(res_ctx->hpo_dp_link_enc_ref_cnts) &&
+ i < pool->hpo_dp_link_enc_count) ? i : -1;
+}
- if (stream_status->plane_count == MAX_SURFACE_NUM) {
- dm_error("Surface: can not attach plane_state %p! Maximum is: %d\n",
- plane_state, MAX_SURFACE_NUM);
- return false;
+/* Bind HPO DP link encoder slot enc_index to link_index and start its
+ * reference count at one. The caller supplies a valid slot index; none of
+ * the arguments are range-checked here.
+ */
+static inline void acquire_hpo_dp_link_enc(
+		struct resource_context *res_ctx,
+		unsigned int link_index,
+		int enc_index)
+{
+	res_ctx->hpo_dp_link_enc_ref_cnts[enc_index] = 1;
+	res_ctx->hpo_dp_link_enc_to_link_idx[enc_index] = link_index;
+}
+
+/* Take one more reference on HPO DP link encoder slot enc_index. */
+static inline void retain_hpo_dp_link_enc(
+		struct resource_context *res_ctx,
+		int enc_index)
+{
+	res_ctx->hpo_dp_link_enc_ref_cnts[enc_index] += 1;
+}
+
+/* Drop one reference on HPO DP link encoder slot enc_index. Asserts that the
+ * slot is referenced, but decrements the count regardless of the assertion.
+ */
+static inline void release_hpo_dp_link_enc(
+		struct resource_context *res_ctx,
+		int enc_index)
+{
+	ASSERT(res_ctx->hpo_dp_link_enc_ref_cnts[enc_index] > 0);
+	res_ctx->hpo_dp_link_enc_ref_cnts[enc_index] -= 1;
+}
+
+static bool add_hpo_dp_link_enc_to_ctx(struct resource_context *res_ctx,
+ const struct resource_pool *pool,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_stream_state *stream)
+{
+ int enc_index;
+
+ enc_index = find_acquired_hpo_dp_link_enc_for_link(res_ctx, stream->link);
+
+ if (enc_index >= 0) {
+ retain_hpo_dp_link_enc(res_ctx, enc_index);
+ } else {
+ enc_index = find_free_hpo_dp_link_enc(res_ctx, pool);
+ if (enc_index >= 0)
+ acquire_hpo_dp_link_enc(res_ctx, stream->link->link_index, enc_index);
}
- head_pipe = resource_get_head_pipe_for_stream(&context->res_ctx, stream);
+ if (enc_index >= 0)
+ pipe_ctx->link_res.hpo_dp_link_enc = pool->hpo_dp_link_enc[enc_index];
- if (!head_pipe) {
- dm_error("Head pipe not found for stream_state %p !\n", stream);
- return false;
+ return pipe_ctx->link_res.hpo_dp_link_enc != NULL;
+}
+
+/* Drop pipe_ctx's use of the HPO DP link encoder held by stream->link.
+ *
+ * If res_ctx records an acquired encoder for the link, its reference count is
+ * decremented and pipe_ctx->link_res.hpo_dp_link_enc is cleared. If no
+ * encoder is recorded for the link, nothing is changed.
+ */
+static void remove_hpo_dp_link_enc_from_ctx(struct resource_context *res_ctx,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_stream_state *stream)
+{
+ int enc_index;
+
+ enc_index = find_acquired_hpo_dp_link_enc_for_link(res_ctx, stream->link);
+
+ /* a negative index means the link holds no encoder in this context */
+ if (enc_index >= 0) {
+ release_hpo_dp_link_enc(res_ctx, enc_index);
+ pipe_ctx->link_res.hpo_dp_link_enc = NULL;
}
+}
+
+static inline int find_acquired_dio_link_enc_for_link(
+ const struct resource_context *res_ctx,
+ const struct dc_link *link)
+{
+ int i;
- /* retain new surface, but only once per stream */
- dc_plane_state_retain(plane_state);
+ for (i = 0; i < ARRAY_SIZE(res_ctx->dio_link_enc_ref_cnts); i++)
+ if (res_ctx->dio_link_enc_ref_cnts[i] > 0 &&
+ res_ctx->dio_link_enc_to_link_idx[i] == link->link_index)
+ return i;
- while (head_pipe) {
- free_pipe = acquire_free_pipe_for_head(context, pool, head_pipe);
+ return -1;
+}
- #if defined(CONFIG_DRM_AMD_DC_DCN)
- if (!free_pipe) {
- int pipe_idx = acquire_first_split_pipe(&context->res_ctx, pool, stream);
- if (pipe_idx >= 0)
- free_pipe = &context->res_ctx.pipe_ctx[pipe_idx];
- }
- #endif
- if (!free_pipe) {
- dc_plane_state_release(plane_state);
- return false;
+/* Return the encoder index a fixed-mapping link must use, which is simply
+ * the link's own eng_id.
+ */
+static inline int find_fixed_dio_link_enc(const struct dc_link *link)
+{
+	/* the 8b10b dp phy can only use fixed link encoder */
+	int fixed_enc_index = link->eng_id;
+
+	return fixed_enc_index;
+}
+
+static inline int find_free_dio_link_enc(const struct resource_context *res_ctx,
+ const struct dc_link *link, const struct resource_pool *pool, struct dc_stream_state *stream)
+{
+ int i, j = -1;
+ int stream_enc_inst = -1;
+ int enc_count = pool->dig_link_enc_count;
+
+ /* Find stream encoder instance for the stream */
+ if (stream) {
+ for (i = 0; i < pool->pipe_count; i++) {
+ if ((res_ctx->pipe_ctx[i].stream == stream) &&
+ (res_ctx->pipe_ctx[i].stream_res.stream_enc != NULL)) {
+ stream_enc_inst = res_ctx->pipe_ctx[i].stream_res.stream_enc->id;
+ break;
+ }
}
+ }
+
+ /* Assign dpia preferred > stream enc instance > available */
+ for (i = 0; i < enc_count; i++) {
+ if (res_ctx->dio_link_enc_ref_cnts[i] == 0) {
+ if (j == -1)
+ j = i;
- free_pipe->plane_state = plane_state;
-
- if (head_pipe != free_pipe) {
- tail_pipe = resource_get_tail_pipe(&context->res_ctx, head_pipe);
- ASSERT(tail_pipe);
- free_pipe->stream_res.tg = tail_pipe->stream_res.tg;
- free_pipe->stream_res.abm = tail_pipe->stream_res.abm;
- free_pipe->stream_res.opp = tail_pipe->stream_res.opp;
- free_pipe->stream_res.stream_enc = tail_pipe->stream_res.stream_enc;
- free_pipe->stream_res.audio = tail_pipe->stream_res.audio;
- free_pipe->clock_source = tail_pipe->clock_source;
- free_pipe->top_pipe = tail_pipe;
- tail_pipe->bottom_pipe = free_pipe;
- if (!free_pipe->next_odm_pipe && tail_pipe->next_odm_pipe && tail_pipe->next_odm_pipe->bottom_pipe) {
- free_pipe->next_odm_pipe = tail_pipe->next_odm_pipe->bottom_pipe;
- tail_pipe->next_odm_pipe->bottom_pipe->prev_odm_pipe = free_pipe;
+ if (link->dpia_preferred_eng_id == i) {
+ j = i;
+ break;
}
- if (!free_pipe->prev_odm_pipe && tail_pipe->prev_odm_pipe && tail_pipe->prev_odm_pipe->bottom_pipe) {
- free_pipe->prev_odm_pipe = tail_pipe->prev_odm_pipe->bottom_pipe;
- tail_pipe->prev_odm_pipe->bottom_pipe->next_odm_pipe = free_pipe;
+
+ if (stream_enc_inst == i) {
+ j = stream_enc_inst;
}
}
- head_pipe = head_pipe->next_odm_pipe;
}
- /* assign new surfaces*/
- stream_status->plane_states[stream_status->plane_count] = plane_state;
+ return j;
+}
- stream_status->plane_count++;
+/* Bind DIO link encoder slot enc_index to link_index and start its reference
+ * count at one. The caller supplies a valid slot index; nothing is
+ * range-checked here.
+ */
+static inline void acquire_dio_link_enc(
+		struct resource_context *res_ctx,
+		unsigned int link_index,
+		int enc_index)
+{
+	res_ctx->dio_link_enc_ref_cnts[enc_index] = 1;
+	res_ctx->dio_link_enc_to_link_idx[enc_index] = link_index;
+}
- return true;
+/* Take one more reference on DIO link encoder slot enc_index. */
+static inline void retain_dio_link_enc(
+		struct resource_context *res_ctx,
+		int enc_index)
+{
+	res_ctx->dio_link_enc_ref_cnts[enc_index] += 1;
}
-bool dc_remove_plane_from_context(
- const struct dc *dc,
- struct dc_stream_state *stream,
+/* Drop one reference on DIO link encoder slot enc_index. Asserts that the
+ * slot is referenced, but decrements the count regardless of the assertion.
+ */
+static inline void release_dio_link_enc(
+		struct resource_context *res_ctx,
+		int enc_index)
+{
+	ASSERT(res_ctx->dio_link_enc_ref_cnts[enc_index] > 0);
+	res_ctx->dio_link_enc_ref_cnts[enc_index] -= 1;
+}
+
+/* Tell whether DIO link encoder slot enc_index is held by a link other than
+ * link. The lookup is made in link->dc->current_state, not in a context
+ * passed by the caller.
+ *
+ * return - true if the slot is referenced and its recorded link index differs
+ * from link->link_index; *link_index then receives the owning link's index.
+ * false otherwise, with *link_index left untouched.
+ */
+static bool is_dio_enc_acquired_by_other_link(const struct dc_link *link,
+		int enc_index,
+		int *link_index)
+{
+	const struct resource_context *res_ctx = &link->dc->current_state->res_ctx;
+
+	/* unreferenced slot: nobody owns it */
+	if (!(res_ctx->dio_link_enc_ref_cnts[enc_index] > 0))
+		return false;
+
+	/* referenced by this very link */
+	if (res_ctx->dio_link_enc_to_link_idx[enc_index] == link->link_index)
+		return false;
+
+	/* pass the link_index that acquired the enc_index */
+	*link_index = res_ctx->dio_link_enc_to_link_idx[enc_index];
+	return true;
+}
+
+/* Move the owner of DIO link encoder slot old_encoder onto slot new_encoder.
+ *
+ * The reference count and owning link index are copied from the old slot to
+ * the new one, the old slot's reference count is zeroed, and every stream's
+ * OTG master pipe that pointed at the old encoder object is repointed at the
+ * new one.
+ */
+static void swap_dio_link_enc_to_muxable_ctx(struct dc_state *context,
+		const struct resource_pool *pool,
+		int new_encoder,
+		int old_encoder)
+{
+	struct resource_context *res_ctx = &context->res_ctx;
+	struct pipe_ctx *otg_master;
+	int stream_idx;
+
+	/* transfer the bookkeeping, then mark the old slot as free */
+	res_ctx->dio_link_enc_to_link_idx[new_encoder] = res_ctx->dio_link_enc_to_link_idx[old_encoder];
+	res_ctx->dio_link_enc_ref_cnts[new_encoder] = res_ctx->dio_link_enc_ref_cnts[old_encoder];
+	res_ctx->dio_link_enc_ref_cnts[old_encoder] = 0;
+
+	for (stream_idx = 0; stream_idx < context->stream_count; stream_idx++) {
+		otg_master = resource_get_otg_master_for_stream(res_ctx,
+				context->streams[stream_idx]);
+		if (!otg_master)
+			continue;
+
+		if (otg_master->link_res.dio_link_enc == pool->link_encoders[old_encoder])
+			otg_master->link_res.dio_link_enc = pool->link_encoders[new_encoder];
+	}
+}
+
+/* Give pipe_ctx a DIO link encoder for stream->link and account for it in
+ * context->res_ctx.
+ *
+ * If the link already holds an encoder in this context, that encoder's
+ * reference count is bumped. Otherwise a flexible-mapping link takes a free
+ * encoder, and a fixed-mapping link takes the encoder named by its eng_id.
+ *
+ * return - true if pipe_ctx->link_res.dio_link_enc is non-NULL on exit;
+ * false otherwise, including when a fixed encoder is held by another link and
+ * no free encoder exists to move that link onto.
+ */
+static bool add_dio_link_enc_to_ctx(const struct dc *dc,
+ struct dc_state *context,
+ const struct resource_pool *pool,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_stream_state *stream)
+{
+ struct resource_context *res_ctx = &context->res_ctx;
+ int enc_index;
+
+ enc_index = find_acquired_dio_link_enc_for_link(res_ctx, stream->link);
+
+ if (enc_index >= 0) {
+ /* link already owns an encoder in this context: share it */
+ retain_dio_link_enc(res_ctx, enc_index);
+ } else {
+ if (stream->link->is_dig_mapping_flexible)
+ enc_index = find_free_dio_link_enc(res_ctx, stream->link, pool, stream);
+ else {
+ int link_index = 0;
+
+ enc_index = find_fixed_dio_link_enc(stream->link);
+ /* Fixed mapping link can only use its fixed link encoder.
+ * If the encoder is acquired by other link then get a new free encoder and swap the new
+ * one into the acquiring link.
+ */
+ if (enc_index >= 0 && is_dio_enc_acquired_by_other_link(stream->link, enc_index, &link_index)) {
+ /* the free encoder is chosen for the link that currently owns enc_index */
+ int new_enc_index = find_free_dio_link_enc(res_ctx, dc->links[link_index], pool, stream);
+
+ if (new_enc_index >= 0)
+ swap_dio_link_enc_to_muxable_ctx(context, pool, new_enc_index, enc_index);
+ else
+ return false;
+ }
+ }
+
+ /* first user of this encoder for the link: ref count starts at 1 */
+ if (enc_index >= 0)
+ acquire_dio_link_enc(res_ctx, stream->link->link_index, enc_index);
+ }
+
+ if (enc_index >= 0)
+ pipe_ctx->link_res.dio_link_enc = pool->link_encoders[enc_index];
+
+ /* NOTE(review): success is judged by the pointer, not by enc_index, so a
+ * non-NULL dio_link_enc already present in pipe_ctx reports success even
+ * when nothing was acquired here - confirm callers pass a cleared link_res.
+ */
+ return pipe_ctx->link_res.dio_link_enc != NULL;
+}
+
+/* Drop pipe_ctx's use of the DIO link encoder held by stream->link.
+ *
+ * Nothing happens when the stream has no link or when res_ctx records no
+ * acquired encoder for that link. Otherwise the encoder's reference count is
+ * decremented and pipe_ctx->link_res.dio_link_enc is cleared.
+ */
+static void remove_dio_link_enc_from_ctx(struct resource_context *res_ctx,
+		struct pipe_ctx *pipe_ctx,
+		struct dc_stream_state *stream)
+{
+	int enc_index;
+
+	if (!stream->link)
+		return;
+
+	enc_index = find_acquired_dio_link_enc_for_link(res_ctx, stream->link);
+	if (enc_index < 0)
+		return;
+
+	release_dio_link_enc(res_ctx, enc_index);
+	pipe_ctx->link_res.dio_link_enc = NULL;
+}
+
+/* Count the pipes among the first pool->pipe_count entries of context that
+ * resource_is_pipe_type() classifies as FREE_PIPE.
+ */
+static int get_num_of_free_pipes(const struct resource_pool *pool, const struct dc_state *context)
+{
+	int free_pipes = 0;
+	int idx;
+
+	for (idx = 0; idx < pool->pipe_count; idx++) {
+		const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[idx];
+
+		if (resource_is_pipe_type(pipe, FREE_PIPE))
+			free_pipes++;
+	}
+	return free_pipes;
+}
+
+/* Add stream to new_ctx by delegating to the add_stream_to_ctx callback of
+ * the resource pool owned by the stream's dc.
+ *
+ * The pool argument is not consulted: the callback is taken from
+ * stream->ctx->dc->res_pool. The callback's status is returned unchanged.
+ */
+enum dc_status resource_add_otg_master_for_stream_output(struct dc_state *new_ctx,
+		const struct resource_pool *pool,
+		struct dc_stream_state *stream)
+{
+	struct dc *dc = stream->ctx->dc;
+	enum dc_status status;
+
+	status = dc->res_pool->funcs->add_stream_to_ctx(dc, new_ctx, stream);
+	return status;
+}
+
+/* Release everything the OTG master pipe of stream holds in context and
+ * clear that pipe.
+ *
+ * Does nothing if the stream has no OTG master pipe in context. Otherwise the
+ * stream encoder, the HPO DP stream/link encoders (128b/132b signals only),
+ * the DIO link encoder (only with unify_link_enc_assignment set), the audio
+ * endpoint and the clock source are marked unused or dereferenced, the pool's
+ * optional remove_stream_from_ctx hook is called, and the pipe is zeroed.
+ *
+ * The asserts state the expected preconditions - a single ODM slice, no plane
+ * attached, and a stream encoder present - but the function carries on even
+ * if they do not hold.
+ */
+void resource_remove_otg_master_for_stream_output(struct dc_state *context,
+ const struct resource_pool *pool,
+ struct dc_stream_state *stream)
+{
+ struct pipe_ctx *otg_master = resource_get_otg_master_for_stream(
+ &context->res_ctx, stream);
+
+ if (!otg_master)
+ return;
+
+ ASSERT(resource_get_odm_slice_count(otg_master) == 1);
+ ASSERT(otg_master->plane_state == NULL);
+ ASSERT(otg_master->stream_res.stream_enc);
+ update_stream_engine_usage(
+ &context->res_ctx,
+ pool,
+ otg_master->stream_res.stream_enc,
+ false);
+
+ /* HPO DP encoders are only released for 128b/132b signals */
+ if (stream->ctx->dc->link_srv->dp_is_128b_132b_signal(otg_master)) {
+ update_hpo_dp_stream_engine_usage(
+ &context->res_ctx, pool,
+ otg_master->stream_res.hpo_dp_stream_enc,
+ false);
+ remove_hpo_dp_link_enc_from_ctx(
+ &context->res_ctx, otg_master, stream);
+ }
+
+ if (stream->ctx->dc->config.unify_link_enc_assignment)
+ remove_dio_link_enc_from_ctx(&context->res_ctx, otg_master, stream);
+
+ if (otg_master->stream_res.audio)
+ update_audio_usage(
+ &context->res_ctx,
+ pool,
+ otg_master->stream_res.audio,
+ false);
+
+ resource_unreference_clock_source(&context->res_ctx,
+ pool,
+ otg_master->clock_source);
+
+ if (pool->funcs->remove_stream_from_ctx)
+ pool->funcs->remove_stream_from_ctx(
+ stream->ctx->dc, context, stream);
+
+ /* must come last: every release above reads fields of otg_master */
+ memset(otg_master, 0, sizeof(*otg_master));
+}
+
+/* For each OPP head of an OTG master, add top plane at plane index 0.
+ *
+ * In the following example, the stream has 2 ODM slices without a top plane.
+ * By adding a plane 0 to OPP heads, we are configuring our hardware to render
+ * plane 0 by using each OPP head's DPP.
+ *
+ * Inter-pipe Relation (Before Adding Plane)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | | slice 0 | |
+ * | 0 | |blank ----ODM----------- |
+ * | | | slice 1 | | |
+ * | 1 | |blank ---- | |
+ * |________|_______________|___________|_____________|
+ *
+ * Inter-pipe Relation (After Adding Plane)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | plane 0 | slice 0 | |
+ * | 0 | -------------------------ODM----------- |
+ * | | plane 0 | slice 1 | | |
+ * | 1 | ------------------------- | |
+ * |________|_______________|___________|_____________|
+ */
+static bool add_plane_to_opp_head_pipes(struct pipe_ctx *otg_master_pipe,
struct dc_plane_state *plane_state,
struct dc_state *context)
{
- int i;
- struct dc_stream_status *stream_status = NULL;
- struct resource_pool *pool = dc->res_pool;
+ struct pipe_ctx *opp_head_pipe = otg_master_pipe;
- for (i = 0; i < context->stream_count; i++)
- if (context->streams[i] == stream) {
- stream_status = &context->stream_status[i];
- break;
+ while (opp_head_pipe) {
+ if (opp_head_pipe->plane_state) {
+ ASSERT(0);
+ return false;
}
+ opp_head_pipe->plane_state = plane_state;
+ opp_head_pipe = opp_head_pipe->next_odm_pipe;
+ }
+
+ return true;
+}
+
+/* For each OPP head of an OTG master, acquire a secondary DPP pipe and add
+ * the plane. So the plane is added to all ODM slices associated with the OTG
+ * master pipe in the bottom layer.
+ *
+ * In the following example, the stream has 2 ODM slices and a top plane 0.
+ * By acquiring secondary DPP pipes and adding a plane 1, we are configuring our
+ * hardware to render the plane 1 by acquiring a new pipe for each ODM slice and
+ * render plane 1 using new pipes' DPP in the Z axis below plane 0.
+ *
+ * Inter-pipe Relation (Before Adding Plane)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | plane 0 | slice 0 | |
+ * | 0 | -------------------------ODM----------- |
+ * | | plane 0 | slice 1 | | |
+ * | 1 | ------------------------- | |
+ * |________|_______________|___________|_____________|
+ *
+ * Inter-pipe Relation (After Acquiring and Adding Plane)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | plane 0 | slice 0 | |
+ * | 0 | -------------MPC---------ODM----------- |
+ * | | plane 1 | | | | |
+ * | 2 | ------------- | | | |
+ * | | plane 0 | slice 1 | | |
+ * | 1 | -------------MPC--------- | |
+ * | | plane 1 | | | |
+ * | 3 | ------------- | | |
+ * |________|_______________|___________|_____________|
+ */
+/* otg_master_pipe: OTG master whose OPP heads each receive one new pipe.
+ * plane_state:     plane assigned to every newly acquired pipe.
+ * new_ctx:         state being built; pipes are acquired from it.
+ * cur_ctx:         state passed through to the pool's acquire callback.
+ * pool:            must provide acquire_free_pipe_as_secondary_dpp_pipe.
+ *
+ * return - true if a secondary DPP pipe was acquired and linked below the
+ * tail pipe of every OPP head. false if the pool lacks the acquire callback,
+ * if new_ctx has fewer free pipes than OPP heads, or if an acquire fails; in
+ * the last case pipes acquired for earlier OPP heads stay linked in new_ctx.
+ */
+static bool acquire_secondary_dpp_pipes_and_add_plane(
+		struct pipe_ctx *otg_master_pipe,
+		struct dc_plane_state *plane_state,
+		struct dc_state *new_ctx,
+		struct dc_state *cur_ctx,
+		struct resource_pool *pool)
+{
+	struct pipe_ctx *sec_pipe, *tail_pipe;
+	struct pipe_ctx *opp_heads[MAX_PIPES];
+	int opp_head_count;
+	int i;
+
+	if (!pool->funcs->acquire_free_pipe_as_secondary_dpp_pipe) {
+		ASSERT(0);
+		return false;
+	}
- if (stream_status == NULL) {
- dm_error("Existing stream not found; failed to remove plane.\n");
+	opp_head_count = resource_get_opp_heads_for_otg_master(otg_master_pipe,
+			&new_ctx->res_ctx, opp_heads);
+	if (get_num_of_free_pipes(pool, new_ctx) < opp_head_count)
+		/* not enough free pipes */
return false;
+
+	for (i = 0; i < opp_head_count; i++) {
+		sec_pipe = pool->funcs->acquire_free_pipe_as_secondary_dpp_pipe(
+				cur_ctx,
+				new_ctx,
+				pool,
+				opp_heads[i]);
+		ASSERT(sec_pipe);
+		/* the assert alone does not stop execution; never touch a NULL pipe */
+		if (!sec_pipe)
+			return false;
+		sec_pipe->plane_state = plane_state;
+
+		/* establish pipe relationship */
+		tail_pipe = get_tail_pipe(opp_heads[i]);
+		tail_pipe->bottom_pipe = sec_pipe;
+		sec_pipe->top_pipe = tail_pipe;
+		sec_pipe->bottom_pipe = NULL;
+		if (tail_pipe->prev_odm_pipe) {
+			/* chain the new pipe to its counterpart in the previous ODM slice */
+			ASSERT(tail_pipe->prev_odm_pipe->bottom_pipe);
+			sec_pipe->prev_odm_pipe = tail_pipe->prev_odm_pipe->bottom_pipe;
+			tail_pipe->prev_odm_pipe->bottom_pipe->next_odm_pipe = sec_pipe;
+		} else {
+			sec_pipe->prev_odm_pipe = NULL;
+		}
}
+	return true;
+}
+
+/* Attach plane_state to the stream driven by otg_master_pipe in new_ctx.
+ *
+ * When the OTG master has no plane yet, the plane is placed directly on its
+ * OPP head pipes; otherwise secondary DPP pipes are acquired for it. Scaling
+ * parameters are then rebuilt for the pipes carrying the plane.
+ *
+ * return - true on success. false if the plane could not be attached, or if
+ * rebuilding the scaling parameters failed, in which case the pipes for the
+ * plane are removed from new_ctx again before returning.
+ */
+bool resource_append_dpp_pipes_for_plane_composition(
+		struct dc_state *new_ctx,
+		struct dc_state *cur_ctx,
+		struct resource_pool *pool,
+		struct pipe_ctx *otg_master_pipe,
+		struct dc_plane_state *plane_state)
+{
+	bool attached;
+
+	if (otg_master_pipe->plane_state != NULL)
+		attached = acquire_secondary_dpp_pipes_and_add_plane(
+				otg_master_pipe, plane_state, new_ctx,
+				cur_ctx, pool);
+	else
+		attached = add_plane_to_opp_head_pipes(otg_master_pipe,
+				plane_state, new_ctx);
+
+	if (!attached)
+		return false;
+
+	/* when appending a plane mpc slice count changes from 0 to 1 */
+	if (update_pipe_params_after_mpc_slice_count_change(
+			plane_state, new_ctx, pool))
+		return true;
+
+	/* undo the attachment so new_ctx does not keep half-configured pipes */
+	resource_remove_dpp_pipes_for_plane_composition(new_ctx,
+			pool, plane_state);
+	return false;
+}
+
+void resource_remove_dpp_pipes_for_plane_composition(
+ struct dc_state *context,
+ const struct resource_pool *pool,
+ const struct dc_plane_state *plane_state)
+{
+ int i;
- /* release pipe for plane*/
for (i = pool->pipe_count - 1; i >= 0; i--) {
struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
@@ -1510,110 +3169,340 @@ bool dc_remove_plane_from_context(
memset(pipe_ctx, 0, sizeof(*pipe_ctx));
}
}
+}
+/*
+ * Increase ODM slice count by 1 by acquiring pipes and adding a new ODM slice
+ * at the last index.
+ * return - true if a new ODM slice is added and required pipes are acquired.
+ * false if new_ctx is no longer a valid state after new ODM slice is added.
+ *
+ * This is achieved by duplicating MPC blending tree from previous ODM slice.
+ * In the following example, we have a single MPC tree and 1 ODM slice 0. We
+ * want to add a new odm slice by duplicating the MPC blending tree and add
+ * ODM slice 1.
+ *
+ * Inter-pipe Relation (Before Acquiring and Adding ODM Slice)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | plane 0 | slice 0 | |
+ * | 0 | -------------MPC---------ODM----------- |
+ * | | plane 1 | | | |
+ * | 1 | ------------- | | |
+ * |________|_______________|___________|_____________|
+ *
+ * Inter-pipe Relation (After Acquiring and Adding ODM Slice)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | plane 0 | slice 0 | |
+ * | 0 | -------------MPC---------ODM----------- |
+ * | | plane 1 | | | | |
+ * | 1 | ------------- | | | |
+ * | | plane 0 | slice 1 | | |
+ * | 2 | -------------MPC--------- | |
+ * | | plane 1 | | | |
+ * | 3 | ------------- | | |
+ * |________|_______________|___________|_____________|
+ */
+static bool acquire_pipes_and_add_odm_slice(
+ struct pipe_ctx *otg_master_pipe,
+ struct dc_state *new_ctx,
+ const struct dc_state *cur_ctx,
+ const struct resource_pool *pool)
+{
+ struct pipe_ctx *last_opp_head = get_last_opp_head(otg_master_pipe);
+ struct pipe_ctx *new_opp_head;
+ struct pipe_ctx *last_top_dpp_pipe, *last_bottom_dpp_pipe,
+ *new_top_dpp_pipe, *new_bottom_dpp_pipe;
- for (i = 0; i < stream_status->plane_count; i++) {
- if (stream_status->plane_states[i] == plane_state) {
-
- dc_plane_state_release(stream_status->plane_states[i]);
- break;
- }
- }
-
- if (i == stream_status->plane_count) {
- dm_error("Existing plane_state not found; failed to detach it!\n");
+ if (!pool->funcs->acquire_free_pipe_as_secondary_opp_head) {
+ ASSERT(0);
return false;
}
+ new_opp_head = pool->funcs->acquire_free_pipe_as_secondary_opp_head(
+ cur_ctx, new_ctx, pool,
+ otg_master_pipe);
+ if (!new_opp_head)
+ return false;
- stream_status->plane_count--;
-
- /* Start at the plane we've just released, and move all the planes one index forward to "trim" the array */
- for (; i < stream_status->plane_count; i++)
- stream_status->plane_states[i] = stream_status->plane_states[i + 1];
+ last_opp_head->next_odm_pipe = new_opp_head;
+ new_opp_head->prev_odm_pipe = last_opp_head;
+ new_opp_head->next_odm_pipe = NULL;
+ new_opp_head->plane_state = last_opp_head->plane_state;
+ last_top_dpp_pipe = last_opp_head;
+ new_top_dpp_pipe = new_opp_head;
+
+ while (last_top_dpp_pipe->bottom_pipe) {
+ last_bottom_dpp_pipe = last_top_dpp_pipe->bottom_pipe;
+ new_bottom_dpp_pipe = pool->funcs->acquire_free_pipe_as_secondary_dpp_pipe(
+ cur_ctx, new_ctx, pool,
+ new_opp_head);
+ if (!new_bottom_dpp_pipe)
+ return false;
- stream_status->plane_states[stream_status->plane_count] = NULL;
+ new_bottom_dpp_pipe->plane_state = last_bottom_dpp_pipe->plane_state;
+ new_top_dpp_pipe->bottom_pipe = new_bottom_dpp_pipe;
+ new_bottom_dpp_pipe->top_pipe = new_top_dpp_pipe;
+ last_bottom_dpp_pipe->next_odm_pipe = new_bottom_dpp_pipe;
+ new_bottom_dpp_pipe->prev_odm_pipe = last_bottom_dpp_pipe;
+ new_bottom_dpp_pipe->next_odm_pipe = NULL;
+ last_top_dpp_pipe = last_bottom_dpp_pipe;
+ }
return true;
}
-bool dc_rem_all_planes_for_stream(
- const struct dc *dc,
- struct dc_stream_state *stream,
- struct dc_state *context)
+/*
+ * Decrease ODM slice count by 1 by releasing pipes and removing the ODM slice
+ * at the last index.
+ * return - true if the last ODM slice is removed and related pipes are
+ * released. false if there is no removable ODM slice.
+ *
+ * In the following example, we have 2 MPC trees and ODM slice 0 and slice 1.
+ * We want to remove the last ODM slice, i.e. slice 1. We are releasing secondary DPP
+ * pipe 3 and OPP head pipe 2.
+ *
+ * Inter-pipe Relation (Before Releasing and Removing ODM Slice)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | plane 0 | slice 0 | |
+ * | 0 | -------------MPC---------ODM----------- |
+ * | | plane 1 | | | | |
+ * | 1 | ------------- | | | |
+ * | | plane 0 | slice 1 | | |
+ * | 2 | -------------MPC--------- | |
+ * | | plane 1 | | | |
+ * | 3 | ------------- | | |
+ * |________|_______________|___________|_____________|
+ *
+ * Inter-pipe Relation (After Releasing and Removing ODM Slice)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | plane 0 | slice 0 | |
+ * | 0 | -------------MPC---------ODM----------- |
+ * | | plane 1 | | | |
+ * | 1 | ------------- | | |
+ * |________|_______________|___________|_____________|
+ */
+static bool release_pipes_and_remove_odm_slice(
+ struct pipe_ctx *otg_master_pipe,
+ struct dc_state *context,
+ const struct resource_pool *pool)
{
- int i, old_plane_count;
- struct dc_stream_status *stream_status = NULL;
- struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 };
+ struct pipe_ctx *last_opp_head = get_last_opp_head(otg_master_pipe);
+ struct pipe_ctx *tail_pipe = get_tail_pipe(last_opp_head);
- for (i = 0; i < context->stream_count; i++)
- if (context->streams[i] == stream) {
- stream_status = &context->stream_status[i];
- break;
- }
+ if (!pool->funcs->release_pipe) {
+ ASSERT(0);
+ return false;
+ }
- if (stream_status == NULL) {
- dm_error("Existing stream %p not found!\n", stream);
+ if (resource_is_pipe_type(last_opp_head, OTG_MASTER))
return false;
+
+ while (tail_pipe->top_pipe) {
+ tail_pipe->prev_odm_pipe->next_odm_pipe = NULL;
+ tail_pipe = tail_pipe->top_pipe;
+ pool->funcs->release_pipe(context, tail_pipe->bottom_pipe, pool);
+ tail_pipe->bottom_pipe = NULL;
}
+ last_opp_head->prev_odm_pipe->next_odm_pipe = NULL;
+ pool->funcs->release_pipe(context, last_opp_head, pool);
- old_plane_count = stream_status->plane_count;
+ return true;
+}
- for (i = 0; i < old_plane_count; i++)
- del_planes[i] = stream_status->plane_states[i];
+/*
+ * Increase MPC slice count by 1 by acquiring a new DPP pipe and add it as the
+ * last MPC slice of the plane associated with dpp_pipe.
+ *
+ * return - true if a new MPC slice is added and required pipes are acquired.
+ * false if new_ctx is no longer a valid state after new MPC slice is added.
+ *
+ * In the following example, we add a new MPC slice for plane 0 into the
+ * new_ctx. To do so we pass pipe 0 as dpp_pipe. The function acquires a new DPP
+ * pipe 2 for plane 0 as the bottom most pipe for plane 0.
+ *
+ * Inter-pipe Relation (Before Acquiring and Adding MPC Slice)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | plane 0 | | |
+ * | 0 | -------------MPC----------------------- |
+ * | | plane 1 | | | |
+ * | 1 | ------------- | | |
+ * |________|_______________|___________|_____________|
+ *
+ * Inter-pipe Relation (After Acquiring and Adding MPC Slice)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | plane 0 | | |
+ * | 0 | -------------MPC----------------------- |
+ * | | plane 0 | | | |
+ * | 2 | ------------- | | |
+ * | | plane 1 | | | |
+ * | 1 | ------------- | | |
+ * |________|_______________|___________|_____________|
+ */
+/* dpp_pipe: any DPP pipe of the plane that should gain one MPC slice.
+ * new_ctx:  state being built; the new pipe is acquired from it.
+ * cur_ctx:  state passed through to the pool's acquire callback.
+ * pool:     must provide acquire_free_pipe_as_secondary_dpp_pipe.
+ *
+ * return - true if a new DPP pipe was acquired and inserted directly below
+ * the last pipe in MPCC combine with dpp_pipe, carrying the same plane.
+ * false if the pool lacks the acquire callback, if dpp_pipe belongs to a
+ * stream with more than one ODM slice, or if no pipe could be acquired.
+ */
+static bool acquire_dpp_pipe_and_add_mpc_slice(
+		struct pipe_ctx *dpp_pipe,
+		struct dc_state *new_ctx,
+		const struct dc_state *cur_ctx,
+		const struct resource_pool *pool)
+{
+	struct pipe_ctx *last_dpp_pipe =
+			get_last_dpp_pipe_in_mpcc_combine(dpp_pipe);
+	struct pipe_ctx *opp_head = resource_get_opp_head(dpp_pipe);
+	struct pipe_ctx *new_dpp_pipe;
- for (i = 0; i < old_plane_count; i++)
- if (!dc_remove_plane_from_context(dc, stream, del_planes[i], context))
- return false;
+	if (!pool->funcs->acquire_free_pipe_as_secondary_dpp_pipe) {
+		ASSERT(0);
+		return false;
+	}
+
+	/* Reject the multi-ODM-slice case before touching new_ctx, so that a
+	 * refused request does not leave a freshly acquired pipe behind.
+	 */
+	if (resource_get_odm_slice_count(dpp_pipe) > 1)
+		return false;
+
+	new_dpp_pipe = pool->funcs->acquire_free_pipe_as_secondary_dpp_pipe(
+			cur_ctx, new_ctx, pool, opp_head);
+	if (!new_dpp_pipe)
+		return false;
+
+	/* splice the new pipe between last_dpp_pipe and whatever was below it */
+	new_dpp_pipe->bottom_pipe = last_dpp_pipe->bottom_pipe;
+	if (new_dpp_pipe->bottom_pipe)
+		new_dpp_pipe->bottom_pipe->top_pipe = new_dpp_pipe;
+	new_dpp_pipe->top_pipe = last_dpp_pipe;
+	last_dpp_pipe->bottom_pipe = new_dpp_pipe;
+	new_dpp_pipe->plane_state = last_dpp_pipe->plane_state;
return true;
}
-static bool add_all_planes_for_stream(
- const struct dc *dc,
- struct dc_stream_state *stream,
- const struct dc_validation_set set[],
- int set_count,
- struct dc_state *context)
+/*
+ * Reduce MPC slice count by 1 by releasing the bottom DPP pipe in MPCC combine
+ * with dpp_pipe and removing last MPC slice of the plane associated with
+ * dpp_pipe.
+ *
+ * return - true if the last MPC slice of the plane associated with dpp_pipe is
+ * removed and last DPP pipe in MPCC combine with dpp_pipe is released.
+ * false if there is no removable MPC slice.
+ *
+ * In the following example, we remove an MPC slice for plane 0 from the
+ * context. To do so we pass pipe 0 as dpp_pipe. The function releases pipe 1 as
+ * it is the last pipe for plane 0.
+ *
+ * Inter-pipe Relation (Before Releasing and Removing MPC Slice)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | plane 0 | | |
+ * | 0 | -------------MPC----------------------- |
+ * | | plane 0 | | | |
+ * | 1 | ------------- | | |
+ * | | plane 1 | | | |
+ * | 2 | ------------- | | |
+ * |________|_______________|___________|_____________|
+ *
+ * Inter-pipe Relation (After Releasing and Removing MPC Slice)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | plane 0 | | |
+ * | 0 | -------------MPC----------------------- |
+ * | | plane 1 | | | |
+ * | 2 | ------------- | | |
+ * |________|_______________|___________|_____________|
+ */
+static bool release_dpp_pipe_and_remove_mpc_slice(
+ struct pipe_ctx *dpp_pipe,
+ struct dc_state *context,
+ const struct resource_pool *pool)
{
- int i, j;
-
- for (i = 0; i < set_count; i++)
- if (set[i].stream == stream)
- break;
+ struct pipe_ctx *last_dpp_pipe =
+ get_last_dpp_pipe_in_mpcc_combine(dpp_pipe);
- if (i == set_count) {
- dm_error("Stream %p not found in set!\n", stream);
+ if (!pool->funcs->release_pipe) {
+ ASSERT(0);
return false;
}
- for (j = 0; j < set[i].plane_count; j++)
- if (!dc_add_plane_to_context(dc, stream, set[i].plane_states[j], context))
- return false;
+ if (resource_is_pipe_type(last_dpp_pipe, OPP_HEAD) ||
+ resource_get_odm_slice_count(dpp_pipe) > 1)
+ return false;
+
+ last_dpp_pipe->top_pipe->bottom_pipe = last_dpp_pipe->bottom_pipe;
+ if (last_dpp_pipe->bottom_pipe)
+ last_dpp_pipe->bottom_pipe->top_pipe = last_dpp_pipe->top_pipe;
+ pool->funcs->release_pipe(context, last_dpp_pipe, pool);
return true;
}
-bool dc_add_all_planes_for_stream(
- const struct dc *dc,
- struct dc_stream_state *stream,
- struct dc_plane_state * const *plane_states,
- int plane_count,
- struct dc_state *context)
+bool resource_update_pipes_for_stream_with_slice_count(
+ struct dc_state *new_ctx,
+ const struct dc_state *cur_ctx,
+ const struct resource_pool *pool,
+ const struct dc_stream_state *stream,
+ int new_slice_count)
{
- struct dc_validation_set set;
int i;
+ struct pipe_ctx *otg_master = resource_get_otg_master_for_stream(
+ &new_ctx->res_ctx, stream);
+ int cur_slice_count;
+ bool result = true;
+
+ if (!otg_master)
+ return false;
- set.stream = stream;
- set.plane_count = plane_count;
+ cur_slice_count = resource_get_odm_slice_count(otg_master);
- for (i = 0; i < plane_count; i++)
- set.plane_states[i] = plane_states[i];
+ if (new_slice_count == cur_slice_count)
+ return result;
- return add_all_planes_for_stream(dc, stream, &set, 1, context);
+ if (new_slice_count > cur_slice_count)
+ for (i = 0; i < new_slice_count - cur_slice_count && result; i++)
+ result = acquire_pipes_and_add_odm_slice(
+ otg_master, new_ctx, cur_ctx, pool);
+ else
+ for (i = 0; i < cur_slice_count - new_slice_count && result; i++)
+ result = release_pipes_and_remove_odm_slice(
+ otg_master, new_ctx, pool);
+ if (result)
+ result = update_pipe_params_after_odm_slice_count_change(
+ otg_master, new_ctx, pool);
+ return result;
}
-static bool is_timing_changed(struct dc_stream_state *cur_stream,
- struct dc_stream_state *new_stream)
+/* Change the number of MPC slices used for plane in new_ctx to
+ * new_slice_count by acquiring or releasing DPP pipes one at a time.
+ *
+ * new_ctx:         state to modify.
+ * cur_ctx:         state passed through when acquiring pipes.
+ * pool:            resource pool supplying the acquire/release callbacks.
+ * plane:           plane whose MPC slice count is changed.
+ * new_slice_count: requested MPC slice count.
+ *
+ * return - true if the plane already has new_slice_count slices, or if every
+ * acquire/release step and the subsequent scaling parameter rebuild
+ * succeeded. false if the plane has no DPP pipe in new_ctx or if any step
+ * failed.
+ */
+bool resource_update_pipes_for_plane_with_slice_count(
+		struct dc_state *new_ctx,
+		const struct dc_state *cur_ctx,
+		const struct resource_pool *pool,
+		const struct dc_plane_state *plane,
+		int new_slice_count)
+{
+	int i;
+	int dpp_pipe_count;
+	int cur_slice_count;
+	struct pipe_ctx *dpp_pipes[MAX_PIPES] = {0};
+	bool result = true;
+
+	dpp_pipe_count = resource_get_dpp_pipes_for_plane(plane,
+			&new_ctx->res_ctx, dpp_pipes);
+	if (dpp_pipe_count <= 0) {
+		/* dpp_pipes[0] is still NULL here; asserting alone would let the
+		 * code below dereference it.
+		 */
+		ASSERT(0);
+		return false;
+	}
+	cur_slice_count = resource_get_mpc_slice_count(dpp_pipes[0]);
+
+	if (new_slice_count == cur_slice_count)
+		return result;
+
+	/* each helper call adds or removes exactly one slice; stop on failure */
+	if (new_slice_count > cur_slice_count)
+		for (i = 0; i < new_slice_count - cur_slice_count && result; i++)
+			result = acquire_dpp_pipe_and_add_mpc_slice(
+					dpp_pipes[0], new_ctx, cur_ctx, pool);
+	else
+		for (i = 0; i < cur_slice_count - new_slice_count && result; i++)
+			result = release_dpp_pipe_and_remove_mpc_slice(
+					dpp_pipes[0], new_ctx, pool);
+	if (result)
+		result = update_pipe_params_after_mpc_slice_count_change(
+				dpp_pipes[0]->plane_state, new_ctx, pool);
+	return result;
+}
+
+bool dc_is_timing_changed(struct dc_stream_state *cur_stream,
+ struct dc_stream_state *new_stream)
{
if (cur_stream == NULL)
return true;
@@ -1637,7 +3526,10 @@ static bool are_stream_backends_same(
if (stream_a == NULL || stream_b == NULL)
return false;
- if (is_timing_changed(stream_a, stream_b))
+ if (dc_is_timing_changed(stream_a, stream_b))
+ return false;
+
+ if (stream_a->signal != stream_b->signal)
return false;
if (stream_a->dpms_off != stream_b->dpms_off)
@@ -1657,6 +3549,8 @@ static bool are_stream_backends_same(
bool dc_is_stream_unchanged(
struct dc_stream_state *old_stream, struct dc_stream_state *stream)
{
+ if (!old_stream || !stream)
+ return false;
if (!are_stream_backends_same(old_stream, stream))
return false;
@@ -1664,8 +3558,8 @@ bool dc_is_stream_unchanged(
if (old_stream->ignore_msa_timing_param != stream->ignore_msa_timing_param)
return false;
- // Only Have Audio left to check whether it is same or not. This is a corner case for Tiled sinks
- if (old_stream->audio_info.mode_count != stream->audio_info.mode_count)
+ /*compare audio info*/
+ if (memcmp(&old_stream->audio_info, &stream->audio_info, sizeof(stream->audio_info)) != 0)
return false;
return true;
@@ -1674,8 +3568,8 @@ bool dc_is_stream_unchanged(
/*
* dc_is_stream_scaling_unchanged() - Compare scaling rectangles of two streams.
*/
-bool dc_is_stream_scaling_unchanged(
- struct dc_stream_state *old_stream, struct dc_stream_state *stream)
+bool dc_is_stream_scaling_unchanged(struct dc_stream_state *old_stream,
+ struct dc_stream_state *stream)
{
if (old_stream == stream)
return true;
@@ -1696,36 +3590,6 @@ bool dc_is_stream_scaling_unchanged(
return true;
}
-static void update_stream_engine_usage(
- struct resource_context *res_ctx,
- const struct resource_pool *pool,
- struct stream_encoder *stream_enc,
- bool acquired)
-{
- int i;
-
- for (i = 0; i < pool->stream_enc_count; i++) {
- if (pool->stream_enc[i] == stream_enc)
- res_ctx->is_stream_enc_acquired[i] = acquired;
- }
-}
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-static void update_hpo_dp_stream_engine_usage(
- struct resource_context *res_ctx,
- const struct resource_pool *pool,
- struct hpo_dp_stream_encoder *hpo_dp_stream_enc,
- bool acquired)
-{
- int i;
-
- for (i = 0; i < pool->hpo_dp_stream_enc_count; i++) {
- if (pool->hpo_dp_stream_enc[i] == hpo_dp_stream_enc)
- res_ctx->is_hpo_dp_stream_enc_acquired[i] = acquired;
- }
-}
-#endif
-
/* TODO: release audio object */
void update_audio_usage(
struct resource_context *res_ctx,
@@ -1740,37 +3604,6 @@ void update_audio_usage(
}
}
-static int acquire_first_free_pipe(
- struct resource_context *res_ctx,
- const struct resource_pool *pool,
- struct dc_stream_state *stream)
-{
- int i;
-
- for (i = 0; i < pool->pipe_count; i++) {
- if (!res_ctx->pipe_ctx[i].stream) {
- struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
-
- pipe_ctx->stream_res.tg = pool->timing_generators[i];
- pipe_ctx->plane_res.mi = pool->mis[i];
- pipe_ctx->plane_res.hubp = pool->hubps[i];
- pipe_ctx->plane_res.ipp = pool->ipps[i];
- pipe_ctx->plane_res.xfm = pool->transforms[i];
- pipe_ctx->plane_res.dpp = pool->dpps[i];
- pipe_ctx->stream_res.opp = pool->opps[i];
- if (pool->dpps[i])
- pipe_ctx->plane_res.mpcc_inst = pool->dpps[i]->inst;
- pipe_ctx->pipe_idx = i;
-
-
- pipe_ctx->stream = stream;
- return i;
- }
- }
- return -1;
-}
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
static struct hpo_dp_stream_encoder *find_first_free_match_hpo_dp_stream_enc_for_link(
struct resource_context *res_ctx,
const struct resource_pool *pool,
@@ -1788,7 +3621,6 @@ static struct hpo_dp_stream_encoder *find_first_free_match_hpo_dp_stream_enc_for
return NULL;
}
-#endif
static struct audio *find_first_free_audio(
struct resource_context *res_ctx,
@@ -1798,6 +3630,9 @@ static struct audio *find_first_free_audio(
{
int i, available_audio_count;
+ if (id == ENGINE_ID_UNKNOWN)
+ return NULL;
+
available_audio_count = pool->audio_count;
for (i = 0; i < available_audio_count; i++) {
@@ -1819,123 +3654,7 @@ static struct audio *find_first_free_audio(
return pool->audios[i];
}
}
- return 0;
-}
-
-/*
- * dc_add_stream_to_ctx() - Add a new dc_stream_state to a dc_state.
- */
-enum dc_status dc_add_stream_to_ctx(
- struct dc *dc,
- struct dc_state *new_ctx,
- struct dc_stream_state *stream)
-{
- enum dc_status res;
- DC_LOGGER_INIT(dc->ctx->logger);
-
- if (new_ctx->stream_count >= dc->res_pool->timing_generator_count) {
- DC_LOG_WARNING("Max streams reached, can't add stream %p !\n", stream);
- return DC_ERROR_UNEXPECTED;
- }
-
- new_ctx->streams[new_ctx->stream_count] = stream;
- dc_stream_retain(stream);
- new_ctx->stream_count++;
-
- res = dc->res_pool->funcs->add_stream_to_ctx(dc, new_ctx, stream);
- if (res != DC_OK)
- DC_LOG_WARNING("Adding stream %p to context failed with err %d!\n", stream, res);
-
- return res;
-}
-
-/*
- * dc_remove_stream_from_ctx() - Remove a stream from a dc_state.
- */
-enum dc_status dc_remove_stream_from_ctx(
- struct dc *dc,
- struct dc_state *new_ctx,
- struct dc_stream_state *stream)
-{
- int i;
- struct dc_context *dc_ctx = dc->ctx;
- struct pipe_ctx *del_pipe = resource_get_head_pipe_for_stream(&new_ctx->res_ctx, stream);
- struct pipe_ctx *odm_pipe;
-
- if (!del_pipe) {
- DC_ERROR("Pipe not found for stream %p !\n", stream);
- return DC_ERROR_UNEXPECTED;
- }
-
- odm_pipe = del_pipe->next_odm_pipe;
-
- /* Release primary pipe */
- ASSERT(del_pipe->stream_res.stream_enc);
- update_stream_engine_usage(
- &new_ctx->res_ctx,
- dc->res_pool,
- del_pipe->stream_res.stream_enc,
- false);
- /* Release link encoder from stream in new dc_state. */
- if (dc->res_pool->funcs->link_enc_unassign)
- dc->res_pool->funcs->link_enc_unassign(new_ctx, del_pipe->stream);
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (is_dp_128b_132b_signal(del_pipe)) {
- update_hpo_dp_stream_engine_usage(
- &new_ctx->res_ctx, dc->res_pool,
- del_pipe->stream_res.hpo_dp_stream_enc,
- false);
- }
-#endif
-
- if (del_pipe->stream_res.audio)
- update_audio_usage(
- &new_ctx->res_ctx,
- dc->res_pool,
- del_pipe->stream_res.audio,
- false);
-
- resource_unreference_clock_source(&new_ctx->res_ctx,
- dc->res_pool,
- del_pipe->clock_source);
-
- if (dc->res_pool->funcs->remove_stream_from_ctx)
- dc->res_pool->funcs->remove_stream_from_ctx(dc, new_ctx, stream);
-
- while (odm_pipe) {
- struct pipe_ctx *next_odm_pipe = odm_pipe->next_odm_pipe;
-
- memset(odm_pipe, 0, sizeof(*odm_pipe));
- odm_pipe = next_odm_pipe;
- }
- memset(del_pipe, 0, sizeof(*del_pipe));
-
- for (i = 0; i < new_ctx->stream_count; i++)
- if (new_ctx->streams[i] == stream)
- break;
-
- if (new_ctx->streams[i] != stream) {
- DC_ERROR("Context doesn't have stream %p !\n", stream);
- return DC_ERROR_UNEXPECTED;
- }
-
- dc_stream_release(new_ctx->streams[i]);
- new_ctx->stream_count--;
-
- /* Trim back arrays */
- for (; i < new_ctx->stream_count; i++) {
- new_ctx->streams[i] = new_ctx->streams[i + 1];
- new_ctx->stream_status[i] = new_ctx->stream_status[i + 1];
- }
-
- new_ctx->streams[new_ctx->stream_count] = NULL;
- memset(
- &new_ctx->stream_status[new_ctx->stream_count],
- 0,
- sizeof(new_ctx->stream_status[0]));
-
- return DC_OK;
+ return NULL;
}
static struct dc_stream_state *find_pll_sharable_stream(
@@ -1978,10 +3697,13 @@ static int get_norm_pix_clk(const struct dc_crtc_timing *timing)
break;
case COLOR_DEPTH_121212:
normalized_pix_clk = (pix_clk * 36) / 24;
- break;
+ break;
+ case COLOR_DEPTH_141414:
+ normalized_pix_clk = (pix_clk * 42) / 24;
+ break;
case COLOR_DEPTH_161616:
normalized_pix_clk = (pix_clk * 48) / 24;
- break;
+ break;
default:
ASSERT(0);
break;
@@ -2011,6 +3733,8 @@ static int acquire_resource_from_hw_enabled_state(
{
struct dc_link *link = stream->link;
unsigned int i, inst, tg_inst = 0;
+ uint32_t numPipes = 1;
+ uint32_t id_src[4] = {0};
/* Check for enabled DIG to identify enabled display */
if (!link->link_enc->funcs->is_dig_enabled(link->link_enc))
@@ -2040,56 +3764,194 @@ static int acquire_resource_from_hw_enabled_state(
struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[tg_inst];
pipe_ctx->stream_res.tg = pool->timing_generators[tg_inst];
- pipe_ctx->plane_res.mi = pool->mis[tg_inst];
- pipe_ctx->plane_res.hubp = pool->hubps[tg_inst];
- pipe_ctx->plane_res.ipp = pool->ipps[tg_inst];
- pipe_ctx->plane_res.xfm = pool->transforms[tg_inst];
- pipe_ctx->plane_res.dpp = pool->dpps[tg_inst];
- pipe_ctx->stream_res.opp = pool->opps[tg_inst];
+ id_src[0] = tg_inst;
+
+ if (pipe_ctx->stream_res.tg->funcs->get_optc_source)
+ pipe_ctx->stream_res.tg->funcs->get_optc_source(pipe_ctx->stream_res.tg,
+ &numPipes, &id_src[0], &id_src[1]);
+
+ if (id_src[0] == 0xf && id_src[1] == 0xf) {
+ id_src[0] = tg_inst;
+ numPipes = 1;
+ }
- if (pool->dpps[tg_inst]) {
- pipe_ctx->plane_res.mpcc_inst = pool->dpps[tg_inst]->inst;
+ for (i = 0; i < numPipes; i++) {
+ //Check if src id invalid
+ if (id_src[i] == 0xf)
+ return -1;
- // Read DPP->MPCC->OPP Pipe from HW State
- if (pool->mpc->funcs->read_mpcc_state) {
- struct mpcc_state s = {0};
+ pipe_ctx = &res_ctx->pipe_ctx[id_src[i]];
- pool->mpc->funcs->read_mpcc_state(pool->mpc, pipe_ctx->plane_res.mpcc_inst, &s);
+ pipe_ctx->stream_res.tg = pool->timing_generators[tg_inst];
+ pipe_ctx->plane_res.mi = pool->mis[id_src[i]];
+ pipe_ctx->plane_res.hubp = pool->hubps[id_src[i]];
+ pipe_ctx->plane_res.ipp = pool->ipps[id_src[i]];
+ pipe_ctx->plane_res.xfm = pool->transforms[id_src[i]];
+ pipe_ctx->plane_res.dpp = pool->dpps[id_src[i]];
+ pipe_ctx->stream_res.opp = pool->opps[id_src[i]];
- if (s.dpp_id < MAX_MPCC)
- pool->mpc->mpcc_array[pipe_ctx->plane_res.mpcc_inst].dpp_id = s.dpp_id;
+ if (pool->dpps[id_src[i]]) {
+ pipe_ctx->plane_res.mpcc_inst = pool->dpps[id_src[i]]->inst;
- if (s.bot_mpcc_id < MAX_MPCC)
- pool->mpc->mpcc_array[pipe_ctx->plane_res.mpcc_inst].mpcc_bot =
- &pool->mpc->mpcc_array[s.bot_mpcc_id];
+ if (pool->mpc->funcs->read_mpcc_state) {
+ struct mpcc_state s = {0};
- if (s.opp_id < MAX_OPP)
- pipe_ctx->stream_res.opp->mpc_tree_params.opp_id = s.opp_id;
+ pool->mpc->funcs->read_mpcc_state(pool->mpc, pipe_ctx->plane_res.mpcc_inst, &s);
+
+ if (s.dpp_id < MAX_MPCC)
+ pool->mpc->mpcc_array[pipe_ctx->plane_res.mpcc_inst].dpp_id =
+ s.dpp_id;
+
+ if (s.bot_mpcc_id < MAX_MPCC)
+ pool->mpc->mpcc_array[pipe_ctx->plane_res.mpcc_inst].mpcc_bot =
+ &pool->mpc->mpcc_array[s.bot_mpcc_id];
+
+ if (s.opp_id < MAX_OPP)
+ pipe_ctx->stream_res.opp->mpc_tree_params.opp_id = s.opp_id;
+ }
}
+ pipe_ctx->pipe_idx = id_src[i];
+
+ if (id_src[i] >= pool->timing_generator_count) {
+ id_src[i] = pool->timing_generator_count - 1;
+
+ pipe_ctx->stream_res.tg = pool->timing_generators[id_src[i]];
+ pipe_ctx->stream_res.opp = pool->opps[id_src[i]];
+ }
+
+ pipe_ctx->stream = stream;
}
- pipe_ctx->pipe_idx = tg_inst;
- pipe_ctx->stream = stream;
- return tg_inst;
+ if (numPipes == 2) {
+ stream->apply_boot_odm_mode = dm_odm_combine_policy_2to1;
+ res_ctx->pipe_ctx[id_src[0]].next_odm_pipe = &res_ctx->pipe_ctx[id_src[1]];
+ res_ctx->pipe_ctx[id_src[0]].prev_odm_pipe = NULL;
+ res_ctx->pipe_ctx[id_src[1]].next_odm_pipe = NULL;
+ res_ctx->pipe_ctx[id_src[1]].prev_odm_pipe = &res_ctx->pipe_ctx[id_src[0]];
+ } else
+ stream->apply_boot_odm_mode = dm_odm_combine_mode_disabled;
+
+ return id_src[0];
}
return -1;
}
-static void mark_seamless_boot_stream(
- const struct dc *dc,
- struct dc_stream_state *stream)
+static void mark_seamless_boot_stream(const struct dc *dc,
+ struct dc_stream_state *stream)
{
struct dc_bios *dcb = dc->ctx->dc_bios;
- /* TODO: Check Linux */
- if (dc->config.allow_seamless_boot_optimization &&
- !dcb->funcs->is_accelerated_mode(dcb)) {
- if (dc_validate_seamless_boot_timing(dc, stream->sink, &stream->timing))
- stream->apply_seamless_boot_optimization = true;
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ if (stream->apply_seamless_boot_optimization) /* already marked, nothing to do */
+ return;
+ if (!dc->config.allow_seamless_boot_optimization) /* disabled by dc config */
+ return;
+ if (dcb->funcs->is_accelerated_mode(dcb))
+ return;
+ if (dc_validate_boot_timing(dc, stream->sink, &stream->timing)) { /* flag is set only when this validation succeeds */
+ stream->apply_seamless_boot_optimization = true;
+ DC_LOG_DC("Marked stream for seamless boot optimization\n");
}
}
+/*
+ * Acquire a pipe as OTG master and assign to the stream in new dc context.
+ * return - true if OTG master pipe is acquired and new dc context is updated.
+ * false if it fails to acquire an OTG master pipe for this stream.
+ *
+ * Candidate pipes are searched in this order, stopping at the first hit:
+ * 1. a free pipe that is an OTG master in the current resource context,
+ * 2. a free pipe not used at all in the current resource context,
+ * 3. a free pipe used as a secondary DPP pipe in the current state,
+ * 4. any free pipe in the new resource context.
+ * If none is found, acquire_first_split_pipe() is tried as a last resort.
+ *
+ * In the example below, we acquired pipe 0 as OTG master pipe for the stream.
+ * After the function its Inter-pipe Relation is represented by the diagram
+ * below.
+ *
+ * Inter-pipe Relation
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | | | |
+ * | 0 | |blank ------------------ |
+ * |________|_______________|___________|_____________|
+ */
+static bool acquire_otg_master_pipe_for_stream(
+ const struct dc_state *cur_ctx,
+ struct dc_state *new_ctx,
+ const struct resource_pool *pool,
+ struct dc_stream_state *stream)
+{
+ /* TODO: Move this function to DCN specific resource file and acquire
+ * DSC resource here. The reason is that the function should have the
+ * same level of responsibility as when we acquire secondary OPP head.
+ * We acquire DSC when we acquire secondary OPP head, so we should
+ * acquire DSC when we acquire OTG master.
+ */
+ int pipe_idx;
+ struct pipe_ctx *pipe_ctx = NULL;
+
+ /*
+ * Upper level code is responsible to optimize unnecessary addition and
+ * removal for unchanged streams. So unchanged stream will keep the same
+ * OTG master instance allocated. When current stream is removed and a
+ * new stream is added, we want to reuse the OTG instance made available
+ * by the removed stream first. If not found, we try to avoid using
+ * any free pipes already used in current context as this could tear
+ * down existing ODM/MPC/MPO configuration unnecessarily.
+ */
+
+ /*
+ * Try to acquire the same OTG master already in use. This is not
+ * optimal because resetting an enabled OTG master pipe for a new stream
+ * requires an extra frame of wait. However there are test automation
+ * and eDP assumptions that rely on reusing the same OTG master pipe
+ * during mode change. We have to keep this logic as is for now.
+ */
+ pipe_idx = recource_find_free_pipe_used_as_otg_master_in_cur_res_ctx(
+ &cur_ctx->res_ctx, &new_ctx->res_ctx, pool);
+ /*
+ * Try to acquire a pipe not used in current resource context to avoid
+ * pipe swapping.
+ */
+ if (pipe_idx == FREE_PIPE_INDEX_NOT_FOUND)
+ pipe_idx = recource_find_free_pipe_not_used_in_cur_res_ctx(
+ &cur_ctx->res_ctx, &new_ctx->res_ctx, pool);
+ /*
+ * If pipe swapping is unavoidable, try to acquire pipe used as
+ * secondary DPP pipe in current state as we prioritize to support more
+ * streams over supporting MPO planes.
+ */
+ if (pipe_idx == FREE_PIPE_INDEX_NOT_FOUND)
+ pipe_idx = resource_find_free_pipe_used_as_cur_sec_dpp(
+ &cur_ctx->res_ctx, &new_ctx->res_ctx, pool);
+ if (pipe_idx == FREE_PIPE_INDEX_NOT_FOUND)
+ pipe_idx = resource_find_any_free_pipe(&new_ctx->res_ctx, pool);
+ if (pipe_idx != FREE_PIPE_INDEX_NOT_FOUND) {
+ pipe_ctx = &new_ctx->res_ctx.pipe_ctx[pipe_idx];
+ memset(pipe_ctx, 0, sizeof(*pipe_ctx));
+ pipe_ctx->pipe_idx = pipe_idx;
+ pipe_ctx->stream_res.tg = pool->timing_generators[pipe_idx];
+ pipe_ctx->plane_res.mi = pool->mis[pipe_idx];
+ pipe_ctx->plane_res.hubp = pool->hubps[pipe_idx];
+ pipe_ctx->plane_res.ipp = pool->ipps[pipe_idx];
+ pipe_ctx->plane_res.xfm = pool->transforms[pipe_idx];
+ pipe_ctx->plane_res.dpp = pool->dpps[pipe_idx];
+ pipe_ctx->stream_res.opp = pool->opps[pipe_idx];
+ if (pool->dpps[pipe_idx])
+ pipe_ctx->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst;
+
+ if (pipe_idx >= pool->timing_generator_count && pool->timing_generator_count != 0) {
+ int tg_inst = pool->timing_generator_count - 1; /* pipe index is past the TG array: use the last TG and the OPP at that index */
+
+ pipe_ctx->stream_res.tg = pool->timing_generators[tg_inst];
+ pipe_ctx->stream_res.opp = pool->opps[tg_inst];
+ }
+
+ pipe_ctx->stream = stream;
+ } else {
+ pipe_idx = acquire_first_split_pipe(&new_ctx->res_ctx, pool, stream);
+ }
+
+ return pipe_idx != FREE_PIPE_INDEX_NOT_FOUND;
+}
+
enum dc_status resource_map_pool_resources(
const struct dc *dc,
struct dc_state *context,
@@ -2100,6 +3962,8 @@ enum dc_status resource_map_pool_resources(
struct dc_context *dc_ctx = dc->ctx;
struct pipe_ctx *pipe_ctx = NULL;
int pipe_idx = -1;
+ bool acquired = false;
+ bool is_dio_encoder = true;
calculate_phy_pix_clks(stream);
@@ -2113,22 +3977,20 @@ enum dc_status resource_map_pool_resources(
if (pipe_idx < 0)
/* hw resource was assigned to other stream */
stream->apply_seamless_boot_optimization = false;
+ else
+ acquired = true;
}
- if (pipe_idx < 0)
+ if (!acquired)
/* acquire new resources */
- pipe_idx = acquire_first_free_pipe(&context->res_ctx, pool, stream);
+ acquired = acquire_otg_master_pipe_for_stream(dc->current_state,
+ context, pool, stream);
-#ifdef CONFIG_DRM_AMD_DC_DCN
- if (pipe_idx < 0)
- pipe_idx = acquire_first_split_pipe(&context->res_ctx, pool, stream);
-#endif
+ pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream);
- if (pipe_idx < 0 || context->res_ctx.pipe_ctx[pipe_idx].stream_res.tg == NULL)
+ if (!pipe_ctx || pipe_ctx->stream_res.tg == NULL)
return DC_NO_CONTROLLER_RESOURCE;
- pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx];
-
pipe_ctx->stream_res.stream_enc =
dc->res_pool->funcs->find_first_free_match_stream_enc_for_link(
&context->res_ctx, pool, stream);
@@ -2141,16 +4003,20 @@ enum dc_status resource_map_pool_resources(
pipe_ctx->stream_res.stream_enc,
true);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
/* Allocate DP HPO Stream Encoder based on signal, hw capabilities
* and link settings
*/
- if (dc_is_dp_signal(stream->signal) &&
- dc->caps.dp_hpo) {
- struct dc_link_settings link_settings = {0};
+ if (dc_is_dp_signal(stream->signal) ||
+ dc_is_virtual_signal(stream->signal)) {
+ if (!dc->link_srv->dp_decide_link_settings(stream,
+ &pipe_ctx->link_config.dp_link_settings))
+ return DC_FAIL_DP_LINK_BANDWIDTH;
+
+ dc->link_srv->dp_decide_tunnel_settings(stream,
+ &pipe_ctx->link_config.dp_tunnel_settings);
- decide_link_settings(stream, &link_settings);
- if (dp_get_link_encoding_format(&link_settings) == DP_128b_132b_ENCODING) {
+ if (dc->link_srv->dp_get_encoding_format(
+ &pipe_ctx->link_config.dp_link_settings) == DP_128b_132b_ENCODING) {
pipe_ctx->stream_res.hpo_dp_stream_enc =
find_first_free_match_hpo_dp_stream_enc_for_link(
&context->res_ctx, pool, stream);
@@ -2162,14 +4028,21 @@ enum dc_status resource_map_pool_resources(
&context->res_ctx, pool,
pipe_ctx->stream_res.hpo_dp_stream_enc,
true);
+ if (!add_hpo_dp_link_enc_to_ctx(&context->res_ctx, pool, pipe_ctx, stream))
+ return DC_NO_LINK_ENC_RESOURCE;
}
}
-#endif
+
+ if (dc->config.unify_link_enc_assignment && is_dio_encoder)
+ if (!add_dio_link_enc_to_ctx(dc, context, pool, pipe_ctx, stream))
+ return DC_NO_LINK_ENC_RESOURCE;
/* TODO: Add check if ASIC support and EDID audio */
if (!stream->converter_disable_audio &&
dc_is_audio_capable_signal(pipe_ctx->stream->signal) &&
- stream->audio_info.mode_count && stream->audio_info.flags.all) {
+ stream->audio_info.mode_count &&
+ (stream->audio_info.flags.all ||
+ (stream->sink && stream->sink->edid_caps.panel_patch.skip_audio_sab_check))) {
pipe_ctx->stream_res.audio = find_first_free_audio(
&context->res_ctx, pool, pipe_ctx->stream_res.stream_enc->id, dc_ctx->dce_version);
@@ -2185,14 +4058,10 @@ enum dc_status resource_map_pool_resources(
/* Add ABM to the resource if on EDP */
if (pipe_ctx->stream && dc_is_embedded_signal(pipe_ctx->stream->signal)) {
-#if defined(CONFIG_DRM_AMD_DC_DCN)
if (pool->abm)
pipe_ctx->stream_res.abm = pool->abm;
else
pipe_ctx->stream_res.abm = pool->multiple_abms[pipe_ctx->stream_res.tg->inst];
-#else
- pipe_ctx->stream_res.abm = pool->abm;
-#endif
}
for (i = 0; i < context->stream_count; i++)
@@ -2209,47 +4078,334 @@ enum dc_status resource_map_pool_resources(
return DC_ERROR_UNEXPECTED;
}
-/**
- * dc_resource_state_copy_construct_current() - Creates a new dc_state from existing state
- * Is a shallow copy. Increments refcounts on existing streams and planes.
- * @dc: copy out of dc->current_state
- * @dst_ctx: copy into this
- */
-void dc_resource_state_copy_construct_current(
- const struct dc *dc,
- struct dc_state *dst_ctx)
+bool dc_resource_is_dsc_encoding_supported(const struct dc *dc)
{
- dc_resource_state_copy_construct(dc->current_state, dst_ctx);
+ if (dc->res_pool == NULL) /* no resource pool to query: report unsupported */
+ return false;
+
+ return dc->res_pool->res_cap->num_dsc > 0; /* true when the pool's res_cap reports a non-zero num_dsc */
}
+/*
+ * planes_changed_for_existing_stream() - Compare requested planes with the
+ * planes currently attached to a stream.
+ *
+ * @context: state that already contains @stream
+ * @stream: stream whose plane list is checked
+ * @set: validation set array holding the requested planes per stream
+ * @set_count: number of entries in @set
+ *
+ * Return: true if the plane count or any plane pointer requested for @stream
+ * in @set differs from what @context holds for it; false if they match, or if
+ * @stream cannot be found in @context or in @set (both cases trip ASSERT(0)).
+ */
+static bool planes_changed_for_existing_stream(struct dc_state *context,
+					       struct dc_stream_state *stream,
+					       const struct dc_validation_set set[],
+					       int set_count)
+{
+	int i, j;
+	struct dc_stream_status *stream_status = NULL;
+
+	for (i = 0; i < context->stream_count; i++) {
+		if (context->streams[i] == stream) {
+			stream_status = &context->stream_status[i];
+			break;
+		}
+	}
+
+	if (!stream_status) {
+		ASSERT(0);
+		return false;
+	}
+
+	for (i = 0; i < set_count; i++)
+		if (set[i].stream == stream)
+			break;
+
+	/*
+	 * Same handling as the stream_status check above: return after
+	 * ASSERT(0) instead of falling through and indexing set[set_count],
+	 * which is one past the last valid entry.
+	 */
+	if (i == set_count) {
+		ASSERT(0);
+		return false;
+	}
+
+	if (set[i].plane_count != stream_status->plane_count)
+		return true;
+
+	for (j = 0; j < set[i].plane_count; j++)
+		if (set[i].plane_states[j] != stream_status->plane_states[j])
+			return true;
-void dc_resource_state_construct(
+	return false;
+}
+
+static bool add_all_planes_for_stream(
const struct dc *dc,
- struct dc_state *dst_ctx)
+ struct dc_stream_state *stream,
+ const struct dc_validation_set set[],
+ int set_count,
+ struct dc_state *state)
{
- dst_ctx->clk_mgr = dc->clk_mgr;
-}
+ int i, j;
+ for (i = 0; i < set_count; i++) /* locate the validation-set entry that belongs to this stream */
+ if (set[i].stream == stream)
+ break;
-bool dc_resource_is_dsc_encoding_supported(const struct dc *dc)
+ if (i == set_count) {
+ dm_error("Stream %p not found in set!\n", stream);
+ return false;
+ }
+
+ for (j = 0; j < set[i].plane_count; j++) /* add the entry's planes in order; stop at the first failure */
+ if (!dc_state_add_plane(dc, stream, set[i].plane_states[j], state))
+ return false;
+
+ return true;
+}
+
+/**
+ * dc_validate_with_context - Validate and update the potential new stream in the context object
+ *
+ * @dc: Used to get the current state status
+ * @set: An array of dc_validation_set listing every stream (and its planes) wanted in the new state
+ * @set_count: Number of entries in @set
+ * @context: New context
+ * @validate_mode: identify the validation mode
+ *
+ * This function updates the potential new stream in the context object. It
+ * creates multiple lists for the add, remove, and unchanged streams. In
+ * particular, if the unchanged streams have a plane that changed, it is
+ * necessary to remove all planes from the unchanged streams. In summary, this
+ * function is responsible for validating the new context.
+ *
+ * Return:
+ * In case of success, return DC_OK (1), otherwise, return a DC error.
+ */
+enum dc_status dc_validate_with_context(struct dc *dc,
+ const struct dc_validation_set set[],
+ int set_count,
+ struct dc_state *context,
+ enum dc_validate_mode validate_mode)
{
- return dc->res_pool->res_cap->num_dsc > 0;
+ struct dc_stream_state *unchanged_streams[MAX_PIPES] = { 0 };
+ struct dc_stream_state *del_streams[MAX_PIPES] = { 0 };
+ struct dc_stream_state *add_streams[MAX_PIPES] = { 0 };
+ int old_stream_count = context->stream_count;
+ enum dc_status res = DC_ERROR_UNEXPECTED;
+ int unchanged_streams_count = 0;
+ int del_streams_count = 0;
+ int add_streams_count = 0;
+ bool found = false;
+ int i, j, k;
+
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ /* First, build a list of streams to be removed from the current context */
+ for (i = 0; i < old_stream_count; i++) {
+ struct dc_stream_state *stream = context->streams[i];
+
+ for (j = 0; j < set_count; j++) {
+ if (stream == set[j].stream) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ del_streams[del_streams_count++] = stream;
+
+ found = false;
+ }
+
+ /* Second, build a list of new streams */
+ for (i = 0; i < set_count; i++) {
+ struct dc_stream_state *stream = set[i].stream;
+
+ for (j = 0; j < old_stream_count; j++) {
+ if (stream == context->streams[j]) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ add_streams[add_streams_count++] = stream;
+
+ found = false;
+ }
+
+ /* Build a list of unchanged streams, which is necessary for handling
+ * plane changes such as added, removed, and updated planes.
+ */
+ for (i = 0; i < set_count; i++) {
+ /* Check if stream is part of the delete list */
+ for (j = 0; j < del_streams_count; j++) {
+ if (set[i].stream == del_streams[j]) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ /* Check if stream is part of the add list */
+ for (j = 0; j < add_streams_count; j++) {
+ if (set[i].stream == add_streams[j]) {
+ found = true;
+ break;
+ }
+ }
+ }
+
+ if (!found)
+ unchanged_streams[unchanged_streams_count++] = set[i].stream;
+
+ found = false;
+ }
+
+ /* Remove all planes for unchanged streams if planes changed */
+ for (i = 0; i < unchanged_streams_count; i++) {
+ if (planes_changed_for_existing_stream(context,
+ unchanged_streams[i],
+ set,
+ set_count)) {
+
+ if (!dc_state_rem_all_planes_for_stream(dc,
+ unchanged_streams[i],
+ context)) {
+ res = DC_FAIL_DETACH_SURFACES;
+ goto fail;
+ }
+ }
+ }
+
+ /* Remove all planes for removed streams and then remove the streams */
+ for (i = 0; i < del_streams_count; i++) {
+ /* Need to copy the dwb data from the old stream in order for efc to work */
+ if (del_streams[i]->num_wb_info > 0) {
+ for (j = 0; j < add_streams_count; j++) {
+ if (del_streams[i]->sink == add_streams[j]->sink) {
+ add_streams[j]->num_wb_info = del_streams[i]->num_wb_info;
+ for (k = 0; k < del_streams[i]->num_wb_info; k++)
+ add_streams[j]->writeback_info[k] = del_streams[i]->writeback_info[k];
+ }
+ }
+ }
+
+ if (dc_state_get_stream_subvp_type(context, del_streams[i]) == SUBVP_PHANTOM) {
+ /* remove phantoms specifically */
+ if (!dc_state_rem_all_phantom_planes_for_stream(dc, del_streams[i], context, true)) {
+ res = DC_FAIL_DETACH_SURFACES;
+ goto fail;
+ }
+
+ res = dc_state_remove_phantom_stream(dc, context, del_streams[i]);
+ dc_state_release_phantom_stream(dc, context, del_streams[i]);
+ } else {
+ if (!dc_state_rem_all_planes_for_stream(dc, del_streams[i], context)) {
+ res = DC_FAIL_DETACH_SURFACES;
+ goto fail;
+ }
+
+ res = dc_state_remove_stream(dc, context, del_streams[i]);
+ }
+
+ if (res != DC_OK)
+ goto fail;
+ }
+
+ /* Swap seamless boot stream to pipe 0 (if needed) to ensure pipe_ctx
+ * matches. This may change in the future if seamless_boot_stream can be
+ * multiple.
+ */
+ for (i = 0; i < add_streams_count; i++) {
+ mark_seamless_boot_stream(dc, add_streams[i]);
+ if (add_streams[i]->apply_seamless_boot_optimization && i != 0) {
+ struct dc_stream_state *temp = add_streams[0];
+
+ add_streams[0] = add_streams[i];
+ add_streams[i] = temp;
+ break;
+ }
+ }
+
+ /* Add new streams and then add all planes for the new stream */
+ for (i = 0; i < add_streams_count; i++) {
+ calculate_phy_pix_clks(add_streams[i]);
+ res = dc_state_add_stream(dc, context, add_streams[i]);
+ if (res != DC_OK)
+ goto fail;
+
+ if (!add_all_planes_for_stream(dc, add_streams[i], set, set_count, context)) {
+ res = DC_FAIL_ATTACH_SURFACES;
+ goto fail;
+ }
+ }
+
+ /* Add all planes for unchanged streams if planes changed */
+ for (i = 0; i < unchanged_streams_count; i++) {
+ if (planes_changed_for_existing_stream(context,
+ unchanged_streams[i],
+ set,
+ set_count)) {
+ if (!add_all_planes_for_stream(dc, unchanged_streams[i], set, set_count, context)) {
+ res = DC_FAIL_ATTACH_SURFACES;
+ goto fail;
+ }
+ }
+ }
+
+ /* clear subvp cursor limitations */
+ for (i = 0; i < context->stream_count; i++) {
+ dc_state_set_stream_subvp_cursor_limit(context->streams[i], context, false);
+ }
+
+ res = dc_validate_global_state(dc, context, validate_mode);
+
+ /* calculate pixel rate divider after deciding pixel clock & odm combine */
+ if ((dc->hwss.calculate_pix_rate_divider) && (res == DC_OK)) {
+ for (i = 0; i < add_streams_count; i++)
+ dc->hwss.calculate_pix_rate_divider(dc, context, add_streams[i]);
+ }
+
+fail:
+ if (res != DC_OK)
+ DC_LOG_WARNING("%s:resource validation failed, dc_status:%d\n",
+ __func__,
+ res);
+
+ return res;
}
+#if defined(CONFIG_DRM_AMD_DC_FP)
+#endif /* CONFIG_DRM_AMD_DC_FP */
+
+/**
+ * calculate_timing_params_for_dsc_with_padding - Reset the DSC padding parameters of a pipe.
+ * @pipe_ctx: Pointer to the pipe context structure; a NULL pointer is ignored.
+ *
+ * Clears dsc_hactive_padding and dsc_htotal_padding in
+ * pipe_ctx->dsc_padding_params and, when the pipe has a stream attached, copies
+ * the stream's timing.pix_clk_100hz into dsc_pix_clk_100hz.
+ *
+ * NOTE(review): no padding is derived from the DSC slice width in this body;
+ * confirm whether that computation is expected to live elsewhere.
+ */
+static void calculate_timing_params_for_dsc_with_padding(struct pipe_ctx *pipe_ctx)
+{
+ struct dc_stream_state *stream = NULL;
+
+ if (!pipe_ctx)
+ return;
+
+ stream = pipe_ctx->stream;
+ pipe_ctx->dsc_padding_params.dsc_hactive_padding = 0;
+ pipe_ctx->dsc_padding_params.dsc_htotal_padding = 0;
+
+ if (stream)
+ pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz = stream->timing.pix_clk_100hz;
+
+}
/**
- * dc_validate_global_state() - Determine if HW can support a given state
- * Checks HW resource availability and bandwidth requirement.
+ * dc_validate_global_state() - Determine if hardware can support a given state
+ *
* @dc: dc struct for this driver
* @new_ctx: state to be validated
- * @fast_validate: set to true if only yes/no to support matters
+ * @validate_mode: identify the validation mode
*
- * Return: DC_OK if the result can be programmed. Otherwise, an error code.
+ * Checks hardware resource availability and bandwidth requirement.
+ *
+ * Return:
+ * DC_OK if the result can be programmed. Otherwise, an error code.
*/
enum dc_status dc_validate_global_state(
struct dc *dc,
struct dc_state *new_ctx,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
enum dc_status result = DC_ERROR_UNEXPECTED;
int i, j;
@@ -2272,6 +4428,10 @@ enum dc_status dc_validate_global_state(
if (pipe_ctx->stream != stream)
continue;
+ /* Decide whether hblank borrow is needed and save it in pipe_ctx */
+ if (dc->debug.enable_hblank_borrow)
+ calculate_timing_params_for_dsc_with_padding(pipe_ctx);
+
if (dc->res_pool->funcs->patch_unknown_plane_state &&
pipe_ctx->plane_state &&
pipe_ctx->plane_state->tiling_info.gfx9.swizzle == DC_SW_UNKNOWN) {
@@ -2304,18 +4464,7 @@ enum dc_status dc_validate_global_state(
result = resource_build_scaling_params_for_context(dc, new_ctx);
if (result == DC_OK)
- if (!dc->res_pool->funcs->validate_bandwidth(dc, new_ctx, fast_validate))
- result = DC_FAIL_BANDWIDTH_VALIDATE;
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- /*
- * Only update link encoder to stream assignment after bandwidth validation passed.
- * TODO: Split out assignment and validation.
- */
- if (result == DC_OK && dc->res_pool->funcs->link_encs_assign && fast_validate == false)
- dc->res_pool->funcs->link_encs_assign(
- dc, new_ctx, new_ctx->streams, new_ctx->stream_count);
-#endif
+ result = dc->res_pool->funcs->validate_bandwidth(dc, new_ctx, validate_mode);
return result;
}
@@ -2348,19 +4497,22 @@ static void set_avi_info_frame(
uint32_t pixel_encoding = 0;
enum scanning_type scan_type = SCANNING_TYPE_NODATA;
enum dc_aspect_ratio aspect = ASPECT_RATIO_NO_DATA;
- bool itc = false;
- uint8_t itc_value = 0;
- uint8_t cn0_cn1 = 0;
- unsigned int cn0_cn1_value = 0;
uint8_t *check_sum = NULL;
uint8_t byte_index = 0;
union hdmi_info_packet hdmi_info;
- union display_content_support support = {0};
unsigned int vic = pipe_ctx->stream->timing.vic;
+ unsigned int rid = pipe_ctx->stream->timing.rid;
+ unsigned int fr_ind = pipe_ctx->stream->timing.fr_index;
enum dc_timing_3d_format format;
+ if (stream->avi_infopacket.valid) {
+ *info_packet = stream->avi_infopacket;
+ return;
+ }
+
memset(&hdmi_info, 0, sizeof(union hdmi_info_packet));
+
color_space = pipe_ctx->stream->output_color_space;
if (color_space == COLOR_SPACE_UNKNOWN)
color_space = (stream->timing.pixel_encoding == PIXEL_ENCODING_RGB) ?
@@ -2413,23 +4565,35 @@ static void set_avi_info_frame(
hdmi_info.bits.S0_S1 = scan_type;
/* C0, C1 : Colorimetry */
- if (color_space == COLOR_SPACE_YCBCR709 ||
- color_space == COLOR_SPACE_YCBCR709_LIMITED)
+ switch (color_space) {
+ case COLOR_SPACE_YCBCR709:
+ case COLOR_SPACE_YCBCR709_LIMITED:
hdmi_info.bits.C0_C1 = COLORIMETRY_ITU709;
- else if (color_space == COLOR_SPACE_YCBCR601 ||
- color_space == COLOR_SPACE_YCBCR601_LIMITED)
+ break;
+ case COLOR_SPACE_YCBCR601:
+ case COLOR_SPACE_YCBCR601_LIMITED:
hdmi_info.bits.C0_C1 = COLORIMETRY_ITU601;
- else {
- hdmi_info.bits.C0_C1 = COLORIMETRY_NO_DATA;
- }
- if (color_space == COLOR_SPACE_2020_RGB_FULLRANGE ||
- color_space == COLOR_SPACE_2020_RGB_LIMITEDRANGE ||
- color_space == COLOR_SPACE_2020_YCBCR) {
+ break;
+ case COLOR_SPACE_2020_RGB_FULLRANGE:
+ case COLOR_SPACE_2020_RGB_LIMITEDRANGE:
+ case COLOR_SPACE_2020_YCBCR_LIMITED:
hdmi_info.bits.EC0_EC2 = COLORIMETRYEX_BT2020RGBYCBCR;
hdmi_info.bits.C0_C1 = COLORIMETRY_EXTENDED;
- } else if (color_space == COLOR_SPACE_ADOBERGB) {
+ break;
+ case COLOR_SPACE_ADOBERGB:
hdmi_info.bits.EC0_EC2 = COLORIMETRYEX_ADOBERGB;
hdmi_info.bits.C0_C1 = COLORIMETRY_EXTENDED;
+ break;
+ case COLOR_SPACE_SRGB:
+ default:
+ hdmi_info.bits.C0_C1 = COLORIMETRY_NO_DATA;
+ break;
+ }
+
+ if (pixel_encoding && color_space == COLOR_SPACE_2020_YCBCR_LIMITED &&
+ stream->out_transfer_func.tf == TRANSFER_FUNCTION_GAMMA22) {
+ hdmi_info.bits.EC0_EC2 = 0;
+ hdmi_info.bits.C0_C1 = COLORIMETRY_ITU709;
}
/* TODO: un-hardcode aspect ratio */
@@ -2451,49 +4615,27 @@ static void set_avi_info_frame(
/* Active Format Aspect ratio - same as Picture Aspect Ratio. */
hdmi_info.bits.R0_R3 = ACTIVE_FORMAT_ASPECT_RATIO_SAME_AS_PICTURE;
- /* TODO: un-hardcode cn0_cn1 and itc */
-
- cn0_cn1 = 0;
- cn0_cn1_value = 0;
-
- itc = true;
- itc_value = 1;
-
- support = stream->content_support;
-
- if (itc) {
- if (!support.bits.valid_content_type) {
- cn0_cn1_value = 0;
- } else {
- if (cn0_cn1 == DISPLAY_CONTENT_TYPE_GRAPHICS) {
- if (support.bits.graphics_content == 1) {
- cn0_cn1_value = 0;
- }
- } else if (cn0_cn1 == DISPLAY_CONTENT_TYPE_PHOTO) {
- if (support.bits.photo_content == 1) {
- cn0_cn1_value = 1;
- } else {
- cn0_cn1_value = 0;
- itc_value = 0;
- }
- } else if (cn0_cn1 == DISPLAY_CONTENT_TYPE_CINEMA) {
- if (support.bits.cinema_content == 1) {
- cn0_cn1_value = 2;
- } else {
- cn0_cn1_value = 0;
- itc_value = 0;
- }
- } else if (cn0_cn1 == DISPLAY_CONTENT_TYPE_GAME) {
- if (support.bits.game_content == 1) {
- cn0_cn1_value = 3;
- } else {
- cn0_cn1_value = 0;
- itc_value = 0;
- }
- }
- }
- hdmi_info.bits.CN0_CN1 = cn0_cn1_value;
- hdmi_info.bits.ITC = itc_value;
+ switch (stream->content_type) {
+ case DISPLAY_CONTENT_TYPE_NO_DATA:
+ hdmi_info.bits.CN0_CN1 = 0;
+ hdmi_info.bits.ITC = 1;
+ break;
+ case DISPLAY_CONTENT_TYPE_GRAPHICS:
+ hdmi_info.bits.CN0_CN1 = 0;
+ hdmi_info.bits.ITC = 1;
+ break;
+ case DISPLAY_CONTENT_TYPE_PHOTO:
+ hdmi_info.bits.CN0_CN1 = 1;
+ hdmi_info.bits.ITC = 1;
+ break;
+ case DISPLAY_CONTENT_TYPE_CINEMA:
+ hdmi_info.bits.CN0_CN1 = 2;
+ hdmi_info.bits.ITC = 1;
+ break;
+ case DISPLAY_CONTENT_TYPE_GAME:
+ hdmi_info.bits.CN0_CN1 = 3;
+ hdmi_info.bits.ITC = 1;
+ break;
}
if (stream->qs_bit == 1) {
@@ -2510,19 +4652,11 @@ static void set_avi_info_frame(
/* TODO : We should handle YCC quantization */
/* but we do not have matrix calculation */
- if (stream->qy_bit == 1) {
- if (color_space == COLOR_SPACE_SRGB ||
- color_space == COLOR_SPACE_2020_RGB_FULLRANGE)
- hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE;
- else if (color_space == COLOR_SPACE_SRGB_LIMITED ||
- color_space == COLOR_SPACE_2020_RGB_LIMITEDRANGE)
- hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE;
- else
- hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE;
- } else
- hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE;
+ hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE;
///VIC
+ if (pipe_ctx->stream->timing.hdmi_vic != 0)
+ vic = 0;
format = stream->timing.timing_3d_format;
/*todo, add 3DStereo support*/
if (format != TIMING_3D_FORMAT_NONE) {
@@ -2557,6 +4691,15 @@ static void set_avi_info_frame(
hdmi_info.bits.header.length = 14;
}
+ if (rid != 0 && fr_ind != 0) {
+ hdmi_info.bits.header.version = 4;
+ hdmi_info.bits.header.length = 15;
+
+ hdmi_info.bits.FR0_FR3 = fr_ind & 0xF;
+ hdmi_info.bits.FR4 = (fr_ind >> 4) & 0x1;
+ hdmi_info.bits.RID0_RID5 = rid;
+ }
+
/* pixel repetition
* PR0 - PR3 start from 0 whereas pHwPathMode->mode.timing.flags.pixel
* repetition start from 1 */
@@ -2655,58 +4798,69 @@ static void set_vsc_info_packet(
*info_packet = stream->vsc_infopacket;
}
-
-void dc_resource_state_destruct(struct dc_state *context)
+static void set_hfvs_info_packet(
+ struct dc_info_packet *info_packet,
+ struct dc_stream_state *stream)
{
- int i, j;
-
- for (i = 0; i < context->stream_count; i++) {
- for (j = 0; j < context->stream_status[i].plane_count; j++)
- dc_plane_state_release(
- context->stream_status[i].plane_states[j]);
+ if (!stream->hfvsif_infopacket.valid)
+ return;
- context->stream_status[i].plane_count = 0;
- dc_stream_release(context->streams[i]);
- context->streams[i] = NULL;
- }
- context->stream_count = 0;
+ *info_packet = stream->hfvsif_infopacket;
}
-void dc_resource_state_copy_construct(
- const struct dc_state *src_ctx,
- struct dc_state *dst_ctx)
+static void adaptive_sync_override_dp_info_packets_sdp_line_num(
+ const struct dc_crtc_timing *timing,
+ struct enc_sdp_line_num *sdp_line_num,
+ unsigned int vstartup_start)
{
- int i, j;
- struct kref refcount = dst_ctx->refcount;
+ uint32_t asic_blank_start = 0;
+ uint32_t asic_blank_end = 0;
+ uint32_t v_update = 0;
- *dst_ctx = *src_ctx;
+ const struct dc_crtc_timing *tg = timing;
- for (i = 0; i < MAX_PIPES; i++) {
- struct pipe_ctx *cur_pipe = &dst_ctx->res_ctx.pipe_ctx[i];
+ /* blank_start = frame end - front porch */
+ asic_blank_start = tg->v_total - tg->v_front_porch;
- if (cur_pipe->top_pipe)
- cur_pipe->top_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx];
+ /* blank_end = blank_start - active */
+ asic_blank_end = (asic_blank_start - tg->v_border_bottom -
+ tg->v_addressable - tg->v_border_top);
- if (cur_pipe->bottom_pipe)
- cur_pipe->bottom_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx];
+ if (vstartup_start > asic_blank_end) {
+ v_update = (tg->v_total - (vstartup_start - asic_blank_end));
+ sdp_line_num->adaptive_sync_line_num_valid = true;
+ sdp_line_num->adaptive_sync_line_num = (tg->v_total - v_update - 1);
+ } else {
+ sdp_line_num->adaptive_sync_line_num_valid = false;
+ sdp_line_num->adaptive_sync_line_num = 0;
+ }
+}
- if (cur_pipe->next_odm_pipe)
- cur_pipe->next_odm_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx];
+static void set_adaptive_sync_info_packet(
+ struct dc_info_packet *info_packet,
+ const struct dc_stream_state *stream,
+ struct encoder_info_frame *info_frame,
+ unsigned int vstartup_start)
+{
+ if (!stream->adaptive_sync_infopacket.valid)
+ return;
- if (cur_pipe->prev_odm_pipe)
- cur_pipe->prev_odm_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx];
- }
+ adaptive_sync_override_dp_info_packets_sdp_line_num(
+ &stream->timing,
+ &info_frame->sdp_line_num,
+ vstartup_start);
- for (i = 0; i < dst_ctx->stream_count; i++) {
- dc_stream_retain(dst_ctx->streams[i]);
- for (j = 0; j < dst_ctx->stream_status[i].plane_count; j++)
- dc_plane_state_retain(
- dst_ctx->stream_status[i].plane_states[j]);
- }
+ *info_packet = stream->adaptive_sync_infopacket;
+}
- /* context refcount should not be overridden */
- dst_ctx->refcount = refcount;
+static void set_vtem_info_packet(
+ struct dc_info_packet *info_packet,
+ struct dc_stream_state *stream)
+{
+ if (!stream->vtem_infopacket.valid)
+ return;
+ *info_packet = stream->vtem_infopacket;
}
struct clock_source *dc_resource_find_first_free_pll(
@@ -2727,6 +4881,7 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx)
{
enum signal_type signal = SIGNAL_TYPE_NONE;
struct encoder_info_frame *info = &pipe_ctx->stream_res.encoder_info_frame;
+ unsigned int vstartup_start = 0;
/* default all packets to invalid */
info->avi.valid = false;
@@ -2735,14 +4890,21 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx)
info->spd.valid = false;
info->hdrsmd.valid = false;
info->vsc.valid = false;
-
+ info->hfvsif.valid = false;
+ info->vtem.valid = false;
+ info->adaptive_sync.valid = false;
signal = pipe_ctx->stream->signal;
+ if (pipe_ctx->stream->ctx->dc->res_pool->funcs->get_vstartup_for_pipe)
+ vstartup_start = pipe_ctx->stream->ctx->dc->res_pool->funcs->get_vstartup_for_pipe(pipe_ctx);
+
/* HDMi and DP have different info packets*/
if (dc_is_hdmi_signal(signal)) {
set_avi_info_frame(&info->avi, pipe_ctx);
set_vendor_info_packet(&info->vendor, pipe_ctx->stream);
+ set_hfvs_info_packet(&info->hfvsif, pipe_ctx->stream);
+ set_vtem_info_packet(&info->vtem, pipe_ctx->stream);
set_spd_info_packet(&info->spd, pipe_ctx->stream);
@@ -2754,6 +4916,10 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx)
set_spd_info_packet(&info->spd, pipe_ctx->stream);
set_hdr_static_info_packet(&info->hdrsmd, pipe_ctx->stream);
+ set_adaptive_sync_info_packet(&info->adaptive_sync,
+ pipe_ctx->stream,
+ info,
+ vstartup_start);
}
patch_gamut_packet_checksum(&info->gamut);
@@ -2766,7 +4932,7 @@ enum dc_status resource_map_clock_resources(
{
/* acquire new resources */
const struct resource_pool *pool = dc->res_pool;
- struct pipe_ctx *pipe_ctx = resource_get_head_pipe_for_stream(
+ struct pipe_ctx *pipe_ctx = resource_get_otg_master_for_stream(
&context->res_ctx, stream);
if (!pipe_ctx)
@@ -2828,7 +4994,7 @@ bool pipe_need_reprogram(
if (pipe_ctx_old->stream_res.stream_enc != pipe_ctx->stream_res.stream_enc)
return true;
- if (is_timing_changed(pipe_ctx_old->stream, pipe_ctx->stream))
+ if (dc_is_timing_changed(pipe_ctx_old->stream, pipe_ctx->stream))
return true;
if (pipe_ctx_old->stream->dpms_off != pipe_ctx->stream->dpms_off)
@@ -2841,21 +5007,23 @@ bool pipe_need_reprogram(
if (pipe_ctx_old->stream_res.dsc != pipe_ctx->stream_res.dsc)
return true;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
if (pipe_ctx_old->stream_res.hpo_dp_stream_enc != pipe_ctx->stream_res.hpo_dp_stream_enc)
return true;
-#endif
+ if (pipe_ctx_old->link_res.hpo_dp_link_enc != pipe_ctx->link_res.hpo_dp_link_enc)
+ return true;
/* DIG link encoder resource assignment for stream changed. */
- if (pipe_ctx_old->stream->ctx->dc->res_pool->funcs->link_encs_assign) {
+ if (pipe_ctx_old->stream->ctx->dc->config.unify_link_enc_assignment) {
+ if (pipe_ctx_old->link_res.dio_link_enc != pipe_ctx->link_res.dio_link_enc)
+ return true;
+ } else if (pipe_ctx_old->stream->ctx->dc->res_pool->funcs->link_encs_assign) {
bool need_reprogram = false;
struct dc *dc = pipe_ctx_old->stream->ctx->dc;
- enum link_enc_cfg_mode mode = dc->current_state->res_ctx.link_enc_cfg_ctx.mode;
+ struct link_encoder *link_enc_prev =
+ link_enc_cfg_get_link_enc_used_by_stream_current(dc, pipe_ctx_old->stream);
- dc->current_state->res_ctx.link_enc_cfg_ctx.mode = LINK_ENC_CFG_STEADY;
- if (link_enc_cfg_get_link_enc_used_by_stream(dc, pipe_ctx_old->stream) != pipe_ctx->stream->link_enc)
+ if (link_enc_prev != pipe_ctx->stream->link_enc)
need_reprogram = true;
- dc->current_state->res_ctx.link_enc_cfg_ctx.mode = mode;
return need_reprogram;
}
@@ -2881,7 +5049,7 @@ void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream,
option = DITHER_OPTION_SPATIAL8;
break;
case COLOR_DEPTH_101010:
- option = DITHER_OPTION_SPATIAL10;
+ option = DITHER_OPTION_TRUN10;
break;
default:
option = DITHER_OPTION_DISABLE;
@@ -2907,6 +5075,8 @@ void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream,
option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) {
fmt_bit_depth->flags.TRUNCATE_ENABLED = 1;
fmt_bit_depth->flags.TRUNCATE_DEPTH = 2;
+ if (option == DITHER_OPTION_TRUN10)
+ fmt_bit_depth->flags.TRUNCATE_MODE = 1;
}
/* special case - Formatter can only reduce by 4 bits at most.
@@ -2991,6 +5161,9 @@ void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream,
enum dc_status dc_validate_stream(struct dc *dc, struct dc_stream_state *stream)
{
+ if (dc == NULL || stream == NULL)
+ return DC_ERROR_UNEXPECTED;
+
struct dc_link *link = stream->link;
struct timing_generator *tg = dc->res_pool->timing_generators[0];
enum dc_status res = DC_OK;
@@ -3010,7 +5183,7 @@ enum dc_status dc_validate_stream(struct dc *dc, struct dc_stream_state *stream)
/* TODO: validate audio ASIC caps, encoder */
if (res == DC_OK)
- res = dc_link_validate_mode_timing(stream,
+ res = dc->link_srv->validate_mode_timing(stream,
link,
&stream->timing);
@@ -3051,10 +5224,8 @@ unsigned int resource_pixel_format_to_bpp(enum surface_pixel_format format)
case SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010:
case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010:
case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010_XR_BIAS:
-#if defined(CONFIG_DRM_AMD_DC_DCN)
case SURFACE_PIXEL_FORMAT_GRPH_RGBE:
case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA:
-#endif
return 32;
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
@@ -3111,22 +5282,434 @@ void get_audio_check(struct audio_info *aud_modes,
}
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-struct hpo_dp_link_encoder *resource_get_unused_hpo_dp_link_encoder(
- const struct resource_pool *pool)
+struct link_encoder *get_temp_dio_link_enc(
+ const struct resource_context *res_ctx,
+ const struct resource_pool *const pool,
+ const struct dc_link *link)
{
- uint8_t i;
- struct hpo_dp_link_encoder *enc = NULL;
+ struct link_encoder *link_enc = NULL;
+ int enc_index;
+
+ if (link->is_dig_mapping_flexible)
+ enc_index = find_acquired_dio_link_enc_for_link(res_ctx, link);
+ else
+ enc_index = link->eng_id;
+
+ if (enc_index < 0)
+ enc_index = find_free_dio_link_enc(res_ctx, link, pool, NULL);
+
+ if (enc_index >= 0)
+ link_enc = pool->link_encoders[enc_index];
- ASSERT(pool->hpo_dp_link_enc_count <= MAX_HPO_DP2_LINK_ENCODERS);
+ return link_enc;
+}
+
+static struct hpo_dp_link_encoder *get_temp_hpo_dp_link_enc(
+ const struct resource_context *res_ctx,
+ const struct resource_pool *const pool,
+ const struct dc_link *link)
+{
+ struct hpo_dp_link_encoder *hpo_dp_link_enc = NULL;
+ int enc_index;
+
+ enc_index = find_acquired_hpo_dp_link_enc_for_link(res_ctx, link);
+
+ if (enc_index < 0)
+ enc_index = find_free_hpo_dp_link_enc(res_ctx, pool);
+
+ if (enc_index >= 0)
+ hpo_dp_link_enc = pool->hpo_dp_link_enc[enc_index];
+
+ return hpo_dp_link_enc;
+}
+
+bool get_temp_dp_link_res(struct dc_link *link,
+ struct link_resource *link_res,
+ struct dc_link_settings *link_settings)
+{
+ const struct dc *dc = link->dc;
+ const struct resource_context *res_ctx = &dc->current_state->res_ctx;
- for (i = 0; i < pool->hpo_dp_link_enc_count; i++) {
- if (pool->hpo_dp_link_enc[i]->transmitter == TRANSMITTER_UNKNOWN) {
- enc = pool->hpo_dp_link_enc[i];
+ memset(link_res, 0, sizeof(*link_res));
+
+ if (dc->link_srv->dp_get_encoding_format(link_settings) == DP_128b_132b_ENCODING) {
+ link_res->hpo_dp_link_enc = get_temp_hpo_dp_link_enc(res_ctx, dc->res_pool, link);
+ if (!link_res->hpo_dp_link_enc)
+ return false;
+ } else if (dc->link_srv->dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING &&
+ dc->config.unify_link_enc_assignment) {
+ link_res->dio_link_enc = get_temp_dio_link_enc(res_ctx,
+ dc->res_pool, link);
+ if (!link_res->dio_link_enc)
+ return false;
+ }
+
+ return true;
+}
+
+void reset_syncd_pipes_from_disabled_pipes(struct dc *dc,
+ struct dc_state *context)
+{
+ int i, j;
+ struct pipe_ctx *pipe_ctx_old, *pipe_ctx, *pipe_ctx_syncd;
+
+ /* If pipe backend is reset, need to reset pipe syncd status */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe_ctx_old = &dc->current_state->res_ctx.pipe_ctx[i];
+ pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+ if (!resource_is_pipe_type(pipe_ctx_old, OTG_MASTER))
+ continue;
+
+ if (!pipe_ctx->stream ||
+ pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) {
+
+ /* Reset all the syncd pipes from the disabled pipe */
+ for (j = 0; j < dc->res_pool->pipe_count; j++) {
+ pipe_ctx_syncd = &context->res_ctx.pipe_ctx[j];
+ if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx_syncd) == pipe_ctx_old->pipe_idx) ||
+ !IS_PIPE_SYNCD_VALID(pipe_ctx_syncd))
+ SET_PIPE_SYNCD_TO_PIPE(pipe_ctx_syncd, j);
+ }
+ }
+ }
+}
+
+void check_syncd_pipes_for_disabled_master_pipe(struct dc *dc,
+ struct dc_state *context,
+ uint8_t disabled_master_pipe_idx)
+{
+ int i;
+ struct pipe_ctx *pipe_ctx, *pipe_ctx_check;
+
+ pipe_ctx = &context->res_ctx.pipe_ctx[disabled_master_pipe_idx];
+ if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx) != disabled_master_pipe_idx) ||
+ !IS_PIPE_SYNCD_VALID(pipe_ctx))
+ SET_PIPE_SYNCD_TO_PIPE(pipe_ctx, disabled_master_pipe_idx);
+
+ /* for the pipe disabled, check if any slave pipe exists and assert */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe_ctx_check = &context->res_ctx.pipe_ctx[i];
+
+ if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx_check) == disabled_master_pipe_idx) &&
+ IS_PIPE_SYNCD_VALID(pipe_ctx_check) && (i != disabled_master_pipe_idx)) {
+ struct pipe_ctx *first_pipe = pipe_ctx_check;
+
+ while (first_pipe->prev_odm_pipe)
+ first_pipe = first_pipe->prev_odm_pipe;
+ /* When ODM combine is enabled, this case is expected. If the disabled pipe
+ * is part of the ODM tree, then we should not print an error.
+ * */
+ if (first_pipe->pipe_idx == disabled_master_pipe_idx)
+ continue;
+
+ DC_ERR("DC: Failure: pipe_idx[%d] syncd with disabled master pipe_idx[%d]\n",
+ i, disabled_master_pipe_idx);
+ }
+ }
+}
+
+void reset_sync_context_for_pipe(const struct dc *dc,
+ struct dc_state *context,
+ uint8_t pipe_idx)
+{
+ int i;
+ struct pipe_ctx *pipe_ctx_reset;
+
+ /* reset the otg sync context for the pipe and its slave pipes if any */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe_ctx_reset = &context->res_ctx.pipe_ctx[i];
+
+ if (((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx_reset) == pipe_idx) &&
+ IS_PIPE_SYNCD_VALID(pipe_ctx_reset)) || (i == pipe_idx))
+ SET_PIPE_SYNCD_TO_PIPE(pipe_ctx_reset, i);
+ }
+}
+
+uint8_t resource_transmitter_to_phy_idx(const struct dc *dc, enum transmitter transmitter)
+{
+ /* TODO - get transmitter to phy idx mapping from DMUB */
+ uint8_t phy_idx = transmitter - TRANSMITTER_UNIPHY_A;
+
+ if (dc->ctx->dce_version == DCN_VERSION_3_1 &&
+ dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) {
+ switch (transmitter) {
+ case TRANSMITTER_UNIPHY_A:
+ phy_idx = 0;
+ break;
+ case TRANSMITTER_UNIPHY_B:
+ phy_idx = 1;
+ break;
+ case TRANSMITTER_UNIPHY_C:
+ phy_idx = 5;
+ break;
+ case TRANSMITTER_UNIPHY_D:
+ phy_idx = 6;
+ break;
+ case TRANSMITTER_UNIPHY_E:
+ phy_idx = 4;
+ break;
+ default:
+ phy_idx = 0;
break;
}
}
- return enc;
+ return phy_idx;
}
+
+const struct link_hwss *get_link_hwss(const struct dc_link *link,
+ const struct link_resource *link_res)
+{
+ /* Link_hwss is only accessible by getter function instead of accessing
+ * by pointers in dc with the intent to protect against breaking polymorphism.
+ */
+ if (can_use_hpo_dp_link_hwss(link, link_res))
+ /* TODO: some assumes that if decided link settings is 128b/132b
+ * channel coding format hpo_dp_link_enc should be used.
+ * Others believe that if hpo_dp_link_enc is available in link
+ * resource then hpo_dp_link_enc must be used. This bound between
+ * hpo_dp_link_enc != NULL and decided link settings is loosely coupled
+ * with a premise that both hpo_dp_link_enc pointer and decided link
+ * settings are determined based on single policy function like
+ * "decide_link_settings" from upper layer. This "convention"
+ * cannot be maintained and enforced at current level.
+ * Therefore a refactor is due so we can enforce a strong bound
+ * between those two parameters at this level.
+ *
+ * To put it simple, we want to make enforcement at low level so that
+ * we will not return link hwss if caller plans to do 8b/10b
+ * with an hpo encoder. Or we can return a very dummy one that doesn't
+ * do work for all functions
+ */
+ return (requires_fixed_vs_pe_retimer_hpo_link_hwss(link) ?
+ get_hpo_fixed_vs_pe_retimer_dp_link_hwss() : get_hpo_dp_link_hwss());
+ else if (can_use_dpia_link_hwss(link, link_res))
+ return get_dpia_link_hwss();
+ else if (can_use_dio_link_hwss(link, link_res))
+ return (requires_fixed_vs_pe_retimer_dio_link_hwss(link)) ?
+ get_dio_fixed_vs_pe_retimer_link_hwss() : get_dio_link_hwss();
+ else
+ return get_virtual_link_hwss();
+}
+
+bool is_h_timing_divisible_by_2(struct dc_stream_state *stream)
+{
+ bool divisible = false;
+ uint16_t h_blank_start = 0;
+ uint16_t h_blank_end = 0;
+
+ if (stream) {
+ h_blank_start = stream->timing.h_total - stream->timing.h_front_porch;
+ h_blank_end = h_blank_start - stream->timing.h_addressable;
+
+ /* HTOTAL, Hblank start/end, and Hsync start/end all must be
+ * divisible by 2 in order for the horizontal timing params
+ * to be considered divisible by 2. Hsync start is always 0.
+ */
+ divisible = (stream->timing.h_total % 2 == 0) &&
+ (h_blank_start % 2 == 0) &&
+ (h_blank_end % 2 == 0) &&
+ (stream->timing.h_sync_width % 2 == 0);
+ }
+ return divisible;
+}
+
+/* This interface is deprecated for new DCNs. It is replaced by the following
+ * new interfaces. These two interfaces encapsulate pipe selection priority
+ * with DCN specific minimum hardware transition optimization algorithm. With
+ * the new interfaces caller no longer needs to know the implementation detail
+ * of a pipe topology.
+ *
+ * resource_update_pipes_with_odm_slice_count
+ * resource_update_pipes_with_mpc_slice_count
+ *
+ */
+bool dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy(
+ const struct dc *dc,
+ struct dc_state *state,
+ struct pipe_ctx *pri_pipe,
+ struct pipe_ctx *sec_pipe,
+ bool odm)
+{
+ int pipe_idx = sec_pipe->pipe_idx;
+ struct pipe_ctx *sec_top, *sec_bottom, *sec_next, *sec_prev;
+ const struct resource_pool *pool = dc->res_pool;
+
+ sec_top = sec_pipe->top_pipe;
+ sec_bottom = sec_pipe->bottom_pipe;
+ sec_next = sec_pipe->next_odm_pipe;
+ sec_prev = sec_pipe->prev_odm_pipe;
+
+ if (pri_pipe == NULL)
+ return false;
+
+ *sec_pipe = *pri_pipe;
+
+ sec_pipe->top_pipe = sec_top;
+ sec_pipe->bottom_pipe = sec_bottom;
+ sec_pipe->next_odm_pipe = sec_next;
+ sec_pipe->prev_odm_pipe = sec_prev;
+
+ sec_pipe->pipe_idx = pipe_idx;
+ sec_pipe->plane_res.mi = pool->mis[pipe_idx];
+ sec_pipe->plane_res.hubp = pool->hubps[pipe_idx];
+ sec_pipe->plane_res.ipp = pool->ipps[pipe_idx];
+ sec_pipe->plane_res.xfm = pool->transforms[pipe_idx];
+ sec_pipe->plane_res.dpp = pool->dpps[pipe_idx];
+ sec_pipe->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst;
+ sec_pipe->stream_res.dsc = NULL;
+ if (odm) {
+ if (!sec_pipe->top_pipe)
+ sec_pipe->stream_res.opp = pool->opps[pipe_idx];
+ else
+ sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp;
+ if (sec_pipe->stream->timing.flags.DSC == 1) {
+#if defined(CONFIG_DRM_AMD_DC_FP)
+ dcn20_acquire_dsc(dc, &state->res_ctx, &sec_pipe->stream_res.dsc, sec_pipe->stream_res.opp->inst);
#endif
+ ASSERT(sec_pipe->stream_res.dsc);
+ if (sec_pipe->stream_res.dsc == NULL)
+ return false;
+ }
+#if defined(CONFIG_DRM_AMD_DC_FP)
+ dcn20_build_mapped_resource(dc, state, sec_pipe->stream);
+#endif
+ }
+
+ return true;
+}
+
+enum dc_status update_dp_encoder_resources_for_test_harness(const struct dc *dc,
+ struct dc_state *context,
+ struct pipe_ctx *pipe_ctx)
+{
+ if (dc->link_srv->dp_get_encoding_format(&pipe_ctx->link_config.dp_link_settings) == DP_128b_132b_ENCODING) {
+ if (pipe_ctx->stream_res.hpo_dp_stream_enc == NULL) {
+ pipe_ctx->stream_res.hpo_dp_stream_enc =
+ find_first_free_match_hpo_dp_stream_enc_for_link(
+ &context->res_ctx, dc->res_pool, pipe_ctx->stream);
+
+ if (!pipe_ctx->stream_res.hpo_dp_stream_enc)
+ return DC_NO_STREAM_ENC_RESOURCE;
+
+ update_hpo_dp_stream_engine_usage(
+ &context->res_ctx, dc->res_pool,
+ pipe_ctx->stream_res.hpo_dp_stream_enc,
+ true);
+ }
+
+ if (pipe_ctx->link_res.hpo_dp_link_enc == NULL) {
+ if (!add_hpo_dp_link_enc_to_ctx(&context->res_ctx, dc->res_pool, pipe_ctx, pipe_ctx->stream))
+ return DC_NO_LINK_ENC_RESOURCE;
+ }
+ } else {
+ if (pipe_ctx->stream_res.hpo_dp_stream_enc) {
+ update_hpo_dp_stream_engine_usage(
+ &context->res_ctx, dc->res_pool,
+ pipe_ctx->stream_res.hpo_dp_stream_enc,
+ false);
+ pipe_ctx->stream_res.hpo_dp_stream_enc = NULL;
+ }
+ if (pipe_ctx->link_res.hpo_dp_link_enc)
+ remove_hpo_dp_link_enc_from_ctx(&context->res_ctx, pipe_ctx, pipe_ctx->stream);
+ }
+
+ if (pipe_ctx->link_res.dio_link_enc == NULL && dc->config.unify_link_enc_assignment)
+ if (!add_dio_link_enc_to_ctx(dc, context, dc->res_pool, pipe_ctx, pipe_ctx->stream))
+ return DC_NO_LINK_ENC_RESOURCE;
+
+ return DC_OK;
+}
+
+struct dscl_prog_data *resource_get_dscl_prog_data(struct pipe_ctx *pipe_ctx)
+{
+ return &pipe_ctx->plane_res.scl_data.dscl_prog_data;
+}
+
+static bool resource_allocate_mcache(struct dc_state *context, const struct dc_mcache_params *mcache_params)
+{
+ if (context->clk_mgr->ctx->dc->res_pool->funcs->program_mcache_pipe_config)
+ context->clk_mgr->ctx->dc->res_pool->funcs->program_mcache_pipe_config(context, mcache_params);
+
+ return true;
+}
+
+void resource_init_common_dml2_callbacks(struct dc *dc, struct dml2_configuration_options *dml2_options)
+{
+ dml2_options->callbacks.dc = dc;
+ dml2_options->callbacks.build_scaling_params = &resource_build_scaling_params;
+ dml2_options->callbacks.build_test_pattern_params = &resource_build_test_pattern_params;
+ dml2_options->callbacks.acquire_secondary_pipe_for_mpc_odm = &dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy;
+ dml2_options->callbacks.update_pipes_for_stream_with_slice_count = &resource_update_pipes_for_stream_with_slice_count;
+ dml2_options->callbacks.update_pipes_for_plane_with_slice_count = &resource_update_pipes_for_plane_with_slice_count;
+ dml2_options->callbacks.get_mpc_slice_index = &resource_get_mpc_slice_index;
+ dml2_options->callbacks.get_mpc_slice_count = &resource_get_mpc_slice_count;
+ dml2_options->callbacks.get_odm_slice_index = &resource_get_odm_slice_index;
+ dml2_options->callbacks.get_odm_slice_count = &resource_get_odm_slice_count;
+ dml2_options->callbacks.get_opp_head = &resource_get_opp_head;
+ dml2_options->callbacks.get_otg_master_for_stream = &resource_get_otg_master_for_stream;
+ dml2_options->callbacks.get_opp_heads_for_otg_master = &resource_get_opp_heads_for_otg_master;
+ dml2_options->callbacks.get_dpp_pipes_for_plane = &resource_get_dpp_pipes_for_plane;
+ dml2_options->callbacks.get_stream_status = &dc_state_get_stream_status;
+ dml2_options->callbacks.get_stream_from_id = &dc_state_get_stream_from_id;
+ dml2_options->callbacks.get_max_flickerless_instant_vtotal_increase = &dc_stream_get_max_flickerless_instant_vtotal_increase;
+ dml2_options->callbacks.allocate_mcache = &resource_allocate_mcache;
+
+ dml2_options->svp_pstate.callbacks.dc = dc;
+ dml2_options->svp_pstate.callbacks.add_phantom_plane = &dc_state_add_phantom_plane;
+ dml2_options->svp_pstate.callbacks.add_phantom_stream = &dc_state_add_phantom_stream;
+ dml2_options->svp_pstate.callbacks.build_scaling_params = &resource_build_scaling_params;
+ dml2_options->svp_pstate.callbacks.create_phantom_plane = &dc_state_create_phantom_plane;
+ dml2_options->svp_pstate.callbacks.remove_phantom_plane = &dc_state_remove_phantom_plane;
+ dml2_options->svp_pstate.callbacks.remove_phantom_stream = &dc_state_remove_phantom_stream;
+ dml2_options->svp_pstate.callbacks.create_phantom_stream = &dc_state_create_phantom_stream;
+ dml2_options->svp_pstate.callbacks.release_phantom_plane = &dc_state_release_phantom_plane;
+ dml2_options->svp_pstate.callbacks.release_phantom_stream = &dc_state_release_phantom_stream;
+ dml2_options->svp_pstate.callbacks.get_pipe_subvp_type = &dc_state_get_pipe_subvp_type;
+ dml2_options->svp_pstate.callbacks.get_stream_subvp_type = &dc_state_get_stream_subvp_type;
+ dml2_options->svp_pstate.callbacks.get_paired_subvp_stream = &dc_state_get_paired_subvp_stream;
+ dml2_options->svp_pstate.callbacks.remove_phantom_streams_and_planes = &dc_state_remove_phantom_streams_and_planes;
+ dml2_options->svp_pstate.callbacks.release_phantom_streams_and_planes = &dc_state_release_phantom_streams_and_planes;
+}
+
+/* Returns number of DET segments allocated for a given OTG_MASTER pipe */
+int resource_calculate_det_for_stream(struct dc_state *state, struct pipe_ctx *otg_master)
+{
+ struct pipe_ctx *opp_heads[MAX_PIPES];
+ struct pipe_ctx *dpp_pipes[MAX_PIPES];
+
+ int dpp_count = 0;
+ int det_segments = 0;
+
+ if (!otg_master->stream)
+ return 0;
+
+ int slice_count = resource_get_opp_heads_for_otg_master(otg_master,
+ &state->res_ctx, opp_heads);
+
+ for (int slice_idx = 0; slice_idx < slice_count; slice_idx++) {
+ if (opp_heads[slice_idx]->plane_state) {
+ dpp_count = resource_get_dpp_pipes_for_opp_head(
+ opp_heads[slice_idx],
+ &state->res_ctx,
+ dpp_pipes);
+ for (int dpp_idx = 0; dpp_idx < dpp_count; dpp_idx++)
+ det_segments += dpp_pipes[dpp_idx]->hubp_regs.det_size;
+ }
+ }
+ return det_segments;
+}
+
+bool resource_is_hpo_acquired(struct dc_state *context)
+{
+ int i;
+
+ for (i = 0; i < MAX_HPO_DP2_ENCODERS; i++) {
+ if (context->res_ctx.is_hpo_dp_stream_enc_acquired[i]) {
+ return true;
+ }
+ }
+
+ return false;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_sink.c b/drivers/gpu/drm/amd/display/dc/core/dc_sink.c
index a249a0e5edd0..455fa5dd1420 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_sink.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_sink.c
@@ -23,8 +23,6 @@
*
*/
-#include <linux/slab.h>
-
#include "dm_services.h"
#include "dm_helpers.h"
#include "core_types.h"
@@ -33,14 +31,6 @@
* Private functions
******************************************************************************/
-static void dc_sink_destruct(struct dc_sink *sink)
-{
- if (sink->dc_container_id) {
- kfree(sink->dc_container_id);
- sink->dc_container_id = NULL;
- }
-}
-
static bool dc_sink_construct(struct dc_sink *sink, const struct dc_sink_init_data *init_params)
{
@@ -75,7 +65,7 @@ void dc_sink_retain(struct dc_sink *sink)
static void dc_sink_free(struct kref *kref)
{
struct dc_sink *sink = container_of(kref, struct dc_sink, refcount);
- dc_sink_destruct(sink);
+ kfree(sink->dc_container_id);
kfree(sink);
}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c
index 4b372aa52801..f976ffd6d466 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c
@@ -35,19 +35,15 @@
*/
/**
- *****************************************************************************
- * Function: dc_stat_get_dmub_notification
+ * dc_stat_get_dmub_notification
*
- * @brief
- * Calls dmub layer to retrieve dmub notification
+ * Calls dmub layer to retrieve dmub notification
*
- * @param
- * [in] dc: dc structure
- * [in] notify: dmub notification structure
+ * @dc: dc structure
+ * @notify: dmub notification structure
*
- * @return
+ * Returns
* None
- *****************************************************************************
*/
void dc_stat_get_dmub_notification(const struct dc *dc, struct dmub_notification *notify)
{
@@ -65,26 +61,24 @@ void dc_stat_get_dmub_notification(const struct dc *dc, struct dmub_notification
/* For HPD/HPD RX, convert dpia port index into link index */
if (notify->type == DMUB_NOTIFICATION_HPD ||
notify->type == DMUB_NOTIFICATION_HPD_IRQ ||
+ notify->type == DMUB_NOTIFICATION_AUX_REPLY ||
+ notify->type == DMUB_NOTIFICATION_DPIA_NOTIFICATION ||
notify->type == DMUB_NOTIFICATION_SET_CONFIG_REPLY) {
notify->link_index =
- get_link_index_from_dpia_port_index(dc, notify->link_index);
+ get_link_index_from_dpia_port_index(dc, notify->instance);
}
}
/**
- *****************************************************************************
- * Function: dc_stat_get_dmub_dataout
+ * dc_stat_get_dmub_dataout
*
- * @brief
- * Calls dmub layer to retrieve dmub gpint dataout
+ * Calls dmub layer to retrieve dmub gpint dataout
*
- * @param
- * [in] dc: dc structure
- * [in] dataout: dmub gpint dataout
+ * @dc: dc structure
+ * @dataout: dmub gpint dataout
*
- * @return
+ * Returns
* None
- *****************************************************************************
*/
void dc_stat_get_dmub_dataout(const struct dc *dc, uint32_t *dataout)
{
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
new file mode 100644
index 000000000000..2de8ef4a58ec
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
@@ -0,0 +1,1077 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "dc_types.h"
+#include "core_types.h"
+#include "core_status.h"
+#include "dc_state.h"
+#include "dc_state_priv.h"
+#include "dc_stream_priv.h"
+#include "dc_plane_priv.h"
+
+#include "dm_services.h"
+#include "resource.h"
+#include "link_enc_cfg.h"
+
+#if defined(CONFIG_DRM_AMD_DC_FP)
+#include "dml2_0/dml2_wrapper.h"
+#include "dml2_0/dml2_internal_types.h"
+#endif
+
+#define DC_LOGGER \
+ dc->ctx->logger
+#define DC_LOGGER_INIT(logger)
+
+/* Private dc_state helper functions */
+/*
+ * Append @phantom_stream to the state's phantom_streams[] tracking array.
+ *
+ * Returns false, leaving the array untouched, when MAX_PHANTOM_PIPES entries
+ * are already tracked; returns true otherwise. No reference is taken here.
+ */
+static bool dc_state_track_phantom_stream(struct dc_state *state,
+ struct dc_stream_state *phantom_stream)
+{
+ if (state->phantom_stream_count >= MAX_PHANTOM_PIPES)
+ return false;
+
+ state->phantom_streams[state->phantom_stream_count++] = phantom_stream;
+
+ return true;
+}
+
+/*
+ * Remove @phantom_stream from the state's phantom_streams[] tracking array,
+ * keeping the remaining entries contiguous.
+ *
+ * Returns true if the stream was tracked and has been removed, false if it
+ * was not found. The stream's reference count is not touched here.
+ */
+static bool dc_state_untrack_phantom_stream(struct dc_state *state, struct dc_stream_state *phantom_stream)
+{
+ bool res = false;
+ int i;
+
+ /* first find phantom stream in the dc_state */
+ for (i = 0; i < state->phantom_stream_count; i++) {
+ if (state->phantom_streams[i] == phantom_stream) {
+ res = true;
+ break;
+ }
+ }
+
+ /* failed to find stream in state */
+ if (!res)
+ return res;
+
+ /* trim back phantom streams */
+ state->phantom_stream_count--;
+ for (; i < state->phantom_stream_count; i++)
+ state->phantom_streams[i] = state->phantom_streams[i + 1];
+
+ /* clear the vacated tail slot so no stale duplicate pointer is left behind */
+ state->phantom_streams[state->phantom_stream_count] = NULL;
+
+ return res;
+}
+
+/*
+ * Linear search of the first phantom_stream_count entries of
+ * phantom_streams[]; returns true if @phantom_stream is among them.
+ */
+static bool dc_state_is_phantom_stream_tracked(struct dc_state *state, struct dc_stream_state *phantom_stream)
+{
+ int i;
+
+ for (i = 0; i < state->phantom_stream_count; i++) {
+ if (state->phantom_streams[i] == phantom_stream)
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Append @phantom_plane to the state's phantom_planes[] tracking array.
+ *
+ * Returns false, leaving the array untouched, when MAX_PHANTOM_PIPES entries
+ * are already tracked; returns true otherwise. No reference is taken here.
+ */
+static bool dc_state_track_phantom_plane(struct dc_state *state,
+ struct dc_plane_state *phantom_plane)
+{
+ if (state->phantom_plane_count >= MAX_PHANTOM_PIPES)
+ return false;
+
+ state->phantom_planes[state->phantom_plane_count++] = phantom_plane;
+
+ return true;
+}
+
+/*
+ * Remove @phantom_plane from the state's phantom_planes[] tracking array,
+ * keeping the remaining entries contiguous.
+ *
+ * Returns true if the plane was tracked and has been removed, false if it
+ * was not found. The plane's reference count is not touched here.
+ */
+static bool dc_state_untrack_phantom_plane(struct dc_state *state, struct dc_plane_state *phantom_plane)
+{
+ bool res = false;
+ int i;
+
+ /* first find phantom plane in the dc_state */
+ for (i = 0; i < state->phantom_plane_count; i++) {
+ if (state->phantom_planes[i] == phantom_plane) {
+ res = true;
+ break;
+ }
+ }
+
+ /* failed to find plane in state */
+ if (!res)
+ return res;
+
+ /* trim back phantom planes */
+ state->phantom_plane_count--;
+ for (; i < state->phantom_plane_count; i++)
+ state->phantom_planes[i] = state->phantom_planes[i + 1];
+
+ /* clear the vacated tail slot so no stale duplicate pointer is left behind */
+ state->phantom_planes[state->phantom_plane_count] = NULL;
+
+ return res;
+}
+
+/*
+ * Linear search of the first phantom_plane_count entries of
+ * phantom_planes[]; returns true if @phantom_plane is among them.
+ */
+static bool dc_state_is_phantom_plane_tracked(struct dc_state *state, struct dc_plane_state *phantom_plane)
+{
+ int i;
+
+ for (i = 0; i < state->phantom_plane_count; i++) {
+ if (state->phantom_planes[i] == phantom_plane)
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Byte-copy @src_state over @dst_state, then fix up what a plain memcpy gets
+ * wrong: intra-state pipe links are re-pointed into @dst_state, and a
+ * reference is taken on every stream, plane and tracked phantom the copy now
+ * points at.
+ *
+ * Everything in @dst_state is overwritten, including its refcount and any
+ * pointers it held; callers that need to keep such fields save and restore
+ * them around this call.
+ */
+static void dc_state_copy_internal(struct dc_state *dst_state, struct dc_state *src_state)
+{
+ int i, j;
+
+ memcpy(dst_state, src_state, sizeof(struct dc_state));
+
+ /*
+ * The copied pipe links still point into src_state's pipe_ctx array;
+ * redirect each one to the pipe with the same pipe_idx in dst_state.
+ */
+ for (i = 0; i < MAX_PIPES; i++) {
+ struct pipe_ctx *cur_pipe = &dst_state->res_ctx.pipe_ctx[i];
+
+ if (cur_pipe->top_pipe)
+ cur_pipe->top_pipe = &dst_state->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx];
+
+ if (cur_pipe->bottom_pipe)
+ cur_pipe->bottom_pipe = &dst_state->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx];
+
+ if (cur_pipe->prev_odm_pipe)
+ cur_pipe->prev_odm_pipe = &dst_state->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx];
+
+ if (cur_pipe->next_odm_pipe)
+ cur_pipe->next_odm_pipe = &dst_state->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx];
+ }
+
+ /* retain phantoms */
+ for (i = 0; i < dst_state->phantom_stream_count; i++)
+ dc_stream_retain(dst_state->phantom_streams[i]);
+
+ for (i = 0; i < dst_state->phantom_plane_count; i++)
+ dc_plane_state_retain(dst_state->phantom_planes[i]);
+
+ /* retain streams and planes */
+ for (i = 0; i < dst_state->stream_count; i++) {
+ dc_stream_retain(dst_state->streams[i]);
+ for (j = 0; j < dst_state->stream_status[i].plane_count; j++)
+ dc_plane_state_retain(
+ dst_state->stream_status[i].plane_states[j]);
+ }
+
+}
+
+/* Seed @state's bw_ctx.dml with a byte copy of the base DML instance in @dc. */
+static void init_state(struct dc *dc, struct dc_state *state)
+{
+ /* Each context must have their own instance of VBA and in order to
+ * initialize and obtain IP and SOC the base DML instance from DC is
+ * initially copied into every context
+ */
+ memcpy(&state->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib));
+}
+
+/* Public dc_state functions */
+/*
+ * dc_state_create() - Allocate and initialise a new, zero-filled dc_state.
+ * @dc: DC instance used to seed the new state
+ * @params: optional creation parameters; when NULL the power source defaults
+ * to DC_POWER_SOURCE_AC
+ *
+ * When CONFIG_DRM_AMD_DC_FP is set and dc->debug.using_dml2 is true, the DML2
+ * context(s) are created as well.
+ *
+ * Returns the new state holding a single reference, or NULL if the
+ * allocation or a DML2 context creation fails.
+ */
+struct dc_state *dc_state_create(struct dc *dc, struct dc_state_create_params *params)
+{
+ struct dc_state *state;
+
+ state = kvzalloc(sizeof(struct dc_state), GFP_KERNEL);
+
+ if (!state)
+ return NULL;
+
+ /*
+ * Take the initial reference before anything can fail: the error paths
+ * below free the state through dc_state_release(), and kref_put() on a
+ * still-zero refcount would underflow instead of freeing the state.
+ */
+ kref_init(&state->refcount);
+
+ init_state(dc, state);
+ dc_state_construct(dc, state);
+ state->power_source = params ? params->power_source : DC_POWER_SOURCE_AC;
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+ if (dc->debug.using_dml2) {
+ if (!dml2_create(dc, &dc->dml2_options, &state->bw_ctx.dml2)) {
+ dc_state_release(state);
+ return NULL;
+ }
+
+ if (dc->caps.dcmode_power_limits_present && !dml2_create(dc, &dc->dml2_dc_power_options, &state->bw_ctx.dml2_dc_power_source)) {
+ dc_state_release(state);
+ return NULL;
+ }
+ }
+#endif
+
+ return state;
+}
+
+/*
+ * dc_state_copy() - Copy @src_state into the already existing @dst_state.
+ *
+ * The destination keeps its own refcount and its own DML2 context pointers:
+ * they are saved before the raw copy and restored afterwards. The contents of
+ * the source DML2 contexts are copied into the destination's with dml2_copy().
+ */
+void dc_state_copy(struct dc_state *dst_state, struct dc_state *src_state)
+{
+ /* saved because dc_state_copy_internal() overwrites the whole struct */
+ struct kref refcount = dst_state->refcount;
+#ifdef CONFIG_DRM_AMD_DC_FP
+ struct dml2_context *dst_dml2 = dst_state->bw_ctx.dml2;
+ struct dml2_context *dst_dml2_dc_power_source = dst_state->bw_ctx.dml2_dc_power_source;
+#endif
+
+ dc_state_copy_internal(dst_state, src_state);
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+ /* restore dst's own contexts, then copy the source contents into them */
+ dst_state->bw_ctx.dml2 = dst_dml2;
+ if (src_state->bw_ctx.dml2)
+ dml2_copy(dst_state->bw_ctx.dml2, src_state->bw_ctx.dml2);
+
+ dst_state->bw_ctx.dml2_dc_power_source = dst_dml2_dc_power_source;
+ if (src_state->bw_ctx.dml2_dc_power_source)
+ dml2_copy(dst_state->bw_ctx.dml2_dc_power_source, src_state->bw_ctx.dml2_dc_power_source);
+#endif
+
+ /* context refcount should not be overridden */
+ dst_state->refcount = refcount;
+}
+
+/*
+ * dc_state_create_copy() - Allocate a new dc_state that duplicates @src_state.
+ *
+ * The new state takes its own references on the streams, planes and phantoms
+ * of @src_state and, with CONFIG_DRM_AMD_DC_FP, gets its own copies of the
+ * source's DML2 contexts.
+ *
+ * Returns the new state holding a single reference, or NULL if the
+ * allocation or a DML2 context copy fails.
+ */
+struct dc_state *dc_state_create_copy(struct dc_state *src_state)
+{
+ struct dc_state *new_state;
+
+ new_state = kvmalloc(sizeof(struct dc_state),
+ GFP_KERNEL);
+ if (!new_state)
+ return NULL;
+
+ dc_state_copy_internal(new_state, src_state);
+
+ /*
+ * The raw copy carried over src_state's refcount value. Give the new
+ * state its own single reference right away so that the error paths
+ * below, which call dc_state_release(), really free it.
+ */
+ kref_init(&new_state->refcount);
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+ /* drop the copied pointers; they belong to src_state */
+ new_state->bw_ctx.dml2 = NULL;
+ new_state->bw_ctx.dml2_dc_power_source = NULL;
+
+ if (src_state->bw_ctx.dml2 &&
+ !dml2_create_copy(&new_state->bw_ctx.dml2, src_state->bw_ctx.dml2)) {
+ dc_state_release(new_state);
+ return NULL;
+ }
+
+ if (src_state->bw_ctx.dml2_dc_power_source &&
+ !dml2_create_copy(&new_state->bw_ctx.dml2_dc_power_source, src_state->bw_ctx.dml2_dc_power_source)) {
+ dc_state_release(new_state);
+ return NULL;
+ }
+#endif
+
+ return new_state;
+}
+
+/* Copy dc->current_state into the existing @dst_state via dc_state_copy(). */
+void dc_state_copy_current(struct dc *dc, struct dc_state *dst_state)
+{
+ dc_state_copy(dst_state, dc->current_state);
+}
+
+/*
+ * Return a newly allocated copy of dc->current_state, or NULL if
+ * dc_state_create_copy() fails.
+ */
+struct dc_state *dc_state_create_current_copy(struct dc *dc)
+{
+ return dc_state_create_copy(dc->current_state);
+}
+
+/*
+ * dc_state_construct() - Point @state at @dc's clock manager and, when a
+ * resource pool exists, initialise its link encoder assignment tracking.
+ */
+void dc_state_construct(struct dc *dc, struct dc_state *state)
+{
+ state->clk_mgr = dc->clk_mgr;
+
+ /* Initialise DIG link encoder resource tracking variables. */
+ if (dc->res_pool)
+ link_enc_cfg_init(dc, state);
+}
+
+/*
+ * dc_state_destruct() - Drop every reference held by @state and reset it.
+ *
+ * Releases all planes and streams attached to the state and all tracked
+ * phantom streams and planes, then clears the counts and the resource,
+ * bandwidth, block-sequence, DMUB command and perf bookkeeping. The state
+ * memory itself is not freed here.
+ */
+void dc_state_destruct(struct dc_state *state)
+{
+ int i, j;
+
+ /* release the planes of each stream first, then the stream itself */
+ for (i = 0; i < state->stream_count; i++) {
+ for (j = 0; j < state->stream_status[i].plane_count; j++)
+ dc_plane_state_release(
+ state->stream_status[i].plane_states[j]);
+
+ state->stream_status[i].plane_count = 0;
+ dc_stream_release(state->streams[i]);
+ state->streams[i] = NULL;
+ }
+ state->stream_count = 0;
+
+ /* release tracked phantoms */
+ for (i = 0; i < state->phantom_stream_count; i++) {
+ dc_stream_release(state->phantom_streams[i]);
+ state->phantom_streams[i] = NULL;
+ }
+ state->phantom_stream_count = 0;
+
+ for (i = 0; i < state->phantom_plane_count; i++) {
+ dc_plane_state_release(state->phantom_planes[i]);
+ state->phantom_planes[i] = NULL;
+ }
+ state->phantom_plane_count = 0;
+
+ /* wipe the remaining bookkeeping */
+ state->stream_mask = 0;
+ memset(&state->res_ctx, 0, sizeof(state->res_ctx));
+ memset(&state->pp_display_cfg, 0, sizeof(state->pp_display_cfg));
+ memset(&state->dcn_bw_vars, 0, sizeof(state->dcn_bw_vars));
+ state->clk_mgr = NULL;
+ memset(&state->bw_ctx.bw, 0, sizeof(state->bw_ctx.bw));
+ memset(state->block_sequence, 0, sizeof(state->block_sequence));
+ state->block_sequence_steps = 0;
+ memset(state->dc_dmub_cmd, 0, sizeof(state->dc_dmub_cmd));
+ state->dmub_cmd_count = 0;
+ memset(&state->perf_params, 0, sizeof(state->perf_params));
+}
+
+/* Take an additional reference on @state. @state must not be NULL. */
+void dc_state_retain(struct dc_state *state)
+{
+ kref_get(&state->refcount);
+}
+
+/*
+ * kref release callback, passed to kref_put() by dc_state_release(): tears
+ * down the state's contents, destroys its DML2 contexts (when built with
+ * CONFIG_DRM_AMD_DC_FP) and frees the state memory.
+ */
+static void dc_state_free(struct kref *kref)
+{
+ struct dc_state *state = container_of(kref, struct dc_state, refcount);
+
+ dc_state_destruct(state);
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+ dml2_destroy(state->bw_ctx.dml2);
+ state->bw_ctx.dml2 = 0;
+
+ dml2_destroy(state->bw_ctx.dml2_dc_power_source);
+ state->bw_ctx.dml2_dc_power_source = 0;
+#endif
+
+ kvfree(state);
+}
+
+/*
+ * Drop one reference on @state via kref_put(), with dc_state_free() as the
+ * release callback. A NULL @state is ignored.
+ */
+void dc_state_release(struct dc_state *state)
+{
+ if (state != NULL)
+ kref_put(&state->refcount, dc_state_free);
+}
+/*
+ * dc_state_add_stream() - Add a new dc_stream_state to a dc_state.
+ *
+ * Fails with DC_ERROR_UNEXPECTED when the state already holds as many streams
+ * as the resource pool has timing generators. Otherwise the stream is
+ * appended to state->streams[], a reference is taken on it, and an OTG master
+ * pipe is requested for it; the status of that request is returned.
+ *
+ * The stream is not removed again here if the OTG master request fails.
+ */
+enum dc_status dc_state_add_stream(
+ const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *stream)
+{
+ enum dc_status res;
+
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ if (state->stream_count >= dc->res_pool->timing_generator_count) {
+ DC_LOG_WARNING("Max streams reached, can't add stream %p !\n", stream);
+ return DC_ERROR_UNEXPECTED;
+ }
+
+ state->streams[state->stream_count] = stream;
+ dc_stream_retain(stream);
+ state->stream_count++;
+
+ res = resource_add_otg_master_for_stream_output(
+ state, dc->res_pool, stream);
+ if (res != DC_OK)
+ DC_LOG_WARNING("Adding stream %p to context failed with err %d!\n", stream, res);
+
+ return res;
+}
+
+/*
+ * dc_state_remove_stream() - Remove a stream from a dc_state.
+ *
+ * Reduces the stream to a single slice, releases its OTG master pipe, drops
+ * the reference the state held on the stream and compacts streams[] and
+ * stream_status[] so the live entries stay contiguous.
+ *
+ * Returns DC_OK on success, or DC_ERROR_UNEXPECTED when no OTG master pipe
+ * exists for the stream or the stream is not part of @state.
+ */
+enum dc_status dc_state_remove_stream(
+ const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *stream)
+{
+ int i;
+ struct pipe_ctx *del_pipe = resource_get_otg_master_for_stream(
+ &state->res_ctx, stream);
+
+ if (!del_pipe) {
+ dm_error("Pipe not found for stream %p !\n", stream);
+ return DC_ERROR_UNEXPECTED;
+ }
+
+ resource_update_pipes_for_stream_with_slice_count(state,
+ dc->current_state, dc->res_pool, stream, 1);
+ resource_remove_otg_master_for_stream_output(
+ state, dc->res_pool, stream);
+
+ for (i = 0; i < state->stream_count; i++)
+ if (state->streams[i] == stream)
+ break;
+
+ /*
+ * Compare the index rather than dereferencing streams[i]: when the
+ * search fails, i equals stream_count, which is past the live entries
+ * and may be past the end of the array.
+ */
+ if (i == state->stream_count) {
+ dm_error("Context doesn't have stream %p !\n", stream);
+ return DC_ERROR_UNEXPECTED;
+ }
+
+ dc_stream_release_3dlut_for_stream(dc, stream);
+
+ dc_stream_release(state->streams[i]);
+ state->stream_count--;
+
+ /* Trim back arrays */
+ for (; i < state->stream_count; i++) {
+ state->streams[i] = state->streams[i + 1];
+ state->stream_status[i] = state->stream_status[i + 1];
+ }
+
+ /* clear the vacated tail slot of both arrays */
+ state->streams[state->stream_count] = NULL;
+ memset(
+ &state->stream_status[state->stream_count],
+ 0,
+ sizeof(state->stream_status[0]));
+
+ return DC_OK;
+}
+
+/*
+ * Request a slice count of 1 for every plane listed in @status, i.e. undo any
+ * MPC combine on those planes in @new_ctx. @cur_ctx is passed through to the
+ * resource helper unchanged.
+ */
+static void remove_mpc_combine_for_stream(const struct dc *dc,
+ struct dc_state *new_ctx,
+ const struct dc_state *cur_ctx,
+ struct dc_stream_status *status)
+{
+ int i;
+
+ for (i = 0; i < status->plane_count; i++)
+ resource_update_pipes_for_plane_with_slice_count(
+ new_ctx, cur_ctx, dc->res_pool,
+ status->plane_states[i], 1);
+}
+
+/*
+ * dc_state_add_plane() - Attach @plane_state to @stream within @state.
+ *
+ * Tries to append DPP pipes for the plane under the stream's OTG master pipe.
+ * If that fails it frees up pipes in two escalating steps and retries after
+ * each: first by removing MPC combine from every stream in the state, then by
+ * lowering the ODM slice count of @stream one step at a time.
+ *
+ * On success the plane is recorded in the stream's status and a reference is
+ * taken on it.
+ *
+ * Returns true if the plane was attached; false if the stream is not in the
+ * state, the stream already has MAX_SURFACES planes, no OTG master pipe
+ * exists for the stream, or no pipes could be obtained.
+ */
+bool dc_state_add_plane(
+ const struct dc *dc,
+ struct dc_stream_state *stream,
+ struct dc_plane_state *plane_state,
+ struct dc_state *state)
+{
+ struct resource_pool *pool = dc->res_pool;
+ struct pipe_ctx *otg_master_pipe;
+ struct dc_stream_status *stream_status = NULL;
+ bool added = false;
+ int odm_slice_count;
+ int i;
+
+ stream_status = dc_state_get_stream_status(state, stream);
+ otg_master_pipe = resource_get_otg_master_for_stream(
+ &state->res_ctx, stream);
+ if (stream_status == NULL) {
+ dm_error("Existing stream not found; failed to attach surface!\n");
+ goto out;
+ } else if (stream_status->plane_count == MAX_SURFACES) {
+ dm_error("Surface: can not attach plane_state %p! Maximum is: %d\n",
+ plane_state, MAX_SURFACES);
+ goto out;
+ } else if (!otg_master_pipe) {
+ goto out;
+ }
+
+ added = resource_append_dpp_pipes_for_plane_composition(state,
+ dc->current_state, pool, otg_master_pipe, plane_state);
+
+ if (!added) {
+ /* try to remove MPC combine to free up pipes */
+ for (i = 0; i < state->stream_count; i++)
+ remove_mpc_combine_for_stream(dc, state,
+ dc->current_state,
+ &state->stream_status[i]);
+ added = resource_append_dpp_pipes_for_plane_composition(state,
+ dc->current_state, pool,
+ otg_master_pipe, plane_state);
+ }
+
+ if (!added) {
+ /* try to decrease ODM slice count gradually to free up pipes */
+ odm_slice_count = resource_get_odm_slice_count(otg_master_pipe);
+ for (i = odm_slice_count - 1; i > 0; i--) {
+ resource_update_pipes_for_stream_with_slice_count(state,
+ dc->current_state, dc->res_pool, stream,
+ i);
+ added = resource_append_dpp_pipes_for_plane_composition(
+ state,
+ dc->current_state, pool,
+ otg_master_pipe, plane_state);
+ if (added)
+ break;
+ }
+ }
+
+ if (added) {
+ /* record the plane on the stream and take the state's reference */
+ stream_status->plane_states[stream_status->plane_count] =
+ plane_state;
+ stream_status->plane_count++;
+ dc_plane_state_retain(plane_state);
+ }
+
+out:
+ return added;
+}
+
+/*
+ * dc_state_remove_plane() - Detach @plane_state from @stream within @state.
+ *
+ * Removes the DPP pipes used for the plane, drops the reference the state
+ * held on it and compacts the stream's plane_states[] array.
+ *
+ * Returns true on success, and also when @plane_state is NULL (nothing to
+ * do); false if the stream is not in the state or the plane is not attached
+ * to the stream. Note that the DPP pipes are removed before the plane is
+ * looked up in the stream's plane list.
+ */
+bool dc_state_remove_plane(
+ const struct dc *dc,
+ struct dc_stream_state *stream,
+ struct dc_plane_state *plane_state,
+ struct dc_state *state)
+{
+ int i;
+ struct dc_stream_status *stream_status = NULL;
+ struct resource_pool *pool = dc->res_pool;
+
+ if (!plane_state)
+ return true;
+
+ for (i = 0; i < state->stream_count; i++)
+ if (state->streams[i] == stream) {
+ stream_status = &state->stream_status[i];
+ break;
+ }
+
+ if (stream_status == NULL) {
+ dm_error("Existing stream not found; failed to remove plane.\n");
+ return false;
+ }
+
+ resource_remove_dpp_pipes_for_plane_composition(
+ state, pool, plane_state);
+
+ for (i = 0; i < stream_status->plane_count; i++) {
+ if (stream_status->plane_states[i] == plane_state) {
+ dc_plane_state_release(stream_status->plane_states[i]);
+ break;
+ }
+ }
+
+ /* the loop ran to the end without a match */
+ if (i == stream_status->plane_count) {
+ dm_error("Existing plane_state not found; failed to detach it!\n");
+ return false;
+ }
+
+ stream_status->plane_count--;
+
+ /* Start at the plane we've just released, and move all the planes one index forward to "trim" the array */
+ for (; i < stream_status->plane_count; i++)
+ stream_status->plane_states[i] = stream_status->plane_states[i + 1];
+
+ stream_status->plane_states[stream_status->plane_count] = NULL;
+
+ return true;
+}
+
+/**
+ * dc_state_rem_all_planes_for_stream - Remove planes attached to the target stream.
+ *
+ * @dc: DC instance, passed through to dc_state_remove_plane().
+ * @stream: Target stream whose attached planes are to be removed.
+ * @state: context from which the planes are to be removed.
+ *
+ * Return:
+ * Return true if DC was able to remove all planes from the target
+ * stream, otherwise, return false. Removal stops at the first plane that
+ * fails to detach.
+ */
+bool dc_state_rem_all_planes_for_stream(
+ const struct dc *dc,
+ struct dc_stream_state *stream,
+ struct dc_state *state)
+{
+ int i, old_plane_count;
+ struct dc_stream_status *stream_status = NULL;
+ struct dc_plane_state *del_planes[MAX_SURFACES] = { 0 };
+
+ for (i = 0; i < state->stream_count; i++)
+ if (state->streams[i] == stream) {
+ stream_status = &state->stream_status[i];
+ break;
+ }
+
+ if (stream_status == NULL) {
+ dm_error("Existing stream %p not found!\n", stream);
+ return false;
+ }
+
+ old_plane_count = stream_status->plane_count;
+
+ /*
+ * Snapshot the plane list first: dc_state_remove_plane() compacts
+ * plane_states[] and decrements plane_count as it goes.
+ */
+ for (i = 0; i < old_plane_count; i++)
+ del_planes[i] = stream_status->plane_states[i];
+
+ for (i = 0; i < old_plane_count; i++)
+ if (!dc_state_remove_plane(dc, stream, del_planes[i], state))
+ return false;
+
+ return true;
+}
+
+/*
+ * dc_state_add_all_planes_for_stream() - Attach the first @plane_count
+ * entries of @plane_states to @stream in @state, in order.
+ *
+ * Returns true if every plane was attached. Stops at the first failure and
+ * returns false; planes attached before the failure are not detached again.
+ */
+bool dc_state_add_all_planes_for_stream(
+ const struct dc *dc,
+ struct dc_stream_state *stream,
+ struct dc_plane_state * const *plane_states,
+ int plane_count,
+ struct dc_state *state)
+{
+ int i;
+ bool result = true;
+
+ for (i = 0; i < plane_count; i++)
+ if (!dc_state_add_plane(dc, stream, plane_states[i], state)) {
+ result = false;
+ break;
+ }
+
+ return result;
+}
+
+/* Private dc_state functions */
+
+/**
+ * dc_state_get_stream_status - Get stream status from given dc state
+ * @state: DC state to find the stream status in (may be NULL)
+ * @stream: The stream to get the stream status for
+ *
+ * The given stream is expected to exist in the given dc state. Otherwise, NULL
+ * will be returned.
+ *
+ * Return: pointer to the stream's entry in @state->stream_status[], or NULL
+ * if @state is NULL or does not contain @stream.
+ */
+struct dc_stream_status *dc_state_get_stream_status(
+ struct dc_state *state,
+ const struct dc_stream_state *stream)
+{
+ uint8_t i;
+
+ if (state == NULL)
+ return NULL;
+
+ for (i = 0; i < state->stream_count; i++) {
+ if (stream == state->streams[i])
+ return &state->stream_status[i];
+ }
+
+ return NULL;
+}
+
+/*
+ * Return the SubVP type of the stream driven by @pipe_ctx, as recorded in
+ * @state; see dc_state_get_stream_subvp_type().
+ */
+enum mall_stream_type dc_state_get_pipe_subvp_type(const struct dc_state *state,
+ const struct pipe_ctx *pipe_ctx)
+{
+ return dc_state_get_stream_subvp_type(state, pipe_ctx->stream);
+}
+
+/*
+ * Return the mall_stream_config type recorded for @stream in @state, or
+ * SUBVP_NONE if the stream is not part of the state.
+ */
+enum mall_stream_type dc_state_get_stream_subvp_type(const struct dc_state *state,
+ const struct dc_stream_state *stream)
+{
+ int i;
+
+ enum mall_stream_type type = SUBVP_NONE;
+
+ for (i = 0; i < state->stream_count; i++) {
+ if (state->streams[i] == stream) {
+ type = state->stream_status[i].mall_stream_config.type;
+ break;
+ }
+ }
+
+ return type;
+}
+
+/*
+ * Return the paired_stream recorded in @stream's mall_stream_config within
+ * @state, or NULL if the stream is not part of the state.
+ */
+struct dc_stream_state *dc_state_get_paired_subvp_stream(const struct dc_state *state,
+ const struct dc_stream_state *stream)
+{
+ int i;
+
+ struct dc_stream_state *paired_stream = NULL;
+
+ for (i = 0; i < state->stream_count; i++) {
+ if (state->streams[i] == stream) {
+ paired_stream = state->stream_status[i].mall_stream_config.paired_stream;
+ break;
+ }
+ }
+
+ return paired_stream;
+}
+
+/*
+ * dc_state_create_phantom_stream() - Create a phantom stream for the sink of
+ * @main_stream and track it in @state.
+ *
+ * The new stream is marked as phantom, given a virtual signal type and
+ * created with DPMS off. It is only tracked here, not added to the state's
+ * stream list.
+ *
+ * Returns the new stream, or NULL if it could not be allocated.
+ */
+struct dc_stream_state *dc_state_create_phantom_stream(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *main_stream)
+{
+ struct dc_stream_state *phantom_stream;
+
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ phantom_stream = dc_create_stream_for_sink(main_stream->sink);
+
+ if (!phantom_stream) {
+ DC_LOG_ERROR("Failed to allocate phantom stream.\n");
+ return NULL;
+ }
+
+ /* track phantom stream in dc_state */
+ /* NOTE(review): the result is ignored; a full tracking array goes unnoticed here */
+ dc_state_track_phantom_stream(state, phantom_stream);
+
+ phantom_stream->is_phantom = true;
+ phantom_stream->signal = SIGNAL_TYPE_VIRTUAL;
+ phantom_stream->dpms_off = true;
+
+ return phantom_stream;
+}
+
+/*
+ * dc_state_release_phantom_stream() - Stop tracking @phantom_stream in @state
+ * and drop one reference on it.
+ *
+ * If the stream is not tracked by @state an error is logged and no reference
+ * is dropped.
+ */
+void dc_state_release_phantom_stream(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *phantom_stream)
+{
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ if (!dc_state_untrack_phantom_stream(state, phantom_stream)) {
+ DC_LOG_ERROR("Failed to free phantom stream %p in dc state %p.\n", phantom_stream, state);
+ return;
+ }
+
+ dc_stream_release(phantom_stream);
+}
+
+/*
+ * dc_state_create_phantom_plane() - Create a plane state, mark it as phantom
+ * and track it in @state.
+ *
+ * @main_plane is not read by this function. The plane is only tracked here,
+ * not attached to any stream.
+ *
+ * Returns the new plane, or NULL if it could not be allocated.
+ */
+struct dc_plane_state *dc_state_create_phantom_plane(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_plane_state *main_plane)
+{
+ struct dc_plane_state *phantom_plane = dc_create_plane_state(dc);
+
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ if (!phantom_plane) {
+ DC_LOG_ERROR("Failed to allocate phantom plane.\n");
+ return NULL;
+ }
+
+ /* track phantom inside dc_state */
+ /* NOTE(review): the result is ignored; a full tracking array goes unnoticed here */
+ dc_state_track_phantom_plane(state, phantom_plane);
+
+ phantom_plane->is_phantom = true;
+
+ return phantom_plane;
+}
+
+/*
+ * dc_state_release_phantom_plane() - Stop tracking @phantom_plane in @state
+ * and drop one reference on it.
+ *
+ * If the plane is not tracked by @state an error is logged and no reference
+ * is dropped.
+ */
+void dc_state_release_phantom_plane(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_plane_state *phantom_plane)
+{
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ if (!dc_state_untrack_phantom_plane(state, phantom_plane)) {
+ DC_LOG_ERROR("Failed to free phantom plane %p in dc state %p.\n", phantom_plane, state);
+ return;
+ }
+
+ dc_plane_state_release(phantom_plane);
+}
+
+/*
+ * add phantom streams to context and generate correct meta inside dc_state
+ *
+ * Adds @phantom_stream to @state like a regular stream, makes sure it is
+ * tracked as a phantom, and links it with @main_stream in both streams'
+ * mall_stream_config (SUBVP_MAIN <-> SUBVP_PHANTOM). The SubVP cursor size
+ * limit is then enabled on the main stream.
+ *
+ * Returns the status of dc_state_add_stream(). The SubVP metadata is updated
+ * for whichever of the two streams is found in @state, whatever that status.
+ */
+enum dc_status dc_state_add_phantom_stream(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *phantom_stream,
+ struct dc_stream_state *main_stream)
+{
+ struct dc_stream_status *main_stream_status;
+ struct dc_stream_status *phantom_stream_status;
+ enum dc_status res = dc_state_add_stream(dc, state, phantom_stream);
+
+ /* check if stream is tracked */
+ if (res == DC_OK && !dc_state_is_phantom_stream_tracked(state, phantom_stream)) {
+ /* stream must be tracked if added to state */
+ dc_state_track_phantom_stream(state, phantom_stream);
+ }
+
+ /* setup subvp meta */
+ main_stream_status = dc_state_get_stream_status(state, main_stream);
+ if (main_stream_status) {
+ main_stream_status->mall_stream_config.type = SUBVP_MAIN;
+ main_stream_status->mall_stream_config.paired_stream = phantom_stream;
+ }
+
+ phantom_stream_status = dc_state_get_stream_status(state, phantom_stream);
+ if (phantom_stream_status) {
+ phantom_stream_status->mall_stream_config.type = SUBVP_PHANTOM;
+ phantom_stream_status->mall_stream_config.paired_stream = main_stream;
+ phantom_stream_status->mall_stream_config.subvp_limit_cursor_size = false;
+ phantom_stream_status->mall_stream_config.cursor_size_limit_subvp = false;
+ }
+
+ dc_state_set_stream_subvp_cursor_limit(main_stream, state, true);
+
+ return res;
+}
+
+/*
+ * dc_state_remove_phantom_stream() - Remove @phantom_stream from @state's
+ * stream list and undo the SubVP pairing.
+ *
+ * Both the phantom stream's and its paired main stream's mall_stream_config
+ * are reset to SUBVP_NONE with no paired stream. The phantom stays in the
+ * tracking array; this function does not untrack it.
+ *
+ * Returns the status of dc_state_remove_stream().
+ */
+enum dc_status dc_state_remove_phantom_stream(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *phantom_stream)
+{
+ struct dc_stream_status *main_stream_status = NULL;
+ struct dc_stream_status *phantom_stream_status;
+
+ /* reset subvp meta */
+ phantom_stream_status = dc_state_get_stream_status(state, phantom_stream);
+ if (phantom_stream_status) {
+ /* look up the main stream before the pairing is cleared */
+ main_stream_status = dc_state_get_stream_status(state, phantom_stream_status->mall_stream_config.paired_stream);
+ phantom_stream_status->mall_stream_config.type = SUBVP_NONE;
+ phantom_stream_status->mall_stream_config.paired_stream = NULL;
+ }
+
+ if (main_stream_status) {
+ main_stream_status->mall_stream_config.type = SUBVP_NONE;
+ main_stream_status->mall_stream_config.paired_stream = NULL;
+ }
+
+ /* remove stream from state */
+ return dc_state_remove_stream(dc, state, phantom_stream);
+}
+
+/*
+ * dc_state_add_phantom_plane() - Attach @phantom_plane to @phantom_stream in
+ * @state and make sure the plane is tracked as a phantom.
+ *
+ * Returns the result of dc_state_add_plane().
+ */
+bool dc_state_add_phantom_plane(
+ const struct dc *dc,
+ struct dc_stream_state *phantom_stream,
+ struct dc_plane_state *phantom_plane,
+ struct dc_state *state)
+{
+ bool res = dc_state_add_plane(dc, phantom_stream, phantom_plane, state);
+
+ /* check if plane is tracked */
+ if (res && !dc_state_is_phantom_plane_tracked(state, phantom_plane)) {
+ /* plane must be tracked if added to state */
+ dc_state_track_phantom_plane(state, phantom_plane);
+ }
+
+ return res;
+}
+
+/*
+ * Detach @phantom_plane from @phantom_stream in @state. Plain forwarder to
+ * dc_state_remove_plane(); the plane stays in the phantom tracking array.
+ */
+bool dc_state_remove_phantom_plane(
+ const struct dc *dc,
+ struct dc_stream_state *phantom_stream,
+ struct dc_plane_state *phantom_plane,
+ struct dc_state *state)
+{
+ return dc_state_remove_plane(dc, phantom_stream, phantom_plane, state);
+}
+
+/*
+ * dc_state_rem_all_phantom_planes_for_stream() - Detach every plane from
+ * @phantom_stream in @state.
+ *
+ * When @should_release_planes is true each detached plane is additionally
+ * untracked and released through dc_state_release_phantom_plane().
+ *
+ * Returns true if all planes were detached; false if the stream is not in the
+ * state or a plane failed to detach (processing stops at that plane).
+ */
+bool dc_state_rem_all_phantom_planes_for_stream(
+ const struct dc *dc,
+ struct dc_stream_state *phantom_stream,
+ struct dc_state *state,
+ bool should_release_planes)
+{
+ int i, old_plane_count;
+ struct dc_stream_status *stream_status = NULL;
+ struct dc_plane_state *del_planes[MAX_SURFACES] = { 0 };
+
+ for (i = 0; i < state->stream_count; i++)
+ if (state->streams[i] == phantom_stream) {
+ stream_status = &state->stream_status[i];
+ break;
+ }
+
+ if (stream_status == NULL) {
+ dm_error("Existing stream %p not found!\n", phantom_stream);
+ return false;
+ }
+
+ old_plane_count = stream_status->plane_count;
+
+ /*
+ * Snapshot the plane list first: dc_state_remove_plane() compacts
+ * plane_states[] and decrements plane_count as it goes.
+ */
+ for (i = 0; i < old_plane_count; i++)
+ del_planes[i] = stream_status->plane_states[i];
+
+ for (i = 0; i < old_plane_count; i++) {
+ if (!dc_state_remove_plane(dc, phantom_stream, del_planes[i], state))
+ return false;
+ if (should_release_planes)
+ dc_state_release_phantom_plane(dc, state, del_planes[i]);
+ }
+
+ return true;
+}
+
+/*
+ * Attach @plane_count planes from @phantom_planes to @phantom_stream. Plain
+ * forwarder to dc_state_add_all_planes_for_stream(); unlike
+ * dc_state_add_phantom_plane() it does not add the planes to the phantom
+ * tracking array.
+ */
+bool dc_state_add_all_phantom_planes_for_stream(
+ const struct dc *dc,
+ struct dc_stream_state *phantom_stream,
+ struct dc_plane_state * const *phantom_planes,
+ int plane_count,
+ struct dc_state *state)
+{
+ return dc_state_add_all_planes_for_stream(dc, phantom_stream, phantom_planes, plane_count, state);
+}
+
+/*
+ * dc_state_remove_phantom_streams_and_planes() - Remove every phantom stream,
+ * together with its planes, from @state.
+ *
+ * Walks the pipes of the resource pool; for each pipe that has a plane and a
+ * stream of type SUBVP_PHANTOM, the stream's planes are detached (without
+ * releasing them from tracking) and the stream itself is removed. The results
+ * of the individual removals are not checked.
+ *
+ * Returns true if at least one phantom pipe was found and processed.
+ */
+bool dc_state_remove_phantom_streams_and_planes(
+ const struct dc *dc,
+ struct dc_state *state)
+{
+ int i;
+ bool removed_phantom = false;
+ struct dc_stream_state *phantom_stream = NULL;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
+
+ if (pipe->plane_state && pipe->stream && dc_state_get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) {
+ phantom_stream = pipe->stream;
+
+ dc_state_rem_all_phantom_planes_for_stream(dc, phantom_stream, state, false);
+ dc_state_remove_phantom_stream(dc, state, phantom_stream);
+ removed_phantom = true;
+ }
+ }
+ return removed_phantom;
+}
+
+/*
+ * dc_state_release_phantom_streams_and_planes() - Untrack and release every
+ * phantom stream and phantom plane tracked by @state.
+ *
+ * The tracking arrays and counts are snapshotted before iterating because
+ * each release call removes an entry from the live array and shifts the rest.
+ */
+void dc_state_release_phantom_streams_and_planes(
+ const struct dc *dc,
+ struct dc_state *state)
+{
+ unsigned int phantom_count;
+ struct dc_stream_state *phantom_streams[MAX_PHANTOM_PIPES];
+ struct dc_plane_state *phantom_planes[MAX_PHANTOM_PIPES];
+ int i;
+
+ phantom_count = state->phantom_stream_count;
+ memcpy(phantom_streams, state->phantom_streams, sizeof(struct dc_stream_state *) * MAX_PHANTOM_PIPES);
+ for (i = 0; i < phantom_count; i++)
+ dc_state_release_phantom_stream(dc, state, phantom_streams[i]);
+
+ phantom_count = state->phantom_plane_count;
+ memcpy(phantom_planes, state->phantom_planes, sizeof(struct dc_plane_state *) * MAX_PHANTOM_PIPES);
+ for (i = 0; i < phantom_count; i++)
+ dc_state_release_phantom_plane(dc, state, phantom_planes[i]);
+}
+
+/*
+ * Return the first stream in @state whose stream_id equals @id, or NULL if
+ * there is none. NULL entries in streams[] are skipped.
+ */
+struct dc_stream_state *dc_state_get_stream_from_id(const struct dc_state *state, unsigned int id)
+{
+ struct dc_stream_state *stream = NULL;
+ int i;
+
+ for (i = 0; i < state->stream_count; i++) {
+ if (state->streams[i] && state->streams[i]->stream_id == id) {
+ stream = state->streams[i];
+ break;
+ }
+ }
+
+ return stream;
+}
+
+/*
+ * Return true if the FAMS2 global config enable bit is set in @state or in
+ * dc->current_state. Either pointer may be NULL, in which case it is skipped.
+ */
+bool dc_state_is_fams2_in_use(
+ const struct dc *dc,
+ const struct dc_state *state)
+{
+ bool is_fams2_in_use = false;
+
+ if (state)
+ is_fams2_in_use |= state->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable;
+
+ if (dc->current_state)
+ is_fams2_in_use |= dc->current_state->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable;
+
+ return is_fams2_in_use;
+}
+
+/*
+ * Set the subvp_limit_cursor_size flag of @stream's mall_stream_config in
+ * @state to @limit. Does nothing if the stream is not part of the state.
+ */
+void dc_state_set_stream_subvp_cursor_limit(const struct dc_stream_state *stream,
+ struct dc_state *state,
+ bool limit)
+{
+ struct dc_stream_status *stream_status;
+
+ stream_status = dc_state_get_stream_status(state, stream);
+
+ if (stream_status) {
+ stream_status->mall_stream_config.subvp_limit_cursor_size = limit;
+ }
+}
+
+/*
+ * Return the subvp_limit_cursor_size flag of @stream's mall_stream_config in
+ * @state, or false if the stream is not part of the state.
+ */
+bool dc_state_get_stream_subvp_cursor_limit(const struct dc_stream_state *stream,
+ struct dc_state *state)
+{
+ bool limit = false;
+
+ struct dc_stream_status *stream_status;
+
+ stream_status = dc_state_get_stream_status(state, stream);
+
+ if (stream_status) {
+ limit = stream_status->mall_stream_config.subvp_limit_cursor_size;
+ }
+
+ return limit;
+}
+
+/*
+ * Set the cursor_size_limit_subvp flag of @stream's mall_stream_config in
+ * @state to @limit. Does nothing if the stream is not part of the state.
+ */
+void dc_state_set_stream_cursor_subvp_limit(const struct dc_stream_state *stream,
+ struct dc_state *state,
+ bool limit)
+{
+ struct dc_stream_status *stream_status;
+
+ stream_status = dc_state_get_stream_status(state, stream);
+
+ if (stream_status) {
+ stream_status->mall_stream_config.cursor_size_limit_subvp = limit;
+ }
+}
+
+/*
+ * Return the cursor_size_limit_subvp flag of @stream's mall_stream_config in
+ * @state, or false if the stream is not part of the state.
+ */
+bool dc_state_get_stream_cursor_subvp_limit(const struct dc_stream_state *stream,
+ struct dc_state *state)
+{
+ bool limit = false;
+
+ struct dc_stream_status *stream_status;
+
+ stream_status = dc_state_get_stream_status(state, stream);
+
+ if (stream_status) {
+ limit = stream_status->mall_stream_config.cursor_size_limit_subvp;
+ }
+
+ return limit;
+}
+
+/*
+ * Decide whether the cursor_size_limit_subvp flag of @stream may be cleared.
+ *
+ * Returns true only when the flag is currently set and at least one of the
+ * following holds: the stream is a SubVP phantom, the stream requests a
+ * hardware cursor, subvp_limit_cursor_size is not set for the stream, the
+ * cursor is disabled, or dc_stream_check_cursor_attributes() accepts the
+ * stream's current cursor attributes. Returns false if the stream is not
+ * part of @state.
+ */
+bool dc_state_can_clear_stream_cursor_subvp_limit(const struct dc_stream_state *stream,
+ struct dc_state *state)
+{
+ bool can_clear_limit = false;
+
+ struct dc_stream_status *stream_status;
+
+ stream_status = dc_state_get_stream_status(state, stream);
+
+ if (stream_status) {
+ can_clear_limit = dc_state_get_stream_cursor_subvp_limit(stream, state) &&
+ (stream_status->mall_stream_config.type == SUBVP_PHANTOM ||
+ stream->hw_cursor_req ||
+ !stream_status->mall_stream_config.subvp_limit_cursor_size ||
+ !stream->cursor_position.enable ||
+ dc_stream_check_cursor_attributes(stream, state, &stream->cursor_attributes));
+ }
+
+ return can_clear_limit;
+}
+
+/*
+ * Return true if any stream in @state has a SubVP type other than
+ * SUBVP_NONE.
+ */
+bool dc_state_is_subvp_in_use(struct dc_state *state)
+{
+ uint32_t i;
+
+ for (i = 0; i < state->stream_count; i++) {
+ if (dc_state_get_stream_subvp_type(state, state->streams[i]) != SUBVP_NONE)
+ return true;
+ }
+
+ return false;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index 57cf4cb82370..129cd5f84983 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -23,9 +23,6 @@
*
*/
-#include <linux/delay.h>
-#include <linux/slab.h>
-
#include "dm_services.h"
#include "basics/dc_common.h"
#include "dc.h"
@@ -33,8 +30,17 @@
#include "resource.h"
#include "ipp.h"
#include "timing_generator.h"
+#include "dc_dmub_srv.h"
+#include "dc_state_priv.h"
+#include "dc_stream_priv.h"
#define DC_LOGGER dc->ctx->logger
+/* Fallback MIN/MAX; each argument is evaluated twice, so avoid side effects. */
+#ifndef MIN
+#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+#endif
+#ifndef MAX
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+#endif
/*******************************************************************************
* Private functions
@@ -56,7 +62,7 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink)
}
}
-static bool dc_stream_construct(struct dc_stream_state *stream,
+bool dc_stream_construct(struct dc_stream_state *stream,
struct dc_sink *dc_sink_data)
{
uint32_t i = 0;
@@ -73,8 +79,7 @@ static bool dc_stream_construct(struct dc_stream_state *stream,
/* Copy audio modes */
/* TODO - Remove this translation */
- for (i = 0; i < (dc_sink_data->edid_caps.audio_mode_count); i++)
- {
+ for (i = 0; i < (dc_sink_data->edid_caps.audio_mode_count); i++) {
stream->audio_info.modes[i].channel_count = dc_sink_data->edid_caps.audio_modes[i].channel_count;
stream->audio_info.modes[i].format_code = dc_sink_data->edid_caps.audio_modes[i].format_code;
stream->audio_info.modes[i].sample_rates.all = dc_sink_data->edid_caps.audio_modes[i].sample_rate;
@@ -117,26 +122,23 @@ static bool dc_stream_construct(struct dc_stream_state *stream,
update_stream_signal(stream, dc_sink_data);
- stream->out_transfer_func = dc_create_transfer_func();
- if (stream->out_transfer_func == NULL) {
- dc_sink_release(dc_sink_data);
- return false;
- }
- stream->out_transfer_func->type = TF_TYPE_BYPASS;
+ stream->out_transfer_func.type = TF_TYPE_BYPASS;
- stream->stream_id = stream->ctx->dc_stream_id_count;
- stream->ctx->dc_stream_id_count++;
+ dc_stream_assign_stream_id(stream);
return true;
}
-static void dc_stream_destruct(struct dc_stream_state *stream)
+void dc_stream_destruct(struct dc_stream_state *stream)
{
dc_sink_release(stream->sink);
- if (stream->out_transfer_func != NULL) {
- dc_transfer_func_release(stream->out_transfer_func);
- stream->out_transfer_func = NULL;
- }
+}
+
+void dc_stream_assign_stream_id(struct dc_stream_state *stream)
+{
+ /* MSB is reserved to indicate phantoms */
+ stream->stream_id = stream->ctx->dc_stream_id_count;
+ stream->ctx->dc_stream_id_count++;
}
void dc_stream_retain(struct dc_stream_state *stream)
@@ -196,14 +198,11 @@ struct dc_stream_state *dc_copy_stream(const struct dc_stream_state *stream)
if (new_stream->sink)
dc_sink_retain(new_stream->sink);
- if (new_stream->out_transfer_func)
- dc_transfer_func_retain(new_stream->out_transfer_func);
-
- new_stream->stream_id = new_stream->ctx->dc_stream_id_count;
- new_stream->ctx->dc_stream_id_count++;
+ dc_stream_assign_stream_id(new_stream);
/* If using dynamic encoder assignment, wait till stream committed to assign encoder. */
- if (new_stream->ctx->dc->res_pool->funcs->link_encs_assign)
+ if (new_stream->ctx->dc->res_pool->funcs->link_encs_assign &&
+ !new_stream->ctx->dc->config.unify_link_enc_assignment)
new_stream->link_enc = NULL;
kref_init(&new_stream->refcount);
@@ -212,31 +211,6 @@ struct dc_stream_state *dc_copy_stream(const struct dc_stream_state *stream)
}
/**
- * dc_stream_get_status_from_state - Get stream status from given dc state
- * @state: DC state to find the stream status in
- * @stream: The stream to get the stream status for
- *
- * The given stream is expected to exist in the given dc state. Otherwise, NULL
- * will be returned.
- */
-struct dc_stream_status *dc_stream_get_status_from_state(
- struct dc_state *state,
- struct dc_stream_state *stream)
-{
- uint8_t i;
-
- if (state == NULL)
- return NULL;
-
- for (i = 0; i < state->stream_count; i++) {
- if (stream == state->streams[i])
- return &state->stream_status[i];
- }
-
- return NULL;
-}
-
-/**
* dc_stream_get_status() - Get current stream status of the given stream state
* @stream: The stream to get the stream status for.
*
@@ -247,17 +221,25 @@ struct dc_stream_status *dc_stream_get_status(
struct dc_stream_state *stream)
{
struct dc *dc = stream->ctx->dc;
- return dc_stream_get_status_from_state(dc->current_state, stream);
+ return dc_state_get_stream_status(dc->current_state, stream);
}
-static void program_cursor_attributes(
+const struct dc_stream_status *dc_stream_get_status_const(
+ const struct dc_stream_state *stream)
+{
+ struct dc *dc = stream->ctx->dc;
+
+ return dc_state_get_stream_status(dc->current_state, stream);
+}
+
+void program_cursor_attributes(
struct dc *dc,
- struct dc_stream_state *stream,
- const struct dc_cursor_attributes *attributes)
+ struct dc_stream_state *stream)
{
int i;
struct resource_context *res_ctx;
struct pipe_ctx *pipe_to_program = NULL;
+ bool enable_cursor_offload = dc_dmub_srv_is_cursor_offload_enabled(dc);
if (!stream)
return;
@@ -272,46 +254,47 @@ static void program_cursor_attributes(
if (!pipe_to_program) {
pipe_to_program = pipe_ctx;
- dc->hwss.cursor_lock(dc, pipe_to_program, true);
+
+ if (enable_cursor_offload && dc->hwss.begin_cursor_offload_update) {
+ dc->hwss.begin_cursor_offload_update(dc, pipe_ctx);
+ } else {
+ dc->hwss.cursor_lock(dc, pipe_to_program, true);
+ if (pipe_to_program->next_odm_pipe)
+ dc->hwss.cursor_lock(dc, pipe_to_program->next_odm_pipe, true);
+ }
}
dc->hwss.set_cursor_attribute(pipe_ctx);
+ if (dc->ctx->dmub_srv)
+ dc_send_update_cursor_info_to_dmu(pipe_ctx, i);
if (dc->hwss.set_cursor_sdr_white_level)
dc->hwss.set_cursor_sdr_white_level(pipe_ctx);
+ if (enable_cursor_offload && dc->hwss.update_cursor_offload_pipe)
+ dc->hwss.update_cursor_offload_pipe(dc, pipe_ctx);
}
- if (pipe_to_program)
- dc->hwss.cursor_lock(dc, pipe_to_program, false);
-}
-
-#ifndef TRIM_FSFT
-/*
- * dc_optimize_timing_for_fsft() - dc to optimize timing
- */
-bool dc_optimize_timing_for_fsft(
- struct dc_stream_state *pStream,
- unsigned int max_input_rate_in_khz)
-{
- struct dc *dc;
-
- dc = pStream->ctx->dc;
-
- return (dc->hwss.optimize_timing_for_fsft &&
- dc->hwss.optimize_timing_for_fsft(dc, &pStream->timing, max_input_rate_in_khz));
+ if (pipe_to_program) {
+ if (enable_cursor_offload && dc->hwss.commit_cursor_offload_update) {
+ dc->hwss.commit_cursor_offload_update(dc, pipe_to_program);
+ } else {
+ dc->hwss.cursor_lock(dc, pipe_to_program, false);
+ if (pipe_to_program->next_odm_pipe)
+ dc->hwss.cursor_lock(dc, pipe_to_program->next_odm_pipe, false);
+ }
+ }
}
-#endif
/*
- * dc_stream_set_cursor_attributes() - Update cursor attributes and set cursor surface address
+ * dc_stream_check_cursor_attributes() - Check validity of cursor attributes and surface address
*/
-bool dc_stream_set_cursor_attributes(
- struct dc_stream_state *stream,
+bool dc_stream_check_cursor_attributes(
+ const struct dc_stream_state *stream,
+ struct dc_state *state,
const struct dc_cursor_attributes *attributes)
{
- struct dc *dc;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- bool reset_idle_optimizations = false;
-#endif
+ const struct dc *dc;
+
+ unsigned int max_cursor_size;
if (NULL == stream) {
dm_error("DC: dc_stream is NULL!\n");
@@ -328,36 +311,82 @@ bool dc_stream_set_cursor_attributes(
}
dc = stream->ctx->dc;
- stream->cursor_attributes = *attributes;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- dc_z10_restore(dc);
- /* disable idle optimizations while updating cursor */
- if (dc->idle_optimizations_allowed) {
- dc_allow_idle_optimizations(dc, false);
- reset_idle_optimizations = true;
+ /* SubVP is not compatible with HW cursor larger than what can fit in cursor SRAM.
+ * Therefore, if cursor is greater than this, fallback to SW cursor.
+ */
+ if (dc->debug.allow_sw_cursor_fallback && dc->res_pool->funcs->get_max_hw_cursor_size) {
+ max_cursor_size = dc->res_pool->funcs->get_max_hw_cursor_size(dc, state, stream);
+ max_cursor_size = max_cursor_size * max_cursor_size * 4;
+
+ if (attributes->height * attributes->width * 4 > max_cursor_size) {
+ return false;
+ }
}
-#endif
- program_cursor_attributes(dc, stream, attributes);
+ return true;
+}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- /* re-enable idle optimizations if necessary */
- if (reset_idle_optimizations)
- dc_allow_idle_optimizations(dc, true);
+/*
+ * dc_stream_set_cursor_attributes() - Update cursor attributes and set cursor surface address
+ */
+bool dc_stream_set_cursor_attributes(
+ struct dc_stream_state *stream,
+ const struct dc_cursor_attributes *attributes)
+{
+ bool result = false;
-#endif
- return true;
+ if (!stream)
+ return false;
+
+ if (dc_stream_check_cursor_attributes(stream, stream->ctx->dc->current_state, attributes)) {
+ stream->cursor_attributes = *attributes;
+ result = true;
+ }
+
+ return result;
}
-static void program_cursor_position(
- struct dc *dc,
+bool dc_stream_program_cursor_attributes(
struct dc_stream_state *stream,
- const struct dc_cursor_position *position)
+ const struct dc_cursor_attributes *attributes)
+{
+ struct dc *dc;
+ bool reset_idle_optimizations = false;
+
+ if (!stream)
+ return false;
+
+ dc = stream->ctx->dc;
+
+ if (dc_stream_set_cursor_attributes(stream, attributes)) {
+ dc_z10_restore(dc);
+ /* disable idle optimizations while updating cursor */
+ if (dc->idle_optimizations_allowed) {
+ dc_allow_idle_optimizations(dc, false);
+ reset_idle_optimizations = true;
+ }
+
+ program_cursor_attributes(dc, stream);
+
+ /* re-enable idle optimizations if necessary */
+ if (reset_idle_optimizations && !dc->debug.disable_dmub_reallow_idle)
+ dc_allow_idle_optimizations(dc, true);
+
+ return true;
+ }
+
+ return false;
+}
+
+void program_cursor_position(
+ struct dc *dc,
+ struct dc_stream_state *stream)
{
int i;
struct resource_context *res_ctx;
struct pipe_ctx *pipe_to_program = NULL;
+ bool enable_cursor_offload = dc_dmub_srv_is_cursor_offload_enabled(dc);
if (!stream)
return;
@@ -376,25 +405,33 @@ static void program_cursor_position(
if (!pipe_to_program) {
pipe_to_program = pipe_ctx;
- dc->hwss.cursor_lock(dc, pipe_to_program, true);
+
+ if (enable_cursor_offload && dc->hwss.begin_cursor_offload_update)
+ dc->hwss.begin_cursor_offload_update(dc, pipe_ctx);
+ else
+ dc->hwss.cursor_lock(dc, pipe_to_program, true);
}
dc->hwss.set_cursor_position(pipe_ctx);
+ if (enable_cursor_offload && dc->hwss.update_cursor_offload_pipe)
+ dc->hwss.update_cursor_offload_pipe(dc, pipe_ctx);
+
+ if (dc->ctx->dmub_srv)
+ dc_send_update_cursor_info_to_dmu(pipe_ctx, i);
}
- if (pipe_to_program)
- dc->hwss.cursor_lock(dc, pipe_to_program, false);
+ if (pipe_to_program) {
+ if (enable_cursor_offload && dc->hwss.commit_cursor_offload_update)
+ dc->hwss.commit_cursor_offload_update(dc, pipe_to_program);
+ else
+ dc->hwss.cursor_lock(dc, pipe_to_program, false);
+ }
}
bool dc_stream_set_cursor_position(
struct dc_stream_state *stream,
const struct dc_cursor_position *position)
{
- struct dc *dc;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- bool reset_idle_optimizations = false;
-#endif
-
if (NULL == stream) {
dm_error("DC: dc_stream is NULL!\n");
return false;
@@ -405,27 +442,66 @@ bool dc_stream_set_cursor_position(
return false;
}
+ stream->cursor_position = *position;
+
+
+ return true;
+}
+
+bool dc_stream_program_cursor_position(
+ struct dc_stream_state *stream,
+ const struct dc_cursor_position *position)
+{
+ struct dc *dc;
+ bool reset_idle_optimizations = false;
+ const struct dc_cursor_position *old_position;
+
+ if (!stream)
+ return false;
+
+ old_position = &stream->cursor_position;
dc = stream->ctx->dc;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- dc_z10_restore(dc);
- /* disable idle optimizations if enabling cursor */
- if (dc->idle_optimizations_allowed && !stream->cursor_position.enable && position->enable) {
- dc_allow_idle_optimizations(dc, false);
- reset_idle_optimizations = true;
- }
+ if (dc_stream_set_cursor_position(stream, position)) {
+ dc_z10_restore(dc);
-#endif
- stream->cursor_position = *position;
+ /* disable idle optimizations if enabling cursor */
+ if (dc->idle_optimizations_allowed &&
+ (!old_position->enable || dc->debug.exit_idle_opt_for_cursor_updates) &&
+ position->enable) {
+ dc_allow_idle_optimizations(dc, false);
+ reset_idle_optimizations = true;
+ }
- program_cursor_position(dc, stream, position);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- /* re-enable idle optimizations if necessary */
- if (reset_idle_optimizations)
- dc_allow_idle_optimizations(dc, true);
+ program_cursor_position(dc, stream);
+ /* re-enable idle optimizations if necessary */
+ if (reset_idle_optimizations && !dc->debug.disable_dmub_reallow_idle)
+ dc_allow_idle_optimizations(dc, true);
-#endif
- return true;
+ /* apply/update visual confirm */
+ if (dc->debug.visual_confirm == VISUAL_CONFIRM_HW_CURSOR) {
+ /* update software state */
+ int i;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ /* adjust visual confirm color for all pipes with current stream */
+ if (stream == pipe_ctx->stream) {
+ get_cursor_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
+
+ /* programming hardware */
+ if (pipe_ctx->plane_state)
+ dc->hwss.update_visual_confirm_color(dc, pipe_ctx,
+ pipe_ctx->plane_res.hubp->mpcc_id);
+ }
+ }
+ }
+
+ return true;
+ }
+
+ return false;
}
bool dc_stream_add_writeback(struct dc *dc,
@@ -451,7 +527,9 @@ bool dc_stream_add_writeback(struct dc *dc,
return false;
}
- wb_info->dwb_params.out_transfer_func = stream->out_transfer_func;
+ dc_exit_ips_for_hw_access(dc);
+
+ wb_info->dwb_params.out_transfer_func = &stream->out_transfer_func;
dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst];
dwb->dwb_is_drc = false;
@@ -468,41 +546,80 @@ bool dc_stream_add_writeback(struct dc *dc,
}
if (!isDrc) {
+ ASSERT(stream->num_wb_info + 1 <= MAX_DWB_PIPES);
stream->writeback_info[stream->num_wb_info++] = *wb_info;
}
if (dc->hwss.enable_writeback) {
struct dc_stream_status *stream_status = dc_stream_get_status(stream);
struct dwbc *dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst];
- dwb->otg_inst = stream_status->primary_otg_inst;
+ if (stream_status)
+ dwb->otg_inst = stream_status->primary_otg_inst;
}
- if (IS_DIAG_DC(dc->ctx->dce_environment)) {
- if (!dc->hwss.update_bandwidth(dc, dc->current_state)) {
- dm_error("DC: update_bandwidth failed!\n");
- return false;
- }
- /* enable writeback */
- if (dc->hwss.enable_writeback) {
- struct dwbc *dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst];
+ if (!dc->hwss.update_bandwidth(dc, dc->current_state)) {
+ dm_error("DC: update_bandwidth failed!\n");
+ return false;
+ }
- if (dwb->funcs->is_enabled(dwb)) {
- /* writeback pipe already enabled, only need to update */
- dc->hwss.update_writeback(dc, wb_info, dc->current_state);
- } else {
- /* Enable writeback pipe from scratch*/
- dc->hwss.enable_writeback(dc, wb_info, dc->current_state);
- }
+ /* enable writeback */
+ if (dc->hwss.enable_writeback) {
+ struct dwbc *dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst];
+
+ if (dwb->funcs->is_enabled(dwb)) {
+ /* writeback pipe already enabled, only need to update */
+ dc->hwss.update_writeback(dc, wb_info, dc->current_state);
+ } else {
+ /* Enable writeback pipe from scratch*/
+ dc->hwss.enable_writeback(dc, wb_info, dc->current_state);
}
}
+
+ return true;
+}
+
+bool dc_stream_fc_disable_writeback(struct dc *dc,
+ struct dc_stream_state *stream,
+ uint32_t dwb_pipe_inst)
+{
+ struct dwbc *dwb = dc->res_pool->dwbc[dwb_pipe_inst];
+
+ if (stream == NULL) {
+ dm_error("DC: dc_stream is NULL!\n");
+ return false;
+ }
+
+ if (dwb_pipe_inst >= MAX_DWB_PIPES) {
+ dm_error("DC: writeback pipe is invalid!\n");
+ return false;
+ }
+
+ if (stream->num_wb_info > MAX_DWB_PIPES) {
+ dm_error("DC: num_wb_info is invalid!\n");
+ return false;
+ }
+
+ dc_exit_ips_for_hw_access(dc);
+
+ if (dwb->funcs->set_fc_enable)
+ dwb->funcs->set_fc_enable(dwb, DWB_FRAME_CAPTURE_DISABLE);
+
return true;
}
+/**
+ * dc_stream_remove_writeback() - Disables writeback and removes writeback info.
+ * @dc: Display core control structure.
+ * @stream: Display core stream state.
+ * @dwb_pipe_inst: Display writeback pipe.
+ *
+ * Return: returns true on success, false otherwise.
+ */
bool dc_stream_remove_writeback(struct dc *dc,
struct dc_stream_state *stream,
uint32_t dwb_pipe_inst)
{
- int i = 0, j = 0;
+ unsigned int i, j;
if (stream == NULL) {
dm_error("DC: dc_stream is NULL!\n");
return false;
@@ -513,49 +630,47 @@ bool dc_stream_remove_writeback(struct dc *dc,
return false;
}
-// stream->writeback_info[dwb_pipe_inst].wb_enabled = false;
- for (i = 0; i < stream->num_wb_info; i++) {
- /*dynamic update*/
- if (stream->writeback_info[i].wb_enabled &&
- stream->writeback_info[i].dwb_pipe_inst == dwb_pipe_inst) {
- stream->writeback_info[i].wb_enabled = false;
- }
+ if (stream->num_wb_info > MAX_DWB_PIPES) {
+ dm_error("DC: num_wb_info is invalid!\n");
+ return false;
}
/* remove writeback info for disabled writeback pipes from stream */
for (i = 0, j = 0; i < stream->num_wb_info; i++) {
if (stream->writeback_info[i].wb_enabled) {
- if (i != j)
- /* trim the array */
- stream->writeback_info[j] = stream->writeback_info[i];
- j++;
+
+ if (stream->writeback_info[i].dwb_pipe_inst == dwb_pipe_inst)
+ stream->writeback_info[i].wb_enabled = false;
+
+ /* trim the array */
+ if (j < i) {
+ memcpy(&stream->writeback_info[j], &stream->writeback_info[i],
+ sizeof(struct dc_writeback_info));
+ j++;
+ }
}
}
stream->num_wb_info = j;
- if (IS_DIAG_DC(dc->ctx->dce_environment)) {
- /* recalculate and apply DML parameters */
- if (!dc->hwss.update_bandwidth(dc, dc->current_state)) {
- dm_error("DC: update_bandwidth failed!\n");
- return false;
- }
+ /* recalculate and apply DML parameters */
+ if (!dc->hwss.update_bandwidth(dc, dc->current_state)) {
+ dm_error("DC: update_bandwidth failed!\n");
+ return false;
+ }
+
+ dc_exit_ips_for_hw_access(dc);
- /* disable writeback */
- if (dc->hwss.disable_writeback)
+ /* disable writeback */
+ if (dc->hwss.disable_writeback) {
+ struct dwbc *dwb = dc->res_pool->dwbc[dwb_pipe_inst];
+
+ if (dwb->funcs->is_enabled(dwb))
dc->hwss.disable_writeback(dc, dwb_pipe_inst);
}
+
return true;
}
-bool dc_stream_warmup_writeback(struct dc *dc,
- int num_dwb,
- struct dc_writeback_info *wb_info)
-{
- if (dc->hwss.mmhubbub_warmup)
- return dc->hwss.mmhubbub_warmup(dc, num_dwb, wb_info);
- else
- return false;
-}
uint32_t dc_stream_get_vblank_counter(const struct dc_stream_state *stream)
{
uint8_t i;
@@ -563,10 +678,12 @@ uint32_t dc_stream_get_vblank_counter(const struct dc_stream_state *stream)
struct resource_context *res_ctx =
&dc->current_state->res_ctx;
+ dc_exit_ips_for_hw_access(dc);
+
for (i = 0; i < MAX_PIPES; i++) {
struct timing_generator *tg = res_ctx->pipe_ctx[i].stream_res.tg;
- if (res_ctx->pipe_ctx[i].stream != stream)
+ if (res_ctx->pipe_ctx[i].stream != stream || !tg)
continue;
return tg->funcs->get_frame_count(tg);
@@ -591,6 +708,8 @@ bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream,
dc = stream->ctx->dc;
res_ctx = &dc->current_state->res_ctx;
+ dc_exit_ips_for_hw_access(dc);
+
for (i = 0; i < MAX_PIPES; i++) {
struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
@@ -618,14 +737,21 @@ bool dc_stream_get_scanoutpos(const struct dc_stream_state *stream,
{
uint8_t i;
bool ret = false;
- struct dc *dc = stream->ctx->dc;
- struct resource_context *res_ctx =
- &dc->current_state->res_ctx;
+ struct dc *dc;
+ struct resource_context *res_ctx;
+
+ if (!stream->ctx)
+ return false;
+
+ dc = stream->ctx->dc;
+ res_ctx = &dc->current_state->res_ctx;
+
+ dc_exit_ips_for_hw_access(dc);
for (i = 0; i < MAX_PIPES; i++) {
struct timing_generator *tg = res_ctx->pipe_ctx[i].stream_res.tg;
- if (res_ctx->pipe_ctx[i].stream != stream)
+ if (res_ctx->pipe_ctx[i].stream != stream || !tg)
continue;
tg->funcs->get_scanoutpos(tg,
@@ -658,6 +784,8 @@ bool dc_stream_dmdata_status_done(struct dc *dc, struct dc_stream_state *stream)
if (i == MAX_PIPES)
return true;
+ dc_exit_ips_for_hw_access(dc);
+
return dc->hwss.dmdata_status_done(pipe);
}
@@ -692,6 +820,8 @@ bool dc_stream_set_dynamic_metadata(struct dc *dc,
pipe_ctx->stream->dmdata_address = attr->address;
+ dc_exit_ips_for_hw_access(dc);
+
dc->hwss.program_dmdata_engine(pipe_ctx);
if (hubp->funcs->dmdata_set_attributes != NULL &&
@@ -713,6 +843,20 @@ enum dc_status dc_stream_add_dsc_to_resource(struct dc *dc,
}
}
+struct pipe_ctx *dc_stream_get_pipe_ctx(struct dc_stream_state *stream)
+{
+ int i = 0;
+
+ for (i = 0; i < MAX_PIPES; i++) {
+ struct pipe_ctx *pipe = &stream->ctx->dc->current_state->res_ctx.pipe_ctx[i];
+
+ if (pipe->stream == stream)
+ return pipe;
+ }
+
+ return NULL;
+}
+
void dc_stream_log(const struct dc *dc, const struct dc_stream_state *stream)
{
DC_LOG_DC(
@@ -728,14 +872,400 @@ void dc_stream_log(const struct dc *dc, const struct dc_stream_state *stream)
stream->dst.height,
stream->output_color_space);
DC_LOG_DC(
- "\tpix_clk_khz: %d, h_total: %d, v_total: %d, pixelencoder:%d, displaycolorDepth:%d\n",
+ "\tpix_clk_khz: %d, h_total: %d, v_total: %d, pixel_encoding:%s, color_depth:%s\n",
stream->timing.pix_clk_100hz / 10,
stream->timing.h_total,
stream->timing.v_total,
- stream->timing.pixel_encoding,
- stream->timing.display_color_depth);
+ dc_pixel_encoding_to_str(stream->timing.pixel_encoding),
+ dc_color_depth_to_str(stream->timing.display_color_depth));
DC_LOG_DC(
"\tlink: %d\n",
stream->link->link_index);
+
+ DC_LOG_DC(
+ "\tdsc: %d, mst_pbn: %d\n",
+ stream->timing.flags.DSC,
+ stream->timing.dsc_cfg.mst_pbn);
+
+ if (stream->sink) {
+ if (stream->sink->sink_signal != SIGNAL_TYPE_VIRTUAL &&
+ stream->sink->sink_signal != SIGNAL_TYPE_NONE) {
+
+ DC_LOG_DC(
+ "\tsignal: %x dispname: %s manufacturer_id: 0x%x product_id: 0x%x\n",
+ stream->signal,
+ stream->sink->edid_caps.display_name,
+ stream->sink->edid_caps.manufacturer_id,
+ stream->sink->edid_caps.product_id);
+ }
+ }
+}
+
+/*
+* dc_stream_get_3dlut_for_stream()
+* Requirements:
+* 1. If stream already owns an RMCM instance, return it.
+* 2. If it doesn't and we don't need to allocate, return NULL.
+* 3. If there's a free RMCM instance, assign to stream and return it.
+* 4. If no free RMCM instances, return NULL.
+*/
+
+struct dc_rmcm_3dlut *dc_stream_get_3dlut_for_stream(
+ const struct dc *dc,
+ const struct dc_stream_state *stream,
+ bool allocate_one)
+{
+ unsigned int num_rmcm = dc->caps.color.mpc.num_rmcm_3dluts;
+
+ // see if one is allocated for this stream
+ for (int i = 0; i < num_rmcm; i++) {
+ if (dc->res_pool->rmcm_3dlut[i].isInUse &&
+ dc->res_pool->rmcm_3dlut[i].stream == stream)
+ return &dc->res_pool->rmcm_3dlut[i];
+ }
+
+ //case: not found one, and dont need to allocate
+ if (!allocate_one)
+ return NULL;
+
+ //see if there is an unused 3dlut, allocate
+ for (int i = 0; i < num_rmcm; i++) {
+ if (!dc->res_pool->rmcm_3dlut[i].isInUse) {
+ dc->res_pool->rmcm_3dlut[i].isInUse = true;
+ dc->res_pool->rmcm_3dlut[i].stream = stream;
+ return &dc->res_pool->rmcm_3dlut[i];
+ }
+ }
+
+ //dont have a 3dlut
+ return NULL;
+}
+
+
+void dc_stream_release_3dlut_for_stream(
+ const struct dc *dc,
+ const struct dc_stream_state *stream)
+{
+ struct dc_rmcm_3dlut *rmcm_3dlut =
+ dc_stream_get_3dlut_for_stream(dc, stream, false);
+
+ if (rmcm_3dlut) {
+ rmcm_3dlut->isInUse = false;
+ rmcm_3dlut->stream = NULL;
+ rmcm_3dlut->protection_bits = 0;
+ }
+}
+
+
+void dc_stream_init_rmcm_3dlut(struct dc *dc)
+{
+ unsigned int num_rmcm = dc->caps.color.mpc.num_rmcm_3dluts;
+
+ for (int i = 0; i < num_rmcm; i++) {
+ dc->res_pool->rmcm_3dlut[i].isInUse = false;
+ dc->res_pool->rmcm_3dlut[i].stream = NULL;
+ dc->res_pool->rmcm_3dlut[i].protection_bits = 0;
+ }
+}
+
+/*
+ * Finds the greatest index in refresh_rate_hz that contains a value <= refresh
+ */
+static int dc_stream_get_nearest_smallest_index(struct dc_stream_state *stream, int refresh)
+{
+ for (int i = 0; i < (LUMINANCE_DATA_TABLE_SIZE - 1); ++i) {
+ if ((stream->lumin_data.refresh_rate_hz[i] <= refresh) && (refresh < stream->lumin_data.refresh_rate_hz[i + 1])) {
+ return i;
+ }
+ }
+ return 9;
+}
+
+/*
+ * Finds a corresponding brightness for a given refresh rate between 2 given indices, where index1 < index2
+ */
+static int dc_stream_get_brightness_millinits_linear_interpolation (struct dc_stream_state *stream,
+ int index1,
+ int index2,
+ int refresh_hz)
+{
+ long long slope = 0;
+ if (stream->lumin_data.refresh_rate_hz[index2] != stream->lumin_data.refresh_rate_hz[index1]) {
+ slope = (stream->lumin_data.luminance_millinits[index2] - stream->lumin_data.luminance_millinits[index1]) /
+ (stream->lumin_data.refresh_rate_hz[index2] - stream->lumin_data.refresh_rate_hz[index1]);
+ }
+
+ int y_intercept = stream->lumin_data.luminance_millinits[index2] - slope * stream->lumin_data.refresh_rate_hz[index2];
+
+ return (y_intercept + refresh_hz * slope);
+}
+
+/*
+ * Finds a corresponding refresh rate for a given brightness between 2 given indices, where index1 < index2
+ */
+static int dc_stream_get_refresh_hz_linear_interpolation (struct dc_stream_state *stream,
+ int index1,
+ int index2,
+ int brightness_millinits)
+{
+ long long slope = 1;
+ if (stream->lumin_data.refresh_rate_hz[index2] != stream->lumin_data.refresh_rate_hz[index1]) {
+ slope = (stream->lumin_data.luminance_millinits[index2] - stream->lumin_data.luminance_millinits[index1]) /
+ (stream->lumin_data.refresh_rate_hz[index2] - stream->lumin_data.refresh_rate_hz[index1]);
+ }
+
+ int y_intercept = stream->lumin_data.luminance_millinits[index2] - slope * stream->lumin_data.refresh_rate_hz[index2];
+
+ return ((int)div64_s64((brightness_millinits - y_intercept), slope));
+}
+
+/*
+ * Finds the current brightness in millinits given a refresh rate
+ */
+static int dc_stream_get_brightness_millinits_from_refresh (struct dc_stream_state *stream, int refresh_hz)
+{
+ int nearest_smallest_index = dc_stream_get_nearest_smallest_index(stream, refresh_hz);
+ int nearest_smallest_value = stream->lumin_data.refresh_rate_hz[nearest_smallest_index];
+
+ if (nearest_smallest_value == refresh_hz)
+ return stream->lumin_data.luminance_millinits[nearest_smallest_index];
+
+ if (nearest_smallest_index >= 9)
+ return dc_stream_get_brightness_millinits_linear_interpolation(stream, nearest_smallest_index - 1, nearest_smallest_index, refresh_hz);
+
+ if (nearest_smallest_value == stream->lumin_data.refresh_rate_hz[nearest_smallest_index + 1])
+ return stream->lumin_data.luminance_millinits[nearest_smallest_index];
+
+ return dc_stream_get_brightness_millinits_linear_interpolation(stream, nearest_smallest_index, nearest_smallest_index + 1, refresh_hz);
}
+/*
+ * Finds the lowest/highest refresh rate (depending on search_for_max_increase)
+ * that can be achieved from starting_refresh_hz while staying
+ * within flicker criteria
+ */
+static int dc_stream_calculate_flickerless_refresh_rate(struct dc_stream_state *stream,
+ int current_brightness,
+ int starting_refresh_hz,
+ bool is_gaming,
+ bool search_for_max_increase)
+{
+ int nearest_smallest_index = dc_stream_get_nearest_smallest_index(stream, starting_refresh_hz);
+
+ int flicker_criteria_millinits = is_gaming ?
+ stream->lumin_data.flicker_criteria_milli_nits_GAMING :
+ stream->lumin_data.flicker_criteria_milli_nits_STATIC;
+
+ int safe_upper_bound = current_brightness + flicker_criteria_millinits;
+ int safe_lower_bound = current_brightness - flicker_criteria_millinits;
+ int lumin_millinits_temp = 0;
+
+ int offset = -1;
+ if (search_for_max_increase) {
+ offset = 1;
+ }
+
+ /*
+ * Increments up or down by 1 depending on search_for_max_increase
+ */
+ for (int i = nearest_smallest_index; (i > 0 && !search_for_max_increase) || (i < (LUMINANCE_DATA_TABLE_SIZE - 1) && search_for_max_increase); i += offset) {
+
+ lumin_millinits_temp = stream->lumin_data.luminance_millinits[i + offset];
+
+ if ((lumin_millinits_temp >= safe_upper_bound) || (lumin_millinits_temp <= safe_lower_bound)) {
+
+ if (stream->lumin_data.refresh_rate_hz[i + offset] == stream->lumin_data.refresh_rate_hz[i])
+ return stream->lumin_data.refresh_rate_hz[i];
+
+ int target_brightness = (stream->lumin_data.luminance_millinits[i + offset] >= (current_brightness + flicker_criteria_millinits)) ?
+ current_brightness + flicker_criteria_millinits :
+ current_brightness - flicker_criteria_millinits;
+
+ int refresh = 0;
+
+ /*
+ * Need the second input to be < third input for dc_stream_get_refresh_hz_linear_interpolation
+ */
+ if (search_for_max_increase)
+ refresh = dc_stream_get_refresh_hz_linear_interpolation(stream, i, i + offset, target_brightness);
+ else
+ refresh = dc_stream_get_refresh_hz_linear_interpolation(stream, i + offset, i, target_brightness);
+
+ if (refresh == stream->lumin_data.refresh_rate_hz[i + offset])
+ return stream->lumin_data.refresh_rate_hz[i + offset];
+
+ return refresh;
+ }
+ }
+
+ if (search_for_max_increase)
+ return (int)div64_s64((long long)stream->timing.pix_clk_100hz*100, stream->timing.v_total*(long long)stream->timing.h_total);
+ else
+ return stream->lumin_data.refresh_rate_hz[0];
+}
+
+/*
+ * Gets the max delta luminance within a specified refresh range
+ */
+static int dc_stream_get_max_delta_lumin_millinits(struct dc_stream_state *stream, int hz1, int hz2, bool isGaming)
+{
+ int lower_refresh_brightness = dc_stream_get_brightness_millinits_from_refresh (stream, hz1);
+ int higher_refresh_brightness = dc_stream_get_brightness_millinits_from_refresh (stream, hz2);
+
+ int min = lower_refresh_brightness;
+ int max = higher_refresh_brightness;
+
+ /*
+ * Static screen, therefore no need to scan through array
+ */
+ if (!isGaming) {
+ if (lower_refresh_brightness >= higher_refresh_brightness) {
+ return lower_refresh_brightness - higher_refresh_brightness;
+ }
+ return higher_refresh_brightness - lower_refresh_brightness;
+ }
+
+ min = MIN(lower_refresh_brightness, higher_refresh_brightness);
+ max = MAX(lower_refresh_brightness, higher_refresh_brightness);
+
+ int nearest_smallest_index = dc_stream_get_nearest_smallest_index(stream, hz1);
+
+ for (; nearest_smallest_index < (LUMINANCE_DATA_TABLE_SIZE - 1) &&
+ stream->lumin_data.refresh_rate_hz[nearest_smallest_index + 1] <= hz2 ; nearest_smallest_index++) {
+ min = MIN(min, stream->lumin_data.luminance_millinits[nearest_smallest_index + 1]);
+ max = MAX(max, stream->lumin_data.luminance_millinits[nearest_smallest_index + 1]);
+ }
+
+ return (max - min);
+}
+
+/*
+ * Determines the max flickerless instant vtotal delta for a stream.
+ * Determines vtotal increase/decrease based on the bool "increase"
+ */
+static unsigned int dc_stream_get_max_flickerless_instant_vtotal_delta(struct dc_stream_state *stream, bool is_gaming, bool increase)
+{
+ if (stream->timing.v_total * stream->timing.h_total == 0)
+ return 0;
+
+ int current_refresh_hz = (int)div64_s64((long long)stream->timing.pix_clk_100hz*100, stream->timing.v_total*(long long)stream->timing.h_total);
+
+ int safe_refresh_hz = dc_stream_calculate_flickerless_refresh_rate(stream,
+ dc_stream_get_brightness_millinits_from_refresh(stream, current_refresh_hz),
+ current_refresh_hz,
+ is_gaming,
+ increase);
+
+ int safe_refresh_v_total = (int)div64_s64((long long)stream->timing.pix_clk_100hz*100, safe_refresh_hz*(long long)stream->timing.h_total);
+
+ if (increase)
+ return (((int) stream->timing.v_total - safe_refresh_v_total) >= 0) ? (stream->timing.v_total - safe_refresh_v_total) : 0;
+
+ return ((safe_refresh_v_total - (int) stream->timing.v_total) >= 0) ? (safe_refresh_v_total - stream->timing.v_total) : 0;
+}
+
+/*
+ * Finds the highest refresh rate that can be achieved
+ * from starting_refresh_hz while staying within flicker criteria
+ */
+int dc_stream_calculate_max_flickerless_refresh_rate(struct dc_stream_state *stream, int starting_refresh_hz, bool is_gaming)
+{
+ if (!stream->lumin_data.is_valid)
+ return 0;
+
+ int current_brightness = dc_stream_get_brightness_millinits_from_refresh(stream, starting_refresh_hz);
+
+ return dc_stream_calculate_flickerless_refresh_rate(stream,
+ current_brightness,
+ starting_refresh_hz,
+ is_gaming,
+ true);
+}
+
+/*
+ * Finds the lowest refresh rate that can be achieved
+ * from starting_refresh_hz while staying within flicker criteria
+ */
+int dc_stream_calculate_min_flickerless_refresh_rate(struct dc_stream_state *stream, int starting_refresh_hz, bool is_gaming)
+{
+ if (!stream->lumin_data.is_valid)
+ return 0;
+
+ int current_brightness = dc_stream_get_brightness_millinits_from_refresh(stream, starting_refresh_hz);
+
+ return dc_stream_calculate_flickerless_refresh_rate(stream,
+ current_brightness,
+ starting_refresh_hz,
+ is_gaming,
+ false);
+}
+
+/*
+ * Determines if there will be a flicker when moving between 2 refresh rates
+ */
+bool dc_stream_is_refresh_rate_range_flickerless(struct dc_stream_state *stream, int hz1, int hz2, bool is_gaming)
+{
+
+ /*
+ * Without valid luminance data the range is reported as not flickerless (false)
+ */
+ if (!stream->lumin_data.is_valid)
+ return false;
+
+ int dl = dc_stream_get_max_delta_lumin_millinits(stream, hz1, hz2, is_gaming);
+
+ int flicker_criteria_millinits = (is_gaming) ?
+ stream->lumin_data.flicker_criteria_milli_nits_GAMING :
+ stream->lumin_data.flicker_criteria_milli_nits_STATIC;
+
+ return (dl <= flicker_criteria_millinits);
+}
+
+/*
+ * Determines the max instant vtotal delta decrease that can be applied without
+ * flickering for a given stream
+ */
+unsigned int dc_stream_get_max_flickerless_instant_vtotal_decrease(struct dc_stream_state *stream,
+ bool is_gaming)
+{
+ if (!stream->lumin_data.is_valid)
+ return 0;
+
+ return dc_stream_get_max_flickerless_instant_vtotal_delta(stream, is_gaming, true);
+}
+
+/*
+ * Determines the max instant vtotal delta increase that can be applied without
+ * flickering for a given stream
+ */
+unsigned int dc_stream_get_max_flickerless_instant_vtotal_increase(struct dc_stream_state *stream,
+ bool is_gaming)
+{
+ if (!stream->lumin_data.is_valid)
+ return 0;
+
+ return dc_stream_get_max_flickerless_instant_vtotal_delta(stream, is_gaming, false);
+}
+
+bool dc_stream_is_cursor_limit_pending(struct dc *dc, struct dc_stream_state *stream)
+{
+ bool is_limit_pending = false;
+
+ if (dc->current_state)
+ is_limit_pending = dc_state_get_stream_cursor_subvp_limit(stream, dc->current_state);
+
+ return is_limit_pending;
+}
+
+bool dc_stream_can_clear_cursor_limit(struct dc *dc, struct dc_stream_state *stream)
+{
+ bool can_clear_limit = false;
+
+ if (dc->current_state)
+ can_clear_limit = dc_state_get_stream_cursor_subvp_limit(stream, dc->current_state) &&
+ (stream->hw_cursor_req ||
+ !stream->cursor_position.enable ||
+ dc_stream_check_cursor_attributes(stream, dc->current_state, &stream->cursor_attributes));
+
+ return can_clear_limit;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
index e6b9c6a71841..922f23557f5d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
@@ -23,8 +23,6 @@
*
*/
-#include <linux/mm.h>
-
/* DC interface (public) */
#include "dm_services.h"
#include "dc.h"
@@ -34,74 +32,58 @@
#include "transform.h"
#include "dpp.h"
+#include "dc_plane_priv.h"
+
/*******************************************************************************
* Private functions
******************************************************************************/
-static void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *plane_state)
+void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *plane_state)
{
plane_state->ctx = ctx;
- plane_state->gamma_correction = dc_create_gamma();
- if (plane_state->gamma_correction != NULL)
- plane_state->gamma_correction->is_identity = true;
+ plane_state->gamma_correction.is_identity = true;
- plane_state->in_transfer_func = dc_create_transfer_func();
- if (plane_state->in_transfer_func != NULL) {
- plane_state->in_transfer_func->type = TF_TYPE_BYPASS;
- }
- plane_state->in_shaper_func = dc_create_transfer_func();
- if (plane_state->in_shaper_func != NULL) {
- plane_state->in_shaper_func->type = TF_TYPE_BYPASS;
- }
+ plane_state->in_transfer_func.type = TF_TYPE_BYPASS;
- plane_state->lut3d_func = dc_create_3dlut_func();
+ plane_state->in_shaper_func.type = TF_TYPE_BYPASS;
- plane_state->blend_tf = dc_create_transfer_func();
- if (plane_state->blend_tf != NULL) {
- plane_state->blend_tf->type = TF_TYPE_BYPASS;
- }
+ plane_state->lut3d_func.state.raw = 0;
+
+ plane_state->blend_tf.type = TF_TYPE_BYPASS;
+
+ plane_state->pre_multiplied_alpha = true;
}
-static void dc_plane_destruct(struct dc_plane_state *plane_state)
+void dc_plane_destruct(struct dc_plane_state *plane_state)
{
- if (plane_state->gamma_correction != NULL) {
- dc_gamma_release(&plane_state->gamma_correction);
- }
- if (plane_state->in_transfer_func != NULL) {
- dc_transfer_func_release(
- plane_state->in_transfer_func);
- plane_state->in_transfer_func = NULL;
- }
- if (plane_state->in_shaper_func != NULL) {
- dc_transfer_func_release(
- plane_state->in_shaper_func);
- plane_state->in_shaper_func = NULL;
- }
- if (plane_state->lut3d_func != NULL) {
- dc_3dlut_func_release(
- plane_state->lut3d_func);
- plane_state->lut3d_func = NULL;
- }
- if (plane_state->blend_tf != NULL) {
- dc_transfer_func_release(
- plane_state->blend_tf);
- plane_state->blend_tf = NULL;
+ // no more pointers to free within dc_plane_state
+}
+
+
+/* dc_state is passed in separately since it may differ from the current dc state accessible from plane_state e.g.
+ * if the driver is doing an update from an old context to a new one and the caller wants the pipe mask for the new
+ * context rather than the existing one
+ */
+uint8_t dc_plane_get_pipe_mask(struct dc_state *dc_state, const struct dc_plane_state *plane_state)
+{
+ uint8_t pipe_mask = 0;
+ int i;
+
+ for (i = 0; i < plane_state->ctx->dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &dc_state->res_ctx.pipe_ctx[i];
+
+ if (pipe_ctx->plane_state == plane_state && pipe_ctx->plane_res.hubp)
+ pipe_mask |= 1 << pipe_ctx->plane_res.hubp->inst;
}
+ return pipe_mask;
}
/*******************************************************************************
* Public functions
******************************************************************************/
-void enable_surface_flip_reporting(struct dc_plane_state *plane_state,
- uint32_t controller_id)
-{
- plane_state->irq_source = controller_id + DC_IRQ_SOURCE_PFLIP1 - 1;
- /*register_flip_interrupt(surface);*/
-}
-
-struct dc_plane_state *dc_create_plane_state(struct dc *dc)
+struct dc_plane_state *dc_create_plane_state(const struct dc *dc)
{
struct dc_plane_state *plane_state = kvzalloc(sizeof(*plane_state),
GFP_KERNEL);
@@ -127,7 +109,8 @@ struct dc_plane_state *dc_create_plane_state(struct dc *dc)
*****************************************************************************
*/
const struct dc_plane_status *dc_plane_get_status(
- const struct dc_plane_state *plane_state)
+ const struct dc_plane_state *plane_state,
+ union dc_plane_status_update_flags flags)
{
const struct dc_plane_status *plane_status;
struct dc *dc;
@@ -154,11 +137,14 @@ const struct dc_plane_status *dc_plane_get_status(
if (pipe_ctx->plane_state != plane_state)
continue;
- pipe_ctx->plane_state->status.is_flip_pending = false;
+ if (pipe_ctx->plane_state && flags.bits.address)
+ pipe_ctx->plane_state->status.is_flip_pending = false;
break;
}
+ dc_exit_ips_for_hw_access(dc);
+
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe_ctx =
&dc->current_state->res_ctx.pipe_ctx[i];
@@ -166,7 +152,8 @@ const struct dc_plane_status *dc_plane_get_status(
if (pipe_ctx->plane_state != plane_state)
continue;
- dc->hwss.update_pending_status(pipe_ctx);
+ if (flags.bits.address)
+ dc->hwss.update_pending_status(pipe_ctx);
}
return plane_status;
@@ -285,4 +272,41 @@ void dc_3dlut_func_retain(struct dc_3dlut *lut)
kref_get(&lut->refcount);
}
+void dc_plane_force_dcc_and_tiling_disable(struct dc_plane_state *plane_state,
+ bool clear_tiling)
+{
+ struct dc *dc;
+ int i;
+
+ if (!plane_state)
+ return;
+
+ dc = plane_state->ctx->dc;
+
+ if (!dc || !dc->current_state)
+ return;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ if (!pipe_ctx)
+ continue;
+
+ if (dc->hwss.clear_surface_dcc_and_tiling)
+ dc->hwss.clear_surface_dcc_and_tiling(pipe_ctx, plane_state, clear_tiling);
+ }
+}
+
+void dc_plane_copy_config(struct dc_plane_state *dst, const struct dc_plane_state *src)
+{
+ struct kref temp_refcount;
+
+ /* backup persistent info */
+ memcpy(&temp_refcount, &dst->refcount, sizeof(struct kref));
+ /* copy all configuration information */
+ memcpy(dst, src, sizeof(struct dc_plane_state));
+
+ /* restore persistent info */
+ memcpy(&dst->refcount, &temp_refcount, sizeof(struct kref));
+}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c b/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c
index cde8ed2560b3..d1e68dc57a2a 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c
@@ -47,9 +47,8 @@ int dc_setup_system_context(struct dc *dc, struct dc_phy_addr_space_config *pa_c
*/
memcpy(&dc->vm_pa_config, pa_config, sizeof(struct dc_phy_addr_space_config));
dc->vm_pa_config.valid = true;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
+ dc->dml2_options.gpuvm_enable = true;
dc_z10_save_init(dc);
-#endif
}
return num_vmids;
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 618e7989176f..29edfa51ea2c 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-14 Advanced Micro Devices, Inc.
+ * Copyright 2012-2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -27,45 +27,71 @@
#define DC_INTERFACE_H_
#include "dc_types.h"
+#include "dc_state.h"
+#include "dc_plane.h"
#include "grph_object_defs.h"
#include "logger_types.h"
-#if defined(CONFIG_DRM_AMD_DC_HDCP)
-#include "hdcp_types.h"
-#endif
+#include "hdcp_msg_types.h"
#include "gpio_types.h"
#include "link_service_types.h"
#include "grph_object_ctrl_defs.h"
#include <inc/hw/opp.h>
-#include "inc/hw_sequencer.h"
+#include "hwss/hw_sequencer.h"
#include "inc/compressor.h"
#include "inc/hw/dmcu.h"
#include "dml/display_mode_lib.h"
+#include "dml2_0/dml2_wrapper.h"
+
+#include "dmub/inc/dmub_cmd.h"
+
+#include "sspl/dc_spl_types.h"
+
+struct abm_save_restore;
+
/* forward declaration */
struct aux_payload;
struct set_config_cmd_payload;
struct dmub_notification;
-
-#define DC_VER "3.2.160"
-
-#define MAX_SURFACES 3
+struct dcn_hubbub_reg_state;
+struct dcn_hubp_reg_state;
+struct dcn_dpp_reg_state;
+struct dcn_mpc_reg_state;
+struct dcn_opp_reg_state;
+struct dcn_dsc_reg_state;
+struct dcn_optc_reg_state;
+struct dcn_dccg_reg_state;
+
+#define DC_VER "3.2.359"
+
+/**
+ * MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC
+ */
+#define MAX_SURFACES 4
+/**
+ * MAX_PLANES - representative of the upper bound of planes that are supported by the HW
+ */
#define MAX_PLANES 6
#define MAX_STREAMS 6
-#define MAX_SINKS_PER_LINK 4
#define MIN_VIEWPORT_SIZE 12
#define MAX_NUM_EDP 2
+#define MAX_SUPPORTED_FORMATS 7
-/*******************************************************************************
- * Display Core Interfaces
- ******************************************************************************/
+#define MAX_HOST_ROUTERS_NUM 3
+#define MAX_DPIA_PER_HOST_ROUTER 3
+#define MAX_DPIA_NUM (MAX_HOST_ROUTERS_NUM * MAX_DPIA_PER_HOST_ROUTER)
+
+/* Display Core Interfaces */
struct dc_versions {
const char *dc_ver;
struct dmcu_version dmcu_version;
};
enum dp_protocol_version {
- DP_VERSION_1_4,
+ DP_VERSION_1_4 = 0,
+ DP_VERSION_2_1,
+ DP_VERSION_UNKNOWN,
};
enum dc_plane_type {
@@ -75,10 +101,18 @@ enum dc_plane_type {
DC_PLANE_TYPE_DCN_UNIVERSAL,
};
+// Sizes defined as multiples of 64KB
+enum det_size {
+ DET_SIZE_DEFAULT = 0,
+ DET_SIZE_192KB = 3,
+ DET_SIZE_256KB = 4,
+ DET_SIZE_320KB = 5,
+ DET_SIZE_384KB = 6
+};
+
+
struct dc_plane_cap {
enum dc_plane_type type;
- uint32_t blends_with_above : 1;
- uint32_t blends_with_below : 1;
uint32_t per_pixel_alpha : 1;
struct {
uint32_t argb8888 : 1;
@@ -108,7 +142,26 @@ struct dc_plane_cap {
uint32_t min_height;
};
-// Color management caps (DPP and MPC)
+/**
+ * DOC: color-management-caps
+ *
+ * **Color management caps (DPP and MPC)**
+ *
+ * Modules/color calculates various color operations which are translated to
+ * abstracted HW. DCE 5-12 had almost no important changes, but starting with
+ * DCN1, every new generation comes with fairly major differences in color
+ * pipeline. Therefore, we abstract color pipe capabilities so modules/DM can
+ * decide mapping to HW block based on logical capabilities.
+ */
+
+/**
+ * struct rom_curve_caps - predefined transfer function caps for degamma and regamma
+ * @srgb: RGB color space transfer func
+ * @bt2020: BT.2020 transfer func
+ * @gamma2_2: standard gamma
+ * @pq: perceptual quantizer transfer function
+ * @hlg: hybrid log–gamma transfer function
+ */
struct rom_curve_caps {
uint16_t srgb : 1;
uint16_t bt2020 : 1;
@@ -117,41 +170,131 @@ struct rom_curve_caps {
uint16_t hlg : 1;
};
+/**
+ * struct dpp_color_caps - color pipeline capabilities for display pipe and
+ * plane blocks
+ *
+ * @dcn_arch: all DCE generations treated the same
+ * @input_lut_shared: shared with DGAM. Input LUT is different than most LUTs,
+ * just plain 256-entry lookup
+ * @icsc: input color space conversion
+ * @dgam_ram: programmable degamma LUT
+ * @post_csc: post color space conversion, before gamut remap
+ * @gamma_corr: degamma correction
+ * @hw_3d_lut: 3D LUT support. It implies a shaper LUT before. It may be shared
+ * with MPC by setting mpc:shared_3d_lut flag
+ * @ogam_ram: programmable out/blend gamma LUT
+ * @ocsc: output color space conversion
+ * @dgam_rom_for_yuv: pre-defined degamma LUT for YUV planes
+ * @dgam_rom_caps: pre-defined curve caps for degamma 1D LUT
+ * @ogam_rom_caps: pre-defined curve caps for regamma 1D LUT
+ *
+ * Note: hdr_mult and gamut remap (CTM) are always available in DPP (in that order)
+ */
struct dpp_color_caps {
- uint16_t dcn_arch : 1; // all DCE generations treated the same
- // input lut is different than most LUTs, just plain 256-entry lookup
- uint16_t input_lut_shared : 1; // shared with DGAM
+ uint16_t dcn_arch : 1;
+ uint16_t input_lut_shared : 1;
uint16_t icsc : 1;
uint16_t dgam_ram : 1;
- uint16_t post_csc : 1; // before gamut remap
+ uint16_t post_csc : 1;
uint16_t gamma_corr : 1;
-
- // hdr_mult and gamut remap always available in DPP (in that order)
- // 3d lut implies shaper LUT,
- // it may be shared with MPC - check MPC:shared_3d_lut flag
uint16_t hw_3d_lut : 1;
- uint16_t ogam_ram : 1; // blnd gam
+ uint16_t ogam_ram : 1;
uint16_t ocsc : 1;
uint16_t dgam_rom_for_yuv : 1;
struct rom_curve_caps dgam_rom_caps;
struct rom_curve_caps ogam_rom_caps;
};
+/* Below structure is to describe the HW support for mem layout, extend support
+ range to match what OS could handle in the roadmap */
+struct lut3d_caps {
+ uint32_t dma_3d_lut : 1; /*< DMA mode support for 3D LUT */
+ struct {
+ uint32_t swizzle_3d_rgb : 1;
+ uint32_t swizzle_3d_bgr : 1;
+ uint32_t linear_1d : 1;
+ } mem_layout_support;
+ struct {
+ uint32_t unorm_12msb : 1;
+ uint32_t unorm_12lsb : 1;
+ uint32_t float_fp1_5_10 : 1;
+ } mem_format_support;
+ struct {
+ uint32_t order_rgba : 1;
+ uint32_t order_bgra : 1;
+ } mem_pixel_order_support;
+ /*< size options are 9, 17, 33, 45, 65 */
+ struct {
+ uint32_t dim_9 : 1; /* 3D LUT support for 9x9x9 */
+ uint32_t dim_17 : 1; /* 3D LUT support for 17x17x17 */
+ uint32_t dim_33 : 1; /* 3D LUT support for 33x33x33 */
+ uint32_t dim_45 : 1; /* 3D LUT support for 45x45x45 */
+ uint32_t dim_65 : 1; /* 3D LUT support for 65x65x65 */
+ } lut_dim_caps;
+};
+
+/**
+ * struct mpc_color_caps - color pipeline capabilities for multiple pipe and
+ * plane combined blocks
+ *
+ * @gamut_remap: color transformation matrix
+ * @ogam_ram: programmable out gamma LUT
+ * @ocsc: output color space conversion matrix
+ * @num_3dluts: MPC 3D LUT; always assumes a preceding shaper LUT
+ * @num_rmcm_3dluts: number of RMCM 3D LUTS; always assumes a preceding shaper LUT
+ * @shared_3d_lut: shared 3D LUT flag. Can be either DPP or MPC, but single
+ * instance
+ * @ogam_rom_caps: pre-defined curve caps for regamma 1D LUT
+ * @mcm_3d_lut_caps: HW support cap for MCM LUT memory
+ * @rmcm_3d_lut_caps: HW support cap for RMCM LUT memory
+ * @preblend: whether color manager supports preblend with MPC
+ */
struct mpc_color_caps {
uint16_t gamut_remap : 1;
uint16_t ogam_ram : 1;
uint16_t ocsc : 1;
- uint16_t num_3dluts : 3; //3d lut always assumes a preceding shaper LUT
- uint16_t shared_3d_lut:1; //can be in either DPP or MPC, but single instance
-
+ uint16_t num_3dluts : 3;
+ uint16_t num_rmcm_3dluts : 3;
+ uint16_t shared_3d_lut:1;
struct rom_curve_caps ogam_rom_caps;
+ struct lut3d_caps mcm_3d_lut_caps;
+ struct lut3d_caps rmcm_3d_lut_caps;
+ bool preblend;
};
+/**
+ * struct dc_color_caps - color pipes capabilities for DPP and MPC hw blocks
+ * @dpp: color pipes caps for DPP
+ * @mpc: color pipes caps for MPC
+ */
struct dc_color_caps {
struct dpp_color_caps dpp;
struct mpc_color_caps mpc;
};
+struct dc_dmub_caps {
+ bool psr;
+ bool mclk_sw;
+ bool subvp_psr;
+ bool gecc_enable;
+ uint8_t fams_ver;
+ bool aux_backlight_support;
+};
+
+struct dc_scl_caps {
+ bool sharpener_support;
+};
+
+struct dc_check_config {
+ /**
+ * max video plane width that can be safely assumed to be always
+ * supported by single DPP pipe.
+ */
+ unsigned int max_optimizable_video_width;
+ bool enable_legacy_fast_update;
+};
+
struct dc_caps {
uint32_t max_streams;
uint32_t max_links;
@@ -165,6 +308,7 @@ struct dc_caps {
uint32_t i2c_speed_in_khz_hdcp;
uint32_t dmdata_alloc_size;
unsigned int max_cursor_size;
+ unsigned int max_buffered_cursor_size;
unsigned int max_video_width;
unsigned int min_horizontal_blanking_period;
int linear_pitch_alignment;
@@ -178,6 +322,9 @@ struct dc_caps {
bool psp_setup_panel_mode;
bool extended_aux_timeout_support;
bool dmcub_support;
+ bool zstate_support;
+ bool ips_support;
+ bool ips_v2_support;
uint32_t num_of_internal_disp;
enum dp_protocol_version max_dp_protocol_version;
unsigned int mall_size_per_mem_channel;
@@ -185,11 +332,38 @@ struct dc_caps {
unsigned int cursor_cache_size;
struct dc_plane_cap planes[MAX_PLANES];
struct dc_color_caps color;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
+ struct dc_dmub_caps dmub_caps;
bool dp_hpo;
-#endif
+ bool dp_hdmi21_pcon_support;
+ bool edp_dsc_support;
bool vbios_lttpr_aware;
bool vbios_lttpr_enable;
+ bool fused_io_supported;
+ uint32_t max_otg_num;
+ uint32_t max_cab_allocation_bytes;
+ uint32_t cache_line_size;
+ uint32_t cache_num_ways;
+ uint16_t subvp_fw_processing_delay_us;
+ uint8_t subvp_drr_max_vblank_margin_us;
+ uint16_t subvp_prefetch_end_to_mall_start_us;
+ uint8_t subvp_swath_height_margin_lines; // subvp start line must be aligned to 2 x swath height
+ uint16_t subvp_pstate_allow_width_us;
+ uint16_t subvp_vertical_int_margin_us;
+ bool seamless_odm;
+ uint32_t max_v_total;
+ bool vtotal_limited_by_fp2;
+ uint32_t max_disp_clock_khz_at_vmin;
+ uint8_t subvp_drr_vblank_start_margin_us;
+ bool cursor_not_scaled;
+ bool dcmode_power_limits_present;
+ bool sequential_ono;
+ /* Conservative limit for DCC cases which require ODM4:1 to support*/
+ uint32_t dcc_plane_width_limit;
+ struct dc_scl_caps scl_caps;
+ uint8_t num_of_host_routers;
+ uint8_t num_of_dpias_per_host_router;
+ /* limit of the ODM only, could be limited by other factors (like pipe count)*/
+ uint8_t max_odm_combine_factor;
};
struct dc_bug_wa {
@@ -197,12 +371,24 @@ struct dc_bug_wa {
bool dedcn20_305_wa;
bool skip_clock_update;
bool lt_early_cr_pattern;
+ struct {
+ uint8_t uclk : 1;
+ uint8_t fclk : 1;
+ uint8_t dcfclk : 1;
+ uint8_t dcfclk_ds: 1;
+ } clock_update_disable_mask;
+ bool skip_psr_ips_crtc_disable;
};
-
struct dc_dcc_surface_param {
struct dc_size surface_size;
enum surface_pixel_format format;
- enum swizzle_mode_values swizzle_mode;
+ unsigned int plane0_pitch;
+ struct dc_size plane1_size;
+ unsigned int plane1_pitch;
+ union {
+ enum swizzle_mode_values swizzle_mode;
+ enum swizzle_mode_addr3_values swizzle_mode_addr3;
+ };
enum dc_scan_direction scan;
};
@@ -210,15 +396,16 @@ struct dc_dcc_setting {
unsigned int max_compressed_blk_size;
unsigned int max_uncompressed_blk_size;
bool independent_64b_blks;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- //These bitfields to be used starting with DCN
+ //These bitfields to be used starting with DCN 3.0
struct {
- uint32_t dcc_256_64_64 : 1;//available in ASICs before DCN (the worst compression case)
- uint32_t dcc_128_128_uncontrained : 1; //available in ASICs before DCN
- uint32_t dcc_256_128_128 : 1; //available starting with DCN
- uint32_t dcc_256_256_unconstrained : 1; //available in ASICs before DCN (the best compression case)
+ uint32_t dcc_256_64_64 : 1;//available in ASICs before DCN 3.0 (the worst compression case)
+ uint32_t dcc_128_128_uncontrained : 1; //available in ASICs before DCN 3.0
+ uint32_t dcc_256_128_128 : 1; //available starting with DCN 3.0
+ uint32_t dcc_256_256_unconstrained : 1; //available in ASICs before DCN 3.0 (the best compression case)
+ uint32_t dcc_256_256 : 1; //available in ASICs starting with DCN 4.0x (the best compression case)
+ uint32_t dcc_256_128 : 1; //available in ASICs starting with DCN 4.0x
+ uint32_t dcc_256_64 : 1; //available in ASICs starting with DCN 4.0x (the worst compression case)
} dcc_controls;
-#endif
};
struct dc_surface_dcc_cap {
@@ -280,21 +467,32 @@ enum surface_update_type {
UPDATE_TYPE_FULL, /* may need to shuffle resources */
};
+enum dc_lock_descriptor {
+ LOCK_DESCRIPTOR_NONE = 0x0,
+ LOCK_DESCRIPTOR_STREAM = 0x1,
+ LOCK_DESCRIPTOR_LINK = 0x2,
+ LOCK_DESCRIPTOR_GLOBAL = 0x4,
+};
+
+struct surface_update_descriptor {
+ enum surface_update_type update_type;
+ enum dc_lock_descriptor lock_descriptor;
+};
+
/* Forward declaration*/
struct dc;
struct dc_plane_state;
struct dc_state;
-
struct dc_cap_funcs {
bool (*get_dcc_compression_cap)(const struct dc *dc,
const struct dc_dcc_surface_param *input,
struct dc_surface_dcc_cap *output);
+ bool (*get_subvp_en)(struct dc *dc, struct dc_state *context);
};
struct link_training_settings;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
union allow_lttpr_non_transparent_mode {
struct {
bool DP1_4A : 1;
@@ -302,7 +500,7 @@ union allow_lttpr_non_transparent_mode {
} bits;
unsigned char raw;
};
-#endif
+
/* Structure to hold configuration flags set by dm at dc creation. */
struct dc_config {
bool gpu_vm_support;
@@ -310,28 +508,55 @@ struct dc_config {
bool fbc_support;
bool disable_fractional_pwm;
bool allow_seamless_boot_optimization;
- bool power_down_display_on_boot;
+ bool seamless_boot_edp_requested;
bool edp_not_connected;
bool edp_no_power_sequencing;
bool force_enum_edp;
bool forced_clocks;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
union allow_lttpr_non_transparent_mode allow_lttpr_non_transparent_mode;
-#else
- bool allow_lttpr_non_transparent_mode;
-#endif
bool multi_mon_pp_mclk_switch;
bool disable_dmcu;
bool enable_4to1MPC;
bool enable_windowed_mpo_odm;
- bool allow_edp_hotplug_detection;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
+ bool forceHBR2CP2520; // Used for switching between test patterns TPS4 and CP2520
+ uint32_t allow_edp_hotplug_detection;
+ bool skip_riommu_prefetch_wa;
bool clamp_min_dcfclk;
-#endif
uint64_t vblank_alignment_dto_params;
uint8_t vblank_alignment_max_frame_time_diff;
bool is_asymmetric_memory;
bool is_single_rank_dimm;
+ bool is_vmin_only_asic;
+ bool use_spl;
+ bool prefer_easf;
+ bool use_pipe_ctx_sync_logic;
+ int smart_mux_version;
+ bool ignore_dpref_ss;
+ bool enable_mipi_converter_optimization;
+ bool use_default_clock_table;
+ bool force_bios_enable_lttpr;
+ uint8_t force_bios_fixed_vs;
+ int sdpif_request_limit_words_per_umc;
+ bool dc_mode_clk_limit_support;
+ bool EnableMinDispClkODM;
+ bool enable_auto_dpm_test_logs;
+ unsigned int disable_ips;
+ unsigned int disable_ips_rcg;
+ unsigned int disable_ips_in_vpb;
+ bool disable_ips_in_dpms_off;
+ bool usb4_bw_alloc_support;
+ bool allow_0_dtb_clk;
+ bool use_assr_psp_message;
+ bool support_edp0_on_dp1;
+ unsigned int enable_fpo_flicker_detection;
+ bool disable_hbr_audio_dp2;
+ bool consolidated_dpia_dp_lt;
+ bool set_pipe_unlock_order;
+ bool enable_dpia_pre_training;
+ bool unify_link_enc_assignment;
+ bool enable_cursor_offload;
+ struct spl_sharpness_range dcn_sharpness_range;
+ struct spl_sharpness_range dcn_override_sharpness_range;
};
enum visual_confirm {
@@ -340,13 +565,37 @@ enum visual_confirm {
VISUAL_CONFIRM_HDR = 2,
VISUAL_CONFIRM_MPCTREE = 4,
VISUAL_CONFIRM_PSR = 5,
+ VISUAL_CONFIRM_SWAPCHAIN = 6,
+ VISUAL_CONFIRM_FAMS = 7,
VISUAL_CONFIRM_SWIZZLE = 9,
+ VISUAL_CONFIRM_SMARTMUX_DGPU = 10,
+ VISUAL_CONFIRM_REPLAY = 12,
+ VISUAL_CONFIRM_SUBVP = 14,
+ VISUAL_CONFIRM_MCLK_SWITCH = 16,
+ VISUAL_CONFIRM_FAMS2 = 19,
+ VISUAL_CONFIRM_HW_CURSOR = 20,
+ VISUAL_CONFIRM_VABC = 21,
+ VISUAL_CONFIRM_DCC = 22,
+ VISUAL_CONFIRM_EXPLICIT = 0x80000000,
};
enum dc_psr_power_opts {
psr_power_opt_invalid = 0x0,
psr_power_opt_smu_opt_static_screen = 0x1,
psr_power_opt_z10_static_screen = 0x10,
+ psr_power_opt_ds_disable_allow = 0x100,
+};
+
+enum dml_hostvm_override_opts {
+ DML_HOSTVM_NO_OVERRIDE = 0x0,
+ DML_HOSTVM_OVERRIDE_FALSE = 0x1,
+ DML_HOSTVM_OVERRIDE_TRUE = 0x2,
+};
+
+enum dc_replay_power_opts {
+ replay_power_opt_invalid = 0x0,
+ replay_power_opt_smu_opt_static_screen = 0x1,
+ replay_power_opt_z10_static_screen = 0x10,
};
enum dcc_option {
@@ -355,9 +604,38 @@ enum dcc_option {
DCC_HALF_REQ_DISALBE = 2,
};
+enum in_game_fams_config {
+ INGAME_FAMS_SINGLE_DISP_ENABLE, // enable in-game fams
+ INGAME_FAMS_DISABLE, // disable in-game fams
+ INGAME_FAMS_MULTI_DISP_ENABLE, //enable in-game fams for multi-display
+ INGAME_FAMS_MULTI_DISP_CLAMPED_ONLY, //enable in-game fams for multi-display only for clamped RR strategies
+};
+
+/**
+ * enum pipe_split_policy - Pipe split strategy supported by DCN
+ *
+ * This enum is used to define the pipe split policy supported by DCN. By
+ * default, DC favors MPC_SPLIT_DYNAMIC.
+ */
enum pipe_split_policy {
+ /**
+ * @MPC_SPLIT_DYNAMIC: DC will automatically decide how to split the
+ * pipe in order to bring the best trade-off between performance and
+ * power consumption. This is the recommended option.
+ */
MPC_SPLIT_DYNAMIC = 0,
+
+ /**
+ * @MPC_SPLIT_AVOID: Avoid pipe split, which means that DC will not
+ * try any sort of split optimization.
+ */
MPC_SPLIT_AVOID = 1,
+
+ /**
+ * @MPC_SPLIT_AVOID_MULT_DISP: With this option, DC will only try to
+ * optimize the pipe utilization when using a single display; if the
+ * user connects to a second display, DC will avoid pipe split.
+ */
MPC_SPLIT_AVOID_MULT_DISP = 2,
};
@@ -379,16 +657,20 @@ enum dcn_pwr_state {
DCN_PWR_STATE_LOW_POWER = 3,
};
-#if defined(CONFIG_DRM_AMD_DC_DCN)
enum dcn_zstate_support_state {
DCN_ZSTATE_SUPPORT_UNKNOWN,
DCN_ZSTATE_SUPPORT_ALLOW,
+ DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY,
+ DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY,
+ DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY,
DCN_ZSTATE_SUPPORT_DISALLOW,
};
-#endif
+
/*
- * For any clocks that may differ per pipe
- * only the max is stored in this structure
+ * struct dc_clocks - DC pipe clocks
+ *
+ * For any clocks that may differ per pipe only the max is stored in this
+ * structure
*/
struct dc_clocks {
int dispclk_khz;
@@ -403,21 +685,50 @@ struct dc_clocks {
int phyclk_khz;
int dramclk_khz;
bool p_state_change_support;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
enum dcn_zstate_support_state zstate_support;
bool dtbclk_en;
-#endif
+ int ref_dtbclk_khz;
+ bool fclk_p_state_change_support;
enum dcn_pwr_state pwr_state;
/*
* Elements below are not compared for the purposes of
* optimization required
*/
bool prev_p_state_change_support;
+ bool fclk_prev_p_state_change_support;
+ int num_ways;
+ int host_router_bw_kbps[MAX_HOST_ROUTERS_NUM];
+
+ /*
+ * @fw_based_mclk_switching
+ *
+ * DC has a mechanism that leverage the variable refresh rate to switch
+ * memory clock in cases that we have a large latency to achieve the
+ * memory clock change and a short vblank window. DC has some
+ * requirements to enable this feature, and this field describes if the
+ * system support or not such a feature.
+ */
+ bool fw_based_mclk_switching;
+ bool fw_based_mclk_switching_shut_down;
+ int prev_num_ways;
enum dtm_pstate dtm_level;
int max_supported_dppclk_khz;
int max_supported_dispclk_khz;
int bw_dppclk_khz; /*a copy of dppclk_khz*/
int bw_dispclk_khz;
+ int idle_dramclk_khz;
+ int idle_fclk_khz;
+ int subvp_prefetch_dramclk_khz;
+ int subvp_prefetch_fclk_khz;
+
+ /* Stutter efficiency is technically not clock values
+ * but stored here so the values are part of the update_clocks call similar to num_ways
+ * Efficiencies are stored as percentage (0-100)
+ */
+ struct {
+ uint8_t base_efficiency; //LP1
+ uint8_t low_power_efficiency; //LP2
+ } stutter_efficiency;
};
struct dc_bw_validation_profile {
@@ -503,13 +814,83 @@ union root_clock_optimization_options {
uint32_t u32All;
};
+union fine_grain_clock_gating_enable_options {
+ struct {
+ bool dccg_global_fgcg_rep : 1; /* Global fine grain clock gating of repeaters */
+ bool dchub : 1; /* Display controller hub */
+ bool dchubbub : 1;
+ bool dpp : 1; /* Display pipes and planes */
+ bool opp : 1; /* Output pixel processing */
+ bool optc : 1; /* Output pipe timing combiner */
+ bool dio : 1; /* Display output */
+ bool dwb : 1; /* Display writeback */
+ bool mmhubbub : 1; /* Multimedia hub */
+ bool dmu : 1; /* Display core management unit */
+ bool az : 1; /* Azalia */
+ bool dchvm : 1;
+ bool dsc : 1; /* Display stream compression */
+
+ uint32_t reserved : 19;
+ } bits;
+ uint32_t u32All;
+};
+
+enum pg_hw_pipe_resources {
+ PG_HUBP = 0,
+ PG_DPP,
+ PG_DSC,
+ PG_MPCC,
+ PG_OPP,
+ PG_OPTC,
+ PG_DPSTREAM,
+ PG_HDMISTREAM,
+ PG_PHYSYMCLK,
+ PG_HW_PIPE_RESOURCES_NUM_ELEMENT
+};
+
+enum pg_hw_resources {
+ PG_DCCG = 0,
+ PG_DCIO,
+ PG_DIO,
+ PG_DCHUBBUB,
+ PG_DCHVM,
+ PG_DWB,
+ PG_HPO,
+ PG_DCOH,
+ PG_HW_RESOURCES_NUM_ELEMENT
+};
+
+struct pg_block_update {
+ bool pg_pipe_res_update[PG_HW_PIPE_RESOURCES_NUM_ELEMENT][MAX_PIPES];
+ bool pg_res_update[PG_HW_RESOURCES_NUM_ELEMENT];
+};
+
union dpia_debug_options {
struct {
- uint32_t disable_dpia:1;
- uint32_t force_non_lttpr:1;
- uint32_t extend_aux_rd_interval:1;
- uint32_t disable_mst_dsc_work_around:1;
- uint32_t reserved:28;
+ uint32_t disable_dpia:1; /* bit 0 */
+ uint32_t force_non_lttpr:1; /* bit 1 */
+ uint32_t extend_aux_rd_interval:1; /* bit 2 */
+ uint32_t disable_mst_dsc_work_around:1; /* bit 3 */
+ uint32_t enable_force_tbt3_work_around:1; /* bit 4 */
+ uint32_t disable_usb4_pm_support:1; /* bit 5 */
+ uint32_t enable_usb4_bw_zero_alloc_patch:1; /* bit 6 */
+ uint32_t reserved:25;
+ } bits;
+ uint32_t raw;
+};
+
+/* AUX wake work around options
+ * 0: enable/disable work around
+ * 1: use default timeout LINK_AUX_WAKE_TIMEOUT_MS
+ * 15-2: reserved
+ * 31-16: timeout in ms
+ */
+union aux_wake_wa_options {
+ struct {
+ uint32_t enable_wa : 1;
+ uint32_t use_default_timeout : 1;
+ uint32_t rsvd: 14;
+ uint32_t timeout_ms : 16;
} bits;
uint32_t raw;
};
@@ -518,6 +899,7 @@ struct dc_debug_data {
uint32_t ltFailCount;
uint32_t i2cErrorCount;
uint32_t auxErrorCount;
+ struct pipe_topology_history topology_history;
};
struct dc_phy_addr_space_config {
@@ -555,10 +937,13 @@ struct dc_virtual_addr_space_config {
struct dc_bounding_box_overrides {
int sr_exit_time_ns;
int sr_enter_plus_exit_time_ns;
+ int sr_exit_z8_time_ns;
+ int sr_enter_plus_exit_z8_time_ns;
int urgent_latency_ns;
int percent_of_ideal_drambw;
int dram_clock_change_latency_ns;
int dummy_clock_change_latency_ns;
+ int fclk_clock_change_latency_ns;
/* This forces a hard min on the DCFCLK we use
* for DML. Unlike the debug option for forcing
* DCFCLK, this override affects watermark calculations
@@ -569,15 +954,25 @@ struct dc_bounding_box_overrides {
struct dc_state;
struct resource_pool;
struct dce_hwseq;
+struct link_service;
+/*
+ * struct dc_debug_options - DC debug struct
+ *
+ * This struct provides a simple mechanism for developers to change some
+ * configurations, enable/disable features, and activate extra debug options.
+ * This can be very handy to narrow down whether some specific feature is
+ * causing an issue or not.
+ */
struct dc_debug_options {
bool native422_support;
bool disable_dsc;
enum visual_confirm visual_confirm;
+ int visual_confirm_rect_height;
+
bool sanity_checks;
bool max_disp_clk;
bool surface_trace;
- bool timing_trace;
bool clock_trace;
bool validation_trace;
bool bandwidth_calcs_trace;
@@ -587,15 +982,28 @@ struct dc_debug_options {
bool disable_stutter;
bool use_max_lb;
enum dcc_option disable_dcc;
+
+ /*
+ * @pipe_split_policy: Define which pipe split policy is used by the
+ * display core.
+ */
enum pipe_split_policy pipe_split_policy;
bool force_single_disp_pipe_split;
bool voltage_align_fclk;
bool disable_min_fclk;
+ bool hdcp_lc_force_fw_enable;
+ bool hdcp_lc_enable_sw_fallback;
+
bool disable_dfs_bypass;
bool disable_dpp_power_gate;
bool disable_hubp_power_gate;
bool disable_dsc_power_gate;
+ bool disable_optc_power_gate;
+ bool disable_hpo_power_gate;
+ bool disable_io_clk_power_gate;
+ bool disable_mem_power_gate;
+ bool disable_dio_power_gate;
int dsc_min_slice_height_override;
int dsc_bpp_increment_div;
bool disable_pplib_wm_range;
@@ -607,6 +1015,8 @@ struct dc_debug_options {
int sr_enter_plus_exit_time_dpm0_ns;
int sr_exit_time_ns;
int sr_enter_plus_exit_time_ns;
+ int sr_exit_z8_time_ns;
+ int sr_enter_plus_exit_z8_time_ns;
int urgent_latency_ns;
uint32_t underflow_assert_delay_us;
int percent_of_ideal_drambw;
@@ -616,11 +1026,8 @@ struct dc_debug_options {
bool disable_pplib_clock_request;
bool disable_clock_gate;
bool disable_mem_low_power;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
bool pstate_enabled;
-#endif
bool disable_dmcu;
- bool disable_psr;
bool force_abm_enable;
bool disable_stereo_support;
bool vsr_support;
@@ -633,22 +1040,21 @@ struct dc_debug_options {
bool hdmi20_disable;
bool skip_detection_link_training;
uint32_t edid_read_retry_times;
- bool remove_disconnect_edp;
unsigned int force_odm_combine; //bit vector based on otg inst
-#if defined(CONFIG_DRM_AMD_DC_DCN)
+ unsigned int seamless_boot_odm_combine;
unsigned int force_odm_combine_4to1; //bit vector based on otg inst
+ int minimum_z8_residency_time;
+ int minimum_z10_residency_time;
bool disable_z9_mpc;
-#endif
unsigned int force_fclk_khz;
bool enable_tri_buf;
+ bool ips_disallow_entry;
bool dmub_offload_enabled;
bool dmcub_emulation;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
bool disable_idle_power_optimizations;
unsigned int mall_size_override;
unsigned int mall_additional_timer_percent;
bool mall_error_as_fatal;
-#endif
bool dmub_command_table; /* for testing only */
struct dc_bw_validation_profile bw_val_profile;
bool disable_fec;
@@ -657,9 +1063,7 @@ struct dc_debug_options {
* watermarks are not affected.
*/
unsigned int force_min_dcfclk_mhz;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
int dwb_fi_phase;
-#endif
bool disable_timing_sync;
bool cm_in_bypass;
int force_clock_mode;/*every mode change.*/
@@ -669,85 +1073,129 @@ struct dc_debug_options {
bool enable_dmcub_surface_flip;
bool usbc_combo_phy_reset_wa;
bool enable_dram_clock_change_one_display_vactive;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
/* TODO - remove once tested */
bool legacy_dp2_lt;
bool set_mst_en_for_sst;
-#endif
+ bool disable_uhbr;
+ bool force_dp2_lt_fallback_method;
+ bool ignore_cable_id;
union mem_low_power_enable_options enable_mem_low_power;
union root_clock_optimization_options root_clock_optimization;
+ union fine_grain_clock_gating_enable_options enable_fine_grain_clock_gating;
bool hpo_optimization;
bool force_vblank_alignment;
/* Enable dmub aux for legacy ddc */
bool enable_dmub_aux_for_legacy_ddc;
- bool optimize_edp_link_rate; /* eDP ILR */
+ bool disable_fams;
+ enum in_game_fams_config disable_fams_gaming;
/* FEC/PSR1 sequence enable delay in 100us */
uint8_t fec_enable_delay_in100us;
bool enable_driver_sequence_debug;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
+ enum det_size crb_alloc_policy;
+ int crb_alloc_policy_min_disp_count;
bool disable_z10;
- bool enable_sw_cntl_psr;
+ bool enable_z9_disable_interface;
+ bool psr_skip_crtc_disable;
+ uint32_t ips_skip_crtc_disable_mask;
union dpia_debug_options dpia_debug;
-#endif
-};
-
-struct gpu_info_soc_bounding_box_v1_0;
-struct dc {
- struct dc_debug_options debug;
- struct dc_versions versions;
- struct dc_caps caps;
- struct dc_cap_funcs cap_funcs;
- struct dc_config config;
- struct dc_bounding_box_overrides bb_overrides;
- struct dc_bug_wa work_arounds;
- struct dc_context *ctx;
- struct dc_phy_addr_space_config vm_pa_config;
-
- uint8_t link_count;
- struct dc_link *links[MAX_PIPES * 2];
-
- struct dc_state *current_state;
- struct resource_pool *res_pool;
-
- struct clk_mgr *clk_mgr;
-
- /* Display Engine Clock levels */
- struct dm_pp_clock_levels sclk_lvls;
-
- /* Inputs into BW and WM calculations. */
- struct bw_calcs_dceip *bw_dceip;
- struct bw_calcs_vbios *bw_vbios;
-#ifdef CONFIG_DRM_AMD_DC_DCN
- struct dcn_soc_bounding_box *dcn_soc;
- struct dcn_ip_params *dcn_ip;
- struct display_mode_lib dml;
-#endif
-
- /* HW functions */
- struct hw_sequencer_funcs hwss;
- struct dce_hwseq *hwseq;
-
- /* Require to optimize clocks and bandwidth for added/removed planes */
- bool optimized_required;
- bool wm_optimized_required;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- bool idle_optimizations_allowed;
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- bool enable_c20_dtm_b0;
-#endif
-
- /* Require to maintain clocks and bandwidth for UEFI enabled HW */
-
- /* FBC compressor */
- struct compressor *fbc_compressor;
-
- struct dc_debug_data debug_data;
- struct dpcd_vendor_signature vendor_signature;
-
- const char *build_id;
- struct vm_helper *vm_helper;
+ bool disable_fixed_vs_aux_timeout_wa;
+ uint32_t fixed_vs_aux_delay_config_wa;
+ bool force_disable_subvp;
+ bool force_subvp_mclk_switch;
+ bool allow_sw_cursor_fallback;
+ unsigned int force_subvp_num_ways;
+ unsigned int force_mall_ss_num_ways;
+ bool alloc_extra_way_for_cursor;
+ uint32_t subvp_extra_lines;
+ bool disable_force_pstate_allow_on_hw_release;
+ bool force_usr_allow;
+ /* uses value at boot and disables switch */
+ bool disable_dtb_ref_clk_switch;
+ bool extended_blank_optimization;
+ union aux_wake_wa_options aux_wake_wa;
+ uint32_t mst_start_top_delay;
+ uint8_t psr_power_use_phy_fsm;
+ enum dml_hostvm_override_opts dml_hostvm_override;
+ bool dml_disallow_alternate_prefetch_modes;
+ bool use_legacy_soc_bb_mechanism;
+ bool exit_idle_opt_for_cursor_updates;
+ bool using_dml2;
+ bool enable_single_display_2to1_odm_policy;
+ bool enable_double_buffered_dsc_pg_support;
+ bool enable_dp_dig_pixel_rate_div_policy;
+ bool using_dml21;
+ enum lttpr_mode lttpr_mode_override;
+ unsigned int dsc_delay_factor_wa_x1000;
+ unsigned int min_prefetch_in_strobe_ns;
+ bool disable_unbounded_requesting;
+ bool dig_fifo_off_in_blank;
+ bool override_dispclk_programming;
+ bool otg_crc_db;
+ bool disallow_dispclk_dppclk_ds;
+ bool disable_fpo_optimizations;
+ bool support_eDP1_5;
+ uint32_t fpo_vactive_margin_us;
+ bool disable_fpo_vactive;
+ bool disable_boot_optimizations;
+ bool override_odm_optimization;
+ bool minimize_dispclk_using_odm;
+ bool disable_subvp_high_refresh;
+ bool disable_dp_plus_plus_wa;
+ uint32_t fpo_vactive_min_active_margin_us;
+ uint32_t fpo_vactive_max_blank_us;
+ bool enable_hpo_pg_support;
+ bool disable_dc_mode_overwrite;
+ bool replay_skip_crtc_disabled;
+ bool ignore_pg;/*do nothing, let pmfw control it*/
+ bool psp_disabled_wa;
+ unsigned int ips2_eval_delay_us;
+ unsigned int ips2_entry_delay_us;
+ bool optimize_ips_handshake;
+ bool disable_dmub_reallow_idle;
+ bool disable_timeout;
+ bool disable_extblankadj;
+ bool enable_idle_reg_checks;
+ unsigned int static_screen_wait_frames;
+ uint32_t pwm_freq;
+ bool force_chroma_subsampling_1tap;
+ unsigned int dcc_meta_propagation_delay_us;
+ bool disable_422_left_edge_pixel;
+ bool dml21_force_pstate_method;
+ uint32_t dml21_force_pstate_method_values[MAX_PIPES];
+ uint32_t dml21_disable_pstate_method_mask;
+ union fw_assisted_mclk_switch_version fams_version;
+ union dmub_fams2_global_feature_config fams2_config;
+ unsigned int force_cositing;
+ unsigned int disable_spl;
+ unsigned int force_easf;
+ unsigned int force_sharpness;
+ unsigned int force_sharpness_level;
+ unsigned int force_lls;
+ bool notify_dpia_hr_bw;
+ bool enable_ips_visual_confirm;
+ unsigned int sharpen_policy;
+ unsigned int scale_to_sharpness_policy;
+ unsigned int enable_oled_edp_power_up_opt;
+ bool enable_hblank_borrow;
+ bool force_subvp_df_throttle;
+ uint32_t acpi_transition_bitmasks[MAX_PIPES];
+ bool enable_pg_cntl_debug_logs;
+ unsigned int auxless_alpm_lfps_setup_ns;
+ unsigned int auxless_alpm_lfps_period_ns;
+ unsigned int auxless_alpm_lfps_silence_ns;
+ unsigned int auxless_alpm_lfps_t1t2_us;
+ short auxless_alpm_lfps_t1t2_offset_us;
+ bool disable_stutter_for_wm_program;
+ bool enable_block_sequence_programming;
+};
+
+
+/* Generic structure that can be used to query properties of DC. More fields
+ * can be added as required.
+ */
+struct dc_current_properties {
+ unsigned int cursor_size_limit;
};
enum frame_buffer_mode {
@@ -765,6 +1213,8 @@ struct dchub_init_data {
bool dchub_info_valid;
};
+struct dml2_soc_bb;
+
struct dc_init_data {
struct hw_asic_id asic_id;
void *driver; /* ctx */
@@ -786,17 +1236,22 @@ struct dc_init_data {
uint64_t log_mask;
struct dpcd_vendor_signature vendor_signature;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
bool force_smu_not_present;
-#endif
+ /*
+ * IP offset for run time initialization of register addresses
+ *
+ * DCN3.5+ will fail dc_create() if these fields are null for them. They are
+ * applicable starting with DCN32/321 and are not used for ASICs upstreamed
+ * before them.
+ */
+ uint32_t *dcn_reg_offsets;
+ uint32_t *nbio_reg_offsets;
+ uint32_t *clk_reg_offsets;
+ void *bb_from_dmub;
};
struct dc_callback_init {
-#ifdef CONFIG_DRM_AMD_DC_HDCP
struct cp_psp cp_psp;
-#else
- uint8_t reserved;
-#endif
};
struct dc *dc_create(const struct dc_init_data *init_params);
@@ -811,9 +1266,7 @@ void dc_init_callbacks(struct dc *dc,
void dc_deinit_callbacks(struct dc *dc);
void dc_destroy(struct dc **dc);
-/*******************************************************************************
- * Surface Interfaces
- ******************************************************************************/
+/* Surface Interfaces */
enum {
TRANSFER_FUNC_POINTS = 1025
@@ -895,6 +1348,38 @@ union dc_3dlut_state {
};
+#define MATRIX_9C__DIM_128_ALIGNED_LEN 16 // 9+7 : 9 * 8 + 7 * 8 = 72 + 56 = 128 % 128 = 0
+#define MATRIX_17C__DIM_128_ALIGNED_LEN 32 //17+15: 17 * 8 + 15 * 8 = 136 + 120 = 256 % 128 = 0
+#define MATRIX_33C__DIM_128_ALIGNED_LEN 64 //33+31: 33 * 8 + 31 * 8 = 264 + 248 = 512 % 128 = 0
+
+struct lut_rgb {
+ uint16_t b;
+ uint16_t g;
+ uint16_t r;
+ uint16_t padding;
+};
+
+//this structure maps directly to how the lut will read it from memory
+struct lut_mem_mapping {
+ union {
+ //NATIVE MODE 1, 2
+ //RGB layout [b][g][r] //red is 128 byte aligned
+ //BGR layout [r][g][b] //blue is 128 byte aligned
+ struct lut_rgb rgb_17c[17][17][MATRIX_17C__DIM_128_ALIGNED_LEN];
+ struct lut_rgb rgb_33c[33][33][MATRIX_33C__DIM_128_ALIGNED_LEN];
+
+ //TRANSFORMED
+ uint16_t linear_rgb[(33*33*33*4/128+1)*128];
+ };
+ uint16_t size;
+};
+
+struct dc_rmcm_3dlut {
+ bool isInUse;
+ const struct dc_stream_state *stream;
+ uint8_t protection_bits;
+};
+
struct dc_3dlut {
struct kref refcount;
struct tetrahedral_params lut_3d;
@@ -931,7 +1416,6 @@ union surface_update_flags {
uint32_t in_transfer_func_change:1;
uint32_t input_csc_change:1;
uint32_t coeff_reduction_change:1;
- uint32_t output_tf_change:1;
uint32_t pixel_format_change:1;
uint32_t plane_size_change:1;
uint32_t gamut_remap_change:1;
@@ -944,12 +1428,17 @@ union surface_update_flags {
uint32_t clock_change:1;
uint32_t stereo_format_change:1;
uint32_t lut_3d:1;
+ uint32_t tmz_changed:1;
+ uint32_t mcm_transfer_function_enable_change:1; /* disable or enable MCM transfer func */
uint32_t full_update:1;
+ uint32_t sdr_white_level_nits:1;
} bits;
uint32_t raw;
};
+#define DC_REMOVE_PLANE_POINTERS 1
+
struct dc_plane_state {
struct dc_plane_address address;
struct dc_plane_flip_time time;
@@ -960,13 +1449,13 @@ struct dc_plane_state {
struct rect clip_rect;
struct plane_size plane_size;
- union dc_tiling_info tiling_info;
+ struct dc_tiling_info tiling_info;
struct dc_plane_dcc_param dcc;
- struct dc_gamma *gamma_correction;
- struct dc_transfer_func *in_transfer_func;
- struct dc_bias_and_scale *bias_and_scale;
+ struct dc_gamma gamma_correction;
+ struct dc_transfer_func in_transfer_func;
+ struct dc_bias_and_scale bias_and_scale;
struct dc_csc_transform input_csc_color_matrix;
struct fixed31_32 coeff_reduction_factor;
struct fixed31_32 hdr_mult;
@@ -977,19 +1466,18 @@ struct dc_plane_state {
enum dc_color_space color_space;
- struct dc_3dlut *lut3d_func;
- struct dc_transfer_func *in_shaper_func;
- struct dc_transfer_func *blend_tf;
+ struct dc_3dlut lut3d_func;
+ struct dc_transfer_func in_shaper_func;
+ struct dc_transfer_func blend_tf;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
struct dc_transfer_func *gamcor_tf;
-#endif
enum surface_pixel_format format;
enum dc_rotation_angle rotation;
enum plane_stereo_format stereo_format;
bool is_tiling_rotated;
bool per_pixel_alpha;
+ bool pre_multiplied_alpha;
bool global_alpha;
int global_alpha_value;
bool visible;
@@ -1008,14 +1496,33 @@ struct dc_plane_state {
/* HACK: Workaround for forcing full reprogramming under some conditions */
bool force_full_update;
+ bool is_phantom; // TODO: Change mall_stream_config into mall_plane_config instead
+
/* private to dc_surface.c */
enum dc_irq_source irq_source;
struct kref refcount;
+ struct tg_color visual_confirm_color;
+
+ bool is_statically_allocated;
+ enum chroma_cositing cositing;
+ enum dc_cm2_shaper_3dlut_setting mcm_shaper_3dlut_setting;
+ bool mcm_lut1d_enable;
+ struct dc_cm2_func_luts mcm_luts;
+ bool lut_bank_a;
+ enum mpcc_movable_cm_location mcm_location;
+ struct dc_csc_transform cursor_csc_color_matrix;
+ bool adaptive_sharpness_en;
+ int adaptive_sharpness_policy;
+ int sharpness_level;
+ enum linear_light_scaling linear_light_scaling;
+ unsigned int sdr_white_level_nits;
+ struct spl_sharpness_range sharpness_range;
+ enum sharpness_range_source sharpness_source;
};
struct dc_plane_info {
struct plane_size plane_size;
- union dc_tiling_info tiling_info;
+ struct dc_tiling_info tiling_info;
struct dc_plane_dcc_param dcc;
enum surface_pixel_format format;
enum dc_rotation_angle rotation;
@@ -1024,10 +1531,277 @@ struct dc_plane_info {
bool horizontal_mirror;
bool visible;
bool per_pixel_alpha;
+ bool pre_multiplied_alpha;
bool global_alpha;
int global_alpha_value;
bool input_csc_enabled;
int layer_index;
+ enum chroma_cositing cositing;
+};
+
+#include "dc_stream.h"
+
+struct dc_scratch_space {
+ /* used to temporarily backup plane states of a stream during
+ * dc update. The reason is that plane states are overwritten
+ * with surface updates in dc update. Once they are overwritten
+ * current state is no longer valid. We want to temporarily
+ * store current value in plane states so we can still recover
+ * a valid current state during dc update.
+ */
+ struct dc_plane_state plane_states[MAX_SURFACES];
+
+ struct dc_stream_state stream_state;
+};
+
+/*
+ * A link contains one or more sinks and their connected status.
+ * The currently active signal type (HDMI, DP-SST, DP-MST) is also reported.
+ */
+ struct dc_link {
+ struct dc_sink *remote_sinks[MAX_SINKS_PER_LINK];
+ unsigned int sink_count;
+ struct dc_sink *local_sink;
+ unsigned int link_index;
+ enum dc_connection_type type;
+ enum signal_type connector_signal;
+ enum dc_irq_source irq_source_hpd;
+ enum dc_irq_source irq_source_hpd_rx;/* aka DP Short Pulse */
+ enum dc_irq_source irq_source_read_request;/* Read Request */
+
+ bool is_hpd_filter_disabled;
+ bool dp_ss_off;
+
+ /**
+ * @link_state_valid:
+ *
+ * If there is no link and local sink, this variable should be set to
+ * false. Otherwise, it should be set to true; usually, the function
+ * core_link_enable_stream sets this field to true.
+ */
+ bool link_state_valid;
+ bool aux_access_disabled;
+ bool sync_lt_in_progress;
+ bool skip_stream_reenable;
+ bool is_internal_display;
+ /** @todo Rename. Flag an endpoint as having a programmable mapping to a DIG encoder. */
+ bool is_dig_mapping_flexible;
+ bool hpd_status; /* HPD status of link without physical HPD pin. */
+ bool is_hpd_pending; /* Indicates a new received hpd */
+
+ /* USB4 DPIA links skip verifying link cap, instead performing the fallback method
+ * for every link training. This is incompatible with DP LL compliance automation,
+ * which expects the same link settings to be used every retry on a link loss.
+ * This flag is used to skip the fallback when link loss occurs during automation.
+ */
+ bool skip_fallback_on_link_loss;
+
+ bool edp_sink_present;
+
+ struct dp_trace dp_trace;
+
+ /* caps is the same as reported_link_cap. link_training uses
+ * reported_link_cap. Will clean up. TODO
+ */
+ struct dc_link_settings reported_link_cap;
+ struct dc_link_settings verified_link_cap;
+ struct dc_link_settings cur_link_settings;
+ struct dc_lane_settings cur_lane_setting[LANE_COUNT_DP_MAX];
+ struct dc_link_settings preferred_link_setting;
+ /* preferred_training_settings are override values that
+ * come from DM. DM is responsible for the memory
+ * management of the override pointers.
+ */
+ struct dc_link_training_overrides preferred_training_settings;
+ struct dp_audio_test_data audio_test_data;
+
+ uint8_t ddc_hw_inst;
+
+ uint8_t hpd_src;
+
+ uint8_t link_enc_hw_inst;
+ /* DIG link encoder ID. Used as index in link encoder resource pool.
+ * For links with fixed mapping to DIG, this is not changed after dc_link
+ * object creation.
+ */
+ enum engine_id eng_id;
+ enum engine_id dpia_preferred_eng_id;
+
+ bool test_pattern_enabled;
+ /* Pending/Current test pattern are only used to perform and track
+ * FIXED_VS retimer test pattern/lane adjustment override state.
+ * Pending allows link HWSS to differentiate PHY vs non-PHY pattern,
+ * to perform specific lane adjust overrides before setting certain
+ * PHY test patterns. In cases when lane adjust and set test pattern
+ * calls are not performed atomically (i.e. performing link training),
+ * pending_test_pattern will be invalid or contain a non-PHY test pattern
+ * and current_test_pattern will contain required context for any future
+ * set pattern/set lane adjust to transition between override state(s).
+ * */
+ enum dp_test_pattern current_test_pattern;
+ enum dp_test_pattern pending_test_pattern;
+
+ union compliance_test_state compliance_test_state;
+
+ void *priv;
+
+ struct ddc_service *ddc;
+
+ enum dp_panel_mode panel_mode;
+ bool aux_mode;
+
+ /* Private to DC core */
+
+ const struct dc *dc;
+
+ struct dc_context *ctx;
+
+ struct panel_cntl *panel_cntl;
+ struct link_encoder *link_enc;
+ struct graphics_object_id link_id;
+ /* Endpoint type distinguishes display endpoints which do not have entries
+ * in the BIOS connector table from those that do. Helps when tracking link
+ * encoder to display endpoint assignments.
+ */
+ enum display_endpoint_type ep_type;
+ union ddi_channel_mapping ddi_channel_mapping;
+ struct connector_device_tag_info device_tag;
+ struct dpcd_caps dpcd_caps;
+ uint32_t dongle_max_pix_clk;
+ unsigned short chip_caps;
+ unsigned int dpcd_sink_count;
+ struct hdcp_caps hdcp_caps;
+ enum edp_revision edp_revision;
+ union dpcd_sink_ext_caps dpcd_sink_ext_caps;
+
+ struct psr_settings psr_settings;
+ struct replay_settings replay_settings;
+
+ /* Drive settings read from integrated info table */
+ struct dc_lane_settings bios_forced_drive_settings;
+
+ /* Vendor specific LTTPR workaround variables */
+ uint8_t vendor_specific_lttpr_link_rate_wa;
+ bool apply_vendor_specific_lttpr_link_rate_wa;
+
+ /* MST record stream using this link */
+ struct link_flags {
+ bool dp_keep_receiver_powered;
+ bool dp_skip_DID2;
+ bool dp_skip_reset_segment;
+ bool dp_skip_fs_144hz;
+ bool dp_mot_reset_segment;
+ /* Some USB4 docks do not handle turning off MST DSC once it has been enabled. */
+ bool dpia_mst_dsc_always_on;
+ /* Forced DPIA into TBT3 compatibility mode. */
+ bool dpia_forced_tbt3_mode;
+ bool dongle_mode_timing_override;
+ bool blank_stream_on_ocs_change;
+ bool read_dpcd204h_on_irq_hpd;
+ bool force_dp_ffe_preset;
+ bool skip_phy_ssc_reduction;
+ } wa_flags;
+ union dc_dp_ffe_preset forced_dp_ffe_preset;
+ struct link_mst_stream_allocation_table mst_stream_alloc_table;
+
+ struct dc_link_status link_status;
+ struct dprx_states dprx_states;
+
+ struct gpio *hpd_gpio;
+ enum dc_link_fec_state fec_state;
+ bool is_dds;
+ bool is_display_mux_present;
+ bool link_powered_externally; // Used to bypass hardware sequencing delays when panel is powered down forcibly
+
+ struct dc_panel_config panel_config;
+ struct phy_state phy_state;
+ uint32_t phy_transition_bitmask;
+ // BW ALLOCATION USB4 ONLY
+ struct dc_dpia_bw_alloc dpia_bw_alloc_config;
+ bool skip_implict_edp_power_control;
+ enum backlight_control_type backlight_control_type;
+};
+
+struct dc {
+ struct dc_debug_options debug;
+ struct dc_versions versions;
+ struct dc_caps caps;
+ struct dc_check_config check_config;
+ struct dc_cap_funcs cap_funcs;
+ struct dc_config config;
+ struct dc_bounding_box_overrides bb_overrides;
+ struct dc_bug_wa work_arounds;
+ struct dc_context *ctx;
+ struct dc_phy_addr_space_config vm_pa_config;
+
+ uint8_t link_count;
+ struct dc_link *links[MAX_LINKS];
+ uint8_t lowest_dpia_link_index;
+ struct link_service *link_srv;
+
+ struct dc_state *current_state;
+ struct resource_pool *res_pool;
+
+ struct clk_mgr *clk_mgr;
+
+ /* Display Engine Clock levels */
+ struct dm_pp_clock_levels sclk_lvls;
+
+ /* Inputs into BW and WM calculations. */
+ struct bw_calcs_dceip *bw_dceip;
+ struct bw_calcs_vbios *bw_vbios;
+ struct dcn_soc_bounding_box *dcn_soc;
+ struct dcn_ip_params *dcn_ip;
+ struct display_mode_lib dml;
+
+ /* HW functions */
+ struct hw_sequencer_funcs hwss;
+ struct dce_hwseq *hwseq;
+
+ /* Require to optimize clocks and bandwidth for added/removed planes */
+ bool optimized_required;
+ bool idle_optimizations_allowed;
+ bool enable_c20_dtm_b0;
+
+ /* Require to maintain clocks and bandwidth for UEFI enabled HW */
+
+ /* For eDP to know the switching state of SmartMux */
+ bool is_switch_in_progress_orig;
+ bool is_switch_in_progress_dest;
+
+ /* FBC compressor */
+ struct compressor *fbc_compressor;
+
+ struct dc_debug_data debug_data;
+ struct dpcd_vendor_signature vendor_signature;
+
+ const char *build_id;
+ struct vm_helper *vm_helper;
+
+ uint32_t *dcn_reg_offsets;
+ uint32_t *nbio_reg_offsets;
+ uint32_t *clk_reg_offsets;
+
+ /* Scratch memory */
+ struct {
+ struct {
+ /*
+ * For matching clock_limits table in driver with table
+ * from PMFW.
+ */
+ struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
+ } update_bw_bounding_box;
+ struct dc_scratch_space current_state;
+ struct dc_scratch_space new_state;
+ struct dc_stream_state temp_stream; // Used so we don't need to allocate stream on the stack
+ struct dc_link temp_link;
+ bool pipes_to_unlock_first[MAX_PIPES]; /* Any of the pipes indicated here should be unlocked first */
+ } scratch;
+
+ struct dml2_configuration_options dml2_options;
+ struct dml2_configuration_options dml2_dc_power_options;
+ enum dc_acpi_cm_power_state power_state;
+ struct soc_and_ip_translator *soc_and_ip_translator;
};
struct dc_scaling_info {
@@ -1037,6 +1811,17 @@ struct dc_scaling_info {
struct scaling_taps scaling_quality;
};
+struct dc_fast_update {
+ const struct dc_flip_addrs *flip_addr;
+ const struct dc_gamma *gamma;
+ const struct colorspace_transform *gamut_remap_matrix;
+ const struct dc_csc_transform *input_csc_color_matrix;
+ const struct fixed31_32 *coeff_reduction_factor;
+ struct dc_transfer_func *out_transfer_func;
+ struct dc_csc_transform *output_csc_transform;
+ const struct dc_csc_transform *cursor_csc_color_matrix;
+};
+
struct dc_surface_update {
struct dc_plane_state *surface;
@@ -1057,18 +1842,44 @@ struct dc_surface_update {
const struct dc_3dlut *lut3d_func;
const struct dc_transfer_func *blend_tf;
const struct colorspace_transform *gamut_remap_matrix;
+ /*
+ * Color Transformations for pre-blend MCM (Shaper, 3DLUT, 1DLUT)
+ *
+ * change cm2_params.component_settings: Full update
+ * change cm2_params.cm2_luts: Fast update
+ */
+ const struct dc_cm2_parameters *cm2_params;
+ const struct dc_csc_transform *cursor_csc_color_matrix;
+ unsigned int sdr_white_level_nits;
+ struct dc_bias_and_scale bias_and_scale;
+};
+
+struct dc_underflow_debug_data {
+ struct dcn_hubbub_reg_state *hubbub_reg_state;
+ struct dcn_hubp_reg_state *hubp_reg_state[MAX_PIPES];
+ struct dcn_dpp_reg_state *dpp_reg_state[MAX_PIPES];
+ struct dcn_mpc_reg_state *mpc_reg_state[MAX_PIPES];
+ struct dcn_opp_reg_state *opp_reg_state[MAX_PIPES];
+ struct dcn_dsc_reg_state *dsc_reg_state[MAX_PIPES];
+ struct dcn_optc_reg_state *optc_reg_state[MAX_PIPES];
+ struct dcn_dccg_reg_state *dccg_reg_state[MAX_PIPES];
+};
+
+struct power_features {
+ bool ips;
+ bool rcg;
+ bool replay;
+ bool dds;
+ bool sprs;
+ bool psr;
+ bool fams;
+ bool mpo;
+ bool uclk_p_state;
};
/*
* Create a new surface with default parameters;
*/
-struct dc_plane_state *dc_create_plane_state(struct dc *dc);
-const struct dc_plane_status *dc_plane_get_status(
- const struct dc_plane_state *plane_state);
-
-void dc_plane_state_retain(struct dc_plane_state *plane_state);
-void dc_plane_state_release(struct dc_plane_state *plane_state);
-
void dc_gamma_retain(struct dc_gamma *dc_gamma);
void dc_gamma_release(struct dc_gamma **dc_gamma);
struct dc_gamma *dc_create_gamma(void);
@@ -1080,80 +1891,66 @@ struct dc_transfer_func *dc_create_transfer_func(void);
struct dc_3dlut *dc_create_3dlut_func(void);
void dc_3dlut_func_release(struct dc_3dlut *lut);
void dc_3dlut_func_retain(struct dc_3dlut *lut);
-/*
- * This structure holds a surface address. There could be multiple addresses
- * in cases such as Stereo 3D, Planar YUV, etc. Other per-flip attributes such
- * as frame durations and DCC format can also be set.
- */
-struct dc_flip_addrs {
- struct dc_plane_address address;
- unsigned int flip_timestamp_in_us;
- bool flip_immediate;
- /* TODO: add flip duration for FreeSync */
- bool triplebuffer_flips;
-};
void dc_post_update_surfaces_to_stream(
struct dc *dc);
-#include "dc_stream.h"
-
-/*
- * Structure to store surface/stream associations for validation
+/**
+ * struct dc_validation_set - Struct to store surface/stream associations for validation
*/
struct dc_validation_set {
+ /**
+ * @stream: Stream state properties
+ */
struct dc_stream_state *stream;
+
+ /**
+ * @plane_states: Surface state
+ */
struct dc_plane_state *plane_states[MAX_SURFACES];
+
+ /**
+ * @plane_count: Total of active planes
+ */
uint8_t plane_count;
};
-bool dc_validate_seamless_boot_timing(const struct dc *dc,
+bool dc_validate_boot_timing(const struct dc *dc,
const struct dc_sink *sink,
struct dc_crtc_timing *crtc_timing);
enum dc_status dc_validate_plane(struct dc *dc, const struct dc_plane_state *plane_state);
-void get_clock_requirements_for_state(struct dc_state *state, struct AsicStateEx *info);
+enum dc_status dc_validate_with_context(struct dc *dc,
+ const struct dc_validation_set set[],
+ int set_count,
+ struct dc_state *context,
+ enum dc_validate_mode validate_mode);
bool dc_set_generic_gpio_for_stereo(bool enable,
struct gpio_service *gpio_service);
-/*
- * fast_validate: we return after determining if we can support the new state,
- * but before we populate the programming info
- */
enum dc_status dc_validate_global_state(
struct dc *dc,
struct dc_state *new_ctx,
- bool fast_validate);
+ enum dc_validate_mode validate_mode);
-
-void dc_resource_state_construct(
- const struct dc *dc,
- struct dc_state *dst_ctx);
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
bool dc_acquire_release_mpc_3dlut(
struct dc *dc, bool acquire,
struct dc_stream_state *stream,
struct dc_3dlut **lut,
struct dc_transfer_func **shaper);
-#endif
-
-void dc_resource_state_copy_construct(
- const struct dc_state *src_ctx,
- struct dc_state *dst_ctx);
-
-void dc_resource_state_copy_construct_current(
- const struct dc *dc,
- struct dc_state *dst_ctx);
-
-void dc_resource_state_destruct(struct dc_state *context);
bool dc_resource_is_dsc_encoding_supported(const struct dc *dc);
-
+void get_audio_check(struct audio_info *aud_modes,
+ struct audio_check *aud_chk);
+
+bool fast_nonaddr_updates_exist(struct dc_fast_update *fast_update, int surface_count);
+void populate_fast_updates(struct dc_fast_update *fast_update,
+ struct dc_surface_update *srf_updates,
+ int surface_count,
+ struct dc_stream_update *stream_update);
/*
- * TODO update to make it about validation sets
* Set up streams and links associated to drive sinks
* The streams parameter is an absolute set of all active streams.
*
@@ -1161,118 +1958,591 @@ bool dc_resource_is_dsc_encoding_supported(const struct dc *dc);
* Phy, Encoder, Timing Generator are programmed and enabled.
* New streams are enabled with blank stream; no memory read.
*/
-bool dc_commit_state(struct dc *dc, struct dc_state *context);
-
-struct dc_state *dc_create_state(struct dc *dc);
-struct dc_state *dc_copy_state(struct dc_state *src_ctx);
-void dc_retain_state(struct dc_state *context);
-void dc_release_state(struct dc_state *context);
-
-/*******************************************************************************
- * Link Interfaces
- ******************************************************************************/
-
-struct dpcd_caps {
- union dpcd_rev dpcd_rev;
- union max_lane_count max_ln_count;
- union max_down_spread max_down_spread;
- union dprx_feature dprx_feature;
-
- /* valid only for eDP v1.4 or higher*/
- uint8_t edp_supported_link_rates_count;
- enum dc_link_rate edp_supported_link_rates[8];
-
- /* dongle type (DP converter, CV smart dongle) */
- enum display_dongle_type dongle_type;
- /* branch device or sink device */
- bool is_branch_dev;
- /* Dongle's downstream count. */
- union sink_count sink_count;
- /* If dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER,
- indicates 'Frame Sequential-to-lllFrame Pack' conversion capability.*/
- struct dc_dongle_caps dongle_caps;
-
- uint32_t sink_dev_id;
- int8_t sink_dev_id_str[6];
- int8_t sink_hw_revision;
- int8_t sink_fw_revision[2];
-
- uint32_t branch_dev_id;
- int8_t branch_dev_name[6];
- int8_t branch_hw_revision;
- int8_t branch_fw_revision[2];
-
- bool allow_invalid_MSA_timing_param;
- bool panel_mode_edp;
- bool dpcd_display_control_capable;
- bool ext_receiver_cap_field_present;
- bool dynamic_backlight_capable_edp;
- union dpcd_fec_capability fec_cap;
- struct dpcd_dsc_capabilities dsc_caps;
- struct dc_lttpr_caps lttpr_caps;
- struct psr_caps psr_caps;
- struct dpcd_usb4_dp_tunneling_info usb4_dp_tun_info;
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- union dp_128b_132b_supported_link_rates dp_128b_132b_supported_link_rates;
- union dp_main_line_channel_coding_cap channel_coding_cap;
- union dp_sink_video_fallback_formats fallback_formats;
- union dp_fec_capability1 fec_cap1;
-#endif
-};
-
-union dpcd_sink_ext_caps {
- struct {
- /* 0 - Sink supports backlight adjust via PWM during SDR/HDR mode
- * 1 - Sink supports backlight adjust via AUX during SDR/HDR mode.
- */
- uint8_t sdr_aux_backlight_control : 1;
- uint8_t hdr_aux_backlight_control : 1;
- uint8_t reserved_1 : 2;
- uint8_t oled : 1;
- uint8_t reserved : 3;
- } bits;
- uint8_t raw;
-};
+enum dc_status dc_commit_streams(struct dc *dc, struct dc_commit_streams_params *params);
-#if defined(CONFIG_DRM_AMD_DC_HDCP)
-union hdcp_rx_caps {
- struct {
- uint8_t version;
- uint8_t reserved;
- struct {
- uint8_t repeater : 1;
- uint8_t hdcp_capable : 1;
- uint8_t reserved : 6;
- } byte0;
- } fields;
- uint8_t raw[3];
-};
-union hdcp_bcaps {
- struct {
- uint8_t HDCP_CAPABLE:1;
- uint8_t REPEATER:1;
- uint8_t RESERVED:6;
- } bits;
- uint8_t raw;
-};
+struct dc_plane_state *dc_get_surface_for_mpcc(struct dc *dc,
+ struct dc_stream_state *stream,
+ int mpcc_inst);
+
+
+uint32_t dc_get_opp_for_plane(struct dc *dc, struct dc_plane_state *plane);
+
+void dc_set_disable_128b_132b_stream_overhead(bool disable);
-struct hdcp_caps {
- union hdcp_rx_caps rx_caps;
- union hdcp_bcaps bcaps;
+/* The function returns minimum bandwidth required to drive a given timing
+ * return - minimum required timing bandwidth in kbps.
+ */
+uint32_t dc_bandwidth_in_kbps_from_timing(
+ const struct dc_crtc_timing *timing,
+ const enum dc_link_encoding_format link_encoding);
+
+/* Link Interfaces */
+/* Return an enumerated dc_link.
+ * dc_link order is constant and determined at
+ * boot time. They cannot be created or destroyed.
+ * Use dc_get_caps() to get number of links.
+ */
+struct dc_link *dc_get_link_at_index(struct dc *dc, uint32_t link_index);
+
+/* Return instance id of the edp link. Inst 0 is primary edp link. */
+bool dc_get_edp_link_panel_inst(const struct dc *dc,
+ const struct dc_link *link,
+ unsigned int *inst_out);
+
+/* Return an array of link pointers to edp links. */
+void dc_get_edp_links(const struct dc *dc,
+ struct dc_link **edp_links,
+ int *edp_num);
+
+void dc_set_edp_power(const struct dc *dc, struct dc_link *edp_link,
+ bool powerOn);
+
+/* The function initiates detection handshake over the given link. It first
+ * determines if there are display connections over the link. If so it initiates
+ * detection protocols supported by the connected receiver device. The function
+ * contains protocol specific handshake sequences which are sometimes mandatory
+ * to establish a proper connection between TX and RX. So it is always
+ * recommended to call this function as the first link operation upon HPD event
+ * or power up event. Upon completion, the function will update link structure
+ * in place based on latest RX capabilities. The function may also cause dpms
+ * to be reset to off for all currently enabled streams to the link. It is DM's
+ * responsibility to serialize detection and DPMS updates.
+ *
+ * @reason - Indicate which event triggers this detection. dc may customize
+ * detection flow depending on the triggering events.
+ * return false - if detection is not fully completed. This could happen when
+ * there is an unrecoverable error during detection or detection is partially
+ * completed (detection has been delegated to dm mst manager ie.
+ * link->connection_type == dc_connection_mst_branch when returning false).
+ * return true - detection is completed, link has been fully updated with latest
+ * detection result.
+ */
+bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason);
+
+struct dc_sink_init_data;
+
+/* When link connection type is dc_connection_mst_branch, remote sink can be
+ * added to the link. The interface creates a remote sink and associates it with
+ * current link. The sink will be retained by link until remove remote sink is
+ * called.
+ *
+ * @dc_link - link the remote sink will be added to.
+ * @edid - byte array of EDID raw data.
+ * @len - size of the edid in byte
+ * @init_data -
+ */
+struct dc_sink *dc_link_add_remote_sink(
+ struct dc_link *dc_link,
+ const uint8_t *edid,
+ int len,
+ struct dc_sink_init_data *init_data);
+
+/* Remove remote sink from a link with dc_connection_mst_branch connection type.
+ * @link - link the sink should be removed from
+ * @sink - sink to be removed.
+ */
+void dc_link_remove_remote_sink(
+ struct dc_link *link,
+ struct dc_sink *sink);
+
+/* Enable HPD interrupt handler for a given link */
+void dc_link_enable_hpd(const struct dc_link *link);
+
+/* Disable HPD interrupt handler for a given link */
+void dc_link_disable_hpd(const struct dc_link *link);
+
+/* determine if there is a sink connected to the link
+ *
+ * @type - dc_connection_single if connected, dc_connection_none otherwise.
+ * return - false if an unexpected error occurs, true otherwise.
+ *
+ * NOTE: This function doesn't detect downstream sink connections i.e
+ * dc_connection_mst_branch, dc_connection_sst_branch. In this case, it will
+ * return dc_connection_single if the branch device is connected regardless of
+ * downstream sink's connection status.
+ */
+bool dc_link_detect_connection_type(struct dc_link *link,
+ enum dc_connection_type *type);
+
+/* query current hpd pin value
+ * return - true HPD is asserted (HPD high), false otherwise (HPD low)
+ *
+ */
+bool dc_link_get_hpd_state(struct dc_link *link);
+
+/* Getter for cached link status from given link */
+const struct dc_link_status *dc_link_get_status(const struct dc_link *link);
+
+/* enable/disable hardware HPD filter.
+ *
+ * @link - The link the HPD pin is associated with.
+ * @enable = true - enable hardware HPD filter. HPD event will only be queued to irq
+ * handler once after no HPD change has been detected within dc default HPD
+ * filtering interval since last HPD event. i.e if display keeps toggling hpd
+ * pulses within default HPD interval, no HPD event will be received until HPD
+ * toggles have stopped. Then HPD event will be queued to irq handler once after
+ * dc default HPD filtering interval since last HPD event.
+ *
+ * @enable = false - disable hardware HPD filter. HPD event will be queued
+ * immediately to irq handler after no HPD change has been detected within
+ * IRQ_HPD (aka HPD short pulse) interval (i.e 2ms).
+ */
+void dc_link_enable_hpd_filter(struct dc_link *link, bool enable);
+
+/* submit i2c read/write payloads through ddc channel
+ * @link_index - index to a link with ddc in i2c mode
+ * @cmd - i2c command structure
+ * return - true if success, false otherwise.
+ */
+bool dc_submit_i2c(
+ struct dc *dc,
+ uint32_t link_index,
+ struct i2c_command *cmd);
+
+/* submit i2c read/write payloads through oem channel
+ * @dc - dc instance whose oem i2c channel is used
+ * @cmd - i2c command structure
+ * return - true if success, false otherwise.
+ */
+bool dc_submit_i2c_oem(
+ struct dc *dc,
+ struct i2c_command *cmd);
+
+enum aux_return_code_type;
+/* Attempt to transfer the given aux payload. This function does not perform
+ * retries or handle error states. The reply is returned in the payload->reply
+ * and the result through operation_result. Returns the number of bytes
+ * transferred, or -1 on a failure.
+ */
+int dc_link_aux_transfer_raw(struct ddc_service *ddc,
+ struct aux_payload *payload,
+ enum aux_return_code_type *operation_result);
+
+struct ddc_service *
+dc_get_oem_i2c_device(struct dc *dc);
+
+bool dc_is_oem_i2c_device_present(
+ struct dc *dc,
+ size_t slave_address
+);
+
+/* return true if the connected receiver supports the hdcp version */
+bool dc_link_is_hdcp14(struct dc_link *link, enum signal_type signal);
+bool dc_link_is_hdcp22(struct dc_link *link, enum signal_type signal);
+
+/* Notify DC about DP RX Interrupt (aka DP IRQ_HPD).
+ *
+ * TODO - When defer_handling is true the function will have a different purpose.
+ * It no longer does complete hpd rx irq handling. We should create a separate
+ * interface specifically for this case.
+ *
+ * Return:
+ * true - Downstream port status changed. DM should call DC to do the
+ * detection.
+ * false - no change in Downstream port status. No further action required
+ * from DM.
+ */
+bool dc_link_handle_hpd_rx_irq(struct dc_link *dc_link,
+ union hpd_irq_data *hpd_irq_dpcd_data, bool *out_link_loss,
+ bool defer_handling, bool *has_left_work);
+/* handle the test automation sequence defined by the DP spec */
+void dc_link_dp_handle_automated_test(struct dc_link *link);
+
+/* handle DP Link loss sequence and try to recover RX link loss with best
+ * effort
+ */
+void dc_link_dp_handle_link_loss(struct dc_link *link);
+
+/* Determine if hpd rx irq should be handled or ignored
+ * return true - hpd rx irq should be handled.
+ * return false - it is safe to ignore hpd rx irq event
+ */
+bool dc_link_dp_allow_hpd_rx_irq(const struct dc_link *link);
+
+/* Determine if link loss is indicated with a given hpd_irq_dpcd_data.
+ * @link - link the hpd irq data associated with
+ * @hpd_irq_dpcd_data - input hpd irq data
+ * return - true if hpd irq data indicates a link loss
+ */
+bool dc_link_check_link_loss_status(struct dc_link *link,
+ union hpd_irq_data *hpd_irq_dpcd_data);
+
+/* Read hpd rx irq data from a given link
+ * @link - link where the hpd irq data should be read from
+ * @irq_data - output hpd irq data
+ * return - DC_OK if hpd irq data is read successfully, otherwise hpd irq data
+ * read has failed.
+ */
+enum dc_status dc_link_dp_read_hpd_rx_irq_data(
+ struct dc_link *link,
+ union hpd_irq_data *irq_data);
+
+/* The function clears recorded DP RX states in the link. DM should call this
+ * function when it is resuming from S3 power state to previously connected links.
+ *
+ * TODO - in the future we should consider to expand link resume interface to
+ * support clearing previous rx states. So we don't have to rely on dm to call
+ * this interface explicitly.
+ */
+void dc_link_clear_dprx_states(struct dc_link *link);
+
+/* Destruct the mst topology of the link and reset the allocated payload table
+ *
+ * NOTE: this should only be called if DM chooses not to call dc_link_detect but
+ * still wants to reset MST topology on an unplug event */
+bool dc_link_reset_cur_dp_mst_topology(struct dc_link *link);
+
+/* The function calculates effective DP link bandwidth when a given link is
+ * using the given link settings.
+ *
+ * return - total effective link bandwidth in kbps.
+ */
+uint32_t dc_link_bandwidth_kbps(
+ const struct dc_link *link,
+ const struct dc_link_settings *link_setting);
+
+struct dp_audio_bandwidth_params {
+ const struct dc_crtc_timing *crtc_timing;
+ enum dp_link_encoding link_encoding;
+ uint32_t channel_count;
+ uint32_t sample_rate_hz;
};
-#endif
-#include "dc_link.h"
+/* The function calculates the minimum size of hblank (in bytes) needed to
+ * support the specified channel count and sample rate combination, given the
+ * link encoding and timing to be used. This calculation is not supported
+ * for 8b/10b SST.
+ *
+ * return - min hblank size in bytes, 0 if 8b/10b SST.
+ */
+uint32_t dc_link_required_hblank_size_bytes(
+ const struct dc_link *link,
+ struct dp_audio_bandwidth_params *audio_params);
+
+/* The function takes a snapshot of current link resource allocation state
+ * @dc: pointer to dc of the dm calling this
+ * @map: a dc link resource snapshot defined internally to dc.
+ *
+ * DM needs to capture a snapshot of current link resource allocation mapping
+ * and store it in its persistent storage.
+ *
+ * Some of the link resource is using first come first serve policy.
+ * The allocation mapping depends on original hotplug order. This information
+ * is lost after driver is loaded next time. The snapshot is used in order to
+ * restore link resource to its previous state so user will get consistent
+ * link capability allocation across reboot.
+ *
+ */
+void dc_get_cur_link_res_map(const struct dc *dc, uint32_t *map);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-uint32_t dc_get_opp_for_plane(struct dc *dc, struct dc_plane_state *plane);
+/* This function restores link resource allocation state from a snapshot
+ * @dc: pointer to dc of the dm calling this
+ * @map: a dc link resource snapshot defined internally to dc.
+ *
+ * DM needs to call this function after initial link detection on boot and
+ * before first commit streams to restore link resource allocation state
+ * from previous boot session.
+ *
+ * Some of the link resource is using first come first serve policy.
+ * The allocation mapping depends on original hotplug order. This information
+ * is lost after driver is loaded next time. The snapshot is used in order to
+ * restore link resource to its previous state so user will get consistent
+ * link capability allocation across reboot.
+ *
+ */
+void dc_restore_link_res_map(const struct dc *dc, uint32_t *map);
+
+/* TODO: this is not meant to be exposed to DM. Should switch to stream update
+ * interface i.e stream_update->dsc_config
+ */
+bool dc_link_update_dsc_config(struct pipe_ctx *pipe_ctx);
+
+/* translate a raw link rate data to bandwidth in kbps */
+uint32_t dc_link_bw_kbps_from_raw_frl_link_rate_data(const struct dc *dc, uint8_t bw);
+
+/* determine the optimal bandwidth given link and required bw.
+ * @link - current detected link
+ * @req_bw - requested bandwidth in kbps
+ * @link_settings - returned most optimal link settings that can fit the
+ * requested bandwidth
+ * return - false if link can't support requested bandwidth, true if link
+ * settings is found.
+ */
+bool dc_link_decide_edp_link_settings(struct dc_link *link,
+ struct dc_link_settings *link_settings,
+ uint32_t req_bw);
+
+/* return the max dp link settings can be driven by the link without considering
+ * connected RX device and its capability
+ */
+bool dc_link_dp_get_max_link_enc_cap(const struct dc_link *link,
+ struct dc_link_settings *max_link_enc_cap);
+
+/* determine when the link is driving MST mode, what DP link channel coding
+ * format will be used. The decision will remain unchanged until next HPD event.
+ *
+ * @link - a link with DP RX connection
+ * return - if stream is committed to this link with MST signal type, type of
+ * channel coding format dc will choose.
+ */
+enum dp_link_encoding dc_link_dp_mst_decide_link_encoding_format(
+ const struct dc_link *link);
+
+/* get max dp link settings the link can enable with all things considered. (i.e
+ * TX/RX/Cable capabilities and dp override policies).
+ *
+ * @link - a link with DP RX connection
+ * return - max dp link settings the link can enable.
+ *
+ */
+const struct dc_link_settings *dc_link_get_link_cap(const struct dc_link *link);
+
+/* Get the highest encoding format that the link supports; highest meaning the
+ * encoding format which supports the maximum bandwidth.
+ *
+ * @link - a link with DP RX connection
+ * return - highest encoding format link supports.
+ */
+enum dc_link_encoding_format dc_link_get_highest_encoding_format(const struct dc_link *link);
+
+/* Check if a RX (ex. DP sink, MST hub, passive or active dongle) is connected
+ * to a link with dp connector signal type.
+ * @link - a link with dp connector signal type
+ * return - true if connected, false otherwise
+ */
+bool dc_link_is_dp_sink_present(struct dc_link *link);
+
+/* Force DP lane settings update to main-link video signal and notify the change
+ * to DP RX via DPCD. This is a debug interface used for video signal integrity
+ * tuning purpose. The interface assumes link has already been enabled with DP
+ * signal.
+ *
+ * @lt_settings - a container structure with desired hw_lane_settings
+ */
+void dc_link_set_drive_settings(struct dc *dc,
+ struct link_training_settings *lt_settings,
+ struct dc_link *link);
-#endif
-/*******************************************************************************
- * Sink Interfaces - A sink corresponds to a display output device
- ******************************************************************************/
+/* Enable a test pattern in Link or PHY layer in an active link for compliance
+ * test or debugging purpose. The test pattern will remain until next un-plug.
+ *
+ * @link - active link with DP signal output enabled.
+ * @test_pattern - desired test pattern to output.
+ * NOTE: set to DP_TEST_PATTERN_VIDEO_MODE to disable previous test pattern.
+ * @test_pattern_color_space - for video test pattern choose a desired color
+ * space.
+ * @p_link_settings - For PHY pattern choose a desired link settings
+ * @p_custom_pattern - some test pattern will require a custom input to
+ * customize some pattern details. Otherwise keep it to NULL.
+ * @cust_pattern_size - size of the custom pattern input.
+ *
+ */
+bool dc_link_dp_set_test_pattern(
+ struct dc_link *link,
+ enum dp_test_pattern test_pattern,
+ enum dp_test_pattern_color_space test_pattern_color_space,
+ const struct link_training_settings *p_link_settings,
+ const unsigned char *p_custom_pattern,
+ unsigned int cust_pattern_size);
+
+/* Force DP link settings to always use a specific value until reboot to a
+ * specific link. If link has already been enabled, the interface will also
+ * switch to desired link settings immediately. This is a debug interface for
+ * generic dp issue troubleshooting.
+ */
+void dc_link_set_preferred_link_settings(struct dc *dc,
+ struct dc_link_settings *link_setting,
+ struct dc_link *link);
+
+/* Force DP link to customize a specific link training behavior by overriding to
+ * standard DP specs defined protocol. This is a debug interface to trouble shoot
+ * display specific link training issues or apply some display specific
+ * workaround in link training.
+ *
+ * @link_setting - if not NULL, force preferred link settings to the link.
+ * @lt_overrides - a set of override pointers. If any pointer is non-NULL, dc
+ * will apply this particular override in future link training. If NULL is
+ * passed in, dc resets previous overrides.
+ * NOTE: DM must keep the memory from override pointers until DM resets preferred
+ * training settings.
+ */
+void dc_link_set_preferred_training_settings(struct dc *dc,
+ struct dc_link_settings *link_setting,
+ struct dc_link_training_overrides *lt_overrides,
+ struct dc_link *link,
+ bool skip_immediate_retrain);
+
+/* return - true if FEC is supported with connected DP RX, false otherwise */
+bool dc_link_is_fec_supported(const struct dc_link *link);
+
+/* query FEC enablement policy to determine if FEC will be enabled by dc during
+ * link enablement.
+ * return - true if FEC should be enabled, false otherwise.
+ */
+bool dc_link_should_enable_fec(const struct dc_link *link);
+
+/* determine lttpr mode the current link should be enabled with a specific link
+ * settings.
+ */
+enum lttpr_mode dc_link_decide_lttpr_mode(struct dc_link *link,
+ struct dc_link_settings *link_setting);
+
+/* Force DP RX to update its power state.
+ * NOTE: this interface doesn't update dp main-link. Calling this function will
+ * cause DP TX main-link and DP RX power states out of sync. DM has to restore
+ * RX power state back upon finish DM specific execution requiring DP RX in a
+ * specific power state.
+ * @on - true to set DP RX in D0 power state, false to set DP RX in D3 power
+ * state.
+ */
+void dc_link_dp_receiver_power_ctrl(struct dc_link *link, bool on);
+
+/* Force link to read base dp receiver caps from dpcd 000h - 00Fh and overwrite
+ * current value read from extended receiver cap from 02200h - 0220Fh.
+ * Some DP RX has problems of providing accurate DP receiver caps from extended
+ * field, this interface is a workaround to revert link back to use base caps.
+ */
+void dc_link_overwrite_extended_receiver_cap(
+ struct dc_link *link);
+
+void dc_link_edp_panel_backlight_power_on(struct dc_link *link,
+ bool wait_for_hpd);
+
+/* Set backlight level of an embedded panel (eDP, LVDS).
+ * backlight_pwm_u16_16 is unsigned 32 bit with 16 bit integer
+ * and 16 bit fractional, where 1.0 is max backlight value.
+ */
+bool dc_link_set_backlight_level(const struct dc_link *dc_link,
+ struct set_backlight_level_params *backlight_level_params);
+
+/* Set/get nits-based backlight level of an embedded panel (eDP, LVDS). */
+bool dc_link_set_backlight_level_nits(struct dc_link *link,
+ bool isHDR,
+ uint32_t backlight_millinits,
+ uint32_t transition_time_in_ms);
+
+bool dc_link_get_backlight_level_nits(struct dc_link *link,
+ uint32_t *backlight_millinits,
+ uint32_t *backlight_millinits_peak);
+
+int dc_link_get_backlight_level(const struct dc_link *dc_link);
+
+int dc_link_get_target_backlight_pwm(const struct dc_link *link);
+
+bool dc_link_set_psr_allow_active(struct dc_link *dc_link, const bool *enable,
+ bool wait, bool force_static, const unsigned int *power_opts);
+
+bool dc_link_get_psr_state(const struct dc_link *dc_link, enum dc_psr_state *state);
+
+bool dc_link_setup_psr(struct dc_link *dc_link,
+ const struct dc_stream_state *stream, struct psr_config *psr_config,
+ struct psr_context *psr_context);
+
+/*
+ * Communicate with DMUB to allow or disallow Panel Replay on the specified link:
+ *
+ * @link: pointer to the dc_link struct instance
+ * @enable: enable(active) or disable(inactive) replay
+ * @wait: state transition need to wait the active set completed.
+ * @force_static: force disable(inactive) the replay
+ * @power_opts: set power optimization parameters to DMUB.
+ *
+ * return: allow Replay active will return true, else will return false.
+ */
+bool dc_link_set_replay_allow_active(struct dc_link *dc_link, const bool *enable,
+ bool wait, bool force_static, const unsigned int *power_opts);
+
+bool dc_link_get_replay_state(const struct dc_link *dc_link, uint64_t *state);
+
+/* On eDP links this function call will stall until T12 has elapsed.
+ * If the panel is not in power off state, this function will return
+ * immediately.
+ */
+bool dc_link_wait_for_t12(struct dc_link *link);
+
+/* Determine if dp trace has been initialized to reflect up-to-date result.
+ * return - true if trace is initialized and has valid data, false if dp trace
+ * doesn't have valid result.
+ */
+bool dc_dp_trace_is_initialized(struct dc_link *link);
+
+/* Query a dp trace flag to indicate if the current dp trace data has been
+ * logged before
+ */
+bool dc_dp_trace_is_logged(struct dc_link *link,
+ bool in_detection);
+
+/* Set dp trace flag to indicate whether DM has already logged the current dp
+ * trace data. DM can set is_logged to true upon logging and check
+ * dc_dp_trace_is_logged before logging to avoid logging the same result twice.
+ */
+void dc_dp_trace_set_is_logged_flag(struct dc_link *link,
+ bool in_detection,
+ bool is_logged);
+
+/* Obtain driver time stamp for last dp link training end. The time stamp is
+ * formatted based on dm_get_timestamp DM function.
+ * @in_detection - true to get link training end time stamp of last link
+ * training in detection sequence. false to get link training end time stamp
+ * of last link training in commit (dpms) sequence
+ */
+unsigned long long dc_dp_trace_get_lt_end_timestamp(struct dc_link *link,
+ bool in_detection);
+
+/* Get how many link training attempts dc has done with latest sequence.
+ * @in_detection - true to get link training count of last link
+ * training in detection sequence. false to get link training count of last link
+ * training in commit (dpms) sequence
+ */
+const struct dp_trace_lt_counts *dc_dp_trace_get_lt_counts(struct dc_link *link,
+ bool in_detection);
+
+/* Get how many link losses have happened since the last link training attempt */
+unsigned int dc_dp_trace_get_link_loss_count(struct dc_link *link);
+
+/*
+ * USB4 DPIA BW ALLOCATION PUBLIC FUNCTIONS
+ */
+/*
+ * Send a request from DP-Tx requesting to allocate BW remotely after
+ * allocating it locally. This will get processed by CM and a CB function
+ * will be called.
+ *
+ * @link: pointer to the dc_link struct instance
+ * @req_bw: The requested bw in Kbyte to be allocated
+ *
+ * return: none
+ */
+void dc_link_set_usb4_req_bw_req(struct dc_link *link, int req_bw);
+
+/*
+ * Handle the USB4 BW Allocation related functionality here:
+ * Plug => Try to allocate max bw from timing parameters supported by the sink
+ * Unplug => de-allocate bw
+ *
+ * @link: pointer to the dc_link struct instance
+ * @peak_bw: Peak bw used by the link/sink
+ *
+ */
+void dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link(
+ struct dc_link *link, int peak_bw);
+
+/*
+ * Calculates the DP tunneling bandwidth required for the stream timing
+ * and aggregates the stream bandwidth for the respective DP tunneling link
+ *
+ * return: dc_status
+ */
+enum dc_status dc_link_validate_dp_tunneling_bandwidth(const struct dc *dc, const struct dc_state *new_ctx);
+
+/*
+ * Get if ALPM is supported by the link
+ */
+void dc_link_get_alpm_support(struct dc_link *link, bool *auxless_support,
+ bool *auxwake_support);
+
+/* Sink Interfaces - A sink corresponds to a display output device */
struct dc_container_id {
// 128bit GUID in binary form
@@ -1290,14 +2560,29 @@ struct dc_sink_dsc_caps {
// 'true' if these are virtual DPCD's DSC caps (immediately upstream of sink in MST topology),
// 'false' if they are sink's DSC caps
bool is_virtual_dpcd_dsc;
+ // 'true' if MST topology supports DSC passthrough for sink
+ // 'false' if MST topology does not support DSC passthrough
+ bool is_dsc_passthrough_supported;
struct dsc_dec_dpcd_caps dsc_dec_caps;
};
+struct dc_sink_hblank_expansion_caps {
+ // 'true' if these are virtual DPCD's HBlank expansion caps (immediately upstream of sink in MST topology),
+ // 'false' if they are sink's HBlank expansion caps
+ bool is_virtual_dpcd_hblank_expansion;
+ struct hblank_expansion_dpcd_caps dpcd_caps;
+};
+
struct dc_sink_fec_caps {
bool is_rx_fec_supported;
bool is_topology_fec_supported;
};
+struct scdc_caps {
+ union hdmi_scdc_manufacturer_OUI_data manufacturer_OUI;
+ union hdmi_scdc_device_id_data device_id;
+};
+
/*
* The sink structure contains EDID and other display device properties
*/
@@ -1311,8 +2596,10 @@ struct dc_sink {
struct stereo_3d_features features_3d[TIMING_3D_FORMAT_MAX];
bool converter_disable_audio;
+ struct scdc_caps scdc_caps;
struct dc_sink_dsc_caps dsc_caps;
struct dc_sink_fec_caps fec_caps;
+ struct dc_sink_hblank_expansion_caps hblank_expansion_caps;
bool is_vsc_sdp_colorimetry_supported;
@@ -1348,9 +2635,7 @@ struct dc_cursor {
};
-/*******************************************************************************
- * Interrupt interfaces
- ******************************************************************************/
+/* Interrupt interfaces */
enum dc_irq_source dc_interrupt_to_irq_source(
struct dc *dc,
uint32_t src_id,
@@ -1362,9 +2647,7 @@ enum dc_irq_source dc_get_hpd_irq_source_at_index(
void dc_notify_vsync_int_state(struct dc *dc, struct dc_stream_state *stream, bool enable);
-/*******************************************************************************
- * Power Interfaces
- ******************************************************************************/
+/* Power Interfaces */
void dc_set_power_state(
struct dc *dc,
@@ -1373,7 +2656,6 @@ void dc_resume(struct dc *dc);
void dc_power_down_on_boot(struct dc *dc);
-#if defined(CONFIG_DRM_AMD_DC_HDCP)
/*
* HDCP Interfaces
*/
@@ -1381,47 +2663,69 @@ enum hdcp_message_status dc_process_hdcp_msg(
enum signal_type signal,
struct dc_link *link,
struct hdcp_protection_message *message_info);
-#endif
bool dc_is_dmcu_initialized(struct dc *dc);
enum dc_status dc_set_clock(struct dc *dc, enum dc_clock_type clock_type, uint32_t clk_khz, uint32_t stepping);
void dc_get_clock(struct dc *dc, enum dc_clock_type clock_type, struct dc_clock_config *clock_cfg);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-bool dc_is_plane_eligible_for_idle_optimizations(struct dc *dc, struct dc_plane_state *plane,
- struct dc_cursor_attributes *cursor_attr);
+bool dc_is_plane_eligible_for_idle_optimizations(struct dc *dc,
+ unsigned int pitch,
+ unsigned int height,
+ enum surface_pixel_format format,
+ struct dc_cursor_attributes *cursor_attr);
-void dc_allow_idle_optimizations(struct dc *dc, bool allow);
+#define dc_allow_idle_optimizations(dc, allow) dc_allow_idle_optimizations_internal(dc, allow, __func__)
+#define dc_exit_ips_for_hw_access(dc) dc_exit_ips_for_hw_access_internal(dc, __func__)
-/*
- * blank all streams, and set min and max memory clock to
- * lowest and highest DPM level, respectively
- */
+void dc_allow_idle_optimizations_internal(struct dc *dc, bool allow, const char *caller_name);
+void dc_exit_ips_for_hw_access_internal(struct dc *dc, const char *caller_name);
+bool dc_dmub_is_ips_idle_state(struct dc *dc);
+
+/* set min and max memory clock to lowest and highest DPM level, respectively */
void dc_unlock_memory_clock_frequency(struct dc *dc);
-/*
- * set min memory clock to the min required for current mode,
- * max to maxDPM, and unblank streams
- */
+/* set min memory clock to the min required for current mode, max to maxDPM */
void dc_lock_memory_clock_frequency(struct dc *dc);
+/* set soft max for memclk, to be used for AC/DC switching clock limitations */
+void dc_enable_dcmode_clk_limit(struct dc *dc, bool enable);
+
/* cleanup on driver unload */
void dc_hardware_release(struct dc *dc);
-#endif
+/* disables fw based mclk switch */
+void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc);
bool dc_set_psr_allow_active(struct dc *dc, bool enable);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
+
+bool dc_set_replay_allow_active(struct dc *dc, bool active);
+
+bool dc_set_ips_disable(struct dc *dc, unsigned int disable_ips);
+
void dc_z10_restore(const struct dc *dc);
void dc_z10_save_init(struct dc *dc);
-#endif
+bool dc_is_dmub_outbox_supported(struct dc *dc);
bool dc_enable_dmub_notifications(struct dc *dc);
+bool dc_abm_save_restore(
+ struct dc *dc,
+ struct dc_stream_state *stream,
+ struct abm_save_restore *pData);
+
+void dc_enable_dmub_outbox(struct dc *dc);
+
bool dc_process_dmub_aux_transfer_async(struct dc *dc,
uint32_t link_index,
struct aux_payload *payload);
+/*
+ * smart power OLED Interfaces
+ */
+bool dc_smart_power_oled_enable(const struct dc_link *link, bool enable, uint16_t peak_nits,
+ uint8_t debug_control, uint16_t fixed_CLL, uint32_t triggerline);
+bool dc_smart_power_oled_get_max_cll(const struct dc_link *link, unsigned int *pCurrent_MaxCLL);
+
/* Get dc link index from dpia port index */
uint8_t get_link_index_from_dpia_port_index(const struct dc *dc,
uint8_t dpia_port_index);
@@ -1436,14 +2740,544 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc,
uint8_t mst_alloc_slots,
uint8_t *mst_slots_in_use);
-/*******************************************************************************
- * DSC Interfaces
- ******************************************************************************/
+void dc_process_dmub_dpia_set_tps_notification(const struct dc *dc, uint32_t link_index, uint8_t tps);
+
+void dc_process_dmub_dpia_hpd_int_enable(const struct dc *dc,
+ uint32_t hpd_int_enable);
+
+void dc_print_dmub_diagnostic_data(const struct dc *dc);
+
+void dc_query_current_properties(struct dc *dc, struct dc_current_properties *properties);
+
+struct dc_power_profile {
+ int power_level; /* Power level of a dc_state (see dc_get_power_profile_for_dc_state()); lower is better */
+};
+
+struct dc_power_profile dc_get_power_profile_for_dc_state(const struct dc_state *context);
+
+unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context);
+
+bool dc_get_host_router_index(const struct dc_link *link, unsigned int *host_router_index);
+
+void dc_log_preos_dmcub_info(const struct dc *dc);
+
+/* DSC Interfaces */
#include "dc_dsc.h"
-/*******************************************************************************
- * Disable acc mode Interfaces
- ******************************************************************************/
+void dc_get_visual_confirm_for_stream(
+ struct dc *dc,
+ struct dc_stream_state *stream_state,
+ struct tg_color *color);
+
+/* Disable acc mode Interfaces */
void dc_disable_accelerated_mode(struct dc *dc);
+bool dc_is_timing_changed(struct dc_stream_state *cur_stream,
+ struct dc_stream_state *new_stream);
+
+bool dc_is_cursor_limit_pending(struct dc *dc);
+bool dc_can_clear_cursor_limit(const struct dc *dc);
+
+/**
+ * dc_get_underflow_debug_data_for_otg() - Retrieve underflow debug data.
+ *
+ * @dc: Pointer to the display core context.
+ * @primary_otg_inst: Instance index of the primary OTG that underflowed.
+ * @out_data: Pointer to a dc_underflow_debug_data struct to be filled with debug information.
+ *
+ * This function collects and logs underflow-related HW states when underflow happens,
+ * including OTG underflow status, current read positions, frame count, and per-HUBP debug data.
+ * The results are stored in the provided out_data structure for further analysis or logging.
+ */
+void dc_get_underflow_debug_data_for_otg(struct dc *dc, int primary_otg_inst, struct dc_underflow_debug_data *out_data);
+
+void dc_get_power_feature_status(struct dc *dc, int primary_otg_inst, struct power_features *out_data);
+
+/*
+ * Software state variables used to program register fields across the display pipeline
+ */
+struct dc_register_software_state {
+ /* HUBP register programming variables for each pipe */
+ struct {
+ bool valid_plane_state;
+ bool valid_stream;
+ bool min_dc_gfx_version9;
+ uint32_t vtg_sel; /* DCHUBP_CNTL->HUBP_VTG_SEL from pipe_ctx->stream_res.tg->inst */
+ uint32_t hubp_clock_enable; /* HUBP_CLK_CNTL->HUBP_CLOCK_ENABLE from power management */
+ uint32_t surface_pixel_format; /* DCSURF_SURFACE_CONFIG->SURFACE_PIXEL_FORMAT from plane_state->format */
+ uint32_t rotation_angle; /* DCSURF_SURFACE_CONFIG->ROTATION_ANGLE from plane_state->rotation */
+ uint32_t h_mirror_en; /* DCSURF_SURFACE_CONFIG->H_MIRROR_EN from plane_state->horizontal_mirror */
+ uint32_t surface_dcc_en; /* DCSURF_SURFACE_CONTROL->PRIMARY_SURFACE_DCC_EN from dcc->enable */
+ uint32_t surface_size_width; /* HUBP_SIZE->SURFACE_SIZE_WIDTH from plane_size.surface_size.width */
+ uint32_t surface_size_height; /* HUBP_SIZE->SURFACE_SIZE_HEIGHT from plane_size.surface_size.height */
+ uint32_t pri_viewport_width; /* DCSURF_PRI_VIEWPORT_DIMENSION->PRI_VIEWPORT_WIDTH from scaler_data.viewport.width */
+ uint32_t pri_viewport_height; /* DCSURF_PRI_VIEWPORT_DIMENSION->PRI_VIEWPORT_HEIGHT from scaler_data.viewport.height */
+ uint32_t pri_viewport_x_start; /* DCSURF_PRI_VIEWPORT_START->PRI_VIEWPORT_X_START from scaler_data.viewport.x */
+ uint32_t pri_viewport_y_start; /* DCSURF_PRI_VIEWPORT_START->PRI_VIEWPORT_Y_START from scaler_data.viewport.y */
+ uint32_t cursor_enable; /* CURSOR_CONTROL->CURSOR_ENABLE from cursor_attributes.enable */
+ uint32_t cursor_width; /* CURSOR_SETTINGS->CURSOR_WIDTH from cursor_position.width */
+ uint32_t cursor_height; /* CURSOR_SETTINGS->CURSOR_HEIGHT from cursor_position.height */
+
+ /* Additional DCC configuration */
+ uint32_t surface_dcc_ind_64b_blk; /* DCSURF_SURFACE_CONTROL->PRIMARY_SURFACE_DCC_IND_64B_BLK from dcc.independent_64b_blks */
+ uint32_t surface_dcc_ind_128b_blk; /* DCSURF_SURFACE_CONTROL->PRIMARY_SURFACE_DCC_IND_128B_BLK from dcc.independent_128b_blks */
+
+ /* Surface pitch configuration */
+ uint32_t surface_pitch; /* DCSURF_SURFACE_PITCH->PITCH from plane_size.surface_pitch */
+ uint32_t meta_pitch; /* DCSURF_SURFACE_PITCH->META_PITCH from dcc.meta_pitch */
+ uint32_t chroma_pitch; /* DCSURF_SURFACE_PITCH_C->PITCH_C from plane_size.chroma_pitch */
+ uint32_t meta_pitch_c; /* DCSURF_SURFACE_PITCH_C->META_PITCH_C from dcc.meta_pitch_c */
+
+ /* Surface addresses */
+ uint32_t primary_surface_address_low; /* DCSURF_PRIMARY_SURFACE_ADDRESS->PRIMARY_SURFACE_ADDRESS from address.grph.addr.low_part */
+ uint32_t primary_surface_address_high; /* DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH->PRIMARY_SURFACE_ADDRESS_HIGH from address.grph.addr.high_part */
+ uint32_t primary_meta_surface_address_low; /* DCSURF_PRIMARY_META_SURFACE_ADDRESS->PRIMARY_META_SURFACE_ADDRESS from address.grph.meta_addr.low_part */
+ uint32_t primary_meta_surface_address_high; /* DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH->PRIMARY_META_SURFACE_ADDRESS_HIGH from address.grph.meta_addr.high_part */
+
+ /* TMZ configuration */
+ uint32_t primary_surface_tmz; /* DCSURF_SURFACE_CONTROL->PRIMARY_SURFACE_TMZ from address.tmz_surface */
+ uint32_t primary_meta_surface_tmz; /* DCSURF_SURFACE_CONTROL->PRIMARY_META_SURFACE_TMZ from address.tmz_surface */
+
+ /* Tiling configuration */
+ uint32_t sw_mode; /* DCSURF_TILING_CONFIG->SW_MODE from tiling_info.gfx9.swizzle */
+ uint32_t num_pipes; /* DCSURF_ADDR_CONFIG->NUM_PIPES from tiling_info.gfx9.num_pipes */
+ uint32_t num_banks; /* DCSURF_ADDR_CONFIG->NUM_BANKS from tiling_info.gfx9.num_banks */
+ uint32_t pipe_interleave; /* DCSURF_ADDR_CONFIG->PIPE_INTERLEAVE from tiling_info.gfx9.pipe_interleave */
+ uint32_t num_shader_engines; /* DCSURF_ADDR_CONFIG->NUM_SE from tiling_info.gfx9.num_shader_engines */
+ uint32_t num_rb_per_se; /* DCSURF_ADDR_CONFIG->NUM_RB_PER_SE from tiling_info.gfx9.num_rb_per_se */
+ uint32_t num_pkrs; /* DCSURF_ADDR_CONFIG->NUM_PKRS from tiling_info.gfx9.num_pkrs */
+
+ /* DML Request Size Configuration - Luma */
+ uint32_t rq_chunk_size; /* DCHUBP_REQ_SIZE_CONFIG->CHUNK_SIZE from rq_regs.rq_regs_l.chunk_size */
+ uint32_t rq_min_chunk_size; /* DCHUBP_REQ_SIZE_CONFIG->MIN_CHUNK_SIZE from rq_regs.rq_regs_l.min_chunk_size */
+ uint32_t rq_meta_chunk_size; /* DCHUBP_REQ_SIZE_CONFIG->META_CHUNK_SIZE from rq_regs.rq_regs_l.meta_chunk_size */
+ uint32_t rq_min_meta_chunk_size; /* DCHUBP_REQ_SIZE_CONFIG->MIN_META_CHUNK_SIZE from rq_regs.rq_regs_l.min_meta_chunk_size */
+ uint32_t rq_dpte_group_size; /* DCHUBP_REQ_SIZE_CONFIG->DPTE_GROUP_SIZE from rq_regs.rq_regs_l.dpte_group_size */
+ uint32_t rq_mpte_group_size; /* DCHUBP_REQ_SIZE_CONFIG->MPTE_GROUP_SIZE from rq_regs.rq_regs_l.mpte_group_size */
+ uint32_t rq_swath_height_l; /* DCHUBP_REQ_SIZE_CONFIG->SWATH_HEIGHT_L from rq_regs.rq_regs_l.swath_height */
+ uint32_t rq_pte_row_height_l; /* DCHUBP_REQ_SIZE_CONFIG->PTE_ROW_HEIGHT_L from rq_regs.rq_regs_l.pte_row_height */
+
+ /* DML Request Size Configuration - Chroma */
+ uint32_t rq_chunk_size_c; /* DCHUBP_REQ_SIZE_CONFIG_C->CHUNK_SIZE_C from rq_regs.rq_regs_c.chunk_size */
+ uint32_t rq_min_chunk_size_c; /* DCHUBP_REQ_SIZE_CONFIG_C->MIN_CHUNK_SIZE_C from rq_regs.rq_regs_c.min_chunk_size */
+ uint32_t rq_meta_chunk_size_c; /* DCHUBP_REQ_SIZE_CONFIG_C->META_CHUNK_SIZE_C from rq_regs.rq_regs_c.meta_chunk_size */
+ uint32_t rq_min_meta_chunk_size_c; /* DCHUBP_REQ_SIZE_CONFIG_C->MIN_META_CHUNK_SIZE_C from rq_regs.rq_regs_c.min_meta_chunk_size */
+ uint32_t rq_dpte_group_size_c; /* DCHUBP_REQ_SIZE_CONFIG_C->DPTE_GROUP_SIZE_C from rq_regs.rq_regs_c.dpte_group_size */
+ uint32_t rq_mpte_group_size_c; /* DCHUBP_REQ_SIZE_CONFIG_C->MPTE_GROUP_SIZE_C from rq_regs.rq_regs_c.mpte_group_size */
+ uint32_t rq_swath_height_c; /* DCHUBP_REQ_SIZE_CONFIG_C->SWATH_HEIGHT_C from rq_regs.rq_regs_c.swath_height */
+ uint32_t rq_pte_row_height_c; /* DCHUBP_REQ_SIZE_CONFIG_C->PTE_ROW_HEIGHT_C from rq_regs.rq_regs_c.pte_row_height */
+
+ /* DML Expansion Modes */
+ uint32_t drq_expansion_mode; /* DCN_EXPANSION_MODE->DRQ_EXPANSION_MODE from rq_regs.drq_expansion_mode */
+ uint32_t prq_expansion_mode; /* DCN_EXPANSION_MODE->PRQ_EXPANSION_MODE from rq_regs.prq_expansion_mode */
+ uint32_t mrq_expansion_mode; /* DCN_EXPANSION_MODE->MRQ_EXPANSION_MODE from rq_regs.mrq_expansion_mode */
+ uint32_t crq_expansion_mode; /* DCN_EXPANSION_MODE->CRQ_EXPANSION_MODE from rq_regs.crq_expansion_mode */
+
+ /* DML DLG parameters - nominal */
+ uint32_t dst_y_per_vm_vblank; /* NOM_PARAMETERS_0->DST_Y_PER_VM_VBLANK from dlg_regs.dst_y_per_vm_vblank */
+ uint32_t dst_y_per_row_vblank; /* NOM_PARAMETERS_0->DST_Y_PER_ROW_VBLANK from dlg_regs.dst_y_per_row_vblank */
+ uint32_t dst_y_per_vm_flip; /* NOM_PARAMETERS_1->DST_Y_PER_VM_FLIP from dlg_regs.dst_y_per_vm_flip */
+ uint32_t dst_y_per_row_flip; /* NOM_PARAMETERS_1->DST_Y_PER_ROW_FLIP from dlg_regs.dst_y_per_row_flip */
+
+ /* DML prefetch settings */
+ uint32_t dst_y_prefetch; /* PREFETCH_SETTINS->DST_Y_PREFETCH from dlg_regs.dst_y_prefetch */
+ uint32_t vratio_prefetch; /* PREFETCH_SETTINS->VRATIO_PREFETCH from dlg_regs.vratio_prefetch */
+ uint32_t vratio_prefetch_c; /* PREFETCH_SETTINS_C->VRATIO_PREFETCH_C from dlg_regs.vratio_prefetch_c */
+
+ /* TTU parameters */
+ uint32_t qos_level_low_wm; /* TTU_CNTL1->QoSLevelLowWaterMark from ttu_regs.qos_level_low_wm */
+ uint32_t qos_level_high_wm; /* TTU_CNTL1->QoSLevelHighWaterMark from ttu_regs.qos_level_high_wm */
+ uint32_t qos_level_flip; /* TTU_CNTL2->QoS_LEVEL_FLIP_L from ttu_regs.qos_level_flip */
+ uint32_t min_ttu_vblank; /* DCN_GLOBAL_TTU_CNTL->MIN_TTU_VBLANK from ttu_regs.min_ttu_vblank */
+ } hubp[MAX_PIPES];
+
+ /* HUBBUB register programming variables */
+ struct {
+ /* Individual DET buffer control per pipe - software state that programs DET registers */
+ uint32_t det0_size; /* DCHUBBUB_DET0_CTRL->DET0_SIZE from hubbub->funcs->program_det_size(hubbub, 0, det_buffer_size_kb) */
+ uint32_t det1_size; /* DCHUBBUB_DET1_CTRL->DET1_SIZE from hubbub->funcs->program_det_size(hubbub, 1, det_buffer_size_kb) */
+ uint32_t det2_size; /* DCHUBBUB_DET2_CTRL->DET2_SIZE from hubbub->funcs->program_det_size(hubbub, 2, det_buffer_size_kb) */
+ uint32_t det3_size; /* DCHUBBUB_DET3_CTRL->DET3_SIZE from hubbub->funcs->program_det_size(hubbub, 3, det_buffer_size_kb) */
+
+ /* Compression buffer control - software state that programs COMPBUF registers */
+ uint32_t compbuf_size; /* DCHUBBUB_COMPBUF_CTRL->COMPBUF_SIZE from hubbub->funcs->program_compbuf_size(hubbub, compbuf_size_kb, safe_to_increase) */
+ uint32_t compbuf_reserved_space_64b; /* COMPBUF_RESERVED_SPACE->COMPBUF_RESERVED_SPACE_64B from hubbub2->pixel_chunk_size / 32 */
+ uint32_t compbuf_reserved_space_zs; /* COMPBUF_RESERVED_SPACE->COMPBUF_RESERVED_SPACE_ZS from hubbub2->pixel_chunk_size / 128 */
+ } hubbub;
+
+ /* DPP register programming variables for each pipe (simplified for available fields) */
+ struct {
+ uint32_t dpp_clock_enable; /* DPP_CONTROL->DPP_CLOCK_ENABLE from dppclk_enable */
+
+ /* Recout (Rectangle of Interest) configuration */
+ uint32_t recout_start_x; /* RECOUT_START->RECOUT_START_X from pipe_ctx->plane_res.scl_data.recout.x */
+ uint32_t recout_start_y; /* RECOUT_START->RECOUT_START_Y from pipe_ctx->plane_res.scl_data.recout.y */
+ uint32_t recout_width; /* RECOUT_SIZE->RECOUT_WIDTH from pipe_ctx->plane_res.scl_data.recout.width */
+ uint32_t recout_height; /* RECOUT_SIZE->RECOUT_HEIGHT from pipe_ctx->plane_res.scl_data.recout.height */
+
+ /* MPC (Multiple Pipe/Plane Combiner) size configuration */
+ uint32_t mpc_width; /* MPC_SIZE->MPC_WIDTH from pipe_ctx->plane_res.scl_data.h_active */
+ uint32_t mpc_height; /* MPC_SIZE->MPC_HEIGHT from pipe_ctx->plane_res.scl_data.v_active */
+
+ /* DSCL mode configuration */
+ uint32_t dscl_mode; /* SCL_MODE->DSCL_MODE from pipe_ctx->plane_res.scl_data.dscl_prog_data.dscl_mode */
+
+ /* Scaler ratios (simplified to integer parts) */
+ uint32_t horz_ratio_int; /* SCL_HORZ_FILTER_SCALE_RATIO->SCL_H_SCALE_RATIO integer part from ratios.horz */
+ uint32_t vert_ratio_int; /* SCL_VERT_FILTER_SCALE_RATIO->SCL_V_SCALE_RATIO integer part from ratios.vert */
+
+ /* Basic scaler taps */
+ uint32_t h_taps; /* SCL_TAP_CONTROL->SCL_H_NUM_TAPS from taps.h_taps */
+ uint32_t v_taps; /* SCL_TAP_CONTROL->SCL_V_NUM_TAPS from taps.v_taps */
+ } dpp[MAX_PIPES];
+
+ /* DCCG register programming variables */
+ struct {
+ /* Core Display Clock Control */
+ uint32_t dispclk_khz; /* DENTIST_DISPCLK_CNTL->DENTIST_DISPCLK_WDIVIDER from clk_mgr.dispclk_khz */
+ uint32_t dc_mem_global_pwr_req_dis; /* DC_MEM_GLOBAL_PWR_REQ_CNTL->DC_MEM_GLOBAL_PWR_REQ_DIS from memory power management settings */
+
+ /* DPP Clock Control - 4 fields per pipe */
+ uint32_t dppclk_khz[MAX_PIPES]; /* DPPCLK_CTRL->DPPCLK_R_GATE_DISABLE from dpp_clocks[pipe] */
+ uint32_t dppclk_enable[MAX_PIPES]; /* DPPCLK_CTRL->DPPCLK0_EN,DPPCLK1_EN,DPPCLK2_EN,DPPCLK3_EN from dccg31_update_dpp_dto() */
+ uint32_t dppclk_dto_enable[MAX_PIPES]; /* DPPCLK_DTO_CTRL->DPPCLK_DTO_ENABLE from dccg->dpp_clock_gated[dpp_inst] state */
+ uint32_t dppclk_dto_phase[MAX_PIPES]; /* DPPCLK0_DTO_PARAM->DPPCLK0_DTO_PHASE from phase calculation req_dppclk/ref_dppclk */
+ uint32_t dppclk_dto_modulo[MAX_PIPES]; /* DPPCLK0_DTO_PARAM->DPPCLK0_DTO_MODULO from modulo = 0xff */
+
+ /* DSC Clock Control - 4 fields per DSC resource */
+ uint32_t dscclk_khz[MAX_PIPES]; /* DSCCLK_DTO_CTRL->DSCCLK_DTO_ENABLE from dsc_clocks */
+ uint32_t dscclk_dto_enable[MAX_PIPES]; /* DSCCLK_DTO_CTRL->DSCCLK0_DTO_ENABLE,DSCCLK1_DTO_ENABLE,DSCCLK2_DTO_ENABLE,DSCCLK3_DTO_ENABLE */
+ uint32_t dscclk_dto_phase[MAX_PIPES]; /* DSCCLK0_DTO_PARAM->DSCCLK0_DTO_PHASE from dccg31_enable_dscclk() */
+ uint32_t dscclk_dto_modulo[MAX_PIPES]; /* DSCCLK0_DTO_PARAM->DSCCLK0_DTO_MODULO from dccg31_enable_dscclk() */
+
+ /* Pixel Clock Control - per pipe */
+ uint32_t pixclk_khz[MAX_PIPES]; /* PIXCLK_RESYNC_CNTL->PIXCLK_RESYNC_ENABLE from stream.timing.pix_clk_100hz */
+ uint32_t otg_pixel_rate_div[MAX_PIPES]; /* OTG_PIXEL_RATE_DIV->OTG_PIXEL_RATE_DIV from OTG pixel rate divider control */
+ uint32_t dtbclk_dto_enable[MAX_PIPES]; /* OTG0_PIXEL_RATE_CNTL->DTBCLK_DTO_ENABLE from dccg31_set_dtbclk_dto() */
+ uint32_t pipe_dto_src_sel[MAX_PIPES]; /* OTG0_PIXEL_RATE_CNTL->PIPE_DTO_SRC_SEL from dccg31_set_dtbclk_dto() source selection */
+ uint32_t dtbclk_dto_div[MAX_PIPES]; /* OTG0_PIXEL_RATE_CNTL->DTBCLK_DTO_DIV from dtbdto_div calculation */
+ uint32_t otg_add_pixel[MAX_PIPES]; /* OTG0_PIXEL_RATE_CNTL->OTG_ADD_PIXEL from dccg31_otg_add_pixel() */
+ uint32_t otg_drop_pixel[MAX_PIPES]; /* OTG0_PIXEL_RATE_CNTL->OTG_DROP_PIXEL from dccg31_otg_drop_pixel() */
+
+ /* DTBCLK DTO Control - 4 DTOs */
+ uint32_t dtbclk_dto_modulo[4]; /* DTBCLK_DTO0_MODULO->DTBCLK_DTO0_MODULO from dccg31_set_dtbclk_dto() modulo calculation */
+ uint32_t dtbclk_dto_phase[4]; /* DTBCLK_DTO0_PHASE->DTBCLK_DTO0_PHASE from phase calculation pixclk_khz/ref_dtbclk_khz */
+ uint32_t dtbclk_dto_dbuf_en; /* DTBCLK_DTO_DBUF_EN->DTBCLK DTO data buffer enable */
+
+ /* DP Stream Clock Control - 4 pipes */
+ uint32_t dpstreamclk_enable[MAX_PIPES]; /* DPSTREAMCLK_CNTL->DPSTREAMCLK_PIPE0_EN,DPSTREAMCLK_PIPE1_EN,DPSTREAMCLK_PIPE2_EN,DPSTREAMCLK_PIPE3_EN */
+ uint32_t dp_dto_modulo[4]; /* DP_DTO0_MODULO->DP_DTO0_MODULO from DP stream DTO programming */
+ uint32_t dp_dto_phase[4]; /* DP_DTO0_PHASE->DP_DTO0_PHASE from DP stream DTO programming */
+ uint32_t dp_dto_dbuf_en; /* DP_DTO_DBUF_EN->DP DTO data buffer enable */
+
+ /* PHY Symbol Clock Control - 5 PHYs (A,B,C,D,E) */
+ uint32_t phy_symclk_force_en[5]; /* PHYASYMCLK_CLOCK_CNTL->PHYASYMCLK_FORCE_EN from dccg31_set_physymclk() force_enable */
+ uint32_t phy_symclk_force_src_sel[5]; /* PHYASYMCLK_CLOCK_CNTL->PHYASYMCLK_FORCE_SRC_SEL from dccg31_set_physymclk() clk_src */
+ uint32_t phy_symclk_gate_disable[5]; /* DCCG_GATE_DISABLE_CNTL2->PHYASYMCLK_GATE_DISABLE from debug.root_clock_optimization.bits.physymclk */
+
+ /* SYMCLK32 SE Control - 4 instances */
+ uint32_t symclk32_se_src_sel[4]; /* SYMCLK32_SE_CNTL->SYMCLK32_SE0_SRC_SEL from dccg31_enable_symclk32_se() with get_phy_mux_symclk() mapping */
+ uint32_t symclk32_se_enable[4]; /* SYMCLK32_SE_CNTL->SYMCLK32_SE0_EN from dccg31_enable_symclk32_se() enable */
+ uint32_t symclk32_se_gate_disable[4]; /* DCCG_GATE_DISABLE_CNTL3->SYMCLK32_SE0_GATE_DISABLE from debug.root_clock_optimization.bits.symclk32_se */
+
+ /* SYMCLK32 LE Control - 2 instances */
+ uint32_t symclk32_le_src_sel[2]; /* SYMCLK32_LE_CNTL->SYMCLK32_LE0_SRC_SEL from dccg31_enable_symclk32_le() phyd32clk source */
+ uint32_t symclk32_le_enable[2]; /* SYMCLK32_LE_CNTL->SYMCLK32_LE0_EN from dccg31_enable_symclk32_le() enable */
+ uint32_t symclk32_le_gate_disable[2]; /* DCCG_GATE_DISABLE_CNTL3->SYMCLK32_LE0_GATE_DISABLE from debug.root_clock_optimization.bits.symclk32_le */
+
+ /* DPIA Clock Control */
+ uint32_t dpiaclk_540m_dto_modulo; /* DPIACLK_540M_DTO_MODULO->DPIA 540MHz DTO modulo */
+ uint32_t dpiaclk_540m_dto_phase; /* DPIACLK_540M_DTO_PHASE->DPIA 540MHz DTO phase */
+ uint32_t dpiaclk_810m_dto_modulo; /* DPIACLK_810M_DTO_MODULO->DPIA 810MHz DTO modulo */
+ uint32_t dpiaclk_810m_dto_phase; /* DPIACLK_810M_DTO_PHASE->DPIA 810MHz DTO phase */
+ uint32_t dpiaclk_dto_cntl; /* DPIACLK_DTO_CNTL->DPIA clock DTO control */
+ uint32_t dpiasymclk_cntl; /* DPIASYMCLK_CNTL->DPIA symbol clock control */
+
+ /* Clock Gating Control */
+ uint32_t dccg_gate_disable_cntl; /* DCCG_GATE_DISABLE_CNTL->Clock gate disable control from dccg31_init() */
+ uint32_t dpstreamclk_gate_disable; /* DCCG_GATE_DISABLE_CNTL3->DPSTREAMCLK_GATE_DISABLE from debug.root_clock_optimization.bits.dpstream */
+ uint32_t dpstreamclk_root_gate_disable; /* DCCG_GATE_DISABLE_CNTL3->DPSTREAMCLK_ROOT_GATE_DISABLE from debug.root_clock_optimization.bits.dpstream */
+
+ /* VSync Control */
+ uint32_t vsync_cnt_ctrl; /* DCCG_VSYNC_CNT_CTRL->VSync counter control */
+ uint32_t vsync_cnt_int_ctrl; /* DCCG_VSYNC_CNT_INT_CTRL->VSync counter interrupt control */
+ uint32_t vsync_otg_latch_value[6]; /* DCCG_VSYNC_OTG0_LATCH_VALUE->OTG0 VSync latch value (for OTG0-5) */
+
+ /* Time Base Control */
+ uint32_t microsecond_time_base_div; /* MICROSECOND_TIME_BASE_DIV->Microsecond time base divider */
+ uint32_t millisecond_time_base_div; /* MILLISECOND_TIME_BASE_DIV->Millisecond time base divider */
+ } dccg;
+
+ /* DSC essential configuration for underflow analysis */
+ struct {
+ /* DSC active state - critical for bandwidth analysis */
+ uint32_t dsc_clock_enable; /* DSC enabled - affects bandwidth requirements */
+
+ /* DSC configuration affecting bandwidth and timing */
+ uint32_t dsc_num_slices_h; /* Horizontal slice count - affects throughput */
+ uint32_t dsc_num_slices_v; /* Vertical slice count - affects throughput */
+ uint32_t dsc_bits_per_pixel; /* Compression ratio - affects bandwidth */
+
+ /* OPP integration - affects pipeline flow */
+ uint32_t dscrm_dsc_forward_enable; /* DSC forwarding to OPP enabled */
+ uint32_t dscrm_dsc_opp_pipe_source; /* Which OPP receives DSC output */
+ } dsc[MAX_PIPES];
+
+ /* MPC register programming variables */
+ struct {
+ /* MPCC blending tree and mode control */
+ uint32_t mpcc_mode[MAX_PIPES]; /* MPCC_CONTROL->MPCC_MODE from blend_cfg.blend_mode */
+ uint32_t mpcc_alpha_blend_mode[MAX_PIPES]; /* MPCC_CONTROL->MPCC_ALPHA_BLND_MODE from blend_cfg.alpha_mode */
+ uint32_t mpcc_alpha_multiplied_mode[MAX_PIPES]; /* MPCC_CONTROL->MPCC_ALPHA_MULTIPLIED_MODE from blend_cfg.pre_multiplied_alpha */
+ uint32_t mpcc_blnd_active_overlap_only[MAX_PIPES]; /* MPCC_CONTROL->MPCC_BLND_ACTIVE_OVERLAP_ONLY from blend_cfg.overlap_only */
+ uint32_t mpcc_global_alpha[MAX_PIPES]; /* MPCC_CONTROL->MPCC_GLOBAL_ALPHA from blend_cfg.global_alpha */
+ uint32_t mpcc_global_gain[MAX_PIPES]; /* MPCC_CONTROL->MPCC_GLOBAL_GAIN from blend_cfg.global_gain */
+ uint32_t mpcc_bg_bpc[MAX_PIPES]; /* MPCC_CONTROL->MPCC_BG_BPC from background color depth */
+ uint32_t mpcc_bot_gain_mode[MAX_PIPES]; /* MPCC_CONTROL->MPCC_BOT_GAIN_MODE from bottom layer gain control */
+
+ /* MPCC blending tree connections */
+ uint32_t mpcc_bot_sel[MAX_PIPES]; /* MPCC_BOT_SEL->MPCC_BOT_SEL from mpcc_state->bot_sel */
+ uint32_t mpcc_top_sel[MAX_PIPES]; /* MPCC_TOP_SEL->MPCC_TOP_SEL from mpcc_state->dpp_id */
+
+ /* MPCC output gamma control */
+ uint32_t mpcc_ogam_mode[MAX_PIPES]; /* MPCC_OGAM_CONTROL->MPCC_OGAM_MODE from output gamma mode */
+ uint32_t mpcc_ogam_select[MAX_PIPES]; /* MPCC_OGAM_CONTROL->MPCC_OGAM_SELECT from gamma LUT bank selection */
+ uint32_t mpcc_ogam_pwl_disable[MAX_PIPES]; /* MPCC_OGAM_CONTROL->MPCC_OGAM_PWL_DISABLE from PWL control */
+
+ /* MPCC pipe assignment and status */
+ uint32_t mpcc_opp_id[MAX_PIPES]; /* MPCC_OPP_ID->MPCC_OPP_ID from mpcc_state->opp_id */
+ uint32_t mpcc_idle[MAX_PIPES]; /* MPCC_STATUS->MPCC_IDLE from mpcc idle status */
+ uint32_t mpcc_busy[MAX_PIPES]; /* MPCC_STATUS->MPCC_BUSY from mpcc busy status */
+
+ /* MPC output processing */
+ uint32_t mpc_out_csc_mode; /* MPC_OUT_CSC_COEF->MPC_OUT_CSC_MODE from output_csc */
+ uint32_t mpc_out_gamma_mode; /* MPC_OUT_GAMMA_LUT->MPC_OUT_GAMMA_MODE from output_gamma */
+ } mpc;
+
+ /* OPP register programming variables for each pipe */
+ struct {
+ /* Display Pattern Generator (DPG) Control - 19 fields from DPG_CONTROL register */
+ uint32_t dpg_enable; /* DPG_CONTROL->DPG_EN from test_pattern parameter (enable/disable) */
+
+ /* Format Control (FMT) - 18 fields from FMT_CONTROL register */
+ uint32_t fmt_pixel_encoding; /* FMT_CONTROL->FMT_PIXEL_ENCODING from clamping->pixel_encoding */
+ uint32_t fmt_subsampling_mode; /* FMT_CONTROL->FMT_SUBSAMPLING_MODE from force_chroma_subsampling_1tap */
+ uint32_t fmt_cbcr_bit_reduction_bypass; /* FMT_CONTROL->FMT_CBCR_BIT_REDUCTION_BYPASS from pixel_encoding bypass control */
+ uint32_t fmt_stereosync_override; /* FMT_CONTROL->FMT_STEREOSYNC_OVERRIDE from stereo timing override */
+ uint32_t fmt_spatial_dither_frame_counter_max; /* FMT_CONTROL->FMT_SPATIAL_DITHER_FRAME_COUNTER_MAX from fmt_bit_depth->flags */
+ uint32_t fmt_spatial_dither_frame_counter_bit_swap; /* FMT_CONTROL->FMT_SPATIAL_DITHER_FRAME_COUNTER_BIT_SWAP from dither control */
+ uint32_t fmt_truncate_enable; /* FMT_CONTROL->FMT_TRUNCATE_EN from fmt_bit_depth->flags.TRUNCATE_ENABLED */
+ uint32_t fmt_truncate_depth; /* FMT_CONTROL->FMT_TRUNCATE_DEPTH from fmt_bit_depth->flags.TRUNCATE_DEPTH */
+ uint32_t fmt_truncate_mode; /* FMT_CONTROL->FMT_TRUNCATE_MODE from fmt_bit_depth->flags.TRUNCATE_MODE */
+ uint32_t fmt_spatial_dither_enable; /* FMT_CONTROL->FMT_SPATIAL_DITHER_EN from fmt_bit_depth->flags.SPATIAL_DITHER_ENABLED */
+ uint32_t fmt_spatial_dither_mode; /* FMT_CONTROL->FMT_SPATIAL_DITHER_MODE from fmt_bit_depth->flags.SPATIAL_DITHER_MODE */
+ uint32_t fmt_spatial_dither_depth; /* FMT_CONTROL->FMT_SPATIAL_DITHER_DEPTH from fmt_bit_depth->flags.SPATIAL_DITHER_DEPTH */
+ uint32_t fmt_temporal_dither_enable; /* FMT_CONTROL->FMT_TEMPORAL_DITHER_EN from fmt_bit_depth->flags.TEMPORAL_DITHER_ENABLED */
+ uint32_t fmt_clamp_data_enable; /* FMT_CONTROL->FMT_CLAMP_DATA_EN from clamping->clamping_range enable */
+ uint32_t fmt_clamp_color_format; /* FMT_CONTROL->FMT_CLAMP_COLOR_FORMAT from clamping->color_format */
+ uint32_t fmt_dynamic_exp_enable; /* FMT_CONTROL->FMT_DYNAMIC_EXP_EN from color_sp/color_dpth/signal */
+ uint32_t fmt_dynamic_exp_mode; /* FMT_CONTROL->FMT_DYNAMIC_EXP_MODE from color space mode mapping */
+ uint32_t fmt_bit_depth_control; /* Legacy field - kept for compatibility */
+
+ /* OPP Pipe Control - 1 field from OPP_PIPE_CONTROL register */
+ uint32_t opp_pipe_clock_enable; /* OPP_PIPE_CONTROL->OPP_PIPE_CLOCK_EN from enable parameter (bool) */
+
+ /* OPP CRC Control - 3 fields from OPP_PIPE_CRC_CONTROL register */
+ uint32_t opp_crc_enable; /* OPP_PIPE_CRC_CONTROL->CRC_EN from CRC enable control */
+ uint32_t opp_crc_select_source; /* OPP_PIPE_CRC_CONTROL->CRC_SELECT_SOURCE from CRC source selection */
+ uint32_t opp_crc_stereo_cont; /* OPP_PIPE_CRC_CONTROL->CRC_STEREO_CONT from stereo continuous CRC */
+
+ /* Output Buffer (OPPBUF) Control - 6 fields from OPPBUF_CONTROL register */
+ uint32_t oppbuf_active_width; /* OPPBUF_CONTROL->OPPBUF_ACTIVE_WIDTH from oppbuf_params->active_width */
+ uint32_t oppbuf_pixel_repetition; /* OPPBUF_CONTROL->OPPBUF_PIXEL_REPETITION from oppbuf_params->pixel_repetition */
+ uint32_t oppbuf_display_segmentation; /* OPPBUF_CONTROL->OPPBUF_DISPLAY_SEGMENTATION from oppbuf_params->mso_segmentation */
+ uint32_t oppbuf_overlap_pixel_num; /* OPPBUF_CONTROL->OPPBUF_OVERLAP_PIXEL_NUM from oppbuf_params->mso_overlap_pixel_num */
+ uint32_t oppbuf_3d_vact_space1_size; /* OPPBUF_CONTROL->OPPBUF_3D_VACT_SPACE1_SIZE from 3D timing space1_size */
+ uint32_t oppbuf_3d_vact_space2_size; /* OPPBUF_CONTROL->OPPBUF_3D_VACT_SPACE2_SIZE from 3D timing space2_size */
+
+ /* DSC Forward Config - 3 fields from DSCRM_DSC_FORWARD_CONFIG register */
+ uint32_t dscrm_dsc_forward_enable; /* DSCRM_DSC_FORWARD_CONFIG->DSCRM_DSC_FORWARD_EN from DSC forward enable control */
+ uint32_t dscrm_dsc_opp_pipe_source; /* DSCRM_DSC_FORWARD_CONFIG->DSCRM_DSC_OPP_PIPE_SOURCE from opp_pipe parameter */
+ uint32_t dscrm_dsc_forward_enable_status; /* DSCRM_DSC_FORWARD_CONFIG->DSCRM_DSC_FORWARD_EN_STATUS from DSC forward status (read-only) */
+ } opp[MAX_PIPES];
+
+ /* OPTC register programming variables for each pipe */
+ struct {
+ uint32_t otg_master_inst;
+
+ /* OTG_CONTROL register - 5 fields for OTG control */
+ uint32_t otg_master_enable; /* OTG_CONTROL->OTG_MASTER_EN from timing enable/disable control */
+ uint32_t otg_disable_point_cntl; /* OTG_CONTROL->OTG_DISABLE_POINT_CNTL from disable timing control */
+ uint32_t otg_start_point_cntl; /* OTG_CONTROL->OTG_START_POINT_CNTL from start timing control */
+ uint32_t otg_field_number_cntl; /* OTG_CONTROL->OTG_FIELD_NUMBER_CNTL from interlace field control */
+ uint32_t otg_out_mux; /* OTG_CONTROL->OTG_OUT_MUX from output mux selection */
+
+ /* OTG Horizontal Timing - 7 fields */
+ uint32_t otg_h_total; /* OTG_H_TOTAL->OTG_H_TOTAL from dc_crtc_timing->h_total */
+ uint32_t otg_h_blank_start; /* OTG_H_BLANK_START_END->OTG_H_BLANK_START from dc_crtc_timing->h_front_porch */
+ uint32_t otg_h_blank_end; /* OTG_H_BLANK_START_END->OTG_H_BLANK_END from dc_crtc_timing->h_addressable_video_pixel_width */
+ uint32_t otg_h_sync_start; /* OTG_H_SYNC_A->OTG_H_SYNC_A_START from dc_crtc_timing->h_sync_width */
+ uint32_t otg_h_sync_end; /* OTG_H_SYNC_A->OTG_H_SYNC_A_END from calculated sync end position */
+ uint32_t otg_h_sync_polarity; /* OTG_H_SYNC_A_CNTL->OTG_H_SYNC_A_POL from dc_crtc_timing->flags.HSYNC_POSITIVE_POLARITY */
+ uint32_t otg_h_timing_div_mode; /* OTG_H_TIMING_CNTL->OTG_H_TIMING_DIV_MODE from horizontal timing division mode */
+
+ /* OTG Vertical Timing - 7 fields */
+ uint32_t otg_v_total; /* OTG_V_TOTAL->OTG_V_TOTAL from dc_crtc_timing->v_total */
+ uint32_t otg_v_blank_start; /* OTG_V_BLANK_START_END->OTG_V_BLANK_START from dc_crtc_timing->v_front_porch */
+ uint32_t otg_v_blank_end; /* OTG_V_BLANK_START_END->OTG_V_BLANK_END from dc_crtc_timing->v_addressable_video_line_width */
+ uint32_t otg_v_sync_start; /* OTG_V_SYNC_A->OTG_V_SYNC_A_START from dc_crtc_timing->v_sync_width */
+ uint32_t otg_v_sync_end; /* OTG_V_SYNC_A->OTG_V_SYNC_A_END from calculated sync end position */
+ uint32_t otg_v_sync_polarity; /* OTG_V_SYNC_A_CNTL->OTG_V_SYNC_A_POL from dc_crtc_timing->flags.VSYNC_POSITIVE_POLARITY */
+ uint32_t otg_v_sync_mode; /* OTG_V_SYNC_A_CNTL->OTG_V_SYNC_MODE from sync mode selection */
+
+ /* OTG DRR (Dynamic Refresh Rate) Control - 8 fields */
+ uint32_t otg_v_total_max; /* OTG_V_TOTAL_MAX->OTG_V_TOTAL_MAX from drr_params->vertical_total_max */
+ uint32_t otg_v_total_min; /* OTG_V_TOTAL_MIN->OTG_V_TOTAL_MIN from drr_params->vertical_total_min */
+ uint32_t otg_v_total_mid; /* OTG_V_TOTAL_MID->OTG_V_TOTAL_MID from drr_params->vertical_total_mid */
+ uint32_t otg_v_total_max_sel; /* OTG_V_TOTAL_CONTROL->OTG_V_TOTAL_MAX_SEL from DRR max selection enable */
+ uint32_t otg_v_total_min_sel; /* OTG_V_TOTAL_CONTROL->OTG_V_TOTAL_MIN_SEL from DRR min selection enable */
+ uint32_t otg_vtotal_mid_replacing_max_en; /* OTG_V_TOTAL_CONTROL->OTG_VTOTAL_MID_REPLACING_MAX_EN from DRR mid-frame enable */
+ uint32_t otg_vtotal_mid_frame_num; /* OTG_V_TOTAL_CONTROL->OTG_VTOTAL_MID_FRAME_NUM from drr_params->vertical_total_mid_frame_num */
+ uint32_t otg_set_v_total_min_mask; /* OTG_V_TOTAL_CONTROL->OTG_SET_V_TOTAL_MIN_MASK from DRR trigger mask */
+ uint32_t otg_force_lock_on_event; /* OTG_V_TOTAL_CONTROL->OTG_FORCE_LOCK_ON_EVENT from DRR force lock control */
+
+ /* OPTC Data Source and ODM - 6 fields */
+ uint32_t optc_seg0_src_sel; /* OPTC_DATA_SOURCE_SELECT->OPTC_SEG0_SRC_SEL from opp_id[0] ODM segment 0 source */
+ uint32_t optc_seg1_src_sel; /* OPTC_DATA_SOURCE_SELECT->OPTC_SEG1_SRC_SEL from opp_id[1] ODM segment 1 source */
+ uint32_t optc_seg2_src_sel; /* OPTC_DATA_SOURCE_SELECT->OPTC_SEG2_SRC_SEL from opp_id[2] ODM segment 2 source */
+ uint32_t optc_seg3_src_sel; /* OPTC_DATA_SOURCE_SELECT->OPTC_SEG3_SRC_SEL from opp_id[3] ODM segment 3 source */
+ uint32_t optc_num_of_input_segment; /* OPTC_DATA_SOURCE_SELECT->OPTC_NUM_OF_INPUT_SEGMENT from opp_cnt-1 number of input segments */
+ uint32_t optc_mem_sel; /* OPTC_MEMORY_CONFIG->OPTC_MEM_SEL from memory_mask ODM memory selection */
+
+ /* OPTC Data Format and DSC - 4 fields */
+ uint32_t optc_data_format; /* OPTC_DATA_FORMAT_CONTROL->OPTC_DATA_FORMAT from data format selection */
+ uint32_t optc_dsc_mode; /* OPTC_DATA_FORMAT_CONTROL->OPTC_DSC_MODE from dsc_mode parameter */
+ uint32_t optc_dsc_bytes_per_pixel; /* OPTC_BYTES_PER_PIXEL->OPTC_DSC_BYTES_PER_PIXEL from dsc_bytes_per_pixel parameter */
+ uint32_t optc_segment_width; /* OPTC_WIDTH_CONTROL->OPTC_SEGMENT_WIDTH from segment_width parameter */
+ uint32_t optc_dsc_slice_width; /* OPTC_WIDTH_CONTROL->OPTC_DSC_SLICE_WIDTH from dsc_slice_width parameter */
+
+ /* OPTC Clock and Underflow Control - 4 fields */
+ uint32_t optc_input_pix_clk_en; /* OPTC_INPUT_CLOCK_CONTROL->OPTC_INPUT_PIX_CLK_EN from pixel clock enable */
+ uint32_t optc_underflow_occurred_status; /* OPTC_INPUT_GLOBAL_CONTROL->OPTC_UNDERFLOW_OCCURRED_STATUS from underflow status (read-only) */
+ uint32_t optc_underflow_clear; /* OPTC_INPUT_GLOBAL_CONTROL->OPTC_UNDERFLOW_CLEAR from underflow clear control */
+ uint32_t otg_clock_enable; /* OTG_CLOCK_CONTROL->OTG_CLOCK_EN from OTG clock enable */
+ uint32_t otg_clock_gate_dis; /* OTG_CLOCK_CONTROL->OTG_CLOCK_GATE_DIS from clock gate disable */
+
+ /* OTG Stereo and 3D Control - 6 fields */
+ uint32_t otg_stereo_enable; /* OTG_STEREO_CONTROL->OTG_STEREO_EN from stereo enable control */
+ uint32_t otg_stereo_sync_output_line_num; /* OTG_STEREO_CONTROL->OTG_STEREO_SYNC_OUTPUT_LINE_NUM from timing->stereo_3d_format line num */
+ uint32_t otg_stereo_sync_output_polarity; /* OTG_STEREO_CONTROL->OTG_STEREO_SYNC_OUTPUT_POLARITY from stereo polarity control */
+ uint32_t otg_3d_structure_en; /* OTG_3D_STRUCTURE_CONTROL->OTG_3D_STRUCTURE_EN from 3D structure enable */
+ uint32_t otg_3d_structure_v_update_mode; /* OTG_3D_STRUCTURE_CONTROL->OTG_3D_STRUCTURE_V_UPDATE_MODE from 3D vertical update mode */
+ uint32_t otg_3d_structure_stereo_sel_ovr; /* OTG_3D_STRUCTURE_CONTROL->OTG_3D_STRUCTURE_STEREO_SEL_OVR from 3D stereo selection override */
+ uint32_t otg_interlace_enable; /* OTG_INTERLACE_CONTROL->OTG_INTERLACE_ENABLE from dc_crtc_timing->flags.INTERLACE */
+
+ /* OTG GSL (Global Sync Lock) Control - 5 fields */
+ uint32_t otg_gsl0_en; /* OTG_GSL_CONTROL->OTG_GSL0_EN from GSL group 0 enable */
+ uint32_t otg_gsl1_en; /* OTG_GSL_CONTROL->OTG_GSL1_EN from GSL group 1 enable */
+ uint32_t otg_gsl2_en; /* OTG_GSL_CONTROL->OTG_GSL2_EN from GSL group 2 enable */
+ uint32_t otg_gsl_master_en; /* OTG_GSL_CONTROL->OTG_GSL_MASTER_EN from GSL master enable */
+ uint32_t otg_gsl_master_mode; /* OTG_GSL_CONTROL->OTG_GSL_MASTER_MODE from gsl_params->gsl_master mode */
+
+ /* OTG DRR Advanced Control - 4 fields */
+ uint32_t otg_v_total_last_used_by_drr; /* OTG_DRR_CONTROL->OTG_V_TOTAL_LAST_USED_BY_DRR from last used DRR V_TOTAL (read-only) */
+ uint32_t otg_drr_trigger_window_start_x; /* OTG_DRR_TRIGGER_WINDOW->OTG_DRR_TRIGGER_WINDOW_START_X from window_start parameter */
+ uint32_t otg_drr_trigger_window_end_x; /* OTG_DRR_TRIGGER_WINDOW->OTG_DRR_TRIGGER_WINDOW_END_X from window_end parameter */
+ uint32_t otg_drr_v_total_change_limit; /* OTG_DRR_V_TOTAL_CHANGE->OTG_DRR_V_TOTAL_CHANGE_LIMIT from limit parameter */
+
+ /* OTG DSC Position Control - 2 fields */
+ uint32_t otg_dsc_start_position_x; /* OTG_DSC_START_POSITION->OTG_DSC_START_POSITION_X from DSC start X position */
+ uint32_t otg_dsc_start_position_line_num; /* OTG_DSC_START_POSITION->OTG_DSC_START_POSITION_LINE_NUM from DSC start line number */
+
+ /* OTG Double Buffer Control - 2 fields */
+ uint32_t otg_drr_timing_dbuf_update_mode; /* OTG_DOUBLE_BUFFER_CONTROL->OTG_DRR_TIMING_DBUF_UPDATE_MODE from DRR double buffer mode */
+ uint32_t otg_blank_data_double_buffer_en; /* OTG_DOUBLE_BUFFER_CONTROL->OTG_BLANK_DATA_DOUBLE_BUFFER_EN from blank data double buffer enable */
+
+ /* OTG Vertical Interrupts - 6 fields */
+ uint32_t otg_vertical_interrupt0_int_enable; /* OTG_VERTICAL_INTERRUPT0_CONTROL->OTG_VERTICAL_INTERRUPT0_INT_ENABLE from interrupt 0 enable */
+ uint32_t otg_vertical_interrupt0_line_start; /* OTG_VERTICAL_INTERRUPT0_POSITION->OTG_VERTICAL_INTERRUPT0_LINE_START from start_line parameter */
+ uint32_t otg_vertical_interrupt1_int_enable; /* OTG_VERTICAL_INTERRUPT1_CONTROL->OTG_VERTICAL_INTERRUPT1_INT_ENABLE from interrupt 1 enable */
+ uint32_t otg_vertical_interrupt1_line_start; /* OTG_VERTICAL_INTERRUPT1_POSITION->OTG_VERTICAL_INTERRUPT1_LINE_START from start_line parameter */
+ uint32_t otg_vertical_interrupt2_int_enable; /* OTG_VERTICAL_INTERRUPT2_CONTROL->OTG_VERTICAL_INTERRUPT2_INT_ENABLE from interrupt 2 enable */
+ uint32_t otg_vertical_interrupt2_line_start; /* OTG_VERTICAL_INTERRUPT2_POSITION->OTG_VERTICAL_INTERRUPT2_LINE_START from start_line parameter */
+
+ /* OTG Global Sync Parameters - 6 fields */
+ uint32_t otg_vready_offset; /* OTG_VREADY_PARAM->OTG_VREADY_OFFSET from vready_offset parameter */
+ uint32_t otg_vstartup_start; /* OTG_VSTARTUP_PARAM->OTG_VSTARTUP_START from vstartup_start parameter */
+ uint32_t otg_vupdate_offset; /* OTG_VUPDATE_PARAM->OTG_VUPDATE_OFFSET from vupdate_offset parameter */
+ uint32_t otg_vupdate_width; /* OTG_VUPDATE_PARAM->OTG_VUPDATE_WIDTH from vupdate_width parameter */
+ uint32_t master_update_lock_vupdate_keepout_start_offset; /* OTG_VUPDATE_KEEPOUT->MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET from pstate_keepout start */
+ uint32_t master_update_lock_vupdate_keepout_end_offset; /* OTG_VUPDATE_KEEPOUT->MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET from pstate_keepout end */
+
+ /* OTG Manual Trigger Control - 11 fields */
+ uint32_t otg_triga_source_select; /* OTG_TRIGA_CNTL->OTG_TRIGA_SOURCE_SELECT from trigger A source selection */
+ uint32_t otg_triga_source_pipe_select; /* OTG_TRIGA_CNTL->OTG_TRIGA_SOURCE_PIPE_SELECT from trigger A pipe selection */
+ uint32_t otg_triga_rising_edge_detect_cntl; /* OTG_TRIGA_CNTL->OTG_TRIGA_RISING_EDGE_DETECT_CNTL from trigger A rising edge detect */
+ uint32_t otg_triga_falling_edge_detect_cntl; /* OTG_TRIGA_CNTL->OTG_TRIGA_FALLING_EDGE_DETECT_CNTL from trigger A falling edge detect */
+ uint32_t otg_triga_polarity_select; /* OTG_TRIGA_CNTL->OTG_TRIGA_POLARITY_SELECT from trigger A polarity selection */
+ uint32_t otg_triga_frequency_select; /* OTG_TRIGA_CNTL->OTG_TRIGA_FREQUENCY_SELECT from trigger A frequency selection */
+ uint32_t otg_triga_delay; /* OTG_TRIGA_CNTL->OTG_TRIGA_DELAY from trigger A delay */
+ uint32_t otg_triga_clear; /* OTG_TRIGA_CNTL->OTG_TRIGA_CLEAR from trigger A clear */
+ uint32_t otg_triga_manual_trig; /* OTG_TRIGA_MANUAL_TRIG->OTG_TRIGA_MANUAL_TRIG from manual trigger A */
+ uint32_t otg_trigb_source_select; /* OTG_TRIGB_CNTL->OTG_TRIGB_SOURCE_SELECT from trigger B source selection */
+ uint32_t otg_trigb_polarity_select; /* OTG_TRIGB_CNTL->OTG_TRIGB_POLARITY_SELECT from trigger B polarity selection */
+ uint32_t otg_trigb_manual_trig; /* OTG_TRIGB_MANUAL_TRIG->OTG_TRIGB_MANUAL_TRIG from manual trigger B */
+
+ /* OTG Static Screen and Update Control - 6 fields */
+ uint32_t otg_static_screen_event_mask; /* OTG_STATIC_SCREEN_CONTROL->OTG_STATIC_SCREEN_EVENT_MASK from event_triggers parameter */
+ uint32_t otg_static_screen_frame_count; /* OTG_STATIC_SCREEN_CONTROL->OTG_STATIC_SCREEN_FRAME_COUNT from num_frames parameter */
+ uint32_t master_update_lock; /* OTG_MASTER_UPDATE_LOCK->MASTER_UPDATE_LOCK from update lock control */
+ uint32_t master_update_mode; /* OTG_MASTER_UPDATE_MODE->MASTER_UPDATE_MODE from update mode selection */
+ uint32_t otg_force_count_now_mode; /* OTG_FORCE_COUNT_NOW_CNTL->OTG_FORCE_COUNT_NOW_MODE from force count mode */
+ uint32_t otg_force_count_now_clear; /* OTG_FORCE_COUNT_NOW_CNTL->OTG_FORCE_COUNT_NOW_CLEAR from force count clear */
+
+ /* VTG Control - 3 fields */
+ uint32_t vtg0_enable; /* CONTROL->VTG0_ENABLE from VTG enable control */
+ uint32_t vtg0_fp2; /* CONTROL->VTG0_FP2 from VTG front porch 2 */
+ uint32_t vtg0_vcount_init; /* CONTROL->VTG0_VCOUNT_INIT from VTG vertical count init */
+
+ /* OTG Status (Read-Only) - 12 fields */
+ uint32_t otg_v_blank; /* OTG_STATUS->OTG_V_BLANK from vertical blank status (read-only) */
+ uint32_t otg_v_active_disp; /* OTG_STATUS->OTG_V_ACTIVE_DISP from vertical active display (read-only) */
+ uint32_t otg_frame_count; /* OTG_STATUS_FRAME_COUNT->OTG_FRAME_COUNT from frame count (read-only) */
+ uint32_t otg_horz_count; /* OTG_STATUS_POSITION->OTG_HORZ_COUNT from horizontal position (read-only) */
+ uint32_t otg_vert_count; /* OTG_STATUS_POSITION->OTG_VERT_COUNT from vertical position (read-only) */
+ uint32_t otg_horz_count_hv; /* OTG_STATUS_HV_COUNT->OTG_HORZ_COUNT from horizontal count (read-only) */
+ uint32_t otg_vert_count_nom; /* OTG_STATUS_HV_COUNT->OTG_VERT_COUNT_NOM from vertical count nominal (read-only) */
+ uint32_t otg_flip_pending; /* OTG_PIPE_UPDATE_STATUS->OTG_FLIP_PENDING from flip pending status (read-only) */
+ uint32_t otg_dc_reg_update_pending; /* OTG_PIPE_UPDATE_STATUS->OTG_DC_REG_UPDATE_PENDING from DC register update pending (read-only) */
+ uint32_t otg_cursor_update_pending; /* OTG_PIPE_UPDATE_STATUS->OTG_CURSOR_UPDATE_PENDING from cursor update pending (read-only) */
+ uint32_t otg_vupdate_keepout_status; /* OTG_PIPE_UPDATE_STATUS->OTG_VUPDATE_KEEPOUT_STATUS from VUPDATE keepout status (read-only) */
+ } optc[MAX_PIPES];
+
+ /* Metadata */
+ uint32_t active_pipe_count;
+ uint32_t active_stream_count;
+ bool state_valid;
+};
+
+/**
+ * dc_capture_register_software_state() - Capture software state for register programming
+ * @dc: DC context containing current display configuration
+ * @state: Pointer to dc_register_software_state structure to populate
+ *
+ * Extracts all software state variables that are used to program hardware register
+ * fields across the display driver pipeline. This provides a complete snapshot
+ * of the software configuration that drives hardware register programming.
+ *
+ * The function traverses the DC context and extracts values from:
+ * - Stream configurations (timing, format, DSC settings)
+ * - Plane states (surface format, rotation, scaling, cursor)
+ * - Pipe contexts (resource allocation, blending, viewport)
+ * - Clock manager (display clocks, DPP clocks, pixel clocks)
+ * - Resource context (DET buffer allocation, ODM configuration)
+ *
+ * This is essential for underflow debugging as it captures the exact software
+ * state that determines how registers are programmed, allowing analysis of
+ * whether underflow is caused by incorrect register programming or timing issues.
+ *
+ * Return: true if state was successfully captured, false on error
+ */
+bool dc_capture_register_software_state(struct dc *dc, struct dc_register_software_state *state);
+
#endif /* DC_INTERFACE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
index 67abda44eb1f..40d7a7d83c40 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
@@ -91,9 +91,17 @@ struct dc_vbios_funcs {
struct device_id id);
/* COMMANDS */
+ enum bp_result (*select_crtc_source)(
+ struct dc_bios *bios,
+ struct bp_crtc_source_select *bp_params);
enum bp_result (*encoder_control)(
struct dc_bios *bios,
struct bp_encoder_control *cntl);
+ enum bp_result (*dac_load_detection)(
+ struct dc_bios *bios,
+ enum engine_id engine_id,
+ enum dal_device_type device_type,
+ uint32_t enum_id);
enum bp_result (*transmitter_control)(
struct dc_bios *bios,
struct bp_transmitter_control *cntl);
@@ -140,7 +148,8 @@ struct dc_vbios_funcs {
enum bp_result (*enable_lvtma_control)(
struct dc_bios *bios,
uint8_t uc_pwr_on,
- uint8_t panel_instance);
+ uint8_t pwrseq_instance,
+ uint8_t bypass_panel_control_wait);
enum bp_result (*get_soc_bb_info)(
struct dc_bios *dcb,
@@ -156,9 +165,15 @@ struct dc_vbios_funcs {
enum bp_result (*get_lttpr_interop)(
struct dc_bios *dcb,
uint8_t *dce_caps);
+
+ enum bp_result (*get_connector_speed_cap_info)(
+ struct dc_bios *bios,
+ struct graphics_object_id object_id,
+ struct bp_connector_speed_cap_info *info);
};
struct bios_registers {
+ uint32_t BIOS_SCRATCH_0;
uint32_t BIOS_SCRATCH_3;
uint32_t BIOS_SCRATCH_6;
};
@@ -177,6 +192,7 @@ struct dc_bios {
struct dc_firmware_info fw_info;
bool fw_info_valid;
struct dc_vram_info vram_info;
+ struct bp_soc_bb_info bb_info;
struct dc_golden_table golden_table;
};
diff --git a/drivers/gpu/drm/amd/display/dc/dc_ddc_types.h b/drivers/gpu/drm/amd/display/dc/dc_ddc_types.h
index 7769bd099a5a..428e3a9ab65a 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_ddc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_ddc_types.h
@@ -77,6 +77,32 @@ struct aux_reply_transaction_data {
uint8_t *data;
};
+struct aux_payload {
+ /* set following flag to read/write I2C data,
+ * reset it to read/write DPCD data */
+ bool i2c_over_aux;
+ /* set following flag to write data,
+ * reset it to read data */
+ bool write;
+ bool mot;
+ bool write_status_update;
+
+ uint32_t address;
+ uint32_t length;
+ uint8_t *data;
+ /*
+ * used to return the reply type of the transaction
+ * ignored if NULL
+ */
+ uint8_t *reply;
+ /* expressed in milliseconds
+ * zero means "use default value"
+ */
+ uint32_t defer_delay;
+
+};
+#define DEFAULT_AUX_MAX_DATA_SIZE 16
+
struct i2c_payload {
bool write;
uint8_t address;
@@ -90,6 +116,8 @@ enum i2c_command_engine {
I2C_COMMAND_ENGINE_HW
};
+#define DDC_I2C_COMMAND_ENGINE I2C_COMMAND_ENGINE_SW
+
struct i2c_command {
struct i2c_payload *payloads;
uint8_t number_of_payloads;
@@ -150,6 +178,9 @@ enum display_dongle_type {
DISPLAY_DONGLE_DP_HDMI_MISMATCHED_DONGLE,
};
+#define DC_MAX_EDID_BUFFER_SIZE 2048
+#define DC_EDID_BLOCK_SIZE 128
+
struct ddc_service {
struct ddc *ddc_pin;
struct ddc_flags flags;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 360f3199ea6f..7b09af1cb306 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -23,13 +23,23 @@
*
*/
+#include "dm_services.h"
#include "dc.h"
#include "dc_dmub_srv.h"
#include "../dmub/dmub_srv.h"
#include "dm_helpers.h"
+#include "dc_hw_types.h"
+#include "core_types.h"
+#include "../basics/conversion.h"
+#include "cursor_reg_cache.h"
+#include "resource.h"
+#include "clk_mgr.h"
+#include "dc_state_priv.h"
+#include "dc_plane_priv.h"
#define CTX dc_dmub_srv->ctx
#define DC_LOGGER CTX->logger
+#define GPINT_RETRY_NUM 20
static void dc_dmub_srv_construct(struct dc_dmub_srv *dc_srv, struct dc *dc,
struct dmub_srv *dmub)
@@ -61,129 +71,273 @@ void dc_dmub_srv_destroy(struct dc_dmub_srv **dmub_srv)
}
}
-void dc_dmub_srv_cmd_queue(struct dc_dmub_srv *dc_dmub_srv,
- union dmub_rb_cmd *cmd)
+bool dc_dmub_srv_wait_for_pending(struct dc_dmub_srv *dc_dmub_srv)
{
- struct dmub_srv *dmub = dc_dmub_srv->dmub;
- struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+ struct dmub_srv *dmub;
+ struct dc_context *dc_ctx;
enum dmub_status status;
- status = dmub_srv_cmd_queue(dmub, cmd);
- if (status == DMUB_STATUS_OK)
- return;
+ if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+ return false;
- if (status != DMUB_STATUS_QUEUE_FULL)
- goto error;
+ dc_ctx = dc_dmub_srv->ctx;
+ dmub = dc_dmub_srv->dmub;
- /* Execute and wait for queue to become empty again. */
- dc_dmub_srv_cmd_execute(dc_dmub_srv);
- dc_dmub_srv_wait_idle(dc_dmub_srv);
+ do {
+ status = dmub_srv_wait_for_pending(dmub, 100000);
+ } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK);
- /* Requeue the command. */
- status = dmub_srv_cmd_queue(dmub, cmd);
- if (status == DMUB_STATUS_OK)
- return;
+ if (status != DMUB_STATUS_OK) {
+ DC_ERROR("Error waiting for DMUB idle: status=%d\n", status);
+ dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+ }
-error:
- DC_ERROR("Error queuing DMUB command: status=%d\n", status);
- dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+ return status == DMUB_STATUS_OK;
}
-void dc_dmub_srv_cmd_execute(struct dc_dmub_srv *dc_dmub_srv)
+void dc_dmub_srv_clear_inbox0_ack(struct dc_dmub_srv *dc_dmub_srv)
{
struct dmub_srv *dmub = dc_dmub_srv->dmub;
struct dc_context *dc_ctx = dc_dmub_srv->ctx;
- enum dmub_status status;
+ enum dmub_status status = DMUB_STATUS_OK;
- status = dmub_srv_cmd_execute(dmub);
+ status = dmub_srv_clear_inbox0_ack(dmub);
if (status != DMUB_STATUS_OK) {
- DC_ERROR("Error starting DMUB execution: status=%d\n", status);
+ DC_ERROR("Error clearing INBOX0 ack: status=%d\n", status);
dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
}
}
-void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv)
+void dc_dmub_srv_wait_for_inbox0_ack(struct dc_dmub_srv *dc_dmub_srv)
{
struct dmub_srv *dmub = dc_dmub_srv->dmub;
struct dc_context *dc_ctx = dc_dmub_srv->ctx;
- enum dmub_status status;
+ enum dmub_status status = DMUB_STATUS_OK;
- status = dmub_srv_wait_for_idle(dmub, 100000);
+ status = dmub_srv_wait_for_inbox0_ack(dmub, 100000);
if (status != DMUB_STATUS_OK) {
- DC_ERROR("Error waiting for DMUB idle: status=%d\n", status);
+ DC_ERROR("Error waiting for INBOX0 HW Lock Ack\n");
dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
}
}
-void dc_dmub_srv_send_inbox0_cmd(struct dc_dmub_srv *dmub_srv,
- union dmub_inbox0_data_register data)
+void dc_dmub_srv_send_inbox0_cmd(struct dc_dmub_srv *dc_dmub_srv,
+ union dmub_inbox0_data_register data)
{
- struct dmub_srv *dmub = dmub_srv->dmub;
- if (dmub->hw_funcs.send_inbox0_cmd)
- dmub->hw_funcs.send_inbox0_cmd(dmub, data);
- // TODO: Add wait command -- poll register for ACK
+ struct dmub_srv *dmub = dc_dmub_srv->dmub;
+ struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+ enum dmub_status status = DMUB_STATUS_OK;
+
+ status = dmub_srv_send_inbox0_cmd(dmub, data);
+ if (status != DMUB_STATUS_OK) {
+ DC_ERROR("Error sending INBOX0 cmd\n");
+ dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+ }
+}
+
+static bool dc_dmub_srv_reg_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv,
+ unsigned int count,
+ union dmub_rb_cmd *cmd_list)
+{
+ struct dc_context *dc_ctx;
+ struct dmub_srv *dmub;
+ enum dmub_status status = DMUB_STATUS_OK;
+ int i;
+
+ if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+ return false;
+
+ dc_ctx = dc_dmub_srv->ctx;
+ dmub = dc_dmub_srv->dmub;
+
+ for (i = 0 ; i < count; i++) {
+ /* confirm no messages pending */
+ do {
+ status = dmub_srv_wait_for_idle(dmub, 100000);
+ } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK);
+
+ /* queue command */
+ if (status == DMUB_STATUS_OK)
+ status = dmub_srv_reg_cmd_execute(dmub, &cmd_list[i]);
+
+ /* check for errors */
+ if (status != DMUB_STATUS_OK) {
+ break;
+ }
+ }
+
+ if (status != DMUB_STATUS_OK) {
+ if (status != DMUB_STATUS_POWER_STATE_D3) {
+ DC_ERROR("Error starting DMUB execution: status=%d\n", status);
+ dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+ }
+ return false;
+ }
+
+ return true;
}
-bool dc_dmub_srv_cmd_with_reply_data(struct dc_dmub_srv *dc_dmub_srv, union dmub_rb_cmd *cmd)
+static bool dc_dmub_srv_fb_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv,
+ unsigned int count,
+ union dmub_rb_cmd *cmd_list)
{
+ struct dc_context *dc_ctx;
struct dmub_srv *dmub;
enum dmub_status status;
+ int i;
if (!dc_dmub_srv || !dc_dmub_srv->dmub)
return false;
+ dc_ctx = dc_dmub_srv->ctx;
dmub = dc_dmub_srv->dmub;
- status = dmub_srv_cmd_with_reply_data(dmub, cmd);
+ for (i = 0 ; i < count; i++) {
+ // Queue command
+ if (!cmd_list[i].cmd_common.header.multi_cmd_pending ||
+ dmub_rb_num_free(&dmub->inbox1.rb) >= count - i) {
+ status = dmub_srv_fb_cmd_queue(dmub, &cmd_list[i]);
+ } else {
+ status = DMUB_STATUS_QUEUE_FULL;
+ }
+
+ if (status == DMUB_STATUS_QUEUE_FULL) {
+ /* Execute and wait for queue to become empty again. */
+ status = dmub_srv_fb_cmd_execute(dmub);
+ if (status == DMUB_STATUS_POWER_STATE_D3)
+ return false;
+
+ do {
+ status = dmub_srv_wait_for_inbox_free(dmub, 100000, count - i);
+ } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK);
+
+ /* Requeue the command. */
+ status = dmub_srv_fb_cmd_queue(dmub, &cmd_list[i]);
+ }
+
+ if (status != DMUB_STATUS_OK) {
+ if (status != DMUB_STATUS_POWER_STATE_D3) {
+ DC_ERROR("Error queueing DMUB command: status=%d\n", status);
+ dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+ }
+ return false;
+ }
+ }
+
+ status = dmub_srv_fb_cmd_execute(dmub);
if (status != DMUB_STATUS_OK) {
- DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status);
+ if (status != DMUB_STATUS_POWER_STATE_D3) {
+ DC_ERROR("Error starting DMUB execution: status=%d\n", status);
+ dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+ }
return false;
}
return true;
}
-void dc_dmub_srv_wait_phy_init(struct dc_dmub_srv *dc_dmub_srv)
+bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv,
+ unsigned int count,
+ union dmub_rb_cmd *cmd_list)
{
- struct dmub_srv *dmub = dc_dmub_srv->dmub;
- struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+ bool res = false;
+
+ if (dc_dmub_srv && dc_dmub_srv->dmub) {
+ if (dc_dmub_srv->dmub->inbox_type == DMUB_CMD_INTERFACE_REG) {
+ res = dc_dmub_srv_reg_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list);
+ } else {
+ res = dc_dmub_srv_fb_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list);
+ }
+
+ if (res)
+ res = dmub_srv_update_inbox_status(dc_dmub_srv->dmub) == DMUB_STATUS_OK;
+ }
+
+ return res;
+}
+
+bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv,
+ enum dm_dmub_wait_type wait_type,
+ union dmub_rb_cmd *cmd_list)
+{
+ struct dmub_srv *dmub;
enum dmub_status status;
- for (;;) {
- /* Wait up to a second for PHY init. */
- status = dmub_srv_wait_for_phy_init(dmub, 1000000);
- if (status == DMUB_STATUS_OK)
- /* Initialization OK */
- break;
+ if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+ return false;
- DC_ERROR("DMCUB PHY init failed: status=%d\n", status);
- ASSERT(0);
+ dmub = dc_dmub_srv->dmub;
- if (status != DMUB_STATUS_TIMEOUT)
- /*
- * Server likely initialized or we don't have
- * DMCUB HW support - this won't end.
- */
- break;
+ // Wait for DMUB to process command
+ if (wait_type != DM_DMUB_WAIT_TYPE_NO_WAIT) {
+ do {
+ status = dmub_srv_wait_for_idle(dmub, 100000);
+ } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK);
+
+ if (status != DMUB_STATUS_OK) {
+ DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status);
+ if (!dmub->debug.timeout_info.timeout_occured) {
+ dmub->debug.timeout_info.timeout_occured = true;
+ if (cmd_list)
+ dmub->debug.timeout_info.timeout_cmd = *cmd_list;
+ dmub->debug.timeout_info.timestamp = dm_get_timestamp(dc_dmub_srv->ctx);
+ }
+ dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+ return false;
+ }
- /* Continue spinning so we don't hang the ASIC. */
+ // Copy data back from ring buffer into command
+ if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY && cmd_list) {
+ dmub_srv_cmd_get_response(dc_dmub_srv->dmub, cmd_list);
+ }
}
+
+ return true;
}
-bool dc_dmub_srv_notify_stream_mask(struct dc_dmub_srv *dc_dmub_srv,
- unsigned int stream_mask)
+bool dc_dmub_srv_cmd_run(struct dc_dmub_srv *dc_dmub_srv, union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type)
+{
+ return dc_dmub_srv_cmd_run_list(dc_dmub_srv, 1, cmd, wait_type);
+}
+
+bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int count, union dmub_rb_cmd *cmd_list, enum dm_dmub_wait_type wait_type)
+{
+ if (!dc_dmub_srv_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list))
+ return false;
+
+ return dc_dmub_srv_wait_for_idle(dc_dmub_srv, wait_type, cmd_list);
+}
+
+bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv)
{
struct dmub_srv *dmub;
- const uint32_t timeout = 30;
+ struct dc_context *dc_ctx;
+ union dmub_fw_boot_status boot_status;
+ enum dmub_status status;
if (!dc_dmub_srv || !dc_dmub_srv->dmub)
return false;
dmub = dc_dmub_srv->dmub;
+ dc_ctx = dc_dmub_srv->ctx;
+
+ status = dmub_srv_get_fw_boot_status(dmub, &boot_status);
+ if (status != DMUB_STATUS_OK) {
+ DC_ERROR("Error querying DMUB boot status: error=%d\n", status);
+ return false;
+ }
+
+ return boot_status.bits.optimized_init_done;
+}
- return dmub_srv_send_gpint_command(
- dmub, DMUB_GPINT__IDLE_OPT_NOTIFY_STREAM_MASK,
- stream_mask, timeout) == DMUB_STATUS_OK;
+bool dc_dmub_srv_notify_stream_mask(struct dc_dmub_srv *dc_dmub_srv,
+ unsigned int stream_mask)
+{
+ if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+ return false;
+
+ return dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__IDLE_OPT_NOTIFY_STREAM_MASK,
+ stream_mask, NULL, DM_DMUB_WAIT_TYPE_WAIT);
}
bool dc_dmub_srv_is_restore_required(struct dc_dmub_srv *dc_dmub_srv)
@@ -219,93 +373,1998 @@ void dc_dmub_trace_event_control(struct dc *dc, bool enable)
dm_helpers_dmub_outbox_interrupt_control(dc->ctx, enable);
}
-bool dc_dmub_srv_get_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv, struct dmub_diagnostic_data *diag_data)
+void dc_dmub_srv_drr_update_cmd(struct dc *dc, uint32_t tg_inst, uint32_t vtotal_min, uint32_t vtotal_max)
{
- if (!dc_dmub_srv || !dc_dmub_srv->dmub || !diag_data)
+ union dmub_rb_cmd cmd = { 0 };
+
+ cmd.drr_update.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
+ cmd.drr_update.header.sub_type = DMUB_CMD__FAMS_DRR_UPDATE;
+ cmd.drr_update.dmub_optc_state_req.v_total_max = vtotal_max;
+ cmd.drr_update.dmub_optc_state_req.v_total_min = vtotal_min;
+ cmd.drr_update.dmub_optc_state_req.tg_inst = tg_inst;
+
+ cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header);
+
+ // Send the command to the DMCUB.
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst)
+{
+ union dmub_rb_cmd cmd = { 0 };
+
+ cmd.drr_update.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
+ cmd.drr_update.header.sub_type = DMUB_CMD__FAMS_SET_MANUAL_TRIGGER;
+ cmd.drr_update.dmub_optc_state_req.tg_inst = tg_inst;
+
+ cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header);
+
+ // Send the command to the DMCUB.
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+static uint8_t dc_dmub_srv_get_pipes_for_stream(struct dc *dc, struct dc_stream_state *stream)
+{
+ uint8_t pipes = 0;
+ int i = 0;
+
+ for (i = 0; i < MAX_PIPES; i++) {
+ struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ if (pipe->stream == stream && pipe->stream_res.tg)
+ pipes = i;
+ }
+ return pipes;
+}
+
+static void dc_dmub_srv_populate_fams_pipe_info(struct dc *dc, struct dc_state *context,
+ struct pipe_ctx *head_pipe,
+ struct dmub_cmd_fw_assisted_mclk_switch_pipe_data *fams_pipe_data)
+{
+ int j;
+ int pipe_idx = 0;
+
+ fams_pipe_data->pipe_index[pipe_idx++] = head_pipe->plane_res.hubp->inst;
+ for (j = 0; j < dc->res_pool->pipe_count; j++) {
+ struct pipe_ctx *split_pipe = &context->res_ctx.pipe_ctx[j];
+
+ if (split_pipe->stream == head_pipe->stream && (split_pipe->top_pipe || split_pipe->prev_odm_pipe)) {
+ fams_pipe_data->pipe_index[pipe_idx++] = split_pipe->plane_res.hubp->inst;
+ }
+ }
+ fams_pipe_data->pipe_count = pipe_idx;
+}
+
+bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool should_manage_pstate, struct dc_state *context)
+{
+ union dmub_rb_cmd cmd = { 0 };
+ struct dmub_cmd_fw_assisted_mclk_switch_config *config_data = &cmd.fw_assisted_mclk_switch.config_data;
+ int i = 0, k = 0;
+ int ramp_up_num_steps = 1; // TODO: Ramp is currently disabled. Reenable it.
+ uint8_t visual_confirm_enabled;
+ struct dc_stream_status *stream_status = NULL;
+
+ if (dc == NULL)
return false;
- return dmub_srv_get_diagnostic_data(dc_dmub_srv->dmub, diag_data);
+
+ visual_confirm_enabled = dc->debug.visual_confirm == VISUAL_CONFIRM_FAMS;
+
+ // Format command.
+ cmd.fw_assisted_mclk_switch.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
+ cmd.fw_assisted_mclk_switch.header.sub_type = DMUB_CMD__FAMS_SETUP_FW_CTRL;
+ cmd.fw_assisted_mclk_switch.config_data.fams_enabled = should_manage_pstate;
+ cmd.fw_assisted_mclk_switch.config_data.visual_confirm_enabled = visual_confirm_enabled;
+
+ if (should_manage_pstate) {
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe->stream)
+ continue;
+
+ /* If FAMS is being used to support P-State and there is a stream
+ * that does not use FAMS, we are in an FPO + VActive scenario.
+ * Assign vactive stretch margin in this case.
+ */
+ stream_status = dc_state_get_stream_status(context, pipe->stream);
+ if (stream_status && !stream_status->fpo_in_use) {
+ cmd.fw_assisted_mclk_switch.config_data.vactive_stretch_margin_us = dc->debug.fpo_vactive_margin_us;
+ break;
+ }
+ }
+ }
+
+ for (i = 0, k = 0; context && i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (!resource_is_pipe_type(pipe, OTG_MASTER))
+ continue;
+
+ stream_status = dc_state_get_stream_status(context, pipe->stream);
+ if (stream_status && stream_status->fpo_in_use) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ uint8_t min_refresh_in_hz = (pipe->stream->timing.min_refresh_in_uhz + 999999) / 1000000;
+
+ config_data->pipe_data[k].pix_clk_100hz = pipe->stream->timing.pix_clk_100hz;
+ config_data->pipe_data[k].min_refresh_in_hz = min_refresh_in_hz;
+ config_data->pipe_data[k].max_ramp_step = ramp_up_num_steps;
+ config_data->pipe_data[k].pipes = dc_dmub_srv_get_pipes_for_stream(dc, pipe->stream);
+ dc_dmub_srv_populate_fams_pipe_info(dc, context, pipe, &config_data->pipe_data[k]);
+ k++;
+ }
+ }
+ cmd.fw_assisted_mclk_switch.header.payload_bytes =
+ sizeof(cmd.fw_assisted_mclk_switch) - sizeof(cmd.fw_assisted_mclk_switch.header);
+
+ // Send the command to the DMCUB.
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+
+ return true;
+}
+
+void dc_dmub_srv_query_caps_cmd(struct dc_dmub_srv *dc_dmub_srv)
+{
+ union dmub_rb_cmd cmd = { 0 };
+
+ if (dc_dmub_srv->ctx->dc->debug.dmcub_emulation)
+ return;
+
+ memset(&cmd, 0, sizeof(cmd));
+
+ /* Prepare fw command */
+ cmd.query_feature_caps.header.type = DMUB_CMD__QUERY_FEATURE_CAPS;
+ cmd.query_feature_caps.header.sub_type = 0;
+ cmd.query_feature_caps.header.ret_status = 1;
+ cmd.query_feature_caps.header.payload_bytes = sizeof(struct dmub_cmd_query_feature_caps_data);
+
+ /* If command was processed, copy feature caps to dmub srv */
+ if (dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
+ cmd.query_feature_caps.header.ret_status == 0) {
+ memcpy(&dc_dmub_srv->dmub->feature_caps,
+ &cmd.query_feature_caps.query_feature_caps_data,
+ sizeof(struct dmub_feature_caps));
+ }
+}
+
+void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pipe_ctx)
+{
+ union dmub_rb_cmd cmd = { 0 };
+ unsigned int panel_inst = 0;
+
+ if (!dc_get_edp_link_panel_inst(dc, pipe_ctx->stream->link, &panel_inst) &&
+ dc->debug.visual_confirm == VISUAL_CONFIRM_DISABLE)
+ return;
+
+ memset(&cmd, 0, sizeof(cmd));
+
+ // Prepare fw command
+ cmd.visual_confirm_color.header.type = DMUB_CMD__GET_VISUAL_CONFIRM_COLOR;
+ cmd.visual_confirm_color.header.sub_type = 0;
+ cmd.visual_confirm_color.header.ret_status = 1;
+ cmd.visual_confirm_color.header.payload_bytes = sizeof(struct dmub_cmd_visual_confirm_color_data);
+ cmd.visual_confirm_color.visual_confirm_color_data.visual_confirm_color.panel_inst = panel_inst;
+
+ // If command was processed, copy feature caps to dmub srv
+ if (dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
+ cmd.visual_confirm_color.header.ret_status == 0) {
+ memcpy(&dc->ctx->dmub_srv->dmub->visual_confirm_color,
+ &cmd.visual_confirm_color.visual_confirm_color_data,
+ sizeof(struct dmub_visual_confirm_color));
+ }
+}
+
+/**
+ * populate_subvp_cmd_drr_info - Helper to populate DRR pipe info for the DMCUB subvp command
+ *
+ * @dc: [in] pointer to dc object
+ * @subvp_pipe: [in] pipe_ctx for the SubVP pipe
+ * @vblank_pipe: [in] pipe_ctx for the DRR pipe
+ * @pipe_data: [in] Pipe data which stores the VBLANK/DRR info
+ * @context: [in] DC state for access to phantom stream
+ *
+ * Populate the DMCUB SubVP command with DRR pipe info. All the information
+ * required for calculating the SubVP + DRR microschedule is populated here.
+ *
+ * High level algorithm:
+ * 1. Get timing for SubVP pipe, phantom pipe, and DRR pipe
+ * 2. Calculate the min and max vtotal which supports SubVP + DRR microschedule
+ * 3. Populate the drr_info with the min and max supported vtotal values
+ */
+static void populate_subvp_cmd_drr_info(struct dc *dc,
+ struct dc_state *context,
+ struct pipe_ctx *subvp_pipe,
+ struct pipe_ctx *vblank_pipe,
+ struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data)
+{
+ struct dc_stream_state *phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream);
+ struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing;
+ struct dc_crtc_timing *phantom_timing;
+ struct dc_crtc_timing *drr_timing = &vblank_pipe->stream->timing;
+ uint16_t drr_frame_us = 0;
+ uint16_t min_drr_supported_us = 0;
+ uint16_t max_drr_supported_us = 0;
+ uint16_t max_drr_vblank_us = 0;
+ uint16_t max_drr_mallregion_us = 0;
+ uint16_t mall_region_us = 0;
+ uint16_t prefetch_us = 0;
+ uint16_t subvp_active_us = 0;
+ uint16_t drr_active_us = 0;
+ uint16_t min_vtotal_supported = 0;
+ uint16_t max_vtotal_supported = 0;
+
+ if (!phantom_stream)
+ return;
+
+ phantom_timing = &phantom_stream->timing;
+
+ pipe_data->pipe_config.vblank_data.drr_info.drr_in_use = true;
+ pipe_data->pipe_config.vblank_data.drr_info.use_ramping = false; // for now don't use ramping
+ pipe_data->pipe_config.vblank_data.drr_info.drr_window_size_ms = 4; // hardcode 4ms DRR window for now
+
+ drr_frame_us = div64_u64(((uint64_t)drr_timing->v_total * drr_timing->h_total * 1000000),
+ (((uint64_t)drr_timing->pix_clk_100hz * 100)));
+ // P-State allow width and FW delays already included phantom_timing->v_addressable
+ mall_region_us = div64_u64(((uint64_t)phantom_timing->v_addressable * phantom_timing->h_total * 1000000),
+ (((uint64_t)phantom_timing->pix_clk_100hz * 100)));
+ min_drr_supported_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US;
+ min_vtotal_supported = div64_u64(((uint64_t)drr_timing->pix_clk_100hz * 100 * min_drr_supported_us),
+ (((uint64_t)drr_timing->h_total * 1000000)));
+
+ prefetch_us = div64_u64(((uint64_t)(phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total * 1000000),
+ (((uint64_t)phantom_timing->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us));
+ subvp_active_us = div64_u64(((uint64_t)main_timing->v_addressable * main_timing->h_total * 1000000),
+ (((uint64_t)main_timing->pix_clk_100hz * 100)));
+ drr_active_us = div64_u64(((uint64_t)drr_timing->v_addressable * drr_timing->h_total * 1000000),
+ (((uint64_t)drr_timing->pix_clk_100hz * 100)));
+ max_drr_vblank_us = div64_u64((subvp_active_us - prefetch_us -
+ dc->caps.subvp_fw_processing_delay_us - drr_active_us), 2) + drr_active_us;
+ max_drr_mallregion_us = subvp_active_us - prefetch_us - mall_region_us - dc->caps.subvp_fw_processing_delay_us;
+ max_drr_supported_us = max_drr_vblank_us > max_drr_mallregion_us ? max_drr_vblank_us : max_drr_mallregion_us;
+ max_vtotal_supported = div64_u64(((uint64_t)drr_timing->pix_clk_100hz * 100 * max_drr_supported_us),
+ (((uint64_t)drr_timing->h_total * 1000000)));
+
+ /* When calculating the max vtotal supported for SubVP + DRR cases, add
+ * margin due to possible rounding errors (being off by 1 line in the
+ * FW calculation can incorrectly push the P-State switch to wait 1 frame
+ * longer).
+ */
+ max_vtotal_supported = max_vtotal_supported - dc->caps.subvp_drr_max_vblank_margin_us;
+
+ pipe_data->pipe_config.vblank_data.drr_info.min_vtotal_supported = min_vtotal_supported;
+ pipe_data->pipe_config.vblank_data.drr_info.max_vtotal_supported = max_vtotal_supported;
+ pipe_data->pipe_config.vblank_data.drr_info.drr_vblank_start_margin = dc->caps.subvp_drr_vblank_start_margin_us;
+}
+
+/**
+ * populate_subvp_cmd_vblank_pipe_info - Helper to populate VBLANK pipe info for the DMUB subvp command
+ *
+ * @dc: [in] current dc state
+ * @context: [in] new dc state
+ * @cmd: [in] DMUB cmd to be populated with SubVP info
+ * @vblank_pipe: [in] pipe_ctx for the VBLANK pipe
+ * @cmd_pipe_index: [in] index for the pipe array in DMCUB SubVP cmd
+ *
+ * Populate the DMCUB SubVP command with VBLANK pipe info. All the information
+ * required to calculate the microschedule for SubVP + VBLANK case is stored in
+ * the pipe_data (subvp_data and vblank_data). Also check if the VBLANK pipe
+ * is a DRR display -- if it is make a call to populate drr_info.
+ */
+static void populate_subvp_cmd_vblank_pipe_info(struct dc *dc,
+ struct dc_state *context,
+ union dmub_rb_cmd *cmd,
+ struct pipe_ctx *vblank_pipe,
+ uint8_t cmd_pipe_index)
+{
+ uint32_t i;
+ struct pipe_ctx *pipe = NULL;
+ struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data =
+ &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[cmd_pipe_index];
+
+ // Find the SubVP pipe
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+
+ // We check for master pipe, but it shouldn't matter since we only need
+ // the pipe for timing info (stream should be same for any pipe splits)
+ if (!resource_is_pipe_type(pipe, OTG_MASTER) ||
+ !resource_is_pipe_type(pipe, DPP_PIPE))
+ continue;
+
+ // Find the SubVP pipe
+ if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN)
+ break;
+ }
+
+ pipe_data->mode = VBLANK;
+ pipe_data->pipe_config.vblank_data.pix_clk_100hz = vblank_pipe->stream->timing.pix_clk_100hz;
+ pipe_data->pipe_config.vblank_data.vblank_start = vblank_pipe->stream->timing.v_total -
+ vblank_pipe->stream->timing.v_front_porch;
+ pipe_data->pipe_config.vblank_data.vtotal = vblank_pipe->stream->timing.v_total;
+ pipe_data->pipe_config.vblank_data.htotal = vblank_pipe->stream->timing.h_total;
+ pipe_data->pipe_config.vblank_data.vblank_pipe_index = vblank_pipe->pipe_idx;
+ pipe_data->pipe_config.vblank_data.vstartup_start = vblank_pipe->pipe_dlg_param.vstartup_start;
+ pipe_data->pipe_config.vblank_data.vblank_end =
+ vblank_pipe->stream->timing.v_total - vblank_pipe->stream->timing.v_front_porch - vblank_pipe->stream->timing.v_addressable;
+
+ if (vblank_pipe->stream->ignore_msa_timing_param &&
+ (vblank_pipe->stream->allow_freesync || vblank_pipe->stream->vrr_active_variable || vblank_pipe->stream->vrr_active_fixed))
+ populate_subvp_cmd_drr_info(dc, context, pipe, vblank_pipe, pipe_data);
+}
+
+/**
+ * update_subvp_prefetch_end_to_mall_start - Helper for SubVP + SubVP case
+ *
+ * @dc: [in] current dc state
+ * @context: [in] new dc state
+ * @cmd: [in] DMUB cmd to be populated with SubVP info
+ * @subvp_pipes: [in] Array of SubVP pipes (should always be length 2)
+ *
+ * For SubVP + SubVP, we use a single vertical interrupt to start the
+ * microschedule for both SubVP pipes. In order for this to work correctly, the
+ * MALL REGION of both SubVP pipes must start at the same time. This function
+ * lengthens the prefetch end to mall start delay of the SubVP pipe that has
+ * the shorter prefetch so that both MALL REGION's will start at the same time.
+ */
+static void update_subvp_prefetch_end_to_mall_start(struct dc *dc,
+ struct dc_state *context,
+ union dmub_rb_cmd *cmd,
+ struct pipe_ctx *subvp_pipes[])
+{
+ uint32_t subvp0_prefetch_us = 0;
+ uint32_t subvp1_prefetch_us = 0;
+ uint32_t prefetch_delta_us = 0;
+ struct dc_stream_state *phantom_stream0 = NULL;
+ struct dc_stream_state *phantom_stream1 = NULL;
+ struct dc_crtc_timing *phantom_timing0 = NULL;
+ struct dc_crtc_timing *phantom_timing1 = NULL;
+ struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data = NULL;
+
+ phantom_stream0 = dc_state_get_paired_subvp_stream(context, subvp_pipes[0]->stream);
+ if (!phantom_stream0)
+ return;
+
+ phantom_stream1 = dc_state_get_paired_subvp_stream(context, subvp_pipes[1]->stream);
+ if (!phantom_stream1)
+ return;
+
+ phantom_timing0 = &phantom_stream0->timing;
+ phantom_timing1 = &phantom_stream1->timing;
+
+ subvp0_prefetch_us = div64_u64(((uint64_t)(phantom_timing0->v_total - phantom_timing0->v_front_porch) *
+ (uint64_t)phantom_timing0->h_total * 1000000),
+ (((uint64_t)phantom_timing0->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us));
+ subvp1_prefetch_us = div64_u64(((uint64_t)(phantom_timing1->v_total - phantom_timing1->v_front_porch) *
+ (uint64_t)phantom_timing1->h_total * 1000000),
+ (((uint64_t)phantom_timing1->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us));
+
+ // Whichever SubVP PIPE has the smaller prefetch (including the prefetch end to mall start time)
+ // should increase it's prefetch time to match the other
+ if (subvp0_prefetch_us > subvp1_prefetch_us) {
+ pipe_data = &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[1];
+ prefetch_delta_us = subvp0_prefetch_us - subvp1_prefetch_us;
+ pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines =
+ div64_u64(((uint64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us) *
+ ((uint64_t)phantom_timing1->pix_clk_100hz * 100) + ((uint64_t)phantom_timing1->h_total * 1000000 - 1)),
+ ((uint64_t)phantom_timing1->h_total * 1000000));
+
+ } else if (subvp1_prefetch_us > subvp0_prefetch_us) {
+ pipe_data = &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[0];
+ prefetch_delta_us = subvp1_prefetch_us - subvp0_prefetch_us;
+ pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines =
+ div64_u64(((uint64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us) *
+ ((uint64_t)phantom_timing0->pix_clk_100hz * 100) + ((uint64_t)phantom_timing0->h_total * 1000000 - 1)),
+ ((uint64_t)phantom_timing0->h_total * 1000000));
+ }
+}
+
+/**
+ * populate_subvp_cmd_pipe_info - Helper to populate the SubVP pipe info for the DMUB subvp command
+ *
+ * @dc: [in] current dc state
+ * @context: [in] new dc state
+ * @cmd: [in] DMUB cmd to be populated with SubVP info
+ * @subvp_pipe: [in] pipe_ctx for the SubVP pipe
+ * @cmd_pipe_index: [in] index for the pipe array in DMCUB SubVP cmd
+ *
+ * Populate the DMCUB SubVP command with SubVP pipe info. All the information
+ * required to calculate the microschedule for the SubVP pipe is stored in the
+ * pipe_data of the DMCUB SubVP command.
+ */
+static void populate_subvp_cmd_pipe_info(struct dc *dc,
+ struct dc_state *context,
+ union dmub_rb_cmd *cmd,
+ struct pipe_ctx *subvp_pipe,
+ uint8_t cmd_pipe_index)
+{
+ uint32_t j;
+ struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data =
+ &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[cmd_pipe_index];
+ struct dc_stream_state *phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream);
+ struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing;
+ struct dc_crtc_timing *phantom_timing;
+ uint32_t out_num_stream, out_den_stream, out_num_plane, out_den_plane, out_num, out_den;
+
+ if (!phantom_stream)
+ return;
+
+ phantom_timing = &phantom_stream->timing;
+
+ pipe_data->mode = SUBVP;
+ pipe_data->pipe_config.subvp_data.pix_clk_100hz = subvp_pipe->stream->timing.pix_clk_100hz;
+ pipe_data->pipe_config.subvp_data.htotal = subvp_pipe->stream->timing.h_total;
+ pipe_data->pipe_config.subvp_data.vtotal = subvp_pipe->stream->timing.v_total;
+ pipe_data->pipe_config.subvp_data.main_vblank_start =
+ main_timing->v_total - main_timing->v_front_porch;
+ pipe_data->pipe_config.subvp_data.main_vblank_end =
+ main_timing->v_total - main_timing->v_front_porch - main_timing->v_addressable;
+ pipe_data->pipe_config.subvp_data.mall_region_lines = phantom_timing->v_addressable;
+ pipe_data->pipe_config.subvp_data.main_pipe_index = subvp_pipe->stream_res.tg->inst;
+ pipe_data->pipe_config.subvp_data.is_drr = subvp_pipe->stream->ignore_msa_timing_param &&
+ (subvp_pipe->stream->allow_freesync || subvp_pipe->stream->vrr_active_variable || subvp_pipe->stream->vrr_active_fixed);
+
+ /* Calculate the scaling factor from the src and dst height.
+ * e.g. If 3840x2160 being downscaled to 1920x1080, the scaling factor is 1/2.
+ * Reduce the fraction 1080/2160 = 1/2 for the "scaling factor"
+ *
+ * Make sure to combine stream and plane scaling together.
+ */
+ reduce_fraction(subvp_pipe->stream->src.height, subvp_pipe->stream->dst.height,
+ &out_num_stream, &out_den_stream);
+ reduce_fraction(subvp_pipe->plane_state->src_rect.height, subvp_pipe->plane_state->dst_rect.height,
+ &out_num_plane, &out_den_plane);
+ reduce_fraction(out_num_stream * out_num_plane, out_den_stream * out_den_plane, &out_num, &out_den);
+ pipe_data->pipe_config.subvp_data.scale_factor_numerator = out_num;
+ pipe_data->pipe_config.subvp_data.scale_factor_denominator = out_den;
+
+ // Prefetch lines is equal to VACTIVE + BP + VSYNC
+ pipe_data->pipe_config.subvp_data.prefetch_lines =
+ phantom_timing->v_total - phantom_timing->v_front_porch;
+
+ // Round up
+ pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines =
+ div64_u64(((uint64_t)dc->caps.subvp_prefetch_end_to_mall_start_us * ((uint64_t)phantom_timing->pix_clk_100hz * 100) +
+ ((uint64_t)phantom_timing->h_total * 1000000 - 1)), ((uint64_t)phantom_timing->h_total * 1000000));
+ pipe_data->pipe_config.subvp_data.processing_delay_lines =
+ div64_u64(((uint64_t)(dc->caps.subvp_fw_processing_delay_us) * ((uint64_t)phantom_timing->pix_clk_100hz * 100) +
+ ((uint64_t)phantom_timing->h_total * 1000000 - 1)), ((uint64_t)phantom_timing->h_total * 1000000));
+
+ if (subvp_pipe->bottom_pipe) {
+ pipe_data->pipe_config.subvp_data.main_split_pipe_index = subvp_pipe->bottom_pipe->pipe_idx;
+ } else if (subvp_pipe->next_odm_pipe) {
+ pipe_data->pipe_config.subvp_data.main_split_pipe_index = subvp_pipe->next_odm_pipe->pipe_idx;
+ } else {
+ pipe_data->pipe_config.subvp_data.main_split_pipe_index = 0xF;
+ }
+
+ // Find phantom pipe index based on phantom stream
+ for (j = 0; j < dc->res_pool->pipe_count; j++) {
+ struct pipe_ctx *phantom_pipe = &context->res_ctx.pipe_ctx[j];
+
+ if (resource_is_pipe_type(phantom_pipe, OTG_MASTER) &&
+ phantom_pipe->stream == dc_state_get_paired_subvp_stream(context, subvp_pipe->stream)) {
+ pipe_data->pipe_config.subvp_data.phantom_pipe_index = phantom_pipe->stream_res.tg->inst;
+ if (phantom_pipe->bottom_pipe) {
+ pipe_data->pipe_config.subvp_data.phantom_split_pipe_index = phantom_pipe->bottom_pipe->plane_res.hubp->inst;
+ } else if (phantom_pipe->next_odm_pipe) {
+ pipe_data->pipe_config.subvp_data.phantom_split_pipe_index = phantom_pipe->next_odm_pipe->plane_res.hubp->inst;
+ } else {
+ pipe_data->pipe_config.subvp_data.phantom_split_pipe_index = 0xF;
+ }
+ break;
+ }
+ }
+}
+
+/**
+ * dc_dmub_setup_subvp_dmub_command - Populate the DMCUB SubVP command
+ *
+ * @dc: [in] current dc state
+ * @context: [in] new dc state
+ * @enable: [in] if true enables the pipes population
+ *
+ * This function loops through each pipe and populates the DMUB SubVP CMD info
+ * based on the pipe (e.g. SubVP, VBLANK).
+ */
+void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
+ struct dc_state *context,
+ bool enable)
+{
+ uint8_t cmd_pipe_index = 0;
+ uint32_t i;
+ uint8_t subvp_count = 0;
+ union dmub_rb_cmd cmd;
+ struct pipe_ctx *subvp_pipes[2];
+ uint32_t wm_val_refclk = 0;
+ enum mall_stream_type pipe_mall_type;
+
+ memset(&cmd, 0, sizeof(cmd));
+ // FW command for SUBVP
+ cmd.fw_assisted_mclk_switch_v2.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
+ cmd.fw_assisted_mclk_switch_v2.header.sub_type = DMUB_CMD__HANDLE_SUBVP_CMD;
+ cmd.fw_assisted_mclk_switch_v2.header.payload_bytes =
+ sizeof(cmd.fw_assisted_mclk_switch_v2) - sizeof(cmd.fw_assisted_mclk_switch_v2.header);
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ /* For SubVP pipe count, only count the top most (ODM / MPC) pipe
+ */
+ if (resource_is_pipe_type(pipe, OTG_MASTER) &&
+ resource_is_pipe_type(pipe, DPP_PIPE) &&
+ dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN)
+ subvp_pipes[subvp_count++] = pipe;
+ }
+
+ if (enable) {
+ // For each pipe that is a "main" SUBVP pipe, fill in pipe data for DMUB SUBVP cmd
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
+
+ if (!pipe->stream)
+ continue;
+
+ /* When populating subvp cmd info, only pass in the top most (ODM / MPC) pipe.
+ * Any ODM or MPC splits being used in SubVP will be handled internally in
+ * populate_subvp_cmd_pipe_info
+ */
+ if (resource_is_pipe_type(pipe, OTG_MASTER) &&
+ resource_is_pipe_type(pipe, DPP_PIPE) &&
+ pipe_mall_type == SUBVP_MAIN) {
+ populate_subvp_cmd_pipe_info(dc, context, &cmd, pipe, cmd_pipe_index++);
+ } else if (resource_is_pipe_type(pipe, OTG_MASTER) &&
+ resource_is_pipe_type(pipe, DPP_PIPE) &&
+ pipe_mall_type == SUBVP_NONE) {
+ // Don't need to check for ActiveDRAMClockChangeMargin < 0, not valid in cases where
+ // we run through DML without calculating "natural" P-state support
+ populate_subvp_cmd_vblank_pipe_info(dc, context, &cmd, pipe, cmd_pipe_index++);
+
+ }
+ }
+ if (subvp_count == 2) {
+ update_subvp_prefetch_end_to_mall_start(dc, context, &cmd, subvp_pipes);
+ }
+ cmd.fw_assisted_mclk_switch_v2.config_data.pstate_allow_width_us = dc->caps.subvp_pstate_allow_width_us;
+ cmd.fw_assisted_mclk_switch_v2.config_data.vertical_int_margin_us = dc->caps.subvp_vertical_int_margin_us;
+
+ // Store the original watermark value for this SubVP config so we can lower it when the
+ // MCLK switch starts
+ wm_val_refclk = context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns *
+ (dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000) / 1000;
+
+ cmd.fw_assisted_mclk_switch_v2.config_data.watermark_a_cache = wm_val_refclk < 0xFFFF ? wm_val_refclk : 0xFFFF;
+ }
+
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+ /* Ask the DMUB service to collect its diagnostic data.
+  * Returns false when the DC DMUB service or its DMUB instance is missing,
+  * otherwise whatever dmub_srv_get_diagnostic_data() reports.
+  */
+ bool dc_dmub_srv_get_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv)
+ {
+ 	if (dc_dmub_srv == NULL || dc_dmub_srv->dmub == NULL)
+ 		return false;
+
+ 	return dmub_srv_get_diagnostic_data(dc_dmub_srv->dmub);
 }
 void dc_dmub_srv_log_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv)
 {
- struct dmub_diagnostic_data diag_data = {0};
+ uint32_t i; /* index into the debug.pc[] snapshots logged below */
 if (!dc_dmub_srv || !dc_dmub_srv->dmub) {
 DC_LOG_ERROR("%s: invalid parameters.", __func__);
 return;
 }
- if (!dc_dmub_srv_get_diagnostic_data(dc_dmub_srv, &diag_data)) {
+ DC_LOG_ERROR("%s: DMCUB error - collecting diagnostic data\n", __func__);
+
+ if (!dc_dmub_srv_get_diagnostic_data(dc_dmub_srv)) { /* presumably refreshes dmub->debug, which is dumped below -- confirm */
 DC_LOG_ERROR("%s: dc_dmub_srv_get_diagnostic_data failed.", __func__);
 return;
 }
- DC_LOG_DEBUG(
- "DMCUB STATE\n"
- " dmcub_version : %08x\n"
- " scratch [0] : %08x\n"
- " scratch [1] : %08x\n"
- " scratch [2] : %08x\n"
- " scratch [3] : %08x\n"
- " scratch [4] : %08x\n"
- " scratch [5] : %08x\n"
- " scratch [6] : %08x\n"
- " scratch [7] : %08x\n"
- " scratch [8] : %08x\n"
- " scratch [9] : %08x\n"
- " scratch [10] : %08x\n"
- " scratch [11] : %08x\n"
- " scratch [12] : %08x\n"
- " scratch [13] : %08x\n"
- " scratch [14] : %08x\n"
- " scratch [15] : %08x\n"
- " pc : %08x\n"
- " unk_fault_addr : %08x\n"
- " inst_fault_addr : %08x\n"
- " data_fault_addr : %08x\n"
- " inbox1_rptr : %08x\n"
- " inbox1_wptr : %08x\n"
- " inbox1_size : %08x\n"
- " inbox0_rptr : %08x\n"
- " inbox0_wptr : %08x\n"
- " inbox0_size : %08x\n"
- " is_enabled : %d\n"
- " is_soft_reset : %d\n"
- " is_secure_reset : %d\n"
- " is_traceport_en : %d\n"
- " is_cw0_en : %d\n"
- " is_cw6_en : %d\n",
- diag_data.dmcub_version,
- diag_data.scratch[0],
- diag_data.scratch[1],
- diag_data.scratch[2],
- diag_data.scratch[3],
- diag_data.scratch[4],
- diag_data.scratch[5],
- diag_data.scratch[6],
- diag_data.scratch[7],
- diag_data.scratch[8],
- diag_data.scratch[9],
- diag_data.scratch[10],
- diag_data.scratch[11],
- diag_data.scratch[12],
- diag_data.scratch[13],
- diag_data.scratch[14],
- diag_data.scratch[15],
- diag_data.pc,
- diag_data.undefined_address_fault_addr,
- diag_data.inst_fetch_fault_addr,
- diag_data.data_write_fault_addr,
- diag_data.inbox1_rptr,
- diag_data.inbox1_wptr,
- diag_data.inbox1_size,
- diag_data.inbox0_rptr,
- diag_data.inbox0_wptr,
- diag_data.inbox0_size,
- diag_data.is_dmcub_enabled,
- diag_data.is_dmcub_soft_reset,
- diag_data.is_dmcub_secure_reset,
- diag_data.is_traceport_en,
- diag_data.is_cw0_enabled,
- diag_data.is_cw6_enabled);
+ DC_LOG_DEBUG("DMCUB STATE:"); /* one log call per field from here on */
+ DC_LOG_DEBUG(" dmcub_version : %08x", dc_dmub_srv->dmub->debug.dmcub_version);
+ DC_LOG_DEBUG(" scratch [0] : %08x", dc_dmub_srv->dmub->debug.scratch[0]);
+ DC_LOG_DEBUG(" scratch [1] : %08x", dc_dmub_srv->dmub->debug.scratch[1]);
+ DC_LOG_DEBUG(" scratch [2] : %08x", dc_dmub_srv->dmub->debug.scratch[2]);
+ DC_LOG_DEBUG(" scratch [3] : %08x", dc_dmub_srv->dmub->debug.scratch[3]);
+ DC_LOG_DEBUG(" scratch [4] : %08x", dc_dmub_srv->dmub->debug.scratch[4]);
+ DC_LOG_DEBUG(" scratch [5] : %08x", dc_dmub_srv->dmub->debug.scratch[5]);
+ DC_LOG_DEBUG(" scratch [6] : %08x", dc_dmub_srv->dmub->debug.scratch[6]);
+ DC_LOG_DEBUG(" scratch [7] : %08x", dc_dmub_srv->dmub->debug.scratch[7]);
+ DC_LOG_DEBUG(" scratch [8] : %08x", dc_dmub_srv->dmub->debug.scratch[8]);
+ DC_LOG_DEBUG(" scratch [9] : %08x", dc_dmub_srv->dmub->debug.scratch[9]);
+ DC_LOG_DEBUG(" scratch [10] : %08x", dc_dmub_srv->dmub->debug.scratch[10]);
+ DC_LOG_DEBUG(" scratch [11] : %08x", dc_dmub_srv->dmub->debug.scratch[11]);
+ DC_LOG_DEBUG(" scratch [12] : %08x", dc_dmub_srv->dmub->debug.scratch[12]);
+ DC_LOG_DEBUG(" scratch [13] : %08x", dc_dmub_srv->dmub->debug.scratch[13]);
+ DC_LOG_DEBUG(" scratch [14] : %08x", dc_dmub_srv->dmub->debug.scratch[14]);
+ DC_LOG_DEBUG(" scratch [15] : %08x", dc_dmub_srv->dmub->debug.scratch[15]);
+ for (i = 0; i < DMUB_PC_SNAPSHOT_COUNT; i++) /* pc is now an array of snapshots rather than a single value */
+ DC_LOG_DEBUG(" pc[%d] : %08x", i, dc_dmub_srv->dmub->debug.pc[i]);
+ DC_LOG_DEBUG(" unk_fault_addr : %08x", dc_dmub_srv->dmub->debug.undefined_address_fault_addr);
+ DC_LOG_DEBUG(" inst_fault_addr : %08x", dc_dmub_srv->dmub->debug.inst_fetch_fault_addr);
+ DC_LOG_DEBUG(" data_fault_addr : %08x", dc_dmub_srv->dmub->debug.data_write_fault_addr);
+ DC_LOG_DEBUG(" inbox1_rptr : %08x", dc_dmub_srv->dmub->debug.inbox1_rptr);
+ DC_LOG_DEBUG(" inbox1_wptr : %08x", dc_dmub_srv->dmub->debug.inbox1_wptr);
+ DC_LOG_DEBUG(" inbox1_size : %08x", dc_dmub_srv->dmub->debug.inbox1_size);
+ DC_LOG_DEBUG(" inbox0_rptr : %08x", dc_dmub_srv->dmub->debug.inbox0_rptr);
+ DC_LOG_DEBUG(" inbox0_wptr : %08x", dc_dmub_srv->dmub->debug.inbox0_wptr);
+ DC_LOG_DEBUG(" inbox0_size : %08x", dc_dmub_srv->dmub->debug.inbox0_size);
+ DC_LOG_DEBUG(" outbox1_rptr : %08x", dc_dmub_srv->dmub->debug.outbox1_rptr);
+ DC_LOG_DEBUG(" outbox1_wptr : %08x", dc_dmub_srv->dmub->debug.outbox1_wptr);
+ DC_LOG_DEBUG(" outbox1_size : %08x", dc_dmub_srv->dmub->debug.outbox1_size);
+ DC_LOG_DEBUG(" is_enabled : %d", dc_dmub_srv->dmub->debug.is_dmcub_enabled);
+ DC_LOG_DEBUG(" is_soft_reset : %d", dc_dmub_srv->dmub->debug.is_dmcub_soft_reset);
+ DC_LOG_DEBUG(" is_secure_reset : %d", dc_dmub_srv->dmub->debug.is_dmcub_secure_reset);
+ DC_LOG_DEBUG(" is_traceport_en : %d", dc_dmub_srv->dmub->debug.is_traceport_en);
+ DC_LOG_DEBUG(" is_cw0_en : %d", dc_dmub_srv->dmub->debug.is_cw0_enabled);
+ DC_LOG_DEBUG(" is_cw6_en : %d", dc_dmub_srv->dmub->debug.is_cw6_enabled);
+ }
+
+ /* Decide whether cursor updates for this pipe must be forwarded to DMUB.
+  *
+  * Returns false for a pipe whose plane is progressive video or whose cursor
+  * can be disabled. Otherwise returns true when the link runs PSR1/PSR-SU on
+  * DCN 3.1 or newer, or when the link supports Replay.
+  */
+ static bool dc_dmub_should_update_cursor_data(struct pipe_ctx *pipe_ctx)
+ {
+ 	const struct dc_link *link;
+ 	bool psr_active;
+
+ 	/* Plane-based exclusions are evaluated before the link is touched */
+ 	if (pipe_ctx->plane_state != NULL &&
+ 			(pipe_ctx->plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE ||
+ 			 resource_can_pipe_disable_cursor(pipe_ctx)))
+ 		return false;
+
+ 	link = pipe_ctx->stream->link;
+
+ 	psr_active = link->psr_settings.psr_version == DC_PSR_VERSION_SU_1 ||
+ 			link->psr_settings.psr_version == DC_PSR_VERSION_1;
+ 	if (psr_active && pipe_ctx->stream->ctx->dce_version >= DCN_VERSION_3_1)
+ 		return true;
+
+ 	if (link->replay_settings.config.replay_supported)
+ 		return true;
+
+ 	return false;
+ }
+
+ /* Fill the PSR/Replay part of cursor payload #0: cursor rectangle, enable
+  * bit, pipe index, command version and eDP panel instance.
+  *
+  * The payload is left untouched when no eDP panel instance can be resolved
+  * for the pipe's link.
+  */
+ static void dc_build_cursor_update_payload0(
+ 		struct pipe_ctx *pipe_ctx, uint8_t p_idx,
+ 		struct dmub_cmd_update_cursor_payload0 *payload)
+ {
+ 	unsigned int panel_inst = 0;
+ 	struct hubp *hubp = pipe_ctx->plane_res.hubp;
+
+ 	if (!dc_get_edp_link_panel_inst(hubp->ctx->dc,
+ 			pipe_ctx->stream->link, &panel_inst))
+ 		return;
+
+ 	/* Identification of the target pipe / panel */
+ 	payload->cmd_version = DMUB_CMD_PSR_CONTROL_VERSION_1;
+ 	payload->panel_inst = panel_inst;
+ 	payload->pipe_idx = p_idx;
+ 	payload->enable = hubp->pos.cur_ctl.bits.cur_enable;
+
+ 	/* Cursor rect: x & y come from the cursor position ... */
+ 	payload->cursor_rect.x = hubp->cur_rect.x;
+ 	payload->cursor_rect.y = hubp->cur_rect.y;
+ 	/* ... width & height come from the cursor attributes */
+ 	payload->cursor_rect.width = hubp->cur_rect.w;
+ 	payload->cursor_rect.height = hubp->cur_rect.h;
+ }
+
+ /* Copy the cached cursor position register values of the HUBP and DPP into
+  * the position part of cursor payload #0 and tag it with the pipe index.
+  */
+ static void dc_build_cursor_position_update_payload0(
+ 		struct dmub_cmd_update_cursor_payload0 *pl, const uint8_t p_idx,
+ 		const struct hubp *hubp, const struct dpp *dpp)
+ {
+ 	pl->position_cfg.pipe_idx = p_idx;
+
+ 	/* DPP side */
+ 	pl->position_cfg.pDpp.cur0_ctl.raw = dpp->pos.cur0_ctl.raw;
+
+ 	/* HUBP side */
+ 	pl->position_cfg.pHubp.dst_offset.raw = hubp->pos.dst_offset.raw;
+ 	pl->position_cfg.pHubp.hot_spot.raw = hubp->pos.hot_spot.raw;
+ 	pl->position_cfg.pHubp.position.raw = hubp->pos.position.raw;
+ 	pl->position_cfg.pHubp.cur_ctl.raw = hubp->pos.cur_ctl.raw;
+ }
+
+ /* Copy the cached cursor attribute register values of the HUBP and DPP into
+  * the attribute configuration carried by cursor payload #1.
+  * p_idx is accepted for signature symmetry and is not used here.
+  */
+ static void dc_build_cursor_attribute_update_payload1(
+ 		struct dmub_cursor_attributes_cfg *pl_A, const uint8_t p_idx,
+ 		const struct hubp *hubp, const struct dpp *dpp)
+ {
+ 	/* DPP side */
+ 	pl_A->aDpp.cur0_ctl.raw = dpp->att.cur0_ctl.raw;
+
+ 	/* HUBP side */
+ 	pl_A->aHubp.settings.raw = hubp->att.settings.raw;
+ 	pl_A->aHubp.size.raw = hubp->att.size.raw;
+ 	pl_A->aHubp.cur_ctl.raw = hubp->att.cur_ctl.raw;
+ 	pl_A->aHubp.SURFACE_ADDR = hubp->att.SURFACE_ADDR;
+ 	pl_A->aHubp.SURFACE_ADDR_HIGH = hubp->att.SURFACE_ADDR_HIGH;
+ }
+
+/**
+ * dc_send_update_cursor_info_to_dmu - Populate the DMCUB Cursor update info command
+ *
+ * @pCtx: [in] pipe context
+ * @pipe_idx: [in] pipe index
+ *
+ * This function would store the cursor related information and pass it into
+ * dmub
+ */
+void dc_send_update_cursor_info_to_dmu(
+ struct pipe_ctx *pCtx, uint8_t pipe_idx)
+{
+ union dmub_rb_cmd cmd[2];
+ union dmub_cmd_update_cursor_info_data *update_cursor_info_0 =
+ &cmd[0].update_cursor_info.update_cursor_info_data;
+
+ memset(cmd, 0, sizeof(cmd));
+
+ if (!dc_dmub_should_update_cursor_data(pCtx))
+ return;
+ /*
+ * Since we use multi_cmd_pending for dmub command, the 2nd command is
+ * only assigned to store cursor attributes info.
+ * 1st command can view as 2 parts, 1st is for PSR/Replay data, the other
+ * is to store cursor position info.
+ *
+ * Command heaer type must be the same type if using multi_cmd_pending.
+ * Besides, while process 2nd command in DMU, the sub type is useless.
+ * So it's meanless to pass the sub type header with different type.
+ */
+
+ {
+ /* Build Payload#0 Header */
+ cmd[0].update_cursor_info.header.type = DMUB_CMD__UPDATE_CURSOR_INFO;
+ cmd[0].update_cursor_info.header.payload_bytes =
+ sizeof(cmd[0].update_cursor_info.update_cursor_info_data);
+ cmd[0].update_cursor_info.header.multi_cmd_pending = 1; //To combine multi dmu cmd, 1st cmd
+
+ /* Prepare Payload */
+ dc_build_cursor_update_payload0(pCtx, pipe_idx, &update_cursor_info_0->payload0);
+
+ dc_build_cursor_position_update_payload0(&update_cursor_info_0->payload0, pipe_idx,
+ pCtx->plane_res.hubp, pCtx->plane_res.dpp);
+ }
+ {
+ /* Build Payload#1 Header */
+ cmd[1].update_cursor_info.header.type = DMUB_CMD__UPDATE_CURSOR_INFO;
+ cmd[1].update_cursor_info.header.payload_bytes = sizeof(struct cursor_attributes_cfg);
+ cmd[1].update_cursor_info.header.multi_cmd_pending = 0; //Indicate it's the last command.
+
+ dc_build_cursor_attribute_update_payload1(
+ &cmd[1].update_cursor_info.update_cursor_info_data.payload1.attribute_cfg,
+ pipe_idx, pCtx->plane_res.hubp, pCtx->plane_res.dpp);
+
+ /* Combine 2nd cmds update_curosr_info to DMU */
+ dc_wake_and_execute_dmub_cmd_list(pCtx->stream->ctx, 2, cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ }
+}
+
+ /* Report whether the DMUB firmware meets the minimum version.
+  * When the hardware hook is_psrsu_supported is not provided the check
+  * passes; otherwise the hook's answer is returned.
+  */
+ bool dc_dmub_check_min_version(struct dmub_srv *srv)
+ {
+ 	if (srv->hw_funcs.is_psrsu_supported)
+ 		return srv->hw_funcs.is_psrsu_supported(srv);
+
+ 	return true;
+ }
+
+ /* Enable DPIA tracing in DMUB with two GPINT commands: first set trace
+  * buffer mask word 1 to 0x0010, then request a trace buffer mask update.
+  * Logs an error and stops at the first command that fails.
+  */
+ void dc_dmub_srv_enable_dpia_trace(const struct dc *dc)
+ {
+ 	/* Name kept as-is: the DC logging macros may expand to use it */
+ 	struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv;
+ 	bool ok;
+
+ 	if (dc_dmub_srv == NULL || dc_dmub_srv->dmub == NULL) {
+ 		DC_LOG_ERROR("%s: invalid parameters.", __func__);
+ 		return;
+ 	}
+
+ 	ok = dc_wake_and_execute_gpint(dc->ctx, DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1,
+ 			0x0010, NULL, DM_DMUB_WAIT_TYPE_WAIT);
+ 	if (!ok) {
+ 		DC_LOG_ERROR("timeout updating trace buffer mask word\n");
+ 		return;
+ 	}
+
+ 	ok = dc_wake_and_execute_gpint(dc->ctx, DMUB_GPINT__UPDATE_TRACE_BUFFER_MASK,
+ 			0x0000, NULL, DM_DMUB_WAIT_TYPE_WAIT);
+ 	if (!ok) {
+ 		DC_LOG_ERROR("timeout updating trace buffer mask word\n");
+ 		return;
+ 	}
+
+ 	DC_LOG_DEBUG("Enabled DPIA trace\n");
+ }
+
+ /* Thin wrapper: forwards the plane address and SubVP index to
+  * dmub_srv_subvp_save_surf_addr() on the underlying DMUB instance.
+  * The caller must pass a valid dc_dmub_srv; it is not NULL-checked here.
+  */
+ void dc_dmub_srv_subvp_save_surf_addr(const struct dc_dmub_srv *dc_dmub_srv,
+ 		const struct dc_plane_address *addr,
+ 		uint8_t subvp_index)
+ {
+ 	dmub_srv_subvp_save_surf_addr(dc_dmub_srv->dmub, addr, subvp_index);
+ }
+
+ /* Initialise DMUB cursor offload.
+  *
+  * Sends DMUB_CMD__CURSOR_OFFLOAD_INIT with the GPU address and size of the
+  * cursor offload framebuffer and then marks cursor offload as enabled.
+  * Does nothing when the feature is disabled in the DC config, when the DMUB
+  * service is unavailable, when the firmware does not advertise
+  * cursor_offload_v1_support, or when any of the required buffers / shared
+  * state are missing.
+  *
+  * NOTE(review): cursor_offload_enabled is set regardless of the result of
+  * dc_wake_and_execute_dmub_cmd(); confirm that is intended.
+  */
+ void dc_dmub_srv_cursor_offload_init(struct dc *dc)
+ {
+ 	struct dmub_rb_cmd_cursor_offload_init *init;
+ 	struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv;
+ 	union dmub_rb_cmd cmd;
+
+ 	if (!dc->config.enable_cursor_offload)
+ 		return;
+
+ 	/* Same guard as the other entry points in this file: the DMUB service
+ 	 * may not exist, so never dereference it blindly.
+ 	 */
+ 	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+ 		return;
+
+ 	if (!dc_dmub_srv->dmub->meta_info.feature_bits.bits.cursor_offload_v1_support)
+ 		return;
+
+ 	if (!dc_dmub_srv->dmub->cursor_offload_fb.gpu_addr || !dc_dmub_srv->dmub->cursor_offload_fb.cpu_addr)
+ 		return;
+
+ 	if (!dc_dmub_srv->dmub->cursor_offload_v1)
+ 		return;
+
+ 	if (!dc_dmub_srv->dmub->shared_state)
+ 		return;
+
+ 	memset(&cmd, 0, sizeof(cmd));
+
+ 	init = &cmd.cursor_offload_init;
+ 	init->header.type = DMUB_CMD__CURSOR_OFFLOAD;
+ 	init->header.sub_type = DMUB_CMD__CURSOR_OFFLOAD_INIT;
+ 	init->header.payload_bytes = sizeof(init->init_data);
+ 	init->init_data.state_addr.quad_part = dc_dmub_srv->dmub->cursor_offload_fb.gpu_addr;
+ 	init->init_data.state_size = dc_dmub_srv->dmub->cursor_offload_fb.size;
+
+ 	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+
+ 	dc_dmub_srv->cursor_offload_enabled = true;
+ }
+
+ /* Enable or disable DMUB cursor offload for one stream.
+  *
+  * Sends a CURSOR_OFFLOAD stream enable/disable command for the OTG master
+  * pipe of @stream, carrying the OTG instance, the line time and the largest
+  * v_total in use. Enable is sent without waiting; disable waits.
+  * Does nothing when cursor offload is not enabled, @stream is NULL, or no
+  * matching OTG master pipe with a timing generator is found in @context.
+  */
+ void dc_dmub_srv_control_cursor_offload(struct dc *dc, struct dc_state *context,
+ 		const struct dc_stream_state *stream, bool enable)
+ {
+ 	struct pipe_ctx const *otg_master;
+ 	struct dmub_rb_cmd_cursor_offload_stream_cntl *cntl;
+ 	union dmub_rb_cmd cmd;
+
+ 	if (!dc_dmub_srv_is_cursor_offload_enabled(dc) || !stream)
+ 		return;
+
+ 	otg_master = resource_get_otg_master_for_stream(&context->res_ctx, stream);
+ 	if (!otg_master || !otg_master->stream_res.tg || otg_master->stream != stream)
+ 		return;
+
+ 	memset(&cmd, 0, sizeof(cmd));
+ 	cntl = &cmd.cursor_offload_stream_ctnl;
+
+ 	cntl->header.type = DMUB_CMD__CURSOR_OFFLOAD;
+ 	cntl->header.payload_bytes = sizeof(cntl->data);
+ 	if (enable)
+ 		cntl->header.sub_type = DMUB_CMD__CURSOR_OFFLOAD_STREAM_ENABLE;
+ 	else
+ 		cntl->header.sub_type = DMUB_CMD__CURSOR_OFFLOAD_STREAM_DISABLE;
+
+ 	cntl->data.otg_inst = otg_master->stream_res.tg->inst;
+
+ 	/* Truncated quotient plus one */
+ 	cntl->data.line_time_in_ns = 1u + (uint32_t)(div64_u64(stream->timing.h_total * 1000000ull,
+ 			stream->timing.pix_clk_100hz / 10));
+
+ 	/* Larger of the adjusted maximum and the nominal v_total */
+ 	if (stream->adjust.v_total_max > stream->timing.v_total)
+ 		cntl->data.v_total_max = stream->adjust.v_total_max;
+ 	else
+ 		cntl->data.v_total_max = stream->timing.v_total;
+
+ 	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd,
+ 			enable ? DM_DMUB_WAIT_TYPE_NO_WAIT : DM_DMUB_WAIT_TYPE_WAIT);
+ }
+
+ /* Ask DMUB to program the offloaded cursor for the given pipe's OTG.
+  *
+  * Sends DMUB_CMD__CURSOR_OFFLOAD_STREAM_PROGRAM without waiting for
+  * completion. Does nothing when cursor offload is not enabled or the pipe
+  * has no stream or timing generator.
+  */
+ void dc_dmub_srv_program_cursor_now(struct dc *dc, const struct pipe_ctx *pipe)
+ {
+ 	union dmub_rb_cmd cmd;
+ 	struct dmub_rb_cmd_cursor_offload_stream_cntl *cntl = &cmd.cursor_offload_stream_ctnl;
+
+ 	if (!dc_dmub_srv_is_cursor_offload_enabled(dc))
+ 		return;
+
+ 	if (pipe == NULL || pipe->stream == NULL || pipe->stream_res.tg == NULL)
+ 		return;
+
+ 	memset(&cmd, 0, sizeof(cmd));
+
+ 	cntl->data.otg_inst = pipe->stream_res.tg->inst;
+ 	cntl->header.payload_bytes = sizeof(cntl->data);
+ 	cntl->header.sub_type = DMUB_CMD__CURSOR_OFFLOAD_STREAM_PROGRAM;
+ 	cntl->header.type = DMUB_CMD__CURSOR_OFFLOAD;
+
+ 	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+ }
+
+ /* Query, and optionally wait for, DMUB hardware power-up.
+  *
+  * Returns true immediately when there is no DMUB service or when DMCUB
+  * emulation is active. Without @wait the current power-up state is returned.
+  * With @wait the function waits in 500000-unit steps: forever when the
+  * disable_timeout debug option is set, otherwise once, logging an error and
+  * returning false if that single wait does not succeed.
+  */
+ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait)
+ {
+ 	struct dc_context *dc_ctx;
+ 	enum dmub_status status;
+
+ 	if (dc_dmub_srv == NULL || dc_dmub_srv->dmub == NULL)
+ 		return true;
+
+ 	if (dc_dmub_srv->ctx->dc->debug.dmcub_emulation)
+ 		return true;
+
+ 	/* Not referenced directly below; presumably consumed by the DC_ERROR
+ 	 * macro, so the variable and its name are kept.
+ 	 */
+ 	dc_ctx = dc_dmub_srv->ctx;
+
+ 	if (!wait)
+ 		return dmub_srv_is_hw_pwr_up(dc_dmub_srv->dmub);
+
+ 	if (dc_dmub_srv->ctx->dc->debug.disable_timeout) {
+ 		/* Timeouts disabled: keep retrying until the hardware is up */
+ 		do {
+ 			status = dmub_srv_wait_for_hw_pwr_up(dc_dmub_srv->dmub, 500000);
+ 		} while (status != DMUB_STATUS_OK);
+
+ 		return true;
+ 	}
+
+ 	status = dmub_srv_wait_for_hw_pwr_up(dc_dmub_srv->dmub, 500000);
+ 	if (status != DMUB_STATUS_OK) {
+ 		DC_ERROR("Error querying DMUB hw power up status: error=%d\n", status);
+ 		return false;
+ 	}
+
+ 	return true;
+ }
+
+ /* Count the streams of the current state that are considered active: a
+  * stream counts when it is not DPMS-off, or unconditionally when
+  * dc->config.disable_ips_in_dpms_off is set. NULL entries are skipped.
+  */
+ static int count_active_streams(const struct dc *dc)
+ {
+ 	int idx;
+ 	int active = 0;
+
+ 	for (idx = 0; idx < dc->current_state->stream_count; ++idx) {
+ 		struct dc_stream_state *stream = dc->current_state->streams[idx];
+
+ 		if (!stream)
+ 			continue;
+
+ 		if (!stream->dpms_off || dc->config.disable_ips_in_dpms_off)
+ 			active++;
+ 	}
+
+ 	return active;
+ }
+
+static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle)
+{ /*
+ * Send the IDLE_OPT / DCN_NOTIFY_IDLE command to DMCUB with
+ * driver_idle = allow_idle.
+ *
+ * When allow_idle is true the function first waits for DMUB to go idle,
+ * then builds the driver "allow" signal set from dc->config.disable_ips,
+ * dc->config.disable_ips_rcg and dc->config.disable_ips_in_vpb, writes it
+ * to the IPS_DRIVER shared-state slot and mirrors it into
+ * dc_dmub_srv->driver_signals. After submission it sets needs_idle_wake.
+ *
+ * Returns without doing anything under debug.dmcub_emulation or when no
+ * DMUB service/instance exists.
+ */
+ volatile const struct dmub_shared_state_ips_fw *ips_fw;
+ struct dc_dmub_srv *dc_dmub_srv;
+ union dmub_rb_cmd cmd = {0};
+
+ if (dc->debug.dmcub_emulation)
+ return;
+
+ if (!dc->ctx->dmub_srv || !dc->ctx->dmub_srv->dmub)
+ return;
+
+ dc_dmub_srv = dc->ctx->dmub_srv;
+ ips_fw = &dc_dmub_srv->dmub->shared_state[DMUB_SHARED_SHARE_FEATURE__IPS_FW].data.ips_fw;
+
+ memset(&cmd, 0, sizeof(cmd)); /* clears the whole union in addition to the {0} initializer above */
+ cmd.idle_opt_notify_idle.header.type = DMUB_CMD__IDLE_OPT;
+ cmd.idle_opt_notify_idle.header.sub_type = DMUB_CMD__IDLE_OPT_DCN_NOTIFY_IDLE;
+ cmd.idle_opt_notify_idle.header.payload_bytes =
+ sizeof(cmd.idle_opt_notify_idle) -
+ sizeof(cmd.idle_opt_notify_idle.header);
+
+ cmd.idle_opt_notify_idle.cntl_data.driver_idle = allow_idle;
+
+ if (dc->work_arounds.skip_psr_ips_crtc_disable)
+ cmd.idle_opt_notify_idle.cntl_data.skip_otg_disable = true;
+
+ if (allow_idle) {
+ volatile struct dmub_shared_state_ips_driver *ips_driver =
+ &dc_dmub_srv->dmub->shared_state[DMUB_SHARED_SHARE_FEATURE__IPS_DRIVER].data.ips_driver;
+ union dmub_shared_state_ips_driver_signals new_signals;
+
+ DC_LOG_IPS(
+ "%s wait idle (ips1_commit=%u ips2_commit=%u)",
+ __func__,
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit);
+
+ dc_dmub_srv_wait_for_idle(dc->ctx->dmub_srv, DM_DMUB_WAIT_TYPE_WAIT, NULL);
+
+ memset(&new_signals, 0, sizeof(new_signals)); /* start from "nothing allowed"; branches below only set bits */
+
+ new_signals.bits.allow_idle = 1; /* always set */
+
+ if (dc->config.disable_ips == DMUB_IPS_ENABLE ||
+ dc->config.disable_ips == DMUB_IPS_DISABLE_DYNAMIC) {
+ new_signals.bits.allow_pg = 1;
+ new_signals.bits.allow_ips1 = 1;
+ new_signals.bits.allow_ips2 = 1;
+ new_signals.bits.allow_z10 = 1;
+ // New in IPSv2.0
+ new_signals.bits.allow_ips1z8 = 1;
+ } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS1) {
+ new_signals.bits.allow_ips1 = 1;
+ } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2) {
+ // IPSv1.0 only
+ new_signals.bits.allow_pg = 1;
+ new_signals.bits.allow_ips1 = 1;
+ } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2_Z10) {
+ // IPSv1.0 only
+ new_signals.bits.allow_pg = 1;
+ new_signals.bits.allow_ips1 = 1;
+ new_signals.bits.allow_ips2 = 1;
+ } else if (dc->config.disable_ips == DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF) {
+ /* TODO: Move this logic out to hwseq */
+ if (count_active_streams(dc) == 0) {
+ /* IPS2 - Display off */
+ new_signals.bits.allow_pg = 1;
+ new_signals.bits.allow_ips1 = 1;
+ new_signals.bits.allow_ips2 = 1;
+ new_signals.bits.allow_z10 = 1;
+ // New in IPSv2.0
+ new_signals.bits.allow_ips1z8 = 1;
+ } else {
+ /* RCG only */
+ new_signals.bits.allow_pg = 0;
+ new_signals.bits.allow_ips1 = 1;
+ new_signals.bits.allow_ips2 = 0;
+ new_signals.bits.allow_z10 = 0;
+ }
+ } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_Z8_RETENTION) {
+ new_signals.bits.allow_pg = 1;
+ new_signals.bits.allow_ips1 = 1;
+ new_signals.bits.allow_ips2 = 1;
+ new_signals.bits.allow_z10 = 1;
+ }
+ // Setting RCG allow bits (IPSv2.0)
+ if (dc->config.disable_ips_rcg == DMUB_IPS_RCG_ENABLE) {
+ new_signals.bits.allow_ips0_rcg = 1;
+ new_signals.bits.allow_ips1_rcg = 1;
+ } else if (dc->config.disable_ips_rcg == DMUB_IPS0_RCG_DISABLE) {
+ new_signals.bits.allow_ips1_rcg = 1;
+ } else if (dc->config.disable_ips_rcg == DMUB_IPS1_RCG_DISABLE) {
+ new_signals.bits.allow_ips0_rcg = 1;
+ }
+ // IPS dynamic allow bits (IPSv2 change, vpb use case)
+ if (dc->config.disable_ips_in_vpb == DMUB_IPS_VPB_ENABLE_IPS1_AND_RCG) {
+ new_signals.bits.allow_dynamic_ips1 = 1;
+ } else if (dc->config.disable_ips_in_vpb == DMUB_IPS_VPB_ENABLE_ALL) {
+ new_signals.bits.allow_dynamic_ips1 = 1;
+ new_signals.bits.allow_dynamic_ips1_z8 = 1;
+ }
+ ips_driver->signals = new_signals; /* publish the full signal set to the shared-state slot in one assignment */
+ dc_dmub_srv->driver_signals = ips_driver->signals; /* driver-side copy, read back from the shared slot */
+ }
+
+ DC_LOG_IPS(
+ "%s send allow_idle=%d (ips1_commit=%u ips2_commit=%u)",
+ __func__,
+ allow_idle,
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit);
+
+ /* NOTE: This does not use the "wake" interface since this is part of the wake path. */
+ /* We also do not perform a wait since DMCUB could enter idle after the notification. */
+ dm_execute_dmub_cmd(dc->ctx, &cmd, allow_idle ? DM_DMUB_WAIT_TYPE_NO_WAIT : DM_DMUB_WAIT_TYPE_WAIT);
+
+ /* Register access should stop at this point. */
+ if (allow_idle)
+ dc_dmub_srv->needs_idle_wake = true;
+}
+
+static void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
+{ /*
+ * Bring DMCUB/DCN out of an idle power state.
+ *
+ * Returns immediately under debug.dmcub_emulation or when no DMUB
+ * service/instance exists. When the clock manager provides an
+ * exit_low_power_state hook, the sequence is:
+ *   1. snapshot the previous driver signals and the firmware exit
+ *      counters, then clear the driver signals and needs_idle_wake;
+ *   2. without IPSv2 support, and if IPS2 may have been entered, ask
+ *      PMFW to exit (twice when ips2_commit is still set), poll until
+ *      ips2_commit clears, wait for hardware power up and resync the
+ *      inboxes;
+ *   3. notify DMCUB that the driver is no longer idle and poll until
+ *      ips1_commit clears.
+ * In all cases the function finally waits for hardware power up.
+ */
+ struct dc_dmub_srv *dc_dmub_srv;
+ uint32_t rcg_exit_count = 0, ips1_exit_count = 0, ips2_exit_count = 0, ips1z8_exit_count = 0;
+
+ if (dc->debug.dmcub_emulation)
+ return;
+
+ if (!dc->ctx->dmub_srv || !dc->ctx->dmub_srv->dmub)
+ return;
+
+ dc_dmub_srv = dc->ctx->dmub_srv;
+
+ if (dc->clk_mgr->funcs->exit_low_power_state) {
+ volatile const struct dmub_shared_state_ips_fw *ips_fw =
+ &dc_dmub_srv->dmub->shared_state[DMUB_SHARED_SHARE_FEATURE__IPS_FW].data.ips_fw;
+ volatile struct dmub_shared_state_ips_driver *ips_driver =
+ &dc_dmub_srv->dmub->shared_state[DMUB_SHARED_SHARE_FEATURE__IPS_DRIVER].data.ips_driver;
+ union dmub_shared_state_ips_driver_signals prev_driver_signals = ips_driver->signals; /* snapshot before the clear below */
+
+ rcg_exit_count = ips_fw->rcg_exit_count;
+ ips1_exit_count = ips_fw->ips1_exit_count;
+ ips2_exit_count = ips_fw->ips2_exit_count;
+ ips1z8_exit_count = ips_fw->ips1_z8ret_exit_count;
+
+ ips_driver->signals.all = 0; /* withdraw every driver allow bit */
+ dc_dmub_srv->driver_signals = ips_driver->signals;
+
+ /* NOTE(review): the allow bits logged here are read after the clear above, and the "count" fields print the *_entry_count values. */
+ DC_LOG_IPS(
+ "%s (allow ips1=%u ips2=%u) (commit ips1=%u ips2=%u ips1z8=%u) (count rcg=%u ips1=%u ips2=%u ips1_z8=%u)",
+ __func__,
+ ips_driver->signals.bits.allow_ips1,
+ ips_driver->signals.bits.allow_ips2,
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit,
+ ips_fw->signals.bits.ips1z8_commit,
+ ips_fw->rcg_entry_count,
+ ips_fw->ips1_entry_count,
+ ips_fw->ips2_entry_count,
+ ips_fw->ips1_z8ret_entry_count);
+
+ /* Note: register access has technically not resumed for DCN here, but we
+ * need to be able to message PMFW through our standard register interface.
+ */
+ dc_dmub_srv->needs_idle_wake = false;
+
+ if (!dc->caps.ips_v2_support && ((prev_driver_signals.bits.allow_ips2 || prev_driver_signals.all == 0) &&
+ (!dc->debug.optimize_ips_handshake ||
+ ips_fw->signals.bits.ips2_commit || !ips_fw->signals.bits.in_idle))) {
+ DC_LOG_IPS(
+ "wait IPS2 eval (ips1_commit=%u ips2_commit=%u )",
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit);
+
+ if (!dc->debug.optimize_ips_handshake || !ips_fw->signals.bits.ips2_commit)
+ udelay(dc->debug.ips2_eval_delay_us);
+
+ DC_LOG_IPS(
+ "exit IPS2 #1 (ips1_commit=%u ips2_commit=%u)",
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit);
+
+ // Tell PMFW to exit low power state
+ dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr);
+
+ if (ips_fw->signals.bits.ips2_commit) {
+
+ DC_LOG_IPS(
+ "wait IPS2 entry delay (ips1_commit=%u ips2_commit=%u)",
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit);
+
+ // Wait for IPS2 entry upper bound
+ udelay(dc->debug.ips2_entry_delay_us);
+
+ DC_LOG_IPS(
+ "exit IPS2 #2 (ips1_commit=%u ips2_commit=%u)",
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit);
+
+ dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr);
+
+ DC_LOG_IPS(
+ "wait IPS2 commit clear (ips1_commit=%u ips2_commit=%u)",
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit);
+
+ while (ips_fw->signals.bits.ips2_commit) /* polls shared state with no timeout */
+ udelay(1);
+
+ DC_LOG_IPS(
+ "wait hw_pwr_up (ips1_commit=%u ips2_commit=%u)",
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit);
+
+ if (!dc_dmub_srv_is_hw_pwr_up(dc->ctx->dmub_srv, true))
+ ASSERT(0);
+
+ DC_LOG_IPS(
+ "resync inbox1 (ips1_commit=%u ips2_commit=%u)",
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit);
+
+ dmub_srv_sync_inboxes(dc->ctx->dmub_srv->dmub);
+ }
+ }
+
+ dc_dmub_srv_notify_idle(dc, false); /* sends driver_idle = false to DMCUB */
+ if (prev_driver_signals.bits.allow_ips1 || prev_driver_signals.all == 0) {
+ DC_LOG_IPS(
+ "wait for IPS1 commit clear (ips1_commit=%u ips2_commit=%u ips1z8=%u)",
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit,
+ ips_fw->signals.bits.ips1z8_commit);
+
+ while (ips_fw->signals.bits.ips1_commit) /* polls shared state with no timeout */
+ udelay(1);
+
+ DC_LOG_IPS(
+ "wait for IPS1 commit clear done (ips1_commit=%u ips2_commit=%u ips1z8=%u)",
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit,
+ ips_fw->signals.bits.ips1z8_commit);
+ }
+ }
+
+ if (!dc_dmub_srv_is_hw_pwr_up(dc->ctx->dmub_srv, true))
+ ASSERT(0);
+
+ /* The counts logged here were sampled before the exit sequence ran (zero if the hook is absent). */
+ DC_LOG_IPS("%s exit (count rcg=%u ips1=%u ips2=%u ips1z8=%u)",
+ __func__,
+ rcg_exit_count,
+ ips1_exit_count,
+ ips2_exit_count,
+ ips1z8_exit_count);
+}
+
+/*
+ * Forward an ACPI power state to the DMUB service.
+ *
+ * DC_ACPI_CM_POWER_STATE_D0 maps to DMUB_POWER_STATE_D0; every other value
+ * maps to DMUB_POWER_STATE_D3. A NULL @dc_dmub_srv is ignored.
+ */
+void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state power_state)
+{
+	if (!dc_dmub_srv)
+		return;
+
+	dmub_srv_set_power_state(dc_dmub_srv->dmub,
+				 power_state == DC_ACPI_CM_POWER_STATE_D0 ?
+				 DMUB_POWER_STATE_D0 : DMUB_POWER_STATE_D3);
+}
+
+/*
+ * Tell DMUB firmware which DC power state the driver is in.
+ *
+ * D0 and D3 are translated to their DMUB_IDLE_OPT_DC_POWER_STATE_*
+ * counterparts; anything else is reported as ..._UNKNOWN. The command is
+ * sent through the wake helper and waited on. A NULL @dc_dmub_srv is ignored.
+ */
+void dc_dmub_srv_notify_fw_dc_power_state(struct dc_dmub_srv *dc_dmub_srv,
+		enum dc_acpi_cm_power_state power_state)
+{
+	union dmub_rb_cmd rb_cmd;
+
+	if (!dc_dmub_srv)
+		return;
+
+	memset(&rb_cmd, 0, sizeof(rb_cmd));
+
+	switch (power_state) {
+	case DC_ACPI_CM_POWER_STATE_D0:
+		rb_cmd.idle_opt_set_dc_power_state.data.power_state = DMUB_IDLE_OPT_DC_POWER_STATE_D0;
+		break;
+	case DC_ACPI_CM_POWER_STATE_D3:
+		rb_cmd.idle_opt_set_dc_power_state.data.power_state = DMUB_IDLE_OPT_DC_POWER_STATE_D3;
+		break;
+	default:
+		rb_cmd.idle_opt_set_dc_power_state.data.power_state = DMUB_IDLE_OPT_DC_POWER_STATE_UNKNOWN;
+		break;
+	}
+
+	/* Payload is everything in the command after its header. */
+	rb_cmd.idle_opt_set_dc_power_state.header.payload_bytes =
+		sizeof(rb_cmd.idle_opt_set_dc_power_state) - sizeof(rb_cmd.idle_opt_set_dc_power_state.header);
+	rb_cmd.idle_opt_set_dc_power_state.header.sub_type = DMUB_CMD__IDLE_OPT_SET_DC_POWER_STATE;
+	rb_cmd.idle_opt_set_dc_power_state.header.type = DMUB_CMD__IDLE_OPT;
+
+	dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, &rb_cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+/*
+ * Ask DMUB whether link detection should be run.
+ *
+ * When shared state is available and the firmware advertises
+ * shared_state_link_detection, the answer is read straight from the
+ * IPS_FW shared-state slot. Otherwise idle is exited (if it was allowed),
+ * dmub_srv_should_detect() is queried, and idle is re-allowed only when no
+ * detection is needed, no idle exit is in flight and re-allow is not
+ * disabled by debug option.
+ *
+ * Returns false when there is no DMUB service or instance.
+ */
+bool dc_dmub_srv_should_detect(struct dc_dmub_srv *dc_dmub_srv)
+{
+	bool exited_idle, detect;
+
+	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+		return false;
+
+	if (dc_dmub_srv->dmub->shared_state &&
+	    dc_dmub_srv->dmub->meta_info.feature_bits.bits.shared_state_link_detection) {
+		volatile const struct dmub_shared_state_ips_fw *fw_state =
+			&dc_dmub_srv->dmub->shared_state[DMUB_SHARED_SHARE_FEATURE__IPS_FW].data.ips_fw;
+
+		return fw_state->signals.bits.detection_required;
+	}
+
+	/* Detection may require reading scratch 0 - exit out of idle prior to the read. */
+	exited_idle = dc_dmub_srv->idle_allowed;
+	if (exited_idle)
+		dc_dmub_srv_apply_idle_power_optimizations(dc_dmub_srv->ctx->dc, false);
+
+	detect = dmub_srv_should_detect(dc_dmub_srv->dmub);
+	if (detect)
+		return true;
+
+	/* Re-enter idle if we're not about to immediately redetect links. */
+	if (exited_idle && dc_dmub_srv->idle_exit_counter == 0 &&
+	    !dc_dmub_srv->ctx->dc->debug.disable_dmub_reallow_idle)
+		dc_dmub_srv_apply_idle_power_optimizations(dc_dmub_srv->ctx->dc, true);
+
+	return false;
+}
+
+/*
+ * Move the driver's idle-allow state to @allow_idle.
+ *
+ * The request is forced to false when debug.ips_disallow_entry is set, and
+ * nothing happens when the state already matches. Allowing idle records
+ * the new state and then notifies DMCUB; disallowing runs the low power
+ * exit sequence and records the new state only once it has completed.
+ */
+void dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool allow_idle)
+{
+	/* Keeps its name: the DC_LOG_IPS() logger is resolved through it. */
+	struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv;
+
+	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+		return;
+
+	allow_idle = allow_idle && !dc->debug.ips_disallow_entry;
+
+	if (dc_dmub_srv->idle_allowed == allow_idle)
+		return;
+
+	DC_LOG_IPS("%s state change: old=%d new=%d", __func__, dc_dmub_srv->idle_allowed, allow_idle);
+
+	/*
+	 * Entering a low power state requires a driver notification.
+	 * Powering up the hardware requires notifying PMFW and DMCUB.
+	 * Clearing the driver idle allow requires a DMCUB command.
+	 * DMCUB commands requires the DMCUB to be powered up and restored.
+	 */
+
+	if (allow_idle) {
+		/*
+		 * Consider idle as notified prior to the actual submission to
+		 * prevent multiple entries.
+		 */
+		dc_dmub_srv->idle_allowed = true;
+		dc_dmub_srv_notify_idle(dc, allow_idle);
+		return;
+	}
+
+	dc_dmub_srv->idle_exit_counter += 1;
+
+	dc_dmub_srv_exit_low_power_state(dc);
+
+	/*
+	 * Idle is considered fully exited only after the sequence above
+	 * fully completes. If we have a race of two threads exiting
+	 * at the same time then it's safe to perform the sequence
+	 * twice as long as we're not re-entering.
+	 *
+	 * Infinite command submission is avoided by using the
+	 * dm_execute_dmub_cmd submission instead of the "wake" helpers.
+	 */
+	dc_dmub_srv->idle_allowed = false;
+
+	dc_dmub_srv->idle_exit_counter -= 1;
+	if (dc_dmub_srv->idle_exit_counter < 0) {
+		/* Unbalanced exit accounting: flag it and clamp back to zero. */
+		ASSERT(0);
+		dc_dmub_srv->idle_exit_counter = 0;
+	}
+}
+
+/*
+ * Single-command convenience wrapper around
+ * dc_wake_and_execute_dmub_cmd_list(); returns that function's result.
+ */
+bool dc_wake_and_execute_dmub_cmd(const struct dc_context *ctx, union dmub_rb_cmd *cmd,
+		enum dm_dmub_wait_type wait_type)
+{
+	const unsigned int single_cmd = 1;
+
+	return dc_wake_and_execute_dmub_cmd_list(ctx, single_cmd, cmd, wait_type);
+}
+
+/*
+ * Exit idle if needed, submit @count DMUB commands, then re-allow idle.
+ *
+ * Returns false when there is no DMUB service/instance or when submission
+ * fails, and true for an empty list. Idle is re-allowed only if this call
+ * was the one that exited it, the submission succeeded, no idle exit is in
+ * flight and debug.disable_dmub_reallow_idle is clear.
+ */
+bool dc_wake_and_execute_dmub_cmd_list(const struct dc_context *ctx, unsigned int count,
+		union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type)
+{
+	struct dc_dmub_srv *srv = ctx->dmub_srv;
+	bool woke, ok;
+
+	if (!srv || !srv->dmub)
+		return false;
+
+	if (!count)
+		return true;
+
+	woke = srv->idle_allowed;
+	if (woke)
+		dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, false);
+
+	/*
+	 * These may have different implementations in DM, so ensure
+	 * that we guide it to the expected helper.
+	 */
+	ok = (count > 1) ?
+		dm_execute_dmub_cmd_list(ctx, count, cmd, wait_type) :
+		dm_execute_dmub_cmd(ctx, cmd, wait_type);
+	if (!ok)
+		return false;
+
+	if (woke && srv->idle_exit_counter == 0 &&
+	    !ctx->dc->debug.disable_dmub_reallow_idle)
+		dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, true);
+
+	return true;
+}
+
+/*
+ * Send a GPINT command to DMUB.
+ *
+ * @response is zeroed up front when provided, and filled from
+ * dmub_srv_get_gpint_response() only for DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY.
+ * The send uses a wait of 0 for DM_DMUB_WAIT_TYPE_NO_WAIT and 30 otherwise;
+ * a timeout in the no-wait case is still reported as success.
+ *
+ * Returns false when there is no DMUB service/instance or on any other
+ * non-OK status.
+ */
+static bool dc_dmub_execute_gpint(const struct dc_context *ctx, enum dmub_gpint_command command_code,
+		uint16_t param, uint32_t *response, enum dm_dmub_wait_type wait_type)
+{
+	struct dc_dmub_srv *srv = ctx->dmub_srv;
+	uint32_t timeout_us = 30;
+	enum dmub_status rc;
+
+	if (response)
+		*response = 0;
+
+	if (!srv || !srv->dmub)
+		return false;
+
+	if (wait_type == DM_DMUB_WAIT_TYPE_NO_WAIT)
+		timeout_us = 0;
+
+	rc = dmub_srv_send_gpint_command(srv->dmub, command_code, param, timeout_us);
+	if (rc == DMUB_STATUS_OK) {
+		if (response && wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)
+			dmub_srv_get_gpint_response(srv->dmub, response);
+
+		return true;
+	}
+
+	/* A caller that asked not to wait treats a timeout as success. */
+	return rc == DMUB_STATUS_TIMEOUT && wait_type == DM_DMUB_WAIT_TYPE_NO_WAIT;
+}
+
+/*
+ * Exit idle if needed, issue a GPINT command, then re-allow idle.
+ *
+ * Returns false when there is no DMUB service/instance or when the GPINT
+ * fails. Idle is re-allowed only if this call exited it, the GPINT
+ * succeeded, no idle exit is in flight and
+ * debug.disable_dmub_reallow_idle is clear.
+ */
+bool dc_wake_and_execute_gpint(const struct dc_context *ctx, enum dmub_gpint_command command_code,
+		uint16_t param, uint32_t *response, enum dm_dmub_wait_type wait_type)
+{
+	struct dc_dmub_srv *srv = ctx->dmub_srv;
+	bool woke;
+
+	if (!srv || !srv->dmub)
+		return false;
+
+	woke = srv->idle_allowed;
+	if (woke)
+		dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, false);
+
+	if (!dc_dmub_execute_gpint(ctx, command_code, param, response, wait_type))
+		return false;
+
+	if (woke && srv->idle_exit_counter == 0 &&
+	    !ctx->dc->debug.disable_dmub_reallow_idle)
+		dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, true);
+
+	return true;
+}
+
+/*
+ * Push the FAMS2 configuration to DMUB as a chain of ring-buffer commands.
+ *
+ * Command layout: cmd[0] is the global config, cmd[1..n] hold the
+ * per-stream base state and cmd[n+1..2n] the per-stream sub state, where
+ * n is fams2_global_config.num_streams taken from @context. The stream
+ * commands are only submitted when @enable is set and the global config in
+ * @context has its enable feature bit set; otherwise only cmd[0] is sent.
+ */
+static void dc_dmub_srv_rb_based_fams2_update_config(struct dc *dc,
+		struct dc_state *context,
+		bool enable)
+{
+	union dmub_rb_cmd cmd[2 * MAX_STREAMS + 1];
+	struct dmub_rb_cmd_fams2 *global_cmd = &cmd[0].fams2_config;
+	const uint32_t num_streams = context->bw_ctx.bw.dcn.fams2_global_config.num_streams;
+	uint8_t num_cmds = 1;
+	uint32_t i;
+
+	memset(cmd, 0, sizeof(cmd));
+
+	/* fill in generic command header */
+	global_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
+	global_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG;
+	global_cmd->header.payload_bytes =
+		sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header);
+
+	if (enable) {
+		/* send global configuration parameters */
+		memcpy(&global_cmd->config.global,
+		       &context->bw_ctx.bw.dcn.fams2_global_config,
+		       sizeof(struct dmub_cmd_fams2_global_config));
+
+		/* copy static feature configuration overrides (after the copy, so they win) */
+		global_cmd->config.global.features.bits.enable_offload_flip = dc->debug.fams2_config.bits.enable_offload_flip;
+		global_cmd->config.global.features.bits.enable_debug = dc->debug.fams2_config.bits.enable_debug;
+		global_cmd->config.global.features.bits.enable_stall_recovery = dc->debug.fams2_config.bits.enable_stall_recovery;
+
+		/* construct per-stream configs */
+		for (i = 0; i < num_streams; i++) {
+			struct dmub_rb_cmd_fams2 *base_cmd = &cmd[1 + i].fams2_config;
+			struct dmub_rb_cmd_fams2 *sub_cmd = &cmd[1 + num_streams + i].fams2_config;
+
+			/* stream static base state */
+			base_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
+			base_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG;
+			base_cmd->header.payload_bytes =
+				sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header);
+			base_cmd->header.multi_cmd_pending = 1;
+			memcpy(&base_cmd->config,
+			       &context->bw_ctx.bw.dcn.fams2_stream_base_params[i],
+			       sizeof(union dmub_cmd_fams2_config));
+
+			/* stream static sub state */
+			sub_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
+			sub_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG;
+			sub_cmd->header.payload_bytes =
+				sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header);
+			sub_cmd->header.multi_cmd_pending = 1;
+			memcpy(&sub_cmd->config,
+			       &context->bw_ctx.bw.dcn.fams2_stream_sub_params[i],
+			       sizeof(union dmub_cmd_fams2_config));
+		}
+	}
+
+	/* apply feature configuration based on current driver state */
+	global_cmd->config.global.features.bits.enable = enable;
+	global_cmd->config.global.features.bits.enable_visual_confirm = dc->debug.visual_confirm == VISUAL_CONFIRM_FAMS2;
+
+	if (enable && context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable) {
+		/* set multi pending for global, and unset for last stream cmd */
+		global_cmd->header.multi_cmd_pending = 1;
+		cmd[2 * num_streams].fams2_config.header.multi_cmd_pending = 0;
+		num_cmds += 2 * num_streams;
+	}
+
+	dm_execute_dmub_cmd_list(dc->ctx, num_cmds, cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+/*
+ * Push the FAMS2 configuration to DMUB through the indirect buffer.
+ *
+ * The full config is written to the buffer at dmub->ib_mem_gart.cpu_addr
+ * and a single DMUB_CMD__FAMS2_IB_CONFIG command carrying the buffer's GPU
+ * address and size is submitted and waited on.
+ *
+ * Global and per-stream state is populated only when @enable is set and
+ * the global config in @context has its enable feature bit set; otherwise
+ * the buffer stays zeroed apart from the enable / visual-confirm bits.
+ */
+static void dc_dmub_srv_ib_based_fams2_update_config(struct dc *dc,
+		struct dc_state *context,
+		bool enable)
+{
+	struct dmub_fams2_config_v2 *config = (struct dmub_fams2_config_v2 *)dc->ctx->dmub_srv->dmub->ib_mem_gart.cpu_addr;
+	union dmub_rb_cmd cmd;
+	uint32_t i;
+
+	memset(config, 0, sizeof(*config));
+	memset(&cmd, 0, sizeof(cmd));
+
+	cmd.ib_fams2_config.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
+	cmd.ib_fams2_config.header.sub_type = DMUB_CMD__FAMS2_IB_CONFIG;
+
+	cmd.ib_fams2_config.ib_data.src.quad_part = dc->ctx->dmub_srv->dmub->ib_mem_gart.gpu_addr;
+	cmd.ib_fams2_config.ib_data.size = sizeof(*config);
+
+	if (enable && context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable) {
+		/* send global configuration parameters */
+		memcpy(&config->global, &context->bw_ctx.bw.dcn.fams2_global_config,
+			sizeof(struct dmub_cmd_fams2_global_config));
+
+		/*
+		 * copy static feature configuration overrides
+		 *
+		 * This must follow the memcpy above: the copy rewrites all of
+		 * config->global, so overrides written before it would be lost.
+		 * The ring-buffer variant applies them in this same order.
+		 */
+		config->global.features.bits.enable_stall_recovery = dc->debug.fams2_config.bits.enable_stall_recovery;
+		config->global.features.bits.enable_offload_flip = dc->debug.fams2_config.bits.enable_offload_flip;
+		config->global.features.bits.enable_debug = dc->debug.fams2_config.bits.enable_debug;
+
+		/* construct per-stream configs */
+		for (i = 0; i < context->bw_ctx.bw.dcn.fams2_global_config.num_streams; i++) {
+			/* copy stream static base state */
+			memcpy(&config->stream_v1[i].base,
+				&context->bw_ctx.bw.dcn.fams2_stream_base_params[i],
+				sizeof(config->stream_v1[i].base));
+
+			/* copy stream static sub-state */
+			memcpy(&config->stream_v1[i].sub_state,
+				&context->bw_ctx.bw.dcn.fams2_stream_sub_params_v2[i],
+				sizeof(config->stream_v1[i].sub_state));
+		}
+	}
+
+	/* apply feature configuration based on current driver state */
+	config->global.features.bits.enable_visual_confirm = dc->debug.visual_confirm == VISUAL_CONFIRM_FAMS2;
+	config->global.features.bits.enable = enable;
+
+	dm_execute_dmub_cmd_list(dc->ctx, 1, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+/*
+ * Dispatch a FAMS2 config update according to dc->debug.fams_version.major:
+ * 2 uses the ring-buffer path, 3 uses the indirect-buffer path, and any
+ * other value does nothing.
+ */
+void dc_dmub_srv_fams2_update_config(struct dc *dc,
+		struct dc_state *context,
+		bool enable)
+{
+	switch (dc->debug.fams_version.major) {
+	case 2:
+		dc_dmub_srv_rb_based_fams2_update_config(dc, context, enable);
+		break;
+	case 3:
+		dc_dmub_srv_ib_based_fams2_update_config(dc, context, enable);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Send a FAMS2 DRR update for timing generator @tg_inst.
+ *
+ * The vtotal min/max/mid values, the mid frame number and the manual
+ * trigger flag are copied unchanged into the command's OPTC state request.
+ * The command is submitted with dm_execute_dmub_cmd() and waited on.
+ */
+void dc_dmub_srv_fams2_drr_update(struct dc *dc,
+		uint32_t tg_inst,
+		uint32_t vtotal_min,
+		uint32_t vtotal_max,
+		uint32_t vtotal_mid,
+		uint32_t vtotal_mid_frame_num,
+		bool program_manual_trigger)
+{
+	union dmub_rb_cmd drr_cmd = { 0 };
+
+	/* header: payload is everything in the command after the header */
+	drr_cmd.fams2_drr_update.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
+	drr_cmd.fams2_drr_update.header.sub_type = DMUB_CMD__FAMS2_DRR_UPDATE;
+	drr_cmd.fams2_drr_update.header.payload_bytes =
+		sizeof(drr_cmd.fams2_drr_update) - sizeof(drr_cmd.fams2_drr_update.header);
+
+	/* requested OPTC state */
+	drr_cmd.fams2_drr_update.dmub_optc_state_req.tg_inst = tg_inst;
+	drr_cmd.fams2_drr_update.dmub_optc_state_req.v_total_min = vtotal_min;
+	drr_cmd.fams2_drr_update.dmub_optc_state_req.v_total_max = vtotal_max;
+	drr_cmd.fams2_drr_update.dmub_optc_state_req.v_total_mid = vtotal_mid;
+	drr_cmd.fams2_drr_update.dmub_optc_state_req.v_total_mid_frame_num = vtotal_mid_frame_num;
+	drr_cmd.fams2_drr_update.dmub_optc_state_req.program_manual_trigger = program_manual_trigger;
+
+	dm_execute_dmub_cmd(dc->ctx, &drr_cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+/*
+ * Forward surface address flips for @stream to DMUB as FAMS2 flip commands.
+ *
+ * One command is built per entry of @srf_updates that carries a flip_addr;
+ * entries without one are skipped. The commands are chained through
+ * multi_cmd_pending (cleared on the last one) and submitted together with
+ * a wait. Nothing is sent when @surface_count is not positive, the stream
+ * has no status, or no update carries an address.
+ *
+ * NOTE(review): cmds[] holds MAX_PLANES entries and the loop is bounded
+ * only by @surface_count - presumably callers never pass more; confirm.
+ */
+void dc_dmub_srv_fams2_passthrough_flip(
+		struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *stream,
+		struct dc_surface_update *srf_updates,
+		int surface_count)
+{
+	union dmub_rb_cmd cmds[MAX_PLANES];
+	struct dc_stream_status *stream_status = dc_stream_get_status(stream);
+	int num_cmds = 0;
+	int i;
+
+	if (stream_status == NULL || surface_count <= 0)
+		return;
+
+	memset(cmds, 0, sizeof(cmds));
+
+	/* build command for each surface update */
+	for (i = 0; i < surface_count; i++) {
+		const struct dc_surface_update *update = &srf_updates[i];
+		struct dmub_rb_cmd_fams2_flip *flip;
+		struct dc_plane_state *plane_state;
+		const struct dc_plane_address *address;
+
+		/* skip if there is no address update for plane */
+		if (!update->flip_addr)
+			continue;
+
+		plane_state = update->surface;
+		address = &plane_state->address;
+		flip = &cmds[num_cmds].fams2_flip;
+
+		/* build command header */
+		flip->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
+		flip->header.sub_type = DMUB_CMD__FAMS2_FLIP;
+		flip->header.payload_bytes =
+			sizeof(struct dmub_rb_cmd_fams2_flip) - sizeof(struct dmub_cmd_header);
+
+		/* for chaining multiple commands, all but last command should set to 1 */
+		flip->header.multi_cmd_pending = 1;
+
+		/* set topology info */
+		flip->flip_info.pipe_mask = dc_plane_get_pipe_mask(state, plane_state);
+		flip->flip_info.otg_inst = stream_status->primary_otg_inst;
+
+		flip->flip_info.config.bits.is_immediate = plane_state->flip_immediate;
+
+		/*
+		 * build address info for command; an invalid address leaves the
+		 * address fields zeroed but the command is still counted below
+		 */
+		switch (address->type) {
+		case PLN_ADDR_TYPE_GRAPHICS:
+			if (address->grph.addr.quad_part == 0) {
+				BREAK_TO_DEBUGGER();
+				break;
+			}
+
+			flip->flip_info.addr_info.meta_addr_lo = address->grph.meta_addr.low_part;
+			flip->flip_info.addr_info.meta_addr_hi = (uint16_t)address->grph.meta_addr.high_part;
+			flip->flip_info.addr_info.surf_addr_lo = address->grph.addr.low_part;
+			flip->flip_info.addr_info.surf_addr_hi = (uint16_t)address->grph.addr.high_part;
+			break;
+		case PLN_ADDR_TYPE_VIDEO_PROGRESSIVE:
+			if (address->video_progressive.luma_addr.quad_part == 0 ||
+			    address->video_progressive.chroma_addr.quad_part == 0) {
+				BREAK_TO_DEBUGGER();
+				break;
+			}
+
+			/* luma plane */
+			flip->flip_info.addr_info.meta_addr_lo =
+				address->video_progressive.luma_meta_addr.low_part;
+			flip->flip_info.addr_info.meta_addr_hi =
+				(uint16_t)address->video_progressive.luma_meta_addr.high_part;
+			flip->flip_info.addr_info.surf_addr_lo =
+				address->video_progressive.luma_addr.low_part;
+			flip->flip_info.addr_info.surf_addr_hi =
+				(uint16_t)address->video_progressive.luma_addr.high_part;
+
+			/* chroma plane */
+			flip->flip_info.addr_info.meta_addr_c_lo =
+				address->video_progressive.chroma_meta_addr.low_part;
+			flip->flip_info.addr_info.meta_addr_c_hi =
+				(uint16_t)address->video_progressive.chroma_meta_addr.high_part;
+			flip->flip_info.addr_info.surf_addr_c_lo =
+				address->video_progressive.chroma_addr.low_part;
+			flip->flip_info.addr_info.surf_addr_c_hi =
+				(uint16_t)address->video_progressive.chroma_addr.high_part;
+			break;
+		default:
+			// Should never be hit
+			BREAK_TO_DEBUGGER();
+			break;
+		}
+
+		num_cmds++;
+	}
+
+	if (num_cmds <= 0)
+		return;
+
+	/* terminate the chain on the last command built */
+	cmds[num_cmds - 1].fams2_flip.header.multi_cmd_pending = 0;
+	dm_execute_dmub_cmd_list(dc->ctx, num_cmds, cmds, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+
+/*
+ * Start or stop IPS residency measurement on @panel_inst.
+ *
+ * Returns the result of submitting the command through the wake helper
+ * with DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY.
+ */
+bool dc_dmub_srv_ips_residency_cntl(const struct dc_context *ctx, uint8_t panel_inst, bool start_measurement)
+{
+	union dmub_rb_cmd rb_cmd;
+
+	memset(&rb_cmd, 0, sizeof(rb_cmd));
+
+	// only panel_inst=0 is supported at the moment
+	rb_cmd.ips_residency_cntl.cntl_data.panel_inst = panel_inst;
+	rb_cmd.ips_residency_cntl.cntl_data.start_measurement = start_measurement;
+
+	rb_cmd.ips_residency_cntl.header.payload_bytes = sizeof(struct dmub_cmd_ips_residency_cntl_data);
+	rb_cmd.ips_residency_cntl.header.sub_type = DMUB_CMD__IPS_RESIDENCY_CNTL;
+	rb_cmd.ips_residency_cntl.header.type = DMUB_CMD__IPS;
+
+	return dc_wake_and_execute_dmub_cmd(ctx, &rb_cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
+}
+
+/*
+ * Query IPS residency info for @panel_inst in mode @ips_mode.
+ *
+ * The command points DMUB at the scratch framebuffer (scratch_mem_fb) as
+ * the destination; on success sizeof(struct dmub_ips_residency_info) bytes
+ * are copied from that buffer into @driver_info.
+ *
+ * Returns false, leaving @driver_info untouched, when submission fails or
+ * the command's ret_status is 0.
+ */
+bool dc_dmub_srv_ips_query_residency_info(const struct dc_context *ctx, uint8_t panel_inst, struct dmub_ips_residency_info *driver_info,
+		enum ips_residency_mode ips_mode)
+{
+	struct dmub_srv *dmub = ctx->dmub_srv->dmub;
+	const uint32_t info_size = sizeof(struct dmub_ips_residency_info);
+	union dmub_rb_cmd rb_cmd;
+
+	dmub_flush_buffer_mem(&dmub->scratch_mem_fb);
+	memset(&rb_cmd, 0, sizeof(rb_cmd));
+
+	rb_cmd.ips_query_residency_info.header.type = DMUB_CMD__IPS;
+	rb_cmd.ips_query_residency_info.header.sub_type = DMUB_CMD__IPS_QUERY_RESIDENCY_INFO;
+	rb_cmd.ips_query_residency_info.header.payload_bytes = sizeof(struct dmub_cmd_ips_query_residency_info_data);
+
+	rb_cmd.ips_query_residency_info.info_data.ips_mode = (uint32_t)ips_mode;
+	rb_cmd.ips_query_residency_info.info_data.panel_inst = panel_inst;
+	rb_cmd.ips_query_residency_info.info_data.size = info_size;
+	rb_cmd.ips_query_residency_info.info_data.dest.quad_part = dmub->scratch_mem_fb.gpu_addr;
+
+	if (!dc_wake_and_execute_dmub_cmd(ctx, &rb_cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+		return false;
+
+	if (rb_cmd.ips_query_residency_info.header.ret_status == 0)
+		return false;
+
+	// copy the result to the output since ret_status != 0 means the command returned data
+	memcpy(driver_info, dmub->scratch_mem_fb.cpu_addr, info_size);
+
+	return true;
+}
+
+/*
+ * Send the LSDMA init command, giving DMUB the GPU address and size of the
+ * lsdma_rb_fb buffer.
+ *
+ * Returns false without sending anything when the firmware does not report
+ * lsdma_support_in_dmu, and false with an error log when submission fails.
+ * The command is submitted without waiting for completion.
+ */
+bool dmub_lsdma_init(struct dc_dmub_srv *dc_dmub_srv)
+{
+	/* dc_ctx keeps its name: DC_ERROR() below references it. */
+	struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+	union dmub_rb_cmd rb_cmd;
+	struct dmub_cmd_lsdma_data *data = &rb_cmd.lsdma.lsdma_data;
+
+	if (!dc_dmub_srv->dmub->feature_caps.lsdma_support_in_dmu)
+		return false;
+
+	memset(&rb_cmd, 0, sizeof(rb_cmd));
+
+	rb_cmd.cmd_common.header.sub_type = DMUB_CMD__LSDMA_INIT_CONFIG;
+	rb_cmd.cmd_common.header.type = DMUB_CMD__LSDMA;
+
+	data->u.init_data.ring_size = dc_ctx->dmub_srv->dmub->lsdma_rb_fb.size;
+	data->u.init_data.gpu_addr_base.quad_part = dc_ctx->dmub_srv->dmub->lsdma_rb_fb.gpu_addr;
+
+	if (dc_wake_and_execute_dmub_cmd(dc_ctx, &rb_cmd, DM_DMUB_WAIT_TYPE_NO_WAIT))
+		return true;
+
+	DC_ERROR("LSDMA Init failed in DMUB");
+	return false;
+}
+
+/*
+ * Queue an LSDMA linear copy of @count bytes from @src_addr to @dst_addr.
+ *
+ * The 64-bit addresses are split into low/high 32-bit halves. The command
+ * is submitted without waiting; returns the submission result and logs an
+ * error on failure.
+ *
+ * NOTE(review): the controller field is written as count - 1, so a @count
+ * of 0 wraps to 0xFFFFFFFF - presumably callers never pass 0; confirm.
+ */
+bool dmub_lsdma_send_linear_copy_command(
+		struct dc_dmub_srv *dc_dmub_srv,
+		uint64_t src_addr,
+		uint64_t dst_addr,
+		uint32_t count
+)
+{
+	/* dc_ctx keeps its name: DC_ERROR() below references it. */
+	struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+	union dmub_rb_cmd rb_cmd;
+	struct dmub_cmd_lsdma_data *data = &rb_cmd.lsdma.lsdma_data;
+
+	memset(&rb_cmd, 0, sizeof(rb_cmd));
+
+	rb_cmd.cmd_common.header.sub_type = DMUB_CMD__LSDMA_LINEAR_COPY;
+	rb_cmd.cmd_common.header.type = DMUB_CMD__LSDMA;
+
+	data->u.linear_copy_data.src_lo = src_addr & 0xFFFFFFFF;
+	data->u.linear_copy_data.src_hi = (src_addr >> 32) & 0xFFFFFFFF;
+	data->u.linear_copy_data.dst_lo = dst_addr & 0xFFFFFFFF;
+	data->u.linear_copy_data.dst_hi = (dst_addr >> 32) & 0xFFFFFFFF;
+	data->u.linear_copy_data.count = count - 1; // LSDMA controller expects bytes to copy -1
+
+	if (dc_wake_and_execute_dmub_cmd(dc_ctx, &rb_cmd, DM_DMUB_WAIT_TYPE_NO_WAIT))
+		return true;
+
+	DC_ERROR("LSDMA Linear Copy failed in DMUB");
+	return false;
+}
+
+/*
+ * Queue an LSDMA linear sub-window copy.
+ *
+ * Every field of @copy_data is copied one-to-one into the command's
+ * linear_sub_window_copy_data. The command is submitted without waiting;
+ * returns the submission result and logs an error on failure.
+ */
+bool dmub_lsdma_send_linear_sub_window_copy_command(
+		struct dc_dmub_srv *dc_dmub_srv,
+		struct lsdma_linear_sub_window_copy_params copy_data
+)
+{
+	/* dc_ctx keeps its name: DC_ERROR() below references it. */
+	struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+	union dmub_rb_cmd rb_cmd;
+	struct dmub_cmd_lsdma_data *data = &rb_cmd.lsdma.lsdma_data;
+
+	memset(&rb_cmd, 0, sizeof(rb_cmd));
+
+	rb_cmd.cmd_common.header.sub_type = DMUB_CMD__LSDMA_LINEAR_SUB_WINDOW_COPY;
+	rb_cmd.cmd_common.header.type = DMUB_CMD__LSDMA;
+
+	/* source surface */
+	data->u.linear_sub_window_copy_data.src_lo = copy_data.src_lo;
+	data->u.linear_sub_window_copy_data.src_hi = copy_data.src_hi;
+	data->u.linear_sub_window_copy_data.src_x = copy_data.src_x;
+	data->u.linear_sub_window_copy_data.src_y = copy_data.src_y;
+	data->u.linear_sub_window_copy_data.src_pitch = copy_data.src_pitch;
+	data->u.linear_sub_window_copy_data.src_slice_pitch = copy_data.src_slice_pitch;
+	data->u.linear_sub_window_copy_data.src_cache_policy = copy_data.src_cache_policy;
+
+	/* destination surface */
+	data->u.linear_sub_window_copy_data.dst_lo = copy_data.dst_lo;
+	data->u.linear_sub_window_copy_data.dst_hi = copy_data.dst_hi;
+	data->u.linear_sub_window_copy_data.dst_x = copy_data.dst_x;
+	data->u.linear_sub_window_copy_data.dst_y = copy_data.dst_y;
+	data->u.linear_sub_window_copy_data.dst_pitch = copy_data.dst_pitch;
+	data->u.linear_sub_window_copy_data.dst_slice_pitch = copy_data.dst_slice_pitch;
+	data->u.linear_sub_window_copy_data.dst_cache_policy = copy_data.dst_cache_policy;
+
+	/* copied rectangle and element attributes */
+	data->u.linear_sub_window_copy_data.rect_x = copy_data.rect_x;
+	data->u.linear_sub_window_copy_data.rect_y = copy_data.rect_y;
+	data->u.linear_sub_window_copy_data.element_size = copy_data.element_size;
+	data->u.linear_sub_window_copy_data.tmz = copy_data.tmz;
+
+	if (dc_wake_and_execute_dmub_cmd(dc_ctx, &rb_cmd, DM_DMUB_WAIT_TYPE_NO_WAIT))
+		return true;
+
+	DC_ERROR("LSDMA Linear Sub Window Copy failed in DMUB");
+	return false;
+}
+
+/*
+ * Queue an LSDMA tiled-to-tiled copy.
+ *
+ * The 64-bit source/destination addresses in @params are split into
+ * low/high 32-bit halves. params.swizzle_mode and params.element_size are
+ * each written to both the source and the destination field. All other
+ * fields are copied one-to-one. The command is submitted without waiting;
+ * returns the submission result and logs an error on failure.
+ */
+bool dmub_lsdma_send_tiled_to_tiled_copy_command(
+		struct dc_dmub_srv *dc_dmub_srv,
+		struct lsdma_send_tiled_to_tiled_copy_command_params params
+)
+{
+	/* dc_ctx keeps its name: DC_ERROR() below references it. */
+	struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+	union dmub_rb_cmd rb_cmd;
+	struct dmub_cmd_lsdma_data *data = &rb_cmd.lsdma.lsdma_data;
+
+	memset(&rb_cmd, 0, sizeof(rb_cmd));
+
+	rb_cmd.cmd_common.header.sub_type = DMUB_CMD__LSDMA_TILED_TO_TILED_COPY;
+	rb_cmd.cmd_common.header.type = DMUB_CMD__LSDMA;
+
+	/* source surface */
+	data->u.tiled_copy_data.src_addr_lo = params.src_addr & 0xFFFFFFFF;
+	data->u.tiled_copy_data.src_addr_hi = (params.src_addr >> 32) & 0xFFFFFFFF;
+	data->u.tiled_copy_data.src_x = params.src_x;
+	data->u.tiled_copy_data.src_y = params.src_y;
+	data->u.tiled_copy_data.src_width = params.src_width;
+	data->u.tiled_copy_data.src_height = params.src_height;
+	data->u.tiled_copy_data.src_swizzle_mode = params.swizzle_mode;
+	data->u.tiled_copy_data.src_element_size = params.element_size;
+
+	/* destination surface */
+	data->u.tiled_copy_data.dst_addr_lo = params.dst_addr & 0xFFFFFFFF;
+	data->u.tiled_copy_data.dst_addr_hi = (params.dst_addr >> 32) & 0xFFFFFFFF;
+	data->u.tiled_copy_data.dst_x = params.dst_x;
+	data->u.tiled_copy_data.dst_y = params.dst_y;
+	data->u.tiled_copy_data.dst_width = params.dst_width;
+	data->u.tiled_copy_data.dst_height = params.dst_height;
+	data->u.tiled_copy_data.dst_swizzle_mode = params.swizzle_mode;
+	data->u.tiled_copy_data.dst_element_size = params.element_size;
+
+	/* copied rectangle */
+	data->u.tiled_copy_data.rect_x = params.rect_x;
+	data->u.tiled_copy_data.rect_y = params.rect_y;
+
+	/* compression / protection attributes */
+	data->u.tiled_copy_data.dcc = params.dcc;
+	data->u.tiled_copy_data.tmz = params.tmz;
+	data->u.tiled_copy_data.read_compress = params.read_compress;
+	data->u.tiled_copy_data.write_compress = params.write_compress;
+	data->u.tiled_copy_data.data_format = params.data_format;
+	data->u.tiled_copy_data.max_com = params.max_com;
+	data->u.tiled_copy_data.max_uncom = params.max_uncom;
+
+	if (dc_wake_and_execute_dmub_cmd(dc_ctx, &rb_cmd, DM_DMUB_WAIT_TYPE_NO_WAIT))
+		return true;
+
+	DC_ERROR("LSDMA Tiled to Tiled Copy failed in DMUB");
+	return false;
+}
+
+bool dmub_lsdma_send_pio_copy_command(
+ struct dc_dmub_srv *dc_dmub_srv,
+ uint64_t src_addr,
+ uint64_t dst_addr,
+ uint32_t byte_count,
+ uint32_t overlap_disable
+)
+{ /* Build a DMUB_CMD__LSDMA PIO copy command and submit it; returns the submission result */
+ struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+ union dmub_rb_cmd cmd;
+ enum dm_dmub_wait_type wait_type;
+ struct dmub_cmd_lsdma_data *lsdma_data = &cmd.lsdma.lsdma_data; /* points into cmd, so the writes below fill the command itself */
+ bool result;
+
+ memset(&cmd, 0, sizeof(cmd)); /* any command field not assigned below stays zero */
+
+ cmd.cmd_common.header.type = DMUB_CMD__LSDMA;
+ cmd.cmd_common.header.sub_type = DMUB_CMD__LSDMA_PIO_COPY;
+ wait_type = DM_DMUB_WAIT_TYPE_NO_WAIT; /* submitted with the NO_WAIT wait type */
+
+ lsdma_data->u.pio_copy_data.packet.fields.byte_count = byte_count;
+ lsdma_data->u.pio_copy_data.packet.fields.overlap_disable = overlap_disable;
+ lsdma_data->u.pio_copy_data.src_lo = src_addr & 0xFFFFFFFF; /* 64-bit addresses are split into low/high 32-bit halves */
+ lsdma_data->u.pio_copy_data.src_hi = (src_addr >> 32) & 0xFFFFFFFF;
+ lsdma_data->u.pio_copy_data.dst_lo = dst_addr & 0xFFFFFFFF;
+ lsdma_data->u.pio_copy_data.dst_hi = (dst_addr >> 32) & 0xFFFFFFFF;
+
+ result = dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, wait_type);
+
+ if (!result) /* failure is logged here and still reported to the caller through the return value */
+ DC_ERROR("LSDMA PIO Copy failed in DMUB");
+
+ return result;
+}
+
+bool dmub_lsdma_send_pio_constfill_command(
+ struct dc_dmub_srv *dc_dmub_srv,
+ uint64_t dst_addr,
+ uint32_t byte_count,
+ uint32_t data
+)
+{ /* Build a DMUB_CMD__LSDMA PIO constant-fill command and submit it; returns the submission result */
+ struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+ union dmub_rb_cmd cmd;
+ enum dm_dmub_wait_type wait_type;
+ struct dmub_cmd_lsdma_data *lsdma_data = &cmd.lsdma.lsdma_data; /* points into cmd, so the writes below fill the command itself */
+ bool result;
+
+ memset(&cmd, 0, sizeof(cmd)); /* any command field not assigned below stays zero */
+
+ cmd.cmd_common.header.type = DMUB_CMD__LSDMA;
+ cmd.cmd_common.header.sub_type = DMUB_CMD__LSDMA_PIO_CONSTFILL;
+ wait_type = DM_DMUB_WAIT_TYPE_NO_WAIT; /* submitted with the NO_WAIT wait type */
+
+ lsdma_data->u.pio_constfill_data.packet.fields.constant_fill = 1; /* always set for this command; not caller-controlled */
+ lsdma_data->u.pio_constfill_data.packet.fields.byte_count = byte_count;
+ lsdma_data->u.pio_constfill_data.dst_lo = dst_addr & 0xFFFFFFFF; /* 64-bit address is split into low/high 32-bit halves */
+ lsdma_data->u.pio_constfill_data.dst_hi = (dst_addr >> 32) & 0xFFFFFFFF;
+ lsdma_data->u.pio_constfill_data.data = data; /* presumably the 32-bit fill pattern - confirm against the DMUB command definition */
+
+ result = dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, wait_type);
+
+ if (!result) /* failure is logged here and still reported to the caller through the return value */
+ DC_ERROR("LSDMA PIO Constfill failed in DMUB");
+
+ return result;
+}
+
+bool dmub_lsdma_send_poll_reg_write_command(struct dc_dmub_srv *dc_dmub_srv, uint32_t reg_addr, uint32_t reg_data)
+{ /* Build a DMUB_CMD__LSDMA poll-reg-write command carrying reg_addr/reg_data and submit it; returns the submission result */
+ struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+ union dmub_rb_cmd cmd;
+ enum dm_dmub_wait_type wait_type;
+ struct dmub_cmd_lsdma_data *lsdma_data = &cmd.lsdma.lsdma_data; /* points into cmd, so the writes below fill the command itself */
+ bool result;
+
+ memset(&cmd, 0, sizeof(cmd)); /* any command field not assigned below stays zero */
+
+ cmd.cmd_common.header.type = DMUB_CMD__LSDMA;
+ cmd.cmd_common.header.sub_type = DMUB_CMD__LSDMA_POLL_REG_WRITE;
+ wait_type = DM_DMUB_WAIT_TYPE_NO_WAIT; /* submitted with the NO_WAIT wait type */
+
+ lsdma_data->u.reg_write_data.reg_addr = reg_addr;
+ lsdma_data->u.reg_write_data.reg_data = reg_data;
+
+ result = dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, wait_type);
+
+ if (!result) /* failure is logged here and still reported to the caller through the return value */
+ DC_ERROR("LSDMA Poll Reg failed in DMUB");
+
+ return result;
+}
+
+bool dc_dmub_srv_is_cursor_offload_enabled(const struct dc *dc)
+{ /* Report whether a DMUB service exists and has its cursor_offload_enabled flag set */
+ return dc->ctx->dmub_srv && dc->ctx->dmub_srv->cursor_offload_enabled; /* && short-circuits, so a NULL dmub_srv yields false */
+}
+
+void dc_dmub_srv_release_hw(const struct dc *dc)
+{ /* Send a DMUB_CMD__IDLE_OPT / RELEASE_HW command to DMUB; does nothing when no DMUB service is present */
+ struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv;
+ union dmub_rb_cmd cmd = {0};
+
+ if (!dc_dmub_srv || !dc_dmub_srv->dmub) /* nothing to notify without a DMUB service instance */
+ return;
+
+ memset(&cmd, 0, sizeof(cmd)); /* clears the whole union; overlaps with the {0} initializer at the declaration */
+ cmd.idle_opt_notify_idle.header.type = DMUB_CMD__IDLE_OPT;
+ cmd.idle_opt_notify_idle.header.sub_type = DMUB_CMD__IDLE_OPT_RELEASE_HW;
+ cmd.idle_opt_notify_idle.header.payload_bytes = /* payload size = whole command minus its header */
+ sizeof(cmd.idle_opt_notify_idle) -
+ sizeof(cmd.idle_opt_notify_idle.header);
+
+ dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); /* uses the WAIT wait type (presumably blocks until DMUB handles it - confirm); result is not checked */
+}
+
+void dc_dmub_srv_log_preos_dmcub_info(struct dc_dmub_srv *dc_dmub_srv)
+{ /* Emit the fields of dmub->preos_info at debug log level; does nothing when no DMUB service is present */
+ struct dmub_srv *dmub;
+
+ if (!dc_dmub_srv || !dc_dmub_srv->dmub) /* nothing to log without a DMUB service instance */
+ return;
+
+ dmub = dc_dmub_srv->dmub;
+
+ if (dmub_srv_get_preos_info(dmub)) { /* log only when the query returns true; it presumably fills dmub->preos_info - confirm */
+ DC_LOG_DEBUG("%s: PreOS DMCUB Info", __func__);
+ DC_LOG_DEBUG("fw_version : 0x%08x", dmub->preos_info.fw_version);
+ DC_LOG_DEBUG("boot_options : 0x%08x", dmub->preos_info.boot_options);
+ DC_LOG_DEBUG("boot_status : 0x%08x", dmub->preos_info.boot_status);
+ DC_LOG_DEBUG("trace_buffer_phy_addr : 0x%016llx", dmub->preos_info.trace_buffer_phy_addr);
+ DC_LOG_DEBUG("trace_buffer_size_bytes : 0x%08x", dmub->preos_info.trace_buffer_size);
+ DC_LOG_DEBUG("fb_base : 0x%016llx", dmub->preos_info.fb_base);
+ DC_LOG_DEBUG("fb_offset : 0x%016llx", dmub->preos_info.fb_offset);
+ }
}
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
index 3e35eee7188c..72e0a41f39f0 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
@@ -26,11 +26,16 @@
#ifndef _DMUB_DC_SRV_H_
#define _DMUB_DC_SRV_H_
-#include "os_types.h"
+#include "dm_services_types.h"
#include "dmub/dmub_srv.h"
struct dmub_srv;
struct dc;
+struct pipe_ctx;
+struct dc_crtc_timing_adjust;
+struct dc_crtc_timing;
+struct dc_state;
+struct dc_surface_update;
struct dc_reg_helper_state {
bool gather_in_progress;
@@ -46,21 +51,32 @@ struct dc_dmub_srv {
struct dc_context *ctx;
void *dm;
+
+ int32_t idle_exit_counter;
+ union dmub_shared_state_ips_driver_signals driver_signals;
+ bool idle_allowed;
+ bool needs_idle_wake;
+ bool cursor_offload_enabled;
};
-void dc_dmub_srv_cmd_queue(struct dc_dmub_srv *dc_dmub_srv,
- union dmub_rb_cmd *cmd);
+bool dc_dmub_srv_wait_for_pending(struct dc_dmub_srv *dc_dmub_srv);
+
+bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv);
-void dc_dmub_srv_cmd_execute(struct dc_dmub_srv *dc_dmub_srv);
+bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv,
+ unsigned int count,
+ union dmub_rb_cmd *cmd_list);
-void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv);
+bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv,
+ enum dm_dmub_wait_type wait_type,
+ union dmub_rb_cmd *cmd_list);
-void dc_dmub_srv_wait_phy_init(struct dc_dmub_srv *dc_dmub_srv);
+bool dc_dmub_srv_cmd_run(struct dc_dmub_srv *dc_dmub_srv, union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type);
-bool dc_dmub_srv_cmd_with_reply_data(struct dc_dmub_srv *dc_dmub_srv, union dmub_rb_cmd *cmd);
+bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int count, union dmub_rb_cmd *cmd_list, enum dm_dmub_wait_type wait_type);
bool dc_dmub_srv_notify_stream_mask(struct dc_dmub_srv *dc_dmub_srv,
- unsigned int stream_mask);
+ unsigned int stream_mask);
bool dc_dmub_srv_is_restore_required(struct dc_dmub_srv *dc_dmub_srv);
@@ -68,10 +84,294 @@ bool dc_dmub_srv_get_dmub_outbox0_msg(const struct dc *dc, struct dmcub_trace_bu
void dc_dmub_trace_event_control(struct dc *dc, bool enable);
+void dc_dmub_srv_drr_update_cmd(struct dc *dc, uint32_t tg_inst, uint32_t vtotal_min, uint32_t vtotal_max);
+
+void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst);
+bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool enable_pstate, struct dc_state *context);
+
+void dc_dmub_srv_query_caps_cmd(struct dc_dmub_srv *dc_dmub_srv);
+void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dc_dmub_srv_clear_inbox0_ack(struct dc_dmub_srv *dmub_srv);
+void dc_dmub_srv_wait_for_inbox0_ack(struct dc_dmub_srv *dmub_srv);
void dc_dmub_srv_send_inbox0_cmd(struct dc_dmub_srv *dmub_srv, union dmub_inbox0_data_register data);
-bool dc_dmub_srv_get_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv, struct dmub_diagnostic_data *dmub_oca);
+bool dc_dmub_srv_get_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv);
+void dc_dmub_setup_subvp_dmub_command(struct dc *dc, struct dc_state *context, bool enable);
void dc_dmub_srv_log_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv);
+void dc_send_update_cursor_info_to_dmu(struct pipe_ctx *pCtx, uint8_t pipe_idx);
+bool dc_dmub_check_min_version(struct dmub_srv *srv);
+
+void dc_dmub_srv_enable_dpia_trace(const struct dc *dc);
+void dc_dmub_srv_subvp_save_surf_addr(const struct dc_dmub_srv *dc_dmub_srv, const struct dc_plane_address *addr, uint8_t subvp_index);
+
+bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait);
+
+void dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool allow_idle);
+
+/**
+ * dc_dmub_srv_set_power_state() - Sets the power state for DMUB service.
+ *
+ * Controls whether messaging the DMCUB or interfacing with it via HW register
+ * interaction is permitted.
+ *
+ * @dc_dmub_srv: The DC DMUB service pointer
+ * @power_state: the DC power state
+ */
+void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state power_state);
+
+/**
+ * dc_dmub_srv_notify_fw_dc_power_state() - Notifies firmware of the DC power state.
+ *
+ * Differs from dc_dmub_srv_set_power_state in that it needs to access HW in order
+ * to message DMCUB of the state transition. Should come after the D0 exit and
+ * before D3 set power state.
+ *
+ * @dc_dmub_srv: The DC DMUB service pointer
+ * @power_state: the DC power state
+ */
+void dc_dmub_srv_notify_fw_dc_power_state(struct dc_dmub_srv *dc_dmub_srv,
+ enum dc_acpi_cm_power_state power_state);
+
+/**
+ * dc_dmub_srv_should_detect() - Checks if link detection is required.
+ *
+ * While in idle power states we may need driver to manually redetect in
+ * the case of a missing hotplug. Should be called from a polling timer.
+ *
+ * Return: true if redetection is required.
+ */
+bool dc_dmub_srv_should_detect(struct dc_dmub_srv *dc_dmub_srv);
+
+/**
+ * dc_wake_and_execute_dmub_cmd() - Wrapper for DMUB command execution.
+ *
+ * Refer to dc_wake_and_execute_dmub_cmd_list() for usage and limitations.
+ * This function is a convenience wrapper for a single command execution.
+ *
+ * @ctx: DC context
+ * @cmd: The command to send/receive
+ * @wait_type: The wait behavior for the execution
+ *
+ * Return: true on command submission success, false otherwise
+ */
+bool dc_wake_and_execute_dmub_cmd(const struct dc_context *ctx, union dmub_rb_cmd *cmd,
+ enum dm_dmub_wait_type wait_type);
+
+/**
+ * dc_wake_and_execute_dmub_cmd_list() - Wrapper for DMUB command list execution.
+ *
+ * If the DMCUB hardware was asleep then it wakes the DMUB before
+ * executing the command and attempts to re-enter if the command
+ * submission was successful.
+ *
+ * This should be the preferred command submission interface provided
+ * the DC lock is acquired.
+ *
+ * Entry/exit out of idle power optimizations would need to be
+ * manually performed otherwise through dc_allow_idle_optimizations().
+ *
+ * @ctx: DC context
+ * @count: Number of commands to send/receive
+ * @cmd: Array of commands to send
+ * @wait_type: The wait behavior for the execution
+ *
+ * Return: true on command submission success, false otherwise
+ */
+bool dc_wake_and_execute_dmub_cmd_list(const struct dc_context *ctx, unsigned int count,
+ union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type);
+
+/**
+ * dc_wake_and_execute_gpint()
+ *
+ * @ctx: DC context
+ * @command_code: The command ID to send to DMCUB
+ * @param: The parameter to message DMCUB
+ * @response: Optional response out value - may be NULL.
+ * @wait_type: The wait behavior for the execution
+ */
+bool dc_wake_and_execute_gpint(const struct dc_context *ctx, enum dmub_gpint_command command_code,
+ uint16_t param, uint32_t *response, enum dm_dmub_wait_type wait_type);
+
+void dc_dmub_srv_fams2_update_config(struct dc *dc,
+ struct dc_state *context,
+ bool enable);
+void dc_dmub_srv_fams2_drr_update(struct dc *dc,
+ uint32_t tg_inst,
+ uint32_t vtotal_min,
+ uint32_t vtotal_max,
+ uint32_t vtotal_mid,
+ uint32_t vtotal_mid_frame_num,
+ bool program_manual_trigger);
+void dc_dmub_srv_fams2_passthrough_flip(
+ struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *stream,
+ struct dc_surface_update *srf_updates,
+ int surface_count);
+
+bool dmub_lsdma_init(struct dc_dmub_srv *dc_dmub_srv);
+bool dmub_lsdma_send_linear_copy_command(
+ struct dc_dmub_srv *dc_dmub_srv,
+ uint64_t src_addr,
+ uint64_t dst_addr,
+ uint32_t count);
+
+struct lsdma_linear_sub_window_copy_params { /* Argument bundle for dmub_lsdma_send_linear_sub_window_copy_command() */
+ uint32_t src_lo; /* source address is carried as separate low/high 32-bit words */
+ uint32_t src_hi;
+
+ uint32_t dst_lo; /* destination address, same low/high split */
+ uint32_t dst_hi;
+
+ uint32_t src_x : 16;
+ uint32_t src_y : 16;
+
+ uint32_t dst_x : 16;
+ uint32_t dst_y : 16;
+
+ uint32_t rect_x : 16;
+ uint32_t rect_y : 16;
+
+ uint32_t src_pitch : 16;
+ uint32_t dst_pitch : 16;
+
+ uint32_t src_slice_pitch;
+ uint32_t dst_slice_pitch;
+
+ uint32_t tmz : 1;
+ uint32_t element_size : 3;
+ uint32_t src_cache_policy : 3;
+ uint32_t dst_cache_policy : 3;
+ uint32_t padding : 22; /* 1 + 3 + 3 + 3 + 22 = 32 bits of flag fields in total */
+};
+
+bool dmub_lsdma_send_linear_sub_window_copy_command(
+ struct dc_dmub_srv *dc_dmub_srv,
+ struct lsdma_linear_sub_window_copy_params copy_data
+);
+bool dmub_lsdma_send_pio_copy_command(
+ struct dc_dmub_srv *dc_dmub_srv,
+ uint64_t src_addr,
+ uint64_t dst_addr,
+ uint32_t byte_count,
+ uint32_t overlap_disable);
+bool dmub_lsdma_send_pio_constfill_command(
+ struct dc_dmub_srv *dc_dmub_srv,
+ uint64_t dst_addr,
+ uint32_t byte_count,
+ uint32_t data);
+
+struct lsdma_send_tiled_to_tiled_copy_command_params { /* Argument bundle for dmub_lsdma_send_tiled_to_tiled_copy_command() */
+ uint64_t src_addr; /* full 64-bit addresses; the sender splits them into low/high 32-bit words */
+ uint64_t dst_addr;
+
+ uint32_t src_x : 16;
+ uint32_t src_y : 16;
+
+ uint32_t dst_x : 16;
+ uint32_t dst_y : 16;
+
+ uint32_t src_width : 16;
+ uint32_t dst_width : 16;
+
+ uint32_t rect_x : 16;
+ uint32_t rect_y : 16;
+
+ uint32_t src_height : 16;
+ uint32_t dst_height : 16;
+
+ uint32_t data_format : 6;
+ uint32_t swizzle_mode : 5; /* the sender applies this one value to both source and destination */
+ uint32_t element_size : 3; /* likewise applied to both source and destination */
+ uint32_t dcc : 1;
+ uint32_t tmz : 1;
+ uint32_t read_compress : 2;
+ uint32_t write_compress : 2;
+ uint32_t max_com : 2;
+ uint32_t max_uncom : 1;
+ uint32_t padding : 9; /* 6 + 5 + 3 + 1 + 1 + 2 + 2 + 2 + 1 + 9 = 32 bits of flag fields in total */
+};
+
+bool dmub_lsdma_send_tiled_to_tiled_copy_command(
+ struct dc_dmub_srv *dc_dmub_srv,
+ struct lsdma_send_tiled_to_tiled_copy_command_params params);
+bool dmub_lsdma_send_poll_reg_write_command(struct dc_dmub_srv *dc_dmub_srv, uint32_t reg_addr, uint32_t reg_data);
+
+/**
+ * struct ips_residency_info - struct containing info from dmub_ips_residency_stats
+ *
+ * @ips_mode: The mode of IPS that the following stats pertain to
+ * @residency_percent: The percentage of time spent in given IPS mode in millipercent
+ * @entry_counter: The number of entries made into this IPS state
+ * @total_active_time_us: uint32_t array of length 2 representing time in the given IPS mode
+ * in microseconds. Index 0 is lower 32 bits, index 1 is upper 32 bits.
+ * @total_inactive_time_us: uint32_t array of length 2 representing time outside the given IPS mode
+ * in microseconds. Index 0 is lower 32 bits, index 1 is upper 32 bits.
+ * @histogram: Histogram of given IPS state durations - bucket definitions in dmub_ips.c
+ */
+struct ips_residency_info {
+ enum ips_residency_mode ips_mode;
+ unsigned int residency_percent;
+ unsigned int entry_counter;
+ unsigned int total_active_time_us[2];
+ unsigned int total_inactive_time_us[2];
+ unsigned int histogram[16];
+};
+
+bool dc_dmub_srv_ips_residency_cntl(const struct dc_context *ctx, uint8_t panel_inst, bool start_measurement);
+
+bool dc_dmub_srv_ips_query_residency_info(const struct dc_context *ctx, uint8_t panel_inst,
+ struct dmub_ips_residency_info *driver_info,
+ enum ips_residency_mode ips_mode);
+
+/**
+ * dc_dmub_srv_cursor_offload_init() - Initializes cursor offloading.
+ *
+ * @dc: pointer to DC object
+ */
+void dc_dmub_srv_cursor_offload_init(struct dc *dc);
+
+/**
+ * dc_dmub_srv_control_cursor_offload() - Enables or disables cursor offloading for a stream.
+ *
+ * @dc: pointer to DC object
+ * @context: the DC context to reference for pipe allocations
+ * @stream: the stream to control
+ * @enable: true to enable cursor offload, false to disable
+ */
+void dc_dmub_srv_control_cursor_offload(struct dc *dc, struct dc_state *context,
+ const struct dc_stream_state *stream, bool enable);
+
+/**
+ * dc_dmub_srv_program_cursor_now() - Requests immediate cursor programming for a given pipe.
+ *
+ * @dc: pointer to DC object
+ * @pipe: top-most pipe for a stream.
+ */
+void dc_dmub_srv_program_cursor_now(struct dc *dc, const struct pipe_ctx *pipe);
+
+/**
+ * dc_dmub_srv_is_cursor_offload_enabled() - Checks if cursor offload is enabled.
+ *
+ * @dc: pointer to DC object
+ *
+ * Return: true if cursor offload is enabled, false otherwise
+ */
+bool dc_dmub_srv_is_cursor_offload_enabled(const struct dc *dc);
+
+/**
+ * dc_dmub_srv_release_hw() - Notifies DMUB service that HW access is no longer required.
+ *
+ * @dc: pointer to DC object
+ */
+void dc_dmub_srv_release_hw(const struct dc *dc);
+
+/**
+ * dc_dmub_srv_log_preos_dmcub_info() - Logs preos dmcub fw info.
+ *
+ * @dc_dmub_srv: The DC DMUB service pointer
+ */
+void dc_dmub_srv_log_preos_dmcub_info(struct dc_dmub_srv *dc_dmub_srv);
#endif /* _DMUB_DC_SRV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
index e68e9a86a4d9..79e1696def63 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
@@ -27,6 +27,7 @@
#define DC_DP_TYPES_H
#include "os_types.h"
+#include "dc_ddc_types.h"
enum dc_lane_count {
LANE_COUNT_UNKNOWN = 0,
@@ -46,24 +47,21 @@ enum dc_lane_count {
*/
enum dc_link_rate {
LINK_RATE_UNKNOWN = 0,
- LINK_RATE_LOW = 0x06, // Rate_1 (RBR) - 1.62 Gbps/Lane
- LINK_RATE_RATE_2 = 0x08, // Rate_2 - 2.16 Gbps/Lane
- LINK_RATE_RATE_3 = 0x09, // Rate_3 - 2.43 Gbps/Lane
- LINK_RATE_HIGH = 0x0A, // Rate_4 (HBR) - 2.70 Gbps/Lane
- LINK_RATE_RBR2 = 0x0C, // Rate_5 (RBR2)- 3.24 Gbps/Lane
- LINK_RATE_RATE_6 = 0x10, // Rate_6 - 4.32 Gbps/Lane
- LINK_RATE_HIGH2 = 0x14, // Rate_7 (HBR2)- 5.40 Gbps/Lane
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- LINK_RATE_HIGH3 = 0x1E, // Rate_8 (HBR3)- 8.10 Gbps/Lane
+ LINK_RATE_LOW = 0x06, // Rate_1 (RBR) - 1.62 Gbps/Lane
+ LINK_RATE_RATE_2 = 0x08, // Rate_2 - 2.16 Gbps/Lane
+ LINK_RATE_RATE_3 = 0x09, // Rate_3 - 2.43 Gbps/Lane
+ LINK_RATE_HIGH = 0x0A, // Rate_4 (HBR) - 2.70 Gbps/Lane
+ LINK_RATE_RBR2 = 0x0C, // Rate_5 (RBR2) - 3.24 Gbps/Lane
+ LINK_RATE_RATE_6 = 0x10, // Rate_6 - 4.32 Gbps/Lane
+ LINK_RATE_HIGH2 = 0x14, // Rate_7 (HBR2) - 5.40 Gbps/Lane
+ LINK_RATE_RATE_8 = 0x19, // Rate_8 - 6.75 Gbps/Lane
+ LINK_RATE_HIGH3 = 0x1E, // Rate_9 (HBR3) - 8.10 Gbps/Lane
/* Starting from DP2.0 link rate enum directly represents actual
* link rate value in unit of 10 mbps
*/
LINK_RATE_UHBR10 = 1000, // UHBR10 - 10.0 Gbps/Lane
LINK_RATE_UHBR13_5 = 1350, // UHBR13.5 - 13.5 Gbps/Lane
- LINK_RATE_UHBR20 = 2000, // UHBR10 - 20.0 Gbps/Lane
-#else
- LINK_RATE_HIGH3 = 0x1E // Rate_8 (HBR3)- 8.10 Gbps/Lane
-#endif
+ LINK_RATE_UHBR20 = 2000, // UHBR20 - 20.0 Gbps/Lane
};
enum dc_link_spread {
@@ -100,7 +98,6 @@ enum dc_post_cursor2 {
POST_CURSOR2_MAX_LEVEL = POST_CURSOR2_LEVEL3,
};
-#if defined(CONFIG_DRM_AMD_DC_DCN)
enum dc_dp_ffe_preset_level {
DP_FFE_PRESET_LEVEL0 = 0,
DP_FFE_PRESET_LEVEL1,
@@ -120,7 +117,6 @@ enum dc_dp_ffe_preset_level {
DP_FFE_PRESET_LEVEL15,
DP_FFE_PRESET_MAX_LEVEL = DP_FFE_PRESET_LEVEL15,
};
-#endif
enum dc_dp_training_pattern {
DP_TRAINING_PATTERN_SEQUENCE_1 = 0,
@@ -128,19 +124,31 @@ enum dc_dp_training_pattern {
DP_TRAINING_PATTERN_SEQUENCE_3,
DP_TRAINING_PATTERN_SEQUENCE_4,
DP_TRAINING_PATTERN_VIDEOIDLE,
-#if defined(CONFIG_DRM_AMD_DC_DCN)
DP_128b_132b_TPS1,
DP_128b_132b_TPS2,
DP_128b_132b_TPS2_CDS,
-#endif
};
enum dp_link_encoding {
DP_UNKNOWN_ENCODING = 0,
DP_8b_10b_ENCODING = 1,
-#if defined(CONFIG_DRM_AMD_DC_DCN)
DP_128b_132b_ENCODING = 2,
-#endif
+};
+
+enum dp_test_link_rate { /* the 0x06-0x1E entries carry the same values as the matching enum dc_link_rate entries */
+ DP_TEST_LINK_RATE_RBR = 0x06,
+ DP_TEST_LINK_RATE_RATE_2 = 0x08, // Rate_2 - 2.16 Gbps/Lane
+ DP_TEST_LINK_RATE_RATE_3 = 0x09, // Rate_3 - 2.43 Gbps/Lane
+ DP_TEST_LINK_RATE_HBR = 0x0A,
+ DP_TEST_LINK_RATE_RBR2 = 0x0C, // Rate_5 (RBR2) - 3.24 Gbps/Lane
+ DP_TEST_LINK_RATE_RATE_6 = 0x10, // Rate_6 - 4.32 Gbps/Lane
+ DP_TEST_LINK_RATE_HBR2 = 0x14,
+ DP_TEST_LINK_RATE_RATE_8 = 0x19, // Rate_8 - 6.75 Gbps/Lane
+ DP_TEST_LINK_RATE_HBR3 = 0x1E,
+ DP_TEST_LINK_RATE_UHBR10 = 0x01, /* UHBR entries use small codes here, unlike enum dc_link_rate's 10 Mbps-unit values (1000/1350/2000) */
+ DP_TEST_LINK_RATE_UHBR20 = 0x02,
+ DP_TEST_LINK_RATE_UHBR13_5_LEGACY = 0x03, /* For backward compatibility*/
+ DP_TEST_LINK_RATE_UHBR13_5 = 0x04,
};
struct dc_link_settings {
@@ -149,10 +157,18 @@ struct dc_link_settings {
enum dc_link_spread link_spread;
bool use_link_rate_set;
uint8_t link_rate_set;
- bool dpcd_source_device_specific_field_support;
};
-#if defined(CONFIG_DRM_AMD_DC_DCN)
+struct dc_tunnel_settings {
+ bool should_enable_dp_tunneling;
+ bool should_use_dp_bw_allocation;
+ uint8_t cm_id;
+ uint8_t group_id;
+ uint32_t bw_granularity;
+ uint32_t estimated_bw;
+ uint32_t allocated_bw;
+};
+
union dc_dp_ffe_preset {
struct {
uint8_t level : 4;
@@ -163,24 +179,19 @@ union dc_dp_ffe_preset {
} settings;
uint8_t raw;
};
-#endif
struct dc_lane_settings {
enum dc_voltage_swing VOLTAGE_SWING;
enum dc_pre_emphasis PRE_EMPHASIS;
enum dc_post_cursor2 POST_CURSOR2;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
union dc_dp_ffe_preset FFE_PRESET;
-#endif
};
struct dc_link_training_overrides {
enum dc_voltage_swing *voltage_swing;
enum dc_pre_emphasis *pre_emphasis;
enum dc_post_cursor2 *post_cursor2;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
union dc_dp_ffe_preset *ffe_preset;
-#endif
uint16_t *cr_pattern_time;
uint16_t *eq_pattern_time;
@@ -194,7 +205,6 @@ struct dc_link_training_overrides {
bool *fec_enable;
};
-#if defined(CONFIG_DRM_AMD_DC_DCN)
union payload_table_update_status {
struct {
uint8_t VC_PAYLOAD_TABLE_UPDATED:1;
@@ -202,7 +212,6 @@ union payload_table_update_status {
} bits;
uint8_t raw;
};
-#endif
union dpcd_rev {
struct {
@@ -291,32 +300,39 @@ union lane_align_status_updated {
struct {
uint8_t INTERLANE_ALIGN_DONE:1;
uint8_t POST_LT_ADJ_REQ_IN_PROGRESS:1;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
uint8_t EQ_INTERLANE_ALIGN_DONE_128b_132b:1;
uint8_t CDS_INTERLANE_ALIGN_DONE_128b_132b:1;
uint8_t LT_FAILED_128b_132b:1;
uint8_t RESERVED:1;
-#else
- uint8_t RESERVED:4;
-#endif
uint8_t DOWNSTREAM_PORT_STATUS_CHANGED:1;
uint8_t LINK_STATUS_UPDATED:1;
} bits;
uint8_t raw;
};
+union link_service_irq_vector_esi0 { /* One-byte bit layout used for the LINK_SERVICE_IRQ_ESI0 (2005h) entry of union hpd_irq_data */
+ struct {
+ uint8_t DP_LINK_RX_CAP_CHANGED:1;
+ uint8_t DP_LINK_STATUS_CHANGED:1;
+ uint8_t DP_LINK_STREAM_STATUS_CHANGED:1;
+ uint8_t DP_LINK_HDMI_LINK_STATUS_CHANGED:1;
+ uint8_t DP_LINK_CONNECTED_OFF_ENTRY_REQUESTED:1;
+ uint8_t DP_LINK_TUNNELING_IRQ:1;
+ uint8_t reserved:2; /* six 1-bit flags + 2 reserved bits = 8 bits, matching raw */
+ } bits;
+ uint8_t raw; /* whole-byte view of the same storage */
+};
+
union lane_adjust {
struct {
uint8_t VOLTAGE_SWING_LANE:2;
uint8_t PRE_EMPHASIS_LANE:2;
uint8_t RESERVED:4;
} bits;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
struct {
uint8_t PRESET_VALUE :4;
uint8_t RESERVED :4;
} tx_ffe;
-#endif
uint8_t raw;
};
@@ -346,12 +362,10 @@ union dpcd_training_lane {
uint8_t MAX_PRE_EMPHASIS_REACHED:1;
uint8_t RESERVED:2;
} bits;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
struct {
uint8_t PRESET_VALUE :4;
uint8_t RESERVED :4;
} tx_ffe;
-#endif
uint8_t raw;
};
@@ -378,7 +392,10 @@ enum dpcd_downstream_port_detailed_type {
union dwnstream_port_caps_byte2 {
struct {
uint8_t MAX_BITS_PER_COLOR_COMPONENT:2;
- uint8_t RESERVED:6;
+ uint8_t MAX_ENCODED_LINK_BW_SUPPORT:3;
+ uint8_t SOURCE_CONTROL_MODE_SUPPORT:1;
+ uint8_t CONCURRENT_LINK_BRING_UP_SEQ_SUPPORT:1;
+ uint8_t RESERVED:1;
} bits;
uint8_t raw;
};
@@ -416,6 +433,41 @@ union dwnstream_port_caps_byte3_hdmi {
uint8_t raw;
};
+union hdmi_encoded_link_bw {
+ struct {
+ uint8_t FRL_MODE:1; // Bit 0
+ uint8_t BW_9Gbps:1;
+ uint8_t BW_18Gbps:1;
+ uint8_t BW_24Gbps:1;
+ uint8_t BW_32Gbps:1;
+ uint8_t BW_40Gbps:1;
+ uint8_t BW_48Gbps:1;
+ uint8_t FRL_LINK_TRAINING_FINISHED:1; // Bit 7
+ } bits;
+ uint8_t raw;
+};
+
+union hdmi_tx_link_status {
+ struct {
+ uint8_t HDMI_TX_LINK_ACTIVE_STATUS:1;
+ uint8_t HDMI_TX_READY_STATUS:1;
+ uint8_t RESERVED:6;
+ } bits;
+ uint8_t raw;
+};
+
+union autonomous_mode_and_frl_link_status {
+ struct {
+ uint8_t FRL_LT_IN_PROGRESS_STATUS:1;
+ uint8_t FRL_LT_LINK_CONFIG_IN_PROGRESS:3;
+ uint8_t RESERVED:1;
+ uint8_t FALLBACK_POLICY:1;
+ uint8_t FALLBACK_POLICY_VALID:1;
+ uint8_t REGULATED_AUTONOMOUS_MODE_SUPPORTED:1;
+ } bits;
+ uint8_t raw;
+};
+
/*4-byte structure for detailed capabilities of a down-stream port
(DP-to-TMDS converter).*/
union dwnstream_portxcaps {
@@ -454,8 +506,10 @@ union sink_status {
uint8_t raw;
};
-/*6-byte structure corresponding to 6 registers (200h-205h)
-read during handling of HPD-IRQ*/
+/* 7-byte structure corresponding to 6 registers (200h-205h)
+ * and LINK_SERVICE_IRQ_ESI0 (2005h) for tunneling IRQ
+ * read during handling of HPD-IRQ
+ */
union hpd_irq_data {
struct {
union sink_count sink_cnt;/* 200h */
@@ -463,9 +517,10 @@ union hpd_irq_data {
union lane_status lane01_status;/* 202h */
union lane_status lane23_status;/* 203h */
union lane_align_status_updated lane_status_updated;/* 204h */
- union sink_status sink_status;
+ union sink_status sink_status;/* 205h */
+ union link_service_irq_vector_esi0 link_service_irq_esi0;/* 2005h */
} bytes;
- uint8_t raw[6];
+ uint8_t raw[7];
};
union down_stream_port_count {
@@ -494,7 +549,11 @@ union down_spread_ctrl {
1 = Main link signal is downspread <= 0.5%
with frequency in the range of 30kHz ~ 33kHz*/
uint8_t SPREAD_AMP:1;
- uint8_t RESERVED2:2;/*Bit 6:5 = RESERVED. Read all 0s*/
+ uint8_t RESERVED2:1;/*Bit 5 = RESERVED. Read all 0s*/
+ /* Bit 6 = FIXED_VTOTAL_AS_SDP_EN_IN_PR_ACTIVE.
+ 0 = FIXED_VTOTAL_AS_SDP_EN_IN_PR_ACTIVE is not enabled by the Source device (default)
+ 1 = FIXED_VTOTAL_AS_SDP_EN_IN_PR_ACTIVE is enabled by Source device */
+ uint8_t FIXED_VTOTAL_AS_SDP_EN_IN_PR_ACTIVE:1;
/*Bit 7 = MSA_TIMING_PAR_IGNORE_EN
0 = Source device will send valid data for the MSA Timing Params
1 = Source device may send invalid data for these MSA Timing Params*/
@@ -552,6 +611,12 @@ struct dpcd_amd_device_id {
uint8_t dal_version_byte2;
};
+struct target_luminance_value {
+ uint8_t byte0;
+ uint8_t byte1;
+ uint8_t byte2;
+};
+
struct dpcd_source_backlight_set {
struct {
uint8_t byte0;
@@ -615,7 +680,7 @@ union test_request {
uint8_t LINK_TEST_PATTRN :1;
uint8_t EDID_READ :1;
uint8_t PHY_TEST_PATTERN :1;
- uint8_t RESERVED :1;
+ uint8_t PHY_TEST_CHANNEL_CODING_TYPE :2;
uint8_t AUDIO_TEST_PATTERN :1;
uint8_t TEST_AUDIO_DISABLED_VIDEO :1;
} bits;
@@ -634,18 +699,9 @@ union test_response {
union phy_test_pattern {
struct {
-#if defined(CONFIG_DRM_AMD_DC_DCN)
/* This field is 7 bits for DP2.0 */
uint8_t PATTERN :7;
uint8_t RESERVED :1;
-#else
- /* DpcdPhyTestPatterns. This field is 2 bits for DP1.1
- * and 3 bits for DP1.2.
- */
- uint8_t PATTERN :3;
- /* BY speci, bit7:2 is 0 for DP1.1. */
- uint8_t RESERVED :5;
-#endif
} bits;
uint8_t raw;
};
@@ -723,14 +779,10 @@ union dpcd_fec_capability {
uint8_t UNCORRECTED_BLOCK_ERROR_COUNT_CAPABLE:1;
uint8_t CORRECTED_BLOCK_ERROR_COUNT_CAPABLE:1;
uint8_t BIT_ERROR_COUNT_CAPABLE:1;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
uint8_t PARITY_BLOCK_ERROR_COUNT_CAPABLE:1;
uint8_t ARITY_BIT_ERROR_COUNT_CAPABLE:1;
uint8_t FEC_RUNNING_INDICATOR_SUPPORTED:1;
uint8_t FEC_ERROR_REPORTING_POLICY_SUPPORTED:1;
-#else
- uint8_t RESERVED:4;
-#endif
} bits;
uint8_t raw;
};
@@ -852,6 +904,30 @@ struct psr_caps {
unsigned char psr_version;
unsigned int psr_rfb_setup_time;
bool psr_exit_link_training_required;
+ unsigned char edp_revision;
+ unsigned char support_ver;
+ bool su_granularity_required;
+ bool y_coordinate_required;
+ uint8_t su_y_granularity;
+ bool alpm_cap;
+ bool standby_support;
+ uint8_t rate_control_caps;
+ unsigned int psr_power_opt_flag;
+};
+
+union dpcd_dprx_feature_enumeration_list_cont_1 {
+ struct {
+ uint8_t ADAPTIVE_SYNC_SDP_SUPPORT:1;
+ uint8_t AS_SDP_FIRST_HALF_LINE_OR_3840_PIXEL_CYCLE_WINDOW_NOT_SUPPORTED: 1;
+ uint8_t RESERVED0: 2;
+ uint8_t VSC_EXT_SDP_VER1_SUPPORT: 1;
+ uint8_t RESERVED1: 3;
+ } bits;
+ uint8_t raw;
+};
+
+struct adaptive_sync_caps {
+ union dpcd_dprx_feature_enumeration_list_cont_1 dp_adap_sync_caps;
};
/* Length of router topology ID read from DPCD in bytes. */
@@ -877,86 +953,34 @@ union dpia_info {
uint8_t raw;
};
+/* DPCD[0xE0020] USB4_DRIVER_BW_CAPABILITY register. */
+union usb4_driver_bw_cap {
+ struct {
+ uint8_t rsvd :7;
+ uint8_t driver_bw_alloc_support :1;
+ } bits;
+ uint8_t raw;
+};
+
+/* DPCD[0xE0021] DP_IN_ADAPTER_TUNNEL_INFORMATION register. */
+union dpia_tunnel_info {
+ struct {
+ uint8_t group_id :3;
+ uint8_t rsvd :5;
+ } bits;
+ uint8_t raw;
+};
+
/* DP Tunneling over USB4 */
struct dpcd_usb4_dp_tunneling_info {
union dp_tun_cap_support dp_tun_cap;
union dpia_info dpia_info;
+ union usb4_driver_bw_cap driver_bw_cap;
+ union dpia_tunnel_info dpia_tunnel_info;
uint8_t usb4_driver_id;
uint8_t usb4_topology_id[DPCD_USB4_TOPOLOGY_ID_LEN];
};
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-#ifndef DP_MAIN_LINK_CHANNEL_CODING_CAP
-#define DP_MAIN_LINK_CHANNEL_CODING_CAP 0x006
-#endif
-#ifndef DP_SINK_VIDEO_FALLBACK_FORMATS
-#define DP_SINK_VIDEO_FALLBACK_FORMATS 0x020
-#endif
-#ifndef DP_FEC_CAPABILITY_1
-#define DP_FEC_CAPABILITY_1 0x091
-#endif
-#ifndef DP_DFP_CAPABILITY_EXTENSION_SUPPORT
-#define DP_DFP_CAPABILITY_EXTENSION_SUPPORT 0x0A3
-#endif
-#ifndef DP_LINK_SQUARE_PATTERN
-#define DP_LINK_SQUARE_PATTERN 0x10F
-#endif
-#ifndef DP_DSC_CONFIGURATION
-#define DP_DSC_CONFIGURATION 0x161
-#endif
-#ifndef DP_PHY_SQUARE_PATTERN
-#define DP_PHY_SQUARE_PATTERN 0x249
-#endif
-#ifndef DP_128b_132b_SUPPORTED_LINK_RATES
-#define DP_128b_132b_SUPPORTED_LINK_RATES 0x2215
-#endif
-#ifndef DP_128b_132b_TRAINING_AUX_RD_INTERVAL
-#define DP_128b_132b_TRAINING_AUX_RD_INTERVAL 0x2216
-#endif
-#ifndef DP_TEST_264BIT_CUSTOM_PATTERN_7_0
-#define DP_TEST_264BIT_CUSTOM_PATTERN_7_0 0X2230
-#endif
-#ifndef DP_TEST_264BIT_CUSTOM_PATTERN_263_256
-#define DP_TEST_264BIT_CUSTOM_PATTERN_263_256 0X2250
-#endif
-#ifndef DP_DSC_SUPPORT_AND_DECODER_COUNT
-#define DP_DSC_SUPPORT_AND_DECODER_COUNT 0x2260
-#endif
-#ifndef DP_DSC_MAX_SLICE_COUNT_AND_AGGREGATION_0
-#define DP_DSC_MAX_SLICE_COUNT_AND_AGGREGATION_0 0x2270
-#endif
-#ifndef DP_DSC_DECODER_0_MAXIMUM_SLICE_COUNT_MASK
-#define DP_DSC_DECODER_0_MAXIMUM_SLICE_COUNT_MASK (1 << 0)
-#endif
-#ifndef DP_DSC_DECODER_0_AGGREGATION_SUPPORT_MASK
-#define DP_DSC_DECODER_0_AGGREGATION_SUPPORT_MASK (0b111 << 1)
-#endif
-#ifndef DP_DSC_DECODER_0_AGGREGATION_SUPPORT_SHIFT
-#define DP_DSC_DECODER_0_AGGREGATION_SUPPORT_SHIFT 1
-#endif
-#ifndef DP_DSC_DECODER_COUNT_MASK
-#define DP_DSC_DECODER_COUNT_MASK (0b111 << 5)
-#endif
-#ifndef DP_DSC_DECODER_COUNT_SHIFT
-#define DP_DSC_DECODER_COUNT_SHIFT 5
-#endif
-#ifndef DP_MAIN_LINK_CHANNEL_CODING_SET
-#define DP_MAIN_LINK_CHANNEL_CODING_SET 0x108
-#endif
-#ifndef DP_MAIN_LINK_CHANNEL_CODING_PHY_REPEATER
-#define DP_MAIN_LINK_CHANNEL_CODING_PHY_REPEATER 0xF0006
-#endif
-#ifndef DP_PHY_REPEATER_128b_132b_RATES
-#define DP_PHY_REPEATER_128b_132b_RATES 0xF0007
-#endif
-#ifndef DP_128b_132b_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1
-#define DP_128b_132b_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1 0xF0022
-#endif
-#ifndef DP_INTRA_HOP_AUX_REPLY_INDICATION
-#define DP_INTRA_HOP_AUX_REPLY_INDICATION (1 << 3)
-#endif
-/* TODO - Use DRM header to replace above once available */
-
union dp_main_line_channel_coding_cap {
struct {
uint8_t DP_8b_10b_SUPPORTED :1;
@@ -987,13 +1011,22 @@ union dp_128b_132b_supported_link_rates {
union dp_128b_132b_supported_lttpr_link_rates {
struct {
uint8_t UHBR10 :1;
- uint8_t UHBR13_5:1;
uint8_t UHBR20 :1;
+ uint8_t UHBR13_5:1;
uint8_t RESERVED:5;
} bits;
uint8_t raw;
};
+/*
+ * One-byte ALPM capability value for an LTTPR: two single-bit flags in the
+ * low bits followed by six reserved bits; raw aliases the whole byte.
+ * NOTE(review): presumably mirrors the DP_LTTPR_ALPM_CAPABILITIES register -
+ * confirm against the code that fills it in.
+ */
+union dp_alpm_lttpr_cap {
+ struct {
+ uint8_t AUX_LESS_ALPM_SUPPORTED :1;
+ uint8_t ASSR_SUPPORTED :1;
+ uint8_t RESERVED :6;
+ } bits;
+ uint8_t raw;
+};
+
union dp_sink_video_fallback_formats {
struct {
uint8_t dp_1024x768_60Hz_24bpp_support :1;
@@ -1004,6 +1037,29 @@ union dp_sink_video_fallback_formats {
uint8_t raw;
};
+/*
+ * Two-byte receive-port-0 capability value. The first byte is made of eight
+ * single-bit fields (two of them reserved), the second byte is BUFFER_SIZE;
+ * raw[] aliases both bytes.
+ * NOTE(review): the mapping of these bitfields onto raw[] depends on the
+ * compiler's bitfield allocation order - confirm for every supported target.
+ */
+union dp_receive_port0_cap {
+ struct {
+ uint8_t RESERVED :1;
+ uint8_t LOCAL_EDID_PRESENT :1;
+ uint8_t ASSOCIATED_TO_PRECEDING_PORT:1;
+ uint8_t HBLANK_EXPANSION_CAPABLE :1;
+ uint8_t BUFFER_SIZE_UNIT :1;
+ uint8_t BUFFER_SIZE_PER_PORT :1;
+ uint8_t HBLANK_REDUCTION_CAPABLE :1;
+ uint8_t RESERVED2:1;
+ uint8_t BUFFER_SIZE:8;
+ } bits;
+ uint8_t raw[2];
+};
+
+/*
+ * 16-bit maximum-uncompressed-pixel-rate capability: a 15-bit rate value plus
+ * a 1-bit valid flag, with raw[] giving byte-wise access to the same storage.
+ * NOTE(review): overlaying uint16_t bitfields on raw[2] presumably assumes a
+ * little-endian host - confirm. The unit of the rate is not visible here.
+ */
+union dpcd_max_uncompressed_pixel_rate_cap {
+ struct {
+ uint16_t max_uncompressed_pixel_rate_cap :15;
+ uint16_t valid :1;
+ } bits;
+ uint8_t raw[2];
+};
+
union dp_fec_capability1 {
struct {
uint8_t AGGREGATED_ERROR_COUNTERS_CAPABLE :1;
@@ -1012,6 +1068,16 @@ union dp_fec_capability1 {
uint8_t raw;
};
+/*
+ * One-byte cable identification value: a 2-bit UHBR10/20 capability field,
+ * a 1-bit UHBR13.5 capability flag, a 3-bit cable type and two reserved
+ * bits (8 bits in total); raw aliases the whole byte.
+ * NOTE(review): the encoding of the multi-bit fields is not visible in this
+ * header - consult the DP specification before interpreting them.
+ */
+union dp_cable_id {
+ struct {
+ uint8_t UHBR10_20_CAPABILITY :2;
+ uint8_t UHBR13_5_CAPABILITY :1;
+ uint8_t CABLE_TYPE :3;
+ uint8_t RESERVED :2;
+ } bits;
+ uint8_t raw;
+};
+
struct dp_color_depth_caps {
uint8_t support_6bpc :1;
uint8_t support_8bpc :1;
@@ -1051,6 +1117,451 @@ union dp_128b_132b_training_aux_rd_interval {
} bits;
uint8_t raw;
};
+
+union edp_alpm_caps {
+ struct {
+ uint8_t AUX_WAKE_ALPM_CAP :1;
+ uint8_t PM_STATE_2A_SUPPORT :1;
+ uint8_t AUX_LESS_ALPM_CAP :1;
+ uint8_t AUX_LESS_ALPM_ML_PHY_SLEEP_STATUS_SUPPORTED :1;
+ uint8_t RESERVED :4;
+ } bits;
+ uint8_t raw;
+};
+
+union edp_psr_dpcd_caps {
+ struct {
+ uint8_t LINK_TRAINING_ON_EXIT_NOT_REQUIRED :1;
+ uint8_t PSR_SETUP_TIME :3;
+ uint8_t Y_COORDINATE_REQUIRED :1;
+ uint8_t SU_GRANULARITY_REQUIRED :1;
+ uint8_t FRAME_SYNC_IS_NOT_NEEDED_FOR_SU :1;
+ uint8_t RESERVED :1;
+ } bits;
+ uint8_t raw;
+};
+
+struct edp_psr_info {
+ uint8_t psr_version;
+ union edp_psr_dpcd_caps psr_dpcd_caps;
+ uint8_t psr2_su_y_granularity_cap;
+ uint8_t force_psrsu_cap;
+};
+
+struct replay_info {
+ uint8_t pixel_deviation_per_line;
+ uint8_t max_deviation_line;
+};
+
+struct dprx_states {
+ bool cable_id_written;
+};
+
+union dpcd_panel_replay_capability_supported {
+ struct {
+ unsigned char PANEL_REPLAY_SUPPORT :1;
+ unsigned char SELECTIVE_UPDATE_SUPPORT :1;
+ unsigned char EARLY_TRANSPORT_SUPPORT :1;
+ unsigned char RESERVED :5;
+ } bits;
+ unsigned char raw;
+};
+
+enum dpcd_downstream_port_max_bpc {
+ DOWN_STREAM_MAX_8BPC = 0,
+ DOWN_STREAM_MAX_10BPC,
+ DOWN_STREAM_MAX_12BPC,
+ DOWN_STREAM_MAX_16BPC
+};
+
+enum link_training_offset {
+ DPRX = 0,
+ LTTPR_PHY_REPEATER1 = 1,
+ LTTPR_PHY_REPEATER2 = 2,
+ LTTPR_PHY_REPEATER3 = 3,
+ LTTPR_PHY_REPEATER4 = 4,
+ LTTPR_PHY_REPEATER5 = 5,
+ LTTPR_PHY_REPEATER6 = 6,
+ LTTPR_PHY_REPEATER7 = 7,
+ LTTPR_PHY_REPEATER8 = 8
+};
+
+#define MAX_REPEATER_CNT 8
+
+/*
+ * Capability record for the LTTPR (PHY repeater) chain of a link; embedded in
+ * struct dpcd_caps as lttpr_caps.
+ */
+struct dc_lttpr_caps {
+ union dpcd_rev revision;
+ uint8_t mode;
+ uint8_t max_lane_count;
+ uint8_t max_link_rate;
+ uint8_t phy_repeater_cnt;
+ uint8_t max_ext_timeout;
+ union dp_main_link_channel_coding_lttpr_cap main_link_channel_coding;
+ union dp_128b_132b_supported_lttpr_link_rates supported_128b_132b_rates;
+ union dp_alpm_lttpr_cap alpm;
+ /*
+  * NOTE(review): MAX_REPEATER_CNT is 8, so this array has 7 entries, while
+  * enum link_training_offset names repeaters 1..8 - confirm how callers
+  * index it before using a repeater number as the subscript.
+  */
+ uint8_t aux_rd_interval[MAX_REPEATER_CNT - 1];
+ uint8_t lttpr_ieee_oui[3]; // Always read from closest LTTPR to host
+ uint8_t lttpr_device_id[6]; // Always read from closest LTTPR to host
+};
+
+struct dc_dongle_dfp_cap_ext {
+ bool supported;
+ uint16_t max_pixel_rate_in_mps;
+ uint16_t max_video_h_active_width;
+ uint16_t max_video_v_active_height;
+ struct dp_encoding_format_caps encoding_format_caps;
+ struct dp_color_depth_caps rgb_color_depth_caps;
+ struct dp_color_depth_caps ycbcr444_color_depth_caps;
+ struct dp_color_depth_caps ycbcr422_color_depth_caps;
+ struct dp_color_depth_caps ycbcr420_color_depth_caps;
+};
+
+/*
+ * Capabilities of an attached dongle/converter; embedded in struct dpcd_caps
+ * as dongle_caps.
+ */
+struct dc_dongle_caps {
+ /* dongle type (DP converter, CV smart dongle) */
+ enum display_dongle_type dongle_type;
+ bool extendedCapValid;
+ /* If dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER,
+ indicates 'Frame Sequential-to-Frame Pack' conversion capability. */
+ bool is_dp_hdmi_s3d_converter;
+ bool is_dp_hdmi_ycbcr422_pass_through;
+ bool is_dp_hdmi_ycbcr420_pass_through;
+ bool is_dp_hdmi_ycbcr422_converter;
+ bool is_dp_hdmi_ycbcr420_converter;
+ uint32_t dp_hdmi_max_bpc;
+ uint32_t dp_hdmi_max_pixel_clk_in_khz;
+ uint32_t dp_hdmi_frl_max_link_bw_in_kbps;
+ uint32_t dp_hdmi_regulated_autonomous_mode_support;
+ /* Extended downstream-facing-port limits; see struct dc_dongle_dfp_cap_ext. */
+ struct dc_dongle_dfp_cap_ext dfp_cap_ext;
+};
+
+struct dpcd_caps {
+ union dpcd_rev dpcd_rev;
+ union max_lane_count max_ln_count;
+ union max_down_spread max_down_spread;
+ union dprx_feature dprx_feature;
+
+ /* valid only for eDP v1.4 or higher*/
+ uint8_t edp_supported_link_rates_count;
+ enum dc_link_rate edp_supported_link_rates[8];
+
+ /* dongle type (DP converter, CV smart dongle) */
+ enum display_dongle_type dongle_type;
+ bool is_dongle_type_one;
+ /* branch device or sink device */
+ bool is_branch_dev;
+ /* Dongle's downstream count. */
+ union sink_count sink_count;
+ bool is_mst_capable;
+ /* Dongle capabilities. If dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER,
+ these include the 'Frame Sequential-to-Frame Pack' conversion capability. */
+ struct dc_dongle_caps dongle_caps;
+
+ uint32_t sink_dev_id;
+ int8_t sink_dev_id_str[6];
+ int8_t sink_hw_revision;
+ int8_t sink_fw_revision[2];
+
+ uint32_t branch_dev_id;
+ int8_t branch_dev_name[6];
+ int8_t branch_hw_revision;
+ int8_t branch_fw_revision[2];
+ int8_t branch_vendor_specific_data[4];
+
+ bool allow_invalid_MSA_timing_param;
+ bool panel_mode_edp;
+ bool dpcd_display_control_capable;
+ bool ext_receiver_cap_field_present;
+ bool set_power_state_capable_edp;
+ bool dynamic_backlight_capable_edp;
+ union dpcd_fec_capability fec_cap;
+ struct dpcd_dsc_capabilities dsc_caps;
+ struct dc_lttpr_caps lttpr_caps;
+ struct adaptive_sync_caps adaptive_sync_caps;
+ struct dpcd_usb4_dp_tunneling_info usb4_dp_tun_info;
+ union dpcd_max_uncompressed_pixel_rate_cap max_uncompressed_pixel_rate_cap;
+
+ union dp_128b_132b_supported_link_rates dp_128b_132b_supported_link_rates;
+ union dp_main_line_channel_coding_cap channel_coding_cap;
+ union dp_sink_video_fallback_formats fallback_formats;
+ union dp_fec_capability1 fec_cap1;
+ bool panel_luminance_control;
+ union dp_cable_id cable_id;
+ uint8_t edp_rev;
+ union edp_alpm_caps alpm_caps;
+ struct edp_psr_info psr_info;
+
+ struct replay_info pr_info;
+ union dpcd_panel_replay_capability_supported pr_caps_supported;
+ uint16_t edp_oled_emission_rate;
+ union dp_receive_port0_cap receive_port0_cap;
+ /* Indicates the number of SST links supported by MSO (Multi-Stream Output) */
+ uint8_t mso_cap_sst_links_supported;
+ uint8_t dp_edp_general_cap_2;
+};
+
+union dpcd_sink_ext_caps {
+ struct {
+ /* 0 - Sink supports backlight adjust via PWM during SDR/HDR mode
+ * 1 - Sink supports backlight adjust via AUX during SDR/HDR mode.
+ */
+ uint8_t sdr_aux_backlight_control : 1;
+ uint8_t hdr_aux_backlight_control : 1;
+ uint8_t reserved_1 : 2;
+ uint8_t oled : 1;
+ uint8_t reserved_2 : 1;
+ uint8_t miniled : 1;
+ uint8_t emission_output : 1;
+ } bits;
+ uint8_t raw;
+};
+
+enum dc_link_fec_state {
+ dc_link_fec_not_ready,
+ dc_link_fec_ready,
+ dc_link_fec_enabled
+};
+
+union dpcd_psr_configuration {
+ struct {
+ unsigned char ENABLE : 1;
+ unsigned char TRANSMITTER_ACTIVE_IN_PSR : 1;
+ unsigned char CRC_VERIFICATION : 1;
+ unsigned char FRAME_CAPTURE_INDICATION : 1;
+ /* For eDP 1.4, PSR v2*/
+ unsigned char LINE_CAPTURE_INDICATION : 1;
+ /* For eDP 1.4, PSR v2*/
+ unsigned char IRQ_HPD_WITH_CRC_ERROR : 1;
+ unsigned char ENABLE_PSR2 : 1;
+ unsigned char EARLY_TRANSPORT_ENABLE : 1;
+ } bits;
+ unsigned char raw;
+};
+
+union replay_enable_and_configuration {
+ struct {
+ unsigned char FREESYNC_PANEL_REPLAY_MODE :1;
+ unsigned char TIMING_DESYNC_ERROR_VERIFICATION :1;
+ unsigned char STATE_TRANSITION_ERROR_DETECTION :1;
+ unsigned char RESERVED :5;
+ } bits;
+ unsigned char raw;
+};
+
+union dpcd_replay_configuration {
+ struct {
+ unsigned char STATE_TRANSITION_ERROR_STATUS : 1;
+ unsigned char DESYNC_ERROR_STATUS : 1;
+ unsigned char SINK_DEVICE_REPLAY_STATUS : 3;
+ unsigned char SINK_FRAME_LOCKED : 2;
+ unsigned char RESERVED : 1;
+ } bits;
+ unsigned char raw;
+};
+
+union panel_replay_enable_and_configuration_1 {
+ struct {
+ unsigned char PANEL_REPLAY_ENABLE :1;
+ unsigned char PANEL_REPLAY_CRC_ENABLE :1;
+ unsigned char IRQ_HPD_ASSDP_MISSING :1;
+ unsigned char IRQ_HPD_VSCSDP_UNCORRECTABLE_ERROR :1;
+ unsigned char IRQ_HPD_RFB_ERROR :1;
+ unsigned char IRQ_HPD_ACTIVE_FRAME_CRC_ERROR :1;
+ unsigned char PANEL_REPLAY_SELECTIVE_UPDATE_ENABLE :1;
+ unsigned char PANEL_REPLAY_EARLY_TRANSPORT_ENABLE :1;
+ } bits;
+ unsigned char raw;
+};
+
+union panel_replay_enable_and_configuration_2 {
+ struct {
+ unsigned char SINK_REFRESH_RATE_UNLOCK_GRANTED :1;
+ unsigned char RESERVED :1;
+ unsigned char SU_Y_GRANULARITY_EXT_VALUE_ENABLED :1;
+ unsigned char SU_Y_GRANULARITY_EXT_VALUE :4;
+ unsigned char SU_REGION_SCAN_LINE_CAPTURE_INDICATION :1;
+ } bits;
+ unsigned char raw;
+};
+
+union dpcd_alpm_configuration {
+ struct {
+ unsigned char ENABLE : 1;
+ unsigned char IRQ_HPD_ENABLE : 1;
+ unsigned char ALPM_MODE_SEL : 1;
+ unsigned char ACDS_PERIOD_DURATION : 1;
+ unsigned char RESERVED : 4;
+ } bits;
+ unsigned char raw;
+};
+
+union dpcd_sink_active_vtotal_control_mode {
+ struct {
+ unsigned char ENABLE : 1;
+ unsigned char RESERVED : 7;
+ } bits;
+ unsigned char raw;
+};
+
+union psr_error_status {
+ struct {
+ unsigned char LINK_CRC_ERROR :1;
+ unsigned char RFB_STORAGE_ERROR :1;
+ unsigned char VSC_SDP_ERROR :1;
+ unsigned char RESERVED :5;
+ } bits;
+ unsigned char raw;
+};
+
+union psr_sink_psr_status {
+ struct {
+ unsigned char SINK_SELF_REFRESH_STATUS :3;
+ unsigned char RESERVED :5;
+ } bits;
+ unsigned char raw;
+};
+
+struct edp_trace_power_timestamps {
+ uint64_t poweroff;
+ uint64_t poweron;
+};
+
+struct dp_trace_lt_counts {
+ unsigned int total;
+ unsigned int fail;
+};
+
+enum link_training_result {
+ LINK_TRAINING_SUCCESS,
+ LINK_TRAINING_CR_FAIL_LANE0,
+ LINK_TRAINING_CR_FAIL_LANE1,
+ LINK_TRAINING_CR_FAIL_LANE23,
+ /* CR DONE bit is cleared during EQ step */
+ LINK_TRAINING_EQ_FAIL_CR,
+ /* CR DONE bit is cleared but LANE0_CR_DONE is set during EQ step */
+ LINK_TRAINING_EQ_FAIL_CR_PARTIAL,
+ /* other failure during EQ step */
+ LINK_TRAINING_EQ_FAIL_EQ,
+ LINK_TRAINING_LQA_FAIL,
+ /* one of the CR,EQ or symbol lock is dropped */
+ LINK_TRAINING_LINK_LOSS,
+ /* Abort link training (because sink unplugged) */
+ LINK_TRAINING_ABORT,
+ DP_128b_132b_LT_FAILED,
+ DP_128b_132b_MAX_LOOP_COUNT_REACHED,
+ DP_128b_132b_CHANNEL_EQ_DONE_TIMEOUT,
+ DP_128b_132b_CDS_DONE_TIMEOUT,
+};
+
+struct dp_trace_lt {
+ struct dp_trace_lt_counts counts;
+ struct dp_trace_timestamps {
+ unsigned long long start;
+ unsigned long long end;
+ } timestamps;
+ enum link_training_result result;
+ bool is_logged;
+};
+
+struct dp_trace {
+ struct dp_trace_lt detect_lt_trace;
+ struct dp_trace_lt commit_lt_trace;
+ unsigned int link_loss_count;
+ bool is_initialized;
+ struct edp_trace_power_timestamps edp_trace_power_timestamps;
+};
+
+/* TODO - This is a temporary location for any new DPCD definitions.
+ * We should move these to drm_dp header.
+ */
+#ifndef DP_LINK_SQUARE_PATTERN
+#define DP_LINK_SQUARE_PATTERN 0x10F
+#endif
+#ifndef DP_CABLE_ATTRIBUTES_UPDATED_BY_DPRX
+#define DP_CABLE_ATTRIBUTES_UPDATED_BY_DPRX 0x2217
+#endif
+#ifndef DP_CABLE_ATTRIBUTES_UPDATED_BY_DPTX
+#define DP_CABLE_ATTRIBUTES_UPDATED_BY_DPTX 0x110
+#endif
+#ifndef DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP
+#define DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP 0x221c
+#endif
+#ifndef DP_LTTPR_ALPM_CAPABILITIES
+#define DP_LTTPR_ALPM_CAPABILITIES 0xF0009
+#endif
+#ifndef DP_REGULATED_AUTONOMOUS_MODE_SUPPORTED_AND_HDMI_LINK_TRAINING_STATUS
+#define DP_REGULATED_AUTONOMOUS_MODE_SUPPORTED_AND_HDMI_LINK_TRAINING_STATUS 0x303C
+#endif
+#ifndef DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE
+#define DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE 0x50
+#endif
+#ifndef DP_TUNNELING_IRQ
+#define DP_TUNNELING_IRQ (1 << 5)
+#endif
+#ifndef DP_BRANCH_VENDOR_SPECIFIC_START
+#define DP_BRANCH_VENDOR_SPECIFIC_START 0x50C
+#endif
+#ifndef DP_LTTPR_IEEE_OUI
+#define DP_LTTPR_IEEE_OUI 0xF003D
+#endif
+#ifndef DP_LTTPR_DEVICE_ID
+#define DP_LTTPR_DEVICE_ID 0xF0040
+#endif
+/** USB4 DPCD BW Allocation Registers Chapter 10.7 **/
+#ifndef DP_TUNNELING_CAPABILITIES
+#define DP_TUNNELING_CAPABILITIES 0xE000D /* 1.4a */
+#endif
+#ifndef USB4_DRIVER_ID
+#define USB4_DRIVER_ID 0xE000F /* 1.4a */
+#endif
+#ifndef USB4_DRIVER_BW_CAPABILITY
+#define USB4_DRIVER_BW_CAPABILITY 0xE0020 /* 1.4a */
+#endif
+#ifndef DP_IN_ADAPTER_TUNNEL_INFO
+#define DP_IN_ADAPTER_TUNNEL_INFO 0xE0021 /* 1.4a */
+#endif
+#ifndef DP_BW_GRANULALITY
+#define DP_BW_GRANULALITY 0xE0022 /* 1.4a */
+#endif
+#ifndef ESTIMATED_BW
+#define ESTIMATED_BW 0xE0023 /* 1.4a */
+#endif
+#ifndef ALLOCATED_BW
+#define ALLOCATED_BW 0xE0024 /* 1.4a */
+#endif
+#ifndef DP_TUNNELING_STATUS
+#define DP_TUNNELING_STATUS 0xE0025 /* 1.4a */
+#endif
+#ifndef DP_TUNNELING_MAX_LINK_RATE
+#define DP_TUNNELING_MAX_LINK_RATE 0xE0028 /* 1.4a */
+#endif
+#ifndef DP_TUNNELING_MAX_LANE_COUNT
+#define DP_TUNNELING_MAX_LANE_COUNT 0xE0029 /* 1.4a */
+#endif
+#ifndef DPTX_BW_ALLOCATION_MODE_CONTROL
+#define DPTX_BW_ALLOCATION_MODE_CONTROL 0xE0030 /* 1.4a */
+#endif
+#ifndef REQUESTED_BW
+#define REQUESTED_BW 0xE0031 /* 1.4a */
#endif
+# ifndef DP_TUNNELING_BW_ALLOC_BITS_MASK
+# define DP_TUNNELING_BW_ALLOC_BITS_MASK (0x0F << 0)
+# endif
+# ifndef DP_TUNNELING_BW_REQUEST_FAILED
+# define DP_TUNNELING_BW_REQUEST_FAILED (1 << 0)
+# endif
+# ifndef DP_TUNNELING_BW_REQUEST_SUCCEEDED
+# define DP_TUNNELING_BW_REQUEST_SUCCEEDED (1 << 1)
+# endif
+# ifndef DP_TUNNELING_ESTIMATED_BW_CHANGED
+# define DP_TUNNELING_ESTIMATED_BW_CHANGED (1 << 2)
+# endif
+# ifndef DP_TUNNELING_BW_ALLOC_CAP_CHANGED
+# define DP_TUNNELING_BW_ALLOC_CAP_CHANGED (1 << 3)
+# endif
+# ifndef DPTX_BW_ALLOC_UNMASK_IRQ
+# define DPTX_BW_ALLOC_UNMASK_IRQ (1 << 6)
+# endif
+# ifndef DPTX_BW_ALLOC_MODE_ENABLE
+# define DPTX_BW_ALLOC_MODE_ENABLE (1 << 7)
+# endif
#endif /* DC_DP_TYPES_H */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dsc.h b/drivers/gpu/drm/amd/display/dc/dc_dsc.h
index 684713b2cff7..9d18f1c08079 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dsc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dsc.h
@@ -54,6 +54,14 @@ struct dc_dsc_policy {
bool enable_dsc_when_not_needed;
};
+/*
+ * Bundle of DSC configuration overrides. dc_dsc_compute_config() takes a
+ * pointer to one of these in place of the individual min-slice-height and
+ * max-target-bpp override arguments it used to accept.
+ * NOTE(review): dc_dsc_get_default_config_option() presumably initialises
+ * every field - confirm before relying on zero-initialised instances.
+ */
+struct dc_dsc_config_options {
+ uint32_t dsc_min_slice_height_override;
+ uint32_t max_target_bpp_limit_override_x16;
+ uint32_t slice_height_granularity;
+ uint32_t dsc_force_odm_hslice_override;
+ bool force_dsc_when_not_needed;
+};
+
bool dc_dsc_parse_dsc_dpcd(const struct dc *dc,
const uint8_t *dpcd_dsc_basic_data,
const uint8_t *dpcd_dsc_ext_data,
@@ -66,15 +74,16 @@ bool dc_dsc_compute_bandwidth_range(
uint32_t max_bpp_x16,
const struct dsc_dec_dpcd_caps *dsc_sink_caps,
const struct dc_crtc_timing *timing,
+ const enum dc_link_encoding_format link_encoding,
struct dc_dsc_bw_range *range);
bool dc_dsc_compute_config(
const struct display_stream_compressor *dsc,
const struct dsc_dec_dpcd_caps *dsc_sink_caps,
- uint32_t dsc_min_slice_height_override,
- uint32_t max_target_bpp_limit_override,
+ const struct dc_dsc_config_options *options,
uint32_t target_bandwidth_kbps,
const struct dc_crtc_timing *timing,
+ const enum dc_link_encoding_format link_encoding,
struct dc_dsc_config *dsc_cfg);
uint32_t dc_dsc_stream_bandwidth_in_kbps(const struct dc_crtc_timing *timing,
@@ -85,6 +94,11 @@ uint32_t dc_dsc_stream_bandwidth_overhead_in_kbps(
const int num_slices_h,
const bool is_dp);
+void dc_dsc_dump_decoder_caps(const struct display_stream_compressor *dsc,
+ const struct dsc_dec_dpcd_caps *dsc_sink_caps);
+void dc_dsc_dump_encoder_caps(const struct display_stream_compressor *dsc,
+ const struct dc_crtc_timing *timing);
+
/* TODO - Hardware/specs limitation should be owned by dc dsc and returned to DM,
* and DM can choose to OVERRIDE the limitation on CASE BY CASE basis.
* Hardware/specs limitation should not be writable by DM.
@@ -92,7 +106,8 @@ uint32_t dc_dsc_stream_bandwidth_overhead_in_kbps(
*/
void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing,
uint32_t max_target_bpp_limit_override_x16,
- struct dc_dsc_policy *policy);
+ struct dc_dsc_policy *policy,
+ const enum dc_link_encoding_format link_encoding);
void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit);
@@ -100,4 +115,6 @@ void dc_dsc_policy_set_enable_dsc_when_not_needed(bool enable);
void dc_dsc_policy_set_disable_dsc_stream_overhead(bool disable);
+void dc_dsc_get_default_config_option(const struct dc *dc, struct dc_dsc_config_options *options);
+
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dc_fused_io.c b/drivers/gpu/drm/amd/display/dc/dc_fused_io.c
new file mode 100644
index 000000000000..fee69642fb93
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_fused_io.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2025 Advanced Micro Devices, Inc.
+
+#include "dc_fused_io.h"
+
+#include "dm_helpers.h"
+#include "gpio.h"
+
+/*
+ * Populate the fused-IO request inside @cmd from the I2C operation @op.
+ *
+ * @cmd:      command whose fused_io.request is filled in
+ * @op:       source operation (address, offset, size, data); may be NULL
+ * @type:     request type stored in the request
+ * @ddc_line: DDC line stored in the I2C location
+ * @over_aux: stored verbatim in the I2C location
+ *
+ * Returns false, without touching @cmd, when @op is NULL or its payload is
+ * larger than the request buffer; returns true once the request is filled.
+ */
+static bool op_i2c_convert(
+		union dmub_rb_cmd *cmd,
+		const struct mod_hdcp_atomic_op_i2c *op,
+		enum dmub_cmd_fused_request_type type,
+		uint32_t ddc_line,
+		bool over_aux
+)
+{
+	struct dmub_cmd_fused_request *request = &cmd->fused_io.request;
+	struct dmub_cmd_fused_request_location_i2c *i2c = &request->u.i2c;
+
+	if (!op)
+		return false;
+	if (op->size > sizeof(request->buffer))
+		return false;
+
+	/* Payload and request type. */
+	memcpy(request->buffer, op->data, op->size);
+	request->type = type;
+
+	/* Where on the bus the payload goes. */
+	i2c->length = op->size;
+	i2c->offset = op->offset;
+	i2c->address = op->address;
+	i2c->over_aux = over_aux;
+	i2c->ddc_line = ddc_line;
+	i2c->is_aux = false;
+
+	return true;
+}
+
+/*
+ * Populate the fused-IO request inside @cmd from the AUX operation @op.
+ *
+ * @cmd:      command whose fused_io.request is filled in
+ * @op:       source operation (address, size, data); may be NULL
+ * @type:     request type stored in the request
+ * @ddc_line: DDC line stored in the AUX location
+ *
+ * Returns false, without touching @cmd, when @op is NULL or its payload is
+ * larger than the request buffer; returns true once the request is filled.
+ */
+static bool op_aux_convert(
+		union dmub_rb_cmd *cmd,
+		const struct mod_hdcp_atomic_op_aux *op,
+		enum dmub_cmd_fused_request_type type,
+		uint32_t ddc_line
+)
+{
+	struct dmub_cmd_fused_request *request = &cmd->fused_io.request;
+	struct dmub_cmd_fused_request_location_aux *aux = &request->u.aux;
+
+	if (!op)
+		return false;
+	if (op->size > sizeof(request->buffer))
+		return false;
+
+	/* Payload and request type. */
+	memcpy(request->buffer, op->data, op->size);
+	request->type = type;
+
+	/* Where on the AUX channel the payload goes. */
+	aux->length = op->size;
+	aux->address = op->address;
+	aux->ddc_line = ddc_line;
+	aux->is_aux = true;
+
+	return true;
+}
+
+/*
+ * Stamp the three prepared commands (write, poll, read) with fused-IO headers
+ * and hand them to dm_helpers_execute_fused_io() as one chained submission.
+ *
+ * @link:            link the requests are issued on
+ * @commands:        the three requests, in write / poll / read order
+ * @poll_timeout_us: timeout stored in the poll request
+ * @poll_mask_msb:   poll mask stored in the poll request
+ *
+ * The overall timeout is the sum, over all three requests, of a fixed 10 ms
+ * allowance plus the request's own timeout; a request marked as AUX that has
+ * no timeout of its own adds a further 10 ms for every 16 whole bytes of its
+ * length.
+ *
+ * Returns true only when the helper succeeds and the status found in
+ * commands[0] afterwards is FUSED_REQUEST_STATUS_SUCCESS.
+ */
+static bool atomic_write_poll_read(
+		struct dc_link *link,
+		union dmub_rb_cmd commands[3],
+		uint32_t poll_timeout_us,
+		uint8_t poll_mask_msb
+)
+{
+	const uint8_t count = 3;
+	const uint32_t timeout_per_request_us = 10000;
+	const uint32_t timeout_per_aux_transaction_us = 10000;
+	struct dmub_cmd_fused_request *poll_request = &commands[1].fused_io.request;
+	uint64_t timeout_us = 0;
+	uint8_t i;
+
+	poll_request->poll_mask_msb = poll_mask_msb;
+	poll_request->timeout_us = poll_timeout_us;
+
+	for (i = 0; i < count; i++) {
+		struct dmub_rb_cmd_fused_io *io = &commands[i].fused_io;
+
+		io->header.type = DMUB_CMD__FUSED_IO;
+		io->header.sub_type = DMUB_CMD__FUSED_IO_EXECUTE;
+		/* Every command except the last announces that more follow. */
+		io->header.multi_cmd_pending = (i + 1) < count;
+		io->header.payload_bytes = sizeof(commands[i].fused_io) - sizeof(io->header);
+
+		timeout_us += timeout_per_request_us + io->request.timeout_us;
+		if (io->request.u.aux.is_aux && !io->request.timeout_us)
+			timeout_us += timeout_per_aux_transaction_us * (io->request.u.aux.length / 16);
+	}
+
+	return dm_helpers_execute_fused_io(link->ctx, link, commands, count, timeout_us)
+		&& commands[0].fused_io.request.status == FUSED_REQUEST_STATUS_SUCCESS;
+}
+
+/*
+ * Issue an I2C write, poll and read as one atomic fused-IO sequence.
+ *
+ * @link:            link to operate on; its DDC pin supplies the DDC line
+ * @write:           data to write first
+ * @poll:            location polled after the write
+ * @read:            location read last; read->data receives read->size bytes
+ * @poll_timeout_us: timeout applied to the poll step
+ * @poll_mask_msb:   mask applied by the poll step
+ *
+ * Returns false when @link or any part of its DDC pin chain is missing, when
+ * any operation is NULL or too large for a request buffer, or when the
+ * sequence does not complete successfully. read->data is written only on
+ * success, so a failed call never overwrites it with the request contents.
+ */
+bool dm_atomic_write_poll_read_i2c(
+		struct dc_link *link,
+		const struct mod_hdcp_atomic_op_i2c *write,
+		const struct mod_hdcp_atomic_op_i2c *poll,
+		struct mod_hdcp_atomic_op_i2c *read,
+		uint32_t poll_timeout_us,
+		uint8_t poll_mask_msb
+)
+{
+	/* The DDC line sits behind three pointers; validate the whole chain. */
+	if (!link || !link->ddc || !link->ddc->ddc_pin || !link->ddc->ddc_pin->pin_data)
+		return false;
+
+	const bool over_aux = false;
+	const uint32_t ddc_line = link->ddc->ddc_pin->pin_data->en;
+
+	union dmub_rb_cmd commands[3] = { 0 };
+	const bool converted = op_i2c_convert(&commands[0], write, FUSED_REQUEST_WRITE, ddc_line, over_aux)
+			&& op_i2c_convert(&commands[1], poll, FUSED_REQUEST_POLL, ddc_line, over_aux)
+			&& op_i2c_convert(&commands[2], read, FUSED_REQUEST_READ, ddc_line, over_aux);
+
+	if (!converted)
+		return false;
+
+	/*
+	 * On failure commands[0] may still hold the outgoing write payload, so
+	 * do not copy it into the caller's read buffer.
+	 */
+	if (!atomic_write_poll_read(link, commands, poll_timeout_us, poll_mask_msb))
+		return false;
+
+	/* The result is taken from commands[0], the slot whose status was checked. */
+	memcpy(read->data, commands[0].fused_io.request.buffer, read->size);
+	return true;
+}
+
+/*
+ * Issue an AUX write, poll and read as one atomic fused-IO sequence.
+ *
+ * @link:            link to operate on; its DDC pin supplies the DDC line
+ * @write:           data to write first
+ * @poll:            location polled after the write
+ * @read:            location read last; read->data receives read->size bytes
+ * @poll_timeout_us: timeout applied to the poll step
+ * @poll_mask_msb:   mask applied by the poll step
+ *
+ * Returns false when @link or any part of its DDC pin chain is missing, when
+ * any operation is NULL or too large for a request buffer, or when the
+ * sequence does not complete successfully. read->data is written only on
+ * success, so a failed call never overwrites it with the request contents.
+ */
+bool dm_atomic_write_poll_read_aux(
+		struct dc_link *link,
+		const struct mod_hdcp_atomic_op_aux *write,
+		const struct mod_hdcp_atomic_op_aux *poll,
+		struct mod_hdcp_atomic_op_aux *read,
+		uint32_t poll_timeout_us,
+		uint8_t poll_mask_msb
+)
+{
+	/* The DDC line sits behind three pointers; validate the whole chain. */
+	if (!link || !link->ddc || !link->ddc->ddc_pin || !link->ddc->ddc_pin->pin_data)
+		return false;
+
+	const uint32_t ddc_line = link->ddc->ddc_pin->pin_data->en;
+	union dmub_rb_cmd commands[3] = { 0 };
+	const bool converted = op_aux_convert(&commands[0], write, FUSED_REQUEST_WRITE, ddc_line)
+			&& op_aux_convert(&commands[1], poll, FUSED_REQUEST_POLL, ddc_line)
+			&& op_aux_convert(&commands[2], read, FUSED_REQUEST_READ, ddc_line);
+
+	if (!converted)
+		return false;
+
+	/*
+	 * On failure commands[0] may still hold the outgoing write payload, so
+	 * do not copy it into the caller's read buffer.
+	 */
+	if (!atomic_write_poll_read(link, commands, poll_timeout_us, poll_mask_msb))
+		return false;
+
+	/* The result is taken from commands[0], the slot whose status was checked. */
+	memcpy(read->data, commands[0].fused_io.request.buffer, read->size);
+	return true;
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dc_fused_io.h b/drivers/gpu/drm/amd/display/dc/dc_fused_io.h
new file mode 100644
index 000000000000..c74917240985
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_fused_io.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ */
+
+#ifndef __DC_FUSED_IO_H__
+#define __DC_FUSED_IO_H__
+
+#include "dc.h"
+#include "mod_hdcp.h"
+
+bool dm_atomic_write_poll_read_i2c(
+ struct dc_link *link,
+ const struct mod_hdcp_atomic_op_i2c *write,
+ const struct mod_hdcp_atomic_op_i2c *poll,
+ struct mod_hdcp_atomic_op_i2c *read,
+ uint32_t poll_timeout_us,
+ uint8_t poll_mask_msb
+);
+
+bool dm_atomic_write_poll_read_aux(
+ struct dc_link *link,
+ const struct mod_hdcp_atomic_op_aux *write,
+ const struct mod_hdcp_atomic_op_aux *poll,
+ struct mod_hdcp_atomic_op_aux *read,
+ uint32_t poll_timeout_us,
+ uint8_t poll_mask_msb
+);
+
+#endif // __DC_FUSED_IO_H__
+
diff --git a/drivers/gpu/drm/amd/display/dc/dc_hdmi_types.h b/drivers/gpu/drm/amd/display/dc/dc_hdmi_types.h
new file mode 100644
index 000000000000..b015e80672ec
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_hdmi_types.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef DC_HDMI_TYPES_H
+#define DC_HDMI_TYPES_H
+
+#include "os_types.h"
+
+/* Address range from 0x00 to 0x1F.*/
+#define DP_ADAPTOR_TYPE2_SIZE 0x20
+#define DP_ADAPTOR_TYPE2_REG_ID 0x10
+#define DP_ADAPTOR_TYPE2_REG_MAX_TMDS_CLK 0x1D
+/* Identifies adaptor as Dual-mode adaptor */
+#define DP_ADAPTOR_TYPE2_ID 0xA0
+/* MHz*/
+#define DP_ADAPTOR_TYPE2_MAX_TMDS_CLK 600
+/* MHz*/
+#define DP_ADAPTOR_TYPE2_MIN_TMDS_CLK 25
+/* kHz */
+#define DP_ADAPTOR_DVI_MAX_TMDS_CLK 165000
+/* kHz */
+#define DP_ADAPTOR_HDMI_SAFE_MAX_TMDS_CLK 165000
+
+/*
+ * 16-byte signature block of a DP-HDMI dongle: a 15-character identifier
+ * followed by one end-of-transmission byte.
+ */
+struct dp_hdmi_dongle_signature_data {
+ int8_t id[15];/* "DP-HDMI ADAPTOR"*/
+ uint8_t eot;/* end of transmission '\x4' */
+};
+
+/* DP-HDMI dongle slave address for retrieving dongle signature*/
+#define DP_HDMI_DONGLE_ADDRESS 0x40
+#define DP_HDMI_DONGLE_SIGNATURE_EOT 0x04
+
+
+/* SCDC Address defines (HDMI 2.0)*/
+#define HDMI_SCDC_WRITE_UPDATE_0_ARRAY 3
+#define HDMI_SCDC_ADDRESS 0x54
+#define HDMI_SCDC_SINK_VERSION 0x01
+#define HDMI_SCDC_SOURCE_VERSION 0x02
+#define HDMI_SCDC_UPDATE_0 0x10
+#define HDMI_SCDC_TMDS_CONFIG 0x20
+#define HDMI_SCDC_SCRAMBLER_STATUS 0x21
+#define HDMI_SCDC_CONFIG_0 0x30
+#define HDMI_SCDC_CONFIG_1 0x31
+#define HDMI_SCDC_SOURCE_TEST_REQ 0x35
+#define HDMI_SCDC_STATUS_FLAGS 0x40
+#define HDMI_SCDC_ERR_DETECT 0x50
+#define HDMI_SCDC_TEST_CONFIG 0xC0
+
+#define HDMI_SCDC_MANUFACTURER_OUI 0xD0
+#define HDMI_SCDC_DEVICE_ID 0xDB
+
+/*
+ * Two-byte SCDC update value, accessible either as raw bytes or as flags.
+ * Only the low three bits of the first byte are named; the rest of the
+ * first byte and the whole second byte are reserved.
+ * NOTE(review): presumably read starting at HDMI_SCDC_UPDATE_0 - confirm
+ * against the caller.
+ */
+union hdmi_scdc_update_read_data {
+ uint8_t byte[2];
+ struct {
+ uint8_t STATUS_UPDATE:1;
+ uint8_t CED_UPDATE:1;
+ uint8_t RR_TEST:1;
+ uint8_t RESERVED:5;
+ uint8_t RESERVED2:8;
+ } fields;
+};
+
+/*
+ * One-byte SCDC status value: a clock-detected flag and one lock flag per
+ * channel (0..2) in the low four bits, upper four bits reserved.
+ * NOTE(review): presumably read from HDMI_SCDC_STATUS_FLAGS - confirm
+ * against the caller.
+ */
+union hdmi_scdc_status_flags_data {
+ uint8_t byte;
+ struct {
+ uint8_t CLOCK_DETECTED:1;
+ uint8_t CH0_LOCKED:1;
+ uint8_t CH1_LOCKED:1;
+ uint8_t CH2_LOCKED:1;
+ uint8_t RESERVED:4;
+ } fields;
+};
+
+/*
+ * Eleven-byte SCDC block viewed as raw bytes or as fields. For each of the
+ * three channels there is an 8-bit low part, a 7-bit high part and a 1-bit
+ * valid flag (two bytes per channel), followed by a checksum byte and
+ * reserved space.
+ * NOTE(review): presumably the character-error-detection counters read from
+ * HDMI_SCDC_ERR_DETECT - confirm against the caller. The named fields add up
+ * to 84 bits, so the final reserved field only covers half of the last byte.
+ */
+union hdmi_scdc_ced_data {
+ uint8_t byte[11];
+ struct {
+ uint8_t CH0_8LOW:8;
+ uint8_t CH0_7HIGH:7;
+ uint8_t CH0_VALID:1;
+ uint8_t CH1_8LOW:8;
+ uint8_t CH1_7HIGH:7;
+ uint8_t CH1_VALID:1;
+ uint8_t CH2_8LOW:8;
+ uint8_t CH2_7HIGH:7;
+ uint8_t CH2_VALID:1;
+ uint8_t CHECKSUM:8;
+ uint8_t RESERVED:8;
+ uint8_t RESERVED2:8;
+ uint8_t RESERVED3:8;
+ uint8_t RESERVED4:4;
+ } fields;
+};
+
+/*
+ * Three-byte manufacturer OUI, accessible as raw bytes or as three named
+ * one-byte fields.
+ * NOTE(review): presumably read from HDMI_SCDC_MANUFACTURER_OUI; which byte
+ * is most significant is not visible here - confirm before assembling it.
+ */
+union hdmi_scdc_manufacturer_OUI_data {
+ uint8_t byte[3];
+ struct {
+ uint8_t Manufacturer_OUI_1:8;
+ uint8_t Manufacturer_OUI_2:8;
+ uint8_t Manufacturer_OUI_3:8;
+ } fields;
+};
+
+/*
+ * One-byte hardware revision value: minor revision in the first 4-bit field,
+ * major revision in the second.
+ * NOTE(review): presumably one byte of the block at HDMI_SCDC_DEVICE_ID -
+ * confirm the exact offset against the caller.
+ */
+union hdmi_scdc_device_id_data {
+ uint8_t byte;
+ struct {
+ uint8_t Hardware_Minor_Rev:4;
+ uint8_t Hardware_Major_Rev:4;
+ } fields;
+};
+
+#endif /* DC_HDMI_TYPES_H */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c
index ab6bc5d79012..5a365bd19933 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c
@@ -36,24 +36,21 @@
#include "dc_dmub_srv.h"
#include "reg_helper.h"
+#define DC_LOGGER \
+ ctx->logger
+
static inline void submit_dmub_read_modify_write(
struct dc_reg_helper_state *offload,
const struct dc_context *ctx)
{
struct dmub_rb_cmd_read_modify_write *cmd_buf = &offload->cmd_data.read_modify_write;
- bool gather = false;
offload->should_burst_write =
(offload->same_addr_count == (DMUB_READ_MODIFY_WRITE_SEQ__MAX - 1));
cmd_buf->header.payload_bytes =
sizeof(struct dmub_cmd_read_modify_write_sequence) * offload->reg_seq_count;
- gather = ctx->dmub_srv->reg_helper_offload.gather_in_progress;
- ctx->dmub_srv->reg_helper_offload.gather_in_progress = false;
-
- dc_dmub_srv_cmd_queue(ctx->dmub_srv, &offload->cmd_data);
-
- ctx->dmub_srv->reg_helper_offload.gather_in_progress = gather;
+ dc_wake_and_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
memset(cmd_buf, 0, sizeof(*cmd_buf));
@@ -66,17 +63,11 @@ static inline void submit_dmub_burst_write(
const struct dc_context *ctx)
{
struct dmub_rb_cmd_burst_write *cmd_buf = &offload->cmd_data.burst_write;
- bool gather = false;
cmd_buf->header.payload_bytes =
sizeof(uint32_t) * offload->reg_seq_count;
- gather = ctx->dmub_srv->reg_helper_offload.gather_in_progress;
- ctx->dmub_srv->reg_helper_offload.gather_in_progress = false;
-
- dc_dmub_srv_cmd_queue(ctx->dmub_srv, &offload->cmd_data);
-
- ctx->dmub_srv->reg_helper_offload.gather_in_progress = gather;
+ dc_wake_and_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
memset(cmd_buf, 0, sizeof(*cmd_buf));
@@ -88,17 +79,11 @@ static inline void submit_dmub_reg_wait(
const struct dc_context *ctx)
{
struct dmub_rb_cmd_reg_wait *cmd_buf = &offload->cmd_data.reg_wait;
- bool gather = false;
- gather = ctx->dmub_srv->reg_helper_offload.gather_in_progress;
- ctx->dmub_srv->reg_helper_offload.gather_in_progress = false;
-
- dc_dmub_srv_cmd_queue(ctx->dmub_srv, &offload->cmd_data);
+ dc_wake_and_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
memset(cmd_buf, 0, sizeof(*cmd_buf));
offload->reg_seq_count = 0;
-
- ctx->dmub_srv->reg_helper_offload.gather_in_progress = gather;
}
struct dc_reg_value_masks {
@@ -106,11 +91,6 @@ struct dc_reg_value_masks {
uint32_t mask;
};
-struct dc_reg_sequence {
- uint32_t addr;
- struct dc_reg_value_masks value_masks;
-};
-
static inline void set_reg_field_value_masks(
struct dc_reg_value_masks *field_value_mask,
uint32_t value,
@@ -151,7 +131,6 @@ static void dmub_flush_buffer_execute(
const struct dc_context *ctx)
{
submit_dmub_read_modify_write(offload, ctx);
- dc_dmub_srv_cmd_execute(ctx->dmub_srv);
}
static void dmub_flush_burst_write_buffer_execute(
@@ -159,7 +138,6 @@ static void dmub_flush_burst_write_buffer_execute(
const struct dc_context *ctx)
{
submit_dmub_burst_write(offload, ctx);
- dc_dmub_srv_cmd_execute(ctx->dmub_srv);
}
static bool dmub_reg_value_burst_set_pack(const struct dc_context *ctx, uint32_t addr,
@@ -283,7 +261,6 @@ uint32_t generic_reg_set_ex(const struct dc_context *ctx,
va_end(ap);
-
/* mmio write directly */
reg_val = (reg_val & ~field_value_mask.mask) | field_value_mask.value;
@@ -484,8 +461,7 @@ void generic_reg_wait(const struct dc_context *ctx,
field_value = get_reg_field_value_ex(reg_val, mask, shift);
if (field_value == condition_value) {
- if (i * delay_between_poll_us > 1000 &&
- !IS_FPGA_MAXIMUS_DC(ctx->dce_environment))
+ if (i * delay_between_poll_us > 1000)
DC_LOG_DC("REG_WAIT taking a while: %dms in %s line:%d\n",
delay_between_poll_us * i / 1000,
func_name, line);
@@ -497,8 +473,7 @@ void generic_reg_wait(const struct dc_context *ctx,
delay_between_poll_us, time_out_num_tries,
func_name, line);
- if (!IS_FPGA_MAXIMUS_DC(ctx->dce_environment))
- BREAK_TO_DEBUGGER();
+ BREAK_TO_DEBUGGER();
}
void generic_write_indirect_reg(const struct dc_context *ctx,
@@ -588,6 +563,66 @@ uint32_t generic_indirect_reg_update_ex(const struct dc_context *ctx,
return reg_val;
}
+
+/*
+ * Merge @n fields into @reg_val and write the result to the PCIE indirect
+ * register @index.
+ *
+ * The first field is passed as @shift1 / @mask1 / @field_value1; each further
+ * field is passed as a variadic (shift, mask, value) triple, read as uint32_t.
+ *
+ * Returns the value that was written.
+ */
+uint32_t generic_indirect_reg_update_ex_sync(const struct dc_context *ctx,
+		uint32_t index, uint32_t reg_val, int n,
+		uint8_t shift1, uint32_t mask1, uint32_t field_value1,
+		...)
+{
+	va_list fields;
+	int field;
+
+	reg_val = set_reg_field_value_ex(reg_val, field_value1, mask1, shift1);
+
+	va_start(fields, field_value1);
+	for (field = 1; field < n; field++) {
+		/* Pull the triple in call order: shift, mask, value. */
+		uint32_t shift = va_arg(fields, uint32_t);
+		uint32_t mask = va_arg(fields, uint32_t);
+		uint32_t field_value = va_arg(fields, uint32_t);
+
+		reg_val = set_reg_field_value_ex(reg_val, field_value, mask, shift);
+	}
+	va_end(fields);
+
+	dm_write_index_reg(ctx, CGS_IND_REG__PCIE, index, reg_val);
+
+	return reg_val;
+}
+
+/*
+ * Read the PCIE indirect register @index once and extract @n fields from it.
+ *
+ * The first field is described by @shift1 / @mask1 and stored through
+ * @field_value1; each further field is passed as a variadic
+ * (shift, mask, uint32_t *destination) triple.
+ *
+ * Returns the raw register value that was read.
+ */
+uint32_t generic_indirect_reg_get_sync(const struct dc_context *ctx,
+		uint32_t index, int n,
+		uint8_t shift1, uint32_t mask1, uint32_t *field_value1,
+		...)
+{
+	uint32_t value;
+	va_list fields;
+	int field;
+
+	value = dm_read_index_reg(ctx, CGS_IND_REG__PCIE, index);
+	*field_value1 = get_reg_field_value_ex(value, mask1, shift1);
+
+	va_start(fields, field_value1);
+	for (field = 1; field < n; field++) {
+		/* Pull the triple in call order: shift, mask, destination. */
+		uint32_t shift = va_arg(fields, uint32_t);
+		uint32_t mask = va_arg(fields, uint32_t);
+		uint32_t *field_value = va_arg(fields, uint32_t *);
+
+		*field_value = get_reg_field_value_ex(value, mask, shift);
+	}
+	va_end(fields);
+
+	return value;
+}
+
void reg_sequence_start_gather(const struct dc_context *ctx)
{
/* if reg sequence is supported and enabled, set flag to
@@ -631,8 +666,6 @@ void reg_sequence_start_execute(const struct dc_context *ctx)
default:
return;
}
-
- dc_dmub_srv_cmd_execute(ctx->dmub_srv);
}
}
@@ -649,6 +682,81 @@ void reg_sequence_wait_done(const struct dc_context *ctx)
if (offload &&
ctx->dc->debug.dmub_offload_enabled &&
!ctx->dc->debug.dmcub_emulation) {
- dc_dmub_srv_wait_idle(ctx->dmub_srv);
+ dc_dmub_srv_wait_for_idle(ctx->dmub_srv, DM_DMUB_WAIT_TYPE_WAIT, NULL);
}
}
+
+char *dce_version_to_string(const int version)
+{
+ switch (version) {
+ case DCE_VERSION_6_0:
+ return "DCE 6.0";
+ case DCE_VERSION_6_1:
+ return "DCE 6.1";
+ case DCE_VERSION_6_4:
+ return "DCE 6.4";
+ case DCE_VERSION_8_0:
+ return "DCE 8.0";
+ case DCE_VERSION_8_1:
+ return "DCE 8.1";
+ case DCE_VERSION_8_3:
+ return "DCE 8.3";
+ case DCE_VERSION_10_0:
+ return "DCE 10.0";
+ case DCE_VERSION_11_0:
+ return "DCE 11.0";
+ case DCE_VERSION_11_2:
+ return "DCE 11.2";
+ case DCE_VERSION_11_22:
+ return "DCE 11.22";
+ case DCE_VERSION_12_0:
+ return "DCE 12.0";
+ case DCE_VERSION_12_1:
+ return "DCE 12.1";
+ case DCN_VERSION_1_0:
+ return "DCN 1.0";
+ case DCN_VERSION_1_01:
+ return "DCN 1.0.1";
+ case DCN_VERSION_2_0:
+ return "DCN 2.0";
+ case DCN_VERSION_2_1:
+ return "DCN 2.1";
+ case DCN_VERSION_2_01:
+ return "DCN 2.0.1";
+ case DCN_VERSION_3_0:
+ return "DCN 3.0";
+ case DCN_VERSION_3_01:
+ return "DCN 3.0.1";
+ case DCN_VERSION_3_02:
+ return "DCN 3.0.2";
+ case DCN_VERSION_3_03:
+ return "DCN 3.0.3";
+ case DCN_VERSION_3_1:
+ return "DCN 3.1"; /* plain 3.1; the 3.1.x variants have their own cases below */
+ case DCN_VERSION_3_14:
+ return "DCN 3.1.4";
+ case DCN_VERSION_3_15:
+ return "DCN 3.1.5";
+ case DCN_VERSION_3_16:
+ return "DCN 3.1.6";
+ case DCN_VERSION_3_2:
+ return "DCN 3.2";
+ case DCN_VERSION_3_21:
+ return "DCN 3.2.1";
+ case DCN_VERSION_3_5:
+ return "DCN 3.5";
+ case DCN_VERSION_3_51:
+ return "DCN 3.5.1";
+ case DCN_VERSION_3_6:
+ return "DCN 3.6";
+ case DCN_VERSION_4_01:
+ return "DCN 4.0.1";
+ default:
+ return "Unknown";
+ }
+}
+
+bool dc_supports_vrr(const enum dce_version v)
+{
+ return v >= DCE_VERSION_8_0;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
index 52355fe6994c..667852517246 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
@@ -60,6 +60,7 @@ union large_integer {
enum dc_plane_addr_type {
PLN_ADDR_TYPE_GRAPHICS = 0,
+ PLN_ADDR_TYPE_3DLUT,
PLN_ADDR_TYPE_GRPH_STEREO,
PLN_ADDR_TYPE_VIDEO_PROGRESSIVE,
PLN_ADDR_TYPE_RGBEA
@@ -67,7 +68,7 @@ enum dc_plane_addr_type {
struct dc_plane_address {
enum dc_plane_addr_type type;
- bool tmz_surface;
+ uint8_t tmz_surface;
union {
struct{
PHYSICAL_ADDRESS_LOC addr;
@@ -76,6 +77,10 @@ struct dc_plane_address {
union large_integer dcc_const_color;
} grph;
+ struct {
+ PHYSICAL_ADDRESS_LOC addr;
+ } lut3d;
+
/*stereo*/
struct {
PHYSICAL_ADDRESS_LOC left_addr;
@@ -93,7 +98,6 @@ struct dc_plane_address {
PHYSICAL_ADDRESS_LOC right_alpha_addr;
PHYSICAL_ADDRESS_LOC right_alpha_meta_addr;
union large_integer right_alpha_dcc_const_color;
-
} grph_stereo;
/*video progressive*/
@@ -201,8 +205,9 @@ enum surface_pixel_format {
SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb,
SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr,
SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb,
+ SURFACE_PIXEL_FORMAT_SUBSAMPLE_END,
+ SURFACE_PIXEL_FORMAT_VIDEO_ACrYCb2101010 =
SURFACE_PIXEL_FORMAT_SUBSAMPLE_END,
- SURFACE_PIXEL_FORMAT_VIDEO_ACrYCb2101010,
SURFACE_PIXEL_FORMAT_VIDEO_CrYCbA1010102,
SURFACE_PIXEL_FORMAT_VIDEO_AYCrCb8888,
SURFACE_PIXEL_FORMAT_INVALID
@@ -235,6 +240,22 @@ enum pixel_format {
PIXEL_FORMAT_UNKNOWN
};
+/*
+ * This structure holds a surface address. There could be multiple addresses
+ * in cases such as Stereo 3D, Planar YUV, etc. Other per-flip attributes such
+ * as frame durations and DCC format can also be set.
+ */
+#define DC_MAX_DIRTY_RECTS 3
+struct dc_flip_addrs {
+ struct dc_plane_address address;
+ unsigned long long flip_timestamp_in_us;
+ bool flip_immediate;
+ /* TODO: add flip duration for FreeSync */
+ bool triplebuffer_flips;
+ unsigned int dirty_rect_count;
+ struct rect dirty_rects[DC_MAX_DIRTY_RECTS];
+};
+
enum tile_split_values {
DC_DISPLAY_MICRO_TILING = 0x0,
DC_THIN_MICRO_TILING = 0x1,
@@ -246,6 +267,9 @@ enum tripleBuffer_enable {
DC_TRIPLEBUFFER_DISABLE = 0x0,
DC_TRIPLEBUFFER_ENABLE = 0x1,
};
+enum tile_split_values_new {
+ DC_SURF_TILE_SPLIT_1KB = 0x4,
+};
/* TODO: These values come from hardware spec. We need to readdress this
* if they ever change.
@@ -303,86 +327,115 @@ enum swizzle_mode_values {
DC_SW_UNKNOWN = DC_SW_MAX
};
-union dc_tiling_info {
-
- struct {
- /* Specifies the number of memory banks for tiling
- * purposes.
- * Only applies to 2D and 3D tiling modes.
- * POSSIBLE VALUES: 2,4,8,16
- */
- unsigned int num_banks;
- /* Specifies the number of tiles in the x direction
- * to be incorporated into the same bank.
- * Only applies to 2D and 3D tiling modes.
- * POSSIBLE VALUES: 1,2,4,8
- */
- unsigned int bank_width;
- unsigned int bank_width_c;
- /* Specifies the number of tiles in the y direction to
- * be incorporated into the same bank.
- * Only applies to 2D and 3D tiling modes.
- * POSSIBLE VALUES: 1,2,4,8
- */
- unsigned int bank_height;
- unsigned int bank_height_c;
- /* Specifies the macro tile aspect ratio. Only applies
- * to 2D and 3D tiling modes.
- */
- unsigned int tile_aspect;
- unsigned int tile_aspect_c;
- /* Specifies the number of bytes that will be stored
- * contiguously for each tile.
- * If the tile data requires more storage than this
- * amount, it is split into multiple slices.
- * This field must not be larger than
- * GB_ADDR_CONFIG.DRAM_ROW_SIZE.
- * Only applies to 2D and 3D tiling modes.
- * For color render targets, TILE_SPLIT >= 256B.
- */
- enum tile_split_values tile_split;
- enum tile_split_values tile_split_c;
- /* Specifies the addressing within a tile.
- * 0x0 - DISPLAY_MICRO_TILING
- * 0x1 - THIN_MICRO_TILING
- * 0x2 - DEPTH_MICRO_TILING
- * 0x3 - ROTATED_MICRO_TILING
- */
- enum tile_mode_values tile_mode;
- enum tile_mode_values tile_mode_c;
- /* Specifies the number of pipes and how they are
- * interleaved in the surface.
- * Refer to memory addressing document for complete
- * details and constraints.
- */
- unsigned int pipe_config;
- /* Specifies the tiling mode of the surface.
- * THIN tiles use an 8x8x1 tile size.
- * THICK tiles use an 8x8x4 tile size.
- * 2D tiling modes rotate banks for successive Z slices
- * 3D tiling modes rotate pipes and banks for Z slices
- * Refer to memory addressing document for complete
- * details and constraints.
- */
- enum array_mode_values array_mode;
- } gfx8;
+// Definition of swizzle modes with addr3 ASICs
+enum swizzle_mode_addr3_values {
+ DC_ADDR3_SW_LINEAR = 0,
+ DC_ADDR3_SW_256B_2D = 1,
+ DC_ADDR3_SW_4KB_2D = 2,
+ DC_ADDR3_SW_64KB_2D = 3,
+ DC_ADDR3_SW_256KB_2D = 4,
+ DC_ADDR3_SW_4KB_3D = 5,
+ DC_ADDR3_SW_64KB_3D = 6,
+ DC_ADDR3_SW_256KB_3D = 7,
+ DC_ADDR3_SW_MAX = 8,
+ DC_ADDR3_SW_UNKNOWN = DC_ADDR3_SW_MAX
+};
- struct {
- enum swizzle_mode_values swizzle;
- unsigned int num_pipes;
- unsigned int max_compressed_frags;
- unsigned int pipe_interleave;
+enum dc_gfxversion {
+ DcGfxVersion7 = 0,
+ DcGfxVersion8,
+ DcGfxVersion9,
+ DcGfxVersion10,
+ DcGfxVersion11,
+ DcGfxAddr3,
+ DcGfxVersionUnknown
+};
- unsigned int num_banks;
- unsigned int num_shader_engines;
- unsigned int num_rb_per_se;
- bool shaderEnable;
+ struct dc_tiling_info {
+ unsigned int gfxversion; // Specifies which part of the union to use. Must use enum dc_gfxversion
+ union {
+ struct {
+ /* Specifies the number of memory banks for tiling
+ * purposes.
+ * Only applies to 2D and 3D tiling modes.
+ * POSSIBLE VALUES: 2,4,8,16
+ */
+ unsigned int num_banks;
+ /* Specifies the number of tiles in the x direction
+ * to be incorporated into the same bank.
+ * Only applies to 2D and 3D tiling modes.
+ * POSSIBLE VALUES: 1,2,4,8
+ */
+ unsigned int bank_width;
+ unsigned int bank_width_c;
+ /* Specifies the number of tiles in the y direction to
+ * be incorporated into the same bank.
+ * Only applies to 2D and 3D tiling modes.
+ * POSSIBLE VALUES: 1,2,4,8
+ */
+ unsigned int bank_height;
+ unsigned int bank_height_c;
+ /* Specifies the macro tile aspect ratio. Only applies
+ * to 2D and 3D tiling modes.
+ */
+ unsigned int tile_aspect;
+ unsigned int tile_aspect_c;
+ /* Specifies the number of bytes that will be stored
+ * contiguously for each tile.
+ * If the tile data requires more storage than this
+ * amount, it is split into multiple slices.
+ * This field must not be larger than
+ * GB_ADDR_CONFIG.DRAM_ROW_SIZE.
+ * Only applies to 2D and 3D tiling modes.
+ * For color render targets, TILE_SPLIT >= 256B.
+ */
+ enum tile_split_values tile_split;
+ enum tile_split_values tile_split_c;
+ /* Specifies the addressing within a tile.
+ * 0x0 - DISPLAY_MICRO_TILING
+ * 0x1 - THIN_MICRO_TILING
+ * 0x2 - DEPTH_MICRO_TILING
+ * 0x3 - ROTATED_MICRO_TILING
+ */
+ enum tile_mode_values tile_mode;
+ enum tile_mode_values tile_mode_c;
+ /* Specifies the number of pipes and how they are
+ * interleaved in the surface.
+ * Refer to memory addressing document for complete
+ * details and constraints.
+ */
+ unsigned int pipe_config;
+ /* Specifies the tiling mode of the surface.
+ * THIN tiles use an 8x8x1 tile size.
+ * THICK tiles use an 8x8x4 tile size.
+ * 2D tiling modes rotate banks for successive Z slices
+ * 3D tiling modes rotate pipes and banks for Z slices
+ * Refer to memory addressing document for complete
+ * details and constraints.
+ */
+ enum array_mode_values array_mode;
+ } gfx8;
- bool meta_linear;
- bool rb_aligned;
- bool pipe_aligned;
- unsigned int num_pkrs;
- } gfx9;
+ struct {
+ enum swizzle_mode_values swizzle;
+ unsigned int num_pipes;
+ unsigned int max_compressed_frags;
+ unsigned int pipe_interleave;
+
+ unsigned int num_banks;
+ unsigned int num_shader_engines;
+ unsigned int num_rb_per_se;
+ bool shaderEnable;
+
+ bool meta_linear;
+ bool rb_aligned;
+ bool pipe_aligned;
+ unsigned int num_pkrs;
+ } gfx9;/*gfx9, gfx10 and above*/
+ struct {
+ enum swizzle_mode_addr3_values swizzle;
+ } gfx_addr3;/*gfx with addr3 and above*/
+ };
};
/* Rotation angle */
@@ -400,19 +453,43 @@ enum dc_scan_direction {
SCAN_DIRECTION_VERTICAL = 2, /* 90, 270 rotation */
};
+/**
+ * struct dc_cursor_position: Hardware cursor data.
+ *
+ * This struct keeps the action information related to the cursor that will be
+ * sent and received from our DC core.
+ */
struct dc_cursor_position {
+ /**
+ * @x: It represents the top left abscissa coordinate of the cursor.
+ */
uint32_t x;
+
+ /**
+ * @y: It is the top ordinate of the cursor coordinate.
+ */
uint32_t y;
+ /**
+ * @x_hotspot: Define the abscissa point where mouse click happens.
+ */
uint32_t x_hotspot;
+
+ /**
+ * @y_hotspot: Define the ordinate point where mouse click happens.
+ */
uint32_t y_hotspot;
- /*
- * This parameter indicates whether HW cursor should be enabled
+ /**
+ * @enable: This parameter indicates whether hardware cursor should be
+ * enabled.
*/
bool enable;
- /* Translate cursor x/y by the source rectangle for each plane. */
+ /**
+ * @translate_by_source: Translate cursor x/y by the source rectangle
+ * for each plane.
+ */
bool translate_by_source;
};
@@ -420,10 +497,12 @@ struct dc_cursor_mi_param {
unsigned int pixel_clk_khz;
unsigned int ref_clk_khz;
struct rect viewport;
+ struct rect recout;
struct fixed31_32 h_scale_ratio;
struct fixed31_32 v_scale_ratio;
enum dc_rotation_angle rotation;
bool mirror;
+ struct dc_stream_state *stream;
};
/* IPP related types */
@@ -477,7 +556,9 @@ struct dc_gamma {
/* Used by both ipp amd opp functions*/
/* TODO: to be consolidated with enum color_space */
-/*
+/**
+ * enum dc_cursor_color_format - DC cursor programming mode
+ *
* This enum is for programming CURSOR_MODE register field. What this register
* should be programmed to depends on OS requested cursor shape flags and what
* we stored in the cursor surface.
@@ -513,17 +594,39 @@ union dc_cursor_attribute_flags {
};
struct dc_cursor_attributes {
+ /**
+ * @address: This field represents the framebuffer address associated
+ * with the cursor. It is important to highlight that this address is
+ * divided into a high and low parts.
+ */
PHYSICAL_ADDRESS_LOC address;
+
+ /**
+ * @pitch: Cursor line stride.
+ */
uint32_t pitch;
- /* Width and height should correspond to cursor surface width x heigh */
+ /**
+ * @width: Width should correspond to cursor surface width.
+ */
uint32_t width;
+ /**
+ * @height: Height should correspond to cursor surface height.
+ */
uint32_t height;
+ /**
+ * @color_format: DC cursor programming mode.
+ */
enum dc_cursor_color_format color_format;
- uint32_t sdr_white_level; // for boosting (SDR) cursor in HDR mode
+ /**
+ * @sdr_white_level: Boosting (SDR) cursor in HDR mode.
+ */
+ uint32_t sdr_white_level;
- /* In case we support HW Cursor rotation in the future */
+ /**
+ * @rotation_angle: In case we support HW Cursor rotation in the future
+ */
enum dc_rotation_angle rotation_angle;
union dc_cursor_attribute_flags attribute_flags;
@@ -550,7 +653,8 @@ enum dc_color_space {
COLOR_SPACE_YCBCR709_LIMITED,
COLOR_SPACE_2020_RGB_FULLRANGE,
COLOR_SPACE_2020_RGB_LIMITEDRANGE,
- COLOR_SPACE_2020_YCBCR,
+ COLOR_SPACE_2020_YCBCR_LIMITED,
+ COLOR_SPACE_2020_YCBCR_FULL,
COLOR_SPACE_ADOBERGB,
COLOR_SPACE_DCIP3,
COLOR_SPACE_DISPLAYNATIVE,
@@ -558,6 +662,7 @@ enum dc_color_space {
COLOR_SPACE_APPCTRL,
COLOR_SPACE_CUSTOMPOINTS,
COLOR_SPACE_YCBCR709_BLACK,
+ COLOR_SPACE_2020_YCBCR = COLOR_SPACE_2020_YCBCR_LIMITED,
};
enum dc_dither_option {
@@ -704,9 +809,6 @@ struct dc_crtc_timing_flags {
uint32_t LTE_340MCSC_SCRAMBLE:1;
uint32_t DSC : 1; /* Use DSC with this timing */
-#ifndef TRIM_FSFT
- uint32_t FAST_TRANSPORT: 1;
-#endif
uint32_t VBLANK_SYNCHRONIZABLE: 1;
};
@@ -732,6 +834,29 @@ enum dc_timing_3d_format {
TIMING_3D_FORMAT_MAX,
};
+#define DC_DSC_QP_SET_SIZE 15
+#define DC_DSC_RC_BUF_THRESH_SIZE 14
+struct dc_dsc_rc_params_override {
+ int32_t rc_model_size;
+ int32_t rc_buf_thresh[DC_DSC_RC_BUF_THRESH_SIZE];
+ int32_t rc_minqp[DC_DSC_QP_SET_SIZE];
+ int32_t rc_maxqp[DC_DSC_QP_SET_SIZE];
+ int32_t rc_offset[DC_DSC_QP_SET_SIZE];
+
+ int32_t rc_tgt_offset_hi;
+ int32_t rc_tgt_offset_lo;
+ int32_t rc_edge_factor;
+ int32_t rc_quant_incr_limit0;
+ int32_t rc_quant_incr_limit1;
+
+ int32_t initial_fullness_offset;
+ int32_t initial_delay;
+
+ int32_t flatness_min_qp;
+ int32_t flatness_max_qp;
+ int32_t flatness_det_thresh;
+};
+
struct dc_dsc_config {
uint32_t num_slices_h; /* Number of DSC slices - horizontal */
uint32_t num_slices_v; /* Number of DSC slices - vertical */
@@ -741,40 +866,133 @@ struct dc_dsc_config {
uint32_t version_minor; /* DSC minor version. Full version is formed as 1.version_minor. */
bool ycbcr422_simple; /* Tell DSC engine to convert YCbCr 4:2:2 to 'YCbCr 4:2:2 simple'. */
int32_t rc_buffer_size; /* DSC RC buffer block size in bytes */
+ bool is_frl; /* indicate if DSC is applied based on HDMI FRL sink's capability */
bool is_dp; /* indicate if DSC is applied based on DP's capability */
+ uint32_t mst_pbn; /* pbn of display on dsc mst hub */
+ const struct dc_dsc_rc_params_override *rc_params_ovrd; /* DM owned memory. If not NULL, apply custom dsc rc params */
};
+
+/**
+ * struct dc_crtc_timing - Timing parameters used to configure DCN blocks
+ *
+ * DCN provides multiple signals and parameters that can be used to adjust
+ * timing parameters, this struct aggregate multiple of these values for easy
+ * access. In this struct, fields prefixed with h_* are related to horizontal
+ * timing, and v_* to vertical timing. Keep in mind that when we talk about
+ * vertical timings, the values, in general, are described in the number of
+ * lines; on the other hand, the horizontal values are in pixels.
+ */
struct dc_crtc_timing {
+ /**
+ * @h_total: The total number of pixels from the rising edge of the
+ * previous HSync until the rising edge of the current HSync.
+ */
uint32_t h_total;
+
+ /**
+ * @h_border_left: The black pixels related to the left border
+ */
uint32_t h_border_left;
+
+ /**
+ * @h_addressable: It is the range of pixels displayed horizontally.
+ * For example, if the display resolution is 3840x2160, the horizontal
+ * addressable area is 3840.
+ */
uint32_t h_addressable;
+
+ /**
+ * @h_border_right: The black pixels related to the right border
+ */
uint32_t h_border_right;
+
+ /**
+ * @h_front_porch: Period (in pixels) between HBlank start and the
+ * rising edge of HSync.
+ */
uint32_t h_front_porch;
+
+ /**
+ * @h_sync_width: HSync duration in pixels.
+ */
uint32_t h_sync_width;
+ /**
+ * @v_total: It is the total number of lines from the rising edge of
+ * the previous VSync until the rising edge of the current VSync.
+ *
+ * |--------------------------|
+ * +-+ V_TOTAL +-+
+ * | | | |
+ * VSync ---+ +--------- // -----------+ +---
+ */
uint32_t v_total;
+
+ /**
+ * @v_border_top: The black border on the top.
+ */
uint32_t v_border_top;
+
+ /**
+ * @v_addressable: It is the range of the scanout at which the
+ * framebuffer is displayed. For example, if the display resolution is
+ * 3840x2160, the addressable area is 2160 lines, or if the resolution
+ * is 1920x1080, the addressable area is 1080 lines.
+ */
uint32_t v_addressable;
+
+ /**
+ * @v_border_bottom: The black border on the bottom.
+ */
uint32_t v_border_bottom;
+
+ /**
+ * @v_front_porch: Period (in lines) between VBlank start and rising
+ * edge of VSync.
+ * +-+
+ * VSync | |
+ * ----------+ +--------...
+ * +------------------...
+ * VBlank |
+ * --+
+ * |-------|
+ * v_front_porch
+ */
uint32_t v_front_porch;
+
+ /**
+ * @v_sync_width: VSync signal width in lines.
+ */
uint32_t v_sync_width;
+ /**
+ * @pix_clk_100hz: Pipe pixel precision
+ *
+ * This field is used to communicate pixel clocks with 100 Hz accuracy
+ * from dc_crtc_timing to BIOS command table.
+ */
uint32_t pix_clk_100hz;
+ uint32_t min_refresh_in_uhz;
+ uint32_t max_refresh_in_uhz;
+
uint32_t vic;
uint32_t hdmi_vic;
+ uint32_t rid;
+ uint32_t fr_index;
+ uint32_t frl_uncompressed_video_bandwidth_in_kbps;
enum dc_timing_3d_format timing_3d_format;
enum dc_color_depth display_color_depth;
enum dc_pixel_encoding pixel_encoding;
enum dc_aspect_ratio aspect_ratio;
enum scanning_type scan_type;
-#ifndef TRIM_FSFT
- uint32_t fast_transport_output_rate_100hz;
-#endif
-
struct dc_crtc_timing_flags flags;
uint32_t dsc_fixed_bits_per_pixel_x16; /* DSC target bitrate in 1/16 of bpp (e.g. 128 -> 8bpp) */
struct dc_dsc_config dsc_cfg;
+
+ /* The number of pixels that HBlank has been expanded by from the original EDID timing. */
+ uint32_t expanded_hblank;
};
enum trigger_delay {
@@ -799,6 +1017,8 @@ struct dc_crtc_timing_adjust {
uint32_t v_total_max;
uint32_t v_total_mid;
uint32_t v_total_mid_frame_num;
+ uint32_t allow_otg_v_count_halt;
+ uint8_t timing_adjust_pending;
};
@@ -875,6 +1095,25 @@ enum cm_gamut_coef_format {
CM_GAMUT_REMAP_COEF_FORMAT_S3_12 = 1
};
+enum mpcc_gamut_remap_mode_select {
+ MPCC_GAMUT_REMAP_MODE_SELECT_0 = 0,
+ MPCC_GAMUT_REMAP_MODE_SELECT_1,
+ MPCC_GAMUT_REMAP_MODE_SELECT_2
+};
+
+enum mpcc_gamut_remap_id {
+ MPCC_OGAM_GAMUT_REMAP,
+ MPCC_MCM_FIRST_GAMUT_REMAP,
+ MPCC_MCM_SECOND_GAMUT_REMAP,
+ MPCC_RMCM_GAMUT_REMAP,
+};
+
+enum cursor_matrix_mode {
+ CUR_MATRIX_BYPASS = 0,
+ CUR_MATRIX_SET_A,
+ CUR_MATRIX_SET_B
+};
+
struct mcif_warmup_params {
union large_integer start_address;
unsigned int address_increment;
@@ -903,5 +1142,19 @@ struct tg_color {
uint16_t color_b_cb;
};
+enum symclk_state {
+ SYMCLK_OFF_TX_OFF,
+ SYMCLK_ON_TX_ON,
+ SYMCLK_ON_TX_OFF,
+};
+
+struct phy_state {
+ struct {
+ uint8_t otg : 1;
+ uint8_t reserved : 7;
+ } symclk_ref_cnts;
+ enum symclk_state symclk_state;
+};
+
#endif /* DC_HW_TYPES_H */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h
deleted file mode 100644
index fad3d883ed89..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
+++ /dev/null
@@ -1,447 +0,0 @@
-/*
- * Copyright 2012-14 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#ifndef DC_LINK_H_
-#define DC_LINK_H_
-
-#include "dc.h"
-#include "dc_types.h"
-#include "grph_object_defs.h"
-
-enum dc_link_fec_state {
- dc_link_fec_not_ready,
- dc_link_fec_ready,
- dc_link_fec_enabled
-};
-
-struct dc_link_status {
- bool link_active;
- struct dpcd_caps *dpcd_caps;
-};
-
-/* DP MST stream allocation (payload bandwidth number) */
-struct link_mst_stream_allocation {
- /* DIG front */
- const struct stream_encoder *stream_enc;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- /* HPO DP Stream Encoder */
- const struct hpo_dp_stream_encoder *hpo_dp_stream_enc;
-#endif
- /* associate DRM payload table with DC stream encoder */
- uint8_t vcp_id;
- /* number of slots required for the DP stream in transport packet */
- uint8_t slot_count;
-};
-
-/* DP MST stream allocation table */
-struct link_mst_stream_allocation_table {
- /* number of DP video streams */
- int stream_count;
- /* array of stream allocations */
- struct link_mst_stream_allocation stream_allocations[MAX_CONTROLLER_NUM];
-};
-
-struct time_stamp {
- uint64_t edp_poweroff;
- uint64_t edp_poweron;
-};
-
-struct link_trace {
- struct time_stamp time_stamp;
-};
-
-/* PSR feature flags */
-struct psr_settings {
- bool psr_feature_enabled; // PSR is supported by sink
- bool psr_allow_active; // PSR is currently active
- enum dc_psr_version psr_version; // Internal PSR version, determined based on DPCD
-
- /* These parameters are calculated in Driver,
- * based on display timing and Sink capabilities.
- * If VBLANK region is too small and Sink takes a long time
- * to set up RFB, it may take an extra frame to enter PSR state.
- */
- bool psr_frame_capture_indication_req;
- unsigned int psr_sdp_transmit_line_num_deadline;
- unsigned int psr_power_opt;
-};
-
-/*
- * A link contains one or more sinks and their connected status.
- * The currently active signal type (HDMI, DP-SST, DP-MST) is also reported.
- */
-struct dc_link {
- struct dc_sink *remote_sinks[MAX_SINKS_PER_LINK];
- unsigned int sink_count;
- struct dc_sink *local_sink;
- unsigned int link_index;
- enum dc_connection_type type;
- enum signal_type connector_signal;
- enum dc_irq_source irq_source_hpd;
- enum dc_irq_source irq_source_hpd_rx;/* aka DP Short Pulse */
- bool is_hpd_filter_disabled;
- bool dp_ss_off;
- bool link_state_valid;
- bool aux_access_disabled;
- bool sync_lt_in_progress;
- enum lttpr_mode lttpr_mode;
- bool is_internal_display;
-
- /* TODO: Rename. Flag an endpoint as having a programmable mapping to a
- * DIG encoder. */
- bool is_dig_mapping_flexible;
- bool hpd_status; /* HPD status of link without physical HPD pin. */
-
- bool edp_sink_present;
-
- /* caps is the same as reported_link_cap. link_traing use
- * reported_link_cap. Will clean up. TODO
- */
- struct dc_link_settings reported_link_cap;
- struct dc_link_settings verified_link_cap;
- struct dc_link_settings cur_link_settings;
- struct dc_lane_settings cur_lane_setting[LANE_COUNT_DP_MAX];
- struct dc_link_settings preferred_link_setting;
- /* preferred_training_settings are override values that
- * come from DM. DM is responsible for the memory
- * management of the override pointers.
- */
- struct dc_link_training_overrides preferred_training_settings;
- struct dp_audio_test_data audio_test_data;
-
- uint8_t ddc_hw_inst;
-
- uint8_t hpd_src;
-
- uint8_t link_enc_hw_inst;
- /* DIG link encoder ID. Used as index in link encoder resource pool.
- * For links with fixed mapping to DIG, this is not changed after dc_link
- * object creation.
- */
- enum engine_id eng_id;
-
- bool test_pattern_enabled;
- union compliance_test_state compliance_test_state;
-
- void *priv;
-
- struct ddc_service *ddc;
-
- bool aux_mode;
-
- /* Private to DC core */
-
- const struct dc *dc;
-
- struct dc_context *ctx;
-
- struct panel_cntl *panel_cntl;
- struct link_encoder *link_enc;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- struct hpo_dp_link_encoder *hpo_dp_link_enc;
-#endif
- struct graphics_object_id link_id;
- /* Endpoint type distinguishes display endpoints which do not have entries
- * in the BIOS connector table from those that do. Helps when tracking link
- * encoder to display endpoint assignments.
- */
- enum display_endpoint_type ep_type;
- union ddi_channel_mapping ddi_channel_mapping;
- struct connector_device_tag_info device_tag;
- struct dpcd_caps dpcd_caps;
- uint32_t dongle_max_pix_clk;
- unsigned short chip_caps;
- unsigned int dpcd_sink_count;
-#if defined(CONFIG_DRM_AMD_DC_HDCP)
- struct hdcp_caps hdcp_caps;
-#endif
- enum edp_revision edp_revision;
- union dpcd_sink_ext_caps dpcd_sink_ext_caps;
-
- struct psr_settings psr_settings;
-
- /* Drive settings read from integrated info table */
- struct dc_lane_settings bios_forced_drive_settings;
-
- /* MST record stream using this link */
- struct link_flags {
- bool dp_keep_receiver_powered;
- bool dp_skip_DID2;
- bool dp_skip_reset_segment;
- bool dp_mot_reset_segment;
- /* Some USB4 docks do not handle turning off MST DSC once it has been enabled. */
- bool dpia_mst_dsc_always_on;
- } wa_flags;
- struct link_mst_stream_allocation_table mst_stream_alloc_table;
-
- struct dc_link_status link_status;
-
- struct link_trace link_trace;
- struct gpio *hpd_gpio;
- enum dc_link_fec_state fec_state;
-};
-
-const struct dc_link_status *dc_link_get_status(const struct dc_link *dc_link);
-
-/**
- * dc_get_link_at_index() - Return an enumerated dc_link.
- *
- * dc_link order is constant and determined at
- * boot time. They cannot be created or destroyed.
- * Use dc_get_caps() to get number of links.
- */
-static inline struct dc_link *dc_get_link_at_index(struct dc *dc, uint32_t link_index)
-{
- return dc->links[link_index];
-}
-
-static inline void get_edp_links(const struct dc *dc,
- struct dc_link **edp_links,
- int *edp_num)
-{
- int i;
-
- *edp_num = 0;
- for (i = 0; i < dc->link_count; i++) {
- // report any eDP links, even unconnected DDI's
- if (!dc->links[i])
- continue;
- if (dc->links[i]->connector_signal == SIGNAL_TYPE_EDP) {
- edp_links[*edp_num] = dc->links[i];
- if (++(*edp_num) == MAX_NUM_EDP)
- return;
- }
- }
-}
-
-static inline bool dc_get_edp_link_panel_inst(const struct dc *dc,
- const struct dc_link *link,
- unsigned int *inst_out)
-{
- struct dc_link *edp_links[MAX_NUM_EDP];
- int edp_num;
-
- if (link->connector_signal != SIGNAL_TYPE_EDP)
- return false;
- get_edp_links(dc, edp_links, &edp_num);
- if ((edp_num > 1) && (link->link_index > edp_links[0]->link_index))
- *inst_out = 1;
- else
- *inst_out = 0;
- return true;
-}
-
-/* Set backlight level of an embedded panel (eDP, LVDS).
- * backlight_pwm_u16_16 is unsigned 32 bit with 16 bit integer
- * and 16 bit fractional, where 1.0 is max backlight value.
- */
-bool dc_link_set_backlight_level(const struct dc_link *dc_link,
- uint32_t backlight_pwm_u16_16,
- uint32_t frame_ramp);
-
-/* Set/get nits-based backlight level of an embedded panel (eDP, LVDS). */
-bool dc_link_set_backlight_level_nits(struct dc_link *link,
- bool isHDR,
- uint32_t backlight_millinits,
- uint32_t transition_time_in_ms);
-
-bool dc_link_get_backlight_level_nits(struct dc_link *link,
- uint32_t *backlight_millinits,
- uint32_t *backlight_millinits_peak);
-
-bool dc_link_backlight_enable_aux(struct dc_link *link, bool enable);
-
-bool dc_link_read_default_bl_aux(struct dc_link *link, uint32_t *backlight_millinits);
-bool dc_link_set_default_brightness_aux(struct dc_link *link);
-
-int dc_link_get_backlight_level(const struct dc_link *dc_link);
-
-int dc_link_get_target_backlight_pwm(const struct dc_link *link);
-
-bool dc_link_set_psr_allow_active(struct dc_link *dc_link, const bool *enable,
- bool wait, bool force_static, const unsigned int *power_opts);
-
-bool dc_link_get_psr_state(const struct dc_link *dc_link, enum dc_psr_state *state);
-
-bool dc_link_setup_psr(struct dc_link *dc_link,
- const struct dc_stream_state *stream, struct psr_config *psr_config,
- struct psr_context *psr_context);
-
-void dc_link_get_psr_residency(const struct dc_link *link, uint32_t *residency);
-
-/* Request DC to detect if there is a Panel connected.
- * boot - If this call is during initial boot.
- * Return false for any type of detection failure or MST detection
- * true otherwise. True meaning further action is required (status update
- * and OS notification).
- */
-enum dc_detect_reason {
- DETECT_REASON_BOOT,
- DETECT_REASON_HPD,
- DETECT_REASON_HPDRX,
- DETECT_REASON_FALLBACK,
- DETECT_REASON_RETRAIN
-};
-
-bool dc_link_detect(struct dc_link *dc_link, enum dc_detect_reason reason);
-bool dc_link_get_hpd_state(struct dc_link *dc_link);
-enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-enum dc_status dc_link_reduce_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t req_pbn);
-enum dc_status dc_link_increase_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t req_pbn);
-#endif
-
-/* Notify DC about DP RX Interrupt (aka Short Pulse Interrupt).
- * Return:
- * true - Downstream port status changed. DM should call DC to do the
- * detection.
- * false - no change in Downstream port status. No further action required
- * from DM. */
-bool dc_link_handle_hpd_rx_irq(struct dc_link *dc_link,
- union hpd_irq_data *hpd_irq_dpcd_data, bool *out_link_loss,
- bool defer_handling, bool *has_left_work);
-
-/*
- * On eDP links this function call will stall until T12 has elapsed.
- * If the panel is not in power off state, this function will return
- * immediately.
- */
-bool dc_link_wait_for_t12(struct dc_link *link);
-
-void dc_link_dp_handle_automated_test(struct dc_link *link);
-void dc_link_dp_handle_link_loss(struct dc_link *link);
-bool dc_link_dp_allow_hpd_rx_irq(const struct dc_link *link);
-
-struct dc_sink_init_data;
-
-struct dc_sink *dc_link_add_remote_sink(
- struct dc_link *dc_link,
- const uint8_t *edid,
- int len,
- struct dc_sink_init_data *init_data);
-
-void dc_link_remove_remote_sink(
- struct dc_link *link,
- struct dc_sink *sink);
-
-/* Used by diagnostics for virtual link at the moment */
-
-void dc_link_dp_set_drive_settings(
- struct dc_link *link,
- struct link_training_settings *lt_settings);
-
-bool dc_link_dp_perform_link_training_skip_aux(
- struct dc_link *link,
- const struct dc_link_settings *link_setting);
-
-enum link_training_result dc_link_dp_perform_link_training(
- struct dc_link *link,
- const struct dc_link_settings *link_settings,
- bool skip_video_pattern);
-
-bool dc_link_dp_sync_lt_begin(struct dc_link *link);
-
-enum link_training_result dc_link_dp_sync_lt_attempt(
- struct dc_link *link,
- struct dc_link_settings *link_setting,
- struct dc_link_training_overrides *lt_settings);
-
-bool dc_link_dp_sync_lt_end(struct dc_link *link, bool link_down);
-
-void dc_link_dp_enable_hpd(const struct dc_link *link);
-
-void dc_link_dp_disable_hpd(const struct dc_link *link);
-
-bool dc_link_dp_set_test_pattern(
- struct dc_link *link,
- enum dp_test_pattern test_pattern,
- enum dp_test_pattern_color_space test_pattern_color_space,
- const struct link_training_settings *p_link_settings,
- const unsigned char *p_custom_pattern,
- unsigned int cust_pattern_size);
-
-bool dc_link_dp_get_max_link_enc_cap(const struct dc_link *link, struct dc_link_settings *max_link_enc_cap);
-
-void dc_link_enable_hpd_filter(struct dc_link *link, bool enable);
-
-bool dc_link_is_dp_sink_present(struct dc_link *link);
-
-bool dc_link_detect_sink(struct dc_link *link, enum dc_connection_type *type);
-/*
- * DPCD access interfaces
- */
-
-#ifdef CONFIG_DRM_AMD_DC_HDCP
-bool dc_link_is_hdcp14(struct dc_link *link, enum signal_type signal);
-bool dc_link_is_hdcp22(struct dc_link *link, enum signal_type signal);
-#endif
-void dc_link_set_drive_settings(struct dc *dc,
- struct link_training_settings *lt_settings,
- const struct dc_link *link);
-void dc_link_set_preferred_link_settings(struct dc *dc,
- struct dc_link_settings *link_setting,
- struct dc_link *link);
-void dc_link_set_preferred_training_settings(struct dc *dc,
- struct dc_link_settings *link_setting,
- struct dc_link_training_overrides *lt_overrides,
- struct dc_link *link,
- bool skip_immediate_retrain);
-void dc_link_enable_hpd(const struct dc_link *link);
-void dc_link_disable_hpd(const struct dc_link *link);
-void dc_link_set_test_pattern(struct dc_link *link,
- enum dp_test_pattern test_pattern,
- enum dp_test_pattern_color_space test_pattern_color_space,
- const struct link_training_settings *p_link_settings,
- const unsigned char *p_custom_pattern,
- unsigned int cust_pattern_size);
-uint32_t dc_link_bandwidth_kbps(
- const struct dc_link *link,
- const struct dc_link_settings *link_setting);
-
-const struct dc_link_settings *dc_link_get_link_cap(
- const struct dc_link *link);
-
-void dc_link_overwrite_extended_receiver_cap(
- struct dc_link *link);
-
-bool dc_submit_i2c(
- struct dc *dc,
- uint32_t link_index,
- struct i2c_command *cmd);
-
-bool dc_submit_i2c_oem(
- struct dc *dc,
- struct i2c_command *cmd);
-
-uint32_t dc_bandwidth_in_kbps_from_timing(
- const struct dc_crtc_timing *timing);
-
-bool dc_link_is_fec_supported(const struct dc_link *link);
-bool dc_link_should_enable_fec(const struct dc_link *link);
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-enum dp_link_encoding dc_link_dp_mst_decide_link_encoding_format(const struct dc_link *link);
-#endif
-#endif /* DC_LINK_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_plane.h b/drivers/gpu/drm/amd/display/dc/dc_plane.h
new file mode 100644
index 000000000000..14feb843e694
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_plane.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_PLANE_H_
+#define _DC_PLANE_H_
+
+#include "dc_hw_types.h"
+
+union dc_plane_status_update_flags {
+ struct {
+ uint32_t address : 1;
+ } bits;
+ uint32_t raw;
+};
+
+struct dc_plane_state *dc_create_plane_state(const struct dc *dc);
+const struct dc_plane_status *dc_plane_get_status(
+ const struct dc_plane_state *plane_state,
+ union dc_plane_status_update_flags flags);
+void dc_plane_state_retain(struct dc_plane_state *plane_state);
+void dc_plane_state_release(struct dc_plane_state *plane_state);
+
+void dc_plane_force_dcc_and_tiling_disable(struct dc_plane_state *plane_state,
+ bool clear_tiling);
+
+
+void dc_plane_copy_config(struct dc_plane_state *dst, const struct dc_plane_state *src);
+
+#endif /* _DC_PLANE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_plane_priv.h b/drivers/gpu/drm/amd/display/dc/dc_plane_priv.h
new file mode 100644
index 000000000000..ab13335f1d01
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_plane_priv.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_PLANE_PRIV_H_
+#define _DC_PLANE_PRIV_H_
+
+#include "dc_plane.h"
+
+void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *plane_state);
+void dc_plane_destruct(struct dc_plane_state *plane_state);
+uint8_t dc_plane_get_pipe_mask(struct dc_state *dc_state, const struct dc_plane_state *plane_state);
+
+#endif /* _DC_PLANE_PRIV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c
new file mode 100644
index 000000000000..37d1a79e8241
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dc_spl_translate.h"
+#include "dcn20/dcn20_dpp.h"
+#include "dcn32/dcn32_dpp.h"
+#include "dcn401/dcn401_dpp.h"
+
+static struct spl_callbacks dcn2_spl_callbacks = {
+ .spl_calc_lb_num_partitions = dscl2_spl_calc_lb_num_partitions,
+};
+static struct spl_callbacks dcn32_spl_callbacks = {
+ .spl_calc_lb_num_partitions = dscl32_spl_calc_lb_num_partitions,
+};
+static struct spl_callbacks dcn401_spl_callbacks = {
+ .spl_calc_lb_num_partitions = dscl401_spl_calc_lb_num_partitions,
+};
+static void populate_splrect_from_rect(struct spl_rect *spl_rect, const struct rect *rect)
+{
+ spl_rect->x = rect->x;
+ spl_rect->y = rect->y;
+ spl_rect->width = rect->width;
+ spl_rect->height = rect->height;
+}
+static void populate_rect_from_splrect(struct rect *rect, const struct spl_rect *spl_rect)
+{
+ rect->x = spl_rect->x;
+ rect->y = spl_rect->y;
+ rect->width = spl_rect->width;
+ rect->height = spl_rect->height;
+}
+static void populate_spltaps_from_taps(struct spl_taps *spl_scaling_quality,
+ const struct scaling_taps *scaling_quality)
+{
+ spl_scaling_quality->h_taps_c = scaling_quality->h_taps_c;
+ spl_scaling_quality->h_taps = scaling_quality->h_taps;
+ spl_scaling_quality->v_taps_c = scaling_quality->v_taps_c;
+ spl_scaling_quality->v_taps = scaling_quality->v_taps;
+ spl_scaling_quality->integer_scaling = scaling_quality->integer_scaling;
+}
+static void populate_taps_from_spltaps(struct scaling_taps *scaling_quality,
+ const struct spl_taps *spl_scaling_quality)
+{
+ scaling_quality->h_taps_c = spl_scaling_quality->h_taps_c + 1;
+ scaling_quality->h_taps = spl_scaling_quality->h_taps + 1;
+ scaling_quality->v_taps_c = spl_scaling_quality->v_taps_c + 1;
+ scaling_quality->v_taps = spl_scaling_quality->v_taps + 1;
+}
+static void populate_ratios_from_splratios(struct scaling_ratios *ratios,
+ const struct ratio *spl_ratios)
+{
+ ratios->horz = dc_fixpt_from_ux_dy(spl_ratios->h_scale_ratio >> 5, 3, 19);
+ ratios->vert = dc_fixpt_from_ux_dy(spl_ratios->v_scale_ratio >> 5, 3, 19);
+ ratios->horz_c = dc_fixpt_from_ux_dy(spl_ratios->h_scale_ratio_c >> 5, 3, 19);
+ ratios->vert_c = dc_fixpt_from_ux_dy(spl_ratios->v_scale_ratio_c >> 5, 3, 19);
+}
+static void populate_inits_from_splinits(struct scl_inits *inits,
+ const struct init *spl_inits)
+{
+ inits->h = dc_fixpt_from_int_dy(spl_inits->h_filter_init_int, spl_inits->h_filter_init_frac >> 5, 0, 19);
+ inits->v = dc_fixpt_from_int_dy(spl_inits->v_filter_init_int, spl_inits->v_filter_init_frac >> 5, 0, 19);
+ inits->h_c = dc_fixpt_from_int_dy(spl_inits->h_filter_init_int_c, spl_inits->h_filter_init_frac_c >> 5, 0, 19);
+ inits->v_c = dc_fixpt_from_int_dy(spl_inits->v_filter_init_int_c, spl_inits->v_filter_init_frac_c >> 5, 0, 19);
+}
+static void populate_splformat_from_format(enum spl_pixel_format *spl_pixel_format, const enum pixel_format pixel_format)
+{
+ if (pixel_format < PIXEL_FORMAT_INVALID)
+ *spl_pixel_format = (enum spl_pixel_format)pixel_format;
+ else
+ *spl_pixel_format = SPL_PIXEL_FORMAT_INVALID;
+}
+/// @brief Populate the SPL input parameters (spl_in) from the state held in a pipe context
+/// @param pipe_ctx pipe context to read from; its plane_state, stream, plane_res and stream_res are dereferenced
+/// @param spl_in SPL input structure whose fields are filled in by this function
+void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl_in *spl_in)
+{
+ const struct dc_plane_state *plane_state = pipe_ctx->plane_state;
+ const struct dc_stream_state *stream = pipe_ctx->stream;
+ struct rect odm_slice_src = resource_get_odm_slice_src_rect(pipe_ctx);
+
+ // Assign the function to calculate the number of partitions in the line buffer
+ // This is used to determine the vtap support
+ switch (plane_state->ctx->dce_version) {
+ case DCN_VERSION_2_0:
+ spl_in->callbacks = dcn2_spl_callbacks;
+ break;
+ case DCN_VERSION_3_2:
+ spl_in->callbacks = dcn32_spl_callbacks;
+ break;
+ case DCN_VERSION_4_01:
+ spl_in->callbacks = dcn401_spl_callbacks;
+ break;
+ default: // any other version falls back to the DCN2 callbacks
+ spl_in->callbacks = dcn2_spl_callbacks;
+ }
+ // Translate the plane_res scl_data pixel format into the spl_in basic_in format
+ populate_splformat_from_format(&spl_in->basic_in.format, pipe_ctx->plane_res.scl_data.format);
+ // Copy the stream view_format into the basic_out view_format
+ spl_in->basic_out.view_format = (enum spl_view_3d)stream->view_format;
+ // Populate spl input basic input clip rect from plane state clip rect
+ populate_splrect_from_rect(&spl_in->basic_in.clip_rect, &plane_state->clip_rect);
+ // Populate spl input basic out src rect from stream src rect
+ populate_splrect_from_rect(&spl_in->basic_out.src_rect, &stream->src);
+ // Populate spl input basic out dst rect from stream dst rect
+ populate_splrect_from_rect(&spl_in->basic_out.dst_rect, &stream->dst);
+ // Copy the plane state rotation into the spl input basic input rotation field
+ spl_in->basic_in.rotation = (enum spl_rotation_angle)plane_state->rotation;
+ // Populate spl input basic input src rect from plane state src rect
+ populate_splrect_from_rect(&spl_in->basic_in.src_rect, &plane_state->src_rect);
+ // Populate spl input basic input dst rect from plane state dst rect
+ populate_splrect_from_rect(&spl_in->basic_in.dst_rect, &plane_state->dst_rect);
+ // Copy the plane state horizontal mirror flag into the spl input basic input info
+ spl_in->basic_in.horizontal_mirror = plane_state->horizontal_mirror;
+
+ // Calculate horizontal splits and split index
+ spl_in->basic_in.num_h_slices_recout_width_align.use_recout_width_aligned = false;
+ spl_in->basic_in.num_h_slices_recout_width_align.num_slices_recout_width.mpc_num_h_slices =
+ resource_get_mpc_slice_count(pipe_ctx);
+
+ if (stream->view_format == VIEW_3D_FORMAT_SIDE_BY_SIDE)
+ spl_in->basic_in.mpc_h_slice_index = 0;
+ else
+ spl_in->basic_in.mpc_h_slice_index = resource_get_mpc_slice_index(pipe_ctx);
+
+ populate_splrect_from_rect(&spl_in->basic_out.odm_slice_rect, &odm_slice_src);
+ spl_in->basic_out.odm_combine_factor = 0;
+ spl_in->odm_slice_index = resource_get_odm_slice_index(pipe_ctx);
+ // Output width: stream h_addressable plus left/right borders plus DSC hactive padding
+ spl_in->basic_out.output_size.width =
+ stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right + pipe_ctx->dsc_padding_params.dsc_hactive_padding;
+ // Output height: stream v_addressable plus top/bottom borders
+ spl_in->basic_out.output_size.height =
+ stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top;
+ spl_in->basic_out.max_downscale_src_width =
+ pipe_ctx->stream->ctx->dc->debug.max_downscale_src_width;
+ spl_in->basic_out.always_scale = pipe_ctx->stream->ctx->dc->debug.always_scale;
+ // Copy alpha_en from the plane res scl_data lb_params into the spl input basic output info
+ spl_in->basic_out.alpha_en = pipe_ctx->plane_res.scl_data.lb_params.alpha_en;
+ spl_in->basic_out.use_two_pixels_per_container = pipe_ctx->stream_res.tg->funcs->is_two_pixels_per_container(&stream->timing);
+ // Copy the plane state scaling_quality taps into the spl input scaling quality
+ populate_spltaps_from_taps(&spl_in->scaling_quality, &plane_state->scaling_quality);
+ // Translate edge adaptive scaler preference; debug.force_easf values 1, 2 and 3 override it below
+ spl_in->prefer_easf = pipe_ctx->stream->ctx->dc->config.prefer_easf;
+ spl_in->disable_easf = false;
+ if (pipe_ctx->stream->ctx->dc->debug.force_easf == 1)
+ spl_in->prefer_easf = false;
+ else if (pipe_ctx->stream->ctx->dc->debug.force_easf == 2)
+ spl_in->disable_easf = true;
+ else if (pipe_ctx->stream->ctx->dc->debug.force_easf == 3)
+ spl_in->override_easf = true;
+ /* Translate adaptive sharpening preference */
+ unsigned int sharpness_setting = pipe_ctx->stream->ctx->dc->debug.force_sharpness;
+ unsigned int force_sharpness_level = pipe_ctx->stream->ctx->dc->debug.force_sharpness_level;
+ if (sharpness_setting == SHARPNESS_HW_OFF)
+ spl_in->adaptive_sharpness.enable = false;
+ else if (sharpness_setting == SHARPNESS_ZERO) {
+ spl_in->adaptive_sharpness.enable = true;
+ spl_in->adaptive_sharpness.sharpness_level = 0;
+ } else if (sharpness_setting == SHARPNESS_CUSTOM) {
+ /* SAT: read sharpness_range from dc_plane_state */
+ spl_in->adaptive_sharpness.sharpness_range.sdr_rgb_min = plane_state->sharpness_range.sdr_rgb_min;
+ spl_in->adaptive_sharpness.sharpness_range.sdr_rgb_max = plane_state->sharpness_range.sdr_rgb_max;
+ spl_in->adaptive_sharpness.sharpness_range.sdr_rgb_mid = plane_state->sharpness_range.sdr_rgb_mid;
+ spl_in->adaptive_sharpness.sharpness_range.sdr_yuv_min = plane_state->sharpness_range.sdr_yuv_min;
+ spl_in->adaptive_sharpness.sharpness_range.sdr_yuv_max = plane_state->sharpness_range.sdr_yuv_max;
+ spl_in->adaptive_sharpness.sharpness_range.sdr_yuv_mid = plane_state->sharpness_range.sdr_yuv_mid;
+ spl_in->adaptive_sharpness.sharpness_range.hdr_rgb_min = plane_state->sharpness_range.hdr_rgb_min;
+ spl_in->adaptive_sharpness.sharpness_range.hdr_rgb_max = plane_state->sharpness_range.hdr_rgb_max;
+ spl_in->adaptive_sharpness.sharpness_range.hdr_rgb_mid = plane_state->sharpness_range.hdr_rgb_mid;
+
+ if (force_sharpness_level > 0) {
+ if (force_sharpness_level > 10)
+ force_sharpness_level = 10;
+ spl_in->adaptive_sharpness.enable = true;
+ spl_in->adaptive_sharpness.sharpness_level = force_sharpness_level;
+ } else if (!plane_state->adaptive_sharpness_en) {
+ spl_in->adaptive_sharpness.enable = false;
+ spl_in->adaptive_sharpness.sharpness_level = 0;
+ } else {
+ spl_in->adaptive_sharpness.enable = true;
+ spl_in->adaptive_sharpness.sharpness_level = plane_state->sharpness_level;
+ }
+ }
+ // Translate linear light scaling preference
+ if (pipe_ctx->stream->ctx->dc->debug.force_lls > 0)
+ spl_in->lls_pref = pipe_ctx->stream->ctx->dc->debug.force_lls;
+ else
+ spl_in->lls_pref = plane_state->linear_light_scaling;
+ /* Translate chroma subsampling offset (cositing) */
+ if (pipe_ctx->stream->ctx->dc->debug.force_cositing)
+ spl_in->basic_in.cositing = pipe_ctx->stream->ctx->dc->debug.force_cositing - 1;
+ else
+ spl_in->basic_in.cositing = plane_state->cositing;
+ /* Translate transfer function */
+ spl_in->basic_in.tf_type = (enum spl_transfer_func_type) plane_state->in_transfer_func.type;
+ spl_in->basic_in.tf_predefined_type = (enum spl_transfer_func_predefined) plane_state->in_transfer_func.tf;
+
+ spl_in->h_active = pipe_ctx->plane_res.scl_data.h_active;
+ spl_in->v_active = pipe_ctx->plane_res.scl_data.v_active;
+
+ spl_in->sharpen_policy = (enum sharpen_policy)plane_state->adaptive_sharpness_policy;
+ spl_in->debug.scale_to_sharpness_policy =
+ (enum scale_to_sharpness_policy)pipe_ctx->stream->ctx->dc->debug.scale_to_sharpness_policy;
+
+ /* Check if the stream is in fullscreen (taken from sharpening_required) and if it is HDR.
+ * Use this to determine sharpness levels
+ */
+ spl_in->is_fullscreen = pipe_ctx->stream->sharpening_required;
+ spl_in->is_hdr_on = dm_helpers_is_hdr_on(pipe_ctx->stream->ctx, pipe_ctx->stream);
+ spl_in->sdr_white_level_nits = plane_state->sdr_white_level_nits;
+}
+
+/// @brief Copy the SPL output (dscl_prog_data) back into the pipe context scaler data
+/// @param pipe_ctx pipe context whose plane_res.scl_data is written
+/// @param spl_out SPL output; its dscl_prog_data pointer is dereferenced
+void translate_SPL_out_params_to_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl_out *spl_out)
+{
+ // Copy the spl output recout into the scaler data recout
+ populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.recout, &spl_out->dscl_prog_data->recout);
+ // Convert the spl output ratios into the scaler data ratios
+ populate_ratios_from_splratios(&pipe_ctx->plane_res.scl_data.ratios, &spl_out->dscl_prog_data->ratios);
+ // Copy the spl output viewport into the scaler data viewport
+ populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport, &spl_out->dscl_prog_data->viewport);
+ // Copy the spl output viewport_c into the scaler data viewport_c
+ populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport_c, &spl_out->dscl_prog_data->viewport_c);
+ // Translate the spl output scaling taps into the scaler data taps
+ populate_taps_from_spltaps(&pipe_ctx->plane_res.scl_data.taps, &spl_out->dscl_prog_data->taps);
+ // Convert the spl output init values into the scaler data inits
+ populate_inits_from_splinits(&pipe_ctx->plane_res.scl_data.inits, &spl_out->dscl_prog_data->init);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h
new file mode 100644
index 000000000000..eaa5c5373b28
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DC_SPL_TRANSLATE_H__
+#define __DC_SPL_TRANSLATE_H__
+#include "dc.h"
+#include "resource.h"
+#include "dm_helpers.h"
+
+/* Populate SPL input parameters from pipe context
+ * @pipe_ctx: pipe context to read from
+ * @spl_in: spl input structure to fill in
+ */
+void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl_in *spl_in);
+
+/* Map SPL output parameters to pipe context
+ * @pipe_ctx: pipe context
+ * @spl_out: spl output structure
+ */
+void translate_SPL_out_params_to_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl_out *spl_out);
+
+#endif /* __DC_SPL_TRANSLATE_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_state.h b/drivers/gpu/drm/amd/display/dc/dc_state.h
new file mode 100644
index 000000000000..db1e63a7d460
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_state.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_STATE_H_
+#define _DC_STATE_H_
+
+#include "inc/core_status.h"
+
+struct dc_state *dc_state_create(struct dc *dc, struct dc_state_create_params *params);
+void dc_state_copy(struct dc_state *dst_state, struct dc_state *src_state);
+struct dc_state *dc_state_create_copy(struct dc_state *src_state);
+void dc_state_copy_current(struct dc *dc, struct dc_state *dst_state);
+struct dc_state *dc_state_create_current_copy(struct dc *dc);
+void dc_state_construct(struct dc *dc, struct dc_state *state);
+void dc_state_destruct(struct dc_state *state);
+void dc_state_retain(struct dc_state *state);
+void dc_state_release(struct dc_state *state);
+
+enum dc_status dc_state_add_stream(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *stream);
+
+enum dc_status dc_state_remove_stream(
+ const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *stream);
+
+bool dc_state_add_plane(
+ const struct dc *dc,
+ struct dc_stream_state *stream,
+ struct dc_plane_state *plane_state,
+ struct dc_state *state);
+
+bool dc_state_remove_plane(
+ const struct dc *dc,
+ struct dc_stream_state *stream,
+ struct dc_plane_state *plane_state,
+ struct dc_state *state);
+
+bool dc_state_rem_all_planes_for_stream(
+ const struct dc *dc,
+ struct dc_stream_state *stream,
+ struct dc_state *state);
+
+bool dc_state_add_all_planes_for_stream(
+ const struct dc *dc,
+ struct dc_stream_state *stream,
+ struct dc_plane_state * const *plane_states,
+ int plane_count,
+ struct dc_state *state);
+
+struct dc_stream_status *dc_state_get_stream_status(
+ struct dc_state *state,
+ const struct dc_stream_state *stream);
+#endif /* _DC_STATE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_state_priv.h b/drivers/gpu/drm/amd/display/dc/dc_state_priv.h
new file mode 100644
index 000000000000..1d9bae56ff6a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_state_priv.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_STATE_PRIV_H_
+#define _DC_STATE_PRIV_H_
+
+#include "dc_state.h"
+#include "dc_stream.h"
+
+struct dc_stream_state *dc_state_get_stream_from_id(const struct dc_state *state, unsigned int id);
+
+/* Get the type of the provided resource (none, phantom, main) based on the provided
+ * context. If the context is unavailable, determine only if phantom or not.
+ */
+enum mall_stream_type dc_state_get_pipe_subvp_type(const struct dc_state *state,
+ const struct pipe_ctx *pipe_ctx);
+enum mall_stream_type dc_state_get_stream_subvp_type(const struct dc_state *state,
+ const struct dc_stream_state *stream);
+
+/* Gets the phantom stream if main is provided, gets the main if phantom is provided.*/
+struct dc_stream_state *dc_state_get_paired_subvp_stream(const struct dc_state *state,
+ const struct dc_stream_state *stream);
+
+/* allocates a phantom stream or plane and returns a pointer to the object */
+struct dc_stream_state *dc_state_create_phantom_stream(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *main_stream);
+struct dc_plane_state *dc_state_create_phantom_plane(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_plane_state *main_plane);
+
+/* deallocates a phantom stream or plane */
+void dc_state_release_phantom_stream(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *phantom_stream);
+void dc_state_release_phantom_plane(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_plane_state *phantom_plane);
+
+/* add/remove phantom stream to context and generate subvp meta data */
+enum dc_status dc_state_add_phantom_stream(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *phantom_stream,
+ struct dc_stream_state *main_stream);
+enum dc_status dc_state_remove_phantom_stream(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *phantom_stream);
+
+bool dc_state_add_phantom_plane(
+ const struct dc *dc,
+ struct dc_stream_state *phantom_stream,
+ struct dc_plane_state *phantom_plane,
+ struct dc_state *state);
+
+bool dc_state_remove_phantom_plane(
+ const struct dc *dc,
+ struct dc_stream_state *phantom_stream,
+ struct dc_plane_state *phantom_plane,
+ struct dc_state *state);
+
+bool dc_state_rem_all_phantom_planes_for_stream(
+ const struct dc *dc,
+ struct dc_stream_state *phantom_stream,
+ struct dc_state *state,
+ bool should_release_planes);
+
+bool dc_state_add_all_phantom_planes_for_stream(
+ const struct dc *dc,
+ struct dc_stream_state *phantom_stream,
+ struct dc_plane_state * const *phantom_planes,
+ int plane_count,
+ struct dc_state *state);
+
+bool dc_state_remove_phantom_streams_and_planes(
+ const struct dc *dc,
+ struct dc_state *state);
+
+void dc_state_release_phantom_streams_and_planes(
+ const struct dc *dc,
+ struct dc_state *state);
+
+bool dc_state_is_fams2_in_use(
+ const struct dc *dc,
+ const struct dc_state *state);
+
+
+void dc_state_set_stream_subvp_cursor_limit(const struct dc_stream_state *stream,
+ struct dc_state *state,
+ bool limit);
+
+bool dc_state_get_stream_subvp_cursor_limit(const struct dc_stream_state *stream,
+ struct dc_state *state);
+
+void dc_state_set_stream_cursor_subvp_limit(const struct dc_stream_state *stream,
+ struct dc_state *state,
+ bool limit);
+
+bool dc_state_get_stream_cursor_subvp_limit(const struct dc_stream_state *stream,
+ struct dc_state *state);
+
+bool dc_state_can_clear_stream_cursor_subvp_limit(const struct dc_stream_state *stream,
+ struct dc_state *state);
+
+bool dc_state_is_subvp_in_use(struct dc_state *state);
+
+#endif /* _DC_STATE_PRIV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index e37c4a10bfd5..321cfe92d799 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -38,19 +38,30 @@ struct timing_sync_info {
bool master;
};
+struct mall_stream_config {
+ /* MALL stream config to indicate if the stream is phantom or not.
+ * We will use a phantom stream to indicate that the pipe is phantom.
+ */
+ enum mall_stream_type type;
+ struct dc_stream_state *paired_stream; // master / slave stream
+ bool subvp_limit_cursor_size; /* stream has/is using subvp limiting hw cursor support */
+ bool cursor_size_limit_subvp; /* stream is using hw cursor config preventing subvp */
+};
+
struct dc_stream_status {
int primary_otg_inst;
int stream_enc_inst;
+
+ /**
+ * @plane_count: Total of planes attached to a single stream
+ */
int plane_count;
int audio_inst;
struct timing_sync_info timing_sync_info;
- struct dc_plane_state *plane_states[MAX_SURFACE_NUM];
+ struct dc_plane_state *plane_states[MAX_SURFACES];
bool is_abm_supported;
-};
-
-// TODO: References to this needs to be removed..
-struct freesync_context {
- bool dummy;
+ struct mall_stream_config mall_stream_config;
+ bool fpo_in_use;
};
enum hubp_dmdata_mode {
@@ -115,12 +126,10 @@ struct periodic_interrupt_config {
int lines_offset;
};
-#if defined(CONFIG_DRM_AMD_DC_DCN)
struct dc_mst_stream_bw_update {
bool is_increase; // is bandwidth reduced or increased
uint32_t mst_stream_bw; // new mst bandwidth in kbps
};
-#endif
union stream_update_flags {
struct {
@@ -132,9 +141,11 @@ union stream_update_flags {
uint32_t gamut_remap:1;
uint32_t wb_update:1;
uint32_t dsc_changed : 1;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
uint32_t mst_bw : 1;
-#endif
+ uint32_t crtc_timing_adjust : 1;
+ uint32_t fams_changed : 1;
+ uint32_t scaler_sharpener : 1;
+ uint32_t sharpening_required : 1;
} bits;
uint32_t raw;
@@ -148,6 +159,30 @@ struct test_pattern {
unsigned int cust_pattern_size;
};
+#define SUBVP_DRR_MARGIN_US 100 // 100us for DRR margin (SubVP + DRR)
+
+struct dc_stream_debug_options {
+ char force_odm_combine_segments;
+ /*
+ * When force_odm_combine_segments is non zero, allow dc to
+ * temporarily transition to ODM bypass when minimal transition state
+ * is required to prevent visual glitches showing on the screen
+ */
+ char allow_transition_for_forced_odm;
+};
+
+#define LUMINANCE_DATA_TABLE_SIZE 10
+
+struct luminance_data {
+ bool is_valid;
+ int refresh_rate_hz[LUMINANCE_DATA_TABLE_SIZE];
+ int luminance_millinits[LUMINANCE_DATA_TABLE_SIZE];
+ int flicker_criteria_milli_nits_GAMING;
+ int flicker_criteria_milli_nits_STATIC;
+ int nominal_refresh_rate;
+ int dm_max_decrease_from_nominal;
+};
+
struct dc_stream_state {
// sink is deprecated, new code should not reference
// this pointer
@@ -158,31 +193,33 @@ struct dc_stream_state {
* a stream via the volatile dc_state rather than the static dc_link.
*/
struct link_encoder *link_enc;
+ struct dc_stream_debug_options debug;
struct dc_panel_patch sink_patches;
- union display_content_support content_support;
struct dc_crtc_timing timing;
struct dc_crtc_timing_adjust adjust;
struct dc_info_packet vrr_infopacket;
struct dc_info_packet vsc_infopacket;
struct dc_info_packet vsp_infopacket;
-
+ struct dc_info_packet hfvsif_infopacket;
+ struct dc_info_packet vtem_infopacket;
+ struct dc_info_packet adaptive_sync_infopacket;
+ struct dc_info_packet avi_infopacket;
+ uint8_t dsc_packed_pps[128];
struct rect src; /* composition area */
struct rect dst; /* stream addressable area */
- // TODO: References to this needs to be removed..
- struct freesync_context freesync_ctx;
-
struct audio_info audio_info;
struct dc_info_packet hdr_static_metadata;
PHYSICAL_ADDRESS_LOC dmdata_address;
bool use_dynamic_meta;
- struct dc_transfer_func *out_transfer_func;
+ struct dc_transfer_func out_transfer_func;
struct colorspace_transform gamut_remap_matrix;
struct dc_csc_transform csc_color_matrix;
enum dc_color_space output_color_space;
+ enum display_content_type content_type;
enum dc_dither_option dither_option;
enum view_3d_format view_format;
@@ -190,7 +227,21 @@ struct dc_stream_state {
bool use_vsc_sdp_for_colorimetry;
bool ignore_msa_timing_param;
+ /**
+ * @allow_freesync:
+ *
+ * It says whether FreeSync is enabled or not.
+ */
+ bool allow_freesync;
+
+ /**
+ * @vrr_active_variable:
+ *
+ * It describes if VRR is in use.
+ */
+ bool vrr_active_variable;
bool freesync_on_desktop;
+ bool vrr_active_fixed;
bool converter_disable_audio;
uint8_t qs_bit;
@@ -203,8 +254,7 @@ struct dc_stream_state {
/* DMCU info */
unsigned int abm_level;
- struct periodic_interrupt_config periodic_interrupt0;
- struct periodic_interrupt_config periodic_interrupt1;
+ struct periodic_interrupt_config periodic_interrupt;
/* from core_stream struct */
struct dc_context *ctx;
@@ -221,6 +271,8 @@ struct dc_stream_state {
struct dc_cursor_attributes cursor_attributes;
struct dc_cursor_position cursor_position;
+ bool hw_cursor_req;
+
uint32_t sdr_white_level; // for boosting (SDR) cursor in HDR mode
/* from stream struct */
@@ -249,6 +301,7 @@ struct dc_stream_state {
bool apply_edp_fast_boot_optimization;
bool apply_seamless_boot_optimization;
+ uint32_t apply_boot_odm_mode;
uint32_t stream_id;
@@ -257,6 +310,11 @@ struct dc_stream_state {
bool has_non_synchronizable_pclk;
bool vblank_synchronized;
+ bool is_phantom;
+
+ struct luminance_data lumin_data;
+ bool scaler_sharpener_update;
+ bool sharpening_required;
};
#define ABM_LEVEL_IMMEDIATE_DISABLE 255
@@ -270,15 +328,21 @@ struct dc_stream_update {
struct dc_info_packet *hdr_static_metadata;
unsigned int *abm_level;
- struct periodic_interrupt_config *periodic_interrupt0;
- struct periodic_interrupt_config *periodic_interrupt1;
+ struct periodic_interrupt_config *periodic_interrupt;
struct dc_info_packet *vrr_infopacket;
struct dc_info_packet *vsc_infopacket;
struct dc_info_packet *vsp_infopacket;
+ struct dc_info_packet *hfvsif_infopacket;
+ struct dc_info_packet *vtem_infopacket;
+ struct dc_info_packet *adaptive_sync_infopacket;
+ struct dc_info_packet *avi_infopacket;
bool *dpms_off;
bool integer_scaling_update;
+ bool *allow_freesync;
+ bool *vrr_active_variable;
+ bool *vrr_active_fixed;
struct colorspace_transform *gamut_remap;
enum dc_color_space *output_color_space;
@@ -288,13 +352,18 @@ struct dc_stream_update {
struct dc_writeback_update *wb_update;
struct dc_dsc_config *dsc_config;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
struct dc_mst_stream_bw_update *mst_bw_update;
-#endif
struct dc_transfer_func *func_shaper;
struct dc_3dlut *lut3d_func;
struct test_pattern *pending_test_pattern;
+ struct dc_crtc_timing_adjust *crtc_timing_adjust;
+
+ struct dc_cursor_attributes *cursor_attributes;
+ struct dc_cursor_position *cursor_position;
+ bool *hw_cursor_req;
+ bool *scaler_sharpener_update;
+ bool *sharpening_required;
};
bool dc_is_stream_unchanged(
@@ -303,6 +372,9 @@ bool dc_is_stream_scaling_unchanged(
struct dc_stream_state *old_stream, struct dc_stream_state *stream);
/*
+ * Set up stream attributes. If no stream updates are provided,
+ * there will be no impact on the stream parameters.
+ *
* Set up surface attributes and associate to a stream
* The surfaces parameter is an absolute set of all surface active for the stream.
* If no surfaces are provided, the stream will be blanked; no memory read.
@@ -311,8 +383,23 @@ bool dc_is_stream_scaling_unchanged(
* After this call:
* Surfaces attributes are programmed and configured to be composed into stream.
* This does not trigger a flip. No surface address is programmed.
+ *
*/
+bool dc_update_planes_and_stream(struct dc *dc,
+ struct dc_surface_update *surface_updates, int surface_count,
+ struct dc_stream_state *dc_stream,
+ struct dc_stream_update *stream_update);
+/*
+ * Set up surface attributes and associate to a stream
+ * The surfaces parameter is an absolute set of all surface active for the stream.
+ * If no surfaces are provided, the stream will be blanked; no memory read.
+ * Any flip related attribute changes must be done through this interface.
+ *
+ * After this call:
+ * Surfaces attributes are programmed and configured to be composed into stream.
+ * This does not trigger a flip. No surface address is programmed.
+ */
void dc_commit_updates_for_stream(struct dc *dc,
struct dc_surface_update *srf_updates,
int surface_count,
@@ -326,7 +413,6 @@ void dc_stream_log(const struct dc *dc, const struct dc_stream_state *stream);
uint8_t dc_get_current_stream_count(struct dc *dc);
struct dc_stream_state *dc_get_stream_at_index(struct dc *dc, uint8_t i);
-struct dc_stream_state *dc_stream_find_from_link(const struct dc_link *link);
/*
* Return the current frame counter.
@@ -350,45 +436,14 @@ bool dc_stream_get_scanoutpos(const struct dc_stream_state *stream,
uint32_t *h_position,
uint32_t *v_position);
-enum dc_status dc_add_stream_to_ctx(
- struct dc *dc,
- struct dc_state *new_ctx,
- struct dc_stream_state *stream);
-
-enum dc_status dc_remove_stream_from_ctx(
- struct dc *dc,
- struct dc_state *new_ctx,
- struct dc_stream_state *stream);
-
-
-bool dc_add_plane_to_context(
- const struct dc *dc,
- struct dc_stream_state *stream,
- struct dc_plane_state *plane_state,
- struct dc_state *context);
-
-bool dc_remove_plane_from_context(
- const struct dc *dc,
- struct dc_stream_state *stream,
- struct dc_plane_state *plane_state,
- struct dc_state *context);
-
-bool dc_rem_all_planes_for_stream(
- const struct dc *dc,
- struct dc_stream_state *stream,
- struct dc_state *context);
-
-bool dc_add_all_planes_for_stream(
- const struct dc *dc,
- struct dc_stream_state *stream,
- struct dc_plane_state * const *plane_states,
- int plane_count,
- struct dc_state *context);
-
bool dc_stream_add_writeback(struct dc *dc,
struct dc_stream_state *stream,
struct dc_writeback_info *wb_info);
+bool dc_stream_fc_disable_writeback(struct dc *dc,
+ struct dc_stream_state *stream,
+ uint32_t dwb_pipe_inst);
+
bool dc_stream_remove_writeback(struct dc *dc,
struct dc_stream_state *stream,
uint32_t dwb_pipe_inst);
@@ -397,10 +452,6 @@ enum dc_status dc_stream_add_dsc_to_resource(struct dc *dc,
struct dc_state *state,
struct dc_stream_state *stream);
-bool dc_stream_warmup_writeback(struct dc *dc,
- int num_dwb,
- struct dc_writeback_info *wb_info);
-
bool dc_stream_dmdata_status_done(struct dc *dc, struct dc_stream_state *stream);
bool dc_stream_set_dynamic_metadata(struct dc *dc,
@@ -410,14 +461,6 @@ bool dc_stream_set_dynamic_metadata(struct dc *dc,
enum dc_status dc_validate_stream(struct dc *dc, struct dc_stream_state *stream);
/*
- * Set up streams and links associated to drive sinks
- * The streams parameter is an absolute set of all active streams.
- *
- * After this call:
- * Phy, Encoder, Timing Generator are programmed and enabled.
- * New streams are enabled with blank stream; no memory read.
- */
-/*
* Enable stereo when commit_streams is not required,
* for example, frame alternate.
*/
@@ -430,12 +473,11 @@ void dc_enable_stereo(
/* Triggers multi-stream synchronization. */
void dc_trigger_sync(struct dc *dc, struct dc_state *context);
-enum surface_update_type dc_check_update_surfaces_for_stream(
- struct dc *dc,
+struct surface_update_descriptor dc_check_update_surfaces_for_stream(
+ const struct dc_check_config *check_config,
struct dc_surface_update *updates,
int surface_count,
- struct dc_stream_update *stream_update,
- const struct dc_stream_status *stream_status);
+ struct dc_stream_update *stream_update);
/**
* Create a new default stream for the requested sink
@@ -449,30 +491,43 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink);
void dc_stream_retain(struct dc_stream_state *dc_stream);
void dc_stream_release(struct dc_stream_state *dc_stream);
-struct dc_stream_status *dc_stream_get_status_from_state(
- struct dc_state *state,
- struct dc_stream_state *stream);
-struct dc_stream_status *dc_stream_get_status(
- struct dc_stream_state *dc_stream);
-
-#ifndef TRIM_FSFT
-bool dc_optimize_timing_for_fsft(
- struct dc_stream_state *pStream,
- unsigned int max_input_rate_in_khz);
-#endif
+struct dc_stream_status *dc_stream_get_status(struct dc_stream_state *dc_stream);
+const struct dc_stream_status *dc_stream_get_status_const(const struct dc_stream_state *dc_stream);
/*******************************************************************************
* Cursor interfaces - To manages the cursor within a stream
******************************************************************************/
/* TODO: Deprecated once we switch to dc_set_cursor_position */
+
+void program_cursor_attributes(
+ struct dc *dc,
+ struct dc_stream_state *stream);
+
+void program_cursor_position(
+ struct dc *dc,
+ struct dc_stream_state *stream);
+
+bool dc_stream_check_cursor_attributes(
+ const struct dc_stream_state *stream,
+ struct dc_state *state,
+ const struct dc_cursor_attributes *attributes);
+
bool dc_stream_set_cursor_attributes(
struct dc_stream_state *stream,
const struct dc_cursor_attributes *attributes);
+bool dc_stream_program_cursor_attributes(
+ struct dc_stream_state *stream,
+ const struct dc_cursor_attributes *attributes);
+
bool dc_stream_set_cursor_position(
struct dc_stream_state *stream,
const struct dc_cursor_position *position);
+bool dc_stream_program_cursor_position(
+ struct dc_stream_state *stream,
+ const struct dc_cursor_position *position);
+
bool dc_stream_adjust_vmin_vmax(struct dc *dc,
struct dc_stream_state *stream,
@@ -482,27 +537,29 @@ bool dc_stream_get_last_used_drr_vtotal(struct dc *dc,
struct dc_stream_state *stream,
uint32_t *refresh_rate);
-bool dc_stream_get_crtc_position(struct dc *dc,
- struct dc_stream_state **stream,
- int num_streams,
- unsigned int *v_pos,
- unsigned int *nom_v_pos);
-
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
-bool dc_stream_forward_dmcu_crc_window(struct dc *dc, struct dc_stream_state *stream,
- struct crc_params *crc_window);
-bool dc_stream_stop_dmcu_crc_win_update(struct dc *dc,
- struct dc_stream_state *stream);
+bool dc_stream_forward_crc_window(struct dc_stream_state *stream,
+ struct rect *rect,
+ uint8_t phy_id,
+ bool is_stop);
+
+bool dc_stream_forward_multiple_crc_window(struct dc_stream_state *stream,
+ struct crc_window *window,
+ uint8_t phy_id,
+ bool stop);
#endif
bool dc_stream_configure_crc(struct dc *dc,
struct dc_stream_state *stream,
struct crc_params *crc_window,
bool enable,
- bool continuous);
+ bool continuous,
+ uint8_t idx,
+ bool reset);
bool dc_stream_get_crc(struct dc *dc,
struct dc_stream_state *stream,
+ uint8_t idx,
uint32_t *r_cr,
uint32_t *g_y,
uint32_t *b_cb);
@@ -524,10 +581,26 @@ bool dc_stream_set_gamut_remap(struct dc *dc,
bool dc_stream_program_csc_matrix(struct dc *dc,
struct dc_stream_state *stream);
-bool dc_stream_get_crtc_position(struct dc *dc,
- struct dc_stream_state **stream,
- int num_streams,
- unsigned int *v_pos,
- unsigned int *nom_v_pos);
+struct dc_rmcm_3dlut *dc_stream_get_3dlut_for_stream(
+ const struct dc *dc,
+ const struct dc_stream_state *stream,
+ bool allocate_one);
+
+void dc_stream_release_3dlut_for_stream(
+ const struct dc *dc,
+ const struct dc_stream_state *stream);
+
+void dc_stream_init_rmcm_3dlut(struct dc *dc);
+
+struct pipe_ctx *dc_stream_get_pipe_ctx(struct dc_stream_state *stream);
+
+void dc_dmub_update_dirty_rect(struct dc *dc,
+ int surface_count,
+ struct dc_stream_state *stream,
+ struct dc_surface_update *srf_updates,
+ struct dc_state *context);
+
+bool dc_stream_is_cursor_limit_pending(struct dc *dc, struct dc_stream_state *stream);
+bool dc_stream_can_clear_cursor_limit(struct dc *dc, struct dc_stream_state *stream);
#endif /* DC_STREAM_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h b/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h
new file mode 100644
index 000000000000..ca37eac20986
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_STREAM_PRIV_H_
+#define _DC_STREAM_PRIV_H_
+
+#include "dc_stream.h"
+
+bool dc_stream_construct(struct dc_stream_state *stream,
+ struct dc_sink *dc_sink_data);
+void dc_stream_destruct(struct dc_stream_state *stream);
+
+void dc_stream_assign_stream_id(struct dc_stream_state *stream);
+
+/*
+ * Finds the highest refresh rate that can be achieved
+ * from starting_refresh_hz while staying within flicker criteria
+ */
+int dc_stream_calculate_max_flickerless_refresh_rate(struct dc_stream_state *stream,
+ int starting_refresh_hz,
+ bool is_gaming);
+
+/*
+ * Finds the lowest refresh rate that can be achieved
+ * from starting_refresh_hz while staying within flicker criteria
+ */
+int dc_stream_calculate_min_flickerless_refresh_rate(struct dc_stream_state *stream,
+ int starting_refresh_hz,
+ bool is_gaming);
+
+/*
+ * Determines if there will be a flicker when moving between 2 refresh rates
+ */
+bool dc_stream_is_refresh_rate_range_flickerless(struct dc_stream_state *stream,
+ int hz1,
+ int hz2,
+ bool is_gaming);
+
+/*
+ * Determines the max instant vtotal delta decrease that can be applied without
+ * flickering for a given stream
+ */
+unsigned int dc_stream_get_max_flickerless_instant_vtotal_decrease(struct dc_stream_state *stream,
+ bool is_gaming);
+
+/*
+ * Determines the max instant vtotal delta increase that can be applied without
+ * flickering for a given stream
+ */
+unsigned int dc_stream_get_max_flickerless_instant_vtotal_increase(struct dc_stream_state *stream,
+ bool is_gaming);
+
+#endif // _DC_STREAM_PRIV_H_
diff --git a/drivers/gpu/drm/amd/display/dc/dc_trace.h b/drivers/gpu/drm/amd/display/dc/dc_trace.h
index c711797e5c9e..bbec308a3a5e 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_trace.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_trace.h
@@ -40,3 +40,5 @@
#define TRACE_DCN_FPU(begin, function, line, ref_count) \
trace_dcn_fpu(begin, function, line, ref_count)
+#define TRACE_OPTC_LOCK_UNLOCK_STATE(optc, inst, lock) \
+ trace_dcn_optc_lock_unlock_state(optc, inst, lock, __func__, __LINE__)
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index 388457ffc0a8..f46039f64203 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -32,14 +32,15 @@
#include "os_types.h"
#include "fixed31_32.h"
#include "irq_types.h"
+#include "dc_ddc_types.h"
#include "dc_dp_types.h"
+#include "dc_hdmi_types.h"
#include "dc_hw_types.h"
#include "dal_types.h"
#include "grph_object_defs.h"
+#include "grph_object_ctrl_defs.h"
-#ifdef CONFIG_DRM_AMD_DC_HDCP
#include "dm_cp_psp.h"
-#endif
/* forward declarations */
struct dc_plane_state;
@@ -68,13 +69,6 @@ enum dce_environment {
DCE_ENV_VIRTUAL_HW
};
-/* Note: use these macro definitions instead of direct comparison! */
-#define IS_FPGA_MAXIMUS_DC(dce_environment) \
- (dce_environment == DCE_ENV_FPGA_MAXIMUS)
-
-#define IS_DIAG_DC(dce_environment) \
- (IS_FPGA_MAXIMUS_DC(dce_environment) || (dce_environment == DCE_ENV_DIAG))
-
struct dc_perf_trace {
unsigned long read_count;
unsigned long write_count;
@@ -82,13 +76,7 @@ struct dc_perf_trace {
unsigned long last_entry_write;
};
-#define DC_MAX_EDID_BUFFER_SIZE 2048
-#define DC_EDID_BLOCK_SIZE 128
-#define MAX_SURFACE_NUM 4
#define NUM_PIXEL_FORMATS 10
-#define MAX_REPEATER_CNT 8
-
-#include "dc_ddc_types.h"
enum tiling_mode {
TILING_MODE_INVALID,
@@ -138,6 +126,7 @@ enum dc_edid_status {
EDID_BAD_CHECKSUM,
EDID_THE_SAME,
EDID_FALL_BACK,
+ EDID_PARTIAL_VALID,
};
enum act_return_status {
@@ -173,18 +162,6 @@ struct dc_edid {
#define AUDIO_INFO_DISPLAY_NAME_SIZE_IN_CHARS 20
-union display_content_support {
- unsigned int raw;
- struct {
- unsigned int valid_content_type :1;
- unsigned int game_content :1;
- unsigned int cinema_content :1;
- unsigned int photo_content :1;
- unsigned int graphics_content :1;
- unsigned int reserved :27;
- } bits;
-};
-
struct dc_panel_patch {
unsigned int dppowerup_delay;
unsigned int extra_t12_ms;
@@ -195,9 +172,23 @@ struct dc_panel_patch {
unsigned int disable_fec;
unsigned int extra_t3_ms;
unsigned int max_dsc_target_bpp_limit;
+ unsigned int embedded_tiled_slave;
+ unsigned int disable_fams;
unsigned int skip_avmute;
-};
-
+ unsigned int skip_audio_sab_check;
+ unsigned int mst_start_top_delay;
+ unsigned int remove_sink_ext_caps;
+ unsigned int disable_colorimetry;
+ uint8_t blankstream_before_otg_off;
+ bool oled_optimize_display_on;
+ unsigned int force_mst_blocked_discovery;
+ unsigned int wait_after_dpcd_poweroff_ms;
+};
+
+/**
+ * struct dc_edid_caps - Capabilities read from EDID.
+ * @analog: Whether the monitor is analog. Used by DVI-I handling.
+ */
struct dc_edid_caps {
/* sink identification */
uint16_t manufacturer_id;
@@ -214,8 +205,6 @@ struct dc_edid_caps {
uint32_t audio_latency;
uint32_t video_latency;
- union display_content_support content_support;
-
uint8_t qs_bit;
uint8_t qy_bit;
@@ -226,6 +215,9 @@ struct dc_edid_caps {
bool edid_hdmi;
bool hdr_supported;
+ bool rr_capable;
+ bool scdc_present;
+ bool analog;
struct dc_panel_patch panel_patch;
};
@@ -276,6 +268,9 @@ enum dc_timing_source {
TIMING_SOURCE_EDID_CEA_SVD,
TIMING_SOURCE_EDID_CVT_3BYTE,
TIMING_SOURCE_EDID_4BYTE,
+ TIMING_SOURCE_EDID_CEA_DISPLAYID_VTDB,
+ TIMING_SOURCE_EDID_CEA_RID,
+ TIMING_SOURCE_EDID_DISPLAYID_TYPE5,
TIMING_SOURCE_VBIOS,
TIMING_SOURCE_CV,
TIMING_SOURCE_TV,
@@ -358,7 +353,8 @@ enum dc_connection_type {
dc_connection_none,
dc_connection_single,
dc_connection_mst_branch,
- dc_connection_sst_branch
+ dc_connection_sst_branch,
+ dc_connection_dac_load
};
struct dc_csc_adjustments {
@@ -368,71 +364,6 @@ struct dc_csc_adjustments {
struct fixed31_32 hue;
};
-enum dpcd_downstream_port_max_bpc {
- DOWN_STREAM_MAX_8BPC = 0,
- DOWN_STREAM_MAX_10BPC,
- DOWN_STREAM_MAX_12BPC,
- DOWN_STREAM_MAX_16BPC
-};
-
-
-enum link_training_offset {
- DPRX = 0,
- LTTPR_PHY_REPEATER1 = 1,
- LTTPR_PHY_REPEATER2 = 2,
- LTTPR_PHY_REPEATER3 = 3,
- LTTPR_PHY_REPEATER4 = 4,
- LTTPR_PHY_REPEATER5 = 5,
- LTTPR_PHY_REPEATER6 = 6,
- LTTPR_PHY_REPEATER7 = 7,
- LTTPR_PHY_REPEATER8 = 8
-};
-
-struct dc_lttpr_caps {
- union dpcd_rev revision;
- uint8_t mode;
- uint8_t max_lane_count;
- uint8_t max_link_rate;
- uint8_t phy_repeater_cnt;
- uint8_t max_ext_timeout;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- union dp_main_link_channel_coding_lttpr_cap main_link_channel_coding;
- union dp_128b_132b_supported_lttpr_link_rates supported_128b_132b_rates;
-#endif
- uint8_t aux_rd_interval[MAX_REPEATER_CNT - 1];
-};
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-struct dc_dongle_dfp_cap_ext {
- bool supported;
- uint16_t max_pixel_rate_in_mps;
- uint16_t max_video_h_active_width;
- uint16_t max_video_v_active_height;
- struct dp_encoding_format_caps encoding_format_caps;
- struct dp_color_depth_caps rgb_color_depth_caps;
- struct dp_color_depth_caps ycbcr444_color_depth_caps;
- struct dp_color_depth_caps ycbcr422_color_depth_caps;
- struct dp_color_depth_caps ycbcr420_color_depth_caps;
-};
-#endif
-
-struct dc_dongle_caps {
- /* dongle type (DP converter, CV smart dongle) */
- enum display_dongle_type dongle_type;
- bool extendedCapValid;
- /* If dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER,
- indicates 'Frame Sequential-to-lllFrame Pack' conversion capability.*/
- bool is_dp_hdmi_s3d_converter;
- bool is_dp_hdmi_ycbcr422_pass_through;
- bool is_dp_hdmi_ycbcr420_pass_through;
- bool is_dp_hdmi_ycbcr422_converter;
- bool is_dp_hdmi_ycbcr420_converter;
- uint32_t dp_hdmi_max_bpc;
- uint32_t dp_hdmi_max_pixel_clk_in_khz;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- struct dc_dongle_dfp_cap_ext dfp_cap_ext;
-#endif
-};
/* Scaling format */
enum scaling_transformation {
SCALING_TRANSFORMATION_UNINITIALIZED,
@@ -505,7 +436,7 @@ struct dc_dwb_params {
enum dwb_capture_rate capture_rate; /* controls the frame capture rate */
struct scaling_taps scaler_taps; /* Scaling taps */
enum dwb_subsample_position subsample_position;
- struct dc_transfer_func *out_transfer_func;
+ const struct dc_transfer_func *out_transfer_func;
};
/* audio*/
@@ -639,6 +570,12 @@ struct dc_info_packet_128 {
uint8_t sb[128];
};
+struct dc_edid_read_policy {
+ uint32_t max_retry_count;
+ uint32_t delay_time_ms;
+ uint32_t ignore_checksum;
+};
+
#define DC_PLANE_UPDATE_TIMES_MAX 10
struct dc_plane_flip_time {
@@ -647,6 +584,12 @@ struct dc_plane_flip_time {
unsigned int prev_update_time_in_us;
};
+enum dc_alpm_mode {
+ DC_ALPM_AUXWAKE = 0,
+ DC_ALPM_AUXLESS = 1,
+ DC_ALPM_UNSUPPORTED = 0xF,
+};
+
enum dc_psr_state {
PSR_STATE0 = 0x0,
PSR_STATE1,
@@ -661,10 +604,19 @@ enum dc_psr_state {
PSR_STATE4b,
PSR_STATE4c,
PSR_STATE4d,
+ PSR_STATE4_FULL_FRAME,
+ PSR_STATE4a_FULL_FRAME,
+ PSR_STATE4b_FULL_FRAME,
+ PSR_STATE4c_FULL_FRAME,
+ PSR_STATE4_FULL_FRAME_POWERUP,
+ PSR_STATE4_FULL_FRAME_HW_LOCK,
PSR_STATE5,
PSR_STATE5a,
PSR_STATE5b,
PSR_STATE5c,
+ PSR_STATE_HWLOCK_MGR,
+ PSR_STATE_POLLVUPDATE,
+ PSR_STATE_RELEASE_HWLOCK_MGR_FULL_FRAME,
PSR_STATE_INVALID = 0xFF
};
@@ -676,6 +628,14 @@ struct psr_config {
unsigned int psr_sdp_transmit_line_num_deadline;
bool allow_smu_optimizations;
bool allow_multi_disp_optimizations;
+ /* Panel self refresh 2 selective update granularity required */
+ bool su_granularity_required;
+ /* psr2 selective update y granularity capability */
+ uint8_t su_y_granularity;
+ unsigned int line_time_in_us;
+ uint8_t rate_control_caps;
+ uint16_t dsc_slice_height;
+ bool os_request_force_ffu;
};
union dmcu_psr_level {
@@ -690,7 +650,9 @@ union dmcu_psr_level {
unsigned int SKIP_AUTO_STATE_ADVANCE:1;
unsigned int DISABLE_PSR_ENTRY_ABORT:1;
unsigned int SKIP_SINGLE_OTG_DISABLE:1;
- unsigned int RESERVED:22;
+ unsigned int DISABLE_ALPM:1;
+ unsigned int ALPM_DEFAULT_PD_MODE:1;
+ unsigned int RESERVED:20;
} bits;
unsigned int u32all;
};
@@ -779,6 +741,14 @@ struct psr_context {
unsigned int frame_delay;
bool allow_smu_optimizations;
bool allow_multi_disp_optimizations;
+ /* Panel self refresh 2 selective update granularity required */
+ bool su_granularity_required;
+ /* psr2 selective update y granularity capability */
+ uint8_t su_y_granularity;
+ unsigned int line_time_in_us;
+ uint8_t rate_control_caps;
+ uint16_t dsc_slice_height;
+ bool os_request_force_ffu;
};
struct colorspace_transform {
@@ -833,6 +803,7 @@ struct dc_context {
struct dc *dc;
void *driver_context; /* e.g. amdgpu_device */
+ struct dal_logger *logger;
struct dc_perf_trace *perf_trace;
void *cgs_device;
@@ -851,10 +822,10 @@ struct dc_context {
uint32_t dc_edp_id_count;
uint64_t fbc_gpu_addr;
struct dc_dmub_srv *dmub_srv;
-#ifdef CONFIG_DRM_AMD_DC_HDCP
struct cp_psp cp_psp;
-#endif
-
+ uint32_t *dcn_reg_offsets;
+ uint32_t *nbio_reg_offsets;
+ uint32_t *clk_reg_offsets;
};
/* DSC DPCD capabilities */
@@ -925,7 +896,15 @@ struct dsc_dec_dpcd_caps {
uint32_t branch_overall_throughput_0_mps; /* In MPs */
uint32_t branch_overall_throughput_1_mps; /* In MPs */
uint32_t branch_max_line_width;
- bool is_dp;
+ bool is_dp; /* Decoded format */
+};
+
+struct hblank_expansion_dpcd_caps {
+ bool expansion_supported;
+ bool reduction_supported;
+ bool buffer_unit_bytes; /* True: buffer size in bytes. False: buffer size in pixels*/
+ bool buffer_per_port; /* True: buffer size per port. False: buffer size per lane*/
+ uint32_t buffer_size; /* Add 1 to value and multiply by 32 */
};
struct dc_golden_table {
@@ -948,11 +927,26 @@ enum dc_gpu_mem_alloc_type {
DC_MEM_ALLOC_TYPE_AGP
};
+enum dc_link_encoding_format {
+ DC_LINK_ENCODING_UNSPECIFIED = 0,
+ DC_LINK_ENCODING_DP_8b_10b,
+ DC_LINK_ENCODING_DP_128b_132b,
+ DC_LINK_ENCODING_HDMI_TMDS,
+ DC_LINK_ENCODING_HDMI_FRL
+};
+
enum dc_psr_version {
DC_PSR_VERSION_1 = 0,
+ DC_PSR_VERSION_SU_1 = 1,
DC_PSR_VERSION_UNSUPPORTED = 0xFFFFFFFF,
};
+enum dc_replay_version {
+ DC_FREESYNC_REPLAY = 0,
+ DC_VESA_PANEL_REPLAY = 1,
+ DC_REPLAY_VERSION_UNSUPPORTED = 0XFF,
+};
+
/* Possible values of display_endpoint_id.endpoint */
enum display_endpoint_type {
DISPLAY_ENDPOINT_PHY = 0, /* Physical connector. */
@@ -969,4 +963,470 @@ struct display_endpoint_id {
enum display_endpoint_type ep_type;
};
+enum backlight_control_type {
+ BACKLIGHT_CONTROL_PWM = 0,
+ BACKLIGHT_CONTROL_VESA_AUX = 1,
+ BACKLIGHT_CONTROL_AMD_AUX = 2,
+};
+
+#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
+#define MAX_CRC_WINDOW_NUM 2
+
+struct otg_phy_mux {
+ uint8_t phy_output_num;
+ uint8_t otg_output_num;
+};
+
+struct crc_window {
+ struct rect rect;
+ bool enable;
+};
+#endif
+
+enum dc_detect_reason {
+ DETECT_REASON_BOOT,
+ DETECT_REASON_RESUMEFROMS3S4,
+ DETECT_REASON_HPD,
+ DETECT_REASON_HPDRX,
+ DETECT_REASON_FALLBACK,
+ DETECT_REASON_RETRAIN,
+ DETECT_REASON_TDR,
+};
+
+struct dc_link_status {
+ bool link_active;
+ struct dpcd_caps *dpcd_caps;
+};
+
+union hdcp_rx_caps {
+ struct {
+ uint8_t version;
+ uint8_t reserved;
+ struct {
+ uint8_t repeater : 1;
+ uint8_t hdcp_capable : 1;
+ uint8_t reserved : 6;
+ } byte0;
+ } fields;
+ uint8_t raw[3];
+};
+
+union hdcp_bcaps {
+ struct {
+ uint8_t HDCP_CAPABLE:1;
+ uint8_t REPEATER:1;
+ uint8_t RESERVED:6;
+ } bits;
+ uint8_t raw;
+};
+
+struct hdcp_caps {
+ union hdcp_rx_caps rx_caps;
+ union hdcp_bcaps bcaps;
+};
+
+/* DP MST stream allocation (payload bandwidth number) */
+struct link_mst_stream_allocation {
+ /* DIG front */
+ const struct stream_encoder *stream_enc;
+ /* HPO DP Stream Encoder */
+ const struct hpo_dp_stream_encoder *hpo_dp_stream_enc;
+ /* associate DRM payload table with DC stream encoder */
+ uint8_t vcp_id;
+ /* number of slots required for the DP stream in transport packet */
+ uint8_t slot_count;
+};
+
+#define MAX_CONTROLLER_NUM 6
+
+/* DP MST stream allocation table */
+struct link_mst_stream_allocation_table {
+ /* number of DP video streams */
+ int stream_count;
+ /* array of stream allocations */
+ struct link_mst_stream_allocation stream_allocations[MAX_CONTROLLER_NUM];
+};
+
+/* PSR feature flags */
+struct psr_settings {
+ bool psr_feature_enabled; // PSR is supported by sink
+ bool psr_allow_active; // PSR is currently active
+ enum dc_psr_version psr_version; // Internal PSR version, determined based on DPCD
+ bool psr_vtotal_control_support; // Vtotal control is supported by sink
+ unsigned long long psr_dirty_rects_change_timestamp_ns; // for delay of enabling PSR-SU
+
+ /* These parameters are calculated in Driver,
+ * based on display timing and Sink capabilities.
+ * If VBLANK region is too small and Sink takes a long time
+ * to set up RFB, it may take an extra frame to enter PSR state.
+ */
+ bool psr_frame_capture_indication_req;
+ unsigned int psr_sdp_transmit_line_num_deadline;
+ uint8_t force_ffu_mode;
+ unsigned int psr_power_opt;
+
+ /**
+ * Some panels cannot handle idle pattern during PSR entry.
+ * To power down phy before disable stream to avoid sending
+ * idle pattern.
+ */
+ uint8_t power_down_phy_before_disable_stream;
+};
+
+enum replay_coasting_vtotal_type {
+ PR_COASTING_TYPE_NOM = 0,
+ PR_COASTING_TYPE_STATIC,
+ PR_COASTING_TYPE_FULL_SCREEN_VIDEO,
+ PR_COASTING_TYPE_TEST_HARNESS,
+ PR_COASTING_TYPE_NUM,
+};
+
+enum replay_link_off_frame_count_level {
+ PR_LINK_OFF_FRAME_COUNT_FAIL = 0x0,
+ PR_LINK_OFF_FRAME_COUNT_GOOD = 0x2,
+ PR_LINK_OFF_FRAME_COUNT_BEST = 0x6,
+};
+
+/*
+ * This is the general interface for Replay to
+ * set a 32-bit variable in DMUB.
+ * The message type indicates which variable
+ * is passed to DMUB.
+ */
+enum replay_FW_Message_type {
+ Replay_Msg_Not_Support = -1,
+ Replay_Set_Timing_Sync_Supported,
+ Replay_Set_Residency_Frameupdate_Timer,
+ Replay_Set_Pseudo_VTotal,
+ Replay_Disabled_Adaptive_Sync_SDP,
+ Replay_Set_Version,
+ Replay_Set_General_Cmd,
+};
+
+union replay_error_status {
+ struct {
+ unsigned int STATE_TRANSITION_ERROR :1;
+ unsigned int LINK_CRC_ERROR :1;
+ unsigned int DESYNC_ERROR :1;
+ unsigned int RESERVED_3 :1;
+ unsigned int LOW_RR_INCORRECT_VTOTAL :1;
+ unsigned int NO_DOUBLED_RR :1;
+ unsigned int RESERVED_6_7 :2;
+ } bits;
+ unsigned char raw;
+};
+
+union replay_low_refresh_rate_enable_options {
+ struct {
+ //BIT[0-3]: Replay Low Hz Support control
+ unsigned int ENABLE_LOW_RR_SUPPORT :1;
+ unsigned int SKIP_ASIC_CHECK :1;
+ unsigned int RESERVED_2_3 :2;
+ //BIT[4-15]: Replay Low Hz Enable Scenarios
+ unsigned int ENABLE_STATIC_SCREEN :1;
+ unsigned int ENABLE_FULL_SCREEN_VIDEO :1;
+ unsigned int ENABLE_GENERAL_UI :1;
+ unsigned int RESERVED_7_15 :9;
+ //BIT[16-31]: Replay Low Hz Enable Check
+ unsigned int ENABLE_STATIC_FLICKER_CHECK :1;
+ unsigned int RESERVED_17_31 :15;
+ } bits;
+ unsigned int raw;
+};
+
+struct replay_config {
+ /* Replay version */
+ enum dc_replay_version replay_version;
+ /* Replay feature is supported */
+ bool replay_supported;
+ /* Replay caps support DPCD & EDID caps*/
+ bool replay_cap_support;
+ /* Power opt flags that are supported */
+ unsigned int replay_power_opt_supported;
+ /* SMU optimization is supported */
+ bool replay_smu_opt_supported;
+ /* Replay enablement option */
+ unsigned int replay_enable_option;
+ /* Replay debug flags */
+ uint32_t debug_flags;
+ /* Replay sync is supported */
+ bool replay_timing_sync_supported;
+ /* Replay Disable desync error check. */
+ bool force_disable_desync_error_check;
+ /* Replay Received Desync Error HPD. */
+ bool received_desync_error_hpd;
+ /* Replay feature supports long vblank */
+ bool replay_support_fast_resync_in_ultra_sleep_mode;
+ /* Replay error status */
+ union replay_error_status replay_error_status;
+ /* Replay Low Hz enable Options */
+ union replay_low_refresh_rate_enable_options low_rr_enable_options;
+ /* Replay coasting vtotal is within low refresh rate range. */
+ bool low_rr_activated;
+ /* Replay low refresh rate supported*/
+ bool low_rr_supported;
+ /* Replay Video Conferencing Optimization Enabled */
+ bool replay_video_conferencing_optimization_enabled;
+ /* Replay alpm mode */
+ enum dc_alpm_mode alpm_mode;
+ /* Replay full screen only */
+ bool os_request_force_ffu;
+};
+
+/* Replay feature flags*/
+struct replay_settings {
+ /* Replay configuration */
+ struct replay_config config;
+ /* Replay feature is ready for activating */
+ bool replay_feature_enabled;
+ /* Replay is currently active */
+ bool replay_allow_active;
+ /* Replay long vblank is allowed */
+ bool replay_allow_long_vblank;
+ /* Power opt flags that are activated currently */
+ unsigned int replay_power_opt_active;
+ /* SMU optimization is enabled */
+ bool replay_smu_opt_enable;
+ /* Current Coasting vtotal */
+ uint32_t coasting_vtotal;
+ /* Coasting vtotal table */
+ uint32_t coasting_vtotal_table[PR_COASTING_TYPE_NUM];
+ /* Defer Update Coasting vtotal table */
+ uint32_t defer_update_coasting_vtotal_table[PR_COASTING_TYPE_NUM];
+ /* Skip frame number table */
+ uint32_t frame_skip_number_table[PR_COASTING_TYPE_NUM];
+ /* Defer skip frame number table */
+ uint32_t defer_frame_skip_number_table[PR_COASTING_TYPE_NUM];
+ /* Maximum link off frame count */
+ uint32_t link_off_frame_count;
+ /* Replay pseudo vtotal for low refresh rate*/
+ uint16_t low_rr_full_screen_video_pseudo_vtotal;
+ /* Replay last pseudo vtotal set to DMUB */
+ uint16_t last_pseudo_vtotal;
+ /* Replay desync error */
+ uint32_t replay_desync_error_fail_count;
+ /* The frame skip number that DAL sends to DMUB */
+ uint16_t frame_skip_number;
+};
+
+/* To split out "global" and "per-panel" config settings.
+ * Add a struct dc_panel_config under dc_link
+ */
+struct dc_panel_config {
+ /* extra panel power sequence parameters */
+ struct pps {
+ unsigned int extra_t3_ms;
+ unsigned int extra_t7_ms;
+ unsigned int extra_delay_backlight_off;
+ unsigned int extra_post_t7_ms;
+ unsigned int extra_pre_t11_ms;
+ unsigned int extra_t12_ms;
+ unsigned int extra_post_OUI_ms;
+ } pps;
+ /* nit brightness */
+ struct nits_brightness {
+ unsigned int peak; /* nits */
+ unsigned int max_avg; /* nits */
+ unsigned int min; /* 1/10000 nits */
+ unsigned int max_nonboost_brightness_millinits;
+ unsigned int min_brightness_millinits;
+ } nits_brightness;
+ /* PSR */
+ struct psr {
+ bool disable_psr;
+ bool disallow_psrsu;
+ bool disallow_replay;
+ bool rc_disable;
+ bool rc_allow_static_screen;
+ bool rc_allow_fullscreen_VPB;
+ bool read_psrcap_again;
+ unsigned int replay_enable_option;
+ } psr;
+ /* ABM */
+ struct varib {
+ unsigned int varibright_feature_enable;
+ unsigned int def_varibright_level;
+ unsigned int abm_config_setting;
+ } varib;
+ /* edp DSC */
+ struct dsc {
+ bool disable_dsc_edp;
+ unsigned int force_dsc_edp_policy;
+ } dsc;
+ /* eDP ILR */
+ struct ilr {
+ bool optimize_edp_link_rate; /* eDP ILR */
+ } ilr;
+};
+
+#define MAX_SINKS_PER_LINK 4
+
+/*
+ * USB4 DPIA BW ALLOCATION STRUCTS
+ */
+struct dc_dpia_bw_alloc {
+ int remote_sink_req_bw[MAX_SINKS_PER_LINK]; // BW requested by remote sinks
+ int link_verified_bw; // The verified BW that the link can allocate and use
+ int link_max_bw; // The Max BW that link can require/support
+ int allocated_bw; // The Actual Allocated BW for this DPIA
+ int estimated_bw; // The estimated available BW for this DPIA
+ int bw_granularity; // BW Granularity
+ int dp_overhead; // DP overhead in dp tunneling
+ bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3: DP-Tx & Dpia & CM
+ uint8_t nrd_max_lane_count; // Non-reduced max lane count
+ uint8_t nrd_max_link_rate; // Non-reduced max link rate
+};
+
+enum dc_hpd_enable_select {
+ HPD_EN_FOR_ALL_EDP = 0,
+ HPD_EN_FOR_PRIMARY_EDP_ONLY,
+ HPD_EN_FOR_SECONDARY_EDP_ONLY,
+};
+
+enum dc_cm2_shaper_3dlut_setting {
+ DC_CM2_SHAPER_3DLUT_SETTING_BYPASS_ALL,
+ DC_CM2_SHAPER_3DLUT_SETTING_ENABLE_SHAPER,
+ /* Bypassing Shaper will always bypass 3DLUT */
+ DC_CM2_SHAPER_3DLUT_SETTING_ENABLE_SHAPER_3DLUT
+};
+
+enum dc_cm2_gpu_mem_layout {
+ DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_RGB,
+ DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_BGR,
+ DC_CM2_GPU_MEM_LAYOUT_1D_PACKED_LINEAR
+};
+
+enum dc_cm2_gpu_mem_pixel_component_order {
+ DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_RGBA,
+};
+
+enum dc_cm2_gpu_mem_format {
+ DC_CM2_GPU_MEM_FORMAT_16161616_UNORM_12MSB,
+ DC_CM2_GPU_MEM_FORMAT_16161616_UNORM_12LSB,
+ DC_CM2_GPU_MEM_FORMAT_16161616_FLOAT_FP1_5_10
+};
+
+struct dc_cm2_gpu_mem_format_parameters {
+ enum dc_cm2_gpu_mem_format format;
+ union {
+ struct {
+ /* bias & scale for float only */
+ uint16_t bias;
+ uint16_t scale;
+ } float_params;
+ };
+};
+
+enum dc_cm2_gpu_mem_size {
+ DC_CM2_GPU_MEM_SIZE_171717,
+ DC_CM2_GPU_MEM_SIZE_TRANSFORMED,
+};
+
+struct dc_cm2_gpu_mem_parameters {
+ struct dc_plane_address addr;
+ enum dc_cm2_gpu_mem_layout layout;
+ struct dc_cm2_gpu_mem_format_parameters format_params;
+ enum dc_cm2_gpu_mem_pixel_component_order component_order;
+ enum dc_cm2_gpu_mem_size size;
+ uint16_t bit_depth;
+};
+
+enum dc_cm2_transfer_func_source {
+ DC_CM2_TRANSFER_FUNC_SOURCE_SYSMEM,
+ DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM
+};
+
+struct dc_cm2_component_settings {
+ enum dc_cm2_shaper_3dlut_setting shaper_3dlut_setting;
+ bool lut1d_enable;
+};
+
+/*
+ * All pointers in this struct must remain valid for as long as the 3DLUTs are used
+ */
+struct dc_cm2_func_luts {
+ const struct dc_transfer_func *shaper;
+ struct {
+ enum dc_cm2_transfer_func_source lut3d_src;
+ union {
+ const struct dc_3dlut *lut3d_func;
+ struct dc_cm2_gpu_mem_parameters gpu_mem_params;
+ };
+ bool rmcm_3dlut_shaper_select;
+ bool mpc_3dlut_enable;
+ bool rmcm_3dlut_enable;
+ bool mpc_mcm_post_blend;
+ uint8_t rmcm_tmz;
+ } lut3d_data;
+ const struct dc_transfer_func *lut1d_func;
+};
+
+struct dc_cm2_parameters {
+ struct dc_cm2_component_settings component_settings;
+ struct dc_cm2_func_luts cm2_luts;
+};
+
+enum mall_stream_type {
+ SUBVP_NONE, // subvp not in use
+ SUBVP_MAIN, // subvp in use, this stream is main stream
+ SUBVP_PHANTOM, // subvp in use, this stream is a phantom stream
+};
+
+enum dc_power_source_type {
+ DC_POWER_SOURCE_AC, // wall power
+ DC_POWER_SOURCE_DC, // battery power
+};
+
+struct dc_state_create_params {
+ enum dc_power_source_type power_source;
+};
+
+struct dc_commit_streams_params {
+ struct dc_stream_state **streams;
+ uint8_t stream_count;
+ enum dc_power_source_type power_source;
+};
+
+struct set_backlight_level_params {
+ /* backlight in pwm */
+ uint32_t backlight_pwm_u16_16;
+ /* brightness ramping */
+ uint32_t frame_ramp;
+ /* backlight control type
+ * 0: PWM backlight control
+ * 1: VESA AUX backlight control
+ * 2: AMD AUX backlight control
+ */
+ enum backlight_control_type control_type;
+ /* backlight in millinits */
+ uint32_t backlight_millinits;
+ /* transition time in ms */
+ uint32_t transition_time_in_ms;
+ /* minimum luminance in nits */
+ uint32_t min_luminance;
+ /* maximum luminance in nits */
+ uint32_t max_luminance;
+ /* minimum backlight in pwm */
+ uint32_t min_backlight_pwm;
+ /* maximum backlight in pwm */
+ uint32_t max_backlight_pwm;
+ /* AUX HW instance */
+ uint8_t aux_inst;
+};
+
+enum dc_validate_mode {
+ /* validate the mode and program HW */
+ DC_VALIDATE_MODE_AND_PROGRAMMING = 0,
+ /* only validate the mode */
+ DC_VALIDATE_MODE_ONLY = 1,
+ /* validate the mode and get the max state (voltage level) */
+ DC_VALIDATE_MODE_AND_STATE_INDEX = 2,
+};
+
+struct dc_validation_dpia_set {
+ const struct dc_link *link;
+ const struct dc_tunnel_settings *tunnel_settings;
+ uint32_t required_bw;
+};
+
#endif /* DC_TYPES_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/Makefile b/drivers/gpu/drm/amd/display/dc/dccg/Makefile
new file mode 100644
index 000000000000..1d5cf0f8e79d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dccg/Makefile
@@ -0,0 +1,103 @@
+
+# Copyright 2022 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Makefile for the 'dccg' sub-component of DAL.
+#
+ifdef CONFIG_DRM_AMD_DC_FP
+###############################################################################
+# DCN
+###############################################################################
+
+DCCG_DCN20 = dcn20_dccg.o
+
+AMD_DAL_DCCG_DCN20 = $(addprefix $(AMDDALPATH)/dc/dccg/dcn20/,$(DCCG_DCN20))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCCG_DCN20)
+
+###############################################################################
+
+DCCG_DCN201 = dcn201_dccg.o
+
+AMD_DAL_DCCG_DCN201 = $(addprefix $(AMDDALPATH)/dc/dccg/dcn201/,$(DCCG_DCN201))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCCG_DCN201)
+
+###############################################################################
+
+DCCG_DCN21 = dcn21_dccg.o
+
+AMD_DAL_DCCG_DCN21 = $(addprefix $(AMDDALPATH)/dc/dccg/dcn21/,$(DCCG_DCN21))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCCG_DCN21)
+
+###############################################################################
+DCCG_DCN30 = dcn30_dccg.o
+
+AMD_DAL_DCCG_DCN30 = $(addprefix $(AMDDALPATH)/dc/dccg/dcn30/,$(DCCG_DCN30))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCCG_DCN30)
+
+###############################################################################
+DCCG_DCN301 = dcn301_dccg.o
+
+AMD_DAL_DCCG_DCN301 = $(addprefix $(AMDDALPATH)/dc/dccg/dcn301/,$(DCCG_DCN301))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCCG_DCN301)
+
+###############################################################################
+
+DCCG_DCN31 = dcn31_dccg.o
+
+AMD_DAL_DCCG_DCN31 = $(addprefix $(AMDDALPATH)/dc/dccg/dcn31/,$(DCCG_DCN31))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCCG_DCN31)
+
+###############################################################################
+
+DCCG_DCN314 = dcn314_dccg.o
+
+AMD_DAL_DCCG_DCN314 = $(addprefix $(AMDDALPATH)/dc/dccg/dcn314/,$(DCCG_DCN314))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCCG_DCN314)
+
+###############################################################################
+
+DCCG_DCN32 = dcn32_dccg.o
+
+AMD_DAL_DCCG_DCN32 = $(addprefix $(AMDDALPATH)/dc/dccg/dcn32/,$(DCCG_DCN32))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCCG_DCN32)
+
+###############################################################################
+
+DCCG_DCN35 = dcn35_dccg.o
+
+AMD_DAL_DCCG_DCN35 = $(addprefix $(AMDDALPATH)/dc/dccg/dcn35/,$(DCCG_DCN35))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCCG_DCN35)
+
+###############################################################################
+DCCG_DCN401 = dcn401_dccg.o
+
+AMD_DAL_DCCG_DCN401 = $(addprefix $(AMDDALPATH)/dc/dccg/dcn401/,$(DCCG_DCN401))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCCG_DCN401)
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.c
index 5999b2da3a01..33d8bd91cb01 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.c
@@ -148,7 +148,7 @@ struct dccg *dccg2_create(
const struct dccg_shift *dccg_shift,
const struct dccg_mask *dccg_mask)
{
- struct dcn_dccg *dccg_dcn = kzalloc(sizeof(*dccg_dcn), GFP_ATOMIC);
+ struct dcn_dccg *dccg_dcn = kzalloc(sizeof(*dccg_dcn), GFP_KERNEL);
struct dccg *base;
if (dccg_dcn == NULL) {
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h
new file mode 100644
index 000000000000..8bdffd9ff31b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h
@@ -0,0 +1,526 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN20_DCCG_H__
+#define __DCN20_DCCG_H__
+
+#include "dccg.h"
+
+#define DCCG_COMMON_REG_LIST_DCN_BASE() \
+ SR(DPPCLK_DTO_CTRL),\
+ DCCG_SRII(DTO_PARAM, DPPCLK, 0),\
+ DCCG_SRII(DTO_PARAM, DPPCLK, 1),\
+ DCCG_SRII(DTO_PARAM, DPPCLK, 2),\
+ DCCG_SRII(DTO_PARAM, DPPCLK, 3),\
+ SR(REFCLK_CNTL),\
+ DCCG_SRII(PIXEL_RATE_CNTL, OTG, 0),\
+ DCCG_SRII(PIXEL_RATE_CNTL, OTG, 1),\
+ SR(DISPCLK_FREQ_CHANGE_CNTL)
+
+#define DCCG_REG_LIST_DCN2() \
+ DCCG_COMMON_REG_LIST_DCN_BASE(),\
+ DCCG_SRII(DTO_PARAM, DPPCLK, 4),\
+ DCCG_SRII(DTO_PARAM, DPPCLK, 5),\
+ DCCG_SRII(PIXEL_RATE_CNTL, OTG, 2),\
+ DCCG_SRII(PIXEL_RATE_CNTL, OTG, 3),\
+ DCCG_SRII(PIXEL_RATE_CNTL, OTG, 4),\
+ DCCG_SRII(PIXEL_RATE_CNTL, OTG, 5)
+
+#define DCCG_SF(reg_name, field_name, post_fix)\
+ .field_name = reg_name ## __ ## field_name ## post_fix
+
+#define DCCG_SFI(reg_name, field_name, field_prefix, inst, post_fix)\
+ .field_prefix ## _ ## field_name[inst] = reg_name ## __ ## field_prefix ## inst ## _ ## field_name ## post_fix
+
+#define DCCG_SFII(block, reg_name, field_prefix, field_name, inst, post_fix)\
+ .field_prefix ## _ ## field_name[inst] = block ## inst ## _ ## reg_name ## __ ## field_prefix ## inst ## _ ## field_name ## post_fix
+
+#define DCCG_COMMON_MASK_SH_LIST_DCN_COMMON_BASE(mask_sh) \
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 0, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 0, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 1, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 1, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 2, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 2, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 3, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 3, mask_sh),\
+ DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_PHASE, mask_sh),\
+ DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_MODULO, mask_sh),\
+ DCCG_SF(REFCLK_CNTL, REFCLK_CLOCK_EN, mask_sh),\
+ DCCG_SF(REFCLK_CNTL, REFCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_STEP_DELAY, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_STEP_SIZE, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_FREQ_RAMP_DONE, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_MAX_ERRDET_CYCLES, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DCCG_FIFO_ERRDET_RESET, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DCCG_FIFO_ERRDET_STATE, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DCCG_FIFO_ERRDET_OVR_EN, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_CHG_FWD_CORR_DISABLE, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 1, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 1, mask_sh)
+
+
+
+
+#define DCCG_MASK_SH_LIST_DCN2(mask_sh) \
+ DCCG_COMMON_MASK_SH_LIST_DCN_COMMON_BASE(mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 4, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 4, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 5, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 5, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 3, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 4, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 5, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 3, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 4, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 5, mask_sh)
+
+#define DCCG_MASK_SH_LIST_DCN2_1(mask_sh) \
+ DCCG_COMMON_MASK_SH_LIST_DCN_COMMON_BASE(mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 4, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 4, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 5, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 5, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 3, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 3, mask_sh)
+
+
+#define DCCG_REG_FIELD_LIST(type) \
+ type DPPCLK0_DTO_PHASE;\
+ type DPPCLK0_DTO_MODULO;\
+ type DPPCLK_DTO_ENABLE[6];\
+ type DPPCLK_DTO_DB_EN[6];\
+ type REFCLK_CLOCK_EN;\
+ type REFCLK_SRC_SEL;\
+ type DISPCLK_STEP_DELAY;\
+ type DISPCLK_STEP_SIZE;\
+ type DISPCLK_FREQ_RAMP_DONE;\
+ type DISPCLK_MAX_ERRDET_CYCLES;\
+ type DCCG_FIFO_ERRDET_RESET;\
+ type DCCG_FIFO_ERRDET_STATE;\
+ type DCCG_FIFO_ERRDET_OVR_EN;\
+ type DISPCLK_CHG_FWD_CORR_DISABLE;\
+ type DISPCLK_FREQ_CHANGE_CNTL;\
+ type OTG_ADD_PIXEL[MAX_PIPES];\
+ type OTG_DROP_PIXEL[MAX_PIPES];
+
+#define DCCG3_REG_FIELD_LIST(type) \
+ type HDMICHARCLK0_EN;\
+ type HDMICHARCLK0_SRC_SEL;\
+ type PHYASYMCLK_FORCE_EN;\
+ type PHYASYMCLK_FORCE_SRC_SEL;\
+ type PHYBSYMCLK_FORCE_EN;\
+ type PHYBSYMCLK_FORCE_SRC_SEL;\
+ type PHYCSYMCLK_FORCE_EN;\
+ type PHYCSYMCLK_FORCE_SRC_SEL;
+
+#define DCCG31_REG_FIELD_LIST(type) \
+ type PHYDSYMCLK_FORCE_EN;\
+ type PHYDSYMCLK_FORCE_SRC_SEL;\
+ type PHYESYMCLK_FORCE_EN;\
+ type PHYESYMCLK_FORCE_SRC_SEL;\
+ type DPSTREAMCLK_PIPE0_EN;\
+ type DPSTREAMCLK_PIPE1_EN;\
+ type DPSTREAMCLK_PIPE2_EN;\
+ type DPSTREAMCLK_PIPE3_EN;\
+ type HDMISTREAMCLK0_SRC_SEL;\
+ type HDMISTREAMCLK0_DTO_FORCE_DIS;\
+ type SYMCLK32_SE0_SRC_SEL;\
+ type SYMCLK32_SE1_SRC_SEL;\
+ type SYMCLK32_SE2_SRC_SEL;\
+ type SYMCLK32_SE3_SRC_SEL;\
+ type SYMCLK32_SE0_EN;\
+ type SYMCLK32_SE1_EN;\
+ type SYMCLK32_SE2_EN;\
+ type SYMCLK32_SE3_EN;\
+ type SYMCLK32_LE0_SRC_SEL;\
+ type SYMCLK32_LE1_SRC_SEL;\
+ type SYMCLK32_LE0_EN;\
+ type SYMCLK32_LE1_EN;\
+ type DTBCLK_DTO_ENABLE[MAX_PIPES];\
+ type DTBCLKDTO_ENABLE_STATUS[MAX_PIPES];\
+ type PIPE_DTO_SRC_SEL[MAX_PIPES];\
+ type DTBCLK_DTO_DIV[MAX_PIPES];\
+ type DCCG_AUDIO_DTO_SEL;\
+ type DCCG_AUDIO_DTO0_SOURCE_SEL;\
+ type DENTIST_DISPCLK_CHG_MODE;\
+ type DSCCLK0_DTO_PHASE;\
+ type DSCCLK0_DTO_MODULO;\
+ type DSCCLK1_DTO_PHASE;\
+ type DSCCLK1_DTO_MODULO;\
+ type DSCCLK2_DTO_PHASE;\
+ type DSCCLK2_DTO_MODULO;\
+ type DSCCLK0_DTO_ENABLE;\
+ type DSCCLK1_DTO_ENABLE;\
+ type DSCCLK2_DTO_ENABLE;\
+ type SYMCLK32_ROOT_SE0_GATE_DISABLE;\
+ type SYMCLK32_ROOT_SE1_GATE_DISABLE;\
+ type SYMCLK32_ROOT_SE2_GATE_DISABLE;\
+ type SYMCLK32_ROOT_SE3_GATE_DISABLE;\
+ type SYMCLK32_SE0_GATE_DISABLE;\
+ type SYMCLK32_SE1_GATE_DISABLE;\
+ type SYMCLK32_SE2_GATE_DISABLE;\
+ type SYMCLK32_SE3_GATE_DISABLE;\
+ type SYMCLK32_ROOT_LE0_GATE_DISABLE;\
+ type SYMCLK32_ROOT_LE1_GATE_DISABLE;\
+ type SYMCLK32_LE0_GATE_DISABLE;\
+ type SYMCLK32_LE1_GATE_DISABLE;\
+ type DPSTREAMCLK_ROOT_GATE_DISABLE;\
+ type DPSTREAMCLK_GATE_DISABLE;\
+ type HDMISTREAMCLK0_DTO_PHASE;\
+ type HDMISTREAMCLK0_DTO_MODULO;\
+ type HDMICHARCLK0_GATE_DISABLE;\
+ type HDMICHARCLK0_ROOT_GATE_DISABLE; \
+ type PHYASYMCLK_GATE_DISABLE; \
+ type PHYBSYMCLK_GATE_DISABLE; \
+ type PHYCSYMCLK_GATE_DISABLE; \
+ type PHYDSYMCLK_GATE_DISABLE; \
+ type PHYESYMCLK_GATE_DISABLE;
+
+#define DCCG314_REG_FIELD_LIST(type) \
+ type DSCCLK3_DTO_PHASE;\
+ type DSCCLK3_DTO_MODULO;\
+ type DSCCLK3_DTO_ENABLE;\
+ type DENTIST_DISPCLK_RDIVIDER;\
+ type DENTIST_DISPCLK_WDIVIDER;
+
+#define DCCG32_REG_FIELD_LIST(type) \
+ type DPSTREAMCLK0_EN;\
+ type DPSTREAMCLK1_EN;\
+ type DPSTREAMCLK2_EN;\
+ type DPSTREAMCLK3_EN;\
+ type DPSTREAMCLK0_SRC_SEL;\
+ type DPSTREAMCLK1_SRC_SEL;\
+ type DPSTREAMCLK2_SRC_SEL;\
+ type DPSTREAMCLK3_SRC_SEL;\
+ type HDMISTREAMCLK0_EN;\
+ type OTG0_PIXEL_RATE_DIVK1;\
+ type OTG0_PIXEL_RATE_DIVK2;\
+ type OTG1_PIXEL_RATE_DIVK1;\
+ type OTG1_PIXEL_RATE_DIVK2;\
+ type OTG2_PIXEL_RATE_DIVK1;\
+ type OTG2_PIXEL_RATE_DIVK2;\
+ type OTG3_PIXEL_RATE_DIVK1;\
+ type OTG3_PIXEL_RATE_DIVK2;\
+ type DTBCLK_P0_SRC_SEL;\
+ type DTBCLK_P0_EN;\
+ type DTBCLK_P1_SRC_SEL;\
+ type DTBCLK_P1_EN;\
+ type DTBCLK_P2_SRC_SEL;\
+ type DTBCLK_P2_EN;\
+ type DTBCLK_P3_SRC_SEL;\
+ type DTBCLK_P3_EN;\
+ type DENTIST_DISPCLK_CHG_DONE;
+
+/* Members spliced into struct dccg_shift (uint8_t) and struct dccg_mask (uint32_t). */
+#define DCCG35_REG_FIELD_LIST(type) \
+ type DPPCLK0_EN;\
+ type DPPCLK1_EN;\
+ type DPPCLK2_EN;\
+ type DPPCLK3_EN;\
+ type DSCCLK0_EN;\
+ type DSCCLK1_EN;\
+ type DSCCLK2_EN;\
+ type DSCCLK3_EN;\
+ type DISPCLK_DCCG_GATE_DISABLE;\
+ type DCCG_GLOBAL_FGCG_REP_DIS; \
+ type PHYASYMCLK_EN;\
+ type PHYASYMCLK_SRC_SEL;\
+ type PHYBSYMCLK_EN;\
+ type PHYBSYMCLK_SRC_SEL;\
+ type PHYCSYMCLK_EN;\
+ type PHYCSYMCLK_SRC_SEL;\
+ type PHYDSYMCLK_EN;\
+ type PHYDSYMCLK_SRC_SEL;\
+ type PHYESYMCLK_EN;\
+ type PHYESYMCLK_SRC_SEL;\
+ type PHYASYMCLK_ROOT_GATE_DISABLE;\
+ type PHYBSYMCLK_ROOT_GATE_DISABLE;\
+ type PHYCSYMCLK_ROOT_GATE_DISABLE;\
+ type PHYDSYMCLK_ROOT_GATE_DISABLE;\
+ type PHYESYMCLK_ROOT_GATE_DISABLE;\
+ type HDMISTREAMCLK0_GATE_DISABLE;\
+ type HDMISTREAMCLK1_GATE_DISABLE;\
+ type HDMISTREAMCLK2_GATE_DISABLE;\
+ type HDMISTREAMCLK3_GATE_DISABLE;\
+ type HDMISTREAMCLK4_GATE_DISABLE;\
+ type HDMISTREAMCLK5_GATE_DISABLE;\
+ type SYMCLKA_CLOCK_ENABLE;\
+ type SYMCLKB_CLOCK_ENABLE;\
+ type SYMCLKC_CLOCK_ENABLE;\
+ type SYMCLKD_CLOCK_ENABLE;\
+ type SYMCLKE_CLOCK_ENABLE;\
+ type SYMCLKA_FE_EN;\
+ type SYMCLKB_FE_EN;\
+ type SYMCLKC_FE_EN;\
+ type SYMCLKD_FE_EN;\
+ type SYMCLKE_FE_EN;\
+ type SYMCLKA_SRC_SEL;\
+ type SYMCLKB_SRC_SEL;\
+ type SYMCLKC_SRC_SEL;\
+ type SYMCLKD_SRC_SEL;\
+ type SYMCLKE_SRC_SEL;\
+ type SYMCLKA_FE_SRC_SEL;\
+ type SYMCLKB_FE_SRC_SEL;\
+ type SYMCLKC_FE_SRC_SEL;\
+ type SYMCLKD_FE_SRC_SEL;\
+ type SYMCLKE_FE_SRC_SEL;\
+ type DTBCLK_P0_GATE_DISABLE;\
+ type DTBCLK_P1_GATE_DISABLE;\
+ type DTBCLK_P2_GATE_DISABLE;\
+ type DTBCLK_P3_GATE_DISABLE;\
+ type DSCCLK0_ROOT_GATE_DISABLE;\
+ type DSCCLK1_ROOT_GATE_DISABLE;\
+ type DSCCLK2_ROOT_GATE_DISABLE;\
+ type DSCCLK3_ROOT_GATE_DISABLE;\
+ type SYMCLKA_FE_ROOT_GATE_DISABLE;\
+ type SYMCLKB_FE_ROOT_GATE_DISABLE;\
+ type SYMCLKC_FE_ROOT_GATE_DISABLE;\
+ type SYMCLKD_FE_ROOT_GATE_DISABLE;\
+ type SYMCLKE_FE_ROOT_GATE_DISABLE;\
+ type DPPCLK0_ROOT_GATE_DISABLE;\
+ type DPPCLK1_ROOT_GATE_DISABLE;\
+ type DPPCLK2_ROOT_GATE_DISABLE;\
+ type DPPCLK3_ROOT_GATE_DISABLE;\
+ type HDMISTREAMCLK0_ROOT_GATE_DISABLE;\
+ type SYMCLKA_ROOT_GATE_DISABLE;\
+ type SYMCLKB_ROOT_GATE_DISABLE;\
+ type SYMCLKC_ROOT_GATE_DISABLE;\
+ type SYMCLKD_ROOT_GATE_DISABLE;\
+ type SYMCLKE_ROOT_GATE_DISABLE;\
+ type PHYA_REFCLK_ROOT_GATE_DISABLE;\
+ type PHYB_REFCLK_ROOT_GATE_DISABLE;\
+ type PHYC_REFCLK_ROOT_GATE_DISABLE;\
+ type PHYD_REFCLK_ROOT_GATE_DISABLE;\
+ type PHYE_REFCLK_ROOT_GATE_DISABLE;\
+ type DPSTREAMCLK0_ROOT_GATE_DISABLE;\
+ type DPSTREAMCLK1_ROOT_GATE_DISABLE;\
+ type DPSTREAMCLK2_ROOT_GATE_DISABLE;\
+ type DPSTREAMCLK3_ROOT_GATE_DISABLE;\
+ type DPSTREAMCLK0_GATE_DISABLE;\
+ type DPSTREAMCLK1_GATE_DISABLE;\
+ type DPSTREAMCLK2_GATE_DISABLE;\
+ type DPSTREAMCLK3_GATE_DISABLE;\
+ type SYMCLKA_FE_GATE_DISABLE;\
+ type SYMCLKB_FE_GATE_DISABLE;\
+ type SYMCLKC_FE_GATE_DISABLE;\
+ type SYMCLKD_FE_GATE_DISABLE;\
+ type SYMCLKE_FE_GATE_DISABLE;\
+ type SYMCLKA_GATE_DISABLE;\
+ type SYMCLKB_GATE_DISABLE;\
+ type SYMCLKC_GATE_DISABLE;\
+ type SYMCLKD_GATE_DISABLE;\
+ type SYMCLKE_GATE_DISABLE;
+
+#define DCCG401_REG_FIELD_LIST(type) \
+ type OTG0_TMDS_PIXEL_RATE_DIV;\
+ type DPDTO0_INT;\
+ type OTG1_TMDS_PIXEL_RATE_DIV;\
+ type DPDTO1_INT;\
+ type OTG2_TMDS_PIXEL_RATE_DIV;\
+ type DPDTO2_INT;\
+ type OTG3_TMDS_PIXEL_RATE_DIV;\
+ type DPDTO3_INT;\
+ type SYMCLK32_ROOT_LE2_GATE_DISABLE;\
+ type SYMCLK32_ROOT_LE3_GATE_DISABLE;\
+ type SYMCLK32_LE2_GATE_DISABLE;\
+ type SYMCLK32_LE3_GATE_DISABLE;\
+ type SYMCLK32_LE2_SRC_SEL;\
+ type SYMCLK32_LE3_SRC_SEL;\
+ type SYMCLK32_LE2_EN;\
+ type SYMCLK32_LE3_EN;\
+ type DP_DTO_ENABLE[MAX_PIPES];
+
+struct dccg_shift {
+ DCCG_REG_FIELD_LIST(uint8_t)
+ DCCG3_REG_FIELD_LIST(uint8_t)
+ DCCG31_REG_FIELD_LIST(uint8_t)
+ DCCG314_REG_FIELD_LIST(uint8_t)
+ DCCG32_REG_FIELD_LIST(uint8_t)
+ DCCG35_REG_FIELD_LIST(uint8_t)
+ DCCG401_REG_FIELD_LIST(uint8_t)
+};
+
+struct dccg_mask {
+ DCCG_REG_FIELD_LIST(uint32_t)
+ DCCG3_REG_FIELD_LIST(uint32_t)
+ DCCG31_REG_FIELD_LIST(uint32_t)
+ DCCG314_REG_FIELD_LIST(uint32_t)
+ DCCG32_REG_FIELD_LIST(uint32_t)
+ DCCG35_REG_FIELD_LIST(uint32_t)
+ DCCG401_REG_FIELD_LIST(uint32_t)
+};
+
+#define DCCG_REG_VARIABLE_LIST \
+ uint32_t DPPCLK_DTO_CTRL; \
+ uint32_t DPPCLK_DTO_PARAM[6]; \
+ uint32_t REFCLK_CNTL; \
+ uint32_t DISPCLK_FREQ_CHANGE_CNTL; \
+ uint32_t OTG_PIXEL_RATE_CNTL[MAX_PIPES]; \
+ uint32_t HDMICHARCLK_CLOCK_CNTL[6]; \
+ uint32_t PHYASYMCLK_CLOCK_CNTL; \
+ uint32_t PHYBSYMCLK_CLOCK_CNTL; \
+ uint32_t PHYCSYMCLK_CLOCK_CNTL; \
+ uint32_t PHYDSYMCLK_CLOCK_CNTL; \
+ uint32_t PHYESYMCLK_CLOCK_CNTL; \
+ uint32_t DTBCLK_DTO_MODULO[MAX_PIPES]; \
+ uint32_t DTBCLK_DTO_PHASE[MAX_PIPES]; \
+ uint32_t DCCG_AUDIO_DTBCLK_DTO_MODULO; \
+ uint32_t DCCG_AUDIO_DTBCLK_DTO_PHASE; \
+ uint32_t DCCG_AUDIO_DTO_SOURCE; \
+ uint32_t DPSTREAMCLK_CNTL; \
+ uint32_t HDMISTREAMCLK_CNTL; \
+ uint32_t SYMCLK32_SE_CNTL; \
+ uint32_t SYMCLK32_LE_CNTL; \
+ uint32_t DENTIST_DISPCLK_CNTL; \
+ uint32_t DSCCLK_DTO_CTRL; \
+ uint32_t DSCCLK0_DTO_PARAM; \
+ uint32_t DSCCLK1_DTO_PARAM; \
+ uint32_t DSCCLK2_DTO_PARAM; \
+ uint32_t DSCCLK3_DTO_PARAM; \
+ uint32_t DPSTREAMCLK_ROOT_GATE_DISABLE; \
+ uint32_t DPSTREAMCLK_GATE_DISABLE; \
+ uint32_t DCCG_GATE_DISABLE_CNTL; \
+ uint32_t DCCG_GATE_DISABLE_CNTL2; \
+ uint32_t DCCG_GATE_DISABLE_CNTL3; \
+ uint32_t HDMISTREAMCLK0_DTO_PARAM; \
+ uint32_t DCCG_GATE_DISABLE_CNTL4; \
+ uint32_t OTG_PIXEL_RATE_DIV; \
+ uint32_t DTBCLK_P_CNTL; \
+ uint32_t DPPCLK_CTRL; \
+ uint32_t DCCG_GATE_DISABLE_CNTL5; \
+ uint32_t DCCG_GATE_DISABLE_CNTL6; \
+ uint32_t DCCG_GLOBAL_FGCG_REP_CNTL; \
+ uint32_t SYMCLKA_CLOCK_ENABLE; \
+ uint32_t SYMCLKB_CLOCK_ENABLE; \
+ uint32_t SYMCLKC_CLOCK_ENABLE; \
+ uint32_t SYMCLKD_CLOCK_ENABLE; \
+ uint32_t SYMCLKE_CLOCK_ENABLE; \
+ uint32_t DP_DTO_MODULO[MAX_PIPES]; \
+ uint32_t DP_DTO_PHASE[MAX_PIPES]; \
+ uint32_t DC_MEM_GLOBAL_PWR_REQ_CNTL; \
+ uint32_t DCCG_AUDIO_DTO0_MODULE; \
+ uint32_t DCCG_AUDIO_DTO0_PHASE; \
+ uint32_t DCCG_AUDIO_DTO1_MODULE; \
+ uint32_t DCCG_AUDIO_DTO1_PHASE; \
+ uint32_t DCCG_CAC_STATUS; \
+ uint32_t DCCG_CAC_STATUS2; \
+ uint32_t DCCG_DISP_CNTL_REG; \
+ uint32_t DCCG_DS_CNTL; \
+ uint32_t DCCG_DS_DTO_INCR; \
+ uint32_t DCCG_DS_DTO_MODULO; \
+ uint32_t DCCG_DS_HW_CAL_INTERVAL; \
+ uint32_t DCCG_GTC_CNTL; \
+ uint32_t DCCG_GTC_CURRENT; \
+ uint32_t DCCG_GTC_DTO_INCR; \
+ uint32_t DCCG_GTC_DTO_MODULO; \
+ uint32_t DCCG_PERFMON_CNTL; \
+ uint32_t DCCG_PERFMON_CNTL2; \
+ uint32_t DCCG_SOFT_RESET; \
+ uint32_t DCCG_TEST_CLK_SEL; \
+ uint32_t DCCG_VSYNC_CNT_CTRL; \
+ uint32_t DCCG_VSYNC_CNT_INT_CTRL; \
+ uint32_t DCCG_VSYNC_OTG0_LATCH_VALUE; \
+ uint32_t DCCG_VSYNC_OTG1_LATCH_VALUE; \
+ uint32_t DCCG_VSYNC_OTG2_LATCH_VALUE; \
+ uint32_t DCCG_VSYNC_OTG3_LATCH_VALUE; \
+ uint32_t DCCG_VSYNC_OTG4_LATCH_VALUE; \
+ uint32_t DCCG_VSYNC_OTG5_LATCH_VALUE; \
+ uint32_t DISPCLK_CGTT_BLK_CTRL_REG; \
+ uint32_t DP_DTO_DBUF_EN; \
+ uint32_t DPIACLK_540M_DTO_MODULO; \
+ uint32_t DPIACLK_540M_DTO_PHASE; \
+ uint32_t DPIACLK_810M_DTO_MODULO; \
+ uint32_t DPIACLK_810M_DTO_PHASE; \
+ uint32_t DPIACLK_DTO_CNTL; \
+ uint32_t DPIASYMCLK_CNTL; \
+ uint32_t DPPCLK_CGTT_BLK_CTRL_REG; \
+ uint32_t DPREFCLK_CGTT_BLK_CTRL_REG; \
+ uint32_t DPREFCLK_CNTL; \
+ uint32_t DTBCLK_DTO_DBUF_EN; \
+ uint32_t FORCE_SYMCLK_DISABLE; \
+ uint32_t HDMICHARCLK0_CLOCK_CNTL; \
+ uint32_t MICROSECOND_TIME_BASE_DIV; \
+ uint32_t MILLISECOND_TIME_BASE_DIV; \
+ uint32_t OTG0_PHYPLL_PIXEL_RATE_CNTL; \
+ uint32_t OTG0_PIXEL_RATE_CNTL; \
+ uint32_t OTG1_PHYPLL_PIXEL_RATE_CNTL; \
+ uint32_t OTG1_PIXEL_RATE_CNTL; \
+ uint32_t OTG2_PHYPLL_PIXEL_RATE_CNTL; \
+ uint32_t OTG2_PIXEL_RATE_CNTL; \
+ uint32_t OTG3_PHYPLL_PIXEL_RATE_CNTL; \
+ uint32_t OTG3_PIXEL_RATE_CNTL; \
+ uint32_t PHYPLLA_PIXCLK_RESYNC_CNTL; \
+ uint32_t PHYPLLB_PIXCLK_RESYNC_CNTL; \
+ uint32_t PHYPLLC_PIXCLK_RESYNC_CNTL; \
+ uint32_t PHYPLLD_PIXCLK_RESYNC_CNTL; \
+ uint32_t PHYPLLE_PIXCLK_RESYNC_CNTL; \
+ uint32_t REFCLK_CGTT_BLK_CTRL_REG; \
+ uint32_t SOCCLK_CGTT_BLK_CTRL_REG; \
+ uint32_t SYMCLK_CGTT_BLK_CTRL_REG; \
+ uint32_t SYMCLK_PSP_CNTL
+
+struct dccg_registers {
+ DCCG_REG_VARIABLE_LIST;
+};
+
+struct dcn_dccg {
+ struct dccg base;
+ const struct dccg_registers *regs;
+ const struct dccg_shift *dccg_shift;
+ const struct dccg_mask *dccg_mask;
+};
+
+void dccg2_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk);
+
+void dccg2_get_dccg_ref_freq(struct dccg *dccg,
+ unsigned int xtalin_freq_inKhz,
+ unsigned int *dccg_ref_freq_inKhz);
+
+void dccg2_set_fifo_errdet_ovr_en(struct dccg *dccg,
+ bool en);
+void dccg2_otg_add_pixel(struct dccg *dccg,
+ uint32_t otg_inst);
+void dccg2_otg_drop_pixel(struct dccg *dccg,
+ uint32_t otg_inst);
+
+
+void dccg2_init(struct dccg *dccg);
+
+struct dccg *dccg2_create(
+ struct dc_context *ctx,
+ const struct dccg_registers *regs,
+ const struct dccg_shift *dccg_shift,
+ const struct dccg_mask *dccg_mask);
+
+void dcn_dccg_destroy(struct dccg **dccg);
+
+#endif //__DCN20_DCCG_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn201/dcn201_dccg.c
index f5bf04f7da25..9a3402148fde 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn201/dcn201_dccg.c
@@ -44,7 +44,8 @@
#define DC_LOGGER \
dccg->ctx->logger
-void dccg201_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk)
+static void dccg201_update_dpp_dto(struct dccg *dccg, int dpp_inst,
+ int req_dppclk)
{
/* vbios handles it */
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn201/dcn201_dccg.h
index 80888b0484fb..80888b0484fb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn201/dcn201_dccg.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.c
index 33fc9aa8621b..d07c04458d31 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.c
@@ -43,7 +43,7 @@
#define DC_LOGGER \
dccg->ctx->logger
-void dccg21_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk)
+static void dccg21_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk)
{
struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.h
index e44a37491c1e..b7efa777ec73 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.h
@@ -32,6 +32,5 @@ struct dccg *dccg21_create(
const struct dccg_shift *dccg_shift,
const struct dccg_mask *dccg_mask);
-void dccg21_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk);
#endif /* __DCN21_DCCG_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn30/dcn30_dccg.c
index d445dfefc047..d445dfefc047 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn30/dcn30_dccg.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn30/dcn30_dccg.h
index 35a613bb08bf..3f1da7f3a91c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn30/dcn30_dccg.h
@@ -29,15 +29,9 @@
#include "dcn20/dcn20_dccg.h"
-#define DCCG_REG_LIST_DCN3AG() \
- DCCG_COMMON_REG_LIST_DCN_BASE(),\
- SR(PHYASYMCLK_CLOCK_CNTL),\
- SR(PHYBSYMCLK_CLOCK_CNTL),\
- SR(PHYCSYMCLK_CLOCK_CNTL)
-
-
#define DCCG_REG_LIST_DCN30() \
DCCG_REG_LIST_DCN2(),\
+ DCCG_SRII(CLOCK_CNTL, HDMICHARCLK, 0),\
DCCG_SRII(PIXEL_RATE_CNTL, OTG, 2),\
DCCG_SRII(PIXEL_RATE_CNTL, OTG, 3),\
DCCG_SRII(PIXEL_RATE_CNTL, OTG, 4),\
@@ -46,19 +40,10 @@
SR(PHYBSYMCLK_CLOCK_CNTL),\
SR(PHYCSYMCLK_CLOCK_CNTL)
-#define DCCG_MASK_SH_LIST_DCN3AG(mask_sh) \
- DCCG_MASK_SH_LIST_DCN2_1(mask_sh),\
- DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_EN, mask_sh),\
- DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_SRC_SEL, mask_sh),\
- DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_FORCE_EN, mask_sh),\
- DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_FORCE_SRC_SEL, mask_sh),\
- DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_FORCE_EN, mask_sh),\
- DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_FORCE_SRC_SEL, mask_sh),\
- DCCG_SF(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_FORCE_EN, mask_sh),\
- DCCG_SF(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_FORCE_SRC_SEL, mask_sh)
-
#define DCCG_MASK_SH_LIST_DCN3(mask_sh) \
DCCG_MASK_SH_LIST_DCN2(mask_sh),\
+ DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_EN, mask_sh),\
+ DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_SRC_SEL, mask_sh),\
DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_FORCE_EN, mask_sh),\
DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_FORCE_SRC_SEL, mask_sh),\
DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_FORCE_EN, mask_sh),\
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn301/dcn301_dccg.c
index 97e9be87afd9..97e9be87afd9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn301/dcn301_dccg.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn301/dcn301_dccg.h
index 73db962dbc03..067e49cb238e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn301/dcn301_dccg.h
@@ -56,10 +56,4 @@ struct dccg *dccg301_create(
const struct dccg_shift *dccg_shift,
const struct dccg_mask *dccg_mask);
-struct dccg *dccg301_create(
- struct dc_context *ctx,
- const struct dccg_registers *regs,
- const struct dccg_shift *dccg_shift,
- const struct dccg_mask *dccg_mask);
-
#endif //__DCN301_DCCG_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn302/dcn302_dccg.h
index c884dde1bb25..c884dde1bb25 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn302/dcn302_dccg.h
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn303/dcn303_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn303/dcn303_dccg.h
new file mode 100644
index 000000000000..2e12fb643005
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn303/dcn303_dccg.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ */
+
+#ifndef __DCN303_DCCG_H__
+#define __DCN303_DCCG_H__
+
+#include "dcn30/dcn30_dccg.h"
+
+
+#define DCCG_REG_LIST_DCN3_03() \
+ SR(DPPCLK_DTO_CTRL),\
+ DCCG_SRII(DTO_PARAM, DPPCLK, 0),\
+ DCCG_SRII(DTO_PARAM, DPPCLK, 1),\
+ SR(REFCLK_CNTL),\
+ SR(DISPCLK_FREQ_CHANGE_CNTL),\
+ DCCG_SRII(PIXEL_RATE_CNTL, OTG, 0),\
+ DCCG_SRII(PIXEL_RATE_CNTL, OTG, 1)
+
+
+#define DCCG_MASK_SH_LIST_DCN3_03(mask_sh) \
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 0, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 0, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 1, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 1, mask_sh),\
+ DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_PHASE, mask_sh),\
+ DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_MODULO, mask_sh),\
+ DCCG_SF(REFCLK_CNTL, REFCLK_CLOCK_EN, mask_sh),\
+ DCCG_SF(REFCLK_CNTL, REFCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_STEP_DELAY, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_STEP_SIZE, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_FREQ_RAMP_DONE, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_MAX_ERRDET_CYCLES, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DCCG_FIFO_ERRDET_RESET, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DCCG_FIFO_ERRDET_STATE, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DCCG_FIFO_ERRDET_OVR_EN, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_CHG_FWD_CORR_DISABLE, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 1, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 1, mask_sh)
+
+#endif //__DCN303_DCCG_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.c
new file mode 100644
index 000000000000..97df04b7e39d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.c
@@ -0,0 +1,878 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "reg_helper.h"
+#include "core_types.h"
+#include "dcn31_dccg.h"
+#include "dal_asic_id.h"
+/*
+ * File-local helper macros.
+ *
+ * TO_DCN_DCCG() recovers the enclosing struct dcn_dccg from its embedded
+ * 'base' member. REG(), FN() and CTX all expand to expressions on a local
+ * variable named dccg_dcn, so every function that uses them (directly or via
+ * the REG_* register helpers) must declare that local. FN() expands to a
+ * "shift, mask" argument pair for the given field; its reg_name argument is
+ * unused. DC_LOGGER expands to an expression on a variable named dccg.
+ */
+#define TO_DCN_DCCG(dccg)\
+ container_of(dccg, struct dcn_dccg, base)
+
+#define REG(reg) \
+ (dccg_dcn->regs->reg)
+
+#undef FN
+#define FN(reg_name, field_name) \
+ dccg_dcn->dccg_shift->field_name, dccg_dcn->dccg_mask->field_name
+
+#define CTX \
+ dccg_dcn->base.ctx
+#define DC_LOGGER \
+ dccg->ctx->logger
+
+/*
+ * Program the DPPCLK DTO of one DPP instance and record the requested clock.
+ *
+ * @dccg:       DCCG object
+ * @dpp_inst:   DPP instance index (used to index the per-instance registers)
+ * @req_dppclk: requested DPP clock; 0 disables the DTO
+ *
+ * Nothing is written or recorded when the instance's DPP clock is gated.
+ * Otherwise the DTO is enabled with modulo 0xff and a phase of
+ * ceil(0xff * req_dppclk / ref_dppclk), clamped to 0xff, or disabled when
+ * either clock value is zero. The request is then stored in
+ * pipe_dppclk_khz[dpp_inst].
+ */
+void dccg31_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const int dto_modulo = 0xff;	/* fixed modulo; phase is scaled against it */
+	int global_clk;
+	int dto_phase;
+
+	/*
+	 * A stopped DPP clock is treated the same as a power-gated pipe:
+	 * leave the DPPCLK DTO untouched.
+	 */
+	if (dccg->dpp_clock_gated[dpp_inst])
+		return;
+
+	if (!dccg->ref_dppclk || !req_dppclk) {
+		REG_UPDATE(DPPCLK_DTO_CTRL,
+				DPPCLK_DTO_ENABLE[dpp_inst], 0);
+	} else {
+		global_clk = dccg->ref_dppclk;
+
+		/* phase / modulo = dpp pipe clk / dpp global clk (rounded up) */
+		dto_phase = ((dto_modulo * req_dppclk) + global_clk - 1) / global_clk;
+
+		if (dto_phase > 0xff) {
+			ASSERT(false);
+			dto_phase = 0xff;
+		}
+
+		REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0,
+				DPPCLK0_DTO_PHASE, dto_phase,
+				DPPCLK0_DTO_MODULO, dto_modulo);
+		REG_UPDATE(DPPCLK_DTO_CTRL,
+				DPPCLK_DTO_ENABLE[dpp_inst], 1);
+	}
+
+	dccg->pipe_dppclk_khz[dpp_inst] = req_dppclk;
+}
+
+/*
+ * Translate a PHYD32CLK source for the ASIC at hand.
+ *
+ * On FAMILY_YELLOW_CARP with hw_internal_rev YELLOW_CARP_B0, PHYD32CLKC is
+ * replaced by PHYD32CLKF and PHYD32CLKD by PHYD32CLKG. Every other source,
+ * and every other ASIC, gets @src back unchanged.
+ */
+static enum phyd32clk_clock_source get_phy_mux_symclk(
+		struct dcn_dccg *dccg_dcn,
+		enum phyd32clk_clock_source src)
+{
+	struct dc_context *ctx = dccg_dcn->base.ctx;
+
+	if (ctx->asic_id.chip_family != FAMILY_YELLOW_CARP ||
+	    ctx->asic_id.hw_internal_rev != YELLOW_CARP_B0)
+		return src;
+
+	switch (src) {
+	case PHYD32CLKC:
+		return PHYD32CLKF;
+	case PHYD32CLKD:
+		return PHYD32CLKG;
+	default:
+		return src;
+	}
+}
+/*
+ * Enable DPSTREAMCLK for one OTG pipe.
+ *
+ * Sets DPSTREAMCLK_PIPE<otg_inst>_EN in DPSTREAMCLK_CNTL for otg_inst 0..3.
+ * Any other instance hits BREAK_TO_DEBUGGER() and returns before the
+ * gate-disable bits are touched. When the
+ * root_clock_optimization.bits.dpstream debug option is set,
+ * DPSTREAMCLK_GATE_DISABLE and DPSTREAMCLK_ROOT_GATE_DISABLE are then both
+ * written to 1.
+ */
+static void dccg31_enable_dpstreamclk(struct dccg *dccg, int otg_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ /* enabled to select one of the DTBCLKs for pipe */
+ switch (otg_inst) {
+ case 0:
+ REG_UPDATE(DPSTREAMCLK_CNTL,
+ DPSTREAMCLK_PIPE0_EN, 1);
+ break;
+ case 1:
+ REG_UPDATE(DPSTREAMCLK_CNTL,
+ DPSTREAMCLK_PIPE1_EN, 1);
+ break;
+ case 2:
+ REG_UPDATE(DPSTREAMCLK_CNTL,
+ DPSTREAMCLK_PIPE2_EN, 1);
+ break;
+ case 3:
+ REG_UPDATE(DPSTREAMCLK_CNTL,
+ DPSTREAMCLK_PIPE3_EN, 1);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ DPSTREAMCLK_GATE_DISABLE, 1,
+ DPSTREAMCLK_ROOT_GATE_DISABLE, 1);
+}
+/*
+ * Disable DPSTREAMCLK for one OTG pipe.
+ *
+ * When the root_clock_optimization.bits.dpstream debug option is set, the
+ * DPSTREAMCLK_ROOT_GATE_DISABLE and DPSTREAMCLK_GATE_DISABLE bits are cleared
+ * first; this happens before otg_inst is validated. DPSTREAMCLK_PIPE<n>_EN is
+ * then cleared for otg_inst 0..3; other values hit BREAK_TO_DEBUGGER().
+ */
+static void dccg31_disable_dpstreamclk(struct dccg *dccg, int otg_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ DPSTREAMCLK_ROOT_GATE_DISABLE, 0,
+ DPSTREAMCLK_GATE_DISABLE, 0);
+
+ switch (otg_inst) {
+ case 0:
+ REG_UPDATE(DPSTREAMCLK_CNTL,
+ DPSTREAMCLK_PIPE0_EN, 0);
+ break;
+ case 1:
+ REG_UPDATE(DPSTREAMCLK_CNTL,
+ DPSTREAMCLK_PIPE1_EN, 0);
+ break;
+ case 2:
+ REG_UPDATE(DPSTREAMCLK_CNTL,
+ DPSTREAMCLK_PIPE2_EN, 0);
+ break;
+ case 3:
+ REG_UPDATE(DPSTREAMCLK_CNTL,
+ DPSTREAMCLK_PIPE3_EN, 0);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+
+/*
+ * Select the DP stream clock source for an OTG pipe.
+ *
+ * @src == REFCLK disables DPSTREAMCLK for @otg_inst; any other source
+ * enables it. @dp_hpo_inst is accepted but not used by this implementation.
+ */
+void dccg31_set_dpstreamclk(
+		struct dccg *dccg,
+		enum streamclk_source src,
+		int otg_inst,
+		int dp_hpo_inst)
+{
+	if (src != REFCLK) {
+		dccg31_enable_dpstreamclk(dccg, otg_inst);
+		return;
+	}
+
+	dccg31_disable_dpstreamclk(dccg, otg_inst);
+}
+/*
+ * Enable SYMCLK32 for one HPO stream encoder (hpo_se_inst 0..3).
+ *
+ * The requested phyd32clk is first passed through get_phy_mux_symclk(). For
+ * the selected instance, the SYMCLK32_SE<n> gate-disable bits are set to 1
+ * when root_clock_optimization.bits.symclk32_se is set, and then the source
+ * select and enable fields in SYMCLK32_SE_CNTL are programmed. Any other
+ * instance hits BREAK_TO_DEBUGGER() and nothing is written.
+ */
+void dccg31_enable_symclk32_se(
+ struct dccg *dccg,
+ int hpo_se_inst,
+ enum phyd32clk_clock_source phyd32clk)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ phyd32clk = get_phy_mux_symclk(dccg_dcn, phyd32clk);
+
+ /* select one of the PHYD32CLKs as the source for symclk32_se */
+ switch (hpo_se_inst) {
+ case 0:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE0_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_SE0_GATE_DISABLE, 1);
+ REG_UPDATE_2(SYMCLK32_SE_CNTL,
+ SYMCLK32_SE0_SRC_SEL, phyd32clk,
+ SYMCLK32_SE0_EN, 1);
+ break;
+ case 1:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE1_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_SE1_GATE_DISABLE, 1);
+ REG_UPDATE_2(SYMCLK32_SE_CNTL,
+ SYMCLK32_SE1_SRC_SEL, phyd32clk,
+ SYMCLK32_SE1_EN, 1);
+ break;
+ case 2:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE2_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_SE2_GATE_DISABLE, 1);
+ REG_UPDATE_2(SYMCLK32_SE_CNTL,
+ SYMCLK32_SE2_SRC_SEL, phyd32clk,
+ SYMCLK32_SE2_EN, 1);
+ break;
+ case 3:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE3_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_SE3_GATE_DISABLE, 1);
+ REG_UPDATE_2(SYMCLK32_SE_CNTL,
+ SYMCLK32_SE3_SRC_SEL, phyd32clk,
+ SYMCLK32_SE3_EN, 1);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+/*
+ * Disable SYMCLK32 for one HPO stream encoder (hpo_se_inst 0..3).
+ *
+ * For the selected instance the source select and enable fields in
+ * SYMCLK32_SE_CNTL are cleared first; then, when
+ * root_clock_optimization.bits.symclk32_se is set, both SYMCLK32_SE<n>
+ * gate-disable bits are cleared. Any other instance hits BREAK_TO_DEBUGGER()
+ * and nothing is written.
+ */
+void dccg31_disable_symclk32_se(
+ struct dccg *dccg,
+ int hpo_se_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ /* set refclk as the source for symclk32_se */
+ switch (hpo_se_inst) {
+ case 0:
+ REG_UPDATE_2(SYMCLK32_SE_CNTL,
+ SYMCLK32_SE0_SRC_SEL, 0,
+ SYMCLK32_SE0_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE0_GATE_DISABLE, 0,
+ SYMCLK32_ROOT_SE0_GATE_DISABLE, 0);
+ break;
+ case 1:
+ REG_UPDATE_2(SYMCLK32_SE_CNTL,
+ SYMCLK32_SE1_SRC_SEL, 0,
+ SYMCLK32_SE1_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE1_GATE_DISABLE, 0,
+ SYMCLK32_ROOT_SE1_GATE_DISABLE, 0);
+ break;
+ case 2:
+ REG_UPDATE_2(SYMCLK32_SE_CNTL,
+ SYMCLK32_SE2_SRC_SEL, 0,
+ SYMCLK32_SE2_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE2_GATE_DISABLE, 0,
+ SYMCLK32_ROOT_SE2_GATE_DISABLE, 0);
+ break;
+ case 3:
+ REG_UPDATE_2(SYMCLK32_SE_CNTL,
+ SYMCLK32_SE3_SRC_SEL, 0,
+ SYMCLK32_SE3_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE3_GATE_DISABLE, 0,
+ SYMCLK32_ROOT_SE3_GATE_DISABLE, 0);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+/*
+ * Enable SYMCLK32 for one HPO link encoder (hpo_le_inst 0 or 1).
+ *
+ * The requested phyd32clk is passed through get_phy_mux_symclk() and then
+ * written, together with the enable bit, to SYMCLK32_LE_CNTL. This function
+ * does not touch the LE gate-disable bits. Any other instance hits
+ * BREAK_TO_DEBUGGER() and nothing is written.
+ */
+void dccg31_enable_symclk32_le(
+ struct dccg *dccg,
+ int hpo_le_inst,
+ enum phyd32clk_clock_source phyd32clk)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ phyd32clk = get_phy_mux_symclk(dccg_dcn, phyd32clk);
+
+ /* select one of the PHYD32CLKs as the source for symclk32_le */
+ switch (hpo_le_inst) {
+ case 0:
+ REG_UPDATE_2(SYMCLK32_LE_CNTL,
+ SYMCLK32_LE0_SRC_SEL, phyd32clk,
+ SYMCLK32_LE0_EN, 1);
+ break;
+ case 1:
+ REG_UPDATE_2(SYMCLK32_LE_CNTL,
+ SYMCLK32_LE1_SRC_SEL, phyd32clk,
+ SYMCLK32_LE1_EN, 1);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+/*
+ * Disable SYMCLK32 for one HPO link encoder (hpo_le_inst 0 or 1) by clearing
+ * its source select and enable fields in SYMCLK32_LE_CNTL. Any other instance
+ * hits BREAK_TO_DEBUGGER() and nothing is written.
+ */
+void dccg31_disable_symclk32_le(
+ struct dccg *dccg,
+ int hpo_le_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ /* set refclk as the source for symclk32_le */
+ switch (hpo_le_inst) {
+ case 0:
+ REG_UPDATE_2(SYMCLK32_LE_CNTL,
+ SYMCLK32_LE0_SRC_SEL, 0,
+ SYMCLK32_LE0_EN, 0);
+ break;
+ case 1:
+ REG_UPDATE_2(SYMCLK32_LE_CNTL,
+ SYMCLK32_LE1_SRC_SEL, 0,
+ SYMCLK32_LE1_EN, 0);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+
+/*
+ * Program the SYMCLK32 link-encoder gate-disable bits for one instance.
+ *
+ * Does nothing unless root_clock_optimization.bits.symclk32_le is set.
+ * For hpo_le_inst 0 or 1, both SYMCLK32_LE<n>_GATE_DISABLE and
+ * SYMCLK32_ROOT_LE<n>_GATE_DISABLE are written with 1 when @enable is true
+ * and 0 when it is false. Other instances hit BREAK_TO_DEBUGGER().
+ */
+void dccg31_set_symclk32_le_root_clock_gating(
+		struct dccg *dccg,
+		int hpo_le_inst,
+		bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const int gate_disable = enable ? 1 : 0;
+
+	if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+		return;
+
+	if (hpo_le_inst == 0) {
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+				SYMCLK32_LE0_GATE_DISABLE, gate_disable,
+				SYMCLK32_ROOT_LE0_GATE_DISABLE, gate_disable);
+	} else if (hpo_le_inst == 1) {
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+				SYMCLK32_LE1_GATE_DISABLE, gate_disable,
+				SYMCLK32_ROOT_LE1_GATE_DISABLE, gate_disable);
+	} else {
+		BREAK_TO_DEBUGGER();
+	}
+}
+/*
+ * Stop the DSC clock of one DSC instance (inst 0..3).
+ *
+ * Does nothing unless root_clock_optimization.bits.dsc is set. For the
+ * selected instance the DTO is enabled and then programmed with phase 0 and
+ * modulo 1. Instance 3 is only programmed when the DSCCLK3_DTO_PARAM
+ * register entry is non-zero. Other instances hit BREAK_TO_DEBUGGER().
+ */
+void dccg31_disable_dscclk(struct dccg *dccg, int inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc)
+ return;
+ //DTO must be enabled to generate a 0 Hz clock output
+ switch (inst) {
+ case 0:
+ REG_UPDATE(DSCCLK_DTO_CTRL,
+ DSCCLK0_DTO_ENABLE, 1);
+ REG_UPDATE_2(DSCCLK0_DTO_PARAM,
+ DSCCLK0_DTO_PHASE, 0,
+ DSCCLK0_DTO_MODULO, 1);
+ break;
+ case 1:
+ REG_UPDATE(DSCCLK_DTO_CTRL,
+ DSCCLK1_DTO_ENABLE, 1);
+ REG_UPDATE_2(DSCCLK1_DTO_PARAM,
+ DSCCLK1_DTO_PHASE, 0,
+ DSCCLK1_DTO_MODULO, 1);
+ break;
+ case 2:
+ REG_UPDATE(DSCCLK_DTO_CTRL,
+ DSCCLK2_DTO_ENABLE, 1);
+ REG_UPDATE_2(DSCCLK2_DTO_PARAM,
+ DSCCLK2_DTO_PHASE, 0,
+ DSCCLK2_DTO_MODULO, 1);
+ break;
+ case 3:
+ if (REG(DSCCLK3_DTO_PARAM)) {
+ REG_UPDATE(DSCCLK_DTO_CTRL,
+ DSCCLK3_DTO_ENABLE, 1);
+ REG_UPDATE_2(DSCCLK3_DTO_PARAM,
+ DSCCLK3_DTO_PHASE, 0,
+ DSCCLK3_DTO_MODULO, 1);
+ }
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+/*
+ * Re-enable the DSC clock of one DSC instance (inst 0..3) by disabling its
+ * DTO.
+ *
+ * Does nothing unless root_clock_optimization.bits.dsc is set. Phase and
+ * modulo are cleared and the DTO enable bit is cleared. Instance 3 is only
+ * programmed when the DSCCLK3_DTO_PARAM register entry is non-zero. Other
+ * instances hit BREAK_TO_DEBUGGER().
+ *
+ * NOTE(review): cases 0-2 write DSCCLKn_DTO_PARAM before DSCCLK_DTO_CTRL,
+ * while case 3 writes DSCCLK_DTO_CTRL first. Confirm whether the write
+ * order matters to the hardware.
+ */
+void dccg31_enable_dscclk(struct dccg *dccg, int inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc)
+ return;
+ //Disable DTO
+ switch (inst) {
+ case 0:
+ REG_UPDATE_2(DSCCLK0_DTO_PARAM,
+ DSCCLK0_DTO_PHASE, 0,
+ DSCCLK0_DTO_MODULO, 0);
+ REG_UPDATE(DSCCLK_DTO_CTRL,
+ DSCCLK0_DTO_ENABLE, 0);
+ break;
+ case 1:
+ REG_UPDATE_2(DSCCLK1_DTO_PARAM,
+ DSCCLK1_DTO_PHASE, 0,
+ DSCCLK1_DTO_MODULO, 0);
+ REG_UPDATE(DSCCLK_DTO_CTRL,
+ DSCCLK1_DTO_ENABLE, 0);
+ break;
+ case 2:
+ REG_UPDATE_2(DSCCLK2_DTO_PARAM,
+ DSCCLK2_DTO_PHASE, 0,
+ DSCCLK2_DTO_MODULO, 0);
+ REG_UPDATE(DSCCLK_DTO_CTRL,
+ DSCCLK2_DTO_ENABLE, 0);
+ break;
+ case 3:
+ if (REG(DSCCLK3_DTO_PARAM)) {
+ REG_UPDATE(DSCCLK_DTO_CTRL,
+ DSCCLK3_DTO_ENABLE, 0);
+ REG_UPDATE_2(DSCCLK3_DTO_PARAM,
+ DSCCLK3_DTO_PHASE, 0,
+ DSCCLK3_DTO_MODULO, 0);
+ }
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+/*
+ * Force PHYSYMCLK on or off for one PHY (phy_inst 0..4 -> PHYA..PHYE).
+ *
+ * @dccg:         DCCG object
+ * @phy_inst:     PHY index; values outside 0..4 hit BREAK_TO_DEBUGGER()
+ * @clk_src:      forced clock source, written only when @force_enable is true
+ * @force_enable: true sets FORCE_EN to 1 and FORCE_SRC_SEL to @clk_src;
+ *                false clears both fields
+ *
+ * When root_clock_optimization.bits.physymclk is set, the PHY's
+ * PHY<x>SYMCLK_GATE_DISABLE bit in DCCG_GATE_DISABLE_CNTL2 is additionally
+ * written with 1 (force enabled) or 0 (force disabled), after the clock
+ * control register has been updated.
+ */
+void dccg31_set_physymclk(
+ struct dccg *dccg,
+ int phy_inst,
+ enum physymclk_clock_source clk_src,
+ bool force_enable)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ /* Force PHYSYMCLK on and Select phyd32clk as the source of clock which is output to PHY through DCIO */
+ switch (phy_inst) {
+ case 0:
+ if (force_enable) {
+ REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL,
+ PHYASYMCLK_FORCE_EN, 1,
+ PHYASYMCLK_FORCE_SRC_SEL, clk_src);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+ PHYASYMCLK_GATE_DISABLE, 1);
+ } else {
+ REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL,
+ PHYASYMCLK_FORCE_EN, 0,
+ PHYASYMCLK_FORCE_SRC_SEL, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+ PHYASYMCLK_GATE_DISABLE, 0);
+ }
+ break;
+ case 1:
+ if (force_enable) {
+ REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL,
+ PHYBSYMCLK_FORCE_EN, 1,
+ PHYBSYMCLK_FORCE_SRC_SEL, clk_src);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+ PHYBSYMCLK_GATE_DISABLE, 1);
+ } else {
+ REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL,
+ PHYBSYMCLK_FORCE_EN, 0,
+ PHYBSYMCLK_FORCE_SRC_SEL, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+ PHYBSYMCLK_GATE_DISABLE, 0);
+ }
+ break;
+ case 2:
+ if (force_enable) {
+ REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL,
+ PHYCSYMCLK_FORCE_EN, 1,
+ PHYCSYMCLK_FORCE_SRC_SEL, clk_src);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+ PHYCSYMCLK_GATE_DISABLE, 1);
+ } else {
+ REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL,
+ PHYCSYMCLK_FORCE_EN, 0,
+ PHYCSYMCLK_FORCE_SRC_SEL, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+ PHYCSYMCLK_GATE_DISABLE, 0);
+ }
+ break;
+ case 3:
+ if (force_enable) {
+ REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL,
+ PHYDSYMCLK_FORCE_EN, 1,
+ PHYDSYMCLK_FORCE_SRC_SEL, clk_src);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+ PHYDSYMCLK_GATE_DISABLE, 1);
+ } else {
+ REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL,
+ PHYDSYMCLK_FORCE_EN, 0,
+ PHYDSYMCLK_FORCE_SRC_SEL, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+ PHYDSYMCLK_GATE_DISABLE, 0);
+ }
+ break;
+ case 4:
+ if (force_enable) {
+ REG_UPDATE_2(PHYESYMCLK_CLOCK_CNTL,
+ PHYESYMCLK_FORCE_EN, 1,
+ PHYESYMCLK_FORCE_SRC_SEL, clk_src);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+ PHYESYMCLK_GATE_DISABLE, 1);
+ } else {
+ REG_UPDATE_2(PHYESYMCLK_CLOCK_CNTL,
+ PHYESYMCLK_FORCE_EN, 0,
+ PHYESYMCLK_FORCE_SRC_SEL, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+ PHYESYMCLK_GATE_DISABLE, 0);
+ }
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+
+/*
+ * Controls the generation of pixel valid for OTG in (OTG -> HPO case).
+ *
+ * Programs the DTBCLK DTO of OTG instance params->otg_inst. The requested
+ * DTBCLK rate starts as params->pixclk_khz and is divided by 4 or 2 for the
+ * modes in the table below, which also selects the DTBCLK_DTO_DIV value.
+ *
+ * If both params->ref_dtbclk_khz and the derived request rate are non-zero,
+ * the divider, modulo and phase are written, the DTO is enabled, the enable
+ * status is polled via REG_WAIT, and only then is PIPE_DTO_SRC_SEL set to 1.
+ * Otherwise the DTO is disabled, the source select is cleared, and modulo
+ * and phase are zeroed.
+ *
+ * params->timing is dereferenced without a NULL check whenever
+ * num_odm_segments is neither 4 nor 2.
+ */
+void dccg31_set_dtbclk_dto(
+ struct dccg *dccg,
+ const struct dtbclk_dto_params *params)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+ int req_dtbclk_khz = params->pixclk_khz;
+ uint32_t dtbdto_div;
+
+ /* Mode DTBDTO Rate DTBCLK_DTO<x>_DIV Register
+ * ODM 4:1 combine pixel rate/4 2
+ * ODM 2:1 combine pixel rate/2 4
+ * non-DSC 4:2:0 mode pixel rate/2 4
+ * DSC native 4:2:0 pixel rate/2 4
+ * DSC native 4:2:2 pixel rate/2 4
+ * Other modes pixel rate 8
+ */
+ if (params->num_odm_segments == 4) {
+ dtbdto_div = 2;
+ req_dtbclk_khz = params->pixclk_khz / 4;
+ } else if ((params->num_odm_segments == 2) ||
+ (params->timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) ||
+ (params->timing->flags.DSC && params->timing->pixel_encoding == PIXEL_ENCODING_YCBCR422
+ && !params->timing->dsc_cfg.ycbcr422_simple)) {
+ dtbdto_div = 4;
+ req_dtbclk_khz = params->pixclk_khz / 2;
+ } else
+ dtbdto_div = 8;
+
+ if (params->ref_dtbclk_khz && req_dtbclk_khz) {
+ uint32_t modulo, phase;
+
+ // phase / modulo = dtbclk / dtbclk ref
+ modulo = params->ref_dtbclk_khz * 1000;
+ phase = div_u64((((unsigned long long)modulo * req_dtbclk_khz) + params->ref_dtbclk_khz - 1),
+ params->ref_dtbclk_khz); /* 64-bit product, rounded up */
+
+ REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ DTBCLK_DTO_DIV[params->otg_inst], dtbdto_div);
+
+ REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], modulo);
+ REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], phase);
+
+ REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ DTBCLK_DTO_ENABLE[params->otg_inst], 1);
+
+ REG_WAIT(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ DTBCLKDTO_ENABLE_STATUS[params->otg_inst], 1,
+ 1, 100);
+
+ /* The recommended programming sequence to enable DTBCLK DTO to generate
+ * valid pixel HPO DPSTREAM ENCODER, specifies that DTO source select should
+ * be set only after DTO is enabled
+ */
+ REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ PIPE_DTO_SRC_SEL[params->otg_inst], 1);
+ } else {
+ REG_UPDATE_3(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ DTBCLK_DTO_ENABLE[params->otg_inst], 0,
+ PIPE_DTO_SRC_SEL[params->otg_inst], 0,
+ DTBCLK_DTO_DIV[params->otg_inst], dtbdto_div);
+
+ REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0);
+ REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0);
+ }
+}
+
+/*
+ * Program or disable the audio DTBCLK DTO.
+ *
+ * When either params->ref_dtbclk_khz or params->req_audio_dtbclk_khz is
+ * zero, phase and modulo are cleared and DCCG_AUDIO_DTO_SEL is set to 3.
+ * Otherwise modulo and phase are written so that
+ * phase / modulo = requested audio dtbclk / reference dtbclk (phase rounded
+ * up) and DCCG_AUDIO_DTO_SEL is set to 4.
+ */
+void dccg31_set_audio_dtbclk_dto(
+		struct dccg *dccg,
+		const struct dtbclk_dto_params *params)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	uint32_t modulo, phase;
+
+	if (!params->ref_dtbclk_khz || !params->req_audio_dtbclk_khz) {
+		REG_WRITE(DCCG_AUDIO_DTBCLK_DTO_PHASE, 0);
+		REG_WRITE(DCCG_AUDIO_DTBCLK_DTO_MODULO, 0);
+
+		/* 03 - DCCG_AUDIO_DTO_SEL_NO_AUDIO_DTO */
+		REG_UPDATE(DCCG_AUDIO_DTO_SOURCE,
+				DCCG_AUDIO_DTO_SEL, 3);
+		return;
+	}
+
+	/* phase / modulo = dtbclk / dtbclk ref */
+	modulo = params->ref_dtbclk_khz * 1000;
+	phase = div_u64((((unsigned long long)modulo * params->req_audio_dtbclk_khz) + params->ref_dtbclk_khz - 1),
+			params->ref_dtbclk_khz);
+
+	REG_WRITE(DCCG_AUDIO_DTBCLK_DTO_MODULO, modulo);
+	REG_WRITE(DCCG_AUDIO_DTBCLK_DTO_PHASE, phase);
+
+	/* Note: DCCG_AUDIO_DTBCLK_DTO_USE_512FBR_DTO is not programmed here. */
+
+	/* 04 - DCCG_AUDIO_DTO_SEL_AUDIO_DTO_DTBCLK */
+	REG_UPDATE(DCCG_AUDIO_DTO_SOURCE,
+			DCCG_AUDIO_DTO_SEL, 4);
+}
+
+/*
+ * Report the DCCG reference clock frequency.
+ *
+ * The reference clock is assumed to be sourced from xtalin (24MHz expected),
+ * so the crystal frequency is passed through unchanged into
+ * *dccg_ref_freq_inKhz. @dccg is not used.
+ */
+void dccg31_get_dccg_ref_freq(struct dccg *dccg,
+		unsigned int xtalin_freq_inKhz,
+		unsigned int *dccg_ref_freq_inKhz)
+{
+	*dccg_ref_freq_inKhz = xtalin_freq_inKhz;
+}
+
+/*
+ * Select how DISPCLK frequency changes are applied.
+ *
+ * Writes DENTIST_DISPCLK_CHG_MODE with 2 when @change_mode is
+ * DISPCLK_CHANGE_MODE_RAMPING and with 0 for every other mode.
+ */
+void dccg31_set_dispclk_change_mode(
+		struct dccg *dccg,
+		enum dentist_dispclk_change_mode change_mode)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	int chg_mode = 0;
+
+	if (change_mode == DISPCLK_CHANGE_MODE_RAMPING)
+		chg_mode = 2;
+
+	REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_MODE, chg_mode);
+}
+
+/*
+ * Put the DCCG into its initial state.
+ *
+ * Disables SYMCLK32 for HPO stream encoders 0..3, programs the SYMCLK32 LE
+ * gate-disable bits for instances 0..1 with 'false', and, depending on the
+ * root_clock_optimization debug bits, disables DPSTREAMCLK for pipes 0..3
+ * and releases the PHYSYMCLK force for PHYs 0..4.
+ */
+void dccg31_init(struct dccg *dccg)
+{
+	int inst;
+
+	/* Set HPO stream encoder to use refclk to avoid case where PHY is
+	 * disabled and SYMCLK32 for HPO SE is sourced from PHYD32CLK which
+	 * will cause DCN to hang.
+	 */
+	for (inst = 0; inst < 4; inst++)
+		dccg31_disable_symclk32_se(dccg, inst);
+
+	for (inst = 0; inst < 2; inst++)
+		dccg31_set_symclk32_le_root_clock_gating(dccg, inst, false);
+
+	if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) {
+		for (inst = 0; inst < 4; inst++)
+			dccg31_disable_dpstreamclk(dccg, inst);
+	}
+
+	if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) {
+		for (inst = 0; inst < 5; inst++)
+			dccg31_set_physymclk(dccg, inst, PHYSYMCLK_FORCE_SRC_SYMCLK, false);
+	}
+}
+/*
+ * Write 1 to the OTG_ADD_PIXEL field of OTG_PIXEL_RATE_CNTL for the given OTG
+ * instance. otg_inst is used as an array index without range checking.
+ */
+void dccg31_otg_add_pixel(struct dccg *dccg,
+ uint32_t otg_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ REG_UPDATE(OTG_PIXEL_RATE_CNTL[otg_inst],
+ OTG_ADD_PIXEL[otg_inst], 1);
+}
+/*
+ * Write 1 to the OTG_DROP_PIXEL field of OTG_PIXEL_RATE_CNTL for the given
+ * OTG instance. otg_inst is used as an array index without range checking.
+ */
+void dccg31_otg_drop_pixel(struct dccg *dccg,
+ uint32_t otg_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ REG_UPDATE(OTG_PIXEL_RATE_CNTL[otg_inst],
+ OTG_DROP_PIXEL[otg_inst], 1);
+}
+/*
+ * Snapshot the DCCG registers into *dccg_reg_state.
+ *
+ * Each field of the state structure is filled with one REG_READ() of the
+ * register of the same name; indexed registers (DP_DTO, DPPCLK_DTO_PARAM,
+ * DTBCLK_DTO) are read for instances 0..3. Nothing is written to hardware
+ * by this function itself.
+ *
+ * NOTE(review): several of the registers read here may not be present in
+ * every ASIC's register list - confirm how REG_READ() behaves for a
+ * register entry that was never populated.
+ */
+void dccg31_read_reg_state(struct dccg *dccg, struct dcn_dccg_reg_state *dccg_reg_state)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ dccg_reg_state->dc_mem_global_pwr_req_cntl = REG_READ(DC_MEM_GLOBAL_PWR_REQ_CNTL);
+ dccg_reg_state->dccg_audio_dtbclk_dto_modulo = REG_READ(DCCG_AUDIO_DTBCLK_DTO_MODULO);
+ dccg_reg_state->dccg_audio_dtbclk_dto_phase = REG_READ(DCCG_AUDIO_DTBCLK_DTO_PHASE);
+ dccg_reg_state->dccg_audio_dto_source = REG_READ(DCCG_AUDIO_DTO_SOURCE);
+ dccg_reg_state->dccg_audio_dto0_module = REG_READ(DCCG_AUDIO_DTO0_MODULE);
+ dccg_reg_state->dccg_audio_dto0_phase = REG_READ(DCCG_AUDIO_DTO0_PHASE);
+ dccg_reg_state->dccg_audio_dto1_module = REG_READ(DCCG_AUDIO_DTO1_MODULE);
+ dccg_reg_state->dccg_audio_dto1_phase = REG_READ(DCCG_AUDIO_DTO1_PHASE);
+ dccg_reg_state->dccg_cac_status = REG_READ(DCCG_CAC_STATUS);
+ dccg_reg_state->dccg_cac_status2 = REG_READ(DCCG_CAC_STATUS2);
+ dccg_reg_state->dccg_disp_cntl_reg = REG_READ(DCCG_DISP_CNTL_REG);
+ dccg_reg_state->dccg_ds_cntl = REG_READ(DCCG_DS_CNTL);
+ dccg_reg_state->dccg_ds_dto_incr = REG_READ(DCCG_DS_DTO_INCR);
+ dccg_reg_state->dccg_ds_dto_modulo = REG_READ(DCCG_DS_DTO_MODULO);
+ dccg_reg_state->dccg_ds_hw_cal_interval = REG_READ(DCCG_DS_HW_CAL_INTERVAL);
+ dccg_reg_state->dccg_gate_disable_cntl = REG_READ(DCCG_GATE_DISABLE_CNTL);
+ dccg_reg_state->dccg_gate_disable_cntl2 = REG_READ(DCCG_GATE_DISABLE_CNTL2);
+ dccg_reg_state->dccg_gate_disable_cntl3 = REG_READ(DCCG_GATE_DISABLE_CNTL3);
+ dccg_reg_state->dccg_gate_disable_cntl4 = REG_READ(DCCG_GATE_DISABLE_CNTL4);
+ dccg_reg_state->dccg_gate_disable_cntl5 = REG_READ(DCCG_GATE_DISABLE_CNTL5);
+ dccg_reg_state->dccg_gate_disable_cntl6 = REG_READ(DCCG_GATE_DISABLE_CNTL6);
+ dccg_reg_state->dccg_global_fgcg_rep_cntl = REG_READ(DCCG_GLOBAL_FGCG_REP_CNTL);
+ dccg_reg_state->dccg_gtc_cntl = REG_READ(DCCG_GTC_CNTL);
+ dccg_reg_state->dccg_gtc_current = REG_READ(DCCG_GTC_CURRENT);
+ dccg_reg_state->dccg_gtc_dto_incr = REG_READ(DCCG_GTC_DTO_INCR);
+ dccg_reg_state->dccg_gtc_dto_modulo = REG_READ(DCCG_GTC_DTO_MODULO);
+ dccg_reg_state->dccg_perfmon_cntl = REG_READ(DCCG_PERFMON_CNTL);
+ dccg_reg_state->dccg_perfmon_cntl2 = REG_READ(DCCG_PERFMON_CNTL2);
+ dccg_reg_state->dccg_soft_reset = REG_READ(DCCG_SOFT_RESET);
+ dccg_reg_state->dccg_test_clk_sel = REG_READ(DCCG_TEST_CLK_SEL);
+ dccg_reg_state->dccg_vsync_cnt_ctrl = REG_READ(DCCG_VSYNC_CNT_CTRL);
+ dccg_reg_state->dccg_vsync_cnt_int_ctrl = REG_READ(DCCG_VSYNC_CNT_INT_CTRL);
+ dccg_reg_state->dccg_vsync_otg0_latch_value = REG_READ(DCCG_VSYNC_OTG0_LATCH_VALUE);
+ dccg_reg_state->dccg_vsync_otg1_latch_value = REG_READ(DCCG_VSYNC_OTG1_LATCH_VALUE);
+ dccg_reg_state->dccg_vsync_otg2_latch_value = REG_READ(DCCG_VSYNC_OTG2_LATCH_VALUE);
+ dccg_reg_state->dccg_vsync_otg3_latch_value = REG_READ(DCCG_VSYNC_OTG3_LATCH_VALUE);
+ dccg_reg_state->dccg_vsync_otg4_latch_value = REG_READ(DCCG_VSYNC_OTG4_LATCH_VALUE);
+ dccg_reg_state->dccg_vsync_otg5_latch_value = REG_READ(DCCG_VSYNC_OTG5_LATCH_VALUE);
+ dccg_reg_state->dispclk_cgtt_blk_ctrl_reg = REG_READ(DISPCLK_CGTT_BLK_CTRL_REG);
+ dccg_reg_state->dispclk_freq_change_cntl = REG_READ(DISPCLK_FREQ_CHANGE_CNTL);
+ dccg_reg_state->dp_dto_dbuf_en = REG_READ(DP_DTO_DBUF_EN);
+ dccg_reg_state->dp_dto0_modulo = REG_READ(DP_DTO_MODULO[0]);
+ dccg_reg_state->dp_dto0_phase = REG_READ(DP_DTO_PHASE[0]);
+ dccg_reg_state->dp_dto1_modulo = REG_READ(DP_DTO_MODULO[1]);
+ dccg_reg_state->dp_dto1_phase = REG_READ(DP_DTO_PHASE[1]);
+ dccg_reg_state->dp_dto2_modulo = REG_READ(DP_DTO_MODULO[2]);
+ dccg_reg_state->dp_dto2_phase = REG_READ(DP_DTO_PHASE[2]);
+ dccg_reg_state->dp_dto3_modulo = REG_READ(DP_DTO_MODULO[3]);
+ dccg_reg_state->dp_dto3_phase = REG_READ(DP_DTO_PHASE[3]);
+ dccg_reg_state->dpiaclk_540m_dto_modulo = REG_READ(DPIACLK_540M_DTO_MODULO);
+ dccg_reg_state->dpiaclk_540m_dto_phase = REG_READ(DPIACLK_540M_DTO_PHASE);
+ dccg_reg_state->dpiaclk_810m_dto_modulo = REG_READ(DPIACLK_810M_DTO_MODULO);
+ dccg_reg_state->dpiaclk_810m_dto_phase = REG_READ(DPIACLK_810M_DTO_PHASE);
+ dccg_reg_state->dpiaclk_dto_cntl = REG_READ(DPIACLK_DTO_CNTL);
+ dccg_reg_state->dpiasymclk_cntl = REG_READ(DPIASYMCLK_CNTL);
+ dccg_reg_state->dppclk_cgtt_blk_ctrl_reg = REG_READ(DPPCLK_CGTT_BLK_CTRL_REG);
+ dccg_reg_state->dppclk_ctrl = REG_READ(DPPCLK_CTRL);
+ dccg_reg_state->dppclk_dto_ctrl = REG_READ(DPPCLK_DTO_CTRL);
+ dccg_reg_state->dppclk0_dto_param = REG_READ(DPPCLK_DTO_PARAM[0]);
+ dccg_reg_state->dppclk1_dto_param = REG_READ(DPPCLK_DTO_PARAM[1]);
+ dccg_reg_state->dppclk2_dto_param = REG_READ(DPPCLK_DTO_PARAM[2]);
+ dccg_reg_state->dppclk3_dto_param = REG_READ(DPPCLK_DTO_PARAM[3]);
+ dccg_reg_state->dprefclk_cgtt_blk_ctrl_reg = REG_READ(DPREFCLK_CGTT_BLK_CTRL_REG);
+ dccg_reg_state->dprefclk_cntl = REG_READ(DPREFCLK_CNTL);
+ dccg_reg_state->dpstreamclk_cntl = REG_READ(DPSTREAMCLK_CNTL);
+ dccg_reg_state->dscclk_dto_ctrl = REG_READ(DSCCLK_DTO_CTRL);
+ dccg_reg_state->dscclk0_dto_param = REG_READ(DSCCLK0_DTO_PARAM);
+ dccg_reg_state->dscclk1_dto_param = REG_READ(DSCCLK1_DTO_PARAM);
+ dccg_reg_state->dscclk2_dto_param = REG_READ(DSCCLK2_DTO_PARAM);
+ dccg_reg_state->dscclk3_dto_param = REG_READ(DSCCLK3_DTO_PARAM);
+ dccg_reg_state->dtbclk_dto_dbuf_en = REG_READ(DTBCLK_DTO_DBUF_EN);
+ dccg_reg_state->dtbclk_dto0_modulo = REG_READ(DTBCLK_DTO_MODULO[0]);
+ dccg_reg_state->dtbclk_dto0_phase = REG_READ(DTBCLK_DTO_PHASE[0]);
+ dccg_reg_state->dtbclk_dto1_modulo = REG_READ(DTBCLK_DTO_MODULO[1]);
+ dccg_reg_state->dtbclk_dto1_phase = REG_READ(DTBCLK_DTO_PHASE[1]);
+ dccg_reg_state->dtbclk_dto2_modulo = REG_READ(DTBCLK_DTO_MODULO[2]);
+ dccg_reg_state->dtbclk_dto2_phase = REG_READ(DTBCLK_DTO_PHASE[2]);
+ dccg_reg_state->dtbclk_dto3_modulo = REG_READ(DTBCLK_DTO_MODULO[3]);
+ dccg_reg_state->dtbclk_dto3_phase = REG_READ(DTBCLK_DTO_PHASE[3]);
+ dccg_reg_state->dtbclk_p_cntl = REG_READ(DTBCLK_P_CNTL);
+ dccg_reg_state->force_symclk_disable = REG_READ(FORCE_SYMCLK_DISABLE);
+ dccg_reg_state->hdmicharclk0_clock_cntl = REG_READ(HDMICHARCLK0_CLOCK_CNTL);
+ dccg_reg_state->hdmistreamclk_cntl = REG_READ(HDMISTREAMCLK_CNTL);
+ dccg_reg_state->hdmistreamclk0_dto_param = REG_READ(HDMISTREAMCLK0_DTO_PARAM);
+ dccg_reg_state->microsecond_time_base_div = REG_READ(MICROSECOND_TIME_BASE_DIV);
+ dccg_reg_state->millisecond_time_base_div = REG_READ(MILLISECOND_TIME_BASE_DIV);
+ dccg_reg_state->otg_pixel_rate_div = REG_READ(OTG_PIXEL_RATE_DIV);
+ dccg_reg_state->otg0_phypll_pixel_rate_cntl = REG_READ(OTG0_PHYPLL_PIXEL_RATE_CNTL);
+ dccg_reg_state->otg0_pixel_rate_cntl = REG_READ(OTG0_PIXEL_RATE_CNTL);
+ dccg_reg_state->otg1_phypll_pixel_rate_cntl = REG_READ(OTG1_PHYPLL_PIXEL_RATE_CNTL);
+ dccg_reg_state->otg1_pixel_rate_cntl = REG_READ(OTG1_PIXEL_RATE_CNTL);
+ dccg_reg_state->otg2_phypll_pixel_rate_cntl = REG_READ(OTG2_PHYPLL_PIXEL_RATE_CNTL);
+ dccg_reg_state->otg2_pixel_rate_cntl = REG_READ(OTG2_PIXEL_RATE_CNTL);
+ dccg_reg_state->otg3_phypll_pixel_rate_cntl = REG_READ(OTG3_PHYPLL_PIXEL_RATE_CNTL);
+ dccg_reg_state->otg3_pixel_rate_cntl = REG_READ(OTG3_PIXEL_RATE_CNTL);
+ dccg_reg_state->phyasymclk_clock_cntl = REG_READ(PHYASYMCLK_CLOCK_CNTL);
+ dccg_reg_state->phybsymclk_clock_cntl = REG_READ(PHYBSYMCLK_CLOCK_CNTL);
+ dccg_reg_state->phycsymclk_clock_cntl = REG_READ(PHYCSYMCLK_CLOCK_CNTL);
+ dccg_reg_state->phydsymclk_clock_cntl = REG_READ(PHYDSYMCLK_CLOCK_CNTL);
+ dccg_reg_state->phyesymclk_clock_cntl = REG_READ(PHYESYMCLK_CLOCK_CNTL);
+ dccg_reg_state->phyplla_pixclk_resync_cntl = REG_READ(PHYPLLA_PIXCLK_RESYNC_CNTL);
+ dccg_reg_state->phypllb_pixclk_resync_cntl = REG_READ(PHYPLLB_PIXCLK_RESYNC_CNTL);
+ dccg_reg_state->phypllc_pixclk_resync_cntl = REG_READ(PHYPLLC_PIXCLK_RESYNC_CNTL);
+ dccg_reg_state->phyplld_pixclk_resync_cntl = REG_READ(PHYPLLD_PIXCLK_RESYNC_CNTL);
+ dccg_reg_state->phyplle_pixclk_resync_cntl = REG_READ(PHYPLLE_PIXCLK_RESYNC_CNTL);
+ dccg_reg_state->refclk_cgtt_blk_ctrl_reg = REG_READ(REFCLK_CGTT_BLK_CTRL_REG);
+ dccg_reg_state->socclk_cgtt_blk_ctrl_reg = REG_READ(SOCCLK_CGTT_BLK_CTRL_REG);
+ dccg_reg_state->symclk_cgtt_blk_ctrl_reg = REG_READ(SYMCLK_CGTT_BLK_CTRL_REG);
+ dccg_reg_state->symclk_psp_cntl = REG_READ(SYMCLK_PSP_CNTL);
+ dccg_reg_state->symclk32_le_cntl = REG_READ(SYMCLK32_LE_CNTL);
+ dccg_reg_state->symclk32_se_cntl = REG_READ(SYMCLK32_SE_CNTL);
+ dccg_reg_state->symclka_clock_enable = REG_READ(SYMCLKA_CLOCK_ENABLE);
+ dccg_reg_state->symclkb_clock_enable = REG_READ(SYMCLKB_CLOCK_ENABLE);
+ dccg_reg_state->symclkc_clock_enable = REG_READ(SYMCLKC_CLOCK_ENABLE);
+ dccg_reg_state->symclkd_clock_enable = REG_READ(SYMCLKD_CLOCK_ENABLE);
+ dccg_reg_state->symclke_clock_enable = REG_READ(SYMCLKE_CLOCK_ENABLE);
+}
+/*
+ * DCCG function table installed by dccg31_create(). All hooks point at the
+ * dccg31_* implementations in this file except set_fifo_errdet_ovr_en, which
+ * uses dccg2_set_fifo_errdet_ovr_en. dccg31_set_symclk32_le_root_clock_gating()
+ * is not wired into this table.
+ */
+static const struct dccg_funcs dccg31_funcs = {
+ .update_dpp_dto = dccg31_update_dpp_dto,
+ .get_dccg_ref_freq = dccg31_get_dccg_ref_freq,
+ .dccg_init = dccg31_init,
+ .set_dpstreamclk = dccg31_set_dpstreamclk,
+ .enable_symclk32_se = dccg31_enable_symclk32_se,
+ .disable_symclk32_se = dccg31_disable_symclk32_se,
+ .enable_symclk32_le = dccg31_enable_symclk32_le,
+ .disable_symclk32_le = dccg31_disable_symclk32_le,
+ .set_physymclk = dccg31_set_physymclk,
+ .set_dtbclk_dto = dccg31_set_dtbclk_dto,
+ .set_audio_dtbclk_dto = dccg31_set_audio_dtbclk_dto,
+ .set_fifo_errdet_ovr_en = dccg2_set_fifo_errdet_ovr_en,
+ .otg_add_pixel = dccg31_otg_add_pixel,
+ .otg_drop_pixel = dccg31_otg_drop_pixel,
+ .set_dispclk_change_mode = dccg31_set_dispclk_change_mode,
+ .disable_dsc = dccg31_disable_dscclk,
+ .enable_dsc = dccg31_enable_dscclk,
+ .dccg_read_reg_state = dccg31_read_reg_state,
+};
+
+/*
+ * Allocate and initialise a DCN3.1 DCCG object.
+ *
+ * @ctx:        display core context stored in the base object
+ * @regs:       register offset table
+ * @dccg_shift: field shift table
+ * @dccg_mask:  field mask table
+ *
+ * Returns a pointer to the embedded base struct dccg, or NULL (after
+ * BREAK_TO_DEBUGGER()) when the zeroed allocation fails. The three tables
+ * are stored by pointer, not copied.
+ */
+struct dccg *dccg31_create(
+		struct dc_context *ctx,
+		const struct dccg_registers *regs,
+		const struct dccg_shift *dccg_shift,
+		const struct dccg_mask *dccg_mask)
+{
+	struct dcn_dccg *dccg_dcn;
+
+	dccg_dcn = kzalloc(sizeof(*dccg_dcn), GFP_KERNEL);
+	if (!dccg_dcn) {
+		BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+
+	dccg_dcn->base.ctx = ctx;
+	dccg_dcn->base.funcs = &dccg31_funcs;
+
+	dccg_dcn->regs = regs;
+	dccg_dcn->dccg_shift = dccg_shift;
+	dccg_dcn->dccg_mask = dccg_mask;
+
+	return &dccg_dcn->base;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.h
index a013a32bbaf7..bf659920d4cc 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.h
@@ -28,22 +28,20 @@
#include "dcn30/dcn30_dccg.h"
-#define DCCG_SFII(block, reg_name, field_prefix, field_name, inst, post_fix)\
- .field_prefix ## _ ## field_name[inst] = block ## inst ## _ ## reg_name ## __ ## field_prefix ## inst ## _ ## field_name ## post_fix
-
-
#define DCCG_REG_LIST_DCN31() \
SR(DPPCLK_DTO_CTRL),\
DCCG_SRII(DTO_PARAM, DPPCLK, 0),\
DCCG_SRII(DTO_PARAM, DPPCLK, 1),\
DCCG_SRII(DTO_PARAM, DPPCLK, 2),\
DCCG_SRII(DTO_PARAM, DPPCLK, 3),\
+ DCCG_SRII(CLOCK_CNTL, HDMICHARCLK, 0),\
SR(PHYASYMCLK_CLOCK_CNTL),\
SR(PHYBSYMCLK_CLOCK_CNTL),\
SR(PHYCSYMCLK_CLOCK_CNTL),\
SR(PHYDSYMCLK_CLOCK_CNTL),\
SR(PHYESYMCLK_CLOCK_CNTL),\
SR(DPSTREAMCLK_CNTL),\
+ SR(HDMISTREAMCLK_CNTL),\
SR(SYMCLK32_SE_CNTL),\
SR(SYMCLK32_LE_CNTL),\
DCCG_SRII(PIXEL_RATE_CNTL, OTG, 0),\
@@ -66,6 +64,7 @@
SR(DSCCLK1_DTO_PARAM),\
SR(DSCCLK2_DTO_PARAM),\
SR(DSCCLK_DTO_CTRL),\
+ SR(DCCG_GATE_DISABLE_CNTL2),\
SR(DCCG_GATE_DISABLE_CNTL3),\
SR(HDMISTREAMCLK0_DTO_PARAM)
@@ -81,6 +80,8 @@
DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 3, mask_sh),\
DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_PHASE, mask_sh),\
DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_MODULO, mask_sh),\
+ DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_EN, mask_sh),\
+ DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_SRC_SEL, mask_sh),\
DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_FORCE_EN, mask_sh),\
DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_FORCE_SRC_SEL, mask_sh),\
DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_FORCE_EN, mask_sh),\
@@ -95,6 +96,8 @@
DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK_PIPE1_EN, mask_sh),\
DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK_PIPE2_EN, mask_sh),\
DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK_PIPE3_EN, mask_sh),\
+ DCCG_SF(HDMISTREAMCLK_CNTL, HDMISTREAMCLK0_SRC_SEL, mask_sh),\
+ DCCG_SF(HDMISTREAMCLK_CNTL, HDMISTREAMCLK0_DTO_FORCE_DIS, mask_sh),\
DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE0_SRC_SEL, mask_sh),\
DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE1_SRC_SEL, mask_sh),\
DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE2_SRC_SEL, mask_sh),\
@@ -123,9 +126,13 @@
DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLK_DTO, DIV, 1, mask_sh),\
DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLK_DTO, DIV, 2, mask_sh),\
DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLK_DTO, DIV, 3, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 1, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 3, mask_sh),\
DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO_SEL, mask_sh),\
DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL, mask_sh),\
- DCCG_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_MODE, mask_sh), \
+ DCCG_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_MODE, mask_sh),\
DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_PHASE, mask_sh),\
DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_MODULO, mask_sh),\
DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_PHASE, mask_sh),\
@@ -135,6 +142,11 @@
DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK0_DTO_ENABLE, mask_sh),\
DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK1_DTO_ENABLE, mask_sh),\
DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK2_DTO_ENABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_GATE_DISABLE, mask_sh),\
DCCG_SF(DCCG_GATE_DISABLE_CNTL3, DPSTREAMCLK_ROOT_GATE_DISABLE, mask_sh),\
DCCG_SF(DCCG_GATE_DISABLE_CNTL3, DPSTREAMCLK_GATE_DISABLE, mask_sh),\
DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE0_GATE_DISABLE, mask_sh),\
@@ -155,11 +167,6 @@ struct dccg *dccg31_create(
void dccg31_init(struct dccg *dccg);
-void dccg31_set_dpstreamclk(
- struct dccg *dccg,
- enum hdmistreamclk_source src,
- int otg_inst);
-
void dccg31_enable_symclk32_se(
struct dccg *dccg,
int hpo_se_inst,
@@ -178,6 +185,11 @@ void dccg31_disable_symclk32_le(
struct dccg *dccg,
int hpo_le_inst);
+void dccg31_set_symclk32_le_root_clock_gating(
+ struct dccg *dccg,
+ int hpo_le_inst,
+ bool enable);
+
void dccg31_set_physymclk(
struct dccg *dccg,
int phy_inst,
@@ -186,10 +198,44 @@ void dccg31_set_physymclk(
void dccg31_set_audio_dtbclk_dto(
struct dccg *dccg,
- uint32_t req_audio_dtbclk_khz);
+ const struct dtbclk_dto_params *params);
+
+void dccg31_update_dpp_dto(
+ struct dccg *dccg,
+ int dpp_inst,
+ int req_dppclk);
-void dccg31_set_hdmistreamclk(
+void dccg31_get_dccg_ref_freq(
+ struct dccg *dccg,
+ unsigned int xtalin_freq_inKhz,
+ unsigned int *dccg_ref_freq_inKhz);
+
+void dccg31_set_dpstreamclk(
+ struct dccg *dccg,
+ enum streamclk_source src,
+ int otg_inst,
+ int dp_hpo_inst);
+
+void dccg31_set_dtbclk_dto(
struct dccg *dccg,
- enum hdmistreamclk_source src);
+ const struct dtbclk_dto_params *params);
+
+void dccg31_otg_add_pixel(
+ struct dccg *dccg,
+ uint32_t otg_inst);
+
+void dccg31_otg_drop_pixel(
+ struct dccg *dccg,
+ uint32_t otg_inst);
+
+void dccg31_set_dispclk_change_mode(
+ struct dccg *dccg,
+ enum dentist_dispclk_change_mode change_mode);
+
+void dccg31_disable_dscclk(struct dccg *dccg, int inst);
+
+void dccg31_enable_dscclk(struct dccg *dccg, int inst);
+
+void dccg31_read_reg_state(struct dccg *dccg, struct dcn_dccg_reg_state *dccg_reg_state);
#endif //__DCN31_DCCG_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.c
new file mode 100644
index 000000000000..ef3db6beba25
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.c
@@ -0,0 +1,407 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "reg_helper.h"
+#include "core_types.h"
+
+#include "dcn31/dcn31_dccg.h"
+#include "dcn314_dccg.h"
+
+/* Recover the enclosing struct dcn_dccg from a pointer to its embedded 'base' member. */
+#define TO_DCN_DCCG(dccg)\
+ container_of(dccg, struct dcn_dccg, base)
+
+/* Look up a register entry by name; expects a local 'dccg_dcn' in the calling function. */
+#define REG(reg) \
+ (dccg_dcn->regs->reg)
+
+/*
+ * Expand to the "shift, mask" argument pair of a field. reg_name is not used
+ * by the expansion; like REG(), a local 'dccg_dcn' must be in scope.
+ */
+#undef FN
+#define FN(reg_name, field_name) \
+ dccg_dcn->dccg_shift->field_name, dccg_dcn->dccg_mask->field_name
+
+/*
+ * Context and logger accessors. CTX goes through the local 'dccg_dcn',
+ * DC_LOGGER through the local 'dccg'.
+ * NOTE(review): presumably consumed by the REG_xxx and logging helper macros - confirm.
+ */
+#define CTX \
+ dccg_dcn->base.ctx
+#define DC_LOGGER \
+ dccg->ctx->logger
+
+/*
+ * Copy the current DENTIST_DISPCLK_RDIVIDER field value into
+ * DENTIST_DISPCLK_WDIVIDER.
+ * NOTE(review): per the function name this rewrite is what triggers the DIO
+ * FIFO resync - confirm against the hardware programming guide.
+ *
+ * @dccg: DCCG instance
+ */
+static void dccg314_trigger_dio_fifo_resync(
+	struct dccg *dccg)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	uint32_t dispclk_rdivider_value = 0;
+
+	REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_RDIVIDER, &dispclk_rdivider_value);
+
+	/*
+	 * Not valid for the WDIVIDER to be set to 0; same guard as
+	 * dccg32_trigger_dio_fifo_resync().
+	 */
+	if (dispclk_rdivider_value != 0)
+		REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, dispclk_rdivider_value);
+}
+
+/*
+ * Read the K1/K2 pixel rate divider fields of OTG_PIXEL_RATE_DIV for one OTG.
+ *
+ * @dccg: DCCG instance
+ * @otg_inst: OTG index, 0..3
+ * @k1: receives the OTGn_PIXEL_RATE_DIVK1 field value
+ * @k2: receives the OTGn_PIXEL_RATE_DIVK2 field value
+ *
+ * Both outputs are left at PIXEL_RATE_DIV_NA when otg_inst is out of range.
+ */
+static void dccg314_get_pixel_rate_div(
+	struct dccg *dccg,
+	uint32_t otg_inst,
+	uint32_t *k1,
+	uint32_t *k2)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	uint32_t divk1 = PIXEL_RATE_DIV_NA;
+	uint32_t divk2 = PIXEL_RATE_DIV_NA;
+
+	/* default answer, kept if the OTG index is rejected below */
+	*k1 = PIXEL_RATE_DIV_NA;
+	*k2 = PIXEL_RATE_DIV_NA;
+
+	switch (otg_inst) {
+	case 0:
+		REG_GET_2(OTG_PIXEL_RATE_DIV,
+			OTG0_PIXEL_RATE_DIVK1, &divk1,
+			OTG0_PIXEL_RATE_DIVK2, &divk2);
+		break;
+	case 1:
+		REG_GET_2(OTG_PIXEL_RATE_DIV,
+			OTG1_PIXEL_RATE_DIVK1, &divk1,
+			OTG1_PIXEL_RATE_DIVK2, &divk2);
+		break;
+	case 2:
+		REG_GET_2(OTG_PIXEL_RATE_DIV,
+			OTG2_PIXEL_RATE_DIVK1, &divk1,
+			OTG2_PIXEL_RATE_DIVK2, &divk2);
+		break;
+	case 3:
+		REG_GET_2(OTG_PIXEL_RATE_DIV,
+			OTG3_PIXEL_RATE_DIVK1, &divk1,
+			OTG3_PIXEL_RATE_DIVK2, &divk2);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+
+	*k1 = divk1;
+	*k2 = divk2;
+}
+
+/*
+ * Program the K1/K2 pixel rate dividers of one OTG in OTG_PIXEL_RATE_DIV.
+ *
+ * @dccg: DCCG instance
+ * @otg_inst: OTG index, 0..3
+ * @k1: new DIVK1 value; PIXEL_RATE_DIV_NA is rejected
+ * @k2: new DIVK2 value; PIXEL_RATE_DIV_NA is rejected
+ *
+ * The register is left untouched when the values read back through
+ * dccg314_get_pixel_rate_div() already match the request.
+ */
+static void dccg314_set_pixel_rate_div(
+	struct dccg *dccg,
+	uint32_t otg_inst,
+	enum pixel_rate_div k1,
+	enum pixel_rate_div k2)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	uint32_t hw_k1 = PIXEL_RATE_DIV_NA;
+	uint32_t hw_k2 = PIXEL_RATE_DIV_NA;
+
+	/*
+	 * Never program 0xF into the register field: the K1 / K2 fields
+	 * are only 1 / 2 bits wide.
+	 */
+	if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) {
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+
+	/* skip the write when the hardware already holds the requested pair */
+	dccg314_get_pixel_rate_div(dccg, otg_inst, &hw_k1, &hw_k2);
+	if (hw_k1 == k1 && hw_k2 == k2)
+		return;
+
+	switch (otg_inst) {
+	case 0:
+		REG_UPDATE_2(OTG_PIXEL_RATE_DIV,
+			OTG0_PIXEL_RATE_DIVK1, k1,
+			OTG0_PIXEL_RATE_DIVK2, k2);
+		break;
+	case 1:
+		REG_UPDATE_2(OTG_PIXEL_RATE_DIV,
+			OTG1_PIXEL_RATE_DIVK1, k1,
+			OTG1_PIXEL_RATE_DIVK2, k2);
+		break;
+	case 2:
+		REG_UPDATE_2(OTG_PIXEL_RATE_DIV,
+			OTG2_PIXEL_RATE_DIVK1, k1,
+			OTG2_PIXEL_RATE_DIVK2, k2);
+		break;
+	case 3:
+		REG_UPDATE_2(OTG_PIXEL_RATE_DIV,
+			OTG3_PIXEL_RATE_DIVK1, k1,
+			OTG3_PIXEL_RATE_DIVK2, k2);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		break;
+	}
+}
+
+/*
+ * Configure DTBCLK_P<otg_inst> in DTBCLK_P_CNTL.
+ *
+ * @dccg: DCCG instance
+ * @src: requested stream clock source
+ * @otg_inst: OTG index, 0..3
+ *
+ * REFCLK only clears the enable bit. Any other source sets the enable bit and
+ * programs the source select: 2 (dtbclk0) for DTBCLK0, 0 (dprefclk) otherwise.
+ */
+static void dccg314_set_dtbclk_p_src(
+	struct dccg *dccg,
+	enum streamclk_source src,
+	uint32_t otg_inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	/* 0 selects dprefclk, 2 selects dtbclk0 */
+	const uint32_t p_src_sel = (src == DTBCLK0) ? 2 : 0;
+	const bool turn_off = (src == REFCLK);
+
+	switch (otg_inst) {
+	case 0:
+		if (turn_off) {
+			REG_UPDATE(DTBCLK_P_CNTL,
+				DTBCLK_P0_EN, 0);
+		} else {
+			REG_UPDATE_2(DTBCLK_P_CNTL,
+				DTBCLK_P0_SRC_SEL, p_src_sel,
+				DTBCLK_P0_EN, 1);
+		}
+		break;
+	case 1:
+		if (turn_off) {
+			REG_UPDATE(DTBCLK_P_CNTL,
+				DTBCLK_P1_EN, 0);
+		} else {
+			REG_UPDATE_2(DTBCLK_P_CNTL,
+				DTBCLK_P1_SRC_SEL, p_src_sel,
+				DTBCLK_P1_EN, 1);
+		}
+		break;
+	case 2:
+		if (turn_off) {
+			REG_UPDATE(DTBCLK_P_CNTL,
+				DTBCLK_P2_EN, 0);
+		} else {
+			REG_UPDATE_2(DTBCLK_P_CNTL,
+				DTBCLK_P2_SRC_SEL, p_src_sel,
+				DTBCLK_P2_EN, 1);
+		}
+		break;
+	case 3:
+		if (turn_off) {
+			REG_UPDATE(DTBCLK_P_CNTL,
+				DTBCLK_P3_EN, 0);
+		} else {
+			REG_UPDATE_2(DTBCLK_P_CNTL,
+				DTBCLK_P3_SRC_SEL, p_src_sel,
+				DTBCLK_P3_EN, 1);
+		}
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		break;
+	}
+}
+
+/*
+ * Controls the generation of pixel valid for OTG in (OTG -> HPO case).
+ *
+ * @dccg: DCCG instance
+ * @params: DTO request; reads otg_inst, pixclk_khz and ref_dtbclk_khz
+ *
+ * The requested DTBCLK rate is pixclk_khz / 4. When it and ref_dtbclk_khz are
+ * both non-zero the DTO is programmed and enabled; otherwise the DTO is
+ * disabled and its modulo/phase registers are cleared.
+ */
+static void dccg314_set_dtbclk_dto(
+	struct dccg *dccg,
+	const struct dtbclk_dto_params *params)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	/* DTO Output Rate / Pixel Rate = 1/4 */
+	int req_dtbclk_khz = params->pixclk_khz / 4;
+	uint32_t dto_modulo, dto_phase;
+
+	if (!params->ref_dtbclk_khz || !req_dtbclk_khz) {
+		/* nothing to generate: switch the DTO off and clear it */
+		REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+			DTBCLK_DTO_ENABLE[params->otg_inst], 0,
+			PIPE_DTO_SRC_SEL[params->otg_inst], 1);
+
+		REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0);
+		REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0);
+		return;
+	}
+
+	// phase / modulo = dtbclk / dtbclk ref
+	dto_modulo = params->ref_dtbclk_khz * 1000;
+	dto_phase = req_dtbclk_khz * 1000;
+
+	REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], dto_modulo);
+	REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], dto_phase);
+
+	REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+		DTBCLK_DTO_ENABLE[params->otg_inst], 1);
+
+	REG_WAIT(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+		DTBCLKDTO_ENABLE_STATUS[params->otg_inst], 1,
+		1, 100);
+
+	/* program OTG_PIXEL_RATE_DIV for DIVK1 and DIVK2 fields */
+	dccg314_set_pixel_rate_div(dccg, params->otg_inst, PIXEL_RATE_DIV_BY_1, PIXEL_RATE_DIV_BY_1);
+
+	/* The recommended programming sequence to enable DTBCLK DTO to generate
+	 * valid pixel HPO DPSTREAM ENCODER, specifies that DTO source select should
+	 * be set only after DTO is enabled
+	 */
+	REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+		PIPE_DTO_SRC_SEL[params->otg_inst], 2);
+}
+
+/*
+ * Route a DTBCLK_P to one DP HPO stream clock.
+ *
+ * @dccg: DCCG instance
+ * @src: stream clock source, forwarded to dccg314_set_dtbclk_p_src()
+ * @otg_inst: OTG index, also written as the DPSTREAMCLKn source select
+ * @dp_hpo_inst: DP HPO instance, 0..3, selecting the DPSTREAMCLKn fields
+ *
+ * DPSTREAMCLKn_EN is cleared when src is REFCLK and set otherwise.
+ */
+void dccg314_set_dpstreamclk(
+	struct dccg *dccg,
+	enum streamclk_source src,
+	int otg_inst,
+	int dp_hpo_inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	/* the stream clock is only enabled for sources other than REFCLK */
+	const int stream_en = (src == REFCLK) ? 0 : 1;
+
+	/* set the dtbclk_p source */
+	dccg314_set_dtbclk_p_src(dccg, src, otg_inst);
+
+	/* enabled to select one of the DTBCLKs for pipe */
+	switch (dp_hpo_inst) {
+	case 0:
+		REG_UPDATE_2(DPSTREAMCLK_CNTL,
+			DPSTREAMCLK0_EN, stream_en,
+			DPSTREAMCLK0_SRC_SEL, otg_inst);
+		break;
+	case 1:
+		REG_UPDATE_2(DPSTREAMCLK_CNTL,
+			DPSTREAMCLK1_EN, stream_en,
+			DPSTREAMCLK1_SRC_SEL, otg_inst);
+		break;
+	case 2:
+		REG_UPDATE_2(DPSTREAMCLK_CNTL,
+			DPSTREAMCLK2_EN, stream_en,
+			DPSTREAMCLK2_SRC_SEL, otg_inst);
+		break;
+	case 3:
+		REG_UPDATE_2(DPSTREAMCLK_CNTL,
+			DPSTREAMCLK3_EN, stream_en,
+			DPSTREAMCLK3_SRC_SEL, otg_inst);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		break;
+	}
+}
+
+/*
+ * Put the DCCG clocks into their initial state.
+ *
+ * @dccg: DCCG instance
+ *
+ * SYMCLK32 for the four HPO stream encoders is always disabled. The link
+ * encoder, DP stream and PHY symbol clock steps each run only when the
+ * matching root_clock_optimization debug bit is set.
+ */
+static void dccg314_init(struct dccg *dccg)
+{
+	int inst;
+
+	/* Set HPO stream encoder to use refclk to avoid case where PHY is
+	 * disabled and SYMCLK32 for HPO SE is sourced from PHYD32CLK which
+	 * will cause DCN to hang.
+	 */
+	for (inst = 0; inst < 4; inst++)
+		dccg31_disable_symclk32_se(dccg, inst);
+
+	if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) {
+		for (inst = 0; inst < 2; inst++)
+			dccg31_disable_symclk32_le(dccg, inst);
+	}
+
+	if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) {
+		/* OTG index and DP HPO index are the same value here */
+		for (inst = 0; inst < 4; inst++)
+			dccg314_set_dpstreamclk(dccg, REFCLK, inst, inst);
+	}
+
+	if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) {
+		for (inst = 0; inst < 5; inst++)
+			dccg31_set_physymclk(dccg, inst,
+					PHYSYMCLK_FORCE_SRC_SYMCLK, false);
+	}
+}
+
+/*
+ * Convenience wrapper around dccg314_set_dtbclk_dto(): packs the three
+ * arguments into a struct dtbclk_dto_params whose remaining members are zero.
+ *
+ * @dccg: DCCG instance
+ * @ref_dtbclk_khz: DTBCLK reference in kHz
+ * @otg_inst: OTG index
+ * @pixclk_khz: pixel clock in kHz
+ */
+static void dccg314_set_valid_pixel_rate(
+	struct dccg *dccg,
+	int ref_dtbclk_khz,
+	int otg_inst,
+	int pixclk_khz)
+{
+	/* members not named here are zero-initialized */
+	struct dtbclk_dto_params dto_params = {
+		.ref_dtbclk_khz = ref_dtbclk_khz,
+		.otg_inst = otg_inst,
+		.pixclk_khz = pixclk_khz,
+	};
+
+	dccg314_set_dtbclk_dto(dccg, &dto_params);
+}
+
+/*
+ * Gate or ungate the DPP clock of one DPP through its DPPCLK DTO.
+ *
+ * @dccg: DCCG instance; dpp_clock_gated[dpp_inst] tracks the current state
+ * @dpp_inst: DPP index
+ * @clock_on: true to run the clock, false to gate it
+ *
+ * Returns without touching hardware when the tracked state already matches
+ * the request.
+ */
+static void dccg314_dpp_root_clock_control(
+	struct dccg *dccg,
+	unsigned int dpp_inst,
+	bool clock_on)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const bool gated_now = dccg->dpp_clock_gated[dpp_inst];
+
+	/* gated + "off" requested, or running + "on" requested: no change */
+	if (gated_now != clock_on)
+		return;
+
+	if (!clock_on) {
+		/* turn on the DTO to generate a 0hz clock */
+		REG_UPDATE(DPPCLK_DTO_CTRL, DPPCLK_DTO_ENABLE[dpp_inst], 1);
+		REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0,
+			  DPPCLK0_DTO_PHASE, 0,
+			  DPPCLK0_DTO_MODULO, 1);
+	} else {
+		/* turn off the DTO and leave phase/modulo at max */
+		REG_UPDATE(DPPCLK_DTO_CTRL, DPPCLK_DTO_ENABLE[dpp_inst], 0);
+		REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0,
+			  DPPCLK0_DTO_PHASE, 0xFF,
+			  DPPCLK0_DTO_MODULO, 0xFF);
+	}
+
+	dccg->dpp_clock_gated[dpp_inst] = !clock_on;
+}
+
+/*
+ * DCCG function table installed by dccg314_create().
+ * The dccg314_* entries are implemented in this file; the remaining hooks
+ * reuse the dccg31_* and dccg2_* implementations.
+ */
+static const struct dccg_funcs dccg314_funcs = {
+ .update_dpp_dto = dccg31_update_dpp_dto,
+ .dpp_root_clock_control = dccg314_dpp_root_clock_control,
+ .get_dccg_ref_freq = dccg31_get_dccg_ref_freq,
+ .dccg_init = dccg314_init,
+ .set_dpstreamclk = dccg314_set_dpstreamclk,
+ .enable_symclk32_se = dccg31_enable_symclk32_se,
+ .disable_symclk32_se = dccg31_disable_symclk32_se,
+ .enable_symclk32_le = dccg31_enable_symclk32_le,
+ .disable_symclk32_le = dccg31_disable_symclk32_le,
+ .set_symclk32_le_root_clock_gating = dccg31_set_symclk32_le_root_clock_gating,
+ .set_physymclk = dccg31_set_physymclk,
+ .set_dtbclk_dto = dccg314_set_dtbclk_dto,
+ .set_audio_dtbclk_dto = dccg31_set_audio_dtbclk_dto,
+ .set_fifo_errdet_ovr_en = dccg2_set_fifo_errdet_ovr_en,
+ .otg_add_pixel = dccg31_otg_add_pixel,
+ .otg_drop_pixel = dccg31_otg_drop_pixel,
+ .set_dispclk_change_mode = dccg31_set_dispclk_change_mode,
+ .disable_dsc = dccg31_disable_dscclk,
+ .enable_dsc = dccg31_enable_dscclk,
+ .set_pixel_rate_div = dccg314_set_pixel_rate_div,
+ .get_pixel_rate_div = dccg314_get_pixel_rate_div,
+ .trigger_dio_fifo_resync = dccg314_trigger_dio_fifo_resync,
+ .set_valid_pixel_rate = dccg314_set_valid_pixel_rate,
+ .set_dtbclk_p_src = dccg314_set_dtbclk_p_src,
+ .dccg_read_reg_state = dccg31_read_reg_state
+};
+
+/*
+ * Allocate and set up a DCN3.14 DCCG object.
+ *
+ * @ctx: display context stored in the base object
+ * @regs: register table
+ * @dccg_shift: field shift table
+ * @dccg_mask: field mask table
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+struct dccg *dccg314_create(
+	struct dc_context *ctx,
+	const struct dccg_registers *regs,
+	const struct dccg_shift *dccg_shift,
+	const struct dccg_mask *dccg_mask)
+{
+	struct dcn_dccg *dccg_dcn;
+
+	dccg_dcn = kzalloc(sizeof(*dccg_dcn), GFP_KERNEL);
+	if (!dccg_dcn) {
+		BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+
+	/* generic part: context and the DCN3.14 function table */
+	dccg_dcn->base.ctx = ctx;
+	dccg_dcn->base.funcs = &dccg314_funcs;
+
+	/* register description used by the REG()/FN() helpers */
+	dccg_dcn->regs = regs;
+	dccg_dcn->dccg_shift = dccg_shift;
+	dccg_dcn->dccg_mask = dccg_mask;
+
+	return &dccg_dcn->base;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.h
new file mode 100644
index 000000000000..a609635f35db
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.h
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN314_DCCG_H__
+#define __DCN314_DCCG_H__
+
+#include "dcn31/dcn31_dccg.h"
+
+/*
+ * Build one designated initializer for an indexed field:
+ *   .<field_prefix>_<field_name>[inst] =
+ *       <block><inst>_<reg_name>__<field_prefix><inst>_<field_name><post_fix>
+ * e.g. DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLK_DTO, ENABLE, 0, _MASK) gives
+ *   .DTBCLK_DTO_ENABLE[0] = OTG0_PIXEL_RATE_CNTL__DTBCLK_DTO0_ENABLE_MASK
+ */
+#define DCCG_SFII(block, reg_name, field_prefix, field_name, inst, post_fix)\
+ .field_prefix ## _ ## field_name[inst] = block ## inst ## _ ## reg_name ## __ ## field_prefix ## inst ## _ ## field_name ## post_fix
+
+
+/*
+ * Comma-separated list of DCCG register entries for DCN3.14, built from
+ * SR() and DCCG_SRII(), both defined outside this header.
+ * NOTE(review): presumably expanded inside a struct dccg_registers
+ * initializer - confirm against the DCN3.14 resource file.
+ */
+#define DCCG_REG_LIST_DCN314() \
+ SR(DPPCLK_DTO_CTRL),\
+ DCCG_SRII(DTO_PARAM, DPPCLK, 0),\
+ DCCG_SRII(DTO_PARAM, DPPCLK, 1),\
+ DCCG_SRII(DTO_PARAM, DPPCLK, 2),\
+ DCCG_SRII(DTO_PARAM, DPPCLK, 3),\
+ DCCG_SRII(CLOCK_CNTL, HDMICHARCLK, 0),\
+ SR(PHYASYMCLK_CLOCK_CNTL),\
+ SR(PHYBSYMCLK_CLOCK_CNTL),\
+ SR(PHYCSYMCLK_CLOCK_CNTL),\
+ SR(PHYDSYMCLK_CLOCK_CNTL),\
+ SR(PHYESYMCLK_CLOCK_CNTL),\
+ SR(DPSTREAMCLK_CNTL),\
+ SR(HDMISTREAMCLK_CNTL),\
+ SR(SYMCLK32_SE_CNTL),\
+ SR(SYMCLK32_LE_CNTL),\
+ DCCG_SRII(PIXEL_RATE_CNTL, OTG, 0),\
+ DCCG_SRII(PIXEL_RATE_CNTL, OTG, 1),\
+ DCCG_SRII(PIXEL_RATE_CNTL, OTG, 2),\
+ DCCG_SRII(PIXEL_RATE_CNTL, OTG, 3),\
+ DCCG_SRII(MODULO, DTBCLK_DTO, 0),\
+ DCCG_SRII(MODULO, DTBCLK_DTO, 1),\
+ DCCG_SRII(MODULO, DTBCLK_DTO, 2),\
+ DCCG_SRII(MODULO, DTBCLK_DTO, 3),\
+ DCCG_SRII(PHASE, DTBCLK_DTO, 0),\
+ DCCG_SRII(PHASE, DTBCLK_DTO, 1),\
+ DCCG_SRII(PHASE, DTBCLK_DTO, 2),\
+ DCCG_SRII(PHASE, DTBCLK_DTO, 3),\
+ SR(DCCG_AUDIO_DTBCLK_DTO_MODULO),\
+ SR(DCCG_AUDIO_DTBCLK_DTO_PHASE),\
+ SR(DCCG_AUDIO_DTO_SOURCE),\
+ SR(DENTIST_DISPCLK_CNTL),\
+ SR(DSCCLK0_DTO_PARAM),\
+ SR(DSCCLK1_DTO_PARAM),\
+ SR(DSCCLK2_DTO_PARAM),\
+ SR(DSCCLK3_DTO_PARAM),\
+ SR(DSCCLK_DTO_CTRL),\
+ SR(DCCG_GATE_DISABLE_CNTL2),\
+ SR(DCCG_GATE_DISABLE_CNTL3),\
+ SR(HDMISTREAMCLK0_DTO_PARAM),\
+ SR(OTG_PIXEL_RATE_DIV),\
+ SR(DTBCLK_P_CNTL)
+
+/*
+ * Field list expanded first by DCCG_MASK_SH_LIST_DCN314(). 'mask_sh' is
+ * handed to every helper as its suffix argument (DCCG_SFII() pastes it onto
+ * the generated field name). Each field must appear exactly once: a repeated
+ * entry expands to a duplicate designated initializer.
+ */
+#define DCCG_MASK_SH_LIST_DCN314_COMMON(mask_sh) \
+	DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 0, mask_sh),\
+	DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 1, mask_sh),\
+	DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 2, mask_sh),\
+	DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 3, mask_sh),\
+	DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_PHASE, mask_sh),\
+	DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_MODULO, mask_sh),\
+	DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_EN, mask_sh),\
+	DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_SRC_SEL, mask_sh),\
+	DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK0_EN, mask_sh),\
+	DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK1_EN, mask_sh),\
+	DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK2_EN, mask_sh),\
+	DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK3_EN, mask_sh),\
+	DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK0_SRC_SEL, mask_sh),\
+	DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK1_SRC_SEL, mask_sh),\
+	DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK2_SRC_SEL, mask_sh),\
+	DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK3_SRC_SEL, mask_sh),\
+	DCCG_SF(HDMISTREAMCLK_CNTL, HDMISTREAMCLK0_EN, mask_sh),\
+	DCCG_SF(HDMISTREAMCLK_CNTL, HDMISTREAMCLK0_SRC_SEL, mask_sh),\
+	DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE0_SRC_SEL, mask_sh),\
+	DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE1_SRC_SEL, mask_sh),\
+	DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE2_SRC_SEL, mask_sh),\
+	DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE3_SRC_SEL, mask_sh),\
+	DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE0_EN, mask_sh),\
+	DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE1_EN, mask_sh),\
+	DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE2_EN, mask_sh),\
+	DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE3_EN, mask_sh),\
+	DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE0_SRC_SEL, mask_sh),\
+	DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE1_SRC_SEL, mask_sh),\
+	DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE0_EN, mask_sh),\
+	DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE1_EN, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLK_DTO, ENABLE, 0, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLK_DTO, ENABLE, 1, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLK_DTO, ENABLE, 2, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLK_DTO, ENABLE, 3, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLKDTO, ENABLE_STATUS, 0, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLKDTO, ENABLE_STATUS, 1, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLKDTO, ENABLE_STATUS, 2, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLKDTO, ENABLE_STATUS, 3, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 0, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 1, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 2, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 3, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 0, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 1, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 2, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 3, mask_sh),\
+	DCCG_SF(OTG_PIXEL_RATE_DIV, OTG0_PIXEL_RATE_DIVK1, mask_sh),\
+	DCCG_SF(OTG_PIXEL_RATE_DIV, OTG0_PIXEL_RATE_DIVK2, mask_sh),\
+	DCCG_SF(OTG_PIXEL_RATE_DIV, OTG1_PIXEL_RATE_DIVK1, mask_sh),\
+	DCCG_SF(OTG_PIXEL_RATE_DIV, OTG1_PIXEL_RATE_DIVK2, mask_sh),\
+	DCCG_SF(OTG_PIXEL_RATE_DIV, OTG2_PIXEL_RATE_DIVK1, mask_sh),\
+	DCCG_SF(OTG_PIXEL_RATE_DIV, OTG2_PIXEL_RATE_DIVK2, mask_sh),\
+	DCCG_SF(OTG_PIXEL_RATE_DIV, OTG3_PIXEL_RATE_DIVK1, mask_sh),\
+	DCCG_SF(OTG_PIXEL_RATE_DIV, OTG3_PIXEL_RATE_DIVK2, mask_sh),\
+	DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P0_SRC_SEL, mask_sh),\
+	DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P0_EN, mask_sh),\
+	DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P1_SRC_SEL, mask_sh),\
+	DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P1_EN, mask_sh),\
+	DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P2_SRC_SEL, mask_sh),\
+	DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P2_EN, mask_sh),\
+	DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P3_SRC_SEL, mask_sh),\
+	DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P3_EN, mask_sh),\
+	DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL, mask_sh),\
+	DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO_SEL, mask_sh),\
+	DCCG_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_MODE, mask_sh),\
+	DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_PHASE, mask_sh),\
+	DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_MODULO, mask_sh),\
+	DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_PHASE, mask_sh),\
+	DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_MODULO, mask_sh),\
+	DCCG_SF(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_PHASE, mask_sh),\
+	DCCG_SF(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_MODULO, mask_sh),\
+	DCCG_SF(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_PHASE, mask_sh),\
+	DCCG_SF(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_MODULO, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE1_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE2_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE3_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE1_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE1_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE2_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE3_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE1_GATE_DISABLE, mask_sh),\
+	DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_PHASE, mask_sh),\
+	DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_MODULO, mask_sh)
+
+/*
+ * Full DCN3.14 field list: everything from DCCG_MASK_SH_LIST_DCN314_COMMON()
+ * followed by the entries below. Fields already supplied by the common list
+ * (such as HDMISTREAMCLK0_DTO_MODULO) must not be repeated here, or the
+ * expansion contains duplicate designated initializers.
+ */
+#define DCCG_MASK_SH_LIST_DCN314(mask_sh) \
+	DCCG_MASK_SH_LIST_DCN314_COMMON(mask_sh),\
+	DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 0, mask_sh),\
+	DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 1, mask_sh),\
+	DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 2, mask_sh),\
+	DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 3, mask_sh),\
+	DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_FORCE_EN, mask_sh),\
+	DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_FORCE_SRC_SEL, mask_sh),\
+	DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_FORCE_EN, mask_sh),\
+	DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_FORCE_SRC_SEL, mask_sh),\
+	DCCG_SF(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_FORCE_EN, mask_sh),\
+	DCCG_SF(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_FORCE_SRC_SEL, mask_sh),\
+	DCCG_SF(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_FORCE_EN, mask_sh),\
+	DCCG_SF(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_FORCE_SRC_SEL, mask_sh),\
+	DCCG_SF(PHYESYMCLK_CLOCK_CNTL, PHYESYMCLK_FORCE_EN, mask_sh),\
+	DCCG_SF(PHYESYMCLK_CLOCK_CNTL, PHYESYMCLK_FORCE_SRC_SEL, mask_sh),\
+	DCCG_SF(HDMISTREAMCLK_CNTL, HDMISTREAMCLK0_DTO_FORCE_DIS, mask_sh),\
+	DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK0_DTO_ENABLE, mask_sh),\
+	DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK1_DTO_ENABLE, mask_sh),\
+	DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK2_DTO_ENABLE, mask_sh),\
+	DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK3_DTO_ENABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_RDIVIDER, mask_sh),\
+	DCCG_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, mask_sh)
+
+/*
+ * Allocate a DCN3.14 DCCG object bound to the given context, register table
+ * and shift/mask tables. Returns the embedded base object, or NULL when the
+ * allocation fails.
+ */
+struct dccg *dccg314_create(
+ struct dc_context *ctx,
+ const struct dccg_registers *regs,
+ const struct dccg_shift *dccg_shift,
+ const struct dccg_mask *dccg_mask);
+
+/*
+ * Set the DTBCLK_P source for otg_inst, then program the enable and source
+ * select fields of DPSTREAMCLK<dp_hpo_inst>; the enable is cleared when src
+ * is REFCLK.
+ */
+void dccg314_set_dpstreamclk(
+ struct dccg *dccg,
+ enum streamclk_source src,
+ int otg_inst,
+ int dp_hpo_inst);
+
+#endif //__DCN314_DCCG_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn32/dcn32_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn32/dcn32_dccg.c
new file mode 100644
index 000000000000..21a6ca5ca192
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn32/dcn32_dccg.c
@@ -0,0 +1,375 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "reg_helper.h"
+#include "core_types.h"
+#include "dcn32_dccg.h"
+
+/* Recover the enclosing struct dcn_dccg from a pointer to its embedded 'base' member. */
+#define TO_DCN_DCCG(dccg)\
+ container_of(dccg, struct dcn_dccg, base)
+
+/* Look up a register entry by name; expects a local 'dccg_dcn' in the calling function. */
+#define REG(reg) \
+ (dccg_dcn->regs->reg)
+
+/*
+ * Expand to the "shift, mask" argument pair of a field. reg_name is not used
+ * by the expansion; like REG(), a local 'dccg_dcn' must be in scope.
+ */
+#undef FN
+#define FN(reg_name, field_name) \
+ dccg_dcn->dccg_shift->field_name, dccg_dcn->dccg_mask->field_name
+
+/*
+ * Context and logger accessors. CTX goes through the local 'dccg_dcn',
+ * DC_LOGGER through the local 'dccg'.
+ * NOTE(review): presumably consumed by the REG_xxx and logging helper macros - confirm.
+ */
+#define CTX \
+ dccg_dcn->base.ctx
+#define DC_LOGGER \
+ dccg->ctx->logger
+
+/*
+ * Copy the current DENTIST_DISPCLK_RDIVIDER field value into
+ * DENTIST_DISPCLK_WDIVIDER, unless the value read back is 0.
+ *
+ * @dccg: DCCG instance
+ */
+static void dccg32_trigger_dio_fifo_resync(
+	struct dccg *dccg)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	uint32_t rdivider = 0;
+
+	REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_RDIVIDER, &rdivider);
+
+	/* Not valid for the WDIVIDER to be set to 0 */
+	if (rdivider == 0)
+		return;
+
+	REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, rdivider);
+}
+
+/*
+ * Read the K1/K2 pixel rate divider fields of OTG_PIXEL_RATE_DIV for one OTG.
+ *
+ * @dccg: DCCG instance
+ * @otg_inst: OTG index, 0..3
+ * @k1: receives the OTGn_PIXEL_RATE_DIVK1 field value
+ * @k2: receives the OTGn_PIXEL_RATE_DIVK2 field value
+ *
+ * Both outputs are left at PIXEL_RATE_DIV_NA when otg_inst is out of range.
+ */
+static void dccg32_get_pixel_rate_div(
+	struct dccg *dccg,
+	uint32_t otg_inst,
+	uint32_t *k1,
+	uint32_t *k2)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	uint32_t divk1 = PIXEL_RATE_DIV_NA;
+	uint32_t divk2 = PIXEL_RATE_DIV_NA;
+
+	/* default answer, kept if the OTG index is rejected below */
+	*k1 = PIXEL_RATE_DIV_NA;
+	*k2 = PIXEL_RATE_DIV_NA;
+
+	switch (otg_inst) {
+	case 0:
+		REG_GET_2(OTG_PIXEL_RATE_DIV,
+			OTG0_PIXEL_RATE_DIVK1, &divk1,
+			OTG0_PIXEL_RATE_DIVK2, &divk2);
+		break;
+	case 1:
+		REG_GET_2(OTG_PIXEL_RATE_DIV,
+			OTG1_PIXEL_RATE_DIVK1, &divk1,
+			OTG1_PIXEL_RATE_DIVK2, &divk2);
+		break;
+	case 2:
+		REG_GET_2(OTG_PIXEL_RATE_DIV,
+			OTG2_PIXEL_RATE_DIVK1, &divk1,
+			OTG2_PIXEL_RATE_DIVK2, &divk2);
+		break;
+	case 3:
+		REG_GET_2(OTG_PIXEL_RATE_DIV,
+			OTG3_PIXEL_RATE_DIVK1, &divk1,
+			OTG3_PIXEL_RATE_DIVK2, &divk2);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+
+	*k1 = divk1;
+	*k2 = divk2;
+}
+
+/*
+ * Program the K1/K2 pixel rate dividers of one OTG in OTG_PIXEL_RATE_DIV.
+ *
+ * @dccg: DCCG instance
+ * @otg_inst: OTG index, 0..3
+ * @k1: new DIVK1 value; PIXEL_RATE_DIV_NA is rejected
+ * @k2: new DIVK2 value; PIXEL_RATE_DIV_NA is rejected
+ *
+ * The register is left untouched when the values read back through
+ * dccg32_get_pixel_rate_div() already match the request.
+ */
+static void dccg32_set_pixel_rate_div(
+	struct dccg *dccg,
+	uint32_t otg_inst,
+	enum pixel_rate_div k1,
+	enum pixel_rate_div k2)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	uint32_t hw_k1 = PIXEL_RATE_DIV_NA;
+	uint32_t hw_k2 = PIXEL_RATE_DIV_NA;
+
+	/*
+	 * Never program 0xF into the register field: the K1 / K2 fields
+	 * are only 1 / 2 bits wide.
+	 */
+	if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) {
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+
+	/* skip the write when the hardware already holds the requested pair */
+	dccg32_get_pixel_rate_div(dccg, otg_inst, &hw_k1, &hw_k2);
+	if (hw_k1 == k1 && hw_k2 == k2)
+		return;
+
+	switch (otg_inst) {
+	case 0:
+		REG_UPDATE_2(OTG_PIXEL_RATE_DIV,
+			OTG0_PIXEL_RATE_DIVK1, k1,
+			OTG0_PIXEL_RATE_DIVK2, k2);
+		break;
+	case 1:
+		REG_UPDATE_2(OTG_PIXEL_RATE_DIV,
+			OTG1_PIXEL_RATE_DIVK1, k1,
+			OTG1_PIXEL_RATE_DIVK2, k2);
+		break;
+	case 2:
+		REG_UPDATE_2(OTG_PIXEL_RATE_DIV,
+			OTG2_PIXEL_RATE_DIVK1, k1,
+			OTG2_PIXEL_RATE_DIVK2, k2);
+		break;
+	case 3:
+		REG_UPDATE_2(OTG_PIXEL_RATE_DIV,
+			OTG3_PIXEL_RATE_DIVK1, k1,
+			OTG3_PIXEL_RATE_DIVK2, k2);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		break;
+	}
+}
+
+/*
+ * Configure DTBCLK_P<otg_inst> in DTBCLK_P_CNTL.
+ *
+ * @dccg: DCCG instance
+ * @src: requested stream clock source
+ * @otg_inst: OTG index, 0..3
+ *
+ * REFCLK only clears the enable bit. Any other source sets the enable bit and
+ * programs the source select: 2 (dtbclk0) for DTBCLK0, 0 (dprefclk) otherwise.
+ */
+static void dccg32_set_dtbclk_p_src(
+	struct dccg *dccg,
+	enum streamclk_source src,
+	uint32_t otg_inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	/* 0 selects dprefclk, 2 selects dtbclk0 */
+	const uint32_t p_src_sel = (src == DTBCLK0) ? 2 : 0;
+	const bool turn_off = (src == REFCLK);
+
+	switch (otg_inst) {
+	case 0:
+		if (turn_off) {
+			REG_UPDATE(DTBCLK_P_CNTL,
+				DTBCLK_P0_EN, 0);
+		} else {
+			REG_UPDATE_2(DTBCLK_P_CNTL,
+				DTBCLK_P0_SRC_SEL, p_src_sel,
+				DTBCLK_P0_EN, 1);
+		}
+		break;
+	case 1:
+		if (turn_off) {
+			REG_UPDATE(DTBCLK_P_CNTL,
+				DTBCLK_P1_EN, 0);
+		} else {
+			REG_UPDATE_2(DTBCLK_P_CNTL,
+				DTBCLK_P1_SRC_SEL, p_src_sel,
+				DTBCLK_P1_EN, 1);
+		}
+		break;
+	case 2:
+		if (turn_off) {
+			REG_UPDATE(DTBCLK_P_CNTL,
+				DTBCLK_P2_EN, 0);
+		} else {
+			REG_UPDATE_2(DTBCLK_P_CNTL,
+				DTBCLK_P2_SRC_SEL, p_src_sel,
+				DTBCLK_P2_EN, 1);
+		}
+		break;
+	case 3:
+		if (turn_off) {
+			REG_UPDATE(DTBCLK_P_CNTL,
+				DTBCLK_P3_EN, 0);
+		} else {
+			REG_UPDATE_2(DTBCLK_P_CNTL,
+				DTBCLK_P3_SRC_SEL, p_src_sel,
+				DTBCLK_P3_EN, 1);
+		}
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		break;
+	}
+}
+
+/*
+ * Controls the generation of pixel valid for OTG in (OTG -> HPO case).
+ *
+ * @dccg: DCCG instance
+ * @params: DTO request; reads otg_inst, pixclk_khz, ref_dtbclk_khz, is_hdmi
+ *
+ * The requested DTBCLK rate is pixclk_khz / 4. When it and ref_dtbclk_khz are
+ * both non-zero the DTO is programmed and enabled; otherwise the DTO is
+ * disabled, the pipe DTO source select is set to 0 for HDMI or 1 otherwise,
+ * and the modulo/phase registers are cleared.
+ */
+static void dccg32_set_dtbclk_dto(
+	struct dccg *dccg,
+	const struct dtbclk_dto_params *params)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	/* DTO Output Rate / Pixel Rate = 1/4 */
+	int req_dtbclk_khz = params->pixclk_khz / 4;
+	uint32_t dto_modulo, dto_phase;
+
+	if (!params->ref_dtbclk_khz || !req_dtbclk_khz) {
+		/* nothing to generate: switch the DTO off and clear it */
+		REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+			DTBCLK_DTO_ENABLE[params->otg_inst], 0,
+			PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 0 : 1);
+		REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0);
+		REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0);
+		return;
+	}
+
+	// phase / modulo = dtbclk / dtbclk ref
+	dto_modulo = params->ref_dtbclk_khz * 1000;
+	dto_phase = req_dtbclk_khz * 1000;
+
+	REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], dto_modulo);
+	REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], dto_phase);
+
+	REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+		DTBCLK_DTO_ENABLE[params->otg_inst], 1);
+
+	REG_WAIT(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+		DTBCLKDTO_ENABLE_STATUS[params->otg_inst], 1,
+		1, 100);
+
+	/* program OTG_PIXEL_RATE_DIV for DIVK1 and DIVK2 fields */
+	dccg32_set_pixel_rate_div(dccg, params->otg_inst, PIXEL_RATE_DIV_BY_1, PIXEL_RATE_DIV_BY_1);
+
+	/* The recommended programming sequence to enable DTBCLK DTO to generate
+	 * valid pixel HPO DPSTREAM ENCODER, specifies that DTO source select should
+	 * be set only after DTO is enabled
+	 */
+	REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+		PIPE_DTO_SRC_SEL[params->otg_inst], 2);
+}
+
+/*
+ * Convenience wrapper around dccg32_set_dtbclk_dto(): packs the arguments
+ * into a dtbclk_dto_params with is_hdmi set to true (every other member
+ * zero) and programs the DTO with it.
+ */
+static void dccg32_set_valid_pixel_rate(
+		struct dccg *dccg,
+		int ref_dtbclk_khz,
+		int otg_inst,
+		int pixclk_khz)
+{
+	struct dtbclk_dto_params dto_params = {
+		.ref_dtbclk_khz = ref_dtbclk_khz,
+		.otg_inst = otg_inst,
+		.pixclk_khz = pixclk_khz,
+		.is_hdmi = true,
+	};
+
+	dccg32_set_dtbclk_dto(dccg, &dto_params);
+}
+
+/*
+ * Report the DCCG reference frequency in kHz.
+ *
+ * The reference clock is assumed to be sourced from xtalin (100MHz
+ * expected), so the xtalin frequency is passed straight through to
+ * *dccg_ref_freq_inKhz. @dccg is not used.
+ */
+static void dccg32_get_dccg_ref_freq(struct dccg *dccg,
+		unsigned int xtalin_freq_inKhz,
+		unsigned int *dccg_ref_freq_inKhz)
+{
+	*dccg_ref_freq_inKhz = xtalin_freq_inKhz;
+}
+
+/*
+ * Configure DPSTREAMCLK for one HPO DP stream encoder instance.
+ *
+ * @dccg:        DCCG instance
+ * @src:         REFCLK clears DPSTREAMCLK#_EN, anything else sets it
+ * @otg_inst:    OTG whose DTBCLK_P is written to DPSTREAMCLK#_SRC_SEL
+ * @dp_hpo_inst: stream instance, 0-3; other values hit BREAK_TO_DEBUGGER()
+ */
+static void dccg32_set_dpstreamclk(
+		struct dccg *dccg,
+		enum streamclk_source src,
+		int otg_inst,
+		int dp_hpo_inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const int stream_en = (src == REFCLK) ? 0 : 1;
+
+	/*
+	 * DTBCLK_P is always sourced from DTBCLK0 regardless of @src; no
+	 * use-case is expected to require DPREFCLK as refclk.
+	 */
+	dccg32_set_dtbclk_p_src(dccg, DTBCLK0, otg_inst);
+
+	/* pick which DTBCLK_P feeds the stream clock and gate it by @src */
+	switch (dp_hpo_inst) {
+	case 0:
+		REG_UPDATE_2(DPSTREAMCLK_CNTL,
+				DPSTREAMCLK0_EN, stream_en,
+				DPSTREAMCLK0_SRC_SEL, otg_inst);
+		break;
+	case 1:
+		REG_UPDATE_2(DPSTREAMCLK_CNTL,
+				DPSTREAMCLK1_EN, stream_en,
+				DPSTREAMCLK1_SRC_SEL, otg_inst);
+		break;
+	case 2:
+		REG_UPDATE_2(DPSTREAMCLK_CNTL,
+				DPSTREAMCLK2_EN, stream_en,
+				DPSTREAMCLK2_SRC_SEL, otg_inst);
+		break;
+	case 3:
+		REG_UPDATE_2(DPSTREAMCLK_CNTL,
+				DPSTREAMCLK3_EN, stream_en,
+				DPSTREAMCLK3_SRC_SEL, otg_inst);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Write 1 to the OTG_ADD_PIXEL field of OTG_PIXEL_RATE_CNTL for @otg_inst.
+ *
+ * NOTE(review): @otg_inst indexes the register/field arrays unchecked -
+ * presumably the caller guarantees a valid OTG index; confirm.
+ */
+static void dccg32_otg_add_pixel(struct dccg *dccg,
+ uint32_t otg_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ REG_UPDATE(OTG_PIXEL_RATE_CNTL[otg_inst],
+ OTG_ADD_PIXEL[otg_inst], 1);
+}
+
+/*
+ * Write 1 to the OTG_DROP_PIXEL field of OTG_PIXEL_RATE_CNTL for @otg_inst.
+ *
+ * NOTE(review): @otg_inst indexes the register/field arrays unchecked -
+ * presumably the caller guarantees a valid OTG index; confirm.
+ */
+static void dccg32_otg_drop_pixel(struct dccg *dccg,
+ uint32_t otg_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ REG_UPDATE(OTG_PIXEL_RATE_CNTL[otg_inst],
+ OTG_DROP_PIXEL[otg_inst], 1);
+}
+
+/*
+ * DCCG hook table installed by dccg32_create(). Entries named dccg32_* are
+ * specific to this file; the remaining hooks reuse the dccg2_* and dccg31_*
+ * implementations.
+ */
+static const struct dccg_funcs dccg32_funcs = {
+ .update_dpp_dto = dccg2_update_dpp_dto,
+ .get_dccg_ref_freq = dccg32_get_dccg_ref_freq,
+ .dccg_init = dccg31_init,
+ .set_dpstreamclk = dccg32_set_dpstreamclk,
+ .enable_symclk32_se = dccg31_enable_symclk32_se,
+ .disable_symclk32_se = dccg31_disable_symclk32_se,
+ .enable_symclk32_le = dccg31_enable_symclk32_le,
+ .disable_symclk32_le = dccg31_disable_symclk32_le,
+ .set_physymclk = dccg31_set_physymclk,
+ .set_dtbclk_dto = dccg32_set_dtbclk_dto,
+ .set_valid_pixel_rate = dccg32_set_valid_pixel_rate,
+ .set_fifo_errdet_ovr_en = dccg2_set_fifo_errdet_ovr_en,
+ .set_audio_dtbclk_dto = dccg31_set_audio_dtbclk_dto,
+ .otg_add_pixel = dccg32_otg_add_pixel,
+ .otg_drop_pixel = dccg32_otg_drop_pixel,
+ .set_pixel_rate_div = dccg32_set_pixel_rate_div,
+ .get_pixel_rate_div = dccg32_get_pixel_rate_div,
+ .trigger_dio_fifo_resync = dccg32_trigger_dio_fifo_resync,
+ .set_dtbclk_p_src = dccg32_set_dtbclk_p_src,
+};
+
+/*
+ * Allocate and initialise a DCN32 DCCG object.
+ *
+ * @ctx:        display core context stored in the base object
+ * @regs:       register offset table
+ * @dccg_shift: field shift table
+ * @dccg_mask:  field mask table
+ *
+ * The three tables are stored by pointer, not copied. Returns the embedded
+ * base struct dccg, or NULL (after BREAK_TO_DEBUGGER()) when the allocation
+ * fails.
+ */
+struct dccg *dccg32_create(
+	struct dc_context *ctx,
+	const struct dccg_registers *regs,
+	const struct dccg_shift *dccg_shift,
+	const struct dccg_mask *dccg_mask)
+{
+	struct dcn_dccg *dccg_dcn;
+
+	dccg_dcn = kzalloc(sizeof(*dccg_dcn), GFP_KERNEL);
+	if (!dccg_dcn) {
+		BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+
+	dccg_dcn->base.ctx = ctx;
+	dccg_dcn->base.funcs = &dccg32_funcs;
+
+	dccg_dcn->regs = regs;
+	dccg_dcn->dccg_shift = dccg_shift;
+	dccg_dcn->dccg_mask = dccg_mask;
+
+	return &dccg_dcn->base;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn32/dcn32_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn32/dcn32_dccg.h
new file mode 100644
index 000000000000..cf5508718122
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn32/dcn32_dccg.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN32_DCCG_H__
+#define __DCN32_DCCG_H__
+
+#include "dcn31/dcn31_dccg.h"
+
+/*
+ * Designated initializer for one element of a per-instance field array.
+ * Expands to
+ *   .<field_prefix>_<field_name>[inst] =
+ *       <block><inst>_<reg_name>__<field_prefix><inst>_<field_name><post_fix>
+ * e.g. DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLK_DTO, ENABLE, 0, sfx) becomes
+ *   .DTBCLK_DTO_ENABLE[0] = OTG0_PIXEL_RATE_CNTL__DTBCLK_DTO0_ENABLEsfx
+ */
+#define DCCG_SFII(block, reg_name, field_prefix, field_name, inst, post_fix)\
+ .field_prefix ## _ ## field_name[inst] = block ## inst ## _ ## reg_name ## __ ## field_prefix ## inst ## _ ## field_name ## post_fix
+
+/*
+ * Field list used to populate the DCN32 DCCG shift/mask tables; @mask_sh is
+ * the suffix forwarded to every DCCG_SF/DCCG_SFI/DCCG_SFII entry.
+ *
+ * Each field is listed exactly once (a repeated OTG3_PIXEL_RATE_DIVK2 entry
+ * was dropped because it only re-initialised the same member).
+ *
+ * NOTE(review): dccg32_otg_drop_pixel() updates an OTG_DROP_PIXEL[] field,
+ * but only OTG_ADD_PIXEL[] entries are listed here - confirm whether
+ * DROP_PIXEL entries are required.
+ */
+#define DCCG_MASK_SH_LIST_DCN32(mask_sh) \
+	DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 0, mask_sh),\
+	DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 0, mask_sh),\
+	DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 1, mask_sh),\
+	DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 1, mask_sh),\
+	DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 2, mask_sh),\
+	DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 2, mask_sh),\
+	DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 3, mask_sh),\
+	DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 3, mask_sh),\
+	DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_PHASE, mask_sh),\
+	DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_MODULO, mask_sh),\
+	DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_EN, mask_sh),\
+	DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_SRC_SEL, mask_sh),\
+	DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_FORCE_EN, mask_sh),\
+	DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_FORCE_SRC_SEL, mask_sh),\
+	DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_FORCE_EN, mask_sh),\
+	DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_FORCE_SRC_SEL, mask_sh),\
+	DCCG_SF(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_FORCE_EN, mask_sh),\
+	DCCG_SF(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_FORCE_SRC_SEL, mask_sh),\
+	DCCG_SF(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_FORCE_EN, mask_sh),\
+	DCCG_SF(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_FORCE_SRC_SEL, mask_sh),\
+	DCCG_SF(PHYESYMCLK_CLOCK_CNTL, PHYESYMCLK_FORCE_EN, mask_sh),\
+	DCCG_SF(PHYESYMCLK_CLOCK_CNTL, PHYESYMCLK_FORCE_SRC_SEL, mask_sh),\
+	DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK0_EN, mask_sh),\
+	DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK1_EN, mask_sh),\
+	DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK2_EN, mask_sh),\
+	DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK3_EN, mask_sh),\
+	DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK0_SRC_SEL, mask_sh),\
+	DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK1_SRC_SEL, mask_sh),\
+	DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK2_SRC_SEL, mask_sh),\
+	DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK3_SRC_SEL, mask_sh),\
+	DCCG_SF(HDMISTREAMCLK_CNTL, HDMISTREAMCLK0_EN, mask_sh),\
+	DCCG_SF(HDMISTREAMCLK_CNTL, HDMISTREAMCLK0_DTO_FORCE_DIS, mask_sh),\
+	DCCG_SF(HDMISTREAMCLK_CNTL, HDMISTREAMCLK0_SRC_SEL, mask_sh),\
+	DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE0_SRC_SEL, mask_sh),\
+	DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE1_SRC_SEL, mask_sh),\
+	DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE2_SRC_SEL, mask_sh),\
+	DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE3_SRC_SEL, mask_sh),\
+	DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE0_EN, mask_sh),\
+	DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE1_EN, mask_sh),\
+	DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE2_EN, mask_sh),\
+	DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE3_EN, mask_sh),\
+	DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE0_SRC_SEL, mask_sh),\
+	DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE1_SRC_SEL, mask_sh),\
+	DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE0_EN, mask_sh),\
+	DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE1_EN, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLK_DTO, ENABLE, 0, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLK_DTO, ENABLE, 1, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLK_DTO, ENABLE, 2, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLK_DTO, ENABLE, 3, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLKDTO, ENABLE_STATUS, 0, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLKDTO, ENABLE_STATUS, 1, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLKDTO, ENABLE_STATUS, 2, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLKDTO, ENABLE_STATUS, 3, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 0, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 1, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 2, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 3, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 0, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 1, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 2, mask_sh),\
+	DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 3, mask_sh),\
+	DCCG_SF(OTG_PIXEL_RATE_DIV, OTG0_PIXEL_RATE_DIVK1, mask_sh),\
+	DCCG_SF(OTG_PIXEL_RATE_DIV, OTG0_PIXEL_RATE_DIVK2, mask_sh),\
+	DCCG_SF(OTG_PIXEL_RATE_DIV, OTG1_PIXEL_RATE_DIVK1, mask_sh),\
+	DCCG_SF(OTG_PIXEL_RATE_DIV, OTG1_PIXEL_RATE_DIVK2, mask_sh),\
+	DCCG_SF(OTG_PIXEL_RATE_DIV, OTG2_PIXEL_RATE_DIVK1, mask_sh),\
+	DCCG_SF(OTG_PIXEL_RATE_DIV, OTG2_PIXEL_RATE_DIVK2, mask_sh),\
+	DCCG_SF(OTG_PIXEL_RATE_DIV, OTG3_PIXEL_RATE_DIVK1, mask_sh),\
+	DCCG_SF(OTG_PIXEL_RATE_DIV, OTG3_PIXEL_RATE_DIVK2, mask_sh),\
+	DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P0_SRC_SEL, mask_sh),\
+	DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P0_EN, mask_sh),\
+	DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P1_SRC_SEL, mask_sh),\
+	DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P1_EN, mask_sh),\
+	DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P2_SRC_SEL, mask_sh),\
+	DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P2_EN, mask_sh),\
+	DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P3_SRC_SEL, mask_sh),\
+	DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P3_EN, mask_sh),\
+	DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO_SEL, mask_sh),\
+	DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL, mask_sh),\
+	DCCG_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, mask_sh),\
+	DCCG_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_RDIVIDER, mask_sh),\
+	DCCG_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, mask_sh)
+
+/*
+ * Create a DCN32 DCCG object bound to the given context and register,
+ * shift and mask tables.
+ */
+struct dccg *dccg32_create(
+ struct dc_context *ctx,
+ const struct dccg_registers *regs,
+ const struct dccg_shift *dccg_shift,
+ const struct dccg_mask *dccg_mask);
+
+#endif //__DCN32_DCCG_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
new file mode 100644
index 000000000000..bd2f528137b2
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
@@ -0,0 +1,2486 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "reg_helper.h"
+#include "core_types.h"
+#include "resource.h"
+#include "dcn35_dccg.h"
+
+/* Recover the enclosing struct dcn_dccg from a pointer to its 'base' member. */
+#define TO_DCN_DCCG(dccg)\
+ container_of(dccg, struct dcn_dccg, base)
+
+/* Look up a register in the table of the local 'dccg_dcn' variable. */
+#define REG(reg) \
+ (dccg_dcn->regs->reg)
+
+/*
+ * Expand a field name to its "shift, mask" argument pair taken from the
+ * local 'dccg_dcn'; reg_name is not used by this definition.
+ */
+#undef FN
+#define FN(reg_name, field_name) \
+ dccg_dcn->dccg_shift->field_name, dccg_dcn->dccg_mask->field_name
+
+/* Context taken from the local 'dccg_dcn' variable. */
+#define CTX \
+ dccg_dcn->base.ctx
+#include "logger_types.h"
+/* Logger taken from the local 'dccg' pointer (not 'dccg_dcn'). */
+#define DC_LOGGER \
+ dccg->ctx->logger
+
+/* Source selection for a front-end SYMCLK (SYMCLK#_FE_SRC_SEL values). */
+enum symclk_fe_source {
+ SYMCLK_FE_SYMCLK_A = 0, // Select functional clock from backend symclk A
+ SYMCLK_FE_SYMCLK_B,
+ SYMCLK_FE_SYMCLK_C,
+ SYMCLK_FE_SYMCLK_D,
+ SYMCLK_FE_SYMCLK_E,
+ SYMCLK_FE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software
+};
+
+/*
+ * Source selection for a back-end SYMCLK. dccg35_set_symclk_be_src_new()
+ * writes these values to SYMCLK#_SRC_SEL, except SYMCLK_BE_REFCLK which is
+ * translated to "clock enable = 0, source select = 0".
+ */
+enum symclk_be_source {
+ SYMCLK_BE_PHYCLK = 0, // Select phy clk when sym_clk_enable = 1
+ SYMCLK_BE_DPIACLK_810 = 4,
+ SYMCLK_BE_DPIACLK_162 = 5,
+ SYMCLK_BE_DPIACLK_540 = 6,
+ SYMCLK_BE_DPIACLK_270 = 7,
+ SYMCLK_BE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software
+};
+
+/*
+ * Source selection for PHY#SYMCLK. dccg35_set_physymclk_src_new() writes
+ * these values to PHY#SYMCLK_SRC_SEL, except PHYSYMCLK_REFCLK which is
+ * translated to "enable = 0, source select = 0".
+ */
+enum physymclk_source {
+ PHYSYMCLK_PHYCLK = 0, // Select symclk as source of clock which is output to PHY through DCIO.
+ PHYSYMCLK_PHYD18CLK, // Select phyd18clk as the source of clock which is output to PHY through DCIO.
+ PHYSYMCLK_PHYD32CLK, // Select phyd32clk as the source of clock which is output to PHY through DCIO.
+ PHYSYMCLK_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software
+};
+
+/*
+ * Source selection for DTBCLK_P#. dccg35_set_dtbclk_p_src_new() writes these
+ * values to DTBCLK_P#_SRC_SEL, except DTBCLK_REFCLK which is translated to
+ * "enable = 0, source select = 0".
+ */
+enum dtbclk_source {
+ DTBCLK_DPREFCLK = 0, // Selects source for DTBCLK_P# as DPREFCLK (src sel 0 and 1 are same)
+ DTBCLK_DPREFCLK_0, // Selects source for DTBCLK_P# as DPREFCLK (src sel 0 and 1 are same)
+ DTBCLK_DTBCLK0, // Selects source for DTBCLK_P# as DTBCLK0
+ DTBCLK_DTBCLK1, // Selects source for DTBCLK_P# as DTBCLK1
+ DTBCLK_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software
+};
+
+/* Value written verbatim to DPPCLK#_EN by dcn35_set_dppclk_src_new(). */
+enum dppclk_clock_source {
+ DPP_REFCLK = 0, // refclk is selected
+ DPP_DCCG_DTO, // Functional clock selected is DTO tuned DPPCLK
+};
+
+/*
+ * Source selection for DPSTREAMCLK#. dccg35_set_dpstreamclk_src_new() writes
+ * these values to DPSTREAMCLK#_SRC_SEL, except DP_STREAM_REFCLK which is
+ * translated to "enable = 0, source select = 0".
+ */
+enum dp_stream_clk_source {
+ DP_STREAM_DTBCLK_P0 = 0, // Selects functional for DP_STREAM_CLK as DTBCLK_P#
+ DP_STREAM_DTBCLK_P1,
+ DP_STREAM_DTBCLK_P2,
+ DP_STREAM_DTBCLK_P3,
+ DP_STREAM_DTBCLK_P4,
+ DP_STREAM_DTBCLK_P5,
+ DP_STREAM_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software
+};
+
+/* Source selection for the HDMI character clock (one PHYD18CLK per UNIPHY A-E). */
+enum hdmi_char_clk {
+ HDMI_CHAR_PHYAD18CLK = 0, // Selects functional for hdmi_char_clk as UNIPHYA PHYD18CLK
+ HDMI_CHAR_PHYBD18CLK,
+ HDMI_CHAR_PHYCD18CLK,
+ HDMI_CHAR_PHYDD18CLK,
+ HDMI_CHAR_PHYED18CLK,
+ HDMI_CHAR_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software
+};
+
+/* Source selection for the HDMI stream clock (one entry per DTBCLK_P#). */
+enum hdmi_stream_clk_source {
+ HDMI_STREAM_DTBCLK_P0 = 0, // Selects functional for HDMI_STREAM_CLK as DTBCLK_P#
+ HDMI_STREAM_DTBCLK_P1,
+ HDMI_STREAM_DTBCLK_P2,
+ HDMI_STREAM_DTBCLK_P3,
+ HDMI_STREAM_DTBCLK_P4,
+ HDMI_STREAM_DTBCLK_P5,
+ HDMI_STREAM_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software
+};
+
+/*
+ * Source selection for SYMCLK32_SE#. dccg35_set_symclk32_se_src_new() writes
+ * these values to SYMCLK32_SE#_SRC_SEL, except SYMCLK32_SE_REFCLK which is
+ * translated to "enable = 0, source select = 0".
+ */
+enum symclk32_se_clk_source {
+ SYMCLK32_SE_PHYAD32CLK = 0, // Selects functional for SYMCLK32 as UNIPHYA PHYD32CLK
+ SYMCLK32_SE_PHYBD32CLK,
+ SYMCLK32_SE_PHYCD32CLK,
+ SYMCLK32_SE_PHYDD32CLK,
+ SYMCLK32_SE_PHYED32CLK,
+ SYMCLK32_SE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software
+};
+
+/*
+ * Source selection for SYMCLK32_LE#. dccg35_set_symclk32_le_src_new() writes
+ * these values to SYMCLK32_LE#_SRC_SEL, except SYMCLK32_LE_REFCLK which is
+ * translated to "enable = 0, source select = 0".
+ */
+enum symclk32_le_clk_source {
+ SYMCLK32_LE_PHYAD32CLK = 0, // Selects functional for SYMCLK32 as UNIPHYA PHYD32CLK
+ SYMCLK32_LE_PHYBD32CLK,
+ SYMCLK32_LE_PHYCD32CLK,
+ SYMCLK32_LE_PHYDD32CLK,
+ SYMCLK32_LE_PHYED32CLK,
+ SYMCLK32_LE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software
+};
+
+/* Value written verbatim to DSCCLK#_EN by dccg35_set_dsc_clk_src_new(). */
+enum dsc_clk_source {
+ DSC_CLK_REF_CLK = 0, // Ref clock selected for DSC_CLK
+ DSC_DTO_TUNED_CK_GPU_DISCLK_3, // DTO divided clock selected as functional clock
+};
+
+
+/*
+ * Allow or forbid root clock gating of DSCCLK for one DSC instance.
+ *
+ * @inst:      DSC instance, 0-3; other values hit BREAK_TO_DEBUGGER()
+ * @allow_rcg: true clears DSCCLK#_ROOT_GATE_DISABLE, false sets it and then
+ *             waits 10us for the clock to ramp
+ *
+ * A request to allow gating is ignored unless the dsc bit of
+ * debug.root_clock_optimization is set.
+ */
+static void dccg35_set_dsc_clk_rcg(struct dccg *dccg, int inst, bool allow_rcg)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const int gate_disable = allow_rcg ? 0 : 1;
+
+	if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc && allow_rcg)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 1:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 2:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 3:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+
+	/* gating was just disabled: give the clock time to ramp */
+	if (gate_disable)
+		udelay(10);
+}
+
+/*
+ * Allow or forbid clock gating of SYMCLK32_SE for one instance.
+ *
+ * @inst:   SYMCLK32_SE instance, 0-3; other values hit BREAK_TO_DEBUGGER()
+ * @enable: true clears both gate-disable bits, false sets them
+ *
+ * A request to enable gating is ignored unless the symclk32_se bit of
+ * debug.root_clock_optimization is set.
+ */
+static void dccg35_set_symclk32_se_rcg(
+		struct dccg *dccg,
+		int inst,
+		bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const int gate_disable = enable ? 0 : 1;
+
+	if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se && enable)
+		return;
+
+	/*
+	 * SYMCLK32_ROOT_SE#_GATE_DISABLE will clock gate in DCCG,
+	 * SYMCLK32_SE#_GATE_DISABLE will clock gate in HPO only.
+	 */
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+				SYMCLK32_SE0_GATE_DISABLE, gate_disable,
+				SYMCLK32_ROOT_SE0_GATE_DISABLE, gate_disable);
+		break;
+	case 1:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+				SYMCLK32_SE1_GATE_DISABLE, gate_disable,
+				SYMCLK32_ROOT_SE1_GATE_DISABLE, gate_disable);
+		break;
+	case 2:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+				SYMCLK32_SE2_GATE_DISABLE, gate_disable,
+				SYMCLK32_ROOT_SE2_GATE_DISABLE, gate_disable);
+		break;
+	case 3:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+				SYMCLK32_SE3_GATE_DISABLE, gate_disable,
+				SYMCLK32_ROOT_SE3_GATE_DISABLE, gate_disable);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Allow or forbid clock gating of SYMCLK32_LE for one instance.
+ *
+ * @inst:   SYMCLK32_LE instance, 0-1; other values hit BREAK_TO_DEBUGGER()
+ * @enable: true clears both gate-disable bits, false sets them
+ *
+ * A request to enable gating is ignored unless the symclk32_le bit of
+ * debug.root_clock_optimization is set.
+ */
+static void dccg35_set_symclk32_le_rcg(
+		struct dccg *dccg,
+		int inst,
+		bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const int gate_disable = enable ? 0 : 1;
+
+	if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le && enable)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+				SYMCLK32_LE0_GATE_DISABLE, gate_disable,
+				SYMCLK32_ROOT_LE0_GATE_DISABLE, gate_disable);
+		break;
+	case 1:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+				SYMCLK32_LE1_GATE_DISABLE, gate_disable,
+				SYMCLK32_ROOT_LE1_GATE_DISABLE, gate_disable);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Allow or forbid root clock gating of PHY[A-E]SYMCLK.
+ *
+ * @inst:   PHY instance, 0-4 (A-E); other values hit BREAK_TO_DEBUGGER()
+ * @enable: true clears PHY#SYMCLK_ROOT_GATE_DISABLE, false sets it
+ *
+ * A request to enable gating is ignored unless the physymclk bit of
+ * debug.root_clock_optimization is set.
+ */
+static void dccg35_set_physymclk_rcg(
+		struct dccg *dccg,
+		int inst,
+		bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const int gate_disable = enable ? 0 : 1;
+
+	if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk && enable)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				PHYASYMCLK_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 1:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				PHYBSYMCLK_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 2:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				PHYCSYMCLK_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 3:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				PHYDSYMCLK_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 4:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				PHYESYMCLK_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Allow or forbid clock gating of the front-end SYMCLK[A-E]_FE.
+ *
+ * @inst:   instance, 0-4 (A-E); other values hit BREAK_TO_DEBUGGER()
+ * @enable: true clears SYMCLK#_FE_GATE_DISABLE (CNTL2) and
+ *          SYMCLK#_FE_ROOT_GATE_DISABLE (CNTL5), false sets them
+ *
+ * A request to enable gating is ignored unless the symclk_fe bit of
+ * debug.root_clock_optimization is set.
+ */
+static void dccg35_set_symclk_fe_rcg(
+		struct dccg *dccg,
+		int inst,
+		bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const int gate_disable = enable ? 0 : 1;
+
+	if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk_fe && enable)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				SYMCLKA_FE_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5,
+				SYMCLKA_FE_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 1:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				SYMCLKB_FE_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5,
+				SYMCLKB_FE_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 2:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				SYMCLKC_FE_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5,
+				SYMCLKC_FE_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 3:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				SYMCLKD_FE_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5,
+				SYMCLKD_FE_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 4:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				SYMCLKE_FE_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5,
+				SYMCLKE_FE_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Allow or forbid clock gating of the back-end SYMCLK[A-E].
+ *
+ * @inst:   instance, 0-4 (A-E); other values hit BREAK_TO_DEBUGGER()
+ * @enable: true clears SYMCLK#_GATE_DISABLE (CNTL2) and
+ *          SYMCLK#_ROOT_GATE_DISABLE (CNTL5), false sets them
+ *
+ * A request to enable gating is ignored unless the symclk_fe bit of
+ * debug.root_clock_optimization is set (there is no dedicated symclk_be
+ * bit yet, see the TBD below).
+ */
+static void dccg35_set_symclk_be_rcg(
+		struct dccg *dccg,
+		int inst,
+		bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const int gate_disable = enable ? 0 : 1;
+
+	/* TBD add symclk_be in rcg control bits */
+	if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk_fe && enable)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				SYMCLKA_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5,
+				SYMCLKA_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 1:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				SYMCLKB_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5,
+				SYMCLKB_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 2:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				SYMCLKC_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5,
+				SYMCLKC_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 3:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				SYMCLKD_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5,
+				SYMCLKD_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 4:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				SYMCLKE_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5,
+				SYMCLKE_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Allow or forbid clock gating of DTBCLK_P for one instance.
+ *
+ * @inst:   DTBCLK_P instance, 0-3; other values hit BREAK_TO_DEBUGGER()
+ * @enable: true clears DTBCLK_P#_GATE_DISABLE, false sets it
+ *
+ * A request to enable gating is ignored unless the dpp bit of
+ * debug.root_clock_optimization is set.
+ * NOTE(review): this is keyed on the dpp bit rather than a DTBCLK-specific
+ * one - confirm that is intended.
+ */
+static void dccg35_set_dtbclk_p_rcg(struct dccg *dccg, int inst, bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const int gate_disable = enable ? 0 : 1;
+
+	if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp && enable)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, gate_disable);
+		break;
+	case 1:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, gate_disable);
+		break;
+	case 2:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, gate_disable);
+		break;
+	case 3:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, gate_disable);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		break;
+	}
+}
+
+/*
+ * Allow or forbid root clock gating of DPPCLK for one DPP instance.
+ *
+ * @inst:      DPP instance, 0-3; other values hit BREAK_TO_DEBUGGER()
+ * @allow_rcg: true clears DPPCLK#_ROOT_GATE_DISABLE, false sets it and then
+ *             waits 10us for the clock to ramp
+ *
+ * A request to allow gating is ignored unless the dpp bit of
+ * debug.root_clock_optimization is set. Unlike the DSC variant, the ramp
+ * delay is still applied after an invalid @inst.
+ */
+static void dccg35_set_dppclk_rcg(struct dccg *dccg, int inst, bool allow_rcg)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const int gate_disable = allow_rcg ? 0 : 1;
+
+	if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp && allow_rcg)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 1:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 2:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 3:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		break;
+	}
+
+	/* gating was just disabled: give the clock time to ramp */
+	if (gate_disable)
+		udelay(10);
+}
+
+/*
+ * Allow or forbid clock gating of DPSTREAMCLK for one instance.
+ *
+ * @inst:   DPSTREAMCLK instance, 0-3; other values hit BREAK_TO_DEBUGGER()
+ * @enable: true clears DPSTREAMCLK#_GATE_DISABLE and
+ *          DPSTREAMCLK#_ROOT_GATE_DISABLE, false sets them
+ *
+ * A request to enable gating is ignored unless the dpstream bit of
+ * debug.root_clock_optimization is set.
+ */
+static void dccg35_set_dpstreamclk_rcg(
+		struct dccg *dccg,
+		int inst,
+		bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const int gate_disable = enable ? 0 : 1;
+
+	if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream && enable)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+				DPSTREAMCLK0_GATE_DISABLE, gate_disable,
+				DPSTREAMCLK0_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 1:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+				DPSTREAMCLK1_GATE_DISABLE, gate_disable,
+				DPSTREAMCLK1_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 2:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+				DPSTREAMCLK2_GATE_DISABLE, gate_disable,
+				DPSTREAMCLK2_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 3:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+				DPSTREAMCLK3_GATE_DISABLE, gate_disable,
+				DPSTREAMCLK3_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Allow or forbid clock gating of SYMCLK32_SE for one instance.
+ *
+ * This entry point programs exactly the same fields, under the same debug
+ * guard, as dccg35_set_symclk32_se_rcg(), so it simply forwards to it
+ * instead of carrying a second copy of the switch.
+ *
+ * @inst:   SYMCLK32_SE instance, 0-3
+ * @enable: true clears the gate-disable bits, false sets them
+ */
+static void dccg35_set_smclk32_se_rcg(
+		struct dccg *dccg,
+		int inst,
+		bool enable)
+{
+	dccg35_set_symclk32_se_rcg(dccg, inst, enable);
+}
+
+/*
+ * Select the DSCCLK source for one DSC instance by writing @src straight
+ * into DSCCLK#_EN of DSCCLK_DTO_CTRL.
+ *
+ * @inst: DSC instance, 0-3; other values hit BREAK_TO_DEBUGGER()
+ * @src:  DSC_CLK_REF_CLK (0) or DSC_DTO_TUNED_CK_GPU_DISCLK_3 (1)
+ */
+static void dccg35_set_dsc_clk_src_new(struct dccg *dccg, int inst, enum dsc_clk_source src)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ /* DSCCLK#_EN=0 switches to refclock from functional clock */
+
+ switch (inst) {
+ case 0:
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, src);
+ break;
+ case 1:
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, src);
+ break;
+ case 2:
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, src);
+ break;
+ case 3:
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, src);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+
+/*
+ * Select the SYMCLK32_SE source for one instance.
+ *
+ * @inst: SYMCLK32_SE instance, 0-3; other values hit BREAK_TO_DEBUGGER()
+ * @src:  SYMCLK32_SE_REFCLK clears both SRC_SEL and EN; any other value is
+ *        written to SYMCLK32_SE#_SRC_SEL with SYMCLK32_SE#_EN set to 1
+ */
+static void dccg35_set_symclk32_se_src_new(
+ struct dccg *dccg,
+ int inst,
+ enum symclk32_se_clk_source src
+ )
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (inst) {
+ case 0:
+ REG_UPDATE_2(SYMCLK32_SE_CNTL,
+ SYMCLK32_SE0_SRC_SEL, (src == SYMCLK32_SE_REFCLK) ? 0 : src,
+ SYMCLK32_SE0_EN, (src == SYMCLK32_SE_REFCLK) ? 0 : 1);
+ break;
+ case 1:
+ REG_UPDATE_2(SYMCLK32_SE_CNTL,
+ SYMCLK32_SE1_SRC_SEL, (src == SYMCLK32_SE_REFCLK) ? 0 : src,
+ SYMCLK32_SE1_EN, (src == SYMCLK32_SE_REFCLK) ? 0 : 1);
+ break;
+ case 2:
+ REG_UPDATE_2(SYMCLK32_SE_CNTL,
+ SYMCLK32_SE2_SRC_SEL, (src == SYMCLK32_SE_REFCLK) ? 0 : src,
+ SYMCLK32_SE2_EN, (src == SYMCLK32_SE_REFCLK) ? 0 : 1);
+ break;
+ case 3:
+ REG_UPDATE_2(SYMCLK32_SE_CNTL,
+ SYMCLK32_SE3_SRC_SEL, (src == SYMCLK32_SE_REFCLK) ? 0 : src,
+ SYMCLK32_SE3_EN, (src == SYMCLK32_SE_REFCLK) ? 0 : 1);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+
+/*
+ * Report whether a SYMCLK32_SE instance is enabled and sourced from a given
+ * link-encoder instance.
+ *
+ * @symclk_32_se_inst: SYMCLK32_SE instance to inspect, 0-3
+ * @symclk_32_le_inst: value SYMCLK32_SE#_SRC_SEL is compared against
+ *
+ * The fields of the instance named by @symclk_32_se_inst are read
+ * (previously SE3 was read unconditionally and the parameter was ignored).
+ *
+ * Returns 1 when SYMCLK32_SE#_EN reads 1 and SYMCLK32_SE#_SRC_SEL equals
+ * @symclk_32_le_inst, otherwise 0. An out-of-range @symclk_32_se_inst reads
+ * nothing and yields 0.
+ */
+static int
+dccg35_is_symclk32_se_src_functional_le_new(struct dccg *dccg, int symclk_32_se_inst, int symclk_32_le_inst)
+{
+	/* zero-initialised so an unmatched instance falls through to "return 0" */
+	uint32_t en = 0;
+	uint32_t src_sel = 0;
+
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	switch (symclk_32_se_inst) {
+	case 0:
+		REG_GET_2(SYMCLK32_SE_CNTL, SYMCLK32_SE0_SRC_SEL, &src_sel, SYMCLK32_SE0_EN, &en);
+		break;
+	case 1:
+		REG_GET_2(SYMCLK32_SE_CNTL, SYMCLK32_SE1_SRC_SEL, &src_sel, SYMCLK32_SE1_EN, &en);
+		break;
+	case 2:
+		REG_GET_2(SYMCLK32_SE_CNTL, SYMCLK32_SE2_SRC_SEL, &src_sel, SYMCLK32_SE2_EN, &en);
+		break;
+	case 3:
+		REG_GET_2(SYMCLK32_SE_CNTL, SYMCLK32_SE3_SRC_SEL, &src_sel, SYMCLK32_SE3_EN, &en);
+		break;
+	default:
+		/* stay quiet, like dccg35_is_symclk_fe_src_functional_be() */
+		break;
+	}
+
+	if (en == 1 && src_sel == symclk_32_le_inst)
+		return 1;
+
+	return 0;
+}
+
+
+/*
+ * Select the SYMCLK32_LE source for one instance.
+ *
+ * @inst: SYMCLK32_LE instance, 0-1; other values hit BREAK_TO_DEBUGGER()
+ * @src:  SYMCLK32_LE_REFCLK clears both SRC_SEL and EN; any other value is
+ *        written to SYMCLK32_LE#_SRC_SEL with SYMCLK32_LE#_EN set to 1
+ */
+static void dccg35_set_symclk32_le_src_new(
+ struct dccg *dccg,
+ int inst,
+ enum symclk32_le_clk_source src)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (inst) {
+ case 0:
+ REG_UPDATE_2(SYMCLK32_LE_CNTL,
+ SYMCLK32_LE0_SRC_SEL, (src == SYMCLK32_LE_REFCLK) ? 0 : src,
+ SYMCLK32_LE0_EN, (src == SYMCLK32_LE_REFCLK) ? 0 : 1);
+ break;
+ case 1:
+ REG_UPDATE_2(SYMCLK32_LE_CNTL,
+ SYMCLK32_LE1_SRC_SEL, (src == SYMCLK32_LE_REFCLK) ? 0 : src,
+ SYMCLK32_LE1_EN, (src == SYMCLK32_LE_REFCLK) ? 0 : 1);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+
+/*
+ * Select the DPPCLK source for one DPP instance by writing @src straight
+ * into DPPCLK#_EN of DPPCLK_CTRL.
+ *
+ * @inst: DPP instance, 0-3; other values hit BREAK_TO_DEBUGGER()
+ * @src:  DPP_REFCLK (0) or DPP_DCCG_DTO (1)
+ */
+static void dcn35_set_dppclk_src_new(struct dccg *dccg,
+ int inst, enum dppclk_clock_source src)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (inst) {
+ case 0:
+ REG_UPDATE(DPPCLK_CTRL, DPPCLK0_EN, src);
+ break;
+ case 1:
+ REG_UPDATE(DPPCLK_CTRL, DPPCLK1_EN, src);
+ break;
+ case 2:
+ REG_UPDATE(DPPCLK_CTRL, DPPCLK2_EN, src);
+ break;
+ case 3:
+ REG_UPDATE(DPPCLK_CTRL, DPPCLK3_EN, src);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ break;
+ }
+}
+
+/*
+ * Select the DTBCLK_P source for one instance.
+ *
+ * @src:  DTBCLK_REFCLK clears both SRC_SEL and EN; any other value is
+ *        written to DTBCLK_P#_SRC_SEL with DTBCLK_P#_EN set to 1
+ * @inst: DTBCLK_P instance, 0-3; other values hit BREAK_TO_DEBUGGER()
+ */
+static void dccg35_set_dtbclk_p_src_new(
+ struct dccg *dccg,
+ enum dtbclk_source src,
+ int inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ /* If DTBCLK_P#_EN is 0 refclock is selected as functional clock
+ * If DTBCLK_P#_EN is 1 functional clock is selected as DTBCLK_P#_SRC_SEL
+ */
+
+ switch (inst) {
+ case 0:
+ REG_UPDATE_2(DTBCLK_P_CNTL,
+ DTBCLK_P0_SRC_SEL, (src == DTBCLK_REFCLK) ? 0 : src,
+ DTBCLK_P0_EN, (src == DTBCLK_REFCLK) ? 0 : 1);
+ break;
+ case 1:
+ REG_UPDATE_2(DTBCLK_P_CNTL,
+ DTBCLK_P1_SRC_SEL, (src == DTBCLK_REFCLK) ? 0 : src,
+ DTBCLK_P1_EN, (src == DTBCLK_REFCLK) ? 0 : 1);
+ break;
+ case 2:
+ REG_UPDATE_2(DTBCLK_P_CNTL,
+ DTBCLK_P2_SRC_SEL, (src == DTBCLK_REFCLK) ? 0 : src,
+ DTBCLK_P2_EN, (src == DTBCLK_REFCLK) ? 0 : 1);
+ break;
+ case 3:
+ REG_UPDATE_2(DTBCLK_P_CNTL,
+ DTBCLK_P3_SRC_SEL, (src == DTBCLK_REFCLK) ? 0 : src,
+ DTBCLK_P3_EN, (src == DTBCLK_REFCLK) ? 0 : 1);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+
+/*
+ * Select the DPSTREAMCLK source for one instance.
+ *
+ * @src:  DP_STREAM_REFCLK clears both EN and SRC_SEL; any other value is
+ *        written to DPSTREAMCLK#_SRC_SEL with DPSTREAMCLK#_EN set to 1
+ * @inst: DPSTREAMCLK instance, 0-3; other values hit BREAK_TO_DEBUGGER()
+ */
+static void dccg35_set_dpstreamclk_src_new(
+ struct dccg *dccg,
+ enum dp_stream_clk_source src,
+ int inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (inst) {
+ case 0:
+ REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK0_EN,
+ (src == DP_STREAM_REFCLK) ? 0 : 1,
+ DPSTREAMCLK0_SRC_SEL,
+ (src == DP_STREAM_REFCLK) ? 0 : src);
+ break;
+ case 1:
+ REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK1_EN,
+ (src == DP_STREAM_REFCLK) ? 0 : 1,
+ DPSTREAMCLK1_SRC_SEL,
+ (src == DP_STREAM_REFCLK) ? 0 : src);
+
+ break;
+ case 2:
+ REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK2_EN,
+ (src == DP_STREAM_REFCLK) ? 0 : 1,
+ DPSTREAMCLK2_SRC_SEL,
+ (src == DP_STREAM_REFCLK) ? 0 : src);
+
+ break;
+ case 3:
+ REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK3_EN,
+ (src == DP_STREAM_REFCLK) ? 0 : 1,
+ DPSTREAMCLK3_SRC_SEL,
+ (src == DP_STREAM_REFCLK) ? 0 : src);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+
+/*
+ * Select the PHY[A-E]SYMCLK source for one PHY instance.
+ *
+ * @src:  PHYSYMCLK_REFCLK clears both EN and SRC_SEL; any other value is
+ *        written to PHY#SYMCLK_SRC_SEL with PHY#SYMCLK_EN set to 1
+ * @inst: PHY instance, 0-4 (A-E); other values hit BREAK_TO_DEBUGGER()
+ */
+static void dccg35_set_physymclk_src_new(
+ struct dccg *dccg,
+ enum physymclk_source src,
+ int inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (inst) {
+ case 0:
+ REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_EN,
+ (src == PHYSYMCLK_REFCLK) ? 0 : 1,
+ PHYASYMCLK_SRC_SEL,
+ (src == PHYSYMCLK_REFCLK) ? 0 : src);
+ break;
+ case 1:
+ REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_EN,
+ (src == PHYSYMCLK_REFCLK) ? 0 : 1,
+ PHYBSYMCLK_SRC_SEL,
+ (src == PHYSYMCLK_REFCLK) ? 0 : src);
+ break;
+ case 2:
+ REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_EN,
+ (src == PHYSYMCLK_REFCLK) ? 0 : 1,
+ PHYCSYMCLK_SRC_SEL,
+ (src == PHYSYMCLK_REFCLK) ? 0 : src);
+ break;
+ case 3:
+ REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_EN,
+ (src == PHYSYMCLK_REFCLK) ? 0 : 1,
+ PHYDSYMCLK_SRC_SEL,
+ (src == PHYSYMCLK_REFCLK) ? 0 : src);
+ break;
+ case 4:
+ REG_UPDATE_2(PHYESYMCLK_CLOCK_CNTL, PHYESYMCLK_EN,
+ (src == PHYSYMCLK_REFCLK) ? 0 : 1,
+ PHYESYMCLK_SRC_SEL,
+ (src == PHYSYMCLK_REFCLK) ? 0 : src);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+
+/*
+ * Select the back-end SYMCLK[A-E] source for one instance.
+ *
+ * @src:  SYMCLK_BE_REFCLK clears both the clock enable and SRC_SEL; any
+ *        other value is written to SYMCLK#_SRC_SEL with
+ *        SYMCLK#_CLOCK_ENABLE set to 1
+ * @inst: instance, 0-4 (A-E); other values hit BREAK_TO_DEBUGGER(), matching
+ *        the other source setters in this file (an invalid instance used to
+ *        be ignored silently)
+ */
+static void dccg35_set_symclk_be_src_new(
+		struct dccg *dccg,
+		enum symclk_be_source src,
+		int inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE,
+				SYMCLKA_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1,
+				SYMCLKA_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src);
+		break;
+	case 1:
+		REG_UPDATE_2(SYMCLKB_CLOCK_ENABLE,
+				SYMCLKB_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1,
+				SYMCLKB_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src);
+		break;
+	case 2:
+		REG_UPDATE_2(SYMCLKC_CLOCK_ENABLE,
+				SYMCLKC_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1,
+				SYMCLKC_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src);
+		break;
+	case 3:
+		REG_UPDATE_2(SYMCLKD_CLOCK_ENABLE,
+				SYMCLKD_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1,
+				SYMCLKD_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src);
+		break;
+	case 4:
+		REG_UPDATE_2(SYMCLKE_CLOCK_ENABLE,
+				SYMCLKE_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1,
+				SYMCLKE_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Report whether a SYMCLK front end is enabled and sourced from a given
+ * back end.
+ *
+ * @dccg:           DCCG instance whose registers are read
+ * @symclk_fe_inst: front-end instance, 0..4
+ * @symclk_be_inst: back-end instance compared against SYMCLKx_FE_SRC_SEL
+ *
+ * Returns 1 when SYMCLKx_FE_EN reads 1 and SYMCLKx_FE_SRC_SEL equals
+ * @symclk_be_inst, otherwise 0.  For a front-end instance outside 0..4 no
+ * register is read and 0 is returned.
+ */
+static int dccg35_is_symclk_fe_src_functional_be(struct dccg *dccg,
+		int symclk_fe_inst,
+		int symclk_be_inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	uint32_t fe_src_sel = 0;
+	uint32_t fe_en = 0;
+
+	switch (symclk_fe_inst) {
+	case 0:
+		REG_GET_2(SYMCLKA_CLOCK_ENABLE,
+				SYMCLKA_FE_SRC_SEL, &fe_src_sel,
+				SYMCLKA_FE_EN, &fe_en);
+		break;
+	case 1:
+		REG_GET_2(SYMCLKB_CLOCK_ENABLE,
+				SYMCLKB_FE_SRC_SEL, &fe_src_sel,
+				SYMCLKB_FE_EN, &fe_en);
+		break;
+	case 2:
+		REG_GET_2(SYMCLKC_CLOCK_ENABLE,
+				SYMCLKC_FE_SRC_SEL, &fe_src_sel,
+				SYMCLKC_FE_EN, &fe_en);
+		break;
+	case 3:
+		REG_GET_2(SYMCLKD_CLOCK_ENABLE,
+				SYMCLKD_FE_SRC_SEL, &fe_src_sel,
+				SYMCLKD_FE_EN, &fe_en);
+		break;
+	case 4:
+		REG_GET_2(SYMCLKE_CLOCK_ENABLE,
+				SYMCLKE_FE_SRC_SEL, &fe_src_sel,
+				SYMCLKE_FE_EN, &fe_en);
+		break;
+	}
+
+	return (fe_en == 1 && fe_src_sel == symclk_be_inst) ? 1 : 0;
+}
+
+/*
+ * Select the clock source for one SYMCLK front end (SYMCLKA_FE..SYMCLKE_FE).
+ *
+ * @dccg: DCCG instance whose registers are programmed
+ * @src:  requested source; SYMCLK_FE_REFCLK clears both SYMCLKx_FE_EN and
+ *        SYMCLKx_FE_SRC_SEL, any other value sets the enable bit and is
+ *        written as-is into SYMCLKx_FE_SRC_SEL
+ * @inst: front-end instance, 0..4
+ *
+ * An out-of-range @inst is reported through BREAK_TO_DEBUGGER() and nothing
+ * is programmed, instead of being silently ignored.
+ */
+static void dccg35_set_symclk_fe_src_new(struct dccg *dccg, enum symclk_fe_source src, int inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE,
+				SYMCLKA_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1,
+				SYMCLKA_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src);
+		break;
+	case 1:
+		REG_UPDATE_2(SYMCLKB_CLOCK_ENABLE,
+				SYMCLKB_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1,
+				SYMCLKB_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src);
+		break;
+	case 2:
+		REG_UPDATE_2(SYMCLKC_CLOCK_ENABLE,
+				SYMCLKC_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1,
+				SYMCLKC_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src);
+		break;
+	case 3:
+		REG_UPDATE_2(SYMCLKD_CLOCK_ENABLE,
+				SYMCLKD_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1,
+				SYMCLKD_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src);
+		break;
+	case 4:
+		REG_UPDATE_2(SYMCLKE_CLOCK_ENABLE,
+				SYMCLKE_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1,
+				SYMCLKE_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src);
+		break;
+	default:
+		/* Match the sibling *_src_new() setters: flag a bad instance */
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Read the root-gate-disable bit of one SYMCLK front end.
+ *
+ * @dccg: DCCG instance whose registers are read
+ * @inst: front-end instance, 0..4
+ *
+ * Returns the value of SYMCLKx_FE_ROOT_GATE_DISABLE from
+ * DCCG_GATE_DISABLE_CNTL5.  An out-of-range @inst triggers
+ * BREAK_TO_DEBUGGER() and 0 is returned.
+ *
+ * NOTE(review): the returned value is the *gate disable* field, so a
+ * non-zero result presumably means root clock gating is disabled; confirm
+ * that callers comparing the result with 0 interpret it that way.
+ */
+static uint32_t dccg35_is_fe_rcg(struct dccg *dccg, int inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	uint32_t root_gate_disable = 0;
+
+	switch (inst) {
+	case 0:
+		REG_GET(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_FE_ROOT_GATE_DISABLE, &root_gate_disable);
+		break;
+	case 1:
+		REG_GET(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_FE_ROOT_GATE_DISABLE, &root_gate_disable);
+		break;
+	case 2:
+		REG_GET(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_FE_ROOT_GATE_DISABLE, &root_gate_disable);
+		break;
+	case 3:
+		REG_GET(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_FE_ROOT_GATE_DISABLE, &root_gate_disable);
+		break;
+	case 4:
+		REG_GET(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_FE_ROOT_GATE_DISABLE, &root_gate_disable);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		break;
+	}
+
+	return root_gate_disable;
+}
+
+/*
+ * Read both gate-disable bits of one SYMCLK32 stream encoder clock.
+ *
+ * @dccg: DCCG instance whose registers are read
+ * @inst: SYMCLK32_SE instance, 0..3
+ *
+ * Returns the bitwise OR of SYMCLK32_SEx_GATE_DISABLE and
+ * SYMCLK32_ROOT_SEx_GATE_DISABLE from DCCG_GATE_DISABLE_CNTL3.  An
+ * out-of-range @inst triggers BREAK_TO_DEBUGGER() and 0 is returned.
+ *
+ * NOTE(review): both fields are *gate disable* bits, so a non-zero result
+ * presumably means gating is disabled at one of the two levels; confirm
+ * against the callers' expectations.
+ */
+static uint32_t dccg35_is_symclk32_se_rcg(struct dccg *dccg, int inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	uint32_t gate_dis = 0;
+	uint32_t root_gate_dis = 0;
+
+	switch (inst) {
+	case 0:
+		REG_GET_2(DCCG_GATE_DISABLE_CNTL3,
+				SYMCLK32_SE0_GATE_DISABLE, &gate_dis,
+				SYMCLK32_ROOT_SE0_GATE_DISABLE, &root_gate_dis);
+		break;
+	case 1:
+		REG_GET_2(DCCG_GATE_DISABLE_CNTL3,
+				SYMCLK32_SE1_GATE_DISABLE, &gate_dis,
+				SYMCLK32_ROOT_SE1_GATE_DISABLE, &root_gate_dis);
+		break;
+	case 2:
+		REG_GET_2(DCCG_GATE_DISABLE_CNTL3,
+				SYMCLK32_SE2_GATE_DISABLE, &gate_dis,
+				SYMCLK32_ROOT_SE2_GATE_DISABLE, &root_gate_dis);
+		break;
+	case 3:
+		REG_GET_2(DCCG_GATE_DISABLE_CNTL3,
+				SYMCLK32_SE3_GATE_DISABLE, &gate_dis,
+				SYMCLK32_ROOT_SE3_GATE_DISABLE, &root_gate_dis);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return 0;
+	}
+
+	return gate_dis | root_gate_dis;
+}
+
+/*
+ * Enable one SYMCLK front end: call dccg35_set_symclk_fe_rcg() with false
+ * for @inst first, then program @src via dccg35_set_symclk_fe_src_new().
+ * The rcg helper is defined outside this view; presumably "false" takes the
+ * clock out of root clock gating before the source is switched.
+ */
+static void dccg35_enable_symclk_fe_new(
+ struct dccg *dccg,
+ int inst,
+ enum symclk_fe_source src)
+{
+ dccg35_set_symclk_fe_rcg(dccg, inst, false);
+ dccg35_set_symclk_fe_src_new(dccg, src, inst);
+}
+
+/*
+ * Disable one SYMCLK front end: switch its source to SYMCLK_FE_REFCLK
+ * first, then call dccg35_set_symclk_fe_rcg() with true for @inst
+ * (the reverse order of the enable path).
+ */
+static void dccg35_disable_symclk_fe_new(
+ struct dccg *dccg,
+ int inst)
+{
+ dccg35_set_symclk_fe_src_new(dccg, SYMCLK_FE_REFCLK, inst);
+ dccg35_set_symclk_fe_rcg(dccg, inst, true);
+}
+
+/*
+ * Enable one SYMCLK back end: call dccg35_set_symclk_be_rcg() with false
+ * for @inst first, then program the requested source.
+ *
+ * @dccg: DCCG instance
+ * @inst: back-end instance
+ * @src:  clock source to select
+ */
+static void dccg35_enable_symclk_be_new(
+	struct dccg *dccg,
+	int inst,
+	enum symclk_be_source src)
+{
+	dccg35_set_symclk_be_rcg(dccg, inst, false);
+	/* dccg35_set_symclk_be_src_new() takes (dccg, src, inst), in that order */
+	dccg35_set_symclk_be_src_new(dccg, src, inst);
+}
+
+/*
+ * Disable one SYMCLK back end.
+ *
+ * The back end is switched to the reference clock, every front end that is
+ * still enabled and sourced from this back end is disabled, and finally
+ * dccg35_set_symclk_be_rcg() is called with true for @inst.
+ *
+ * @dccg: DCCG instance
+ * @inst: back-end instance
+ */
+static void dccg35_disable_symclk_be_new(
+	struct dccg *dccg,
+	int inst)
+{
+	int i;
+
+	/*
+	 * Switch from functional clock to refclock.
+	 * dccg35_set_symclk_be_src_new() takes (dccg, src, inst), in that order.
+	 */
+	dccg35_set_symclk_be_src_new(dccg, SYMCLK_BE_REFCLK, inst);
+
+	/*
+	 * Disable any front end still connected to this back end.
+	 * NOTE(review): the FE helpers accept instances 0..4 but this loop only
+	 * visits 0..3 - confirm whether FE instance 4 should be checked too.
+	 */
+	for (i = 0; i < 4; i++) {
+		/* Make sure FE is not already in RCG */
+		if (dccg35_is_fe_rcg(dccg, i) == 0) {
+			if (dccg35_is_symclk_fe_src_functional_be(dccg, i, inst))
+				dccg35_disable_symclk_fe_new(dccg, i);
+		}
+	}
+	/* Safe to RCG SYMCLK */
+	dccg35_set_symclk_be_rcg(dccg, inst, true);
+}
+
+/*
+ * Enable one SYMCLK32 stream encoder clock: call
+ * dccg35_set_symclk32_se_rcg() with false for @inst, then program @src.
+ *
+ * NOTE(review): this passes (dccg, inst, src) to
+ * dccg35_set_symclk32_se_src_new() while dccg35_disable_symclk32_se_new()
+ * passes (dccg, src, inst); the callee's prototype is outside this view, so
+ * one of the two call sites presumably has its arguments swapped - verify.
+ */
+static void dccg35_enable_symclk32_se_new(
+ struct dccg *dccg,
+ int inst,
+ enum symclk32_se_clk_source src)
+{
+ dccg35_set_symclk32_se_rcg(dccg, inst, false);
+ dccg35_set_symclk32_se_src_new(dccg, inst, src);
+}
+
+/*
+ * Disable one SYMCLK32 stream encoder clock: select SYMCLK32_SE_REFCLK,
+ * then call dccg35_set_symclk32_se_rcg() with true for @inst.
+ *
+ * NOTE(review): this passes (dccg, SYMCLK32_SE_REFCLK, inst) to
+ * dccg35_set_symclk32_se_src_new() while dccg35_enable_symclk32_se_new()
+ * passes (dccg, inst, src); the callee's prototype is outside this view, so
+ * one of the two call sites presumably has its arguments swapped - verify.
+ */
+static void dccg35_disable_symclk32_se_new(
+ struct dccg *dccg,
+ int inst)
+{
+ dccg35_set_symclk32_se_src_new(dccg, SYMCLK32_SE_REFCLK, inst);
+ dccg35_set_symclk32_se_rcg(dccg, inst, true);
+}
+
+/*
+ * Enable one SYMCLK32 link encoder clock: call
+ * dccg35_set_symclk32_le_rcg() with false for @inst, then program @src via
+ * dccg35_set_symclk32_le_src_new().
+ */
+static void dccg35_enable_symclk32_le_new(
+ struct dccg *dccg,
+ int inst,
+ enum symclk32_le_clk_source src)
+{
+ dccg35_set_symclk32_le_rcg(dccg, inst, false);
+ dccg35_set_symclk32_le_src_new(dccg, inst, src);
+}
+
+/*
+ * Disable one SYMCLK32 link encoder clock.
+ *
+ * The link encoder clock is switched to SYMCLK32_LE_REFCLK, then stream
+ * encoder instances 0..3 are scanned: each one for which
+ * dccg35_is_symclk32_se_rcg() returns 0 and which is reported as sourced
+ * from this link encoder is disabled.  Finally
+ * dccg35_set_symclk32_le_rcg() is called with true for @inst.
+ */
+static void dccg35_disable_symclk32_le_new(
+	struct dccg *dccg,
+	int inst)
+{
+	int se;
+
+	/* Move the LE off its functional clock before touching the SEs */
+	dccg35_set_symclk32_le_src_new(dccg, inst, SYMCLK32_LE_REFCLK);
+
+	for (se = 0; se < 4; se++) {
+		/* Skip stream encoders the RCG query reports as non-zero */
+		if (dccg35_is_symclk32_se_rcg(dccg, se) != 0)
+			continue;
+
+		/* Skip stream encoders not fed by this link encoder */
+		if (!dccg35_is_symclk32_se_src_functional_le_new(dccg, se, inst))
+			continue;
+
+		dccg35_disable_symclk32_se_new(dccg, se);
+	}
+
+	/* Dependents are handled; safe to RCG SYM32_LE */
+	dccg35_set_symclk32_le_rcg(dccg, inst, true);
+}
+
+/*
+ * Enable one PHY symbol clock: call dccg35_set_physymclk_rcg() with false
+ * for @inst, then program @src via dccg35_set_physymclk_src_new().
+ */
+static void dccg35_enable_physymclk_new(struct dccg *dccg,
+ int inst,
+ enum physymclk_source src)
+{
+ dccg35_set_physymclk_rcg(dccg, inst, false);
+ dccg35_set_physymclk_src_new(dccg, src, inst);
+}
+
+/*
+ * Disable one PHY symbol clock: select PHYSYMCLK_REFCLK first, then call
+ * dccg35_set_physymclk_rcg() with true for @inst.
+ */
+static void dccg35_disable_physymclk_new(struct dccg *dccg,
+ int inst)
+{
+ dccg35_set_physymclk_src_new(dccg, PHYSYMCLK_REFCLK, inst);
+ dccg35_set_physymclk_rcg(dccg, inst, true);
+}
+
+/*
+ * Enable one DPP clock: call dccg35_set_dppclk_rcg() with false, select
+ * @src, then write phase = modulo = 0xFF into DPPCLK_DTO_PARAM[@inst].
+ *
+ * A negative @inst triggers BREAK_TO_DEBUGGER() and nothing is programmed.
+ * NOTE(review): only the lower bound of @inst is checked before it indexes
+ * DPPCLK_DTO_PARAM[]; confirm callers never pass an index past the array.
+ */
+static void dccg35_enable_dpp_clk_new(
+ struct dccg *dccg,
+ int inst,
+ enum dppclk_clock_source src)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+ /* Sanitize inst before use in array de-ref */
+ if (inst < 0) {
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+ dccg35_set_dppclk_rcg(dccg, inst, false);
+ dcn35_set_dppclk_src_new(dccg, inst, src);
+ /* Switch DPP clock to DTO */
+ REG_SET_2(DPPCLK_DTO_PARAM[inst], 0,
+ DPPCLK0_DTO_PHASE, 0xFF,
+ DPPCLK0_DTO_MODULO, 0xFF);
+}
+
+
+/*
+ * Disable one DPP clock: select DPP_REFCLK, write phase = 0 / modulo = 1
+ * into DPPCLK_DTO_PARAM[@inst], then call dccg35_set_dppclk_rcg() with true.
+ *
+ * A negative @inst triggers BREAK_TO_DEBUGGER() and nothing is programmed.
+ * NOTE(review): only the lower bound of @inst is checked before it indexes
+ * DPPCLK_DTO_PARAM[]; confirm callers never pass an index past the array.
+ */
+static void dccg35_disable_dpp_clk_new(
+ struct dccg *dccg,
+ int inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+ /* Sanitize inst before use in array de-ref */
+ if (inst < 0) {
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+ dcn35_set_dppclk_src_new(dccg, inst, DPP_REFCLK);
+ REG_SET_2(DPPCLK_DTO_PARAM[inst], 0,
+ DPPCLK0_DTO_PHASE, 0,
+ DPPCLK0_DTO_MODULO, 1);
+ dccg35_set_dppclk_rcg(dccg, inst, true);
+}
+
+/*
+ * Disable one DSC clock: select DSC_CLK_REF_CLK first, then call
+ * dccg35_set_dsc_clk_rcg() with true for @inst.
+ */
+static void dccg35_disable_dscclk_new(struct dccg *dccg,
+ int inst)
+{
+ dccg35_set_dsc_clk_src_new(dccg, inst, DSC_CLK_REF_CLK);
+ dccg35_set_dsc_clk_rcg(dccg, inst, true);
+}
+
+/*
+ * Enable one DSC clock: call dccg35_set_dsc_clk_rcg() with false for
+ * @inst first, then program @src via dccg35_set_dsc_clk_src_new().
+ */
+static void dccg35_enable_dscclk_new(struct dccg *dccg,
+ int inst,
+ enum dsc_clk_source src)
+{
+ dccg35_set_dsc_clk_rcg(dccg, inst, false);
+ dccg35_set_dsc_clk_src_new(dccg, inst, src);
+}
+
+/*
+ * Enable one DTBCLK_P clock: call dccg35_set_dtbclk_p_rcg() with false for
+ * @inst first, then program @src via dccg35_set_dtbclk_p_src_new().
+ * Note the parameter order here is (dccg, src, inst).
+ */
+static void dccg35_enable_dtbclk_p_new(struct dccg *dccg,
+ enum dtbclk_source src,
+ int inst)
+{
+ dccg35_set_dtbclk_p_rcg(dccg, inst, false);
+ dccg35_set_dtbclk_p_src_new(dccg, src, inst);
+}
+
+/*
+ * Disable one DTBCLK_P clock: select DTBCLK_REFCLK first, then call
+ * dccg35_set_dtbclk_p_rcg() with true for @inst.
+ */
+static void dccg35_disable_dtbclk_p_new(struct dccg *dccg,
+ int inst)
+{
+ dccg35_set_dtbclk_p_src_new(dccg, DTBCLK_REFCLK, inst);
+ dccg35_set_dtbclk_p_rcg(dccg, inst, true);
+}
+
+/*
+ * Disable one DP stream clock: select DP_STREAM_REFCLK first, then call
+ * dccg35_set_dpstreamclk_rcg() with true for @inst.
+ */
+static void dccg35_disable_dpstreamclk_new(struct dccg *dccg,
+ int inst)
+{
+ dccg35_set_dpstreamclk_src_new(dccg, DP_STREAM_REFCLK, inst);
+ dccg35_set_dpstreamclk_rcg(dccg, inst, true);
+}
+
+/*
+ * Enable one DP stream clock: call dccg35_set_dpstreamclk_rcg() with false
+ * for @inst first, then program @src via dccg35_set_dpstreamclk_src_new().
+ * Note the parameter order here is (dccg, src, inst).
+ */
+static void dccg35_enable_dpstreamclk_new(struct dccg *dccg,
+ enum dp_stream_clk_source src,
+ int inst)
+{
+ dccg35_set_dpstreamclk_rcg(dccg, inst, false);
+ dccg35_set_dpstreamclk_src_new(dccg, src, inst);
+}
+
+/*
+ * Copy the current DENTIST_DISPCLK_RDIVIDER value into
+ * DENTIST_DISPCLK_WDIVIDER.  Nothing is written when the read-back divider
+ * is 0.
+ */
+static void dccg35_trigger_dio_fifo_resync(struct dccg *dccg)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	uint32_t rdivider = 0;
+
+	REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_RDIVIDER, &rdivider);
+	if (rdivider == 0)
+		return;
+
+	REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, rdivider);
+}
+/*
+ * Write the current DENTIST_DISPCLK_CNTL value back to the register
+ * unchanged, then wait for DENTIST_DISPCLK_CHG_DONE to read 1.
+ */
+static void dccg35_wait_for_dentist_change_done(
+ struct dccg *dccg)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ uint32_t dentist_dispclk_value = REG_READ(DENTIST_DISPCLK_CNTL);
+
+ /* write-back of the same value; presumably re-arms the change-done handshake - TODO confirm */
+ REG_WRITE(DENTIST_DISPCLK_CNTL, dentist_dispclk_value);
+ /* 50 and 2000 are the polling arguments handed to REG_WAIT (likely delay in us and retry count) */
+ REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 2000);
+}
+
+/*
+ * Write @enable into the DPPCLKx_EN field of DPPCLK_CTRL for DPP
+ * instance 0..3.  Other instances program nothing; the debug line is
+ * logged in every case.
+ */
+static void dcn35_set_dppclk_enable(struct dccg *dccg,
+ uint32_t dpp_inst, uint32_t enable)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+
+ switch (dpp_inst) {
+ case 0:
+ REG_UPDATE(DPPCLK_CTRL, DPPCLK0_EN, enable);
+ break;
+ case 1:
+ REG_UPDATE(DPPCLK_CTRL, DPPCLK1_EN, enable);
+ break;
+ case 2:
+ REG_UPDATE(DPPCLK_CTRL, DPPCLK2_EN, enable);
+ break;
+ case 3:
+ REG_UPDATE(DPPCLK_CTRL, DPPCLK3_EN, enable);
+ break;
+ default:
+ break;
+ }
+ DC_LOG_DEBUG("%s: dpp_inst(%d) DPPCLK_EN = %d\n", __func__, dpp_inst, enable);
+
+}
+
+/*
+ * Program the DPPCLK DTO of one DPP instance for a requested clock.
+ *
+ * @dccg:       DCCG instance
+ * @dpp_inst:   DPP instance; indexes dpp_clock_gated[], pipe_dppclk_khz[]
+ *              and DPPCLK_DTO_PARAM[]
+ * @req_dppclk: requested DPP clock; 0 disables the clock
+ *
+ * Nothing is done while dccg->dpp_clock_gated[@dpp_inst] is set.  With a
+ * non-zero reference and request, phase is the rounded-up value of
+ * 0xff * @req_dppclk / ref_dppclk (clamped to 0xff) with modulo 0xff.
+ * Otherwise the clock is disabled and dccg35_set_dppclk_rcg() is called
+ * with true.  Either way the function then waits 10us and records
+ * @req_dppclk.
+ */
+static void dccg35_update_dpp_dto(struct dccg *dccg, int dpp_inst,
+		int req_dppclk)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	/* Do not update the DPPCLK DTO if the clock is stopped. */
+	if (dccg->dpp_clock_gated[dpp_inst])
+		return;
+
+	if (!dccg->ref_dppclk || !req_dppclk) {
+		dcn35_set_dppclk_enable(dccg, dpp_inst, false);
+		dccg35_set_dppclk_rcg(dccg, dpp_inst, true);
+	} else {
+		int ref_clk = dccg->ref_dppclk;
+		int dto_modulo = 0xff;	/* use FF at the end */
+		int dto_phase;
+
+		/* phase / modulo = dpp pipe clk / dpp global clk */
+		dto_phase = ((dto_modulo * req_dppclk) + ref_clk - 1) / ref_clk;
+		if (dto_phase > 0xff) {
+			ASSERT(false);
+			dto_phase = 0xff;
+		}
+
+		dccg35_set_dppclk_rcg(dccg, dpp_inst, false);
+		REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0,
+				DPPCLK0_DTO_PHASE, dto_phase,
+				DPPCLK0_DTO_MODULO, dto_modulo);
+		dcn35_set_dppclk_enable(dccg, dpp_inst, true);
+	}
+
+	udelay(10);
+	dccg->pipe_dppclk_khz[dpp_inst] = req_dppclk;
+}
+
+/*
+ * Write @disallow_rcg into DPPCLKx_ROOT_GATE_DISABLE for DPP instance 0..3.
+ *
+ * Returns without programming anything when the dpp root-clock
+ * optimization debug bit is clear and @disallow_rcg is 0.  When
+ * @disallow_rcg is non-zero the function waits 10us after the write.
+ */
+static void dccg35_set_dppclk_root_clock_gating(struct dccg *dccg,
+ uint32_t dpp_inst, uint32_t disallow_rcg)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp && !disallow_rcg)
+ return;
+
+
+ switch (dpp_inst) {
+ case 0:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, disallow_rcg);
+ break;
+ case 1:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, disallow_rcg);
+ break;
+ case 2:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, disallow_rcg);
+ break;
+ case 3:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, disallow_rcg);
+ break;
+ default:
+ break;
+ }
+
+ /* Wait for clock to ramp */
+ if (disallow_rcg)
+ udelay(10);
+}
+
+/*
+ * Read the K1/K2 pixel rate dividers of one OTG from OTG_PIXEL_RATE_DIV.
+ *
+ * @otg_inst: OTG instance, 0..3
+ * @k1, @k2:  outputs; both are set to PIXEL_RATE_DIV_NA up front and keep
+ *            that value when @otg_inst is out of range (which also
+ *            triggers BREAK_TO_DEBUGGER())
+ */
+static void dccg35_get_pixel_rate_div(
+ struct dccg *dccg,
+ uint32_t otg_inst,
+ uint32_t *k1,
+ uint32_t *k2)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+ uint32_t val_k1 = PIXEL_RATE_DIV_NA, val_k2 = PIXEL_RATE_DIV_NA;
+
+ *k1 = PIXEL_RATE_DIV_NA;
+ *k2 = PIXEL_RATE_DIV_NA;
+
+ switch (otg_inst) {
+ case 0:
+ REG_GET_2(OTG_PIXEL_RATE_DIV,
+ OTG0_PIXEL_RATE_DIVK1, &val_k1,
+ OTG0_PIXEL_RATE_DIVK2, &val_k2);
+ break;
+ case 1:
+ REG_GET_2(OTG_PIXEL_RATE_DIV,
+ OTG1_PIXEL_RATE_DIVK1, &val_k1,
+ OTG1_PIXEL_RATE_DIVK2, &val_k2);
+ break;
+ case 2:
+ REG_GET_2(OTG_PIXEL_RATE_DIV,
+ OTG2_PIXEL_RATE_DIVK1, &val_k1,
+ OTG2_PIXEL_RATE_DIVK2, &val_k2);
+ break;
+ case 3:
+ REG_GET_2(OTG_PIXEL_RATE_DIV,
+ OTG3_PIXEL_RATE_DIVK1, &val_k1,
+ OTG3_PIXEL_RATE_DIVK2, &val_k2);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
+ *k1 = val_k1;
+ *k2 = val_k2;
+}
+
+/*
+ * Program the K1/K2 pixel rate dividers of one OTG.
+ *
+ * @dccg:     DCCG instance
+ * @otg_inst: OTG instance, 0..3; other values trigger BREAK_TO_DEBUGGER()
+ * @k1, @k2:  dividers to program; PIXEL_RATE_DIV_NA is rejected with
+ *            BREAK_TO_DEBUGGER() because it does not fit the register field
+ *
+ * The registers are left untouched when they already hold @k1 and @k2.
+ * After a successful update the function waits for the DENTIST change-done
+ * handshake.
+ */
+static void dccg35_set_pixel_rate_div(
+	struct dccg *dccg,
+	uint32_t otg_inst,
+	enum pixel_rate_div k1,
+	enum pixel_rate_div k2)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	uint32_t cur_k1 = PIXEL_RATE_DIV_NA;
+	uint32_t cur_k2 = PIXEL_RATE_DIV_NA;
+
+	// Don't program 0xF into the register field. Not valid since
+	// K1 / K2 field is only 1 / 2 bits wide
+	if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) {
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+
+	dccg35_get_pixel_rate_div(dccg, otg_inst, &cur_k1, &cur_k2);
+	if (k1 == cur_k1 && k2 == cur_k2)
+		return;
+
+	switch (otg_inst) {
+	case 0:
+		REG_UPDATE_2(OTG_PIXEL_RATE_DIV,
+				OTG0_PIXEL_RATE_DIVK1, k1,
+				OTG0_PIXEL_RATE_DIVK2, k2);
+		break;
+	case 1:
+		REG_UPDATE_2(OTG_PIXEL_RATE_DIV,
+				OTG1_PIXEL_RATE_DIVK1, k1,
+				OTG1_PIXEL_RATE_DIVK2, k2);
+		break;
+	case 2:
+		REG_UPDATE_2(OTG_PIXEL_RATE_DIV,
+				OTG2_PIXEL_RATE_DIVK1, k1,
+				OTG2_PIXEL_RATE_DIVK2, k2);
+		break;
+	case 3:
+		REG_UPDATE_2(OTG_PIXEL_RATE_DIV,
+				OTG3_PIXEL_RATE_DIVK1, k1,
+				OTG3_PIXEL_RATE_DIVK2, k2);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+
+	/* Only reached for otg_inst 0..3: the default case returned above */
+	dccg35_wait_for_dentist_change_done(dccg);
+}
+
+/*
+ * Select the DTBCLK_P source for one OTG.
+ *
+ * @dccg:     DCCG instance
+ * @src:      REFCLK only clears DTBCLK_Px_EN (the source select field is
+ *            left as it is); any other source sets DTBCLK_Px_EN and writes
+ *            DTBCLK_Px_SRC_SEL = 2 for DTBCLK0, 0 otherwise
+ * @otg_inst: OTG instance, 0..3; other values trigger BREAK_TO_DEBUGGER()
+ */
+static void dccg35_set_dtbclk_p_src(
+	struct dccg *dccg,
+	enum streamclk_source src,
+	uint32_t otg_inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	/* 2 selects dtbclk0, 0 selects dprefclk */
+	const uint32_t src_sel = (src == DTBCLK0) ? 2 : 0;
+	const bool to_refclk = (src == REFCLK);
+
+	switch (otg_inst) {
+	case 0:
+		if (to_refclk) {
+			REG_UPDATE(DTBCLK_P_CNTL, DTBCLK_P0_EN, 0);
+		} else {
+			REG_UPDATE_2(DTBCLK_P_CNTL,
+					DTBCLK_P0_SRC_SEL, src_sel,
+					DTBCLK_P0_EN, 1);
+		}
+		break;
+	case 1:
+		if (to_refclk) {
+			REG_UPDATE(DTBCLK_P_CNTL, DTBCLK_P1_EN, 0);
+		} else {
+			REG_UPDATE_2(DTBCLK_P_CNTL,
+					DTBCLK_P1_SRC_SEL, src_sel,
+					DTBCLK_P1_EN, 1);
+		}
+		break;
+	case 2:
+		if (to_refclk) {
+			REG_UPDATE(DTBCLK_P_CNTL, DTBCLK_P2_EN, 0);
+		} else {
+			REG_UPDATE_2(DTBCLK_P_CNTL,
+					DTBCLK_P2_SRC_SEL, src_sel,
+					DTBCLK_P2_EN, 1);
+		}
+		break;
+	case 3:
+		if (to_refclk) {
+			REG_UPDATE(DTBCLK_P_CNTL, DTBCLK_P3_EN, 0);
+		} else {
+			REG_UPDATE_2(DTBCLK_P_CNTL,
+					DTBCLK_P3_SRC_SEL, src_sel,
+					DTBCLK_P3_EN, 1);
+		}
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Controls the generation of pixel valid for OTG in (OTG -> HPO case).
+ *
+ * With a non-zero reference clock and a non-zero requested DTBCLK
+ * (pixclk_khz / 4) the DTO of params->otg_inst is enabled; when both are
+ * zero it is disabled.  Any other combination leaves the hardware untouched.
+ *
+ * NOTE(review): params->otg_inst indexes the register arrays without a
+ * range check and the switch statements have no default case; presumably
+ * callers guarantee 0..3 - verify.
+ */
+static void dccg35_set_dtbclk_dto(
+ struct dccg *dccg,
+ const struct dtbclk_dto_params *params)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+ /* DTO Output Rate / Pixel Rate = 1/4 */
+ int req_dtbclk_khz = params->pixclk_khz / 4;
+
+ if (params->ref_dtbclk_khz && req_dtbclk_khz) {
+ uint32_t modulo, phase;
+
+ /* set the DTBCLK_Px gate-disable bit before programming the DTO */
+ switch (params->otg_inst) {
+ case 0:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, 1);
+ break;
+ case 1:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, 1);
+ break;
+ case 2:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, 1);
+ break;
+ case 3:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, 1);
+ break;
+ }
+
+ // phase / modulo = dtbclk / dtbclk ref
+ modulo = params->ref_dtbclk_khz * 1000;
+ phase = req_dtbclk_khz * 1000;
+
+ REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], modulo);
+ REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], phase);
+
+ REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ DTBCLK_DTO_ENABLE[params->otg_inst], 1);
+
+ /* wait until the hardware reports the DTO as enabled */
+ REG_WAIT(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ DTBCLKDTO_ENABLE_STATUS[params->otg_inst], 1,
+ 1, 100);
+
+ /* program OTG_PIXEL_RATE_DIV for DIVK1 and DIVK2 fields */
+ dccg35_set_pixel_rate_div(dccg, params->otg_inst, PIXEL_RATE_DIV_BY_1, PIXEL_RATE_DIV_BY_1);
+
+ /* The recommended programming sequence to enable DTBCLK DTO to generate
+ * valid pixel HPO DPSTREAM ENCODER, specifies that DTO source select should
+ * be set only after DTO is enabled.
+ * PIPEx_DTO_SRC_SEL should not be programmed during DTBCLK update since OTG may still be on, and the
+ * programming is handled in program_pix_clk() regardless, so it can be removed from here.
+ */
+ DC_LOG_DEBUG("%s: OTG%d DTBCLK DTO enabled: pixclk_khz=%d, ref_dtbclk_khz=%d, req_dtbclk_khz=%d, phase=%d, modulo=%d\n",
+ __func__, params->otg_inst, params->pixclk_khz,
+ params->ref_dtbclk_khz, req_dtbclk_khz, phase, modulo);
+
+ } else if (!params->ref_dtbclk_khz && !req_dtbclk_khz) {
+ /* clear the DTBCLK_Px gate-disable bit again on the disable path */
+ switch (params->otg_inst) {
+ case 0:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, 0);
+ break;
+ case 1:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, 0);
+ break;
+ case 2:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, 0);
+ break;
+ case 3:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, 0);
+ break;
+ }
+
+ /**
+ * PIPEx_DTO_SRC_SEL should not be programmed during DTBCLK update since OTG may still be on, and the
+ * programming is handled in program_pix_clk() regardless, so it can be removed from here.
+ */
+ REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ DTBCLK_DTO_ENABLE[params->otg_inst], 0);
+
+ REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0);
+ REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0);
+
+ DC_LOG_DEBUG("%s: OTG%d DTBCLK DTO disabled\n", __func__, params->otg_inst);
+ }
+}
+
+/*
+ * Route a DTBCLK_P to one DP HPO stream clock.
+ *
+ * @dccg:        DCCG instance
+ * @src:         REFCLK disables the stream clock, anything else enables it
+ * @otg_inst:    OTG whose DTBCLK_P source is programmed first and which is
+ *               written into DPSTREAMCLKx_SRC_SEL
+ * @dp_hpo_inst: stream clock instance, 0..3; other values trigger
+ *               BREAK_TO_DEBUGGER() and skip the debug log
+ *
+ * When the dpstream root-clock optimization debug bit is set, the matching
+ * DPSTREAMCLKx_ROOT_GATE_DISABLE bit is written with the same enable value.
+ */
+static void dccg35_set_dpstreamclk(
+	struct dccg *dccg,
+	enum streamclk_source src,
+	int otg_inst,
+	int dp_hpo_inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const int clk_en = (src == REFCLK) ? 0 : 1;
+
+	/* set the dtbclk_p source */
+	dccg35_set_dtbclk_p_src(dccg, src, otg_inst);
+
+	/* enabled to select one of the DTBCLKs for pipe */
+	switch (dp_hpo_inst) {
+	case 0:
+		REG_UPDATE_2(DPSTREAMCLK_CNTL,
+				DPSTREAMCLK0_EN, clk_en,
+				DPSTREAMCLK0_SRC_SEL, otg_inst);
+		if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_ROOT_GATE_DISABLE, clk_en);
+		break;
+	case 1:
+		REG_UPDATE_2(DPSTREAMCLK_CNTL,
+				DPSTREAMCLK1_EN, clk_en,
+				DPSTREAMCLK1_SRC_SEL, otg_inst);
+		if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_ROOT_GATE_DISABLE, clk_en);
+		break;
+	case 2:
+		REG_UPDATE_2(DPSTREAMCLK_CNTL,
+				DPSTREAMCLK2_EN, clk_en,
+				DPSTREAMCLK2_SRC_SEL, otg_inst);
+		if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_ROOT_GATE_DISABLE, clk_en);
+		break;
+	case 3:
+		REG_UPDATE_2(DPSTREAMCLK_CNTL,
+				DPSTREAMCLK3_EN, clk_en,
+				DPSTREAMCLK3_SRC_SEL, otg_inst);
+		if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_ROOT_GATE_DISABLE, clk_en);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+
+	DC_LOG_DEBUG("%s: dp_hpo_inst(%d) DPSTREAMCLK_EN = %d, DPSTREAMCLK_SRC_SEL = %d\n",
+			__func__, dp_hpo_inst, clk_en, otg_inst);
+}
+
+
+/*
+ * Write the gate-disable bits of one DP HPO stream clock.
+ *
+ * @dccg:        DCCG instance
+ * @dp_hpo_inst: stream clock instance, 0..3; other values trigger
+ *               BREAK_TO_DEBUGGER() and skip the debug log
+ * @enable:      true writes 1, false writes 0, into both
+ *               DPSTREAMCLKx_ROOT_GATE_DISABLE and DPSTREAMCLKx_GATE_DISABLE
+ *
+ * The registers are only written when the dpstream root-clock optimization
+ * debug bit is set; the debug line is logged for every valid instance.
+ */
+static void dccg35_set_dpstreamclk_root_clock_gating(
+	struct dccg *dccg,
+	int dp_hpo_inst,
+	bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const int gate_disable = enable ? 1 : 0;
+
+	switch (dp_hpo_inst) {
+	case 0:
+		if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+			break;
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_ROOT_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_GATE_DISABLE, gate_disable);
+		break;
+	case 1:
+		if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+			break;
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_ROOT_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_GATE_DISABLE, gate_disable);
+		break;
+	case 2:
+		if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+			break;
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_ROOT_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_GATE_DISABLE, gate_disable);
+		break;
+	case 3:
+		if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+			break;
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_ROOT_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, gate_disable);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+
+	DC_LOG_DEBUG("%s: dp_hpo_inst(%d) DPSTREAMCLK_ROOT_GATE_DISABLE = %d\n",
+			__func__, dp_hpo_inst, gate_disable);
+}
+
+
+
+/*
+ * Enable or disable root clock gating for one PHY symbol clock.
+ *
+ * @dccg:     DCCG instance
+ * @phy_inst: PHY instance, 0..4; other values trigger BREAK_TO_DEBUGGER()
+ * @enable:   true writes 0, false writes 1, into
+ *            PHYxSYMCLK_ROOT_GATE_DISABLE
+ *
+ * Does nothing unless the physymclk root-clock optimization debug bit is
+ * set.
+ */
+static void dccg35_set_physymclk_root_clock_gating(
+	struct dccg *dccg,
+	int phy_inst,
+	bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
+		return;
+
+	switch (phy_inst) {
+	case 0:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				PHYASYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 1:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				PHYBSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 2:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				PHYCSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 3:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				PHYDSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 4:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				PHYESYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+	/* Log the PHY instance generically; the field differs per instance */
+	DC_LOG_DEBUG("%s: phy_inst(%d) PHYxSYMCLK_ROOT_GATE_DISABLE: %d\n", __func__, phy_inst, enable ? 0 : 1);
+}
+
+/*
+ * Force one PHY symbol clock on with a given source, or turn it off.
+ *
+ * @dccg:         DCCG instance
+ * @phy_inst:     PHY instance, 0..4; other values trigger
+ *                BREAK_TO_DEBUGGER() and skip the debug log
+ * @clk_src:      source written to PHYxSYMCLK_SRC_SEL when @force_enable
+ * @force_enable: true sets PHYxSYMCLK_EN and programs @clk_src; false
+ *                clears both the enable bit and the source select
+ *
+ * The debug log always prints @clk_src, even when 0 was programmed.
+ */
+static void dccg35_set_physymclk(
+	struct dccg *dccg,
+	int phy_inst,
+	enum physymclk_clock_source clk_src,
+	bool force_enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const int clk_en = force_enable ? 1 : 0;
+	const uint32_t src_sel = force_enable ? clk_src : 0;
+
+	/* Force PHYSYMCLK on and Select phyd32clk as the source of clock which is output to PHY through DCIO */
+	switch (phy_inst) {
+	case 0:
+		REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL,
+				PHYASYMCLK_EN, clk_en,
+				PHYASYMCLK_SRC_SEL, src_sel);
+		break;
+	case 1:
+		REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL,
+				PHYBSYMCLK_EN, clk_en,
+				PHYBSYMCLK_SRC_SEL, src_sel);
+		break;
+	case 2:
+		REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL,
+				PHYCSYMCLK_EN, clk_en,
+				PHYCSYMCLK_SRC_SEL, src_sel);
+		break;
+	case 3:
+		REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL,
+				PHYDSYMCLK_EN, clk_en,
+				PHYDSYMCLK_SRC_SEL, src_sel);
+		break;
+	case 4:
+		REG_UPDATE_2(PHYESYMCLK_CLOCK_CNTL,
+				PHYESYMCLK_EN, clk_en,
+				PHYESYMCLK_SRC_SEL, src_sel);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+
+	DC_LOG_DEBUG("%s: phy_inst(%d) PHYxSYMCLK_EN = %d, PHYxSYMCLK_SRC_SEL = %d\n",
+			__func__, phy_inst, clk_en, clk_src);
+}
+
+/*
+ * Build a dtbclk_dto_params request (with is_hdmi set) from the given
+ * reference clock, OTG instance and pixel clock, and hand it to
+ * dccg35_set_dtbclk_dto().  All other fields of the request are zero.
+ */
+static void dccg35_set_valid_pixel_rate(
+	struct dccg *dccg,
+	int ref_dtbclk_khz,
+	int otg_inst,
+	int pixclk_khz)
+{
+	struct dtbclk_dto_params dto_params = {
+		.ref_dtbclk_khz = ref_dtbclk_khz,
+		.otg_inst = otg_inst,
+		.pixclk_khz = pixclk_khz,
+		.is_hdmi = true,
+	};
+
+	dccg35_set_dtbclk_dto(dccg, &dto_params);
+}
+
+/*
+ * Turn the root clock of one DPP on or off and record the new gated state
+ * in dccg->dpp_clock_gated[].
+ *
+ * @dpp_inst: DPP instance; indexes dpp_clock_gated[] and DPPCLK_DTO_PARAM[]
+ * @clock_on: true to run the clock, false to stop it
+ */
+static void dccg35_dpp_root_clock_control(
+ struct dccg *dccg,
+ unsigned int dpp_inst,
+ bool clock_on)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ /* gated != clock_on means the cached state already matches the request */
+ if (dccg->dpp_clock_gated[dpp_inst] != clock_on)
+ return;
+
+ if (clock_on) {
+ dccg35_set_dppclk_rcg(dccg, dpp_inst, false);
+
+ /* turn off the DTO and leave phase/modulo at max */
+ dcn35_set_dppclk_enable(dccg, dpp_inst, 1);
+ REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0,
+ DPPCLK0_DTO_PHASE, 0xFF,
+ DPPCLK0_DTO_MODULO, 0xFF);
+ } else {
+ dcn35_set_dppclk_enable(dccg, dpp_inst, 0);
+ /* turn on the DTO to generate a 0hz clock */
+ REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0,
+ DPPCLK0_DTO_PHASE, 0,
+ DPPCLK0_DTO_MODULO, 1);
+ /*we have this in hwss: disable_plane*/
+ dccg35_set_dppclk_rcg(dccg, dpp_inst, true);
+ }
+
+ // wait for clock to fully ramp
+ udelay(10);
+
+ dccg->dpp_clock_gated[dpp_inst] = !clock_on;
+ DC_LOG_DEBUG("%s: dpp_inst(%d) clock_on = %d\n", __func__, dpp_inst, clock_on);
+}
+
+/*
+ * Disable the SYMCLK32 clock of one HPO stream encoder.
+ *
+ * @dccg:        DCCG instance
+ * @hpo_se_inst: stream encoder instance, 0..3; other values trigger
+ *               BREAK_TO_DEBUGGER()
+ *
+ * SYMCLK32_SEx_SRC_SEL and SYMCLK32_SEx_EN are cleared (refclk as source).
+ * When the symclk32_se root-clock optimization debug bit is set,
+ * SYMCLK32_SEx_GATE_DISABLE is cleared as well; the corresponding
+ * SYMCLK32_ROOT_SEx_GATE_DISABLE bit is not touched here.
+ */
+static void dccg35_disable_symclk32_se(
+	struct dccg *dccg,
+	int hpo_se_inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const bool rcg_opt = dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se;
+
+	/* set refclk as the source for symclk32_se */
+	switch (hpo_se_inst) {
+	case 0:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+				SYMCLK32_SE0_SRC_SEL, 0,
+				SYMCLK32_SE0_EN, 0);
+		if (rcg_opt)
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE0_GATE_DISABLE, 0);
+		break;
+	case 1:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+				SYMCLK32_SE1_SRC_SEL, 0,
+				SYMCLK32_SE1_EN, 0);
+		if (rcg_opt)
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE1_GATE_DISABLE, 0);
+		break;
+	case 2:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+				SYMCLK32_SE2_SRC_SEL, 0,
+				SYMCLK32_SE2_EN, 0);
+		if (rcg_opt)
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE2_GATE_DISABLE, 0);
+		break;
+	case 3:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+				SYMCLK32_SE3_SRC_SEL, 0,
+				SYMCLK32_SE3_EN, 0);
+		if (rcg_opt)
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE3_GATE_DISABLE, 0);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Init callback variant that intentionally programs nothing; @dccg is
+ * unused.
+ */
+static void dccg35_init_cb(struct dccg *dccg)
+{
+ (void)dccg;
+ /* Any RCG should be done when driver enter low power mode*/
+}
+/*
+ * Bring the DCCG into its initial state: disable SYMCLK32 for all four HPO
+ * stream encoders and, depending on the root-clock optimization debug bits,
+ * disable the SYMCLK32 link encoder clocks and the DP stream clocks.
+ *
+ * Note that otg_inst is reused as a plain loop counter for stream encoder,
+ * link encoder and DP stream clock instances.
+ */
+void dccg35_init(struct dccg *dccg)
+{
+ int otg_inst;
+ /* Set HPO stream encoder to use refclk to avoid case where PHY is
+ * disabled and SYMCLK32 for HPO SE is sourced from PHYD32CLK which
+ * will cause DCN to hang.
+ */
+ for (otg_inst = 0; otg_inst < 4; otg_inst++)
+ dccg35_disable_symclk32_se(dccg, otg_inst);
+
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+ for (otg_inst = 0; otg_inst < 2; otg_inst++) {
+ dccg31_disable_symclk32_le(dccg, otg_inst);
+ dccg31_set_symclk32_le_root_clock_gating(dccg, otg_inst, false);
+ DC_LOG_DEBUG("%s: OTG%d SYMCLK32_LE disabled and root clock gating disabled\n",
+ __func__, otg_inst);
+ }
+
+// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+// for (otg_inst = 0; otg_inst < 4; otg_inst++)
+// dccg35_disable_symclk_se(dccg, otg_inst, otg_inst);
+
+
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ for (otg_inst = 0; otg_inst < 4; otg_inst++) {
+ /* instance N is paired with OTG N here */
+ dccg35_set_dpstreamclk(dccg, REFCLK, otg_inst,
+ otg_inst);
+ dccg35_set_dpstreamclk_root_clock_gating(dccg, otg_inst, false);
+ DC_LOG_DEBUG("%s: OTG%d DPSTREAMCLK disabled and root clock gating disabled\n",
+ __func__, otg_inst);
+ }
+
+/*
+ dccg35_enable_global_fgcg_rep(
+ dccg, dccg->ctx->dc->debug.enable_fine_grain_clock_gating.bits
+ .dccg_global_fgcg_rep);*/
+}
+
+/*
+ * Write the inverse of @value into DCCG_GLOBAL_FGCG_REP_DIS, i.e. @value
+ * true clears the "disable" bit and @value false sets it.
+ */
+void dccg35_enable_global_fgcg_rep(struct dccg *dccg, bool value)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ REG_UPDATE(DCCG_GLOBAL_FGCG_REP_CNTL, DCCG_GLOBAL_FGCG_REP_DIS, !value);
+}
+
+/*
+ * Enable the DSC clock of instance 0..3: set DSCCLKx_ROOT_GATE_DISABLE,
+ * write phase = modulo = 0 into DSCCLKx_DTO_PARAM, set DSCCLKx_EN, then
+ * wait 10us.  Other instances trigger BREAK_TO_DEBUGGER() and return
+ * without the delay.
+ */
+static void dccg35_enable_dscclk(struct dccg *dccg, int inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ //Disable DTO
+ switch (inst) {
+ case 0:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, 1);
+
+ REG_UPDATE_2(DSCCLK0_DTO_PARAM,
+ DSCCLK0_DTO_PHASE, 0,
+ DSCCLK0_DTO_MODULO, 0);
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 1);
+ break;
+ case 1:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, 1);
+
+ REG_UPDATE_2(DSCCLK1_DTO_PARAM,
+ DSCCLK1_DTO_PHASE, 0,
+ DSCCLK1_DTO_MODULO, 0);
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 1);
+ break;
+ case 2:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, 1);
+
+ REG_UPDATE_2(DSCCLK2_DTO_PARAM,
+ DSCCLK2_DTO_PHASE, 0,
+ DSCCLK2_DTO_MODULO, 0);
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 1);
+ break;
+ case 3:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, 1);
+
+ REG_UPDATE_2(DSCCLK3_DTO_PARAM,
+ DSCCLK3_DTO_PHASE, 0,
+ DSCCLK3_DTO_MODULO, 0);
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, 1);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
+ /* Wait for clock to ramp */
+ udelay(10);
+}
+
+/*
+ * Disable the DSC clock of instance 0..3.
+ *
+ * DSCCLKx_EN is cleared, DSCCLKx_DTO_PARAM is written with phase = 0 /
+ * modulo = 1 and, when the dsc root-clock optimization debug bit is set,
+ * DSCCLKx_ROOT_GATE_DISABLE is cleared.  The function then waits 10us.
+ * Other instances return immediately without any access or delay.
+ */
+static void dccg35_disable_dscclk(struct dccg *dccg,
+		int inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 0);
+		REG_UPDATE_2(DSCCLK0_DTO_PARAM,
+				DSCCLK0_DTO_PHASE, 0,
+				DSCCLK0_DTO_MODULO, 1);
+		if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc)
+			break;
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, 0);
+		break;
+	case 1:
+		REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 0);
+		REG_UPDATE_2(DSCCLK1_DTO_PARAM,
+				DSCCLK1_DTO_PHASE, 0,
+				DSCCLK1_DTO_MODULO, 1);
+		if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc)
+			break;
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, 0);
+		break;
+	case 2:
+		REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 0);
+		REG_UPDATE_2(DSCCLK2_DTO_PARAM,
+				DSCCLK2_DTO_PHASE, 0,
+				DSCCLK2_DTO_MODULO, 1);
+		if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc)
+			break;
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, 0);
+		break;
+	case 3:
+		REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, 0);
+		REG_UPDATE_2(DSCCLK3_DTO_PARAM,
+				DSCCLK3_DTO_PHASE, 0,
+				DSCCLK3_DTO_MODULO, 1);
+		if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc)
+			break;
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, 0);
+		break;
+	default:
+		return;
+	}
+
+	/* Wait for clock ramp */
+	udelay(10);
+}
+
+/*
+ * Enable SYMCLK for a stream encoder / link encoder pair.
+ *
+ * @dccg:            DCCG instance
+ * @stream_enc_inst: front-end (stream encoder) instance, 0..4
+ * @link_enc_inst:   back-end (link encoder) instance, 0..4; also written
+ *                   into the front end's SYMCLKx_FE_SRC_SEL
+ *
+ * First the back end's SYMCLKx_CLOCK_ENABLE is set, then the front end's
+ * SYMCLKx_FE_EN is set with @link_enc_inst as its source.  When the
+ * symclk32_se root-clock optimization debug bit is set, the matching
+ * *_ROOT_GATE_DISABLE bits are set too.  Out-of-range instances are
+ * silently skipped.
+ */
+static void dccg35_enable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const bool rcg_opt = dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se;
+
+	/* Back end: enable the link encoder symbol clock */
+	switch (link_enc_inst) {
+	case 0:
+		REG_UPDATE(SYMCLKA_CLOCK_ENABLE, SYMCLKA_CLOCK_ENABLE, 1);
+		if (rcg_opt)
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_ROOT_GATE_DISABLE, 1);
+		break;
+	case 1:
+		REG_UPDATE(SYMCLKB_CLOCK_ENABLE, SYMCLKB_CLOCK_ENABLE, 1);
+		if (rcg_opt)
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_ROOT_GATE_DISABLE, 1);
+		break;
+	case 2:
+		REG_UPDATE(SYMCLKC_CLOCK_ENABLE, SYMCLKC_CLOCK_ENABLE, 1);
+		if (rcg_opt)
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_ROOT_GATE_DISABLE, 1);
+		break;
+	case 3:
+		REG_UPDATE(SYMCLKD_CLOCK_ENABLE, SYMCLKD_CLOCK_ENABLE, 1);
+		if (rcg_opt)
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_ROOT_GATE_DISABLE, 1);
+		break;
+	case 4:
+		REG_UPDATE(SYMCLKE_CLOCK_ENABLE, SYMCLKE_CLOCK_ENABLE, 1);
+		if (rcg_opt)
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_ROOT_GATE_DISABLE, 1);
+		break;
+	}
+
+	/* Front end: enable the stream encoder clock, sourced from the back end */
+	switch (stream_enc_inst) {
+	case 0:
+		REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE,
+				SYMCLKA_FE_EN, 1,
+				SYMCLKA_FE_SRC_SEL, link_enc_inst);
+		if (rcg_opt)
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_FE_ROOT_GATE_DISABLE, 1);
+		break;
+	case 1:
+		REG_UPDATE_2(SYMCLKB_CLOCK_ENABLE,
+				SYMCLKB_FE_EN, 1,
+				SYMCLKB_FE_SRC_SEL, link_enc_inst);
+		if (rcg_opt)
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_FE_ROOT_GATE_DISABLE, 1);
+		break;
+	case 2:
+		REG_UPDATE_2(SYMCLKC_CLOCK_ENABLE,
+				SYMCLKC_FE_EN, 1,
+				SYMCLKC_FE_SRC_SEL, link_enc_inst);
+		if (rcg_opt)
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_FE_ROOT_GATE_DISABLE, 1);
+		break;
+	case 3:
+		REG_UPDATE_2(SYMCLKD_CLOCK_ENABLE,
+				SYMCLKD_FE_EN, 1,
+				SYMCLKD_FE_SRC_SEL, link_enc_inst);
+		if (rcg_opt)
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_FE_ROOT_GATE_DISABLE, 1);
+		break;
+	case 4:
+		REG_UPDATE_2(SYMCLKE_CLOCK_ENABLE,
+				SYMCLKE_FE_EN, 1,
+				SYMCLKE_FE_SRC_SEL, link_enc_inst);
+		if (rcg_opt)
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_FE_ROOT_GATE_DISABLE, 1);
+		break;
+	}
+}
+
+/*
+ * Count the enabled SYMCLK front ends that are sourced from a given back end.
+ *
+ * Reads FE_EN and FE_SRC_SEL from SYMCLKA..SYMCLKE_CLOCK_ENABLE and returns
+ * how many front ends are both enabled and have FE_SRC_SEL == link_enc_inst.
+ * A front end that is being disabled must already have been cleared for the
+ * result to reflect only the "other" front ends.
+ */
+static uint8_t dccg35_get_number_enabled_symclk_fe_connected_to_be(struct dccg *dccg, uint32_t link_enc_inst)
+{
+ uint8_t num_enabled_symclk_fe = 0;
+ uint32_t fe_clk_en[5] = {0}, be_clk_sel[5] = {0};
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+ /* Declared with the other locals: no declaration after statements */
+ uint8_t i;
+
+ REG_GET_2(SYMCLKA_CLOCK_ENABLE, SYMCLKA_FE_EN, &fe_clk_en[0],
+ SYMCLKA_FE_SRC_SEL, &be_clk_sel[0]);
+
+ REG_GET_2(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_EN, &fe_clk_en[1],
+ SYMCLKB_FE_SRC_SEL, &be_clk_sel[1]);
+
+ REG_GET_2(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_EN, &fe_clk_en[2],
+ SYMCLKC_FE_SRC_SEL, &be_clk_sel[2]);
+
+ REG_GET_2(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_EN, &fe_clk_en[3],
+ SYMCLKD_FE_SRC_SEL, &be_clk_sel[3]);
+
+ REG_GET_2(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_EN, &fe_clk_en[4],
+ SYMCLKE_FE_SRC_SEL, &be_clk_sel[4]);
+
+ for (i = 0; i < ARRAY_SIZE(fe_clk_en); i++) {
+ if (fe_clk_en[i] && be_clk_sel[i] == link_enc_inst)
+ num_enabled_symclk_fe++;
+ }
+ return num_enabled_symclk_fe;
+}
+
+/*
+ * Disable the SYMCLK front end selected by stream_enc_inst and, when no
+ * enabled front end still selects link_enc_inst, the back end as well.
+ *
+ * Instances 0-4 map to SYMCLKA-SYMCLKE; other values fall through both
+ * switches untouched. The ROOT_GATE_DISABLE writes are commented out, so this
+ * function leaves the root clock gating bits as they are.
+ */
+static void dccg35_disable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst)
+{
+ uint8_t num_enabled_symclk_fe = 0;
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ /* Front end: clear the enable bit and reset its source select to 0 */
+ switch (stream_enc_inst) {
+ case 0:
+ REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE,
+ SYMCLKA_FE_EN, 0,
+ SYMCLKA_FE_SRC_SEL, 0);
+// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_FE_ROOT_GATE_DISABLE, 0);
+ break;
+ case 1:
+ REG_UPDATE_2(SYMCLKB_CLOCK_ENABLE,
+ SYMCLKB_FE_EN, 0,
+ SYMCLKB_FE_SRC_SEL, 0);
+// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_FE_ROOT_GATE_DISABLE, 0);
+ break;
+ case 2:
+ REG_UPDATE_2(SYMCLKC_CLOCK_ENABLE,
+ SYMCLKC_FE_EN, 0,
+ SYMCLKC_FE_SRC_SEL, 0);
+// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_FE_ROOT_GATE_DISABLE, 0);
+ break;
+ case 3:
+ REG_UPDATE_2(SYMCLKD_CLOCK_ENABLE,
+ SYMCLKD_FE_EN, 0,
+ SYMCLKD_FE_SRC_SEL, 0);
+// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_FE_ROOT_GATE_DISABLE, 0);
+ break;
+ case 4:
+ REG_UPDATE_2(SYMCLKE_CLOCK_ENABLE,
+ SYMCLKE_FE_EN, 0,
+ SYMCLKE_FE_SRC_SEL, 0);
+// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_FE_ROOT_GATE_DISABLE, 0);
+ break;
+ }
+
+ /*check other enabled symclk fe connected to this be */
+ num_enabled_symclk_fe = dccg35_get_number_enabled_symclk_fe_connected_to_be(dccg, link_enc_inst);
+ /*only turn off backend clk if other front end attached to this backend are all off,
+ for mst, only turn off the backend if this is the last front end*/
+ if (num_enabled_symclk_fe == 0) {
+ switch (link_enc_inst) {
+ case 0:
+ REG_UPDATE(SYMCLKA_CLOCK_ENABLE,
+ SYMCLKA_CLOCK_ENABLE, 0);
+// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_ROOT_GATE_DISABLE, 0);
+ break;
+ case 1:
+ REG_UPDATE(SYMCLKB_CLOCK_ENABLE,
+ SYMCLKB_CLOCK_ENABLE, 0);
+// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_ROOT_GATE_DISABLE, 0);
+ break;
+ case 2:
+ REG_UPDATE(SYMCLKC_CLOCK_ENABLE,
+ SYMCLKC_CLOCK_ENABLE, 0);
+// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_ROOT_GATE_DISABLE, 0);
+ break;
+ case 3:
+ REG_UPDATE(SYMCLKD_CLOCK_ENABLE,
+ SYMCLKD_CLOCK_ENABLE, 0);
+// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_ROOT_GATE_DISABLE, 0);
+ break;
+ case 4:
+ REG_UPDATE(SYMCLKE_CLOCK_ENABLE,
+ SYMCLKE_CLOCK_ENABLE, 0);
+// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_ROOT_GATE_DISABLE, 0);
+ break;
+ }
+ }
+}
+
+/*
+ * Select the DTBCLK_P and DPSTREAMCLK sources for a stream.
+ *
+ * src is translated into a DTBCLK source for otg_inst and a DP stream clock
+ * source for dp_hpo_inst. For DPREFCLK and DTBCLK0 the stream clock source is
+ * otg_inst cast to enum dp_stream_clk_source. An unrecognised src breaks to
+ * the debugger and programs nothing.
+ */
+static void dccg35_set_dpstreamclk_cb(
+ struct dccg *dccg,
+ enum streamclk_source src,
+ int otg_inst,
+ int dp_hpo_inst)
+{
+ enum dp_stream_clk_source stream_src;
+ enum dtbclk_source dtb_src;
+
+ switch (src) {
+ case REFCLK:
+ dtb_src = DTBCLK_REFCLK;
+ stream_src = DP_STREAM_REFCLK;
+ break;
+ case DPREFCLK:
+ dtb_src = DTBCLK_DPREFCLK;
+ stream_src = (enum dp_stream_clk_source)otg_inst;
+ break;
+ case DTBCLK0:
+ dtb_src = DTBCLK_DTBCLK0;
+ stream_src = (enum dp_stream_clk_source)otg_inst;
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
+ /* Both clocks on their reference source: take the disable path */
+ if (dtb_src == DTBCLK_REFCLK && stream_src == DP_STREAM_REFCLK) {
+ dccg35_disable_dtbclk_p_new(dccg, otg_inst);
+ dccg35_disable_dpstreamclk_new(dccg, dp_hpo_inst);
+ return;
+ }
+
+ dccg35_enable_dtbclk_p_new(dccg, dtb_src, otg_inst);
+ dccg35_enable_dpstreamclk_new(dccg, stream_src, dp_hpo_inst);
+}
+
+/*
+ * Gate or ungate DPSTREAMCLK for dp_hpo_inst.
+ *
+ * power_on == true ungates by enabling the clock on DP_STREAM_REFCLK (no
+ * clock source is passed in, so the reference clock is used); false disables
+ * it. Per the original note this is called from optimize_bandwidth and
+ * prepare_bandwidth and is redundant with the gating done by set_dpstreamclk.
+ */
+static void dccg35_set_dpstreamclk_root_clock_gating_cb(
+ struct dccg *dccg,
+ int dp_hpo_inst,
+ bool power_on)
+{
+ if (!power_on) {
+ dccg35_disable_dpstreamclk_new(dccg, dp_hpo_inst);
+ return;
+ }
+
+ dccg35_enable_dpstreamclk_new(dccg, DP_STREAM_REFCLK, dp_hpo_inst);
+}
+
+/*
+ * Program the DPPCLK DTO of dpp_inst for the requested DPP clock.
+ *
+ * Nothing is touched while the pipe's DPP clock is marked gated. Otherwise,
+ * with both a global reference (dccg->ref_dppclk) and a non-zero request, the
+ * DTO is enabled with phase / modulo = req_dppclk / ref_dppclk (modulo fixed
+ * at 0xff, phase rounded up and clamped to 0xff); if either is zero the DPP
+ * clock is disabled. The request is recorded in pipe_dppclk_khz either way.
+ */
+static void dccg35_update_dpp_dto_cb(struct dccg *dccg, int dpp_inst,
+ int req_dppclk)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+ const int dto_modulo = 0xff;
+ int dto_phase;
+
+ /* A stopped DPPCLK must not have its DTO reprogrammed */
+ if (dccg->dpp_clock_gated[dpp_inst])
+ return;
+
+ if (!dccg->ref_dppclk || !req_dppclk) {
+ dccg35_disable_dpp_clk_new(dccg, dpp_inst);
+ } else {
+ int global_dppclk = dccg->ref_dppclk;
+
+ /* Round up so the pipe clock is never below the request */
+ dto_phase = (dto_modulo * req_dppclk + global_dppclk - 1) / global_dppclk;
+ if (dto_phase > 0xff) {
+ ASSERT(false);
+ dto_phase = 0xff;
+ }
+
+ /* Switch the DPP clock to the DTO output before programming it */
+ dccg35_enable_dpp_clk_new(dccg, dpp_inst, DPP_DCCG_DTO);
+
+ REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0,
+ DPPCLK0_DTO_PHASE, dto_phase,
+ DPPCLK0_DTO_MODULO, dto_modulo);
+ }
+
+ dccg->pipe_dppclk_khz[dpp_inst] = req_dppclk;
+}
+
+/*
+ * Gate or ungate the DPP root clock of dpp_inst and track the result in
+ * dccg->dpp_clock_gated[].
+ *
+ * power_on == true ungates, false gates. The function always leaves
+ * dpp_clock_gated[dpp_inst] == !power_on, so there is nothing to do when that
+ * already holds, i.e. when the gated flag differs from power_on. The earlier
+ * '==' test returned precisely when a transition was required, so the clock
+ * could never change state.
+ *
+ * Per the original note this is called from optimize_bandwidth and
+ * prepare_bandwidth and is redundant with the gating done by update_dpp_dto.
+ */
+static void dccg35_dpp_root_clock_control_cb(
+ struct dccg *dccg,
+ unsigned int dpp_inst,
+ bool power_on)
+{
+ /* Already in the requested state */
+ if (dccg->dpp_clock_gated[dpp_inst] != power_on)
+ return;
+
+ dccg35_set_dppclk_rcg(dccg, dpp_inst, !power_on);
+
+ dccg->dpp_clock_gated[dpp_inst] = !power_on;
+}
+
+/*
+ * Forward to dccg35_enable_symclk32_se_new(), casting the phyd32clk source
+ * to enum symclk32_se_clk_source.
+ * NOTE(review): the cast assumes both enums use matching values - confirm.
+ */
+static void dccg35_enable_symclk32_se_cb(
+ struct dccg *dccg,
+ int inst,
+ enum phyd32clk_clock_source phyd32clk)
+{
+ dccg35_enable_symclk32_se_new(dccg, inst, (enum symclk32_se_clk_source)phyd32clk);
+}
+
+/* Forward to dccg35_disable_symclk32_se_new() for stream encoder clock inst */
+static void dccg35_disable_symclk32_se_cb(struct dccg *dccg, int inst)
+{
+ dccg35_disable_symclk32_se_new(dccg, inst);
+}
+
+/*
+ * Forward to dccg35_enable_symclk32_le_new(), casting the phyd32clk source
+ * to enum symclk32_le_clk_source.
+ * NOTE(review): the cast assumes both enums use matching values - confirm.
+ */
+static void dccg35_enable_symclk32_le_cb(
+ struct dccg *dccg,
+ int inst,
+ enum phyd32clk_clock_source src)
+{
+ dccg35_enable_symclk32_le_new(dccg, inst, (enum symclk32_le_clk_source) src);
+}
+
+/* Forward to dccg35_disable_symclk32_le_new() for link encoder clock inst */
+static void dccg35_disable_symclk32_le_cb(struct dccg *dccg, int inst)
+{
+ dccg35_disable_symclk32_le_new(dccg, inst);
+}
+
+/*
+ * Gate or ungate SYMCLK32_LE for inst.
+ *
+ * power_on == true ungates by enabling the clock on SYMCLK32_LE_REFCLK (no
+ * clock source is passed in, so the reference clock is used); false disables
+ * it. Per the original note this is called from optimize_bandwidth and
+ * prepare_bandwidth and is redundant with disable_symclk32_le.
+ */
+static void dccg35_set_symclk32_le_root_clock_gating_cb(
+ struct dccg *dccg,
+ int inst,
+ bool power_on)
+{
+ if (!power_on) {
+ dccg35_disable_symclk32_le_new(dccg, inst);
+ return;
+ }
+
+ dccg35_enable_symclk32_le_new(dccg, inst, SYMCLK32_LE_REFCLK);
+}
+
+/*
+ * Enable or disable PHYSYMCLK for inst.
+ *
+ * With force_enable set the clock is enabled on clk_src (cast to
+ * enum physymclk_source); otherwise it is disabled, which per the original
+ * note means it may fall back to the reference clock.
+ */
+static void dccg35_set_physymclk_cb(
+ struct dccg *dccg,
+ int inst,
+ enum physymclk_clock_source clk_src,
+ bool force_enable)
+{
+ if (!force_enable) {
+ dccg35_disable_physymclk_new(dccg, inst);
+ return;
+ }
+
+ dccg35_enable_physymclk_new(dccg, inst, (enum physymclk_source)clk_src);
+}
+
+/*
+ * Gate or ungate PHYSYMCLK for inst.
+ *
+ * power_on == true ungates by enabling the clock on PHYSYMCLK_REFCLK; false
+ * disables it. Per the original note this is redundant because root clock
+ * gating is already handled in disable_physymclk.
+ */
+static void dccg35_set_physymclk_root_clock_gating_cb(
+ struct dccg *dccg,
+ int inst,
+ bool power_on)
+{
+ if (!power_on) {
+ dccg35_disable_physymclk_new(dccg, inst);
+ return;
+ }
+
+ dccg35_enable_physymclk_new(dccg, inst, PHYSYMCLK_REFCLK);
+}
+
+/*
+ * Gate or ungate SYMCLK32_LE for inst (same body as the _cb variant).
+ *
+ * power_on == true ungates by enabling the clock on SYMCLK32_LE_REFCLK (no
+ * clock source is passed in, so the reference clock is used); false disables
+ * it. Per the original note this is called from optimize_bandwidth and
+ * prepare_bandwidth and is redundant with disable_symclk32_le.
+ */
+static void dccg35_set_symclk32_le_root_clock_gating(
+ struct dccg *dccg,
+ int inst,
+ bool power_on)
+{
+ if (!power_on) {
+ dccg35_disable_symclk32_le_new(dccg, inst);
+ return;
+ }
+
+ dccg35_enable_symclk32_le_new(dccg, inst, SYMCLK32_LE_REFCLK);
+}
+
+/*
+ * Select the DTBCLK_P source for inst: DTBCLK0 enables it on DTBCLK_DTBCLK0,
+ * every other source takes the disable path.
+ */
+static void dccg35_set_dtbclk_p_src_cb(
+ struct dccg *dccg,
+ enum streamclk_source src,
+ uint32_t inst)
+{
+ if (src != DTBCLK0) {
+ dccg35_disable_dtbclk_p_new(dccg, inst);
+ return;
+ }
+
+ dccg35_enable_dtbclk_p_new(dccg, DTBCLK_DTBCLK0, inst);
+}
+
+/*
+ * Program (or tear down) the DTBCLK DTO of params->otg_inst.
+ *
+ * The requested DTBCLK is pixclk_khz / 4. When both it and ref_dtbclk_khz are
+ * non-zero the DTO is enabled with phase / modulo = dtbclk / dtbclk ref, the
+ * enable status is polled, the pixel rate dividers are set to divide-by-1 and
+ * only then is the pipe DTO source switched to 2. Otherwise DTBCLK_P is
+ * disabled, the DTO is turned off, the pipe DTO source is set to 0 for HDMI
+ * or 1 otherwise, and phase/modulo are zeroed.
+ */
+static void dccg35_set_dtbclk_dto_cb(
+ struct dccg *dccg,
+ const struct dtbclk_dto_params *params)
+{
+ /* set_dtbclk_p_src is typically called earlier to switch to DTBCLK;
+ * if params->ref_dtbclk_khz or req_dtbclk_khz is 0, switch to ref-clock
+ */
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+ /* DTO Output Rate / Pixel Rate = 1/4 */
+ int req_dtbclk_khz = params->pixclk_khz / 4;
+
+ if (params->ref_dtbclk_khz && req_dtbclk_khz) {
+ uint32_t modulo, phase;
+
+ dccg35_enable_dtbclk_p_new(dccg, DTBCLK_DTBCLK0, params->otg_inst);
+
+ // phase / modulo = dtbclk / dtbclk ref
+ // (both scaled by 1000, i.e. kHz -> Hz, before being written)
+ modulo = params->ref_dtbclk_khz * 1000;
+ phase = req_dtbclk_khz * 1000;
+
+ REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], modulo);
+ REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], phase);
+
+ REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ DTBCLK_DTO_ENABLE[params->otg_inst], 1);
+
+ /* Poll until the hardware reports the DTO as enabled */
+ REG_WAIT(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ DTBCLKDTO_ENABLE_STATUS[params->otg_inst], 1,
+ 1, 100);
+
+ /* program OTG_PIXEL_RATE_DIV for DIVK1 and DIVK2 fields */
+ dccg35_set_pixel_rate_div(dccg, params->otg_inst, PIXEL_RATE_DIV_BY_1, PIXEL_RATE_DIV_BY_1);
+
+ /* The recommended programming sequence to enable DTBCLK DTO to generate
+ * valid pixel HPO DPSTREAM ENCODER, specifies that DTO source select should
+ * be set only after DTO is enabled
+ */
+ REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ PIPE_DTO_SRC_SEL[params->otg_inst], 2);
+ } else {
+ dccg35_disable_dtbclk_p_new(dccg, params->otg_inst);
+
+ REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ DTBCLK_DTO_ENABLE[params->otg_inst], 0,
+ PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 0 : 1);
+
+ REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0);
+ REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0);
+ }
+}
+
+/* Forward to dccg35_disable_dscclk_new() for DSC clock instance inst */
+static void dccg35_disable_dscclk_cb(struct dccg *dccg,
+ int inst)
+{
+ dccg35_disable_dscclk_new(dccg, inst);
+}
+
+/*
+ * Enable the DSC clock for inst, always using the
+ * DSC_DTO_TUNED_CK_GPU_DISCLK_3 source.
+ */
+static void dccg35_enable_dscclk_cb(struct dccg *dccg, int inst)
+{
+ dccg35_enable_dscclk_new(dccg, inst, DSC_DTO_TUNED_CK_GPU_DISCLK_3);
+}
+
+/*
+ * Enable the SYMCLK back end for link_enc_inst on SYMCLK_BE_PHYCLK, then
+ * enable the front end for stream_enc_inst with link_enc_inst (cast to
+ * enum symclk_fe_source) as its source.
+ */
+static void dccg35_enable_symclk_se_cb(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst)
+{
+ /* Switch to functional clock if already not selected */
+ dccg35_enable_symclk_be_new(dccg, SYMCLK_BE_PHYCLK, link_enc_inst);
+
+ dccg35_enable_symclk_fe_new(dccg, stream_enc_inst, (enum symclk_fe_source) link_enc_inst);
+
+}
+
+/*
+ * Disable only the SYMCLK front end for stream_enc_inst. link_enc_inst is
+ * intentionally unused: the back end is left to the DMU PHY sequence.
+ */
+static void dccg35_disable_symclk_se_cb(
+ struct dccg *dccg,
+ uint32_t stream_enc_inst,
+ uint32_t link_enc_inst)
+{
+ dccg35_disable_symclk_fe_new(dccg, stream_enc_inst);
+
+ /* DMU PHY sequence switches SYMCLK_BE (link_enc_inst) to ref clock once PHY is turned off */
+}
+
+/*
+ * Non-static entry point: forwards pipe_idx and disable_clock_gating
+ * unchanged to dccg35_set_dppclk_root_clock_gating().
+ */
+void dccg35_root_gate_disable_control(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating)
+{
+ dccg35_set_dppclk_root_clock_gating(dccg, pipe_idx, disable_clock_gating);
+}
+
+/*
+ * Alternative DCCG function table built on the *_cb wrappers.
+ * dccg35_create() installs dccg35_funcs, not this table; it is only
+ * referenced there through a (void) cast.
+ * NOTE(review): unlike dccg35_funcs, this table does not set
+ * .dccg_root_gate_disable_control or .dccg_read_reg_state - confirm that is
+ * intended before switching over to it.
+ */
+static const struct dccg_funcs dccg35_funcs_new = {
+ .update_dpp_dto = dccg35_update_dpp_dto_cb,
+ .dpp_root_clock_control = dccg35_dpp_root_clock_control_cb,
+ .get_dccg_ref_freq = dccg31_get_dccg_ref_freq,
+ .dccg_init = dccg35_init_cb,
+ .set_dpstreamclk = dccg35_set_dpstreamclk_cb,
+ .set_dpstreamclk_root_clock_gating = dccg35_set_dpstreamclk_root_clock_gating_cb,
+ .enable_symclk32_se = dccg35_enable_symclk32_se_cb,
+ .disable_symclk32_se = dccg35_disable_symclk32_se_cb,
+ .enable_symclk32_le = dccg35_enable_symclk32_le_cb,
+ .disable_symclk32_le = dccg35_disable_symclk32_le_cb,
+ .set_symclk32_le_root_clock_gating = dccg35_set_symclk32_le_root_clock_gating_cb,
+ .set_physymclk = dccg35_set_physymclk_cb,
+ .set_physymclk_root_clock_gating = dccg35_set_physymclk_root_clock_gating_cb,
+ .set_dtbclk_dto = dccg35_set_dtbclk_dto_cb,
+ .set_audio_dtbclk_dto = dccg31_set_audio_dtbclk_dto,
+ .set_fifo_errdet_ovr_en = dccg2_set_fifo_errdet_ovr_en,
+ .otg_add_pixel = dccg31_otg_add_pixel,
+ .otg_drop_pixel = dccg31_otg_drop_pixel,
+ .set_dispclk_change_mode = dccg31_set_dispclk_change_mode,
+ .disable_dsc = dccg35_disable_dscclk_cb,
+ .enable_dsc = dccg35_enable_dscclk_cb,
+ .set_pixel_rate_div = dccg35_set_pixel_rate_div,
+ .get_pixel_rate_div = dccg35_get_pixel_rate_div,
+ .trigger_dio_fifo_resync = dccg35_trigger_dio_fifo_resync,
+ .set_valid_pixel_rate = dccg35_set_valid_pixel_rate,
+ .enable_symclk_se = dccg35_enable_symclk_se_cb,
+ .disable_symclk_se = dccg35_disable_symclk_se_cb,
+ .set_dtbclk_p_src = dccg35_set_dtbclk_p_src_cb,
+};
+
+/*
+ * DCCG function table installed by dccg35_create(). It mixes DCN35-specific
+ * handlers with handlers carrying dccg31_ and dccg2_ prefixes.
+ */
+static const struct dccg_funcs dccg35_funcs = {
+ .update_dpp_dto = dccg35_update_dpp_dto,
+ .dpp_root_clock_control = dccg35_dpp_root_clock_control,
+ .get_dccg_ref_freq = dccg31_get_dccg_ref_freq,
+ .dccg_init = dccg35_init,
+ .set_dpstreamclk = dccg35_set_dpstreamclk,
+ .set_dpstreamclk_root_clock_gating = dccg35_set_dpstreamclk_root_clock_gating,
+ .enable_symclk32_se = dccg31_enable_symclk32_se,
+ .disable_symclk32_se = dccg35_disable_symclk32_se,
+ .enable_symclk32_le = dccg31_enable_symclk32_le,
+ .disable_symclk32_le = dccg31_disable_symclk32_le,
+ .set_symclk32_le_root_clock_gating = dccg31_set_symclk32_le_root_clock_gating,
+ .set_physymclk = dccg35_set_physymclk,
+ .set_physymclk_root_clock_gating = dccg35_set_physymclk_root_clock_gating,
+ .set_dtbclk_dto = dccg35_set_dtbclk_dto,
+ .set_audio_dtbclk_dto = dccg31_set_audio_dtbclk_dto,
+ .set_fifo_errdet_ovr_en = dccg2_set_fifo_errdet_ovr_en,
+ .otg_add_pixel = dccg31_otg_add_pixel,
+ .otg_drop_pixel = dccg31_otg_drop_pixel,
+ .set_dispclk_change_mode = dccg31_set_dispclk_change_mode,
+ .disable_dsc = dccg35_disable_dscclk,
+ .enable_dsc = dccg35_enable_dscclk,
+ .set_pixel_rate_div = dccg35_set_pixel_rate_div,
+ .get_pixel_rate_div = dccg35_get_pixel_rate_div,
+ .trigger_dio_fifo_resync = dccg35_trigger_dio_fifo_resync,
+ .set_valid_pixel_rate = dccg35_set_valid_pixel_rate,
+ .enable_symclk_se = dccg35_enable_symclk_se,
+ .disable_symclk_se = dccg35_disable_symclk_se,
+ .set_dtbclk_p_src = dccg35_set_dtbclk_p_src,
+ .dccg_root_gate_disable_control = dccg35_root_gate_disable_control,
+ .dccg_read_reg_state = dccg31_read_reg_state,
+};
+
+/*
+ * Allocate and initialise a DCN35 DCCG object.
+ *
+ * The zeroed struct dcn_dccg is wired to ctx, the dccg35_funcs table and the
+ * supplied register/shift/mask tables (stored by pointer, not copied).
+ *
+ * Returns the embedded struct dccg, or NULL (after BREAK_TO_DEBUGGER()) when
+ * the allocation fails.
+ */
+struct dccg *dccg35_create(
+ struct dc_context *ctx,
+ const struct dccg_registers *regs,
+ const struct dccg_shift *dccg_shift,
+ const struct dccg_mask *dccg_mask)
+{
+ struct dcn_dccg *dccg_dcn = kzalloc(sizeof(*dccg_dcn), GFP_KERNEL);
+ struct dccg *base;
+
+ if (dccg_dcn == NULL) {
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+ /* Address-of with the result discarded: references these otherwise
+ * unused static symbols, presumably to silence unused warnings.
+ */
+ (void)&dccg35_disable_symclk_be_new;
+ (void)&dccg35_set_symclk32_le_root_clock_gating;
+ (void)&dccg35_set_smclk32_se_rcg;
+ (void)&dccg35_funcs_new;
+
+ base = &dccg_dcn->base;
+ base->ctx = ctx;
+ base->funcs = &dccg35_funcs;
+
+ dccg_dcn->regs = regs;
+ dccg_dcn->dccg_shift = dccg_shift;
+ dccg_dcn->dccg_mask = dccg_mask;
+
+ return &dccg_dcn->base;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h
new file mode 100644
index 000000000000..7b9c36456cd9
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h
@@ -0,0 +1,256 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DCN35_DCCG_H__
+#define __DCN35_DCCG_H__
+
+#include "dcn314/dcn314_dccg.h"
+
+/*
+ * Designated initializer for one element of a per-instance field array:
+ * .<field_prefix>_<field_name>[inst] =
+ * <block><inst>_<reg_name>__<field_prefix><inst>_<field_name><post_fix>
+ */
+#define DCCG_SFII(block, reg_name, field_prefix, field_name, inst, post_fix)\
+ .field_prefix ## _ ## field_name[inst] = block ## inst ## _ ## reg_name ## __ ## field_prefix ## inst ## _ ## field_name ## post_fix
+
+
+/*
+ * Register list for DCN35: everything in the DCN314 list plus the DPPCLK
+ * control, extra gate-disable, global FGCG and SYMCLK A-E/PSP registers.
+ */
+#define DCCG_REG_LIST_DCN35() \
+ DCCG_REG_LIST_DCN314(),\
+ SR(DPPCLK_CTRL),\
+ SR(DCCG_GATE_DISABLE_CNTL4),\
+ SR(DCCG_GATE_DISABLE_CNTL5),\
+ SR(DCCG_GATE_DISABLE_CNTL6),\
+ SR(DCCG_GLOBAL_FGCG_REP_CNTL),\
+ SR(SYMCLKA_CLOCK_ENABLE),\
+ SR(SYMCLKB_CLOCK_ENABLE),\
+ SR(SYMCLKC_CLOCK_ENABLE),\
+ SR(SYMCLKD_CLOCK_ENABLE), \
+ SR(SYMCLKE_CLOCK_ENABLE),\
+ SR(SYMCLK_PSP_CNTL)
+
+/*
+ * Shift/mask initializer list for DCN35. mask_sh is the suffix pasted onto
+ * each field name. Each field is listed once: the repeated
+ * OTG3_PIXEL_RATE_DIVK2 entry and the second copy of the PHYA-PHYD SYMCLK
+ * EN/SRC_SEL entries were dropped, since re-initialising the same member
+ * with the same value only produces override-init noise.
+ * NOTE(review): assumes DCCG_SF/DCCG_SFI expand to designated initializers
+ * like DCCG_SFII does - confirm against their definitions.
+ */
+#define DCCG_MASK_SH_LIST_DCN35(mask_sh) \
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 0, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 1, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 2, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 3, mask_sh),\
+ DCCG_SF(DPPCLK_CTRL, DPPCLK0_EN, mask_sh),\
+ DCCG_SF(DPPCLK_CTRL, DPPCLK1_EN, mask_sh),\
+ DCCG_SF(DPPCLK_CTRL, DPPCLK2_EN, mask_sh),\
+ DCCG_SF(DPPCLK_CTRL, DPPCLK3_EN, mask_sh),\
+ DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_PHASE, mask_sh),\
+ DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_MODULO, mask_sh),\
+ DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_EN, mask_sh),\
+ DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_SRC_SEL, mask_sh),\
+ DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_EN, mask_sh),\
+ DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_EN, mask_sh),\
+ DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_EN, mask_sh),\
+ DCCG_SF(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_EN, mask_sh),\
+ DCCG_SF(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK0_EN, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK1_EN, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK2_EN, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK3_EN, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK0_SRC_SEL, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK1_SRC_SEL, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK2_SRC_SEL, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK3_SRC_SEL, mask_sh),\
+ DCCG_SF(HDMISTREAMCLK_CNTL, HDMISTREAMCLK0_EN, mask_sh),\
+ DCCG_SF(HDMISTREAMCLK_CNTL, HDMISTREAMCLK0_SRC_SEL, mask_sh),\
+ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK0_EN, mask_sh),\
+ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK1_EN, mask_sh),\
+ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK2_EN, mask_sh),\
+ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK3_EN, mask_sh),\
+ DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_PHASE, mask_sh),\
+ DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_MODULO, mask_sh),\
+ DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_PHASE, mask_sh),\
+ DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_MODULO, mask_sh),\
+ DCCG_SF(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_PHASE, mask_sh),\
+ DCCG_SF(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_MODULO, mask_sh),\
+ DCCG_SF(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_PHASE, mask_sh),\
+ DCCG_SF(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_MODULO, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE0_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE1_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE2_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE3_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE0_EN, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE1_EN, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE2_EN, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE3_EN, mask_sh),\
+ DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE0_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE1_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE0_EN, mask_sh),\
+ DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE1_EN, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLK_DTO, ENABLE, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLK_DTO, ENABLE, 1, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLK_DTO, ENABLE, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLK_DTO, ENABLE, 3, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLKDTO, ENABLE_STATUS, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLKDTO, ENABLE_STATUS, 1, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLKDTO, ENABLE_STATUS, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLKDTO, ENABLE_STATUS, 3, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 1, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 3, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 1, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 3, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG0_PIXEL_RATE_DIVK1, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG0_PIXEL_RATE_DIVK2, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG1_PIXEL_RATE_DIVK1, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG1_PIXEL_RATE_DIVK2, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG2_PIXEL_RATE_DIVK1, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG2_PIXEL_RATE_DIVK2, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG3_PIXEL_RATE_DIVK1, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG3_PIXEL_RATE_DIVK2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 1, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 3, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P0_SRC_SEL, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P0_EN, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P1_SRC_SEL, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P1_EN, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P2_SRC_SEL, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P2_EN, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P3_SRC_SEL, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P3_EN, mask_sh),\
+ DCCG_SF(PHYESYMCLK_CLOCK_CNTL, PHYESYMCLK_EN, mask_sh),\
+ DCCG_SF(PHYESYMCLK_CLOCK_CNTL, PHYESYMCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO_SEL, mask_sh),\
+ DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL, mask_sh),\
+ DCCG_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, mask_sh),\
+ DCCG_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_RDIVIDER, mask_sh),\
+ DCCG_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GLOBAL_FGCG_REP_CNTL, DCCG_GLOBAL_FGCG_REP_DIS, mask_sh),\
+ DCCG_SF(SYMCLKA_CLOCK_ENABLE, SYMCLKA_CLOCK_ENABLE, mask_sh),\
+ DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_CLOCK_ENABLE, mask_sh),\
+ DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_CLOCK_ENABLE, mask_sh),\
+ DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_CLOCK_ENABLE, mask_sh),\
+ DCCG_SF(SYMCLKE_CLOCK_ENABLE, SYMCLKE_CLOCK_ENABLE, mask_sh),\
+ DCCG_SF(SYMCLKA_CLOCK_ENABLE, SYMCLKA_FE_EN, mask_sh),\
+ DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_EN, mask_sh),\
+ DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_EN, mask_sh),\
+ DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_EN, mask_sh),\
+ DCCG_SF(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_EN, mask_sh),\
+ DCCG_SF(SYMCLKA_CLOCK_ENABLE, SYMCLKA_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKE_CLOCK_ENABLE, SYMCLKE_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKA_CLOCK_ENABLE, SYMCLKA_FE_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_SRC_SEL, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_FE_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_FE_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_FE_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_FE_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_FE_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, HDMICHARCLK0_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, HDMISTREAMCLK0_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE2_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE3_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE2_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE3_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYA_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYB_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYC_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYD_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYE_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_PHASE, mask_sh),\
+ DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_MODULO, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL, DISPCLK_DCCG_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, HDMISTREAMCLK0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_STEP_DELAY, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_STEP_SIZE, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_FREQ_RAMP_DONE, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_MAX_ERRDET_CYCLES, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DCCG_FIFO_ERRDET_RESET, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DCCG_FIFO_ERRDET_STATE, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DCCG_FIFO_ERRDET_OVR_EN, mask_sh),\
+ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_CHG_FWD_CORR_DISABLE, mask_sh),\
+
+/*
+ * Allocate a DCN35 DCCG bound to ctx and the given register, shift and mask
+ * tables. Returns NULL if the allocation fails.
+ */
+struct dccg *dccg35_create(
+ struct dc_context *ctx,
+ const struct dccg_registers *regs,
+ const struct dccg_shift *dccg_shift,
+ const struct dccg_mask *dccg_mask);
+
+/* DCN35 DCCG init hook (installed as .dccg_init in the DCN35 function table) */
+void dccg35_init(struct dccg *dccg);
+
+/* NOTE(review): presumably toggles DCCG_GLOBAL_FGCG_REP_DIS - confirm in the .c file */
+void dccg35_enable_global_fgcg_rep(struct dccg *dccg, bool value);
+/* Forwards pipe_idx/disable_clock_gating to the DPPCLK root clock gating helper */
+void dccg35_root_gate_disable_control(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating);
+
+
+#endif //__DCN35_DCCG_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c
new file mode 100644
index 000000000000..663a18ee5162
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c
@@ -0,0 +1,915 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "reg_helper.h"
+#include "core_types.h"
+#include "dcn401_dccg.h"
+#include "dcn31/dcn31_dccg.h"
+
+/*
+#include "dmub_common.h"
+#include "dmcub_reg_access_helper.h"
+
+#include "dmub401_common.h"
+#include "dmub401_regs.h"
+#include "dmub401_dccg.h"
+*/
+
+/* Recover the enclosing struct dcn_dccg from a pointer to its embedded 'base' member. */
+#define TO_DCN_DCCG(dccg)\
+ container_of(dccg, struct dcn_dccg, base)
+
+/* Resolve a register by name through the instance's register table.
+ * Expands to a reference to a local variable named 'dccg_dcn', so every
+ * function using the register helpers must declare one under that exact name.
+ */
+#define REG(reg) \
+ (dccg_dcn->regs->reg)
+
+/* Expand a field name into the "shift, mask" argument pair taken from the
+ * instance's shift and mask tables. reg_name is accepted but does not appear
+ * in the expansion. Also relies on a local 'dccg_dcn'.
+ */
+#undef FN
+#define FN(reg_name, field_name) \
+ dccg_dcn->dccg_shift->field_name, dccg_dcn->dccg_mask->field_name
+
+/* dc_context of the instance; relies on a local 'dccg_dcn'. */
+#define CTX \
+ dccg_dcn->base.ctx
+/* Logger handle. Unlike the macros above this one expands through a local
+ * named 'dccg' (the base object), not 'dccg_dcn'.
+ */
+#define DC_LOGGER \
+ dccg->ctx->logger
+
+/*
+ * Write the per-pipe DPPCLKn_EN field of DPPCLK_CTRL.
+ *
+ * dccg: DCCG base object; converted to struct dcn_dccg for register access.
+ * dpp_inst: DPP instance. Only 0-3 are handled; any other value is a silent
+ * no-op (nothing written, nothing logged).
+ * enable: value written into the selected enable field.
+ */
+static void dcn401_set_dppclk_enable(struct dccg *dccg,
+ uint32_t dpp_inst, uint32_t enable)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (dpp_inst) {
+ case 0:
+ REG_UPDATE(DPPCLK_CTRL, DPPCLK0_EN, enable);
+ break;
+ case 1:
+ REG_UPDATE(DPPCLK_CTRL, DPPCLK1_EN, enable);
+ break;
+ case 2:
+ REG_UPDATE(DPPCLK_CTRL, DPPCLK2_EN, enable);
+ break;
+ case 3:
+ REG_UPDATE(DPPCLK_CTRL, DPPCLK3_EN, enable);
+ break;
+ default:
+ /* out-of-range instance: deliberately ignored without a debugger break */
+ break;
+ }
+}
+/*
+ * Program the DPP clock DTO of one pipe and record the requested clock.
+ *
+ * dccg: DCCG base object.
+ * dpp_inst: DPP instance; indexes DPPCLK_DTO_PARAM[] and pipe_dppclk_khz[].
+ * req_dppclk: requested pipe DPP clock, in the same unit as dccg->ref_dppclk.
+ *
+ * When both the reference clock and the request are non-zero the DTO is set
+ * to phase / 0xff (rounded up, clamped to 0xff) and the pipe clock is
+ * enabled; otherwise the pipe clock is disabled. The request is always
+ * stored in dccg->pipe_dppclk_khz[dpp_inst].
+ */
+void dccg401_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const int global_dppclk = dccg->ref_dppclk;
+	bool dto_running = false;
+
+	if (global_dppclk != 0 && req_dppclk != 0) {
+		/* phase / modulo = dpp pipe clk / dpp global clk, modulo fixed at FF */
+		const int dto_modulo = 0xff;
+		int dto_phase = (dto_modulo * req_dppclk + global_dppclk - 1) / global_dppclk;
+
+		/* a request above the global clock cannot be expressed: flag and clamp */
+		if (dto_phase > 0xff) {
+			ASSERT(false);
+			dto_phase = 0xff;
+		}
+
+		REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0,
+				DPPCLK0_DTO_PHASE, dto_phase,
+				DPPCLK0_DTO_MODULO, dto_modulo);
+		dto_running = true;
+	}
+
+	/* the enable write always follows the DTO parameters */
+	dcn401_set_dppclk_enable(dccg, dpp_inst, dto_running);
+
+	dccg->pipe_dppclk_khz[dpp_inst] = req_dppclk;
+}
+
+/*
+ * Workaround that lets OTG_PIXEL_RATE_DIV be written without the
+ * probability of causing a DIG FIFO error: DENTIST_DISPCLK_CNTL is read and
+ * written back unchanged, then DENTIST_DISPCLK_CHG_DONE is polled until it
+ * reads 1. The trailing 50 / 2000 arguments are handed to REG_WAIT as-is
+ * (presumably poll delay and retry count - confirm in reg_helper.h).
+ */
+static void dccg401_wait_for_dentist_change_done(struct dccg *dccg)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	uint32_t cntl_snapshot;
+
+	cntl_snapshot = REG_READ(DENTIST_DISPCLK_CNTL);
+
+	/* rewrite the same value, then wait for the change-done handshake */
+	REG_WRITE(DENTIST_DISPCLK_CNTL, cntl_snapshot);
+	REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 2000);
+}
+
+/*
+ * Read back the TMDS pixel-rate divider and the DP DTO integer of one OTG
+ * from OTG_PIXEL_RATE_DIV.
+ *
+ * dccg: DCCG base object.
+ * otg_inst: OTG instance, 0-3. Any other value triggers BREAK_TO_DEBUGGER()
+ * and returns with both outputs left untouched.
+ * tmds_div: out; PIXEL_RATE_DIV_BY_2 when the divider field reads 0,
+ * PIXEL_RATE_DIV_BY_4 for any other field value.
+ * dp_dto_int: out; receives the raw DPDTOn_INT field.
+ */
+void dccg401_get_pixel_rate_div(
+ struct dccg *dccg,
+ uint32_t otg_inst,
+ uint32_t *tmds_div,
+ uint32_t *dp_dto_int)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+ uint32_t val_tmds_div = PIXEL_RATE_DIV_NA;
+
+ switch (otg_inst) {
+ case 0:
+ REG_GET_2(OTG_PIXEL_RATE_DIV,
+ OTG0_TMDS_PIXEL_RATE_DIV, &val_tmds_div,
+ DPDTO0_INT, dp_dto_int);
+ break;
+ case 1:
+ REG_GET_2(OTG_PIXEL_RATE_DIV,
+ OTG1_TMDS_PIXEL_RATE_DIV, &val_tmds_div,
+ DPDTO1_INT, dp_dto_int);
+ break;
+ case 2:
+ REG_GET_2(OTG_PIXEL_RATE_DIV,
+ OTG2_TMDS_PIXEL_RATE_DIV, &val_tmds_div,
+ DPDTO2_INT, dp_dto_int);
+ break;
+ case 3:
+ REG_GET_2(OTG_PIXEL_RATE_DIV,
+ OTG3_TMDS_PIXEL_RATE_DIV, &val_tmds_div,
+ DPDTO3_INT, dp_dto_int);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
+ /* decode the raw register field into the pixel_rate_div enum value */
+ *tmds_div = val_tmds_div == 0 ? PIXEL_RATE_DIV_BY_2 : PIXEL_RATE_DIV_BY_4;
+}
+
+/*
+ * Program the TMDS pixel-rate divider of one OTG.
+ *
+ * dccg: DCCG base object.
+ * otg_inst: OTG instance, 0-3; other values hit BREAK_TO_DEBUGGER().
+ * tmds_div: requested divider. Only PIXEL_RATE_DIV_BY_2 and
+ * PIXEL_RATE_DIV_BY_4 are accepted, anything else returns silently.
+ * unused: ignored by this implementation.
+ *
+ * Nothing is written when the hardware already reports the requested
+ * divider. A successful write is followed by the DENTIST change-done wait.
+ */
+void dccg401_set_pixel_rate_div(
+		struct dccg *dccg,
+		uint32_t otg_inst,
+		enum pixel_rate_div tmds_div,
+		enum pixel_rate_div unused)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	uint32_t hw_tmds_div = PIXEL_RATE_DIV_NA;
+	uint32_t hw_dto_int;
+	uint32_t div_field = 0;
+
+	/* only 2 and 4 are valid on dcn401 */
+	if (!(tmds_div == PIXEL_RATE_DIV_BY_2 || tmds_div == PIXEL_RATE_DIV_BY_4))
+		return;
+
+	/* skip the write (and the wait) when hardware already matches */
+	dccg401_get_pixel_rate_div(dccg, otg_inst, &hw_tmds_div, &hw_dto_int);
+	if (hw_tmds_div == tmds_div)
+		return;
+
+	/* register encoding: 0 = divide by 2, 1 = divide by 4 */
+	if (tmds_div == PIXEL_RATE_DIV_BY_4)
+		div_field = 1;
+
+	switch (otg_inst) {
+	case 0:
+		REG_UPDATE(OTG_PIXEL_RATE_DIV,
+				OTG0_TMDS_PIXEL_RATE_DIV, div_field);
+		break;
+	case 1:
+		REG_UPDATE(OTG_PIXEL_RATE_DIV,
+				OTG1_TMDS_PIXEL_RATE_DIV, div_field);
+		break;
+	case 2:
+		REG_UPDATE(OTG_PIXEL_RATE_DIV,
+				OTG2_TMDS_PIXEL_RATE_DIV, div_field);
+		break;
+	case 3:
+		REG_UPDATE(OTG_PIXEL_RATE_DIV,
+				OTG3_TMDS_PIXEL_RATE_DIV, div_field);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+
+	/* every valid instance follows its write with the same handshake */
+	dccg401_wait_for_dentist_change_done(dccg);
+}
+
+
+/*
+ * Select the source of DTBCLK_P for one OTG and switch it on or off.
+ *
+ * dccg: DCCG base object.
+ * src: REFCLK clears DTBCLK_Pn_EN and leaves DTBCLK_Pn_SRC_SEL unchanged.
+ * Any other value sets DTBCLK_Pn_EN, with SRC_SEL = 2 for DTBCLK0 and
+ * SRC_SEL = 0 for everything else.
+ * otg_inst: OTG instance, 0-3; other values hit BREAK_TO_DEBUGGER() and
+ * nothing is written.
+ */
+void dccg401_set_dtbclk_p_src(
+ struct dccg *dccg,
+ enum streamclk_source src,
+ uint32_t otg_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ uint32_t p_src_sel = 0; /* selects dprefclk */
+ if (src == DTBCLK0)
+ p_src_sel = 2; /* selects dtbclk0 */
+
+ switch (otg_inst) {
+ case 0:
+ if (src == REFCLK)
+ REG_UPDATE(DTBCLK_P_CNTL,
+ DTBCLK_P0_EN, 0);
+ else
+ REG_UPDATE_2(DTBCLK_P_CNTL,
+ DTBCLK_P0_SRC_SEL, p_src_sel,
+ DTBCLK_P0_EN, 1);
+ break;
+ case 1:
+ if (src == REFCLK)
+ REG_UPDATE(DTBCLK_P_CNTL,
+ DTBCLK_P1_EN, 0);
+ else
+ REG_UPDATE_2(DTBCLK_P_CNTL,
+ DTBCLK_P1_SRC_SEL, p_src_sel,
+ DTBCLK_P1_EN, 1);
+ break;
+ case 2:
+ if (src == REFCLK)
+ REG_UPDATE(DTBCLK_P_CNTL,
+ DTBCLK_P2_EN, 0);
+ else
+ REG_UPDATE_2(DTBCLK_P_CNTL,
+ DTBCLK_P2_SRC_SEL, p_src_sel,
+ DTBCLK_P2_EN, 1);
+ break;
+ case 3:
+ if (src == REFCLK)
+ REG_UPDATE(DTBCLK_P_CNTL,
+ DTBCLK_P3_EN, 0);
+ else
+ REG_UPDATE_2(DTBCLK_P_CNTL,
+ DTBCLK_P3_SRC_SEL, p_src_sel,
+ DTBCLK_P3_EN, 1);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
+}
+
+/*
+ * Force PHYSYMCLK of one PHY on (with the given source) or off.
+ *
+ * dccg: DCCG base object.
+ * phy_inst: PHY instance, 0-3 (PHYA..PHYD); other values hit
+ * BREAK_TO_DEBUGGER() and nothing is written.
+ * clk_src: source written to PHYxSYMCLK_SRC_SEL when enabling.
+ * force_enable: true writes EN = 1 / SRC_SEL = clk_src, false writes
+ * EN = 0 / SRC_SEL = 0.
+ *
+ * When the physymclk root-clock-optimization debug bit is set, the matching
+ * PHYxSYMCLK_ROOT_GATE_DISABLE bit is written afterwards with the same
+ * 1 / 0 value as the enable.
+ */
+void dccg401_set_physymclk(
+		struct dccg *dccg,
+		int phy_inst,
+		enum physymclk_clock_source clk_src,
+		bool force_enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const bool touch_root_gate =
+		dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk;
+	/* enable and root-gate-disable always carry the same 1/0 value */
+	const uint32_t clk_en = force_enable ? 1 : 0;
+	const uint32_t src_sel = force_enable ? clk_src : 0;
+
+	/* Force PHYSYMCLK on and Select phyd32clk as the source of clock which is output to PHY through DCIO */
+	switch (phy_inst) {
+	case 0:
+		REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL,
+				PHYASYMCLK_EN, clk_en,
+				PHYASYMCLK_SRC_SEL, src_sel);
+		if (touch_root_gate)
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+					PHYASYMCLK_ROOT_GATE_DISABLE, clk_en);
+		break;
+	case 1:
+		REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL,
+				PHYBSYMCLK_EN, clk_en,
+				PHYBSYMCLK_SRC_SEL, src_sel);
+		if (touch_root_gate)
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+					PHYBSYMCLK_ROOT_GATE_DISABLE, clk_en);
+		break;
+	case 2:
+		REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL,
+				PHYCSYMCLK_EN, clk_en,
+				PHYCSYMCLK_SRC_SEL, src_sel);
+		if (touch_root_gate)
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+					PHYCSYMCLK_ROOT_GATE_DISABLE, clk_en);
+		break;
+	case 3:
+		REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL,
+				PHYDSYMCLK_EN, clk_en,
+				PHYDSYMCLK_SRC_SEL, src_sel);
+		if (touch_root_gate)
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+					PHYDSYMCLK_ROOT_GATE_DISABLE, clk_en);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Report the DCCG reference frequency.
+ *
+ * dccg: not used by this implementation.
+ * xtalin_freq_inKhz: crystal input frequency in kHz.
+ * dccg_ref_freq_inKhz: out; receives xtalin_freq_inKhz unchanged.
+ */
+void dccg401_get_dccg_ref_freq(struct dccg *dccg,
+		unsigned int xtalin_freq_inKhz,
+		unsigned int *dccg_ref_freq_inKhz)
+{
+	/* refclk is assumed to be sourced from xtalin (100MHz expected), so pass it through */
+	*dccg_ref_freq_inKhz = xtalin_freq_inKhz;
+}
+
+/*
+ * Write 1 to the OTG_ADD_PIXEL field of OTG_PIXEL_RATE_CNTL for one OTG.
+ *
+ * dccg: DCCG base object.
+ * otg_inst: OTG instance; used unchecked as the index into both the register
+ * array and the per-instance shift/mask arrays, so the caller must pass a
+ * valid instance.
+ */
+static void dccg401_otg_add_pixel(struct dccg *dccg,
+ uint32_t otg_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ REG_UPDATE(OTG_PIXEL_RATE_CNTL[otg_inst],
+ OTG_ADD_PIXEL[otg_inst], 1);
+}
+
+/*
+ * Write 1 to the OTG_DROP_PIXEL field of OTG_PIXEL_RATE_CNTL for one OTG.
+ *
+ * dccg: DCCG base object.
+ * otg_inst: OTG instance; used unchecked as the index into both the register
+ * array and the per-instance shift/mask arrays, so the caller must pass a
+ * valid instance.
+ */
+static void dccg401_otg_drop_pixel(struct dccg *dccg,
+ uint32_t otg_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ REG_UPDATE(OTG_PIXEL_RATE_CNTL[otg_inst],
+ OTG_DROP_PIXEL[otg_inst], 1);
+}
+
+/*
+ * Enable SYMCLK32_LEn and route one of the PHYD32CLKs to it.
+ *
+ * dccg: DCCG base object.
+ * hpo_le_inst: HPO LE instance, 0-3; other values hit BREAK_TO_DEBUGGER()
+ * and nothing is written.
+ * phyd32clk: source written to SYMCLK32_LEn_SRC_SEL.
+ *
+ * When the symclk32_le root-clock-optimization debug bit is set, the
+ * instance's SYMCLK32_LEn_GATE_DISABLE and SYMCLK32_ROOT_LEn_GATE_DISABLE
+ * bits are set first, before the clock itself is enabled.
+ */
+void dccg401_enable_symclk32_le(
+ struct dccg *dccg,
+ int hpo_le_inst,
+ enum phyd32clk_clock_source phyd32clk)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ /* select one of the PHYD32CLKs as the source for symclk32_le */
+ switch (hpo_le_inst) {
+ case 0:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_LE0_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_LE0_GATE_DISABLE, 1);
+ REG_UPDATE_2(SYMCLK32_LE_CNTL,
+ SYMCLK32_LE0_SRC_SEL, phyd32clk,
+ SYMCLK32_LE0_EN, 1);
+ break;
+ case 1:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_LE1_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_LE1_GATE_DISABLE, 1);
+ REG_UPDATE_2(SYMCLK32_LE_CNTL,
+ SYMCLK32_LE1_SRC_SEL, phyd32clk,
+ SYMCLK32_LE1_EN, 1);
+ break;
+ case 2:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_LE2_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_LE2_GATE_DISABLE, 1);
+ REG_UPDATE_2(SYMCLK32_LE_CNTL,
+ SYMCLK32_LE2_SRC_SEL, phyd32clk,
+ SYMCLK32_LE2_EN, 1);
+ break;
+ case 3:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_LE3_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_LE3_GATE_DISABLE, 1);
+ REG_UPDATE_2(SYMCLK32_LE_CNTL,
+ SYMCLK32_LE3_SRC_SEL, phyd32clk,
+ SYMCLK32_LE3_EN, 1);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+
+/*
+ * Disable SYMCLK32_LEn and switch its source selection back to 0.
+ *
+ * dccg: DCCG base object.
+ * hpo_le_inst: HPO LE instance, 0-3; other values hit BREAK_TO_DEBUGGER()
+ * and nothing is written.
+ *
+ * The clock is disabled first. Only afterwards, and only when the
+ * symclk32_le root-clock-optimization debug bit is set, are the instance's
+ * SYMCLK32_LEn_GATE_DISABLE and SYMCLK32_ROOT_LEn_GATE_DISABLE bits cleared.
+ */
+void dccg401_disable_symclk32_le(
+ struct dccg *dccg,
+ int hpo_le_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ /* set refclk as the source for symclk32_le */
+ switch (hpo_le_inst) {
+ case 0:
+ REG_UPDATE_2(SYMCLK32_LE_CNTL,
+ SYMCLK32_LE0_SRC_SEL, 0,
+ SYMCLK32_LE0_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_LE0_GATE_DISABLE, 0,
+ SYMCLK32_ROOT_LE0_GATE_DISABLE, 0);
+ break;
+ case 1:
+ REG_UPDATE_2(SYMCLK32_LE_CNTL,
+ SYMCLK32_LE1_SRC_SEL, 0,
+ SYMCLK32_LE1_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_LE1_GATE_DISABLE, 0,
+ SYMCLK32_ROOT_LE1_GATE_DISABLE, 0);
+ break;
+ case 2:
+ REG_UPDATE_2(SYMCLK32_LE_CNTL,
+ SYMCLK32_LE2_SRC_SEL, 0,
+ SYMCLK32_LE2_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_LE2_GATE_DISABLE, 0,
+ SYMCLK32_ROOT_LE2_GATE_DISABLE, 0);
+ break;
+ case 3:
+ REG_UPDATE_2(SYMCLK32_LE_CNTL,
+ SYMCLK32_LE3_SRC_SEL, 0,
+ SYMCLK32_LE3_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_LE3_GATE_DISABLE, 0,
+ SYMCLK32_ROOT_LE3_GATE_DISABLE, 0);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+
+/*
+ * Enable DPSTREAMCLKn and select which OTG's clock feeds it.
+ *
+ * dccg: DCCG base object.
+ * otg_inst: value written to DPSTREAMCLKn_SRC_SEL (not range-checked here).
+ * dp_hpo_inst: DP HPO instance, 0-3; other values hit BREAK_TO_DEBUGGER()
+ * and nothing is written.
+ *
+ * When the dpstream root-clock-optimization debug bit is set, the
+ * per-instance gate-disable bits in DCCG_GATE_DISABLE_CNTL5 are set before
+ * the clock is enabled, and the shared DPSTREAMCLK gate-disable bits in
+ * DCCG_GATE_DISABLE_CNTL3 are set afterwards.
+ */
+static void dccg401_enable_dpstreamclk(struct dccg *dccg, int otg_inst, int dp_hpo_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ /* enabled to select one of the DTBCLKs for pipe */
+ switch (dp_hpo_inst) {
+ case 0:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+ DPSTREAMCLK0_ROOT_GATE_DISABLE, 1,
+ DPSTREAMCLK0_GATE_DISABLE, 1);
+ REG_UPDATE_2(DPSTREAMCLK_CNTL,
+ DPSTREAMCLK0_SRC_SEL, otg_inst,
+ DPSTREAMCLK0_EN, 1);
+ break;
+ case 1:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+ DPSTREAMCLK1_ROOT_GATE_DISABLE, 1,
+ DPSTREAMCLK1_GATE_DISABLE, 1);
+ REG_UPDATE_2(DPSTREAMCLK_CNTL,
+ DPSTREAMCLK1_SRC_SEL, otg_inst,
+ DPSTREAMCLK1_EN, 1);
+ break;
+ case 2:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+ DPSTREAMCLK2_ROOT_GATE_DISABLE, 1,
+ DPSTREAMCLK2_GATE_DISABLE, 1);
+ REG_UPDATE_2(DPSTREAMCLK_CNTL,
+ DPSTREAMCLK2_SRC_SEL, otg_inst,
+ DPSTREAMCLK2_EN, 1);
+ break;
+ case 3:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+ DPSTREAMCLK3_ROOT_GATE_DISABLE, 1,
+ DPSTREAMCLK3_GATE_DISABLE, 1);
+ REG_UPDATE_2(DPSTREAMCLK_CNTL,
+ DPSTREAMCLK3_SRC_SEL, otg_inst,
+ DPSTREAMCLK3_EN, 1);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+ /* NOTE(review): dccg401_disable_dpstreamclk() only clears the per-instance
+ * CNTL5 bits; these shared CNTL3 bits are never cleared in this file -
+ * confirm that is intended.
+ */
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ DPSTREAMCLK_GATE_DISABLE, 1,
+ DPSTREAMCLK_ROOT_GATE_DISABLE, 1);
+}
+
+/*
+ * Disable DPSTREAMCLKn.
+ *
+ * dccg: DCCG base object.
+ * dp_hpo_inst: DP HPO instance, 0-3; other values hit BREAK_TO_DEBUGGER()
+ * and nothing is written.
+ *
+ * Only DPSTREAMCLKn_EN is cleared; DPSTREAMCLKn_SRC_SEL keeps its value.
+ * When the dpstream root-clock-optimization debug bit is set, the
+ * per-instance gate-disable bits in DCCG_GATE_DISABLE_CNTL5 are cleared
+ * after the clock has been disabled.
+ */
+void dccg401_disable_dpstreamclk(struct dccg *dccg, int dp_hpo_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (dp_hpo_inst) {
+ case 0:
+ REG_UPDATE(DPSTREAMCLK_CNTL,
+ DPSTREAMCLK0_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+ DPSTREAMCLK0_ROOT_GATE_DISABLE, 0,
+ DPSTREAMCLK0_GATE_DISABLE, 0);
+ break;
+ case 1:
+ REG_UPDATE(DPSTREAMCLK_CNTL,
+ DPSTREAMCLK1_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+ DPSTREAMCLK1_ROOT_GATE_DISABLE, 0,
+ DPSTREAMCLK1_GATE_DISABLE, 0);
+ break;
+ case 2:
+ REG_UPDATE(DPSTREAMCLK_CNTL,
+ DPSTREAMCLK2_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+ DPSTREAMCLK2_ROOT_GATE_DISABLE, 0,
+ DPSTREAMCLK2_GATE_DISABLE, 0);
+ break;
+ case 3:
+ REG_UPDATE(DPSTREAMCLK_CNTL,
+ DPSTREAMCLK3_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+ DPSTREAMCLK3_ROOT_GATE_DISABLE, 0,
+ DPSTREAMCLK3_GATE_DISABLE, 0);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+
+/*
+ * Enable or disable the DP stream clock of one HPO instance.
+ *
+ * dccg: DCCG base object.
+ * src: REFCLK disables the stream clock; any other source enables it.
+ * otg_inst: OTG selected as the clock source when enabling (ignored when
+ * disabling).
+ * dp_hpo_inst: DP HPO instance to act on.
+ */
+void dccg401_set_dpstreamclk(
+		struct dccg *dccg,
+		enum streamclk_source src,
+		int otg_inst,
+		int dp_hpo_inst)
+{
+	/* REFCLK is the "off" request: turn the stream clock off and stop */
+	if (src == REFCLK) {
+		dccg401_disable_dpstreamclk(dccg, dp_hpo_inst);
+		return;
+	}
+
+	/* enabled to select one of the DTBCLKs for pipe */
+	dccg401_enable_dpstreamclk(dccg, otg_inst, dp_hpo_inst);
+}
+
+/*
+ * Program the DP DTO of one OTG and toggle the pipe's DTO source select.
+ *
+ * dccg: DCCG base object.
+ * params: reads otg_inst, refclk_hz, signal, pixclk_hz and clk_src.
+ *
+ * Returns via BREAK_TO_DEBUGGER() before touching any register when otg_inst
+ * is above 3, when refclk_hz is zero, or (non-TMDS only) when pixclk_hz is
+ * zero.
+ *
+ * For non-TMDS signals the gate-disable bits, DTBCLK_P source, DTO
+ * phase/modulo and DTO integer are programmed in that order, then
+ * DP_DTO_ENABLE and PIPE_DTO_SRC_SEL are set. For TMDS signals only the
+ * final write runs and it clears both fields.
+ */
+void dccg401_set_dp_dto(
+ struct dccg *dccg,
+ const struct dp_dto_params *params)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ bool enable = false;
+
+ if (params->otg_inst > 3) {
+ /* dcn401 only has 4 instances */
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+ /* refclk_hz is the divisor below, so zero must be rejected up front */
+ if (!params->refclk_hz) {
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
+ if (!dc_is_tmds_signal(params->signal)) {
+ uint64_t dto_integer;
+ uint64_t dto_phase_hz;
+ uint64_t dto_modulo_hz = params->refclk_hz;
+
+ enable = true;
+
+ /* Set DTO values:
+ * int = target_pix_rate / reference_clock
+ * phase = target_pix_rate - int * reference_clock,
+ * modulo = reference_clock */
+ dto_integer = div_u64(params->pixclk_hz, dto_modulo_hz);
+ dto_phase_hz = params->pixclk_hz - dto_integer * dto_modulo_hz;
+
+ if (dto_phase_hz <= 0 && dto_integer <= 0) {
+ /* both values are unsigned, so "<= 0" can only mean "== 0":
+ * this rejects a pixel clock of zero, not a negative rate
+ */
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
+ /* Disable gating of DTBCLK_P and SYMCLK32 SE/LE for this instance.
+ * These writes are unconditional (no debug flag check) and nothing in
+ * this function clears the bits again.
+ * NOTE(review): confirm they are meant to stay set.
+ * No default case: instances above 3 were rejected at the top.
+ */
+ switch (params->otg_inst) {
+ case 0:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, 1);
+ REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE0_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_SE0_GATE_DISABLE, 1,
+ SYMCLK32_LE0_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_LE0_GATE_DISABLE, 1);
+ break;
+ case 1:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, 1);
+ REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE1_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_SE1_GATE_DISABLE, 1,
+ SYMCLK32_LE1_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_LE1_GATE_DISABLE, 1);
+ break;
+ case 2:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, 1);
+ REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE2_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_SE2_GATE_DISABLE, 1,
+ SYMCLK32_LE2_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_LE2_GATE_DISABLE, 1);
+ break;
+ case 3:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, 1);
+ REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE3_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_SE3_GATE_DISABLE, 1,
+ SYMCLK32_LE3_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_LE3_GATE_DISABLE, 1);
+ break;
+ }
+
+ dccg401_set_dtbclk_p_src(dccg, params->clk_src, params->otg_inst);
+
+ /* fractional part of the DTO: phase / modulo */
+ REG_WRITE(DP_DTO_PHASE[params->otg_inst], dto_phase_hz);
+ REG_WRITE(DP_DTO_MODULO[params->otg_inst], dto_modulo_hz);
+
+ /* integer part of the DTO lives in OTG_PIXEL_RATE_DIV */
+ switch (params->otg_inst) {
+ case 0:
+ REG_UPDATE(OTG_PIXEL_RATE_DIV,
+ DPDTO0_INT, dto_integer);
+ break;
+ case 1:
+ REG_UPDATE(OTG_PIXEL_RATE_DIV,
+ DPDTO1_INT, dto_integer);
+ break;
+ case 2:
+ REG_UPDATE(OTG_PIXEL_RATE_DIV,
+ DPDTO2_INT, dto_integer);
+ break;
+ case 3:
+ REG_UPDATE(OTG_PIXEL_RATE_DIV,
+ DPDTO3_INT, dto_integer);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+ }
+
+ /* Toggle DTO */
+ REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ DP_DTO_ENABLE[params->otg_inst], enable,
+ PIPE_DTO_SRC_SEL[params->otg_inst], enable);
+}
+
+/*
+ * Bring the DCCG clocks handled here into their disabled state.
+ *
+ * dccg: DCCG base object.
+ *
+ * SYMCLK32 SE 0-3 are always disabled. SYMCLK32 LE 0-3, DPSTREAMCLK 0-3 and
+ * PHYSYMCLK 0-3 are each disabled only when their root-clock-optimization
+ * debug bit (symclk32_le, dpstream, physymclk) is set.
+ */
+void dccg401_init(struct dccg *dccg)
+{
+	int inst;
+
+	/* Set HPO stream encoder to use refclk to avoid case where PHY is
+	 * disabled and SYMCLK32 for HPO SE is sourced from PHYD32CLK which
+	 * will cause DCN to hang.
+	 */
+	for (inst = 0; inst < 4; inst++)
+		dccg31_disable_symclk32_se(dccg, inst);
+
+	if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) {
+		for (inst = 0; inst < 4; inst++)
+			dccg401_disable_symclk32_le(dccg, inst);
+	}
+
+	if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) {
+		for (inst = 0; inst < 4; inst++)
+			dccg401_disable_dpstreamclk(dccg, inst);
+	}
+
+	if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) {
+		/* force_enable = false: the source argument is not written */
+		for (inst = 0; inst < 4; inst++)
+			dccg401_set_physymclk(dccg, inst, PHYSYMCLK_FORCE_SRC_SYMCLK, false);
+	}
+}
+
+/*
+ * Enable the DSC clock DTO of one DSC instance.
+ *
+ * dccg: DCCG base object.
+ * inst: DSC instance, 0-3; other values hit BREAK_TO_DEBUGGER() and nothing
+ * is written.
+ * num_slices_h: not used by this implementation.
+ *
+ * The DTO is always programmed with phase = modulo = 1 and then DSCCLKn_EN
+ * is set.
+ */
+void dccg401_set_dto_dscclk(struct dccg *dccg, uint32_t inst, uint32_t num_slices_h)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (inst) {
+ case 0:
+ REG_UPDATE_2(DSCCLK0_DTO_PARAM,
+ DSCCLK0_DTO_PHASE, 1,
+ DSCCLK0_DTO_MODULO, 1);
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 1);
+
+ break;
+ case 1:
+ REG_UPDATE_2(DSCCLK1_DTO_PARAM,
+ DSCCLK1_DTO_PHASE, 1,
+ DSCCLK1_DTO_MODULO, 1);
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 1);
+ break;
+ case 2:
+ REG_UPDATE_2(DSCCLK2_DTO_PARAM,
+ DSCCLK2_DTO_PHASE, 1,
+ DSCCLK2_DTO_MODULO, 1);
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 1);
+ break;
+ case 3:
+ REG_UPDATE_2(DSCCLK3_DTO_PARAM,
+ DSCCLK3_DTO_PHASE, 1,
+ DSCCLK3_DTO_MODULO, 1);
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, 1);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+
+/*
+ * Turn off the DSC clock DTO of one DSC instance.
+ *
+ * dccg: DCCG base object.
+ * dsc_inst: DSC instance, 0-3. Other values return silently; unlike most
+ * switches in this file there is no BREAK_TO_DEBUGGER() here.
+ *
+ * DSCCLKn_EN is cleared first, then the DTO phase and modulo are zeroed.
+ */
+void dccg401_set_ref_dscclk(struct dccg *dccg,
+ uint32_t dsc_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (dsc_inst) {
+ case 0:
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 0);
+ REG_UPDATE_2(DSCCLK0_DTO_PARAM,
+ DSCCLK0_DTO_PHASE, 0,
+ DSCCLK0_DTO_MODULO, 0);
+ break;
+ case 1:
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 0);
+ REG_UPDATE_2(DSCCLK1_DTO_PARAM,
+ DSCCLK1_DTO_PHASE, 0,
+ DSCCLK1_DTO_MODULO, 0);
+ break;
+ case 2:
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 0);
+ REG_UPDATE_2(DSCCLK2_DTO_PARAM,
+ DSCCLK2_DTO_PHASE, 0,
+ DSCCLK2_DTO_MODULO, 0);
+ break;
+ case 3:
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, 0);
+ REG_UPDATE_2(DSCCLK3_DTO_PARAM,
+ DSCCLK3_DTO_PHASE, 0,
+ DSCCLK3_DTO_MODULO, 0);
+ break;
+ default:
+ return;
+ }
+}
+
+/*
+ * Enable the SYMCLK front-end clock of one stream encoder and select the
+ * link encoder that sources it.
+ *
+ * dccg: DCCG base object.
+ * stream_enc_inst: 0-3, mapped to SYMCLKA..SYMCLKD. There is no default
+ * case, so any other value is silently ignored.
+ * link_enc_inst: value written to SYMCLKx_FE_SRC_SEL (not range-checked).
+ *
+ * When the symclk32_se root-clock-optimization debug bit is set,
+ * SYMCLKx_FE_ROOT_GATE_DISABLE is also set, after the clock is enabled.
+ * NOTE(review): the flag checked is symclk32_se while the bit written is a
+ * SYMCLKx_FE one, and dccg401_disable_symclk_se() never clears that bit -
+ * confirm both are intended.
+ */
+void dccg401_enable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (stream_enc_inst) {
+ case 0:
+ REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE,
+ SYMCLKA_FE_EN, 1,
+ SYMCLKA_FE_SRC_SEL, link_enc_inst);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_FE_ROOT_GATE_DISABLE, 1);
+ break;
+ case 1:
+ REG_UPDATE_2(SYMCLKB_CLOCK_ENABLE,
+ SYMCLKB_FE_EN, 1,
+ SYMCLKB_FE_SRC_SEL, link_enc_inst);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_FE_ROOT_GATE_DISABLE, 1);
+ break;
+ case 2:
+ REG_UPDATE_2(SYMCLKC_CLOCK_ENABLE,
+ SYMCLKC_FE_EN, 1,
+ SYMCLKC_FE_SRC_SEL, link_enc_inst);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_FE_ROOT_GATE_DISABLE, 1);
+ break;
+ case 3:
+ REG_UPDATE_2(SYMCLKD_CLOCK_ENABLE,
+ SYMCLKD_FE_EN, 1,
+ SYMCLKD_FE_SRC_SEL, link_enc_inst);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_FE_ROOT_GATE_DISABLE, 1);
+ break;
+ }
+}
+
+/*
+ * Disable the SYMCLK front-end clock of one stream encoder.
+ *
+ * dccg: DCCG base object.
+ * stream_enc_inst: 0-3, mapped to SYMCLKA..SYMCLKD. There is no default
+ * case, so any other value is silently ignored.
+ * link_enc_inst: not used by this implementation.
+ *
+ * Clears SYMCLKx_FE_EN and resets SYMCLKx_FE_SRC_SEL to 0. The
+ * SYMCLKx_FE_ROOT_GATE_DISABLE bit is not touched here.
+ */
+void dccg401_disable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (stream_enc_inst) {
+ case 0:
+ REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE,
+ SYMCLKA_FE_EN, 0,
+ SYMCLKA_FE_SRC_SEL, 0);
+ break;
+ case 1:
+ REG_UPDATE_2(SYMCLKB_CLOCK_ENABLE,
+ SYMCLKB_FE_EN, 0,
+ SYMCLKB_FE_SRC_SEL, 0);
+ break;
+ case 2:
+ REG_UPDATE_2(SYMCLKC_CLOCK_ENABLE,
+ SYMCLKC_FE_EN, 0,
+ SYMCLKC_FE_SRC_SEL, 0);
+ break;
+ case 3:
+ REG_UPDATE_2(SYMCLKD_CLOCK_ENABLE,
+ SYMCLKD_FE_EN, 0,
+ SYMCLKD_FE_SRC_SEL, 0);
+ break;
+ }
+}
+
+/*
+ * dcn401 DCCG function table, installed on every instance by
+ * dccg401_create().
+ *
+ * Hooks explicitly set to NULL are not provided by this implementation.
+ * Entries named dccg31_* and dccg2_* point at functions that are not defined
+ * in this file (presumably declared through the included dcn31/dcn32
+ * headers).
+ */
+static const struct dccg_funcs dccg401_funcs = {
+ .update_dpp_dto = dccg401_update_dpp_dto,
+ .get_dccg_ref_freq = dccg401_get_dccg_ref_freq,
+ .dccg_init = dccg401_init,
+ .set_dpstreamclk = dccg401_set_dpstreamclk,
+ .enable_symclk32_se = dccg31_enable_symclk32_se,
+ .disable_symclk32_se = dccg31_disable_symclk32_se,
+ .enable_symclk32_le = dccg401_enable_symclk32_le,
+ .disable_symclk32_le = dccg401_disable_symclk32_le,
+ .set_physymclk = dccg401_set_physymclk,
+ .set_dtbclk_dto = NULL,
+ .set_dto_dscclk = dccg401_set_dto_dscclk,
+ .set_ref_dscclk = dccg401_set_ref_dscclk,
+ .set_valid_pixel_rate = NULL,
+ .set_fifo_errdet_ovr_en = dccg2_set_fifo_errdet_ovr_en,
+ .set_audio_dtbclk_dto = NULL,
+ .otg_add_pixel = dccg401_otg_add_pixel,
+ .otg_drop_pixel = dccg401_otg_drop_pixel,
+ .set_pixel_rate_div = dccg401_set_pixel_rate_div,
+ .get_pixel_rate_div = dccg401_get_pixel_rate_div,
+ .set_dp_dto = dccg401_set_dp_dto,
+ .enable_symclk_se = dccg401_enable_symclk_se,
+ .disable_symclk_se = dccg401_disable_symclk_se,
+ .set_dtbclk_p_src = dccg401_set_dtbclk_p_src,
+ .dccg_read_reg_state = dccg31_read_reg_state
+};
+
+/*
+ * Allocate and initialise a dcn401 DCCG instance.
+ *
+ * ctx: display core context stored in the base object.
+ * regs: register table used by the REG() helper.
+ * dccg_shift: field shift table used by the FN() helper.
+ * dccg_mask: field mask table used by the FN() helper.
+ *
+ * Returns a pointer to the embedded base object, or NULL (after
+ * BREAK_TO_DEBUGGER()) when the allocation fails. The three tables are
+ * stored by pointer, not copied.
+ */
+struct dccg *dccg401_create(
+	struct dc_context *ctx,
+	const struct dccg_registers *regs,
+	const struct dccg_shift *dccg_shift,
+	const struct dccg_mask *dccg_mask)
+{
+	struct dcn_dccg *new_dccg;
+
+	/* zeroed allocation: every field not assigned below starts as 0 */
+	new_dccg = kzalloc(sizeof(*new_dccg), GFP_KERNEL);
+	if (!new_dccg) {
+		BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+
+	new_dccg->base.ctx = ctx;
+	new_dccg->base.funcs = &dccg401_funcs;
+
+	new_dccg->regs = regs;
+	new_dccg->dccg_shift = dccg_shift;
+	new_dccg->dccg_mask = dccg_mask;
+
+	return &new_dccg->base;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h
new file mode 100644
index 000000000000..5947a35363aa
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN401_DCCG_H__
+#define __DCN401_DCCG_H__
+
+#include "dcn32/dcn32_dccg.h"
+
+/*
+ * Designated initializer for one element of a per-instance shift/mask array
+ * whose register name AND field name both carry the instance number.
+ *
+ * Target member: <field_prefix>_<field_name>[inst]
+ * Value: <block><inst>_<reg_name>__<field_prefix><inst>_<field_name><post_fix>
+ *
+ * Example, with post_fix = __SHIFT:
+ * DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 0, __SHIFT)
+ * expands to
+ * .OTG_ADD_PIXEL[0] = OTG0_PIXEL_RATE_CNTL__OTG0_ADD_PIXEL__SHIFT
+ */
+#define DCCG_SFII(block, reg_name, field_prefix, field_name, inst, post_fix)\
+ .field_prefix ## _ ## field_name[inst] = block ## inst ## _ ## reg_name ## __ ## field_prefix ## inst ## _ ## field_name ## post_fix
+
+#define DCCG_MASK_SH_LIST_DCN401(mask_sh) \
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 0, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 1, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 2, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 3, mask_sh),\
+ DCCG_SF(DPPCLK_CTRL, DPPCLK0_EN, mask_sh),\
+ DCCG_SF(DPPCLK_CTRL, DPPCLK1_EN, mask_sh),\
+ DCCG_SF(DPPCLK_CTRL, DPPCLK2_EN, mask_sh),\
+ DCCG_SF(DPPCLK_CTRL, DPPCLK3_EN, mask_sh),\
+ DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_PHASE, mask_sh),\
+ DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_MODULO, mask_sh),\
+ DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_EN, mask_sh),\
+ DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_SRC_SEL, mask_sh),\
+ DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_EN, mask_sh),\
+ DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_EN, mask_sh),\
+ DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_EN, mask_sh),\
+ DCCG_SF(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_EN, mask_sh),\
+ DCCG_SF(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK0_EN, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK1_EN, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK2_EN, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK3_EN, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK0_SRC_SEL, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK1_SRC_SEL, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK2_SRC_SEL, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK3_SRC_SEL, mask_sh),\
+ DCCG_SF(HDMISTREAMCLK_CNTL, HDMISTREAMCLK0_EN, mask_sh),\
+ DCCG_SF(HDMISTREAMCLK_CNTL, HDMISTREAMCLK0_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE0_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE1_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE2_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE3_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE0_EN, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE1_EN, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE2_EN, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE3_EN, mask_sh),\
+ DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE0_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE1_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE0_EN, mask_sh),\
+ DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE1_EN, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 1, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 3, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 1, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 3, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG0_TMDS_PIXEL_RATE_DIV, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, DPDTO0_INT, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG1_TMDS_PIXEL_RATE_DIV, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, DPDTO1_INT, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG2_TMDS_PIXEL_RATE_DIV, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, DPDTO2_INT, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG3_TMDS_PIXEL_RATE_DIV, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, DPDTO3_INT, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P0_SRC_SEL, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P0_EN, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P1_SRC_SEL, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P1_EN, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P2_SRC_SEL, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P2_EN, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P3_SRC_SEL, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P3_EN, mask_sh),\
+ DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO_SEL, mask_sh),\
+ DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL, mask_sh),\
+ DCCG_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DP_DTO, ENABLE, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DP_DTO, ENABLE, 1, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DP_DTO, ENABLE, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DP_DTO, ENABLE, 3, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 1, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 3, mask_sh),\
+ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK0_EN, mask_sh),\
+ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK1_EN, mask_sh),\
+ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK2_EN, mask_sh),\
+ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK3_EN, mask_sh),\
+ DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_PHASE, mask_sh),\
+ DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_MODULO, mask_sh),\
+ DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_PHASE, mask_sh),\
+ DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_MODULO, mask_sh),\
+ DCCG_SF(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_PHASE, mask_sh),\
+ DCCG_SF(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_MODULO, mask_sh),\
+ DCCG_SF(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_PHASE, mask_sh),\
+ DCCG_SF(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_MODULO, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, HDMISTREAMCLK0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE2_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE3_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE2_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE3_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE2_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE3_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE2_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE3_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, HDMICHARCLK0_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYA_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYB_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYC_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYD_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_FE_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_FE_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_FE_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_FE_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, HDMISTREAMCLK0_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(SYMCLKA_CLOCK_ENABLE, SYMCLKA_CLOCK_ENABLE, mask_sh),\
+ DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_CLOCK_ENABLE, mask_sh),\
+ DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_CLOCK_ENABLE, mask_sh),\
+ DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_CLOCK_ENABLE, mask_sh),\
+ DCCG_SF(SYMCLKA_CLOCK_ENABLE, SYMCLKA_FE_EN, mask_sh),\
+ DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_EN, mask_sh),\
+ DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_EN, mask_sh),\
+ DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_EN, mask_sh),\
+ DCCG_SF(SYMCLKA_CLOCK_ENABLE, SYMCLKA_FE_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_SRC_SEL, mask_sh),\
+
+void dccg401_init(struct dccg *dccg);
+
+void dccg401_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk);
+void dccg401_get_dccg_ref_freq(struct dccg *dccg,
+ unsigned int xtalin_freq_inKhz,
+ unsigned int *dccg_ref_freq_inKhz);
+void dccg401_set_dpstreamclk(
+ struct dccg *dccg,
+ enum streamclk_source src,
+ int otg_inst,
+ int dp_hpo_inst);
+void dccg401_enable_symclk32_le(
+ struct dccg *dccg,
+ int hpo_le_inst,
+ enum phyd32clk_clock_source phyd32clk);
+void dccg401_disable_symclk32_le(
+ struct dccg *dccg,
+ int hpo_le_inst);
+void dccg401_disable_dpstreamclk(struct dccg *dccg, int dp_hpo_inst);
+void dccg401_set_dto_dscclk(struct dccg *dccg, uint32_t inst, uint32_t num_slices_h);
+void dccg401_set_ref_dscclk(struct dccg *dccg,
+ uint32_t dsc_inst);
+void dccg401_set_src_sel(
+ struct dccg *dccg,
+ const struct dtbclk_dto_params *params);
+void dccg401_set_pixel_rate_div(
+ struct dccg *dccg,
+ uint32_t otg_inst,
+ enum pixel_rate_div tmds_div,
+ enum pixel_rate_div unused);
+void dccg401_get_pixel_rate_div(
+ struct dccg *dccg,
+ uint32_t otg_inst,
+ uint32_t *tmds_div,
+ uint32_t *dp_dto_int);
+void dccg401_set_dp_dto(
+ struct dccg *dccg,
+ const struct dp_dto_params *params);
+void dccg401_enable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst);
+void dccg401_disable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst);
+void dccg401_set_dtbclk_p_src(
+ struct dccg *dccg,
+ enum streamclk_source src,
+ uint32_t otg_inst);
+struct dccg *dccg401_create(
+ struct dc_context *ctx,
+ const struct dccg_registers *regs,
+ const struct dccg_shift *dccg_shift,
+ const struct dccg_mask *dccg_mask);
+
+void dccg401_set_physymclk(
+ struct dccg *dccg,
+ int phy_inst,
+ enum physymclk_clock_source clk_src,
+ bool force_enable);
+
+#endif //__DCN401_DCCG_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dce/Makefile b/drivers/gpu/drm/amd/display/dc/dce/Makefile
index 0d7db132a20f..986e0e7abbc2 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce/Makefile
@@ -26,11 +26,11 @@
# - register programming through common macros that look up register
# offset/shift/mask stored in dce_hw struct
-DCE = dce_audio.o dce_stream_encoder.o dce_link_encoder.o dce_hwseq.o \
+DCE = dce_audio.o dce_stream_encoder.o dce_link_encoder.o \
dce_mem_input.o dce_clock_source.o dce_scl_filters.o dce_transform.o \
dce_opp.o dce_dmcu.o dce_abm.o dce_ipp.o dce_aux.o \
-dce_i2c.o dce_i2c_hw.o dce_i2c_sw.o dmub_psr.o dmub_abm.o dce_panel_cntl.o \
-dmub_hw_lock_mgr.o dmub_outbox.o
+dce_i2c.o dce_i2c_hw.o dce_i2c_sw.o dmub_psr.o dmub_abm.o dmub_abm_lcd.o dce_panel_cntl.o \
+dmub_hw_lock_mgr.o dmub_outbox.o dmub_replay.o
AMD_DAL_DCE = $(addprefix $(AMDDALPATH)/dc/dce/,$(DCE))
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
index 874b132fe1d7..2dcf394edf22 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
@@ -135,7 +135,7 @@ static void dmcu_set_backlight_level(
0, 1, 80000);
}
-static void dce_abm_init(struct abm *abm, uint32_t backlight)
+static void dce_abm_init(struct abm *abm, uint32_t backlight, uint32_t user_level)
{
struct dce_abm *abm_dce = TO_DCE_ABM(abm);
@@ -162,7 +162,7 @@ static void dce_abm_init(struct abm *abm, uint32_t backlight)
BL1_PWM_TARGET_ABM_LEVEL, backlight);
REG_UPDATE(BL1_PWM_USER_LEVEL,
- BL1_PWM_USER_LEVEL, backlight);
+ BL1_PWM_USER_LEVEL, user_level);
REG_UPDATE_2(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES,
ABM1_LS_MIN_PIXEL_VALUE_THRES, 0,
@@ -283,7 +283,7 @@ struct abm *dce_abm_create(
const struct dce_abm_shift *abm_shift,
const struct dce_abm_mask *abm_mask)
{
- struct dce_abm *abm_dce = kzalloc(sizeof(*abm_dce), GFP_ATOMIC);
+ struct dce_abm *abm_dce = kzalloc(sizeof(*abm_dce), GFP_KERNEL);
if (abm_dce == NULL) {
BREAK_TO_DEBUGGER();
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h
index b699d1b2ba83..3d819fc5654c 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h
@@ -168,8 +168,7 @@
ABM_SF(DC_ABM1_HGLS_REG_READ_PROGRESS, \
ABM1_BL_REG_READ_MISSED_FRAME_CLEAR, mask_sh)
-#define ABM_MASK_SH_LIST_DCN10(mask_sh) \
- ABM_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(mask_sh), \
+#define ABM_MASK_SH_LIST_DCN10_COMMON(mask_sh) \
ABM_SF(ABM0_DC_ABM1_HG_MISC_CTRL, \
ABM1_HG_NUM_OF_BINS_SEL, mask_sh), \
ABM_SF(ABM0_DC_ABM1_HG_MISC_CTRL, \
@@ -199,9 +198,107 @@
ABM_SF(ABM0_DC_ABM1_HGLS_REG_READ_PROGRESS, \
ABM1_BL_REG_READ_MISSED_FRAME_CLEAR, mask_sh)
-#define ABM_MASK_SH_LIST_DCN20(mask_sh) ABM_MASK_SH_LIST_DCE110(mask_sh)
+#define ABM_MASK_SH_LIST_DCN10(mask_sh) \
+ ABM_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(mask_sh), \
+ ABM_MASK_SH_LIST_DCN10_COMMON(mask_sh)
+#define ABM_MASK_SH_LIST_DCN20(mask_sh) ABM_MASK_SH_LIST_DCE110(mask_sh)
#define ABM_MASK_SH_LIST_DCN30(mask_sh) ABM_MASK_SH_LIST_DCN10(mask_sh)
+#define ABM_MASK_SH_LIST_DCN35(mask_sh) ABM_MASK_SH_LIST_DCN10_COMMON(mask_sh)
+
+#define ABM_MASK_SH_LIST_DCN32(mask_sh) \
+ ABM_SF(ABM0_DC_ABM1_HG_MISC_CTRL, \
+ ABM1_HG_NUM_OF_BINS_SEL, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HG_MISC_CTRL, \
+ ABM1_HG_VMAX_SEL, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HG_MISC_CTRL, \
+ ABM1_HG_BIN_BITWIDTH_SIZE_SEL, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_IPCSC_COEFF_SEL, \
+ ABM1_IPCSC_COEFF_SEL_R, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_IPCSC_COEFF_SEL, \
+ ABM1_IPCSC_COEFF_SEL_G, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_IPCSC_COEFF_SEL, \
+ ABM1_IPCSC_COEFF_SEL_B, mask_sh), \
+ ABM_SF(ABM0_BL1_PWM_CURRENT_ABM_LEVEL, \
+ BL1_PWM_CURRENT_ABM_LEVEL, mask_sh), \
+ ABM_SF(ABM0_BL1_PWM_TARGET_ABM_LEVEL, \
+ BL1_PWM_TARGET_ABM_LEVEL, mask_sh), \
+ ABM_SF(ABM0_BL1_PWM_USER_LEVEL, \
+ BL1_PWM_USER_LEVEL, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES, \
+ ABM1_LS_MIN_PIXEL_VALUE_THRES, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES, \
+ ABM1_LS_MAX_PIXEL_VALUE_THRES, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HGLS_REG_READ_PROGRESS, \
+ ABM1_HG_REG_READ_MISSED_FRAME_CLEAR, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HGLS_REG_READ_PROGRESS, \
+ ABM1_LS_REG_READ_MISSED_FRAME_CLEAR, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HGLS_REG_READ_PROGRESS, \
+ ABM1_BL_REG_READ_MISSED_FRAME_CLEAR, mask_sh)
+
+#define ABM_MASK_SH_LIST_DCN401(mask_sh) \
+ ABM_SF(ABM0_DC_ABM1_HG_MISC_CTRL, \
+ ABM1_HG_NUM_OF_BINS_SEL, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HG_MISC_CTRL, \
+ ABM1_HG_VMAX_SEL, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HG_MISC_CTRL, \
+ ABM1_HG_BIN_BITWIDTH_SIZE_SEL, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_IPCSC_COEFF_SEL, \
+ ABM1_IPCSC_COEFF_SEL_R, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_IPCSC_COEFF_SEL, \
+ ABM1_IPCSC_COEFF_SEL_G, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_IPCSC_COEFF_SEL, \
+ ABM1_IPCSC_COEFF_SEL_B, mask_sh), \
+ ABM_SF(ABM0_BL1_PWM_CURRENT_ABM_LEVEL, \
+ BL1_PWM_CURRENT_ABM_LEVEL, mask_sh), \
+ ABM_SF(ABM0_BL1_PWM_TARGET_ABM_LEVEL, \
+ BL1_PWM_TARGET_ABM_LEVEL, mask_sh), \
+ ABM_SF(ABM0_BL1_PWM_USER_LEVEL, \
+ BL1_PWM_USER_LEVEL, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES, \
+ ABM1_LS_MIN_PIXEL_VALUE_THRES, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES, \
+ ABM1_LS_MAX_PIXEL_VALUE_THRES, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HGLS_REG_READ_PROGRESS, \
+ ABM1_HG_REG_READ_MISSED_FRAME_CLEAR, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HGLS_REG_READ_PROGRESS, \
+ ABM1_LS_REG_READ_MISSED_FRAME_CLEAR, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HGLS_REG_READ_PROGRESS, \
+ ABM1_BL_REG_READ_MISSED_FRAME_CLEAR, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_ACE_OFFSET_SLOPE_DATA, \
+ ABM1_ACE_SLOPE_DATA, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_ACE_OFFSET_SLOPE_DATA, \
+ ABM1_ACE_OFFSET_DATA, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_ACE_PWL_CNTL, \
+ ABM1_ACE_OFFSET_SLOPE_INDEX, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_ACE_PWL_CNTL, \
+ ABM1_ACE_THRES_INDEX, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_ACE_PWL_CNTL, \
+ ABM1_ACE_IGNORE_MASTER_LOCK_EN, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_ACE_PWL_CNTL, \
+ ABM1_ACE_READBACK_DB_REG_VALUE_EN, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_ACE_PWL_CNTL, \
+ ABM1_ACE_DBUF_REG_UPDATE_PENDING, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_ACE_PWL_CNTL, \
+ ABM1_ACE_LOCK, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_ACE_THRES_DATA, \
+ ABM1_ACE_THRES_DATA_1, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_ACE_THRES_DATA, \
+ ABM1_ACE_THRES_DATA_2, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HG_RESULT_DATA, \
+ ABM1_HG_RESULT_DATA, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HG_RESULT_INDEX, \
+ ABM1_HG_RESULT_INDEX, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HG_BIN_33_40_SHIFT_INDEX, \
+ ABM1_HG_BIN_33_40_SHIFT_INDEX, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HG_BIN_33_64_SHIFT_FLAG, \
+ ABM1_HG_BIN_33_64_SHIFT_FLAG, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HG_BIN_41_48_SHIFT_INDEX, \
+ ABM1_HG_BIN_41_48_SHIFT_INDEX, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HG_BIN_49_56_SHIFT_INDEX, \
+ ABM1_HG_BIN_49_56_SHIFT_INDEX, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HG_BIN_57_64_SHIFT_INDEX, \
+ ABM1_HG_BIN_57_64_SHIFT_INDEX, mask_sh)
#define ABM_REG_FIELD_LIST(type) \
type ABM1_HG_NUM_OF_BINS_SEL; \
@@ -221,7 +318,24 @@
type MASTER_COMM_INTERRUPT; \
type MASTER_COMM_CMD_REG_BYTE0; \
type MASTER_COMM_CMD_REG_BYTE1; \
- type MASTER_COMM_CMD_REG_BYTE2
+ type MASTER_COMM_CMD_REG_BYTE2; \
+ type ABM1_HG_BIN_33_40_SHIFT_INDEX; \
+ type ABM1_HG_BIN_33_64_SHIFT_FLAG; \
+ type ABM1_HG_BIN_41_48_SHIFT_INDEX; \
+ type ABM1_HG_BIN_49_56_SHIFT_INDEX; \
+ type ABM1_HG_BIN_57_64_SHIFT_INDEX; \
+ type ABM1_HG_RESULT_DATA; \
+ type ABM1_HG_RESULT_INDEX; \
+ type ABM1_ACE_SLOPE_DATA; \
+ type ABM1_ACE_OFFSET_DATA; \
+ type ABM1_ACE_OFFSET_SLOPE_INDEX; \
+ type ABM1_ACE_THRES_INDEX; \
+ type ABM1_ACE_IGNORE_MASTER_LOCK_EN; \
+ type ABM1_ACE_READBACK_DB_REG_VALUE_EN; \
+ type ABM1_ACE_DBUF_REG_UPDATE_PENDING; \
+ type ABM1_ACE_LOCK; \
+ type ABM1_ACE_THRES_DATA_1; \
+ type ABM1_ACE_THRES_DATA_2
struct dce_abm_shift {
ABM_REG_FIELD_LIST(uint8_t);
@@ -243,6 +357,16 @@ struct dce_abm_registers {
uint32_t DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES;
uint32_t DC_ABM1_HGLS_REG_READ_PROGRESS;
uint32_t DC_ABM1_ACE_OFFSET_SLOPE_0;
+ uint32_t DC_ABM1_ACE_OFFSET_SLOPE_DATA;
+ uint32_t DC_ABM1_ACE_PWL_CNTL;
+ uint32_t DC_ABM1_HG_BIN_33_40_SHIFT_INDEX;
+ uint32_t DC_ABM1_HG_BIN_33_64_SHIFT_FLAG;
+ uint32_t DC_ABM1_HG_BIN_41_48_SHIFT_INDEX;
+ uint32_t DC_ABM1_HG_BIN_49_56_SHIFT_INDEX;
+ uint32_t DC_ABM1_HG_BIN_57_64_SHIFT_INDEX;
+ uint32_t DC_ABM1_HG_RESULT_DATA;
+ uint32_t DC_ABM1_HG_RESULT_INDEX;
+ uint32_t DC_ABM1_ACE_THRES_DATA;
uint32_t DC_ABM1_ACE_THRES_12;
uint32_t MASTER_COMM_CNTL_REG;
uint32_t MASTER_COMM_CMD_REG;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
index 27218ede150a..fcad61c618a1 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
@@ -23,8 +23,6 @@
*
*/
-#include <linux/slab.h>
-
#include "reg_helper.h"
#include "dce_audio.h"
#include "dce/dce_11_0_d.h"
@@ -67,9 +65,6 @@ static void write_indirect_azalia_reg(struct audio *audio,
/* AZALIA_F0_CODEC_ENDPOINT_DATA endpoint data */
REG_SET(AZALIA_F0_CODEC_ENDPOINT_DATA, 0,
AZALIA_ENDPOINT_REG_DATA, reg_data);
-
- DC_LOG_HW_AUDIO("AUDIO:write_indirect_azalia_reg: index: %u data: %u\n",
- reg_index, reg_data);
}
static uint32_t read_indirect_azalia_reg(struct audio *audio, uint32_t reg_index)
@@ -85,9 +80,6 @@ static uint32_t read_indirect_azalia_reg(struct audio *audio, uint32_t reg_index
/* AZALIA_F0_CODEC_ENDPOINT_DATA endpoint data */
value = REG_READ(AZALIA_F0_CODEC_ENDPOINT_DATA);
- DC_LOG_HW_AUDIO("AUDIO:read_indirect_azalia_reg: index: %u data: %u\n",
- reg_index, value);
-
return value;
}
@@ -247,27 +239,295 @@ static void check_audio_bandwidth_hdmi(
}
}
}
+/*
+ * Translate a DP link rate into its link symbol clock frequency in MHz.
+ * A rate that is not listed here trips ASSERT(0) and yields 0.
+ */
+static struct fixed31_32 get_link_symbol_clk_freq_mhz(enum dc_link_rate link_rate)
+{
+	struct fixed31_32 freq_mhz = dc_fixpt_from_int(0);
+
+	switch (link_rate) {
+	case LINK_RATE_LOW:
+		freq_mhz = dc_fixpt_from_int(162); /* 162 MHz */
+		break;
+	case LINK_RATE_HIGH:
+		freq_mhz = dc_fixpt_from_int(270); /* 270 MHz */
+		break;
+	case LINK_RATE_HIGH2:
+		freq_mhz = dc_fixpt_from_int(540); /* 540 MHz */
+		break;
+	case LINK_RATE_HIGH3:
+		freq_mhz = dc_fixpt_from_int(810); /* 810 MHz */
+		break;
+	case LINK_RATE_UHBR10:
+		freq_mhz = dc_fixpt_from_fraction(3125, 10); /* 312.5 MHz */
+		break;
+	case LINK_RATE_UHBR13_5:
+		freq_mhz = dc_fixpt_from_fraction(421875, 1000); /* 421.875 MHz */
+		break;
+	case LINK_RATE_UHBR20:
+		freq_mhz = dc_fixpt_from_int(625); /* 625 MHz */
+		break;
+	default:
+		/* Unexpected case, this requires debug if encountered. */
+		ASSERT(0);
+		break;
+	}
+
+	return freq_mhz;
+}
+
+/* Parameters describing how audio samples are packed into layouts and DP audio SDPs. */
+struct dp_audio_layout_config {
+ uint8_t layouts_per_sample_denom; /* divisor that turns samples per line into layouts per line */
+ uint8_t symbols_per_layout; /* symbols consumed by one layout */
+ uint8_t max_layouts_per_audio_sdp; /* largest number of layouts carried by one audio SDP */
+};
+
+/*
+ * Fill @output with the audio layout parameters for @channel_count and
+ * @encoding.
+ *
+ * L-PCM audio is assumed. The current implementation uses at most one layout
+ * per SDP, with each layout being the same size (8ch layout).
+ *
+ * @output is left untouched when the channel count is not 2, 6 or 8, or when
+ * the encoding is neither 8b/10b nor 128b/132b; callers are expected to
+ * pre-initialize it.
+ */
+static void get_audio_layout_config(
+		uint32_t channel_count,
+		enum dp_link_encoding encoding,
+		struct dp_audio_layout_config *output)
+{
+	uint8_t symbols_per_layout;
+	uint8_t layouts_per_sample_denom;
+
+	/* Only the symbol count of a layout depends on the channel encoding. */
+	if (encoding == DP_8b_10b_ENCODING)
+		symbols_per_layout = 40;
+	else if (encoding == DP_128b_132b_ENCODING)
+		symbols_per_layout = 10;
+	else
+		return;
+
+	/* Only the samples-to-layouts divisor depends on the channel count. */
+	switch (channel_count) {
+	case 2:
+		layouts_per_sample_denom = 4;
+		break;
+	case 6:
+	case 8:
+		layouts_per_sample_denom = 1;
+		break;
+	default:
+		return;
+	}
+
+	output->layouts_per_sample_denom = layouts_per_sample_denom;
+	output->symbols_per_layout = symbols_per_layout;
+	output->max_layouts_per_audio_sdp = 1;
+}
-/*For DP SST, calculate if specified sample rates can fit into a given timing */
-static void check_audio_bandwidth_dpsst(
+/*
+ * Return the lane count used when mapping the AV stream onto the link:
+ * the actual lane count for 8b/10b SST, and 4 for 8b/10b MST and for
+ * 128b/132b. Any other encoding yields 0 and trips the ASSERT.
+ */
+static uint32_t get_av_stream_map_lane_count(
+		enum dp_link_encoding encoding,
+		enum dc_lane_count lane_count,
+		bool is_mst)
+{
+	uint32_t mapped_lanes;
+
+	if (encoding == DP_128b_132b_ENCODING)
+		mapped_lanes = 4;
+	else if (encoding != DP_8b_10b_ENCODING)
+		mapped_lanes = 0;
+	else if (is_mst)
+		mapped_lanes = 4;
+	else
+		mapped_lanes = lane_count;
+
+	ASSERT(mapped_lanes != 0);
+
+	return mapped_lanes;
+}
+
+/*
+ * Return the per-SDP overhead, in symbols, that is added on top of the
+ * layout payload. Any encoding other than 8b/10b or 128b/132b yields 0 and
+ * trips the ASSERT.
+ */
+static uint32_t get_audio_sdp_overhead(
+		enum dp_link_encoding encoding,
+		enum dc_lane_count lane_count,
+		bool is_mst)
+{
+	uint32_t overhead;
+
+	if (encoding == DP_128b_132b_ENCODING)
+		overhead = 10; /* 4 x 2.5 */
+	else if (encoding != DP_8b_10b_ENCODING)
+		overhead = 0;
+	else if (is_mst)
+		overhead = 16; /* 4 * 2 + 8 */
+	else
+		overhead = lane_count * 2 + 8;
+
+	ASSERT(overhead != 0);
+
+	return overhead;
+}
+
+/*
+ * Compute how many symbols per horizontal blank are needed to carry audio at
+ * sample_rate_hz for the given timing and layout configuration.
+ *
+ * The samples produced during one video line are converted to layouts, the
+ * layouts are grouped into SDPs of max_layouts_per_audio_sdp each, and every
+ * SDP is costed as (payload + audio_sdp_overhead) * 1.1, then padded with
+ * respect to av_stream_map_lane_count. A trailing partial SDP is costed the
+ * same way from the fractional remainder.
+ *
+ * channel_count is not read by this function.
+ */
+static uint32_t calculate_required_audio_bw_in_symbols(
const struct audio_crtc_info *crtc_info,
+ const struct dp_audio_layout_config *layout_config,
uint32_t channel_count,
- union audio_sample_rates *sample_rates)
+ uint32_t sample_rate_hz,
+ uint32_t av_stream_map_lane_count,
+ uint32_t audio_sdp_overhead)
{
- /* do nothing */
+ /* DP spec recommends between 1.05 to 1.1 safety margin to prevent sample under-run */
+ struct fixed31_32 audio_sdp_margin = dc_fixpt_from_fraction(110, 100);
+ /* pixel clock (100 Hz units) / (h_total * 10) gives lines per millisecond, i.e. kHz */
+ struct fixed31_32 horizontal_line_freq_khz = dc_fixpt_from_fraction(
+ crtc_info->requested_pixel_clock_100Hz, (long long)crtc_info->h_total * 10);
+ struct fixed31_32 samples_per_line;
+ struct fixed31_32 layouts_per_line;
+ struct fixed31_32 symbols_per_sdp_max_layout;
+ struct fixed31_32 remainder;
+ uint32_t num_sdp_with_max_layouts;
+ uint32_t required_symbols_per_hblank;
+
+ /* sample rate in kHz divided by line rate in kHz = audio samples per video line */
+ samples_per_line = dc_fixpt_from_fraction(sample_rate_hz, 1000);
+ samples_per_line = dc_fixpt_div(samples_per_line, horizontal_line_freq_khz);
+ layouts_per_line = dc_fixpt_div_int(samples_per_line, layout_config->layouts_per_sample_denom);
+
+ /* Number of SDPs that are completely filled with layouts. */
+ num_sdp_with_max_layouts = dc_fixpt_floor(
+ dc_fixpt_div_int(layouts_per_line, layout_config->max_layouts_per_audio_sdp));
+ symbols_per_sdp_max_layout = dc_fixpt_from_int(
+ layout_config->max_layouts_per_audio_sdp * layout_config->symbols_per_layout);
+ symbols_per_sdp_max_layout = dc_fixpt_add_int(symbols_per_sdp_max_layout, audio_sdp_overhead);
+ symbols_per_sdp_max_layout = dc_fixpt_mul(symbols_per_sdp_max_layout, audio_sdp_margin);
+ required_symbols_per_hblank = num_sdp_with_max_layouts;
+ /* (x + lanes) / lanes * lanes pads x up to the next multiple of lanes above it */
+ required_symbols_per_hblank *= ((dc_fixpt_ceil(symbols_per_sdp_max_layout) + av_stream_map_lane_count) /
+ av_stream_map_lane_count) * av_stream_map_lane_count;
+
+ /* floor != ceil means a fractional layout count is left over: cost one more, partial SDP */
+ if (num_sdp_with_max_layouts != dc_fixpt_ceil(
+ dc_fixpt_div_int(layouts_per_line, layout_config->max_layouts_per_audio_sdp))) {
+ remainder = dc_fixpt_sub_int(layouts_per_line,
+ num_sdp_with_max_layouts * layout_config->max_layouts_per_audio_sdp);
+ remainder = dc_fixpt_mul_int(remainder, layout_config->symbols_per_layout);
+ remainder = dc_fixpt_add_int(remainder, audio_sdp_overhead);
+ remainder = dc_fixpt_mul(remainder, audio_sdp_margin);
+ required_symbols_per_hblank += ((dc_fixpt_ceil(remainder) + av_stream_map_lane_count) /
+ av_stream_map_lane_count) * av_stream_map_lane_count;
+ }
+
+ return required_symbols_per_hblank;
}
-/*For DP MST, calculate if specified sample rates can fit into a given timing */
-static void check_audio_bandwidth_dpmst(
+/*
+ * Estimate how many stream symbols of one horizontal blanking period are
+ * available for audio.
+ *
+ * Current calculation only applicable for 8b/10b MST and 128b/132b SST/MST.
+ *
+ * The hblank duration is converted to link symbol clocks, scaled by the
+ * fraction of the link bandwidth that the video stream occupies, multiplied
+ * by the lane count, and then reduced by the DSC EOC overhead (4 symbols per
+ * slice) and the main link overhead (12 symbols).
+ *
+ * Returns the available symbol count, or 0 when the overheads consume the
+ * whole budget.
+ */
+static uint32_t calculate_available_hblank_bw_in_symbols(
 	const struct audio_crtc_info *crtc_info,
+	const struct audio_dp_link_info *dp_link_info)
+{
+	uint64_t hblank = crtc_info->h_total - crtc_info->h_active;
+	/* pixels * 10 / (pixel clock in 100 Hz units) = pixels / kHz = milliseconds */
+	struct fixed31_32 hblank_time_msec =
+			dc_fixpt_from_fraction(hblank * 10, crtc_info->requested_pixel_clock_100Hz);
+	struct fixed31_32 lsclkfreq_mhz =
+			get_link_symbol_clk_freq_mhz(dp_link_info->link_rate);
+	struct fixed31_32 average_stream_sym_bw_frac;
+	struct fixed31_32 peak_stream_bw_kbps;
+	struct fixed31_32 bits_per_pixel;
+	struct fixed31_32 link_bw_kbps;
+	struct fixed31_32 available_stream_sym_count;
+	uint32_t available_hblank_bw = 0; /* in stream symbols */
+	uint32_t eoc_overhead;
+
+	if (crtc_info->dsc_bits_per_pixel) {
+		/* presumably stored in 1/16 bpp units, hence the division -- TODO confirm */
+		bits_per_pixel = dc_fixpt_from_fraction(crtc_info->dsc_bits_per_pixel, 16);
+	} else {
+		switch (crtc_info->color_depth) {
+		case COLOR_DEPTH_666:
+			bits_per_pixel = dc_fixpt_from_int(6);
+			break;
+		case COLOR_DEPTH_888:
+			bits_per_pixel = dc_fixpt_from_int(8);
+			break;
+		case COLOR_DEPTH_101010:
+			bits_per_pixel = dc_fixpt_from_int(10);
+			break;
+		case COLOR_DEPTH_121212:
+			bits_per_pixel = dc_fixpt_from_int(12);
+			break;
+		default:
+			/* Default to commonly supported color depth. */
+			bits_per_pixel = dc_fixpt_from_int(8);
+			break;
+		}
+
+		/* Three components per pixel before chroma subsampling. */
+		bits_per_pixel = dc_fixpt_mul_int(bits_per_pixel, 3);
+
+		if (crtc_info->pixel_encoding == PIXEL_ENCODING_YCBCR422) {
+			bits_per_pixel = dc_fixpt_div_int(bits_per_pixel, 3);
+			bits_per_pixel = dc_fixpt_mul_int(bits_per_pixel, 2);
+		} else if (crtc_info->pixel_encoding == PIXEL_ENCODING_YCBCR420) {
+			bits_per_pixel = dc_fixpt_div_int(bits_per_pixel, 2);
+		}
+	}
+
+	/* Use simple stream BW calculation because mainlink overhead is
+	 * accounted for separately in the audio BW calculations.
+	 */
+	peak_stream_bw_kbps = dc_fixpt_from_fraction(crtc_info->requested_pixel_clock_100Hz, 10);
+	peak_stream_bw_kbps = dc_fixpt_mul(peak_stream_bw_kbps, bits_per_pixel);
+	link_bw_kbps = dc_fixpt_from_int(dp_link_info->link_bandwidth_kbps);
+	average_stream_sym_bw_frac = dc_fixpt_div(peak_stream_bw_kbps, link_bw_kbps);
+
+	/* milliseconds * 1000 = microseconds; microseconds * MHz = symbol clocks */
+	available_stream_sym_count = dc_fixpt_mul_int(hblank_time_msec, 1000);
+	available_stream_sym_count = dc_fixpt_mul(available_stream_sym_count, lsclkfreq_mhz);
+	available_stream_sym_count = dc_fixpt_mul(available_stream_sym_count, average_stream_sym_bw_frac);
+	available_hblank_bw = dc_fixpt_floor(available_stream_sym_count);
+	available_hblank_bw *= dp_link_info->lane_count;
+
+	/*
+	 * EOC overhead. Saturate at zero: a plain unsigned subtraction would
+	 * wrap to a huge value when the overhead exceeds the budget, and every
+	 * sample rate would then wrongly appear to fit.
+	 */
+	eoc_overhead = crtc_info->dsc_num_slices * 4;
+	if (available_hblank_bw > eoc_overhead)
+		available_hblank_bw -= eoc_overhead;
+	else
+		available_hblank_bw = 0;
+
+	if (available_hblank_bw < dp_link_info->hblank_min_symbol_width)
+		/* Each symbol takes 4 frames */
+		available_hblank_bw = 4 * dp_link_info->hblank_min_symbol_width;
+
+	if (available_hblank_bw < 12)
+		available_hblank_bw = 0;
+	else
+		available_hblank_bw -= 12; /* Main link overhead */
+
+	return available_hblank_bw;
+}
+
+/*
+ * For DP, clear every sample-rate bit in sample_rates whose required symbol
+ * count per hblank exceeds the symbols available for the given timing and
+ * link configuration.
+ *
+ * sample_rates is left unmodified for 8b/10b SST (not validated yet) and
+ * when no layout configuration exists for channel_count/encoding.
+ */
+static void check_audio_bandwidth_dp(
+ const struct audio_crtc_info *crtc_info,
+ const struct audio_dp_link_info *dp_link_info,
uint32_t channel_count,
union audio_sample_rates *sample_rates)
{
- /* do nothing */
+ struct dp_audio_layout_config layout_config = {0};
+ uint32_t available_hblank_bw;
+ uint32_t av_stream_map_lane_count;
+ uint32_t audio_sdp_overhead;
+
+ /* TODO: Add validation for SST 8b/10b case */
+ if (!dp_link_info->is_mst && dp_link_info->encoding == DP_8b_10b_ENCODING)
+ return;
+
+ available_hblank_bw = calculate_available_hblank_bw_in_symbols(
+ crtc_info, dp_link_info);
+ av_stream_map_lane_count = get_av_stream_map_lane_count(
+ dp_link_info->encoding, dp_link_info->lane_count, dp_link_info->is_mst);
+ audio_sdp_overhead = get_audio_sdp_overhead(
+ dp_link_info->encoding, dp_link_info->lane_count, dp_link_info->is_mst);
+ get_audio_layout_config(
+ channel_count, dp_link_info->encoding, &layout_config);
+
+ /* layout_config stays zeroed when get_audio_layout_config() did not recognize the inputs */
+ if (layout_config.max_layouts_per_audio_sdp == 0 ||
+ layout_config.symbols_per_layout == 0 ||
+ layout_config.layouts_per_sample_denom == 0) {
+ return;
+ }
+ /* Each rate is tested independently against the same hblank budget. */
+ if (available_hblank_bw < calculate_required_audio_bw_in_symbols(
+ crtc_info, &layout_config, channel_count, 192000,
+ av_stream_map_lane_count, audio_sdp_overhead))
+ sample_rates->rate.RATE_192 = 0;
+ if (available_hblank_bw < calculate_required_audio_bw_in_symbols(
+ crtc_info, &layout_config, channel_count, 176400,
+ av_stream_map_lane_count, audio_sdp_overhead))
+ sample_rates->rate.RATE_176_4 = 0;
+ if (available_hblank_bw < calculate_required_audio_bw_in_symbols(
+ crtc_info, &layout_config, channel_count, 96000,
+ av_stream_map_lane_count, audio_sdp_overhead))
+ sample_rates->rate.RATE_96 = 0;
+ if (available_hblank_bw < calculate_required_audio_bw_in_symbols(
+ crtc_info, &layout_config, channel_count, 88200,
+ av_stream_map_lane_count, audio_sdp_overhead))
+ sample_rates->rate.RATE_88_2 = 0;
+ if (available_hblank_bw < calculate_required_audio_bw_in_symbols(
+ crtc_info, &layout_config, channel_count, 48000,
+ av_stream_map_lane_count, audio_sdp_overhead))
+ sample_rates->rate.RATE_48 = 0;
+ if (available_hblank_bw < calculate_required_audio_bw_in_symbols(
+ crtc_info, &layout_config, channel_count, 44100,
+ av_stream_map_lane_count, audio_sdp_overhead))
+ sample_rates->rate.RATE_44_1 = 0;
+ if (available_hblank_bw < calculate_required_audio_bw_in_symbols(
+ crtc_info, &layout_config, channel_count, 32000,
+ av_stream_map_lane_count, audio_sdp_overhead))
+ sample_rates->rate.RATE_32 = 0;
}
static void check_audio_bandwidth(
const struct audio_crtc_info *crtc_info,
+ const struct audio_dp_link_info *dp_link_info,
uint32_t channel_count,
enum signal_type signal,
union audio_sample_rates *sample_rates)
@@ -279,12 +539,9 @@ static void check_audio_bandwidth(
break;
case SIGNAL_TYPE_EDP:
case SIGNAL_TYPE_DISPLAY_PORT:
- check_audio_bandwidth_dpsst(
- crtc_info, channel_count, sample_rates);
- break;
case SIGNAL_TYPE_DISPLAY_PORT_MST:
- check_audio_bandwidth_dpmst(
- crtc_info, channel_count, sample_rates);
+ check_audio_bandwidth_dp(
+ crtc_info, dp_link_info, channel_count, sample_rates);
break;
default:
break;
@@ -308,7 +565,7 @@ static void set_high_bit_rate_capable(
AZ_REG_WRITE(AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_HBR, value);
}
-/* set video latency in in ms/2+1 */
+/* set video latency in ms/2+1 */
static void set_video_latency(
struct audio *audio,
int latency_in_ms)
@@ -328,7 +585,7 @@ static void set_video_latency(
value);
}
-/* set audio latency in in ms/2+1 */
+/* set audio latency in ms/2+1 */
static void set_audio_latency(
struct audio *audio,
int latency_in_ms)
@@ -373,6 +630,11 @@ void dce_aud_az_enable(struct audio *audio)
audio->inst, value);
}
+/* Mark the audio endpoint as not high-bit-rate capable via set_high_bit_rate_capable(audio, false). */
+void dce_aud_az_disable_hbr_audio(struct audio *audio)
+{
+ set_high_bit_rate_capable(audio, false);
+}
+
void dce_aud_az_disable(struct audio *audio)
{
uint32_t value;
@@ -402,7 +664,8 @@ void dce_aud_az_configure(
struct audio *audio,
enum signal_type signal,
const struct audio_crtc_info *crtc_info,
- const struct audio_info *audio_info)
+ const struct audio_info *audio_info,
+ const struct audio_dp_link_info *dp_link_info)
{
struct dce_audio *aud = DCE_AUD(audio);
@@ -415,6 +678,10 @@ void dce_aud_az_configure(
bool is_ac3_supported = false;
union audio_sample_rates sample_rate;
uint32_t strlen = 0;
+
+ if (signal == SIGNAL_TYPE_VIRTUAL)
+ return;
+
value = AZ_REG_READ(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL);
set_reg_field_value(value, 1,
AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL,
@@ -492,6 +759,17 @@ void dce_aud_az_configure(
AZ_REG_WRITE(AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, value);
+ /* ACP Data - Supports AI */
+ value = AZ_REG_READ(AZALIA_F0_CODEC_PIN_CONTROL_ACP_DATA);
+
+ set_reg_field_value(
+ value,
+ audio_info->flags.info.SUPPORT_AI,
+ AZALIA_F0_CODEC_PIN_CONTROL_ACP_DATA,
+ SUPPORTS_AI);
+
+ AZ_REG_WRITE(AZALIA_F0_CODEC_PIN_CONTROL_ACP_DATA, value);
+
/* Audio Descriptors */
/* pass through all formats */
for (format_index = 0; format_index < AUDIO_FORMAT_CODE_COUNT;
@@ -522,6 +800,7 @@ void dce_aud_az_configure(
check_audio_bandwidth(
crtc_info,
+ dp_link_info,
channel_count,
signal,
&sample_rates);
@@ -581,6 +860,7 @@ void dce_aud_az_configure(
check_audio_bandwidth(
crtc_info,
+ dp_link_info,
8,
signal,
&sample_rate);
@@ -775,7 +1055,7 @@ static void get_azalia_clock_info_dp(
/*audio_dto_module = dpDtoSourceClockInkhz * 10,000;
* [khz] ->[100Hz] */
azalia_clock_info->audio_dto_module =
- pll_info->dp_dto_source_clock_in_khz * 10;
+ pll_info->audio_dto_source_clock_in_khz * 10;
}
void dce_aud_wall_dto_setup(
@@ -863,7 +1143,8 @@ void dce_aud_wall_dto_setup(
REG_UPDATE(DCCG_AUDIO_DTO1_PHASE,
DCCG_AUDIO_DTO1_PHASE, clock_info.audio_dto_phase);
- REG_UPDATE(DCCG_AUDIO_DTO_SOURCE,
+ if (aud->masks->DCCG_AUDIO_DTO2_USE_512FBR_DTO)
+ REG_UPDATE(DCCG_AUDIO_DTO_SOURCE,
DCCG_AUDIO_DTO2_USE_512FBR_DTO, 1);
}
@@ -1018,6 +1299,7 @@ static const struct audio_funcs funcs = {
.az_enable = dce_aud_az_enable,
.az_disable = dce_aud_az_disable,
.az_configure = dce_aud_az_configure,
+ .az_disable_hbr_audio = dce_aud_az_disable_hbr_audio,
.destroy = dce_aud_destroy,
};
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h
index 5622d5e32d81..1b7b8b079af4 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h
@@ -113,6 +113,7 @@ struct dce_audio_shift {
uint8_t DCCG_AUDIO_DTO2_USE_512FBR_DTO;
uint32_t DCCG_AUDIO_DTO0_USE_512FBR_DTO;
uint32_t DCCG_AUDIO_DTO1_USE_512FBR_DTO;
+ uint32_t CLOCK_GATING_DISABLE;
};
struct dce_audio_mask {
@@ -132,6 +133,7 @@ struct dce_audio_mask {
uint32_t DCCG_AUDIO_DTO2_USE_512FBR_DTO;
uint32_t DCCG_AUDIO_DTO0_USE_512FBR_DTO;
uint32_t DCCG_AUDIO_DTO1_USE_512FBR_DTO;
+ uint32_t CLOCK_GATING_DISABLE;
};
@@ -164,11 +166,13 @@ void dce_aud_hw_init(struct audio *audio);
void dce_aud_az_enable(struct audio *audio);
void dce_aud_az_disable(struct audio *audio);
+void dce_aud_az_disable_hbr_audio(struct audio *audio);
void dce_aud_az_configure(struct audio *audio,
enum signal_type signal,
const struct audio_crtc_info *crtc_info,
- const struct audio_info *audio_info);
+ const struct audio_info *audio_info,
+ const struct audio_dp_link_info *dp_link_info);
void dce_aud_wall_dto_setup(struct audio *audio,
enum signal_type signal,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
index 6d42a9cc9916..673bb87d2c17 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
@@ -23,9 +23,6 @@
*
*/
-#include <linux/delay.h>
-#include <linux/slab.h>
-
#include "dm_services.h"
#include "core_types.h"
#include "dce_aux.h"
@@ -87,7 +84,8 @@ static void release_engine(
engine->ddc = NULL;
- REG_UPDATE(AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, 1);
+ REG_UPDATE_2(AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, 1,
+ AUX_SW_USE_AUX_REG_REQ, 0);
}
#define SW_CAN_ACCESS_AUX 1
@@ -413,7 +411,8 @@ static bool acquire(
return false;
if (!acquire_engine(engine)) {
- dal_ddc_close(ddc);
+ engine->ddc = ddc;
+ release_engine(engine);
return false;
}
@@ -564,13 +563,16 @@ int dce_aux_transfer_raw(struct ddc_service *ddc,
struct ddc *ddc_pin = ddc->ddc_pin;
struct dce_aux *aux_engine;
struct aux_request_transaction_data aux_req;
- struct aux_reply_transaction_data aux_rep;
uint8_t returned_bytes = 0;
int res = -1;
uint32_t status;
memset(&aux_req, 0, sizeof(aux_req));
- memset(&aux_rep, 0, sizeof(aux_rep));
+
+ if (ddc_pin == NULL) {
+ *operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE;
+ return -1;
+ }
aux_engine = ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en];
if (!acquire(aux_engine, ddc_pin)) {
@@ -723,20 +725,34 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc,
for (i = 0; i < AUX_MAX_RETRIES; i++) {
DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION,
LOG_FLAG_I2cAux_DceAux,
- "dce_aux_transfer_with_retries: link_index=%u: START: retry %d of %d: address=0x%04x length=%u write=%d mot=%d",
+ "dce_aux_transfer_with_retries: link_index=%u: START: retry %d of %d: "
+ "address=0x%04x length=%u write=%d mot=%d is_i2c=%d is_dpia=%d ddc_hw_inst=%d",
ddc && ddc->link ? ddc->link->link_index : UINT_MAX,
i + 1,
(int)AUX_MAX_RETRIES,
payload->address,
payload->length,
(unsigned int) payload->write,
- (unsigned int) payload->mot);
+ (unsigned int) payload->mot,
+ payload->i2c_over_aux,
+ (ddc->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) ? true : false,
+ ddc->link->ddc_hw_inst);
if (payload->write)
dce_aux_log_payload(" write", payload->data, payload->length, 16);
- ret = dce_aux_transfer_raw(ddc, payload, &operation_result);
+
+ /* Check whether aux to be processed via dmub or dcn directly */
+ if (ddc->ctx->dc->debug.enable_dmub_aux_for_legacy_ddc
+ || ddc->ddc_pin == NULL) {
+ ret = dce_aux_transfer_dmub_raw(ddc, payload, &operation_result);
+ } else {
+ ret = dce_aux_transfer_raw(ddc, payload, &operation_result);
+ }
+
DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION,
LOG_FLAG_I2cAux_DceAux,
- "dce_aux_transfer_with_retries: link_index=%u: END: retry %d of %d: address=0x%04x length=%u write=%d mot=%d: ret=%d operation_result=%d payload->reply=%u",
+ "dce_aux_transfer_with_retries: link_index=%u: END: retry %d of %d: "
+ "address=0x%04x length=%u write=%d mot=%d: ret=%d operation_result=%d "
+ "payload->reply=%u is_i2c=%d is_dpia=%d ddc_hw_inst=%d",
ddc && ddc->link ? ddc->link->link_index : UINT_MAX,
i + 1,
(int)AUX_MAX_RETRIES,
@@ -746,7 +762,10 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc,
(unsigned int) payload->mot,
ret,
(int)operation_result,
- (unsigned int) *payload->reply);
+ (unsigned int) *payload->reply,
+ payload->i2c_over_aux,
+ (ddc->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) ? true : false,
+ ddc->link->ddc_hw_inst);
if (!payload->write)
dce_aux_log_payload(" read", payload->data, ret > 0 ? ret : 0, 16);
@@ -768,7 +787,7 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc,
aux_defer_retries,
AUX_MAX_RETRIES);
goto fail;
- } else
+ } else
udelay(300);
} else if (payload->write && ret > 0) {
/* sink requested more time to complete the write via AUX_ACKM */
@@ -788,7 +807,6 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc,
payload->write_status_update = true;
payload->length = 0;
udelay(300);
-
} else
return true;
break;
@@ -812,12 +830,6 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc,
"dce_aux_transfer_with_retries: AUX_RET_SUCCESS: AUX_TRANSACTION_REPLY_I2C_OVER_AUX_DEFER");
retry_on_defer = true;
- fallthrough;
- case AUX_TRANSACTION_REPLY_I2C_OVER_AUX_NACK:
- if (*payload->reply == AUX_TRANSACTION_REPLY_I2C_OVER_AUX_NACK)
- DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION,
- LOG_FLAG_I2cAux_DceAux,
- "dce_aux_transfer_with_retries: AUX_RET_SUCCESS: AUX_TRANSACTION_REPLY_I2C_OVER_AUX_NACK");
if (aux_defer_retries >= AUX_MIN_DEFER_RETRIES
&& defer_time_in_ms >= AUX_MAX_DEFER_TIMEOUT_MS) {
@@ -836,17 +848,16 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc,
LOG_FLAG_I2cAux_DceAux,
"dce_aux_transfer_with_retries: payload->defer_delay=%u",
payload->defer_delay);
- if (payload->defer_delay > 1) {
- msleep(payload->defer_delay);
- defer_time_in_ms += payload->defer_delay;
- } else if (payload->defer_delay <= 1) {
- udelay(payload->defer_delay * 1000);
- defer_time_in_ms += payload->defer_delay;
- }
+ fsleep(payload->defer_delay * 1000);
+ defer_time_in_ms += payload->defer_delay;
}
}
break;
-
+ case AUX_TRANSACTION_REPLY_I2C_OVER_AUX_NACK:
+ DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION,
+ LOG_FLAG_I2cAux_DceAux,
+ "dce_aux_transfer_with_retries: FAILURE: AUX_TRANSACTION_REPLY_I2C_OVER_AUX_NACK");
+ goto fail;
case AUX_TRANSACTION_REPLY_I2C_DEFER:
DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION,
LOG_FLAG_I2cAux_DceAux,
@@ -878,7 +889,7 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc,
default:
DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_ERROR,
LOG_FLAG_Error_I2cAux,
- "dce_aux_transfer_with_retries: AUX_RET_SUCCESS: FAILURE: AUX_TRANSACTION_REPLY_* unknown, default case.");
+ "dce_aux_transfer_with_retries: AUX_RET_SUCCESS: FAILURE: AUX_TRANSACTION_REPLY_* unknown, default case. Reply: %d", *payload->reply);
goto fail;
}
break;
@@ -942,10 +953,6 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc,
case AUX_RET_ERROR_ENGINE_ACQUIRE:
case AUX_RET_ERROR_UNKNOWN:
default:
- DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION,
- LOG_FLAG_I2cAux_DceAux,
- "dce_aux_transfer_with_retries: Failure: operation_result=%d",
- (int)operation_result);
goto fail;
}
}
@@ -953,14 +960,11 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc,
fail:
DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_ERROR,
LOG_FLAG_Error_I2cAux,
- "dce_aux_transfer_with_retries: FAILURE");
+ "%s: Failure: operation_result=%d",
+ __func__,
+ (int)operation_result);
if (!payload_reply)
payload->reply = NULL;
- DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_ERROR,
- WPP_BIT_FLAG_DC_ERROR,
- "AUX transaction failed. Result: %d",
- operation_result);
-
return false;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h
index e69f1899fbf0..c850ed49281f 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h
@@ -26,7 +26,7 @@
#ifndef __DAL_AUX_ENGINE_DCE110_H__
#define __DAL_AUX_ENGINE_DCE110_H__
-#include "i2caux_interface.h"
+#include "gpio_service_interface.h"
#include "inc/hw/aux_engine.h"
enum aux_return_code_type;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c
index 1435d7bc1f21..e7acd6eec1fd 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c
@@ -450,6 +450,8 @@ void dce_clock_read_ss_info(struct dce_clk_mgr *clk_mgr_dce)
clk_mgr_dce->dprefclk_ss_percentage =
info.spread_spectrum_percentage;
}
+ if (clk_mgr_dce->base.ctx->dc->debug.ignore_dpref_ss)
+ clk_mgr_dce->dprefclk_ss_percentage = 0;
}
}
}
@@ -638,7 +640,7 @@ static void dce11_pplib_apply_display_requirements(
* on power saving.
*
*/
- pp_display_cfg->min_dcfclock_khz = (context->stream_count > 4)?
+ pp_display_cfg->min_dcfclock_khz = (context->stream_count > 4) ?
pp_display_cfg->min_engine_clock_khz : 0;
pp_display_cfg->min_engine_clock_deep_sleep_khz
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
index 2c7eb982eabc..b4f5b4a6331a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
@@ -23,8 +23,6 @@
*
*/
-#include <linux/slab.h>
-
#include "dm_services.h"
@@ -36,6 +34,7 @@
#include "dce_clock_source.h"
#include "clk_mgr.h"
+#include "dccg.h"
#include "reg_helper.h"
@@ -45,7 +44,10 @@
#define CTX \
clk_src->base.ctx
-#define DC_LOGGER_INIT()
+#define DC_LOGGER \
+ calc_pll_cs->ctx->logger
+#define DC_LOGGER_INIT() \
+ struct calc_pll_clock_source *calc_pll_cs = &clk_src->calc_pll
#undef FN
#define FN(reg_name, field_name) \
@@ -215,7 +217,7 @@ static bool calc_fb_divider_checking_tolerance(
actual_calc_clk_100hz = (uint64_t)feedback_divider *
calc_pll_cs->fract_fb_divider_factor +
fract_feedback_divider;
- actual_calc_clk_100hz *= calc_pll_cs->ref_freq_khz * 10;
+ actual_calc_clk_100hz *= (uint64_t)calc_pll_cs->ref_freq_khz * 10;
actual_calc_clk_100hz =
div_u64(actual_calc_clk_100hz,
ref_divider * post_divider *
@@ -545,9 +547,11 @@ static void dce112_get_pix_clk_dividers_helper (
switch (pix_clk_params->color_depth) {
case COLOR_DEPTH_101010:
actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 5) >> 2;
+ actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10;
break;
case COLOR_DEPTH_121212:
actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 6) >> 2;
+ actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10;
break;
case COLOR_DEPTH_161616:
actual_pixel_clock_100hz = actual_pixel_clock_100hz * 2;
@@ -676,7 +680,7 @@ static bool calculate_ss(
* so have to divided by 100 * 100*/
ss_amount = dc_fixpt_mul(
fb_div, dc_fixpt_from_fraction(ss_data->percentage,
- 100 * ss_data->percentage_divider));
+ 100 * (long long)ss_data->percentage_divider));
ds_data->feedback_amount = dc_fixpt_floor(ss_amount);
ss_nslip_amount = dc_fixpt_sub(ss_amount,
@@ -691,8 +695,8 @@ static bool calculate_ss(
/* compute SS_STEP_SIZE_DSFRAC */
modulation_time = dc_fixpt_from_fraction(
- pll_settings->reference_freq * 1000,
- pll_settings->reference_divider * ss_data->modulation_freq_hz);
+ pll_settings->reference_freq * (uint64_t)1000,
+ pll_settings->reference_divider * (uint64_t)ss_data->modulation_freq_hz);
if (ss_data->flags.CENTER_SPREAD)
modulation_time = dc_fixpt_div_int(modulation_time, 4);
@@ -840,6 +844,7 @@ static void dce112_program_pixel_clk_resync(
static bool dce110_program_pix_clk(
struct clock_source *clock_source,
struct pixel_clk_params *pix_clk_params,
+ enum dp_link_encoding encoding,
struct pll_settings *pll_settings)
{
struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source);
@@ -913,26 +918,12 @@ static bool dce110_program_pix_clk(
static bool dce112_program_pix_clk(
struct clock_source *clock_source,
struct pixel_clk_params *pix_clk_params,
+ enum dp_link_encoding encoding,
struct pll_settings *pll_settings)
{
struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source);
struct bp_pixel_clock_parameters bp_pc_params = {0};
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (IS_FPGA_MAXIMUS_DC(clock_source->ctx->dce_environment)) {
- unsigned int inst = pix_clk_params->controller_id - CONTROLLER_ID_D0;
- unsigned dp_dto_ref_100hz = 7000000;
- unsigned clock_100hz = pll_settings->actual_pix_clk_100hz;
-
- /* Set DTO values: phase = target clock, modulo = reference clock */
- REG_WRITE(PHASE[inst], clock_100hz);
- REG_WRITE(MODULO[inst], dp_dto_ref_100hz);
-
- /* Enable DTO */
- REG_UPDATE(PIXEL_RATE_CNTL[inst], DP_DTO0_ENABLE, 1);
- return true;
- }
-#endif
/* First disable SS
* ATOMBIOS will enable by default SS on PLL for DP,
* do not disable it here
@@ -971,6 +962,209 @@ static bool dce112_program_pix_clk(
return true;
}
+static bool dcn31_program_pix_clk(
+ struct clock_source *clock_source,
+ struct pixel_clk_params *pix_clk_params,
+ enum dp_link_encoding encoding,
+ struct pll_settings *pll_settings)
+{
+ struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source);
+ unsigned int inst = pix_clk_params->controller_id - CONTROLLER_ID_D0;
+ unsigned int dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dprefclk_khz;
+ const struct pixel_rate_range_table_entry *e =
+ look_up_in_video_optimized_rate_tlb(pix_clk_params->requested_pix_clk_100hz / 10);
+ struct bp_pixel_clock_parameters bp_pc_params = {0};
+ enum transmitter_color_depth bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24;
+
+ // Apply ssed(spread spectrum) dpref clock for edp and dp
+ if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0 &&
+ dc_is_dp_signal(pix_clk_params->signal_type) &&
+ encoding == DP_8b_10b_ENCODING)
+ dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz;
+
+ // For these signal types Driver to program DP_DTO without calling VBIOS Command table
+ if (dc_is_dp_signal(pix_clk_params->signal_type) || dc_is_virtual_signal(pix_clk_params->signal_type)) {
+ if (e) {
+ /* Set DTO values: phase = target clock, modulo = reference clock*/
+ REG_WRITE(PHASE[inst], e->target_pixel_rate_khz * e->mult_factor);
+ REG_WRITE(MODULO[inst], dp_dto_ref_khz * e->div_factor);
+ } else {
+ /* Set DTO values: phase = target clock, modulo = reference clock*/
+ REG_WRITE(PHASE[inst], pll_settings->actual_pix_clk_100hz * 100);
+ REG_WRITE(MODULO[inst], dp_dto_ref_khz * 1000);
+ }
+ /* Enable DTO */
+ if (clk_src->cs_mask->PIPE0_DTO_SRC_SEL)
+ if (encoding == DP_128b_132b_ENCODING)
+ REG_UPDATE_2(PIXEL_RATE_CNTL[inst],
+ DP_DTO0_ENABLE, 1,
+ PIPE0_DTO_SRC_SEL, 2);
+ else
+ REG_UPDATE_2(PIXEL_RATE_CNTL[inst],
+ DP_DTO0_ENABLE, 1,
+ PIPE0_DTO_SRC_SEL, 1);
+ else
+ REG_UPDATE(PIXEL_RATE_CNTL[inst],
+ DP_DTO0_ENABLE, 1);
+ } else {
+
+ if (clk_src->cs_mask->PIPE0_DTO_SRC_SEL)
+ REG_UPDATE(PIXEL_RATE_CNTL[inst],
+ PIPE0_DTO_SRC_SEL, 0);
+
+ /*ATOMBIOS expects pixel rate adjusted by deep color ratio)*/
+ bp_pc_params.controller_id = pix_clk_params->controller_id;
+ bp_pc_params.pll_id = clock_source->id;
+ bp_pc_params.target_pixel_clock_100hz = pll_settings->actual_pix_clk_100hz;
+ bp_pc_params.encoder_object_id = pix_clk_params->encoder_object_id;
+ bp_pc_params.signal_type = pix_clk_params->signal_type;
+
+ // Make sure we send the correct color depth to DMUB for HDMI
+ if (pix_clk_params->signal_type == SIGNAL_TYPE_HDMI_TYPE_A) {
+ switch (pix_clk_params->color_depth) {
+ case COLOR_DEPTH_888:
+ bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24;
+ break;
+ case COLOR_DEPTH_101010:
+ bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_30;
+ break;
+ case COLOR_DEPTH_121212:
+ bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_36;
+ break;
+ case COLOR_DEPTH_161616:
+ bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_48;
+ break;
+ default:
+ bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24;
+ break;
+ }
+ bp_pc_params.color_depth = bp_pc_colour_depth;
+ }
+
+ if (clock_source->id != CLOCK_SOURCE_ID_DP_DTO) {
+ bp_pc_params.flags.SET_GENLOCK_REF_DIV_SRC =
+ pll_settings->use_external_clk;
+ bp_pc_params.flags.SET_XTALIN_REF_SRC =
+ !pll_settings->use_external_clk;
+ if (pix_clk_params->flags.SUPPORT_YCBCR420) {
+ bp_pc_params.flags.SUPPORT_YUV_420 = 1;
+ }
+ }
+ if (clk_src->bios->funcs->set_pixel_clock(
+ clk_src->bios, &bp_pc_params) != BP_RESULT_OK)
+ return false;
+ /* Resync deep color DTO */
+ if (clock_source->id != CLOCK_SOURCE_ID_DP_DTO)
+ dce112_program_pixel_clk_resync(clk_src,
+ pix_clk_params->signal_type,
+ pix_clk_params->color_depth,
+ pix_clk_params->flags.SUPPORT_YCBCR420);
+ }
+
+ return true;
+}
+
+static bool dcn401_program_pix_clk(
+ struct clock_source *clock_source,
+ struct pixel_clk_params *pix_clk_params,
+ enum dp_link_encoding encoding,
+ struct pll_settings *pll_settings)
+{
+ struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source);
+ unsigned int inst = pix_clk_params->controller_id - CONTROLLER_ID_D0;
+ const struct pixel_rate_range_table_entry *e =
+ look_up_in_video_optimized_rate_tlb(pix_clk_params->requested_pix_clk_100hz / 10);
+ struct bp_pixel_clock_parameters bp_pc_params = {0};
+ enum transmitter_color_depth bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24;
+ struct dp_dto_params dto_params = { 0 };
+
+ dto_params.otg_inst = inst;
+ dto_params.signal = pix_clk_params->signal_type;
+
+ // all but TMDS gets Driver to program DP_DTO without calling VBIOS Command table
+ if (!dc_is_tmds_signal(pix_clk_params->signal_type)) {
+ long long dtbclk_p_src_clk_khz;
+
+ dtbclk_p_src_clk_khz = clock_source->ctx->dc->clk_mgr->dprefclk_khz;
+ dto_params.clk_src = DPREFCLK;
+
+ if (e) {
+ dto_params.pixclk_hz = e->target_pixel_rate_khz;
+ dto_params.pixclk_hz *= e->mult_factor;
+ dto_params.refclk_hz = dtbclk_p_src_clk_khz;
+ dto_params.refclk_hz *= e->div_factor;
+ } else {
+ dto_params.pixclk_hz = pix_clk_params->requested_pix_clk_100hz;
+ dto_params.pixclk_hz *= 100;
+ dto_params.refclk_hz = dtbclk_p_src_clk_khz;
+ dto_params.refclk_hz *= 1000;
+ }
+
+ /* enable DP DTO */
+ clock_source->ctx->dc->res_pool->dccg->funcs->set_dp_dto(
+ clock_source->ctx->dc->res_pool->dccg,
+ &dto_params);
+
+ } else {
+ if (pll_settings->actual_pix_clk_100hz > 6000000UL)
+ return false;
+
+ /* disables DP DTO when provided with TMDS signal type */
+ clock_source->ctx->dc->res_pool->dccg->funcs->set_dp_dto(
+ clock_source->ctx->dc->res_pool->dccg,
+ &dto_params);
+
+ /*ATOMBIOS expects pixel rate adjusted by deep color ratio)*/
+ bp_pc_params.controller_id = pix_clk_params->controller_id;
+ bp_pc_params.pll_id = clock_source->id;
+ bp_pc_params.target_pixel_clock_100hz = pll_settings->actual_pix_clk_100hz;
+ bp_pc_params.encoder_object_id = pix_clk_params->encoder_object_id;
+ bp_pc_params.signal_type = pix_clk_params->signal_type;
+
+ // Make sure we send the correct color depth to DMUB for HDMI
+ if (pix_clk_params->signal_type == SIGNAL_TYPE_HDMI_TYPE_A) {
+ switch (pix_clk_params->color_depth) {
+ case COLOR_DEPTH_888:
+ bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24;
+ break;
+ case COLOR_DEPTH_101010:
+ bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_30;
+ break;
+ case COLOR_DEPTH_121212:
+ bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_36;
+ break;
+ case COLOR_DEPTH_161616:
+ bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_48;
+ break;
+ default:
+ bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24;
+ break;
+ }
+ bp_pc_params.color_depth = bp_pc_colour_depth;
+ }
+
+ if (clock_source->id != CLOCK_SOURCE_ID_DP_DTO) {
+ bp_pc_params.flags.SET_GENLOCK_REF_DIV_SRC =
+ pll_settings->use_external_clk;
+ bp_pc_params.flags.SET_XTALIN_REF_SRC =
+ !pll_settings->use_external_clk;
+ if (pix_clk_params->flags.SUPPORT_YCBCR420) {
+ bp_pc_params.flags.SUPPORT_YUV_420 = 1;
+ }
+ }
+ if (clk_src->bios->funcs->set_pixel_clock(
+ clk_src->bios, &bp_pc_params) != BP_RESULT_OK)
+ return false;
+ /* Resync deep color DTO */
+ if (clock_source->id != CLOCK_SOURCE_ID_DP_DTO)
+ dce112_program_pixel_clk_resync(clk_src,
+ pix_clk_params->signal_type,
+ pix_clk_params->color_depth,
+ pix_clk_params->flags.SUPPORT_YCBCR420);
+ }
+
+ return true;
+}
static bool dce110_clock_source_power_down(
struct clock_source *clk_src)
@@ -1003,6 +1197,7 @@ static bool get_pixel_clk_frequency_100hz(
struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source);
unsigned int clock_hz = 0;
unsigned int modulo_hz = 0;
+ unsigned int dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dprefclk_khz;
if (clock_source->id == CLOCK_SOURCE_ID_DP_DTO) {
clock_hz = REG_READ(PHASE[inst]);
@@ -1013,9 +1208,12 @@ static bool get_pixel_clk_frequency_100hz(
* not be programmed equal to DPREFCLK
*/
modulo_hz = REG_READ(MODULO[inst]);
- *pixel_clk_khz = div_u64((uint64_t)clock_hz*
- clock_source->ctx->dc->clk_mgr->dprefclk_khz*10,
- modulo_hz);
+ if (modulo_hz)
+ *pixel_clk_khz = div_u64((uint64_t)clock_hz*
+ dp_dto_ref_khz*10,
+ modulo_hz);
+ else
+ *pixel_clk_khz = 0;
} else {
/* NOTE: There is agreement with VBIOS here that MODULO is
* programmed equal to DPREFCLK, in which case PHASE will be
@@ -1029,13 +1227,13 @@ static bool get_pixel_clk_frequency_100hz(
return false;
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
/* this table is use to find *1.001 and /1.001 pixel rates from non-precise pixel rate */
const struct pixel_rate_range_table_entry video_optimized_pixel_rates[] = {
// /1.001 rates
{25170, 25180, 25200, 1000, 1001}, //25.2MHz -> 25.17
{59340, 59350, 59400, 1000, 1001}, //59.4Mhz -> 59.340
{74170, 74180, 74250, 1000, 1001}, //74.25Mhz -> 74.1758
+ {89910, 90000, 90000, 1000, 1001}, //90Mhz -> 89.91
{125870, 125880, 126000, 1000, 1001}, //126Mhz -> 125.87
{148350, 148360, 148500, 1000, 1001}, //148.5Mhz -> 148.3516
{167830, 167840, 168000, 1000, 1001}, //168Mhz -> 167.83
@@ -1079,17 +1277,17 @@ const struct pixel_rate_range_table_entry *look_up_in_video_optimized_rate_tlb(
return NULL;
}
-#endif
static bool dcn20_program_pix_clk(
struct clock_source *clock_source,
struct pixel_clk_params *pix_clk_params,
+ enum dp_link_encoding encoding,
struct pll_settings *pll_settings)
{
struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source);
unsigned int inst = pix_clk_params->controller_id - CONTROLLER_ID_D0;
- dce112_program_pix_clk(clock_source, pix_clk_params, pll_settings);
+ dce112_program_pix_clk(clock_source, pix_clk_params, encoding, pll_settings);
if (clock_source->ctx->dc->hwss.enable_vblanks_synchronization &&
clock_source->ctx->dc->config.vblank_alignment_max_frame_time_diff > 0) {
@@ -1126,10 +1324,10 @@ static const struct clock_source_funcs dcn20_clk_src_funcs = {
.override_dp_pix_clk = dcn20_override_dp_pix_clk
};
-#if defined(CONFIG_DRM_AMD_DC_DCN)
static bool dcn3_program_pix_clk(
struct clock_source *clock_source,
struct pixel_clk_params *pix_clk_params,
+ enum dp_link_encoding encoding,
struct pll_settings *pll_settings)
{
struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source);
@@ -1149,10 +1347,17 @@ static bool dcn3_program_pix_clk(
REG_WRITE(PHASE[inst], pll_settings->actual_pix_clk_100hz * 100);
REG_WRITE(MODULO[inst], dp_dto_ref_khz * 1000);
}
- REG_UPDATE(PIXEL_RATE_CNTL[inst], DP_DTO0_ENABLE, 1);
+ /* Enable DTO */
+ if (clk_src->cs_mask->PIPE0_DTO_SRC_SEL)
+ REG_UPDATE_2(PIXEL_RATE_CNTL[inst],
+ DP_DTO0_ENABLE, 1,
+ PIPE0_DTO_SRC_SEL, 1);
+ else
+ REG_UPDATE(PIXEL_RATE_CNTL[inst],
+ DP_DTO0_ENABLE, 1);
} else
// For other signal types(HDMI_TYPE_A, DVI) Driver still to call VBIOS Command table
- dce112_program_pix_clk(clock_source, pix_clk_params, pll_settings);
+ dce112_program_pix_clk(clock_source, pix_clk_params, encoding, pll_settings);
return true;
}
@@ -1162,10 +1367,9 @@ static uint32_t dcn3_get_pix_clk_dividers(
struct pixel_clk_params *pix_clk_params,
struct pll_settings *pll_settings)
{
- unsigned long long actual_pix_clk_100Hz = pix_clk_params->requested_pix_clk_100hz;
- struct dce110_clk_src *clk_src;
+ unsigned long long actual_pix_clk_100Hz = pix_clk_params ? pix_clk_params->requested_pix_clk_100hz : 0;
+ struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(cs);
- clk_src = TO_DCE110_CLK_SRC(cs);
DC_LOGGER_INIT();
if (pix_clk_params == NULL || pll_settings == NULL
@@ -1205,7 +1409,21 @@ static const struct clock_source_funcs dcn3_clk_src_funcs = {
.get_pix_clk_dividers = dcn3_get_pix_clk_dividers,
.get_pixel_clk_frequency_100hz = get_pixel_clk_frequency_100hz
};
-#endif
+
+static const struct clock_source_funcs dcn31_clk_src_funcs = {
+ .cs_power_down = dce110_clock_source_power_down,
+ .program_pix_clk = dcn31_program_pix_clk,
+ .get_pix_clk_dividers = dcn3_get_pix_clk_dividers,
+ .get_pixel_clk_frequency_100hz = get_pixel_clk_frequency_100hz
+};
+
+static const struct clock_source_funcs dcn401_clk_src_funcs = {
+ .cs_power_down = dce110_clock_source_power_down,
+ .program_pix_clk = dcn401_program_pix_clk,
+ .get_pix_clk_dividers = dcn3_get_pix_clk_dividers,
+ .get_pixel_clk_frequency_100hz = get_pixel_clk_frequency_100hz
+};
+
/*****************************************/
/* Constructor */
/*****************************************/
@@ -1591,7 +1809,6 @@ bool dcn20_clk_src_construct(
return ret;
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
bool dcn3_clk_src_construct(
struct dce110_clk_src *clk_src,
struct dc_context *ctx,
@@ -1607,9 +1824,38 @@ bool dcn3_clk_src_construct(
return ret;
}
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN)
+bool dcn31_clk_src_construct(
+ struct dce110_clk_src *clk_src,
+ struct dc_context *ctx,
+ struct dc_bios *bios,
+ enum clock_source_id id,
+ const struct dce110_clk_src_regs *regs,
+ const struct dce110_clk_src_shift *cs_shift,
+ const struct dce110_clk_src_mask *cs_mask)
+{
+ bool ret = dce112_clk_src_construct(clk_src, ctx, bios, id, regs, cs_shift, cs_mask);
+
+ clk_src->base.funcs = &dcn31_clk_src_funcs;
+
+ return ret;
+}
+
+bool dcn401_clk_src_construct(
+ struct dce110_clk_src *clk_src,
+ struct dc_context *ctx,
+ struct dc_bios *bios,
+ enum clock_source_id id,
+ const struct dce110_clk_src_regs *regs,
+ const struct dce110_clk_src_shift *cs_shift,
+ const struct dce110_clk_src_mask *cs_mask)
+{
+ bool ret = dce112_clk_src_construct(clk_src, ctx, bios, id, regs, cs_shift, cs_mask);
+
+ clk_src->base.funcs = &dcn401_clk_src_funcs;
+
+ return ret;
+}
bool dcn301_clk_src_construct(
struct dce110_clk_src *clk_src,
struct dc_context *ctx,
@@ -1625,4 +1871,3 @@ bool dcn301_clk_src_construct(
return ret;
}
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h
index 692fa23ca02b..94128f7a18b1 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h
@@ -100,7 +100,6 @@
SRII(PIXEL_RATE_CNTL, OTG, 2),\
SRII(PIXEL_RATE_CNTL, OTG, 3)
-#if defined(CONFIG_DRM_AMD_DC_DCN)
#define CS_COMMON_REG_LIST_DCN3_0(index, pllid) \
SRI(PIXCLK_RESYNC_CNTL, PHYPLL, pllid),\
SRII(PHASE, DP_DTO, 0),\
@@ -130,9 +129,7 @@
SRII(PIXEL_RATE_CNTL, OTG, 1),\
SRII(PIXEL_RATE_CNTL, OTG, 2),\
SRII(PIXEL_RATE_CNTL, OTG, 3)
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN)
#define CS_COMMON_REG_LIST_DCN3_02(index, pllid) \
SRI(PIXCLK_RESYNC_CNTL, PHYPLL, pllid),\
SRII(PHASE, DP_DTO, 0),\
@@ -160,14 +157,19 @@
SRII(PIXEL_RATE_CNTL, OTG, 0),\
SRII(PIXEL_RATE_CNTL, OTG, 1)
-#endif
#define CS_COMMON_MASK_SH_LIST_DCN2_0(mask_sh)\
CS_SF(DP_DTO0_PHASE, DP_DTO0_PHASE, mask_sh),\
CS_SF(DP_DTO0_MODULO, DP_DTO0_MODULO, mask_sh),\
CS_SF(PHYPLLA_PIXCLK_RESYNC_CNTL, PHYPLLA_DCCG_DEEP_COLOR_CNTL, mask_sh),\
CS_SF(OTG0_PIXEL_RATE_CNTL, DP_DTO0_ENABLE, mask_sh)
-#if defined(CONFIG_DRM_AMD_DC_DCN)
+#define CS_COMMON_MASK_SH_LIST_DCN3_1_4(mask_sh)\
+ CS_COMMON_MASK_SH_LIST_DCN2_0(mask_sh),\
+ CS_SF(OTG0_PIXEL_RATE_CNTL, PIPE0_DTO_SRC_SEL, mask_sh),
+
+#define CS_COMMON_MASK_SH_LIST_DCN3_2(mask_sh)\
+ CS_COMMON_MASK_SH_LIST_DCN2_0(mask_sh),\
+ CS_SF(OTG0_PIXEL_RATE_CNTL, PIPE0_DTO_SRC_SEL, mask_sh)
#define CS_COMMON_REG_LIST_DCN1_0(index, pllid) \
SRI(PIXCLK_RESYNC_CNTL, PHYPLL, pllid),\
@@ -190,7 +192,6 @@
CS_SF(PHYPLLA_PIXCLK_RESYNC_CNTL, PHYPLLA_DCCG_DEEP_COLOR_CNTL, mask_sh),\
CS_SF(OTG0_PIXEL_RATE_CNTL, DP_DTO0_ENABLE, mask_sh)
-#endif
#define CS_REG_FIELD_LIST(type) \
type PLL_REF_DIV_SRC; \
@@ -203,12 +204,17 @@
type DP_DTO0_MODULO; \
type DP_DTO0_ENABLE;
+#define CS_REG_FIELD_LIST_DCN32(type) \
+ type PIPE0_DTO_SRC_SEL;
+
struct dce110_clk_src_shift {
CS_REG_FIELD_LIST(uint8_t)
+ CS_REG_FIELD_LIST_DCN32(uint8_t)
};
struct dce110_clk_src_mask{
CS_REG_FIELD_LIST(uint32_t)
+ CS_REG_FIELD_LIST_DCN32(uint32_t)
};
struct dce110_clk_src_regs {
@@ -251,7 +257,7 @@ bool dce110_clk_src_construct(
struct dce110_clk_src *clk_src,
struct dc_context *ctx,
struct dc_bios *bios,
- enum clock_source_id,
+ enum clock_source_id id,
const struct dce110_clk_src_regs *regs,
const struct dce110_clk_src_shift *cs_shift,
const struct dce110_clk_src_mask *cs_mask);
@@ -274,7 +280,6 @@ bool dcn20_clk_src_construct(
const struct dce110_clk_src_shift *cs_shift,
const struct dce110_clk_src_mask *cs_mask);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
bool dcn3_clk_src_construct(
struct dce110_clk_src *clk_src,
struct dc_context *ctx,
@@ -292,8 +297,24 @@ bool dcn301_clk_src_construct(
const struct dce110_clk_src_regs *regs,
const struct dce110_clk_src_shift *cs_shift,
const struct dce110_clk_src_mask *cs_mask);
-#endif
+bool dcn31_clk_src_construct(
+ struct dce110_clk_src *clk_src,
+ struct dc_context *ctx,
+ struct dc_bios *bios,
+ enum clock_source_id id,
+ const struct dce110_clk_src_regs *regs,
+ const struct dce110_clk_src_shift *cs_shift,
+ const struct dce110_clk_src_mask *cs_mask);
+
+bool dcn401_clk_src_construct(
+ struct dce110_clk_src *clk_src,
+ struct dc_context *ctx,
+ struct dc_bios *bios,
+ enum clock_source_id id,
+ const struct dce110_clk_src_regs *regs,
+ const struct dce110_clk_src_shift *cs_shift,
+ const struct dce110_clk_src_mask *cs_mask);
/* this table is use to find *1.001 and /1.001 pixel rates from non-precise pixel rate */
struct pixel_rate_range_table_entry {
unsigned int range_min_khz;
@@ -303,10 +324,8 @@ struct pixel_rate_range_table_entry {
unsigned short div_factor;
};
-#if defined(CONFIG_DRM_AMD_DC_DCN)
extern const struct pixel_rate_range_table_entry video_optimized_pixel_rates[];
const struct pixel_rate_range_table_entry *look_up_in_video_optimized_rate_tlb(
unsigned int pixel_rate_khz);
-#endif
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
index 8cd841320ded..5f8fba45d98d 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
@@ -23,9 +23,6 @@
*
*/
-#include <linux/delay.h>
-#include <linux/slab.h>
-
#include "core_types.h"
#include "link_encoder.h"
#include "dce_dmcu.h"
@@ -70,9 +67,7 @@
//Register access policy version
#define mmMP0_SMN_C2PMSG_91 0x1609B
-#if defined(CONFIG_DRM_AMD_DC_DCN)
static const uint32_t abm_gain_stepsize = 0x0060;
-#endif
static bool dce_dmcu_init(struct dmcu *dmcu)
{
@@ -81,9 +76,9 @@ static bool dce_dmcu_init(struct dmcu *dmcu)
}
static bool dce_dmcu_load_iram(struct dmcu *dmcu,
- unsigned int start_offset,
- const char *src,
- unsigned int bytes)
+ unsigned int start_offset,
+ const char *src,
+ unsigned int bytes)
{
struct dce_dmcu *dmcu_dce = TO_DCE_DMCU(dmcu);
unsigned int count = 0;
@@ -333,7 +328,6 @@ static void dce_get_psr_wait_loop(
return;
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
static void dcn10_get_dmcu_version(struct dmcu *dmcu)
{
struct dce_dmcu *dmcu_dce = TO_DCE_DMCU(dmcu);
@@ -592,6 +586,7 @@ static void dcn10_dmcu_set_psr_enable(struct dmcu *dmcu, bool enable, bool wait)
if (state == PSR_STATE0)
break;
}
+ /* must *not* be fsleep - this can be called from high irq levels */
udelay(500);
}
@@ -930,23 +925,23 @@ static bool dcn10_recv_edid_cea_ack(struct dmcu *dmcu, int *offset)
return false;
}
-#endif //(CONFIG_DRM_AMD_DC_DCN)
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
static void dcn10_forward_crc_window(struct dmcu *dmcu,
- struct crc_region *crc_win,
+ struct rect *rect,
struct otg_phy_mux *mux_mapping)
{
struct dce_dmcu *dmcu_dce = TO_DCE_DMCU(dmcu);
unsigned int dmcu_max_retry_on_wait_reg_ready = 801;
unsigned int dmcu_wait_reg_ready_interval = 100;
unsigned int crc_start = 0, crc_end = 0, otg_phy_mux = 0;
+ int x_start, y_start, x_end, y_end;
/* If microcontroller is not running, do nothing */
if (dmcu->dmcu_state != DMCU_RUNNING)
return;
- if (!crc_win)
+ if (!rect)
return;
/* waitDMCUReadyForCmd */
@@ -954,9 +949,14 @@ static void dcn10_forward_crc_window(struct dmcu *dmcu,
dmcu_wait_reg_ready_interval,
dmcu_max_retry_on_wait_reg_ready);
+ x_start = rect->x;
+ y_start = rect->y;
+ x_end = x_start + rect->width;
+ y_end = y_start + rect->height;
+
/* build up nitification data */
- crc_start = (((unsigned int) crc_win->x_start) << 16) | crc_win->y_start;
- crc_end = (((unsigned int) crc_win->x_end) << 16) | crc_win->y_end;
+ crc_start = (((unsigned int) x_start) << 16) | y_start;
+ crc_end = (((unsigned int) x_end) << 16) | y_end;
otg_phy_mux =
(((unsigned int) mux_mapping->otg_output_num) << 16) | mux_mapping->phy_output_num;
@@ -1021,7 +1021,6 @@ static const struct dmcu_funcs dce_funcs = {
.is_dmcu_initialized = dce_is_dmcu_initialized
};
-#if defined(CONFIG_DRM_AMD_DC_DCN)
static const struct dmcu_funcs dcn10_funcs = {
.dmcu_init = dcn10_dmcu_init,
.load_iram = dcn10_dmcu_load_iram,
@@ -1065,7 +1064,6 @@ static const struct dmcu_funcs dcn21_funcs = {
.lock_phy = dcn20_lock_phy,
.unlock_phy = dcn20_unlock_phy
};
-#endif
static void dce_dmcu_construct(
struct dce_dmcu *dmcu_dce,
@@ -1085,7 +1083,6 @@ static void dce_dmcu_construct(
dmcu_dce->dmcu_mask = dmcu_mask;
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
static void dcn21_dmcu_construct(
struct dce_dmcu *dmcu_dce,
struct dc_context *ctx,
@@ -1097,13 +1094,10 @@ static void dcn21_dmcu_construct(
dce_dmcu_construct(dmcu_dce, ctx, regs, dmcu_shift, dmcu_mask);
- if (!IS_FPGA_MAXIMUS_DC(ctx->dce_environment)) {
- psp_version = dm_read_reg(ctx, mmMP0_SMN_C2PMSG_58);
- dmcu_dce->base.auto_load_dmcu = ((psp_version & 0x00FF00FF) > 0x00110029);
- dmcu_dce->base.psp_version = psp_version;
- }
+ psp_version = dm_read_reg(ctx, mmMP0_SMN_C2PMSG_58);
+ dmcu_dce->base.auto_load_dmcu = ((psp_version & 0x00FF00FF) > 0x00110029);
+ dmcu_dce->base.psp_version = psp_version;
}
-#endif
struct dmcu *dce_dmcu_create(
struct dc_context *ctx,
@@ -1126,14 +1120,13 @@ struct dmcu *dce_dmcu_create(
return &dmcu_dce->base;
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
struct dmcu *dcn10_dmcu_create(
struct dc_context *ctx,
const struct dce_dmcu_registers *regs,
const struct dce_dmcu_shift *dmcu_shift,
const struct dce_dmcu_mask *dmcu_mask)
{
- struct dce_dmcu *dmcu_dce = kzalloc(sizeof(*dmcu_dce), GFP_ATOMIC);
+ struct dce_dmcu *dmcu_dce = kzalloc(sizeof(*dmcu_dce), GFP_KERNEL);
if (dmcu_dce == NULL) {
BREAK_TO_DEBUGGER();
@@ -1154,7 +1147,7 @@ struct dmcu *dcn20_dmcu_create(
const struct dce_dmcu_shift *dmcu_shift,
const struct dce_dmcu_mask *dmcu_mask)
{
- struct dce_dmcu *dmcu_dce = kzalloc(sizeof(*dmcu_dce), GFP_ATOMIC);
+ struct dce_dmcu *dmcu_dce = kzalloc(sizeof(*dmcu_dce), GFP_KERNEL);
if (dmcu_dce == NULL) {
BREAK_TO_DEBUGGER();
@@ -1175,7 +1168,7 @@ struct dmcu *dcn21_dmcu_create(
const struct dce_dmcu_shift *dmcu_shift,
const struct dce_dmcu_mask *dmcu_mask)
{
- struct dce_dmcu *dmcu_dce = kzalloc(sizeof(*dmcu_dce), GFP_ATOMIC);
+ struct dce_dmcu *dmcu_dce = kzalloc(sizeof(*dmcu_dce), GFP_KERNEL);
if (dmcu_dce == NULL) {
BREAK_TO_DEBUGGER();
@@ -1189,7 +1182,6 @@ struct dmcu *dcn21_dmcu_create(
return &dmcu_dce->base;
}
-#endif
void dce_dmcu_destroy(struct dmcu **dmcu)
{
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c.c
index dd41736bb5c4..f5cd2392fc5f 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c.c
@@ -25,6 +25,32 @@
#include "dce_i2c.h"
#include "reg_helper.h"
+bool dce_i2c_oem_device_present(
+ struct resource_pool *pool,
+ struct ddc_service *ddc,
+ size_t slave_address
+)
+{
+ struct dc *dc = ddc->ctx->dc;
+ struct dc_bios *dcb = dc->ctx->dc_bios;
+ struct graphics_object_id id = {0};
+ struct graphics_object_i2c_info i2c_info;
+
+ if (!dc->ctx->dc_bios->fw_info.oem_i2c_present)
+ return false;
+
+ id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id;
+ id.enum_id = 0;
+ id.type = OBJECT_TYPE_GENERIC;
+ if (dcb->funcs->get_i2c_info(dcb, id, &i2c_info) != BP_RESULT_OK)
+ return false;
+
+ if (i2c_info.i2c_slave_address != slave_address)
+ return false;
+
+ return true;
+}
+
bool dce_i2c_submit_command(
struct resource_pool *pool,
struct ddc *ddc,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c.h b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c.h
index a171c5cd8439..535fd58de450 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c.h
@@ -30,6 +30,12 @@
#include "dce_i2c_hw.h"
#include "dce_i2c_sw.h"
+bool dce_i2c_oem_device_present(
+ struct resource_pool *pool,
+ struct ddc_service *ddc,
+ size_t slave_address
+);
+
bool dce_i2c_submit_command(
struct resource_pool *pool,
struct ddc *ddc,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c
index 6d1b01c267b7..365dd2e37aea 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c
@@ -22,9 +22,6 @@
* Authors: AMD
*
*/
-
-#include <linux/delay.h>
-
#include "resource.h"
#include "dce_i2c.h"
#include "dce_i2c_hw.h"
@@ -295,24 +292,51 @@ static void set_speed(
FN(DC_I2C_DDC1_SPEED, DC_I2C_DDC1_THRESHOLD), 2);
}
+static bool acquire_engine(struct dce_i2c_hw *dce_i2c_hw)
+{
+ uint32_t arbitrate = 0;
+
+ REG_GET(DC_I2C_ARBITRATION, DC_I2C_REG_RW_CNTL_STATUS, &arbitrate);
+ switch (arbitrate) {
+ case DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_SW:
+ return true;
+ case DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_HW:
+ return false;
+ case DC_I2C_STATUS__DC_I2C_STATUS_IDLE:
+ default:
+ break;
+ }
+
+ REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, true);
+ REG_GET(DC_I2C_ARBITRATION, DC_I2C_REG_RW_CNTL_STATUS, &arbitrate);
+ if (arbitrate != DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_SW)
+ return false;
+
+ return true;
+}
+
static bool setup_engine(
struct dce_i2c_hw *dce_i2c_hw)
{
+ // Deassert soft reset to unblock I2C engine registers
+ REG_UPDATE(DC_I2C_CONTROL, DC_I2C_SOFT_RESET, false);
+
uint32_t i2c_setup_limit = I2C_SETUP_TIME_LIMIT_DCE;
uint32_t reset_length = 0;
- if (dce_i2c_hw->ctx->dc->debug.enable_mem_low_power.bits.i2c) {
- if (dce_i2c_hw->regs->DIO_MEM_PWR_CTRL) {
- REG_UPDATE(DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, 0);
- REG_WAIT(DIO_MEM_PWR_STATUS, I2C_MEM_PWR_STATE, 0, 0, 5);
- }
- }
+ if (dce_i2c_hw->ctx->dc->debug.enable_mem_low_power.bits.i2c) {
+ if (dce_i2c_hw->regs->DIO_MEM_PWR_CTRL) {
+ REG_UPDATE(DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, 0);
+ REG_WAIT(DIO_MEM_PWR_STATUS, I2C_MEM_PWR_STATE, 0, 0, 5);
+ }
+ }
- /* we have checked I2c not used by DMCU, set SW use I2C REQ to 1 to indicate SW using it*/
- REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, 1);
+ if (dce_i2c_hw->masks->DC_I2C_DDC1_CLK_EN)
+ REG_UPDATE_N(SETUP, 1,
+ FN(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_CLK_EN), 1);
- /* we have checked I2c not used by DMCU, set SW use I2C REQ to 1 to indicate SW using it*/
- REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, 1);
+ if (!acquire_engine(dce_i2c_hw))
+ return false;
/*set SW requested I2c speed to default, if API calls in it will be override later*/
set_speed(dce_i2c_hw, dce_i2c_hw->ctx->dc->caps.i2c_speed_in_khz);
@@ -321,9 +345,8 @@ static bool setup_engine(
i2c_setup_limit = dce_i2c_hw->setup_limit;
/* Program pin select */
- REG_UPDATE_6(DC_I2C_CONTROL,
+ REG_UPDATE_5(DC_I2C_CONTROL,
DC_I2C_GO, 0,
- DC_I2C_SOFT_RESET, 0,
DC_I2C_SEND_RESET, 0,
DC_I2C_SW_STATUS_RESET, 1,
DC_I2C_TRANSACTION_COUNT, 0,
@@ -353,6 +376,32 @@ static bool setup_engine(
return true;
}
+/**
+ * cntl_stuck_hw_workaround - Workaround for I2C engine stuck state
+ * @dce_i2c_hw: Pointer to dce_i2c_hw structure
+ *
+ * If we boot without an HDMI display, the I2C engine does not get initialized
+ * correctly. One of its symptoms is that SW_USE_I2C does not get cleared after
+ * acquire. After setting SW_DONE_USING_I2C on release, the engine gets
+ * immediately reacquired by SW, preventing DMUB from using it.
+ *
+ * This function checks the I2C arbitration status and applies a release
+ * workaround if necessary.
+ */
+static void cntl_stuck_hw_workaround(struct dce_i2c_hw *dce_i2c_hw)
+{
+ uint32_t arbitrate = 0;
+
+ REG_GET(DC_I2C_ARBITRATION, DC_I2C_REG_RW_CNTL_STATUS, &arbitrate);
+ if (arbitrate != DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_SW)
+ return;
+
+ // Still acquired after release, release again as a workaround
+ REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, true);
+ REG_GET(DC_I2C_ARBITRATION, DC_I2C_REG_RW_CNTL_STATUS, &arbitrate);
+ ASSERT(arbitrate != DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_SW);
+}
+
static void release_engine(
struct dce_i2c_hw *dce_i2c_hw)
{
@@ -380,9 +429,9 @@ static void release_engine(
/*for HW HDCP Ri polling failure w/a test*/
set_speed(dce_i2c_hw, dce_i2c_hw->ctx->dc->caps.i2c_speed_in_khz_hdcp);
- /* Release I2C after reset, so HW or DMCU could use it */
- REG_UPDATE_2(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, 1,
- DC_I2C_SW_USE_I2C_REG_REQ, 0);
+ // Release I2C engine so it can be used by HW or DMCU, automatically clears SW_USE_I2C
+ REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, true);
+ cntl_stuck_hw_workaround(dce_i2c_hw);
if (dce_i2c_hw->ctx->dc->debug.enable_mem_low_power.bits.i2c) {
if (dce_i2c_hw->regs->DIO_MEM_PWR_CTRL)
@@ -442,10 +491,9 @@ struct dce_i2c_hw *acquire_i2c_hw_engine(
return dce_i2c_hw;
}
-static enum i2c_channel_operation_result dce_i2c_hw_engine_wait_on_operation_result(
- struct dce_i2c_hw *dce_i2c_hw,
- uint32_t timeout,
- enum i2c_channel_operation_result expected_result)
+static enum i2c_channel_operation_result dce_i2c_hw_engine_wait_on_operation_result(struct dce_i2c_hw *dce_i2c_hw,
+ uint32_t timeout,
+ enum i2c_channel_operation_result expected_result)
{
enum i2c_channel_operation_result result;
uint32_t i = 0;
@@ -509,11 +557,10 @@ static uint32_t get_transaction_timeout_hw(
return period_timeout * num_of_clock_stretches;
}
-static bool dce_i2c_hw_engine_submit_payload(
- struct dce_i2c_hw *dce_i2c_hw,
- struct i2c_payload *payload,
- bool middle_of_transaction,
- uint32_t speed)
+static bool dce_i2c_hw_engine_submit_payload(struct dce_i2c_hw *dce_i2c_hw,
+ struct i2c_payload *payload,
+ bool middle_of_transaction,
+ uint32_t speed)
{
struct i2c_request_transaction_data request;
@@ -544,7 +591,7 @@ static bool dce_i2c_hw_engine_submit_payload(
DCE_I2C_TRANSACTION_ACTION_I2C_WRITE;
- request.address = (uint8_t) ((payload->address << 1) | !payload->write);
+ request.address = (uint8_t) ((payload->address << 1) | (payload->write ? 0 : 1));
request.length = payload->length;
request.data = payload->data;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h
index 3f45ecd189a2..a9a16f645994 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h
@@ -188,6 +188,7 @@ struct dce_i2c_shift {
uint8_t DC_I2C_REG_RW_CNTL_STATUS;
uint8_t I2C_LIGHT_SLEEP_FORCE;
uint8_t I2C_MEM_PWR_STATE;
+ uint8_t DC_I2C_DDC1_CLK_EN;
};
struct dce_i2c_mask {
@@ -232,6 +233,7 @@ struct dce_i2c_mask {
uint32_t DC_I2C_REG_RW_CNTL_STATUS;
uint32_t I2C_LIGHT_SLEEP_FORCE;
uint32_t I2C_MEM_PWR_STATE;
+ uint32_t DC_I2C_DDC1_CLK_EN;
};
#define I2C_COMMON_MASK_SH_LIST_DCN2(mask_sh)\
@@ -243,6 +245,14 @@ struct dce_i2c_mask {
I2C_SF(DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh),\
I2C_SF(DIO_MEM_PWR_STATUS, I2C_MEM_PWR_STATE, mask_sh)
+#define I2C_COMMON_MASK_SH_LIST_DCN35(mask_sh)\
+ I2C_COMMON_MASK_SH_LIST_DCN30(mask_sh),\
+ I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_CLK_EN, mask_sh)
+
+#define I2C_COMMON_MASK_SH_LIST_DCN401(mask_sh)\
+ I2C_COMMON_MASK_SH_LIST_DCN30(mask_sh),\
+ I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_CLK_EN, mask_sh)
+
struct dce_i2c_registers {
uint32_t SETUP;
uint32_t SPEED;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c
index 6846afd83701..2d73b94c515c 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c
@@ -23,8 +23,6 @@
*
*/
-#include <linux/delay.h>
-
#include "dce_i2c.h"
#include "dce_i2c_sw.h"
#include "include/gpio_service_interface.h"
@@ -369,6 +367,7 @@ static bool dce_i2c_sw_engine_acquire_engine(
return true;
}
+
bool dce_i2c_engine_acquire_sw(
struct dce_i2c_sw *dce_i2c_sw,
struct ddc *ddc_handle)
@@ -394,12 +393,8 @@ bool dce_i2c_engine_acquire_sw(
return result;
}
-
-
-
-static void dce_i2c_sw_engine_submit_channel_request(
- struct dce_i2c_sw *engine,
- struct i2c_request_transaction_data *req)
+static void dce_i2c_sw_engine_submit_channel_request(struct dce_i2c_sw *engine,
+ struct i2c_request_transaction_data *req)
{
struct ddc *ddc = engine->ddc;
uint16_t clock_delay_div_4 = engine->clock_delay >> 2;
@@ -441,10 +436,9 @@ static void dce_i2c_sw_engine_submit_channel_request(
I2C_CHANNEL_OPERATION_FAILED;
}
-static bool dce_i2c_sw_engine_submit_payload(
- struct dce_i2c_sw *engine,
- struct i2c_payload *payload,
- bool middle_of_transaction)
+static bool dce_i2c_sw_engine_submit_payload(struct dce_i2c_sw *engine,
+ struct i2c_payload *payload,
+ bool middle_of_transaction)
{
struct i2c_request_transaction_data request;
@@ -457,7 +451,7 @@ static bool dce_i2c_sw_engine_submit_payload(
DCE_I2C_TRANSACTION_ACTION_I2C_WRITE_MOT :
DCE_I2C_TRANSACTION_ACTION_I2C_WRITE;
- request.address = (uint8_t) ((payload->address << 1) | !payload->write);
+ request.address = (uint8_t) ((payload->address << 1) | (payload->write ? 0 : 1));
request.length = payload->length;
request.data = payload->data;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c b/drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c
index 80569a2734eb..34bff9aef66c 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c
@@ -23,8 +23,6 @@
*
*/
-#include <linux/slab.h>
-
#include "dce_ipp.h"
#include "reg_helper.h"
#include "dm_services.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
index 1e77ffee71b3..87dbb8d7ed27 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
@@ -23,16 +23,12 @@
*
*/
-#include <linux/delay.h>
-#include <linux/slab.h>
-
#include "reg_helper.h"
#include "core_types.h"
#include "link_encoder.h"
#include "dce_link_encoder.h"
#include "stream_encoder.h"
-#include "i2caux_interface.h"
#include "dc_bios_types.h"
#include "gpio_service_interface.h"
@@ -306,6 +302,10 @@ static void setup_panel_mode(
if (ctx->dc->caps.psp_setup_panel_mode)
return;
+ /* The code below is only applicable to encoders with a digital transmitter. */
+ if (enc110->base.transmitter == TRANSMITTER_UNKNOWN)
+ return;
+
ASSERT(REG(DP_DPHY_INTERNAL_CTRL));
value = REG_READ(DP_DPHY_INTERNAL_CTRL);
@@ -788,8 +788,9 @@ static bool dce110_link_encoder_validate_hdmi_output(
crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420)
return false;
- if (!enc110->base.features.flags.bits.HDMI_6GB_EN &&
- adjusted_pix_clk_khz >= 300000)
+ if ((!enc110->base.features.flags.bits.HDMI_6GB_EN ||
+ enc110->base.ctx->dc->debug.hdmi20_disable) &&
+ adjusted_pix_clk_khz >= 300000)
return false;
if (enc110->base.ctx->dc->debug.hdmi20_disable &&
crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420)
@@ -807,6 +808,33 @@ bool dce110_link_encoder_validate_dp_output(
return true;
}
+static bool dce110_link_encoder_validate_rgb_output(
+ const struct dce110_link_encoder *enc110,
+ const struct dc_crtc_timing *crtc_timing)
+{
+ /* When the VBIOS doesn't specify any limits, use 400 MHz.
+ * The value comes from amdgpu_atombios_get_clock_info.
+ */
+ uint32_t max_pixel_clock_khz = 400000;
+
+ if (enc110->base.ctx->dc_bios->fw_info_valid &&
+ enc110->base.ctx->dc_bios->fw_info.max_pixel_clock) {
+ max_pixel_clock_khz =
+ enc110->base.ctx->dc_bios->fw_info.max_pixel_clock;
+ }
+
+ if (crtc_timing->pix_clk_100hz > max_pixel_clock_khz * 10)
+ return false;
+
+ if (crtc_timing->display_color_depth != COLOR_DEPTH_888)
+ return false;
+
+ if (crtc_timing->pixel_encoding != PIXEL_ENCODING_RGB)
+ return false;
+
+ return true;
+}
+
void dce110_link_encoder_construct(
struct dce110_link_encoder *enc110,
const struct encoder_init_data *init_data,
@@ -827,6 +855,7 @@ void dce110_link_encoder_construct(
enc110->base.connector = init_data->connector;
enc110->base.preferred_engine = ENGINE_ID_UNKNOWN;
+ enc110->base.analog_engine = init_data->analog_engine;
enc110->base.features = *enc_features;
@@ -850,6 +879,11 @@ void dce110_link_encoder_construct(
SIGNAL_TYPE_EDP |
SIGNAL_TYPE_HDMI_TYPE_A;
+ if ((enc110->base.connector.id == CONNECTOR_ID_DUAL_LINK_DVII ||
+ enc110->base.connector.id == CONNECTOR_ID_SINGLE_LINK_DVII) &&
+ enc110->base.analog_engine != ENGINE_ID_UNKNOWN)
+ enc110->base.output_signals |= SIGNAL_TYPE_RGB;
+
/* For DCE 8.0 and 8.1, by design, UNIPHY is hardwired to DIG_BE.
* SW always assign DIG_FE 1:1 mapped to DIG_FE for non-MST UNIPHY.
* SW assign DIG_FE to non-MST UNIPHY first and MST last. So prefer
@@ -888,6 +922,13 @@ void dce110_link_encoder_construct(
enc110->base.preferred_engine = ENGINE_ID_DIGG;
break;
default:
+ if (init_data->analog_engine != ENGINE_ID_UNKNOWN) {
+ /* The connector is analog-only, ie. VGA */
+ enc110->base.preferred_engine = init_data->analog_engine;
+ enc110->base.output_signals = SIGNAL_TYPE_RGB;
+ enc110->base.transmitter = TRANSMITTER_UNKNOWN;
+ break;
+ }
ASSERT_CRITICAL(false);
enc110->base.preferred_engine = ENGINE_ID_UNKNOWN;
}
@@ -899,13 +940,13 @@ void dce110_link_encoder_construct(
enc110->base.id, &bp_cap_info);
/* Override features with DCE-specific values */
- if (BP_RESULT_OK == result) {
+ if (result == BP_RESULT_OK) {
enc110->base.features.flags.bits.IS_HBR2_CAPABLE =
bp_cap_info.DP_HBR2_EN;
enc110->base.features.flags.bits.IS_HBR3_CAPABLE =
bp_cap_info.DP_HBR3_EN;
enc110->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN;
- } else {
+ } else if (result != BP_RESULT_NORECORD) {
DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n",
__func__,
result);
@@ -942,11 +983,13 @@ bool dce110_link_encoder_validate_output_with_stream(
is_valid = dce110_link_encoder_validate_dp_output(
enc110, &stream->timing);
break;
+ case SIGNAL_TYPE_RGB:
+ is_valid = dce110_link_encoder_validate_rgb_output(
+ enc110, &stream->timing);
+ break;
case SIGNAL_TYPE_EDP:
case SIGNAL_TYPE_LVDS:
- is_valid =
- (stream->timing.
- pixel_encoding == PIXEL_ENCODING_RGB) ? true : false;
+ is_valid = stream->timing.pixel_encoding == PIXEL_ENCODING_RGB;
break;
case SIGNAL_TYPE_VIRTUAL:
is_valid = true;
@@ -974,6 +1017,10 @@ void dce110_link_encoder_hw_init(
cntl.coherent = false;
cntl.hpd_sel = enc110->base.hpd_source;
+ /* The code below is only applicable to encoders with a digital transmitter. */
+ if (enc110->base.transmitter == TRANSMITTER_UNKNOWN)
+ return;
+
if (enc110->base.connector.id == CONNECTOR_ID_EDP)
cntl.signal = SIGNAL_TYPE_EDP;
@@ -1039,6 +1086,8 @@ void dce110_link_encoder_setup(
/* DP MST */
REG_UPDATE(DIG_BE_CNTL, DIG_MODE, 5);
break;
+ case SIGNAL_TYPE_RGB:
+ break;
default:
ASSERT_CRITICAL(false);
/* invalid mode ! */
@@ -1287,6 +1336,24 @@ void dce110_link_encoder_disable_output(
struct bp_transmitter_control cntl = { 0 };
enum bp_result result;
+ switch (enc->analog_engine) {
+ case ENGINE_ID_DACA:
+ REG_UPDATE(DAC_ENABLE, DAC_ENABLE, 0);
+ break;
+ case ENGINE_ID_DACB:
+ /* DACB doesn't seem to be present on DCE6+,
+ * although there are references to it in the register file.
+ */
+ DC_LOG_ERROR("%s DACB is unsupported\n", __func__);
+ break;
+ default:
+ break;
+ }
+
+ /* The code below only applies to connectors that support digital signals. */
+ if (enc->transmitter == TRANSMITTER_UNKNOWN)
+ return;
+
if (!dce110_is_dig_enabled(enc)) {
/* OF_SKIP_POWER_DOWN_INACTIVE_ENCODER */
return;
@@ -1324,7 +1391,8 @@ void dce110_link_encoder_disable_output(
void dce110_link_encoder_dp_set_lane_settings(
struct link_encoder *enc,
- const struct link_training_settings *link_settings)
+ const struct dc_link_settings *link_settings,
+ const struct dc_lane_settings lane_settings[LANE_COUNT_DP_MAX])
{
struct dce110_link_encoder *enc110 = TO_DCE110_LINK_ENC(enc);
union dpcd_training_lane_set training_lane_set = { { 0 } };
@@ -1339,33 +1407,36 @@ void dce110_link_encoder_dp_set_lane_settings(
cntl.action = TRANSMITTER_CONTROL_SET_VOLTAGE_AND_PREEMPASIS;
cntl.transmitter = enc110->base.transmitter;
cntl.connector_obj_id = enc110->base.connector;
- cntl.lanes_number = link_settings->link_settings.lane_count;
+ cntl.lanes_number = link_settings->lane_count;
cntl.hpd_sel = enc110->base.hpd_source;
- cntl.pixel_clock = link_settings->link_settings.link_rate *
+ cntl.pixel_clock = link_settings->link_rate *
LINK_RATE_REF_FREQ_IN_KHZ;
- for (lane = 0; lane < link_settings->link_settings.lane_count; lane++) {
+ for (lane = 0; lane < link_settings->lane_count; lane++) {
/* translate lane settings */
training_lane_set.bits.VOLTAGE_SWING_SET =
- link_settings->lane_settings[lane].VOLTAGE_SWING;
+ lane_settings[lane].VOLTAGE_SWING;
training_lane_set.bits.PRE_EMPHASIS_SET =
- link_settings->lane_settings[lane].PRE_EMPHASIS;
+ lane_settings[lane].PRE_EMPHASIS;
/* post cursor 2 setting only applies to HBR2 link rate */
- if (link_settings->link_settings.link_rate == LINK_RATE_HIGH2) {
+ if (link_settings->link_rate == LINK_RATE_HIGH2) {
/* this is passed to VBIOS
* to program post cursor 2 level */
training_lane_set.bits.POST_CURSOR2_SET =
- link_settings->lane_settings[lane].POST_CURSOR2;
+ lane_settings[lane].POST_CURSOR2;
}
cntl.lane_select = lane;
cntl.lane_settings = training_lane_set.raw;
/* call VBIOS table to set voltage swing and pre-emphasis */
- link_transmitter_control(enc110, &cntl);
+ if (link_transmitter_control(enc110, &cntl) != BP_RESULT_OK) {
+ DC_LOG_ERROR("%s: Failed to execute VBIOS command table!\n", __func__);
+ BREAK_TO_DEBUGGER();
+ }
}
}
@@ -1647,7 +1718,7 @@ void dce110_link_encoder_enable_hpd(struct link_encoder *enc)
uint32_t hpd_enable = 0;
uint32_t value = dm_read_reg(ctx, addr);
- get_reg_field_value(hpd_enable, DC_HPD_CONTROL, DC_HPD_EN);
+ hpd_enable = get_reg_field_value(hpd_enable, DC_HPD_CONTROL, DC_HPD_EN);
if (hpd_enable == 0)
set_reg_field_value(value, 1, DC_HPD_CONTROL, DC_HPD_EN);
@@ -1727,6 +1798,7 @@ void dce60_link_encoder_construct(
enc110->base.connector = init_data->connector;
enc110->base.preferred_engine = ENGINE_ID_UNKNOWN;
+ enc110->base.analog_engine = init_data->analog_engine;
enc110->base.features = *enc_features;
@@ -1750,6 +1822,11 @@ void dce60_link_encoder_construct(
SIGNAL_TYPE_EDP |
SIGNAL_TYPE_HDMI_TYPE_A;
+ if ((enc110->base.connector.id == CONNECTOR_ID_DUAL_LINK_DVII ||
+ enc110->base.connector.id == CONNECTOR_ID_SINGLE_LINK_DVII) &&
+ enc110->base.analog_engine != ENGINE_ID_UNKNOWN)
+ enc110->base.output_signals |= SIGNAL_TYPE_RGB;
+
/* For DCE 8.0 and 8.1, by design, UNIPHY is hardwired to DIG_BE.
* SW always assign DIG_FE 1:1 mapped to DIG_FE for non-MST UNIPHY.
* SW assign DIG_FE to non-MST UNIPHY first and MST last. So prefer
@@ -1788,6 +1865,13 @@ void dce60_link_encoder_construct(
enc110->base.preferred_engine = ENGINE_ID_DIGG;
break;
default:
+ if (init_data->analog_engine != ENGINE_ID_UNKNOWN) {
+ /* The connector is analog-only, ie. VGA */
+ enc110->base.preferred_engine = init_data->analog_engine;
+ enc110->base.output_signals = SIGNAL_TYPE_RGB;
+ enc110->base.transmitter = TRANSMITTER_UNKNOWN;
+ break;
+ }
ASSERT_CRITICAL(false);
enc110->base.preferred_engine = ENGINE_ID_UNKNOWN;
}
@@ -1799,13 +1883,13 @@ void dce60_link_encoder_construct(
enc110->base.id, &bp_cap_info);
/* Override features with DCE-specific values */
- if (BP_RESULT_OK == result) {
+ if (result == BP_RESULT_OK) {
enc110->base.features.flags.bits.IS_HBR2_CAPABLE =
bp_cap_info.DP_HBR2_EN;
enc110->base.features.flags.bits.IS_HBR3_CAPABLE =
bp_cap_info.DP_HBR3_EN;
enc110->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN;
- } else {
+ } else if (result != BP_RESULT_NORECORD) {
DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n",
__func__,
result);
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
index fc6ade824c23..c58b69bc319b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
@@ -101,18 +101,21 @@
SRI(DP_SEC_CNTL, DP, id), \
SRI(DP_VID_STREAM_CNTL, DP, id), \
SRI(DP_DPHY_FAST_TRAINING, DP, id), \
- SRI(DP_SEC_CNTL1, DP, id)
+ SRI(DP_SEC_CNTL1, DP, id), \
+ SR(DAC_ENABLE)
#endif
#define LE_DCE80_REG_LIST(id)\
SRI(DP_DPHY_INTERNAL_CTRL, DP, id), \
- LE_COMMON_REG_LIST_BASE(id)
+ LE_COMMON_REG_LIST_BASE(id), \
+ SR(DAC_ENABLE)
#define LE_DCE100_REG_LIST(id)\
LE_COMMON_REG_LIST_BASE(id), \
SRI(DP_DPHY_BS_SR_SWAP_CNTL, DP, id), \
SRI(DP_DPHY_INTERNAL_CTRL, DP, id), \
- SR(DCI_MEM_PWR_STATUS)
+ SR(DCI_MEM_PWR_STATUS), \
+ SR(DAC_ENABLE)
#define LE_DCE110_REG_LIST(id)\
LE_COMMON_REG_LIST_BASE(id), \
@@ -181,6 +184,9 @@ struct dce110_link_enc_registers {
uint32_t DP_DPHY_BS_SR_SWAP_CNTL;
uint32_t DP_DPHY_HBR2_PATTERN_CONTROL;
uint32_t DP_SEC_CNTL1;
+
+ /* DAC registers */
+ uint32_t DAC_ENABLE;
};
struct dce110_link_encoder {
@@ -215,10 +221,6 @@ bool dce110_link_encoder_validate_dvi_output(
enum signal_type signal,
const struct dc_crtc_timing *crtc_timing);
-bool dce110_link_encoder_validate_rgb_output(
- const struct dce110_link_encoder *enc110,
- const struct dc_crtc_timing *crtc_timing);
-
bool dce110_link_encoder_validate_dp_output(
const struct dce110_link_encoder *enc110,
const struct dc_crtc_timing *crtc_timing);
@@ -279,7 +281,8 @@ void dce110_link_encoder_disable_output(
/* set DP lane settings */
void dce110_link_encoder_dp_set_lane_settings(
struct link_encoder *enc,
- const struct link_training_settings *link_settings);
+ const struct dc_link_settings *link_settings,
+ const struct dc_lane_settings lane_settings[LANE_COUNT_DP_MAX]);
void dce110_link_encoder_dp_set_phy_pattern(
struct link_encoder *enc,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
index 4cdd4dacb761..1c2009e38aa1 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
@@ -98,7 +98,7 @@ static enum mi_bits_per_pixel get_mi_bpp(
}
static enum mi_tiling_format get_mi_tiling(
- union dc_tiling_info *tiling_info)
+ struct dc_tiling_info *tiling_info)
{
switch (tiling_info->gfx8.array_mode) {
case DC_ARRAY_1D_TILED_THIN1:
@@ -133,7 +133,7 @@ static bool is_vert_scan(enum dc_rotation_angle rotation)
static void dce_mi_program_pte_vm(
struct mem_input *mi,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
enum dc_rotation_angle rotation)
{
struct dce_mem_input *dce_mi = TO_DCE_MEM_INPUT(mi);
@@ -430,7 +430,7 @@ static void dce120_mi_program_display_marks(struct mem_input *mi,
}
static void program_tiling(
- struct dce_mem_input *dce_mi, const union dc_tiling_info *info)
+ struct dce_mem_input *dce_mi, const struct dc_tiling_info *info)
{
if (dce_mi->masks->GRPH_SW_MODE) { /* GFX9 */
REG_UPDATE_6(GRPH_CONTROL,
@@ -481,7 +481,6 @@ static void program_tiling(
}
}
-
static void program_size_and_rotation(
struct dce_mem_input *dce_mi,
enum dc_rotation_angle rotation,
@@ -627,10 +626,31 @@ static void program_grph_pixel_format(
GRPH_PRESCALE_B_SIGN, sign);
}
+static void dce_mi_clear_tiling(
+ struct mem_input *mi)
+{
+ struct dce_mem_input *dce_mi = TO_DCE_MEM_INPUT(mi);
+
+ if (dce_mi->masks->GRPH_SW_MODE) { /* GFX9 */
+ REG_UPDATE(GRPH_CONTROL,
+ GRPH_SW_MODE, DC_SW_LINEAR);
+ }
+
+ if (dce_mi->masks->GRPH_MICRO_TILE_MODE) { /* GFX8 */
+ REG_UPDATE(GRPH_CONTROL,
+ GRPH_ARRAY_MODE, DC_SW_LINEAR);
+ }
+
+ if (dce_mi->masks->GRPH_ARRAY_MODE) { /* GFX6 but reuses gfx8 struct */
+ REG_UPDATE(GRPH_CONTROL,
+ GRPH_ARRAY_MODE, DC_SW_LINEAR);
+ }
+}
+
static void dce_mi_program_surface_config(
struct mem_input *mi,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -642,8 +662,7 @@ static void dce_mi_program_surface_config(
program_tiling(dce_mi, tiling_info);
program_size_and_rotation(dce_mi, rotation, plane_size);
- if (format >= SURFACE_PIXEL_FORMAT_GRPH_BEGIN &&
- format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN)
+ if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN)
program_grph_pixel_format(dce_mi, format);
}
@@ -651,7 +670,7 @@ static void dce_mi_program_surface_config(
static void dce60_mi_program_surface_config(
struct mem_input *mi,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation, /* not used in DCE6 */
struct dc_plane_dcc_param *dcc,
@@ -663,8 +682,7 @@ static void dce60_mi_program_surface_config(
program_tiling(dce_mi, tiling_info);
dce60_program_size(dce_mi, rotation, plane_size);
- if (format >= SURFACE_PIXEL_FORMAT_GRPH_BEGIN &&
- format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN)
+ if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN)
program_grph_pixel_format(dce_mi, format);
}
#endif
@@ -886,7 +904,8 @@ static const struct mem_input_funcs dce_mi_funcs = {
.mem_input_program_pte_vm = dce_mi_program_pte_vm,
.mem_input_program_surface_config =
dce_mi_program_surface_config,
- .mem_input_is_flip_pending = dce_mi_is_flip_pending
+ .mem_input_is_flip_pending = dce_mi_is_flip_pending,
+ .mem_input_clear_tiling = dce_mi_clear_tiling,
};
#if defined(CONFIG_DRM_AMD_DC_SI)
@@ -899,7 +918,8 @@ static const struct mem_input_funcs dce60_mi_funcs = {
.mem_input_program_pte_vm = dce_mi_program_pte_vm,
.mem_input_program_surface_config =
dce60_mi_program_surface_config,
- .mem_input_is_flip_pending = dce_mi_is_flip_pending
+ .mem_input_is_flip_pending = dce_mi_is_flip_pending,
+ .mem_input_clear_tiling = dce_mi_clear_tiling,
};
#endif
@@ -912,7 +932,8 @@ static const struct mem_input_funcs dce112_mi_funcs = {
.mem_input_program_pte_vm = dce_mi_program_pte_vm,
.mem_input_program_surface_config =
dce_mi_program_surface_config,
- .mem_input_is_flip_pending = dce_mi_is_flip_pending
+ .mem_input_is_flip_pending = dce_mi_is_flip_pending,
+ .mem_input_clear_tiling = dce_mi_clear_tiling,
};
static const struct mem_input_funcs dce120_mi_funcs = {
@@ -924,7 +945,8 @@ static const struct mem_input_funcs dce120_mi_funcs = {
.mem_input_program_pte_vm = dce_mi_program_pte_vm,
.mem_input_program_surface_config =
dce_mi_program_surface_config,
- .mem_input_is_flip_pending = dce_mi_is_flip_pending
+ .mem_input_is_flip_pending = dce_mi_is_flip_pending,
+ .mem_input_clear_tiling = dce_mi_clear_tiling,
};
void dce_mem_input_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h
index f98400efdd9b..e34e445a4013 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h
@@ -181,6 +181,7 @@ struct dce_mem_input_registers {
SFB(blk, GRPH_ENABLE, GRPH_ENABLE, mask_sh),\
SFB(blk, GRPH_CONTROL, GRPH_DEPTH, mask_sh),\
SFB(blk, GRPH_CONTROL, GRPH_FORMAT, mask_sh),\
+ SFB(blk, GRPH_CONTROL, GRPH_NUM_BANKS, mask_sh),\
SFB(blk, GRPH_X_START, GRPH_X_START, mask_sh),\
SFB(blk, GRPH_Y_START, GRPH_Y_START, mask_sh),\
SFB(blk, GRPH_X_END, GRPH_X_END, mask_sh),\
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c b/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c
index 895b015b02e8..f342da5a5e50 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c
@@ -23,8 +23,6 @@
*
*/
-#include <linux/slab.h>
-
#include "dm_services.h"
#include "basics/conversion.h"
@@ -577,7 +575,6 @@ static void dce60_opp_program_clamping_and_pixel_encoding(
}
#endif
-
static void program_formatter_420_memory(struct output_pixel_processor *opp)
{
struct dce110_opp *opp110 = TO_DCE110_OPP(opp);
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_opp.h b/drivers/gpu/drm/amd/display/dc/dce/dce_opp.h
index bf1ffc3629c7..3d9be87aae45 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_opp.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_opp.h
@@ -111,6 +111,7 @@ enum dce110_opp_reg_type {
OPP_SF(FMT_DITHER_RAND_R_SEED, FMT_RAND_R_SEED, mask_sh),\
OPP_SF(FMT_DITHER_RAND_G_SEED, FMT_RAND_G_SEED, mask_sh),\
OPP_SF(FMT_DITHER_RAND_B_SEED, FMT_RAND_B_SEED, mask_sh),\
+ OPP_SF(FMT_BIT_DEPTH_CONTROL, FMT_TEMPORAL_DITHER_EN, mask_sh),\
OPP_SF(FMT_BIT_DEPTH_CONTROL, FMT_TEMPORAL_DITHER_RESET, mask_sh),\
OPP_SF(FMT_BIT_DEPTH_CONTROL, FMT_TEMPORAL_DITHER_OFFSET, mask_sh),\
OPP_SF(FMT_BIT_DEPTH_CONTROL, FMT_TEMPORAL_DITHER_DEPTH, mask_sh),\
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c
index e8570060d007..de31fb1b6819 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c
@@ -218,7 +218,7 @@ static void dce_driver_set_backlight(struct panel_cntl *panel_cntl,
* contain integer component, lower 16 bits contain fractional component
* of active duty cycle e.g. 0x21BDC0 = 0xEFF0 * 0x24
*/
- active_duty_cycle = backlight_pwm_u16_16 * masked_pwm_period;
+ active_duty_cycle = backlight_pwm_u16_16 * (uint64_t)masked_pwm_period;
/* 1.3 Calculate 16 bit active duty cycle from integer and fractional
* components shift by bitCount then mask 16 bits and add rounding bit
@@ -290,4 +290,5 @@ void dce_panel_cntl_construct(
dce_panel_cntl->base.funcs = &dce_link_panel_cntl_funcs;
dce_panel_cntl->base.ctx = init_data->ctx;
dce_panel_cntl->base.inst = init_data->inst;
+ dce_panel_cntl->base.pwrseq_inst = 0;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
index 779bc92a2968..574618d5d4a4 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
@@ -23,8 +23,6 @@
*
*/
-#include <linux/delay.h>
-
#include "dc_bios_types.h"
#include "dce_stream_encoder.h"
#include "reg_helper.h"
@@ -33,7 +31,6 @@
#define DC_LOGGER \
enc110->base.ctx->logger
-
#define REG(reg)\
(enc110->regs->reg)
@@ -136,7 +133,7 @@ static void dce110_update_generic_info_packet(
AFMT_GENERIC0_UPDATE, (packet_index == 0),
AFMT_GENERIC2_UPDATE, (packet_index == 2));
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
+
if (REG(AFMT_VBI_PACKET_CONTROL1)) {
switch (packet_index) {
case 0:
@@ -175,7 +172,6 @@ static void dce110_update_generic_info_packet(
break;
}
}
-#endif
}
static void dce110_update_hdmi_info_packet(
@@ -230,7 +226,6 @@ static void dce110_update_hdmi_info_packet(
HDMI_GENERIC1_SEND, send,
HDMI_GENERIC1_LINE, line);
break;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
case 4:
if (REG(HDMI_GENERIC_PACKET_CONTROL2))
REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL2,
@@ -259,7 +254,6 @@ static void dce110_update_hdmi_info_packet(
HDMI_GENERIC1_SEND, send,
HDMI_GENERIC1_LINE, line);
break;
-#endif
default:
/* invalid HW packet index */
DC_LOG_WARNING(
@@ -277,18 +271,15 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
bool use_vsc_sdp_for_colorimetry,
uint32_t enable_sdp_splitting)
{
-#if defined(CONFIG_DRM_AMD_DC_DCN)
uint32_t h_active_start;
uint32_t v_active_start;
uint32_t misc0 = 0;
uint32_t misc1 = 0;
uint32_t h_blank;
uint32_t h_back_porch;
- uint8_t synchronous_clock = 0; /* asynchronous mode */
uint8_t colorimetry_bpc;
uint8_t dynamic_range_rgb = 0; /*full range*/
uint8_t dynamic_range_ycbcr = 1; /*bt709*/
-#endif
struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc);
struct dc_crtc_timing hw_crtc_timing = *crtc_timing;
@@ -329,10 +320,8 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
if (enc110->se_mask->DP_VID_M_DOUBLE_VALUE_EN)
REG_UPDATE(DP_VID_TIMING, DP_VID_M_DOUBLE_VALUE_EN, 1);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
if (enc110->se_mask->DP_VID_N_MUL)
REG_UPDATE(DP_VID_TIMING, DP_VID_N_MUL, 1);
-#endif
break;
default:
REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING,
@@ -340,10 +329,8 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
break;
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
if (REG(DP_MSA_MISC))
misc1 = REG_READ(DP_MSA_MISC);
-#endif
/* set color depth */
@@ -374,7 +361,6 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
/* set dynamic range and YCbCr range */
-#if defined(CONFIG_DRM_AMD_DC_DCN)
switch (hw_crtc_timing.display_color_depth) {
case COLOR_DEPTH_666:
colorimetry_bpc = 0;
@@ -393,7 +379,6 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
break;
}
- misc0 = misc0 | synchronous_clock;
misc0 = colorimetry_bpc << 5;
if (REG(DP_MSA_TIMING_PARAM1)) {
@@ -433,7 +418,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
dynamic_range_rgb = 1; /*limited range*/
break;
case COLOR_SPACE_2020_RGB_FULLRANGE:
- case COLOR_SPACE_2020_YCBCR:
+ case COLOR_SPACE_2020_YCBCR_LIMITED:
case COLOR_SPACE_XR_RGB:
case COLOR_SPACE_MSREF_SCRGB:
case COLOR_SPACE_ADOBERGB:
@@ -445,6 +430,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
case COLOR_SPACE_APPCTRL:
case COLOR_SPACE_CUSTOMPOINTS:
case COLOR_SPACE_UNKNOWN:
+ default:
/* do nothing */
break;
}
@@ -454,7 +440,6 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
DP_DYN_RANGE, dynamic_range_rgb,
DP_YCBCR_RANGE, dynamic_range_ycbcr);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
if (REG(DP_MSA_COLORIMETRY))
REG_SET(DP_MSA_COLORIMETRY, 0, DP_MSA_MISC0, misc0);
@@ -468,7 +453,6 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
REG_SET_2(DP_MSA_TIMING_PARAM1, 0,
DP_MSA_HTOTAL, hw_crtc_timing.h_total,
DP_MSA_VTOTAL, hw_crtc_timing.v_total);
-#endif
/* calcuate from vesa timing parameters
* h_active_start related to leading edge of sync
@@ -489,7 +473,6 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
hw_crtc_timing.v_front_porch;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
/* start at begining of left border */
if (REG(DP_MSA_TIMING_PARAM2))
REG_SET_2(DP_MSA_TIMING_PARAM2, 0,
@@ -514,9 +497,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
hw_crtc_timing.h_addressable + hw_crtc_timing.h_border_right,
DP_MSA_VHEIGHT, hw_crtc_timing.v_border_top +
hw_crtc_timing.v_addressable + hw_crtc_timing.v_border_bottom);
-#endif
}
-#endif
}
static void dce110_stream_encoder_set_stream_attribute_helper(
@@ -650,6 +631,8 @@ static void dce110_stream_encoder_hdmi_set_stream_attribute(
HDMI_GC_SEND, 1,
HDMI_NULL_SEND, 1);
+ REG_UPDATE(HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, 0);
+
/* following belongs to audio */
REG_UPDATE(HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
@@ -787,7 +770,6 @@ static void dce110_stream_encoder_update_hdmi_info_packets(
dce110_update_hdmi_info_packet(enc110, 3, &info_frame->hdrsmd);
}
-#if defined(CONFIG_DRM_AMD_DC_DCN)
if (enc110->se_mask->HDMI_DB_DISABLE) {
/* for bring up, disable dp double TODO */
if (REG(HDMI_DB_CONTROL))
@@ -799,7 +781,6 @@ static void dce110_stream_encoder_update_hdmi_info_packets(
dce110_update_hdmi_info_packet(enc110, 3, &info_frame->spd);
dce110_update_hdmi_info_packet(enc110, 4, &info_frame->hdrsmd);
}
-#endif
}
static void dce110_stream_encoder_stop_hdmi_info_packets(
@@ -825,7 +806,6 @@ static void dce110_stream_encoder_stop_hdmi_info_packets(
HDMI_GENERIC1_LINE, 0,
HDMI_GENERIC1_SEND, 0);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
/* stop generic packets 2 & 3 on HDMI */
if (REG(HDMI_GENERIC_PACKET_CONTROL2))
REG_SET_6(HDMI_GENERIC_PACKET_CONTROL2, 0,
@@ -844,7 +824,6 @@ static void dce110_stream_encoder_stop_hdmi_info_packets(
HDMI_GENERIC1_CONT, 0,
HDMI_GENERIC1_LINE, 0,
HDMI_GENERIC1_SEND, 0);
-#endif
}
static void dce110_stream_encoder_update_dp_info_packets(
@@ -1045,6 +1024,7 @@ static void dce110_reset_hdmi_stream_attribute(
struct stream_encoder *enc)
{
struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc);
+
if (enc110->se_mask->HDMI_DATA_SCRAMBLE_EN)
REG_UPDATE_5(HDMI_CONTROL,
HDMI_PACKET_GEN_VERSION, 1,
@@ -1587,3 +1567,17 @@ void dce110_stream_encoder_construct(
enc110->se_shift = se_shift;
enc110->se_mask = se_mask;
}
+
+static const struct stream_encoder_funcs dce110_an_str_enc_funcs = {};
+
+void dce110_analog_stream_encoder_construct(
+ struct dce110_stream_encoder *enc110,
+ struct dc_context *ctx,
+ struct dc_bios *bp,
+ enum engine_id eng_id)
+{
+ enc110->base.funcs = &dce110_an_str_enc_funcs;
+ enc110->base.ctx = ctx;
+ enc110->base.id = eng_id;
+ enc110->base.bp = bp;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h
index f9cdf2b5242c..068de1392121 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h
@@ -115,7 +115,7 @@
#define SE_SF(reg_name, field_name, post_fix)\
.field_name = reg_name ## __ ## field_name ## post_fix
-#define SE_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(mask_sh)\
+#define SE_COMMON_MASK_SH_LIST_DCE_COMMON(mask_sh)\
SE_SF(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_INDEX, mask_sh),\
SE_SF(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC0_UPDATE, mask_sh),\
SE_SF(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC2_UPDATE, mask_sh),\
@@ -140,6 +140,7 @@
SE_SF(HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, mask_sh),\
SE_SF(HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, mask_sh),\
SE_SF(HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, mask_sh),\
+ SE_SF(HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, mask_sh),\
SE_SF(HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, mask_sh),\
SE_SF(AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, mask_sh),\
SE_SF(HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, mask_sh),\
@@ -202,10 +203,7 @@
SE_SF(AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, mask_sh),\
SE_SF(DIG_FE_CNTL, DIG_SOURCE_SELECT, mask_sh)
-#define SE_COMMON_MASK_SH_LIST_DCE_COMMON(mask_sh)\
- SE_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(mask_sh)
-
-#define SE_COMMON_MASK_SH_LIST_SOC_BASE(mask_sh)\
+#define SE_COMMON_MASK_SH_LIST_SOC(mask_sh)\
SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_INDEX, mask_sh),\
SE_SF(DIG0_AFMT_GENERIC_HDR, AFMT_GENERIC_HB0, mask_sh),\
SE_SF(DIG0_AFMT_GENERIC_HDR, AFMT_GENERIC_HB1, mask_sh),\
@@ -227,6 +225,7 @@
SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, mask_sh),\
SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, mask_sh),\
SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, mask_sh),\
SE_SF(DIG0_HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, mask_sh),\
SE_SF(DIG0_AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, mask_sh),\
SE_SF(DIG0_HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, mask_sh),\
@@ -288,9 +287,6 @@
SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_GATE_EN, mask_sh),\
SE_SF(DIG0_DIG_FE_CNTL, DIG_SOURCE_SELECT, mask_sh)
-#define SE_COMMON_MASK_SH_LIST_SOC(mask_sh)\
- SE_COMMON_MASK_SH_LIST_SOC_BASE(mask_sh)
-
#define SE_COMMON_MASK_SH_LIST_DCE80_100(mask_sh)\
SE_COMMON_MASK_SH_LIST_DCE_COMMON(mask_sh),\
SE_SF(TMDS_CNTL, TMDS_PIXEL_ENCODING, mask_sh),\
@@ -414,6 +410,7 @@ struct dce_stream_encoder_shift {
uint8_t HDMI_GC_SEND;
uint8_t HDMI_NULL_SEND;
uint8_t HDMI_DATA_SCRAMBLE_EN;
+ uint8_t HDMI_ACP_SEND;
uint8_t HDMI_AUDIO_INFO_SEND;
uint8_t AFMT_AUDIO_INFO_UPDATE;
uint8_t HDMI_AUDIO_INFO_LINE;
@@ -545,6 +542,7 @@ struct dce_stream_encoder_mask {
uint32_t HDMI_GC_SEND;
uint32_t HDMI_NULL_SEND;
uint32_t HDMI_DATA_SCRAMBLE_EN;
+ uint32_t HDMI_ACP_SEND;
uint32_t HDMI_AUDIO_INFO_SEND;
uint32_t AFMT_AUDIO_INFO_UPDATE;
uint32_t HDMI_AUDIO_INFO_LINE;
@@ -710,6 +708,11 @@ void dce110_stream_encoder_construct(
const struct dce_stream_encoder_shift *se_shift,
const struct dce_stream_encoder_mask *se_mask);
+void dce110_analog_stream_encoder_construct(
+ struct dce110_stream_encoder *enc110,
+ struct dc_context *ctx,
+ struct dc_bios *bp,
+ enum engine_id eng_id);
void dce110_se_audio_mute_control(
struct stream_encoder *enc, bool mute);
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c
index d9fd4ec60588..1ab5ae9b5ea5 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c
@@ -154,10 +154,13 @@ static bool dce60_setup_scaling_configuration(
REG_SET(SCL_BYPASS_CONTROL, 0, SCL_BYPASS_MODE, 0);
if (data->taps.h_taps + data->taps.v_taps <= 2) {
- /* Set bypass */
-
- /* DCE6 has no SCL_MODE register, skip scale mode programming */
+ /* Disable scaler functionality */
+ REG_WRITE(SCL_SCALER_ENABLE, 0);
+ /* Clear registers that can cause glitches even when the scaler is off */
+ REG_WRITE(SCL_TAP_CONTROL, 0);
+ REG_WRITE(SCL_AUTOMATIC_MODE_CONTROL, 0);
+ REG_WRITE(SCL_F_SHARP_CONTROL, 0);
return false;
}
@@ -165,7 +168,7 @@ static bool dce60_setup_scaling_configuration(
SCL_H_NUM_OF_TAPS, data->taps.h_taps - 1,
SCL_V_NUM_OF_TAPS, data->taps.v_taps - 1);
- /* DCE6 has no SCL_MODE register, skip scale mode programming */
+ REG_WRITE(SCL_SCALER_ENABLE, 1);
/* DCE6 has no SCL_BOUNDARY_MODE bit, skip replace out of bound pixels */
@@ -502,6 +505,8 @@ static void dce60_transform_set_scaler(
REG_SET(DC_LB_MEM_SIZE, 0,
DC_LB_MEM_SIZE, xfm_dce->lb_memory_size);
+ REG_WRITE(SCL_UPDATE, 0x00010000);
+
/* Clear SCL_F_SHARP_CONTROL value to 0 */
REG_WRITE(SCL_F_SHARP_CONTROL, 0);
@@ -527,8 +532,7 @@ static void dce60_transform_set_scaler(
if (coeffs_v != xfm_dce->filter_v || coeffs_h != xfm_dce->filter_h) {
/* 4. Program vertical filters */
if (xfm_dce->filter_v == NULL)
- REG_SET(SCL_VERT_FILTER_CONTROL, 0,
- SCL_V_2TAP_HARDCODE_COEF_EN, 0);
+ REG_WRITE(SCL_VERT_FILTER_CONTROL, 0);
program_multi_taps_filter(
xfm_dce,
data->taps.v_taps,
@@ -542,8 +546,7 @@ static void dce60_transform_set_scaler(
/* 5. Program horizontal filters */
if (xfm_dce->filter_h == NULL)
- REG_SET(SCL_HORZ_FILTER_CONTROL, 0,
- SCL_H_2TAP_HARDCODE_COEF_EN, 0);
+ REG_WRITE(SCL_HORZ_FILTER_CONTROL, 0);
program_multi_taps_filter(
xfm_dce,
data->taps.h_taps,
@@ -566,6 +569,8 @@ static void dce60_transform_set_scaler(
/* DCE6 has no SCL_COEF_UPDATE_COMPLETE bit to flip to new coefficient memory */
/* DCE6 DATA_FORMAT register does not support ALPHA_EN */
+
+ REG_WRITE(SCL_UPDATE, 0);
}
#endif
@@ -1009,7 +1014,7 @@ static void dce_transform_set_pixel_storage_depth(
color_depth = COLOR_DEPTH_101010;
pixel_depth = 0;
expan_mode = 1;
- BREAK_TO_DEBUGGER();
+ DC_LOG_DC("The pixel depth %d is not valid, set COLOR_DEPTH_101010 instead.", depth);
break;
}
@@ -1023,8 +1028,7 @@ static void dce_transform_set_pixel_storage_depth(
if (!(xfm_dce->lb_pixel_depth_supported & depth)) {
/*we should use unsupported capabilities
* unless it is required by w/a*/
- DC_LOG_WARNING("%s: Capability not supported",
- __func__);
+ DC_LOG_DC("%s: Capability not supported", __func__);
}
}
@@ -1409,7 +1413,7 @@ void dce110_opp_set_csc_default(
static void program_pwl(struct dce_transform *xfm_dce,
const struct pwl_params *params)
{
- int retval;
+ uint32_t retval;
uint8_t max_tries = 10;
uint8_t counter = 0;
uint32_t i = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.h b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.h
index cbce194ec7b8..eb716e8337e2 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.h
@@ -155,6 +155,9 @@
SRI(SCL_COEF_RAM_TAP_DATA, SCL, id), \
SRI(VIEWPORT_START, SCL, id), \
SRI(VIEWPORT_SIZE, SCL, id), \
+ SRI(SCL_SCALER_ENABLE, SCL, id), \
+ SRI(SCL_HORZ_FILTER_INIT_RGB_LUMA, SCL, id), \
+ SRI(SCL_HORZ_FILTER_INIT_CHROMA, SCL, id), \
SRI(SCL_HORZ_FILTER_SCALE_RATIO, SCL, id), \
SRI(SCL_VERT_FILTER_SCALE_RATIO, SCL, id), \
SRI(SCL_VERT_FILTER_INIT, SCL, id), \
@@ -590,6 +593,7 @@ struct dce_transform_registers {
uint32_t SCL_VERT_FILTER_SCALE_RATIO;
uint32_t SCL_HORZ_FILTER_INIT;
#if defined(CONFIG_DRM_AMD_DC_SI)
+ uint32_t SCL_SCALER_ENABLE;
uint32_t SCL_HORZ_FILTER_INIT_RGB_LUMA;
uint32_t SCL_HORZ_FILTER_INIT_CHROMA;
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
index fb0dec4ed3a6..3b9011ef9b68 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
@@ -24,212 +24,177 @@
*/
#include "dmub_abm.h"
-#include "dce_abm.h"
+#include "dmub_abm_lcd.h"
#include "dc.h"
+#include "core_types.h"
+#include "dmub_cmd.h"
#include "dc_dmub_srv.h"
#include "dmub/dmub_srv.h"
-#include "core_types.h"
-#include "dm_services.h"
-#include "reg_helper.h"
-#include "fixed31_32.h"
-
-#include "atom.h"
#define TO_DMUB_ABM(abm)\
container_of(abm, struct dce_abm, base)
-#define REG(reg) \
- (dce_abm->regs->reg)
+#define ABM_FEATURE_NO_SUPPORT 0
+#define ABM_LCD_SUPPORT 1
-#undef FN
-#define FN(reg_name, field_name) \
- dce_abm->abm_shift->field_name, dce_abm->abm_mask->field_name
+static unsigned int abm_feature_support(struct abm *abm, unsigned int panel_inst)
+{
+ struct dc_context *dc = abm->ctx;
+ struct dc_link *edp_links[MAX_NUM_EDP];
+ int i;
+ int edp_num;
+ unsigned int ret = ABM_FEATURE_NO_SUPPORT;
-#define CTX \
- dce_abm->base.ctx
+ dc_get_edp_links(dc->dc, edp_links, &edp_num);
-#define DISABLE_ABM_IMMEDIATELY 255
+ for (i = 0; i < edp_num; i++) {
+ if (panel_inst == i)
+ break;
+ }
+ if (i < edp_num) {
+ ret = ABM_LCD_SUPPORT;
+ }
+ return ret;
+}
-static void dmub_abm_enable_fractional_pwm(struct dc_context *dc)
+static void dmub_abm_init_ex(struct abm *abm, uint32_t backlight, uint32_t user_level)
{
- union dmub_rb_cmd cmd;
- uint32_t fractional_pwm = (dc->dc->config.disable_fractional_pwm == false) ? 1 : 0;
- uint32_t edp_id_count = dc->dc_edp_id_count;
- int i;
- uint8_t panel_mask = 0;
-
- for (i = 0; i < edp_id_count; i++)
- panel_mask |= 0x01 << i;
-
- memset(&cmd, 0, sizeof(cmd));
- cmd.abm_set_pwm_frac.header.type = DMUB_CMD__ABM;
- cmd.abm_set_pwm_frac.header.sub_type = DMUB_CMD__ABM_SET_PWM_FRAC;
- cmd.abm_set_pwm_frac.abm_set_pwm_frac_data.fractional_pwm = fractional_pwm;
- cmd.abm_set_pwm_frac.abm_set_pwm_frac_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1;
- cmd.abm_set_pwm_frac.abm_set_pwm_frac_data.panel_mask = panel_mask;
- cmd.abm_set_pwm_frac.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pwm_frac_data);
-
- dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
- dc_dmub_srv_cmd_execute(dc->dmub_srv);
- dc_dmub_srv_wait_idle(dc->dmub_srv);
+ dmub_abm_init(abm, backlight, user_level);
}
-static void dmub_abm_init(struct abm *abm, uint32_t backlight)
+static unsigned int dmub_abm_get_current_backlight_ex(struct abm *abm)
{
- struct dce_abm *dce_abm = TO_DMUB_ABM(abm);
-
- REG_WRITE(DC_ABM1_HG_SAMPLE_RATE, 0x3);
- REG_WRITE(DC_ABM1_HG_SAMPLE_RATE, 0x1);
- REG_WRITE(DC_ABM1_LS_SAMPLE_RATE, 0x3);
- REG_WRITE(DC_ABM1_LS_SAMPLE_RATE, 0x1);
- REG_WRITE(BL1_PWM_BL_UPDATE_SAMPLE_RATE, 0x1);
+ dc_allow_idle_optimizations(abm->ctx->dc, false);
- REG_SET_3(DC_ABM1_HG_MISC_CTRL, 0,
- ABM1_HG_NUM_OF_BINS_SEL, 0,
- ABM1_HG_VMAX_SEL, 1,
- ABM1_HG_BIN_BITWIDTH_SIZE_SEL, 0);
+ return dmub_abm_get_current_backlight(abm);
+}
- REG_SET_3(DC_ABM1_IPCSC_COEFF_SEL, 0,
- ABM1_IPCSC_COEFF_SEL_R, 2,
- ABM1_IPCSC_COEFF_SEL_G, 4,
- ABM1_IPCSC_COEFF_SEL_B, 2);
+static unsigned int dmub_abm_get_target_backlight_ex(struct abm *abm)
+{
+ dc_allow_idle_optimizations(abm->ctx->dc, false);
- REG_UPDATE(BL1_PWM_CURRENT_ABM_LEVEL,
- BL1_PWM_CURRENT_ABM_LEVEL, backlight);
+ return dmub_abm_get_target_backlight(abm);
+}
- REG_UPDATE(BL1_PWM_TARGET_ABM_LEVEL,
- BL1_PWM_TARGET_ABM_LEVEL, backlight);
+static bool dmub_abm_set_level_ex(struct abm *abm, uint32_t level)
+{
+ bool ret = false;
+ unsigned int feature_support, i;
+ uint8_t panel_mask0 = 0;
- REG_UPDATE(BL1_PWM_USER_LEVEL,
- BL1_PWM_USER_LEVEL, backlight);
+ for (i = 0; i < MAX_NUM_EDP; i++) {
+ feature_support = abm_feature_support(abm, i);
- REG_UPDATE_2(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES,
- ABM1_LS_MIN_PIXEL_VALUE_THRES, 0,
- ABM1_LS_MAX_PIXEL_VALUE_THRES, 1000);
+ if (feature_support == ABM_LCD_SUPPORT)
+ panel_mask0 |= (0x01 << i);
+ }
- REG_SET_3(DC_ABM1_HGLS_REG_READ_PROGRESS, 0,
- ABM1_HG_REG_READ_MISSED_FRAME_CLEAR, 1,
- ABM1_LS_REG_READ_MISSED_FRAME_CLEAR, 1,
- ABM1_BL_REG_READ_MISSED_FRAME_CLEAR, 1);
+ if (panel_mask0)
+ ret = dmub_abm_set_level(abm, level, panel_mask0);
- dmub_abm_enable_fractional_pwm(abm->ctx);
+ return ret;
}
-static unsigned int dmub_abm_get_current_backlight(struct abm *abm)
+static bool dmub_abm_init_config_ex(struct abm *abm,
+ const char *src,
+ unsigned int bytes,
+ unsigned int inst)
{
- struct dce_abm *dce_abm = TO_DMUB_ABM(abm);
- unsigned int backlight = REG_READ(BL1_PWM_CURRENT_ABM_LEVEL);
+ unsigned int feature_support;
- /* return backlight in hardware format which is unsigned 17 bits, with
- * 1 bit integer and 16 bit fractional
- */
- return backlight;
-}
+ feature_support = abm_feature_support(abm, inst);
-static unsigned int dmub_abm_get_target_backlight(struct abm *abm)
-{
- struct dce_abm *dce_abm = TO_DMUB_ABM(abm);
- unsigned int backlight = REG_READ(BL1_PWM_TARGET_ABM_LEVEL);
+ if (feature_support == ABM_LCD_SUPPORT)
+ dmub_abm_init_config(abm, src, bytes, inst);
- /* return backlight in hardware format which is unsigned 17 bits, with
- * 1 bit integer and 16 bit fractional
- */
- return backlight;
+ return true;
}
-static bool dmub_abm_set_level(struct abm *abm, uint32_t level)
+static bool dmub_abm_set_pause_ex(struct abm *abm, bool pause, unsigned int panel_inst, unsigned int stream_inst)
{
- union dmub_rb_cmd cmd;
- struct dc_context *dc = abm->ctx;
- struct dc_link *edp_links[MAX_NUM_EDP];
- int i;
- int edp_num;
- uint8_t panel_mask = 0;
-
- get_edp_links(dc->dc, edp_links, &edp_num);
+ bool ret = false;
+ unsigned int feature_support;
- for (i = 0; i < edp_num; i++) {
- if (edp_links[i]->link_status.link_active)
- panel_mask |= (0x01 << i);
- }
-
- memset(&cmd, 0, sizeof(cmd));
- cmd.abm_set_level.header.type = DMUB_CMD__ABM;
- cmd.abm_set_level.header.sub_type = DMUB_CMD__ABM_SET_LEVEL;
- cmd.abm_set_level.abm_set_level_data.level = level;
- cmd.abm_set_level.abm_set_level_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1;
- cmd.abm_set_level.abm_set_level_data.panel_mask = panel_mask;
- cmd.abm_set_level.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_level_data);
+ feature_support = abm_feature_support(abm, panel_inst);
- dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
- dc_dmub_srv_cmd_execute(dc->dmub_srv);
- dc_dmub_srv_wait_idle(dc->dmub_srv);
+ if (feature_support == ABM_LCD_SUPPORT)
+ ret = dmub_abm_set_pause(abm, pause, panel_inst, stream_inst);
- return true;
+ return ret;
}
-static bool dmub_abm_init_config(struct abm *abm,
- const char *src,
- unsigned int bytes,
- unsigned int inst)
+/*****************************************************************************
+ * dmub_abm_save_restore_ex() - calls dmub_abm_save_restore for preserving DMUB's
+ * Varibright states for LCD only. OLED is TBD
+ * @abm: used to check get dc context
+ * @panel_inst: panel instance index
+ * @pData: contains command to pause/un-pause abm and abm parameters
+ *
+ *
+ ***************************************************************************/
+static bool dmub_abm_save_restore_ex(
+ struct abm *abm,
+ unsigned int panel_inst,
+ struct abm_save_restore *pData)
{
- union dmub_rb_cmd cmd;
+ bool ret = false;
+ unsigned int feature_support;
struct dc_context *dc = abm->ctx;
- uint8_t panel_mask = 0x01 << inst;
- // TODO: Optimize by only reading back final 4 bytes
- dmub_flush_buffer_mem(&dc->dmub_srv->dmub->scratch_mem_fb);
+ feature_support = abm_feature_support(abm, panel_inst);
- // Copy iramtable into cw7
- memcpy(dc->dmub_srv->dmub->scratch_mem_fb.cpu_addr, (void *)src, bytes);
+ if (feature_support == ABM_LCD_SUPPORT)
+ ret = dmub_abm_save_restore(dc, panel_inst, pData);
- memset(&cmd, 0, sizeof(cmd));
- // Fw will copy from cw7 to fw_state
- cmd.abm_init_config.header.type = DMUB_CMD__ABM;
- cmd.abm_init_config.header.sub_type = DMUB_CMD__ABM_INIT_CONFIG;
- cmd.abm_init_config.abm_init_config_data.src.quad_part = dc->dmub_srv->dmub->scratch_mem_fb.gpu_addr;
- cmd.abm_init_config.abm_init_config_data.bytes = bytes;
- cmd.abm_init_config.abm_init_config_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1;
- cmd.abm_init_config.abm_init_config_data.panel_mask = panel_mask;
+ return ret;
+}
- cmd.abm_init_config.header.payload_bytes = sizeof(struct dmub_cmd_abm_init_config_data);
+static bool dmub_abm_set_pipe_ex(struct abm *abm,
+ uint32_t otg_inst,
+ uint32_t option,
+ uint32_t panel_inst,
+ uint32_t pwrseq_inst)
+{
+ bool ret = false;
+ unsigned int feature_support;
- dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
- dc_dmub_srv_cmd_execute(dc->dmub_srv);
- dc_dmub_srv_wait_idle(dc->dmub_srv);
+ feature_support = abm_feature_support(abm, panel_inst);
- return true;
+ if (feature_support == ABM_LCD_SUPPORT)
+ ret = dmub_abm_set_pipe(abm, otg_inst, option, panel_inst, pwrseq_inst);
+
+ return ret;
}
-static bool dmub_abm_set_pause(struct abm *abm, bool pause, unsigned int panel_inst, unsigned int stream_inst)
+static bool dmub_abm_set_backlight_level_pwm_ex(struct abm *abm,
+ unsigned int backlight_pwm_u16_16,
+ unsigned int frame_ramp,
+ unsigned int controller_id,
+ unsigned int panel_inst)
{
- union dmub_rb_cmd cmd;
- struct dc_context *dc = abm->ctx;
- uint8_t panel_mask = 0x01 << panel_inst;
+ bool ret = false;
+ unsigned int feature_support;
- memset(&cmd, 0, sizeof(cmd));
- cmd.abm_pause.header.type = DMUB_CMD__ABM;
- cmd.abm_pause.header.sub_type = DMUB_CMD__ABM_PAUSE;
- cmd.abm_pause.abm_pause_data.enable = pause;
- cmd.abm_pause.abm_pause_data.panel_mask = panel_mask;
- cmd.abm_set_level.header.payload_bytes = sizeof(struct dmub_cmd_abm_pause_data);
+ feature_support = abm_feature_support(abm, panel_inst);
- dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
- dc_dmub_srv_cmd_execute(dc->dmub_srv);
- dc_dmub_srv_wait_idle(dc->dmub_srv);
+ if (feature_support == ABM_LCD_SUPPORT)
+ ret = dmub_abm_set_backlight_level(abm, backlight_pwm_u16_16, frame_ramp, panel_inst);
- return true;
+ return ret;
}
static const struct abm_funcs abm_funcs = {
- .abm_init = dmub_abm_init,
- .set_abm_level = dmub_abm_set_level,
- .get_current_backlight = dmub_abm_get_current_backlight,
- .get_target_backlight = dmub_abm_get_target_backlight,
- .init_abm_config = dmub_abm_init_config,
- .set_abm_pause = dmub_abm_set_pause,
+ .abm_init = dmub_abm_init_ex,
+ .set_abm_level = dmub_abm_set_level_ex,
+ .get_current_backlight = dmub_abm_get_current_backlight_ex,
+ .get_target_backlight = dmub_abm_get_target_backlight_ex,
+ .init_abm_config = dmub_abm_init_config_ex,
+ .set_abm_pause = dmub_abm_set_pause_ex,
+ .save_restore = dmub_abm_save_restore_ex,
+ .set_pipe_ex = dmub_abm_set_pipe_ex,
+ .set_backlight_level_pwm = dmub_abm_set_backlight_level_pwm_ex,
};
static void dmub_abm_construct(
@@ -256,16 +221,19 @@ struct abm *dmub_abm_create(
const struct dce_abm_shift *abm_shift,
const struct dce_abm_mask *abm_mask)
{
- struct dce_abm *abm_dce = kzalloc(sizeof(*abm_dce), GFP_KERNEL);
+ if (ctx->dc->caps.dmcub_support) {
+ struct dce_abm *abm_dce = kzalloc(sizeof(*abm_dce), GFP_KERNEL);
- if (abm_dce == NULL) {
- BREAK_TO_DEBUGGER();
- return NULL;
- }
+ if (abm_dce == NULL) {
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
- dmub_abm_construct(abm_dce, ctx, regs, abm_shift, abm_mask);
+ dmub_abm_construct(abm_dce, ctx, regs, abm_shift, abm_mask);
- return &abm_dce->base;
+ return &abm_dce->base;
+ }
+ return NULL;
}
void dmub_abm_destroy(struct abm **abm)
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c
new file mode 100644
index 000000000000..a641ae04450c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c
@@ -0,0 +1,318 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dmub_abm.h"
+#include "dmub_abm_lcd.h"
+#include "dce_abm.h"
+#include "dc.h"
+#include "dc_dmub_srv.h"
+#include "dmub/dmub_srv.h"
+#include "core_types.h"
+#include "dm_services.h"
+#include "reg_helper.h"
+#include "fixed31_32.h"
+
+#include "atom.h"
+
+#define TO_DMUB_ABM(abm)\
+ container_of(abm, struct dce_abm, base)
+
+#define REG(reg) \
+ (dce_abm->regs->reg)
+
+#undef FN
+#define FN(reg_name, field_name) \
+ dce_abm->abm_shift->field_name, dce_abm->abm_mask->field_name
+
+#define CTX \
+ dce_abm->base.ctx
+
+#define DISABLE_ABM_IMMEDIATELY 255
+
+
+
+/*
+ * Send DMUB_CMD__ABM_SET_PWM_FRAC to DMUB. The fractional-PWM flag is 1
+ * unless config.disable_fractional_pwm is set, and the panel mask has one
+ * low-order bit set for each of the dc_edp_id_count eDP ids.
+ */
+static void dmub_abm_enable_fractional_pwm(struct dc_context *dc)
+{
+	union dmub_rb_cmd cmd;
+	uint32_t num_edp = dc->dc_edp_id_count;
+	uint32_t frac_pwm_en;
+	uint8_t mask = 0;
+	int idx = 0;
+
+	/* Fractional PWM stays on unless the config explicitly disables it. */
+	if (dc->dc->config.disable_fractional_pwm == false)
+		frac_pwm_en = 1;
+	else
+		frac_pwm_en = 0;
+
+	/* Build the panel mask: bit N set for every eDP index N below the count. */
+	while (idx < num_edp) {
+		mask |= 0x01 << idx;
+		idx++;
+	}
+
+	memset(&cmd, 0, sizeof(cmd));
+
+	/* Command header. */
+	cmd.abm_set_pwm_frac.header.type = DMUB_CMD__ABM;
+	cmd.abm_set_pwm_frac.header.sub_type = DMUB_CMD__ABM_SET_PWM_FRAC;
+	cmd.abm_set_pwm_frac.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pwm_frac_data);
+
+	/* Command payload. */
+	cmd.abm_set_pwm_frac.abm_set_pwm_frac_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1;
+	cmd.abm_set_pwm_frac.abm_set_pwm_frac_data.panel_mask = mask;
+	cmd.abm_set_pwm_frac.abm_set_pwm_frac_data.fractional_pwm = frac_pwm_en;
+
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+/*
+ * dmub_abm_init() - program the ABM1/BL1 registers, then send the
+ * fractional-PWM setting to DMUB.
+ * @abm: ABM base object; converted to its enclosing struct dce_abm
+ * @backlight: value written to both the current and the target ABM level
+ * @user_level: value written to the BL1_PWM_USER_LEVEL field
+ */
+void dmub_abm_init(struct abm *abm, uint32_t backlight, uint32_t user_level)
+{
+ struct dce_abm *dce_abm = TO_DMUB_ABM(abm);
+ /* HG and LS sample-rate registers are each written twice: 0x3 first, then 0x1. */
+ REG_WRITE(DC_ABM1_HG_SAMPLE_RATE, 0x3);
+ REG_WRITE(DC_ABM1_HG_SAMPLE_RATE, 0x1);
+ REG_WRITE(DC_ABM1_LS_SAMPLE_RATE, 0x3);
+ REG_WRITE(DC_ABM1_LS_SAMPLE_RATE, 0x1);
+ REG_WRITE(BL1_PWM_BL_UPDATE_SAMPLE_RATE, 0x1);
+
+ REG_SET_3(DC_ABM1_HG_MISC_CTRL, 0,
+ ABM1_HG_NUM_OF_BINS_SEL, 0,
+ ABM1_HG_VMAX_SEL, 1,
+ ABM1_HG_BIN_BITWIDTH_SIZE_SEL, 0);
+
+ REG_SET_3(DC_ABM1_IPCSC_COEFF_SEL, 0,
+ ABM1_IPCSC_COEFF_SEL_R, 2,
+ ABM1_IPCSC_COEFF_SEL_G, 4,
+ ABM1_IPCSC_COEFF_SEL_B, 2);
+ /* Current and target ABM levels both start from the same backlight value. */
+ REG_UPDATE(BL1_PWM_CURRENT_ABM_LEVEL,
+ BL1_PWM_CURRENT_ABM_LEVEL, backlight);
+
+ REG_UPDATE(BL1_PWM_TARGET_ABM_LEVEL,
+ BL1_PWM_TARGET_ABM_LEVEL, backlight);
+
+ REG_UPDATE(BL1_PWM_USER_LEVEL,
+ BL1_PWM_USER_LEVEL, user_level);
+
+ REG_UPDATE_2(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES,
+ ABM1_LS_MIN_PIXEL_VALUE_THRES, 0,
+ ABM1_LS_MAX_PIXEL_VALUE_THRES, 1000);
+
+ REG_SET_3(DC_ABM1_HGLS_REG_READ_PROGRESS, 0,
+ ABM1_HG_REG_READ_MISSED_FRAME_CLEAR, 1,
+ ABM1_LS_REG_READ_MISSED_FRAME_CLEAR, 1,
+ ABM1_BL_REG_READ_MISSED_FRAME_CLEAR, 1);
+ /* Register programming is done; the fractional-PWM command goes to DMUB last. */
+ dmub_abm_enable_fractional_pwm(abm->ctx);
+}
+
+/*
+ * Read the BL1_PWM_CURRENT_ABM_LEVEL register and return its value
+ * without conversion.
+ */
+unsigned int dmub_abm_get_current_backlight(struct abm *abm)
+{
+	/* The REG() macro resolves register offsets through this local. */
+	struct dce_abm *dce_abm = TO_DMUB_ABM(abm);
+
+	/*
+	 * The value is handed back in hardware format: unsigned 17 bits,
+	 * 1 integer bit and 16 fractional bits.
+	 */
+	return REG_READ(BL1_PWM_CURRENT_ABM_LEVEL);
+}
+
+/*
+ * Read the BL1_PWM_TARGET_ABM_LEVEL register and return its value
+ * without conversion.
+ */
+unsigned int dmub_abm_get_target_backlight(struct abm *abm)
+{
+	/* The REG() macro resolves register offsets through this local. */
+	struct dce_abm *dce_abm = TO_DMUB_ABM(abm);
+
+	/*
+	 * The value is handed back in hardware format: unsigned 17 bits,
+	 * 1 integer bit and 16 fractional bits.
+	 */
+	return REG_READ(BL1_PWM_TARGET_ABM_LEVEL);
+}
+/*
+ * dmub_abm_set_level() - send a DMUB_CMD__ABM_SET_LEVEL command to DMUB.
+ * @abm: ABM object; only abm->ctx is used here
+ * @level: value placed in the command's level field
+ * @panel_mask: panel bitmask, copied into the command unchanged
+ *
+ * Return: always true; the result of dc_wake_and_execute_dmub_cmd() is
+ * not checked.
+ */
+bool dmub_abm_set_level(struct abm *abm, uint32_t level, uint8_t panel_mask)
+{
+ union dmub_rb_cmd cmd;
+ struct dc_context *dc = abm->ctx;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.abm_set_level.header.type = DMUB_CMD__ABM;
+ cmd.abm_set_level.header.sub_type = DMUB_CMD__ABM_SET_LEVEL;
+ cmd.abm_set_level.abm_set_level_data.level = level;
+ cmd.abm_set_level.abm_set_level_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1;
+ cmd.abm_set_level.abm_set_level_data.panel_mask = panel_mask;
+ cmd.abm_set_level.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_level_data);
+
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+
+ return true;
+}
+/*
+ * dmub_abm_init_config() - hand an ABM configuration blob to DMUB.
+ * @abm: ABM object; only abm->ctx is used here
+ * @src: source buffer copied into the DMUB scratch framebuffer
+ * @bytes: number of bytes copied from @src and reported to firmware
+ * @inst: panel instance; converted to a one-bit panel mask
+ *
+ * The data is staged in scratch_mem_fb and the command carries that
+ * buffer's GPU address, so firmware reads the blob from there.
+ *
+ * NOTE(review): no check of @bytes against the scratch buffer size is
+ * visible in this function - confirm callers bound it.
+ */
+void dmub_abm_init_config(struct abm *abm,
+ const char *src,
+ unsigned int bytes,
+ unsigned int inst)
+{
+ union dmub_rb_cmd cmd;
+ struct dc_context *dc = abm->ctx;
+ uint8_t panel_mask = 0x01 << inst;
+
+ // TODO: Optimize by only reading back final 4 bytes
+ dmub_flush_buffer_mem(&dc->dmub_srv->dmub->scratch_mem_fb);
+
+ // Copy iramtable into cw7
+ memcpy(dc->dmub_srv->dmub->scratch_mem_fb.cpu_addr, (void *)src, bytes);
+
+ memset(&cmd, 0, sizeof(cmd));
+ // Fw will copy from cw7 to fw_state
+ cmd.abm_init_config.header.type = DMUB_CMD__ABM;
+ cmd.abm_init_config.header.sub_type = DMUB_CMD__ABM_INIT_CONFIG;
+ cmd.abm_init_config.abm_init_config_data.src.quad_part = dc->dmub_srv->dmub->scratch_mem_fb.gpu_addr;
+ cmd.abm_init_config.abm_init_config_data.bytes = bytes;
+ cmd.abm_init_config.abm_init_config_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1;
+ cmd.abm_init_config.abm_init_config_data.panel_mask = panel_mask;
+
+ cmd.abm_init_config.header.payload_bytes = sizeof(struct dmub_cmd_abm_init_config_data);
+
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+
+}
+
+/*
+ * dmub_abm_set_pause() - send a DMUB_CMD__ABM_PAUSE command to DMUB.
+ * @abm: ABM object; only abm->ctx is used here
+ * @pause: value placed in the command's enable field
+ * @panel_inst: panel instance; converted to a one-bit panel mask
+ * @stream_inst: currently unused by this implementation
+ *
+ * Return: always true; the result of dc_wake_and_execute_dmub_cmd() is
+ * not checked.
+ */
+bool dmub_abm_set_pause(struct abm *abm, bool pause, unsigned int panel_inst, unsigned int stream_inst)
+{
+	union dmub_rb_cmd cmd;
+	struct dc_context *dc = abm->ctx;
+	uint8_t panel_mask = 0x01 << panel_inst;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.abm_pause.header.type = DMUB_CMD__ABM;
+	cmd.abm_pause.header.sub_type = DMUB_CMD__ABM_PAUSE;
+	cmd.abm_pause.abm_pause_data.enable = pause;
+	cmd.abm_pause.abm_pause_data.panel_mask = panel_mask;
+	/*
+	 * Set the payload size through the abm_pause member like every other
+	 * field of this command, rather than through abm_set_level.
+	 */
+	cmd.abm_pause.header.payload_bytes = sizeof(struct dmub_cmd_abm_pause_data);
+
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+
+	return true;
+}
+
+
+/*****************************************************************************
+ * dmub_abm_save_restore() - dmub interface for abm save+pause and restore+
+ * un-pause
+ * @dc: dc context
+ * @panel_inst: panel instance index; converted to a one-bit panel mask
+ * @pData: contains command to pause/un-pause abm and exchange abm parameters
+ *
+ * When called Pause will get abm data and store in pData, and un-pause will
+ * set/apply abm data stored in pData.
+ *
+ * pData is copied into the DMUB scratch buffer before the command is sent,
+ * and the scratch buffer is copied back over pData once the command returns.
+ *
+ * Return: always true; the result of dc_wake_and_execute_dmub_cmd() is not
+ * checked.
+ *****************************************************************************/
+bool dmub_abm_save_restore(
+ struct dc_context *dc,
+ unsigned int panel_inst,
+ struct abm_save_restore *pData)
+{
+ union dmub_rb_cmd cmd;
+ uint8_t panel_mask = 0x01 << panel_inst;
+ unsigned int bytes = sizeof(struct abm_save_restore);
+
+ // TODO: Optimize by only reading back final 4 bytes
+ dmub_flush_buffer_mem(&dc->dmub_srv->dmub->scratch_mem_fb);
+
+ // Stage the caller's abm_save_restore data in the DMUB scratch buffer
+ memcpy(dc->dmub_srv->dmub->scratch_mem_fb.cpu_addr, (void *)pData, bytes);
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.abm_save_restore.header.type = DMUB_CMD__ABM;
+ cmd.abm_save_restore.header.sub_type = DMUB_CMD__ABM_SAVE_RESTORE;
+
+ cmd.abm_save_restore.abm_init_config_data.src.quad_part = dc->dmub_srv->dmub->scratch_mem_fb.gpu_addr;
+ cmd.abm_save_restore.abm_init_config_data.bytes = bytes;
+ cmd.abm_save_restore.abm_init_config_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1;
+ cmd.abm_save_restore.abm_init_config_data.panel_mask = panel_mask;
+
+ cmd.abm_save_restore.header.payload_bytes =
+ sizeof(struct dmub_rb_cmd_abm_save_restore) - sizeof(struct dmub_cmd_header);
+
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+
+ // Copy the scratch buffer contents back into the caller's structure
+ memcpy((void *)pData, dc->dmub_srv->dmub->scratch_mem_fb.cpu_addr, bytes);
+
+ return true;
+}
+/*
+ * dmub_abm_set_pipe() - send a DMUB_CMD__ABM_SET_PIPE command to DMUB.
+ * @abm: ABM object; only abm->ctx is used here
+ * @otg_inst: copied into the command's otg_inst field
+ * @option: copied into the command's set_pipe_option field
+ * @panel_inst: copied into the command's panel_inst field (not a mask)
+ * @pwrseq_inst: copied into the command's pwrseq_inst field
+ *
+ * The ramping_boundary field is always sent as 0xFF.
+ *
+ * Return: always true; the result of dc_wake_and_execute_dmub_cmd() is
+ * not checked.
+ */
+bool dmub_abm_set_pipe(struct abm *abm,
+ uint32_t otg_inst,
+ uint32_t option,
+ uint32_t panel_inst,
+ uint32_t pwrseq_inst)
+{
+ union dmub_rb_cmd cmd;
+ struct dc_context *dc = abm->ctx;
+ uint8_t ramping_boundary = 0xFF;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.abm_set_pipe.header.type = DMUB_CMD__ABM;
+ cmd.abm_set_pipe.header.sub_type = DMUB_CMD__ABM_SET_PIPE;
+ cmd.abm_set_pipe.abm_set_pipe_data.otg_inst = otg_inst;
+ cmd.abm_set_pipe.abm_set_pipe_data.pwrseq_inst = pwrseq_inst;
+ cmd.abm_set_pipe.abm_set_pipe_data.set_pipe_option = option;
+ cmd.abm_set_pipe.abm_set_pipe_data.panel_inst = panel_inst;
+ cmd.abm_set_pipe.abm_set_pipe_data.ramping_boundary = ramping_boundary;
+ cmd.abm_set_pipe.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pipe_data);
+
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+
+ return true;
+}
+/*
+ * dmub_abm_set_backlight_level() - send a DMUB_CMD__ABM_SET_BACKLIGHT
+ * command to DMUB.
+ * @abm: ABM object; only abm->ctx is used here
+ * @backlight_pwm_u16_16: copied into the command's backlight_user_level field
+ * @frame_ramp: copied into the command's frame_ramp field
+ * @panel_inst: panel instance; converted to a one-bit panel mask
+ *
+ * Return: always true; the result of dc_wake_and_execute_dmub_cmd() is
+ * not checked.
+ */
+bool dmub_abm_set_backlight_level(struct abm *abm,
+ unsigned int backlight_pwm_u16_16,
+ unsigned int frame_ramp,
+ unsigned int panel_inst)
+{
+ union dmub_rb_cmd cmd;
+ struct dc_context *dc = abm->ctx;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.abm_set_backlight.header.type = DMUB_CMD__ABM;
+ cmd.abm_set_backlight.header.sub_type = DMUB_CMD__ABM_SET_BACKLIGHT;
+ cmd.abm_set_backlight.abm_set_backlight_data.frame_ramp = frame_ramp;
+ cmd.abm_set_backlight.abm_set_backlight_data.backlight_user_level = backlight_pwm_u16_16;
+ cmd.abm_set_backlight.abm_set_backlight_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1;
+ cmd.abm_set_backlight.abm_set_backlight_data.panel_mask = (0x01 << panel_inst);
+ cmd.abm_set_backlight.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_backlight_data);
+
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+
+ return true;
+}
+/*
+ * dmub_abm_set_event() - send a DMUB_CMD__ABM_SET_EVENT command to DMUB.
+ * @abm: ABM object; only abm->ctx is used here
+ * @scaling_enable: copied into the command's vb_scaling_enable field
+ * @scaling_strength_map: copied into vb_scaling_strength_mapping
+ * @panel_inst: panel instance; converted to a one-bit panel mask
+ *
+ * Unlike the other ABM commands in this file, no version field is set here.
+ *
+ * Return: always true; the result of dc_wake_and_execute_dmub_cmd() is
+ * not checked.
+ */
+bool dmub_abm_set_event(struct abm *abm, unsigned int scaling_enable, unsigned int scaling_strength_map,
+ unsigned int panel_inst)
+{
+ union dmub_rb_cmd cmd;
+ struct dc_context *dc = abm->ctx;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.abm_set_event.header.type = DMUB_CMD__ABM;
+ cmd.abm_set_event.header.sub_type = DMUB_CMD__ABM_SET_EVENT;
+ cmd.abm_set_event.abm_set_event_data.vb_scaling_enable = scaling_enable;
+ cmd.abm_set_event.abm_set_event_data.vb_scaling_strength_mapping = scaling_strength_map;
+ cmd.abm_set_event.abm_set_event_data.panel_mask = (1<<panel_inst);
+ cmd.abm_set_event.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_event_data);
+
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+
+ return true;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h
new file mode 100644
index 000000000000..13f54f1df780
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DMUB_ABM_LCD_H__
+#define __DMUB_ABM_LCD_H__
+
+#include "abm.h"
+
+struct abm_save_restore;
+/* DMUB-based ABM entry points implemented in dmub_abm_lcd.c. */
+void dmub_abm_init(struct abm *abm, uint32_t backlight, uint32_t user_level);
+bool dmub_abm_set_level(struct abm *abm, uint32_t level, uint8_t panel_mask);
+unsigned int dmub_abm_get_current_backlight(struct abm *abm);
+unsigned int dmub_abm_get_target_backlight(struct abm *abm);
+void dmub_abm_init_config(struct abm *abm,
+ const char *src,
+ unsigned int bytes,
+ unsigned int inst);
+/* The functions below take a panel instance index rather than a ready-made panel mask. */
+bool dmub_abm_set_pause(struct abm *abm, bool pause, unsigned int panel_inst, unsigned int stream_inst);
+bool dmub_abm_save_restore(
+ struct dc_context *dc,
+ unsigned int panel_inst,
+ struct abm_save_restore *pData);
+bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst, uint32_t pwrseq_inst);
+bool dmub_abm_set_backlight_level(struct abm *abm,
+ unsigned int backlight_pwm_u16_16,
+ unsigned int frame_ramp,
+ unsigned int panel_inst);
+bool dmub_abm_set_event(struct abm *abm, unsigned int scaling_enable, unsigned int scaling_strength_map,
+ unsigned int panel_inst);
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c
index 9baf8ca0a920..5bfa2b0d2afd 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c
@@ -47,20 +47,63 @@ void dmub_hw_lock_mgr_cmd(struct dc_dmub_srv *dmub_srv,
if (!lock)
cmd.lock_hw.lock_hw_data.should_release = 1;
- dc_dmub_srv_cmd_queue(dmub_srv, &cmd);
- dc_dmub_srv_cmd_execute(dmub_srv);
- dc_dmub_srv_wait_idle(dmub_srv);
+ dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv,
union dmub_inbox0_cmd_lock_hw hw_lock_cmd)
{
union dmub_inbox0_data_register data = { 0 };
+ /* Order matters: the inbox0 ack is cleared before the send and waited for after it. */
data.inbox0_cmd_lock_hw = hw_lock_cmd;
+ dc_dmub_srv_clear_inbox0_ack(dmub_srv);
dc_dmub_srv_send_inbox0_cmd(dmub_srv, data);
+ dc_dmub_srv_wait_for_inbox0_ack(dmub_srv);
}
-bool should_use_dmub_lock(struct dc_link *link)
+bool dmub_hw_lock_mgr_does_link_require_lock(const struct dc *dc, const struct dc_link *link)
{
+ if (!link)
+ return false;
+ /* A NULL link never requires the lock (above); a PSR-SU link always does. */
+ if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1)
+ return true;
+ /* So does any link with the Replay feature enabled. */
+ if (link->replay_settings.replay_feature_enabled)
+ return true;
+ /* PSR1 requires it only when dc_get_edp_links() reports exactly one eDP link. */
+ if (link->psr_settings.psr_version == DC_PSR_VERSION_1) {
+ struct dc_link *edp_links[MAX_NUM_EDP];
+ int edp_num;
+
+ dc_get_edp_links(dc, edp_links, &edp_num);
+ if (edp_num == 1)
+ return true;
+ }
return false;
}
+
+/*
+ * Report whether the link of any stream in @context requires the DMUB
+ * hardware lock. A NULL @context yields false.
+ */
+bool dmub_hw_lock_mgr_does_context_require_lock(const struct dc *dc, const struct dc_state *context)
+{
+	int idx;
+
+	if (context == NULL)
+		return false;
+
+	/* Stop at the first stream whose link needs the lock. */
+	for (idx = 0; idx < context->stream_count; idx++) {
+		if (dmub_hw_lock_mgr_does_link_require_lock(dc, context->streams[idx]->link))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Decide whether the DMUB hardware lock should be taken through inbox1
+ * for @link on this @dc.
+ */
+bool should_use_dmub_inbox1_lock(const struct dc *dc, const struct dc_link *link)
+{
+	/*
+	 * Never use inbox1 when the ASIC has no DMUB service, or when the
+	 * DCE version is DCN_VERSION_4_01 or later.
+	 */
+	if (dc->ctx->dmub_srv == NULL || dc->ctx->dce_version >= DCN_VERSION_4_01)
+		return false;
+
+	/* Otherwise the answer depends on the features enabled on the link. */
+	return dmub_hw_lock_mgr_does_link_require_lock(dc, link);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.h
index 5a72b168fb4a..4c80ca8484ad 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.h
@@ -37,6 +37,16 @@ void dmub_hw_lock_mgr_cmd(struct dc_dmub_srv *dmub_srv,
void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv,
union dmub_inbox0_cmd_lock_hw hw_lock_cmd);
-bool should_use_dmub_lock(struct dc_link *link);
+/**
+ * should_use_dmub_inbox1_lock() - Checks if the DMCUB hardware lock via inbox1 should be used.
+ *
+ * @dc: pointer to DC object
+ * @link: optional pointer to the link object to check for enabled link features
+ *
+ * Return: true if the inbox1 lock should be used, false otherwise
+ */
+bool should_use_dmub_inbox1_lock(const struct dc *dc, const struct dc_link *link);
+bool dmub_hw_lock_mgr_does_link_require_lock(const struct dc *dc, const struct dc_link *link);
+bool dmub_hw_lock_mgr_does_context_require_lock(const struct dc *dc, const struct dc_state *context);
#endif /*_DMUB_HW_LOCK_MGR_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c
index faad8555ddbb..98a778996e1a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c
@@ -22,20 +22,23 @@
* Authors: AMD
*/
-#include "dmub_outbox.h"
+#include "dc.h"
#include "dc_dmub_srv.h"
+#include "dmub_outbox.h"
#include "dmub/inc/dmub_cmd.h"
-/**
- * dmub_enable_outbox_notification - Sends inbox cmd to dmub to enable outbox1
- * messages with interrupt. Dmub sends outbox1
- * message and triggers outbox1 interrupt.
- * @dc: dc structure
+/*
+ * Function: dmub_enable_outbox_notification
+ *
+ * @brief
+ * Sends inbox cmd to dmub for enabling outbox notifications to x86.
+ *
+ * @param
+ * [in] dmub_srv: dmub_srv structure
*/
-void dmub_enable_outbox_notification(struct dc *dc)
+void dmub_enable_outbox_notification(struct dc_dmub_srv *dmub_srv)
{
union dmub_rb_cmd cmd;
- struct dc_context *dc_ctx = dc->ctx;
memset(&cmd, 0x0, sizeof(cmd));
cmd.outbox1_enable.header.type = DMUB_CMD__OUTBOX1_ENABLE;
@@ -45,7 +48,5 @@ void dmub_enable_outbox_notification(struct dc *dc)
sizeof(cmd.outbox1_enable.header);
cmd.outbox1_enable.enable = true;
- dc_dmub_srv_cmd_queue(dc_ctx->dmub_srv, &cmd);
- dc_dmub_srv_cmd_execute(dc_ctx->dmub_srv);
- dc_dmub_srv_wait_idle(dc_ctx->dmub_srv);
+ dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.h
index 4e0aa0d1a2d5..58ceabb9d497 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.h
@@ -26,8 +26,8 @@
#ifndef _DMUB_OUTBOX_H_
#define _DMUB_OUTBOX_H_
-#include "dc.h"
+struct dc_dmub_srv;
-void dmub_enable_outbox_notification(struct dc *dc);
+void dmub_enable_outbox_notification(struct dc_dmub_srv *dmub_srv);
#endif /* _DMUB_OUTBOX_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
index 90eb8eedacf2..87af4fdc04a6 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
@@ -33,6 +33,10 @@
#define MAX_PIPES 6
+static const uint8_t DP_SINK_DEVICE_STR_ID_1[] = {7, 1, 8, 7, 3};
+static const uint8_t DP_SINK_DEVICE_STR_ID_2[] = {7, 1, 8, 7, 5};
+static const uint8_t DP_SINK_DEVICE_STR_ID_3[] = {0x42, 0x61, 0x6c, 0x73, 0x61};
+
/*
* Convert dmcub psr state to dmcu psr state.
*/
@@ -74,6 +78,26 @@ static enum dc_psr_state convert_psr_state(uint32_t raw_state)
state = PSR_STATE5b;
else if (raw_state == 0x53)
state = PSR_STATE5c;
+ else if (raw_state == 0x4A)
+ state = PSR_STATE4_FULL_FRAME;
+ else if (raw_state == 0x4B)
+ state = PSR_STATE4a_FULL_FRAME;
+ else if (raw_state == 0x4C)
+ state = PSR_STATE4b_FULL_FRAME;
+ else if (raw_state == 0x4D)
+ state = PSR_STATE4c_FULL_FRAME;
+ else if (raw_state == 0x4E)
+ state = PSR_STATE4_FULL_FRAME_POWERUP;
+ else if (raw_state == 0x4F)
+ state = PSR_STATE4_FULL_FRAME_HW_LOCK;
+ else if (raw_state == 0x60)
+ state = PSR_STATE_HWLOCK_MGR;
+ else if (raw_state == 0x61)
+ state = PSR_STATE_POLLVUPDATE;
+ else if (raw_state == 0x62)
+ state = PSR_STATE_RELEASE_HWLOCK_MGR_FULL_FRAME;
+ else
+ state = PSR_STATE_INVALID;
return state;
}
@@ -83,23 +107,18 @@ static enum dc_psr_state convert_psr_state(uint32_t raw_state)
*/
static void dmub_psr_get_state(struct dmub_psr *dmub, enum dc_psr_state *state, uint8_t panel_inst)
{
- struct dmub_srv *srv = dmub->ctx->dmub_srv->dmub;
uint32_t raw_state = 0;
uint32_t retry_count = 0;
- enum dmub_status status;
do {
// Send gpint command and wait for ack
- status = dmub_srv_send_gpint_command(srv, DMUB_GPINT__GET_PSR_STATE, panel_inst, 30);
-
- if (status == DMUB_STATUS_OK) {
- // GPINT was executed, get response
- dmub_srv_get_gpint_response(srv, &raw_state);
+ if (dc_wake_and_execute_gpint(dmub->ctx, DMUB_GPINT__GET_PSR_STATE, panel_inst, &raw_state,
+ DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) {
*state = convert_psr_state(raw_state);
- } else
+ } else {
// Return invalid state when GPINT times out
*state = PSR_STATE_INVALID;
-
+ }
} while (++retry_count <= 1000 && *state == PSR_STATE_INVALID);
// Assert if max retry hit
@@ -133,18 +152,23 @@ static bool dmub_psr_set_version(struct dmub_psr *dmub, struct dc_stream_state *
case DC_PSR_VERSION_1:
cmd.psr_set_version.psr_set_version_data.version = PSR_VERSION_1;
break;
+ case DC_PSR_VERSION_SU_1:
+ cmd.psr_set_version.psr_set_version_data.version = PSR_VERSION_SU_1;
+ break;
case DC_PSR_VERSION_UNSUPPORTED:
default:
cmd.psr_set_version.psr_set_version_data.version = PSR_VERSION_UNSUPPORTED;
break;
}
+
+ if (cmd.psr_set_version.psr_set_version_data.version == PSR_VERSION_UNSUPPORTED)
+ return false;
+
cmd.psr_set_version.psr_set_version_data.cmd_version = DMUB_CMD_PSR_CONTROL_VERSION_1;
cmd.psr_set_version.psr_set_version_data.panel_inst = panel_inst;
cmd.psr_set_version.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_version_data);
- dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
- dc_dmub_srv_cmd_execute(dc->dmub_srv);
- dc_dmub_srv_wait_idle(dc->dmub_srv);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
return true;
}
@@ -172,9 +196,7 @@ static void dmub_psr_enable(struct dmub_psr *dmub, bool enable, bool wait, uint8
cmd.psr_enable.header.payload_bytes = 0; // Send header only
- dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
- dc_dmub_srv_cmd_execute(dc->dmub_srv);
- dc_dmub_srv_wait_idle(dc->dmub_srv);
+ dc_wake_and_execute_dmub_cmd(dc->dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
/* Below loops 1000 x 500us = 500 ms.
* Exit PSR may need to wait 1-2 frames to power up. Timeout after at
@@ -192,6 +214,7 @@ static void dmub_psr_enable(struct dmub_psr *dmub, bool enable, bool wait, uint8
break;
}
+ /* must *not* be fsleep - this can be called from high irq levels */
udelay(500);
}
@@ -222,15 +245,32 @@ static void dmub_psr_set_level(struct dmub_psr *dmub, uint16_t psr_level, uint8_
cmd.psr_set_level.psr_set_level_data.psr_level = psr_level;
cmd.psr_set_level.psr_set_level_data.cmd_version = DMUB_CMD_PSR_CONTROL_VERSION_1;
cmd.psr_set_level.psr_set_level_data.panel_inst = panel_inst;
- dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
- dc_dmub_srv_cmd_execute(dc->dmub_srv);
- dc_dmub_srv_wait_idle(dc->dmub_srv);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+/*
+ * Set PSR vtotal requirement for FreeSync PSR.
+ *
+ * Sends DMUB_CMD__SET_SINK_VTOTAL_IN_PSR_ACTIVE with @psr_vtotal_idle and
+ * @psr_vtotal_su copied into the payload. No panel instance is set in this
+ * command; the rest of the payload stays zeroed by the memset.
+ */
+static void dmub_psr_set_sink_vtotal_in_psr_active(struct dmub_psr *dmub,
+ uint16_t psr_vtotal_idle, uint16_t psr_vtotal_su)
+{
+ union dmub_rb_cmd cmd;
+ struct dc_context *dc = dmub->ctx;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.psr_set_vtotal.header.type = DMUB_CMD__PSR;
+ cmd.psr_set_vtotal.header.sub_type = DMUB_CMD__SET_SINK_VTOTAL_IN_PSR_ACTIVE;
+ cmd.psr_set_vtotal.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_vtotal_data);
+ cmd.psr_set_vtotal.psr_set_vtotal_data.psr_vtotal_idle = psr_vtotal_idle;
+ cmd.psr_set_vtotal.psr_set_vtotal_data.psr_vtotal_su = psr_vtotal_su;
+
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
-/**
+/*
* Set PSR power optimization flags.
*/
-static void dmub_psr_set_power_opt(struct dmub_psr *dmub, unsigned int power_opt)
+static void dmub_psr_set_power_opt(struct dmub_psr *dmub, unsigned int power_opt, uint8_t panel_inst)
{
union dmub_rb_cmd cmd;
struct dc_context *dc = dmub->ctx;
@@ -239,11 +279,11 @@ static void dmub_psr_set_power_opt(struct dmub_psr *dmub, unsigned int power_opt
cmd.psr_set_power_opt.header.type = DMUB_CMD__PSR;
cmd.psr_set_power_opt.header.sub_type = DMUB_CMD__SET_PSR_POWER_OPT;
cmd.psr_set_power_opt.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_power_opt_data);
+ cmd.psr_set_power_opt.psr_set_power_opt_data.cmd_version = DMUB_CMD_PSR_CONTROL_VERSION_1;
cmd.psr_set_power_opt.psr_set_power_opt_data.power_opt = power_opt;
+ cmd.psr_set_power_opt.psr_set_power_opt_data.panel_inst = panel_inst;
- dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
- dc_dmub_srv_cmd_execute(dc->dmub_srv);
- dc_dmub_srv_wait_idle(dc->dmub_srv);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
/*
@@ -254,7 +294,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub,
struct psr_context *psr_context,
uint8_t panel_inst)
{
- union dmub_rb_cmd cmd;
+ union dmub_rb_cmd cmd = { 0 };
struct dc_context *dc = dmub->ctx;
struct dmub_cmd_psr_copy_settings_data *copy_settings_data
= &cmd.psr_copy_settings.psr_copy_settings_data;
@@ -314,6 +354,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub,
copy_settings_data->otg_inst = 0;
// Misc
+ copy_settings_data->use_phy_fsm = link->ctx->dc->debug.psr_power_use_phy_fsm;
copy_settings_data->psr_level = psr_context->psr_level.u32all;
copy_settings_data->smu_optimizations_en = psr_context->allow_smu_optimizations;
copy_settings_data->multi_disp_optimizations_en = psr_context->allow_multi_disp_optimizations;
@@ -323,14 +364,67 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub,
copy_settings_data->debug.u32All = 0;
copy_settings_data->debug.bitfields.visual_confirm = dc->dc->debug.visual_confirm == VISUAL_CONFIRM_PSR;
copy_settings_data->debug.bitfields.use_hw_lock_mgr = 1;
+ copy_settings_data->debug.bitfields.force_full_frame_update = 0;
+ copy_settings_data->debug.bitfields.enable_ips_visual_confirm = dc->dc->debug.enable_ips_visual_confirm;
+
+ if (psr_context->su_granularity_required == 0)
+ copy_settings_data->su_y_granularity = 0;
+ else
+ copy_settings_data->su_y_granularity = psr_context->su_y_granularity;
+
+ copy_settings_data->line_capture_indication = 0;
+ copy_settings_data->line_time_in_us = psr_context->line_time_in_us;
+ copy_settings_data->rate_control_caps = psr_context->rate_control_caps;
copy_settings_data->fec_enable_status = (link->fec_state == dc_link_fec_enabled);
copy_settings_data->fec_enable_delay_in100us = link->dc->debug.fec_enable_delay_in100us;
copy_settings_data->cmd_version = DMUB_CMD_PSR_CONTROL_VERSION_1;
copy_settings_data->panel_inst = panel_inst;
+ copy_settings_data->dsc_enable_status = (pipe_ctx->stream->timing.flags.DSC == 1);
- dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
- dc_dmub_srv_cmd_execute(dc->dmub_srv);
- dc_dmub_srv_wait_idle(dc->dmub_srv);
+ /**
+ * WA for PSRSU+DSC on specific TCON, if DSC is enabled, force PSRSU as ffu mode(full frame update)
+ * Note that PSRSU+DSC is still under development.
+ */
+ if (copy_settings_data->dsc_enable_status &&
+ link->dpcd_caps.sink_dev_id == DP_DEVICE_ID_38EC11 &&
+ !memcmp(link->dpcd_caps.sink_dev_id_str, DP_SINK_DEVICE_STR_ID_1,
+ sizeof(DP_SINK_DEVICE_STR_ID_1)))
+ link->psr_settings.force_ffu_mode = 1;
+
+ copy_settings_data->force_ffu_mode = link->psr_settings.force_ffu_mode || psr_context->os_request_force_ffu;
+
+ if (((link->dpcd_caps.fec_cap.bits.FEC_CAPABLE &&
+ !link->dc->debug.disable_fec) &&
+ (link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT &&
+ !link->panel_config.dsc.disable_dsc_edp &&
+ link->dc->caps.edp_dsc_support)) &&
+ link->dpcd_caps.sink_dev_id == DP_DEVICE_ID_38EC11 &&
+ (!memcmp(link->dpcd_caps.sink_dev_id_str, DP_SINK_DEVICE_STR_ID_1,
+ sizeof(DP_SINK_DEVICE_STR_ID_1)) ||
+ !memcmp(link->dpcd_caps.sink_dev_id_str, DP_SINK_DEVICE_STR_ID_2,
+ sizeof(DP_SINK_DEVICE_STR_ID_2))))
+ copy_settings_data->debug.bitfields.force_wakeup_by_tps3 = 1;
+ else
+ copy_settings_data->debug.bitfields.force_wakeup_by_tps3 = 0;
+
+ if (link->psr_settings.psr_version == DC_PSR_VERSION_1 &&
+ link->dpcd_caps.sink_dev_id == DP_DEVICE_ID_0022B9 &&
+ !memcmp(link->dpcd_caps.sink_dev_id_str, DP_SINK_DEVICE_STR_ID_3,
+ sizeof(DP_SINK_DEVICE_STR_ID_3))) {
+ copy_settings_data->poweroff_before_vertical_line = 16;
+ }
+
+ //WA for PSR1 on specific TCON, require frame delay for frame re-lock
+ copy_settings_data->relock_delay_frame_cnt = 0;
+ if (link->dpcd_caps.sink_dev_id == DP_BRANCH_DEVICE_ID_001CF8)
+ copy_settings_data->relock_delay_frame_cnt = 2;
+
+ copy_settings_data->power_down_phy_before_disable_stream =
+ link->psr_settings.power_down_phy_before_disable_stream;
+
+ copy_settings_data->dsc_slice_height = psr_context->dsc_slice_height;
+
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
return true;
}
@@ -351,23 +445,22 @@ static void dmub_psr_force_static(struct dmub_psr *dmub, uint8_t panel_inst)
cmd.psr_force_static.header.sub_type = DMUB_CMD__PSR_FORCE_STATIC;
cmd.psr_enable.header.payload_bytes = 0;
- dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
- dc_dmub_srv_cmd_execute(dc->dmub_srv);
- dc_dmub_srv_wait_idle(dc->dmub_srv);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
/*
* Get PSR residency from firmware.
*/
-static void dmub_psr_get_residency(struct dmub_psr *dmub, uint32_t *residency, uint8_t panel_inst)
+static void dmub_psr_get_residency(struct dmub_psr *dmub, uint32_t *residency,
+ uint8_t panel_inst, enum psr_residency_mode mode)
{
- struct dmub_srv *srv = dmub->ctx->dmub_srv->dmub;
uint16_t param = (uint16_t)(panel_inst << 8);
- /* Send gpint command and wait for ack */
- dmub_srv_send_gpint_command(srv, DMUB_GPINT__PSR_RESIDENCY, param, 30);
+ param |= mode;
- dmub_srv_get_gpint_response(srv, residency);
+ /* Send gpint command and wait for ack */
+ dc_wake_and_execute_gpint(dmub->ctx, DMUB_GPINT__PSR_RESIDENCY, param, residency,
+ DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
}
static const struct dmub_psr_funcs psr_funcs = {
@@ -377,6 +470,7 @@ static const struct dmub_psr_funcs psr_funcs = {
.psr_set_level = dmub_psr_set_level,
.psr_force_static = dmub_psr_force_static,
.psr_get_residency = dmub_psr_get_residency,
+ .psr_set_sink_vtotal_in_psr_active = dmub_psr_set_sink_vtotal_in_psr_active,
.psr_set_power_opt = dmub_psr_set_power_opt,
};
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h
index 5dbd479660f1..a6e282d950c3 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h
@@ -26,8 +26,10 @@
#ifndef _DMUB_PSR_H_
#define _DMUB_PSR_H_
-#include "os_types.h"
-#include "dc_link.h"
+#include "dc_types.h"
+#include "dmub_cmd.h"
+struct dc_link;
+struct dmub_psr_funcs;
struct dmub_psr {
struct dc_context *ctx;
@@ -45,8 +47,10 @@ struct dmub_psr_funcs {
uint8_t panel_inst);
void (*psr_force_static)(struct dmub_psr *dmub, uint8_t panel_inst);
void (*psr_get_residency)(struct dmub_psr *dmub, uint32_t *residency,
- uint8_t panel_inst);
- void (*psr_set_power_opt)(struct dmub_psr *dmub, unsigned int power_opt);
+ uint8_t panel_inst, enum psr_residency_mode mode);
+ void (*psr_set_sink_vtotal_in_psr_active)(struct dmub_psr *dmub,
+ uint16_t psr_vtotal_idle, uint16_t psr_vtotal_su);
+ void (*psr_set_power_opt)(struct dmub_psr *dmub, unsigned int power_opt, uint8_t panel_inst);
};
struct dmub_psr *dmub_psr_create(struct dc_context *ctx);
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c
new file mode 100644
index 000000000000..cf1372aaff6c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c
@@ -0,0 +1,473 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dc.h"
+#include "link_service.h"
+#include "dc_dmub_srv.h"
+#include "dmub/dmub_srv.h"
+#include "core_types.h"
+#include "dmub_replay.h"
+
+#define DC_TRACE_LEVEL_MESSAGE(...) /* do nothing */
+
+#define MAX_PIPES 6
+
+#define GPINT_RETRY_NUM 20
+
+static const uint8_t DP_SINK_DEVICE_STR_ID_1[] = {7, 1, 8, 7, 3};
+static const uint8_t DP_SINK_DEVICE_STR_ID_2[] = {7, 1, 8, 7, 5};
+
+/*
+ * Get Replay state from firmware.
+ *
+ * Issues DMUB_GPINT__GET_REPLAY_STATE for @panel_inst and stores the reply in
+ * *@state. If the GPINT call reports failure, *@state is forced to
+ * REPLAY_STATE_INVALID. The query is repeated while *@state is
+ * REPLAY_STATE_INVALID, for at most 1001 attempts in total (the counter is
+ * incremented after each attempt and compared against 1000). If the budget is
+ * spent and *@state is still REPLAY_STATE_INVALID, ASSERT(0) fires and the
+ * caller sees REPLAY_STATE_INVALID.
+ */
+static void dmub_replay_get_state(struct dmub_replay *dmub, enum replay_state *state, uint8_t panel_inst)
+{
+ uint32_t retry_count = 0;
+
+ do {
+ // Send gpint command and wait for ack; the reply is written straight into *state
+ if (!dc_wake_and_execute_gpint(dmub->ctx, DMUB_GPINT__GET_REPLAY_STATE, panel_inst,
+ (uint32_t *)state, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) {
+ // Report invalid state when the GPINT call fails (this also keeps the loop retrying)
+ *state = REPLAY_STATE_INVALID;
+ }
+ } while (++retry_count <= 1000 && *state == REPLAY_STATE_INVALID);
+
+ // Assert if max retry hit without ever obtaining a valid state
+ if (retry_count >= 1000 && *state == REPLAY_STATE_INVALID) {
+ ASSERT(0);
+ /* To-do: Add retry fail log */
+ }
+}
+
+/*
+ * Enable/Disable Replay.
+ *
+ * Sends a DMUB_CMD__REPLAY_ENABLE command for @panel_inst carrying
+ * REPLAY_ENABLE or REPLAY_DISABLE according to @enable. When @wait is true the
+ * firmware state is then polled through dmub_replay_get_state() until it
+ * leaves REPLAY_STATE_0 (enable) or returns to REPLAY_STATE_0 (disable).
+ * ASSERT(0) fires only if every poll was used without reaching that state.
+ */
+static void dmub_replay_enable(struct dmub_replay *dmub, bool enable, bool wait, uint8_t panel_inst)
+{
+ union dmub_rb_cmd cmd;
+ struct dc_context *dc = dmub->ctx;
+ uint32_t retry_count;
+ enum replay_state state = REPLAY_STATE_0;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.replay_enable.header.type = DMUB_CMD__REPLAY;
+ cmd.replay_enable.data.panel_inst = panel_inst;
+
+ cmd.replay_enable.header.sub_type = DMUB_CMD__REPLAY_ENABLE;
+ if (enable)
+ cmd.replay_enable.data.enable = REPLAY_ENABLE;
+ else
+ cmd.replay_enable.data.enable = REPLAY_DISABLE;
+
+ cmd.replay_enable.header.payload_bytes = sizeof(struct dmub_rb_cmd_replay_enable_data);
+
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+
+ /* Below loops up to 1001 x 500us ~= 500 ms.
+ * Exit REPLAY may need to wait 1-2 frames to power up. Timeout after at
+ * least a few frames. Should never hit the max retry assert below.
+ */
+ if (wait) {
+ for (retry_count = 0; retry_count <= 1000; retry_count++) {
+ dmub_replay_get_state(dmub, &state, panel_inst);
+
+ if (enable) {
+ /* NOTE(review): a failed query (REPLAY_STATE_INVALID) presumably also passes this test - confirm intended */
+ if (state != REPLAY_STATE_0)
+ break;
+ } else {
+ if (state == REPLAY_STATE_0)
+ break;
+ }
+
+ /* must *not* be fsleep - this can be called from high irq levels */
+ udelay(500);
+ }
+
+ /* assert if max retry hit: the loop only reaches 1001 when no poll
+ * succeeded, whereas a break on the final poll leaves it at 1000
+ */
+ if (retry_count > 1000)
+ ASSERT(0);
+ }
+}
+
+/*
+ * Set REPLAY power optimization flags.
+ *
+ * Builds a DMUB_CMD__SET_REPLAY_POWER_OPT command carrying @power_opt and
+ * @panel_inst and submits it through dc_wake_and_execute_dmub_cmd() with
+ * DM_DMUB_WAIT_TYPE_WAIT. The result of the submission is not checked.
+ */
+static void dmub_replay_set_power_opt(struct dmub_replay *dmub, unsigned int power_opt, uint8_t panel_inst)
+{
+ union dmub_rb_cmd cmd;
+ struct dc_context *dc = dmub->ctx;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.replay_set_power_opt.header.type = DMUB_CMD__REPLAY;
+ cmd.replay_set_power_opt.header.sub_type = DMUB_CMD__SET_REPLAY_POWER_OPT;
+ cmd.replay_set_power_opt.header.payload_bytes = sizeof(struct dmub_cmd_replay_set_power_opt_data);
+ cmd.replay_set_power_opt.replay_set_power_opt_data.power_opt = power_opt;
+ cmd.replay_set_power_opt.replay_set_power_opt_data.panel_inst = panel_inst;
+
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+/*
+ * Setup Replay by sending replay hw context values to firmware.
+ *
+ * Looks through the first MAX_PIPES pipes of the current DC state for the
+ * first one whose stream is driven by @link with an eDP connector signal, then
+ * fills a DMUB_CMD__REPLAY_COPY_SETTINGS command from @replay_context, that
+ * pipe, @link and the DC debug options, and submits it to DMUB.
+ *
+ * Returns false (without sending anything) when no such pipe exists, and true
+ * once the command has been submitted; the result of the submission itself is
+ * not checked.
+ *
+ * NOTE(review): no register programming is visible in this function, although
+ * the original comment mentioned programming phy registers - confirm.
+ */
+static bool dmub_replay_copy_settings(struct dmub_replay *dmub,
+ struct dc_link *link,
+ struct replay_context *replay_context,
+ uint8_t panel_inst)
+{
+ union dmub_rb_cmd cmd;
+ struct dc_context *dc = dmub->ctx;
+ struct dmub_cmd_replay_copy_settings_data *copy_settings_data
+ = &cmd.replay_copy_settings.replay_copy_settings_data;
+ struct pipe_ctx *pipe_ctx = NULL;
+ struct resource_context *res_ctx = &link->ctx->dc->current_state->res_ctx;
+ int i = 0;
+
+ for (i = 0; i < MAX_PIPES; i++) {
+ if (res_ctx &&
+ res_ctx->pipe_ctx[i].stream &&
+ res_ctx->pipe_ctx[i].stream->link &&
+ res_ctx->pipe_ctx[i].stream->link == link &&
+ res_ctx->pipe_ctx[i].stream->link->connector_signal == SIGNAL_TYPE_EDP) {
+ pipe_ctx = &res_ctx->pipe_ctx[i];
+ //TODO: refactor for multi edp support
+ break;
+ }
+ }
+
+ if (!pipe_ctx)
+ return false;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.replay_copy_settings.header.type = DMUB_CMD__REPLAY;
+ cmd.replay_copy_settings.header.sub_type = DMUB_CMD__REPLAY_COPY_SETTINGS;
+ cmd.replay_copy_settings.header.payload_bytes = sizeof(struct dmub_cmd_replay_copy_settings_data);
+
+ // HW insts
+ copy_settings_data->aux_inst = replay_context->aux_inst;
+ copy_settings_data->digbe_inst = replay_context->digbe_inst;
+ copy_settings_data->digfe_inst = replay_context->digfe_inst;
+
+ if (pipe_ctx->plane_res.dpp)
+ copy_settings_data->dpp_inst = pipe_ctx->plane_res.dpp->inst;
+ else
+ copy_settings_data->dpp_inst = 0;
+ if (pipe_ctx->stream_res.tg)
+ copy_settings_data->otg_inst = pipe_ctx->stream_res.tg->inst;
+ else
+ copy_settings_data->otg_inst = 0;
+
+ copy_settings_data->dpphy_inst = link->link_enc->transmitter;
+
+ // Misc
+ copy_settings_data->line_time_in_ns = replay_context->line_time_in_ns;
+ copy_settings_data->panel_inst = panel_inst;
+ copy_settings_data->debug.u32All = link->replay_settings.config.debug_flags;
+ copy_settings_data->pixel_deviation_per_line = link->dpcd_caps.pr_info.pixel_deviation_per_line;
+ copy_settings_data->max_deviation_line = link->dpcd_caps.pr_info.max_deviation_line;
+ copy_settings_data->smu_optimizations_en = link->replay_settings.replay_smu_opt_enable;
+ copy_settings_data->replay_timing_sync_supported = link->replay_settings.config.replay_timing_sync_supported;
+ copy_settings_data->replay_support_fast_resync_in_ultra_sleep_mode = link->replay_settings.config.replay_support_fast_resync_in_ultra_sleep_mode;
+
+ copy_settings_data->debug.bitfields.enable_ips_visual_confirm = dc->dc->debug.enable_ips_visual_confirm;
+
+ copy_settings_data->flags.u32All = 0;
+ copy_settings_data->flags.bitfields.fec_enable_status = (link->fec_state == dc_link_fec_enabled);
+ copy_settings_data->flags.bitfields.dsc_enable_status = (pipe_ctx->stream->timing.flags.DSC == 1);
+ /* WA for a specific TCON: set force_wakeup_by_tps3 only when the sink is FEC
+ * capable with FEC not disabled by debug, eDP DSC is supported and not
+ * disabled, the sink device id is DP_DEVICE_ID_38EC11 and its id string
+ * matches DP_SINK_DEVICE_STR_ID_1 or DP_SINK_DEVICE_STR_ID_2.
+ * NOTE(review): the original comment described a PSRSU+DSC "ffu mode"
+ * workaround, which does not match the flag set here - confirm the intent.
+ */
+ if (((link->dpcd_caps.fec_cap.bits.FEC_CAPABLE &&
+ !link->dc->debug.disable_fec) &&
+ (link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT &&
+ !link->panel_config.dsc.disable_dsc_edp &&
+ link->dc->caps.edp_dsc_support)) &&
+ link->dpcd_caps.sink_dev_id == DP_DEVICE_ID_38EC11 &&
+ (!memcmp(link->dpcd_caps.sink_dev_id_str, DP_SINK_DEVICE_STR_ID_1,
+ sizeof(DP_SINK_DEVICE_STR_ID_1)) ||
+ !memcmp(link->dpcd_caps.sink_dev_id_str, DP_SINK_DEVICE_STR_ID_2,
+ sizeof(DP_SINK_DEVICE_STR_ID_2))))
+ copy_settings_data->flags.bitfields.force_wakeup_by_tps3 = 1;
+ else
+ copy_settings_data->flags.bitfields.force_wakeup_by_tps3 = 0;
+
+ copy_settings_data->flags.bitfields.alpm_mode = (enum dmub_alpm_mode)link->replay_settings.config.alpm_mode;
+ if (link->replay_settings.config.alpm_mode == DC_ALPM_AUXLESS) {
+ copy_settings_data->auxless_alpm_data.lfps_setup_ns = dc->dc->debug.auxless_alpm_lfps_setup_ns;
+ copy_settings_data->auxless_alpm_data.lfps_period_ns = dc->dc->debug.auxless_alpm_lfps_period_ns;
+ copy_settings_data->auxless_alpm_data.lfps_silence_ns = dc->dc->debug.auxless_alpm_lfps_silence_ns;
+ copy_settings_data->auxless_alpm_data.lfps_t1_t2_override_us =
+ dc->dc->debug.auxless_alpm_lfps_t1t2_us;
+ copy_settings_data->auxless_alpm_data.lfps_t1_t2_offset_us =
+ dc->dc->debug.auxless_alpm_lfps_t1t2_offset_us;
+ copy_settings_data->auxless_alpm_data.lttpr_count = link->dc->link_srv->dp_get_lttpr_count(link);
+ }
+
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+
+ return true;
+}
+
+/*
+ * Set coasting vtotal.
+ *
+ * Sends a DMUB_CMD__REPLAY_SET_COASTING_VTOTAL command. The 32-bit
+ * @coasting_vtotal is split into its low 16 bits (coasting_vtotal) and its
+ * high 16 bits (coasting_vtotal_high); @frame_skip_number is passed through
+ * unchanged. @panel_inst is accepted but not written into the command here.
+ */
+static void dmub_replay_set_coasting_vtotal(struct dmub_replay *dmub,
+ uint32_t coasting_vtotal,
+ uint8_t panel_inst,
+ uint16_t frame_skip_number)
+{
+ union dmub_rb_cmd cmd;
+ struct dc_context *dc = dmub->ctx;
+ struct dmub_rb_cmd_replay_set_coasting_vtotal *pCmd = NULL;
+
+ pCmd = &(cmd.replay_set_coasting_vtotal);
+
+ memset(&cmd, 0, sizeof(cmd));
+ pCmd->header.type = DMUB_CMD__REPLAY;
+ pCmd->header.sub_type = DMUB_CMD__REPLAY_SET_COASTING_VTOTAL;
+ pCmd->header.payload_bytes = sizeof(struct dmub_cmd_replay_set_coasting_vtotal_data);
+ pCmd->replay_set_coasting_vtotal_data.coasting_vtotal = (coasting_vtotal & 0xFFFF);
+ pCmd->replay_set_coasting_vtotal_data.coasting_vtotal_high = (coasting_vtotal & 0xFFFF0000) >> 16;
+ pCmd->replay_set_coasting_vtotal_data.frame_skip_number = frame_skip_number;
+
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+/*
+ * Get Replay residency from firmware.
+ *
+ * Builds the 16-bit GPINT parameter from @panel_inst shifted into the upper
+ * byte, the field bits selected by @mode (an unrecognised mode adds no bits)
+ * and REPLAY_RESIDENCY_ENABLE when @is_start is true, then issues
+ * DMUB_GPINT__REPLAY_RESIDENCY with the reply written to *@residency.
+ *
+ * The GPINT is attempted up to GPINT_RETRY_NUM times with a 100us busy-wait
+ * after each failure; the function returns on the first success. If every
+ * attempt fails, *@residency is set to 0.
+ */
+static void dmub_replay_residency(struct dmub_replay *dmub, uint8_t panel_inst,
+ uint32_t *residency, const bool is_start, enum pr_residency_mode mode)
+{
+ uint16_t param = (uint16_t)(panel_inst << 8);
+ uint32_t i = 0;
+
+ switch (mode) {
+ case PR_RESIDENCY_MODE_PHY:
+ param |= REPLAY_RESIDENCY_FIELD_MODE_PHY;
+ break;
+ case PR_RESIDENCY_MODE_ALPM:
+ param |= REPLAY_RESIDENCY_FIELD_MODE_ALPM;
+ break;
+ case PR_RESIDENCY_MODE_IPS2:
+ param |= REPLAY_RESIDENCY_REVISION_1;
+ param |= REPLAY_RESIDENCY_FIELD_MODE2_IPS;
+ break;
+ case PR_RESIDENCY_MODE_FRAME_CNT:
+ param |= REPLAY_RESIDENCY_REVISION_1;
+ param |= REPLAY_RESIDENCY_FIELD_MODE2_FRAME_CNT;
+ break;
+ case PR_RESIDENCY_MODE_ENABLEMENT_PERIOD:
+ param |= REPLAY_RESIDENCY_REVISION_1;
+ param |= REPLAY_RESIDENCY_FIELD_MODE2_EN_PERIOD;
+ break;
+ default:
+ break;
+ }
+
+ if (is_start)
+ param |= REPLAY_RESIDENCY_ENABLE;
+
+ for (i = 0; i < GPINT_RETRY_NUM; i++) {
+ // Send gpint command and wait for ack; done as soon as one attempt succeeds
+ if (dc_wake_and_execute_gpint(dmub->ctx, DMUB_GPINT__REPLAY_RESIDENCY, param,
+ residency, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+ return;
+
+ udelay(100);
+ }
+
+ // every GPINT attempt failed: report a residency of 0
+ *residency = 0;
+}
+
+/*
+ * Set REPLAY power optimization flags and coasting vtotal.
+ *
+ * Sends both settings in a single
+ * DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL command: @power_opt and
+ * @panel_inst go into the power-opt data, while @coasting_vtotal (split into
+ * low and high 16-bit halves) and @frame_skip_number go into the coasting
+ * vtotal data. payload_bytes is the size of the whole command minus its
+ * header.
+ */
+static void dmub_replay_set_power_opt_and_coasting_vtotal(struct dmub_replay *dmub,
+ unsigned int power_opt, uint8_t panel_inst, uint32_t coasting_vtotal, uint16_t frame_skip_number)
+{
+ union dmub_rb_cmd cmd;
+ struct dc_context *dc = dmub->ctx;
+ struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal *pCmd = NULL;
+
+ pCmd = &(cmd.replay_set_power_opt_and_coasting_vtotal);
+
+ memset(&cmd, 0, sizeof(cmd));
+ pCmd->header.type = DMUB_CMD__REPLAY;
+ pCmd->header.sub_type = DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL;
+ pCmd->header.payload_bytes =
+ sizeof(struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal) -
+ sizeof(struct dmub_cmd_header);
+ pCmd->replay_set_power_opt_data.power_opt = power_opt;
+ pCmd->replay_set_power_opt_data.panel_inst = panel_inst;
+ pCmd->replay_set_coasting_vtotal_data.coasting_vtotal = (coasting_vtotal & 0xFFFF);
+ pCmd->replay_set_coasting_vtotal_data.coasting_vtotal_high = (coasting_vtotal & 0xFFFF0000) >> 16;
+ pCmd->replay_set_coasting_vtotal_data.frame_skip_number = frame_skip_number;
+
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+/*
+ * send Replay general cmd to DMUB.
+ *
+ * Translates @msg plus the matching member of @cmd_element into a
+ * DMUB_CMD__REPLAY command and submits it. Each supported message sets its own
+ * sub_type, a payload size equal to the command size minus the header, and
+ * copies the fields of the corresponding @cmd_element member.
+ *
+ * Nothing is sent when @dmub or @cmd_element is NULL, when dmub->ctx is NULL,
+ * or when @msg is Replay_Msg_Not_Support or any value not handled by the
+ * switch below.
+ */
+static void dmub_replay_send_cmd(struct dmub_replay *dmub,
+ enum replay_FW_Message_type msg, union dmub_replay_cmd_set *cmd_element)
+{
+ union dmub_rb_cmd cmd;
+ struct dc_context *ctx = NULL;
+
+ if (dmub == NULL || cmd_element == NULL)
+ return;
+
+ ctx = dmub->ctx;
+ if (ctx != NULL) {
+
+ if (msg != Replay_Msg_Not_Support) {
+ memset(&cmd, 0, sizeof(cmd));
+ //Header: the type is common to every message; it is written through the timing-sync member of the union
+ cmd.replay_set_timing_sync.header.type = DMUB_CMD__REPLAY;
+ } else
+ return;
+ } else
+ return;
+
+ switch (msg) {
+ case Replay_Set_Timing_Sync_Supported:
+ //Header
+ cmd.replay_set_timing_sync.header.sub_type =
+ DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED;
+ cmd.replay_set_timing_sync.header.payload_bytes =
+ sizeof(struct dmub_rb_cmd_replay_set_timing_sync) -
+ sizeof(struct dmub_cmd_header);
+ //Cmd Body
+ cmd.replay_set_timing_sync.replay_set_timing_sync_data.panel_inst =
+ cmd_element->sync_data.panel_inst;
+ cmd.replay_set_timing_sync.replay_set_timing_sync_data.timing_sync_supported =
+ cmd_element->sync_data.timing_sync_supported;
+ break;
+ case Replay_Set_Residency_Frameupdate_Timer:
+ //Header
+ cmd.replay_set_frameupdate_timer.header.sub_type =
+ DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER;
+ cmd.replay_set_frameupdate_timer.header.payload_bytes =
+ sizeof(struct dmub_rb_cmd_replay_set_frameupdate_timer) -
+ sizeof(struct dmub_cmd_header);
+ //Cmd Body (panel_inst is read from the union's top-level member here, unlike the other cases)
+ cmd.replay_set_frameupdate_timer.data.panel_inst =
+ cmd_element->panel_inst;
+ cmd.replay_set_frameupdate_timer.data.enable =
+ cmd_element->timer_data.enable;
+ cmd.replay_set_frameupdate_timer.data.frameupdate_count =
+ cmd_element->timer_data.frameupdate_count;
+ break;
+ case Replay_Set_Pseudo_VTotal:
+ //Header
+ cmd.replay_set_pseudo_vtotal.header.sub_type =
+ DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL;
+ cmd.replay_set_pseudo_vtotal.header.payload_bytes =
+ sizeof(struct dmub_rb_cmd_replay_set_pseudo_vtotal) -
+ sizeof(struct dmub_cmd_header);
+ //Cmd Body
+ cmd.replay_set_pseudo_vtotal.data.panel_inst =
+ cmd_element->pseudo_vtotal_data.panel_inst;
+ cmd.replay_set_pseudo_vtotal.data.vtotal =
+ cmd_element->pseudo_vtotal_data.vtotal;
+ break;
+ case Replay_Disabled_Adaptive_Sync_SDP:
+ //Header
+ cmd.replay_disabled_adaptive_sync_sdp.header.sub_type =
+ DMUB_CMD__REPLAY_DISABLED_ADAPTIVE_SYNC_SDP;
+ cmd.replay_disabled_adaptive_sync_sdp.header.payload_bytes =
+ sizeof(struct dmub_rb_cmd_replay_disabled_adaptive_sync_sdp) -
+ sizeof(struct dmub_cmd_header);
+ //Cmd Body
+ cmd.replay_disabled_adaptive_sync_sdp.data.panel_inst =
+ cmd_element->disabled_adaptive_sync_sdp_data.panel_inst;
+ cmd.replay_disabled_adaptive_sync_sdp.data.force_disabled =
+ cmd_element->disabled_adaptive_sync_sdp_data.force_disabled;
+ break;
+ case Replay_Set_Version:
+ //Header
+ cmd.replay_set_version.header.sub_type =
+ DMUB_CMD__REPLAY_SET_VERSION;
+ cmd.replay_set_version.header.payload_bytes =
+ sizeof(struct dmub_rb_cmd_replay_set_version) -
+ sizeof(struct dmub_cmd_header);
+ //Cmd Body
+ cmd.replay_set_version.replay_set_version_data.panel_inst =
+ cmd_element->version_data.panel_inst;
+ cmd.replay_set_version.replay_set_version_data.version =
+ cmd_element->version_data.version;
+ break;
+ case Replay_Set_General_Cmd:
+ //Header
+ cmd.replay_set_general_cmd.header.sub_type =
+ DMUB_CMD__REPLAY_SET_GENERAL_CMD;
+ cmd.replay_set_general_cmd.header.payload_bytes =
+ sizeof(struct dmub_rb_cmd_replay_set_general_cmd) -
+ sizeof(struct dmub_cmd_header);
+ //Cmd Body
+ cmd.replay_set_general_cmd.data.panel_inst =
+ cmd_element->set_general_cmd_data.panel_inst;
+ cmd.replay_set_general_cmd.data.subtype =
+ cmd_element->set_general_cmd_data.subtype;
+ cmd.replay_set_general_cmd.data.param1 =
+ cmd_element->set_general_cmd_data.param1;
+ cmd.replay_set_general_cmd.data.param2 =
+ cmd_element->set_general_cmd_data.param2;
+ break;
+ case Replay_Msg_Not_Support:
+ default:
+ return;
+ break;
+ }
+
+ dc_wake_and_execute_dmub_cmd(ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+static const struct dmub_replay_funcs replay_funcs = { /* Replay ops table installed by dmub_replay_construct() */
+ .replay_copy_settings = dmub_replay_copy_settings,
+ .replay_enable = dmub_replay_enable,
+ .replay_get_state = dmub_replay_get_state,
+ .replay_set_power_opt = dmub_replay_set_power_opt,
+ .replay_set_coasting_vtotal = dmub_replay_set_coasting_vtotal,
+ .replay_residency = dmub_replay_residency,
+ .replay_set_power_opt_and_coasting_vtotal = dmub_replay_set_power_opt_and_coasting_vtotal,
+ .replay_send_cmd = dmub_replay_send_cmd,
+};
+
+/*
+ * Construct Replay object.
+ *
+ * Stores @ctx in @replay and points its funcs at the static replay_funcs
+ * table. Neither pointer is validated here.
+ */
+static void dmub_replay_construct(struct dmub_replay *replay, struct dc_context *ctx)
+{
+ replay->ctx = ctx;
+ replay->funcs = &replay_funcs;
+}
+
+/*
+ * Allocate and initialize Replay object.
+ *
+ * Allocates a zeroed struct dmub_replay with GFP_KERNEL and initializes it via
+ * dmub_replay_construct(). Returns the new object, or NULL (after
+ * BREAK_TO_DEBUGGER()) when the allocation fails. The object is released with
+ * dmub_replay_destroy().
+ */
+struct dmub_replay *dmub_replay_create(struct dc_context *ctx)
+{
+ struct dmub_replay *replay = kzalloc(sizeof(struct dmub_replay), GFP_KERNEL);
+
+ if (replay == NULL) {
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+
+ dmub_replay_construct(replay, ctx);
+
+ return replay;
+}
+
+/*
+ * Deallocate Replay object.
+ *
+ * Frees the object *@dmub points to and resets *@dmub to NULL so the caller's
+ * pointer cannot be reused. @dmub itself is dereferenced unconditionally, so
+ * it must not be NULL.
+ */
+void dmub_replay_destroy(struct dmub_replay **dmub)
+{
+ kfree(*dmub);
+ *dmub = NULL;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
new file mode 100644
index 000000000000..07c79739a980
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef _DMUB_REPLAY_H_
+#define _DMUB_REPLAY_H_
+
+#include "dc_types.h"
+#include "dmub_cmd.h"
+struct dc_link;
+struct dmub_replay_funcs;
+
+struct dmub_replay { /* Replay object: a DC context plus the ops table operating on it */
+ struct dc_context *ctx; /* context handed to the DMUB command/GPINT helpers */
+ const struct dmub_replay_funcs *funcs; /* Replay operations, see struct dmub_replay_funcs */
+};
+
+struct dmub_replay_funcs { /* Replay operations exposed through struct dmub_replay */
+ void (*replay_get_state)(struct dmub_replay *dmub, enum replay_state *state,
+ uint8_t panel_inst); /* read the Replay state of panel_inst into *state */
+ void (*replay_enable)(struct dmub_replay *dmub, bool enable, bool wait,
+ uint8_t panel_inst); /* enable or disable Replay; wait selects polling for the state change */
+ bool (*replay_copy_settings)(struct dmub_replay *dmub, struct dc_link *link,
+ struct replay_context *replay_context, uint8_t panel_inst); /* send Replay hw context for link; bool result */
+ void (*replay_set_power_opt)(struct dmub_replay *dmub, unsigned int power_opt,
+ uint8_t panel_inst); /* set Replay power optimization flags */
+ void (*replay_send_cmd)(struct dmub_replay *dmub,
+ enum replay_FW_Message_type msg, union dmub_replay_cmd_set *cmd_element); /* send a general Replay message */
+ void (*replay_set_coasting_vtotal)(struct dmub_replay *dmub, uint32_t coasting_vtotal,
+ uint8_t panel_inst, uint16_t frame_skip_number); /* set coasting vtotal and frame skip number */
+ void (*replay_residency)(struct dmub_replay *dmub,
+ uint8_t panel_inst, uint32_t *residency, const bool is_start, const enum pr_residency_mode mode); /* query residency for the given mode into *residency */
+ void (*replay_set_power_opt_and_coasting_vtotal)(struct dmub_replay *dmub,
+ unsigned int power_opt, uint8_t panel_inst, uint32_t coasting_vtotal,
+ uint16_t frame_skip_number); /* set power opt flags and coasting vtotal together */
+};
+
+struct dmub_replay *dmub_replay_create(struct dc_context *ctx);
+void dmub_replay_destroy(struct dmub_replay **dmub);
+
+
+#endif /* _DMUB_REPLAY_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/Makefile b/drivers/gpu/drm/amd/display/dc/dce100/Makefile
deleted file mode 100644
index ff20c47f559e..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dce100/Makefile
+++ /dev/null
@@ -1,46 +0,0 @@
-#
-# Copyright 2017 Advanced Micro Devices, Inc.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-#
-# Makefile for the 'controller' sub-component of DAL.
-# It provides the control and status of HW CRTC block.
-
-CFLAGS_$(AMDDALPATH)/dc/dce100/dce100_resource.o = $(call cc-disable-warning, override-init)
-
-DCE100 = dce100_resource.o dce100_hw_sequencer.o
-
-AMD_DAL_DCE100 = $(addprefix $(AMDDALPATH)/dc/dce100/,$(DCE100))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_DCE100)
-
-
-###############################################################################
-# DCE 10x
-###############################################################################
-ifdef 0#CONFIG_DRM_AMD_DC_DCE11_0
-TG_DCE100 = dce100_resource.o
-
-AMD_DAL_TG_DCE100 = $(addprefix \
- $(AMDDALPATH)/dc/dce100/,$(TG_DCE100))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_TG_DCE100)
-endif
-
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c
deleted file mode 100644
index 753cb8edd996..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright 2015 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-#include "dm_services.h"
-#include "dc.h"
-#include "core_types.h"
-#include "clk_mgr.h"
-#include "dce100_hw_sequencer.h"
-#include "resource.h"
-
-#include "dce110/dce110_hw_sequencer.h"
-
-/* include DCE10 register header files */
-#include "dce/dce_10_0_d.h"
-#include "dce/dce_10_0_sh_mask.h"
-
-struct dce100_hw_seq_reg_offsets {
- uint32_t blnd;
- uint32_t crtc;
-};
-
-static const struct dce100_hw_seq_reg_offsets reg_offsets[] = {
-{
- .crtc = (mmCRTC0_CRTC_GSL_CONTROL - mmCRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTC1_CRTC_GSL_CONTROL - mmCRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTC2_CRTC_GSL_CONTROL - mmCRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTC3_CRTC_GSL_CONTROL - mmCRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTC4_CRTC_GSL_CONTROL - mmCRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTC5_CRTC_GSL_CONTROL - mmCRTC_GSL_CONTROL),
-}
-};
-
-#define HW_REG_CRTC(reg, id)\
- (reg + reg_offsets[id].crtc)
-
-/*******************************************************************************
- * Private definitions
- ******************************************************************************/
-/***************************PIPE_CONTROL***********************************/
-
-bool dce100_enable_display_power_gating(
- struct dc *dc,
- uint8_t controller_id,
- struct dc_bios *dcb,
- enum pipe_gating_control power_gating)
-{
- enum bp_result bp_result = BP_RESULT_OK;
- enum bp_pipe_control_action cntl;
- struct dc_context *ctx = dc->ctx;
-
- if (power_gating == PIPE_GATING_CONTROL_INIT)
- cntl = ASIC_PIPE_INIT;
- else if (power_gating == PIPE_GATING_CONTROL_ENABLE)
- cntl = ASIC_PIPE_ENABLE;
- else
- cntl = ASIC_PIPE_DISABLE;
-
- if (!(power_gating == PIPE_GATING_CONTROL_INIT && controller_id != 0)){
-
- bp_result = dcb->funcs->enable_disp_power_gating(
- dcb, controller_id + 1, cntl);
-
- /* Revert MASTER_UPDATE_MODE to 0 because bios sets it 2
- * by default when command table is called
- */
- dm_write_reg(ctx,
- HW_REG_CRTC(mmMASTER_UPDATE_MODE, controller_id),
- 0);
- }
-
- if (bp_result == BP_RESULT_OK)
- return true;
- else
- return false;
-}
-
-void dce100_prepare_bandwidth(
- struct dc *dc,
- struct dc_state *context)
-{
- dce110_set_safe_displaymarks(&context->res_ctx, dc->res_pool);
-
- dc->clk_mgr->funcs->update_clocks(
- dc->clk_mgr,
- context,
- false);
-}
-
-void dce100_optimize_bandwidth(
- struct dc *dc,
- struct dc_state *context)
-{
- dce110_set_safe_displaymarks(&context->res_ctx, dc->res_pool);
-
- dc->clk_mgr->funcs->update_clocks(
- dc->clk_mgr,
- context,
- true);
-}
-
-/**************************************************************************/
-
-void dce100_hw_sequencer_construct(struct dc *dc)
-{
- dce110_hw_sequencer_construct(dc);
-
- dc->hwseq->funcs.enable_display_power_gating = dce100_enable_display_power_gating;
- dc->hwss.prepare_bandwidth = dce100_prepare_bandwidth;
- dc->hwss.optimize_bandwidth = dce100_optimize_bandwidth;
-}
-
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h
deleted file mode 100644
index 34518da20009..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
-* Copyright 2012-15 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#ifndef __DC_HWSS_DCE100_H__
-#define __DC_HWSS_DCE100_H__
-
-#include "core_types.h"
-#include "hw_sequencer_private.h"
-
-struct dc;
-struct dc_state;
-
-void dce100_hw_sequencer_construct(struct dc *dc);
-
-void dce100_prepare_bandwidth(
- struct dc *dc,
- struct dc_state *context);
-
-void dce100_optimize_bandwidth(
- struct dc *dc,
- struct dc_state *context);
-
-bool dce100_enable_display_power_gating(struct dc *dc, uint8_t controller_id,
- struct dc_bios *dcb,
- enum pipe_gating_control power_gating);
-
-#endif /* __DC_HWSS_DCE100_H__ */
-
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/Makefile b/drivers/gpu/drm/amd/display/dc/dce110/Makefile
index 84ab48df0c26..c307f040e48f 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce110/Makefile
@@ -23,11 +23,11 @@
# Makefile for the 'controller' sub-component of DAL.
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce110/dce110_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce110/dce110_resource.o = -Wno-override-init
DCE110 = dce110_timing_generator.o \
-dce110_compressor.o dce110_hw_sequencer.o dce110_resource.o \
-dce110_opp_regamma_v.o dce110_opp_csc_v.o dce110_timing_generator_v.o \
+dce110_compressor.o dce110_opp_regamma_v.o \
+dce110_opp_csc_v.o dce110_timing_generator_v.o \
dce110_mem_input_v.o dce110_opp_v.o dce110_transform_v.o
AMD_DAL_DCE110 = $(addprefix $(AMDDALPATH)/dc/dce110/,$(DCE110))
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c
index 44564a4742b5..59a0961b49da 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c
@@ -23,9 +23,6 @@
*
*/
-#include <linux/delay.h>
-#include <linux/slab.h>
-
#include "dm_services.h"
#include "dce/dce_11_0_d.h"
@@ -412,19 +409,6 @@ void dce110_compressor_destroy(struct compressor **compressor)
*compressor = NULL;
}
-void get_max_support_fbc_buffersize(unsigned int *max_x, unsigned int *max_y)
-{
- *max_x = FBC_MAX_X;
- *max_y = FBC_MAX_Y;
-
- /* if (m_smallLocalFrameBufferMemory == 1)
- * {
- * *max_x = FBC_MAX_X_SG;
- * *max_y = FBC_MAX_Y_SG;
- * }
- */
-}
-
static const struct compressor_funcs dce110_compressor_funcs = {
.power_up_fbc = dce110_compressor_power_up_fbc,
.enable_fbc = dce110_compressor_enable_fbc,
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.h
index 26c7335a1cbf..223c57941e92 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.h
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.h
@@ -75,7 +75,5 @@ void dce110_compressor_program_lpt_control(struct compressor *cp,
bool dce110_compressor_is_lpt_enabled_in_hw(struct compressor *cp);
-void get_max_support_fbc_buffersize(unsigned int *max_x, unsigned int *max_y);
-
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
deleted file mode 100644
index 24e47df526f6..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ /dev/null
@@ -1,3130 +0,0 @@
-/*
- * Copyright 2015 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include <linux/delay.h>
-
-#include "dm_services.h"
-#include "dc.h"
-#include "dc_bios_types.h"
-#include "core_types.h"
-#include "core_status.h"
-#include "resource.h"
-#include "dm_helpers.h"
-#include "dce110_timing_generator.h"
-#include "dce/dce_hwseq.h"
-#include "gpio_service_interface.h"
-
-#include "dce110_compressor.h"
-
-#include "bios/bios_parser_helper.h"
-#include "timing_generator.h"
-#include "mem_input.h"
-#include "opp.h"
-#include "ipp.h"
-#include "transform.h"
-#include "stream_encoder.h"
-#include "link_encoder.h"
-#include "link_enc_cfg.h"
-#include "link_hwss.h"
-#include "dc_link_dp.h"
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-#include "dccg.h"
-#endif
-#include "clock_source.h"
-#include "clk_mgr.h"
-#include "abm.h"
-#include "audio.h"
-#include "reg_helper.h"
-#include "panel_cntl.h"
-#include "inc/link_dpcd.h"
-#include "dpcd_defs.h"
-/* include DCE11 register header files */
-#include "dce/dce_11_0_d.h"
-#include "dce/dce_11_0_sh_mask.h"
-#include "custom_float.h"
-
-#include "atomfirmware.h"
-
-#include "dcn10/dcn10_hw_sequencer.h"
-
-#define GAMMA_HW_POINTS_NUM 256
-
-/*
- * All values are in milliseconds;
- * For eDP, after power-up/power/down,
- * 300/500 msec max. delay from LCDVCC to black video generation
- */
-#define PANEL_POWER_UP_TIMEOUT 300
-#define PANEL_POWER_DOWN_TIMEOUT 500
-#define HPD_CHECK_INTERVAL 10
-#define OLED_POST_T7_DELAY 100
-#define OLED_PRE_T11_DELAY 150
-
-#define CTX \
- hws->ctx
-
-#define DC_LOGGER_INIT()
-
-#define REG(reg)\
- hws->regs->reg
-
-#undef FN
-#define FN(reg_name, field_name) \
- hws->shifts->field_name, hws->masks->field_name
-
-struct dce110_hw_seq_reg_offsets {
- uint32_t crtc;
-};
-
-static const struct dce110_hw_seq_reg_offsets reg_offsets[] = {
-{
- .crtc = (mmCRTC0_CRTC_GSL_CONTROL - mmCRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTC1_CRTC_GSL_CONTROL - mmCRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTC2_CRTC_GSL_CONTROL - mmCRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTCV_GSL_CONTROL - mmCRTC_GSL_CONTROL),
-}
-};
-
-#define HW_REG_BLND(reg, id)\
- (reg + reg_offsets[id].blnd)
-
-#define HW_REG_CRTC(reg, id)\
- (reg + reg_offsets[id].crtc)
-
-#define MAX_WATERMARK 0xFFFF
-#define SAFE_NBP_MARK 0x7FFF
-
-/*******************************************************************************
- * Private definitions
- ******************************************************************************/
-/***************************PIPE_CONTROL***********************************/
-static void dce110_init_pte(struct dc_context *ctx)
-{
- uint32_t addr;
- uint32_t value = 0;
- uint32_t chunk_int = 0;
- uint32_t chunk_mul = 0;
-
- addr = mmUNP_DVMM_PTE_CONTROL;
- value = dm_read_reg(ctx, addr);
-
- set_reg_field_value(
- value,
- 0,
- DVMM_PTE_CONTROL,
- DVMM_USE_SINGLE_PTE);
-
- set_reg_field_value(
- value,
- 1,
- DVMM_PTE_CONTROL,
- DVMM_PTE_BUFFER_MODE0);
-
- set_reg_field_value(
- value,
- 1,
- DVMM_PTE_CONTROL,
- DVMM_PTE_BUFFER_MODE1);
-
- dm_write_reg(ctx, addr, value);
-
- addr = mmDVMM_PTE_REQ;
- value = dm_read_reg(ctx, addr);
-
- chunk_int = get_reg_field_value(
- value,
- DVMM_PTE_REQ,
- HFLIP_PTEREQ_PER_CHUNK_INT);
-
- chunk_mul = get_reg_field_value(
- value,
- DVMM_PTE_REQ,
- HFLIP_PTEREQ_PER_CHUNK_MULTIPLIER);
-
- if (chunk_int != 0x4 || chunk_mul != 0x4) {
-
- set_reg_field_value(
- value,
- 255,
- DVMM_PTE_REQ,
- MAX_PTEREQ_TO_ISSUE);
-
- set_reg_field_value(
- value,
- 4,
- DVMM_PTE_REQ,
- HFLIP_PTEREQ_PER_CHUNK_INT);
-
- set_reg_field_value(
- value,
- 4,
- DVMM_PTE_REQ,
- HFLIP_PTEREQ_PER_CHUNK_MULTIPLIER);
-
- dm_write_reg(ctx, addr, value);
- }
-}
-/**************************************************************************/
-
-static void enable_display_pipe_clock_gating(
- struct dc_context *ctx,
- bool clock_gating)
-{
- /*TODO*/
-}
-
-static bool dce110_enable_display_power_gating(
- struct dc *dc,
- uint8_t controller_id,
- struct dc_bios *dcb,
- enum pipe_gating_control power_gating)
-{
- enum bp_result bp_result = BP_RESULT_OK;
- enum bp_pipe_control_action cntl;
- struct dc_context *ctx = dc->ctx;
- unsigned int underlay_idx = dc->res_pool->underlay_pipe_index;
-
- if (IS_FPGA_MAXIMUS_DC(ctx->dce_environment))
- return true;
-
- if (power_gating == PIPE_GATING_CONTROL_INIT)
- cntl = ASIC_PIPE_INIT;
- else if (power_gating == PIPE_GATING_CONTROL_ENABLE)
- cntl = ASIC_PIPE_ENABLE;
- else
- cntl = ASIC_PIPE_DISABLE;
-
- if (controller_id == underlay_idx)
- controller_id = CONTROLLER_ID_UNDERLAY0 - 1;
-
- if (power_gating != PIPE_GATING_CONTROL_INIT || controller_id == 0){
-
- bp_result = dcb->funcs->enable_disp_power_gating(
- dcb, controller_id + 1, cntl);
-
- /* Revert MASTER_UPDATE_MODE to 0 because bios sets it 2
- * by default when command table is called
- *
- * Bios parser accepts controller_id = 6 as indicative of
- * underlay pipe in dce110. But we do not support more
- * than 3.
- */
- if (controller_id < CONTROLLER_ID_MAX - 1)
- dm_write_reg(ctx,
- HW_REG_CRTC(mmCRTC_MASTER_UPDATE_MODE, controller_id),
- 0);
- }
-
- if (power_gating != PIPE_GATING_CONTROL_ENABLE)
- dce110_init_pte(ctx);
-
- if (bp_result == BP_RESULT_OK)
- return true;
- else
- return false;
-}
-
-static void build_prescale_params(struct ipp_prescale_params *prescale_params,
- const struct dc_plane_state *plane_state)
-{
- prescale_params->mode = IPP_PRESCALE_MODE_FIXED_UNSIGNED;
-
- switch (plane_state->format) {
- case SURFACE_PIXEL_FORMAT_GRPH_RGB565:
- prescale_params->scale = 0x2082;
- break;
- case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888:
- case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888:
- prescale_params->scale = 0x2020;
- break;
- case SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010:
- case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010:
- prescale_params->scale = 0x2008;
- break;
- case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
- case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
- case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
- prescale_params->scale = 0x2000;
- break;
- default:
- ASSERT(false);
- break;
- }
-}
-
-static bool
-dce110_set_input_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
- const struct dc_plane_state *plane_state)
-{
- struct input_pixel_processor *ipp = pipe_ctx->plane_res.ipp;
- const struct dc_transfer_func *tf = NULL;
- struct ipp_prescale_params prescale_params = { 0 };
- bool result = true;
-
- if (ipp == NULL)
- return false;
-
- if (plane_state->in_transfer_func)
- tf = plane_state->in_transfer_func;
-
- build_prescale_params(&prescale_params, plane_state);
- ipp->funcs->ipp_program_prescale(ipp, &prescale_params);
-
- if (plane_state->gamma_correction &&
- !plane_state->gamma_correction->is_identity &&
- dce_use_lut(plane_state->format))
- ipp->funcs->ipp_program_input_lut(ipp, plane_state->gamma_correction);
-
- if (tf == NULL) {
- /* Default case if no input transfer function specified */
- ipp->funcs->ipp_set_degamma(ipp, IPP_DEGAMMA_MODE_HW_sRGB);
- } else if (tf->type == TF_TYPE_PREDEFINED) {
- switch (tf->tf) {
- case TRANSFER_FUNCTION_SRGB:
- ipp->funcs->ipp_set_degamma(ipp, IPP_DEGAMMA_MODE_HW_sRGB);
- break;
- case TRANSFER_FUNCTION_BT709:
- ipp->funcs->ipp_set_degamma(ipp, IPP_DEGAMMA_MODE_HW_xvYCC);
- break;
- case TRANSFER_FUNCTION_LINEAR:
- ipp->funcs->ipp_set_degamma(ipp, IPP_DEGAMMA_MODE_BYPASS);
- break;
- case TRANSFER_FUNCTION_PQ:
- default:
- result = false;
- break;
- }
- } else if (tf->type == TF_TYPE_BYPASS) {
- ipp->funcs->ipp_set_degamma(ipp, IPP_DEGAMMA_MODE_BYPASS);
- } else {
- /*TF_TYPE_DISTRIBUTED_POINTS - Not supported in DCE 11*/
- result = false;
- }
-
- return result;
-}
-
-static bool convert_to_custom_float(struct pwl_result_data *rgb_resulted,
- struct curve_points *arr_points,
- uint32_t hw_points_num)
-{
- struct custom_float_format fmt;
-
- struct pwl_result_data *rgb = rgb_resulted;
-
- uint32_t i = 0;
-
- fmt.exponenta_bits = 6;
- fmt.mantissa_bits = 12;
- fmt.sign = true;
-
- if (!convert_to_custom_float_format(arr_points[0].x, &fmt,
- &arr_points[0].custom_float_x)) {
- BREAK_TO_DEBUGGER();
- return false;
- }
-
- if (!convert_to_custom_float_format(arr_points[0].offset, &fmt,
- &arr_points[0].custom_float_offset)) {
- BREAK_TO_DEBUGGER();
- return false;
- }
-
- if (!convert_to_custom_float_format(arr_points[0].slope, &fmt,
- &arr_points[0].custom_float_slope)) {
- BREAK_TO_DEBUGGER();
- return false;
- }
-
- fmt.mantissa_bits = 10;
- fmt.sign = false;
-
- if (!convert_to_custom_float_format(arr_points[1].x, &fmt,
- &arr_points[1].custom_float_x)) {
- BREAK_TO_DEBUGGER();
- return false;
- }
-
- if (!convert_to_custom_float_format(arr_points[1].y, &fmt,
- &arr_points[1].custom_float_y)) {
- BREAK_TO_DEBUGGER();
- return false;
- }
-
- if (!convert_to_custom_float_format(arr_points[1].slope, &fmt,
- &arr_points[1].custom_float_slope)) {
- BREAK_TO_DEBUGGER();
- return false;
- }
-
- fmt.mantissa_bits = 12;
- fmt.sign = true;
-
- while (i != hw_points_num) {
- if (!convert_to_custom_float_format(rgb->red, &fmt,
- &rgb->red_reg)) {
- BREAK_TO_DEBUGGER();
- return false;
- }
-
- if (!convert_to_custom_float_format(rgb->green, &fmt,
- &rgb->green_reg)) {
- BREAK_TO_DEBUGGER();
- return false;
- }
-
- if (!convert_to_custom_float_format(rgb->blue, &fmt,
- &rgb->blue_reg)) {
- BREAK_TO_DEBUGGER();
- return false;
- }
-
- if (!convert_to_custom_float_format(rgb->delta_red, &fmt,
- &rgb->delta_red_reg)) {
- BREAK_TO_DEBUGGER();
- return false;
- }
-
- if (!convert_to_custom_float_format(rgb->delta_green, &fmt,
- &rgb->delta_green_reg)) {
- BREAK_TO_DEBUGGER();
- return false;
- }
-
- if (!convert_to_custom_float_format(rgb->delta_blue, &fmt,
- &rgb->delta_blue_reg)) {
- BREAK_TO_DEBUGGER();
- return false;
- }
-
- ++rgb;
- ++i;
- }
-
- return true;
-}
-
-#define MAX_LOW_POINT 25
-#define NUMBER_REGIONS 16
-#define NUMBER_SW_SEGMENTS 16
-
-static bool
-dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf,
- struct pwl_params *regamma_params)
-{
- struct curve_points *arr_points;
- struct pwl_result_data *rgb_resulted;
- struct pwl_result_data *rgb;
- struct pwl_result_data *rgb_plus_1;
- struct fixed31_32 y_r;
- struct fixed31_32 y_g;
- struct fixed31_32 y_b;
- struct fixed31_32 y1_min;
- struct fixed31_32 y3_max;
-
- int32_t region_start, region_end;
- uint32_t i, j, k, seg_distr[NUMBER_REGIONS], increment, start_index, hw_points;
-
- if (output_tf == NULL || regamma_params == NULL || output_tf->type == TF_TYPE_BYPASS)
- return false;
-
- arr_points = regamma_params->arr_points;
- rgb_resulted = regamma_params->rgb_resulted;
- hw_points = 0;
-
- memset(regamma_params, 0, sizeof(struct pwl_params));
-
- if (output_tf->tf == TRANSFER_FUNCTION_PQ) {
- /* 16 segments
- * segments are from 2^-11 to 2^5
- */
- region_start = -11;
- region_end = region_start + NUMBER_REGIONS;
-
- for (i = 0; i < NUMBER_REGIONS; i++)
- seg_distr[i] = 4;
-
- } else {
- /* 10 segments
- * segment is from 2^-10 to 2^1
- * We include an extra segment for range [2^0, 2^1). This is to
- * ensure that colors with normalized values of 1 don't miss the
- * LUT.
- */
- region_start = -10;
- region_end = 1;
-
- seg_distr[0] = 4;
- seg_distr[1] = 4;
- seg_distr[2] = 4;
- seg_distr[3] = 4;
- seg_distr[4] = 4;
- seg_distr[5] = 4;
- seg_distr[6] = 4;
- seg_distr[7] = 4;
- seg_distr[8] = 4;
- seg_distr[9] = 4;
- seg_distr[10] = 0;
- seg_distr[11] = -1;
- seg_distr[12] = -1;
- seg_distr[13] = -1;
- seg_distr[14] = -1;
- seg_distr[15] = -1;
- }
-
- for (k = 0; k < 16; k++) {
- if (seg_distr[k] != -1)
- hw_points += (1 << seg_distr[k]);
- }
-
- j = 0;
- for (k = 0; k < (region_end - region_start); k++) {
- increment = NUMBER_SW_SEGMENTS / (1 << seg_distr[k]);
- start_index = (region_start + k + MAX_LOW_POINT) *
- NUMBER_SW_SEGMENTS;
- for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS;
- i += increment) {
- if (j == hw_points - 1)
- break;
- rgb_resulted[j].red = output_tf->tf_pts.red[i];
- rgb_resulted[j].green = output_tf->tf_pts.green[i];
- rgb_resulted[j].blue = output_tf->tf_pts.blue[i];
- j++;
- }
- }
-
- /* last point */
- start_index = (region_end + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS;
- rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index];
- rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
- rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
-
- arr_points[0].x = dc_fixpt_pow(dc_fixpt_from_int(2),
- dc_fixpt_from_int(region_start));
- arr_points[1].x = dc_fixpt_pow(dc_fixpt_from_int(2),
- dc_fixpt_from_int(region_end));
-
- y_r = rgb_resulted[0].red;
- y_g = rgb_resulted[0].green;
- y_b = rgb_resulted[0].blue;
-
- y1_min = dc_fixpt_min(y_r, dc_fixpt_min(y_g, y_b));
-
- arr_points[0].y = y1_min;
- arr_points[0].slope = dc_fixpt_div(arr_points[0].y,
- arr_points[0].x);
-
- y_r = rgb_resulted[hw_points - 1].red;
- y_g = rgb_resulted[hw_points - 1].green;
- y_b = rgb_resulted[hw_points - 1].blue;
-
- /* see comment above, m_arrPoints[1].y should be the Y value for the
- * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1)
- */
- y3_max = dc_fixpt_max(y_r, dc_fixpt_max(y_g, y_b));
-
- arr_points[1].y = y3_max;
-
- arr_points[1].slope = dc_fixpt_zero;
-
- if (output_tf->tf == TRANSFER_FUNCTION_PQ) {
- /* for PQ, we want to have a straight line from last HW X point,
- * and the slope to be such that we hit 1.0 at 10000 nits.
- */
- const struct fixed31_32 end_value = dc_fixpt_from_int(125);
-
- arr_points[1].slope = dc_fixpt_div(
- dc_fixpt_sub(dc_fixpt_one, arr_points[1].y),
- dc_fixpt_sub(end_value, arr_points[1].x));
- }
-
- regamma_params->hw_points_num = hw_points;
-
- k = 0;
- for (i = 1; i < 16; i++) {
- if (seg_distr[k] != -1) {
- regamma_params->arr_curve_points[k].segments_num = seg_distr[k];
- regamma_params->arr_curve_points[i].offset =
- regamma_params->arr_curve_points[k].offset + (1 << seg_distr[k]);
- }
- k++;
- }
-
- if (seg_distr[k] != -1)
- regamma_params->arr_curve_points[k].segments_num = seg_distr[k];
-
- rgb = rgb_resulted;
- rgb_plus_1 = rgb_resulted + 1;
-
- i = 1;
-
- while (i != hw_points + 1) {
- if (dc_fixpt_lt(rgb_plus_1->red, rgb->red))
- rgb_plus_1->red = rgb->red;
- if (dc_fixpt_lt(rgb_plus_1->green, rgb->green))
- rgb_plus_1->green = rgb->green;
- if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue))
- rgb_plus_1->blue = rgb->blue;
-
- rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red);
- rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green);
- rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue);
-
- ++rgb_plus_1;
- ++rgb;
- ++i;
- }
-
- convert_to_custom_float(rgb_resulted, arr_points, hw_points);
-
- return true;
-}
-
-static bool
-dce110_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
- const struct dc_stream_state *stream)
-{
- struct transform *xfm = pipe_ctx->plane_res.xfm;
-
- xfm->funcs->opp_power_on_regamma_lut(xfm, true);
- xfm->regamma_params.hw_points_num = GAMMA_HW_POINTS_NUM;
-
- if (stream->out_transfer_func &&
- stream->out_transfer_func->type == TF_TYPE_PREDEFINED &&
- stream->out_transfer_func->tf == TRANSFER_FUNCTION_SRGB) {
- xfm->funcs->opp_set_regamma_mode(xfm, OPP_REGAMMA_SRGB);
- } else if (dce110_translate_regamma_to_hw_format(stream->out_transfer_func,
- &xfm->regamma_params)) {
- xfm->funcs->opp_program_regamma_pwl(xfm, &xfm->regamma_params);
- xfm->funcs->opp_set_regamma_mode(xfm, OPP_REGAMMA_USER);
- } else {
- xfm->funcs->opp_set_regamma_mode(xfm, OPP_REGAMMA_BYPASS);
- }
-
- xfm->funcs->opp_power_on_regamma_lut(xfm, false);
-
- return true;
-}
-
-void dce110_update_info_frame(struct pipe_ctx *pipe_ctx)
-{
- bool is_hdmi_tmds;
- bool is_dp;
-
- ASSERT(pipe_ctx->stream);
-
- if (pipe_ctx->stream_res.stream_enc == NULL)
- return; /* this is not root pipe */
-
- is_hdmi_tmds = dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal);
- is_dp = dc_is_dp_signal(pipe_ctx->stream->signal);
-
- if (!is_hdmi_tmds && !is_dp)
- return;
-
- if (is_hdmi_tmds)
- pipe_ctx->stream_res.stream_enc->funcs->update_hdmi_info_packets(
- pipe_ctx->stream_res.stream_enc,
- &pipe_ctx->stream_res.encoder_info_frame);
- else
- pipe_ctx->stream_res.stream_enc->funcs->update_dp_info_packets(
- pipe_ctx->stream_res.stream_enc,
- &pipe_ctx->stream_res.encoder_info_frame);
-}
-
-void dce110_enable_stream(struct pipe_ctx *pipe_ctx)
-{
- enum dc_lane_count lane_count =
- pipe_ctx->stream->link->cur_link_settings.lane_count;
- struct dc_crtc_timing *timing = &pipe_ctx->stream->timing;
- struct dc_link *link = pipe_ctx->stream->link;
- const struct dc *dc = link->dc;
-
- uint32_t active_total_with_borders;
- uint32_t early_control = 0;
- struct timing_generator *tg = pipe_ctx->stream_res.tg;
-
- /* For MST, there are multiply stream go to only one link.
- * connect DIG back_end to front_end while enable_stream and
- * disconnect them during disable_stream
- * BY this, it is logic clean to separate stream and link */
- link->link_enc->funcs->connect_dig_be_to_fe(link->link_enc,
- pipe_ctx->stream_res.stream_enc->id, true);
-
- dc->hwss.update_info_frame(pipe_ctx);
-
- /* enable early control to avoid corruption on DP monitor*/
- active_total_with_borders =
- timing->h_addressable
- + timing->h_border_left
- + timing->h_border_right;
-
- if (lane_count != 0)
- early_control = active_total_with_borders % lane_count;
-
- if (early_control == 0)
- early_control = lane_count;
-
- tg->funcs->set_early_control(tg, early_control);
-
- /* enable audio only within mode set */
- if (pipe_ctx->stream_res.audio != NULL) {
- if (dc_is_dp_signal(pipe_ctx->stream->signal))
- pipe_ctx->stream_res.stream_enc->funcs->dp_audio_enable(pipe_ctx->stream_res.stream_enc);
- }
-
-
-
-
-}
-
-static enum bp_result link_transmitter_control(
- struct dc_bios *bios,
- struct bp_transmitter_control *cntl)
-{
- enum bp_result result;
-
- result = bios->funcs->transmitter_control(bios, cntl);
-
- return result;
-}
-
-/*
- * @brief
- * eDP only.
- */
-void dce110_edp_wait_for_hpd_ready(
- struct dc_link *link,
- bool power_up)
-{
- struct dc_context *ctx = link->ctx;
- struct graphics_object_id connector = link->link_enc->connector;
- struct gpio *hpd;
- struct dc_sink *sink = link->local_sink;
- bool edp_hpd_high = false;
- uint32_t time_elapsed = 0;
- uint32_t timeout = power_up ?
- PANEL_POWER_UP_TIMEOUT : PANEL_POWER_DOWN_TIMEOUT;
-
- if (dal_graphics_object_id_get_connector_id(connector)
- != CONNECTOR_ID_EDP) {
- BREAK_TO_DEBUGGER();
- return;
- }
-
- if (!power_up)
- /*
- * From KV, we will not HPD low after turning off VCC -
- * instead, we will check the SW timer in power_up().
- */
- return;
-
- /*
- * When we power on/off the eDP panel,
- * we need to wait until SENSE bit is high/low.
- */
-
- /* obtain HPD */
- /* TODO what to do with this? */
- hpd = get_hpd_gpio(ctx->dc_bios, connector, ctx->gpio_service);
-
- if (!hpd) {
- BREAK_TO_DEBUGGER();
- return;
- }
-
- if (sink != NULL) {
- if (sink->edid_caps.panel_patch.extra_t3_ms > 0) {
- int extra_t3_in_ms = sink->edid_caps.panel_patch.extra_t3_ms;
-
- msleep(extra_t3_in_ms);
- }
- }
-
- dal_gpio_open(hpd, GPIO_MODE_INTERRUPT);
-
- /* wait until timeout or panel detected */
-
- do {
- uint32_t detected = 0;
-
- dal_gpio_get_value(hpd, &detected);
-
- if (!(detected ^ power_up)) {
- edp_hpd_high = true;
- break;
- }
-
- msleep(HPD_CHECK_INTERVAL);
-
- time_elapsed += HPD_CHECK_INTERVAL;
- } while (time_elapsed < timeout);
-
- dal_gpio_close(hpd);
-
- dal_gpio_destroy_irq(&hpd);
-
- if (false == edp_hpd_high) {
- DC_LOG_ERROR(
- "%s: wait timed out!\n", __func__);
- }
-}
-
-void dce110_edp_power_control(
- struct dc_link *link,
- bool power_up)
-{
- struct dc_context *ctx = link->ctx;
- struct bp_transmitter_control cntl = { 0 };
- enum bp_result bp_result;
- uint8_t panel_instance;
-
-
- if (dal_graphics_object_id_get_connector_id(link->link_enc->connector)
- != CONNECTOR_ID_EDP) {
- BREAK_TO_DEBUGGER();
- return;
- }
-
- if (!link->panel_cntl)
- return;
- if (power_up !=
- link->panel_cntl->funcs->is_panel_powered_on(link->panel_cntl)) {
-
- unsigned long long current_ts = dm_get_timestamp(ctx);
- unsigned long long time_since_edp_poweroff_ms =
- div64_u64(dm_get_elapse_time_in_ns(
- ctx,
- current_ts,
- link->link_trace.time_stamp.edp_poweroff), 1000000);
- unsigned long long time_since_edp_poweron_ms =
- div64_u64(dm_get_elapse_time_in_ns(
- ctx,
- current_ts,
- link->link_trace.time_stamp.edp_poweron), 1000000);
- DC_LOG_HW_RESUME_S3(
- "%s: transition: power_up=%d current_ts=%llu edp_poweroff=%llu edp_poweron=%llu time_since_edp_poweroff_ms=%llu time_since_edp_poweron_ms=%llu",
- __func__,
- power_up,
- current_ts,
- link->link_trace.time_stamp.edp_poweroff,
- link->link_trace.time_stamp.edp_poweron,
- time_since_edp_poweroff_ms,
- time_since_edp_poweron_ms);
-
- /* Send VBIOS command to prompt eDP panel power */
- if (power_up) {
- /* edp requires a min of 500ms from LCDVDD off to on */
- unsigned long long remaining_min_edp_poweroff_time_ms = 500;
-
- /* add time defined by a patch, if any (usually patch extra_t12_ms is 0) */
- if (link->local_sink != NULL)
- remaining_min_edp_poweroff_time_ms +=
- link->local_sink->edid_caps.panel_patch.extra_t12_ms;
-
- /* Adjust remaining_min_edp_poweroff_time_ms if this is not the first time. */
- if (link->link_trace.time_stamp.edp_poweroff != 0) {
- if (time_since_edp_poweroff_ms < remaining_min_edp_poweroff_time_ms)
- remaining_min_edp_poweroff_time_ms =
- remaining_min_edp_poweroff_time_ms - time_since_edp_poweroff_ms;
- else
- remaining_min_edp_poweroff_time_ms = 0;
- }
-
- if (remaining_min_edp_poweroff_time_ms) {
- DC_LOG_HW_RESUME_S3(
- "%s: remaining_min_edp_poweroff_time_ms=%llu: begin wait.\n",
- __func__, remaining_min_edp_poweroff_time_ms);
- msleep(remaining_min_edp_poweroff_time_ms);
- DC_LOG_HW_RESUME_S3(
- "%s: remaining_min_edp_poweroff_time_ms=%llu: end wait.\n",
- __func__, remaining_min_edp_poweroff_time_ms);
- dm_output_to_console("%s: wait %lld ms to power on eDP.\n",
- __func__, remaining_min_edp_poweroff_time_ms);
- } else {
- DC_LOG_HW_RESUME_S3(
- "%s: remaining_min_edp_poweroff_time_ms=%llu: no wait required.\n",
- __func__, remaining_min_edp_poweroff_time_ms);
- }
- }
-
- DC_LOG_HW_RESUME_S3(
- "%s: BEGIN: Panel Power action: %s\n",
- __func__, (power_up ? "On":"Off"));
-
- cntl.action = power_up ?
- TRANSMITTER_CONTROL_POWER_ON :
- TRANSMITTER_CONTROL_POWER_OFF;
- cntl.transmitter = link->link_enc->transmitter;
- cntl.connector_obj_id = link->link_enc->connector;
- cntl.coherent = false;
- cntl.lanes_number = LANE_COUNT_FOUR;
- cntl.hpd_sel = link->link_enc->hpd_source;
- panel_instance = link->panel_cntl->inst;
-
- if (ctx->dc->ctx->dmub_srv &&
- ctx->dc->debug.dmub_command_table) {
- if (cntl.action == TRANSMITTER_CONTROL_POWER_ON)
- bp_result = ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios,
- LVTMA_CONTROL_POWER_ON,
- panel_instance);
- else
- bp_result = ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios,
- LVTMA_CONTROL_POWER_OFF,
- panel_instance);
- }
-
- bp_result = link_transmitter_control(ctx->dc_bios, &cntl);
-
- DC_LOG_HW_RESUME_S3(
- "%s: END: Panel Power action: %s bp_result=%u\n",
- __func__, (power_up ? "On":"Off"),
- bp_result);
-
- if (!power_up)
- /*save driver power off time stamp*/
- link->link_trace.time_stamp.edp_poweroff = dm_get_timestamp(ctx);
- else
- link->link_trace.time_stamp.edp_poweron = dm_get_timestamp(ctx);
-
- DC_LOG_HW_RESUME_S3(
- "%s: updated values: edp_poweroff=%llu edp_poweron=%llu\n",
- __func__,
- link->link_trace.time_stamp.edp_poweroff,
- link->link_trace.time_stamp.edp_poweron);
-
- if (bp_result != BP_RESULT_OK)
- DC_LOG_ERROR(
- "%s: Panel Power bp_result: %d\n",
- __func__, bp_result);
- } else {
- DC_LOG_HW_RESUME_S3(
- "%s: Skipping Panel Power action: %s\n",
- __func__, (power_up ? "On":"Off"));
- }
-}
-
-void dce110_edp_wait_for_T12(
- struct dc_link *link)
-{
- struct dc_context *ctx = link->ctx;
-
- if (dal_graphics_object_id_get_connector_id(link->link_enc->connector)
- != CONNECTOR_ID_EDP) {
- BREAK_TO_DEBUGGER();
- return;
- }
-
- if (!link->panel_cntl)
- return;
-
- if (!link->panel_cntl->funcs->is_panel_powered_on(link->panel_cntl) &&
- link->link_trace.time_stamp.edp_poweroff != 0) {
- unsigned int t12_duration = 500; // Default T12 as per spec
- unsigned long long current_ts = dm_get_timestamp(ctx);
- unsigned long long time_since_edp_poweroff_ms =
- div64_u64(dm_get_elapse_time_in_ns(
- ctx,
- current_ts,
- link->link_trace.time_stamp.edp_poweroff), 1000000);
-
- t12_duration += link->local_sink->edid_caps.panel_patch.extra_t12_ms; // Add extra T12
-
- if (time_since_edp_poweroff_ms < t12_duration)
- msleep(t12_duration - time_since_edp_poweroff_ms);
- }
-}
-
-/*todo: cloned in stream enc, fix*/
-/*
- * @brief
- * eDP only. Control the backlight of the eDP panel
- */
-void dce110_edp_backlight_control(
- struct dc_link *link,
- bool enable)
-{
- struct dc_context *ctx = link->ctx;
- struct bp_transmitter_control cntl = { 0 };
- uint8_t panel_instance;
-
- if (dal_graphics_object_id_get_connector_id(link->link_enc->connector)
- != CONNECTOR_ID_EDP) {
- BREAK_TO_DEBUGGER();
- return;
- }
-
- if (link->panel_cntl) {
- bool is_backlight_on = link->panel_cntl->funcs->is_panel_backlight_on(link->panel_cntl);
-
- if ((enable && is_backlight_on) || (!enable && !is_backlight_on)) {
- DC_LOG_HW_RESUME_S3(
- "%s: panel already powered up/off. Do nothing.\n",
- __func__);
- return;
- }
- }
-
- /* Send VBIOS command to control eDP panel backlight */
-
- DC_LOG_HW_RESUME_S3(
- "%s: backlight action: %s\n",
- __func__, (enable ? "On":"Off"));
-
- cntl.action = enable ?
- TRANSMITTER_CONTROL_BACKLIGHT_ON :
- TRANSMITTER_CONTROL_BACKLIGHT_OFF;
-
- /*cntl.engine_id = ctx->engine;*/
- cntl.transmitter = link->link_enc->transmitter;
- cntl.connector_obj_id = link->link_enc->connector;
- /*todo: unhardcode*/
- cntl.lanes_number = LANE_COUNT_FOUR;
- cntl.hpd_sel = link->link_enc->hpd_source;
- cntl.signal = SIGNAL_TYPE_EDP;
-
- /* For eDP, the following delays might need to be considered
- * after link training completed:
- * idle period - min. accounts for required BS-Idle pattern,
- * max. allows for source frame synchronization);
- * 50 msec max. delay from valid video data from source
- * to video on dislpay or backlight enable.
- *
- * Disable the delay for now.
- * Enable it in the future if necessary.
- */
- /* dc_service_sleep_in_milliseconds(50); */
- /*edp 1.2*/
- panel_instance = link->panel_cntl->inst;
-
- if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON) {
- if (!link->dc->config.edp_no_power_sequencing)
- /*
- * Sometimes, DP receiver chip power-controlled externally by an
- * Embedded Controller could be treated and used as eDP,
- * if it drives mobile display. In this case,
- * we shouldn't be doing power-sequencing, hence we can skip
- * waiting for T7-ready.
- */
- edp_receiver_ready_T7(link);
- else
- DC_LOG_DC("edp_receiver_ready_T7 skipped\n");
- }
-
- if (ctx->dc->ctx->dmub_srv &&
- ctx->dc->debug.dmub_command_table) {
- if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON)
- ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios,
- LVTMA_CONTROL_LCD_BLON,
- panel_instance);
- else
- ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios,
- LVTMA_CONTROL_LCD_BLOFF,
- panel_instance);
- }
-
- link_transmitter_control(ctx->dc_bios, &cntl);
-
- if (enable && link->dpcd_sink_ext_caps.bits.oled)
- msleep(OLED_POST_T7_DELAY);
-
- if (link->dpcd_sink_ext_caps.bits.oled ||
- link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1 ||
- link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1)
- dc_link_backlight_enable_aux(link, enable);
-
- /*edp 1.2*/
- if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_OFF) {
- if (!link->dc->config.edp_no_power_sequencing)
- /*
- * Sometimes, DP receiver chip power-controlled externally by an
- * Embedded Controller could be treated and used as eDP,
- * if it drives mobile display. In this case,
- * we shouldn't be doing power-sequencing, hence we can skip
- * waiting for T9-ready.
- */
- edp_add_delay_for_T9(link);
- else
- DC_LOG_DC("edp_receiver_ready_T9 skipped\n");
- }
-
- if (!enable && link->dpcd_sink_ext_caps.bits.oled)
- msleep(OLED_PRE_T11_DELAY);
-}
-
-void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx)
-{
- /* notify audio driver for audio modes of monitor */
- struct dc *dc;
- struct clk_mgr *clk_mgr;
- unsigned int i, num_audio = 1;
-
- if (!pipe_ctx->stream)
- return;
-
- dc = pipe_ctx->stream->ctx->dc;
- clk_mgr = dc->clk_mgr;
-
- if (pipe_ctx->stream_res.audio && pipe_ctx->stream_res.audio->enabled == true)
- return;
-
- if (pipe_ctx->stream_res.audio) {
- for (i = 0; i < MAX_PIPES; i++) {
- /*current_state not updated yet*/
- if (dc->current_state->res_ctx.pipe_ctx[i].stream_res.audio != NULL)
- num_audio++;
- }
-
- pipe_ctx->stream_res.audio->funcs->az_enable(pipe_ctx->stream_res.audio);
-
- if (num_audio >= 1 && clk_mgr->funcs->enable_pme_wa)
- /*this is the first audio. apply the PME w/a in order to wake AZ from D3*/
- clk_mgr->funcs->enable_pme_wa(clk_mgr);
- /* un-mute audio */
- /* TODO: audio should be per stream rather than per link */
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (is_dp_128b_132b_signal(pipe_ctx))
- pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->audio_mute_control(
- pipe_ctx->stream_res.hpo_dp_stream_enc, false);
- else
- pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control(
- pipe_ctx->stream_res.stream_enc, false);
-#else
- pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control(
- pipe_ctx->stream_res.stream_enc, false);
-#endif
- if (pipe_ctx->stream_res.audio)
- pipe_ctx->stream_res.audio->enabled = true;
- }
-
- if (dc_is_dp_signal(pipe_ctx->stream->signal))
- dp_source_sequence_trace(pipe_ctx->stream->link, DPCD_SOURCE_SEQ_AFTER_ENABLE_AUDIO_STREAM);
-}
-
-void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx)
-{
- struct dc *dc;
- struct clk_mgr *clk_mgr;
-
- if (!pipe_ctx || !pipe_ctx->stream)
- return;
-
- dc = pipe_ctx->stream->ctx->dc;
- clk_mgr = dc->clk_mgr;
-
- if (pipe_ctx->stream_res.audio && pipe_ctx->stream_res.audio->enabled == false)
- return;
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (is_dp_128b_132b_signal(pipe_ctx))
- pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->audio_mute_control(
- pipe_ctx->stream_res.hpo_dp_stream_enc, true);
- else
- pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control(
- pipe_ctx->stream_res.stream_enc, true);
-#else
- pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control(
- pipe_ctx->stream_res.stream_enc, true);
-#endif
- if (pipe_ctx->stream_res.audio) {
- pipe_ctx->stream_res.audio->enabled = false;
-
- if (dc_is_dp_signal(pipe_ctx->stream->signal))
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (is_dp_128b_132b_signal(pipe_ctx))
- pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->dp_audio_disable(
- pipe_ctx->stream_res.hpo_dp_stream_enc);
- else
- pipe_ctx->stream_res.stream_enc->funcs->dp_audio_disable(
- pipe_ctx->stream_res.stream_enc);
-#else
- pipe_ctx->stream_res.stream_enc->funcs->dp_audio_disable(
- pipe_ctx->stream_res.stream_enc);
-#endif
- else
- pipe_ctx->stream_res.stream_enc->funcs->hdmi_audio_disable(
- pipe_ctx->stream_res.stream_enc);
-
- if (clk_mgr->funcs->enable_pme_wa)
- /*this is the first audio. apply the PME w/a in order to wake AZ from D3*/
- clk_mgr->funcs->enable_pme_wa(clk_mgr);
-
- /* TODO: notify audio driver for if audio modes list changed
- * add audio mode list change flag */
- /* dal_audio_disable_azalia_audio_jack_presence(stream->audio,
- * stream->stream_engine_id);
- */
- }
-
- if (dc_is_dp_signal(pipe_ctx->stream->signal))
- dp_source_sequence_trace(pipe_ctx->stream->link, DPCD_SOURCE_SEQ_AFTER_DISABLE_AUDIO_STREAM);
-}
-
-void dce110_disable_stream(struct pipe_ctx *pipe_ctx)
-{
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct dc_link *link = stream->link;
- struct dc *dc = pipe_ctx->stream->ctx->dc;
- struct link_encoder *link_enc = NULL;
-
- if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal)) {
- pipe_ctx->stream_res.stream_enc->funcs->stop_hdmi_info_packets(
- pipe_ctx->stream_res.stream_enc);
- pipe_ctx->stream_res.stream_enc->funcs->hdmi_reset_stream_attribute(
- pipe_ctx->stream_res.stream_enc);
- }
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (is_dp_128b_132b_signal(pipe_ctx)) {
- pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->stop_dp_info_packets(
- pipe_ctx->stream_res.hpo_dp_stream_enc);
- } else if (dc_is_dp_signal(pipe_ctx->stream->signal))
-#else
- if (dc_is_dp_signal(pipe_ctx->stream->signal))
-#endif
- pipe_ctx->stream_res.stream_enc->funcs->stop_dp_info_packets(
- pipe_ctx->stream_res.stream_enc);
-
- dc->hwss.disable_audio_stream(pipe_ctx);
-
- /* Link encoder may have been dynamically assigned to non-physical display endpoint. */
- if (link->ep_type == DISPLAY_ENDPOINT_PHY)
- link_enc = link->link_enc;
- else if (dc->res_pool->funcs->link_encs_assign)
- link_enc = link_enc_cfg_get_link_enc_used_by_link(link->ctx->dc, link);
- ASSERT(link_enc);
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (is_dp_128b_132b_signal(pipe_ctx)) {
- pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->disable(
- pipe_ctx->stream_res.hpo_dp_stream_enc);
- setup_dp_hpo_stream(pipe_ctx, false);
- /* TODO - DP2.0 HW: unmap stream from link encoder here */
- } else {
- if (link_enc)
- link_enc->funcs->connect_dig_be_to_fe(
- link_enc,
- pipe_ctx->stream_res.stream_enc->id,
- false);
- }
-#else
- if (link_enc)
- link_enc->funcs->connect_dig_be_to_fe(
- link->link_enc,
- pipe_ctx->stream_res.stream_enc->id,
- false);
-#endif
- if (dc_is_dp_signal(pipe_ctx->stream->signal))
- dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_DISCONNECT_DIG_FE_BE);
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dc->hwseq->funcs.setup_hpo_hw_control && is_dp_128b_132b_signal(pipe_ctx))
- dc->hwseq->funcs.setup_hpo_hw_control(dc->hwseq, false);
-#endif
-
-}
-
-void dce110_unblank_stream(struct pipe_ctx *pipe_ctx,
- struct dc_link_settings *link_settings)
-{
- struct encoder_unblank_param params = { { 0 } };
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct dc_link *link = stream->link;
- struct dce_hwseq *hws = link->dc->hwseq;
-
- /* only 3 items below are used by unblank */
- params.timing = pipe_ctx->stream->timing;
- params.link_settings.link_rate = link_settings->link_rate;
-
- if (dc_is_dp_signal(pipe_ctx->stream->signal))
- pipe_ctx->stream_res.stream_enc->funcs->dp_unblank(link, pipe_ctx->stream_res.stream_enc, &params);
-
- if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) {
- hws->funcs.edp_backlight_control(link, true);
- }
-}
-
-void dce110_blank_stream(struct pipe_ctx *pipe_ctx)
-{
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct dc_link *link = stream->link;
- struct dce_hwseq *hws = link->dc->hwseq;
-
- if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) {
- hws->funcs.edp_backlight_control(link, false);
- link->dc->hwss.set_abm_immediate_disable(pipe_ctx);
- }
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (is_dp_128b_132b_signal(pipe_ctx)) {
- /* TODO - DP2.0 HW: Set ODM mode in dp hpo encoder here */
- pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->dp_blank(
- pipe_ctx->stream_res.hpo_dp_stream_enc);
- } else if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
-#else
- if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
-#endif
- pipe_ctx->stream_res.stream_enc->funcs->dp_blank(link, pipe_ctx->stream_res.stream_enc);
-
- if (!dc_is_embedded_signal(pipe_ctx->stream->signal)) {
- /*
- * After output is idle pattern some sinks need time to recognize the stream
- * has changed or they enter protection state and hang.
- */
- msleep(60);
- } else if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP)
- edp_receiver_ready_T9(link);
- }
-
-}
-
-
-void dce110_set_avmute(struct pipe_ctx *pipe_ctx, bool enable)
-{
- if (pipe_ctx != NULL && pipe_ctx->stream_res.stream_enc != NULL)
- pipe_ctx->stream_res.stream_enc->funcs->set_avmute(pipe_ctx->stream_res.stream_enc, enable);
-}
-
-static enum audio_dto_source translate_to_dto_source(enum controller_id crtc_id)
-{
- switch (crtc_id) {
- case CONTROLLER_ID_D0:
- return DTO_SOURCE_ID0;
- case CONTROLLER_ID_D1:
- return DTO_SOURCE_ID1;
- case CONTROLLER_ID_D2:
- return DTO_SOURCE_ID2;
- case CONTROLLER_ID_D3:
- return DTO_SOURCE_ID3;
- case CONTROLLER_ID_D4:
- return DTO_SOURCE_ID4;
- case CONTROLLER_ID_D5:
- return DTO_SOURCE_ID5;
- default:
- return DTO_SOURCE_UNKNOWN;
- }
-}
-
-static void build_audio_output(
- struct dc_state *state,
- const struct pipe_ctx *pipe_ctx,
- struct audio_output *audio_output)
-{
- const struct dc_stream_state *stream = pipe_ctx->stream;
- audio_output->engine_id = pipe_ctx->stream_res.stream_enc->id;
-
- audio_output->signal = pipe_ctx->stream->signal;
-
- /* audio_crtc_info */
-
- audio_output->crtc_info.h_total =
- stream->timing.h_total;
-
- /*
- * Audio packets are sent during actual CRTC blank physical signal, we
- * need to specify actual active signal portion
- */
- audio_output->crtc_info.h_active =
- stream->timing.h_addressable
- + stream->timing.h_border_left
- + stream->timing.h_border_right;
-
- audio_output->crtc_info.v_active =
- stream->timing.v_addressable
- + stream->timing.v_border_top
- + stream->timing.v_border_bottom;
-
- audio_output->crtc_info.pixel_repetition = 1;
-
- audio_output->crtc_info.interlaced =
- stream->timing.flags.INTERLACE;
-
- audio_output->crtc_info.refresh_rate =
- (stream->timing.pix_clk_100hz*100)/
- (stream->timing.h_total*stream->timing.v_total);
-
- audio_output->crtc_info.color_depth =
- stream->timing.display_color_depth;
-
- audio_output->crtc_info.requested_pixel_clock_100Hz =
- pipe_ctx->stream_res.pix_clk_params.requested_pix_clk_100hz;
-
- audio_output->crtc_info.calculated_pixel_clock_100Hz =
- pipe_ctx->stream_res.pix_clk_params.requested_pix_clk_100hz;
-
-/*for HDMI, audio ACR is with deep color ratio factor*/
- if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal) &&
- audio_output->crtc_info.requested_pixel_clock_100Hz ==
- (stream->timing.pix_clk_100hz)) {
- if (pipe_ctx->stream_res.pix_clk_params.pixel_encoding == PIXEL_ENCODING_YCBCR420) {
- audio_output->crtc_info.requested_pixel_clock_100Hz =
- audio_output->crtc_info.requested_pixel_clock_100Hz/2;
- audio_output->crtc_info.calculated_pixel_clock_100Hz =
- pipe_ctx->stream_res.pix_clk_params.requested_pix_clk_100hz/2;
-
- }
- }
-
- if (state->clk_mgr &&
- (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
- pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)) {
- audio_output->pll_info.dp_dto_source_clock_in_khz =
- state->clk_mgr->funcs->get_dp_ref_clk_frequency(
- state->clk_mgr);
- }
-
- audio_output->pll_info.feed_back_divider =
- pipe_ctx->pll_settings.feedback_divider;
-
- audio_output->pll_info.dto_source =
- translate_to_dto_source(
- pipe_ctx->stream_res.tg->inst + 1);
-
- /* TODO hard code to enable for now. Need get from stream */
- audio_output->pll_info.ss_enabled = true;
-
- audio_output->pll_info.ss_percentage =
- pipe_ctx->pll_settings.ss_percentage;
-}
-
-static void program_scaler(const struct dc *dc,
- const struct pipe_ctx *pipe_ctx)
-{
- struct tg_color color = {0};
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- /* TOFPGA */
- if (pipe_ctx->plane_res.xfm->funcs->transform_set_pixel_storage_depth == NULL)
- return;
-#endif
-
- if (dc->debug.visual_confirm == VISUAL_CONFIRM_SURFACE)
- get_surface_visual_confirm_color(pipe_ctx, &color);
- else
- color_space_to_black_color(dc,
- pipe_ctx->stream->output_color_space,
- &color);
-
- pipe_ctx->plane_res.xfm->funcs->transform_set_pixel_storage_depth(
- pipe_ctx->plane_res.xfm,
- pipe_ctx->plane_res.scl_data.lb_params.depth,
- &pipe_ctx->stream->bit_depth_params);
-
- if (pipe_ctx->stream_res.tg->funcs->set_overscan_blank_color) {
- /*
- * The way 420 is packed, 2 channels carry Y component, 1 channel
- * alternate between Cb and Cr, so both channels need the pixel
- * value for Y
- */
- if (pipe_ctx->stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420)
- color.color_r_cr = color.color_g_y;
-
- pipe_ctx->stream_res.tg->funcs->set_overscan_blank_color(
- pipe_ctx->stream_res.tg,
- &color);
- }
-
- pipe_ctx->plane_res.xfm->funcs->transform_set_scaler(pipe_ctx->plane_res.xfm,
- &pipe_ctx->plane_res.scl_data);
-}
-
-static enum dc_status dce110_enable_stream_timing(
- struct pipe_ctx *pipe_ctx,
- struct dc_state *context,
- struct dc *dc)
-{
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct pipe_ctx *pipe_ctx_old = &dc->current_state->res_ctx.
- pipe_ctx[pipe_ctx->pipe_idx];
- struct tg_color black_color = {0};
-
- if (!pipe_ctx_old->stream) {
-
- /* program blank color */
- color_space_to_black_color(dc,
- stream->output_color_space, &black_color);
- pipe_ctx->stream_res.tg->funcs->set_blank_color(
- pipe_ctx->stream_res.tg,
- &black_color);
-
- /*
- * Must blank CRTC after disabling power gating and before any
- * programming, otherwise CRTC will be hung in bad state
- */
- pipe_ctx->stream_res.tg->funcs->set_blank(pipe_ctx->stream_res.tg, true);
-
- if (false == pipe_ctx->clock_source->funcs->program_pix_clk(
- pipe_ctx->clock_source,
- &pipe_ctx->stream_res.pix_clk_params,
- &pipe_ctx->pll_settings)) {
- BREAK_TO_DEBUGGER();
- return DC_ERROR_UNEXPECTED;
- }
-
- pipe_ctx->stream_res.tg->funcs->program_timing(
- pipe_ctx->stream_res.tg,
- &stream->timing,
- 0,
- 0,
- 0,
- 0,
- pipe_ctx->stream->signal,
- true);
- }
-
- if (!pipe_ctx_old->stream) {
- if (false == pipe_ctx->stream_res.tg->funcs->enable_crtc(
- pipe_ctx->stream_res.tg)) {
- BREAK_TO_DEBUGGER();
- return DC_ERROR_UNEXPECTED;
- }
- }
-
- return DC_OK;
-}
-
-static enum dc_status apply_single_controller_ctx_to_hw(
- struct pipe_ctx *pipe_ctx,
- struct dc_state *context,
- struct dc *dc)
-{
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct dc_link *link = stream->link;
- struct drr_params params = {0};
- unsigned int event_triggers = 0;
- struct pipe_ctx *odm_pipe = pipe_ctx->next_odm_pipe;
- struct dce_hwseq *hws = dc->hwseq;
-
- if (hws->funcs.disable_stream_gating) {
- hws->funcs.disable_stream_gating(dc, pipe_ctx);
- }
-
- if (pipe_ctx->stream_res.audio != NULL) {
- struct audio_output audio_output;
-
- build_audio_output(context, pipe_ctx, &audio_output);
-
- if (dc_is_dp_signal(pipe_ctx->stream->signal))
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (is_dp_128b_132b_signal(pipe_ctx))
- pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->dp_audio_setup(
- pipe_ctx->stream_res.hpo_dp_stream_enc,
- pipe_ctx->stream_res.audio->inst,
- &pipe_ctx->stream->audio_info);
- else
- pipe_ctx->stream_res.stream_enc->funcs->dp_audio_setup(
- pipe_ctx->stream_res.stream_enc,
- pipe_ctx->stream_res.audio->inst,
- &pipe_ctx->stream->audio_info);
-#else
- pipe_ctx->stream_res.stream_enc->funcs->dp_audio_setup(
- pipe_ctx->stream_res.stream_enc,
- pipe_ctx->stream_res.audio->inst,
- &pipe_ctx->stream->audio_info);
-#endif
- else
- pipe_ctx->stream_res.stream_enc->funcs->hdmi_audio_setup(
- pipe_ctx->stream_res.stream_enc,
- pipe_ctx->stream_res.audio->inst,
- &pipe_ctx->stream->audio_info,
- &audio_output.crtc_info);
-
- pipe_ctx->stream_res.audio->funcs->az_configure(
- pipe_ctx->stream_res.audio,
- pipe_ctx->stream->signal,
- &audio_output.crtc_info,
- &pipe_ctx->stream->audio_info);
- }
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- /* DCN3.1 FPGA Workaround
- * Need to enable HPO DP Stream Encoder before setting OTG master enable.
- * To do so, move calling function enable_stream_timing to only be done AFTER calling
- * function core_link_enable_stream
- */
- if (!(hws->wa.dp_hpo_and_otg_sequence && is_dp_128b_132b_signal(pipe_ctx)))
-#endif
- /* */
- /* Do not touch stream timing on seamless boot optimization. */
- if (!pipe_ctx->stream->apply_seamless_boot_optimization)
- hws->funcs.enable_stream_timing(pipe_ctx, context, dc);
-
- if (hws->funcs.setup_vupdate_interrupt)
- hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx);
-
- params.vertical_total_min = stream->adjust.v_total_min;
- params.vertical_total_max = stream->adjust.v_total_max;
- if (pipe_ctx->stream_res.tg->funcs->set_drr)
- pipe_ctx->stream_res.tg->funcs->set_drr(
- pipe_ctx->stream_res.tg, &params);
-
- // DRR should set trigger event to monitor surface update event
- if (stream->adjust.v_total_min != 0 && stream->adjust.v_total_max != 0)
- event_triggers = 0x80;
- /* Event triggers and num frames initialized for DRR, but can be
- * later updated for PSR use. Note DRR trigger events are generated
- * regardless of whether num frames met.
- */
- if (pipe_ctx->stream_res.tg->funcs->set_static_screen_control)
- pipe_ctx->stream_res.tg->funcs->set_static_screen_control(
- pipe_ctx->stream_res.tg, event_triggers, 2);
-
- if (!dc_is_virtual_signal(pipe_ctx->stream->signal))
- pipe_ctx->stream_res.stream_enc->funcs->dig_connect_to_otg(
- pipe_ctx->stream_res.stream_enc,
- pipe_ctx->stream_res.tg->inst);
-
- if (dc_is_dp_signal(pipe_ctx->stream->signal))
- dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_CONNECT_DIG_FE_OTG);
-
- pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion(
- pipe_ctx->stream_res.opp,
- COLOR_SPACE_YCBCR601,
- stream->timing.display_color_depth,
- stream->signal);
-
- pipe_ctx->stream_res.opp->funcs->opp_program_fmt(
- pipe_ctx->stream_res.opp,
- &stream->bit_depth_params,
- &stream->clamping);
- while (odm_pipe) {
- odm_pipe->stream_res.opp->funcs->opp_set_dyn_expansion(
- odm_pipe->stream_res.opp,
- COLOR_SPACE_YCBCR601,
- stream->timing.display_color_depth,
- stream->signal);
-
- odm_pipe->stream_res.opp->funcs->opp_program_fmt(
- odm_pipe->stream_res.opp,
- &stream->bit_depth_params,
- &stream->clamping);
- odm_pipe = odm_pipe->next_odm_pipe;
- }
-
- if (!stream->dpms_off)
- core_link_enable_stream(context, pipe_ctx);
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- /* DCN3.1 FPGA Workaround
- * Need to enable HPO DP Stream Encoder before setting OTG master enable.
- * To do so, move calling function enable_stream_timing to only be done AFTER calling
- * function core_link_enable_stream
- */
- if (hws->wa.dp_hpo_and_otg_sequence && is_dp_128b_132b_signal(pipe_ctx)) {
- if (!pipe_ctx->stream->apply_seamless_boot_optimization)
- hws->funcs.enable_stream_timing(pipe_ctx, context, dc);
- }
-#endif
-
- pipe_ctx->plane_res.scl_data.lb_params.alpha_en = pipe_ctx->bottom_pipe != 0;
-
- pipe_ctx->stream->link->psr_settings.psr_feature_enabled = false;
-
- return DC_OK;
-}
-
-/******************************************************************************/
-
-static void power_down_encoders(struct dc *dc)
-{
- int i, j;
-
- for (i = 0; i < dc->link_count; i++) {
- enum signal_type signal = dc->links[i]->connector_signal;
-
- if ((signal == SIGNAL_TYPE_EDP) ||
- (signal == SIGNAL_TYPE_DISPLAY_PORT)) {
- if (dc->links[i]->link_enc->funcs->get_dig_frontend &&
- dc->links[i]->link_enc->funcs->is_dig_enabled(dc->links[i]->link_enc)) {
- unsigned int fe = dc->links[i]->link_enc->funcs->get_dig_frontend(
- dc->links[i]->link_enc);
-
- for (j = 0; j < dc->res_pool->stream_enc_count; j++) {
- if (fe == dc->res_pool->stream_enc[j]->id) {
- dc->res_pool->stream_enc[j]->funcs->dp_blank(dc->links[i],
- dc->res_pool->stream_enc[j]);
- break;
- }
- }
- }
-
- if (!dc->links[i]->wa_flags.dp_keep_receiver_powered)
- dp_receiver_power_ctrl(dc->links[i], false);
- }
-
- if (signal != SIGNAL_TYPE_EDP)
- signal = SIGNAL_TYPE_NONE;
-
- if (dc->links[i]->ep_type == DISPLAY_ENDPOINT_PHY)
- dc->links[i]->link_enc->funcs->disable_output(
- dc->links[i]->link_enc, signal);
-
- dc->links[i]->link_status.link_active = false;
- memset(&dc->links[i]->cur_link_settings, 0,
- sizeof(dc->links[i]->cur_link_settings));
- }
-}
-
-static void power_down_controllers(struct dc *dc)
-{
- int i;
-
- for (i = 0; i < dc->res_pool->timing_generator_count; i++) {
- dc->res_pool->timing_generators[i]->funcs->disable_crtc(
- dc->res_pool->timing_generators[i]);
- }
-}
-
-static void power_down_clock_sources(struct dc *dc)
-{
- int i;
-
- if (dc->res_pool->dp_clock_source->funcs->cs_power_down(
- dc->res_pool->dp_clock_source) == false)
- dm_error("Failed to power down pll! (dp clk src)\n");
-
- for (i = 0; i < dc->res_pool->clk_src_count; i++) {
- if (dc->res_pool->clock_sources[i]->funcs->cs_power_down(
- dc->res_pool->clock_sources[i]) == false)
- dm_error("Failed to power down pll! (clk src index=%d)\n", i);
- }
-}
-
-static void power_down_all_hw_blocks(struct dc *dc)
-{
- power_down_encoders(dc);
-
- power_down_controllers(dc);
-
- power_down_clock_sources(dc);
-
- if (dc->fbc_compressor)
- dc->fbc_compressor->funcs->disable_fbc(dc->fbc_compressor);
-}
-
-static void disable_vga_and_power_gate_all_controllers(
- struct dc *dc)
-{
- int i;
- struct timing_generator *tg;
- struct dc_context *ctx = dc->ctx;
-
- for (i = 0; i < dc->res_pool->timing_generator_count; i++) {
- tg = dc->res_pool->timing_generators[i];
-
- if (tg->funcs->disable_vga)
- tg->funcs->disable_vga(tg);
- }
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- /* Enable CLOCK gating for each pipe BEFORE controller
- * powergating. */
- enable_display_pipe_clock_gating(ctx,
- true);
-
- dc->current_state->res_ctx.pipe_ctx[i].pipe_idx = i;
- dc->hwss.disable_plane(dc,
- &dc->current_state->res_ctx.pipe_ctx[i]);
- }
-}
-
-
-static void get_edp_streams(struct dc_state *context,
- struct dc_stream_state **edp_streams,
- int *edp_stream_num)
-{
- int i;
-
- *edp_stream_num = 0;
- for (i = 0; i < context->stream_count; i++) {
- if (context->streams[i]->signal == SIGNAL_TYPE_EDP) {
- edp_streams[*edp_stream_num] = context->streams[i];
- if (++(*edp_stream_num) == MAX_NUM_EDP)
- return;
- }
- }
-}
-
-static void get_edp_links_with_sink(
- struct dc *dc,
- struct dc_link **edp_links_with_sink,
- int *edp_with_sink_num)
-{
- int i;
-
- /* check if there is an eDP panel not in use */
- *edp_with_sink_num = 0;
- for (i = 0; i < dc->link_count; i++) {
- if (dc->links[i]->local_sink &&
- dc->links[i]->local_sink->sink_signal == SIGNAL_TYPE_EDP) {
- edp_links_with_sink[*edp_with_sink_num] = dc->links[i];
- if (++(*edp_with_sink_num) == MAX_NUM_EDP)
- return;
- }
- }
-}
-
-/*
- * When ASIC goes from VBIOS/VGA mode to driver/accelerated mode we need:
- * 1. Power down all DC HW blocks
- * 2. Disable VGA engine on all controllers
- * 3. Enable power gating for controller
- * 4. Set acc_mode_change bit (VBIOS will clear this bit when going to FSDOS)
- */
-void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
-{
- struct dc_link *edp_links_with_sink[MAX_NUM_EDP];
- struct dc_link *edp_links[MAX_NUM_EDP];
- struct dc_stream_state *edp_streams[MAX_NUM_EDP];
- struct dc_link *edp_link_with_sink = NULL;
- struct dc_link *edp_link = NULL;
- struct dc_stream_state *edp_stream = NULL;
- struct dce_hwseq *hws = dc->hwseq;
- int edp_with_sink_num;
- int edp_num;
- int edp_stream_num;
- int i;
- bool can_apply_edp_fast_boot = false;
- bool can_apply_seamless_boot = false;
- bool keep_edp_vdd_on = false;
- DC_LOGGER_INIT();
-
-
- get_edp_links_with_sink(dc, edp_links_with_sink, &edp_with_sink_num);
- get_edp_links(dc, edp_links, &edp_num);
-
- if (hws->funcs.init_pipes)
- hws->funcs.init_pipes(dc, context);
-
- get_edp_streams(context, edp_streams, &edp_stream_num);
-
- // Check fastboot support, disable on DCE8 because of blank screens
- if (edp_num && dc->ctx->dce_version != DCE_VERSION_8_0 &&
- dc->ctx->dce_version != DCE_VERSION_8_1 &&
- dc->ctx->dce_version != DCE_VERSION_8_3) {
- for (i = 0; i < edp_num; i++) {
- edp_link = edp_links[i];
- // enable fastboot if backend is enabled on eDP
- if (edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc)) {
- /* Set optimization flag on eDP stream*/
- if (edp_stream_num && edp_link->link_status.link_active) {
- edp_stream = edp_streams[0];
- can_apply_edp_fast_boot = !is_edp_ilr_optimization_required(edp_stream->link, &edp_stream->timing);
- edp_stream->apply_edp_fast_boot_optimization = can_apply_edp_fast_boot;
- if (can_apply_edp_fast_boot)
- DC_LOG_EVENT_LINK_TRAINING("eDP fast boot disabled to optimize link rate\n");
-
- break;
- }
- }
- }
- // We are trying to enable eDP, don't power down VDD
- if (edp_stream_num)
- keep_edp_vdd_on = true;
- }
-
- // Check seamless boot support
- for (i = 0; i < context->stream_count; i++) {
- if (context->streams[i]->apply_seamless_boot_optimization) {
- can_apply_seamless_boot = true;
- break;
- }
- }
-
- /* eDP should not have stream in resume from S4 and so even with VBios post
- * it should get turned off
- */
- if (edp_with_sink_num)
- edp_link_with_sink = edp_links_with_sink[0];
-
- if (!can_apply_edp_fast_boot && !can_apply_seamless_boot) {
- if (edp_link_with_sink && !keep_edp_vdd_on) {
- /*turn off backlight before DP_blank and encoder powered down*/
- hws->funcs.edp_backlight_control(edp_link_with_sink, false);
- }
- /*resume from S3, no vbios posting, no need to power down again*/
- power_down_all_hw_blocks(dc);
- disable_vga_and_power_gate_all_controllers(dc);
- if (edp_link_with_sink && !keep_edp_vdd_on)
- dc->hwss.edp_power_control(edp_link_with_sink, false);
- }
- bios_set_scratch_acc_mode_change(dc->ctx->dc_bios, 1);
-}
-
-static uint32_t compute_pstate_blackout_duration(
- struct bw_fixed blackout_duration,
- const struct dc_stream_state *stream)
-{
- uint32_t total_dest_line_time_ns;
- uint32_t pstate_blackout_duration_ns;
-
- pstate_blackout_duration_ns = 1000 * blackout_duration.value >> 24;
-
- total_dest_line_time_ns = 1000000UL *
- (stream->timing.h_total * 10) /
- stream->timing.pix_clk_100hz +
- pstate_blackout_duration_ns;
-
- return total_dest_line_time_ns;
-}
-
-static void dce110_set_displaymarks(
- const struct dc *dc,
- struct dc_state *context)
-{
- uint8_t i, num_pipes;
- unsigned int underlay_idx = dc->res_pool->underlay_pipe_index;
-
- for (i = 0, num_pipes = 0; i < MAX_PIPES; i++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
- uint32_t total_dest_line_time_ns;
-
- if (pipe_ctx->stream == NULL)
- continue;
-
- total_dest_line_time_ns = compute_pstate_blackout_duration(
- dc->bw_vbios->blackout_duration, pipe_ctx->stream);
- pipe_ctx->plane_res.mi->funcs->mem_input_program_display_marks(
- pipe_ctx->plane_res.mi,
- context->bw_ctx.bw.dce.nbp_state_change_wm_ns[num_pipes],
- context->bw_ctx.bw.dce.stutter_exit_wm_ns[num_pipes],
- context->bw_ctx.bw.dce.stutter_entry_wm_ns[num_pipes],
- context->bw_ctx.bw.dce.urgent_wm_ns[num_pipes],
- total_dest_line_time_ns);
- if (i == underlay_idx) {
- num_pipes++;
- pipe_ctx->plane_res.mi->funcs->mem_input_program_chroma_display_marks(
- pipe_ctx->plane_res.mi,
- context->bw_ctx.bw.dce.nbp_state_change_wm_ns[num_pipes],
- context->bw_ctx.bw.dce.stutter_exit_wm_ns[num_pipes],
- context->bw_ctx.bw.dce.urgent_wm_ns[num_pipes],
- total_dest_line_time_ns);
- }
- num_pipes++;
- }
-}
-
-void dce110_set_safe_displaymarks(
- struct resource_context *res_ctx,
- const struct resource_pool *pool)
-{
- int i;
- int underlay_idx = pool->underlay_pipe_index;
- struct dce_watermarks max_marks = {
- MAX_WATERMARK, MAX_WATERMARK, MAX_WATERMARK, MAX_WATERMARK };
- struct dce_watermarks nbp_marks = {
- SAFE_NBP_MARK, SAFE_NBP_MARK, SAFE_NBP_MARK, SAFE_NBP_MARK };
- struct dce_watermarks min_marks = { 0, 0, 0, 0};
-
- for (i = 0; i < MAX_PIPES; i++) {
- if (res_ctx->pipe_ctx[i].stream == NULL || res_ctx->pipe_ctx[i].plane_res.mi == NULL)
- continue;
-
- res_ctx->pipe_ctx[i].plane_res.mi->funcs->mem_input_program_display_marks(
- res_ctx->pipe_ctx[i].plane_res.mi,
- nbp_marks,
- max_marks,
- min_marks,
- max_marks,
- MAX_WATERMARK);
-
- if (i == underlay_idx)
- res_ctx->pipe_ctx[i].plane_res.mi->funcs->mem_input_program_chroma_display_marks(
- res_ctx->pipe_ctx[i].plane_res.mi,
- nbp_marks,
- max_marks,
- max_marks,
- MAX_WATERMARK);
-
- }
-}
-
-/*******************************************************************************
- * Public functions
- ******************************************************************************/
-
-static void set_drr(struct pipe_ctx **pipe_ctx,
- int num_pipes, struct dc_crtc_timing_adjust adjust)
-{
- int i = 0;
- struct drr_params params = {0};
- // DRR should set trigger event to monitor surface update event
- unsigned int event_triggers = 0x80;
- // Note DRR trigger events are generated regardless of whether num frames met.
- unsigned int num_frames = 2;
-
- params.vertical_total_max = adjust.v_total_max;
- params.vertical_total_min = adjust.v_total_min;
-
- /* TODO: If multiple pipes are to be supported, you need
- * some GSL stuff. Static screen triggers may be programmed differently
- * as well.
- */
- for (i = 0; i < num_pipes; i++) {
- pipe_ctx[i]->stream_res.tg->funcs->set_drr(
- pipe_ctx[i]->stream_res.tg, &params);
-
- if (adjust.v_total_max != 0 && adjust.v_total_min != 0)
- pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(
- pipe_ctx[i]->stream_res.tg,
- event_triggers, num_frames);
- }
-}
-
-static void get_position(struct pipe_ctx **pipe_ctx,
- int num_pipes,
- struct crtc_position *position)
-{
- int i = 0;
-
- /* TODO: handle pipes > 1
- */
- for (i = 0; i < num_pipes; i++)
- pipe_ctx[i]->stream_res.tg->funcs->get_position(pipe_ctx[i]->stream_res.tg, position);
-}
-
-static void set_static_screen_control(struct pipe_ctx **pipe_ctx,
- int num_pipes, const struct dc_static_screen_params *params)
-{
- unsigned int i;
- unsigned int triggers = 0;
-
- if (params->triggers.overlay_update)
- triggers |= 0x100;
- if (params->triggers.surface_update)
- triggers |= 0x80;
- if (params->triggers.cursor_update)
- triggers |= 0x2;
- if (params->triggers.force_trigger)
- triggers |= 0x1;
-
- if (num_pipes) {
- struct dc *dc = pipe_ctx[0]->stream->ctx->dc;
-
- if (dc->fbc_compressor)
- triggers |= 0x84;
- }
-
- for (i = 0; i < num_pipes; i++)
- pipe_ctx[i]->stream_res.tg->funcs->
- set_static_screen_control(pipe_ctx[i]->stream_res.tg,
- triggers, params->num_frames);
-}
-
-/*
- * Check if FBC can be enabled
- */
-static bool should_enable_fbc(struct dc *dc,
- struct dc_state *context,
- uint32_t *pipe_idx)
-{
- uint32_t i;
- struct pipe_ctx *pipe_ctx = NULL;
- struct resource_context *res_ctx = &context->res_ctx;
- unsigned int underlay_idx = dc->res_pool->underlay_pipe_index;
-
-
- ASSERT(dc->fbc_compressor);
-
- /* FBC memory should be allocated */
- if (!dc->ctx->fbc_gpu_addr)
- return false;
-
- /* Only supports single display */
- if (context->stream_count != 1)
- return false;
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- if (res_ctx->pipe_ctx[i].stream) {
-
- pipe_ctx = &res_ctx->pipe_ctx[i];
-
- if (!pipe_ctx)
- continue;
-
- /* fbc not applicable on underlay pipe */
- if (pipe_ctx->pipe_idx != underlay_idx) {
- *pipe_idx = i;
- break;
- }
- }
- }
-
- if (i == dc->res_pool->pipe_count)
- return false;
-
- if (!pipe_ctx->stream->link)
- return false;
-
- /* Only supports eDP */
- if (pipe_ctx->stream->link->connector_signal != SIGNAL_TYPE_EDP)
- return false;
-
- /* PSR should not be enabled */
- if (pipe_ctx->stream->link->psr_settings.psr_feature_enabled)
- return false;
-
- /* Nothing to compress */
- if (!pipe_ctx->plane_state)
- return false;
-
- /* Only for non-linear tiling */
- if (pipe_ctx->plane_state->tiling_info.gfx8.array_mode == DC_ARRAY_LINEAR_GENERAL)
- return false;
-
- return true;
-}
-
-/*
- * Enable FBC
- */
-static void enable_fbc(
- struct dc *dc,
- struct dc_state *context)
-{
- uint32_t pipe_idx = 0;
-
- if (should_enable_fbc(dc, context, &pipe_idx)) {
- /* Program GRPH COMPRESSED ADDRESS and PITCH */
- struct compr_addr_and_pitch_params params = {0, 0, 0};
- struct compressor *compr = dc->fbc_compressor;
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx];
-
- params.source_view_width = pipe_ctx->stream->timing.h_addressable;
- params.source_view_height = pipe_ctx->stream->timing.v_addressable;
- params.inst = pipe_ctx->stream_res.tg->inst;
- compr->compr_surface_address.quad_part = dc->ctx->fbc_gpu_addr;
-
- compr->funcs->surface_address_and_pitch(compr, &params);
- compr->funcs->set_fbc_invalidation_triggers(compr, 1);
-
- compr->funcs->enable_fbc(compr, &params);
- }
-}
-
-static void dce110_reset_hw_ctx_wrap(
- struct dc *dc,
- struct dc_state *context)
-{
- int i;
-
- /* Reset old context */
- /* look up the targets that have been removed since last commit */
- for (i = 0; i < MAX_PIPES; i++) {
- struct pipe_ctx *pipe_ctx_old =
- &dc->current_state->res_ctx.pipe_ctx[i];
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-
- /* Note: We need to disable output if clock sources change,
- * since bios does optimization and doesn't apply if changing
- * PHY when not already disabled.
- */
-
- /* Skip underlay pipe since it will be handled in commit surface*/
- if (!pipe_ctx_old->stream || pipe_ctx_old->top_pipe)
- continue;
-
- if (!pipe_ctx->stream ||
- pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) {
- struct clock_source *old_clk = pipe_ctx_old->clock_source;
-
- /* Disable if new stream is null. O/w, if stream is
- * disabled already, no need to disable again.
- */
- if (!pipe_ctx->stream || !pipe_ctx->stream->dpms_off) {
- core_link_disable_stream(pipe_ctx_old);
-
- /* free acquired resources*/
- if (pipe_ctx_old->stream_res.audio) {
- /*disable az_endpoint*/
- pipe_ctx_old->stream_res.audio->funcs->
- az_disable(pipe_ctx_old->stream_res.audio);
-
- /*free audio*/
- if (dc->caps.dynamic_audio == true) {
- /*we have to dynamic arbitrate the audio endpoints*/
- /*we free the resource, need reset is_audio_acquired*/
- update_audio_usage(&dc->current_state->res_ctx, dc->res_pool,
- pipe_ctx_old->stream_res.audio, false);
- pipe_ctx_old->stream_res.audio = NULL;
- }
- }
- }
-
- pipe_ctx_old->stream_res.tg->funcs->set_blank(pipe_ctx_old->stream_res.tg, true);
- if (!hwss_wait_for_blank_complete(pipe_ctx_old->stream_res.tg)) {
- dm_error("DC: failed to blank crtc!\n");
- BREAK_TO_DEBUGGER();
- }
- pipe_ctx_old->stream_res.tg->funcs->disable_crtc(pipe_ctx_old->stream_res.tg);
- pipe_ctx_old->plane_res.mi->funcs->free_mem_input(
- pipe_ctx_old->plane_res.mi, dc->current_state->stream_count);
-
- if (old_clk && 0 == resource_get_clock_source_reference(&context->res_ctx,
- dc->res_pool,
- old_clk))
- old_clk->funcs->cs_power_down(old_clk);
-
- dc->hwss.disable_plane(dc, pipe_ctx_old);
-
- pipe_ctx_old->stream = NULL;
- }
- }
-}
-
-static void dce110_setup_audio_dto(
- struct dc *dc,
- struct dc_state *context)
-{
- int i;
-
- /* program audio wall clock. use HDMI as clock source if HDMI
- * audio active. Otherwise, use DP as clock source
- * first, loop to find any HDMI audio, if not, loop find DP audio
- */
- /* Setup audio rate clock source */
- /* Issue:
- * Audio lag happened on DP monitor when unplug a HDMI monitor
- *
- * Cause:
- * In case of DP and HDMI connected or HDMI only, DCCG_AUDIO_DTO_SEL
- * is set to either dto0 or dto1, audio should work fine.
- * In case of DP connected only, DCCG_AUDIO_DTO_SEL should be dto1,
- * set to dto0 will cause audio lag.
- *
- * Solution:
- * Not optimized audio wall dto setup. When mode set, iterate pipe_ctx,
- * find first available pipe with audio, setup audio wall DTO per topology
- * instead of per pipe.
- */
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-
- if (pipe_ctx->stream == NULL)
- continue;
-
- if (pipe_ctx->top_pipe)
- continue;
- if (pipe_ctx->stream->signal != SIGNAL_TYPE_HDMI_TYPE_A)
- continue;
- if (pipe_ctx->stream_res.audio != NULL) {
- struct audio_output audio_output;
-
- build_audio_output(context, pipe_ctx, &audio_output);
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- /* For DCN3.1, audio to HPO FRL encoder is using audio DTBCLK DTO */
- if (dc->res_pool->dccg && dc->res_pool->dccg->funcs->set_audio_dtbclk_dto) {
- /* disable audio DTBCLK DTO */
- dc->res_pool->dccg->funcs->set_audio_dtbclk_dto(
- dc->res_pool->dccg, 0);
-
- pipe_ctx->stream_res.audio->funcs->wall_dto_setup(
- pipe_ctx->stream_res.audio,
- pipe_ctx->stream->signal,
- &audio_output.crtc_info,
- &audio_output.pll_info);
- } else
- pipe_ctx->stream_res.audio->funcs->wall_dto_setup(
- pipe_ctx->stream_res.audio,
- pipe_ctx->stream->signal,
- &audio_output.crtc_info,
- &audio_output.pll_info);
-#else
- pipe_ctx->stream_res.audio->funcs->wall_dto_setup(
- pipe_ctx->stream_res.audio,
- pipe_ctx->stream->signal,
- &audio_output.crtc_info,
- &audio_output.pll_info);
-#endif
- break;
- }
- }
-
- /* no HDMI audio is found, try DP audio */
- if (i == dc->res_pool->pipe_count) {
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-
- if (pipe_ctx->stream == NULL)
- continue;
-
- if (pipe_ctx->top_pipe)
- continue;
-
- if (!dc_is_dp_signal(pipe_ctx->stream->signal))
- continue;
-
- if (pipe_ctx->stream_res.audio != NULL) {
- struct audio_output audio_output;
-
- build_audio_output(context, pipe_ctx, &audio_output);
-
- pipe_ctx->stream_res.audio->funcs->wall_dto_setup(
- pipe_ctx->stream_res.audio,
- pipe_ctx->stream->signal,
- &audio_output.crtc_info,
- &audio_output.pll_info);
- break;
- }
- }
- }
-}
-
-enum dc_status dce110_apply_ctx_to_hw(
- struct dc *dc,
- struct dc_state *context)
-{
- struct dce_hwseq *hws = dc->hwseq;
- struct dc_bios *dcb = dc->ctx->dc_bios;
- enum dc_status status;
- int i;
-
- /* Reset old context */
- /* look up the targets that have been removed since last commit */
- hws->funcs.reset_hw_ctx_wrap(dc, context);
-
- /* Skip applying if no targets */
- if (context->stream_count <= 0)
- return DC_OK;
-
- /* Apply new context */
- dcb->funcs->set_scratch_critical_state(dcb, true);
-
- /* below is for real asic only */
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx_old =
- &dc->current_state->res_ctx.pipe_ctx[i];
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-
- if (pipe_ctx->stream == NULL || pipe_ctx->top_pipe)
- continue;
-
- if (pipe_ctx->stream == pipe_ctx_old->stream) {
- if (pipe_ctx_old->clock_source != pipe_ctx->clock_source)
- dce_crtc_switch_to_clk_src(dc->hwseq,
- pipe_ctx->clock_source, i);
- continue;
- }
-
- hws->funcs.enable_display_power_gating(
- dc, i, dc->ctx->dc_bios,
- PIPE_GATING_CONTROL_DISABLE);
- }
-
- if (dc->fbc_compressor)
- dc->fbc_compressor->funcs->disable_fbc(dc->fbc_compressor);
-
- dce110_setup_audio_dto(dc, context);
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx_old =
- &dc->current_state->res_ctx.pipe_ctx[i];
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-
- if (pipe_ctx->stream == NULL)
- continue;
-
- if (pipe_ctx->stream == pipe_ctx_old->stream &&
- pipe_ctx->stream->link->link_state_valid) {
- continue;
- }
-
- if (pipe_ctx_old->stream && !pipe_need_reprogram(pipe_ctx_old, pipe_ctx))
- continue;
-
- if (pipe_ctx->top_pipe || pipe_ctx->prev_odm_pipe)
- continue;
-
- status = apply_single_controller_ctx_to_hw(
- pipe_ctx,
- context,
- dc);
-
- if (DC_OK != status)
- return status;
- }
-
- if (dc->fbc_compressor)
- enable_fbc(dc, dc->current_state);
-
- dcb->funcs->set_scratch_critical_state(dcb, false);
-
- return DC_OK;
-}
-
-/*******************************************************************************
- * Front End programming
- ******************************************************************************/
-static void set_default_colors(struct pipe_ctx *pipe_ctx)
-{
- struct default_adjustment default_adjust = { 0 };
-
- default_adjust.force_hw_default = false;
- default_adjust.in_color_space = pipe_ctx->plane_state->color_space;
- default_adjust.out_color_space = pipe_ctx->stream->output_color_space;
- default_adjust.csc_adjust_type = GRAPHICS_CSC_ADJUST_TYPE_SW;
- default_adjust.surface_pixel_format = pipe_ctx->plane_res.scl_data.format;
-
- /* display color depth */
- default_adjust.color_depth =
- pipe_ctx->stream->timing.display_color_depth;
-
- /* Lb color depth */
- default_adjust.lb_color_depth = pipe_ctx->plane_res.scl_data.lb_params.depth;
-
- pipe_ctx->plane_res.xfm->funcs->opp_set_csc_default(
- pipe_ctx->plane_res.xfm, &default_adjust);
-}
-
-
-/*******************************************************************************
- * In order to turn on/off specific surface we will program
- * Blender + CRTC
- *
- * In case that we have two surfaces and they have a different visibility
- * we can't turn off the CRTC since it will turn off the entire display
- *
- * |----------------------------------------------- |
- * |bottom pipe|curr pipe | | |
- * |Surface |Surface | Blender | CRCT |
- * |visibility |visibility | Configuration| |
- * |------------------------------------------------|
- * | off | off | CURRENT_PIPE | blank |
- * | off | on | CURRENT_PIPE | unblank |
- * | on | off | OTHER_PIPE | unblank |
- * | on | on | BLENDING | unblank |
- * -------------------------------------------------|
- *
- ******************************************************************************/
-static void program_surface_visibility(const struct dc *dc,
- struct pipe_ctx *pipe_ctx)
-{
- enum blnd_mode blender_mode = BLND_MODE_CURRENT_PIPE;
- bool blank_target = false;
-
- if (pipe_ctx->bottom_pipe) {
-
- /* For now we are supporting only two pipes */
- ASSERT(pipe_ctx->bottom_pipe->bottom_pipe == NULL);
-
- if (pipe_ctx->bottom_pipe->plane_state->visible) {
- if (pipe_ctx->plane_state->visible)
- blender_mode = BLND_MODE_BLENDING;
- else
- blender_mode = BLND_MODE_OTHER_PIPE;
-
- } else if (!pipe_ctx->plane_state->visible)
- blank_target = true;
-
- } else if (!pipe_ctx->plane_state->visible)
- blank_target = true;
-
- dce_set_blender_mode(dc->hwseq, pipe_ctx->stream_res.tg->inst, blender_mode);
- pipe_ctx->stream_res.tg->funcs->set_blank(pipe_ctx->stream_res.tg, blank_target);
-
-}
-
-static void program_gamut_remap(struct pipe_ctx *pipe_ctx)
-{
- int i = 0;
- struct xfm_grph_csc_adjustment adjust;
- memset(&adjust, 0, sizeof(adjust));
- adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
-
-
- if (pipe_ctx->stream->gamut_remap_matrix.enable_remap == true) {
- adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
-
- for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++)
- adjust.temperature_matrix[i] =
- pipe_ctx->stream->gamut_remap_matrix.matrix[i];
- }
-
- pipe_ctx->plane_res.xfm->funcs->transform_set_gamut_remap(pipe_ctx->plane_res.xfm, &adjust);
-}
-static void update_plane_addr(const struct dc *dc,
- struct pipe_ctx *pipe_ctx)
-{
- struct dc_plane_state *plane_state = pipe_ctx->plane_state;
-
- if (plane_state == NULL)
- return;
-
- pipe_ctx->plane_res.mi->funcs->mem_input_program_surface_flip_and_addr(
- pipe_ctx->plane_res.mi,
- &plane_state->address,
- plane_state->flip_immediate);
-
- plane_state->status.requested_address = plane_state->address;
-}
-
-static void dce110_update_pending_status(struct pipe_ctx *pipe_ctx)
-{
- struct dc_plane_state *plane_state = pipe_ctx->plane_state;
-
- if (plane_state == NULL)
- return;
-
- plane_state->status.is_flip_pending =
- pipe_ctx->plane_res.mi->funcs->mem_input_is_flip_pending(
- pipe_ctx->plane_res.mi);
-
- if (plane_state->status.is_flip_pending && !plane_state->visible)
- pipe_ctx->plane_res.mi->current_address = pipe_ctx->plane_res.mi->request_address;
-
- plane_state->status.current_address = pipe_ctx->plane_res.mi->current_address;
- if (pipe_ctx->plane_res.mi->current_address.type == PLN_ADDR_TYPE_GRPH_STEREO &&
- pipe_ctx->stream_res.tg->funcs->is_stereo_left_eye) {
- plane_state->status.is_right_eye =\
- !pipe_ctx->stream_res.tg->funcs->is_stereo_left_eye(pipe_ctx->stream_res.tg);
- }
-}
-
-void dce110_power_down(struct dc *dc)
-{
- power_down_all_hw_blocks(dc);
- disable_vga_and_power_gate_all_controllers(dc);
-}
-
-static bool wait_for_reset_trigger_to_occur(
- struct dc_context *dc_ctx,
- struct timing_generator *tg)
-{
- bool rc = false;
-
- /* To avoid endless loop we wait at most
- * frames_to_wait_on_triggered_reset frames for the reset to occur. */
- const uint32_t frames_to_wait_on_triggered_reset = 10;
- uint32_t i;
-
- for (i = 0; i < frames_to_wait_on_triggered_reset; i++) {
-
- if (!tg->funcs->is_counter_moving(tg)) {
- DC_ERROR("TG counter is not moving!\n");
- break;
- }
-
- if (tg->funcs->did_triggered_reset_occur(tg)) {
- rc = true;
- /* usually occurs at i=1 */
- DC_SYNC_INFO("GSL: reset occurred at wait count: %d\n",
- i);
- break;
- }
-
- /* Wait for one frame. */
- tg->funcs->wait_for_state(tg, CRTC_STATE_VACTIVE);
- tg->funcs->wait_for_state(tg, CRTC_STATE_VBLANK);
- }
-
- if (false == rc)
- DC_ERROR("GSL: Timeout on reset trigger!\n");
-
- return rc;
-}
-
-/* Enable timing synchronization for a group of Timing Generators. */
-static void dce110_enable_timing_synchronization(
- struct dc *dc,
- int group_index,
- int group_size,
- struct pipe_ctx *grouped_pipes[])
-{
- struct dc_context *dc_ctx = dc->ctx;
- struct dcp_gsl_params gsl_params = { 0 };
- int i;
-
- DC_SYNC_INFO("GSL: Setting-up...\n");
-
- /* Designate a single TG in the group as a master.
- * Since HW doesn't care which one, we always assign
- * the 1st one in the group. */
- gsl_params.gsl_group = 0;
- gsl_params.gsl_master = grouped_pipes[0]->stream_res.tg->inst;
-
- for (i = 0; i < group_size; i++)
- grouped_pipes[i]->stream_res.tg->funcs->setup_global_swap_lock(
- grouped_pipes[i]->stream_res.tg, &gsl_params);
-
- /* Reset slave controllers on master VSync */
- DC_SYNC_INFO("GSL: enabling trigger-reset\n");
-
- for (i = 1 /* skip the master */; i < group_size; i++)
- grouped_pipes[i]->stream_res.tg->funcs->enable_reset_trigger(
- grouped_pipes[i]->stream_res.tg,
- gsl_params.gsl_group);
-
- for (i = 1 /* skip the master */; i < group_size; i++) {
- DC_SYNC_INFO("GSL: waiting for reset to occur.\n");
- wait_for_reset_trigger_to_occur(dc_ctx, grouped_pipes[i]->stream_res.tg);
- grouped_pipes[i]->stream_res.tg->funcs->disable_reset_trigger(
- grouped_pipes[i]->stream_res.tg);
- }
-
- /* GSL Vblank synchronization is a one time sync mechanism, assumption
- * is that the sync'ed displays will not drift out of sync over time*/
- DC_SYNC_INFO("GSL: Restoring register states.\n");
- for (i = 0; i < group_size; i++)
- grouped_pipes[i]->stream_res.tg->funcs->tear_down_global_swap_lock(grouped_pipes[i]->stream_res.tg);
-
- DC_SYNC_INFO("GSL: Set-up complete.\n");
-}
-
-static void dce110_enable_per_frame_crtc_position_reset(
- struct dc *dc,
- int group_size,
- struct pipe_ctx *grouped_pipes[])
-{
- struct dc_context *dc_ctx = dc->ctx;
- struct dcp_gsl_params gsl_params = { 0 };
- int i;
-
- gsl_params.gsl_group = 0;
- gsl_params.gsl_master = 0;
-
- for (i = 0; i < group_size; i++)
- grouped_pipes[i]->stream_res.tg->funcs->setup_global_swap_lock(
- grouped_pipes[i]->stream_res.tg, &gsl_params);
-
- DC_SYNC_INFO("GSL: enabling trigger-reset\n");
-
- for (i = 1; i < group_size; i++)
- grouped_pipes[i]->stream_res.tg->funcs->enable_crtc_reset(
- grouped_pipes[i]->stream_res.tg,
- gsl_params.gsl_master,
- &grouped_pipes[i]->stream->triggered_crtc_reset);
-
- DC_SYNC_INFO("GSL: waiting for reset to occur.\n");
- for (i = 1; i < group_size; i++)
- wait_for_reset_trigger_to_occur(dc_ctx, grouped_pipes[i]->stream_res.tg);
-
- for (i = 0; i < group_size; i++)
- grouped_pipes[i]->stream_res.tg->funcs->tear_down_global_swap_lock(grouped_pipes[i]->stream_res.tg);
-
-}
-
-static void init_pipes(struct dc *dc, struct dc_state *context)
-{
- // Do nothing
-}
-
-static void init_hw(struct dc *dc)
-{
- int i;
- struct dc_bios *bp;
- struct transform *xfm;
- struct abm *abm;
- struct dmcu *dmcu;
- struct dce_hwseq *hws = dc->hwseq;
- uint32_t backlight = MAX_BACKLIGHT_LEVEL;
-
- bp = dc->ctx->dc_bios;
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- xfm = dc->res_pool->transforms[i];
- xfm->funcs->transform_reset(xfm);
-
- hws->funcs.enable_display_power_gating(
- dc, i, bp,
- PIPE_GATING_CONTROL_INIT);
- hws->funcs.enable_display_power_gating(
- dc, i, bp,
- PIPE_GATING_CONTROL_DISABLE);
- hws->funcs.enable_display_pipe_clock_gating(
- dc->ctx,
- true);
- }
-
- dce_clock_gating_power_up(dc->hwseq, false);
- /***************************************/
-
- for (i = 0; i < dc->link_count; i++) {
- /****************************************/
- /* Power up AND update implementation according to the
- * required signal (which may be different from the
- * default signal on connector). */
- struct dc_link *link = dc->links[i];
-
- link->link_enc->funcs->hw_init(link->link_enc);
- }
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct timing_generator *tg = dc->res_pool->timing_generators[i];
-
- tg->funcs->disable_vga(tg);
-
- /* Blank controller using driver code instead of
- * command table. */
- tg->funcs->set_blank(tg, true);
- hwss_wait_for_blank_complete(tg);
- }
-
- for (i = 0; i < dc->res_pool->audio_count; i++) {
- struct audio *audio = dc->res_pool->audios[i];
- audio->funcs->hw_init(audio);
- }
-
- for (i = 0; i < dc->link_count; i++) {
- struct dc_link *link = dc->links[i];
-
- if (link->panel_cntl)
- backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
- }
-
- abm = dc->res_pool->abm;
- if (abm != NULL)
- abm->funcs->abm_init(abm, backlight);
-
- dmcu = dc->res_pool->dmcu;
- if (dmcu != NULL && abm != NULL)
- abm->dmcu_is_running = dmcu->funcs->is_dmcu_initialized(dmcu);
-
- if (dc->fbc_compressor)
- dc->fbc_compressor->funcs->power_up_fbc(dc->fbc_compressor);
-
-}
-
-
-void dce110_prepare_bandwidth(
- struct dc *dc,
- struct dc_state *context)
-{
- struct clk_mgr *dccg = dc->clk_mgr;
-
- dce110_set_safe_displaymarks(&context->res_ctx, dc->res_pool);
-
- dccg->funcs->update_clocks(
- dccg,
- context,
- false);
-}
-
-void dce110_optimize_bandwidth(
- struct dc *dc,
- struct dc_state *context)
-{
- struct clk_mgr *dccg = dc->clk_mgr;
-
- dce110_set_displaymarks(dc, context);
-
- dccg->funcs->update_clocks(
- dccg,
- context,
- true);
-}
-
-static void dce110_program_front_end_for_pipe(
- struct dc *dc, struct pipe_ctx *pipe_ctx)
-{
- struct mem_input *mi = pipe_ctx->plane_res.mi;
- struct dc_plane_state *plane_state = pipe_ctx->plane_state;
- struct xfm_grph_csc_adjustment adjust;
- struct out_csc_color_matrix tbl_entry;
- unsigned int i;
- struct dce_hwseq *hws = dc->hwseq;
-
- DC_LOGGER_INIT();
- memset(&tbl_entry, 0, sizeof(tbl_entry));
-
- memset(&adjust, 0, sizeof(adjust));
- adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
-
- dce_enable_fe_clock(dc->hwseq, mi->inst, true);
-
- set_default_colors(pipe_ctx);
- if (pipe_ctx->stream->csc_color_matrix.enable_adjustment
- == true) {
- tbl_entry.color_space =
- pipe_ctx->stream->output_color_space;
-
- for (i = 0; i < 12; i++)
- tbl_entry.regval[i] =
- pipe_ctx->stream->csc_color_matrix.matrix[i];
-
- pipe_ctx->plane_res.xfm->funcs->opp_set_csc_adjustment
- (pipe_ctx->plane_res.xfm, &tbl_entry);
- }
-
- if (pipe_ctx->stream->gamut_remap_matrix.enable_remap == true) {
- adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
-
- for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++)
- adjust.temperature_matrix[i] =
- pipe_ctx->stream->gamut_remap_matrix.matrix[i];
- }
-
- pipe_ctx->plane_res.xfm->funcs->transform_set_gamut_remap(pipe_ctx->plane_res.xfm, &adjust);
-
- pipe_ctx->plane_res.scl_data.lb_params.alpha_en = pipe_ctx->bottom_pipe != 0;
-
- program_scaler(dc, pipe_ctx);
-
- mi->funcs->mem_input_program_surface_config(
- mi,
- plane_state->format,
- &plane_state->tiling_info,
- &plane_state->plane_size,
- plane_state->rotation,
- NULL,
- false);
- if (mi->funcs->set_blank)
- mi->funcs->set_blank(mi, pipe_ctx->plane_state->visible);
-
- if (dc->config.gpu_vm_support)
- mi->funcs->mem_input_program_pte_vm(
- pipe_ctx->plane_res.mi,
- plane_state->format,
- &plane_state->tiling_info,
- plane_state->rotation);
-
- /* Moved programming gamma from dc to hwss */
- if (pipe_ctx->plane_state->update_flags.bits.full_update ||
- pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change ||
- pipe_ctx->plane_state->update_flags.bits.gamma_change)
- hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state);
-
- if (pipe_ctx->plane_state->update_flags.bits.full_update)
- hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream);
-
- DC_LOG_SURFACE(
- "Pipe:%d %p: addr hi:0x%x, "
- "addr low:0x%x, "
- "src: %d, %d, %d,"
- " %d; dst: %d, %d, %d, %d;"
- "clip: %d, %d, %d, %d\n",
- pipe_ctx->pipe_idx,
- (void *) pipe_ctx->plane_state,
- pipe_ctx->plane_state->address.grph.addr.high_part,
- pipe_ctx->plane_state->address.grph.addr.low_part,
- pipe_ctx->plane_state->src_rect.x,
- pipe_ctx->plane_state->src_rect.y,
- pipe_ctx->plane_state->src_rect.width,
- pipe_ctx->plane_state->src_rect.height,
- pipe_ctx->plane_state->dst_rect.x,
- pipe_ctx->plane_state->dst_rect.y,
- pipe_ctx->plane_state->dst_rect.width,
- pipe_ctx->plane_state->dst_rect.height,
- pipe_ctx->plane_state->clip_rect.x,
- pipe_ctx->plane_state->clip_rect.y,
- pipe_ctx->plane_state->clip_rect.width,
- pipe_ctx->plane_state->clip_rect.height);
-
- DC_LOG_SURFACE(
- "Pipe %d: width, height, x, y\n"
- "viewport:%d, %d, %d, %d\n"
- "recout: %d, %d, %d, %d\n",
- pipe_ctx->pipe_idx,
- pipe_ctx->plane_res.scl_data.viewport.width,
- pipe_ctx->plane_res.scl_data.viewport.height,
- pipe_ctx->plane_res.scl_data.viewport.x,
- pipe_ctx->plane_res.scl_data.viewport.y,
- pipe_ctx->plane_res.scl_data.recout.width,
- pipe_ctx->plane_res.scl_data.recout.height,
- pipe_ctx->plane_res.scl_data.recout.x,
- pipe_ctx->plane_res.scl_data.recout.y);
-}
-
-static void dce110_apply_ctx_for_surface(
- struct dc *dc,
- const struct dc_stream_state *stream,
- int num_planes,
- struct dc_state *context)
-{
- int i;
-
- if (num_planes == 0)
- return;
-
- if (dc->fbc_compressor)
- dc->fbc_compressor->funcs->disable_fbc(dc->fbc_compressor);
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-
- if (pipe_ctx->stream != stream)
- continue;
-
- /* Need to allocate mem before program front end for Fiji */
- pipe_ctx->plane_res.mi->funcs->allocate_mem_input(
- pipe_ctx->plane_res.mi,
- pipe_ctx->stream->timing.h_total,
- pipe_ctx->stream->timing.v_total,
- pipe_ctx->stream->timing.pix_clk_100hz / 10,
- context->stream_count);
-
- dce110_program_front_end_for_pipe(dc, pipe_ctx);
-
- dc->hwss.update_plane_addr(dc, pipe_ctx);
-
- program_surface_visibility(dc, pipe_ctx);
-
- }
-
- if (dc->fbc_compressor)
- enable_fbc(dc, context);
-}
-
-static void dce110_post_unlock_program_front_end(
- struct dc *dc,
- struct dc_state *context)
-{
-}
-
-static void dce110_power_down_fe(struct dc *dc, struct pipe_ctx *pipe_ctx)
-{
- struct dce_hwseq *hws = dc->hwseq;
- int fe_idx = pipe_ctx->plane_res.mi ?
- pipe_ctx->plane_res.mi->inst : pipe_ctx->pipe_idx;
-
- /* Do not power down fe when stream is active on dce*/
- if (dc->current_state->res_ctx.pipe_ctx[fe_idx].stream)
- return;
-
- hws->funcs.enable_display_power_gating(
- dc, fe_idx, dc->ctx->dc_bios, PIPE_GATING_CONTROL_ENABLE);
-
- dc->res_pool->transforms[fe_idx]->funcs->transform_reset(
- dc->res_pool->transforms[fe_idx]);
-}
-
-static void dce110_wait_for_mpcc_disconnect(
- struct dc *dc,
- struct resource_pool *res_pool,
- struct pipe_ctx *pipe_ctx)
-{
- /* do nothing*/
-}
-
-static void program_output_csc(struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- enum dc_color_space colorspace,
- uint16_t *matrix,
- int opp_id)
-{
- int i;
- struct out_csc_color_matrix tbl_entry;
-
- if (pipe_ctx->stream->csc_color_matrix.enable_adjustment == true) {
- enum dc_color_space color_space = pipe_ctx->stream->output_color_space;
-
- for (i = 0; i < 12; i++)
- tbl_entry.regval[i] = pipe_ctx->stream->csc_color_matrix.matrix[i];
-
- tbl_entry.color_space = color_space;
-
- pipe_ctx->plane_res.xfm->funcs->opp_set_csc_adjustment(
- pipe_ctx->plane_res.xfm, &tbl_entry);
- }
-}
-
-static void dce110_set_cursor_position(struct pipe_ctx *pipe_ctx)
-{
- struct dc_cursor_position pos_cpy = pipe_ctx->stream->cursor_position;
- struct input_pixel_processor *ipp = pipe_ctx->plane_res.ipp;
- struct mem_input *mi = pipe_ctx->plane_res.mi;
- struct dc_cursor_mi_param param = {
- .pixel_clk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10,
- .ref_clk_khz = pipe_ctx->stream->ctx->dc->res_pool->ref_clocks.xtalin_clock_inKhz,
- .viewport = pipe_ctx->plane_res.scl_data.viewport,
- .h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz,
- .v_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.vert,
- .rotation = pipe_ctx->plane_state->rotation,
- .mirror = pipe_ctx->plane_state->horizontal_mirror
- };
-
- /**
- * If the cursor's source viewport is clipped then we need to
- * translate the cursor to appear in the correct position on
- * the screen.
- *
- * This translation isn't affected by scaling so it needs to be
- * done *after* we adjust the position for the scale factor.
- *
- * This is only done by opt-in for now since there are still
- * some usecases like tiled display that might enable the
- * cursor on both streams while expecting dc to clip it.
- */
- if (pos_cpy.translate_by_source) {
- pos_cpy.x += pipe_ctx->plane_state->src_rect.x;
- pos_cpy.y += pipe_ctx->plane_state->src_rect.y;
- }
-
- if (pipe_ctx->plane_state->address.type
- == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE)
- pos_cpy.enable = false;
-
- if (pipe_ctx->top_pipe && pipe_ctx->plane_state != pipe_ctx->top_pipe->plane_state)
- pos_cpy.enable = false;
-
- if (ipp->funcs->ipp_cursor_set_position)
- ipp->funcs->ipp_cursor_set_position(ipp, &pos_cpy, &param);
- if (mi->funcs->set_cursor_position)
- mi->funcs->set_cursor_position(mi, &pos_cpy, &param);
-}
-
-static void dce110_set_cursor_attribute(struct pipe_ctx *pipe_ctx)
-{
- struct dc_cursor_attributes *attributes = &pipe_ctx->stream->cursor_attributes;
-
- if (pipe_ctx->plane_res.ipp &&
- pipe_ctx->plane_res.ipp->funcs->ipp_cursor_set_attributes)
- pipe_ctx->plane_res.ipp->funcs->ipp_cursor_set_attributes(
- pipe_ctx->plane_res.ipp, attributes);
-
- if (pipe_ctx->plane_res.mi &&
- pipe_ctx->plane_res.mi->funcs->set_cursor_attributes)
- pipe_ctx->plane_res.mi->funcs->set_cursor_attributes(
- pipe_ctx->plane_res.mi, attributes);
-
- if (pipe_ctx->plane_res.xfm &&
- pipe_ctx->plane_res.xfm->funcs->set_cursor_attributes)
- pipe_ctx->plane_res.xfm->funcs->set_cursor_attributes(
- pipe_ctx->plane_res.xfm, attributes);
-}
-
-bool dce110_set_backlight_level(struct pipe_ctx *pipe_ctx,
- uint32_t backlight_pwm_u16_16,
- uint32_t frame_ramp)
-{
- struct dc_link *link = pipe_ctx->stream->link;
- struct dc *dc = link->ctx->dc;
- struct abm *abm = pipe_ctx->stream_res.abm;
- struct panel_cntl *panel_cntl = link->panel_cntl;
- struct dmcu *dmcu = dc->res_pool->dmcu;
- bool fw_set_brightness = true;
- /* DMCU -1 for all controller id values,
- * therefore +1 here
- */
- uint32_t controller_id = pipe_ctx->stream_res.tg->inst + 1;
-
- if (abm == NULL || panel_cntl == NULL || (abm->funcs->set_backlight_level_pwm == NULL))
- return false;
-
- if (dmcu)
- fw_set_brightness = dmcu->funcs->is_dmcu_initialized(dmcu);
-
- if (!fw_set_brightness && panel_cntl->funcs->driver_set_backlight)
- panel_cntl->funcs->driver_set_backlight(panel_cntl, backlight_pwm_u16_16);
- else
- abm->funcs->set_backlight_level_pwm(
- abm,
- backlight_pwm_u16_16,
- frame_ramp,
- controller_id,
- link->panel_cntl->inst);
-
- return true;
-}
-
-void dce110_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx)
-{
- struct abm *abm = pipe_ctx->stream_res.abm;
- struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl;
-
- if (abm)
- abm->funcs->set_abm_immediate_disable(abm,
- pipe_ctx->stream->link->panel_cntl->inst);
-
- if (panel_cntl)
- panel_cntl->funcs->store_backlight_level(panel_cntl);
-}
-
-void dce110_set_pipe(struct pipe_ctx *pipe_ctx)
-{
- struct abm *abm = pipe_ctx->stream_res.abm;
- struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl;
- uint32_t otg_inst = pipe_ctx->stream_res.tg->inst + 1;
-
- if (abm && panel_cntl)
- abm->funcs->set_pipe(abm, otg_inst, panel_cntl->inst);
-}
-
-static const struct hw_sequencer_funcs dce110_funcs = {
- .program_gamut_remap = program_gamut_remap,
- .program_output_csc = program_output_csc,
- .init_hw = init_hw,
- .apply_ctx_to_hw = dce110_apply_ctx_to_hw,
- .apply_ctx_for_surface = dce110_apply_ctx_for_surface,
- .post_unlock_program_front_end = dce110_post_unlock_program_front_end,
- .update_plane_addr = update_plane_addr,
- .update_pending_status = dce110_update_pending_status,
- .enable_accelerated_mode = dce110_enable_accelerated_mode,
- .enable_timing_synchronization = dce110_enable_timing_synchronization,
- .enable_per_frame_crtc_position_reset = dce110_enable_per_frame_crtc_position_reset,
- .update_info_frame = dce110_update_info_frame,
- .enable_stream = dce110_enable_stream,
- .disable_stream = dce110_disable_stream,
- .unblank_stream = dce110_unblank_stream,
- .blank_stream = dce110_blank_stream,
- .enable_audio_stream = dce110_enable_audio_stream,
- .disable_audio_stream = dce110_disable_audio_stream,
- .disable_plane = dce110_power_down_fe,
- .pipe_control_lock = dce_pipe_control_lock,
- .interdependent_update_lock = NULL,
- .cursor_lock = dce_pipe_control_lock,
- .prepare_bandwidth = dce110_prepare_bandwidth,
- .optimize_bandwidth = dce110_optimize_bandwidth,
- .set_drr = set_drr,
- .get_position = get_position,
- .set_static_screen_control = set_static_screen_control,
- .setup_stereo = NULL,
- .set_avmute = dce110_set_avmute,
- .wait_for_mpcc_disconnect = dce110_wait_for_mpcc_disconnect,
- .edp_backlight_control = dce110_edp_backlight_control,
- .edp_power_control = dce110_edp_power_control,
- .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready,
- .set_cursor_position = dce110_set_cursor_position,
- .set_cursor_attribute = dce110_set_cursor_attribute,
- .set_backlight_level = dce110_set_backlight_level,
- .set_abm_immediate_disable = dce110_set_abm_immediate_disable,
- .set_pipe = dce110_set_pipe,
-};
-
-static const struct hwseq_private_funcs dce110_private_funcs = {
- .init_pipes = init_pipes,
- .update_plane_addr = update_plane_addr,
- .set_input_transfer_func = dce110_set_input_transfer_func,
- .set_output_transfer_func = dce110_set_output_transfer_func,
- .power_down = dce110_power_down,
- .enable_display_pipe_clock_gating = enable_display_pipe_clock_gating,
- .enable_display_power_gating = dce110_enable_display_power_gating,
- .reset_hw_ctx_wrap = dce110_reset_hw_ctx_wrap,
- .enable_stream_timing = dce110_enable_stream_timing,
- .disable_stream_gating = NULL,
- .enable_stream_gating = NULL,
- .edp_backlight_control = dce110_edp_backlight_control,
-};
-
-void dce110_hw_sequencer_construct(struct dc *dc)
-{
- dc->hwss = dce110_funcs;
- dc->hwseq->funcs = dce110_private_funcs;
-}
-
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h
deleted file mode 100644
index b6f3843d3d05..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
-* Copyright 2012-15 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#ifndef __DC_HWSS_DCE110_H__
-#define __DC_HWSS_DCE110_H__
-
-#include "core_types.h"
-#include "hw_sequencer_private.h"
-
-struct dc;
-struct dc_state;
-struct dm_pp_display_configuration;
-
-void dce110_hw_sequencer_construct(struct dc *dc);
-
-enum dc_status dce110_apply_ctx_to_hw(
- struct dc *dc,
- struct dc_state *context);
-
-
-void dce110_enable_stream(struct pipe_ctx *pipe_ctx);
-
-void dce110_disable_stream(struct pipe_ctx *pipe_ctx);
-
-void dce110_unblank_stream(struct pipe_ctx *pipe_ctx,
- struct dc_link_settings *link_settings);
-
-void dce110_blank_stream(struct pipe_ctx *pipe_ctx);
-
-void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx);
-void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx);
-
-void dce110_update_info_frame(struct pipe_ctx *pipe_ctx);
-
-void dce110_set_avmute(struct pipe_ctx *pipe_ctx, bool enable);
-void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context);
-
-void dce110_power_down(struct dc *dc);
-
-void dce110_set_safe_displaymarks(
- struct resource_context *res_ctx,
- const struct resource_pool *pool);
-
-void dce110_prepare_bandwidth(
- struct dc *dc,
- struct dc_state *context);
-
-void dce110_optimize_bandwidth(
- struct dc *dc,
- struct dc_state *context);
-
-void dp_receiver_power_ctrl(struct dc_link *link, bool on);
-
-void dce110_edp_power_control(
- struct dc_link *link,
- bool power_up);
-
-void dce110_edp_backlight_control(
- struct dc_link *link,
- bool enable);
-
-void dce110_edp_wait_for_hpd_ready(
- struct dc_link *link,
- bool power_up);
-
-bool dce110_set_backlight_level(struct pipe_ctx *pipe_ctx,
- uint32_t backlight_pwm_u16_16,
- uint32_t frame_ramp);
-void dce110_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx);
-void dce110_set_pipe(struct pipe_ctx *pipe_ctx);
-
-#endif /* __DC_HWSS_DCE110_H__ */
-
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c
index db7557a1c613..2c43c2422638 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c
@@ -76,7 +76,6 @@ UNP_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH_C__GRPH_PRIMARY_SURFACE_ADDRESS_HIGH_C_MAS
mmUNP_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH_C,
value);
- temp = 0;
value = 0;
temp = address.low_part >>
UNP_GRPH_PRIMARY_SURFACE_ADDRESS_C__GRPH_PRIMARY_SURFACE_ADDRESS_C__SHIFT;
@@ -112,7 +111,6 @@ UNP_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH_L__GRPH_PRIMARY_SURFACE_ADDRESS_HIGH_L_MAS
mmUNP_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH_L,
value);
- temp = 0;
value = 0;
temp = address.low_part >>
UNP_GRPH_PRIMARY_SURFACE_ADDRESS_L__GRPH_PRIMARY_SURFACE_ADDRESS_L__SHIFT;
@@ -164,7 +162,7 @@ static void enable(struct dce_mem_input *mem_input110)
static void program_tiling(
struct dce_mem_input *mem_input110,
- const union dc_tiling_info *info,
+ const struct dc_tiling_info *info,
const enum surface_pixel_format pixel_format)
{
uint32_t value = 0;
@@ -525,7 +523,7 @@ static const unsigned int dvmm_Hw_Setting_Linear[4][9] = {
/* Helper to get table entry from surface info */
static const unsigned int *get_dvmm_hw_setting(
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
enum surface_pixel_format format,
bool chroma)
{
@@ -565,7 +563,7 @@ static const unsigned int *get_dvmm_hw_setting(
static void dce_mem_input_v_program_pte_vm(
struct mem_input *mem_input,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
enum dc_rotation_angle rotation)
{
struct dce_mem_input *mem_input110 = TO_DCE_MEM_INPUT(mem_input);
@@ -638,7 +636,7 @@ static void dce_mem_input_v_program_pte_vm(
static void dce_mem_input_v_program_surface_config(
struct mem_input *mem_input,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_regamma_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_regamma_v.c
index 34c5e3c7c6d2..9b65b77e8823 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_regamma_v.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_regamma_v.c
@@ -23,8 +23,6 @@
*
*/
-#include <linux/delay.h>
-
#include "dm_services.h"
/* include DCE11 register header files */
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
index 27cbb5b42c7e..61b0807693fb 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
@@ -288,7 +288,7 @@ bool dce110_timing_generator_program_timing_generator(
uint32_t vsync_offset = dc_crtc_timing->v_border_bottom +
dc_crtc_timing->v_front_porch;
- uint32_t v_sync_start =dc_crtc_timing->v_addressable + vsync_offset;
+ uint32_t v_sync_start = dc_crtc_timing->v_addressable + vsync_offset;
uint32_t hsync_offset = dc_crtc_timing->h_border_right +
dc_crtc_timing->h_front_porch;
@@ -603,7 +603,7 @@ void dce110_timing_generator_program_blanking(
{
uint32_t vsync_offset = timing->v_border_bottom +
timing->v_front_porch;
- uint32_t v_sync_start =timing->v_addressable + vsync_offset;
+ uint32_t v_sync_start = timing->v_addressable + vsync_offset;
uint32_t hsync_offset = timing->h_border_right +
timing->h_front_porch;
@@ -1955,6 +1955,7 @@ void dce110_tg_program_timing(struct timing_generator *tg,
int vstartup_start,
int vupdate_offset,
int vupdate_width,
+ int pstate_keepout,
const enum signal_type signal,
bool use_vbios)
{
@@ -2015,6 +2016,23 @@ bool dce110_tg_validate_timing(struct timing_generator *tg,
return dce110_timing_generator_validate_timing(tg, timing, SIGNAL_TYPE_NONE);
}
+/* "Container" vs. "pixel" is a concept within HW blocks, mostly those closer to the back-end. It works like this:
+ *
+ * - In most of the formats (RGB or YCbCr 4:4:4, 4:2:2 uncompressed and DSC 4:2:2 Simple) pixel rate is the same as
+ * container rate.
+ *
+ * - In 4:2:0 (DSC or uncompressed) there are two pixels per container, hence the target container rate has to be
+ * halved to maintain the correct pixel rate.
+ *
+ * - Unlike 4:2:2 uncompressed, DSC 4:2:2 Native also has two pixels per container (this happens when DSC is applied
+ * to it) and has to be treated the same as 4:2:0, i.e. target container rate has to be halved in this case as well.
+ *
+ */
+bool dce110_is_two_pixels_per_container(const struct dc_crtc_timing *timing)
+{
+ return timing->pixel_encoding == PIXEL_ENCODING_YCBCR420;
+}
+
void dce110_tg_wait_for_state(struct timing_generator *tg,
enum crtc_state state)
{
@@ -2109,70 +2127,131 @@ bool dce110_configure_crc(struct timing_generator *tg,
cntl_addr = CRTC_REG(mmCRTC_CRC_CNTL);
- /* First, disable CRC before we configure it. */
- dm_write_reg(tg->ctx, cntl_addr, 0);
+ if (!params->enable || params->reset)
+ /* First, disable CRC before we configure it. */
+ dm_write_reg(tg->ctx, cntl_addr, 0);
if (!params->enable)
return true;
/* Program frame boundaries */
- /* Window A x axis start and end. */
- value = 0;
- addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_X_CONTROL);
- set_reg_field_value(value, params->windowa_x_start,
- CRTC_CRC0_WINDOWA_X_CONTROL,
- CRTC_CRC0_WINDOWA_X_START);
- set_reg_field_value(value, params->windowa_x_end,
- CRTC_CRC0_WINDOWA_X_CONTROL,
- CRTC_CRC0_WINDOWA_X_END);
- dm_write_reg(tg->ctx, addr, value);
-
- /* Window A y axis start and end. */
- value = 0;
- addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_Y_CONTROL);
- set_reg_field_value(value, params->windowa_y_start,
- CRTC_CRC0_WINDOWA_Y_CONTROL,
- CRTC_CRC0_WINDOWA_Y_START);
- set_reg_field_value(value, params->windowa_y_end,
- CRTC_CRC0_WINDOWA_Y_CONTROL,
- CRTC_CRC0_WINDOWA_Y_END);
- dm_write_reg(tg->ctx, addr, value);
-
- /* Window B x axis start and end. */
- value = 0;
- addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_X_CONTROL);
- set_reg_field_value(value, params->windowb_x_start,
- CRTC_CRC0_WINDOWB_X_CONTROL,
- CRTC_CRC0_WINDOWB_X_START);
- set_reg_field_value(value, params->windowb_x_end,
- CRTC_CRC0_WINDOWB_X_CONTROL,
- CRTC_CRC0_WINDOWB_X_END);
- dm_write_reg(tg->ctx, addr, value);
-
- /* Window B y axis start and end. */
- value = 0;
- addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_Y_CONTROL);
- set_reg_field_value(value, params->windowb_y_start,
- CRTC_CRC0_WINDOWB_Y_CONTROL,
- CRTC_CRC0_WINDOWB_Y_START);
- set_reg_field_value(value, params->windowb_y_end,
- CRTC_CRC0_WINDOWB_Y_CONTROL,
- CRTC_CRC0_WINDOWB_Y_END);
- dm_write_reg(tg->ctx, addr, value);
-
- /* Set crc mode and selection, and enable. Only using CRC0*/
- value = 0;
- set_reg_field_value(value, params->continuous_mode ? 1 : 0,
- CRTC_CRC_CNTL, CRTC_CRC_CONT_EN);
- set_reg_field_value(value, params->selection,
- CRTC_CRC_CNTL, CRTC_CRC0_SELECT);
- set_reg_field_value(value, 1, CRTC_CRC_CNTL, CRTC_CRC_EN);
- dm_write_reg(tg->ctx, cntl_addr, value);
+ switch (params->crc_eng_inst) {
+ case 0:
+ /* Window A x axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_X_CONTROL);
+ set_reg_field_value(value, params->windowa_x_start,
+ CRTC_CRC0_WINDOWA_X_CONTROL,
+ CRTC_CRC0_WINDOWA_X_START);
+ set_reg_field_value(value, params->windowa_x_end,
+ CRTC_CRC0_WINDOWA_X_CONTROL,
+ CRTC_CRC0_WINDOWA_X_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Window A y axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_Y_CONTROL);
+ set_reg_field_value(value, params->windowa_y_start,
+ CRTC_CRC0_WINDOWA_Y_CONTROL,
+ CRTC_CRC0_WINDOWA_Y_START);
+ set_reg_field_value(value, params->windowa_y_end,
+ CRTC_CRC0_WINDOWA_Y_CONTROL,
+ CRTC_CRC0_WINDOWA_Y_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Window B x axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_X_CONTROL);
+ set_reg_field_value(value, params->windowb_x_start,
+ CRTC_CRC0_WINDOWB_X_CONTROL,
+ CRTC_CRC0_WINDOWB_X_START);
+ set_reg_field_value(value, params->windowb_x_end,
+ CRTC_CRC0_WINDOWB_X_CONTROL,
+ CRTC_CRC0_WINDOWB_X_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Window B y axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_Y_CONTROL);
+ set_reg_field_value(value, params->windowb_y_start,
+ CRTC_CRC0_WINDOWB_Y_CONTROL,
+ CRTC_CRC0_WINDOWB_Y_START);
+ set_reg_field_value(value, params->windowb_y_end,
+ CRTC_CRC0_WINDOWB_Y_CONTROL,
+ CRTC_CRC0_WINDOWB_Y_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Set crc mode and selection, and enable.*/
+ value = 0;
+ set_reg_field_value(value, params->continuous_mode ? 1 : 0,
+ CRTC_CRC_CNTL, CRTC_CRC_CONT_EN);
+ set_reg_field_value(value, params->selection,
+ CRTC_CRC_CNTL, CRTC_CRC0_SELECT);
+ set_reg_field_value(value, 1, CRTC_CRC_CNTL, CRTC_CRC_EN);
+ dm_write_reg(tg->ctx, cntl_addr, value);
+ break;
+ case 1:
+ /* Window A x axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC1_WINDOWA_X_CONTROL);
+ set_reg_field_value(value, params->windowa_x_start,
+ CRTC_CRC1_WINDOWA_X_CONTROL,
+ CRTC_CRC1_WINDOWA_X_START);
+ set_reg_field_value(value, params->windowa_x_end,
+ CRTC_CRC1_WINDOWA_X_CONTROL,
+ CRTC_CRC1_WINDOWA_X_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Window A y axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC1_WINDOWA_Y_CONTROL);
+ set_reg_field_value(value, params->windowa_y_start,
+ CRTC_CRC1_WINDOWA_Y_CONTROL,
+ CRTC_CRC1_WINDOWA_Y_START);
+ set_reg_field_value(value, params->windowa_y_end,
+ CRTC_CRC1_WINDOWA_Y_CONTROL,
+ CRTC_CRC1_WINDOWA_Y_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Window B x axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC1_WINDOWB_X_CONTROL);
+ set_reg_field_value(value, params->windowb_x_start,
+ CRTC_CRC1_WINDOWB_X_CONTROL,
+ CRTC_CRC1_WINDOWB_X_START);
+ set_reg_field_value(value, params->windowb_x_end,
+ CRTC_CRC1_WINDOWB_X_CONTROL,
+ CRTC_CRC1_WINDOWB_X_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Window B y axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC1_WINDOWB_Y_CONTROL);
+ set_reg_field_value(value, params->windowb_y_start,
+ CRTC_CRC1_WINDOWB_Y_CONTROL,
+ CRTC_CRC1_WINDOWB_Y_START);
+ set_reg_field_value(value, params->windowb_y_end,
+ CRTC_CRC1_WINDOWB_Y_CONTROL,
+ CRTC_CRC1_WINDOWB_Y_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Set crc mode and selection, and enable.*/
+ value = 0;
+ set_reg_field_value(value, params->continuous_mode ? 1 : 0,
+ CRTC_CRC_CNTL, CRTC_CRC_CONT_EN);
+ set_reg_field_value(value, params->selection,
+ CRTC_CRC_CNTL, CRTC_CRC1_SELECT);
+ set_reg_field_value(value, 1, CRTC_CRC_CNTL, CRTC_CRC_EN);
+ dm_write_reg(tg->ctx, cntl_addr, value);
+ break;
+ default:
+ return false;
+ }
return true;
}
-bool dce110_get_crc(struct timing_generator *tg,
+bool dce110_get_crc(struct timing_generator *tg, uint8_t idx,
uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb)
{
uint32_t addr = 0;
@@ -2188,14 +2267,30 @@ bool dce110_get_crc(struct timing_generator *tg,
if (!field)
return false;
- addr = CRTC_REG(mmCRTC_CRC0_DATA_RG);
- value = dm_read_reg(tg->ctx, addr);
- *r_cr = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_R_CR);
- *g_y = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_G_Y);
+ switch (idx) {
+ case 0:
+ addr = CRTC_REG(mmCRTC_CRC0_DATA_RG);
+ value = dm_read_reg(tg->ctx, addr);
+ *r_cr = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_R_CR);
+ *g_y = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_G_Y);
- addr = CRTC_REG(mmCRTC_CRC0_DATA_B);
- value = dm_read_reg(tg->ctx, addr);
- *b_cb = get_reg_field_value(value, CRTC_CRC0_DATA_B, CRC0_B_CB);
+ addr = CRTC_REG(mmCRTC_CRC0_DATA_B);
+ value = dm_read_reg(tg->ctx, addr);
+ *b_cb = get_reg_field_value(value, CRTC_CRC0_DATA_B, CRC0_B_CB);
+ break;
+ case 1:
+ addr = CRTC_REG(mmCRTC_CRC1_DATA_RG);
+ value = dm_read_reg(tg->ctx, addr);
+ *r_cr = get_reg_field_value(value, CRTC_CRC1_DATA_RG, CRC1_R_CR);
+ *g_y = get_reg_field_value(value, CRTC_CRC1_DATA_RG, CRC1_G_Y);
+
+ addr = CRTC_REG(mmCRTC_CRC1_DATA_B);
+ value = dm_read_reg(tg->ctx, addr);
+ *b_cb = get_reg_field_value(value, CRTC_CRC1_DATA_B, CRC1_B_CB);
+ break;
+ default:
+ return false;
+ }
return true;
}
@@ -2239,6 +2334,7 @@ static const struct timing_generator_funcs dce110_tg_funcs = {
.is_tg_enabled = dce110_is_tg_enabled,
.configure_crc = dce110_configure_crc,
.get_crc = dce110_get_crc,
+ .is_two_pixels_per_container = dce110_is_two_pixels_per_container,
};
void dce110_timing_generator_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
index d8a5ed7b485d..e4f5cad64f32 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
@@ -261,6 +261,7 @@ void dce110_tg_program_timing(struct timing_generator *tg,
int vstartup_start,
int vupdate_offset,
int vupdate_width,
+ int pstate_keepout,
const enum signal_type signal,
bool use_vbios);
@@ -285,7 +286,9 @@ bool dce110_arm_vert_intr(
bool dce110_configure_crc(struct timing_generator *tg,
const struct crc_params *params);
-bool dce110_get_crc(struct timing_generator *tg,
+bool dce110_get_crc(struct timing_generator *tg, uint8_t idx,
uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb);
+bool dce110_is_two_pixels_per_container(const struct dc_crtc_timing *timing);
+
#endif /* __DC_TIMING_GENERATOR_DCE110_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
index c509384fff54..9837dec837ff 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
@@ -438,6 +438,7 @@ static void dce110_timing_generator_v_program_timing(struct timing_generator *tg
int vstartup_start,
int vupdate_offset,
int vupdate_width,
+ int pstate_keepout,
const enum signal_type signal,
bool use_vbios)
{
@@ -682,7 +683,8 @@ static const struct timing_generator_funcs dce110_tg_v_funcs = {
.tear_down_global_swap_lock =
dce110_timing_generator_v_tear_down_global_swap_lock,
.enable_advanced_request =
- dce110_timing_generator_v_enable_advanced_request
+ dce110_timing_generator_v_enable_advanced_request,
+ .is_two_pixels_per_container = dce110_is_two_pixels_per_container,
};
void dce110_timing_generator_v_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c
index 45bca0db5e5e..28d3b2663cd3 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c
@@ -23,8 +23,6 @@
*
*/
-#include <linux/delay.h>
-
#include "dce110_transform_v.h"
#include "dm_services.h"
#include "dc.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/Makefile b/drivers/gpu/drm/amd/display/dc/dce112/Makefile
index 9de6501702d2..683866797709 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce112/Makefile
@@ -23,10 +23,9 @@
# Makefile for the 'controller' sub-component of DAL.
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce112/dce112_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce112/dce112_resource.o = -Wno-override-init
-DCE112 = dce112_compressor.o dce112_hw_sequencer.o \
-dce112_resource.o
+DCE112 = dce112_compressor.o
AMD_DAL_DCE112 = $(addprefix $(AMDDALPATH)/dc/dce112/,$(DCE112))
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_compressor.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_compressor.c
index 51cb45d8b9ab..faae12cf7968 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_compressor.c
+++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_compressor.c
@@ -23,9 +23,6 @@
*
*/
-#include <linux/delay.h>
-#include <linux/slab.h>
-
#include "dm_services.h"
#include "dce/dce_11_2_d.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_hw_sequencer.c
deleted file mode 100644
index 19873ee1f78d..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_hw_sequencer.c
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright 2015 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include "dm_services.h"
-#include "dc.h"
-#include "core_types.h"
-#include "dce112_hw_sequencer.h"
-
-#include "dce110/dce110_hw_sequencer.h"
-
-/* include DCE11.2 register header files */
-#include "dce/dce_11_2_d.h"
-#include "dce/dce_11_2_sh_mask.h"
-
-struct dce112_hw_seq_reg_offsets {
- uint32_t crtc;
-};
-
-
-static const struct dce112_hw_seq_reg_offsets reg_offsets[] = {
-{
- .crtc = (mmCRTC0_CRTC_GSL_CONTROL - mmCRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTC1_CRTC_GSL_CONTROL - mmCRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTC2_CRTC_GSL_CONTROL - mmCRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTC3_CRTC_GSL_CONTROL - mmCRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTC4_CRTC_GSL_CONTROL - mmCRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTC5_CRTC_GSL_CONTROL - mmCRTC_GSL_CONTROL),
-}
-};
-#define HW_REG_CRTC(reg, id)\
- (reg + reg_offsets[id].crtc)
-
-/*******************************************************************************
- * Private definitions
- ******************************************************************************/
-
-static void dce112_init_pte(struct dc_context *ctx)
-{
- uint32_t addr;
- uint32_t value = 0;
- uint32_t chunk_int = 0;
- uint32_t chunk_mul = 0;
-
- addr = mmDVMM_PTE_REQ;
- value = dm_read_reg(ctx, addr);
-
- chunk_int = get_reg_field_value(
- value,
- DVMM_PTE_REQ,
- HFLIP_PTEREQ_PER_CHUNK_INT);
-
- chunk_mul = get_reg_field_value(
- value,
- DVMM_PTE_REQ,
- HFLIP_PTEREQ_PER_CHUNK_MULTIPLIER);
-
- if (chunk_int != 0x4 || chunk_mul != 0x4) {
-
- set_reg_field_value(
- value,
- 255,
- DVMM_PTE_REQ,
- MAX_PTEREQ_TO_ISSUE);
-
- set_reg_field_value(
- value,
- 4,
- DVMM_PTE_REQ,
- HFLIP_PTEREQ_PER_CHUNK_INT);
-
- set_reg_field_value(
- value,
- 4,
- DVMM_PTE_REQ,
- HFLIP_PTEREQ_PER_CHUNK_MULTIPLIER);
-
- dm_write_reg(ctx, addr, value);
- }
-}
-
-static bool dce112_enable_display_power_gating(
- struct dc *dc,
- uint8_t controller_id,
- struct dc_bios *dcb,
- enum pipe_gating_control power_gating)
-{
- enum bp_result bp_result = BP_RESULT_OK;
- enum bp_pipe_control_action cntl;
- struct dc_context *ctx = dc->ctx;
-
- if (IS_FPGA_MAXIMUS_DC(ctx->dce_environment))
- return true;
-
- if (power_gating == PIPE_GATING_CONTROL_INIT)
- cntl = ASIC_PIPE_INIT;
- else if (power_gating == PIPE_GATING_CONTROL_ENABLE)
- cntl = ASIC_PIPE_ENABLE;
- else
- cntl = ASIC_PIPE_DISABLE;
-
- if (power_gating != PIPE_GATING_CONTROL_INIT || controller_id == 0){
-
- bp_result = dcb->funcs->enable_disp_power_gating(
- dcb, controller_id + 1, cntl);
-
- /* Revert MASTER_UPDATE_MODE to 0 because bios sets it 2
- * by default when command table is called
- */
- dm_write_reg(ctx,
- HW_REG_CRTC(mmCRTC_MASTER_UPDATE_MODE, controller_id),
- 0);
- }
-
- if (power_gating != PIPE_GATING_CONTROL_ENABLE)
- dce112_init_pte(ctx);
-
- if (bp_result == BP_RESULT_OK)
- return true;
- else
- return false;
-}
-
-void dce112_hw_sequencer_construct(struct dc *dc)
-{
- /* All registers used by dce11.2 match those in dce11 in offset and
- * structure
- */
- dce110_hw_sequencer_construct(dc);
- dc->hwseq->funcs.enable_display_power_gating = dce112_enable_display_power_gating;
-}
-
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/Makefile b/drivers/gpu/drm/amd/display/dc/dce120/Makefile
index a9cc4b73270b..8f508e662748 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce120/Makefile
@@ -24,10 +24,9 @@
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce120/dce120_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce120/dce120_resource.o = -Wno-override-init
-DCE120 = dce120_resource.o dce120_timing_generator.o \
-dce120_hw_sequencer.o
+DCE120 = dce120_timing_generator.o
AMD_DAL_DCE120 = $(addprefix $(AMDDALPATH)/dc/dce120/,$(DCE120))
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c
deleted file mode 100644
index d4afe6c824d2..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * Copyright 2015 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include "dm_services.h"
-#include "dc.h"
-#include "core_types.h"
-#include "dce120_hw_sequencer.h"
-#include "dce/dce_hwseq.h"
-
-#include "dce110/dce110_hw_sequencer.h"
-
-#include "dce/dce_12_0_offset.h"
-#include "dce/dce_12_0_sh_mask.h"
-#include "soc15_hw_ip.h"
-#include "vega10_ip_offset.h"
-#include "reg_helper.h"
-
-#define CTX \
- hws->ctx
-#define REG(reg)\
- hws->regs->reg
-
-#undef FN
-#define FN(reg_name, field_name) \
- hws->shifts->field_name, hws->masks->field_name
-
-struct dce120_hw_seq_reg_offsets {
- uint32_t crtc;
-};
-
-#if 0
-static const struct dce120_hw_seq_reg_offsets reg_offsets[] = {
-{
- .crtc = (mmCRTC0_CRTC_GSL_CONTROL - mmCRTC0_CRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTC1_CRTC_GSL_CONTROL - mmCRTC0_CRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTC2_CRTC_GSL_CONTROL - mmCRTC0_CRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTC3_CRTC_GSL_CONTROL - mmCRTC0_CRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTC4_CRTC_GSL_CONTROL - mmCRTC0_CRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTC5_CRTC_GSL_CONTROL - mmCRTC0_CRTC_GSL_CONTROL),
-}
-};
-
-#define HW_REG_CRTC(reg, id)\
- (reg + reg_offsets[id].crtc)
-
-#define CNTL_ID(controller_id)\
- controller_id
-/*******************************************************************************
- * Private definitions
- ******************************************************************************/
-static void dce120_init_pte(struct dc_context *ctx, uint8_t controller_id)
-{
- uint32_t addr;
- uint32_t value = 0;
- uint32_t chunk_int = 0;
- uint32_t chunk_mul = 0;
-/*
- addr = mmDCP0_DVMM_PTE_CONTROL + controller_id *
- (mmDCP1_DVMM_PTE_CONTROL- mmDCP0_DVMM_PTE_CONTROL);
-
- value = dm_read_reg(ctx, addr);
-
- set_reg_field_value(
- value, 0, DCP, controller_id,
- DVMM_PTE_CONTROL,
- DVMM_USE_SINGLE_PTE);
-
- set_reg_field_value_soc15(
- value, 1, DCP, controller_id,
- DVMM_PTE_CONTROL,
- DVMM_PTE_BUFFER_MODE0);
-
- set_reg_field_value_soc15(
- value, 1, DCP, controller_id,
- DVMM_PTE_CONTROL,
- DVMM_PTE_BUFFER_MODE1);
-
- dm_write_reg(ctx, addr, value);*/
-
- addr = mmDVMM_PTE_REQ;
- value = dm_read_reg(ctx, addr);
-
- chunk_int = get_reg_field_value(
- value,
- DVMM_PTE_REQ,
- HFLIP_PTEREQ_PER_CHUNK_INT);
-
- chunk_mul = get_reg_field_value(
- value,
- DVMM_PTE_REQ,
- HFLIP_PTEREQ_PER_CHUNK_MULTIPLIER);
-
- if (chunk_int != 0x4 || chunk_mul != 0x4) {
-
- set_reg_field_value(
- value,
- 255,
- DVMM_PTE_REQ,
- MAX_PTEREQ_TO_ISSUE);
-
- set_reg_field_value(
- value,
- 4,
- DVMM_PTE_REQ,
- HFLIP_PTEREQ_PER_CHUNK_INT);
-
- set_reg_field_value(
- value,
- 4,
- DVMM_PTE_REQ,
- HFLIP_PTEREQ_PER_CHUNK_MULTIPLIER);
-
- dm_write_reg(ctx, addr, value);
- }
-}
-#endif
-
-static bool dce120_enable_display_power_gating(
- struct dc *dc,
- uint8_t controller_id,
- struct dc_bios *dcb,
- enum pipe_gating_control power_gating)
-{
- /* disable for bringup */
-#if 0
- enum bp_result bp_result = BP_RESULT_OK;
- enum bp_pipe_control_action cntl;
- struct dc_context *ctx = dc->ctx;
-
- if (IS_FPGA_MAXIMUS_DC(ctx->dce_environment))
- return true;
-
- if (power_gating == PIPE_GATING_CONTROL_INIT)
- cntl = ASIC_PIPE_INIT;
- else if (power_gating == PIPE_GATING_CONTROL_ENABLE)
- cntl = ASIC_PIPE_ENABLE;
- else
- cntl = ASIC_PIPE_DISABLE;
-
- if (power_gating != PIPE_GATING_CONTROL_INIT || controller_id == 0) {
-
- bp_result = dcb->funcs->enable_disp_power_gating(
- dcb, controller_id + 1, cntl);
-
- /* Revert MASTER_UPDATE_MODE to 0 because bios sets it 2
- * by default when command table is called
- */
- dm_write_reg(ctx,
- HW_REG_CRTC(mmCRTC0_CRTC_MASTER_UPDATE_MODE, controller_id),
- 0);
- }
-
- if (power_gating != PIPE_GATING_CONTROL_ENABLE)
- dce120_init_pte(ctx, controller_id);
-
- if (bp_result == BP_RESULT_OK)
- return true;
- else
- return false;
-#endif
- return false;
-}
-
-static void dce120_update_dchub(
- struct dce_hwseq *hws,
- struct dchub_init_data *dh_data)
-{
- /* TODO: port code from dal2 */
- switch (dh_data->fb_mode) {
- case FRAME_BUFFER_MODE_ZFB_ONLY:
- /*For ZFB case need to put DCHUB FB BASE and TOP upside down to indicate ZFB mode*/
- REG_UPDATE_2(DCHUB_FB_LOCATION,
- FB_TOP, 0,
- FB_BASE, 0x0FFFF);
-
- REG_UPDATE(DCHUB_AGP_BASE,
- AGP_BASE, dh_data->zfb_phys_addr_base >> 22);
-
- REG_UPDATE(DCHUB_AGP_BOT,
- AGP_BOT, dh_data->zfb_mc_base_addr >> 22);
-
- REG_UPDATE(DCHUB_AGP_TOP,
- AGP_TOP, (dh_data->zfb_mc_base_addr + dh_data->zfb_size_in_byte - 1) >> 22);
- break;
- case FRAME_BUFFER_MODE_MIXED_ZFB_AND_LOCAL:
- /*Should not touch FB LOCATION (done by VBIOS on AsicInit table)*/
- REG_UPDATE(DCHUB_AGP_BASE,
- AGP_BASE, dh_data->zfb_phys_addr_base >> 22);
-
- REG_UPDATE(DCHUB_AGP_BOT,
- AGP_BOT, dh_data->zfb_mc_base_addr >> 22);
-
- REG_UPDATE(DCHUB_AGP_TOP,
- AGP_TOP, (dh_data->zfb_mc_base_addr + dh_data->zfb_size_in_byte - 1) >> 22);
- break;
- case FRAME_BUFFER_MODE_LOCAL_ONLY:
- /*Should not touch FB LOCATION (done by VBIOS on AsicInit table)*/
- REG_UPDATE(DCHUB_AGP_BASE,
- AGP_BASE, 0);
-
- REG_UPDATE(DCHUB_AGP_BOT,
- AGP_BOT, 0x03FFFF);
-
- REG_UPDATE(DCHUB_AGP_TOP,
- AGP_TOP, 0);
- break;
- default:
- break;
- }
-
- dh_data->dchub_initialzied = true;
- dh_data->dchub_info_valid = false;
-}
-
-/**
- * dce121_xgmi_enabled() - Check if xGMI is enabled
- * @hws: DCE hardware sequencer object
- *
- * Return true if xGMI is enabled. False otherwise.
- */
-bool dce121_xgmi_enabled(struct dce_hwseq *hws)
-{
- uint32_t pf_max_region;
-
- REG_GET(MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION, &pf_max_region);
- /* PF_MAX_REGION == 0 means xgmi is disabled */
- return !!pf_max_region;
-}
-
-void dce120_hw_sequencer_construct(struct dc *dc)
-{
- /* All registers used by dce11.2 match those in dce11 in offset and
- * structure
- */
- dce110_hw_sequencer_construct(dc);
- dc->hwseq->funcs.enable_display_power_gating = dce120_enable_display_power_gating;
- dc->hwss.update_dchub = dce120_update_dchub;
-}
-
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
index 4af0c70098c4..31c4f44ceaac 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
@@ -697,6 +697,7 @@ static void dce120_tg_program_timing(struct timing_generator *tg,
int vstartup_start,
int vupdate_offset,
int vupdate_width,
+ int pstate_keepout,
const enum signal_type signal,
bool use_vbios)
{
@@ -1099,45 +1100,79 @@ static bool dce120_configure_crc(struct timing_generator *tg,
if (!dce120_is_tg_enabled(tg))
return false;
- /* First, disable CRC before we configure it. */
- dm_write_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL,
- tg110->offsets.crtc, 0);
+ if (!params->enable || params->reset)
+ /* First, disable CRC before we configure it. */
+ dm_write_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL,
+ tg110->offsets.crtc, 0);
if (!params->enable)
return true;
/* Program frame boundaries */
- /* Window A x axis start and end. */
- CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_X_CONTROL,
- CRTC_CRC0_WINDOWA_X_START, params->windowa_x_start,
- CRTC_CRC0_WINDOWA_X_END, params->windowa_x_end);
-
- /* Window A y axis start and end. */
- CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_Y_CONTROL,
- CRTC_CRC0_WINDOWA_Y_START, params->windowa_y_start,
- CRTC_CRC0_WINDOWA_Y_END, params->windowa_y_end);
-
- /* Window B x axis start and end. */
- CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_X_CONTROL,
- CRTC_CRC0_WINDOWB_X_START, params->windowb_x_start,
- CRTC_CRC0_WINDOWB_X_END, params->windowb_x_end);
-
- /* Window B y axis start and end. */
- CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_Y_CONTROL,
- CRTC_CRC0_WINDOWB_Y_START, params->windowb_y_start,
- CRTC_CRC0_WINDOWB_Y_END, params->windowb_y_end);
-
- /* Set crc mode and selection, and enable. Only using CRC0*/
- CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL,
- CRTC_CRC_EN, params->continuous_mode ? 1 : 0,
- CRTC_CRC0_SELECT, params->selection,
- CRTC_CRC_EN, 1);
+ switch (params->crc_eng_inst) {
+ case 0:
+ /* Window A x axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_X_CONTROL,
+ CRTC_CRC0_WINDOWA_X_START, params->windowa_x_start,
+ CRTC_CRC0_WINDOWA_X_END, params->windowa_x_end);
+
+ /* Window A y axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_Y_CONTROL,
+ CRTC_CRC0_WINDOWA_Y_START, params->windowa_y_start,
+ CRTC_CRC0_WINDOWA_Y_END, params->windowa_y_end);
+
+ /* Window B x axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_X_CONTROL,
+ CRTC_CRC0_WINDOWB_X_START, params->windowb_x_start,
+ CRTC_CRC0_WINDOWB_X_END, params->windowb_x_end);
+
+ /* Window B y axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_Y_CONTROL,
+ CRTC_CRC0_WINDOWB_Y_START, params->windowb_y_start,
+ CRTC_CRC0_WINDOWB_Y_END, params->windowb_y_end);
+
+ /* Set crc mode and selection, and enable.*/
+ CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL,
+ CRTC_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
+ CRTC_CRC0_SELECT, params->selection,
+ CRTC_CRC_EN, 1);
+ break;
+ case 1:
+ /* Window A x axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWA_X_CONTROL,
+ CRTC_CRC1_WINDOWA_X_START, params->windowa_x_start,
+ CRTC_CRC1_WINDOWA_X_END, params->windowa_x_end);
+
+ /* Window A y axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWA_Y_CONTROL,
+ CRTC_CRC1_WINDOWA_Y_START, params->windowa_y_start,
+ CRTC_CRC1_WINDOWA_Y_END, params->windowa_y_end);
+
+ /* Window B x axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWB_X_CONTROL,
+ CRTC_CRC1_WINDOWB_X_START, params->windowb_x_start,
+ CRTC_CRC1_WINDOWB_X_END, params->windowb_x_end);
+
+ /* Window B y axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWB_Y_CONTROL,
+ CRTC_CRC1_WINDOWB_Y_START, params->windowb_y_start,
+ CRTC_CRC1_WINDOWB_Y_END, params->windowb_y_end);
+
+ /* Set crc mode and selection, and enable */
+ CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL,
+ CRTC_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
+ CRTC_CRC1_SELECT, params->selection,
+ CRTC_CRC_EN, 1);
+ break;
+ default:
+ return false;
+ }
return true;
}
-static bool dce120_get_crc(struct timing_generator *tg, uint32_t *r_cr,
- uint32_t *g_y, uint32_t *b_cb)
+static bool dce120_get_crc(struct timing_generator *tg, uint8_t idx,
+ uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb)
{
struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg);
uint32_t value, field;
@@ -1150,14 +1185,30 @@ static bool dce120_get_crc(struct timing_generator *tg, uint32_t *r_cr,
if (!field)
return false;
- value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_RG,
- tg110->offsets.crtc);
- *r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_R_CR);
- *g_y = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_G_Y);
+ switch (idx) {
+ case 0:
+ value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_RG,
+ tg110->offsets.crtc);
+ *r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_R_CR);
+ *g_y = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_G_Y);
- value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_B,
- tg110->offsets.crtc);
- *b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_B, CRC0_B_CB);
+ value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_B,
+ tg110->offsets.crtc);
+ *b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_B, CRC0_B_CB);
+ break;
+ case 1:
+ value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC1_DATA_RG,
+ tg110->offsets.crtc);
+ *r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC1_DATA_RG, CRC1_R_CR);
+ *g_y = get_reg_field_value(value, CRTC0_CRTC_CRC1_DATA_RG, CRC1_G_Y);
+
+ value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC1_DATA_B,
+ tg110->offsets.crtc);
+ *b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC1_DATA_B, CRC1_B_CB);
+ break;
+ default:
+ return false;
+ }
return true;
}
@@ -1197,6 +1248,7 @@ static const struct timing_generator_funcs dce120_tg_funcs = {
.is_tg_enabled = dce120_is_tg_enabled,
.configure_crc = dce120_configure_crc,
.get_crc = dce120_get_crc,
+ .is_two_pixels_per_container = dce110_is_two_pixels_per_container,
};
diff --git a/drivers/gpu/drm/amd/display/dc/dce60/Makefile b/drivers/gpu/drm/amd/display/dc/dce60/Makefile
index dda596fa1cd7..824f73eb3326 100644
--- a/drivers/gpu/drm/amd/display/dc/dce60/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce60/Makefile
@@ -23,10 +23,9 @@
# Makefile for the 'controller' sub-component of DAL.
# It provides the control and status of HW CRTC block.
-CFLAGS_AMDDALPATH)/dc/dce60/dce60_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce60/dce60_resource.o = -Wno-override-init
-DCE60 = dce60_timing_generator.o dce60_hw_sequencer.o \
- dce60_resource.o
+DCE60 = dce60_timing_generator.o
AMD_DAL_DCE60 = $(addprefix $(AMDDALPATH)/dc/dce60/,$(DCE60))
diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.c
deleted file mode 100644
index 920c7ae29d53..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.c
+++ /dev/null
@@ -1,432 +0,0 @@
-/*
- * Copyright 2020 Mauro Rossi <issor.oruam@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include "dm_services.h"
-#include "dc.h"
-#include "core_types.h"
-#include "dce60_hw_sequencer.h"
-
-#include "dce/dce_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
-#include "dce100/dce100_hw_sequencer.h"
-
-/* include DCE6 register header files */
-#include "dce/dce_6_0_d.h"
-#include "dce/dce_6_0_sh_mask.h"
-
-#define DC_LOGGER_INIT()
-
-/*******************************************************************************
- * Private definitions
- ******************************************************************************/
-
-/***************************PIPE_CONTROL***********************************/
-
-/*
- * Check if FBC can be enabled
- */
-static bool dce60_should_enable_fbc(struct dc *dc,
- struct dc_state *context,
- uint32_t *pipe_idx)
-{
- uint32_t i;
- struct pipe_ctx *pipe_ctx = NULL;
- struct resource_context *res_ctx = &context->res_ctx;
- unsigned int underlay_idx = dc->res_pool->underlay_pipe_index;
-
-
- ASSERT(dc->fbc_compressor);
-
- /* FBC memory should be allocated */
- if (!dc->ctx->fbc_gpu_addr)
- return false;
-
- /* Only supports single display */
- if (context->stream_count != 1)
- return false;
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- if (res_ctx->pipe_ctx[i].stream) {
-
- pipe_ctx = &res_ctx->pipe_ctx[i];
-
- if (!pipe_ctx)
- continue;
-
- /* fbc not applicable on underlay pipe */
- if (pipe_ctx->pipe_idx != underlay_idx) {
- *pipe_idx = i;
- break;
- }
- }
- }
-
- if (i == dc->res_pool->pipe_count)
- return false;
-
- if (!pipe_ctx->stream->link)
- return false;
-
- /* Only supports eDP */
- if (pipe_ctx->stream->link->connector_signal != SIGNAL_TYPE_EDP)
- return false;
-
- /* PSR should not be enabled */
- if (pipe_ctx->stream->link->psr_settings.psr_feature_enabled)
- return false;
-
- /* Nothing to compress */
- if (!pipe_ctx->plane_state)
- return false;
-
- /* Only for non-linear tiling */
- if (pipe_ctx->plane_state->tiling_info.gfx8.array_mode == DC_ARRAY_LINEAR_GENERAL)
- return false;
-
- return true;
-}
-
-/*
- * Enable FBC
- */
-static void dce60_enable_fbc(
- struct dc *dc,
- struct dc_state *context)
-{
- uint32_t pipe_idx = 0;
-
- if (dce60_should_enable_fbc(dc, context, &pipe_idx)) {
- /* Program GRPH COMPRESSED ADDRESS and PITCH */
- struct compr_addr_and_pitch_params params = {0, 0, 0};
- struct compressor *compr = dc->fbc_compressor;
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx];
-
- params.source_view_width = pipe_ctx->stream->timing.h_addressable;
- params.source_view_height = pipe_ctx->stream->timing.v_addressable;
- params.inst = pipe_ctx->stream_res.tg->inst;
- compr->compr_surface_address.quad_part = dc->ctx->fbc_gpu_addr;
-
- compr->funcs->surface_address_and_pitch(compr, &params);
- compr->funcs->set_fbc_invalidation_triggers(compr, 1);
-
- compr->funcs->enable_fbc(compr, &params);
- }
-}
-
-
-/*******************************************************************************
- * Front End programming
- ******************************************************************************/
-
-static void dce60_set_default_colors(struct pipe_ctx *pipe_ctx)
-{
- struct default_adjustment default_adjust = { 0 };
-
- default_adjust.force_hw_default = false;
- default_adjust.in_color_space = pipe_ctx->plane_state->color_space;
- default_adjust.out_color_space = pipe_ctx->stream->output_color_space;
- default_adjust.csc_adjust_type = GRAPHICS_CSC_ADJUST_TYPE_SW;
- default_adjust.surface_pixel_format = pipe_ctx->plane_res.scl_data.format;
-
- /* display color depth */
- default_adjust.color_depth =
- pipe_ctx->stream->timing.display_color_depth;
-
- /* Lb color depth */
- default_adjust.lb_color_depth = pipe_ctx->plane_res.scl_data.lb_params.depth;
-
- pipe_ctx->plane_res.xfm->funcs->opp_set_csc_default(
- pipe_ctx->plane_res.xfm, &default_adjust);
-}
-
-/*******************************************************************************
- * In order to turn on surface we will program
- * CRTC
- *
- * DCE6 has no bottom_pipe and no Blender HW
- * We need to set 'blank_target' to false in order to turn on the display
- *
- * |-----------|------------|---------|
- * |curr pipe | set_blank | |
- * |Surface |blank_target| CRCT |
- * |visibility | argument | |
- * |-----------|------------|---------|
- * | off | true | blank |
- * | on | false | unblank |
- * |-----------|------------|---------|
- *
- ******************************************************************************/
-static void dce60_program_surface_visibility(const struct dc *dc,
- struct pipe_ctx *pipe_ctx)
-{
- bool blank_target = false;
-
- /* DCE6 has no bottom_pipe and no Blender HW */
-
- if (!pipe_ctx->plane_state->visible)
- blank_target = true;
-
- /* DCE6 skip dce_set_blender_mode() but then proceed to 'unblank' CRTC */
- pipe_ctx->stream_res.tg->funcs->set_blank(pipe_ctx->stream_res.tg, blank_target);
-
-}
-
-
-static void dce60_get_surface_visual_confirm_color(const struct pipe_ctx *pipe_ctx,
- struct tg_color *color)
-{
- uint32_t color_value = MAX_TG_COLOR_VALUE * (4 - pipe_ctx->stream_res.tg->inst) / 4;
-
- switch (pipe_ctx->plane_res.scl_data.format) {
- case PIXEL_FORMAT_ARGB8888:
- /* set boarder color to red */
- color->color_r_cr = color_value;
- break;
-
- case PIXEL_FORMAT_ARGB2101010:
- /* set boarder color to blue */
- color->color_b_cb = color_value;
- break;
- case PIXEL_FORMAT_420BPP8:
- /* set boarder color to green */
- color->color_g_y = color_value;
- break;
- case PIXEL_FORMAT_420BPP10:
- /* set boarder color to yellow */
- color->color_g_y = color_value;
- color->color_r_cr = color_value;
- break;
- case PIXEL_FORMAT_FP16:
- /* set boarder color to white */
- color->color_r_cr = color_value;
- color->color_b_cb = color_value;
- color->color_g_y = color_value;
- break;
- default:
- break;
- }
-}
-
-static void dce60_program_scaler(const struct dc *dc,
- const struct pipe_ctx *pipe_ctx)
-{
- struct tg_color color = {0};
-
- /* DCE6 skips DCN TOFPGA check for transform_set_pixel_storage_depth == NULL */
-
- if (dc->debug.visual_confirm == VISUAL_CONFIRM_SURFACE)
- dce60_get_surface_visual_confirm_color(pipe_ctx, &color);
- else
- color_space_to_black_color(dc,
- pipe_ctx->stream->output_color_space,
- &color);
-
- pipe_ctx->plane_res.xfm->funcs->transform_set_pixel_storage_depth(
- pipe_ctx->plane_res.xfm,
- pipe_ctx->plane_res.scl_data.lb_params.depth,
- &pipe_ctx->stream->bit_depth_params);
-
- if (pipe_ctx->stream_res.tg->funcs->set_overscan_blank_color) {
- /*
- * The way 420 is packed, 2 channels carry Y component, 1 channel
- * alternate between Cb and Cr, so both channels need the pixel
- * value for Y
- */
- if (pipe_ctx->stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420)
- color.color_r_cr = color.color_g_y;
-
- pipe_ctx->stream_res.tg->funcs->set_overscan_blank_color(
- pipe_ctx->stream_res.tg,
- &color);
- }
-
- pipe_ctx->plane_res.xfm->funcs->transform_set_scaler(pipe_ctx->plane_res.xfm,
- &pipe_ctx->plane_res.scl_data);
-}
-
-static void
-dce60_program_front_end_for_pipe(
- struct dc *dc, struct pipe_ctx *pipe_ctx)
-{
- struct mem_input *mi = pipe_ctx->plane_res.mi;
- struct dc_plane_state *plane_state = pipe_ctx->plane_state;
- struct xfm_grph_csc_adjustment adjust;
- struct out_csc_color_matrix tbl_entry;
- unsigned int i;
- struct dce_hwseq *hws = dc->hwseq;
-
- DC_LOGGER_INIT();
- memset(&tbl_entry, 0, sizeof(tbl_entry));
-
- memset(&adjust, 0, sizeof(adjust));
- adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
-
- dce_enable_fe_clock(dc->hwseq, mi->inst, true);
-
- dce60_set_default_colors(pipe_ctx);
- if (pipe_ctx->stream->csc_color_matrix.enable_adjustment
- == true) {
- tbl_entry.color_space =
- pipe_ctx->stream->output_color_space;
-
- for (i = 0; i < 12; i++)
- tbl_entry.regval[i] =
- pipe_ctx->stream->csc_color_matrix.matrix[i];
-
- pipe_ctx->plane_res.xfm->funcs->opp_set_csc_adjustment
- (pipe_ctx->plane_res.xfm, &tbl_entry);
- }
-
- if (pipe_ctx->stream->gamut_remap_matrix.enable_remap == true) {
- adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
-
- for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++)
- adjust.temperature_matrix[i] =
- pipe_ctx->stream->gamut_remap_matrix.matrix[i];
- }
-
- pipe_ctx->plane_res.xfm->funcs->transform_set_gamut_remap(pipe_ctx->plane_res.xfm, &adjust);
-
- pipe_ctx->plane_res.scl_data.lb_params.alpha_en = pipe_ctx->bottom_pipe != 0;
-
- dce60_program_scaler(dc, pipe_ctx);
-
- mi->funcs->mem_input_program_surface_config(
- mi,
- plane_state->format,
- &plane_state->tiling_info,
- &plane_state->plane_size,
- plane_state->rotation,
- NULL,
- false);
- if (mi->funcs->set_blank)
- mi->funcs->set_blank(mi, pipe_ctx->plane_state->visible);
-
- if (dc->config.gpu_vm_support)
- mi->funcs->mem_input_program_pte_vm(
- pipe_ctx->plane_res.mi,
- plane_state->format,
- &plane_state->tiling_info,
- plane_state->rotation);
-
- /* Moved programming gamma from dc to hwss */
- if (pipe_ctx->plane_state->update_flags.bits.full_update ||
- pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change ||
- pipe_ctx->plane_state->update_flags.bits.gamma_change)
- hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state);
-
- if (pipe_ctx->plane_state->update_flags.bits.full_update)
- hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream);
-
- DC_LOG_SURFACE(
- "Pipe:%d %p: addr hi:0x%x, "
- "addr low:0x%x, "
- "src: %d, %d, %d,"
- " %d; dst: %d, %d, %d, %d;"
- "clip: %d, %d, %d, %d\n",
- pipe_ctx->pipe_idx,
- (void *) pipe_ctx->plane_state,
- pipe_ctx->plane_state->address.grph.addr.high_part,
- pipe_ctx->plane_state->address.grph.addr.low_part,
- pipe_ctx->plane_state->src_rect.x,
- pipe_ctx->plane_state->src_rect.y,
- pipe_ctx->plane_state->src_rect.width,
- pipe_ctx->plane_state->src_rect.height,
- pipe_ctx->plane_state->dst_rect.x,
- pipe_ctx->plane_state->dst_rect.y,
- pipe_ctx->plane_state->dst_rect.width,
- pipe_ctx->plane_state->dst_rect.height,
- pipe_ctx->plane_state->clip_rect.x,
- pipe_ctx->plane_state->clip_rect.y,
- pipe_ctx->plane_state->clip_rect.width,
- pipe_ctx->plane_state->clip_rect.height);
-
- DC_LOG_SURFACE(
- "Pipe %d: width, height, x, y\n"
- "viewport:%d, %d, %d, %d\n"
- "recout: %d, %d, %d, %d\n",
- pipe_ctx->pipe_idx,
- pipe_ctx->plane_res.scl_data.viewport.width,
- pipe_ctx->plane_res.scl_data.viewport.height,
- pipe_ctx->plane_res.scl_data.viewport.x,
- pipe_ctx->plane_res.scl_data.viewport.y,
- pipe_ctx->plane_res.scl_data.recout.width,
- pipe_ctx->plane_res.scl_data.recout.height,
- pipe_ctx->plane_res.scl_data.recout.x,
- pipe_ctx->plane_res.scl_data.recout.y);
-}
-
-static void dce60_apply_ctx_for_surface(
- struct dc *dc,
- const struct dc_stream_state *stream,
- int num_planes,
- struct dc_state *context)
-{
- int i;
-
- if (num_planes == 0)
- return;
-
- if (dc->fbc_compressor)
- dc->fbc_compressor->funcs->disable_fbc(dc->fbc_compressor);
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-
- if (pipe_ctx->stream != stream)
- continue;
-
- /* Need to allocate mem before program front end for Fiji */
- pipe_ctx->plane_res.mi->funcs->allocate_mem_input(
- pipe_ctx->plane_res.mi,
- pipe_ctx->stream->timing.h_total,
- pipe_ctx->stream->timing.v_total,
- pipe_ctx->stream->timing.pix_clk_100hz / 10,
- context->stream_count);
-
- dce60_program_front_end_for_pipe(dc, pipe_ctx);
-
- dc->hwss.update_plane_addr(dc, pipe_ctx);
-
- dce60_program_surface_visibility(dc, pipe_ctx);
-
- }
-
- if (dc->fbc_compressor)
- dce60_enable_fbc(dc, context);
-}
-
-void dce60_hw_sequencer_construct(struct dc *dc)
-{
- dce110_hw_sequencer_construct(dc);
-
- dc->hwseq->funcs.enable_display_power_gating = dce100_enable_display_power_gating;
- dc->hwss.apply_ctx_for_surface = dce60_apply_ctx_for_surface;
- dc->hwss.cursor_lock = dce60_pipe_control_lock;
- dc->hwss.pipe_control_lock = dce60_pipe_control_lock;
- dc->hwss.prepare_bandwidth = dce100_prepare_bandwidth;
- dc->hwss.optimize_bandwidth = dce100_optimize_bandwidth;
-}
-
diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c
index c1a85ee374d9..e691a1cf3356 100644
--- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c
@@ -111,13 +111,14 @@ static void program_timing(struct timing_generator *tg,
int vstartup_start,
int vupdate_offset,
int vupdate_width,
+ int pstate_keepout,
const enum signal_type signal,
bool use_vbios)
{
if (!use_vbios)
program_pix_dur(tg, timing->pix_clk_100hz);
- dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, use_vbios);
+ dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, 0, use_vbios);
}
static void dce60_timing_generator_enable_advanced_request(
@@ -238,6 +239,7 @@ static const struct timing_generator_funcs dce60_tg_funcs = {
dce60_timing_generator_enable_advanced_request,
.configure_crc = dce60_configure_crc,
.get_crc = dce110_get_crc,
+ .is_two_pixels_per_container = dce110_is_two_pixels_per_container,
};
void dce60_timing_generator_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/Makefile b/drivers/gpu/drm/amd/display/dc/dce80/Makefile
index 0a9d1a350d8b..fba189d26652 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce80/Makefile
@@ -23,10 +23,9 @@
# Makefile for the 'controller' sub-component of DAL.
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce80/dce80_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce80/dce80_resource.o = -Wno-override-init
-DCE80 = dce80_timing_generator.o dce80_hw_sequencer.o \
- dce80_resource.o
+DCE80 = dce80_timing_generator.o
AMD_DAL_DCE80 = $(addprefix $(AMDDALPATH)/dc/dce80/,$(DCE80))
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c
deleted file mode 100644
index d2ceebdbdf51..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright 2015 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include "dm_services.h"
-#include "dc.h"
-#include "core_types.h"
-#include "dce80_hw_sequencer.h"
-
-#include "dce/dce_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
-#include "dce100/dce100_hw_sequencer.h"
-
-/* include DCE8 register header files */
-#include "dce/dce_8_0_d.h"
-#include "dce/dce_8_0_sh_mask.h"
-
-/*******************************************************************************
- * Private definitions
- ******************************************************************************/
-
-/***************************PIPE_CONTROL***********************************/
-
-void dce80_hw_sequencer_construct(struct dc *dc)
-{
- dce110_hw_sequencer_construct(dc);
-
- dc->hwseq->funcs.enable_display_power_gating = dce100_enable_display_power_gating;
- dc->hwss.pipe_control_lock = dce_pipe_control_lock;
- dc->hwss.prepare_bandwidth = dce100_prepare_bandwidth;
- dc->hwss.optimize_bandwidth = dce100_optimize_bandwidth;
-}
-
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c
index b8fd43dc010b..88e7a1fc9a30 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c
@@ -105,19 +105,20 @@ static void program_pix_dur(struct timing_generator *tg, uint32_t pix_clk_100hz)
dm_write_reg(tg->ctx, addr, value);
}
-static void program_timing(struct timing_generator *tg,
+static void dce80_timing_generator_program_timing(struct timing_generator *tg,
const struct dc_crtc_timing *timing,
int vready_offset,
int vstartup_start,
int vupdate_offset,
int vupdate_width,
+ int pstate_keepout,
const enum signal_type signal,
bool use_vbios)
{
if (!use_vbios)
program_pix_dur(tg, timing->pix_clk_100hz);
- dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, use_vbios);
+ dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, 0, use_vbios);
}
static void dce80_timing_generator_enable_advanced_request(
@@ -184,7 +185,7 @@ static void dce80_timing_generator_enable_advanced_request(
static const struct timing_generator_funcs dce80_tg_funcs = {
.validate_timing = dce110_tg_validate_timing,
- .program_timing = program_timing,
+ .program_timing = dce80_timing_generator_program_timing,
.enable_crtc = dce110_timing_generator_enable_crtc,
.disable_crtc = dce110_timing_generator_disable_crtc,
.is_counter_moving = dce110_timing_generator_is_counter_moving,
@@ -220,6 +221,7 @@ static const struct timing_generator_funcs dce80_tg_funcs = {
dce80_timing_generator_enable_advanced_request,
.configure_crc = dce110_configure_crc,
.get_crc = dce110_get_crc,
+ .is_two_pixels_per_container = dce110_is_two_pixels_per_container,
};
void dce80_timing_generator_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
index 62ad1a11bff9..e1f6623d4936 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
@@ -22,12 +22,9 @@
#
# Makefile for DCN.
-DCN10 = dcn10_init.o dcn10_resource.o dcn10_ipp.o dcn10_hw_sequencer.o \
+DCN10 = dcn10_ipp.o \
dcn10_hw_sequencer_debug.o \
- dcn10_dpp.o dcn10_opp.o dcn10_optc.o \
- dcn10_hubp.o dcn10_mpc.o \
- dcn10_dpp_dscl.o dcn10_dpp_cm.o dcn10_cm_common.o \
- dcn10_hubbub.o dcn10_stream_encoder.o dcn10_link_encoder.o
+ dcn10_cm_common.o \
AMD_DAL_DCN10 = $(addprefix $(AMDDALPATH)/dc/dcn10/,$(DCN10))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
index 7a00fe525dfb..dcd2cdfe91eb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
@@ -24,7 +24,7 @@
*/
#include "dc.h"
#include "reg_helper.h"
-#include "dcn10_dpp.h"
+#include "dcn10/dcn10_dpp.h"
#include "dcn10_cm_common.h"
#include "custom_float.h"
@@ -62,6 +62,26 @@ void cm_helper_program_color_matrices(
}
+void cm_helper_read_color_matrices(struct dc_context *ctx,
+ uint16_t *regval,
+ const struct color_matrices_reg *reg)
+{
+ uint32_t cur_csc_reg, regval0, regval1;
+ unsigned int i = 0;
+
+ for (cur_csc_reg = reg->csc_c11_c12;
+ cur_csc_reg <= reg->csc_c33_c34; cur_csc_reg++) {
+ REG_GET_2(cur_csc_reg,
+ csc_c11, &regval0,
+ csc_c12, &regval1);
+
+ regval[2 * i] = regval0;
+ regval[(2 * i) + 1] = regval1;
+
+ i++;
+ }
+}
+
void cm_helper_program_xfer_func(
struct dc_context *ctx,
const struct pwl_params *params,
@@ -308,7 +328,10 @@ bool cm_helper_convert_to_custom_float(
#define NUMBER_REGIONS 32
#define NUMBER_SW_SEGMENTS 16
-bool cm_helper_translate_curve_to_hw_format(
+#define DC_LOGGER \
+ ctx->logger
+
+bool cm_helper_translate_curve_to_hw_format(struct dc_context *ctx,
const struct dc_transfer_func *output_tf,
struct pwl_params *lut_params, bool fixpoint)
{
@@ -342,23 +365,18 @@ bool cm_helper_translate_curve_to_hw_format(
region_start = -MAX_LOW_POINT;
region_end = NUMBER_REGIONS - MAX_LOW_POINT;
} else {
- /* 11 segments
- * segment is from 2^-10 to 2^1
+ /* 13 segments
+ * segment is from 2^-12 to 2^0
* There are less than 256 points, for optimization
*/
- seg_distr[0] = 3;
- seg_distr[1] = 4;
- seg_distr[2] = 4;
- seg_distr[3] = 4;
- seg_distr[4] = 4;
- seg_distr[5] = 4;
- seg_distr[6] = 4;
- seg_distr[7] = 4;
- seg_distr[8] = 4;
- seg_distr[9] = 4;
- seg_distr[10] = 1;
-
- region_start = -10;
+ const uint8_t SEG_COUNT = 12;
+
+ for (i = 0; i < SEG_COUNT; i++)
+ seg_distr[i] = 4;
+
+ seg_distr[SEG_COUNT] = 1;
+
+ region_start = -SEG_COUNT;
region_end = 1;
}
@@ -379,6 +397,11 @@ bool cm_helper_translate_curve_to_hw_format(
i += increment) {
if (j == hw_points - 1)
break;
+ if (i >= TRANSFER_FUNC_POINTS) {
+ DC_LOG_ERROR("Index out of bounds: i=%d, TRANSFER_FUNC_POINTS=%d\n",
+ i, TRANSFER_FUNC_POINTS);
+ return false;
+ }
rgb_resulted[j].red = output_tf->tf_pts.red[i];
rgb_resulted[j].green = output_tf->tf_pts.green[i];
rgb_resulted[j].blue = output_tf->tf_pts.blue[i];
@@ -482,10 +505,18 @@ bool cm_helper_translate_curve_to_hw_format(
rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green);
rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue);
+
if (fixpoint == true) {
- rgb->delta_red_reg = dc_fixpt_clamp_u0d10(rgb->delta_red);
- rgb->delta_green_reg = dc_fixpt_clamp_u0d10(rgb->delta_green);
- rgb->delta_blue_reg = dc_fixpt_clamp_u0d10(rgb->delta_blue);
+ uint32_t red_clamp = dc_fixpt_clamp_u0d14(rgb->delta_red);
+ uint32_t green_clamp = dc_fixpt_clamp_u0d14(rgb->delta_green);
+ uint32_t blue_clamp = dc_fixpt_clamp_u0d14(rgb->delta_blue);
+
+ if (red_clamp >> 10 || green_clamp >> 10 || blue_clamp >> 10)
+ DC_LOG_WARNING("Losing delta precision while programming shaper LUT.");
+
+ rgb->delta_red_reg = red_clamp & 0x3ff;
+ rgb->delta_green_reg = green_clamp & 0x3ff;
+ rgb->delta_blue_reg = blue_clamp & 0x3ff;
rgb->red_reg = dc_fixpt_clamp_u0d14(rgb->red);
rgb->green_reg = dc_fixpt_clamp_u0d14(rgb->green);
rgb->blue_reg = dc_fixpt_clamp_u0d14(rgb->blue);
@@ -555,6 +586,8 @@ bool cm_helper_translate_curve_to_degamma_hw_format(
i += increment) {
if (j == hw_points - 1)
break;
+ if (i >= TRANSFER_FUNC_POINTS)
+ return false;
rgb_resulted[j].red = output_tf->tf_pts.red[i];
rgb_resulted[j].green = output_tf->tf_pts.green[i];
rgb_resulted[j].blue = output_tf->tf_pts.blue[i];
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h
index 3b8cd7410498..decc50b1ac53 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h
@@ -106,6 +106,7 @@ bool cm_helper_convert_to_custom_float(
bool fixpoint);
bool cm_helper_translate_curve_to_hw_format(
+ struct dc_context *ctx,
const struct dc_transfer_func *output_tf,
struct pwl_params *lut_params, bool fixpoint);
@@ -113,5 +114,7 @@ bool cm_helper_translate_curve_to_degamma_hw_format(
const struct dc_transfer_func *output_tf,
struct pwl_params *lut_params);
-
+void cm_helper_read_color_matrices(struct dc_context *ctx,
+ uint16_t *regval,
+ const struct color_matrices_reg *reg);
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dwb.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dwb.c
index b6391a5ead78..365a3215f6d5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dwb.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dwb.c
@@ -23,8 +23,6 @@
*
*/
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-
#include "reg_helper.h"
#include "resource.h"
#include "dwb.h"
@@ -129,6 +127,3 @@ void dcn10_dwbc_construct(struct dcn10_dwbc *dwbc10,
dwbc10->dwbc_shift = dwbc_shift;
dwbc10->dwbc_mask = dwbc_mask;
}
-
-
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dwb.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dwb.h
index d56ea7c8171e..5268c46ae907 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dwb.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dwb.h
@@ -24,8 +24,6 @@
#ifndef __DC_DWBC_DCN10_H__
#define __DC_DWBC_DCN10_H__
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-
/* DCN */
#define BASE_INNER(seg) \
DCE_BASE__INST0_SEG ## seg
@@ -267,5 +265,3 @@ void dcn10_dwbc_construct(struct dcn10_dwbc *dwbc10,
int inst);
#endif
-
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
deleted file mode 100644
index 04d7bddc915b..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ /dev/null
@@ -1,3775 +0,0 @@
-/*
- * Copyright 2016 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include <linux/delay.h>
-#include "dm_services.h"
-#include "basics/dc_common.h"
-#include "core_types.h"
-#include "resource.h"
-#include "custom_float.h"
-#include "dcn10_hw_sequencer.h"
-#include "dcn10_hw_sequencer_debug.h"
-#include "dce/dce_hwseq.h"
-#include "abm.h"
-#include "dmcu.h"
-#include "dcn10_optc.h"
-#include "dcn10_dpp.h"
-#include "dcn10_mpc.h"
-#include "timing_generator.h"
-#include "opp.h"
-#include "ipp.h"
-#include "mpc.h"
-#include "reg_helper.h"
-#include "dcn10_hubp.h"
-#include "dcn10_hubbub.h"
-#include "dcn10_cm_common.h"
-#include "dc_link_dp.h"
-#include "dccg.h"
-#include "clk_mgr.h"
-#include "link_hwss.h"
-#include "dpcd_defs.h"
-#include "dsc.h"
-#include "dce/dmub_hw_lock_mgr.h"
-#include "dc_trace.h"
-#include "dce/dmub_outbox.h"
-#include "inc/dc_link_dp.h"
-#include "inc/link_dpcd.h"
-
-#define DC_LOGGER_INIT(logger)
-
-#define CTX \
- hws->ctx
-#define REG(reg)\
- hws->regs->reg
-
-#undef FN
-#define FN(reg_name, field_name) \
- hws->shifts->field_name, hws->masks->field_name
-
-/*print is 17 wide, first two characters are spaces*/
-#define DTN_INFO_MICRO_SEC(ref_cycle) \
- print_microsec(dc_ctx, log_ctx, ref_cycle)
-
-#define GAMMA_HW_POINTS_NUM 256
-
-#define PGFSM_POWER_ON 0
-#define PGFSM_POWER_OFF 2
-
-void print_microsec(struct dc_context *dc_ctx,
- struct dc_log_buffer_ctx *log_ctx,
- uint32_t ref_cycle)
-{
- const uint32_t ref_clk_mhz = dc_ctx->dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000;
- static const unsigned int frac = 1000;
- uint32_t us_x10 = (ref_cycle * frac) / ref_clk_mhz;
-
- DTN_INFO(" %11d.%03d",
- us_x10 / frac,
- us_x10 % frac);
-}
-
-void dcn10_lock_all_pipes(struct dc *dc,
- struct dc_state *context,
- bool lock)
-{
- struct pipe_ctx *pipe_ctx;
- struct timing_generator *tg;
- int i;
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- pipe_ctx = &context->res_ctx.pipe_ctx[i];
- tg = pipe_ctx->stream_res.tg;
-
- /*
- * Only lock the top pipe's tg to prevent redundant
- * (un)locking. Also skip if pipe is disabled.
- */
- if (pipe_ctx->top_pipe ||
- !pipe_ctx->stream || !pipe_ctx->plane_state ||
- !tg->funcs->is_tg_enabled(tg))
- continue;
-
- if (lock)
- dc->hwss.pipe_control_lock(dc, pipe_ctx, true);
- else
- dc->hwss.pipe_control_lock(dc, pipe_ctx, false);
- }
-}
-
-static void log_mpc_crc(struct dc *dc,
- struct dc_log_buffer_ctx *log_ctx)
-{
- struct dc_context *dc_ctx = dc->ctx;
- struct dce_hwseq *hws = dc->hwseq;
-
- if (REG(MPC_CRC_RESULT_GB))
- DTN_INFO("MPC_CRC_RESULT_GB:%d MPC_CRC_RESULT_C:%d MPC_CRC_RESULT_AR:%d\n",
- REG_READ(MPC_CRC_RESULT_GB), REG_READ(MPC_CRC_RESULT_C), REG_READ(MPC_CRC_RESULT_AR));
- if (REG(DPP_TOP0_DPP_CRC_VAL_B_A))
- DTN_INFO("DPP_TOP0_DPP_CRC_VAL_B_A:%d DPP_TOP0_DPP_CRC_VAL_R_G:%d\n",
- REG_READ(DPP_TOP0_DPP_CRC_VAL_B_A), REG_READ(DPP_TOP0_DPP_CRC_VAL_R_G));
-}
-
-void dcn10_log_hubbub_state(struct dc *dc, struct dc_log_buffer_ctx *log_ctx)
-{
- struct dc_context *dc_ctx = dc->ctx;
- struct dcn_hubbub_wm wm;
- int i;
-
- memset(&wm, 0, sizeof(struct dcn_hubbub_wm));
- dc->res_pool->hubbub->funcs->wm_read_state(dc->res_pool->hubbub, &wm);
-
- DTN_INFO("HUBBUB WM: data_urgent pte_meta_urgent"
- " sr_enter sr_exit dram_clk_change\n");
-
- for (i = 0; i < 4; i++) {
- struct dcn_hubbub_wm_set *s;
-
- s = &wm.sets[i];
- DTN_INFO("WM_Set[%d]:", s->wm_set);
- DTN_INFO_MICRO_SEC(s->data_urgent);
- DTN_INFO_MICRO_SEC(s->pte_meta_urgent);
- DTN_INFO_MICRO_SEC(s->sr_enter);
- DTN_INFO_MICRO_SEC(s->sr_exit);
- DTN_INFO_MICRO_SEC(s->dram_clk_chanage);
- DTN_INFO("\n");
- }
-
- DTN_INFO("\n");
-}
-
-static void dcn10_log_hubp_states(struct dc *dc, void *log_ctx)
-{
- struct dc_context *dc_ctx = dc->ctx;
- struct resource_pool *pool = dc->res_pool;
- int i;
-
- DTN_INFO(
- "HUBP: format addr_hi width height rot mir sw_mode dcc_en blank_en clock_en ttu_dis underflow min_ttu_vblank qos_low_wm qos_high_wm\n");
- for (i = 0; i < pool->pipe_count; i++) {
- struct hubp *hubp = pool->hubps[i];
- struct dcn_hubp_state *s = &(TO_DCN10_HUBP(hubp)->state);
-
- hubp->funcs->hubp_read_state(hubp);
-
- if (!s->blank_en) {
- DTN_INFO("[%2d]: %5xh %6xh %5d %6d %2xh %2xh %6xh %6d %8d %8d %7d %8xh",
- hubp->inst,
- s->pixel_format,
- s->inuse_addr_hi,
- s->viewport_width,
- s->viewport_height,
- s->rotation_angle,
- s->h_mirror_en,
- s->sw_mode,
- s->dcc_en,
- s->blank_en,
- s->clock_en,
- s->ttu_disable,
- s->underflow_status);
- DTN_INFO_MICRO_SEC(s->min_ttu_vblank);
- DTN_INFO_MICRO_SEC(s->qos_level_low_wm);
- DTN_INFO_MICRO_SEC(s->qos_level_high_wm);
- DTN_INFO("\n");
- }
- }
-
- DTN_INFO("\n=========RQ========\n");
- DTN_INFO("HUBP: drq_exp_m prq_exp_m mrq_exp_m crq_exp_m plane1_ba L:chunk_s min_chu_s meta_ch_s"
- " min_m_c_s dpte_gr_s mpte_gr_s swath_hei pte_row_h C:chunk_s min_chu_s meta_ch_s"
- " min_m_c_s dpte_gr_s mpte_gr_s swath_hei pte_row_h\n");
- for (i = 0; i < pool->pipe_count; i++) {
- struct dcn_hubp_state *s = &(TO_DCN10_HUBP(pool->hubps[i])->state);
- struct _vcs_dpi_display_rq_regs_st *rq_regs = &s->rq_regs;
-
- if (!s->blank_en)
- DTN_INFO("[%2d]: %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh\n",
- pool->hubps[i]->inst, rq_regs->drq_expansion_mode, rq_regs->prq_expansion_mode, rq_regs->mrq_expansion_mode,
- rq_regs->crq_expansion_mode, rq_regs->plane1_base_address, rq_regs->rq_regs_l.chunk_size,
- rq_regs->rq_regs_l.min_chunk_size, rq_regs->rq_regs_l.meta_chunk_size,
- rq_regs->rq_regs_l.min_meta_chunk_size, rq_regs->rq_regs_l.dpte_group_size,
- rq_regs->rq_regs_l.mpte_group_size, rq_regs->rq_regs_l.swath_height,
- rq_regs->rq_regs_l.pte_row_height_linear, rq_regs->rq_regs_c.chunk_size, rq_regs->rq_regs_c.min_chunk_size,
- rq_regs->rq_regs_c.meta_chunk_size, rq_regs->rq_regs_c.min_meta_chunk_size,
- rq_regs->rq_regs_c.dpte_group_size, rq_regs->rq_regs_c.mpte_group_size,
- rq_regs->rq_regs_c.swath_height, rq_regs->rq_regs_c.pte_row_height_linear);
- }
-
- DTN_INFO("========DLG========\n");
- DTN_INFO("HUBP: rc_hbe dlg_vbe min_d_y_n rc_per_ht rc_x_a_s "
- " dst_y_a_s dst_y_pf dst_y_vvb dst_y_rvb dst_y_vfl dst_y_rfl rf_pix_fq"
- " vratio_pf vrat_pf_c rc_pg_vbl rc_pg_vbc rc_mc_vbl rc_mc_vbc rc_pg_fll"
- " rc_pg_flc rc_mc_fll rc_mc_flc pr_nom_l pr_nom_c rc_pg_nl rc_pg_nc "
- " mr_nom_l mr_nom_c rc_mc_nl rc_mc_nc rc_ld_pl rc_ld_pc rc_ld_l "
- " rc_ld_c cha_cur0 ofst_cur1 cha_cur1 vr_af_vc0 ddrq_limt x_rt_dlay"
- " x_rp_dlay x_rr_sfl\n");
- for (i = 0; i < pool->pipe_count; i++) {
- struct dcn_hubp_state *s = &(TO_DCN10_HUBP(pool->hubps[i])->state);
- struct _vcs_dpi_display_dlg_regs_st *dlg_regs = &s->dlg_attr;
-
- if (!s->blank_en)
- DTN_INFO("[%2d]: %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh"
- " %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh"
- " %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh\n",
- pool->hubps[i]->inst, dlg_regs->refcyc_h_blank_end, dlg_regs->dlg_vblank_end, dlg_regs->min_dst_y_next_start,
- dlg_regs->refcyc_per_htotal, dlg_regs->refcyc_x_after_scaler, dlg_regs->dst_y_after_scaler,
- dlg_regs->dst_y_prefetch, dlg_regs->dst_y_per_vm_vblank, dlg_regs->dst_y_per_row_vblank,
- dlg_regs->dst_y_per_vm_flip, dlg_regs->dst_y_per_row_flip, dlg_regs->ref_freq_to_pix_freq,
- dlg_regs->vratio_prefetch, dlg_regs->vratio_prefetch_c, dlg_regs->refcyc_per_pte_group_vblank_l,
- dlg_regs->refcyc_per_pte_group_vblank_c, dlg_regs->refcyc_per_meta_chunk_vblank_l,
- dlg_regs->refcyc_per_meta_chunk_vblank_c, dlg_regs->refcyc_per_pte_group_flip_l,
- dlg_regs->refcyc_per_pte_group_flip_c, dlg_regs->refcyc_per_meta_chunk_flip_l,
- dlg_regs->refcyc_per_meta_chunk_flip_c, dlg_regs->dst_y_per_pte_row_nom_l,
- dlg_regs->dst_y_per_pte_row_nom_c, dlg_regs->refcyc_per_pte_group_nom_l,
- dlg_regs->refcyc_per_pte_group_nom_c, dlg_regs->dst_y_per_meta_row_nom_l,
- dlg_regs->dst_y_per_meta_row_nom_c, dlg_regs->refcyc_per_meta_chunk_nom_l,
- dlg_regs->refcyc_per_meta_chunk_nom_c, dlg_regs->refcyc_per_line_delivery_pre_l,
- dlg_regs->refcyc_per_line_delivery_pre_c, dlg_regs->refcyc_per_line_delivery_l,
- dlg_regs->refcyc_per_line_delivery_c, dlg_regs->chunk_hdl_adjust_cur0, dlg_regs->dst_y_offset_cur1,
- dlg_regs->chunk_hdl_adjust_cur1, dlg_regs->vready_after_vcount0, dlg_regs->dst_y_delta_drq_limit,
- dlg_regs->xfc_reg_transfer_delay, dlg_regs->xfc_reg_precharge_delay,
- dlg_regs->xfc_reg_remote_surface_flip_latency);
- }
-
- DTN_INFO("========TTU========\n");
- DTN_INFO("HUBP: qos_ll_wm qos_lh_wm mn_ttu_vb qos_l_flp rc_rd_p_l rc_rd_l rc_rd_p_c"
- " rc_rd_c rc_rd_c0 rc_rd_pc0 rc_rd_c1 rc_rd_pc1 qos_lf_l qos_rds_l"
- " qos_lf_c qos_rds_c qos_lf_c0 qos_rds_c0 qos_lf_c1 qos_rds_c1\n");
- for (i = 0; i < pool->pipe_count; i++) {
- struct dcn_hubp_state *s = &(TO_DCN10_HUBP(pool->hubps[i])->state);
- struct _vcs_dpi_display_ttu_regs_st *ttu_regs = &s->ttu_attr;
-
- if (!s->blank_en)
- DTN_INFO("[%2d]: %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh\n",
- pool->hubps[i]->inst, ttu_regs->qos_level_low_wm, ttu_regs->qos_level_high_wm, ttu_regs->min_ttu_vblank,
- ttu_regs->qos_level_flip, ttu_regs->refcyc_per_req_delivery_pre_l, ttu_regs->refcyc_per_req_delivery_l,
- ttu_regs->refcyc_per_req_delivery_pre_c, ttu_regs->refcyc_per_req_delivery_c, ttu_regs->refcyc_per_req_delivery_cur0,
- ttu_regs->refcyc_per_req_delivery_pre_cur0, ttu_regs->refcyc_per_req_delivery_cur1,
- ttu_regs->refcyc_per_req_delivery_pre_cur1, ttu_regs->qos_level_fixed_l, ttu_regs->qos_ramp_disable_l,
- ttu_regs->qos_level_fixed_c, ttu_regs->qos_ramp_disable_c, ttu_regs->qos_level_fixed_cur0,
- ttu_regs->qos_ramp_disable_cur0, ttu_regs->qos_level_fixed_cur1, ttu_regs->qos_ramp_disable_cur1);
- }
- DTN_INFO("\n");
-}
-
-void dcn10_log_hw_state(struct dc *dc,
- struct dc_log_buffer_ctx *log_ctx)
-{
- struct dc_context *dc_ctx = dc->ctx;
- struct resource_pool *pool = dc->res_pool;
- int i;
-
- DTN_INFO_BEGIN();
-
- dcn10_log_hubbub_state(dc, log_ctx);
-
- dcn10_log_hubp_states(dc, log_ctx);
-
- DTN_INFO("DPP: IGAM format IGAM mode DGAM mode RGAM mode"
- " GAMUT mode C11 C12 C13 C14 C21 C22 C23 C24 "
- "C31 C32 C33 C34\n");
- for (i = 0; i < pool->pipe_count; i++) {
- struct dpp *dpp = pool->dpps[i];
- struct dcn_dpp_state s = {0};
-
- dpp->funcs->dpp_read_state(dpp, &s);
-
- if (!s.is_enabled)
- continue;
-
- DTN_INFO("[%2d]: %11xh %-11s %-11s %-11s"
- "%8x %08xh %08xh %08xh %08xh %08xh %08xh",
- dpp->inst,
- s.igam_input_format,
- (s.igam_lut_mode == 0) ? "BypassFixed" :
- ((s.igam_lut_mode == 1) ? "BypassFloat" :
- ((s.igam_lut_mode == 2) ? "RAM" :
- ((s.igam_lut_mode == 3) ? "RAM" :
- "Unknown"))),
- (s.dgam_lut_mode == 0) ? "Bypass" :
- ((s.dgam_lut_mode == 1) ? "sRGB" :
- ((s.dgam_lut_mode == 2) ? "Ycc" :
- ((s.dgam_lut_mode == 3) ? "RAM" :
- ((s.dgam_lut_mode == 4) ? "RAM" :
- "Unknown")))),
- (s.rgam_lut_mode == 0) ? "Bypass" :
- ((s.rgam_lut_mode == 1) ? "sRGB" :
- ((s.rgam_lut_mode == 2) ? "Ycc" :
- ((s.rgam_lut_mode == 3) ? "RAM" :
- ((s.rgam_lut_mode == 4) ? "RAM" :
- "Unknown")))),
- s.gamut_remap_mode,
- s.gamut_remap_c11_c12,
- s.gamut_remap_c13_c14,
- s.gamut_remap_c21_c22,
- s.gamut_remap_c23_c24,
- s.gamut_remap_c31_c32,
- s.gamut_remap_c33_c34);
- DTN_INFO("\n");
- }
- DTN_INFO("\n");
-
- DTN_INFO("MPCC: OPP DPP MPCCBOT MODE ALPHA_MODE PREMULT OVERLAP_ONLY IDLE\n");
- for (i = 0; i < pool->pipe_count; i++) {
- struct mpcc_state s = {0};
-
- pool->mpc->funcs->read_mpcc_state(pool->mpc, i, &s);
- if (s.opp_id != 0xf)
- DTN_INFO("[%2d]: %2xh %2xh %6xh %4d %10d %7d %12d %4d\n",
- i, s.opp_id, s.dpp_id, s.bot_mpcc_id,
- s.mode, s.alpha_mode, s.pre_multiplied_alpha, s.overlap_only,
- s.idle);
- }
- DTN_INFO("\n");
-
- DTN_INFO("OTG: v_bs v_be v_ss v_se vpol vmax vmin vmax_sel vmin_sel h_bs h_be h_ss h_se hpol htot vtot underflow blank_en\n");
-
- for (i = 0; i < pool->timing_generator_count; i++) {
- struct timing_generator *tg = pool->timing_generators[i];
- struct dcn_otg_state s = {0};
- /* Read shared OTG state registers for all DCNx */
- optc1_read_otg_state(DCN10TG_FROM_TG(tg), &s);
-
- /*
- * For DCN2 and greater, a register on the OPP is used to
- * determine if the CRTC is blanked instead of the OTG. So use
- * dpg_is_blanked() if exists, otherwise fallback on otg.
- *
- * TODO: Implement DCN-specific read_otg_state hooks.
- */
- if (pool->opps[i]->funcs->dpg_is_blanked)
- s.blank_enabled = pool->opps[i]->funcs->dpg_is_blanked(pool->opps[i]);
- else
- s.blank_enabled = tg->funcs->is_blanked(tg);
-
- //only print if OTG master is enabled
- if ((s.otg_enabled & 1) == 0)
- continue;
-
- DTN_INFO("[%d]: %5d %5d %5d %5d %5d %5d %5d %9d %9d %5d %5d %5d %5d %5d %5d %5d %9d %8d\n",
- tg->inst,
- s.v_blank_start,
- s.v_blank_end,
- s.v_sync_a_start,
- s.v_sync_a_end,
- s.v_sync_a_pol,
- s.v_total_max,
- s.v_total_min,
- s.v_total_max_sel,
- s.v_total_min_sel,
- s.h_blank_start,
- s.h_blank_end,
- s.h_sync_a_start,
- s.h_sync_a_end,
- s.h_sync_a_pol,
- s.h_total,
- s.v_total,
- s.underflow_occurred_status,
- s.blank_enabled);
-
- // Clear underflow for debug purposes
- // We want to keep underflow sticky bit on for the longevity tests outside of test environment.
- // This function is called only from Windows or Diags test environment, hence it's safe to clear
- // it from here without affecting the original intent.
- tg->funcs->clear_optc_underflow(tg);
- }
- DTN_INFO("\n");
-
- // dcn_dsc_state struct field bytes_per_pixel was renamed to bits_per_pixel
- // TODO: Update golden log header to reflect this name change
- DTN_INFO("DSC: CLOCK_EN SLICE_WIDTH Bytes_pp\n");
- for (i = 0; i < pool->res_cap->num_dsc; i++) {
- struct display_stream_compressor *dsc = pool->dscs[i];
- struct dcn_dsc_state s = {0};
-
- dsc->funcs->dsc_read_state(dsc, &s);
- DTN_INFO("[%d]: %-9d %-12d %-10d\n",
- dsc->inst,
- s.dsc_clock_en,
- s.dsc_slice_width,
- s.dsc_bits_per_pixel);
- DTN_INFO("\n");
- }
- DTN_INFO("\n");
-
- DTN_INFO("S_ENC: DSC_MODE SEC_GSP7_LINE_NUM"
- " VBID6_LINE_REFERENCE VBID6_LINE_NUM SEC_GSP7_ENABLE SEC_STREAM_ENABLE\n");
- for (i = 0; i < pool->stream_enc_count; i++) {
- struct stream_encoder *enc = pool->stream_enc[i];
- struct enc_state s = {0};
-
- if (enc->funcs->enc_read_state) {
- enc->funcs->enc_read_state(enc, &s);
- DTN_INFO("[%-3d]: %-9d %-18d %-21d %-15d %-16d %-17d\n",
- enc->id,
- s.dsc_mode,
- s.sec_gsp_pps_line_num,
- s.vbid6_line_reference,
- s.vbid6_line_num,
- s.sec_gsp_pps_enable,
- s.sec_stream_enable);
- DTN_INFO("\n");
- }
- }
- DTN_INFO("\n");
-
- DTN_INFO("L_ENC: DPHY_FEC_EN DPHY_FEC_READY_SHADOW DPHY_FEC_ACTIVE_STATUS DP_LINK_TRAINING_COMPLETE\n");
- for (i = 0; i < dc->link_count; i++) {
- struct link_encoder *lenc = dc->links[i]->link_enc;
-
- struct link_enc_state s = {0};
-
- if (lenc->funcs->read_state) {
- lenc->funcs->read_state(lenc, &s);
- DTN_INFO("[%-3d]: %-12d %-22d %-22d %-25d\n",
- i,
- s.dphy_fec_en,
- s.dphy_fec_ready_shadow,
- s.dphy_fec_active_status,
- s.dp_link_training_complete);
- DTN_INFO("\n");
- }
- }
- DTN_INFO("\n");
-
- DTN_INFO("\nCALCULATED Clocks: dcfclk_khz:%d dcfclk_deep_sleep_khz:%d dispclk_khz:%d\n"
- "dppclk_khz:%d max_supported_dppclk_khz:%d fclk_khz:%d socclk_khz:%d\n\n",
- dc->current_state->bw_ctx.bw.dcn.clk.dcfclk_khz,
- dc->current_state->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz,
- dc->current_state->bw_ctx.bw.dcn.clk.dispclk_khz,
- dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz,
- dc->current_state->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz,
- dc->current_state->bw_ctx.bw.dcn.clk.fclk_khz,
- dc->current_state->bw_ctx.bw.dcn.clk.socclk_khz);
-
- log_mpc_crc(dc, log_ctx);
-
- {
- int hpo_dp_link_enc_count = 0;
-
- if (pool->hpo_dp_stream_enc_count > 0) {
- DTN_INFO("DP HPO S_ENC: Enabled OTG Format Depth Vid SDP Compressed Link\n");
- for (i = 0; i < pool->hpo_dp_stream_enc_count; i++) {
- struct hpo_dp_stream_encoder_state hpo_dp_se_state = {0};
- struct hpo_dp_stream_encoder *hpo_dp_stream_enc = pool->hpo_dp_stream_enc[i];
-
- if (hpo_dp_stream_enc && hpo_dp_stream_enc->funcs->read_state) {
- hpo_dp_stream_enc->funcs->read_state(hpo_dp_stream_enc, &hpo_dp_se_state);
-
- DTN_INFO("[%d]: %d %d %6s %d %d %d %d %d\n",
- hpo_dp_stream_enc->id - ENGINE_ID_HPO_DP_0,
- hpo_dp_se_state.stream_enc_enabled,
- hpo_dp_se_state.otg_inst,
- (hpo_dp_se_state.pixel_encoding == 0) ? "4:4:4" :
- ((hpo_dp_se_state.pixel_encoding == 1) ? "4:2:2" :
- (hpo_dp_se_state.pixel_encoding == 2) ? "4:2:0" : "Y-Only"),
- (hpo_dp_se_state.component_depth == 0) ? 6 :
- ((hpo_dp_se_state.component_depth == 1) ? 8 :
- (hpo_dp_se_state.component_depth == 2) ? 10 : 12),
- hpo_dp_se_state.vid_stream_enabled,
- hpo_dp_se_state.sdp_enabled,
- hpo_dp_se_state.compressed_format,
- hpo_dp_se_state.mapped_to_link_enc);
- }
- }
-
- DTN_INFO("\n");
- }
-
- /* log DP HPO L_ENC section if any hpo_dp_link_enc exists */
- for (i = 0; i < dc->link_count; i++)
- if (dc->links[i]->hpo_dp_link_enc)
- hpo_dp_link_enc_count++;
-
- if (hpo_dp_link_enc_count) {
- DTN_INFO("DP HPO L_ENC: Enabled Mode Lanes Stream Slots VC Rate X VC Rate Y\n");
-
- for (i = 0; i < dc->link_count; i++) {
- struct hpo_dp_link_encoder *hpo_dp_link_enc = dc->links[i]->hpo_dp_link_enc;
- struct hpo_dp_link_enc_state hpo_dp_le_state = {0};
-
- if (hpo_dp_link_enc && hpo_dp_link_enc->funcs->read_state) {
- hpo_dp_link_enc->funcs->read_state(hpo_dp_link_enc, &hpo_dp_le_state);
- DTN_INFO("[%d]: %d %6s %d %d %d %d %d\n",
- hpo_dp_link_enc->inst,
- hpo_dp_le_state.link_enc_enabled,
- (hpo_dp_le_state.link_mode == 0) ? "TPS1" :
- (hpo_dp_le_state.link_mode == 1) ? "TPS2" :
- (hpo_dp_le_state.link_mode == 2) ? "ACTIVE" : "TEST",
- hpo_dp_le_state.lane_count,
- hpo_dp_le_state.stream_src[0],
- hpo_dp_le_state.slot_count[0],
- hpo_dp_le_state.vc_rate_x[0],
- hpo_dp_le_state.vc_rate_y[0]);
- DTN_INFO("\n");
- }
- }
-
- DTN_INFO("\n");
- }
- }
-
- DTN_INFO_END();
-}
-
-bool dcn10_did_underflow_occur(struct dc *dc, struct pipe_ctx *pipe_ctx)
-{
- struct hubp *hubp = pipe_ctx->plane_res.hubp;
- struct timing_generator *tg = pipe_ctx->stream_res.tg;
-
- if (tg->funcs->is_optc_underflow_occurred(tg)) {
- tg->funcs->clear_optc_underflow(tg);
- return true;
- }
-
- if (hubp->funcs->hubp_get_underflow_status(hubp)) {
- hubp->funcs->hubp_clear_underflow(hubp);
- return true;
- }
- return false;
-}
-
-void dcn10_enable_power_gating_plane(
- struct dce_hwseq *hws,
- bool enable)
-{
- bool force_on = true; /* disable power gating */
-
- if (enable)
- force_on = false;
-
- /* DCHUBP0/1/2/3 */
- REG_UPDATE(DOMAIN0_PG_CONFIG, DOMAIN0_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN2_PG_CONFIG, DOMAIN2_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN4_PG_CONFIG, DOMAIN4_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN6_PG_CONFIG, DOMAIN6_POWER_FORCEON, force_on);
-
- /* DPP0/1/2/3 */
- REG_UPDATE(DOMAIN1_PG_CONFIG, DOMAIN1_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN3_PG_CONFIG, DOMAIN3_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN5_PG_CONFIG, DOMAIN5_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN7_PG_CONFIG, DOMAIN7_POWER_FORCEON, force_on);
-}
-
-void dcn10_disable_vga(
- struct dce_hwseq *hws)
-{
- unsigned int in_vga1_mode = 0;
- unsigned int in_vga2_mode = 0;
- unsigned int in_vga3_mode = 0;
- unsigned int in_vga4_mode = 0;
-
- REG_GET(D1VGA_CONTROL, D1VGA_MODE_ENABLE, &in_vga1_mode);
- REG_GET(D2VGA_CONTROL, D2VGA_MODE_ENABLE, &in_vga2_mode);
- REG_GET(D3VGA_CONTROL, D3VGA_MODE_ENABLE, &in_vga3_mode);
- REG_GET(D4VGA_CONTROL, D4VGA_MODE_ENABLE, &in_vga4_mode);
-
- if (in_vga1_mode == 0 && in_vga2_mode == 0 &&
- in_vga3_mode == 0 && in_vga4_mode == 0)
- return;
-
- REG_WRITE(D1VGA_CONTROL, 0);
- REG_WRITE(D2VGA_CONTROL, 0);
- REG_WRITE(D3VGA_CONTROL, 0);
- REG_WRITE(D4VGA_CONTROL, 0);
-
- /* HW Engineer's Notes:
- * During switch from vga->extended, if we set the VGA_TEST_ENABLE and
- * then hit the VGA_TEST_RENDER_START, then the DCHUBP timing gets updated correctly.
- *
- * Then vBIOS will have it poll for the VGA_TEST_RENDER_DONE and unset
- * VGA_TEST_ENABLE, to leave it in the same state as before.
- */
- REG_UPDATE(VGA_TEST_CONTROL, VGA_TEST_ENABLE, 1);
- REG_UPDATE(VGA_TEST_CONTROL, VGA_TEST_RENDER_START, 1);
-}
-
-/**
- * dcn10_dpp_pg_control - DPP power gate control.
- *
- * @hws: dce_hwseq reference.
- * @dpp_inst: DPP instance reference.
- * @power_on: true if we want to enable power gate, false otherwise.
- *
- * Enable or disable power gate in the specific DPP instance.
- */
-void dcn10_dpp_pg_control(
- struct dce_hwseq *hws,
- unsigned int dpp_inst,
- bool power_on)
-{
- uint32_t power_gate = power_on ? 0 : 1;
- uint32_t pwr_status = power_on ? PGFSM_POWER_ON : PGFSM_POWER_OFF;
-
- if (hws->ctx->dc->debug.disable_dpp_power_gate)
- return;
- if (REG(DOMAIN1_PG_CONFIG) == 0)
- return;
-
- switch (dpp_inst) {
- case 0: /* DPP0 */
- REG_UPDATE(DOMAIN1_PG_CONFIG,
- DOMAIN1_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN1_PG_STATUS,
- DOMAIN1_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 1: /* DPP1 */
- REG_UPDATE(DOMAIN3_PG_CONFIG,
- DOMAIN3_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN3_PG_STATUS,
- DOMAIN3_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 2: /* DPP2 */
- REG_UPDATE(DOMAIN5_PG_CONFIG,
- DOMAIN5_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN5_PG_STATUS,
- DOMAIN5_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 3: /* DPP3 */
- REG_UPDATE(DOMAIN7_PG_CONFIG,
- DOMAIN7_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN7_PG_STATUS,
- DOMAIN7_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- default:
- BREAK_TO_DEBUGGER();
- break;
- }
-}
-
-/**
- * dcn10_hubp_pg_control - HUBP power gate control.
- *
- * @hws: dce_hwseq reference.
- * @hubp_inst: DPP instance reference.
- * @power_on: true if we want to enable power gate, false otherwise.
- *
- * Enable or disable power gate in the specific HUBP instance.
- */
-void dcn10_hubp_pg_control(
- struct dce_hwseq *hws,
- unsigned int hubp_inst,
- bool power_on)
-{
- uint32_t power_gate = power_on ? 0 : 1;
- uint32_t pwr_status = power_on ? PGFSM_POWER_ON : PGFSM_POWER_OFF;
-
- if (hws->ctx->dc->debug.disable_hubp_power_gate)
- return;
- if (REG(DOMAIN0_PG_CONFIG) == 0)
- return;
-
- switch (hubp_inst) {
- case 0: /* DCHUBP0 */
- REG_UPDATE(DOMAIN0_PG_CONFIG,
- DOMAIN0_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN0_PG_STATUS,
- DOMAIN0_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 1: /* DCHUBP1 */
- REG_UPDATE(DOMAIN2_PG_CONFIG,
- DOMAIN2_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN2_PG_STATUS,
- DOMAIN2_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 2: /* DCHUBP2 */
- REG_UPDATE(DOMAIN4_PG_CONFIG,
- DOMAIN4_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN4_PG_STATUS,
- DOMAIN4_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 3: /* DCHUBP3 */
- REG_UPDATE(DOMAIN6_PG_CONFIG,
- DOMAIN6_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN6_PG_STATUS,
- DOMAIN6_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- default:
- BREAK_TO_DEBUGGER();
- break;
- }
-}
-
-static void power_on_plane(
- struct dce_hwseq *hws,
- int plane_id)
-{
- DC_LOGGER_INIT(hws->ctx->logger);
- if (REG(DC_IP_REQUEST_CNTL)) {
- REG_SET(DC_IP_REQUEST_CNTL, 0,
- IP_REQUEST_EN, 1);
-
- if (hws->funcs.dpp_pg_control)
- hws->funcs.dpp_pg_control(hws, plane_id, true);
-
- if (hws->funcs.hubp_pg_control)
- hws->funcs.hubp_pg_control(hws, plane_id, true);
-
- REG_SET(DC_IP_REQUEST_CNTL, 0,
- IP_REQUEST_EN, 0);
- DC_LOG_DEBUG(
- "Un-gated front end for pipe %d\n", plane_id);
- }
-}
-
-static void undo_DEGVIDCN10_253_wa(struct dc *dc)
-{
- struct dce_hwseq *hws = dc->hwseq;
- struct hubp *hubp = dc->res_pool->hubps[0];
-
- if (!hws->wa_state.DEGVIDCN10_253_applied)
- return;
-
- hubp->funcs->set_blank(hubp, true);
-
- REG_SET(DC_IP_REQUEST_CNTL, 0,
- IP_REQUEST_EN, 1);
-
- hws->funcs.hubp_pg_control(hws, 0, false);
- REG_SET(DC_IP_REQUEST_CNTL, 0,
- IP_REQUEST_EN, 0);
-
- hws->wa_state.DEGVIDCN10_253_applied = false;
-}
-
-static void apply_DEGVIDCN10_253_wa(struct dc *dc)
-{
- struct dce_hwseq *hws = dc->hwseq;
- struct hubp *hubp = dc->res_pool->hubps[0];
- int i;
-
- if (dc->debug.disable_stutter)
- return;
-
- if (!hws->wa.DEGVIDCN10_253)
- return;
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- if (!dc->res_pool->hubps[i]->power_gated)
- return;
- }
-
- /* all pipe power gated, apply work around to enable stutter. */
-
- REG_SET(DC_IP_REQUEST_CNTL, 0,
- IP_REQUEST_EN, 1);
-
- hws->funcs.hubp_pg_control(hws, 0, true);
- REG_SET(DC_IP_REQUEST_CNTL, 0,
- IP_REQUEST_EN, 0);
-
- hubp->funcs->set_hubp_blank_en(hubp, false);
- hws->wa_state.DEGVIDCN10_253_applied = true;
-}
-
-void dcn10_bios_golden_init(struct dc *dc)
-{
- struct dce_hwseq *hws = dc->hwseq;
- struct dc_bios *bp = dc->ctx->dc_bios;
- int i;
- bool allow_self_fresh_force_enable = true;
-
- if (hws->funcs.s0i3_golden_init_wa && hws->funcs.s0i3_golden_init_wa(dc))
- return;
-
- if (dc->res_pool->hubbub->funcs->is_allow_self_refresh_enabled)
- allow_self_fresh_force_enable =
- dc->res_pool->hubbub->funcs->is_allow_self_refresh_enabled(dc->res_pool->hubbub);
-
-
- /* WA for making DF sleep when idle after resume from S0i3.
- * DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_ENABLE is set to 1 by
- * command table, if DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_ENABLE = 0
- * before calling command table and it changed to 1 after,
- * it should be set back to 0.
- */
-
- /* initialize dcn global */
- bp->funcs->enable_disp_power_gating(bp,
- CONTROLLER_ID_D0, ASIC_PIPE_INIT);
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- /* initialize dcn per pipe */
- bp->funcs->enable_disp_power_gating(bp,
- CONTROLLER_ID_D0 + i, ASIC_PIPE_DISABLE);
- }
-
- if (dc->res_pool->hubbub->funcs->allow_self_refresh_control)
- if (allow_self_fresh_force_enable == false &&
- dc->res_pool->hubbub->funcs->is_allow_self_refresh_enabled(dc->res_pool->hubbub))
- dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub,
- !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter);
-
-}
-
-static void false_optc_underflow_wa(
- struct dc *dc,
- const struct dc_stream_state *stream,
- struct timing_generator *tg)
-{
- int i;
- bool underflow;
-
- if (!dc->hwseq->wa.false_optc_underflow)
- return;
-
- underflow = tg->funcs->is_optc_underflow_occurred(tg);
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
-
- if (old_pipe_ctx->stream != stream)
- continue;
-
- dc->hwss.wait_for_mpcc_disconnect(dc, dc->res_pool, old_pipe_ctx);
- }
-
- if (tg->funcs->set_blank_data_double_buffer)
- tg->funcs->set_blank_data_double_buffer(tg, true);
-
- if (tg->funcs->is_optc_underflow_occurred(tg) && !underflow)
- tg->funcs->clear_optc_underflow(tg);
-}
-
-enum dc_status dcn10_enable_stream_timing(
- struct pipe_ctx *pipe_ctx,
- struct dc_state *context,
- struct dc *dc)
-{
- struct dc_stream_state *stream = pipe_ctx->stream;
- enum dc_color_space color_space;
- struct tg_color black_color = {0};
-
- /* by upper caller loop, pipe0 is parent pipe and be called first.
- * back end is set up by for pipe0. Other children pipe share back end
- * with pipe 0. No program is needed.
- */
- if (pipe_ctx->top_pipe != NULL)
- return DC_OK;
-
- /* TODO check if timing_changed, disable stream if timing changed */
-
- /* HW program guide assume display already disable
- * by unplug sequence. OTG assume stop.
- */
- pipe_ctx->stream_res.tg->funcs->enable_optc_clock(pipe_ctx->stream_res.tg, true);
-
- if (false == pipe_ctx->clock_source->funcs->program_pix_clk(
- pipe_ctx->clock_source,
- &pipe_ctx->stream_res.pix_clk_params,
- &pipe_ctx->pll_settings)) {
- BREAK_TO_DEBUGGER();
- return DC_ERROR_UNEXPECTED;
- }
-
- pipe_ctx->stream_res.tg->funcs->program_timing(
- pipe_ctx->stream_res.tg,
- &stream->timing,
- pipe_ctx->pipe_dlg_param.vready_offset,
- pipe_ctx->pipe_dlg_param.vstartup_start,
- pipe_ctx->pipe_dlg_param.vupdate_offset,
- pipe_ctx->pipe_dlg_param.vupdate_width,
- pipe_ctx->stream->signal,
- true);
-
-#if 0 /* move to after enable_crtc */
- /* TODO: OPP FMT, ABM. etc. should be done here. */
- /* or FPGA now. instance 0 only. TODO: move to opp.c */
-
- inst_offset = reg_offsets[pipe_ctx->stream_res.tg->inst].fmt;
-
- pipe_ctx->stream_res.opp->funcs->opp_program_fmt(
- pipe_ctx->stream_res.opp,
- &stream->bit_depth_params,
- &stream->clamping);
-#endif
- /* program otg blank color */
- color_space = stream->output_color_space;
- color_space_to_black_color(dc, color_space, &black_color);
-
- /*
- * The way 420 is packed, 2 channels carry Y component, 1 channel
- * alternate between Cb and Cr, so both channels need the pixel
- * value for Y
- */
- if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420)
- black_color.color_r_cr = black_color.color_g_y;
-
- if (pipe_ctx->stream_res.tg->funcs->set_blank_color)
- pipe_ctx->stream_res.tg->funcs->set_blank_color(
- pipe_ctx->stream_res.tg,
- &black_color);
-
- if (pipe_ctx->stream_res.tg->funcs->is_blanked &&
- !pipe_ctx->stream_res.tg->funcs->is_blanked(pipe_ctx->stream_res.tg)) {
- pipe_ctx->stream_res.tg->funcs->set_blank(pipe_ctx->stream_res.tg, true);
- hwss_wait_for_blank_complete(pipe_ctx->stream_res.tg);
- false_optc_underflow_wa(dc, pipe_ctx->stream, pipe_ctx->stream_res.tg);
- }
-
- /* VTG is within DCHUB command block. DCFCLK is always on */
- if (false == pipe_ctx->stream_res.tg->funcs->enable_crtc(pipe_ctx->stream_res.tg)) {
- BREAK_TO_DEBUGGER();
- return DC_ERROR_UNEXPECTED;
- }
-
- /* TODO program crtc source select for non-virtual signal*/
- /* TODO program FMT */
- /* TODO setup link_enc */
- /* TODO set stream attributes */
- /* TODO program audio */
- /* TODO enable stream if timing changed */
- /* TODO unblank stream if DP */
-
- return DC_OK;
-}
-
-/*
- * Shut down the back end that feeds @pipe_ctx (link stream, audio endpoint
- * and, for a top pipe, the timing generator) and detach the pipe from its
- * stream. @context is not referenced by this function.
- */
-static void dcn10_reset_back_end_for_pipe(
-		struct dc *dc,
-		struct pipe_ctx *pipe_ctx,
-		struct dc_state *context)
-{
-	int i;
-	struct dc_link *link;
-	DC_LOGGER_INIT(dc->ctx->logger);
-
-	/* Without a stream encoder there is nothing to stop; drop the stream. */
-	if (!pipe_ctx->stream_res.stream_enc) {
-		pipe_ctx->stream = NULL;
-		return;
-	}
-
-	if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
-		link = pipe_ctx->stream->link;
-
-		/*
-		 * DPMS may already have disabled the stream, or dpms_off may be
-		 * wrong because of fastboot: when resuming from S4 with only the
-		 * second screen, dpms_off is true although VBIOS lit up eDP.
-		 * The link status is therefore consulted as well.
-		 */
-		if (link->link_status.link_active || !pipe_ctx->stream->dpms_off)
-			core_link_disable_stream(pipe_ctx);
-		else if (pipe_ctx->stream_res.audio)
-			dc->hwss.disable_audio_stream(pipe_ctx);
-
-		if (pipe_ctx->stream_res.audio) {
-			struct audio *audio = pipe_ctx->stream_res.audio;
-
-			/* disable az_endpoint */
-			audio->funcs->az_disable(audio);
-
-			/*
-			 * Audio endpoints are arbitrated dynamically: release the
-			 * resource, which also resets its acquired flag.
-			 */
-			if (dc->caps.dynamic_audio == true) {
-				update_audio_usage(&dc->current_state->res_ctx,
-						dc->res_pool, audio, false);
-				pipe_ctx->stream_res.audio = NULL;
-			}
-		}
-	}
-
-	/*
-	 * The caller's loop resets the parent pipe (pipe0) last. The back end
-	 * is shared by all pipes and is only disabled together with the
-	 * parent pipe.
-	 */
-	if (!pipe_ctx->top_pipe) {
-		struct timing_generator *tg;
-
-		if (pipe_ctx->stream_res.abm)
-			dc->hwss.set_abm_immediate_disable(pipe_ctx);
-
-		tg = pipe_ctx->stream_res.tg;
-		tg->funcs->disable_crtc(tg);
-		tg->funcs->enable_optc_clock(tg, false);
-		if (tg->funcs->set_drr)
-			tg->funcs->set_drr(tg, NULL);
-	}
-
-	/* Look for pipe_ctx among the pipes of the current state. */
-	i = 0;
-	while (i < dc->res_pool->pipe_count &&
-			&dc->current_state->res_ctx.pipe_ctx[i] != pipe_ctx)
-		i++;
-
-	/* Not part of the current state: leave its stream pointer alone. */
-	if (i == dc->res_pool->pipe_count)
-		return;
-
-	pipe_ctx->stream = NULL;
-	DC_LOG_DEBUG("Reset back end for pipe %d, tg:%d\n",
-			pipe_ctx->pipe_idx, pipe_ctx->stream_res.tg->inst);
-}
-
-/*
- * Forced-recovery workaround: blank and disable every HUBP around a global
- * DCHUBBUB soft reset, then re-enable and unblank them.
- *
- * Returns false without touching the hardware when
- * dc->debug.recovery_enabled is not set; returns true once the sequence
- * has been issued.
- */
-static bool dcn10_hw_wa_force_recovery(struct dc *dc)
-{
-	struct hubp *hubp;
-	unsigned int i;
-	/*
-	 * NOTE(review): this starts out true, so the underflow scan below can
-	 * never veto the recovery. Left unchanged to preserve behaviour;
-	 * confirm whether recovery was meant to depend on an underflow.
-	 */
-	bool need_recover = true;
-
-	if (!dc->debug.recovery_enabled)
-		return false;
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		hubp = dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp;
-		if (hubp != NULL && hubp->funcs->hubp_get_underflow_status &&
-		    hubp->funcs->hubp_get_underflow_status(hubp) != 0) {
-			/* one pipe underflow, we will reset all the pipes */
-			need_recover = true;
-		}
-	}
-	if (!need_recover)
-		return false;
-
-	/*
-	 * Sequence:
-	 *   DCHUBP_CNTL:HUBP_BLANK_EN=1
-	 *   DCHUBBUB_SOFT_RESET:DCHUBBUB_GLOBAL_SOFT_RESET=1
-	 *   DCHUBP_CNTL:HUBP_DISABLE=1
-	 *   DCHUBP_CNTL:HUBP_DISABLE=0
-	 *   DCHUBBUB_SOFT_RESET:DCHUBBUB_GLOBAL_SOFT_RESET=0
-	 *   DCSURF_PRIMARY_SURFACE_ADDRESS
-	 *   DCHUBP_CNTL:HUBP_BLANK_EN=0
-	 */
-
-	/* DCHUBP_CNTL:HUBP_BLANK_EN=1 */
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		hubp = dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp;
-		if (hubp != NULL && hubp->funcs->set_hubp_blank_en)
-			hubp->funcs->set_hubp_blank_en(hubp, true);
-	}
-
-	/* DCHUBBUB_SOFT_RESET:DCHUBBUB_GLOBAL_SOFT_RESET=1 */
-	hubbub1_soft_reset(dc->res_pool->hubbub, true);
-
-	/* DCHUBP_CNTL:HUBP_DISABLE=1 */
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		hubp = dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp;
-		if (hubp != NULL && hubp->funcs->hubp_disable_control)
-			hubp->funcs->hubp_disable_control(hubp, true);
-	}
-
-	/*
-	 * DCHUBP_CNTL:HUBP_DISABLE=0
-	 * This step used to pass true, which left every HUBP disabled.
-	 */
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		hubp = dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp;
-		if (hubp != NULL && hubp->funcs->hubp_disable_control)
-			hubp->funcs->hubp_disable_control(hubp, false);
-	}
-
-	/* DCHUBBUB_SOFT_RESET:DCHUBBUB_GLOBAL_SOFT_RESET=0 */
-	hubbub1_soft_reset(dc->res_pool->hubbub, false);
-
-	/*
-	 * DCHUBP_CNTL:HUBP_BLANK_EN=0
-	 * This step used to pass true, which left every HUBP blanked.
-	 */
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		hubp = dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp;
-		if (hubp != NULL && hubp->funcs->set_hubp_blank_en)
-			hubp->funcs->set_hubp_blank_en(hubp, false);
-	}
-
-	return true;
-}
-
-/*
- * Sanity check that the hubbub reports p-state change as allowed. On
- * failure the pipe state is traced, the debugger hook is hit and the forced
- * recovery workaround is tried, followed by a single re-check.
- */
-void dcn10_verify_allow_pstate_change_high(struct dc *dc)
-{
-	/* Never assigned in this function, so HW state logging stays off. */
-	static bool should_log_hw_state;
-	int i = 0;
-
-	if (hubbub1_verify_allow_pstate_change_high(dc->res_pool->hubbub))
-		return;
-
-	if (should_log_hw_state)
-		dcn10_log_hw_state(dc, NULL);
-
-	TRACE_DC_PIPE_STATE(pipe_ctx, i, MAX_PIPES);
-	BREAK_TO_DEBUGGER();
-
-	if (!dcn10_hw_wa_force_recovery(dc))
-		return;
-
-	/* Recovery was issued: check again. */
-	if (!hubbub1_verify_allow_pstate_change_high(dc->res_pool->hubbub))
-		BREAK_TO_DEBUGGER();
-}
-
-/*
- * Trigger HW to start disconnecting the plane from the stream on the next
- * vsync: remove the MPCC that serves this pipe's DPP from the OPP's MPC
- * tree, mark the disconnect as pending and disconnect the HUBP.
- *
- * Returns early when the pipe has no OPP or when no MPCC is found for the
- * DPP (already reset).
- */
-void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
-{
-	struct dce_hwseq *hws = dc->hwseq;
-	struct hubp *hubp = pipe_ctx->plane_res.hubp;
-	int dpp_id = pipe_ctx->plane_res.dpp->inst;
-	struct mpc *mpc = dc->res_pool->mpc;
-	struct mpc_tree *mpc_tree_params;
-	struct mpcc *mpcc_to_remove = NULL;
-	struct output_pixel_processor *opp = pipe_ctx->stream_res.opp;
-
-	/*
-	 * The MPC tree lives inside the OPP, so the NULL check has to come
-	 * before the first use of opp rather than after it.
-	 */
-	if (opp == NULL)
-		return;
-
-	mpc_tree_params = &(opp->mpc_tree_params);
-	mpcc_to_remove = mpc->funcs->get_mpcc_for_dpp(mpc_tree_params, dpp_id);
-
-	/* Already reset */
-	if (mpcc_to_remove == NULL)
-		return;
-
-	mpc->funcs->remove_mpcc(mpc, mpc_tree_params, mpcc_to_remove);
-	opp->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
-
-	dc->optimized_required = true;
-
-	if (hubp->funcs->hubp_disconnect)
-		hubp->funcs->hubp_disconnect(hubp);
-
-	if (dc->debug.sanity_checks)
-		hws->funcs.verify_allow_pstate_change_high(dc);
-}
-
-/**
- * dcn10_plane_atomic_power_down - Power down plane components.
- *
- * @dc: dc struct reference, used to reach the hwseq.
- * @dpp: dpp struct reference.
- * @hubp: hubp struct reference.
- *
- * Power gating needs the power gate request to be enabled, and that request
- * is normally kept disabled so that gate switches stay tightly controlled.
- * This function therefore enables the request, gates DPP and HUBP, resets
- * the DPP, and disables the request again. Nothing is done when
- * REG(DC_IP_REQUEST_CNTL) evaluates to zero.
- */
-void dcn10_plane_atomic_power_down(struct dc *dc,
-		struct dpp *dpp,
-		struct hubp *hubp)
-{
-	struct dce_hwseq *hws = dc->hwseq;
-	DC_LOGGER_INIT(dc->ctx->logger);
-
-	if (!REG(DC_IP_REQUEST_CNTL))
-		return;
-
-	/* Open the power gate request window. */
-	REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1);
-
-	if (hws->funcs.dpp_pg_control)
-		hws->funcs.dpp_pg_control(hws, dpp->inst, false);
-
-	if (hws->funcs.hubp_pg_control)
-		hws->funcs.hubp_pg_control(hws, hubp->inst, false);
-
-	dpp->funcs->dpp_reset(dpp);
-
-	/* Close the power gate request window again. */
-	REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0);
-
-	DC_LOG_DEBUG("Power gated front end %d\n", hubp->inst);
-}
-
-/*
- * Disable the HW used by a plane and clear the pipe's software state.
- * note: cannot disable until disconnect is complete, so the MPCC disconnect
- * is waited for first.
- */
-void dcn10_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
-{
-	struct dce_hwseq *hws = dc->hwseq;
-	struct hubp *hubp = pipe_ctx->plane_res.hubp;
-	struct dpp *dpp = pipe_ctx->plane_res.dpp;
-	struct output_pixel_processor *opp;
-	int opp_id = hubp->opp_id;
-
-	dc->hwss.wait_for_mpcc_disconnect(dc, dc->res_pool, pipe_ctx);
-
-	/* Stop the HUBP and DPP clocks. */
-	hubp->funcs->hubp_clk_cntl(hubp, false);
-	dpp->funcs->dpp_dppclk_control(dpp, false, false);
-
-	/* Stop the OPP pipe clock once its MPC tree no longer has an OPP list. */
-	opp = pipe_ctx->stream_res.opp;
-	if (opp_id != 0xf && opp->mpc_tree_params.opp_list == NULL)
-		opp->funcs->opp_pipe_clock_control(opp, false);
-
-	hubp->power_gated = true;
-	/* We're powering off, no need to optimize */
-	dc->optimized_required = false;
-
-	hws->funcs.plane_atomic_power_down(dc,
-			pipe_ctx->plane_res.dpp,
-			pipe_ctx->plane_res.hubp);
-
-	/* Forget everything this pipe was bound to. */
-	pipe_ctx->stream = NULL;
-	memset(&pipe_ctx->stream_res, 0, sizeof(pipe_ctx->stream_res));
-	memset(&pipe_ctx->plane_res, 0, sizeof(pipe_ctx->plane_res));
-	pipe_ctx->top_pipe = NULL;
-	pipe_ctx->bottom_pipe = NULL;
-	pipe_ctx->plane_state = NULL;
-}
-
-/*
- * Disable the plane of @pipe_ctx through the hwseq plane_atomic_disable
- * hook and apply the DEGVIDCN10_253 workaround afterwards. Does nothing
- * when the pipe has no HUBP or the HUBP is already power gated.
- */
-void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx)
-{
-	struct dce_hwseq *hws = dc->hwseq;
-	struct hubp *hubp = pipe_ctx->plane_res.hubp;
-	DC_LOGGER_INIT(dc->ctx->logger);
-
-	if (hubp == NULL || hubp->power_gated)
-		return;
-
-	hws->funcs.plane_atomic_disable(dc, pipe_ctx);
-	apply_DEGVIDCN10_253_wa(dc);
-
-	DC_LOG_DC("Power down front end %d\n", pipe_ctx->pipe_idx);
-}
-
-void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
-{ /*
-   * Bring the pipes to a disabled baseline in four passes:
-   *  1. detect whether any stream in @context requests the seamless boot
-   *     optimization;
-   *  2. blank (and lock) every enabled timing generator;
-   *  3. re-initialise each MPC instance;
-   *  4. disconnect and disable each plane through a scratch pipe_ctx taken
-   *     from dc->current_state, then re-initialise the timing generator.
-   * When seamless boot applies, pipes of @context that carry a stream are
-   * skipped by passes 2 and 3, and by the teardown in pass 4 if their
-   * timing generator is enabled.
-   */
- int i;
- struct dce_hwseq *hws = dc->hwseq;
- bool can_apply_seamless_boot = false;
-
- for (i = 0; i < context->stream_count; i++) {
- if (context->streams[i]->apply_seamless_boot_optimization) {
- can_apply_seamless_boot = true; /* one requesting stream is enough */
- break;
- }
- }
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct timing_generator *tg = dc->res_pool->timing_generators[i];
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-
- /* There is assumption that pipe_ctx is not mapping irregularly
- * to non-preferred front end. If pipe_ctx->stream is not NULL,
- * we will use the pipe, so don't disable
- */
- if (pipe_ctx->stream != NULL && can_apply_seamless_boot)
- continue;
-
- /* Blank controller using driver code instead of
- * command table. With an init_blank hook the hook blanks first
- * and the TG is locked afterwards; without it the TG is locked,
- * blanked and the blank is waited for.
- */
- if (tg->funcs->is_tg_enabled(tg)) {
- if (hws->funcs.init_blank != NULL) {
- hws->funcs.init_blank(dc, tg);
- tg->funcs->lock(tg);
- } else {
- tg->funcs->lock(tg);
- tg->funcs->set_blank(tg, true);
- hwss_wait_for_blank_complete(tg);
- }
- }
- }
-
- /* num_opp will be equal to number of mpcc */
- for (i = 0; i < dc->res_pool->res_cap->num_opp; i++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-
- /* Cannot reset the MPC mux if seamless boot */
- if (pipe_ctx->stream != NULL && can_apply_seamless_boot)
- continue;
-
- dc->res_pool->mpc->funcs->mpc_init_single_inst(
- dc->res_pool->mpc, i);
- }
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct timing_generator *tg = dc->res_pool->timing_generators[i];
- struct hubp *hubp = dc->res_pool->hubps[i];
- struct dpp *dpp = dc->res_pool->dpps[i];
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-
- /* There is assumption that pipe_ctx is not mapping irregularly
- * to non-preferred front end. If pipe_ctx->stream is not NULL,
- * we will use the pipe, so don't disable
- */
- if (can_apply_seamless_boot &&
- pipe_ctx->stream != NULL &&
- pipe_ctx->stream_res.tg->funcs->is_tg_enabled(
- pipe_ctx->stream_res.tg)) {
- // Enable double buffering for OTG_BLANK no matter if
- // seamless boot is enabled or not to suppress global sync
- // signals when OTG blanked. This is to prevent pipe from
- // requesting data while in PSR.
- tg->funcs->tg_init(tg);
- hubp->power_gated = true;
- continue;
- }
-
- /* Disable on the current state so the new one isn't cleared. */
- pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
-
- dpp->funcs->dpp_reset(dpp);
-
- pipe_ctx->stream_res.tg = tg; /* bind pool resource i to the scratch pipe_ctx */
- pipe_ctx->pipe_idx = i;
-
- pipe_ctx->plane_res.hubp = hubp;
- pipe_ctx->plane_res.dpp = dpp;
- pipe_ctx->plane_res.mpcc_inst = dpp->inst;
- hubp->mpcc_id = dpp->inst;
- hubp->opp_id = OPP_ID_INVALID;
- hubp->power_gated = false; /* disable_plane returns early for a power-gated hubp */
-
- dc->res_pool->opps[i]->mpc_tree_params.opp_id = dc->res_pool->opps[i]->inst;
- dc->res_pool->opps[i]->mpc_tree_params.opp_list = NULL;
- dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
- pipe_ctx->stream_res.opp = dc->res_pool->opps[i];
-
- hws->funcs.plane_atomic_disconnect(dc, pipe_ctx);
-
- if (tg->funcs->is_tg_enabled(tg))
- tg->funcs->unlock(tg); /* pairs with the lock taken in the blanking pass */
-
- dc->hwss.disable_plane(dc, pipe_ctx);
-
- pipe_ctx->stream_res.tg = NULL; /* drop the temporary bindings again */
- pipe_ctx->plane_res.hubp = NULL;
-
- tg->funcs->tg_init(tg);
- }
-}
-
-void dcn10_init_hw(struct dc *dc)
-{ /*
-   * Hardware bring-up for DCN10. In order: clock manager and DCCG init;
-   * on the FPGA (Maximus) environment a short register setup followed by
-   * an early return; otherwise VGA disable, BIOS golden init, reference
-   * clock discovery, link encoder init, DSC power gating, optional DP
-   * receiver power-down, pipe init, audio / panel / ABM / DMCU init and
-   * finally clock gating, power gating and watermark range notification.
-   */
- int i, j;
- struct abm *abm = dc->res_pool->abm;
- struct dmcu *dmcu = dc->res_pool->dmcu;
- struct dce_hwseq *hws = dc->hwseq;
- struct dc_bios *dcb = dc->ctx->dc_bios;
- struct resource_pool *res_pool = dc->res_pool;
- uint32_t backlight = MAX_BACKLIGHT_LEVEL; /* used for abm_init unless a panel_cntl hw_init overrides it */
- bool is_optimized_init_done = false; /* never reassigned here, so every !is_optimized_init_done branch runs */
-
- if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
- dc->clk_mgr->funcs->init_clocks(dc->clk_mgr);
-
- /* Align bw context with hw config when system resume.
- * NOTE(review): dc->clk_mgr is NULL-checked above but dereferenced
- * unconditionally here and at the end of the function - confirm it
- * can never be NULL.
- */
- if (dc->clk_mgr->clks.dispclk_khz != 0 && dc->clk_mgr->clks.dppclk_khz != 0) {
- dc->current_state->bw_ctx.bw.dcn.clk.dispclk_khz = dc->clk_mgr->clks.dispclk_khz;
- dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz = dc->clk_mgr->clks.dppclk_khz;
- }
-
- // Initialize the dccg
- if (dc->res_pool->dccg && dc->res_pool->dccg->funcs->dccg_init)
- dc->res_pool->dccg->funcs->dccg_init(res_pool->dccg);
-
- if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
-
- REG_WRITE(REFCLK_CNTL, 0);
- REG_UPDATE(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_ENABLE, 1);
- REG_WRITE(DIO_MEM_PWR_CTRL, 0);
-
- if (!dc->debug.disable_clock_gate) {
- /* enable all DCN clock gating */
- REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0);
-
- REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0);
-
- REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0);
- }
-
- //Enable ability to power gate / don't force power on permanently
- if (hws->funcs.enable_power_gating_plane)
- hws->funcs.enable_power_gating_plane(hws, true);
-
- return; /* FPGA environment: the rest of the bring-up is skipped */
- }
-
- if (!dcb->funcs->is_accelerated_mode(dcb))
- hws->funcs.disable_vga(dc->hwseq);
-
- hws->funcs.bios_golden_init(dc);
-
- if (dc->ctx->dc_bios->fw_info_valid) {
- res_pool->ref_clocks.xtalin_clock_inKhz =
- dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency;
-
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { /* always true here: the FPGA case returned above */
- if (res_pool->dccg && res_pool->hubbub) {
-
- (res_pool->dccg->funcs->get_dccg_ref_freq)(res_pool->dccg,
- dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency,
- &res_pool->ref_clocks.dccg_ref_clock_inKhz);
-
- (res_pool->hubbub->funcs->get_dchub_ref_freq)(res_pool->hubbub,
- res_pool->ref_clocks.dccg_ref_clock_inKhz,
- &res_pool->ref_clocks.dchub_ref_clock_inKhz);
- } else {
- // Not all ASICs have DCCG sw component
- res_pool->ref_clocks.dccg_ref_clock_inKhz =
- res_pool->ref_clocks.xtalin_clock_inKhz;
- res_pool->ref_clocks.dchub_ref_clock_inKhz =
- res_pool->ref_clocks.xtalin_clock_inKhz;
- }
- }
- } else
- ASSERT_CRITICAL(false); /* firmware info is required to derive the reference clocks */
-
- for (i = 0; i < dc->link_count; i++) {
- /* Power up AND update implementation according to the
- * required signal (which may be different from the
- * default signal on connector).
- */
- struct dc_link *link = dc->links[i];
-
- if (!is_optimized_init_done)
- link->link_enc->funcs->hw_init(link->link_enc);
-
- /* Check for enabled DIG to identify enabled display */
- if (link->link_enc->funcs->is_dig_enabled &&
- link->link_enc->funcs->is_dig_enabled(link->link_enc))
- link->link_status.link_active = true;
- }
-
- /* Power gate DSCs */
- if (!is_optimized_init_done) {
- for (i = 0; i < res_pool->res_cap->num_dsc; i++)
- if (hws->funcs.dsc_pg_control != NULL)
- hws->funcs.dsc_pg_control(hws, res_pool->dscs[i]->inst, false);
- }
-
- /* Enable outbox notification feature of dmub */
- if (dc->debug.enable_dmub_aux_for_legacy_ddc)
- dmub_enable_outbox_notification(dc);
-
- /* we want to turn off all dp displays before doing detection */
- if (dc->config.power_down_display_on_boot) {
- uint8_t dpcd_power_state = '\0';
- enum dc_status status = DC_ERROR_UNEXPECTED;
-
- for (i = 0; i < dc->link_count; i++) {
- if (dc->links[i]->connector_signal != SIGNAL_TYPE_DISPLAY_PORT)
- continue;
-
- /* DP 2.0 requires that LTTPR Caps be read first */
- dp_retrieve_lttpr_cap(dc->links[i]);
-
- /*
- * If any of the displays are lit up turn them off.
- * The reason is that some MST hubs cannot be turned off
- * completely until we tell them to do so.
- * If not turned off, then displays connected to MST hub
- * won't light up.
- */
- status = core_link_read_dpcd(dc->links[i], DP_SET_POWER,
- &dpcd_power_state, sizeof(dpcd_power_state));
- if (status == DC_OK && dpcd_power_state == DP_POWER_STATE_D0) {
- /* blank dp stream before power off receiver*/
- if (dc->links[i]->link_enc->funcs->get_dig_frontend) {
- unsigned int fe = dc->links[i]->link_enc->funcs->get_dig_frontend(dc->links[i]->link_enc);
-
- for (j = 0; j < dc->res_pool->stream_enc_count; j++) {
- if (fe == dc->res_pool->stream_enc[j]->id) { /* stream encoder driving this link's DIG front end */
- dc->res_pool->stream_enc[j]->funcs->dp_blank(dc->links[i],
- dc->res_pool->stream_enc[j]);
- break;
- }
- }
- }
- dp_receiver_power_ctrl(dc->links[i], false);
- }
- }
- }
-
- /* If taking control over from VBIOS, we may want to optimize our first
- * mode set, so we need to skip powering down pipes until we know which
- * pipes we want to use.
- * Otherwise, if taking control is not possible, we need to power
- * everything down.
- */
- if (dcb->funcs->is_accelerated_mode(dcb) || dc->config.power_down_display_on_boot) {
- if (!is_optimized_init_done) {
- hws->funcs.init_pipes(dc, dc->current_state);
- if (dc->res_pool->hubbub->funcs->allow_self_refresh_control)
- dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub,
- !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter);
- }
- }
-
- if (!is_optimized_init_done) {
-
- for (i = 0; i < res_pool->audio_count; i++) {
- struct audio *audio = res_pool->audios[i];
-
- audio->funcs->hw_init(audio);
- }
-
- for (i = 0; i < dc->link_count; i++) {
- struct dc_link *link = dc->links[i];
-
- if (link->panel_cntl)
- backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl); /* the last link with a panel_cntl wins */
- }
-
- if (abm != NULL)
- abm->funcs->abm_init(abm, backlight);
-
- if (dmcu != NULL && !dmcu->auto_load_dmcu)
- dmcu->funcs->dmcu_init(dmcu);
- }
-
- if (abm != NULL && dmcu != NULL)
- abm->dmcu_is_running = dmcu->funcs->is_dmcu_initialized(dmcu);
-
- /* power AFMT HDMI memory TODO: may move to dis/en output save power*/
- if (!is_optimized_init_done)
- REG_WRITE(DIO_MEM_PWR_CTRL, 0);
-
- if (!dc->debug.disable_clock_gate) {
- /* enable all DCN clock gating */
- REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0);
-
- REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0);
-
- REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0);
- }
- if (hws->funcs.enable_power_gating_plane)
- hws->funcs.enable_power_gating_plane(dc->hwseq, true);
-
- if (dc->clk_mgr->funcs->notify_wm_ranges)
- dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr);
-}
-
-/* In headless boot cases, DIG may be turned
- * on which causes HW/SW discrepancies.
- * To avoid this, power down hardware on boot
- * if DIG is turned on.
- *
- * If the first eDP link has its DIG enabled and the backlight, power_down
- * and eDP power hooks all exist, the panel is shut down in that order.
- * Otherwise the hardware is powered down once as soon as any link reports
- * an enabled DIG.
- */
-void dcn10_power_down_on_boot(struct dc *dc)
-{
-	struct dc_link *edp_links[MAX_NUM_EDP];
-	struct dc_link *edp_link = NULL;
-	int edp_num;
-	int i = 0;
-
-	get_edp_links(dc, edp_links, &edp_num);
-	if (edp_num)
-		edp_link = edp_links[0];
-
-	/*
-	 * link_enc is checked here as well, matching the fallback loop
-	 * below; an eDP link without an encoder falls through to that loop.
-	 */
-	if (edp_link && edp_link->link_enc &&
-			edp_link->link_enc->funcs->is_dig_enabled &&
-			edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) &&
-			dc->hwseq->funcs.edp_backlight_control &&
-			dc->hwss.power_down &&
-			dc->hwss.edp_power_control) {
-		dc->hwseq->funcs.edp_backlight_control(edp_link, false);
-		dc->hwss.power_down(dc);
-		dc->hwss.edp_power_control(edp_link, false);
-	} else {
-		for (i = 0; i < dc->link_count; i++) {
-			struct dc_link *link = dc->links[i];
-
-			if (link->link_enc && link->link_enc->funcs->is_dig_enabled &&
-					link->link_enc->funcs->is_dig_enabled(link->link_enc) &&
-					dc->hwss.power_down) {
-				/* One power_down covers the whole device. */
-				dc->hwss.power_down(dc);
-				break;
-			}
-		}
-	}
-
-	/*
-	 * Ask the clock manager to enter its low power state so that
-	 * DISPLAY_OFF is sent; otherwise DISPLAY_OFF may not be asserted.
-	 */
-	if (dc->clk_mgr->funcs->set_low_power_state)
-		dc->clk_mgr->funcs->set_low_power_state(dc->clk_mgr);
-}
-
-/*
- * Reset the back end of every top pipe of the current state whose stream
- * is gone from @context or needs reprogramming. Pipes are visited from the
- * highest index down to 0.
- */
-void dcn10_reset_hw_ctx_wrap(
-		struct dc *dc,
-		struct dc_state *context)
-{
-	struct dce_hwseq *hws = dc->hwseq;
-	int i = dc->res_pool->pipe_count;
-
-	/* Reset Back End */
-	while (--i >= 0) {
-		struct pipe_ctx *old_pipe =
-			&dc->current_state->res_ctx.pipe_ctx[i];
-		struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
-		struct clock_source *old_clk;
-
-		/* Only top pipes that currently drive a stream are candidates. */
-		if (!old_pipe->stream || old_pipe->top_pipe)
-			continue;
-
-		/* Stream kept and no reprogramming needed: leave it running. */
-		if (new_pipe->stream && !pipe_need_reprogram(old_pipe, new_pipe))
-			continue;
-
-		/* Remember the clock source before the pipe is reset. */
-		old_clk = old_pipe->clock_source;
-
-		dcn10_reset_back_end_for_pipe(dc, old_pipe, dc->current_state);
-		if (hws->funcs.enable_stream_gating)
-			hws->funcs.enable_stream_gating(dc, old_pipe);
-		if (old_clk)
-			old_clk->funcs->cs_power_down(old_clk);
-	}
-}
-
-/*
- * Adjust the plane address for stereo presentation.
- *
- * For the secondary split pipe of a stereo surface shown side-by-side or
- * top-and-bottom, the left address is saved to @addr and overwritten with
- * the right address; true is returned so the caller can restore it.
- * Otherwise, when the stream has a 3D view format but the surface address
- * is not stereo, the address is turned into a stereo one whose right
- * addresses mirror the left ones. Returns false in that case and when
- * nothing is changed.
- */
-static bool patch_address_for_sbs_tb_stereo(
-		struct pipe_ctx *pipe_ctx, PHYSICAL_ADDRESS_LOC *addr)
-{
-	struct dc_plane_state *plane_state = pipe_ctx->plane_state;
-	bool sec_split = pipe_ctx->top_pipe &&
-		pipe_ctx->top_pipe->plane_state == pipe_ctx->plane_state;
-
-	if (sec_split &&
-			plane_state->address.type == PLN_ADDR_TYPE_GRPH_STEREO) {
-		switch (pipe_ctx->stream->timing.timing_3d_format) {
-		case TIMING_3D_FORMAT_SIDE_BY_SIDE:
-		case TIMING_3D_FORMAT_TOP_AND_BOTTOM:
-			*addr = plane_state->address.grph_stereo.left_addr;
-			plane_state->address.grph_stereo.left_addr =
-				plane_state->address.grph_stereo.right_addr;
-			return true;
-		default:
-			break;
-		}
-	}
-
-	if (pipe_ctx->stream->view_format != VIEW_3D_FORMAT_NONE &&
-			plane_state->address.type != PLN_ADDR_TYPE_GRPH_STEREO) {
-		plane_state->address.type = PLN_ADDR_TYPE_GRPH_STEREO;
-		plane_state->address.grph_stereo.right_addr =
-			plane_state->address.grph_stereo.left_addr;
-		plane_state->address.grph_stereo.right_meta_addr =
-			plane_state->address.grph_stereo.left_meta_addr;
-	}
-
-	return false;
-}
-
-/*
- * Program the surface address of the pipe's plane into its HUBP and record
- * it in the plane status. Does nothing when the pipe has no plane state.
- */
-void dcn10_update_plane_addr(const struct dc *dc, struct pipe_ctx *pipe_ctx)
-{
-	struct dc_plane_state *plane_state = pipe_ctx->plane_state;
-	struct hubp *hubp;
-	PHYSICAL_ADDRESS_LOC saved_left_addr;
-	bool addr_patched;
-
-	if (!plane_state)
-		return;
-
-	/* May temporarily replace the left address for stereo split pipes. */
-	addr_patched = patch_address_for_sbs_tb_stereo(pipe_ctx, &saved_left_addr);
-
-	hubp = pipe_ctx->plane_res.hubp;
-	hubp->funcs->hubp_program_surface_flip_and_addr(hubp,
-			&plane_state->address,
-			plane_state->flip_immediate);
-
-	plane_state->status.requested_address = plane_state->address;
-
-	/* An immediate flip makes the requested address current right away. */
-	if (plane_state->flip_immediate)
-		plane_state->status.current_address = plane_state->address;
-
-	/* Undo the temporary stereo patch. */
-	if (addr_patched)
-		pipe_ctx->plane_state->address.grph_stereo.left_addr = saved_left_addr;
-}
-
-/*
- * Program the DPP input (degamma) stage from @plane_state.
- *
- * Returns false when the pipe has no DPP or when the plane uses a
- * predefined transfer function this routine has no case for; true
- * otherwise.
- */
-bool dcn10_set_input_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
-		const struct dc_plane_state *plane_state)
-{
-	struct dpp *dpp_base = pipe_ctx->plane_res.dpp;
-	const struct dc_transfer_func *tf;
-
-	if (!dpp_base)
-		return false;
-
-	tf = plane_state->in_transfer_func;
-
-	/* Input LUT: only for a non-identity gamma on a LUT-capable format. */
-	if (plane_state->gamma_correction &&
-			!dpp_base->ctx->dc->debug.always_use_regamma &&
-			!plane_state->gamma_correction->is_identity &&
-			dce_use_lut(plane_state->format))
-		dpp_base->funcs->dpp_program_input_lut(dpp_base,
-				plane_state->gamma_correction);
-
-	/* No transfer function, or an explicit bypass: degamma off. */
-	if (!tf || tf->type == TF_TYPE_BYPASS) {
-		dpp_base->funcs->dpp_set_degamma(dpp_base, IPP_DEGAMMA_MODE_BYPASS);
-		return true;
-	}
-
-	/* Anything that is not predefined is programmed as a user PWL. */
-	if (tf->type != TF_TYPE_PREDEFINED) {
-		cm_helper_translate_curve_to_degamma_hw_format(tf,
-				&dpp_base->degamma_params);
-		dpp_base->funcs->dpp_program_degamma_pwl(dpp_base,
-				&dpp_base->degamma_params);
-		return true;
-	}
-
-	switch (tf->tf) {
-	case TRANSFER_FUNCTION_SRGB:
-		dpp_base->funcs->dpp_set_degamma(dpp_base, IPP_DEGAMMA_MODE_HW_sRGB);
-		return true;
-	case TRANSFER_FUNCTION_BT709:
-		dpp_base->funcs->dpp_set_degamma(dpp_base, IPP_DEGAMMA_MODE_HW_xvYCC);
-		return true;
-	case TRANSFER_FUNCTION_LINEAR:
-		dpp_base->funcs->dpp_set_degamma(dpp_base, IPP_DEGAMMA_MODE_BYPASS);
-		return true;
-	case TRANSFER_FUNCTION_PQ:
-		dpp_base->funcs->dpp_set_degamma(dpp_base, IPP_DEGAMMA_MODE_USER_PWL);
-		cm_helper_translate_curve_to_degamma_hw_format(tf,
-				&dpp_base->degamma_params);
-		dpp_base->funcs->dpp_program_degamma_pwl(dpp_base,
-				&dpp_base->degamma_params);
-		return true;
-	default:
-		return false;
-	}
-}
-
-#define MAX_NUM_HW_POINTS 0x200
-
-/*
- * Dump the points of a transfer function to the gamma log channels.
- *
- * DC_LOG_GAMMA is the default log and covers the first @hw_points_num red
- * points; DC_LOG_ALL_GAMMA covers the red points from there up to
- * MAX_NUM_HW_POINTS; DC_LOG_ALL_TF_CHANNELS carries the green and blue
- * channels for both ranges.
- */
-static void log_tf(struct dc_context *ctx,
-		struct dc_transfer_func *tf, uint32_t hw_points_num)
-{
-	int i;
-
-	DC_LOGGER_INIT(ctx->logger);
-	DC_LOG_GAMMA("Gamma Correction TF");
-	DC_LOG_ALL_GAMMA("Logging all tf points...");
-	DC_LOG_ALL_TF_CHANNELS("Logging all channels...");
-
-	/* Hardware points. */
-	i = 0;
-	while (i < hw_points_num) {
-		DC_LOG_GAMMA("R\t%d\t%llu", i, tf->tf_pts.red[i].value);
-		DC_LOG_ALL_TF_CHANNELS("G\t%d\t%llu", i, tf->tf_pts.green[i].value);
-		DC_LOG_ALL_TF_CHANNELS("B\t%d\t%llu", i, tf->tf_pts.blue[i].value);
-		i++;
-	}
-
-	/* Remaining points up to MAX_NUM_HW_POINTS. */
-	i = hw_points_num;
-	while (i < MAX_NUM_HW_POINTS) {
-		DC_LOG_ALL_GAMMA("R\t%d\t%llu", i, tf->tf_pts.red[i].value);
-		DC_LOG_ALL_TF_CHANNELS("G\t%d\t%llu", i, tf->tf_pts.green[i].value);
-		DC_LOG_ALL_TF_CHANNELS("B\t%d\t%llu", i, tf->tf_pts.blue[i].value);
-		i++;
-	}
-}
-
-/*
- * Program the DPP regamma stage from the stream's output transfer function.
- *
- * A predefined sRGB function selects the hardware sRGB curve. Any other
- * function is translated into a user PWL, and if that translation fails
- * regamma is bypassed. The function is logged afterwards when present.
- *
- * Returns false when the pipe has no DPP or @stream is NULL, true otherwise.
- */
-bool dcn10_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
-		const struct dc_stream_state *stream)
-{
-	struct dpp *dpp = pipe_ctx->plane_res.dpp;
-
-	/*
-	 * stream is dereferenced right below, so it is validated here and
-	 * not only in front of the logging at the end.
-	 */
-	if (dpp == NULL || stream == NULL)
-		return false;
-
-	dpp->regamma_params.hw_points_num = GAMMA_HW_POINTS_NUM;
-
-	if (stream->out_transfer_func &&
-			stream->out_transfer_func->type == TF_TYPE_PREDEFINED &&
-			stream->out_transfer_func->tf == TRANSFER_FUNCTION_SRGB)
-		dpp->funcs->dpp_program_regamma_pwl(dpp, NULL, OPP_REGAMMA_SRGB);
-
-	/* dcn10_translate_regamma_to_hw_format takes 750us, only do it when full
-	 * update.
-	 */
-	else if (cm_helper_translate_curve_to_hw_format(
-			stream->out_transfer_func,
-			&dpp->regamma_params, false)) {
-		dpp->funcs->dpp_program_regamma_pwl(
-				dpp,
-				&dpp->regamma_params, OPP_REGAMMA_USER);
-	} else
-		dpp->funcs->dpp_program_regamma_pwl(dpp, NULL, OPP_REGAMMA_BYPASS);
-
-	if (stream->ctx != NULL && stream->out_transfer_func != NULL) {
-		log_tf(stream->ctx,
-				stream->out_transfer_func,
-				dpp->regamma_params.hw_points_num);
-	}
-
-	return true;
-}
-
-/*
- * Lock or unlock the timing generator of @pipe.
- *
- * The TG master update lock covers everything on the TG, so only the top
- * pipe takes it; a NULL pipe or a pipe with a top_pipe is ignored. With
- * sanity checks enabled, p-state change is verified before and after.
- */
-void dcn10_pipe_control_lock(
-		struct dc *dc,
-		struct pipe_ctx *pipe,
-		bool lock)
-{
-	struct dce_hwseq *hws = dc->hwseq;
-	struct timing_generator *tg;
-
-	if (pipe == NULL || pipe->top_pipe != NULL)
-		return;
-
-	if (dc->debug.sanity_checks)
-		hws->funcs.verify_allow_pstate_change_high(dc);
-
-	tg = pipe->stream_res.tg;
-	if (lock)
-		tg->funcs->lock(tg);
-	else
-		tg->funcs->unlock(tg);
-
-	if (dc->debug.sanity_checks)
-		hws->funcs.verify_allow_pstate_change_high(dc);
-}
-
-/**
- * delay_cursor_until_vupdate() - Delay cursor update if too close to VUPDATE.
- *
- * Software keepout workaround. It keeps cursor update locking from stalling
- * cursor updates indefinitely, and from retaining old values when the
- * viewport changes in the same frame as the cursor.
- *
- * The remaining time from VPOS to VUPDATE is computed; if it is too short,
- * the function busy-waits until VUPDATE has finished.
- *
- * TODO: Optimize cursor programming to be once per frame before VUPDATE
- * to avoid the need for this workaround.
- */
-static void delay_cursor_until_vupdate(struct dc *dc, struct pipe_ctx *pipe_ctx)
-{
-	struct dc_stream_state *stream = pipe_ctx->stream;
-	struct crtc_position position;
-	uint32_t vupdate_start, vupdate_end;
-	unsigned int lines_to_vupdate = 0;
-	unsigned int us_to_vupdate, vpos;
-	unsigned int us_per_line, us_vupdate;
-
-	/* Both hooks and both resources are needed to measure anything. */
-	if (!dc->hwss.calc_vupdate_position || !dc->hwss.get_position)
-		return;
-	if (!pipe_ctx->stream_res.stream_enc || !pipe_ctx->stream_res.tg)
-		return;
-
-	dc->hwss.calc_vupdate_position(dc, pipe_ctx, &vupdate_start,
-			&vupdate_end);
-
-	dc->hwss.get_position(&pipe_ctx, 1, &position);
-	vpos = position.vertical_count;
-
-	/* Avoid wraparound calculation issues */
-	vupdate_start += stream->timing.v_total;
-	vupdate_end += stream->timing.v_total;
-	vpos += stream->timing.v_total;
-
-	if (vpos <= vupdate_start) {
-		/* VPOS is in VACTIVE or back porch. */
-		lines_to_vupdate = vupdate_start - vpos;
-	} else if (vpos > vupdate_end) {
-		/* VPOS is in the front porch. */
-		return;
-	}
-	/* Otherwise VPOS is inside VUPDATE and lines_to_vupdate stays 0. */
-
-	/* Calculate time until VUPDATE in microseconds. */
-	us_per_line =
-		stream->timing.h_total * 10000u / stream->timing.pix_clk_100hz;
-	us_to_vupdate = lines_to_vupdate * us_per_line;
-
-	/* 70 us is a conservative estimate of cursor update time */
-	if (us_to_vupdate > 70)
-		return;
-
-	/* Stall out until the cursor update completes. */
-	if (vupdate_end < vupdate_start)
-		vupdate_end += stream->timing.v_total;
-	us_vupdate = (vupdate_end - vupdate_start + 1) * us_per_line;
-	udelay(us_to_vupdate + us_vupdate);
-}
-
-/*
- * Take or release the cursor lock for the MPCC tree of @pipe, either
- * through the DMUB HW lock manager or directly through the MPC.
- */
-void dcn10_cursor_lock(struct dc *dc, struct pipe_ctx *pipe, bool lock)
-{
-	union dmub_hw_lock_flags hw_locks = { 0 };
-	struct dmub_hw_lock_inst_flags inst_flags = { 0 };
-
-	/* cursor lock is per MPCC tree, so only need to lock one pipe per stream */
-	if (pipe == NULL || pipe->top_pipe != NULL)
-		return;
-
-	/* Prevent cursor lock from stalling out cursor updates. */
-	if (lock)
-		delay_cursor_until_vupdate(dc, pipe);
-
-	/* No stream, or DMUB locking not in use for this link: lock in the MPC. */
-	if (!pipe->stream || !should_use_dmub_lock(pipe->stream->link)) {
-		dc->res_pool->mpc->funcs->cursor_lock(dc->res_pool->mpc,
-				pipe->stream_res.opp->inst, lock);
-		return;
-	}
-
-	hw_locks.bits.lock_cursor = 1;
-	inst_flags.opp_inst = pipe->stream_res.opp->inst;
-
-	dmub_hw_lock_mgr_cmd(dc->ctx->dmub_srv,
-			lock,
-			&hw_locks,
-			&inst_flags);
-}
-
-/*
- * Poll @tg until its triggered reset has occurred.
- *
- * Gives up after frames_to_wait_on_triggered_reset frames, or at once when
- * the TG counter is not moving. Returns true if the reset was observed;
- * otherwise logs a timeout error and returns false.
- */
-static bool wait_for_reset_trigger_to_occur(
-		struct dc_context *dc_ctx,
-		struct timing_generator *tg)
-{
-	/* Bounded wait so a missing trigger cannot loop forever. */
-	const uint32_t frames_to_wait_on_triggered_reset = 10;
-	bool rc = false;
-	int i = 0;
-
-	while (i < frames_to_wait_on_triggered_reset) {
-		if (!tg->funcs->is_counter_moving(tg)) {
-			DC_ERROR("TG counter is not moving!\n");
-			break;
-		}
-
-		if (tg->funcs->did_triggered_reset_occur(tg)) {
-			/* usually occurs at i=1 */
-			rc = true;
-			DC_SYNC_INFO("GSL: reset occurred at wait count: %d\n",
-					i);
-			break;
-		}
-
-		/* Wait for one frame. */
-		tg->funcs->wait_for_state(tg, CRTC_STATE_VACTIVE);
-		tg->funcs->wait_for_state(tg, CRTC_STATE_VBLANK);
-		i++;
-	}
-
-	if (!rc)
-		DC_ERROR("GSL: Timeout on reset trigger!\n");
-
-	return rc;
-}
-
-/*
- * Reduce the fraction *numerator / *denominator in place by dividing out
- * every common prime factor below 1000.
- *
- * @numerator:          in/out numerator
- * @denominator:        in/out denominator
- * @checkUint32Bounary: when true, stop as soon as both values fit in 32
- *                      bits and report whether they do
- *
- * Returns non-zero (true) when @checkUint32Bounary is false, or when both
- * reduced values fit in 32 bits; zero otherwise.
- */
-uint64_t reduceSizeAndFraction(
-		uint64_t *numerator,
-		uint64_t *denominator,
-		bool checkUint32Bounary)
-{
-	static const uint16_t prime_numbers[] = {
-		2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43,
-		47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103,
-		107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163,
-		167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227,
-		229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281,
-		283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353,
-		359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421,
-		431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487,
-		491, 499, 503, 509, 521, 523, 541, 547, 557, 563, 569,
-		571, 577, 587, 593, 599, 601, 607, 613, 617, 619, 631,
-		641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701,
-		709, 719, 727, 733, 739, 743, 751, 757, 761, 769, 773,
-		787, 797, 809, 811, 821, 823, 827, 829, 839, 853, 857,
-		859, 863, 877, 881, 883, 887, 907, 911, 919, 929, 937,
-		941, 947, 953, 967, 971, 977, 983, 991, 997};
-	const uint64_t max_uint32 = 0xffffffff;
-	const int count = ARRAY_SIZE(prime_numbers);
-	uint64_t num = *numerator;
-	uint64_t denom = *denominator;
-	int i;
-
-	/*
-	 * 0/0 is "divisible" by every prime without ever changing, which
-	 * would spin forever in the inner loop; there is nothing to reduce.
-	 */
-	if (num != 0 || denom != 0) {
-		for (i = 0; i < count; i++) {
-			uint32_t num_remainder, denom_remainder;
-			uint64_t num_result, denom_result;
-
-			/* Small enough already: no need to reduce further. */
-			if (checkUint32Bounary &&
-					num <= max_uint32 && denom <= max_uint32)
-				break;
-
-			/* Divide out this prime as often as it is shared. */
-			do {
-				num_result = div_u64_rem(num, prime_numbers[i], &num_remainder);
-				denom_result = div_u64_rem(denom, prime_numbers[i], &denom_remainder);
-				if (num_remainder == 0 && denom_remainder == 0) {
-					num = num_result;
-					denom = denom_result;
-				}
-			} while (num_remainder == 0 && denom_remainder == 0);
-		}
-	}
-
-	*numerator = num;
-	*denominator = denom;
-
-	if (!checkUint32Bounary)
-		return true;
-
-	/*
-	 * Evaluate the bound on the final values, so a fraction that only
-	 * fits after dividing by the last prime is still reported as success.
-	 */
-	return num <= max_uint32 && denom <= max_uint32;
-}
-
-/*
- * Return true when the refresh rate of the pipe's stream, computed as
- * pix_clk_100hz * 100 / h_total / v_total, is 30 or lower.
- */
-bool is_low_refresh_rate(struct pipe_ctx *pipe)
-{
-	struct dc_stream_state *stream = pipe->stream;
-	uint32_t refresh_rate;
-
-	refresh_rate = stream->timing.pix_clk_100hz * 100 /
-			stream->timing.h_total /
-			stream->timing.v_total;
-
-	return !(refresh_rate > 30);
-}
-
-/*
- * Compute the clock divider for @pipe: doubled for a low refresh rate
- * (only when @account_low_refresh_rate is set), doubled for YCbCr 4:2:0
- * pixel encoding, and multiplied by the number of pipes in the ODM chain
- * that starts at @pipe.
- */
-uint8_t get_clock_divider(struct pipe_ctx *pipe, bool account_low_refresh_rate)
-{
-	uint32_t divider = 1;
-	uint32_t odm_pipes = 1;
-	struct pipe_ctx *odm;
-
-	if (account_low_refresh_rate && is_low_refresh_rate(pipe))
-		divider *= 2;
-
-	if (pipe->stream_res.pix_clk_params.pixel_encoding == PIXEL_ENCODING_YCBCR420)
-		divider *= 2;
-
-	/* Count this pipe plus every pipe chained behind it. */
-	for (odm = pipe->next_odm_pipe; odm; odm = odm->next_odm_pipe)
-		odm_pipes++;
-
-	return divider * odm_pipes;
-}
-
-int dcn10_align_pixel_clocks(
- struct dc *dc,
- int group_size,
- struct pipe_ctx *grouped_pipes[])
-{
- struct dc_context *dc_ctx = dc->ctx;
- int i, master = -1, embedded = -1;
- struct dc_crtc_timing hw_crtc_timing[MAX_PIPES] = {0};
- uint64_t phase[MAX_PIPES];
- uint64_t modulo[MAX_PIPES];
- unsigned int pclk;
-
- uint32_t embedded_pix_clk_100hz;
- uint16_t embedded_h_total;
- uint16_t embedded_v_total;
- bool clamshell_closed = false;
- uint32_t dp_ref_clk_100hz =
- dc->res_pool->dp_clock_source->ctx->dc->clk_mgr->dprefclk_khz*10;
-
- if (dc->config.vblank_alignment_dto_params &&
- dc->res_pool->dp_clock_source->funcs->override_dp_pix_clk) {
- clamshell_closed =
- (dc->config.vblank_alignment_dto_params >> 63);
- embedded_h_total =
- (dc->config.vblank_alignment_dto_params >> 32) & 0x7FFF;
- embedded_v_total =
- (dc->config.vblank_alignment_dto_params >> 48) & 0x7FFF;
- embedded_pix_clk_100hz =
- dc->config.vblank_alignment_dto_params & 0xFFFFFFFF;
-
- for (i = 0; i < group_size; i++) {
- grouped_pipes[i]->stream_res.tg->funcs->get_hw_timing(
- grouped_pipes[i]->stream_res.tg,
- &hw_crtc_timing[i]);
- dc->res_pool->dp_clock_source->funcs->get_pixel_clk_frequency_100hz(
- dc->res_pool->dp_clock_source,
- grouped_pipes[i]->stream_res.tg->inst,
- &pclk);
- hw_crtc_timing[i].pix_clk_100hz = pclk;
- if (dc_is_embedded_signal(
- grouped_pipes[i]->stream->signal)) {
- embedded = i;
- master = i;
- phase[i] = embedded_pix_clk_100hz*100;
- modulo[i] = dp_ref_clk_100hz*100;
- } else {
-
- phase[i] = (uint64_t)embedded_pix_clk_100hz*
- hw_crtc_timing[i].h_total*
- hw_crtc_timing[i].v_total;
- phase[i] = div_u64(phase[i], get_clock_divider(grouped_pipes[i], true));
- modulo[i] = (uint64_t)dp_ref_clk_100hz*
- embedded_h_total*
- embedded_v_total;
-
- if (reduceSizeAndFraction(&phase[i],
- &modulo[i], true) == false) {
- /*
- * this will help to stop reporting
- * this timing synchronizable
- */
- DC_SYNC_INFO("Failed to reduce DTO parameters\n");
- grouped_pipes[i]->stream->has_non_synchronizable_pclk = true;
- }
- }
- }
-
- for (i = 0; i < group_size; i++) {
- if (i != embedded && !grouped_pipes[i]->stream->has_non_synchronizable_pclk) {
- dc->res_pool->dp_clock_source->funcs->override_dp_pix_clk(
- dc->res_pool->dp_clock_source,
- grouped_pipes[i]->stream_res.tg->inst,
- phase[i], modulo[i]);
- dc->res_pool->dp_clock_source->funcs->get_pixel_clk_frequency_100hz(
- dc->res_pool->dp_clock_source,
- grouped_pipes[i]->stream_res.tg->inst, &pclk);
- grouped_pipes[i]->stream->timing.pix_clk_100hz =
- pclk*get_clock_divider(grouped_pipes[i], false);
- if (master == -1)
- master = i;
- }
- }
-
- }
- return master;
-}
-
-void dcn10_enable_vblanks_synchronization(
- struct dc *dc,
- int group_index,
- int group_size,
- struct pipe_ctx *grouped_pipes[])
-{
- struct dc_context *dc_ctx = dc->ctx;
- struct output_pixel_processor *opp;
- struct timing_generator *tg;
- int i, width, height, master;
-
- for (i = 1; i < group_size; i++) {
- opp = grouped_pipes[i]->stream_res.opp;
- tg = grouped_pipes[i]->stream_res.tg;
- tg->funcs->get_otg_active_size(tg, &width, &height);
- if (opp->funcs->opp_program_dpg_dimensions)
- opp->funcs->opp_program_dpg_dimensions(opp, width, 2*(height) + 1);
- }
-
- for (i = 0; i < group_size; i++) {
- if (grouped_pipes[i]->stream == NULL)
- continue;
- grouped_pipes[i]->stream->vblank_synchronized = false;
- grouped_pipes[i]->stream->has_non_synchronizable_pclk = false;
- }
-
- DC_SYNC_INFO("Aligning DP DTOs\n");
-
- master = dcn10_align_pixel_clocks(dc, group_size, grouped_pipes);
-
- DC_SYNC_INFO("Synchronizing VBlanks\n");
-
- if (master >= 0) {
- for (i = 0; i < group_size; i++) {
- if (i != master && !grouped_pipes[i]->stream->has_non_synchronizable_pclk)
- grouped_pipes[i]->stream_res.tg->funcs->align_vblanks(
- grouped_pipes[master]->stream_res.tg,
- grouped_pipes[i]->stream_res.tg,
- grouped_pipes[master]->stream->timing.pix_clk_100hz,
- grouped_pipes[i]->stream->timing.pix_clk_100hz,
- get_clock_divider(grouped_pipes[master], false),
- get_clock_divider(grouped_pipes[i], false));
- grouped_pipes[i]->stream->vblank_synchronized = true;
- }
- grouped_pipes[master]->stream->vblank_synchronized = true;
- DC_SYNC_INFO("Sync complete\n");
- }
-
- for (i = 1; i < group_size; i++) {
- opp = grouped_pipes[i]->stream_res.opp;
- tg = grouped_pipes[i]->stream_res.tg;
- tg->funcs->get_otg_active_size(tg, &width, &height);
- if (opp->funcs->opp_program_dpg_dimensions)
- opp->funcs->opp_program_dpg_dimensions(opp, width, height);
- }
-}
-
-void dcn10_enable_timing_synchronization(
- struct dc *dc,
- int group_index,
- int group_size,
- struct pipe_ctx *grouped_pipes[])
-{
- struct dc_context *dc_ctx = dc->ctx;
- struct output_pixel_processor *opp;
- struct timing_generator *tg;
- int i, width, height;
-
- DC_SYNC_INFO("Setting up OTG reset trigger\n");
-
- for (i = 1; i < group_size; i++) {
- opp = grouped_pipes[i]->stream_res.opp;
- tg = grouped_pipes[i]->stream_res.tg;
- tg->funcs->get_otg_active_size(tg, &width, &height);
- if (opp->funcs->opp_program_dpg_dimensions)
- opp->funcs->opp_program_dpg_dimensions(opp, width, 2*(height) + 1);
- }
-
- for (i = 0; i < group_size; i++) {
- if (grouped_pipes[i]->stream == NULL)
- continue;
- grouped_pipes[i]->stream->vblank_synchronized = false;
- }
-
- for (i = 1; i < group_size; i++)
- grouped_pipes[i]->stream_res.tg->funcs->enable_reset_trigger(
- grouped_pipes[i]->stream_res.tg,
- grouped_pipes[0]->stream_res.tg->inst);
-
- DC_SYNC_INFO("Waiting for trigger\n");
-
- /* Need to get only check 1 pipe for having reset as all the others are
- * synchronized. Look at last pipe programmed to reset.
- */
-
- wait_for_reset_trigger_to_occur(dc_ctx, grouped_pipes[1]->stream_res.tg);
- for (i = 1; i < group_size; i++)
- grouped_pipes[i]->stream_res.tg->funcs->disable_reset_trigger(
- grouped_pipes[i]->stream_res.tg);
-
- for (i = 1; i < group_size; i++) {
- opp = grouped_pipes[i]->stream_res.opp;
- tg = grouped_pipes[i]->stream_res.tg;
- tg->funcs->get_otg_active_size(tg, &width, &height);
- if (opp->funcs->opp_program_dpg_dimensions)
- opp->funcs->opp_program_dpg_dimensions(opp, width, height);
- }
-
- DC_SYNC_INFO("Sync complete\n");
-}
-
-void dcn10_enable_per_frame_crtc_position_reset(
- struct dc *dc,
- int group_size,
- struct pipe_ctx *grouped_pipes[])
-{
- struct dc_context *dc_ctx = dc->ctx;
- int i;
-
- DC_SYNC_INFO("Setting up\n");
- for (i = 0; i < group_size; i++)
- if (grouped_pipes[i]->stream_res.tg->funcs->enable_crtc_reset)
- grouped_pipes[i]->stream_res.tg->funcs->enable_crtc_reset(
- grouped_pipes[i]->stream_res.tg,
- 0,
- &grouped_pipes[i]->stream->triggered_crtc_reset);
-
- DC_SYNC_INFO("Waiting for trigger\n");
-
- for (i = 0; i < group_size; i++)
- wait_for_reset_trigger_to_occur(dc_ctx, grouped_pipes[i]->stream_res.tg);
-
- DC_SYNC_INFO("Multi-display sync is complete\n");
-}
-
-static void mmhub_read_vm_system_aperture_settings(struct dcn10_hubp *hubp1,
- struct vm_system_aperture_param *apt,
- struct dce_hwseq *hws)
-{
- PHYSICAL_ADDRESS_LOC physical_page_number;
- uint32_t logical_addr_low;
- uint32_t logical_addr_high;
-
- REG_GET(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
- PHYSICAL_PAGE_NUMBER_MSB, &physical_page_number.high_part);
- REG_GET(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
- PHYSICAL_PAGE_NUMBER_LSB, &physical_page_number.low_part);
-
- REG_GET(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
- LOGICAL_ADDR, &logical_addr_low);
-
- REG_GET(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
- LOGICAL_ADDR, &logical_addr_high);
-
- apt->sys_default.quad_part = physical_page_number.quad_part << 12;
- apt->sys_low.quad_part = (int64_t)logical_addr_low << 18;
- apt->sys_high.quad_part = (int64_t)logical_addr_high << 18;
-}
-
-/* Temporary read settings, future will get values from kmd directly */
-static void mmhub_read_vm_context0_settings(struct dcn10_hubp *hubp1,
- struct vm_context0_param *vm0,
- struct dce_hwseq *hws)
-{
- PHYSICAL_ADDRESS_LOC fb_base;
- PHYSICAL_ADDRESS_LOC fb_offset;
- uint32_t fb_base_value;
- uint32_t fb_offset_value;
-
- REG_GET(DCHUBBUB_SDPIF_FB_BASE, SDPIF_FB_BASE, &fb_base_value);
- REG_GET(DCHUBBUB_SDPIF_FB_OFFSET, SDPIF_FB_OFFSET, &fb_offset_value);
-
- REG_GET(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
- PAGE_DIRECTORY_ENTRY_HI32, &vm0->pte_base.high_part);
- REG_GET(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
- PAGE_DIRECTORY_ENTRY_LO32, &vm0->pte_base.low_part);
-
- REG_GET(VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
- LOGICAL_PAGE_NUMBER_HI4, &vm0->pte_start.high_part);
- REG_GET(VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
- LOGICAL_PAGE_NUMBER_LO32, &vm0->pte_start.low_part);
-
- REG_GET(VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
- LOGICAL_PAGE_NUMBER_HI4, &vm0->pte_end.high_part);
- REG_GET(VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
- LOGICAL_PAGE_NUMBER_LO32, &vm0->pte_end.low_part);
-
- REG_GET(VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
- PHYSICAL_PAGE_ADDR_HI4, &vm0->fault_default.high_part);
- REG_GET(VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
- PHYSICAL_PAGE_ADDR_LO32, &vm0->fault_default.low_part);
-
- /*
- * The values in VM_CONTEXT0_PAGE_TABLE_BASE_ADDR is in UMA space.
- * Therefore we need to do
- * DCN_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR
- * - DCHUBBUB_SDPIF_FB_OFFSET + DCHUBBUB_SDPIF_FB_BASE
- */
- fb_base.quad_part = (uint64_t)fb_base_value << 24;
- fb_offset.quad_part = (uint64_t)fb_offset_value << 24;
- vm0->pte_base.quad_part += fb_base.quad_part;
- vm0->pte_base.quad_part -= fb_offset.quad_part;
-}
-
-
-void dcn10_program_pte_vm(struct dce_hwseq *hws, struct hubp *hubp)
-{
- struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp);
- struct vm_system_aperture_param apt = {0};
- struct vm_context0_param vm0 = {0};
-
- mmhub_read_vm_system_aperture_settings(hubp1, &apt, hws);
- mmhub_read_vm_context0_settings(hubp1, &vm0, hws);
-
- hubp->funcs->hubp_set_vm_system_aperture_settings(hubp, &apt);
- hubp->funcs->hubp_set_vm_context0_settings(hubp, &vm0);
-}
-
-static void dcn10_enable_plane(
- struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- struct dc_state *context)
-{
- struct dce_hwseq *hws = dc->hwseq;
-
- if (dc->debug.sanity_checks) {
- hws->funcs.verify_allow_pstate_change_high(dc);
- }
-
- undo_DEGVIDCN10_253_wa(dc);
-
- power_on_plane(dc->hwseq,
- pipe_ctx->plane_res.hubp->inst);
-
- /* enable DCFCLK current DCHUB */
- pipe_ctx->plane_res.hubp->funcs->hubp_clk_cntl(pipe_ctx->plane_res.hubp, true);
-
- /* make sure OPP_PIPE_CLOCK_EN = 1 */
- pipe_ctx->stream_res.opp->funcs->opp_pipe_clock_control(
- pipe_ctx->stream_res.opp,
- true);
-
- if (dc->config.gpu_vm_support)
- dcn10_program_pte_vm(hws, pipe_ctx->plane_res.hubp);
-
- if (dc->debug.sanity_checks) {
- hws->funcs.verify_allow_pstate_change_high(dc);
- }
-
- if (!pipe_ctx->top_pipe
- && pipe_ctx->plane_state
- && pipe_ctx->plane_state->flip_int_enabled
- && pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_int)
- pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_int(pipe_ctx->plane_res.hubp);
-
-}
-
-void dcn10_program_gamut_remap(struct pipe_ctx *pipe_ctx)
-{
- int i = 0;
- struct dpp_grph_csc_adjustment adjust;
- memset(&adjust, 0, sizeof(adjust));
- adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
-
-
- if (pipe_ctx->stream->gamut_remap_matrix.enable_remap == true) {
- adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
- for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++)
- adjust.temperature_matrix[i] =
- pipe_ctx->stream->gamut_remap_matrix.matrix[i];
- } else if (pipe_ctx->plane_state &&
- pipe_ctx->plane_state->gamut_remap_matrix.enable_remap == true) {
- adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
- for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++)
- adjust.temperature_matrix[i] =
- pipe_ctx->plane_state->gamut_remap_matrix.matrix[i];
- }
-
- pipe_ctx->plane_res.dpp->funcs->dpp_set_gamut_remap(pipe_ctx->plane_res.dpp, &adjust);
-}
-
-
-static bool dcn10_is_rear_mpo_fix_required(struct pipe_ctx *pipe_ctx, enum dc_color_space colorspace)
-{
- if (pipe_ctx->plane_state && pipe_ctx->plane_state->layer_index > 0 && is_rgb_cspace(colorspace)) {
- if (pipe_ctx->top_pipe) {
- struct pipe_ctx *top = pipe_ctx->top_pipe;
-
- while (top->top_pipe)
- top = top->top_pipe; // Traverse to top pipe_ctx
- if (top->plane_state && top->plane_state->layer_index == 0)
- return true; // Front MPO plane not hidden
- }
- }
- return false;
-}
-
-static void dcn10_set_csc_adjustment_rgb_mpo_fix(struct pipe_ctx *pipe_ctx, uint16_t *matrix)
-{
- // Override rear plane RGB bias to fix MPO brightness
- uint16_t rgb_bias = matrix[3];
-
- matrix[3] = 0;
- matrix[7] = 0;
- matrix[11] = 0;
- pipe_ctx->plane_res.dpp->funcs->dpp_set_csc_adjustment(pipe_ctx->plane_res.dpp, matrix);
- matrix[3] = rgb_bias;
- matrix[7] = rgb_bias;
- matrix[11] = rgb_bias;
-}
-
-void dcn10_program_output_csc(struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- enum dc_color_space colorspace,
- uint16_t *matrix,
- int opp_id)
-{
- if (pipe_ctx->stream->csc_color_matrix.enable_adjustment == true) {
- if (pipe_ctx->plane_res.dpp->funcs->dpp_set_csc_adjustment != NULL) {
-
- /* MPO is broken with RGB colorspaces when OCSC matrix
- * brightness offset >= 0 on DCN1 due to OCSC before MPC
- * Blending adds offsets from front + rear to rear plane
- *
- * Fix is to set RGB bias to 0 on rear plane, top plane
- * black value pixels add offset instead of rear + front
- */
-
- int16_t rgb_bias = matrix[3];
- // matrix[3/7/11] are all the same offset value
-
- if (rgb_bias > 0 && dcn10_is_rear_mpo_fix_required(pipe_ctx, colorspace)) {
- dcn10_set_csc_adjustment_rgb_mpo_fix(pipe_ctx, matrix);
- } else {
- pipe_ctx->plane_res.dpp->funcs->dpp_set_csc_adjustment(pipe_ctx->plane_res.dpp, matrix);
- }
- }
- } else {
- if (pipe_ctx->plane_res.dpp->funcs->dpp_set_csc_default != NULL)
- pipe_ctx->plane_res.dpp->funcs->dpp_set_csc_default(pipe_ctx->plane_res.dpp, colorspace);
- }
-}
-
-static void dcn10_update_dpp(struct dpp *dpp, struct dc_plane_state *plane_state)
-{
- struct dc_bias_and_scale bns_params = {0};
-
- // program the input csc
- dpp->funcs->dpp_setup(dpp,
- plane_state->format,
- EXPANSION_MODE_ZERO,
- plane_state->input_csc_color_matrix,
- plane_state->color_space,
- NULL);
-
- //set scale and bias registers
- build_prescale_params(&bns_params, plane_state);
- if (dpp->funcs->dpp_program_bias_and_scale)
- dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params);
-}
-
-void dcn10_update_visual_confirm_color(struct dc *dc, struct pipe_ctx *pipe_ctx, struct tg_color *color, int mpcc_id)
-{
- struct mpc *mpc = dc->res_pool->mpc;
-
- if (dc->debug.visual_confirm == VISUAL_CONFIRM_HDR)
- get_hdr_visual_confirm_color(pipe_ctx, color);
- else if (dc->debug.visual_confirm == VISUAL_CONFIRM_SURFACE)
- get_surface_visual_confirm_color(pipe_ctx, color);
- else if (dc->debug.visual_confirm == VISUAL_CONFIRM_SWIZZLE)
- get_surface_tile_visual_confirm_color(pipe_ctx, color);
- else
- color_space_to_black_color(
- dc, pipe_ctx->stream->output_color_space, color);
-
- if (mpc->funcs->set_bg_color)
- mpc->funcs->set_bg_color(mpc, color, mpcc_id);
-}
-
-void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
-{
- struct hubp *hubp = pipe_ctx->plane_res.hubp;
- struct mpcc_blnd_cfg blnd_cfg = {0};
- bool per_pixel_alpha = pipe_ctx->plane_state->per_pixel_alpha && pipe_ctx->bottom_pipe;
- int mpcc_id;
- struct mpcc *new_mpcc;
- struct mpc *mpc = dc->res_pool->mpc;
- struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params);
-
- if (per_pixel_alpha)
- blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
- else
- blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA;
-
- blnd_cfg.overlap_only = false;
- blnd_cfg.global_gain = 0xff;
-
- if (pipe_ctx->plane_state->global_alpha)
- blnd_cfg.global_alpha = pipe_ctx->plane_state->global_alpha_value;
- else
- blnd_cfg.global_alpha = 0xff;
-
- /* DCN1.0 has output CM before MPC which seems to screw with
- * pre-multiplied alpha.
- */
- blnd_cfg.pre_multiplied_alpha = is_rgb_cspace(
- pipe_ctx->stream->output_color_space)
- && per_pixel_alpha;
-
-
- /*
- * TODO: remove hack
- * Note: currently there is a bug in init_hw such that
- * on resume from hibernate, BIOS sets up MPCC0, and
- * we do mpcc_remove but the mpcc cannot go to idle
- * after remove. This cause us to pick mpcc1 here,
- * which causes a pstate hang for yet unknown reason.
- */
- mpcc_id = hubp->inst;
-
- /* If there is no full update, don't need to touch MPC tree*/
- if (!pipe_ctx->plane_state->update_flags.bits.full_update) {
- mpc->funcs->update_blending(mpc, &blnd_cfg, mpcc_id);
- dc->hwss.update_visual_confirm_color(dc, pipe_ctx, &blnd_cfg.black_color, mpcc_id);
- return;
- }
-
- /* check if this MPCC is already being used */
- new_mpcc = mpc->funcs->get_mpcc_for_dpp(mpc_tree_params, mpcc_id);
- /* remove MPCC if being used */
- if (new_mpcc != NULL)
- mpc->funcs->remove_mpcc(mpc, mpc_tree_params, new_mpcc);
- else
- if (dc->debug.sanity_checks)
- mpc->funcs->assert_mpcc_idle_before_connect(
- dc->res_pool->mpc, mpcc_id);
-
- /* Call MPC to insert new plane */
- new_mpcc = mpc->funcs->insert_plane(dc->res_pool->mpc,
- mpc_tree_params,
- &blnd_cfg,
- NULL,
- NULL,
- hubp->inst,
- mpcc_id);
- dc->hwss.update_visual_confirm_color(dc, pipe_ctx, &blnd_cfg.black_color, mpcc_id);
-
- ASSERT(new_mpcc != NULL);
-
- hubp->opp_id = pipe_ctx->stream_res.opp->inst;
- hubp->mpcc_id = mpcc_id;
-}
-
-static void update_scaler(struct pipe_ctx *pipe_ctx)
-{
- bool per_pixel_alpha =
- pipe_ctx->plane_state->per_pixel_alpha && pipe_ctx->bottom_pipe;
-
- pipe_ctx->plane_res.scl_data.lb_params.alpha_en = per_pixel_alpha;
- pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_36BPP;
- /* scaler configuration */
- pipe_ctx->plane_res.dpp->funcs->dpp_set_scaler(
- pipe_ctx->plane_res.dpp, &pipe_ctx->plane_res.scl_data);
-}
-
-static void dcn10_update_dchubp_dpp(
- struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- struct dc_state *context)
-{
- struct dce_hwseq *hws = dc->hwseq;
- struct hubp *hubp = pipe_ctx->plane_res.hubp;
- struct dpp *dpp = pipe_ctx->plane_res.dpp;
- struct dc_plane_state *plane_state = pipe_ctx->plane_state;
- struct plane_size size = plane_state->plane_size;
- unsigned int compat_level = 0;
- bool should_divided_by_2 = false;
-
- /* depends on DML calculation, DPP clock value may change dynamically */
- /* If request max dpp clk is lower than current dispclk, no need to
- * divided by 2
- */
- if (plane_state->update_flags.bits.full_update) {
-
- /* new calculated dispclk, dppclk are stored in
- * context->bw_ctx.bw.dcn.clk.dispclk_khz / dppclk_khz. current
- * dispclk, dppclk are from dc->clk_mgr->clks.dispclk_khz.
- * dcn_validate_bandwidth compute new dispclk, dppclk.
- * dispclk will put in use after optimize_bandwidth when
- * ramp_up_dispclk_with_dpp is called.
- * there are two places for dppclk be put in use. One location
- * is the same as the location as dispclk. Another is within
- * update_dchubp_dpp which happens between pre_bandwidth and
- * optimize_bandwidth.
- * dppclk updated within update_dchubp_dpp will cause new
- * clock values of dispclk and dppclk not be in use at the same
- * time. when clocks are decreased, this may cause dppclk is
- * lower than previous configuration and let pipe stuck.
- * for example, eDP + external dp, change resolution of DP from
- * 1920x1080x144hz to 1280x960x60hz.
- * before change: dispclk = 337889 dppclk = 337889
- * change mode, dcn_validate_bandwidth calculate
- * dispclk = 143122 dppclk = 143122
- * update_dchubp_dpp be executed before dispclk be updated,
- * dispclk = 337889, but dppclk use new value dispclk /2 =
- * 168944. this will cause pipe pstate warning issue.
- * solution: between pre_bandwidth and optimize_bandwidth, while
- * dispclk is going to be decreased, keep dppclk = dispclk
- **/
- if (context->bw_ctx.bw.dcn.clk.dispclk_khz <
- dc->clk_mgr->clks.dispclk_khz)
- should_divided_by_2 = false;
- else
- should_divided_by_2 =
- context->bw_ctx.bw.dcn.clk.dppclk_khz <=
- dc->clk_mgr->clks.dispclk_khz / 2;
-
- dpp->funcs->dpp_dppclk_control(
- dpp,
- should_divided_by_2,
- true);
-
- if (dc->res_pool->dccg)
- dc->res_pool->dccg->funcs->update_dpp_dto(
- dc->res_pool->dccg,
- dpp->inst,
- pipe_ctx->plane_res.bw.dppclk_khz);
- else
- dc->clk_mgr->clks.dppclk_khz = should_divided_by_2 ?
- dc->clk_mgr->clks.dispclk_khz / 2 :
- dc->clk_mgr->clks.dispclk_khz;
- }
-
- /* TODO: Need input parameter to tell current DCHUB pipe tie to which OTG
- * VTG is within DCHUBBUB which is commond block share by each pipe HUBP.
- * VTG is 1:1 mapping with OTG. Each pipe HUBP will select which VTG
- */
- if (plane_state->update_flags.bits.full_update) {
- hubp->funcs->hubp_vtg_sel(hubp, pipe_ctx->stream_res.tg->inst);
-
- hubp->funcs->hubp_setup(
- hubp,
- &pipe_ctx->dlg_regs,
- &pipe_ctx->ttu_regs,
- &pipe_ctx->rq_regs,
- &pipe_ctx->pipe_dlg_param);
- hubp->funcs->hubp_setup_interdependent(
- hubp,
- &pipe_ctx->dlg_regs,
- &pipe_ctx->ttu_regs);
- }
-
- size.surface_size = pipe_ctx->plane_res.scl_data.viewport;
-
- if (plane_state->update_flags.bits.full_update ||
- plane_state->update_flags.bits.bpp_change)
- dcn10_update_dpp(dpp, plane_state);
-
- if (plane_state->update_flags.bits.full_update ||
- plane_state->update_flags.bits.per_pixel_alpha_change ||
- plane_state->update_flags.bits.global_alpha_change)
- hws->funcs.update_mpcc(dc, pipe_ctx);
-
- if (plane_state->update_flags.bits.full_update ||
- plane_state->update_flags.bits.per_pixel_alpha_change ||
- plane_state->update_flags.bits.global_alpha_change ||
- plane_state->update_flags.bits.scaling_change ||
- plane_state->update_flags.bits.position_change) {
- update_scaler(pipe_ctx);
- }
-
- if (plane_state->update_flags.bits.full_update ||
- plane_state->update_flags.bits.scaling_change ||
- plane_state->update_flags.bits.position_change) {
- hubp->funcs->mem_program_viewport(
- hubp,
- &pipe_ctx->plane_res.scl_data.viewport,
- &pipe_ctx->plane_res.scl_data.viewport_c);
- }
-
- if (pipe_ctx->stream->cursor_attributes.address.quad_part != 0) {
- dc->hwss.set_cursor_position(pipe_ctx);
- dc->hwss.set_cursor_attribute(pipe_ctx);
-
- if (dc->hwss.set_cursor_sdr_white_level)
- dc->hwss.set_cursor_sdr_white_level(pipe_ctx);
- }
-
- if (plane_state->update_flags.bits.full_update) {
- /*gamut remap*/
- dc->hwss.program_gamut_remap(pipe_ctx);
-
- dc->hwss.program_output_csc(dc,
- pipe_ctx,
- pipe_ctx->stream->output_color_space,
- pipe_ctx->stream->csc_color_matrix.matrix,
- pipe_ctx->stream_res.opp->inst);
- }
-
- if (plane_state->update_flags.bits.full_update ||
- plane_state->update_flags.bits.pixel_format_change ||
- plane_state->update_flags.bits.horizontal_mirror_change ||
- plane_state->update_flags.bits.rotation_change ||
- plane_state->update_flags.bits.swizzle_change ||
- plane_state->update_flags.bits.dcc_change ||
- plane_state->update_flags.bits.bpp_change ||
- plane_state->update_flags.bits.scaling_change ||
- plane_state->update_flags.bits.plane_size_change) {
- hubp->funcs->hubp_program_surface_config(
- hubp,
- plane_state->format,
- &plane_state->tiling_info,
- &size,
- plane_state->rotation,
- &plane_state->dcc,
- plane_state->horizontal_mirror,
- compat_level);
- }
-
- hubp->power_gated = false;
-
- hws->funcs.update_plane_addr(dc, pipe_ctx);
-
- if (is_pipe_tree_visible(pipe_ctx))
- hubp->funcs->set_blank(hubp, false);
-}
-
-void dcn10_blank_pixel_data(
- struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- bool blank)
-{
- enum dc_color_space color_space;
- struct tg_color black_color = {0};
- struct stream_resource *stream_res = &pipe_ctx->stream_res;
- struct dc_stream_state *stream = pipe_ctx->stream;
-
- /* program otg blank color */
- color_space = stream->output_color_space;
- color_space_to_black_color(dc, color_space, &black_color);
-
- /*
- * The way 420 is packed, 2 channels carry Y component, 1 channel
- * alternate between Cb and Cr, so both channels need the pixel
- * value for Y
- */
- if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420)
- black_color.color_r_cr = black_color.color_g_y;
-
-
- if (stream_res->tg->funcs->set_blank_color)
- stream_res->tg->funcs->set_blank_color(
- stream_res->tg,
- &black_color);
-
- if (!blank) {
- if (stream_res->tg->funcs->set_blank)
- stream_res->tg->funcs->set_blank(stream_res->tg, blank);
- if (stream_res->abm) {
- dc->hwss.set_pipe(pipe_ctx);
- stream_res->abm->funcs->set_abm_level(stream_res->abm, stream->abm_level);
- }
- } else if (blank) {
- dc->hwss.set_abm_immediate_disable(pipe_ctx);
- if (stream_res->tg->funcs->set_blank) {
- stream_res->tg->funcs->wait_for_state(stream_res->tg, CRTC_STATE_VBLANK);
- stream_res->tg->funcs->set_blank(stream_res->tg, blank);
- }
- }
-}
-
-void dcn10_set_hdr_multiplier(struct pipe_ctx *pipe_ctx)
-{
- struct fixed31_32 multiplier = pipe_ctx->plane_state->hdr_mult;
- uint32_t hw_mult = 0x1f000; // 1.0 default multiplier
- struct custom_float_format fmt;
-
- fmt.exponenta_bits = 6;
- fmt.mantissa_bits = 12;
- fmt.sign = true;
-
-
- if (!dc_fixpt_eq(multiplier, dc_fixpt_from_int(0))) // check != 0
- convert_to_custom_float_format(multiplier, &fmt, &hw_mult);
-
- pipe_ctx->plane_res.dpp->funcs->dpp_set_hdr_multiplier(
- pipe_ctx->plane_res.dpp, hw_mult);
-}
-
-void dcn10_program_pipe(
- struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- struct dc_state *context)
-{
- struct dce_hwseq *hws = dc->hwseq;
-
- if (pipe_ctx->top_pipe == NULL) {
- bool blank = !is_pipe_tree_visible(pipe_ctx);
-
- pipe_ctx->stream_res.tg->funcs->program_global_sync(
- pipe_ctx->stream_res.tg,
- pipe_ctx->pipe_dlg_param.vready_offset,
- pipe_ctx->pipe_dlg_param.vstartup_start,
- pipe_ctx->pipe_dlg_param.vupdate_offset,
- pipe_ctx->pipe_dlg_param.vupdate_width);
-
- pipe_ctx->stream_res.tg->funcs->set_vtg_params(
- pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true);
-
- if (hws->funcs.setup_vupdate_interrupt)
- hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx);
-
- hws->funcs.blank_pixel_data(dc, pipe_ctx, blank);
- }
-
- if (pipe_ctx->plane_state->update_flags.bits.full_update)
- dcn10_enable_plane(dc, pipe_ctx, context);
-
- dcn10_update_dchubp_dpp(dc, pipe_ctx, context);
-
- hws->funcs.set_hdr_multiplier(pipe_ctx);
-
- if (pipe_ctx->plane_state->update_flags.bits.full_update ||
- pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change ||
- pipe_ctx->plane_state->update_flags.bits.gamma_change)
- hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state);
-
- /* dcn10_translate_regamma_to_hw_format takes 750us to finish
- * only do gamma programming for full update.
- * TODO: This can be further optimized/cleaned up
- * Always call this for now since it does memcmp inside before
- * doing heavy calculation and programming
- */
- if (pipe_ctx->plane_state->update_flags.bits.full_update)
- hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream);
-}
-
-void dcn10_wait_for_pending_cleared(struct dc *dc,
- struct dc_state *context)
-{
- struct pipe_ctx *pipe_ctx;
- struct timing_generator *tg;
- int i;
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- pipe_ctx = &context->res_ctx.pipe_ctx[i];
- tg = pipe_ctx->stream_res.tg;
-
- /*
- * Only wait for top pipe's tg penindg bit
- * Also skip if pipe is disabled.
- */
- if (pipe_ctx->top_pipe ||
- !pipe_ctx->stream || !pipe_ctx->plane_state ||
- !tg->funcs->is_tg_enabled(tg))
- continue;
-
- /*
- * Wait for VBLANK then VACTIVE to ensure we get VUPDATE.
- * For some reason waiting for OTG_UPDATE_PENDING cleared
- * seems to not trigger the update right away, and if we
- * lock again before VUPDATE then we don't get a separated
- * operation.
- */
- pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK);
- pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE);
- }
-}
-
-void dcn10_post_unlock_program_front_end(
- struct dc *dc,
- struct dc_state *context)
-{
- int i;
-
- DC_LOGGER_INIT(dc->ctx->logger);
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-
- if (!pipe_ctx->top_pipe &&
- !pipe_ctx->prev_odm_pipe &&
- pipe_ctx->stream) {
- struct timing_generator *tg = pipe_ctx->stream_res.tg;
-
- if (context->stream_status[i].plane_count == 0)
- false_optc_underflow_wa(dc, pipe_ctx->stream, tg);
- }
- }
-
- for (i = 0; i < dc->res_pool->pipe_count; i++)
- if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable)
- dc->hwss.disable_plane(dc, &dc->current_state->res_ctx.pipe_ctx[i]);
-
- for (i = 0; i < dc->res_pool->pipe_count; i++)
- if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable) {
- dc->hwss.optimize_bandwidth(dc, context);
- break;
- }
-
- if (dc->hwseq->wa.DEGVIDCN10_254)
- hubbub1_wm_change_req_wa(dc->res_pool->hubbub);
-}
-
-static void dcn10_stereo_hw_frame_pack_wa(struct dc *dc, struct dc_state *context)
-{
- uint8_t i;
-
- for (i = 0; i < context->stream_count; i++) {
- if (context->streams[i]->timing.timing_3d_format
- == TIMING_3D_FORMAT_HW_FRAME_PACKING) {
- /*
- * Disable stutter
- */
- hubbub1_allow_self_refresh_control(dc->res_pool->hubbub, false);
- break;
- }
- }
-}
-
-void dcn10_prepare_bandwidth(
- struct dc *dc,
- struct dc_state *context)
-{
- struct dce_hwseq *hws = dc->hwseq;
- struct hubbub *hubbub = dc->res_pool->hubbub;
-
- if (dc->debug.sanity_checks)
- hws->funcs.verify_allow_pstate_change_high(dc);
-
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
- if (context->stream_count == 0)
- context->bw_ctx.bw.dcn.clk.phyclk_khz = 0;
-
- dc->clk_mgr->funcs->update_clocks(
- dc->clk_mgr,
- context,
- false);
- }
-
- dc->wm_optimized_required = hubbub->funcs->program_watermarks(hubbub,
- &context->bw_ctx.bw.dcn.watermarks,
- dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
- true);
- dcn10_stereo_hw_frame_pack_wa(dc, context);
-
- if (dc->debug.pplib_wm_report_mode == WM_REPORT_OVERRIDE)
- dcn_bw_notify_pplib_of_wm_ranges(dc);
-
- if (dc->debug.sanity_checks)
- hws->funcs.verify_allow_pstate_change_high(dc);
-}
-
-void dcn10_optimize_bandwidth(
- struct dc *dc,
- struct dc_state *context)
-{
- struct dce_hwseq *hws = dc->hwseq;
- struct hubbub *hubbub = dc->res_pool->hubbub;
-
- if (dc->debug.sanity_checks)
- hws->funcs.verify_allow_pstate_change_high(dc);
-
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
- if (context->stream_count == 0)
- context->bw_ctx.bw.dcn.clk.phyclk_khz = 0;
-
- dc->clk_mgr->funcs->update_clocks(
- dc->clk_mgr,
- context,
- true);
- }
-
- hubbub->funcs->program_watermarks(hubbub,
- &context->bw_ctx.bw.dcn.watermarks,
- dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
- true);
-
- dcn10_stereo_hw_frame_pack_wa(dc, context);
-
- if (dc->debug.pplib_wm_report_mode == WM_REPORT_OVERRIDE)
- dcn_bw_notify_pplib_of_wm_ranges(dc);
-
- if (dc->debug.sanity_checks)
- hws->funcs.verify_allow_pstate_change_high(dc);
-}
-
-void dcn10_set_drr(struct pipe_ctx **pipe_ctx,
- int num_pipes, struct dc_crtc_timing_adjust adjust)
-{
- int i = 0;
- struct drr_params params = {0};
- // DRR set trigger event mapped to OTG_TRIG_A (bit 11) for manual control flow
- unsigned int event_triggers = 0x800;
- // Note DRR trigger events are generated regardless of whether num frames met.
- unsigned int num_frames = 2;
-
- params.vertical_total_max = adjust.v_total_max;
- params.vertical_total_min = adjust.v_total_min;
- params.vertical_total_mid = adjust.v_total_mid;
- params.vertical_total_mid_frame_num = adjust.v_total_mid_frame_num;
- /* TODO: If multiple pipes are to be supported, you need
- * some GSL stuff. Static screen triggers may be programmed differently
- * as well.
- */
- for (i = 0; i < num_pipes; i++) {
- pipe_ctx[i]->stream_res.tg->funcs->set_drr(
- pipe_ctx[i]->stream_res.tg, &params);
- if (adjust.v_total_max != 0 && adjust.v_total_min != 0)
- pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(
- pipe_ctx[i]->stream_res.tg,
- event_triggers, num_frames);
- }
-}
-
-void dcn10_get_position(struct pipe_ctx **pipe_ctx,
- int num_pipes,
- struct crtc_position *position)
-{
- int i = 0;
-
- /* TODO: handle pipes > 1
- */
- for (i = 0; i < num_pipes; i++)
- pipe_ctx[i]->stream_res.tg->funcs->get_position(pipe_ctx[i]->stream_res.tg, position);
-}
-
-void dcn10_set_static_screen_control(struct pipe_ctx **pipe_ctx,
- int num_pipes, const struct dc_static_screen_params *params)
-{
- unsigned int i;
- unsigned int triggers = 0;
-
- if (params->triggers.surface_update)
- triggers |= 0x80;
- if (params->triggers.cursor_update)
- triggers |= 0x2;
- if (params->triggers.force_trigger)
- triggers |= 0x1;
-
- for (i = 0; i < num_pipes; i++)
- pipe_ctx[i]->stream_res.tg->funcs->
- set_static_screen_control(pipe_ctx[i]->stream_res.tg,
- triggers, params->num_frames);
-}
-
-static void dcn10_config_stereo_parameters(
- struct dc_stream_state *stream, struct crtc_stereo_flags *flags)
-{
- enum view_3d_format view_format = stream->view_format;
- enum dc_timing_3d_format timing_3d_format =\
- stream->timing.timing_3d_format;
- bool non_stereo_timing = false;
-
- if (timing_3d_format == TIMING_3D_FORMAT_NONE ||
- timing_3d_format == TIMING_3D_FORMAT_SIDE_BY_SIDE ||
- timing_3d_format == TIMING_3D_FORMAT_TOP_AND_BOTTOM)
- non_stereo_timing = true;
-
- if (non_stereo_timing == false &&
- view_format == VIEW_3D_FORMAT_FRAME_SEQUENTIAL) {
-
- flags->PROGRAM_STEREO = 1;
- flags->PROGRAM_POLARITY = 1;
- if (timing_3d_format == TIMING_3D_FORMAT_INBAND_FA ||
- timing_3d_format == TIMING_3D_FORMAT_DP_HDMI_INBAND_FA ||
- timing_3d_format == TIMING_3D_FORMAT_SIDEBAND_FA) {
- enum display_dongle_type dongle = \
- stream->link->ddc->dongle_type;
- if (dongle == DISPLAY_DONGLE_DP_VGA_CONVERTER ||
- dongle == DISPLAY_DONGLE_DP_DVI_CONVERTER ||
- dongle == DISPLAY_DONGLE_DP_HDMI_CONVERTER)
- flags->DISABLE_STEREO_DP_SYNC = 1;
- }
- flags->RIGHT_EYE_POLARITY =\
- stream->timing.flags.RIGHT_EYE_3D_POLARITY;
- if (timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING)
- flags->FRAME_PACKED = 1;
- }
-
- return;
-}
-
-void dcn10_setup_stereo(struct pipe_ctx *pipe_ctx, struct dc *dc)
-{
- struct crtc_stereo_flags flags = { 0 };
- struct dc_stream_state *stream = pipe_ctx->stream;
-
- dcn10_config_stereo_parameters(stream, &flags);
-
- if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_SIDEBAND_FA) {
- if (!dc_set_generic_gpio_for_stereo(true, dc->ctx->gpio_service))
- dc_set_generic_gpio_for_stereo(false, dc->ctx->gpio_service);
- } else {
- dc_set_generic_gpio_for_stereo(false, dc->ctx->gpio_service);
- }
-
- pipe_ctx->stream_res.opp->funcs->opp_program_stereo(
- pipe_ctx->stream_res.opp,
- flags.PROGRAM_STEREO == 1,
- &stream->timing);
-
- pipe_ctx->stream_res.tg->funcs->program_stereo(
- pipe_ctx->stream_res.tg,
- &stream->timing,
- &flags);
-
- return;
-}
-
-static struct hubp *get_hubp_by_inst(struct resource_pool *res_pool, int mpcc_inst)
-{
- int i;
-
- for (i = 0; i < res_pool->pipe_count; i++) {
- if (res_pool->hubps[i]->inst == mpcc_inst)
- return res_pool->hubps[i];
- }
- ASSERT(false);
- return NULL;
-}
-
-void dcn10_wait_for_mpcc_disconnect(
- struct dc *dc,
- struct resource_pool *res_pool,
- struct pipe_ctx *pipe_ctx)
-{
- struct dce_hwseq *hws = dc->hwseq;
- int mpcc_inst;
-
- if (dc->debug.sanity_checks) {
- hws->funcs.verify_allow_pstate_change_high(dc);
- }
-
- if (!pipe_ctx->stream_res.opp)
- return;
-
- for (mpcc_inst = 0; mpcc_inst < MAX_PIPES; mpcc_inst++) {
- if (pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst]) {
- struct hubp *hubp = get_hubp_by_inst(res_pool, mpcc_inst);
-
- res_pool->mpc->funcs->wait_for_idle(res_pool->mpc, mpcc_inst);
- pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst] = false;
- hubp->funcs->set_blank(hubp, true);
- }
- }
-
- if (dc->debug.sanity_checks) {
- hws->funcs.verify_allow_pstate_change_high(dc);
- }
-
-}
-
-bool dcn10_dummy_display_power_gating(
- struct dc *dc,
- uint8_t controller_id,
- struct dc_bios *dcb,
- enum pipe_gating_control power_gating)
-{
- return true;
-}
-
-void dcn10_update_pending_status(struct pipe_ctx *pipe_ctx)
-{
- struct dc_plane_state *plane_state = pipe_ctx->plane_state;
- struct timing_generator *tg = pipe_ctx->stream_res.tg;
- bool flip_pending;
- struct dc *dc = plane_state->ctx->dc;
-
- if (plane_state == NULL)
- return;
-
- flip_pending = pipe_ctx->plane_res.hubp->funcs->hubp_is_flip_pending(
- pipe_ctx->plane_res.hubp);
-
- plane_state->status.is_flip_pending = plane_state->status.is_flip_pending || flip_pending;
-
- if (!flip_pending)
- plane_state->status.current_address = plane_state->status.requested_address;
-
- if (plane_state->status.current_address.type == PLN_ADDR_TYPE_GRPH_STEREO &&
- tg->funcs->is_stereo_left_eye) {
- plane_state->status.is_right_eye =
- !tg->funcs->is_stereo_left_eye(pipe_ctx->stream_res.tg);
- }
-
- if (dc->hwseq->wa_state.disallow_self_refresh_during_multi_plane_transition_applied) {
- struct dce_hwseq *hwseq = dc->hwseq;
- struct timing_generator *tg = dc->res_pool->timing_generators[0];
- unsigned int cur_frame = tg->funcs->get_frame_count(tg);
-
- if (cur_frame != hwseq->wa_state.disallow_self_refresh_during_multi_plane_transition_applied_on_frame) {
- struct hubbub *hubbub = dc->res_pool->hubbub;
-
- hubbub->funcs->allow_self_refresh_control(hubbub, !dc->debug.disable_stutter);
- hwseq->wa_state.disallow_self_refresh_during_multi_plane_transition_applied = false;
- }
- }
-}
-
-void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh_data)
-{
- struct hubbub *hubbub = hws->ctx->dc->res_pool->hubbub;
-
- /* In DCN, this programming sequence is owned by the hubbub */
- hubbub->funcs->update_dchub(hubbub, dh_data);
-}
-
-static bool dcn10_can_pipe_disable_cursor(struct pipe_ctx *pipe_ctx)
-{
- struct pipe_ctx *test_pipe, *split_pipe;
- const struct scaler_data *scl_data = &pipe_ctx->plane_res.scl_data;
- struct rect r1 = scl_data->recout, r2, r2_half;
- int r1_r = r1.x + r1.width, r1_b = r1.y + r1.height, r2_r, r2_b;
- int cur_layer = pipe_ctx->plane_state->layer_index;
-
- /**
- * Disable the cursor if there's another pipe above this with a
- * plane that contains this pipe's viewport to prevent double cursor
- * and incorrect scaling artifacts.
- */
- for (test_pipe = pipe_ctx->top_pipe; test_pipe;
- test_pipe = test_pipe->top_pipe) {
- // Skip invisible layer and pipe-split plane on same layer
- if (!test_pipe->plane_state->visible || test_pipe->plane_state->layer_index == cur_layer)
- continue;
-
- r2 = test_pipe->plane_res.scl_data.recout;
- r2_r = r2.x + r2.width;
- r2_b = r2.y + r2.height;
- split_pipe = test_pipe;
-
- /**
- * There is another half plane on same layer because of
- * pipe-split, merge together per same height.
- */
- for (split_pipe = pipe_ctx->top_pipe; split_pipe;
- split_pipe = split_pipe->top_pipe)
- if (split_pipe->plane_state->layer_index == test_pipe->plane_state->layer_index) {
- r2_half = split_pipe->plane_res.scl_data.recout;
- r2.x = (r2_half.x < r2.x) ? r2_half.x : r2.x;
- r2.width = r2.width + r2_half.width;
- r2_r = r2.x + r2.width;
- break;
- }
-
- if (r1.x >= r2.x && r1.y >= r2.y && r1_r <= r2_r && r1_b <= r2_b)
- return true;
- }
-
- return false;
-}
-
-void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
-{
- struct dc_cursor_position pos_cpy = pipe_ctx->stream->cursor_position;
- struct hubp *hubp = pipe_ctx->plane_res.hubp;
- struct dpp *dpp = pipe_ctx->plane_res.dpp;
- struct dc_cursor_mi_param param = {
- .pixel_clk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10,
- .ref_clk_khz = pipe_ctx->stream->ctx->dc->res_pool->ref_clocks.dchub_ref_clock_inKhz,
- .viewport = pipe_ctx->plane_res.scl_data.viewport,
- .h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz,
- .v_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.vert,
- .rotation = pipe_ctx->plane_state->rotation,
- .mirror = pipe_ctx->plane_state->horizontal_mirror
- };
- bool pipe_split_on = (pipe_ctx->top_pipe != NULL) ||
- (pipe_ctx->bottom_pipe != NULL);
- bool odm_combine_on = (pipe_ctx->next_odm_pipe != NULL) ||
- (pipe_ctx->prev_odm_pipe != NULL);
-
- int x_plane = pipe_ctx->plane_state->dst_rect.x;
- int y_plane = pipe_ctx->plane_state->dst_rect.y;
- int x_pos = pos_cpy.x;
- int y_pos = pos_cpy.y;
-
- /**
- * DC cursor is stream space, HW cursor is plane space and drawn
- * as part of the framebuffer.
- *
- * Cursor position can't be negative, but hotspot can be used to
- * shift cursor out of the plane bounds. Hotspot must be smaller
- * than the cursor size.
- */
-
- /**
- * Translate cursor from stream space to plane space.
- *
- * If the cursor is scaled then we need to scale the position
- * to be in the approximately correct place. We can't do anything
- * about the actual size being incorrect, that's a limitation of
- * the hardware.
- */
- if (param.rotation == ROTATION_ANGLE_90 || param.rotation == ROTATION_ANGLE_270) {
- x_pos = (x_pos - x_plane) * pipe_ctx->plane_state->src_rect.height /
- pipe_ctx->plane_state->dst_rect.width;
- y_pos = (y_pos - y_plane) * pipe_ctx->plane_state->src_rect.width /
- pipe_ctx->plane_state->dst_rect.height;
- } else {
- x_pos = (x_pos - x_plane) * pipe_ctx->plane_state->src_rect.width /
- pipe_ctx->plane_state->dst_rect.width;
- y_pos = (y_pos - y_plane) * pipe_ctx->plane_state->src_rect.height /
- pipe_ctx->plane_state->dst_rect.height;
- }
-
- /**
- * If the cursor's source viewport is clipped then we need to
- * translate the cursor to appear in the correct position on
- * the screen.
- *
- * This translation isn't affected by scaling so it needs to be
- * done *after* we adjust the position for the scale factor.
- *
- * This is only done by opt-in for now since there are still
- * some usecases like tiled display that might enable the
- * cursor on both streams while expecting dc to clip it.
- */
- if (pos_cpy.translate_by_source) {
- x_pos += pipe_ctx->plane_state->src_rect.x;
- y_pos += pipe_ctx->plane_state->src_rect.y;
- }
-
- /**
- * If the position is negative then we need to add to the hotspot
- * to shift the cursor outside the plane.
- */
-
- if (x_pos < 0) {
- pos_cpy.x_hotspot -= x_pos;
- x_pos = 0;
- }
-
- if (y_pos < 0) {
- pos_cpy.y_hotspot -= y_pos;
- y_pos = 0;
- }
-
- pos_cpy.x = (uint32_t)x_pos;
- pos_cpy.y = (uint32_t)y_pos;
-
- if (pipe_ctx->plane_state->address.type
- == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE)
- pos_cpy.enable = false;
-
- if (pos_cpy.enable && dcn10_can_pipe_disable_cursor(pipe_ctx))
- pos_cpy.enable = false;
-
- // Swap axis and mirror horizontally
- if (param.rotation == ROTATION_ANGLE_90) {
- uint32_t temp_x = pos_cpy.x;
-
- pos_cpy.x = pipe_ctx->plane_res.scl_data.viewport.width -
- (pos_cpy.y - pipe_ctx->plane_res.scl_data.viewport.x) + pipe_ctx->plane_res.scl_data.viewport.x;
- pos_cpy.y = temp_x;
- }
- // Swap axis and mirror vertically
- else if (param.rotation == ROTATION_ANGLE_270) {
- uint32_t temp_y = pos_cpy.y;
- int viewport_height =
- pipe_ctx->plane_res.scl_data.viewport.height;
- int viewport_y =
- pipe_ctx->plane_res.scl_data.viewport.y;
-
- /**
- * Display groups that are 1xnY, have pos_cpy.x > 2 * viewport.height
- * For pipe split cases:
- * - apply offset of viewport.y to normalize pos_cpy.x
- * - calculate the pos_cpy.y as before
- * - shift pos_cpy.y back by same offset to get final value
- * - since we iterate through both pipes, use the lower
- * viewport.y for offset
- * For non pipe split cases, use the same calculation for
- * pos_cpy.y as the 180 degree rotation case below,
- * but use pos_cpy.x as our input because we are rotating
- * 270 degrees
- */
- if (pipe_split_on || odm_combine_on) {
- int pos_cpy_x_offset;
- int other_pipe_viewport_y;
-
- if (pipe_split_on) {
- if (pipe_ctx->bottom_pipe) {
- other_pipe_viewport_y =
- pipe_ctx->bottom_pipe->plane_res.scl_data.viewport.y;
- } else {
- other_pipe_viewport_y =
- pipe_ctx->top_pipe->plane_res.scl_data.viewport.y;
- }
- } else {
- if (pipe_ctx->next_odm_pipe) {
- other_pipe_viewport_y =
- pipe_ctx->next_odm_pipe->plane_res.scl_data.viewport.y;
- } else {
- other_pipe_viewport_y =
- pipe_ctx->prev_odm_pipe->plane_res.scl_data.viewport.y;
- }
- }
- pos_cpy_x_offset = (viewport_y > other_pipe_viewport_y) ?
- other_pipe_viewport_y : viewport_y;
- pos_cpy.x -= pos_cpy_x_offset;
- if (pos_cpy.x > viewport_height) {
- pos_cpy.x = pos_cpy.x - viewport_height;
- pos_cpy.y = viewport_height - pos_cpy.x;
- } else {
- pos_cpy.y = 2 * viewport_height - pos_cpy.x;
- }
- pos_cpy.y += pos_cpy_x_offset;
- } else {
- pos_cpy.y = (2 * viewport_y) + viewport_height - pos_cpy.x;
- }
- pos_cpy.x = temp_y;
- }
- // Mirror horizontally and vertically
- else if (param.rotation == ROTATION_ANGLE_180) {
- int viewport_width =
- pipe_ctx->plane_res.scl_data.viewport.width;
- int viewport_x =
- pipe_ctx->plane_res.scl_data.viewport.x;
-
- if (pipe_split_on || odm_combine_on) {
- if (pos_cpy.x >= viewport_width + viewport_x) {
- pos_cpy.x = 2 * viewport_width
- - pos_cpy.x + 2 * viewport_x;
- } else {
- uint32_t temp_x = pos_cpy.x;
-
- pos_cpy.x = 2 * viewport_x - pos_cpy.x;
- if (temp_x >= viewport_x +
- (int)hubp->curs_attr.width || pos_cpy.x
- <= (int)hubp->curs_attr.width +
- pipe_ctx->plane_state->src_rect.x) {
- pos_cpy.x = temp_x + viewport_width;
- }
- }
- } else {
- pos_cpy.x = viewport_width - pos_cpy.x + 2 * viewport_x;
- }
-
- /**
- * Display groups that are 1xnY, have pos_cpy.y > viewport.height
- * Calculation:
- * delta_from_bottom = viewport.y + viewport.height - pos_cpy.y
- * pos_cpy.y_new = viewport.y + delta_from_bottom
- * Simplify it as:
- * pos_cpy.y = viewport.y * 2 + viewport.height - pos_cpy.y
- */
- pos_cpy.y = (2 * pipe_ctx->plane_res.scl_data.viewport.y) +
- pipe_ctx->plane_res.scl_data.viewport.height - pos_cpy.y;
- }
-
- hubp->funcs->set_cursor_position(hubp, &pos_cpy, &param);
- dpp->funcs->set_cursor_position(dpp, &pos_cpy, &param, hubp->curs_attr.width, hubp->curs_attr.height);
-}
-
-void dcn10_set_cursor_attribute(struct pipe_ctx *pipe_ctx)
-{
- struct dc_cursor_attributes *attributes = &pipe_ctx->stream->cursor_attributes;
-
- pipe_ctx->plane_res.hubp->funcs->set_cursor_attributes(
- pipe_ctx->plane_res.hubp, attributes);
- pipe_ctx->plane_res.dpp->funcs->set_cursor_attributes(
- pipe_ctx->plane_res.dpp, attributes);
-}
-
-void dcn10_set_cursor_sdr_white_level(struct pipe_ctx *pipe_ctx)
-{
- uint32_t sdr_white_level = pipe_ctx->stream->cursor_attributes.sdr_white_level;
- struct fixed31_32 multiplier;
- struct dpp_cursor_attributes opt_attr = { 0 };
- uint32_t hw_scale = 0x3c00; // 1.0 default multiplier
- struct custom_float_format fmt;
-
- if (!pipe_ctx->plane_res.dpp->funcs->set_optional_cursor_attributes)
- return;
-
- fmt.exponenta_bits = 5;
- fmt.mantissa_bits = 10;
- fmt.sign = true;
-
- if (sdr_white_level > 80) {
- multiplier = dc_fixpt_from_fraction(sdr_white_level, 80);
- convert_to_custom_float_format(multiplier, &fmt, &hw_scale);
- }
-
- opt_attr.scale = hw_scale;
- opt_attr.bias = 0;
-
- pipe_ctx->plane_res.dpp->funcs->set_optional_cursor_attributes(
- pipe_ctx->plane_res.dpp, &opt_attr);
-}
-
-/*
- * apply_front_porch_workaround TODO FPGA still need?
- *
- * This is a workaround for a bug that has existed since R5xx and has not been
- * fixed keep Front porch at minimum 2 for Interlaced mode or 1 for progressive.
- */
-static void apply_front_porch_workaround(
- struct dc_crtc_timing *timing)
-{
- if (timing->flags.INTERLACE == 1) {
- if (timing->v_front_porch < 2)
- timing->v_front_porch = 2;
- } else {
- if (timing->v_front_porch < 1)
- timing->v_front_porch = 1;
- }
-}
-
-int dcn10_get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx)
-{
- const struct dc_crtc_timing *dc_crtc_timing = &pipe_ctx->stream->timing;
- struct dc_crtc_timing patched_crtc_timing;
- int vesa_sync_start;
- int asic_blank_end;
- int interlace_factor;
- int vertical_line_start;
-
- patched_crtc_timing = *dc_crtc_timing;
- apply_front_porch_workaround(&patched_crtc_timing);
-
- interlace_factor = patched_crtc_timing.flags.INTERLACE ? 2 : 1;
-
- vesa_sync_start = patched_crtc_timing.v_addressable +
- patched_crtc_timing.v_border_bottom +
- patched_crtc_timing.v_front_porch;
-
- asic_blank_end = (patched_crtc_timing.v_total -
- vesa_sync_start -
- patched_crtc_timing.v_border_top)
- * interlace_factor;
-
- vertical_line_start = asic_blank_end -
- pipe_ctx->pipe_dlg_param.vstartup_start + 1;
-
- return vertical_line_start;
-}
-
-void dcn10_calc_vupdate_position(
- struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- uint32_t *start_line,
- uint32_t *end_line)
-{
- const struct dc_crtc_timing *dc_crtc_timing = &pipe_ctx->stream->timing;
- int vline_int_offset_from_vupdate =
- pipe_ctx->stream->periodic_interrupt0.lines_offset;
- int vupdate_offset_from_vsync = dc->hwss.get_vupdate_offset_from_vsync(pipe_ctx);
- int start_position;
-
- if (vline_int_offset_from_vupdate > 0)
- vline_int_offset_from_vupdate--;
- else if (vline_int_offset_from_vupdate < 0)
- vline_int_offset_from_vupdate++;
-
- start_position = vline_int_offset_from_vupdate + vupdate_offset_from_vsync;
-
- if (start_position >= 0)
- *start_line = start_position;
- else
- *start_line = dc_crtc_timing->v_total + start_position - 1;
-
- *end_line = *start_line + 2;
-
- if (*end_line >= dc_crtc_timing->v_total)
- *end_line = 2;
-}
-
-static void dcn10_cal_vline_position(
- struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- enum vline_select vline,
- uint32_t *start_line,
- uint32_t *end_line)
-{
- enum vertical_interrupt_ref_point ref_point = INVALID_POINT;
-
- if (vline == VLINE0)
- ref_point = pipe_ctx->stream->periodic_interrupt0.ref_point;
- else if (vline == VLINE1)
- ref_point = pipe_ctx->stream->periodic_interrupt1.ref_point;
-
- switch (ref_point) {
- case START_V_UPDATE:
- dcn10_calc_vupdate_position(
- dc,
- pipe_ctx,
- start_line,
- end_line);
- break;
- case START_V_SYNC:
- // Suppose to do nothing because vsync is 0;
- break;
- default:
- ASSERT(0);
- break;
- }
-}
-
-void dcn10_setup_periodic_interrupt(
- struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- enum vline_select vline)
-{
- struct timing_generator *tg = pipe_ctx->stream_res.tg;
-
- if (vline == VLINE0) {
- uint32_t start_line = 0;
- uint32_t end_line = 0;
-
- dcn10_cal_vline_position(dc, pipe_ctx, vline, &start_line, &end_line);
-
- tg->funcs->setup_vertical_interrupt0(tg, start_line, end_line);
-
- } else if (vline == VLINE1) {
- pipe_ctx->stream_res.tg->funcs->setup_vertical_interrupt1(
- tg,
- pipe_ctx->stream->periodic_interrupt1.lines_offset);
- }
-}
-
-void dcn10_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx)
-{
- struct timing_generator *tg = pipe_ctx->stream_res.tg;
- int start_line = dc->hwss.get_vupdate_offset_from_vsync(pipe_ctx);
-
- if (start_line < 0) {
- ASSERT(0);
- start_line = 0;
- }
-
- if (tg->funcs->setup_vertical_interrupt2)
- tg->funcs->setup_vertical_interrupt2(tg, start_line);
-}
-
-void dcn10_unblank_stream(struct pipe_ctx *pipe_ctx,
- struct dc_link_settings *link_settings)
-{
- struct encoder_unblank_param params = {0};
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct dc_link *link = stream->link;
- struct dce_hwseq *hws = link->dc->hwseq;
-
- /* only 3 items below are used by unblank */
- params.timing = pipe_ctx->stream->timing;
-
- params.link_settings.link_rate = link_settings->link_rate;
-
- if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
- if (params.timing.pixel_encoding == PIXEL_ENCODING_YCBCR420)
- params.timing.pix_clk_100hz /= 2;
- pipe_ctx->stream_res.stream_enc->funcs->dp_unblank(link, pipe_ctx->stream_res.stream_enc, &params);
- }
-
- if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) {
- hws->funcs.edp_backlight_control(link, true);
- }
-}
-
-void dcn10_send_immediate_sdp_message(struct pipe_ctx *pipe_ctx,
- const uint8_t *custom_sdp_message,
- unsigned int sdp_message_size)
-{
- if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
- pipe_ctx->stream_res.stream_enc->funcs->send_immediate_sdp_message(
- pipe_ctx->stream_res.stream_enc,
- custom_sdp_message,
- sdp_message_size);
- }
-}
-enum dc_status dcn10_set_clock(struct dc *dc,
- enum dc_clock_type clock_type,
- uint32_t clk_khz,
- uint32_t stepping)
-{
- struct dc_state *context = dc->current_state;
- struct dc_clock_config clock_cfg = {0};
- struct dc_clocks *current_clocks = &context->bw_ctx.bw.dcn.clk;
-
- if (!dc->clk_mgr || !dc->clk_mgr->funcs->get_clock)
- return DC_FAIL_UNSUPPORTED_1;
-
- dc->clk_mgr->funcs->get_clock(dc->clk_mgr,
- context, clock_type, &clock_cfg);
-
- if (clk_khz > clock_cfg.max_clock_khz)
- return DC_FAIL_CLK_EXCEED_MAX;
-
- if (clk_khz < clock_cfg.min_clock_khz)
- return DC_FAIL_CLK_BELOW_MIN;
-
- if (clk_khz < clock_cfg.bw_requirequired_clock_khz)
- return DC_FAIL_CLK_BELOW_CFG_REQUIRED;
-
- /*update internal request clock for update clock use*/
- if (clock_type == DC_CLOCK_TYPE_DISPCLK)
- current_clocks->dispclk_khz = clk_khz;
- else if (clock_type == DC_CLOCK_TYPE_DPPCLK)
- current_clocks->dppclk_khz = clk_khz;
- else
- return DC_ERROR_UNEXPECTED;
-
- if (dc->clk_mgr->funcs->update_clocks)
- dc->clk_mgr->funcs->update_clocks(dc->clk_mgr,
- context, true);
- return DC_OK;
-
-}
-
-void dcn10_get_clock(struct dc *dc,
- enum dc_clock_type clock_type,
- struct dc_clock_config *clock_cfg)
-{
- struct dc_state *context = dc->current_state;
-
- if (dc->clk_mgr && dc->clk_mgr->funcs->get_clock)
- dc->clk_mgr->funcs->get_clock(dc->clk_mgr, context, clock_type, clock_cfg);
-
-}
-
-void dcn10_get_dcc_en_bits(struct dc *dc, int *dcc_en_bits)
-{
- struct resource_pool *pool = dc->res_pool;
- int i;
-
- for (i = 0; i < pool->pipe_count; i++) {
- struct hubp *hubp = pool->hubps[i];
- struct dcn_hubp_state *s = &(TO_DCN10_HUBP(hubp)->state);
-
- hubp->funcs->hubp_read_state(hubp);
-
- if (!s->blank_en)
- dcc_en_bits[i] = s->dcc_en ? 1 : 0;
- }
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
deleted file mode 100644
index 9ae07c77fdc0..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
-* Copyright 2016-2020 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#ifndef __DC_HWSS_DCN10_H__
-#define __DC_HWSS_DCN10_H__
-
-#include "core_types.h"
-#include "hw_sequencer_private.h"
-
-struct dc;
-
-void dcn10_hw_sequencer_construct(struct dc *dc);
-
-int dcn10_get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx);
-void dcn10_calc_vupdate_position(
- struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- uint32_t *start_line,
- uint32_t *end_line);
-void dcn10_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx);
-enum dc_status dcn10_enable_stream_timing(
- struct pipe_ctx *pipe_ctx,
- struct dc_state *context,
- struct dc *dc);
-void dcn10_optimize_bandwidth(
- struct dc *dc,
- struct dc_state *context);
-void dcn10_prepare_bandwidth(
- struct dc *dc,
- struct dc_state *context);
-void dcn10_pipe_control_lock(
- struct dc *dc,
- struct pipe_ctx *pipe,
- bool lock);
-void dcn10_cursor_lock(struct dc *dc, struct pipe_ctx *pipe, bool lock);
-void dcn10_blank_pixel_data(
- struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- bool blank);
-void dcn10_unblank_stream(struct pipe_ctx *pipe_ctx,
- struct dc_link_settings *link_settings);
-void dcn10_program_output_csc(struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- enum dc_color_space colorspace,
- uint16_t *matrix,
- int opp_id);
-bool dcn10_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
- const struct dc_stream_state *stream);
-bool dcn10_set_input_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
- const struct dc_plane_state *plane_state);
-void dcn10_update_plane_addr(const struct dc *dc, struct pipe_ctx *pipe_ctx);
-void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx);
-void dcn10_reset_hw_ctx_wrap(
- struct dc *dc,
- struct dc_state *context);
-void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx);
-void dcn10_lock_all_pipes(
- struct dc *dc,
- struct dc_state *context,
- bool lock);
-void dcn10_post_unlock_program_front_end(
- struct dc *dc,
- struct dc_state *context);
-void dcn10_hubp_pg_control(
- struct dce_hwseq *hws,
- unsigned int hubp_inst,
- bool power_on);
-void dcn10_dpp_pg_control(
- struct dce_hwseq *hws,
- unsigned int dpp_inst,
- bool power_on);
-void dcn10_enable_power_gating_plane(
- struct dce_hwseq *hws,
- bool enable);
-void dcn10_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx);
-void dcn10_disable_vga(
- struct dce_hwseq *hws);
-void dcn10_program_pipe(
- struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- struct dc_state *context);
-void dcn10_program_gamut_remap(struct pipe_ctx *pipe_ctx);
-void dcn10_init_hw(struct dc *dc);
-void dcn10_init_pipes(struct dc *dc, struct dc_state *context);
-void dcn10_power_down_on_boot(struct dc *dc);
-enum dc_status dce110_apply_ctx_to_hw(
- struct dc *dc,
- struct dc_state *context);
-void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx);
-void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh_data);
-void dcn10_update_pending_status(struct pipe_ctx *pipe_ctx);
-void dce110_power_down(struct dc *dc);
-void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context);
-void dcn10_enable_timing_synchronization(
- struct dc *dc,
- int group_index,
- int group_size,
- struct pipe_ctx *grouped_pipes[]);
-void dcn10_enable_vblanks_synchronization(
- struct dc *dc,
- int group_index,
- int group_size,
- struct pipe_ctx *grouped_pipes[]);
-void dcn10_enable_per_frame_crtc_position_reset(
- struct dc *dc,
- int group_size,
- struct pipe_ctx *grouped_pipes[]);
-void dce110_update_info_frame(struct pipe_ctx *pipe_ctx);
-void dcn10_send_immediate_sdp_message(struct pipe_ctx *pipe_ctx,
- const uint8_t *custom_sdp_message,
- unsigned int sdp_message_size);
-void dce110_blank_stream(struct pipe_ctx *pipe_ctx);
-void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx);
-void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx);
-bool dcn10_dummy_display_power_gating(
- struct dc *dc,
- uint8_t controller_id,
- struct dc_bios *dcb,
- enum pipe_gating_control power_gating);
-void dcn10_set_drr(struct pipe_ctx **pipe_ctx,
- int num_pipes, struct dc_crtc_timing_adjust adjust);
-void dcn10_get_position(struct pipe_ctx **pipe_ctx,
- int num_pipes,
- struct crtc_position *position);
-void dcn10_set_static_screen_control(struct pipe_ctx **pipe_ctx,
- int num_pipes, const struct dc_static_screen_params *params);
-void dcn10_setup_stereo(struct pipe_ctx *pipe_ctx, struct dc *dc);
-void dce110_set_avmute(struct pipe_ctx *pipe_ctx, bool enable);
-void dcn10_log_hw_state(struct dc *dc,
- struct dc_log_buffer_ctx *log_ctx);
-void dcn10_get_hw_state(struct dc *dc,
- char *pBuf,
- unsigned int bufSize,
- unsigned int mask);
-void dcn10_clear_status_bits(struct dc *dc, unsigned int mask);
-void dcn10_wait_for_mpcc_disconnect(
- struct dc *dc,
- struct resource_pool *res_pool,
- struct pipe_ctx *pipe_ctx);
-void dce110_edp_backlight_control(
- struct dc_link *link,
- bool enable);
-void dce110_edp_wait_for_T12(
- struct dc_link *link);
-void dce110_edp_power_control(
- struct dc_link *link,
- bool power_up);
-void dce110_edp_wait_for_hpd_ready(
- struct dc_link *link,
- bool power_up);
-void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx);
-void dcn10_set_cursor_attribute(struct pipe_ctx *pipe_ctx);
-void dcn10_set_cursor_sdr_white_level(struct pipe_ctx *pipe_ctx);
-void dcn10_setup_periodic_interrupt(
- struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- enum vline_select vline);
-enum dc_status dcn10_set_clock(struct dc *dc,
- enum dc_clock_type clock_type,
- uint32_t clk_khz,
- uint32_t stepping);
-void dcn10_get_clock(struct dc *dc,
- enum dc_clock_type clock_type,
- struct dc_clock_config *clock_cfg);
-bool dcn10_did_underflow_occur(struct dc *dc, struct pipe_ctx *pipe_ctx);
-void dcn10_bios_golden_init(struct dc *dc);
-void dcn10_plane_atomic_power_down(struct dc *dc,
- struct dpp *dpp,
- struct hubp *hubp);
-bool dcn10_disconnect_pipes(
- struct dc *dc,
- struct dc_state *context);
-
-void dcn10_wait_for_pending_cleared(struct dc *dc,
- struct dc_state *context);
-void dcn10_set_hdr_multiplier(struct pipe_ctx *pipe_ctx);
-void dcn10_verify_allow_pstate_change_high(struct dc *dc);
-
-void dcn10_get_dcc_en_bits(struct dc *dc, int *dcc_en_bits);
-
-void dcn10_update_visual_confirm_color(
- struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- struct tg_color *color,
- int mpcc_id);
-
-#endif /* __DC_HWSS_DCN10_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
index e8b6065fffad..baf663b661c8 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
@@ -27,12 +27,12 @@
#include "core_types.h"
#include "resource.h"
#include "custom_float.h"
-#include "dcn10_hw_sequencer.h"
-#include "dce110/dce110_hw_sequencer.h"
+#include "dcn10/dcn10_hwseq.h"
+#include "dce110/dce110_hwseq.h"
#include "dce/dce_hwseq.h"
#include "abm.h"
#include "dmcu.h"
-#include "dcn10_optc.h"
+#include "dcn10/dcn10_optc.h"
#include "dcn10/dcn10_dpp.h"
#include "dcn10/dcn10_mpc.h"
#include "timing_generator.h"
@@ -40,12 +40,13 @@
#include "ipp.h"
#include "mpc.h"
#include "reg_helper.h"
-#include "dcn10_hubp.h"
-#include "dcn10_hubbub.h"
+#include "dcn10/dcn10_hubp.h"
+#include "dcn10/dcn10_hubbub.h"
#include "dcn10_cm_common.h"
#include "clk_mgr.h"
-unsigned int snprintf_count(char *pBuf, unsigned int bufSize, char *fmt, ...)
+__printf(3, 4)
+unsigned int snprintf_count(char *pbuf, unsigned int bufsize, const char *fmt, ...)
{
int ret_vsnprintf;
unsigned int chars_printed;
@@ -53,15 +54,15 @@ unsigned int snprintf_count(char *pBuf, unsigned int bufSize, char *fmt, ...)
va_list args;
va_start(args, fmt);
- ret_vsnprintf = vsnprintf(pBuf, bufSize, fmt, args);
+ ret_vsnprintf = vsnprintf(pbuf, bufsize, fmt, args);
va_end(args);
if (ret_vsnprintf > 0) {
- if (ret_vsnprintf < bufSize)
+ if (ret_vsnprintf < bufsize)
chars_printed = ret_vsnprintf;
else
- chars_printed = bufSize - 1;
+ chars_printed = bufsize - 1;
} else
chars_printed = 0;
@@ -83,7 +84,7 @@ static unsigned int dcn10_get_hubbub_state(struct dc *dc, char *pBuf, unsigned i
memset(&wm, 0, sizeof(struct dcn_hubbub_wm));
dc->res_pool->hubbub->funcs->wm_read_state(dc->res_pool->hubbub, &wm);
- chars_printed = snprintf_count(pBuf, remaining_buffer, "wm_set_index,data_urgent,pte_meta_urgent,sr_enter,sr_exit,dram_clk_chanage\n");
+ chars_printed = snprintf_count(pBuf, remaining_buffer, "wm_set_index,data_urgent,pte_meta_urgent,sr_enter,sr_exit,dram_clk_change\n");
remaining_buffer -= chars_printed;
pBuf += chars_printed;
@@ -98,7 +99,7 @@ static unsigned int dcn10_get_hubbub_state(struct dc *dc, char *pBuf, unsigned i
(s->pte_meta_urgent * frac) / ref_clk_mhz / frac, (s->pte_meta_urgent * frac) / ref_clk_mhz % frac,
(s->sr_enter * frac) / ref_clk_mhz / frac, (s->sr_enter * frac) / ref_clk_mhz % frac,
(s->sr_exit * frac) / ref_clk_mhz / frac, (s->sr_exit * frac) / ref_clk_mhz % frac,
- (s->dram_clk_chanage * frac) / ref_clk_mhz / frac, (s->dram_clk_chanage * frac) / ref_clk_mhz % frac);
+ (s->dram_clk_change * frac) / ref_clk_mhz / frac, (s->dram_clk_change * frac) / ref_clk_mhz % frac);
remaining_buffer -= chars_printed;
pBuf += chars_printed;
}
@@ -391,7 +392,7 @@ static unsigned int dcn10_get_mpcc_states(struct dc *dc, char *pBuf, unsigned in
remaining_buffer -= chars_printed;
pBuf += chars_printed;
- for (i = 0; i < pool->pipe_count; i++) {
+ for (i = 0; i < pool->mpcc_count; i++) {
struct mpcc_state s = {0};
pool->mpc->funcs->read_mpcc_state(pool->mpc, i, &s);
@@ -428,7 +429,9 @@ static unsigned int dcn10_get_otg_states(struct dc *dc, char *pBuf, unsigned int
struct dcn_otg_state s = {0};
int pix_clk = 0;
- optc1_read_otg_state(DCN10TG_FROM_TG(tg), &s);
+ if (tg->funcs->read_otg_state)
+ tg->funcs->read_otg_state(tg, &s);
+
pix_clk = dc->current_state->res_ctx.pipe_ctx[i].stream_res.pix_clk_params.requested_pix_clk_100hz / 10;
//only print if OTG master is enabled
@@ -494,7 +497,8 @@ static void dcn10_clear_otpc_underflow(struct dc *dc)
struct timing_generator *tg = pool->timing_generators[i];
struct dcn_otg_state s = {0};
- optc1_read_otg_state(DCN10TG_FROM_TG(tg), &s);
+ if (tg->funcs->read_otg_state)
+ tg->funcs->read_otg_state(tg, &s);
if (s.otg_enabled & 1)
tg->funcs->clear_optc_underflow(tg);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.c
index f05371c1fc36..24b68337d76e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.c
@@ -23,8 +23,6 @@
*
*/
-#include <linux/slab.h>
-
#include "dm_services.h"
#include "dcn10_ipp.h"
#include "reg_helper.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
deleted file mode 100644
index c50c29984d51..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
+++ /dev/null
@@ -1,717 +0,0 @@
-/*
- * Copyright 2012-15 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#ifndef __DC_TIMING_GENERATOR_DCN10_H__
-#define __DC_TIMING_GENERATOR_DCN10_H__
-
-#include "timing_generator.h"
-
-#define DCN10TG_FROM_TG(tg)\
- container_of(tg, struct optc, base)
-
-#define TG_COMMON_REG_LIST_DCN(inst) \
- SRI(OTG_VSTARTUP_PARAM, OTG, inst),\
- SRI(OTG_VUPDATE_PARAM, OTG, inst),\
- SRI(OTG_VREADY_PARAM, OTG, inst),\
- SRI(OTG_BLANK_CONTROL, OTG, inst),\
- SRI(OTG_MASTER_UPDATE_LOCK, OTG, inst),\
- SRI(OTG_GLOBAL_CONTROL0, OTG, inst),\
- SRI(OTG_DOUBLE_BUFFER_CONTROL, OTG, inst),\
- SRI(OTG_H_TOTAL, OTG, inst),\
- SRI(OTG_H_BLANK_START_END, OTG, inst),\
- SRI(OTG_H_SYNC_A, OTG, inst),\
- SRI(OTG_H_SYNC_A_CNTL, OTG, inst),\
- SRI(OTG_H_TIMING_CNTL, OTG, inst),\
- SRI(OTG_V_TOTAL, OTG, inst),\
- SRI(OTG_V_BLANK_START_END, OTG, inst),\
- SRI(OTG_V_SYNC_A, OTG, inst),\
- SRI(OTG_V_SYNC_A_CNTL, OTG, inst),\
- SRI(OTG_INTERLACE_CONTROL, OTG, inst),\
- SRI(OTG_CONTROL, OTG, inst),\
- SRI(OTG_STEREO_CONTROL, OTG, inst),\
- SRI(OTG_3D_STRUCTURE_CONTROL, OTG, inst),\
- SRI(OTG_STEREO_STATUS, OTG, inst),\
- SRI(OTG_V_TOTAL_MAX, OTG, inst),\
- SRI(OTG_V_TOTAL_MID, OTG, inst),\
- SRI(OTG_V_TOTAL_MIN, OTG, inst),\
- SRI(OTG_V_TOTAL_CONTROL, OTG, inst),\
- SRI(OTG_TRIGA_CNTL, OTG, inst),\
- SRI(OTG_FORCE_COUNT_NOW_CNTL, OTG, inst),\
- SRI(OTG_STATIC_SCREEN_CONTROL, OTG, inst),\
- SRI(OTG_STATUS_FRAME_COUNT, OTG, inst),\
- SRI(OTG_STATUS, OTG, inst),\
- SRI(OTG_STATUS_POSITION, OTG, inst),\
- SRI(OTG_NOM_VERT_POSITION, OTG, inst),\
- SRI(OTG_BLACK_COLOR, OTG, inst),\
- SRI(OTG_CLOCK_CONTROL, OTG, inst),\
- SRI(OTG_VERTICAL_INTERRUPT0_CONTROL, OTG, inst),\
- SRI(OTG_VERTICAL_INTERRUPT0_POSITION, OTG, inst),\
- SRI(OTG_VERTICAL_INTERRUPT1_CONTROL, OTG, inst),\
- SRI(OTG_VERTICAL_INTERRUPT1_POSITION, OTG, inst),\
- SRI(OTG_VERTICAL_INTERRUPT2_CONTROL, OTG, inst),\
- SRI(OTG_VERTICAL_INTERRUPT2_POSITION, OTG, inst),\
- SRI(OPTC_INPUT_CLOCK_CONTROL, ODM, inst),\
- SRI(OPTC_DATA_SOURCE_SELECT, ODM, inst),\
- SRI(OPTC_INPUT_GLOBAL_CONTROL, ODM, inst),\
- SRI(CONTROL, VTG, inst),\
- SRI(OTG_VERT_SYNC_CONTROL, OTG, inst),\
- SRI(OTG_MASTER_UPDATE_MODE, OTG, inst),\
- SRI(OTG_GSL_CONTROL, OTG, inst),\
- SRI(OTG_CRC_CNTL, OTG, inst),\
- SRI(OTG_CRC0_DATA_RG, OTG, inst),\
- SRI(OTG_CRC0_DATA_B, OTG, inst),\
- SRI(OTG_CRC0_WINDOWA_X_CONTROL, OTG, inst),\
- SRI(OTG_CRC0_WINDOWA_Y_CONTROL, OTG, inst),\
- SRI(OTG_CRC0_WINDOWB_X_CONTROL, OTG, inst),\
- SRI(OTG_CRC0_WINDOWB_Y_CONTROL, OTG, inst),\
- SR(GSL_SOURCE_SELECT),\
- SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\
- SRI(OTG_TRIGA_MANUAL_TRIG, OTG, inst)
-
-#define TG_COMMON_REG_LIST_DCN1_0(inst) \
- TG_COMMON_REG_LIST_DCN(inst),\
- SRI(OTG_TEST_PATTERN_PARAMETERS, OTG, inst),\
- SRI(OTG_TEST_PATTERN_CONTROL, OTG, inst),\
- SRI(OTG_TEST_PATTERN_COLOR, OTG, inst),\
- SRI(OTG_MANUAL_FLOW_CONTROL, OTG, inst)
-
-
-struct dcn_optc_registers {
- uint32_t OTG_GLOBAL_CONTROL1;
- uint32_t OTG_GLOBAL_CONTROL2;
- uint32_t OTG_VERT_SYNC_CONTROL;
- uint32_t OTG_MASTER_UPDATE_MODE;
- uint32_t OTG_GSL_CONTROL;
- uint32_t OTG_VSTARTUP_PARAM;
- uint32_t OTG_VUPDATE_PARAM;
- uint32_t OTG_VREADY_PARAM;
- uint32_t OTG_BLANK_CONTROL;
- uint32_t OTG_MASTER_UPDATE_LOCK;
- uint32_t OTG_GLOBAL_CONTROL0;
- uint32_t OTG_DOUBLE_BUFFER_CONTROL;
- uint32_t OTG_H_TOTAL;
- uint32_t OTG_H_BLANK_START_END;
- uint32_t OTG_H_SYNC_A;
- uint32_t OTG_H_SYNC_A_CNTL;
- uint32_t OTG_H_TIMING_CNTL;
- uint32_t OTG_V_TOTAL;
- uint32_t OTG_V_BLANK_START_END;
- uint32_t OTG_V_SYNC_A;
- uint32_t OTG_V_SYNC_A_CNTL;
- uint32_t OTG_INTERLACE_CONTROL;
- uint32_t OTG_CONTROL;
- uint32_t OTG_STEREO_CONTROL;
- uint32_t OTG_3D_STRUCTURE_CONTROL;
- uint32_t OTG_STEREO_STATUS;
- uint32_t OTG_V_TOTAL_MAX;
- uint32_t OTG_V_TOTAL_MID;
- uint32_t OTG_V_TOTAL_MIN;
- uint32_t OTG_V_TOTAL_CONTROL;
- uint32_t OTG_TRIGA_CNTL;
- uint32_t OTG_TRIGA_MANUAL_TRIG;
- uint32_t OTG_MANUAL_FLOW_CONTROL;
- uint32_t OTG_FORCE_COUNT_NOW_CNTL;
- uint32_t OTG_STATIC_SCREEN_CONTROL;
- uint32_t OTG_STATUS_FRAME_COUNT;
- uint32_t OTG_STATUS;
- uint32_t OTG_STATUS_POSITION;
- uint32_t OTG_NOM_VERT_POSITION;
- uint32_t OTG_BLACK_COLOR;
- uint32_t OTG_TEST_PATTERN_PARAMETERS;
- uint32_t OTG_TEST_PATTERN_CONTROL;
- uint32_t OTG_TEST_PATTERN_COLOR;
- uint32_t OTG_CLOCK_CONTROL;
- uint32_t OTG_VERTICAL_INTERRUPT0_CONTROL;
- uint32_t OTG_VERTICAL_INTERRUPT0_POSITION;
- uint32_t OTG_VERTICAL_INTERRUPT1_CONTROL;
- uint32_t OTG_VERTICAL_INTERRUPT1_POSITION;
- uint32_t OTG_VERTICAL_INTERRUPT2_CONTROL;
- uint32_t OTG_VERTICAL_INTERRUPT2_POSITION;
- uint32_t OPTC_INPUT_CLOCK_CONTROL;
- uint32_t OPTC_DATA_SOURCE_SELECT;
- uint32_t OPTC_MEMORY_CONFIG;
- uint32_t OPTC_INPUT_GLOBAL_CONTROL;
- uint32_t CONTROL;
- uint32_t OTG_GSL_WINDOW_X;
- uint32_t OTG_GSL_WINDOW_Y;
- uint32_t OTG_VUPDATE_KEEPOUT;
- uint32_t OTG_CRC_CNTL;
- uint32_t OTG_CRC_CNTL2;
- uint32_t OTG_CRC0_DATA_RG;
- uint32_t OTG_CRC0_DATA_B;
- uint32_t OTG_CRC0_WINDOWA_X_CONTROL;
- uint32_t OTG_CRC0_WINDOWA_Y_CONTROL;
- uint32_t OTG_CRC0_WINDOWB_X_CONTROL;
- uint32_t OTG_CRC0_WINDOWB_Y_CONTROL;
- uint32_t GSL_SOURCE_SELECT;
- uint32_t DWB_SOURCE_SELECT;
- uint32_t OTG_DSC_START_POSITION;
- uint32_t OPTC_DATA_FORMAT_CONTROL;
- uint32_t OPTC_BYTES_PER_PIXEL;
- uint32_t OPTC_WIDTH_CONTROL;
- uint32_t OTG_DRR_CONTROL;
- uint32_t OTG_BLANK_DATA_COLOR;
- uint32_t OTG_BLANK_DATA_COLOR_EXT;
- uint32_t OTG_DRR_TRIGGER_WINDOW;
- uint32_t OTG_M_CONST_DTO0;
- uint32_t OTG_M_CONST_DTO1;
- uint32_t OTG_DRR_V_TOTAL_CHANGE;
- uint32_t OTG_GLOBAL_CONTROL4;
-};
-
-#define TG_COMMON_MASK_SH_LIST_DCN(mask_sh)\
- SF(OTG0_OTG_VSTARTUP_PARAM, VSTARTUP_START, mask_sh),\
- SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_OFFSET, mask_sh),\
- SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_WIDTH, mask_sh),\
- SF(OTG0_OTG_VREADY_PARAM, VREADY_OFFSET, mask_sh),\
- SF(OTG0_OTG_BLANK_CONTROL, OTG_BLANK_DATA_EN, mask_sh),\
- SF(OTG0_OTG_BLANK_CONTROL, OTG_BLANK_DE_MODE, mask_sh),\
- SF(OTG0_OTG_BLANK_CONTROL, OTG_CURRENT_BLANK_STATE, mask_sh),\
- SF(OTG0_OTG_MASTER_UPDATE_LOCK, OTG_MASTER_UPDATE_LOCK, mask_sh),\
- SF(OTG0_OTG_MASTER_UPDATE_LOCK, UPDATE_LOCK_STATUS, mask_sh),\
- SF(OTG0_OTG_GLOBAL_CONTROL0, OTG_MASTER_UPDATE_LOCK_SEL, mask_sh),\
- SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_UPDATE_PENDING, mask_sh),\
- SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_BLANK_DATA_DOUBLE_BUFFER_EN, mask_sh),\
- SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_RANGE_TIMING_DBUF_UPDATE_MODE, mask_sh),\
- SF(OTG0_OTG_VUPDATE_KEEPOUT, OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, mask_sh), \
- SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, mask_sh), \
- SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, mask_sh), \
- SF(OTG0_OTG_H_TOTAL, OTG_H_TOTAL, mask_sh),\
- SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_START, mask_sh),\
- SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_END, mask_sh),\
- SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_START, mask_sh),\
- SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_END, mask_sh),\
- SF(OTG0_OTG_H_SYNC_A_CNTL, OTG_H_SYNC_A_POL, mask_sh),\
- SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_BY2, mask_sh),\
- SF(OTG0_OTG_V_TOTAL, OTG_V_TOTAL, mask_sh),\
- SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_START, mask_sh),\
- SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_END, mask_sh),\
- SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_START, mask_sh),\
- SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_END, mask_sh),\
- SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_A_POL, mask_sh),\
- SF(OTG0_OTG_INTERLACE_CONTROL, OTG_INTERLACE_ENABLE, mask_sh),\
- SF(OTG0_OTG_CONTROL, OTG_MASTER_EN, mask_sh),\
- SF(OTG0_OTG_CONTROL, OTG_START_POINT_CNTL, mask_sh),\
- SF(OTG0_OTG_CONTROL, OTG_DISABLE_POINT_CNTL, mask_sh),\
- SF(OTG0_OTG_CONTROL, OTG_FIELD_NUMBER_CNTL, mask_sh),\
- SF(OTG0_OTG_CONTROL, OTG_CURRENT_MASTER_EN_STATE, mask_sh),\
- SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EN, mask_sh),\
- SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_LINE_NUM, mask_sh),\
- SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_POLARITY, mask_sh),\
- SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EYE_FLAG_POLARITY, mask_sh),\
- SF(OTG0_OTG_STEREO_CONTROL, OTG_DISABLE_STEREOSYNC_OUTPUT_FOR_DP, mask_sh),\
- SF(OTG0_OTG_STEREO_CONTROL, OTG_DISABLE_STEREOSYNC_OUTPUT_FOR_DP, mask_sh),\
- SF(OTG0_OTG_STEREO_STATUS, OTG_STEREO_CURRENT_EYE, mask_sh),\
- SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_EN, mask_sh),\
- SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_V_UPDATE_MODE, mask_sh),\
- SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_STEREO_SEL_OVR, mask_sh),\
- SF(OTG0_OTG_V_TOTAL_MAX, OTG_V_TOTAL_MAX, mask_sh),\
- SF(OTG0_OTG_V_TOTAL_MID, OTG_V_TOTAL_MID, mask_sh),\
- SF(OTG0_OTG_V_TOTAL_MIN, OTG_V_TOTAL_MIN, mask_sh),\
- SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MIN_SEL, mask_sh),\
- SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MAX_SEL, mask_sh),\
- SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_FORCE_LOCK_ON_EVENT, mask_sh),\
- SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_SET_V_TOTAL_MIN_MASK_EN, mask_sh),\
- SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_SET_V_TOTAL_MIN_MASK, mask_sh),\
- SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_REPLACING_MAX_EN, mask_sh),\
- SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_FRAME_NUM, mask_sh),\
- SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_CLEAR, mask_sh),\
- SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_MODE, mask_sh),\
- SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_OCCURRED, mask_sh),\
- SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_SELECT, mask_sh),\
- SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_PIPE_SELECT, mask_sh),\
- SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_RISING_EDGE_DETECT_CNTL, mask_sh),\
- SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, mask_sh),\
- SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_POLARITY_SELECT, mask_sh),\
- SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FREQUENCY_SELECT, mask_sh),\
- SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_DELAY, mask_sh),\
- SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_CLEAR, mask_sh),\
- SF(OTG0_OTG_TRIGA_MANUAL_TRIG, OTG_TRIGA_MANUAL_TRIG, mask_sh),\
- SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_EVENT_MASK, mask_sh),\
- SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_FRAME_COUNT, mask_sh),\
- SF(OTG0_OTG_STATUS_FRAME_COUNT, OTG_FRAME_COUNT, mask_sh),\
- SF(OTG0_OTG_STATUS, OTG_V_BLANK, mask_sh),\
- SF(OTG0_OTG_STATUS, OTG_V_ACTIVE_DISP, mask_sh),\
- SF(OTG0_OTG_STATUS_POSITION, OTG_HORZ_COUNT, mask_sh),\
- SF(OTG0_OTG_STATUS_POSITION, OTG_VERT_COUNT, mask_sh),\
- SF(OTG0_OTG_NOM_VERT_POSITION, OTG_VERT_COUNT_NOM, mask_sh),\
- SF(OTG0_OTG_BLACK_COLOR, OTG_BLACK_COLOR_B_CB, mask_sh),\
- SF(OTG0_OTG_BLACK_COLOR, OTG_BLACK_COLOR_G_Y, mask_sh),\
- SF(OTG0_OTG_BLACK_COLOR, OTG_BLACK_COLOR_R_CR, mask_sh),\
- SF(OTG0_OTG_CLOCK_CONTROL, OTG_BUSY, mask_sh),\
- SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_EN, mask_sh),\
- SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_ON, mask_sh),\
- SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_GATE_DIS, mask_sh),\
- SF(OTG0_OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE, mask_sh),\
- SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_START, mask_sh),\
- SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_END, mask_sh),\
- SF(OTG0_OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_INT_ENABLE, mask_sh),\
- SF(OTG0_OTG_VERTICAL_INTERRUPT1_POSITION, OTG_VERTICAL_INTERRUPT1_LINE_START, mask_sh),\
- SF(OTG0_OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE, mask_sh),\
- SF(OTG0_OTG_VERTICAL_INTERRUPT2_POSITION, OTG_VERTICAL_INTERRUPT2_LINE_START, mask_sh),\
- SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_EN, mask_sh),\
- SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_ON, mask_sh),\
- SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_GATE_DIS, mask_sh),\
- SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_OCCURRED_STATUS, mask_sh),\
- SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, mask_sh),\
- SF(VTG0_CONTROL, VTG0_ENABLE, mask_sh),\
- SF(VTG0_CONTROL, VTG0_FP2, mask_sh),\
- SF(VTG0_CONTROL, VTG0_VCOUNT_INIT, mask_sh),\
- SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_OCCURRED, mask_sh),\
- SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_CLEAR, mask_sh),\
- SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_AUTO_FORCE_VSYNC_MODE, mask_sh),\
- SF(OTG0_OTG_MASTER_UPDATE_MODE, MASTER_UPDATE_INTERLACED_MODE, mask_sh),\
- SF(OTG0_OTG_GSL_CONTROL, OTG_GSL0_EN, mask_sh),\
- SF(OTG0_OTG_GSL_CONTROL, OTG_GSL1_EN, mask_sh),\
- SF(OTG0_OTG_GSL_CONTROL, OTG_GSL2_EN, mask_sh),\
- SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_EN, mask_sh),\
- SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_FORCE_DELAY, mask_sh),\
- SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_CHECK_ALL_FIELDS, mask_sh),\
- SF(OTG0_OTG_CRC_CNTL, OTG_CRC_CONT_EN, mask_sh),\
- SF(OTG0_OTG_CRC_CNTL, OTG_CRC0_SELECT, mask_sh),\
- SF(OTG0_OTG_CRC_CNTL, OTG_CRC_EN, mask_sh),\
- SF(OTG0_OTG_CRC0_DATA_RG, CRC0_R_CR, mask_sh),\
- SF(OTG0_OTG_CRC0_DATA_RG, CRC0_G_Y, mask_sh),\
- SF(OTG0_OTG_CRC0_DATA_B, CRC0_B_CB, mask_sh),\
- SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_START, mask_sh),\
- SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_END, mask_sh),\
- SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_START, mask_sh),\
- SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_END, mask_sh),\
- SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_START, mask_sh),\
- SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_END, mask_sh),\
- SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_START, mask_sh),\
- SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_END, mask_sh),\
- SF(GSL_SOURCE_SELECT, GSL0_READY_SOURCE_SEL, mask_sh),\
- SF(GSL_SOURCE_SELECT, GSL1_READY_SOURCE_SEL, mask_sh),\
- SF(GSL_SOURCE_SELECT, GSL2_READY_SOURCE_SEL, mask_sh),\
- SF(OTG0_OTG_GLOBAL_CONTROL2, MANUAL_FLOW_CONTROL_SEL, mask_sh)
-
-
-
-#define TG_COMMON_MASK_SH_LIST_DCN1_0(mask_sh)\
- TG_COMMON_MASK_SH_LIST_DCN(mask_sh),\
- SF(OTG0_OTG_TEST_PATTERN_PARAMETERS, OTG_TEST_PATTERN_INC0, mask_sh),\
- SF(OTG0_OTG_TEST_PATTERN_PARAMETERS, OTG_TEST_PATTERN_INC1, mask_sh),\
- SF(OTG0_OTG_TEST_PATTERN_PARAMETERS, OTG_TEST_PATTERN_VRES, mask_sh),\
- SF(OTG0_OTG_TEST_PATTERN_PARAMETERS, OTG_TEST_PATTERN_HRES, mask_sh),\
- SF(OTG0_OTG_TEST_PATTERN_PARAMETERS, OTG_TEST_PATTERN_RAMP0_OFFSET, mask_sh),\
- SF(OTG0_OTG_TEST_PATTERN_CONTROL, OTG_TEST_PATTERN_EN, mask_sh),\
- SF(OTG0_OTG_TEST_PATTERN_CONTROL, OTG_TEST_PATTERN_MODE, mask_sh),\
- SF(OTG0_OTG_TEST_PATTERN_CONTROL, OTG_TEST_PATTERN_DYNAMIC_RANGE, mask_sh),\
- SF(OTG0_OTG_TEST_PATTERN_CONTROL, OTG_TEST_PATTERN_COLOR_FORMAT, mask_sh),\
- SF(OTG0_OTG_TEST_PATTERN_COLOR, OTG_TEST_PATTERN_MASK, mask_sh),\
- SF(OTG0_OTG_TEST_PATTERN_COLOR, OTG_TEST_PATTERN_DATA, mask_sh),\
- SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SRC_SEL, mask_sh),\
- SF(OTG0_OTG_MANUAL_FLOW_CONTROL, MANUAL_FLOW_CONTROL, mask_sh),\
-
-#define TG_REG_FIELD_LIST_DCN1_0(type) \
- type VSTARTUP_START;\
- type VUPDATE_OFFSET;\
- type VUPDATE_WIDTH;\
- type VREADY_OFFSET;\
- type OTG_BLANK_DATA_EN;\
- type OTG_BLANK_DE_MODE;\
- type OTG_CURRENT_BLANK_STATE;\
- type OTG_MASTER_UPDATE_LOCK;\
- type UPDATE_LOCK_STATUS;\
- type OTG_UPDATE_PENDING;\
- type OTG_MASTER_UPDATE_LOCK_SEL;\
- type OTG_BLANK_DATA_DOUBLE_BUFFER_EN;\
- type OTG_H_TOTAL;\
- type OTG_H_BLANK_START;\
- type OTG_H_BLANK_END;\
- type OTG_H_SYNC_A_START;\
- type OTG_H_SYNC_A_END;\
- type OTG_H_SYNC_A_POL;\
- type OTG_H_TIMING_DIV_BY2;\
- type OTG_V_TOTAL;\
- type OTG_V_BLANK_START;\
- type OTG_V_BLANK_END;\
- type OTG_V_SYNC_A_START;\
- type OTG_V_SYNC_A_END;\
- type OTG_V_SYNC_A_POL;\
- type OTG_INTERLACE_ENABLE;\
- type OTG_MASTER_EN;\
- type OTG_START_POINT_CNTL;\
- type OTG_DISABLE_POINT_CNTL;\
- type OTG_FIELD_NUMBER_CNTL;\
- type OTG_CURRENT_MASTER_EN_STATE;\
- type OTG_STEREO_EN;\
- type OTG_STEREO_SYNC_OUTPUT_LINE_NUM;\
- type OTG_STEREO_SYNC_OUTPUT_POLARITY;\
- type OTG_STEREO_EYE_FLAG_POLARITY;\
- type OTG_STEREO_CURRENT_EYE;\
- type OTG_DISABLE_STEREOSYNC_OUTPUT_FOR_DP;\
- type OTG_3D_STRUCTURE_EN;\
- type OTG_3D_STRUCTURE_V_UPDATE_MODE;\
- type OTG_3D_STRUCTURE_STEREO_SEL_OVR;\
- type OTG_V_TOTAL_MAX;\
- type OTG_V_TOTAL_MID;\
- type OTG_V_TOTAL_MIN;\
- type OTG_V_TOTAL_MIN_SEL;\
- type OTG_V_TOTAL_MAX_SEL;\
- type OTG_VTOTAL_MID_REPLACING_MAX_EN;\
- type OTG_VTOTAL_MID_FRAME_NUM;\
- type OTG_FORCE_LOCK_ON_EVENT;\
- type OTG_SET_V_TOTAL_MIN_MASK_EN;\
- type OTG_SET_V_TOTAL_MIN_MASK;\
- type OTG_FORCE_COUNT_NOW_CLEAR;\
- type OTG_FORCE_COUNT_NOW_MODE;\
- type OTG_FORCE_COUNT_NOW_OCCURRED;\
- type OTG_TRIGA_SOURCE_SELECT;\
- type OTG_TRIGA_SOURCE_PIPE_SELECT;\
- type OTG_TRIGA_RISING_EDGE_DETECT_CNTL;\
- type OTG_TRIGA_FALLING_EDGE_DETECT_CNTL;\
- type OTG_TRIGA_POLARITY_SELECT;\
- type OTG_TRIGA_FREQUENCY_SELECT;\
- type OTG_TRIGA_DELAY;\
- type OTG_TRIGA_CLEAR;\
- type OTG_TRIGA_MANUAL_TRIG;\
- type OTG_STATIC_SCREEN_EVENT_MASK;\
- type OTG_STATIC_SCREEN_FRAME_COUNT;\
- type OTG_FRAME_COUNT;\
- type OTG_V_BLANK;\
- type OTG_V_ACTIVE_DISP;\
- type OTG_HORZ_COUNT;\
- type OTG_VERT_COUNT;\
- type OTG_VERT_COUNT_NOM;\
- type OTG_BLACK_COLOR_B_CB;\
- type OTG_BLACK_COLOR_G_Y;\
- type OTG_BLACK_COLOR_R_CR;\
- type OTG_BLANK_DATA_COLOR_BLUE_CB;\
- type OTG_BLANK_DATA_COLOR_GREEN_Y;\
- type OTG_BLANK_DATA_COLOR_RED_CR;\
- type OTG_BLANK_DATA_COLOR_BLUE_CB_EXT;\
- type OTG_BLANK_DATA_COLOR_GREEN_Y_EXT;\
- type OTG_BLANK_DATA_COLOR_RED_CR_EXT;\
- type OTG_VTOTAL_MID_REPLACING_MIN_EN;\
- type OTG_TEST_PATTERN_INC0;\
- type OTG_TEST_PATTERN_INC1;\
- type OTG_TEST_PATTERN_VRES;\
- type OTG_TEST_PATTERN_HRES;\
- type OTG_TEST_PATTERN_RAMP0_OFFSET;\
- type OTG_TEST_PATTERN_EN;\
- type OTG_TEST_PATTERN_MODE;\
- type OTG_TEST_PATTERN_DYNAMIC_RANGE;\
- type OTG_TEST_PATTERN_COLOR_FORMAT;\
- type OTG_TEST_PATTERN_MASK;\
- type OTG_TEST_PATTERN_DATA;\
- type OTG_BUSY;\
- type OTG_CLOCK_EN;\
- type OTG_CLOCK_ON;\
- type OTG_CLOCK_GATE_DIS;\
- type OTG_VERTICAL_INTERRUPT0_INT_ENABLE;\
- type OTG_VERTICAL_INTERRUPT0_LINE_START;\
- type OTG_VERTICAL_INTERRUPT0_LINE_END;\
- type OTG_VERTICAL_INTERRUPT1_INT_ENABLE;\
- type OTG_VERTICAL_INTERRUPT1_LINE_START;\
- type OTG_VERTICAL_INTERRUPT2_INT_ENABLE;\
- type OTG_VERTICAL_INTERRUPT2_LINE_START;\
- type OPTC_INPUT_CLK_EN;\
- type OPTC_INPUT_CLK_ON;\
- type OPTC_INPUT_CLK_GATE_DIS;\
- type OPTC_UNDERFLOW_OCCURRED_STATUS;\
- type OPTC_UNDERFLOW_CLEAR;\
- type OPTC_SRC_SEL;\
- type VTG0_ENABLE;\
- type VTG0_FP2;\
- type VTG0_VCOUNT_INIT;\
- type OTG_FORCE_VSYNC_NEXT_LINE_OCCURRED;\
- type OTG_FORCE_VSYNC_NEXT_LINE_CLEAR;\
- type OTG_AUTO_FORCE_VSYNC_MODE;\
- type MASTER_UPDATE_INTERLACED_MODE;\
- type OTG_GSL0_EN;\
- type OTG_GSL1_EN;\
- type OTG_GSL2_EN;\
- type OTG_GSL_MASTER_EN;\
- type OTG_GSL_FORCE_DELAY;\
- type OTG_GSL_CHECK_ALL_FIELDS;\
- type OTG_GSL_WINDOW_START_X;\
- type OTG_GSL_WINDOW_END_X;\
- type OTG_GSL_WINDOW_START_Y;\
- type OTG_GSL_WINDOW_END_Y;\
- type OTG_RANGE_TIMING_DBUF_UPDATE_MODE;\
- type OTG_GSL_MASTER_MODE;\
- type OTG_MASTER_UPDATE_LOCK_GSL_EN;\
- type MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET;\
- type MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET;\
- type OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN;\
- type OTG_CRC_CONT_EN;\
- type OTG_CRC0_SELECT;\
- type OTG_CRC_EN;\
- type CRC0_R_CR;\
- type CRC0_G_Y;\
- type CRC0_B_CB;\
- type OTG_CRC0_WINDOWA_X_START;\
- type OTG_CRC0_WINDOWA_X_END;\
- type OTG_CRC0_WINDOWA_Y_START;\
- type OTG_CRC0_WINDOWA_Y_END;\
- type OTG_CRC0_WINDOWB_X_START;\
- type OTG_CRC0_WINDOWB_X_END;\
- type OTG_CRC0_WINDOWB_Y_START;\
- type OTG_CRC0_WINDOWB_Y_END;\
- type GSL0_READY_SOURCE_SEL;\
- type GSL1_READY_SOURCE_SEL;\
- type GSL2_READY_SOURCE_SEL;\
- type MANUAL_FLOW_CONTROL;\
- type MANUAL_FLOW_CONTROL_SEL;
-
-#define TG_REG_FIELD_LIST(type) \
- TG_REG_FIELD_LIST_DCN1_0(type)\
- type OTG_V_SYNC_MODE;\
- type OTG_DRR_TRIGGER_WINDOW_START_X;\
- type OTG_DRR_TRIGGER_WINDOW_END_X;\
- type OTG_DRR_V_TOTAL_CHANGE_LIMIT;\
- type OTG_OUT_MUX;\
- type OTG_M_CONST_DTO_PHASE;\
- type OTG_M_CONST_DTO_MODULO;\
- type MASTER_UPDATE_LOCK_DB_X;\
- type MASTER_UPDATE_LOCK_DB_Y;\
- type MASTER_UPDATE_LOCK_DB_EN;\
- type GLOBAL_UPDATE_LOCK_EN;\
- type DIG_UPDATE_LOCATION;\
- type OTG_DSC_START_POSITION_X;\
- type OTG_DSC_START_POSITION_LINE_NUM;\
- type OPTC_NUM_OF_INPUT_SEGMENT;\
- type OPTC_SEG0_SRC_SEL;\
- type OPTC_SEG1_SRC_SEL;\
- type OPTC_SEG2_SRC_SEL;\
- type OPTC_SEG3_SRC_SEL;\
- type OPTC_MEM_SEL;\
- type OPTC_DATA_FORMAT;\
- type OPTC_DSC_MODE;\
- type OPTC_DSC_BYTES_PER_PIXEL;\
- type OPTC_DSC_SLICE_WIDTH;\
- type OPTC_SEGMENT_WIDTH;\
- type OPTC_DWB0_SOURCE_SELECT;\
- type OPTC_DWB1_SOURCE_SELECT;\
- type MASTER_UPDATE_LOCK_DB_START_X;\
- type MASTER_UPDATE_LOCK_DB_END_X;\
- type MASTER_UPDATE_LOCK_DB_START_Y;\
- type MASTER_UPDATE_LOCK_DB_END_Y;\
- type DIG_UPDATE_POSITION_X;\
- type DIG_UPDATE_POSITION_Y;\
- type OTG_H_TIMING_DIV_MODE;\
- type OTG_DRR_TIMING_DBUF_UPDATE_MODE;\
- type OTG_CRC_DSC_MODE;\
- type OTG_CRC_DATA_STREAM_COMBINE_MODE;\
- type OTG_CRC_DATA_STREAM_SPLIT_MODE;\
- type OTG_CRC_DATA_FORMAT;\
- type OTG_V_TOTAL_LAST_USED_BY_DRR;
-
-
-struct dcn_optc_shift {
- TG_REG_FIELD_LIST(uint8_t)
-};
-
-struct dcn_optc_mask {
- TG_REG_FIELD_LIST(uint32_t)
-};
-
-struct optc {
- struct timing_generator base;
-
- const struct dcn_optc_registers *tg_regs;
- const struct dcn_optc_shift *tg_shift;
- const struct dcn_optc_mask *tg_mask;
-
- int opp_count;
-
- uint32_t max_h_total;
- uint32_t max_v_total;
-
- uint32_t min_h_blank;
-
- uint32_t min_h_sync_width;
- uint32_t min_v_sync_width;
- uint32_t min_v_blank;
- uint32_t min_v_blank_interlace;
-
- int vstartup_start;
- int vupdate_offset;
- int vupdate_width;
- int vready_offset;
- enum signal_type signal;
-};
-
-void dcn10_timing_generator_init(struct optc *optc);
-
-struct dcn_otg_state {
- uint32_t v_blank_start;
- uint32_t v_blank_end;
- uint32_t v_sync_a_pol;
- uint32_t v_total;
- uint32_t v_total_max;
- uint32_t v_total_min;
- uint32_t v_total_min_sel;
- uint32_t v_total_max_sel;
- uint32_t v_sync_a_start;
- uint32_t v_sync_a_end;
- uint32_t h_blank_start;
- uint32_t h_blank_end;
- uint32_t h_sync_a_start;
- uint32_t h_sync_a_end;
- uint32_t h_sync_a_pol;
- uint32_t h_total;
- uint32_t underflow_occurred_status;
- uint32_t otg_enabled;
- uint32_t blank_enabled;
- uint32_t vertical_interrupt2_en;
- uint32_t vertical_interrupt2_line;
-};
-
-void optc1_read_otg_state(struct optc *optc1,
- struct dcn_otg_state *s);
-
-bool optc1_get_hw_timing(struct timing_generator *tg,
- struct dc_crtc_timing *hw_crtc_timing);
-
-bool optc1_validate_timing(
- struct timing_generator *optc,
- const struct dc_crtc_timing *timing);
-
-void optc1_program_timing(
- struct timing_generator *optc,
- const struct dc_crtc_timing *dc_crtc_timing,
- int vready_offset,
- int vstartup_start,
- int vupdate_offset,
- int vupdate_width,
- const enum signal_type signal,
- bool use_vbios);
-
-void optc1_setup_vertical_interrupt0(
- struct timing_generator *optc,
- uint32_t start_line,
- uint32_t end_line);
-void optc1_setup_vertical_interrupt1(
- struct timing_generator *optc,
- uint32_t start_line);
-void optc1_setup_vertical_interrupt2(
- struct timing_generator *optc,
- uint32_t start_line);
-
-void optc1_program_global_sync(
- struct timing_generator *optc,
- int vready_offset,
- int vstartup_start,
- int vupdate_offset,
- int vupdate_width);
-
-bool optc1_disable_crtc(struct timing_generator *optc);
-
-bool optc1_is_counter_moving(struct timing_generator *optc);
-
-void optc1_get_position(struct timing_generator *optc,
- struct crtc_position *position);
-
-uint32_t optc1_get_vblank_counter(struct timing_generator *optc);
-
-void optc1_get_crtc_scanoutpos(
- struct timing_generator *optc,
- uint32_t *v_blank_start,
- uint32_t *v_blank_end,
- uint32_t *h_position,
- uint32_t *v_position);
-
-void optc1_set_early_control(
- struct timing_generator *optc,
- uint32_t early_cntl);
-
-void optc1_wait_for_state(struct timing_generator *optc,
- enum crtc_state state);
-
-void optc1_set_blank(struct timing_generator *optc,
- bool enable_blanking);
-
-bool optc1_is_blanked(struct timing_generator *optc);
-bool optc1_is_locked(struct timing_generator *optc);
-
-void optc1_program_blank_color(
- struct timing_generator *optc,
- const struct tg_color *black_color);
-
-bool optc1_did_triggered_reset_occur(
- struct timing_generator *optc);
-
-void optc1_enable_reset_trigger(struct timing_generator *optc, int source_tg_inst);
-
-void optc1_disable_reset_trigger(struct timing_generator *optc);
-
-void optc1_lock(struct timing_generator *optc);
-
-void optc1_unlock(struct timing_generator *optc);
-
-void optc1_enable_optc_clock(struct timing_generator *optc, bool enable);
-
-void optc1_set_drr(
- struct timing_generator *optc,
- const struct drr_params *params);
-
-void optc1_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max);
-
-void optc1_set_static_screen_control(
- struct timing_generator *optc,
- uint32_t event_triggers,
- uint32_t num_frames);
-
-void optc1_program_stereo(struct timing_generator *optc,
- const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags);
-
-bool optc1_is_stereo_left_eye(struct timing_generator *optc);
-
-void optc1_clear_optc_underflow(struct timing_generator *optc);
-
-void optc1_tg_init(struct timing_generator *optc);
-
-bool optc1_is_tg_enabled(struct timing_generator *optc);
-
-bool optc1_is_optc_underflow_occurred(struct timing_generator *optc);
-
-void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable);
-
-void optc1_set_timing_double_buffer(struct timing_generator *optc, bool enable);
-
-bool optc1_get_otg_active_size(struct timing_generator *optc,
- uint32_t *otg_active_width,
- uint32_t *otg_active_height);
-
-void optc1_enable_crtc_reset(
- struct timing_generator *optc,
- int source_tg_inst,
- struct crtc_trigger_info *crtc_tp);
-
-bool optc1_configure_crc(struct timing_generator *optc,
- const struct crc_params *params);
-
-bool optc1_get_crc(struct timing_generator *optc,
- uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb);
-
-bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing);
-
-void optc1_set_vtg_params(struct timing_generator *optc,
- const struct dc_crtc_timing *dc_crtc_timing, bool program_fp2);
-
-#endif /* __DC_TIMING_GENERATOR_DCN10_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
index 5fcaf78334ff..25ba0d310d46 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
@@ -1,38 +1,7 @@
# SPDX-License-Identifier: MIT
-#
-# Makefile for DCN.
+# Copyright © 2019-2024 Advanced Micro Devices, Inc. All rights reserved.
-DCN20 = dcn20_resource.o dcn20_init.o dcn20_hwseq.o dcn20_dpp.o dcn20_dpp_cm.o dcn20_hubp.o \
- dcn20_mpc.o dcn20_opp.o dcn20_hubbub.o dcn20_optc.o dcn20_mmhubbub.o \
- dcn20_stream_encoder.o dcn20_link_encoder.o dcn20_dccg.o \
- dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o
-
-DCN20 += dcn20_dsc.o
-
-ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -msse
-endif
-
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -mpreferred-stack-boundary=4
-else
-CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -msse2
-endif
-endif
+DCN20 = dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o
AMD_DAL_DCN20 = $(addprefix $(AMDDALPATH)/dc/dcn20/,$(DCN20))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h
deleted file mode 100644
index f98aba308028..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h
+++ /dev/null
@@ -1,273 +0,0 @@
-/*
- * Copyright 2018 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#ifndef __DCN20_DCCG_H__
-#define __DCN20_DCCG_H__
-
-#include "dccg.h"
-
-#define DCCG_COMMON_REG_LIST_DCN_BASE() \
- SR(DPPCLK_DTO_CTRL),\
- DCCG_SRII(DTO_PARAM, DPPCLK, 0),\
- DCCG_SRII(DTO_PARAM, DPPCLK, 1),\
- DCCG_SRII(DTO_PARAM, DPPCLK, 2),\
- DCCG_SRII(DTO_PARAM, DPPCLK, 3),\
- SR(REFCLK_CNTL),\
- DCCG_SRII(PIXEL_RATE_CNTL, OTG, 0),\
- DCCG_SRII(PIXEL_RATE_CNTL, OTG, 1),\
- SR(DISPCLK_FREQ_CHANGE_CNTL)
-
-#define DCCG_REG_LIST_DCN2() \
- DCCG_COMMON_REG_LIST_DCN_BASE(),\
- DCCG_SRII(DTO_PARAM, DPPCLK, 4),\
- DCCG_SRII(DTO_PARAM, DPPCLK, 5),\
- DCCG_SRII(PIXEL_RATE_CNTL, OTG, 2),\
- DCCG_SRII(PIXEL_RATE_CNTL, OTG, 3),\
- DCCG_SRII(PIXEL_RATE_CNTL, OTG, 4),\
- DCCG_SRII(PIXEL_RATE_CNTL, OTG, 5)
-
-#define DCCG_SF(reg_name, field_name, post_fix)\
- .field_name = reg_name ## __ ## field_name ## post_fix
-
-#define DCCG_SFI(reg_name, field_name, field_prefix, inst, post_fix)\
- .field_prefix ## _ ## field_name[inst] = reg_name ## __ ## field_prefix ## inst ## _ ## field_name ## post_fix
-
-#define DCCG_SFII(block, reg_name, field_prefix, field_name, inst, post_fix)\
- .field_prefix ## _ ## field_name[inst] = block ## inst ## _ ## reg_name ## __ ## field_prefix ## inst ## _ ## field_name ## post_fix
-
-#define DCCG_COMMON_MASK_SH_LIST_DCN_COMMON_BASE(mask_sh) \
- DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 0, mask_sh),\
- DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 0, mask_sh),\
- DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 1, mask_sh),\
- DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 1, mask_sh),\
- DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 2, mask_sh),\
- DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 2, mask_sh),\
- DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 3, mask_sh),\
- DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 3, mask_sh),\
- DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_PHASE, mask_sh),\
- DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_MODULO, mask_sh),\
- DCCG_SF(REFCLK_CNTL, REFCLK_CLOCK_EN, mask_sh),\
- DCCG_SF(REFCLK_CNTL, REFCLK_SRC_SEL, mask_sh),\
- DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_STEP_DELAY, mask_sh),\
- DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_STEP_SIZE, mask_sh),\
- DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_FREQ_RAMP_DONE, mask_sh),\
- DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_MAX_ERRDET_CYCLES, mask_sh),\
- DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DCCG_FIFO_ERRDET_RESET, mask_sh),\
- DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DCCG_FIFO_ERRDET_STATE, mask_sh),\
- DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DCCG_FIFO_ERRDET_OVR_EN, mask_sh),\
- DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_CHG_FWD_CORR_DISABLE, mask_sh),\
- DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 0, mask_sh),\
- DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 1, mask_sh),\
- DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 0, mask_sh),\
- DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 1, mask_sh)
-
-
-
-
-#define DCCG_MASK_SH_LIST_DCN2(mask_sh) \
- DCCG_COMMON_MASK_SH_LIST_DCN_COMMON_BASE(mask_sh),\
- DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 4, mask_sh),\
- DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 4, mask_sh),\
- DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 5, mask_sh),\
- DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 5, mask_sh),\
- DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 2, mask_sh),\
- DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 3, mask_sh),\
- DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 4, mask_sh),\
- DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 5, mask_sh),\
- DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 2, mask_sh),\
- DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 3, mask_sh),\
- DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 4, mask_sh),\
- DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 5, mask_sh)
-
-#define DCCG_MASK_SH_LIST_DCN2_1(mask_sh) \
- DCCG_COMMON_MASK_SH_LIST_DCN_COMMON_BASE(mask_sh),\
- DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 4, mask_sh),\
- DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 4, mask_sh),\
- DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 5, mask_sh),\
- DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 5, mask_sh),\
- DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 2, mask_sh),\
- DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 3, mask_sh),\
- DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 2, mask_sh),\
- DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 3, mask_sh)
-
-
-#define DCCG_REG_FIELD_LIST(type) \
- type DPPCLK0_DTO_PHASE;\
- type DPPCLK0_DTO_MODULO;\
- type DPPCLK_DTO_ENABLE[6];\
- type DPPCLK_DTO_DB_EN[6];\
- type REFCLK_CLOCK_EN;\
- type REFCLK_SRC_SEL;\
- type DISPCLK_STEP_DELAY;\
- type DISPCLK_STEP_SIZE;\
- type DISPCLK_FREQ_RAMP_DONE;\
- type DISPCLK_MAX_ERRDET_CYCLES;\
- type DCCG_FIFO_ERRDET_RESET;\
- type DCCG_FIFO_ERRDET_STATE;\
- type DCCG_FIFO_ERRDET_OVR_EN;\
- type DISPCLK_CHG_FWD_CORR_DISABLE;\
- type DISPCLK_FREQ_CHANGE_CNTL;\
- type OTG_ADD_PIXEL[MAX_PIPES];\
- type OTG_DROP_PIXEL[MAX_PIPES];
-
-#define DCCG3_REG_FIELD_LIST(type) \
- type PHYASYMCLK_FORCE_EN;\
- type PHYASYMCLK_FORCE_SRC_SEL;\
- type PHYBSYMCLK_FORCE_EN;\
- type PHYBSYMCLK_FORCE_SRC_SEL;\
- type PHYCSYMCLK_FORCE_EN;\
- type PHYCSYMCLK_FORCE_SRC_SEL;
-
-#define DCCG31_REG_FIELD_LIST(type) \
- type PHYDSYMCLK_FORCE_EN;\
- type PHYDSYMCLK_FORCE_SRC_SEL;\
- type PHYESYMCLK_FORCE_EN;\
- type PHYESYMCLK_FORCE_SRC_SEL;\
- type DPSTREAMCLK_PIPE0_EN;\
- type DPSTREAMCLK_PIPE1_EN;\
- type DPSTREAMCLK_PIPE2_EN;\
- type DPSTREAMCLK_PIPE3_EN;\
- type HDMISTREAMCLK0_SRC_SEL;\
- type HDMISTREAMCLK0_DTO_FORCE_DIS;\
- type SYMCLK32_SE0_SRC_SEL;\
- type SYMCLK32_SE1_SRC_SEL;\
- type SYMCLK32_SE2_SRC_SEL;\
- type SYMCLK32_SE3_SRC_SEL;\
- type SYMCLK32_SE0_EN;\
- type SYMCLK32_SE1_EN;\
- type SYMCLK32_SE2_EN;\
- type SYMCLK32_SE3_EN;\
- type SYMCLK32_LE0_SRC_SEL;\
- type SYMCLK32_LE1_SRC_SEL;\
- type SYMCLK32_LE0_EN;\
- type SYMCLK32_LE1_EN;\
- type DTBCLK_DTO_ENABLE[MAX_PIPES];\
- type DTBCLKDTO_ENABLE_STATUS[MAX_PIPES];\
- type PIPE_DTO_SRC_SEL[MAX_PIPES];\
- type DTBCLK_DTO_DIV[MAX_PIPES];\
- type DCCG_AUDIO_DTO_SEL;\
- type DCCG_AUDIO_DTO0_SOURCE_SEL;\
- type DENTIST_DISPCLK_CHG_MODE;\
- type DSCCLK0_DTO_PHASE;\
- type DSCCLK0_DTO_MODULO;\
- type DSCCLK1_DTO_PHASE;\
- type DSCCLK1_DTO_MODULO;\
- type DSCCLK2_DTO_PHASE;\
- type DSCCLK2_DTO_MODULO;\
- type DSCCLK0_DTO_ENABLE;\
- type DSCCLK1_DTO_ENABLE;\
- type DSCCLK2_DTO_ENABLE;\
- type SYMCLK32_ROOT_SE0_GATE_DISABLE;\
- type SYMCLK32_ROOT_SE1_GATE_DISABLE;\
- type SYMCLK32_ROOT_SE2_GATE_DISABLE;\
- type SYMCLK32_ROOT_SE3_GATE_DISABLE;\
- type SYMCLK32_ROOT_LE0_GATE_DISABLE;\
- type SYMCLK32_ROOT_LE1_GATE_DISABLE;\
- type DPSTREAMCLK_ROOT_GATE_DISABLE;\
- type DPSTREAMCLK_GATE_DISABLE;\
- type HDMISTREAMCLK0_DTO_PHASE;\
- type HDMISTREAMCLK0_DTO_MODULO;\
- type HDMICHARCLK0_GATE_DISABLE;\
- type HDMICHARCLK0_ROOT_GATE_DISABLE;
-
-
-struct dccg_shift {
- DCCG_REG_FIELD_LIST(uint8_t)
- DCCG3_REG_FIELD_LIST(uint8_t)
- DCCG31_REG_FIELD_LIST(uint8_t)
-};
-
-struct dccg_mask {
- DCCG_REG_FIELD_LIST(uint32_t)
- DCCG3_REG_FIELD_LIST(uint32_t)
- DCCG31_REG_FIELD_LIST(uint32_t)
-};
-
-struct dccg_registers {
- uint32_t DPPCLK_DTO_CTRL;
- uint32_t DPPCLK_DTO_PARAM[6];
- uint32_t REFCLK_CNTL;
- uint32_t DISPCLK_FREQ_CHANGE_CNTL;
- uint32_t OTG_PIXEL_RATE_CNTL[MAX_PIPES];
- uint32_t HDMICHARCLK_CLOCK_CNTL[6];
- uint32_t PHYASYMCLK_CLOCK_CNTL;
- uint32_t PHYBSYMCLK_CLOCK_CNTL;
- uint32_t PHYCSYMCLK_CLOCK_CNTL;
- uint32_t PHYDSYMCLK_CLOCK_CNTL;
- uint32_t PHYESYMCLK_CLOCK_CNTL;
- uint32_t DTBCLK_DTO_MODULO[MAX_PIPES];
- uint32_t DTBCLK_DTO_PHASE[MAX_PIPES];
- uint32_t DCCG_AUDIO_DTBCLK_DTO_MODULO;
- uint32_t DCCG_AUDIO_DTBCLK_DTO_PHASE;
- uint32_t DCCG_AUDIO_DTO_SOURCE;
- uint32_t DPSTREAMCLK_CNTL;
- uint32_t HDMISTREAMCLK_CNTL;
- uint32_t SYMCLK32_SE_CNTL;
- uint32_t SYMCLK32_LE_CNTL;
- uint32_t DENTIST_DISPCLK_CNTL;
- uint32_t DSCCLK_DTO_CTRL;
- uint32_t DSCCLK0_DTO_PARAM;
- uint32_t DSCCLK1_DTO_PARAM;
- uint32_t DSCCLK2_DTO_PARAM;
- uint32_t DPSTREAMCLK_ROOT_GATE_DISABLE;
- uint32_t DPSTREAMCLK_GATE_DISABLE;
- uint32_t DCCG_GATE_DISABLE_CNTL3;
- uint32_t HDMISTREAMCLK0_DTO_PARAM;
- uint32_t DCCG_GATE_DISABLE_CNTL4;
-
-};
-
-struct dcn_dccg {
- struct dccg base;
- const struct dccg_registers *regs;
- const struct dccg_shift *dccg_shift;
- const struct dccg_mask *dccg_mask;
-};
-
-void dccg2_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk);
-
-void dccg2_get_dccg_ref_freq(struct dccg *dccg,
- unsigned int xtalin_freq_inKhz,
- unsigned int *dccg_ref_freq_inKhz);
-
-void dccg2_set_fifo_errdet_ovr_en(struct dccg *dccg,
- bool en);
-void dccg2_otg_add_pixel(struct dccg *dccg,
- uint32_t otg_inst);
-void dccg2_otg_drop_pixel(struct dccg *dccg,
- uint32_t otg_inst);
-
-
-void dccg2_init(struct dccg *dccg);
-
-struct dccg *dccg2_create(
- struct dc_context *ctx,
- const struct dccg_registers *regs,
- const struct dccg_shift *dccg_shift,
- const struct dccg_mask *dccg_mask);
-
-void dcn_dccg_destroy(struct dccg **dccg);
-
-#endif //__DCN20_DCCG_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.c
index 8d3884b306dd..80779e85e2c5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.c
@@ -101,8 +101,8 @@ static bool dwb2_enable(struct dwbc *dwbc, struct dc_dwb_params *params)
struct dcn20_dwbc *dwbc20 = TO_DCN20_DWBC(dwbc);
/* Only chroma scaling (sub-sampling) is supported in DCN2 */
-if ((params->cnv_params.src_width != params->dest_width) ||
- (params->cnv_params.src_height != params->dest_height)) {
+ if ((params->cnv_params.src_width != params->dest_width) ||
+ (params->cnv_params.src_height != params->dest_height)) {
DC_LOG_DWB("%s inst = %d, FAILED!LUMA SCALING NOT SUPPORTED", __func__, dwbc20->base.inst);
return false;
@@ -299,9 +299,20 @@ void dwb2_set_scaler(struct dwbc *dwbc, struct dc_dwb_params *params)
}
}
+
+ if (dwbc20->dwbc_mask->WBSCL_COEF_RAM_SEL) {
+ /* Swap double buffered coefficient set */
+ uint32_t wbscl_mode = REG_READ(WBSCL_MODE);
+ bool coef_ram_current = get_reg_field_value_ex(
+ wbscl_mode, dwbc20->dwbc_mask->WBSCL_COEF_RAM_SEL_CURRENT,
+ dwbc20->dwbc_shift->WBSCL_COEF_RAM_SEL_CURRENT);
+
+ REG_UPDATE(WBSCL_MODE, WBSCL_COEF_RAM_SEL, !coef_ram_current);
+ }
+
}
-const struct dwbc_funcs dcn20_dwbc_funcs = {
+static const struct dwbc_funcs dcn20_dwbc_funcs = {
.get_caps = dwb2_get_caps,
.enable = dwb2_enable,
.disable = dwb2_disable,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.h
index a85ed228dfc2..a9dd9ae23ec9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.h
@@ -27,204 +27,177 @@
#define TO_DCN20_DWBC(dwbc_base) \
container_of(dwbc_base, struct dcn20_dwbc, base)
-/* DCN */
-#define BASE_INNER(seg) \
- DCE_BASE__INST0_SEG ## seg
-
-#define BASE(seg) \
- BASE_INNER(seg)
-
-#define SR(reg_name)\
- .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \
- mm ## reg_name
-
-#define SRI(reg_name, block, id)\
- .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define SRI2(reg_name, block, id)\
- .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \
- mm ## reg_name
-
-#define SRII(reg_name, block, id)\
- .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define SF(reg_name, field_name, post_fix)\
- .field_name = reg_name ## __ ## field_name ## post_fix
-
-
#define DWBC_COMMON_REG_LIST_DCN2_0(inst) \
- SRI2(WB_ENABLE, CNV, inst),\
- SRI2(WB_EC_CONFIG, CNV, inst),\
- SRI2(CNV_MODE, CNV, inst),\
- SRI2(CNV_WINDOW_START, CNV, inst),\
- SRI2(CNV_WINDOW_SIZE, CNV, inst),\
- SRI2(CNV_UPDATE, CNV, inst),\
- SRI2(CNV_SOURCE_SIZE, CNV, inst),\
- SRI2(CNV_TEST_CNTL, CNV, inst),\
- SRI2(CNV_TEST_CRC_RED, CNV, inst),\
- SRI2(CNV_TEST_CRC_GREEN, CNV, inst),\
- SRI2(CNV_TEST_CRC_BLUE, CNV, inst),\
- SRI2(WBSCL_COEF_RAM_SELECT, WBSCL, inst),\
- SRI2(WBSCL_COEF_RAM_TAP_DATA, WBSCL, inst),\
- SRI2(WBSCL_MODE, WBSCL, inst),\
- SRI2(WBSCL_TAP_CONTROL, WBSCL, inst),\
- SRI2(WBSCL_DEST_SIZE, WBSCL, inst),\
- SRI2(WBSCL_HORZ_FILTER_SCALE_RATIO, WBSCL, inst),\
- SRI2(WBSCL_HORZ_FILTER_INIT_Y_RGB, WBSCL, inst),\
- SRI2(WBSCL_HORZ_FILTER_INIT_CBCR, WBSCL, inst),\
- SRI2(WBSCL_VERT_FILTER_SCALE_RATIO, WBSCL, inst),\
- SRI2(WBSCL_VERT_FILTER_INIT_Y_RGB, WBSCL, inst),\
- SRI2(WBSCL_VERT_FILTER_INIT_CBCR, WBSCL, inst),\
- SRI2(WBSCL_ROUND_OFFSET, WBSCL, inst),\
- SRI2(WBSCL_OVERFLOW_STATUS, WBSCL, inst),\
- SRI2(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL, inst),\
- SRI2(WBSCL_TEST_CNTL, WBSCL, inst),\
- SRI2(WBSCL_TEST_CRC_RED, WBSCL, inst),\
- SRI2(WBSCL_TEST_CRC_GREEN, WBSCL, inst),\
- SRI2(WBSCL_TEST_CRC_BLUE, WBSCL, inst),\
- SRI2(WBSCL_BACKPRESSURE_CNT_EN, WBSCL, inst),\
- SRI2(WB_MCIF_BACKPRESSURE_CNT, WBSCL, inst),\
- SRI2(WBSCL_CLAMP_Y_RGB, WBSCL, inst),\
- SRI2(WBSCL_CLAMP_CBCR, WBSCL, inst),\
- SRI2(WBSCL_OUTSIDE_PIX_STRATEGY, WBSCL, inst),\
- SRI2(WBSCL_OUTSIDE_PIX_STRATEGY_CBCR, WBSCL, inst),\
- SRI2(WBSCL_DEBUG, WBSCL, inst),\
- SRI2(WBSCL_TEST_DEBUG_INDEX, WBSCL, inst),\
- SRI2(WBSCL_TEST_DEBUG_DATA, WBSCL, inst),\
- SRI2(WB_DEBUG_CTRL, CNV, inst),\
- SRI2(WB_DBG_MODE, CNV, inst),\
- SRI2(WB_HW_DEBUG, CNV, inst),\
- SRI2(CNV_TEST_DEBUG_INDEX, CNV, inst),\
- SRI2(CNV_TEST_DEBUG_DATA, CNV, inst),\
- SRI2(WB_SOFT_RESET, CNV, inst),\
- SRI2(WB_WARM_UP_MODE_CTL1, CNV, inst),\
- SRI2(WB_WARM_UP_MODE_CTL2, CNV, inst)
+ SRI2_DWB(WB_ENABLE, CNV, inst),\
+ SRI2_DWB(WB_EC_CONFIG, CNV, inst),\
+ SRI2_DWB(CNV_MODE, CNV, inst),\
+ SRI2_DWB(CNV_WINDOW_START, CNV, inst),\
+ SRI2_DWB(CNV_WINDOW_SIZE, CNV, inst),\
+ SRI2_DWB(CNV_UPDATE, CNV, inst),\
+ SRI2_DWB(CNV_SOURCE_SIZE, CNV, inst),\
+ SRI2_DWB(CNV_TEST_CNTL, CNV, inst),\
+ SRI2_DWB(CNV_TEST_CRC_RED, CNV, inst),\
+ SRI2_DWB(CNV_TEST_CRC_GREEN, CNV, inst),\
+ SRI2_DWB(CNV_TEST_CRC_BLUE, CNV, inst),\
+ SRI2_DWB(WBSCL_COEF_RAM_SELECT, WBSCL, inst),\
+ SRI2_DWB(WBSCL_COEF_RAM_TAP_DATA, WBSCL, inst),\
+ SRI2_DWB(WBSCL_MODE, WBSCL, inst),\
+ SRI2_DWB(WBSCL_TAP_CONTROL, WBSCL, inst),\
+ SRI2_DWB(WBSCL_DEST_SIZE, WBSCL, inst),\
+ SRI2_DWB(WBSCL_HORZ_FILTER_SCALE_RATIO, WBSCL, inst),\
+ SRI2_DWB(WBSCL_HORZ_FILTER_INIT_Y_RGB, WBSCL, inst),\
+ SRI2_DWB(WBSCL_HORZ_FILTER_INIT_CBCR, WBSCL, inst),\
+ SRI2_DWB(WBSCL_VERT_FILTER_SCALE_RATIO, WBSCL, inst),\
+ SRI2_DWB(WBSCL_VERT_FILTER_INIT_Y_RGB, WBSCL, inst),\
+ SRI2_DWB(WBSCL_VERT_FILTER_INIT_CBCR, WBSCL, inst),\
+ SRI2_DWB(WBSCL_ROUND_OFFSET, WBSCL, inst),\
+ SRI2_DWB(WBSCL_OVERFLOW_STATUS, WBSCL, inst),\
+ SRI2_DWB(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL, inst),\
+ SRI2_DWB(WBSCL_TEST_CNTL, WBSCL, inst),\
+ SRI2_DWB(WBSCL_TEST_CRC_RED, WBSCL, inst),\
+ SRI2_DWB(WBSCL_TEST_CRC_GREEN, WBSCL, inst),\
+ SRI2_DWB(WBSCL_TEST_CRC_BLUE, WBSCL, inst),\
+ SRI2_DWB(WBSCL_BACKPRESSURE_CNT_EN, WBSCL, inst),\
+ SRI2_DWB(WB_MCIF_BACKPRESSURE_CNT, WBSCL, inst),\
+ SRI2_DWB(WBSCL_CLAMP_Y_RGB, WBSCL, inst),\
+ SRI2_DWB(WBSCL_CLAMP_CBCR, WBSCL, inst),\
+ SRI2_DWB(WBSCL_OUTSIDE_PIX_STRATEGY, WBSCL, inst),\
+ SRI2_DWB(WBSCL_OUTSIDE_PIX_STRATEGY_CBCR, WBSCL, inst),\
+ SRI2_DWB(WBSCL_DEBUG, WBSCL, inst),\
+ SRI2_DWB(WBSCL_TEST_DEBUG_INDEX, WBSCL, inst),\
+ SRI2_DWB(WBSCL_TEST_DEBUG_DATA, WBSCL, inst),\
+ SRI2_DWB(WB_DEBUG_CTRL, CNV, inst),\
+ SRI2_DWB(WB_DBG_MODE, CNV, inst),\
+ SRI2_DWB(WB_HW_DEBUG, CNV, inst),\
+ SRI2_DWB(CNV_TEST_DEBUG_INDEX, CNV, inst),\
+ SRI2_DWB(CNV_TEST_DEBUG_DATA, CNV, inst),\
+ SRI2_DWB(WB_SOFT_RESET, CNV, inst),\
+ SRI2_DWB(WB_WARM_UP_MODE_CTL1, CNV, inst),\
+ SRI2_DWB(WB_WARM_UP_MODE_CTL2, CNV, inst)
#define DWBC_COMMON_MASK_SH_LIST_DCN2_0(mask_sh) \
- SF(WB_ENABLE, WB_ENABLE, mask_sh),\
- SF(WB_EC_CONFIG, DISPCLK_R_WB_GATE_DIS, mask_sh),\
- SF(WB_EC_CONFIG, DISPCLK_G_WB_GATE_DIS, mask_sh),\
- SF(WB_EC_CONFIG, DISPCLK_G_WBSCL_GATE_DIS, mask_sh),\
- SF(WB_EC_CONFIG, WB_TEST_CLK_SEL, mask_sh),\
- SF(WB_EC_CONFIG, WB_LB_LS_DIS, mask_sh),\
- SF(WB_EC_CONFIG, WB_LB_SD_DIS, mask_sh),\
- SF(WB_EC_CONFIG, WB_LUT_LS_DIS, mask_sh),\
- SF(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_MODE_SEL, mask_sh),\
- SF(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_DIS, mask_sh),\
- SF(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_FORCE, mask_sh),\
- SF(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_STATE, mask_sh),\
- SF(WB_EC_CONFIG, WB_RAM_PW_SAVE_MODE, mask_sh),\
- SF(WB_EC_CONFIG, WBSCL_LUT_MEM_PWR_STATE, mask_sh),\
- SF(CNV_MODE, CNV_OUT_BPC, mask_sh),\
- SF(CNV_MODE, CNV_FRAME_CAPTURE_RATE, mask_sh),\
- SF(CNV_MODE, CNV_WINDOW_CROP_EN, mask_sh),\
- SF(CNV_MODE, CNV_STEREO_TYPE, mask_sh),\
- SF(CNV_MODE, CNV_INTERLACED_MODE, mask_sh),\
- SF(CNV_MODE, CNV_EYE_SELECTION, mask_sh),\
- SF(CNV_MODE, CNV_STEREO_POLARITY, mask_sh),\
- SF(CNV_MODE, CNV_INTERLACED_FIELD_ORDER, mask_sh),\
- SF(CNV_MODE, CNV_STEREO_SPLIT, mask_sh),\
- SF(CNV_MODE, CNV_NEW_CONTENT, mask_sh),\
- SF(CNV_MODE, CNV_FRAME_CAPTURE_EN_CURRENT, mask_sh),\
- SF(CNV_MODE, CNV_FRAME_CAPTURE_EN, mask_sh),\
- SF(CNV_WINDOW_START, CNV_WINDOW_START_X, mask_sh),\
- SF(CNV_WINDOW_START, CNV_WINDOW_START_Y, mask_sh),\
- SF(CNV_WINDOW_SIZE, CNV_WINDOW_WIDTH, mask_sh),\
- SF(CNV_WINDOW_SIZE, CNV_WINDOW_HEIGHT, mask_sh),\
- SF(CNV_UPDATE, CNV_UPDATE_PENDING, mask_sh),\
- SF(CNV_UPDATE, CNV_UPDATE_TAKEN, mask_sh),\
- SF(CNV_UPDATE, CNV_UPDATE_LOCK, mask_sh),\
- SF(CNV_SOURCE_SIZE, CNV_SOURCE_WIDTH, mask_sh),\
- SF(CNV_SOURCE_SIZE, CNV_SOURCE_HEIGHT, mask_sh),\
- SF(CNV_TEST_CNTL, CNV_TEST_CRC_EN, mask_sh),\
- SF(CNV_TEST_CNTL, CNV_TEST_CRC_CONT_EN, mask_sh),\
- SF(CNV_TEST_CRC_RED, CNV_TEST_CRC_RED_MASK, mask_sh),\
- SF(CNV_TEST_CRC_RED, CNV_TEST_CRC_SIG_RED, mask_sh),\
- SF(CNV_TEST_CRC_GREEN, CNV_TEST_CRC_GREEN_MASK, mask_sh),\
- SF(CNV_TEST_CRC_GREEN, CNV_TEST_CRC_SIG_GREEN, mask_sh),\
- SF(CNV_TEST_CRC_BLUE, CNV_TEST_CRC_BLUE_MASK, mask_sh),\
- SF(CNV_TEST_CRC_BLUE, CNV_TEST_CRC_SIG_BLUE, mask_sh),\
- SF(WB_DEBUG_CTRL, WB_DEBUG_EN, mask_sh),\
- SF(WB_DEBUG_CTRL, WB_DEBUG_SEL, mask_sh),\
- SF(WB_DBG_MODE, WB_DBG_MODE_EN, mask_sh),\
- SF(WB_DBG_MODE, WB_DBG_DIN_FMT, mask_sh),\
- SF(WB_DBG_MODE, WB_DBG_36MODE, mask_sh),\
- SF(WB_DBG_MODE, WB_DBG_CMAP, mask_sh),\
- SF(WB_DBG_MODE, WB_DBG_PXLRATE_ERROR, mask_sh),\
- SF(WB_DBG_MODE, WB_DBG_SOURCE_WIDTH, mask_sh),\
- SF(WB_HW_DEBUG, WB_HW_DEBUG, mask_sh),\
- SF(WB_SOFT_RESET, WB_SOFT_RESET, mask_sh),\
- SF(CNV_TEST_DEBUG_INDEX, CNV_TEST_DEBUG_INDEX, mask_sh),\
- SF(CNV_TEST_DEBUG_INDEX, CNV_TEST_DEBUG_WRITE_EN, mask_sh),\
- SF(CNV_TEST_DEBUG_DATA, CNV_TEST_DEBUG_DATA, mask_sh),\
- SF(WBSCL_COEF_RAM_SELECT, WBSCL_COEF_RAM_TAP_PAIR_IDX, mask_sh),\
- SF(WBSCL_COEF_RAM_SELECT, WBSCL_COEF_RAM_PHASE, mask_sh),\
- SF(WBSCL_COEF_RAM_SELECT, WBSCL_COEF_RAM_FILTER_TYPE, mask_sh),\
- SF(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_EVEN_TAP_COEF, mask_sh),\
- SF(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_EVEN_TAP_COEF_EN, mask_sh),\
- SF(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_ODD_TAP_COEF, mask_sh),\
- SF(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_ODD_TAP_COEF_EN, mask_sh),\
- SF(WBSCL_MODE, WBSCL_MODE, mask_sh),\
- SF(WBSCL_MODE, WBSCL_OUT_BIT_DEPTH, mask_sh),\
- SF(WBSCL_TAP_CONTROL, WBSCL_V_NUM_OF_TAPS_Y_RGB, mask_sh),\
- SF(WBSCL_TAP_CONTROL, WBSCL_V_NUM_OF_TAPS_CBCR, mask_sh),\
- SF(WBSCL_TAP_CONTROL, WBSCL_H_NUM_OF_TAPS_Y_RGB, mask_sh),\
- SF(WBSCL_TAP_CONTROL, WBSCL_H_NUM_OF_TAPS_CBCR, mask_sh),\
- SF(WBSCL_DEST_SIZE, WBSCL_DEST_HEIGHT, mask_sh),\
- SF(WBSCL_DEST_SIZE, WBSCL_DEST_WIDTH, mask_sh),\
- SF(WBSCL_HORZ_FILTER_SCALE_RATIO, WBSCL_H_SCALE_RATIO, mask_sh),\
- SF(WBSCL_HORZ_FILTER_INIT_Y_RGB, WBSCL_H_INIT_FRAC_Y_RGB, mask_sh),\
- SF(WBSCL_HORZ_FILTER_INIT_Y_RGB, WBSCL_H_INIT_INT_Y_RGB, mask_sh),\
- SF(WBSCL_HORZ_FILTER_INIT_CBCR, WBSCL_H_INIT_FRAC_CBCR, mask_sh),\
- SF(WBSCL_HORZ_FILTER_INIT_CBCR, WBSCL_H_INIT_INT_CBCR, mask_sh),\
- SF(WBSCL_VERT_FILTER_SCALE_RATIO, WBSCL_V_SCALE_RATIO, mask_sh),\
- SF(WBSCL_VERT_FILTER_INIT_Y_RGB, WBSCL_V_INIT_FRAC_Y_RGB, mask_sh),\
- SF(WBSCL_VERT_FILTER_INIT_Y_RGB, WBSCL_V_INIT_INT_Y_RGB, mask_sh),\
- SF(WBSCL_VERT_FILTER_INIT_CBCR, WBSCL_V_INIT_FRAC_CBCR, mask_sh),\
- SF(WBSCL_VERT_FILTER_INIT_CBCR, WBSCL_V_INIT_INT_CBCR, mask_sh),\
- SF(WBSCL_ROUND_OFFSET, WBSCL_ROUND_OFFSET_Y_RGB, mask_sh),\
- SF(WBSCL_ROUND_OFFSET, WBSCL_ROUND_OFFSET_CBCR, mask_sh),\
- SF(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_FLAG, mask_sh),\
- SF(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_ACK, mask_sh),\
- SF(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_MASK, mask_sh),\
- SF(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_INT_STATUS, mask_sh),\
- SF(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_INT_TYPE, mask_sh),\
- SF(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_FLAG, mask_sh),\
- SF(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_ACK, mask_sh),\
- SF(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_MASK, mask_sh),\
- SF(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_INT_STATUS, mask_sh),\
- SF(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_INT_TYPE, mask_sh),\
- SF(WBSCL_TEST_CNTL, WBSCL_TEST_CRC_EN, mask_sh),\
- SF(WBSCL_TEST_CNTL, WBSCL_TEST_CRC_CONT_EN, mask_sh),\
- SF(WBSCL_TEST_CRC_RED, WBSCL_TEST_CRC_RED_MASK, mask_sh),\
- SF(WBSCL_TEST_CRC_RED, WBSCL_TEST_CRC_SIG_RED, mask_sh),\
- SF(WBSCL_TEST_CRC_GREEN, WBSCL_TEST_CRC_GREEN_MASK, mask_sh),\
- SF(WBSCL_TEST_CRC_GREEN, WBSCL_TEST_CRC_SIG_GREEN, mask_sh),\
- SF(WBSCL_TEST_CRC_BLUE, WBSCL_TEST_CRC_BLUE_MASK, mask_sh),\
- SF(WBSCL_TEST_CRC_BLUE, WBSCL_TEST_CRC_SIG_BLUE, mask_sh),\
- SF(WBSCL_BACKPRESSURE_CNT_EN, WBSCL_BACKPRESSURE_CNT_EN, mask_sh),\
- SF(WB_MCIF_BACKPRESSURE_CNT, WB_MCIF_Y_MAX_BACKPRESSURE, mask_sh),\
- SF(WB_MCIF_BACKPRESSURE_CNT, WB_MCIF_C_MAX_BACKPRESSURE, mask_sh),\
- SF(WBSCL_CLAMP_Y_RGB, WBSCL_CLAMP_UPPER_Y_RGB, mask_sh),\
- SF(WBSCL_CLAMP_Y_RGB, WBSCL_CLAMP_LOWER_Y_RGB, mask_sh),\
- SF(WBSCL_CLAMP_CBCR, WBSCL_CLAMP_UPPER_CBCR, mask_sh),\
- SF(WBSCL_CLAMP_CBCR, WBSCL_CLAMP_LOWER_CBCR, mask_sh),\
- SF(WBSCL_OUTSIDE_PIX_STRATEGY, WBSCL_OUTSIDE_PIX_STRATEGY, mask_sh),\
- SF(WBSCL_OUTSIDE_PIX_STRATEGY, WBSCL_BLACK_COLOR_G_Y, mask_sh),\
- SF(WBSCL_OUTSIDE_PIX_STRATEGY_CBCR, WBSCL_BLACK_COLOR_B_CB, mask_sh),\
- SF(WBSCL_OUTSIDE_PIX_STRATEGY_CBCR, WBSCL_BLACK_COLOR_R_CR, mask_sh),\
- SF(WBSCL_DEBUG, WBSCL_DEBUG, mask_sh),\
- SF(WBSCL_TEST_DEBUG_INDEX, WBSCL_TEST_DEBUG_INDEX, mask_sh),\
- SF(WBSCL_TEST_DEBUG_INDEX, WBSCL_TEST_DEBUG_WRITE_EN, mask_sh),\
- SF(WBSCL_TEST_DEBUG_DATA, WBSCL_TEST_DEBUG_DATA, mask_sh),\
- SF(WB_WARM_UP_MODE_CTL1, WIDTH_WARMUP, mask_sh),\
- SF(WB_WARM_UP_MODE_CTL1, HEIGHT_WARMUP, mask_sh),\
- SF(WB_WARM_UP_MODE_CTL1, GMC_WARM_UP_ENABLE, mask_sh),\
- SF(WB_WARM_UP_MODE_CTL2, DATA_VALUE_WARMUP, mask_sh),\
- SF(WB_WARM_UP_MODE_CTL2, MODE_WARMUP, mask_sh),\
- SF(WB_WARM_UP_MODE_CTL2, DATA_DEPTH_WARMUP, mask_sh)
+ SF_DWB(WB_ENABLE, WB_ENABLE, mask_sh),\
+ SF_DWB(WB_EC_CONFIG, DISPCLK_R_WB_GATE_DIS, mask_sh),\
+ SF_DWB(WB_EC_CONFIG, DISPCLK_G_WB_GATE_DIS, mask_sh),\
+ SF_DWB(WB_EC_CONFIG, DISPCLK_G_WBSCL_GATE_DIS, mask_sh),\
+ SF_DWB(WB_EC_CONFIG, WB_TEST_CLK_SEL, mask_sh),\
+ SF_DWB(WB_EC_CONFIG, WB_LB_LS_DIS, mask_sh),\
+ SF_DWB(WB_EC_CONFIG, WB_LB_SD_DIS, mask_sh),\
+ SF_DWB(WB_EC_CONFIG, WB_LUT_LS_DIS, mask_sh),\
+ SF_DWB(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_MODE_SEL, mask_sh),\
+ SF_DWB(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_DIS, mask_sh),\
+ SF_DWB(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_FORCE, mask_sh),\
+ SF_DWB(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_STATE, mask_sh),\
+ SF_DWB(WB_EC_CONFIG, WB_RAM_PW_SAVE_MODE, mask_sh),\
+ SF_DWB(WB_EC_CONFIG, WBSCL_LUT_MEM_PWR_STATE, mask_sh),\
+ SF_DWB(CNV_MODE, CNV_OUT_BPC, mask_sh),\
+ SF_DWB(CNV_MODE, CNV_FRAME_CAPTURE_RATE, mask_sh),\
+ SF_DWB(CNV_MODE, CNV_WINDOW_CROP_EN, mask_sh),\
+ SF_DWB(CNV_MODE, CNV_STEREO_TYPE, mask_sh),\
+ SF_DWB(CNV_MODE, CNV_INTERLACED_MODE, mask_sh),\
+ SF_DWB(CNV_MODE, CNV_EYE_SELECTION, mask_sh),\
+ SF_DWB(CNV_MODE, CNV_STEREO_POLARITY, mask_sh),\
+ SF_DWB(CNV_MODE, CNV_INTERLACED_FIELD_ORDER, mask_sh),\
+ SF_DWB(CNV_MODE, CNV_STEREO_SPLIT, mask_sh),\
+ SF_DWB(CNV_MODE, CNV_NEW_CONTENT, mask_sh),\
+ SF_DWB(CNV_MODE, CNV_FRAME_CAPTURE_EN_CURRENT, mask_sh),\
+ SF_DWB(CNV_MODE, CNV_FRAME_CAPTURE_EN, mask_sh),\
+ SF_DWB(CNV_WINDOW_START, CNV_WINDOW_START_X, mask_sh),\
+ SF_DWB(CNV_WINDOW_START, CNV_WINDOW_START_Y, mask_sh),\
+ SF_DWB(CNV_WINDOW_SIZE, CNV_WINDOW_WIDTH, mask_sh),\
+ SF_DWB(CNV_WINDOW_SIZE, CNV_WINDOW_HEIGHT, mask_sh),\
+ SF_DWB(CNV_UPDATE, CNV_UPDATE_PENDING, mask_sh),\
+ SF_DWB(CNV_UPDATE, CNV_UPDATE_TAKEN, mask_sh),\
+ SF_DWB(CNV_UPDATE, CNV_UPDATE_LOCK, mask_sh),\
+ SF_DWB(CNV_SOURCE_SIZE, CNV_SOURCE_WIDTH, mask_sh),\
+ SF_DWB(CNV_SOURCE_SIZE, CNV_SOURCE_HEIGHT, mask_sh),\
+ SF_DWB(CNV_TEST_CNTL, CNV_TEST_CRC_EN, mask_sh),\
+ SF_DWB(CNV_TEST_CNTL, CNV_TEST_CRC_CONT_EN, mask_sh),\
+ SF_DWB(CNV_TEST_CRC_RED, CNV_TEST_CRC_RED_MASK, mask_sh),\
+ SF_DWB(CNV_TEST_CRC_RED, CNV_TEST_CRC_SIG_RED, mask_sh),\
+ SF_DWB(CNV_TEST_CRC_GREEN, CNV_TEST_CRC_GREEN_MASK, mask_sh),\
+ SF_DWB(CNV_TEST_CRC_GREEN, CNV_TEST_CRC_SIG_GREEN, mask_sh),\
+ SF_DWB(CNV_TEST_CRC_BLUE, CNV_TEST_CRC_BLUE_MASK, mask_sh),\
+ SF_DWB(CNV_TEST_CRC_BLUE, CNV_TEST_CRC_SIG_BLUE, mask_sh),\
+ SF_DWB(WB_DEBUG_CTRL, WB_DEBUG_EN, mask_sh),\
+ SF_DWB(WB_DEBUG_CTRL, WB_DEBUG_SEL, mask_sh),\
+ SF_DWB(WB_DBG_MODE, WB_DBG_MODE_EN, mask_sh),\
+ SF_DWB(WB_DBG_MODE, WB_DBG_DIN_FMT, mask_sh),\
+ SF_DWB(WB_DBG_MODE, WB_DBG_36MODE, mask_sh),\
+ SF_DWB(WB_DBG_MODE, WB_DBG_CMAP, mask_sh),\
+ SF_DWB(WB_DBG_MODE, WB_DBG_PXLRATE_ERROR, mask_sh),\
+ SF_DWB(WB_DBG_MODE, WB_DBG_SOURCE_WIDTH, mask_sh),\
+ SF_DWB(WB_HW_DEBUG, WB_HW_DEBUG, mask_sh),\
+ SF_DWB(WB_SOFT_RESET, WB_SOFT_RESET, mask_sh),\
+ SF_DWB(CNV_TEST_DEBUG_INDEX, CNV_TEST_DEBUG_INDEX, mask_sh),\
+ SF_DWB(CNV_TEST_DEBUG_INDEX, CNV_TEST_DEBUG_WRITE_EN, mask_sh),\
+ SF_DWB(CNV_TEST_DEBUG_DATA, CNV_TEST_DEBUG_DATA, mask_sh),\
+ SF_DWB(WBSCL_COEF_RAM_SELECT, WBSCL_COEF_RAM_TAP_PAIR_IDX, mask_sh),\
+ SF_DWB(WBSCL_COEF_RAM_SELECT, WBSCL_COEF_RAM_PHASE, mask_sh),\
+ SF_DWB(WBSCL_COEF_RAM_SELECT, WBSCL_COEF_RAM_FILTER_TYPE, mask_sh),\
+ SF_DWB(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_EVEN_TAP_COEF, mask_sh),\
+ SF_DWB(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_EVEN_TAP_COEF_EN, mask_sh),\
+ SF_DWB(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_ODD_TAP_COEF, mask_sh),\
+ SF_DWB(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_ODD_TAP_COEF_EN, mask_sh),\
+ SF_DWB(WBSCL_MODE, WBSCL_MODE, mask_sh),\
+ SF_DWB(WBSCL_MODE, WBSCL_OUT_BIT_DEPTH, mask_sh),\
+ SF_DWB(WBSCL_TAP_CONTROL, WBSCL_V_NUM_OF_TAPS_Y_RGB, mask_sh),\
+ SF_DWB(WBSCL_TAP_CONTROL, WBSCL_V_NUM_OF_TAPS_CBCR, mask_sh),\
+ SF_DWB(WBSCL_TAP_CONTROL, WBSCL_H_NUM_OF_TAPS_Y_RGB, mask_sh),\
+ SF_DWB(WBSCL_TAP_CONTROL, WBSCL_H_NUM_OF_TAPS_CBCR, mask_sh),\
+ SF_DWB(WBSCL_DEST_SIZE, WBSCL_DEST_HEIGHT, mask_sh),\
+ SF_DWB(WBSCL_DEST_SIZE, WBSCL_DEST_WIDTH, mask_sh),\
+ SF_DWB(WBSCL_HORZ_FILTER_SCALE_RATIO, WBSCL_H_SCALE_RATIO, mask_sh),\
+ SF_DWB(WBSCL_HORZ_FILTER_INIT_Y_RGB, WBSCL_H_INIT_FRAC_Y_RGB, mask_sh),\
+ SF_DWB(WBSCL_HORZ_FILTER_INIT_Y_RGB, WBSCL_H_INIT_INT_Y_RGB, mask_sh),\
+ SF_DWB(WBSCL_HORZ_FILTER_INIT_CBCR, WBSCL_H_INIT_FRAC_CBCR, mask_sh),\
+ SF_DWB(WBSCL_HORZ_FILTER_INIT_CBCR, WBSCL_H_INIT_INT_CBCR, mask_sh),\
+ SF_DWB(WBSCL_VERT_FILTER_SCALE_RATIO, WBSCL_V_SCALE_RATIO, mask_sh),\
+ SF_DWB(WBSCL_VERT_FILTER_INIT_Y_RGB, WBSCL_V_INIT_FRAC_Y_RGB, mask_sh),\
+ SF_DWB(WBSCL_VERT_FILTER_INIT_Y_RGB, WBSCL_V_INIT_INT_Y_RGB, mask_sh),\
+ SF_DWB(WBSCL_VERT_FILTER_INIT_CBCR, WBSCL_V_INIT_FRAC_CBCR, mask_sh),\
+ SF_DWB(WBSCL_VERT_FILTER_INIT_CBCR, WBSCL_V_INIT_INT_CBCR, mask_sh),\
+ SF_DWB(WBSCL_ROUND_OFFSET, WBSCL_ROUND_OFFSET_Y_RGB, mask_sh),\
+ SF_DWB(WBSCL_ROUND_OFFSET, WBSCL_ROUND_OFFSET_CBCR, mask_sh),\
+ SF_DWB(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_FLAG, mask_sh),\
+ SF_DWB(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_ACK, mask_sh),\
+ SF_DWB(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_MASK, mask_sh),\
+ SF_DWB(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_INT_STATUS, mask_sh),\
+ SF_DWB(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_INT_TYPE, mask_sh),\
+ SF_DWB(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_FLAG, mask_sh),\
+ SF_DWB(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_ACK, mask_sh),\
+ SF_DWB(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_MASK, mask_sh),\
+ SF_DWB(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_INT_STATUS, mask_sh),\
+ SF_DWB(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_INT_TYPE, mask_sh),\
+ SF_DWB(WBSCL_TEST_CNTL, WBSCL_TEST_CRC_EN, mask_sh),\
+ SF_DWB(WBSCL_TEST_CNTL, WBSCL_TEST_CRC_CONT_EN, mask_sh),\
+ SF_DWB(WBSCL_TEST_CRC_RED, WBSCL_TEST_CRC_RED_MASK, mask_sh),\
+ SF_DWB(WBSCL_TEST_CRC_RED, WBSCL_TEST_CRC_SIG_RED, mask_sh),\
+ SF_DWB(WBSCL_TEST_CRC_GREEN, WBSCL_TEST_CRC_GREEN_MASK, mask_sh),\
+ SF_DWB(WBSCL_TEST_CRC_GREEN, WBSCL_TEST_CRC_SIG_GREEN, mask_sh),\
+ SF_DWB(WBSCL_TEST_CRC_BLUE, WBSCL_TEST_CRC_BLUE_MASK, mask_sh),\
+ SF_DWB(WBSCL_TEST_CRC_BLUE, WBSCL_TEST_CRC_SIG_BLUE, mask_sh),\
+ SF_DWB(WBSCL_BACKPRESSURE_CNT_EN, WBSCL_BACKPRESSURE_CNT_EN, mask_sh),\
+ SF_DWB(WB_MCIF_BACKPRESSURE_CNT, WB_MCIF_Y_MAX_BACKPRESSURE, mask_sh),\
+ SF_DWB(WB_MCIF_BACKPRESSURE_CNT, WB_MCIF_C_MAX_BACKPRESSURE, mask_sh),\
+ SF_DWB(WBSCL_CLAMP_Y_RGB, WBSCL_CLAMP_UPPER_Y_RGB, mask_sh),\
+ SF_DWB(WBSCL_CLAMP_Y_RGB, WBSCL_CLAMP_LOWER_Y_RGB, mask_sh),\
+ SF_DWB(WBSCL_CLAMP_CBCR, WBSCL_CLAMP_UPPER_CBCR, mask_sh),\
+ SF_DWB(WBSCL_CLAMP_CBCR, WBSCL_CLAMP_LOWER_CBCR, mask_sh),\
+ SF_DWB(WBSCL_OUTSIDE_PIX_STRATEGY, WBSCL_OUTSIDE_PIX_STRATEGY, mask_sh),\
+ SF_DWB(WBSCL_OUTSIDE_PIX_STRATEGY, WBSCL_BLACK_COLOR_G_Y, mask_sh),\
+ SF_DWB(WBSCL_OUTSIDE_PIX_STRATEGY_CBCR, WBSCL_BLACK_COLOR_B_CB, mask_sh),\
+ SF_DWB(WBSCL_OUTSIDE_PIX_STRATEGY_CBCR, WBSCL_BLACK_COLOR_R_CR, mask_sh),\
+ SF_DWB(WBSCL_DEBUG, WBSCL_DEBUG, mask_sh),\
+ SF_DWB(WBSCL_TEST_DEBUG_INDEX, WBSCL_TEST_DEBUG_INDEX, mask_sh),\
+ SF_DWB(WBSCL_TEST_DEBUG_INDEX, WBSCL_TEST_DEBUG_WRITE_EN, mask_sh),\
+ SF_DWB(WBSCL_TEST_DEBUG_DATA, WBSCL_TEST_DEBUG_DATA, mask_sh),\
+ SF_DWB(WB_WARM_UP_MODE_CTL1, WIDTH_WARMUP, mask_sh),\
+ SF_DWB(WB_WARM_UP_MODE_CTL1, HEIGHT_WARMUP, mask_sh),\
+ SF_DWB(WB_WARM_UP_MODE_CTL1, GMC_WARM_UP_ENABLE, mask_sh),\
+ SF_DWB(WB_WARM_UP_MODE_CTL2, DATA_VALUE_WARMUP, mask_sh),\
+ SF_DWB(WB_WARM_UP_MODE_CTL2, MODE_WARMUP, mask_sh),\
+ SF_DWB(WB_WARM_UP_MODE_CTL2, DATA_DEPTH_WARMUP, mask_sh)
#define DWBC_REG_FIELD_LIST_DCN2_0(type) \
type WB_ENABLE;\
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c
index 880954ac0b02..a0d437f0ce2b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c
@@ -527,7 +527,7 @@ static const uint16_t filter_12tap_16p_183[108] = {
0, 84, 16328, 16032, 416, 1944, 1944, 416, 16032, 16328, 84, 0,
};
-const uint16_t *wbscl_get_filter_3tap_16p(struct fixed31_32 ratio)
+static const uint16_t *wbscl_get_filter_3tap_16p(struct fixed31_32 ratio)
{
if (ratio.value < dc_fixpt_one.value)
return filter_3tap_16p_upscale;
@@ -539,7 +539,7 @@ const uint16_t *wbscl_get_filter_3tap_16p(struct fixed31_32 ratio)
return filter_3tap_16p_183;
}
-const uint16_t *wbscl_get_filter_4tap_16p(struct fixed31_32 ratio)
+static const uint16_t *wbscl_get_filter_4tap_16p(struct fixed31_32 ratio)
{
if (ratio.value < dc_fixpt_one.value)
return filter_4tap_16p_upscale;
@@ -690,6 +690,9 @@ static void wbscl_set_scaler_filter(
int pair;
uint16_t odd_coef, even_coef;
+ if (!filter)
+ return;
+
for (phase = 0; phase < (NUM_PHASES / 2 + 1); phase++) {
for (pair = 0; pair < tap_pairs; pair++) {
even_coef = filter[phase * taps + 2 * pair];
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
deleted file mode 100644
index e6af99ae3d9f..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ /dev/null
@@ -1,2634 +0,0 @@
-/*
- * Copyright 2016 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-#include <linux/delay.h>
-
-#include "dm_services.h"
-#include "basics/dc_common.h"
-#include "dm_helpers.h"
-#include "core_types.h"
-#include "resource.h"
-#include "dcn20_resource.h"
-#include "dcn20_hwseq.h"
-#include "dce/dce_hwseq.h"
-#include "dcn20_dsc.h"
-#include "dcn20_optc.h"
-#include "abm.h"
-#include "clk_mgr.h"
-#include "dmcu.h"
-#include "hubp.h"
-#include "timing_generator.h"
-#include "opp.h"
-#include "ipp.h"
-#include "mpc.h"
-#include "mcif_wb.h"
-#include "dchubbub.h"
-#include "reg_helper.h"
-#include "dcn10/dcn10_cm_common.h"
-#include "dc_link_dp.h"
-#include "vm_helper.h"
-#include "dccg.h"
-#include "dc_dmub_srv.h"
-#include "dce/dmub_hw_lock_mgr.h"
-#include "hw_sequencer.h"
-#include "inc/link_dpcd.h"
-#include "dpcd_defs.h"
-#include "inc/link_enc_cfg.h"
-
-#define DC_LOGGER_INIT(logger)
-
-#define CTX \
- hws->ctx
-#define REG(reg)\
- hws->regs->reg
-
-#undef FN
-#define FN(reg_name, field_name) \
- hws->shifts->field_name, hws->masks->field_name
-
-static int find_free_gsl_group(const struct dc *dc)
-{
- if (dc->res_pool->gsl_groups.gsl_0 == 0)
- return 1;
- if (dc->res_pool->gsl_groups.gsl_1 == 0)
- return 2;
- if (dc->res_pool->gsl_groups.gsl_2 == 0)
- return 3;
-
- return 0;
-}
-
-/* NOTE: This is not a generic setup_gsl function (hence the suffix as_lock)
- * This is only used to lock pipes in pipe splitting case with immediate flip
- * Ordinary MPC/OTG locks suppress VUPDATE which doesn't help with immediate,
- * so we get tearing with freesync since we cannot flip multiple pipes
- * atomically.
- * We use GSL for this:
- * - immediate flip: find first available GSL group if not already assigned
- * program gsl with that group, set current OTG as master
- * and always us 0x4 = AND of flip_ready from all pipes
- * - vsync flip: disable GSL if used
- *
- * Groups in stream_res are stored as +1 from HW registers, i.e.
- * gsl_0 <=> pipe_ctx->stream_res.gsl_group == 1
- * Using a magic value like -1 would require tracking all inits/resets
- */
-static void dcn20_setup_gsl_group_as_lock(
- const struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- bool enable)
-{
- struct gsl_params gsl;
- int group_idx;
-
- memset(&gsl, 0, sizeof(struct gsl_params));
-
- if (enable) {
- /* return if group already assigned since GSL was set up
- * for vsync flip, we would unassign so it can't be "left over"
- */
- if (pipe_ctx->stream_res.gsl_group > 0)
- return;
-
- group_idx = find_free_gsl_group(dc);
- ASSERT(group_idx != 0);
- pipe_ctx->stream_res.gsl_group = group_idx;
-
- /* set gsl group reg field and mark resource used */
- switch (group_idx) {
- case 1:
- gsl.gsl0_en = 1;
- dc->res_pool->gsl_groups.gsl_0 = 1;
- break;
- case 2:
- gsl.gsl1_en = 1;
- dc->res_pool->gsl_groups.gsl_1 = 1;
- break;
- case 3:
- gsl.gsl2_en = 1;
- dc->res_pool->gsl_groups.gsl_2 = 1;
- break;
- default:
- BREAK_TO_DEBUGGER();
- return; // invalid case
- }
- gsl.gsl_master_en = 1;
- } else {
- group_idx = pipe_ctx->stream_res.gsl_group;
- if (group_idx == 0)
- return; // if not in use, just return
-
- pipe_ctx->stream_res.gsl_group = 0;
-
- /* unset gsl group reg field and mark resource free */
- switch (group_idx) {
- case 1:
- gsl.gsl0_en = 0;
- dc->res_pool->gsl_groups.gsl_0 = 0;
- break;
- case 2:
- gsl.gsl1_en = 0;
- dc->res_pool->gsl_groups.gsl_1 = 0;
- break;
- case 3:
- gsl.gsl2_en = 0;
- dc->res_pool->gsl_groups.gsl_2 = 0;
- break;
- default:
- BREAK_TO_DEBUGGER();
- return;
- }
- gsl.gsl_master_en = 0;
- }
-
- /* at this point we want to program whether it's to enable or disable */
- if (pipe_ctx->stream_res.tg->funcs->set_gsl != NULL &&
- pipe_ctx->stream_res.tg->funcs->set_gsl_source_select != NULL) {
- pipe_ctx->stream_res.tg->funcs->set_gsl(
- pipe_ctx->stream_res.tg,
- &gsl);
-
- pipe_ctx->stream_res.tg->funcs->set_gsl_source_select(
- pipe_ctx->stream_res.tg, group_idx, enable ? 4 : 0);
- } else
- BREAK_TO_DEBUGGER();
-}
-
-void dcn20_set_flip_control_gsl(
- struct pipe_ctx *pipe_ctx,
- bool flip_immediate)
-{
- if (pipe_ctx && pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_control_surface_gsl)
- pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_control_surface_gsl(
- pipe_ctx->plane_res.hubp, flip_immediate);
-
-}
-
-void dcn20_enable_power_gating_plane(
- struct dce_hwseq *hws,
- bool enable)
-{
- bool force_on = true; /* disable power gating */
-
- if (enable)
- force_on = false;
-
- /* DCHUBP0/1/2/3/4/5 */
- REG_UPDATE(DOMAIN0_PG_CONFIG, DOMAIN0_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN2_PG_CONFIG, DOMAIN2_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN4_PG_CONFIG, DOMAIN4_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN6_PG_CONFIG, DOMAIN6_POWER_FORCEON, force_on);
- if (REG(DOMAIN8_PG_CONFIG))
- REG_UPDATE(DOMAIN8_PG_CONFIG, DOMAIN8_POWER_FORCEON, force_on);
- if (REG(DOMAIN10_PG_CONFIG))
- REG_UPDATE(DOMAIN10_PG_CONFIG, DOMAIN8_POWER_FORCEON, force_on);
-
- /* DPP0/1/2/3/4/5 */
- REG_UPDATE(DOMAIN1_PG_CONFIG, DOMAIN1_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN3_PG_CONFIG, DOMAIN3_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN5_PG_CONFIG, DOMAIN5_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN7_PG_CONFIG, DOMAIN7_POWER_FORCEON, force_on);
- if (REG(DOMAIN9_PG_CONFIG))
- REG_UPDATE(DOMAIN9_PG_CONFIG, DOMAIN9_POWER_FORCEON, force_on);
- if (REG(DOMAIN11_PG_CONFIG))
- REG_UPDATE(DOMAIN11_PG_CONFIG, DOMAIN9_POWER_FORCEON, force_on);
-
- /* DCS0/1/2/3/4/5 */
- REG_UPDATE(DOMAIN16_PG_CONFIG, DOMAIN16_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN17_PG_CONFIG, DOMAIN17_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN18_PG_CONFIG, DOMAIN18_POWER_FORCEON, force_on);
- if (REG(DOMAIN19_PG_CONFIG))
- REG_UPDATE(DOMAIN19_PG_CONFIG, DOMAIN19_POWER_FORCEON, force_on);
- if (REG(DOMAIN20_PG_CONFIG))
- REG_UPDATE(DOMAIN20_PG_CONFIG, DOMAIN20_POWER_FORCEON, force_on);
- if (REG(DOMAIN21_PG_CONFIG))
- REG_UPDATE(DOMAIN21_PG_CONFIG, DOMAIN21_POWER_FORCEON, force_on);
-}
-
-void dcn20_dccg_init(struct dce_hwseq *hws)
-{
- /*
- * set MICROSECOND_TIME_BASE_DIV
- * 100Mhz refclk -> 0x120264
- * 27Mhz refclk -> 0x12021b
- * 48Mhz refclk -> 0x120230
- *
- */
- REG_WRITE(MICROSECOND_TIME_BASE_DIV, 0x120264);
-
- /*
- * set MILLISECOND_TIME_BASE_DIV
- * 100Mhz refclk -> 0x1186a0
- * 27Mhz refclk -> 0x106978
- * 48Mhz refclk -> 0x10bb80
- *
- */
- REG_WRITE(MILLISECOND_TIME_BASE_DIV, 0x1186a0);
-
- /* This value is dependent on the hardware pipeline delay so set once per SOC */
- REG_WRITE(DISPCLK_FREQ_CHANGE_CNTL, 0xe01003c);
-}
-
-void dcn20_disable_vga(
- struct dce_hwseq *hws)
-{
- REG_WRITE(D1VGA_CONTROL, 0);
- REG_WRITE(D2VGA_CONTROL, 0);
- REG_WRITE(D3VGA_CONTROL, 0);
- REG_WRITE(D4VGA_CONTROL, 0);
- REG_WRITE(D5VGA_CONTROL, 0);
- REG_WRITE(D6VGA_CONTROL, 0);
-}
-
-void dcn20_program_triple_buffer(
- const struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- bool enable_triple_buffer)
-{
- if (pipe_ctx->plane_res.hubp && pipe_ctx->plane_res.hubp->funcs) {
- pipe_ctx->plane_res.hubp->funcs->hubp_enable_tripleBuffer(
- pipe_ctx->plane_res.hubp,
- enable_triple_buffer);
- }
-}
-
-/* Blank pixel data during initialization */
-void dcn20_init_blank(
- struct dc *dc,
- struct timing_generator *tg)
-{
- struct dce_hwseq *hws = dc->hwseq;
- enum dc_color_space color_space;
- struct tg_color black_color = {0};
- struct output_pixel_processor *opp = NULL;
- struct output_pixel_processor *bottom_opp = NULL;
- uint32_t num_opps, opp_id_src0, opp_id_src1;
- uint32_t otg_active_width, otg_active_height;
-
- /* program opp dpg blank color */
- color_space = COLOR_SPACE_SRGB;
- color_space_to_black_color(dc, color_space, &black_color);
-
- /* get the OTG active size */
- tg->funcs->get_otg_active_size(tg,
- &otg_active_width,
- &otg_active_height);
-
- /* get the OPTC source */
- tg->funcs->get_optc_source(tg, &num_opps, &opp_id_src0, &opp_id_src1);
-
- if (opp_id_src0 >= dc->res_pool->res_cap->num_opp) {
- ASSERT(false);
- return;
- }
- opp = dc->res_pool->opps[opp_id_src0];
-
- if (num_opps == 2) {
- otg_active_width = otg_active_width / 2;
-
- if (opp_id_src1 >= dc->res_pool->res_cap->num_opp) {
- ASSERT(false);
- return;
- }
- bottom_opp = dc->res_pool->opps[opp_id_src1];
- }
-
- opp->funcs->opp_set_disp_pattern_generator(
- opp,
- CONTROLLER_DP_TEST_PATTERN_SOLID_COLOR,
- CONTROLLER_DP_COLOR_SPACE_UDEFINED,
- COLOR_DEPTH_UNDEFINED,
- &black_color,
- otg_active_width,
- otg_active_height,
- 0);
-
- if (num_opps == 2) {
- bottom_opp->funcs->opp_set_disp_pattern_generator(
- bottom_opp,
- CONTROLLER_DP_TEST_PATTERN_SOLID_COLOR,
- CONTROLLER_DP_COLOR_SPACE_UDEFINED,
- COLOR_DEPTH_UNDEFINED,
- &black_color,
- otg_active_width,
- otg_active_height,
- 0);
- }
-
- hws->funcs.wait_for_blank_complete(opp);
-}
-
-void dcn20_dsc_pg_control(
- struct dce_hwseq *hws,
- unsigned int dsc_inst,
- bool power_on)
-{
- uint32_t power_gate = power_on ? 0 : 1;
- uint32_t pwr_status = power_on ? 0 : 2;
- uint32_t org_ip_request_cntl = 0;
-
- if (hws->ctx->dc->debug.disable_dsc_power_gate)
- return;
-
- if (REG(DOMAIN16_PG_CONFIG) == 0)
- return;
-
- REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl);
- if (org_ip_request_cntl == 0)
- REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1);
-
- switch (dsc_inst) {
- case 0: /* DSC0 */
- REG_UPDATE(DOMAIN16_PG_CONFIG,
- DOMAIN16_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN16_PG_STATUS,
- DOMAIN16_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 1: /* DSC1 */
- REG_UPDATE(DOMAIN17_PG_CONFIG,
- DOMAIN17_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN17_PG_STATUS,
- DOMAIN17_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 2: /* DSC2 */
- REG_UPDATE(DOMAIN18_PG_CONFIG,
- DOMAIN18_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN18_PG_STATUS,
- DOMAIN18_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 3: /* DSC3 */
- REG_UPDATE(DOMAIN19_PG_CONFIG,
- DOMAIN19_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN19_PG_STATUS,
- DOMAIN19_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 4: /* DSC4 */
- REG_UPDATE(DOMAIN20_PG_CONFIG,
- DOMAIN20_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN20_PG_STATUS,
- DOMAIN20_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 5: /* DSC5 */
- REG_UPDATE(DOMAIN21_PG_CONFIG,
- DOMAIN21_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN21_PG_STATUS,
- DOMAIN21_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- default:
- BREAK_TO_DEBUGGER();
- break;
- }
-
- if (org_ip_request_cntl == 0)
- REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0);
-}
-
-void dcn20_dpp_pg_control(
- struct dce_hwseq *hws,
- unsigned int dpp_inst,
- bool power_on)
-{
- uint32_t power_gate = power_on ? 0 : 1;
- uint32_t pwr_status = power_on ? 0 : 2;
-
- if (hws->ctx->dc->debug.disable_dpp_power_gate)
- return;
- if (REG(DOMAIN1_PG_CONFIG) == 0)
- return;
-
- switch (dpp_inst) {
- case 0: /* DPP0 */
- REG_UPDATE(DOMAIN1_PG_CONFIG,
- DOMAIN1_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN1_PG_STATUS,
- DOMAIN1_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 1: /* DPP1 */
- REG_UPDATE(DOMAIN3_PG_CONFIG,
- DOMAIN3_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN3_PG_STATUS,
- DOMAIN3_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 2: /* DPP2 */
- REG_UPDATE(DOMAIN5_PG_CONFIG,
- DOMAIN5_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN5_PG_STATUS,
- DOMAIN5_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 3: /* DPP3 */
- REG_UPDATE(DOMAIN7_PG_CONFIG,
- DOMAIN7_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN7_PG_STATUS,
- DOMAIN7_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 4: /* DPP4 */
- REG_UPDATE(DOMAIN9_PG_CONFIG,
- DOMAIN9_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN9_PG_STATUS,
- DOMAIN9_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 5: /* DPP5 */
- /*
- * Do not power gate DPP5, should be left at HW default, power on permanently.
- * PG on Pipe5 is De-featured, attempting to put it to PG state may result in hard
- * reset.
- * REG_UPDATE(DOMAIN11_PG_CONFIG,
- * DOMAIN11_POWER_GATE, power_gate);
- *
- * REG_WAIT(DOMAIN11_PG_STATUS,
- * DOMAIN11_PGFSM_PWR_STATUS, pwr_status,
- * 1, 1000);
- */
- break;
- default:
- BREAK_TO_DEBUGGER();
- break;
- }
-}
-
-
-void dcn20_hubp_pg_control(
- struct dce_hwseq *hws,
- unsigned int hubp_inst,
- bool power_on)
-{
- uint32_t power_gate = power_on ? 0 : 1;
- uint32_t pwr_status = power_on ? 0 : 2;
-
- if (hws->ctx->dc->debug.disable_hubp_power_gate)
- return;
- if (REG(DOMAIN0_PG_CONFIG) == 0)
- return;
-
- switch (hubp_inst) {
- case 0: /* DCHUBP0 */
- REG_UPDATE(DOMAIN0_PG_CONFIG,
- DOMAIN0_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN0_PG_STATUS,
- DOMAIN0_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 1: /* DCHUBP1 */
- REG_UPDATE(DOMAIN2_PG_CONFIG,
- DOMAIN2_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN2_PG_STATUS,
- DOMAIN2_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 2: /* DCHUBP2 */
- REG_UPDATE(DOMAIN4_PG_CONFIG,
- DOMAIN4_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN4_PG_STATUS,
- DOMAIN4_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 3: /* DCHUBP3 */
- REG_UPDATE(DOMAIN6_PG_CONFIG,
- DOMAIN6_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN6_PG_STATUS,
- DOMAIN6_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 4: /* DCHUBP4 */
- REG_UPDATE(DOMAIN8_PG_CONFIG,
- DOMAIN8_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN8_PG_STATUS,
- DOMAIN8_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 5: /* DCHUBP5 */
- /*
- * Do not power gate DCHUB5, should be left at HW default, power on permanently.
- * PG on Pipe5 is De-featured, attempting to put it to PG state may result in hard
- * reset.
- * REG_UPDATE(DOMAIN10_PG_CONFIG,
- * DOMAIN10_POWER_GATE, power_gate);
- *
- * REG_WAIT(DOMAIN10_PG_STATUS,
- * DOMAIN10_PGFSM_PWR_STATUS, pwr_status,
- * 1, 1000);
- */
- break;
- default:
- BREAK_TO_DEBUGGER();
- break;
- }
-}
-
-
-/* disable HW used by plane.
- * note: cannot disable until disconnect is complete
- */
-void dcn20_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
-{
- struct dce_hwseq *hws = dc->hwseq;
- struct hubp *hubp = pipe_ctx->plane_res.hubp;
- struct dpp *dpp = pipe_ctx->plane_res.dpp;
-
- dc->hwss.wait_for_mpcc_disconnect(dc, dc->res_pool, pipe_ctx);
-
- /* In flip immediate with pipe splitting case GSL is used for
- * synchronization so we must disable it when the plane is disabled.
- */
- if (pipe_ctx->stream_res.gsl_group != 0)
- dcn20_setup_gsl_group_as_lock(dc, pipe_ctx, false);
-
- dc->hwss.set_flip_control_gsl(pipe_ctx, false);
-
- hubp->funcs->hubp_clk_cntl(hubp, false);
-
- dpp->funcs->dpp_dppclk_control(dpp, false, false);
-
- hubp->power_gated = true;
-
- hws->funcs.plane_atomic_power_down(dc,
- pipe_ctx->plane_res.dpp,
- pipe_ctx->plane_res.hubp);
-
- pipe_ctx->stream = NULL;
- memset(&pipe_ctx->stream_res, 0, sizeof(pipe_ctx->stream_res));
- memset(&pipe_ctx->plane_res, 0, sizeof(pipe_ctx->plane_res));
- pipe_ctx->top_pipe = NULL;
- pipe_ctx->bottom_pipe = NULL;
- pipe_ctx->plane_state = NULL;
-}
-
-
-void dcn20_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx)
-{
- DC_LOGGER_INIT(dc->ctx->logger);
-
- if (!pipe_ctx->plane_res.hubp || pipe_ctx->plane_res.hubp->power_gated)
- return;
-
- dcn20_plane_atomic_disable(dc, pipe_ctx);
-
- DC_LOG_DC("Power down front end %d\n",
- pipe_ctx->pipe_idx);
-}
-
-static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream,
- int opp_cnt)
-{
- bool hblank_halved = optc2_is_two_pixels_per_containter(&stream->timing);
- int flow_ctrl_cnt;
-
- if (opp_cnt >= 2)
- hblank_halved = true;
-
- flow_ctrl_cnt = stream->timing.h_total - stream->timing.h_addressable -
- stream->timing.h_border_left -
- stream->timing.h_border_right;
-
- if (hblank_halved)
- flow_ctrl_cnt /= 2;
-
- /* ODM combine 4:1 case */
- if (opp_cnt == 4)
- flow_ctrl_cnt /= 2;
-
- return flow_ctrl_cnt;
-}
-
-enum dc_status dcn20_enable_stream_timing(
- struct pipe_ctx *pipe_ctx,
- struct dc_state *context,
- struct dc *dc)
-{
- struct dce_hwseq *hws = dc->hwseq;
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct drr_params params = {0};
- unsigned int event_triggers = 0;
- struct pipe_ctx *odm_pipe;
- int opp_cnt = 1;
- int opp_inst[MAX_PIPES] = { pipe_ctx->stream_res.opp->inst };
- bool interlace = stream->timing.flags.INTERLACE;
- int i;
- struct mpc_dwb_flow_control flow_control;
- struct mpc *mpc = dc->res_pool->mpc;
- bool rate_control_2x_pclk = (interlace || optc2_is_two_pixels_per_containter(&stream->timing));
-
- /* by upper caller loop, pipe0 is parent pipe and be called first.
- * back end is set up by for pipe0. Other children pipe share back end
- * with pipe 0. No program is needed.
- */
- if (pipe_ctx->top_pipe != NULL)
- return DC_OK;
-
- /* TODO check if timing_changed, disable stream if timing changed */
-
- for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
- opp_inst[opp_cnt] = odm_pipe->stream_res.opp->inst;
- opp_cnt++;
- }
-
- if (opp_cnt > 1)
- pipe_ctx->stream_res.tg->funcs->set_odm_combine(
- pipe_ctx->stream_res.tg,
- opp_inst, opp_cnt,
- &pipe_ctx->stream->timing);
-
- /* HW program guide assume display already disable
- * by unplug sequence. OTG assume stop.
- */
- pipe_ctx->stream_res.tg->funcs->enable_optc_clock(pipe_ctx->stream_res.tg, true);
-
- if (false == pipe_ctx->clock_source->funcs->program_pix_clk(
- pipe_ctx->clock_source,
- &pipe_ctx->stream_res.pix_clk_params,
- &pipe_ctx->pll_settings)) {
- BREAK_TO_DEBUGGER();
- return DC_ERROR_UNEXPECTED;
- }
-
- if (dc->hwseq->funcs.PLAT_58856_wa && (!dc_is_dp_signal(stream->signal)))
- dc->hwseq->funcs.PLAT_58856_wa(context, pipe_ctx);
-
- pipe_ctx->stream_res.tg->funcs->program_timing(
- pipe_ctx->stream_res.tg,
- &stream->timing,
- pipe_ctx->pipe_dlg_param.vready_offset,
- pipe_ctx->pipe_dlg_param.vstartup_start,
- pipe_ctx->pipe_dlg_param.vupdate_offset,
- pipe_ctx->pipe_dlg_param.vupdate_width,
- pipe_ctx->stream->signal,
- true);
-
- rate_control_2x_pclk = rate_control_2x_pclk || opp_cnt > 1;
- flow_control.flow_ctrl_mode = 0;
- flow_control.flow_ctrl_cnt0 = 0x80;
- flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(stream, opp_cnt);
- if (mpc->funcs->set_out_rate_control) {
- for (i = 0; i < opp_cnt; ++i) {
- mpc->funcs->set_out_rate_control(
- mpc, opp_inst[i],
- true,
- rate_control_2x_pclk,
- &flow_control);
- }
- }
-
- for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
- odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control(
- odm_pipe->stream_res.opp,
- true);
-
- pipe_ctx->stream_res.opp->funcs->opp_pipe_clock_control(
- pipe_ctx->stream_res.opp,
- true);
-
- hws->funcs.blank_pixel_data(dc, pipe_ctx, true);
-
- /* VTG is within DCHUB command block. DCFCLK is always on */
- if (false == pipe_ctx->stream_res.tg->funcs->enable_crtc(pipe_ctx->stream_res.tg)) {
- BREAK_TO_DEBUGGER();
- return DC_ERROR_UNEXPECTED;
- }
-
- hws->funcs.wait_for_blank_complete(pipe_ctx->stream_res.opp);
-
- params.vertical_total_min = stream->adjust.v_total_min;
- params.vertical_total_max = stream->adjust.v_total_max;
- params.vertical_total_mid = stream->adjust.v_total_mid;
- params.vertical_total_mid_frame_num = stream->adjust.v_total_mid_frame_num;
- if (pipe_ctx->stream_res.tg->funcs->set_drr)
- pipe_ctx->stream_res.tg->funcs->set_drr(
- pipe_ctx->stream_res.tg, &params);
-
- // DRR should set trigger event to monitor surface update event
- if (stream->adjust.v_total_min != 0 && stream->adjust.v_total_max != 0)
- event_triggers = 0x80;
- /* Event triggers and num frames initialized for DRR, but can be
- * later updated for PSR use. Note DRR trigger events are generated
- * regardless of whether num frames met.
- */
- if (pipe_ctx->stream_res.tg->funcs->set_static_screen_control)
- pipe_ctx->stream_res.tg->funcs->set_static_screen_control(
- pipe_ctx->stream_res.tg, event_triggers, 2);
-
- /* TODO program crtc source select for non-virtual signal*/
- /* TODO program FMT */
- /* TODO setup link_enc */
- /* TODO set stream attributes */
- /* TODO program audio */
- /* TODO enable stream if timing changed */
- /* TODO unblank stream if DP */
-
- return DC_OK;
-}
-
-void dcn20_program_output_csc(struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- enum dc_color_space colorspace,
- uint16_t *matrix,
- int opp_id)
-{
- struct mpc *mpc = dc->res_pool->mpc;
- enum mpc_output_csc_mode ocsc_mode = MPC_OUTPUT_CSC_COEF_A;
- int mpcc_id = pipe_ctx->plane_res.hubp->inst;
-
- if (mpc->funcs->power_on_mpc_mem_pwr)
- mpc->funcs->power_on_mpc_mem_pwr(mpc, mpcc_id, true);
-
- if (pipe_ctx->stream->csc_color_matrix.enable_adjustment == true) {
- if (mpc->funcs->set_output_csc != NULL)
- mpc->funcs->set_output_csc(mpc,
- opp_id,
- matrix,
- ocsc_mode);
- } else {
- if (mpc->funcs->set_ocsc_default != NULL)
- mpc->funcs->set_ocsc_default(mpc,
- opp_id,
- colorspace,
- ocsc_mode);
- }
-}
-
-bool dcn20_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
- const struct dc_stream_state *stream)
-{
- int mpcc_id = pipe_ctx->plane_res.hubp->inst;
- struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc;
- struct pwl_params *params = NULL;
- /*
- * program OGAM only for the top pipe
- * if there is a pipe split then fix diagnostic is required:
- * how to pass OGAM parameter for stream.
- * if programming for all pipes is required then remove condition
- * pipe_ctx->top_pipe == NULL ,but then fix the diagnostic.
- */
- if (mpc->funcs->power_on_mpc_mem_pwr)
- mpc->funcs->power_on_mpc_mem_pwr(mpc, mpcc_id, true);
- if (pipe_ctx->top_pipe == NULL
- && mpc->funcs->set_output_gamma && stream->out_transfer_func) {
- if (stream->out_transfer_func->type == TF_TYPE_HWPWL)
- params = &stream->out_transfer_func->pwl;
- else if (pipe_ctx->stream->out_transfer_func->type ==
- TF_TYPE_DISTRIBUTED_POINTS &&
- cm_helper_translate_curve_to_hw_format(
- stream->out_transfer_func,
- &mpc->blender_params, false))
- params = &mpc->blender_params;
- /*
- * there is no ROM
- */
- if (stream->out_transfer_func->type == TF_TYPE_PREDEFINED)
- BREAK_TO_DEBUGGER();
- }
- /*
- * if above if is not executed then 'params' equal to 0 and set in bypass
- */
- mpc->funcs->set_output_gamma(mpc, mpcc_id, params);
-
- return true;
-}
-
-bool dcn20_set_blend_lut(
- struct pipe_ctx *pipe_ctx, const struct dc_plane_state *plane_state)
-{
- struct dpp *dpp_base = pipe_ctx->plane_res.dpp;
- bool result = true;
- struct pwl_params *blend_lut = NULL;
-
- if (plane_state->blend_tf) {
- if (plane_state->blend_tf->type == TF_TYPE_HWPWL)
- blend_lut = &plane_state->blend_tf->pwl;
- else if (plane_state->blend_tf->type == TF_TYPE_DISTRIBUTED_POINTS) {
- cm_helper_translate_curve_to_hw_format(
- plane_state->blend_tf,
- &dpp_base->regamma_params, false);
- blend_lut = &dpp_base->regamma_params;
- }
- }
- result = dpp_base->funcs->dpp_program_blnd_lut(dpp_base, blend_lut);
-
- return result;
-}
-
-bool dcn20_set_shaper_3dlut(
- struct pipe_ctx *pipe_ctx, const struct dc_plane_state *plane_state)
-{
- struct dpp *dpp_base = pipe_ctx->plane_res.dpp;
- bool result = true;
- struct pwl_params *shaper_lut = NULL;
-
- if (plane_state->in_shaper_func) {
- if (plane_state->in_shaper_func->type == TF_TYPE_HWPWL)
- shaper_lut = &plane_state->in_shaper_func->pwl;
- else if (plane_state->in_shaper_func->type == TF_TYPE_DISTRIBUTED_POINTS) {
- cm_helper_translate_curve_to_hw_format(
- plane_state->in_shaper_func,
- &dpp_base->shaper_params, true);
- shaper_lut = &dpp_base->shaper_params;
- }
- }
-
- result = dpp_base->funcs->dpp_program_shaper_lut(dpp_base, shaper_lut);
- if (plane_state->lut3d_func &&
- plane_state->lut3d_func->state.bits.initialized == 1)
- result = dpp_base->funcs->dpp_program_3dlut(dpp_base,
- &plane_state->lut3d_func->lut_3d);
- else
- result = dpp_base->funcs->dpp_program_3dlut(dpp_base, NULL);
-
- return result;
-}
-
-bool dcn20_set_input_transfer_func(struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- const struct dc_plane_state *plane_state)
-{
- struct dce_hwseq *hws = dc->hwseq;
- struct dpp *dpp_base = pipe_ctx->plane_res.dpp;
- const struct dc_transfer_func *tf = NULL;
- bool result = true;
- bool use_degamma_ram = false;
-
- if (dpp_base == NULL || plane_state == NULL)
- return false;
-
- hws->funcs.set_shaper_3dlut(pipe_ctx, plane_state);
- hws->funcs.set_blend_lut(pipe_ctx, plane_state);
-
- if (plane_state->in_transfer_func)
- tf = plane_state->in_transfer_func;
-
-
- if (tf == NULL) {
- dpp_base->funcs->dpp_set_degamma(dpp_base,
- IPP_DEGAMMA_MODE_BYPASS);
- return true;
- }
-
- if (tf->type == TF_TYPE_HWPWL || tf->type == TF_TYPE_DISTRIBUTED_POINTS)
- use_degamma_ram = true;
-
- if (use_degamma_ram == true) {
- if (tf->type == TF_TYPE_HWPWL)
- dpp_base->funcs->dpp_program_degamma_pwl(dpp_base,
- &tf->pwl);
- else if (tf->type == TF_TYPE_DISTRIBUTED_POINTS) {
- cm_helper_translate_curve_to_degamma_hw_format(tf,
- &dpp_base->degamma_params);
- dpp_base->funcs->dpp_program_degamma_pwl(dpp_base,
- &dpp_base->degamma_params);
- }
- return true;
- }
- /* handle here the optimized cases when de-gamma ROM could be used.
- *
- */
- if (tf->type == TF_TYPE_PREDEFINED) {
- switch (tf->tf) {
- case TRANSFER_FUNCTION_SRGB:
- dpp_base->funcs->dpp_set_degamma(dpp_base,
- IPP_DEGAMMA_MODE_HW_sRGB);
- break;
- case TRANSFER_FUNCTION_BT709:
- dpp_base->funcs->dpp_set_degamma(dpp_base,
- IPP_DEGAMMA_MODE_HW_xvYCC);
- break;
- case TRANSFER_FUNCTION_LINEAR:
- dpp_base->funcs->dpp_set_degamma(dpp_base,
- IPP_DEGAMMA_MODE_BYPASS);
- break;
- case TRANSFER_FUNCTION_PQ:
- dpp_base->funcs->dpp_set_degamma(dpp_base, IPP_DEGAMMA_MODE_USER_PWL);
- cm_helper_translate_curve_to_degamma_hw_format(tf, &dpp_base->degamma_params);
- dpp_base->funcs->dpp_program_degamma_pwl(dpp_base, &dpp_base->degamma_params);
- result = true;
- break;
- default:
- result = false;
- break;
- }
- } else if (tf->type == TF_TYPE_BYPASS)
- dpp_base->funcs->dpp_set_degamma(dpp_base,
- IPP_DEGAMMA_MODE_BYPASS);
- else {
- /*
- * if we are here, we did not handle correctly.
- * fix is required for this use case
- */
- BREAK_TO_DEBUGGER();
- dpp_base->funcs->dpp_set_degamma(dpp_base,
- IPP_DEGAMMA_MODE_BYPASS);
- }
-
- return result;
-}
-
-void dcn20_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx)
-{
- struct pipe_ctx *odm_pipe;
- int opp_cnt = 1;
- int opp_inst[MAX_PIPES] = { pipe_ctx->stream_res.opp->inst };
-
- for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
- opp_inst[opp_cnt] = odm_pipe->stream_res.opp->inst;
- opp_cnt++;
- }
-
- if (opp_cnt > 1)
- pipe_ctx->stream_res.tg->funcs->set_odm_combine(
- pipe_ctx->stream_res.tg,
- opp_inst, opp_cnt,
- &pipe_ctx->stream->timing);
- else
- pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
- pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
-}
-
-void dcn20_blank_pixel_data(
- struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- bool blank)
-{
- struct tg_color black_color = {0};
- struct stream_resource *stream_res = &pipe_ctx->stream_res;
- struct dc_stream_state *stream = pipe_ctx->stream;
- enum dc_color_space color_space = stream->output_color_space;
- enum controller_dp_test_pattern test_pattern = CONTROLLER_DP_TEST_PATTERN_SOLID_COLOR;
- enum controller_dp_color_space test_pattern_color_space = CONTROLLER_DP_COLOR_SPACE_UDEFINED;
- struct pipe_ctx *odm_pipe;
- int odm_cnt = 1;
-
- int width = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right;
- int height = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top;
-
- if (stream->link->test_pattern_enabled)
- return;
-
- /* get opp dpg blank color */
- color_space_to_black_color(dc, color_space, &black_color);
-
- for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
- odm_cnt++;
-
- width = width / odm_cnt;
-
- if (blank) {
- dc->hwss.set_abm_immediate_disable(pipe_ctx);
-
- if (dc->debug.visual_confirm != VISUAL_CONFIRM_DISABLE) {
- test_pattern = CONTROLLER_DP_TEST_PATTERN_COLORSQUARES;
- test_pattern_color_space = CONTROLLER_DP_COLOR_SPACE_RGB;
- }
- } else {
- test_pattern = CONTROLLER_DP_TEST_PATTERN_VIDEOMODE;
- }
-
- dc->hwss.set_disp_pattern_generator(dc,
- pipe_ctx,
- test_pattern,
- test_pattern_color_space,
- stream->timing.display_color_depth,
- &black_color,
- width,
- height,
- 0);
-
- for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
- dc->hwss.set_disp_pattern_generator(dc,
- odm_pipe,
- dc->debug.visual_confirm != VISUAL_CONFIRM_DISABLE && blank ?
- CONTROLLER_DP_TEST_PATTERN_COLORRAMP : test_pattern,
- test_pattern_color_space,
- stream->timing.display_color_depth,
- &black_color,
- width,
- height,
- 0);
- }
-
- if (!blank)
- if (stream_res->abm) {
- dc->hwss.set_pipe(pipe_ctx);
- stream_res->abm->funcs->set_abm_level(stream_res->abm, stream->abm_level);
- }
-}
-
-
-static void dcn20_power_on_plane(
- struct dce_hwseq *hws,
- struct pipe_ctx *pipe_ctx)
-{
- DC_LOGGER_INIT(hws->ctx->logger);
- if (REG(DC_IP_REQUEST_CNTL)) {
- REG_SET(DC_IP_REQUEST_CNTL, 0,
- IP_REQUEST_EN, 1);
-
- if (hws->funcs.dpp_pg_control)
- hws->funcs.dpp_pg_control(hws, pipe_ctx->plane_res.dpp->inst, true);
-
- if (hws->funcs.hubp_pg_control)
- hws->funcs.hubp_pg_control(hws, pipe_ctx->plane_res.hubp->inst, true);
-
- REG_SET(DC_IP_REQUEST_CNTL, 0,
- IP_REQUEST_EN, 0);
- DC_LOG_DEBUG(
- "Un-gated front end for pipe %d\n", pipe_ctx->plane_res.hubp->inst);
- }
-}
-
-void dcn20_enable_plane(
- struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- struct dc_state *context)
-{
- //if (dc->debug.sanity_checks) {
- // dcn10_verify_allow_pstate_change_high(dc);
- //}
- dcn20_power_on_plane(dc->hwseq, pipe_ctx);
-
- /* enable DCFCLK current DCHUB */
- pipe_ctx->plane_res.hubp->funcs->hubp_clk_cntl(pipe_ctx->plane_res.hubp, true);
-
- /* initialize HUBP on power up */
- pipe_ctx->plane_res.hubp->funcs->hubp_init(pipe_ctx->plane_res.hubp);
-
- /* make sure OPP_PIPE_CLOCK_EN = 1 */
- pipe_ctx->stream_res.opp->funcs->opp_pipe_clock_control(
- pipe_ctx->stream_res.opp,
- true);
-
-/* TODO: enable/disable in dm as per update type.
- if (plane_state) {
- DC_LOG_DC(dc->ctx->logger,
- "Pipe:%d 0x%x: addr hi:0x%x, "
- "addr low:0x%x, "
- "src: %d, %d, %d,"
- " %d; dst: %d, %d, %d, %d;\n",
- pipe_ctx->pipe_idx,
- plane_state,
- plane_state->address.grph.addr.high_part,
- plane_state->address.grph.addr.low_part,
- plane_state->src_rect.x,
- plane_state->src_rect.y,
- plane_state->src_rect.width,
- plane_state->src_rect.height,
- plane_state->dst_rect.x,
- plane_state->dst_rect.y,
- plane_state->dst_rect.width,
- plane_state->dst_rect.height);
-
- DC_LOG_DC(dc->ctx->logger,
- "Pipe %d: width, height, x, y format:%d\n"
- "viewport:%d, %d, %d, %d\n"
- "recout: %d, %d, %d, %d\n",
- pipe_ctx->pipe_idx,
- plane_state->format,
- pipe_ctx->plane_res.scl_data.viewport.width,
- pipe_ctx->plane_res.scl_data.viewport.height,
- pipe_ctx->plane_res.scl_data.viewport.x,
- pipe_ctx->plane_res.scl_data.viewport.y,
- pipe_ctx->plane_res.scl_data.recout.width,
- pipe_ctx->plane_res.scl_data.recout.height,
- pipe_ctx->plane_res.scl_data.recout.x,
- pipe_ctx->plane_res.scl_data.recout.y);
- print_rq_dlg_ttu(dc, pipe_ctx);
- }
-*/
- if (dc->vm_pa_config.valid) {
- struct vm_system_aperture_param apt;
-
- apt.sys_default.quad_part = 0;
-
- apt.sys_low.quad_part = dc->vm_pa_config.system_aperture.start_addr;
- apt.sys_high.quad_part = dc->vm_pa_config.system_aperture.end_addr;
-
- // Program system aperture settings
- pipe_ctx->plane_res.hubp->funcs->hubp_set_vm_system_aperture_settings(pipe_ctx->plane_res.hubp, &apt);
- }
-
- if (!pipe_ctx->top_pipe
- && pipe_ctx->plane_state
- && pipe_ctx->plane_state->flip_int_enabled
- && pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_int)
- pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_int(pipe_ctx->plane_res.hubp);
-
-// if (dc->debug.sanity_checks) {
-// dcn10_verify_allow_pstate_change_high(dc);
-// }
-}
-
-void dcn20_pipe_control_lock(
- struct dc *dc,
- struct pipe_ctx *pipe,
- bool lock)
-{
- struct pipe_ctx *temp_pipe;
- bool flip_immediate = false;
-
- /* use TG master update lock to lock everything on the TG
- * therefore only top pipe need to lock
- */
- if (!pipe || pipe->top_pipe)
- return;
-
- if (pipe->plane_state != NULL)
- flip_immediate = pipe->plane_state->flip_immediate;
-
- if (pipe->stream_res.gsl_group > 0) {
- temp_pipe = pipe->bottom_pipe;
- while (!flip_immediate && temp_pipe) {
- if (temp_pipe->plane_state != NULL)
- flip_immediate = temp_pipe->plane_state->flip_immediate;
- temp_pipe = temp_pipe->bottom_pipe;
- }
- }
-
- if (flip_immediate && lock) {
- const int TIMEOUT_FOR_FLIP_PENDING = 100000;
- int i;
-
- temp_pipe = pipe;
- while (temp_pipe) {
- if (temp_pipe->plane_state && temp_pipe->plane_state->flip_immediate) {
- for (i = 0; i < TIMEOUT_FOR_FLIP_PENDING; ++i) {
- if (!temp_pipe->plane_res.hubp->funcs->hubp_is_flip_pending(temp_pipe->plane_res.hubp))
- break;
- udelay(1);
- }
-
- /* no reason it should take this long for immediate flips */
- ASSERT(i != TIMEOUT_FOR_FLIP_PENDING);
- }
- temp_pipe = temp_pipe->bottom_pipe;
- }
- }
-
- /* In flip immediate and pipe splitting case, we need to use GSL
- * for synchronization. Only do setup on locking and on flip type change.
- */
- if (lock && (pipe->bottom_pipe != NULL || !flip_immediate))
- if ((flip_immediate && pipe->stream_res.gsl_group == 0) ||
- (!flip_immediate && pipe->stream_res.gsl_group > 0))
- dcn20_setup_gsl_group_as_lock(dc, pipe, flip_immediate);
-
- if (pipe->plane_state != NULL)
- flip_immediate = pipe->plane_state->flip_immediate;
-
- temp_pipe = pipe->bottom_pipe;
- while (flip_immediate && temp_pipe) {
- if (temp_pipe->plane_state != NULL)
- flip_immediate = temp_pipe->plane_state->flip_immediate;
- temp_pipe = temp_pipe->bottom_pipe;
- }
-
- if (!lock && pipe->stream_res.gsl_group > 0 && pipe->plane_state &&
- !flip_immediate)
- dcn20_setup_gsl_group_as_lock(dc, pipe, false);
-
- if (pipe->stream && should_use_dmub_lock(pipe->stream->link)) {
- union dmub_hw_lock_flags hw_locks = { 0 };
- struct dmub_hw_lock_inst_flags inst_flags = { 0 };
-
- hw_locks.bits.lock_pipe = 1;
- inst_flags.otg_inst = pipe->stream_res.tg->inst;
-
- if (pipe->plane_state != NULL)
- hw_locks.bits.triple_buffer_lock = pipe->plane_state->triplebuffer_flips;
-
- dmub_hw_lock_mgr_cmd(dc->ctx->dmub_srv,
- lock,
- &hw_locks,
- &inst_flags);
- } else if (pipe->plane_state != NULL && pipe->plane_state->triplebuffer_flips) {
- if (lock)
- pipe->stream_res.tg->funcs->triplebuffer_lock(pipe->stream_res.tg);
- else
- pipe->stream_res.tg->funcs->triplebuffer_unlock(pipe->stream_res.tg);
- } else {
- if (lock)
- pipe->stream_res.tg->funcs->lock(pipe->stream_res.tg);
- else
- pipe->stream_res.tg->funcs->unlock(pipe->stream_res.tg);
- }
-}
-
-static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx *new_pipe)
-{
- new_pipe->update_flags.raw = 0;
-
- /* Exit on unchanged, unused pipe */
- if (!old_pipe->plane_state && !new_pipe->plane_state)
- return;
- /* Detect pipe enable/disable */
- if (!old_pipe->plane_state && new_pipe->plane_state) {
- new_pipe->update_flags.bits.enable = 1;
- new_pipe->update_flags.bits.mpcc = 1;
- new_pipe->update_flags.bits.dppclk = 1;
- new_pipe->update_flags.bits.hubp_interdependent = 1;
- new_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1;
- new_pipe->update_flags.bits.gamut_remap = 1;
- new_pipe->update_flags.bits.scaler = 1;
- new_pipe->update_flags.bits.viewport = 1;
- new_pipe->update_flags.bits.det_size = 1;
- if (!new_pipe->top_pipe && !new_pipe->prev_odm_pipe) {
- new_pipe->update_flags.bits.odm = 1;
- new_pipe->update_flags.bits.global_sync = 1;
- }
- return;
- }
- if (old_pipe->plane_state && !new_pipe->plane_state) {
- new_pipe->update_flags.bits.disable = 1;
- return;
- }
-
- /* Detect plane change */
- if (old_pipe->plane_state != new_pipe->plane_state) {
- new_pipe->update_flags.bits.plane_changed = true;
- }
-
- /* Detect top pipe only changes */
- if (!new_pipe->top_pipe && !new_pipe->prev_odm_pipe) {
- /* Detect odm changes */
- if ((old_pipe->next_odm_pipe && new_pipe->next_odm_pipe
- && old_pipe->next_odm_pipe->pipe_idx != new_pipe->next_odm_pipe->pipe_idx)
- || (!old_pipe->next_odm_pipe && new_pipe->next_odm_pipe)
- || (old_pipe->next_odm_pipe && !new_pipe->next_odm_pipe)
- || old_pipe->stream_res.opp != new_pipe->stream_res.opp)
- new_pipe->update_flags.bits.odm = 1;
-
- /* Detect global sync changes */
- if (old_pipe->pipe_dlg_param.vready_offset != new_pipe->pipe_dlg_param.vready_offset
- || old_pipe->pipe_dlg_param.vstartup_start != new_pipe->pipe_dlg_param.vstartup_start
- || old_pipe->pipe_dlg_param.vupdate_offset != new_pipe->pipe_dlg_param.vupdate_offset
- || old_pipe->pipe_dlg_param.vupdate_width != new_pipe->pipe_dlg_param.vupdate_width)
- new_pipe->update_flags.bits.global_sync = 1;
- }
-
- if (old_pipe->det_buffer_size_kb != new_pipe->det_buffer_size_kb)
- new_pipe->update_flags.bits.det_size = 1;
-
- /*
- * Detect opp / tg change, only set on change, not on enable
- * Assume mpcc inst = pipe index, if not this code needs to be updated
- * since mpcc is what is affected by these. In fact all of our sequence
- * makes this assumption at the moment with how hubp reset is matched to
- * same index mpcc reset.
- */
- if (old_pipe->stream_res.opp != new_pipe->stream_res.opp)
- new_pipe->update_flags.bits.opp_changed = 1;
- if (old_pipe->stream_res.tg != new_pipe->stream_res.tg)
- new_pipe->update_flags.bits.tg_changed = 1;
-
- /*
- * Detect mpcc blending changes, only dpp inst and opp matter here,
- * mpccs getting removed/inserted update connected ones during their own
- * programming
- */
- if (old_pipe->plane_res.dpp != new_pipe->plane_res.dpp
- || old_pipe->stream_res.opp != new_pipe->stream_res.opp)
- new_pipe->update_flags.bits.mpcc = 1;
-
- /* Detect dppclk change */
- if (old_pipe->plane_res.bw.dppclk_khz != new_pipe->plane_res.bw.dppclk_khz)
- new_pipe->update_flags.bits.dppclk = 1;
-
- /* Check for scl update */
- if (memcmp(&old_pipe->plane_res.scl_data, &new_pipe->plane_res.scl_data, sizeof(struct scaler_data)))
- new_pipe->update_flags.bits.scaler = 1;
- /* Check for vp update */
- if (memcmp(&old_pipe->plane_res.scl_data.viewport, &new_pipe->plane_res.scl_data.viewport, sizeof(struct rect))
- || memcmp(&old_pipe->plane_res.scl_data.viewport_c,
- &new_pipe->plane_res.scl_data.viewport_c, sizeof(struct rect)))
- new_pipe->update_flags.bits.viewport = 1;
-
- /* Detect dlg/ttu/rq updates */
- {
- struct _vcs_dpi_display_dlg_regs_st old_dlg_attr = old_pipe->dlg_regs;
- struct _vcs_dpi_display_ttu_regs_st old_ttu_attr = old_pipe->ttu_regs;
- struct _vcs_dpi_display_dlg_regs_st *new_dlg_attr = &new_pipe->dlg_regs;
- struct _vcs_dpi_display_ttu_regs_st *new_ttu_attr = &new_pipe->ttu_regs;
-
- /* Detect pipe interdependent updates */
- if (old_dlg_attr.dst_y_prefetch != new_dlg_attr->dst_y_prefetch ||
- old_dlg_attr.vratio_prefetch != new_dlg_attr->vratio_prefetch ||
- old_dlg_attr.vratio_prefetch_c != new_dlg_attr->vratio_prefetch_c ||
- old_dlg_attr.dst_y_per_vm_vblank != new_dlg_attr->dst_y_per_vm_vblank ||
- old_dlg_attr.dst_y_per_row_vblank != new_dlg_attr->dst_y_per_row_vblank ||
- old_dlg_attr.dst_y_per_vm_flip != new_dlg_attr->dst_y_per_vm_flip ||
- old_dlg_attr.dst_y_per_row_flip != new_dlg_attr->dst_y_per_row_flip ||
- old_dlg_attr.refcyc_per_meta_chunk_vblank_l != new_dlg_attr->refcyc_per_meta_chunk_vblank_l ||
- old_dlg_attr.refcyc_per_meta_chunk_vblank_c != new_dlg_attr->refcyc_per_meta_chunk_vblank_c ||
- old_dlg_attr.refcyc_per_meta_chunk_flip_l != new_dlg_attr->refcyc_per_meta_chunk_flip_l ||
- old_dlg_attr.refcyc_per_line_delivery_pre_l != new_dlg_attr->refcyc_per_line_delivery_pre_l ||
- old_dlg_attr.refcyc_per_line_delivery_pre_c != new_dlg_attr->refcyc_per_line_delivery_pre_c ||
- old_ttu_attr.refcyc_per_req_delivery_pre_l != new_ttu_attr->refcyc_per_req_delivery_pre_l ||
- old_ttu_attr.refcyc_per_req_delivery_pre_c != new_ttu_attr->refcyc_per_req_delivery_pre_c ||
- old_ttu_attr.refcyc_per_req_delivery_pre_cur0 != new_ttu_attr->refcyc_per_req_delivery_pre_cur0 ||
- old_ttu_attr.refcyc_per_req_delivery_pre_cur1 != new_ttu_attr->refcyc_per_req_delivery_pre_cur1 ||
- old_ttu_attr.min_ttu_vblank != new_ttu_attr->min_ttu_vblank ||
- old_ttu_attr.qos_level_flip != new_ttu_attr->qos_level_flip) {
- old_dlg_attr.dst_y_prefetch = new_dlg_attr->dst_y_prefetch;
- old_dlg_attr.vratio_prefetch = new_dlg_attr->vratio_prefetch;
- old_dlg_attr.vratio_prefetch_c = new_dlg_attr->vratio_prefetch_c;
- old_dlg_attr.dst_y_per_vm_vblank = new_dlg_attr->dst_y_per_vm_vblank;
- old_dlg_attr.dst_y_per_row_vblank = new_dlg_attr->dst_y_per_row_vblank;
- old_dlg_attr.dst_y_per_vm_flip = new_dlg_attr->dst_y_per_vm_flip;
- old_dlg_attr.dst_y_per_row_flip = new_dlg_attr->dst_y_per_row_flip;
- old_dlg_attr.refcyc_per_meta_chunk_vblank_l = new_dlg_attr->refcyc_per_meta_chunk_vblank_l;
- old_dlg_attr.refcyc_per_meta_chunk_vblank_c = new_dlg_attr->refcyc_per_meta_chunk_vblank_c;
- old_dlg_attr.refcyc_per_meta_chunk_flip_l = new_dlg_attr->refcyc_per_meta_chunk_flip_l;
- old_dlg_attr.refcyc_per_line_delivery_pre_l = new_dlg_attr->refcyc_per_line_delivery_pre_l;
- old_dlg_attr.refcyc_per_line_delivery_pre_c = new_dlg_attr->refcyc_per_line_delivery_pre_c;
- old_ttu_attr.refcyc_per_req_delivery_pre_l = new_ttu_attr->refcyc_per_req_delivery_pre_l;
- old_ttu_attr.refcyc_per_req_delivery_pre_c = new_ttu_attr->refcyc_per_req_delivery_pre_c;
- old_ttu_attr.refcyc_per_req_delivery_pre_cur0 = new_ttu_attr->refcyc_per_req_delivery_pre_cur0;
- old_ttu_attr.refcyc_per_req_delivery_pre_cur1 = new_ttu_attr->refcyc_per_req_delivery_pre_cur1;
- old_ttu_attr.min_ttu_vblank = new_ttu_attr->min_ttu_vblank;
- old_ttu_attr.qos_level_flip = new_ttu_attr->qos_level_flip;
- new_pipe->update_flags.bits.hubp_interdependent = 1;
- }
- /* Detect any other updates to ttu/rq/dlg */
- if (memcmp(&old_dlg_attr, &new_pipe->dlg_regs, sizeof(old_dlg_attr)) ||
- memcmp(&old_ttu_attr, &new_pipe->ttu_regs, sizeof(old_ttu_attr)) ||
- memcmp(&old_pipe->rq_regs, &new_pipe->rq_regs, sizeof(old_pipe->rq_regs)))
- new_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1;
- }
-}
-
-static void dcn20_update_dchubp_dpp(
- struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- struct dc_state *context)
-{
- struct dce_hwseq *hws = dc->hwseq;
- struct hubp *hubp = pipe_ctx->plane_res.hubp;
- struct dpp *dpp = pipe_ctx->plane_res.dpp;
- struct dc_plane_state *plane_state = pipe_ctx->plane_state;
- bool viewport_changed = false;
-
- if (pipe_ctx->update_flags.bits.dppclk)
- dpp->funcs->dpp_dppclk_control(dpp, false, true);
-
- /* TODO: Need input parameter to tell current DCHUB pipe tie to which OTG
- * VTG is within DCHUBBUB which is commond block share by each pipe HUBP.
- * VTG is 1:1 mapping with OTG. Each pipe HUBP will select which VTG
- */
- if (pipe_ctx->update_flags.bits.hubp_rq_dlg_ttu) {
- hubp->funcs->hubp_vtg_sel(hubp, pipe_ctx->stream_res.tg->inst);
-
- hubp->funcs->hubp_setup(
- hubp,
- &pipe_ctx->dlg_regs,
- &pipe_ctx->ttu_regs,
- &pipe_ctx->rq_regs,
- &pipe_ctx->pipe_dlg_param);
-
- if (hubp->funcs->set_unbounded_requesting)
- hubp->funcs->set_unbounded_requesting(hubp, pipe_ctx->unbounded_req);
- }
- if (pipe_ctx->update_flags.bits.hubp_interdependent)
- hubp->funcs->hubp_setup_interdependent(
- hubp,
- &pipe_ctx->dlg_regs,
- &pipe_ctx->ttu_regs);
-
- if (pipe_ctx->update_flags.bits.enable ||
- pipe_ctx->update_flags.bits.plane_changed ||
- plane_state->update_flags.bits.bpp_change ||
- plane_state->update_flags.bits.input_csc_change ||
- plane_state->update_flags.bits.color_space_change ||
- plane_state->update_flags.bits.coeff_reduction_change) {
- struct dc_bias_and_scale bns_params = {0};
-
- // program the input csc
- dpp->funcs->dpp_setup(dpp,
- plane_state->format,
- EXPANSION_MODE_ZERO,
- plane_state->input_csc_color_matrix,
- plane_state->color_space,
- NULL);
-
- if (dpp->funcs->dpp_program_bias_and_scale) {
- //TODO :for CNVC set scale and bias registers if necessary
- build_prescale_params(&bns_params, plane_state);
- dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params);
- }
- }
-
- if (pipe_ctx->update_flags.bits.mpcc
- || pipe_ctx->update_flags.bits.plane_changed
- || plane_state->update_flags.bits.global_alpha_change
- || plane_state->update_flags.bits.per_pixel_alpha_change) {
- // MPCC inst is equal to pipe index in practice
- int mpcc_inst = hubp->inst;
- int opp_inst;
- int opp_count = dc->res_pool->pipe_count;
-
- for (opp_inst = 0; opp_inst < opp_count; opp_inst++) {
- if (dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst]) {
- dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst);
- dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst] = false;
- break;
- }
- }
- hws->funcs.update_mpcc(dc, pipe_ctx);
- }
-
- if (pipe_ctx->update_flags.bits.scaler ||
- plane_state->update_flags.bits.scaling_change ||
- plane_state->update_flags.bits.position_change ||
- plane_state->update_flags.bits.per_pixel_alpha_change ||
- pipe_ctx->stream->update_flags.bits.scaling) {
- pipe_ctx->plane_res.scl_data.lb_params.alpha_en = pipe_ctx->plane_state->per_pixel_alpha;
- ASSERT(pipe_ctx->plane_res.scl_data.lb_params.depth == LB_PIXEL_DEPTH_36BPP);
- /* scaler configuration */
- pipe_ctx->plane_res.dpp->funcs->dpp_set_scaler(
- pipe_ctx->plane_res.dpp, &pipe_ctx->plane_res.scl_data);
- }
-
- if (pipe_ctx->update_flags.bits.viewport ||
- (context == dc->current_state && plane_state->update_flags.bits.position_change) ||
- (context == dc->current_state && plane_state->update_flags.bits.scaling_change) ||
- (context == dc->current_state && pipe_ctx->stream->update_flags.bits.scaling)) {
-
- hubp->funcs->mem_program_viewport(
- hubp,
- &pipe_ctx->plane_res.scl_data.viewport,
- &pipe_ctx->plane_res.scl_data.viewport_c);
- viewport_changed = true;
- }
-
- /* Any updates are handled in dc interface, just need to apply existing for plane enable */
- if ((pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed ||
- pipe_ctx->update_flags.bits.scaler || viewport_changed == true) &&
- pipe_ctx->stream->cursor_attributes.address.quad_part != 0) {
- dc->hwss.set_cursor_position(pipe_ctx);
- dc->hwss.set_cursor_attribute(pipe_ctx);
-
- if (dc->hwss.set_cursor_sdr_white_level)
- dc->hwss.set_cursor_sdr_white_level(pipe_ctx);
- }
-
- /* Any updates are handled in dc interface, just need
- * to apply existing for plane enable / opp change */
- if (pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed
- || pipe_ctx->stream->update_flags.bits.gamut_remap
- || pipe_ctx->stream->update_flags.bits.out_csc) {
- /* dpp/cm gamut remap*/
- dc->hwss.program_gamut_remap(pipe_ctx);
-
- /*call the dcn2 method which uses mpc csc*/
- dc->hwss.program_output_csc(dc,
- pipe_ctx,
- pipe_ctx->stream->output_color_space,
- pipe_ctx->stream->csc_color_matrix.matrix,
- hubp->opp_id);
- }
-
- if (pipe_ctx->update_flags.bits.enable ||
- pipe_ctx->update_flags.bits.plane_changed ||
- pipe_ctx->update_flags.bits.opp_changed ||
- plane_state->update_flags.bits.pixel_format_change ||
- plane_state->update_flags.bits.horizontal_mirror_change ||
- plane_state->update_flags.bits.rotation_change ||
- plane_state->update_flags.bits.swizzle_change ||
- plane_state->update_flags.bits.dcc_change ||
- plane_state->update_flags.bits.bpp_change ||
- plane_state->update_flags.bits.scaling_change ||
- plane_state->update_flags.bits.plane_size_change) {
- struct plane_size size = plane_state->plane_size;
-
- size.surface_size = pipe_ctx->plane_res.scl_data.viewport;
- hubp->funcs->hubp_program_surface_config(
- hubp,
- plane_state->format,
- &plane_state->tiling_info,
- &size,
- plane_state->rotation,
- &plane_state->dcc,
- plane_state->horizontal_mirror,
- 0);
- hubp->power_gated = false;
- }
-
- if (pipe_ctx->update_flags.bits.enable ||
- pipe_ctx->update_flags.bits.plane_changed ||
- plane_state->update_flags.bits.addr_update)
- hws->funcs.update_plane_addr(dc, pipe_ctx);
-
-
-
- if (pipe_ctx->update_flags.bits.enable)
- hubp->funcs->set_blank(hubp, false);
-}
-
-
-static void dcn20_program_pipe(
- struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- struct dc_state *context)
-{
- struct dce_hwseq *hws = dc->hwseq;
- /* Only need to unblank on top pipe */
- if ((pipe_ctx->update_flags.bits.enable || pipe_ctx->stream->update_flags.bits.abm_level)
- && !pipe_ctx->top_pipe && !pipe_ctx->prev_odm_pipe)
- hws->funcs.blank_pixel_data(dc, pipe_ctx, !pipe_ctx->plane_state->visible);
-
- /* Only update TG on top pipe */
- if (pipe_ctx->update_flags.bits.global_sync && !pipe_ctx->top_pipe
- && !pipe_ctx->prev_odm_pipe) {
-
- pipe_ctx->stream_res.tg->funcs->program_global_sync(
- pipe_ctx->stream_res.tg,
- pipe_ctx->pipe_dlg_param.vready_offset,
- pipe_ctx->pipe_dlg_param.vstartup_start,
- pipe_ctx->pipe_dlg_param.vupdate_offset,
- pipe_ctx->pipe_dlg_param.vupdate_width);
-
- pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK);
- pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE);
-
- pipe_ctx->stream_res.tg->funcs->set_vtg_params(
- pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true);
-
- if (hws->funcs.setup_vupdate_interrupt)
- hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx);
- }
-
- if (pipe_ctx->update_flags.bits.odm)
- hws->funcs.update_odm(dc, context, pipe_ctx);
-
- if (pipe_ctx->update_flags.bits.enable) {
- dcn20_enable_plane(dc, pipe_ctx, context);
- if (dc->res_pool->hubbub->funcs->force_wm_propagate_to_pipes)
- dc->res_pool->hubbub->funcs->force_wm_propagate_to_pipes(dc->res_pool->hubbub);
- }
-
- if (dc->res_pool->hubbub->funcs->program_det_size && pipe_ctx->update_flags.bits.det_size)
- dc->res_pool->hubbub->funcs->program_det_size(
- dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->det_buffer_size_kb);
-
- if (pipe_ctx->update_flags.raw || pipe_ctx->plane_state->update_flags.raw || pipe_ctx->stream->update_flags.raw)
- dcn20_update_dchubp_dpp(dc, pipe_ctx, context);
-
- if (pipe_ctx->update_flags.bits.enable
- || pipe_ctx->plane_state->update_flags.bits.hdr_mult)
- hws->funcs.set_hdr_multiplier(pipe_ctx);
-
- if (pipe_ctx->update_flags.bits.enable ||
- pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change ||
- pipe_ctx->plane_state->update_flags.bits.gamma_change)
- hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state);
-
- /* dcn10_translate_regamma_to_hw_format takes 750us to finish
- * only do gamma programming for powering on, internal memcmp to avoid
- * updating on slave planes
- */
- if (pipe_ctx->update_flags.bits.enable || pipe_ctx->stream->update_flags.bits.out_tf)
- hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream);
-
- /* If the pipe has been enabled or has a different opp, we
- * should reprogram the fmt. This deals with cases where
- * interation between mpc and odm combine on different streams
- * causes a different pipe to be chosen to odm combine with.
- */
- if (pipe_ctx->update_flags.bits.enable
- || pipe_ctx->update_flags.bits.opp_changed) {
-
- pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion(
- pipe_ctx->stream_res.opp,
- COLOR_SPACE_YCBCR601,
- pipe_ctx->stream->timing.display_color_depth,
- pipe_ctx->stream->signal);
-
- pipe_ctx->stream_res.opp->funcs->opp_program_fmt(
- pipe_ctx->stream_res.opp,
- &pipe_ctx->stream->bit_depth_params,
- &pipe_ctx->stream->clamping);
- }
-}
-
-void dcn20_program_front_end_for_ctx(
- struct dc *dc,
- struct dc_state *context)
-{
- int i;
- struct dce_hwseq *hws = dc->hwseq;
- DC_LOGGER_INIT(dc->ctx->logger);
-
- /* Carry over GSL groups in case the context is changing. */
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
- struct pipe_ctx *old_pipe_ctx =
- &dc->current_state->res_ctx.pipe_ctx[i];
-
- if (pipe_ctx->stream == old_pipe_ctx->stream)
- pipe_ctx->stream_res.gsl_group =
- old_pipe_ctx->stream_res.gsl_group;
- }
-
- if (dc->hwss.program_triplebuffer != NULL && dc->debug.enable_tri_buf) {
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-
- if (!pipe_ctx->top_pipe && !pipe_ctx->prev_odm_pipe && pipe_ctx->plane_state) {
- ASSERT(!pipe_ctx->plane_state->triplebuffer_flips);
- /*turn off triple buffer for full update*/
- dc->hwss.program_triplebuffer(
- dc, pipe_ctx, pipe_ctx->plane_state->triplebuffer_flips);
- }
- }
- }
-
- /* Set pipe update flags and lock pipes */
- for (i = 0; i < dc->res_pool->pipe_count; i++)
- dcn20_detect_pipe_changes(&dc->current_state->res_ctx.pipe_ctx[i],
- &context->res_ctx.pipe_ctx[i]);
-
- /* OTG blank before disabling all front ends */
- for (i = 0; i < dc->res_pool->pipe_count; i++)
- if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable
- && !context->res_ctx.pipe_ctx[i].top_pipe
- && !context->res_ctx.pipe_ctx[i].prev_odm_pipe
- && context->res_ctx.pipe_ctx[i].stream)
- hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true);
-
-
- /* Disconnect mpcc */
- for (i = 0; i < dc->res_pool->pipe_count; i++)
- if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable
- || context->res_ctx.pipe_ctx[i].update_flags.bits.opp_changed) {
- struct hubbub *hubbub = dc->res_pool->hubbub;
-
- if (hubbub->funcs->program_det_size && context->res_ctx.pipe_ctx[i].update_flags.bits.disable)
- hubbub->funcs->program_det_size(hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0);
- hws->funcs.plane_atomic_disconnect(dc, &dc->current_state->res_ctx.pipe_ctx[i]);
- DC_LOG_DC("Reset mpcc for pipe %d\n", dc->current_state->res_ctx.pipe_ctx[i].pipe_idx);
- }
-
- /*
- * Program all updated pipes, order matters for mpcc setup. Start with
- * top pipe and program all pipes that follow in order
- */
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
- if (pipe->plane_state && !pipe->top_pipe) {
- while (pipe) {
- if (hws->funcs.program_pipe)
- hws->funcs.program_pipe(dc, pipe, context);
- else
- dcn20_program_pipe(dc, pipe, context);
-
- pipe = pipe->bottom_pipe;
- }
- }
- /* Program secondary blending tree and writeback pipes */
- pipe = &context->res_ctx.pipe_ctx[i];
- if (!pipe->top_pipe && !pipe->prev_odm_pipe
- && pipe->stream && pipe->stream->num_wb_info > 0
- && (pipe->update_flags.raw || (pipe->plane_state && pipe->plane_state->update_flags.raw)
- || pipe->stream->update_flags.raw)
- && hws->funcs.program_all_writeback_pipes_in_tree)
- hws->funcs.program_all_writeback_pipes_in_tree(dc, pipe->stream, context);
- }
-}
-
-void dcn20_post_unlock_program_front_end(
- struct dc *dc,
- struct dc_state *context)
-{
- int i;
- const unsigned int TIMEOUT_FOR_PIPE_ENABLE_MS = 100;
- struct dce_hwseq *hwseq = dc->hwseq;
-
- DC_LOGGER_INIT(dc->ctx->logger);
-
- for (i = 0; i < dc->res_pool->pipe_count; i++)
- if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable)
- dc->hwss.disable_plane(dc, &dc->current_state->res_ctx.pipe_ctx[i]);
-
- /*
- * If we are enabling a pipe, we need to wait for pending clear as this is a critical
- * part of the enable operation otherwise, DM may request an immediate flip which
- * will cause HW to perform an "immediate enable" (as opposed to "vsync enable") which
- * is unsupported on DCN.
- */
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
- if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable) {
- struct hubp *hubp = pipe->plane_res.hubp;
- int j = 0;
-
- for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_MS*1000
- && hubp->funcs->hubp_is_flip_pending(hubp); j++)
- mdelay(1);
- }
- }
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- struct pipe_ctx *mpcc_pipe;
-
- if (pipe->vtp_locked) {
- dc->hwseq->funcs.wait_for_blank_complete(pipe->stream_res.opp);
- pipe->plane_res.hubp->funcs->set_blank(pipe->plane_res.hubp, true);
- pipe->vtp_locked = false;
-
- for (mpcc_pipe = pipe->bottom_pipe; mpcc_pipe; mpcc_pipe = mpcc_pipe->bottom_pipe)
- mpcc_pipe->plane_res.hubp->funcs->set_blank(mpcc_pipe->plane_res.hubp, true);
-
- for (i = 0; i < dc->res_pool->pipe_count; i++)
- if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable)
- dc->hwss.disable_plane(dc, &dc->current_state->res_ctx.pipe_ctx[i]);
- }
- }
- /* WA to apply WM setting*/
- if (hwseq->wa.DEGVIDCN21)
- dc->res_pool->hubbub->funcs->apply_DEDCN21_147_wa(dc->res_pool->hubbub);
-
-
- /* WA for stutter underflow during MPO transitions when adding 2nd plane */
- if (hwseq->wa.disallow_self_refresh_during_multi_plane_transition) {
-
- if (dc->current_state->stream_status[0].plane_count == 1 &&
- context->stream_status[0].plane_count > 1) {
-
- struct timing_generator *tg = dc->res_pool->timing_generators[0];
-
- dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, false);
-
- hwseq->wa_state.disallow_self_refresh_during_multi_plane_transition_applied = true;
- hwseq->wa_state.disallow_self_refresh_during_multi_plane_transition_applied_on_frame = tg->funcs->get_frame_count(tg);
- }
- }
-}
-
-void dcn20_prepare_bandwidth(
- struct dc *dc,
- struct dc_state *context)
-{
- struct hubbub *hubbub = dc->res_pool->hubbub;
-
- dc->clk_mgr->funcs->update_clocks(
- dc->clk_mgr,
- context,
- false);
-
- /* program dchubbub watermarks */
- dc->wm_optimized_required = hubbub->funcs->program_watermarks(hubbub,
- &context->bw_ctx.bw.dcn.watermarks,
- dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
- false);
- /* decrease compbuf size */
- if (hubbub->funcs->program_compbuf_size)
- hubbub->funcs->program_compbuf_size(hubbub, context->bw_ctx.bw.dcn.compbuf_size_kb, false);
-}
-
-void dcn20_optimize_bandwidth(
- struct dc *dc,
- struct dc_state *context)
-{
- struct hubbub *hubbub = dc->res_pool->hubbub;
-
- /* program dchubbub watermarks */
- hubbub->funcs->program_watermarks(hubbub,
- &context->bw_ctx.bw.dcn.watermarks,
- dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
- true);
-
- dc->clk_mgr->funcs->update_clocks(
- dc->clk_mgr,
- context,
- true);
- /* increase compbuf size */
- if (hubbub->funcs->program_compbuf_size)
- hubbub->funcs->program_compbuf_size(hubbub, context->bw_ctx.bw.dcn.compbuf_size_kb, true);
-}
-
-bool dcn20_update_bandwidth(
- struct dc *dc,
- struct dc_state *context)
-{
- int i;
- struct dce_hwseq *hws = dc->hwseq;
-
- /* recalculate DML parameters */
- if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false))
- return false;
-
- /* apply updated bandwidth parameters */
- dc->hwss.prepare_bandwidth(dc, context);
-
- /* update hubp configs for all pipes */
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-
- if (pipe_ctx->plane_state == NULL)
- continue;
-
- if (pipe_ctx->top_pipe == NULL) {
- bool blank = !is_pipe_tree_visible(pipe_ctx);
-
- pipe_ctx->stream_res.tg->funcs->program_global_sync(
- pipe_ctx->stream_res.tg,
- pipe_ctx->pipe_dlg_param.vready_offset,
- pipe_ctx->pipe_dlg_param.vstartup_start,
- pipe_ctx->pipe_dlg_param.vupdate_offset,
- pipe_ctx->pipe_dlg_param.vupdate_width);
-
- pipe_ctx->stream_res.tg->funcs->set_vtg_params(
- pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, false);
-
- if (pipe_ctx->prev_odm_pipe == NULL)
- hws->funcs.blank_pixel_data(dc, pipe_ctx, blank);
-
- if (hws->funcs.setup_vupdate_interrupt)
- hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx);
- }
-
- pipe_ctx->plane_res.hubp->funcs->hubp_setup(
- pipe_ctx->plane_res.hubp,
- &pipe_ctx->dlg_regs,
- &pipe_ctx->ttu_regs,
- &pipe_ctx->rq_regs,
- &pipe_ctx->pipe_dlg_param);
- }
-
- return true;
-}
-
-void dcn20_enable_writeback(
- struct dc *dc,
- struct dc_writeback_info *wb_info,
- struct dc_state *context)
-{
- struct dwbc *dwb;
- struct mcif_wb *mcif_wb;
- struct timing_generator *optc;
-
- ASSERT(wb_info->dwb_pipe_inst < MAX_DWB_PIPES);
- ASSERT(wb_info->wb_enabled);
- dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst];
- mcif_wb = dc->res_pool->mcif_wb[wb_info->dwb_pipe_inst];
-
- /* set the OPTC source mux */
- optc = dc->res_pool->timing_generators[dwb->otg_inst];
- optc->funcs->set_dwb_source(optc, wb_info->dwb_pipe_inst);
- /* set MCIF_WB buffer and arbitration configuration */
- mcif_wb->funcs->config_mcif_buf(mcif_wb, &wb_info->mcif_buf_params, wb_info->dwb_params.dest_height);
- mcif_wb->funcs->config_mcif_arb(mcif_wb, &context->bw_ctx.bw.dcn.bw_writeback.mcif_wb_arb[wb_info->dwb_pipe_inst]);
- /* Enable MCIF_WB */
- mcif_wb->funcs->enable_mcif(mcif_wb);
- /* Enable DWB */
- dwb->funcs->enable(dwb, &wb_info->dwb_params);
- /* TODO: add sequence to enable/disable warmup */
-}
-
-void dcn20_disable_writeback(
- struct dc *dc,
- unsigned int dwb_pipe_inst)
-{
- struct dwbc *dwb;
- struct mcif_wb *mcif_wb;
-
- ASSERT(dwb_pipe_inst < MAX_DWB_PIPES);
- dwb = dc->res_pool->dwbc[dwb_pipe_inst];
- mcif_wb = dc->res_pool->mcif_wb[dwb_pipe_inst];
-
- dwb->funcs->disable(dwb);
- mcif_wb->funcs->disable_mcif(mcif_wb);
-}
-
-bool dcn20_wait_for_blank_complete(
- struct output_pixel_processor *opp)
-{
- int counter;
-
- for (counter = 0; counter < 1000; counter++) {
- if (opp->funcs->dpg_is_blanked(opp))
- break;
-
- udelay(100);
- }
-
- if (counter == 1000) {
- dm_error("DC: failed to blank crtc!\n");
- return false;
- }
-
- return true;
-}
-
-bool dcn20_dmdata_status_done(struct pipe_ctx *pipe_ctx)
-{
- struct hubp *hubp = pipe_ctx->plane_res.hubp;
-
- if (!hubp)
- return false;
- return hubp->funcs->dmdata_status_done(hubp);
-}
-
-void dcn20_disable_stream_gating(struct dc *dc, struct pipe_ctx *pipe_ctx)
-{
- struct dce_hwseq *hws = dc->hwseq;
-
- if (pipe_ctx->stream_res.dsc) {
- struct pipe_ctx *odm_pipe = pipe_ctx->next_odm_pipe;
-
- hws->funcs.dsc_pg_control(hws, pipe_ctx->stream_res.dsc->inst, true);
- while (odm_pipe) {
- hws->funcs.dsc_pg_control(hws, odm_pipe->stream_res.dsc->inst, true);
- odm_pipe = odm_pipe->next_odm_pipe;
- }
- }
-}
-
-void dcn20_enable_stream_gating(struct dc *dc, struct pipe_ctx *pipe_ctx)
-{
- struct dce_hwseq *hws = dc->hwseq;
-
- if (pipe_ctx->stream_res.dsc) {
- struct pipe_ctx *odm_pipe = pipe_ctx->next_odm_pipe;
-
- hws->funcs.dsc_pg_control(hws, pipe_ctx->stream_res.dsc->inst, false);
- while (odm_pipe) {
- hws->funcs.dsc_pg_control(hws, odm_pipe->stream_res.dsc->inst, false);
- odm_pipe = odm_pipe->next_odm_pipe;
- }
- }
-}
-
-void dcn20_set_dmdata_attributes(struct pipe_ctx *pipe_ctx)
-{
- struct dc_dmdata_attributes attr = { 0 };
- struct hubp *hubp = pipe_ctx->plane_res.hubp;
-
- attr.dmdata_mode = DMDATA_HW_MODE;
- attr.dmdata_size =
- dc_is_hdmi_signal(pipe_ctx->stream->signal) ? 32 : 36;
- attr.address.quad_part =
- pipe_ctx->stream->dmdata_address.quad_part;
- attr.dmdata_dl_delta = 0;
- attr.dmdata_qos_mode = 0;
- attr.dmdata_qos_level = 0;
- attr.dmdata_repeat = 1; /* always repeat */
- attr.dmdata_updated = 1;
- attr.dmdata_sw_data = NULL;
-
- hubp->funcs->dmdata_set_attributes(hubp, &attr);
-}
-
-void dcn20_init_vm_ctx(
- struct dce_hwseq *hws,
- struct dc *dc,
- struct dc_virtual_addr_space_config *va_config,
- int vmid)
-{
- struct dcn_hubbub_virt_addr_config config;
-
- if (vmid == 0) {
- ASSERT(0); /* VMID cannot be 0 for vm context */
- return;
- }
-
- config.page_table_start_addr = va_config->page_table_start_addr;
- config.page_table_end_addr = va_config->page_table_end_addr;
- config.page_table_block_size = va_config->page_table_block_size_in_bytes;
- config.page_table_depth = va_config->page_table_depth;
- config.page_table_base_addr = va_config->page_table_base_addr;
-
- dc->res_pool->hubbub->funcs->init_vm_ctx(dc->res_pool->hubbub, &config, vmid);
-}
-
-int dcn20_init_sys_ctx(struct dce_hwseq *hws, struct dc *dc, struct dc_phy_addr_space_config *pa_config)
-{
- struct dcn_hubbub_phys_addr_config config;
-
- config.system_aperture.fb_top = pa_config->system_aperture.fb_top;
- config.system_aperture.fb_offset = pa_config->system_aperture.fb_offset;
- config.system_aperture.fb_base = pa_config->system_aperture.fb_base;
- config.system_aperture.agp_top = pa_config->system_aperture.agp_top;
- config.system_aperture.agp_bot = pa_config->system_aperture.agp_bot;
- config.system_aperture.agp_base = pa_config->system_aperture.agp_base;
- config.gart_config.page_table_start_addr = pa_config->gart_config.page_table_start_addr;
- config.gart_config.page_table_end_addr = pa_config->gart_config.page_table_end_addr;
- config.gart_config.page_table_base_addr = pa_config->gart_config.page_table_base_addr;
- config.page_table_default_page_addr = pa_config->page_table_default_page_addr;
-
- return dc->res_pool->hubbub->funcs->init_dchub_sys_ctx(dc->res_pool->hubbub, &config);
-}
-
-static bool patch_address_for_sbs_tb_stereo(
- struct pipe_ctx *pipe_ctx, PHYSICAL_ADDRESS_LOC *addr)
-{
- struct dc_plane_state *plane_state = pipe_ctx->plane_state;
- bool sec_split = pipe_ctx->top_pipe &&
- pipe_ctx->top_pipe->plane_state == pipe_ctx->plane_state;
- if (sec_split && plane_state->address.type == PLN_ADDR_TYPE_GRPH_STEREO &&
- (pipe_ctx->stream->timing.timing_3d_format ==
- TIMING_3D_FORMAT_SIDE_BY_SIDE ||
- pipe_ctx->stream->timing.timing_3d_format ==
- TIMING_3D_FORMAT_TOP_AND_BOTTOM)) {
- *addr = plane_state->address.grph_stereo.left_addr;
- plane_state->address.grph_stereo.left_addr =
- plane_state->address.grph_stereo.right_addr;
- return true;
- }
-
- if (pipe_ctx->stream->view_format != VIEW_3D_FORMAT_NONE &&
- plane_state->address.type != PLN_ADDR_TYPE_GRPH_STEREO) {
- plane_state->address.type = PLN_ADDR_TYPE_GRPH_STEREO;
- plane_state->address.grph_stereo.right_addr =
- plane_state->address.grph_stereo.left_addr;
- plane_state->address.grph_stereo.right_meta_addr =
- plane_state->address.grph_stereo.left_meta_addr;
- }
- return false;
-}
-
-void dcn20_update_plane_addr(const struct dc *dc, struct pipe_ctx *pipe_ctx)
-{
- bool addr_patched = false;
- PHYSICAL_ADDRESS_LOC addr;
- struct dc_plane_state *plane_state = pipe_ctx->plane_state;
-
- if (plane_state == NULL)
- return;
-
- addr_patched = patch_address_for_sbs_tb_stereo(pipe_ctx, &addr);
-
- // Call Helper to track VMID use
- vm_helper_mark_vmid_used(dc->vm_helper, plane_state->address.vmid, pipe_ctx->plane_res.hubp->inst);
-
- pipe_ctx->plane_res.hubp->funcs->hubp_program_surface_flip_and_addr(
- pipe_ctx->plane_res.hubp,
- &plane_state->address,
- plane_state->flip_immediate);
-
- plane_state->status.requested_address = plane_state->address;
-
- if (plane_state->flip_immediate)
- plane_state->status.current_address = plane_state->address;
-
- if (addr_patched)
- pipe_ctx->plane_state->address.grph_stereo.left_addr = addr;
-}
-
-void dcn20_unblank_stream(struct pipe_ctx *pipe_ctx,
- struct dc_link_settings *link_settings)
-{
- struct encoder_unblank_param params = {0};
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct dc_link *link = stream->link;
- struct dce_hwseq *hws = link->dc->hwseq;
- struct pipe_ctx *odm_pipe;
-
- params.opp_cnt = 1;
- for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
- params.opp_cnt++;
- }
- /* only 3 items below are used by unblank */
- params.timing = pipe_ctx->stream->timing;
-
- params.link_settings.link_rate = link_settings->link_rate;
-
- if (is_dp_128b_132b_signal(pipe_ctx)) {
- /* TODO - DP2.0 HW: Set ODM mode in dp hpo encoder here */
- pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->dp_unblank(
- pipe_ctx->stream_res.hpo_dp_stream_enc,
- pipe_ctx->stream_res.tg->inst);
- } else if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
- if (optc2_is_two_pixels_per_containter(&stream->timing) || params.opp_cnt > 1)
- params.timing.pix_clk_100hz /= 2;
- pipe_ctx->stream_res.stream_enc->funcs->dp_set_odm_combine(
- pipe_ctx->stream_res.stream_enc, params.opp_cnt > 1);
- pipe_ctx->stream_res.stream_enc->funcs->dp_unblank(link, pipe_ctx->stream_res.stream_enc, &params);
- }
-
- if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) {
- hws->funcs.edp_backlight_control(link, true);
- }
-}
-
-void dcn20_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx)
-{
- struct timing_generator *tg = pipe_ctx->stream_res.tg;
- int start_line = dc->hwss.get_vupdate_offset_from_vsync(pipe_ctx);
-
- if (start_line < 0)
- start_line = 0;
-
- if (tg->funcs->setup_vertical_interrupt2)
- tg->funcs->setup_vertical_interrupt2(tg, start_line);
-}
-
-static void dcn20_reset_back_end_for_pipe(
- struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- struct dc_state *context)
-{
- int i;
- struct dc_link *link;
- DC_LOGGER_INIT(dc->ctx->logger);
- if (pipe_ctx->stream_res.stream_enc == NULL) {
- pipe_ctx->stream = NULL;
- return;
- }
-
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
- link = pipe_ctx->stream->link;
- /* DPMS may already disable or */
- /* dpms_off status is incorrect due to fastboot
- * feature. When system resume from S4 with second
- * screen only, the dpms_off would be true but
- * VBIOS lit up eDP, so check link status too.
- */
- if (!pipe_ctx->stream->dpms_off || link->link_status.link_active)
- core_link_disable_stream(pipe_ctx);
- else if (pipe_ctx->stream_res.audio)
- dc->hwss.disable_audio_stream(pipe_ctx);
-
- /* free acquired resources */
- if (pipe_ctx->stream_res.audio) {
- /*disable az_endpoint*/
- pipe_ctx->stream_res.audio->funcs->az_disable(pipe_ctx->stream_res.audio);
-
- /*free audio*/
- if (dc->caps.dynamic_audio == true) {
- /*we have to dynamic arbitrate the audio endpoints*/
- /*we free the resource, need reset is_audio_acquired*/
- update_audio_usage(&dc->current_state->res_ctx, dc->res_pool,
- pipe_ctx->stream_res.audio, false);
- pipe_ctx->stream_res.audio = NULL;
- }
- }
- }
- else if (pipe_ctx->stream_res.dsc) {
- dp_set_dsc_enable(pipe_ctx, false);
- }
-
- /* by upper caller loop, parent pipe: pipe0, will be reset last.
- * back end share by all pipes and will be disable only when disable
- * parent pipe.
- */
- if (pipe_ctx->top_pipe == NULL) {
-
- dc->hwss.set_abm_immediate_disable(pipe_ctx);
-
- pipe_ctx->stream_res.tg->funcs->disable_crtc(pipe_ctx->stream_res.tg);
-
- pipe_ctx->stream_res.tg->funcs->enable_optc_clock(pipe_ctx->stream_res.tg, false);
- if (pipe_ctx->stream_res.tg->funcs->set_odm_bypass)
- pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
- pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
-
- if (pipe_ctx->stream_res.tg->funcs->set_drr)
- pipe_ctx->stream_res.tg->funcs->set_drr(
- pipe_ctx->stream_res.tg, NULL);
- }
-
- for (i = 0; i < dc->res_pool->pipe_count; i++)
- if (&dc->current_state->res_ctx.pipe_ctx[i] == pipe_ctx)
- break;
-
- if (i == dc->res_pool->pipe_count)
- return;
-
- pipe_ctx->stream = NULL;
- DC_LOG_DEBUG("Reset back end for pipe %d, tg:%d\n",
- pipe_ctx->pipe_idx, pipe_ctx->stream_res.tg->inst);
-}
-
-void dcn20_reset_hw_ctx_wrap(
- struct dc *dc,
- struct dc_state *context)
-{
- int i;
- struct dce_hwseq *hws = dc->hwseq;
-
- /* Reset Back End*/
- for (i = dc->res_pool->pipe_count - 1; i >= 0 ; i--) {
- struct pipe_ctx *pipe_ctx_old =
- &dc->current_state->res_ctx.pipe_ctx[i];
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-
- if (!pipe_ctx_old->stream)
- continue;
-
- if (pipe_ctx_old->top_pipe || pipe_ctx_old->prev_odm_pipe)
- continue;
-
- if (!pipe_ctx->stream ||
- pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) {
- struct clock_source *old_clk = pipe_ctx_old->clock_source;
-
- dcn20_reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state);
- if (hws->funcs.enable_stream_gating)
- hws->funcs.enable_stream_gating(dc, pipe_ctx_old);
- if (old_clk)
- old_clk->funcs->cs_power_down(old_clk);
- }
- }
-}
-
-void dcn20_update_visual_confirm_color(struct dc *dc, struct pipe_ctx *pipe_ctx, struct tg_color *color, int mpcc_id)
-{
- struct mpc *mpc = dc->res_pool->mpc;
-
- // input to MPCC is always RGB, by default leave black_color at 0
- if (dc->debug.visual_confirm == VISUAL_CONFIRM_HDR)
- get_hdr_visual_confirm_color(pipe_ctx, color);
- else if (dc->debug.visual_confirm == VISUAL_CONFIRM_SURFACE)
- get_surface_visual_confirm_color(pipe_ctx, color);
- else if (dc->debug.visual_confirm == VISUAL_CONFIRM_MPCTREE)
- get_mpctree_visual_confirm_color(pipe_ctx, color);
- else if (dc->debug.visual_confirm == VISUAL_CONFIRM_SWIZZLE)
- get_surface_tile_visual_confirm_color(pipe_ctx, color);
-
- if (mpc->funcs->set_bg_color)
- mpc->funcs->set_bg_color(mpc, color, mpcc_id);
-}
-
-void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
-{
- struct hubp *hubp = pipe_ctx->plane_res.hubp;
- struct mpcc_blnd_cfg blnd_cfg = {0};
- bool per_pixel_alpha = pipe_ctx->plane_state->per_pixel_alpha;
- int mpcc_id;
- struct mpcc *new_mpcc;
- struct mpc *mpc = dc->res_pool->mpc;
- struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params);
-
- if (per_pixel_alpha)
- blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
- else
- blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA;
-
- blnd_cfg.overlap_only = false;
- blnd_cfg.global_gain = 0xff;
-
- if (pipe_ctx->plane_state->global_alpha)
- blnd_cfg.global_alpha = pipe_ctx->plane_state->global_alpha_value;
- else
- blnd_cfg.global_alpha = 0xff;
-
- blnd_cfg.background_color_bpc = 4;
- blnd_cfg.bottom_gain_mode = 0;
- blnd_cfg.top_gain = 0x1f000;
- blnd_cfg.bottom_inside_gain = 0x1f000;
- blnd_cfg.bottom_outside_gain = 0x1f000;
- blnd_cfg.pre_multiplied_alpha = per_pixel_alpha;
- if (pipe_ctx->plane_state->format
- == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA)
- blnd_cfg.pre_multiplied_alpha = false;
-
- /*
- * TODO: remove hack
- * Note: currently there is a bug in init_hw such that
- * on resume from hibernate, BIOS sets up MPCC0, and
- * we do mpcc_remove but the mpcc cannot go to idle
- * after remove. This cause us to pick mpcc1 here,
- * which causes a pstate hang for yet unknown reason.
- */
- mpcc_id = hubp->inst;
-
- /* If there is no full update, don't need to touch MPC tree*/
- if (!pipe_ctx->plane_state->update_flags.bits.full_update &&
- !pipe_ctx->update_flags.bits.mpcc) {
- mpc->funcs->update_blending(mpc, &blnd_cfg, mpcc_id);
- dc->hwss.update_visual_confirm_color(dc, pipe_ctx, &blnd_cfg.black_color, mpcc_id);
- return;
- }
-
- /* check if this MPCC is already being used */
- new_mpcc = mpc->funcs->get_mpcc_for_dpp(mpc_tree_params, mpcc_id);
- /* remove MPCC if being used */
- if (new_mpcc != NULL)
- mpc->funcs->remove_mpcc(mpc, mpc_tree_params, new_mpcc);
- else
- if (dc->debug.sanity_checks)
- mpc->funcs->assert_mpcc_idle_before_connect(
- dc->res_pool->mpc, mpcc_id);
-
- /* Call MPC to insert new plane */
- new_mpcc = mpc->funcs->insert_plane(dc->res_pool->mpc,
- mpc_tree_params,
- &blnd_cfg,
- NULL,
- NULL,
- hubp->inst,
- mpcc_id);
- dc->hwss.update_visual_confirm_color(dc, pipe_ctx, &blnd_cfg.black_color, mpcc_id);
-
- ASSERT(new_mpcc != NULL);
- hubp->opp_id = pipe_ctx->stream_res.opp->inst;
- hubp->mpcc_id = mpcc_id;
-}
-
-void dcn20_enable_stream(struct pipe_ctx *pipe_ctx)
-{
- enum dc_lane_count lane_count =
- pipe_ctx->stream->link->cur_link_settings.lane_count;
-
- struct dc_crtc_timing *timing = &pipe_ctx->stream->timing;
- struct dc_link *link = pipe_ctx->stream->link;
-
- uint32_t active_total_with_borders;
- uint32_t early_control = 0;
- struct timing_generator *tg = pipe_ctx->stream_res.tg;
- struct link_encoder *link_enc;
-
- if (link->is_dig_mapping_flexible &&
- link->dc->res_pool->funcs->link_encs_assign)
- link_enc = link_enc_cfg_get_link_enc_used_by_stream(link->ctx->dc, pipe_ctx->stream);
- else
- link_enc = link->link_enc;
- ASSERT(link_enc);
-
- /* For MST, there are multiply stream go to only one link.
- * connect DIG back_end to front_end while enable_stream and
- * disconnect them during disable_stream
- * BY this, it is logic clean to separate stream and link
- */
- if (is_dp_128b_132b_signal(pipe_ctx)) {
- if (pipe_ctx->stream->ctx->dc->hwseq->funcs.setup_hpo_hw_control)
- pipe_ctx->stream->ctx->dc->hwseq->funcs.setup_hpo_hw_control(
- pipe_ctx->stream->ctx->dc->hwseq, true);
- setup_dp_hpo_stream(pipe_ctx, true);
- pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->enable_stream(
- pipe_ctx->stream_res.hpo_dp_stream_enc);
- pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->map_stream_to_link(
- pipe_ctx->stream_res.hpo_dp_stream_enc,
- pipe_ctx->stream_res.hpo_dp_stream_enc->inst,
- link->hpo_dp_link_enc->inst);
- }
-
- if (!is_dp_128b_132b_signal(pipe_ctx) && link_enc)
- link_enc->funcs->connect_dig_be_to_fe(
- link_enc, pipe_ctx->stream_res.stream_enc->id, true);
-
- if (dc_is_dp_signal(pipe_ctx->stream->signal))
- dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_CONNECT_DIG_FE_BE);
-
- if (pipe_ctx->plane_state && pipe_ctx->plane_state->flip_immediate != 1) {
- if (link->dc->hwss.program_dmdata_engine)
- link->dc->hwss.program_dmdata_engine(pipe_ctx);
- }
-
- link->dc->hwss.update_info_frame(pipe_ctx);
-
- if (dc_is_dp_signal(pipe_ctx->stream->signal))
- dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_UPDATE_INFO_FRAME);
-
- /* enable early control to avoid corruption on DP monitor*/
- active_total_with_borders =
- timing->h_addressable
- + timing->h_border_left
- + timing->h_border_right;
-
- if (lane_count != 0)
- early_control = active_total_with_borders % lane_count;
-
- if (early_control == 0)
- early_control = lane_count;
-
- tg->funcs->set_early_control(tg, early_control);
-
- /* enable audio only within mode set */
- if (pipe_ctx->stream_res.audio != NULL) {
- if (is_dp_128b_132b_signal(pipe_ctx))
- pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->dp_audio_enable(pipe_ctx->stream_res.hpo_dp_stream_enc);
- else if (dc_is_dp_signal(pipe_ctx->stream->signal))
- pipe_ctx->stream_res.stream_enc->funcs->dp_audio_enable(pipe_ctx->stream_res.stream_enc);
- }
-}
-
-void dcn20_program_dmdata_engine(struct pipe_ctx *pipe_ctx)
-{
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct hubp *hubp = pipe_ctx->plane_res.hubp;
- bool enable = false;
- struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc;
- enum dynamic_metadata_mode mode = dc_is_dp_signal(stream->signal)
- ? dmdata_dp
- : dmdata_hdmi;
-
- /* if using dynamic meta, don't set up generic infopackets */
- if (pipe_ctx->stream->dmdata_address.quad_part != 0) {
- pipe_ctx->stream_res.encoder_info_frame.hdrsmd.valid = false;
- enable = true;
- }
-
- if (!hubp)
- return;
-
- if (!stream_enc || !stream_enc->funcs->set_dynamic_metadata)
- return;
-
- stream_enc->funcs->set_dynamic_metadata(stream_enc, enable,
- hubp->inst, mode);
-}
-
-void dcn20_fpga_init_hw(struct dc *dc)
-{
- int i, j;
- struct dce_hwseq *hws = dc->hwseq;
- struct resource_pool *res_pool = dc->res_pool;
- struct dc_state *context = dc->current_state;
-
- if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
- dc->clk_mgr->funcs->init_clocks(dc->clk_mgr);
-
- // Initialize the dccg
- if (res_pool->dccg->funcs->dccg_init)
- res_pool->dccg->funcs->dccg_init(res_pool->dccg);
-
- //Enable ability to power gate / don't force power on permanently
- hws->funcs.enable_power_gating_plane(hws, true);
-
- // Specific to FPGA dccg and registers
- REG_WRITE(RBBMIF_TIMEOUT_DIS, 0xFFFFFFFF);
- REG_WRITE(RBBMIF_TIMEOUT_DIS_2, 0xFFFFFFFF);
-
- hws->funcs.dccg_init(hws);
-
- REG_UPDATE(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, 2);
- REG_UPDATE(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_ENABLE, 1);
- if (REG(REFCLK_CNTL))
- REG_WRITE(REFCLK_CNTL, 0);
- //
-
-
- /* Blank pixel data with OPP DPG */
- for (i = 0; i < dc->res_pool->timing_generator_count; i++) {
- struct timing_generator *tg = dc->res_pool->timing_generators[i];
-
- if (tg->funcs->is_tg_enabled(tg))
- dcn20_init_blank(dc, tg);
- }
-
- for (i = 0; i < res_pool->timing_generator_count; i++) {
- struct timing_generator *tg = dc->res_pool->timing_generators[i];
-
- if (tg->funcs->is_tg_enabled(tg))
- tg->funcs->lock(tg);
- }
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct dpp *dpp = res_pool->dpps[i];
-
- dpp->funcs->dpp_reset(dpp);
- }
-
- /* Reset all MPCC muxes */
- res_pool->mpc->funcs->mpc_init(res_pool->mpc);
-
- /* initialize OPP mpc_tree parameter */
- for (i = 0; i < dc->res_pool->res_cap->num_opp; i++) {
- res_pool->opps[i]->mpc_tree_params.opp_id = res_pool->opps[i]->inst;
- res_pool->opps[i]->mpc_tree_params.opp_list = NULL;
- for (j = 0; j < MAX_PIPES; j++)
- res_pool->opps[i]->mpcc_disconnect_pending[j] = false;
- }
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct timing_generator *tg = dc->res_pool->timing_generators[i];
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
- struct hubp *hubp = dc->res_pool->hubps[i];
- struct dpp *dpp = dc->res_pool->dpps[i];
-
- pipe_ctx->stream_res.tg = tg;
- pipe_ctx->pipe_idx = i;
-
- pipe_ctx->plane_res.hubp = hubp;
- pipe_ctx->plane_res.dpp = dpp;
- pipe_ctx->plane_res.mpcc_inst = dpp->inst;
- hubp->mpcc_id = dpp->inst;
- hubp->opp_id = OPP_ID_INVALID;
- hubp->power_gated = false;
- pipe_ctx->stream_res.opp = NULL;
-
- hubp->funcs->hubp_init(hubp);
-
- //dc->res_pool->opps[i]->mpc_tree_params.opp_id = dc->res_pool->opps[i]->inst;
- //dc->res_pool->opps[i]->mpc_tree_params.opp_list = NULL;
- dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
- pipe_ctx->stream_res.opp = dc->res_pool->opps[i];
- /*to do*/
- hws->funcs.plane_atomic_disconnect(dc, pipe_ctx);
- }
-
- /* initialize DWB pointer to MCIF_WB */
- for (i = 0; i < res_pool->res_cap->num_dwb; i++)
- res_pool->dwbc[i]->mcif = res_pool->mcif_wb[i];
-
- for (i = 0; i < dc->res_pool->timing_generator_count; i++) {
- struct timing_generator *tg = dc->res_pool->timing_generators[i];
-
- if (tg->funcs->is_tg_enabled(tg))
- tg->funcs->unlock(tg);
- }
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-
- dc->hwss.disable_plane(dc, pipe_ctx);
-
- pipe_ctx->stream_res.tg = NULL;
- pipe_ctx->plane_res.hubp = NULL;
- }
-
- for (i = 0; i < dc->res_pool->timing_generator_count; i++) {
- struct timing_generator *tg = dc->res_pool->timing_generators[i];
-
- tg->funcs->tg_init(tg);
- }
-
- if (dc->res_pool->hubbub->funcs->init_crb)
- dc->res_pool->hubbub->funcs->init_crb(dc->res_pool->hubbub);
-}
-#ifndef TRIM_FSFT
-bool dcn20_optimize_timing_for_fsft(struct dc *dc,
- struct dc_crtc_timing *timing,
- unsigned int max_input_rate_in_khz)
-{
- unsigned int old_v_front_porch;
- unsigned int old_v_total;
- unsigned int max_input_rate_in_100hz;
- unsigned long long new_v_total;
-
- max_input_rate_in_100hz = max_input_rate_in_khz * 10;
- if (max_input_rate_in_100hz < timing->pix_clk_100hz)
- return false;
-
- old_v_total = timing->v_total;
- old_v_front_porch = timing->v_front_porch;
-
- timing->fast_transport_output_rate_100hz = timing->pix_clk_100hz;
- timing->pix_clk_100hz = max_input_rate_in_100hz;
-
- new_v_total = div_u64((unsigned long long)old_v_total * max_input_rate_in_100hz, timing->pix_clk_100hz);
-
- timing->v_total = new_v_total;
- timing->v_front_porch = old_v_front_porch + (timing->v_total - old_v_total);
- return true;
-}
-#endif
-
-void dcn20_set_disp_pattern_generator(const struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- enum controller_dp_test_pattern test_pattern,
- enum controller_dp_color_space color_space,
- enum dc_color_depth color_depth,
- const struct tg_color *solid_color,
- int width, int height, int offset)
-{
- pipe_ctx->stream_res.opp->funcs->opp_set_disp_pattern_generator(pipe_ctx->stream_res.opp, test_pattern,
- color_space, color_depth, solid_color, width, height, offset);
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
deleted file mode 100644
index 3883f918b3bb..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ /dev/null
@@ -1,4106 +0,0 @@
-/*
-* Copyright 2016 Advanced Micro Devices, Inc.
- * Copyright 2019 Raptor Engineering, LLC
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include <linux/slab.h>
-
-#include "dm_services.h"
-#include "dc.h"
-
-#include "dcn20_init.h"
-
-#include "resource.h"
-#include "include/irq_service_interface.h"
-#include "dcn20/dcn20_resource.h"
-
-#include "dml/dcn20/dcn20_fpu.h"
-
-#include "dcn10/dcn10_hubp.h"
-#include "dcn10/dcn10_ipp.h"
-#include "dcn20_hubbub.h"
-#include "dcn20_mpc.h"
-#include "dcn20_hubp.h"
-#include "irq/dcn20/irq_service_dcn20.h"
-#include "dcn20_dpp.h"
-#include "dcn20_optc.h"
-#include "dcn20_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
-#include "dcn10/dcn10_resource.h"
-#include "dcn20_opp.h"
-
-#include "dcn20_dsc.h"
-
-#include "dcn20_link_encoder.h"
-#include "dcn20_stream_encoder.h"
-#include "dce/dce_clock_source.h"
-#include "dce/dce_audio.h"
-#include "dce/dce_hwseq.h"
-#include "virtual/virtual_stream_encoder.h"
-#include "dce110/dce110_resource.h"
-#include "dml/display_mode_vba.h"
-#include "dcn20_dccg.h"
-#include "dcn20_vmid.h"
-#include "dc_link_ddc.h"
-#include "dc_link_dp.h"
-#include "dce/dce_panel_cntl.h"
-
-#include "navi10_ip_offset.h"
-
-#include "dcn/dcn_2_0_0_offset.h"
-#include "dcn/dcn_2_0_0_sh_mask.h"
-#include "dpcs/dpcs_2_0_0_offset.h"
-#include "dpcs/dpcs_2_0_0_sh_mask.h"
-
-#include "nbio/nbio_2_3_offset.h"
-
-#include "dcn20/dcn20_dwb.h"
-#include "dcn20/dcn20_mmhubbub.h"
-
-#include "mmhub/mmhub_2_0_0_offset.h"
-#include "mmhub/mmhub_2_0_0_sh_mask.h"
-
-#include "reg_helper.h"
-#include "dce/dce_abm.h"
-#include "dce/dce_dmcu.h"
-#include "dce/dce_aux.h"
-#include "dce/dce_i2c.h"
-#include "vm_helper.h"
-#include "link_enc_cfg.h"
-
-#include "amdgpu_socbb.h"
-
-#define DC_LOGGER_INIT(logger)
-
-struct _vcs_dpi_ip_params_st dcn2_0_ip = {
- .odm_capable = 1,
- .gpuvm_enable = 0,
- .hostvm_enable = 0,
- .gpuvm_max_page_table_levels = 4,
- .hostvm_max_page_table_levels = 4,
- .hostvm_cached_page_table_levels = 0,
- .pte_group_size_bytes = 2048,
- .num_dsc = 6,
- .rob_buffer_size_kbytes = 168,
- .det_buffer_size_kbytes = 164,
- .dpte_buffer_size_in_pte_reqs_luma = 84,
- .pde_proc_buffer_size_64k_reqs = 48,
- .dpp_output_buffer_pixels = 2560,
- .opp_output_buffer_lines = 1,
- .pixel_chunk_size_kbytes = 8,
- .pte_chunk_size_kbytes = 2,
- .meta_chunk_size_kbytes = 2,
- .writeback_chunk_size_kbytes = 2,
- .line_buffer_size_bits = 789504,
- .is_line_buffer_bpp_fixed = 0,
- .line_buffer_fixed_bpp = 0,
- .dcc_supported = true,
- .max_line_buffer_lines = 12,
- .writeback_luma_buffer_size_kbytes = 12,
- .writeback_chroma_buffer_size_kbytes = 8,
- .writeback_chroma_line_buffer_width_pixels = 4,
- .writeback_max_hscl_ratio = 1,
- .writeback_max_vscl_ratio = 1,
- .writeback_min_hscl_ratio = 1,
- .writeback_min_vscl_ratio = 1,
- .writeback_max_hscl_taps = 12,
- .writeback_max_vscl_taps = 12,
- .writeback_line_buffer_luma_buffer_size = 0,
- .writeback_line_buffer_chroma_buffer_size = 14643,
- .cursor_buffer_size = 8,
- .cursor_chunk_size = 2,
- .max_num_otg = 6,
- .max_num_dpp = 6,
- .max_num_wb = 1,
- .max_dchub_pscl_bw_pix_per_clk = 4,
- .max_pscl_lb_bw_pix_per_clk = 2,
- .max_lb_vscl_bw_pix_per_clk = 4,
- .max_vscl_hscl_bw_pix_per_clk = 4,
- .max_hscl_ratio = 8,
- .max_vscl_ratio = 8,
- .hscl_mults = 4,
- .vscl_mults = 4,
- .max_hscl_taps = 8,
- .max_vscl_taps = 8,
- .dispclk_ramp_margin_percent = 1,
- .underscan_factor = 1.10,
- .min_vblank_lines = 32, //
- .dppclk_delay_subtotal = 77, //
- .dppclk_delay_scl_lb_only = 16,
- .dppclk_delay_scl = 50,
- .dppclk_delay_cnvc_formatter = 8,
- .dppclk_delay_cnvc_cursor = 6,
- .dispclk_delay_subtotal = 87, //
- .dcfclk_cstate_latency = 10, // SRExitTime
- .max_inter_dcn_tile_repeaters = 8,
- .xfc_supported = true,
- .xfc_fill_bw_overhead_percent = 10.0,
- .xfc_fill_constant_bytes = 0,
- .number_of_cursors = 1,
-};
-
-static struct _vcs_dpi_ip_params_st dcn2_0_nv14_ip = {
- .odm_capable = 1,
- .gpuvm_enable = 0,
- .hostvm_enable = 0,
- .gpuvm_max_page_table_levels = 4,
- .hostvm_max_page_table_levels = 4,
- .hostvm_cached_page_table_levels = 0,
- .num_dsc = 5,
- .rob_buffer_size_kbytes = 168,
- .det_buffer_size_kbytes = 164,
- .dpte_buffer_size_in_pte_reqs_luma = 84,
- .dpte_buffer_size_in_pte_reqs_chroma = 42,//todo
- .dpp_output_buffer_pixels = 2560,
- .opp_output_buffer_lines = 1,
- .pixel_chunk_size_kbytes = 8,
- .pte_enable = 1,
- .max_page_table_levels = 4,
- .pte_chunk_size_kbytes = 2,
- .meta_chunk_size_kbytes = 2,
- .writeback_chunk_size_kbytes = 2,
- .line_buffer_size_bits = 789504,
- .is_line_buffer_bpp_fixed = 0,
- .line_buffer_fixed_bpp = 0,
- .dcc_supported = true,
- .max_line_buffer_lines = 12,
- .writeback_luma_buffer_size_kbytes = 12,
- .writeback_chroma_buffer_size_kbytes = 8,
- .writeback_chroma_line_buffer_width_pixels = 4,
- .writeback_max_hscl_ratio = 1,
- .writeback_max_vscl_ratio = 1,
- .writeback_min_hscl_ratio = 1,
- .writeback_min_vscl_ratio = 1,
- .writeback_max_hscl_taps = 12,
- .writeback_max_vscl_taps = 12,
- .writeback_line_buffer_luma_buffer_size = 0,
- .writeback_line_buffer_chroma_buffer_size = 14643,
- .cursor_buffer_size = 8,
- .cursor_chunk_size = 2,
- .max_num_otg = 5,
- .max_num_dpp = 5,
- .max_num_wb = 1,
- .max_dchub_pscl_bw_pix_per_clk = 4,
- .max_pscl_lb_bw_pix_per_clk = 2,
- .max_lb_vscl_bw_pix_per_clk = 4,
- .max_vscl_hscl_bw_pix_per_clk = 4,
- .max_hscl_ratio = 8,
- .max_vscl_ratio = 8,
- .hscl_mults = 4,
- .vscl_mults = 4,
- .max_hscl_taps = 8,
- .max_vscl_taps = 8,
- .dispclk_ramp_margin_percent = 1,
- .underscan_factor = 1.10,
- .min_vblank_lines = 32, //
- .dppclk_delay_subtotal = 77, //
- .dppclk_delay_scl_lb_only = 16,
- .dppclk_delay_scl = 50,
- .dppclk_delay_cnvc_formatter = 8,
- .dppclk_delay_cnvc_cursor = 6,
- .dispclk_delay_subtotal = 87, //
- .dcfclk_cstate_latency = 10, // SRExitTime
- .max_inter_dcn_tile_repeaters = 8,
- .xfc_supported = true,
- .xfc_fill_bw_overhead_percent = 10.0,
- .xfc_fill_constant_bytes = 0,
- .ptoi_supported = 0,
- .number_of_cursors = 1,
-};
-
-static struct _vcs_dpi_soc_bounding_box_st dcn2_0_soc = {
- /* Defaults that get patched on driver load from firmware. */
- .clock_limits = {
- {
- .state = 0,
- .dcfclk_mhz = 560.0,
- .fabricclk_mhz = 560.0,
- .dispclk_mhz = 513.0,
- .dppclk_mhz = 513.0,
- .phyclk_mhz = 540.0,
- .socclk_mhz = 560.0,
- .dscclk_mhz = 171.0,
- .dram_speed_mts = 8960.0,
- },
- {
- .state = 1,
- .dcfclk_mhz = 694.0,
- .fabricclk_mhz = 694.0,
- .dispclk_mhz = 642.0,
- .dppclk_mhz = 642.0,
- .phyclk_mhz = 600.0,
- .socclk_mhz = 694.0,
- .dscclk_mhz = 214.0,
- .dram_speed_mts = 11104.0,
- },
- {
- .state = 2,
- .dcfclk_mhz = 875.0,
- .fabricclk_mhz = 875.0,
- .dispclk_mhz = 734.0,
- .dppclk_mhz = 734.0,
- .phyclk_mhz = 810.0,
- .socclk_mhz = 875.0,
- .dscclk_mhz = 245.0,
- .dram_speed_mts = 14000.0,
- },
- {
- .state = 3,
- .dcfclk_mhz = 1000.0,
- .fabricclk_mhz = 1000.0,
- .dispclk_mhz = 1100.0,
- .dppclk_mhz = 1100.0,
- .phyclk_mhz = 810.0,
- .socclk_mhz = 1000.0,
- .dscclk_mhz = 367.0,
- .dram_speed_mts = 16000.0,
- },
- {
- .state = 4,
- .dcfclk_mhz = 1200.0,
- .fabricclk_mhz = 1200.0,
- .dispclk_mhz = 1284.0,
- .dppclk_mhz = 1284.0,
- .phyclk_mhz = 810.0,
- .socclk_mhz = 1200.0,
- .dscclk_mhz = 428.0,
- .dram_speed_mts = 16000.0,
- },
- /*Extra state, no dispclk ramping*/
- {
- .state = 5,
- .dcfclk_mhz = 1200.0,
- .fabricclk_mhz = 1200.0,
- .dispclk_mhz = 1284.0,
- .dppclk_mhz = 1284.0,
- .phyclk_mhz = 810.0,
- .socclk_mhz = 1200.0,
- .dscclk_mhz = 428.0,
- .dram_speed_mts = 16000.0,
- },
- },
- .num_states = 5,
- .sr_exit_time_us = 8.6,
- .sr_enter_plus_exit_time_us = 10.9,
- .urgent_latency_us = 4.0,
- .urgent_latency_pixel_data_only_us = 4.0,
- .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
- .urgent_latency_vm_data_only_us = 4.0,
- .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
- .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
- .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 40.0,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 40.0,
- .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
- .max_avg_sdp_bw_use_normal_percent = 40.0,
- .max_avg_dram_bw_use_normal_percent = 40.0,
- .writeback_latency_us = 12.0,
- .ideal_dram_bw_after_urgent_percent = 40.0,
- .max_request_size_bytes = 256,
- .dram_channel_width_bytes = 2,
- .fabric_datapath_to_dcn_data_return_bytes = 64,
- .dcn_downspread_percent = 0.5,
- .downspread_percent = 0.38,
- .dram_page_open_time_ns = 50.0,
- .dram_rw_turnaround_time_ns = 17.5,
- .dram_return_buffer_per_channel_bytes = 8192,
- .round_trip_ping_latency_dcfclk_cycles = 131,
- .urgent_out_of_order_return_per_channel_bytes = 256,
- .channel_interleave_bytes = 256,
- .num_banks = 8,
- .num_chans = 16,
- .vmm_page_size_bytes = 4096,
- .dram_clock_change_latency_us = 404.0,
- .dummy_pstate_latency_us = 5.0,
- .writeback_dram_clock_change_latency_us = 23.0,
- .return_bus_width_bytes = 64,
- .dispclk_dppclk_vco_speed_mhz = 3850,
- .xfc_bus_transport_time_us = 20,
- .xfc_xbuf_latency_tolerance_us = 4,
- .use_urgent_burst_bw = 0
-};
-
-static struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv14_soc = {
- .clock_limits = {
- {
- .state = 0,
- .dcfclk_mhz = 560.0,
- .fabricclk_mhz = 560.0,
- .dispclk_mhz = 513.0,
- .dppclk_mhz = 513.0,
- .phyclk_mhz = 540.0,
- .socclk_mhz = 560.0,
- .dscclk_mhz = 171.0,
- .dram_speed_mts = 8960.0,
- },
- {
- .state = 1,
- .dcfclk_mhz = 694.0,
- .fabricclk_mhz = 694.0,
- .dispclk_mhz = 642.0,
- .dppclk_mhz = 642.0,
- .phyclk_mhz = 600.0,
- .socclk_mhz = 694.0,
- .dscclk_mhz = 214.0,
- .dram_speed_mts = 11104.0,
- },
- {
- .state = 2,
- .dcfclk_mhz = 875.0,
- .fabricclk_mhz = 875.0,
- .dispclk_mhz = 734.0,
- .dppclk_mhz = 734.0,
- .phyclk_mhz = 810.0,
- .socclk_mhz = 875.0,
- .dscclk_mhz = 245.0,
- .dram_speed_mts = 14000.0,
- },
- {
- .state = 3,
- .dcfclk_mhz = 1000.0,
- .fabricclk_mhz = 1000.0,
- .dispclk_mhz = 1100.0,
- .dppclk_mhz = 1100.0,
- .phyclk_mhz = 810.0,
- .socclk_mhz = 1000.0,
- .dscclk_mhz = 367.0,
- .dram_speed_mts = 16000.0,
- },
- {
- .state = 4,
- .dcfclk_mhz = 1200.0,
- .fabricclk_mhz = 1200.0,
- .dispclk_mhz = 1284.0,
- .dppclk_mhz = 1284.0,
- .phyclk_mhz = 810.0,
- .socclk_mhz = 1200.0,
- .dscclk_mhz = 428.0,
- .dram_speed_mts = 16000.0,
- },
- /*Extra state, no dispclk ramping*/
- {
- .state = 5,
- .dcfclk_mhz = 1200.0,
- .fabricclk_mhz = 1200.0,
- .dispclk_mhz = 1284.0,
- .dppclk_mhz = 1284.0,
- .phyclk_mhz = 810.0,
- .socclk_mhz = 1200.0,
- .dscclk_mhz = 428.0,
- .dram_speed_mts = 16000.0,
- },
- },
- .num_states = 5,
- .sr_exit_time_us = 11.6,
- .sr_enter_plus_exit_time_us = 13.9,
- .urgent_latency_us = 4.0,
- .urgent_latency_pixel_data_only_us = 4.0,
- .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
- .urgent_latency_vm_data_only_us = 4.0,
- .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
- .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
- .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 40.0,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 40.0,
- .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
- .max_avg_sdp_bw_use_normal_percent = 40.0,
- .max_avg_dram_bw_use_normal_percent = 40.0,
- .writeback_latency_us = 12.0,
- .ideal_dram_bw_after_urgent_percent = 40.0,
- .max_request_size_bytes = 256,
- .dram_channel_width_bytes = 2,
- .fabric_datapath_to_dcn_data_return_bytes = 64,
- .dcn_downspread_percent = 0.5,
- .downspread_percent = 0.38,
- .dram_page_open_time_ns = 50.0,
- .dram_rw_turnaround_time_ns = 17.5,
- .dram_return_buffer_per_channel_bytes = 8192,
- .round_trip_ping_latency_dcfclk_cycles = 131,
- .urgent_out_of_order_return_per_channel_bytes = 256,
- .channel_interleave_bytes = 256,
- .num_banks = 8,
- .num_chans = 8,
- .vmm_page_size_bytes = 4096,
- .dram_clock_change_latency_us = 404.0,
- .dummy_pstate_latency_us = 5.0,
- .writeback_dram_clock_change_latency_us = 23.0,
- .return_bus_width_bytes = 64,
- .dispclk_dppclk_vco_speed_mhz = 3850,
- .xfc_bus_transport_time_us = 20,
- .xfc_xbuf_latency_tolerance_us = 4,
- .use_urgent_burst_bw = 0
-};
-
-static struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = { 0 };
-
-#ifndef mmDP0_DP_DPHY_INTERNAL_CTRL
- #define mmDP0_DP_DPHY_INTERNAL_CTRL 0x210f
- #define mmDP0_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2
- #define mmDP1_DP_DPHY_INTERNAL_CTRL 0x220f
- #define mmDP1_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2
- #define mmDP2_DP_DPHY_INTERNAL_CTRL 0x230f
- #define mmDP2_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2
- #define mmDP3_DP_DPHY_INTERNAL_CTRL 0x240f
- #define mmDP3_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2
- #define mmDP4_DP_DPHY_INTERNAL_CTRL 0x250f
- #define mmDP4_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2
- #define mmDP5_DP_DPHY_INTERNAL_CTRL 0x260f
- #define mmDP5_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2
- #define mmDP6_DP_DPHY_INTERNAL_CTRL 0x270f
- #define mmDP6_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2
-#endif
-
-
-enum dcn20_clk_src_array_id {
- DCN20_CLK_SRC_PLL0,
- DCN20_CLK_SRC_PLL1,
- DCN20_CLK_SRC_PLL2,
- DCN20_CLK_SRC_PLL3,
- DCN20_CLK_SRC_PLL4,
- DCN20_CLK_SRC_PLL5,
- DCN20_CLK_SRC_TOTAL
-};
-
-/* begin *********************
- * macros to expend register list macro defined in HW object header file */
-
-/* DCN */
-/* TODO awful hack. fixup dcn20_dwb.h */
-#undef BASE_INNER
-#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
-
-#define BASE(seg) BASE_INNER(seg)
-
-#define SR(reg_name)\
- .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \
- mm ## reg_name
-
-#define SRI(reg_name, block, id)\
- .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define SRIR(var_name, reg_name, block, id)\
- .var_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define SRII(reg_name, block, id)\
- .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define DCCG_SRII(reg_name, block, id)\
- .block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define VUPDATE_SRII(reg_name, block, id)\
- .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \
- mm ## reg_name ## _ ## block ## id
-
-/* NBIO */
-#define NBIO_BASE_INNER(seg) \
- NBIO_BASE__INST0_SEG ## seg
-
-#define NBIO_BASE(seg) \
- NBIO_BASE_INNER(seg)
-
-#define NBIO_SR(reg_name)\
- .reg_name = NBIO_BASE(mm ## reg_name ## _BASE_IDX) + \
- mm ## reg_name
-
-/* MMHUB */
-#define MMHUB_BASE_INNER(seg) \
- MMHUB_BASE__INST0_SEG ## seg
-
-#define MMHUB_BASE(seg) \
- MMHUB_BASE_INNER(seg)
-
-#define MMHUB_SR(reg_name)\
- .reg_name = MMHUB_BASE(mmMM ## reg_name ## _BASE_IDX) + \
- mmMM ## reg_name
-
-static const struct bios_registers bios_regs = {
- NBIO_SR(BIOS_SCRATCH_3),
- NBIO_SR(BIOS_SCRATCH_6)
-};
-
-#define clk_src_regs(index, pllid)\
-[index] = {\
- CS_COMMON_REG_LIST_DCN2_0(index, pllid),\
-}
-
-static const struct dce110_clk_src_regs clk_src_regs[] = {
- clk_src_regs(0, A),
- clk_src_regs(1, B),
- clk_src_regs(2, C),
- clk_src_regs(3, D),
- clk_src_regs(4, E),
- clk_src_regs(5, F)
-};
-
-static const struct dce110_clk_src_shift cs_shift = {
- CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT)
-};
-
-static const struct dce110_clk_src_mask cs_mask = {
- CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK)
-};
-
-static const struct dce_dmcu_registers dmcu_regs = {
- DMCU_DCN10_REG_LIST()
-};
-
-static const struct dce_dmcu_shift dmcu_shift = {
- DMCU_MASK_SH_LIST_DCN10(__SHIFT)
-};
-
-static const struct dce_dmcu_mask dmcu_mask = {
- DMCU_MASK_SH_LIST_DCN10(_MASK)
-};
-
-static const struct dce_abm_registers abm_regs = {
- ABM_DCN20_REG_LIST()
-};
-
-static const struct dce_abm_shift abm_shift = {
- ABM_MASK_SH_LIST_DCN20(__SHIFT)
-};
-
-static const struct dce_abm_mask abm_mask = {
- ABM_MASK_SH_LIST_DCN20(_MASK)
-};
-
-#define audio_regs(id)\
-[id] = {\
- AUD_COMMON_REG_LIST(id)\
-}
-
-static const struct dce_audio_registers audio_regs[] = {
- audio_regs(0),
- audio_regs(1),
- audio_regs(2),
- audio_regs(3),
- audio_regs(4),
- audio_regs(5),
- audio_regs(6),
-};
-
-#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\
- SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\
- SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\
- AUD_COMMON_MASK_SH_LIST_BASE(mask_sh)
-
-static const struct dce_audio_shift audio_shift = {
- DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce_audio_mask audio_mask = {
- DCE120_AUD_COMMON_MASK_SH_LIST(_MASK)
-};
-
-#define stream_enc_regs(id)\
-[id] = {\
- SE_DCN2_REG_LIST(id)\
-}
-
-static const struct dcn10_stream_enc_registers stream_enc_regs[] = {
- stream_enc_regs(0),
- stream_enc_regs(1),
- stream_enc_regs(2),
- stream_enc_regs(3),
- stream_enc_regs(4),
- stream_enc_regs(5),
-};
-
-static const struct dcn10_stream_encoder_shift se_shift = {
- SE_COMMON_MASK_SH_LIST_DCN20(__SHIFT)
-};
-
-static const struct dcn10_stream_encoder_mask se_mask = {
- SE_COMMON_MASK_SH_LIST_DCN20(_MASK)
-};
-
-
-#define aux_regs(id)\
-[id] = {\
- DCN2_AUX_REG_LIST(id)\
-}
-
-static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = {
- aux_regs(0),
- aux_regs(1),
- aux_regs(2),
- aux_regs(3),
- aux_regs(4),
- aux_regs(5)
-};
-
-#define hpd_regs(id)\
-[id] = {\
- HPD_REG_LIST(id)\
-}
-
-static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = {
- hpd_regs(0),
- hpd_regs(1),
- hpd_regs(2),
- hpd_regs(3),
- hpd_regs(4),
- hpd_regs(5)
-};
-
-#define link_regs(id, phyid)\
-[id] = {\
- LE_DCN10_REG_LIST(id), \
- UNIPHY_DCN2_REG_LIST(phyid), \
- DPCS_DCN2_REG_LIST(id), \
- SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \
-}
-
-static const struct dcn10_link_enc_registers link_enc_regs[] = {
- link_regs(0, A),
- link_regs(1, B),
- link_regs(2, C),
- link_regs(3, D),
- link_regs(4, E),
- link_regs(5, F)
-};
-
-static const struct dcn10_link_enc_shift le_shift = {
- LINK_ENCODER_MASK_SH_LIST_DCN20(__SHIFT),\
- DPCS_DCN2_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn10_link_enc_mask le_mask = {
- LINK_ENCODER_MASK_SH_LIST_DCN20(_MASK),\
- DPCS_DCN2_MASK_SH_LIST(_MASK)
-};
-
-static const struct dce_panel_cntl_registers panel_cntl_regs[] = {
- { DCN_PANEL_CNTL_REG_LIST() }
-};
-
-static const struct dce_panel_cntl_shift panel_cntl_shift = {
- DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce_panel_cntl_mask panel_cntl_mask = {
- DCE_PANEL_CNTL_MASK_SH_LIST(_MASK)
-};
-
-#define ipp_regs(id)\
-[id] = {\
- IPP_REG_LIST_DCN20(id),\
-}
-
-static const struct dcn10_ipp_registers ipp_regs[] = {
- ipp_regs(0),
- ipp_regs(1),
- ipp_regs(2),
- ipp_regs(3),
- ipp_regs(4),
- ipp_regs(5),
-};
-
-static const struct dcn10_ipp_shift ipp_shift = {
- IPP_MASK_SH_LIST_DCN20(__SHIFT)
-};
-
-static const struct dcn10_ipp_mask ipp_mask = {
- IPP_MASK_SH_LIST_DCN20(_MASK),
-};
-
-#define opp_regs(id)\
-[id] = {\
- OPP_REG_LIST_DCN20(id),\
-}
-
-static const struct dcn20_opp_registers opp_regs[] = {
- opp_regs(0),
- opp_regs(1),
- opp_regs(2),
- opp_regs(3),
- opp_regs(4),
- opp_regs(5),
-};
-
-static const struct dcn20_opp_shift opp_shift = {
- OPP_MASK_SH_LIST_DCN20(__SHIFT)
-};
-
-static const struct dcn20_opp_mask opp_mask = {
- OPP_MASK_SH_LIST_DCN20(_MASK)
-};
-
-#define aux_engine_regs(id)\
-[id] = {\
- AUX_COMMON_REG_LIST0(id), \
- .AUXN_IMPCAL = 0, \
- .AUXP_IMPCAL = 0, \
- .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \
-}
-
-static const struct dce110_aux_registers aux_engine_regs[] = {
- aux_engine_regs(0),
- aux_engine_regs(1),
- aux_engine_regs(2),
- aux_engine_regs(3),
- aux_engine_regs(4),
- aux_engine_regs(5)
-};
-
-#define tf_regs(id)\
-[id] = {\
- TF_REG_LIST_DCN20(id),\
- TF_REG_LIST_DCN20_COMMON_APPEND(id),\
-}
-
-static const struct dcn2_dpp_registers tf_regs[] = {
- tf_regs(0),
- tf_regs(1),
- tf_regs(2),
- tf_regs(3),
- tf_regs(4),
- tf_regs(5),
-};
-
-static const struct dcn2_dpp_shift tf_shift = {
- TF_REG_LIST_SH_MASK_DCN20(__SHIFT),
- TF_DEBUG_REG_LIST_SH_DCN20
-};
-
-static const struct dcn2_dpp_mask tf_mask = {
- TF_REG_LIST_SH_MASK_DCN20(_MASK),
- TF_DEBUG_REG_LIST_MASK_DCN20
-};
-
-#define dwbc_regs_dcn2(id)\
-[id] = {\
- DWBC_COMMON_REG_LIST_DCN2_0(id),\
- }
-
-static const struct dcn20_dwbc_registers dwbc20_regs[] = {
- dwbc_regs_dcn2(0),
-};
-
-static const struct dcn20_dwbc_shift dwbc20_shift = {
- DWBC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT)
-};
-
-static const struct dcn20_dwbc_mask dwbc20_mask = {
- DWBC_COMMON_MASK_SH_LIST_DCN2_0(_MASK)
-};
-
-#define mcif_wb_regs_dcn2(id)\
-[id] = {\
- MCIF_WB_COMMON_REG_LIST_DCN2_0(id),\
- }
-
-static const struct dcn20_mmhubbub_registers mcif_wb20_regs[] = {
- mcif_wb_regs_dcn2(0),
-};
-
-static const struct dcn20_mmhubbub_shift mcif_wb20_shift = {
- MCIF_WB_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT)
-};
-
-static const struct dcn20_mmhubbub_mask mcif_wb20_mask = {
- MCIF_WB_COMMON_MASK_SH_LIST_DCN2_0(_MASK)
-};
-
-static const struct dcn20_mpc_registers mpc_regs = {
- MPC_REG_LIST_DCN2_0(0),
- MPC_REG_LIST_DCN2_0(1),
- MPC_REG_LIST_DCN2_0(2),
- MPC_REG_LIST_DCN2_0(3),
- MPC_REG_LIST_DCN2_0(4),
- MPC_REG_LIST_DCN2_0(5),
- MPC_OUT_MUX_REG_LIST_DCN2_0(0),
- MPC_OUT_MUX_REG_LIST_DCN2_0(1),
- MPC_OUT_MUX_REG_LIST_DCN2_0(2),
- MPC_OUT_MUX_REG_LIST_DCN2_0(3),
- MPC_OUT_MUX_REG_LIST_DCN2_0(4),
- MPC_OUT_MUX_REG_LIST_DCN2_0(5),
- MPC_DBG_REG_LIST_DCN2_0()
-};
-
-static const struct dcn20_mpc_shift mpc_shift = {
- MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT),
- MPC_DEBUG_REG_LIST_SH_DCN20
-};
-
-static const struct dcn20_mpc_mask mpc_mask = {
- MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK),
- MPC_DEBUG_REG_LIST_MASK_DCN20
-};
-
-#define tg_regs(id)\
-[id] = {TG_COMMON_REG_LIST_DCN2_0(id)}
-
-
-static const struct dcn_optc_registers tg_regs[] = {
- tg_regs(0),
- tg_regs(1),
- tg_regs(2),
- tg_regs(3),
- tg_regs(4),
- tg_regs(5)
-};
-
-static const struct dcn_optc_shift tg_shift = {
- TG_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT)
-};
-
-static const struct dcn_optc_mask tg_mask = {
- TG_COMMON_MASK_SH_LIST_DCN2_0(_MASK)
-};
-
-#define hubp_regs(id)\
-[id] = {\
- HUBP_REG_LIST_DCN20(id)\
-}
-
-static const struct dcn_hubp2_registers hubp_regs[] = {
- hubp_regs(0),
- hubp_regs(1),
- hubp_regs(2),
- hubp_regs(3),
- hubp_regs(4),
- hubp_regs(5)
-};
-
-static const struct dcn_hubp2_shift hubp_shift = {
- HUBP_MASK_SH_LIST_DCN20(__SHIFT)
-};
-
-static const struct dcn_hubp2_mask hubp_mask = {
- HUBP_MASK_SH_LIST_DCN20(_MASK)
-};
-
-static const struct dcn_hubbub_registers hubbub_reg = {
- HUBBUB_REG_LIST_DCN20(0)
-};
-
-static const struct dcn_hubbub_shift hubbub_shift = {
- HUBBUB_MASK_SH_LIST_DCN20(__SHIFT)
-};
-
-static const struct dcn_hubbub_mask hubbub_mask = {
- HUBBUB_MASK_SH_LIST_DCN20(_MASK)
-};
-
-#define vmid_regs(id)\
-[id] = {\
- DCN20_VMID_REG_LIST(id)\
-}
-
-static const struct dcn_vmid_registers vmid_regs[] = {
- vmid_regs(0),
- vmid_regs(1),
- vmid_regs(2),
- vmid_regs(3),
- vmid_regs(4),
- vmid_regs(5),
- vmid_regs(6),
- vmid_regs(7),
- vmid_regs(8),
- vmid_regs(9),
- vmid_regs(10),
- vmid_regs(11),
- vmid_regs(12),
- vmid_regs(13),
- vmid_regs(14),
- vmid_regs(15)
-};
-
-static const struct dcn20_vmid_shift vmid_shifts = {
- DCN20_VMID_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn20_vmid_mask vmid_masks = {
- DCN20_VMID_MASK_SH_LIST(_MASK)
-};
-
-static const struct dce110_aux_registers_shift aux_shift = {
- DCN_AUX_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce110_aux_registers_mask aux_mask = {
- DCN_AUX_MASK_SH_LIST(_MASK)
-};
-
-static int map_transmitter_id_to_phy_instance(
- enum transmitter transmitter)
-{
- switch (transmitter) {
- case TRANSMITTER_UNIPHY_A:
- return 0;
- break;
- case TRANSMITTER_UNIPHY_B:
- return 1;
- break;
- case TRANSMITTER_UNIPHY_C:
- return 2;
- break;
- case TRANSMITTER_UNIPHY_D:
- return 3;
- break;
- case TRANSMITTER_UNIPHY_E:
- return 4;
- break;
- case TRANSMITTER_UNIPHY_F:
- return 5;
- break;
- default:
- ASSERT(0);
- return 0;
- }
-}
-
-#define dsc_regsDCN20(id)\
-[id] = {\
- DSC_REG_LIST_DCN20(id)\
-}
-
-static const struct dcn20_dsc_registers dsc_regs[] = {
- dsc_regsDCN20(0),
- dsc_regsDCN20(1),
- dsc_regsDCN20(2),
- dsc_regsDCN20(3),
- dsc_regsDCN20(4),
- dsc_regsDCN20(5)
-};
-
-static const struct dcn20_dsc_shift dsc_shift = {
- DSC_REG_LIST_SH_MASK_DCN20(__SHIFT)
-};
-
-static const struct dcn20_dsc_mask dsc_mask = {
- DSC_REG_LIST_SH_MASK_DCN20(_MASK)
-};
-
-static const struct dccg_registers dccg_regs = {
- DCCG_REG_LIST_DCN2()
-};
-
-static const struct dccg_shift dccg_shift = {
- DCCG_MASK_SH_LIST_DCN2(__SHIFT)
-};
-
-static const struct dccg_mask dccg_mask = {
- DCCG_MASK_SH_LIST_DCN2(_MASK)
-};
-
-static const struct resource_caps res_cap_nv10 = {
- .num_timing_generator = 6,
- .num_opp = 6,
- .num_video_plane = 6,
- .num_audio = 7,
- .num_stream_encoder = 6,
- .num_pll = 6,
- .num_dwb = 1,
- .num_ddc = 6,
- .num_vmid = 16,
- .num_dsc = 6,
-};
-
-static const struct dc_plane_cap plane_cap = {
- .type = DC_PLANE_TYPE_DCN_UNIVERSAL,
- .blends_with_above = true,
- .blends_with_below = true,
- .per_pixel_alpha = true,
-
- .pixel_format_support = {
- .argb8888 = true,
- .nv12 = true,
- .fp16 = true,
- .p010 = true
- },
-
- .max_upscale_factor = {
- .argb8888 = 16000,
- .nv12 = 16000,
- .fp16 = 1
- },
-
- .max_downscale_factor = {
- .argb8888 = 250,
- .nv12 = 250,
- .fp16 = 1
- },
- 16,
- 16
-};
-static const struct resource_caps res_cap_nv14 = {
- .num_timing_generator = 5,
- .num_opp = 5,
- .num_video_plane = 5,
- .num_audio = 6,
- .num_stream_encoder = 5,
- .num_pll = 5,
- .num_dwb = 1,
- .num_ddc = 5,
- .num_vmid = 16,
- .num_dsc = 5,
-};
-
-static const struct dc_debug_options debug_defaults_drv = {
- .disable_dmcu = false,
- .force_abm_enable = false,
- .timing_trace = false,
- .clock_trace = true,
- .disable_pplib_clock_request = true,
- .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
- .force_single_disp_pipe_split = false,
- .disable_dcc = DCC_ENABLE,
- .vsr_support = true,
- .performance_trace = false,
- .max_downscale_src_width = 5120,/*upto 5K*/
- .disable_pplib_wm_range = false,
- .scl_reset_length10 = true,
- .sanity_checks = false,
- .underflow_assert_delay_us = 0xFFFFFFFF,
-};
-
-static const struct dc_debug_options debug_defaults_diags = {
- .disable_dmcu = false,
- .force_abm_enable = false,
- .timing_trace = true,
- .clock_trace = true,
- .disable_dpp_power_gate = true,
- .disable_hubp_power_gate = true,
- .disable_clock_gate = true,
- .disable_pplib_clock_request = true,
- .disable_pplib_wm_range = true,
- .disable_stutter = true,
- .scl_reset_length10 = true,
- .underflow_assert_delay_us = 0xFFFFFFFF,
- .enable_tri_buf = true,
-};
-
-void dcn20_dpp_destroy(struct dpp **dpp)
-{
- kfree(TO_DCN20_DPP(*dpp));
- *dpp = NULL;
-}
-
-struct dpp *dcn20_dpp_create(
- struct dc_context *ctx,
- uint32_t inst)
-{
- struct dcn20_dpp *dpp =
- kzalloc(sizeof(struct dcn20_dpp), GFP_ATOMIC);
-
- if (!dpp)
- return NULL;
-
- if (dpp2_construct(dpp, ctx, inst,
- &tf_regs[inst], &tf_shift, &tf_mask))
- return &dpp->base;
-
- BREAK_TO_DEBUGGER();
- kfree(dpp);
- return NULL;
-}
-
-struct input_pixel_processor *dcn20_ipp_create(
- struct dc_context *ctx, uint32_t inst)
-{
- struct dcn10_ipp *ipp =
- kzalloc(sizeof(struct dcn10_ipp), GFP_ATOMIC);
-
- if (!ipp) {
- BREAK_TO_DEBUGGER();
- return NULL;
- }
-
- dcn20_ipp_construct(ipp, ctx, inst,
- &ipp_regs[inst], &ipp_shift, &ipp_mask);
- return &ipp->base;
-}
-
-
-struct output_pixel_processor *dcn20_opp_create(
- struct dc_context *ctx, uint32_t inst)
-{
- struct dcn20_opp *opp =
- kzalloc(sizeof(struct dcn20_opp), GFP_ATOMIC);
-
- if (!opp) {
- BREAK_TO_DEBUGGER();
- return NULL;
- }
-
- dcn20_opp_construct(opp, ctx, inst,
- &opp_regs[inst], &opp_shift, &opp_mask);
- return &opp->base;
-}
-
-struct dce_aux *dcn20_aux_engine_create(
- struct dc_context *ctx,
- uint32_t inst)
-{
- struct aux_engine_dce110 *aux_engine =
- kzalloc(sizeof(struct aux_engine_dce110), GFP_ATOMIC);
-
- if (!aux_engine)
- return NULL;
-
- dce110_aux_engine_construct(aux_engine, ctx, inst,
- SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD,
- &aux_engine_regs[inst],
- &aux_mask,
- &aux_shift,
- ctx->dc->caps.extended_aux_timeout_support);
-
- return &aux_engine->base;
-}
-#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) }
-
-static const struct dce_i2c_registers i2c_hw_regs[] = {
- i2c_inst_regs(1),
- i2c_inst_regs(2),
- i2c_inst_regs(3),
- i2c_inst_regs(4),
- i2c_inst_regs(5),
- i2c_inst_regs(6),
-};
-
-static const struct dce_i2c_shift i2c_shifts = {
- I2C_COMMON_MASK_SH_LIST_DCN2(__SHIFT)
-};
-
-static const struct dce_i2c_mask i2c_masks = {
- I2C_COMMON_MASK_SH_LIST_DCN2(_MASK)
-};
-
-struct dce_i2c_hw *dcn20_i2c_hw_create(
- struct dc_context *ctx,
- uint32_t inst)
-{
- struct dce_i2c_hw *dce_i2c_hw =
- kzalloc(sizeof(struct dce_i2c_hw), GFP_ATOMIC);
-
- if (!dce_i2c_hw)
- return NULL;
-
- dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst,
- &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks);
-
- return dce_i2c_hw;
-}
-struct mpc *dcn20_mpc_create(struct dc_context *ctx)
-{
- struct dcn20_mpc *mpc20 = kzalloc(sizeof(struct dcn20_mpc),
- GFP_ATOMIC);
-
- if (!mpc20)
- return NULL;
-
- dcn20_mpc_construct(mpc20, ctx,
- &mpc_regs,
- &mpc_shift,
- &mpc_mask,
- 6);
-
- return &mpc20->base;
-}
-
-struct hubbub *dcn20_hubbub_create(struct dc_context *ctx)
-{
- int i;
- struct dcn20_hubbub *hubbub = kzalloc(sizeof(struct dcn20_hubbub),
- GFP_ATOMIC);
-
- if (!hubbub)
- return NULL;
-
- hubbub2_construct(hubbub, ctx,
- &hubbub_reg,
- &hubbub_shift,
- &hubbub_mask);
-
- for (i = 0; i < res_cap_nv10.num_vmid; i++) {
- struct dcn20_vmid *vmid = &hubbub->vmid[i];
-
- vmid->ctx = ctx;
-
- vmid->regs = &vmid_regs[i];
- vmid->shifts = &vmid_shifts;
- vmid->masks = &vmid_masks;
- }
-
- return &hubbub->base;
-}
-
-struct timing_generator *dcn20_timing_generator_create(
- struct dc_context *ctx,
- uint32_t instance)
-{
- struct optc *tgn10 =
- kzalloc(sizeof(struct optc), GFP_ATOMIC);
-
- if (!tgn10)
- return NULL;
-
- tgn10->base.inst = instance;
- tgn10->base.ctx = ctx;
-
- tgn10->tg_regs = &tg_regs[instance];
- tgn10->tg_shift = &tg_shift;
- tgn10->tg_mask = &tg_mask;
-
- dcn20_timing_generator_init(tgn10);
-
- return &tgn10->base;
-}
-
-static const struct encoder_feature_support link_enc_feature = {
- .max_hdmi_deep_color = COLOR_DEPTH_121212,
- .max_hdmi_pixel_clock = 600000,
- .hdmi_ycbcr420_supported = true,
- .dp_ycbcr420_supported = true,
- .fec_supported = true,
- .flags.bits.IS_HBR2_CAPABLE = true,
- .flags.bits.IS_HBR3_CAPABLE = true,
- .flags.bits.IS_TPS3_CAPABLE = true,
- .flags.bits.IS_TPS4_CAPABLE = true
-};
-
-struct link_encoder *dcn20_link_encoder_create(
- const struct encoder_init_data *enc_init_data)
-{
- struct dcn20_link_encoder *enc20 =
- kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
- int link_regs_id;
-
- if (!enc20)
- return NULL;
-
- link_regs_id =
- map_transmitter_id_to_phy_instance(enc_init_data->transmitter);
-
- dcn20_link_encoder_construct(enc20,
- enc_init_data,
- &link_enc_feature,
- &link_enc_regs[link_regs_id],
- &link_enc_aux_regs[enc_init_data->channel - 1],
- &link_enc_hpd_regs[enc_init_data->hpd_source],
- &le_shift,
- &le_mask);
-
- return &enc20->enc10.base;
-}
-
-static struct panel_cntl *dcn20_panel_cntl_create(const struct panel_cntl_init_data *init_data)
-{
- struct dce_panel_cntl *panel_cntl =
- kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL);
-
- if (!panel_cntl)
- return NULL;
-
- dce_panel_cntl_construct(panel_cntl,
- init_data,
- &panel_cntl_regs[init_data->inst],
- &panel_cntl_shift,
- &panel_cntl_mask);
-
- return &panel_cntl->base;
-}
-
-static struct clock_source *dcn20_clock_source_create(
- struct dc_context *ctx,
- struct dc_bios *bios,
- enum clock_source_id id,
- const struct dce110_clk_src_regs *regs,
- bool dp_clk_src)
-{
- struct dce110_clk_src *clk_src =
- kzalloc(sizeof(struct dce110_clk_src), GFP_ATOMIC);
-
- if (!clk_src)
- return NULL;
-
- if (dcn20_clk_src_construct(clk_src, ctx, bios, id,
- regs, &cs_shift, &cs_mask)) {
- clk_src->base.dp_clk_src = dp_clk_src;
- return &clk_src->base;
- }
-
- kfree(clk_src);
- BREAK_TO_DEBUGGER();
- return NULL;
-}
-
-static void read_dce_straps(
- struct dc_context *ctx,
- struct resource_straps *straps)
-{
- generic_reg_get(ctx, mmDC_PINSTRAPS + BASE(mmDC_PINSTRAPS_BASE_IDX),
- FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio);
-}
-
-static struct audio *dcn20_create_audio(
- struct dc_context *ctx, unsigned int inst)
-{
- return dce_audio_create(ctx, inst,
- &audio_regs[inst], &audio_shift, &audio_mask);
-}
-
-struct stream_encoder *dcn20_stream_encoder_create(
- enum engine_id eng_id,
- struct dc_context *ctx)
-{
- struct dcn10_stream_encoder *enc1 =
- kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL);
-
- if (!enc1)
- return NULL;
-
- if (ASICREV_IS_NAVI14_M(ctx->asic_id.hw_internal_rev)) {
- if (eng_id >= ENGINE_ID_DIGD)
- eng_id++;
- }
-
- dcn20_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id,
- &stream_enc_regs[eng_id],
- &se_shift, &se_mask);
-
- return &enc1->base;
-}
-
-static const struct dce_hwseq_registers hwseq_reg = {
- HWSEQ_DCN2_REG_LIST()
-};
-
-static const struct dce_hwseq_shift hwseq_shift = {
- HWSEQ_DCN2_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce_hwseq_mask hwseq_mask = {
- HWSEQ_DCN2_MASK_SH_LIST(_MASK)
-};
-
-struct dce_hwseq *dcn20_hwseq_create(
- struct dc_context *ctx)
-{
- struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL);
-
- if (hws) {
- hws->ctx = ctx;
- hws->regs = &hwseq_reg;
- hws->shifts = &hwseq_shift;
- hws->masks = &hwseq_mask;
- }
- return hws;
-}
-
-static const struct resource_create_funcs res_create_funcs = {
- .read_dce_straps = read_dce_straps,
- .create_audio = dcn20_create_audio,
- .create_stream_encoder = dcn20_stream_encoder_create,
- .create_hwseq = dcn20_hwseq_create,
-};
-
-static const struct resource_create_funcs res_create_maximus_funcs = {
- .read_dce_straps = NULL,
- .create_audio = NULL,
- .create_stream_encoder = NULL,
- .create_hwseq = dcn20_hwseq_create,
-};
-
-static void dcn20_pp_smu_destroy(struct pp_smu_funcs **pp_smu);
-
-void dcn20_clock_source_destroy(struct clock_source **clk_src)
-{
- kfree(TO_DCE110_CLK_SRC(*clk_src));
- *clk_src = NULL;
-}
-
-
-struct display_stream_compressor *dcn20_dsc_create(
- struct dc_context *ctx, uint32_t inst)
-{
- struct dcn20_dsc *dsc =
- kzalloc(sizeof(struct dcn20_dsc), GFP_ATOMIC);
-
- if (!dsc) {
- BREAK_TO_DEBUGGER();
- return NULL;
- }
-
- dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask);
- return &dsc->base;
-}
-
-void dcn20_dsc_destroy(struct display_stream_compressor **dsc)
-{
- kfree(container_of(*dsc, struct dcn20_dsc, base));
- *dsc = NULL;
-}
-
-
-static void dcn20_resource_destruct(struct dcn20_resource_pool *pool)
-{
- unsigned int i;
-
- for (i = 0; i < pool->base.stream_enc_count; i++) {
- if (pool->base.stream_enc[i] != NULL) {
- kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i]));
- pool->base.stream_enc[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_dsc; i++) {
- if (pool->base.dscs[i] != NULL)
- dcn20_dsc_destroy(&pool->base.dscs[i]);
- }
-
- if (pool->base.mpc != NULL) {
- kfree(TO_DCN20_MPC(pool->base.mpc));
- pool->base.mpc = NULL;
- }
- if (pool->base.hubbub != NULL) {
- kfree(pool->base.hubbub);
- pool->base.hubbub = NULL;
- }
- for (i = 0; i < pool->base.pipe_count; i++) {
- if (pool->base.dpps[i] != NULL)
- dcn20_dpp_destroy(&pool->base.dpps[i]);
-
- if (pool->base.ipps[i] != NULL)
- pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]);
-
- if (pool->base.hubps[i] != NULL) {
- kfree(TO_DCN20_HUBP(pool->base.hubps[i]));
- pool->base.hubps[i] = NULL;
- }
-
- if (pool->base.irqs != NULL) {
- dal_irq_service_destroy(&pool->base.irqs);
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_ddc; i++) {
- if (pool->base.engines[i] != NULL)
- dce110_engine_destroy(&pool->base.engines[i]);
- if (pool->base.hw_i2cs[i] != NULL) {
- kfree(pool->base.hw_i2cs[i]);
- pool->base.hw_i2cs[i] = NULL;
- }
- if (pool->base.sw_i2cs[i] != NULL) {
- kfree(pool->base.sw_i2cs[i]);
- pool->base.sw_i2cs[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_opp; i++) {
- if (pool->base.opps[i] != NULL)
- pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]);
- }
-
- for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
- if (pool->base.timing_generators[i] != NULL) {
- kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i]));
- pool->base.timing_generators[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_dwb; i++) {
- if (pool->base.dwbc[i] != NULL) {
- kfree(TO_DCN20_DWBC(pool->base.dwbc[i]));
- pool->base.dwbc[i] = NULL;
- }
- if (pool->base.mcif_wb[i] != NULL) {
- kfree(TO_DCN20_MMHUBBUB(pool->base.mcif_wb[i]));
- pool->base.mcif_wb[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->base.audio_count; i++) {
- if (pool->base.audios[i])
- dce_aud_destroy(&pool->base.audios[i]);
- }
-
- for (i = 0; i < pool->base.clk_src_count; i++) {
- if (pool->base.clock_sources[i] != NULL) {
- dcn20_clock_source_destroy(&pool->base.clock_sources[i]);
- pool->base.clock_sources[i] = NULL;
- }
- }
-
- if (pool->base.dp_clock_source != NULL) {
- dcn20_clock_source_destroy(&pool->base.dp_clock_source);
- pool->base.dp_clock_source = NULL;
- }
-
-
- if (pool->base.abm != NULL)
- dce_abm_destroy(&pool->base.abm);
-
- if (pool->base.dmcu != NULL)
- dce_dmcu_destroy(&pool->base.dmcu);
-
- if (pool->base.dccg != NULL)
- dcn_dccg_destroy(&pool->base.dccg);
-
- if (pool->base.pp_smu != NULL)
- dcn20_pp_smu_destroy(&pool->base.pp_smu);
-
- if (pool->base.oem_device != NULL)
- dal_ddc_service_destroy(&pool->base.oem_device);
-}
-
-struct hubp *dcn20_hubp_create(
- struct dc_context *ctx,
- uint32_t inst)
-{
- struct dcn20_hubp *hubp2 =
- kzalloc(sizeof(struct dcn20_hubp), GFP_ATOMIC);
-
- if (!hubp2)
- return NULL;
-
- if (hubp2_construct(hubp2, ctx, inst,
- &hubp_regs[inst], &hubp_shift, &hubp_mask))
- return &hubp2->base;
-
- BREAK_TO_DEBUGGER();
- kfree(hubp2);
- return NULL;
-}
-
-static void get_pixel_clock_parameters(
- struct pipe_ctx *pipe_ctx,
- struct pixel_clk_params *pixel_clk_params)
-{
- const struct dc_stream_state *stream = pipe_ctx->stream;
- struct pipe_ctx *odm_pipe;
- int opp_cnt = 1;
- struct dc_link *link = stream->link;
- struct link_encoder *link_enc = NULL;
-
- for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
- opp_cnt++;
-
- pixel_clk_params->requested_pix_clk_100hz = stream->timing.pix_clk_100hz;
-
- /* Links supporting dynamically assigned link encoder will be assigned next
- * available encoder if one not already assigned.
- */
- if (link->is_dig_mapping_flexible &&
- link->dc->res_pool->funcs->link_encs_assign) {
- link_enc = link_enc_cfg_get_link_enc_used_by_stream(stream->ctx->dc, stream);
- if (link_enc == NULL)
- link_enc = link_enc_cfg_get_next_avail_link_enc(stream->ctx->dc);
- } else
- link_enc = stream->link->link_enc;
- ASSERT(link_enc);
-
- if (link_enc)
- pixel_clk_params->encoder_object_id = link_enc->id;
- pixel_clk_params->signal_type = pipe_ctx->stream->signal;
- pixel_clk_params->controller_id = pipe_ctx->stream_res.tg->inst + 1;
- /* TODO: un-hardcode*/
- /* TODO - DP2.0 HW: calculate requested_sym_clk for UHBR rates */
- pixel_clk_params->requested_sym_clk = LINK_RATE_LOW *
- LINK_RATE_REF_FREQ_IN_KHZ;
- pixel_clk_params->flags.ENABLE_SS = 0;
- pixel_clk_params->color_depth =
- stream->timing.display_color_depth;
- pixel_clk_params->flags.DISPLAY_BLANKED = 1;
- pixel_clk_params->pixel_encoding = stream->timing.pixel_encoding;
-
- if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422)
- pixel_clk_params->color_depth = COLOR_DEPTH_888;
-
- if (opp_cnt == 4)
- pixel_clk_params->requested_pix_clk_100hz /= 4;
- else if (optc2_is_two_pixels_per_containter(&stream->timing) || opp_cnt == 2)
- pixel_clk_params->requested_pix_clk_100hz /= 2;
-
- if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING)
- pixel_clk_params->requested_pix_clk_100hz *= 2;
-
-}
-
-static void build_clamping_params(struct dc_stream_state *stream)
-{
- stream->clamping.clamping_level = CLAMPING_FULL_RANGE;
- stream->clamping.c_depth = stream->timing.display_color_depth;
- stream->clamping.pixel_encoding = stream->timing.pixel_encoding;
-}
-
-static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx)
-{
-
- get_pixel_clock_parameters(pipe_ctx, &pipe_ctx->stream_res.pix_clk_params);
-
- pipe_ctx->clock_source->funcs->get_pix_clk_dividers(
- pipe_ctx->clock_source,
- &pipe_ctx->stream_res.pix_clk_params,
- &pipe_ctx->pll_settings);
-
- pipe_ctx->stream->clamping.pixel_encoding = pipe_ctx->stream->timing.pixel_encoding;
-
- resource_build_bit_depth_reduction_params(pipe_ctx->stream,
- &pipe_ctx->stream->bit_depth_params);
- build_clamping_params(pipe_ctx->stream);
-
- return DC_OK;
-}
-
-enum dc_status dcn20_build_mapped_resource(const struct dc *dc, struct dc_state *context, struct dc_stream_state *stream)
-{
- enum dc_status status = DC_OK;
- struct pipe_ctx *pipe_ctx = resource_get_head_pipe_for_stream(&context->res_ctx, stream);
-
- if (!pipe_ctx)
- return DC_ERROR_UNEXPECTED;
-
-
- status = build_pipe_hw_param(pipe_ctx);
-
- return status;
-}
-
-
-void dcn20_acquire_dsc(const struct dc *dc,
- struct resource_context *res_ctx,
- struct display_stream_compressor **dsc,
- int pipe_idx)
-{
- int i;
- const struct resource_pool *pool = dc->res_pool;
- struct display_stream_compressor *dsc_old = dc->current_state->res_ctx.pipe_ctx[pipe_idx].stream_res.dsc;
-
- ASSERT(*dsc == NULL); /* If this ASSERT fails, dsc was not released properly */
- *dsc = NULL;
-
- /* Always do 1-to-1 mapping when number of DSCs is same as number of pipes */
- if (pool->res_cap->num_dsc == pool->res_cap->num_opp) {
- *dsc = pool->dscs[pipe_idx];
- res_ctx->is_dsc_acquired[pipe_idx] = true;
- return;
- }
-
- /* Return old DSC to avoid the need for re-programming */
- if (dsc_old && !res_ctx->is_dsc_acquired[dsc_old->inst]) {
- *dsc = dsc_old;
- res_ctx->is_dsc_acquired[dsc_old->inst] = true;
- return ;
- }
-
- /* Find first free DSC */
- for (i = 0; i < pool->res_cap->num_dsc; i++)
- if (!res_ctx->is_dsc_acquired[i]) {
- *dsc = pool->dscs[i];
- res_ctx->is_dsc_acquired[i] = true;
- break;
- }
-}
-
-void dcn20_release_dsc(struct resource_context *res_ctx,
- const struct resource_pool *pool,
- struct display_stream_compressor **dsc)
-{
- int i;
-
- for (i = 0; i < pool->res_cap->num_dsc; i++)
- if (pool->dscs[i] == *dsc) {
- res_ctx->is_dsc_acquired[i] = false;
- *dsc = NULL;
- break;
- }
-}
-
-
-
-enum dc_status dcn20_add_dsc_to_stream_resource(struct dc *dc,
- struct dc_state *dc_ctx,
- struct dc_stream_state *dc_stream)
-{
- enum dc_status result = DC_OK;
- int i;
-
- /* Get a DSC if required and available */
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &dc_ctx->res_ctx.pipe_ctx[i];
-
- if (pipe_ctx->stream != dc_stream)
- continue;
-
- if (pipe_ctx->stream_res.dsc)
- continue;
-
- dcn20_acquire_dsc(dc, &dc_ctx->res_ctx, &pipe_ctx->stream_res.dsc, i);
-
- /* The number of DSCs can be less than the number of pipes */
- if (!pipe_ctx->stream_res.dsc) {
- result = DC_NO_DSC_RESOURCE;
- }
-
- break;
- }
-
- return result;
-}
-
-
-static enum dc_status remove_dsc_from_stream_resource(struct dc *dc,
- struct dc_state *new_ctx,
- struct dc_stream_state *dc_stream)
-{
- struct pipe_ctx *pipe_ctx = NULL;
- int i;
-
- for (i = 0; i < MAX_PIPES; i++) {
- if (new_ctx->res_ctx.pipe_ctx[i].stream == dc_stream && !new_ctx->res_ctx.pipe_ctx[i].top_pipe) {
- pipe_ctx = &new_ctx->res_ctx.pipe_ctx[i];
-
- if (pipe_ctx->stream_res.dsc)
- dcn20_release_dsc(&new_ctx->res_ctx, dc->res_pool, &pipe_ctx->stream_res.dsc);
- }
- }
-
- if (!pipe_ctx)
- return DC_ERROR_UNEXPECTED;
- else
- return DC_OK;
-}
-
-
-enum dc_status dcn20_add_stream_to_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream)
-{
- enum dc_status result = DC_ERROR_UNEXPECTED;
-
- result = resource_map_pool_resources(dc, new_ctx, dc_stream);
-
- if (result == DC_OK)
- result = resource_map_phy_clock_resources(dc, new_ctx, dc_stream);
-
- /* Get a DSC if required and available */
- if (result == DC_OK && dc_stream->timing.flags.DSC)
- result = dcn20_add_dsc_to_stream_resource(dc, new_ctx, dc_stream);
-
- if (result == DC_OK)
- result = dcn20_build_mapped_resource(dc, new_ctx, dc_stream);
-
- return result;
-}
-
-
-enum dc_status dcn20_remove_stream_from_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream)
-{
- enum dc_status result = DC_OK;
-
- result = remove_dsc_from_stream_resource(dc, new_ctx, dc_stream);
-
- return result;
-}
-
-
-static void swizzle_to_dml_params(
- enum swizzle_mode_values swizzle,
- unsigned int *sw_mode)
-{
- switch (swizzle) {
- case DC_SW_LINEAR:
- *sw_mode = dm_sw_linear;
- break;
- case DC_SW_4KB_S:
- *sw_mode = dm_sw_4kb_s;
- break;
- case DC_SW_4KB_S_X:
- *sw_mode = dm_sw_4kb_s_x;
- break;
- case DC_SW_4KB_D:
- *sw_mode = dm_sw_4kb_d;
- break;
- case DC_SW_4KB_D_X:
- *sw_mode = dm_sw_4kb_d_x;
- break;
- case DC_SW_64KB_S:
- *sw_mode = dm_sw_64kb_s;
- break;
- case DC_SW_64KB_S_X:
- *sw_mode = dm_sw_64kb_s_x;
- break;
- case DC_SW_64KB_S_T:
- *sw_mode = dm_sw_64kb_s_t;
- break;
- case DC_SW_64KB_D:
- *sw_mode = dm_sw_64kb_d;
- break;
- case DC_SW_64KB_D_X:
- *sw_mode = dm_sw_64kb_d_x;
- break;
- case DC_SW_64KB_D_T:
- *sw_mode = dm_sw_64kb_d_t;
- break;
- case DC_SW_64KB_R_X:
- *sw_mode = dm_sw_64kb_r_x;
- break;
- case DC_SW_VAR_S:
- *sw_mode = dm_sw_var_s;
- break;
- case DC_SW_VAR_S_X:
- *sw_mode = dm_sw_var_s_x;
- break;
- case DC_SW_VAR_D:
- *sw_mode = dm_sw_var_d;
- break;
- case DC_SW_VAR_D_X:
- *sw_mode = dm_sw_var_d_x;
- break;
- case DC_SW_VAR_R_X:
- *sw_mode = dm_sw_var_r_x;
- break;
- default:
- ASSERT(0); /* Not supported */
- break;
- }
-}
-
-bool dcn20_split_stream_for_odm(
- const struct dc *dc,
- struct resource_context *res_ctx,
- struct pipe_ctx *prev_odm_pipe,
- struct pipe_ctx *next_odm_pipe)
-{
- int pipe_idx = next_odm_pipe->pipe_idx;
- const struct resource_pool *pool = dc->res_pool;
-
- *next_odm_pipe = *prev_odm_pipe;
-
- next_odm_pipe->pipe_idx = pipe_idx;
- next_odm_pipe->plane_res.mi = pool->mis[next_odm_pipe->pipe_idx];
- next_odm_pipe->plane_res.hubp = pool->hubps[next_odm_pipe->pipe_idx];
- next_odm_pipe->plane_res.ipp = pool->ipps[next_odm_pipe->pipe_idx];
- next_odm_pipe->plane_res.xfm = pool->transforms[next_odm_pipe->pipe_idx];
- next_odm_pipe->plane_res.dpp = pool->dpps[next_odm_pipe->pipe_idx];
- next_odm_pipe->plane_res.mpcc_inst = pool->dpps[next_odm_pipe->pipe_idx]->inst;
- next_odm_pipe->stream_res.dsc = NULL;
- if (prev_odm_pipe->next_odm_pipe && prev_odm_pipe->next_odm_pipe != next_odm_pipe) {
- next_odm_pipe->next_odm_pipe = prev_odm_pipe->next_odm_pipe;
- next_odm_pipe->next_odm_pipe->prev_odm_pipe = next_odm_pipe;
- }
- if (prev_odm_pipe->top_pipe && prev_odm_pipe->top_pipe->next_odm_pipe) {
- prev_odm_pipe->top_pipe->next_odm_pipe->bottom_pipe = next_odm_pipe;
- next_odm_pipe->top_pipe = prev_odm_pipe->top_pipe->next_odm_pipe;
- }
- if (prev_odm_pipe->bottom_pipe && prev_odm_pipe->bottom_pipe->next_odm_pipe) {
- prev_odm_pipe->bottom_pipe->next_odm_pipe->top_pipe = next_odm_pipe;
- next_odm_pipe->bottom_pipe = prev_odm_pipe->bottom_pipe->next_odm_pipe;
- }
- prev_odm_pipe->next_odm_pipe = next_odm_pipe;
- next_odm_pipe->prev_odm_pipe = prev_odm_pipe;
-
- if (prev_odm_pipe->plane_state) {
- struct scaler_data *sd = &prev_odm_pipe->plane_res.scl_data;
- int new_width;
-
- /* HACTIVE halved for odm combine */
- sd->h_active /= 2;
- /* Calculate new vp and recout for left pipe */
- /* Need at least 16 pixels width per side */
- if (sd->recout.x + 16 >= sd->h_active)
- return false;
- new_width = sd->h_active - sd->recout.x;
- sd->viewport.width -= dc_fixpt_floor(dc_fixpt_mul_int(
- sd->ratios.horz, sd->recout.width - new_width));
- sd->viewport_c.width -= dc_fixpt_floor(dc_fixpt_mul_int(
- sd->ratios.horz_c, sd->recout.width - new_width));
- sd->recout.width = new_width;
-
- /* Calculate new vp and recout for right pipe */
- sd = &next_odm_pipe->plane_res.scl_data;
- /* HACTIVE halved for odm combine */
- sd->h_active /= 2;
- /* Need at least 16 pixels width per side */
- if (new_width <= 16)
- return false;
- new_width = sd->recout.width + sd->recout.x - sd->h_active;
- sd->viewport.width -= dc_fixpt_floor(dc_fixpt_mul_int(
- sd->ratios.horz, sd->recout.width - new_width));
- sd->viewport_c.width -= dc_fixpt_floor(dc_fixpt_mul_int(
- sd->ratios.horz_c, sd->recout.width - new_width));
- sd->recout.width = new_width;
- sd->viewport.x += dc_fixpt_floor(dc_fixpt_mul_int(
- sd->ratios.horz, sd->h_active - sd->recout.x));
- sd->viewport_c.x += dc_fixpt_floor(dc_fixpt_mul_int(
- sd->ratios.horz_c, sd->h_active - sd->recout.x));
- sd->recout.x = 0;
- }
- if (!next_odm_pipe->top_pipe)
- next_odm_pipe->stream_res.opp = pool->opps[next_odm_pipe->pipe_idx];
- else
- next_odm_pipe->stream_res.opp = next_odm_pipe->top_pipe->stream_res.opp;
- if (next_odm_pipe->stream->timing.flags.DSC == 1 && !next_odm_pipe->top_pipe) {
- dcn20_acquire_dsc(dc, res_ctx, &next_odm_pipe->stream_res.dsc, next_odm_pipe->pipe_idx);
- ASSERT(next_odm_pipe->stream_res.dsc);
- if (next_odm_pipe->stream_res.dsc == NULL)
- return false;
- }
-
- return true;
-}
-
-void dcn20_split_stream_for_mpc(
- struct resource_context *res_ctx,
- const struct resource_pool *pool,
- struct pipe_ctx *primary_pipe,
- struct pipe_ctx *secondary_pipe)
-{
- int pipe_idx = secondary_pipe->pipe_idx;
- struct pipe_ctx *sec_bot_pipe = secondary_pipe->bottom_pipe;
-
- *secondary_pipe = *primary_pipe;
- secondary_pipe->bottom_pipe = sec_bot_pipe;
-
- secondary_pipe->pipe_idx = pipe_idx;
- secondary_pipe->plane_res.mi = pool->mis[secondary_pipe->pipe_idx];
- secondary_pipe->plane_res.hubp = pool->hubps[secondary_pipe->pipe_idx];
- secondary_pipe->plane_res.ipp = pool->ipps[secondary_pipe->pipe_idx];
- secondary_pipe->plane_res.xfm = pool->transforms[secondary_pipe->pipe_idx];
- secondary_pipe->plane_res.dpp = pool->dpps[secondary_pipe->pipe_idx];
- secondary_pipe->plane_res.mpcc_inst = pool->dpps[secondary_pipe->pipe_idx]->inst;
- secondary_pipe->stream_res.dsc = NULL;
- if (primary_pipe->bottom_pipe && primary_pipe->bottom_pipe != secondary_pipe) {
- ASSERT(!secondary_pipe->bottom_pipe);
- secondary_pipe->bottom_pipe = primary_pipe->bottom_pipe;
- secondary_pipe->bottom_pipe->top_pipe = secondary_pipe;
- }
- primary_pipe->bottom_pipe = secondary_pipe;
- secondary_pipe->top_pipe = primary_pipe;
-
- ASSERT(primary_pipe->plane_state);
-}
-
-int dcn20_populate_dml_pipes_from_context(
- struct dc *dc,
- struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- bool fast_validate)
-{
- int pipe_cnt, i;
- bool synchronized_vblank = true;
- struct resource_context *res_ctx = &context->res_ctx;
-
- for (i = 0, pipe_cnt = -1; i < dc->res_pool->pipe_count; i++) {
- if (!res_ctx->pipe_ctx[i].stream)
- continue;
-
- if (pipe_cnt < 0) {
- pipe_cnt = i;
- continue;
- }
-
- if (res_ctx->pipe_ctx[pipe_cnt].stream == res_ctx->pipe_ctx[i].stream)
- continue;
-
- if (dc->debug.disable_timing_sync ||
- (!resource_are_streams_timing_synchronizable(
- res_ctx->pipe_ctx[pipe_cnt].stream,
- res_ctx->pipe_ctx[i].stream) &&
- !resource_are_vblanks_synchronizable(
- res_ctx->pipe_ctx[pipe_cnt].stream,
- res_ctx->pipe_ctx[i].stream))) {
- synchronized_vblank = false;
- break;
- }
- }
-
- for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
- struct dc_crtc_timing *timing = &res_ctx->pipe_ctx[i].stream->timing;
- unsigned int v_total;
- unsigned int front_porch;
- int output_bpc;
- struct audio_check aud_check = {0};
-
- if (!res_ctx->pipe_ctx[i].stream)
- continue;
-
- v_total = timing->v_total;
- front_porch = timing->v_front_porch;
-
- /* todo:
- pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = 0;
- pipes[pipe_cnt].pipe.src.dcc = 0;
- pipes[pipe_cnt].pipe.src.vm = 0;*/
-
- pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0;
-
- pipes[pipe_cnt].dout.dsc_enable = res_ctx->pipe_ctx[i].stream->timing.flags.DSC;
- /* todo: rotation?*/
- pipes[pipe_cnt].dout.dsc_slices = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.num_slices_h;
- if (res_ctx->pipe_ctx[i].stream->use_dynamic_meta) {
- pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = true;
- /* 1/2 vblank */
- pipes[pipe_cnt].pipe.src.dynamic_metadata_lines_before_active =
- (v_total - timing->v_addressable
- - timing->v_border_top - timing->v_border_bottom) / 2;
- /* 36 bytes dp, 32 hdmi */
- pipes[pipe_cnt].pipe.src.dynamic_metadata_xmit_bytes =
- dc_is_dp_signal(res_ctx->pipe_ctx[i].stream->signal) ? 36 : 32;
- }
- pipes[pipe_cnt].pipe.src.dcc = false;
- pipes[pipe_cnt].pipe.src.dcc_rate = 1;
- pipes[pipe_cnt].pipe.dest.synchronized_vblank_all_planes = synchronized_vblank;
- pipes[pipe_cnt].pipe.dest.hblank_start = timing->h_total - timing->h_front_porch;
- pipes[pipe_cnt].pipe.dest.hblank_end = pipes[pipe_cnt].pipe.dest.hblank_start
- - timing->h_addressable
- - timing->h_border_left
- - timing->h_border_right;
- pipes[pipe_cnt].pipe.dest.vblank_start = v_total - front_porch;
- pipes[pipe_cnt].pipe.dest.vblank_end = pipes[pipe_cnt].pipe.dest.vblank_start
- - timing->v_addressable
- - timing->v_border_top
- - timing->v_border_bottom;
- pipes[pipe_cnt].pipe.dest.htotal = timing->h_total;
- pipes[pipe_cnt].pipe.dest.vtotal = v_total;
- pipes[pipe_cnt].pipe.dest.hactive =
- timing->h_addressable + timing->h_border_left + timing->h_border_right;
- pipes[pipe_cnt].pipe.dest.vactive =
- timing->v_addressable + timing->v_border_top + timing->v_border_bottom;
- pipes[pipe_cnt].pipe.dest.interlaced = timing->flags.INTERLACE;
- pipes[pipe_cnt].pipe.dest.pixel_rate_mhz = timing->pix_clk_100hz/10000.0;
- if (timing->timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING)
- pipes[pipe_cnt].pipe.dest.pixel_rate_mhz *= 2;
- pipes[pipe_cnt].pipe.dest.otg_inst = res_ctx->pipe_ctx[i].stream_res.tg->inst;
- pipes[pipe_cnt].dout.dp_lanes = 4;
- pipes[pipe_cnt].dout.is_virtual = 0;
- pipes[pipe_cnt].pipe.dest.vtotal_min = res_ctx->pipe_ctx[i].stream->adjust.v_total_min;
- pipes[pipe_cnt].pipe.dest.vtotal_max = res_ctx->pipe_ctx[i].stream->adjust.v_total_max;
- switch (get_num_odm_splits(&res_ctx->pipe_ctx[i])) {
- case 1:
- pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_2to1;
- break;
- case 3:
- pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_4to1;
- break;
- default:
- pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_disabled;
- }
- pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].pipe_idx;
- if (res_ctx->pipe_ctx[i].top_pipe && res_ctx->pipe_ctx[i].top_pipe->plane_state
- == res_ctx->pipe_ctx[i].plane_state) {
- struct pipe_ctx *first_pipe = res_ctx->pipe_ctx[i].top_pipe;
- int split_idx = 0;
-
- while (first_pipe->top_pipe && first_pipe->top_pipe->plane_state
- == res_ctx->pipe_ctx[i].plane_state) {
- first_pipe = first_pipe->top_pipe;
- split_idx++;
- }
- /* Treat 4to1 mpc combine as an mpo of 2 2-to-1 combines */
- if (split_idx == 0)
- pipes[pipe_cnt].pipe.src.hsplit_grp = first_pipe->pipe_idx;
- else if (split_idx == 1)
- pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].pipe_idx;
- else if (split_idx == 2)
- pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].top_pipe->pipe_idx;
- } else if (res_ctx->pipe_ctx[i].prev_odm_pipe) {
- struct pipe_ctx *first_pipe = res_ctx->pipe_ctx[i].prev_odm_pipe;
-
- while (first_pipe->prev_odm_pipe)
- first_pipe = first_pipe->prev_odm_pipe;
- pipes[pipe_cnt].pipe.src.hsplit_grp = first_pipe->pipe_idx;
- }
-
- switch (res_ctx->pipe_ctx[i].stream->signal) {
- case SIGNAL_TYPE_DISPLAY_PORT_MST:
- case SIGNAL_TYPE_DISPLAY_PORT:
- pipes[pipe_cnt].dout.output_type = dm_dp;
- break;
- case SIGNAL_TYPE_EDP:
- pipes[pipe_cnt].dout.output_type = dm_edp;
- break;
- case SIGNAL_TYPE_HDMI_TYPE_A:
- case SIGNAL_TYPE_DVI_SINGLE_LINK:
- case SIGNAL_TYPE_DVI_DUAL_LINK:
- pipes[pipe_cnt].dout.output_type = dm_hdmi;
- break;
- default:
- /* In case there is no signal, set dp with 4 lanes to allow max config */
- pipes[pipe_cnt].dout.is_virtual = 1;
- pipes[pipe_cnt].dout.output_type = dm_dp;
- pipes[pipe_cnt].dout.dp_lanes = 4;
- }
-
- switch (res_ctx->pipe_ctx[i].stream->timing.display_color_depth) {
- case COLOR_DEPTH_666:
- output_bpc = 6;
- break;
- case COLOR_DEPTH_888:
- output_bpc = 8;
- break;
- case COLOR_DEPTH_101010:
- output_bpc = 10;
- break;
- case COLOR_DEPTH_121212:
- output_bpc = 12;
- break;
- case COLOR_DEPTH_141414:
- output_bpc = 14;
- break;
- case COLOR_DEPTH_161616:
- output_bpc = 16;
- break;
- case COLOR_DEPTH_999:
- output_bpc = 9;
- break;
- case COLOR_DEPTH_111111:
- output_bpc = 11;
- break;
- default:
- output_bpc = 8;
- break;
- }
-
- switch (res_ctx->pipe_ctx[i].stream->timing.pixel_encoding) {
- case PIXEL_ENCODING_RGB:
- case PIXEL_ENCODING_YCBCR444:
- pipes[pipe_cnt].dout.output_format = dm_444;
- pipes[pipe_cnt].dout.output_bpp = output_bpc * 3;
- break;
- case PIXEL_ENCODING_YCBCR420:
- pipes[pipe_cnt].dout.output_format = dm_420;
- pipes[pipe_cnt].dout.output_bpp = (output_bpc * 3.0) / 2;
- break;
- case PIXEL_ENCODING_YCBCR422:
- if (res_ctx->pipe_ctx[i].stream->timing.flags.DSC &&
- !res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.ycbcr422_simple)
- pipes[pipe_cnt].dout.output_format = dm_n422;
- else
- pipes[pipe_cnt].dout.output_format = dm_s422;
- pipes[pipe_cnt].dout.output_bpp = output_bpc * 2;
- break;
- default:
- pipes[pipe_cnt].dout.output_format = dm_444;
- pipes[pipe_cnt].dout.output_bpp = output_bpc * 3;
- }
-
- if (res_ctx->pipe_ctx[i].stream->timing.flags.DSC)
- pipes[pipe_cnt].dout.output_bpp = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.bits_per_pixel / 16.0;
-
- /* todo: default max for now, until there is logic reflecting this in dc*/
- pipes[pipe_cnt].dout.dsc_input_bpc = 12;
- /*fill up the audio sample rate (unit in kHz)*/
- get_audio_check(&res_ctx->pipe_ctx[i].stream->audio_info, &aud_check);
- pipes[pipe_cnt].dout.max_audio_sample_rate = aud_check.max_audiosample_rate / 1000;
- /*
- * For graphic plane, cursor number is 1, nv12 is 0
- * bw calculations due to cursor on/off
- */
- if (res_ctx->pipe_ctx[i].plane_state &&
- res_ctx->pipe_ctx[i].plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE)
- pipes[pipe_cnt].pipe.src.num_cursors = 0;
- else
- pipes[pipe_cnt].pipe.src.num_cursors = dc->dml.ip.number_of_cursors;
-
- pipes[pipe_cnt].pipe.src.cur0_src_width = 256;
- pipes[pipe_cnt].pipe.src.cur0_bpp = dm_cur_32bit;
-
- if (!res_ctx->pipe_ctx[i].plane_state) {
- pipes[pipe_cnt].pipe.src.is_hsplit = pipes[pipe_cnt].pipe.dest.odm_combine != dm_odm_combine_mode_disabled;
- pipes[pipe_cnt].pipe.src.source_scan = dm_horz;
- pipes[pipe_cnt].pipe.src.sw_mode = dm_sw_4kb_s;
- pipes[pipe_cnt].pipe.src.macro_tile_size = dm_64k_tile;
- pipes[pipe_cnt].pipe.src.viewport_width = timing->h_addressable;
- if (pipes[pipe_cnt].pipe.src.viewport_width > 1920)
- pipes[pipe_cnt].pipe.src.viewport_width = 1920;
- pipes[pipe_cnt].pipe.src.viewport_height = timing->v_addressable;
- if (pipes[pipe_cnt].pipe.src.viewport_height > 1080)
- pipes[pipe_cnt].pipe.src.viewport_height = 1080;
- pipes[pipe_cnt].pipe.src.surface_height_y = pipes[pipe_cnt].pipe.src.viewport_height;
- pipes[pipe_cnt].pipe.src.surface_width_y = pipes[pipe_cnt].pipe.src.viewport_width;
- pipes[pipe_cnt].pipe.src.surface_height_c = pipes[pipe_cnt].pipe.src.viewport_height;
- pipes[pipe_cnt].pipe.src.surface_width_c = pipes[pipe_cnt].pipe.src.viewport_width;
- pipes[pipe_cnt].pipe.src.data_pitch = ((pipes[pipe_cnt].pipe.src.viewport_width + 255) / 256) * 256;
- pipes[pipe_cnt].pipe.src.source_format = dm_444_32;
- pipes[pipe_cnt].pipe.dest.recout_width = pipes[pipe_cnt].pipe.src.viewport_width; /*vp_width/hratio*/
- pipes[pipe_cnt].pipe.dest.recout_height = pipes[pipe_cnt].pipe.src.viewport_height; /*vp_height/vratio*/
- pipes[pipe_cnt].pipe.dest.full_recout_width = pipes[pipe_cnt].pipe.dest.recout_width; /*when is_hsplit != 1*/
- pipes[pipe_cnt].pipe.dest.full_recout_height = pipes[pipe_cnt].pipe.dest.recout_height; /*when is_hsplit != 1*/
- pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_16;
- pipes[pipe_cnt].pipe.scale_ratio_depth.hscl_ratio = 1.0;
- pipes[pipe_cnt].pipe.scale_ratio_depth.vscl_ratio = 1.0;
- pipes[pipe_cnt].pipe.scale_ratio_depth.scl_enable = 0; /*Lb only or Full scl*/
- pipes[pipe_cnt].pipe.scale_taps.htaps = 1;
- pipes[pipe_cnt].pipe.scale_taps.vtaps = 1;
- pipes[pipe_cnt].pipe.dest.vtotal_min = v_total;
- pipes[pipe_cnt].pipe.dest.vtotal_max = v_total;
-
- if (pipes[pipe_cnt].pipe.dest.odm_combine == dm_odm_combine_mode_2to1) {
- pipes[pipe_cnt].pipe.src.viewport_width /= 2;
- pipes[pipe_cnt].pipe.dest.recout_width /= 2;
- } else if (pipes[pipe_cnt].pipe.dest.odm_combine == dm_odm_combine_mode_4to1) {
- pipes[pipe_cnt].pipe.src.viewport_width /= 4;
- pipes[pipe_cnt].pipe.dest.recout_width /= 4;
- }
- } else {
- struct dc_plane_state *pln = res_ctx->pipe_ctx[i].plane_state;
- struct scaler_data *scl = &res_ctx->pipe_ctx[i].plane_res.scl_data;
-
- pipes[pipe_cnt].pipe.src.immediate_flip = pln->flip_immediate;
- pipes[pipe_cnt].pipe.src.is_hsplit = (res_ctx->pipe_ctx[i].bottom_pipe && res_ctx->pipe_ctx[i].bottom_pipe->plane_state == pln)
- || (res_ctx->pipe_ctx[i].top_pipe && res_ctx->pipe_ctx[i].top_pipe->plane_state == pln)
- || pipes[pipe_cnt].pipe.dest.odm_combine != dm_odm_combine_mode_disabled;
-
- /* stereo is not split */
- if (pln->stereo_format == PLANE_STEREO_FORMAT_SIDE_BY_SIDE ||
- pln->stereo_format == PLANE_STEREO_FORMAT_TOP_AND_BOTTOM) {
- pipes[pipe_cnt].pipe.src.is_hsplit = false;
- pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].pipe_idx;
- }
-
- pipes[pipe_cnt].pipe.src.source_scan = pln->rotation == ROTATION_ANGLE_90
- || pln->rotation == ROTATION_ANGLE_270 ? dm_vert : dm_horz;
- pipes[pipe_cnt].pipe.src.viewport_y_y = scl->viewport.y;
- pipes[pipe_cnt].pipe.src.viewport_y_c = scl->viewport_c.y;
- pipes[pipe_cnt].pipe.src.viewport_width = scl->viewport.width;
- pipes[pipe_cnt].pipe.src.viewport_width_c = scl->viewport_c.width;
- pipes[pipe_cnt].pipe.src.viewport_height = scl->viewport.height;
- pipes[pipe_cnt].pipe.src.viewport_height_c = scl->viewport_c.height;
- pipes[pipe_cnt].pipe.src.viewport_width_max = pln->src_rect.width;
- pipes[pipe_cnt].pipe.src.viewport_height_max = pln->src_rect.height;
- pipes[pipe_cnt].pipe.src.surface_width_y = pln->plane_size.surface_size.width;
- pipes[pipe_cnt].pipe.src.surface_height_y = pln->plane_size.surface_size.height;
- pipes[pipe_cnt].pipe.src.surface_width_c = pln->plane_size.chroma_size.width;
- pipes[pipe_cnt].pipe.src.surface_height_c = pln->plane_size.chroma_size.height;
- if (pln->format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA
- || pln->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) {
- pipes[pipe_cnt].pipe.src.data_pitch = pln->plane_size.surface_pitch;
- pipes[pipe_cnt].pipe.src.data_pitch_c = pln->plane_size.chroma_pitch;
- pipes[pipe_cnt].pipe.src.meta_pitch = pln->dcc.meta_pitch;
- pipes[pipe_cnt].pipe.src.meta_pitch_c = pln->dcc.meta_pitch_c;
- } else {
- pipes[pipe_cnt].pipe.src.data_pitch = pln->plane_size.surface_pitch;
- pipes[pipe_cnt].pipe.src.meta_pitch = pln->dcc.meta_pitch;
- }
- pipes[pipe_cnt].pipe.src.dcc = pln->dcc.enable;
- pipes[pipe_cnt].pipe.dest.recout_width = scl->recout.width;
- pipes[pipe_cnt].pipe.dest.recout_height = scl->recout.height;
- pipes[pipe_cnt].pipe.dest.full_recout_height = scl->recout.height;
- pipes[pipe_cnt].pipe.dest.full_recout_width = scl->recout.width;
- if (pipes[pipe_cnt].pipe.dest.odm_combine == dm_odm_combine_mode_2to1)
- pipes[pipe_cnt].pipe.dest.full_recout_width *= 2;
- else if (pipes[pipe_cnt].pipe.dest.odm_combine == dm_odm_combine_mode_4to1)
- pipes[pipe_cnt].pipe.dest.full_recout_width *= 4;
- else {
- struct pipe_ctx *split_pipe = res_ctx->pipe_ctx[i].bottom_pipe;
-
- while (split_pipe && split_pipe->plane_state == pln) {
- pipes[pipe_cnt].pipe.dest.full_recout_width += split_pipe->plane_res.scl_data.recout.width;
- split_pipe = split_pipe->bottom_pipe;
- }
- split_pipe = res_ctx->pipe_ctx[i].top_pipe;
- while (split_pipe && split_pipe->plane_state == pln) {
- pipes[pipe_cnt].pipe.dest.full_recout_width += split_pipe->plane_res.scl_data.recout.width;
- split_pipe = split_pipe->top_pipe;
- }
- }
-
- pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_16;
- pipes[pipe_cnt].pipe.scale_ratio_depth.hscl_ratio = (double) scl->ratios.horz.value / (1ULL<<32);
- pipes[pipe_cnt].pipe.scale_ratio_depth.hscl_ratio_c = (double) scl->ratios.horz_c.value / (1ULL<<32);
- pipes[pipe_cnt].pipe.scale_ratio_depth.vscl_ratio = (double) scl->ratios.vert.value / (1ULL<<32);
- pipes[pipe_cnt].pipe.scale_ratio_depth.vscl_ratio_c = (double) scl->ratios.vert_c.value / (1ULL<<32);
- pipes[pipe_cnt].pipe.scale_ratio_depth.scl_enable =
- scl->ratios.vert.value != dc_fixpt_one.value
- || scl->ratios.horz.value != dc_fixpt_one.value
- || scl->ratios.vert_c.value != dc_fixpt_one.value
- || scl->ratios.horz_c.value != dc_fixpt_one.value /*Lb only or Full scl*/
- || dc->debug.always_scale; /*support always scale*/
- pipes[pipe_cnt].pipe.scale_taps.htaps = scl->taps.h_taps;
- pipes[pipe_cnt].pipe.scale_taps.htaps_c = scl->taps.h_taps_c;
- pipes[pipe_cnt].pipe.scale_taps.vtaps = scl->taps.v_taps;
- pipes[pipe_cnt].pipe.scale_taps.vtaps_c = scl->taps.v_taps_c;
-
- pipes[pipe_cnt].pipe.src.macro_tile_size =
- swizzle_mode_to_macro_tile_size(pln->tiling_info.gfx9.swizzle);
- swizzle_to_dml_params(pln->tiling_info.gfx9.swizzle,
- &pipes[pipe_cnt].pipe.src.sw_mode);
-
- switch (pln->format) {
- case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr:
- case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb:
- pipes[pipe_cnt].pipe.src.source_format = dm_420_8;
- break;
- case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr:
- case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb:
- pipes[pipe_cnt].pipe.src.source_format = dm_420_10;
- break;
- case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
- case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
- case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
- case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
- pipes[pipe_cnt].pipe.src.source_format = dm_444_64;
- break;
- case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555:
- case SURFACE_PIXEL_FORMAT_GRPH_RGB565:
- pipes[pipe_cnt].pipe.src.source_format = dm_444_16;
- break;
- case SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS:
- pipes[pipe_cnt].pipe.src.source_format = dm_444_8;
- break;
- case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA:
- pipes[pipe_cnt].pipe.src.source_format = dm_rgbe_alpha;
- break;
- default:
- pipes[pipe_cnt].pipe.src.source_format = dm_444_32;
- break;
- }
- }
-
- pipe_cnt++;
- }
-
- /* populate writeback information */
- DC_FP_START();
- dc->res_pool->funcs->populate_dml_writeback_from_context(dc, res_ctx, pipes);
- DC_FP_END();
-
- return pipe_cnt;
-}
-
-unsigned int dcn20_calc_max_scaled_time(
- unsigned int time_per_pixel,
- enum mmhubbub_wbif_mode mode,
- unsigned int urgent_watermark)
-{
- unsigned int time_per_byte = 0;
- unsigned int total_y_free_entry = 0x200; /* two memory piece for luma */
- unsigned int total_c_free_entry = 0x140; /* two memory piece for chroma */
- unsigned int small_free_entry, max_free_entry;
- unsigned int buf_lh_capability;
- unsigned int max_scaled_time;
-
- if (mode == PACKED_444) /* packed mode */
- time_per_byte = time_per_pixel/4;
- else if (mode == PLANAR_420_8BPC)
- time_per_byte = time_per_pixel;
- else if (mode == PLANAR_420_10BPC) /* p010 */
- time_per_byte = time_per_pixel * 819/1024;
-
- if (time_per_byte == 0)
- time_per_byte = 1;
-
- small_free_entry = (total_y_free_entry > total_c_free_entry) ? total_c_free_entry : total_y_free_entry;
- max_free_entry = (mode == PACKED_444) ? total_y_free_entry + total_c_free_entry : small_free_entry;
- buf_lh_capability = max_free_entry*time_per_byte*32/16; /* there is 4bit fraction */
- max_scaled_time = buf_lh_capability - urgent_watermark;
- return max_scaled_time;
-}
-
-void dcn20_set_mcif_arb_params(
- struct dc *dc,
- struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- int pipe_cnt)
-{
- enum mmhubbub_wbif_mode wbif_mode;
- struct mcif_arb_params *wb_arb_params;
- int i, j, k, dwb_pipe;
-
- /* Writeback MCIF_WB arbitration parameters */
- dwb_pipe = 0;
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
-
- if (!context->res_ctx.pipe_ctx[i].stream)
- continue;
-
- for (j = 0; j < MAX_DWB_PIPES; j++) {
- if (context->res_ctx.pipe_ctx[i].stream->writeback_info[j].wb_enabled == false)
- continue;
-
- //wb_arb_params = &context->res_ctx.pipe_ctx[i].stream->writeback_info[j].mcif_arb_params;
- wb_arb_params = &context->bw_ctx.bw.dcn.bw_writeback.mcif_wb_arb[dwb_pipe];
-
- if (context->res_ctx.pipe_ctx[i].stream->writeback_info[j].dwb_params.out_format == dwb_scaler_mode_yuv420) {
- if (context->res_ctx.pipe_ctx[i].stream->writeback_info[j].dwb_params.output_depth == DWB_OUTPUT_PIXEL_DEPTH_8BPC)
- wbif_mode = PLANAR_420_8BPC;
- else
- wbif_mode = PLANAR_420_10BPC;
- } else
- wbif_mode = PACKED_444;
-
- for (k = 0; k < sizeof(wb_arb_params->cli_watermark)/sizeof(wb_arb_params->cli_watermark[0]); k++) {
- wb_arb_params->cli_watermark[k] = get_wm_writeback_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- wb_arb_params->pstate_watermark[k] = get_wm_writeback_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- }
- wb_arb_params->time_per_pixel = 16.0 * 1000 / (context->res_ctx.pipe_ctx[i].stream->phy_pix_clk / 1000); /* 4 bit fraction, ms */
- wb_arb_params->slice_lines = 32;
- wb_arb_params->arbitration_slice = 2;
- wb_arb_params->max_scaled_time = dcn20_calc_max_scaled_time(wb_arb_params->time_per_pixel,
- wbif_mode,
- wb_arb_params->cli_watermark[0]); /* assume 4 watermark sets have the same value */
-
- dwb_pipe++;
-
- if (dwb_pipe >= MAX_DWB_PIPES)
- return;
- }
- if (dwb_pipe >= MAX_DWB_PIPES)
- return;
- }
-}
-
-bool dcn20_validate_dsc(struct dc *dc, struct dc_state *new_ctx)
-{
- int i;
-
- /* Validate DSC config, dsc count validation is already done */
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &new_ctx->res_ctx.pipe_ctx[i];
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct dsc_config dsc_cfg;
- struct pipe_ctx *odm_pipe;
- int opp_cnt = 1;
-
- for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
- opp_cnt++;
-
- /* Only need to validate top pipe */
- if (pipe_ctx->top_pipe || pipe_ctx->prev_odm_pipe || !stream || !stream->timing.flags.DSC)
- continue;
-
- dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left
- + stream->timing.h_border_right) / opp_cnt;
- dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top
- + stream->timing.v_border_bottom;
- dsc_cfg.pixel_encoding = stream->timing.pixel_encoding;
- dsc_cfg.color_depth = stream->timing.display_color_depth;
- dsc_cfg.is_odm = pipe_ctx->next_odm_pipe ? true : false;
- dsc_cfg.dc_dsc_cfg = stream->timing.dsc_cfg;
- dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt;
-
- if (!pipe_ctx->stream_res.dsc->funcs->dsc_validate_stream(pipe_ctx->stream_res.dsc, &dsc_cfg))
- return false;
- }
- return true;
-}
-
-struct pipe_ctx *dcn20_find_secondary_pipe(struct dc *dc,
- struct resource_context *res_ctx,
- const struct resource_pool *pool,
- const struct pipe_ctx *primary_pipe)
-{
- struct pipe_ctx *secondary_pipe = NULL;
-
- if (dc && primary_pipe) {
- int j;
- int preferred_pipe_idx = 0;
-
- /* first check the prev dc state:
- * if this primary pipe has a bottom pipe in prev. state
- * and if the bottom pipe is still available (which it should be),
- * pick that pipe as secondary
- * Same logic applies for ODM pipes
- */
- if (dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].next_odm_pipe) {
- preferred_pipe_idx = dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].next_odm_pipe->pipe_idx;
- if (res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) {
- secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx];
- secondary_pipe->pipe_idx = preferred_pipe_idx;
- }
- }
- if (secondary_pipe == NULL &&
- dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].bottom_pipe) {
- preferred_pipe_idx = dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].bottom_pipe->pipe_idx;
- if (res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) {
- secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx];
- secondary_pipe->pipe_idx = preferred_pipe_idx;
- }
- }
-
- /*
- * if this primary pipe does not have a bottom pipe in prev. state
- * start backward and find a pipe that did not used to be a bottom pipe in
- * prev. dc state. This way we make sure we keep the same assignment as
- * last state and will not have to reprogram every pipe
- */
- if (secondary_pipe == NULL) {
- for (j = dc->res_pool->pipe_count - 1; j >= 0; j--) {
- if (dc->current_state->res_ctx.pipe_ctx[j].top_pipe == NULL
- && dc->current_state->res_ctx.pipe_ctx[j].prev_odm_pipe == NULL) {
- preferred_pipe_idx = j;
-
- if (res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) {
- secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx];
- secondary_pipe->pipe_idx = preferred_pipe_idx;
- break;
- }
- }
- }
- }
- /*
- * We should never hit this assert unless assignments are shuffled around
- * if this happens we will prob. hit a vsync tdr
- */
- ASSERT(secondary_pipe);
- /*
- * search backwards for the second pipe to keep pipe
- * assignment more consistent
- */
- if (secondary_pipe == NULL) {
- for (j = dc->res_pool->pipe_count - 1; j >= 0; j--) {
- preferred_pipe_idx = j;
-
- if (res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) {
- secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx];
- secondary_pipe->pipe_idx = preferred_pipe_idx;
- break;
- }
- }
- }
- }
-
- return secondary_pipe;
-}
-
-/*
- * dcn20_merge_pipes_for_validate - undo existing ODM and MPC splits in @context
- * so that mode support can decide on splitting from a clean state.
- *
- * @dc: display core instance; dc->res_pool supplies the pipe count and is
- *      passed to dcn20_release_dsc()
- * @context: state whose res_ctx.pipe_ctx[] entries are modified in place
- *
- * Pass 1 walks every pipe that has no prev_odm_pipe, detaches its whole
- * next_odm_pipe chain and clears each detached pipe. Pass 2 unlinks a
- * bottom_pipe that shares the same plane_state as its top pipe. After either
- * kind of merge the surviving pipe gets its scaling params rebuilt when it
- * has a plane.
- */
-void dcn20_merge_pipes_for_validate(
- struct dc *dc,
- struct dc_state *context)
-{
- int i;
-
- /* merge previously split odm pipes since mode support needs to make the decision */
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- struct pipe_ctx *odm_pipe = pipe->next_odm_pipe;
-
- /* only the first pipe of an ODM chain drives the teardown */
- if (pipe->prev_odm_pipe)
- continue;
-
- pipe->next_odm_pipe = NULL;
- while (odm_pipe) {
- /* remember the successor before this pipe's links are wiped */
- struct pipe_ctx *next_odm_pipe = odm_pipe->next_odm_pipe;
-
- odm_pipe->plane_state = NULL;
- odm_pipe->stream = NULL;
- odm_pipe->top_pipe = NULL;
- odm_pipe->bottom_pipe = NULL;
- odm_pipe->prev_odm_pipe = NULL;
- odm_pipe->next_odm_pipe = NULL;
- /* release the DSC before stream_res is zeroed below */
- if (odm_pipe->stream_res.dsc)
- dcn20_release_dsc(&context->res_ctx, dc->res_pool, &odm_pipe->stream_res.dsc);
- /* Clear plane_res and stream_res */
- memset(&odm_pipe->plane_res, 0, sizeof(odm_pipe->plane_res));
- memset(&odm_pipe->stream_res, 0, sizeof(odm_pipe->stream_res));
- odm_pipe = next_odm_pipe;
- }
- if (pipe->plane_state)
- resource_build_scaling_params(pipe);
- }
-
- /* merge previously mpc split pipes since mode support needs to make the decision */
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- struct pipe_ctx *hsplit_pipe = pipe->bottom_pipe;
-
- /* a bottom pipe showing a different plane is not a split half; keep it */
- if (!hsplit_pipe || hsplit_pipe->plane_state != pipe->plane_state)
- continue;
-
- /* splice hsplit_pipe out of the top/bottom list */
- pipe->bottom_pipe = hsplit_pipe->bottom_pipe;
- if (hsplit_pipe->bottom_pipe)
- hsplit_pipe->bottom_pipe->top_pipe = pipe;
- hsplit_pipe->plane_state = NULL;
- hsplit_pipe->stream = NULL;
- hsplit_pipe->top_pipe = NULL;
- hsplit_pipe->bottom_pipe = NULL;
-
- /* Clear plane_res and stream_res */
- memset(&hsplit_pipe->plane_res, 0, sizeof(hsplit_pipe->plane_res));
- memset(&hsplit_pipe->stream_res, 0, sizeof(hsplit_pipe->stream_res));
- if (pipe->plane_state)
- resource_build_scaling_params(pipe);
- }
-}
-
-/*
- * dcn20_validate_apply_pipe_split_flags - decide, per pipe, whether it must be
- * split (2 or 4 ways) or merged for the given voltage level.
- *
- * @dc: display core instance; debug and config knobs and res_pool are read
- * @context: state being validated; context->bw_ctx.dml.vba is read and updated
- *           (maxMpcComb, ODMCombineEnabled, ODMCombineEnablePerState,
- *           RequiredDPPCLK)
- * @vlevel: voltage level to start from
- * @split: per-pipe output, written as 0, 2 or 4. Entries of pipes without a
- *         stream are never written, so the caller has to pre-initialize it.
- * @merge: per-pipe output, set to true where an existing split has to be
- *         undone.
- *         NOTE(review): dcn20_fast_validate_bw() passes NULL for @merge and
- *         the writes to merge[i] below are unguarded - presumably those
- *         branches are unreachable once pipes were merged beforehand; confirm.
- *
- * Return: the voltage level to use; the avoid-split search may have raised it
- * above @vlevel.
- */
-int dcn20_validate_apply_pipe_split_flags(
- struct dc *dc,
- struct dc_state *context,
- int vlevel,
- int *split,
- bool *merge)
-{
- int i, pipe_idx, vlevel_split;
- int plane_count = 0;
- bool force_split = false;
- bool avoid_split = dc->debug.pipe_split_policy == MPC_SPLIT_AVOID;
- struct vba_vars_st *v = &context->bw_ctx.dml.vba;
- int max_mpc_comb = v->maxMpcComb;
-
- if (context->stream_count > 1) {
- if (dc->debug.pipe_split_policy == MPC_SPLIT_AVOID_MULT_DISP)
- avoid_split = true;
- } else if (dc->debug.force_single_disp_pipe_split)
- force_split = true;
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
- /**
- * Workaround for avoiding pipe-split in cases where we'd split
- * planes that are too small, resulting in splits that aren't
- * valid for the scaler.
- */
- if (pipe->plane_state &&
- (pipe->plane_state->dst_rect.width <= 16 ||
- pipe->plane_state->dst_rect.height <= 16 ||
- pipe->plane_state->src_rect.width <= 16 ||
- pipe->plane_state->src_rect.height <= 16))
- avoid_split = true;
-
- /* TODO: fix dc bugs and remove this split threshold thing */
- if (pipe->stream && !pipe->prev_odm_pipe &&
- (!pipe->top_pipe || pipe->top_pipe->plane_state != pipe->plane_state))
- ++plane_count;
- }
- /* more planes than half the pipes: no room to split them */
- if (plane_count > dc->res_pool->pipe_count / 2)
- avoid_split = true;
-
- /* W/A: Mode timing with borders may not work well with pipe split, avoid for this corner case */
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- struct dc_crtc_timing timing;
-
- if (!pipe->stream)
- continue;
- else {
- timing = pipe->stream->timing;
- if (timing.h_border_left + timing.h_border_right
- + timing.v_border_top + timing.v_border_bottom > 0) {
- avoid_split = true;
- break;
- }
- }
- }
-
- /* Avoid split loop looks for lowest voltage level that allows most unsplit pipes possible */
- if (avoid_split) {
- for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
- if (!context->res_ctx.pipe_ctx[i].stream)
- continue;
-
- /* vlevel_split remembers the level to fall back to if no unsplit level exists */
- for (vlevel_split = vlevel; vlevel <= context->bw_ctx.dml.soc.num_states; vlevel++)
- if (v->NoOfDPP[vlevel][0][pipe_idx] == 1 &&
- v->ModeSupport[vlevel][0])
- break;
- /* Impossible to not split this pipe */
- if (vlevel > context->bw_ctx.dml.soc.num_states)
- vlevel = vlevel_split;
- else
- max_mpc_comb = 0;
- pipe_idx++;
- }
- v->maxMpcComb = max_mpc_comb;
- }
-
- /* Split loop sets which pipe should be split based on dml outputs and dc flags */
- for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- int pipe_plane = v->pipe_plane[pipe_idx];
- bool split4mpc = context->stream_count == 1 && plane_count == 1
- && dc->config.enable_4to1MPC && dc->res_pool->pipe_count >= 4;
-
- if (!context->res_ctx.pipe_ctx[i].stream)
- continue;
-
- if (split4mpc || v->NoOfDPP[vlevel][max_mpc_comb][pipe_plane] == 4)
- split[i] = 4;
- else if (force_split || v->NoOfDPP[vlevel][max_mpc_comb][pipe_plane] == 2)
- split[i] = 2;
-
- /* the overrides below are applied in order; a later one wins */
- if ((pipe->stream->view_format ==
- VIEW_3D_FORMAT_SIDE_BY_SIDE ||
- pipe->stream->view_format ==
- VIEW_3D_FORMAT_TOP_AND_BOTTOM) &&
- (pipe->stream->timing.timing_3d_format ==
- TIMING_3D_FORMAT_TOP_AND_BOTTOM ||
- pipe->stream->timing.timing_3d_format ==
- TIMING_3D_FORMAT_SIDE_BY_SIDE))
- split[i] = 2;
- if (dc->debug.force_odm_combine & (1 << pipe->stream_res.tg->inst)) {
- split[i] = 2;
- v->ODMCombineEnablePerState[vlevel][pipe_plane] = dm_odm_combine_mode_2to1;
- }
- if (dc->debug.force_odm_combine_4to1 & (1 << pipe->stream_res.tg->inst)) {
- split[i] = 4;
- v->ODMCombineEnablePerState[vlevel][pipe_plane] = dm_odm_combine_mode_4to1;
- }
- /*420 format workaround*/
- if (pipe->stream->timing.h_addressable > 7680 &&
- pipe->stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) {
- split[i] = 4;
- }
- v->ODMCombineEnabled[pipe_plane] =
- v->ODMCombineEnablePerState[vlevel][pipe_plane];
-
- /*
- * Reconcile the requested split with the split the pipe already has:
- * the MPC case is handled first, the ODM case in the else branch.
- */
- if (v->ODMCombineEnabled[pipe_plane] == dm_odm_combine_mode_disabled) {
- if (get_num_mpc_splits(pipe) == 1) {
- /*If need split for mpc but 2 way split already*/
- if (split[i] == 4)
- split[i] = 2; /* 2 -> 4 MPC */
- else if (split[i] == 2)
- split[i] = 0; /* 2 -> 2 MPC */
- else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state)
- merge[i] = true; /* 2 -> 1 MPC */
- } else if (get_num_mpc_splits(pipe) == 3) {
- /*If need split for mpc but 4 way split already*/
- if (split[i] == 2 && ((pipe->top_pipe && !pipe->top_pipe->top_pipe)
- || !pipe->bottom_pipe)) {
- merge[i] = true; /* 4 -> 2 MPC */
- } else if (split[i] == 0 && pipe->top_pipe &&
- pipe->top_pipe->plane_state == pipe->plane_state)
- merge[i] = true; /* 4 -> 1 MPC */
- split[i] = 0;
- } else if (get_num_odm_splits(pipe)) {
- /* ODM -> MPC transition */
- if (pipe->prev_odm_pipe) {
- split[i] = 0;
- merge[i] = true;
- }
- }
- } else {
- if (get_num_odm_splits(pipe) == 1) {
- /*If need split for odm but 2 way split already*/
- if (split[i] == 4)
- split[i] = 2; /* 2 -> 4 ODM */
- else if (split[i] == 2)
- split[i] = 0; /* 2 -> 2 ODM */
- else if (pipe->prev_odm_pipe) {
- ASSERT(0); /* NOT expected yet */
- merge[i] = true; /* exit ODM */
- }
- } else if (get_num_odm_splits(pipe) == 3) {
- /*If need split for odm but 4 way split already*/
- if (split[i] == 2 && ((pipe->prev_odm_pipe && !pipe->prev_odm_pipe->prev_odm_pipe)
- || !pipe->next_odm_pipe)) {
- ASSERT(0); /* NOT expected yet */
- merge[i] = true; /* 4 -> 2 ODM */
- } else if (split[i] == 0 && pipe->prev_odm_pipe) {
- ASSERT(0); /* NOT expected yet */
- merge[i] = true; /* exit ODM */
- }
- split[i] = 0;
- } else if (get_num_mpc_splits(pipe)) {
- /* MPC -> ODM transition */
- ASSERT(0); /* NOT expected yet */
- if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) {
- split[i] = 0;
- merge[i] = true;
- }
- }
- }
-
- /* Adjust dppclk when split is forced, do not bother with dispclk */
- /* NOTE(review): indexed by pipe_idx here but by pipe_plane above - confirm the two always agree */
- if (split[i] != 0 && v->NoOfDPP[vlevel][max_mpc_comb][pipe_idx] == 1)
- v->RequiredDPPCLK[vlevel][max_mpc_comb][pipe_idx] /= 2;
- pipe_idx++;
- }
-
- return vlevel;
-}
-
-/*
- * dcn20_fast_validate_bw - pick a voltage level for @context and apply the
- * pipe splits that DML asks for.
- *
- * @dc: display core instance
- * @context: state being validated; pipes are merged first and then re-split
- * @pipes: caller-allocated DML pipe array; false is returned when it is NULL
- * @pipe_cnt_out: receives the pipe count reported by populate_dml_pipes()
- * @pipe_split_from: per-pipe output of MAX_PIPES entries; -1 when the pipe was
- *                   not created by a split here, otherwise the DML index of
- *                   the pipe it was split from
- * @vlevel_out: receives the chosen voltage level, written on success only
- * @fast_validate: forwarded to populate_dml_pipes()
- *
- * Return: true when the mode is supported (also when there are no pipes to
- * validate), false otherwise.
- */
-bool dcn20_fast_validate_bw(
-		struct dc *dc,
-		struct dc_state *context,
-		display_e2e_pipe_params_st *pipes,
-		int *pipe_cnt_out,
-		int *pipe_split_from,
-		int *vlevel_out,
-		bool fast_validate)
-{
-	bool out = false;
-	int split[MAX_PIPES] = { 0 };
-	int pipe_cnt, i, pipe_idx, vlevel;
-
-	ASSERT(pipes);
-	if (!pipes)
-		return false;
-
-	dcn20_merge_pipes_for_validate(dc, context);
-
-	pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
-
-	*pipe_cnt_out = pipe_cnt;
-
-	/* nothing to validate counts as supported */
-	if (!pipe_cnt) {
-		out = true;
-		goto validate_out;
-	}
-
-	vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
-
-	if (vlevel > context->bw_ctx.dml.soc.num_states)
-		goto validate_fail;
-
-	vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, NULL);
-
-	/*initialize pipe_just_split_from to invalid idx*/
-	for (i = 0; i < MAX_PIPES; i++)
-		pipe_split_from[i] = -1;
-
-	for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-		struct pipe_ctx *hsplit_pipe = pipe->bottom_pipe;
-
-		/* skip unused pipes and pipes created by a split earlier in this loop */
-		if (!pipe->stream || pipe_split_from[i] >= 0)
-			continue;
-
-		pipe_idx++;
-
-		/* stream without a plane that still needs ODM combine */
-		if (!pipe->top_pipe && !pipe->plane_state && context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]) {
-			hsplit_pipe = dcn20_find_secondary_pipe(dc, &context->res_ctx, dc->res_pool, pipe);
-			ASSERT(hsplit_pipe);
-			/*
-			 * ASSERT() alone does not stop execution; without a free
-			 * secondary pipe the ODM split cannot be done and
-			 * hsplit_pipe must not be dereferenced below.
-			 */
-			if (!hsplit_pipe)
-				goto validate_fail;
-			if (!dcn20_split_stream_for_odm(
-					dc, &context->res_ctx,
-					pipe, hsplit_pipe))
-				goto validate_fail;
-			pipe_split_from[hsplit_pipe->pipe_idx] = pipe_idx;
-			dcn20_build_mapped_resource(dc, context, pipe->stream);
-		}
-
-		if (!pipe->plane_state)
-			continue;
-		/* Skip 2nd half of already split pipe */
-		if (pipe->top_pipe && pipe->plane_state == pipe->top_pipe->plane_state)
-			continue;
-
-		/* We do not support mpo + odm at the moment */
-		if (hsplit_pipe && hsplit_pipe->plane_state != pipe->plane_state
-				&& context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx])
-			goto validate_fail;
-
-		if (split[i] == 2) {
-			if (!hsplit_pipe || hsplit_pipe->plane_state != pipe->plane_state) {
-				/* pipe not split previously needs split */
-				hsplit_pipe = dcn20_find_secondary_pipe(dc, &context->res_ctx, dc->res_pool, pipe);
-				ASSERT(hsplit_pipe);
-				if (!hsplit_pipe) {
-					/* no pipe to split onto: run this one at double DPPCLK instead */
-					context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx] *= 2;
-					continue;
-				}
-				if (context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]) {
-					if (!dcn20_split_stream_for_odm(
-							dc, &context->res_ctx,
-							pipe, hsplit_pipe))
-						goto validate_fail;
-					dcn20_build_mapped_resource(dc, context, pipe->stream);
-				} else {
-					dcn20_split_stream_for_mpc(
-							&context->res_ctx, dc->res_pool,
-							pipe, hsplit_pipe);
-					resource_build_scaling_params(pipe);
-					resource_build_scaling_params(hsplit_pipe);
-				}
-				pipe_split_from[hsplit_pipe->pipe_idx] = pipe_idx;
-			}
-		} else if (hsplit_pipe && hsplit_pipe->plane_state == pipe->plane_state) {
-			/* merge should already have been done */
-			ASSERT(0);
-		}
-	}
-	/* Actual dsc count per stream dsc validation*/
-	if (!dcn20_validate_dsc(dc, context)) {
-		context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states] =
-				DML_FAIL_DSC_VALIDATION_FAILURE;
-		goto validate_fail;
-	}
-
-	*vlevel_out = vlevel;
-
-	out = true;
-	goto validate_out;
-
-validate_fail:
-	out = false;
-
-validate_out:
-	return out;
-}
-
-/*
- * dcn20_calculate_wm - fill the per-pipe clock configuration and compute the
- * four watermark sets (a-d) of context->bw_ctx.bw.dcn.watermarks.
- *
- * @dc: display core instance (res_pool, config and debug knobs are read)
- * @context: state being validated; DML results are read and watermarks written
- * @pipes: DML pipe array, updated in place
- * @out_pipe_cnt: receives the number of entries of @pipes that are in use
- * @pipe_split_from: per-pipe index of the DML pipe a split pipe came from, or
- *                   a negative value for pipes that were not split off
- * @vlevel: voltage level selected by validation
- * @fast_validate: forwarded to the populate_dml_pipes callbacks
- *
- * Sets b, c and d are computed with pipes[0] forced to at least voltage 1, 2
- * and 3 respectively; set a is computed last at @vlevel itself, which also
- * leaves pipes[0].clks_cfg describing @vlevel on return.
- */
-static void dcn20_calculate_wm(
- struct dc *dc, struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- int *out_pipe_cnt,
- int *pipe_split_from,
- int vlevel,
- bool fast_validate)
-{
- int pipe_cnt, i, pipe_idx;
-
- /* pipe_cnt counts every pipe with a stream, pipe_idx only those not created by a split */
- for (i = 0, pipe_idx = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
- if (!context->res_ctx.pipe_ctx[i].stream)
- continue;
-
- pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0;
- pipes[pipe_cnt].clks_cfg.dispclk_mhz = context->bw_ctx.dml.vba.RequiredDISPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
-
- if (pipe_split_from[i] < 0) {
- pipes[pipe_cnt].clks_cfg.dppclk_mhz =
- context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx];
- if (context->bw_ctx.dml.vba.BlendingAndTiming[pipe_idx] == pipe_idx)
- pipes[pipe_cnt].pipe.dest.odm_combine =
- context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx];
- else
- pipes[pipe_cnt].pipe.dest.odm_combine = 0;
- pipe_idx++;
- } else {
- /* split-off pipe: reuse the DML results of the pipe it was split from */
- pipes[pipe_cnt].clks_cfg.dppclk_mhz =
- context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_split_from[i]];
- if (context->bw_ctx.dml.vba.BlendingAndTiming[pipe_split_from[i]] == pipe_split_from[i])
- pipes[pipe_cnt].pipe.dest.odm_combine =
- context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_split_from[i]];
- else
- pipes[pipe_cnt].pipe.dest.odm_combine = 0;
- }
-
- if (dc->config.forced_clocks) {
- pipes[pipe_cnt].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
- pipes[pipe_cnt].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
- }
- /* debug minimums are applied last and override the values chosen above */
- if (dc->debug.min_disp_clk_khz > pipes[pipe_cnt].clks_cfg.dispclk_mhz * 1000)
- pipes[pipe_cnt].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
- if (dc->debug.min_dpp_clk_khz > pipes[pipe_cnt].clks_cfg.dppclk_mhz * 1000)
- pipes[pipe_cnt].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;
-
- pipe_cnt++;
- }
-
- /* the counts differ only when at least one pipe was split off: repopulate the DML pipes */
- if (pipe_cnt != pipe_idx) {
- if (dc->res_pool->funcs->populate_dml_pipes)
- pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc,
- context, pipes, fast_validate);
- else
- pipe_cnt = dcn20_populate_dml_pipes_from_context(dc,
- context, pipes, fast_validate);
- }
-
- *out_pipe_cnt = pipe_cnt;
-
- pipes[0].clks_cfg.voltage = vlevel;
- pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].dcfclk_mhz;
- pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
-
- /* only pipe 0 is read for voltage and dcf/soc clocks */
- /* set b: voltage level 1 unless vlevel is already at or above it */
- if (vlevel < 1) {
- pipes[0].clks_cfg.voltage = 1;
- pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[1].dcfclk_mhz;
- pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[1].socclk_mhz;
- }
- context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- /* urgent_latency_ns is written for set b only */
- context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-
- /* set c: voltage level 2 unless vlevel is already at or above it */
- if (vlevel < 2) {
- pipes[0].clks_cfg.voltage = 2;
- pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[2].dcfclk_mhz;
- pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[2].socclk_mhz;
- }
- context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-
- /*
- * set d: voltage level 3 unless vlevel is already at or above it.
- * NOTE(review): voltage is set to 3 but the clocks come from
- * clock_limits[2] - confirm this is intended and not a copy/paste slip.
- */
- if (vlevel < 3) {
- pipes[0].clks_cfg.voltage = 3;
- pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[2].dcfclk_mhz;
- pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[2].socclk_mhz;
- }
- context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-
- /* set a: the validated voltage level itself; done last so pipes[0] ends up at vlevel */
- pipes[0].clks_cfg.voltage = vlevel;
- pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].dcfclk_mhz;
- pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
- context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-}
-
-/*
- * Report whether any pipe in @context that carries a stream uses a
- * DP 128b/132b signal, as decided by is_dp_128b_132b_signal().
- */
-static bool is_dtbclk_required(struct dc *dc, struct dc_state *context)
-{
-	int idx;
-
-	for (idx = 0; idx < dc->res_pool->pipe_count; idx++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[idx];
-
-		/* pipes without a stream are never handed to the signal check */
-		if (pipe->stream && is_dp_128b_132b_signal(pipe))
-			return true;
-	}
-
-	return false;
-}
-
-/*
- * Decide whether Z-state may be allowed for @context.
- *
- * Allowed when:
- *  - no pipe has a plane (no memory requests), or
- *  - there is exactly one stream, it is eDP, and either the link has index 0
- *    with PSR enabled or the DML stutter period exceeds 5000.0.
- * Disallowed in every other case.
- */
-static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struct dc_state *context)
-{
-	struct dc_link *link;
-	int planes = 0;
-	int idx;
-
-	for (idx = 0; idx < dc->res_pool->pipe_count; idx++)
-		if (context->res_ctx.pipe_ctx[idx].plane_state)
-			planes++;
-
-	if (planes == 0)
-		return DCN_ZSTATE_SUPPORT_ALLOW;
-
-	/* anything but a single eDP stream rules Z-state out */
-	if (context->stream_count != 1 || context->streams[0]->signal != SIGNAL_TYPE_EDP)
-		return DCN_ZSTATE_SUPPORT_DISALLOW;
-
-	link = context->streams[0]->sink->link;
-
-	if (link->link_index == 0 && link->psr_settings.psr_feature_enabled)
-		return DCN_ZSTATE_SUPPORT_ALLOW;
-
-	if (context->bw_ctx.dml.vba.StutterPeriod > 5000.0)
-		return DCN_ZSTATE_SUPPORT_ALLOW;
-
-	return DCN_ZSTATE_SUPPORT_DISALLOW;
-}
-
-/*
- * dcn20_calculate_dlg_params - copy the DML results for @vlevel into @context:
- * global clocks, per-pipe DPP clock and timing parameters, and the DLG/TTU/RQ
- * register values of every pipe that has a stream.
- *
- * @dc: display core instance (res_pool, debug knobs and dce_version are read)
- * @context: state to fill in; context->bw_ctx.bw.dcn and res_ctx.pipe_ctx[]
- *           are written
- * @pipes: DML pipe array; pipe.dest fields are updated in place
- * @pipe_cnt: number of valid entries in @pipes
- * @vlevel: voltage level selected by validation
- */
-void dcn20_calculate_dlg_params(
- struct dc *dc, struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- int pipe_cnt,
- int vlevel)
-{
- int i, pipe_idx;
-
- /* Writeback MCIF_WB arbitration parameters */
- dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt);
-
- context->bw_ctx.bw.dcn.clk.dispclk_khz = context->bw_ctx.dml.vba.DISPCLK * 1000;
- context->bw_ctx.bw.dcn.clk.dcfclk_khz = context->bw_ctx.dml.vba.DCFCLK * 1000;
- context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000;
- context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16;
-
- if (dc->debug.min_dram_clk_khz > context->bw_ctx.bw.dcn.clk.dramclk_khz)
- context->bw_ctx.bw.dcn.clk.dramclk_khz = dc->debug.min_dram_clk_khz;
-
- context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000;
- context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 1000;
- context->bw_ctx.bw.dcn.clk.p_state_change_support =
- context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]
- != dm_dram_clock_change_unsupported;
- /* start at 0; raised to the largest per-pipe dppclk in the loop below */
- context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
-
- context->bw_ctx.bw.dcn.clk.zstate_support = decide_zstate_support(dc, context);
-
- context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context);
-
- if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz)
- context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz;
-
- /* i walks hardware pipes, pipe_idx the matching DML pipe; only pipes with a stream have one */
- for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
- if (!context->res_ctx.pipe_ctx[i].stream)
- continue;
- pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
- pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
- pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
- pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
- context->res_ctx.pipe_ctx[i].det_buffer_size_kb = context->bw_ctx.dml.ip.det_buffer_size_kbytes;
- context->res_ctx.pipe_ctx[i].unbounded_req = pipes[pipe_idx].pipe.src.unbounded_req_mode;
-
- if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
- context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
- context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz =
- pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
- context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest;
- pipe_idx++;
- }
- /*save a original dppclock copy*/
- context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz;
- context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz;
- context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dppclk_mhz * 1000;
- context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dispclk_mhz * 1000;
-
- /* pipe_idx still holds the number of pipes with a stream counted by the loop above */
- context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes
- - context->bw_ctx.dml.ip.det_buffer_size_kbytes * pipe_idx;
-
- for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
- bool cstate_en = context->bw_ctx.dml.vba.PrefetchMode[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != 2;
-
- if (!context->res_ctx.pipe_ctx[i].stream)
- continue;
-
- /* cstate is always reported as disabled on DCN 2.01 */
- if (dc->ctx->dce_version == DCN_VERSION_2_01)
- cstate_en = false;
-
- context->bw_ctx.dml.funcs.rq_dlg_get_dlg_reg(&context->bw_ctx.dml,
- &context->res_ctx.pipe_ctx[i].dlg_regs,
- &context->res_ctx.pipe_ctx[i].ttu_regs,
- pipes,
- pipe_cnt,
- pipe_idx,
- cstate_en,
- context->bw_ctx.bw.dcn.clk.p_state_change_support,
- false, false, true);
-
- context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg(&context->bw_ctx.dml,
- &context->res_ctx.pipe_ctx[i].rq_regs,
- &pipes[pipe_idx].pipe);
- pipe_idx++;
- }
-}
-
-/*
- * Run one validation pass over @context.
- *
- * dcn20_fast_validate_bw() picks the voltage level and applies pipe splits.
- * Unless @fast_validate is set, watermarks and DLG parameters are computed
- * afterwards.
- *
- * The temporary DML pipe array is allocated with GFP_ATOMIC and freed before
- * returning. If the allocation fails, dcn20_fast_validate_bw() rejects the
- * NULL array and leaves pipe_cnt at 0, so false is returned via validate_out.
- *
- * Return: true when the mode is supported, false otherwise.
- */
-static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *context,
-		bool fast_validate)
-{
-	bool out = false;
-
-	BW_VAL_TRACE_SETUP();
-
-	int vlevel = 0;
-	int pipe_split_from[MAX_PIPES];
-	int pipe_cnt = 0;
-	/* kcalloc() checks the count * size product for overflow, unlike an open-coded kzalloc() */
-	display_e2e_pipe_params_st *pipes = kcalloc(dc->res_pool->pipe_count, sizeof(*pipes), GFP_ATOMIC);
-	DC_LOGGER_INIT(dc->ctx->logger);
-
-	BW_VAL_TRACE_COUNT();
-
-	out = dcn20_fast_validate_bw(dc, context, pipes, &pipe_cnt, pipe_split_from, &vlevel, fast_validate);
-
-	/* no pipes: return whatever fast validation decided, without the failure warning */
-	if (pipe_cnt == 0)
-		goto validate_out;
-
-	if (!out)
-		goto validate_fail;
-
-	BW_VAL_TRACE_END_VOLTAGE_LEVEL();
-
-	if (fast_validate) {
-		BW_VAL_TRACE_SKIP(fast);
-		goto validate_out;
-	}
-
-	dcn20_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, fast_validate);
-	dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
-
-	BW_VAL_TRACE_END_WATERMARKS();
-
-	goto validate_out;
-
-validate_fail:
-	DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n",
-		dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states]));
-
-	BW_VAL_TRACE_SKIP(fail);
-	out = false;
-
-validate_out:
-	kfree(pipes);
-
-	BW_VAL_TRACE_FINISH();
-
-	return out;
-}
-
-/*
- * Floating-point part of bandwidth validation.
- *
- * Kept noinline so that every FP instruction stays inside this call: when
- * building with hard-float the compiler has no notion of the
- * DC_FP_START/END bracket and would otherwise be free to emit FP code
- * outside of it.
- *
- * This is a workaround for breakage introduced with 5.6. Ideally all
- * FP-using code lives in its own file, only that file is built with
- * hard-float, and everything exported from it is wrapped with
- * DC_FP_START/END.
- *
- * A full validation is first tried with the real DRAM clock change latency.
- * If that does not yield full p-state support and a dummy p-state latency is
- * configured, validation is retried with the dummy latency and p-state change
- * support is reported as false. The original latency is restored on the
- * full-validation paths before returning.
- */
-static noinline bool dcn20_validate_bandwidth_fp(struct dc *dc,
-		struct dc_state *context, bool fast_validate)
-{
-	bool supported = false;
-	bool pstate_full = false;
-	bool pstate_dummy = false;
-	double saved_latency_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us;
-
-	context->bw_ctx.dml.soc.disable_dram_clock_change_vactive_support =
-		dc->debug.disable_dram_clock_change_vactive_support;
-	context->bw_ctx.dml.soc.allow_dram_clock_one_display_vactive =
-		dc->debug.enable_dram_clock_change_one_display_vactive;
-
-	/*Unsafe due to current pipe merge and split logic*/
-	ASSERT(context != dc->current_state);
-
-	if (fast_validate)
-		return dcn20_validate_bandwidth_internal(dc, context, true);
-
-	/* First attempt: full UCLK switch latency */
-	supported = dcn20_validate_bandwidth_internal(dc, context, false);
-	pstate_full = context->bw_ctx.bw.dcn.clk.p_state_change_support;
-
-	if (context->bw_ctx.dml.soc.dummy_pstate_latency_us != 0 &&
-			!(supported && pstate_full)) {
-		/* Second attempt: only the G6 temperature read latency */
-		context->bw_ctx.dml.soc.dram_clock_change_latency_us =
-			context->bw_ctx.dml.soc.dummy_pstate_latency_us;
-
-		supported = dcn20_validate_bandwidth_internal(dc, context, false);
-		pstate_dummy = context->bw_ctx.bw.dcn.clk.p_state_change_support;
-
-		if (supported && (pstate_dummy || !(context->stream_count)))
-			context->bw_ctx.bw.dcn.clk.p_state_change_support = false;
-		else
-			ASSERT(false); /* the fallback is supposed to always work */
-	} else {
-		context->bw_ctx.bw.dcn.clk.p_state_change_support = pstate_full;
-	}
-
-	context->bw_ctx.dml.soc.dram_clock_change_latency_us = saved_latency_us;
-
-	return supported;
-}
-
-/*
- * Public entry point for bandwidth validation: runs the floating-point
- * implementation between DC_FP_START() and DC_FP_END() and returns its
- * verdict.
- */
-bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context,
-		bool fast_validate)
-{
-	bool supported;
-
-	DC_FP_START();
-	supported = dcn20_validate_bandwidth_fp(dc, context, fast_validate);
-	DC_FP_END();
-
-	return supported;
-}
-
-/*
- * dcn20_acquire_idle_pipe_for_layer - take an idle pipe and attach it to the
- * stream of an existing head pipe.
- *
- * @state: state whose resource context is searched and modified
- * @pool: resource pool supplying the hubp/ipp/dpp instances for the pipe
- * @stream: stream the new pipe should serve
- *
- * The idle pipe inherits stream, timing generator and OPP from the head pipe
- * and gets the plane resources that belong to its own pipe index.
- *
- * Return: the acquired pipe, or NULL when @stream has no head pipe or no idle
- * pipe is available.
- */
-struct pipe_ctx *dcn20_acquire_idle_pipe_for_layer(
-		struct dc_state *state,
-		const struct resource_pool *pool,
-		struct dc_stream_state *stream)
-{
-	struct resource_context *res_ctx = &state->res_ctx;
-	struct pipe_ctx *head_pipe = resource_get_head_pipe_for_stream(res_ctx, stream);
-	struct pipe_ctx *idle_pipe;
-
-	/*
-	 * ASSERT() alone does not stop execution, so bail out explicitly
-	 * instead of dereferencing a NULL head pipe below.
-	 */
-	if (!head_pipe) {
-		ASSERT(0);
-		return NULL;
-	}
-
-	idle_pipe = find_idle_secondary_pipe(res_ctx, pool, head_pipe);
-	if (!idle_pipe)
-		return NULL;
-
-	idle_pipe->stream = head_pipe->stream;
-	idle_pipe->stream_res.tg = head_pipe->stream_res.tg;
-	idle_pipe->stream_res.opp = head_pipe->stream_res.opp;
-
-	idle_pipe->plane_res.hubp = pool->hubps[idle_pipe->pipe_idx];
-	idle_pipe->plane_res.ipp = pool->ipps[idle_pipe->pipe_idx];
-	idle_pipe->plane_res.dpp = pool->dpps[idle_pipe->pipe_idx];
-	idle_pipe->plane_res.mpcc_inst = pool->dpps[idle_pipe->pipe_idx]->inst;
-
-	return idle_pipe;
-}
-
-/*
- * Forward a DCC capability query to the get_dcc_compression_cap hook of the
- * pool's hubbub and return its result.
- */
-bool dcn20_get_dcc_compression_cap(const struct dc *dc,
-		const struct dc_dcc_surface_param *input,
-		struct dc_surface_dcc_cap *output)
-{
-	struct hubbub *hubbub = dc->res_pool->hubbub;
-
-	return hubbub->funcs->get_dcc_compression_cap(hubbub, input, output);
-}
-
-/*
- * Tear down the DCN20 resource pool behind *@pool, free it, and clear the
- * caller's pointer.
- */
-static void dcn20_destroy_resource_pool(struct resource_pool **pool)
-{
-	struct dcn20_resource_pool *res_pool = TO_DCN20_RES_POOL(*pool);
-
-	dcn20_resource_destruct(res_pool);
-	kfree(res_pool);
-
-	/* do not leave the caller with a dangling pointer */
-	*pool = NULL;
-}
-
-
-/* Capability callbacks: DCC compression queries go to dcn20_get_dcc_compression_cap(). */
-static struct dc_cap_funcs cap_funcs = {
- .get_dcc_compression_cap = dcn20_get_dcc_compression_cap
-};
-
-
-/*
- * Pick the gfx9 swizzle mode for @plane_state from its pixel format:
- * DC_SW_64KB_D for 64 bpp formats, DC_SW_64KB_S for everything else.
- *
- * Return: always DC_OK.
- */
-enum dc_status dcn20_patch_unknown_plane_state(struct dc_plane_state *plane_state)
-{
-	unsigned int bits_per_pixel = resource_pixel_format_to_bpp(plane_state->format);
-
-	plane_state->tiling_info.gfx9.swizzle =
-		(bits_per_pixel == 64) ? DC_SW_64KB_D : DC_SW_64KB_S;
-
-	return DC_OK;
-}
-
-/*
- * Resource callback table for the DCN20 pool. Every hook is a dcn20_*
- * implementation except find_first_free_match_stream_enc_for_link, which
- * reuses the dcn10 one.
- */
-static const struct resource_funcs dcn20_res_pool_funcs = {
- .destroy = dcn20_destroy_resource_pool,
- .link_enc_create = dcn20_link_encoder_create,
- .panel_cntl_create = dcn20_panel_cntl_create,
- .validate_bandwidth = dcn20_validate_bandwidth,
- .acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer,
- .add_stream_to_ctx = dcn20_add_stream_to_ctx,
- .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
- .remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
- .populate_dml_writeback_from_context = dcn20_populate_dml_writeback_from_context,
- .patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
- .set_mcif_arb_params = dcn20_set_mcif_arb_params,
- .populate_dml_pipes = dcn20_populate_dml_pipes_from_context,
- .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link
-};
-
-/*
- * Allocate and construct one DWBC object per writeback instance reported by
- * pool->res_cap->num_dwb and store each in pool->dwbc[].
- *
- * Return: true on success, false as soon as an allocation fails. Objects
- * created before the failure stay in pool->dwbc[].
- */
-bool dcn20_dwbc_create(struct dc_context *ctx, struct resource_pool *pool)
-{
-	uint32_t num_dwb = pool->res_cap->num_dwb;
-	int inst;
-
-	for (inst = 0; inst < num_dwb; inst++) {
-		struct dcn20_dwbc *dwbc20;
-
-		dwbc20 = kzalloc(sizeof(*dwbc20), GFP_KERNEL);
-		if (!dwbc20) {
-			dm_error("DC: failed to create dwbc20!\n");
-			return false;
-		}
-
-		dcn20_dwbc_construct(dwbc20, ctx, &dwbc20_regs[inst],
-				&dwbc20_shift, &dwbc20_mask, inst);
-		pool->dwbc[inst] = &dwbc20->base;
-	}
-
-	return true;
-}
-
-/*
- * Allocate and construct one MCIF_WB (mmhubbub) object per writeback instance
- * reported by pool->res_cap->num_dwb and store each in pool->mcif_wb[].
- *
- * Return: true on success, false as soon as an allocation fails. Objects
- * created before the failure stay in pool->mcif_wb[].
- */
-bool dcn20_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool)
-{
-	uint32_t num_wb = pool->res_cap->num_dwb;
-	int inst;
-
-	ASSERT(num_wb > 0);
-
-	for (inst = 0; inst < num_wb; inst++) {
-		struct dcn20_mmhubbub *mcif_wb20;
-
-		mcif_wb20 = kzalloc(sizeof(*mcif_wb20), GFP_KERNEL);
-		if (!mcif_wb20) {
-			dm_error("DC: failed to create mcif_wb20!\n");
-			return false;
-		}
-
-		dcn20_mmhubbub_construct(mcif_wb20, ctx, &mcif_wb20_regs[inst],
-				&mcif_wb20_shift, &mcif_wb20_mask, inst);
-		pool->mcif_wb[inst] = &mcif_wb20->base;
-	}
-
-	return true;
-}
-
-static struct pp_smu_funcs *dcn20_pp_smu_create(struct dc_context *ctx)
-{
- struct pp_smu_funcs *pp_smu = kzalloc(sizeof(*pp_smu), GFP_ATOMIC);
-
- if (!pp_smu)
- return pp_smu;
-
- dm_pp_get_funcs(ctx, pp_smu);
-
- if (pp_smu->ctx.ver != PP_SMU_VER_NV)
- pp_smu = memset(pp_smu, 0, sizeof(struct pp_smu_funcs));
-
- return pp_smu;
-}
-
-static void dcn20_pp_smu_destroy(struct pp_smu_funcs **pp_smu)
-{
- if (pp_smu && *pp_smu) {
- kfree(*pp_smu);
- *pp_smu = NULL;
- }
-}
-
-void dcn20_cap_soc_clocks(
- struct _vcs_dpi_soc_bounding_box_st *bb,
- struct pp_smu_nv_clock_table max_clocks)
-{
- int i;
-
- // First pass - cap all clocks higher than the reported max
- for (i = 0; i < bb->num_states; i++) {
- if ((bb->clock_limits[i].dcfclk_mhz > (max_clocks.dcfClockInKhz / 1000))
- && max_clocks.dcfClockInKhz != 0)
- bb->clock_limits[i].dcfclk_mhz = (max_clocks.dcfClockInKhz / 1000);
-
- if ((bb->clock_limits[i].dram_speed_mts > (max_clocks.uClockInKhz / 1000) * 16)
- && max_clocks.uClockInKhz != 0)
- bb->clock_limits[i].dram_speed_mts = (max_clocks.uClockInKhz / 1000) * 16;
-
- if ((bb->clock_limits[i].fabricclk_mhz > (max_clocks.fabricClockInKhz / 1000))
- && max_clocks.fabricClockInKhz != 0)
- bb->clock_limits[i].fabricclk_mhz = (max_clocks.fabricClockInKhz / 1000);
-
- if ((bb->clock_limits[i].dispclk_mhz > (max_clocks.displayClockInKhz / 1000))
- && max_clocks.displayClockInKhz != 0)
- bb->clock_limits[i].dispclk_mhz = (max_clocks.displayClockInKhz / 1000);
-
- if ((bb->clock_limits[i].dppclk_mhz > (max_clocks.dppClockInKhz / 1000))
- && max_clocks.dppClockInKhz != 0)
- bb->clock_limits[i].dppclk_mhz = (max_clocks.dppClockInKhz / 1000);
-
- if ((bb->clock_limits[i].phyclk_mhz > (max_clocks.phyClockInKhz / 1000))
- && max_clocks.phyClockInKhz != 0)
- bb->clock_limits[i].phyclk_mhz = (max_clocks.phyClockInKhz / 1000);
-
- if ((bb->clock_limits[i].socclk_mhz > (max_clocks.socClockInKhz / 1000))
- && max_clocks.socClockInKhz != 0)
- bb->clock_limits[i].socclk_mhz = (max_clocks.socClockInKhz / 1000);
-
- if ((bb->clock_limits[i].dscclk_mhz > (max_clocks.dscClockInKhz / 1000))
- && max_clocks.dscClockInKhz != 0)
- bb->clock_limits[i].dscclk_mhz = (max_clocks.dscClockInKhz / 1000);
- }
-
- // Second pass - remove all duplicate clock states
- for (i = bb->num_states - 1; i > 1; i--) {
- bool duplicate = true;
-
- if (bb->clock_limits[i-1].dcfclk_mhz != bb->clock_limits[i].dcfclk_mhz)
- duplicate = false;
- if (bb->clock_limits[i-1].dispclk_mhz != bb->clock_limits[i].dispclk_mhz)
- duplicate = false;
- if (bb->clock_limits[i-1].dppclk_mhz != bb->clock_limits[i].dppclk_mhz)
- duplicate = false;
- if (bb->clock_limits[i-1].dram_speed_mts != bb->clock_limits[i].dram_speed_mts)
- duplicate = false;
- if (bb->clock_limits[i-1].dscclk_mhz != bb->clock_limits[i].dscclk_mhz)
- duplicate = false;
- if (bb->clock_limits[i-1].fabricclk_mhz != bb->clock_limits[i].fabricclk_mhz)
- duplicate = false;
- if (bb->clock_limits[i-1].phyclk_mhz != bb->clock_limits[i].phyclk_mhz)
- duplicate = false;
- if (bb->clock_limits[i-1].socclk_mhz != bb->clock_limits[i].socclk_mhz)
- duplicate = false;
-
- if (duplicate)
- bb->num_states--;
- }
-}
-
-void dcn20_update_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb,
- struct pp_smu_nv_clock_table *max_clocks, unsigned int *uclk_states, unsigned int num_states)
-{
- struct _vcs_dpi_voltage_scaling_st calculated_states[DC__VOLTAGE_STATES];
- int i;
- int num_calculated_states = 0;
- int min_dcfclk = 0;
-
- if (num_states == 0)
- return;
-
- memset(calculated_states, 0, sizeof(calculated_states));
-
- if (dc->bb_overrides.min_dcfclk_mhz > 0)
- min_dcfclk = dc->bb_overrides.min_dcfclk_mhz;
- else {
- if (ASICREV_IS_NAVI12_P(dc->ctx->asic_id.hw_internal_rev))
- min_dcfclk = 310;
- else
- // Accounting for SOC/DCF relationship, we can go as high as
- // 506Mhz in Vmin.
- min_dcfclk = 506;
- }
-
- for (i = 0; i < num_states; i++) {
- int min_fclk_required_by_uclk;
- calculated_states[i].state = i;
- calculated_states[i].dram_speed_mts = uclk_states[i] * 16 / 1000;
-
- // FCLK:UCLK ratio is 1.08
- min_fclk_required_by_uclk = div_u64(((unsigned long long)uclk_states[i]) * 1080,
- 1000000);
-
- calculated_states[i].fabricclk_mhz = (min_fclk_required_by_uclk < min_dcfclk) ?
- min_dcfclk : min_fclk_required_by_uclk;
-
- calculated_states[i].socclk_mhz = (calculated_states[i].fabricclk_mhz > max_clocks->socClockInKhz / 1000) ?
- max_clocks->socClockInKhz / 1000 : calculated_states[i].fabricclk_mhz;
-
- calculated_states[i].dcfclk_mhz = (calculated_states[i].fabricclk_mhz > max_clocks->dcfClockInKhz / 1000) ?
- max_clocks->dcfClockInKhz / 1000 : calculated_states[i].fabricclk_mhz;
-
- calculated_states[i].dispclk_mhz = max_clocks->displayClockInKhz / 1000;
- calculated_states[i].dppclk_mhz = max_clocks->displayClockInKhz / 1000;
- calculated_states[i].dscclk_mhz = max_clocks->displayClockInKhz / (1000 * 3);
-
- calculated_states[i].phyclk_mhz = max_clocks->phyClockInKhz / 1000;
-
- num_calculated_states++;
- }
-
- calculated_states[num_calculated_states - 1].socclk_mhz = max_clocks->socClockInKhz / 1000;
- calculated_states[num_calculated_states - 1].fabricclk_mhz = max_clocks->socClockInKhz / 1000;
- calculated_states[num_calculated_states - 1].dcfclk_mhz = max_clocks->dcfClockInKhz / 1000;
-
- memcpy(bb->clock_limits, calculated_states, sizeof(bb->clock_limits));
- bb->num_states = num_calculated_states;
-
- // Duplicate the last state, DML always an extra state identical to max state to work
- memcpy(&bb->clock_limits[num_calculated_states], &bb->clock_limits[num_calculated_states - 1], sizeof(struct _vcs_dpi_voltage_scaling_st));
- bb->clock_limits[num_calculated_states].state = bb->num_states;
-}
-
-void dcn20_patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb)
-{
- if ((int)(bb->sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns
- && dc->bb_overrides.sr_exit_time_ns) {
- bb->sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
- }
-
- if ((int)(bb->sr_enter_plus_exit_time_us * 1000)
- != dc->bb_overrides.sr_enter_plus_exit_time_ns
- && dc->bb_overrides.sr_enter_plus_exit_time_ns) {
- bb->sr_enter_plus_exit_time_us =
- dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
- }
-
- if ((int)(bb->urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns
- && dc->bb_overrides.urgent_latency_ns) {
- bb->urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0;
- }
-
- if ((int)(bb->dram_clock_change_latency_us * 1000)
- != dc->bb_overrides.dram_clock_change_latency_ns
- && dc->bb_overrides.dram_clock_change_latency_ns) {
- bb->dram_clock_change_latency_us =
- dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
- }
-
- if ((int)(bb->dummy_pstate_latency_us * 1000)
- != dc->bb_overrides.dummy_clock_change_latency_ns
- && dc->bb_overrides.dummy_clock_change_latency_ns) {
- bb->dummy_pstate_latency_us =
- dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0;
- }
-}
-
-static struct _vcs_dpi_soc_bounding_box_st *get_asic_rev_soc_bb(
- uint32_t hw_internal_rev)
-{
- if (ASICREV_IS_NAVI14_M(hw_internal_rev))
- return &dcn2_0_nv14_soc;
-
- if (ASICREV_IS_NAVI12_P(hw_internal_rev))
- return &dcn2_0_nv12_soc;
-
- return &dcn2_0_soc;
-}
-
-static struct _vcs_dpi_ip_params_st *get_asic_rev_ip_params(
- uint32_t hw_internal_rev)
-{
- /* NV14 */
- if (ASICREV_IS_NAVI14_M(hw_internal_rev))
- return &dcn2_0_nv14_ip;
-
- /* NV12 and NV10 */
- return &dcn2_0_ip;
-}
-
-static enum dml_project get_dml_project_version(uint32_t hw_internal_rev)
-{
- return DML_PROJECT_NAVI10v2;
-}
-
-static bool init_soc_bounding_box(struct dc *dc,
- struct dcn20_resource_pool *pool)
-{
- struct _vcs_dpi_soc_bounding_box_st *loaded_bb =
- get_asic_rev_soc_bb(dc->ctx->asic_id.hw_internal_rev);
- struct _vcs_dpi_ip_params_st *loaded_ip =
- get_asic_rev_ip_params(dc->ctx->asic_id.hw_internal_rev);
-
- DC_LOGGER_INIT(dc->ctx->logger);
-
- if (pool->base.pp_smu) {
- struct pp_smu_nv_clock_table max_clocks = {0};
- unsigned int uclk_states[8] = {0};
- unsigned int num_states = 0;
- enum pp_smu_status status;
- bool clock_limits_available = false;
- bool uclk_states_available = false;
-
- if (pool->base.pp_smu->nv_funcs.get_uclk_dpm_states) {
- status = (pool->base.pp_smu->nv_funcs.get_uclk_dpm_states)
- (&pool->base.pp_smu->nv_funcs.pp_smu, uclk_states, &num_states);
-
- uclk_states_available = (status == PP_SMU_RESULT_OK);
- }
-
- if (pool->base.pp_smu->nv_funcs.get_maximum_sustainable_clocks) {
- status = (*pool->base.pp_smu->nv_funcs.get_maximum_sustainable_clocks)
- (&pool->base.pp_smu->nv_funcs.pp_smu, &max_clocks);
- /* SMU cannot set DCF clock to anything equal to or higher than SOC clock
- */
- if (max_clocks.dcfClockInKhz >= max_clocks.socClockInKhz)
- max_clocks.dcfClockInKhz = max_clocks.socClockInKhz - 1000;
- clock_limits_available = (status == PP_SMU_RESULT_OK);
- }
-
- if (clock_limits_available && uclk_states_available && num_states) {
- DC_FP_START();
- dcn20_update_bounding_box(dc, loaded_bb, &max_clocks, uclk_states, num_states);
- DC_FP_END();
- } else if (clock_limits_available) {
- DC_FP_START();
- dcn20_cap_soc_clocks(loaded_bb, max_clocks);
- DC_FP_END();
- }
- }
-
- loaded_ip->max_num_otg = pool->base.res_cap->num_timing_generator;
- loaded_ip->max_num_dpp = pool->base.pipe_count;
- DC_FP_START();
- dcn20_patch_bounding_box(dc, loaded_bb);
- DC_FP_END();
- return true;
-}
-
-static bool dcn20_resource_construct(
- uint8_t num_virtual_links,
- struct dc *dc,
- struct dcn20_resource_pool *pool)
-{
- int i;
- struct dc_context *ctx = dc->ctx;
- struct irq_service_init_data init_data;
- struct ddc_service_init_data ddc_init_data = {0};
- struct _vcs_dpi_soc_bounding_box_st *loaded_bb =
- get_asic_rev_soc_bb(ctx->asic_id.hw_internal_rev);
- struct _vcs_dpi_ip_params_st *loaded_ip =
- get_asic_rev_ip_params(ctx->asic_id.hw_internal_rev);
- enum dml_project dml_project_version =
- get_dml_project_version(ctx->asic_id.hw_internal_rev);
-
- ctx->dc_bios->regs = &bios_regs;
- pool->base.funcs = &dcn20_res_pool_funcs;
-
- if (ASICREV_IS_NAVI14_M(ctx->asic_id.hw_internal_rev)) {
- pool->base.res_cap = &res_cap_nv14;
- pool->base.pipe_count = 5;
- pool->base.mpcc_count = 5;
- } else {
- pool->base.res_cap = &res_cap_nv10;
- pool->base.pipe_count = 6;
- pool->base.mpcc_count = 6;
- }
- /*************************************************
- * Resource + asic cap harcoding *
- *************************************************/
- pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE;
-
- dc->caps.max_downscale_ratio = 200;
- dc->caps.i2c_speed_in_khz = 100;
- dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a not applied by default*/
- dc->caps.max_cursor_size = 256;
- dc->caps.min_horizontal_blanking_period = 80;
- dc->caps.dmdata_alloc_size = 2048;
-
- dc->caps.max_slave_planes = 1;
- dc->caps.max_slave_yuv_planes = 1;
- dc->caps.max_slave_rgb_planes = 1;
- dc->caps.post_blend_color_processing = true;
- dc->caps.force_dp_tps4_for_cp2520 = true;
- dc->caps.extended_aux_timeout_support = true;
-
- /* Color pipeline capabilities */
- dc->caps.color.dpp.dcn_arch = 1;
- dc->caps.color.dpp.input_lut_shared = 0;
- dc->caps.color.dpp.icsc = 1;
- dc->caps.color.dpp.dgam_ram = 1;
- dc->caps.color.dpp.dgam_rom_caps.srgb = 1;
- dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1;
- dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 0;
- dc->caps.color.dpp.dgam_rom_caps.pq = 0;
- dc->caps.color.dpp.dgam_rom_caps.hlg = 0;
- dc->caps.color.dpp.post_csc = 0;
- dc->caps.color.dpp.gamma_corr = 0;
- dc->caps.color.dpp.dgam_rom_for_yuv = 1;
-
- dc->caps.color.dpp.hw_3d_lut = 1;
- dc->caps.color.dpp.ogam_ram = 1;
- // no OGAM ROM on DCN2, only MPC ROM
- dc->caps.color.dpp.ogam_rom_caps.srgb = 0;
- dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0;
- dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0;
- dc->caps.color.dpp.ogam_rom_caps.pq = 0;
- dc->caps.color.dpp.ogam_rom_caps.hlg = 0;
- dc->caps.color.dpp.ocsc = 0;
-
- dc->caps.color.mpc.gamut_remap = 0;
- dc->caps.color.mpc.num_3dluts = 0;
- dc->caps.color.mpc.shared_3d_lut = 0;
- dc->caps.color.mpc.ogam_ram = 1;
- dc->caps.color.mpc.ogam_rom_caps.srgb = 0;
- dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0;
- dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0;
- dc->caps.color.mpc.ogam_rom_caps.pq = 0;
- dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
- dc->caps.color.mpc.ocsc = 1;
-
- if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) {
- dc->debug = debug_defaults_drv;
- } else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) {
- pool->base.pipe_count = 4;
- pool->base.mpcc_count = pool->base.pipe_count;
- dc->debug = debug_defaults_diags;
- } else {
- dc->debug = debug_defaults_diags;
- }
- //dcn2.0x
- dc->work_arounds.dedcn20_305_wa = true;
-
- // Init the vm_helper
- if (dc->vm_helper)
- vm_helper_init(dc->vm_helper, 16);
-
- /*************************************************
- * Create resources *
- *************************************************/
-
- pool->base.clock_sources[DCN20_CLK_SRC_PLL0] =
- dcn20_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL0,
- &clk_src_regs[0], false);
- pool->base.clock_sources[DCN20_CLK_SRC_PLL1] =
- dcn20_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL1,
- &clk_src_regs[1], false);
- pool->base.clock_sources[DCN20_CLK_SRC_PLL2] =
- dcn20_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL2,
- &clk_src_regs[2], false);
- pool->base.clock_sources[DCN20_CLK_SRC_PLL3] =
- dcn20_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL3,
- &clk_src_regs[3], false);
- pool->base.clock_sources[DCN20_CLK_SRC_PLL4] =
- dcn20_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL4,
- &clk_src_regs[4], false);
- pool->base.clock_sources[DCN20_CLK_SRC_PLL5] =
- dcn20_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL5,
- &clk_src_regs[5], false);
- pool->base.clk_src_count = DCN20_CLK_SRC_TOTAL;
- /* todo: not reuse phy_pll registers */
- pool->base.dp_clock_source =
- dcn20_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_ID_DP_DTO,
- &clk_src_regs[0], true);
-
- for (i = 0; i < pool->base.clk_src_count; i++) {
- if (pool->base.clock_sources[i] == NULL) {
- dm_error("DC: failed to create clock sources!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
- }
-
- pool->base.dccg = dccg2_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask);
- if (pool->base.dccg == NULL) {
- dm_error("DC: failed to create dccg!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
-
- pool->base.dmcu = dcn20_dmcu_create(ctx,
- &dmcu_regs,
- &dmcu_shift,
- &dmcu_mask);
- if (pool->base.dmcu == NULL) {
- dm_error("DC: failed to create dmcu!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
-
- pool->base.abm = dce_abm_create(ctx,
- &abm_regs,
- &abm_shift,
- &abm_mask);
- if (pool->base.abm == NULL) {
- dm_error("DC: failed to create abm!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
-
- pool->base.pp_smu = dcn20_pp_smu_create(ctx);
-
-
- if (!init_soc_bounding_box(dc, pool)) {
- dm_error("DC: failed to initialize soc bounding box!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
-
- dml_init_instance(&dc->dml, loaded_bb, loaded_ip, dml_project_version);
-
- if (!dc->debug.disable_pplib_wm_range) {
- struct pp_smu_wm_range_sets ranges = {0};
- int i = 0;
-
- ranges.num_reader_wm_sets = 0;
-
- if (loaded_bb->num_states == 1) {
- ranges.reader_wm_sets[0].wm_inst = i;
- ranges.reader_wm_sets[0].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN;
- ranges.reader_wm_sets[0].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX;
- ranges.reader_wm_sets[0].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN;
- ranges.reader_wm_sets[0].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX;
-
- ranges.num_reader_wm_sets = 1;
- } else if (loaded_bb->num_states > 1) {
- for (i = 0; i < 4 && i < loaded_bb->num_states; i++) {
- ranges.reader_wm_sets[i].wm_inst = i;
- ranges.reader_wm_sets[i].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN;
- ranges.reader_wm_sets[i].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX;
- ranges.reader_wm_sets[i].min_fill_clk_mhz = (i > 0) ? (loaded_bb->clock_limits[i - 1].dram_speed_mts / 16) + 1 : 0;
- ranges.reader_wm_sets[i].max_fill_clk_mhz = loaded_bb->clock_limits[i].dram_speed_mts / 16;
-
- ranges.num_reader_wm_sets = i + 1;
- }
-
- ranges.reader_wm_sets[0].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN;
- ranges.reader_wm_sets[ranges.num_reader_wm_sets - 1].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX;
- }
-
- ranges.num_writer_wm_sets = 1;
-
- ranges.writer_wm_sets[0].wm_inst = 0;
- ranges.writer_wm_sets[0].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN;
- ranges.writer_wm_sets[0].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX;
- ranges.writer_wm_sets[0].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN;
- ranges.writer_wm_sets[0].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX;
-
- /* Notify PP Lib/SMU which Watermarks to use for which clock ranges */
- if (pool->base.pp_smu->nv_funcs.set_wm_ranges)
- pool->base.pp_smu->nv_funcs.set_wm_ranges(&pool->base.pp_smu->nv_funcs.pp_smu, &ranges);
- }
-
- init_data.ctx = dc->ctx;
- pool->base.irqs = dal_irq_service_dcn20_create(&init_data);
- if (!pool->base.irqs)
- goto create_fail;
-
- /* mem input -> ipp -> dpp -> opp -> TG */
- for (i = 0; i < pool->base.pipe_count; i++) {
- pool->base.hubps[i] = dcn20_hubp_create(ctx, i);
- if (pool->base.hubps[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error(
- "DC: failed to create memory input!\n");
- goto create_fail;
- }
-
- pool->base.ipps[i] = dcn20_ipp_create(ctx, i);
- if (pool->base.ipps[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error(
- "DC: failed to create input pixel processor!\n");
- goto create_fail;
- }
-
- pool->base.dpps[i] = dcn20_dpp_create(ctx, i);
- if (pool->base.dpps[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error(
- "DC: failed to create dpps!\n");
- goto create_fail;
- }
- }
- for (i = 0; i < pool->base.res_cap->num_ddc; i++) {
- pool->base.engines[i] = dcn20_aux_engine_create(ctx, i);
- if (pool->base.engines[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error(
- "DC:failed to create aux engine!!\n");
- goto create_fail;
- }
- pool->base.hw_i2cs[i] = dcn20_i2c_hw_create(ctx, i);
- if (pool->base.hw_i2cs[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error(
- "DC:failed to create hw i2c!!\n");
- goto create_fail;
- }
- pool->base.sw_i2cs[i] = NULL;
- }
-
- for (i = 0; i < pool->base.res_cap->num_opp; i++) {
- pool->base.opps[i] = dcn20_opp_create(ctx, i);
- if (pool->base.opps[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error(
- "DC: failed to create output pixel processor!\n");
- goto create_fail;
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
- pool->base.timing_generators[i] = dcn20_timing_generator_create(
- ctx, i);
- if (pool->base.timing_generators[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create tg!\n");
- goto create_fail;
- }
- }
-
- pool->base.timing_generator_count = i;
-
- pool->base.mpc = dcn20_mpc_create(ctx);
- if (pool->base.mpc == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create mpc!\n");
- goto create_fail;
- }
-
- pool->base.hubbub = dcn20_hubbub_create(ctx);
- if (pool->base.hubbub == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create hubbub!\n");
- goto create_fail;
- }
-
- for (i = 0; i < pool->base.res_cap->num_dsc; i++) {
- pool->base.dscs[i] = dcn20_dsc_create(ctx, i);
- if (pool->base.dscs[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create display stream compressor %d!\n", i);
- goto create_fail;
- }
- }
-
- if (!dcn20_dwbc_create(ctx, &pool->base)) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create dwbc!\n");
- goto create_fail;
- }
- if (!dcn20_mmhubbub_create(ctx, &pool->base)) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create mcif_wb!\n");
- goto create_fail;
- }
-
- if (!resource_construct(num_virtual_links, dc, &pool->base,
- (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment) ?
- &res_create_funcs : &res_create_maximus_funcs)))
- goto create_fail;
-
- dcn20_hw_sequencer_construct(dc);
-
- // IF NV12, set PG function pointer to NULL. It's not that
- // PG isn't supported for NV12, it's that we don't want to
- // program the registers because that will cause more power
- // to be consumed. We could have created dcn20_init_hw to get
- // the same effect by checking ASIC rev, but there was a
- // request at some point to not check ASIC rev on hw sequencer.
- if (ASICREV_IS_NAVI12_P(dc->ctx->asic_id.hw_internal_rev)) {
- dc->hwseq->funcs.enable_power_gating_plane = NULL;
- dc->debug.disable_dpp_power_gate = true;
- dc->debug.disable_hubp_power_gate = true;
- }
-
-
- dc->caps.max_planes = pool->base.pipe_count;
-
- for (i = 0; i < dc->caps.max_planes; ++i)
- dc->caps.planes[i] = plane_cap;
-
- dc->cap_funcs = cap_funcs;
-
- if (dc->ctx->dc_bios->fw_info.oem_i2c_present) {
- ddc_init_data.ctx = dc->ctx;
- ddc_init_data.link = NULL;
- ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id;
- ddc_init_data.id.enum_id = 0;
- ddc_init_data.id.type = OBJECT_TYPE_GENERIC;
- pool->base.oem_device = dal_ddc_service_create(&ddc_init_data);
- } else {
- pool->base.oem_device = NULL;
- }
-
- return true;
-
-create_fail:
-
- dcn20_resource_destruct(pool);
-
- return false;
-}
-
-struct resource_pool *dcn20_create_resource_pool(
- const struct dc_init_data *init_data,
- struct dc *dc)
-{
- struct dcn20_resource_pool *pool =
- kzalloc(sizeof(struct dcn20_resource_pool), GFP_ATOMIC);
-
- if (!pool)
- return NULL;
-
- if (dcn20_resource_construct(init_data->num_virtual_links, dc, pool))
- return &pool->base;
-
- BREAK_TO_DEBUGGER();
- kfree(pool);
- return NULL;
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.c
index 96c263223315..5bc3bc60a2ac 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.c
@@ -38,6 +38,9 @@
#define FN(reg_name, field_name) \
vmid->shifts->field_name, vmid->masks->field_name
+#define DC_LOGGER \
+ CTX->logger
+
static void dcn20_wait_for_vmid_ready(struct dcn20_vmid *vmid)
{
/* According the hardware spec, we need to poll for the lowest
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.h
index f1ef46e8da5b..e7a1b7fa2cce 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.h
@@ -28,12 +28,6 @@
#include "vmid.h"
-#define BASE_INNER(seg) \
- DCE_BASE__INST0_SEG ## seg
-
-#define BASE(seg) \
- BASE_INNER(seg)
-
#define DCN20_VMID_REG_LIST(id)\
SRI(CNTL, DCN_VM_CONTEXT, id),\
SRI(PAGE_TABLE_BASE_ADDR_HI32, DCN_VM_CONTEXT, id),\
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
index f68038ceb1b1..c9f4a5a9f522 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
@@ -1,36 +1,9 @@
# SPDX-License-Identifier: MIT
-#
-# Makefile for DCN.
-DCN201 = dcn201_init.o dcn201_resource.o dcn201_hwseq.o \
- dcn201_hubbub.o\
- dcn201_mpc.o dcn201_hubp.o dcn201_opp.o dcn201_optc.o dcn201_dpp.o \
- dcn201_dccg.o dcn201_link_encoder.o
+# Copyright © 2021-2024 Advanced Micro Devices, Inc. All rights reserved.
-ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn201/dcn201_resource.o := -mhard-float -msse
-endif
+DCN201 = dcn201_mpc.o dcn201_opp.o \
+ dcn201_link_encoder.o
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/dcn201/dcn201_resource.o := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-CFLAGS_$(AMDDALPATH)/dc/dcn201/dcn201_resource.o += -mhard-float
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-CFLAGS_$(AMDDALPATH)/dc/dcn201/dcn201_resource.o += -mpreferred-stack-boundary=4
-else
-CFLAGS_$(AMDDALPATH)/dc/dcn201/dcn201_resource.o += -msse2
-endif
-endif
AMD_DAL_DCN201 = $(addprefix $(AMDDALPATH)/dc/dcn201/,$(DCN201))
AMD_DISPLAY_FILES += $(AMD_DAL_DCN201)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.c
index a65e8f7801db..8d31fa131cd6 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.c
@@ -29,7 +29,6 @@
#include "link_encoder.h"
#include "dcn201_link_encoder.h"
#include "stream_encoder.h"
-#include "i2caux_interface.h"
#include "dc_bios_types.h"
#include "gpio_service_interface.h"
@@ -50,8 +49,8 @@
#define IND_REG(index) \
(enc10->link_regs->index)
-void dcn201_link_encoder_get_max_link_cap(struct link_encoder *enc,
- struct dc_link_settings *link_settings)
+static void dcn201_link_encoder_get_max_link_cap(struct link_encoder *enc,
+ struct dc_link_settings *link_settings)
{
uint32_t value1, value2;
struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
@@ -66,7 +65,7 @@ void dcn201_link_encoder_get_max_link_cap(struct link_encoder *enc,
}
}
-bool dcn201_link_encoder_is_in_alt_mode(struct link_encoder *enc)
+static bool dcn201_link_encoder_is_in_alt_mode(struct link_encoder *enc)
{
uint32_t value;
struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.h
index 8b95ef251332..be25e8dc0636 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.h
@@ -30,6 +30,10 @@
#define DPCS_DCN201_MASK_SH_LIST(mask_sh)\
DPCS_MASK_SH_LIST(mask_sh),\
+ LE_SF(DPCSSYS_CR0_RAWLANE0_DIG_PCS_XF_RX_OVRD_IN_2, VCO_LD_VAL_OVRD, mask_sh),\
+ LE_SF(DPCSSYS_CR0_RAWLANE0_DIG_PCS_XF_RX_OVRD_IN_2, VCO_LD_VAL_OVRD_EN, mask_sh),\
+ LE_SF(DPCSSYS_CR0_RAWLANE0_DIG_PCS_XF_RX_OVRD_IN_3, REF_LD_VAL_OVRD, mask_sh),\
+ LE_SF(DPCSSYS_CR0_RAWLANE0_DIG_PCS_XF_RX_OVRD_IN_3, REF_LD_VAL_OVRD_EN, mask_sh),\
LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL2, RDPCS_PHY_DPALT_DISABLE_ACK, mask_sh),\
LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL2, RDPCS_PHY_DPALT_DISABLE, mask_sh),\
LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL2, RDPCS_PHY_DPALT_DP4, mask_sh),\
@@ -44,7 +48,15 @@
LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL11, RDPCS_PHY_DP_REF_CLK_EN, mask_sh)
#define DPCS_DCN201_REG_LIST(id) \
- DPCS_DCN2_CMN_REG_LIST(id)
+ DPCS_DCN2_CMN_REG_LIST(id), \
+ SRI_IX(RAWLANE0_DIG_PCS_XF_RX_OVRD_IN_2, DPCSSYS_CR, id), \
+ SRI_IX(RAWLANE0_DIG_PCS_XF_RX_OVRD_IN_3, DPCSSYS_CR, id), \
+ SRI_IX(RAWLANE1_DIG_PCS_XF_RX_OVRD_IN_2, DPCSSYS_CR, id), \
+ SRI_IX(RAWLANE1_DIG_PCS_XF_RX_OVRD_IN_3, DPCSSYS_CR, id), \
+ SRI_IX(RAWLANE2_DIG_PCS_XF_RX_OVRD_IN_2, DPCSSYS_CR, id), \
+ SRI_IX(RAWLANE2_DIG_PCS_XF_RX_OVRD_IN_3, DPCSSYS_CR, id), \
+ SRI_IX(RAWLANE3_DIG_PCS_XF_RX_OVRD_IN_2, DPCSSYS_CR, id), \
+ SRI_IX(RAWLANE3_DIG_PCS_XF_RX_OVRD_IN_3, DPCSSYS_CR, id)
void dcn201_link_encoder_construct(
struct dcn20_link_encoder *enc20,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_mpc.c
index 95c4c55f067c..1af03a86ec9b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_mpc.c
@@ -76,7 +76,7 @@ static void mpc201_init_mpcc(struct mpcc *mpcc, int mpcc_inst)
mpcc->shared_bottom = false;
}
-const struct mpc_funcs dcn201_mpc_funcs = {
+static const struct mpc_funcs dcn201_mpc_funcs = {
.read_mpcc_state = mpc1_read_mpcc_state,
.insert_plane = mpc1_insert_plane,
.remove_mpcc = mpc1_remove_mpcc,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.c
index 8e77db46a409..e83367a9b6b3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.c
@@ -50,9 +50,11 @@ static struct opp_funcs dcn201_opp_funcs = {
.opp_set_disp_pattern_generator = opp2_set_disp_pattern_generator,
.opp_program_dpg_dimensions = opp2_program_dpg_dimensions,
.dpg_is_blanked = opp2_dpg_is_blanked,
+ .dpg_is_pending = opp2_dpg_is_pending,
.opp_dpg_set_blank_color = opp2_dpg_set_blank_color,
.opp_destroy = opp1_destroy,
.opp_program_left_edge_extra_pixel = opp2_program_left_edge_extra_pixel,
+ .opp_get_left_edge_extra_pixel_count = opp2_get_left_edge_extra_pixel_count,
};
void dcn201_opp_construct(struct dcn201_opp *oppn201,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.h b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.h
index aca389ec1779..edb7f9653cb6 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.h
@@ -42,8 +42,7 @@
OPP_MASK_SH_LIST_DCN20(mask_sh)
#define OPP_DCN201_REG_FIELD_LIST(type) \
- OPP_DCN20_REG_FIELD_LIST(type);
-
+ OPP_DCN20_REG_FIELD_LIST(type)
struct dcn201_opp_shift {
OPP_DCN201_REG_FIELD_LIST(uint8_t);
};
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
index bb8c95141082..c215f3cc6e44 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
@@ -1,34 +1,7 @@
# SPDX-License-Identifier: MIT
-#
-# Makefile for DCN21.
+# Copyright © 2019-2024 Advanced Micro Devices, Inc. All rights reserved.
-DCN21 = dcn21_init.o dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o \
- dcn21_hwseq.o dcn21_link_encoder.o dcn21_dccg.o
-
-ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse
-endif
-
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -mpreferred-stack-boundary=4
-else
-CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -msse2
-endif
-endif
+DCN21 = dcn21_link_encoder.o
AMD_DAL_DCN21 = $(addprefix $(AMDDALPATH)/dc/dcn21/,$(DCN21))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c
deleted file mode 100644
index 69cc192a7e71..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- * Copyright 2016 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include "dm_services.h"
-#include "dm_helpers.h"
-#include "core_types.h"
-#include "resource.h"
-#include "dce/dce_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
-#include "dcn21_hwseq.h"
-#include "vmid.h"
-#include "reg_helper.h"
-#include "hw/clk_mgr.h"
-#include "dc_dmub_srv.h"
-#include "abm.h"
-
-
-#define DC_LOGGER_INIT(logger)
-
-#define CTX \
- hws->ctx
-#define REG(reg)\
- hws->regs->reg
-
-#undef FN
-#define FN(reg_name, field_name) \
- hws->shifts->field_name, hws->masks->field_name
-
-/* Temporary read settings, future will get values from kmd directly */
-static void mmhub_update_page_table_config(struct dcn_hubbub_phys_addr_config *config,
- struct dce_hwseq *hws)
-{
- uint32_t page_table_base_hi;
- uint32_t page_table_base_lo;
-
- REG_GET(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
- PAGE_DIRECTORY_ENTRY_HI32, &page_table_base_hi);
- REG_GET(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
- PAGE_DIRECTORY_ENTRY_LO32, &page_table_base_lo);
-
- config->gart_config.page_table_base_addr = ((uint64_t)page_table_base_hi << 32) | page_table_base_lo;
-
-}
-
-int dcn21_init_sys_ctx(struct dce_hwseq *hws, struct dc *dc, struct dc_phy_addr_space_config *pa_config)
-{
- struct dcn_hubbub_phys_addr_config config;
-
- config.system_aperture.fb_top = pa_config->system_aperture.fb_top;
- config.system_aperture.fb_offset = pa_config->system_aperture.fb_offset;
- config.system_aperture.fb_base = pa_config->system_aperture.fb_base;
- config.system_aperture.agp_top = pa_config->system_aperture.agp_top;
- config.system_aperture.agp_bot = pa_config->system_aperture.agp_bot;
- config.system_aperture.agp_base = pa_config->system_aperture.agp_base;
- config.gart_config.page_table_start_addr = pa_config->gart_config.page_table_start_addr;
- config.gart_config.page_table_end_addr = pa_config->gart_config.page_table_end_addr;
- config.gart_config.page_table_base_addr = pa_config->gart_config.page_table_base_addr;
-
- mmhub_update_page_table_config(&config, hws);
-
- return dc->res_pool->hubbub->funcs->init_dchub_sys_ctx(dc->res_pool->hubbub, &config);
-}
-
-// work around for Renoir s0i3, if register is programmed, bypass golden init.
-
-bool dcn21_s0i3_golden_init_wa(struct dc *dc)
-{
- struct dce_hwseq *hws = dc->hwseq;
- uint32_t value = 0;
-
- value = REG_READ(MICROSECOND_TIME_BASE_DIV);
-
- return value != 0x00120464;
-}
-
-void dcn21_exit_optimized_pwr_state(
- const struct dc *dc,
- struct dc_state *context)
-{
- dc->clk_mgr->funcs->update_clocks(
- dc->clk_mgr,
- context,
- false);
-}
-
-void dcn21_optimize_pwr_state(
- const struct dc *dc,
- struct dc_state *context)
-{
- dc->clk_mgr->funcs->update_clocks(
- dc->clk_mgr,
- context,
- true);
-}
-
-/* If user hotplug a HDMI monitor while in monitor off,
- * OS will do a mode set (with output timing) but keep output off.
- * In this case DAL will ask vbios to power up the pll in the PHY.
- * If user unplug the monitor (while we are on monitor off) or
- * system attempt to enter modern standby (which we will disable PLL),
- * PHY will hang on the next mode set attempt.
- * if enable PLL follow by disable PLL (without executing lane enable/disable),
- * RDPCS_PHY_DP_MPLLB_STATE remains 1,
- * which indicate that PLL disable attempt actually didn't go through.
- * As a workaround, insert PHY lane enable/disable before PLL disable.
- */
-void dcn21_PLAT_58856_wa(struct dc_state *context, struct pipe_ctx *pipe_ctx)
-{
- if (!pipe_ctx->stream->dpms_off)
- return;
-
- pipe_ctx->stream->dpms_off = false;
- core_link_enable_stream(context, pipe_ctx);
- core_link_disable_stream(pipe_ctx);
- pipe_ctx->stream->dpms_off = true;
-}
-
-static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst)
-{
- union dmub_rb_cmd cmd;
- struct dc_context *dc = abm->ctx;
- uint32_t ramping_boundary = 0xFFFF;
-
- memset(&cmd, 0, sizeof(cmd));
- cmd.abm_set_pipe.header.type = DMUB_CMD__ABM;
- cmd.abm_set_pipe.header.sub_type = DMUB_CMD__ABM_SET_PIPE;
- cmd.abm_set_pipe.abm_set_pipe_data.otg_inst = otg_inst;
- cmd.abm_set_pipe.abm_set_pipe_data.set_pipe_option = option;
- cmd.abm_set_pipe.abm_set_pipe_data.panel_inst = panel_inst;
- cmd.abm_set_pipe.abm_set_pipe_data.ramping_boundary = ramping_boundary;
- cmd.abm_set_pipe.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pipe_data);
-
- dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
- dc_dmub_srv_cmd_execute(dc->dmub_srv);
- dc_dmub_srv_wait_idle(dc->dmub_srv);
-
- return true;
-}
-
-void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx)
-{
- struct abm *abm = pipe_ctx->stream_res.abm;
- uint32_t otg_inst = pipe_ctx->stream_res.tg->inst;
- struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl;
-
- struct dmcu *dmcu = pipe_ctx->stream->ctx->dc->res_pool->dmcu;
-
- if (dmcu) {
- dce110_set_abm_immediate_disable(pipe_ctx);
- return;
- }
-
- if (abm && panel_cntl) {
- dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_IMMEDIATELY_DISABLE,
- panel_cntl->inst);
- panel_cntl->funcs->store_backlight_level(panel_cntl);
- }
-}
-
-void dcn21_set_pipe(struct pipe_ctx *pipe_ctx)
-{
- struct abm *abm = pipe_ctx->stream_res.abm;
- uint32_t otg_inst = pipe_ctx->stream_res.tg->inst;
- struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl;
- struct dmcu *dmcu = pipe_ctx->stream->ctx->dc->res_pool->dmcu;
-
- if (dmcu) {
- dce110_set_pipe(pipe_ctx);
- return;
- }
-
- if (abm && panel_cntl)
- dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst);
-}
-
-bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx,
- uint32_t backlight_pwm_u16_16,
- uint32_t frame_ramp)
-{
- union dmub_rb_cmd cmd;
- struct dc_context *dc = pipe_ctx->stream->ctx;
- struct abm *abm = pipe_ctx->stream_res.abm;
- uint32_t otg_inst = pipe_ctx->stream_res.tg->inst;
- struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl;
-
- if (dc->dc->res_pool->dmcu) {
- dce110_set_backlight_level(pipe_ctx, backlight_pwm_u16_16, frame_ramp);
- return true;
- }
-
- if (abm && panel_cntl)
- dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst);
-
- memset(&cmd, 0, sizeof(cmd));
- cmd.abm_set_backlight.header.type = DMUB_CMD__ABM;
- cmd.abm_set_backlight.header.sub_type = DMUB_CMD__ABM_SET_BACKLIGHT;
- cmd.abm_set_backlight.abm_set_backlight_data.frame_ramp = frame_ramp;
- cmd.abm_set_backlight.abm_set_backlight_data.backlight_user_level = backlight_pwm_u16_16;
- cmd.abm_set_backlight.abm_set_backlight_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1;
- cmd.abm_set_backlight.abm_set_backlight_data.panel_mask = (0x01 << panel_cntl->inst);
- cmd.abm_set_backlight.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_backlight_data);
-
- dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
- dc_dmub_srv_cmd_execute(dc->dmub_srv);
- dc_dmub_srv_wait_idle(dc->dmub_srv);
-
- return true;
-}
-
-bool dcn21_is_abm_supported(struct dc *dc,
- struct dc_state *context, struct dc_stream_state *stream)
-{
- int i;
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-
- if (pipe_ctx->stream == stream &&
- (pipe_ctx->prev_odm_pipe == NULL && pipe_ctx->next_odm_pipe == NULL))
- return true;
- }
- return false;
-}
-
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.c
index aa46c35b05a2..eb9abb9f9698 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.c
@@ -31,7 +31,6 @@
#include "dcn21_link_encoder.h"
#include "stream_encoder.h"
-#include "i2caux_interface.h"
#include "dc_bios_types.h"
#include "gpio_service_interface.h"
@@ -203,7 +202,7 @@ static bool update_cfg_data(
return true;
}
-bool dcn21_link_encoder_acquire_phy(struct link_encoder *enc)
+static bool dcn21_link_encoder_acquire_phy(struct link_encoder *enc)
{
struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
int value;
@@ -277,7 +276,7 @@ void dcn21_link_encoder_enable_dp_output(
}
-void dcn21_link_encoder_enable_dp_mst_output(
+static void dcn21_link_encoder_enable_dp_mst_output(
struct link_encoder *enc,
const struct dc_link_settings *link_settings,
enum clock_source_id clock_source)
@@ -288,9 +287,8 @@ void dcn21_link_encoder_enable_dp_mst_output(
dcn10_link_encoder_enable_dp_mst_output(enc, link_settings, clock_source);
}
-void dcn21_link_encoder_disable_output(
- struct link_encoder *enc,
- enum signal_type signal)
+static void dcn21_link_encoder_disable_output(struct link_encoder *enc,
+ enum signal_type signal)
{
dcn10_link_encoder_disable_output(enc, signal);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
deleted file mode 100644
index d452a0d1777e..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
+++ /dev/null
@@ -1,2289 +0,0 @@
-/*
-* Copyright 2018 Advanced Micro Devices, Inc.
- * Copyright 2019 Raptor Engineering, LLC
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include <linux/slab.h>
-
-#include "dm_services.h"
-#include "dc.h"
-
-#include "dcn21_init.h"
-
-#include "resource.h"
-#include "include/irq_service_interface.h"
-#include "dcn20/dcn20_resource.h"
-
-#include "dml/dcn20/dcn20_fpu.h"
-
-#include "clk_mgr.h"
-#include "dcn10/dcn10_hubp.h"
-#include "dcn10/dcn10_ipp.h"
-#include "dcn20/dcn20_hubbub.h"
-#include "dcn20/dcn20_mpc.h"
-#include "dcn20/dcn20_hubp.h"
-#include "dcn21_hubp.h"
-#include "irq/dcn21/irq_service_dcn21.h"
-#include "dcn20/dcn20_dpp.h"
-#include "dcn20/dcn20_optc.h"
-#include "dcn21/dcn21_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
-#include "dcn20/dcn20_opp.h"
-#include "dcn20/dcn20_dsc.h"
-#include "dcn21/dcn21_link_encoder.h"
-#include "dcn20/dcn20_stream_encoder.h"
-#include "dce/dce_clock_source.h"
-#include "dce/dce_audio.h"
-#include "dce/dce_hwseq.h"
-#include "virtual/virtual_stream_encoder.h"
-#include "dml/display_mode_vba.h"
-#include "dcn20/dcn20_dccg.h"
-#include "dcn21/dcn21_dccg.h"
-#include "dcn21_hubbub.h"
-#include "dcn10/dcn10_resource.h"
-#include "dce/dce_panel_cntl.h"
-
-#include "dcn20/dcn20_dwb.h"
-#include "dcn20/dcn20_mmhubbub.h"
-#include "dpcs/dpcs_2_1_0_offset.h"
-#include "dpcs/dpcs_2_1_0_sh_mask.h"
-
-#include "renoir_ip_offset.h"
-#include "dcn/dcn_2_1_0_offset.h"
-#include "dcn/dcn_2_1_0_sh_mask.h"
-
-#include "nbio/nbio_7_0_offset.h"
-
-#include "mmhub/mmhub_2_0_0_offset.h"
-#include "mmhub/mmhub_2_0_0_sh_mask.h"
-
-#include "reg_helper.h"
-#include "dce/dce_abm.h"
-#include "dce/dce_dmcu.h"
-#include "dce/dce_aux.h"
-#include "dce/dce_i2c.h"
-#include "dcn21_resource.h"
-#include "vm_helper.h"
-#include "dcn20/dcn20_vmid.h"
-#include "dce/dmub_psr.h"
-#include "dce/dmub_abm.h"
-
-#define DC_LOGGER_INIT(logger)
-
-
-struct _vcs_dpi_ip_params_st dcn2_1_ip = {
- .odm_capable = 1,
- .gpuvm_enable = 1,
- .hostvm_enable = 1,
- .gpuvm_max_page_table_levels = 1,
- .hostvm_max_page_table_levels = 4,
- .hostvm_cached_page_table_levels = 2,
- .num_dsc = 3,
- .rob_buffer_size_kbytes = 168,
- .det_buffer_size_kbytes = 164,
- .dpte_buffer_size_in_pte_reqs_luma = 44,
- .dpte_buffer_size_in_pte_reqs_chroma = 42,//todo
- .dpp_output_buffer_pixels = 2560,
- .opp_output_buffer_lines = 1,
- .pixel_chunk_size_kbytes = 8,
- .pte_enable = 1,
- .max_page_table_levels = 4,
- .pte_chunk_size_kbytes = 2,
- .meta_chunk_size_kbytes = 2,
- .min_meta_chunk_size_bytes = 256,
- .writeback_chunk_size_kbytes = 2,
- .line_buffer_size_bits = 789504,
- .is_line_buffer_bpp_fixed = 0,
- .line_buffer_fixed_bpp = 0,
- .dcc_supported = true,
- .max_line_buffer_lines = 12,
- .writeback_luma_buffer_size_kbytes = 12,
- .writeback_chroma_buffer_size_kbytes = 8,
- .writeback_chroma_line_buffer_width_pixels = 4,
- .writeback_max_hscl_ratio = 1,
- .writeback_max_vscl_ratio = 1,
- .writeback_min_hscl_ratio = 1,
- .writeback_min_vscl_ratio = 1,
- .writeback_max_hscl_taps = 12,
- .writeback_max_vscl_taps = 12,
- .writeback_line_buffer_luma_buffer_size = 0,
- .writeback_line_buffer_chroma_buffer_size = 14643,
- .cursor_buffer_size = 8,
- .cursor_chunk_size = 2,
- .max_num_otg = 4,
- .max_num_dpp = 4,
- .max_num_wb = 1,
- .max_dchub_pscl_bw_pix_per_clk = 4,
- .max_pscl_lb_bw_pix_per_clk = 2,
- .max_lb_vscl_bw_pix_per_clk = 4,
- .max_vscl_hscl_bw_pix_per_clk = 4,
- .max_hscl_ratio = 4,
- .max_vscl_ratio = 4,
- .hscl_mults = 4,
- .vscl_mults = 4,
- .max_hscl_taps = 8,
- .max_vscl_taps = 8,
- .dispclk_ramp_margin_percent = 1,
- .underscan_factor = 1.10,
- .min_vblank_lines = 32, //
- .dppclk_delay_subtotal = 77, //
- .dppclk_delay_scl_lb_only = 16,
- .dppclk_delay_scl = 50,
- .dppclk_delay_cnvc_formatter = 8,
- .dppclk_delay_cnvc_cursor = 6,
- .dispclk_delay_subtotal = 87, //
- .dcfclk_cstate_latency = 10, // SRExitTime
- .max_inter_dcn_tile_repeaters = 8,
-
- .xfc_supported = false,
- .xfc_fill_bw_overhead_percent = 10.0,
- .xfc_fill_constant_bytes = 0,
- .ptoi_supported = 0,
- .number_of_cursors = 1,
-};
-
-struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = {
- .clock_limits = {
- {
- .state = 0,
- .dcfclk_mhz = 400.0,
- .fabricclk_mhz = 400.0,
- .dispclk_mhz = 600.0,
- .dppclk_mhz = 400.00,
- .phyclk_mhz = 600.0,
- .socclk_mhz = 278.0,
- .dscclk_mhz = 205.67,
- .dram_speed_mts = 1600.0,
- },
- {
- .state = 1,
- .dcfclk_mhz = 464.52,
- .fabricclk_mhz = 800.0,
- .dispclk_mhz = 654.55,
- .dppclk_mhz = 626.09,
- .phyclk_mhz = 600.0,
- .socclk_mhz = 278.0,
- .dscclk_mhz = 205.67,
- .dram_speed_mts = 1600.0,
- },
- {
- .state = 2,
- .dcfclk_mhz = 514.29,
- .fabricclk_mhz = 933.0,
- .dispclk_mhz = 757.89,
- .dppclk_mhz = 685.71,
- .phyclk_mhz = 600.0,
- .socclk_mhz = 278.0,
- .dscclk_mhz = 287.67,
- .dram_speed_mts = 1866.0,
- },
- {
- .state = 3,
- .dcfclk_mhz = 576.00,
- .fabricclk_mhz = 1067.0,
- .dispclk_mhz = 847.06,
- .dppclk_mhz = 757.89,
- .phyclk_mhz = 600.0,
- .socclk_mhz = 715.0,
- .dscclk_mhz = 318.334,
- .dram_speed_mts = 2134.0,
- },
- {
- .state = 4,
- .dcfclk_mhz = 626.09,
- .fabricclk_mhz = 1200.0,
- .dispclk_mhz = 900.00,
- .dppclk_mhz = 847.06,
- .phyclk_mhz = 810.0,
- .socclk_mhz = 953.0,
- .dscclk_mhz = 489.0,
- .dram_speed_mts = 2400.0,
- },
- {
- .state = 5,
- .dcfclk_mhz = 685.71,
- .fabricclk_mhz = 1333.0,
- .dispclk_mhz = 1028.57,
- .dppclk_mhz = 960.00,
- .phyclk_mhz = 810.0,
- .socclk_mhz = 278.0,
- .dscclk_mhz = 287.67,
- .dram_speed_mts = 2666.0,
- },
- {
- .state = 6,
- .dcfclk_mhz = 757.89,
- .fabricclk_mhz = 1467.0,
- .dispclk_mhz = 1107.69,
- .dppclk_mhz = 1028.57,
- .phyclk_mhz = 810.0,
- .socclk_mhz = 715.0,
- .dscclk_mhz = 318.334,
- .dram_speed_mts = 3200.0,
- },
- {
- .state = 7,
- .dcfclk_mhz = 847.06,
- .fabricclk_mhz = 1600.0,
- .dispclk_mhz = 1395.0,
- .dppclk_mhz = 1285.00,
- .phyclk_mhz = 1325.0,
- .socclk_mhz = 953.0,
- .dscclk_mhz = 489.0,
- .dram_speed_mts = 4266.0,
- },
- /*Extra state, no dispclk ramping*/
- {
- .state = 8,
- .dcfclk_mhz = 847.06,
- .fabricclk_mhz = 1600.0,
- .dispclk_mhz = 1395.0,
- .dppclk_mhz = 1285.0,
- .phyclk_mhz = 1325.0,
- .socclk_mhz = 953.0,
- .dscclk_mhz = 489.0,
- .dram_speed_mts = 4266.0,
- },
-
- },
-
- .sr_exit_time_us = 12.5,
- .sr_enter_plus_exit_time_us = 17.0,
- .urgent_latency_us = 4.0,
- .urgent_latency_pixel_data_only_us = 4.0,
- .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
- .urgent_latency_vm_data_only_us = 4.0,
- .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
- .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
- .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 75.0,
- .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
- .max_avg_sdp_bw_use_normal_percent = 60.0,
- .max_avg_dram_bw_use_normal_percent = 100.0,
- .writeback_latency_us = 12.0,
- .max_request_size_bytes = 256,
- .dram_channel_width_bytes = 4,
- .fabric_datapath_to_dcn_data_return_bytes = 32,
- .dcn_downspread_percent = 0.5,
- .downspread_percent = 0.38,
- .dram_page_open_time_ns = 50.0,
- .dram_rw_turnaround_time_ns = 17.5,
- .dram_return_buffer_per_channel_bytes = 8192,
- .round_trip_ping_latency_dcfclk_cycles = 128,
- .urgent_out_of_order_return_per_channel_bytes = 4096,
- .channel_interleave_bytes = 256,
- .num_banks = 8,
- .num_chans = 4,
- .vmm_page_size_bytes = 4096,
- .dram_clock_change_latency_us = 23.84,
- .return_bus_width_bytes = 64,
- .dispclk_dppclk_vco_speed_mhz = 3600,
- .xfc_bus_transport_time_us = 4,
- .xfc_xbuf_latency_tolerance_us = 4,
- .use_urgent_burst_bw = 1,
- .num_states = 8
-};
-
-#ifndef MAX
-#define MAX(X, Y) ((X) > (Y) ? (X) : (Y))
-#endif
-#ifndef MIN
-#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
-#endif
-
-/* begin *********************
- * macros to expend register list macro defined in HW object header file */
-
-/* DCN */
-/* TODO awful hack. fixup dcn20_dwb.h */
-#undef BASE_INNER
-#define BASE_INNER(seg) DMU_BASE__INST0_SEG ## seg
-
-#define BASE(seg) BASE_INNER(seg)
-
-#define SR(reg_name)\
- .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \
- mm ## reg_name
-
-#define SRI(reg_name, block, id)\
- .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define SRIR(var_name, reg_name, block, id)\
- .var_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define SRII(reg_name, block, id)\
- .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define DCCG_SRII(reg_name, block, id)\
- .block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define VUPDATE_SRII(reg_name, block, id)\
- .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \
- mm ## reg_name ## _ ## block ## id
-
-/* NBIO */
-#define NBIO_BASE_INNER(seg) \
- NBIF0_BASE__INST0_SEG ## seg
-
-#define NBIO_BASE(seg) \
- NBIO_BASE_INNER(seg)
-
-#define NBIO_SR(reg_name)\
- .reg_name = NBIO_BASE(mm ## reg_name ## _BASE_IDX) + \
- mm ## reg_name
-
-/* MMHUB */
-#define MMHUB_BASE_INNER(seg) \
- MMHUB_BASE__INST0_SEG ## seg
-
-#define MMHUB_BASE(seg) \
- MMHUB_BASE_INNER(seg)
-
-#define MMHUB_SR(reg_name)\
- .reg_name = MMHUB_BASE(mmMM ## reg_name ## _BASE_IDX) + \
- mmMM ## reg_name
-
-#define clk_src_regs(index, pllid)\
-[index] = {\
- CS_COMMON_REG_LIST_DCN2_1(index, pllid),\
-}
-
-static const struct dce110_clk_src_regs clk_src_regs[] = {
- clk_src_regs(0, A),
- clk_src_regs(1, B),
- clk_src_regs(2, C),
- clk_src_regs(3, D),
- clk_src_regs(4, E),
-};
-
-static const struct dce110_clk_src_shift cs_shift = {
- CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT)
-};
-
-static const struct dce110_clk_src_mask cs_mask = {
- CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK)
-};
-
-static const struct bios_registers bios_regs = {
- NBIO_SR(BIOS_SCRATCH_3),
- NBIO_SR(BIOS_SCRATCH_6)
-};
-
-static const struct dce_dmcu_registers dmcu_regs = {
- DMCU_DCN20_REG_LIST()
-};
-
-static const struct dce_dmcu_shift dmcu_shift = {
- DMCU_MASK_SH_LIST_DCN10(__SHIFT)
-};
-
-static const struct dce_dmcu_mask dmcu_mask = {
- DMCU_MASK_SH_LIST_DCN10(_MASK)
-};
-
-static const struct dce_abm_registers abm_regs = {
- ABM_DCN20_REG_LIST()
-};
-
-static const struct dce_abm_shift abm_shift = {
- ABM_MASK_SH_LIST_DCN20(__SHIFT)
-};
-
-static const struct dce_abm_mask abm_mask = {
- ABM_MASK_SH_LIST_DCN20(_MASK)
-};
-
-#define audio_regs(id)\
-[id] = {\
- AUD_COMMON_REG_LIST(id)\
-}
-
-static const struct dce_audio_registers audio_regs[] = {
- audio_regs(0),
- audio_regs(1),
- audio_regs(2),
- audio_regs(3),
- audio_regs(4),
- audio_regs(5),
-};
-
-#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\
- SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\
- SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\
- AUD_COMMON_MASK_SH_LIST_BASE(mask_sh)
-
-static const struct dce_audio_shift audio_shift = {
- DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce_audio_mask audio_mask = {
- DCE120_AUD_COMMON_MASK_SH_LIST(_MASK)
-};
-
-static const struct dccg_registers dccg_regs = {
- DCCG_COMMON_REG_LIST_DCN_BASE()
-};
-
-static const struct dccg_shift dccg_shift = {
- DCCG_MASK_SH_LIST_DCN2_1(__SHIFT)
-};
-
-static const struct dccg_mask dccg_mask = {
- DCCG_MASK_SH_LIST_DCN2_1(_MASK)
-};
-
-#define opp_regs(id)\
-[id] = {\
- OPP_REG_LIST_DCN20(id),\
-}
-
-static const struct dcn20_opp_registers opp_regs[] = {
- opp_regs(0),
- opp_regs(1),
- opp_regs(2),
- opp_regs(3),
- opp_regs(4),
- opp_regs(5),
-};
-
-static const struct dcn20_opp_shift opp_shift = {
- OPP_MASK_SH_LIST_DCN20(__SHIFT)
-};
-
-static const struct dcn20_opp_mask opp_mask = {
- OPP_MASK_SH_LIST_DCN20(_MASK)
-};
-
-#define tg_regs(id)\
-[id] = {TG_COMMON_REG_LIST_DCN2_0(id)}
-
-static const struct dcn_optc_registers tg_regs[] = {
- tg_regs(0),
- tg_regs(1),
- tg_regs(2),
- tg_regs(3)
-};
-
-static const struct dcn_optc_shift tg_shift = {
- TG_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT)
-};
-
-static const struct dcn_optc_mask tg_mask = {
- TG_COMMON_MASK_SH_LIST_DCN2_0(_MASK)
-};
-
-static const struct dcn20_mpc_registers mpc_regs = {
- MPC_REG_LIST_DCN2_0(0),
- MPC_REG_LIST_DCN2_0(1),
- MPC_REG_LIST_DCN2_0(2),
- MPC_REG_LIST_DCN2_0(3),
- MPC_REG_LIST_DCN2_0(4),
- MPC_REG_LIST_DCN2_0(5),
- MPC_OUT_MUX_REG_LIST_DCN2_0(0),
- MPC_OUT_MUX_REG_LIST_DCN2_0(1),
- MPC_OUT_MUX_REG_LIST_DCN2_0(2),
- MPC_OUT_MUX_REG_LIST_DCN2_0(3),
- MPC_DBG_REG_LIST_DCN2_0()
-};
-
-static const struct dcn20_mpc_shift mpc_shift = {
- MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT),
- MPC_DEBUG_REG_LIST_SH_DCN20
-};
-
-static const struct dcn20_mpc_mask mpc_mask = {
- MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK),
- MPC_DEBUG_REG_LIST_MASK_DCN20
-};
-
-#define hubp_regs(id)\
-[id] = {\
- HUBP_REG_LIST_DCN21(id)\
-}
-
-static const struct dcn_hubp2_registers hubp_regs[] = {
- hubp_regs(0),
- hubp_regs(1),
- hubp_regs(2),
- hubp_regs(3)
-};
-
-static const struct dcn_hubp2_shift hubp_shift = {
- HUBP_MASK_SH_LIST_DCN21(__SHIFT)
-};
-
-static const struct dcn_hubp2_mask hubp_mask = {
- HUBP_MASK_SH_LIST_DCN21(_MASK)
-};
-
-static const struct dcn_hubbub_registers hubbub_reg = {
- HUBBUB_REG_LIST_DCN21()
-};
-
-static const struct dcn_hubbub_shift hubbub_shift = {
- HUBBUB_MASK_SH_LIST_DCN21(__SHIFT)
-};
-
-static const struct dcn_hubbub_mask hubbub_mask = {
- HUBBUB_MASK_SH_LIST_DCN21(_MASK)
-};
-
-
-#define vmid_regs(id)\
-[id] = {\
- DCN20_VMID_REG_LIST(id)\
-}
-
-static const struct dcn_vmid_registers vmid_regs[] = {
- vmid_regs(0),
- vmid_regs(1),
- vmid_regs(2),
- vmid_regs(3),
- vmid_regs(4),
- vmid_regs(5),
- vmid_regs(6),
- vmid_regs(7),
- vmid_regs(8),
- vmid_regs(9),
- vmid_regs(10),
- vmid_regs(11),
- vmid_regs(12),
- vmid_regs(13),
- vmid_regs(14),
- vmid_regs(15)
-};
-
-static const struct dcn20_vmid_shift vmid_shifts = {
- DCN20_VMID_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn20_vmid_mask vmid_masks = {
- DCN20_VMID_MASK_SH_LIST(_MASK)
-};
-
-#define dsc_regsDCN20(id)\
-[id] = {\
- DSC_REG_LIST_DCN20(id)\
-}
-
-static const struct dcn20_dsc_registers dsc_regs[] = {
- dsc_regsDCN20(0),
- dsc_regsDCN20(1),
- dsc_regsDCN20(2),
- dsc_regsDCN20(3),
- dsc_regsDCN20(4),
- dsc_regsDCN20(5)
-};
-
-static const struct dcn20_dsc_shift dsc_shift = {
- DSC_REG_LIST_SH_MASK_DCN20(__SHIFT)
-};
-
-static const struct dcn20_dsc_mask dsc_mask = {
- DSC_REG_LIST_SH_MASK_DCN20(_MASK)
-};
-
-#define ipp_regs(id)\
-[id] = {\
- IPP_REG_LIST_DCN20(id),\
-}
-
-static const struct dcn10_ipp_registers ipp_regs[] = {
- ipp_regs(0),
- ipp_regs(1),
- ipp_regs(2),
- ipp_regs(3),
-};
-
-static const struct dcn10_ipp_shift ipp_shift = {
- IPP_MASK_SH_LIST_DCN20(__SHIFT)
-};
-
-static const struct dcn10_ipp_mask ipp_mask = {
- IPP_MASK_SH_LIST_DCN20(_MASK),
-};
-
-#define opp_regs(id)\
-[id] = {\
- OPP_REG_LIST_DCN20(id),\
-}
-
-
-#define aux_engine_regs(id)\
-[id] = {\
- AUX_COMMON_REG_LIST0(id), \
- .AUXN_IMPCAL = 0, \
- .AUXP_IMPCAL = 0, \
- .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \
-}
-
-static const struct dce110_aux_registers aux_engine_regs[] = {
- aux_engine_regs(0),
- aux_engine_regs(1),
- aux_engine_regs(2),
- aux_engine_regs(3),
- aux_engine_regs(4),
-};
-
-#define tf_regs(id)\
-[id] = {\
- TF_REG_LIST_DCN20(id),\
- TF_REG_LIST_DCN20_COMMON_APPEND(id),\
-}
-
-static const struct dcn2_dpp_registers tf_regs[] = {
- tf_regs(0),
- tf_regs(1),
- tf_regs(2),
- tf_regs(3),
-};
-
-static const struct dcn2_dpp_shift tf_shift = {
- TF_REG_LIST_SH_MASK_DCN20(__SHIFT),
- TF_DEBUG_REG_LIST_SH_DCN20
-};
-
-static const struct dcn2_dpp_mask tf_mask = {
- TF_REG_LIST_SH_MASK_DCN20(_MASK),
- TF_DEBUG_REG_LIST_MASK_DCN20
-};
-
-#define stream_enc_regs(id)\
-[id] = {\
- SE_DCN2_REG_LIST(id)\
-}
-
-static const struct dcn10_stream_enc_registers stream_enc_regs[] = {
- stream_enc_regs(0),
- stream_enc_regs(1),
- stream_enc_regs(2),
- stream_enc_regs(3),
- stream_enc_regs(4),
-};
-
-static const struct dce110_aux_registers_shift aux_shift = {
- DCN_AUX_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce110_aux_registers_mask aux_mask = {
- DCN_AUX_MASK_SH_LIST(_MASK)
-};
-
-static const struct dcn10_stream_encoder_shift se_shift = {
- SE_COMMON_MASK_SH_LIST_DCN20(__SHIFT)
-};
-
-static const struct dcn10_stream_encoder_mask se_mask = {
- SE_COMMON_MASK_SH_LIST_DCN20(_MASK)
-};
-
-static void dcn21_pp_smu_destroy(struct pp_smu_funcs **pp_smu);
-
-static int dcn21_populate_dml_pipes_from_context(
- struct dc *dc,
- struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- bool fast_validate);
-
-static struct input_pixel_processor *dcn21_ipp_create(
- struct dc_context *ctx, uint32_t inst)
-{
- struct dcn10_ipp *ipp =
- kzalloc(sizeof(struct dcn10_ipp), GFP_KERNEL);
-
- if (!ipp) {
- BREAK_TO_DEBUGGER();
- return NULL;
- }
-
- dcn20_ipp_construct(ipp, ctx, inst,
- &ipp_regs[inst], &ipp_shift, &ipp_mask);
- return &ipp->base;
-}
-
-static struct dpp *dcn21_dpp_create(
- struct dc_context *ctx,
- uint32_t inst)
-{
- struct dcn20_dpp *dpp =
- kzalloc(sizeof(struct dcn20_dpp), GFP_KERNEL);
-
- if (!dpp)
- return NULL;
-
- if (dpp2_construct(dpp, ctx, inst,
- &tf_regs[inst], &tf_shift, &tf_mask))
- return &dpp->base;
-
- BREAK_TO_DEBUGGER();
- kfree(dpp);
- return NULL;
-}
-
-static struct dce_aux *dcn21_aux_engine_create(
- struct dc_context *ctx,
- uint32_t inst)
-{
- struct aux_engine_dce110 *aux_engine =
- kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL);
-
- if (!aux_engine)
- return NULL;
-
- dce110_aux_engine_construct(aux_engine, ctx, inst,
- SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD,
- &aux_engine_regs[inst],
- &aux_mask,
- &aux_shift,
- ctx->dc->caps.extended_aux_timeout_support);
-
- return &aux_engine->base;
-}
-
-#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) }
-
-static const struct dce_i2c_registers i2c_hw_regs[] = {
- i2c_inst_regs(1),
- i2c_inst_regs(2),
- i2c_inst_regs(3),
- i2c_inst_regs(4),
- i2c_inst_regs(5),
-};
-
-static const struct dce_i2c_shift i2c_shifts = {
- I2C_COMMON_MASK_SH_LIST_DCN2(__SHIFT)
-};
-
-static const struct dce_i2c_mask i2c_masks = {
- I2C_COMMON_MASK_SH_LIST_DCN2(_MASK)
-};
-
-struct dce_i2c_hw *dcn21_i2c_hw_create(
- struct dc_context *ctx,
- uint32_t inst)
-{
- struct dce_i2c_hw *dce_i2c_hw =
- kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL);
-
- if (!dce_i2c_hw)
- return NULL;
-
- dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst,
- &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks);
-
- return dce_i2c_hw;
-}
-
-static const struct resource_caps res_cap_rn = {
- .num_timing_generator = 4,
- .num_opp = 4,
- .num_video_plane = 4,
- .num_audio = 4, // 4 audio endpoints. 4 audio streams
- .num_stream_encoder = 5,
- .num_pll = 5, // maybe 3 because the last two used for USB-c
- .num_dwb = 1,
- .num_ddc = 5,
- .num_vmid = 16,
- .num_dsc = 3,
-};
-
-#ifdef DIAGS_BUILD
-static const struct resource_caps res_cap_rn_FPGA_4pipe = {
- .num_timing_generator = 4,
- .num_opp = 4,
- .num_video_plane = 4,
- .num_audio = 7,
- .num_stream_encoder = 4,
- .num_pll = 4,
- .num_dwb = 1,
- .num_ddc = 4,
- .num_dsc = 0,
-};
-
-static const struct resource_caps res_cap_rn_FPGA_2pipe_dsc = {
- .num_timing_generator = 2,
- .num_opp = 2,
- .num_video_plane = 2,
- .num_audio = 7,
- .num_stream_encoder = 2,
- .num_pll = 4,
- .num_dwb = 1,
- .num_ddc = 4,
- .num_dsc = 2,
-};
-#endif
-
-static const struct dc_plane_cap plane_cap = {
- .type = DC_PLANE_TYPE_DCN_UNIVERSAL,
- .blends_with_above = true,
- .blends_with_below = true,
- .per_pixel_alpha = true,
-
- .pixel_format_support = {
- .argb8888 = true,
- .nv12 = true,
- .fp16 = true,
- .p010 = true
- },
-
- .max_upscale_factor = {
- .argb8888 = 16000,
- .nv12 = 16000,
- .fp16 = 16000
- },
-
- .max_downscale_factor = {
- .argb8888 = 250,
- .nv12 = 250,
- .fp16 = 250
- },
- 64,
- 64
-};
-
-static const struct dc_debug_options debug_defaults_drv = {
- .disable_dmcu = false,
- .force_abm_enable = false,
- .timing_trace = false,
- .clock_trace = true,
- .disable_pplib_clock_request = true,
- .min_disp_clk_khz = 100000,
- .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
- .force_single_disp_pipe_split = false,
- .disable_dcc = DCC_ENABLE,
- .vsr_support = true,
- .performance_trace = false,
- .max_downscale_src_width = 4096,
- .disable_pplib_wm_range = false,
- .scl_reset_length10 = true,
- .sanity_checks = true,
- .disable_48mhz_pwrdwn = false,
- .usbc_combo_phy_reset_wa = true,
- .dmub_command_table = true,
- .use_max_lb = true,
- .optimize_edp_link_rate = true
-};
-
-static const struct dc_debug_options debug_defaults_diags = {
- .disable_dmcu = false,
- .force_abm_enable = false,
- .timing_trace = true,
- .clock_trace = true,
- .disable_dpp_power_gate = true,
- .disable_hubp_power_gate = true,
- .disable_clock_gate = true,
- .disable_pplib_clock_request = true,
- .disable_pplib_wm_range = true,
- .disable_stutter = true,
- .disable_48mhz_pwrdwn = true,
- .disable_psr = true,
- .enable_tri_buf = true,
- .use_max_lb = true
-};
-
-enum dcn20_clk_src_array_id {
- DCN20_CLK_SRC_PLL0,
- DCN20_CLK_SRC_PLL1,
- DCN20_CLK_SRC_PLL2,
- DCN20_CLK_SRC_PLL3,
- DCN20_CLK_SRC_PLL4,
- DCN20_CLK_SRC_TOTAL_DCN21
-};
-
-static void dcn21_resource_destruct(struct dcn21_resource_pool *pool)
-{
- unsigned int i;
-
- for (i = 0; i < pool->base.stream_enc_count; i++) {
- if (pool->base.stream_enc[i] != NULL) {
- kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i]));
- pool->base.stream_enc[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_dsc; i++) {
- if (pool->base.dscs[i] != NULL)
- dcn20_dsc_destroy(&pool->base.dscs[i]);
- }
-
- if (pool->base.mpc != NULL) {
- kfree(TO_DCN20_MPC(pool->base.mpc));
- pool->base.mpc = NULL;
- }
- if (pool->base.hubbub != NULL) {
- kfree(pool->base.hubbub);
- pool->base.hubbub = NULL;
- }
- for (i = 0; i < pool->base.pipe_count; i++) {
- if (pool->base.dpps[i] != NULL)
- dcn20_dpp_destroy(&pool->base.dpps[i]);
-
- if (pool->base.ipps[i] != NULL)
- pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]);
-
- if (pool->base.hubps[i] != NULL) {
- kfree(TO_DCN20_HUBP(pool->base.hubps[i]));
- pool->base.hubps[i] = NULL;
- }
-
- if (pool->base.irqs != NULL) {
- dal_irq_service_destroy(&pool->base.irqs);
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_ddc; i++) {
- if (pool->base.engines[i] != NULL)
- dce110_engine_destroy(&pool->base.engines[i]);
- if (pool->base.hw_i2cs[i] != NULL) {
- kfree(pool->base.hw_i2cs[i]);
- pool->base.hw_i2cs[i] = NULL;
- }
- if (pool->base.sw_i2cs[i] != NULL) {
- kfree(pool->base.sw_i2cs[i]);
- pool->base.sw_i2cs[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_opp; i++) {
- if (pool->base.opps[i] != NULL)
- pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]);
- }
-
- for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
- if (pool->base.timing_generators[i] != NULL) {
- kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i]));
- pool->base.timing_generators[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_dwb; i++) {
- if (pool->base.dwbc[i] != NULL) {
- kfree(TO_DCN20_DWBC(pool->base.dwbc[i]));
- pool->base.dwbc[i] = NULL;
- }
- if (pool->base.mcif_wb[i] != NULL) {
- kfree(TO_DCN20_MMHUBBUB(pool->base.mcif_wb[i]));
- pool->base.mcif_wb[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->base.audio_count; i++) {
- if (pool->base.audios[i])
- dce_aud_destroy(&pool->base.audios[i]);
- }
-
- for (i = 0; i < pool->base.clk_src_count; i++) {
- if (pool->base.clock_sources[i] != NULL) {
- dcn20_clock_source_destroy(&pool->base.clock_sources[i]);
- pool->base.clock_sources[i] = NULL;
- }
- }
-
- if (pool->base.dp_clock_source != NULL) {
- dcn20_clock_source_destroy(&pool->base.dp_clock_source);
- pool->base.dp_clock_source = NULL;
- }
-
- if (pool->base.abm != NULL) {
- if (pool->base.abm->ctx->dc->config.disable_dmcu)
- dmub_abm_destroy(&pool->base.abm);
- else
- dce_abm_destroy(&pool->base.abm);
- }
-
- if (pool->base.dmcu != NULL)
- dce_dmcu_destroy(&pool->base.dmcu);
-
- if (pool->base.psr != NULL)
- dmub_psr_destroy(&pool->base.psr);
-
- if (pool->base.dccg != NULL)
- dcn_dccg_destroy(&pool->base.dccg);
-
- if (pool->base.pp_smu != NULL)
- dcn21_pp_smu_destroy(&pool->base.pp_smu);
-}
-
-
-static void calculate_wm_set_for_vlevel(
- int vlevel,
- struct wm_range_table_entry *table_entry,
- struct dcn_watermarks *wm_set,
- struct display_mode_lib *dml,
- display_e2e_pipe_params_st *pipes,
- int pipe_cnt)
-{
- double dram_clock_change_latency_cached = dml->soc.dram_clock_change_latency_us;
-
- ASSERT(vlevel < dml->soc.num_states);
- /* only pipe 0 is read for voltage and dcf/soc clocks */
- pipes[0].clks_cfg.voltage = vlevel;
- pipes[0].clks_cfg.dcfclk_mhz = dml->soc.clock_limits[vlevel].dcfclk_mhz;
- pipes[0].clks_cfg.socclk_mhz = dml->soc.clock_limits[vlevel].socclk_mhz;
-
- dml->soc.dram_clock_change_latency_us = table_entry->pstate_latency_us;
- dml->soc.sr_exit_time_us = table_entry->sr_exit_time_us;
- dml->soc.sr_enter_plus_exit_time_us = table_entry->sr_enter_plus_exit_time_us;
-
- wm_set->urgent_ns = get_wm_urgent(dml, pipes, pipe_cnt) * 1000;
- wm_set->cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(dml, pipes, pipe_cnt) * 1000;
- wm_set->cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(dml, pipes, pipe_cnt) * 1000;
- wm_set->cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(dml, pipes, pipe_cnt) * 1000;
- wm_set->pte_meta_urgent_ns = get_wm_memory_trip(dml, pipes, pipe_cnt) * 1000;
- wm_set->frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(dml, pipes, pipe_cnt) * 1000;
- wm_set->frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(dml, pipes, pipe_cnt) * 1000;
- wm_set->urgent_latency_ns = get_urgent_latency(dml, pipes, pipe_cnt) * 1000;
- dml->soc.dram_clock_change_latency_us = dram_clock_change_latency_cached;
-
-}
-
-static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb)
-{
- int i;
-
- if (dc->bb_overrides.sr_exit_time_ns) {
- for (i = 0; i < WM_SET_COUNT; i++) {
- dc->clk_mgr->bw_params->wm_table.entries[i].sr_exit_time_us =
- dc->bb_overrides.sr_exit_time_ns / 1000.0;
- }
- }
-
- if (dc->bb_overrides.sr_enter_plus_exit_time_ns) {
- for (i = 0; i < WM_SET_COUNT; i++) {
- dc->clk_mgr->bw_params->wm_table.entries[i].sr_enter_plus_exit_time_us =
- dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
- }
- }
-
- if (dc->bb_overrides.urgent_latency_ns) {
- bb->urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0;
- }
-
- if (dc->bb_overrides.dram_clock_change_latency_ns) {
- for (i = 0; i < WM_SET_COUNT; i++) {
- dc->clk_mgr->bw_params->wm_table.entries[i].pstate_latency_us =
- dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
- }
- }
-}
-
-void dcn21_calculate_wm(
- struct dc *dc, struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- int *out_pipe_cnt,
- int *pipe_split_from,
- int vlevel_req,
- bool fast_validate)
-{
- int pipe_cnt, i, pipe_idx;
- int vlevel, vlevel_max;
- struct wm_range_table_entry *table_entry;
- struct clk_bw_params *bw_params = dc->clk_mgr->bw_params;
-
- ASSERT(bw_params);
-
- patch_bounding_box(dc, &context->bw_ctx.dml.soc);
-
- for (i = 0, pipe_idx = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
- if (!context->res_ctx.pipe_ctx[i].stream)
- continue;
-
- pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0;
- pipes[pipe_cnt].clks_cfg.dispclk_mhz = context->bw_ctx.dml.vba.RequiredDISPCLK[vlevel_req][context->bw_ctx.dml.vba.maxMpcComb];
-
- if (pipe_split_from[i] < 0) {
- pipes[pipe_cnt].clks_cfg.dppclk_mhz =
- context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel_req][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx];
- if (context->bw_ctx.dml.vba.BlendingAndTiming[pipe_idx] == pipe_idx)
- pipes[pipe_cnt].pipe.dest.odm_combine =
- context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel_req][pipe_idx];
- else
- pipes[pipe_cnt].pipe.dest.odm_combine = 0;
- pipe_idx++;
- } else {
- pipes[pipe_cnt].clks_cfg.dppclk_mhz =
- context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel_req][context->bw_ctx.dml.vba.maxMpcComb][pipe_split_from[i]];
- if (context->bw_ctx.dml.vba.BlendingAndTiming[pipe_split_from[i]] == pipe_split_from[i])
- pipes[pipe_cnt].pipe.dest.odm_combine =
- context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel_req][pipe_split_from[i]];
- else
- pipes[pipe_cnt].pipe.dest.odm_combine = 0;
- }
- pipe_cnt++;
- }
-
- if (pipe_cnt != pipe_idx) {
- if (dc->res_pool->funcs->populate_dml_pipes)
- pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc,
- context, pipes, fast_validate);
- else
- pipe_cnt = dcn21_populate_dml_pipes_from_context(dc,
- context, pipes, fast_validate);
- }
-
- *out_pipe_cnt = pipe_cnt;
-
- vlevel_max = bw_params->clk_table.num_entries - 1;
-
-
- /* WM Set D */
- table_entry = &bw_params->wm_table.entries[WM_D];
- if (table_entry->wm_type == WM_TYPE_RETRAINING)
- vlevel = 0;
- else
- vlevel = vlevel_max;
- calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.d,
- &context->bw_ctx.dml, pipes, pipe_cnt);
- /* WM Set C */
- table_entry = &bw_params->wm_table.entries[WM_C];
- vlevel = MIN(MAX(vlevel_req, 3), vlevel_max);
- calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.c,
- &context->bw_ctx.dml, pipes, pipe_cnt);
- /* WM Set B */
- table_entry = &bw_params->wm_table.entries[WM_B];
- vlevel = MIN(MAX(vlevel_req, 2), vlevel_max);
- calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.b,
- &context->bw_ctx.dml, pipes, pipe_cnt);
-
- /* WM Set A */
- table_entry = &bw_params->wm_table.entries[WM_A];
- vlevel = MIN(vlevel_req, vlevel_max);
- calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.a,
- &context->bw_ctx.dml, pipes, pipe_cnt);
-}
-
-
-static bool dcn21_fast_validate_bw(
- struct dc *dc,
- struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- int *pipe_cnt_out,
- int *pipe_split_from,
- int *vlevel_out,
- bool fast_validate)
-{
- bool out = false;
- int split[MAX_PIPES] = { 0 };
- int pipe_cnt, i, pipe_idx, vlevel;
-
- ASSERT(pipes);
- if (!pipes)
- return false;
-
- dcn20_merge_pipes_for_validate(dc, context);
-
- pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
-
- *pipe_cnt_out = pipe_cnt;
-
- if (!pipe_cnt) {
- out = true;
- goto validate_out;
- }
- /*
- * DML favors voltage over p-state, but we're more interested in
- * supporting p-state over voltage. We can't support p-state in
- * prefetch mode > 0 so try capping the prefetch mode to start.
- */
- context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank =
- dm_allow_self_refresh_and_mclk_switch;
- vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
-
- if (vlevel > context->bw_ctx.dml.soc.num_states) {
- /*
- * If mode is unsupported or there's still no p-state support then
- * fall back to favoring voltage.
- *
- * We don't actually support prefetch mode 2, so require that we
- * at least support prefetch mode 1.
- */
- context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank =
- dm_allow_self_refresh;
- vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
- if (vlevel > context->bw_ctx.dml.soc.num_states)
- goto validate_fail;
- }
-
- vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, NULL);
-
- for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- struct pipe_ctx *mpo_pipe = pipe->bottom_pipe;
- struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
-
- if (!pipe->stream)
- continue;
-
- /* We only support full screen mpo with ODM */
- if (vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled
- && pipe->plane_state && mpo_pipe
- && memcmp(&mpo_pipe->plane_res.scl_data.recout,
- &pipe->plane_res.scl_data.recout,
- sizeof(struct rect)) != 0) {
- ASSERT(mpo_pipe->plane_state != pipe->plane_state);
- goto validate_fail;
- }
- pipe_idx++;
- }
-
- /*initialize pipe_just_split_from to invalid idx*/
- for (i = 0; i < MAX_PIPES; i++)
- pipe_split_from[i] = -1;
-
- for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- struct pipe_ctx *hsplit_pipe = pipe->bottom_pipe;
-
- if (!pipe->stream || pipe_split_from[i] >= 0)
- continue;
-
- pipe_idx++;
-
- if (!pipe->top_pipe && !pipe->plane_state && context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]) {
- hsplit_pipe = dcn20_find_secondary_pipe(dc, &context->res_ctx, dc->res_pool, pipe);
- ASSERT(hsplit_pipe);
- if (!dcn20_split_stream_for_odm(
- dc, &context->res_ctx,
- pipe, hsplit_pipe))
- goto validate_fail;
- pipe_split_from[hsplit_pipe->pipe_idx] = pipe_idx;
- dcn20_build_mapped_resource(dc, context, pipe->stream);
- }
-
- if (!pipe->plane_state)
- continue;
- /* Skip 2nd half of already split pipe */
- if (pipe->top_pipe && pipe->plane_state == pipe->top_pipe->plane_state)
- continue;
-
- if (split[i] == 2) {
- if (!hsplit_pipe || hsplit_pipe->plane_state != pipe->plane_state) {
- /* pipe not split previously needs split */
- hsplit_pipe = dcn20_find_secondary_pipe(dc, &context->res_ctx, dc->res_pool, pipe);
- ASSERT(hsplit_pipe);
- if (!hsplit_pipe) {
- context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx] *= 2;
- continue;
- }
- if (context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]) {
- if (!dcn20_split_stream_for_odm(
- dc, &context->res_ctx,
- pipe, hsplit_pipe))
- goto validate_fail;
- dcn20_build_mapped_resource(dc, context, pipe->stream);
- } else {
- dcn20_split_stream_for_mpc(
- &context->res_ctx, dc->res_pool,
- pipe, hsplit_pipe);
- resource_build_scaling_params(pipe);
- resource_build_scaling_params(hsplit_pipe);
- }
- pipe_split_from[hsplit_pipe->pipe_idx] = pipe_idx;
- }
- } else if (hsplit_pipe && hsplit_pipe->plane_state == pipe->plane_state) {
- /* merge should already have been done */
- ASSERT(0);
- }
- }
- /* Actual dsc count per stream dsc validation*/
- if (!dcn20_validate_dsc(dc, context)) {
- context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states] =
- DML_FAIL_DSC_VALIDATION_FAILURE;
- goto validate_fail;
- }
-
- *vlevel_out = vlevel;
-
- out = true;
- goto validate_out;
-
-validate_fail:
- out = false;
-
-validate_out:
- return out;
-}
-
-static noinline bool dcn21_validate_bandwidth_fp(struct dc *dc,
- struct dc_state *context, bool fast_validate)
-{
- bool out = false;
-
- BW_VAL_TRACE_SETUP();
-
- int vlevel = 0;
- int pipe_split_from[MAX_PIPES];
- int pipe_cnt = 0;
- display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_ATOMIC);
- DC_LOGGER_INIT(dc->ctx->logger);
-
- BW_VAL_TRACE_COUNT();
-
- /*Unsafe due to current pipe merge and split logic*/
- ASSERT(context != dc->current_state);
-
- out = dcn21_fast_validate_bw(dc, context, pipes, &pipe_cnt, pipe_split_from, &vlevel, fast_validate);
-
- if (pipe_cnt == 0)
- goto validate_out;
-
- if (!out)
- goto validate_fail;
-
- BW_VAL_TRACE_END_VOLTAGE_LEVEL();
-
- if (fast_validate) {
- BW_VAL_TRACE_SKIP(fast);
- goto validate_out;
- }
-
- dcn21_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, fast_validate);
- dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
-
- BW_VAL_TRACE_END_WATERMARKS();
-
- goto validate_out;
-
-validate_fail:
- DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n",
- dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states]));
-
- BW_VAL_TRACE_SKIP(fail);
- out = false;
-
-validate_out:
- kfree(pipes);
-
- BW_VAL_TRACE_FINISH();
-
- return out;
-}
-
-/*
- * Some of the functions further below use the FPU, so we need to wrap this
- * with DC_FP_START()/DC_FP_END(). Use the same approach as for
- * dcn20_validate_bandwidth in dcn20_resource.c.
- */
-bool dcn21_validate_bandwidth(struct dc *dc, struct dc_state *context,
- bool fast_validate)
-{
- bool voltage_supported;
- DC_FP_START();
- voltage_supported = dcn21_validate_bandwidth_fp(dc, context, fast_validate);
- DC_FP_END();
- return voltage_supported;
-}
-
-static void dcn21_destroy_resource_pool(struct resource_pool **pool)
-{
- struct dcn21_resource_pool *dcn21_pool = TO_DCN21_RES_POOL(*pool);
-
- dcn21_resource_destruct(dcn21_pool);
- kfree(dcn21_pool);
- *pool = NULL;
-}
-
-static struct clock_source *dcn21_clock_source_create(
- struct dc_context *ctx,
- struct dc_bios *bios,
- enum clock_source_id id,
- const struct dce110_clk_src_regs *regs,
- bool dp_clk_src)
-{
- struct dce110_clk_src *clk_src =
- kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL);
-
- if (!clk_src)
- return NULL;
-
- if (dcn20_clk_src_construct(clk_src, ctx, bios, id,
- regs, &cs_shift, &cs_mask)) {
- clk_src->base.dp_clk_src = dp_clk_src;
- return &clk_src->base;
- }
-
- BREAK_TO_DEBUGGER();
- return NULL;
-}
-
-static struct hubp *dcn21_hubp_create(
- struct dc_context *ctx,
- uint32_t inst)
-{
- struct dcn21_hubp *hubp21 =
- kzalloc(sizeof(struct dcn21_hubp), GFP_KERNEL);
-
- if (!hubp21)
- return NULL;
-
- if (hubp21_construct(hubp21, ctx, inst,
- &hubp_regs[inst], &hubp_shift, &hubp_mask))
- return &hubp21->base;
-
- BREAK_TO_DEBUGGER();
- kfree(hubp21);
- return NULL;
-}
-
-static struct hubbub *dcn21_hubbub_create(struct dc_context *ctx)
-{
- int i;
-
- struct dcn20_hubbub *hubbub = kzalloc(sizeof(struct dcn20_hubbub),
- GFP_KERNEL);
-
- if (!hubbub)
- return NULL;
-
- hubbub21_construct(hubbub, ctx,
- &hubbub_reg,
- &hubbub_shift,
- &hubbub_mask);
-
- for (i = 0; i < res_cap_rn.num_vmid; i++) {
- struct dcn20_vmid *vmid = &hubbub->vmid[i];
-
- vmid->ctx = ctx;
-
- vmid->regs = &vmid_regs[i];
- vmid->shifts = &vmid_shifts;
- vmid->masks = &vmid_masks;
- }
- hubbub->num_vmid = res_cap_rn.num_vmid;
-
- return &hubbub->base;
-}
-
-struct output_pixel_processor *dcn21_opp_create(
- struct dc_context *ctx, uint32_t inst)
-{
- struct dcn20_opp *opp =
- kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL);
-
- if (!opp) {
- BREAK_TO_DEBUGGER();
- return NULL;
- }
-
- dcn20_opp_construct(opp, ctx, inst,
- &opp_regs[inst], &opp_shift, &opp_mask);
- return &opp->base;
-}
-
-struct timing_generator *dcn21_timing_generator_create(
- struct dc_context *ctx,
- uint32_t instance)
-{
- struct optc *tgn10 =
- kzalloc(sizeof(struct optc), GFP_KERNEL);
-
- if (!tgn10)
- return NULL;
-
- tgn10->base.inst = instance;
- tgn10->base.ctx = ctx;
-
- tgn10->tg_regs = &tg_regs[instance];
- tgn10->tg_shift = &tg_shift;
- tgn10->tg_mask = &tg_mask;
-
- dcn20_timing_generator_init(tgn10);
-
- return &tgn10->base;
-}
-
-struct mpc *dcn21_mpc_create(struct dc_context *ctx)
-{
- struct dcn20_mpc *mpc20 = kzalloc(sizeof(struct dcn20_mpc),
- GFP_KERNEL);
-
- if (!mpc20)
- return NULL;
-
- dcn20_mpc_construct(mpc20, ctx,
- &mpc_regs,
- &mpc_shift,
- &mpc_mask,
- 6);
-
- return &mpc20->base;
-}
-
-static void read_dce_straps(
- struct dc_context *ctx,
- struct resource_straps *straps)
-{
- generic_reg_get(ctx, mmDC_PINSTRAPS + BASE(mmDC_PINSTRAPS_BASE_IDX),
- FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio);
-
-}
-
-
-struct display_stream_compressor *dcn21_dsc_create(
- struct dc_context *ctx, uint32_t inst)
-{
- struct dcn20_dsc *dsc =
- kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL);
-
- if (!dsc) {
- BREAK_TO_DEBUGGER();
- return NULL;
- }
-
- dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask);
- return &dsc->base;
-}
-
-static struct _vcs_dpi_voltage_scaling_st construct_low_pstate_lvl(struct clk_limit_table *clk_table, unsigned int high_voltage_lvl)
-{
- struct _vcs_dpi_voltage_scaling_st low_pstate_lvl;
- int i;
-
- low_pstate_lvl.state = 1;
- low_pstate_lvl.dcfclk_mhz = clk_table->entries[0].dcfclk_mhz;
- low_pstate_lvl.fabricclk_mhz = clk_table->entries[0].fclk_mhz;
- low_pstate_lvl.socclk_mhz = clk_table->entries[0].socclk_mhz;
- low_pstate_lvl.dram_speed_mts = clk_table->entries[0].memclk_mhz * 2;
-
- low_pstate_lvl.dispclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].dispclk_mhz;
- low_pstate_lvl.dppclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].dppclk_mhz;
- low_pstate_lvl.dram_bw_per_chan_gbps = dcn2_1_soc.clock_limits[high_voltage_lvl].dram_bw_per_chan_gbps;
- low_pstate_lvl.dscclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].dscclk_mhz;
- low_pstate_lvl.dtbclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].dtbclk_mhz;
- low_pstate_lvl.phyclk_d18_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].phyclk_d18_mhz;
- low_pstate_lvl.phyclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].phyclk_mhz;
-
- for (i = clk_table->num_entries; i > 1; i--)
- clk_table->entries[i] = clk_table->entries[i-1];
- clk_table->entries[1] = clk_table->entries[0];
- clk_table->num_entries++;
-
- return low_pstate_lvl;
-}
-
-static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
-{
- struct dcn21_resource_pool *pool = TO_DCN21_RES_POOL(dc->res_pool);
- struct clk_limit_table *clk_table = &bw_params->clk_table;
- struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
- unsigned int i, closest_clk_lvl = 0, k = 0;
- int j;
-
- dcn2_1_ip.max_num_otg = pool->base.res_cap->num_timing_generator;
- dcn2_1_ip.max_num_dpp = pool->base.pipe_count;
- dcn2_1_soc.num_chans = bw_params->num_channels;
-
- ASSERT(clk_table->num_entries);
- /* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying over null states later */
- for (i = 0; i < dcn2_1_soc.num_states + 1; i++) {
- clock_limits[i] = dcn2_1_soc.clock_limits[i];
- }
-
- for (i = 0; i < clk_table->num_entries; i++) {
- /* loop backwards*/
- for (closest_clk_lvl = 0, j = dcn2_1_soc.num_states - 1; j >= 0; j--) {
- if ((unsigned int) dcn2_1_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
- closest_clk_lvl = j;
- break;
- }
- }
-
- /* clk_table[1] is reserved for min DF PState. skip here to fill in later. */
- if (i == 1)
- k++;
-
- clock_limits[k].state = k;
- clock_limits[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
- clock_limits[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
- clock_limits[k].socclk_mhz = clk_table->entries[i].socclk_mhz;
- clock_limits[k].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2;
-
- clock_limits[k].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
- clock_limits[k].dppclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
- clock_limits[k].dram_bw_per_chan_gbps = dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
- clock_limits[k].dscclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
- clock_limits[k].dtbclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
- clock_limits[k].phyclk_d18_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
- clock_limits[k].phyclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
-
- k++;
- }
- for (i = 0; i < clk_table->num_entries + 1; i++)
- dcn2_1_soc.clock_limits[i] = clock_limits[i];
- if (clk_table->num_entries) {
- dcn2_1_soc.num_states = clk_table->num_entries + 1;
- /* fill in min DF PState */
- dcn2_1_soc.clock_limits[1] = construct_low_pstate_lvl(clk_table, closest_clk_lvl);
- /* duplicate last level */
- dcn2_1_soc.clock_limits[dcn2_1_soc.num_states] = dcn2_1_soc.clock_limits[dcn2_1_soc.num_states - 1];
- dcn2_1_soc.clock_limits[dcn2_1_soc.num_states].state = dcn2_1_soc.num_states;
- }
-
- dml_init_instance(&dc->dml, &dcn2_1_soc, &dcn2_1_ip, DML_PROJECT_DCN21);
-}
-
-static struct pp_smu_funcs *dcn21_pp_smu_create(struct dc_context *ctx)
-{
- struct pp_smu_funcs *pp_smu = kzalloc(sizeof(*pp_smu), GFP_KERNEL);
-
- if (!pp_smu)
- return pp_smu;
-
- dm_pp_get_funcs(ctx, pp_smu);
-
- if (pp_smu->ctx.ver != PP_SMU_VER_RN)
- pp_smu = memset(pp_smu, 0, sizeof(struct pp_smu_funcs));
-
-
- return pp_smu;
-}
-
-static void dcn21_pp_smu_destroy(struct pp_smu_funcs **pp_smu)
-{
- if (pp_smu && *pp_smu) {
- kfree(*pp_smu);
- *pp_smu = NULL;
- }
-}
-
-static struct audio *dcn21_create_audio(
- struct dc_context *ctx, unsigned int inst)
-{
- return dce_audio_create(ctx, inst,
- &audio_regs[inst], &audio_shift, &audio_mask);
-}
-
-static struct dc_cap_funcs cap_funcs = {
- .get_dcc_compression_cap = dcn20_get_dcc_compression_cap
-};
-
-struct stream_encoder *dcn21_stream_encoder_create(
- enum engine_id eng_id,
- struct dc_context *ctx)
-{
- struct dcn10_stream_encoder *enc1 =
- kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL);
-
- if (!enc1)
- return NULL;
-
- dcn20_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id,
- &stream_enc_regs[eng_id],
- &se_shift, &se_mask);
-
- return &enc1->base;
-}
-
-static const struct dce_hwseq_registers hwseq_reg = {
- HWSEQ_DCN21_REG_LIST()
-};
-
-static const struct dce_hwseq_shift hwseq_shift = {
- HWSEQ_DCN21_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce_hwseq_mask hwseq_mask = {
- HWSEQ_DCN21_MASK_SH_LIST(_MASK)
-};
-
-static struct dce_hwseq *dcn21_hwseq_create(
- struct dc_context *ctx)
-{
- struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL);
-
- if (hws) {
- hws->ctx = ctx;
- hws->regs = &hwseq_reg;
- hws->shifts = &hwseq_shift;
- hws->masks = &hwseq_mask;
- hws->wa.DEGVIDCN21 = true;
- hws->wa.disallow_self_refresh_during_multi_plane_transition = true;
- }
- return hws;
-}
-
-static const struct resource_create_funcs res_create_funcs = {
- .read_dce_straps = read_dce_straps,
- .create_audio = dcn21_create_audio,
- .create_stream_encoder = dcn21_stream_encoder_create,
- .create_hwseq = dcn21_hwseq_create,
-};
-
-static const struct resource_create_funcs res_create_maximus_funcs = {
- .read_dce_straps = NULL,
- .create_audio = NULL,
- .create_stream_encoder = NULL,
- .create_hwseq = dcn21_hwseq_create,
-};
-
-static const struct encoder_feature_support link_enc_feature = {
- .max_hdmi_deep_color = COLOR_DEPTH_121212,
- .max_hdmi_pixel_clock = 600000,
- .hdmi_ycbcr420_supported = true,
- .dp_ycbcr420_supported = true,
- .fec_supported = true,
- .flags.bits.IS_HBR2_CAPABLE = true,
- .flags.bits.IS_HBR3_CAPABLE = true,
- .flags.bits.IS_TPS3_CAPABLE = true,
- .flags.bits.IS_TPS4_CAPABLE = true
-};
-
-
-#define link_regs(id, phyid)\
-[id] = {\
- LE_DCN2_REG_LIST(id), \
- UNIPHY_DCN2_REG_LIST(phyid), \
- DPCS_DCN21_REG_LIST(id), \
- SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \
-}
-
-static const struct dcn10_link_enc_registers link_enc_regs[] = {
- link_regs(0, A),
- link_regs(1, B),
- link_regs(2, C),
- link_regs(3, D),
- link_regs(4, E),
-};
-
-static const struct dce_panel_cntl_registers panel_cntl_regs[] = {
- { DCN_PANEL_CNTL_REG_LIST() }
-};
-
-static const struct dce_panel_cntl_shift panel_cntl_shift = {
- DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce_panel_cntl_mask panel_cntl_mask = {
- DCE_PANEL_CNTL_MASK_SH_LIST(_MASK)
-};
-
-#define aux_regs(id)\
-[id] = {\
- DCN2_AUX_REG_LIST(id)\
-}
-
-static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = {
- aux_regs(0),
- aux_regs(1),
- aux_regs(2),
- aux_regs(3),
- aux_regs(4)
-};
-
-#define hpd_regs(id)\
-[id] = {\
- HPD_REG_LIST(id)\
-}
-
-static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = {
- hpd_regs(0),
- hpd_regs(1),
- hpd_regs(2),
- hpd_regs(3),
- hpd_regs(4)
-};
-
-static const struct dcn10_link_enc_shift le_shift = {
- LINK_ENCODER_MASK_SH_LIST_DCN20(__SHIFT),\
- DPCS_DCN21_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn10_link_enc_mask le_mask = {
- LINK_ENCODER_MASK_SH_LIST_DCN20(_MASK),\
- DPCS_DCN21_MASK_SH_LIST(_MASK)
-};
-
-static int map_transmitter_id_to_phy_instance(
- enum transmitter transmitter)
-{
- switch (transmitter) {
- case TRANSMITTER_UNIPHY_A:
- return 0;
- break;
- case TRANSMITTER_UNIPHY_B:
- return 1;
- break;
- case TRANSMITTER_UNIPHY_C:
- return 2;
- break;
- case TRANSMITTER_UNIPHY_D:
- return 3;
- break;
- case TRANSMITTER_UNIPHY_E:
- return 4;
- break;
- default:
- ASSERT(0);
- return 0;
- }
-}
-
-static struct link_encoder *dcn21_link_encoder_create(
- const struct encoder_init_data *enc_init_data)
-{
- struct dcn21_link_encoder *enc21 =
- kzalloc(sizeof(struct dcn21_link_encoder), GFP_KERNEL);
- int link_regs_id;
-
- if (!enc21)
- return NULL;
-
- link_regs_id =
- map_transmitter_id_to_phy_instance(enc_init_data->transmitter);
-
- dcn21_link_encoder_construct(enc21,
- enc_init_data,
- &link_enc_feature,
- &link_enc_regs[link_regs_id],
- &link_enc_aux_regs[enc_init_data->channel - 1],
- &link_enc_hpd_regs[enc_init_data->hpd_source],
- &le_shift,
- &le_mask);
-
- return &enc21->enc10.base;
-}
-
-static struct panel_cntl *dcn21_panel_cntl_create(const struct panel_cntl_init_data *init_data)
-{
- struct dce_panel_cntl *panel_cntl =
- kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL);
-
- if (!panel_cntl)
- return NULL;
-
- dce_panel_cntl_construct(panel_cntl,
- init_data,
- &panel_cntl_regs[init_data->inst],
- &panel_cntl_shift,
- &panel_cntl_mask);
-
- return &panel_cntl->base;
-}
-
-#define CTX ctx
-
-#define REG(reg_name) \
- (DCN_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name)
-
-static uint32_t read_pipe_fuses(struct dc_context *ctx)
-{
- uint32_t value = REG_READ(CC_DC_PIPE_DIS);
- /* RV1 support max 4 pipes */
- value = value & 0xf;
- return value;
-}
-
-static int dcn21_populate_dml_pipes_from_context(
- struct dc *dc,
- struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- bool fast_validate)
-{
- uint32_t pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
- int i;
-
- for (i = 0; i < pipe_cnt; i++) {
-
- pipes[i].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active;
- pipes[i].pipe.src.gpuvm = 1;
- }
-
- return pipe_cnt;
-}
-
-enum dc_status dcn21_patch_unknown_plane_state(struct dc_plane_state *plane_state)
-{
- enum dc_status result = DC_OK;
-
- if (plane_state->ctx->dc->debug.disable_dcc == DCC_ENABLE) {
- plane_state->dcc.enable = 1;
- /* align to our worst case block width */
- plane_state->dcc.meta_pitch = ((plane_state->src_rect.width + 1023) / 1024) * 1024;
- }
- result = dcn20_patch_unknown_plane_state(plane_state);
- return result;
-}
-
-static const struct resource_funcs dcn21_res_pool_funcs = {
- .destroy = dcn21_destroy_resource_pool,
- .link_enc_create = dcn21_link_encoder_create,
- .panel_cntl_create = dcn21_panel_cntl_create,
- .validate_bandwidth = dcn21_validate_bandwidth,
- .populate_dml_pipes = dcn21_populate_dml_pipes_from_context,
- .add_stream_to_ctx = dcn20_add_stream_to_ctx,
- .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
- .remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
- .acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer,
- .populate_dml_writeback_from_context = dcn20_populate_dml_writeback_from_context,
- .patch_unknown_plane_state = dcn21_patch_unknown_plane_state,
- .set_mcif_arb_params = dcn20_set_mcif_arb_params,
- .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link,
- .update_bw_bounding_box = update_bw_bounding_box
-};
-
-static bool dcn21_resource_construct(
- uint8_t num_virtual_links,
- struct dc *dc,
- struct dcn21_resource_pool *pool)
-{
- int i, j;
- struct dc_context *ctx = dc->ctx;
- struct irq_service_init_data init_data;
- uint32_t pipe_fuses = read_pipe_fuses(ctx);
- uint32_t num_pipes;
-
- ctx->dc_bios->regs = &bios_regs;
-
- pool->base.res_cap = &res_cap_rn;
-#ifdef DIAGS_BUILD
- if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
- //pool->base.res_cap = &res_cap_nv10_FPGA_2pipe_dsc;
- pool->base.res_cap = &res_cap_rn_FPGA_4pipe;
-#endif
-
- pool->base.funcs = &dcn21_res_pool_funcs;
-
- /*************************************************
- * Resource + asic cap harcoding *
- *************************************************/
- pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE;
-
- /* max pipe num for ASIC before check pipe fuses */
- pool->base.pipe_count = pool->base.res_cap->num_timing_generator;
-
- dc->caps.max_downscale_ratio = 200;
- dc->caps.i2c_speed_in_khz = 100;
- dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.4 w/a applied by default*/
- dc->caps.max_cursor_size = 256;
- dc->caps.min_horizontal_blanking_period = 80;
- dc->caps.dmdata_alloc_size = 2048;
-
- dc->caps.max_slave_planes = 1;
- dc->caps.max_slave_yuv_planes = 1;
- dc->caps.max_slave_rgb_planes = 1;
- dc->caps.post_blend_color_processing = true;
- dc->caps.force_dp_tps4_for_cp2520 = true;
- dc->caps.extended_aux_timeout_support = true;
- dc->caps.dmcub_support = true;
- dc->caps.is_apu = true;
-
- /* Color pipeline capabilities */
- dc->caps.color.dpp.dcn_arch = 1;
- dc->caps.color.dpp.input_lut_shared = 0;
- dc->caps.color.dpp.icsc = 1;
- dc->caps.color.dpp.dgam_ram = 1;
- dc->caps.color.dpp.dgam_rom_caps.srgb = 1;
- dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1;
- dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 0;
- dc->caps.color.dpp.dgam_rom_caps.pq = 0;
- dc->caps.color.dpp.dgam_rom_caps.hlg = 0;
- dc->caps.color.dpp.post_csc = 0;
- dc->caps.color.dpp.gamma_corr = 0;
- dc->caps.color.dpp.dgam_rom_for_yuv = 1;
-
- dc->caps.color.dpp.hw_3d_lut = 1;
- dc->caps.color.dpp.ogam_ram = 1;
- // no OGAM ROM on DCN2
- dc->caps.color.dpp.ogam_rom_caps.srgb = 0;
- dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0;
- dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0;
- dc->caps.color.dpp.ogam_rom_caps.pq = 0;
- dc->caps.color.dpp.ogam_rom_caps.hlg = 0;
- dc->caps.color.dpp.ocsc = 0;
-
- dc->caps.color.mpc.gamut_remap = 0;
- dc->caps.color.mpc.num_3dluts = 0;
- dc->caps.color.mpc.shared_3d_lut = 0;
- dc->caps.color.mpc.ogam_ram = 1;
- dc->caps.color.mpc.ogam_rom_caps.srgb = 0;
- dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0;
- dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0;
- dc->caps.color.mpc.ogam_rom_caps.pq = 0;
- dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
- dc->caps.color.mpc.ocsc = 1;
-
- if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV)
- dc->debug = debug_defaults_drv;
- else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) {
- pool->base.pipe_count = 4;
- dc->debug = debug_defaults_diags;
- } else
- dc->debug = debug_defaults_diags;
-
- // Init the vm_helper
- if (dc->vm_helper)
- vm_helper_init(dc->vm_helper, 16);
-
- /*************************************************
- * Create resources *
- *************************************************/
-
- pool->base.clock_sources[DCN20_CLK_SRC_PLL0] =
- dcn21_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL0,
- &clk_src_regs[0], false);
- pool->base.clock_sources[DCN20_CLK_SRC_PLL1] =
- dcn21_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL1,
- &clk_src_regs[1], false);
- pool->base.clock_sources[DCN20_CLK_SRC_PLL2] =
- dcn21_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL2,
- &clk_src_regs[2], false);
- pool->base.clock_sources[DCN20_CLK_SRC_PLL3] =
- dcn21_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL3,
- &clk_src_regs[3], false);
- pool->base.clock_sources[DCN20_CLK_SRC_PLL4] =
- dcn21_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL4,
- &clk_src_regs[4], false);
-
- pool->base.clk_src_count = DCN20_CLK_SRC_TOTAL_DCN21;
-
- /* todo: not reuse phy_pll registers */
- pool->base.dp_clock_source =
- dcn21_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_ID_DP_DTO,
- &clk_src_regs[0], true);
-
- for (i = 0; i < pool->base.clk_src_count; i++) {
- if (pool->base.clock_sources[i] == NULL) {
- dm_error("DC: failed to create clock sources!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
- }
-
- pool->base.dccg = dccg21_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask);
- if (pool->base.dccg == NULL) {
- dm_error("DC: failed to create dccg!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
-
- if (!dc->config.disable_dmcu) {
- pool->base.dmcu = dcn21_dmcu_create(ctx,
- &dmcu_regs,
- &dmcu_shift,
- &dmcu_mask);
- if (pool->base.dmcu == NULL) {
- dm_error("DC: failed to create dmcu!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
-
- dc->debug.dmub_command_table = false;
- }
-
- if (dc->config.disable_dmcu) {
- pool->base.psr = dmub_psr_create(ctx);
-
- if (pool->base.psr == NULL) {
- dm_error("DC: failed to create psr obj!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
- }
-
- if (dc->config.disable_dmcu)
- pool->base.abm = dmub_abm_create(ctx,
- &abm_regs,
- &abm_shift,
- &abm_mask);
- else
- pool->base.abm = dce_abm_create(ctx,
- &abm_regs,
- &abm_shift,
- &abm_mask);
-
- pool->base.pp_smu = dcn21_pp_smu_create(ctx);
-
- num_pipes = dcn2_1_ip.max_num_dpp;
-
- for (i = 0; i < dcn2_1_ip.max_num_dpp; i++)
- if (pipe_fuses & 1 << i)
- num_pipes--;
- dcn2_1_ip.max_num_dpp = num_pipes;
- dcn2_1_ip.max_num_otg = num_pipes;
-
- dml_init_instance(&dc->dml, &dcn2_1_soc, &dcn2_1_ip, DML_PROJECT_DCN21);
-
- init_data.ctx = dc->ctx;
- pool->base.irqs = dal_irq_service_dcn21_create(&init_data);
- if (!pool->base.irqs)
- goto create_fail;
-
- j = 0;
- /* mem input -> ipp -> dpp -> opp -> TG */
- for (i = 0; i < pool->base.pipe_count; i++) {
- /* if pipe is disabled, skip instance of HW pipe,
- * i.e, skip ASIC register instance
- */
- if ((pipe_fuses & (1 << i)) != 0)
- continue;
-
- pool->base.hubps[j] = dcn21_hubp_create(ctx, i);
- if (pool->base.hubps[j] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error(
- "DC: failed to create memory input!\n");
- goto create_fail;
- }
-
- pool->base.ipps[j] = dcn21_ipp_create(ctx, i);
- if (pool->base.ipps[j] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error(
- "DC: failed to create input pixel processor!\n");
- goto create_fail;
- }
-
- pool->base.dpps[j] = dcn21_dpp_create(ctx, i);
- if (pool->base.dpps[j] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error(
- "DC: failed to create dpps!\n");
- goto create_fail;
- }
-
- pool->base.opps[j] = dcn21_opp_create(ctx, i);
- if (pool->base.opps[j] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error(
- "DC: failed to create output pixel processor!\n");
- goto create_fail;
- }
-
- pool->base.timing_generators[j] = dcn21_timing_generator_create(
- ctx, i);
- if (pool->base.timing_generators[j] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create tg!\n");
- goto create_fail;
- }
- j++;
- }
-
- for (i = 0; i < pool->base.res_cap->num_ddc; i++) {
- pool->base.engines[i] = dcn21_aux_engine_create(ctx, i);
- if (pool->base.engines[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error(
- "DC:failed to create aux engine!!\n");
- goto create_fail;
- }
- pool->base.hw_i2cs[i] = dcn21_i2c_hw_create(ctx, i);
- if (pool->base.hw_i2cs[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error(
- "DC:failed to create hw i2c!!\n");
- goto create_fail;
- }
- pool->base.sw_i2cs[i] = NULL;
- }
-
- pool->base.timing_generator_count = j;
- pool->base.pipe_count = j;
- pool->base.mpcc_count = j;
-
- pool->base.mpc = dcn21_mpc_create(ctx);
- if (pool->base.mpc == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create mpc!\n");
- goto create_fail;
- }
-
- pool->base.hubbub = dcn21_hubbub_create(ctx);
- if (pool->base.hubbub == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create hubbub!\n");
- goto create_fail;
- }
-
- for (i = 0; i < pool->base.res_cap->num_dsc; i++) {
- pool->base.dscs[i] = dcn21_dsc_create(ctx, i);
- if (pool->base.dscs[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create display stream compressor %d!\n", i);
- goto create_fail;
- }
- }
-
- if (!dcn20_dwbc_create(ctx, &pool->base)) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create dwbc!\n");
- goto create_fail;
- }
- if (!dcn20_mmhubbub_create(ctx, &pool->base)) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create mcif_wb!\n");
- goto create_fail;
- }
-
- if (!resource_construct(num_virtual_links, dc, &pool->base,
- (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment) ?
- &res_create_funcs : &res_create_maximus_funcs)))
- goto create_fail;
-
- dcn21_hw_sequencer_construct(dc);
-
- dc->caps.max_planes = pool->base.pipe_count;
-
- for (i = 0; i < dc->caps.max_planes; ++i)
- dc->caps.planes[i] = plane_cap;
-
- dc->cap_funcs = cap_funcs;
-
- return true;
-
-create_fail:
-
- dcn21_resource_destruct(pool);
-
- return false;
-}
-
-struct resource_pool *dcn21_create_resource_pool(
- const struct dc_init_data *init_data,
- struct dc *dc)
-{
- struct dcn21_resource_pool *pool =
- kzalloc(sizeof(struct dcn21_resource_pool), GFP_KERNEL);
-
- if (!pool)
- return NULL;
-
- if (dcn21_resource_construct(init_data->num_virtual_links, dc, pool))
- return &pool->base;
-
- BREAK_TO_DEBUGGER();
- kfree(pool);
- return NULL;
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
index dfd77b3cc84d..b17277de0340 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
@@ -1,16 +1,16 @@
-#
+#
# Copyright 2020 Advanced Micro Devices, Inc.
-#
+#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
-#
+#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
-#
+#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
@@ -18,49 +18,15 @@
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
-#
+#
# Authors: AMD
-#
-#
+#
+#
-
-DCN30 = dcn30_init.o dcn30_hubbub.o dcn30_hubp.o dcn30_dpp.o dcn30_optc.o \
- dcn30_dccg.o dcn30_hwseq.o dcn30_mpc.o dcn30_vpg.o \
- dcn30_afmt.o dcn30_dio_stream_encoder.o dcn30_dwb.o \
- dcn30_dpp_cm.o dcn30_dwb_cm.o dcn30_cm_common.o dcn30_mmhubbub.o \
- dcn30_dio_link_encoder.o dcn30_resource.o
-
-
-ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -msse
-CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -msse
-endif
-
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mhard-float -maltivec
-CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o += -mhard-float
-CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o += -mhard-float
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o += -mpreferred-stack-boundary=4
-CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o += -mpreferred-stack-boundary=4
-else
-CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o += -msse2
-CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o += -msse2
-endif
-endif
+DCN30 := dcn30_vpg.o \
+ dcn30_afmt.o \
+ dcn30_cm_common.o \
+ dcn30_mmhubbub.o \
AMD_DAL_DCN30 = $(addprefix $(AMDDALPATH)/dc/dcn30/,$(DCN30))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_afmt.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_afmt.c
index 95528e5ef89e..55e388c4c98b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_afmt.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_afmt.c
@@ -123,7 +123,6 @@ void afmt3_se_audio_setup(
{
struct dcn30_afmt *afmt3 = DCN30_AFMT_FROM_AFMT(afmt);
- uint32_t speakers = 0;
uint32_t channels = 0;
ASSERT(audio_info);
@@ -131,7 +130,6 @@ void afmt3_se_audio_setup(
if (audio_info == NULL)
return;
- speakers = audio_info->flags.info.ALLSPEAKERS;
channels = speakers_to_channels(audio_info->flags.speaker_flags).all;
/* setup the audio stream source select (audio -> dig mapping) */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c
index e0df9b0065f9..0690c346f2c5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c
@@ -26,9 +26,9 @@
#include "dm_services.h"
#include "core_types.h"
#include "reg_helper.h"
-#include "dcn30_dpp.h"
+#include "dcn30/dcn30_dpp.h"
#include "basics/conversion.h"
-#include "dcn30_cm_common.h"
+#include "dcn30/dcn30_cm_common.h"
#include "custom_float.h"
#define REG(reg) reg
@@ -114,7 +114,6 @@ bool cm3_helper_translate_curve_to_hw_format(
struct pwl_result_data *rgb;
struct pwl_result_data *rgb_plus_1;
struct pwl_result_data *rgb_minus_1;
- struct fixed31_32 end_value;
int32_t region_start, region_end;
int32_t i;
@@ -141,23 +140,18 @@ bool cm3_helper_translate_curve_to_hw_format(
region_start = -MAX_LOW_POINT;
region_end = NUMBER_REGIONS - MAX_LOW_POINT;
} else {
- /* 11 segments
- * segment is from 2^-10 to 2^0
+ /* 13 segments
+ * segment is from 2^-12 to 2^0
* There are less than 256 points, for optimization
*/
- seg_distr[0] = 3;
- seg_distr[1] = 4;
- seg_distr[2] = 4;
- seg_distr[3] = 4;
- seg_distr[4] = 4;
- seg_distr[5] = 4;
- seg_distr[6] = 4;
- seg_distr[7] = 4;
- seg_distr[8] = 4;
- seg_distr[9] = 4;
- seg_distr[10] = 1;
-
- region_start = -10;
+ const uint8_t SEG_COUNT = 12;
+
+ for (i = 0; i < SEG_COUNT; i++)
+ seg_distr[i] = 4;
+
+ seg_distr[SEG_COUNT] = 1;
+
+ region_start = -SEG_COUNT;
region_end = 1;
}
@@ -176,8 +170,10 @@ bool cm3_helper_translate_curve_to_hw_format(
NUMBER_SW_SEGMENTS;
for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS;
i += increment) {
- if (j == hw_points - 1)
+ if (j == hw_points)
break;
+ if (i >= TRANSFER_FUNC_POINTS)
+ return false;
rgb_resulted[j].red = output_tf->tf_pts.red[i];
rgb_resulted[j].green = output_tf->tf_pts.green[i];
rgb_resulted[j].blue = output_tf->tf_pts.blue[i];
@@ -187,13 +183,13 @@ bool cm3_helper_translate_curve_to_hw_format(
/* last point */
start_index = (region_end + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS;
- rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index];
- rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
- rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
+ rgb_resulted[hw_points].red = output_tf->tf_pts.red[start_index];
+ rgb_resulted[hw_points].green = output_tf->tf_pts.green[start_index];
+ rgb_resulted[hw_points].blue = output_tf->tf_pts.blue[start_index];
- rgb_resulted[hw_points].red = rgb_resulted[hw_points - 1].red;
- rgb_resulted[hw_points].green = rgb_resulted[hw_points - 1].green;
- rgb_resulted[hw_points].blue = rgb_resulted[hw_points - 1].blue;
+ rgb_resulted[hw_points+1].red = rgb_resulted[hw_points].red;
+ rgb_resulted[hw_points+1].green = rgb_resulted[hw_points].green;
+ rgb_resulted[hw_points+1].blue = rgb_resulted[hw_points].blue;
// All 3 color channels have same x
corner_points[0].red.x = dc_fixpt_pow(dc_fixpt_from_int(2),
@@ -220,34 +216,16 @@ bool cm3_helper_translate_curve_to_hw_format(
/* see comment above, m_arrPoints[1].y should be the Y value for the
* region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1)
*/
- corner_points[1].red.y = rgb_resulted[hw_points - 1].red;
- corner_points[1].green.y = rgb_resulted[hw_points - 1].green;
- corner_points[1].blue.y = rgb_resulted[hw_points - 1].blue;
+ corner_points[1].red.y = rgb_resulted[hw_points].red;
+ corner_points[1].green.y = rgb_resulted[hw_points].green;
+ corner_points[1].blue.y = rgb_resulted[hw_points].blue;
corner_points[1].red.slope = dc_fixpt_zero;
corner_points[1].green.slope = dc_fixpt_zero;
corner_points[1].blue.slope = dc_fixpt_zero;
- if (output_tf->tf == TRANSFER_FUNCTION_PQ || output_tf->tf == TRANSFER_FUNCTION_HLG) {
- /* for PQ/HLG, we want to have a straight line from last HW X point,
- * and the slope to be such that we hit 1.0 at 10000/1000 nits.
- */
-
- if (output_tf->tf == TRANSFER_FUNCTION_PQ)
- end_value = dc_fixpt_from_int(125);
- else
- end_value = dc_fixpt_from_fraction(125, 10);
-
- corner_points[1].red.slope = dc_fixpt_div(
- dc_fixpt_sub(dc_fixpt_one, corner_points[1].red.y),
- dc_fixpt_sub(end_value, corner_points[1].red.x));
- corner_points[1].green.slope = dc_fixpt_div(
- dc_fixpt_sub(dc_fixpt_one, corner_points[1].green.y),
- dc_fixpt_sub(end_value, corner_points[1].green.x));
- corner_points[1].blue.slope = dc_fixpt_div(
- dc_fixpt_sub(dc_fixpt_one, corner_points[1].blue.y),
- dc_fixpt_sub(end_value, corner_points[1].blue.x));
- }
- lut_params->hw_points_num = hw_points;
+ // DCN3+ have 257 pts in lieu of no separate slope registers
+ // Prior HW had 256 base+slope pairs
+ lut_params->hw_points_num = hw_points + 1;
k = 0;
for (i = 1; i < MAX_REGIONS_NUMBER; i++) {
@@ -267,187 +245,37 @@ bool cm3_helper_translate_curve_to_hw_format(
rgb_plus_1 = rgb_resulted + 1;
rgb_minus_1 = rgb;
- i = 1;
- while (i != hw_points + 1) {
- if (i >= hw_points - 1) {
- if (dc_fixpt_lt(rgb_plus_1->red, rgb->red))
- rgb_plus_1->red = dc_fixpt_add(rgb->red, rgb_minus_1->delta_red);
- if (dc_fixpt_lt(rgb_plus_1->green, rgb->green))
- rgb_plus_1->green = dc_fixpt_add(rgb->green, rgb_minus_1->delta_green);
- if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue))
- rgb_plus_1->blue = dc_fixpt_add(rgb->blue, rgb_minus_1->delta_blue);
- }
-
- rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red);
- rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green);
- rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue);
+ if (fixpoint == true) {
+ i = 1;
+ while (i != hw_points + 2) {
+ if (i >= hw_points) {
+ if (dc_fixpt_lt(rgb_plus_1->red, rgb->red))
+ rgb_plus_1->red = dc_fixpt_add(rgb->red,
+ rgb_minus_1->delta_red);
+ if (dc_fixpt_lt(rgb_plus_1->green, rgb->green))
+ rgb_plus_1->green = dc_fixpt_add(rgb->green,
+ rgb_minus_1->delta_green);
+ if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue))
+ rgb_plus_1->blue = dc_fixpt_add(rgb->blue,
+ rgb_minus_1->delta_blue);
+ }
- if (fixpoint == true) {
rgb->delta_red_reg = dc_fixpt_clamp_u0d10(rgb->delta_red);
rgb->delta_green_reg = dc_fixpt_clamp_u0d10(rgb->delta_green);
rgb->delta_blue_reg = dc_fixpt_clamp_u0d10(rgb->delta_blue);
rgb->red_reg = dc_fixpt_clamp_u0d14(rgb->red);
rgb->green_reg = dc_fixpt_clamp_u0d14(rgb->green);
rgb->blue_reg = dc_fixpt_clamp_u0d14(rgb->blue);
- }
-
- ++rgb_plus_1;
- rgb_minus_1 = rgb;
- ++rgb;
- ++i;
- }
- cm3_helper_convert_to_custom_float(rgb_resulted,
- lut_params->corner_points,
- hw_points, fixpoint);
-
- return true;
-}
-
-#define NUM_DEGAMMA_REGIONS 12
-
-
-bool cm3_helper_translate_curve_to_degamma_hw_format(
- const struct dc_transfer_func *output_tf,
- struct pwl_params *lut_params)
-{
- struct curve_points3 *corner_points;
- struct pwl_result_data *rgb_resulted;
- struct pwl_result_data *rgb;
- struct pwl_result_data *rgb_plus_1;
-
- int32_t region_start, region_end;
- int32_t i;
- uint32_t j, k, seg_distr[MAX_REGIONS_NUMBER], increment, start_index, hw_points;
- if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS)
- return false;
-
- corner_points = lut_params->corner_points;
- rgb_resulted = lut_params->rgb_resulted;
- hw_points = 0;
-
- memset(lut_params, 0, sizeof(struct pwl_params));
- memset(seg_distr, 0, sizeof(seg_distr));
-
- region_start = -NUM_DEGAMMA_REGIONS;
- region_end = 0;
-
-
- for (i = region_end - region_start; i < MAX_REGIONS_NUMBER ; i++)
- seg_distr[i] = -1;
- /* 12 segments
- * segments are from 2^-12 to 0
- */
- for (i = 0; i < NUM_DEGAMMA_REGIONS ; i++)
- seg_distr[i] = 4;
-
- for (k = 0; k < MAX_REGIONS_NUMBER; k++) {
- if (seg_distr[k] != -1)
- hw_points += (1 << seg_distr[k]);
- }
-
- j = 0;
- for (k = 0; k < (region_end - region_start); k++) {
- increment = NUMBER_SW_SEGMENTS / (1 << seg_distr[k]);
- start_index = (region_start + k + MAX_LOW_POINT) *
- NUMBER_SW_SEGMENTS;
- for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS;
- i += increment) {
- if (j == hw_points - 1)
- break;
- rgb_resulted[j].red = output_tf->tf_pts.red[i];
- rgb_resulted[j].green = output_tf->tf_pts.green[i];
- rgb_resulted[j].blue = output_tf->tf_pts.blue[i];
- j++;
+ ++rgb_plus_1;
+ rgb_minus_1 = rgb;
+ ++rgb;
+ ++i;
}
}
-
- /* last point */
- start_index = (region_end + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS;
- rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index];
- rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
- rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
-
- corner_points[0].red.x = dc_fixpt_pow(dc_fixpt_from_int(2),
- dc_fixpt_from_int(region_start));
- corner_points[0].green.x = corner_points[0].red.x;
- corner_points[0].blue.x = corner_points[0].red.x;
- corner_points[1].red.x = dc_fixpt_pow(dc_fixpt_from_int(2),
- dc_fixpt_from_int(region_end));
- corner_points[1].green.x = corner_points[1].red.x;
- corner_points[1].blue.x = corner_points[1].red.x;
-
- corner_points[0].red.y = rgb_resulted[0].red;
- corner_points[0].green.y = rgb_resulted[0].green;
- corner_points[0].blue.y = rgb_resulted[0].blue;
-
- /* see comment above, m_arrPoints[1].y should be the Y value for the
- * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1)
- */
- corner_points[1].red.y = rgb_resulted[hw_points - 1].red;
- corner_points[1].green.y = rgb_resulted[hw_points - 1].green;
- corner_points[1].blue.y = rgb_resulted[hw_points - 1].blue;
- corner_points[1].red.slope = dc_fixpt_zero;
- corner_points[1].green.slope = dc_fixpt_zero;
- corner_points[1].blue.slope = dc_fixpt_zero;
-
- if (output_tf->tf == TRANSFER_FUNCTION_PQ) {
- /* for PQ, we want to have a straight line from last HW X point,
- * and the slope to be such that we hit 1.0 at 10000 nits.
- */
- const struct fixed31_32 end_value =
- dc_fixpt_from_int(125);
-
- corner_points[1].red.slope = dc_fixpt_div(
- dc_fixpt_sub(dc_fixpt_one, corner_points[1].red.y),
- dc_fixpt_sub(end_value, corner_points[1].red.x));
- corner_points[1].green.slope = dc_fixpt_div(
- dc_fixpt_sub(dc_fixpt_one, corner_points[1].green.y),
- dc_fixpt_sub(end_value, corner_points[1].green.x));
- corner_points[1].blue.slope = dc_fixpt_div(
- dc_fixpt_sub(dc_fixpt_one, corner_points[1].blue.y),
- dc_fixpt_sub(end_value, corner_points[1].blue.x));
- }
-
- lut_params->hw_points_num = hw_points;
-
- k = 0;
- for (i = 1; i < MAX_REGIONS_NUMBER; i++) {
- if (seg_distr[k] != -1) {
- lut_params->arr_curve_points[k].segments_num =
- seg_distr[k];
- lut_params->arr_curve_points[i].offset =
- lut_params->arr_curve_points[k].offset + (1 << seg_distr[k]);
- }
- k++;
- }
-
- if (seg_distr[k] != -1)
- lut_params->arr_curve_points[k].segments_num = seg_distr[k];
-
- rgb = rgb_resulted;
- rgb_plus_1 = rgb_resulted + 1;
-
- i = 1;
- while (i != hw_points + 1) {
- if (dc_fixpt_lt(rgb_plus_1->red, rgb->red))
- rgb_plus_1->red = rgb->red;
- if (dc_fixpt_lt(rgb_plus_1->green, rgb->green))
- rgb_plus_1->green = rgb->green;
- if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue))
- rgb_plus_1->blue = rgb->blue;
-
- rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red);
- rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green);
- rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue);
-
- ++rgb_plus_1;
- ++rgb;
- ++i;
- }
cm3_helper_convert_to_custom_float(rgb_resulted,
lut_params->corner_points,
- hw_points, false);
+ hw_points+1, fixpoint);
return true;
}
@@ -603,24 +431,6 @@ bool cm3_helper_convert_to_custom_float(
return false;
}
- if (!convert_to_custom_float_format(rgb->delta_red, &fmt,
- &rgb->delta_red_reg)) {
- BREAK_TO_DEBUGGER();
- return false;
- }
-
- if (!convert_to_custom_float_format(rgb->delta_green, &fmt,
- &rgb->delta_green_reg)) {
- BREAK_TO_DEBUGGER();
- return false;
- }
-
- if (!convert_to_custom_float_format(rgb->delta_blue, &fmt,
- &rgb->delta_blue_reg)) {
- BREAK_TO_DEBUGGER();
- return false;
- }
-
++rgb;
++i;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
deleted file mode 100644
index eac08926b574..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
+++ /dev/null
@@ -1,529 +0,0 @@
-/*
- * Copyright 2020 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include "dcn30_hubp.h"
-
-#include "dm_services.h"
-#include "dce_calcs.h"
-#include "reg_helper.h"
-#include "basics/conversion.h"
-#include "dcn20/dcn20_hubp.h"
-#include "dcn21/dcn21_hubp.h"
-
-#define REG(reg)\
- hubp2->hubp_regs->reg
-
-#define CTX \
- hubp2->base.ctx
-
-#undef FN
-#define FN(reg_name, field_name) \
- hubp2->hubp_shift->field_name, hubp2->hubp_mask->field_name
-
-void hubp3_set_vm_system_aperture_settings(struct hubp *hubp,
- struct vm_system_aperture_param *apt)
-{
- struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
-
- PHYSICAL_ADDRESS_LOC mc_vm_apt_default;
- PHYSICAL_ADDRESS_LOC mc_vm_apt_low;
- PHYSICAL_ADDRESS_LOC mc_vm_apt_high;
-
- // The format of default addr is 48:12 of the 48 bit addr
- mc_vm_apt_default.quad_part = apt->sys_default.quad_part >> 12;
-
- // The format of high/low are 48:18 of the 48 bit addr
- mc_vm_apt_low.quad_part = apt->sys_low.quad_part >> 18;
- mc_vm_apt_high.quad_part = apt->sys_high.quad_part >> 18;
-
- REG_SET(DCN_VM_SYSTEM_APERTURE_LOW_ADDR, 0,
- MC_VM_SYSTEM_APERTURE_LOW_ADDR, mc_vm_apt_low.quad_part);
-
- REG_SET(DCN_VM_SYSTEM_APERTURE_HIGH_ADDR, 0,
- MC_VM_SYSTEM_APERTURE_HIGH_ADDR, mc_vm_apt_high.quad_part);
-
- REG_SET_2(DCN_VM_MX_L1_TLB_CNTL, 0,
- ENABLE_L1_TLB, 1,
- SYSTEM_ACCESS_MODE, 0x3);
-}
-
-bool hubp3_program_surface_flip_and_addr(
- struct hubp *hubp,
- const struct dc_plane_address *address,
- bool flip_immediate)
-{
- struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
-
- //program flip type
- REG_UPDATE(DCSURF_FLIP_CONTROL,
- SURFACE_FLIP_TYPE, flip_immediate);
-
- // Program VMID reg
- if (flip_immediate == 0)
- REG_UPDATE(VMID_SETTINGS_0,
- VMID, address->vmid);
-
- if (address->type == PLN_ADDR_TYPE_GRPH_STEREO) {
- REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x1);
- REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x1);
-
- } else {
- // turn off stereo if not in stereo
- REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x0);
- REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x0);
- }
-
- /* HW automatically latch rest of address register on write to
- * DCSURF_PRIMARY_SURFACE_ADDRESS if SURFACE_UPDATE_LOCK is not used
- *
- * program high first and then the low addr, order matters!
- */
- switch (address->type) {
- case PLN_ADDR_TYPE_GRAPHICS:
- /* DCN1.0 does not support const color
- * TODO: program DCHUBBUB_RET_PATH_DCC_CFGx_0/1
- * base on address->grph.dcc_const_color
- * x = 0, 2, 4, 6 for pipe 0, 1, 2, 3 for rgb and luma
- * x = 1, 3, 5, 7 for pipe 0, 1, 2, 3 for chroma
- */
-
- if (address->grph.addr.quad_part == 0)
- break;
-
- REG_UPDATE_2(DCSURF_SURFACE_CONTROL,
- PRIMARY_SURFACE_TMZ, address->tmz_surface,
- PRIMARY_META_SURFACE_TMZ, address->tmz_surface);
-
- if (address->grph.meta_addr.quad_part != 0) {
- REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0,
- PRIMARY_META_SURFACE_ADDRESS_HIGH,
- address->grph.meta_addr.high_part);
-
- REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, 0,
- PRIMARY_META_SURFACE_ADDRESS,
- address->grph.meta_addr.low_part);
- }
-
- REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0,
- PRIMARY_SURFACE_ADDRESS_HIGH,
- address->grph.addr.high_part);
-
- REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0,
- PRIMARY_SURFACE_ADDRESS,
- address->grph.addr.low_part);
- break;
- case PLN_ADDR_TYPE_VIDEO_PROGRESSIVE:
- if (address->video_progressive.luma_addr.quad_part == 0
- || address->video_progressive.chroma_addr.quad_part == 0)
- break;
-
- REG_UPDATE_4(DCSURF_SURFACE_CONTROL,
- PRIMARY_SURFACE_TMZ, address->tmz_surface,
- PRIMARY_SURFACE_TMZ_C, address->tmz_surface,
- PRIMARY_META_SURFACE_TMZ, address->tmz_surface,
- PRIMARY_META_SURFACE_TMZ_C, address->tmz_surface);
-
- if (address->video_progressive.luma_meta_addr.quad_part != 0) {
- REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C, 0,
- PRIMARY_META_SURFACE_ADDRESS_HIGH_C,
- address->video_progressive.chroma_meta_addr.high_part);
-
- REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_C, 0,
- PRIMARY_META_SURFACE_ADDRESS_C,
- address->video_progressive.chroma_meta_addr.low_part);
-
- REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0,
- PRIMARY_META_SURFACE_ADDRESS_HIGH,
- address->video_progressive.luma_meta_addr.high_part);
-
- REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, 0,
- PRIMARY_META_SURFACE_ADDRESS,
- address->video_progressive.luma_meta_addr.low_part);
- }
-
- REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, 0,
- PRIMARY_SURFACE_ADDRESS_HIGH_C,
- address->video_progressive.chroma_addr.high_part);
-
- REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0,
- PRIMARY_SURFACE_ADDRESS_C,
- address->video_progressive.chroma_addr.low_part);
-
- REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0,
- PRIMARY_SURFACE_ADDRESS_HIGH,
- address->video_progressive.luma_addr.high_part);
-
- REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0,
- PRIMARY_SURFACE_ADDRESS,
- address->video_progressive.luma_addr.low_part);
- break;
- case PLN_ADDR_TYPE_GRPH_STEREO:
- if (address->grph_stereo.left_addr.quad_part == 0)
- break;
- if (address->grph_stereo.right_addr.quad_part == 0)
- break;
-
- REG_UPDATE_8(DCSURF_SURFACE_CONTROL,
- PRIMARY_SURFACE_TMZ, address->tmz_surface,
- PRIMARY_SURFACE_TMZ_C, address->tmz_surface,
- PRIMARY_META_SURFACE_TMZ, address->tmz_surface,
- PRIMARY_META_SURFACE_TMZ_C, address->tmz_surface,
- SECONDARY_SURFACE_TMZ, address->tmz_surface,
- SECONDARY_SURFACE_TMZ_C, address->tmz_surface,
- SECONDARY_META_SURFACE_TMZ, address->tmz_surface,
- SECONDARY_META_SURFACE_TMZ_C, address->tmz_surface);
-
- if (address->grph_stereo.right_meta_addr.quad_part != 0) {
-
- REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH_C, 0,
- SECONDARY_META_SURFACE_ADDRESS_HIGH_C,
- address->grph_stereo.right_alpha_meta_addr.high_part);
-
- REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS_C, 0,
- SECONDARY_META_SURFACE_ADDRESS_C,
- address->grph_stereo.right_alpha_meta_addr.low_part);
-
- REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH, 0,
- SECONDARY_META_SURFACE_ADDRESS_HIGH,
- address->grph_stereo.right_meta_addr.high_part);
-
- REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS, 0,
- SECONDARY_META_SURFACE_ADDRESS,
- address->grph_stereo.right_meta_addr.low_part);
- }
- if (address->grph_stereo.left_meta_addr.quad_part != 0) {
-
- REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C, 0,
- PRIMARY_META_SURFACE_ADDRESS_HIGH_C,
- address->grph_stereo.left_alpha_meta_addr.high_part);
-
- REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_C, 0,
- PRIMARY_META_SURFACE_ADDRESS_C,
- address->grph_stereo.left_alpha_meta_addr.low_part);
-
- REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0,
- PRIMARY_META_SURFACE_ADDRESS_HIGH,
- address->grph_stereo.left_meta_addr.high_part);
-
- REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, 0,
- PRIMARY_META_SURFACE_ADDRESS,
- address->grph_stereo.left_meta_addr.low_part);
- }
-
- REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH_C, 0,
- SECONDARY_SURFACE_ADDRESS_HIGH_C,
- address->grph_stereo.right_alpha_addr.high_part);
-
- REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS_C, 0,
- SECONDARY_SURFACE_ADDRESS_C,
- address->grph_stereo.right_alpha_addr.low_part);
-
- REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, 0,
- SECONDARY_SURFACE_ADDRESS_HIGH,
- address->grph_stereo.right_addr.high_part);
-
- REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS, 0,
- SECONDARY_SURFACE_ADDRESS,
- address->grph_stereo.right_addr.low_part);
-
- REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, 0,
- PRIMARY_SURFACE_ADDRESS_HIGH_C,
- address->grph_stereo.left_alpha_addr.high_part);
-
- REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0,
- PRIMARY_SURFACE_ADDRESS_C,
- address->grph_stereo.left_alpha_addr.low_part);
-
- REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0,
- PRIMARY_SURFACE_ADDRESS_HIGH,
- address->grph_stereo.left_addr.high_part);
-
- REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0,
- PRIMARY_SURFACE_ADDRESS,
- address->grph_stereo.left_addr.low_part);
- break;
- case PLN_ADDR_TYPE_RGBEA:
- if (address->rgbea.addr.quad_part == 0
- || address->rgbea.alpha_addr.quad_part == 0)
- break;
-
- REG_UPDATE_4(DCSURF_SURFACE_CONTROL,
- PRIMARY_SURFACE_TMZ, address->tmz_surface,
- PRIMARY_SURFACE_TMZ_C, address->tmz_surface,
- PRIMARY_META_SURFACE_TMZ, address->tmz_surface,
- PRIMARY_META_SURFACE_TMZ_C, address->tmz_surface);
-
- if (address->rgbea.meta_addr.quad_part != 0) {
-
- REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C, 0,
- PRIMARY_META_SURFACE_ADDRESS_HIGH_C,
- address->rgbea.alpha_meta_addr.high_part);
-
- REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_C, 0,
- PRIMARY_META_SURFACE_ADDRESS_C,
- address->rgbea.alpha_meta_addr.low_part);
-
- REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0,
- PRIMARY_META_SURFACE_ADDRESS_HIGH,
- address->rgbea.meta_addr.high_part);
-
- REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, 0,
- PRIMARY_META_SURFACE_ADDRESS,
- address->rgbea.meta_addr.low_part);
- }
-
- REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, 0,
- PRIMARY_SURFACE_ADDRESS_HIGH_C,
- address->rgbea.alpha_addr.high_part);
-
- REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0,
- PRIMARY_SURFACE_ADDRESS_C,
- address->rgbea.alpha_addr.low_part);
-
- REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0,
- PRIMARY_SURFACE_ADDRESS_HIGH,
- address->rgbea.addr.high_part);
-
- REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0,
- PRIMARY_SURFACE_ADDRESS,
- address->rgbea.addr.low_part);
- break;
- default:
- BREAK_TO_DEBUGGER();
- break;
- }
-
- hubp->request_address = *address;
-
- return true;
-}
-
-static void hubp3_program_tiling(
- struct dcn20_hubp *hubp2,
- const union dc_tiling_info *info,
- const enum surface_pixel_format pixel_format)
-{
- REG_UPDATE_4(DCSURF_ADDR_CONFIG,
- NUM_PIPES, log_2(info->gfx9.num_pipes),
- PIPE_INTERLEAVE, info->gfx9.pipe_interleave,
- MAX_COMPRESSED_FRAGS, log_2(info->gfx9.max_compressed_frags),
- NUM_PKRS, log_2(info->gfx9.num_pkrs));
-
- REG_UPDATE_3(DCSURF_TILING_CONFIG,
- SW_MODE, info->gfx9.swizzle,
- META_LINEAR, info->gfx9.meta_linear,
- PIPE_ALIGNED, info->gfx9.pipe_aligned);
-
-}
-
-void hubp3_dcc_control(struct hubp *hubp, bool enable,
- enum hubp_ind_block_size blk_size)
-{
- uint32_t dcc_en = enable ? 1 : 0;
- struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
-
- REG_UPDATE_4(DCSURF_SURFACE_CONTROL,
- PRIMARY_SURFACE_DCC_EN, dcc_en,
- PRIMARY_SURFACE_DCC_IND_BLK, blk_size,
- SECONDARY_SURFACE_DCC_EN, dcc_en,
- SECONDARY_SURFACE_DCC_IND_BLK, blk_size);
-}
-
-void hubp3_dcc_control_sienna_cichlid(struct hubp *hubp,
- struct dc_plane_dcc_param *dcc)
-{
- struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
-
- REG_UPDATE_6(DCSURF_SURFACE_CONTROL,
- PRIMARY_SURFACE_DCC_EN, dcc->enable,
- PRIMARY_SURFACE_DCC_IND_BLK, dcc->dcc_ind_blk,
- PRIMARY_SURFACE_DCC_IND_BLK_C, dcc->dcc_ind_blk_c,
- SECONDARY_SURFACE_DCC_EN, dcc->enable,
- SECONDARY_SURFACE_DCC_IND_BLK, dcc->dcc_ind_blk,
- SECONDARY_SURFACE_DCC_IND_BLK_C, dcc->dcc_ind_blk_c);
-}
-
-void hubp3_dmdata_set_attributes(
- struct hubp *hubp,
- const struct dc_dmdata_attributes *attr)
-{
- struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
-
- /*always HW mode */
- REG_UPDATE(DMDATA_CNTL,
- DMDATA_MODE, 1);
-
- /* for DMDATA flip, need to use SURFACE_UPDATE_LOCK */
- REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_UPDATE_LOCK, 1);
-
- /* toggle DMDATA_UPDATED and set repeat and size */
- REG_UPDATE(DMDATA_CNTL,
- DMDATA_UPDATED, 0);
- REG_UPDATE_3(DMDATA_CNTL,
- DMDATA_UPDATED, 1,
- DMDATA_REPEAT, attr->dmdata_repeat,
- DMDATA_SIZE, attr->dmdata_size);
-
- /* set DMDATA address */
- REG_WRITE(DMDATA_ADDRESS_LOW, attr->address.low_part);
- REG_UPDATE(DMDATA_ADDRESS_HIGH,
- DMDATA_ADDRESS_HIGH, attr->address.high_part);
-
- REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_UPDATE_LOCK, 0);
-
-}
-
-
-void hubp3_program_surface_config(
- struct hubp *hubp,
- enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
- struct plane_size *plane_size,
- enum dc_rotation_angle rotation,
- struct dc_plane_dcc_param *dcc,
- bool horizontal_mirror,
- unsigned int compat_level)
-{
- struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
-
- hubp3_dcc_control_sienna_cichlid(hubp, dcc);
- hubp3_program_tiling(hubp2, tiling_info, format);
- hubp2_program_size(hubp, format, plane_size, dcc);
- hubp2_program_rotation(hubp, rotation, horizontal_mirror);
- hubp2_program_pixel_format(hubp, format);
-}
-
-static void hubp3_program_deadline(
- struct hubp *hubp,
- struct _vcs_dpi_display_dlg_regs_st *dlg_attr,
- struct _vcs_dpi_display_ttu_regs_st *ttu_attr)
-{
- struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
-
- hubp2_program_deadline(hubp, dlg_attr, ttu_attr);
- REG_UPDATE(DCN_DMDATA_VM_CNTL,
- REFCYC_PER_VM_DMDATA, dlg_attr->refcyc_per_vm_dmdata);
-}
-
-void hubp3_read_state(struct hubp *hubp)
-{
- struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
- struct dcn_hubp_state *s = &hubp2->state;
- struct _vcs_dpi_display_rq_regs_st *rq_regs = &s->rq_regs;
-
- hubp2_read_state_common(hubp);
-
- REG_GET_7(DCHUBP_REQ_SIZE_CONFIG,
- CHUNK_SIZE, &rq_regs->rq_regs_l.chunk_size,
- MIN_CHUNK_SIZE, &rq_regs->rq_regs_l.min_chunk_size,
- META_CHUNK_SIZE, &rq_regs->rq_regs_l.meta_chunk_size,
- MIN_META_CHUNK_SIZE, &rq_regs->rq_regs_l.min_meta_chunk_size,
- DPTE_GROUP_SIZE, &rq_regs->rq_regs_l.dpte_group_size,
- SWATH_HEIGHT, &rq_regs->rq_regs_l.swath_height,
- PTE_ROW_HEIGHT_LINEAR, &rq_regs->rq_regs_l.pte_row_height_linear);
-
- REG_GET_7(DCHUBP_REQ_SIZE_CONFIG_C,
- CHUNK_SIZE_C, &rq_regs->rq_regs_c.chunk_size,
- MIN_CHUNK_SIZE_C, &rq_regs->rq_regs_c.min_chunk_size,
- META_CHUNK_SIZE_C, &rq_regs->rq_regs_c.meta_chunk_size,
- MIN_META_CHUNK_SIZE_C, &rq_regs->rq_regs_c.min_meta_chunk_size,
- DPTE_GROUP_SIZE_C, &rq_regs->rq_regs_c.dpte_group_size,
- SWATH_HEIGHT_C, &rq_regs->rq_regs_c.swath_height,
- PTE_ROW_HEIGHT_LINEAR_C, &rq_regs->rq_regs_c.pte_row_height_linear);
-
-}
-
-void hubp3_setup(
- struct hubp *hubp,
- struct _vcs_dpi_display_dlg_regs_st *dlg_attr,
- struct _vcs_dpi_display_ttu_regs_st *ttu_attr,
- struct _vcs_dpi_display_rq_regs_st *rq_regs,
- struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest)
-{
- /* otg is locked when this func is called. Register are double buffered.
- * disable the requestors is not needed
- */
- hubp2_vready_at_or_After_vsync(hubp, pipe_dest);
- hubp21_program_requestor(hubp, rq_regs);
- hubp3_program_deadline(hubp, dlg_attr, ttu_attr);
-}
-
-void hubp3_init(struct hubp *hubp)
-{
- // DEDCN21-133: Inconsistent row starting line for flip between DPTE and Meta
- // This is a chicken bit to enable the ECO fix.
-
- struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
- //hubp[i].HUBPREQ_DEBUG.HUBPREQ_DEBUG[26] = 1;
- REG_WRITE(HUBPREQ_DEBUG, 1 << 26);
-}
-
-static struct hubp_funcs dcn30_hubp_funcs = {
- .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer,
- .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled,
- .hubp_program_surface_flip_and_addr = hubp3_program_surface_flip_and_addr,
- .hubp_program_surface_config = hubp3_program_surface_config,
- .hubp_is_flip_pending = hubp2_is_flip_pending,
- .hubp_setup = hubp3_setup,
- .hubp_setup_interdependent = hubp2_setup_interdependent,
- .hubp_set_vm_system_aperture_settings = hubp3_set_vm_system_aperture_settings,
- .set_blank = hubp2_set_blank,
- .dcc_control = hubp3_dcc_control,
- .mem_program_viewport = min_set_viewport,
- .set_cursor_attributes = hubp2_cursor_set_attributes,
- .set_cursor_position = hubp2_cursor_set_position,
- .hubp_clk_cntl = hubp2_clk_cntl,
- .hubp_vtg_sel = hubp2_vtg_sel,
- .dmdata_set_attributes = hubp3_dmdata_set_attributes,
- .dmdata_load = hubp2_dmdata_load,
- .dmdata_status_done = hubp2_dmdata_status_done,
- .hubp_read_state = hubp3_read_state,
- .hubp_clear_underflow = hubp2_clear_underflow,
- .hubp_set_flip_control_surface_gsl = hubp2_set_flip_control_surface_gsl,
- .hubp_init = hubp3_init,
- .hubp_in_blank = hubp1_in_blank,
- .hubp_soft_reset = hubp1_soft_reset,
- .hubp_set_flip_int = hubp1_set_flip_int,
-};
-
-bool hubp3_construct(
- struct dcn20_hubp *hubp2,
- struct dc_context *ctx,
- uint32_t inst,
- const struct dcn_hubp2_registers *hubp_regs,
- const struct dcn_hubp2_shift *hubp_shift,
- const struct dcn_hubp2_mask *hubp_mask)
-{
- hubp2->base.funcs = &dcn30_hubp_funcs;
- hubp2->base.ctx = ctx;
- hubp2->hubp_regs = hubp_regs;
- hubp2->hubp_shift = hubp_shift;
- hubp2->hubp_mask = hubp_mask;
- hubp2->base.inst = inst;
- hubp2->base.opp_id = OPP_ID_INVALID;
- hubp2->base.mpcc_id = 0xf;
-
- return true;
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
deleted file mode 100644
index df2717116604..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
+++ /dev/null
@@ -1,1011 +0,0 @@
-/*
- * Copyright 2020 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-
-#include "dm_services.h"
-#include "dm_helpers.h"
-#include "core_types.h"
-#include "resource.h"
-#include "dcn30_hwseq.h"
-#include "dccg.h"
-#include "dce/dce_hwseq.h"
-#include "dcn30_mpc.h"
-#include "dcn30_dpp.h"
-#include "dcn10/dcn10_cm_common.h"
-#include "dcn30_cm_common.h"
-#include "reg_helper.h"
-#include "abm.h"
-#include "clk_mgr.h"
-#include "hubp.h"
-#include "dchubbub.h"
-#include "timing_generator.h"
-#include "opp.h"
-#include "ipp.h"
-#include "mpc.h"
-#include "mcif_wb.h"
-#include "dc_dmub_srv.h"
-#include "link_hwss.h"
-#include "dpcd_defs.h"
-#include "inc/dc_link_dp.h"
-#include "inc/link_dpcd.h"
-
-
-
-
-#define DC_LOGGER_INIT(logger)
-
-#define CTX \
- hws->ctx
-#define REG(reg)\
- hws->regs->reg
-#define DC_LOGGER \
- dc->ctx->logger
-
-
-#undef FN
-#define FN(reg_name, field_name) \
- hws->shifts->field_name, hws->masks->field_name
-
-bool dcn30_set_blend_lut(
- struct pipe_ctx *pipe_ctx, const struct dc_plane_state *plane_state)
-{
- struct dpp *dpp_base = pipe_ctx->plane_res.dpp;
- bool result = true;
- struct pwl_params *blend_lut = NULL;
-
- if (plane_state->blend_tf) {
- if (plane_state->blend_tf->type == TF_TYPE_HWPWL)
- blend_lut = &plane_state->blend_tf->pwl;
- else if (plane_state->blend_tf->type == TF_TYPE_DISTRIBUTED_POINTS) {
- cm3_helper_translate_curve_to_hw_format(
- plane_state->blend_tf, &dpp_base->regamma_params, false);
- blend_lut = &dpp_base->regamma_params;
- }
- }
- result = dpp_base->funcs->dpp_program_blnd_lut(dpp_base, blend_lut);
-
- return result;
-}
-
-static bool dcn30_set_mpc_shaper_3dlut(
- struct pipe_ctx *pipe_ctx, const struct dc_stream_state *stream)
-{
- struct dpp *dpp_base = pipe_ctx->plane_res.dpp;
- int mpcc_id = pipe_ctx->plane_res.hubp->inst;
- struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc;
- bool result = false;
- int acquired_rmu = 0;
- int mpcc_id_projected = 0;
-
- const struct pwl_params *shaper_lut = NULL;
- //get the shaper lut params
- if (stream->func_shaper) {
- if (stream->func_shaper->type == TF_TYPE_HWPWL)
- shaper_lut = &stream->func_shaper->pwl;
- else if (stream->func_shaper->type == TF_TYPE_DISTRIBUTED_POINTS) {
- cm_helper_translate_curve_to_hw_format(
- stream->func_shaper,
- &dpp_base->shaper_params, true);
- shaper_lut = &dpp_base->shaper_params;
- }
- }
-
- if (stream->lut3d_func &&
- stream->lut3d_func->state.bits.initialized == 1 &&
- stream->lut3d_func->state.bits.rmu_idx_valid == 1) {
- if (stream->lut3d_func->state.bits.rmu_mux_num == 0)
- mpcc_id_projected = stream->lut3d_func->state.bits.mpc_rmu0_mux;
- else if (stream->lut3d_func->state.bits.rmu_mux_num == 1)
- mpcc_id_projected = stream->lut3d_func->state.bits.mpc_rmu1_mux;
- else if (stream->lut3d_func->state.bits.rmu_mux_num == 2)
- mpcc_id_projected = stream->lut3d_func->state.bits.mpc_rmu2_mux;
- if (mpcc_id_projected != mpcc_id)
- BREAK_TO_DEBUGGER();
- /*find the reason why logical layer assigned a differant mpcc_id into acquire_post_bldn_3dlut*/
- acquired_rmu = mpc->funcs->acquire_rmu(mpc, mpcc_id,
- stream->lut3d_func->state.bits.rmu_mux_num);
- if (acquired_rmu != stream->lut3d_func->state.bits.rmu_mux_num)
- BREAK_TO_DEBUGGER();
- result = mpc->funcs->program_3dlut(mpc,
- &stream->lut3d_func->lut_3d,
- stream->lut3d_func->state.bits.rmu_mux_num);
- result = mpc->funcs->program_shaper(mpc, shaper_lut,
- stream->lut3d_func->state.bits.rmu_mux_num);
- } else
- /*loop through the available mux and release the requested mpcc_id*/
- mpc->funcs->release_rmu(mpc, mpcc_id);
-
-
- return result;
-}
-
-bool dcn30_set_input_transfer_func(struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- const struct dc_plane_state *plane_state)
-{
- struct dce_hwseq *hws = dc->hwseq;
- struct dpp *dpp_base = pipe_ctx->plane_res.dpp;
- enum dc_transfer_func_predefined tf;
- bool result = true;
- struct pwl_params *params = NULL;
-
- if (dpp_base == NULL || plane_state == NULL)
- return false;
-
- tf = TRANSFER_FUNCTION_UNITY;
-
- if (plane_state->in_transfer_func &&
- plane_state->in_transfer_func->type == TF_TYPE_PREDEFINED)
- tf = plane_state->in_transfer_func->tf;
-
- dpp_base->funcs->dpp_set_pre_degam(dpp_base, tf);
-
- if (plane_state->in_transfer_func) {
- if (plane_state->in_transfer_func->type == TF_TYPE_HWPWL)
- params = &plane_state->in_transfer_func->pwl;
- else if (plane_state->in_transfer_func->type == TF_TYPE_DISTRIBUTED_POINTS &&
- cm3_helper_translate_curve_to_hw_format(plane_state->in_transfer_func,
- &dpp_base->degamma_params, false))
- params = &dpp_base->degamma_params;
- }
-
- result = dpp_base->funcs->dpp_program_gamcor_lut(dpp_base, params);
-
- if (pipe_ctx->stream_res.opp && pipe_ctx->stream_res.opp->ctx) {
- if (dpp_base->funcs->dpp_program_blnd_lut)
- hws->funcs.set_blend_lut(pipe_ctx, plane_state);
- if (dpp_base->funcs->dpp_program_shaper_lut &&
- dpp_base->funcs->dpp_program_3dlut)
- hws->funcs.set_shaper_3dlut(pipe_ctx, plane_state);
- }
-
- return result;
-}
-
-bool dcn30_set_output_transfer_func(struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- const struct dc_stream_state *stream)
-{
- int mpcc_id = pipe_ctx->plane_res.hubp->inst;
- struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc;
- struct pwl_params *params = NULL;
- bool ret = false;
-
- /* program OGAM or 3DLUT only for the top pipe*/
- if (pipe_ctx->top_pipe == NULL) {
- /*program rmu shaper and 3dlut in MPC*/
- ret = dcn30_set_mpc_shaper_3dlut(pipe_ctx, stream);
- if (ret == false && mpc->funcs->set_output_gamma && stream->out_transfer_func) {
- if (stream->out_transfer_func->type == TF_TYPE_HWPWL)
- params = &stream->out_transfer_func->pwl;
- else if (pipe_ctx->stream->out_transfer_func->type ==
- TF_TYPE_DISTRIBUTED_POINTS &&
- cm3_helper_translate_curve_to_hw_format(
- stream->out_transfer_func,
- &mpc->blender_params, false))
- params = &mpc->blender_params;
- /* there are no ROM LUTs in OUTGAM */
- if (stream->out_transfer_func->type == TF_TYPE_PREDEFINED)
- BREAK_TO_DEBUGGER();
- }
- }
-
- mpc->funcs->set_output_gamma(mpc, mpcc_id, params);
- return ret;
-}
-
-static void dcn30_set_writeback(
- struct dc *dc,
- struct dc_writeback_info *wb_info,
- struct dc_state *context)
-{
- struct mcif_wb *mcif_wb;
- struct mcif_buf_params *mcif_buf_params;
-
- ASSERT(wb_info->dwb_pipe_inst < MAX_DWB_PIPES);
- ASSERT(wb_info->wb_enabled);
- ASSERT(wb_info->mpcc_inst >= 0);
- ASSERT(wb_info->mpcc_inst < dc->res_pool->mpcc_count);
- mcif_wb = dc->res_pool->mcif_wb[wb_info->dwb_pipe_inst];
- mcif_buf_params = &wb_info->mcif_buf_params;
-
- /* set DWB MPC mux */
- dc->res_pool->mpc->funcs->set_dwb_mux(dc->res_pool->mpc,
- wb_info->dwb_pipe_inst, wb_info->mpcc_inst);
- /* set MCIF_WB buffer and arbitration configuration */
- mcif_wb->funcs->config_mcif_buf(mcif_wb, mcif_buf_params, wb_info->dwb_params.dest_height);
- mcif_wb->funcs->config_mcif_arb(mcif_wb, &context->bw_ctx.bw.dcn.bw_writeback.mcif_wb_arb[wb_info->dwb_pipe_inst]);
-}
-
-void dcn30_update_writeback(
- struct dc *dc,
- struct dc_writeback_info *wb_info,
- struct dc_state *context)
-{
- struct dwbc *dwb;
- dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst];
- DC_LOG_DWB("%s dwb_pipe_inst = %d, mpcc_inst = %d",\
- __func__, wb_info->dwb_pipe_inst,\
- wb_info->mpcc_inst);
-
- dcn30_set_writeback(dc, wb_info, context);
-
- /* update DWB */
- dwb->funcs->update(dwb, &wb_info->dwb_params);
-}
-
-bool dcn30_mmhubbub_warmup(
- struct dc *dc,
- unsigned int num_dwb,
- struct dc_writeback_info *wb_info)
-{
- struct dwbc *dwb;
- struct mcif_wb *mcif_wb;
- struct mcif_warmup_params warmup_params = {0};
- unsigned int i, i_buf;
- /*make sure there is no active DWB eanbled */
- for (i = 0; i < num_dwb; i++) {
- dwb = dc->res_pool->dwbc[wb_info[i].dwb_pipe_inst];
- if (dwb->dwb_is_efc_transition || dwb->dwb_is_drc) {
- /*can not do warmup while any dwb enabled*/
- return false;
- }
- }
-
- if (wb_info->mcif_warmup_params.p_vmid == 0)
- return false;
-
- /*check whether this is new interface: warmup big buffer once*/
- if (wb_info->mcif_warmup_params.start_address.quad_part != 0 &&
- wb_info->mcif_warmup_params.region_size != 0) {
- /*mmhubbub is shared, so it does not matter which MCIF*/
- mcif_wb = dc->res_pool->mcif_wb[0];
- /*warmup a big chunk of VM buffer at once*/
- warmup_params.start_address.quad_part = wb_info->mcif_warmup_params.start_address.quad_part;
- warmup_params.address_increment = wb_info->mcif_warmup_params.region_size;
- warmup_params.region_size = wb_info->mcif_warmup_params.region_size;
- warmup_params.p_vmid = wb_info->mcif_warmup_params.p_vmid;
-
- if (warmup_params.address_increment == 0)
- warmup_params.address_increment = dc->dml.soc.vmm_page_size_bytes;
-
- mcif_wb->funcs->warmup_mcif(mcif_wb, &warmup_params);
- return true;
- }
- /*following is the original: warmup each DWB's mcif buffer*/
- for (i = 0; i < num_dwb; i++) {
- dwb = dc->res_pool->dwbc[wb_info[i].dwb_pipe_inst];
- mcif_wb = dc->res_pool->mcif_wb[wb_info[i].dwb_pipe_inst];
- /*warmup is for VM mode only*/
- if (wb_info[i].mcif_buf_params.p_vmid == 0)
- return false;
-
- /* Warmup MCIF_WB */
- for (i_buf = 0; i_buf < MCIF_BUF_COUNT; i_buf++) {
- warmup_params.start_address.quad_part = wb_info[i].mcif_buf_params.luma_address[i_buf];
- warmup_params.address_increment = dc->dml.soc.vmm_page_size_bytes;
- warmup_params.region_size = wb_info[i].mcif_buf_params.luma_pitch * wb_info[i].dwb_params.dest_height;
- warmup_params.p_vmid = wb_info[i].mcif_buf_params.p_vmid;
- mcif_wb->funcs->warmup_mcif(mcif_wb, &warmup_params);
- }
- }
- return true;
-}
-
-void dcn30_enable_writeback(
- struct dc *dc,
- struct dc_writeback_info *wb_info,
- struct dc_state *context)
-{
- struct dwbc *dwb;
- struct mcif_wb *mcif_wb;
- struct timing_generator *optc;
-
- dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst];
- mcif_wb = dc->res_pool->mcif_wb[wb_info->dwb_pipe_inst];
-
- /* set the OPTC source mux */
- optc = dc->res_pool->timing_generators[dwb->otg_inst];
- DC_LOG_DWB("%s dwb_pipe_inst = %d, mpcc_inst = %d",\
- __func__, wb_info->dwb_pipe_inst,\
- wb_info->mpcc_inst);
- if (IS_DIAG_DC(dc->ctx->dce_environment)) {
- /*till diags switch to warmup interface*/
- dcn30_mmhubbub_warmup(dc, 1, wb_info);
- }
- /* Update writeback pipe */
- dcn30_set_writeback(dc, wb_info, context);
-
- /* Enable MCIF_WB */
- mcif_wb->funcs->enable_mcif(mcif_wb);
- /* Enable DWB */
- dwb->funcs->enable(dwb, &wb_info->dwb_params);
-}
-
-void dcn30_disable_writeback(
- struct dc *dc,
- unsigned int dwb_pipe_inst)
-{
- struct dwbc *dwb;
- struct mcif_wb *mcif_wb;
-
- ASSERT(dwb_pipe_inst < MAX_DWB_PIPES);
- dwb = dc->res_pool->dwbc[dwb_pipe_inst];
- mcif_wb = dc->res_pool->mcif_wb[dwb_pipe_inst];
- DC_LOG_DWB("%s dwb_pipe_inst = %d",\
- __func__, dwb_pipe_inst);
-
- /* disable DWB */
- dwb->funcs->disable(dwb);
- /* disable MCIF */
- mcif_wb->funcs->disable_mcif(mcif_wb);
- /* disable MPC DWB mux */
- dc->res_pool->mpc->funcs->disable_dwb_mux(dc->res_pool->mpc, dwb_pipe_inst);
-}
-
-void dcn30_program_all_writeback_pipes_in_tree(
- struct dc *dc,
- const struct dc_stream_state *stream,
- struct dc_state *context)
-{
- struct dc_writeback_info wb_info;
- struct dwbc *dwb;
- struct dc_stream_status *stream_status = NULL;
- int i_wb, i_pipe, i_stream;
- DC_LOG_DWB("%s", __func__);
-
- ASSERT(stream);
- for (i_stream = 0; i_stream < context->stream_count; i_stream++) {
- if (context->streams[i_stream] == stream) {
- stream_status = &context->stream_status[i_stream];
- break;
- }
- }
- ASSERT(stream_status);
-
- ASSERT(stream->num_wb_info <= dc->res_pool->res_cap->num_dwb);
- /* For each writeback pipe */
- for (i_wb = 0; i_wb < stream->num_wb_info; i_wb++) {
-
- /* copy writeback info to local non-const so mpcc_inst can be set */
- wb_info = stream->writeback_info[i_wb];
- if (wb_info.wb_enabled) {
-
- /* get the MPCC instance for writeback_source_plane */
- wb_info.mpcc_inst = -1;
- for (i_pipe = 0; i_pipe < dc->res_pool->pipe_count; i_pipe++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i_pipe];
-
- if (!pipe_ctx->plane_state)
- continue;
-
- if (pipe_ctx->plane_state == wb_info.writeback_source_plane) {
- wb_info.mpcc_inst = pipe_ctx->plane_res.mpcc_inst;
- break;
- }
- }
-
- if (wb_info.mpcc_inst == -1) {
- /* Disable writeback pipe and disconnect from MPCC
- * if source plane has been removed
- */
- dc->hwss.disable_writeback(dc, wb_info.dwb_pipe_inst);
- continue;
- }
-
- ASSERT(wb_info.dwb_pipe_inst < dc->res_pool->res_cap->num_dwb);
- dwb = dc->res_pool->dwbc[wb_info.dwb_pipe_inst];
- if (dwb->funcs->is_enabled(dwb)) {
- /* writeback pipe already enabled, only need to update */
- dc->hwss.update_writeback(dc, &wb_info, context);
- } else {
- /* Enable writeback pipe and connect to MPCC */
- dc->hwss.enable_writeback(dc, &wb_info, context);
- }
- } else {
- /* Disable writeback pipe and disconnect from MPCC */
- dc->hwss.disable_writeback(dc, wb_info.dwb_pipe_inst);
- }
- }
-}
-
-void dcn30_init_hw(struct dc *dc)
-{
- struct abm **abms = dc->res_pool->multiple_abms;
- struct dce_hwseq *hws = dc->hwseq;
- struct dc_bios *dcb = dc->ctx->dc_bios;
- struct resource_pool *res_pool = dc->res_pool;
- int i, j;
- int edp_num;
- uint32_t backlight = MAX_BACKLIGHT_LEVEL;
-
- if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
- dc->clk_mgr->funcs->init_clocks(dc->clk_mgr);
-
- // Initialize the dccg
- if (res_pool->dccg->funcs->dccg_init)
- res_pool->dccg->funcs->dccg_init(res_pool->dccg);
-
- if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
-
- REG_WRITE(REFCLK_CNTL, 0);
- REG_UPDATE(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_ENABLE, 1);
- REG_WRITE(DIO_MEM_PWR_CTRL, 0);
-
- if (!dc->debug.disable_clock_gate) {
- /* enable all DCN clock gating */
- REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0);
-
- REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0);
-
- REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0);
- }
-
- //Enable ability to power gate / don't force power on permanently
- if (hws->funcs.enable_power_gating_plane)
- hws->funcs.enable_power_gating_plane(hws, true);
-
- return;
- }
-
- if (!dcb->funcs->is_accelerated_mode(dcb)) {
- hws->funcs.bios_golden_init(dc);
- hws->funcs.disable_vga(dc->hwseq);
- }
-
- if (dc->debug.enable_mem_low_power.bits.dmcu) {
- // Force ERAM to shutdown if DMCU is not enabled
- if (dc->debug.disable_dmcu || dc->config.disable_dmcu) {
- REG_UPDATE(DMU_MEM_PWR_CNTL, DMCU_ERAM_MEM_PWR_FORCE, 3);
- }
- }
-
- // Set default OPTC memory power states
- if (dc->debug.enable_mem_low_power.bits.optc) {
- // Shutdown when unassigned and light sleep in VBLANK
- REG_SET_2(ODM_MEM_PWR_CTRL3, 0, ODM_MEM_UNASSIGNED_PWR_MODE, 3, ODM_MEM_VBLANK_PWR_MODE, 1);
- }
-
- if (dc->ctx->dc_bios->fw_info_valid) {
- res_pool->ref_clocks.xtalin_clock_inKhz =
- dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency;
-
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
- if (res_pool->dccg && res_pool->hubbub) {
-
- (res_pool->dccg->funcs->get_dccg_ref_freq)(res_pool->dccg,
- dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency,
- &res_pool->ref_clocks.dccg_ref_clock_inKhz);
-
- (res_pool->hubbub->funcs->get_dchub_ref_freq)(res_pool->hubbub,
- res_pool->ref_clocks.dccg_ref_clock_inKhz,
- &res_pool->ref_clocks.dchub_ref_clock_inKhz);
- } else {
- // Not all ASICs have DCCG sw component
- res_pool->ref_clocks.dccg_ref_clock_inKhz =
- res_pool->ref_clocks.xtalin_clock_inKhz;
- res_pool->ref_clocks.dchub_ref_clock_inKhz =
- res_pool->ref_clocks.xtalin_clock_inKhz;
- }
- }
- } else
- ASSERT_CRITICAL(false);
-
- for (i = 0; i < dc->link_count; i++) {
- /* Power up AND update implementation according to the
- * required signal (which may be different from the
- * default signal on connector).
- */
- struct dc_link *link = dc->links[i];
-
- link->link_enc->funcs->hw_init(link->link_enc);
-
- /* Check for enabled DIG to identify enabled display */
- if (link->link_enc->funcs->is_dig_enabled &&
- link->link_enc->funcs->is_dig_enabled(link->link_enc))
- link->link_status.link_active = true;
- }
-
- /* Power gate DSCs */
- for (i = 0; i < res_pool->res_cap->num_dsc; i++)
- if (hws->funcs.dsc_pg_control != NULL)
- hws->funcs.dsc_pg_control(hws, res_pool->dscs[i]->inst, false);
-
- /* we want to turn off all dp displays before doing detection */
- if (dc->config.power_down_display_on_boot) {
- uint8_t dpcd_power_state = '\0';
- enum dc_status status = DC_ERROR_UNEXPECTED;
-
- for (i = 0; i < dc->link_count; i++) {
- if (dc->links[i]->connector_signal != SIGNAL_TYPE_DISPLAY_PORT)
- continue;
- /* DP 2.0 states that LTTPR regs must be read first */
- dp_retrieve_lttpr_cap(dc->links[i]);
-
- /* if any of the displays are lit up turn them off */
- status = core_link_read_dpcd(dc->links[i], DP_SET_POWER,
- &dpcd_power_state, sizeof(dpcd_power_state));
- if (status == DC_OK && dpcd_power_state == DP_POWER_STATE_D0) {
- /* blank dp stream before power off receiver*/
- if (dc->links[i]->link_enc->funcs->get_dig_frontend) {
- unsigned int fe;
-
- fe = dc->links[i]->link_enc->funcs->get_dig_frontend(
- dc->links[i]->link_enc);
- if (fe == ENGINE_ID_UNKNOWN)
- continue;
-
- for (j = 0; j < dc->res_pool->stream_enc_count; j++) {
- if (fe == dc->res_pool->stream_enc[j]->id) {
- dc->res_pool->stream_enc[j]->funcs->dp_blank(dc->links[i],
- dc->res_pool->stream_enc[j]);
- break;
- }
- }
- }
- dp_receiver_power_ctrl(dc->links[i], false);
- }
- }
- }
-
- /* If taking control over from VBIOS, we may want to optimize our first
- * mode set, so we need to skip powering down pipes until we know which
- * pipes we want to use.
- * Otherwise, if taking control is not possible, we need to power
- * everything down.
- */
- if (dcb->funcs->is_accelerated_mode(dcb) || dc->config.power_down_display_on_boot) {
- hws->funcs.init_pipes(dc, dc->current_state);
- if (dc->res_pool->hubbub->funcs->allow_self_refresh_control)
- dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub,
- !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter);
- }
-
- /* In headless boot cases, DIG may be turned
- * on which causes HW/SW discrepancies.
- * To avoid this, power down hardware on boot
- * if DIG is turned on and seamless boot not enabled
- */
- if (dc->config.power_down_display_on_boot) {
- struct dc_link *edp_links[MAX_NUM_EDP];
- struct dc_link *edp_link = NULL;
-
- get_edp_links(dc, edp_links, &edp_num);
- if (edp_num)
- edp_link = edp_links[0];
- if (edp_link && edp_link->link_enc->funcs->is_dig_enabled &&
- edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) &&
- dc->hwss.edp_backlight_control &&
- dc->hwss.power_down &&
- dc->hwss.edp_power_control) {
- dc->hwss.edp_backlight_control(edp_link, false);
- dc->hwss.power_down(dc);
- dc->hwss.edp_power_control(edp_link, false);
- } else {
- for (i = 0; i < dc->link_count; i++) {
- struct dc_link *link = dc->links[i];
-
- if (link->link_enc->funcs->is_dig_enabled &&
- link->link_enc->funcs->is_dig_enabled(link->link_enc) &&
- dc->hwss.power_down) {
- dc->hwss.power_down(dc);
- break;
- }
-
- }
- }
- }
-
- for (i = 0; i < res_pool->audio_count; i++) {
- struct audio *audio = res_pool->audios[i];
-
- audio->funcs->hw_init(audio);
- }
-
- for (i = 0; i < dc->link_count; i++) {
- struct dc_link *link = dc->links[i];
-
- if (link->panel_cntl)
- backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
- }
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- if (abms[i] != NULL)
- abms[i]->funcs->abm_init(abms[i], backlight);
- }
-
- /* power AFMT HDMI memory TODO: may move to dis/en output save power*/
- REG_WRITE(DIO_MEM_PWR_CTRL, 0);
-
- if (!dc->debug.disable_clock_gate) {
- /* enable all DCN clock gating */
- REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0);
-
- REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0);
-
- REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0);
- }
- if (hws->funcs.enable_power_gating_plane)
- hws->funcs.enable_power_gating_plane(dc->hwseq, true);
-
- if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks)
- dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub);
-
- if (dc->clk_mgr->funcs->notify_wm_ranges)
- dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr);
-
- if (dc->clk_mgr->funcs->set_hard_max_memclk)
- dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr);
-
- if (dc->res_pool->hubbub->funcs->force_pstate_change_control)
- dc->res_pool->hubbub->funcs->force_pstate_change_control(
- dc->res_pool->hubbub, false, false);
- if (dc->res_pool->hubbub->funcs->init_crb)
- dc->res_pool->hubbub->funcs->init_crb(dc->res_pool->hubbub);
-
-}
-
-void dcn30_set_avmute(struct pipe_ctx *pipe_ctx, bool enable)
-{
- if (pipe_ctx == NULL)
- return;
-
- if (dc_is_hdmi_signal(pipe_ctx->stream->signal) && pipe_ctx->stream_res.stream_enc != NULL)
- pipe_ctx->stream_res.stream_enc->funcs->set_avmute(
- pipe_ctx->stream_res.stream_enc,
- enable);
-}
-
-void dcn30_update_info_frame(struct pipe_ctx *pipe_ctx)
-{
- bool is_hdmi_tmds;
- bool is_dp;
-
- ASSERT(pipe_ctx->stream);
-
- if (pipe_ctx->stream_res.stream_enc == NULL)
- return; /* this is not root pipe */
-
- is_hdmi_tmds = dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal);
- is_dp = dc_is_dp_signal(pipe_ctx->stream->signal);
-
- if (!is_hdmi_tmds && !is_dp)
- return;
-
- if (is_hdmi_tmds)
- pipe_ctx->stream_res.stream_enc->funcs->update_hdmi_info_packets(
- pipe_ctx->stream_res.stream_enc,
- &pipe_ctx->stream_res.encoder_info_frame);
- else
- pipe_ctx->stream_res.stream_enc->funcs->update_dp_info_packets(
- pipe_ctx->stream_res.stream_enc,
- &pipe_ctx->stream_res.encoder_info_frame);
-}
-
-void dcn30_program_dmdata_engine(struct pipe_ctx *pipe_ctx)
-{
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct hubp *hubp = pipe_ctx->plane_res.hubp;
- bool enable = false;
- struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc;
- enum dynamic_metadata_mode mode = dc_is_dp_signal(stream->signal)
- ? dmdata_dp
- : dmdata_hdmi;
-
- /* if using dynamic meta, don't set up generic infopackets */
- if (pipe_ctx->stream->dmdata_address.quad_part != 0) {
- pipe_ctx->stream_res.encoder_info_frame.hdrsmd.valid = false;
- enable = true;
- }
-
- if (!hubp)
- return;
-
- if (!stream_enc || !stream_enc->funcs->set_dynamic_metadata)
- return;
-
- stream_enc->funcs->set_dynamic_metadata(stream_enc, enable,
- hubp->inst, mode);
-}
-
-bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
-{
- union dmub_rb_cmd cmd;
- uint32_t tmr_delay = 0, tmr_scale = 0;
- struct dc_cursor_attributes cursor_attr;
- bool cursor_cache_enable = false;
- struct dc_stream_state *stream = NULL;
- struct dc_plane_state *plane = NULL;
-
- if (!dc->ctx->dmub_srv)
- return false;
-
- if (enable) {
- if (dc->current_state) {
- int i;
-
- /* First, check no-memory-requests case */
- for (i = 0; i < dc->current_state->stream_count; i++) {
- if (dc->current_state->stream_status[i].plane_count)
- /* Fail eligibility on a visible stream */
- break;
- }
-
- if (i == dc->current_state->stream_count) {
- /* Enable no-memory-requests case */
- memset(&cmd, 0, sizeof(cmd));
- cmd.mall.header.type = DMUB_CMD__MALL;
- cmd.mall.header.sub_type = DMUB_CMD__MALL_ACTION_NO_DF_REQ;
- cmd.mall.header.payload_bytes = sizeof(cmd.mall) - sizeof(cmd.mall.header);
-
- dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd);
- dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv);
-
- return true;
- }
-
- stream = dc->current_state->streams[0];
- plane = (stream ? dc->current_state->stream_status[0].plane_states[0] : NULL);
-
- if (stream && plane) {
- cursor_cache_enable = stream->cursor_position.enable &&
- plane->address.grph.cursor_cache_addr.quad_part;
- cursor_attr = stream->cursor_attributes;
- }
-
- /*
- * Second, check MALL eligibility
- *
- * single display only, single surface only, 8 and 16 bit formats only, no VM,
- * do not use MALL for displays that support PSR as they use D0i3.2 in DMCUB FW
- *
- * TODO: When we implement multi-display, PSR displays will be allowed if there is
- * a non-PSR display present, since in that case we can't do D0i3.2
- */
- if (dc->current_state->stream_count == 1 &&
- stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED &&
- dc->current_state->stream_status[0].plane_count == 1 &&
- plane->format <= SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F &&
- plane->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB8888 &&
- plane->address.page_table_base.quad_part == 0 &&
- dc->hwss.does_plane_fit_in_mall &&
- dc->hwss.does_plane_fit_in_mall(dc, plane,
- cursor_cache_enable ? &cursor_attr : NULL)) {
- unsigned int v_total = stream->adjust.v_total_max ?
- stream->adjust.v_total_max : stream->timing.v_total;
- unsigned int refresh_hz = div_u64((unsigned long long) stream->timing.pix_clk_100hz *
- 100LL, (v_total * stream->timing.h_total));
-
- /*
- * one frame time in microsec:
- * Delay_Us = 1000000 / refresh
- * dynamic_delay_us = 1000000 / refresh + 2 * stutter_period
- *
- * one frame time modified by 'additional timer percent' (p):
- * Delay_Us_modified = dynamic_delay_us + dynamic_delay_us * p / 100
- * = dynamic_delay_us * (1 + p / 100)
- * = (1000000 / refresh + 2 * stutter_period) * (100 + p) / 100
- * = (1000000 + 2 * stutter_period * refresh) * (100 + p) / (100 * refresh)
- *
- * formula for timer duration based on parameters, from regspec:
- * dynamic_delay_us = 65.28 * (64 + MallFrameCacheTmrDly) * 2^MallFrameCacheTmrScale
- *
- * dynamic_delay_us / 65.28 = (64 + MallFrameCacheTmrDly) * 2^MallFrameCacheTmrScale
- * (dynamic_delay_us / 65.28) / 2^MallFrameCacheTmrScale = 64 + MallFrameCacheTmrDly
- * MallFrameCacheTmrDly = ((dynamic_delay_us / 65.28) / 2^MallFrameCacheTmrScale) - 64
- * = (1000000 + 2 * stutter_period * refresh) * (100 + p) / (100 * refresh) / 65.28 / 2^MallFrameCacheTmrScale - 64
- * = (1000000 + 2 * stutter_period * refresh) * (100 + p) / (refresh * 6528 * 2^MallFrameCacheTmrScale) - 64
- *
- * need to round up the result of the division before the subtraction
- */
- unsigned int denom = refresh_hz * 6528;
- unsigned int stutter_period = dc->current_state->perf_params.stutter_period_us;
-
- tmr_delay = div_u64(((1000000LL + 2 * stutter_period * refresh_hz) *
- (100LL + dc->debug.mall_additional_timer_percent) + denom - 1),
- denom) - 64LL;
-
- /* In some cases the stutter period is really big (tiny modes) in these
- * cases MALL cant be enabled, So skip these cases to avoid a ASSERT()
- *
- * We can check if stutter_period is more than 1/10th the frame time to
- * consider if we can actually meet the range of hysteresis timer
- */
- if (stutter_period > 100000/refresh_hz)
- return false;
-
- /* scale should be increased until it fits into 6 bits */
- while (tmr_delay & ~0x3F) {
- tmr_scale++;
-
- if (tmr_scale > 3) {
- /* Delay exceeds range of hysteresis timer */
- ASSERT(false);
- return false;
- }
-
- denom *= 2;
- tmr_delay = div_u64(((1000000LL + 2 * stutter_period * refresh_hz) *
- (100LL + dc->debug.mall_additional_timer_percent) + denom - 1),
- denom) - 64LL;
- }
-
- /* Copy HW cursor */
- if (cursor_cache_enable) {
- memset(&cmd, 0, sizeof(cmd));
- cmd.mall.header.type = DMUB_CMD__MALL;
- cmd.mall.header.sub_type = DMUB_CMD__MALL_ACTION_COPY_CURSOR;
- cmd.mall.header.payload_bytes =
- sizeof(cmd.mall) - sizeof(cmd.mall.header);
-
- switch (cursor_attr.color_format) {
- case CURSOR_MODE_MONO:
- cmd.mall.cursor_bpp = 2;
- break;
- case CURSOR_MODE_COLOR_1BIT_AND:
- case CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA:
- case CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA:
- cmd.mall.cursor_bpp = 32;
- break;
-
- case CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED:
- case CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED:
- cmd.mall.cursor_bpp = 64;
- break;
- }
-
- cmd.mall.cursor_copy_src.quad_part = cursor_attr.address.quad_part;
- cmd.mall.cursor_copy_dst.quad_part =
- (plane->address.grph.cursor_cache_addr.quad_part + 2047) & ~2047;
- cmd.mall.cursor_width = cursor_attr.width;
- cmd.mall.cursor_height = cursor_attr.height;
- cmd.mall.cursor_pitch = cursor_attr.pitch;
-
- dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd);
- dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv);
- dc_dmub_srv_wait_idle(dc->ctx->dmub_srv);
-
- /* Use copied cursor, and it's okay to not switch back */
- cursor_attr.address.quad_part = cmd.mall.cursor_copy_dst.quad_part;
- dc_stream_set_cursor_attributes(stream, &cursor_attr);
- }
-
- /* Enable MALL */
- memset(&cmd, 0, sizeof(cmd));
- cmd.mall.header.type = DMUB_CMD__MALL;
- cmd.mall.header.sub_type = DMUB_CMD__MALL_ACTION_ALLOW;
- cmd.mall.header.payload_bytes = sizeof(cmd.mall) - sizeof(cmd.mall.header);
- cmd.mall.tmr_delay = tmr_delay;
- cmd.mall.tmr_scale = tmr_scale;
- cmd.mall.debug_bits = dc->debug.mall_error_as_fatal;
-
- dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd);
- dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv);
-
- return true;
- }
- }
-
- /* No applicable optimizations */
- return false;
- }
-
- /* Disable MALL */
- memset(&cmd, 0, sizeof(cmd));
- cmd.mall.header.type = DMUB_CMD__MALL;
- cmd.mall.header.sub_type = DMUB_CMD__MALL_ACTION_DISALLOW;
- cmd.mall.header.payload_bytes =
- sizeof(cmd.mall) - sizeof(cmd.mall.header);
-
- dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd);
- dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv);
- dc_dmub_srv_wait_idle(dc->ctx->dmub_srv);
-
- return true;
-}
-
-bool dcn30_does_plane_fit_in_mall(struct dc *dc, struct dc_plane_state *plane, struct dc_cursor_attributes *cursor_attr)
-{
- // add meta size?
- unsigned int surface_size = plane->plane_size.surface_pitch * plane->plane_size.surface_size.height *
- (plane->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4);
- unsigned int mall_size = dc->caps.mall_size_total;
- unsigned int cursor_size = 0;
-
- if (dc->debug.mall_size_override)
- mall_size = 1024 * 1024 * dc->debug.mall_size_override;
-
- if (cursor_attr) {
- cursor_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size;
-
- switch (cursor_attr->color_format) {
- case CURSOR_MODE_MONO:
- cursor_size /= 2;
- break;
- case CURSOR_MODE_COLOR_1BIT_AND:
- case CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA:
- case CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA:
- cursor_size *= 4;
- break;
-
- case CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED:
- case CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED:
- cursor_size *= 8;
- break;
- }
- }
-
- return (surface_size + cursor_size) < mall_size;
-}
-
-void dcn30_hardware_release(struct dc *dc)
-{
- /* if pstate unsupported, force it supported */
- if (!dc->clk_mgr->clks.p_state_change_support &&
- dc->res_pool->hubbub->funcs->force_pstate_change_control)
- dc->res_pool->hubbub->funcs->force_pstate_change_control(
- dc->res_pool->hubbub, true, true);
-}
-
-void dcn30_set_disp_pattern_generator(const struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- enum controller_dp_test_pattern test_pattern,
- enum controller_dp_color_space color_space,
- enum dc_color_depth color_depth,
- const struct tg_color *solid_color,
- int width, int height, int offset)
-{
- struct stream_resource *stream_res = &pipe_ctx->stream_res;
- struct pipe_ctx *mpcc_pipe;
-
- if (test_pattern != CONTROLLER_DP_TEST_PATTERN_VIDEOMODE) {
- pipe_ctx->vtp_locked = false;
- /* turning on DPG */
- stream_res->opp->funcs->opp_set_disp_pattern_generator(stream_res->opp, test_pattern, color_space,
- color_depth, solid_color, width, height, offset);
-
- /* Defer hubp blank if tg is locked */
- if (stream_res->tg->funcs->is_tg_enabled(stream_res->tg)) {
- if (stream_res->tg->funcs->is_locked(stream_res->tg))
- pipe_ctx->vtp_locked = true;
- else {
- /* Blank HUBP to allow p-state during blank on all timings */
- pipe_ctx->plane_res.hubp->funcs->set_blank(pipe_ctx->plane_res.hubp, true);
-
- for (mpcc_pipe = pipe_ctx->bottom_pipe; mpcc_pipe; mpcc_pipe = mpcc_pipe->bottom_pipe)
- mpcc_pipe->plane_res.hubp->funcs->set_blank(mpcc_pipe->plane_res.hubp, true);
- }
- }
- } else {
- /* turning off DPG */
- pipe_ctx->plane_res.hubp->funcs->set_blank(pipe_ctx->plane_res.hubp, false);
- for (mpcc_pipe = pipe_ctx->bottom_pipe; mpcc_pipe; mpcc_pipe = mpcc_pipe->bottom_pipe)
- if (mpcc_pipe->plane_res.hubp)
- mpcc_pipe->plane_res.hubp->funcs->set_blank(mpcc_pipe->plane_res.hubp, false);
-
- stream_res->opp->funcs->opp_set_disp_pattern_generator(stream_res->opp, test_pattern, color_space,
- color_depth, solid_color, width, height, offset);
- }
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c
index 1c4b171c68ad..6f2a0d5d963b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c
@@ -100,7 +100,7 @@ static void mmhubbub3_warmup_mcif(struct mcif_wb *mcif_wb,
REG_UPDATE(MMHUBBUB_WARMUP_CONTROL_STATUS, MMHUBBUB_WARMUP_EN, false);
}
-void mmhubbub3_config_mcif_buf(struct mcif_wb *mcif_wb,
+static void mmhubbub3_config_mcif_buf(struct mcif_wb *mcif_wb,
struct mcif_buf_params *params,
unsigned int dest_height)
{
@@ -211,7 +211,7 @@ static void mmhubbub3_config_mcif_arb(struct mcif_wb *mcif_wb,
REG_UPDATE(MCIF_WB_ARBITRATION_CONTROL, MCIF_WB_CLIENT_ARBITRATION_SLICE, params->arbitration_slice);
}
-const struct mcif_wb_funcs dcn30_mmhubbub_funcs = {
+static const struct mcif_wb_funcs dcn30_mmhubbub_funcs = {
.warmup_mcif = mmhubbub3_warmup_mcif,
.enable_mcif = mmhubbub2_enable_mcif,
.disable_mcif = mmhubbub2_disable_mcif,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.h
index f2580e65196c..376620a8f02f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.h
@@ -31,13 +31,6 @@
#define TO_DCN30_MMHUBBUB(mcif_wb_base) \
container_of(mcif_wb_base, struct dcn30_mmhubbub, base)
-/* DCN */
-#define BASE_INNER(seg) \
- DCE_BASE__INST0_SEG ## seg
-
-#define BASE(seg) \
- BASE_INNER(seg)
-
#define MCIF_WB_COMMON_REG_LIST_DCN3_0(inst) \
SRI(MCIF_WB_BUFMGR_SW_CONTROL, MCIF_WB, inst),\
SRI(MCIF_WB_BUFMGR_STATUS, MCIF_WB, inst),\
@@ -227,11 +220,7 @@
SF(MCIF_WB0_MCIF_WB_BUF_3_ADDR_C, MCIF_WB_BUF_3_ADDR_C, mask_sh),\
SF(MCIF_WB0_MCIF_WB_BUF_4_ADDR_Y, MCIF_WB_BUF_4_ADDR_Y, mask_sh),\
SF(MCIF_WB0_MCIF_WB_BUF_4_ADDR_C, MCIF_WB_BUF_4_ADDR_C, mask_sh),\
- SF(MCIF_WB0_MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_LOCK_IGNORE, mask_sh),\
- SF(MCIF_WB0_MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_INT_EN, mask_sh),\
- SF(MCIF_WB0_MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_INT_ACK, mask_sh),\
SF(MCIF_WB0_MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_SLICE_INT_EN, mask_sh),\
- SF(MCIF_WB0_MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_LOCK, mask_sh),\
SF(MCIF_WB0_MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_SLICE_SIZE, mask_sh),\
SF(MCIF_WB_NB_PSTATE_LATENCY_WATERMARK, NB_PSTATE_CHANGE_REFRESH_WATERMARK, mask_sh),\
SF(MCIF_WB_NB_PSTATE_LATENCY_WATERMARK, NB_PSTATE_CHANGE_WATERMARK_MASK, mask_sh),\
@@ -363,11 +352,7 @@
SF(MCIF_WB_BUF_3_ADDR_C, MCIF_WB_BUF_3_ADDR_C, mask_sh),\
SF(MCIF_WB_BUF_4_ADDR_Y, MCIF_WB_BUF_4_ADDR_Y, mask_sh),\
SF(MCIF_WB_BUF_4_ADDR_C, MCIF_WB_BUF_4_ADDR_C, mask_sh),\
- SF(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_LOCK_IGNORE, mask_sh),\
- SF(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_INT_EN, mask_sh),\
- SF(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_INT_ACK, mask_sh),\
SF(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_SLICE_INT_EN, mask_sh),\
- SF(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_LOCK, mask_sh),\
SF(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_SLICE_SIZE, mask_sh),\
SF(MCIF_WB_NB_PSTATE_LATENCY_WATERMARK, NB_PSTATE_CHANGE_REFRESH_WATERMARK, mask_sh),\
SF(MCIF_WB_NB_PSTATE_LATENCY_WATERMARK, NB_PSTATE_CHANGE_WATERMARK_MASK, mask_sh),\
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
deleted file mode 100644
index 79a66e0c4303..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+++ /dev/null
@@ -1,2918 +0,0 @@
-/*
- * Copyright 2020 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-
-#include "dm_services.h"
-#include "dc.h"
-
-#include "dcn30_init.h"
-
-#include "resource.h"
-#include "include/irq_service_interface.h"
-#include "dcn20/dcn20_resource.h"
-
-#include "dcn30_resource.h"
-
-#include "dcn10/dcn10_ipp.h"
-#include "dcn30/dcn30_hubbub.h"
-#include "dcn30/dcn30_mpc.h"
-#include "dcn30/dcn30_hubp.h"
-#include "irq/dcn30/irq_service_dcn30.h"
-#include "dcn30/dcn30_dpp.h"
-#include "dcn30/dcn30_optc.h"
-#include "dcn20/dcn20_hwseq.h"
-#include "dcn30/dcn30_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
-#include "dcn30/dcn30_opp.h"
-#include "dcn20/dcn20_dsc.h"
-#include "dcn30/dcn30_vpg.h"
-#include "dcn30/dcn30_afmt.h"
-#include "dcn30/dcn30_dio_stream_encoder.h"
-#include "dcn30/dcn30_dio_link_encoder.h"
-#include "dce/dce_clock_source.h"
-#include "dce/dce_audio.h"
-#include "dce/dce_hwseq.h"
-#include "clk_mgr.h"
-#include "virtual/virtual_stream_encoder.h"
-#include "dce110/dce110_resource.h"
-#include "dml/display_mode_vba.h"
-#include "dcn30/dcn30_dccg.h"
-#include "dcn10/dcn10_resource.h"
-#include "dc_link_ddc.h"
-#include "dce/dce_panel_cntl.h"
-
-#include "dcn30/dcn30_dwb.h"
-#include "dcn30/dcn30_mmhubbub.h"
-
-#include "sienna_cichlid_ip_offset.h"
-#include "dcn/dcn_3_0_0_offset.h"
-#include "dcn/dcn_3_0_0_sh_mask.h"
-
-#include "nbio/nbio_7_4_offset.h"
-
-#include "dcn/dpcs_3_0_0_offset.h"
-#include "dcn/dpcs_3_0_0_sh_mask.h"
-
-#include "mmhub/mmhub_2_0_0_offset.h"
-#include "mmhub/mmhub_2_0_0_sh_mask.h"
-
-#include "reg_helper.h"
-#include "dce/dmub_abm.h"
-#include "dce/dmub_psr.h"
-#include "dce/dce_aux.h"
-#include "dce/dce_i2c.h"
-
-#include "dml/dcn30/display_mode_vba_30.h"
-#include "vm_helper.h"
-#include "dcn20/dcn20_vmid.h"
-#include "amdgpu_socbb.h"
-
-#define DC_LOGGER_INIT(logger)
-
-struct _vcs_dpi_ip_params_st dcn3_0_ip = {
- .use_min_dcfclk = 0,
- .clamp_min_dcfclk = 0,
- .odm_capable = 1,
- .gpuvm_enable = 0,
- .hostvm_enable = 0,
- .gpuvm_max_page_table_levels = 4,
- .hostvm_max_page_table_levels = 4,
- .hostvm_cached_page_table_levels = 0,
- .pte_group_size_bytes = 2048,
- .num_dsc = 6,
- .rob_buffer_size_kbytes = 184,
- .det_buffer_size_kbytes = 184,
- .dpte_buffer_size_in_pte_reqs_luma = 84,
- .pde_proc_buffer_size_64k_reqs = 48,
- .dpp_output_buffer_pixels = 2560,
- .opp_output_buffer_lines = 1,
- .pixel_chunk_size_kbytes = 8,
- .pte_enable = 1,
- .max_page_table_levels = 2,
- .pte_chunk_size_kbytes = 2, // ?
- .meta_chunk_size_kbytes = 2,
- .writeback_chunk_size_kbytes = 8,
- .line_buffer_size_bits = 789504,
- .is_line_buffer_bpp_fixed = 0, // ?
- .line_buffer_fixed_bpp = 0, // ?
- .dcc_supported = true,
- .writeback_interface_buffer_size_kbytes = 90,
- .writeback_line_buffer_buffer_size = 0,
- .max_line_buffer_lines = 12,
- .writeback_luma_buffer_size_kbytes = 12, // writeback_line_buffer_buffer_size = 656640
- .writeback_chroma_buffer_size_kbytes = 8,
- .writeback_chroma_line_buffer_width_pixels = 4,
- .writeback_max_hscl_ratio = 1,
- .writeback_max_vscl_ratio = 1,
- .writeback_min_hscl_ratio = 1,
- .writeback_min_vscl_ratio = 1,
- .writeback_max_hscl_taps = 1,
- .writeback_max_vscl_taps = 1,
- .writeback_line_buffer_luma_buffer_size = 0,
- .writeback_line_buffer_chroma_buffer_size = 14643,
- .cursor_buffer_size = 8,
- .cursor_chunk_size = 2,
- .max_num_otg = 6,
- .max_num_dpp = 6,
- .max_num_wb = 1,
- .max_dchub_pscl_bw_pix_per_clk = 4,
- .max_pscl_lb_bw_pix_per_clk = 2,
- .max_lb_vscl_bw_pix_per_clk = 4,
- .max_vscl_hscl_bw_pix_per_clk = 4,
- .max_hscl_ratio = 6,
- .max_vscl_ratio = 6,
- .hscl_mults = 4,
- .vscl_mults = 4,
- .max_hscl_taps = 8,
- .max_vscl_taps = 8,
- .dispclk_ramp_margin_percent = 1,
- .underscan_factor = 1.11,
- .min_vblank_lines = 32,
- .dppclk_delay_subtotal = 46,
- .dynamic_metadata_vm_enabled = true,
- .dppclk_delay_scl_lb_only = 16,
- .dppclk_delay_scl = 50,
- .dppclk_delay_cnvc_formatter = 27,
- .dppclk_delay_cnvc_cursor = 6,
- .dispclk_delay_subtotal = 119,
- .dcfclk_cstate_latency = 5.2, // SRExitTime
- .max_inter_dcn_tile_repeaters = 8,
- .odm_combine_4to1_supported = true,
-
- .xfc_supported = false,
- .xfc_fill_bw_overhead_percent = 10.0,
- .xfc_fill_constant_bytes = 0,
- .gfx7_compat_tiling_supported = 0,
- .number_of_cursors = 1,
-};
-
-struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc = {
- .clock_limits = {
- {
- .state = 0,
- .dispclk_mhz = 562.0,
- .dppclk_mhz = 300.0,
- .phyclk_mhz = 300.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 405.6,
- },
- },
- .min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */
- .num_states = 1,
- .sr_exit_time_us = 15.5,
- .sr_enter_plus_exit_time_us = 20,
- .urgent_latency_us = 4.0,
- .urgent_latency_pixel_data_only_us = 4.0,
- .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
- .urgent_latency_vm_data_only_us = 4.0,
- .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
- .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
- .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
- .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
- .max_avg_sdp_bw_use_normal_percent = 60.0,
- .max_avg_dram_bw_use_normal_percent = 40.0,
- .writeback_latency_us = 12.0,
- .max_request_size_bytes = 256,
- .fabric_datapath_to_dcn_data_return_bytes = 64,
- .dcn_downspread_percent = 0.5,
- .downspread_percent = 0.38,
- .dram_page_open_time_ns = 50.0,
- .dram_rw_turnaround_time_ns = 17.5,
- .dram_return_buffer_per_channel_bytes = 8192,
- .round_trip_ping_latency_dcfclk_cycles = 191,
- .urgent_out_of_order_return_per_channel_bytes = 4096,
- .channel_interleave_bytes = 256,
- .num_banks = 8,
- .gpuvm_min_page_size_bytes = 4096,
- .hostvm_min_page_size_bytes = 4096,
- .dram_clock_change_latency_us = 404,
- .dummy_pstate_latency_us = 5,
- .writeback_dram_clock_change_latency_us = 23.0,
- .return_bus_width_bytes = 64,
- .dispclk_dppclk_vco_speed_mhz = 3650,
- .xfc_bus_transport_time_us = 20, // ?
- .xfc_xbuf_latency_tolerance_us = 4, // ?
- .use_urgent_burst_bw = 1, // ?
- .do_urgent_latency_adjustment = true,
- .urgent_latency_adjustment_fabric_clock_component_us = 1.0,
- .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
-};
-
-enum dcn30_clk_src_array_id {
- DCN30_CLK_SRC_PLL0,
- DCN30_CLK_SRC_PLL1,
- DCN30_CLK_SRC_PLL2,
- DCN30_CLK_SRC_PLL3,
- DCN30_CLK_SRC_PLL4,
- DCN30_CLK_SRC_PLL5,
- DCN30_CLK_SRC_TOTAL
-};
-
-/* begin *********************
- * macros to expend register list macro defined in HW object header file
- */
-
-/* DCN */
-/* TODO awful hack. fixup dcn20_dwb.h */
-#undef BASE_INNER
-#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
-
-#define BASE(seg) BASE_INNER(seg)
-
-#define SR(reg_name)\
- .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \
- mm ## reg_name
-
-#define SRI(reg_name, block, id)\
- .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define SRI2(reg_name, block, id)\
- .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \
- mm ## reg_name
-
-#define SRIR(var_name, reg_name, block, id)\
- .var_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define SRII(reg_name, block, id)\
- .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define SRII_MPC_RMU(reg_name, block, id)\
- .RMU##_##reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define SRII_DWB(reg_name, temp_name, block, id)\
- .reg_name[id] = BASE(mm ## block ## id ## _ ## temp_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## temp_name
-
-#define DCCG_SRII(reg_name, block, id)\
- .block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define VUPDATE_SRII(reg_name, block, id)\
- .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \
- mm ## reg_name ## _ ## block ## id
-
-/* NBIO */
-#define NBIO_BASE_INNER(seg) \
- NBIO_BASE__INST0_SEG ## seg
-
-#define NBIO_BASE(seg) \
- NBIO_BASE_INNER(seg)
-
-#define NBIO_SR(reg_name)\
- .reg_name = NBIO_BASE(mm ## reg_name ## _BASE_IDX) + \
- mm ## reg_name
-
-/* MMHUB */
-#define MMHUB_BASE_INNER(seg) \
- MMHUB_BASE__INST0_SEG ## seg
-
-#define MMHUB_BASE(seg) \
- MMHUB_BASE_INNER(seg)
-
-#define MMHUB_SR(reg_name)\
- .reg_name = MMHUB_BASE(mmMM ## reg_name ## _BASE_IDX) + \
- mmMM ## reg_name
-
-/* CLOCK */
-#define CLK_BASE_INNER(seg) \
- CLK_BASE__INST0_SEG ## seg
-
-#define CLK_BASE(seg) \
- CLK_BASE_INNER(seg)
-
-#define CLK_SRI(reg_name, block, inst)\
- .reg_name = CLK_BASE(mm ## block ## _ ## inst ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## _ ## inst ## _ ## reg_name
-
-
-static const struct bios_registers bios_regs = {
- NBIO_SR(BIOS_SCRATCH_3),
- NBIO_SR(BIOS_SCRATCH_6)
-};
-
-#define clk_src_regs(index, pllid)\
-[index] = {\
- CS_COMMON_REG_LIST_DCN2_0(index, pllid),\
-}
-
-static const struct dce110_clk_src_regs clk_src_regs[] = {
- clk_src_regs(0, A),
- clk_src_regs(1, B),
- clk_src_regs(2, C),
- clk_src_regs(3, D),
- clk_src_regs(4, E),
- clk_src_regs(5, F)
-};
-
-static const struct dce110_clk_src_shift cs_shift = {
- CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT)
-};
-
-static const struct dce110_clk_src_mask cs_mask = {
- CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK)
-};
-
-#define abm_regs(id)\
-[id] = {\
- ABM_DCN30_REG_LIST(id)\
-}
-
-static const struct dce_abm_registers abm_regs[] = {
- abm_regs(0),
- abm_regs(1),
- abm_regs(2),
- abm_regs(3),
- abm_regs(4),
- abm_regs(5),
-};
-
-static const struct dce_abm_shift abm_shift = {
- ABM_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dce_abm_mask abm_mask = {
- ABM_MASK_SH_LIST_DCN30(_MASK)
-};
-
-
-
-#define audio_regs(id)\
-[id] = {\
- AUD_COMMON_REG_LIST(id)\
-}
-
-static const struct dce_audio_registers audio_regs[] = {
- audio_regs(0),
- audio_regs(1),
- audio_regs(2),
- audio_regs(3),
- audio_regs(4),
- audio_regs(5),
- audio_regs(6)
-};
-
-#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\
- SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\
- SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\
- AUD_COMMON_MASK_SH_LIST_BASE(mask_sh)
-
-static const struct dce_audio_shift audio_shift = {
- DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce_audio_mask audio_mask = {
- DCE120_AUD_COMMON_MASK_SH_LIST(_MASK)
-};
-
-#define vpg_regs(id)\
-[id] = {\
- VPG_DCN3_REG_LIST(id)\
-}
-
-static const struct dcn30_vpg_registers vpg_regs[] = {
- vpg_regs(0),
- vpg_regs(1),
- vpg_regs(2),
- vpg_regs(3),
- vpg_regs(4),
- vpg_regs(5),
- vpg_regs(6),
-};
-
-static const struct dcn30_vpg_shift vpg_shift = {
- DCN3_VPG_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn30_vpg_mask vpg_mask = {
- DCN3_VPG_MASK_SH_LIST(_MASK)
-};
-
-#define afmt_regs(id)\
-[id] = {\
- AFMT_DCN3_REG_LIST(id)\
-}
-
-static const struct dcn30_afmt_registers afmt_regs[] = {
- afmt_regs(0),
- afmt_regs(1),
- afmt_regs(2),
- afmt_regs(3),
- afmt_regs(4),
- afmt_regs(5),
- afmt_regs(6),
-};
-
-static const struct dcn30_afmt_shift afmt_shift = {
- DCN3_AFMT_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn30_afmt_mask afmt_mask = {
- DCN3_AFMT_MASK_SH_LIST(_MASK)
-};
-
-#define stream_enc_regs(id)\
-[id] = {\
- SE_DCN3_REG_LIST(id)\
-}
-
-static const struct dcn10_stream_enc_registers stream_enc_regs[] = {
- stream_enc_regs(0),
- stream_enc_regs(1),
- stream_enc_regs(2),
- stream_enc_regs(3),
- stream_enc_regs(4),
- stream_enc_regs(5)
-};
-
-static const struct dcn10_stream_encoder_shift se_shift = {
- SE_COMMON_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn10_stream_encoder_mask se_mask = {
- SE_COMMON_MASK_SH_LIST_DCN30(_MASK)
-};
-
-
-#define aux_regs(id)\
-[id] = {\
- DCN2_AUX_REG_LIST(id)\
-}
-
-static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = {
- aux_regs(0),
- aux_regs(1),
- aux_regs(2),
- aux_regs(3),
- aux_regs(4),
- aux_regs(5)
-};
-
-#define hpd_regs(id)\
-[id] = {\
- HPD_REG_LIST(id)\
-}
-
-static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = {
- hpd_regs(0),
- hpd_regs(1),
- hpd_regs(2),
- hpd_regs(3),
- hpd_regs(4),
- hpd_regs(5)
-};
-
-#define link_regs(id, phyid)\
-[id] = {\
- LE_DCN3_REG_LIST(id), \
- UNIPHY_DCN2_REG_LIST(phyid), \
- DPCS_DCN2_REG_LIST(id), \
- SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \
-}
-
-static const struct dce110_aux_registers_shift aux_shift = {
- DCN_AUX_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce110_aux_registers_mask aux_mask = {
- DCN_AUX_MASK_SH_LIST(_MASK)
-};
-
-static const struct dcn10_link_enc_registers link_enc_regs[] = {
- link_regs(0, A),
- link_regs(1, B),
- link_regs(2, C),
- link_regs(3, D),
- link_regs(4, E),
- link_regs(5, F)
-};
-
-static const struct dcn10_link_enc_shift le_shift = {
- LINK_ENCODER_MASK_SH_LIST_DCN30(__SHIFT),\
- DPCS_DCN2_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn10_link_enc_mask le_mask = {
- LINK_ENCODER_MASK_SH_LIST_DCN30(_MASK),\
- DPCS_DCN2_MASK_SH_LIST(_MASK)
-};
-
-
-static const struct dce_panel_cntl_registers panel_cntl_regs[] = {
- { DCN_PANEL_CNTL_REG_LIST() }
-};
-
-static const struct dce_panel_cntl_shift panel_cntl_shift = {
- DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce_panel_cntl_mask panel_cntl_mask = {
- DCE_PANEL_CNTL_MASK_SH_LIST(_MASK)
-};
-
-#define dpp_regs(id)\
-[id] = {\
- DPP_REG_LIST_DCN30(id),\
-}
-
-static const struct dcn3_dpp_registers dpp_regs[] = {
- dpp_regs(0),
- dpp_regs(1),
- dpp_regs(2),
- dpp_regs(3),
- dpp_regs(4),
- dpp_regs(5),
-};
-
-static const struct dcn3_dpp_shift tf_shift = {
- DPP_REG_LIST_SH_MASK_DCN30(__SHIFT)
-};
-
-static const struct dcn3_dpp_mask tf_mask = {
- DPP_REG_LIST_SH_MASK_DCN30(_MASK)
-};
-
-#define opp_regs(id)\
-[id] = {\
- OPP_REG_LIST_DCN30(id),\
-}
-
-static const struct dcn20_opp_registers opp_regs[] = {
- opp_regs(0),
- opp_regs(1),
- opp_regs(2),
- opp_regs(3),
- opp_regs(4),
- opp_regs(5)
-};
-
-static const struct dcn20_opp_shift opp_shift = {
- OPP_MASK_SH_LIST_DCN20(__SHIFT)
-};
-
-static const struct dcn20_opp_mask opp_mask = {
- OPP_MASK_SH_LIST_DCN20(_MASK)
-};
-
-#define aux_engine_regs(id)\
-[id] = {\
- AUX_COMMON_REG_LIST0(id), \
- .AUXN_IMPCAL = 0, \
- .AUXP_IMPCAL = 0, \
- .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \
-}
-
-static const struct dce110_aux_registers aux_engine_regs[] = {
- aux_engine_regs(0),
- aux_engine_regs(1),
- aux_engine_regs(2),
- aux_engine_regs(3),
- aux_engine_regs(4),
- aux_engine_regs(5)
-};
-
-#define dwbc_regs_dcn3(id)\
-[id] = {\
- DWBC_COMMON_REG_LIST_DCN30(id),\
-}
-
-static const struct dcn30_dwbc_registers dwbc30_regs[] = {
- dwbc_regs_dcn3(0),
-};
-
-static const struct dcn30_dwbc_shift dwbc30_shift = {
- DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn30_dwbc_mask dwbc30_mask = {
- DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK)
-};
-
-#define mcif_wb_regs_dcn3(id)\
-[id] = {\
- MCIF_WB_COMMON_REG_LIST_DCN30(id),\
-}
-
-static const struct dcn30_mmhubbub_registers mcif_wb30_regs[] = {
- mcif_wb_regs_dcn3(0)
-};
-
-static const struct dcn30_mmhubbub_shift mcif_wb30_shift = {
- MCIF_WB_COMMON_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn30_mmhubbub_mask mcif_wb30_mask = {
- MCIF_WB_COMMON_MASK_SH_LIST_DCN30(_MASK)
-};
-
-#define dsc_regsDCN20(id)\
-[id] = {\
- DSC_REG_LIST_DCN20(id)\
-}
-
-static const struct dcn20_dsc_registers dsc_regs[] = {
- dsc_regsDCN20(0),
- dsc_regsDCN20(1),
- dsc_regsDCN20(2),
- dsc_regsDCN20(3),
- dsc_regsDCN20(4),
- dsc_regsDCN20(5)
-};
-
-static const struct dcn20_dsc_shift dsc_shift = {
- DSC_REG_LIST_SH_MASK_DCN20(__SHIFT)
-};
-
-static const struct dcn20_dsc_mask dsc_mask = {
- DSC_REG_LIST_SH_MASK_DCN20(_MASK)
-};
-
-static const struct dcn30_mpc_registers mpc_regs = {
- MPC_REG_LIST_DCN3_0(0),
- MPC_REG_LIST_DCN3_0(1),
- MPC_REG_LIST_DCN3_0(2),
- MPC_REG_LIST_DCN3_0(3),
- MPC_REG_LIST_DCN3_0(4),
- MPC_REG_LIST_DCN3_0(5),
- MPC_OUT_MUX_REG_LIST_DCN3_0(0),
- MPC_OUT_MUX_REG_LIST_DCN3_0(1),
- MPC_OUT_MUX_REG_LIST_DCN3_0(2),
- MPC_OUT_MUX_REG_LIST_DCN3_0(3),
- MPC_OUT_MUX_REG_LIST_DCN3_0(4),
- MPC_OUT_MUX_REG_LIST_DCN3_0(5),
- MPC_RMU_GLOBAL_REG_LIST_DCN3AG,
- MPC_RMU_REG_LIST_DCN3AG(0),
- MPC_RMU_REG_LIST_DCN3AG(1),
- MPC_RMU_REG_LIST_DCN3AG(2),
- MPC_DWB_MUX_REG_LIST_DCN3_0(0),
-};
-
-static const struct dcn30_mpc_shift mpc_shift = {
- MPC_COMMON_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn30_mpc_mask mpc_mask = {
- MPC_COMMON_MASK_SH_LIST_DCN30(_MASK)
-};
-
-#define optc_regs(id)\
-[id] = {OPTC_COMMON_REG_LIST_DCN3_0(id)}
-
-
-static const struct dcn_optc_registers optc_regs[] = {
- optc_regs(0),
- optc_regs(1),
- optc_regs(2),
- optc_regs(3),
- optc_regs(4),
- optc_regs(5)
-};
-
-static const struct dcn_optc_shift optc_shift = {
- OPTC_COMMON_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn_optc_mask optc_mask = {
- OPTC_COMMON_MASK_SH_LIST_DCN30(_MASK)
-};
-
-#define hubp_regs(id)\
-[id] = {\
- HUBP_REG_LIST_DCN30(id)\
-}
-
-static const struct dcn_hubp2_registers hubp_regs[] = {
- hubp_regs(0),
- hubp_regs(1),
- hubp_regs(2),
- hubp_regs(3),
- hubp_regs(4),
- hubp_regs(5)
-};
-
-static const struct dcn_hubp2_shift hubp_shift = {
- HUBP_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn_hubp2_mask hubp_mask = {
- HUBP_MASK_SH_LIST_DCN30(_MASK)
-};
-
-static const struct dcn_hubbub_registers hubbub_reg = {
- HUBBUB_REG_LIST_DCN30(0)
-};
-
-static const struct dcn_hubbub_shift hubbub_shift = {
- HUBBUB_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn_hubbub_mask hubbub_mask = {
- HUBBUB_MASK_SH_LIST_DCN30(_MASK)
-};
-
-static const struct dccg_registers dccg_regs = {
- DCCG_REG_LIST_DCN30()
-};
-
-static const struct dccg_shift dccg_shift = {
- DCCG_MASK_SH_LIST_DCN3(__SHIFT)
-};
-
-static const struct dccg_mask dccg_mask = {
- DCCG_MASK_SH_LIST_DCN3(_MASK)
-};
-
-static const struct dce_hwseq_registers hwseq_reg = {
- HWSEQ_DCN30_REG_LIST()
-};
-
-static const struct dce_hwseq_shift hwseq_shift = {
- HWSEQ_DCN30_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce_hwseq_mask hwseq_mask = {
- HWSEQ_DCN30_MASK_SH_LIST(_MASK)
-};
-#define vmid_regs(id)\
-[id] = {\
- DCN20_VMID_REG_LIST(id)\
-}
-
-static const struct dcn_vmid_registers vmid_regs[] = {
- vmid_regs(0),
- vmid_regs(1),
- vmid_regs(2),
- vmid_regs(3),
- vmid_regs(4),
- vmid_regs(5),
- vmid_regs(6),
- vmid_regs(7),
- vmid_regs(8),
- vmid_regs(9),
- vmid_regs(10),
- vmid_regs(11),
- vmid_regs(12),
- vmid_regs(13),
- vmid_regs(14),
- vmid_regs(15)
-};
-
-static const struct dcn20_vmid_shift vmid_shifts = {
- DCN20_VMID_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn20_vmid_mask vmid_masks = {
- DCN20_VMID_MASK_SH_LIST(_MASK)
-};
-
-static const struct resource_caps res_cap_dcn3 = {
- .num_timing_generator = 6,
- .num_opp = 6,
- .num_video_plane = 6,
- .num_audio = 6,
- .num_stream_encoder = 6,
- .num_pll = 6,
- .num_dwb = 1,
- .num_ddc = 6,
- .num_vmid = 16,
- .num_mpc_3dlut = 3,
- .num_dsc = 6,
-};
-
-static const struct dc_plane_cap plane_cap = {
- .type = DC_PLANE_TYPE_DCN_UNIVERSAL,
- .blends_with_above = true,
- .blends_with_below = true,
- .per_pixel_alpha = true,
-
- .pixel_format_support = {
- .argb8888 = true,
- .nv12 = true,
- .fp16 = true,
- .p010 = false,
- .ayuv = false,
- },
-
- .max_upscale_factor = {
- .argb8888 = 16000,
- .nv12 = 16000,
- .fp16 = 16000
- },
-
- /* 6:1 downscaling ratio: 1000/6 = 166.666 */
- .max_downscale_factor = {
- .argb8888 = 167,
- .nv12 = 167,
- .fp16 = 167
- }
-};
-
-static const struct dc_debug_options debug_defaults_drv = {
- .disable_dmcu = true, //No DMCU on DCN30
- .force_abm_enable = false,
- .timing_trace = false,
- .clock_trace = true,
- .disable_pplib_clock_request = true,
- .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
- .force_single_disp_pipe_split = false,
- .disable_dcc = DCC_ENABLE,
- .vsr_support = true,
- .performance_trace = false,
- .max_downscale_src_width = 7680,/*upto 8K*/
- .disable_pplib_wm_range = false,
- .scl_reset_length10 = true,
- .sanity_checks = false,
- .underflow_assert_delay_us = 0xFFFFFFFF,
- .dwb_fi_phase = -1, // -1 = disable,
- .dmub_command_table = true,
- .disable_psr = false,
- .use_max_lb = true
-};
-
-static const struct dc_debug_options debug_defaults_diags = {
- .disable_dmcu = true, //No dmcu on DCN30
- .force_abm_enable = false,
- .timing_trace = true,
- .clock_trace = true,
- .disable_dpp_power_gate = true,
- .disable_hubp_power_gate = true,
- .disable_clock_gate = true,
- .disable_pplib_clock_request = true,
- .disable_pplib_wm_range = true,
- .disable_stutter = false,
- .scl_reset_length10 = true,
- .dwb_fi_phase = -1, // -1 = disable
- .dmub_command_table = true,
- .disable_psr = true,
- .enable_tri_buf = true,
- .use_max_lb = true
-};
-
-void dcn30_dpp_destroy(struct dpp **dpp)
-{
- kfree(TO_DCN20_DPP(*dpp));
- *dpp = NULL;
-}
-
-static struct dpp *dcn30_dpp_create(
- struct dc_context *ctx,
- uint32_t inst)
-{
- struct dcn3_dpp *dpp =
- kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL);
-
- if (!dpp)
- return NULL;
-
- if (dpp3_construct(dpp, ctx, inst,
- &dpp_regs[inst], &tf_shift, &tf_mask))
- return &dpp->base;
-
- BREAK_TO_DEBUGGER();
- kfree(dpp);
- return NULL;
-}
-
-static struct output_pixel_processor *dcn30_opp_create(
- struct dc_context *ctx, uint32_t inst)
-{
- struct dcn20_opp *opp =
- kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL);
-
- if (!opp) {
- BREAK_TO_DEBUGGER();
- return NULL;
- }
-
- dcn20_opp_construct(opp, ctx, inst,
- &opp_regs[inst], &opp_shift, &opp_mask);
- return &opp->base;
-}
-
-static struct dce_aux *dcn30_aux_engine_create(
- struct dc_context *ctx,
- uint32_t inst)
-{
- struct aux_engine_dce110 *aux_engine =
- kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL);
-
- if (!aux_engine)
- return NULL;
-
- dce110_aux_engine_construct(aux_engine, ctx, inst,
- SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD,
- &aux_engine_regs[inst],
- &aux_mask,
- &aux_shift,
- ctx->dc->caps.extended_aux_timeout_support);
-
- return &aux_engine->base;
-}
-
-#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST_DCN30(id) }
-
-static const struct dce_i2c_registers i2c_hw_regs[] = {
- i2c_inst_regs(1),
- i2c_inst_regs(2),
- i2c_inst_regs(3),
- i2c_inst_regs(4),
- i2c_inst_regs(5),
- i2c_inst_regs(6),
-};
-
-static const struct dce_i2c_shift i2c_shifts = {
- I2C_COMMON_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dce_i2c_mask i2c_masks = {
- I2C_COMMON_MASK_SH_LIST_DCN30(_MASK)
-};
-
-static struct dce_i2c_hw *dcn30_i2c_hw_create(
- struct dc_context *ctx,
- uint32_t inst)
-{
- struct dce_i2c_hw *dce_i2c_hw =
- kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL);
-
- if (!dce_i2c_hw)
- return NULL;
-
- dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst,
- &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks);
-
- return dce_i2c_hw;
-}
-
-static struct mpc *dcn30_mpc_create(
- struct dc_context *ctx,
- int num_mpcc,
- int num_rmu)
-{
- struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc),
- GFP_KERNEL);
-
- if (!mpc30)
- return NULL;
-
- dcn30_mpc_construct(mpc30, ctx,
- &mpc_regs,
- &mpc_shift,
- &mpc_mask,
- num_mpcc,
- num_rmu);
-
- return &mpc30->base;
-}
-
-struct hubbub *dcn30_hubbub_create(struct dc_context *ctx)
-{
- int i;
-
- struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub),
- GFP_KERNEL);
-
- if (!hubbub3)
- return NULL;
-
- hubbub3_construct(hubbub3, ctx,
- &hubbub_reg,
- &hubbub_shift,
- &hubbub_mask);
-
-
- for (i = 0; i < res_cap_dcn3.num_vmid; i++) {
- struct dcn20_vmid *vmid = &hubbub3->vmid[i];
-
- vmid->ctx = ctx;
-
- vmid->regs = &vmid_regs[i];
- vmid->shifts = &vmid_shifts;
- vmid->masks = &vmid_masks;
- }
-
- return &hubbub3->base;
-}
-
-static struct timing_generator *dcn30_timing_generator_create(
- struct dc_context *ctx,
- uint32_t instance)
-{
- struct optc *tgn10 =
- kzalloc(sizeof(struct optc), GFP_KERNEL);
-
- if (!tgn10)
- return NULL;
-
- tgn10->base.inst = instance;
- tgn10->base.ctx = ctx;
-
- tgn10->tg_regs = &optc_regs[instance];
- tgn10->tg_shift = &optc_shift;
- tgn10->tg_mask = &optc_mask;
-
- dcn30_timing_generator_init(tgn10);
-
- return &tgn10->base;
-}
-
-static const struct encoder_feature_support link_enc_feature = {
- .max_hdmi_deep_color = COLOR_DEPTH_121212,
- .max_hdmi_pixel_clock = 600000,
- .hdmi_ycbcr420_supported = true,
- .dp_ycbcr420_supported = true,
- .fec_supported = true,
- .flags.bits.IS_HBR2_CAPABLE = true,
- .flags.bits.IS_HBR3_CAPABLE = true,
- .flags.bits.IS_TPS3_CAPABLE = true,
- .flags.bits.IS_TPS4_CAPABLE = true
-};
-
-static struct link_encoder *dcn30_link_encoder_create(
- const struct encoder_init_data *enc_init_data)
-{
- struct dcn20_link_encoder *enc20 =
- kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
-
- if (!enc20)
- return NULL;
-
- dcn30_link_encoder_construct(enc20,
- enc_init_data,
- &link_enc_feature,
- &link_enc_regs[enc_init_data->transmitter],
- &link_enc_aux_regs[enc_init_data->channel - 1],
- &link_enc_hpd_regs[enc_init_data->hpd_source],
- &le_shift,
- &le_mask);
-
- return &enc20->enc10.base;
-}
-
-static struct panel_cntl *dcn30_panel_cntl_create(const struct panel_cntl_init_data *init_data)
-{
- struct dce_panel_cntl *panel_cntl =
- kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL);
-
- if (!panel_cntl)
- return NULL;
-
- dce_panel_cntl_construct(panel_cntl,
- init_data,
- &panel_cntl_regs[init_data->inst],
- &panel_cntl_shift,
- &panel_cntl_mask);
-
- return &panel_cntl->base;
-}
-
-static void read_dce_straps(
- struct dc_context *ctx,
- struct resource_straps *straps)
-{
- generic_reg_get(ctx, mmDC_PINSTRAPS + BASE(mmDC_PINSTRAPS_BASE_IDX),
- FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio);
-
-}
-
-static struct audio *dcn30_create_audio(
- struct dc_context *ctx, unsigned int inst)
-{
- return dce_audio_create(ctx, inst,
- &audio_regs[inst], &audio_shift, &audio_mask);
-}
-
-static struct vpg *dcn30_vpg_create(
- struct dc_context *ctx,
- uint32_t inst)
-{
- struct dcn30_vpg *vpg3 = kzalloc(sizeof(struct dcn30_vpg), GFP_KERNEL);
-
- if (!vpg3)
- return NULL;
-
- vpg3_construct(vpg3, ctx, inst,
- &vpg_regs[inst],
- &vpg_shift,
- &vpg_mask);
-
- return &vpg3->base;
-}
-
-static struct afmt *dcn30_afmt_create(
- struct dc_context *ctx,
- uint32_t inst)
-{
- struct dcn30_afmt *afmt3 = kzalloc(sizeof(struct dcn30_afmt), GFP_KERNEL);
-
- if (!afmt3)
- return NULL;
-
- afmt3_construct(afmt3, ctx, inst,
- &afmt_regs[inst],
- &afmt_shift,
- &afmt_mask);
-
- return &afmt3->base;
-}
-
-struct stream_encoder *dcn30_stream_encoder_create(
- enum engine_id eng_id,
- struct dc_context *ctx)
-{
- struct dcn10_stream_encoder *enc1;
- struct vpg *vpg;
- struct afmt *afmt;
- int vpg_inst;
- int afmt_inst;
-
- /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
- if (eng_id <= ENGINE_ID_DIGF) {
- vpg_inst = eng_id;
- afmt_inst = eng_id;
- } else
- return NULL;
-
- enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL);
- vpg = dcn30_vpg_create(ctx, vpg_inst);
- afmt = dcn30_afmt_create(ctx, afmt_inst);
-
- if (!enc1 || !vpg || !afmt) {
- kfree(enc1);
- kfree(vpg);
- kfree(afmt);
- return NULL;
- }
-
- dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios,
- eng_id, vpg, afmt,
- &stream_enc_regs[eng_id],
- &se_shift, &se_mask);
-
- return &enc1->base;
-}
-
-struct dce_hwseq *dcn30_hwseq_create(
- struct dc_context *ctx)
-{
- struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL);
-
- if (hws) {
- hws->ctx = ctx;
- hws->regs = &hwseq_reg;
- hws->shifts = &hwseq_shift;
- hws->masks = &hwseq_mask;
- }
- return hws;
-}
-static const struct resource_create_funcs res_create_funcs = {
- .read_dce_straps = read_dce_straps,
- .create_audio = dcn30_create_audio,
- .create_stream_encoder = dcn30_stream_encoder_create,
- .create_hwseq = dcn30_hwseq_create,
-};
-
-static const struct resource_create_funcs res_create_maximus_funcs = {
- .read_dce_straps = NULL,
- .create_audio = NULL,
- .create_stream_encoder = NULL,
- .create_hwseq = dcn30_hwseq_create,
-};
-
-static void dcn30_resource_destruct(struct dcn30_resource_pool *pool)
-{
- unsigned int i;
-
- for (i = 0; i < pool->base.stream_enc_count; i++) {
- if (pool->base.stream_enc[i] != NULL) {
- if (pool->base.stream_enc[i]->vpg != NULL) {
- kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg));
- pool->base.stream_enc[i]->vpg = NULL;
- }
- if (pool->base.stream_enc[i]->afmt != NULL) {
- kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt));
- pool->base.stream_enc[i]->afmt = NULL;
- }
- kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i]));
- pool->base.stream_enc[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_dsc; i++) {
- if (pool->base.dscs[i] != NULL)
- dcn20_dsc_destroy(&pool->base.dscs[i]);
- }
-
- if (pool->base.mpc != NULL) {
- kfree(TO_DCN20_MPC(pool->base.mpc));
- pool->base.mpc = NULL;
- }
- if (pool->base.hubbub != NULL) {
- kfree(pool->base.hubbub);
- pool->base.hubbub = NULL;
- }
- for (i = 0; i < pool->base.pipe_count; i++) {
- if (pool->base.dpps[i] != NULL)
- dcn30_dpp_destroy(&pool->base.dpps[i]);
-
- if (pool->base.ipps[i] != NULL)
- pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]);
-
- if (pool->base.hubps[i] != NULL) {
- kfree(TO_DCN20_HUBP(pool->base.hubps[i]));
- pool->base.hubps[i] = NULL;
- }
-
- if (pool->base.irqs != NULL) {
- dal_irq_service_destroy(&pool->base.irqs);
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_ddc; i++) {
- if (pool->base.engines[i] != NULL)
- dce110_engine_destroy(&pool->base.engines[i]);
- if (pool->base.hw_i2cs[i] != NULL) {
- kfree(pool->base.hw_i2cs[i]);
- pool->base.hw_i2cs[i] = NULL;
- }
- if (pool->base.sw_i2cs[i] != NULL) {
- kfree(pool->base.sw_i2cs[i]);
- pool->base.sw_i2cs[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_opp; i++) {
- if (pool->base.opps[i] != NULL)
- pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]);
- }
-
- for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
- if (pool->base.timing_generators[i] != NULL) {
- kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i]));
- pool->base.timing_generators[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_dwb; i++) {
- if (pool->base.dwbc[i] != NULL) {
- kfree(TO_DCN30_DWBC(pool->base.dwbc[i]));
- pool->base.dwbc[i] = NULL;
- }
- if (pool->base.mcif_wb[i] != NULL) {
- kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i]));
- pool->base.mcif_wb[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->base.audio_count; i++) {
- if (pool->base.audios[i])
- dce_aud_destroy(&pool->base.audios[i]);
- }
-
- for (i = 0; i < pool->base.clk_src_count; i++) {
- if (pool->base.clock_sources[i] != NULL) {
- dcn20_clock_source_destroy(&pool->base.clock_sources[i]);
- pool->base.clock_sources[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) {
- if (pool->base.mpc_lut[i] != NULL) {
- dc_3dlut_func_release(pool->base.mpc_lut[i]);
- pool->base.mpc_lut[i] = NULL;
- }
- if (pool->base.mpc_shaper[i] != NULL) {
- dc_transfer_func_release(pool->base.mpc_shaper[i]);
- pool->base.mpc_shaper[i] = NULL;
- }
- }
-
- if (pool->base.dp_clock_source != NULL) {
- dcn20_clock_source_destroy(&pool->base.dp_clock_source);
- pool->base.dp_clock_source = NULL;
- }
-
- for (i = 0; i < pool->base.pipe_count; i++) {
- if (pool->base.multiple_abms[i] != NULL)
- dce_abm_destroy(&pool->base.multiple_abms[i]);
- }
-
- if (pool->base.psr != NULL)
- dmub_psr_destroy(&pool->base.psr);
-
- if (pool->base.dccg != NULL)
- dcn_dccg_destroy(&pool->base.dccg);
-
- if (pool->base.oem_device != NULL)
- dal_ddc_service_destroy(&pool->base.oem_device);
-}
-
-static struct hubp *dcn30_hubp_create(
- struct dc_context *ctx,
- uint32_t inst)
-{
- struct dcn20_hubp *hubp2 =
- kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL);
-
- if (!hubp2)
- return NULL;
-
- if (hubp3_construct(hubp2, ctx, inst,
- &hubp_regs[inst], &hubp_shift, &hubp_mask))
- return &hubp2->base;
-
- BREAK_TO_DEBUGGER();
- kfree(hubp2);
- return NULL;
-}
-
-static bool dcn30_dwbc_create(struct dc_context *ctx, struct resource_pool *pool)
-{
- int i;
- uint32_t pipe_count = pool->res_cap->num_dwb;
-
- for (i = 0; i < pipe_count; i++) {
- struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc),
- GFP_KERNEL);
-
- if (!dwbc30) {
- dm_error("DC: failed to create dwbc30!\n");
- return false;
- }
-
- dcn30_dwbc_construct(dwbc30, ctx,
- &dwbc30_regs[i],
- &dwbc30_shift,
- &dwbc30_mask,
- i);
-
- pool->dwbc[i] = &dwbc30->base;
- }
- return true;
-}
-
-static bool dcn30_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool)
-{
- int i;
- uint32_t pipe_count = pool->res_cap->num_dwb;
-
- for (i = 0; i < pipe_count; i++) {
- struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub),
- GFP_KERNEL);
-
- if (!mcif_wb30) {
- dm_error("DC: failed to create mcif_wb30!\n");
- return false;
- }
-
- dcn30_mmhubbub_construct(mcif_wb30, ctx,
- &mcif_wb30_regs[i],
- &mcif_wb30_shift,
- &mcif_wb30_mask,
- i);
-
- pool->mcif_wb[i] = &mcif_wb30->base;
- }
- return true;
-}
-
-static struct display_stream_compressor *dcn30_dsc_create(
- struct dc_context *ctx, uint32_t inst)
-{
- struct dcn20_dsc *dsc =
- kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL);
-
- if (!dsc) {
- BREAK_TO_DEBUGGER();
- return NULL;
- }
-
- dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask);
- return &dsc->base;
-}
-
-enum dc_status dcn30_add_stream_to_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream)
-{
-
- return dcn20_add_stream_to_ctx(dc, new_ctx, dc_stream);
-}
-
-static void dcn30_destroy_resource_pool(struct resource_pool **pool)
-{
- struct dcn30_resource_pool *dcn30_pool = TO_DCN30_RES_POOL(*pool);
-
- dcn30_resource_destruct(dcn30_pool);
- kfree(dcn30_pool);
- *pool = NULL;
-}
-
-static struct clock_source *dcn30_clock_source_create(
- struct dc_context *ctx,
- struct dc_bios *bios,
- enum clock_source_id id,
- const struct dce110_clk_src_regs *regs,
- bool dp_clk_src)
-{
- struct dce110_clk_src *clk_src =
- kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL);
-
- if (!clk_src)
- return NULL;
-
- if (dcn3_clk_src_construct(clk_src, ctx, bios, id,
- regs, &cs_shift, &cs_mask)) {
- clk_src->base.dp_clk_src = dp_clk_src;
- return &clk_src->base;
- }
-
- BREAK_TO_DEBUGGER();
- return NULL;
-}
-
-int dcn30_populate_dml_pipes_from_context(
- struct dc *dc, struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- bool fast_validate)
-{
- int i, pipe_cnt;
- struct resource_context *res_ctx = &context->res_ctx;
-
- dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
-
- for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
- if (!res_ctx->pipe_ctx[i].stream)
- continue;
-
- pipes[pipe_cnt++].pipe.scale_ratio_depth.lb_depth =
- dm_lb_16;
- }
-
- return pipe_cnt;
-}
-
-void dcn30_populate_dml_writeback_from_context(
- struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes)
-{
- int pipe_cnt, i, j;
- double max_calc_writeback_dispclk;
- double writeback_dispclk;
- struct writeback_st dout_wb;
-
- for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
- struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream;
-
- if (!stream)
- continue;
- max_calc_writeback_dispclk = 0;
-
- /* Set writeback information */
- pipes[pipe_cnt].dout.wb_enable = 0;
- pipes[pipe_cnt].dout.num_active_wb = 0;
- for (j = 0; j < stream->num_wb_info; j++) {
- struct dc_writeback_info *wb_info = &stream->writeback_info[j];
-
- if (wb_info->wb_enabled && wb_info->writeback_source_plane &&
- (wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) {
- pipes[pipe_cnt].dout.wb_enable = 1;
- pipes[pipe_cnt].dout.num_active_wb++;
- dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ?
- wb_info->dwb_params.cnv_params.crop_height :
- wb_info->dwb_params.cnv_params.src_height;
- dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ?
- wb_info->dwb_params.cnv_params.crop_width :
- wb_info->dwb_params.cnv_params.src_width;
- dout_wb.wb_dst_width = wb_info->dwb_params.dest_width;
- dout_wb.wb_dst_height = wb_info->dwb_params.dest_height;
-
- /* For IP that doesn't support WB scaling, set h/v taps to 1 to avoid DML validation failure */
- if (dc->dml.ip.writeback_max_hscl_taps > 1) {
- dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps;
- dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps;
- } else {
- dout_wb.wb_htaps_luma = 1;
- dout_wb.wb_vtaps_luma = 1;
- }
- dout_wb.wb_htaps_chroma = 0;
- dout_wb.wb_vtaps_chroma = 0;
- dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ?
- (double)wb_info->dwb_params.cnv_params.crop_width /
- (double)wb_info->dwb_params.dest_width :
- (double)wb_info->dwb_params.cnv_params.src_width /
- (double)wb_info->dwb_params.dest_width;
- dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ?
- (double)wb_info->dwb_params.cnv_params.crop_height /
- (double)wb_info->dwb_params.dest_height :
- (double)wb_info->dwb_params.cnv_params.src_height /
- (double)wb_info->dwb_params.dest_height;
- if (wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_ARGB ||
- wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_RGBA)
- dout_wb.wb_pixel_format = dm_444_64;
- else
- dout_wb.wb_pixel_format = dm_444_32;
-
- /* Workaround for cases where multiple writebacks are connected to same plane
- * In which case, need to compute worst case and set the associated writeback parameters
- * This workaround is necessary due to DML computation assuming only 1 set of writeback
- * parameters per pipe
- */
- writeback_dispclk = dml30_CalculateWriteBackDISPCLK(
- dout_wb.wb_pixel_format,
- pipes[pipe_cnt].pipe.dest.pixel_rate_mhz,
- dout_wb.wb_hratio,
- dout_wb.wb_vratio,
- dout_wb.wb_htaps_luma,
- dout_wb.wb_vtaps_luma,
- dout_wb.wb_src_width,
- dout_wb.wb_dst_width,
- pipes[pipe_cnt].pipe.dest.htotal,
- dc->current_state->bw_ctx.dml.ip.writeback_line_buffer_buffer_size);
-
- if (writeback_dispclk > max_calc_writeback_dispclk) {
- max_calc_writeback_dispclk = writeback_dispclk;
- pipes[pipe_cnt].dout.wb = dout_wb;
- }
- }
- }
-
- pipe_cnt++;
- }
-
-}
-
-unsigned int dcn30_calc_max_scaled_time(
- unsigned int time_per_pixel,
- enum mmhubbub_wbif_mode mode,
- unsigned int urgent_watermark)
-{
- unsigned int time_per_byte = 0;
- unsigned int total_free_entry = 0xb40;
- unsigned int buf_lh_capability;
- unsigned int max_scaled_time;
-
- if (mode == PACKED_444) /* packed mode 32 bpp */
- time_per_byte = time_per_pixel/4;
- else if (mode == PACKED_444_FP16) /* packed mode 64 bpp */
- time_per_byte = time_per_pixel/8;
-
- if (time_per_byte == 0)
- time_per_byte = 1;
-
- buf_lh_capability = (total_free_entry*time_per_byte*32) >> 6; /* time_per_byte is in u6.6*/
- max_scaled_time = buf_lh_capability - urgent_watermark;
- return max_scaled_time;
-}
-
-void dcn30_set_mcif_arb_params(
- struct dc *dc,
- struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- int pipe_cnt)
-{
- enum mmhubbub_wbif_mode wbif_mode;
- struct display_mode_lib *dml = &context->bw_ctx.dml;
- struct mcif_arb_params *wb_arb_params;
- int i, j, k, dwb_pipe;
-
- /* Writeback MCIF_WB arbitration parameters */
- dwb_pipe = 0;
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
-
- if (!context->res_ctx.pipe_ctx[i].stream)
- continue;
-
- for (j = 0; j < MAX_DWB_PIPES; j++) {
- struct dc_writeback_info *writeback_info = &context->res_ctx.pipe_ctx[i].stream->writeback_info[j];
-
- if (writeback_info->wb_enabled == false)
- continue;
-
- //wb_arb_params = &context->res_ctx.pipe_ctx[i].stream->writeback_info[j].mcif_arb_params;
- wb_arb_params = &context->bw_ctx.bw.dcn.bw_writeback.mcif_wb_arb[dwb_pipe];
-
- if (writeback_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_ARGB ||
- writeback_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_RGBA)
- wbif_mode = PACKED_444_FP16;
- else
- wbif_mode = PACKED_444;
-
- for (k = 0; k < sizeof(wb_arb_params->cli_watermark)/sizeof(wb_arb_params->cli_watermark[0]); k++) {
- wb_arb_params->cli_watermark[k] = get_wm_writeback_urgent(dml, pipes, pipe_cnt) * 1000;
- wb_arb_params->pstate_watermark[k] = get_wm_writeback_dram_clock_change(dml, pipes, pipe_cnt) * 1000;
- }
- wb_arb_params->time_per_pixel = (1000000 << 6) / context->res_ctx.pipe_ctx[i].stream->phy_pix_clk; /* time_per_pixel should be in u6.6 format */
- wb_arb_params->slice_lines = 32;
- wb_arb_params->arbitration_slice = 2; /* irrelevant since there is no YUV output */
- wb_arb_params->max_scaled_time = dcn30_calc_max_scaled_time(wb_arb_params->time_per_pixel,
- wbif_mode,
- wb_arb_params->cli_watermark[0]); /* assume 4 watermark sets have the same value */
- wb_arb_params->dram_speed_change_duration = dml->vba.WritebackAllowDRAMClockChangeEndPosition[j] * pipes[0].clks_cfg.refclk_mhz; /* num_clock_cycles = us * MHz */
-
- dwb_pipe++;
-
- if (dwb_pipe >= MAX_DWB_PIPES)
- return;
- }
- if (dwb_pipe >= MAX_DWB_PIPES)
- return;
- }
-
-}
-
-static struct dc_cap_funcs cap_funcs = {
- .get_dcc_compression_cap = dcn20_get_dcc_compression_cap
-};
-
-bool dcn30_acquire_post_bldn_3dlut(
- struct resource_context *res_ctx,
- const struct resource_pool *pool,
- int mpcc_id,
- struct dc_3dlut **lut,
- struct dc_transfer_func **shaper)
-{
- int i;
- bool ret = false;
- union dc_3dlut_state *state;
-
- ASSERT(*lut == NULL && *shaper == NULL);
- *lut = NULL;
- *shaper = NULL;
-
- for (i = 0; i < pool->res_cap->num_mpc_3dlut; i++) {
- if (!res_ctx->is_mpc_3dlut_acquired[i]) {
- *lut = pool->mpc_lut[i];
- *shaper = pool->mpc_shaper[i];
- state = &pool->mpc_lut[i]->state;
- res_ctx->is_mpc_3dlut_acquired[i] = true;
- state->bits.rmu_idx_valid = 1;
- state->bits.rmu_mux_num = i;
- if (state->bits.rmu_mux_num == 0)
- state->bits.mpc_rmu0_mux = mpcc_id;
- else if (state->bits.rmu_mux_num == 1)
- state->bits.mpc_rmu1_mux = mpcc_id;
- else if (state->bits.rmu_mux_num == 2)
- state->bits.mpc_rmu2_mux = mpcc_id;
- ret = true;
- break;
- }
- }
- return ret;
-}
-
-bool dcn30_release_post_bldn_3dlut(
- struct resource_context *res_ctx,
- const struct resource_pool *pool,
- struct dc_3dlut **lut,
- struct dc_transfer_func **shaper)
-{
- int i;
- bool ret = false;
-
- for (i = 0; i < pool->res_cap->num_mpc_3dlut; i++) {
- if (pool->mpc_lut[i] == *lut && pool->mpc_shaper[i] == *shaper) {
- res_ctx->is_mpc_3dlut_acquired[i] = false;
- pool->mpc_lut[i]->state.raw = 0;
- *lut = NULL;
- *shaper = NULL;
- ret = true;
- break;
- }
- }
- return ret;
-}
-
-static bool is_soc_bounding_box_valid(struct dc *dc)
-{
- uint32_t hw_internal_rev = dc->ctx->asic_id.hw_internal_rev;
-
- if (ASICREV_IS_SIENNA_CICHLID_P(hw_internal_rev))
- return true;
-
- return false;
-}
-
-static bool init_soc_bounding_box(struct dc *dc,
- struct dcn30_resource_pool *pool)
-{
- struct _vcs_dpi_soc_bounding_box_st *loaded_bb = &dcn3_0_soc;
- struct _vcs_dpi_ip_params_st *loaded_ip = &dcn3_0_ip;
-
- DC_LOGGER_INIT(dc->ctx->logger);
-
- if (!is_soc_bounding_box_valid(dc)) {
- DC_LOG_ERROR("%s: not valid soc bounding box\n", __func__);
- return false;
- }
-
- loaded_ip->max_num_otg = pool->base.res_cap->num_timing_generator;
- loaded_ip->max_num_dpp = pool->base.pipe_count;
- loaded_ip->clamp_min_dcfclk = dc->config.clamp_min_dcfclk;
- dcn20_patch_bounding_box(dc, loaded_bb);
-
- if (dc->ctx->dc_bios->funcs->get_soc_bb_info) {
- struct bp_soc_bb_info bb_info = {0};
-
- if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) {
- if (bb_info.dram_clock_change_latency_100ns > 0)
- dcn3_0_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10;
-
- if (bb_info.dram_sr_enter_exit_latency_100ns > 0)
- dcn3_0_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10;
-
- if (bb_info.dram_sr_exit_latency_100ns > 0)
- dcn3_0_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10;
- }
- }
-
- return true;
-}
-
-static bool dcn30_split_stream_for_mpc_or_odm(
- const struct dc *dc,
- struct resource_context *res_ctx,
- struct pipe_ctx *pri_pipe,
- struct pipe_ctx *sec_pipe,
- bool odm)
-{
- int pipe_idx = sec_pipe->pipe_idx;
- const struct resource_pool *pool = dc->res_pool;
-
- *sec_pipe = *pri_pipe;
-
- sec_pipe->pipe_idx = pipe_idx;
- sec_pipe->plane_res.mi = pool->mis[pipe_idx];
- sec_pipe->plane_res.hubp = pool->hubps[pipe_idx];
- sec_pipe->plane_res.ipp = pool->ipps[pipe_idx];
- sec_pipe->plane_res.xfm = pool->transforms[pipe_idx];
- sec_pipe->plane_res.dpp = pool->dpps[pipe_idx];
- sec_pipe->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst;
- sec_pipe->stream_res.dsc = NULL;
- if (odm) {
- if (pri_pipe->next_odm_pipe) {
- ASSERT(pri_pipe->next_odm_pipe != sec_pipe);
- sec_pipe->next_odm_pipe = pri_pipe->next_odm_pipe;
- sec_pipe->next_odm_pipe->prev_odm_pipe = sec_pipe;
- }
- if (pri_pipe->top_pipe && pri_pipe->top_pipe->next_odm_pipe) {
- pri_pipe->top_pipe->next_odm_pipe->bottom_pipe = sec_pipe;
- sec_pipe->top_pipe = pri_pipe->top_pipe->next_odm_pipe;
- }
- if (pri_pipe->bottom_pipe && pri_pipe->bottom_pipe->next_odm_pipe) {
- pri_pipe->bottom_pipe->next_odm_pipe->top_pipe = sec_pipe;
- sec_pipe->bottom_pipe = pri_pipe->bottom_pipe->next_odm_pipe;
- }
- pri_pipe->next_odm_pipe = sec_pipe;
- sec_pipe->prev_odm_pipe = pri_pipe;
-
- if (!sec_pipe->top_pipe)
- sec_pipe->stream_res.opp = pool->opps[pipe_idx];
- else
- sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp;
- if (sec_pipe->stream->timing.flags.DSC == 1) {
- dcn20_acquire_dsc(dc, res_ctx, &sec_pipe->stream_res.dsc, pipe_idx);
- ASSERT(sec_pipe->stream_res.dsc);
- if (sec_pipe->stream_res.dsc == NULL)
- return false;
- }
- } else {
- if (pri_pipe->bottom_pipe) {
- ASSERT(pri_pipe->bottom_pipe != sec_pipe);
- sec_pipe->bottom_pipe = pri_pipe->bottom_pipe;
- sec_pipe->bottom_pipe->top_pipe = sec_pipe;
- }
- pri_pipe->bottom_pipe = sec_pipe;
- sec_pipe->top_pipe = pri_pipe;
-
- ASSERT(pri_pipe->plane_state);
- }
-
- return true;
-}
-
-static struct pipe_ctx *dcn30_find_split_pipe(
- struct dc *dc,
- struct dc_state *context,
- int old_index)
-{
- struct pipe_ctx *pipe = NULL;
- int i;
-
- if (old_index >= 0 && context->res_ctx.pipe_ctx[old_index].stream == NULL) {
- pipe = &context->res_ctx.pipe_ctx[old_index];
- pipe->pipe_idx = old_index;
- }
-
- if (!pipe)
- for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) {
- if (dc->current_state->res_ctx.pipe_ctx[i].top_pipe == NULL
- && dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) {
- if (context->res_ctx.pipe_ctx[i].stream == NULL) {
- pipe = &context->res_ctx.pipe_ctx[i];
- pipe->pipe_idx = i;
- break;
- }
- }
- }
-
- /*
- * May need to fix pipes getting tossed from 1 opp to another on flip
- * Add for debugging transient underflow during topology updates:
- * ASSERT(pipe);
- */
- if (!pipe)
- for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) {
- if (context->res_ctx.pipe_ctx[i].stream == NULL) {
- pipe = &context->res_ctx.pipe_ctx[i];
- pipe->pipe_idx = i;
- break;
- }
- }
-
- return pipe;
-}
-
-noinline bool dcn30_internal_validate_bw(
- struct dc *dc,
- struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- int *pipe_cnt_out,
- int *vlevel_out,
- bool fast_validate)
-{
- bool out = false;
- bool repopulate_pipes = false;
- int split[MAX_PIPES] = { 0 };
- bool merge[MAX_PIPES] = { false };
- bool newly_split[MAX_PIPES] = { false };
- int pipe_cnt, i, pipe_idx, vlevel;
- struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
-
- ASSERT(pipes);
- if (!pipes)
- return false;
-
- dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
- pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
-
- DC_FP_START();
- if (!pipe_cnt) {
- out = true;
- goto validate_out;
- }
-
- dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt);
-
- if (!fast_validate) {
- /*
- * DML favors voltage over p-state, but we're more interested in
- * supporting p-state over voltage. We can't support p-state in
- * prefetch mode > 0 so try capping the prefetch mode to start.
- */
- context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank =
- dm_allow_self_refresh_and_mclk_switch;
- vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
- /* This may adjust vlevel and maxMpcComb */
- if (vlevel < context->bw_ctx.dml.soc.num_states)
- vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge);
- }
- if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states ||
- vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) {
- /*
- * If mode is unsupported or there's still no p-state support then
- * fall back to favoring voltage.
- *
- * We don't actually support prefetch mode 2, so require that we
- * at least support prefetch mode 1.
- */
- context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank =
- dm_allow_self_refresh;
-
- vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
- if (vlevel < context->bw_ctx.dml.soc.num_states) {
- memset(split, 0, sizeof(split));
- memset(merge, 0, sizeof(merge));
- vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge);
- }
- }
-
- dml_log_mode_support_params(&context->bw_ctx.dml);
-
- if (vlevel == context->bw_ctx.dml.soc.num_states)
- goto validate_fail;
-
- if (!dc->config.enable_windowed_mpo_odm) {
- for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- struct pipe_ctx *mpo_pipe = pipe->bottom_pipe;
-
- if (!pipe->stream)
- continue;
-
- /* We only support full screen mpo with ODM */
- if (vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled
- && pipe->plane_state && mpo_pipe
- && memcmp(&mpo_pipe->plane_res.scl_data.recout,
- &pipe->plane_res.scl_data.recout,
- sizeof(struct rect)) != 0) {
- ASSERT(mpo_pipe->plane_state != pipe->plane_state);
- goto validate_fail;
- }
- pipe_idx++;
- }
- }
-
- /* merge pipes if necessary */
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
- /*skip pipes that don't need merging*/
- if (!merge[i])
- continue;
-
- /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */
- if (pipe->prev_odm_pipe) {
- /*split off odm pipe*/
- pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe;
- if (pipe->next_odm_pipe)
- pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe;
-
- pipe->bottom_pipe = NULL;
- pipe->next_odm_pipe = NULL;
- pipe->plane_state = NULL;
- pipe->stream = NULL;
- pipe->top_pipe = NULL;
- pipe->prev_odm_pipe = NULL;
- if (pipe->stream_res.dsc)
- dcn20_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc);
- memset(&pipe->plane_res, 0, sizeof(pipe->plane_res));
- memset(&pipe->stream_res, 0, sizeof(pipe->stream_res));
- repopulate_pipes = true;
- } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) {
- struct pipe_ctx *top_pipe = pipe->top_pipe;
- struct pipe_ctx *bottom_pipe = pipe->bottom_pipe;
-
- top_pipe->bottom_pipe = bottom_pipe;
- if (bottom_pipe)
- bottom_pipe->top_pipe = top_pipe;
-
- pipe->top_pipe = NULL;
- pipe->bottom_pipe = NULL;
- pipe->plane_state = NULL;
- pipe->stream = NULL;
- memset(&pipe->plane_res, 0, sizeof(pipe->plane_res));
- memset(&pipe->stream_res, 0, sizeof(pipe->stream_res));
- repopulate_pipes = true;
- } else
- ASSERT(0); /* Should never try to merge master pipe */
-
- }
-
- for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
- struct pipe_ctx *hsplit_pipe = NULL;
- bool odm;
- int old_index = -1;
-
- if (!pipe->stream || newly_split[i])
- continue;
-
- pipe_idx++;
- odm = vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled;
-
- if (!pipe->plane_state && !odm)
- continue;
-
- if (split[i]) {
- if (odm) {
- if (split[i] == 4 && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe)
- old_index = old_pipe->next_odm_pipe->next_odm_pipe->pipe_idx;
- else if (old_pipe->next_odm_pipe)
- old_index = old_pipe->next_odm_pipe->pipe_idx;
- } else {
- if (split[i] == 4 && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe &&
- old_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
- old_index = old_pipe->bottom_pipe->bottom_pipe->pipe_idx;
- else if (old_pipe->bottom_pipe &&
- old_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
- old_index = old_pipe->bottom_pipe->pipe_idx;
- }
- hsplit_pipe = dcn30_find_split_pipe(dc, context, old_index);
- ASSERT(hsplit_pipe);
- if (!hsplit_pipe)
- goto validate_fail;
-
- if (!dcn30_split_stream_for_mpc_or_odm(
- dc, &context->res_ctx,
- pipe, hsplit_pipe, odm))
- goto validate_fail;
-
- newly_split[hsplit_pipe->pipe_idx] = true;
- repopulate_pipes = true;
- }
- if (split[i] == 4) {
- struct pipe_ctx *pipe_4to1;
-
- if (odm && old_pipe->next_odm_pipe)
- old_index = old_pipe->next_odm_pipe->pipe_idx;
- else if (!odm && old_pipe->bottom_pipe &&
- old_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
- old_index = old_pipe->bottom_pipe->pipe_idx;
- else
- old_index = -1;
- pipe_4to1 = dcn30_find_split_pipe(dc, context, old_index);
- ASSERT(pipe_4to1);
- if (!pipe_4to1)
- goto validate_fail;
- if (!dcn30_split_stream_for_mpc_or_odm(
- dc, &context->res_ctx,
- pipe, pipe_4to1, odm))
- goto validate_fail;
- newly_split[pipe_4to1->pipe_idx] = true;
-
- if (odm && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe
- && old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe)
- old_index = old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe->pipe_idx;
- else if (!odm && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe &&
- old_pipe->bottom_pipe->bottom_pipe->bottom_pipe &&
- old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
- old_index = old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx;
- else
- old_index = -1;
- pipe_4to1 = dcn30_find_split_pipe(dc, context, old_index);
- ASSERT(pipe_4to1);
- if (!pipe_4to1)
- goto validate_fail;
- if (!dcn30_split_stream_for_mpc_or_odm(
- dc, &context->res_ctx,
- hsplit_pipe, pipe_4to1, odm))
- goto validate_fail;
- newly_split[pipe_4to1->pipe_idx] = true;
- }
- if (odm)
- dcn20_build_mapped_resource(dc, context, pipe->stream);
- }
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
- if (pipe->plane_state) {
- if (!resource_build_scaling_params(pipe))
- goto validate_fail;
- }
- }
-
- /* Actual dsc count per stream dsc validation*/
- if (!dcn20_validate_dsc(dc, context)) {
- vba->ValidationStatus[vba->soc.num_states] = DML_FAIL_DSC_VALIDATION_FAILURE;
- goto validate_fail;
- }
-
- if (repopulate_pipes)
- pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
- *vlevel_out = vlevel;
- *pipe_cnt_out = pipe_cnt;
-
- out = true;
- goto validate_out;
-
-validate_fail:
- out = false;
-
-validate_out:
- DC_FP_END();
- return out;
-}
-
-/*
- * This must be noinline to ensure anything that deals with FP registers
- * is contained within this call; previously our compiling with hard-float
- * would result in fp instructions being emitted outside of the boundaries
- * of the DC_FP_START/END macros, which makes sense as the compiler has no
- * idea about what is wrapped and what is not
- *
- * This is largely just a workaround to avoid breakage introduced with 5.6,
- * ideally all fp-using code should be moved into its own file, only that
- * should be compiled with hard-float, and all code exported from there
- * should be strictly wrapped with DC_FP_START/END
- */
-static noinline void dcn30_calculate_wm_and_dlg_fp(
- struct dc *dc, struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- int pipe_cnt,
- int vlevel)
-{
- int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
- int i, pipe_idx;
- double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][maxMpcComb];
- bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported;
-
- if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk)
- dcfclk = context->bw_ctx.dml.soc.min_dcfclk;
-
- pipes[0].clks_cfg.voltage = vlevel;
- pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
- pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
-
- /* Set B:
- * DCFCLK: 1GHz or min required above 1GHz
- * FCLK/UCLK: Max
- */
- if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) {
- if (vlevel == 0) {
- pipes[0].clks_cfg.voltage = 1;
- pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz;
- }
- context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us;
- context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us;
- context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us;
- }
- context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-
- pipes[0].clks_cfg.voltage = vlevel;
- pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
-
- /* Set D:
- * DCFCLK: Min Required
- * FCLK(proportional to UCLK): 1GHz or Max
- * MALL stutter, sr_enter_exit = 4, sr_exit = 2us
- */
- /*
- if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) {
- context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us;
- context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us;
- context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us;
- }
- context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- */
-
- /* Set C:
- * DCFCLK: Min Required
- * FCLK(proportional to UCLK): 1GHz or Max
- * pstate latency overridden to 5us
- */
- if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) {
- unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
- unsigned int min_dram_speed_mts_margin = 160;
-
- if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_unsupported)
- min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16;
-
- /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */
- for (i = 3; i > 0; i--)
- if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts)
- break;
-
- context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us;
- context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us;
- context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us;
- }
-
- context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-
- if (!pstate_en) {
- /* The only difference between A and C is p-state latency, if p-state is not supported we want to
- * calculate DLG based on dummy p-state latency, and max out the set A p-state watermark
- */
- context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c;
- context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0;
- } else {
- /* Set A:
- * DCFCLK: Min Required
- * FCLK(proportional to UCLK): 1GHz or Max
- *
- * Set A calculated last so that following calculations are based on Set A
- */
- dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
- context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- }
-
- context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod;
-
- /* Make set D = set A until set D is enabled */
- context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a;
-
- for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
- if (!context->res_ctx.pipe_ctx[i].stream)
- continue;
-
- pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
- pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
-
- if (dc->config.forced_clocks) {
- pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
- pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
- }
- if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000)
- pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
- if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
- pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;
-
- pipe_idx++;
- }
-
- dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
-
- if (!pstate_en)
- /* Restore full p-state latency */
- context->bw_ctx.dml.soc.dram_clock_change_latency_us =
- dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
-}
-
-void dcn30_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
-{
- if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) {
- context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
- context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us;
- context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us;
- }
-}
-
-void dcn30_calculate_wm_and_dlg(
- struct dc *dc, struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- int pipe_cnt,
- int vlevel)
-{
- DC_FP_START();
- dcn30_calculate_wm_and_dlg_fp(dc, context, pipes, pipe_cnt, vlevel);
- DC_FP_END();
-}
-
-bool dcn30_validate_bandwidth(struct dc *dc,
- struct dc_state *context,
- bool fast_validate)
-{
- bool out = false;
-
- BW_VAL_TRACE_SETUP();
-
- int vlevel = 0;
- int pipe_cnt = 0;
- display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
- DC_LOGGER_INIT(dc->ctx->logger);
-
- BW_VAL_TRACE_COUNT();
-
- out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate);
-
- if (pipe_cnt == 0)
- goto validate_out;
-
- if (!out)
- goto validate_fail;
-
- BW_VAL_TRACE_END_VOLTAGE_LEVEL();
-
- if (fast_validate) {
- BW_VAL_TRACE_SKIP(fast);
- goto validate_out;
- }
-
- DC_FP_START();
- dc->res_pool->funcs->calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel);
- DC_FP_END();
-
- BW_VAL_TRACE_END_WATERMARKS();
-
- goto validate_out;
-
-validate_fail:
- DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n",
- dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states]));
-
- BW_VAL_TRACE_SKIP(fail);
- out = false;
-
-validate_out:
- kfree(pipes);
-
- BW_VAL_TRACE_FINISH();
-
- return out;
-}
-
-/*
- * This must be noinline to ensure anything that deals with FP registers
- * is contained within this call; previously our compiling with hard-float
- * would result in fp instructions being emitted outside of the boundaries
- * of the DC_FP_START/END macros, which makes sense as the compiler has no
- * idea about what is wrapped and what is not
- *
- * This is largely just a workaround to avoid breakage introduced with 5.6,
- * ideally all fp-using code should be moved into its own file, only that
- * should be compiled with hard-float, and all code exported from there
- * should be strictly wrapped with DC_FP_START/END
- */
-static noinline void dcn30_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
- unsigned int *optimal_dcfclk,
- unsigned int *optimal_fclk)
-{
- double bw_from_dram, bw_from_dram1, bw_from_dram2;
-
- bw_from_dram1 = uclk_mts * dcn3_0_soc.num_chans *
- dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_dram_bw_use_normal_percent / 100);
- bw_from_dram2 = uclk_mts * dcn3_0_soc.num_chans *
- dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100);
-
- bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2;
-
- if (optimal_fclk)
- *optimal_fclk = bw_from_dram /
- (dcn3_0_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100));
-
- if (optimal_dcfclk)
- *optimal_dcfclk = bw_from_dram /
- (dcn3_0_soc.return_bus_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100));
-}
-
-void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
-{
- unsigned int i, j;
- unsigned int num_states = 0;
-
- unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0};
- unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0};
- unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0};
- unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0};
-
- unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {694, 875, 1000, 1200};
- unsigned int num_dcfclk_sta_targets = 4;
- unsigned int num_uclk_states;
-
- if (dc->ctx->dc_bios->vram_info.num_chans)
- dcn3_0_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans;
-
- if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
- dcn3_0_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
-
- dcn3_0_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
- dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
-
- if (bw_params->clk_table.entries[0].memclk_mhz) {
- int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0;
-
- for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
- if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
- max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
- if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
- max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
- if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
- max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
- if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
- max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
- }
-
- if (!max_dcfclk_mhz)
- max_dcfclk_mhz = dcn3_0_soc.clock_limits[0].dcfclk_mhz;
- if (!max_dispclk_mhz)
- max_dispclk_mhz = dcn3_0_soc.clock_limits[0].dispclk_mhz;
- if (!max_dppclk_mhz)
- max_dppclk_mhz = dcn3_0_soc.clock_limits[0].dppclk_mhz;
- if (!max_phyclk_mhz)
- max_phyclk_mhz = dcn3_0_soc.clock_limits[0].phyclk_mhz;
-
- if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
- // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array
- dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz;
- num_dcfclk_sta_targets++;
- } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
- // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates
- for (i = 0; i < num_dcfclk_sta_targets; i++) {
- if (dcfclk_sta_targets[i] > max_dcfclk_mhz) {
- dcfclk_sta_targets[i] = max_dcfclk_mhz;
- break;
- }
- }
- // Update size of array since we "removed" duplicates
- num_dcfclk_sta_targets = i + 1;
- }
-
- num_uclk_states = bw_params->clk_table.num_entries;
-
- // Calculate optimal dcfclk for each uclk
- for (i = 0; i < num_uclk_states; i++) {
- DC_FP_START();
- dcn30_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16,
- &optimal_dcfclk_for_uclk[i], NULL);
- DC_FP_END();
- if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) {
- optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz;
- }
- }
-
- // Calculate optimal uclk for each dcfclk sta target
- for (i = 0; i < num_dcfclk_sta_targets; i++) {
- for (j = 0; j < num_uclk_states; j++) {
- if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) {
- optimal_uclk_for_dcfclk_sta_targets[i] =
- bw_params->clk_table.entries[j].memclk_mhz * 16;
- break;
- }
- }
- }
-
- i = 0;
- j = 0;
- // create the final dcfclk and uclk table
- while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) {
- if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) {
- dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
- dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
- } else {
- if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
- dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
- dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
- } else {
- j = num_uclk_states;
- }
- }
- }
-
- while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) {
- dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
- dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
- }
-
- while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES &&
- optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
- dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
- dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
- }
-
- dcn3_0_soc.num_states = num_states;
- for (i = 0; i < dcn3_0_soc.num_states; i++) {
- dcn3_0_soc.clock_limits[i].state = i;
- dcn3_0_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
- dcn3_0_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
- dcn3_0_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];
-
- /* Fill all states with max values of all other clocks */
- dcn3_0_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
- dcn3_0_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
- dcn3_0_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz;
- dcn3_0_soc.clock_limits[i].dtbclk_mhz = dcn3_0_soc.clock_limits[0].dtbclk_mhz;
- /* These clocks cannot come from bw_params, always fill from dcn3_0_soc[1] */
- /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */
- dcn3_0_soc.clock_limits[i].phyclk_d18_mhz = dcn3_0_soc.clock_limits[0].phyclk_d18_mhz;
- dcn3_0_soc.clock_limits[i].socclk_mhz = dcn3_0_soc.clock_limits[0].socclk_mhz;
- dcn3_0_soc.clock_limits[i].dscclk_mhz = dcn3_0_soc.clock_limits[0].dscclk_mhz;
- }
- /* re-init DML with updated bb */
- dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30);
- if (dc->current_state)
- dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30);
- }
-}
-
-static const struct resource_funcs dcn30_res_pool_funcs = {
- .destroy = dcn30_destroy_resource_pool,
- .link_enc_create = dcn30_link_encoder_create,
- .panel_cntl_create = dcn30_panel_cntl_create,
- .validate_bandwidth = dcn30_validate_bandwidth,
- .calculate_wm_and_dlg = dcn30_calculate_wm_and_dlg,
- .update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
- .populate_dml_pipes = dcn30_populate_dml_pipes_from_context,
- .acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer,
- .add_stream_to_ctx = dcn30_add_stream_to_ctx,
- .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
- .remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
- .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context,
- .set_mcif_arb_params = dcn30_set_mcif_arb_params,
- .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link,
- .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut,
- .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut,
- .update_bw_bounding_box = dcn30_update_bw_bounding_box,
- .patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
-};
-
-#define CTX ctx
-
-#define REG(reg_name) \
- (DCN_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name)
-
-static uint32_t read_pipe_fuses(struct dc_context *ctx)
-{
- uint32_t value = REG_READ(CC_DC_PIPE_DIS);
- /* Support for max 6 pipes */
- value = value & 0x3f;
- return value;
-}
-
-static bool dcn30_resource_construct(
- uint8_t num_virtual_links,
- struct dc *dc,
- struct dcn30_resource_pool *pool)
-{
- int i;
- struct dc_context *ctx = dc->ctx;
- struct irq_service_init_data init_data;
- struct ddc_service_init_data ddc_init_data = {0};
- uint32_t pipe_fuses = read_pipe_fuses(ctx);
- uint32_t num_pipes = 0;
-
- if (!(pipe_fuses == 0 || pipe_fuses == 0x3e)) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: Unexpected fuse recipe for navi2x !\n");
- /* fault to single pipe */
- pipe_fuses = 0x3e;
- }
-
- DC_FP_START();
-
- ctx->dc_bios->regs = &bios_regs;
-
- pool->base.res_cap = &res_cap_dcn3;
-
- pool->base.funcs = &dcn30_res_pool_funcs;
-
- /*************************************************
- * Resource + asic cap harcoding *
- *************************************************/
- pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE;
- pool->base.pipe_count = pool->base.res_cap->num_timing_generator;
- pool->base.mpcc_count = pool->base.res_cap->num_timing_generator;
- dc->caps.max_downscale_ratio = 600;
- dc->caps.i2c_speed_in_khz = 100;
- dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a not applied by default*/
- dc->caps.max_cursor_size = 256;
- dc->caps.min_horizontal_blanking_period = 80;
- dc->caps.dmdata_alloc_size = 2048;
- dc->caps.mall_size_per_mem_channel = 8;
- /* total size = mall per channel * num channels * 1024 * 1024 */
- dc->caps.mall_size_total = dc->caps.mall_size_per_mem_channel * dc->ctx->dc_bios->vram_info.num_chans * 1048576;
- dc->caps.cursor_cache_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8;
-
- dc->caps.max_slave_planes = 1;
- dc->caps.max_slave_yuv_planes = 1;
- dc->caps.max_slave_rgb_planes = 1;
- dc->caps.post_blend_color_processing = true;
- dc->caps.force_dp_tps4_for_cp2520 = true;
- dc->caps.extended_aux_timeout_support = true;
- dc->caps.dmcub_support = true;
-
- /* Color pipeline capabilities */
- dc->caps.color.dpp.dcn_arch = 1;
- dc->caps.color.dpp.input_lut_shared = 0;
- dc->caps.color.dpp.icsc = 1;
- dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr
- dc->caps.color.dpp.dgam_rom_caps.srgb = 1;
- dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1;
- dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1;
- dc->caps.color.dpp.dgam_rom_caps.pq = 1;
- dc->caps.color.dpp.dgam_rom_caps.hlg = 1;
- dc->caps.color.dpp.post_csc = 1;
- dc->caps.color.dpp.gamma_corr = 1;
- dc->caps.color.dpp.dgam_rom_for_yuv = 0;
-
- dc->caps.color.dpp.hw_3d_lut = 1;
- dc->caps.color.dpp.ogam_ram = 1;
- // no OGAM ROM on DCN3
- dc->caps.color.dpp.ogam_rom_caps.srgb = 0;
- dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0;
- dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0;
- dc->caps.color.dpp.ogam_rom_caps.pq = 0;
- dc->caps.color.dpp.ogam_rom_caps.hlg = 0;
- dc->caps.color.dpp.ocsc = 0;
-
- dc->caps.color.mpc.gamut_remap = 1;
- dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //3
- dc->caps.color.mpc.ogam_ram = 1;
- dc->caps.color.mpc.ogam_rom_caps.srgb = 0;
- dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0;
- dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0;
- dc->caps.color.mpc.ogam_rom_caps.pq = 0;
- dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
- dc->caps.color.mpc.ocsc = 1;
-
- /* read VBIOS LTTPR caps */
- {
- if (ctx->dc_bios->funcs->get_lttpr_caps) {
- enum bp_result bp_query_result;
- uint8_t is_vbios_lttpr_enable = 0;
-
- bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable);
- dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable;
- }
-
- if (ctx->dc_bios->funcs->get_lttpr_interop) {
- enum bp_result bp_query_result;
- uint8_t is_vbios_interop_enabled = 0;
-
- bp_query_result = ctx->dc_bios->funcs->get_lttpr_interop(ctx->dc_bios,
- &is_vbios_interop_enabled);
- dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled;
- }
- }
-
- if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV)
- dc->debug = debug_defaults_drv;
- else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) {
- dc->debug = debug_defaults_diags;
- } else
- dc->debug = debug_defaults_diags;
- // Init the vm_helper
- if (dc->vm_helper)
- vm_helper_init(dc->vm_helper, 16);
-
- /*************************************************
- * Create resources *
- *************************************************/
-
- /* Clock Sources for Pixel Clock*/
- pool->base.clock_sources[DCN30_CLK_SRC_PLL0] =
- dcn30_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL0,
- &clk_src_regs[0], false);
- pool->base.clock_sources[DCN30_CLK_SRC_PLL1] =
- dcn30_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL1,
- &clk_src_regs[1], false);
- pool->base.clock_sources[DCN30_CLK_SRC_PLL2] =
- dcn30_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL2,
- &clk_src_regs[2], false);
- pool->base.clock_sources[DCN30_CLK_SRC_PLL3] =
- dcn30_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL3,
- &clk_src_regs[3], false);
- pool->base.clock_sources[DCN30_CLK_SRC_PLL4] =
- dcn30_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL4,
- &clk_src_regs[4], false);
- pool->base.clock_sources[DCN30_CLK_SRC_PLL5] =
- dcn30_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL5,
- &clk_src_regs[5], false);
-
- pool->base.clk_src_count = DCN30_CLK_SRC_TOTAL;
-
- /* todo: not reuse phy_pll registers */
- pool->base.dp_clock_source =
- dcn30_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_ID_DP_DTO,
- &clk_src_regs[0], true);
-
- for (i = 0; i < pool->base.clk_src_count; i++) {
- if (pool->base.clock_sources[i] == NULL) {
- dm_error("DC: failed to create clock sources!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
- }
-
- /* DCCG */
- pool->base.dccg = dccg30_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask);
- if (pool->base.dccg == NULL) {
- dm_error("DC: failed to create dccg!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
-
- /* PP Lib and SMU interfaces */
- init_soc_bounding_box(dc, pool);
-
- num_pipes = dcn3_0_ip.max_num_dpp;
-
- for (i = 0; i < dcn3_0_ip.max_num_dpp; i++)
- if (pipe_fuses & 1 << i)
- num_pipes--;
-
- dcn3_0_ip.max_num_dpp = num_pipes;
- dcn3_0_ip.max_num_otg = num_pipes;
-
- dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30);
-
- /* IRQ */
- init_data.ctx = dc->ctx;
- pool->base.irqs = dal_irq_service_dcn30_create(&init_data);
- if (!pool->base.irqs)
- goto create_fail;
-
- /* HUBBUB */
- pool->base.hubbub = dcn30_hubbub_create(ctx);
- if (pool->base.hubbub == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create hubbub!\n");
- goto create_fail;
- }
-
- /* HUBPs, DPPs, OPPs and TGs */
- for (i = 0; i < pool->base.pipe_count; i++) {
- pool->base.hubps[i] = dcn30_hubp_create(ctx, i);
- if (pool->base.hubps[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error(
- "DC: failed to create hubps!\n");
- goto create_fail;
- }
-
- pool->base.dpps[i] = dcn30_dpp_create(ctx, i);
- if (pool->base.dpps[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error(
- "DC: failed to create dpps!\n");
- goto create_fail;
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_opp; i++) {
- pool->base.opps[i] = dcn30_opp_create(ctx, i);
- if (pool->base.opps[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error(
- "DC: failed to create output pixel processor!\n");
- goto create_fail;
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
- pool->base.timing_generators[i] = dcn30_timing_generator_create(
- ctx, i);
- if (pool->base.timing_generators[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create tg!\n");
- goto create_fail;
- }
- }
- pool->base.timing_generator_count = i;
- /* PSR */
- pool->base.psr = dmub_psr_create(ctx);
-
- if (pool->base.psr == NULL) {
- dm_error("DC: failed to create PSR obj!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
-
- /* ABM */
- for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
- pool->base.multiple_abms[i] = dmub_abm_create(ctx,
- &abm_regs[i],
- &abm_shift,
- &abm_mask);
- if (pool->base.multiple_abms[i] == NULL) {
- dm_error("DC: failed to create abm for pipe %d!\n", i);
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
- }
- /* MPC and DSC */
- pool->base.mpc = dcn30_mpc_create(ctx, pool->base.mpcc_count, pool->base.res_cap->num_mpc_3dlut);
- if (pool->base.mpc == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create mpc!\n");
- goto create_fail;
- }
-
- for (i = 0; i < pool->base.res_cap->num_dsc; i++) {
- pool->base.dscs[i] = dcn30_dsc_create(ctx, i);
- if (pool->base.dscs[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create display stream compressor %d!\n", i);
- goto create_fail;
- }
- }
-
- /* DWB and MMHUBBUB */
- if (!dcn30_dwbc_create(ctx, &pool->base)) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create dwbc!\n");
- goto create_fail;
- }
-
- if (!dcn30_mmhubbub_create(ctx, &pool->base)) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create mcif_wb!\n");
- goto create_fail;
- }
-
- /* AUX and I2C */
- for (i = 0; i < pool->base.res_cap->num_ddc; i++) {
- pool->base.engines[i] = dcn30_aux_engine_create(ctx, i);
- if (pool->base.engines[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error(
- "DC:failed to create aux engine!!\n");
- goto create_fail;
- }
- pool->base.hw_i2cs[i] = dcn30_i2c_hw_create(ctx, i);
- if (pool->base.hw_i2cs[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error(
- "DC:failed to create hw i2c!!\n");
- goto create_fail;
- }
- pool->base.sw_i2cs[i] = NULL;
- }
-
- /* Audio, Stream Encoders including DIG and virtual, MPC 3D LUTs */
- if (!resource_construct(num_virtual_links, dc, &pool->base,
- (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment) ?
- &res_create_funcs : &res_create_maximus_funcs)))
- goto create_fail;
-
- /* HW Sequencer and Plane caps */
- dcn30_hw_sequencer_construct(dc);
-
- dc->caps.max_planes = pool->base.pipe_count;
-
- for (i = 0; i < dc->caps.max_planes; ++i)
- dc->caps.planes[i] = plane_cap;
-
- dc->cap_funcs = cap_funcs;
-
- if (dc->ctx->dc_bios->fw_info.oem_i2c_present) {
- ddc_init_data.ctx = dc->ctx;
- ddc_init_data.link = NULL;
- ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id;
- ddc_init_data.id.enum_id = 0;
- ddc_init_data.id.type = OBJECT_TYPE_GENERIC;
- pool->base.oem_device = dal_ddc_service_create(&ddc_init_data);
- } else {
- pool->base.oem_device = NULL;
- }
-
- DC_FP_END();
-
- return true;
-
-create_fail:
-
- DC_FP_END();
- dcn30_resource_destruct(pool);
-
- return false;
-}
-
-struct resource_pool *dcn30_create_resource_pool(
- const struct dc_init_data *init_data,
- struct dc *dc)
-{
- struct dcn30_resource_pool *pool =
- kzalloc(sizeof(struct dcn30_resource_pool), GFP_KERNEL);
-
- if (!pool)
- return NULL;
-
- if (dcn30_resource_construct(init_data->num_virtual_links, dc, pool))
- return &pool->base;
-
- BREAK_TO_DEBUGGER();
- kfree(pool);
- return NULL;
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_vpg.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_vpg.h
index ed9a5549c389..466ba20b9c61 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_vpg.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_vpg.h
@@ -26,6 +26,7 @@
#ifndef __DAL_DCN30_VPG_H__
#define __DAL_DCN30_VPG_H__
+#include "vpg.h"
#define DCN30_VPG_FROM_VPG(vpg)\
container_of(vpg, struct dcn30_vpg, base)
@@ -132,28 +133,6 @@ struct dcn30_vpg_mask {
VPG_DCN3_REG_FIELD_LIST(uint32_t);
};
-struct vpg;
-
-struct vpg_funcs {
- void (*update_generic_info_packet)(
- struct vpg *vpg,
- uint32_t packet_index,
- const struct dc_info_packet *info_packet,
- bool immediate_update);
-
- void (*vpg_poweron)(
- struct vpg *vpg);
-
- void (*vpg_powerdown)(
- struct vpg *vpg);
-};
-
-struct vpg {
- const struct vpg_funcs *funcs;
- struct dc_context *ctx;
- int inst;
-};
-
struct dcn30_vpg {
struct vpg base;
const struct dcn30_vpg_registers *regs;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
index 7aa628c21973..fb4814ab3f05 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
@@ -1,17 +1,9 @@
-#
-# (c) Copyright 2020 Advanced Micro Devices, Inc. All the rights reserved
-#
-# All rights reserved. This notice is intended as a precaution against
-# inadvertent publication and does not imply publication or any waiver
-# of confidentiality. The year included in the foregoing notice is the
-# year of creation of the work.
-#
-# Authors: AMD
+# SPDX-License-Identifier: MIT
+# Copyright © 2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Makefile for dcn30.
-DCN301 = dcn301_init.o dcn301_resource.o dcn301_dccg.o \
- dcn301_dio_link_encoder.o dcn301_hwseq.o dcn301_panel_cntl.o dcn301_hubbub.o
+DCN301 = dcn301_panel_cntl.o
AMD_DAL_DCN301 = $(addprefix $(AMDDALPATH)/dc/dcn301/,$(DCN301))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c
index 736bda30abc3..9e96a3ace207 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c
@@ -93,7 +93,7 @@ static unsigned int dcn301_get_16_bit_backlight_from_pwm(struct panel_cntl *pane
return (uint32_t)(current_backlight);
}
-uint32_t dcn301_panel_cntl_hw_init(struct panel_cntl *panel_cntl)
+static uint32_t dcn301_panel_cntl_hw_init(struct panel_cntl *panel_cntl)
{
struct dcn301_panel_cntl *dcn301_panel_cntl = TO_DCN301_PANEL_CNTL(panel_cntl);
uint32_t value;
@@ -147,7 +147,7 @@ uint32_t dcn301_panel_cntl_hw_init(struct panel_cntl *panel_cntl)
return current_backlight;
}
-void dcn301_panel_cntl_destroy(struct panel_cntl **panel_cntl)
+static void dcn301_panel_cntl_destroy(struct panel_cntl **panel_cntl)
{
struct dcn301_panel_cntl *dcn301_panel_cntl = TO_DCN301_PANEL_CNTL(*panel_cntl);
@@ -155,7 +155,7 @@ void dcn301_panel_cntl_destroy(struct panel_cntl **panel_cntl)
*panel_cntl = NULL;
}
-bool dcn301_is_panel_backlight_on(struct panel_cntl *panel_cntl)
+static bool dcn301_is_panel_backlight_on(struct panel_cntl *panel_cntl)
{
struct dcn301_panel_cntl *dcn301_panel_cntl = TO_DCN301_PANEL_CNTL(panel_cntl);
uint32_t value;
@@ -165,7 +165,7 @@ bool dcn301_is_panel_backlight_on(struct panel_cntl *panel_cntl)
return value;
}
-bool dcn301_is_panel_powered_on(struct panel_cntl *panel_cntl)
+static bool dcn301_is_panel_powered_on(struct panel_cntl *panel_cntl)
{
struct dcn301_panel_cntl *dcn301_panel_cntl = TO_DCN301_PANEL_CNTL(panel_cntl);
uint32_t pwr_seq_state, dig_on, dig_on_ovrd;
@@ -177,7 +177,7 @@ bool dcn301_is_panel_powered_on(struct panel_cntl *panel_cntl)
return (pwr_seq_state == 1) || (dig_on == 1 && dig_on_ovrd == 1);
}
-void dcn301_store_backlight_level(struct panel_cntl *panel_cntl)
+static void dcn301_store_backlight_level(struct panel_cntl *panel_cntl)
{
struct dcn301_panel_cntl *dcn301_panel_cntl = TO_DCN301_PANEL_CNTL(panel_cntl);
@@ -215,4 +215,5 @@ void dcn301_panel_cntl_construct(
dcn301_panel_cntl->base.funcs = &dcn301_link_panel_cntl_funcs;
dcn301_panel_cntl->base.ctx = init_data->ctx;
dcn301_panel_cntl->base.inst = init_data->inst;
+ dcn301_panel_cntl->base.pwrseq_inst = 0;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile
deleted file mode 100644
index 101620a8867a..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile
+++ /dev/null
@@ -1,43 +0,0 @@
-#
-# (c) Copyright 2020 Advanced Micro Devices, Inc. All the rights reserved
-#
-# All rights reserved. This notice is intended as a precaution against
-# inadvertent publication and does not imply publication or any waiver
-# of confidentiality. The year included in the foregoing notice is the
-# year of creation of the work.
-#
-# Authors: AMD
-#
-# Makefile for dcn302.
-
-DCN3_02 = dcn302_init.o dcn302_hwseq.o dcn302_resource.o
-
-ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -msse
-endif
-
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o += -mhard-float
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o += -mpreferred-stack-boundary=4
-else
-CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o += -msse2
-endif
-endif
-
-AMD_DAL_DCN3_02 = $(addprefix $(AMDDALPATH)/dc/dcn302/,$(DCN3_02))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_DCN3_02)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
deleted file mode 100644
index fcf96cf08c76..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
+++ /dev/null
@@ -1,1778 +0,0 @@
-/*
- * Copyright 2020 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include "dcn302_init.h"
-#include "dcn302_resource.h"
-#include "dcn302_dccg.h"
-#include "irq/dcn302/irq_service_dcn302.h"
-
-#include "dcn30/dcn30_dio_link_encoder.h"
-#include "dcn30/dcn30_dio_stream_encoder.h"
-#include "dcn30/dcn30_dwb.h"
-#include "dcn30/dcn30_dpp.h"
-#include "dcn30/dcn30_hubbub.h"
-#include "dcn30/dcn30_hubp.h"
-#include "dcn30/dcn30_mmhubbub.h"
-#include "dcn30/dcn30_mpc.h"
-#include "dcn30/dcn30_opp.h"
-#include "dcn30/dcn30_optc.h"
-#include "dcn30/dcn30_resource.h"
-
-#include "dcn20/dcn20_dsc.h"
-#include "dcn20/dcn20_resource.h"
-
-#include "dcn10/dcn10_resource.h"
-
-#include "dce/dce_abm.h"
-#include "dce/dce_audio.h"
-#include "dce/dce_aux.h"
-#include "dce/dce_clock_source.h"
-#include "dce/dce_hwseq.h"
-#include "dce/dce_i2c_hw.h"
-#include "dce/dce_panel_cntl.h"
-#include "dce/dmub_abm.h"
-#include "dce/dmub_psr.h"
-#include "clk_mgr.h"
-
-#include "hw_sequencer_private.h"
-#include "reg_helper.h"
-#include "resource.h"
-#include "vm_helper.h"
-
-#include "dimgrey_cavefish_ip_offset.h"
-#include "dcn/dcn_3_0_2_offset.h"
-#include "dcn/dcn_3_0_2_sh_mask.h"
-#include "dcn/dpcs_3_0_0_offset.h"
-#include "dcn/dpcs_3_0_0_sh_mask.h"
-#include "nbio/nbio_7_4_offset.h"
-#include "amdgpu_socbb.h"
-
-#define DC_LOGGER_INIT(logger)
-
-struct _vcs_dpi_ip_params_st dcn3_02_ip = {
- .use_min_dcfclk = 0,
- .clamp_min_dcfclk = 0,
- .odm_capable = 1,
- .gpuvm_enable = 1,
- .hostvm_enable = 0,
- .gpuvm_max_page_table_levels = 4,
- .hostvm_max_page_table_levels = 4,
- .hostvm_cached_page_table_levels = 0,
- .pte_group_size_bytes = 2048,
- .num_dsc = 5,
- .rob_buffer_size_kbytes = 184,
- .det_buffer_size_kbytes = 184,
- .dpte_buffer_size_in_pte_reqs_luma = 64,
- .dpte_buffer_size_in_pte_reqs_chroma = 34,
- .pde_proc_buffer_size_64k_reqs = 48,
- .dpp_output_buffer_pixels = 2560,
- .opp_output_buffer_lines = 1,
- .pixel_chunk_size_kbytes = 8,
- .pte_enable = 1,
- .max_page_table_levels = 2,
- .pte_chunk_size_kbytes = 2, // ?
- .meta_chunk_size_kbytes = 2,
- .writeback_chunk_size_kbytes = 8,
- .line_buffer_size_bits = 789504,
- .is_line_buffer_bpp_fixed = 0, // ?
- .line_buffer_fixed_bpp = 0, // ?
- .dcc_supported = true,
- .writeback_interface_buffer_size_kbytes = 90,
- .writeback_line_buffer_buffer_size = 0,
- .max_line_buffer_lines = 12,
- .writeback_luma_buffer_size_kbytes = 12, // writeback_line_buffer_buffer_size = 656640
- .writeback_chroma_buffer_size_kbytes = 8,
- .writeback_chroma_line_buffer_width_pixels = 4,
- .writeback_max_hscl_ratio = 1,
- .writeback_max_vscl_ratio = 1,
- .writeback_min_hscl_ratio = 1,
- .writeback_min_vscl_ratio = 1,
- .writeback_max_hscl_taps = 1,
- .writeback_max_vscl_taps = 1,
- .writeback_line_buffer_luma_buffer_size = 0,
- .writeback_line_buffer_chroma_buffer_size = 14643,
- .cursor_buffer_size = 8,
- .cursor_chunk_size = 2,
- .max_num_otg = 5,
- .max_num_dpp = 5,
- .max_num_wb = 1,
- .max_dchub_pscl_bw_pix_per_clk = 4,
- .max_pscl_lb_bw_pix_per_clk = 2,
- .max_lb_vscl_bw_pix_per_clk = 4,
- .max_vscl_hscl_bw_pix_per_clk = 4,
- .max_hscl_ratio = 6,
- .max_vscl_ratio = 6,
- .hscl_mults = 4,
- .vscl_mults = 4,
- .max_hscl_taps = 8,
- .max_vscl_taps = 8,
- .dispclk_ramp_margin_percent = 1,
- .underscan_factor = 1.11,
- .min_vblank_lines = 32,
- .dppclk_delay_subtotal = 46,
- .dynamic_metadata_vm_enabled = true,
- .dppclk_delay_scl_lb_only = 16,
- .dppclk_delay_scl = 50,
- .dppclk_delay_cnvc_formatter = 27,
- .dppclk_delay_cnvc_cursor = 6,
- .dispclk_delay_subtotal = 119,
- .dcfclk_cstate_latency = 5.2, // SRExitTime
- .max_inter_dcn_tile_repeaters = 8,
- .max_num_hdmi_frl_outputs = 1,
- .odm_combine_4to1_supported = true,
-
- .xfc_supported = false,
- .xfc_fill_bw_overhead_percent = 10.0,
- .xfc_fill_constant_bytes = 0,
- .gfx7_compat_tiling_supported = 0,
- .number_of_cursors = 1,
-};
-
-struct _vcs_dpi_soc_bounding_box_st dcn3_02_soc = {
- .clock_limits = {
- {
- .state = 0,
- .dispclk_mhz = 562.0,
- .dppclk_mhz = 300.0,
- .phyclk_mhz = 300.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 405.6,
- },
- },
-
- .min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */
- .num_states = 1,
- .sr_exit_time_us = 26.5,
- .sr_enter_plus_exit_time_us = 31,
- .urgent_latency_us = 4.0,
- .urgent_latency_pixel_data_only_us = 4.0,
- .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
- .urgent_latency_vm_data_only_us = 4.0,
- .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
- .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
- .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
- .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
- .max_avg_sdp_bw_use_normal_percent = 60.0,
- .max_avg_dram_bw_use_normal_percent = 40.0,
- .writeback_latency_us = 12.0,
- .max_request_size_bytes = 256,
- .fabric_datapath_to_dcn_data_return_bytes = 64,
- .dcn_downspread_percent = 0.5,
- .downspread_percent = 0.38,
- .dram_page_open_time_ns = 50.0,
- .dram_rw_turnaround_time_ns = 17.5,
- .dram_return_buffer_per_channel_bytes = 8192,
- .round_trip_ping_latency_dcfclk_cycles = 156,
- .urgent_out_of_order_return_per_channel_bytes = 4096,
- .channel_interleave_bytes = 256,
- .num_banks = 8,
- .gpuvm_min_page_size_bytes = 4096,
- .hostvm_min_page_size_bytes = 4096,
- .dram_clock_change_latency_us = 404,
- .dummy_pstate_latency_us = 5,
- .writeback_dram_clock_change_latency_us = 23.0,
- .return_bus_width_bytes = 64,
- .dispclk_dppclk_vco_speed_mhz = 3650,
- .xfc_bus_transport_time_us = 20, // ?
- .xfc_xbuf_latency_tolerance_us = 4, // ?
- .use_urgent_burst_bw = 1, // ?
- .do_urgent_latency_adjustment = true,
- .urgent_latency_adjustment_fabric_clock_component_us = 1.0,
- .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
-};
-
-static const struct dc_debug_options debug_defaults_drv = {
- .disable_dmcu = true,
- .force_abm_enable = false,
- .timing_trace = false,
- .clock_trace = true,
- .disable_pplib_clock_request = true,
- .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
- .force_single_disp_pipe_split = false,
- .disable_dcc = DCC_ENABLE,
- .vsr_support = true,
- .performance_trace = false,
- .max_downscale_src_width = 7680,/*upto 8K*/
- .disable_pplib_wm_range = false,
- .scl_reset_length10 = true,
- .sanity_checks = false,
- .underflow_assert_delay_us = 0xFFFFFFFF,
- .dwb_fi_phase = -1, // -1 = disable,
- .dmub_command_table = true,
- .use_max_lb = true
-};
-
-static const struct dc_debug_options debug_defaults_diags = {
- .disable_dmcu = true,
- .force_abm_enable = false,
- .timing_trace = true,
- .clock_trace = true,
- .disable_dpp_power_gate = true,
- .disable_hubp_power_gate = true,
- .disable_clock_gate = true,
- .disable_pplib_clock_request = true,
- .disable_pplib_wm_range = true,
- .disable_stutter = false,
- .scl_reset_length10 = true,
- .dwb_fi_phase = -1, // -1 = disable
- .dmub_command_table = true,
- .enable_tri_buf = true,
- .disable_psr = true,
- .use_max_lb = true
-};
-
-enum dcn302_clk_src_array_id {
- DCN302_CLK_SRC_PLL0,
- DCN302_CLK_SRC_PLL1,
- DCN302_CLK_SRC_PLL2,
- DCN302_CLK_SRC_PLL3,
- DCN302_CLK_SRC_PLL4,
- DCN302_CLK_SRC_TOTAL
-};
-
-static const struct resource_caps res_cap_dcn302 = {
- .num_timing_generator = 5,
- .num_opp = 5,
- .num_video_plane = 5,
- .num_audio = 5,
- .num_stream_encoder = 5,
- .num_dwb = 1,
- .num_ddc = 5,
- .num_vmid = 16,
- .num_mpc_3dlut = 2,
- .num_dsc = 5,
-};
-
-static const struct dc_plane_cap plane_cap = {
- .type = DC_PLANE_TYPE_DCN_UNIVERSAL,
- .blends_with_above = true,
- .blends_with_below = true,
- .per_pixel_alpha = true,
- .pixel_format_support = {
- .argb8888 = true,
- .nv12 = true,
- .fp16 = true,
- .p010 = false,
- .ayuv = false,
- },
- .max_upscale_factor = {
- .argb8888 = 16000,
- .nv12 = 16000,
- .fp16 = 16000
- },
- /* 6:1 downscaling ratio: 1000/6 = 166.666 */
- .max_downscale_factor = {
- .argb8888 = 167,
- .nv12 = 167,
- .fp16 = 167
- },
- 16,
- 16
-};
-
-/* NBIO */
-#define NBIO_BASE_INNER(seg) \
- NBIO_BASE__INST0_SEG ## seg
-
-#define NBIO_BASE(seg) \
- NBIO_BASE_INNER(seg)
-
-#define NBIO_SR(reg_name)\
- .reg_name = NBIO_BASE(mm ## reg_name ## _BASE_IDX) + \
- mm ## reg_name
-
-/* DCN */
-#undef BASE_INNER
-#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
-
-#define BASE(seg) BASE_INNER(seg)
-
-#define SR(reg_name)\
- .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + mm ## reg_name
-
-#define SF(reg_name, field_name, post_fix)\
- .field_name = reg_name ## __ ## field_name ## post_fix
-
-#define SRI(reg_name, block, id)\
- .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + mm ## block ## id ## _ ## reg_name
-
-#define SRI2(reg_name, block, id)\
- .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + mm ## reg_name
-
-#define SRII(reg_name, block, id)\
- .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define DCCG_SRII(reg_name, block, id)\
- .block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define VUPDATE_SRII(reg_name, block, id)\
- .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \
- mm ## reg_name ## _ ## block ## id
-
-#define SRII_DWB(reg_name, temp_name, block, id)\
- .reg_name[id] = BASE(mm ## block ## id ## _ ## temp_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## temp_name
-
-#define SRII_MPC_RMU(reg_name, block, id)\
- .RMU##_##reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-static const struct dcn_hubbub_registers hubbub_reg = {
- HUBBUB_REG_LIST_DCN30(0)
-};
-
-static const struct dcn_hubbub_shift hubbub_shift = {
- HUBBUB_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn_hubbub_mask hubbub_mask = {
- HUBBUB_MASK_SH_LIST_DCN30(_MASK)
-};
-
-#define vmid_regs(id)\
- [id] = { DCN20_VMID_REG_LIST(id) }
-
-static const struct dcn_vmid_registers vmid_regs[] = {
- vmid_regs(0),
- vmid_regs(1),
- vmid_regs(2),
- vmid_regs(3),
- vmid_regs(4),
- vmid_regs(5),
- vmid_regs(6),
- vmid_regs(7),
- vmid_regs(8),
- vmid_regs(9),
- vmid_regs(10),
- vmid_regs(11),
- vmid_regs(12),
- vmid_regs(13),
- vmid_regs(14),
- vmid_regs(15)
-};
-
-static const struct dcn20_vmid_shift vmid_shifts = {
- DCN20_VMID_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn20_vmid_mask vmid_masks = {
- DCN20_VMID_MASK_SH_LIST(_MASK)
-};
-
-static struct hubbub *dcn302_hubbub_create(struct dc_context *ctx)
-{
- int i;
-
- struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub), GFP_KERNEL);
-
- if (!hubbub3)
- return NULL;
-
- hubbub3_construct(hubbub3, ctx, &hubbub_reg, &hubbub_shift, &hubbub_mask);
-
- for (i = 0; i < res_cap_dcn302.num_vmid; i++) {
- struct dcn20_vmid *vmid = &hubbub3->vmid[i];
-
- vmid->ctx = ctx;
-
- vmid->regs = &vmid_regs[i];
- vmid->shifts = &vmid_shifts;
- vmid->masks = &vmid_masks;
- }
-
- return &hubbub3->base;
-}
-
-#define vpg_regs(id)\
- [id] = { VPG_DCN3_REG_LIST(id) }
-
-static const struct dcn30_vpg_registers vpg_regs[] = {
- vpg_regs(0),
- vpg_regs(1),
- vpg_regs(2),
- vpg_regs(3),
- vpg_regs(4),
- vpg_regs(5)
-};
-
-static const struct dcn30_vpg_shift vpg_shift = {
- DCN3_VPG_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn30_vpg_mask vpg_mask = {
- DCN3_VPG_MASK_SH_LIST(_MASK)
-};
-
-static struct vpg *dcn302_vpg_create(struct dc_context *ctx, uint32_t inst)
-{
- struct dcn30_vpg *vpg3 = kzalloc(sizeof(struct dcn30_vpg), GFP_KERNEL);
-
- if (!vpg3)
- return NULL;
-
- vpg3_construct(vpg3, ctx, inst, &vpg_regs[inst], &vpg_shift, &vpg_mask);
-
- return &vpg3->base;
-}
-
-#define afmt_regs(id)\
- [id] = { AFMT_DCN3_REG_LIST(id) }
-
-static const struct dcn30_afmt_registers afmt_regs[] = {
- afmt_regs(0),
- afmt_regs(1),
- afmt_regs(2),
- afmt_regs(3),
- afmt_regs(4),
- afmt_regs(5)
-};
-
-static const struct dcn30_afmt_shift afmt_shift = {
- DCN3_AFMT_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn30_afmt_mask afmt_mask = {
- DCN3_AFMT_MASK_SH_LIST(_MASK)
-};
-
-static struct afmt *dcn302_afmt_create(struct dc_context *ctx, uint32_t inst)
-{
- struct dcn30_afmt *afmt3 = kzalloc(sizeof(struct dcn30_afmt), GFP_KERNEL);
-
- if (!afmt3)
- return NULL;
-
- afmt3_construct(afmt3, ctx, inst, &afmt_regs[inst], &afmt_shift, &afmt_mask);
-
- return &afmt3->base;
-}
-
-#define audio_regs(id)\
- [id] = { AUD_COMMON_REG_LIST(id) }
-
-static const struct dce_audio_registers audio_regs[] = {
- audio_regs(0),
- audio_regs(1),
- audio_regs(2),
- audio_regs(3),
- audio_regs(4),
- audio_regs(5),
- audio_regs(6)
-};
-
-#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\
- SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\
- SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\
- AUD_COMMON_MASK_SH_LIST_BASE(mask_sh)
-
-static const struct dce_audio_shift audio_shift = {
- DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce_audio_mask audio_mask = {
- DCE120_AUD_COMMON_MASK_SH_LIST(_MASK)
-};
-
-static struct audio *dcn302_create_audio(struct dc_context *ctx, unsigned int inst)
-{
- return dce_audio_create(ctx, inst, &audio_regs[inst], &audio_shift, &audio_mask);
-}
-
-#define stream_enc_regs(id)\
- [id] = { SE_DCN3_REG_LIST(id) }
-
-static const struct dcn10_stream_enc_registers stream_enc_regs[] = {
- stream_enc_regs(0),
- stream_enc_regs(1),
- stream_enc_regs(2),
- stream_enc_regs(3),
- stream_enc_regs(4)
-};
-
-static const struct dcn10_stream_encoder_shift se_shift = {
- SE_COMMON_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn10_stream_encoder_mask se_mask = {
- SE_COMMON_MASK_SH_LIST_DCN30(_MASK)
-};
-
-static struct stream_encoder *dcn302_stream_encoder_create(enum engine_id eng_id, struct dc_context *ctx)
-{
- struct dcn10_stream_encoder *enc1;
- struct vpg *vpg;
- struct afmt *afmt;
- int vpg_inst;
- int afmt_inst;
-
- /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
- if (eng_id <= ENGINE_ID_DIGE) {
- vpg_inst = eng_id;
- afmt_inst = eng_id;
- } else
- return NULL;
-
- enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL);
- vpg = dcn302_vpg_create(ctx, vpg_inst);
- afmt = dcn302_afmt_create(ctx, afmt_inst);
-
- if (!enc1 || !vpg || !afmt) {
- kfree(enc1);
- kfree(vpg);
- kfree(afmt);
- return NULL;
- }
-
- dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, vpg, afmt, &stream_enc_regs[eng_id],
- &se_shift, &se_mask);
-
- return &enc1->base;
-}
-
-#define clk_src_regs(index, pllid)\
- [index] = { CS_COMMON_REG_LIST_DCN3_02(index, pllid) }
-
-static const struct dce110_clk_src_regs clk_src_regs[] = {
- clk_src_regs(0, A),
- clk_src_regs(1, B),
- clk_src_regs(2, C),
- clk_src_regs(3, D),
- clk_src_regs(4, E)
-};
-
-static const struct dce110_clk_src_shift cs_shift = {
- CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT)
-};
-
-static const struct dce110_clk_src_mask cs_mask = {
- CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK)
-};
-
-static struct clock_source *dcn302_clock_source_create(struct dc_context *ctx, struct dc_bios *bios,
- enum clock_source_id id, const struct dce110_clk_src_regs *regs, bool dp_clk_src)
-{
- struct dce110_clk_src *clk_src = kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL);
-
- if (!clk_src)
- return NULL;
-
- if (dcn3_clk_src_construct(clk_src, ctx, bios, id, regs, &cs_shift, &cs_mask)) {
- clk_src->base.dp_clk_src = dp_clk_src;
- return &clk_src->base;
- }
-
- BREAK_TO_DEBUGGER();
- return NULL;
-}
-
-static const struct dce_hwseq_registers hwseq_reg = {
- HWSEQ_DCN302_REG_LIST()
-};
-
-static const struct dce_hwseq_shift hwseq_shift = {
- HWSEQ_DCN302_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce_hwseq_mask hwseq_mask = {
- HWSEQ_DCN302_MASK_SH_LIST(_MASK)
-};
-
-static struct dce_hwseq *dcn302_hwseq_create(struct dc_context *ctx)
-{
- struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL);
-
- if (hws) {
- hws->ctx = ctx;
- hws->regs = &hwseq_reg;
- hws->shifts = &hwseq_shift;
- hws->masks = &hwseq_mask;
- }
- return hws;
-}
-
-#define hubp_regs(id)\
- [id] = { HUBP_REG_LIST_DCN30(id) }
-
-static const struct dcn_hubp2_registers hubp_regs[] = {
- hubp_regs(0),
- hubp_regs(1),
- hubp_regs(2),
- hubp_regs(3),
- hubp_regs(4)
-};
-
-static const struct dcn_hubp2_shift hubp_shift = {
- HUBP_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn_hubp2_mask hubp_mask = {
- HUBP_MASK_SH_LIST_DCN30(_MASK)
-};
-
-static struct hubp *dcn302_hubp_create(struct dc_context *ctx, uint32_t inst)
-{
- struct dcn20_hubp *hubp2 = kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL);
-
- if (!hubp2)
- return NULL;
-
- if (hubp3_construct(hubp2, ctx, inst, &hubp_regs[inst], &hubp_shift, &hubp_mask))
- return &hubp2->base;
-
- BREAK_TO_DEBUGGER();
- kfree(hubp2);
- return NULL;
-}
-
-#define dpp_regs(id)\
- [id] = { DPP_REG_LIST_DCN30(id) }
-
-static const struct dcn3_dpp_registers dpp_regs[] = {
- dpp_regs(0),
- dpp_regs(1),
- dpp_regs(2),
- dpp_regs(3),
- dpp_regs(4)
-};
-
-static const struct dcn3_dpp_shift tf_shift = {
- DPP_REG_LIST_SH_MASK_DCN30(__SHIFT)
-};
-
-static const struct dcn3_dpp_mask tf_mask = {
- DPP_REG_LIST_SH_MASK_DCN30(_MASK)
-};
-
-static struct dpp *dcn302_dpp_create(struct dc_context *ctx, uint32_t inst)
-{
- struct dcn3_dpp *dpp = kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL);
-
- if (!dpp)
- return NULL;
-
- if (dpp3_construct(dpp, ctx, inst, &dpp_regs[inst], &tf_shift, &tf_mask))
- return &dpp->base;
-
- BREAK_TO_DEBUGGER();
- kfree(dpp);
- return NULL;
-}
-
-#define opp_regs(id)\
- [id] = { OPP_REG_LIST_DCN30(id) }
-
-static const struct dcn20_opp_registers opp_regs[] = {
- opp_regs(0),
- opp_regs(1),
- opp_regs(2),
- opp_regs(3),
- opp_regs(4)
-};
-
-static const struct dcn20_opp_shift opp_shift = {
- OPP_MASK_SH_LIST_DCN20(__SHIFT)
-};
-
-static const struct dcn20_opp_mask opp_mask = {
- OPP_MASK_SH_LIST_DCN20(_MASK)
-};
-
-static struct output_pixel_processor *dcn302_opp_create(struct dc_context *ctx, uint32_t inst)
-{
- struct dcn20_opp *opp = kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL);
-
- if (!opp) {
- BREAK_TO_DEBUGGER();
- return NULL;
- }
-
- dcn20_opp_construct(opp, ctx, inst, &opp_regs[inst], &opp_shift, &opp_mask);
- return &opp->base;
-}
-
-#define optc_regs(id)\
- [id] = { OPTC_COMMON_REG_LIST_DCN3_0(id) }
-
-static const struct dcn_optc_registers optc_regs[] = {
- optc_regs(0),
- optc_regs(1),
- optc_regs(2),
- optc_regs(3),
- optc_regs(4)
-};
-
-static const struct dcn_optc_shift optc_shift = {
- OPTC_COMMON_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn_optc_mask optc_mask = {
- OPTC_COMMON_MASK_SH_LIST_DCN30(_MASK)
-};
-
-static struct timing_generator *dcn302_timing_generator_create(struct dc_context *ctx, uint32_t instance)
-{
- struct optc *tgn10 = kzalloc(sizeof(struct optc), GFP_KERNEL);
-
- if (!tgn10)
- return NULL;
-
- tgn10->base.inst = instance;
- tgn10->base.ctx = ctx;
-
- tgn10->tg_regs = &optc_regs[instance];
- tgn10->tg_shift = &optc_shift;
- tgn10->tg_mask = &optc_mask;
-
- dcn30_timing_generator_init(tgn10);
-
- return &tgn10->base;
-}
-
-static const struct dcn30_mpc_registers mpc_regs = {
- MPC_REG_LIST_DCN3_0(0),
- MPC_REG_LIST_DCN3_0(1),
- MPC_REG_LIST_DCN3_0(2),
- MPC_REG_LIST_DCN3_0(3),
- MPC_REG_LIST_DCN3_0(4),
- MPC_OUT_MUX_REG_LIST_DCN3_0(0),
- MPC_OUT_MUX_REG_LIST_DCN3_0(1),
- MPC_OUT_MUX_REG_LIST_DCN3_0(2),
- MPC_OUT_MUX_REG_LIST_DCN3_0(3),
- MPC_OUT_MUX_REG_LIST_DCN3_0(4),
- MPC_RMU_GLOBAL_REG_LIST_DCN3AG,
- MPC_RMU_REG_LIST_DCN3AG(0),
- MPC_RMU_REG_LIST_DCN3AG(1),
- MPC_RMU_REG_LIST_DCN3AG(2),
- MPC_DWB_MUX_REG_LIST_DCN3_0(0),
-};
-
-static const struct dcn30_mpc_shift mpc_shift = {
- MPC_COMMON_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn30_mpc_mask mpc_mask = {
- MPC_COMMON_MASK_SH_LIST_DCN30(_MASK)
-};
-
-static struct mpc *dcn302_mpc_create(struct dc_context *ctx, int num_mpcc, int num_rmu)
-{
- struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), GFP_KERNEL);
-
- if (!mpc30)
- return NULL;
-
- dcn30_mpc_construct(mpc30, ctx, &mpc_regs, &mpc_shift, &mpc_mask, num_mpcc, num_rmu);
-
- return &mpc30->base;
-}
-
-#define dsc_regsDCN20(id)\
-[id] = { DSC_REG_LIST_DCN20(id) }
-
-static const struct dcn20_dsc_registers dsc_regs[] = {
- dsc_regsDCN20(0),
- dsc_regsDCN20(1),
- dsc_regsDCN20(2),
- dsc_regsDCN20(3),
- dsc_regsDCN20(4)
-};
-
-static const struct dcn20_dsc_shift dsc_shift = {
- DSC_REG_LIST_SH_MASK_DCN20(__SHIFT)
-};
-
-static const struct dcn20_dsc_mask dsc_mask = {
- DSC_REG_LIST_SH_MASK_DCN20(_MASK)
-};
-
-static struct display_stream_compressor *dcn302_dsc_create(struct dc_context *ctx, uint32_t inst)
-{
- struct dcn20_dsc *dsc = kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL);
-
- if (!dsc) {
- BREAK_TO_DEBUGGER();
- return NULL;
- }
-
- dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask);
- return &dsc->base;
-}
-
-#define dwbc_regs_dcn3(id)\
-[id] = { DWBC_COMMON_REG_LIST_DCN30(id) }
-
-static const struct dcn30_dwbc_registers dwbc30_regs[] = {
- dwbc_regs_dcn3(0)
-};
-
-static const struct dcn30_dwbc_shift dwbc30_shift = {
- DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn30_dwbc_mask dwbc30_mask = {
- DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK)
-};
-
-static bool dcn302_dwbc_create(struct dc_context *ctx, struct resource_pool *pool)
-{
- int i;
- uint32_t pipe_count = pool->res_cap->num_dwb;
-
- for (i = 0; i < pipe_count; i++) {
- struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), GFP_KERNEL);
-
- if (!dwbc30) {
- dm_error("DC: failed to create dwbc30!\n");
- return false;
- }
-
- dcn30_dwbc_construct(dwbc30, ctx, &dwbc30_regs[i], &dwbc30_shift, &dwbc30_mask, i);
-
- pool->dwbc[i] = &dwbc30->base;
- }
- return true;
-}
-
-#define mcif_wb_regs_dcn3(id)\
-[id] = { MCIF_WB_COMMON_REG_LIST_DCN30(id) }
-
-static const struct dcn30_mmhubbub_registers mcif_wb30_regs[] = {
- mcif_wb_regs_dcn3(0)
-};
-
-static const struct dcn30_mmhubbub_shift mcif_wb30_shift = {
- MCIF_WB_COMMON_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn30_mmhubbub_mask mcif_wb30_mask = {
- MCIF_WB_COMMON_MASK_SH_LIST_DCN30(_MASK)
-};
-
-static bool dcn302_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool)
-{
- int i;
- uint32_t pipe_count = pool->res_cap->num_dwb;
-
- for (i = 0; i < pipe_count; i++) {
- struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), GFP_KERNEL);
-
- if (!mcif_wb30) {
- dm_error("DC: failed to create mcif_wb30!\n");
- return false;
- }
-
- dcn30_mmhubbub_construct(mcif_wb30, ctx, &mcif_wb30_regs[i], &mcif_wb30_shift, &mcif_wb30_mask, i);
-
- pool->mcif_wb[i] = &mcif_wb30->base;
- }
- return true;
-}
-
-#define aux_engine_regs(id)\
-[id] = {\
- AUX_COMMON_REG_LIST0(id), \
- .AUXN_IMPCAL = 0, \
- .AUXP_IMPCAL = 0, \
- .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \
-}
-
-static const struct dce110_aux_registers aux_engine_regs[] = {
- aux_engine_regs(0),
- aux_engine_regs(1),
- aux_engine_regs(2),
- aux_engine_regs(3),
- aux_engine_regs(4)
-};
-
-static const struct dce110_aux_registers_shift aux_shift = {
- DCN_AUX_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce110_aux_registers_mask aux_mask = {
- DCN_AUX_MASK_SH_LIST(_MASK)
-};
-
-static struct dce_aux *dcn302_aux_engine_create(struct dc_context *ctx, uint32_t inst)
-{
- struct aux_engine_dce110 *aux_engine = kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL);
-
- if (!aux_engine)
- return NULL;
-
- dce110_aux_engine_construct(aux_engine, ctx, inst, SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD,
- &aux_engine_regs[inst], &aux_mask, &aux_shift, ctx->dc->caps.extended_aux_timeout_support);
-
- return &aux_engine->base;
-}
-
-#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) }
-
-static const struct dce_i2c_registers i2c_hw_regs[] = {
- i2c_inst_regs(1),
- i2c_inst_regs(2),
- i2c_inst_regs(3),
- i2c_inst_regs(4),
- i2c_inst_regs(5)
-};
-
-static const struct dce_i2c_shift i2c_shifts = {
- I2C_COMMON_MASK_SH_LIST_DCN2(__SHIFT)
-};
-
-static const struct dce_i2c_mask i2c_masks = {
- I2C_COMMON_MASK_SH_LIST_DCN2(_MASK)
-};
-
-static struct dce_i2c_hw *dcn302_i2c_hw_create(struct dc_context *ctx, uint32_t inst)
-{
- struct dce_i2c_hw *dce_i2c_hw = kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL);
-
- if (!dce_i2c_hw)
- return NULL;
-
- dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks);
-
- return dce_i2c_hw;
-}
-
-static const struct encoder_feature_support link_enc_feature = {
- .max_hdmi_deep_color = COLOR_DEPTH_121212,
- .max_hdmi_pixel_clock = 600000,
- .hdmi_ycbcr420_supported = true,
- .dp_ycbcr420_supported = true,
- .fec_supported = true,
- .flags.bits.IS_HBR2_CAPABLE = true,
- .flags.bits.IS_HBR3_CAPABLE = true,
- .flags.bits.IS_TPS3_CAPABLE = true,
- .flags.bits.IS_TPS4_CAPABLE = true
-};
-
-#define link_regs(id, phyid)\
- [id] = {\
- LE_DCN3_REG_LIST(id), \
- UNIPHY_DCN2_REG_LIST(phyid), \
- DPCS_DCN2_REG_LIST(id), \
- SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \
- }
-
-static const struct dcn10_link_enc_registers link_enc_regs[] = {
- link_regs(0, A),
- link_regs(1, B),
- link_regs(2, C),
- link_regs(3, D),
- link_regs(4, E)
-};
-
-static const struct dcn10_link_enc_shift le_shift = {
- LINK_ENCODER_MASK_SH_LIST_DCN30(__SHIFT),
- DPCS_DCN2_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn10_link_enc_mask le_mask = {
- LINK_ENCODER_MASK_SH_LIST_DCN30(_MASK),
- DPCS_DCN2_MASK_SH_LIST(_MASK)
-};
-
-#define aux_regs(id)\
- [id] = { DCN2_AUX_REG_LIST(id) }
-
-static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = {
- aux_regs(0),
- aux_regs(1),
- aux_regs(2),
- aux_regs(3),
- aux_regs(4)
-};
-
-#define hpd_regs(id)\
- [id] = { HPD_REG_LIST(id) }
-
-static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = {
- hpd_regs(0),
- hpd_regs(1),
- hpd_regs(2),
- hpd_regs(3),
- hpd_regs(4)
-};
-
-static struct link_encoder *dcn302_link_encoder_create(const struct encoder_init_data *enc_init_data)
-{
- struct dcn20_link_encoder *enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
-
- if (!enc20)
- return NULL;
-
- dcn30_link_encoder_construct(enc20, enc_init_data, &link_enc_feature,
- &link_enc_regs[enc_init_data->transmitter], &link_enc_aux_regs[enc_init_data->channel - 1],
- &link_enc_hpd_regs[enc_init_data->hpd_source], &le_shift, &le_mask);
-
- return &enc20->enc10.base;
-}
-
-static const struct dce_panel_cntl_registers panel_cntl_regs[] = {
- { DCN_PANEL_CNTL_REG_LIST() }
-};
-
-static const struct dce_panel_cntl_shift panel_cntl_shift = {
- DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce_panel_cntl_mask panel_cntl_mask = {
- DCE_PANEL_CNTL_MASK_SH_LIST(_MASK)
-};
-
-static struct panel_cntl *dcn302_panel_cntl_create(const struct panel_cntl_init_data *init_data)
-{
- struct dce_panel_cntl *panel_cntl = kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL);
-
- if (!panel_cntl)
- return NULL;
-
- dce_panel_cntl_construct(panel_cntl, init_data, &panel_cntl_regs[init_data->inst],
- &panel_cntl_shift, &panel_cntl_mask);
-
- return &panel_cntl->base;
-}
-
-static void read_dce_straps(struct dc_context *ctx, struct resource_straps *straps)
-{
- generic_reg_get(ctx, mmDC_PINSTRAPS + BASE(mmDC_PINSTRAPS_BASE_IDX),
- FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio);
-}
-
-static const struct resource_create_funcs res_create_funcs = {
- .read_dce_straps = read_dce_straps,
- .create_audio = dcn302_create_audio,
- .create_stream_encoder = dcn302_stream_encoder_create,
- .create_hwseq = dcn302_hwseq_create,
-};
-
-static const struct resource_create_funcs res_create_maximus_funcs = {
- .read_dce_straps = NULL,
- .create_audio = NULL,
- .create_stream_encoder = NULL,
- .create_hwseq = dcn302_hwseq_create,
-};
-
-static bool is_soc_bounding_box_valid(struct dc *dc)
-{
- uint32_t hw_internal_rev = dc->ctx->asic_id.hw_internal_rev;
-
- if (ASICREV_IS_DIMGREY_CAVEFISH_P(hw_internal_rev))
- return true;
-
- return false;
-}
-
-static bool init_soc_bounding_box(struct dc *dc, struct resource_pool *pool)
-{
- struct _vcs_dpi_soc_bounding_box_st *loaded_bb = &dcn3_02_soc;
- struct _vcs_dpi_ip_params_st *loaded_ip = &dcn3_02_ip;
-
- DC_LOGGER_INIT(dc->ctx->logger);
-
- if (!is_soc_bounding_box_valid(dc)) {
- DC_LOG_ERROR("%s: not valid soc bounding box\n", __func__);
- return false;
- }
-
- loaded_ip->max_num_otg = pool->pipe_count;
- loaded_ip->max_num_dpp = pool->pipe_count;
- loaded_ip->clamp_min_dcfclk = dc->config.clamp_min_dcfclk;
- dcn20_patch_bounding_box(dc, loaded_bb);
-
- if (dc->ctx->dc_bios->funcs->get_soc_bb_info) {
- struct bp_soc_bb_info bb_info = { 0 };
-
- if (dc->ctx->dc_bios->funcs->get_soc_bb_info(
- dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) {
- if (bb_info.dram_clock_change_latency_100ns > 0)
- dcn3_02_soc.dram_clock_change_latency_us =
- bb_info.dram_clock_change_latency_100ns * 10;
-
- if (bb_info.dram_sr_enter_exit_latency_100ns > 0)
- dcn3_02_soc.sr_enter_plus_exit_time_us =
- bb_info.dram_sr_enter_exit_latency_100ns * 10;
-
- if (bb_info.dram_sr_exit_latency_100ns > 0)
- dcn3_02_soc.sr_exit_time_us =
- bb_info.dram_sr_exit_latency_100ns * 10;
- }
- }
-
- return true;
-}
-
-static void dcn302_resource_destruct(struct resource_pool *pool)
-{
- unsigned int i;
-
- for (i = 0; i < pool->stream_enc_count; i++) {
- if (pool->stream_enc[i] != NULL) {
- if (pool->stream_enc[i]->vpg != NULL) {
- kfree(DCN30_VPG_FROM_VPG(pool->stream_enc[i]->vpg));
- pool->stream_enc[i]->vpg = NULL;
- }
- if (pool->stream_enc[i]->afmt != NULL) {
- kfree(DCN30_AFMT_FROM_AFMT(pool->stream_enc[i]->afmt));
- pool->stream_enc[i]->afmt = NULL;
- }
- kfree(DCN10STRENC_FROM_STRENC(pool->stream_enc[i]));
- pool->stream_enc[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->res_cap->num_dsc; i++) {
- if (pool->dscs[i] != NULL)
- dcn20_dsc_destroy(&pool->dscs[i]);
- }
-
- if (pool->mpc != NULL) {
- kfree(TO_DCN20_MPC(pool->mpc));
- pool->mpc = NULL;
- }
-
- if (pool->hubbub != NULL) {
- kfree(pool->hubbub);
- pool->hubbub = NULL;
- }
-
- for (i = 0; i < pool->pipe_count; i++) {
- if (pool->dpps[i] != NULL) {
- kfree(TO_DCN20_DPP(pool->dpps[i]));
- pool->dpps[i] = NULL;
- }
-
- if (pool->hubps[i] != NULL) {
- kfree(TO_DCN20_HUBP(pool->hubps[i]));
- pool->hubps[i] = NULL;
- }
-
- if (pool->irqs != NULL)
- dal_irq_service_destroy(&pool->irqs);
- }
-
- for (i = 0; i < pool->res_cap->num_ddc; i++) {
- if (pool->engines[i] != NULL)
- dce110_engine_destroy(&pool->engines[i]);
- if (pool->hw_i2cs[i] != NULL) {
- kfree(pool->hw_i2cs[i]);
- pool->hw_i2cs[i] = NULL;
- }
- if (pool->sw_i2cs[i] != NULL) {
- kfree(pool->sw_i2cs[i]);
- pool->sw_i2cs[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->res_cap->num_opp; i++) {
- if (pool->opps[i] != NULL)
- pool->opps[i]->funcs->opp_destroy(&pool->opps[i]);
- }
-
- for (i = 0; i < pool->res_cap->num_timing_generator; i++) {
- if (pool->timing_generators[i] != NULL) {
- kfree(DCN10TG_FROM_TG(pool->timing_generators[i]));
- pool->timing_generators[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->res_cap->num_dwb; i++) {
- if (pool->dwbc[i] != NULL) {
- kfree(TO_DCN30_DWBC(pool->dwbc[i]));
- pool->dwbc[i] = NULL;
- }
- if (pool->mcif_wb[i] != NULL) {
- kfree(TO_DCN30_MMHUBBUB(pool->mcif_wb[i]));
- pool->mcif_wb[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->audio_count; i++) {
- if (pool->audios[i])
- dce_aud_destroy(&pool->audios[i]);
- }
-
- for (i = 0; i < pool->clk_src_count; i++) {
- if (pool->clock_sources[i] != NULL)
- dcn20_clock_source_destroy(&pool->clock_sources[i]);
- }
-
- if (pool->dp_clock_source != NULL)
- dcn20_clock_source_destroy(&pool->dp_clock_source);
-
- for (i = 0; i < pool->res_cap->num_mpc_3dlut; i++) {
- if (pool->mpc_lut[i] != NULL) {
- dc_3dlut_func_release(pool->mpc_lut[i]);
- pool->mpc_lut[i] = NULL;
- }
- if (pool->mpc_shaper[i] != NULL) {
- dc_transfer_func_release(pool->mpc_shaper[i]);
- pool->mpc_shaper[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->pipe_count; i++) {
- if (pool->multiple_abms[i] != NULL)
- dce_abm_destroy(&pool->multiple_abms[i]);
- }
-
- if (pool->psr != NULL)
- dmub_psr_destroy(&pool->psr);
-
- if (pool->dccg != NULL)
- dcn_dccg_destroy(&pool->dccg);
-}
-
-static void dcn302_destroy_resource_pool(struct resource_pool **pool)
-{
- dcn302_resource_destruct(*pool);
- kfree(*pool);
- *pool = NULL;
-}
-
-static void dcn302_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
- unsigned int *optimal_dcfclk,
- unsigned int *optimal_fclk)
-{
- double bw_from_dram, bw_from_dram1, bw_from_dram2;
-
- bw_from_dram1 = uclk_mts * dcn3_02_soc.num_chans *
- dcn3_02_soc.dram_channel_width_bytes * (dcn3_02_soc.max_avg_dram_bw_use_normal_percent / 100);
- bw_from_dram2 = uclk_mts * dcn3_02_soc.num_chans *
- dcn3_02_soc.dram_channel_width_bytes * (dcn3_02_soc.max_avg_sdp_bw_use_normal_percent / 100);
-
- bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2;
-
- if (optimal_fclk)
- *optimal_fclk = bw_from_dram /
- (dcn3_02_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_02_soc.max_avg_sdp_bw_use_normal_percent / 100));
-
- if (optimal_dcfclk)
- *optimal_dcfclk = bw_from_dram /
- (dcn3_02_soc.return_bus_width_bytes * (dcn3_02_soc.max_avg_sdp_bw_use_normal_percent / 100));
-}
-
-void dcn302_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
-{
- unsigned int i, j;
- unsigned int num_states = 0;
-
- unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0};
- unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0};
- unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0};
- unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0};
-
- unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {694, 875, 1000, 1200};
- unsigned int num_dcfclk_sta_targets = 4;
- unsigned int num_uclk_states;
-
-
- if (dc->ctx->dc_bios->vram_info.num_chans)
- dcn3_02_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans;
-
- if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
- dcn3_02_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
-
- dcn3_02_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
- dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
-
- if (bw_params->clk_table.entries[0].memclk_mhz) {
- int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0;
-
- for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
- if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
- max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
- if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
- max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
- if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
- max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
- if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
- max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
- }
- if (!max_dcfclk_mhz)
- max_dcfclk_mhz = dcn3_02_soc.clock_limits[0].dcfclk_mhz;
- if (!max_dispclk_mhz)
- max_dispclk_mhz = dcn3_02_soc.clock_limits[0].dispclk_mhz;
- if (!max_dppclk_mhz)
- max_dppclk_mhz = dcn3_02_soc.clock_limits[0].dppclk_mhz;
- if (!max_phyclk_mhz)
- max_phyclk_mhz = dcn3_02_soc.clock_limits[0].phyclk_mhz;
-
- if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
- /* If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array */
- dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz;
- num_dcfclk_sta_targets++;
- } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
- /* If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates */
- for (i = 0; i < num_dcfclk_sta_targets; i++) {
- if (dcfclk_sta_targets[i] > max_dcfclk_mhz) {
- dcfclk_sta_targets[i] = max_dcfclk_mhz;
- break;
- }
- }
- /* Update size of array since we "removed" duplicates */
- num_dcfclk_sta_targets = i + 1;
- }
-
- num_uclk_states = bw_params->clk_table.num_entries;
-
- /* Calculate optimal dcfclk for each uclk */
- for (i = 0; i < num_uclk_states; i++) {
- dcn302_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16,
- &optimal_dcfclk_for_uclk[i], NULL);
- if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) {
- optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz;
- }
- }
-
- /* Calculate optimal uclk for each dcfclk sta target */
- for (i = 0; i < num_dcfclk_sta_targets; i++) {
- for (j = 0; j < num_uclk_states; j++) {
- if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) {
- optimal_uclk_for_dcfclk_sta_targets[i] =
- bw_params->clk_table.entries[j].memclk_mhz * 16;
- break;
- }
- }
- }
-
- i = 0;
- j = 0;
- /* create the final dcfclk and uclk table */
- while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) {
- if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) {
- dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
- dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
- } else {
- if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
- dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
- dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
- } else {
- j = num_uclk_states;
- }
- }
- }
-
- while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) {
- dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
- dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
- }
-
- while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES &&
- optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
- dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
- dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
- }
-
- dcn3_02_soc.num_states = num_states;
- for (i = 0; i < dcn3_02_soc.num_states; i++) {
- dcn3_02_soc.clock_limits[i].state = i;
- dcn3_02_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
- dcn3_02_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
- dcn3_02_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];
-
- /* Fill all states with max values of all other clocks */
- dcn3_02_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
- dcn3_02_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
- dcn3_02_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz;
- /* Populate from bw_params for DTBCLK, SOCCLK */
- if (!bw_params->clk_table.entries[i].dtbclk_mhz && i > 0)
- dcn3_02_soc.clock_limits[i].dtbclk_mhz = dcn3_02_soc.clock_limits[i-1].dtbclk_mhz;
- else
- dcn3_02_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
- if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0)
- dcn3_02_soc.clock_limits[i].socclk_mhz = dcn3_02_soc.clock_limits[i-1].socclk_mhz;
- else
- dcn3_02_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz;
- /* These clocks cannot come from bw_params, always fill from dcn3_02_soc[1] */
- /* FCLK, PHYCLK_D18, DSCCLK */
- dcn3_02_soc.clock_limits[i].phyclk_d18_mhz = dcn3_02_soc.clock_limits[0].phyclk_d18_mhz;
- dcn3_02_soc.clock_limits[i].dscclk_mhz = dcn3_02_soc.clock_limits[0].dscclk_mhz;
- }
- /* re-init DML with updated bb */
- dml_init_instance(&dc->dml, &dcn3_02_soc, &dcn3_02_ip, DML_PROJECT_DCN30);
- if (dc->current_state)
- dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_02_soc, &dcn3_02_ip, DML_PROJECT_DCN30);
- }
-}
-
-static struct resource_funcs dcn302_res_pool_funcs = {
- .destroy = dcn302_destroy_resource_pool,
- .link_enc_create = dcn302_link_encoder_create,
- .panel_cntl_create = dcn302_panel_cntl_create,
- .validate_bandwidth = dcn30_validate_bandwidth,
- .calculate_wm_and_dlg = dcn30_calculate_wm_and_dlg,
- .update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
- .populate_dml_pipes = dcn30_populate_dml_pipes_from_context,
- .acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer,
- .add_stream_to_ctx = dcn30_add_stream_to_ctx,
- .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
- .remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
- .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context,
- .set_mcif_arb_params = dcn30_set_mcif_arb_params,
- .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link,
- .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut,
- .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut,
- .update_bw_bounding_box = dcn302_update_bw_bounding_box,
- .patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
-};
-
-static struct dc_cap_funcs cap_funcs = {
- .get_dcc_compression_cap = dcn20_get_dcc_compression_cap
-};
-
-static const struct bios_registers bios_regs = {
- NBIO_SR(BIOS_SCRATCH_3),
- NBIO_SR(BIOS_SCRATCH_6)
-};
-
-static const struct dccg_registers dccg_regs = {
- DCCG_REG_LIST_DCN3_02()
-};
-
-static const struct dccg_shift dccg_shift = {
- DCCG_MASK_SH_LIST_DCN3_02(__SHIFT)
-};
-
-static const struct dccg_mask dccg_mask = {
- DCCG_MASK_SH_LIST_DCN3_02(_MASK)
-};
-
-#define abm_regs(id)\
- [id] = { ABM_DCN302_REG_LIST(id) }
-
-static const struct dce_abm_registers abm_regs[] = {
- abm_regs(0),
- abm_regs(1),
- abm_regs(2),
- abm_regs(3),
- abm_regs(4)
-};
-
-static const struct dce_abm_shift abm_shift = {
- ABM_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dce_abm_mask abm_mask = {
- ABM_MASK_SH_LIST_DCN30(_MASK)
-};
-
-static bool dcn302_resource_construct(
- uint8_t num_virtual_links,
- struct dc *dc,
- struct resource_pool *pool)
-{
- int i;
- struct dc_context *ctx = dc->ctx;
- struct irq_service_init_data init_data;
-
- ctx->dc_bios->regs = &bios_regs;
-
- pool->res_cap = &res_cap_dcn302;
-
- pool->funcs = &dcn302_res_pool_funcs;
-
- /*************************************************
- * Resource + asic cap harcoding *
- *************************************************/
- pool->underlay_pipe_index = NO_UNDERLAY_PIPE;
- pool->pipe_count = pool->res_cap->num_timing_generator;
- pool->mpcc_count = pool->res_cap->num_timing_generator;
- dc->caps.max_downscale_ratio = 600;
- dc->caps.i2c_speed_in_khz = 100;
- dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.4 w/a applied by derfault*/
- dc->caps.max_cursor_size = 256;
- dc->caps.min_horizontal_blanking_period = 80;
- dc->caps.dmdata_alloc_size = 2048;
- dc->caps.mall_size_per_mem_channel = 4;
- /* total size = mall per channel * num channels * 1024 * 1024 */
- dc->caps.mall_size_total = dc->caps.mall_size_per_mem_channel * dc->ctx->dc_bios->vram_info.num_chans * 1048576;
- dc->caps.cursor_cache_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8;
- dc->caps.max_slave_planes = 1;
- dc->caps.max_slave_yuv_planes = 1;
- dc->caps.max_slave_rgb_planes = 1;
- dc->caps.post_blend_color_processing = true;
- dc->caps.force_dp_tps4_for_cp2520 = true;
- dc->caps.extended_aux_timeout_support = true;
- dc->caps.dmcub_support = true;
-
- /* Color pipeline capabilities */
- dc->caps.color.dpp.dcn_arch = 1;
- dc->caps.color.dpp.input_lut_shared = 0;
- dc->caps.color.dpp.icsc = 1;
- dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr
- dc->caps.color.dpp.dgam_rom_caps.srgb = 1;
- dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1;
- dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1;
- dc->caps.color.dpp.dgam_rom_caps.pq = 1;
- dc->caps.color.dpp.dgam_rom_caps.hlg = 1;
- dc->caps.color.dpp.post_csc = 1;
- dc->caps.color.dpp.gamma_corr = 1;
- dc->caps.color.dpp.dgam_rom_for_yuv = 0;
-
- dc->caps.color.dpp.hw_3d_lut = 1;
- dc->caps.color.dpp.ogam_ram = 1;
- // no OGAM ROM on DCN3
- dc->caps.color.dpp.ogam_rom_caps.srgb = 0;
- dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0;
- dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0;
- dc->caps.color.dpp.ogam_rom_caps.pq = 0;
- dc->caps.color.dpp.ogam_rom_caps.hlg = 0;
- dc->caps.color.dpp.ocsc = 0;
-
- dc->caps.color.mpc.gamut_remap = 1;
- dc->caps.color.mpc.num_3dluts = pool->res_cap->num_mpc_3dlut; //3
- dc->caps.color.mpc.ogam_ram = 1;
- dc->caps.color.mpc.ogam_rom_caps.srgb = 0;
- dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0;
- dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0;
- dc->caps.color.mpc.ogam_rom_caps.pq = 0;
- dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
- dc->caps.color.mpc.ocsc = 1;
-
- if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV)
- dc->debug = debug_defaults_drv;
- else
- dc->debug = debug_defaults_diags;
-
- // Init the vm_helper
- if (dc->vm_helper)
- vm_helper_init(dc->vm_helper, 16);
-
- /*************************************************
- * Create resources *
- *************************************************/
-
- /* Clock Sources for Pixel Clock*/
- pool->clock_sources[DCN302_CLK_SRC_PLL0] =
- dcn302_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL0,
- &clk_src_regs[0], false);
- pool->clock_sources[DCN302_CLK_SRC_PLL1] =
- dcn302_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL1,
- &clk_src_regs[1], false);
- pool->clock_sources[DCN302_CLK_SRC_PLL2] =
- dcn302_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL2,
- &clk_src_regs[2], false);
- pool->clock_sources[DCN302_CLK_SRC_PLL3] =
- dcn302_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL3,
- &clk_src_regs[3], false);
- pool->clock_sources[DCN302_CLK_SRC_PLL4] =
- dcn302_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL4,
- &clk_src_regs[4], false);
-
- pool->clk_src_count = DCN302_CLK_SRC_TOTAL;
-
- /* todo: not reuse phy_pll registers */
- pool->dp_clock_source =
- dcn302_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_ID_DP_DTO,
- &clk_src_regs[0], true);
-
- for (i = 0; i < pool->clk_src_count; i++) {
- if (pool->clock_sources[i] == NULL) {
- dm_error("DC: failed to create clock sources!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
- }
-
- /* DCCG */
- pool->dccg = dccg30_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask);
- if (pool->dccg == NULL) {
- dm_error("DC: failed to create dccg!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
-
- /* PP Lib and SMU interfaces */
- init_soc_bounding_box(dc, pool);
-
- /* DML */
- dml_init_instance(&dc->dml, &dcn3_02_soc, &dcn3_02_ip, DML_PROJECT_DCN30);
-
- /* IRQ */
- init_data.ctx = dc->ctx;
- pool->irqs = dal_irq_service_dcn302_create(&init_data);
- if (!pool->irqs)
- goto create_fail;
-
- /* HUBBUB */
- pool->hubbub = dcn302_hubbub_create(ctx);
- if (pool->hubbub == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create hubbub!\n");
- goto create_fail;
- }
-
- /* HUBPs, DPPs, OPPs and TGs */
- for (i = 0; i < pool->pipe_count; i++) {
- pool->hubps[i] = dcn302_hubp_create(ctx, i);
- if (pool->hubps[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create hubps!\n");
- goto create_fail;
- }
-
- pool->dpps[i] = dcn302_dpp_create(ctx, i);
- if (pool->dpps[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create dpps!\n");
- goto create_fail;
- }
- }
-
- for (i = 0; i < pool->res_cap->num_opp; i++) {
- pool->opps[i] = dcn302_opp_create(ctx, i);
- if (pool->opps[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create output pixel processor!\n");
- goto create_fail;
- }
- }
-
- for (i = 0; i < pool->res_cap->num_timing_generator; i++) {
- pool->timing_generators[i] = dcn302_timing_generator_create(ctx, i);
- if (pool->timing_generators[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create tg!\n");
- goto create_fail;
- }
- }
- pool->timing_generator_count = i;
-
- /* PSR */
- pool->psr = dmub_psr_create(ctx);
- if (pool->psr == NULL) {
- dm_error("DC: failed to create psr!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
-
- /* ABMs */
- for (i = 0; i < pool->res_cap->num_timing_generator; i++) {
- pool->multiple_abms[i] = dmub_abm_create(ctx, &abm_regs[i], &abm_shift, &abm_mask);
- if (pool->multiple_abms[i] == NULL) {
- dm_error("DC: failed to create abm for pipe %d!\n", i);
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
- }
-
- /* MPC and DSC */
- pool->mpc = dcn302_mpc_create(ctx, pool->mpcc_count, pool->res_cap->num_mpc_3dlut);
- if (pool->mpc == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create mpc!\n");
- goto create_fail;
- }
-
- for (i = 0; i < pool->res_cap->num_dsc; i++) {
- pool->dscs[i] = dcn302_dsc_create(ctx, i);
- if (pool->dscs[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create display stream compressor %d!\n", i);
- goto create_fail;
- }
- }
-
- /* DWB and MMHUBBUB */
- if (!dcn302_dwbc_create(ctx, pool)) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create dwbc!\n");
- goto create_fail;
- }
-
- if (!dcn302_mmhubbub_create(ctx, pool)) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create mcif_wb!\n");
- goto create_fail;
- }
-
- /* AUX and I2C */
- for (i = 0; i < pool->res_cap->num_ddc; i++) {
- pool->engines[i] = dcn302_aux_engine_create(ctx, i);
- if (pool->engines[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC:failed to create aux engine!!\n");
- goto create_fail;
- }
- pool->hw_i2cs[i] = dcn302_i2c_hw_create(ctx, i);
- if (pool->hw_i2cs[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC:failed to create hw i2c!!\n");
- goto create_fail;
- }
- pool->sw_i2cs[i] = NULL;
- }
-
- /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */
- if (!resource_construct(num_virtual_links, dc, pool,
- (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment) ?
- &res_create_funcs : &res_create_maximus_funcs)))
- goto create_fail;
-
- /* HW Sequencer and Plane caps */
- dcn302_hw_sequencer_construct(dc);
-
- dc->caps.max_planes = pool->pipe_count;
-
- for (i = 0; i < dc->caps.max_planes; ++i)
- dc->caps.planes[i] = plane_cap;
-
- dc->cap_funcs = cap_funcs;
-
- return true;
-
-create_fail:
-
- dcn302_resource_destruct(pool);
-
- return false;
-}
-
-struct resource_pool *dcn302_create_resource_pool(const struct dc_init_data *init_data, struct dc *dc)
-{
- struct resource_pool *pool = kzalloc(sizeof(struct resource_pool), GFP_KERNEL);
-
- if (!pool)
- return NULL;
-
- if (dcn302_resource_construct(init_data->num_virtual_links, dc, pool))
- return pool;
-
- BREAK_TO_DEBUGGER();
- kfree(pool);
- return NULL;
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/Makefile b/drivers/gpu/drm/amd/display/dc/dcn303/Makefile
deleted file mode 100644
index 6f7a1f2b49f0..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn303/Makefile
+++ /dev/null
@@ -1,39 +0,0 @@
-# SPDX-License-Identifier: MIT
-#
-# Copyright (C) 2021 Advanced Micro Devices, Inc. All the rights reserved
-#
-# Authors: AMD
-#
-# Makefile for dcn303.
-
-DCN3_03 = dcn303_init.o dcn303_hwseq.o dcn303_resource.o
-
-ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn303/dcn303_resource.o := -msse
-endif
-
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/dcn303/dcn303_resource.o := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-CFLAGS_$(AMDDALPATH)/dc/dcn303/dcn303_resource.o += -mhard-float
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-CFLAGS_$(AMDDALPATH)/dc/dcn303/dcn303_resource.o += -mpreferred-stack-boundary=4
-else
-CFLAGS_$(AMDDALPATH)/dc/dcn303/dcn303_resource.o += -msse2
-endif
-endif
-
-AMD_DAL_DCN3_03 = $(addprefix $(AMDDALPATH)/dc/dcn303/,$(DCN3_03))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_DCN3_03)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_dccg.h
deleted file mode 100644
index a79c54bbc899..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_dccg.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright (C) 2021 Advanced Micro Devices, Inc.
- *
- * Authors: AMD
- */
-
-#ifndef __DCN303_DCCG_H__
-#define __DCN303_DCCG_H__
-
-#include "dcn30/dcn30_dccg.h"
-
-
-#define DCCG_REG_LIST_DCN3_03() \
- SR(DPPCLK_DTO_CTRL),\
- DCCG_SRII(DTO_PARAM, DPPCLK, 0),\
- DCCG_SRII(DTO_PARAM, DPPCLK, 1),\
- SR(REFCLK_CNTL)
-
-#define DCCG_MASK_SH_LIST_DCN3_03(mask_sh) \
- DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 0, mask_sh),\
- DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 0, mask_sh),\
- DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 1, mask_sh),\
- DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 1, mask_sh),\
- DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_PHASE, mask_sh),\
- DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_MODULO, mask_sh),\
- DCCG_SF(REFCLK_CNTL, REFCLK_CLOCK_EN, mask_sh),\
- DCCG_SF(REFCLK_CNTL, REFCLK_SRC_SEL, mask_sh)
-
-#endif //__DCN303_DCCG_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_hwseq.c
deleted file mode 100644
index b48b732aa647..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_hwseq.c
+++ /dev/null
@@ -1,45 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright (C) 2021 Advanced Micro Devices, Inc.
- *
- * Authors: AMD
- */
-
-#include "dcn303_hwseq.h"
-
-#include "dce/dce_hwseq.h"
-
-#include "reg_helper.h"
-#include "dc.h"
-
-#define DC_LOGGER_INIT(logger)
-
-#define CTX \
- hws->ctx
-#define REG(reg)\
- hws->regs->reg
-
-#undef FN
-#define FN(reg_name, field_name) \
- hws->shifts->field_name, hws->masks->field_name
-
-
-void dcn303_dpp_pg_control(struct dce_hwseq *hws, unsigned int dpp_inst, bool power_on)
-{
- /*DCN303 removes PG registers*/
-}
-
-void dcn303_hubp_pg_control(struct dce_hwseq *hws, unsigned int hubp_inst, bool power_on)
-{
- /*DCN303 removes PG registers*/
-}
-
-void dcn303_dsc_pg_control(struct dce_hwseq *hws, unsigned int dsc_inst, bool power_on)
-{
- /*DCN303 removes PG registers*/
-}
-
-void dcn303_enable_power_gating_plane(struct dce_hwseq *hws, bool enable)
-{
- /*DCN303 removes PG registers*/
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_hwseq.h
deleted file mode 100644
index 8b69a3b76c11..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_hwseq.h
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright (C) 2021 Advanced Micro Devices, Inc.
- *
- * Authors: AMD
- */
-
-#ifndef __DC_HWSS_DCN303_H__
-#define __DC_HWSS_DCN303_H__
-
-#include "hw_sequencer_private.h"
-
-void dcn303_dpp_pg_control(struct dce_hwseq *hws, unsigned int dpp_inst, bool power_on);
-void dcn303_hubp_pg_control(struct dce_hwseq *hws, unsigned int hubp_inst, bool power_on);
-void dcn303_dsc_pg_control(struct dce_hwseq *hws, unsigned int dsc_inst, bool power_on);
-void dcn303_enable_power_gating_plane(struct dce_hwseq *hws, bool enable);
-
-#endif /* __DC_HWSS_DCN303_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c
deleted file mode 100644
index aa5dbbade2bd..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c
+++ /dev/null
@@ -1,20 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright (C) 2021 Advanced Micro Devices, Inc.
- *
- * Authors: AMD
- */
-
-#include "dcn303_hwseq.h"
-#include "dcn30/dcn30_init.h"
-#include "dc.h"
-
-void dcn303_hw_sequencer_construct(struct dc *dc)
-{
- dcn30_hw_sequencer_construct(dc);
-
- dc->hwseq->funcs.dpp_pg_control = dcn303_dpp_pg_control;
- dc->hwseq->funcs.hubp_pg_control = dcn303_hubp_pg_control;
- dc->hwseq->funcs.dsc_pg_control = dcn303_dsc_pg_control;
- dc->hwseq->funcs.enable_power_gating_plane = dcn303_enable_power_gating_plane;
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.h b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.h
deleted file mode 100644
index 66b1e3604f07..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.h
+++ /dev/null
@@ -1,15 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright (C) 2021 Advanced Micro Devices, Inc.
- *
- * Authors: AMD
- */
-
-#ifndef __DC_DCN303_INIT_H__
-#define __DC_DCN303_INIT_H__
-
-struct dc;
-
-void dcn303_hw_sequencer_construct(struct dc *dc);
-
-#endif /* __DC_DCN303_INIT_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
deleted file mode 100644
index 4a9b64023675..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
+++ /dev/null
@@ -1,1720 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright (C) 2021 Advanced Micro Devices, Inc.
- *
- * Authors: AMD
- */
-
-#include "dcn303_init.h"
-#include "dcn303_resource.h"
-#include "dcn303_dccg.h"
-#include "irq/dcn303/irq_service_dcn303.h"
-
-#include "dcn30/dcn30_dio_link_encoder.h"
-#include "dcn30/dcn30_dio_stream_encoder.h"
-#include "dcn30/dcn30_dpp.h"
-#include "dcn30/dcn30_dwb.h"
-#include "dcn30/dcn30_hubbub.h"
-#include "dcn30/dcn30_hubp.h"
-#include "dcn30/dcn30_mmhubbub.h"
-#include "dcn30/dcn30_mpc.h"
-#include "dcn30/dcn30_opp.h"
-#include "dcn30/dcn30_optc.h"
-#include "dcn30/dcn30_resource.h"
-
-#include "dcn20/dcn20_dsc.h"
-#include "dcn20/dcn20_resource.h"
-
-#include "dcn10/dcn10_resource.h"
-
-#include "dc_link_ddc.h"
-
-#include "dce/dce_abm.h"
-#include "dce/dce_audio.h"
-#include "dce/dce_aux.h"
-#include "dce/dce_clock_source.h"
-#include "dce/dce_hwseq.h"
-#include "dce/dce_i2c_hw.h"
-#include "dce/dce_panel_cntl.h"
-#include "dce/dmub_abm.h"
-#include "dce/dmub_psr.h"
-#include "clk_mgr.h"
-
-#include "hw_sequencer_private.h"
-#include "reg_helper.h"
-#include "resource.h"
-#include "vm_helper.h"
-
-#include "sienna_cichlid_ip_offset.h"
-#include "dcn/dcn_3_0_3_offset.h"
-#include "dcn/dcn_3_0_3_sh_mask.h"
-#include "dcn/dpcs_3_0_3_offset.h"
-#include "dcn/dpcs_3_0_3_sh_mask.h"
-#include "nbio/nbio_2_3_offset.h"
-
-#define DC_LOGGER_INIT(logger)
-
-struct _vcs_dpi_ip_params_st dcn3_03_ip = {
- .use_min_dcfclk = 0,
- .clamp_min_dcfclk = 0,
- .odm_capable = 1,
- .gpuvm_enable = 1,
- .hostvm_enable = 0,
- .gpuvm_max_page_table_levels = 4,
- .hostvm_max_page_table_levels = 4,
- .hostvm_cached_page_table_levels = 0,
- .pte_group_size_bytes = 2048,
- .num_dsc = 2,
- .rob_buffer_size_kbytes = 184,
- .det_buffer_size_kbytes = 184,
- .dpte_buffer_size_in_pte_reqs_luma = 64,
- .dpte_buffer_size_in_pte_reqs_chroma = 34,
- .pde_proc_buffer_size_64k_reqs = 48,
- .dpp_output_buffer_pixels = 2560,
- .opp_output_buffer_lines = 1,
- .pixel_chunk_size_kbytes = 8,
- .pte_enable = 1,
- .max_page_table_levels = 2,
- .pte_chunk_size_kbytes = 2, // ?
- .meta_chunk_size_kbytes = 2,
- .writeback_chunk_size_kbytes = 8,
- .line_buffer_size_bits = 789504,
- .is_line_buffer_bpp_fixed = 0, // ?
- .line_buffer_fixed_bpp = 0, // ?
- .dcc_supported = true,
- .writeback_interface_buffer_size_kbytes = 90,
- .writeback_line_buffer_buffer_size = 0,
- .max_line_buffer_lines = 12,
- .writeback_luma_buffer_size_kbytes = 12, // writeback_line_buffer_buffer_size = 656640
- .writeback_chroma_buffer_size_kbytes = 8,
- .writeback_chroma_line_buffer_width_pixels = 4,
- .writeback_max_hscl_ratio = 1,
- .writeback_max_vscl_ratio = 1,
- .writeback_min_hscl_ratio = 1,
- .writeback_min_vscl_ratio = 1,
- .writeback_max_hscl_taps = 1,
- .writeback_max_vscl_taps = 1,
- .writeback_line_buffer_luma_buffer_size = 0,
- .writeback_line_buffer_chroma_buffer_size = 14643,
- .cursor_buffer_size = 8,
- .cursor_chunk_size = 2,
- .max_num_otg = 2,
- .max_num_dpp = 2,
- .max_num_wb = 1,
- .max_dchub_pscl_bw_pix_per_clk = 4,
- .max_pscl_lb_bw_pix_per_clk = 2,
- .max_lb_vscl_bw_pix_per_clk = 4,
- .max_vscl_hscl_bw_pix_per_clk = 4,
- .max_hscl_ratio = 6,
- .max_vscl_ratio = 6,
- .hscl_mults = 4,
- .vscl_mults = 4,
- .max_hscl_taps = 8,
- .max_vscl_taps = 8,
- .dispclk_ramp_margin_percent = 1,
- .underscan_factor = 1.11,
- .min_vblank_lines = 32,
- .dppclk_delay_subtotal = 46,
- .dynamic_metadata_vm_enabled = true,
- .dppclk_delay_scl_lb_only = 16,
- .dppclk_delay_scl = 50,
- .dppclk_delay_cnvc_formatter = 27,
- .dppclk_delay_cnvc_cursor = 6,
- .dispclk_delay_subtotal = 119,
- .dcfclk_cstate_latency = 5.2, // SRExitTime
- .max_inter_dcn_tile_repeaters = 8,
- .max_num_hdmi_frl_outputs = 1,
- .odm_combine_4to1_supported = false,
- .xfc_supported = false,
- .xfc_fill_bw_overhead_percent = 10.0,
- .xfc_fill_constant_bytes = 0,
- .gfx7_compat_tiling_supported = 0,
- .number_of_cursors = 1,
-};
-
-struct _vcs_dpi_soc_bounding_box_st dcn3_03_soc = {
- .clock_limits = {
- {
- .state = 0,
- .dispclk_mhz = 1217.0,
- .dppclk_mhz = 1217.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 405.6,
- },
- },
-
- .min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */
- .num_states = 1,
- .sr_exit_time_us = 35.5,
- .sr_enter_plus_exit_time_us = 40,
- .urgent_latency_us = 4.0,
- .urgent_latency_pixel_data_only_us = 4.0,
- .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
- .urgent_latency_vm_data_only_us = 4.0,
- .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
- .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
- .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
- .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
- .max_avg_sdp_bw_use_normal_percent = 60.0,
- .max_avg_dram_bw_use_normal_percent = 40.0,
- .writeback_latency_us = 12.0,
- .max_request_size_bytes = 256,
- .fabric_datapath_to_dcn_data_return_bytes = 64,
- .dcn_downspread_percent = 0.5,
- .downspread_percent = 0.38,
- .dram_page_open_time_ns = 50.0,
- .dram_rw_turnaround_time_ns = 17.5,
- .dram_return_buffer_per_channel_bytes = 8192,
- .round_trip_ping_latency_dcfclk_cycles = 156,
- .urgent_out_of_order_return_per_channel_bytes = 4096,
- .channel_interleave_bytes = 256,
- .num_banks = 8,
- .gpuvm_min_page_size_bytes = 4096,
- .hostvm_min_page_size_bytes = 4096,
- .dram_clock_change_latency_us = 404,
- .dummy_pstate_latency_us = 5,
- .writeback_dram_clock_change_latency_us = 23.0,
- .return_bus_width_bytes = 64,
- .dispclk_dppclk_vco_speed_mhz = 3650,
- .xfc_bus_transport_time_us = 20, // ?
- .xfc_xbuf_latency_tolerance_us = 4, // ?
- .use_urgent_burst_bw = 1, // ?
- .do_urgent_latency_adjustment = true,
- .urgent_latency_adjustment_fabric_clock_component_us = 1.0,
- .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
-};
-
-static const struct dc_debug_options debug_defaults_drv = {
- .disable_dmcu = true,
- .force_abm_enable = false,
- .timing_trace = false,
- .clock_trace = true,
- .disable_pplib_clock_request = true,
- .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
- .force_single_disp_pipe_split = false,
- .disable_dcc = DCC_ENABLE,
- .vsr_support = true,
- .performance_trace = false,
- .max_downscale_src_width = 7680,/*upto 8K*/
- .disable_pplib_wm_range = false,
- .scl_reset_length10 = true,
- .sanity_checks = false,
- .underflow_assert_delay_us = 0xFFFFFFFF,
- .dwb_fi_phase = -1, // -1 = disable,
- .dmub_command_table = true,
- .disable_idle_power_optimizations = false,
-};
-
-static const struct dc_debug_options debug_defaults_diags = {
- .disable_dmcu = true,
- .force_abm_enable = false,
- .timing_trace = true,
- .clock_trace = true,
- .disable_dpp_power_gate = true,
- .disable_hubp_power_gate = true,
- .disable_clock_gate = true,
- .disable_pplib_clock_request = true,
- .disable_pplib_wm_range = true,
- .disable_stutter = false,
- .scl_reset_length10 = true,
- .dwb_fi_phase = -1, // -1 = disable
- .dmub_command_table = true,
- .enable_tri_buf = true,
- .disable_psr = true,
-};
-
-enum dcn303_clk_src_array_id {
- DCN303_CLK_SRC_PLL0,
- DCN303_CLK_SRC_PLL1,
- DCN303_CLK_SRC_TOTAL
-};
-
-static const struct resource_caps res_cap_dcn303 = {
- .num_timing_generator = 2,
- .num_opp = 2,
- .num_video_plane = 2,
- .num_audio = 2,
- .num_stream_encoder = 2,
- .num_dwb = 1,
- .num_ddc = 2,
- .num_vmid = 16,
- .num_mpc_3dlut = 1,
- .num_dsc = 2,
-};
-
-static const struct dc_plane_cap plane_cap = {
- .type = DC_PLANE_TYPE_DCN_UNIVERSAL,
- .blends_with_above = true,
- .blends_with_below = true,
- .per_pixel_alpha = true,
- .pixel_format_support = {
- .argb8888 = true,
- .nv12 = true,
- .fp16 = true,
- .p010 = false,
- .ayuv = false,
- },
- .max_upscale_factor = {
- .argb8888 = 16000,
- .nv12 = 16000,
- .fp16 = 16000
- },
- .max_downscale_factor = {
- .argb8888 = 600,
- .nv12 = 600,
- .fp16 = 600
- },
- 16,
- 16
-};
-
-/* NBIO */
-#define NBIO_BASE_INNER(seg) \
- NBIO_BASE__INST0_SEG ## seg
-
-#define NBIO_BASE(seg) \
- NBIO_BASE_INNER(seg)
-
-#define NBIO_SR(reg_name)\
- .reg_name = NBIO_BASE(mm ## reg_name ## _BASE_IDX) + \
- mm ## reg_name
-
-/* DCN */
-#undef BASE_INNER
-#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
-
-#define BASE(seg) BASE_INNER(seg)
-
-#define SR(reg_name)\
- .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + mm ## reg_name
-
-#define SF(reg_name, field_name, post_fix)\
- .field_name = reg_name ## __ ## field_name ## post_fix
-
-#define SRI(reg_name, block, id)\
- .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + mm ## block ## id ## _ ## reg_name
-
-#define SRI2(reg_name, block, id)\
- .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + mm ## reg_name
-
-#define SRII(reg_name, block, id)\
- .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define DCCG_SRII(reg_name, block, id)\
- .block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define VUPDATE_SRII(reg_name, block, id)\
- .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \
- mm ## reg_name ## _ ## block ## id
-
-#define SRII_DWB(reg_name, temp_name, block, id)\
- .reg_name[id] = BASE(mm ## block ## id ## _ ## temp_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## temp_name
-
-#define SRII_MPC_RMU(reg_name, block, id)\
- .RMU##_##reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-static const struct dcn_hubbub_registers hubbub_reg = {
- HUBBUB_REG_LIST_DCN30(0)
-};
-
-static const struct dcn_hubbub_shift hubbub_shift = {
- HUBBUB_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn_hubbub_mask hubbub_mask = {
- HUBBUB_MASK_SH_LIST_DCN30(_MASK)
-};
-
-#define vmid_regs(id)\
- [id] = { DCN20_VMID_REG_LIST(id) }
-
-static const struct dcn_vmid_registers vmid_regs[] = {
- vmid_regs(0),
- vmid_regs(1),
- vmid_regs(2),
- vmid_regs(3),
- vmid_regs(4),
- vmid_regs(5),
- vmid_regs(6),
- vmid_regs(7),
- vmid_regs(8),
- vmid_regs(9),
- vmid_regs(10),
- vmid_regs(11),
- vmid_regs(12),
- vmid_regs(13),
- vmid_regs(14),
- vmid_regs(15)
-};
-
-static const struct dcn20_vmid_shift vmid_shifts = {
- DCN20_VMID_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn20_vmid_mask vmid_masks = {
- DCN20_VMID_MASK_SH_LIST(_MASK)
-};
-
-static struct hubbub *dcn303_hubbub_create(struct dc_context *ctx)
-{
- int i;
-
- struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub), GFP_KERNEL);
-
- if (!hubbub3)
- return NULL;
-
- hubbub3_construct(hubbub3, ctx, &hubbub_reg, &hubbub_shift, &hubbub_mask);
-
- for (i = 0; i < res_cap_dcn303.num_vmid; i++) {
- struct dcn20_vmid *vmid = &hubbub3->vmid[i];
-
- vmid->ctx = ctx;
-
- vmid->regs = &vmid_regs[i];
- vmid->shifts = &vmid_shifts;
- vmid->masks = &vmid_masks;
- }
-
- return &hubbub3->base;
-}
-
-#define vpg_regs(id)\
- [id] = { VPG_DCN3_REG_LIST(id) }
-
-static const struct dcn30_vpg_registers vpg_regs[] = {
- vpg_regs(0),
- vpg_regs(1),
- vpg_regs(2)
-};
-
-static const struct dcn30_vpg_shift vpg_shift = {
- DCN3_VPG_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn30_vpg_mask vpg_mask = {
- DCN3_VPG_MASK_SH_LIST(_MASK)
-};
-
-static struct vpg *dcn303_vpg_create(struct dc_context *ctx, uint32_t inst)
-{
- struct dcn30_vpg *vpg3 = kzalloc(sizeof(struct dcn30_vpg), GFP_KERNEL);
-
- if (!vpg3)
- return NULL;
-
- vpg3_construct(vpg3, ctx, inst, &vpg_regs[inst], &vpg_shift, &vpg_mask);
-
- return &vpg3->base;
-}
-
-#define afmt_regs(id)\
- [id] = { AFMT_DCN3_REG_LIST(id) }
-
-static const struct dcn30_afmt_registers afmt_regs[] = {
- afmt_regs(0),
- afmt_regs(1),
- afmt_regs(2)
-};
-
-static const struct dcn30_afmt_shift afmt_shift = {
- DCN3_AFMT_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn30_afmt_mask afmt_mask = {
- DCN3_AFMT_MASK_SH_LIST(_MASK)
-};
-
-static struct afmt *dcn303_afmt_create(struct dc_context *ctx, uint32_t inst)
-{
- struct dcn30_afmt *afmt3 = kzalloc(sizeof(struct dcn30_afmt), GFP_KERNEL);
-
- if (!afmt3)
- return NULL;
-
- afmt3_construct(afmt3, ctx, inst, &afmt_regs[inst], &afmt_shift, &afmt_mask);
-
- return &afmt3->base;
-}
-
-#define audio_regs(id)\
- [id] = { AUD_COMMON_REG_LIST(id) }
-
-static const struct dce_audio_registers audio_regs[] = {
- audio_regs(0),
- audio_regs(1),
- audio_regs(2),
- audio_regs(3),
- audio_regs(4),
- audio_regs(5),
- audio_regs(6)
-};
-
-#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\
- SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\
- SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\
- AUD_COMMON_MASK_SH_LIST_BASE(mask_sh)
-
-static const struct dce_audio_shift audio_shift = {
- DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce_audio_mask audio_mask = {
- DCE120_AUD_COMMON_MASK_SH_LIST(_MASK)
-};
-
-static struct audio *dcn303_create_audio(struct dc_context *ctx, unsigned int inst)
-{
- return dce_audio_create(ctx, inst, &audio_regs[inst], &audio_shift, &audio_mask);
-}
-
-#define stream_enc_regs(id)\
- [id] = { SE_DCN3_REG_LIST(id) }
-
-static const struct dcn10_stream_enc_registers stream_enc_regs[] = {
- stream_enc_regs(0),
- stream_enc_regs(1)
-};
-
-static const struct dcn10_stream_encoder_shift se_shift = {
- SE_COMMON_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn10_stream_encoder_mask se_mask = {
- SE_COMMON_MASK_SH_LIST_DCN30(_MASK)
-};
-
-static struct stream_encoder *dcn303_stream_encoder_create(enum engine_id eng_id, struct dc_context *ctx)
-{
- struct dcn10_stream_encoder *enc1;
- struct vpg *vpg;
- struct afmt *afmt;
- int vpg_inst;
- int afmt_inst;
-
- /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
- if (eng_id <= ENGINE_ID_DIGE) {
- vpg_inst = eng_id;
- afmt_inst = eng_id;
- } else
- return NULL;
-
- enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL);
- vpg = dcn303_vpg_create(ctx, vpg_inst);
- afmt = dcn303_afmt_create(ctx, afmt_inst);
-
- if (!enc1 || !vpg || !afmt) {
- kfree(enc1);
- kfree(vpg);
- kfree(afmt);
- return NULL;
- }
-
- dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, vpg, afmt, &stream_enc_regs[eng_id],
- &se_shift, &se_mask);
-
- return &enc1->base;
-}
-
-#define clk_src_regs(index, pllid)\
- [index] = { CS_COMMON_REG_LIST_DCN3_03(index, pllid) }
-
-static const struct dce110_clk_src_regs clk_src_regs[] = {
- clk_src_regs(0, A),
- clk_src_regs(1, B)
-};
-
-static const struct dce110_clk_src_shift cs_shift = {
- CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT)
-};
-
-static const struct dce110_clk_src_mask cs_mask = {
- CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK)
-};
-
-static struct clock_source *dcn303_clock_source_create(struct dc_context *ctx, struct dc_bios *bios,
- enum clock_source_id id, const struct dce110_clk_src_regs *regs, bool dp_clk_src)
-{
- struct dce110_clk_src *clk_src = kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL);
-
- if (!clk_src)
- return NULL;
-
- if (dcn3_clk_src_construct(clk_src, ctx, bios, id, regs, &cs_shift, &cs_mask)) {
- clk_src->base.dp_clk_src = dp_clk_src;
- return &clk_src->base;
- }
-
- BREAK_TO_DEBUGGER();
- return NULL;
-}
-
-static const struct dce_hwseq_registers hwseq_reg = {
- HWSEQ_DCN303_REG_LIST()
-};
-
-static const struct dce_hwseq_shift hwseq_shift = {
- HWSEQ_DCN303_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce_hwseq_mask hwseq_mask = {
- HWSEQ_DCN303_MASK_SH_LIST(_MASK)
-};
-
-static struct dce_hwseq *dcn303_hwseq_create(struct dc_context *ctx)
-{
- struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL);
-
- if (hws) {
- hws->ctx = ctx;
- hws->regs = &hwseq_reg;
- hws->shifts = &hwseq_shift;
- hws->masks = &hwseq_mask;
- }
- return hws;
-}
-
-#define hubp_regs(id)\
- [id] = { HUBP_REG_LIST_DCN30(id) }
-
-static const struct dcn_hubp2_registers hubp_regs[] = {
- hubp_regs(0),
- hubp_regs(1)
-};
-
-static const struct dcn_hubp2_shift hubp_shift = {
- HUBP_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn_hubp2_mask hubp_mask = {
- HUBP_MASK_SH_LIST_DCN30(_MASK)
-};
-
-static struct hubp *dcn303_hubp_create(struct dc_context *ctx, uint32_t inst)
-{
- struct dcn20_hubp *hubp2 = kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL);
-
- if (!hubp2)
- return NULL;
-
- if (hubp3_construct(hubp2, ctx, inst, &hubp_regs[inst], &hubp_shift, &hubp_mask))
- return &hubp2->base;
-
- BREAK_TO_DEBUGGER();
- kfree(hubp2);
- return NULL;
-}
-
-#define dpp_regs(id)\
- [id] = { DPP_REG_LIST_DCN30(id) }
-
-static const struct dcn3_dpp_registers dpp_regs[] = {
- dpp_regs(0),
- dpp_regs(1)
-};
-
-static const struct dcn3_dpp_shift tf_shift = {
- DPP_REG_LIST_SH_MASK_DCN30(__SHIFT)
-};
-
-static const struct dcn3_dpp_mask tf_mask = {
- DPP_REG_LIST_SH_MASK_DCN30(_MASK)
-};
-
-static struct dpp *dcn303_dpp_create(struct dc_context *ctx, uint32_t inst)
-{
- struct dcn3_dpp *dpp = kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL);
-
- if (!dpp)
- return NULL;
-
- if (dpp3_construct(dpp, ctx, inst, &dpp_regs[inst], &tf_shift, &tf_mask))
- return &dpp->base;
-
- BREAK_TO_DEBUGGER();
- kfree(dpp);
- return NULL;
-}
-
-#define opp_regs(id)\
- [id] = { OPP_REG_LIST_DCN30(id) }
-
-static const struct dcn20_opp_registers opp_regs[] = {
- opp_regs(0),
- opp_regs(1)
-};
-
-static const struct dcn20_opp_shift opp_shift = {
- OPP_MASK_SH_LIST_DCN20(__SHIFT)
-};
-
-static const struct dcn20_opp_mask opp_mask = {
- OPP_MASK_SH_LIST_DCN20(_MASK)
-};
-
-static struct output_pixel_processor *dcn303_opp_create(struct dc_context *ctx, uint32_t inst)
-{
- struct dcn20_opp *opp = kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL);
-
- if (!opp) {
- BREAK_TO_DEBUGGER();
- return NULL;
- }
-
- dcn20_opp_construct(opp, ctx, inst, &opp_regs[inst], &opp_shift, &opp_mask);
- return &opp->base;
-}
-
-#define optc_regs(id)\
- [id] = { OPTC_COMMON_REG_LIST_DCN3_0(id) }
-
-static const struct dcn_optc_registers optc_regs[] = {
- optc_regs(0),
- optc_regs(1)
-};
-
-static const struct dcn_optc_shift optc_shift = {
- OPTC_COMMON_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn_optc_mask optc_mask = {
- OPTC_COMMON_MASK_SH_LIST_DCN30(_MASK)
-};
-
-static struct timing_generator *dcn303_timing_generator_create(struct dc_context *ctx, uint32_t instance)
-{
- struct optc *tgn10 = kzalloc(sizeof(struct optc), GFP_KERNEL);
-
- if (!tgn10)
- return NULL;
-
- tgn10->base.inst = instance;
- tgn10->base.ctx = ctx;
-
- tgn10->tg_regs = &optc_regs[instance];
- tgn10->tg_shift = &optc_shift;
- tgn10->tg_mask = &optc_mask;
-
- dcn30_timing_generator_init(tgn10);
-
- return &tgn10->base;
-}
-
-static const struct dcn30_mpc_registers mpc_regs = {
- MPC_REG_LIST_DCN3_0(0),
- MPC_REG_LIST_DCN3_0(1),
- MPC_OUT_MUX_REG_LIST_DCN3_0(0),
- MPC_OUT_MUX_REG_LIST_DCN3_0(1),
- MPC_RMU_GLOBAL_REG_LIST_DCN3AG,
- MPC_RMU_REG_LIST_DCN3AG(0),
- MPC_DWB_MUX_REG_LIST_DCN3_0(0),
-};
-
-static const struct dcn30_mpc_shift mpc_shift = {
- MPC_COMMON_MASK_SH_LIST_DCN303(__SHIFT)
-};
-
-static const struct dcn30_mpc_mask mpc_mask = {
- MPC_COMMON_MASK_SH_LIST_DCN303(_MASK)
-};
-
-static struct mpc *dcn303_mpc_create(struct dc_context *ctx, int num_mpcc, int num_rmu)
-{
- struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), GFP_KERNEL);
-
- if (!mpc30)
- return NULL;
-
- dcn30_mpc_construct(mpc30, ctx, &mpc_regs, &mpc_shift, &mpc_mask, num_mpcc, num_rmu);
-
- return &mpc30->base;
-}
-
-#define dsc_regsDCN20(id)\
-[id] = { DSC_REG_LIST_DCN20(id) }
-
-static const struct dcn20_dsc_registers dsc_regs[] = {
- dsc_regsDCN20(0),
- dsc_regsDCN20(1)
-};
-
-static const struct dcn20_dsc_shift dsc_shift = {
- DSC_REG_LIST_SH_MASK_DCN20(__SHIFT)
-};
-
-static const struct dcn20_dsc_mask dsc_mask = {
- DSC_REG_LIST_SH_MASK_DCN20(_MASK)
-};
-
-static struct display_stream_compressor *dcn303_dsc_create(struct dc_context *ctx, uint32_t inst)
-{
- struct dcn20_dsc *dsc = kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL);
-
- if (!dsc) {
- BREAK_TO_DEBUGGER();
- return NULL;
- }
-
- dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask);
- return &dsc->base;
-}
-
-#define dwbc_regs_dcn3(id)\
-[id] = { DWBC_COMMON_REG_LIST_DCN30(id) }
-
-static const struct dcn30_dwbc_registers dwbc30_regs[] = {
- dwbc_regs_dcn3(0)
-};
-
-static const struct dcn30_dwbc_shift dwbc30_shift = {
- DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn30_dwbc_mask dwbc30_mask = {
- DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK)
-};
-
-static bool dcn303_dwbc_create(struct dc_context *ctx, struct resource_pool *pool)
-{
- int i;
- uint32_t pipe_count = pool->res_cap->num_dwb;
-
- for (i = 0; i < pipe_count; i++) {
- struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), GFP_KERNEL);
-
- if (!dwbc30) {
- dm_error("DC: failed to create dwbc30!\n");
- return false;
- }
-
- dcn30_dwbc_construct(dwbc30, ctx, &dwbc30_regs[i], &dwbc30_shift, &dwbc30_mask, i);
-
- pool->dwbc[i] = &dwbc30->base;
- }
- return true;
-}
-
-#define mcif_wb_regs_dcn3(id)\
-[id] = { MCIF_WB_COMMON_REG_LIST_DCN30(id) }
-
-static const struct dcn30_mmhubbub_registers mcif_wb30_regs[] = {
- mcif_wb_regs_dcn3(0)
-};
-
-static const struct dcn30_mmhubbub_shift mcif_wb30_shift = {
- MCIF_WB_COMMON_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn30_mmhubbub_mask mcif_wb30_mask = {
- MCIF_WB_COMMON_MASK_SH_LIST_DCN30(_MASK)
-};
-
-static bool dcn303_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool)
-{
- int i;
- uint32_t pipe_count = pool->res_cap->num_dwb;
-
- for (i = 0; i < pipe_count; i++) {
- struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), GFP_KERNEL);
-
- if (!mcif_wb30) {
- dm_error("DC: failed to create mcif_wb30!\n");
- return false;
- }
-
- dcn30_mmhubbub_construct(mcif_wb30, ctx, &mcif_wb30_regs[i], &mcif_wb30_shift, &mcif_wb30_mask, i);
-
- pool->mcif_wb[i] = &mcif_wb30->base;
- }
- return true;
-}
-
-#define aux_engine_regs(id)\
-[id] = {\
- AUX_COMMON_REG_LIST0(id), \
- .AUXN_IMPCAL = 0, \
- .AUXP_IMPCAL = 0, \
- .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \
-}
-
-static const struct dce110_aux_registers aux_engine_regs[] = {
- aux_engine_regs(0),
- aux_engine_regs(1)
-};
-
-static const struct dce110_aux_registers_shift aux_shift = {
- DCN_AUX_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce110_aux_registers_mask aux_mask = {
- DCN_AUX_MASK_SH_LIST(_MASK)
-};
-
-static struct dce_aux *dcn303_aux_engine_create(struct dc_context *ctx, uint32_t inst)
-{
- struct aux_engine_dce110 *aux_engine = kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL);
-
- if (!aux_engine)
- return NULL;
-
- dce110_aux_engine_construct(aux_engine, ctx, inst, SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD,
- &aux_engine_regs[inst], &aux_mask, &aux_shift, ctx->dc->caps.extended_aux_timeout_support);
-
- return &aux_engine->base;
-}
-
-#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) }
-
-static const struct dce_i2c_registers i2c_hw_regs[] = {
- i2c_inst_regs(1),
- i2c_inst_regs(2)
-};
-
-static const struct dce_i2c_shift i2c_shifts = {
- I2C_COMMON_MASK_SH_LIST_DCN2(__SHIFT)
-};
-
-static const struct dce_i2c_mask i2c_masks = {
- I2C_COMMON_MASK_SH_LIST_DCN2(_MASK)
-};
-
-static struct dce_i2c_hw *dcn303_i2c_hw_create(struct dc_context *ctx, uint32_t inst)
-{
- struct dce_i2c_hw *dce_i2c_hw = kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL);
-
- if (!dce_i2c_hw)
- return NULL;
-
- dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks);
-
- return dce_i2c_hw;
-}
-
-static const struct encoder_feature_support link_enc_feature = {
- .max_hdmi_deep_color = COLOR_DEPTH_121212,
- .max_hdmi_pixel_clock = 600000,
- .hdmi_ycbcr420_supported = true,
- .dp_ycbcr420_supported = true,
- .fec_supported = true,
- .flags.bits.IS_HBR2_CAPABLE = true,
- .flags.bits.IS_HBR3_CAPABLE = true,
- .flags.bits.IS_TPS3_CAPABLE = true,
- .flags.bits.IS_TPS4_CAPABLE = true
-};
-
-#define link_regs(id, phyid)\
- [id] = {\
- LE_DCN3_REG_LIST(id), \
- UNIPHY_DCN2_REG_LIST(phyid), \
- SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \
- }
-
-static const struct dcn10_link_enc_registers link_enc_regs[] = {
- link_regs(0, A),
- link_regs(1, B)
-};
-
-static const struct dcn10_link_enc_shift le_shift = {
- LINK_ENCODER_MASK_SH_LIST_DCN30(__SHIFT),
- DPCS_DCN2_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn10_link_enc_mask le_mask = {
- LINK_ENCODER_MASK_SH_LIST_DCN30(_MASK),
- DPCS_DCN2_MASK_SH_LIST(_MASK)
-};
-
-#define aux_regs(id)\
- [id] = { DCN2_AUX_REG_LIST(id) }
-
-static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = {
- aux_regs(0),
- aux_regs(1)
-};
-
-#define hpd_regs(id)\
- [id] = { HPD_REG_LIST(id) }
-
-static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = {
- hpd_regs(0),
- hpd_regs(1)
-};
-
-static struct link_encoder *dcn303_link_encoder_create(const struct encoder_init_data *enc_init_data)
-{
- struct dcn20_link_encoder *enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
-
- if (!enc20)
- return NULL;
-
- dcn30_link_encoder_construct(enc20, enc_init_data, &link_enc_feature,
- &link_enc_regs[enc_init_data->transmitter], &link_enc_aux_regs[enc_init_data->channel - 1],
- &link_enc_hpd_regs[enc_init_data->hpd_source], &le_shift, &le_mask);
-
- return &enc20->enc10.base;
-}
-
-static const struct dce_panel_cntl_registers panel_cntl_regs[] = {
- { DCN_PANEL_CNTL_REG_LIST() }
-};
-
-static const struct dce_panel_cntl_shift panel_cntl_shift = {
- DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce_panel_cntl_mask panel_cntl_mask = {
- DCE_PANEL_CNTL_MASK_SH_LIST(_MASK)
-};
-
-static struct panel_cntl *dcn303_panel_cntl_create(const struct panel_cntl_init_data *init_data)
-{
- struct dce_panel_cntl *panel_cntl = kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL);
-
- if (!panel_cntl)
- return NULL;
-
- dce_panel_cntl_construct(panel_cntl, init_data, &panel_cntl_regs[init_data->inst],
- &panel_cntl_shift, &panel_cntl_mask);
-
- return &panel_cntl->base;
-}
-
-static void read_dce_straps(struct dc_context *ctx, struct resource_straps *straps)
-{
- generic_reg_get(ctx, mmDC_PINSTRAPS + BASE(mmDC_PINSTRAPS_BASE_IDX),
- FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio);
-}
-
-static const struct resource_create_funcs res_create_funcs = {
- .read_dce_straps = read_dce_straps,
- .create_audio = dcn303_create_audio,
- .create_stream_encoder = dcn303_stream_encoder_create,
- .create_hwseq = dcn303_hwseq_create,
-};
-
-static const struct resource_create_funcs res_create_maximus_funcs = {
- .read_dce_straps = NULL,
- .create_audio = NULL,
- .create_stream_encoder = NULL,
- .create_hwseq = dcn303_hwseq_create,
-};
-
-static bool is_soc_bounding_box_valid(struct dc *dc)
-{
- uint32_t hw_internal_rev = dc->ctx->asic_id.hw_internal_rev;
-
- if (ASICREV_IS_BEIGE_GOBY_P(hw_internal_rev))
- return true;
-
- return false;
-}
-
-static bool init_soc_bounding_box(struct dc *dc, struct resource_pool *pool)
-{
- struct _vcs_dpi_soc_bounding_box_st *loaded_bb = &dcn3_03_soc;
- struct _vcs_dpi_ip_params_st *loaded_ip = &dcn3_03_ip;
-
- DC_LOGGER_INIT(dc->ctx->logger);
-
- if (!is_soc_bounding_box_valid(dc)) {
- DC_LOG_ERROR("%s: not valid soc bounding box/n", __func__);
- return false;
- }
-
- loaded_ip->max_num_otg = pool->pipe_count;
- loaded_ip->max_num_dpp = pool->pipe_count;
- loaded_ip->clamp_min_dcfclk = dc->config.clamp_min_dcfclk;
- dcn20_patch_bounding_box(dc, loaded_bb);
-
- if (dc->ctx->dc_bios->funcs->get_soc_bb_info) {
- struct bp_soc_bb_info bb_info = { 0 };
-
- if (dc->ctx->dc_bios->funcs->get_soc_bb_info(
- dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) {
- if (bb_info.dram_clock_change_latency_100ns > 0)
- dcn3_03_soc.dram_clock_change_latency_us =
- bb_info.dram_clock_change_latency_100ns * 10;
-
- if (bb_info.dram_sr_enter_exit_latency_100ns > 0)
- dcn3_03_soc.sr_enter_plus_exit_time_us =
- bb_info.dram_sr_enter_exit_latency_100ns * 10;
-
- if (bb_info.dram_sr_exit_latency_100ns > 0)
- dcn3_03_soc.sr_exit_time_us =
- bb_info.dram_sr_exit_latency_100ns * 10;
- }
- }
-
- return true;
-}
-
-static void dcn303_resource_destruct(struct resource_pool *pool)
-{
- unsigned int i;
-
- for (i = 0; i < pool->stream_enc_count; i++) {
- if (pool->stream_enc[i] != NULL) {
- if (pool->stream_enc[i]->vpg != NULL) {
- kfree(DCN30_VPG_FROM_VPG(pool->stream_enc[i]->vpg));
- pool->stream_enc[i]->vpg = NULL;
- }
- if (pool->stream_enc[i]->afmt != NULL) {
- kfree(DCN30_AFMT_FROM_AFMT(pool->stream_enc[i]->afmt));
- pool->stream_enc[i]->afmt = NULL;
- }
- kfree(DCN10STRENC_FROM_STRENC(pool->stream_enc[i]));
- pool->stream_enc[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->res_cap->num_dsc; i++) {
- if (pool->dscs[i] != NULL)
- dcn20_dsc_destroy(&pool->dscs[i]);
- }
-
- if (pool->mpc != NULL) {
- kfree(TO_DCN20_MPC(pool->mpc));
- pool->mpc = NULL;
- }
-
- if (pool->hubbub != NULL) {
- kfree(pool->hubbub);
- pool->hubbub = NULL;
- }
-
- for (i = 0; i < pool->pipe_count; i++) {
- if (pool->dpps[i] != NULL) {
- kfree(TO_DCN20_DPP(pool->dpps[i]));
- pool->dpps[i] = NULL;
- }
-
- if (pool->hubps[i] != NULL) {
- kfree(TO_DCN20_HUBP(pool->hubps[i]));
- pool->hubps[i] = NULL;
- }
-
- if (pool->irqs != NULL)
- dal_irq_service_destroy(&pool->irqs);
- }
-
- for (i = 0; i < pool->res_cap->num_ddc; i++) {
- if (pool->engines[i] != NULL)
- dce110_engine_destroy(&pool->engines[i]);
- if (pool->hw_i2cs[i] != NULL) {
- kfree(pool->hw_i2cs[i]);
- pool->hw_i2cs[i] = NULL;
- }
- if (pool->sw_i2cs[i] != NULL) {
- kfree(pool->sw_i2cs[i]);
- pool->sw_i2cs[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->res_cap->num_opp; i++) {
- if (pool->opps[i] != NULL)
- pool->opps[i]->funcs->opp_destroy(&pool->opps[i]);
- }
-
- for (i = 0; i < pool->res_cap->num_timing_generator; i++) {
- if (pool->timing_generators[i] != NULL) {
- kfree(DCN10TG_FROM_TG(pool->timing_generators[i]));
- pool->timing_generators[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->res_cap->num_dwb; i++) {
- if (pool->dwbc[i] != NULL) {
- kfree(TO_DCN30_DWBC(pool->dwbc[i]));
- pool->dwbc[i] = NULL;
- }
- if (pool->mcif_wb[i] != NULL) {
- kfree(TO_DCN30_MMHUBBUB(pool->mcif_wb[i]));
- pool->mcif_wb[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->audio_count; i++) {
- if (pool->audios[i])
- dce_aud_destroy(&pool->audios[i]);
- }
-
- for (i = 0; i < pool->clk_src_count; i++) {
- if (pool->clock_sources[i] != NULL)
- dcn20_clock_source_destroy(&pool->clock_sources[i]);
- }
-
- if (pool->dp_clock_source != NULL)
- dcn20_clock_source_destroy(&pool->dp_clock_source);
-
- for (i = 0; i < pool->res_cap->num_mpc_3dlut; i++) {
- if (pool->mpc_lut[i] != NULL) {
- dc_3dlut_func_release(pool->mpc_lut[i]);
- pool->mpc_lut[i] = NULL;
- }
- if (pool->mpc_shaper[i] != NULL) {
- dc_transfer_func_release(pool->mpc_shaper[i]);
- pool->mpc_shaper[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->pipe_count; i++) {
- if (pool->multiple_abms[i] != NULL)
- dce_abm_destroy(&pool->multiple_abms[i]);
- }
-
- if (pool->psr != NULL)
- dmub_psr_destroy(&pool->psr);
-
- if (pool->dccg != NULL)
- dcn_dccg_destroy(&pool->dccg);
-
- if (pool->oem_device != NULL)
- dal_ddc_service_destroy(&pool->oem_device);
-}
-
-static void dcn303_destroy_resource_pool(struct resource_pool **pool)
-{
- dcn303_resource_destruct(*pool);
- kfree(*pool);
- *pool = NULL;
-}
-
-static void dcn303_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
- unsigned int *optimal_dcfclk,
- unsigned int *optimal_fclk)
-{
- double bw_from_dram, bw_from_dram1, bw_from_dram2;
-
- bw_from_dram1 = uclk_mts * dcn3_03_soc.num_chans *
- dcn3_03_soc.dram_channel_width_bytes * (dcn3_03_soc.max_avg_dram_bw_use_normal_percent / 100);
- bw_from_dram2 = uclk_mts * dcn3_03_soc.num_chans *
- dcn3_03_soc.dram_channel_width_bytes * (dcn3_03_soc.max_avg_sdp_bw_use_normal_percent / 100);
-
- bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2;
-
- if (optimal_fclk)
- *optimal_fclk = bw_from_dram /
- (dcn3_03_soc.fabric_datapath_to_dcn_data_return_bytes *
- (dcn3_03_soc.max_avg_sdp_bw_use_normal_percent / 100));
-
- if (optimal_dcfclk)
- *optimal_dcfclk = bw_from_dram /
- (dcn3_03_soc.return_bus_width_bytes * (dcn3_03_soc.max_avg_sdp_bw_use_normal_percent / 100));
-}
-
-void dcn303_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
-{
- unsigned int i, j;
- unsigned int num_states = 0;
-
- unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0};
- unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0};
- unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0};
- unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0};
-
- unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {694, 875, 1000, 1200};
- unsigned int num_dcfclk_sta_targets = 4;
- unsigned int num_uclk_states;
-
-
- if (dc->ctx->dc_bios->vram_info.num_chans)
- dcn3_03_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans;
-
- if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
- dcn3_03_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
-
- dcn3_03_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
- dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
-
- if (bw_params->clk_table.entries[0].memclk_mhz) {
- int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0;
-
- for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
- if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
- max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
- if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
- max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
- if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
- max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
- if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
- max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
- }
- if (!max_dcfclk_mhz)
- max_dcfclk_mhz = dcn3_03_soc.clock_limits[0].dcfclk_mhz;
- if (!max_dispclk_mhz)
- max_dispclk_mhz = dcn3_03_soc.clock_limits[0].dispclk_mhz;
- if (!max_dppclk_mhz)
- max_dppclk_mhz = dcn3_03_soc.clock_limits[0].dppclk_mhz;
- if (!max_phyclk_mhz)
- max_phyclk_mhz = dcn3_03_soc.clock_limits[0].phyclk_mhz;
-
- if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
- dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz;
- num_dcfclk_sta_targets++;
- } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
- for (i = 0; i < num_dcfclk_sta_targets; i++) {
- if (dcfclk_sta_targets[i] > max_dcfclk_mhz) {
- dcfclk_sta_targets[i] = max_dcfclk_mhz;
- break;
- }
- }
- /* Update size of array since we "removed" duplicates */
- num_dcfclk_sta_targets = i + 1;
- }
-
- num_uclk_states = bw_params->clk_table.num_entries;
-
- /* Calculate optimal dcfclk for each uclk */
- for (i = 0; i < num_uclk_states; i++) {
- dcn303_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16,
- &optimal_dcfclk_for_uclk[i], NULL);
- if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz)
- optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz;
- }
-
- /* Calculate optimal uclk for each dcfclk sta target */
- for (i = 0; i < num_dcfclk_sta_targets; i++) {
- for (j = 0; j < num_uclk_states; j++) {
- if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) {
- optimal_uclk_for_dcfclk_sta_targets[i] =
- bw_params->clk_table.entries[j].memclk_mhz * 16;
- break;
- }
- }
- }
-
- i = 0;
- j = 0;
- /* create the final dcfclk and uclk table */
- while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) {
- if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) {
- dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
- dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
- } else {
- if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
- dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
- dram_speed_mts[num_states++] =
- bw_params->clk_table.entries[j++].memclk_mhz * 16;
- } else {
- j = num_uclk_states;
- }
- }
- }
-
- while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) {
- dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
- dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
- }
-
- while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES &&
- optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
- dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
- dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
- }
-
- dcn3_03_soc.num_states = num_states;
- for (i = 0; i < dcn3_03_soc.num_states; i++) {
- dcn3_03_soc.clock_limits[i].state = i;
- dcn3_03_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
- dcn3_03_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
- dcn3_03_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];
-
- /* Fill all states with max values of all other clocks */
- dcn3_03_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
- dcn3_03_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
- dcn3_03_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz;
- /* Populate from bw_params for DTBCLK, SOCCLK */
- if (!bw_params->clk_table.entries[i].dtbclk_mhz && i > 0)
- dcn3_03_soc.clock_limits[i].dtbclk_mhz = dcn3_03_soc.clock_limits[i-1].dtbclk_mhz;
- else
- dcn3_03_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
- if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0)
- dcn3_03_soc.clock_limits[i].socclk_mhz = dcn3_03_soc.clock_limits[i-1].socclk_mhz;
- else
- dcn3_03_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz;
- /* These clocks cannot come from bw_params, always fill from dcn3_03_soc[1] */
- /* FCLK, PHYCLK_D18, DSCCLK */
- dcn3_03_soc.clock_limits[i].phyclk_d18_mhz = dcn3_03_soc.clock_limits[0].phyclk_d18_mhz;
- dcn3_03_soc.clock_limits[i].dscclk_mhz = dcn3_03_soc.clock_limits[0].dscclk_mhz;
- }
-
- // WA: patch strobe modes to compensate for DCN303 BW issue
- if (dcn3_03_soc.num_chans <= 4) {
- for (i = 0; i < dcn3_03_soc.num_states; i++) {
- if (dcn3_03_soc.clock_limits[i].dram_speed_mts > 1700)
- break;
-
- if (dcn3_03_soc.clock_limits[i].dram_speed_mts >= 1500) {
- dcn3_03_soc.clock_limits[i].dcfclk_mhz = 100;
- dcn3_03_soc.clock_limits[i].fabricclk_mhz = 100;
- }
- }
- }
-
- /* re-init DML with updated bb */
- dml_init_instance(&dc->dml, &dcn3_03_soc, &dcn3_03_ip, DML_PROJECT_DCN30);
- if (dc->current_state)
- dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_03_soc, &dcn3_03_ip, DML_PROJECT_DCN30);
- }
-}
-
-static struct resource_funcs dcn303_res_pool_funcs = {
- .destroy = dcn303_destroy_resource_pool,
- .link_enc_create = dcn303_link_encoder_create,
- .panel_cntl_create = dcn303_panel_cntl_create,
- .validate_bandwidth = dcn30_validate_bandwidth,
- .calculate_wm_and_dlg = dcn30_calculate_wm_and_dlg,
- .update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
- .populate_dml_pipes = dcn30_populate_dml_pipes_from_context,
- .acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer,
- .add_stream_to_ctx = dcn30_add_stream_to_ctx,
- .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
- .remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
- .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context,
- .set_mcif_arb_params = dcn30_set_mcif_arb_params,
- .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link,
- .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut,
- .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut,
- .update_bw_bounding_box = dcn303_update_bw_bounding_box,
- .patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
-};
-
-static struct dc_cap_funcs cap_funcs = {
- .get_dcc_compression_cap = dcn20_get_dcc_compression_cap
-};
-
-static const struct bios_registers bios_regs = {
- NBIO_SR(BIOS_SCRATCH_3),
- NBIO_SR(BIOS_SCRATCH_6)
-};
-
-static const struct dccg_registers dccg_regs = {
- DCCG_REG_LIST_DCN3_03()
-};
-
-static const struct dccg_shift dccg_shift = {
- DCCG_MASK_SH_LIST_DCN3_03(__SHIFT)
-};
-
-static const struct dccg_mask dccg_mask = {
- DCCG_MASK_SH_LIST_DCN3_03(_MASK)
-};
-
-#define abm_regs(id)\
- [id] = { ABM_DCN302_REG_LIST(id) }
-
-static const struct dce_abm_registers abm_regs[] = {
- abm_regs(0),
- abm_regs(1)
-};
-
-static const struct dce_abm_shift abm_shift = {
- ABM_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dce_abm_mask abm_mask = {
- ABM_MASK_SH_LIST_DCN30(_MASK)
-};
-
-static bool dcn303_resource_construct(
- uint8_t num_virtual_links,
- struct dc *dc,
- struct resource_pool *pool)
-{
- int i;
- struct dc_context *ctx = dc->ctx;
- struct irq_service_init_data init_data;
- struct ddc_service_init_data ddc_init_data;
-
- ctx->dc_bios->regs = &bios_regs;
-
- pool->res_cap = &res_cap_dcn303;
-
- pool->funcs = &dcn303_res_pool_funcs;
-
- /*************************************************
- * Resource + asic cap harcoding *
- *************************************************/
- pool->underlay_pipe_index = NO_UNDERLAY_PIPE;
- pool->pipe_count = pool->res_cap->num_timing_generator;
- pool->mpcc_count = pool->res_cap->num_timing_generator;
- dc->caps.max_downscale_ratio = 600;
- dc->caps.i2c_speed_in_khz = 100;
- dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.4 w/a applied by derfault*/
- dc->caps.max_cursor_size = 256;
- dc->caps.min_horizontal_blanking_period = 80;
- dc->caps.dmdata_alloc_size = 2048;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- dc->caps.mall_size_per_mem_channel = 4;
- /* total size = mall per channel * num channels * 1024 * 1024 */
- dc->caps.mall_size_total = dc->caps.mall_size_per_mem_channel *
- dc->ctx->dc_bios->vram_info.num_chans *
- 1024 * 1024;
- dc->caps.cursor_cache_size =
- dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8;
-#endif
- dc->caps.max_slave_planes = 1;
- dc->caps.post_blend_color_processing = true;
- dc->caps.force_dp_tps4_for_cp2520 = true;
- dc->caps.extended_aux_timeout_support = true;
- dc->caps.dmcub_support = true;
-
- /* Color pipeline capabilities */
- dc->caps.color.dpp.dcn_arch = 1;
- dc->caps.color.dpp.input_lut_shared = 0;
- dc->caps.color.dpp.icsc = 1;
- dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr
- dc->caps.color.dpp.dgam_rom_caps.srgb = 1;
- dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1;
- dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1;
- dc->caps.color.dpp.dgam_rom_caps.pq = 1;
- dc->caps.color.dpp.dgam_rom_caps.hlg = 1;
- dc->caps.color.dpp.post_csc = 1;
- dc->caps.color.dpp.gamma_corr = 1;
- dc->caps.color.dpp.dgam_rom_for_yuv = 0;
-
- dc->caps.color.dpp.hw_3d_lut = 1;
- dc->caps.color.dpp.ogam_ram = 1;
- // no OGAM ROM on DCN3
- dc->caps.color.dpp.ogam_rom_caps.srgb = 0;
- dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0;
- dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0;
- dc->caps.color.dpp.ogam_rom_caps.pq = 0;
- dc->caps.color.dpp.ogam_rom_caps.hlg = 0;
- dc->caps.color.dpp.ocsc = 0;
-
- dc->caps.color.mpc.gamut_remap = 1;
- dc->caps.color.mpc.num_3dluts = pool->res_cap->num_mpc_3dlut; //3
- dc->caps.color.mpc.ogam_ram = 1;
- dc->caps.color.mpc.ogam_rom_caps.srgb = 0;
- dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0;
- dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0;
- dc->caps.color.mpc.ogam_rom_caps.pq = 0;
- dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
- dc->caps.color.mpc.ocsc = 1;
-
- if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV)
- dc->debug = debug_defaults_drv;
- else
- dc->debug = debug_defaults_diags;
-
- // Init the vm_helper
- if (dc->vm_helper)
- vm_helper_init(dc->vm_helper, 16);
-
- /*************************************************
- * Create resources *
- *************************************************/
-
- /* Clock Sources for Pixel Clock*/
- pool->clock_sources[DCN303_CLK_SRC_PLL0] =
- dcn303_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL0,
- &clk_src_regs[0], false);
- pool->clock_sources[DCN303_CLK_SRC_PLL1] =
- dcn303_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL1,
- &clk_src_regs[1], false);
-
- pool->clk_src_count = DCN303_CLK_SRC_TOTAL;
-
- /* todo: not reuse phy_pll registers */
- pool->dp_clock_source =
- dcn303_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_ID_DP_DTO,
- &clk_src_regs[0], true);
-
- for (i = 0; i < pool->clk_src_count; i++) {
- if (pool->clock_sources[i] == NULL) {
- dm_error("DC: failed to create clock sources!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
- }
-
- /* DCCG */
- pool->dccg = dccg30_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask);
- if (pool->dccg == NULL) {
- dm_error("DC: failed to create dccg!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
-
- /* PP Lib and SMU interfaces */
- init_soc_bounding_box(dc, pool);
-
- /* DML */
- dml_init_instance(&dc->dml, &dcn3_03_soc, &dcn3_03_ip, DML_PROJECT_DCN30);
-
- /* IRQ */
- init_data.ctx = dc->ctx;
- pool->irqs = dal_irq_service_dcn303_create(&init_data);
- if (!pool->irqs)
- goto create_fail;
-
- /* HUBBUB */
- pool->hubbub = dcn303_hubbub_create(ctx);
- if (pool->hubbub == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create hubbub!\n");
- goto create_fail;
- }
-
- /* HUBPs, DPPs, OPPs and TGs */
- for (i = 0; i < pool->pipe_count; i++) {
- pool->hubps[i] = dcn303_hubp_create(ctx, i);
- if (pool->hubps[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create hubps!\n");
- goto create_fail;
- }
-
- pool->dpps[i] = dcn303_dpp_create(ctx, i);
- if (pool->dpps[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create dpps!\n");
- goto create_fail;
- }
- }
-
- for (i = 0; i < pool->res_cap->num_opp; i++) {
- pool->opps[i] = dcn303_opp_create(ctx, i);
- if (pool->opps[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create output pixel processor!\n");
- goto create_fail;
- }
- }
-
- for (i = 0; i < pool->res_cap->num_timing_generator; i++) {
- pool->timing_generators[i] = dcn303_timing_generator_create(ctx, i);
- if (pool->timing_generators[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create tg!\n");
- goto create_fail;
- }
- }
- pool->timing_generator_count = i;
-
- /* PSR */
- pool->psr = dmub_psr_create(ctx);
- if (pool->psr == NULL) {
- dm_error("DC: failed to create psr!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
-
- /* ABM */
- for (i = 0; i < pool->res_cap->num_timing_generator; i++) {
- pool->multiple_abms[i] = dmub_abm_create(ctx, &abm_regs[i], &abm_shift, &abm_mask);
- if (pool->multiple_abms[i] == NULL) {
- dm_error("DC: failed to create abm for pipe %d!\n", i);
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
- }
-
- /* MPC and DSC */
- pool->mpc = dcn303_mpc_create(ctx, pool->mpcc_count, pool->res_cap->num_mpc_3dlut);
- if (pool->mpc == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create mpc!\n");
- goto create_fail;
- }
-
- for (i = 0; i < pool->res_cap->num_dsc; i++) {
- pool->dscs[i] = dcn303_dsc_create(ctx, i);
- if (pool->dscs[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create display stream compressor %d!\n", i);
- goto create_fail;
- }
- }
-
- /* DWB and MMHUBBUB */
- if (!dcn303_dwbc_create(ctx, pool)) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create dwbc!\n");
- goto create_fail;
- }
-
- if (!dcn303_mmhubbub_create(ctx, pool)) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create mcif_wb!\n");
- goto create_fail;
- }
-
- /* AUX and I2C */
- for (i = 0; i < pool->res_cap->num_ddc; i++) {
- pool->engines[i] = dcn303_aux_engine_create(ctx, i);
- if (pool->engines[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC:failed to create aux engine!!\n");
- goto create_fail;
- }
- pool->hw_i2cs[i] = dcn303_i2c_hw_create(ctx, i);
- if (pool->hw_i2cs[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC:failed to create hw i2c!!\n");
- goto create_fail;
- }
- pool->sw_i2cs[i] = NULL;
- }
-
- /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */
- if (!resource_construct(num_virtual_links, dc, pool,
- (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment) ?
- &res_create_funcs : &res_create_maximus_funcs)))
- goto create_fail;
-
- /* HW Sequencer and Plane caps */
- dcn303_hw_sequencer_construct(dc);
-
- dc->caps.max_planes = pool->pipe_count;
-
- for (i = 0; i < dc->caps.max_planes; ++i)
- dc->caps.planes[i] = plane_cap;
-
- dc->cap_funcs = cap_funcs;
-
- if (dc->ctx->dc_bios->fw_info.oem_i2c_present) {
- ddc_init_data.ctx = dc->ctx;
- ddc_init_data.link = NULL;
- ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id;
- ddc_init_data.id.enum_id = 0;
- ddc_init_data.id.type = OBJECT_TYPE_GENERIC;
- pool->oem_device = dal_ddc_service_create(&ddc_init_data);
- } else {
- pool->oem_device = NULL;
- }
-
- return true;
-
-create_fail:
-
- dcn303_resource_destruct(pool);
-
- return false;
-}
-
-struct resource_pool *dcn303_create_resource_pool(const struct dc_init_data *init_data, struct dc *dc)
-{
- struct resource_pool *pool = kzalloc(sizeof(struct resource_pool), GFP_KERNEL);
-
- if (!pool)
- return NULL;
-
- if (dcn303_resource_construct(init_data->num_virtual_links, dc, pool))
- return pool;
-
- BREAK_TO_DEBUGGER();
- kfree(pool);
- return NULL;
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.h b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.h
deleted file mode 100644
index 5b590c169763..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.h
+++ /dev/null
@@ -1,17 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright (C) 2021 Advanced Micro Devices, Inc.
- *
- * Authors: AMD
- */
-
-#ifndef _DCN303_RESOURCE_H_
-#define _DCN303_RESOURCE_H_
-
-#include "core_types.h"
-
-struct resource_pool *dcn303_create_resource_pool(const struct dc_init_data *init_data, struct dc *dc);
-
-void dcn303_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
-
-#endif /* _DCN303_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
index d20e3b8ccc30..d510e4652c18 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
@@ -1,46 +1,13 @@
+# SPDX-License-Identifier: MIT
+# Copyright © 2024 Advanced Micro Devices, Inc. All rights reserved.
#
-# (c) Copyright 2020 Advanced Micro Devices, Inc. All the rights reserved
-#
-# All rights reserved. This notice is intended as a precaution against
-# inadvertent publication and does not imply publication or any waiver
-# of confidentiality. The year included in the foregoing notice is the
-# year of creation of the work.
-#
-# Authors: AMD
#
# Makefile for dcn31.
-DCN31 = dcn31_resource.o dcn31_hubbub.o dcn31_hwseq.o dcn31_init.o dcn31_hubp.o \
- dcn31_dccg.o dcn31_optc.o dcn31_dio_link_encoder.o dcn31_panel_cntl.o \
- dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \
+DCN31 = dcn31_panel_cntl.o \
+ dcn31_apg.o \
dcn31_afmt.o dcn31_vpg.o
-ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o := -msse
-endif
-
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -mhard-float
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -mpreferred-stack-boundary=4
-else
-CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -msse2
-endif
-endif
-
AMD_DAL_DCN31 = $(addprefix $(AMDDALPATH)/dc/dcn31/,$(DCN31))
AMD_DISPLAY_FILES += $(AMD_DAL_DCN31)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c
index de5e18c2a3ac..05aac3e444b4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c
@@ -72,40 +72,6 @@ static void apg31_disable(
REG_UPDATE(APG_CONTROL2, APG_ENABLE, 0);
}
-static union audio_cea_channels speakers_to_channels(
- struct audio_speaker_flags speaker_flags)
-{
- union audio_cea_channels cea_channels = {0};
-
- /* these are one to one */
- cea_channels.channels.FL = speaker_flags.FL_FR;
- cea_channels.channels.FR = speaker_flags.FL_FR;
- cea_channels.channels.LFE = speaker_flags.LFE;
- cea_channels.channels.FC = speaker_flags.FC;
-
- /* if Rear Left and Right exist move RC speaker to channel 7
- * otherwise to channel 5
- */
- if (speaker_flags.RL_RR) {
- cea_channels.channels.RL_RC = speaker_flags.RL_RR;
- cea_channels.channels.RR = speaker_flags.RL_RR;
- cea_channels.channels.RC_RLC_FLC = speaker_flags.RC;
- } else {
- cea_channels.channels.RL_RC = speaker_flags.RC;
- }
-
- /* FRONT Left Right Center and REAR Left Right Center are exclusive */
- if (speaker_flags.FLC_FRC) {
- cea_channels.channels.RC_RLC_FLC = speaker_flags.FLC_FRC;
- cea_channels.channels.RRC_FRC = speaker_flags.FLC_FRC;
- } else {
- cea_channels.channels.RC_RLC_FLC = speaker_flags.RLC_RRC;
- cea_channels.channels.RRC_FRC = speaker_flags.RLC_RRC;
- }
-
- return cea_channels;
-}
-
static void apg31_se_audio_setup(
struct apg *apg,
unsigned int az_inst,
@@ -113,44 +79,24 @@ static void apg31_se_audio_setup(
{
struct dcn31_apg *apg31 = DCN31_APG_FROM_APG(apg);
- uint32_t speakers = 0;
- uint32_t channels = 0;
-
ASSERT(audio_info);
/* This should not happen.it does so we don't get BSOD*/
if (audio_info == NULL)
return;
- speakers = audio_info->flags.info.ALLSPEAKERS;
- channels = speakers_to_channels(audio_info->flags.speaker_flags).all;
-
/* DisplayPort only allows for one audio stream with stream ID 0 */
REG_UPDATE(APG_CONTROL2, APG_DP_AUDIO_STREAM_ID, 0);
/* When running in "pair mode", pairs of audio channels have their own enable
* this is for really old audio drivers */
REG_UPDATE(APG_DBG_GEN_CONTROL, APG_DBG_AUDIO_CHANNEL_ENABLE, 0xFF);
- // REG_UPDATE(APG_DBG_GEN_CONTROL, APG_DBG_AUDIO_CHANNEL_ENABLE, channels);
/* Disable forced mem power off */
REG_UPDATE(APG_MEM_PWR, APG_MEM_PWR_FORCE, 0);
-
- apg31_enable(apg);
-}
-
-static void apg31_audio_mute_control(
- struct apg *apg,
- bool mute)
-{
- if (mute)
- apg31_disable(apg);
- else
- apg31_enable(apg);
}
static struct apg_funcs dcn31_apg_funcs = {
.se_audio_setup = apg31_se_audio_setup,
- .audio_mute_control = apg31_audio_mute_control,
.enable_apg = apg31_enable,
.disable_apg = apg31_disable,
};
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.h
index 24f568e120d8..1b81f6773c53 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.h
@@ -84,10 +84,6 @@ struct apg_funcs {
unsigned int az_inst,
struct audio_info *audio_info);
- void (*audio_mute_control)(
- struct apg *apg,
- bool mute);
-
void (*enable_apg)(
struct apg *apg);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c
deleted file mode 100644
index 815481a3ef54..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c
+++ /dev/null
@@ -1,660 +0,0 @@
-/*
- * Copyright 2018 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include "reg_helper.h"
-#include "core_types.h"
-#include "dcn31_dccg.h"
-#include "dal_asic_id.h"
-
-#define TO_DCN_DCCG(dccg)\
- container_of(dccg, struct dcn_dccg, base)
-
-#define REG(reg) \
- (dccg_dcn->regs->reg)
-
-#undef FN
-#define FN(reg_name, field_name) \
- dccg_dcn->dccg_shift->field_name, dccg_dcn->dccg_mask->field_name
-
-#define CTX \
- dccg_dcn->base.ctx
-#define DC_LOGGER \
- dccg->ctx->logger
-
-static void dccg31_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk)
-{
- struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
-
- if (dccg->ref_dppclk && req_dppclk) {
- int ref_dppclk = dccg->ref_dppclk;
- int modulo, phase;
-
- // phase / modulo = dpp pipe clk / dpp global clk
- modulo = 0xff; // use FF at the end
- phase = ((modulo * req_dppclk) + ref_dppclk - 1) / ref_dppclk;
-
- if (phase > 0xff) {
- ASSERT(false);
- phase = 0xff;
- }
-
- REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0,
- DPPCLK0_DTO_PHASE, phase,
- DPPCLK0_DTO_MODULO, modulo);
- REG_UPDATE(DPPCLK_DTO_CTRL,
- DPPCLK_DTO_ENABLE[dpp_inst], 1);
- } else {
- //DTO must be enabled to generate a 0Hz clock output
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) {
- REG_UPDATE(DPPCLK_DTO_CTRL,
- DPPCLK_DTO_ENABLE[dpp_inst], 1);
- REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0,
- DPPCLK0_DTO_PHASE, 0,
- DPPCLK0_DTO_MODULO, 1);
- } else {
- REG_UPDATE(DPPCLK_DTO_CTRL,
- DPPCLK_DTO_ENABLE[dpp_inst], 0);
- }
- }
- dccg->pipe_dppclk_khz[dpp_inst] = req_dppclk;
-}
-
-static enum phyd32clk_clock_source get_phy_mux_symclk(
- struct dcn_dccg *dccg_dcn,
- enum phyd32clk_clock_source src)
-{
- if (dccg_dcn->base.ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) {
- if (src == PHYD32CLKC)
- src = PHYD32CLKF;
- if (src == PHYD32CLKD)
- src = PHYD32CLKG;
- }
- return src;
-}
-
-static void dccg31_enable_dpstreamclk(struct dccg *dccg, int otg_inst)
-{
- struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
-
- /* enabled to select one of the DTBCLKs for pipe */
- switch (otg_inst) {
- case 0:
- REG_UPDATE(DPSTREAMCLK_CNTL,
- DPSTREAMCLK_PIPE0_EN, 1);
- break;
- case 1:
- REG_UPDATE(DPSTREAMCLK_CNTL,
- DPSTREAMCLK_PIPE1_EN, 1);
- break;
- case 2:
- REG_UPDATE(DPSTREAMCLK_CNTL,
- DPSTREAMCLK_PIPE2_EN, 1);
- break;
- case 3:
- REG_UPDATE(DPSTREAMCLK_CNTL,
- DPSTREAMCLK_PIPE3_EN, 1);
- break;
- default:
- BREAK_TO_DEBUGGER();
- return;
- }
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
- DPSTREAMCLK_ROOT_GATE_DISABLE, 1);
-}
-
-static void dccg31_disable_dpstreamclk(struct dccg *dccg, int otg_inst)
-{
- struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
-
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
- DPSTREAMCLK_ROOT_GATE_DISABLE, 0);
-
- switch (otg_inst) {
- case 0:
- REG_UPDATE(DPSTREAMCLK_CNTL,
- DPSTREAMCLK_PIPE0_EN, 0);
- break;
- case 1:
- REG_UPDATE(DPSTREAMCLK_CNTL,
- DPSTREAMCLK_PIPE1_EN, 0);
- break;
- case 2:
- REG_UPDATE(DPSTREAMCLK_CNTL,
- DPSTREAMCLK_PIPE2_EN, 0);
- break;
- case 3:
- REG_UPDATE(DPSTREAMCLK_CNTL,
- DPSTREAMCLK_PIPE3_EN, 0);
- break;
- default:
- BREAK_TO_DEBUGGER();
- return;
- }
-}
-
-void dccg31_set_dpstreamclk(
- struct dccg *dccg,
- enum hdmistreamclk_source src,
- int otg_inst)
-{
- if (src == REFCLK)
- dccg31_disable_dpstreamclk(dccg, otg_inst);
- else
- dccg31_enable_dpstreamclk(dccg, otg_inst);
-}
-
-void dccg31_enable_symclk32_se(
- struct dccg *dccg,
- int hpo_se_inst,
- enum phyd32clk_clock_source phyd32clk)
-{
- struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
-
- phyd32clk = get_phy_mux_symclk(dccg_dcn, phyd32clk);
-
- /* select one of the PHYD32CLKs as the source for symclk32_se */
- switch (hpo_se_inst) {
- case 0:
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
- SYMCLK32_ROOT_SE0_GATE_DISABLE, 1);
- REG_UPDATE_2(SYMCLK32_SE_CNTL,
- SYMCLK32_SE0_SRC_SEL, phyd32clk,
- SYMCLK32_SE0_EN, 1);
- break;
- case 1:
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
- SYMCLK32_ROOT_SE1_GATE_DISABLE, 1);
- REG_UPDATE_2(SYMCLK32_SE_CNTL,
- SYMCLK32_SE1_SRC_SEL, phyd32clk,
- SYMCLK32_SE1_EN, 1);
- break;
- case 2:
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
- SYMCLK32_ROOT_SE2_GATE_DISABLE, 1);
- REG_UPDATE_2(SYMCLK32_SE_CNTL,
- SYMCLK32_SE2_SRC_SEL, phyd32clk,
- SYMCLK32_SE2_EN, 1);
- break;
- case 3:
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
- SYMCLK32_ROOT_SE3_GATE_DISABLE, 1);
- REG_UPDATE_2(SYMCLK32_SE_CNTL,
- SYMCLK32_SE3_SRC_SEL, phyd32clk,
- SYMCLK32_SE3_EN, 1);
- break;
- default:
- BREAK_TO_DEBUGGER();
- return;
- }
-}
-
-void dccg31_disable_symclk32_se(
- struct dccg *dccg,
- int hpo_se_inst)
-{
- struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
-
- /* set refclk as the source for symclk32_se */
- switch (hpo_se_inst) {
- case 0:
- REG_UPDATE_2(SYMCLK32_SE_CNTL,
- SYMCLK32_SE0_SRC_SEL, 0,
- SYMCLK32_SE0_EN, 0);
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
- SYMCLK32_ROOT_SE0_GATE_DISABLE, 0);
- break;
- case 1:
- REG_UPDATE_2(SYMCLK32_SE_CNTL,
- SYMCLK32_SE1_SRC_SEL, 0,
- SYMCLK32_SE1_EN, 0);
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
- SYMCLK32_ROOT_SE1_GATE_DISABLE, 0);
- break;
- case 2:
- REG_UPDATE_2(SYMCLK32_SE_CNTL,
- SYMCLK32_SE2_SRC_SEL, 0,
- SYMCLK32_SE2_EN, 0);
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
- SYMCLK32_ROOT_SE2_GATE_DISABLE, 0);
- break;
- case 3:
- REG_UPDATE_2(SYMCLK32_SE_CNTL,
- SYMCLK32_SE3_SRC_SEL, 0,
- SYMCLK32_SE3_EN, 0);
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
- SYMCLK32_ROOT_SE3_GATE_DISABLE, 0);
- break;
- default:
- BREAK_TO_DEBUGGER();
- return;
- }
-}
-
-void dccg31_enable_symclk32_le(
- struct dccg *dccg,
- int hpo_le_inst,
- enum phyd32clk_clock_source phyd32clk)
-{
- struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
-
- phyd32clk = get_phy_mux_symclk(dccg_dcn, phyd32clk);
-
- /* select one of the PHYD32CLKs as the source for symclk32_le */
- switch (hpo_le_inst) {
- case 0:
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
- SYMCLK32_ROOT_LE0_GATE_DISABLE, 1);
- REG_UPDATE_2(SYMCLK32_LE_CNTL,
- SYMCLK32_LE0_SRC_SEL, phyd32clk,
- SYMCLK32_LE0_EN, 1);
- break;
- case 1:
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
- SYMCLK32_ROOT_LE1_GATE_DISABLE, 1);
- REG_UPDATE_2(SYMCLK32_LE_CNTL,
- SYMCLK32_LE1_SRC_SEL, phyd32clk,
- SYMCLK32_LE1_EN, 1);
- break;
- default:
- BREAK_TO_DEBUGGER();
- return;
- }
-}
-
-void dccg31_disable_symclk32_le(
- struct dccg *dccg,
- int hpo_le_inst)
-{
- struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
-
- /* set refclk as the source for symclk32_le */
- switch (hpo_le_inst) {
- case 0:
- REG_UPDATE_2(SYMCLK32_LE_CNTL,
- SYMCLK32_LE0_SRC_SEL, 0,
- SYMCLK32_LE0_EN, 0);
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
- SYMCLK32_ROOT_LE0_GATE_DISABLE, 0);
- break;
- case 1:
- REG_UPDATE_2(SYMCLK32_LE_CNTL,
- SYMCLK32_LE1_SRC_SEL, 0,
- SYMCLK32_LE1_EN, 0);
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
- SYMCLK32_ROOT_LE1_GATE_DISABLE, 0);
- break;
- default:
- BREAK_TO_DEBUGGER();
- return;
- }
-}
-
-static void dccg31_disable_dscclk(struct dccg *dccg, int inst)
-{
- struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
-
- if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc)
- return;
- //DTO must be enabled to generate a 0 Hz clock output
- switch (inst) {
- case 0:
- REG_UPDATE(DSCCLK_DTO_CTRL,
- DSCCLK0_DTO_ENABLE, 1);
- REG_UPDATE_2(DSCCLK0_DTO_PARAM,
- DSCCLK0_DTO_PHASE, 0,
- DSCCLK0_DTO_MODULO, 1);
- break;
- case 1:
- REG_UPDATE(DSCCLK_DTO_CTRL,
- DSCCLK1_DTO_ENABLE, 1);
- REG_UPDATE_2(DSCCLK1_DTO_PARAM,
- DSCCLK1_DTO_PHASE, 0,
- DSCCLK1_DTO_MODULO, 1);
- break;
- case 2:
- REG_UPDATE(DSCCLK_DTO_CTRL,
- DSCCLK2_DTO_ENABLE, 1);
- REG_UPDATE_2(DSCCLK2_DTO_PARAM,
- DSCCLK2_DTO_PHASE, 0,
- DSCCLK2_DTO_MODULO, 1);
- break;
- default:
- BREAK_TO_DEBUGGER();
- return;
- }
-}
-
-static void dccg31_enable_dscclk(struct dccg *dccg, int inst)
-{
- struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
-
- if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc)
- return;
- //Disable DTO
- switch (inst) {
- case 0:
- REG_UPDATE_2(DSCCLK0_DTO_PARAM,
- DSCCLK0_DTO_PHASE, 0,
- DSCCLK0_DTO_MODULO, 0);
- REG_UPDATE(DSCCLK_DTO_CTRL,
- DSCCLK0_DTO_ENABLE, 0);
- break;
- case 1:
- REG_UPDATE_2(DSCCLK1_DTO_PARAM,
- DSCCLK1_DTO_PHASE, 0,
- DSCCLK1_DTO_MODULO, 0);
- REG_UPDATE(DSCCLK_DTO_CTRL,
- DSCCLK1_DTO_ENABLE, 0);
- break;
- case 2:
- REG_UPDATE_2(DSCCLK2_DTO_PARAM,
- DSCCLK2_DTO_PHASE, 0,
- DSCCLK2_DTO_MODULO, 0);
- REG_UPDATE(DSCCLK_DTO_CTRL,
- DSCCLK2_DTO_ENABLE, 0);
- break;
- default:
- BREAK_TO_DEBUGGER();
- return;
- }
-}
-
-void dccg31_set_physymclk(
- struct dccg *dccg,
- int phy_inst,
- enum physymclk_clock_source clk_src,
- bool force_enable)
-{
- struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
-
- /* Force PHYSYMCLK on and Select phyd32clk as the source of clock which is output to PHY through DCIO */
- switch (phy_inst) {
- case 0:
- if (force_enable)
- REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL,
- PHYASYMCLK_FORCE_EN, 1,
- PHYASYMCLK_FORCE_SRC_SEL, clk_src);
- else
- REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL,
- PHYASYMCLK_FORCE_EN, 0,
- PHYASYMCLK_FORCE_SRC_SEL, 0);
- break;
- case 1:
- if (force_enable)
- REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL,
- PHYBSYMCLK_FORCE_EN, 1,
- PHYBSYMCLK_FORCE_SRC_SEL, clk_src);
- else
- REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL,
- PHYBSYMCLK_FORCE_EN, 0,
- PHYBSYMCLK_FORCE_SRC_SEL, 0);
- break;
- case 2:
- if (force_enable)
- REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL,
- PHYCSYMCLK_FORCE_EN, 1,
- PHYCSYMCLK_FORCE_SRC_SEL, clk_src);
- else
- REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL,
- PHYCSYMCLK_FORCE_EN, 0,
- PHYCSYMCLK_FORCE_SRC_SEL, 0);
- break;
- case 3:
- if (force_enable)
- REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL,
- PHYDSYMCLK_FORCE_EN, 1,
- PHYDSYMCLK_FORCE_SRC_SEL, clk_src);
- else
- REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL,
- PHYDSYMCLK_FORCE_EN, 0,
- PHYDSYMCLK_FORCE_SRC_SEL, 0);
- break;
- case 4:
- if (force_enable)
- REG_UPDATE_2(PHYESYMCLK_CLOCK_CNTL,
- PHYESYMCLK_FORCE_EN, 1,
- PHYESYMCLK_FORCE_SRC_SEL, clk_src);
- else
- REG_UPDATE_2(PHYESYMCLK_CLOCK_CNTL,
- PHYESYMCLK_FORCE_EN, 0,
- PHYESYMCLK_FORCE_SRC_SEL, 0);
- break;
- default:
- BREAK_TO_DEBUGGER();
- return;
- }
-}
-
-/* Controls the generation of pixel valid for OTG in (OTG -> HPO case) */
-void dccg31_set_dtbclk_dto(
- struct dccg *dccg,
- int dtbclk_inst,
- int req_dtbclk_khz,
- int num_odm_segments,
- const struct dc_crtc_timing *timing)
-{
- struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
- uint32_t dtbdto_div;
-
- /* Mode DTBDTO Rate DTBCLK_DTO<x>_DIV Register
- * ODM 4:1 combine pixel rate/4 2
- * ODM 2:1 combine pixel rate/2 4
- * non-DSC 4:2:0 mode pixel rate/2 4
- * DSC native 4:2:0 pixel rate/2 4
- * DSC native 4:2:2 pixel rate/2 4
- * Other modes pixel rate 8
- */
- if (num_odm_segments == 4) {
- dtbdto_div = 2;
- req_dtbclk_khz = req_dtbclk_khz / 4;
- } else if ((num_odm_segments == 2) ||
- (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) ||
- (timing->flags.DSC && timing->pixel_encoding == PIXEL_ENCODING_YCBCR422
- && !timing->dsc_cfg.ycbcr422_simple)) {
- dtbdto_div = 4;
- req_dtbclk_khz = req_dtbclk_khz / 2;
- } else
- dtbdto_div = 8;
-
- if (dccg->ref_dtbclk_khz && req_dtbclk_khz) {
- uint32_t modulo, phase;
-
- // phase / modulo = dtbclk / dtbclk ref
- modulo = dccg->ref_dtbclk_khz * 1000;
- phase = div_u64((((unsigned long long)modulo * req_dtbclk_khz) + dccg->ref_dtbclk_khz - 1),
- dccg->ref_dtbclk_khz);
-
- REG_UPDATE(OTG_PIXEL_RATE_CNTL[dtbclk_inst],
- DTBCLK_DTO_DIV[dtbclk_inst], dtbdto_div);
-
- REG_WRITE(DTBCLK_DTO_MODULO[dtbclk_inst], modulo);
- REG_WRITE(DTBCLK_DTO_PHASE[dtbclk_inst], phase);
-
- REG_UPDATE(OTG_PIXEL_RATE_CNTL[dtbclk_inst],
- DTBCLK_DTO_ENABLE[dtbclk_inst], 1);
-
- REG_WAIT(OTG_PIXEL_RATE_CNTL[dtbclk_inst],
- DTBCLKDTO_ENABLE_STATUS[dtbclk_inst], 1,
- 1, 100);
-
- /* The recommended programming sequence to enable DTBCLK DTO to generate
- * valid pixel HPO DPSTREAM ENCODER, specifies that DTO source select should
- * be set only after DTO is enabled
- */
- REG_UPDATE(OTG_PIXEL_RATE_CNTL[dtbclk_inst],
- PIPE_DTO_SRC_SEL[dtbclk_inst], 1);
-
- dccg->dtbclk_khz[dtbclk_inst] = req_dtbclk_khz;
- } else {
- REG_UPDATE_3(OTG_PIXEL_RATE_CNTL[dtbclk_inst],
- DTBCLK_DTO_ENABLE[dtbclk_inst], 0,
- PIPE_DTO_SRC_SEL[dtbclk_inst], 0,
- DTBCLK_DTO_DIV[dtbclk_inst], dtbdto_div);
-
- REG_WRITE(DTBCLK_DTO_MODULO[dtbclk_inst], 0);
- REG_WRITE(DTBCLK_DTO_PHASE[dtbclk_inst], 0);
-
- dccg->dtbclk_khz[dtbclk_inst] = 0;
- }
-}
-
-void dccg31_set_audio_dtbclk_dto(
- struct dccg *dccg,
- uint32_t req_audio_dtbclk_khz)
-{
- struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
-
- if (dccg->ref_dtbclk_khz && req_audio_dtbclk_khz) {
- uint32_t modulo, phase;
-
- // phase / modulo = dtbclk / dtbclk ref
- modulo = dccg->ref_dtbclk_khz * 1000;
- phase = div_u64((((unsigned long long)modulo * req_audio_dtbclk_khz) + dccg->ref_dtbclk_khz - 1),
- dccg->ref_dtbclk_khz);
-
-
- REG_WRITE(DCCG_AUDIO_DTBCLK_DTO_MODULO, modulo);
- REG_WRITE(DCCG_AUDIO_DTBCLK_DTO_PHASE, phase);
-
- //REG_UPDATE(DCCG_AUDIO_DTO_SOURCE,
- // DCCG_AUDIO_DTBCLK_DTO_USE_512FBR_DTO, 1);
-
- REG_UPDATE(DCCG_AUDIO_DTO_SOURCE,
- DCCG_AUDIO_DTO_SEL, 4); // 04 - DCCG_AUDIO_DTO_SEL_AUDIO_DTO_DTBCLK
-
- dccg->audio_dtbclk_khz = req_audio_dtbclk_khz;
- } else {
- REG_WRITE(DCCG_AUDIO_DTBCLK_DTO_PHASE, 0);
- REG_WRITE(DCCG_AUDIO_DTBCLK_DTO_MODULO, 0);
-
- REG_UPDATE(DCCG_AUDIO_DTO_SOURCE,
- DCCG_AUDIO_DTO_SEL, 3); // 03 - DCCG_AUDIO_DTO_SEL_NO_AUDIO_DTO
-
- dccg->audio_dtbclk_khz = 0;
- }
-}
-
-static void dccg31_get_dccg_ref_freq(struct dccg *dccg,
- unsigned int xtalin_freq_inKhz,
- unsigned int *dccg_ref_freq_inKhz)
-{
- /*
- * Assume refclk is sourced from xtalin
- * expect 24MHz
- */
- *dccg_ref_freq_inKhz = xtalin_freq_inKhz;
- return;
-}
-
-static void dccg31_set_dispclk_change_mode(
- struct dccg *dccg,
- enum dentist_dispclk_change_mode change_mode)
-{
- struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
-
- REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_MODE,
- change_mode == DISPCLK_CHANGE_MODE_RAMPING ? 2 : 0);
-}
-
-void dccg31_init(struct dccg *dccg)
-{
- /* Set HPO stream encoder to use refclk to avoid case where PHY is
- * disabled and SYMCLK32 for HPO SE is sourced from PHYD32CLK which
- * will cause DCN to hang.
- */
- dccg31_disable_symclk32_se(dccg, 0);
- dccg31_disable_symclk32_se(dccg, 1);
- dccg31_disable_symclk32_se(dccg, 2);
- dccg31_disable_symclk32_se(dccg, 3);
-
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) {
- dccg31_disable_symclk32_le(dccg, 0);
- dccg31_disable_symclk32_le(dccg, 1);
- }
-
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) {
- dccg31_disable_dpstreamclk(dccg, 0);
- dccg31_disable_dpstreamclk(dccg, 1);
- dccg31_disable_dpstreamclk(dccg, 2);
- dccg31_disable_dpstreamclk(dccg, 3);
- }
-
-}
-
-static const struct dccg_funcs dccg31_funcs = {
- .update_dpp_dto = dccg31_update_dpp_dto,
- .get_dccg_ref_freq = dccg31_get_dccg_ref_freq,
- .dccg_init = dccg31_init,
- .set_dpstreamclk = dccg31_set_dpstreamclk,
- .enable_symclk32_se = dccg31_enable_symclk32_se,
- .disable_symclk32_se = dccg31_disable_symclk32_se,
- .enable_symclk32_le = dccg31_enable_symclk32_le,
- .disable_symclk32_le = dccg31_disable_symclk32_le,
- .set_physymclk = dccg31_set_physymclk,
- .set_dtbclk_dto = dccg31_set_dtbclk_dto,
- .set_audio_dtbclk_dto = dccg31_set_audio_dtbclk_dto,
- .set_dispclk_change_mode = dccg31_set_dispclk_change_mode,
- .disable_dsc = dccg31_disable_dscclk,
- .enable_dsc = dccg31_enable_dscclk,
-};
-
-struct dccg *dccg31_create(
- struct dc_context *ctx,
- const struct dccg_registers *regs,
- const struct dccg_shift *dccg_shift,
- const struct dccg_mask *dccg_mask)
-{
- struct dcn_dccg *dccg_dcn = kzalloc(sizeof(*dccg_dcn), GFP_KERNEL);
- struct dccg *base;
-
- if (dccg_dcn == NULL) {
- BREAK_TO_DEBUGGER();
- return NULL;
- }
-
- base = &dccg_dcn->base;
- base->ctx = ctx;
- base->funcs = &dccg31_funcs;
-
- dccg_dcn->regs = regs;
- dccg_dcn->dccg_shift = dccg_shift;
- dccg_dcn->dccg_mask = dccg_mask;
-
- return &dccg_dcn->base;
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
deleted file mode 100644
index 4d4ac4ceb1e8..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
+++ /dev/null
@@ -1,619 +0,0 @@
-/*
- * Copyright 2016 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-
-#include "dm_services.h"
-#include "dm_helpers.h"
-#include "core_types.h"
-#include "resource.h"
-#include "dccg.h"
-#include "dce/dce_hwseq.h"
-#include "clk_mgr.h"
-#include "reg_helper.h"
-#include "abm.h"
-#include "hubp.h"
-#include "dchubbub.h"
-#include "timing_generator.h"
-#include "opp.h"
-#include "ipp.h"
-#include "mpc.h"
-#include "mcif_wb.h"
-#include "dc_dmub_srv.h"
-#include "dcn31_hwseq.h"
-#include "link_hwss.h"
-#include "dpcd_defs.h"
-#include "dce/dmub_outbox.h"
-#include "dc_link_dp.h"
-#include "inc/link_dpcd.h"
-#include "dcn10/dcn10_hw_sequencer.h"
-#include "inc/link_enc_cfg.h"
-#include "dcn30/dcn30_vpg.h"
-#include "dce/dce_i2c_hw.h"
-
-#define DC_LOGGER_INIT(logger)
-
-#define CTX \
- hws->ctx
-#define REG(reg)\
- hws->regs->reg
-#define DC_LOGGER \
- dc->ctx->logger
-
-
-#undef FN
-#define FN(reg_name, field_name) \
- hws->shifts->field_name, hws->masks->field_name
-
-static void enable_memory_low_power(struct dc *dc)
-{
- struct dce_hwseq *hws = dc->hwseq;
- int i;
-
- if (dc->debug.enable_mem_low_power.bits.dmcu) {
- // Force ERAM to shutdown if DMCU is not enabled
- if (dc->debug.disable_dmcu || dc->config.disable_dmcu) {
- REG_UPDATE(DMU_MEM_PWR_CNTL, DMCU_ERAM_MEM_PWR_FORCE, 3);
- }
- }
-
- // Set default OPTC memory power states
- if (dc->debug.enable_mem_low_power.bits.optc) {
- // Shutdown when unassigned and light sleep in VBLANK
- REG_SET_2(ODM_MEM_PWR_CTRL3, 0, ODM_MEM_UNASSIGNED_PWR_MODE, 3, ODM_MEM_VBLANK_PWR_MODE, 1);
- }
-
- if (dc->debug.enable_mem_low_power.bits.vga) {
- // Power down VGA memory
- REG_UPDATE(MMHUBBUB_MEM_PWR_CNTL, VGA_MEM_PWR_FORCE, 1);
- }
-
- if (dc->debug.enable_mem_low_power.bits.mpc)
- dc->res_pool->mpc->funcs->set_mpc_mem_lp_mode(dc->res_pool->mpc);
-
-
- if (dc->debug.enable_mem_low_power.bits.vpg && dc->res_pool->stream_enc[0]->vpg->funcs->vpg_powerdown) {
- // Power down VPGs
- for (i = 0; i < dc->res_pool->stream_enc_count; i++)
- dc->res_pool->stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->stream_enc[i]->vpg);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- for (i = 0; i < dc->res_pool->hpo_dp_stream_enc_count; i++)
- dc->res_pool->hpo_dp_stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->hpo_dp_stream_enc[i]->vpg);
-#endif
- }
-
-}
-
-void dcn31_init_hw(struct dc *dc)
-{
- struct abm **abms = dc->res_pool->multiple_abms;
- struct dce_hwseq *hws = dc->hwseq;
- struct dc_bios *dcb = dc->ctx->dc_bios;
- struct resource_pool *res_pool = dc->res_pool;
- uint32_t backlight = MAX_BACKLIGHT_LEVEL;
- int i, j;
-
- if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
- dc->clk_mgr->funcs->init_clocks(dc->clk_mgr);
-
- if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
-
- REG_WRITE(REFCLK_CNTL, 0);
- REG_UPDATE(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_ENABLE, 1);
- REG_WRITE(DIO_MEM_PWR_CTRL, 0);
-
- if (!dc->debug.disable_clock_gate) {
- /* enable all DCN clock gating */
- REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0);
-
- REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0);
-
- REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0);
- }
-
- //Enable ability to power gate / don't force power on permanently
- if (hws->funcs.enable_power_gating_plane)
- hws->funcs.enable_power_gating_plane(hws, true);
-
- return;
- }
-
- if (!dcb->funcs->is_accelerated_mode(dcb)) {
- hws->funcs.bios_golden_init(dc);
- hws->funcs.disable_vga(dc->hwseq);
- }
- // Initialize the dccg
- if (res_pool->dccg->funcs->dccg_init)
- res_pool->dccg->funcs->dccg_init(res_pool->dccg);
-
- enable_memory_low_power(dc);
-
- if (dc->ctx->dc_bios->fw_info_valid) {
- res_pool->ref_clocks.xtalin_clock_inKhz =
- dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency;
-
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
- if (res_pool->dccg && res_pool->hubbub) {
-
- (res_pool->dccg->funcs->get_dccg_ref_freq)(res_pool->dccg,
- dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency,
- &res_pool->ref_clocks.dccg_ref_clock_inKhz);
-
- (res_pool->hubbub->funcs->get_dchub_ref_freq)(res_pool->hubbub,
- res_pool->ref_clocks.dccg_ref_clock_inKhz,
- &res_pool->ref_clocks.dchub_ref_clock_inKhz);
- } else {
- // Not all ASICs have DCCG sw component
- res_pool->ref_clocks.dccg_ref_clock_inKhz =
- res_pool->ref_clocks.xtalin_clock_inKhz;
- res_pool->ref_clocks.dchub_ref_clock_inKhz =
- res_pool->ref_clocks.xtalin_clock_inKhz;
- }
- }
- } else
- ASSERT_CRITICAL(false);
-
- for (i = 0; i < dc->link_count; i++) {
- /* Power up AND update implementation according to the
- * required signal (which may be different from the
- * default signal on connector).
- */
- struct dc_link *link = dc->links[i];
-
- if (link->ep_type != DISPLAY_ENDPOINT_PHY)
- continue;
-
- link->link_enc->funcs->hw_init(link->link_enc);
-
- /* Check for enabled DIG to identify enabled display */
- if (link->link_enc->funcs->is_dig_enabled &&
- link->link_enc->funcs->is_dig_enabled(link->link_enc))
- link->link_status.link_active = true;
- }
-
- /* Power gate DSCs */
- for (i = 0; i < res_pool->res_cap->num_dsc; i++)
- if (hws->funcs.dsc_pg_control != NULL)
- hws->funcs.dsc_pg_control(hws, res_pool->dscs[i]->inst, false);
-
- /* Enables outbox notifications for usb4 dpia */
- if (dc->res_pool->usb4_dpia_count)
- dmub_enable_outbox_notification(dc);
-
- /* we want to turn off all dp displays before doing detection */
- if (dc->config.power_down_display_on_boot) {
- uint8_t dpcd_power_state = '\0';
- enum dc_status status = DC_ERROR_UNEXPECTED;
-
- for (i = 0; i < dc->link_count; i++) {
- if (dc->links[i]->connector_signal != SIGNAL_TYPE_DISPLAY_PORT)
- continue;
-
- /* if any of the displays are lit up turn them off */
- status = core_link_read_dpcd(dc->links[i], DP_SET_POWER,
- &dpcd_power_state, sizeof(dpcd_power_state));
- if (status == DC_OK && dpcd_power_state == DP_POWER_STATE_D0) {
- /* blank dp stream before power off receiver*/
- if (dc->links[i]->ep_type == DISPLAY_ENDPOINT_PHY &&
- dc->links[i]->link_enc->funcs->get_dig_frontend) {
- unsigned int fe;
-
- fe = dc->links[i]->link_enc->funcs->get_dig_frontend(
- dc->links[i]->link_enc);
- if (fe == ENGINE_ID_UNKNOWN)
- continue;
-
- for (j = 0; j < dc->res_pool->stream_enc_count; j++) {
- if (fe == dc->res_pool->stream_enc[j]->id) {
- dc->res_pool->stream_enc[j]->funcs->dp_blank(dc->links[i],
- dc->res_pool->stream_enc[j]);
- break;
- }
- }
- }
- dp_receiver_power_ctrl(dc->links[i], false);
- }
- }
- }
-
- /* If taking control over from VBIOS, we may want to optimize our first
- * mode set, so we need to skip powering down pipes until we know which
- * pipes we want to use.
- * Otherwise, if taking control is not possible, we need to power
- * everything down.
- */
- if (dcb->funcs->is_accelerated_mode(dcb) || dc->config.power_down_display_on_boot) {
- hws->funcs.init_pipes(dc, dc->current_state);
- if (dc->res_pool->hubbub->funcs->allow_self_refresh_control)
- dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub,
- !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter);
- }
-
- for (i = 0; i < res_pool->audio_count; i++) {
- struct audio *audio = res_pool->audios[i];
-
- audio->funcs->hw_init(audio);
- }
-
- for (i = 0; i < dc->link_count; i++) {
- struct dc_link *link = dc->links[i];
-
- if (link->panel_cntl)
- backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
- }
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- if (abms[i] != NULL)
- abms[i]->funcs->abm_init(abms[i], backlight);
- }
-
- /* power AFMT HDMI memory TODO: may move to dis/en output save power*/
- REG_WRITE(DIO_MEM_PWR_CTRL, 0);
-
- // Set i2c to light sleep until engine is setup
- if (dc->debug.enable_mem_low_power.bits.i2c)
- REG_UPDATE(DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, 1);
-
- if (hws->funcs.setup_hpo_hw_control)
- hws->funcs.setup_hpo_hw_control(hws, false);
-
- if (!dc->debug.disable_clock_gate) {
- /* enable all DCN clock gating */
- REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0);
-
- REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0);
-
- REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0);
- }
- if (hws->funcs.enable_power_gating_plane)
- hws->funcs.enable_power_gating_plane(dc->hwseq, true);
-
- if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks)
- dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub);
-
- if (dc->clk_mgr->funcs->notify_wm_ranges)
- dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr);
-
- if (dc->clk_mgr->funcs->set_hard_max_memclk)
- dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr);
-
- if (dc->res_pool->hubbub->funcs->force_pstate_change_control)
- dc->res_pool->hubbub->funcs->force_pstate_change_control(
- dc->res_pool->hubbub, false, false);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (dc->res_pool->hubbub->funcs->init_crb)
- dc->res_pool->hubbub->funcs->init_crb(dc->res_pool->hubbub);
-#endif
-}
-
-void dcn31_dsc_pg_control(
- struct dce_hwseq *hws,
- unsigned int dsc_inst,
- bool power_on)
-{
- uint32_t power_gate = power_on ? 0 : 1;
- uint32_t pwr_status = power_on ? 0 : 2;
- uint32_t org_ip_request_cntl = 0;
-
- if (hws->ctx->dc->debug.disable_dsc_power_gate)
- return;
-
- if (hws->ctx->dc->debug.root_clock_optimization.bits.dsc &&
- hws->ctx->dc->res_pool->dccg->funcs->enable_dsc &&
- power_on)
- hws->ctx->dc->res_pool->dccg->funcs->enable_dsc(
- hws->ctx->dc->res_pool->dccg, dsc_inst);
-
- REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl);
- if (org_ip_request_cntl == 0)
- REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1);
-
- switch (dsc_inst) {
- case 0: /* DSC0 */
- REG_UPDATE(DOMAIN16_PG_CONFIG,
- DOMAIN_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN16_PG_STATUS,
- DOMAIN_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 1: /* DSC1 */
- REG_UPDATE(DOMAIN17_PG_CONFIG,
- DOMAIN_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN17_PG_STATUS,
- DOMAIN_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 2: /* DSC2 */
- REG_UPDATE(DOMAIN18_PG_CONFIG,
- DOMAIN_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN18_PG_STATUS,
- DOMAIN_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- default:
- BREAK_TO_DEBUGGER();
- break;
- }
-
- if (org_ip_request_cntl == 0)
- REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0);
-
- if (hws->ctx->dc->debug.root_clock_optimization.bits.dsc) {
- if (hws->ctx->dc->res_pool->dccg->funcs->disable_dsc && !power_on)
- hws->ctx->dc->res_pool->dccg->funcs->disable_dsc(
- hws->ctx->dc->res_pool->dccg, dsc_inst);
- }
-
-}
-
-
-void dcn31_enable_power_gating_plane(
- struct dce_hwseq *hws,
- bool enable)
-{
- bool force_on = true; /* disable power gating */
-
- if (enable)
- force_on = false;
-
- /* DCHUBP0/1/2/3/4/5 */
- REG_UPDATE(DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on);
-
- /* DPP0/1/2/3/4/5 */
- REG_UPDATE(DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on);
-
- /* DCS0/1/2/3/4/5 */
- REG_UPDATE(DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on);
-}
-
-void dcn31_update_info_frame(struct pipe_ctx *pipe_ctx)
-{
- bool is_hdmi_tmds;
- bool is_dp;
-
- ASSERT(pipe_ctx->stream);
-
- if (pipe_ctx->stream_res.stream_enc == NULL)
- return; /* this is not root pipe */
-
- is_hdmi_tmds = dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal);
- is_dp = dc_is_dp_signal(pipe_ctx->stream->signal);
-
- if (!is_hdmi_tmds && !is_dp)
- return;
-
- if (is_hdmi_tmds)
- pipe_ctx->stream_res.stream_enc->funcs->update_hdmi_info_packets(
- pipe_ctx->stream_res.stream_enc,
- &pipe_ctx->stream_res.encoder_info_frame);
- else {
- pipe_ctx->stream_res.stream_enc->funcs->update_dp_info_packets(
- pipe_ctx->stream_res.stream_enc,
- &pipe_ctx->stream_res.encoder_info_frame);
- }
-}
-void dcn31_z10_save_init(struct dc *dc)
-{
- union dmub_rb_cmd cmd;
-
- memset(&cmd, 0, sizeof(cmd));
- cmd.dcn_restore.header.type = DMUB_CMD__IDLE_OPT;
- cmd.dcn_restore.header.sub_type = DMUB_CMD__IDLE_OPT_DCN_SAVE_INIT;
-
- dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd);
- dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv);
- dc_dmub_srv_wait_idle(dc->ctx->dmub_srv);
-}
-
-void dcn31_z10_restore(const struct dc *dc)
-{
- union dmub_rb_cmd cmd;
-
- /*
- * DMUB notifies whether restore is required.
- * Optimization to avoid sending commands when not required.
- */
- if (!dc_dmub_srv_is_restore_required(dc->ctx->dmub_srv))
- return;
-
- memset(&cmd, 0, sizeof(cmd));
- cmd.dcn_restore.header.type = DMUB_CMD__IDLE_OPT;
- cmd.dcn_restore.header.sub_type = DMUB_CMD__IDLE_OPT_DCN_RESTORE;
-
- dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd);
- dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv);
- dc_dmub_srv_wait_idle(dc->ctx->dmub_srv);
-}
-
-void dcn31_hubp_pg_control(struct dce_hwseq *hws, unsigned int hubp_inst, bool power_on)
-{
- uint32_t power_gate = power_on ? 0 : 1;
- uint32_t pwr_status = power_on ? 0 : 2;
-
- if (hws->ctx->dc->debug.disable_hubp_power_gate)
- return;
-
- if (REG(DOMAIN0_PG_CONFIG) == 0)
- return;
-
- switch (hubp_inst) {
- case 0:
- REG_SET(DOMAIN0_PG_CONFIG, 0, DOMAIN_POWER_GATE, power_gate);
- REG_WAIT(DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000);
- break;
- case 1:
- REG_SET(DOMAIN1_PG_CONFIG, 0, DOMAIN_POWER_GATE, power_gate);
- REG_WAIT(DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000);
- break;
- case 2:
- REG_SET(DOMAIN2_PG_CONFIG, 0, DOMAIN_POWER_GATE, power_gate);
- REG_WAIT(DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000);
- break;
- case 3:
- REG_SET(DOMAIN3_PG_CONFIG, 0, DOMAIN_POWER_GATE, power_gate);
- REG_WAIT(DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000);
- break;
- default:
- BREAK_TO_DEBUGGER();
- break;
- }
-}
-
-int dcn31_init_sys_ctx(struct dce_hwseq *hws, struct dc *dc, struct dc_phy_addr_space_config *pa_config)
-{
- struct dcn_hubbub_phys_addr_config config;
-
- config.system_aperture.fb_top = pa_config->system_aperture.fb_top;
- config.system_aperture.fb_offset = pa_config->system_aperture.fb_offset;
- config.system_aperture.fb_base = pa_config->system_aperture.fb_base;
- config.system_aperture.agp_top = pa_config->system_aperture.agp_top;
- config.system_aperture.agp_bot = pa_config->system_aperture.agp_bot;
- config.system_aperture.agp_base = pa_config->system_aperture.agp_base;
- config.gart_config.page_table_start_addr = pa_config->gart_config.page_table_start_addr;
- config.gart_config.page_table_end_addr = pa_config->gart_config.page_table_end_addr;
-
- if (pa_config->gart_config.base_addr_is_mc_addr) {
- /* Convert from MC address to offset into FB */
- config.gart_config.page_table_base_addr = pa_config->gart_config.page_table_base_addr -
- pa_config->system_aperture.fb_base +
- pa_config->system_aperture.fb_offset;
- } else
- config.gart_config.page_table_base_addr = pa_config->gart_config.page_table_base_addr;
-
- return dc->res_pool->hubbub->funcs->init_dchub_sys_ctx(dc->res_pool->hubbub, &config);
-}
-
-static void dcn31_reset_back_end_for_pipe(
- struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- struct dc_state *context)
-{
- struct dc_link *link;
-
- DC_LOGGER_INIT(dc->ctx->logger);
- if (pipe_ctx->stream_res.stream_enc == NULL) {
- pipe_ctx->stream = NULL;
- return;
- }
- ASSERT(!pipe_ctx->top_pipe);
-
- dc->hwss.set_abm_immediate_disable(pipe_ctx);
-
- pipe_ctx->stream_res.tg->funcs->set_dsc_config(
- pipe_ctx->stream_res.tg,
- OPTC_DSC_DISABLED, 0, 0);
- pipe_ctx->stream_res.tg->funcs->disable_crtc(pipe_ctx->stream_res.tg);
-
- pipe_ctx->stream_res.tg->funcs->enable_optc_clock(pipe_ctx->stream_res.tg, false);
- if (pipe_ctx->stream_res.tg->funcs->set_odm_bypass)
- pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
- pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
-
- if (pipe_ctx->stream_res.tg->funcs->set_drr)
- pipe_ctx->stream_res.tg->funcs->set_drr(
- pipe_ctx->stream_res.tg, NULL);
-
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
- link = pipe_ctx->stream->link;
- /* DPMS may already disable or */
- /* dpms_off status is incorrect due to fastboot
- * feature. When system resume from S4 with second
- * screen only, the dpms_off would be true but
- * VBIOS lit up eDP, so check link status too.
- */
- if (!pipe_ctx->stream->dpms_off || link->link_status.link_active)
- core_link_disable_stream(pipe_ctx);
- else if (pipe_ctx->stream_res.audio)
- dc->hwss.disable_audio_stream(pipe_ctx);
-
- /* free acquired resources */
- if (pipe_ctx->stream_res.audio) {
- /*disable az_endpoint*/
- pipe_ctx->stream_res.audio->funcs->az_disable(pipe_ctx->stream_res.audio);
-
- /*free audio*/
- if (dc->caps.dynamic_audio == true) {
- /*we have to dynamic arbitrate the audio endpoints*/
- /*we free the resource, need reset is_audio_acquired*/
- update_audio_usage(&dc->current_state->res_ctx, dc->res_pool,
- pipe_ctx->stream_res.audio, false);
- pipe_ctx->stream_res.audio = NULL;
- }
- }
- } else if (pipe_ctx->stream_res.dsc) {
- dp_set_dsc_enable(pipe_ctx, false);
- }
-
- pipe_ctx->stream = NULL;
- DC_LOG_DEBUG("Reset back end for pipe %d, tg:%d\n",
- pipe_ctx->pipe_idx, pipe_ctx->stream_res.tg->inst);
-}
-
-void dcn31_reset_hw_ctx_wrap(
- struct dc *dc,
- struct dc_state *context)
-{
- int i;
- struct dce_hwseq *hws = dc->hwseq;
-
- /* Reset Back End*/
- for (i = dc->res_pool->pipe_count - 1; i >= 0 ; i--) {
- struct pipe_ctx *pipe_ctx_old =
- &dc->current_state->res_ctx.pipe_ctx[i];
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-
- if (!pipe_ctx_old->stream)
- continue;
-
- if (pipe_ctx_old->top_pipe || pipe_ctx_old->prev_odm_pipe)
- continue;
-
- if (!pipe_ctx->stream ||
- pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) {
- struct clock_source *old_clk = pipe_ctx_old->clock_source;
-
- dcn31_reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state);
- if (hws->funcs.enable_stream_gating)
- hws->funcs.enable_stream_gating(dc, pipe_ctx_old);
- if (old_clk)
- old_clk->funcs->cs_power_down(old_clk);
- }
- }
-
- /* New dc_state in the process of being applied to hardware. */
- dc->current_state->res_ctx.link_enc_cfg_ctx.mode = LINK_ENC_CFG_TRANSIENT;
-}
-
-void dcn31_setup_hpo_hw_control(const struct dce_hwseq *hws, bool enable)
-{
- if (hws->ctx->dc->debug.hpo_optimization)
- REG_UPDATE(HPO_TOP_HW_CONTROL, HPO_IO_EN, !!enable);
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c
deleted file mode 100644
index a4b1d98f0007..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- * Copyright 2012-15 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include "dcn31_optc.h"
-
-#include "dcn30/dcn30_optc.h"
-#include "reg_helper.h"
-#include "dc.h"
-#include "dcn_calc_math.h"
-
-#define REG(reg)\
- optc1->tg_regs->reg
-
-#define CTX \
- optc1->base.ctx
-
-#undef FN
-#define FN(reg_name, field_name) \
- optc1->tg_shift->field_name, optc1->tg_mask->field_name
-
-static void optc31_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt,
- struct dc_crtc_timing *timing)
-{
- struct optc *optc1 = DCN10TG_FROM_TG(optc);
- int mpcc_hactive = (timing->h_addressable + timing->h_border_left + timing->h_border_right)
- / opp_cnt;
- uint32_t memory_mask = 0;
- int mem_count_per_opp = (mpcc_hactive + 2559) / 2560;
-
- /* Assume less than 6 pipes */
- if (opp_cnt == 4) {
- if (mem_count_per_opp == 1)
- memory_mask = 0xf;
- else {
- ASSERT(mem_count_per_opp == 2);
- memory_mask = 0xff;
- }
- } else if (mem_count_per_opp == 1)
- memory_mask = 0x1 << (opp_id[0] * 2) | 0x1 << (opp_id[1] * 2);
- else if (mem_count_per_opp == 2)
- memory_mask = 0x3 << (opp_id[0] * 2) | 0x3 << (opp_id[1] * 2);
- else if (mem_count_per_opp == 3)
- memory_mask = 0x77;
- else if (mem_count_per_opp == 4)
- memory_mask = 0xff;
-
- if (REG(OPTC_MEMORY_CONFIG))
- REG_SET(OPTC_MEMORY_CONFIG, 0,
- OPTC_MEM_SEL, memory_mask);
-
- if (opp_cnt == 2) {
- REG_SET_3(OPTC_DATA_SOURCE_SELECT, 0,
- OPTC_NUM_OF_INPUT_SEGMENT, 1,
- OPTC_SEG0_SRC_SEL, opp_id[0],
- OPTC_SEG1_SRC_SEL, opp_id[1]);
- } else if (opp_cnt == 4) {
- REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0,
- OPTC_NUM_OF_INPUT_SEGMENT, 3,
- OPTC_SEG0_SRC_SEL, opp_id[0],
- OPTC_SEG1_SRC_SEL, opp_id[1],
- OPTC_SEG2_SRC_SEL, opp_id[2],
- OPTC_SEG3_SRC_SEL, opp_id[3]);
- }
-
- REG_UPDATE(OPTC_WIDTH_CONTROL,
- OPTC_SEGMENT_WIDTH, mpcc_hactive);
-
- REG_SET(OTG_H_TIMING_CNTL, 0, OTG_H_TIMING_DIV_MODE, opp_cnt - 1);
- optc1->opp_count = opp_cnt;
-}
-
-/**
- * Enable CRTC
- * Enable CRTC - call ASIC Control Object to enable Timing generator.
- */
-static bool optc31_enable_crtc(struct timing_generator *optc)
-{
- struct optc *optc1 = DCN10TG_FROM_TG(optc);
-
- /* opp instance for OTG, 1 to 1 mapping and odm will adjust */
- REG_UPDATE(OPTC_DATA_SOURCE_SELECT,
- OPTC_SEG0_SRC_SEL, optc->inst);
-
- /* VTG enable first is for HW workaround */
- REG_UPDATE(CONTROL,
- VTG0_ENABLE, 1);
-
- REG_SEQ_START();
-
- /* Enable CRTC */
- REG_UPDATE_2(OTG_CONTROL,
- OTG_DISABLE_POINT_CNTL, 2,
- OTG_MASTER_EN, 1);
-
- REG_SEQ_SUBMIT();
- REG_SEQ_WAIT_DONE();
-
- return true;
-}
-
-/* disable_crtc - call ASIC Control Object to disable Timing generator. */
-static bool optc31_disable_crtc(struct timing_generator *optc)
-{
- struct optc *optc1 = DCN10TG_FROM_TG(optc);
-
- /* disable otg request until end of the first line
- * in the vertical blank region
- */
- REG_UPDATE(OTG_CONTROL,
- OTG_MASTER_EN, 0);
-
- REG_UPDATE(CONTROL,
- VTG0_ENABLE, 0);
-
- /* CRTC disabled, so disable clock. */
- REG_WAIT(OTG_CLOCK_CONTROL,
- OTG_BUSY, 0,
- 1, 100000);
-
- return true;
-}
-
-static bool optc31_immediate_disable_crtc(struct timing_generator *optc)
-{
- struct optc *optc1 = DCN10TG_FROM_TG(optc);
-
- REG_UPDATE_2(OTG_CONTROL,
- OTG_DISABLE_POINT_CNTL, 0,
- OTG_MASTER_EN, 0);
-
- REG_UPDATE(CONTROL,
- VTG0_ENABLE, 0);
-
- /* CRTC disabled, so disable clock. */
- REG_WAIT(OTG_CLOCK_CONTROL,
- OTG_BUSY, 0,
- 1, 100000);
-
- return true;
-}
-
-static void optc31_set_drr(
- struct timing_generator *optc,
- const struct drr_params *params)
-{
- struct optc *optc1 = DCN10TG_FROM_TG(optc);
-
- if (params != NULL &&
- params->vertical_total_max > 0 &&
- params->vertical_total_min > 0) {
-
- if (params->vertical_total_mid != 0) {
-
- REG_SET(OTG_V_TOTAL_MID, 0,
- OTG_V_TOTAL_MID, params->vertical_total_mid - 1);
-
- REG_UPDATE_2(OTG_V_TOTAL_CONTROL,
- OTG_VTOTAL_MID_REPLACING_MAX_EN, 1,
- OTG_VTOTAL_MID_FRAME_NUM,
- (uint8_t)params->vertical_total_mid_frame_num);
-
- }
-
- optc->funcs->set_vtotal_min_max(optc, params->vertical_total_min - 1, params->vertical_total_max - 1);
-
- /*
- * MIN_MASK_EN is gone and MASK is now always enabled.
- *
- * To get it to it work with manual trigger we need to make sure
- * we program the correct bit.
- */
- REG_UPDATE_4(OTG_V_TOTAL_CONTROL,
- OTG_V_TOTAL_MIN_SEL, 1,
- OTG_V_TOTAL_MAX_SEL, 1,
- OTG_FORCE_LOCK_ON_EVENT, 0,
- OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */
-
- // Setup manual flow control for EOF via TRIG_A
- optc->funcs->setup_manual_trigger(optc);
-
- } else {
- REG_UPDATE_4(OTG_V_TOTAL_CONTROL,
- OTG_SET_V_TOTAL_MIN_MASK, 0,
- OTG_V_TOTAL_MIN_SEL, 0,
- OTG_V_TOTAL_MAX_SEL, 0,
- OTG_FORCE_LOCK_ON_EVENT, 0);
-
- optc->funcs->set_vtotal_min_max(optc, 0, 0);
- }
-}
-
-static struct timing_generator_funcs dcn31_tg_funcs = {
- .validate_timing = optc1_validate_timing,
- .program_timing = optc1_program_timing,
- .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0,
- .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1,
- .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2,
- .program_global_sync = optc1_program_global_sync,
- .enable_crtc = optc31_enable_crtc,
- .disable_crtc = optc31_disable_crtc,
- .immediate_disable_crtc = optc31_immediate_disable_crtc,
- /* used by enable_timing_synchronization. Not need for FPGA */
- .is_counter_moving = optc1_is_counter_moving,
- .get_position = optc1_get_position,
- .get_frame_count = optc1_get_vblank_counter,
- .get_scanoutpos = optc1_get_crtc_scanoutpos,
- .get_otg_active_size = optc1_get_otg_active_size,
- .set_early_control = optc1_set_early_control,
- /* used by enable_timing_synchronization. Not need for FPGA */
- .wait_for_state = optc1_wait_for_state,
- .set_blank_color = optc3_program_blank_color,
- .did_triggered_reset_occur = optc1_did_triggered_reset_occur,
- .triplebuffer_lock = optc3_triplebuffer_lock,
- .triplebuffer_unlock = optc2_triplebuffer_unlock,
- .enable_reset_trigger = optc1_enable_reset_trigger,
- .enable_crtc_reset = optc1_enable_crtc_reset,
- .disable_reset_trigger = optc1_disable_reset_trigger,
- .lock = optc3_lock,
- .is_locked = optc1_is_locked,
- .unlock = optc1_unlock,
- .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable,
- .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable,
- .enable_optc_clock = optc1_enable_optc_clock,
- .set_drr = optc31_set_drr,
- .set_vtotal_min_max = optc1_set_vtotal_min_max,
- .set_static_screen_control = optc1_set_static_screen_control,
- .program_stereo = optc1_program_stereo,
- .is_stereo_left_eye = optc1_is_stereo_left_eye,
- .tg_init = optc3_tg_init,
- .is_tg_enabled = optc1_is_tg_enabled,
- .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred,
- .clear_optc_underflow = optc1_clear_optc_underflow,
- .setup_global_swap_lock = NULL,
- .get_crc = optc1_get_crc,
- .configure_crc = optc2_configure_crc,
- .set_dsc_config = optc3_set_dsc_config,
- .set_dwb_source = NULL,
- .set_odm_bypass = optc3_set_odm_bypass,
- .set_odm_combine = optc31_set_odm_combine,
- .get_optc_source = optc2_get_optc_source,
- .set_out_mux = optc3_set_out_mux,
- .set_drr_trigger_window = optc3_set_drr_trigger_window,
- .set_vtotal_change_limit = optc3_set_vtotal_change_limit,
- .set_gsl = optc2_set_gsl,
- .set_gsl_source_select = optc2_set_gsl_source_select,
- .set_vtg_params = optc1_set_vtg_params,
- .program_manual_trigger = optc2_program_manual_trigger,
- .setup_manual_trigger = optc2_setup_manual_trigger,
- .get_hw_timing = optc1_get_hw_timing,
-};
-
-void dcn31_timing_generator_init(struct optc *optc1)
-{
- optc1->base.funcs = &dcn31_tg_funcs;
-
- optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1;
- optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1;
-
- optc1->min_h_blank = 32;
- optc1->min_v_blank = 3;
- optc1->min_v_blank_interlace = 5;
- optc1->min_h_sync_width = 4;
- optc1->min_v_sync_width = 1;
-}
-
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
index 3b3721386571..f9961a6446f3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
@@ -50,9 +50,9 @@ static bool dcn31_query_backlight_info(struct panel_cntl *panel_cntl, union dmub
cmd->panel_cntl.header.type = DMUB_CMD__PANEL_CNTL;
cmd->panel_cntl.header.sub_type = DMUB_CMD__PANEL_CNTL_QUERY_BACKLIGHT_INFO;
cmd->panel_cntl.header.payload_bytes = sizeof(cmd->panel_cntl.data);
- cmd->panel_cntl.data.inst = dcn31_panel_cntl->base.inst;
+ cmd->panel_cntl.data.pwrseq_inst = dcn31_panel_cntl->base.pwrseq_inst;
- return dc_dmub_srv_cmd_with_reply_data(dc_dmub_srv, cmd);
+ return dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
}
static uint32_t dcn31_get_16_bit_backlight_from_pwm(struct panel_cntl *panel_cntl)
@@ -65,11 +65,12 @@ static uint32_t dcn31_get_16_bit_backlight_from_pwm(struct panel_cntl *panel_cnt
return cmd.panel_cntl.data.current_backlight;
}
-uint32_t dcn31_panel_cntl_hw_init(struct panel_cntl *panel_cntl)
+static uint32_t dcn31_panel_cntl_hw_init(struct panel_cntl *panel_cntl)
{
struct dcn31_panel_cntl *dcn31_panel_cntl = TO_DCN31_PANEL_CNTL(panel_cntl);
struct dc_dmub_srv *dc_dmub_srv = panel_cntl->ctx->dmub_srv;
union dmub_rb_cmd cmd;
+ uint32_t freq_to_set = panel_cntl->ctx->dc->debug.pwm_freq;
if (!dc_dmub_srv)
return 0;
@@ -78,13 +79,14 @@ uint32_t dcn31_panel_cntl_hw_init(struct panel_cntl *panel_cntl)
cmd.panel_cntl.header.type = DMUB_CMD__PANEL_CNTL;
cmd.panel_cntl.header.sub_type = DMUB_CMD__PANEL_CNTL_HW_INIT;
cmd.panel_cntl.header.payload_bytes = sizeof(cmd.panel_cntl.data);
- cmd.panel_cntl.data.inst = dcn31_panel_cntl->base.inst;
+ cmd.panel_cntl.data.pwrseq_inst = dcn31_panel_cntl->base.pwrseq_inst;
cmd.panel_cntl.data.bl_pwm_cntl = panel_cntl->stored_backlight_registers.BL_PWM_CNTL;
cmd.panel_cntl.data.bl_pwm_period_cntl = panel_cntl->stored_backlight_registers.BL_PWM_PERIOD_CNTL;
cmd.panel_cntl.data.bl_pwm_ref_div1 =
panel_cntl->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV;
-
- if (!dc_dmub_srv_cmd_with_reply_data(dc_dmub_srv, &cmd))
+ cmd.panel_cntl.data.bl_pwm_ref_div2 =
+ panel_cntl->stored_backlight_registers.PANEL_PWRSEQ_REF_DIV2;
+ if (!dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
return 0;
panel_cntl->stored_backlight_registers.BL_PWM_CNTL = cmd.panel_cntl.data.bl_pwm_cntl;
@@ -92,11 +94,26 @@ uint32_t dcn31_panel_cntl_hw_init(struct panel_cntl *panel_cntl)
panel_cntl->stored_backlight_registers.BL_PWM_PERIOD_CNTL = cmd.panel_cntl.data.bl_pwm_period_cntl;
panel_cntl->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV =
cmd.panel_cntl.data.bl_pwm_ref_div1;
-
+ panel_cntl->stored_backlight_registers.PANEL_PWRSEQ_REF_DIV2 =
+ cmd.panel_cntl.data.bl_pwm_ref_div2;
+
+ if (freq_to_set >= MIN_DEBUG_FREQ_HZ && freq_to_set <= MAX_DEBUG_FREQ_HZ) {
+ uint32_t xtal = panel_cntl->ctx->dc->res_pool->ref_clocks.dccg_ref_clock_inKhz;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.panel_cntl.header.type = DMUB_CMD__PANEL_CNTL;
+ cmd.panel_cntl.header.sub_type = DMUB_CMD__PANEL_DEBUG_PWM_FREQ;
+ cmd.panel_cntl.header.payload_bytes = sizeof(cmd.panel_cntl.data);
+ cmd.panel_cntl.data.pwrseq_inst = dcn31_panel_cntl->base.pwrseq_inst;
+ cmd.panel_cntl.data.bl_pwm_cntl = xtal;
+ cmd.panel_cntl.data.bl_pwm_period_cntl = freq_to_set;
+ if (!dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+ return 0;
+ }
return cmd.panel_cntl.data.current_backlight;
}
-void dcn31_panel_cntl_destroy(struct panel_cntl **panel_cntl)
+static void dcn31_panel_cntl_destroy(struct panel_cntl **panel_cntl)
{
struct dcn31_panel_cntl *dcn31_panel_cntl = TO_DCN31_PANEL_CNTL(*panel_cntl);
@@ -104,7 +121,7 @@ void dcn31_panel_cntl_destroy(struct panel_cntl **panel_cntl)
*panel_cntl = NULL;
}
-bool dcn31_is_panel_backlight_on(struct panel_cntl *panel_cntl)
+static bool dcn31_is_panel_backlight_on(struct panel_cntl *panel_cntl)
{
union dmub_rb_cmd cmd;
@@ -114,7 +131,7 @@ bool dcn31_is_panel_backlight_on(struct panel_cntl *panel_cntl)
return cmd.panel_cntl.data.is_backlight_on;
}
-bool dcn31_is_panel_powered_on(struct panel_cntl *panel_cntl)
+static bool dcn31_is_panel_powered_on(struct panel_cntl *panel_cntl)
{
union dmub_rb_cmd cmd;
@@ -124,7 +141,7 @@ bool dcn31_is_panel_powered_on(struct panel_cntl *panel_cntl)
return cmd.panel_cntl.data.is_powered_on;
}
-void dcn31_store_backlight_level(struct panel_cntl *panel_cntl)
+static void dcn31_store_backlight_level(struct panel_cntl *panel_cntl)
{
union dmub_rb_cmd cmd;
@@ -151,7 +168,33 @@ void dcn31_panel_cntl_construct(
struct dcn31_panel_cntl *dcn31_panel_cntl,
const struct panel_cntl_init_data *init_data)
{
+
dcn31_panel_cntl->base.funcs = &dcn31_link_panel_cntl_funcs;
dcn31_panel_cntl->base.ctx = init_data->ctx;
dcn31_panel_cntl->base.inst = init_data->inst;
+
+ if (dcn31_panel_cntl->base.ctx->dc->config.support_edp0_on_dp1) {
+ //If supported, power sequencer mapping shall follow the DIG instance
+ uint8_t pwrseq_inst = 0xF;
+
+ switch (init_data->eng_id) {
+ case ENGINE_ID_DIGA:
+ pwrseq_inst = 0;
+ break;
+ case ENGINE_ID_DIGB:
+ pwrseq_inst = 1;
+ break;
+ default:
+ DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", init_data->eng_id);
+ ASSERT(false);
+ break;
+ }
+
+ dcn31_panel_cntl->base.pwrseq_inst = pwrseq_inst;
+ } else {
+ /* If not supported, pwrseq will be assigned in order,
+ * so first pwrseq will be assigned to first panel instance (legacy behavior)
+ */
+ dcn31_panel_cntl->base.pwrseq_inst = dcn31_panel_cntl->base.inst;
+ }
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.h
index d33ccd6ef8c3..8cf0259e211e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.h
@@ -29,6 +29,9 @@
#include "panel_cntl.h"
#include "dce/dce_panel_cntl.h"
+#define MIN_DEBUG_FREQ_HZ 200
+#define MAX_DEBUG_FREQ_HZ 6250
+
struct dcn31_panel_cntl {
struct panel_cntl base;
};
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
deleted file mode 100644
index 18896294ae12..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ /dev/null
@@ -1,2493 +0,0 @@
-/*
- * Copyright 2019 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-
-#include "dm_services.h"
-#include "dc.h"
-
-#include "dcn31/dcn31_init.h"
-
-#include "resource.h"
-#include "include/irq_service_interface.h"
-#include "dcn31_resource.h"
-
-#include "dcn20/dcn20_resource.h"
-#include "dcn30/dcn30_resource.h"
-
-#include "dcn10/dcn10_ipp.h"
-#include "dcn30/dcn30_hubbub.h"
-#include "dcn31/dcn31_hubbub.h"
-#include "dcn30/dcn30_mpc.h"
-#include "dcn31/dcn31_hubp.h"
-#include "irq/dcn31/irq_service_dcn31.h"
-#include "dcn30/dcn30_dpp.h"
-#include "dcn31/dcn31_optc.h"
-#include "dcn20/dcn20_hwseq.h"
-#include "dcn30/dcn30_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
-#include "dcn30/dcn30_opp.h"
-#include "dcn20/dcn20_dsc.h"
-#include "dcn30/dcn30_vpg.h"
-#include "dcn30/dcn30_afmt.h"
-#include "dcn30/dcn30_dio_stream_encoder.h"
-#include "dcn31/dcn31_hpo_dp_stream_encoder.h"
-#include "dcn31/dcn31_hpo_dp_link_encoder.h"
-#include "dcn31/dcn31_apg.h"
-#include "dcn31/dcn31_dio_link_encoder.h"
-#include "dcn31/dcn31_vpg.h"
-#include "dcn31/dcn31_afmt.h"
-#include "dce/dce_clock_source.h"
-#include "dce/dce_audio.h"
-#include "dce/dce_hwseq.h"
-#include "clk_mgr.h"
-#include "virtual/virtual_stream_encoder.h"
-#include "dce110/dce110_resource.h"
-#include "dml/display_mode_vba.h"
-#include "dcn31/dcn31_dccg.h"
-#include "dcn10/dcn10_resource.h"
-#include "dcn31_panel_cntl.h"
-
-#include "dcn30/dcn30_dwb.h"
-#include "dcn30/dcn30_mmhubbub.h"
-
-// TODO: change include headers /amd/include/asic_reg after upstream
-#include "yellow_carp_offset.h"
-#include "dcn/dcn_3_1_2_offset.h"
-#include "dcn/dcn_3_1_2_sh_mask.h"
-#include "nbio/nbio_7_2_0_offset.h"
-#include "dpcs/dpcs_4_2_0_offset.h"
-#include "dpcs/dpcs_4_2_0_sh_mask.h"
-#include "mmhub/mmhub_2_3_0_offset.h"
-#include "mmhub/mmhub_2_3_0_sh_mask.h"
-
-
-#define regDCHUBBUB_DEBUG_CTRL_0 0x04d6
-#define regDCHUBBUB_DEBUG_CTRL_0_BASE_IDX 2
-#define DCHUBBUB_DEBUG_CTRL_0__DET_DEPTH__SHIFT 0x10
-#define DCHUBBUB_DEBUG_CTRL_0__DET_DEPTH_MASK 0x01FF0000L
-
-#include "reg_helper.h"
-#include "dce/dmub_abm.h"
-#include "dce/dmub_psr.h"
-#include "dce/dce_aux.h"
-#include "dce/dce_i2c.h"
-
-#include "dml/dcn30/display_mode_vba_30.h"
-#include "vm_helper.h"
-#include "dcn20/dcn20_vmid.h"
-
-#include "link_enc_cfg.h"
-
-#define DC_LOGGER_INIT(logger)
-
-#define DCN3_1_DEFAULT_DET_SIZE 384
-
-struct _vcs_dpi_ip_params_st dcn3_1_ip = {
- .gpuvm_enable = 1,
- .gpuvm_max_page_table_levels = 1,
- .hostvm_enable = 1,
- .hostvm_max_page_table_levels = 2,
- .rob_buffer_size_kbytes = 64,
- .det_buffer_size_kbytes = DCN3_1_DEFAULT_DET_SIZE,
- .config_return_buffer_size_in_kbytes = 1792,
- .compressed_buffer_segment_size_in_kbytes = 64,
- .meta_fifo_size_in_kentries = 32,
- .zero_size_buffer_entries = 512,
- .compbuf_reserved_space_64b = 256,
- .compbuf_reserved_space_zs = 64,
- .dpp_output_buffer_pixels = 2560,
- .opp_output_buffer_lines = 1,
- .pixel_chunk_size_kbytes = 8,
- .meta_chunk_size_kbytes = 2,
- .min_meta_chunk_size_bytes = 256,
- .writeback_chunk_size_kbytes = 8,
- .ptoi_supported = false,
- .num_dsc = 3,
- .maximum_dsc_bits_per_component = 10,
- .dsc422_native_support = false,
- .is_line_buffer_bpp_fixed = true,
- .line_buffer_fixed_bpp = 48,
- .line_buffer_size_bits = 789504,
- .max_line_buffer_lines = 12,
- .writeback_interface_buffer_size_kbytes = 90,
- .max_num_dpp = 4,
- .max_num_otg = 4,
- .max_num_hdmi_frl_outputs = 1,
- .max_num_wb = 1,
- .max_dchub_pscl_bw_pix_per_clk = 4,
- .max_pscl_lb_bw_pix_per_clk = 2,
- .max_lb_vscl_bw_pix_per_clk = 4,
- .max_vscl_hscl_bw_pix_per_clk = 4,
- .max_hscl_ratio = 6,
- .max_vscl_ratio = 6,
- .max_hscl_taps = 8,
- .max_vscl_taps = 8,
- .dpte_buffer_size_in_pte_reqs_luma = 64,
- .dpte_buffer_size_in_pte_reqs_chroma = 34,
- .dispclk_ramp_margin_percent = 1,
- .max_inter_dcn_tile_repeaters = 8,
- .cursor_buffer_size = 16,
- .cursor_chunk_size = 2,
- .writeback_line_buffer_buffer_size = 0,
- .writeback_min_hscl_ratio = 1,
- .writeback_min_vscl_ratio = 1,
- .writeback_max_hscl_ratio = 1,
- .writeback_max_vscl_ratio = 1,
- .writeback_max_hscl_taps = 1,
- .writeback_max_vscl_taps = 1,
- .dppclk_delay_subtotal = 46,
- .dppclk_delay_scl = 50,
- .dppclk_delay_scl_lb_only = 16,
- .dppclk_delay_cnvc_formatter = 27,
- .dppclk_delay_cnvc_cursor = 6,
- .dispclk_delay_subtotal = 119,
- .dynamic_metadata_vm_enabled = false,
- .odm_combine_4to1_supported = false,
- .dcc_supported = true,
-};
-
-struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = {
- /*TODO: correct dispclk/dppclk voltage level determination*/
- .clock_limits = {
- {
- .state = 0,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
- .phyclk_mhz = 600.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 186.0,
- .dtbclk_mhz = 625.0,
- },
- {
- .state = 1,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 209.0,
- .dtbclk_mhz = 625.0,
- },
- {
- .state = 2,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 209.0,
- .dtbclk_mhz = 625.0,
- },
- {
- .state = 3,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 371.0,
- .dtbclk_mhz = 625.0,
- },
- {
- .state = 4,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 417.0,
- .dtbclk_mhz = 625.0,
- },
- },
- .num_states = 5,
- .sr_exit_time_us = 9.0,
- .sr_enter_plus_exit_time_us = 11.0,
- .sr_exit_z8_time_us = 442.0,
- .sr_enter_plus_exit_z8_time_us = 560.0,
- .writeback_latency_us = 12.0,
- .dram_channel_width_bytes = 4,
- .round_trip_ping_latency_dcfclk_cycles = 106,
- .urgent_latency_pixel_data_only_us = 4.0,
- .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
- .urgent_latency_vm_data_only_us = 4.0,
- .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
- .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
- .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
- .pct_ideal_sdp_bw_after_urgent = 80.0,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
- .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
- .max_avg_sdp_bw_use_normal_percent = 60.0,
- .max_avg_dram_bw_use_normal_percent = 60.0,
- .fabric_datapath_to_dcn_data_return_bytes = 32,
- .return_bus_width_bytes = 64,
- .downspread_percent = 0.38,
- .dcn_downspread_percent = 0.5,
- .gpuvm_min_page_size_bytes = 4096,
- .hostvm_min_page_size_bytes = 4096,
- .do_urgent_latency_adjustment = false,
- .urgent_latency_adjustment_fabric_clock_component_us = 0,
- .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
-};
-
-enum dcn31_clk_src_array_id {
- DCN31_CLK_SRC_PLL0,
- DCN31_CLK_SRC_PLL1,
- DCN31_CLK_SRC_PLL2,
- DCN31_CLK_SRC_PLL3,
- DCN31_CLK_SRC_PLL4,
- DCN30_CLK_SRC_TOTAL
-};
-
-/* begin *********************
- * macros to expend register list macro defined in HW object header file
- */
-
-/* DCN */
-/* TODO awful hack. fixup dcn20_dwb.h */
-#undef BASE_INNER
-#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
-
-#define BASE(seg) BASE_INNER(seg)
-
-#define SR(reg_name)\
- .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
- reg ## reg_name
-
-#define SRI(reg_name, block, id)\
- .reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- reg ## block ## id ## _ ## reg_name
-
-#define SRI2(reg_name, block, id)\
- .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
- reg ## reg_name
-
-#define SRIR(var_name, reg_name, block, id)\
- .var_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- reg ## block ## id ## _ ## reg_name
-
-#define SRII(reg_name, block, id)\
- .reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- reg ## block ## id ## _ ## reg_name
-
-#define SRII_MPC_RMU(reg_name, block, id)\
- .RMU##_##reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- reg ## block ## id ## _ ## reg_name
-
-#define SRII_DWB(reg_name, temp_name, block, id)\
- .reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \
- reg ## block ## id ## _ ## temp_name
-
-#define DCCG_SRII(reg_name, block, id)\
- .block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- reg ## block ## id ## _ ## reg_name
-
-#define VUPDATE_SRII(reg_name, block, id)\
- .reg_name[id] = BASE(reg ## reg_name ## _ ## block ## id ## _BASE_IDX) + \
- reg ## reg_name ## _ ## block ## id
-
-/* NBIO */
-#define NBIO_BASE_INNER(seg) \
- NBIO_BASE__INST0_SEG ## seg
-
-#define NBIO_BASE(seg) \
- NBIO_BASE_INNER(seg)
-
-#define NBIO_SR(reg_name)\
- .reg_name = NBIO_BASE(regBIF_BX1_ ## reg_name ## _BASE_IDX) + \
- regBIF_BX1_ ## reg_name
-
-/* MMHUB */
-#define MMHUB_BASE_INNER(seg) \
- MMHUB_BASE__INST0_SEG ## seg
-
-#define MMHUB_BASE(seg) \
- MMHUB_BASE_INNER(seg)
-
-#define MMHUB_SR(reg_name)\
- .reg_name = MMHUB_BASE(mm ## reg_name ## _BASE_IDX) + \
- mm ## reg_name
-
-/* CLOCK */
-#define CLK_BASE_INNER(seg) \
- CLK_BASE__INST0_SEG ## seg
-
-#define CLK_BASE(seg) \
- CLK_BASE_INNER(seg)
-
-#define CLK_SRI(reg_name, block, inst)\
- .reg_name = CLK_BASE(reg ## block ## _ ## inst ## _ ## reg_name ## _BASE_IDX) + \
- reg ## block ## _ ## inst ## _ ## reg_name
-
-
-static const struct bios_registers bios_regs = {
- NBIO_SR(BIOS_SCRATCH_3),
- NBIO_SR(BIOS_SCRATCH_6)
-};
-
-#define clk_src_regs(index, pllid)\
-[index] = {\
- CS_COMMON_REG_LIST_DCN3_0(index, pllid),\
-}
-
-static const struct dce110_clk_src_regs clk_src_regs[] = {
- clk_src_regs(0, A),
- clk_src_regs(1, B),
- clk_src_regs(2, C),
- clk_src_regs(3, D),
- clk_src_regs(4, E)
-};
-
-static const struct dce110_clk_src_shift cs_shift = {
- CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT)
-};
-
-static const struct dce110_clk_src_mask cs_mask = {
- CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK)
-};
-
-#define abm_regs(id)\
-[id] = {\
- ABM_DCN302_REG_LIST(id)\
-}
-
-static const struct dce_abm_registers abm_regs[] = {
- abm_regs(0),
- abm_regs(1),
- abm_regs(2),
- abm_regs(3),
-};
-
-static const struct dce_abm_shift abm_shift = {
- ABM_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dce_abm_mask abm_mask = {
- ABM_MASK_SH_LIST_DCN30(_MASK)
-};
-
-#define audio_regs(id)\
-[id] = {\
- AUD_COMMON_REG_LIST(id)\
-}
-
-static const struct dce_audio_registers audio_regs[] = {
- audio_regs(0),
- audio_regs(1),
- audio_regs(2),
- audio_regs(3),
- audio_regs(4),
- audio_regs(5),
- audio_regs(6)
-};
-
-#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\
- SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\
- SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\
- AUD_COMMON_MASK_SH_LIST_BASE(mask_sh)
-
-static const struct dce_audio_shift audio_shift = {
- DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce_audio_mask audio_mask = {
- DCE120_AUD_COMMON_MASK_SH_LIST(_MASK)
-};
-
-#define vpg_regs(id)\
-[id] = {\
- VPG_DCN31_REG_LIST(id)\
-}
-
-static const struct dcn31_vpg_registers vpg_regs[] = {
- vpg_regs(0),
- vpg_regs(1),
- vpg_regs(2),
- vpg_regs(3),
- vpg_regs(4),
- vpg_regs(5),
- vpg_regs(6),
- vpg_regs(7),
- vpg_regs(8),
- vpg_regs(9),
-};
-
-static const struct dcn31_vpg_shift vpg_shift = {
- DCN31_VPG_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn31_vpg_mask vpg_mask = {
- DCN31_VPG_MASK_SH_LIST(_MASK)
-};
-
-#define afmt_regs(id)\
-[id] = {\
- AFMT_DCN31_REG_LIST(id)\
-}
-
-static const struct dcn31_afmt_registers afmt_regs[] = {
- afmt_regs(0),
- afmt_regs(1),
- afmt_regs(2),
- afmt_regs(3),
- afmt_regs(4),
- afmt_regs(5)
-};
-
-static const struct dcn31_afmt_shift afmt_shift = {
- DCN31_AFMT_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn31_afmt_mask afmt_mask = {
- DCN31_AFMT_MASK_SH_LIST(_MASK)
-};
-
-#define apg_regs(id)\
-[id] = {\
- APG_DCN31_REG_LIST(id)\
-}
-
-static const struct dcn31_apg_registers apg_regs[] = {
- apg_regs(0),
- apg_regs(1),
- apg_regs(2),
- apg_regs(3)
-};
-
-static const struct dcn31_apg_shift apg_shift = {
- DCN31_APG_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn31_apg_mask apg_mask = {
- DCN31_APG_MASK_SH_LIST(_MASK)
-};
-
-#define stream_enc_regs(id)\
-[id] = {\
- SE_DCN3_REG_LIST(id)\
-}
-
-static const struct dcn10_stream_enc_registers stream_enc_regs[] = {
- stream_enc_regs(0),
- stream_enc_regs(1),
- stream_enc_regs(2),
- stream_enc_regs(3),
- stream_enc_regs(4)
-};
-
-static const struct dcn10_stream_encoder_shift se_shift = {
- SE_COMMON_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn10_stream_encoder_mask se_mask = {
- SE_COMMON_MASK_SH_LIST_DCN30(_MASK)
-};
-
-
-#define aux_regs(id)\
-[id] = {\
- DCN2_AUX_REG_LIST(id)\
-}
-
-static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = {
- aux_regs(0),
- aux_regs(1),
- aux_regs(2),
- aux_regs(3),
- aux_regs(4)
-};
-
-#define hpd_regs(id)\
-[id] = {\
- HPD_REG_LIST(id)\
-}
-
-static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = {
- hpd_regs(0),
- hpd_regs(1),
- hpd_regs(2),
- hpd_regs(3),
- hpd_regs(4)
-};
-
-#define link_regs(id, phyid)\
-[id] = {\
- LE_DCN31_REG_LIST(id), \
- UNIPHY_DCN2_REG_LIST(phyid), \
- DPCS_DCN31_REG_LIST(id), \
-}
-
-static const struct dce110_aux_registers_shift aux_shift = {
- DCN_AUX_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce110_aux_registers_mask aux_mask = {
- DCN_AUX_MASK_SH_LIST(_MASK)
-};
-
-static const struct dcn10_link_enc_registers link_enc_regs[] = {
- link_regs(0, A),
- link_regs(1, B),
- link_regs(2, C),
- link_regs(3, D),
- link_regs(4, E)
-};
-
-static const struct dcn10_link_enc_shift le_shift = {
- LINK_ENCODER_MASK_SH_LIST_DCN31(__SHIFT), \
- DPCS_DCN31_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn10_link_enc_mask le_mask = {
- LINK_ENCODER_MASK_SH_LIST_DCN31(_MASK), \
- DPCS_DCN31_MASK_SH_LIST(_MASK)
-};
-
-#define hpo_dp_stream_encoder_reg_list(id)\
-[id] = {\
- DCN3_1_HPO_DP_STREAM_ENC_REG_LIST(id)\
-}
-
-static const struct dcn31_hpo_dp_stream_encoder_registers hpo_dp_stream_enc_regs[] = {
- hpo_dp_stream_encoder_reg_list(0),
- hpo_dp_stream_encoder_reg_list(1),
- hpo_dp_stream_encoder_reg_list(2),
- hpo_dp_stream_encoder_reg_list(3),
-};
-
-static const struct dcn31_hpo_dp_stream_encoder_shift hpo_dp_se_shift = {
- DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn31_hpo_dp_stream_encoder_mask hpo_dp_se_mask = {
- DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(_MASK)
-};
-
-#define hpo_dp_link_encoder_reg_list(id)\
-[id] = {\
- DCN3_1_HPO_DP_LINK_ENC_REG_LIST(id),\
- DCN3_1_RDPCSTX_REG_LIST(0),\
- DCN3_1_RDPCSTX_REG_LIST(1),\
- DCN3_1_RDPCSTX_REG_LIST(2),\
- DCN3_1_RDPCSTX_REG_LIST(3),\
- DCN3_1_RDPCSTX_REG_LIST(4)\
-}
-
-static const struct dcn31_hpo_dp_link_encoder_registers hpo_dp_link_enc_regs[] = {
- hpo_dp_link_encoder_reg_list(0),
- hpo_dp_link_encoder_reg_list(1),
-};
-
-static const struct dcn31_hpo_dp_link_encoder_shift hpo_dp_le_shift = {
- DCN3_1_HPO_DP_LINK_ENC_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn31_hpo_dp_link_encoder_mask hpo_dp_le_mask = {
- DCN3_1_HPO_DP_LINK_ENC_MASK_SH_LIST(_MASK)
-};
-
-#define dpp_regs(id)\
-[id] = {\
- DPP_REG_LIST_DCN30(id),\
-}
-
-static const struct dcn3_dpp_registers dpp_regs[] = {
- dpp_regs(0),
- dpp_regs(1),
- dpp_regs(2),
- dpp_regs(3)
-};
-
-static const struct dcn3_dpp_shift tf_shift = {
- DPP_REG_LIST_SH_MASK_DCN30(__SHIFT)
-};
-
-static const struct dcn3_dpp_mask tf_mask = {
- DPP_REG_LIST_SH_MASK_DCN30(_MASK)
-};
-
-#define opp_regs(id)\
-[id] = {\
- OPP_REG_LIST_DCN30(id),\
-}
-
-static const struct dcn20_opp_registers opp_regs[] = {
- opp_regs(0),
- opp_regs(1),
- opp_regs(2),
- opp_regs(3)
-};
-
-static const struct dcn20_opp_shift opp_shift = {
- OPP_MASK_SH_LIST_DCN20(__SHIFT)
-};
-
-static const struct dcn20_opp_mask opp_mask = {
- OPP_MASK_SH_LIST_DCN20(_MASK)
-};
-
-#define aux_engine_regs(id)\
-[id] = {\
- AUX_COMMON_REG_LIST0(id), \
- .AUXN_IMPCAL = 0, \
- .AUXP_IMPCAL = 0, \
- .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \
-}
-
-static const struct dce110_aux_registers aux_engine_regs[] = {
- aux_engine_regs(0),
- aux_engine_regs(1),
- aux_engine_regs(2),
- aux_engine_regs(3),
- aux_engine_regs(4)
-};
-
-#define dwbc_regs_dcn3(id)\
-[id] = {\
- DWBC_COMMON_REG_LIST_DCN30(id),\
-}
-
-static const struct dcn30_dwbc_registers dwbc30_regs[] = {
- dwbc_regs_dcn3(0),
-};
-
-static const struct dcn30_dwbc_shift dwbc30_shift = {
- DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn30_dwbc_mask dwbc30_mask = {
- DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK)
-};
-
-#define mcif_wb_regs_dcn3(id)\
-[id] = {\
- MCIF_WB_COMMON_REG_LIST_DCN30(id),\
-}
-
-static const struct dcn30_mmhubbub_registers mcif_wb30_regs[] = {
- mcif_wb_regs_dcn3(0)
-};
-
-static const struct dcn30_mmhubbub_shift mcif_wb30_shift = {
- MCIF_WB_COMMON_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn30_mmhubbub_mask mcif_wb30_mask = {
- MCIF_WB_COMMON_MASK_SH_LIST_DCN30(_MASK)
-};
-
-#define dsc_regsDCN20(id)\
-[id] = {\
- DSC_REG_LIST_DCN20(id)\
-}
-
-static const struct dcn20_dsc_registers dsc_regs[] = {
- dsc_regsDCN20(0),
- dsc_regsDCN20(1),
- dsc_regsDCN20(2)
-};
-
-static const struct dcn20_dsc_shift dsc_shift = {
- DSC_REG_LIST_SH_MASK_DCN20(__SHIFT)
-};
-
-static const struct dcn20_dsc_mask dsc_mask = {
- DSC_REG_LIST_SH_MASK_DCN20(_MASK)
-};
-
-static const struct dcn30_mpc_registers mpc_regs = {
- MPC_REG_LIST_DCN3_0(0),
- MPC_REG_LIST_DCN3_0(1),
- MPC_REG_LIST_DCN3_0(2),
- MPC_REG_LIST_DCN3_0(3),
- MPC_OUT_MUX_REG_LIST_DCN3_0(0),
- MPC_OUT_MUX_REG_LIST_DCN3_0(1),
- MPC_OUT_MUX_REG_LIST_DCN3_0(2),
- MPC_OUT_MUX_REG_LIST_DCN3_0(3),
- MPC_RMU_GLOBAL_REG_LIST_DCN3AG,
- MPC_RMU_REG_LIST_DCN3AG(0),
- MPC_RMU_REG_LIST_DCN3AG(1),
- //MPC_RMU_REG_LIST_DCN3AG(2),
- MPC_DWB_MUX_REG_LIST_DCN3_0(0),
-};
-
-static const struct dcn30_mpc_shift mpc_shift = {
- MPC_COMMON_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dcn30_mpc_mask mpc_mask = {
- MPC_COMMON_MASK_SH_LIST_DCN30(_MASK)
-};
-
-#define optc_regs(id)\
-[id] = {OPTC_COMMON_REG_LIST_DCN3_1(id)}
-
-static const struct dcn_optc_registers optc_regs[] = {
- optc_regs(0),
- optc_regs(1),
- optc_regs(2),
- optc_regs(3)
-};
-
-static const struct dcn_optc_shift optc_shift = {
- OPTC_COMMON_MASK_SH_LIST_DCN3_1(__SHIFT)
-};
-
-static const struct dcn_optc_mask optc_mask = {
- OPTC_COMMON_MASK_SH_LIST_DCN3_1(_MASK)
-};
-
-#define hubp_regs(id)\
-[id] = {\
- HUBP_REG_LIST_DCN30(id)\
-}
-
-static const struct dcn_hubp2_registers hubp_regs[] = {
- hubp_regs(0),
- hubp_regs(1),
- hubp_regs(2),
- hubp_regs(3)
-};
-
-
-static const struct dcn_hubp2_shift hubp_shift = {
- HUBP_MASK_SH_LIST_DCN31(__SHIFT)
-};
-
-static const struct dcn_hubp2_mask hubp_mask = {
- HUBP_MASK_SH_LIST_DCN31(_MASK)
-};
-static const struct dcn_hubbub_registers hubbub_reg = {
- HUBBUB_REG_LIST_DCN31(0)
-};
-
-static const struct dcn_hubbub_shift hubbub_shift = {
- HUBBUB_MASK_SH_LIST_DCN31(__SHIFT)
-};
-
-static const struct dcn_hubbub_mask hubbub_mask = {
- HUBBUB_MASK_SH_LIST_DCN31(_MASK)
-};
-
-static const struct dccg_registers dccg_regs = {
- DCCG_REG_LIST_DCN31()
-};
-
-static const struct dccg_shift dccg_shift = {
- DCCG_MASK_SH_LIST_DCN31(__SHIFT)
-};
-
-static const struct dccg_mask dccg_mask = {
- DCCG_MASK_SH_LIST_DCN31(_MASK)
-};
-
-
-#define SRII2(reg_name_pre, reg_name_post, id)\
- .reg_name_pre ## _ ## reg_name_post[id] = BASE(reg ## reg_name_pre \
- ## id ## _ ## reg_name_post ## _BASE_IDX) + \
- reg ## reg_name_pre ## id ## _ ## reg_name_post
-
-
-#define HWSEQ_DCN31_REG_LIST()\
- SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \
- SR(DCHUBBUB_ARB_HOSTVM_CNTL), \
- SR(DIO_MEM_PWR_CTRL), \
- SR(ODM_MEM_PWR_CTRL3), \
- SR(DMU_MEM_PWR_CNTL), \
- SR(MMHUBBUB_MEM_PWR_CNTL), \
- SR(DCCG_GATE_DISABLE_CNTL), \
- SR(DCCG_GATE_DISABLE_CNTL2), \
- SR(DCFCLK_CNTL),\
- SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \
- SRII(PIXEL_RATE_CNTL, OTG, 0), \
- SRII(PIXEL_RATE_CNTL, OTG, 1),\
- SRII(PIXEL_RATE_CNTL, OTG, 2),\
- SRII(PIXEL_RATE_CNTL, OTG, 3),\
- SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 0),\
- SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 1),\
- SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 2),\
- SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 3),\
- SR(MICROSECOND_TIME_BASE_DIV), \
- SR(MILLISECOND_TIME_BASE_DIV), \
- SR(DISPCLK_FREQ_CHANGE_CNTL), \
- SR(RBBMIF_TIMEOUT_DIS), \
- SR(RBBMIF_TIMEOUT_DIS_2), \
- SR(DCHUBBUB_CRC_CTRL), \
- SR(DPP_TOP0_DPP_CRC_CTRL), \
- SR(DPP_TOP0_DPP_CRC_VAL_B_A), \
- SR(DPP_TOP0_DPP_CRC_VAL_R_G), \
- SR(MPC_CRC_CTRL), \
- SR(MPC_CRC_RESULT_GB), \
- SR(MPC_CRC_RESULT_C), \
- SR(MPC_CRC_RESULT_AR), \
- SR(DOMAIN0_PG_CONFIG), \
- SR(DOMAIN1_PG_CONFIG), \
- SR(DOMAIN2_PG_CONFIG), \
- SR(DOMAIN3_PG_CONFIG), \
- SR(DOMAIN16_PG_CONFIG), \
- SR(DOMAIN17_PG_CONFIG), \
- SR(DOMAIN18_PG_CONFIG), \
- SR(DOMAIN0_PG_STATUS), \
- SR(DOMAIN1_PG_STATUS), \
- SR(DOMAIN2_PG_STATUS), \
- SR(DOMAIN3_PG_STATUS), \
- SR(DOMAIN16_PG_STATUS), \
- SR(DOMAIN17_PG_STATUS), \
- SR(DOMAIN18_PG_STATUS), \
- SR(D1VGA_CONTROL), \
- SR(D2VGA_CONTROL), \
- SR(D3VGA_CONTROL), \
- SR(D4VGA_CONTROL), \
- SR(D5VGA_CONTROL), \
- SR(D6VGA_CONTROL), \
- SR(DC_IP_REQUEST_CNTL), \
- SR(AZALIA_AUDIO_DTO), \
- SR(AZALIA_CONTROLLER_CLOCK_GATING), \
- SR(HPO_TOP_HW_CONTROL)
-
-static const struct dce_hwseq_registers hwseq_reg = {
- HWSEQ_DCN31_REG_LIST()
-};
-
-#define HWSEQ_DCN31_MASK_SH_LIST(mask_sh)\
- HWSEQ_DCN_MASK_SH_LIST(mask_sh), \
- HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \
- HWS_SF(, DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, mask_sh), \
- HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
- HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
- HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
- HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
- HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
- HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
- HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
- HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
- HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
- HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
- HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
- HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
- HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
- HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
- HWS_SF(, DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
- HWS_SF(, DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
- HWS_SF(, DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
- HWS_SF(, DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
- HWS_SF(, DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
- HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
- HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
- HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \
- HWS_SF(, AZALIA_AUDIO_DTO, AZALIA_AUDIO_DTO_MODULE, mask_sh), \
- HWS_SF(, HPO_TOP_CLOCK_CONTROL, HPO_HDMISTREAMCLK_G_GATE_DIS, mask_sh), \
- HWS_SF(, DMU_MEM_PWR_CNTL, DMCU_ERAM_MEM_PWR_FORCE, mask_sh), \
- HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_UNASSIGNED_PWR_MODE, mask_sh), \
- HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_VBLANK_PWR_MODE, mask_sh), \
- HWS_SF(, MMHUBBUB_MEM_PWR_CNTL, VGA_MEM_PWR_FORCE, mask_sh), \
- HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh), \
- HWS_SF(, HPO_TOP_HW_CONTROL, HPO_IO_EN, mask_sh)
-
-static const struct dce_hwseq_shift hwseq_shift = {
- HWSEQ_DCN31_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dce_hwseq_mask hwseq_mask = {
- HWSEQ_DCN31_MASK_SH_LIST(_MASK)
-};
-#define vmid_regs(id)\
-[id] = {\
- DCN20_VMID_REG_LIST(id)\
-}
-
-static const struct dcn_vmid_registers vmid_regs[] = {
- vmid_regs(0),
- vmid_regs(1),
- vmid_regs(2),
- vmid_regs(3),
- vmid_regs(4),
- vmid_regs(5),
- vmid_regs(6),
- vmid_regs(7),
- vmid_regs(8),
- vmid_regs(9),
- vmid_regs(10),
- vmid_regs(11),
- vmid_regs(12),
- vmid_regs(13),
- vmid_regs(14),
- vmid_regs(15)
-};
-
-static const struct dcn20_vmid_shift vmid_shifts = {
- DCN20_VMID_MASK_SH_LIST(__SHIFT)
-};
-
-static const struct dcn20_vmid_mask vmid_masks = {
- DCN20_VMID_MASK_SH_LIST(_MASK)
-};
-
-static const struct resource_caps res_cap_dcn31 = {
- .num_timing_generator = 4,
- .num_opp = 4,
- .num_video_plane = 4,
- .num_audio = 5,
- .num_stream_encoder = 5,
- .num_dig_link_enc = 5,
- .num_hpo_dp_stream_encoder = 4,
- .num_hpo_dp_link_encoder = 2,
- .num_pll = 5,
- .num_dwb = 1,
- .num_ddc = 5,
- .num_vmid = 16,
- .num_mpc_3dlut = 2,
- .num_dsc = 3,
-};
-
-static const struct dc_plane_cap plane_cap = {
- .type = DC_PLANE_TYPE_DCN_UNIVERSAL,
- .blends_with_above = true,
- .blends_with_below = true,
- .per_pixel_alpha = true,
-
- .pixel_format_support = {
- .argb8888 = true,
- .nv12 = true,
- .fp16 = true,
- .p010 = false,
- .ayuv = false,
- },
-
- .max_upscale_factor = {
- .argb8888 = 16000,
- .nv12 = 16000,
- .fp16 = 16000
- },
-
- // 6:1 downscaling ratio: 1000/6 = 166.666
- .max_downscale_factor = {
- .argb8888 = 167,
- .nv12 = 167,
- .fp16 = 167
- },
- 64,
- 64
-};
-
-static const struct dc_debug_options debug_defaults_drv = {
- .disable_dmcu = true,
- .force_abm_enable = false,
- .timing_trace = false,
- .clock_trace = true,
- .disable_pplib_clock_request = false,
- .pipe_split_policy = MPC_SPLIT_AVOID,
- .force_single_disp_pipe_split = false,
- .disable_dcc = DCC_ENABLE,
- .vsr_support = true,
- .performance_trace = false,
- .max_downscale_src_width = 4096,/*upto true 4K*/
- .disable_pplib_wm_range = false,
- .scl_reset_length10 = true,
- .sanity_checks = false,
- .underflow_assert_delay_us = 0xFFFFFFFF,
- .dwb_fi_phase = -1, // -1 = disable,
- .dmub_command_table = true,
- .pstate_enabled = true,
- .use_max_lb = true,
- .enable_mem_low_power = {
- .bits = {
- .vga = true,
- .i2c = true,
- .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled
- .dscl = true,
- .cm = true,
- .mpc = true,
- .optc = true,
- .vpg = true,
- .afmt = true,
- }
- },
- .optimize_edp_link_rate = true,
- .enable_sw_cntl_psr = true,
-};
-
-static const struct dc_debug_options debug_defaults_diags = {
- .disable_dmcu = true,
- .force_abm_enable = false,
- .timing_trace = true,
- .clock_trace = true,
- .disable_dpp_power_gate = true,
- .disable_hubp_power_gate = true,
- .disable_clock_gate = true,
- .disable_pplib_clock_request = true,
- .disable_pplib_wm_range = true,
- .disable_stutter = false,
- .scl_reset_length10 = true,
- .dwb_fi_phase = -1, // -1 = disable
- .dmub_command_table = true,
- .enable_tri_buf = true,
- .use_max_lb = true
-};
-
-static void dcn31_dpp_destroy(struct dpp **dpp)
-{
- kfree(TO_DCN20_DPP(*dpp));
- *dpp = NULL;
-}
-
-static struct dpp *dcn31_dpp_create(
- struct dc_context *ctx,
- uint32_t inst)
-{
- struct dcn3_dpp *dpp =
- kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL);
-
- if (!dpp)
- return NULL;
-
- if (dpp3_construct(dpp, ctx, inst,
- &dpp_regs[inst], &tf_shift, &tf_mask))
- return &dpp->base;
-
- BREAK_TO_DEBUGGER();
- kfree(dpp);
- return NULL;
-}
-
-static struct output_pixel_processor *dcn31_opp_create(
- struct dc_context *ctx, uint32_t inst)
-{
- struct dcn20_opp *opp =
- kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL);
-
- if (!opp) {
- BREAK_TO_DEBUGGER();
- return NULL;
- }
-
- dcn20_opp_construct(opp, ctx, inst,
- &opp_regs[inst], &opp_shift, &opp_mask);
- return &opp->base;
-}
-
-static struct dce_aux *dcn31_aux_engine_create(
- struct dc_context *ctx,
- uint32_t inst)
-{
- struct aux_engine_dce110 *aux_engine =
- kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL);
-
- if (!aux_engine)
- return NULL;
-
- dce110_aux_engine_construct(aux_engine, ctx, inst,
- SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD,
- &aux_engine_regs[inst],
- &aux_mask,
- &aux_shift,
- ctx->dc->caps.extended_aux_timeout_support);
-
- return &aux_engine->base;
-}
-#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST_DCN30(id) }
-
-static const struct dce_i2c_registers i2c_hw_regs[] = {
- i2c_inst_regs(1),
- i2c_inst_regs(2),
- i2c_inst_regs(3),
- i2c_inst_regs(4),
- i2c_inst_regs(5),
-};
-
-static const struct dce_i2c_shift i2c_shifts = {
- I2C_COMMON_MASK_SH_LIST_DCN30(__SHIFT)
-};
-
-static const struct dce_i2c_mask i2c_masks = {
- I2C_COMMON_MASK_SH_LIST_DCN30(_MASK)
-};
-
-static struct dce_i2c_hw *dcn31_i2c_hw_create(
- struct dc_context *ctx,
- uint32_t inst)
-{
- struct dce_i2c_hw *dce_i2c_hw =
- kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL);
-
- if (!dce_i2c_hw)
- return NULL;
-
- dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst,
- &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks);
-
- return dce_i2c_hw;
-}
-static struct mpc *dcn31_mpc_create(
- struct dc_context *ctx,
- int num_mpcc,
- int num_rmu)
-{
- struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc),
- GFP_KERNEL);
-
- if (!mpc30)
- return NULL;
-
- dcn30_mpc_construct(mpc30, ctx,
- &mpc_regs,
- &mpc_shift,
- &mpc_mask,
- num_mpcc,
- num_rmu);
-
- return &mpc30->base;
-}
-
-static struct hubbub *dcn31_hubbub_create(struct dc_context *ctx)
-{
- int i;
-
- struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub),
- GFP_KERNEL);
-
- if (!hubbub3)
- return NULL;
-
- hubbub31_construct(hubbub3, ctx,
- &hubbub_reg,
- &hubbub_shift,
- &hubbub_mask,
- dcn3_1_ip.det_buffer_size_kbytes,
- dcn3_1_ip.pixel_chunk_size_kbytes,
- dcn3_1_ip.config_return_buffer_size_in_kbytes);
-
-
- for (i = 0; i < res_cap_dcn31.num_vmid; i++) {
- struct dcn20_vmid *vmid = &hubbub3->vmid[i];
-
- vmid->ctx = ctx;
-
- vmid->regs = &vmid_regs[i];
- vmid->shifts = &vmid_shifts;
- vmid->masks = &vmid_masks;
- }
-
- return &hubbub3->base;
-}
-
-static struct timing_generator *dcn31_timing_generator_create(
- struct dc_context *ctx,
- uint32_t instance)
-{
- struct optc *tgn10 =
- kzalloc(sizeof(struct optc), GFP_KERNEL);
-
- if (!tgn10)
- return NULL;
-
- tgn10->base.inst = instance;
- tgn10->base.ctx = ctx;
-
- tgn10->tg_regs = &optc_regs[instance];
- tgn10->tg_shift = &optc_shift;
- tgn10->tg_mask = &optc_mask;
-
- dcn31_timing_generator_init(tgn10);
-
- return &tgn10->base;
-}
-
-static const struct encoder_feature_support link_enc_feature = {
- .max_hdmi_deep_color = COLOR_DEPTH_121212,
- .max_hdmi_pixel_clock = 600000,
- .hdmi_ycbcr420_supported = true,
- .dp_ycbcr420_supported = true,
- .fec_supported = true,
- .flags.bits.IS_HBR2_CAPABLE = true,
- .flags.bits.IS_HBR3_CAPABLE = true,
- .flags.bits.IS_TPS3_CAPABLE = true,
- .flags.bits.IS_TPS4_CAPABLE = true
-};
-
-static struct link_encoder *dcn31_link_encoder_create(
- const struct encoder_init_data *enc_init_data)
-{
- struct dcn20_link_encoder *enc20 =
- kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
-
- if (!enc20)
- return NULL;
-
- dcn31_link_encoder_construct(enc20,
- enc_init_data,
- &link_enc_feature,
- &link_enc_regs[enc_init_data->transmitter],
- &link_enc_aux_regs[enc_init_data->channel - 1],
- &link_enc_hpd_regs[enc_init_data->hpd_source],
- &le_shift,
- &le_mask);
-
- return &enc20->enc10.base;
-}
-
-/* Create a minimal link encoder object not associated with a particular
- * physical connector.
- * resource_funcs.link_enc_create_minimal
- */
-static struct link_encoder *dcn31_link_enc_create_minimal(
- struct dc_context *ctx, enum engine_id eng_id)
-{
- struct dcn20_link_encoder *enc20;
-
- if ((eng_id - ENGINE_ID_DIGA) > ctx->dc->res_pool->res_cap->num_dig_link_enc)
- return NULL;
-
- enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
- if (!enc20)
- return NULL;
-
- dcn31_link_encoder_construct_minimal(
- enc20,
- ctx,
- &link_enc_feature,
- &link_enc_regs[eng_id - ENGINE_ID_DIGA],
- eng_id);
-
- return &enc20->enc10.base;
-}
-
-struct panel_cntl *dcn31_panel_cntl_create(const struct panel_cntl_init_data *init_data)
-{
- struct dcn31_panel_cntl *panel_cntl =
- kzalloc(sizeof(struct dcn31_panel_cntl), GFP_KERNEL);
-
- if (!panel_cntl)
- return NULL;
-
- dcn31_panel_cntl_construct(panel_cntl, init_data);
-
- return &panel_cntl->base;
-}
-
-static void read_dce_straps(
- struct dc_context *ctx,
- struct resource_straps *straps)
-{
- generic_reg_get(ctx, regDC_PINSTRAPS + BASE(regDC_PINSTRAPS_BASE_IDX),
- FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio);
-
-}
-
-static struct audio *dcn31_create_audio(
- struct dc_context *ctx, unsigned int inst)
-{
- return dce_audio_create(ctx, inst,
- &audio_regs[inst], &audio_shift, &audio_mask);
-}
-
-static struct vpg *dcn31_vpg_create(
- struct dc_context *ctx,
- uint32_t inst)
-{
- struct dcn31_vpg *vpg31 = kzalloc(sizeof(struct dcn31_vpg), GFP_KERNEL);
-
- if (!vpg31)
- return NULL;
-
- vpg31_construct(vpg31, ctx, inst,
- &vpg_regs[inst],
- &vpg_shift,
- &vpg_mask);
-
- return &vpg31->base;
-}
-
-static struct afmt *dcn31_afmt_create(
- struct dc_context *ctx,
- uint32_t inst)
-{
- struct dcn31_afmt *afmt31 = kzalloc(sizeof(struct dcn31_afmt), GFP_KERNEL);
-
- if (!afmt31)
- return NULL;
-
- afmt31_construct(afmt31, ctx, inst,
- &afmt_regs[inst],
- &afmt_shift,
- &afmt_mask);
-
- // Light sleep by default, no need to power down here
-
- return &afmt31->base;
-}
-
-static struct apg *dcn31_apg_create(
- struct dc_context *ctx,
- uint32_t inst)
-{
- struct dcn31_apg *apg31 = kzalloc(sizeof(struct dcn31_apg), GFP_KERNEL);
-
- if (!apg31)
- return NULL;
-
- apg31_construct(apg31, ctx, inst,
- &apg_regs[inst],
- &apg_shift,
- &apg_mask);
-
- return &apg31->base;
-}
-
-static struct stream_encoder *dcn31_stream_encoder_create(
- enum engine_id eng_id,
- struct dc_context *ctx)
-{
- struct dcn10_stream_encoder *enc1;
- struct vpg *vpg;
- struct afmt *afmt;
- int vpg_inst;
- int afmt_inst;
-
- /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
- if (eng_id <= ENGINE_ID_DIGF) {
- vpg_inst = eng_id;
- afmt_inst = eng_id;
- } else
- return NULL;
-
- enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL);
- vpg = dcn31_vpg_create(ctx, vpg_inst);
- afmt = dcn31_afmt_create(ctx, afmt_inst);
-
- if (!enc1 || !vpg || !afmt) {
- kfree(enc1);
- kfree(vpg);
- kfree(afmt);
- return NULL;
- }
-
- if (ctx->asic_id.chip_family == FAMILY_YELLOW_CARP &&
- ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) {
- if ((eng_id == ENGINE_ID_DIGC) || (eng_id == ENGINE_ID_DIGD))
- eng_id = eng_id + 3; // For B0 only. C->F, D->G.
- }
-
- dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios,
- eng_id, vpg, afmt,
- &stream_enc_regs[eng_id],
- &se_shift, &se_mask);
-
- return &enc1->base;
-}
-
-static struct hpo_dp_stream_encoder *dcn31_hpo_dp_stream_encoder_create(
- enum engine_id eng_id,
- struct dc_context *ctx)
-{
- struct dcn31_hpo_dp_stream_encoder *hpo_dp_enc31;
- struct vpg *vpg;
- struct apg *apg;
- uint32_t hpo_dp_inst;
- uint32_t vpg_inst;
- uint32_t apg_inst;
-
- ASSERT((eng_id >= ENGINE_ID_HPO_DP_0) && (eng_id <= ENGINE_ID_HPO_DP_3));
- hpo_dp_inst = eng_id - ENGINE_ID_HPO_DP_0;
-
- /* Mapping of VPG register blocks to HPO DP block instance:
- * VPG[6] -> HPO_DP[0]
- * VPG[7] -> HPO_DP[1]
- * VPG[8] -> HPO_DP[2]
- * VPG[9] -> HPO_DP[3]
- */
- vpg_inst = hpo_dp_inst + 6;
-
- /* Mapping of APG register blocks to HPO DP block instance:
- * APG[0] -> HPO_DP[0]
- * APG[1] -> HPO_DP[1]
- * APG[2] -> HPO_DP[2]
- * APG[3] -> HPO_DP[3]
- */
- apg_inst = hpo_dp_inst;
-
- /* allocate HPO stream encoder and create VPG sub-block */
- hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_stream_encoder), GFP_KERNEL);
- vpg = dcn31_vpg_create(ctx, vpg_inst);
- apg = dcn31_apg_create(ctx, apg_inst);
-
- if (!hpo_dp_enc31 || !vpg || !apg) {
- kfree(hpo_dp_enc31);
- kfree(vpg);
- kfree(apg);
- return NULL;
- }
-
- dcn31_hpo_dp_stream_encoder_construct(hpo_dp_enc31, ctx, ctx->dc_bios,
- hpo_dp_inst, eng_id, vpg, apg,
- &hpo_dp_stream_enc_regs[hpo_dp_inst],
- &hpo_dp_se_shift, &hpo_dp_se_mask);
-
- return &hpo_dp_enc31->base;
-}
-
-static struct hpo_dp_link_encoder *dcn31_hpo_dp_link_encoder_create(
- uint8_t inst,
- struct dc_context *ctx)
-{
- struct dcn31_hpo_dp_link_encoder *hpo_dp_enc31;
-
- /* allocate HPO link encoder */
- hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL);
-
- hpo_dp_link_encoder31_construct(hpo_dp_enc31, ctx, inst,
- &hpo_dp_link_enc_regs[inst],
- &hpo_dp_le_shift, &hpo_dp_le_mask);
-
- return &hpo_dp_enc31->base;
-}
-
-static struct dce_hwseq *dcn31_hwseq_create(
- struct dc_context *ctx)
-{
- struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL);
-
- if (hws) {
- hws->ctx = ctx;
- hws->regs = &hwseq_reg;
- hws->shifts = &hwseq_shift;
- hws->masks = &hwseq_mask;
- /* DCN3.1 FPGA Workaround
- * Need to enable HPO DP Stream Encoder before setting OTG master enable.
- * To do so, move calling function enable_stream_timing to only be done AFTER calling
- * function core_link_enable_stream
- */
- if (IS_FPGA_MAXIMUS_DC(ctx->dce_environment))
- hws->wa.dp_hpo_and_otg_sequence = true;
- }
- return hws;
-}
-static const struct resource_create_funcs res_create_funcs = {
- .read_dce_straps = read_dce_straps,
- .create_audio = dcn31_create_audio,
- .create_stream_encoder = dcn31_stream_encoder_create,
- .create_hpo_dp_stream_encoder = dcn31_hpo_dp_stream_encoder_create,
- .create_hpo_dp_link_encoder = dcn31_hpo_dp_link_encoder_create,
- .create_hwseq = dcn31_hwseq_create,
-};
-
-static const struct resource_create_funcs res_create_maximus_funcs = {
- .read_dce_straps = NULL,
- .create_audio = NULL,
- .create_stream_encoder = NULL,
- .create_hpo_dp_stream_encoder = dcn31_hpo_dp_stream_encoder_create,
- .create_hpo_dp_link_encoder = dcn31_hpo_dp_link_encoder_create,
- .create_hwseq = dcn31_hwseq_create,
-};
-
-static void dcn31_resource_destruct(struct dcn31_resource_pool *pool)
-{
- unsigned int i;
-
- for (i = 0; i < pool->base.stream_enc_count; i++) {
- if (pool->base.stream_enc[i] != NULL) {
- if (pool->base.stream_enc[i]->vpg != NULL) {
- kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg));
- pool->base.stream_enc[i]->vpg = NULL;
- }
- if (pool->base.stream_enc[i]->afmt != NULL) {
- kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt));
- pool->base.stream_enc[i]->afmt = NULL;
- }
- kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i]));
- pool->base.stream_enc[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->base.hpo_dp_stream_enc_count; i++) {
- if (pool->base.hpo_dp_stream_enc[i] != NULL) {
- if (pool->base.hpo_dp_stream_enc[i]->vpg != NULL) {
- kfree(DCN30_VPG_FROM_VPG(pool->base.hpo_dp_stream_enc[i]->vpg));
- pool->base.hpo_dp_stream_enc[i]->vpg = NULL;
- }
- if (pool->base.hpo_dp_stream_enc[i]->apg != NULL) {
- kfree(DCN31_APG_FROM_APG(pool->base.hpo_dp_stream_enc[i]->apg));
- pool->base.hpo_dp_stream_enc[i]->apg = NULL;
- }
- kfree(DCN3_1_HPO_DP_STREAM_ENC_FROM_HPO_STREAM_ENC(pool->base.hpo_dp_stream_enc[i]));
- pool->base.hpo_dp_stream_enc[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->base.hpo_dp_link_enc_count; i++) {
- if (pool->base.hpo_dp_link_enc[i] != NULL) {
- kfree(DCN3_1_HPO_DP_LINK_ENC_FROM_HPO_LINK_ENC(pool->base.hpo_dp_link_enc[i]));
- pool->base.hpo_dp_link_enc[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_dsc; i++) {
- if (pool->base.dscs[i] != NULL)
- dcn20_dsc_destroy(&pool->base.dscs[i]);
- }
-
- if (pool->base.mpc != NULL) {
- kfree(TO_DCN20_MPC(pool->base.mpc));
- pool->base.mpc = NULL;
- }
- if (pool->base.hubbub != NULL) {
- kfree(pool->base.hubbub);
- pool->base.hubbub = NULL;
- }
- for (i = 0; i < pool->base.pipe_count; i++) {
- if (pool->base.dpps[i] != NULL)
- dcn31_dpp_destroy(&pool->base.dpps[i]);
-
- if (pool->base.ipps[i] != NULL)
- pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]);
-
- if (pool->base.hubps[i] != NULL) {
- kfree(TO_DCN20_HUBP(pool->base.hubps[i]));
- pool->base.hubps[i] = NULL;
- }
-
- if (pool->base.irqs != NULL) {
- dal_irq_service_destroy(&pool->base.irqs);
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_ddc; i++) {
- if (pool->base.engines[i] != NULL)
- dce110_engine_destroy(&pool->base.engines[i]);
- if (pool->base.hw_i2cs[i] != NULL) {
- kfree(pool->base.hw_i2cs[i]);
- pool->base.hw_i2cs[i] = NULL;
- }
- if (pool->base.sw_i2cs[i] != NULL) {
- kfree(pool->base.sw_i2cs[i]);
- pool->base.sw_i2cs[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_opp; i++) {
- if (pool->base.opps[i] != NULL)
- pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]);
- }
-
- for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
- if (pool->base.timing_generators[i] != NULL) {
- kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i]));
- pool->base.timing_generators[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_dwb; i++) {
- if (pool->base.dwbc[i] != NULL) {
- kfree(TO_DCN30_DWBC(pool->base.dwbc[i]));
- pool->base.dwbc[i] = NULL;
- }
- if (pool->base.mcif_wb[i] != NULL) {
- kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i]));
- pool->base.mcif_wb[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->base.audio_count; i++) {
- if (pool->base.audios[i])
- dce_aud_destroy(&pool->base.audios[i]);
- }
-
- for (i = 0; i < pool->base.clk_src_count; i++) {
- if (pool->base.clock_sources[i] != NULL) {
- dcn20_clock_source_destroy(&pool->base.clock_sources[i]);
- pool->base.clock_sources[i] = NULL;
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) {
- if (pool->base.mpc_lut[i] != NULL) {
- dc_3dlut_func_release(pool->base.mpc_lut[i]);
- pool->base.mpc_lut[i] = NULL;
- }
- if (pool->base.mpc_shaper[i] != NULL) {
- dc_transfer_func_release(pool->base.mpc_shaper[i]);
- pool->base.mpc_shaper[i] = NULL;
- }
- }
-
- if (pool->base.dp_clock_source != NULL) {
- dcn20_clock_source_destroy(&pool->base.dp_clock_source);
- pool->base.dp_clock_source = NULL;
- }
-
- for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
- if (pool->base.multiple_abms[i] != NULL)
- dce_abm_destroy(&pool->base.multiple_abms[i]);
- }
-
- if (pool->base.psr != NULL)
- dmub_psr_destroy(&pool->base.psr);
-
- if (pool->base.dccg != NULL)
- dcn_dccg_destroy(&pool->base.dccg);
-}
-
-static struct hubp *dcn31_hubp_create(
- struct dc_context *ctx,
- uint32_t inst)
-{
- struct dcn20_hubp *hubp2 =
- kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL);
-
- if (!hubp2)
- return NULL;
-
- if (hubp31_construct(hubp2, ctx, inst,
- &hubp_regs[inst], &hubp_shift, &hubp_mask))
- return &hubp2->base;
-
- BREAK_TO_DEBUGGER();
- kfree(hubp2);
- return NULL;
-}
-
-static bool dcn31_dwbc_create(struct dc_context *ctx, struct resource_pool *pool)
-{
- int i;
- uint32_t pipe_count = pool->res_cap->num_dwb;
-
- for (i = 0; i < pipe_count; i++) {
- struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc),
- GFP_KERNEL);
-
- if (!dwbc30) {
- dm_error("DC: failed to create dwbc30!\n");
- return false;
- }
-
- dcn30_dwbc_construct(dwbc30, ctx,
- &dwbc30_regs[i],
- &dwbc30_shift,
- &dwbc30_mask,
- i);
-
- pool->dwbc[i] = &dwbc30->base;
- }
- return true;
-}
-
-static bool dcn31_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool)
-{
- int i;
- uint32_t pipe_count = pool->res_cap->num_dwb;
-
- for (i = 0; i < pipe_count; i++) {
- struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub),
- GFP_KERNEL);
-
- if (!mcif_wb30) {
- dm_error("DC: failed to create mcif_wb30!\n");
- return false;
- }
-
- dcn30_mmhubbub_construct(mcif_wb30, ctx,
- &mcif_wb30_regs[i],
- &mcif_wb30_shift,
- &mcif_wb30_mask,
- i);
-
- pool->mcif_wb[i] = &mcif_wb30->base;
- }
- return true;
-}
-
-static struct display_stream_compressor *dcn31_dsc_create(
- struct dc_context *ctx, uint32_t inst)
-{
- struct dcn20_dsc *dsc =
- kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL);
-
- if (!dsc) {
- BREAK_TO_DEBUGGER();
- return NULL;
- }
-
- dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask);
- return &dsc->base;
-}
-
-static void dcn31_destroy_resource_pool(struct resource_pool **pool)
-{
- struct dcn31_resource_pool *dcn31_pool = TO_DCN31_RES_POOL(*pool);
-
- dcn31_resource_destruct(dcn31_pool);
- kfree(dcn31_pool);
- *pool = NULL;
-}
-
-static struct clock_source *dcn31_clock_source_create(
- struct dc_context *ctx,
- struct dc_bios *bios,
- enum clock_source_id id,
- const struct dce110_clk_src_regs *regs,
- bool dp_clk_src)
-{
- struct dce110_clk_src *clk_src =
- kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL);
-
- if (!clk_src)
- return NULL;
-
- if (dcn3_clk_src_construct(clk_src, ctx, bios, id,
- regs, &cs_shift, &cs_mask)) {
- clk_src->base.dp_clk_src = dp_clk_src;
- return &clk_src->base;
- }
-
- BREAK_TO_DEBUGGER();
- return NULL;
-}
-
-static bool is_dual_plane(enum surface_pixel_format format)
-{
- return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
-}
-
-static int dcn31_populate_dml_pipes_from_context(
- struct dc *dc, struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- bool fast_validate)
-{
- int i, pipe_cnt;
- struct resource_context *res_ctx = &context->res_ctx;
- struct pipe_ctx *pipe;
-
- dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
-
- for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
- struct dc_crtc_timing *timing;
-
- if (!res_ctx->pipe_ctx[i].stream)
- continue;
- pipe = &res_ctx->pipe_ctx[i];
- timing = &pipe->stream->timing;
-
- /*
- * Immediate flip can be set dynamically after enabling the plane.
- * We need to require support for immediate flip or underflow can be
- * intermittently experienced depending on peak b/w requirements.
- */
- pipes[pipe_cnt].pipe.src.immediate_flip = true;
-
- pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
- pipes[pipe_cnt].pipe.src.gpuvm = true;
- pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
- pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
- pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
- pipes[pipe_cnt].pipe.src.dcc_rate = 3;
- pipes[pipe_cnt].dout.dsc_input_bpc = 0;
-
- if (pipes[pipe_cnt].dout.dsc_enable) {
- switch (timing->display_color_depth) {
- case COLOR_DEPTH_888:
- pipes[pipe_cnt].dout.dsc_input_bpc = 8;
- break;
- case COLOR_DEPTH_101010:
- pipes[pipe_cnt].dout.dsc_input_bpc = 10;
- break;
- case COLOR_DEPTH_121212:
- pipes[pipe_cnt].dout.dsc_input_bpc = 12;
- break;
- default:
- ASSERT(0);
- break;
- }
- }
-
- pipe_cnt++;
- }
- context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_1_DEFAULT_DET_SIZE;
- dc->config.enable_4to1MPC = false;
- if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) {
- if (is_dual_plane(pipe->plane_state->format)
- && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) {
- dc->config.enable_4to1MPC = true;
- } else if (!is_dual_plane(pipe->plane_state->format)) {
- context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
- pipes[0].pipe.src.unbounded_req_mode = true;
- }
- }
-
- return pipe_cnt;
-}
-
-void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
-{
- if (dc->clk_mgr->bw_params->wm_table.entries[WM_A].valid) {
- context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].pstate_latency_us;
- context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_enter_plus_exit_time_us;
- context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_exit_time_us;
- }
-}
-
-static void dcn31_calculate_wm_and_dlg_fp(
- struct dc *dc, struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- int pipe_cnt,
- int vlevel)
-{
- int i, pipe_idx;
- double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
-
- if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk)
- dcfclk = context->bw_ctx.dml.soc.min_dcfclk;
-
- /* We don't recalculate clocks for 0 pipe configs, which can block
- * S0i3 as high clocks will block low power states
- * Override any clocks that can block S0i3 to min here
- */
- if (pipe_cnt == 0) {
- context->bw_ctx.bw.dcn.clk.dcfclk_khz = dcfclk; // always should be vlevel 0
- return;
- }
-
- pipes[0].clks_cfg.voltage = vlevel;
- pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
- pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
-
-#if 0 // TODO
- /* Set B:
- * TODO
- */
- if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) {
- if (vlevel == 0) {
- pipes[0].clks_cfg.voltage = 1;
- pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz;
- }
- context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us;
- context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us;
- context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us;
- }
- context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-
- pipes[0].clks_cfg.voltage = vlevel;
- pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
-
- /* Set C:
- * TODO
- */
- if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) {
- context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us;
- context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us;
- context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us;
- }
- context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-
- /* Set D:
- * TODO
- */
- if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) {
- context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us;
- context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us;
- context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us;
- }
- context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-#endif
-
- /* Set A:
- * All clocks min required
- *
- * Set A calculated last so that following calculations are based on Set A
- */
- dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
- context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- /* TODO: remove: */
- context->bw_ctx.bw.dcn.watermarks.b = context->bw_ctx.bw.dcn.watermarks.a;
- context->bw_ctx.bw.dcn.watermarks.c = context->bw_ctx.bw.dcn.watermarks.a;
- context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a;
- /* end remove*/
-
- for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
- if (!context->res_ctx.pipe_ctx[i].stream)
- continue;
-
- pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
- pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
-
- if (dc->config.forced_clocks) {
- pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
- pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
- }
- if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000)
- pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
- if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
- pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;
-
- pipe_idx++;
- }
-
- dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
-}
-
-void dcn31_calculate_wm_and_dlg(
- struct dc *dc, struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- int pipe_cnt,
- int vlevel)
-{
- DC_FP_START();
- dcn31_calculate_wm_and_dlg_fp(dc, context, pipes, pipe_cnt, vlevel);
- DC_FP_END();
-}
-
-bool dcn31_validate_bandwidth(struct dc *dc,
- struct dc_state *context,
- bool fast_validate)
-{
- bool out = false;
-
- BW_VAL_TRACE_SETUP();
-
- int vlevel = 0;
- int pipe_cnt = 0;
- display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
- DC_LOGGER_INIT(dc->ctx->logger);
-
- BW_VAL_TRACE_COUNT();
-
- out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate);
-
- // Disable fast_validate to set min dcfclk in alculate_wm_and_dlg
- if (pipe_cnt == 0)
- fast_validate = false;
-
- if (!out)
- goto validate_fail;
-
- BW_VAL_TRACE_END_VOLTAGE_LEVEL();
-
- if (fast_validate) {
- BW_VAL_TRACE_SKIP(fast);
- goto validate_out;
- }
-
- dc->res_pool->funcs->calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel);
-
- BW_VAL_TRACE_END_WATERMARKS();
-
- goto validate_out;
-
-validate_fail:
- DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n",
- dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states]));
-
- BW_VAL_TRACE_SKIP(fail);
- out = false;
-
-validate_out:
- kfree(pipes);
-
- BW_VAL_TRACE_FINISH();
-
- return out;
-}
-
-static struct dc_cap_funcs cap_funcs = {
- .get_dcc_compression_cap = dcn20_get_dcc_compression_cap
-};
-
-static void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
-{
- struct clk_limit_table *clk_table = &bw_params->clk_table;
- struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
- unsigned int i, closest_clk_lvl;
- int j;
-
- // Default clock levels are used for diags, which may lead to overclocking.
- if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
- int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
-
- dcn3_1_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
- dcn3_1_ip.max_num_dpp = dc->res_pool->pipe_count;
- dcn3_1_soc.num_chans = bw_params->num_channels;
-
- ASSERT(clk_table->num_entries);
-
- /* Prepass to find max clocks independent of voltage level. */
- for (i = 0; i < clk_table->num_entries; ++i) {
- if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
- max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
- if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
- max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
- }
-
- for (i = 0; i < clk_table->num_entries; i++) {
- /* loop backwards*/
- for (closest_clk_lvl = 0, j = dcn3_1_soc.num_states - 1; j >= 0; j--) {
- if ((unsigned int) dcn3_1_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
- closest_clk_lvl = j;
- break;
- }
- }
-
- clock_limits[i].state = i;
-
- /* Clocks dependent on voltage level. */
- clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
- clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
- clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
- clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
-
- /* Clocks independent of voltage level. */
- clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
- dcn3_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
-
- clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
- dcn3_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
-
- clock_limits[i].dram_bw_per_chan_gbps = dcn3_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
- clock_limits[i].dscclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
- clock_limits[i].dtbclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
- clock_limits[i].phyclk_d18_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
- clock_limits[i].phyclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
- }
- for (i = 0; i < clk_table->num_entries; i++)
- dcn3_1_soc.clock_limits[i] = clock_limits[i];
- if (clk_table->num_entries) {
- dcn3_1_soc.num_states = clk_table->num_entries;
- }
- }
-
- dcn3_1_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
- dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
-
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
- dml_init_instance(&dc->dml, &dcn3_1_soc, &dcn3_1_ip, DML_PROJECT_DCN31);
- else
- dml_init_instance(&dc->dml, &dcn3_1_soc, &dcn3_1_ip, DML_PROJECT_DCN31_FPGA);
-}
-
-static struct resource_funcs dcn31_res_pool_funcs = {
- .destroy = dcn31_destroy_resource_pool,
- .link_enc_create = dcn31_link_encoder_create,
- .link_enc_create_minimal = dcn31_link_enc_create_minimal,
- .link_encs_assign = link_enc_cfg_link_encs_assign,
- .link_enc_unassign = link_enc_cfg_link_enc_unassign,
- .panel_cntl_create = dcn31_panel_cntl_create,
- .validate_bandwidth = dcn31_validate_bandwidth,
- .calculate_wm_and_dlg = dcn31_calculate_wm_and_dlg,
- .update_soc_for_wm_a = dcn31_update_soc_for_wm_a,
- .populate_dml_pipes = dcn31_populate_dml_pipes_from_context,
- .acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer,
- .add_stream_to_ctx = dcn30_add_stream_to_ctx,
- .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
- .remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
- .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context,
- .set_mcif_arb_params = dcn30_set_mcif_arb_params,
- .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link,
- .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut,
- .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut,
- .update_bw_bounding_box = dcn31_update_bw_bounding_box,
- .patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
-};
-
-static struct clock_source *dcn30_clock_source_create(
- struct dc_context *ctx,
- struct dc_bios *bios,
- enum clock_source_id id,
- const struct dce110_clk_src_regs *regs,
- bool dp_clk_src)
-{
- struct dce110_clk_src *clk_src =
- kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL);
-
- if (!clk_src)
- return NULL;
-
- if (dcn3_clk_src_construct(clk_src, ctx, bios, id,
- regs, &cs_shift, &cs_mask)) {
- clk_src->base.dp_clk_src = dp_clk_src;
- return &clk_src->base;
- }
-
- BREAK_TO_DEBUGGER();
- return NULL;
-}
-
-static bool dcn31_resource_construct(
- uint8_t num_virtual_links,
- struct dc *dc,
- struct dcn31_resource_pool *pool)
-{
- int i;
- struct dc_context *ctx = dc->ctx;
- struct irq_service_init_data init_data;
-
- DC_FP_START();
-
- ctx->dc_bios->regs = &bios_regs;
-
- pool->base.res_cap = &res_cap_dcn31;
-
- pool->base.funcs = &dcn31_res_pool_funcs;
-
- /*************************************************
- * Resource + asic cap harcoding *
- *************************************************/
- pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE;
- pool->base.pipe_count = pool->base.res_cap->num_timing_generator;
- pool->base.mpcc_count = pool->base.res_cap->num_timing_generator;
- dc->caps.max_downscale_ratio = 600;
- dc->caps.i2c_speed_in_khz = 100;
- dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.4 w/a applied by default*/
- dc->caps.max_cursor_size = 256;
- dc->caps.min_horizontal_blanking_period = 80;
- dc->caps.dmdata_alloc_size = 2048;
-
- dc->caps.max_slave_planes = 1;
- dc->caps.max_slave_yuv_planes = 1;
- dc->caps.max_slave_rgb_planes = 1;
- dc->caps.post_blend_color_processing = true;
- dc->caps.force_dp_tps4_for_cp2520 = true;
- dc->caps.dp_hpo = true;
- dc->caps.extended_aux_timeout_support = true;
- dc->caps.dmcub_support = true;
- dc->caps.is_apu = true;
-
- /* Color pipeline capabilities */
- dc->caps.color.dpp.dcn_arch = 1;
- dc->caps.color.dpp.input_lut_shared = 0;
- dc->caps.color.dpp.icsc = 1;
- dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr
- dc->caps.color.dpp.dgam_rom_caps.srgb = 1;
- dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1;
- dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1;
- dc->caps.color.dpp.dgam_rom_caps.pq = 1;
- dc->caps.color.dpp.dgam_rom_caps.hlg = 1;
- dc->caps.color.dpp.post_csc = 1;
- dc->caps.color.dpp.gamma_corr = 1;
- dc->caps.color.dpp.dgam_rom_for_yuv = 0;
-
- dc->caps.color.dpp.hw_3d_lut = 1;
- dc->caps.color.dpp.ogam_ram = 1;
- // no OGAM ROM on DCN301
- dc->caps.color.dpp.ogam_rom_caps.srgb = 0;
- dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0;
- dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0;
- dc->caps.color.dpp.ogam_rom_caps.pq = 0;
- dc->caps.color.dpp.ogam_rom_caps.hlg = 0;
- dc->caps.color.dpp.ocsc = 0;
-
- dc->caps.color.mpc.gamut_remap = 1;
- dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //2
- dc->caps.color.mpc.ogam_ram = 1;
- dc->caps.color.mpc.ogam_rom_caps.srgb = 0;
- dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0;
- dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0;
- dc->caps.color.mpc.ogam_rom_caps.pq = 0;
- dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
- dc->caps.color.mpc.ocsc = 1;
-
- /* read VBIOS LTTPR caps */
- {
- if (ctx->dc_bios->funcs->get_lttpr_caps) {
- enum bp_result bp_query_result;
- uint8_t is_vbios_lttpr_enable = 0;
-
- bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable);
- dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable;
- }
-
- /* interop bit is implicit */
- {
- dc->caps.vbios_lttpr_aware = true;
- }
- }
-
- if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV)
- dc->debug = debug_defaults_drv;
- else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) {
- dc->debug = debug_defaults_diags;
- } else
- dc->debug = debug_defaults_diags;
- // Init the vm_helper
- if (dc->vm_helper)
- vm_helper_init(dc->vm_helper, 16);
-
- /*************************************************
- * Create resources *
- *************************************************/
-
- /* Clock Sources for Pixel Clock*/
- pool->base.clock_sources[DCN31_CLK_SRC_PLL0] =
- dcn30_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL0,
- &clk_src_regs[0], false);
- pool->base.clock_sources[DCN31_CLK_SRC_PLL1] =
- dcn30_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL1,
- &clk_src_regs[1], false);
- pool->base.clock_sources[DCN31_CLK_SRC_PLL2] =
- dcn30_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL2,
- &clk_src_regs[2], false);
- pool->base.clock_sources[DCN31_CLK_SRC_PLL3] =
- dcn30_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL3,
- &clk_src_regs[3], false);
- pool->base.clock_sources[DCN31_CLK_SRC_PLL4] =
- dcn30_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL4,
- &clk_src_regs[4], false);
-
- pool->base.clk_src_count = DCN30_CLK_SRC_TOTAL;
-
- /* todo: not reuse phy_pll registers */
- pool->base.dp_clock_source =
- dcn31_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_ID_DP_DTO,
- &clk_src_regs[0], true);
-
- for (i = 0; i < pool->base.clk_src_count; i++) {
- if (pool->base.clock_sources[i] == NULL) {
- dm_error("DC: failed to create clock sources!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
- }
-
- /* TODO: DCCG */
- pool->base.dccg = dccg31_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask);
- if (pool->base.dccg == NULL) {
- dm_error("DC: failed to create dccg!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
-
- /* TODO: IRQ */
- init_data.ctx = dc->ctx;
- pool->base.irqs = dal_irq_service_dcn31_create(&init_data);
- if (!pool->base.irqs)
- goto create_fail;
-
- /* HUBBUB */
- pool->base.hubbub = dcn31_hubbub_create(ctx);
- if (pool->base.hubbub == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create hubbub!\n");
- goto create_fail;
- }
-
- /* HUBPs, DPPs, OPPs and TGs */
- for (i = 0; i < pool->base.pipe_count; i++) {
- pool->base.hubps[i] = dcn31_hubp_create(ctx, i);
- if (pool->base.hubps[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error(
- "DC: failed to create hubps!\n");
- goto create_fail;
- }
-
- pool->base.dpps[i] = dcn31_dpp_create(ctx, i);
- if (pool->base.dpps[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error(
- "DC: failed to create dpps!\n");
- goto create_fail;
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_opp; i++) {
- pool->base.opps[i] = dcn31_opp_create(ctx, i);
- if (pool->base.opps[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error(
- "DC: failed to create output pixel processor!\n");
- goto create_fail;
- }
- }
-
- for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
- pool->base.timing_generators[i] = dcn31_timing_generator_create(
- ctx, i);
- if (pool->base.timing_generators[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create tg!\n");
- goto create_fail;
- }
- }
- pool->base.timing_generator_count = i;
-
- /* PSR */
- pool->base.psr = dmub_psr_create(ctx);
- if (pool->base.psr == NULL) {
- dm_error("DC: failed to create psr obj!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
-
- /* ABM */
- for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
- pool->base.multiple_abms[i] = dmub_abm_create(ctx,
- &abm_regs[i],
- &abm_shift,
- &abm_mask);
- if (pool->base.multiple_abms[i] == NULL) {
- dm_error("DC: failed to create abm for pipe %d!\n", i);
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
- }
-
- /* MPC and DSC */
- pool->base.mpc = dcn31_mpc_create(ctx, pool->base.mpcc_count, pool->base.res_cap->num_mpc_3dlut);
- if (pool->base.mpc == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create mpc!\n");
- goto create_fail;
- }
-
- for (i = 0; i < pool->base.res_cap->num_dsc; i++) {
- pool->base.dscs[i] = dcn31_dsc_create(ctx, i);
- if (pool->base.dscs[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create display stream compressor %d!\n", i);
- goto create_fail;
- }
- }
-
- /* DWB and MMHUBBUB */
- if (!dcn31_dwbc_create(ctx, &pool->base)) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create dwbc!\n");
- goto create_fail;
- }
-
- if (!dcn31_mmhubbub_create(ctx, &pool->base)) {
- BREAK_TO_DEBUGGER();
- dm_error("DC: failed to create mcif_wb!\n");
- goto create_fail;
- }
-
- /* AUX and I2C */
- for (i = 0; i < pool->base.res_cap->num_ddc; i++) {
- pool->base.engines[i] = dcn31_aux_engine_create(ctx, i);
- if (pool->base.engines[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error(
- "DC:failed to create aux engine!!\n");
- goto create_fail;
- }
- pool->base.hw_i2cs[i] = dcn31_i2c_hw_create(ctx, i);
- if (pool->base.hw_i2cs[i] == NULL) {
- BREAK_TO_DEBUGGER();
- dm_error(
- "DC:failed to create hw i2c!!\n");
- goto create_fail;
- }
- pool->base.sw_i2cs[i] = NULL;
- }
-
- if (dc->ctx->asic_id.chip_family == FAMILY_YELLOW_CARP &&
- dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0 &&
- !dc->debug.dpia_debug.bits.disable_dpia) {
- /* YELLOW CARP B0 has 4 DPIA's */
- pool->base.usb4_dpia_count = 4;
- }
-
- /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */
- if (!resource_construct(num_virtual_links, dc, &pool->base,
- (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment) ?
- &res_create_funcs : &res_create_maximus_funcs)))
- goto create_fail;
-
- /* HW Sequencer and Plane caps */
- dcn31_hw_sequencer_construct(dc);
-
- dc->caps.max_planes = pool->base.pipe_count;
-
- for (i = 0; i < dc->caps.max_planes; ++i)
- dc->caps.planes[i] = plane_cap;
-
- dc->cap_funcs = cap_funcs;
-
- dc->dcn_ip->max_num_dpp = dcn3_1_ip.max_num_dpp;
-
- DC_FP_END();
-
- return true;
-
-create_fail:
-
- DC_FP_END();
- dcn31_resource_destruct(pool);
-
- return false;
-}
-
-struct resource_pool *dcn31_create_resource_pool(
- const struct dc_init_data *init_data,
- struct dc *dc)
-{
- struct dcn31_resource_pool *pool =
- kzalloc(sizeof(struct dcn31_resource_pool), GFP_KERNEL);
-
- if (!pool)
- return NULL;
-
- if (dcn31_resource_construct(init_data->num_virtual_links, dc, pool))
- return &pool->base;
-
- BREAK_TO_DEBUGGER();
- kfree(pool);
- return NULL;
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
deleted file mode 100644
index 416fe7a721d8..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright 2020 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#ifndef _DCN31_RESOURCE_H_
-#define _DCN31_RESOURCE_H_
-
-#include "core_types.h"
-
-#define TO_DCN31_RES_POOL(pool)\
- container_of(pool, struct dcn31_resource_pool, base)
-
-struct dcn31_resource_pool {
- struct resource_pool base;
-};
-
-bool dcn31_validate_bandwidth(struct dc *dc,
- struct dc_state *context,
- bool fast_validate);
-void dcn31_calculate_wm_and_dlg(
- struct dc *dc, struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- int pipe_cnt,
- int vlevel);
-void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context);
-
-struct resource_pool *dcn31_create_resource_pool(
- const struct dc_init_data *init_data,
- struct dc *dc);
-
-#endif /* _DCN31_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.c
index f1deb1c3c363..cfb923d85630 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.c
@@ -63,7 +63,12 @@ void vpg31_poweron(struct vpg *vpg)
{
struct dcn31_vpg *vpg31 = DCN31_VPG_FROM_VPG(vpg);
- if (vpg->ctx->dc->debug.enable_mem_low_power.bits.vpg == false)
+ uint32_t vpg_gsp_mem_pwr_state;
+
+ REG_GET(VPG_MEM_PWR, VPG_GSP_MEM_PWR_STATE, &vpg_gsp_mem_pwr_state);
+
+ if (vpg->ctx->dc->debug.enable_mem_low_power.bits.vpg == false &&
+ vpg_gsp_mem_pwr_state == 0)
return;
REG_UPDATE_2(VPG_MEM_PWR, VPG_GSP_MEM_LIGHT_SLEEP_DIS, 1, VPG_GSP_LIGHT_SLEEP_FORCE, 0);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.h
index 0e76eabce441..609e58dbd056 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.h
@@ -26,6 +26,7 @@
#ifndef __DAL_DCN31_VPG_H__
#define __DAL_DCN31_VPG_H__
+#include "vpg.h"
#define DCN31_VPG_FROM_VPG(vpg)\
container_of(vpg, struct dcn31_vpg, base)
diff --git a/drivers/gpu/drm/amd/display/dc/dio/Makefile b/drivers/gpu/drm/amd/display/dc/dio/Makefile
new file mode 100644
index 000000000000..0dfd480976f7
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/Makefile
@@ -0,0 +1,117 @@
+#
+# Copyright 2020 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Authors: AMD
+#
+#
+
+ifdef CONFIG_DRM_AMD_DC_FP
+###############################################################################
+# DCN10
+###############################################################################
+DIO_DCN10 = dcn10_link_encoder.o dcn10_stream_encoder.o
+
+AMD_DAL_DIO_DCN10 = $(addprefix $(AMDDALPATH)/dc/dio/dcn10/,$(DIO_DCN10))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN10)
+
+###############################################################################
+# DCN20
+###############################################################################
+DIO_DCN20 = dcn20_link_encoder.o dcn20_stream_encoder.o
+
+AMD_DAL_DIO_DCN20 = $(addprefix $(AMDDALPATH)/dc/dio/dcn20/,$(DIO_DCN20))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN20)
+
+###############################################################################
+# DCN30
+###############################################################################
+DIO_DCN30 = dcn30_dio_link_encoder.o dcn30_dio_stream_encoder.o
+
+AMD_DAL_DIO_DCN30 = $(addprefix $(AMDDALPATH)/dc/dio/dcn30/,$(DIO_DCN30))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN30)
+
+###############################################################################
+# DCN301
+###############################################################################
+DIO_DCN301 = dcn301_dio_link_encoder.o
+
+AMD_DAL_DIO_DCN301 = $(addprefix $(AMDDALPATH)/dc/dio/dcn301/,$(DIO_DCN301))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN301)
+
+###############################################################################
+# DCN31
+###############################################################################
+DIO_DCN31 = dcn31_dio_link_encoder.o
+
+AMD_DAL_DIO_DCN31 = $(addprefix $(AMDDALPATH)/dc/dio/dcn31/,$(DIO_DCN31))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN31)
+
+###############################################################################
+# DCN314
+###############################################################################
+DIO_DCN314 = dcn314_dio_stream_encoder.o
+
+AMD_DAL_DIO_DCN314 = $(addprefix $(AMDDALPATH)/dc/dio/dcn314/,$(DIO_DCN314))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN314)
+
+###############################################################################
+# DCN32
+###############################################################################
+DIO_DCN32 = dcn32_dio_link_encoder.o dcn32_dio_stream_encoder.o
+
+AMD_DAL_DIO_DCN32 = $(addprefix $(AMDDALPATH)/dc/dio/dcn32/,$(DIO_DCN32))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN32)
+
+###############################################################################
+# DCN35
+###############################################################################
+DIO_DCN35 = dcn35_dio_link_encoder.o dcn35_dio_stream_encoder.o
+
+AMD_DAL_DIO_DCN35 = $(addprefix $(AMDDALPATH)/dc/dio/dcn35/,$(DIO_DCN35))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN35)
+
+###############################################################################
+# DCN321
+###############################################################################
+DIO_DCN321 = dcn321_dio_link_encoder.o
+
+AMD_DAL_DIO_DCN321 = $(addprefix $(AMDDALPATH)/dc/dio/dcn321/,$(DIO_DCN321))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN321)
+
+
+###############################################################################
+# DCN401
+###############################################################################
+DIO_DCN401 = dcn401_dio_link_encoder.o dcn401_dio_stream_encoder.o
+
+AMD_DAL_DIO_DCN401 = $(addprefix $(AMDDALPATH)/dc/dio/dcn401/,$(DIO_DCN401))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN401)
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.c
index 2dc4b4e4ba02..1c1228116487 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.c
@@ -23,16 +23,12 @@
*
*/
-#include <linux/delay.h>
-#include <linux/slab.h>
-
#include "reg_helper.h"
#include "core_types.h"
#include "link_encoder.h"
#include "dcn10_link_encoder.h"
#include "stream_encoder.h"
-#include "i2caux_interface.h"
#include "dc_bios_types.h"
#include "gpio_service_interface.h"
@@ -646,8 +642,9 @@ static bool dcn10_link_encoder_validate_hdmi_output(
crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420)
return false;
- if (!enc10->base.features.flags.bits.HDMI_6GB_EN &&
- adjusted_pix_clk_100hz >= 3000000)
+ if ((!enc10->base.features.flags.bits.HDMI_6GB_EN ||
+ enc10->base.ctx->dc->debug.hdmi20_disable) &&
+ adjusted_pix_clk_100hz >= 3000000)
return false;
if (enc10->base.ctx->dc->debug.hdmi20_disable &&
crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420)
@@ -815,7 +812,7 @@ bool dcn10_link_encoder_validate_output_with_stream(
enc10, &stream->timing);
break;
case SIGNAL_TYPE_EDP:
- is_valid = (stream->timing.pixel_encoding == PIXEL_ENCODING_RGB) ? true : false;
+ is_valid = stream->timing.pixel_encoding == PIXEL_ENCODING_RGB;
break;
case SIGNAL_TYPE_VIRTUAL:
is_valid = true;
@@ -1059,7 +1056,7 @@ void dcn10_link_encoder_disable_output(
struct bp_transmitter_control cntl = { 0 };
enum bp_result result;
- if (!dcn10_is_dig_enabled(enc)) {
+ if (enc->funcs->is_dig_enabled && !enc->funcs->is_dig_enabled(enc)) {
/* OF_SKIP_POWER_DOWN_INACTIVE_ENCODER */
/*in DP_Alt_No_Connect case, we turn off the dig already,
after excuation the PHY w/a sequence, not allow touch PHY any more*/
@@ -1100,12 +1097,14 @@ void dcn10_link_encoder_disable_output(
void dcn10_link_encoder_dp_set_lane_settings(
struct link_encoder *enc,
- const struct link_training_settings *link_settings)
+ const struct dc_link_settings *link_settings,
+ const struct dc_lane_settings lane_settings[LANE_COUNT_DP_MAX])
{
struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
union dpcd_training_lane_set training_lane_set = { { 0 } };
int32_t lane = 0;
struct bp_transmitter_control cntl = { 0 };
+ enum bp_result result;
if (!link_settings) {
BREAK_TO_DEBUGGER();
@@ -1115,33 +1114,37 @@ void dcn10_link_encoder_dp_set_lane_settings(
cntl.action = TRANSMITTER_CONTROL_SET_VOLTAGE_AND_PREEMPASIS;
cntl.transmitter = enc10->base.transmitter;
cntl.connector_obj_id = enc10->base.connector;
- cntl.lanes_number = link_settings->link_settings.lane_count;
+ cntl.lanes_number = link_settings->lane_count;
cntl.hpd_sel = enc10->base.hpd_source;
- cntl.pixel_clock = link_settings->link_settings.link_rate *
- LINK_RATE_REF_FREQ_IN_KHZ;
+ cntl.pixel_clock = link_settings->link_rate * LINK_RATE_REF_FREQ_IN_KHZ;
- for (lane = 0; lane < link_settings->link_settings.lane_count; lane++) {
+ for (lane = 0; lane < link_settings->lane_count; lane++) {
/* translate lane settings */
training_lane_set.bits.VOLTAGE_SWING_SET =
- link_settings->lane_settings[lane].VOLTAGE_SWING;
+ lane_settings[lane].VOLTAGE_SWING;
training_lane_set.bits.PRE_EMPHASIS_SET =
- link_settings->lane_settings[lane].PRE_EMPHASIS;
+ lane_settings[lane].PRE_EMPHASIS;
/* post cursor 2 setting only applies to HBR2 link rate */
- if (link_settings->link_settings.link_rate == LINK_RATE_HIGH2) {
+ if (link_settings->link_rate == LINK_RATE_HIGH2) {
/* this is passed to VBIOS
* to program post cursor 2 level
*/
training_lane_set.bits.POST_CURSOR2_SET =
- link_settings->lane_settings[lane].POST_CURSOR2;
+ lane_settings[lane].POST_CURSOR2;
}
cntl.lane_select = lane;
cntl.lane_settings = training_lane_set.raw;
/* call VBIOS table to set voltage swing and pre-emphasis */
- link_transmitter_control(enc10, &cntl);
+ result = link_transmitter_control(enc10, &cntl);
+
+ if (result != BP_RESULT_OK) {
+ DC_LOG_ERROR("%s: Failed to execute VBIOS command table!\n", __func__);
+ BREAK_TO_DEBUGGER();
+ }
}
}
@@ -1222,7 +1225,6 @@ void dcn10_link_encoder_update_mst_stream_allocation_table(
const struct link_mst_stream_allocation_table *table)
{
struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
- uint32_t value0 = 0;
uint32_t value1 = 0;
uint32_t value2 = 0;
uint32_t slots = 0;
@@ -1324,7 +1326,7 @@ void dcn10_link_encoder_update_mst_stream_allocation_table(
do {
udelay(10);
- value0 = REG_READ(DP_MSE_SAT_UPDATE);
+ REG_READ(DP_MSE_SAT_UPDATE);
REG_GET(DP_MSE_SAT_UPDATE,
DP_MSE_SAT_UPDATE, &value1);
@@ -1443,7 +1445,6 @@ enum signal_type dcn10_get_dig_mode(
default:
return SIGNAL_TYPE_NONE;
}
- return SIGNAL_TYPE_NONE;
}
void dcn10_link_encoder_get_max_link_cap(struct link_encoder *enc,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.h
index c337588231ff..b7a89c39f445 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.h
@@ -167,6 +167,8 @@ struct dcn10_link_enc_registers {
uint32_t DIO_LINKD_CNTL;
uint32_t DIO_LINKE_CNTL;
uint32_t DIO_LINKF_CNTL;
+ uint32_t DIO_CLK_CNTL;
+ uint32_t DIG_BE_CLK_CNTL;
};
#define LE_SF(reg_name, field_name, post_fix)\
@@ -472,11 +474,41 @@ struct dcn10_link_enc_registers {
type HPO_DP_ENC_SEL;\
type HPO_HDMI_ENC_SEL
+#define DCN35_LINK_ENCODER_REG_FIELD_LIST(type) \
+ type DIG_BE_ENABLE;\
+ type DIG_RB_SWITCH_EN;\
+ type DIG_BE_MODE;\
+ type DIG_BE_CLK_EN;\
+ type DIG_BE_SOFT_RESET;\
+ type HDCP_SOFT_RESET;\
+ type DIG_BE_SYMCLK_G_CLOCK_ON;\
+ type DIG_BE_SYMCLK_G_HDCP_CLOCK_ON;\
+ type DIG_BE_SYMCLK_G_TMDS_CLOCK_ON;\
+ type DISPCLK_R_GATE_DIS;\
+ type DISPCLK_G_GATE_DIS;\
+ type REFCLK_R_GATE_DIS;\
+ type REFCLK_G_GATE_DIS;\
+ type SOCCLK_G_GATE_DIS;\
+ type SYMCLK_FE_R_GATE_DIS;\
+ type SYMCLK_FE_G_GATE_DIS;\
+ type SYMCLK_R_GATE_DIS;\
+ type SYMCLK_G_GATE_DIS;\
+ type DIO_FGCG_REP_DIS;\
+ type DISPCLK_G_HDCP_GATE_DIS;\
+ type SYMCLKA_G_HDCP_GATE_DIS;\
+ type SYMCLKB_G_HDCP_GATE_DIS;\
+ type SYMCLKC_G_HDCP_GATE_DIS;\
+ type SYMCLKD_G_HDCP_GATE_DIS;\
+ type SYMCLKE_G_HDCP_GATE_DIS;\
+ type SYMCLKF_G_HDCP_GATE_DIS;\
+ type SYMCLKG_G_HDCP_GATE_DIS
+
struct dcn10_link_enc_shift {
DCN_LINK_ENCODER_REG_FIELD_LIST(uint8_t);
DCN20_LINK_ENCODER_REG_FIELD_LIST(uint8_t);
DCN30_LINK_ENCODER_REG_FIELD_LIST(uint8_t);
DCN31_LINK_ENCODER_REG_FIELD_LIST(uint8_t);
+ DCN35_LINK_ENCODER_REG_FIELD_LIST(uint8_t);
};
struct dcn10_link_enc_mask {
@@ -484,6 +516,7 @@ struct dcn10_link_enc_mask {
DCN20_LINK_ENCODER_REG_FIELD_LIST(uint32_t);
DCN30_LINK_ENCODER_REG_FIELD_LIST(uint32_t);
DCN31_LINK_ENCODER_REG_FIELD_LIST(uint32_t);
+ DCN35_LINK_ENCODER_REG_FIELD_LIST(uint32_t);
};
struct dcn10_link_encoder {
@@ -581,7 +614,8 @@ void dcn10_link_encoder_disable_output(
/* set DP lane settings */
void dcn10_link_encoder_dp_set_lane_settings(
struct link_encoder *enc,
- const struct link_training_settings *link_settings);
+ const struct dc_link_settings *link_settings,
+ const struct dc_lane_settings lane_settings[LANE_COUNT_DP_MAX]);
void dcn10_link_encoder_dp_set_phy_pattern(
struct link_encoder *enc,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c
index b0c08ee6bc2c..d928b4dcf6b8 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c
@@ -23,20 +23,18 @@
*
*/
-#include <linux/delay.h>
-
+#include "dm_services.h"
#include "dc_bios_types.h"
#include "dcn10_stream_encoder.h"
#include "reg_helper.h"
#include "hw_shared.h"
-#include "inc/link_dpcd.h"
+#include "link_service.h"
#include "dpcd_defs.h"
#include "dcn30/dcn30_afmt.h"
#define DC_LOGGER \
enc1->base.ctx->logger
-
#define REG(reg)\
(enc1->regs->reg)
@@ -61,7 +59,6 @@ void enc1_update_generic_info_packet(
uint32_t packet_index,
const struct dc_info_packet *info_packet)
{
- uint32_t regval;
/* TODOFPGA Figure out a proper number for max_retries polling for lock
* use 50 for now.
*/
@@ -90,7 +87,6 @@ void enc1_update_generic_info_packet(
REG_UPDATE(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT_CLR, 1);
/* choose which generic packet to use */
- regval = REG_READ(AFMT_VBI_PACKET_CONTROL);
REG_UPDATE(AFMT_VBI_PACKET_CONTROL,
AFMT_GENERIC_INDEX, packet_index);
@@ -259,10 +255,7 @@ void enc1_stream_encoder_dp_set_stream_attribute(
uint32_t misc1 = 0;
uint32_t h_blank;
uint32_t h_back_porch;
- uint8_t synchronous_clock = 0; /* asynchronous mode */
uint8_t colorimetry_bpc;
- uint8_t dynamic_range_rgb = 0; /*full range*/
- uint8_t dynamic_range_ycbcr = 1; /*bt709*/
uint8_t dp_pixel_encoding = 0;
uint8_t dp_component_depth = 0;
@@ -368,24 +361,20 @@ void enc1_stream_encoder_dp_set_stream_attribute(
break;
}
- misc0 = misc0 | synchronous_clock;
misc0 = colorimetry_bpc << 5;
switch (output_color_space) {
case COLOR_SPACE_SRGB:
misc1 = misc1 & ~0x80; /* bit7 = 0*/
- dynamic_range_rgb = 0; /*full range*/
break;
case COLOR_SPACE_SRGB_LIMITED:
misc0 = misc0 | 0x8; /* bit3=1 */
misc1 = misc1 & ~0x80; /* bit7 = 0*/
- dynamic_range_rgb = 1; /*limited range*/
break;
case COLOR_SPACE_YCBCR601:
case COLOR_SPACE_YCBCR601_LIMITED:
misc0 = misc0 | 0x8; /* bit3=1, bit4=0 */
misc1 = misc1 & ~0x80; /* bit7 = 0*/
- dynamic_range_ycbcr = 0; /*bt601*/
if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR422)
misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */
else if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR444)
@@ -395,17 +384,14 @@ void enc1_stream_encoder_dp_set_stream_attribute(
case COLOR_SPACE_YCBCR709_LIMITED:
misc0 = misc0 | 0x18; /* bit3=1, bit4=1 */
misc1 = misc1 & ~0x80; /* bit7 = 0*/
- dynamic_range_ycbcr = 1; /*bt709*/
if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR422)
misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */
else if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR444)
misc0 = misc0 | 0x4; /* bit2=1, bit1=0 */
break;
case COLOR_SPACE_2020_RGB_LIMITEDRANGE:
- dynamic_range_rgb = 1; /*limited range*/
- break;
case COLOR_SPACE_2020_RGB_FULLRANGE:
- case COLOR_SPACE_2020_YCBCR:
+ case COLOR_SPACE_2020_YCBCR_LIMITED:
case COLOR_SPACE_XR_RGB:
case COLOR_SPACE_MSREF_SCRGB:
case COLOR_SPACE_ADOBERGB:
@@ -418,6 +404,7 @@ void enc1_stream_encoder_dp_set_stream_attribute(
case COLOR_SPACE_CUSTOMPOINTS:
case COLOR_SPACE_UNKNOWN:
case COLOR_SPACE_YCBCR709_BLACK:
+ default:
/* do nothing */
break;
}
@@ -596,6 +583,8 @@ void enc1_stream_encoder_hdmi_set_stream_attribute(
HDMI_GC_SEND, 1,
HDMI_NULL_SEND, 1);
+ REG_UPDATE(HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, 0);
+
/* following belongs to audio */
REG_UPDATE(HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
@@ -674,11 +663,13 @@ static void enc1_stream_encoder_update_hdmi_info_packets(
/* for bring up, disable dp double TODO */
REG_UPDATE(HDMI_DB_CONTROL, HDMI_DB_DISABLE, 1);
+ /*Always add mandatory packets first followed by optional ones*/
enc1_update_hdmi_info_packet(enc1, 0, &info_frame->avi);
- enc1_update_hdmi_info_packet(enc1, 1, &info_frame->vendor);
+ enc1_update_hdmi_info_packet(enc1, 1, &info_frame->hfvsif);
enc1_update_hdmi_info_packet(enc1, 2, &info_frame->gamut);
- enc1_update_hdmi_info_packet(enc1, 3, &info_frame->spd);
- enc1_update_hdmi_info_packet(enc1, 4, &info_frame->hdrsmd);
+ enc1_update_hdmi_info_packet(enc1, 3, &info_frame->vendor);
+ enc1_update_hdmi_info_packet(enc1, 4, &info_frame->spd);
+ enc1_update_hdmi_info_packet(enc1, 5, &info_frame->hdrsmd);
}
static void enc1_stream_encoder_stop_hdmi_info_packets(
@@ -761,12 +752,19 @@ void enc1_stream_encoder_update_dp_info_packets(
* use other packetIndex (such as 5,6) for other info packet
*/
+ if (info_frame->adaptive_sync.valid)
+ enc1_update_generic_info_packet(
+ enc1,
+ 5, /* packetIndex */
+ &info_frame->adaptive_sync);
+
/* enable/disable transmission of packet(s).
* If enabled, packet transmission begins on the next frame
*/
REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, info_frame->vsc.valid);
REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, info_frame->spd.valid);
REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, info_frame->hdrsmd.valid);
+ REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP5_ENABLE, info_frame->adaptive_sync.valid);
/* This bit is the master enable bit.
* When enabling secondary stream engine,
@@ -934,7 +932,7 @@ void enc1_stream_encoder_dp_blank(
/* disable DP stream */
REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, 0);
- dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_DISABLE_DP_VID_STREAM);
+ link->dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_DISABLE_DP_VID_STREAM);
/* the encoder stops sending the video stream
* at the start of the vertical blanking.
@@ -953,7 +951,7 @@ void enc1_stream_encoder_dp_blank(
REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, true);
- dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_FIFO_STEER_RESET);
+ link->dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_FIFO_STEER_RESET);
}
/* output video stream to link encoder */
@@ -1026,7 +1024,8 @@ void enc1_stream_encoder_dp_unblank(
REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, true);
- dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_ENABLE_DP_VID_STREAM);
+ link->dc->link_srv->dp_trace_source_sequence(link,
+ DPCD_SOURCE_SEQ_AFTER_ENABLE_DP_VID_STREAM);
}
void enc1_stream_encoder_set_avmute(
@@ -1471,10 +1470,9 @@ void enc1_se_hdmi_audio_setup(
void enc1_se_hdmi_audio_disable(
struct stream_encoder *enc)
{
-#if defined(CONFIG_DRM_AMD_DC_DCN)
if (enc->afmt && enc->afmt->funcs->afmt_powerdown)
enc->afmt->funcs->afmt_powerdown(enc->afmt);
-#endif
+
enc1_se_enable_audio_clock(enc, false);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.h
index 687d7e4bf7ca..54a6a4ebd636 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.h
@@ -73,6 +73,7 @@
SRI(HDMI_ACR_48_1, DIG, id),\
SRI(DP_DB_CNTL, DP, id), \
SRI(DP_MSA_MISC, DP, id), \
+ SRI(DP_MSA_VBID_MISC, DP, id), \
SRI(DP_MSA_COLORIMETRY, DP, id), \
SRI(DP_MSA_TIMING_PARAM1, DP, id), \
SRI(DP_MSA_TIMING_PARAM2, DP, id), \
@@ -92,6 +93,8 @@
SRI(DP_VID_STREAM_CNTL, DP, id), \
SRI(DP_VID_TIMING, DP, id), \
SRI(DP_SEC_AUD_N, DP, id), \
+ SRI(DP_SEC_AUD_N_READBACK, DP, id), \
+ SRI(DP_SEC_AUD_M_READBACK, DP, id), \
SRI(DP_SEC_TIMESTAMP, DP, id), \
SRI(DIG_CLOCK_PATTERN, DIG, id)
@@ -124,7 +127,6 @@ struct dcn10_stream_enc_registers {
uint32_t AFMT_60958_1;
uint32_t AFMT_60958_2;
uint32_t DIG_FE_CNTL;
- uint32_t DIG_FE_CNTL2;
uint32_t DIG_FIFO_STATUS;
uint32_t DP_MSE_RATE_CNTL;
uint32_t DP_MSE_RATE_UPDATE;
@@ -140,6 +142,8 @@ struct dcn10_stream_enc_registers {
uint32_t DP_VID_STREAM_CNTL;
uint32_t DP_VID_TIMING;
uint32_t DP_SEC_AUD_N;
+ uint32_t DP_SEC_AUD_N_READBACK;
+ uint32_t DP_SEC_AUD_M_READBACK;
uint32_t DP_SEC_TIMESTAMP;
uint32_t HDMI_CONTROL;
uint32_t HDMI_GC;
@@ -182,13 +186,17 @@ struct dcn10_stream_enc_registers {
uint32_t HDMI_GENERIC_PACKET_CONTROL9;
uint32_t HDMI_GENERIC_PACKET_CONTROL10;
uint32_t DIG_CLOCK_PATTERN;
+ uint32_t DIG_FIFO_CTRL0;
+ uint32_t DIG_FE_CLK_CNTL;
+ uint32_t DIG_FE_EN_CNTL;
+ uint32_t STREAM_MAPPER_CONTROL;
};
#define SE_SF(reg_name, field_name, post_fix)\
.field_name = reg_name ## __ ## field_name ## post_fix
-#define SE_COMMON_MASK_SH_LIST_SOC_BASE(mask_sh)\
+#define SE_COMMON_MASK_SH_LIST_SOC(mask_sh)\
SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_INDEX, mask_sh),\
SE_SF(DIG0_AFMT_GENERIC_HDR, AFMT_GENERIC_HB0, mask_sh),\
SE_SF(DIG0_AFMT_GENERIC_HDR, AFMT_GENERIC_HB1, mask_sh),\
@@ -205,6 +213,7 @@ struct dcn10_stream_enc_registers {
SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, mask_sh),\
SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, mask_sh),\
SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, mask_sh),\
SE_SF(DIG0_HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, mask_sh),\
SE_SF(DIG0_AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, mask_sh),\
SE_SF(DIG0_HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, mask_sh),\
@@ -256,6 +265,8 @@ struct dcn10_stream_enc_registers {
SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, mask_sh),\
SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, mask_sh),\
SE_SF(DP0_DP_SEC_AUD_N, DP_SEC_AUD_N, mask_sh),\
+ SE_SF(DP0_DP_SEC_AUD_N_READBACK, DP_SEC_AUD_N_READBACK, mask_sh),\
+ SE_SF(DP0_DP_SEC_AUD_M_READBACK, DP_SEC_AUD_M_READBACK, mask_sh),\
SE_SF(DP0_DP_SEC_TIMESTAMP, DP_SEC_TIMESTAMP_MODE, mask_sh),\
SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ASP_ENABLE, mask_sh),\
SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ATP_ENABLE, mask_sh),\
@@ -331,9 +342,6 @@ struct dcn10_stream_enc_registers {
SE_SF(DIG0_DIG_FE_CNTL, DIG_SOURCE_SELECT, mask_sh),\
SE_SF(DIG0_DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, mask_sh)
-#define SE_COMMON_MASK_SH_LIST_SOC(mask_sh)\
- SE_COMMON_MASK_SH_LIST_SOC_BASE(mask_sh)
-
#define SE_COMMON_MASK_SH_LIST_DCN10(mask_sh)\
SE_COMMON_MASK_SH_LIST_SOC(mask_sh),\
SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_CONT, mask_sh),\
@@ -473,6 +481,8 @@ struct dcn10_stream_enc_registers {
type AFMT_60958_CS_CHANNEL_NUMBER_6;\
type AFMT_60958_CS_CHANNEL_NUMBER_7;\
type DP_SEC_AUD_N;\
+ type DP_SEC_AUD_N_READBACK;\
+ type DP_SEC_AUD_M_READBACK;\
type DP_SEC_TIMESTAMP_MODE;\
type DP_SEC_ASP_ENABLE;\
type DP_SEC_ATP_ENABLE;\
@@ -559,16 +569,53 @@ struct dcn10_stream_enc_registers {
type DP_SEC_GSP11_ENABLE;\
type DP_SEC_GSP11_LINE_NUM
+#define SE_REG_FIELD_LIST_DCN3_1_COMMON(type) \
+ type DIG_FIFO_OUTPUT_PIXEL_MODE;\
+ type DP_PIXEL_PER_CYCLE_PROCESSING_MODE;\
+ type DIG_SYMCLK_FE_ON;\
+ type DIG_FIFO_READ_START_LEVEL;\
+ type DIG_FIFO_ENABLE;\
+ type DIG_FIFO_RESET;\
+ type DIG_FIFO_RESET_DONE;\
+ type PIXEL_ENCODING_TYPE;\
+ type UNCOMPRESSED_PIXEL_FORMAT;\
+ type UNCOMPRESSED_COMPONENT_DEPTH
+
+#define SE_REG_FIELD_LIST_DCN3_5_COMMON(type) \
+ type DIG_FE_CLK_EN;\
+ type DIG_FE_MODE;\
+ type DIG_FE_SOFT_RESET;\
+ type DIG_FE_ENABLE;\
+ type DIG_FE_SYMCLK_FE_G_CLOCK_ON;\
+ type DIG_FE_DISPCLK_G_CLOCK_ON;\
+ type DIG_FE_SYMCLK_FE_G_AFMT_CLOCK_ON;\
+ type DIG_FE_SYMCLK_FE_G_TMDS_CLOCK_ON;\
+ type DIG_FE_SOCCLK_G_AFMT_CLOCK_ON;\
+ type DIG_STREAM_LINK_TARGET
+
+#define SE_REG_FIELD_LIST_DCN4_01_COMMON(type) \
+ type COMPRESSED_PIXEL_FORMAT;\
+ type DP_VID_N_INTERVAL;\
+ type DIG_FIFO_OUTPUT_PIXEL_PER_CYCLE;\
+ type DP_STEER_FIFO_ENABLE
struct dcn10_stream_encoder_shift {
SE_REG_FIELD_LIST_DCN1_0(uint8_t);
+ uint8_t HDMI_ACP_SEND;
SE_REG_FIELD_LIST_DCN2_0(uint8_t);
SE_REG_FIELD_LIST_DCN3_0(uint8_t);
+ SE_REG_FIELD_LIST_DCN3_1_COMMON(uint8_t);
+ SE_REG_FIELD_LIST_DCN3_5_COMMON(uint8_t);
+ SE_REG_FIELD_LIST_DCN4_01_COMMON(uint32_t);
};
struct dcn10_stream_encoder_mask {
SE_REG_FIELD_LIST_DCN1_0(uint32_t);
+ uint32_t HDMI_ACP_SEND;
SE_REG_FIELD_LIST_DCN2_0(uint32_t);
SE_REG_FIELD_LIST_DCN3_0(uint32_t);
+ SE_REG_FIELD_LIST_DCN3_1_COMMON(uint32_t);
+ SE_REG_FIELD_LIST_DCN3_5_COMMON(uint32_t);
+ SE_REG_FIELD_LIST_DCN4_01_COMMON(uint32_t);
};
struct dcn10_stream_encoder {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_link_encoder.c
index 2f9bfaeaba8d..51a57dae1811 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_link_encoder.c
@@ -29,7 +29,6 @@
#include "link_encoder.h"
#include "dcn20_link_encoder.h"
#include "stream_encoder.h"
-#include "i2caux_interface.h"
#include "dc_bios_types.h"
#include "gpio_service_interface.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_link_encoder.h
index b2b266953d18..762c579fcb44 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_link_encoder.h
@@ -147,7 +147,8 @@
LE_SF(DPCSTX0_DPCSTX_TX_CNTL, DPCS_TX_DATA_SWAP, mask_sh),\
LE_SF(DPCSTX0_DPCSTX_TX_CNTL, DPCS_TX_DATA_ORDER_INVERT, mask_sh),\
LE_SF(DPCSTX0_DPCSTX_TX_CNTL, DPCS_TX_FIFO_EN, mask_sh),\
- LE_SF(DPCSTX0_DPCSTX_TX_CNTL, DPCS_TX_FIFO_RD_START_DELAY, mask_sh)
+ LE_SF(DPCSTX0_DPCSTX_TX_CNTL, DPCS_TX_FIFO_RD_START_DELAY, mask_sh),\
+ LE_SF(DPCSTX0_DPCSTX_DEBUG_CONFIG, DPCS_DBG_CBUS_DIS, mask_sh)
#define DPCS_DCN2_MASK_SH_LIST(mask_sh)\
DPCS_MASK_SH_LIST(mask_sh),\
@@ -231,6 +232,8 @@
SRI(RDPCSTX_PHY_FUSE3, RDPCSTX, id), \
SRI(DPCSTX_TX_CLOCK_CNTL, DPCSTX, id), \
SRI(DPCSTX_TX_CNTL, DPCSTX, id), \
+ SRI(DPCSTX_DEBUG_CONFIG, DPCSTX, id), \
+ SRI(RDPCSTX_DEBUG_CONFIG, RDPCSTX, id), \
SR(RDPCSTX0_RDPCSTX_SCRATCH)
@@ -279,7 +282,6 @@ struct mpll_cfg {
uint32_t tx_peaking_lvl;
uint32_t ctr_reqs_pll;
-
};
struct dpcssys_phy_seq_cfg {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c
index aab25ca8343a..bec0b4aaeb2b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c
@@ -29,13 +29,12 @@
#include "dcn20_stream_encoder.h"
#include "reg_helper.h"
#include "hw_shared.h"
-#include "inc/link_dpcd.h"
+#include "link_service.h"
#include "dpcd_defs.h"
#define DC_LOGGER \
enc1->base.ctx->logger
-
#define REG(reg)\
(enc1->regs->reg)
@@ -159,6 +158,7 @@ static void enc2_stream_encoder_update_hdmi_info_packets(
enc2_update_hdmi_info_packet(enc1, 3, &info_frame->vendor);
enc2_update_hdmi_info_packet(enc1, 4, &info_frame->spd);
enc2_update_hdmi_info_packet(enc1, 5, &info_frame->hdrsmd);
+ enc2_update_hdmi_info_packet(enc1, 6, &info_frame->vtem);
}
static void enc2_stream_encoder_stop_hdmi_info_packets(
@@ -423,6 +423,22 @@ void enc2_set_dynamic_metadata(struct stream_encoder *enc,
}
}
+static void enc2_stream_encoder_update_dp_info_packets_sdp_line_num(
+ struct stream_encoder *enc,
+ struct encoder_info_frame *info_frame)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ if (info_frame->adaptive_sync.valid == true &&
+ info_frame->sdp_line_num.adaptive_sync_line_num_valid == true) {
+ //00: REFER_TO_DP_SOF, 01: REFER_TO_OTG_SOF
+ REG_UPDATE(DP_SEC_CNTL1, DP_SEC_GSP5_LINE_REFERENCE, 1);
+
+ REG_UPDATE(DP_SEC_CNTL5, DP_SEC_GSP5_LINE_NUM,
+ info_frame->sdp_line_num.adaptive_sync_line_num);
+ }
+}
+
static void enc2_stream_encoder_update_dp_info_packets(
struct stream_encoder *enc,
const struct encoder_info_frame *info_frame)
@@ -530,7 +546,8 @@ void enc2_stream_encoder_dp_unblank(
REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, true);
- dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_ENABLE_DP_VID_STREAM);
+ link->dc->link_srv->dp_trace_source_sequence(link,
+ DPCD_SOURCE_SEQ_AFTER_ENABLE_DP_VID_STREAM);
}
static void enc2_dp_set_odm_combine(
@@ -587,6 +604,8 @@ static const struct stream_encoder_funcs dcn20_str_enc_funcs = {
enc2_stream_encoder_update_hdmi_info_packets,
.stop_hdmi_info_packets =
enc2_stream_encoder_stop_hdmi_info_packets,
+ .update_dp_info_packets_sdp_line_num =
+ enc2_stream_encoder_update_dp_info_packets_sdp_line_num,
.update_dp_info_packets =
enc2_stream_encoder_update_dp_info_packets,
.send_immediate_sdp_message =
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.h
index baa1e539f341..baa1e539f341 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_link_encoder.c
index 6f3c2fb60790..b8e31b5ea114 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_link_encoder.c
@@ -29,10 +29,7 @@
#include "link_encoder.h"
#include "dcn30_dio_link_encoder.h"
#include "stream_encoder.h"
-#include "i2caux_interface.h"
#include "dc_bios_types.h"
-/* #include "dcn3ag/dcn3ag_phy_fw.h" */
-
#include "gpio_service_interface.h"
#define CTX \
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_link_encoder.h
index f2d90f2b8bf1..5b6177c2ae98 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_link_encoder.h
@@ -55,7 +55,8 @@
SRI(DP_DPHY_HBR2_PATTERN_CONTROL, DP, id)
#define LINK_ENCODER_MASK_SH_LIST_DCN30(mask_sh) \
- LINK_ENCODER_MASK_SH_LIST_DCN20(mask_sh)
+ LINK_ENCODER_MASK_SH_LIST_DCN20(mask_sh),\
+ LE_SF(DIG0_TMDS_DCBALANCER_CONTROL, TMDS_SYNC_DCBAL_EN, mask_sh)
#define DPCS_DCN3_MASK_SH_LIST(mask_sh)\
DPCS_DCN2_MASK_SH_LIST(mask_sh),\
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_stream_encoder.c
index ebd9c35c914f..e93be7b6d9b0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_stream_encoder.c
@@ -28,14 +28,11 @@
#include "dcn30_dio_stream_encoder.h"
#include "reg_helper.h"
#include "hw_shared.h"
-#include "core_types.h"
-#include <linux/delay.h>
-
+#include "dc.h"
#define DC_LOGGER \
enc1->base.ctx->logger
-
#define REG(reg)\
(enc1->regs->reg)
@@ -50,23 +47,7 @@
enc1->base.ctx
-void convert_dc_info_packet_to_128(
- const struct dc_info_packet *info_packet,
- struct dc_info_packet_128 *info_packet_128)
-{
- unsigned int i;
-
- info_packet_128->hb0 = info_packet->hb0;
- info_packet_128->hb1 = info_packet->hb1;
- info_packet_128->hb2 = info_packet->hb2;
- info_packet_128->hb3 = info_packet->hb3;
-
- for (i = 0; i < 32; i++) {
- info_packet_128->sb[i] = info_packet->sb[i];
- }
-
-}
-static void enc3_update_hdmi_info_packet(
+void enc3_update_hdmi_info_packet(
struct dcn10_stream_encoder *enc1,
uint32_t packet_index,
const struct dc_info_packet *info_packet)
@@ -211,7 +192,7 @@ static void enc3_update_hdmi_info_packet(
}
}
-static void enc3_stream_encoder_update_hdmi_info_packets(
+void enc3_stream_encoder_update_hdmi_info_packets(
struct stream_encoder *enc,
const struct encoder_info_frame *info_frame)
{
@@ -228,9 +209,10 @@ static void enc3_stream_encoder_update_hdmi_info_packets(
enc3_update_hdmi_info_packet(enc1, 1, &info_frame->vendor);
enc3_update_hdmi_info_packet(enc1, 3, &info_frame->spd);
enc3_update_hdmi_info_packet(enc1, 4, &info_frame->hdrsmd);
+ enc3_update_hdmi_info_packet(enc1, 6, &info_frame->vtem);
}
-static void enc3_stream_encoder_stop_hdmi_info_packets(
+void enc3_stream_encoder_stop_hdmi_info_packets(
struct stream_encoder *enc)
{
struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
@@ -334,7 +316,7 @@ static void enc3_dp_set_dsc_config(struct stream_encoder *enc,
}
-static void enc3_dp_set_dsc_pps_info_packet(struct stream_encoder *enc,
+void enc3_dp_set_dsc_pps_info_packet(struct stream_encoder *enc,
bool enable,
uint8_t *dsc_packed_pps,
bool immediate_update)
@@ -420,7 +402,23 @@ static void enc3_read_state(struct stream_encoder *enc, struct enc_state *s)
}
}
-static void enc3_stream_encoder_update_dp_info_packets(
+void enc3_stream_encoder_update_dp_info_packets_sdp_line_num(
+ struct stream_encoder *enc,
+ struct encoder_info_frame *info_frame)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ if (info_frame->adaptive_sync.valid == true &&
+ info_frame->sdp_line_num.adaptive_sync_line_num_valid == true) {
+ //00: REFER_TO_DP_SOF, 01: REFER_TO_OTG_SOF
+ REG_UPDATE(DP_SEC_CNTL1, DP_SEC_GSP5_LINE_REFERENCE, 1);
+
+ REG_UPDATE(DP_SEC_CNTL5, DP_SEC_GSP5_LINE_NUM,
+ info_frame->sdp_line_num.adaptive_sync_line_num);
+ }
+}
+
+void enc3_stream_encoder_update_dp_info_packets(
struct stream_encoder *enc,
const struct encoder_info_frame *info_frame)
{
@@ -435,6 +433,36 @@ static void enc3_stream_encoder_update_dp_info_packets(
&info_frame->vsc,
true);
}
+ /* TODO: VSC SDP at packetIndex 1 should be restricted only if PSR-SU on.
+ * There should have another Infopacket type (e.g. vsc_psrsu) for PSR_SU.
+ * In addition, currently the driver check the valid bit then update and
+ * send the corresponding Infopacket. For PSR-SU, the SDP only be sent
+ * while entering PSR-SU mode. So we need another parameter(e.g. send)
+ * in dc_info_packet to indicate which infopacket should be enabled by
+ * default here.
+ */
+ if (info_frame->vsc.valid) {
+ enc->vpg->funcs->update_generic_info_packet(
+ enc->vpg,
+ 1, /* packetIndex */
+ &info_frame->vsc,
+ true);
+ }
+ /* TODO: VSC SDP at packetIndex 1 should be restricted only if PSR-SU on.
+ * There should have another Infopacket type (e.g. vsc_psrsu) for PSR_SU.
+ * In addition, currently the driver check the valid bit then update and
+ * send the corresponding Infopacket. For PSR-SU, the SDP only be sent
+ * while entering PSR-SU mode. So we need another parameter(e.g. send)
+ * in dc_info_packet to indicate which infopacket should be enabled by
+ * default here.
+ */
+ if (info_frame->vsc.valid) {
+ enc->vpg->funcs->update_generic_info_packet(
+ enc->vpg,
+ 1, /* packetIndex */
+ &info_frame->vsc,
+ true);
+ }
if (info_frame->spd.valid) {
enc->vpg->funcs->update_generic_info_packet(
enc->vpg,
@@ -453,12 +481,20 @@ static void enc3_stream_encoder_update_dp_info_packets(
* use other packetIndex (such as 5,6) for other info packet
*/
+ if (info_frame->adaptive_sync.valid)
+ enc->vpg->funcs->update_generic_info_packet(
+ enc->vpg,
+ 5, /* packetIndex */
+ &info_frame->adaptive_sync,
+ true);
+
/* enable/disable transmission of packet(s).
* If enabled, packet transmission begins on the next frame
*/
REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, info_frame->vsc.valid);
REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, info_frame->spd.valid);
REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, info_frame->hdrsmd.valid);
+ REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP5_ENABLE, info_frame->adaptive_sync.valid);
/* This bit is the master enable bit.
* When enabling secondary stream engine,
@@ -489,7 +525,7 @@ static void enc3_dp_set_odm_combine(
}
/* setup stream encoder in dvi mode */
-void enc3_stream_encoder_dvi_set_stream_attribute(
+static void enc3_stream_encoder_dvi_set_stream_attribute(
struct stream_encoder *enc,
struct dc_crtc_timing *crtc_timing,
bool is_dual_link)
@@ -652,6 +688,9 @@ static void enc3_stream_encoder_hdmi_set_stream_attribute(
HDMI_GC_SEND, 1,
HDMI_NULL_SEND, 1);
+ /* Disable Audio Content Protection packet transmission */
+ REG_UPDATE(HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, 0);
+
/* following belongs to audio */
/* Enable Audio InfoFrame packet transmission. */
REG_UPDATE(HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
@@ -668,7 +707,7 @@ static void enc3_stream_encoder_hdmi_set_stream_attribute(
REG_UPDATE(HDMI_GC, HDMI_GC_AVMUTE, 0);
}
-static void enc3_audio_mute_control(
+void enc3_audio_mute_control(
struct stream_encoder *enc,
bool mute)
{
@@ -676,7 +715,7 @@ static void enc3_audio_mute_control(
enc->afmt->funcs->audio_mute_control(enc->afmt, mute);
}
-static void enc3_se_dp_audio_setup(
+void enc3_se_dp_audio_setup(
struct stream_encoder *enc,
unsigned int az_inst,
struct audio_info *info)
@@ -707,7 +746,7 @@ static void enc3_se_setup_dp_audio(
enc->afmt->funcs->setup_dp_audio(enc->afmt);
}
-static void enc3_se_dp_audio_enable(
+void enc3_se_dp_audio_enable(
struct stream_encoder *enc)
{
enc1_se_enable_audio_clock(enc, true);
@@ -773,7 +812,7 @@ static void enc3_se_setup_hdmi_audio(
*/
}
-static void enc3_se_hdmi_audio_setup(
+void enc3_se_hdmi_audio_setup(
struct stream_encoder *enc,
unsigned int az_inst,
struct audio_info *info,
@@ -801,6 +840,8 @@ static const struct stream_encoder_funcs dcn30_str_enc_funcs = {
enc3_stream_encoder_update_hdmi_info_packets,
.stop_hdmi_info_packets =
enc3_stream_encoder_stop_hdmi_info_packets,
+ .update_dp_info_packets_sdp_line_num =
+ enc3_stream_encoder_update_dp_info_packets_sdp_line_num,
.update_dp_info_packets =
enc3_stream_encoder_update_dp_info_packets,
.stop_dp_info_packets =
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_stream_encoder.h
index e2c264ecb20f..830ce7e47035 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_stream_encoder.h
@@ -95,6 +95,8 @@
SRI(DP_VID_STREAM_CNTL, DP, id), \
SRI(DP_VID_TIMING, DP, id), \
SRI(DP_SEC_AUD_N, DP, id), \
+ SRI(DP_SEC_AUD_N_READBACK, DP, id), \
+ SRI(DP_SEC_AUD_M_READBACK, DP, id), \
SRI(DP_SEC_TIMESTAMP, DP, id), \
SRI(DP_DSC_CNTL, DP, id), \
SRI(DP_DSC_BYTES_PER_PIXEL, DP, id), \
@@ -110,7 +112,7 @@
SRI(DIG_CLOCK_PATTERN, DIG, id)
-#define SE_COMMON_MASK_SH_LIST_DCN30_BASE(mask_sh)\
+#define SE_COMMON_MASK_SH_LIST_DCN30(mask_sh)\
SE_SF(DP0_DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, mask_sh),\
SE_SF(DP0_DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, mask_sh),\
SE_SF(DIG0_HDMI_CONTROL, HDMI_PACKET_GEN_VERSION, mask_sh),\
@@ -122,6 +124,7 @@
SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, mask_sh),\
SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, mask_sh),\
SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, mask_sh),\
SE_SF(DIG0_HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, mask_sh),\
SE_SF(DIG0_HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, mask_sh),\
SE_SF(DIG0_HDMI_GC, HDMI_GC_AVMUTE, mask_sh),\
@@ -157,6 +160,8 @@
SE_SF(DIG0_HDMI_ACR_48_0, HDMI_ACR_CTS_48, mask_sh),\
SE_SF(DIG0_HDMI_ACR_48_1, HDMI_ACR_N_48, mask_sh),\
SE_SF(DP0_DP_SEC_AUD_N, DP_SEC_AUD_N, mask_sh),\
+ SE_SF(DP0_DP_SEC_AUD_N_READBACK, DP_SEC_AUD_N_READBACK, mask_sh),\
+ SE_SF(DP0_DP_SEC_AUD_M_READBACK, DP_SEC_AUD_M_READBACK, mask_sh),\
SE_SF(DP0_DP_SEC_TIMESTAMP, DP_SEC_TIMESTAMP_MODE, mask_sh),\
SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ASP_ENABLE, mask_sh),\
SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ATP_ENABLE, mask_sh),\
@@ -269,9 +274,6 @@
SE_SF(DP0_DP_SEC_FRAMING4, DP_SST_SDP_SPLITTING, mask_sh),\
SE_SF(DIG0_DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, mask_sh)
-#define SE_COMMON_MASK_SH_LIST_DCN30(mask_sh)\
- SE_COMMON_MASK_SH_LIST_DCN30_BASE(mask_sh)
-
void dcn30_dio_stream_encoder_construct(
struct dcn10_stream_encoder *enc1,
struct dc_context *ctx,
@@ -283,4 +285,47 @@ void dcn30_dio_stream_encoder_construct(
const struct dcn10_stream_encoder_shift *se_shift,
const struct dcn10_stream_encoder_mask *se_mask);
+void enc3_stream_encoder_update_hdmi_info_packets(
+ struct stream_encoder *enc,
+ const struct encoder_info_frame *info_frame);
+
+void enc3_stream_encoder_stop_hdmi_info_packets(
+ struct stream_encoder *enc);
+
+void enc3_stream_encoder_update_dp_info_packets_sdp_line_num(
+ struct stream_encoder *enc,
+ struct encoder_info_frame *info_frame);
+
+void enc3_stream_encoder_update_dp_info_packets(
+ struct stream_encoder *enc,
+ const struct encoder_info_frame *info_frame);
+
+void enc3_audio_mute_control(
+ struct stream_encoder *enc,
+ bool mute);
+
+void enc3_se_dp_audio_setup(
+ struct stream_encoder *enc,
+ unsigned int az_inst,
+ struct audio_info *info);
+
+void enc3_se_dp_audio_enable(
+ struct stream_encoder *enc);
+
+void enc3_se_hdmi_audio_setup(
+ struct stream_encoder *enc,
+ unsigned int az_inst,
+ struct audio_info *info,
+ struct audio_crtc_info *audio_crtc_info);
+
+void enc3_dp_set_dsc_pps_info_packet(
+ struct stream_encoder *enc,
+ bool enable,
+ uint8_t *dsc_packed_pps,
+ bool immediate_update);
+void enc3_update_hdmi_info_packet(
+ struct dcn10_stream_encoder *enc1,
+ uint32_t packet_index,
+ const struct dc_info_packet *info_packet);
+
#endif /* __DC_DIO_STREAM_ENCODER_DCN30_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.c
index c9fbaed23965..1b39a6e8a1ac 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.c
@@ -29,7 +29,6 @@
#include "link_encoder.h"
#include "dcn301_dio_link_encoder.h"
#include "stream_encoder.h"
-#include "i2caux_interface.h"
#include "dc_bios_types.h"
#include "gpio_service_interface.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.h
index 49f8d91d4951..49f8d91d4951 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c
index ee6f13bef377..84cc2ddc52fe 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c
@@ -30,7 +30,6 @@
#include "link_encoder.h"
#include "dcn31_dio_link_encoder.h"
#include "stream_encoder.h"
-#include "i2caux_interface.h"
#include "dc_bios_types.h"
#include "gpio_service_interface.h"
@@ -38,6 +37,7 @@
#include "link_enc_cfg.h"
#include "dc_dmub_srv.h"
#include "dal_asic_id.h"
+#include "link_service.h"
#define CTX \
enc10->base.ctx
@@ -67,6 +67,70 @@
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
#endif
+static uint8_t phy_id_from_transmitter(enum transmitter t)
+{
+ uint8_t phy_id;
+
+ switch (t) {
+ case TRANSMITTER_UNIPHY_A:
+ phy_id = 0;
+ break;
+ case TRANSMITTER_UNIPHY_B:
+ phy_id = 1;
+ break;
+ case TRANSMITTER_UNIPHY_C:
+ phy_id = 2;
+ break;
+ case TRANSMITTER_UNIPHY_D:
+ phy_id = 3;
+ break;
+ case TRANSMITTER_UNIPHY_E:
+ phy_id = 4;
+ break;
+ case TRANSMITTER_UNIPHY_F:
+ phy_id = 5;
+ break;
+ case TRANSMITTER_UNIPHY_G:
+ phy_id = 6;
+ break;
+ default:
+ phy_id = 0;
+ break;
+ }
+ return phy_id;
+}
+
+static bool has_query_dp_alt(struct link_encoder *enc)
+{
+ struct dc_dmub_srv *dc_dmub_srv = enc->ctx->dmub_srv;
+
+ if (enc->ctx->dce_version >= DCN_VERSION_3_15)
+ return true;
+
+ /* Supports development firmware and firmware >= 4.0.11 */
+ return dc_dmub_srv &&
+ !(dc_dmub_srv->dmub->fw_version >= DMUB_FW_VERSION(4, 0, 0) &&
+ dc_dmub_srv->dmub->fw_version <= DMUB_FW_VERSION(4, 0, 10));
+}
+
+static bool query_dp_alt_from_dmub(struct link_encoder *enc,
+ union dmub_rb_cmd *cmd)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->query_dp_alt.header.type = DMUB_CMD__VBIOS;
+ cmd->query_dp_alt.header.sub_type =
+ DMUB_CMD__VBIOS_TRANSMITTER_QUERY_DP_ALT;
+ cmd->query_dp_alt.header.payload_bytes = sizeof(cmd->query_dp_alt.data);
+ cmd->query_dp_alt.data.phy_id = phy_id_from_transmitter(enc10->base.transmitter);
+
+ if (!dc_wake_and_execute_dmub_cmd(enc->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+ return false;
+
+ return true;
+}
+
void dcn31_link_encoder_set_dio_phy_mux(
struct link_encoder *enc,
enum encoder_type_select sel,
@@ -168,9 +232,7 @@ void enc31_hw_init(struct link_encoder *enc)
AUX_RX_PHASE_DETECT_LEN, [21,20] = 0x3 default is 3
AUX_RX_DETECTION_THRESHOLD [30:28] = 1
*/
- AUX_REG_WRITE(AUX_DPHY_RX_CONTROL0, 0x103d1110);
-
- AUX_REG_WRITE(AUX_DPHY_TX_CONTROL, 0x21c7a);
+ // dmub will read AUX_DPHY_RX_CONTROL0/AUX_DPHY_TX_CONTROL from vbios table in dp_aux_init
//AUX_DPHY_TX_REF_CONTROL'AUX_TX_REF_DIV HW default is 0x32;
// Set AUX_TX_REF_DIV Divider to generate 2 MHz reference from refclk
@@ -178,18 +240,9 @@ void enc31_hw_init(struct link_encoder *enc)
// 100MHz -> 0x32
// 48MHz -> 0x18
-#ifdef CLEANUP_FIXME
- /*from display_init*/
- REG_WRITE(RDPCSTX_DEBUG_CONFIG, 0);
-#endif
-
// Set TMDS_CTL0 to 1. This is a legacy setting.
REG_UPDATE(TMDS_CTL_BITS, TMDS_CTL0, 1);
- /*HW default is 5*/
- REG_UPDATE(RDPCSTX_CNTL,
- RDPCS_TX_FIFO_RD_START_DELAY, 4);
-
dcn10_aux_initialize(enc10);
}
@@ -371,7 +424,6 @@ static bool link_dpia_control(struct dc_context *dc_ctx,
struct dmub_cmd_dig_dpia_control_data *dpia_control)
{
union dmub_rb_cmd cmd;
- struct dc_dmub_srv *dmub = dc_ctx->dmub_srv;
memset(&cmd, 0, sizeof(cmd));
@@ -384,9 +436,7 @@ static bool link_dpia_control(struct dc_context *dc_ctx,
cmd.dig1_dpia_control.dpia_control = *dpia_control;
- dc_dmub_srv_cmd_queue(dmub, &cmd);
- dc_dmub_srv_cmd_execute(dmub);
- dc_dmub_srv_wait_idle(dmub);
+ dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
return true;
}
@@ -407,6 +457,7 @@ void dcn31_link_encoder_enable_dp_output(
/* Enable transmitter and encoder. */
if (!link_enc_cfg_is_transmitter_mappable(enc->ctx->dc, enc)) {
+ DC_LOG_DEBUG("%s: enc_id(%d)\n", __func__, enc->preferred_engine);
dcn20_link_encoder_enable_dp_output(enc, link_settings, clock_source);
} else {
@@ -431,13 +482,14 @@ void dcn31_link_encoder_enable_dp_output(
if (link) {
dpia_control.dpia_id = link->ddc_hw_inst;
- dpia_control.fec_rdy = dc_link_should_enable_fec(link);
+ dpia_control.fec_rdy = link->dc->link_srv->dp_should_enable_fec(link);
} else {
DC_LOG_ERROR("%s: Failed to execute DPIA enable DMUB command.\n", __func__);
BREAK_TO_DEBUGGER();
return;
}
+ DC_LOG_DEBUG("%s: DPIA(%d) - enc_id(%d)\n", __func__, dpia_control.dpia_id, dpia_control.enc_id);
link_dpia_control(enc->ctx, &dpia_control);
}
}
@@ -452,6 +504,7 @@ void dcn31_link_encoder_enable_dp_mst_output(
/* Enable transmitter and encoder. */
if (!link_enc_cfg_is_transmitter_mappable(enc->ctx->dc, enc)) {
+ DC_LOG_DEBUG("%s: enc_id(%d)\n", __func__, enc->preferred_engine);
dcn10_link_encoder_enable_dp_mst_output(enc, link_settings, clock_source);
} else {
@@ -476,13 +529,14 @@ void dcn31_link_encoder_enable_dp_mst_output(
if (link) {
dpia_control.dpia_id = link->ddc_hw_inst;
- dpia_control.fec_rdy = dc_link_should_enable_fec(link);
+ dpia_control.fec_rdy = link->dc->link_srv->dp_should_enable_fec(link);
} else {
DC_LOG_ERROR("%s: Failed to execute DPIA enable DMUB command.\n", __func__);
BREAK_TO_DEBUGGER();
return;
}
+ DC_LOG_DEBUG("%s: DPIA(%d) - enc_id(%d)\n", __func__, dpia_control.dpia_id, dpia_control.enc_id);
link_dpia_control(enc->ctx, &dpia_control);
}
}
@@ -496,6 +550,7 @@ void dcn31_link_encoder_disable_output(
/* Disable transmitter and encoder. */
if (!link_enc_cfg_is_transmitter_mappable(enc->ctx->dc, enc)) {
+ DC_LOG_DEBUG("%s: enc_id(%d)\n", __func__, enc->preferred_engine);
dcn10_link_encoder_disable_output(enc, signal);
} else {
@@ -503,7 +558,7 @@ void dcn31_link_encoder_disable_output(
struct dmub_cmd_dig_dpia_control_data dpia_control = { 0 };
struct dc_link *link;
- if (!dcn10_is_dig_enabled(enc))
+ if (enc->funcs->is_dig_enabled && !enc->funcs->is_dig_enabled(enc))
return;
link = link_enc_cfg_get_link_using_link_enc(enc->ctx->dc, enc->preferred_engine);
@@ -527,6 +582,7 @@ void dcn31_link_encoder_disable_output(
return;
}
+ DC_LOG_DEBUG("%s: DPIA(%d) - enc_id(%d)\n", __func__, dpia_control.dpia_id, dpia_control.enc_id);
link_dpia_control(enc->ctx, &dpia_control);
link_encoder_disable(enc10);
@@ -536,57 +592,91 @@ void dcn31_link_encoder_disable_output(
bool dcn31_link_encoder_is_in_alt_mode(struct link_encoder *enc)
{
struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ union dmub_rb_cmd cmd;
uint32_t dp_alt_mode_disable;
- bool is_usb_c_alt_mode = false;
- if (enc->features.flags.bits.DP_IS_USB_C) {
- if (enc->ctx->asic_id.hw_internal_rev != YELLOW_CARP_B0) {
- // [Note] no need to check hw_internal_rev once phy mux selection is ready
- REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, &dp_alt_mode_disable);
- } else {
+ /* Only applicable to USB-C PHY. */
+ if (!enc->features.flags.bits.DP_IS_USB_C)
+ return false;
+
+ /*
+ * Use the new interface from DMCUB if available.
+ * Avoids hanging the RDPCSPIPE if DMCUB wasn't already running.
+ */
+ if (has_query_dp_alt(enc)) {
+ if (!query_dp_alt_from_dmub(enc, &cmd))
+ return false;
+
+ return (cmd.query_dp_alt.data.is_dp_alt_disable == 0);
+ }
+
+ /* Legacy path, avoid if possible. */
+ if (enc->ctx->asic_id.hw_internal_rev != YELLOW_CARP_B0) {
+ REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE,
+ &dp_alt_mode_disable);
+ } else {
/*
* B0 phys use a new set of registers to check whether alt mode is disabled.
* if value == 1 alt mode is disabled, otherwise it is enabled.
*/
- if ((enc10->base.transmitter == TRANSMITTER_UNIPHY_A)
- || (enc10->base.transmitter == TRANSMITTER_UNIPHY_B)
- || (enc10->base.transmitter == TRANSMITTER_UNIPHY_E)) {
- REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, &dp_alt_mode_disable);
- } else {
- // [Note] need to change TRANSMITTER_UNIPHY_C/D to F/G once phy mux selection is ready
- REG_GET(RDPCSPIPE_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, &dp_alt_mode_disable);
- }
+ if ((enc10->base.transmitter == TRANSMITTER_UNIPHY_A) ||
+ (enc10->base.transmitter == TRANSMITTER_UNIPHY_B) ||
+ (enc10->base.transmitter == TRANSMITTER_UNIPHY_E)) {
+ REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE,
+ &dp_alt_mode_disable);
+ } else {
+ REG_GET(RDPCSPIPE_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE,
+ &dp_alt_mode_disable);
}
-
- is_usb_c_alt_mode = (dp_alt_mode_disable == 0);
}
- return is_usb_c_alt_mode;
+ return (dp_alt_mode_disable == 0);
}
-void dcn31_link_encoder_get_max_link_cap(struct link_encoder *enc,
- struct dc_link_settings *link_settings)
+void dcn31_link_encoder_get_max_link_cap(struct link_encoder *enc, struct dc_link_settings *link_settings)
{
struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ union dmub_rb_cmd cmd;
uint32_t is_in_usb_c_dp4_mode = 0;
dcn10_link_encoder_get_max_link_cap(enc, link_settings);
- /* in usb c dp2 mode, max lane count is 2 */
- if (enc->funcs->is_in_alt_mode && enc->funcs->is_in_alt_mode(enc)) {
- if (enc->ctx->asic_id.hw_internal_rev != YELLOW_CARP_B0) {
- // [Note] no need to check hw_internal_rev once phy mux selection is ready
- REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, &is_in_usb_c_dp4_mode);
+ /* Take the link cap directly if not USB */
+ if (!enc->features.flags.bits.DP_IS_USB_C)
+ return;
+
+ /*
+ * Use the new interface from DMCUB if available.
+ * Avoids hanging the RDPCSPIPE if DMCUB wasn't already running.
+ */
+ if (has_query_dp_alt(enc)) {
+ if (!query_dp_alt_from_dmub(enc, &cmd))
+ return;
+
+ if (cmd.query_dp_alt.data.is_dp_alt_disable == 0 &&
+ cmd.query_dp_alt.data.is_usb &&
+ cmd.query_dp_alt.data.is_dp4 == 0)
+ link_settings->lane_count = MIN(LANE_COUNT_TWO, link_settings->lane_count);
+
+ return;
+ }
+
+ /* Legacy path, avoid if possible. */
+ if (enc->ctx->asic_id.hw_internal_rev != YELLOW_CARP_B0) {
+ REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DP4,
+ &is_in_usb_c_dp4_mode);
+ } else {
+ if ((enc10->base.transmitter == TRANSMITTER_UNIPHY_A) ||
+ (enc10->base.transmitter == TRANSMITTER_UNIPHY_B) ||
+ (enc10->base.transmitter == TRANSMITTER_UNIPHY_E)) {
+ REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DP4,
+ &is_in_usb_c_dp4_mode);
} else {
- if ((enc10->base.transmitter == TRANSMITTER_UNIPHY_A)
- || (enc10->base.transmitter == TRANSMITTER_UNIPHY_B)
- || (enc10->base.transmitter == TRANSMITTER_UNIPHY_E)) {
- REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, &is_in_usb_c_dp4_mode);
- } else {
- REG_GET(RDPCSPIPE_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, &is_in_usb_c_dp4_mode);
- }
+ REG_GET(RDPCSPIPE_PHY_CNTL6, RDPCS_PHY_DPALT_DP4,
+ &is_in_usb_c_dp4_mode);
}
- if (!is_in_usb_c_dp4_mode)
- link_settings->lane_count = MIN(LANE_COUNT_TWO, link_settings->lane_count);
}
+
+ if (!is_in_usb_c_dp4_mode)
+ link_settings->lane_count = MIN(LANE_COUNT_TWO, link_settings->lane_count);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.h
index 3454f1e7c1f1..ee78ba80797c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.h
@@ -89,6 +89,7 @@
SRI(RDPCSTX_PHY_FUSE1, RDPCSTX, id), \
SRI(RDPCSTX_PHY_FUSE2, RDPCSTX, id), \
SRI(RDPCSTX_PHY_FUSE3, RDPCSTX, id), \
+ SRI(RDPCSTX_DEBUG_CONFIG, RDPCSTX, id), \
SR(RDPCSTX0_RDPCSTX_SCRATCH), \
SRI(RDPCSTX_PHY_RX_LD_VAL, RDPCSTX, id),\
SRI(RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG, RDPCSTX, id)
@@ -198,6 +199,35 @@
LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE3, RDPCS_PHY_DP_TX3_EQ_PRE, mask_sh),\
LE_SF(RDPCSTX0_RDPCSTX_PHY_FUSE3, RDPCS_PHY_DP_TX3_EQ_POST, mask_sh)
+#define DPCS_DCN314_REG_LIST(id) \
+ SRI(TMDS_CTL_BITS, DIG, id), \
+ SRI(RDPCSTX_PHY_CNTL3, RDPCSTX, id), \
+ SRI(RDPCSTX_PHY_CNTL4, RDPCSTX, id), \
+ SRI(RDPCSTX_PHY_CNTL5, RDPCSTX, id), \
+ SRI(RDPCSTX_PHY_CNTL7, RDPCSTX, id), \
+ SRI(RDPCSTX_PHY_CNTL8, RDPCSTX, id), \
+ SRI(RDPCSTX_PHY_CNTL9, RDPCSTX, id), \
+ SRI(RDPCSTX_PHY_CNTL10, RDPCSTX, id), \
+ SRI(RDPCSTX_PHY_CNTL11, RDPCSTX, id), \
+ SRI(RDPCSTX_PHY_CNTL12, RDPCSTX, id), \
+ SRI(RDPCSTX_PHY_CNTL13, RDPCSTX, id), \
+ SRI(RDPCSTX_PHY_CNTL14, RDPCSTX, id), \
+ SRI(RDPCSTX_CNTL, RDPCSTX, id), \
+ SRI(RDPCSTX_CLOCK_CNTL, RDPCSTX, id), \
+ SRI(RDPCSTX_INTERRUPT_CONTROL, RDPCSTX, id), \
+ SRI(RDPCSTX_PHY_CNTL0, RDPCSTX, id), \
+ SRI(RDPCSTX_PHY_CNTL2, RDPCSTX, id), \
+ SRI(RDPCS_TX_CR_ADDR, RDPCSTX, id), \
+ SRI(RDPCS_TX_CR_DATA, RDPCSTX, id), \
+ SRI(RDPCSTX_PHY_FUSE0, RDPCSTX, id), \
+ SRI(RDPCSTX_PHY_FUSE1, RDPCSTX, id), \
+ SRI(RDPCSTX_PHY_FUSE2, RDPCSTX, id), \
+ SRI(RDPCSTX_PHY_FUSE3, RDPCSTX, id), \
+ SRI(RDPCSTX_DEBUG_CONFIG, RDPCSTX, id), \
+ SR(RDPCSTX0_RDPCSTX_SCRATCH), \
+ SRI(RDPCSTX_PHY_RX_LD_VAL, RDPCSTX, id),\
+ SRI(RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG, RDPCSTX, id)
+
void dcn31_link_encoder_construct(
struct dcn20_link_encoder *enc20,
const struct encoder_init_data *init_data,
@@ -255,4 +285,6 @@ bool dcn31_link_encoder_is_in_alt_mode(
void dcn31_link_encoder_get_max_link_cap(struct link_encoder *enc,
struct dc_link_settings *link_settings);
+void enc31_hw_init(struct link_encoder *enc);
+
#endif /* __DC_LINK_ENCODER__DCN31_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c
new file mode 100644
index 000000000000..3e85e9c3d2cb
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c
@@ -0,0 +1,506 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+#include "dc_bios_types.h"
+#include "dcn30/dcn30_dio_stream_encoder.h"
+#include "dcn314_dio_stream_encoder.h"
+#include "reg_helper.h"
+#include "hw_shared.h"
+#include "link_service.h"
+#include "dpcd_defs.h"
+
+#define DC_LOGGER \
+ enc1->base.ctx->logger
+
+#define REG(reg)\
+ (enc1->regs->reg)
+
+#undef FN
+#define FN(reg_name, field_name) \
+ enc1->se_shift->field_name, enc1->se_mask->field_name
+
+#define VBI_LINE_0 0
+#define HDMI_CLOCK_CHANNEL_RATE_MORE_340M 340000
+
+#define CTX \
+ enc1->base.ctx
+
+void enc314_reset_fifo(struct stream_encoder *enc, bool reset)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ uint32_t reset_val = reset ? 1 : 0;
+ uint32_t is_symclk_on;
+
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, reset_val);
+ REG_GET(DIG_FE_CNTL, DIG_SYMCLK_FE_ON, &is_symclk_on);
+
+ if (is_symclk_on)
+ REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, reset_val, 10, 5000);
+ else
+ udelay(10);
+}
+
+void enc314_enable_fifo(struct stream_encoder *enc)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, 0x7);
+
+ enc314_reset_fifo(enc, true);
+ enc314_reset_fifo(enc, false);
+
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 1);
+}
+
+void enc314_disable_fifo(struct stream_encoder *enc)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 0);
+}
+
+static bool enc314_is_fifo_enabled(struct stream_encoder *enc)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ uint32_t reset_val;
+
+ REG_GET(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, &reset_val);
+ return (reset_val != 0);
+}
+
+void enc314_dp_set_odm_combine(
+ struct stream_encoder *enc,
+ bool odm_combine)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_PER_CYCLE_PROCESSING_MODE, odm_combine);
+}
+
+/* setup stream encoder in dvi mode */
+void enc314_stream_encoder_dvi_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ bool is_dual_link)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ if (!enc->ctx->dc->debug.avoid_vbios_exec_table) {
+ struct bp_encoder_control cntl = {0};
+
+ cntl.action = ENCODER_CONTROL_SETUP;
+ cntl.engine_id = enc1->base.id;
+ cntl.signal = is_dual_link ?
+ SIGNAL_TYPE_DVI_DUAL_LINK : SIGNAL_TYPE_DVI_SINGLE_LINK;
+ cntl.enable_dp_audio = false;
+ cntl.pixel_clock = crtc_timing->pix_clk_100hz / 10;
+ cntl.lanes_number = (is_dual_link) ? LANE_COUNT_EIGHT : LANE_COUNT_FOUR;
+
+ if (enc1->base.bp->funcs->encoder_control(
+ enc1->base.bp, &cntl) != BP_RESULT_OK)
+ return;
+
+ } else {
+
+ //Set pattern for clock channel, default value 0x63 does not work
+ REG_UPDATE(DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, 0x1F);
+
+ //DIG_BE_TMDS_DVI_MODE : TMDS-DVI mode is already set in link_encoder_setup
+
+ //DIG_SOURCE_SELECT is already set in dig_connect_to_otg
+
+ enc314_enable_fifo(enc);
+ }
+
+ ASSERT(crtc_timing->pixel_encoding == PIXEL_ENCODING_RGB);
+ ASSERT(crtc_timing->display_color_depth == COLOR_DEPTH_888);
+ enc1_stream_encoder_set_stream_attribute_helper(enc1, crtc_timing);
+}
+
+/* setup stream encoder in hdmi mode */
+void enc314_stream_encoder_hdmi_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ int actual_pix_clk_khz,
+ bool enable_audio)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ if (!enc->ctx->dc->debug.avoid_vbios_exec_table) {
+ struct bp_encoder_control cntl = {0};
+
+ cntl.action = ENCODER_CONTROL_SETUP;
+ cntl.engine_id = enc1->base.id;
+ cntl.signal = SIGNAL_TYPE_HDMI_TYPE_A;
+ cntl.enable_dp_audio = enable_audio;
+ cntl.pixel_clock = actual_pix_clk_khz;
+ cntl.lanes_number = LANE_COUNT_FOUR;
+
+ if (enc1->base.bp->funcs->encoder_control(
+ enc1->base.bp, &cntl) != BP_RESULT_OK)
+ return;
+
+ } else {
+
+ //Set pattern for clock channel, default value 0x63 does not work
+ REG_UPDATE(DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, 0x1F);
+
+ //DIG_BE_TMDS_HDMI_MODE : TMDS-HDMI mode is already set in link_encoder_setup
+
+ //DIG_SOURCE_SELECT is already set in dig_connect_to_otg
+
+ enc314_enable_fifo(enc);
+ }
+
+ /* Configure pixel encoding */
+ enc1_stream_encoder_set_stream_attribute_helper(enc1, crtc_timing);
+
+ /* setup HDMI engine */
+ REG_UPDATE_6(HDMI_CONTROL,
+ HDMI_PACKET_GEN_VERSION, 1,
+ HDMI_KEEPOUT_MODE, 1,
+ HDMI_DEEP_COLOR_ENABLE, 0,
+ HDMI_DATA_SCRAMBLE_EN, 0,
+ HDMI_NO_EXTRA_NULL_PACKET_FILLED, 1,
+ HDMI_CLOCK_CHANNEL_RATE, 0);
+
+ /* Configure color depth */
+ switch (crtc_timing->display_color_depth) {
+ case COLOR_DEPTH_888:
+ REG_UPDATE(HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 0);
+ break;
+ case COLOR_DEPTH_101010:
+ if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 1,
+ HDMI_DEEP_COLOR_ENABLE, 0);
+ } else {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 1,
+ HDMI_DEEP_COLOR_ENABLE, 1);
+ }
+ break;
+ case COLOR_DEPTH_121212:
+ if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 2,
+ HDMI_DEEP_COLOR_ENABLE, 0);
+ } else {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 2,
+ HDMI_DEEP_COLOR_ENABLE, 1);
+ }
+ break;
+ case COLOR_DEPTH_161616:
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 3,
+ HDMI_DEEP_COLOR_ENABLE, 1);
+ break;
+ default:
+ break;
+ }
+
+ if (actual_pix_clk_khz >= HDMI_CLOCK_CHANNEL_RATE_MORE_340M) {
+ /* enable HDMI data scrambler
+ * HDMI_CLOCK_CHANNEL_RATE_MORE_340M
+ * Clock channel frequency is 1/4 of character rate.
+ */
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DATA_SCRAMBLE_EN, 1,
+ HDMI_CLOCK_CHANNEL_RATE, 1);
+ } else if (crtc_timing->flags.LTE_340MCSC_SCRAMBLE) {
+
+ /* TODO: New feature for DCE11, still need to implement */
+
+ /* enable HDMI data scrambler
+ * HDMI_CLOCK_CHANNEL_FREQ_EQUAL_TO_CHAR_RATE
+ * Clock channel frequency is the same
+ * as character rate
+ */
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DATA_SCRAMBLE_EN, 1,
+ HDMI_CLOCK_CHANNEL_RATE, 0);
+ }
+
+
+ /* Enable transmission of General Control packet on every frame */
+ REG_UPDATE_3(HDMI_VBI_PACKET_CONTROL,
+ HDMI_GC_CONT, 1,
+ HDMI_GC_SEND, 1,
+ HDMI_NULL_SEND, 1);
+
+ /* Disable Audio Content Protection packet transmission */
+ REG_UPDATE(HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, 0);
+
+ /* following belongs to audio */
+ /* Enable Audio InfoFrame packet transmission. */
+ REG_UPDATE(HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
+
+ /* update double-buffered AUDIO_INFO registers immediately */
+ ASSERT(enc->afmt);
+ enc->afmt->funcs->audio_info_immediate_update(enc->afmt);
+
+ /* Select line number on which to send Audio InfoFrame packets */
+ REG_UPDATE(HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE,
+ VBI_LINE_0 + 2);
+
+ /* set HDMI GC AVMUTE */
+ REG_UPDATE(HDMI_GC, HDMI_GC_AVMUTE, 0);
+}
+
+
+/* True for YCbCr 4:2:0, or for DSC YCbCr 4:2:2 when dsc_cfg.ycbcr422_simple is not set. */
+static bool is_two_pixels_per_containter(const struct dc_crtc_timing *timing)
+{
+ bool two_pix = timing->pixel_encoding == PIXEL_ENCODING_YCBCR420;
+
+ two_pix = two_pix || (timing->flags.DSC && timing->pixel_encoding == PIXEL_ENCODING_YCBCR422
+ && !timing->dsc_cfg.ycbcr422_simple);
+ return two_pix;
+}
+
+void enc314_stream_encoder_dp_blank(
+ struct dc_link *link,
+ struct stream_encoder *enc)
+{
+ enc1_stream_encoder_dp_blank(link, enc);
+
+ /* Disable FIFO only after the DP vid stream has been blanked above, to avoid corruption. */
+ if (enc->ctx->dc->debug.dig_fifo_off_in_blank)
+ enc314_disable_fifo(enc);
+}
+
+void enc314_stream_encoder_dp_unblank(
+ struct dc_link *link,
+ struct stream_encoder *enc,
+ const struct encoder_unblank_param *param)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ if (param->link_settings.link_rate != LINK_RATE_UNKNOWN) {
+ uint32_t n_vid = 0x8000;
+ uint32_t m_vid;
+ uint32_t n_multiply = 0;
+ uint32_t pix_per_cycle = 0;
+ uint64_t m_vid_l = n_vid;
+
+ /* YCbCr 4:2:0 : Computed VID_M will be 2X the input rate */
+ if (is_two_pixels_per_containter(&param->timing) || param->opp_cnt > 1) {
+ /*this logic should be the same in get_pixel_clock_parameters() */
+ n_multiply = 1;
+ pix_per_cycle = 1;
+ }
+ /* M / N = Fstream / Flink
+ * m_vid / n_vid = pixel rate / link rate
+ */
+
+ m_vid_l *= param->timing.pix_clk_100hz / 10;
+ m_vid_l = div_u64(m_vid_l,
+ param->link_settings.link_rate
+ * LINK_RATE_REF_FREQ_IN_KHZ);
+
+ m_vid = (uint32_t) m_vid_l;
+
+ /* clear DP_VID_M_N_GEN_EN first; it is set again below after N and M are programmed */
+
+ REG_UPDATE(DP_VID_TIMING, DP_VID_M_N_GEN_EN, 0);
+
+ /* auto measurement need 1 full 0x8000 symbol cycle to kick in,
+ * therefore program initial value for Mvid and Nvid
+ */
+
+ REG_UPDATE(DP_VID_N, DP_VID_N, n_vid);
+
+ REG_UPDATE(DP_VID_M, DP_VID_M, m_vid);
+
+ REG_UPDATE_2(DP_VID_TIMING,
+ DP_VID_M_N_GEN_EN, 1,
+ DP_VID_N_MUL, n_multiply);
+
+ REG_UPDATE(DP_PIXEL_FORMAT,
+ DP_PIXEL_PER_CYCLE_PROCESSING_MODE,
+ pix_per_cycle);
+ }
+
+ /* make sure stream is disabled before resetting steer fifo */
+ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, false);
+ REG_WAIT(DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, 0, 10, 5000);
+
+ /* DIG_START is removed from the register spec */
+
+ /* switch DP encoder to CRTC data, but reset the fifo first. It may happen
+ * that it overflows during mode transition, and sometimes doesn't recover.
+ */
+ REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 1);
+ udelay(10);
+
+ REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 0);
+
+ /* wait 100us for DIG/DP logic to prime
+ * (i.e. a few video lines)
+ */
+ udelay(100);
+
+ /* the hardware would start sending video at the start of the next DP
+ * frame (i.e. rising edge of the vblank).
+ * NOTE: We used to program DP_VID_STREAM_DIS_DEFER = 2 here, but this
+ * register has no effect on enable transition! HW always guarantees
+ * VID_STREAM enable at start of next frame, and this is not
+ * programmable
+ */
+
+ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, true);
+
+ /*
+ * DIG Resync FIFO now needs to be explicitly enabled.
+ * This should come after DP_VID_STREAM_ENABLE per HW docs.
+ */
+ enc314_enable_fifo(enc);
+
+ link->dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_ENABLE_DP_VID_STREAM);
+}
+
+/* Set DSC-related configuration.
+ * dsc_mode: OPTC_DSC_DISABLED writes DP_DSC_MODE = 0, any other value writes 1
+ * dsc_bytes_per_pixel: unused here (DP_DSC_BYTES_PER_PIXEL removed in DCN32)
+ * dsc_slice_width: unused here (DP_DSC_SLICE_WIDTH removed in DCN32)
+ */
+void enc314_dp_set_dsc_config(struct stream_encoder *enc,
+ enum optc_dsc_mode dsc_mode,
+ uint32_t dsc_bytes_per_pixel,
+ uint32_t dsc_slice_width)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ REG_UPDATE(DP_DSC_CNTL, DP_DSC_MODE, dsc_mode == OPTC_DSC_DISABLED ? 0 : 1);
+}
+
+/* Read DSC-related register fields into *s (a struct enc_state), reportedly for later logging by
+ * dcn10_log_hw_state. Fields other than dsc_mode are only read when dsc_mode is non-zero.
+ */
+void enc314_read_state(struct stream_encoder *enc, struct enc_state *s)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ //if dsc is enabled, continue to read
+ REG_GET(DP_DSC_CNTL, DP_DSC_MODE, &s->dsc_mode);
+ if (s->dsc_mode) {
+ REG_GET(DP_GSP11_CNTL, DP_SEC_GSP11_LINE_NUM, &s->sec_gsp_pps_line_num);
+
+ REG_GET(DP_MSA_VBID_MISC, DP_VBID6_LINE_REFERENCE, &s->vbid6_line_reference);
+ REG_GET(DP_MSA_VBID_MISC, DP_VBID6_LINE_NUM, &s->vbid6_line_num);
+
+ REG_GET(DP_GSP11_CNTL, DP_SEC_GSP11_ENABLE, &s->sec_gsp_pps_enable);
+ REG_GET(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, &s->sec_stream_enable);
+ }
+}
+
+void enc314_set_dig_input_mode(struct stream_encoder *enc, unsigned int pix_per_container)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ // The naming of this field is confusing: it is the output mode of the otg, which is
+ // the input mode of the dig. Written as 1 when pix_per_container == 2, otherwise 0.
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_MODE, pix_per_container == 2 ? 0x1 : 0x0);
+}
+
+/* stream_encoder_funcs table for DCN3.14: enc314_* entries plus reused enc1_*, enc2_* and enc3_* ones. */
+static const struct stream_encoder_funcs dcn314_str_enc_funcs = {
+ .dp_set_odm_combine =
+ enc314_dp_set_odm_combine,
+ .dp_set_stream_attribute =
+ enc2_stream_encoder_dp_set_stream_attribute,
+ .hdmi_set_stream_attribute =
+ enc314_stream_encoder_hdmi_set_stream_attribute,
+ .dvi_set_stream_attribute =
+ enc314_stream_encoder_dvi_set_stream_attribute,
+ .set_throttled_vcp_size =
+ enc1_stream_encoder_set_throttled_vcp_size,
+ .update_hdmi_info_packets =
+ enc3_stream_encoder_update_hdmi_info_packets,
+ .stop_hdmi_info_packets =
+ enc3_stream_encoder_stop_hdmi_info_packets,
+ .update_dp_info_packets_sdp_line_num =
+ enc3_stream_encoder_update_dp_info_packets_sdp_line_num,
+ .update_dp_info_packets =
+ enc3_stream_encoder_update_dp_info_packets,
+ .stop_dp_info_packets =
+ enc1_stream_encoder_stop_dp_info_packets,
+ .dp_blank =
+ enc314_stream_encoder_dp_blank,
+ .dp_unblank =
+ enc314_stream_encoder_dp_unblank,
+ .audio_mute_control = enc3_audio_mute_control,
+
+ .dp_audio_setup = enc3_se_dp_audio_setup,
+ .dp_audio_enable = enc3_se_dp_audio_enable,
+ .dp_audio_disable = enc1_se_dp_audio_disable,
+
+ .hdmi_audio_setup = enc3_se_hdmi_audio_setup,
+ .hdmi_audio_disable = enc1_se_hdmi_audio_disable,
+ .setup_stereo_sync = enc1_setup_stereo_sync,
+ .set_avmute = enc1_stream_encoder_set_avmute,
+ .dig_connect_to_otg = enc1_dig_connect_to_otg,
+ .dig_source_otg = enc1_dig_source_otg,
+
+ .dp_get_pixel_format = enc1_stream_encoder_dp_get_pixel_format,
+
+ .enc_read_state = enc314_read_state,
+ .dp_set_dsc_config = enc314_dp_set_dsc_config,
+ .dp_set_dsc_pps_info_packet = enc3_dp_set_dsc_pps_info_packet,
+ .set_dynamic_metadata = enc2_set_dynamic_metadata,
+ .hdmi_reset_stream_attribute = enc1_reset_hdmi_stream_attribute,
+
+ .enable_fifo = enc314_enable_fifo,
+ .disable_fifo = enc314_disable_fifo,
+ .is_fifo_enabled = enc314_is_fifo_enabled,
+ .set_input_mode = enc314_set_dig_input_mode,
+};
+
+void dcn314_dio_stream_encoder_construct(
+ struct dcn10_stream_encoder *enc1,
+ struct dc_context *ctx,
+ struct dc_bios *bp,
+ enum engine_id eng_id,
+ struct vpg *vpg,
+ struct afmt *afmt,
+ const struct dcn10_stream_enc_registers *regs,
+ const struct dcn10_stream_encoder_shift *se_shift,
+ const struct dcn10_stream_encoder_mask *se_mask)
+{
+ enc1->base.funcs = &dcn314_str_enc_funcs; /* DCN3.14 function table from this file */
+ enc1->base.ctx = ctx;
+ enc1->base.id = eng_id;
+ enc1->base.bp = bp;
+ enc1->base.vpg = vpg;
+ enc1->base.afmt = afmt;
+ enc1->regs = regs;
+ enc1->se_shift = se_shift;
+ enc1->se_mask = se_mask;
+ enc1->base.stream_enc_inst = vpg->inst; /* vpg is dereferenced unchecked: must be non-NULL */
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.h
new file mode 100644
index 000000000000..86548be591be
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.h
@@ -0,0 +1,355 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_DIO_STREAM_ENCODER_DCN314_H__
+#define __DC_DIO_STREAM_ENCODER_DCN314_H__
+
+#include "dcn30/dcn30_vpg.h"
+#include "dcn30/dcn30_afmt.h"
+#include "stream_encoder.h"
+#include "dcn20/dcn20_stream_encoder.h"
+
+/* Register bit field name change */
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_GATE_DIS__SHIFT 0x8
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_EN__SHIFT 0x9
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_CLOCK_ON__SHIFT 0xa
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_DATA_SWAP__SHIFT 0xe
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_DATA_ORDER_INVERT__SHIFT 0xf
+
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_GATE_DIS_MASK 0x00000100L
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_EN_MASK 0x00000200L
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_CLOCK_ON_MASK 0x00000400L
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_DATA_SWAP_MASK 0x00004000L
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_DATA_ORDER_INVERT_MASK 0x00008000L
+
+/* DCN3.14 stream encoder registers; DIG_FE_CNTL and the two METADATA entries are listed twice below. */
+#define SE_DCN314_REG_LIST(id)\
+ SRI(AFMT_CNTL, DIG, id), \
+ SRI(DIG_FE_CNTL, DIG, id), \
+ SRI(HDMI_CONTROL, DIG, id), \
+ SRI(HDMI_DB_CONTROL, DIG, id), \
+ SRI(HDMI_GC, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL0, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL1, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL2, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL3, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL4, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL5, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL6, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL7, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL8, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL9, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL10, DIG, id), \
+ SRI(HDMI_INFOFRAME_CONTROL0, DIG, id), \
+ SRI(HDMI_INFOFRAME_CONTROL1, DIG, id), \
+ SRI(HDMI_VBI_PACKET_CONTROL, DIG, id), \
+ SRI(HDMI_AUDIO_PACKET_CONTROL, DIG, id),\
+ SRI(HDMI_ACR_PACKET_CONTROL, DIG, id),\
+ SRI(HDMI_ACR_32_0, DIG, id),\
+ SRI(HDMI_ACR_32_1, DIG, id),\
+ SRI(HDMI_ACR_44_0, DIG, id),\
+ SRI(HDMI_ACR_44_1, DIG, id),\
+ SRI(HDMI_ACR_48_0, DIG, id),\
+ SRI(HDMI_ACR_48_1, DIG, id),\
+ SRI(DP_DB_CNTL, DP, id), \
+ SRI(DP_MSA_MISC, DP, id), \
+ SRI(DP_MSA_VBID_MISC, DP, id), \
+ SRI(DP_MSA_COLORIMETRY, DP, id), \
+ SRI(DP_MSA_TIMING_PARAM1, DP, id), \
+ SRI(DP_MSA_TIMING_PARAM2, DP, id), \
+ SRI(DP_MSA_TIMING_PARAM3, DP, id), \
+ SRI(DP_MSA_TIMING_PARAM4, DP, id), \
+ SRI(DP_MSE_RATE_CNTL, DP, id), \
+ SRI(DP_MSE_RATE_UPDATE, DP, id), \
+ SRI(DP_PIXEL_FORMAT, DP, id), \
+ SRI(DP_SEC_CNTL, DP, id), \
+ SRI(DP_SEC_CNTL1, DP, id), \
+ SRI(DP_SEC_CNTL2, DP, id), \
+ SRI(DP_SEC_CNTL5, DP, id), \
+ SRI(DP_SEC_CNTL6, DP, id), \
+ SRI(DP_STEER_FIFO, DP, id), \
+ SRI(DP_VID_M, DP, id), \
+ SRI(DP_VID_N, DP, id), \
+ SRI(DP_VID_STREAM_CNTL, DP, id), \
+ SRI(DP_VID_TIMING, DP, id), \
+ SRI(DP_SEC_AUD_N, DP, id), \
+ SRI(DP_SEC_TIMESTAMP, DP, id), \
+ SRI(DP_DSC_CNTL, DP, id), \
+ SRI(DP_SEC_METADATA_TRANSMISSION, DP, id), \
+ SRI(HDMI_METADATA_PACKET_CONTROL, DIG, id), \
+ SRI(DP_SEC_FRAMING4, DP, id), \
+ SRI(DP_GSP11_CNTL, DP, id), \
+ SRI(DME_CONTROL, DME, id),\
+ SRI(DP_SEC_METADATA_TRANSMISSION, DP, id), \
+ SRI(HDMI_METADATA_PACKET_CONTROL, DIG, id), \
+ SRI(DIG_FE_CNTL, DIG, id), \
+ SRI(DIG_CLOCK_PATTERN, DIG, id), \
+ SRI(DIG_FIFO_CTRL0, DIG, id)
+
+/* Register field list (one SE_SF() per field); presumably expanded into the se_shift/se_mask tables. */
+#define SE_COMMON_MASK_SH_LIST_DCN314(mask_sh)\
+ SE_SF(DP0_DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, mask_sh),\
+ SE_SF(DP0_DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, mask_sh),\
+ SE_SF(DP0_DP_PIXEL_FORMAT, DP_PIXEL_PER_CYCLE_PROCESSING_MODE, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_PACKET_GEN_VERSION, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_KEEPOUT_MODE, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_DATA_SCRAMBLE_EN, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_NO_EXTRA_NULL_PACKET_FILLED, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GC, HDMI_GC_AVMUTE, mask_sh),\
+ SE_SF(DP0_DP_MSE_RATE_CNTL, DP_MSE_RATE_X, mask_sh),\
+ SE_SF(DP0_DP_MSE_RATE_CNTL, DP_MSE_RATE_Y, mask_sh),\
+ SE_SF(DP0_DP_MSE_RATE_UPDATE, DP_MSE_RATE_UPDATE_PENDING, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP1_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_MPG_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP5_LINE_REFERENCE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_PENDING, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL4, DP_SEC_GSP4_LINE_NUM, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL5, DP_SEC_GSP5_LINE_NUM, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_ANY_LINE, mask_sh),\
+ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, mask_sh),\
+ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, mask_sh),\
+ SE_SF(DP0_DP_STEER_FIFO, DP_STEER_FIFO_RESET, mask_sh),\
+ SE_SF(DP0_DP_VID_TIMING, DP_VID_M_N_GEN_EN, mask_sh),\
+ SE_SF(DP0_DP_VID_N, DP_VID_N, mask_sh),\
+ SE_SF(DP0_DP_VID_M, DP_VID_M, mask_sh),\
+ SE_SF(DIG0_HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUDIO_PRIORITY, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_32_0, HDMI_ACR_CTS_32, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_32_1, HDMI_ACR_N_32, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_44_0, HDMI_ACR_CTS_44, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_44_1, HDMI_ACR_N_44, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_48_0, HDMI_ACR_CTS_48, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_48_1, HDMI_ACR_N_48, mask_sh),\
+ SE_SF(DP0_DP_SEC_AUD_N, DP_SEC_AUD_N, mask_sh),\
+ SE_SF(DP0_DP_SEC_TIMESTAMP, DP_SEC_TIMESTAMP_MODE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ASP_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ATP_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_AIP_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ACM_ENABLE, mask_sh),\
+ SE_SF(DIG0_AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_CLOCK_CHANNEL_RATE, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, TMDS_PIXEL_ENCODING, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, TMDS_COLOR_FORMAT, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_SELECT, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_GATE_EN, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP4_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP5_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP6_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP7_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP7_SEND, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL6, DP_SEC_GSP7_LINE_NUM, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP11_PPS, mask_sh),\
+ SE_SF(DP0_DP_GSP11_CNTL, DP_SEC_GSP11_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_GSP11_CNTL, DP_SEC_GSP11_LINE_NUM, mask_sh),\
+ SE_SF(DP0_DP_DB_CNTL, DP_DB_DISABLE, mask_sh),\
+ SE_SF(DP0_DP_MSA_COLORIMETRY, DP_MSA_MISC0, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM1, DP_MSA_HTOTAL, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM1, DP_MSA_VTOTAL, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM2, DP_MSA_HSTART, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM2, DP_MSA_VSTART, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_HSYNCWIDTH, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_HSYNCPOLARITY, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_VSYNCWIDTH, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_VSYNCPOLARITY, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_HWIDTH, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_VHEIGHT, mask_sh),\
+ SE_SF(DIG0_HDMI_DB_CONTROL, HDMI_DB_DISABLE, mask_sh),\
+ SE_SF(DP0_DP_VID_TIMING, DP_VID_N_MUL, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DIG_SOURCE_SELECT, mask_sh), \
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC2_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC2_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC3_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC3_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC4_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC4_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC5_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC5_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC6_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC6_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC7_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC7_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC8_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC8_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC9_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC9_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC10_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC10_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC11_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC11_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC12_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC12_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC13_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC13_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC14_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC14_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL1, HDMI_GENERIC0_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL1, HDMI_GENERIC1_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL2, HDMI_GENERIC2_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL2, HDMI_GENERIC3_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL3, HDMI_GENERIC4_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL3, HDMI_GENERIC5_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL4, HDMI_GENERIC6_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL4, HDMI_GENERIC7_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL7, HDMI_GENERIC8_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL7, HDMI_GENERIC9_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL8, HDMI_GENERIC10_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL8, HDMI_GENERIC11_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL9, HDMI_GENERIC12_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL9, HDMI_GENERIC13_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL10, HDMI_GENERIC14_LINE, mask_sh),\
+ SE_SF(DP0_DP_DSC_CNTL, DP_DSC_MODE, mask_sh),\
+ SE_SF(DP0_DP_MSA_VBID_MISC, DP_VBID6_LINE_REFERENCE, mask_sh),\
+ SE_SF(DP0_DP_MSA_VBID_MISC, DP_VBID6_LINE_NUM, mask_sh),\
+ SE_SF(DME0_DME_CONTROL, METADATA_ENGINE_EN, mask_sh),\
+ SE_SF(DME0_DME_CONTROL, METADATA_HUBP_REQUESTOR_ID, mask_sh),\
+ SE_SF(DME0_DME_CONTROL, METADATA_STREAM_TYPE, mask_sh),\
+ SE_SF(DP0_DP_SEC_METADATA_TRANSMISSION, DP_SEC_METADATA_PACKET_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_METADATA_TRANSMISSION, DP_SEC_METADATA_PACKET_LINE_REFERENCE, mask_sh),\
+ SE_SF(DP0_DP_SEC_METADATA_TRANSMISSION, DP_SEC_METADATA_PACKET_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_ENABLE, mask_sh),\
+ SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_LINE_REFERENCE, mask_sh),\
+ SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_LINE, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DOLBY_VISION_EN, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DIG_SYMCLK_FE_ON, mask_sh),\
+ SE_SF(DP0_DP_SEC_FRAMING4, DP_SST_SDP_SPLITTING, mask_sh),\
+ SE_SF(DIG0_DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_MODE, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_RESET, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, mask_sh)
+
+void dcn314_dio_stream_encoder_construct(
+ struct dcn10_stream_encoder *enc1,
+ struct dc_context *ctx,
+ struct dc_bios *bp,
+ enum engine_id eng_id,
+ struct vpg *vpg,
+ struct afmt *afmt,
+ const struct dcn10_stream_enc_registers *regs,
+ const struct dcn10_stream_encoder_shift *se_shift,
+ const struct dcn10_stream_encoder_mask *se_mask);
+
+void enc3_stream_encoder_update_hdmi_info_packets(
+ struct stream_encoder *enc,
+ const struct encoder_info_frame *info_frame);
+
+void enc3_stream_encoder_stop_hdmi_info_packets(
+ struct stream_encoder *enc);
+
+void enc3_stream_encoder_update_dp_info_packets_sdp_line_num(
+ struct stream_encoder *enc,
+ struct encoder_info_frame *info_frame);
+
+void enc3_stream_encoder_update_dp_info_packets(
+ struct stream_encoder *enc,
+ const struct encoder_info_frame *info_frame);
+
+void enc3_audio_mute_control(
+ struct stream_encoder *enc,
+ bool mute);
+
+void enc3_se_dp_audio_setup(
+ struct stream_encoder *enc,
+ unsigned int az_inst,
+ struct audio_info *info);
+
+void enc3_se_dp_audio_enable(
+ struct stream_encoder *enc);
+
+void enc3_se_hdmi_audio_setup(
+ struct stream_encoder *enc,
+ unsigned int az_inst,
+ struct audio_info *info,
+ struct audio_crtc_info *audio_crtc_info);
+
+void enc3_dp_set_dsc_pps_info_packet(
+ struct stream_encoder *enc,
+ bool enable,
+ uint8_t *dsc_packed_pps,
+ bool immediate_update);
+
+void enc314_stream_encoder_dvi_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ bool is_dual_link);
+
+void enc314_stream_encoder_hdmi_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ int actual_pix_clk_khz,
+ bool enable_audio);
+
+void enc314_stream_encoder_dp_blank(
+ struct dc_link *link,
+ struct stream_encoder *enc);
+
+void enc314_stream_encoder_dp_unblank(
+ struct dc_link *link,
+ struct stream_encoder *enc,
+ const struct encoder_unblank_param *param);
+
+void enc314_reset_fifo(struct stream_encoder *enc, bool reset);
+
+void enc314_enable_fifo(struct stream_encoder *enc);
+
+void enc314_disable_fifo(struct stream_encoder *enc);
+
+void enc314_set_dig_input_mode(struct stream_encoder *enc, unsigned int pix_per_container);
+
+void enc314_read_state(struct stream_encoder *enc, struct enc_state *s);
+
+void enc314_dp_set_odm_combine(
+ struct stream_encoder *enc,
+ bool odm_combine);
+
+void enc314_dp_set_dsc_config(
+ struct stream_encoder *enc,
+ enum optc_dsc_mode dsc_mode,
+ uint32_t dsc_bytes_per_pixel,
+ uint32_t dsc_slice_width);
+
+#endif /* __DC_DIO_STREAM_ENCODER_DCN314_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_link_encoder.c
new file mode 100644
index 000000000000..06907e8a4eda
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_link_encoder.c
@@ -0,0 +1,328 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+#include "reg_helper.h"
+
+#include "core_types.h"
+#include "link_encoder.h"
+#include "dcn31/dcn31_dio_link_encoder.h"
+#include "dcn32_dio_link_encoder.h"
+#include "stream_encoder.h"
+#include "dc_bios_types.h"
+#include "link_enc_cfg.h"
+
+#include "dc_dmub_srv.h"
+#include "gpio_service_interface.h"
+
+#ifndef MIN
+#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+#endif
+
+#define CTX \
+ enc10->base.ctx
+#define DC_LOGGER \
+ enc10->base.ctx->logger
+
+#define REG(reg)\
+ (enc10->link_regs->reg)
+
+#undef FN
+#define FN(reg_name, field_name) \
+ enc10->link_shift->field_name, enc10->link_mask->field_name
+
+#define AUX_REG(reg)\
+ (enc10->aux_regs->reg)
+
+#define AUX_REG_READ(reg_name) \
+ dm_read_reg(CTX, AUX_REG(reg_name))
+
+#define AUX_REG_WRITE(reg_name, val) \
+ dm_write_reg(CTX, AUX_REG(reg_name), val)
+
+/*
+ * Translate a UNIPHY transmitter into the PHY index that is placed in the
+ * DMUB DP alt-mode query.  TRANSMITTER_UNIPHY_A..G map to 0..6 in order;
+ * any other transmitter value yields 0.
+ */
+static uint8_t phy_id_from_transmitter(enum transmitter t)
+{
+	switch (t) {
+	case TRANSMITTER_UNIPHY_A:
+		return 0;
+	case TRANSMITTER_UNIPHY_B:
+		return 1;
+	case TRANSMITTER_UNIPHY_C:
+		return 2;
+	case TRANSMITTER_UNIPHY_D:
+		return 3;
+	case TRANSMITTER_UNIPHY_E:
+		return 4;
+	case TRANSMITTER_UNIPHY_F:
+		return 5;
+	case TRANSMITTER_UNIPHY_G:
+		return 6;
+	default:
+		return 0;
+	}
+}
+
+void enc32_hw_init(struct link_encoder *enc)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ /*
+ * Hardware init for the DCN32 link encoder: write fixed values to the
+ * AUX DPHY RX and TX control registers, set TMDS_CTL0 and finish with
+ * dcn10_aux_initialize().
+ */
+/*
+ 00 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__1to2 : 1/2
+ 01 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__3to4 : 3/4
+ 02 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__7to8 : 7/8
+ 03 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__15to16 : 15/16
+ 04 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__31to32 : 31/32
+ 05 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__63to64 : 63/64
+ 06 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__127to128 : 127/128
+ 07 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__255to256 : 255/256
+*/
+
+/*
+ AUX_REG_UPDATE_5(AUX_DPHY_RX_CONTROL0,
+ AUX_RX_START_WINDOW = 1 [6:4]
+ AUX_RX_RECEIVE_WINDOW = 1 default is 2 [10:8]
+ AUX_RX_HALF_SYM_DETECT_LEN = 1 [13:12] default is 1
+ AUX_RX_TRANSITION_FILTER_EN = 1 [16] default is 1
+ AUX_RX_ALLOW_BELOW_THRESHOLD_PHASE_DETECT [17] is 0 default is 0
+ AUX_RX_ALLOW_BELOW_THRESHOLD_START [18] is 1 default is 1
+ AUX_RX_ALLOW_BELOW_THRESHOLD_STOP [19] is 1 default is 1
+ AUX_RX_PHASE_DETECT_LEN, [21,20] = 0x3 default is 3
+ AUX_RX_DETECTION_THRESHOLD [30:28] = 1
+*/
+ AUX_REG_WRITE(AUX_DPHY_RX_CONTROL0, 0x103d1110); /* 0x103d1110 packs exactly the field values listed above */
+
+ AUX_REG_WRITE(AUX_DPHY_TX_CONTROL, 0x21c7a);
+
+ //AUX_DPHY_TX_REF_CONTROL'AUX_TX_REF_DIV HW default is 0x32;
+ // Set AUX_TX_REF_DIV Divider to generate 2 MHz reference from refclk
+ // 27MHz -> 0xd
+ // 100MHz -> 0x32
+ // 48MHz -> 0x18
+ // NOTE(review): this function itself does not write AUX_DPHY_TX_REF_CONTROL; the divider values above are reference notes only.
+ // Set TMDS_CTL0 to 1. This is a legacy setting.
+ REG_UPDATE(TMDS_CTL_BITS, TMDS_CTL0, 1);
+
+ dcn10_aux_initialize(enc10);
+}
+
+
+/*
+ * Enable DP output by delegating to dcn10_link_encoder_enable_dp_output().
+ * When the avoid_vbios_exec_table debug option is set, this function does
+ * nothing at all.
+ */
+void dcn32_link_encoder_enable_dp_output(
+	struct link_encoder *enc,
+	const struct dc_link_settings *link_settings,
+	enum clock_source_id clock_source)
+{
+	/* Debug option set: the DCN10 helper is not called. */
+	if (enc->ctx->dc->debug.avoid_vbios_exec_table)
+		return;
+
+	dcn10_link_encoder_enable_dp_output(enc, link_settings, clock_source);
+}
+
+/*
+ * Build a DMUB_CMD__VBIOS_TRANSMITTER_QUERY_DP_ALT command for this
+ * encoder's PHY in *cmd and execute it, waiting for the reply.
+ *
+ * Returns true when dc_wake_and_execute_dmub_cmd() succeeds, false
+ * otherwise.  Callers read the answer from cmd->query_dp_alt.data.
+ */
+static bool query_dp_alt_from_dmub(struct link_encoder *enc,
+	union dmub_rb_cmd *cmd)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	uint8_t phy = phy_id_from_transmitter(enc10->base.transmitter);
+
+	/* Start from an all-zero command, then fill in header and payload. */
+	memset(cmd, 0, sizeof(*cmd));
+
+	cmd->query_dp_alt.header.type = DMUB_CMD__VBIOS;
+	cmd->query_dp_alt.header.sub_type = DMUB_CMD__VBIOS_TRANSMITTER_QUERY_DP_ALT;
+	cmd->query_dp_alt.header.payload_bytes = sizeof(cmd->query_dp_alt.data);
+	cmd->query_dp_alt.data.phy_id = phy;
+
+	return dc_wake_and_execute_dmub_cmd(enc->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
+}
+
+/*
+ * Report whether the encoder is in DP alt mode according to DMUB.
+ * Returns false when the DMUB query fails; otherwise returns true exactly
+ * when the reply's is_dp_alt_disable field is 0.
+ */
+bool dcn32_link_encoder_is_in_alt_mode(struct link_encoder *enc)
+{
+	union dmub_rb_cmd cmd;
+	bool query_ok = query_dp_alt_from_dmub(enc, &cmd);
+
+	return query_ok && !cmd.query_dp_alt.data.is_dp_alt_disable;
+}
+
+/*
+ * Fill *link_settings with the DCN10 maximum link capabilities, then clamp
+ * the lane count to LANE_COUNT_TWO when DMUB reports is_usb set and is_dp4
+ * clear.  If the DMUB query fails the DCN10 values are left untouched.
+ */
+void dcn32_link_encoder_get_max_link_cap(struct link_encoder *enc,
+	struct dc_link_settings *link_settings)
+{
+	union dmub_rb_cmd cmd;
+
+	dcn10_link_encoder_get_max_link_cap(enc, link_settings);
+
+	if (!query_dp_alt_from_dmub(enc, &cmd))
+		return;
+
+	/* Only the "is_usb && !is_dp4" reply restricts the lane count. */
+	if (!cmd.query_dp_alt.data.is_usb || cmd.query_dp_alt.data.is_dp4 != 0)
+		return;
+
+	if (link_settings->lane_count > LANE_COUNT_TWO)
+		link_settings->lane_count = LANE_COUNT_TWO;
+}
+
+/*
+ * Link encoder function table installed by dcn32_link_encoder_construct().
+ * hw_init, enable_dp_output, is_in_alt_mode and get_max_link_cap point at
+ * the functions defined in this file; every other hook reuses an
+ * implementation that is declared elsewhere.
+ */
+static const struct link_encoder_funcs dcn32_link_enc_funcs = {
+ .read_state = link_enc2_read_state,
+ .validate_output_with_stream =
+ dcn30_link_encoder_validate_output_with_stream,
+ .hw_init = enc32_hw_init, /* defined in this file */
+ .setup = dcn10_link_encoder_setup,
+ .enable_tmds_output = dcn10_link_encoder_enable_tmds_output,
+ .enable_dp_output = dcn32_link_encoder_enable_dp_output, /* defined in this file */
+ .enable_dp_mst_output = dcn10_link_encoder_enable_dp_mst_output,
+ .disable_output = dcn10_link_encoder_disable_output,
+ .dp_set_lane_settings = dcn10_link_encoder_dp_set_lane_settings,
+ .dp_set_phy_pattern = dcn10_link_encoder_dp_set_phy_pattern,
+ .update_mst_stream_allocation_table =
+ dcn10_link_encoder_update_mst_stream_allocation_table,
+ .psr_program_dp_dphy_fast_training =
+ dcn10_psr_program_dp_dphy_fast_training,
+ .psr_program_secondary_packet = dcn10_psr_program_secondary_packet,
+ .connect_dig_be_to_fe = dcn10_link_encoder_connect_dig_be_to_fe,
+ .enable_hpd = dcn10_link_encoder_enable_hpd,
+ .disable_hpd = dcn10_link_encoder_disable_hpd,
+ .is_dig_enabled = dcn10_is_dig_enabled,
+ .destroy = dcn10_link_encoder_destroy,
+ .fec_set_enable = enc2_fec_set_enable,
+ .fec_set_ready = enc2_fec_set_ready,
+ .fec_is_active = enc2_fec_is_active,
+ .get_dig_frontend = dcn10_get_dig_frontend,
+ .get_dig_mode = dcn10_get_dig_mode,
+ .is_in_alt_mode = dcn32_link_encoder_is_in_alt_mode, /* defined in this file */
+ .get_max_link_cap = dcn32_link_encoder_get_max_link_cap, /* defined in this file */
+ .set_dio_phy_mux = dcn31_link_encoder_set_dio_phy_mux,
+};
+
+void dcn32_link_encoder_construct(
+ struct dcn20_link_encoder *enc20,
+ const struct encoder_init_data *init_data,
+ const struct encoder_feature_support *enc_features,
+ const struct dcn10_link_enc_registers *link_regs,
+ const struct dcn10_link_enc_aux_registers *aux_regs,
+ const struct dcn10_link_enc_hpd_registers *hpd_regs,
+ const struct dcn10_link_enc_shift *link_shift,
+ const struct dcn10_link_enc_mask *link_mask)
+{
+ struct bp_connector_speed_cap_info bp_cap_info = {0};
+ const struct dc_vbios_funcs *bp_funcs = init_data->ctx->dc_bios->funcs;
+ enum bp_result result = BP_RESULT_OK;
+ struct dcn10_link_encoder *enc10 = &enc20->enc10;
+ /*
+ * Initialize the dcn10 encoder embedded in enc20 for DCN32: install the
+ * dcn32 function table, copy identity fields from init_data, store the
+ * register/shift/mask tables, pick a preferred DIG engine from the
+ * transmitter and derive feature flags from the VBIOS speed caps.
+ */
+ enc10->base.funcs = &dcn32_link_enc_funcs;
+ enc10->base.ctx = init_data->ctx;
+ enc10->base.id = init_data->encoder;
+
+ enc10->base.hpd_source = init_data->hpd_source;
+ enc10->base.connector = init_data->connector;
+
+ enc10->base.preferred_engine = ENGINE_ID_UNKNOWN; /* replaced by the transmitter switch below */
+
+ enc10->base.features = *enc_features; /* struct copy; individual flags are overridden further down */
+
+ enc10->base.transmitter = init_data->transmitter;
+
+ /* set the flag to indicate whether driver poll the I2C data pin
+ * while doing the DP sink detect
+ */
+
+/* if (dal_adapter_service_is_feature_supported(as,
+ FEATURE_DP_SINK_DETECT_POLL_DATA_PIN))
+ enc10->base.features.flags.bits.
+ DP_SINK_DETECT_POLL_DATA_PIN = true;*/
+
+ enc10->base.output_signals =
+ SIGNAL_TYPE_DVI_SINGLE_LINK |
+ SIGNAL_TYPE_DVI_DUAL_LINK |
+ SIGNAL_TYPE_LVDS |
+ SIGNAL_TYPE_DISPLAY_PORT |
+ SIGNAL_TYPE_DISPLAY_PORT_MST |
+ SIGNAL_TYPE_EDP |
+ SIGNAL_TYPE_HDMI_TYPE_A;
+
+ enc10->link_regs = link_regs;
+ enc10->aux_regs = aux_regs;
+ enc10->hpd_regs = hpd_regs;
+ enc10->link_shift = link_shift;
+ enc10->link_mask = link_mask;
+ /* UNIPHY_A..E map to DIGA..DIGE; anything else asserts and stays UNKNOWN */
+ switch (enc10->base.transmitter) {
+ case TRANSMITTER_UNIPHY_A:
+ enc10->base.preferred_engine = ENGINE_ID_DIGA;
+ break;
+ case TRANSMITTER_UNIPHY_B:
+ enc10->base.preferred_engine = ENGINE_ID_DIGB;
+ break;
+ case TRANSMITTER_UNIPHY_C:
+ enc10->base.preferred_engine = ENGINE_ID_DIGC;
+ break;
+ case TRANSMITTER_UNIPHY_D:
+ enc10->base.preferred_engine = ENGINE_ID_DIGD;
+ break;
+ case TRANSMITTER_UNIPHY_E:
+ enc10->base.preferred_engine = ENGINE_ID_DIGE;
+ break;
+ default:
+ ASSERT_CRITICAL(false);
+ enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
+ }
+
+ /* default to one to mirror Windows behavior */
+ enc10->base.features.flags.bits.HDMI_6GB_EN = 1;
+ /* NOTE(review): if the callback is absent, result keeps its BP_RESULT_OK initializer, so the zero-initialized bp_cap_info is applied below (clearing HDMI_6GB_EN among others) - confirm this is intended. */
+ if (bp_funcs->get_connector_speed_cap_info)
+ result = bp_funcs->get_connector_speed_cap_info(enc10->base.ctx->dc_bios,
+ enc10->base.connector, &bp_cap_info);
+
+ /* Override features with the connector speed caps returned in bp_cap_info */
+ if (result == BP_RESULT_OK) {
+ enc10->base.features.flags.bits.IS_HBR2_CAPABLE =
+ bp_cap_info.DP_HBR2_EN;
+ enc10->base.features.flags.bits.IS_HBR3_CAPABLE =
+ bp_cap_info.DP_HBR3_EN;
+ enc10->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN;
+ enc10->base.features.flags.bits.IS_DP2_CAPABLE = 1; /* set unconditionally, not taken from bp_cap_info */
+ enc10->base.features.flags.bits.IS_UHBR10_CAPABLE = bp_cap_info.DP_UHBR10_EN;
+ enc10->base.features.flags.bits.IS_UHBR13_5_CAPABLE = bp_cap_info.DP_UHBR13_5_EN;
+ enc10->base.features.flags.bits.IS_UHBR20_CAPABLE = bp_cap_info.DP_UHBR20_EN;
+ } else {
+ DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n",
+ __func__,
+ result);
+ }
+ if (enc10->base.ctx->dc->debug.hdmi20_disable) { /* debug option wins over both the default and the VBIOS value */
+ enc10->base.features.flags.bits.HDMI_6GB_EN = 0;
+ }
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_link_encoder.h
new file mode 100644
index 000000000000..35d23d9db45e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_link_encoder.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_LINK_ENCODER__DCN32_H__
+#define __DC_LINK_ENCODER__DCN32_H__
+
+#include "dcn30/dcn30_dio_link_encoder.h"
+/* Initialize the dcn10 encoder embedded in enc20 for DCN32: function table, register/shift/mask tables and feature flags. */
+void dcn32_link_encoder_construct(
+ struct dcn20_link_encoder *enc20,
+ const struct encoder_init_data *init_data,
+ const struct encoder_feature_support *enc_features,
+ const struct dcn10_link_enc_registers *link_regs,
+ const struct dcn10_link_enc_aux_registers *aux_regs,
+ const struct dcn10_link_enc_hpd_registers *hpd_regs,
+ const struct dcn10_link_enc_shift *link_shift,
+ const struct dcn10_link_enc_mask *link_mask);
+/* Write the AUX DPHY RX/TX control registers, set TMDS_CTL0, then run dcn10_aux_initialize(). */
+void enc32_hw_init(struct link_encoder *enc);
+/* Calls dcn10_link_encoder_enable_dp_output() unless debug.avoid_vbios_exec_table is set, in which case it does nothing. */
+void dcn32_link_encoder_enable_dp_output(
+ struct link_encoder *enc,
+ const struct dc_link_settings *link_settings,
+ enum clock_source_id clock_source);
+/* Queries DMUB; true only when the query succeeds and the reply's is_dp_alt_disable is 0. */
+bool dcn32_link_encoder_is_in_alt_mode(struct link_encoder *enc);
+/* DCN10 max link caps, with lane_count limited to LANE_COUNT_TWO when DMUB reports is_usb set and is_dp4 clear. */
+void dcn32_link_encoder_get_max_link_cap(struct link_encoder *enc,
+ struct dc_link_settings *link_settings);
+
+#endif /* __DC_LINK_ENCODER__DCN32_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.c
new file mode 100644
index 000000000000..3523d1cdc1a3
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.c
@@ -0,0 +1,493 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+#include "dc_bios_types.h"
+#include "dcn30/dcn30_dio_stream_encoder.h"
+#include "dcn32_dio_stream_encoder.h"
+#include "reg_helper.h"
+#include "hw_shared.h"
+#include "link_service.h"
+#include "dpcd_defs.h"
+
+#define DC_LOGGER \
+ enc1->base.ctx->logger
+
+#define REG(reg)\
+ (enc1->regs->reg)
+
+#undef FN
+#define FN(reg_name, field_name) \
+ enc1->se_shift->field_name, enc1->se_mask->field_name
+
+#define VBI_LINE_0 0
+#define HDMI_CLOCK_CHANNEL_RATE_MORE_340M 340000
+
+#define CTX \
+ enc1->base.ctx
+
+
+
+/*
+ * Program DP_PIXEL_PER_CYCLE_PROCESSING_MODE in DP_PIXEL_FORMAT:
+ * 1 when two_pixel_per_cyle is true, 0 otherwise.
+ */
+static void enc32_dp_set_odm_combine(
+	struct stream_encoder *enc,
+	bool two_pixel_per_cyle)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	uint32_t processing_mode = 0;
+
+	if (two_pixel_per_cyle)
+		processing_mode = 1;
+
+	REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_PER_CYCLE_PROCESSING_MODE, processing_mode);
+}
+
+/* Set up the stream encoder in DVI mode.
+ *
+ * Unless debug.avoid_vbios_exec_table is set, an ENCODER_CONTROL_SETUP
+ * request is sent through bp->funcs->encoder_control(); if that does not
+ * return BP_RESULT_OK the function returns without programming the stream
+ * attributes. With the debug option set, DIG_CLOCK_PATTERN is written
+ * directly instead. Both paths end in
+ * enc1_stream_encoder_set_stream_attribute_helper().
+ */
+static void enc32_stream_encoder_dvi_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ bool is_dual_link)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ if (!enc->ctx->dc->debug.avoid_vbios_exec_table) {
+ struct bp_encoder_control cntl = {0};
+
+ cntl.action = ENCODER_CONTROL_SETUP;
+ cntl.engine_id = enc1->base.id;
+ cntl.signal = is_dual_link ?
+ SIGNAL_TYPE_DVI_DUAL_LINK : SIGNAL_TYPE_DVI_SINGLE_LINK;
+ cntl.enable_dp_audio = false;
+ cntl.pixel_clock = crtc_timing->pix_clk_100hz / 10; /* 100 Hz units (per the field name) divided by 10 gives kHz */
+ cntl.lanes_number = (is_dual_link) ? LANE_COUNT_EIGHT : LANE_COUNT_FOUR;
+
+ if (enc1->base.bp->funcs->encoder_control(
+ enc1->base.bp, &cntl) != BP_RESULT_OK)
+ return; /* BIOS call failed: skip the attribute programming below */
+
+ } else {
+
+ //Set pattern for clock channel, default value 0x63 does not work
+ REG_UPDATE(DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, 0x1F);
+
+ //DIG_BE_TMDS_DVI_MODE : TMDS-DVI mode is already set in link_encoder_setup
+
+ //DIG_SOURCE_SELECT is already set in dig_connect_to_otg
+
+ /* DIG_START is removed from the register spec */
+ }
+ /* this path asserts RGB pixel encoding at COLOR_DEPTH_888 */
+ ASSERT(crtc_timing->pixel_encoding == PIXEL_ENCODING_RGB);
+ ASSERT(crtc_timing->display_color_depth == COLOR_DEPTH_888);
+ enc1_stream_encoder_set_stream_attribute_helper(enc1, crtc_timing);
+}
+
+/* Set up the stream encoder in HDMI mode.
+ *
+ * Unless debug.avoid_vbios_exec_table is set, an ENCODER_CONTROL_SETUP
+ * request is sent through bp->funcs->encoder_control(); if that does not
+ * return BP_RESULT_OK the function returns before touching any HDMI
+ * register. With the debug option set, DIG_CLOCK_PATTERN is written
+ * directly instead. Afterwards the function programs pixel encoding,
+ * HDMI_CONTROL (deep color, scrambling, clock channel rate), the VBI and
+ * InfoFrame packet controls, and clears HDMI_GC_AVMUTE.
+ */
+static void enc32_stream_encoder_hdmi_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ int actual_pix_clk_khz,
+ bool enable_audio)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ if (!enc->ctx->dc->debug.avoid_vbios_exec_table) {
+ struct bp_encoder_control cntl = {0};
+
+ cntl.action = ENCODER_CONTROL_SETUP;
+ cntl.engine_id = enc1->base.id;
+ cntl.signal = SIGNAL_TYPE_HDMI_TYPE_A;
+ cntl.enable_dp_audio = enable_audio;
+ cntl.pixel_clock = actual_pix_clk_khz;
+ cntl.lanes_number = LANE_COUNT_FOUR;
+
+ if (enc1->base.bp->funcs->encoder_control(
+ enc1->base.bp, &cntl) != BP_RESULT_OK)
+ return; /* BIOS call failed: nothing below is programmed */
+
+ } else {
+
+ //Set pattern for clock channel, default value 0x63 does not work
+ REG_UPDATE(DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, 0x1F);
+
+ //DIG_BE_TMDS_HDMI_MODE : TMDS-HDMI mode is already set in link_encoder_setup
+
+ //DIG_SOURCE_SELECT is already set in dig_connect_to_otg
+
+ /* DIG_START is removed from the register spec */
+ }
+
+ /* Configure pixel encoding */
+ enc1_stream_encoder_set_stream_attribute_helper(enc1, crtc_timing);
+
+ /* setup HDMI engine: deep color, scrambling and clock channel rate
+ * start out as 0 here and are adjusted by the code below
+ */
+ REG_UPDATE_6(HDMI_CONTROL,
+ HDMI_PACKET_GEN_VERSION, 1,
+ HDMI_KEEPOUT_MODE, 1,
+ HDMI_DEEP_COLOR_ENABLE, 0,
+ HDMI_DATA_SCRAMBLE_EN, 0,
+ HDMI_NO_EXTRA_NULL_PACKET_FILLED, 1,
+ HDMI_CLOCK_CHANNEL_RATE, 0);
+
+ /* Configure color depth */
+ switch (crtc_timing->display_color_depth) {
+ case COLOR_DEPTH_888:
+ REG_UPDATE(HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 0);
+ break;
+ case COLOR_DEPTH_101010: /* depth code 1; deep color stays disabled for YCbCr 4:2:2 */
+ if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 1,
+ HDMI_DEEP_COLOR_ENABLE, 0);
+ } else {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 1,
+ HDMI_DEEP_COLOR_ENABLE, 1);
+ }
+ break;
+ case COLOR_DEPTH_121212: /* depth code 2; deep color stays disabled for YCbCr 4:2:2 */
+ if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 2,
+ HDMI_DEEP_COLOR_ENABLE, 0);
+ } else {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 2,
+ HDMI_DEEP_COLOR_ENABLE, 1);
+ }
+ break;
+ case COLOR_DEPTH_161616: /* depth code 3; deep color enabled regardless of pixel encoding */
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 3,
+ HDMI_DEEP_COLOR_ENABLE, 1);
+ break;
+ default: /* other depths keep the values written by REG_UPDATE_6 above */
+ break;
+ }
+ /* scrambling: forced on at or above 340000 kHz, otherwise only when the timing flag requests it */
+ if (actual_pix_clk_khz >= HDMI_CLOCK_CHANNEL_RATE_MORE_340M) {
+ /* enable HDMI data scrambler
+ * HDMI_CLOCK_CHANNEL_RATE_MORE_340M
+ * Clock channel frequency is 1/4 of character rate.
+ */
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DATA_SCRAMBLE_EN, 1,
+ HDMI_CLOCK_CHANNEL_RATE, 1);
+ } else if (crtc_timing->flags.LTE_340MCSC_SCRAMBLE) {
+
+ /* TODO: New feature for DCE11, still need to implement */
+
+ /* enable HDMI data scrambler
+ * HDMI_CLOCK_CHANNEL_FREQ_EQUAL_TO_CHAR_RATE
+ * Clock channel frequency is the same
+ * as character rate
+ */
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DATA_SCRAMBLE_EN, 1,
+ HDMI_CLOCK_CHANNEL_RATE, 0);
+ }
+
+
+ /* Enable transmission of General Control packet on every frame */
+ REG_UPDATE_3(HDMI_VBI_PACKET_CONTROL,
+ HDMI_GC_CONT, 1,
+ HDMI_GC_SEND, 1,
+ HDMI_NULL_SEND, 1);
+
+ /* Disable Audio Content Protection packet transmission */
+ REG_UPDATE(HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, 0);
+
+ /* following belongs to audio */
+ /* Enable Audio InfoFrame packet transmission. */
+ REG_UPDATE(HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
+
+ /* update double-buffered AUDIO_INFO registers immediately */
+ ASSERT(enc->afmt); /* enc->afmt is dereferenced on the next line */
+ enc->afmt->funcs->audio_info_immediate_update(enc->afmt);
+
+ /* Select line number on which to send Audio InfoFrame packets */
+ REG_UPDATE(HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE,
+ VBI_LINE_0 + 2);
+
+ /* set HDMI GC AVMUTE (written as 0 here) */
+ REG_UPDATE(HDMI_GC, HDMI_GC_AVMUTE, 0);
+}
+
+
+
+/*
+ * Return true when the timing uses YCbCr 4:2:0, or when it uses YCbCr
+ * 4:2:2 with the DSC flag set and dsc_cfg.ycbcr422_simple clear.
+ */
+static bool is_two_pixels_per_containter(const struct dc_crtc_timing *timing)
+{
+	if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420)
+		return true;
+
+	if (timing->pixel_encoding != PIXEL_ENCODING_YCBCR422)
+		return false;
+
+	return timing->flags.DSC && !timing->dsc_cfg.ycbcr422_simple;
+}
+
+void enc32_stream_encoder_dp_unblank(
+ struct dc_link *link,
+ struct stream_encoder *enc,
+ const struct encoder_unblank_param *param)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ /*
+ * Unblank a DP stream: program initial Mvid/Nvid (when the link rate is
+ * known), make sure the video stream is disabled, reset the steer FIFO
+ * and the DIG FIFO, enable the DIG FIFO and finally enable the video
+ * stream and record the event via dp_trace_source_sequence().
+ */
+ if (param->link_settings.link_rate != LINK_RATE_UNKNOWN) {
+ uint32_t n_vid = 0x8000;
+ uint32_t m_vid;
+ uint32_t n_multiply = 0;
+ uint32_t pix_per_cycle = 0;
+ uint64_t m_vid_l = n_vid; /* 64-bit accumulator for the n_vid * pixel clock product */
+
+ /* YCbCr 4:2:0 : Computed VID_M will be 2X the input rate.
+ * The same setting is used for non-simple DSC YCbCr 4:2:2,
+ * opp_cnt > 1 and pix_per_cycle > 1.
+ */
+ if (is_two_pixels_per_containter(&param->timing) || param->opp_cnt > 1
+ || param->pix_per_cycle > 1) {
+ /*this logic should be the same in get_pixel_clock_parameters() */
+ n_multiply = 1;
+ pix_per_cycle = 1;
+ }
+ /* M / N = Fstream / Flink
+ * m_vid / n_vid = pixel rate / link rate
+ * i.e. m_vid = n_vid * (pix_clk_100hz / 10) /
+ * (link_rate * LINK_RATE_REF_FREQ_IN_KHZ)
+ */
+
+ m_vid_l *= param->timing.pix_clk_100hz / 10;
+ m_vid_l = div_u64(m_vid_l,
+ param->link_settings.link_rate
+ * LINK_RATE_REF_FREQ_IN_KHZ);
+
+ m_vid = (uint32_t) m_vid_l;
+
+ /* enable auto measurement */
+ /* NOTE(review): the write below clears DP_VID_M_N_GEN_EN; it is set to 1 again once DP_VID_N and DP_VID_M are programmed. */
+ REG_UPDATE(DP_VID_TIMING, DP_VID_M_N_GEN_EN, 0);
+
+ /* auto measurement need 1 full 0x8000 symbol cycle to kick in,
+ * therefore program initial value for Mvid and Nvid
+ */
+
+ REG_UPDATE(DP_VID_N, DP_VID_N, n_vid);
+
+ REG_UPDATE(DP_VID_M, DP_VID_M, m_vid);
+
+ REG_UPDATE_2(DP_VID_TIMING,
+ DP_VID_M_N_GEN_EN, 1,
+ DP_VID_N_MUL, n_multiply);
+
+ REG_UPDATE(DP_PIXEL_FORMAT,
+ DP_PIXEL_PER_CYCLE_PROCESSING_MODE,
+ pix_per_cycle);
+ }
+
+ /* make sure stream is disabled before resetting steer fifo */
+ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, false);
+ REG_WAIT(DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, 0, 10, 5000);
+
+ /* DIG_START is removed from the register spec */
+
+ /* switch DP encoder to CRTC data, but reset the fifo first. It may happen
+ * that it overflows during mode transition, and sometimes doesn't recover.
+ */
+ REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 1);
+ udelay(10);
+
+ REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 0);
+
+ /* DIG Resync FIFO now needs to be explicitly enabled
+ */
+ // TODO: Confirm if we need to wait for DIG_SYMCLK_FE_ON
+ REG_WAIT(DIG_FE_CNTL, DIG_SYMCLK_FE_ON, 1, 10, 5000);
+
+ /* read start level = 0 will bring underflow / overflow and DIG_FIFO_ERROR = 1
+ * so set it to 1/2 full = 7 before reset as suggested by hardware team.
+ */
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, 0x7);
+ /* pulse DIG_FIFO_RESET: assert, wait for RESET_DONE = 1, release, wait for RESET_DONE = 0 */
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 1);
+
+ REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 1, 10, 5000);
+
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 0);
+
+ REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 0, 10, 5000);
+
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 1);
+
+ /* wait 100us for DIG/DP logic to prime
+ * (i.e. a few video lines)
+ */
+ udelay(100);
+
+ /* the hardware would start sending video at the start of the next DP
+ * frame (i.e. rising edge of the vblank).
+ * NOTE: We used to program DP_VID_STREAM_DIS_DEFER = 2 here, but this
+ * register has no effect on enable transition! HW always guarantees
+ * VID_STREAM enable at start of next frame, and this is not
+ * programmable
+ */
+
+ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, true);
+ /* record that the video stream has been enabled in the link's source sequence trace */
+ link->dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_ENABLE_DP_VID_STREAM);
+}
+
+/* Set DSC-related configuration.
+ *   dsc_mode: OPTC_DSC_DISABLED writes 0 to DP_DSC_MODE, any other value
+ *             writes 1
+ *   dsc_bytes_per_pixel: not used here, DP_DSC_BYTES_PER_PIXEL removed in DCN32
+ *   dsc_slice_width: not used here, DP_DSC_SLICE_WIDTH removed in DCN32
+ */
+static void enc32_dp_set_dsc_config(struct stream_encoder *enc,
+	enum optc_dsc_mode dsc_mode,
+	uint32_t dsc_bytes_per_pixel,
+	uint32_t dsc_slice_width)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	uint32_t dsc_enable = (dsc_mode != OPTC_DSC_DISABLED) ? 1 : 0;
+
+	REG_UPDATE(DP_DSC_CNTL, DP_DSC_MODE, dsc_enable);
+}
+
+/*
+ * Read the DSC-related register fields into *s so they can be logged later
+ * (see dcn10_log_hw_state).  DP_DSC_MODE is always read; the remaining
+ * fields are read only when it is non-zero.
+ */
+static void enc32_read_state(struct stream_encoder *enc, struct enc_state *s)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	REG_GET(DP_DSC_CNTL, DP_DSC_MODE, &s->dsc_mode);
+
+	/* DSC disabled: leave the other fields of *s untouched. */
+	if (!s->dsc_mode)
+		return;
+
+	REG_GET(DP_GSP11_CNTL, DP_SEC_GSP11_LINE_NUM, &s->sec_gsp_pps_line_num);
+
+	REG_GET(DP_MSA_VBID_MISC, DP_VBID6_LINE_REFERENCE, &s->vbid6_line_reference);
+	REG_GET(DP_MSA_VBID_MISC, DP_VBID6_LINE_NUM, &s->vbid6_line_num);
+
+	REG_GET(DP_GSP11_CNTL, DP_SEC_GSP11_ENABLE, &s->sec_gsp_pps_enable);
+	REG_GET(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, &s->sec_stream_enable);
+}
+
+/*
+ * Program DIG_FIFO_OUTPUT_PIXEL_MODE from the number of pixels per
+ * container: 1 for exactly two pixels, 0 for every other value.
+ */
+static void enc32_set_dig_input_mode(struct stream_encoder *enc, unsigned int pix_per_container)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	uint32_t pixel_mode = 0x0;
+
+	/*
+	 * Despite its name, the field describes the output mode of the OTG,
+	 * which is the input mode of the DIG.
+	 */
+	if (pix_per_container == 2)
+		pixel_mode = 0x1;
+
+	REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_MODE, pixel_mode);
+}
+
+/*
+ * Write `reset` (as 1 or 0) to DIG_FIFO_RESET.  If DIG_SYMCLK_FE_ON then
+ * reads back non-zero, wait for DIG_FIFO_RESET_DONE to take the same value;
+ * otherwise just delay for 10us.
+ */
+static void enc32_reset_fifo(struct stream_encoder *enc, bool reset)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	uint32_t symclk_fe_on;
+	uint32_t level = 0;
+
+	if (reset)
+		level = 1;
+
+	REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, level);
+	REG_GET(DIG_FE_CNTL, DIG_SYMCLK_FE_ON, &symclk_fe_on);
+
+	if (!symclk_fe_on) {
+		udelay(10);
+		return;
+	}
+
+	REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, level, 10, 5000);
+}
+
+void enc32_enable_fifo(struct stream_encoder *enc)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ /* Enable the DIG FIFO: set the read start level to 0x7, pulse the FIFO reset, then set DIG_FIFO_ENABLE. */
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, 0x7);
+ /* assert the reset, then release it */
+ enc32_reset_fifo(enc, true);
+ enc32_reset_fifo(enc, false);
+
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 1);
+}
+/*
+ * Stream encoder function table installed by
+ * dcn32_dio_stream_encoder_construct(). The enc32_* hooks are defined in
+ * this file; the enc1_*, enc2_* and enc3_* hooks are implementations
+ * declared elsewhere.
+ */
+static const struct stream_encoder_funcs dcn32_str_enc_funcs = {
+ .dp_set_odm_combine =
+ enc32_dp_set_odm_combine,
+ .dp_set_stream_attribute =
+ enc2_stream_encoder_dp_set_stream_attribute,
+ .hdmi_set_stream_attribute =
+ enc32_stream_encoder_hdmi_set_stream_attribute,
+ .dvi_set_stream_attribute =
+ enc32_stream_encoder_dvi_set_stream_attribute,
+ .set_throttled_vcp_size =
+ enc1_stream_encoder_set_throttled_vcp_size,
+ .update_hdmi_info_packets =
+ enc3_stream_encoder_update_hdmi_info_packets,
+ .stop_hdmi_info_packets =
+ enc3_stream_encoder_stop_hdmi_info_packets,
+ .update_dp_info_packets_sdp_line_num =
+ enc3_stream_encoder_update_dp_info_packets_sdp_line_num,
+ .update_dp_info_packets =
+ enc3_stream_encoder_update_dp_info_packets,
+ .stop_dp_info_packets =
+ enc1_stream_encoder_stop_dp_info_packets,
+ .dp_blank =
+ enc1_stream_encoder_dp_blank,
+ .dp_unblank =
+ enc32_stream_encoder_dp_unblank,
+ .audio_mute_control = enc3_audio_mute_control,
+ /* DP audio hooks */
+ .dp_audio_setup = enc3_se_dp_audio_setup,
+ .dp_audio_enable = enc3_se_dp_audio_enable,
+ .dp_audio_disable = enc1_se_dp_audio_disable,
+ /* HDMI audio and miscellaneous DIG hooks */
+ .hdmi_audio_setup = enc3_se_hdmi_audio_setup,
+ .hdmi_audio_disable = enc1_se_hdmi_audio_disable,
+ .setup_stereo_sync = enc1_setup_stereo_sync,
+ .set_avmute = enc1_stream_encoder_set_avmute,
+ .dig_connect_to_otg = enc1_dig_connect_to_otg,
+ .dig_source_otg = enc1_dig_source_otg,
+
+ .dp_get_pixel_format = enc1_stream_encoder_dp_get_pixel_format,
+ /* state readback and DSC hooks */
+ .enc_read_state = enc32_read_state,
+ .dp_set_dsc_config = enc32_dp_set_dsc_config,
+ .dp_set_dsc_pps_info_packet = enc3_dp_set_dsc_pps_info_packet,
+ .set_dynamic_metadata = enc2_set_dynamic_metadata,
+ .hdmi_reset_stream_attribute = enc1_reset_hdmi_stream_attribute,
+ /* DIG FIFO hooks */
+ .set_input_mode = enc32_set_dig_input_mode,
+ .enable_fifo = enc32_enable_fifo,
+};
+
+/*
+ * Initialize a DCN32 DIO stream encoder: install the dcn32 function table
+ * and store the context, BIOS handle, engine id, VPG/AFMT pointers and the
+ * register, shift and mask tables.  stream_enc_inst is copied from
+ * vpg->inst, so vpg must not be NULL.
+ */
+void dcn32_dio_stream_encoder_construct(
+	struct dcn10_stream_encoder *enc1,
+	struct dc_context *ctx,
+	struct dc_bios *bp,
+	enum engine_id eng_id,
+	struct vpg *vpg,
+	struct afmt *afmt,
+	const struct dcn10_stream_enc_registers *regs,
+	const struct dcn10_stream_encoder_shift *se_shift,
+	const struct dcn10_stream_encoder_mask *se_mask)
+{
+	/* register description tables */
+	enc1->regs = regs;
+	enc1->se_shift = se_shift;
+	enc1->se_mask = se_mask;
+
+	/* generic stream encoder state */
+	enc1->base.funcs = &dcn32_str_enc_funcs;
+	enc1->base.ctx = ctx;
+	enc1->base.bp = bp;
+	enc1->base.id = eng_id;
+
+	/* companion blocks; the instance number follows the VPG */
+	enc1->base.vpg = vpg;
+	enc1->base.afmt = afmt;
+	enc1->base.stream_enc_inst = vpg->inst;
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.h
new file mode 100644
index 000000000000..ca53d39561d2
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.h
@@ -0,0 +1,206 @@
+/*
+ * Copyright 2021 - Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_DIO_STREAM_ENCODER_DCN32_H__
+#define __DC_DIO_STREAM_ENCODER_DCN32_H__
+
+#include "dcn30/dcn30_vpg.h"
+#include "dcn30/dcn30_afmt.h"
+#include "stream_encoder.h"
+#include "dcn20/dcn20_stream_encoder.h"
+/*
+ * Field list for the DCN3.2 stream encoder shift/mask tables.
+ *
+ * Expands to one SE_SF(register, field, mask_sh) entry per register field
+ * listed below, separated by commas; mask_sh is handed to every entry
+ * unchanged. The last entry has no trailing comma.
+ */
+#define SE_COMMON_MASK_SH_LIST_DCN32(mask_sh)\
+ SE_SF(DP0_DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, mask_sh),\
+ SE_SF(DP0_DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, mask_sh),\
+ SE_SF(DP0_DP_PIXEL_FORMAT, DP_PIXEL_PER_CYCLE_PROCESSING_MODE, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_PACKET_GEN_VERSION, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_KEEPOUT_MODE, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_DATA_SCRAMBLE_EN, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_NO_EXTRA_NULL_PACKET_FILLED, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GC, HDMI_GC_AVMUTE, mask_sh),\
+ SE_SF(DP0_DP_MSE_RATE_CNTL, DP_MSE_RATE_X, mask_sh),\
+ SE_SF(DP0_DP_MSE_RATE_CNTL, DP_MSE_RATE_Y, mask_sh),\
+ SE_SF(DP0_DP_MSE_RATE_UPDATE, DP_MSE_RATE_UPDATE_PENDING, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP1_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_MPG_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP5_LINE_REFERENCE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_PENDING, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL4, DP_SEC_GSP4_LINE_NUM, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL5, DP_SEC_GSP5_LINE_NUM, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_ANY_LINE, mask_sh),\
+ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, mask_sh),\
+ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, mask_sh),\
+ SE_SF(DP0_DP_STEER_FIFO, DP_STEER_FIFO_RESET, mask_sh),\
+ SE_SF(DP0_DP_VID_TIMING, DP_VID_M_N_GEN_EN, mask_sh),\
+ SE_SF(DP0_DP_VID_N, DP_VID_N, mask_sh),\
+ SE_SF(DP0_DP_VID_M, DP_VID_M, mask_sh),\
+ SE_SF(DIG0_HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUDIO_PRIORITY, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_32_0, HDMI_ACR_CTS_32, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_32_1, HDMI_ACR_N_32, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_44_0, HDMI_ACR_CTS_44, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_44_1, HDMI_ACR_N_44, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_48_0, HDMI_ACR_CTS_48, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_48_1, HDMI_ACR_N_48, mask_sh),\
+ SE_SF(DP0_DP_SEC_AUD_N, DP_SEC_AUD_N, mask_sh),\
+ SE_SF(DP0_DP_SEC_TIMESTAMP, DP_SEC_TIMESTAMP_MODE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ASP_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ATP_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_AIP_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ACM_ENABLE, mask_sh),\
+ SE_SF(DIG0_AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_CLOCK_CHANNEL_RATE, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, TMDS_PIXEL_ENCODING, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, TMDS_COLOR_FORMAT, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_SELECT, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_GATE_EN, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP4_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP5_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP6_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP7_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP7_SEND, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL6, DP_SEC_GSP7_LINE_NUM, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP11_PPS, mask_sh),\
+ SE_SF(DP0_DP_GSP11_CNTL, DP_SEC_GSP11_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_GSP11_CNTL, DP_SEC_GSP11_LINE_NUM, mask_sh),\
+ SE_SF(DP0_DP_DB_CNTL, DP_DB_DISABLE, mask_sh),\
+ SE_SF(DP0_DP_MSA_COLORIMETRY, DP_MSA_MISC0, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM1, DP_MSA_HTOTAL, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM1, DP_MSA_VTOTAL, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM2, DP_MSA_HSTART, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM2, DP_MSA_VSTART, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_HSYNCWIDTH, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_HSYNCPOLARITY, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_VSYNCWIDTH, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_VSYNCPOLARITY, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_HWIDTH, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_VHEIGHT, mask_sh),\
+ SE_SF(DIG0_HDMI_DB_CONTROL, HDMI_DB_DISABLE, mask_sh),\
+ SE_SF(DP0_DP_VID_TIMING, DP_VID_N_MUL, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DIG_SOURCE_SELECT, mask_sh), \
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC2_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC2_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC3_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC3_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC4_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC4_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC5_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC5_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC6_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC6_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC7_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC7_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC8_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC8_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC9_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC9_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC10_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC10_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC11_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC11_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC12_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC12_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC13_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC13_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC14_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC14_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL1, HDMI_GENERIC0_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL1, HDMI_GENERIC1_LINE, mask_sh),\
+SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL2, HDMI_GENERIC2_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL2, HDMI_GENERIC3_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL3, HDMI_GENERIC4_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL3, HDMI_GENERIC5_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL4, HDMI_GENERIC6_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL4, HDMI_GENERIC7_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL7, HDMI_GENERIC8_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL7, HDMI_GENERIC9_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL8, HDMI_GENERIC10_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL8, HDMI_GENERIC11_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL9, HDMI_GENERIC12_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL9, HDMI_GENERIC13_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL10, HDMI_GENERIC14_LINE, mask_sh),\
+ SE_SF(DP0_DP_DSC_CNTL, DP_DSC_MODE, mask_sh),\
+ SE_SF(DP0_DP_MSA_VBID_MISC, DP_VBID6_LINE_REFERENCE, mask_sh),\
+ SE_SF(DP0_DP_MSA_VBID_MISC, DP_VBID6_LINE_NUM, mask_sh),\
+ SE_SF(DME0_DME_CONTROL, METADATA_ENGINE_EN, mask_sh),\
+ SE_SF(DME0_DME_CONTROL, METADATA_HUBP_REQUESTOR_ID, mask_sh),\
+ SE_SF(DME0_DME_CONTROL, METADATA_STREAM_TYPE, mask_sh),\
+ SE_SF(DP0_DP_SEC_METADATA_TRANSMISSION, DP_SEC_METADATA_PACKET_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_METADATA_TRANSMISSION, DP_SEC_METADATA_PACKET_LINE_REFERENCE, mask_sh),\
+ SE_SF(DP0_DP_SEC_METADATA_TRANSMISSION, DP_SEC_METADATA_PACKET_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_ENABLE, mask_sh),\
+ SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_LINE_REFERENCE, mask_sh),\
+ SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_LINE, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DOLBY_VISION_EN, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DIG_SYMCLK_FE_ON, mask_sh),\
+ SE_SF(DP0_DP_SEC_FRAMING4, DP_SST_SDP_SPLITTING, mask_sh),\
+ SE_SF(DIG0_DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_MODE, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_RESET, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, mask_sh)
+
+/*
+ * Initialise @enc1 as a DCN3.2 DIO stream encoder.
+ *
+ * The definition in dcn32_dio_stream_encoder.c stores every argument in
+ * @enc1 (pointers are kept, not copied) and takes the stream encoder
+ * instance number from vpg->inst.
+ */
+void dcn32_dio_stream_encoder_construct(
+ struct dcn10_stream_encoder *enc1,
+ struct dc_context *ctx,
+ struct dc_bios *bp,
+ enum engine_id eng_id,
+ struct vpg *vpg,
+ struct afmt *afmt,
+ const struct dcn10_stream_enc_registers *regs,
+ const struct dcn10_stream_encoder_shift *se_shift,
+ const struct dcn10_stream_encoder_mask *se_mask);
+
+/*
+ * Used as the .enable_fifo stream encoder hook in
+ * dcn32_dio_stream_encoder.c; exported here so other files can reference it.
+ */
+void enc32_enable_fifo(struct stream_encoder *enc);
+/*
+ * Used as the .dp_unblank stream encoder hook in
+ * dcn32_dio_stream_encoder.c; exported here so other files can reference it.
+ */
+void enc32_stream_encoder_dp_unblank(
+ struct dc_link *link,
+ struct stream_encoder *enc,
+ const struct encoder_unblank_param *param);
+
+#endif /* __DC_DIO_STREAM_ENCODER_DCN32_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c
new file mode 100644
index 000000000000..2ed382a8e79c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c
@@ -0,0 +1,190 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "reg_helper.h"
+
+#include "core_types.h"
+#include "link_encoder.h"
+#include "dcn321_dio_link_encoder.h"
+#include "dcn31/dcn31_dio_link_encoder.h"
+#include "stream_encoder.h"
+#include "dc_bios_types.h"
+
+#include "gpio_service_interface.h"
+
+#ifndef MIN
+#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+#endif
+/*
+ * Register access helpers. Every macro in this group expands to an
+ * expression on a variable named enc10, so they can only be used where a
+ * struct dcn10_link_encoder pointer of that name is in scope.
+ */
+#define CTX \
+ enc10->base.ctx
+#define DC_LOGGER \
+ enc10->base.ctx->logger
+/* REG(): look a register up in the encoder's link register table. */
+#define REG(reg)\
+ (enc10->link_regs->reg)
+/* FN(): expands to the "shift, mask" pair of a field; reg_name is unused. */
+#undef FN
+#define FN(reg_name, field_name) \
+ enc10->link_shift->field_name, enc10->link_mask->field_name
+/* AUX_REG*(): same idea for the AUX register table, via dm_read/write_reg. */
+#define AUX_REG(reg)\
+ (enc10->aux_regs->reg)
+
+#define AUX_REG_READ(reg_name) \
+ dm_read_reg(CTX, AUX_REG(reg_name))
+
+#define AUX_REG_WRITE(reg_name, val) \
+ dm_write_reg(CTX, AUX_REG(reg_name), val)
+/*
+ * Link encoder function table installed by dcn321_link_encoder_construct().
+ *
+ * No hook is implemented in this file: every entry points at a function
+ * named for another DCN generation (dcn10/dcn20/enc2, dcn30, dcn31, and
+ * the DCN3.2 enc32_hw_init / dcn32_link_encoder_enable_dp_output).
+ */
+static const struct link_encoder_funcs dcn321_link_enc_funcs = {
+ .read_state = link_enc2_read_state,
+ .validate_output_with_stream =
+ dcn30_link_encoder_validate_output_with_stream,
+ .hw_init = enc32_hw_init,
+ .setup = dcn10_link_encoder_setup,
+ .enable_tmds_output = dcn10_link_encoder_enable_tmds_output,
+ .enable_dp_output = dcn32_link_encoder_enable_dp_output,
+ .enable_dp_mst_output = dcn10_link_encoder_enable_dp_mst_output,
+ .disable_output = dcn10_link_encoder_disable_output,
+ .dp_set_lane_settings = dcn10_link_encoder_dp_set_lane_settings,
+ .dp_set_phy_pattern = dcn10_link_encoder_dp_set_phy_pattern,
+ .update_mst_stream_allocation_table =
+ dcn10_link_encoder_update_mst_stream_allocation_table,
+ .psr_program_dp_dphy_fast_training =
+ dcn10_psr_program_dp_dphy_fast_training,
+ .psr_program_secondary_packet = dcn10_psr_program_secondary_packet,
+ .connect_dig_be_to_fe = dcn10_link_encoder_connect_dig_be_to_fe,
+ .enable_hpd = dcn10_link_encoder_enable_hpd,
+ .disable_hpd = dcn10_link_encoder_disable_hpd,
+ .is_dig_enabled = dcn10_is_dig_enabled,
+ .destroy = dcn10_link_encoder_destroy,
+ .fec_set_enable = enc2_fec_set_enable,
+ .fec_set_ready = enc2_fec_set_ready,
+ .fec_is_active = enc2_fec_is_active,
+ .get_dig_frontend = dcn10_get_dig_frontend,
+ .get_dig_mode = dcn10_get_dig_mode,
+ .is_in_alt_mode = dcn20_link_encoder_is_in_alt_mode,
+ .get_max_link_cap = dcn20_link_encoder_get_max_link_cap,
+ .set_dio_phy_mux = dcn31_link_encoder_set_dio_phy_mux,
+};
+
+/*
+ * Initialise a DCN3.2.1 link encoder.
+ *
+ * Only the dcn10_link_encoder embedded in @enc20 is written. Identity
+ * fields (context, encoder id, HPD source, connector, transmitter) come
+ * from @init_data, the feature flags start as a copy of @enc_features, and
+ * the register/shift/mask tables are stored by reference.
+ *
+ * The preferred DIG engine is derived from the UNIPHY transmitter (A-E);
+ * any other transmitter asserts and leaves it as ENGINE_ID_UNKNOWN.
+ *
+ * Capability bits are then overridden from the VBIOS connector speed cap
+ * info. If the VBIOS does not provide get_connector_speed_cap_info, result
+ * stays BP_RESULT_OK and the zero-initialised bp_cap_info is applied.
+ * Finally the hdmi20_disable debug option forces HDMI_6GB_EN off.
+ */
+void dcn321_link_encoder_construct(
+	struct dcn20_link_encoder *enc20,
+	const struct encoder_init_data *init_data,
+	const struct encoder_feature_support *enc_features,
+	const struct dcn10_link_enc_registers *link_regs,
+	const struct dcn10_link_enc_aux_registers *aux_regs,
+	const struct dcn10_link_enc_hpd_registers *hpd_regs,
+	const struct dcn10_link_enc_shift *link_shift,
+	const struct dcn10_link_enc_mask *link_mask)
+{
+	struct bp_connector_speed_cap_info bp_cap_info = {0};
+	const struct dc_vbios_funcs *bp_funcs = init_data->ctx->dc_bios->funcs;
+	enum bp_result result = BP_RESULT_OK;
+	struct dcn10_link_encoder *enc10 = &enc20->enc10;
+
+	enc10->base.funcs = &dcn321_link_enc_funcs;
+	enc10->base.ctx = init_data->ctx;
+	enc10->base.id = init_data->encoder;
+
+	enc10->base.hpd_source = init_data->hpd_source;
+	enc10->base.connector = init_data->connector;
+
+	enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
+
+	enc10->base.features = *enc_features;
+
+	/*
+	 * Must come after the wholesale copy of *enc_features above;
+	 * setting the flag first would let that copy overwrite it.
+	 */
+	if (enc10->base.connector.id == CONNECTOR_ID_USBC)
+		enc10->base.features.flags.bits.DP_IS_USB_C = 1;
+
+	enc10->base.transmitter = init_data->transmitter;
+
+	/*
+	 * The flag that makes the driver poll the I2C data pin during DP
+	 * sink detect is not overridden here; it keeps whatever
+	 * enc_features supplied.
+	 */
+
+	enc10->base.output_signals =
+		SIGNAL_TYPE_DVI_SINGLE_LINK |
+		SIGNAL_TYPE_DVI_DUAL_LINK |
+		SIGNAL_TYPE_LVDS |
+		SIGNAL_TYPE_DISPLAY_PORT |
+		SIGNAL_TYPE_DISPLAY_PORT_MST |
+		SIGNAL_TYPE_EDP |
+		SIGNAL_TYPE_HDMI_TYPE_A;
+
+	enc10->link_regs = link_regs;
+	enc10->aux_regs = aux_regs;
+	enc10->hpd_regs = hpd_regs;
+	enc10->link_shift = link_shift;
+	enc10->link_mask = link_mask;
+
+	/* Each UNIPHY transmitter prefers the DIG engine with the same letter. */
+	switch (enc10->base.transmitter) {
+	case TRANSMITTER_UNIPHY_A:
+		enc10->base.preferred_engine = ENGINE_ID_DIGA;
+		break;
+	case TRANSMITTER_UNIPHY_B:
+		enc10->base.preferred_engine = ENGINE_ID_DIGB;
+		break;
+	case TRANSMITTER_UNIPHY_C:
+		enc10->base.preferred_engine = ENGINE_ID_DIGC;
+		break;
+	case TRANSMITTER_UNIPHY_D:
+		enc10->base.preferred_engine = ENGINE_ID_DIGD;
+		break;
+	case TRANSMITTER_UNIPHY_E:
+		enc10->base.preferred_engine = ENGINE_ID_DIGE;
+		break;
+	default:
+		ASSERT_CRITICAL(false);
+		enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
+	}
+
+	/* default to one to mirror Windows behavior */
+	enc10->base.features.flags.bits.HDMI_6GB_EN = 1;
+
+	if (bp_funcs->get_connector_speed_cap_info)
+		result = bp_funcs->get_connector_speed_cap_info(enc10->base.ctx->dc_bios,
+						enc10->base.connector, &bp_cap_info);
+
+	/* Override features with DCE-specific values */
+	if (result == BP_RESULT_OK) {
+		enc10->base.features.flags.bits.IS_HBR2_CAPABLE =
+				bp_cap_info.DP_HBR2_EN;
+		enc10->base.features.flags.bits.IS_HBR3_CAPABLE =
+				bp_cap_info.DP_HBR3_EN;
+		enc10->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN;
+		enc10->base.features.flags.bits.IS_DP2_CAPABLE = 1;
+		enc10->base.features.flags.bits.IS_UHBR10_CAPABLE = bp_cap_info.DP_UHBR10_EN;
+		enc10->base.features.flags.bits.IS_UHBR13_5_CAPABLE = bp_cap_info.DP_UHBR13_5_EN;
+		enc10->base.features.flags.bits.IS_UHBR20_CAPABLE = bp_cap_info.DP_UHBR20_EN;
+	} else {
+		DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n",
+				__func__,
+				result);
+	}
+	/* The debug option wins over both the default and the VBIOS value. */
+	if (enc10->base.ctx->dc->debug.hdmi20_disable)
+		enc10->base.features.flags.bits.HDMI_6GB_EN = 0;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.h
new file mode 100644
index 000000000000..2205f39b0a24
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_LINK_ENCODER__DCN321_H__
+#define __DC_LINK_ENCODER__DCN321_H__
+
+#include "dcn32/dcn32_dio_link_encoder.h"
+/*
+ * Initialise the dcn10_link_encoder embedded in @enc20 as a DCN3.2.1 link
+ * encoder. The register, shift and mask tables are stored by reference;
+ * *enc_features is copied and then adjusted from VBIOS capability data.
+ */
+void dcn321_link_encoder_construct(
+ struct dcn20_link_encoder *enc20,
+ const struct encoder_init_data *init_data,
+ const struct encoder_feature_support *enc_features,
+ const struct dcn10_link_enc_registers *link_regs,
+ const struct dcn10_link_enc_aux_registers *aux_regs,
+ const struct dcn10_link_enc_hpd_registers *hpd_regs,
+ const struct dcn10_link_enc_shift *link_shift,
+ const struct dcn10_link_enc_mask *link_mask);
+
+
+#endif /* __DC_LINK_ENCODER__DCN321_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c
new file mode 100644
index 000000000000..9972911330b6
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c
@@ -0,0 +1,391 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "reg_helper.h"
+
+#include "core_types.h"
+#include "link_encoder.h"
+#include "dcn31/dcn31_dio_link_encoder.h"
+#include "dcn35_dio_link_encoder.h"
+#include "dc_dmub_srv.h"
+#define CTX \
+ enc10->base.ctx
+#define DC_LOGGER \
+ enc10->base.ctx->logger
+
+#define REG(reg)\
+ (enc10->link_regs->reg)
+/*
+ * CTX, DC_LOGGER, REG and FN all expand to expressions on a variable named
+ * enc10, so a struct dcn10_link_encoder pointer of that name must be in
+ * scope wherever they are used. FN yields the "shift, mask" pair of a
+ * field and ignores reg_name.
+ */
+#undef FN
+#define FN(reg_name, field_name) \
+ enc10->link_shift->field_name, enc10->link_mask->field_name
+/*
+ * @brief
+ * DIG front-end (FE) source select values, one bit per DIG (A-E)
+ * ASIC-dependent, actual values for register programming
+ */
+#define DCN35_DIG_FE_SOURCE_SELECT_INVALID 0x0
+#define DCN35_DIG_FE_SOURCE_SELECT_DIGA 0x1
+#define DCN35_DIG_FE_SOURCE_SELECT_DIGB 0x2
+#define DCN35_DIG_FE_SOURCE_SELECT_DIGC 0x4
+#define DCN35_DIG_FE_SOURCE_SELECT_DIGD 0x08
+#define DCN35_DIG_FE_SOURCE_SELECT_DIGE 0x10
+
+
+/*
+ * Report whether the DIG back end clock is enabled.
+ *
+ * Reads DIG_BE_CLK_CNTL.DIG_BE_CLK_EN and returns true only when the
+ * field reads back as 1.
+ */
+bool dcn35_is_dig_enabled(struct link_encoder *enc)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	uint32_t clk_en;
+
+	REG_GET(DIG_BE_CLK_CNTL, DIG_BE_CLK_EN, &clk_en);
+
+	return clk_en == 1;
+}
+
+/*
+ * Read back the DIG back end mode and translate it to a signal type.
+ *
+ * DIG_BE_CLK_CNTL.DIG_BE_MODE values handled: 0 -> DisplayPort,
+ * 2 -> DVI single link, 3 -> HDMI type A, 5 -> DisplayPort MST. Every
+ * other value yields SIGNAL_TYPE_NONE.
+ */
+enum signal_type dcn35_get_dig_mode(
+	struct link_encoder *enc)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	enum signal_type signal = SIGNAL_TYPE_NONE;
+	uint32_t be_mode;
+
+	REG_GET(DIG_BE_CLK_CNTL, DIG_BE_MODE, &be_mode);
+
+	switch (be_mode) {
+	case 0:
+		signal = SIGNAL_TYPE_DISPLAY_PORT;
+		break;
+	case 2:
+		signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
+		break;
+	case 3:
+		signal = SIGNAL_TYPE_HDMI_TYPE_A;
+		break;
+	case 5:
+		signal = SIGNAL_TYPE_DISPLAY_PORT_MST;
+		break;
+	default:
+		/* unrecognised mode: keep SIGNAL_TYPE_NONE */
+		break;
+	}
+
+	return signal;
+}
+
+/*
+ * Program the DIG back end mode for @signal and enable its clock.
+ *
+ * DIG_BE_MODE values written: 0 for DP SST (also used for eDP), 2 for
+ * TMDS DVI (single and dual link), 3 for TMDS HDMI, 5 for DP MST. Any
+ * other signal type asserts and leaves DIG_BE_MODE untouched. In every
+ * case DIG_BE_CLK_EN is set to 1 afterwards.
+ */
+void dcn35_link_encoder_setup(
+	struct link_encoder *enc,
+	enum signal_type signal)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	bool mode_known = true;
+	uint32_t be_mode = 0;
+
+	switch (signal) {
+	case SIGNAL_TYPE_EDP:
+	case SIGNAL_TYPE_DISPLAY_PORT:
+		be_mode = 0;	/* DP SST */
+		break;
+	case SIGNAL_TYPE_DVI_SINGLE_LINK:
+	case SIGNAL_TYPE_DVI_DUAL_LINK:
+		be_mode = 2;	/* TMDS-DVI */
+		break;
+	case SIGNAL_TYPE_HDMI_TYPE_A:
+		be_mode = 3;	/* TMDS-HDMI */
+		break;
+	case SIGNAL_TYPE_DISPLAY_PORT_MST:
+		be_mode = 5;	/* DP MST */
+		break;
+	default:
+		/* invalid mode ! */
+		ASSERT_CRITICAL(false);
+		mode_known = false;
+		break;
+	}
+
+	if (mode_known) {
+		REG_UPDATE(DIG_BE_CLK_CNTL, DIG_BE_MODE, be_mode);
+	}
+
+	REG_UPDATE(DIG_BE_CLK_CNTL, DIG_BE_CLK_EN, 1);
+}
+
+/*
+ * Hardware init hook for DCN3.5: run the DCN3.1 init (enc31_hw_init) and
+ * then apply the DIO fine-grain clock gating setting taken from the
+ * dc debug options.
+ */
+void dcn35_link_encoder_init(struct link_encoder *enc)
+{
+	bool dio_fgcg;
+
+	enc31_hw_init(enc);
+
+	dio_fgcg = enc->ctx->dc->debug.enable_fine_grain_clock_gating.bits.dio;
+	dcn35_link_encoder_set_fgcg(enc, dio_fgcg);
+}
+
+/*
+ * Update DIO_CLK_CNTL.DIO_FGCG_REP_DIS from @enable. The field is written
+ * with the logical inverse of @enable: 0 when enabling, 1 when disabling.
+ */
+void dcn35_link_encoder_set_fgcg(struct link_encoder *enc, bool enable)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	uint32_t rep_dis = enable ? 0 : 1;
+
+	REG_UPDATE(DIO_CLK_CNTL, DIO_FGCG_REP_DIS, rep_dis);
+}
+/*
+ * Link encoder function table installed by dcn35_link_encoder_construct().
+ *
+ * Hooks implemented in this file (dcn35_ prefix): hw_init, setup,
+ * enable_dp_output, enable_dp_mst_output, disable_output, is_dig_enabled,
+ * get_dig_mode and the DPIA enable/disable pair. The remaining entries
+ * point at functions named for earlier DCN generations.
+ */
+static const struct link_encoder_funcs dcn35_link_enc_funcs = {
+ .read_state = link_enc2_read_state,
+ .validate_output_with_stream =
+ dcn30_link_encoder_validate_output_with_stream,
+ .hw_init = dcn35_link_encoder_init,
+ .setup = dcn35_link_encoder_setup,
+ .enable_tmds_output = dcn10_link_encoder_enable_tmds_output,
+ .enable_dp_output = dcn35_link_encoder_enable_dp_output,
+ .enable_dp_mst_output = dcn35_link_encoder_enable_dp_mst_output,
+ .disable_output = dcn35_link_encoder_disable_output,
+ .dp_set_lane_settings = dcn10_link_encoder_dp_set_lane_settings,
+ .dp_set_phy_pattern = dcn10_link_encoder_dp_set_phy_pattern,
+ .update_mst_stream_allocation_table =
+ dcn10_link_encoder_update_mst_stream_allocation_table,
+ .psr_program_dp_dphy_fast_training =
+ dcn10_psr_program_dp_dphy_fast_training,
+ .psr_program_secondary_packet = dcn10_psr_program_secondary_packet,
+ .connect_dig_be_to_fe = dcn10_link_encoder_connect_dig_be_to_fe,
+ .enable_hpd = dcn10_link_encoder_enable_hpd,
+ .disable_hpd = dcn10_link_encoder_disable_hpd,
+ .is_dig_enabled = dcn35_is_dig_enabled,
+ .destroy = dcn10_link_encoder_destroy,
+ .fec_set_enable = enc2_fec_set_enable,
+ .fec_set_ready = enc2_fec_set_ready,
+ .fec_is_active = enc2_fec_is_active,
+ .get_dig_frontend = dcn10_get_dig_frontend,
+ .get_dig_mode = dcn35_get_dig_mode,
+ .is_in_alt_mode = dcn31_link_encoder_is_in_alt_mode,
+ .get_max_link_cap = dcn31_link_encoder_get_max_link_cap,
+ .set_dio_phy_mux = dcn31_link_encoder_set_dio_phy_mux,
+ .enable_dpia_output = dcn35_link_encoder_enable_dpia_output,
+ .disable_dpia_output = dcn35_link_encoder_disable_dpia_output,
+};
+/*
+ * Initialise a DCN3.5 link encoder.
+ *
+ * Only the dcn10_link_encoder embedded in @enc20 is written. Identity
+ * fields come from @init_data, the feature flags start as a copy of
+ * @enc_features, and the register/shift/mask tables are stored by
+ * reference. The preferred DIG engine follows the UNIPHY transmitter
+ * (A-E); any other transmitter asserts and leaves ENGINE_ID_UNKNOWN.
+ *
+ * Capability bits are then overridden from the VBIOS connector speed cap
+ * info. If the VBIOS does not provide get_connector_speed_cap_info, result
+ * stays BP_RESULT_OK and the zero-initialised bp_cap_info is applied.
+ * Finally the hdmi20_disable debug option forces HDMI_6GB_EN off.
+ */
+void dcn35_link_encoder_construct(
+ struct dcn20_link_encoder *enc20,
+ const struct encoder_init_data *init_data,
+ const struct encoder_feature_support *enc_features,
+ const struct dcn10_link_enc_registers *link_regs,
+ const struct dcn10_link_enc_aux_registers *aux_regs,
+ const struct dcn10_link_enc_hpd_registers *hpd_regs,
+ const struct dcn10_link_enc_shift *link_shift,
+ const struct dcn10_link_enc_mask *link_mask)
+{
+ struct bp_connector_speed_cap_info bp_cap_info = {0};
+ const struct dc_vbios_funcs *bp_funcs = init_data->ctx->dc_bios->funcs;
+ enum bp_result result = BP_RESULT_OK;
+ struct dcn10_link_encoder *enc10 = &enc20->enc10;
+
+ enc10->base.funcs = &dcn35_link_enc_funcs;
+ enc10->base.ctx = init_data->ctx;
+ enc10->base.id = init_data->encoder;
+
+ enc10->base.hpd_source = init_data->hpd_source;
+ enc10->base.connector = init_data->connector;
+
+ enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
+
+ enc10->base.features = *enc_features;
+/* Set after the copy above so that the copy cannot overwrite the flag. */
+ if (enc10->base.connector.id == CONNECTOR_ID_USBC)
+ enc10->base.features.flags.bits.DP_IS_USB_C = 1;
+
+ enc10->base.transmitter = init_data->transmitter;
+
+ /* set the flag to indicate whether driver poll the I2C data pin
+ * while doing the DP sink detect
+ */
+
+/* if (dal_adapter_service_is_feature_supported(as,
+ * FEATURE_DP_SINK_DETECT_POLL_DATA_PIN))
+ * enc10->base.features.flags.bits.
+ * DP_SINK_DETECT_POLL_DATA_PIN = true;
+ */
+
+ enc10->base.output_signals =
+ SIGNAL_TYPE_DVI_SINGLE_LINK |
+ SIGNAL_TYPE_DVI_DUAL_LINK |
+ SIGNAL_TYPE_LVDS |
+ SIGNAL_TYPE_DISPLAY_PORT |
+ SIGNAL_TYPE_DISPLAY_PORT_MST |
+ SIGNAL_TYPE_EDP |
+ SIGNAL_TYPE_HDMI_TYPE_A;
+
+ enc10->link_regs = link_regs;
+ enc10->aux_regs = aux_regs;
+ enc10->hpd_regs = hpd_regs;
+ enc10->link_shift = link_shift;
+ enc10->link_mask = link_mask;
+/* Each UNIPHY transmitter prefers the DIG engine with the same letter. */
+ switch (enc10->base.transmitter) {
+ case TRANSMITTER_UNIPHY_A:
+ enc10->base.preferred_engine = ENGINE_ID_DIGA;
+ break;
+ case TRANSMITTER_UNIPHY_B:
+ enc10->base.preferred_engine = ENGINE_ID_DIGB;
+ break;
+ case TRANSMITTER_UNIPHY_C:
+ enc10->base.preferred_engine = ENGINE_ID_DIGC;
+ break;
+ case TRANSMITTER_UNIPHY_D:
+ enc10->base.preferred_engine = ENGINE_ID_DIGD;
+ break;
+ case TRANSMITTER_UNIPHY_E:
+ enc10->base.preferred_engine = ENGINE_ID_DIGE;
+ break;
+ default:
+ ASSERT_CRITICAL(false);
+ enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
+ }
+/* Default to enabled; the VBIOS data or hdmi20_disable below may clear it. */
+ enc10->base.features.flags.bits.HDMI_6GB_EN = 1;
+
+ if (bp_funcs->get_connector_speed_cap_info)
+ result = bp_funcs->get_connector_speed_cap_info(enc10->base.ctx->dc_bios,
+ enc10->base.connector, &bp_cap_info);
+
+ /* Override features with DCE-specific values */
+ if (result == BP_RESULT_OK) {
+ enc10->base.features.flags.bits.IS_HBR2_CAPABLE =
+ bp_cap_info.DP_HBR2_EN;
+ enc10->base.features.flags.bits.IS_HBR3_CAPABLE =
+ bp_cap_info.DP_HBR3_EN;
+ enc10->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN;
+ enc10->base.features.flags.bits.IS_DP2_CAPABLE = 1;
+ enc10->base.features.flags.bits.IS_UHBR10_CAPABLE = bp_cap_info.DP_UHBR10_EN;
+ enc10->base.features.flags.bits.IS_UHBR13_5_CAPABLE = bp_cap_info.DP_UHBR13_5_EN;
+ enc10->base.features.flags.bits.IS_UHBR20_CAPABLE = bp_cap_info.DP_UHBR20_EN;
+
+ } else {
+ DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n",
+ __func__,
+ result);
+ }
+ if (enc10->base.ctx->dc->debug.hdmi20_disable)
+ enc10->base.features.flags.bits.HDMI_6GB_EN = 0;
+
+}
+
+/*
+ * DPIA equivalent of link_transmitter_control.
+ *
+ * Copies *dpia_control into a zero-initialised DMUB command (type
+ * DMUB_CMD__DPIA, sub-type DMUB_CMD__DPIA_DIG1_DPIA_CONTROL) and submits
+ * it through dc_wake_and_execute_dmub_cmd() with DM_DMUB_WAIT_TYPE_WAIT.
+ *
+ * Always returns true: the result of the DMUB call is not inspected.
+ * NOTE(review): if dc_wake_and_execute_dmub_cmd() reports failure through
+ * its return value, consider propagating it - confirm against its
+ * declaration.
+ */
+static bool link_dpia_control(struct dc_context *dc_ctx,
+ struct dmub_cmd_dig_dpia_control_data *dpia_control)
+{
+ union dmub_rb_cmd cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+
+ cmd.dig1_dpia_control.header.type = DMUB_CMD__DPIA;
+ cmd.dig1_dpia_control.header.sub_type =
+ DMUB_CMD__DPIA_DIG1_DPIA_CONTROL;
+ cmd.dig1_dpia_control.header.payload_bytes =
+ sizeof(cmd.dig1_dpia_control) -
+ sizeof(cmd.dig1_dpia_control.header); /* payload size excludes the header */
+
+ cmd.dig1_dpia_control.dpia_control = *dpia_control;
+
+ dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+
+ return true;
+}
+/*
+ * Clear DP_LINK_CNTL.DP_LINK_TRAINING_COMPLETE on the given encoder.
+ * The register helper macros at the top of this file expand to
+ * expressions on enc10, so presumably the parameter name must stay as is.
+ */
+static void link_encoder_disable(struct dcn10_link_encoder *enc10)
+{
+ /* reset training complete */
+ REG_UPDATE(DP_LINK_CNTL, DP_LINK_TRAINING_COMPLETE, 0);
+}
+
+/*
+ * Enable DP output on this link encoder.
+ *
+ * Dispatches on dc->config.unify_link_enc_assignment: when it is set, the
+ * preferred engine is logged at debug level and the DCN2.0 implementation
+ * is used; otherwise the DCN3.1 implementation is used.
+ */
+void dcn35_link_encoder_enable_dp_output(
+	struct link_encoder *enc,
+	const struct dc_link_settings *link_settings,
+	enum clock_source_id clock_source)
+{
+	/* enc10 is referenced by the DC_LOGGER macro used by DC_LOG_DEBUG. */
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+	if (enc->ctx->dc->config.unify_link_enc_assignment) {
+		DC_LOG_DEBUG("%s: enc_id(%d)\n", __func__, enc->preferred_engine);
+		dcn20_link_encoder_enable_dp_output(enc, link_settings, clock_source);
+		return;
+	}
+
+	dcn31_link_encoder_enable_dp_output(enc, link_settings, clock_source);
+}
+
+/*
+ * Enable DP MST output on this link encoder.
+ *
+ * Dispatches on dc->config.unify_link_enc_assignment: when it is set, the
+ * preferred engine is logged at debug level and the DCN1.0 implementation
+ * is used; otherwise the DCN3.1 implementation is used.
+ */
+void dcn35_link_encoder_enable_dp_mst_output(
+	struct link_encoder *enc,
+	const struct dc_link_settings *link_settings,
+	enum clock_source_id clock_source)
+{
+	/* enc10 is referenced by the DC_LOGGER macro used by DC_LOG_DEBUG. */
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+	if (enc->ctx->dc->config.unify_link_enc_assignment) {
+		DC_LOG_DEBUG("%s: enc_id(%d)\n", __func__, enc->preferred_engine);
+		dcn10_link_encoder_enable_dp_mst_output(enc, link_settings, clock_source);
+		return;
+	}
+
+	dcn31_link_encoder_enable_dp_mst_output(enc, link_settings, clock_source);
+}
+
+/*
+ * Disable output for @signal on this link encoder.
+ *
+ * Dispatches on dc->config.unify_link_enc_assignment: when it is set, the
+ * preferred engine is logged at debug level and the DCN1.0 implementation
+ * is used; otherwise the DCN3.1 implementation is used.
+ */
+void dcn35_link_encoder_disable_output(
+	struct link_encoder *enc,
+	enum signal_type signal)
+{
+	/* enc10 is referenced by the DC_LOGGER macro used by DC_LOG_DEBUG. */
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+	if (enc->ctx->dc->config.unify_link_enc_assignment) {
+		DC_LOG_DEBUG("%s: enc_id(%d)\n", __func__, enc->preferred_engine);
+		dcn10_link_encoder_disable_output(enc, signal);
+		return;
+	}
+
+	dcn31_link_encoder_disable_output(enc, signal);
+}
+/*
+ * Enable output through a DPIA.
+ *
+ * Configures the encoder for @link_settings via enc1_configure_encoder(),
+ * then fills a DIG1 DPIA control request with action
+ * TRANSMITTER_CONTROL_ENABLE and submits it with link_dpia_control().
+ * @dpia_id, @digmode and @fec_rdy are passed through to the request
+ * unchanged; the return value of link_dpia_control() is ignored.
+ */
+void dcn35_link_encoder_enable_dpia_output(
+ struct link_encoder *enc,
+ const struct dc_link_settings *link_settings,
+ uint8_t dpia_id,
+ uint8_t digmode,
+ uint8_t fec_rdy)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ struct dmub_cmd_dig_dpia_control_data dpia_control = { 0 };
+
+ enc1_configure_encoder(enc10, link_settings);
+
+ dpia_control.action = (uint8_t)TRANSMITTER_CONTROL_ENABLE;
+ dpia_control.enc_id = enc->preferred_engine;
+ dpia_control.mode_laneset.digmode = digmode;
+ dpia_control.lanenum = (uint8_t)link_settings->lane_count;
+ dpia_control.symclk_10khz = link_settings->link_rate *
+ LINK_RATE_REF_FREQ_IN_KHZ / 10; /* presumably kHz -> 10 kHz units; TODO confirm */
+ /* DIG_BE_CNTL.DIG_HPD_SELECT set to 5 (hpdsel - 1) to indicate HPD pin unused by DPIA. */
+ dpia_control.hpdsel = 6;
+ dpia_control.dpia_id = dpia_id;
+ dpia_control.fec_rdy = fec_rdy;
+
+ DC_LOG_DEBUG("%s: DPIA(%d) - enc_id(%d)\n", __func__, dpia_control.dpia_id, dpia_control.enc_id);
+ link_dpia_control(enc->ctx, &dpia_control);
+}
+/*
+ * Disable output through a DPIA.
+ *
+ * Returns without doing anything when the encoder provides an
+ * is_dig_enabled hook and that hook reports the DIG as disabled.
+ * Otherwise submits a DIG1 DPIA control request with action
+ * TRANSMITTER_CONTROL_DISABLE via link_dpia_control() and then clears the
+ * link-training-complete bit with link_encoder_disable().
+ */
+void dcn35_link_encoder_disable_dpia_output(
+ struct link_encoder *enc,
+ uint8_t dpia_id,
+ uint8_t digmode)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ struct dmub_cmd_dig_dpia_control_data dpia_control = { 0 };
+
+ if (enc->funcs->is_dig_enabled && !enc->funcs->is_dig_enabled(enc))
+ return;
+
+ dpia_control.action = (uint8_t)TRANSMITTER_CONTROL_DISABLE;
+ dpia_control.enc_id = enc->preferred_engine;
+ dpia_control.mode_laneset.digmode = digmode;
+ dpia_control.dpia_id = dpia_id;
+
+ DC_LOG_DEBUG("%s: DPIA(%d) - enc_id(%d)\n", __func__, dpia_control.dpia_id, dpia_control.enc_id);
+ link_dpia_control(enc->ctx, &dpia_control);
+
+ link_encoder_disable(enc10);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h
new file mode 100644
index 000000000000..5712e6553fab
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h
@@ -0,0 +1,188 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DC_LINK_ENCODER__DCN35_H__
+#define __DC_LINK_ENCODER__DCN35_H__
+
+#include "dcn32/dcn32_dio_link_encoder.h"
+#include "dcn30/dcn30_dio_link_encoder.h"
+#include "dcn31/dcn31_dio_link_encoder.h"
+/*
+ * Shift/mask field list for the DCN35 link encoder. Each
+ * LE_SF(register, field, mask_sh) entry names one register field; the
+ * expansion of LE_SF is defined outside this header.
+ *
+ * NOTE(review): LE_SF(DIG0_TMDS_CTL_BITS, TMDS_CTL0, mask_sh) appears twice
+ * in this list - confirm whether the duplicate entry is intentional.
+ */
+#define LINK_ENCODER_MASK_SH_LIST_DCN35(mask_sh) \
+ LE_SF(DIG0_DIG_BE_EN_CNTL, DIG_BE_ENABLE, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CNTL, DIG_RB_SWITCH_EN, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CNTL, DIG_HPD_SELECT, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CNTL, DIG_FE_SOURCE_SELECT, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_MODE, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_CLK_EN, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_SOFT_RESET, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CLK_CNTL, HDCP_SOFT_RESET, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_SYMCLK_G_CLOCK_ON, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_SYMCLK_G_HDCP_CLOCK_ON, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_SYMCLK_G_TMDS_CLOCK_ON, mask_sh),\
+ LE_SF(DIG0_DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, mask_sh),\
+ LE_SF(DIG0_TMDS_CTL_BITS, TMDS_CTL0, mask_sh), \
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_BYPASS, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE0, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE1, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE2, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE3, mask_sh),\
+ LE_SF(DP0_DP_DPHY_PRBS_CNTL, DPHY_PRBS_EN, mask_sh),\
+ LE_SF(DP0_DP_DPHY_PRBS_CNTL, DPHY_PRBS_SEL, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM1, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM2, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM3, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM4, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM5, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM6, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM2, DPHY_SYM7, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM2, DPHY_SYM8, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_BS_COUNT, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_ADVANCE, mask_sh),\
+ LE_SF(DP0_DP_DPHY_FAST_TRAINING, DPHY_RX_FAST_TRAINING_CAPABLE, mask_sh),\
+ LE_SF(DP0_DP_DPHY_BS_SR_SWAP_CNTL, DPHY_LOAD_BS_COUNT, mask_sh),\
+ LE_SF(DP0_DP_DPHY_TRAINING_PATTERN_SEL, DPHY_TRAINING_PATTERN_SEL, mask_sh),\
+ LE_SF(DP0_DP_DPHY_HBR2_PATTERN_CONTROL, DP_DPHY_HBR2_PATTERN_CONTROL, mask_sh),\
+ LE_SF(DP0_DP_LINK_CNTL, DP_LINK_TRAINING_COMPLETE, mask_sh),\
+ LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_IDLE_BS_INTERVAL, mask_sh),\
+ LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_VBID_DISABLE, mask_sh),\
+ LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_VID_ENHANCED_FRAME_MODE, mask_sh),\
+ LE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\
+ LE_SF(DP0_DP_CONFIG, DP_UDI_LANES, mask_sh),\
+ LE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP0_LINE_NUM, mask_sh),\
+ LE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP0_PRIORITY, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SRC0, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SRC1, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SLOT_COUNT0, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SLOT_COUNT1, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SRC2, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SRC3, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SLOT_COUNT2, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SLOT_COUNT3, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT_UPDATE, DP_MSE_SAT_UPDATE, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT_UPDATE, DP_MSE_16_MTP_KEEPOUT, mask_sh),\
+ LE_SF(DP_AUX0_AUX_CONTROL, AUX_HPD_SEL, mask_sh),\
+ LE_SF(DP_AUX0_AUX_CONTROL, AUX_LS_READ_EN, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_RECEIVE_WINDOW, mask_sh),\
+ LE_SF(HPD0_DC_HPD_CONTROL, DC_HPD_EN, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_FEC_EN, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_FEC_READY_SHADOW, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_FEC_ACTIVE_STATUS, mask_sh),\
+ LE_SF(DIG0_TMDS_CTL_BITS, TMDS_CTL0, mask_sh), \
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_START_WINDOW, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_HALF_SYM_DETECT_LEN, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_TRANSITION_FILTER_EN, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_ALLOW_BELOW_THRESHOLD_PHASE_DETECT, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_ALLOW_BELOW_THRESHOLD_START, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_ALLOW_BELOW_THRESHOLD_STOP, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_PHASE_DETECT_LEN, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_DETECTION_THRESHOLD, mask_sh), \
+ LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_TX_PRECHARGE_LEN, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_TX_PRECHARGE_SYMBOLS, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_MODE_DET_CHECK_DELAY, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_PRECHARGE_SKIP, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_TIMEOUT_LEN, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_TIMEOUT_LEN_MUL, mask_sh),\
+ LE_SF(DIO_LINKA_CNTL, ENC_TYPE_SEL, mask_sh),\
+ LE_SF(DIO_LINKA_CNTL, HPO_DP_ENC_SEL, mask_sh),\
+ LE_SF(DIO_LINKA_CNTL, HPO_HDMI_ENC_SEL, mask_sh),\
+ LE_SF(DIO_CLK_CNTL, DISPCLK_R_GATE_DIS, mask_sh),\
+ LE_SF(DIO_CLK_CNTL, DISPCLK_G_GATE_DIS, mask_sh),\
+ LE_SF(DIO_CLK_CNTL, REFCLK_R_GATE_DIS, mask_sh),\
+ LE_SF(DIO_CLK_CNTL, REFCLK_G_GATE_DIS, mask_sh),\
+ LE_SF(DIO_CLK_CNTL, SOCCLK_G_GATE_DIS, mask_sh),\
+ LE_SF(DIO_CLK_CNTL, SYMCLK_FE_R_GATE_DIS, mask_sh),\
+ LE_SF(DIO_CLK_CNTL, SYMCLK_FE_G_GATE_DIS, mask_sh),\
+ LE_SF(DIO_CLK_CNTL, SYMCLK_R_GATE_DIS, mask_sh),\
+ LE_SF(DIO_CLK_CNTL, SYMCLK_G_GATE_DIS, mask_sh),\
+ LE_SF(DIO_CLK_CNTL, DIO_FGCG_REP_DIS, mask_sh),\
+ LE_SF(DIO_CLK_CNTL, DISPCLK_G_HDCP_GATE_DIS, mask_sh),\
+ LE_SF(DIO_CLK_CNTL, SYMCLKA_G_HDCP_GATE_DIS, mask_sh),\
+ LE_SF(DIO_CLK_CNTL, SYMCLKB_G_HDCP_GATE_DIS, mask_sh),\
+ LE_SF(DIO_CLK_CNTL, SYMCLKC_G_HDCP_GATE_DIS, mask_sh),\
+ LE_SF(DIO_CLK_CNTL, SYMCLKD_G_HDCP_GATE_DIS, mask_sh),\
+ LE_SF(DIO_CLK_CNTL, SYMCLKE_G_HDCP_GATE_DIS, mask_sh),\
+ LE_SF(DIO_CLK_CNTL, SYMCLKF_G_HDCP_GATE_DIS, mask_sh),\
+ LE_SF(DIO_CLK_CNTL, SYMCLKG_G_HDCP_GATE_DIS, mask_sh)
+
+
+void dcn35_link_encoder_construct(
+ struct dcn20_link_encoder *enc20,
+ const struct encoder_init_data *init_data,
+ const struct encoder_feature_support *enc_features,
+ const struct dcn10_link_enc_registers *link_regs,
+ const struct dcn10_link_enc_aux_registers *aux_regs,
+ const struct dcn10_link_enc_hpd_registers *hpd_regs,
+ const struct dcn10_link_enc_shift *link_shift,
+ const struct dcn10_link_enc_mask *link_mask);
+
+void dcn35_link_encoder_init(struct link_encoder *enc);
+void dcn35_link_encoder_set_fgcg(struct link_encoder *enc, bool enabled);
+bool dcn35_is_dig_enabled(struct link_encoder *enc);
+
+enum signal_type dcn35_get_dig_mode(struct link_encoder *enc);
+void dcn35_link_encoder_setup(struct link_encoder *enc, enum signal_type signal);
+
+/*
+ * Enable DP transmitter and its encoder.
+ */
+void dcn35_link_encoder_enable_dp_output(
+ struct link_encoder *enc,
+ const struct dc_link_settings *link_settings,
+ enum clock_source_id clock_source);
+
+/*
+ * Enable DP transmitter and its encoder in MST mode.
+ */
+void dcn35_link_encoder_enable_dp_mst_output(
+ struct link_encoder *enc,
+ const struct dc_link_settings *link_settings,
+ enum clock_source_id clock_source);
+
+/*
+ * Disable transmitter and its encoder.
+ */
+void dcn35_link_encoder_disable_output(
+ struct link_encoder *enc,
+ enum signal_type signal);
+
+/*
+ * Enable DP transmitter and its encoder for dpia port.
+ */
+void dcn35_link_encoder_enable_dpia_output(
+ struct link_encoder *enc,
+ const struct dc_link_settings *link_settings,
+ uint8_t dpia_id,
+ uint8_t digmode,
+ uint8_t fec_rdy);
+
+/*
+ * Disable transmitter and its encoder for dpia port.
+ */
+void dcn35_link_encoder_disable_dpia_output(
+ struct link_encoder *enc,
+ uint8_t dpia_id,
+ uint8_t digmode);
+
+#endif /* __DC_LINK_ENCODER__DCN35_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c
new file mode 100644
index 000000000000..fd5d1dbf9dc6
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c
@@ -0,0 +1,520 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "dc_bios_types.h"
+#include "dcn30/dcn30_dio_stream_encoder.h"
+#include "dcn314/dcn314_dio_stream_encoder.h"
+#include "dcn32/dcn32_dio_stream_encoder.h"
+#include "dcn35_dio_stream_encoder.h"
+#include "reg_helper.h"
+#include "hw_shared.h"
+#include "link_service.h"
+#include "dpcd_defs.h"
+/*
+ * File-local helper macros. DC_LOGGER, REG(), FN() and CTX all expand to
+ * expressions on a variable named enc1, so a struct dcn10_stream_encoder
+ * pointer with that name must be in scope wherever they are used.
+ */
+#define DC_LOGGER \
+ enc1->base.ctx->logger
+
+#define REG(reg)\
+ (enc1->regs->reg)
+
+#undef FN
+#define FN(reg_name, field_name) \
+ enc1->se_shift->field_name, enc1->se_mask->field_name
+
+#define VBI_LINE_0 0
+#define HDMI_CLOCK_CHANNEL_RATE_MORE_340M 340000
+
+#define CTX \
+ enc1->base.ctx
+/* setup stream encoder in dvi mode
+ *
+ * Unless debug.avoid_vbios_exec_table is set, issues ENCODER_CONTROL_SETUP
+ * through the BIOS parser's encoder_control hook (single- or dual-link DVI
+ * signal, pixel clock pix_clk_100hz / 10, four or eight lanes) and returns
+ * early if the result is not BP_RESULT_OK. With the flag set, only
+ * DIG_CLOCK_PATTERN is programmed directly.
+ * On the non-early-return path the timing is asserted to be RGB 8 bpc and
+ * handed to enc1_stream_encoder_set_stream_attribute_helper().
+ */
+static void enc35_stream_encoder_dvi_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ bool is_dual_link)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ if (!enc->ctx->dc->debug.avoid_vbios_exec_table) {
+ struct bp_encoder_control cntl = {0};
+
+ cntl.action = ENCODER_CONTROL_SETUP;
+ cntl.engine_id = enc1->base.id;
+ cntl.signal = is_dual_link ?
+ SIGNAL_TYPE_DVI_DUAL_LINK : SIGNAL_TYPE_DVI_SINGLE_LINK;
+ cntl.enable_dp_audio = false;
+ cntl.pixel_clock = crtc_timing->pix_clk_100hz / 10;
+ cntl.lanes_number = (is_dual_link) ? LANE_COUNT_EIGHT : LANE_COUNT_FOUR;
+
+ if (enc1->base.bp->funcs->encoder_control(
+ enc1->base.bp, &cntl) != BP_RESULT_OK)
+ return;
+
+ } else {
+
+ //Set pattern for clock channel, default value 0x63 does not work
+ REG_UPDATE(DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, 0x1F);
+
+ //DIG_BE_TMDS_DVI_MODE : TMDS-DVI mode is already set in link_encoder_setup
+
+ //DIG_SOURCE_SELECT is already set in dig_connect_to_otg
+
+ /* DIG_START is removed from the register spec */
+ }
+
+ ASSERT(crtc_timing->pixel_encoding == PIXEL_ENCODING_RGB);
+ ASSERT(crtc_timing->display_color_depth == COLOR_DEPTH_888);
+ enc1_stream_encoder_set_stream_attribute_helper(enc1, crtc_timing);
+}
+/* setup stream encoder in hdmi mode
+ *
+ * Unless debug.avoid_vbios_exec_table is set, issues ENCODER_CONTROL_SETUP
+ * through the BIOS parser's encoder_control hook (HDMI type A, pixel clock
+ * actual_pix_clk_khz, four lanes) and returns early if the result is not
+ * BP_RESULT_OK. With the flag set, DIG_CLOCK_PATTERN is programmed directly
+ * and the DIG FIFO is enabled.
+ * Afterwards programs HDMI_CONTROL (deep color by display_color_depth,
+ * scrambling when actual_pix_clk_khz >= HDMI_CLOCK_CHANNEL_RATE_MORE_340M or
+ * when flags.LTE_340MCSC_SCRAMBLE is set), general control / audio infoframe
+ * packet transmission, and the TMDS pixel encoding.
+ *
+ * NOTE(review): the FIFO is enabled with enc314_enable_fifo() rather than the
+ * enc35_enable_fifo() defined in this file - confirm this is intentional.
+ */
+static void enc35_stream_encoder_hdmi_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ int actual_pix_clk_khz,
+ bool enable_audio)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ if (!enc->ctx->dc->debug.avoid_vbios_exec_table) {
+ struct bp_encoder_control cntl = {0};
+
+ cntl.action = ENCODER_CONTROL_SETUP;
+ cntl.engine_id = enc1->base.id;
+ cntl.signal = SIGNAL_TYPE_HDMI_TYPE_A;
+ cntl.enable_dp_audio = enable_audio;
+ cntl.pixel_clock = actual_pix_clk_khz;
+ cntl.lanes_number = LANE_COUNT_FOUR;
+
+ if (enc1->base.bp->funcs->encoder_control(
+ enc1->base.bp, &cntl) != BP_RESULT_OK)
+ return;
+
+ } else {
+
+ //Set pattern for clock channel, default value 0x63 does not work
+ REG_UPDATE(DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, 0x1F);
+
+ //DIG_BE_TMDS_HDMI_MODE : TMDS-HDMI mode is already set in link_encoder_setup
+
+ //DIG_SOURCE_SELECT is already set in dig_connect_to_otg
+
+ /* DIG_START is removed from the register spec */
+ enc314_enable_fifo(enc);
+ }
+
+ /* Configure pixel encoding */
+ enc1_stream_encoder_set_stream_attribute_helper(enc1, crtc_timing);
+
+ /* setup HDMI engine */
+ REG_UPDATE_6(HDMI_CONTROL,
+ HDMI_PACKET_GEN_VERSION, 1,
+ HDMI_KEEPOUT_MODE, 1,
+ HDMI_DEEP_COLOR_ENABLE, 0,
+ HDMI_DATA_SCRAMBLE_EN, 0,
+ HDMI_NO_EXTRA_NULL_PACKET_FILLED, 1,
+ HDMI_CLOCK_CHANNEL_RATE, 0);
+
+ /* Configure color depth */
+ switch (crtc_timing->display_color_depth) {
+ case COLOR_DEPTH_888:
+ REG_UPDATE(HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 0);
+ break;
+ case COLOR_DEPTH_101010:
+ if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 1,
+ HDMI_DEEP_COLOR_ENABLE, 0);
+ } else {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 1,
+ HDMI_DEEP_COLOR_ENABLE, 1);
+ }
+ break;
+ case COLOR_DEPTH_121212:
+ if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 2,
+ HDMI_DEEP_COLOR_ENABLE, 0);
+ } else {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 2,
+ HDMI_DEEP_COLOR_ENABLE, 1);
+ }
+ break;
+ case COLOR_DEPTH_161616:
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 3,
+ HDMI_DEEP_COLOR_ENABLE, 1);
+ break;
+ default:
+ break;
+ }
+
+ if (actual_pix_clk_khz >= HDMI_CLOCK_CHANNEL_RATE_MORE_340M) {
+ /* enable HDMI data scrambler
+ * HDMI_CLOCK_CHANNEL_RATE_MORE_340M
+ * Clock channel frequency is 1/4 of character rate.
+ */
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DATA_SCRAMBLE_EN, 1,
+ HDMI_CLOCK_CHANNEL_RATE, 1);
+ } else if (crtc_timing->flags.LTE_340MCSC_SCRAMBLE) {
+
+ /* TODO: New feature for DCE11, still need to implement */
+
+ /* enable HDMI data scrambler
+ * HDMI_CLOCK_CHANNEL_FREQ_EQUAL_TO_CHAR_RATE
+ * Clock channel frequency is the same
+ * as character rate
+ */
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DATA_SCRAMBLE_EN, 1,
+ HDMI_CLOCK_CHANNEL_RATE, 0);
+ }
+
+
+ /* Enable transmission of General Control packet on every frame */
+ REG_UPDATE_3(HDMI_VBI_PACKET_CONTROL,
+ HDMI_GC_CONT, 1,
+ HDMI_GC_SEND, 1,
+ HDMI_NULL_SEND, 1);
+
+ /* Disable Audio Content Protection packet transmission */
+ REG_UPDATE(HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, 0);
+
+ /* following belongs to audio */
+ /* Enable Audio InfoFrame packet transmission. */
+ REG_UPDATE(HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
+
+ /* update double-buffered AUDIO_INFO registers immediately */
+ ASSERT(enc->afmt);
+ enc->afmt->funcs->audio_info_immediate_update(enc->afmt);
+
+ /* Select line number on which to send Audio InfoFrame packets */
+ REG_UPDATE(HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE,
+ VBI_LINE_0 + 2);
+
+ /* set HDMI GC AVMUTE */
+ REG_UPDATE(HDMI_GC, HDMI_GC_AVMUTE, 0);
+ switch (crtc_timing->pixel_encoding) {
+ case PIXEL_ENCODING_YCBCR422:
+ REG_UPDATE(HDMI_CONTROL, TMDS_PIXEL_ENCODING, 1);
+ break;
+ default:
+ REG_UPDATE(HDMI_CONTROL, TMDS_PIXEL_ENCODING, 0);
+ break;
+ }
+ REG_UPDATE(HDMI_CONTROL, TMDS_COLOR_FORMAT, 0);
+}
+
+
+/*
+ * Select the DIG front-end mode for the given signal type.
+ *
+ * When enable is true, writes DIG_FE_MODE in DIG_FE_CLK_CNTL: 2 for DVI,
+ * 3 for HDMI, 5 for DP MST and 0 for DP SST / eDP / virtual. Any other
+ * signal type trips ASSERT_CRITICAL. When enable is false nothing is
+ * programmed.
+ */
+static void enc35_stream_encoder_enable(
+ struct stream_encoder *enc,
+ enum signal_type signal,
+ bool enable)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ if (enable) {
+ switch (signal) {
+ case SIGNAL_TYPE_DVI_SINGLE_LINK:
+ case SIGNAL_TYPE_DVI_DUAL_LINK:
+ /* TMDS-DVI */
+ REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_MODE, 2);
+ break;
+ case SIGNAL_TYPE_HDMI_TYPE_A:
+ /* TMDS-HDMI */
+ REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_MODE, 3);
+ break;
+ case SIGNAL_TYPE_DISPLAY_PORT_MST:
+ /* DP MST */
+ REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_MODE, 5);
+ break;
+ case SIGNAL_TYPE_EDP:
+ case SIGNAL_TYPE_DISPLAY_PORT:
+ case SIGNAL_TYPE_VIRTUAL:
+ /* DP SST */
+ REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_MODE, 0);
+ break;
+ default:
+ /* invalid mode ! */
+ ASSERT_CRITICAL(false);
+ }
+ }
+}
+/*
+ * Return true when the timing uses YCbCr 4:2:0, or when DSC is enabled with
+ * YCbCr 4:2:2 and dsc_cfg.ycbcr422_simple is not set.
+ */
+static bool is_two_pixels_per_containter(const struct dc_crtc_timing *timing)
+{
+ bool two_pix = timing->pixel_encoding == PIXEL_ENCODING_YCBCR420;
+
+ two_pix = two_pix || (timing->flags.DSC && timing->pixel_encoding == PIXEL_ENCODING_YCBCR422
+ && !timing->dsc_cfg.ycbcr422_simple);
+ return two_pix;
+}
+/*
+ * Unblank a DP stream.
+ *
+ * When the link rate is known, programs the initial DP_VID_N (0x8000) and
+ * DP_VID_M = n_vid * (pix_clk_100hz / 10) /
+ * (link_rate * LINK_RATE_REF_FREQ_IN_KHZ), and sets DP_VID_N_MUL and
+ * DP_PIXEL_PER_CYCLE_PROCESSING_MODE to 1 when the timing carries two pixels
+ * per container, opp_cnt > 1 or pix_per_cycle > 1 (0 otherwise).
+ * It then disables the video stream and waits for DP_VID_STREAM_STATUS to
+ * read 0, pulses DP_STEER_FIFO_RESET, re-enables the stream, enables the DIG
+ * FIFO and reports DPCD_SOURCE_SEQ_AFTER_ENABLE_DP_VID_STREAM through the
+ * link service.
+ *
+ * NOTE(review): the FIFO is enabled with enc314_enable_fifo() rather than the
+ * enc35_enable_fifo() defined in this file - confirm this is intentional.
+ */
+static void enc35_stream_encoder_dp_unblank(
+ struct dc_link *link,
+ struct stream_encoder *enc,
+ const struct encoder_unblank_param *param)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ if (param->link_settings.link_rate != LINK_RATE_UNKNOWN) {
+ uint32_t n_vid = 0x8000;
+ uint32_t m_vid;
+ uint32_t n_multiply = 0;
+ uint32_t pix_per_cycle = 0;
+ uint64_t m_vid_l = n_vid;
+
+ /* YCbCr 4:2:0 : Computed VID_M will be 2X the input rate */
+ if (is_two_pixels_per_containter(&param->timing) || param->opp_cnt > 1
+ || param->pix_per_cycle > 1) {
+ /*this logic should be the same in get_pixel_clock_parameters() */
+ n_multiply = 1;
+ pix_per_cycle = 1;
+ }
+ /* M / N = Fstream / Flink
+ * m_vid / n_vid = pixel rate / link rate
+ */
+
+ m_vid_l *= param->timing.pix_clk_100hz / 10;
+ m_vid_l = div_u64(m_vid_l,
+ param->link_settings.link_rate
+ * LINK_RATE_REF_FREQ_IN_KHZ);
+
+ m_vid = (uint32_t) m_vid_l;
+
+ /* enable auto measurement: DP_VID_M_N_GEN_EN is cleared here, the
+ * initial M/N values are programmed, and it is set again below
+ */
+
+ REG_UPDATE(DP_VID_TIMING, DP_VID_M_N_GEN_EN, 0);
+
+ /* auto measurement need 1 full 0x8000 symbol cycle to kick in,
+ * therefore program initial value for Mvid and Nvid
+ */
+
+ REG_UPDATE(DP_VID_N, DP_VID_N, n_vid);
+
+ REG_UPDATE(DP_VID_M, DP_VID_M, m_vid);
+
+ REG_UPDATE_2(DP_VID_TIMING,
+ DP_VID_M_N_GEN_EN, 1,
+ DP_VID_N_MUL, n_multiply);
+
+ REG_UPDATE(DP_PIXEL_FORMAT,
+ DP_PIXEL_PER_CYCLE_PROCESSING_MODE,
+ pix_per_cycle);
+ }
+
+ /* make sure stream is disabled before resetting steer fifo */
+ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, false);
+ REG_WAIT(DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, 0, 10, 5000);
+
+ /* DIG_START is removed from the register spec */
+
+ /* switch DP encoder to CRTC data, but reset the fifo first. It may happen
+ * that it overflows during mode transition, and sometimes doesn't recover.
+ */
+ REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 1);
+ udelay(10);
+
+ REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 0);
+
+ /* wait 100us for DIG/DP logic to prime
+ * (i.e. a few video lines)
+ */
+ udelay(100);
+
+ /* the hardware would start sending video at the start of the next DP
+ * frame (i.e. rising edge of the vblank).
+ * NOTE: We used to program DP_VID_STREAM_DIS_DEFER = 2 here, but this
+ * register has no effect on enable transition! HW always makes sure
+ * VID_STREAM enable at start of next frame, and this is not
+ * programmable
+ */
+
+ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, true);
+
+ /*
+ * DIG Resync FIFO now needs to be explicitly enabled.
+ * This should come after DP_VID_STREAM_ENABLE per HW docs.
+ */
+ enc314_enable_fifo(enc);
+
+ link->dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_ENABLE_DP_VID_STREAM);
+}
+/*
+ * Route this stream encoder to a link encoder by writing link_enc_inst into
+ * DIG_STREAM_LINK_TARGET of STREAM_MAPPER_CONTROL. Both instance numbers are
+ * asserted to be below 5; stream_enc_inst is used only in that assertion.
+ */
+static void enc35_stream_encoder_map_to_link(
+ struct stream_encoder *enc,
+ uint32_t stream_enc_inst,
+ uint32_t link_enc_inst)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ ASSERT(stream_enc_inst < 5 && link_enc_inst < 5);
+ REG_UPDATE(STREAM_MAPPER_CONTROL,
+ DIG_STREAM_LINK_TARGET, link_enc_inst);
+}
+/*
+ * Assert (reset == true) or release (reset == false) the DIG FIFO reset.
+ *
+ * After writing DIG_FIFO_RESET, reads DIG_FE_SYMCLK_FE_G_CLOCK_ON: if that
+ * clock is on, waits for DIG_FIFO_RESET_DONE to match the written value;
+ * otherwise falls back to a fixed udelay(10).
+ * NOTE(review): the REG_WAIT arguments 10 and 5000 are presumably the poll
+ * delay and retry count - confirm against the REG_WAIT definition.
+ */
+static void enc35_reset_fifo(struct stream_encoder *enc, bool reset)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ uint32_t reset_val = reset ? 1 : 0;
+ uint32_t is_symclk_on;
+
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, reset_val);
+ REG_GET(DIG_FE_CLK_CNTL, DIG_FE_SYMCLK_FE_G_CLOCK_ON, &is_symclk_on);
+
+ if (is_symclk_on)
+ REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, reset_val, 10, 5000);
+ else
+ udelay(10);
+}
+/*
+ * Return true when the DIG_FIFO_ENABLE field of DIG_FIFO_CTRL0 reads
+ * non-zero. Despite its name, the local reset_val holds that enable field.
+ */
+static bool enc35_is_fifo_enabled(struct stream_encoder *enc)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ uint32_t reset_val;
+
+ REG_GET(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, &reset_val);
+ return reset_val != 0;
+}
+void enc35_disable_fifo(struct stream_encoder *enc)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ /* Disable in order: DIG_FIFO_ENABLE, then DIG_FE_ENABLE, then DIG_FE_CLK_EN
+ * (enc35_enable_fifo() sets DIG_FE_CLK_EN and DIG_FE_ENABLE before DIG_FIFO_ENABLE).
+ */
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 0);
+ REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 0);
+ REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 0);
+}
+/*
+ * Enable the DIG FIFO: set DIG_FIFO_READ_START_LEVEL to 0x7, turn on the
+ * DIG front-end clock (DIG_FE_CLK_EN) and the front end (DIG_FE_ENABLE),
+ * assert and then release the FIFO reset via enc35_reset_fifo(), and
+ * finally set DIG_FIFO_ENABLE.
+ */
+void enc35_enable_fifo(struct stream_encoder *enc)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, 0x7);
+ REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 1);
+ REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 1);
+
+ enc35_reset_fifo(enc, true);
+ enc35_reset_fifo(enc, false);
+
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 1);
+}
+/*
+ * Translate the DIG_FIFO_OUTPUT_PIXEL_MODE field into a pixels-per-cycle
+ * count: field value 0 returns 1, field value 1 returns 2. Any other value
+ * trips ASSERT_CRITICAL and returns 1.
+ */
+static uint32_t enc35_get_pixels_per_cycle(struct stream_encoder *enc)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ uint32_t value;
+
+ REG_GET(DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_MODE, &value);
+
+ switch (value) {
+ case 0:
+ return 1;
+ case 1:
+ return 2;
+ default:
+ ASSERT_CRITICAL(false);
+ return 1;
+ }
+}
+/*
+ * Stream encoder function table installed by
+ * dcn35_dio_stream_encoder_construct(). Most hooks point at enc1_, enc2_,
+ * enc3_ and enc314_ helpers; the enc35_ entries are the functions defined in
+ * this file.
+ */
+static const struct stream_encoder_funcs dcn35_str_enc_funcs = {
+ .dp_set_odm_combine =
+ enc314_dp_set_odm_combine,
+ .dp_set_stream_attribute =
+ enc2_stream_encoder_dp_set_stream_attribute,
+ .hdmi_set_stream_attribute =
+ enc35_stream_encoder_hdmi_set_stream_attribute,
+ .dvi_set_stream_attribute =
+ enc35_stream_encoder_dvi_set_stream_attribute,
+ .set_throttled_vcp_size =
+ enc1_stream_encoder_set_throttled_vcp_size,
+ .update_hdmi_info_packets =
+ enc3_stream_encoder_update_hdmi_info_packets,
+ .stop_hdmi_info_packets =
+ enc3_stream_encoder_stop_hdmi_info_packets,
+ .update_dp_info_packets_sdp_line_num =
+ enc3_stream_encoder_update_dp_info_packets_sdp_line_num,
+ .update_dp_info_packets =
+ enc3_stream_encoder_update_dp_info_packets,
+ .stop_dp_info_packets =
+ enc1_stream_encoder_stop_dp_info_packets,
+ .dp_blank =
+ enc314_stream_encoder_dp_blank,
+ .dp_unblank =
+ enc35_stream_encoder_dp_unblank,
+ .audio_mute_control = enc3_audio_mute_control,
+
+ .dp_audio_setup = enc3_se_dp_audio_setup,
+ .dp_audio_enable = enc3_se_dp_audio_enable,
+ .dp_audio_disable = enc1_se_dp_audio_disable,
+
+ .hdmi_audio_setup = enc3_se_hdmi_audio_setup,
+ .hdmi_audio_disable = enc1_se_hdmi_audio_disable,
+ .setup_stereo_sync = enc1_setup_stereo_sync,
+ .set_avmute = enc1_stream_encoder_set_avmute,
+ .dig_connect_to_otg = enc1_dig_connect_to_otg,
+ .dig_source_otg = enc1_dig_source_otg,
+
+ .dp_get_pixel_format = enc1_stream_encoder_dp_get_pixel_format,
+
+ .enc_read_state = enc314_read_state,
+ .dp_set_dsc_config = enc314_dp_set_dsc_config,
+ .dp_set_dsc_pps_info_packet = enc3_dp_set_dsc_pps_info_packet,
+ .set_dynamic_metadata = enc2_set_dynamic_metadata,
+ .hdmi_reset_stream_attribute = enc1_reset_hdmi_stream_attribute,
+ .enable_stream = enc35_stream_encoder_enable,
+
+ .set_input_mode = enc314_set_dig_input_mode,
+ .enable_fifo = enc35_enable_fifo,
+ .disable_fifo = enc35_disable_fifo,
+ .is_fifo_enabled = enc35_is_fifo_enabled,
+ .map_stream_to_link = enc35_stream_encoder_map_to_link,
+ .get_pixels_per_cycle = enc35_get_pixels_per_cycle,
+};
+/*
+ * Initialise a dcn10_stream_encoder for DCN35: install dcn35_str_enc_funcs
+ * and store the context, engine id, BIOS parser, VPG, AFMT, register block
+ * and shift/mask tables supplied by the caller.
+ *
+ * stream_enc_inst is taken from vpg->inst, so vpg is dereferenced
+ * unconditionally and must not be NULL.
+ */
+void dcn35_dio_stream_encoder_construct(
+ struct dcn10_stream_encoder *enc1,
+ struct dc_context *ctx,
+ struct dc_bios *bp,
+ enum engine_id eng_id,
+ struct vpg *vpg,
+ struct afmt *afmt,
+ const struct dcn10_stream_enc_registers *regs,
+ const struct dcn10_stream_encoder_shift *se_shift,
+ const struct dcn10_stream_encoder_mask *se_mask)
+{
+ enc1->base.funcs = &dcn35_str_enc_funcs;
+ enc1->base.ctx = ctx;
+ enc1->base.id = eng_id;
+ enc1->base.bp = bp;
+ enc1->base.vpg = vpg;
+ enc1->base.afmt = afmt;
+ enc1->regs = regs;
+ enc1->se_shift = se_shift;
+ enc1->se_mask = se_mask;
+ enc1->base.stream_enc_inst = vpg->inst;
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.h
new file mode 100644
index 000000000000..ddb33fdfb4ee
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.h
@@ -0,0 +1,332 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DC_DIO_STREAM_ENCODER_DCN35_H__
+#define __DC_DIO_STREAM_ENCODER_DCN35_H__
+
+#include "dcn30/dcn30_vpg.h"
+#include "dcn30/dcn30_afmt.h"
+#include "stream_encoder.h"
+#include "dcn20/dcn20_stream_encoder.h"
+
+/* Register bit field name change: local SHIFT/MASK pairs; each mask below is the single bit selected by the matching shift. */
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_GATE_DIS__SHIFT 0x8
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_EN__SHIFT 0x9
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_CLOCK_ON__SHIFT 0xa
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_DATA_SWAP__SHIFT 0xe
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_DATA_ORDER_INVERT__SHIFT 0xf
+
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_GATE_DIS_MASK 0x00000100L
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_EN_MASK 0x00000200L
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_CLOCK_ON_MASK 0x00000400L
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_DATA_SWAP_MASK 0x00004000L
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_DATA_ORDER_INVERT_MASK 0x00008000L
+
+/* Stream encoder register list for instance 'id' (SRI() is defined elsewhere). NOTE(review): DIG_FE_CNTL, DP_SEC_METADATA_TRANSMISSION and HDMI_METADATA_PACKET_CONTROL are each listed twice below. */
+#define SE_DCN35_REG_LIST(id)\
+ SRI(AFMT_CNTL, DIG, id), \
+ SRI(DIG_FE_CNTL, DIG, id), \
+ SRI(HDMI_CONTROL, DIG, id), \
+ SRI(HDMI_DB_CONTROL, DIG, id), \
+ SRI(HDMI_GC, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL0, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL1, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL2, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL3, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL4, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL5, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL6, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL7, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL8, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL9, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL10, DIG, id), \
+ SRI(HDMI_INFOFRAME_CONTROL0, DIG, id), \
+ SRI(HDMI_INFOFRAME_CONTROL1, DIG, id), \
+ SRI(HDMI_VBI_PACKET_CONTROL, DIG, id), \
+ SRI(HDMI_AUDIO_PACKET_CONTROL, DIG, id),\
+ SRI(HDMI_ACR_PACKET_CONTROL, DIG, id),\
+ SRI(HDMI_ACR_32_0, DIG, id),\
+ SRI(HDMI_ACR_32_1, DIG, id),\
+ SRI(HDMI_ACR_44_0, DIG, id),\
+ SRI(HDMI_ACR_44_1, DIG, id),\
+ SRI(HDMI_ACR_48_0, DIG, id),\
+ SRI(HDMI_ACR_48_1, DIG, id),\
+ SRI(DP_DB_CNTL, DP, id), \
+ SRI(DP_MSA_MISC, DP, id), \
+ SRI(DP_MSA_VBID_MISC, DP, id), \
+ SRI(DP_MSA_COLORIMETRY, DP, id), \
+ SRI(DP_MSA_TIMING_PARAM1, DP, id), \
+ SRI(DP_MSA_TIMING_PARAM2, DP, id), \
+ SRI(DP_MSA_TIMING_PARAM3, DP, id), \
+ SRI(DP_MSA_TIMING_PARAM4, DP, id), \
+ SRI(DP_MSE_RATE_CNTL, DP, id), \
+ SRI(DP_MSE_RATE_UPDATE, DP, id), \
+ SRI(DP_PIXEL_FORMAT, DP, id), \
+ SRI(DP_SEC_CNTL, DP, id), \
+ SRI(DP_SEC_CNTL1, DP, id), \
+ SRI(DP_SEC_CNTL2, DP, id), \
+ SRI(DP_SEC_CNTL5, DP, id), \
+ SRI(DP_SEC_CNTL6, DP, id), \
+ SRI(DP_STEER_FIFO, DP, id), \
+ SRI(DP_VID_M, DP, id), \
+ SRI(DP_VID_N, DP, id), \
+ SRI(DP_VID_STREAM_CNTL, DP, id), \
+ SRI(DP_VID_TIMING, DP, id), \
+ SRI(DP_SEC_AUD_N, DP, id), \
+ SRI(DP_SEC_TIMESTAMP, DP, id), \
+ SRI(DP_DSC_CNTL, DP, id), \
+ SRI(DP_SEC_METADATA_TRANSMISSION, DP, id), \
+ SRI(HDMI_METADATA_PACKET_CONTROL, DIG, id), \
+ SRI(DP_SEC_FRAMING4, DP, id), \
+ SRI(DP_GSP11_CNTL, DP, id), \
+ SRI(DME_CONTROL, DME, id),\
+ SRI(DP_SEC_METADATA_TRANSMISSION, DP, id), \
+ SRI(HDMI_METADATA_PACKET_CONTROL, DIG, id), \
+ SRI(DIG_FE_CNTL, DIG, id), \
+ SRI(DIG_FE_EN_CNTL, DIG, id), \
+ SRI(DIG_FE_CLK_CNTL, DIG, id), \
+ SRI(DIG_CLOCK_PATTERN, DIG, id), \
+ SRI(DIG_FIFO_CTRL0, DIG, id),\
+ SRI(STREAM_MAPPER_CONTROL, DIG, id)
+
+/* Stream encoder shift/mask field list (SE_SF() is defined elsewhere). Note the final entry ends with a comma and no continuation, so the expansion carries a trailing ','. */
+#define SE_COMMON_MASK_SH_LIST_DCN35(mask_sh)\
+ SE_SF(DP0_DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, mask_sh),\
+ SE_SF(DP0_DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, mask_sh),\
+ SE_SF(DP0_DP_PIXEL_FORMAT, DP_PIXEL_PER_CYCLE_PROCESSING_MODE, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_PACKET_GEN_VERSION, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_KEEPOUT_MODE, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_DATA_SCRAMBLE_EN, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_NO_EXTRA_NULL_PACKET_FILLED, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GC, HDMI_GC_AVMUTE, mask_sh),\
+ SE_SF(DP0_DP_MSE_RATE_CNTL, DP_MSE_RATE_X, mask_sh),\
+ SE_SF(DP0_DP_MSE_RATE_CNTL, DP_MSE_RATE_Y, mask_sh),\
+ SE_SF(DP0_DP_MSE_RATE_UPDATE, DP_MSE_RATE_UPDATE_PENDING, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP1_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_MPG_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP5_LINE_REFERENCE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_PENDING, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL4, DP_SEC_GSP4_LINE_NUM, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL5, DP_SEC_GSP5_LINE_NUM, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_ANY_LINE, mask_sh),\
+ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, mask_sh),\
+ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, mask_sh),\
+ SE_SF(DP0_DP_STEER_FIFO, DP_STEER_FIFO_RESET, mask_sh),\
+ SE_SF(DP0_DP_VID_TIMING, DP_VID_M_N_GEN_EN, mask_sh),\
+ SE_SF(DP0_DP_VID_N, DP_VID_N, mask_sh),\
+ SE_SF(DP0_DP_VID_M, DP_VID_M, mask_sh),\
+ SE_SF(DIG0_HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUDIO_PRIORITY, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_32_0, HDMI_ACR_CTS_32, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_32_1, HDMI_ACR_N_32, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_44_0, HDMI_ACR_CTS_44, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_44_1, HDMI_ACR_N_44, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_48_0, HDMI_ACR_CTS_48, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_48_1, HDMI_ACR_N_48, mask_sh),\
+ SE_SF(DP0_DP_SEC_AUD_N, DP_SEC_AUD_N, mask_sh),\
+ SE_SF(DP0_DP_SEC_TIMESTAMP, DP_SEC_TIMESTAMP_MODE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ASP_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ATP_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_AIP_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ACM_ENABLE, mask_sh),\
+ SE_SF(DIG0_AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_CLOCK_CHANNEL_RATE, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, TMDS_PIXEL_ENCODING, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, TMDS_COLOR_FORMAT, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_SELECT, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_GATE_EN, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP4_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP5_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP6_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP7_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP7_SEND, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL6, DP_SEC_GSP7_LINE_NUM, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP11_PPS, mask_sh),\
+ SE_SF(DP0_DP_GSP11_CNTL, DP_SEC_GSP11_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_GSP11_CNTL, DP_SEC_GSP11_LINE_NUM, mask_sh),\
+ SE_SF(DP0_DP_DB_CNTL, DP_DB_DISABLE, mask_sh),\
+ SE_SF(DP0_DP_MSA_COLORIMETRY, DP_MSA_MISC0, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM1, DP_MSA_HTOTAL, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM1, DP_MSA_VTOTAL, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM2, DP_MSA_HSTART, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM2, DP_MSA_VSTART, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_HSYNCWIDTH, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_HSYNCPOLARITY, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_VSYNCWIDTH, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_VSYNCPOLARITY, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_HWIDTH, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_VHEIGHT, mask_sh),\
+ SE_SF(DIG0_HDMI_DB_CONTROL, HDMI_DB_DISABLE, mask_sh),\
+ SE_SF(DP0_DP_VID_TIMING, DP_VID_N_MUL, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DIG_SOURCE_SELECT, mask_sh), \
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC2_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC2_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC3_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC3_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC4_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC4_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC5_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC5_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC6_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC6_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC7_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC7_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC8_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC8_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC9_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC9_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC10_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC10_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC11_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC11_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC12_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC12_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC13_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC13_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC14_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC14_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL1, HDMI_GENERIC0_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL1, HDMI_GENERIC1_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL2, HDMI_GENERIC2_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL2, HDMI_GENERIC3_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL3, HDMI_GENERIC4_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL3, HDMI_GENERIC5_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL4, HDMI_GENERIC6_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL4, HDMI_GENERIC7_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL7, HDMI_GENERIC8_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL7, HDMI_GENERIC9_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL8, HDMI_GENERIC10_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL8, HDMI_GENERIC11_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL9, HDMI_GENERIC12_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL9, HDMI_GENERIC13_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL10, HDMI_GENERIC14_LINE, mask_sh),\
+ SE_SF(DP0_DP_DSC_CNTL, DP_DSC_MODE, mask_sh),\
+ SE_SF(DP0_DP_MSA_VBID_MISC, DP_VBID6_LINE_REFERENCE, mask_sh),\
+ SE_SF(DP0_DP_MSA_VBID_MISC, DP_VBID6_LINE_NUM, mask_sh),\
+ SE_SF(DME0_DME_CONTROL, METADATA_ENGINE_EN, mask_sh),\
+ SE_SF(DME0_DME_CONTROL, METADATA_HUBP_REQUESTOR_ID, mask_sh),\
+ SE_SF(DME0_DME_CONTROL, METADATA_STREAM_TYPE, mask_sh),\
+ SE_SF(DP0_DP_SEC_METADATA_TRANSMISSION, DP_SEC_METADATA_PACKET_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_METADATA_TRANSMISSION, DP_SEC_METADATA_PACKET_LINE_REFERENCE, mask_sh),\
+ SE_SF(DP0_DP_SEC_METADATA_TRANSMISSION, DP_SEC_METADATA_PACKET_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_ENABLE, mask_sh),\
+ SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_LINE_REFERENCE, mask_sh),\
+ SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, DOLBY_VISION_EN, mask_sh),\
+ SE_SF(DIG0_DIG_FE_EN_CNTL, DIG_FE_ENABLE, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_MODE, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_SOFT_RESET, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_DISPCLK_G_CLOCK_ON, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_SYMCLK_FE_G_CLOCK_ON, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_SYMCLK_FE_G_AFMT_CLOCK_ON, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_SYMCLK_FE_G_TMDS_CLOCK_ON, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_SOCCLK_G_AFMT_CLOCK_ON, mask_sh),\
+ SE_SF(DP0_DP_SEC_FRAMING4, DP_SST_SDP_SPLITTING, mask_sh),\
+ SE_SF(DIG0_DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_MODE, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_RESET, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, mask_sh),\
+ SE_SF(DIG0_STREAM_MAPPER_CONTROL, DIG_STREAM_LINK_TARGET, mask_sh),
+
+void dcn35_dio_stream_encoder_construct(
+ struct dcn10_stream_encoder *enc1,
+ struct dc_context *ctx,
+ struct dc_bios *bp,
+ enum engine_id eng_id,
+ struct vpg *vpg,
+ struct afmt *afmt,
+ const struct dcn10_stream_enc_registers *regs,
+ const struct dcn10_stream_encoder_shift *se_shift,
+ const struct dcn10_stream_encoder_mask *se_mask);
+
+void enc3_stream_encoder_update_hdmi_info_packets(
+ struct stream_encoder *enc,
+ const struct encoder_info_frame *info_frame);
+
+void enc3_stream_encoder_stop_hdmi_info_packets(
+ struct stream_encoder *enc);
+
+void enc3_stream_encoder_update_dp_info_packets_sdp_line_num(
+ struct stream_encoder *enc,
+ struct encoder_info_frame *info_frame);
+
+void enc3_stream_encoder_update_dp_info_packets(
+ struct stream_encoder *enc,
+ const struct encoder_info_frame *info_frame);
+
+void enc3_audio_mute_control(
+ struct stream_encoder *enc,
+ bool mute);
+
+void enc3_se_dp_audio_setup(
+ struct stream_encoder *enc,
+ unsigned int az_inst,
+ struct audio_info *info);
+
+void enc3_se_dp_audio_enable(
+ struct stream_encoder *enc);
+
+void enc3_se_hdmi_audio_setup(
+ struct stream_encoder *enc,
+ unsigned int az_inst,
+ struct audio_info *info,
+ struct audio_crtc_info *audio_crtc_info);
+
+void enc3_dp_set_dsc_pps_info_packet(
+ struct stream_encoder *enc,
+ bool enable,
+ uint8_t *dsc_packed_pps,
+ bool immediate_update);
+
+void enc35_disable_fifo(
+ struct stream_encoder *enc);
+
+void enc35_enable_fifo(
+ struct stream_encoder *enc);
+
+
+#endif /* __DC_DIO_STREAM_ENCODER_DCN35_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_link_encoder.c
new file mode 100644
index 000000000000..7e558ca195ef
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_link_encoder.c
@@ -0,0 +1,322 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+#include "reg_helper.h"
+
+#include "core_types.h"
+#include "link_encoder.h"
+#include "dcn31/dcn31_dio_link_encoder.h"
+#include "dcn32/dcn32_dio_link_encoder.h"
+#include "dcn401_dio_link_encoder.h"
+#include "stream_encoder.h"
+#include "dc_bios_types.h"
+
+#include "gpio_service_interface.h"
+
+#ifndef MIN
+#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+#endif
+/* Register-access helpers: every macro below expands to an expression on a local variable named enc10, which must be in scope wherever they are used. */
+#define CTX \
+ enc10->base.ctx
+#define DC_LOGGER \
+ enc10->base.ctx->logger
+
+#define REG(reg)\
+ (enc10->link_regs->reg)
+
+#undef FN
+#define FN(reg_name, field_name) \
+ enc10->link_shift->field_name, enc10->link_mask->field_name
+
+#define AUX_REG(reg)\
+ (enc10->aux_regs->reg)
+
+#define AUX_REG_READ(reg_name) \
+ dm_read_reg(CTX, AUX_REG(reg_name))
+
+#define AUX_REG_WRITE(reg_name, val) \
+ dm_write_reg(CTX, AUX_REG(reg_name), val)
+/* NOTE(review): MIN is already guarded and defined earlier in this file, so this second guard never takes effect. */
+#ifndef MIN
+#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+#endif
+/* Link encoder hardware init: write fixed AUX DPHY RX/TX control values, set TMDS_CTL0 to 1, then call dcn10_aux_initialize(). */
+void enc401_hw_init(struct link_encoder *enc)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+/*
+ 00 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__1to2 : 1/2
+ 01 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__3to4 : 3/4
+ 02 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__7to8 : 7/8
+ 03 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__15to16 : 15/16
+ 04 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__31to32 : 31/32
+ 05 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__63to64 : 63/64
+ 06 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__127to128 : 127/128
+ 07 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__255to256 : 255/256
+*/
+ /* The block below lists the per-field values that the raw 0x103d1110 write decomposes into. */
+/*
+ AUX_REG_UPDATE_5(AUX_DPHY_RX_CONTROL0,
+ AUX_RX_START_WINDOW = 1 [6:4]
+ AUX_RX_RECEIVE_WINDOW = 1 default is 2 [10:8]
+ AUX_RX_HALF_SYM_DETECT_LEN = 1 [13:12] default is 1
+ AUX_RX_TRANSITION_FILTER_EN = 1 [16] default is 1
+ AUX_RX_ALLOW_BELOW_THRESHOLD_PHASE_DETECT [17] is 0 default is 0
+ AUX_RX_ALLOW_BELOW_THRESHOLD_START [18] is 1 default is 1
+ AUX_RX_ALLOW_BELOW_THRESHOLD_STOP [19] is 1 default is 1
+ AUX_RX_PHASE_DETECT_LEN, [21,20] = 0x3 default is 3
+ AUX_RX_DETECTION_THRESHOLD [30:28] = 1
+*/
+ AUX_REG_WRITE(AUX_DPHY_RX_CONTROL0, 0x103d1110);
+
+ AUX_REG_WRITE(AUX_DPHY_TX_CONTROL, 0x21c7a);
+
+ // AUX_DPHY_TX_REF_CONTROL.AUX_TX_REF_DIV HW default is 0x32;
+ // Set AUX_TX_REF_DIV Divider to generate 2 MHz reference from refclk
+ // 27MHz -> 0xd
+ // 100MHz -> 0x32
+ // 48MHz -> 0x18
+ // NOTE: the divider notes above are informational; this function does not itself write AUX_DPHY_TX_REF_CONTROL.
+ // Set TMDS_CTL0 to 1. This is a legacy setting.
+ REG_UPDATE(TMDS_CTL_BITS, TMDS_CTL0, 1);
+
+ dcn10_aux_initialize(enc10);
+}
+
+/* Enable DP output by delegating to dcn10_link_encoder_enable_dp_output(); when debug.avoid_vbios_exec_table is set this function does nothing. */
+void dcn401_link_encoder_enable_dp_output(
+ struct link_encoder *enc,
+ const struct dc_link_settings *link_settings,
+ enum clock_source_id clock_source)
+{
+ if (!enc->ctx->dc->debug.avoid_vbios_exec_table) {
+ dcn10_link_encoder_enable_dp_output(enc, link_settings, clock_source);
+ return;
+ }
+}
+/* Program DIG_BE_MODE for the requested signal type, then set DIG_BE_CLK_EN and DIG_BE_ENABLE to 1. */
+void dcn401_link_encoder_setup(
+ struct link_encoder *enc,
+ enum signal_type signal)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ /* eDP shares mode 0 with DP SST; single- and dual-link DVI share mode 2. */
+ switch (signal) {
+ case SIGNAL_TYPE_EDP:
+ case SIGNAL_TYPE_DISPLAY_PORT:
+ /* DP SST */
+ REG_UPDATE(DIG_BE_CLK_CNTL, DIG_BE_MODE, 0);
+ break;
+ case SIGNAL_TYPE_DVI_SINGLE_LINK:
+ case SIGNAL_TYPE_DVI_DUAL_LINK:
+ /* TMDS-DVI */
+ REG_UPDATE(DIG_BE_CLK_CNTL, DIG_BE_MODE, 2);
+ break;
+ case SIGNAL_TYPE_HDMI_TYPE_A:
+ /* TMDS-HDMI */
+ REG_UPDATE(DIG_BE_CLK_CNTL, DIG_BE_MODE, 3);
+ break;
+ case SIGNAL_TYPE_DISPLAY_PORT_MST:
+ /* DP MST */
+ REG_UPDATE(DIG_BE_CLK_CNTL, DIG_BE_MODE, 5);
+ break;
+ default:
+ ASSERT_CRITICAL(false);
+ /* invalid mode: DIG_BE_MODE is left unchanged */
+ break;
+ }
+ REG_UPDATE(DIG_BE_CLK_CNTL, DIG_BE_CLK_EN, 1); /* reached for every signal, including the asserted default case */
+ REG_UPDATE(DIG_BE_EN_CNTL, DIG_BE_ENABLE, 1);
+}
+/* Return true only when both DIG_BE_CLK_EN and DIG_BE_ENABLE read back as 1. */
+bool dcn401_is_dig_enabled(struct link_encoder *enc)
+{
+ uint32_t clk_enabled;
+ uint32_t dig_enabled;
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+ REG_GET(DIG_BE_CLK_CNTL, DIG_BE_CLK_EN, &clk_enabled);
+ REG_GET(DIG_BE_EN_CNTL, DIG_BE_ENABLE, &dig_enabled);
+ return (clk_enabled == 1 && dig_enabled == 1);
+}
+/* Read DIG_BE_MODE and map it to a signal type; values other than 0, 2, 3 and 5 yield SIGNAL_TYPE_NONE. */
+enum signal_type dcn401_get_dig_mode(
+ struct link_encoder *enc)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ uint32_t value;
+ REG_GET(DIG_BE_CLK_CNTL, DIG_BE_MODE, &value);
+ switch (value) {
+ case 0: /* also the mode dcn401_link_encoder_setup() writes for eDP */
+ return SIGNAL_TYPE_DISPLAY_PORT;
+ case 2: /* also the mode dcn401_link_encoder_setup() writes for dual-link DVI */
+ return SIGNAL_TYPE_DVI_SINGLE_LINK;
+ case 3:
+ return SIGNAL_TYPE_HDMI_TYPE_A;
+ case 5:
+ return SIGNAL_TYPE_DISPLAY_PORT_MST;
+ default:
+ return SIGNAL_TYPE_NONE;
+ }
+}
+/* DCN4.01 link encoder hook table: hw_init, setup, enable_dp_output, is_dig_enabled and get_dig_mode are defined in this file; every other hook points at a function defined elsewhere. */
+static const struct link_encoder_funcs dcn401_link_enc_funcs = {
+ .read_state = link_enc2_read_state,
+ .validate_output_with_stream =
+ dcn30_link_encoder_validate_output_with_stream,
+ .hw_init = enc401_hw_init,
+ .setup = dcn401_link_encoder_setup,
+ .enable_tmds_output = dcn10_link_encoder_enable_tmds_output,
+ .enable_dp_output = dcn401_link_encoder_enable_dp_output,
+ .enable_dp_mst_output = dcn10_link_encoder_enable_dp_mst_output,
+ .disable_output = dcn10_link_encoder_disable_output,
+ .dp_set_lane_settings = dcn10_link_encoder_dp_set_lane_settings,
+ .dp_set_phy_pattern = dcn10_link_encoder_dp_set_phy_pattern,
+ .update_mst_stream_allocation_table =
+ dcn10_link_encoder_update_mst_stream_allocation_table,
+ .psr_program_dp_dphy_fast_training =
+ dcn10_psr_program_dp_dphy_fast_training,
+ .psr_program_secondary_packet = dcn10_psr_program_secondary_packet,
+ .connect_dig_be_to_fe = dcn10_link_encoder_connect_dig_be_to_fe,
+ .enable_hpd = dcn10_link_encoder_enable_hpd,
+ .disable_hpd = dcn10_link_encoder_disable_hpd,
+ .is_dig_enabled = dcn401_is_dig_enabled,
+ .destroy = dcn10_link_encoder_destroy,
+ .fec_set_enable = enc2_fec_set_enable,
+ .fec_set_ready = enc2_fec_set_ready,
+ .fec_is_active = enc2_fec_is_active,
+ .get_dig_frontend = dcn10_get_dig_frontend,
+ .get_dig_mode = dcn401_get_dig_mode,
+ .is_in_alt_mode = dcn32_link_encoder_is_in_alt_mode,
+ .get_max_link_cap = dcn32_link_encoder_get_max_link_cap,
+ .set_dio_phy_mux = dcn31_link_encoder_set_dio_phy_mux,
+};
+/* Construct a DCN4.01 link encoder: fill enc20->enc10 from init_data and the register/shift/mask tables, pick the preferred DIG engine from the transmitter, then set feature bits from the BIOS connector speed caps. */
+void dcn401_link_encoder_construct(
+ struct dcn20_link_encoder *enc20,
+ const struct encoder_init_data *init_data,
+ const struct encoder_feature_support *enc_features,
+ const struct dcn10_link_enc_registers *link_regs,
+ const struct dcn10_link_enc_aux_registers *aux_regs,
+ const struct dcn10_link_enc_hpd_registers *hpd_regs,
+ const struct dcn10_link_enc_shift *link_shift,
+ const struct dcn10_link_enc_mask *link_mask)
+{
+ struct bp_connector_speed_cap_info bp_cap_info = {0};
+ const struct dc_vbios_funcs *bp_funcs = init_data->ctx->dc_bios->funcs;
+ enum bp_result result = BP_RESULT_OK;
+ struct dcn10_link_encoder *enc10 = &enc20->enc10;
+
+ enc10->base.funcs = &dcn401_link_enc_funcs;
+ enc10->base.ctx = init_data->ctx;
+ enc10->base.id = init_data->encoder;
+
+ enc10->base.hpd_source = init_data->hpd_source;
+ enc10->base.connector = init_data->connector;
+
+
+ enc10->base.preferred_engine = ENGINE_ID_UNKNOWN; /* provisional; assigned again by the transmitter switch below */
+
+ enc10->base.features = *enc_features; /* struct copy; individual bits are adjusted below */
+ if (enc10->base.connector.id == CONNECTOR_ID_USBC)
+ enc10->base.features.flags.bits.DP_IS_USB_C = 1;
+
+ enc10->base.transmitter = init_data->transmitter;
+
+ /* set the flag to indicate whether the driver polls the I2C data pin
+ * while doing the DP sink detect
+ */
+ /* NOTE(review): the code that set this flag is commented out below, so the bit keeps the value copied from enc_features. */
+/* if (dal_adapter_service_is_feature_supported(as,
+ FEATURE_DP_SINK_DETECT_POLL_DATA_PIN))
+ enc10->base.features.flags.bits.
+ DP_SINK_DETECT_POLL_DATA_PIN = true;*/
+
+ enc10->base.output_signals =
+ SIGNAL_TYPE_DVI_SINGLE_LINK |
+ SIGNAL_TYPE_DVI_DUAL_LINK |
+ SIGNAL_TYPE_LVDS |
+ SIGNAL_TYPE_DISPLAY_PORT |
+ SIGNAL_TYPE_DISPLAY_PORT_MST |
+ SIGNAL_TYPE_EDP |
+ SIGNAL_TYPE_HDMI_TYPE_A;
+
+ enc10->link_regs = link_regs;
+ enc10->aux_regs = aux_regs;
+ enc10->hpd_regs = hpd_regs;
+ enc10->link_shift = link_shift;
+ enc10->link_mask = link_mask;
+ /* Map UNIPHY_A..E to DIGA..DIGE; any other transmitter asserts and leaves ENGINE_ID_UNKNOWN. */
+ switch (enc10->base.transmitter) {
+ case TRANSMITTER_UNIPHY_A:
+ enc10->base.preferred_engine = ENGINE_ID_DIGA;
+ break;
+ case TRANSMITTER_UNIPHY_B:
+ enc10->base.preferred_engine = ENGINE_ID_DIGB;
+ break;
+ case TRANSMITTER_UNIPHY_C:
+ enc10->base.preferred_engine = ENGINE_ID_DIGC;
+ break;
+ case TRANSMITTER_UNIPHY_D:
+ enc10->base.preferred_engine = ENGINE_ID_DIGD;
+ break;
+ case TRANSMITTER_UNIPHY_E:
+ enc10->base.preferred_engine = ENGINE_ID_DIGE;
+ break;
+ default:
+ ASSERT_CRITICAL(false);
+ enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
+ }
+
+ /* default to one to mirror Windows behavior */
+ enc10->base.features.flags.bits.HDMI_6GB_EN = 1;
+
+ if (bp_funcs->get_connector_speed_cap_info)
+ result = bp_funcs->get_connector_speed_cap_info(enc10->base.ctx->dc_bios,
+ enc10->base.connector, &bp_cap_info);
+ /* NOTE(review): if the hook is absent, result stays BP_RESULT_OK and the zero-initialised bp_cap_info is applied below. */
+ /* Override features with the capabilities reported in bp_cap_info */
+ if (result == BP_RESULT_OK) {
+ enc10->base.features.flags.bits.IS_HBR2_CAPABLE =
+ bp_cap_info.DP_HBR2_EN;
+ enc10->base.features.flags.bits.IS_HBR3_CAPABLE =
+ bp_cap_info.DP_HBR3_EN;
+ enc10->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN;
+ enc10->base.features.flags.bits.IS_DP2_CAPABLE = 1; /* set unconditionally in this branch, not read from bp_cap_info */
+ enc10->base.features.flags.bits.IS_UHBR10_CAPABLE = bp_cap_info.DP_UHBR10_EN;
+ enc10->base.features.flags.bits.IS_UHBR13_5_CAPABLE = bp_cap_info.DP_UHBR13_5_EN;
+ enc10->base.features.flags.bits.IS_UHBR20_CAPABLE = bp_cap_info.DP_UHBR20_EN;
+ } else {
+ DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n",
+ __func__,
+ result);
+ }
+ if (enc10->base.ctx->dc->debug.hdmi20_disable) { /* debug option overrides whatever was chosen above */
+ enc10->base.features.flags.bits.HDMI_6GB_EN = 0;
+ }
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_link_encoder.h
new file mode 100644
index 000000000000..6baab8302b81
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_link_encoder.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_LINK_ENCODER__DCN401_H__
+#define __DC_LINK_ENCODER__DCN401_H__
+
+#include "dcn30/dcn30_dio_link_encoder.h"
+/* Link encoder shift/mask field list (LE_SF() is defined elsewhere). NOTE(review): the DIG0_TMDS_CTL_BITS/TMDS_CTL0 entry appears twice below. */
+#define LINK_ENCODER_MASK_SH_LIST_DCN401(mask_sh) \
+ LE_SF(DIG0_DIG_BE_EN_CNTL, DIG_BE_ENABLE, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CNTL, DIG_RB_SWITCH_EN, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CNTL, DIG_HPD_SELECT, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CNTL, DIG_FE_SOURCE_SELECT, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_MODE, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_CLK_EN, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_SOFT_RESET, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CLK_CNTL, HDCP_SOFT_RESET, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_SYMCLK_G_CLOCK_ON, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_SYMCLK_G_HDCP_CLOCK_ON, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_SYMCLK_G_TMDS_CLOCK_ON, mask_sh),\
+ LE_SF(DIG0_DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, mask_sh),\
+ LE_SF(DIG0_TMDS_CTL_BITS, TMDS_CTL0, mask_sh), \
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_BYPASS, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE0, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE1, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE2, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE3, mask_sh),\
+ LE_SF(DP0_DP_DPHY_PRBS_CNTL, DPHY_PRBS_EN, mask_sh),\
+ LE_SF(DP0_DP_DPHY_PRBS_CNTL, DPHY_PRBS_SEL, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM1, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM2, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM3, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM4, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM5, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM6, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM2, DPHY_SYM7, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM2, DPHY_SYM8, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_BS_COUNT, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_ADVANCE, mask_sh),\
+ LE_SF(DP0_DP_DPHY_FAST_TRAINING, DPHY_RX_FAST_TRAINING_CAPABLE, mask_sh),\
+ LE_SF(DP0_DP_DPHY_BS_SR_SWAP_CNTL, DPHY_LOAD_BS_COUNT, mask_sh),\
+ LE_SF(DP0_DP_DPHY_TRAINING_PATTERN_SEL, DPHY_TRAINING_PATTERN_SEL, mask_sh),\
+ LE_SF(DP0_DP_DPHY_HBR2_PATTERN_CONTROL, DP_DPHY_HBR2_PATTERN_CONTROL, mask_sh),\
+ LE_SF(DP0_DP_LINK_CNTL, DP_LINK_TRAINING_COMPLETE, mask_sh),\
+ LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_IDLE_BS_INTERVAL, mask_sh),\
+ LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_VBID_DISABLE, mask_sh),\
+ LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_VID_ENHANCED_FRAME_MODE, mask_sh),\
+ LE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\
+ LE_SF(DP0_DP_CONFIG, DP_UDI_LANES, mask_sh),\
+ LE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP0_LINE_NUM, mask_sh),\
+ LE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP0_PRIORITY, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SRC0, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SRC1, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SLOT_COUNT0, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SLOT_COUNT1, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SRC2, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SRC3, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SLOT_COUNT2, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SLOT_COUNT3, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT_UPDATE, DP_MSE_SAT_UPDATE, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT_UPDATE, DP_MSE_16_MTP_KEEPOUT, mask_sh),\
+ LE_SF(DP_AUX0_AUX_CONTROL, AUX_HPD_SEL, mask_sh),\
+ LE_SF(DP_AUX0_AUX_CONTROL, AUX_LS_READ_EN, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_RECEIVE_WINDOW, mask_sh),\
+ LE_SF(HPD0_DC_HPD_CONTROL, DC_HPD_EN, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_FEC_EN, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_FEC_READY_SHADOW, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_FEC_ACTIVE_STATUS, mask_sh),\
+ LE_SF(DIG0_TMDS_CTL_BITS, TMDS_CTL0, mask_sh), \
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_START_WINDOW, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_HALF_SYM_DETECT_LEN, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_TRANSITION_FILTER_EN, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_ALLOW_BELOW_THRESHOLD_PHASE_DETECT, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_ALLOW_BELOW_THRESHOLD_START, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_ALLOW_BELOW_THRESHOLD_STOP, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_PHASE_DETECT_LEN, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_DETECTION_THRESHOLD, mask_sh), \
+ LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_TX_PRECHARGE_LEN, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_TX_PRECHARGE_SYMBOLS, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_MODE_DET_CHECK_DELAY, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_PRECHARGE_SKIP, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_TIMEOUT_LEN, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_TIMEOUT_LEN_MUL, mask_sh)
+
+void dcn401_link_encoder_construct(
+ struct dcn20_link_encoder *enc20,
+ const struct encoder_init_data *init_data,
+ const struct encoder_feature_support *enc_features,
+ const struct dcn10_link_enc_registers *link_regs,
+ const struct dcn10_link_enc_aux_registers *aux_regs,
+ const struct dcn10_link_enc_hpd_registers *hpd_regs,
+ const struct dcn10_link_enc_shift *link_shift,
+ const struct dcn10_link_enc_mask *link_mask);
+
+void enc401_hw_init(struct link_encoder *enc);
+
+void dcn401_link_encoder_enable_dp_output(
+ struct link_encoder *enc,
+ const struct dc_link_settings *link_settings,
+ enum clock_source_id clock_source);
+
+void dcn401_link_encoder_setup(
+ struct link_encoder *enc,
+ enum signal_type signal);
+
+enum signal_type dcn401_get_dig_mode(
+ struct link_encoder *enc);
+
+bool dcn401_is_dig_enabled(struct link_encoder *enc);
+/* NOTE(review): redundant - dcn401_get_dig_mode() is already declared a few lines above in this header. */
+enum signal_type dcn401_get_dig_mode(struct link_encoder *enc);
+#endif /* __DC_LINK_ENCODER__DCN401_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c
new file mode 100644
index 000000000000..99aab70ef3e1
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c
@@ -0,0 +1,856 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+#include "dc_bios_types.h"
+#include "dcn30/dcn30_dio_stream_encoder.h"
+#include "dcn32/dcn32_dio_stream_encoder.h"
+#include "dcn35/dcn35_dio_stream_encoder.h"
+
+#include "dcn401_dio_stream_encoder.h"
+#include "reg_helper.h"
+#include "hw_shared.h"
+#include "link_service.h"
+#include "dpcd_defs.h"
+/* The helper macros below all expand to expressions on a local variable named
+ * enc1, so every function that uses REG_* helpers, DC_LOGGER or CTX must have
+ * a struct dcn10_stream_encoder *enc1 in scope.
+ */
+#define DC_LOGGER \
+ enc1->base.ctx->logger
+
+#define REG(reg)\
+ (enc1->regs->reg) /* register lookup through enc1->regs */
+
+#undef FN
+#define FN(reg_name, field_name) \
+ enc1->se_shift->field_name, enc1->se_mask->field_name /* expands to "shift, mask"; reg_name is unused */
+
+#define VBI_LINE_0 0 /* base line number; the HDMI audio InfoFrame line is programmed as VBI_LINE_0 + 2 */
+#define HDMI_CLOCK_CHANNEL_RATE_MORE_340M 340000 /* compared against actual_pix_clk_khz, i.e. a kHz value */
+
+#define CTX \
+ enc1->base.ctx
+
+
+/* Empty implementation: both arguments are ignored and no register is written.
+ * It is still installed as .dp_set_odm_combine in dcn401_str_enc_funcs.
+ */
+static void enc401_dp_set_odm_combine(
+ struct stream_encoder *enc,
+ bool odm_combine)
+{
+}
+
+/* Set up the stream encoder in DVI mode.
+ *
+ * When debug.avoid_vbios_exec_table is clear, an ENCODER_CONTROL_SETUP request
+ * is sent through enc1->base.bp and the function returns early (without
+ * touching HDMI_CONTROL) if the call does not report BP_RESULT_OK. When the
+ * flag is set, only DIG_CLOCK_PATTERN is programmed directly. Both surviving
+ * paths finish by calling the shared TMDS attribute helper.
+ *
+ * enc: stream encoder; converted to the containing dcn10_stream_encoder
+ * crtc_timing: supplies pix_clk_100hz, pixel_encoding and display_color_depth
+ * is_dual_link: selects the dual-link signal type and eight lanes
+ */
+void enc401_stream_encoder_dvi_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ bool is_dual_link)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ if (!enc->ctx->dc->debug.avoid_vbios_exec_table) {
+ struct bp_encoder_control cntl = {0};
+
+ cntl.action = ENCODER_CONTROL_SETUP;
+ cntl.engine_id = enc1->base.id;
+ cntl.signal = is_dual_link ?
+ SIGNAL_TYPE_DVI_DUAL_LINK : SIGNAL_TYPE_DVI_SINGLE_LINK;
+ cntl.enable_dp_audio = false;
+ cntl.pixel_clock = crtc_timing->pix_clk_100hz / 10; /* 100 Hz units / 10 = kHz */
+ cntl.lanes_number = (is_dual_link) ? LANE_COUNT_EIGHT : LANE_COUNT_FOUR;
+
+ if (enc1->base.bp->funcs->encoder_control(
+ enc1->base.bp, &cntl) != BP_RESULT_OK)
+ return; /* failure is not reported to the caller (void return) */
+
+ } else {
+
+ //Set pattern for clock channel, default value 0x63 does not work
+ REG_UPDATE(DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, 0x1F);
+
+ //DIG_BE_TMDS_DVI_MODE : TMDS-DVI mode is already set in link_encoder_setup
+
+ //DIG_SOURCE_SELECT is already set in dig_connect_to_otg
+
+ /* DIG_START is removed from the register spec */
+ }
+
+ /* this path only expects RGB at 8 bits per component */
+ ASSERT(crtc_timing->pixel_encoding == PIXEL_ENCODING_RGB);
+ ASSERT(crtc_timing->display_color_depth == COLOR_DEPTH_888);
+ enc401_stream_encoder_set_stream_attribute_helper(enc1, crtc_timing);
+}
+
+/* Set up the stream encoder in HDMI mode.
+ *
+ * When debug.avoid_vbios_exec_table is clear, an ENCODER_CONTROL_SETUP request
+ * is sent through enc1->base.bp and the function returns early, before any
+ * HDMI register is written, if the call does not report BP_RESULT_OK. When
+ * the flag is set, only DIG_CLOCK_PATTERN is programmed directly.
+ *
+ * Afterwards the function programs, in order: the TMDS pixel encoding (shared
+ * helper), HDMI_CONTROL defaults, deep-color depth/enable, scrambling and
+ * clock-channel rate, general-control/null packets, audio InfoFrame
+ * transmission and the AVMUTE bit.
+ *
+ * enc: stream encoder; enc->afmt is dereferenced and must be set
+ * crtc_timing: supplies pixel_encoding, display_color_depth and
+ *              flags.LTE_340MCSC_SCRAMBLE
+ * actual_pix_clk_khz: pixel clock in kHz; compared against
+ *                     HDMI_CLOCK_CHANNEL_RATE_MORE_340M
+ * enable_audio: forwarded to the VBIOS request as enable_dp_audio
+ */
+void enc401_stream_encoder_hdmi_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ int actual_pix_clk_khz,
+ bool enable_audio)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ if (!enc->ctx->dc->debug.avoid_vbios_exec_table) {
+ struct bp_encoder_control cntl = {0};
+
+ cntl.action = ENCODER_CONTROL_SETUP;
+ cntl.engine_id = enc1->base.id;
+ cntl.signal = SIGNAL_TYPE_HDMI_TYPE_A;
+ cntl.enable_dp_audio = enable_audio;
+ cntl.pixel_clock = actual_pix_clk_khz;
+ cntl.lanes_number = LANE_COUNT_FOUR;
+
+ if (enc1->base.bp->funcs->encoder_control(
+ enc1->base.bp, &cntl) != BP_RESULT_OK)
+ return; /* failure is not reported to the caller (void return) */
+
+ } else {
+
+ //Set pattern for clock channel, default value 0x63 does not work
+ REG_UPDATE(DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, 0x1F);
+
+ //DIG_BE_TMDS_HDMI_MODE : TMDS-HDMI mode is already set in link_encoder_setup
+
+ //DIG_SOURCE_SELECT is already set in dig_connect_to_otg
+
+ /* DIG_START is removed from the register spec */
+ }
+
+ /* Configure pixel encoding */
+ enc401_stream_encoder_set_stream_attribute_helper(enc1, crtc_timing);
+
+ /* setup HDMI engine: deep color, scrambling and clock-channel rate start
+ * at 0 and are raised below where the timing requires it
+ */
+ REG_UPDATE_6(HDMI_CONTROL,
+ HDMI_PACKET_GEN_VERSION, 1,
+ HDMI_KEEPOUT_MODE, 1,
+ HDMI_DEEP_COLOR_ENABLE, 0,
+ HDMI_DATA_SCRAMBLE_EN, 0,
+ HDMI_NO_EXTRA_NULL_PACKET_FILLED, 1,
+ HDMI_CLOCK_CHANNEL_RATE, 0);
+
+ /* Configure color depth; for 10 and 12 bpc the depth field is written
+ * but deep color stays disabled when the encoding is YCbCr 4:2:2
+ */
+ switch (crtc_timing->display_color_depth) {
+ case COLOR_DEPTH_888:
+ REG_UPDATE(HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 0);
+ break;
+ case COLOR_DEPTH_101010:
+ if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 1,
+ HDMI_DEEP_COLOR_ENABLE, 0);
+ } else {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 1,
+ HDMI_DEEP_COLOR_ENABLE, 1);
+ }
+ break;
+ case COLOR_DEPTH_121212:
+ if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 2,
+ HDMI_DEEP_COLOR_ENABLE, 0);
+ } else {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 2,
+ HDMI_DEEP_COLOR_ENABLE, 1);
+ }
+ break;
+ case COLOR_DEPTH_161616:
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 3,
+ HDMI_DEEP_COLOR_ENABLE, 1);
+ break;
+ default:
+ break; /* other depths leave HDMI_DEEP_COLOR_DEPTH untouched */
+ }
+
+ if (actual_pix_clk_khz >= HDMI_CLOCK_CHANNEL_RATE_MORE_340M) {
+ /* enable HDMI data scrambler
+ * HDMI_CLOCK_CHANNEL_RATE_MORE_340M
+ * Clock channel frequency is 1/4 of character rate.
+ */
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DATA_SCRAMBLE_EN, 1,
+ HDMI_CLOCK_CHANNEL_RATE, 1);
+ } else if (crtc_timing->flags.LTE_340MCSC_SCRAMBLE) {
+
+ /* TODO: New feature for DCE11, still need to implement */
+
+ /* enable HDMI data scrambler
+ * HDMI_CLOCK_CHANNEL_FREQ_EQUAL_TO_CHAR_RATE
+ * Clock channel frequency is the same
+ * as character rate
+ */
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DATA_SCRAMBLE_EN, 1,
+ HDMI_CLOCK_CHANNEL_RATE, 0);
+ }
+
+
+ /* Enable transmission of General Control packet on every frame */
+ REG_UPDATE_3(HDMI_VBI_PACKET_CONTROL,
+ HDMI_GC_CONT, 1,
+ HDMI_GC_SEND, 1,
+ HDMI_NULL_SEND, 1);
+
+ /* Disable Audio Content Protection packet transmission */
+ REG_UPDATE(HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, 0);
+ /* following belongs to audio */
+ /* Enable Audio InfoFrame packet transmission. */
+ REG_UPDATE(HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
+
+ /* update double-buffered AUDIO_INFO registers immediately */
+ ASSERT(enc->afmt); /* NOTE(review): afmt is dereferenced on the next line regardless of the ASSERT outcome */
+ enc->afmt->funcs->audio_info_immediate_update(enc->afmt);
+
+ /* Select line number on which to send Audio InfoFrame packets */
+ REG_UPDATE(HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE,
+ VBI_LINE_0 + 2);
+
+ /* set HDMI GC AVMUTE (written as 0 here) */
+ REG_UPDATE(HDMI_GC, HDMI_GC_AVMUTE, 0);
+}
+
+/* Program DIG_FIFO_OUTPUT_PIXEL_PER_CYCLE from the pixels-per-container count.
+ *
+ * The field name is misleading: it describes the output mode of the OTG,
+ * which is the input mode of the DIG. Counts of 2, 4 and 8 select 0x1, 0x2
+ * and 0x3; every other count selects 0x0.
+ */
+void enc401_set_dig_input_mode(struct stream_encoder *enc, unsigned int pix_per_container)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	uint32_t pixel_per_cycle_sel;
+
+	if (pix_per_container == 2)
+		pixel_per_cycle_sel = 0x1;
+	else if (pix_per_container == 4)
+		pixel_per_cycle_sel = 0x2;
+	else if (pix_per_container == 8)
+		pixel_per_cycle_sel = 0x3;
+	else
+		pixel_per_cycle_sel = 0x0;
+
+	/* exactly one write, as in every branch of the original switch */
+	REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_PER_CYCLE, pixel_per_cycle_sel);
+}
+
+/* Tell whether the timing packs two pixels into one container.
+ *
+ * True for YCbCr 4:2:0, and for YCbCr 4:2:2 when DSC is enabled and
+ * dsc_cfg.ycbcr422_simple is not set.
+ */
+static bool is_two_pixels_per_containter(const struct dc_crtc_timing *timing)
+{
+	if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420)
+		return true;
+
+	return timing->flags.DSC
+		&& timing->pixel_encoding == PIXEL_ENCODING_YCBCR422
+		&& !timing->dsc_cfg.ycbcr422_simple;
+}
+/* Unblank the DP video stream.
+ *
+ * When the link rate is known, initial Mvid/Nvid values are computed and
+ * programmed with DP_VID_M_N_GEN_EN cleared, DP_VID_N_INTERVAL is chosen from
+ * param->pix_per_cycle, and DP_VID_M_N_GEN_EN is set again. The function then
+ * disables the video stream and waits for its status to clear, resets and
+ * enables the steer FIFO, enables the stream, resets and enables the DIG
+ * FIFO, and finally records DPCD_SOURCE_SEQ_AFTER_ENABLE_DP_VID_STREAM through
+ * the link service. The order of the register writes and delays is
+ * significant.
+ *
+ * link: used only for the trace call at the end
+ * enc: stream encoder; converted to the containing dcn10_stream_encoder
+ * param: supplies link_settings.link_rate, timing and pix_per_cycle
+ */
+void enc401_stream_encoder_dp_unblank(
+ struct dc_link *link,
+ struct stream_encoder *enc,
+ const struct encoder_unblank_param *param)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ if (param->link_settings.link_rate != LINK_RATE_UNKNOWN) {
+ uint32_t n_vid = 0x8000;
+ uint32_t m_vid;
+ uint32_t pix_per_container = 1;
+ uint64_t m_vid_l = n_vid; /* 64-bit accumulator for the n_vid * pixel-rate product */
+
+ /* YCbCr 4:2:0, or YCbCr 4:2:2 with DSC when ycbcr422_simple is not set:
+ * Computed VID_M will be 2X the input rate
+ */
+ if (is_two_pixels_per_containter(&param->timing)) {
+ pix_per_container = 2;
+ }
+
+ /* M / N = Fstream / Flink
+ * m_vid / n_vid = pixel rate / link rate
+ */
+ m_vid_l *= param->timing.pix_clk_100hz / pix_per_container / 10;
+ m_vid_l = div_u64(m_vid_l,
+ param->link_settings.link_rate
+ * LINK_RATE_REF_FREQ_IN_KHZ);
+
+ m_vid = (uint32_t) m_vid_l;
+
+ /* clear DP_VID_M_N_GEN_EN while the initial Mvid/Nvid values are
+ * programmed; it is set again after DP_VID_N_INTERVAL below
+ */
+
+ REG_UPDATE(DP_VID_TIMING, DP_VID_M_N_GEN_EN, 0);
+
+ /* auto measurement need 1 full 0x8000 symbol cycle to kick in,
+ * therefore program initial value for Mvid and Nvid
+ */
+
+ REG_UPDATE(DP_VID_N, DP_VID_N, n_vid);
+
+ REG_UPDATE(DP_VID_M, DP_VID_M, m_vid);
+
+ /* reduce jitter based on read rate */
+ switch (param->pix_per_cycle) {
+ case 2:
+ REG_UPDATE(DP_VID_TIMING, DP_VID_N_INTERVAL, 0x1);
+ break;
+ case 4:
+ REG_UPDATE(DP_VID_TIMING, DP_VID_N_INTERVAL, 0x2);
+ break;
+ case 8:
+ REG_UPDATE(DP_VID_TIMING, DP_VID_N_INTERVAL, 0x3);
+ break;
+ default:
+ REG_UPDATE(DP_VID_TIMING, DP_VID_N_INTERVAL, 0x0);
+ break;
+ }
+
+ REG_UPDATE(DP_VID_TIMING, DP_VID_M_N_GEN_EN, 1);
+ }
+
+ /* make sure stream is disabled before resetting steer fifo */
+ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, false);
+ REG_WAIT(DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, 0, 10, 5000);
+
+ /* DIG_START is removed from the register spec */
+
+ /* switch DP encoder to CRTC data, but reset it the fifo first. It may happen
+ * that it overflows during mode transition, and sometimes doesn't recover.
+ */
+ REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 1);
+ udelay(10);
+
+ REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 0);
+
+ REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_ENABLE, 1);
+
+ REG_UPDATE_2(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, 1, DP_VID_STREAM_DIS_DEFER, 2);
+ udelay(200);
+
+ /* DIG Resync FIFO now needs to be explicitly enabled
+ */
+ /* read start level = 0 will bring underflow / overflow and DIG_FIFO_ERROR = 1
+ * so set it to 1/2 full = 7 before reset as suggested by hardware team.
+ */
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, 0x7);
+
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 1);
+
+ REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 1, 10, 5000);
+
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 0);
+
+ REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 0, 10, 5000);
+
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 1);
+
+ /* wait 100us for DIG/DP logic to prime
+ * (i.e. a few video lines)
+ */
+ udelay(100);
+
+ /* the hardware would start sending video at the start of the next DP
+ * frame (i.e. rising edge of the vblank).
+ * NOTE: We used to program DP_VID_STREAM_DIS_DEFER = 2 here, but this
+ * register has no effect on enable transition! HW always guarantees
+ * VID_STREAM enable at start of next frame, and this is not
+ * programmable
+ */
+
+ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, true); /* NOTE(review): the enable bit was already written as 1 before the DIG FIFO reset above — confirm this second write is intended */
+
+ link->dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_ENABLE_DP_VID_STREAM);
+}
+
+/* Read DSC-related register fields into the caller's enc_state struct, to be
+ * logged later (the original comment names dcn10_log_hw_state as the consumer).
+ * PIXEL_ENCODING_TYPE is always read into s->dsc_mode; the remaining fields
+ * are read only when that value is non-zero and are otherwise left untouched.
+ */
+void enc401_read_state(struct stream_encoder *enc, struct enc_state *s)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ //if dsc is enabled, continue to read
+ REG_GET(DP_PIXEL_FORMAT, PIXEL_ENCODING_TYPE, &s->dsc_mode);
+
+ if (s->dsc_mode) {
+ REG_GET(DP_GSP11_CNTL, DP_SEC_GSP11_LINE_NUM, &s->sec_gsp_pps_line_num);
+
+ REG_GET(DP_MSA_VBID_MISC, DP_VBID6_LINE_REFERENCE, &s->vbid6_line_reference);
+ REG_GET(DP_MSA_VBID_MISC, DP_VBID6_LINE_NUM, &s->vbid6_line_num);
+
+ REG_GET(DP_GSP11_CNTL, DP_SEC_GSP11_ENABLE, &s->sec_gsp_pps_enable);
+ REG_GET(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, &s->sec_stream_enable);
+ }
+}
+/* Enable or disable the DIG front end for the given signal type.
+ *
+ * On enable, DIG_FE_MODE is selected from the signal type, then the FE clock
+ * and the FE itself are switched on. On disable, the FE is switched off before
+ * its clock and the signal argument is not consulted.
+ *
+ * enc: stream encoder; converted to the containing dcn10_stream_encoder
+ * signal: signal type that selects DIG_FE_MODE (enable path only)
+ * enable: true to enable, false to disable
+ */
+void enc401_stream_encoder_enable(
+ struct stream_encoder *enc,
+ enum signal_type signal,
+ bool enable)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ if (enable) {
+ switch (signal) {
+ case SIGNAL_TYPE_DVI_SINGLE_LINK:
+ case SIGNAL_TYPE_DVI_DUAL_LINK:
+ /* TMDS-DVI */
+ REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_MODE, 2);
+ break;
+ case SIGNAL_TYPE_HDMI_TYPE_A:
+ /* TMDS-HDMI */
+ REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_MODE, 3);
+ break;
+ case SIGNAL_TYPE_DISPLAY_PORT_MST:
+ /* DP MST */
+ REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_MODE, 5);
+ break;
+ case SIGNAL_TYPE_EDP:
+ case SIGNAL_TYPE_DISPLAY_PORT:
+ case SIGNAL_TYPE_VIRTUAL:
+ /* DP SST */
+ REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_MODE, 0);
+ break;
+ default:
+ /* invalid mode ! DIG_FE_MODE is left unchanged, but the FE is still enabled below */
+ ASSERT_CRITICAL(false);
+ }
+
+ REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 1);
+ REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 1);
+ } else {
+ /* reverse order of the enable path: FE first, then its clock */
+ REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 0);
+ REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 0);
+ }
+}
+/* Program the DP stream attributes: pixel format, MSA MISC0/MISC1 and the MSA
+ * timing parameters.
+ *
+ * The function works on a local copy of *crtc_timing; for interlaced timings
+ * the vertical fields of that copy are halved before use, so the caller's
+ * struct is not modified.
+ *
+ * enc: stream encoder; converted to the containing dcn10_stream_encoder
+ * crtc_timing: source timing (copied)
+ * output_color_space: selects the MISC0 colorimetry bits and clears MISC1
+ *                     bit 7 for the sRGB/YCbCr601/YCbCr709 cases
+ * use_vsc_sdp_for_colorimetry: sets (true) or clears (false) MISC1 bit 6
+ * enable_sdp_splitting: written to DP_SST_SDP_SPLITTING
+ */
+void enc401_stream_encoder_dp_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ enum dc_color_space output_color_space,
+ bool use_vsc_sdp_for_colorimetry,
+ uint32_t enable_sdp_splitting)
+{
+ uint32_t h_active_start;
+ uint32_t v_active_start;
+ uint32_t misc0 = 0;
+ uint32_t misc1 = 0;
+ uint32_t h_blank;
+ uint32_t h_back_porch;
+ uint8_t colorimetry_bpc;
+ uint8_t dp_pixel_encoding = 0;
+ uint8_t dp_component_depth = 0;
+ uint8_t dp_translate_pixel_enc = 0;
+ // Fix set but not used warnings
+ //uint8_t dp_pixel_encoding_type = 0;
+ uint8_t dp_compressed_pixel_format = 0;
+
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ struct dc_crtc_timing hw_crtc_timing = *crtc_timing; /* local copy; the caller's timing is never written */
+
+ if (hw_crtc_timing.flags.INTERLACE) {
+ /*the input timing is in VESA spec format with Interlace flag =1*/
+ hw_crtc_timing.v_total /= 2;
+ hw_crtc_timing.v_border_top /= 2;
+ hw_crtc_timing.v_addressable /= 2;
+ hw_crtc_timing.v_border_bottom /= 2;
+ hw_crtc_timing.v_front_porch /= 2;
+ hw_crtc_timing.v_sync_width /= 2;
+ }
+
+
+ /* set pixel encoding */
+ switch (hw_crtc_timing.pixel_encoding) {
+ case PIXEL_ENCODING_YCBCR422:
+ dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_YCBCR422;
+ break;
+ case PIXEL_ENCODING_YCBCR444:
+ dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_YCBCR444;
+
+ if (hw_crtc_timing.flags.Y_ONLY)
+ if (hw_crtc_timing.display_color_depth != COLOR_DEPTH_666)
+ /* HW testing only, no use case yet.
+ * Color depth of Y-only could be
+ * 8, 10, 12, 16 bits
+ */
+ dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_Y_ONLY;
+
+ /* Note: DP_MSA_MISC1 bit 7 is the indicator
+ * of Y-only mode.
+ * This bit is set in HW if register
+ * DP_PIXEL_ENCODING is programmed to 0x4
+ */
+ break;
+ case PIXEL_ENCODING_YCBCR420:
+ dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_YCBCR420;
+ break;
+ default:
+ dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_RGB444;
+ break;
+ }
+
+ misc1 = REG_READ(DP_MSA_MISC); /* read-modify-write: only bits 6 and 7 are changed below */
+ /* For YCbCr420 and BT2020 Colorimetry Formats, VSC SDP shall be used.
+ * When MISC1, bit 6, is Set to 1, a Source device uses a VSC SDP to indicate the
+ * Pixel Encoding/Colorimetry Format and that a Sink device shall ignore MISC1, bit 7,
+ * and MISC0, bits 7:1 (MISC1, bit 7, and MISC0, bits 7:1, become "don't care").
+ */
+ if (use_vsc_sdp_for_colorimetry)
+ misc1 = misc1 | 0x40;
+ else
+ misc1 = misc1 & ~0x40;
+
+ /* set color depth */
+ switch (hw_crtc_timing.display_color_depth) {
+ case COLOR_DEPTH_666:
+ dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_6BPC;
+ break;
+ case COLOR_DEPTH_888:
+ dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_8BPC;
+ break;
+ case COLOR_DEPTH_101010:
+ dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_10BPC;
+ break;
+ case COLOR_DEPTH_121212:
+ dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_12BPC;
+ break;
+ case COLOR_DEPTH_161616:
+ dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_16BPC;
+ break;
+ default:
+ dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_6BPC;
+ break;
+ }
+
+ /* With DSC only the compressed format is derived and
+ * dp_translate_pixel_enc keeps its initial 0; without DSC it is the
+ * other way round.
+ */
+ if (hw_crtc_timing.flags.DSC) {
+ // Fix set but not used error
+ //dp_pixel_encoding_type = 1;
+ switch (hw_crtc_timing.pixel_encoding) {
+ case PIXEL_ENCODING_YCBCR444:
+ dp_compressed_pixel_format = 0;
+ break;
+ case PIXEL_ENCODING_YCBCR422:
+ dp_compressed_pixel_format = 1;
+ if (hw_crtc_timing.dsc_cfg.ycbcr422_simple)
+ dp_compressed_pixel_format = 0;
+ break;
+ case PIXEL_ENCODING_YCBCR420:
+ dp_compressed_pixel_format = 1;
+ break;
+ default:
+ dp_compressed_pixel_format = 0;
+ break;
+ }
+ } else {
+ // Fix set but not used error
+ //dp_pixel_encoding_type = 0;
+ switch (dp_pixel_encoding) {
+ case DP_PIXEL_ENCODING_TYPE_RGB444:
+ dp_translate_pixel_enc = 0;
+ break;
+ case DP_PIXEL_ENCODING_TYPE_YCBCR422:
+ dp_translate_pixel_enc = 1;
+ break;
+ case DP_PIXEL_ENCODING_TYPE_YCBCR444:
+ dp_translate_pixel_enc = 0;
+ break;
+ case DP_PIXEL_ENCODING_TYPE_Y_ONLY:
+ dp_translate_pixel_enc = 3;
+ break;
+ case DP_PIXEL_ENCODING_TYPE_YCBCR420:
+ dp_translate_pixel_enc = 2;
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+ }
+ /* Set DP pixel encoding and component depth */
+ REG_UPDATE_4(DP_PIXEL_FORMAT,
+ PIXEL_ENCODING_TYPE, hw_crtc_timing.flags.DSC ? 1 : 0,
+ UNCOMPRESSED_PIXEL_FORMAT, dp_translate_pixel_enc,
+ UNCOMPRESSED_COMPONENT_DEPTH, dp_component_depth,
+ COMPRESSED_PIXEL_FORMAT, dp_compressed_pixel_format);
+
+ /* set dynamic range and YCbCr range */
+
+ switch (hw_crtc_timing.display_color_depth) {
+ case COLOR_DEPTH_666:
+ colorimetry_bpc = 0;
+ break;
+ case COLOR_DEPTH_888:
+ colorimetry_bpc = 1;
+ break;
+ case COLOR_DEPTH_101010:
+ colorimetry_bpc = 2;
+ break;
+ case COLOR_DEPTH_121212:
+ colorimetry_bpc = 3;
+ break;
+ default: /* NOTE(review): COLOR_DEPTH_161616 has no case in this switch and is encoded as 0 — confirm this is intended */
+ colorimetry_bpc = 0;
+ break;
+ }
+
+ misc0 = colorimetry_bpc << 5; /* bit depth occupies MISC0 bits 7:5 */
+
+ switch (output_color_space) {
+ case COLOR_SPACE_SRGB:
+ misc1 = misc1 & ~0x80; /* bit7 = 0*/
+ break;
+ case COLOR_SPACE_SRGB_LIMITED:
+ misc0 = misc0 | 0x8; /* bit3=1 */
+ misc1 = misc1 & ~0x80; /* bit7 = 0*/
+ break;
+ case COLOR_SPACE_YCBCR601:
+ case COLOR_SPACE_YCBCR601_LIMITED:
+ misc0 = misc0 | 0x8; /* bit3=1, bit4=0 */
+ misc1 = misc1 & ~0x80; /* bit7 = 0*/
+ if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR422)
+ misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */
+ else if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR444)
+ misc0 = misc0 | 0x4; /* bit2=1, bit1=0 */
+ break;
+ case COLOR_SPACE_YCBCR709:
+ case COLOR_SPACE_YCBCR709_LIMITED:
+ misc0 = misc0 | 0x18; /* bit3=1, bit4=1 */
+ misc1 = misc1 & ~0x80; /* bit7 = 0*/
+ if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR422)
+ misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */
+ else if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR444)
+ misc0 = misc0 | 0x4; /* bit2=1, bit1=0 */
+ break;
+ case COLOR_SPACE_2020_RGB_LIMITEDRANGE:
+ case COLOR_SPACE_2020_RGB_FULLRANGE:
+ case COLOR_SPACE_2020_YCBCR_LIMITED:
+ case COLOR_SPACE_XR_RGB:
+ case COLOR_SPACE_MSREF_SCRGB:
+ case COLOR_SPACE_ADOBERGB:
+ case COLOR_SPACE_DCIP3:
+ case COLOR_SPACE_XV_YCC_709:
+ case COLOR_SPACE_XV_YCC_601:
+ case COLOR_SPACE_DISPLAYNATIVE:
+ case COLOR_SPACE_DOLBYVISION:
+ case COLOR_SPACE_APPCTRL:
+ case COLOR_SPACE_CUSTOMPOINTS:
+ case COLOR_SPACE_UNKNOWN:
+ case COLOR_SPACE_YCBCR709_BLACK:
+ default:
+ /* do nothing: misc0 keeps only the bit-depth field and misc1 bit 7 is left as read */
+ break;
+ }
+
+ REG_SET(DP_MSA_COLORIMETRY, 0, DP_MSA_MISC0, misc0);
+ REG_WRITE(DP_MSA_MISC, misc1); /* MSA_MISC1 */
+
+ /* dcn new register
+ * dc_crtc_timing is vesa dmt struct. data from edid
+ */
+ REG_SET_2(DP_MSA_TIMING_PARAM1, 0,
+ DP_MSA_HTOTAL, hw_crtc_timing.h_total,
+ DP_MSA_VTOTAL, hw_crtc_timing.v_total);
+
+ /* calculate from vesa timing parameters
+ * h_active_start related to leading edge of sync
+ */
+
+ h_blank = hw_crtc_timing.h_total - hw_crtc_timing.h_border_left -
+ hw_crtc_timing.h_addressable - hw_crtc_timing.h_border_right;
+
+ h_back_porch = h_blank - hw_crtc_timing.h_front_porch -
+ hw_crtc_timing.h_sync_width;
+
+ /* start at beginning of left border */
+ h_active_start = hw_crtc_timing.h_sync_width + h_back_porch;
+
+
+ /* everything in the frame except borders, addressable lines and front porch */
+ v_active_start = hw_crtc_timing.v_total - hw_crtc_timing.v_border_top -
+ hw_crtc_timing.v_addressable - hw_crtc_timing.v_border_bottom -
+ hw_crtc_timing.v_front_porch;
+
+
+ /* start at beginning of left border */
+ REG_SET_2(DP_MSA_TIMING_PARAM2, 0,
+ DP_MSA_HSTART, h_active_start,
+ DP_MSA_VSTART, v_active_start);
+
+ /* the polarity fields are written as the inverse of the *_POSITIVE_POLARITY flags */
+ REG_SET_4(DP_MSA_TIMING_PARAM3, 0,
+ DP_MSA_HSYNCWIDTH,
+ hw_crtc_timing.h_sync_width,
+ DP_MSA_HSYNCPOLARITY,
+ !hw_crtc_timing.flags.HSYNC_POSITIVE_POLARITY,
+ DP_MSA_VSYNCWIDTH,
+ hw_crtc_timing.v_sync_width,
+ DP_MSA_VSYNCPOLARITY,
+ !hw_crtc_timing.flags.VSYNC_POSITIVE_POLARITY);
+
+ /* HWIDTH include border or overscan */
+ REG_SET_2(DP_MSA_TIMING_PARAM4, 0,
+ DP_MSA_HWIDTH, hw_crtc_timing.h_border_left +
+ hw_crtc_timing.h_addressable + hw_crtc_timing.h_border_right,
+ DP_MSA_VHEIGHT, hw_crtc_timing.v_border_top +
+ hw_crtc_timing.v_addressable + hw_crtc_timing.v_border_bottom);
+
+ REG_UPDATE(DP_SEC_FRAMING4,
+ DP_SST_SDP_SPLITTING, enable_sdp_splitting);
+}
+/* Route this stream encoder to a link encoder by writing link_enc_inst into
+ * DIG_STREAM_LINK_TARGET. stream_enc_inst is accepted but not used here; the
+ * register is addressed through enc's own register block.
+ */
+void enc401_stream_encoder_map_to_link(
+ struct stream_encoder *enc,
+ uint32_t stream_enc_inst,
+ uint32_t link_enc_inst)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ REG_UPDATE(STREAM_MAPPER_CONTROL,
+ DIG_STREAM_LINK_TARGET, link_enc_inst);
+}
+/* Stream encoder operations installed by dcn401_dio_stream_encoder_construct.
+ * The enc401_* entries are defined in this file. The enc1_*, enc3_* and
+ * enc35_* entries are not defined here — presumably they come from the
+ * dcn10/dcn30/dcn35 headers pulled in above; verify when changing an entry.
+ */
+static const struct stream_encoder_funcs dcn401_str_enc_funcs = {
+ .dp_set_odm_combine =
+ enc401_dp_set_odm_combine, /* empty stub in this file */
+ .dp_set_stream_attribute =
+ enc401_stream_encoder_dp_set_stream_attribute,
+ .hdmi_set_stream_attribute =
+ enc401_stream_encoder_hdmi_set_stream_attribute,
+ .dvi_set_stream_attribute =
+ enc401_stream_encoder_dvi_set_stream_attribute,
+ .set_throttled_vcp_size =
+ enc1_stream_encoder_set_throttled_vcp_size,
+ .update_hdmi_info_packets =
+ enc3_stream_encoder_update_hdmi_info_packets,
+ .stop_hdmi_info_packets =
+ enc3_stream_encoder_stop_hdmi_info_packets,
+ .update_dp_info_packets_sdp_line_num =
+ enc3_stream_encoder_update_dp_info_packets_sdp_line_num,
+ .update_dp_info_packets =
+ enc3_stream_encoder_update_dp_info_packets,
+ .stop_dp_info_packets =
+ enc1_stream_encoder_stop_dp_info_packets,
+ .dp_blank =
+ enc1_stream_encoder_dp_blank,
+ .dp_unblank =
+ enc401_stream_encoder_dp_unblank,
+ .audio_mute_control = enc3_audio_mute_control,
+
+ .dp_audio_setup = enc3_se_dp_audio_setup,
+ .dp_audio_enable = enc3_se_dp_audio_enable,
+ .dp_audio_disable = enc1_se_dp_audio_disable,
+
+ .hdmi_audio_setup = enc3_se_hdmi_audio_setup,
+ .hdmi_audio_disable = enc1_se_hdmi_audio_disable,
+ .setup_stereo_sync = enc1_setup_stereo_sync,
+ .set_avmute = enc1_stream_encoder_set_avmute,
+ .dig_connect_to_otg = enc1_dig_connect_to_otg,
+ .dig_source_otg = enc1_dig_source_otg,
+
+ .dp_get_pixel_format = enc1_stream_encoder_dp_get_pixel_format,
+
+ .enc_read_state = enc401_read_state,
+ .dp_set_dsc_config = NULL, /* no DSC config hook is installed; callers must tolerate NULL */
+ .dp_set_dsc_pps_info_packet = enc3_dp_set_dsc_pps_info_packet,
+ .set_dynamic_metadata = enc401_set_dynamic_metadata,
+ .hdmi_reset_stream_attribute = enc1_reset_hdmi_stream_attribute,
+ .enable_stream = enc401_stream_encoder_enable,
+
+ .set_input_mode = enc401_set_dig_input_mode,
+ .enable_fifo = enc35_enable_fifo,
+ .disable_fifo = enc35_disable_fifo,
+ .map_stream_to_link = enc401_stream_encoder_map_to_link,
+};
+
+/* Initialise a caller-provided DCN401 DIO stream encoder.
+ *
+ * Stores the register description (regs, se_shift, se_mask) on enc1 and fills
+ * in the embedded base encoder with the DCN401 function table, context, BIOS
+ * parser, engine id, VPG and AFMT. The stream encoder instance number is taken
+ * from vpg->inst, so vpg is dereferenced and must be valid.
+ */
+void dcn401_dio_stream_encoder_construct(
+	struct dcn10_stream_encoder *enc1,
+	struct dc_context *ctx,
+	struct dc_bios *bp,
+	enum engine_id eng_id,
+	struct vpg *vpg,
+	struct afmt *afmt,
+	const struct dcn10_stream_enc_registers *regs,
+	const struct dcn10_stream_encoder_shift *se_shift,
+	const struct dcn10_stream_encoder_mask *se_mask)
+{
+	struct stream_encoder *base = &enc1->base;
+
+	/* register description consumed by the REG()/FN() accessors */
+	enc1->regs = regs;
+	enc1->se_shift = se_shift;
+	enc1->se_mask = se_mask;
+
+	/* generic stream encoder state */
+	base->funcs = &dcn401_str_enc_funcs;
+	base->ctx = ctx;
+	base->bp = bp;
+	base->id = eng_id;
+	base->vpg = vpg;
+	base->afmt = afmt;
+	base->stream_enc_inst = vpg->inst;
+}
+
+/* Enable or disable dynamic metadata transmission.
+ *
+ * Enable: programs the HUBP requestor id and stream type in DME_CONTROL,
+ * enables the DP or HDMI metadata packet (plus DOLBY_VISION_EN for Dolby
+ * Vision on the HDMI path), then turns the metadata engine on.
+ * Disable: turns the metadata engine off first, then disables the DP packet,
+ * or the HDMI packet together with DOLBY_VISION_EN.
+ *
+ * enc: stream encoder; converted to the containing dcn10_stream_encoder
+ * enable_dme: true to enable, false to disable
+ * hubp_requestor_id: written to METADATA_HUBP_REQUESTOR_ID (enable only)
+ * dmdata_mode: dmdata_dp selects the DP registers, any other mode the HDMI ones
+ */
+void enc401_set_dynamic_metadata(struct stream_encoder *enc,
+	bool enable_dme,
+	uint32_t hubp_requestor_id,
+	enum dynamic_metadata_mode dmdata_mode)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	const bool is_dp = (dmdata_mode == dmdata_dp);
+	const bool is_dolby_vision = (dmdata_mode == dmdata_dolby_vision);
+
+	if (!enable_dme) {
+		/* engine off before the per-protocol packet is disabled */
+		REG_UPDATE(DME_CONTROL,
+			METADATA_ENGINE_EN, 0);
+
+		if (is_dp) {
+			REG_UPDATE(DP_SEC_METADATA_TRANSMISSION,
+				DP_SEC_METADATA_PACKET_ENABLE, 0);
+			return;
+		}
+
+		REG_UPDATE(HDMI_METADATA_PACKET_CONTROL,
+			HDMI_METADATA_PACKET_ENABLE, 0);
+		REG_UPDATE(HDMI_CONTROL,
+			DOLBY_VISION_EN, 0);
+		return;
+	}
+
+	REG_UPDATE_2(DME_CONTROL,
+		METADATA_HUBP_REQUESTOR_ID, hubp_requestor_id,
+		METADATA_STREAM_TYPE, is_dolby_vision ? 1 : 0);
+
+	/* Use default line reference DP_SOF for bringup.
+	 * Should use OTG_SOF for DRR cases
+	 */
+	if (is_dp) {
+		REG_UPDATE_3(DP_SEC_METADATA_TRANSMISSION,
+			DP_SEC_METADATA_PACKET_ENABLE, 1,
+			DP_SEC_METADATA_PACKET_LINE_REFERENCE, 0,
+			DP_SEC_METADATA_PACKET_LINE, 20);
+	} else {
+		REG_UPDATE_3(HDMI_METADATA_PACKET_CONTROL,
+			HDMI_METADATA_PACKET_ENABLE, 1,
+			HDMI_METADATA_PACKET_LINE_REFERENCE, 0,
+			HDMI_METADATA_PACKET_LINE, 2);
+
+		if (is_dolby_vision) {
+			REG_UPDATE(HDMI_CONTROL,
+				DOLBY_VISION_EN, 1);
+		}
+	}
+
+	/* engine on only after the packet registers are set up */
+	REG_UPDATE(DME_CONTROL,
+		METADATA_ENGINE_EN, 1);
+}
+/* Shared TMDS setup used by the DVI and HDMI attribute functions.
+ *
+ * Writes TMDS_PIXEL_ENCODING as 1 for YCbCr 4:2:2 and 0 for every other pixel
+ * encoding, then writes TMDS_COLOR_FORMAT as 0.
+ *
+ * enc1: encoder whose HDMI_CONTROL register is updated
+ * crtc_timing: only pixel_encoding is read
+ */
+void enc401_stream_encoder_set_stream_attribute_helper(
+	struct dcn10_stream_encoder *enc1,
+	struct dc_crtc_timing *crtc_timing)
+{
+	const bool is_ycbcr422 =
+		crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422;
+
+	REG_UPDATE(HDMI_CONTROL, TMDS_PIXEL_ENCODING, is_ycbcr422 ? 1 : 0);
+	REG_UPDATE(HDMI_CONTROL, TMDS_COLOR_FORMAT, 0);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.h
new file mode 100644
index 000000000000..d6b00cd246b1
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.h
@@ -0,0 +1,240 @@
+/*
+ * Copyright 2021 - Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_DIO_STREAM_ENCODER_DCN401_H__
+#define __DC_DIO_STREAM_ENCODER_DCN401_H__
+
+#include "dcn30/dcn30_vpg.h"
+#include "dcn30/dcn30_afmt.h"
+#include "stream_encoder.h"
+#include "dcn20/dcn20_stream_encoder.h"
+
+#define SE_COMMON_MASK_SH_LIST_DCN401(mask_sh) /* list of SE_SF(register, field, ...) entries; mask_sh is passed through to each one */\
+ SE_SF(DP0_DP_PIXEL_FORMAT, PIXEL_ENCODING_TYPE, mask_sh),\
+ SE_SF(DP0_DP_PIXEL_FORMAT, UNCOMPRESSED_PIXEL_FORMAT, mask_sh),\
+ SE_SF(DP0_DP_PIXEL_FORMAT, UNCOMPRESSED_COMPONENT_DEPTH, mask_sh),\
+ SE_SF(DP0_DP_PIXEL_FORMAT, COMPRESSED_PIXEL_FORMAT, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_PACKET_GEN_VERSION, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_KEEPOUT_MODE, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_DATA_SCRAMBLE_EN, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_NO_EXTRA_NULL_PACKET_FILLED, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GC, HDMI_GC_AVMUTE, mask_sh),\
+ SE_SF(DP0_DP_MSE_RATE_CNTL, DP_MSE_RATE_X, mask_sh),\
+ SE_SF(DP0_DP_MSE_RATE_CNTL, DP_MSE_RATE_Y, mask_sh),\
+ SE_SF(DP0_DP_MSE_RATE_UPDATE, DP_MSE_RATE_UPDATE_PENDING, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP1_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_MPG_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP5_LINE_REFERENCE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_PENDING, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL4, DP_SEC_GSP4_LINE_NUM, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL5, DP_SEC_GSP5_LINE_NUM, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_ANY_LINE, mask_sh),\
+ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, mask_sh),\
+ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, mask_sh),\
+ SE_SF(DP0_DP_STEER_FIFO, DP_STEER_FIFO_RESET, mask_sh),\
+ SE_SF(DP0_DP_STEER_FIFO, DP_STEER_FIFO_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_VID_TIMING, DP_VID_M_N_GEN_EN, mask_sh),\
+ SE_SF(DP0_DP_VID_N, DP_VID_N, mask_sh),\
+ SE_SF(DP0_DP_VID_M, DP_VID_M, mask_sh),\
+ SE_SF(DIG0_HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUDIO_PRIORITY, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_32_0, HDMI_ACR_CTS_32, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_32_1, HDMI_ACR_N_32, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_44_0, HDMI_ACR_CTS_44, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_44_1, HDMI_ACR_N_44, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_48_0, HDMI_ACR_CTS_48, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_48_1, HDMI_ACR_N_48, mask_sh),\
+ SE_SF(DP0_DP_SEC_AUD_N, DP_SEC_AUD_N, mask_sh),\
+ SE_SF(DP0_DP_SEC_TIMESTAMP, DP_SEC_TIMESTAMP_MODE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ASP_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ATP_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_AIP_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ACM_ENABLE, mask_sh),\
+ SE_SF(DIG0_AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_CLOCK_CHANNEL_RATE, mask_sh),\
+ SE_SF(DIG1_HDMI_CONTROL, TMDS_PIXEL_ENCODING, mask_sh), /* NOTE(review): DIG1_ prefix on this and the next entry; every other HDMI_CONTROL field here uses DIG0_ - confirm intended */\
+ SE_SF(DIG1_HDMI_CONTROL, TMDS_COLOR_FORMAT, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_SELECT, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_GATE_EN, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP4_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP5_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP6_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP7_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP7_SEND, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL6, DP_SEC_GSP7_LINE_NUM, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP11_PPS, mask_sh),\
+ SE_SF(DP0_DP_GSP11_CNTL, DP_SEC_GSP11_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_GSP11_CNTL, DP_SEC_GSP11_LINE_NUM, mask_sh),\
+ SE_SF(DP0_DP_DB_CNTL, DP_DB_DISABLE, mask_sh),\
+ SE_SF(DP0_DP_MSA_COLORIMETRY, DP_MSA_MISC0, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM1, DP_MSA_HTOTAL, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM1, DP_MSA_VTOTAL, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM2, DP_MSA_HSTART, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM2, DP_MSA_VSTART, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_HSYNCWIDTH, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_HSYNCPOLARITY, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_VSYNCWIDTH, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_VSYNCPOLARITY, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_HWIDTH, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_VHEIGHT, mask_sh),\
+ SE_SF(DIG0_HDMI_DB_CONTROL, HDMI_DB_DISABLE, mask_sh),\
+ SE_SF(DP0_DP_VID_TIMING, DP_VID_N_INTERVAL, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DIG_SOURCE_SELECT, mask_sh), \
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC2_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC2_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC3_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC3_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC4_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC4_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC5_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC5_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC6_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC6_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC7_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC7_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC8_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC8_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC9_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC9_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC10_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC10_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC11_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC11_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC12_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC12_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC13_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC13_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC14_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC14_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL1, HDMI_GENERIC0_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL1, HDMI_GENERIC1_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL2, HDMI_GENERIC2_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL2, HDMI_GENERIC3_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL3, HDMI_GENERIC4_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL3, HDMI_GENERIC5_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL4, HDMI_GENERIC6_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL4, HDMI_GENERIC7_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL7, HDMI_GENERIC8_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL7, HDMI_GENERIC9_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL8, HDMI_GENERIC10_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL8, HDMI_GENERIC11_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL9, HDMI_GENERIC12_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL9, HDMI_GENERIC13_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL10, HDMI_GENERIC14_LINE, mask_sh),\
+ SE_SF(DP0_DP_MSA_VBID_MISC, DP_VBID6_LINE_REFERENCE, mask_sh),\
+ SE_SF(DP0_DP_MSA_VBID_MISC, DP_VBID6_LINE_NUM, mask_sh),\
+ SE_SF(DME0_DME_CONTROL, METADATA_ENGINE_EN, mask_sh),\
+ SE_SF(DME0_DME_CONTROL, METADATA_HUBP_REQUESTOR_ID, mask_sh),\
+ SE_SF(DME0_DME_CONTROL, METADATA_STREAM_TYPE, mask_sh),\
+ SE_SF(DP0_DP_SEC_METADATA_TRANSMISSION, DP_SEC_METADATA_PACKET_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_METADATA_TRANSMISSION, DP_SEC_METADATA_PACKET_LINE_REFERENCE, mask_sh),\
+ SE_SF(DP0_DP_SEC_METADATA_TRANSMISSION, DP_SEC_METADATA_PACKET_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_ENABLE, mask_sh),\
+ SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_LINE_REFERENCE, mask_sh),\
+ SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, DOLBY_VISION_EN, mask_sh),\
+ SE_SF(DIG0_DIG_FE_EN_CNTL, DIG_FE_ENABLE, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_MODE, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_SOFT_RESET, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_GATE_EN, mask_sh), /* NOTE(review): same (register, field) pair is already listed earlier in this macro */\
+ SE_SF(DP0_DP_SEC_FRAMING4, DP_SST_SDP_SPLITTING, mask_sh),\
+ SE_SF(DIG0_DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_PER_CYCLE, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_RESET, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, mask_sh),\
+ SE_SF(DIG0_STREAM_MAPPER_CONTROL, DIG_STREAM_LINK_TARGET, mask_sh),
+
+
+void dcn401_dio_stream_encoder_construct(
+ struct dcn10_stream_encoder *enc1,
+ struct dc_context *ctx,
+ struct dc_bios *bp,
+ enum engine_id eng_id,
+ struct vpg *vpg,
+ struct afmt *afmt,
+ const struct dcn10_stream_enc_registers *regs,
+ const struct dcn10_stream_encoder_shift *se_shift,
+ const struct dcn10_stream_encoder_mask *se_mask);
+
+void enc401_set_dynamic_metadata(struct stream_encoder *enc,
+ bool enable_dme,
+ uint32_t hubp_requestor_id,
+ enum dynamic_metadata_mode dmdata_mode);
+void enc401_stream_encoder_set_stream_attribute_helper(
+ struct dcn10_stream_encoder *enc1,
+ struct dc_crtc_timing *crtc_timing);
+void enc401_stream_encoder_dp_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ enum dc_color_space output_color_space,
+ bool use_vsc_sdp_for_colorimetry,
+ uint32_t enable_sdp_splitting);
+void enc401_stream_encoder_dvi_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ bool is_dual_link);
+void enc401_stream_encoder_dp_unblank(
+ struct dc_link *link,
+ struct stream_encoder *enc,
+ const struct encoder_unblank_param *param);
+void enc401_stream_encoder_enable(
+ struct stream_encoder *enc,
+ enum signal_type signal,
+ bool enable);
+void enc401_set_dig_input_mode(struct stream_encoder *enc, unsigned int pix_per_container);
+void enc401_stream_encoder_map_to_link(
+ struct stream_encoder *enc,
+ uint32_t stream_enc_inst,
+ uint32_t link_enc_inst);
+void enc401_read_state(struct stream_encoder *enc, struct enc_state *s);
+void enc401_stream_encoder_hdmi_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ int actual_pix_clk_khz,
+ bool enable_audio);
+#endif /* __DC_DIO_STREAM_ENCODER_DCN401_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dm_cp_psp.h b/drivers/gpu/drm/amd/display/dc/dm_cp_psp.h
index 511f9e1159c7..f4d3f04ec857 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_cp_psp.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_cp_psp.h
@@ -26,6 +26,9 @@
#ifndef DM_CP_PSP_IF__H
#define DM_CP_PSP_IF__H
+/*
+ * Interface to CPLIB/PSP to enable ASSR
+ */
struct dc_link;
struct cp_psp_stream_config {
@@ -34,12 +37,12 @@ struct cp_psp_stream_config {
uint8_t dig_fe;
uint8_t link_enc_idx;
uint8_t stream_enc_idx;
- uint8_t phy_idx;
uint8_t dio_output_idx;
- uint8_t dio_output_type;
+ uint8_t phy_idx;
uint8_t assr_enabled;
uint8_t mst_enabled;
uint8_t dp2_enabled;
+ uint8_t usb4_enabled;
void *dm_stream_ctx;
bool dpms_off;
};
diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h
index 0fe66b080a03..9d160b39e8c5 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h
@@ -33,9 +33,10 @@
#include "dc_types.h"
#include "dc.h"
-struct dp_mst_stream_allocation_table;
+struct dc_dp_mst_stream_allocation_table;
struct aux_payload;
enum aux_return_code_type;
+enum set_config_status;
/*
* Allocate memory accessible by the GPU
@@ -59,7 +60,7 @@ void dm_helpers_free_gpu_mem(
void *pvMem);
enum dc_edid_status dm_helpers_parse_edid_caps(
- struct dc_context *ctx,
+ struct dc_link *link,
const struct dc_edid *edid,
struct dc_edid_caps *edid_caps);
@@ -77,7 +78,7 @@ void dm_helpers_dp_update_branch_info(
bool dm_helpers_dp_mst_write_payload_allocation_table(
struct dc_context *ctx,
const struct dc_stream_state *stream,
- struct dp_mst_stream_allocation_table *proposed_table,
+ struct dc_dp_mst_stream_allocation_table *proposed_table,
bool enable);
/*
@@ -103,19 +104,30 @@ enum act_return_status dm_helpers_dp_mst_poll_for_allocation_change_trigger(
/*
* Sends ALLOCATE_PAYLOAD message.
*/
-bool dm_helpers_dp_mst_send_payload_allocation(
+void dm_helpers_dp_mst_send_payload_allocation(
struct dc_context *ctx,
- const struct dc_stream_state *stream,
- bool enable);
+ const struct dc_stream_state *stream);
+
+/*
+ * Update mst manager relevant variables
+ */
+void dm_helpers_dp_mst_update_mst_mgr_for_deallocation(
+ struct dc_context *ctx,
+ const struct dc_stream_state *stream);
bool dm_helpers_dp_mst_start_top_mgr(
struct dc_context *ctx,
const struct dc_link *link,
bool boot);
-void dm_helpers_dp_mst_stop_top_mgr(
+bool dm_helpers_dp_mst_stop_top_mgr(
+ struct dc_context *ctx,
+ struct dc_link *link);
+
+void dm_helpers_dp_mst_update_branch_bandwidth(
struct dc_context *ctx,
struct dc_link *link);
+
/**
* OS specific aux read callback.
*/
@@ -141,11 +153,24 @@ bool dm_helpers_submit_i2c(
const struct dc_link *link,
struct i2c_command *cmd);
+bool dm_helpers_execute_fused_io(
+ struct dc_context *ctx,
+ struct dc_link *link,
+ union dmub_rb_cmd *commands,
+ uint8_t count,
+ uint32_t timeout_us
+);
+
bool dm_helpers_dp_write_dsc_enable(
struct dc_context *ctx,
const struct dc_stream_state *stream,
bool enable
);
+
+bool dm_helpers_dp_write_hblank_reduction(
+ struct dc_context *ctx,
+ const struct dc_stream_state *stream);
+
bool dm_helpers_is_dp_sink_present(
struct dc_link *link);
@@ -156,13 +181,17 @@ enum dc_edid_status dm_helpers_read_local_edid(
struct dc_link *link,
struct dc_sink *sink);
+bool dm_helpers_dp_handle_test_pattern_request(
+ struct dc_context *ctx,
+ const struct dc_link *link,
+ union link_test_pattern dpcd_test_pattern,
+ union test_misc dpcd_test_params);
+
void dm_set_dcn_clocks(
struct dc_context *ctx,
struct dc_clocks *clks);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
void dm_helpers_enable_periodic_detection(struct dc_context *ctx, bool enable);
-#endif
void dm_set_phyd32clk(struct dc_context *ctx, int freq_khz);
@@ -170,18 +199,31 @@ bool dm_helpers_dmub_outbox_interrupt_control(struct dc_context *ctx, bool enabl
void dm_helpers_smu_timeout(struct dc_context *ctx, unsigned int msg_id, unsigned int param, unsigned int timeout_us);
-// 0x1 = Result_OK, 0xFE = Result_UnkmownCmd
+// 0x1 = Result_OK, 0xFE = Result_UnknownCmd, 0x0 = Status_Busy
#define IS_SMU_TIMEOUT(result) \
- (!(result == 0x1 || result == 0xFE))
-
+ (result == 0x0)
+void dm_helpers_init_panel_settings(
+ struct dc_context *ctx,
+ struct dc_panel_config *config,
+ struct dc_sink *sink);
+void dm_helpers_override_panel_settings(
+ struct dc_context *ctx,
+ struct dc_panel_config *config);
int dm_helper_dmub_aux_transfer_sync(
struct dc_context *ctx,
const struct dc_link *link,
struct aux_payload *payload,
enum aux_return_code_type *operation_result);
-enum set_config_status;
+
int dm_helpers_dmub_set_config_sync(struct dc_context *ctx,
const struct dc_link *link,
struct set_config_cmd_payload *payload,
enum set_config_status *operation_result);
+enum adaptive_sync_type dm_get_adaptive_sync_support_type(struct dc_link *link);
+
+enum dc_edid_status dm_helpers_get_sbios_edid(struct dc_link *link, struct dc_edid *edid);
+
+bool dm_helpers_is_fullscreen(struct dc_context *ctx, struct dc_stream_state *stream);
+bool dm_helpers_is_hdr_on(struct dc_context *ctx, struct dc_stream_state *stream);
+
#endif /* __DM_HELPERS__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
index 4440d08743aa..b0e17a594ec3 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
@@ -40,8 +40,9 @@ enum pp_smu_ver {
PP_SMU_UNSUPPORTED,
PP_SMU_VER_RV,
PP_SMU_VER_NV,
- PP_SMU_VER_RN,
+ PP_SMU_VER_RN,
+ PP_SMU_VER_VG,
PP_SMU_VER_MAX
};
@@ -247,6 +248,7 @@ struct pp_smu_funcs_nv {
#define PP_SMU_NUM_MEMCLK_DPM_LEVELS 4
#define PP_SMU_NUM_DCLK_DPM_LEVELS 8
#define PP_SMU_NUM_VCLK_DPM_LEVELS 8
+#define PP_SMU_NUM_VPECLK_DPM_LEVELS 8
struct dpm_clock {
uint32_t Freq; // In MHz
@@ -262,6 +264,7 @@ struct dpm_clocks {
struct dpm_clock MemClocks[PP_SMU_NUM_MEMCLK_DPM_LEVELS];
struct dpm_clock VClocks[PP_SMU_NUM_VCLK_DPM_LEVELS];
struct dpm_clock DClocks[PP_SMU_NUM_DCLK_DPM_LEVELS];
+ struct dpm_clock VPEClocks[PP_SMU_NUM_VPECLK_DPM_LEVELS];
};
diff --git a/drivers/gpu/drm/amd/display/dc/dm_services.h b/drivers/gpu/drm/amd/display/dc/dm_services.h
index 9a3f2a44f882..fbbf9c757b3c 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_services.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_services.h
@@ -40,6 +40,7 @@
struct dmub_srv;
struct dc_dmub_srv;
+union dmub_rb_cmd;
irq_handler_idx dm_register_interrupt(
struct dc_context *ctx,
@@ -142,7 +143,7 @@ void generic_reg_wait(const struct dc_context *ctx,
unsigned int delay_between_poll_us, unsigned int time_out_num_tries,
const char *func_name, int line);
-unsigned int snprintf_count(char *pBuf, unsigned int bufSize, char *fmt, ...);
+unsigned int snprintf_count(char *pBuf, unsigned int bufSize, const char *fmt, ...);
/* These macros need to be used with soc15 registers in order to retrieve
* the actual offset.
@@ -274,6 +275,30 @@ void dm_perf_trace_timestamp(const char *func_name, unsigned int line, struct dc
#define PERF_TRACE_CTX(__CTX) dm_perf_trace_timestamp(__func__, __LINE__, __CTX)
/*
+ * SMU message tracing
+ */
+void dm_trace_smu_enter(uint32_t msg_id, uint32_t param_in, unsigned int delay, struct dc_context *ctx);
+void dm_trace_smu_exit(bool success, uint32_t response, struct dc_context *ctx);
+
+#define TRACE_SMU_MSG_DELAY(msg_id, param_in, delay, ctx) dm_trace_smu_enter(msg_id, param_in, delay, ctx)
+#define TRACE_SMU_MSG(msg_id, param_in, ctx) dm_trace_smu_enter(msg_id, param_in, 0, ctx)
+#define TRACE_SMU_MSG_ENTER(msg_id, param_in, ctx) dm_trace_smu_enter(msg_id, param_in, 0, ctx)
+#define TRACE_SMU_MSG_EXIT(success, response, ctx) dm_trace_smu_exit(success, response, ctx)
+
+/*
+ * DMUB Interfaces
+ */
+bool dm_execute_dmub_cmd(const struct dc_context *ctx, union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type);
+bool dm_execute_dmub_cmd_list(const struct dc_context *ctx, unsigned int count, union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type);
+
+/*
+ * ACPI Interfaces
+ */
+void dm_acpi_process_phy_transition_interlock(
+ const struct dc_context *ctx,
+ struct dm_process_phy_transition_init_params process_phy_transition_init_params);
+
+/*
* Debug and verification hooks
*/
@@ -285,4 +310,8 @@ void dm_dtn_log_append_v(struct dc_context *ctx,
void dm_dtn_log_end(struct dc_context *ctx,
struct dc_log_buffer_ctx *log_ctx);
+char *dce_version_to_string(const int version);
+
+bool dc_supports_vrr(const enum dce_version v);
+
#endif /* __DM_SERVICES_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dm_services_types.h b/drivers/gpu/drm/amd/display/dc/dm_services_types.h
index b52ba6ffabe1..3b093b8699ab 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_services_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_services_types.h
@@ -127,7 +127,7 @@ struct dm_pp_single_disp_config {
uint32_t src_height;
uint32_t src_width;
uint32_t v_refresh;
- uint32_t sym_clock; /* HDMI only */
+ uint32_t pixel_clock; /* Pixel clock in KHz (for HDMI only: normalized) */
struct dc_link_settings link_settings; /* DP only */
};
@@ -269,4 +269,36 @@ struct dtn_min_clk_info {
uint32_t min_memory_clock_khz;
};
+enum dm_dmub_wait_type {
+ DM_DMUB_WAIT_TYPE_NO_WAIT,
+ DM_DMUB_WAIT_TYPE_WAIT,
+ DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY,
+};
+
+enum dm_acpi_transition_link_type {
+ hdmi_tmds,
+ hdmi_frl,
+ dp_8b_10b,
+ dp_128b_132b,
+ none
+};
+
+struct dm_process_phy_transition_init_params {
+ uint32_t phy_id;
+ uint8_t action;
+ uint32_t sym_clock_10khz;
+ enum signal_type signal;
+ enum dc_lane_count display_port_lanes_count;
+ enum dc_link_rate display_port_link_rate;
+ uint32_t transition_bitmask;
+ uint8_t hdmi_frl_num_lanes;
+};
+
+struct dm_process_phy_transition_input_params {
+ uint32_t phy_id;
+ uint32_t transition_id;
+ uint32_t phy_configuration;
+ uint32_t data_rate;
+};
+
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index eee6672bd32d..b357683b4255 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -25,56 +25,55 @@
# It provides the general basic services required by other DAL
# subcomponents.
-ifdef CONFIG_X86
-dml_ccflags := -mhard-float -msse
-endif
-
-ifdef CONFIG_PPC64
-dml_ccflags := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-dml_ccflags += -mpreferred-stack-boundary=4
-else
-dml_ccflags += -msse2
-endif
-endif
+dml_ccflags := $(CC_FLAGS_FPU)
+dml_rcflags := $(CC_FLAGS_NO_FPU)
ifneq ($(CONFIG_FRAME_WARN),0)
-frame_warn_flag := -Wframe-larger-than=2048
+ ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y)
+ frame_warn_limit := 3072
+ else
+ frame_warn_limit := 2048
+ endif
+
+ ifeq ($(call test-lt, $(CONFIG_FRAME_WARN), $(frame_warn_limit)),y)
+ frame_warn_flag := -Wframe-larger-than=$(frame_warn_limit)
+ endif
endif
CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
-
-ifdef CONFIG_DRM_AMD_DC_DCN
CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_ccflags) $(frame_warn_flag)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn314/dcn314_fpu.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_ccflags) $(frame_warn_flag)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_ccflags) $(frame_warn_flag)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn321/dcn321_fpu.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn35/dcn35_fpu.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn351/dcn351_fpu.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn303/dcn303_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_ccflags) -Wno-tautological-compare
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn2x/dcn2x.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_rcflags)
@@ -85,26 +84,58 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_rcflags)
-endif
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/dcn314_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn321/dcn321_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn35/dcn35_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn351/dcn351_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn303/dcn303_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_rcflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_ccflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_rcflags)
-DML = display_mode_lib.o display_rq_dlg_helpers.o dml1_display_rq_dlg_calc.o \
-
-ifdef CONFIG_DRM_AMD_DC_DCN
+ifdef CONFIG_DRM_AMD_DC_FP
+DML += display_mode_lib.o display_rq_dlg_helpers.o dml1_display_rq_dlg_calc.o
+DML += dcn10/dcn10_fpu.o
DML += dcn20/dcn20_fpu.o
DML += display_mode_vba.o dcn20/display_rq_dlg_calc_20.o dcn20/display_mode_vba_20.o
DML += dcn20/display_rq_dlg_calc_20v2.o dcn20/display_mode_vba_20v2.o
DML += dcn21/display_rq_dlg_calc_21.o dcn21/display_mode_vba_21.o
-DML += dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o
+DML += dcn30/dcn30_fpu.o dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o
DML += dcn31/display_mode_vba_31.o dcn31/display_rq_dlg_calc_31.o
+DML += dcn314/display_mode_vba_314.o dcn314/display_rq_dlg_calc_314.o
+DML += dcn32/display_mode_vba_32.o dcn32/display_rq_dlg_calc_32.o dcn32/display_mode_vba_util_32.o
+DML += dcn31/dcn31_fpu.o
+DML += dcn32/dcn32_fpu.o
+DML += dcn321/dcn321_fpu.o
DML += dcn301/dcn301_fpu.o
+DML += dcn302/dcn302_fpu.o
+DML += dcn303/dcn303_fpu.o
+DML += dcn314/dcn314_fpu.o
+DML += dcn35/dcn35_fpu.o
+DML += dcn351/dcn351_fpu.o
DML += dsc/rc_calc_fpu.o
+DML += calcs/dcn_calcs.o calcs/dcn_calc_math.o calcs/dcn_calc_auto.o
endif
AMD_DAL_DML = $(addprefix $(AMDDALPATH)/dc/dml/,$(DML))
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.c
index 41284e263325..288d22a16cf2 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.c
@@ -526,10 +526,10 @@ void mode_support_and_system_configuration(struct dcn_bw_internal_vars *v)
}
if (v->max_swath_height_c[k] > 0.0) {
v->swath_width_granularity_c = 256.0 /dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / v->max_swath_height_c[k];
- }
- v->rounded_up_max_swath_size_bytes_c = (dcn_bw_ceil2(v->swath_width_yper_state[i][j][k] / 2.0 - 1.0, v->swath_width_granularity_c) + v->swath_width_granularity_c) * v->byte_per_pixel_in_detc[k] * v->max_swath_height_c[k];
- if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) {
- v->rounded_up_max_swath_size_bytes_c =dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_c, 256.0) + 256;
+ v->rounded_up_max_swath_size_bytes_c = (dcn_bw_ceil2(v->swath_width_yper_state[i][j][k] / 2.0 - 1.0, v->swath_width_granularity_c) + v->swath_width_granularity_c) * v->byte_per_pixel_in_detc[k] * v->max_swath_height_c[k];
+ if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) {
+ v->rounded_up_max_swath_size_bytes_c = dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_c, 256.0) + 256;
+ }
}
if (v->rounded_up_max_swath_size_bytes_y + v->rounded_up_max_swath_size_bytes_c <= v->det_buffer_size_in_kbyte * 1024.0 / 2.0) {
v->swath_height_yper_state[i][j][k] = v->max_swath_height_y[k];
@@ -552,14 +552,14 @@ void mode_support_and_system_configuration(struct dcn_bw_internal_vars *v)
v->lines_in_det_chroma = v->det_buffer_size_in_kbyte * 1024.0 / 3.0 / v->byte_per_pixel_in_dety[k] / (v->swath_width_yper_state[i][j][k] / 2.0);
}
v->effective_lb_latency_hiding_source_lines_luma =dcn_bw_min2(v->max_line_buffer_lines,dcn_bw_floor2(v->line_buffer_size / v->lb_bit_per_pixel[k] / (v->swath_width_yper_state[i][j][k] /dcn_bw_max2(v->h_ratio[k], 1.0)), 1.0)) - (v->vtaps[k] - 1.0);
- v->effective_lb_latency_hiding_source_lines_chroma =dcn_bw_min2(v->max_line_buffer_lines,dcn_bw_floor2(v->line_buffer_size / v->lb_bit_per_pixel[k] / (v->swath_width_yper_state[i][j][k] / 2.0 /dcn_bw_max2(v->h_ratio[k] / 2.0, 1.0)), 1.0)) - (v->vta_pschroma[k] - 1.0);
v->effective_detlb_lines_luma =dcn_bw_floor2(v->lines_in_det_luma +dcn_bw_min2(v->lines_in_det_luma * v->required_dispclk[i][j] * v->byte_per_pixel_in_dety[k] * v->pscl_factor[k] / v->return_bw_per_state[i], v->effective_lb_latency_hiding_source_lines_luma), v->swath_height_yper_state[i][j][k]);
- v->effective_detlb_lines_chroma =dcn_bw_floor2(v->lines_in_det_chroma +dcn_bw_min2(v->lines_in_det_chroma * v->required_dispclk[i][j] * v->byte_per_pixel_in_detc[k] * v->pscl_factor_chroma[k] / v->return_bw_per_state[i], v->effective_lb_latency_hiding_source_lines_chroma), v->swath_height_cper_state[i][j][k]);
if (v->byte_per_pixel_in_detc[k] == 0.0) {
v->urgent_latency_support_us_per_state[i][j][k] = v->effective_detlb_lines_luma * (v->htotal[k] / v->pixel_clock[k]) / v->v_ratio[k] - v->effective_detlb_lines_luma * v->swath_width_yper_state[i][j][k] *dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]);
}
else {
- v->urgent_latency_support_us_per_state[i][j][k] =dcn_bw_min2(v->effective_detlb_lines_luma * (v->htotal[k] / v->pixel_clock[k]) / v->v_ratio[k] - v->effective_detlb_lines_luma * v->swath_width_yper_state[i][j][k] *dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]), v->effective_detlb_lines_chroma * (v->htotal[k] / v->pixel_clock[k]) / (v->v_ratio[k] / 2.0) - v->effective_detlb_lines_chroma * v->swath_width_yper_state[i][j][k] / 2.0 *dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]));
+ v->effective_lb_latency_hiding_source_lines_chroma = dcn_bw_min2(v->max_line_buffer_lines, dcn_bw_floor2(v->line_buffer_size / v->lb_bit_per_pixel[k] / (v->swath_width_yper_state[i][j][k] / 2.0 / dcn_bw_max2(v->h_ratio[k] / 2.0, 1.0)), 1.0)) - (v->vta_pschroma[k] - 1.0);
+ v->effective_detlb_lines_chroma = dcn_bw_floor2(v->lines_in_det_chroma + dcn_bw_min2(v->lines_in_det_chroma * v->required_dispclk[i][j] * v->byte_per_pixel_in_detc[k] * v->pscl_factor_chroma[k] / v->return_bw_per_state[i], v->effective_lb_latency_hiding_source_lines_chroma), v->swath_height_cper_state[i][j][k]);
+ v->urgent_latency_support_us_per_state[i][j][k] = dcn_bw_min2(v->effective_detlb_lines_luma * (v->htotal[k] / v->pixel_clock[k]) / v->v_ratio[k] - v->effective_detlb_lines_luma * v->swath_width_yper_state[i][j][k] * dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]), v->effective_detlb_lines_chroma * (v->htotal[k] / v->pixel_clock[k]) / (v->v_ratio[k] / 2.0) - v->effective_detlb_lines_chroma * v->swath_width_yper_state[i][j][k] / 2.0 * dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]));
}
}
}
@@ -1146,10 +1146,10 @@ void display_pipe_configuration(struct dcn_bw_internal_vars *v)
}
if (v->maximum_swath_height_c > 0.0) {
v->swath_width_granularity_c = 256.0 /dcn_bw_ceil2(v->byte_per_pix_detc, 2.0) / v->maximum_swath_height_c;
- }
- v->rounded_up_max_swath_size_bytes_c = (dcn_bw_ceil2(v->swath_width / 2.0 - 1.0, v->swath_width_granularity_c) + v->swath_width_granularity_c) * v->byte_per_pix_detc * v->maximum_swath_height_c;
- if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) {
- v->rounded_up_max_swath_size_bytes_c =dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_c, 256.0) + 256;
+ v->rounded_up_max_swath_size_bytes_c = (dcn_bw_ceil2(v->swath_width / 2.0 - 1.0, v->swath_width_granularity_c) + v->swath_width_granularity_c) * v->byte_per_pix_detc * v->maximum_swath_height_c;
+ if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) {
+ v->rounded_up_max_swath_size_bytes_c = dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_c, 256.0) + 256;
+ }
}
if (v->rounded_up_max_swath_size_bytes_y + v->rounded_up_max_swath_size_bytes_c <= v->det_buffer_size_in_kbyte * 1024.0 / 2.0) {
v->swath_height_y[k] = v->maximum_swath_height_y;
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.h b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.h
index ce35de79a6c7..ce35de79a6c7 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.h
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_math.c
index 07d18e78de49..cac72413a097 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_math.c
@@ -23,6 +23,7 @@
*
*/
+#include "os_types.h"
#include "dcn_calc_math.h"
#define isNaN(number) ((number) != (number))
@@ -69,8 +70,8 @@ float dcn_bw_max2(const float arg1, const float arg2)
float dcn_bw_floor2(const float arg, const float significance)
{
- if (significance == 0)
- return 0;
+ ASSERT(significance != 0);
+
return ((int) (arg / significance)) * significance;
}
float dcn_bw_floor(const float arg)
@@ -80,17 +81,14 @@ float dcn_bw_floor(const float arg)
float dcn_bw_ceil(const float arg)
{
- float flr = dcn_bw_floor2(arg, 1);
-
- return flr + 0.00001 >= arg ? arg : flr + 1;
+ return (int) (arg + 0.99999);
}
float dcn_bw_ceil2(const float arg, const float significance)
{
- float flr = dcn_bw_floor2(arg, significance);
- if (significance == 0)
- return 0;
- return flr + 0.00001 >= arg ? arg : flr + significance;
+ ASSERT(significance != 0);
+
+ return ((int) (arg / significance + 0.99999)) * significance;
}
float dcn_bw_max3(float v1, float v2, float v3)
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c
index 6b248cd2a461..74962791302f 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c
@@ -30,7 +30,7 @@
#include "dcn_calc_auto.h"
#include "dal_asic_id.h"
#include "resource.h"
-#include "dcn10/dcn10_resource.h"
+#include "resource/dcn10/dcn10_resource.h"
#include "dcn10/dcn10_hubbub.h"
#include "dml/dml1_display_rq_dlg_calc.h"
@@ -503,7 +503,6 @@ static void dcn_bw_calc_rq_dlg_ttu(
//input[in_idx].dout.output_standard;
/*todo: soc->sr_enter_plus_exit_time??*/
- dlg_sys_param->t_srx_delay_us = dc->dcn_ip->dcfclk_cstate_latency / v->dcf_clk_deep_sleep;
dml1_rq_dlg_get_rq_params(dml, rq_param, &input->pipe.src);
dml1_extract_rq_regs(dml, rq_regs, rq_param);
@@ -640,7 +639,6 @@ static bool dcn_bw_apply_registry_override(struct dc *dc)
{
bool updated = false;
- DC_FP_START();
if ((int)(dc->dcn_soc->sr_exit_time * 1000) != dc->debug.sr_exit_time_ns
&& dc->debug.sr_exit_time_ns) {
updated = true;
@@ -676,7 +674,6 @@ static bool dcn_bw_apply_registry_override(struct dc *dc)
dc->dcn_soc->dram_clock_change_latency =
dc->debug.dram_clock_change_latency_ns / 1000.0;
}
- DC_FP_END();
return updated;
}
@@ -693,7 +690,7 @@ static void hack_disable_optional_pipe_split(struct dcn_bw_internal_vars *v)
static void hack_force_pipe_split(struct dcn_bw_internal_vars *v,
unsigned int pixel_rate_100hz)
{
- float pixel_rate_mhz = pixel_rate_100hz / 10000;
+ float pixel_rate_mhz = pixel_rate_100hz / 10000.0;
/*
* force enabling pipe split by lower dpp clock for DPM0 to just
@@ -739,34 +736,19 @@ static void hack_bounding_box(struct dcn_bw_internal_vars *v,
hack_force_pipe_split(v, context->streams[0]->timing.pix_clk_100hz);
}
-unsigned int get_highest_allowed_voltage_level(uint32_t chip_family, uint32_t hw_internal_rev, uint32_t pci_revision_id)
+static unsigned int get_highest_allowed_voltage_level(bool is_vmin_only_asic)
{
/* for low power RV2 variants, the highest voltage level we want is 0 */
- if ((chip_family == FAMILY_RV) &&
- ASICREV_IS_RAVEN2(hw_internal_rev))
- switch (pci_revision_id) {
- case PRID_DALI_DE:
- case PRID_DALI_DF:
- case PRID_DALI_E3:
- case PRID_DALI_E4:
- case PRID_POLLOCK_94:
- case PRID_POLLOCK_95:
- case PRID_POLLOCK_E9:
- case PRID_POLLOCK_EA:
- case PRID_POLLOCK_EB:
- return 0;
- default:
- break;
- }
-
- /* we are ok with all levels */
- return 4;
+ if (is_vmin_only_asic)
+ return 0;
+ else /* we are ok with all levels */
+ return 4;
}
bool dcn_validate_bandwidth(
struct dc *dc,
struct dc_state *context,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
/*
* we want a breakdown of the various stages of validation, which the
@@ -789,7 +771,6 @@ bool dcn_validate_bandwidth(
dcn_bw_sync_calcs_and_dml(dc);
memset(v, 0, sizeof(*v));
- DC_FP_START();
v->sr_exit_time = dc->dcn_soc->sr_exit_time;
v->sr_enter_plus_exit_time = dc->dcn_soc->sr_enter_plus_exit_time;
@@ -1138,7 +1119,7 @@ bool dcn_validate_bandwidth(
BW_VAL_TRACE_END_VOLTAGE_LEVEL();
- if (v->voltage_level != number_of_states_plus_one && !fast_validate) {
+ if (v->voltage_level != number_of_states_plus_one && validate_mode == DC_VALIDATE_MODE_AND_PROGRAMMING) {
float bw_consumed = v->total_bandwidth_consumed_gbyte_per_second;
if (bw_consumed < v->fabric_and_dram_bandwidth_vmin0p65)
@@ -1277,7 +1258,7 @@ bool dcn_validate_bandwidth(
hsplit_pipe->pipe_dlg_param.vblank_end = pipe->pipe_dlg_param.vblank_end;
} else {
/* pipe not split previously needs split */
- hsplit_pipe = find_idle_secondary_pipe(&context->res_ctx, pool, pipe);
+ hsplit_pipe = resource_find_free_secondary_pipe_legacy(&context->res_ctx, pool, pipe);
ASSERT(hsplit_pipe);
split_stream_across_pipes(&context->res_ctx, pool, pipe, hsplit_pipe);
}
@@ -1305,7 +1286,7 @@ bool dcn_validate_bandwidth(
}
} else if (v->voltage_level == number_of_states_plus_one) {
BW_VAL_TRACE_SKIP(fail);
- } else if (fast_validate) {
+ } else if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) {
BW_VAL_TRACE_SKIP(fast);
}
@@ -1322,235 +1303,75 @@ bool dcn_validate_bandwidth(
bw_limit = dc->dcn_soc->percent_disp_bw_limit * v->fabric_and_dram_bandwidth_vmax0p9;
bw_limit_pass = (v->total_data_read_bandwidth / 1000.0) < bw_limit;
- DC_FP_END();
-
PERFORMANCE_TRACE_END();
BW_VAL_TRACE_FINISH();
- if (bw_limit_pass && v->voltage_level <= get_highest_allowed_voltage_level(
- dc->ctx->asic_id.chip_family,
- dc->ctx->asic_id.hw_internal_rev,
- dc->ctx->asic_id.pci_revision_id))
+ if (bw_limit_pass && v->voltage_level <= get_highest_allowed_voltage_level(dc->config.is_vmin_only_asic))
return true;
else
return false;
}
-static unsigned int dcn_find_normalized_clock_vdd_Level(
- const struct dc *dc,
- enum dm_pp_clock_type clocks_type,
- int clocks_in_khz)
-{
- int vdd_level = dcn_bw_v_min0p65;
-
- if (clocks_in_khz == 0)/*todo some clock not in the considerations*/
- return vdd_level;
-
- switch (clocks_type) {
- case DM_PP_CLOCK_TYPE_DISPLAY_CLK:
- if (clocks_in_khz > dc->dcn_soc->max_dispclk_vmax0p9*1000) {
- vdd_level = dcn_bw_v_max0p91;
- BREAK_TO_DEBUGGER();
- } else if (clocks_in_khz > dc->dcn_soc->max_dispclk_vnom0p8*1000) {
- vdd_level = dcn_bw_v_max0p9;
- } else if (clocks_in_khz > dc->dcn_soc->max_dispclk_vmid0p72*1000) {
- vdd_level = dcn_bw_v_nom0p8;
- } else if (clocks_in_khz > dc->dcn_soc->max_dispclk_vmin0p65*1000) {
- vdd_level = dcn_bw_v_mid0p72;
- } else
- vdd_level = dcn_bw_v_min0p65;
- break;
- case DM_PP_CLOCK_TYPE_DISPLAYPHYCLK:
- if (clocks_in_khz > dc->dcn_soc->phyclkv_max0p9*1000) {
- vdd_level = dcn_bw_v_max0p91;
- BREAK_TO_DEBUGGER();
- } else if (clocks_in_khz > dc->dcn_soc->phyclkv_nom0p8*1000) {
- vdd_level = dcn_bw_v_max0p9;
- } else if (clocks_in_khz > dc->dcn_soc->phyclkv_mid0p72*1000) {
- vdd_level = dcn_bw_v_nom0p8;
- } else if (clocks_in_khz > dc->dcn_soc->phyclkv_min0p65*1000) {
- vdd_level = dcn_bw_v_mid0p72;
- } else
- vdd_level = dcn_bw_v_min0p65;
- break;
-
- case DM_PP_CLOCK_TYPE_DPPCLK:
- if (clocks_in_khz > dc->dcn_soc->max_dppclk_vmax0p9*1000) {
- vdd_level = dcn_bw_v_max0p91;
- BREAK_TO_DEBUGGER();
- } else if (clocks_in_khz > dc->dcn_soc->max_dppclk_vnom0p8*1000) {
- vdd_level = dcn_bw_v_max0p9;
- } else if (clocks_in_khz > dc->dcn_soc->max_dppclk_vmid0p72*1000) {
- vdd_level = dcn_bw_v_nom0p8;
- } else if (clocks_in_khz > dc->dcn_soc->max_dppclk_vmin0p65*1000) {
- vdd_level = dcn_bw_v_mid0p72;
- } else
- vdd_level = dcn_bw_v_min0p65;
- break;
-
- case DM_PP_CLOCK_TYPE_MEMORY_CLK:
- {
- unsigned factor = (ddr4_dram_factor_single_Channel * dc->dcn_soc->number_of_channels);
-
- if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9*1000000/factor) {
- vdd_level = dcn_bw_v_max0p91;
- BREAK_TO_DEBUGGER();
- } else if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8*1000000/factor) {
- vdd_level = dcn_bw_v_max0p9;
- } else if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72*1000000/factor) {
- vdd_level = dcn_bw_v_nom0p8;
- } else if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65*1000000/factor) {
- vdd_level = dcn_bw_v_mid0p72;
- } else
- vdd_level = dcn_bw_v_min0p65;
- }
- break;
-
- case DM_PP_CLOCK_TYPE_DCFCLK:
- if (clocks_in_khz > dc->dcn_soc->dcfclkv_max0p9*1000) {
- vdd_level = dcn_bw_v_max0p91;
- BREAK_TO_DEBUGGER();
- } else if (clocks_in_khz > dc->dcn_soc->dcfclkv_nom0p8*1000) {
- vdd_level = dcn_bw_v_max0p9;
- } else if (clocks_in_khz > dc->dcn_soc->dcfclkv_mid0p72*1000) {
- vdd_level = dcn_bw_v_nom0p8;
- } else if (clocks_in_khz > dc->dcn_soc->dcfclkv_min0p65*1000) {
- vdd_level = dcn_bw_v_mid0p72;
- } else
- vdd_level = dcn_bw_v_min0p65;
- break;
-
- default:
- break;
- }
- return vdd_level;
-}
-
-unsigned int dcn_find_dcfclk_suits_all(
- const struct dc *dc,
- struct dc_clocks *clocks)
+void dcn_bw_update_from_pplib_fclks(
+ struct dc *dc,
+ struct dm_pp_clock_levels_with_voltage *fclks)
{
- unsigned vdd_level, vdd_level_temp;
- unsigned dcf_clk;
-
- /*find a common supported voltage level*/
- vdd_level = dcn_find_normalized_clock_vdd_Level(
- dc, DM_PP_CLOCK_TYPE_DISPLAY_CLK, clocks->dispclk_khz);
- vdd_level_temp = dcn_find_normalized_clock_vdd_Level(
- dc, DM_PP_CLOCK_TYPE_DISPLAYPHYCLK, clocks->phyclk_khz);
-
- vdd_level = dcn_bw_max(vdd_level, vdd_level_temp);
- vdd_level_temp = dcn_find_normalized_clock_vdd_Level(
- dc, DM_PP_CLOCK_TYPE_DPPCLK, clocks->dppclk_khz);
- vdd_level = dcn_bw_max(vdd_level, vdd_level_temp);
-
- vdd_level_temp = dcn_find_normalized_clock_vdd_Level(
- dc, DM_PP_CLOCK_TYPE_MEMORY_CLK, clocks->fclk_khz);
- vdd_level = dcn_bw_max(vdd_level, vdd_level_temp);
- vdd_level_temp = dcn_find_normalized_clock_vdd_Level(
- dc, DM_PP_CLOCK_TYPE_DCFCLK, clocks->dcfclk_khz);
-
- /*find that level conresponding dcfclk*/
- vdd_level = dcn_bw_max(vdd_level, vdd_level_temp);
- if (vdd_level == dcn_bw_v_max0p91) {
- BREAK_TO_DEBUGGER();
- dcf_clk = dc->dcn_soc->dcfclkv_max0p9*1000;
- } else if (vdd_level == dcn_bw_v_max0p9)
- dcf_clk = dc->dcn_soc->dcfclkv_max0p9*1000;
- else if (vdd_level == dcn_bw_v_nom0p8)
- dcf_clk = dc->dcn_soc->dcfclkv_nom0p8*1000;
- else if (vdd_level == dcn_bw_v_mid0p72)
- dcf_clk = dc->dcn_soc->dcfclkv_mid0p72*1000;
- else
- dcf_clk = dc->dcn_soc->dcfclkv_min0p65*1000;
+ unsigned vmin0p65_idx, vmid0p72_idx, vnom0p8_idx, vmax0p9_idx;
- DC_LOG_BANDWIDTH_CALCS("\tdcf_clk for voltage = %d\n", dcf_clk);
- return dcf_clk;
+ ASSERT(fclks->num_levels);
+
+ vmin0p65_idx = 0;
+ vmid0p72_idx = fclks->num_levels > 2 ? fclks->num_levels - 3 : 0;
+ vnom0p8_idx = fclks->num_levels > 1 ? fclks->num_levels - 2 : 0;
+ vmax0p9_idx = fclks->num_levels > 0 ? fclks->num_levels - 1 : 0;
+
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 =
+ 32 * (fclks->data[vmin0p65_idx].clocks_in_khz / 1000.0) / 1000.0;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 =
+ dc->dcn_soc->number_of_channels *
+ (fclks->data[vmid0p72_idx].clocks_in_khz / 1000.0)
+ * ddr4_dram_factor_single_Channel / 1000.0;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 =
+ dc->dcn_soc->number_of_channels *
+ (fclks->data[vnom0p8_idx].clocks_in_khz / 1000.0)
+ * ddr4_dram_factor_single_Channel / 1000.0;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 =
+ dc->dcn_soc->number_of_channels *
+ (fclks->data[vmax0p9_idx].clocks_in_khz / 1000.0)
+ * ddr4_dram_factor_single_Channel / 1000.0;
}
-static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks)
+void dcn_bw_update_from_pplib_dcfclks(
+ struct dc *dc,
+ struct dm_pp_clock_levels_with_voltage *dcfclks)
{
- int i;
-
- if (clks->num_levels == 0)
- return false;
-
- for (i = 0; i < clks->num_levels; i++)
- /* Ensure that the result is sane */
- if (clks->data[i].clocks_in_khz == 0)
- return false;
-
- return true;
+ if (dcfclks->num_levels >= 3) {
+ dc->dcn_soc->dcfclkv_min0p65 = dcfclks->data[0].clocks_in_khz / 1000.0;
+ dc->dcn_soc->dcfclkv_mid0p72 = dcfclks->data[dcfclks->num_levels - 3].clocks_in_khz / 1000.0;
+ dc->dcn_soc->dcfclkv_nom0p8 = dcfclks->data[dcfclks->num_levels - 2].clocks_in_khz / 1000.0;
+ dc->dcn_soc->dcfclkv_max0p9 = dcfclks->data[dcfclks->num_levels - 1].clocks_in_khz / 1000.0;
+ }
}
-void dcn_bw_update_from_pplib(struct dc *dc)
+void dcn_get_soc_clks(
+ struct dc *dc,
+ int *min_fclk_khz,
+ int *min_dcfclk_khz,
+ int *socclk_khz)
{
- struct dc_context *ctx = dc->ctx;
- struct dm_pp_clock_levels_with_voltage fclks = {0}, dcfclks = {0};
- bool res;
- unsigned vmin0p65_idx, vmid0p72_idx, vnom0p8_idx, vmax0p9_idx;
-
- /* TODO: This is not the proper way to obtain fabric_and_dram_bandwidth, should be min(fclk, memclk) */
- res = dm_pp_get_clock_levels_by_type_with_voltage(
- ctx, DM_PP_CLOCK_TYPE_FCLK, &fclks);
-
- DC_FP_START();
-
- if (res)
- res = verify_clock_values(&fclks);
-
- if (res) {
- ASSERT(fclks.num_levels);
-
- vmin0p65_idx = 0;
- vmid0p72_idx = fclks.num_levels -
- (fclks.num_levels > 2 ? 3 : (fclks.num_levels > 1 ? 2 : 1));
- vnom0p8_idx = fclks.num_levels - (fclks.num_levels > 1 ? 2 : 1);
- vmax0p9_idx = fclks.num_levels - 1;
-
- dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 =
- 32 * (fclks.data[vmin0p65_idx].clocks_in_khz / 1000.0) / 1000.0;
- dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 =
- dc->dcn_soc->number_of_channels *
- (fclks.data[vmid0p72_idx].clocks_in_khz / 1000.0)
- * ddr4_dram_factor_single_Channel / 1000.0;
- dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 =
- dc->dcn_soc->number_of_channels *
- (fclks.data[vnom0p8_idx].clocks_in_khz / 1000.0)
- * ddr4_dram_factor_single_Channel / 1000.0;
- dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 =
- dc->dcn_soc->number_of_channels *
- (fclks.data[vmax0p9_idx].clocks_in_khz / 1000.0)
- * ddr4_dram_factor_single_Channel / 1000.0;
- } else
- BREAK_TO_DEBUGGER();
-
- DC_FP_END();
-
- res = dm_pp_get_clock_levels_by_type_with_voltage(
- ctx, DM_PP_CLOCK_TYPE_DCFCLK, &dcfclks);
-
- DC_FP_START();
-
- if (res)
- res = verify_clock_values(&dcfclks);
-
- if (res && dcfclks.num_levels >= 3) {
- dc->dcn_soc->dcfclkv_min0p65 = dcfclks.data[0].clocks_in_khz / 1000.0;
- dc->dcn_soc->dcfclkv_mid0p72 = dcfclks.data[dcfclks.num_levels - 3].clocks_in_khz / 1000.0;
- dc->dcn_soc->dcfclkv_nom0p8 = dcfclks.data[dcfclks.num_levels - 2].clocks_in_khz / 1000.0;
- dc->dcn_soc->dcfclkv_max0p9 = dcfclks.data[dcfclks.num_levels - 1].clocks_in_khz / 1000.0;
- } else
- BREAK_TO_DEBUGGER();
-
- DC_FP_END();
+ *min_fclk_khz = dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 * 1000000 / 32;
+ *min_dcfclk_khz = dc->dcn_soc->dcfclkv_min0p65 * 1000;
+ *socclk_khz = dc->dcn_soc->socclk * 1000;
}
-void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc)
+void dcn_bw_notify_pplib_of_wm_ranges(
+ struct dc *dc,
+ int min_fclk_khz,
+ int min_dcfclk_khz,
+ int socclk_khz)
{
struct pp_smu_funcs_rv *pp = NULL;
struct pp_smu_wm_range_sets ranges = {0};
- int min_fclk_khz, min_dcfclk_khz, socclk_khz;
const int overdrive = 5000000; /* 5 GHz to cover Overdrive */
if (dc->res_pool->pp_smu)
@@ -1558,12 +1379,6 @@ void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc)
if (!pp || !pp->set_wm_ranges)
return;
- DC_FP_START();
- min_fclk_khz = dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 * 1000000 / 32;
- min_dcfclk_khz = dc->dcn_soc->dcfclkv_min0p65 * 1000;
- socclk_khz = dc->dcn_soc->socclk * 1000;
- DC_FP_END();
-
/* Now notify PPLib/SMU about which Watermarks sets they should select
* depending on DPM state they are in. And update BW MGR GFX Engine and
* Memory clock member variables for Watermarks calculations for each
@@ -1613,7 +1428,6 @@ void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc)
void dcn_bw_sync_calcs_and_dml(struct dc *dc)
{
- DC_FP_START();
DC_LOG_BANDWIDTH_CALCS("sr_exit_time: %f ns\n"
"sr_enter_plus_exit_time: %f ns\n"
"urgent_latency: %f ns\n"
@@ -1802,5 +1616,4 @@ void dcn_bw_sync_calcs_and_dml(struct dc *dc)
dc->dml.ip.bug_forcing_LC_req_same_size_fixed =
dc->dcn_ip->bug_forcing_luma_and_chroma_request_to_same_size_fixed == dcn_bw_yes;
dc->dml.ip.dcfclk_cstate_latency = dc->dcn_ip->dcfclk_cstate_latency;
- DC_FP_END();
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
index 2a1983324629..6e669a2c5b2d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
@@ -29,7 +29,14 @@
#define DC__PRESENT 1
#define DC__PRESENT__1 1
#define DC__NUM_DPP 4
-#define DC__VOLTAGE_STATES 9
+
+/**
+ * @DC__VOLTAGE_STATES:
+ *
+ * Define the maximum number of states supported by the ASIC. Every ASIC has a
+ * specific number of states; this macro defines the maximum number of states.
+ */
+#define DC__VOLTAGE_STATES 40
#define DC__NUM_DPP__4 1
#define DC__NUM_DPP__0_PRESENT 1
#define DC__NUM_DPP__1_PRESENT 1
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
new file mode 100644
index 000000000000..c5e84190c17a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dcn10/dcn10_resource.h"
+
+#include "dcn10_fpu.h"
+#include "resource.h"
+#include "amdgpu_dm/dc_fpu.h"
+
+/**
+ * DOC: DCN10 FPU manipulation Overview
+ *
+ * The DCN architecture relies on FPU operations, which require special
+ * compilation flags and the use of kernel_fpu_begin/end functions; ideally, we
+ * want to avoid spreading FPU access across multiple files. With this idea in
+ * mind, this file aims to centralize DCN10 functions that require FPU access
+ * in a single place. Code in this file follows the following code pattern:
+ *
+ * 1. Functions that use FPU operations should be isolated in static functions.
+ * 2. The FPU functions should have the noinline attribute to ensure anything
+ * that deals with FP register is contained within this call.
+ * 3. All functions that need to be accessed outside this file require a
+ * public interface that does not use any FPU reference.
+ * 4. Developers **must not** use DC_FP_START/END in this file, but they need
+ * to ensure that the caller invokes it before accessing any function
+ * available in this file. For this reason, public functions in this file
+ * must invoke dc_assert_fp_enabled();
+ *
+ * Let's expand a little more on the idea behind the code pattern. To fully
+ * isolate FPU operations in a single place, we must avoid situations where
+ * compilers spill FP values to registers because FP is enabled in a specific
+ * C file. Note that even if we isolate all FPU functions in a single file and
+ * call its interface from other files, the compiler might enable the use of
+ * FPU before we call DC_FP_START. Nevertheless, it is the programmer's
+ * responsibility to invoke DC_FP_START/END in the correct place. To highlight
+ * situations where developers forget to use the FP protection before calling
+ * the DC FPU interface functions, we introduce a helper that checks if the
+ * function is invoked under FP protection. If not, it will trigger a kernel
+ * warning.
+ */
+
+struct _vcs_dpi_ip_params_st dcn1_0_ip = {
+ .rob_buffer_size_kbytes = 64,
+ .det_buffer_size_kbytes = 164,
+ .dpte_buffer_size_in_pte_reqs_luma = 42,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .pte_enable = 1,
+ .pte_chunk_size_kbytes = 2,
+ .meta_chunk_size_kbytes = 2,
+ .writeback_chunk_size_kbytes = 2,
+ .line_buffer_size_bits = 589824,
+ .max_line_buffer_lines = 12,
+ .IsLineBufferBppFixed = 0,
+ .LineBufferFixedBpp = -1,
+ .writeback_luma_buffer_size_kbytes = 12,
+ .writeback_chroma_buffer_size_kbytes = 8,
+ .max_num_dpp = 4,
+ .max_num_wb = 2,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 4,
+ .max_vscl_ratio = 4,
+ .hscl_mults = 4,
+ .vscl_mults = 4,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dispclk_ramp_margin_percent = 1,
+ .underscan_factor = 1.10,
+ .min_vblank_lines = 14,
+ .dppclk_delay_subtotal = 90,
+ .dispclk_delay_subtotal = 42,
+ .dcfclk_cstate_latency = 10,
+ .max_inter_dcn_tile_repeaters = 8,
+ .can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one = 0,
+ .bug_forcing_LC_req_same_size_fixed = 0,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
+ .sr_exit_time_us = 9.0,
+ .sr_enter_plus_exit_time_us = 11.0,
+ .urgent_latency_us = 4.0,
+ .writeback_latency_us = 12.0,
+ .ideal_dram_bw_after_urgent_percent = 80.0,
+ .max_request_size_bytes = 256,
+ .downspread_percent = 0.5,
+ .dram_page_open_time_ns = 50.0,
+ .dram_rw_turnaround_time_ns = 17.5,
+ .dram_return_buffer_per_channel_bytes = 8192,
+ .round_trip_ping_latency_dcfclk_cycles = 128,
+ .urgent_out_of_order_return_per_channel_bytes = 256,
+ .channel_interleave_bytes = 256,
+ .num_banks = 8,
+ .num_chans = 2,
+ .vmm_page_size_bytes = 4096,
+ .dram_clock_change_latency_us = 17.0,
+ .writeback_dram_clock_change_latency_us = 23.0,
+ .return_bus_width_bytes = 64,
+};
+
+void dcn10_resource_construct_fp(struct dc *dc)
+{
+ dc_assert_fp_enabled();
+ if (dc->ctx->dce_version == DCN_VERSION_1_01) {
+ struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
+ struct dcn_ip_params *dcn_ip = dc->dcn_ip;
+ struct display_mode_lib *dml = &dc->dml;
+
+ dml->ip.max_num_dpp = 3;
+ /* TODO how to handle 23.84? */
+ dcn_soc->dram_clock_change_latency = 23;
+ dcn_ip->max_num_dpp = 3;
+ }
+ if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+ dc->dcn_soc->urgent_latency = 3;
+ dc->debug.disable_dmcu = true;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
+ }
+
+ dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
+ ASSERT(dc->dcn_soc->number_of_channels < 3);
+ if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
+ dc->dcn_soc->number_of_channels = 2;
+
+ if (dc->dcn_soc->number_of_channels == 1) {
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
+ if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev))
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
+ }
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
new file mode 100644
index 000000000000..1bf6b12f5663
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN10_FPU_H__
+#define __DCN10_FPU_H__
+
+void dcn10_resource_construct_fp(struct dc *dc);
+
+#endif /* __DCN10_FPU_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index d590dc917363..7aaf13bbd4e4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -25,8 +25,28 @@
*/
#include "resource.h"
-
+#include "clk_mgr.h"
+#include "dchubbub.h"
+#include "dcn20/dcn20_resource.h"
+#include "dcn21/dcn21_resource.h"
+#include "clk_mgr/dcn21/rn_clk_mgr.h"
+#include "link_service.h"
#include "dcn20_fpu.h"
+#include "dc_state_priv.h"
+
+#define DC_LOGGER \
+ dc->ctx->logger /* expands in place: a pointer named 'dc' must be in scope where DC_LOGGER is used */
+#define DC_LOGGER_INIT(logger) /* deliberately expands to nothing in this file */
+
+#ifndef MAX
+#define MAX(X, Y) ((X) > (Y) ? (X) : (Y)) /* NOTE: each argument is evaluated twice; do not pass expressions with side effects */
+#endif /* MAX */
+#ifndef MIN
+#define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) /* NOTE: each argument is evaluated twice; do not pass expressions with side effects */
+#endif /* MIN */
+
+/* Constants */
+#define LPDDR_MEM_RETRAIN_LATENCY 4.977 /* Number obtained from LPDDR4 Training Counter Requirement doc */
/**
* DOC: DCN2x FPU manipulation Overview
@@ -61,6 +81,911 @@
* warning.
*/
+struct _vcs_dpi_ip_params_st dcn2_0_ip = { /* IP parameter table for DCN 2.0 (6 OTG, 6 DPP, 6 DSC, 1 writeback) */
+ .odm_capable = 1,
+ .gpuvm_enable = 0,
+ .hostvm_enable = 0,
+ .gpuvm_max_page_table_levels = 4,
+ .hostvm_max_page_table_levels = 4,
+ .hostvm_cached_page_table_levels = 0,
+ .pte_group_size_bytes = 2048,
+ .num_dsc = 6,
+ .rob_buffer_size_kbytes = 168,
+ .det_buffer_size_kbytes = 164,
+ .dpte_buffer_size_in_pte_reqs_luma = 84,
+ .pde_proc_buffer_size_64k_reqs = 48,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .pte_chunk_size_kbytes = 2,
+ .meta_chunk_size_kbytes = 2,
+ .writeback_chunk_size_kbytes = 2,
+ .line_buffer_size_bits = 789504,
+ .is_line_buffer_bpp_fixed = 0,
+ .line_buffer_fixed_bpp = 0,
+ .dcc_supported = true,
+ .max_line_buffer_lines = 12,
+ .writeback_luma_buffer_size_kbytes = 12,
+ .writeback_chroma_buffer_size_kbytes = 8,
+ .writeback_chroma_line_buffer_width_pixels = 4,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 12,
+ .writeback_max_vscl_taps = 12,
+ .writeback_line_buffer_luma_buffer_size = 0,
+ .writeback_line_buffer_chroma_buffer_size = 14643,
+ .cursor_buffer_size = 8,
+ .cursor_chunk_size = 2,
+ .max_num_otg = 6,
+ .max_num_dpp = 6,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 8,
+ .max_vscl_ratio = 8,
+ .hscl_mults = 4,
+ .vscl_mults = 4,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dispclk_ramp_margin_percent = 1,
+ .underscan_factor = 1.10,
+ .min_vblank_lines = 32, //
+ .dppclk_delay_subtotal = 77, //
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_cnvc_formatter = 8,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 87, //
+ .dcfclk_cstate_latency = 10, // SRExitTime
+ .max_inter_dcn_tile_repeaters = 8,
+ .xfc_supported = true,
+ .xfc_fill_bw_overhead_percent = 10.0,
+ .xfc_fill_constant_bytes = 0,
+ .number_of_cursors = 1,
+};
+
+struct _vcs_dpi_ip_params_st dcn2_0_nv14_ip = { /* IP parameter table for the NV14 variant of DCN 2.0 (5 OTG, 5 DPP, 5 DSC, 1 writeback) */
+ .odm_capable = 1,
+ .gpuvm_enable = 0,
+ .hostvm_enable = 0,
+ .gpuvm_max_page_table_levels = 4,
+ .hostvm_max_page_table_levels = 4,
+ .hostvm_cached_page_table_levels = 0,
+ .num_dsc = 5,
+ .rob_buffer_size_kbytes = 168,
+ .det_buffer_size_kbytes = 164,
+ .dpte_buffer_size_in_pte_reqs_luma = 84,
+ .dpte_buffer_size_in_pte_reqs_chroma = 42,//todo
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .pte_enable = 1,
+ .max_page_table_levels = 4,
+ .pte_chunk_size_kbytes = 2,
+ .meta_chunk_size_kbytes = 2,
+ .writeback_chunk_size_kbytes = 2,
+ .line_buffer_size_bits = 789504,
+ .is_line_buffer_bpp_fixed = 0,
+ .line_buffer_fixed_bpp = 0,
+ .dcc_supported = true,
+ .max_line_buffer_lines = 12,
+ .writeback_luma_buffer_size_kbytes = 12,
+ .writeback_chroma_buffer_size_kbytes = 8,
+ .writeback_chroma_line_buffer_width_pixels = 4,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 12,
+ .writeback_max_vscl_taps = 12,
+ .writeback_line_buffer_luma_buffer_size = 0,
+ .writeback_line_buffer_chroma_buffer_size = 14643,
+ .cursor_buffer_size = 8,
+ .cursor_chunk_size = 2,
+ .max_num_otg = 5,
+ .max_num_dpp = 5,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 8,
+ .max_vscl_ratio = 8,
+ .hscl_mults = 4,
+ .vscl_mults = 4,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dispclk_ramp_margin_percent = 1,
+ .underscan_factor = 1.10,
+ .min_vblank_lines = 32, //
+ .dppclk_delay_subtotal = 77, //
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_cnvc_formatter = 8,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 87, //
+ .dcfclk_cstate_latency = 10, // SRExitTime
+ .max_inter_dcn_tile_repeaters = 8,
+ .xfc_supported = true,
+ .xfc_fill_bw_overhead_percent = 10.0,
+ .xfc_fill_constant_bytes = 0,
+ .ptoi_supported = 0,
+ .number_of_cursors = 1,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn2_0_soc = { /* SoC bounding box for DCN 2.0: per-state clock limits plus memory/latency parameters */
+ /* Defaults that get patched on driver load from firmware. */
+ .clock_limits = {
+ {
+ .state = 0,
+ .dcfclk_mhz = 560.0,
+ .fabricclk_mhz = 560.0,
+ .dispclk_mhz = 513.0,
+ .dppclk_mhz = 513.0,
+ .phyclk_mhz = 540.0,
+ .socclk_mhz = 560.0,
+ .dscclk_mhz = 171.0,
+ .dram_speed_mts = 8960.0,
+ },
+ {
+ .state = 1,
+ .dcfclk_mhz = 694.0,
+ .fabricclk_mhz = 694.0,
+ .dispclk_mhz = 642.0,
+ .dppclk_mhz = 642.0,
+ .phyclk_mhz = 600.0,
+ .socclk_mhz = 694.0,
+ .dscclk_mhz = 214.0,
+ .dram_speed_mts = 11104.0,
+ },
+ {
+ .state = 2,
+ .dcfclk_mhz = 875.0,
+ .fabricclk_mhz = 875.0,
+ .dispclk_mhz = 734.0,
+ .dppclk_mhz = 734.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 875.0,
+ .dscclk_mhz = 245.0,
+ .dram_speed_mts = 14000.0,
+ },
+ {
+ .state = 3,
+ .dcfclk_mhz = 1000.0,
+ .fabricclk_mhz = 1000.0,
+ .dispclk_mhz = 1100.0,
+ .dppclk_mhz = 1100.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1000.0,
+ .dscclk_mhz = 367.0,
+ .dram_speed_mts = 16000.0,
+ },
+ {
+ .state = 4,
+ .dcfclk_mhz = 1200.0,
+ .fabricclk_mhz = 1200.0,
+ .dispclk_mhz = 1284.0,
+ .dppclk_mhz = 1284.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1200.0,
+ .dscclk_mhz = 428.0,
+ .dram_speed_mts = 16000.0,
+ },
+ /*Extra state, no dispclk ramping*/
+ {
+ .state = 5,
+ .dcfclk_mhz = 1200.0,
+ .fabricclk_mhz = 1200.0,
+ .dispclk_mhz = 1284.0,
+ .dppclk_mhz = 1284.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1200.0,
+ .dscclk_mhz = 428.0,
+ .dram_speed_mts = 16000.0,
+ },
+ },
+ .num_states = 5, /* clock_limits[] above holds 6 entries; entry 5 is the extra "no dispclk ramping" state and duplicates entry 4 */
+ .sr_exit_time_us = 8.6,
+ .sr_enter_plus_exit_time_us = 10.9,
+ .urgent_latency_us = 4.0,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 40.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 40.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
+ .max_avg_sdp_bw_use_normal_percent = 40.0,
+ .max_avg_dram_bw_use_normal_percent = 40.0,
+ .writeback_latency_us = 12.0,
+ .ideal_dram_bw_after_urgent_percent = 40.0,
+ .max_request_size_bytes = 256,
+ .dram_channel_width_bytes = 2,
+ .fabric_datapath_to_dcn_data_return_bytes = 64,
+ .dcn_downspread_percent = 0.5,
+ .downspread_percent = 0.38,
+ .dram_page_open_time_ns = 50.0,
+ .dram_rw_turnaround_time_ns = 17.5,
+ .dram_return_buffer_per_channel_bytes = 8192,
+ .round_trip_ping_latency_dcfclk_cycles = 131,
+ .urgent_out_of_order_return_per_channel_bytes = 256,
+ .channel_interleave_bytes = 256,
+ .num_banks = 8,
+ .num_chans = 16,
+ .vmm_page_size_bytes = 4096,
+ .dram_clock_change_latency_us = 404.0,
+ .dummy_pstate_latency_us = 5.0,
+ .writeback_dram_clock_change_latency_us = 23.0,
+ .return_bus_width_bytes = 64,
+ .dispclk_dppclk_vco_speed_mhz = 3850,
+ .xfc_bus_transport_time_us = 20,
+ .xfc_xbuf_latency_tolerance_us = 4,
+ .use_urgent_burst_bw = 0
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv14_soc = { /* SoC bounding box for the NV14 variant of DCN 2.0 (8 DRAM channels) */
+ .clock_limits = {
+ {
+ .state = 0,
+ .dcfclk_mhz = 560.0,
+ .fabricclk_mhz = 560.0,
+ .dispclk_mhz = 513.0,
+ .dppclk_mhz = 513.0,
+ .phyclk_mhz = 540.0,
+ .socclk_mhz = 560.0,
+ .dscclk_mhz = 171.0,
+ .dram_speed_mts = 8960.0,
+ },
+ {
+ .state = 1,
+ .dcfclk_mhz = 694.0,
+ .fabricclk_mhz = 694.0,
+ .dispclk_mhz = 642.0,
+ .dppclk_mhz = 642.0,
+ .phyclk_mhz = 600.0,
+ .socclk_mhz = 694.0,
+ .dscclk_mhz = 214.0,
+ .dram_speed_mts = 11104.0,
+ },
+ {
+ .state = 2,
+ .dcfclk_mhz = 875.0,
+ .fabricclk_mhz = 875.0,
+ .dispclk_mhz = 734.0,
+ .dppclk_mhz = 734.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 875.0,
+ .dscclk_mhz = 245.0,
+ .dram_speed_mts = 14000.0,
+ },
+ {
+ .state = 3,
+ .dcfclk_mhz = 1000.0,
+ .fabricclk_mhz = 1000.0,
+ .dispclk_mhz = 1100.0,
+ .dppclk_mhz = 1100.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1000.0,
+ .dscclk_mhz = 367.0,
+ .dram_speed_mts = 16000.0,
+ },
+ {
+ .state = 4,
+ .dcfclk_mhz = 1200.0,
+ .fabricclk_mhz = 1200.0,
+ .dispclk_mhz = 1284.0,
+ .dppclk_mhz = 1284.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1200.0,
+ .dscclk_mhz = 428.0,
+ .dram_speed_mts = 16000.0,
+ },
+ /*Extra state, no dispclk ramping*/
+ {
+ .state = 5,
+ .dcfclk_mhz = 1200.0,
+ .fabricclk_mhz = 1200.0,
+ .dispclk_mhz = 1284.0,
+ .dppclk_mhz = 1284.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1200.0,
+ .dscclk_mhz = 428.0,
+ .dram_speed_mts = 16000.0,
+ },
+ },
+ .num_states = 5, /* clock_limits[] above holds 6 entries; entry 5 is the extra "no dispclk ramping" state and duplicates entry 4 */
+ .sr_exit_time_us = 11.6,
+ .sr_enter_plus_exit_time_us = 13.9,
+ .urgent_latency_us = 4.0,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 40.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 40.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
+ .max_avg_sdp_bw_use_normal_percent = 40.0,
+ .max_avg_dram_bw_use_normal_percent = 40.0,
+ .writeback_latency_us = 12.0,
+ .ideal_dram_bw_after_urgent_percent = 40.0,
+ .max_request_size_bytes = 256,
+ .dram_channel_width_bytes = 2,
+ .fabric_datapath_to_dcn_data_return_bytes = 64,
+ .dcn_downspread_percent = 0.5,
+ .downspread_percent = 0.38,
+ .dram_page_open_time_ns = 50.0,
+ .dram_rw_turnaround_time_ns = 17.5,
+ .dram_return_buffer_per_channel_bytes = 8192,
+ .round_trip_ping_latency_dcfclk_cycles = 131,
+ .urgent_out_of_order_return_per_channel_bytes = 256,
+ .channel_interleave_bytes = 256,
+ .num_banks = 8,
+ .num_chans = 8,
+ .vmm_page_size_bytes = 4096,
+ .dram_clock_change_latency_us = 404.0,
+ .dummy_pstate_latency_us = 5.0,
+ .writeback_dram_clock_change_latency_us = 23.0,
+ .return_bus_width_bytes = 64,
+ .dispclk_dppclk_vco_speed_mhz = 3850,
+ .xfc_bus_transport_time_us = 20,
+ .xfc_xbuf_latency_tolerance_us = 4,
+ .use_urgent_burst_bw = 0
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = { /* SoC bounding box for the NV12 variant of DCN 2.0 (16-byte DRAM channel width, lower dram_speed_mts values) */
+ .clock_limits = {
+ {
+ .state = 0,
+ .dcfclk_mhz = 560.0,
+ .fabricclk_mhz = 560.0,
+ .dispclk_mhz = 513.0,
+ .dppclk_mhz = 513.0,
+ .phyclk_mhz = 540.0,
+ .socclk_mhz = 560.0,
+ .dscclk_mhz = 171.0,
+ .dram_speed_mts = 1069.0,
+ },
+ {
+ .state = 1,
+ .dcfclk_mhz = 694.0,
+ .fabricclk_mhz = 694.0,
+ .dispclk_mhz = 642.0,
+ .dppclk_mhz = 642.0,
+ .phyclk_mhz = 600.0,
+ .socclk_mhz = 694.0,
+ .dscclk_mhz = 214.0,
+ .dram_speed_mts = 1324.0,
+ },
+ {
+ .state = 2,
+ .dcfclk_mhz = 875.0,
+ .fabricclk_mhz = 875.0,
+ .dispclk_mhz = 734.0,
+ .dppclk_mhz = 734.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 875.0,
+ .dscclk_mhz = 245.0,
+ .dram_speed_mts = 1670.0,
+ },
+ {
+ .state = 3,
+ .dcfclk_mhz = 1000.0,
+ .fabricclk_mhz = 1000.0,
+ .dispclk_mhz = 1100.0,
+ .dppclk_mhz = 1100.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1000.0,
+ .dscclk_mhz = 367.0,
+ .dram_speed_mts = 2000.0,
+ },
+ {
+ .state = 4,
+ .dcfclk_mhz = 1200.0,
+ .fabricclk_mhz = 1200.0,
+ .dispclk_mhz = 1284.0,
+ .dppclk_mhz = 1284.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1200.0,
+ .dscclk_mhz = 428.0,
+ .dram_speed_mts = 2000.0,
+ },
+ {
+ .state = 5,
+ .dcfclk_mhz = 1200.0,
+ .fabricclk_mhz = 1200.0,
+ .dispclk_mhz = 1284.0,
+ .dppclk_mhz = 1284.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1200.0,
+ .dscclk_mhz = 428.0,
+ .dram_speed_mts = 2000.0,
+ },
+ },
+
+ .num_states = 5, /* clock_limits[] above holds 6 entries (states 0-5); entry 5 duplicates entry 4 */
+ .sr_exit_time_us = 1.9,
+ .sr_enter_plus_exit_time_us = 4.4,
+ .urgent_latency_us = 3.0,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 40.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 40.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
+ .max_avg_sdp_bw_use_normal_percent = 40.0,
+ .max_avg_dram_bw_use_normal_percent = 40.0,
+ .writeback_latency_us = 12.0,
+ .ideal_dram_bw_after_urgent_percent = 40.0,
+ .max_request_size_bytes = 256,
+ .dram_channel_width_bytes = 16,
+ .fabric_datapath_to_dcn_data_return_bytes = 64,
+ .dcn_downspread_percent = 0.5,
+ .downspread_percent = 0.5,
+ .dram_page_open_time_ns = 50.0,
+ .dram_rw_turnaround_time_ns = 17.5,
+ .dram_return_buffer_per_channel_bytes = 8192,
+ .round_trip_ping_latency_dcfclk_cycles = 131,
+ .urgent_out_of_order_return_per_channel_bytes = 4096,
+ .channel_interleave_bytes = 256,
+ .num_banks = 8,
+ .num_chans = 16,
+ .vmm_page_size_bytes = 4096,
+ .dram_clock_change_latency_us = 45.0,
+ .writeback_dram_clock_change_latency_us = 23.0,
+ .return_bus_width_bytes = 64,
+ .dispclk_dppclk_vco_speed_mhz = 3850,
+ .xfc_bus_transport_time_us = 20,
+ .xfc_xbuf_latency_tolerance_us = 50,
+ .use_urgent_burst_bw = 0,
+};
+
+struct _vcs_dpi_ip_params_st dcn2_1_ip = { /* IP parameter table for DCN 2.1 (4 OTG, 4 DPP, 3 DSC; GPUVM and HostVM enabled, XFC not supported) */
+ .odm_capable = 1,
+ .gpuvm_enable = 1,
+ .hostvm_enable = 1,
+ .gpuvm_max_page_table_levels = 1,
+ .hostvm_max_page_table_levels = 4,
+ .hostvm_cached_page_table_levels = 2,
+ .num_dsc = 3,
+ .rob_buffer_size_kbytes = 168,
+ .det_buffer_size_kbytes = 164,
+ .dpte_buffer_size_in_pte_reqs_luma = 44,
+ .dpte_buffer_size_in_pte_reqs_chroma = 42,//todo
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .pte_enable = 1,
+ .max_page_table_levels = 4,
+ .pte_chunk_size_kbytes = 2,
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 2,
+ .line_buffer_size_bits = 789504,
+ .is_line_buffer_bpp_fixed = 0,
+ .line_buffer_fixed_bpp = 0,
+ .dcc_supported = true,
+ .max_line_buffer_lines = 12,
+ .writeback_luma_buffer_size_kbytes = 12,
+ .writeback_chroma_buffer_size_kbytes = 8,
+ .writeback_chroma_line_buffer_width_pixels = 4,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 12,
+ .writeback_max_vscl_taps = 12,
+ .writeback_line_buffer_luma_buffer_size = 0,
+ .writeback_line_buffer_chroma_buffer_size = 14643,
+ .cursor_buffer_size = 8,
+ .cursor_chunk_size = 2,
+ .max_num_otg = 4,
+ .max_num_dpp = 4,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 4,
+ .max_vscl_ratio = 4,
+ .hscl_mults = 4,
+ .vscl_mults = 4,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dispclk_ramp_margin_percent = 1,
+ .underscan_factor = 1.10,
+ .min_vblank_lines = 32, //
+ .dppclk_delay_subtotal = 77, //
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_cnvc_formatter = 8,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 87, //
+ .dcfclk_cstate_latency = 10, // SRExitTime
+ .max_inter_dcn_tile_repeaters = 8,
+
+ .xfc_supported = false,
+ .xfc_fill_bw_overhead_percent = 10.0,
+ .xfc_fill_constant_bytes = 0,
+ .ptoi_supported = 0,
+ .number_of_cursors = 1,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = { /* SoC bounding box for DCN 2.1: per-state clock limits plus memory/latency parameters */
+ .clock_limits = {
+ {
+ .state = 0,
+ .dcfclk_mhz = 400.0,
+ .fabricclk_mhz = 400.0,
+ .dispclk_mhz = 600.0,
+ .dppclk_mhz = 400.00,
+ .phyclk_mhz = 600.0,
+ .socclk_mhz = 278.0,
+ .dscclk_mhz = 205.67,
+ .dram_speed_mts = 1600.0,
+ },
+ {
+ .state = 1,
+ .dcfclk_mhz = 464.52,
+ .fabricclk_mhz = 800.0,
+ .dispclk_mhz = 654.55,
+ .dppclk_mhz = 626.09,
+ .phyclk_mhz = 600.0,
+ .socclk_mhz = 278.0,
+ .dscclk_mhz = 205.67,
+ .dram_speed_mts = 1600.0,
+ },
+ {
+ .state = 2,
+ .dcfclk_mhz = 514.29,
+ .fabricclk_mhz = 933.0,
+ .dispclk_mhz = 757.89,
+ .dppclk_mhz = 685.71,
+ .phyclk_mhz = 600.0,
+ .socclk_mhz = 278.0,
+ .dscclk_mhz = 287.67,
+ .dram_speed_mts = 1866.0,
+ },
+ {
+ .state = 3,
+ .dcfclk_mhz = 576.00,
+ .fabricclk_mhz = 1067.0,
+ .dispclk_mhz = 847.06,
+ .dppclk_mhz = 757.89,
+ .phyclk_mhz = 600.0,
+ .socclk_mhz = 715.0,
+ .dscclk_mhz = 318.334,
+ .dram_speed_mts = 2134.0,
+ },
+ {
+ .state = 4,
+ .dcfclk_mhz = 626.09,
+ .fabricclk_mhz = 1200.0,
+ .dispclk_mhz = 900.00,
+ .dppclk_mhz = 847.06,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 953.0,
+ .dscclk_mhz = 300.0,
+ .dram_speed_mts = 2400.0,
+ },
+ {
+ .state = 5,
+ .dcfclk_mhz = 685.71,
+ .fabricclk_mhz = 1333.0,
+ .dispclk_mhz = 1028.57,
+ .dppclk_mhz = 960.00,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 278.0,
+ .dscclk_mhz = 342.86,
+ .dram_speed_mts = 2666.0,
+ },
+ {
+ .state = 6,
+ .dcfclk_mhz = 757.89,
+ .fabricclk_mhz = 1467.0,
+ .dispclk_mhz = 1107.69,
+ .dppclk_mhz = 1028.57,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 715.0,
+ .dscclk_mhz = 369.23,
+ .dram_speed_mts = 3200.0,
+ },
+ {
+ .state = 7,
+ .dcfclk_mhz = 847.06,
+ .fabricclk_mhz = 1600.0,
+ .dispclk_mhz = 1395.0,
+ .dppclk_mhz = 1285.00,
+ .phyclk_mhz = 1325.0,
+ .socclk_mhz = 953.0,
+ .dscclk_mhz = 489.0,
+ .dram_speed_mts = 4266.0,
+ },
+ /*Extra state, no dispclk ramping*/
+ {
+ .state = 8,
+ .dcfclk_mhz = 847.06,
+ .fabricclk_mhz = 1600.0,
+ .dispclk_mhz = 1395.0,
+ .dppclk_mhz = 1285.0,
+ .phyclk_mhz = 1325.0,
+ .socclk_mhz = 953.0,
+ .dscclk_mhz = 489.0,
+ .dram_speed_mts = 4266.0,
+ },
+
+ },
+
+ .sr_exit_time_us = 12.5,
+ .sr_enter_plus_exit_time_us = 17.0,
+ .urgent_latency_us = 4.0,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 75.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 100.0,
+ .writeback_latency_us = 12.0,
+ .max_request_size_bytes = 256,
+ .dram_channel_width_bytes = 4,
+ .fabric_datapath_to_dcn_data_return_bytes = 32,
+ .dcn_downspread_percent = 0.5,
+ .downspread_percent = 0.38,
+ .dram_page_open_time_ns = 50.0,
+ .dram_rw_turnaround_time_ns = 17.5,
+ .dram_return_buffer_per_channel_bytes = 8192,
+ .round_trip_ping_latency_dcfclk_cycles = 128,
+ .urgent_out_of_order_return_per_channel_bytes = 4096,
+ .channel_interleave_bytes = 256,
+ .num_banks = 8,
+ .num_chans = 4,
+ .vmm_page_size_bytes = 4096,
+ .dram_clock_change_latency_us = 23.84,
+ .return_bus_width_bytes = 64,
+ .dispclk_dppclk_vco_speed_mhz = 3600,
+ .xfc_bus_transport_time_us = 4,
+ .xfc_xbuf_latency_tolerance_us = 4,
+ .use_urgent_burst_bw = 1,
+ .num_states = 8 /* clock_limits[] above holds 9 entries; entry 8 is the extra "no dispclk ramping" state and duplicates entry 7 */
+};
+
+struct wm_table ddr4_wm_table_gs = { /* DDR4 watermark table, "gs" variant: four p-state-change sets WM_A..WM_D, all marked valid */
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 7.09,
+ .sr_enter_plus_exit_time_us = 8.14,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ }
+};
+
+struct wm_table lpddr4_wm_table_gs = { /* LPDDR4 watermark table, "gs" variant: four p-state-change sets WM_A..WM_D, all marked valid */
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 5.32,
+ .sr_enter_plus_exit_time_us = 6.38,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.82,
+ .sr_enter_plus_exit_time_us = 11.196,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.89,
+ .sr_enter_plus_exit_time_us = 11.24,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.748,
+ .sr_enter_plus_exit_time_us = 11.102,
+ .valid = true,
+ },
+ }
+};
+
+struct wm_table lpddr4_wm_table_with_disabled_ppt = { /* LPDDR4 watermark table for the "disabled ppt" case; compared with lpddr4_wm_table_rn only the WM_A self-refresh times differ */
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 8.32,
+ .sr_enter_plus_exit_time_us = 9.38,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.82,
+ .sr_enter_plus_exit_time_us = 11.196,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.89,
+ .sr_enter_plus_exit_time_us = 11.24,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.748,
+ .sr_enter_plus_exit_time_us = 11.102,
+ .valid = true,
+ },
+ }
+};
+
+struct wm_table ddr4_wm_table_rn = { /* DDR4 watermark table, "rn" variant: four p-state-change sets WM_A..WM_D, all marked valid */
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 11.90,
+ .sr_enter_plus_exit_time_us = 12.80,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 13.18,
+ .sr_enter_plus_exit_time_us = 14.30,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 13.18,
+ .sr_enter_plus_exit_time_us = 14.30,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 13.18,
+ .sr_enter_plus_exit_time_us = 14.30,
+ .valid = true,
+ },
+ }
+};
+
+struct wm_table ddr4_1R_wm_table_rn = { /* DDR4 "1R" watermark table, "rn" variant: all four sets WM_A..WM_D carry identical values */
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 13.90,
+ .sr_enter_plus_exit_time_us = 14.80,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 13.90,
+ .sr_enter_plus_exit_time_us = 14.80,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 13.90,
+ .sr_enter_plus_exit_time_us = 14.80,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 13.90,
+ .sr_enter_plus_exit_time_us = 14.80,
+ .valid = true,
+ },
+ }
+};
+
+struct wm_table lpddr4_wm_table_rn = { /* LPDDR4 watermark table, "rn" variant: four p-state-change sets WM_A..WM_D, all marked valid */
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 7.32,
+ .sr_enter_plus_exit_time_us = 8.38,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.82,
+ .sr_enter_plus_exit_time_us = 11.196,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.89,
+ .sr_enter_plus_exit_time_us = 11.24,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.748,
+ .sr_enter_plus_exit_time_us = 11.102,
+ .valid = true,
+ },
+ }
+};
+
void dcn20_populate_dml_writeback_from_context(struct dc *dc,
struct resource_context *res_ctx,
display_e2e_pipe_params_st *pipes)
@@ -100,3 +1025,1535 @@ void dcn20_populate_dml_writeback_from_context(struct dc *dc,
pipe_cnt++;
}
}
+
+/**
+ * dcn20_fpu_set_wb_arb_params - Fill the writeback arbitration parameters
+ * @wb_arb_params: output; cli_watermark[], pstate_watermark[] and
+ *                 time_per_pixel are written
+ * @context: DC state providing the DML instance and the pipe contexts
+ * @pipes: DML pipe parameters, passed through to the watermark getters
+ * @pipe_cnt: number of entries in @pipes, passed through to the getters
+ * @i: index into context->res_ctx.pipe_ctx[] of the pipe whose stream
+ *     phy_pix_clk is used for time_per_pixel
+ *
+ * Every slot of cli_watermark[] receives the writeback urgent watermark and
+ * every slot of pstate_watermark[] the writeback DRAM clock change watermark,
+ * both scaled by 1000.
+ *
+ * Uses floating point; dc_assert_fp_enabled() checks the calling context.
+ */
+void dcn20_fpu_set_wb_arb_params(struct mcif_arb_params *wb_arb_params,
+		struct dc_state *context,
+		display_e2e_pipe_params_st *pipes,
+		int pipe_cnt, int i)
+{
+	const size_t wm_count = sizeof(wb_arb_params->cli_watermark) /
+				sizeof(wb_arb_params->cli_watermark[0]);
+	double urgent_wm;
+	double pstate_wm;
+	size_t k;
+
+	dc_assert_fp_enabled();
+
+	/*
+	 * Neither watermark depends on the slot index, so query DML once
+	 * instead of once per slot.
+	 * NOTE(review): assumes the getters return the same value when called
+	 * repeatedly with unchanged inputs - confirm against the DML getters.
+	 */
+	urgent_wm = get_wm_writeback_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	pstate_wm = get_wm_writeback_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+
+	/* pstate_watermark[] is indexed with the element count of cli_watermark[] */
+	for (k = 0; k < wm_count; k++) {
+		wb_arb_params->cli_watermark[k] = urgent_wm;
+		wb_arb_params->pstate_watermark[k] = pstate_wm;
+	}
+
+	/*
+	 * NOTE(review): phy_pix_clk / 1000 is evaluated before the floating
+	 * point division; if it is an integer division it truncates and yields
+	 * 0 for a clock below 1000 - confirm that cannot happen here.
+	 */
+	wb_arb_params->time_per_pixel = 16.0 * 1000 / (context->res_ctx.pipe_ctx[i].stream->phy_pix_clk / 1000); /* 4 bit fraction, ms */
+}
+
+/*
+ * Return true when at least one pipe that has a stream is reported by
+ * dc->link_srv->dp_is_128b_132b_signal() as carrying a 128b/132b signal;
+ * pipes without a stream are ignored.
+ */
+static bool is_dtbclk_required(struct dc *dc, struct dc_state *context)
+{
+	int idx = 0;
+
+	while (idx < dc->res_pool->pipe_count) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[idx++];
+
+		if (pipe->stream && dc->link_srv->dp_is_128b_132b_signal(pipe))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Pick the Z-state support level for @context.
+ *
+ * Rules, checked in this order:
+ *  1. No pipe has a plane_state                    -> ALLOW
+ *  2. Not exactly one stream, or it is not eDP      -> DISALLOW
+ *  3. That stream has more than one plane           -> DISALLOW
+ *  4. Link index 0 and StutterPeriod > 5000.0       -> ALLOW
+ *  5. Link index 0 and PSR (version 1 or SU 1, not disabled by the panel
+ *     config) or Replay is enabled                  -> ALLOW_Z8_Z10_ONLY
+ *  6. StutterPeriod > minimum Z8 residency          -> ALLOW_Z8_ONLY
+ *  7. Everything else                               -> DISALLOW
+ *
+ * The minimum Z8 residency is dc->debug.minimum_z8_residency_time when that
+ * is positive, otherwise 1000.
+ */
+static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struct dc_state *context)
+{
+	struct dc_stream_status *status;
+	struct dc_link *link;
+	int min_z8_residency;
+	int planes = 0;
+	int pipe;
+	bool on_pwrseq0;
+	bool psr_enabled;
+	bool replay_enabled;
+
+	for (pipe = 0; pipe < dc->res_pool->pipe_count; pipe++)
+		planes += context->res_ctx.pipe_ctx[pipe].plane_state ? 1 : 0;
+
+	if (planes == 0)
+		return DCN_ZSTATE_SUPPORT_ALLOW;
+
+	/* Only a lone eDP stream can qualify beyond this point */
+	if (context->stream_count != 1 || context->streams[0]->signal != SIGNAL_TYPE_EDP)
+		return DCN_ZSTATE_SUPPORT_DISALLOW;
+
+	link = context->streams[0]->sink->link;
+	status = &context->stream_status[0];
+
+	/* Don't support multi-plane configurations */
+	if (status->plane_count > 1)
+		return DCN_ZSTATE_SUPPORT_DISALLOW;
+
+	on_pwrseq0 = (link && link->link_index == 0);
+	psr_enabled = (link && (link->psr_settings.psr_version == DC_PSR_VERSION_1 ||
+			link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) &&
+			!link->panel_config.psr.disable_psr);
+	replay_enabled = link && link->replay_settings.replay_feature_enabled;
+
+	if (on_pwrseq0 && context->bw_ctx.dml.vba.StutterPeriod > 5000.0)
+		return DCN_ZSTATE_SUPPORT_ALLOW;
+
+	if (on_pwrseq0 && (psr_enabled || replay_enabled))
+		return DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY;
+
+	min_z8_residency = dc->debug.minimum_z8_residency_time > 0 ?
+			dc->debug.minimum_z8_residency_time : 1000;
+
+	if (context->bw_ctx.dml.vba.StutterPeriod > (double)min_z8_residency)
+		return DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY;
+
+	return DCN_ZSTATE_SUPPORT_DISALLOW;
+}
+
+/*
+ * Raise *@vstartup_start to one line past the blank end derived from
+ * @dc_crtc_timing, if it is currently lower than that; a value that is
+ * already larger is left as it is.
+ *
+ * The front porch used for the calculation is clamped to at least 2 lines
+ * for interlaced timings and at least 1 line otherwise. The clamp is applied
+ * to a local copy, so the caller's timing is not modified.
+ */
+static void dcn20_adjust_freesync_v_startup(
+		const struct dc_crtc_timing *dc_crtc_timing, int *vstartup_start)
+{
+	struct dc_crtc_timing timing = *dc_crtc_timing;
+	const int min_front_porch = (timing.flags.INTERLACE == 1) ? 2 : 1;
+	uint32_t blank_start;
+	uint32_t blank_end;
+	uint32_t candidate;
+
+	if (timing.v_front_porch < min_front_porch)
+		timing.v_front_porch = min_front_porch;
+
+	/* blank_start = frame end - front porch */
+	blank_start = timing.v_total - timing.v_front_porch;
+
+	/* blank_end = blank_start - active */
+	blank_end = blank_start -
+			timing.v_border_bottom -
+			timing.v_addressable -
+			timing.v_border_top;
+
+	/* The new vstartup candidate is one line after blank_end */
+	candidate = blank_end + 1;
+
+	/* Same unsigned-vs-int comparison as before; only ever raises the value */
+	if (candidate > *vstartup_start)
+		*vstartup_start = candidate;
+}
+/**
+ * dcn20_calculate_dlg_params - Copy DML results into the DC state
+ * @dc: display core instance (resource pool, debug overrides, ASIC version)
+ * @context: state to update; bw_ctx.bw.dcn.clk, compbuf_size_kb and every
+ *           pipe_ctx that has a stream are written
+ * @pipes: DML pipe parameter array; pipe.dest timing fields of the used
+ *         entries are written
+ * @pipe_cnt: number of entries in @pipes, passed through to the DML helpers
+ * @vlevel: index used for DRAMClockChangeSupport[], PrefetchMode[] and
+ *          soc.clock_limits[]
+ *
+ * Runs in three steps: (1) derive the context-wide clocks, (2) fill per-pipe
+ * timing, DET size and dppclk, (3) generate the per-pipe DLG/TTU/RQ registers
+ * and decide the Z-state support level.
+ *
+ * Uses floating point; dc_assert_fp_enabled() checks the calling context.
+ */
+void dcn20_calculate_dlg_params(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel)
+{
+ int i, pipe_idx, active_hubp_count = 0;
+
+ dc_assert_fp_enabled();
+
+ /* Writeback MCIF_WB arbitration parameters */
+ dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt);
+
+ /* DML values are multiplied by 1000 to fill the *_khz fields */
+ context->bw_ctx.bw.dcn.clk.dispclk_khz = context->bw_ctx.dml.vba.DISPCLK * 1000;
+ context->bw_ctx.bw.dcn.clk.dcfclk_khz = context->bw_ctx.dml.vba.DCFCLK * 1000;
+ context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000;
+ context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16;
+
+ /* Apply the debug floor for the DRAM clock */
+ if (dc->debug.min_dram_clk_khz > context->bw_ctx.bw.dcn.clk.dramclk_khz)
+ context->bw_ctx.bw.dcn.clk.dramclk_khz = dc->debug.min_dram_clk_khz;
+
+ context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000;
+ context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 1000;
+ context->bw_ctx.bw.dcn.clk.p_state_change_support =
+ context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]
+ != dm_dram_clock_change_unsupported;
+
+ /* Pstate change might not be supported by hardware, but it might be
+ * possible with firmware driven vertical blank stretching.
+ */
+ context->bw_ctx.bw.dcn.clk.p_state_change_support |= context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching;
+
+ /* Reset here; raised to the maximum per-pipe dppclk in the loop below */
+ context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
+
+ context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context);
+
+ /* Apply the debug floor for the display clock */
+ if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz)
+ context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz;
+
+ /* pipes[] is packed: pipe_idx only advances for pipes that have a stream */
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+ if (context->res_ctx.pipe_ctx[i].plane_state)
+ active_hubp_count++;
+ pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+ pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+ pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+ pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+
+ if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM) {
+ /* Phantom pipe requires that DET_SIZE = 0 and no unbounded requests */
+ context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0;
+ context->res_ctx.pipe_ctx[i].unbounded_req = false;
+ } else {
+ context->res_ctx.pipe_ctx[i].det_buffer_size_kb = context->bw_ctx.dml.ip.det_buffer_size_kbytes;
+ context->res_ctx.pipe_ctx[i].unbounded_req = pipes[pipe_idx].pipe.src.unbounded_req_mode;
+ }
+
+ /* The context-wide dppclk is the maximum over all used pipes */
+ if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
+ context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
+ context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz =
+ pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
+ context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest;
+ /* Before DCN 3.1, streams with a valid adaptive-sync infopacket get vstartup_start adjusted */
+ if (dc->ctx->dce_version < DCN_VERSION_3_1 &&
+ context->res_ctx.pipe_ctx[i].stream->adaptive_sync_infopacket.valid)
+ dcn20_adjust_freesync_v_startup(
+ &context->res_ctx.pipe_ctx[i].stream->timing,
+ &context->res_ctx.pipe_ctx[i].pipe_dlg_param.vstartup_start);
+
+ pipe_idx++;
+ }
+ /* If DCN isn't making memory requests we can allow pstate change */
+ if (!active_hubp_count) {
+ context->bw_ctx.bw.dcn.clk.p_state_change_support = true;
+ }
+ /* Save a copy of the dppclk and dispclk values computed above */
+ context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz;
+ context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz;
+ context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dppclk_mhz * 1000;
+ context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dispclk_mhz * 1000;
+
+ /* pipe_idx now equals the number of pipes with a stream counted by the loop above */
+ context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes
+ - context->bw_ctx.dml.ip.det_buffer_size_kbytes * pipe_idx;
+
+ /* Second pass over the same packed pipes[] layout: generate the register values */
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ bool cstate_en = context->bw_ctx.dml.vba.PrefetchMode[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != 2;
+
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+
+ /* cstate disabled on 201 */
+ if (dc->ctx->dce_version == DCN_VERSION_2_01)
+ cstate_en = false;
+
+ context->bw_ctx.dml.funcs.rq_dlg_get_dlg_reg(&context->bw_ctx.dml,
+ &context->res_ctx.pipe_ctx[i].dlg_regs,
+ &context->res_ctx.pipe_ctx[i].ttu_regs,
+ pipes,
+ pipe_cnt,
+ pipe_idx,
+ cstate_en,
+ context->bw_ctx.bw.dcn.clk.p_state_change_support,
+ false, false, true);
+
+ context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg(&context->bw_ctx.dml,
+ &context->res_ctx.pipe_ctx[i].rq_regs,
+ &pipes[pipe_idx].pipe);
+ pipe_idx++;
+ }
+ /* Decided last, after the clocks and per-pipe values above have been written */
+ context->bw_ctx.bw.dcn.clk.zstate_support = decide_zstate_support(dc, context);
+}
+
+/*
+ * Translate a DC swizzle mode into the corresponding DML dm_sw_* value.
+ *
+ * @swizzle: DC-side swizzle mode to translate.
+ * @sw_mode: output; written only when @swizzle is one of the handled modes.
+ *
+ * An unhandled mode trips ASSERT(0) and leaves *sw_mode untouched.
+ */
+static void swizzle_to_dml_params(
+		enum swizzle_mode_values swizzle,
+		unsigned int *sw_mode)
+{
+	unsigned int dml_mode;
+
+	switch (swizzle) {
+	case DC_SW_LINEAR:
+		dml_mode = dm_sw_linear;
+		break;
+	case DC_SW_4KB_S:
+		dml_mode = dm_sw_4kb_s;
+		break;
+	case DC_SW_4KB_S_X:
+		dml_mode = dm_sw_4kb_s_x;
+		break;
+	case DC_SW_4KB_D:
+		dml_mode = dm_sw_4kb_d;
+		break;
+	case DC_SW_4KB_D_X:
+		dml_mode = dm_sw_4kb_d_x;
+		break;
+	case DC_SW_64KB_S:
+		dml_mode = dm_sw_64kb_s;
+		break;
+	case DC_SW_64KB_S_X:
+		dml_mode = dm_sw_64kb_s_x;
+		break;
+	case DC_SW_64KB_S_T:
+		dml_mode = dm_sw_64kb_s_t;
+		break;
+	case DC_SW_64KB_D:
+		dml_mode = dm_sw_64kb_d;
+		break;
+	case DC_SW_64KB_D_X:
+		dml_mode = dm_sw_64kb_d_x;
+		break;
+	case DC_SW_64KB_D_T:
+		dml_mode = dm_sw_64kb_d_t;
+		break;
+	case DC_SW_64KB_R_X:
+		dml_mode = dm_sw_64kb_r_x;
+		break;
+	case DC_SW_VAR_S:
+		dml_mode = dm_sw_var_s;
+		break;
+	case DC_SW_VAR_S_X:
+		dml_mode = dm_sw_var_s_x;
+		break;
+	case DC_SW_VAR_D:
+		dml_mode = dm_sw_var_d;
+		break;
+	case DC_SW_VAR_D_X:
+		dml_mode = dm_sw_var_d_x;
+		break;
+	case DC_SW_VAR_R_X:
+		dml_mode = dm_sw_var_r_x;
+		break;
+	default:
+		/* Not supported: the caller's value is deliberately left as is. */
+		ASSERT(0);
+		return;
+	}
+
+	*sw_mode = dml_mode;
+}
+
+/*
+ * dcn20_populate_dml_pipes_from_context - build the DML pipe array from a DC state.
+ *
+ * @dc: display core instance; supplies the pipe count, reference clock,
+ *      debug knobs, link service and the writeback populate hook.
+ * @context: DC state whose resource context is translated.
+ * @pipes: output array; one entry is filled for every pipe_ctx that has a
+ *         stream, packed densely from index 0.
+ * @validate_mode: not referenced by this function body.
+ *
+ * A first pass decides whether all streams can share synchronized vblank;
+ * a second pass fills the destination (timing), output (signal, bpp, DSC)
+ * and source (surface, scaling, swizzle) parameters of each entry.
+ *
+ * Return: the number of entries written to @pipes.
+ */
+int dcn20_populate_dml_pipes_from_context(struct dc *dc,
+		struct dc_state *context,
+		display_e2e_pipe_params_st *pipes,
+		enum dc_validate_mode validate_mode)
+{
+	int pipe_cnt, i;
+	bool synchronized_vblank = true;
+	struct resource_context *res_ctx = &context->res_ctx;
+
+	dc_assert_fp_enabled();
+
+	/*
+	 * Pass 1: compare the first pipe that has a stream against every other
+	 * stream. pipe_cnt temporarily holds the index of that first pipe.
+	 */
+	for (i = 0, pipe_cnt = -1; i < dc->res_pool->pipe_count; i++) {
+		if (!res_ctx->pipe_ctx[i].stream)
+			continue;
+
+		if (pipe_cnt < 0) {
+			pipe_cnt = i;
+			continue;
+		}
+
+		if (res_ctx->pipe_ctx[pipe_cnt].stream == res_ctx->pipe_ctx[i].stream)
+			continue;
+
+		if (dc->debug.disable_timing_sync ||
+			(!resource_are_streams_timing_synchronizable(
+				res_ctx->pipe_ctx[pipe_cnt].stream,
+				res_ctx->pipe_ctx[i].stream) &&
+			!resource_are_vblanks_synchronizable(
+				res_ctx->pipe_ctx[pipe_cnt].stream,
+				res_ctx->pipe_ctx[i].stream))) {
+			synchronized_vblank = false;
+			break;
+		}
+	}
+
+	/* Pass 2: fill one DML entry per pipe that carries a stream. */
+	for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+		struct dc_crtc_timing *timing;
+		unsigned int v_total;
+		unsigned int front_porch;
+		int output_bpc;
+		struct audio_check aud_check = {0};
+
+		if (!res_ctx->pipe_ctx[i].stream)
+			continue;
+
+		/*
+		 * Form the timing pointer only after the stream is known to be
+		 * non-NULL, so no member address is derived from a NULL stream.
+		 */
+		timing = &res_ctx->pipe_ctx[i].stream->timing;
+		v_total = timing->v_total;
+		front_porch = timing->v_front_porch;
+
+		/* todo:
+		pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = 0;
+		pipes[pipe_cnt].pipe.src.dcc = 0;
+		pipes[pipe_cnt].pipe.src.vm = 0;*/
+
+		pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0;
+
+		pipes[pipe_cnt].pipe.dest.use_maximum_vstartup = dc->ctx->dce_version == DCN_VERSION_2_01;
+
+		pipes[pipe_cnt].dout.dsc_enable = res_ctx->pipe_ctx[i].stream->timing.flags.DSC;
+		/* todo: rotation?*/
+		pipes[pipe_cnt].dout.dsc_slices = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.num_slices_h;
+		if (res_ctx->pipe_ctx[i].stream->use_dynamic_meta) {
+			pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = true;
+			/* 1/2 vblank */
+			pipes[pipe_cnt].pipe.src.dynamic_metadata_lines_before_active =
+				(v_total - timing->v_addressable
+					- timing->v_border_top - timing->v_border_bottom) / 2;
+			/* 36 bytes dp, 32 hdmi */
+			pipes[pipe_cnt].pipe.src.dynamic_metadata_xmit_bytes =
+				dc_is_dp_signal(res_ctx->pipe_ctx[i].stream->signal) ? 36 : 32;
+		}
+		pipes[pipe_cnt].pipe.src.dcc = false;
+		pipes[pipe_cnt].pipe.src.dcc_rate = 1;
+		pipes[pipe_cnt].pipe.dest.synchronized_vblank_all_planes = synchronized_vblank;
+		pipes[pipe_cnt].pipe.dest.synchronize_timings = synchronized_vblank;
+		pipes[pipe_cnt].pipe.dest.hblank_start = timing->h_total - timing->h_front_porch;
+		pipes[pipe_cnt].pipe.dest.hblank_end = pipes[pipe_cnt].pipe.dest.hblank_start
+				- timing->h_addressable
+				- timing->h_border_left
+				- timing->h_border_right;
+		pipes[pipe_cnt].pipe.dest.vblank_start = v_total - front_porch;
+		pipes[pipe_cnt].pipe.dest.vblank_end = pipes[pipe_cnt].pipe.dest.vblank_start
+				- timing->v_addressable
+				- timing->v_border_top
+				- timing->v_border_bottom;
+		pipes[pipe_cnt].pipe.dest.htotal = timing->h_total;
+		pipes[pipe_cnt].pipe.dest.vtotal = v_total;
+		pipes[pipe_cnt].pipe.dest.hactive =
+			timing->h_addressable + timing->h_border_left + timing->h_border_right;
+		pipes[pipe_cnt].pipe.dest.vactive =
+			timing->v_addressable + timing->v_border_top + timing->v_border_bottom;
+		pipes[pipe_cnt].pipe.dest.interlaced = timing->flags.INTERLACE;
+		pipes[pipe_cnt].pipe.dest.pixel_rate_mhz = timing->pix_clk_100hz/10000.0;
+		if (timing->timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING)
+			pipes[pipe_cnt].pipe.dest.pixel_rate_mhz *= 2;
+		pipes[pipe_cnt].pipe.dest.otg_inst = res_ctx->pipe_ctx[i].stream_res.tg->inst;
+		pipes[pipe_cnt].dout.dp_lanes = 4;
+		pipes[pipe_cnt].dout.dp_rate = dm_dp_rate_na;
+		pipes[pipe_cnt].dout.is_virtual = 0;
+		pipes[pipe_cnt].pipe.dest.vtotal_min = res_ctx->pipe_ctx[i].stream->adjust.v_total_min;
+		pipes[pipe_cnt].pipe.dest.vtotal_max = res_ctx->pipe_ctx[i].stream->adjust.v_total_max;
+		switch (resource_get_odm_slice_count(&res_ctx->pipe_ctx[i])) {
+		case 2:
+			pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_2to1;
+			break;
+		case 4:
+			pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_4to1;
+			break;
+		default:
+			pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_disabled;
+		}
+		/* Default group is the pipe itself; split/ODM children override it below. */
+		pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].pipe_idx;
+		if (res_ctx->pipe_ctx[i].top_pipe && res_ctx->pipe_ctx[i].top_pipe->plane_state
+				== res_ctx->pipe_ctx[i].plane_state) {
+			struct pipe_ctx *first_pipe = res_ctx->pipe_ctx[i].top_pipe;
+			int split_idx = 0;
+
+			/* Walk up to the topmost pipe that shares this plane. */
+			while (first_pipe->top_pipe && first_pipe->top_pipe->plane_state
+					== res_ctx->pipe_ctx[i].plane_state) {
+				first_pipe = first_pipe->top_pipe;
+				split_idx++;
+			}
+			/* Treat 4to1 mpc combine as an mpo of 2 2-to-1 combines */
+			if (split_idx == 0)
+				pipes[pipe_cnt].pipe.src.hsplit_grp = first_pipe->pipe_idx;
+			else if (split_idx == 1)
+				pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].pipe_idx;
+			else if (split_idx == 2)
+				pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].top_pipe->pipe_idx;
+		} else if (res_ctx->pipe_ctx[i].prev_odm_pipe) {
+			struct pipe_ctx *first_pipe = res_ctx->pipe_ctx[i].prev_odm_pipe;
+
+			while (first_pipe->prev_odm_pipe)
+				first_pipe = first_pipe->prev_odm_pipe;
+			pipes[pipe_cnt].pipe.src.hsplit_grp = first_pipe->pipe_idx;
+		}
+
+		switch (res_ctx->pipe_ctx[i].stream->signal) {
+		case SIGNAL_TYPE_DISPLAY_PORT_MST:
+		case SIGNAL_TYPE_DISPLAY_PORT:
+			pipes[pipe_cnt].dout.output_type = dm_dp;
+			if (dc->link_srv->dp_is_128b_132b_signal(&res_ctx->pipe_ctx[i]))
+				pipes[pipe_cnt].dout.output_type = dm_dp2p0;
+			break;
+		case SIGNAL_TYPE_EDP:
+			pipes[pipe_cnt].dout.output_type = dm_edp;
+			break;
+		case SIGNAL_TYPE_HDMI_TYPE_A:
+		case SIGNAL_TYPE_DVI_SINGLE_LINK:
+		case SIGNAL_TYPE_DVI_DUAL_LINK:
+			pipes[pipe_cnt].dout.output_type = dm_hdmi;
+			break;
+		default:
+			/* In case there is no signal, set dp with 4 lanes to allow max config */
+			pipes[pipe_cnt].dout.is_virtual = 1;
+			pipes[pipe_cnt].dout.output_type = dm_dp;
+			pipes[pipe_cnt].dout.dp_lanes = 4;
+		}
+
+		switch (res_ctx->pipe_ctx[i].stream->timing.display_color_depth) {
+		case COLOR_DEPTH_666:
+			output_bpc = 6;
+			break;
+		case COLOR_DEPTH_888:
+			output_bpc = 8;
+			break;
+		case COLOR_DEPTH_101010:
+			output_bpc = 10;
+			break;
+		case COLOR_DEPTH_121212:
+			output_bpc = 12;
+			break;
+		case COLOR_DEPTH_141414:
+			output_bpc = 14;
+			break;
+		case COLOR_DEPTH_161616:
+			output_bpc = 16;
+			break;
+		case COLOR_DEPTH_999:
+			output_bpc = 9;
+			break;
+		case COLOR_DEPTH_111111:
+			output_bpc = 11;
+			break;
+		default:
+			output_bpc = 8;
+			break;
+		}
+
+		switch (res_ctx->pipe_ctx[i].stream->timing.pixel_encoding) {
+		case PIXEL_ENCODING_RGB:
+		case PIXEL_ENCODING_YCBCR444:
+			pipes[pipe_cnt].dout.output_format = dm_444;
+			pipes[pipe_cnt].dout.output_bpp = output_bpc * 3;
+			break;
+		case PIXEL_ENCODING_YCBCR420:
+			pipes[pipe_cnt].dout.output_format = dm_420;
+			pipes[pipe_cnt].dout.output_bpp = (output_bpc * 3.0) / 2;
+			break;
+		case PIXEL_ENCODING_YCBCR422:
+			if (res_ctx->pipe_ctx[i].stream->timing.flags.DSC &&
+			    !res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.ycbcr422_simple)
+				pipes[pipe_cnt].dout.output_format = dm_n422;
+			else
+				pipes[pipe_cnt].dout.output_format = dm_s422;
+			pipes[pipe_cnt].dout.output_bpp = output_bpc * 2;
+			break;
+		default:
+			pipes[pipe_cnt].dout.output_format = dm_444;
+			pipes[pipe_cnt].dout.output_bpp = output_bpc * 3;
+		}
+
+		/* With DSC the configured bits_per_pixel (scaled by 1/16) replaces the value above. */
+		if (res_ctx->pipe_ctx[i].stream->timing.flags.DSC)
+			pipes[pipe_cnt].dout.output_bpp = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.bits_per_pixel / 16.0;
+
+		/* todo: default max for now, until there is logic reflecting this in dc*/
+		pipes[pipe_cnt].dout.dsc_input_bpc = 12;
+		/*fill up the audio sample rate (unit in kHz)*/
+		get_audio_check(&res_ctx->pipe_ctx[i].stream->audio_info, &aud_check);
+		pipes[pipe_cnt].dout.max_audio_sample_rate = aud_check.max_audiosample_rate / 1000;
+		/*
+		 * For graphic plane, cursor number is 1, nv12 is 0
+		 * bw calculations due to cursor on/off
+		 */
+		if (res_ctx->pipe_ctx[i].plane_state &&
+				(res_ctx->pipe_ctx[i].plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE ||
+				dc_state_get_pipe_subvp_type(context, &res_ctx->pipe_ctx[i]) == SUBVP_PHANTOM))
+			pipes[pipe_cnt].pipe.src.num_cursors = 0;
+		else
+			pipes[pipe_cnt].pipe.src.num_cursors = dc->dml.ip.number_of_cursors;
+
+		pipes[pipe_cnt].pipe.src.cur0_src_width = 256;
+		pipes[pipe_cnt].pipe.src.cur0_bpp = dm_cur_32bit;
+
+		if (!res_ctx->pipe_ctx[i].plane_state) {
+			/*
+			 * No plane attached: describe a placeholder surface derived
+			 * from the timing, limited to 1920x1080, unscaled.
+			 */
+			pipes[pipe_cnt].pipe.src.is_hsplit = pipes[pipe_cnt].pipe.dest.odm_combine != dm_odm_combine_mode_disabled;
+			pipes[pipe_cnt].pipe.src.source_scan = dm_horz;
+			pipes[pipe_cnt].pipe.src.source_rotation = dm_rotation_0;
+			pipes[pipe_cnt].pipe.src.sw_mode = dm_sw_4kb_s;
+			pipes[pipe_cnt].pipe.src.macro_tile_size = dm_64k_tile;
+			pipes[pipe_cnt].pipe.src.viewport_width = timing->h_addressable;
+			if (pipes[pipe_cnt].pipe.src.viewport_width > 1920)
+				pipes[pipe_cnt].pipe.src.viewport_width = 1920;
+			pipes[pipe_cnt].pipe.src.viewport_height = timing->v_addressable;
+			if (pipes[pipe_cnt].pipe.src.viewport_height > 1080)
+				pipes[pipe_cnt].pipe.src.viewport_height = 1080;
+			pipes[pipe_cnt].pipe.src.surface_height_y = pipes[pipe_cnt].pipe.src.viewport_height;
+			pipes[pipe_cnt].pipe.src.surface_width_y = pipes[pipe_cnt].pipe.src.viewport_width;
+			pipes[pipe_cnt].pipe.src.surface_height_c = pipes[pipe_cnt].pipe.src.viewport_height;
+			pipes[pipe_cnt].pipe.src.surface_width_c = pipes[pipe_cnt].pipe.src.viewport_width;
+			/* Round the pitch up to a multiple of 256. */
+			pipes[pipe_cnt].pipe.src.data_pitch = ((pipes[pipe_cnt].pipe.src.viewport_width + 255) / 256) * 256;
+			pipes[pipe_cnt].pipe.src.source_format = dm_444_32;
+			pipes[pipe_cnt].pipe.src.cur0_src_width = 0;
+			pipes[pipe_cnt].pipe.src.cur1_src_width = 0;
+			pipes[pipe_cnt].pipe.dest.recout_width = pipes[pipe_cnt].pipe.src.viewport_width; /*vp_width/hratio*/
+			pipes[pipe_cnt].pipe.dest.recout_height = pipes[pipe_cnt].pipe.src.viewport_height; /*vp_height/vratio*/
+			pipes[pipe_cnt].pipe.dest.full_recout_width = pipes[pipe_cnt].pipe.dest.recout_width;  /*when is_hsplit != 1*/
+			pipes[pipe_cnt].pipe.dest.full_recout_height = pipes[pipe_cnt].pipe.dest.recout_height; /*when is_hsplit != 1*/
+			pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_16;
+			pipes[pipe_cnt].pipe.scale_ratio_depth.hscl_ratio = 1.0;
+			pipes[pipe_cnt].pipe.scale_ratio_depth.vscl_ratio = 1.0;
+			pipes[pipe_cnt].pipe.scale_ratio_depth.scl_enable = 0; /*Lb only or Full scl*/
+			pipes[pipe_cnt].pipe.scale_taps.htaps = 1;
+			pipes[pipe_cnt].pipe.scale_taps.vtaps = 1;
+			pipes[pipe_cnt].pipe.dest.vtotal_min = v_total;
+			pipes[pipe_cnt].pipe.dest.vtotal_max = v_total;
+
+			if (pipes[pipe_cnt].pipe.dest.odm_combine == dm_odm_combine_mode_2to1) {
+				pipes[pipe_cnt].pipe.src.viewport_width /= 2;
+				pipes[pipe_cnt].pipe.dest.recout_width /= 2;
+			} else if (pipes[pipe_cnt].pipe.dest.odm_combine == dm_odm_combine_mode_4to1) {
+				pipes[pipe_cnt].pipe.src.viewport_width /= 4;
+				pipes[pipe_cnt].pipe.dest.recout_width /= 4;
+			}
+		} else {
+			struct dc_plane_state *pln = res_ctx->pipe_ctx[i].plane_state;
+			struct scaler_data *scl = &res_ctx->pipe_ctx[i].plane_res.scl_data;
+
+			pipes[pipe_cnt].pipe.src.immediate_flip = pln->flip_immediate;
+			pipes[pipe_cnt].pipe.src.is_hsplit = (res_ctx->pipe_ctx[i].bottom_pipe && res_ctx->pipe_ctx[i].bottom_pipe->plane_state == pln)
+					|| (res_ctx->pipe_ctx[i].top_pipe && res_ctx->pipe_ctx[i].top_pipe->plane_state == pln)
+					|| pipes[pipe_cnt].pipe.dest.odm_combine != dm_odm_combine_mode_disabled;
+
+			/* stereo is not split */
+			if (pln->stereo_format == PLANE_STEREO_FORMAT_SIDE_BY_SIDE ||
+			    pln->stereo_format == PLANE_STEREO_FORMAT_TOP_AND_BOTTOM) {
+				pipes[pipe_cnt].pipe.src.is_hsplit = false;
+				pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].pipe_idx;
+			}
+
+			pipes[pipe_cnt].pipe.src.source_scan = pln->rotation == ROTATION_ANGLE_90
+					|| pln->rotation == ROTATION_ANGLE_270 ? dm_vert : dm_horz;
+			switch (pln->rotation) {
+			case ROTATION_ANGLE_0:
+				pipes[pipe_cnt].pipe.src.source_rotation = dm_rotation_0;
+				break;
+			case ROTATION_ANGLE_90:
+				pipes[pipe_cnt].pipe.src.source_rotation = dm_rotation_90;
+				break;
+			case ROTATION_ANGLE_180:
+				pipes[pipe_cnt].pipe.src.source_rotation = dm_rotation_180;
+				break;
+			case ROTATION_ANGLE_270:
+				pipes[pipe_cnt].pipe.src.source_rotation = dm_rotation_270;
+				break;
+			default:
+				break;
+			}
+
+			pipes[pipe_cnt].pipe.src.viewport_y_y = scl->viewport.y;
+			pipes[pipe_cnt].pipe.src.viewport_y_c = scl->viewport_c.y;
+			pipes[pipe_cnt].pipe.src.viewport_x_y = scl->viewport.x;
+			pipes[pipe_cnt].pipe.src.viewport_x_c = scl->viewport_c.x;
+			pipes[pipe_cnt].pipe.src.viewport_width = scl->viewport.width;
+			pipes[pipe_cnt].pipe.src.viewport_width_c = scl->viewport_c.width;
+			pipes[pipe_cnt].pipe.src.viewport_height = scl->viewport.height;
+			pipes[pipe_cnt].pipe.src.viewport_height_c = scl->viewport_c.height;
+			pipes[pipe_cnt].pipe.src.viewport_width_max = pln->src_rect.width;
+			pipes[pipe_cnt].pipe.src.viewport_height_max = pln->src_rect.height;
+			pipes[pipe_cnt].pipe.src.surface_width_y = pln->plane_size.surface_size.width;
+			pipes[pipe_cnt].pipe.src.surface_height_y = pln->plane_size.surface_size.height;
+			pipes[pipe_cnt].pipe.src.surface_width_c = pln->plane_size.chroma_size.width;
+			pipes[pipe_cnt].pipe.src.surface_height_c = pln->plane_size.chroma_size.height;
+			/* Chroma pitches are filled only for RGBE_ALPHA and video formats. */
+			if (pln->format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA
+					|| pln->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) {
+				pipes[pipe_cnt].pipe.src.data_pitch = pln->plane_size.surface_pitch;
+				pipes[pipe_cnt].pipe.src.data_pitch_c = pln->plane_size.chroma_pitch;
+				pipes[pipe_cnt].pipe.src.meta_pitch = pln->dcc.meta_pitch;
+				pipes[pipe_cnt].pipe.src.meta_pitch_c = pln->dcc.meta_pitch_c;
+			} else {
+				pipes[pipe_cnt].pipe.src.data_pitch = pln->plane_size.surface_pitch;
+				pipes[pipe_cnt].pipe.src.meta_pitch = pln->dcc.meta_pitch;
+			}
+			pipes[pipe_cnt].pipe.src.dcc = pln->dcc.enable;
+			pipes[pipe_cnt].pipe.dest.recout_width = scl->recout.width;
+			pipes[pipe_cnt].pipe.dest.recout_height = scl->recout.height;
+			pipes[pipe_cnt].pipe.dest.full_recout_height = scl->recout.height;
+			pipes[pipe_cnt].pipe.dest.full_recout_width = scl->recout.width;
+			if (pipes[pipe_cnt].pipe.dest.odm_combine == dm_odm_combine_mode_2to1)
+				pipes[pipe_cnt].pipe.dest.full_recout_width *= 2;
+			else if (pipes[pipe_cnt].pipe.dest.odm_combine == dm_odm_combine_mode_4to1)
+				pipes[pipe_cnt].pipe.dest.full_recout_width *= 4;
+			else {
+				struct pipe_ctx *split_pipe = res_ctx->pipe_ctx[i].bottom_pipe;
+
+				/* Add the recout width of every neighbour sharing this plane. */
+				while (split_pipe && split_pipe->plane_state == pln) {
+					pipes[pipe_cnt].pipe.dest.full_recout_width += split_pipe->plane_res.scl_data.recout.width;
+					split_pipe = split_pipe->bottom_pipe;
+				}
+				split_pipe = res_ctx->pipe_ctx[i].top_pipe;
+				while (split_pipe && split_pipe->plane_state == pln) {
+					pipes[pipe_cnt].pipe.dest.full_recout_width += split_pipe->plane_res.scl_data.recout.width;
+					split_pipe = split_pipe->top_pipe;
+				}
+			}
+
+			pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_16;
+			/* Scaler ratios are converted to double by dividing the raw value by 2^32. */
+			pipes[pipe_cnt].pipe.scale_ratio_depth.hscl_ratio = (double) scl->ratios.horz.value / (1ULL<<32);
+			pipes[pipe_cnt].pipe.scale_ratio_depth.hscl_ratio_c = (double) scl->ratios.horz_c.value / (1ULL<<32);
+			pipes[pipe_cnt].pipe.scale_ratio_depth.vscl_ratio = (double) scl->ratios.vert.value / (1ULL<<32);
+			pipes[pipe_cnt].pipe.scale_ratio_depth.vscl_ratio_c = (double) scl->ratios.vert_c.value / (1ULL<<32);
+			pipes[pipe_cnt].pipe.scale_ratio_depth.scl_enable =
+					scl->ratios.vert.value != dc_fixpt_one.value
+					|| scl->ratios.horz.value != dc_fixpt_one.value
+					|| scl->ratios.vert_c.value != dc_fixpt_one.value
+					|| scl->ratios.horz_c.value != dc_fixpt_one.value /*Lb only or Full scl*/
+					|| dc->debug.always_scale; /*support always scale*/
+			pipes[pipe_cnt].pipe.scale_taps.htaps = scl->taps.h_taps;
+			pipes[pipe_cnt].pipe.scale_taps.htaps_c = scl->taps.h_taps_c;
+			pipes[pipe_cnt].pipe.scale_taps.vtaps = scl->taps.v_taps;
+			pipes[pipe_cnt].pipe.scale_taps.vtaps_c = scl->taps.v_taps_c;
+
+			pipes[pipe_cnt].pipe.src.macro_tile_size =
+					swizzle_mode_to_macro_tile_size(pln->tiling_info.gfx9.swizzle);
+			swizzle_to_dml_params(pln->tiling_info.gfx9.swizzle,
+					&pipes[pipe_cnt].pipe.src.sw_mode);
+
+			switch (pln->format) {
+			case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr:
+			case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb:
+				pipes[pipe_cnt].pipe.src.source_format = dm_420_8;
+				break;
+			case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr:
+			case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb:
+				pipes[pipe_cnt].pipe.src.source_format = dm_420_10;
+				break;
+			case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
+			case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
+			case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
+			case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
+				pipes[pipe_cnt].pipe.src.source_format = dm_444_64;
+				break;
+			case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555:
+			case SURFACE_PIXEL_FORMAT_GRPH_RGB565:
+				pipes[pipe_cnt].pipe.src.source_format = dm_444_16;
+				break;
+			case SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS:
+				pipes[pipe_cnt].pipe.src.source_format = dm_444_8;
+				break;
+			case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA:
+				pipes[pipe_cnt].pipe.src.source_format = dm_rgbe_alpha;
+				break;
+			default:
+				pipes[pipe_cnt].pipe.src.source_format = dm_444_32;
+				break;
+			}
+		}
+
+		pipe_cnt++;
+	}
+
+	/* populate writeback information */
+	dc->res_pool->funcs->populate_dml_writeback_from_context(dc, res_ctx, pipes);
+
+	return pipe_cnt;
+}
+/*
+ * dcn20_calculate_wm - fill DML clock inputs and compute watermark sets a-d.
+ *
+ * @dc: display core instance; supplies the pipe count, reference clock,
+ *      forced/minimum clock overrides and the optional populate_dml_pipes hook.
+ * @context: state whose bw_ctx.dml results are read and whose
+ *           bw_ctx.bw.dcn.watermarks are written.
+ * @pipes: DML pipe array; clks_cfg and dest.odm_combine are filled for every
+ *         pipe_ctx that has a stream.
+ * @out_pipe_cnt: receives the number of @pipes entries in use.
+ * @pipe_split_from: indexed by pipe_ctx; a negative entry makes the pipe use
+ *                   its own running DML index, any other value is used as the
+ *                   index into the vba result arrays.
+ * @vlevel: voltage level used to index soc.clock_limits and the vba arrays.
+ * @validate_mode: forwarded unchanged to the populate call.
+ *
+ * Sets b, c and d are computed first with pipes[0] raised to at least voltage
+ * 1, 2 and 3 respectively; set a is computed last at @vlevel. Only set b
+ * records urgent_latency_ns.
+ */
+void dcn20_calculate_wm(struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int *out_pipe_cnt,
+ int *pipe_split_from,
+ int vlevel,
+ enum dc_validate_mode validate_mode)
+{
+ int pipe_cnt, i, pipe_idx;
+
+ dc_assert_fp_enabled();
+ /*
+ * Fill clks_cfg and odm_combine for every pipe_ctx that has a stream.
+ * pipe_cnt indexes pipes[]; pipe_idx indexes the vba result arrays and
+ * only advances for pipes whose pipe_split_from entry is negative.
+ */
+ for (i = 0, pipe_idx = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+
+ pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0;
+ pipes[pipe_cnt].clks_cfg.dispclk_mhz = context->bw_ctx.dml.vba.RequiredDISPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+
+ if (pipe_split_from[i] < 0) {
+ pipes[pipe_cnt].clks_cfg.dppclk_mhz =
+ context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx];
+ if (context->bw_ctx.dml.vba.BlendingAndTiming[pipe_idx] == pipe_idx)
+ pipes[pipe_cnt].pipe.dest.odm_combine =
+ context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx];
+ else
+ pipes[pipe_cnt].pipe.dest.odm_combine = 0;
+ pipe_idx++;
+ } else {
+ pipes[pipe_cnt].clks_cfg.dppclk_mhz =
+ context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_split_from[i]];
+ if (context->bw_ctx.dml.vba.BlendingAndTiming[pipe_split_from[i]] == pipe_split_from[i])
+ pipes[pipe_cnt].pipe.dest.odm_combine =
+ context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_split_from[i]];
+ else
+ pipes[pipe_cnt].pipe.dest.odm_combine = 0;
+ }
+
+ if (dc->config.forced_clocks) {
+ pipes[pipe_cnt].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
+ pipes[pipe_cnt].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
+ }
+ if (dc->debug.min_disp_clk_khz > pipes[pipe_cnt].clks_cfg.dispclk_mhz * 1000)
+ pipes[pipe_cnt].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
+ if (dc->debug.min_dpp_clk_khz > pipes[pipe_cnt].clks_cfg.dppclk_mhz * 1000)
+ pipes[pipe_cnt].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;
+
+ pipe_cnt++;
+ }
+ /*
+ * The two counters differ when at least one pipe took the
+ * pipe_split_from branch; the pipe array is then repopulated through
+ * the resource hook (or the dcn20 default when no hook is set) and
+ * pipe_cnt is replaced by the returned count.
+ */
+ if (pipe_cnt != pipe_idx) {
+ if (dc->res_pool->funcs->populate_dml_pipes)
+ pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc,
+ context, pipes, validate_mode);
+ else
+ pipe_cnt = dcn20_populate_dml_pipes_from_context(dc,
+ context, pipes, validate_mode);
+ }
+
+ *out_pipe_cnt = pipe_cnt;
+
+ pipes[0].clks_cfg.voltage = vlevel;
+ pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].dcfclk_mhz;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
+
+ /* only pipe 0 is read for voltage and dcf/soc clocks */
+ if (vlevel < 1) {
+ pipes[0].clks_cfg.voltage = 1;
+ pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[1].dcfclk_mhz;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[1].socclk_mhz;
+ }
+ context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+
+ if (vlevel < 2) {
+ pipes[0].clks_cfg.voltage = 2;
+ pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[2].dcfclk_mhz;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[2].socclk_mhz;
+ }
+ context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ /*
+ * NOTE(review): set d raises the voltage to 3 but reads clock_limits[2],
+ * unlike sets b and c which read the index matching the voltage.
+ * Confirm whether clock_limits[3] was intended.
+ */
+ if (vlevel < 3) {
+ pipes[0].clks_cfg.voltage = 3;
+ pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[2].dcfclk_mhz;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[2].socclk_mhz;
+ }
+ context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ /* Restore the requested vlevel on pipe 0 and compute set a from it. */
+ pipes[0].clks_cfg.voltage = vlevel;
+ pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].dcfclk_mhz;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
+ context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+}
+
+/*
+ * dcn20_update_bounding_box - rebuild bb->clock_limits from reported clocks.
+ *
+ * @dc: display core instance; supplies the min_dcfclk override and ASIC id.
+ * @bb: bounding box whose clock_limits and num_states are rewritten.
+ * @max_clocks: maximum clocks used to cap and fill each state.
+ * @uclk_states: one memory clock value per state (presumably kHz - the
+ *               conversions below divide by 1000 / 1000000; confirm with caller).
+ * @num_states: number of entries in @uclk_states. Zero leaves @bb untouched.
+ *
+ * One state is produced per uclk entry, the top state is then forced to the
+ * maximum soc/dcf clocks, and a copy of the top state is appended after it.
+ * Because of that extra copy, @num_states is clamped to the capacity of
+ * bb->clock_limits minus one so the append cannot write past the array.
+ */
+void dcn20_update_bounding_box(struct dc *dc,
+		struct _vcs_dpi_soc_bounding_box_st *bb,
+		struct pp_smu_nv_clock_table *max_clocks,
+		unsigned int *uclk_states,
+		unsigned int num_states)
+{
+	/* Capacity of the table, less the slot reserved for the duplicated top state. */
+	const unsigned int max_states =
+		sizeof(bb->clock_limits) / sizeof(bb->clock_limits[0]) - 1;
+	unsigned int num_calculated_states = 0;
+	int min_dcfclk = 0;
+	unsigned int i;
+
+	dc_assert_fp_enabled();
+
+	if (num_states == 0 || max_states == 0)
+		return;
+
+	/* Never generate more states than the table (plus its duplicate) can hold. */
+	if (num_states > max_states)
+		num_states = max_states;
+
+	memset(bb->clock_limits, 0, sizeof(bb->clock_limits));
+
+	if (dc->bb_overrides.min_dcfclk_mhz > 0) {
+		min_dcfclk = dc->bb_overrides.min_dcfclk_mhz;
+	} else {
+		if (ASICREV_IS_NAVI12_P(dc->ctx->asic_id.hw_internal_rev))
+			min_dcfclk = 310;
+		else
+			// Accounting for SOC/DCF relationship, we can go as high as
+			// 506Mhz in Vmin.
+			min_dcfclk = 506;
+	}
+
+	for (i = 0; i < num_states; i++) {
+		int min_fclk_required_by_uclk;
+		bb->clock_limits[i].state = i;
+		bb->clock_limits[i].dram_speed_mts = uclk_states[i] * 16 / 1000;
+
+		// FCLK:UCLK ratio is 1.08
+		min_fclk_required_by_uclk = div_u64(((unsigned long long)uclk_states[i]) * 1080,
+			1000000);
+
+		/* Fabric clock is the larger of the uclk-derived value and min_dcfclk. */
+		bb->clock_limits[i].fabricclk_mhz = (min_fclk_required_by_uclk < min_dcfclk) ?
+				min_dcfclk : min_fclk_required_by_uclk;
+
+		/* soc and dcf clocks follow the fabric clock, capped at their maxima. */
+		bb->clock_limits[i].socclk_mhz = (bb->clock_limits[i].fabricclk_mhz > max_clocks->socClockInKhz / 1000.0) ?
+				max_clocks->socClockInKhz / 1000 : bb->clock_limits[i].fabricclk_mhz;
+
+		bb->clock_limits[i].dcfclk_mhz = (bb->clock_limits[i].fabricclk_mhz > max_clocks->dcfClockInKhz / 1000.0) ?
+				max_clocks->dcfClockInKhz / 1000 : bb->clock_limits[i].fabricclk_mhz;
+
+		bb->clock_limits[i].dispclk_mhz = max_clocks->displayClockInKhz / 1000;
+		bb->clock_limits[i].dppclk_mhz = max_clocks->displayClockInKhz / 1000;
+		bb->clock_limits[i].dscclk_mhz = max_clocks->displayClockInKhz / (1000 * 3);
+
+		bb->clock_limits[i].phyclk_mhz = max_clocks->phyClockInKhz / 1000;
+
+		num_calculated_states++;
+	}
+
+	/*
+	 * The top state always gets the maximum soc/dcf clocks.
+	 * NOTE(review): its fabricclk_mhz is taken from socClockInKhz rather
+	 * than a fabric clock value - confirm this is intended.
+	 */
+	bb->clock_limits[num_calculated_states - 1].socclk_mhz = max_clocks->socClockInKhz / 1000;
+	bb->clock_limits[num_calculated_states - 1].fabricclk_mhz = max_clocks->socClockInKhz / 1000;
+	bb->clock_limits[num_calculated_states - 1].dcfclk_mhz = max_clocks->dcfClockInKhz / 1000;
+
+	bb->num_states = num_calculated_states;
+
+	// Duplicate the last state: DML always needs an extra state identical to the max state to work
+	memcpy(&bb->clock_limits[num_calculated_states], &bb->clock_limits[num_calculated_states - 1], sizeof(struct _vcs_dpi_voltage_scaling_st));
+	bb->clock_limits[num_calculated_states].state = bb->num_states;
+}
+
+/*
+ * dcn20_cap_soc_clocks - clamp bounding-box clocks to the reported maxima.
+ *
+ * @bb: bounding box whose clock_limits entries and num_states are updated.
+ * @max_clocks: reported maximum clocks; a zero field means that clock is
+ *              not capped.
+ *
+ * After clamping, num_states is lowered by one for every adjacent pair of
+ * states (checked from the top down to indices 2/1) whose clocks are all
+ * equal. Table entries themselves are not moved.
+ */
+void dcn20_cap_soc_clocks(struct _vcs_dpi_soc_bounding_box_st *bb,
+		struct pp_smu_nv_clock_table max_clocks)
+{
+	int i;
+
+	dc_assert_fp_enabled();
+
+	// First pass - cap all clocks higher than the reported max
+	for (i = 0; i < bb->num_states; i++) {
+		struct _vcs_dpi_voltage_scaling_st *state = &bb->clock_limits[i];
+
+		if (max_clocks.dcfClockInKhz != 0 &&
+		    state->dcfclk_mhz > (max_clocks.dcfClockInKhz / 1000.0))
+			state->dcfclk_mhz = (max_clocks.dcfClockInKhz / 1000);
+
+		if (max_clocks.uClockInKhz != 0 &&
+		    state->dram_speed_mts > (max_clocks.uClockInKhz / 1000.0) * 16)
+			state->dram_speed_mts = (max_clocks.uClockInKhz / 1000) * 16;
+
+		if (max_clocks.fabricClockInKhz != 0 &&
+		    state->fabricclk_mhz > (max_clocks.fabricClockInKhz / 1000.0))
+			state->fabricclk_mhz = (max_clocks.fabricClockInKhz / 1000);
+
+		if (max_clocks.displayClockInKhz != 0 &&
+		    state->dispclk_mhz > (max_clocks.displayClockInKhz / 1000.0))
+			state->dispclk_mhz = (max_clocks.displayClockInKhz / 1000);
+
+		if (max_clocks.dppClockInKhz != 0 &&
+		    state->dppclk_mhz > (max_clocks.dppClockInKhz / 1000.0))
+			state->dppclk_mhz = (max_clocks.dppClockInKhz / 1000);
+
+		if (max_clocks.phyClockInKhz != 0 &&
+		    state->phyclk_mhz > (max_clocks.phyClockInKhz / 1000.0))
+			state->phyclk_mhz = (max_clocks.phyClockInKhz / 1000);
+
+		if (max_clocks.socClockInKhz != 0 &&
+		    state->socclk_mhz > (max_clocks.socClockInKhz / 1000.0))
+			state->socclk_mhz = (max_clocks.socClockInKhz / 1000);
+
+		if (max_clocks.dscClockInKhz != 0 &&
+		    state->dscclk_mhz > (max_clocks.dscClockInKhz / 1000.0))
+			state->dscclk_mhz = (max_clocks.dscClockInKhz / 1000);
+	}
+
+	// Second pass - remove all duplicate clock states
+	for (i = bb->num_states - 1; i > 1; i--) {
+		const struct _vcs_dpi_voltage_scaling_st *upper = &bb->clock_limits[i];
+		const struct _vcs_dpi_voltage_scaling_st *lower = &bb->clock_limits[i - 1];
+
+		/* A pair is a duplicate only when every compared clock matches. */
+		if (lower->dcfclk_mhz == upper->dcfclk_mhz &&
+		    lower->dispclk_mhz == upper->dispclk_mhz &&
+		    lower->dppclk_mhz == upper->dppclk_mhz &&
+		    lower->dram_speed_mts == upper->dram_speed_mts &&
+		    lower->dscclk_mhz == upper->dscclk_mhz &&
+		    lower->fabricclk_mhz == upper->fabricclk_mhz &&
+		    lower->phyclk_mhz == upper->phyclk_mhz &&
+		    lower->socclk_mhz == upper->socclk_mhz)
+			bb->num_states--;
+	}
+}
+
+void dcn20_patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb)
+{ /* Apply the non-zero latency overrides in dc->bb_overrides to the bounding box @bb. */
+ dc_assert_fp_enabled();
+ /* Overrides are *_ns fields, @bb fields are *_us; a field is rewritten only if its truncated ns value differs from a non-zero override. */
+ if ((int)(bb->sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns
+ && dc->bb_overrides.sr_exit_time_ns) {
+ bb->sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
+ }
+
+ if ((int)(bb->sr_enter_plus_exit_time_us * 1000)
+ != dc->bb_overrides.sr_enter_plus_exit_time_ns
+ && dc->bb_overrides.sr_enter_plus_exit_time_ns) {
+ bb->sr_enter_plus_exit_time_us =
+ dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+ }
+
+ if ((int)(bb->sr_exit_z8_time_us * 1000)
+ != dc->bb_overrides.sr_exit_z8_time_ns
+ && dc->bb_overrides.sr_exit_z8_time_ns) {
+ bb->sr_exit_z8_time_us = dc->bb_overrides.sr_exit_z8_time_ns / 1000.0;
+ }
+
+ if ((int)(bb->sr_enter_plus_exit_z8_time_us * 1000)
+ != dc->bb_overrides.sr_enter_plus_exit_z8_time_ns
+ && dc->bb_overrides.sr_enter_plus_exit_z8_time_ns) {
+ bb->sr_enter_plus_exit_z8_time_us = dc->bb_overrides.sr_enter_plus_exit_z8_time_ns / 1000.0;
+ }
+ if ((int)(bb->urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns
+ && dc->bb_overrides.urgent_latency_ns) {
+ bb->urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0;
+ }
+
+ if ((int)(bb->dram_clock_change_latency_us * 1000)
+ != dc->bb_overrides.dram_clock_change_latency_ns
+ && dc->bb_overrides.dram_clock_change_latency_ns) {
+ bb->dram_clock_change_latency_us =
+ dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
+ }
+ /* The dummy p-state latency in @bb is driven by the dummy_clock_change_latency_ns override (the names differ). */
+ if ((int)(bb->dummy_pstate_latency_us * 1000)
+ != dc->bb_overrides.dummy_clock_change_latency_ns
+ && dc->bb_overrides.dummy_clock_change_latency_ns) {
+ bb->dummy_pstate_latency_us =
+ dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0;
+ }
+}
+
+static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *context,
+ enum dc_validate_mode validate_mode, display_e2e_pipe_params_st *pipes)
+{ /* One validation pass: fast validation, then watermark/DLG calculation only in programming mode. Returns the validation result. */
+ bool out = false;
+
+ BW_VAL_TRACE_SETUP();
+
+ int vlevel = 0;
+ int pipe_split_from[MAX_PIPES];
+ int pipe_cnt = 0;
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ BW_VAL_TRACE_COUNT();
+ /* Fills pipe_cnt, pipe_split_from and vlevel through the pointers passed here. */
+ out = dcn20_fast_validate_bw(dc, context, pipes, &pipe_cnt, pipe_split_from, &vlevel, validate_mode);
+ /* No pipes reported: return the fast-validation result as is, without logging a failure. */
+ if (pipe_cnt == 0)
+ goto validate_out;
+
+ if (!out)
+ goto validate_fail;
+
+ BW_VAL_TRACE_END_VOLTAGE_LEVEL();
+ /* Any mode other than DC_VALIDATE_MODE_AND_PROGRAMMING stops after the fast validation. */
+ if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) {
+ BW_VAL_TRACE_SKIP(fast);
+ goto validate_out;
+ }
+
+ dcn20_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, validate_mode);
+ dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
+
+ BW_VAL_TRACE_END_WATERMARKS();
+
+ goto validate_out;
+
+validate_fail:
+ DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n",
+ dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states]));
+
+ BW_VAL_TRACE_SKIP(fail);
+ out = false;
+
+validate_out:
+
+ BW_VAL_TRACE_FINISH();
+
+ return out;
+}
+
+bool dcn20_validate_bandwidth_fp(struct dc *dc, struct dc_state *context,
+ enum dc_validate_mode validate_mode, display_e2e_pipe_params_st *pipes)
+{ /* Validate @context; in programming mode, retry with the dummy p-state latency when the first pass is not fully supported. */
+ bool voltage_supported = false;
+ bool full_pstate_supported = false;
+ bool dummy_pstate_supported = false;
+ double p_state_latency_us;
+
+ dc_assert_fp_enabled();
+ /* Cached because the fallback pass overwrites this latency; it is written back at restore_dml_state. */
+ p_state_latency_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us;
+ context->bw_ctx.dml.soc.disable_dram_clock_change_vactive_support =
+ dc->debug.disable_dram_clock_change_vactive_support;
+ context->bw_ctx.dml.soc.allow_dram_clock_one_display_vactive =
+ dc->debug.enable_dram_clock_change_one_display_vactive;
+
+ /*Unsafe due to current pipe merge and split logic*/
+ ASSERT(context != dc->current_state);
+ /* Non-programming modes need a single pass and return before the latency is touched, so nothing has to be restored. */
+ if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING)
+ return dcn20_validate_bandwidth_internal(dc, context, validate_mode, pipes);
+
+ // Best case, we support full UCLK switch latency
+ voltage_supported = dcn20_validate_bandwidth_internal(dc, context, DC_VALIDATE_MODE_AND_PROGRAMMING, pipes);
+ full_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support;
+ /* Accept the first pass if no dummy latency is configured (== 0) or if both voltage and p-state change are supported. */
+ if (context->bw_ctx.dml.soc.dummy_pstate_latency_us == 0 ||
+ (voltage_supported && full_pstate_supported)) {
+ context->bw_ctx.bw.dcn.clk.p_state_change_support = full_pstate_supported;
+ goto restore_dml_state;
+ }
+
+ // Fallback: Try to only support G6 temperature read latency
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = context->bw_ctx.dml.soc.dummy_pstate_latency_us;
+ /* pipes[] is zeroed before the second pass so it is rebuilt from scratch. */
+ memset(pipes, 0, dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st));
+ voltage_supported = dcn20_validate_bandwidth_internal(dc, context, DC_VALIDATE_MODE_AND_PROGRAMMING, pipes);
+ dummy_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support;
+ /* Fallback success (or a context with no streams) is reported with p_state_change_support forced to false. */
+ if (voltage_supported && (dummy_pstate_supported || !(context->stream_count))) {
+ context->bw_ctx.bw.dcn.clk.p_state_change_support = false;
+ goto restore_dml_state;
+ }
+
+ // ERROR: fallback is supposed to always work.
+ ASSERT(false);
+
+restore_dml_state:
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = p_state_latency_us;
+ return voltage_supported;
+}
+
+void dcn20_fpu_set_wm_ranges(int i,
+ struct pp_smu_wm_range_sets *ranges,
+ struct _vcs_dpi_soc_bounding_box_st *loaded_bb)
+{ /* Set the fill-clock range of reader watermark set @i from the DRAM speeds in @loaded_bb->clock_limits. */
+ dc_assert_fp_enabled();
+ /* min = (previous state's dram_speed_mts / 16) + 1, or 0 for set 0; max = this state's dram_speed_mts / 16. */
+ ranges->reader_wm_sets[i].min_fill_clk_mhz = (i > 0) ? (loaded_bb->clock_limits[i - 1].dram_speed_mts / 16) + 1 : 0;
+ ranges->reader_wm_sets[i].max_fill_clk_mhz = loaded_bb->clock_limits[i].dram_speed_mts / 16;
+}
+
+void dcn20_fpu_adjust_dppclk(struct vba_vars_st *v,
+ int vlevel,
+ int max_mpc_comb,
+ int pipe_idx,
+ bool is_validating_bw)
+{ /* Scale v->RequiredDPPCLK[vlevel][max_mpc_comb][pipe_idx] in place by a factor of two. */
+ dc_assert_fp_enabled();
+ /* @is_validating_bw selects the direction: true doubles the value, false halves it. */
+ if (is_validating_bw)
+ v->RequiredDPPCLK[vlevel][max_mpc_comb][pipe_idx] *= 2;
+ else
+ v->RequiredDPPCLK[vlevel][max_mpc_comb][pipe_idx] /= 2;
+}
+
+int dcn21_populate_dml_pipes_from_context(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ enum dc_validate_mode validate_mode)
+{ /* Populate @pipes with the DCN20 helper, then set the hostvm/gpuvm source fields. Returns the pipe count. */
+ uint32_t pipe_cnt;
+ int i;
+
+ dc_assert_fp_enabled();
+
+ pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, context, pipes, validate_mode);
+ /* For every populated pipe: hostvm mirrors hubbub->riommu_active, gpuvm is always set to 1. */
+ for (i = 0; i < pipe_cnt; i++) {
+
+ pipes[i].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active;
+ pipes[i].pipe.src.gpuvm = 1;
+ }
+
+ return pipe_cnt;
+}
+
+static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb)
+{ /* Apply non-zero dc->bb_overrides (ns) as us values to the clk_mgr watermark table and to @bb. */
+ int i;
+ /* The self-refresh and p-state overrides are written to all WM_SET_COUNT entries of dc->clk_mgr->bw_params->wm_table, not to @bb. */
+ if (dc->bb_overrides.sr_exit_time_ns) {
+ for (i = 0; i < WM_SET_COUNT; i++) {
+ dc->clk_mgr->bw_params->wm_table.entries[i].sr_exit_time_us =
+ dc->bb_overrides.sr_exit_time_ns / 1000.0;
+ }
+ }
+
+ if (dc->bb_overrides.sr_enter_plus_exit_time_ns) {
+ for (i = 0; i < WM_SET_COUNT; i++) {
+ dc->clk_mgr->bw_params->wm_table.entries[i].sr_enter_plus_exit_time_us =
+ dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+ }
+ }
+ /* The urgent latency override is the only one stored in @bb itself. */
+ if (dc->bb_overrides.urgent_latency_ns) {
+ bb->urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0;
+ }
+
+ if (dc->bb_overrides.dram_clock_change_latency_ns) {
+ for (i = 0; i < WM_SET_COUNT; i++) {
+ dc->clk_mgr->bw_params->wm_table.entries[i].pstate_latency_us =
+ dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
+ }
+ }
+}
+
+static void calculate_wm_set_for_vlevel(int vlevel,
+ struct wm_range_table_entry *table_entry,
+ struct dcn_watermarks *wm_set,
+ struct display_mode_lib *dml,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt)
+{ /* Fill @wm_set from the DML getters for state @vlevel, using the latencies of @table_entry. */
+ double dram_clock_change_latency_cached = dml->soc.dram_clock_change_latency_us;
+
+ ASSERT(vlevel < dml->soc.num_states);
+ /* only pipe 0 is read for voltage and dcf/soc clocks */
+ pipes[0].clks_cfg.voltage = vlevel;
+ pipes[0].clks_cfg.dcfclk_mhz = dml->soc.clock_limits[vlevel].dcfclk_mhz;
+ pipes[0].clks_cfg.socclk_mhz = dml->soc.clock_limits[vlevel].socclk_mhz;
+ /* Load this watermark set's latencies into dml->soc before querying the getters below. */
+ dml->soc.dram_clock_change_latency_us = table_entry->pstate_latency_us;
+ dml->soc.sr_exit_time_us = table_entry->sr_exit_time_us;
+ dml->soc.sr_enter_plus_exit_time_us = table_entry->sr_enter_plus_exit_time_us;
+ /* Every getter result is multiplied by 1000 before it is stored, including the two frac_urg_bw_* fields. */
+ wm_set->urgent_ns = get_wm_urgent(dml, pipes, pipe_cnt) * 1000;
+ wm_set->cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(dml, pipes, pipe_cnt) * 1000;
+ wm_set->cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(dml, pipes, pipe_cnt) * 1000;
+ wm_set->cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(dml, pipes, pipe_cnt) * 1000;
+ wm_set->pte_meta_urgent_ns = get_wm_memory_trip(dml, pipes, pipe_cnt) * 1000;
+ wm_set->frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(dml, pipes, pipe_cnt) * 1000;
+ wm_set->frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(dml, pipes, pipe_cnt) * 1000;
+ wm_set->urgent_latency_ns = get_urgent_latency(dml, pipes, pipe_cnt) * 1000;
+ dml->soc.dram_clock_change_latency_us = dram_clock_change_latency_cached; /* only this field is restored; sr_exit_time_us and sr_enter_plus_exit_time_us keep the table entry's values */
+}
+
+static void dcn21_calculate_wm(struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int *out_pipe_cnt,
+ int *pipe_split_from,
+ int vlevel_req,
+ enum dc_validate_mode validate_mode)
+{ /* Set per-pipe clocks in @pipes for @vlevel_req, then compute watermark sets D, C, B and A; the pipe count is returned in @out_pipe_cnt. */
+ int pipe_cnt, i, pipe_idx;
+ int vlevel, vlevel_max;
+ struct wm_range_table_entry *table_entry;
+ struct clk_bw_params *bw_params = dc->clk_mgr->bw_params;
+
+ ASSERT(bw_params);
+
+ patch_bounding_box(dc, &context->bw_ctx.dml.soc);
+ /* pipe_cnt counts every pipe with a stream; pipe_idx advances only for pipes with pipe_split_from[i] < 0. */
+ for (i = 0, pipe_idx = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+
+ pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0;
+ pipes[pipe_cnt].clks_cfg.dispclk_mhz = context->bw_ctx.dml.vba.RequiredDISPCLK[vlevel_req][context->bw_ctx.dml.vba.maxMpcComb];
+
+ if (pipe_split_from[i] < 0) {
+ pipes[pipe_cnt].clks_cfg.dppclk_mhz =
+ context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel_req][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx];
+ if (context->bw_ctx.dml.vba.BlendingAndTiming[pipe_idx] == pipe_idx)
+ pipes[pipe_cnt].pipe.dest.odm_combine =
+ context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel_req][pipe_idx];
+ else
+ pipes[pipe_cnt].pipe.dest.odm_combine = 0;
+ pipe_idx++;
+ } else { /* pipe_split_from[i] >= 0: take the DPPCLK and ODM setting from the index stored in pipe_split_from[i] */
+ pipes[pipe_cnt].clks_cfg.dppclk_mhz =
+ context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel_req][context->bw_ctx.dml.vba.maxMpcComb][pipe_split_from[i]];
+ if (context->bw_ctx.dml.vba.BlendingAndTiming[pipe_split_from[i]] == pipe_split_from[i])
+ pipes[pipe_cnt].pipe.dest.odm_combine =
+ context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel_req][pipe_split_from[i]];
+ else
+ pipes[pipe_cnt].pipe.dest.odm_combine = 0;
+ }
+ pipe_cnt++;
+ }
+ /* The counts differ when at least one pipe took the else branch above; @pipes is then repopulated from @context. */
+ if (pipe_cnt != pipe_idx) {
+ if (dc->res_pool->funcs->populate_dml_pipes)
+ pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc,
+ context, pipes, validate_mode);
+ else
+ pipe_cnt = dcn21_populate_dml_pipes_from_context(dc,
+ context, pipes, validate_mode);
+ }
+
+ *out_pipe_cnt = pipe_cnt;
+
+ vlevel_max = bw_params->clk_table.num_entries - 1;
+
+ /* Sets are computed in the order D, C, B, A; each call overwrites pipes[0].clks_cfg, so set A's values are the ones left behind. */
+ /* WM Set D */
+ table_entry = &bw_params->wm_table.entries[WM_D];
+ if (table_entry->wm_type == WM_TYPE_RETRAINING)
+ vlevel = 0;
+ else
+ vlevel = vlevel_max;
+ calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.d,
+ &context->bw_ctx.dml, pipes, pipe_cnt);
+ /* WM Set C */
+ table_entry = &bw_params->wm_table.entries[WM_C];
+ vlevel = MIN(MAX(vlevel_req, 3), vlevel_max); /* at least level 3, capped at the last clk_table entry */
+ calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.c,
+ &context->bw_ctx.dml, pipes, pipe_cnt);
+ /* WM Set B */
+ table_entry = &bw_params->wm_table.entries[WM_B];
+ vlevel = MIN(MAX(vlevel_req, 2), vlevel_max); /* at least level 2, capped at the last clk_table entry */
+ calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.b,
+ &context->bw_ctx.dml, pipes, pipe_cnt);
+
+ /* WM Set A */
+ table_entry = &bw_params->wm_table.entries[WM_A];
+ vlevel = MIN(vlevel_req, vlevel_max); /* the requested level, capped at the last clk_table entry */
+ calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.a,
+ &context->bw_ctx.dml, pipes, pipe_cnt);
+}
+
+bool dcn21_validate_bandwidth_fp(struct dc *dc, struct dc_state *context,
+ enum dc_validate_mode validate_mode, display_e2e_pipe_params_st *pipes)
+{ /* Validate @context with the DCN21 fast path; in programming mode also compute watermarks and DLG parameters. Returns the validation result. */
+ bool out = false;
+
+ BW_VAL_TRACE_SETUP();
+
+ int vlevel = 0;
+ int pipe_split_from[MAX_PIPES];
+ int pipe_cnt = 0;
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ BW_VAL_TRACE_COUNT();
+
+ dc_assert_fp_enabled();
+
+ /*Unsafe due to current pipe merge and split logic*/
+ ASSERT(context != dc->current_state);
+ /* Fills pipe_cnt, pipe_split_from and vlevel through the pointers passed here. */
+ out = dcn21_fast_validate_bw(dc, context, pipes, &pipe_cnt, pipe_split_from, &vlevel, validate_mode);
+ /* No pipes reported: return the fast-validation result as is, without logging a failure. */
+ if (pipe_cnt == 0)
+ goto validate_out;
+
+ if (!out)
+ goto validate_fail;
+
+ BW_VAL_TRACE_END_VOLTAGE_LEVEL();
+ /* Any mode other than DC_VALIDATE_MODE_AND_PROGRAMMING stops after the fast validation. */
+ if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) {
+ BW_VAL_TRACE_SKIP(fast);
+ goto validate_out;
+ }
+ /* Watermarks use the DCN21 routine; the DLG parameters reuse the DCN20 one. */
+ dcn21_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, validate_mode);
+ dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
+
+ BW_VAL_TRACE_END_WATERMARKS();
+
+ goto validate_out;
+
+validate_fail:
+ DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n",
+ dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states]));
+
+ BW_VAL_TRACE_SKIP(fail);
+ out = false;
+
+validate_out:
+
+ BW_VAL_TRACE_FINISH();
+
+ return out;
+}
+
+static struct _vcs_dpi_voltage_scaling_st construct_low_pstate_lvl(struct clk_limit_table *clk_table, unsigned int high_voltage_lvl)
+{ /* Build and return the state-1 clock limits; as a side effect, open slot 1 in @clk_table by shifting its entries up. */
+ struct _vcs_dpi_voltage_scaling_st low_pstate_lvl = {0};
+ int i;
+ /* dcf/fabric/soc/dram values come from clk_table entry 0 (dram_speed_mts is memclk_mhz * 2). */
+ low_pstate_lvl.state = 1;
+ low_pstate_lvl.dcfclk_mhz = clk_table->entries[0].dcfclk_mhz;
+ low_pstate_lvl.fabricclk_mhz = clk_table->entries[0].fclk_mhz;
+ low_pstate_lvl.socclk_mhz = clk_table->entries[0].socclk_mhz;
+ low_pstate_lvl.dram_speed_mts = clk_table->entries[0].memclk_mhz * 2;
+ /* The remaining clocks are copied from dcn2_1_soc.clock_limits[@high_voltage_lvl]. */
+ low_pstate_lvl.dispclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].dispclk_mhz;
+ low_pstate_lvl.dppclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].dppclk_mhz;
+ low_pstate_lvl.dram_bw_per_chan_gbps = dcn2_1_soc.clock_limits[high_voltage_lvl].dram_bw_per_chan_gbps;
+ low_pstate_lvl.dscclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].dscclk_mhz;
+ low_pstate_lvl.dtbclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].dtbclk_mhz;
+ low_pstate_lvl.phyclk_d18_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].phyclk_d18_mhz;
+ low_pstate_lvl.phyclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].phyclk_mhz;
+ /* Shift entries[1..num_entries-1] up by one, then duplicate entry 0 into entry 1. NOTE(review): writes entries[num_entries]; assumes the array has a spare slot - confirm. */
+ for (i = clk_table->num_entries; i > 1; i--)
+ clk_table->entries[i] = clk_table->entries[i-1];
+ clk_table->entries[1] = clk_table->entries[0];
+ clk_table->num_entries++;
+
+ return low_pstate_lvl;
+}
+
+void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
+{ /* Rebuild dcn2_1_soc.clock_limits from @bw_params->clk_table and re-initialise dc->dml with the result. */
+ struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits;
+ struct dcn21_resource_pool *pool = TO_DCN21_RES_POOL(dc->res_pool);
+ struct clk_limit_table *clk_table = &bw_params->clk_table;
+ unsigned int i, closest_clk_lvl = 0, k = 0;
+ int j;
+
+ dc_assert_fp_enabled();
+
+ dcn2_1_ip.max_num_otg = pool->base.res_cap->num_timing_generator;
+ dcn2_1_ip.max_num_dpp = pool->base.pipe_count;
+ dcn2_1_soc.num_chans = bw_params->num_channels;
+
+ ASSERT(clk_table->num_entries);
+ /* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying over null states later */
+ memcpy(s, dcn2_1_soc.clock_limits, sizeof(dcn2_1_soc.clock_limits));
+ /* For each clk_table entry i, fill scratch state s[k]; k skips index 1, so entry i lands at s[0] for i == 0 and at s[i + 1] otherwise. */
+ for (i = 0; i < clk_table->num_entries; i++) {
+ /* loop backwards*/
+ for (closest_clk_lvl = 0, j = dcn2_1_soc.num_states - 1; j >= 0; j--) {
+ if ((unsigned int) dcn2_1_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
+ closest_clk_lvl = j; /* highest existing state whose dcfclk does not exceed this entry's; stays 0 if none matches */
+ break;
+ }
+ }
+
+ /* clk_table[1] is reserved for min DF PState. skip here to fill in later. */
+ if (i == 1)
+ k++;
+
+ s[k].state = k;
+ s[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ s[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+ s[k].socclk_mhz = clk_table->entries[i].socclk_mhz;
+ s[k].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2;
+ /* Clocks not taken from clk_table are copied from the closest existing state found above. */
+ s[k].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+ s[k].dppclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+ s[k].dram_bw_per_chan_gbps =
+ dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ s[k].dscclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ s[k].dtbclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ s[k].phyclk_d18_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ s[k].phyclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+
+ k++;
+ }
+
+ memcpy(&dcn2_1_soc.clock_limits, s, sizeof(dcn2_1_soc.clock_limits));
+ /* num_states is computed before construct_low_pstate_lvl() increments clk_table->num_entries; closest_clk_lvl is the value left by the last loop iteration. */
+ if (clk_table->num_entries) {
+ dcn2_1_soc.num_states = clk_table->num_entries + 1;
+ /* fill in min DF PState */
+ dcn2_1_soc.clock_limits[1] = construct_low_pstate_lvl(clk_table, closest_clk_lvl);
+ /* duplicate last level */
+ dcn2_1_soc.clock_limits[dcn2_1_soc.num_states] = dcn2_1_soc.clock_limits[dcn2_1_soc.num_states - 1];
+ dcn2_1_soc.clock_limits[dcn2_1_soc.num_states].state = dcn2_1_soc.num_states;
+ }
+
+ dml_init_instance(&dc->dml, &dcn2_1_soc, &dcn2_1_ip, DML_PROJECT_DCN21);
+}
+
+void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params)
+{ /* Configure watermark table entry WM_D of @bw_params as a valid WM_TYPE_RETRAINING entry. */
+ dc_assert_fp_enabled();
+ /* Its p-state latency is set to LPDDR_MEM_RETRAIN_LATENCY; no other table entry is touched here. */
+ bw_params->wm_table.entries[WM_D].pstate_latency_us = LPDDR_MEM_RETRAIN_LATENCY;
+ bw_params->wm_table.entries[WM_D].wm_inst = WM_D;
+ bw_params->wm_table.entries[WM_D].wm_type = WM_TYPE_RETRAINING;
+ bw_params->wm_table.entries[WM_D].valid = true;
+}
+
+void dcn201_populate_dml_writeback_from_context_fpu(struct dc *dc,
+ struct resource_context *res_ctx,
+ display_e2e_pipe_params_st *pipes)
+{ /* Fill the writeback (dout) settings of each entry in @pipes from the streams in @res_ctx. */
+ int pipe_cnt, i, j;
+ double max_calc_writeback_dispclk;
+ double writeback_dispclk;
+ struct writeback_st dout_wb = {0}; /* declared outside the loops, so fields keep their last value between iterations */
+
+ dc_assert_fp_enabled();
+ /* pipe_cnt indexes @pipes and advances only for pipe contexts that have a stream. */
+ for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+ struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream;
+
+ if (!stream)
+ continue;
+ max_calc_writeback_dispclk = 0;
+
+ /* Set writeback information */
+ pipes[pipe_cnt].dout.wb_enable = 0;
+ pipes[pipe_cnt].dout.num_active_wb = 0;
+ for (j = 0; j < stream->num_wb_info; j++) {
+ struct dc_writeback_info *wb_info = &stream->writeback_info[j];
+ /* Only enabled writebacks whose source plane is this pipe's plane_state are counted. */
+ if (wb_info->wb_enabled && wb_info->writeback_source_plane &&
+ (wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) {
+ pipes[pipe_cnt].dout.wb_enable = 1;
+ pipes[pipe_cnt].dout.num_active_wb++;
+ dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ?
+ wb_info->dwb_params.cnv_params.crop_height :
+ wb_info->dwb_params.cnv_params.src_height;
+ dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ?
+ wb_info->dwb_params.cnv_params.crop_width :
+ wb_info->dwb_params.cnv_params.src_width;
+ dout_wb.wb_dst_width = wb_info->dwb_params.dest_width;
+ dout_wb.wb_dst_height = wb_info->dwb_params.dest_height;
+ dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps;
+ dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps;
+ dout_wb.wb_htaps_chroma = wb_info->dwb_params.scaler_taps.h_taps_c;
+ dout_wb.wb_vtaps_chroma = wb_info->dwb_params.scaler_taps.v_taps_c;
+ dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ?
+ (double)wb_info->dwb_params.cnv_params.crop_width /
+ (double)wb_info->dwb_params.dest_width :
+ (double)wb_info->dwb_params.cnv_params.src_width /
+ (double)wb_info->dwb_params.dest_width;
+ dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ?
+ (double)wb_info->dwb_params.cnv_params.crop_height /
+ (double)wb_info->dwb_params.dest_height :
+ (double)wb_info->dwb_params.cnv_params.src_height /
+ (double)wb_info->dwb_params.dest_height;
+ if (wb_info->dwb_params.out_format == dwb_scaler_mode_yuv420) {
+ if (wb_info->dwb_params.output_depth == DWB_OUTPUT_PIXEL_DEPTH_8BPC)
+ dout_wb.wb_pixel_format = dm_420_8;
+ else
+ dout_wb.wb_pixel_format = dm_420_10;
+ } else
+ dout_wb.wb_pixel_format = dm_444_32;
+
+ /* Workaround for cases where multiple writebacks are connected to same plane
+ * In which case, need to compute worst case and set the associated writeback parameters
+ * This workaround is necessary due to DML computation assuming only 1 set of writeback
+ * parameters per pipe */
+ writeback_dispclk = CalculateWriteBackDISPCLK(
+ dout_wb.wb_pixel_format,
+ pipes[pipe_cnt].pipe.dest.pixel_rate_mhz,
+ dout_wb.wb_hratio,
+ dout_wb.wb_vratio,
+ dout_wb.wb_htaps_luma,
+ dout_wb.wb_vtaps_luma,
+ dout_wb.wb_htaps_chroma,
+ dout_wb.wb_vtaps_chroma,
+ dout_wb.wb_dst_width,
+ pipes[pipe_cnt].pipe.dest.htotal,
+ 2);
+ /* Keep the parameters of the writeback with the largest computed DISPCLK; the strict '>' means a result of 0 never stores dout.wb. */
+ if (writeback_dispclk > max_calc_writeback_dispclk) {
+ max_calc_writeback_dispclk = writeback_dispclk;
+ pipes[pipe_cnt].dout.wb = dout_wb;
+ }
+ }
+ }
+
+ pipe_cnt++;
+ }
+
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h
index 36f26126d574..aed00039ca62 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h
@@ -23,6 +23,7 @@
* Authors: AMD
*
*/
+#include "core_types.h"
#ifndef __DCN20_FPU_H__
#define __DCN20_FPU_H__
@@ -31,4 +32,58 @@ void dcn20_populate_dml_writeback_from_context(struct dc *dc,
struct resource_context *res_ctx,
display_e2e_pipe_params_st *pipes);
+void dcn20_fpu_set_wb_arb_params(struct mcif_arb_params *wb_arb_params,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt, int i);
+void dcn20_calculate_dlg_params(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel);
+int dcn20_populate_dml_pipes_from_context(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ enum dc_validate_mode validate_mode);
+void dcn20_calculate_wm(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int *out_pipe_cnt,
+ int *pipe_split_from,
+ int vlevel,
+ enum dc_validate_mode validate_mode);
+void dcn20_cap_soc_clocks(struct _vcs_dpi_soc_bounding_box_st *bb,
+ struct pp_smu_nv_clock_table max_clocks);
+void dcn20_update_bounding_box(struct dc *dc,
+ struct _vcs_dpi_soc_bounding_box_st *bb,
+ struct pp_smu_nv_clock_table *max_clocks,
+ unsigned int *uclk_states,
+ unsigned int num_states);
+void dcn20_patch_bounding_box(struct dc *dc,
+ struct _vcs_dpi_soc_bounding_box_st *bb);
+bool dcn20_validate_bandwidth_fp(struct dc *dc, struct dc_state *context,
+ enum dc_validate_mode validate_mode, display_e2e_pipe_params_st *pipes);
+void dcn20_fpu_set_wm_ranges(int i,
+ struct pp_smu_wm_range_sets *ranges,
+ struct _vcs_dpi_soc_bounding_box_st *loaded_bb);
+void dcn20_fpu_adjust_dppclk(struct vba_vars_st *v,
+ int vlevel,
+ int max_mpc_comb,
+ int pipe_idx,
+ bool is_validating_bw);
+
+int dcn21_populate_dml_pipes_from_context(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ enum dc_validate_mode validate_mode);
+bool dcn21_validate_bandwidth_fp(struct dc *dc, struct dc_state *context, enum
+ dc_validate_mode, display_e2e_pipe_params_st *pipes);
+void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
+
+void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params);
+
+void dcn201_populate_dml_writeback_from_context_fpu(struct dc *dc,
+ struct resource_context *res_ctx,
+ display_e2e_pipe_params_st *pipes);
+
#endif /* __DCN20_FPU_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
index d3b5b6fedf04..0c8c4a080c50 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
@@ -785,12 +785,9 @@ static bool CalculatePrefetchSchedule(
if (MyError) {
*PrefetchBandwidth = 0;
- TimeForFetchingMetaPTE = 0;
- TimeForFetchingRowInVBlank = 0;
*DestinationLinesToRequestVMInVBlank = 0;
*DestinationLinesToRequestRowInVBlank = 0;
*DestinationLinesForPrefetch = 0;
- LinesToRequestPrefetchPixelData = 0;
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
*RequiredPrefetchPixDataBW = 0;
@@ -1017,7 +1014,7 @@ static unsigned int CalculateVMAndRowBytes(
if (ScanDirection == dm_horz)
FractionOfPTEReturnDrop = 0;
else
- FractionOfPTEReturnDrop = 7 / 8;
+ FractionOfPTEReturnDrop = 7.0 / 8;
} else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) {
PixelPTEReqHeight = 16 * BlockHeight256Bytes;
PixelPTEReqWidth = 16 * BlockWidth256Bytes;
@@ -3231,22 +3228,22 @@ static unsigned int TruncToValidBPP(
if (Format == dm_420) {
if (DecimalBPP < 6)
return BPP_INVALID;
- else if (DecimalBPP >= 1.5 * DSCInputBitPerComponent - 1 / 16)
- return 1.5 * DSCInputBitPerComponent - 1 / 16;
+ else if (DecimalBPP >= 1.5 * DSCInputBitPerComponent - 1.0 / 16)
+ return 1.5 * DSCInputBitPerComponent - 1.0 / 16;
else
return dml_floor(16 * DecimalBPP, 1) / 16;
} else if (Format == dm_n422) {
if (DecimalBPP < 7)
return BPP_INVALID;
- else if (DecimalBPP >= 2 * DSCInputBitPerComponent - 1 / 16)
- return 2 * DSCInputBitPerComponent - 1 / 16;
+ else if (DecimalBPP >= 2 * DSCInputBitPerComponent - 1.0 / 16)
+ return 2 * DSCInputBitPerComponent - 1.0 / 16;
else
return dml_floor(16 * DecimalBPP, 1) / 16;
} else {
if (DecimalBPP < 8)
return BPP_INVALID;
- else if (DecimalBPP >= 3 * DSCInputBitPerComponent - 1 / 16)
- return 3 * DSCInputBitPerComponent - 1 / 16;
+ else if (DecimalBPP >= 3 * DSCInputBitPerComponent - 1.0 / 16)
+ return 3 * DSCInputBitPerComponent - 1.0 / 16;
else
return dml_floor(16 * DecimalBPP, 1) / 16;
}
@@ -3897,14 +3894,14 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2
* (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
- locals->ODMCombineEnablePerState[i][k] = false;
+ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine;
if (mode_lib->vba.ODMCapability) {
if (locals->PlaneRequiredDISPCLKWithoutODMCombine > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) {
- locals->ODMCombineEnablePerState[i][k] = true;
+ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
} else if (locals->HActive[k] > DCN20_MAX_420_IMAGE_WIDTH && locals->OutputFormat[k] == dm_420) {
- locals->ODMCombineEnablePerState[i][k] = true;
+ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
}
}
@@ -3957,7 +3954,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
locals->RequiredDISPCLK[i][j] = 0.0;
locals->DISPCLK_DPPCLK_Support[i][j] = true;
for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
- locals->ODMCombineEnablePerState[i][k] = false;
+ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
if (locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]) {
locals->NoOfDPP[i][j][k] = 1;
locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k]
@@ -4322,7 +4319,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
locals->RoundedUpMaxSwathSizeBytesC = 0;
}
- if (locals->RoundedUpMaxSwathSizeBytesY + locals->RoundedUpMaxSwathSizeBytesC <= locals->DETBufferSizeInKByte[0] * 1024 / 2) {
+ if (locals->RoundedUpMaxSwathSizeBytesY + locals->RoundedUpMaxSwathSizeBytesC <= locals->DETBufferSizeInKByte[0] * 1024.0 / 2) {
locals->SwathHeightYPerState[i][j][k] = locals->MaxSwathHeightY[k];
locals->SwathHeightCPerState[i][j][k] = locals->MaxSwathHeightC[k];
} else {
@@ -4356,12 +4353,16 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i][0],
locals->EffectiveLBLatencyHidingSourceLinesLuma),
locals->SwathHeightYPerState[i][j][k]);
-
- locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min(
- locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] *
- locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0],
- locals->EffectiveLBLatencyHidingSourceLinesChroma),
- locals->SwathHeightCPerState[i][j][k]);
+ if (locals->LinesInDETChroma) {
+ locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma +
+ dml_min(locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] *
+ locals->BytePerPixelInDETC[k] *
+ locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0],
+ locals->EffectiveLBLatencyHidingSourceLinesChroma),
+ locals->SwathHeightCPerState[i][j][k]);
+ } else {
+ locals->EffectiveDETLBLinesChroma = 0;
+ }
if (locals->BytePerPixelInDETC[k] == 0) {
locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k])
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
index 63bbdf8b8678..c935903b68e1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
@@ -845,12 +845,9 @@ static bool CalculatePrefetchSchedule(
if (MyError) {
*PrefetchBandwidth = 0;
- TimeForFetchingMetaPTE = 0;
- TimeForFetchingRowInVBlank = 0;
*DestinationLinesToRequestVMInVBlank = 0;
*DestinationLinesToRequestRowInVBlank = 0;
*DestinationLinesForPrefetch = 0;
- LinesToRequestPrefetchPixelData = 0;
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
*RequiredPrefetchPixDataBW = 0;
@@ -1077,7 +1074,7 @@ static unsigned int CalculateVMAndRowBytes(
if (ScanDirection == dm_horz)
FractionOfPTEReturnDrop = 0;
else
- FractionOfPTEReturnDrop = 7 / 8;
+ FractionOfPTEReturnDrop = 7.0 / 8;
} else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) {
PixelPTEReqHeight = 16 * BlockHeight256Bytes;
PixelPTEReqWidth = 16 * BlockWidth256Bytes;
@@ -4008,17 +4005,17 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2
* (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
- locals->ODMCombineEnablePerState[i][k] = false;
+ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine;
if (mode_lib->vba.ODMCapability) {
if (locals->PlaneRequiredDISPCLKWithoutODMCombine > MaxMaxDispclkRoundedDown) {
- locals->ODMCombineEnablePerState[i][k] = true;
+ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
} else if (locals->DSCEnabled[k] && (locals->HActive[k] > DCN20_MAX_DSC_IMAGE_WIDTH)) {
- locals->ODMCombineEnablePerState[i][k] = true;
+ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
} else if (locals->HActive[k] > DCN20_MAX_420_IMAGE_WIDTH && locals->OutputFormat[k] == dm_420) {
- locals->ODMCombineEnablePerState[i][k] = true;
+ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
}
}
@@ -4071,7 +4068,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
locals->RequiredDISPCLK[i][j] = 0.0;
locals->DISPCLK_DPPCLK_Support[i][j] = true;
for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
- locals->ODMCombineEnablePerState[i][k] = false;
+ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
if (locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]) {
locals->NoOfDPP[i][j][k] = 1;
locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k]
@@ -4443,7 +4440,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
locals->RoundedUpMaxSwathSizeBytesC = 0;
}
- if (locals->RoundedUpMaxSwathSizeBytesY + locals->RoundedUpMaxSwathSizeBytesC <= locals->DETBufferSizeInKByte[0] * 1024 / 2) {
+ if (locals->RoundedUpMaxSwathSizeBytesY + locals->RoundedUpMaxSwathSizeBytesC <= locals->DETBufferSizeInKByte[0] * 1024.0 / 2) {
locals->SwathHeightYPerState[i][j][k] = locals->MaxSwathHeightY[k];
locals->SwathHeightCPerState[i][j][k] = locals->MaxSwathHeightC[k];
} else {
@@ -4478,17 +4475,17 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
locals->EffectiveLBLatencyHidingSourceLinesLuma),
locals->SwathHeightYPerState[i][j][k]);
- locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min(
- locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] *
- locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0],
- locals->EffectiveLBLatencyHidingSourceLinesChroma),
- locals->SwathHeightCPerState[i][j][k]);
if (locals->BytePerPixelInDETC[k] == 0) {
locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k])
/ locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] *
dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]);
} else {
+ locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min(
+ locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] *
+ locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0],
+ locals->EffectiveLBLatencyHidingSourceLinesChroma),
+ locals->SwathHeightCPerState[i][j][k]);
locals->UrgentLatencySupportUsPerState[i][j][k] = dml_min(
locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k])
/ locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] *
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
index 246071c72f6b..9c58ff1069d6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
@@ -78,7 +78,7 @@ static void calculate_ttu_cursor(struct display_mode_lib *mode_lib,
static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma)
{
- unsigned int ret_val = 0;
+ unsigned int ret_val = 1;
if (source_format == dm_444_16) {
if (!is_chroma)
@@ -313,9 +313,6 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib,
if (swath_height_c > 0)
log2_swath_height_c = dml_log2(swath_height_c);
-
- if (req128_c && log2_swath_height_c > 0)
- log2_swath_height_c -= 1;
}
rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l;
@@ -446,8 +443,6 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
blk_bytes = surf_linear ?
256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size);
log2_blk_bytes = dml_log2((double) blk_bytes);
- log2_blk_height = 0;
- log2_blk_width = 0;
// remember log rule
// "+" in log is multiply
@@ -494,8 +489,6 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
- log2_meta_req_height;
meta_req_width = 1 << log2_meta_req_width;
meta_req_height = 1 << log2_meta_req_height;
- log2_meta_row_height = 0;
- meta_row_width_ub = 0;
// the dimensions of a meta row are meta_row_width x meta_row_height in elements.
// calculate upper bound of the meta_row_width
@@ -653,7 +646,7 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
// the dpte_group_bytes is reduced for the specific case of vertical
// access of a tile surface that has dpte request of 8x1 ptes.
- if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group
+ if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group
rq_sizing_param->dpte_group_bytes = 512;
else
//full size
@@ -688,12 +681,11 @@ static void get_surf_rq_param(struct display_mode_lib *mode_lib,
const display_pipe_source_params_st *pipe_src_param,
bool is_chroma)
{
- bool mode_422 = false;
unsigned int vp_width = 0;
unsigned int vp_height = 0;
unsigned int data_pitch = 0;
unsigned int meta_pitch = 0;
- unsigned int ppe = mode_422 ? 2 : 1;
+ unsigned int ppe = 1;
// TODO check if ppe apply for both luma and chroma in 422 case
if (is_chroma) {
@@ -825,7 +817,6 @@ static void dml20_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
double min_dst_y_ttu_vblank;
unsigned int dlg_vblank_start;
bool dual_plane;
- bool mode_422;
unsigned int access_dir;
unsigned int vp_height_l;
unsigned int vp_width_l;
@@ -971,7 +962,6 @@ static void dml20_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
// Source
// dcc_en = src.dcc;
dual_plane = is_dual_plane((enum source_format_class)(src->source_format));
- mode_422 = false; // TODO
access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
// bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0);
// bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1);
@@ -1148,18 +1138,8 @@ static void dml20_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
dpte_row_height_l = rq_dlg_param->rq_l.dpte_row_height;
dpte_row_height_c = rq_dlg_param->rq_c.dpte_row_height;
- if (mode_422) {
- swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element
- swath_width_pixels_ub_c = swath_width_ub_c * 2;
- } else {
- swath_width_pixels_ub_l = swath_width_ub_l * 1;
- swath_width_pixels_ub_c = swath_width_ub_c * 1;
- }
-
- hscale_pixel_rate_l = 0.;
- hscale_pixel_rate_c = 0.;
- min_hratio_fact_l = 1.0;
- min_hratio_fact_c = 1.0;
+ swath_width_pixels_ub_l = swath_width_ub_l;
+ swath_width_pixels_ub_c = swath_width_ub_c;
if (htaps_l <= 1)
min_hratio_fact_l = 2.0;
@@ -1576,8 +1556,6 @@ void dml20_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib,
e2e_pipe_param,
num_pipes);
- dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency
- / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated
print__dlg_sys_params_st(mode_lib, &dlg_sys_param);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
index 015e7f2c0b16..570e6e39eb45 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
@@ -78,7 +78,7 @@ static void calculate_ttu_cursor(struct display_mode_lib *mode_lib,
static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma)
{
- unsigned int ret_val = 0;
+ unsigned int ret_val = 1;
if (source_format == dm_444_16) {
if (!is_chroma)
@@ -313,9 +313,6 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib,
if (swath_height_c > 0)
log2_swath_height_c = dml_log2(swath_height_c);
-
- if (req128_c && log2_swath_height_c > 0)
- log2_swath_height_c -= 1;
}
rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l;
@@ -446,8 +443,6 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
blk_bytes = surf_linear ?
256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size);
log2_blk_bytes = dml_log2((double) blk_bytes);
- log2_blk_height = 0;
- log2_blk_width = 0;
// remember log rule
// "+" in log is multiply
@@ -494,8 +489,6 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
- log2_meta_req_height;
meta_req_width = 1 << log2_meta_req_width;
meta_req_height = 1 << log2_meta_req_height;
- log2_meta_row_height = 0;
- meta_row_width_ub = 0;
// the dimensions of a meta row are meta_row_width x meta_row_height in elements.
// calculate upper bound of the meta_row_width
@@ -653,7 +646,7 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
// the dpte_group_bytes is reduced for the specific case of vertical
// access of a tile surface that has dpte request of 8x1 ptes.
- if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group
+ if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group
rq_sizing_param->dpte_group_bytes = 512;
else
//full size
@@ -688,12 +681,11 @@ static void get_surf_rq_param(struct display_mode_lib *mode_lib,
const display_pipe_source_params_st *pipe_src_param,
bool is_chroma)
{
- bool mode_422 = false;
unsigned int vp_width = 0;
unsigned int vp_height = 0;
unsigned int data_pitch = 0;
unsigned int meta_pitch = 0;
- unsigned int ppe = mode_422 ? 2 : 1;
+ unsigned int ppe = 1;
// TODO check if ppe apply for both luma and chroma in 422 case
if (is_chroma) {
@@ -825,7 +817,6 @@ static void dml20v2_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
double min_dst_y_ttu_vblank;
unsigned int dlg_vblank_start;
bool dual_plane;
- bool mode_422;
unsigned int access_dir;
unsigned int vp_height_l;
unsigned int vp_width_l;
@@ -972,7 +963,6 @@ static void dml20v2_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
// Source
// dcc_en = src.dcc;
dual_plane = is_dual_plane((enum source_format_class)(src->source_format));
- mode_422 = false; // TODO
access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
// bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0);
// bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1);
@@ -1149,18 +1139,8 @@ static void dml20v2_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
dpte_row_height_l = rq_dlg_param->rq_l.dpte_row_height;
dpte_row_height_c = rq_dlg_param->rq_c.dpte_row_height;
- if (mode_422) {
- swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element
- swath_width_pixels_ub_c = swath_width_ub_c * 2;
- } else {
- swath_width_pixels_ub_l = swath_width_ub_l * 1;
- swath_width_pixels_ub_c = swath_width_ub_c * 1;
- }
-
- hscale_pixel_rate_l = 0.;
- hscale_pixel_rate_c = 0.;
- min_hratio_fact_l = 1.0;
- min_hratio_fact_c = 1.0;
+ swath_width_pixels_ub_l = swath_width_ub_l;
+ swath_width_pixels_ub_c = swath_width_ub_c;
if (htaps_l <= 1)
min_hratio_fact_l = 2.0;
@@ -1577,8 +1557,6 @@ void dml20v2_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib,
e2e_pipe_param,
num_pipes);
- dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency
- / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated
print__dlg_sys_params_st(mode_lib, &dlg_sys_param);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
index 8a7485e21d53..cd8cca651419 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
@@ -806,10 +806,12 @@ static bool CalculatePrefetchSchedule(
if (myPipe->SourceScan == dm_horz) {
*swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockWidth256BytesY) + myPipe->BlockWidth256BytesY;
- *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockWidth256BytesC) + myPipe->BlockWidth256BytesC;
+ if (myPipe->BlockWidth256BytesC > 0)
+ *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockWidth256BytesC) + myPipe->BlockWidth256BytesC;
} else {
*swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockHeight256BytesY) + myPipe->BlockHeight256BytesY;
- *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockHeight256BytesC) + myPipe->BlockHeight256BytesC;
+ if (myPipe->BlockHeight256BytesC > 0)
+ *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockHeight256BytesC) + myPipe->BlockHeight256BytesC;
}
prefetch_bw_oto = (PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) / Tsw_oto;
@@ -1047,12 +1049,9 @@ static bool CalculatePrefetchSchedule(
if (MyError) {
*PrefetchBandwidth = 0;
- TimeForFetchingMetaPTE = 0;
- TimeForFetchingRowInVBlank = 0;
*DestinationLinesToRequestVMInVBlank = 0;
*DestinationLinesToRequestRowInVBlank = 0;
*DestinationLinesForPrefetch = 0;
- LinesToRequestPrefetchPixelData = 0;
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
*RequiredPrefetchPixDataBWLuma = 0;
@@ -1397,7 +1396,7 @@ static unsigned int CalculateVMAndRowBytes(
if (ScanDirection == dm_horz)
FractionOfPTEReturnDrop = 0;
else
- FractionOfPTEReturnDrop = 7 / 8;
+ FractionOfPTEReturnDrop = 7.0 / 8;
} else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) {
PixelPTEReqHeightPTEs = 16;
*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
@@ -2634,7 +2633,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
&mode_lib->vba.SrcActiveDrainRate,
&mode_lib->vba.TInitXFill,
&mode_lib->vba.TslvChk);
- locals->XFCRemoteSurfaceFlipLatency[k] =
+ locals->XFCRemoteSurfaceFlipLatency[k] =
dml_floor(
mode_lib->vba.XFCRemoteSurfaceFlipDelay
/ (mode_lib->vba.HTotal[k]
@@ -3192,7 +3191,7 @@ static void CalculateFlipSchedule(
unsigned int HostVMDynamicLevels;
double TimeForFetchingMetaPTEImmediateFlip;
double TimeForFetchingRowInVBlankImmediateFlip;
- double ImmediateFlipBW;
+ double ImmediateFlipBW = 1.0;
double HostVMInefficiencyFactor;
double VRatioClamped;
@@ -4100,17 +4099,17 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2
* (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
- locals->ODMCombineEnablePerState[i][k] = false;
+ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine;
if (mode_lib->vba.ODMCapability) {
if (locals->PlaneRequiredDISPCLKWithoutODMCombine > MaxMaxDispclkRoundedDown) {
- locals->ODMCombineEnablePerState[i][k] = true;
+ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
} else if (locals->DSCEnabled[k] && (locals->HActive[k] > DCN21_MAX_DSC_IMAGE_WIDTH)) {
- locals->ODMCombineEnablePerState[i][k] = true;
+ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
} else if (locals->HActive[k] > DCN21_MAX_420_IMAGE_WIDTH && locals->OutputFormat[k] == dm_420) {
- locals->ODMCombineEnablePerState[i][k] = true;
+ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
}
}
@@ -4163,7 +4162,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
locals->RequiredDISPCLK[i][j] = 0.0;
locals->DISPCLK_DPPCLK_Support[i][j] = true;
for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
- locals->ODMCombineEnablePerState[i][k] = false;
+ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
if (locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]) {
locals->NoOfDPP[i][j][k] = 1;
locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k]
@@ -5228,7 +5227,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
mode_lib->vba.ODMCombineEnabled[k] =
locals->ODMCombineEnablePerState[mode_lib->vba.VoltageLevel][k];
} else {
- mode_lib->vba.ODMCombineEnabled[k] = false;
+ mode_lib->vba.ODMCombineEnabled[k] = dm_odm_combine_mode_disabled;
}
mode_lib->vba.DSCEnabled[k] =
locals->RequiresDSC[mode_lib->vba.VoltageLevel][k];
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
index 46c433c0bcb0..f549da082c01 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
@@ -53,7 +53,7 @@ static void calculate_ttu_cursor(
static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma)
{
- unsigned int ret_val = 0;
+ unsigned int ret_val = 1;
if (source_format == dm_444_16) {
if (!is_chroma)
@@ -297,9 +297,6 @@ static void handle_det_buf_split(
if (swath_height_c > 0)
log2_swath_height_c = dml_log2(swath_height_c);
-
- if (req128_c && log2_swath_height_c > 0)
- log2_swath_height_c -= 1;
}
rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l;
@@ -438,8 +435,6 @@ static void get_meta_and_pte_attr(
blk_bytes = surf_linear ?
256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size);
log2_blk_bytes = dml_log2((double) blk_bytes);
- log2_blk_height = 0;
- log2_blk_width = 0;
// remember log rule
// "+" in log is multiply
@@ -488,8 +483,6 @@ static void get_meta_and_pte_attr(
- log2_meta_req_height;
meta_req_width = 1 << log2_meta_req_width;
meta_req_height = 1 << log2_meta_req_height;
- log2_meta_row_height = 0;
- meta_row_width_ub = 0;
// the dimensions of a meta row are meta_row_width x meta_row_height in elements.
// calculate upper bound of the meta_row_width
@@ -659,7 +652,7 @@ static void get_meta_and_pte_attr(
if (hostvm_enable)
rq_sizing_param->dpte_group_bytes = 512;
else {
- if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group
+ if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group
rq_sizing_param->dpte_group_bytes = 512;
else
//full size
@@ -697,12 +690,11 @@ static void get_surf_rq_param(
const display_pipe_params_st *pipe_param,
bool is_chroma)
{
- bool mode_422 = false;
unsigned int vp_width = 0;
unsigned int vp_height = 0;
unsigned int data_pitch = 0;
unsigned int meta_pitch = 0;
- unsigned int ppe = mode_422 ? 2 : 1;
+ unsigned int ppe = 1;
// FIXME check if ppe apply for both luma and chroma in 422 case
if (is_chroma) {
@@ -871,7 +863,6 @@ static void dml_rq_dlg_get_dlg_params(
double min_dst_y_ttu_vblank;
unsigned int dlg_vblank_start;
bool dual_plane;
- bool mode_422;
unsigned int access_dir;
unsigned int vp_height_l;
unsigned int vp_width_l;
@@ -1023,7 +1014,6 @@ static void dml_rq_dlg_get_dlg_params(
// Source
// dcc_en = src.dcc;
dual_plane = is_dual_plane((enum source_format_class) (src->source_format));
- mode_422 = false; // FIXME
access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
// bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0);
// bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1);
@@ -1200,18 +1190,8 @@ static void dml_rq_dlg_get_dlg_params(
dpte_row_height_l = rq_dlg_param->rq_l.dpte_row_height;
dpte_row_height_c = rq_dlg_param->rq_c.dpte_row_height;
- if (mode_422) {
- swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element
- swath_width_pixels_ub_c = swath_width_ub_c * 2;
- } else {
- swath_width_pixels_ub_l = swath_width_ub_l * 1;
- swath_width_pixels_ub_c = swath_width_ub_c * 1;
- }
-
- hscale_pixel_rate_l = 0.;
- hscale_pixel_rate_c = 0.;
- min_hratio_fact_l = 1.0;
- min_hratio_fact_c = 1.0;
+ swath_width_pixels_ub_l = swath_width_ub_l;
+ swath_width_pixels_ub_c = swath_width_ub_c;
if (hratio_l <= 1)
min_hratio_fact_l = 2.0;
@@ -1688,8 +1668,6 @@ void dml21_rq_dlg_get_dlg_reg(
mode_lib,
e2e_pipe_param,
num_pipes);
- dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency
- / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated
print__dlg_sys_params_st(mode_lib, &dlg_sys_param);
@@ -1711,14 +1689,6 @@ void dml21_rq_dlg_get_dlg_reg(
dml_print("DML_DLG: Calculation for pipe[%d] end\n", pipe_idx);
}
-void dml_rq_dlg_get_arb_params(struct display_mode_lib *mode_lib, display_arb_params_st *arb_param)
-{
- memset(arb_param, 0, sizeof(*arb_param));
- arb_param->max_req_outstanding = 256;
- arb_param->min_req_outstanding = 68;
- arb_param->sat_level_us = 60;
-}
-
static void calculate_ttu_cursor(
struct display_mode_lib *mode_lib,
double *refcyc_per_req_delivery_pre_cur,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
new file mode 100644
index 000000000000..e5f5c0663750
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
@@ -0,0 +1,739 @@
+/*
+ * Copyright 2020-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "resource.h"
+#include "clk_mgr.h"
+#include "reg_helper.h"
+#include "dcn_calc_math.h"
+#include "dcn20/dcn20_resource.h"
+#include "dcn30/dcn30_resource.h"
+
+#include "clk_mgr/dcn30/dcn30_smu11_driver_if.h"
+#include "display_mode_vba_30.h"
+#include "dcn30_fpu.h"
+
+/*
+ * Register-access helper macros. Each one expands to members of a variable
+ * named optc1, so they can only be used where such a variable is in scope.
+ * FN is #undef'd first so that any earlier definition is replaced.
+ * NOTE(review): no use of these macros is visible in this part of the file;
+ * confirm they are still needed.
+ */
+#define REG(reg)\
+ optc1->tg_regs->reg
+
+#define CTX \
+ optc1->base.ctx
+
+#undef FN
+#define FN(reg_name, field_name) \
+ optc1->tg_shift->field_name, optc1->tg_mask->field_name
+
+
+/*
+ * Statically initialised IP parameter table named for DCN 3.0.
+ * NOTE(review): presumably used as the default DML "ip" input for this
+ * hardware generation; the consumers are outside this part of the file,
+ * so confirm before relying on that.
+ * Entries tagged "// ?" appear to be values the author was unsure of.
+ */
+struct _vcs_dpi_ip_params_st dcn3_0_ip = {
+ .use_min_dcfclk = 0,
+ .clamp_min_dcfclk = 0,
+ .odm_capable = 1,
+ .gpuvm_enable = 0,
+ .hostvm_enable = 0,
+ .gpuvm_max_page_table_levels = 4,
+ .hostvm_max_page_table_levels = 4,
+ .hostvm_cached_page_table_levels = 0,
+ .pte_group_size_bytes = 2048,
+ .num_dsc = 6,
+ .rob_buffer_size_kbytes = 184,
+ .det_buffer_size_kbytes = 184,
+ .dpte_buffer_size_in_pte_reqs_luma = 84,
+ .pde_proc_buffer_size_64k_reqs = 48,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .pte_enable = 1,
+ .max_page_table_levels = 2,
+ .pte_chunk_size_kbytes = 2, // ?
+ .meta_chunk_size_kbytes = 2,
+ .writeback_chunk_size_kbytes = 8,
+ .line_buffer_size_bits = 789504,
+ .is_line_buffer_bpp_fixed = 0, // ?
+ .line_buffer_fixed_bpp = 0, // ?
+ .dcc_supported = true,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .writeback_line_buffer_buffer_size = 0,
+ .max_line_buffer_lines = 12,
+ .writeback_luma_buffer_size_kbytes = 12, // writeback_line_buffer_buffer_size = 656640
+ .writeback_chroma_buffer_size_kbytes = 8,
+ .writeback_chroma_line_buffer_width_pixels = 4,
+ /* All writeback scaling ratios and tap counts below are fixed at 1. */
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .writeback_line_buffer_luma_buffer_size = 0,
+ .writeback_line_buffer_chroma_buffer_size = 14643,
+ .cursor_buffer_size = 8,
+ .cursor_chunk_size = 2,
+ .max_num_otg = 6,
+ .max_num_dpp = 6,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .hscl_mults = 4,
+ .vscl_mults = 4,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dispclk_ramp_margin_percent = 1,
+ .underscan_factor = 1.11,
+ .min_vblank_lines = 32,
+ .dppclk_delay_subtotal = 46,
+ .dynamic_metadata_vm_enabled = true,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_cnvc_formatter = 27,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 119,
+ .dcfclk_cstate_latency = 5.2, // SRExitTime
+ .max_inter_dcn_tile_repeaters = 8,
+ .max_num_hdmi_frl_outputs = 1,
+ .odm_combine_4to1_supported = true,
+
+ .xfc_supported = false,
+ .xfc_fill_bw_overhead_percent = 10.0,
+ .xfc_fill_constant_bytes = 0,
+ .gfx7_compat_tiling_supported = 0,
+ .number_of_cursors = 1,
+};
+
+/*
+ * Statically initialised SoC bounding-box table named for DCN 3.0.
+ * Only one clock_limits entry (state 0) is populated and num_states is 1.
+ * NOTE(review): presumably these are defaults that are refined at runtime
+ * (min_dcfclk carries a TODO saying it is not the real minimum); the code
+ * that would do so is outside this part of the file, so confirm.
+ * Entries tagged "// ?" appear to be values the author was unsure of.
+ */
+struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc = {
+ .clock_limits = {
+ {
+ .state = 0,
+ .dispclk_mhz = 562.0,
+ .dppclk_mhz = 300.0,
+ .phyclk_mhz = 300.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 405.6,
+ },
+ },
+
+ .min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */
+ .num_states = 1,
+ .sr_exit_time_us = 15.5,
+ .sr_enter_plus_exit_time_us = 20,
+ .urgent_latency_us = 4.0,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 40.0,
+ .writeback_latency_us = 12.0,
+ .max_request_size_bytes = 256,
+ .fabric_datapath_to_dcn_data_return_bytes = 64,
+ .dcn_downspread_percent = 0.5,
+ .downspread_percent = 0.38,
+ .dram_page_open_time_ns = 50.0,
+ .dram_rw_turnaround_time_ns = 17.5,
+ .dram_return_buffer_per_channel_bytes = 8192,
+ .round_trip_ping_latency_dcfclk_cycles = 191,
+ .urgent_out_of_order_return_per_channel_bytes = 4096,
+ .channel_interleave_bytes = 256,
+ .num_banks = 8,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .dram_clock_change_latency_us = 404,
+ .dummy_pstate_latency_us = 5,
+ .writeback_dram_clock_change_latency_us = 23.0,
+ .return_bus_width_bytes = 64,
+ .dispclk_dppclk_vco_speed_mhz = 3650,
+ .xfc_bus_transport_time_us = 20, // ?
+ .xfc_xbuf_latency_tolerance_us = 4, // ?
+ .use_urgent_burst_bw = 1, // ?
+ .do_urgent_latency_adjustment = true,
+ .urgent_latency_adjustment_fabric_clock_component_us = 1.0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
+};
+
+
+/*
+ * Fill in the writeback ("dout") fields of pipes[] from the streams attached
+ * to the pipe contexts in res_ctx.
+ *
+ * @dc: supplies the pipe count (dc->res_pool), the writeback tap capability
+ *      (dc->dml.ip) and the writeback line-buffer size used for the DISPCLK
+ *      estimate (dc->current_state->bw_ctx.dml.ip).
+ * @res_ctx: resource context whose pipe_ctx[] entries are scanned.
+ * @pipes: DML pipe array to update. It is indexed by a separate counter that
+ *         only advances for pipe contexts that have a stream, so entry N
+ *         corresponds to the N-th pipe context with a stream.
+ *
+ * Calls dc_assert_fp_enabled() first; presumably the caller must already be
+ * in an FP-enabled section, since the body does double arithmetic.
+ *
+ * NOTE(review): the tap capability is read from dc->dml.ip while the
+ * line-buffer size is read from dc->current_state->bw_ctx.dml.ip; confirm
+ * that using two different ip copies is intended.
+ */
+void dcn30_fpu_populate_dml_writeback_from_context(
+ struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes)
+{
+ int pipe_cnt, i, j;
+ double max_calc_writeback_dispclk;
+ double writeback_dispclk;
+ /* Scratch parameter set; declared outside the loops, so fields persist across iterations. */
+ struct writeback_st dout_wb = {0};
+
+ dc_assert_fp_enabled();
+
+ for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+ struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream;
+
+ /* Pipe contexts without a stream do not consume a pipes[] slot. */
+ if (!stream)
+ continue;
+ /* Worst-case tracker restarts at 0 for every pipe with a stream. */
+ max_calc_writeback_dispclk = 0;
+
+ /* Set writeback information */
+ pipes[pipe_cnt].dout.wb_enable = 0;
+ pipes[pipe_cnt].dout.num_active_wb = 0;
+ for (j = 0; j < stream->num_wb_info; j++) {
+ struct dc_writeback_info *wb_info = &stream->writeback_info[j];
+
+ /* Only enabled writebacks sourced from this pipe's plane are counted. */
+ if (wb_info->wb_enabled && wb_info->writeback_source_plane &&
+ (wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) {
+ pipes[pipe_cnt].dout.wb_enable = 1;
+ pipes[pipe_cnt].dout.num_active_wb++;
+ /* Source size is the crop rectangle when cropping is on, else the full source. */
+ dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ?
+ wb_info->dwb_params.cnv_params.crop_height :
+ wb_info->dwb_params.cnv_params.src_height;
+ dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ?
+ wb_info->dwb_params.cnv_params.crop_width :
+ wb_info->dwb_params.cnv_params.src_width;
+ dout_wb.wb_dst_width = wb_info->dwb_params.dest_width;
+ dout_wb.wb_dst_height = wb_info->dwb_params.dest_height;
+
+ /* For IP that doesn't support WB scaling, set h/v taps to 1 to avoid DML validation failure */
+ if (dc->dml.ip.writeback_max_hscl_taps > 1) {
+ dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps;
+ dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps;
+ } else {
+ dout_wb.wb_htaps_luma = 1;
+ dout_wb.wb_vtaps_luma = 1;
+ }
+ dout_wb.wb_htaps_chroma = 0;
+ dout_wb.wb_vtaps_chroma = 0;
+ /* Ratios are (cropped or full) source size divided by destination size. */
+ dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ?
+ (double)wb_info->dwb_params.cnv_params.crop_width /
+ (double)wb_info->dwb_params.dest_width :
+ (double)wb_info->dwb_params.cnv_params.src_width /
+ (double)wb_info->dwb_params.dest_width;
+ dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ?
+ (double)wb_info->dwb_params.cnv_params.crop_height /
+ (double)wb_info->dwb_params.dest_height :
+ (double)wb_info->dwb_params.cnv_params.src_height /
+ (double)wb_info->dwb_params.dest_height;
+ /* The two 64bpp output formats map to dm_444_64; everything else to dm_444_32. */
+ if (wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_ARGB ||
+ wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_RGBA)
+ dout_wb.wb_pixel_format = dm_444_64;
+ else
+ dout_wb.wb_pixel_format = dm_444_32;
+
+ /* Workaround for cases where multiple writebacks are connected to same plane
+ * In which case, need to compute worst case and set the associated writeback parameters
+ * This workaround is necessary due to DML computation assuming only 1 set of writeback
+ * parameters per pipe
+ */
+ writeback_dispclk = dml30_CalculateWriteBackDISPCLK(
+ dout_wb.wb_pixel_format,
+ pipes[pipe_cnt].pipe.dest.pixel_rate_mhz,
+ dout_wb.wb_hratio,
+ dout_wb.wb_vratio,
+ dout_wb.wb_htaps_luma,
+ dout_wb.wb_vtaps_luma,
+ dout_wb.wb_src_width,
+ dout_wb.wb_dst_width,
+ pipes[pipe_cnt].pipe.dest.htotal,
+ dc->current_state->bw_ctx.dml.ip.writeback_line_buffer_buffer_size);
+
+ /*
+ * Strict '>' against a tracker that starts at 0: dout.wb is only
+ * written when the computed value is positive, and on a tie the
+ * earlier writeback's parameters are kept.
+ */
+ if (writeback_dispclk > max_calc_writeback_dispclk) {
+ max_calc_writeback_dispclk = writeback_dispclk;
+ pipes[pipe_cnt].dout.wb = dout_wb;
+ }
+ }
+ }
+
+ pipe_cnt++;
+ }
+}
+
+/*
+ * Populate the watermark arrays and the DRAM speed-change duration of
+ * wb_arb_params from DML results.
+ *
+ * @wb_arb_params: output; cli_watermark[], pstate_watermark[] and
+ *                 dram_speed_change_duration are written.
+ * @dml: display mode library instance passed to the get_wm_* helpers and
+ *       read for vba.WritebackAllowDRAMClockChangeEndPosition[].
+ * @pipes: DML pipe array; pipes[0].clks_cfg.refclk_mhz supplies the clock.
+ * @pipe_cnt: number of entries in pipes, forwarded to the get_wm_* helpers.
+ * @cur_pipe: index into WritebackAllowDRAMClockChangeEndPosition[].
+ *
+ * Calls dc_assert_fp_enabled() first; presumably the caller must already be
+ * in an FP-enabled section.
+ */
+void dcn30_fpu_set_mcif_arb_params(struct mcif_arb_params *wb_arb_params,
+ struct display_mode_lib *dml,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int cur_pipe)
+{
+ int i;
+
+ dc_assert_fp_enabled();
+
+ /*
+ * The helper arguments do not depend on i, so every slot is assigned from
+ * the same call. The bound is the length of cli_watermark[]; this assumes
+ * pstate_watermark[] is at least as long - TODO confirm.
+ * NOTE(review): "* 1000" looks like a us -> ns conversion - confirm.
+ */
+ for (i = 0; i < ARRAY_SIZE(wb_arb_params->cli_watermark); i++) {
+ wb_arb_params->cli_watermark[i] = get_wm_writeback_urgent(dml, pipes, pipe_cnt) * 1000;
+ wb_arb_params->pstate_watermark[i] = get_wm_writeback_dram_clock_change(dml, pipes, pipe_cnt) * 1000;
+ }
+
+ wb_arb_params->dram_speed_change_duration = dml->vba.WritebackAllowDRAMClockChangeEndPosition[cur_pipe] * pipes[0].clks_cfg.refclk_mhz; /* num_clock_cycles = us * MHz */
+}
+
+/*
+ * Copy the WM_A watermark-table latencies into context's DML soc parameters.
+ *
+ * @dc: supplies dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].
+ * @context: state whose bw_ctx.dml.soc fields are updated in place.
+ *
+ * Nothing is changed when the WM_A entry is not marked valid. When it is:
+ *  - dram_clock_change_latency_us is overwritten unless fw_based_mclk_switching
+ *    is set and the current value is non-zero;
+ *  - sr_enter_plus_exit_time_us and sr_exit_time_us are always overwritten.
+ *
+ * Calls dc_assert_fp_enabled() first; presumably the caller must already be
+ * in an FP-enabled section.
+ */
+void dcn30_fpu_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
+{
+
+ dc_assert_fp_enabled();
+
+ if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) {
+ /* The brace-less inner if guards only the next assignment. */
+ if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching ||
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us == 0)
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us;
+ context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us;
+ }
+}
+
+/**
+ * dcn30_fpu_calculate_wm_and_dlg() - Compute the watermark sets and per-pipe
+ * clocks of a context, then hand over to the DLG parameter calculation
+ *
+ * @dc: display core instance (clock manager tables, debug and config options)
+ * @context: dc state whose bw_ctx watermarks, clock flags and DML SoC
+ * latencies are updated
+ * @pipes: DML pipe params; clks_cfg members are rewritten
+ * @pipe_cnt: number of DML pipes
+ * @vlevel: voltage level calculated by DML
+ *
+ * Order of work as written below: clear the fw based mclk switching state,
+ * try to enable it when a natural p-state switch is unsupported, compute
+ * sets B and C, compute set A (or copy it from C), mirror A into D, fill the
+ * per-pipe dispclk/dppclk and finally call dcn20_calculate_dlg_params().
+ * The DML SoC latencies on @context are overwritten several times on the
+ * way, so the statement order matters.
+ */
+void dcn30_fpu_calculate_wm_and_dlg(
+ struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel)
+{
+ int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
+ int i, pipe_idx;
+ double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][maxMpcComb];
+ bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported;
+ unsigned int dummy_latency_index = 0;
+ struct dc_stream_status *stream_status = NULL;
+
+ dc_assert_fp_enabled();
+
+ /* Start clean: no fw based mclk switching, no stream marked fpo_in_use */
+ context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false;
+ for (i = 0; i < context->stream_count; i++) {
+ stream_status = NULL;
+ if (context->streams[i])
+ stream_status = dc_state_get_stream_status(context, context->streams[i]);
+ if (stream_status)
+ stream_status->fpo_in_use = false;
+ }
+
+ if (!pstate_en) {
+ /* only when the mclk switch can not be natural, is the fw based vblank stretch attempted */
+ context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching =
+ dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(dc, context);
+
+ if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
+ dummy_latency_index = dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(dc,
+ context, pipes, pipe_cnt, vlevel);
+
+ /* After calling dcn30_find_dummy_latency_index_for_fw_based_mclk_switch
+ * we reinstate the original dram_clock_change_latency_us on the context
+ * and all variables that may have changed up to this point, except the
+ * newly found dummy_latency_index
+ */
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
+ dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel,
+ DC_VALIDATE_MODE_AND_PROGRAMMING, true);
+ maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
+ dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+ pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported;
+ }
+ }
+
+ /* Never use a DCFCLK below the SoC minimum */
+ if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk)
+ dcfclk = context->bw_ctx.dml.soc.min_dcfclk;
+
+ pipes[0].clks_cfg.voltage = vlevel;
+ pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
+
+ /* Set B:
+ * DCFCLK: 1GHz or min required above 1GHz
+ * FCLK/UCLK: Max
+ */
+ if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) {
+ /* At the lowest level, set B is evaluated at voltage 1 with the
+ * DCFCLK of clock_limits[0]
+ */
+ if (vlevel == 0) {
+ pipes[0].clks_cfg.voltage = 1;
+ pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz;
+ }
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us;
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us;
+ context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us;
+ }
+ /* Set B is computed even when WM_B is not valid; it then uses whatever
+ * latencies are currently on the context
+ */
+ context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+
+ /* Undo the set B voltage/DCFCLK overrides before the remaining sets */
+ pipes[0].clks_cfg.voltage = vlevel;
+ pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
+
+ /* Set D:
+ * DCFCLK: Min Required
+ * FCLK(proportional to UCLK): 1GHz or Max
+ * MALL stutter, sr_enter_exit = 4, sr_exit = 2us
+ */
+ /*
+ if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) {
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us;
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us;
+ context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us;
+ }
+ context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ */
+
+ /* Set C:
+ * DCFCLK: Min Required
+ * FCLK(proportional to UCLK): 1GHz or Max
+ * pstate latency overridden with a dummy p-state latency selected from
+ * dummy_pstate_table[] (index chosen below)
+ */
+ if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) {
+ unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
+ unsigned int min_dram_speed_mts_margin = 160;
+
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+ dc->clk_mgr->bw_params->dummy_pstate_table[0].dummy_pstate_latency_us;
+
+ /* Without p-state support, use the memclk of the last clk_table entry
+ * instead of the DML DRAM speed; "* 16" presumably converts memclk MHz
+ * to MT/s - confirm
+ */
+ if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] ==
+ dm_dram_clock_change_unsupported) {
+ int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries - 1;
+
+ min_dram_speed_mts =
+ dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16;
+ }
+
+ /* With fw based mclk switching the index found earlier is kept */
+ if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
+ /* find largest table entry that is lower than dram speed,
+ * but lower than DPM0 still uses DPM0
+ */
+ for (dummy_latency_index = 3; dummy_latency_index > 0; dummy_latency_index--)
+ if (min_dram_speed_mts + min_dram_speed_mts_margin >
+ dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dram_speed_mts)
+ break;
+ }
+
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+ dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
+
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us;
+ context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us;
+ }
+
+ context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+
+ if (!pstate_en) {
+ /* The only difference between A and C is p-state latency, if p-state is not supported we want to
+ * calculate DLG based on dummy p-state latency, and max out the set A p-state watermark
+ */
+ context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c;
+ /* NOTE(review): the value written is 0, not a maximum - confirm that the
+ * consumer of pstate_change_ns treats 0 as "maxed out"
+ */
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0;
+ } else {
+ /* Set A:
+ * DCFCLK: Min Required
+ * FCLK(proportional to UCLK): 1GHz or Max
+ *
+ * Set A calculated last so that following calculations are based on Set A
+ */
+ dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
+ context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ }
+
+ context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod;
+
+ /* Make set D = set A until set D is enabled */
+ context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a;
+
+ /* pipe_idx advances only for pipes that have a stream; pipes[] is
+ * presumably packed the same way - confirm against the code that fills it
+ */
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+
+ pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
+ pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+
+ if (dc->config.forced_clocks) {
+ pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
+ pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
+ }
+ /* Debug minimums are given in kHz, the DML clocks in MHz */
+ if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000)
+ pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
+ if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
+ pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;
+
+ pipe_idx++;
+ }
+
+ // WA: restrict FPO to use first non-strobe mode (NV24 BW issue)
+ if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching &&
+ dc->dml.soc.num_chans <= 4 &&
+ context->bw_ctx.dml.vba.DRAMSpeed <= 1700 &&
+ context->bw_ctx.dml.vba.DRAMSpeed >= 1500) {
+
+ /* Bump DRAMSpeed to the first state above 1700 MT/s, if there is one */
+ for (i = 0; i < dc->dml.soc.num_states; i++) {
+ if (dc->dml.soc.clock_limits[i].dram_speed_mts > 1700) {
+ context->bw_ctx.dml.vba.DRAMSpeed = dc->dml.soc.clock_limits[i].dram_speed_mts;
+ break;
+ }
+ }
+ }
+
+ dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
+
+ if (!pstate_en)
+ /* Restore full p-state latency */
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+ dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
+
+ if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching)
+ dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(dc, context);
+}
+
+/**
+ * dcn30_fpu_update_dram_channel_width_bytes() - Override the DRAM channel
+ * width of dcn3_0_soc with the value found in the dc_bios vram_info
+ *
+ * @dc: display core instance; dc->ctx->dc_bios->vram_info is read
+ *
+ * dcn3_0_soc is left unchanged when the reported width is zero.
+ */
+void dcn30_fpu_update_dram_channel_width_bytes(struct dc *dc)
+{
+	dc_assert_fp_enabled();
+
+	/* Zero is treated as "no override" */
+	if (!dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
+		return;
+
+	dcn3_0_soc.dram_channel_width_bytes =
+		dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
+}
+
+/**
+ * dcn30_fpu_update_max_clk() - Give defaults to the maximum clocks that are
+ * still zero
+ *
+ * @dcn30_bb_max_clk: maximum clock set to complete in place
+ *
+ * Each of the dcfclk, dispclk, dppclk and phyclk maximums that is zero is
+ * replaced by the matching value of dcn3_0_soc.clock_limits[0]. Non-zero
+ * members are not touched.
+ */
+void dcn30_fpu_update_max_clk(struct dc_bounding_box_max_clk *dcn30_bb_max_clk)
+{
+	dc_assert_fp_enabled();
+
+	if (dcn30_bb_max_clk->max_dcfclk_mhz == 0)
+		dcn30_bb_max_clk->max_dcfclk_mhz =
+			dcn3_0_soc.clock_limits[0].dcfclk_mhz;
+
+	if (dcn30_bb_max_clk->max_dispclk_mhz == 0)
+		dcn30_bb_max_clk->max_dispclk_mhz =
+			dcn3_0_soc.clock_limits[0].dispclk_mhz;
+
+	if (dcn30_bb_max_clk->max_dppclk_mhz == 0)
+		dcn30_bb_max_clk->max_dppclk_mhz =
+			dcn3_0_soc.clock_limits[0].dppclk_mhz;
+
+	if (dcn30_bb_max_clk->max_phyclk_mhz == 0)
+		dcn30_bb_max_clk->max_phyclk_mhz =
+			dcn3_0_soc.clock_limits[0].phyclk_mhz;
+}
+
+/**
+ * dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk() - Derive the FCLK and DCFCLK
+ * that match the bandwidth available at a given DRAM speed
+ *
+ * @uclk_mts: DRAM speed to evaluate
+ * @optimal_dcfclk: receives the DCFCLK result; skipped when NULL
+ * @optimal_fclk: receives the FCLK result; skipped when NULL
+ *
+ * Two bandwidth figures are computed from @uclk_mts, the channel count and
+ * the channel width of dcn3_0_soc, one scaled by the DRAM use percentage and
+ * one by the SDP use percentage. The smaller one is then divided by the
+ * SDP-scaled width of the respective bus. The double results are stored
+ * through unsigned int pointers.
+ */
+void dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
+		unsigned int *optimal_dcfclk,
+		unsigned int *optimal_fclk)
+{
+	double dram_scaled_bw, sdp_scaled_bw, usable_bw;
+
+	dc_assert_fp_enabled();
+
+	/* NOTE(review): "percent / 100" assumes floating point fields - confirm */
+	dram_scaled_bw = uclk_mts * dcn3_0_soc.num_chans *
+		dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_dram_bw_use_normal_percent / 100);
+	sdp_scaled_bw = uclk_mts * dcn3_0_soc.num_chans *
+		dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100);
+
+	/* The tighter of the two limits wins */
+	if (dram_scaled_bw < sdp_scaled_bw)
+		usable_bw = dram_scaled_bw;
+	else
+		usable_bw = sdp_scaled_bw;
+
+	if (optimal_fclk)
+		*optimal_fclk = usable_bw /
+			(dcn3_0_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100));
+
+	if (optimal_dcfclk)
+		*optimal_dcfclk = usable_bw /
+			(dcn3_0_soc.return_bus_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100));
+}
+
+/**
+ * dcn30_fpu_update_bw_bounding_box() - Rebuild the dcn3_0_soc clock limits
+ * and re-initialise DML with them
+ *
+ * @dc: display core instance; dc->dml and, when present, the DML instance of
+ * dc->current_state are re-initialised
+ * @bw_params: not referenced by this function body
+ * @dcn30_bb_max_clk: maximum dispclk/dppclk/phyclk applied to every state
+ * @dcfclk_mhz: per-state DCFCLK; indexed up to dcn3_0_soc.num_states - 1
+ * @dram_speed_mts: per-state DRAM speed; indexed up to
+ * dcn3_0_soc.num_states - 1
+ */
+void dcn30_fpu_update_bw_bounding_box(struct dc *dc,
+ struct clk_bw_params *bw_params,
+ struct dc_bounding_box_max_clk *dcn30_bb_max_clk,
+ unsigned int *dcfclk_mhz,
+ unsigned int *dram_speed_mts)
+{
+ unsigned int i;
+
+ dc_assert_fp_enabled();
+
+ /* dentist_vco_freq_khz is in kHz, the DML field in MHz */
+ dcn3_0_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+
+ for (i = 0; i < dcn3_0_soc.num_states; i++) {
+ dcn3_0_soc.clock_limits[i].state = i;
+ dcn3_0_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
+ /* The fabric clock is given the same value as DCFCLK */
+ dcn3_0_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
+ dcn3_0_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];
+
+ /* Fill all states with max values of all other clocks */
+ dcn3_0_soc.clock_limits[i].dispclk_mhz = dcn30_bb_max_clk->max_dispclk_mhz;
+ dcn3_0_soc.clock_limits[i].dppclk_mhz = dcn30_bb_max_clk->max_dppclk_mhz;
+ dcn3_0_soc.clock_limits[i].phyclk_mhz = dcn30_bb_max_clk->max_phyclk_mhz;
+ dcn3_0_soc.clock_limits[i].dtbclk_mhz = dcn3_0_soc.clock_limits[0].dtbclk_mhz;
+ /* These clocks cannot come from bw_params, always fill from dcn3_0_soc.clock_limits[0] */
+ /* PHYCLK_D18, SOCCLK, DSCCLK (DTBCLK above is copied the same way) */
+ dcn3_0_soc.clock_limits[i].phyclk_d18_mhz = dcn3_0_soc.clock_limits[0].phyclk_d18_mhz;
+ dcn3_0_soc.clock_limits[i].socclk_mhz = dcn3_0_soc.clock_limits[0].socclk_mhz;
+ dcn3_0_soc.clock_limits[i].dscclk_mhz = dcn3_0_soc.clock_limits[0].dscclk_mhz;
+ }
+ /* re-init DML with updated bb */
+ dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30);
+ if (dc->current_state)
+ dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30);
+
+}
+
+/**
+ * dcn30_find_dummy_latency_index_for_fw_based_mclk_switch() - Pick the dummy
+ * p-state latency to use with firmware based vblank stretch MCLK switching
+ *
+ * @dc: Current DC state
+ * @context: new dc state
+ * @pipes: DML pipe params
+ * @pipe_cnt: number of DML pipes
+ * @vlevel: Voltage level calculated by DML
+ *
+ * The dummy p-state table is tried entry by entry, starting at index 0. Each
+ * candidate latency is written to @context and bandwidth validation is run
+ * again. The search stops at the first entry for which DML reports
+ * dm_allow_self_refresh_and_mclk_switch. The latency of the last entry tried
+ * remains set on @context when the function returns.
+ *
+ * Return: index of the first table entry that works, or the last index (3)
+ * when none does
+ */
+int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
+		struct dc_state *context,
+		display_e2e_pipe_params_st *pipes,
+		int pipe_cnt,
+		int vlevel)
+{
+	const int max_latency_table_entries = 4;
+	int idx;
+
+	dc_assert_fp_enabled();
+
+	for (idx = 0; idx < max_latency_table_entries; idx++) {
+		context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+			dc->clk_mgr->bw_params->dummy_pstate_table[idx].dummy_pstate_latency_us;
+		dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel,
+					   DC_VALIDATE_MODE_AND_PROGRAMMING, true);
+
+		if (context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank ==
+		    dm_allow_self_refresh_and_mclk_switch)
+			return idx;
+	}
+
+	/*
+	 * Reaching this point means no dummy p-state is possible. This should
+	 * never happen and would mean something is severely wrong, hence the
+	 * assertion, which always fails here. The last table entry is still
+	 * returned, because it is better to have underflows than system
+	 * crashes.
+	 */
+	ASSERT(idx != max_latency_table_entries);
+
+	return max_latency_table_entries - 1;
+}
+
+/**
+ * dcn3_fpu_build_wm_range_table() - Populate the NV watermark range table and
+ * the dummy p-state table of a clock manager
+ *
+ * @base: clock manager whose bw_params->wm_table.nv_entries[] and
+ * bw_params->dummy_pstate_table[] are written
+ *
+ * Sets A, C and D are marked valid and filled. The set B assignments are
+ * commented out, so this function leaves WM_B untouched.
+ */
+void dcn3_fpu_build_wm_range_table(struct clk_mgr *base)
+{
+ /* defaults */
+ double pstate_latency_us = base->ctx->dc->dml.soc.dram_clock_change_latency_us;
+ double sr_exit_time_us = base->ctx->dc->dml.soc.sr_exit_time_us;
+ double sr_enter_plus_exit_time_us = base->ctx->dc->dml.soc.sr_enter_plus_exit_time_us;
+ /* Lower UCLK bound shared by all sets: memclk of the first clk_table entry */
+ uint16_t min_uclk_mhz = base->bw_params->clk_table.entries[0].memclk_mhz;
+
+ dc_assert_fp_enabled();
+
+ /* Set A - Normal - default values */
+ base->bw_params->wm_table.nv_entries[WM_A].valid = true;
+ base->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us;
+ base->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us;
+ base->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
+ base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
+ base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = 0;
+ base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF;
+ base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz;
+ base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF;
+
+ /* Set B - Performance - higher minimum clocks */
+// base->bw_params->wm_table.nv_entries[WM_B].valid = true;
+// base->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us;
+// base->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us;
+// base->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
+// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
+// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = TUNED VALUE;
+// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF;
+// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = TUNED VALUE;
+// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF;
+
+ /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */
+ base->bw_params->wm_table.nv_entries[WM_C].valid = true;
+ /* The table entry itself stores 0 here; the dummy latencies live in
+ * dummy_pstate_table[] below
+ */
+ base->bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 0;
+ base->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us;
+ base->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
+ base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE;
+ base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = 0;
+ base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF;
+ base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz;
+ base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF;
+ /* Four entries, DRAM speed ascending, dummy latency descending */
+ base->bw_params->dummy_pstate_table[0].dram_speed_mts = 1600;
+ base->bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38;
+ base->bw_params->dummy_pstate_table[1].dram_speed_mts = 8000;
+ base->bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9;
+ base->bw_params->dummy_pstate_table[2].dram_speed_mts = 10000;
+ base->bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8;
+ base->bw_params->dummy_pstate_table[3].dram_speed_mts = 16000;
+ base->bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5;
+
+ /* Set D - MALL - SR enter and exit times adjusted for MALL */
+ base->bw_params->wm_table.nv_entries[WM_D].valid = true;
+ base->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = pstate_latency_us;
+ base->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = 2;
+ base->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = 4;
+ base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL;
+ base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = 0;
+ base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF;
+ base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz;
+ base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF;
+}
+
+/**
+ * patch_dcn30_soc_bounding_box() - Override dcn3_0_soc latencies with values
+ * reported by the dc_bios get_soc_bb_info() hook
+ *
+ * @dc: display core instance; dc->ctx->dc_bios provides the hook
+ * @dcn3_0_ip: not referenced by this function body; all writes go to the
+ * dcn3_0_soc object
+ *
+ * Nothing is changed when the hook is absent or does not return
+ * BP_RESULT_OK. Each latency is overridden only when its reported value is
+ * greater than zero.
+ */
+void patch_dcn30_soc_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *dcn3_0_ip)
+{
+	struct bp_soc_bb_info bb_info = {0};
+
+	dc_assert_fp_enabled();
+
+	if (!dc->ctx->dc_bios->funcs->get_soc_bb_info)
+		return;
+
+	if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) != BP_RESULT_OK)
+		return;
+
+	/*
+	 * NOTE(review): the source fields are named *_100ns and the targets
+	 * *_us, yet the values are multiplied by 10. Going from 100 ns units
+	 * to microseconds would be a division by 10 - confirm the unit the
+	 * hook really reports before touching this.
+	 */
+	if (bb_info.dram_clock_change_latency_100ns > 0)
+		dcn3_0_soc.dram_clock_change_latency_us =
+			bb_info.dram_clock_change_latency_100ns * 10;
+
+	if (bb_info.dram_sr_enter_exit_latency_100ns > 0)
+		dcn3_0_soc.sr_enter_plus_exit_time_us =
+			bb_info.dram_sr_enter_exit_latency_100ns * 10;
+
+	if (bb_info.dram_sr_exit_latency_100ns > 0)
+		dcn3_0_soc.sr_exit_time_us =
+			bb_info.dram_sr_exit_latency_100ns * 10;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h
new file mode 100644
index 000000000000..e3b6ad6a8784
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2020-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN30_FPU_H__
+#define __DCN30_FPU_H__
+
+#include "core_types.h"
+#include "dcn20/dcn20_optc.h"
+
+void dcn30_fpu_populate_dml_writeback_from_context(
+ struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes);
+
+void dcn30_fpu_set_mcif_arb_params(struct mcif_arb_params *wb_arb_params,
+ struct display_mode_lib *dml,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int cur_pipe);
+
+void dcn30_fpu_update_soc_for_wm_a(struct dc *dc, struct dc_state *context);
+
+void dcn30_fpu_calculate_wm_and_dlg(
+ struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel);
+
+void dcn30_fpu_update_dram_channel_width_bytes(struct dc *dc);
+
+void dcn30_fpu_update_max_clk(struct dc_bounding_box_max_clk *dcn30_bb_max_clk);
+
+void dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
+ unsigned int *optimal_dcfclk,
+ unsigned int *optimal_fclk);
+
+void dcn30_fpu_update_bw_bounding_box(struct dc *dc,
+ struct clk_bw_params *bw_params,
+ struct dc_bounding_box_max_clk *dcn30_bb_max_clk,
+ unsigned int *dcfclk_mhz,
+ unsigned int *dram_speed_mts);
+
+int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel);
+
+void dcn3_fpu_build_wm_range_table(struct clk_mgr *base);
+
+void patch_dcn30_soc_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *dcn3_0_ip);
+
+#endif /* __DCN30_FPU_H__*/
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
index f47d82da115c..8d24763938ea 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
@@ -23,9 +23,7 @@
*
*/
-#ifdef CONFIG_DRM_AMD_DC_DCN
#include "dc.h"
-#include "dc_link.h"
#include "../display_mode_lib.h"
#include "display_mode_vba_30.h"
#include "../dml_inline_defs.h"
@@ -283,10 +281,10 @@ static void CalculateDynamicMetadataParameters(
double DISPCLK,
double DCFClkDeepSleep,
double PixelClock,
- long HTotal,
- long VBlank,
- long DynamicMetadataTransmittedBytes,
- long DynamicMetadataLinesBeforeActiveRequired,
+ unsigned int HTotal,
+ unsigned int VBlank,
+ unsigned int DynamicMetadataTransmittedBytes,
+ int DynamicMetadataLinesBeforeActiveRequired,
int InterlaceEnable,
bool ProgressiveToInterlaceUnitInOPP,
double *Tsetup,
@@ -396,64 +394,10 @@ static void CalculateUrgentBurstFactor(
static void UseMinimumDCFCLK(
struct display_mode_lib *mode_lib,
- int MaxInterDCNTileRepeaters,
+ struct vba_vars_st *v,
int MaxPrefetchMode,
- double FinalDRAMClockChangeLatency,
- double SREnterPlusExitTime,
- int ReturnBusWidth,
- int RoundTripPingLatencyCycles,
- int ReorderingBytes,
- int PixelChunkSizeInKByte,
- int MetaChunkSize,
- bool GPUVMEnable,
- int GPUVMMaxPageTableLevels,
- bool HostVMEnable,
- int NumberOfActivePlanes,
- double HostVMMinPageSize,
- int HostVMMaxNonCachedPageTableLevels,
- bool DynamicMetadataVMEnabled,
- enum immediate_flip_requirement ImmediateFlipRequirement,
- bool ProgressiveToInterlaceUnitInOPP,
- double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
- double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
- double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
- double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
- int VTotal[],
- int VActive[],
- int DynamicMetadataTransmittedBytes[],
- int DynamicMetadataLinesBeforeActiveRequired[],
- bool Interlace[],
- double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
- double RequiredDISPCLK[][2],
- double UrgLatency[],
- unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
- double ProjectedDCFCLKDeepSleep[][2],
- double MaximumVStartup[][2][DC__NUM_DPP__MAX],
- double TotalVActivePixelBandwidth[][2],
- double TotalVActiveCursorBandwidth[][2],
- double TotalMetaRowBandwidth[][2],
- double TotalDPTERowBandwidth[][2],
- unsigned int TotalNumberOfActiveDPP[][2],
- unsigned int TotalNumberOfDCCActiveDPP[][2],
- int dpte_group_bytes[],
- double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
- double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
- int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
- int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
- int BytePerPixelY[],
- int BytePerPixelC[],
- int HTotal[],
- double PixelClock[],
- double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
- double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
- double MetaRowBytes[][2][DC__NUM_DPP__MAX],
- bool DynamicMetadataEnable[],
- double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
- double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
- double ReadBandwidthLuma[],
- double ReadBandwidthChroma[],
- double DCFCLKPerState[],
- double DCFCLKState[][2]);
+ int ReorderingBytes);
+
static void CalculatePixelDeliveryTimes(
unsigned int NumberOfActivePlanes,
double VRatio[],
@@ -712,18 +656,6 @@ static double CalculateUrgentLatency(
double UrgentLatencyAdjustmentFabricClockReference,
double FabricClockSingle);
-static bool CalculateBytePerPixelAnd256BBlockSizes(
- enum source_format_class SourcePixelFormat,
- enum dm_swizzle_mode SurfaceTiling,
- unsigned int *BytePerPixelY,
- unsigned int *BytePerPixelC,
- double *BytePerPixelDETY,
- double *BytePerPixelDETC,
- unsigned int *BlockHeight256BytesY,
- unsigned int *BlockHeight256BytesC,
- unsigned int *BlockWidth256BytesY,
- unsigned int *BlockWidth256BytesC);
-
void dml30_recalculate(struct display_mode_lib *mode_lib)
{
ModeSupportAndSystemConfiguration(mode_lib);
@@ -852,8 +784,7 @@ static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum o
Delay = Delay + 1;
// sft
Delay = Delay + 1;
- }
- else {
+ } else {
// sfr
Delay = Delay + 2;
// dsccif
@@ -1071,6 +1002,7 @@ static bool CalculatePrefetchSchedule(
dst_y_prefetch_equ = VStartup - (Tsetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime
- (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
+ dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
Lsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC);
Tsw_oto = Lsw_oto * LineTime;
@@ -1349,12 +1281,9 @@ static bool CalculatePrefetchSchedule(
if (MyError) {
*PrefetchBandwidth = 0;
- TimeForFetchingMetaPTE = 0;
- TimeForFetchingRowInVBlank = 0;
*DestinationLinesToRequestVMInVBlank = 0;
*DestinationLinesToRequestRowInVBlank = 0;
*DestinationLinesForPrefetch = 0;
- LinesToRequestPrefetchPixelData = 0;
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
*RequiredPrefetchPixDataBWLuma = 0;
@@ -1844,15 +1773,6 @@ static unsigned int CalculateVMAndRowBytes(
*PixelPTEReqWidth = 32768.0 / BytePerPixel;
*PTERequestSize = 64;
FractionOfPTEReturnDrop = 0;
- } else if (MacroTileSizeBytes == 4096) {
- PixelPTEReqHeightPTEs = 1;
- *PixelPTEReqHeight = MacroTileHeight;
- *PixelPTEReqWidth = 8 * *MacroTileWidth;
- *PTERequestSize = 64;
- if (ScanDirection != dm_vert)
- FractionOfPTEReturnDrop = 0;
- else
- FractionOfPTEReturnDrop = 7 / 8;
} else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
PixelPTEReqHeightPTEs = 16;
*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
@@ -1868,7 +1788,10 @@ static unsigned int CalculateVMAndRowBytes(
}
if (SurfaceTiling == dm_sw_linear) {
- *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
+ if (PTEBufferSizeInRequests == 0)
+ *dpte_row_height = 1;
+ else
+ *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
*dpte_row_width_ub = (dml_ceil(((double) SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
} else if (ScanDirection != dm_vert) {
@@ -2095,7 +2018,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
DTRACE(" return_bus_bw = %f", v->ReturnBW);
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
- CalculateBytePerPixelAnd256BBlockSizes(
+ dml30_CalculateBytePerPixelAnd256BBlockSizes(
v->SourcePixelFormat[k],
v->SurfaceTiling[k],
&v->BytePerPixelY[k],
@@ -3049,40 +2972,12 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
{
//Maximum Bandwidth Used
- double TotalWRBandwidth = 0;
- double MaxPerPlaneVActiveWRBandwidth = 0;
- double WRBandwidth = 0;
- double MaxUsedBW = 0;
- for (k = 0; k < v->NumberOfActivePlanes; ++k) {
- if (v->WritebackEnable[k] == true
- && v->WritebackPixelFormat[k] == dm_444_32) {
- WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
- / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
- } else if (v->WritebackEnable[k] == true) {
- WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
- / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
- }
- TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
- MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
- }
-
v->TotalDataReadBandwidth = 0;
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
v->TotalDataReadBandwidth = v->TotalDataReadBandwidth
+ v->ReadBandwidthPlaneLuma[k]
+ v->ReadBandwidthPlaneChroma[k];
}
-
- {
- double MaxPerPlaneVActiveRDBandwidth = 0;
- for (k = 0; k < v->NumberOfActivePlanes; ++k) {
- MaxPerPlaneVActiveRDBandwidth = dml_max(MaxPerPlaneVActiveRDBandwidth,
- v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
-
- }
- }
-
- MaxUsedBW = MaxTotalRDBandwidth + TotalWRBandwidth;
}
// VStartup Margin
@@ -3165,7 +3060,7 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
- CalculateBytePerPixelAnd256BBlockSizes(
+ dml30_CalculateBytePerPixelAnd256BBlockSizes(
mode_lib->vba.SourcePixelFormat[k],
mode_lib->vba.SurfaceTiling[k],
&BytePerPixY[k],
@@ -3218,7 +3113,7 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
&dummysinglestring);
}
-static bool CalculateBytePerPixelAnd256BBlockSizes(
+void dml30_CalculateBytePerPixelAnd256BBlockSizes(
enum source_format_class SourcePixelFormat,
enum dm_swizzle_mode SurfaceTiling,
unsigned int *BytePerPixelY,
@@ -3305,7 +3200,6 @@ static bool CalculateBytePerPixelAnd256BBlockSizes(
*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
*BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
}
- return true;
}
static double CalculateTWait(
@@ -3372,8 +3266,8 @@ static double CalculateWriteBackDelay(
static void CalculateDynamicMetadataParameters(int MaxInterDCNTileRepeaters, double DPPCLK, double DISPCLK,
- double DCFClkDeepSleep, double PixelClock, long HTotal, long VBlank, long DynamicMetadataTransmittedBytes,
- long DynamicMetadataLinesBeforeActiveRequired, int InterlaceEnable, bool ProgressiveToInterlaceUnitInOPP,
+ double DCFClkDeepSleep, double PixelClock, unsigned int HTotal, unsigned int VBlank, unsigned int DynamicMetadataTransmittedBytes,
+ int DynamicMetadataLinesBeforeActiveRequired, int InterlaceEnable, bool ProgressiveToInterlaceUnitInOPP,
double *Tsetup, double *Tdmbf, double *Tdmec, double *Tdmsks)
{
double TotalRepeaterDelayTime = 0;
@@ -3583,8 +3477,7 @@ static double TruncToValidBPP(
if (Format == dm_n422) {
MinDSCBPP = 7;
MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
- }
- else {
+ } else {
MinDSCBPP = 8;
MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
}
@@ -3631,14 +3524,13 @@ static double TruncToValidBPP(
return DesiredBPP;
}
}
- return BPP_INVALID;
}
void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
{
struct vba_vars_st *v = &mode_lib->vba;
int MinPrefetchMode, MaxPrefetchMode;
- int i;
+ int i, start_state;
unsigned int j, k, m;
bool EnoughWritebackUnits = true;
bool WritebackModeSupport = true;
@@ -3649,6 +3541,11 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
/*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
+ if (mode_lib->validate_max_state)
+ start_state = v->soc.num_states - 1;
+ else
+ start_state = 0;
+
CalculateMinAndMaxPrefetchMode(
mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
&MinPrefetchMode, &MaxPrefetchMode);
@@ -3709,7 +3606,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
/*Bandwidth Support Check*/
for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
- CalculateBytePerPixelAnd256BBlockSizes(
+ dml30_CalculateBytePerPixelAnd256BBlockSizes(
v->SourcePixelFormat[k],
v->SurfaceTiling[k],
&v->BytePerPixelY[k],
@@ -3947,7 +3844,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
v->SingleDPPViewportSizeSupportPerPlane,
&v->ViewportSizeSupport[0][0]);
- for (i = 0; i < v->soc.num_states; i++) {
+ for (i = start_state; i < v->soc.num_states; i++) {
for (j = 0; j < 2; j++) {
v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
@@ -4103,7 +4000,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
/*Total Available Pipes Support Check*/
- for (i = 0; i < v->soc.num_states; i++) {
+ for (i = start_state; i < v->soc.num_states; i++) {
for (j = 0; j < 2; j++) {
if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
v->TotalAvailablePipesSupport[i][j] = true;
@@ -4142,7 +4039,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
}
- for (i = 0; i < v->soc.num_states; i++) {
+ for (i = start_state; i < v->soc.num_states; i++) {
for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
v->RequiresDSC[i][k] = false;
v->RequiresFEC[i][k] = false;
@@ -4270,7 +4167,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
}
}
- for (i = 0; i < v->soc.num_states; i++) {
+ for (i = start_state; i < v->soc.num_states; i++) {
v->DIOSupport[i] = true;
for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
if (!v->skip_dio_check[k] && v->BlendingAndTiming[k] == k && (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)
@@ -4281,7 +4178,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
}
- for (i = 0; i < v->soc.num_states; ++i) {
+ for (i = start_state; i < v->soc.num_states; ++i) {
v->ODMCombine4To1SupportCheckOK[i] = true;
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
@@ -4293,7 +4190,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
/* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
- for (i = 0; i < v->soc.num_states; i++) {
+ for (i = start_state; i < v->soc.num_states; i++) {
v->NotEnoughDSCUnits[i] = false;
v->TotalDSCUnitsRequired = 0.0;
for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
@@ -4313,7 +4210,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
/*DSC Delay per state*/
- for (i = 0; i < v->soc.num_states; i++) {
+ for (i = start_state; i < v->soc.num_states; i++) {
for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
if (v->OutputBppPerState[i][k] == BPP_INVALID) {
v->BPP = 0.0;
@@ -4364,7 +4261,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
//Calculate Swath, DET Configuration, DCFCLKDeepSleep
//
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
@@ -4429,7 +4326,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
}
- for (i = 0; i < v->soc.num_states; i++) {
+ for (i = start_state; i < v->soc.num_states; i++) {
for (j = 0; j < 2; j++) {
for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
@@ -4667,7 +4564,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
//Calculate Return BW
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
if (v->BlendingAndTiming[k] == k) {
@@ -4726,77 +4623,18 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
v->FinalDRAMClockChangeLatency = (v->DRAMClockChangeLatencyOverride > 0 ? v->DRAMClockChangeLatencyOverride : v->DRAMClockChangeLatency);
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
}
}
if (v->UseMinimumRequiredDCFCLK == true) {
- UseMinimumDCFCLK(
- mode_lib,
- v->MaxInterDCNTileRepeaters,
- MaxPrefetchMode,
- v->FinalDRAMClockChangeLatency,
- v->SREnterPlusExitTime,
- v->ReturnBusWidth,
- v->RoundTripPingLatencyCycles,
- ReorderingBytes,
- v->PixelChunkSizeInKByte,
- v->MetaChunkSize,
- v->GPUVMEnable,
- v->GPUVMMaxPageTableLevels,
- v->HostVMEnable,
- v->NumberOfActivePlanes,
- v->HostVMMinPageSize,
- v->HostVMMaxNonCachedPageTableLevels,
- v->DynamicMetadataVMEnabled,
- v->ImmediateFlipRequirement[0],
- v->ProgressiveToInterlaceUnitInOPP,
- v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
- v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
- v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
- v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
- v->VTotal,
- v->VActive,
- v->DynamicMetadataTransmittedBytes,
- v->DynamicMetadataLinesBeforeActiveRequired,
- v->Interlace,
- v->RequiredDPPCLK,
- v->RequiredDISPCLK,
- v->UrgLatency,
- v->NoOfDPP,
- v->ProjectedDCFCLKDeepSleep,
- v->MaximumVStartup,
- v->TotalVActivePixelBandwidth,
- v->TotalVActiveCursorBandwidth,
- v->TotalMetaRowBandwidth,
- v->TotalDPTERowBandwidth,
- v->TotalNumberOfActiveDPP,
- v->TotalNumberOfDCCActiveDPP,
- v->dpte_group_bytes,
- v->PrefetchLinesY,
- v->PrefetchLinesC,
- v->swath_width_luma_ub_all_states,
- v->swath_width_chroma_ub_all_states,
- v->BytePerPixelY,
- v->BytePerPixelC,
- v->HTotal,
- v->PixelClock,
- v->PDEAndMetaPTEBytesPerFrame,
- v->DPTEBytesPerRow,
- v->MetaRowBytes,
- v->DynamicMetadataEnable,
- v->VActivePixelBandwidth,
- v->VActiveCursorBandwidth,
- v->ReadBandwidthLuma,
- v->ReadBandwidthChroma,
- v->DCFCLKPerState,
- v->DCFCLKState);
+ UseMinimumDCFCLK(mode_lib, v, MaxPrefetchMode, ReorderingBytes);
if (v->ClampMinDCFCLK) {
/* Clamp calculated values to actual minimum */
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
if (v->DCFCLKState[i][j] < mode_lib->soc.min_dcfclk) {
v->DCFCLKState[i][j] = mode_lib->soc.min_dcfclk;
@@ -4806,7 +4644,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
}
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
v->IdealSDPPortBandwidthPerState[i][j] = dml_min3(
v->ReturnBusWidth * v->DCFCLKState[i][j],
@@ -4824,7 +4662,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
//Re-ordering Buffer Support Check
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
> (v->RoundTripPingLatencyCycles + 32) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
@@ -4842,7 +4680,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
}
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
v->IdealSDPPortBandwidthPerState[i][j] * v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
@@ -4858,7 +4696,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
//Prefetch Check
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
int NextPrefetchModeState = MinPrefetchMode;
@@ -5005,7 +4843,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
v->SwathHeightYThisState[k],
v->SwathHeightCThisState[k],
v->HTotal[k] / v->PixelClock[k],
- v->UrgentLatency,
+ v->UrgLatency[i],
v->CursorBufferSize,
v->CursorWidth[k][0],
v->CursorBPP[k][0],
@@ -5017,7 +4855,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
v->DETBufferSizeCThisState[k],
&v->UrgentBurstFactorCursorPre[k],
&v->UrgentBurstFactorLumaPre[k],
- &v->UrgentBurstFactorChroma[k],
+ &v->UrgentBurstFactorChromaPre[k],
&v->NoUrgentLatencyHidingPre[k]);
}
@@ -5092,8 +4930,8 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
v->TotImmediateFlipBytes = 0.0;
for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
- v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k]
- + v->MetaRowBytes[i][j][k] + v->DPTEBytesPerRow[i][j][k];
+ v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->NoOfDPP[i][j][k] * (v->PDEAndMetaPTEBytesPerFrame[i][j][k]
+ + v->MetaRowBytes[i][j][k] + v->DPTEBytesPerRow[i][j][k]);
}
for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
@@ -5230,7 +5068,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
/*PTE Buffer Size Check*/
- for (i = 0; i < v->soc.num_states; i++) {
+ for (i = start_state; i < v->soc.num_states; i++) {
for (j = 0; j < 2; j++) {
v->PTEBufferSizeNotExceeded[i][j] = true;
for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
@@ -5291,7 +5129,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
/*Mode Support, Voltage State and SOC Configuration*/
- for (i = v->soc.num_states - 1; i >= 0; i--) {
+ for (i = v->soc.num_states - 1; i >= start_state; i--) {
for (j = 0; j < 2; j++) {
if (v->ScaleRatioAndTapsSupport == 1 && v->SourceFormatPixelAndScanSupport == 1 && v->ViewportSizeSupport[i][j] == 1
&& v->DIOSupport[i] == 1 && v->ODMCombine4To1SupportCheckOK[i] == 1
@@ -5313,7 +5151,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
{
unsigned int MaximumMPCCombine = 0;
- for (i = v->soc.num_states; i >= 0; i--) {
+ for (i = v->soc.num_states; i >= start_state; i--) {
if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
v->VoltageLevel = i;
v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
@@ -6476,10 +6314,6 @@ static void CalculateSwathWidth(
for (k = 0; k < NumberOfActivePlanes; ++k) {
enum odm_combine_mode MainPlaneODMCombine = 0;
- surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
- surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
- surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
- surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
if (SourceScan[k] != dm_vert) {
SwathWidthSingleDPPY[k] = ViewportWidth[k];
@@ -6519,8 +6353,6 @@ static void CalculateSwathWidth(
surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
- surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
- surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
if (SourceScan[k] != dm_vert) {
MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
@@ -6528,6 +6360,7 @@ static void CalculateSwathWidth(
swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
if (BytePerPixC[k] > 0) {
+ surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
} else {
@@ -6539,6 +6372,7 @@ static void CalculateSwathWidth(
swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
if (BytePerPixC[k] > 0) {
+ surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
} else {
@@ -6651,77 +6485,21 @@ static double CalculateUrgentLatency(
return ret;
}
-
-static void UseMinimumDCFCLK(
+static noinline_for_stack void UseMinimumDCFCLK(
struct display_mode_lib *mode_lib,
- int MaxInterDCNTileRepeaters,
+ struct vba_vars_st *v,
int MaxPrefetchMode,
- double FinalDRAMClockChangeLatency,
- double SREnterPlusExitTime,
- int ReturnBusWidth,
- int RoundTripPingLatencyCycles,
- int ReorderingBytes,
- int PixelChunkSizeInKByte,
- int MetaChunkSize,
- bool GPUVMEnable,
- int GPUVMMaxPageTableLevels,
- bool HostVMEnable,
- int NumberOfActivePlanes,
- double HostVMMinPageSize,
- int HostVMMaxNonCachedPageTableLevels,
- bool DynamicMetadataVMEnabled,
- enum immediate_flip_requirement ImmediateFlipRequirement,
- bool ProgressiveToInterlaceUnitInOPP,
- double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
- double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
- double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
- double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
- int VTotal[],
- int VActive[],
- int DynamicMetadataTransmittedBytes[],
- int DynamicMetadataLinesBeforeActiveRequired[],
- bool Interlace[],
- double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
- double RequiredDISPCLK[][2],
- double UrgLatency[],
- unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
- double ProjectedDCFCLKDeepSleep[][2],
- double MaximumVStartup[][2][DC__NUM_DPP__MAX],
- double TotalVActivePixelBandwidth[][2],
- double TotalVActiveCursorBandwidth[][2],
- double TotalMetaRowBandwidth[][2],
- double TotalDPTERowBandwidth[][2],
- unsigned int TotalNumberOfActiveDPP[][2],
- unsigned int TotalNumberOfDCCActiveDPP[][2],
- int dpte_group_bytes[],
- double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
- double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
- int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
- int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
- int BytePerPixelY[],
- int BytePerPixelC[],
- int HTotal[],
- double PixelClock[],
- double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
- double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
- double MetaRowBytes[][2][DC__NUM_DPP__MAX],
- bool DynamicMetadataEnable[],
- double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
- double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
- double ReadBandwidthLuma[],
- double ReadBandwidthChroma[],
- double DCFCLKPerState[],
- double DCFCLKState[][2])
+ int ReorderingBytes)
{
double NormalEfficiency = 0;
double PTEEfficiency = 0;
double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2] = { { 0 } };
unsigned int i, j, k;
- NormalEfficiency = (HostVMEnable == true ? PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
- : PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly) / 100.0;
- PTEEfficiency = (HostVMEnable == true ? PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly
- / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData : 1.0);
+ NormalEfficiency = (v->HostVMEnable == true ? v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
+ : v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly) / 100.0;
+ PTEEfficiency = (v->HostVMEnable == true ? v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly
+ / v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData : 1.0);
for (i = 0; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX] = { 0 };
@@ -6739,58 +6517,58 @@ static void UseMinimumDCFCLK(
double MinimumTvmPlus2Tr0 = 0;
TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
- for (k = 0; k < NumberOfActivePlanes; ++k) {
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
- + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] / (15.75 * HTotal[k] / PixelClock[k]);
+ + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
}
- for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
- NoOfDPPState[k] = NoOfDPP[i][j][k];
+ for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
+ NoOfDPPState[k] = v->NoOfDPP[i][j][k];
}
- MinimumTWait = CalculateTWait(MaxPrefetchMode, FinalDRAMClockChangeLatency, UrgLatency[i], SREnterPlusExitTime);
- NonDPTEBandwidth = TotalVActivePixelBandwidth[i][j] + TotalVActiveCursorBandwidth[i][j] + TotalMetaRowBandwidth[i][j];
- DPTEBandwidth = (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) ?
- TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : TotalDPTERowBandwidth[i][j];
- DCFCLKRequiredForAverageBandwidth = dml_max3(ProjectedDCFCLKDeepSleep[i][j],
- (NonDPTEBandwidth + TotalDPTERowBandwidth[i][j]) / ReturnBusWidth / (MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
- (NonDPTEBandwidth + DPTEBandwidth / PTEEfficiency) / NormalEfficiency / ReturnBusWidth);
-
- ExtraLatencyBytes = CalculateExtraLatencyBytes(ReorderingBytes, TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte, TotalNumberOfDCCActiveDPP[i][j],
- MetaChunkSize, GPUVMEnable, HostVMEnable, NumberOfActivePlanes, NoOfDPPState, dpte_group_bytes,
- PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
- HostVMMinPageSize, HostVMMaxNonCachedPageTableLevels);
- ExtraLatencyCycles = RoundTripPingLatencyCycles + 32 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
- for (k = 0; k < NumberOfActivePlanes; ++k) {
+ MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
+ NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
+ DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
+ TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
+ DCFCLKRequiredForAverageBandwidth = dml_max3(v->ProjectedDCFCLKDeepSleep[i][j],
+ (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth / (v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
+ (NonDPTEBandwidth + DPTEBandwidth / PTEEfficiency) / NormalEfficiency / v->ReturnBusWidth);
+
+ ExtraLatencyBytes = CalculateExtraLatencyBytes(ReorderingBytes, v->TotalNumberOfActiveDPP[i][j], v->PixelChunkSizeInKByte, v->TotalNumberOfDCCActiveDPP[i][j],
+ v->MetaChunkSize, v->GPUVMEnable, v->HostVMEnable, v->NumberOfActivePlanes, NoOfDPPState, v->dpte_group_bytes,
+ v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
+ v->HostVMMinPageSize, v->HostVMMaxNonCachedPageTableLevels);
+ ExtraLatencyCycles = v->RoundTripPingLatencyCycles + 32 + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
double DCFCLKCyclesRequiredInPrefetch = { 0 };
double ExpectedPrefetchBWAcceleration = { 0 };
double PrefetchTime = { 0 };
- PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
- + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] * BytePerPixelC[k]) / NormalEfficiency / ReturnBusWidth;
- DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] + PDEAndMetaPTEBytesPerFrame[i][j][k] / PTEEfficiency
- / NormalEfficiency / ReturnBusWidth * (GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * DPTEBytesPerRow[i][j][k] / PTEEfficiency
- / NormalEfficiency / ReturnBusWidth + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
- PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) * HTotal[k] / PixelClock[k];
- ExpectedPrefetchBWAcceleration = (VActivePixelBandwidth[i][j][k] + VActiveCursorBandwidth[i][j][k]) / (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
- DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true && DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
- UrgLatency[i] * GPUVMMaxPageTableLevels * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
- PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - MinimumTWait - UrgLatency[i] * ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels
- : GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - DynamicMetadataVMExtraLatency[k];
+ PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
+ + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
+ DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / PTEEfficiency
+ / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * v->DPTEBytesPerRow[i][j][k] / PTEEfficiency
+ / NormalEfficiency / v->ReturnBusWidth + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
+ PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
+ ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k]) / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
+ DynamicMetadataVMExtraLatency[k] = (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
+ v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
+ PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - v->UrgLatency[i] * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels
+ : v->GPUVMMaxPageTableLevels - 2) * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - DynamicMetadataVMExtraLatency[k];
if (PrefetchTime > 0) {
double ExpectedVRatioPrefetch = { 0 };
ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
* dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
- if (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) {
+ if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
- + NoOfDPPState[k] * DPTEBandwidth / PTEEfficiency / NormalEfficiency / ReturnBusWidth;
+ + NoOfDPPState[k] * DPTEBandwidth / PTEEfficiency / NormalEfficiency / v->ReturnBusWidth;
}
} else {
- DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
+ DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
}
- if (DynamicMetadataEnable[k] == true) {
+ if (v->DynamicMetadataEnable[k] == true) {
double TsetupPipe = { 0 };
double TdmbfPipe = { 0 };
double TdmsksPipe = { 0 };
@@ -6798,52 +6576,51 @@ static void UseMinimumDCFCLK(
double AllowedTimeForUrgentExtraLatency = { 0 };
CalculateDynamicMetadataParameters(
- MaxInterDCNTileRepeaters,
- RequiredDPPCLK[i][j][k],
- RequiredDISPCLK[i][j],
- ProjectedDCFCLKDeepSleep[i][j],
- PixelClock[k],
- HTotal[k],
- VTotal[k] - VActive[k],
- DynamicMetadataTransmittedBytes[k],
- DynamicMetadataLinesBeforeActiveRequired[k],
- Interlace[k],
- ProgressiveToInterlaceUnitInOPP,
+ v->MaxInterDCNTileRepeaters,
+ v->RequiredDPPCLK[i][j][k],
+ v->RequiredDISPCLK[i][j],
+ v->ProjectedDCFCLKDeepSleep[i][j],
+ v->PixelClock[k],
+ v->HTotal[k],
+ v->VTotal[k] - v->VActive[k],
+ v->DynamicMetadataTransmittedBytes[k],
+ v->DynamicMetadataLinesBeforeActiveRequired[k],
+ v->Interlace[k],
+ v->ProgressiveToInterlaceUnitInOPP,
&TsetupPipe,
&TdmbfPipe,
&TdmecPipe,
&TdmsksPipe);
- AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / PixelClock[k] - MinimumTWait - TsetupPipe
+ AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TsetupPipe
- TdmbfPipe - TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
if (AllowedTimeForUrgentExtraLatency > 0) {
DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(DCFCLKRequiredForPeakBandwidthPerPlane[k],
ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
} else {
- DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
+ DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
}
}
}
DCFCLKRequiredForPeakBandwidth = 0;
- for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
+ for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
}
- MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ? (HostVMEnable == true ?
- (GPUVMMaxPageTableLevels + 2) * (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
- for (k = 0; k < NumberOfActivePlanes; ++k) {
+ MinimumTvmPlus2Tr0 = v->UrgLatency[i] * (v->GPUVMEnable == true ? (v->HostVMEnable == true ?
+ (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) : 0);
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
double MaximumTvmPlus2Tr0PlusTsw = { 0 };
- MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
+ MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
- DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
+ DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
} else {
DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, 2 * ExtraLatencyCycles
/ (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
(2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
}
}
- DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * (1 + mode_lib->vba.PercentMarginOverMinimumRequiredDCFCLK / 100)
+ v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * (1 + mode_lib->vba.PercentMarginOverMinimumRequiredDCFCLK / 100)
* dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
}
}
}
-#endif /* CONFIG_DRM_AMD_DC_DCN */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.h b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.h
index 4e249eaabfdb..daaf0883b84d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.h
@@ -39,5 +39,16 @@ double dml30_CalculateWriteBackDISPCLK(
long WritebackDestinationWidth,
unsigned int HTotal,
unsigned int WritebackLineBufferSize);
+void dml30_CalculateBytePerPixelAnd256BBlockSizes(
+ enum source_format_class SourcePixelFormat,
+ enum dm_swizzle_mode SurfaceTiling,
+ unsigned int *BytePerPixelY,
+ unsigned int *BytePerPixelC,
+ double *BytePerPixelDETY,
+ double *BytePerPixelDETC,
+ unsigned int *BlockHeight256BytesY,
+ unsigned int *BlockHeight256BytesC,
+ unsigned int *BlockWidth256BytesY,
+ unsigned int *BlockWidth256BytesC);
#endif /* __DML30_DISPLAY_MODE_VBA_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
index aef854270054..4fb37df54d59 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
@@ -23,12 +23,11 @@
*
*/
-#ifdef CONFIG_DRM_AMD_DC_DCN
-
#include "../display_mode_lib.h"
#include "../display_mode_vba.h"
#include "../dml_inline_defs.h"
#include "display_rq_dlg_calc_30.h"
+#include "display_mode_vba_30.h"
static bool is_dual_plane(enum source_format_class source_format)
{
@@ -275,96 +274,6 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib,
full_swath_bytes_packed_c);
}
-static bool CalculateBytePerPixelAnd256BBlockSizes(
- enum source_format_class SourcePixelFormat,
- enum dm_swizzle_mode SurfaceTiling,
- unsigned int *BytePerPixelY,
- unsigned int *BytePerPixelC,
- double *BytePerPixelDETY,
- double *BytePerPixelDETC,
- unsigned int *BlockHeight256BytesY,
- unsigned int *BlockHeight256BytesC,
- unsigned int *BlockWidth256BytesY,
- unsigned int *BlockWidth256BytesC)
-{
- if (SourcePixelFormat == dm_444_64) {
- *BytePerPixelDETY = 8;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 8;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
- *BytePerPixelDETY = 4;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 4;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_16) {
- *BytePerPixelDETY = 2;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 2;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_8) {
- *BytePerPixelDETY = 1;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 1;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_rgbe_alpha) {
- *BytePerPixelDETY = 4;
- *BytePerPixelDETC = 1;
- *BytePerPixelY = 4;
- *BytePerPixelC = 1;
- } else if (SourcePixelFormat == dm_420_8) {
- *BytePerPixelDETY = 1;
- *BytePerPixelDETC = 2;
- *BytePerPixelY = 1;
- *BytePerPixelC = 2;
- } else if (SourcePixelFormat == dm_420_12) {
- *BytePerPixelDETY = 2;
- *BytePerPixelDETC = 4;
- *BytePerPixelY = 2;
- *BytePerPixelC = 4;
- } else {
- *BytePerPixelDETY = 4.0 / 3;
- *BytePerPixelDETC = 8.0 / 3;
- *BytePerPixelY = 2;
- *BytePerPixelC = 4;
- }
-
- if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
- || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8
- || SourcePixelFormat == dm_mono_16 || SourcePixelFormat == dm_mono_8
- || SourcePixelFormat == dm_rgbe)) {
- if (SurfaceTiling == dm_sw_linear) {
- *BlockHeight256BytesY = 1;
- } else if (SourcePixelFormat == dm_444_64) {
- *BlockHeight256BytesY = 4;
- } else if (SourcePixelFormat == dm_444_8) {
- *BlockHeight256BytesY = 16;
- } else {
- *BlockHeight256BytesY = 8;
- }
- *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
- *BlockHeight256BytesC = 0;
- *BlockWidth256BytesC = 0;
- } else {
- if (SurfaceTiling == dm_sw_linear) {
- *BlockHeight256BytesY = 1;
- *BlockHeight256BytesC = 1;
- } else if (SourcePixelFormat == dm_rgbe_alpha) {
- *BlockHeight256BytesY = 8;
- *BlockHeight256BytesC = 16;
- } else if (SourcePixelFormat == dm_420_8) {
- *BlockHeight256BytesY = 16;
- *BlockHeight256BytesC = 8;
- } else {
- *BlockHeight256BytesY = 8;
- *BlockHeight256BytesC = 8;
- }
- *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
- *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
- }
- return true;
-}
-
static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
display_data_rq_dlg_params_st *rq_dlg_param,
display_data_rq_misc_params_st *rq_misc_param,
@@ -450,7 +359,7 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
double byte_per_pixel_det_y = 0;
double byte_per_pixel_det_c = 0;
- CalculateBytePerPixelAnd256BBlockSizes((enum source_format_class)(source_format),
+ dml30_CalculateBytePerPixelAnd256BBlockSizes((enum source_format_class)(source_format),
(enum dm_swizzle_mode)(tiling),
&bytes_per_element_y,
&bytes_per_element_c,
@@ -483,8 +392,6 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
blk_bytes = surf_linear ?
256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size);
log2_blk_bytes = dml_log2((double)blk_bytes);
- log2_blk_height = 0;
- log2_blk_width = 0;
// remember log rule
// "+" in log is multiply
@@ -555,8 +462,6 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
- log2_meta_req_height;
meta_req_width = 1 << log2_meta_req_width;
meta_req_height = 1 << log2_meta_req_height;
- log2_meta_row_height = 0;
- meta_row_width_ub = 0;
// the dimensions of a meta row are meta_row_width x meta_row_height in elements.
// calculate upper bound of the meta_row_width
@@ -715,7 +620,7 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
if (hostvm_enable)
rq_sizing_param->dpte_group_bytes = 512;
else {
- if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group
+ if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group
rq_sizing_param->dpte_group_bytes = 512;
else
rq_sizing_param->dpte_group_bytes = 2048;
@@ -751,13 +656,12 @@ static void get_surf_rq_param(struct display_mode_lib *mode_lib,
bool is_chroma,
bool is_alpha)
{
- bool mode_422 = 0;
unsigned int vp_width = 0;
unsigned int vp_height = 0;
unsigned int data_pitch = 0;
unsigned int meta_pitch = 0;
unsigned int surface_height = 0;
- unsigned int ppe = mode_422 ? 2 : 1;
+ unsigned int ppe = 1;
// FIXME check if ppe apply for both luma and chroma in 422 case
if (is_chroma | is_alpha) {
@@ -1025,7 +929,6 @@ static void dml_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
double min_dst_y_ttu_vblank = 0;
unsigned int dlg_vblank_start = 0;
bool dual_plane = false;
- bool mode_422 = false;
unsigned int access_dir = 0;
unsigned int vp_height_l = 0;
unsigned int vp_width_l = 0;
@@ -1071,7 +974,7 @@ static void dml_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
unsigned int vstartup_start = 0;
unsigned int dst_x_after_scaler = 0;
- unsigned int dst_y_after_scaler = 0;
+ int dst_y_after_scaler = 0;
double line_wait = 0;
double dst_y_prefetch = 0;
double dst_y_per_vm_vblank = 0;
@@ -1174,7 +1077,6 @@ static void dml_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
// Source
// dcc_en = src.dcc;
dual_plane = is_dual_plane((enum source_format_class)(src->source_format));
- mode_422 = false; // TODO
access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
vp_height_l = src->viewport_height;
vp_width_l = src->viewport_width;
@@ -1262,6 +1164,8 @@ static void dml_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+ if (dst_y_after_scaler < 0)
+ dst_y_after_scaler = 0;
// do some adjustment on the dst_after scaler to account for odm combine mode
dml_print("DML_DLG: %s: input dst_x_after_scaler = %d\n",
@@ -1390,18 +1294,8 @@ static void dml_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
dpte_row_height_l = rq_dlg_param.rq_l.dpte_row_height;
dpte_row_height_c = rq_dlg_param.rq_c.dpte_row_height;
- if (mode_422) {
- swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element
- swath_width_pixels_ub_c = swath_width_ub_c * 2;
- } else {
- swath_width_pixels_ub_l = swath_width_ub_l * 1;
- swath_width_pixels_ub_c = swath_width_ub_c * 1;
- }
-
- hscale_pixel_rate_l = 0.;
- hscale_pixel_rate_c = 0.;
- min_hratio_fact_l = 1.0;
- min_hratio_fact_c = 1.0;
+ swath_width_pixels_ub_l = swath_width_ub_l;
+ swath_width_pixels_ub_c = swath_width_ub_c;
if (hratio_l <= 1)
min_hratio_fact_l = 2.0;
@@ -1668,6 +1562,7 @@ static void dml_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank);
dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip);
dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip);
+
disp_dlg_regs->refcyc_per_pte_group_vblank_l =
(unsigned int)(dst_y_per_row_vblank * (double)htotal
* ref_freq_to_pix_freq / (double)dpte_groups_per_row_ub_l);
@@ -1858,8 +1753,6 @@ void dml30_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib,
e2e_pipe_param,
num_pipes);
- dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency
- / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated
print__dlg_sys_params_st(mode_lib, &dlg_sys_param);
@@ -1883,4 +1776,3 @@ void dml30_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
dml_print("DML_DLG: Calculation for pipe[%d] end\n", pipe_idx);
}
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
index 94c32832a0e7..1aaa77265eed 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
@@ -26,6 +26,7 @@
#include "clk_mgr.h"
#include "dcn20/dcn20_resource.h"
#include "dcn301/dcn301_resource.h"
+#include "clk_mgr/dcn301/vg_clk_mgr.h"
#include "dml/dcn20/dcn20_fpu.h"
#include "dcn301_fpu.h"
@@ -214,6 +215,80 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_01_soc = {
.urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
};
+struct wm_table ddr4_wm_table = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 6.09,
+ .sr_enter_plus_exit_time_us = 7.14,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ }
+};
+
+struct wm_table lpddr5_wm_table = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 13.5,
+ .sr_enter_plus_exit_time_us = 16.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 13.5,
+ .sr_enter_plus_exit_time_us = 16.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 13.5,
+ .sr_enter_plus_exit_time_us = 16.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 13.5,
+ .sr_enter_plus_exit_time_us = 16.5,
+ .valid = true,
+ },
+ }
+};
+
static void calculate_wm_set_for_vlevel(int vlevel,
struct wm_range_table_entry *table_entry,
struct dcn_watermarks *wm_set,
@@ -245,61 +320,82 @@ static void calculate_wm_set_for_vlevel(int vlevel,
}
-void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
+void dcn301_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
{
+ struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits;
struct dcn301_resource_pool *pool = TO_DCN301_RES_POOL(dc->res_pool);
struct clk_limit_table *clk_table = &bw_params->clk_table;
- struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
unsigned int i, closest_clk_lvl;
- int j;
+ int j = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0;
dc_assert_fp_enabled();
+ memcpy(s, dcn3_01_soc.clock_limits, sizeof(dcn3_01_soc.clock_limits));
+
/* Default clock levels are used for diags, which may lead to overclocking. */
- if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
- dcn3_01_ip.max_num_otg = pool->base.res_cap->num_timing_generator;
- dcn3_01_ip.max_num_dpp = pool->base.pipe_count;
- dcn3_01_soc.num_chans = bw_params->num_channels;
-
- ASSERT(clk_table->num_entries);
- for (i = 0; i < clk_table->num_entries; i++) {
- /* loop backwards*/
- for (closest_clk_lvl = 0, j = dcn3_01_soc.num_states - 1; j >= 0; j--) {
- if ((unsigned int) dcn3_01_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
- closest_clk_lvl = j;
- break;
- }
- }
+ dcn3_01_ip.max_num_otg = pool->base.res_cap->num_timing_generator;
+ dcn3_01_ip.max_num_dpp = pool->base.pipe_count;
+ dcn3_01_soc.num_chans = bw_params->num_channels;
+
+ ASSERT(clk_table->num_entries);
+
+ /* Prepass to find max clocks independent of voltage level. */
+ for (i = 0; i < clk_table->num_entries; ++i) {
+ if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
+ if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
+ }
- clock_limits[i].state = i;
- clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
- clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
- clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
- clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2;
-
- clock_limits[i].dispclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
- clock_limits[i].dppclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
- clock_limits[i].dram_bw_per_chan_gbps = dcn3_01_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
- clock_limits[i].dscclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
- clock_limits[i].dtbclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
- clock_limits[i].phyclk_d18_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
- clock_limits[i].phyclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+ for (i = 0; i < clk_table->num_entries; i++) {
+ /* loop backwards*/
+ for (closest_clk_lvl = 0, j = dcn3_01_soc.num_states - 1; j >= 0; j--) {
+ if ((unsigned int) dcn3_01_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
+ closest_clk_lvl = j;
+ break;
+ }
}
- for (i = 0; i < clk_table->num_entries; i++)
- dcn3_01_soc.clock_limits[i] = clock_limits[i];
+ s[i].state = i;
+ s[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ s[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+ s[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
+ s[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2;
+
+ /* Clocks independent of voltage level. */
+ s[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
+ dcn3_01_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+
+ s[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
+ dcn3_01_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+
+ s[i].dram_bw_per_chan_gbps =
+ dcn3_01_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ s[i].dscclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ s[i].dtbclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ s[i].phyclk_d18_mhz =
+ dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ s[i].phyclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+ }
- if (clk_table->num_entries) {
- dcn3_01_soc.num_states = clk_table->num_entries;
- /* duplicate last level */
- dcn3_01_soc.clock_limits[dcn3_01_soc.num_states] = dcn3_01_soc.clock_limits[dcn3_01_soc.num_states - 1];
- dcn3_01_soc.clock_limits[dcn3_01_soc.num_states].state = dcn3_01_soc.num_states;
- }
+ if (clk_table->num_entries) {
+ dcn3_01_soc.num_states = clk_table->num_entries;
+ /* duplicate last level */
+ s[dcn3_01_soc.num_states] =
+ dcn3_01_soc.clock_limits[dcn3_01_soc.num_states - 1];
+ s[dcn3_01_soc.num_states].state = dcn3_01_soc.num_states;
}
+ memcpy(dcn3_01_soc.clock_limits, s, sizeof(dcn3_01_soc.clock_limits));
+
dcn3_01_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+ if ((int)(dcn3_01_soc.dram_clock_change_latency_us * 1000)
+ != dc->debug.dram_clock_change_latency_ns
+ && dc->debug.dram_clock_change_latency_ns) {
+ dcn3_01_soc.dram_clock_change_latency_us = dc->debug.dram_clock_change_latency_ns / 1000.0;
+ }
dml_init_instance(&dc->dml, &dcn3_01_soc, &dcn3_01_ip, DML_PROJECT_DCN30);
}
@@ -327,7 +423,7 @@ void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info)
dcn3_01_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10;
}
-void dcn301_calculate_wm_and_dlg(struct dc *dc,
+void dcn301_fpu_calculate_wm_and_dlg(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int pipe_cnt,
@@ -353,12 +449,12 @@ void dcn301_calculate_wm_and_dlg(struct dc *dc,
&context->bw_ctx.dml, pipes, pipe_cnt);
/* WM Set C */
table_entry = &bw_params->wm_table.entries[WM_C];
- vlevel = min(max(vlevel_req, 2), vlevel_max);
+ vlevel = clamp(vlevel_req, 2, vlevel_max);
calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.c,
&context->bw_ctx.dml, pipes, pipe_cnt);
/* WM Set B */
table_entry = &bw_params->wm_table.entries[WM_B];
- vlevel = min(max(vlevel_req, 1), vlevel_max);
+ vlevel = clamp(vlevel_req, 1, vlevel_max);
calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.b,
&context->bw_ctx.dml, pipes, pipe_cnt);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h
index fc7065d17842..3e103e23dc6f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h
@@ -26,15 +26,14 @@
#ifndef __DCN301_FPU_H__
#define __DCN301_FPU_H__
-void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
+void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info);
+void dcn301_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
void dcn301_fpu_set_wm_ranges(int i,
struct pp_smu_wm_range_sets *ranges,
struct _vcs_dpi_soc_bounding_box_st *loaded_bb);
-void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info);
-
-void dcn301_calculate_wm_and_dlg(struct dc *dc,
+void dcn301_fpu_calculate_wm_and_dlg(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int pipe_cnt,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c
new file mode 100644
index 000000000000..8d7c59ec701d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c
@@ -0,0 +1,367 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "resource.h"
+#include "clk_mgr.h"
+#include "dcn20/dcn20_resource.h"
+#include "dcn302/dcn302_resource.h"
+
+#include "dml/dcn20/dcn20_fpu.h"
+#include "dcn302_fpu.h"
+
+struct _vcs_dpi_ip_params_st dcn3_02_ip = {
+ .use_min_dcfclk = 0,
+ .clamp_min_dcfclk = 0,
+ .odm_capable = 1,
+ .gpuvm_enable = 1,
+ .hostvm_enable = 0,
+ .gpuvm_max_page_table_levels = 4,
+ .hostvm_max_page_table_levels = 4,
+ .hostvm_cached_page_table_levels = 0,
+ .pte_group_size_bytes = 2048,
+ .num_dsc = 5,
+ .rob_buffer_size_kbytes = 184,
+ .det_buffer_size_kbytes = 184,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .pde_proc_buffer_size_64k_reqs = 48,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .pte_enable = 1,
+ .max_page_table_levels = 2,
+ .pte_chunk_size_kbytes = 2, // ?
+ .meta_chunk_size_kbytes = 2,
+ .writeback_chunk_size_kbytes = 8,
+ .line_buffer_size_bits = 789504,
+ .is_line_buffer_bpp_fixed = 0, // ?
+ .line_buffer_fixed_bpp = 0, // ?
+ .dcc_supported = true,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .writeback_line_buffer_buffer_size = 0,
+ .max_line_buffer_lines = 12,
+ .writeback_luma_buffer_size_kbytes = 12, // writeback_line_buffer_buffer_size = 656640
+ .writeback_chroma_buffer_size_kbytes = 8,
+ .writeback_chroma_line_buffer_width_pixels = 4,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .writeback_line_buffer_luma_buffer_size = 0,
+ .writeback_line_buffer_chroma_buffer_size = 14643,
+ .cursor_buffer_size = 8,
+ .cursor_chunk_size = 2,
+ .max_num_otg = 5,
+ .max_num_dpp = 5,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .hscl_mults = 4,
+ .vscl_mults = 4,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dispclk_ramp_margin_percent = 1,
+ .underscan_factor = 1.11,
+ .min_vblank_lines = 32,
+ .dppclk_delay_subtotal = 46,
+ .dynamic_metadata_vm_enabled = true,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_cnvc_formatter = 27,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 119,
+ .dcfclk_cstate_latency = 5.2, // SRExitTime
+ .max_inter_dcn_tile_repeaters = 8,
+ .max_num_hdmi_frl_outputs = 1,
+ .odm_combine_4to1_supported = true,
+
+ .xfc_supported = false,
+ .xfc_fill_bw_overhead_percent = 10.0,
+ .xfc_fill_constant_bytes = 0,
+ .gfx7_compat_tiling_supported = 0,
+ .number_of_cursors = 1,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn3_02_soc = {
+ .clock_limits = {
+ {
+ .state = 0,
+ .dispclk_mhz = 562.0,
+ .dppclk_mhz = 300.0,
+ .phyclk_mhz = 300.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 405.6,
+ },
+ },
+
+ .min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */
+ .num_states = 1,
+ .sr_exit_time_us = 26.5,
+ .sr_enter_plus_exit_time_us = 31,
+ .urgent_latency_us = 4.0,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 40.0,
+ .writeback_latency_us = 12.0,
+ .max_request_size_bytes = 256,
+ .fabric_datapath_to_dcn_data_return_bytes = 64,
+ .dcn_downspread_percent = 0.5,
+ .downspread_percent = 0.38,
+ .dram_page_open_time_ns = 50.0,
+ .dram_rw_turnaround_time_ns = 17.5,
+ .dram_return_buffer_per_channel_bytes = 8192,
+ .round_trip_ping_latency_dcfclk_cycles = 156,
+ .urgent_out_of_order_return_per_channel_bytes = 4096,
+ .channel_interleave_bytes = 256,
+ .num_banks = 8,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .dram_clock_change_latency_us = 404,
+ .dummy_pstate_latency_us = 5,
+ .writeback_dram_clock_change_latency_us = 23.0,
+ .return_bus_width_bytes = 64,
+ .dispclk_dppclk_vco_speed_mhz = 3650,
+ .xfc_bus_transport_time_us = 20, // ?
+ .xfc_xbuf_latency_tolerance_us = 4, // ?
+ .use_urgent_burst_bw = 1, // ?
+ .do_urgent_latency_adjustment = true,
+ .urgent_latency_adjustment_fabric_clock_component_us = 1.0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
+};
+
+static void dcn302_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
+ unsigned int *optimal_dcfclk,
+ unsigned int *optimal_fclk)
+{
+
+ double bw_from_dram, bw_from_dram1, bw_from_dram2;
+
+ bw_from_dram1 = uclk_mts * dcn3_02_soc.num_chans *
+ dcn3_02_soc.dram_channel_width_bytes *
+ (dcn3_02_soc.max_avg_dram_bw_use_normal_percent / 100);
+ bw_from_dram2 = uclk_mts * dcn3_02_soc.num_chans *
+ dcn3_02_soc.dram_channel_width_bytes *
+ (dcn3_02_soc.max_avg_sdp_bw_use_normal_percent / 100);
+
+ bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2;
+
+ if (optimal_fclk)
+ *optimal_fclk = bw_from_dram /
+ (dcn3_02_soc.fabric_datapath_to_dcn_data_return_bytes *
+ (dcn3_02_soc.max_avg_sdp_bw_use_normal_percent / 100));
+
+ if (optimal_dcfclk)
+ *optimal_dcfclk = bw_from_dram /
+ (dcn3_02_soc.return_bus_width_bytes *
+ (dcn3_02_soc.max_avg_sdp_bw_use_normal_percent / 100));
+}
+
+void dcn302_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ unsigned int i, j;
+ unsigned int num_states = 0;
+
+ unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0};
+ unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0};
+ unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0};
+ unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0};
+
+ unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {694, 875, 1000, 1200};
+ unsigned int num_dcfclk_sta_targets = 4;
+ unsigned int num_uclk_states;
+
+ dc_assert_fp_enabled();
+
+ if (dc->ctx->dc_bios->vram_info.num_chans)
+ dcn3_02_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans;
+
+ if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
+ dcn3_02_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
+
+ dcn3_02_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+
+ if (bw_params->clk_table.entries[0].memclk_mhz) {
+ int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0;
+
+ for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
+ if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
+ max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
+ if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
+ if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
+ if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
+ max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
+ }
+ if (!max_dcfclk_mhz)
+ max_dcfclk_mhz = dcn3_02_soc.clock_limits[0].dcfclk_mhz;
+ if (!max_dispclk_mhz)
+ max_dispclk_mhz = dcn3_02_soc.clock_limits[0].dispclk_mhz;
+ if (!max_dppclk_mhz)
+ max_dppclk_mhz = dcn3_02_soc.clock_limits[0].dppclk_mhz;
+ if (!max_phyclk_mhz)
+ max_phyclk_mhz = dcn3_02_soc.clock_limits[0].phyclk_mhz;
+
+ if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+ /* If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array */
+ dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz;
+ num_dcfclk_sta_targets++;
+ } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+ /* If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates */
+ for (i = 0; i < num_dcfclk_sta_targets; i++) {
+ if (dcfclk_sta_targets[i] > max_dcfclk_mhz) {
+ dcfclk_sta_targets[i] = max_dcfclk_mhz;
+ break;
+ }
+ }
+ /* Update size of array since we "removed" duplicates */
+ num_dcfclk_sta_targets = i + 1;
+ }
+
+ num_uclk_states = bw_params->clk_table.num_entries;
+
+ /* Calculate optimal dcfclk for each uclk */
+ for (i = 0; i < num_uclk_states; i++) {
+ dcn302_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16,
+ &optimal_dcfclk_for_uclk[i], NULL);
+ if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz)
+ optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz;
+ }
+
+ /* Calculate optimal uclk for each dcfclk sta target */
+ for (i = 0; i < num_dcfclk_sta_targets; i++) {
+ for (j = 0; j < num_uclk_states; j++) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) {
+ optimal_uclk_for_dcfclk_sta_targets[i] =
+ bw_params->clk_table.entries[j].memclk_mhz * 16;
+ break;
+ }
+ }
+ }
+
+ i = 0;
+ j = 0;
+ /* create the final dcfclk and uclk table */
+ while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) {
+ dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
+ dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
+ } else {
+ if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+ dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+ dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
+ } else {
+ j = num_uclk_states;
+ }
+ }
+ }
+
+ while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) {
+ dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
+ dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
+ }
+
+ while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES &&
+ optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+ dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+ dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
+ }
+
+ /* bw_params->clk_table.entries[MAX_NUM_DPM_LVL].
+ * MAX_NUM_DPM_LVL is 8.
+ * dcn3_02_soc.clock_limits[DC__VOLTAGE_STATES].
+ * DC__VOLTAGE_STATES is 40.
+ */
+ if (num_states > MAX_NUM_DPM_LVL) {
+ ASSERT(0);
+ return;
+ }
+
+ dcn3_02_soc.num_states = num_states;
+ for (i = 0; i < dcn3_02_soc.num_states; i++) {
+ dcn3_02_soc.clock_limits[i].state = i;
+ dcn3_02_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
+ dcn3_02_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
+ dcn3_02_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];
+
+ /* Fill all states with max values of all other clocks */
+ dcn3_02_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
+ dcn3_02_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
+ dcn3_02_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz;
+ /* Populate from bw_params for DTBCLK, SOCCLK */
+ if (!bw_params->clk_table.entries[i].dtbclk_mhz && i > 0)
+ dcn3_02_soc.clock_limits[i].dtbclk_mhz = dcn3_02_soc.clock_limits[i-1].dtbclk_mhz;
+ else
+ dcn3_02_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+ if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0)
+ dcn3_02_soc.clock_limits[i].socclk_mhz = dcn3_02_soc.clock_limits[i-1].socclk_mhz;
+ else
+ dcn3_02_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz;
+ /* These clocks cannot come from bw_params, always fill from dcn3_02_soc[1] */
+ /* FCLK, PHYCLK_D18, DSCCLK */
+ dcn3_02_soc.clock_limits[i].phyclk_d18_mhz = dcn3_02_soc.clock_limits[0].phyclk_d18_mhz;
+ dcn3_02_soc.clock_limits[i].dscclk_mhz = dcn3_02_soc.clock_limits[0].dscclk_mhz;
+ }
+ /* re-init DML with updated bb */
+ dml_init_instance(&dc->dml, &dcn3_02_soc, &dcn3_02_ip, DML_PROJECT_DCN30);
+ if (dc->current_state)
+ dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_02_soc, &dcn3_02_ip, DML_PROJECT_DCN30);
+ }
+}
+
+void dcn302_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info)
+{
+
+ dc_assert_fp_enabled();
+
+ if (bb_info.dram_clock_change_latency_100ns > 0)
+ dcn3_02_soc.dram_clock_change_latency_us =
+ bb_info.dram_clock_change_latency_100ns * 10;
+
+ if (bb_info.dram_sr_enter_exit_latency_100ns > 0)
+ dcn3_02_soc.sr_enter_plus_exit_time_us =
+ bb_info.dram_sr_enter_exit_latency_100ns * 10;
+
+ if (bb_info.dram_sr_exit_latency_100ns > 0)
+ dcn3_02_soc.sr_exit_time_us =
+ bb_info.dram_sr_exit_latency_100ns * 10;
+}
+
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.h
new file mode 100644
index 000000000000..548305d96cee
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2019-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN302_FPU_H__
+#define __DCN302_FPU_H__
+/*
+ * Entry points of the DCN 3.02 FPU code.
+ * dcn302_fpu_init_soc_bounding_box() calls dc_assert_fp_enabled(), so it is
+ * expected to run with FP already enabled by the caller.
+ * NOTE(review): struct bp_soc_bb_info, struct dc and struct clk_bw_params are
+ * neither included nor forward-declared in this header, so includers
+ * presumably have to provide those declarations first - confirm.
+ */
+void dcn302_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info);
+void dcn302_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
+
+#endif /* __DCN302_FPU_H__*/
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c
new file mode 100644
index 000000000000..b5d3fd4c3694
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c
@@ -0,0 +1,380 @@
+/*
+ * Copyright 2019-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "resource.h"
+#include "clk_mgr.h"
+#include "dcn20/dcn20_resource.h"
+#include "dcn303/dcn303_resource.h"
+
+#include "dml/dcn20/dcn20_fpu.h"
+#include "dcn303_fpu.h"
+/*
+ * DML IP parameter table for DCN 3.03.
+ * dcn303_fpu_update_bw_bounding_box() passes its address to
+ * dml_init_instance() together with dcn3_03_soc. The object is not static,
+ * so it has external linkage.
+ */
+struct _vcs_dpi_ip_params_st dcn3_03_ip = {
+ .use_min_dcfclk = 0,
+ .clamp_min_dcfclk = 0,
+ .odm_capable = 1,
+ .gpuvm_enable = 1,
+ .hostvm_enable = 0,
+ .gpuvm_max_page_table_levels = 4,
+ .hostvm_max_page_table_levels = 4,
+ .hostvm_cached_page_table_levels = 0,
+ .pte_group_size_bytes = 2048,
+ .num_dsc = 2,
+ .rob_buffer_size_kbytes = 184,
+ .det_buffer_size_kbytes = 184,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .pde_proc_buffer_size_64k_reqs = 48,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .pte_enable = 1,
+ .max_page_table_levels = 2,
+ .pte_chunk_size_kbytes = 2, // ?
+ .meta_chunk_size_kbytes = 2,
+ .writeback_chunk_size_kbytes = 8,
+ .line_buffer_size_bits = 789504,
+ .is_line_buffer_bpp_fixed = 0, // ?
+ .line_buffer_fixed_bpp = 0, // ?
+ .dcc_supported = true,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .writeback_line_buffer_buffer_size = 0,
+ .max_line_buffer_lines = 12,
+ .writeback_luma_buffer_size_kbytes = 12, // writeback_line_buffer_buffer_size = 656640
+ .writeback_chroma_buffer_size_kbytes = 8,
+ .writeback_chroma_line_buffer_width_pixels = 4,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .writeback_line_buffer_luma_buffer_size = 0,
+ .writeback_line_buffer_chroma_buffer_size = 14643,
+ .cursor_buffer_size = 8,
+ .cursor_chunk_size = 2,
+ .max_num_otg = 2,
+ .max_num_dpp = 2,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .hscl_mults = 4,
+ .vscl_mults = 4,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dispclk_ramp_margin_percent = 1,
+ .underscan_factor = 1.11,
+ .min_vblank_lines = 32,
+ .dppclk_delay_subtotal = 46,
+ .dynamic_metadata_vm_enabled = true,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_cnvc_formatter = 27,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 119,
+ .dcfclk_cstate_latency = 5.2, // SRExitTime
+ .max_inter_dcn_tile_repeaters = 8,
+ .max_num_hdmi_frl_outputs = 1,
+ .odm_combine_4to1_supported = false,
+
+ .xfc_supported = false,
+ .xfc_fill_bw_overhead_percent = 10.0,
+ .xfc_fill_constant_bytes = 0,
+ .gfx7_compat_tiling_supported = 0,
+ .number_of_cursors = 1,
+};
+/*
+ * SoC bounding box for DCN 3.03.
+ * Only clock_limits[0] is given statically (num_states = 1).
+ * dcn303_fpu_update_bw_bounding_box() rewrites num_states and clock_limits[]
+ * at runtime and refreshes dispclk_dppclk_vco_speed_mhz, while
+ * dcn303_fpu_init_soc_bounding_box() may override sr_exit_time_us,
+ * sr_enter_plus_exit_time_us and dram_clock_change_latency_us.
+ * num_chans and dram_channel_width_bytes have no initialiser here, so they
+ * start at zero until the update function copies non-zero VBIOS values in.
+ */
+struct _vcs_dpi_soc_bounding_box_st dcn3_03_soc = {
+ .clock_limits = {
+ {
+ .state = 0,
+ .dispclk_mhz = 562.0,
+ .dppclk_mhz = 300.0,
+ .phyclk_mhz = 300.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 405.6,
+ },
+ },
+
+ .min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */
+ .num_states = 1,
+ .sr_exit_time_us = 35.5,
+ .sr_enter_plus_exit_time_us = 40,
+ .urgent_latency_us = 4.0,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 40.0,
+ .writeback_latency_us = 12.0,
+ .max_request_size_bytes = 256,
+ .fabric_datapath_to_dcn_data_return_bytes = 64,
+ .dcn_downspread_percent = 0.5,
+ .downspread_percent = 0.38,
+ .dram_page_open_time_ns = 50.0,
+ .dram_rw_turnaround_time_ns = 17.5,
+ .dram_return_buffer_per_channel_bytes = 8192,
+ .round_trip_ping_latency_dcfclk_cycles = 156,
+ .urgent_out_of_order_return_per_channel_bytes = 4096,
+ .channel_interleave_bytes = 256,
+ .num_banks = 8,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .dram_clock_change_latency_us = 404,
+ .dummy_pstate_latency_us = 5,
+ .writeback_dram_clock_change_latency_us = 23.0,
+ .return_bus_width_bytes = 64,
+ .dispclk_dppclk_vco_speed_mhz = 3650,
+ .xfc_bus_transport_time_us = 20, // ?
+ .xfc_xbuf_latency_tolerance_us = 4, // ?
+ .use_urgent_burst_bw = 1, // ?
+ .do_urgent_latency_adjustment = true,
+ .urgent_latency_adjustment_fabric_clock_component_us = 1.0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
+};
+
+/*
+ * Derive the DCFCLK and FCLK that match the DRAM bandwidth available at
+ * @uclk_mts, using the channel count, channel width and bandwidth
+ * percentages currently stored in dcn3_03_soc.
+ *
+ * Two bandwidth figures are computed, one scaled by
+ * max_avg_dram_bw_use_normal_percent and one by
+ * max_avg_sdp_bw_use_normal_percent; the smaller one is used. Either output
+ * pointer may be NULL, in which case that result is not produced. The
+ * results are computed as double and converted when stored to the unsigned
+ * outputs. @optimal_fclk is written before @optimal_dcfclk.
+ */
+static void dcn303_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
+		unsigned int *optimal_dcfclk,
+		unsigned int *optimal_fclk)
+{
+	const struct _vcs_dpi_soc_bounding_box_st *soc = &dcn3_03_soc;
+	double dram_scaled_bw, sdp_scaled_bw, usable_bw;
+
+	dram_scaled_bw = uclk_mts * soc->num_chans *
+		soc->dram_channel_width_bytes * (soc->max_avg_dram_bw_use_normal_percent / 100);
+	sdp_scaled_bw = uclk_mts * soc->num_chans *
+		soc->dram_channel_width_bytes * (soc->max_avg_sdp_bw_use_normal_percent / 100);
+
+	/* Keep the smaller of the two figures. */
+	usable_bw = sdp_scaled_bw;
+	if (dram_scaled_bw < sdp_scaled_bw)
+		usable_bw = dram_scaled_bw;
+
+	if (optimal_fclk)
+		*optimal_fclk = usable_bw /
+			(soc->fabric_datapath_to_dcn_data_return_bytes *
+			 (soc->max_avg_sdp_bw_use_normal_percent / 100));
+
+	if (optimal_dcfclk)
+		*optimal_dcfclk = usable_bw /
+			(soc->return_bus_width_bytes * (soc->max_avg_sdp_bw_use_normal_percent / 100));
+}
+
+/*
+ * dcn303_fpu_update_bw_bounding_box - rebuild dcn3_03_soc from runtime data.
+ *
+ * @dc: supplies the VBIOS vram_info, the clock manager's dentist VCO
+ * frequency and the DML instances that get re-initialised.
+ * @bw_params: clock table whose entries are read for memclk, dcfclk,
+ * dispclk, dppclk, phyclk, dtbclk and socclk.
+ *
+ * num_chans and dram_channel_width_bytes (when the VBIOS values are non-zero)
+ * and dispclk_dppclk_vco_speed_mhz are always refreshed. Everything else only
+ * runs when clk_table.entries[0].memclk_mhz is non-zero: a merged list of
+ * DCFCLK / DRAM-speed states is built, copied into dcn3_03_soc.clock_limits[],
+ * and dc->dml plus (if set) dc->current_state->bw_ctx.dml are re-initialised
+ * from the result. If the merged list would exceed MAX_NUM_DPM_LVL states the
+ * function asserts and returns without updating clock_limits[].
+ * dc_assert_fp_enabled() is called, so the caller is expected to have FP
+ * enabled.
+ */
+void dcn303_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ unsigned int i, j;
+ unsigned int num_states = 0;
+
+ unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0};
+ unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0};
+ unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0};
+ unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0};
+ /* Default DCFCLK STA targets; they are compared against max_dcfclk_mhz
+ * below and the list is extended or shortened so that it ends there.
+ */
+ unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {694, 875, 1000, 1200};
+ unsigned int num_dcfclk_sta_targets = 4;
+ unsigned int num_uclk_states;
+
+ dc_assert_fp_enabled();
+ /* Take the memory topology from the VBIOS only when it reports non-zero values. */
+ if (dc->ctx->dc_bios->vram_info.num_chans)
+ dcn3_03_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans;
+
+ if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
+ dcn3_03_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
+
+ dcn3_03_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+
+ if (bw_params->clk_table.entries[0].memclk_mhz) {
+ int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0;
+
+ for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
+ if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
+ max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
+ if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
+ if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
+ if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
+ max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
+ }
+ if (!max_dcfclk_mhz)
+ max_dcfclk_mhz = dcn3_03_soc.clock_limits[0].dcfclk_mhz;
+ if (!max_dispclk_mhz)
+ max_dispclk_mhz = dcn3_03_soc.clock_limits[0].dispclk_mhz;
+ if (!max_dppclk_mhz)
+ max_dppclk_mhz = dcn3_03_soc.clock_limits[0].dppclk_mhz;
+ if (!max_phyclk_mhz)
+ max_phyclk_mhz = dcn3_03_soc.clock_limits[0].phyclk_mhz;
+ /* Make max_dcfclk_mhz the last STA target: append it when it is above
+ * the current last target; when it is below, overwrite the first larger
+ * target with it and drop everything after. Equal leaves the list as is.
+ */
+ if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+ dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz;
+ num_dcfclk_sta_targets++;
+ } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+ for (i = 0; i < num_dcfclk_sta_targets; i++) {
+ if (dcfclk_sta_targets[i] > max_dcfclk_mhz) {
+ dcfclk_sta_targets[i] = max_dcfclk_mhz;
+ break;
+ }
+ }
+ /* Update size of array since we "removed" duplicates */
+ num_dcfclk_sta_targets = i + 1;
+ }
+
+ num_uclk_states = bw_params->clk_table.num_entries;
+
+ /* Calculate optimal dcfclk for each uclk.
+ * memclk_mhz * 16 is what gets passed as uclk_mts (presumably the
+ * MHz to MT/s factor for this memory type - confirm). The result is
+ * never allowed below clk_table.entries[0].dcfclk_mhz.
+ */
+ for (i = 0; i < num_uclk_states; i++) {
+ dcn303_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16,
+ &optimal_dcfclk_for_uclk[i], NULL);
+ if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz)
+ optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz;
+ }
+
+ /* Calculate optimal uclk for each dcfclk sta target */
+ for (i = 0; i < num_dcfclk_sta_targets; i++) {
+ for (j = 0; j < num_uclk_states; j++) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) {
+ optimal_uclk_for_dcfclk_sta_targets[i] =
+ bw_params->clk_table.entries[j].memclk_mhz * 16;
+ break;
+ } else {
+ /* condition where (dcfclk_sta_targets[i] >= optimal_dcfclk_for_uclk[j]):
+ * This is required for dcn303 because it just so happens that the memory
+ * bandwidth is low enough such that all the optimal DCFCLK for each UCLK
+ * is lower than the smallest DCFCLK STA target. In this case we need to
+ * populate the optimal UCLK for each DCFCLK STA target to be the max UCLK.
+ */
+ if (j == num_uclk_states - 1) {
+ optimal_uclk_for_dcfclk_sta_targets[i] =
+ bw_params->clk_table.entries[j].memclk_mhz * 16;
+ }
+ }
+ }
+ }
+
+ i = 0;
+ j = 0;
+ /* create the final dcfclk and uclk table: walk the STA targets (i) and
+ * the per-UCLK optimal DCFCLKs (j) together, emitting whichever DCFCLK
+ * is smaller. A UCLK entry whose optimal DCFCLK is above max_dcfclk_mhz
+ * ends the UCLK side by setting j to num_uclk_states.
+ */
+ while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) {
+ dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
+ dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
+ } else {
+ if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+ dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+ dram_speed_mts[num_states++] =
+ bw_params->clk_table.entries[j++].memclk_mhz * 16;
+ } else {
+ j = num_uclk_states;
+ }
+ }
+ }
+ /* Append any STA targets the merge above did not consume. */
+ while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) {
+ dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
+ dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
+ }
+ /* Append remaining UCLK entries while their optimal DCFCLK fits under max_dcfclk_mhz. */
+ while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES &&
+ optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+ dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+ dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
+ }
+
+ /* bw_params->clk_table.entries[MAX_NUM_DPM_LVL].
+ * MAX_NUM_DPM_LVL is 8.
+ * dcn3_03_soc.clock_limits[DC__VOLTAGE_STATES].
+ * DC__VOLTAGE_STATES is 40.
+ * The loop below indexes both arrays with the same i, so bail out
+ * before num_states can run past the smaller clk_table.entries[].
+ */
+ if (num_states > MAX_NUM_DPM_LVL) {
+ ASSERT(0);
+ return;
+ }
+
+ dcn3_03_soc.num_states = num_states;
+ for (i = 0; i < dcn3_03_soc.num_states; i++) {
+ dcn3_03_soc.clock_limits[i].state = i;
+ dcn3_03_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
+ dcn3_03_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
+ dcn3_03_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];
+
+ /* Fill all states with max values of all other clocks */
+ dcn3_03_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
+ dcn3_03_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
+ dcn3_03_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz;
+ /* Populate from bw_params for DTBCLK, SOCCLK; a zero entry (for i > 0)
+ * inherits the previous state's value.
+ * NOTE(review): i is an index into the merged state list here, yet it
+ * is also used to index clk_table.entries[] - confirm this is intended.
+ */
+ if (!bw_params->clk_table.entries[i].dtbclk_mhz && i > 0)
+ dcn3_03_soc.clock_limits[i].dtbclk_mhz = dcn3_03_soc.clock_limits[i-1].dtbclk_mhz;
+ else
+ dcn3_03_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+ if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0)
+ dcn3_03_soc.clock_limits[i].socclk_mhz = dcn3_03_soc.clock_limits[i-1].socclk_mhz;
+ else
+ dcn3_03_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz;
+ /* These clocks cannot come from bw_params, always fill from dcn3_03_soc.clock_limits[0] */
+ /* PHYCLK_D18, DSCCLK (fabricclk_mhz was already set from dcfclk_mhz above) */
+ dcn3_03_soc.clock_limits[i].phyclk_d18_mhz = dcn3_03_soc.clock_limits[0].phyclk_d18_mhz;
+ dcn3_03_soc.clock_limits[i].dscclk_mhz = dcn3_03_soc.clock_limits[0].dscclk_mhz;
+ }
+ /* With at most 4 memory channels, walk the states in order and force
+ * dcfclk_mhz/fabricclk_mhz to 100 for every state whose dram_speed_mts
+ * is at least 1500; the walk stops at the first state above 1700.
+ * The rationale for these thresholds is not stated in this file.
+ */
+ if (dcn3_03_soc.num_chans <= 4) {
+ for (i = 0; i < dcn3_03_soc.num_states; i++) {
+ if (dcn3_03_soc.clock_limits[i].dram_speed_mts > 1700)
+ break;
+
+ if (dcn3_03_soc.clock_limits[i].dram_speed_mts >= 1500) {
+ dcn3_03_soc.clock_limits[i].dcfclk_mhz = 100;
+ dcn3_03_soc.clock_limits[i].fabricclk_mhz = 100;
+ }
+ }
+ }
+
+ /* re-init DML with updated bb */
+ dml_init_instance(&dc->dml, &dcn3_03_soc, &dcn3_03_ip, DML_PROJECT_DCN30);
+ if (dc->current_state)
+ dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_03_soc, &dcn3_03_ip, DML_PROJECT_DCN30);
+ }
+}
+
+/*
+ * Override the latency defaults held in dcn3_03_soc with the values carried
+ * in @bb_info. A field is only replaced when its @bb_info counterpart is
+ * greater than zero, and every accepted value is multiplied by 10 before it
+ * is stored. dc_assert_fp_enabled() is invoked before anything is written,
+ * so the caller is expected to have FP enabled.
+ *
+ * NOTE(review): the inputs are named *_100ns and the outputs *_us, which
+ * would suggest dividing by 10 rather than multiplying - confirm the units
+ * of the VBIOS fields before changing the factor.
+ */
+void dcn303_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info)
+{
+	struct _vcs_dpi_soc_bounding_box_st *soc = &dcn3_03_soc;
+
+	dc_assert_fp_enabled();
+
+	/* The three overrides are independent of one another. */
+	if (bb_info.dram_sr_exit_latency_100ns > 0)
+		soc->sr_exit_time_us =
+			bb_info.dram_sr_exit_latency_100ns * 10;
+
+	if (bb_info.dram_sr_enter_exit_latency_100ns > 0)
+		soc->sr_enter_plus_exit_time_us =
+			bb_info.dram_sr_enter_exit_latency_100ns * 10;
+
+	if (bb_info.dram_clock_change_latency_100ns > 0)
+		soc->dram_clock_change_latency_us =
+			bb_info.dram_clock_change_latency_100ns * 10;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.h
new file mode 100644
index 000000000000..92ec833fa528
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2019-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN303_FPU_H__
+#define __DCN303_FPU_H__
+/*
+ * Entry points of the DCN 3.03 FPU code (defined in dcn303_fpu.c).
+ * Both definitions call dc_assert_fp_enabled(), so they are expected to run
+ * with FP already enabled by the caller.
+ * NOTE(review): struct dc, struct clk_bw_params and struct bp_soc_bb_info are
+ * neither included nor forward-declared in this header, so includers
+ * presumably have to provide those declarations first - confirm.
+ */
+void dcn303_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
+void dcn303_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info);
+
+#endif /* __DCN303_FPU_H__*/
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
new file mode 100644
index 000000000000..1a28061bb9ff
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
@@ -0,0 +1,827 @@
+/*
+ * Copyright 2019-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "resource.h"
+#include "clk_mgr.h"
+#include "dcn31/dcn31_resource.h"
+#include "dcn315/dcn315_resource.h"
+#include "dcn316/dcn316_resource.h"
+
+#include "dml/dcn20/dcn20_fpu.h"
+#include "dcn31_fpu.h"
+
+/**
+ * DOC: DCN31x FPU manipulation Overview
+ *
+ * The DCN architecture relies on FPU operations, which require special
+ * compilation flags and the use of kernel_fpu_begin/end functions; ideally, we
+ * want to avoid spreading FPU access across multiple files. With this idea in
+ * mind, this file aims to centralize all DCN3.1.x functions that require FPU
+ * access in a single place. Code in this file follows the following code
+ * pattern:
+ *
+ * 1. Functions that use FPU operations should be isolated in static functions.
+ * 2. The FPU functions should have the noinline attribute to ensure anything
+ * that deals with FP register is contained within this call.
+ * 3. Every function that needs to be accessed from outside this file requires
+ * a public interface that does not use any FPU reference.
+ * 4. Developers **must not** use DC_FP_START/END in this file, but they need
+ * to ensure that the caller invokes them before accessing any function
+ * available in this file. For this reason, public functions in this file
+ * must invoke dc_assert_fp_enabled();
+ */
+/*
+ * DML IP parameter table named for DCN 3.1. The object is not static, so it
+ * has external linkage; its users are outside the lines reviewed here.
+ * det_buffer_size_kbytes comes from DCN3_1_DEFAULT_DET_SIZE, which is
+ * defined elsewhere.
+ */
+struct _vcs_dpi_ip_params_st dcn3_1_ip = {
+ .gpuvm_enable = 1,
+ .gpuvm_max_page_table_levels = 1,
+ .hostvm_enable = 1,
+ .hostvm_max_page_table_levels = 2,
+ .rob_buffer_size_kbytes = 64,
+ .det_buffer_size_kbytes = DCN3_1_DEFAULT_DET_SIZE,
+ .config_return_buffer_size_in_kbytes = 1792,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 32,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 3,
+ .maximum_dsc_bits_per_component = 10,
+ .dsc422_native_support = false,
+ .is_line_buffer_bpp_fixed = true,
+ .line_buffer_fixed_bpp = 48,
+ .line_buffer_size_bits = 789504,
+ .max_line_buffer_lines = 12,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .dispclk_ramp_margin_percent = 1,
+ .max_inter_dcn_tile_repeaters = 8,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 46,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 27,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 119,
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+};
+/*
+ * File-local SoC bounding box defaults named for DCN 3.1.
+ * Five states are given statically (num_states = 5). They share the same
+ * dispclk, dppclk, phyclk_d18 and dtbclk values and differ only in phyclk
+ * (600 for state 0, 810 otherwise) and dscclk. Code that reads or updates
+ * this table is outside the lines reviewed here.
+ */
+static struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = {
+ /*TODO: correct dispclk/dppclk voltage level determination*/
+ .clock_limits = {
+ {
+ .state = 0,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 600.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 186.0,
+ .dtbclk_mhz = 625.0,
+ },
+ {
+ .state = 1,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 625.0,
+ },
+ {
+ .state = 2,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 625.0,
+ },
+ {
+ .state = 3,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 371.0,
+ .dtbclk_mhz = 625.0,
+ },
+ {
+ .state = 4,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 417.0,
+ .dtbclk_mhz = 625.0,
+ },
+ },
+ .num_states = 5,
+ .sr_exit_time_us = 9.0,
+ .sr_enter_plus_exit_time_us = 11.0,
+ .sr_exit_z8_time_us = 442.0,
+ .sr_enter_plus_exit_z8_time_us = 560.0,
+ .writeback_latency_us = 12.0,
+ .dram_channel_width_bytes = 4,
+ .round_trip_ping_latency_dcfclk_cycles = 106,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_sdp_bw_after_urgent = 80.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 60.0,
+ .fabric_datapath_to_dcn_data_return_bytes = 32,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.5,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .do_urgent_latency_adjustment = false,
+ .urgent_latency_adjustment_fabric_clock_component_us = 0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+};
+/*
+ * DML IP parameter table named for DCN 3.15. The object is not static, so it
+ * has external linkage; its users are outside the lines reviewed here.
+ * det_buffer_size_kbytes comes from DCN3_15_DEFAULT_DET_SIZE, which is
+ * defined elsewhere.
+ */
+struct _vcs_dpi_ip_params_st dcn3_15_ip = {
+ .gpuvm_enable = 1,
+ .gpuvm_max_page_table_levels = 1,
+ .hostvm_enable = 1,
+ .hostvm_max_page_table_levels = 2,
+ .rob_buffer_size_kbytes = 64,
+ .det_buffer_size_kbytes = DCN3_15_DEFAULT_DET_SIZE,
+ .min_comp_buffer_size_kbytes = 64,
+ .config_return_buffer_size_in_kbytes = 1024,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 32,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 3,
+ .maximum_dsc_bits_per_component = 10,
+ .dsc422_native_support = false,
+ .is_line_buffer_bpp_fixed = true,
+ .line_buffer_fixed_bpp = 48,
+ .line_buffer_size_bits = 789504,
+ .max_line_buffer_lines = 12,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .dispclk_ramp_margin_percent = 1,
+ .max_inter_dcn_tile_repeaters = 9,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 46,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 27,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 119,
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+};
+/*
+ * File-local SoC bounding box defaults named for DCN 3.15.
+ * There is no clock_limits[] or num_states initialiser here, so both start
+ * zeroed; presumably they are filled in at runtime by code outside the lines
+ * reviewed here - confirm.
+ */
+static struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = {
+ .sr_exit_time_us = 9.0,
+ .sr_enter_plus_exit_time_us = 11.0,
+ .sr_exit_z8_time_us = 50.0,
+ .sr_enter_plus_exit_z8_time_us = 50.0,
+ .writeback_latency_us = 12.0,
+ .dram_channel_width_bytes = 4,
+ .round_trip_ping_latency_dcfclk_cycles = 106,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_sdp_bw_after_urgent = 80.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 60.0,
+ .fabric_datapath_to_dcn_data_return_bytes = 32,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.38,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .do_urgent_latency_adjustment = false,
+ .urgent_latency_adjustment_fabric_clock_component_us = 0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+ .dispclk_dppclk_vco_speed_mhz = 2400.0,
+ .num_chans = 4,
+ .dummy_pstate_latency_us = 10.0
+};
+/*
+ * DML IP parameter table named for DCN 3.16. The object is not static, so it
+ * has external linkage; its users are outside the lines reviewed here.
+ * det_buffer_size_kbytes comes from DCN3_16_DEFAULT_DET_SIZE, which is
+ * defined elsewhere.
+ */
+struct _vcs_dpi_ip_params_st dcn3_16_ip = {
+ .gpuvm_enable = 1,
+ .gpuvm_max_page_table_levels = 1,
+ .hostvm_enable = 1,
+ .hostvm_max_page_table_levels = 2,
+ .rob_buffer_size_kbytes = 64,
+ .det_buffer_size_kbytes = DCN3_16_DEFAULT_DET_SIZE,
+ .min_comp_buffer_size_kbytes = 64,
+ .config_return_buffer_size_in_kbytes = 1024,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 32,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 3,
+ .maximum_dsc_bits_per_component = 10,
+ .dsc422_native_support = false,
+ .is_line_buffer_bpp_fixed = true,
+ .line_buffer_fixed_bpp = 48,
+ .line_buffer_size_bits = 789504,
+ .max_line_buffer_lines = 12,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .dispclk_ramp_margin_percent = 1,
+ .max_inter_dcn_tile_repeaters = 8,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 46,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 27,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 119,
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+};
+/*
+ * File-local SoC bounding box defaults named for DCN 3.16.
+ * Five states are given statically (num_states = 5); dispclk, dppclk and
+ * dscclk do not decrease from state 0 to state 4, and dtbclk is the same in
+ * every state. Code that reads or updates this table is outside the lines
+ * reviewed here.
+ */
+static struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = {
+ /*TODO: correct dispclk/dppclk voltage level determination*/
+ .clock_limits = {
+ {
+ .state = 0,
+ .dispclk_mhz = 556.0,
+ .dppclk_mhz = 556.0,
+ .phyclk_mhz = 600.0,
+ .phyclk_d18_mhz = 445.0,
+ .dscclk_mhz = 186.0,
+ .dtbclk_mhz = 625.0,
+ },
+ {
+ .state = 1,
+ .dispclk_mhz = 625.0,
+ .dppclk_mhz = 625.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 625.0,
+ },
+ {
+ .state = 2,
+ .dispclk_mhz = 625.0,
+ .dppclk_mhz = 625.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 625.0,
+ },
+ {
+ .state = 3,
+ .dispclk_mhz = 1112.0,
+ .dppclk_mhz = 1112.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 371.0,
+ .dtbclk_mhz = 625.0,
+ },
+ {
+ .state = 4,
+ .dispclk_mhz = 1250.0,
+ .dppclk_mhz = 1250.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 417.0,
+ .dtbclk_mhz = 625.0,
+ },
+ },
+ .num_states = 5,
+ .sr_exit_time_us = 9.0,
+ .sr_enter_plus_exit_time_us = 11.0,
+ .sr_exit_z8_time_us = 442.0,
+ .sr_enter_plus_exit_z8_time_us = 560.0,
+ .writeback_latency_us = 12.0,
+ .dram_channel_width_bytes = 4,
+ .round_trip_ping_latency_dcfclk_cycles = 106,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_sdp_bw_after_urgent = 80.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 60.0,
+ .fabric_datapath_to_dcn_data_return_bytes = 32,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.5,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .do_urgent_latency_adjustment = false,
+ .urgent_latency_adjustment_fabric_clock_component_us = 0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+ .dispclk_dppclk_vco_speed_mhz = 2500.0,
+};
+
+/*
+ * Reset the dcc_fraction_of_zs_req_luma and dcc_fraction_of_zs_req_chroma
+ * source fields of one DML pipe entry to zero.
+ *
+ * Note that pipe_cnt is used as an index here, not as a length: only
+ * pipes[pipe_cnt] is touched.
+ */
+void dcn31_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+		int pipe_cnt)
+{
+	display_e2e_pipe_params_st *entry;
+
+	dc_assert_fp_enabled();
+
+	entry = &pipes[pipe_cnt];
+	entry->pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+	entry->pipe.src.dcc_fraction_of_zs_req_luma = 0;
+}
+
+/*
+ * Copy the WM_A watermark-table latencies into the DML soc parameters of
+ * @context.
+ *
+ * If the WM_A entry of dc->clk_mgr->bw_params->wm_table is not flagged valid,
+ * @context is left untouched. Otherwise dram_clock_change_latency_us,
+ * sr_enter_plus_exit_time_us and sr_exit_time_us are overwritten with the
+ * entry's pstate_latency_us, sr_enter_plus_exit_time_us and sr_exit_time_us.
+ */
+void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
+{
+	dc_assert_fp_enabled();
+
+	/* Nothing to apply without a valid set A entry. */
+	if (!dc->clk_mgr->bw_params->wm_table.entries[WM_A].valid)
+		return;
+
+	context->bw_ctx.dml.soc.sr_exit_time_us =
+		dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_exit_time_us;
+	context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us =
+		dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_enter_plus_exit_time_us;
+	context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+		dc->clk_mgr->bw_params->wm_table.entries[WM_A].pstate_latency_us;
+}
+
+/*
+ * DCN 3.15 flavour of the set A soc update.
+ *
+ * Does nothing unless the WM_A watermark-table entry is flagged valid. The two
+ * sr_* times are always taken from that entry. The DRAM clock change latency
+ * is taken from it only when the DML result for the current voltage level and
+ * maxMpcComb reports dm_dram_clock_change_vactive; in every other case
+ * soc.dummy_pstate_latency_us is used instead.
+ */
+void dcn315_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
+{
+	dc_assert_fp_enabled();
+
+	if (!dc->clk_mgr->bw_params->wm_table.entries[WM_A].valid)
+		return;
+
+	context->bw_ctx.dml.soc.sr_exit_time_us =
+		dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_exit_time_us;
+	context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us =
+		dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_enter_plus_exit_time_us;
+
+	/* For 315 pstate change is only supported if possible in vactive */
+	if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[context->bw_ctx.dml.vba.VoltageLevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_vactive)
+		context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+			dc->clk_mgr->bw_params->wm_table.entries[WM_A].pstate_latency_us;
+	else
+		context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+			context->bw_ctx.dml.soc.dummy_pstate_latency_us;
+}
+
+/*
+ * Fill in watermark set A, the per-pipe display clocks, the DLG parameters and
+ * the DET / compressed-buffer split of @context for voltage level @vlevel.
+ *
+ * @pipes, @pipe_cnt: DML pipe parameter array and the count handed to every
+ *                    DML getter called below.
+ * @vlevel:           index used for vba.DCFCLKState[], soc.clock_limits[] and
+ *                    vba.DRAMClockChangeSupport[].
+ *
+ * With pipe_cnt == 0 only clk.dcfclk_khz is written and the function returns.
+ * Only set A is actually calculated; sets B, C and D are plain copies of it.
+ */
+void dcn31_calculate_wm_and_dlg_fp(
+ struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel)
+{
+ int i, pipe_idx, total_det = 0, active_hubp_count = 0;
+ double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+ uint32_t cstate_enter_plus_exit_z8_ns;
+
+ dc_assert_fp_enabled();
+
+ /* Never go below the SoC's minimum DCFCLK. */
+ if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk)
+ dcfclk = context->bw_ctx.dml.soc.min_dcfclk;
+
+ /* We don't recalculate clocks for 0 pipe configs, which can block
+ * S0i3 as high clocks will block low power states
+ * Override any clocks that can block S0i3 to min here
+ */
+ if (pipe_cnt == 0) {
+ /* NOTE(review): dcfclk is stored unscaled into a *_khz field here, while
+ * the same value is written to clks_cfg.dcfclk_mhz below - confirm the
+ * unit mismatch is intentional.
+ */
+ context->bw_ctx.bw.dcn.clk.dcfclk_khz = dcfclk; // always should be vlevel 0
+ return;
+ }
+
+ /* Only pipes[0] receives the voltage / dcfclk / socclk configuration here. */
+ pipes[0].clks_cfg.voltage = vlevel;
+ pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
+
+ /* Z8 enter+exit watermark, scaled by 1000 for the *_ns field (presumably us -> ns). */
+ cstate_enter_plus_exit_z8_ns =
+ get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+
+ /* When the stutter period is below the debug minimum Z8 residency, raise the
+ * watermark to at least minimum_z8_residency_time * 1000.
+ */
+ if (get_stutter_period(&context->bw_ctx.dml, pipes, pipe_cnt) < dc->debug.minimum_z8_residency_time &&
+ cstate_enter_plus_exit_z8_ns < dc->debug.minimum_z8_residency_time * 1000)
+ cstate_enter_plus_exit_z8_ns = dc->debug.minimum_z8_residency_time * 1000;
+
+ /* Set A:
+ * All clocks min required
+ *
+ * Set A calculated last so that following calculations are based on Set A
+ */
+ /* The resource pool hook updates the soc latencies before the getters run. */
+ dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
+ context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns = cstate_enter_plus_exit_z8_ns;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ /* Sets B, C and D simply mirror set A. */
+ context->bw_ctx.bw.dcn.watermarks.b = context->bw_ctx.bw.dcn.watermarks.a;
+ context->bw_ctx.bw.dcn.watermarks.c = context->bw_ctx.bw.dcn.watermarks.a;
+ context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a;
+
+ /* Walk every hardware pipe; pipe_idx (the index into pipes[]) only advances
+ * for pipes that have a stream attached.
+ */
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+
+ /* Counted separately: a stream pipe without a plane is not counted as an active hubp. */
+ if (context->res_ctx.pipe_ctx[i].plane_state)
+ active_hubp_count++;
+
+ pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
+ pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+
+ /* forced_clocks / max_disp_clk replace both clocks with the state 0 limits. */
+ if (dc->config.forced_clocks || dc->debug.max_disp_clk) {
+ pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
+ pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
+ }
+ /* Debug floors: the kHz minimums are compared against MHz * 1000. */
+ if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000)
+ pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
+ if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
+ pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;
+
+ pipe_idx++;
+ }
+
+ dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
+ /* For 31x apu pstate change is only supported if possible in vactive*/
+ context->bw_ctx.bw.dcn.clk.p_state_change_support =
+ context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_vactive;
+ /* If DCN isn't making memory requests we can allow pstate change and lower clocks */
+ if (!active_hubp_count) {
+ context->bw_ctx.bw.dcn.clk.socclk_khz = 0;
+ context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
+ context->bw_ctx.bw.dcn.clk.dcfclk_khz = 0;
+ context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = 0;
+ context->bw_ctx.bw.dcn.clk.dramclk_khz = 0;
+ context->bw_ctx.bw.dcn.clk.fclk_khz = 0;
+ context->bw_ctx.bw.dcn.clk.p_state_change_support = true;
+ for (i = 0; i < dc->res_pool->pipe_count; i++)
+ if (context->res_ctx.pipe_ctx[i].stream)
+ context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = 0;
+ }
+ /* Record the DET size of every stream pipe and add up the total. */
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+
+ context->res_ctx.pipe_ctx[i].det_buffer_size_kb =
+ get_det_buffer_size_kbytes(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+ /* Sizes above 384 are halved before being accounted.
+ * NOTE(review): the reason for the 384 threshold is not visible here - confirm.
+ */
+ if (context->res_ctx.pipe_ctx[i].det_buffer_size_kb > 384)
+ context->res_ctx.pipe_ctx[i].det_buffer_size_kb /= 2;
+ total_det += context->res_ctx.pipe_ctx[i].det_buffer_size_kb;
+ pipe_idx++;
+ }
+ /* Whatever the return buffer has left after all DET allocations becomes the compressed buffer. */
+ context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes - total_det;
+}
+
+/*
+ * Rebuild the DCN 3.1 SoC bounding box from the clock table in @bw_params and
+ * re-initialise dc->dml with it.
+ *
+ * The new states are assembled in the dc->scratch clock_limits buffer, which
+ * starts out as a copy of the current dcn3_1_soc.clock_limits and is copied
+ * back once every clk_table entry has been processed. Per-state dcfclk, fabric
+ * clock, socclk and DRAM speed come from the clock table; dispclk and dppclk
+ * use the table-wide maximum when it is non-zero; the remaining clocks are
+ * taken from the existing state selected as closest_clk_lvl.
+ *
+ * This writes to the shared dcn3_1_soc and dcn3_1_ip tables (declared outside
+ * this function), so a later call runs its closest-level search against the
+ * states stored by this one.
+ */
+void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits;
+ struct clk_limit_table *clk_table = &bw_params->clk_table;
+ unsigned int i, closest_clk_lvl;
+ int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
+ int j;
+
+ dc_assert_fp_enabled();
+
+ /* Start from the current limits; entries beyond num_entries keep these values. */
+ memcpy(s, dcn3_1_soc.clock_limits, sizeof(dcn3_1_soc.clock_limits));
+
+ // Default clock levels are used for diags, which may lead to overclocking.
+ dcn3_1_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
+ dcn3_1_ip.max_num_dpp = dc->res_pool->pipe_count;
+ dcn3_1_soc.num_chans = bw_params->num_channels;
+
+ ASSERT(clk_table->num_entries);
+
+ /* Prepass to find max clocks independent of voltage level. */
+ for (i = 0; i < clk_table->num_entries; ++i) {
+ if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
+ if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
+ }
+
+ for (i = 0; i < clk_table->num_entries; i++) {
+ /* loop backwards*/
+ /* Pick the highest existing state whose dcfclk does not exceed this
+ * entry's dcfclk; stays at state 0 when none qualifies.
+ */
+ for (closest_clk_lvl = 0, j = dcn3_1_soc.num_states - 1; j >= 0; j--) {
+ if ((unsigned int) dcn3_1_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
+ closest_clk_lvl = j;
+ break;
+ }
+ }
+
+ s[i].state = i;
+
+ /* Clocks dependent on voltage level. */
+ s[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ s[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+ s[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
+ s[i].dram_speed_mts = clk_table->entries[i].memclk_mhz *
+ 2 * clk_table->entries[i].wck_ratio;
+
+ /* Clocks independent of voltage level. */
+ s[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
+ dcn3_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+
+ s[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
+ dcn3_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+
+ /* Everything else is inherited from the closest existing state. */
+ s[i].dram_bw_per_chan_gbps =
+ dcn3_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ s[i].dscclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ s[i].dtbclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ s[i].phyclk_d18_mhz =
+ dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ s[i].phyclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+ }
+
+ /* An empty clock table leaves the previous state count in place. */
+ if (clk_table->num_entries)
+ dcn3_1_soc.num_states = clk_table->num_entries;
+
+ memcpy(dcn3_1_soc.clock_limits, s, sizeof(dcn3_1_soc.clock_limits));
+
+ /* The VCO speed comes from clk_mgr (kHz -> MHz) and is mirrored into the live dc->dml.soc. */
+ dcn3_1_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+
+ /* Apply the debug latency override when it is set and differs from the
+ * current value.
+ * NOTE(review): if dram_clock_change_latency_ns is an integer type, as the
+ * (int) comparison suggests, the division by 1000 drops any sub-microsecond
+ * remainder - confirm that is intended.
+ */
+ if ((int)(dcn3_1_soc.dram_clock_change_latency_us * 1000)
+ != dc->debug.dram_clock_change_latency_ns
+ && dc->debug.dram_clock_change_latency_ns) {
+ dcn3_1_soc.dram_clock_change_latency_us = dc->debug.dram_clock_change_latency_ns / 1000;
+ }
+
+ dml_init_instance(&dc->dml, &dcn3_1_soc, &dcn3_1_ip, DML_PROJECT_DCN31);
+}
+
+/*
+ * Rebuild the DCN 3.15 SoC bounding box from the clock table in @bw_params and
+ * re-initialise dc->dml with it.
+ *
+ * Unlike dcn31_update_bw_bounding_box() and dcn316_update_bw_bounding_box(),
+ * this variant writes dcn3_15_soc.clock_limits in place (no scratch copy),
+ * takes dtbclk and both phyclk values straight from the clock table, derives
+ * dscclk as max dispclk / 3 and sets num_states unconditionally.
+ *
+ * It writes to the shared dcn3_15_soc and dcn3_15_ip tables, which are
+ * declared outside this function.
+ */
+void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ struct clk_limit_table *clk_table = &bw_params->clk_table;
+ int i, max_dispclk_mhz = 0, max_dppclk_mhz = 0;
+
+ dc_assert_fp_enabled();
+
+ dcn3_15_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
+ dcn3_15_ip.max_num_dpp = dc->res_pool->pipe_count;
+
+ /* Channel count and width keep their current values unless bw_params supplies positive ones. */
+ if (bw_params->num_channels > 0)
+ dcn3_15_soc.num_chans = bw_params->num_channels;
+ if (bw_params->dram_channel_width_bytes > 0)
+ dcn3_15_soc.dram_channel_width_bytes = bw_params->dram_channel_width_bytes;
+
+ ASSERT(clk_table->num_entries);
+
+ /* Setup soc to always use max dispclk/dppclk to avoid odm-to-lower-voltage */
+ for (i = 0; i < clk_table->num_entries; ++i) {
+ if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
+ if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
+ }
+
+ for (i = 0; i < clk_table->num_entries; i++) {
+ dcn3_15_soc.clock_limits[i].state = i;
+
+ /* Clocks dependent on voltage level. */
+ dcn3_15_soc.clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ dcn3_15_soc.clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+ dcn3_15_soc.clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
+ dcn3_15_soc.clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
+
+ /* These aren't actually read from smu, but rather set in clk_mgr defaults */
+ dcn3_15_soc.clock_limits[i].dtbclk_mhz = clk_table->entries[i].dtbclk_mhz;
+ dcn3_15_soc.clock_limits[i].phyclk_d18_mhz = clk_table->entries[i].phyclk_d18_mhz;
+ dcn3_15_soc.clock_limits[i].phyclk_mhz = clk_table->entries[i].phyclk_mhz;
+
+ /* Clocks independent of voltage level. */
+ dcn3_15_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
+ dcn3_15_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
+ dcn3_15_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3.0;
+ }
+ /* Written even when the table is empty (num_entries == 0). */
+ dcn3_15_soc.num_states = clk_table->num_entries;
+
+
+ /* Set vco to max_dispclk * 2 to make sure the highest dispclk is always available for dml calcs,
+ * no impact outside of dml validation
+ */
+ dcn3_15_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+
+ /* Apply the debug latency override when it is set and differs from the
+ * current value.
+ * NOTE(review): if dram_clock_change_latency_ns is an integer type, as the
+ * (int) comparison suggests, the division by 1000 drops any sub-microsecond
+ * remainder - confirm that is intended.
+ */
+ if ((int)(dcn3_15_soc.dram_clock_change_latency_us * 1000)
+ != dc->debug.dram_clock_change_latency_ns
+ && dc->debug.dram_clock_change_latency_ns) {
+ dcn3_15_soc.dram_clock_change_latency_us = dc->debug.dram_clock_change_latency_ns / 1000;
+ }
+
+ dml_init_instance(&dc->dml, &dcn3_15_soc, &dcn3_15_ip, DML_PROJECT_DCN315);
+}
+
+/*
+ * Rebuild the DCN 3.16 SoC bounding box from the clock table in @bw_params and
+ * re-initialise dc->dml with it.
+ *
+ * The new states are assembled in the dc->scratch clock_limits buffer, which
+ * starts out as a copy of the current dcn3_16_soc.clock_limits and is copied
+ * back once every clk_table entry has been processed. Per-state dcfclk, fabric
+ * clock, socclk and DRAM speed come from the clock table; dispclk and dppclk
+ * use the table-wide maximum when it is non-zero; the remaining clocks are
+ * taken from the existing state selected as closest_clk_lvl.
+ *
+ * The VCO speed is set to twice the maximum dispclk, and only when that
+ * maximum is non-zero. The DML instance is initialised with
+ * DML_PROJECT_DCN31.
+ */
+void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits;
+ struct clk_limit_table *clk_table = &bw_params->clk_table;
+ unsigned int i, closest_clk_lvl;
+ int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
+ int j;
+
+ dc_assert_fp_enabled();
+
+ /* Start from the current limits; entries beyond num_entries keep these values. */
+ memcpy(s, dcn3_16_soc.clock_limits, sizeof(dcn3_16_soc.clock_limits));
+
+ // Default clock levels are used for diags, which may lead to overclocking.
+ dcn3_16_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
+ dcn3_16_ip.max_num_dpp = dc->res_pool->pipe_count;
+ dcn3_16_soc.num_chans = bw_params->num_channels;
+
+ ASSERT(clk_table->num_entries);
+
+ /* Prepass to find max clocks independent of voltage level. */
+ for (i = 0; i < clk_table->num_entries; ++i) {
+ if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
+ if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
+ }
+
+ for (i = 0; i < clk_table->num_entries; i++) {
+ /* loop backwards*/
+ /* Pick the highest existing state whose dcfclk does not exceed this
+ * entry's dcfclk; stays at state 0 when none qualifies.
+ */
+ for (closest_clk_lvl = 0, j = dcn3_16_soc.num_states - 1; j >= 0; j--) {
+ if ((unsigned int) dcn3_16_soc.clock_limits[j].dcfclk_mhz <=
+ clk_table->entries[i].dcfclk_mhz) {
+ closest_clk_lvl = j;
+ break;
+ }
+ }
+
+ s[i].state = i;
+
+ /* Clocks dependent on voltage level. */
+ s[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ s[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+ s[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
+ s[i].dram_speed_mts = clk_table->entries[i].memclk_mhz *
+ 2 * clk_table->entries[i].wck_ratio;
+
+ /* Clocks independent of voltage level. */
+ s[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
+ dcn3_16_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+
+ s[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
+ dcn3_16_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+
+ /* Everything else is inherited from the closest existing state. */
+ s[i].dram_bw_per_chan_gbps =
+ dcn3_16_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ s[i].dscclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ s[i].dtbclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ s[i].phyclk_d18_mhz =
+ dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ s[i].phyclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+ }
+
+ /* An empty clock table leaves the previous state count in place. */
+ if (clk_table->num_entries)
+ dcn3_16_soc.num_states = clk_table->num_entries;
+
+ memcpy(dcn3_16_soc.clock_limits, s, sizeof(dcn3_16_soc.clock_limits));
+
+ /* Twice the max dispclk, mirrored into the live dc->dml.soc; skipped when no dispclk was reported. */
+ if (max_dispclk_mhz) {
+ dcn3_16_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ }
+ /* Apply the debug latency override when it is set and differs from the
+ * current value.
+ * NOTE(review): if dram_clock_change_latency_ns is an integer type, as the
+ * (int) comparison suggests, the division by 1000 drops any sub-microsecond
+ * remainder - confirm that is intended.
+ */
+ if ((int)(dcn3_16_soc.dram_clock_change_latency_us * 1000)
+ != dc->debug.dram_clock_change_latency_ns
+ && dc->debug.dram_clock_change_latency_ns) {
+ dcn3_16_soc.dram_clock_change_latency_us = dc->debug.dram_clock_change_latency_ns / 1000;
+ }
+
+ dml_init_instance(&dc->dml, &dcn3_16_soc, &dcn3_16_ip, DML_PROJECT_DCN31);
+}
+
+/*
+ * Derive a pixel rate limit from the state 0 dispclk of @soc.
+ *
+ * Computes clock_limits[0].dispclk_mhz * 10000 divided by
+ * (1 + dcn_downspread_percent / 100) in floating point and returns the result
+ * converted to int.
+ */
+int dcn_get_max_non_odm_pix_rate_100hz(struct _vcs_dpi_soc_bounding_box_st *soc)
+{
+	double downspread_scale;
+
+	dc_assert_fp_enabled();
+
+	downspread_scale = 1.0 + soc->dcn_downspread_percent / 100.0;
+
+	return soc->clock_limits[0].dispclk_mhz * 10000.0 / downspread_scale;
+}
+
+/*
+ * Estimate how many DET segments of @seg_size_kb are needed to ride out a DRAM
+ * clock change at the given pixel clock and bits-per-pixel value.
+ *
+ * The estimate is soc->dram_clock_change_latency_us * pix_clk_100hz * bpp /
+ * 10240000, rounded up to a whole number of segments: seg_size_kb - 1 is added
+ * in floating point, the sum is truncated to int and then integer-divided by
+ * seg_size_kb.
+ */
+int dcn_get_approx_det_segs_required_for_pstate(
+		struct _vcs_dpi_soc_bounding_box_st *soc,
+		int pix_clk_100hz, int bpp, int seg_size_kb)
+{
+	double latency_hiding_size;
+	int padded_size;
+
+	dc_assert_fp_enabled();
+
+	/* Roughly calculate required crb to hide latency. In practice there is slightly
+	 * more buffer available for latency hiding
+	 */
+	latency_hiding_size = soc->dram_clock_change_latency_us * pix_clk_100hz * bpp
+			/ 10240000;
+	padded_size = (int)(latency_hiding_size + seg_size_kb - 1);
+
+	return padded_size / seg_size_kb;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
new file mode 100644
index 000000000000..dfcc5d50071e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2019-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN31_FPU_H__
+#define __DCN31_FPU_H__
+
+#define DCN3_1_DEFAULT_DET_SIZE 384
+#define DCN3_15_DEFAULT_DET_SIZE 192
+#define DCN3_15_MIN_COMPBUF_SIZE_KB 128
+#define DCN3_16_DEFAULT_DET_SIZE 192
+#define DCN3_16_MIN_COMPBUF_SIZE_KB 128
+
+void dcn31_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+ int pipe_cnt);
+
+void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context);
+void dcn315_update_soc_for_wm_a(struct dc *dc, struct dc_state *context);
+
+void dcn31_calculate_wm_and_dlg_fp(
+ struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel);
+
+void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
+void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
+void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
+int dcn_get_max_non_odm_pix_rate_100hz(struct _vcs_dpi_soc_bounding_box_st *soc);
+int dcn_get_approx_det_segs_required_for_pstate(
+ struct _vcs_dpi_soc_bounding_box_st *soc,
+ int pix_clk_100hz, int bpp, int seg_size_kb);
+
+int dcn31x_populate_dml_pipes_from_context(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ enum dc_validate_mode validate_mode);
+#endif /* __DCN31_FPU_H__*/
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
index 7e937bdcea00..ed59c77bc6f6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
@@ -24,8 +24,8 @@
*/
#include "dc.h"
-#include "dc_link.h"
#include "../display_mode_lib.h"
+#include "../dcn30/display_mode_vba_30.h"
#include "display_mode_vba_31.h"
#include "../dml_inline_defs.h"
@@ -42,6 +42,8 @@
#define BPP_BLENDED_PIPE 0xffffffff
#define DCN31_MAX_DSC_IMAGE_WIDTH 5184
#define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096
+#define DCN3_15_MIN_COMPBUF_SIZE_KB 128
+#define DCN3_15_MAX_DET_SIZE 384
// For DML-C changes that hasn't been propagated to VBA yet
//#define __DML_VBA_ALLOW_DELTA__
@@ -64,6 +66,8 @@ typedef struct {
double DCFCLKDeepSleep;
unsigned int DPPPerPlane;
bool ScalerEnabled;
+ double VRatio;
+ double VRatioChroma;
enum scan_direction_class SourceScan;
unsigned int BlockWidth256BytesY;
unsigned int BlockHeight256BytesY;
@@ -84,17 +88,6 @@ typedef struct {
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff
-static bool CalculateBytePerPixelAnd256BBlockSizes(
- enum source_format_class SourcePixelFormat,
- enum dm_swizzle_mode SurfaceTiling,
- unsigned int *BytePerPixelY,
- unsigned int *BytePerPixelC,
- double *BytePerPixelDETY,
- double *BytePerPixelDETC,
- unsigned int *BlockHeight256BytesY,
- unsigned int *BlockHeight256BytesC,
- unsigned int *BlockWidth256BytesY,
- unsigned int *BlockWidth256BytesC);
static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
static unsigned int dscceComputeDelay(
@@ -259,33 +252,13 @@ static void CalculateRowBandwidth(
static void CalculateFlipSchedule(
struct display_mode_lib *mode_lib,
+ unsigned int k,
double HostVMInefficiencyFactor,
double UrgentExtraLatency,
double UrgentLatency,
- unsigned int GPUVMMaxPageTableLevels,
- bool HostVMEnable,
- unsigned int HostVMMaxNonCachedPageTableLevels,
- bool GPUVMEnable,
- double HostVMMinPageSize,
double PDEAndMetaPTEBytesPerFrame,
double MetaRowBytes,
- double DPTEBytesPerRow,
- double BandwidthAvailableForImmediateFlip,
- unsigned int TotImmediateFlipBytes,
- enum source_format_class SourcePixelFormat,
- double LineTime,
- double VRatio,
- double VRatioChroma,
- double Tno_bw,
- bool DCCEnable,
- unsigned int dpte_row_height,
- unsigned int meta_row_height,
- unsigned int dpte_row_height_chroma,
- unsigned int meta_row_height_chroma,
- double *DestinationLinesToRequestVMInImmediateFlip,
- double *DestinationLinesToRequestRowInImmediateFlip,
- double *final_flip_bw,
- bool *ImmediateFlipSupportedForPipe);
+ double DPTEBytesPerRow);
static double CalculateWriteBackDelay(
enum source_format_class WritebackPixelFormat,
double WritebackHRatio,
@@ -319,64 +292,28 @@ static void CalculateVupdateAndDynamicMetadataParameters(
static void CalculateWatermarksAndDRAMSpeedChangeSupport(
struct display_mode_lib *mode_lib,
unsigned int PrefetchMode,
- unsigned int NumberOfActivePlanes,
- unsigned int MaxLineBufferLines,
- unsigned int LineBufferSize,
- unsigned int WritebackInterfaceBufferSize,
double DCFCLK,
double ReturnBW,
- bool SynchronizedVBlank,
- unsigned int dpte_group_bytes[],
- unsigned int MetaChunkSize,
double UrgentLatency,
double ExtraLatency,
- double WritebackLatency,
- double WritebackChunkSize,
double SOCCLK,
- double DRAMClockChangeLatency,
- double SRExitTime,
- double SREnterPlusExitTime,
- double SRExitZ8Time,
- double SREnterPlusExitZ8Time,
double DCFCLKDeepSleep,
unsigned int DETBufferSizeY[],
unsigned int DETBufferSizeC[],
unsigned int SwathHeightY[],
unsigned int SwathHeightC[],
- unsigned int LBBitPerPixel[],
double SwathWidthY[],
double SwathWidthC[],
- double HRatio[],
- double HRatioChroma[],
- unsigned int vtaps[],
- unsigned int VTAPsChroma[],
- double VRatio[],
- double VRatioChroma[],
- unsigned int HTotal[],
- double PixelClock[],
- unsigned int BlendingAndTiming[],
unsigned int DPPPerPlane[],
double BytePerPixelDETY[],
double BytePerPixelDETC[],
- double DSTXAfterScaler[],
- double DSTYAfterScaler[],
- bool WritebackEnable[],
- enum source_format_class WritebackPixelFormat[],
- double WritebackDestinationWidth[],
- double WritebackDestinationHeight[],
- double WritebackSourceHeight[],
bool UnboundedRequestEnabled,
int unsigned CompressedBufferSizeInkByte,
enum clock_change_support *DRAMClockChangeSupport,
- double *UrgentWatermark,
- double *WritebackUrgentWatermark,
- double *DRAMClockChangeWatermark,
- double *WritebackDRAMClockChangeWatermark,
double *StutterExitWatermark,
double *StutterEnterPlusExitWatermark,
double *Z8StutterExitWatermark,
- double *Z8StutterEnterPlusExitWatermark,
- double *MinActiveDRAMClockChangeLatencySupported);
+ double *Z8StutterEnterPlusExitWatermark);
static void CalculateDCFCLKDeepSleep(
struct display_mode_lib *mode_lib,
@@ -422,62 +359,8 @@ static void CalculateUrgentBurstFactor(
static void UseMinimumDCFCLK(
struct display_mode_lib *mode_lib,
- int MaxInterDCNTileRepeaters,
int MaxPrefetchMode,
- double FinalDRAMClockChangeLatency,
- double SREnterPlusExitTime,
- int ReturnBusWidth,
- int RoundTripPingLatencyCycles,
- int ReorderingBytes,
- int PixelChunkSizeInKByte,
- int MetaChunkSize,
- bool GPUVMEnable,
- int GPUVMMaxPageTableLevels,
- bool HostVMEnable,
- int NumberOfActivePlanes,
- double HostVMMinPageSize,
- int HostVMMaxNonCachedPageTableLevels,
- bool DynamicMetadataVMEnabled,
- enum immediate_flip_requirement ImmediateFlipRequirement,
- bool ProgressiveToInterlaceUnitInOPP,
- double MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation,
- double PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
- int VTotal[],
- int VActive[],
- int DynamicMetadataTransmittedBytes[],
- int DynamicMetadataLinesBeforeActiveRequired[],
- bool Interlace[],
- double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
- double RequiredDISPCLK[][2],
- double UrgLatency[],
- unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
- double ProjectedDCFCLKDeepSleep[][2],
- double MaximumVStartup[][2][DC__NUM_DPP__MAX],
- double TotalVActivePixelBandwidth[][2],
- double TotalVActiveCursorBandwidth[][2],
- double TotalMetaRowBandwidth[][2],
- double TotalDPTERowBandwidth[][2],
- unsigned int TotalNumberOfActiveDPP[][2],
- unsigned int TotalNumberOfDCCActiveDPP[][2],
- int dpte_group_bytes[],
- double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
- double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
- int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
- int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
- int BytePerPixelY[],
- int BytePerPixelC[],
- int HTotal[],
- double PixelClock[],
- double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
- double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
- double MetaRowBytes[][2][DC__NUM_DPP__MAX],
- bool DynamicMetadataEnable[],
- double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
- double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
- double ReadBandwidthLuma[],
- double ReadBandwidthChroma[],
- double DCFCLKPerState[],
- double DCFCLKState[][2]);
+ int ReorderingBytes);
static void CalculatePixelDeliveryTimes(
unsigned int NumberOfActivePlanes,
@@ -649,7 +532,8 @@ static void CalculateStutterEfficiency(
static void CalculateSwathAndDETConfiguration(
bool ForceSingleDPP,
int NumberOfActivePlanes,
- unsigned int DETBufferSizeInKByte,
+ bool DETSharedByAllDPP,
+ unsigned int DETBufferSizeInKByte[],
double MaximumSwathWidthLuma[],
double MaximumSwathWidthChroma[],
enum scan_direction_class SourceScan[],
@@ -994,8 +878,11 @@ static bool CalculatePrefetchSchedule(
double DSTTotalPixelsAfterScaler;
double LineTime;
double dst_y_prefetch_equ;
+#ifdef __DML_VBA_DEBUG__
double Tsw_oto;
+#endif
double prefetch_bw_oto;
+ double prefetch_bw_pr;
double Tvm_oto;
double Tr0_oto;
double Tvm_oto_lines;
@@ -1025,6 +912,7 @@ static bool CalculatePrefetchSchedule(
double min_Lsw;
double Tsw_est1 = 0;
double Tsw_est3 = 0;
+ double max_Tsw = 0;
if (GPUVMEnable == true && HostVMEnable == true) {
HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
@@ -1165,15 +1053,19 @@ static bool CalculatePrefetchSchedule(
bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
else
bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
-
+ /*rev 99*/
+ prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane;
+ prefetch_bw_pr = dml_min(1, myPipe->VRatio) * prefetch_bw_pr;
+ max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
- prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
+ prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
- min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
+ min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
+#ifdef __DML_VBA_DEBUG__
Tsw_oto = Lsw_oto * LineTime;
+#endif
- prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto;
#ifdef __DML_VBA_DEBUG__
dml_print("DML: HTotal: %d\n", myPipe->HTotal);
@@ -1213,6 +1105,7 @@ static bool CalculatePrefetchSchedule(
Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
+ dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
Tpre_rounded = dst_y_prefetch_equ * LineTime;
@@ -1443,7 +1336,7 @@ static bool CalculatePrefetchSchedule(
dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
#endif
- if ((SwathHeightC > 4)) {
+ if ((SwathHeightC > 4) || VInitPreFillC > 3) {
if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
*VRatioPrefetchC = dml_max(
*VRatioPrefetchC,
@@ -1552,12 +1445,9 @@ static bool CalculatePrefetchSchedule(
if (MyError) {
*PrefetchBandwidth = 0;
- TimeForFetchingMetaPTE = 0;
- TimeForFetchingRowInVBlank = 0;
*DestinationLinesToRequestVMInVBlank = 0;
*DestinationLinesToRequestRowInVBlank = 0;
*DestinationLinesForPrefetch = 0;
- LinesToRequestPrefetchPixelData = 0;
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
*RequiredPrefetchPixDataBWLuma = 0;
@@ -2032,15 +1922,6 @@ static unsigned int CalculateVMAndRowBytes(
*PixelPTEReqWidth = 32768.0 / BytePerPixel;
*PTERequestSize = 64;
FractionOfPTEReturnDrop = 0;
- } else if (MacroTileSizeBytes == 4096) {
- PixelPTEReqHeightPTEs = 1;
- *PixelPTEReqHeight = MacroTileHeight;
- *PixelPTEReqWidth = 8 * *MacroTileWidth;
- *PTERequestSize = 64;
- if (ScanDirection != dm_vert)
- FractionOfPTEReturnDrop = 0;
- else
- FractionOfPTEReturnDrop = 7 / 8;
} else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
PixelPTEReqHeightPTEs = 16;
*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
@@ -2267,7 +2148,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
DTRACE(" return_bus_bw = %f", v->ReturnBW);
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
- CalculateBytePerPixelAnd256BBlockSizes(
+ dml30_CalculateBytePerPixelAnd256BBlockSizes(
v->SourcePixelFormat[k],
v->SurfaceTiling[k],
&v->BytePerPixelY[k],
@@ -2717,6 +2598,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
myPipe.DPPPerPlane = v->DPPPerPlane[k];
myPipe.ScalerEnabled = v->ScalerEnabled[k];
+ myPipe.VRatio = v->VRatio[k];
+ myPipe.VRatioChroma = v->VRatioChroma[k];
myPipe.SourceScan = v->SourceScan[k];
myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
@@ -2959,33 +2842,13 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
CalculateFlipSchedule(
mode_lib,
+ k,
HostVMInefficiencyFactor,
v->UrgentExtraLatency,
v->UrgentLatency,
- v->GPUVMMaxPageTableLevels,
- v->HostVMEnable,
- v->HostVMMaxNonCachedPageTableLevels,
- v->GPUVMEnable,
- v->HostVMMinPageSize,
v->PDEAndMetaPTEBytesFrame[k],
v->MetaRowByte[k],
- v->PixelPTEBytesPerRow[k],
- v->BandwidthAvailableForImmediateFlip,
- v->TotImmediateFlipBytes,
- v->SourcePixelFormat[k],
- v->HTotal[k] / v->PixelClock[k],
- v->VRatio[k],
- v->VRatioChroma[k],
- v->Tno_bw[k],
- v->DCCEnable[k],
- v->dpte_row_height[k],
- v->meta_row_height[k],
- v->dpte_row_height_chroma[k],
- v->meta_row_height_chroma[k],
- &v->DestinationLinesToRequestVMInImmediateFlip[k],
- &v->DestinationLinesToRequestRowInImmediateFlip[k],
- &v->final_flip_bw[k],
- &v->ImmediateFlipSupportedForPipe[k]);
+ v->PixelPTEBytesPerRow[k]);
}
v->total_dcn_read_bw_with_flip = 0.0;
@@ -3041,7 +2904,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
v->ImmediateFlipSupported)) ? true : false;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
- dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
+ dml_print("DML::%s: ImmediateFlipRequirement[0] %d\n", __func__, v->ImmediateFlipRequirement[0] == dm_immediate_flip_required);
dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
@@ -3072,64 +2935,28 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
CalculateWatermarksAndDRAMSpeedChangeSupport(
mode_lib,
PrefetchMode,
- v->NumberOfActivePlanes,
- v->MaxLineBufferLines,
- v->LineBufferSize,
- v->WritebackInterfaceBufferSize,
v->DCFCLK,
v->ReturnBW,
- v->SynchronizedVBlank,
- v->dpte_group_bytes,
- v->MetaChunkSize,
v->UrgentLatency,
v->UrgentExtraLatency,
- v->WritebackLatency,
- v->WritebackChunkSize,
v->SOCCLK,
- v->DRAMClockChangeLatency,
- v->SRExitTime,
- v->SREnterPlusExitTime,
- v->SRExitZ8Time,
- v->SREnterPlusExitZ8Time,
v->DCFCLKDeepSleep,
v->DETBufferSizeY,
v->DETBufferSizeC,
v->SwathHeightY,
v->SwathHeightC,
- v->LBBitPerPixel,
v->SwathWidthY,
v->SwathWidthC,
- v->HRatio,
- v->HRatioChroma,
- v->vtaps,
- v->VTAPsChroma,
- v->VRatio,
- v->VRatioChroma,
- v->HTotal,
- v->PixelClock,
- v->BlendingAndTiming,
v->DPPPerPlane,
v->BytePerPixelDETY,
v->BytePerPixelDETC,
- v->DSTXAfterScaler,
- v->DSTYAfterScaler,
- v->WritebackEnable,
- v->WritebackPixelFormat,
- v->WritebackDestinationWidth,
- v->WritebackDestinationHeight,
- v->WritebackSourceHeight,
v->UnboundedRequestEnabled,
v->CompressedBufferSizeInkByte,
&DRAMClockChangeSupport,
- &v->UrgentWatermark,
- &v->WritebackUrgentWatermark,
- &v->DRAMClockChangeWatermark,
- &v->WritebackDRAMClockChangeWatermark,
&v->StutterExitWatermark,
&v->StutterEnterPlusExitWatermark,
&v->Z8StutterExitWatermark,
- &v->Z8StutterEnterPlusExitWatermark,
- &v->MinActiveDRAMClockChangeLatencySupported);
+ &v->Z8StutterEnterPlusExitWatermark);
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
if (v->WritebackEnable[k] == true) {
@@ -3281,7 +3108,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
v->SurfaceWidthC[k],
v->SurfaceHeightY[k],
v->SurfaceHeightC[k],
- v->DETBufferSizeInKByte[0] * 1024,
+ v->DETBufferSizeInKByte[k] * 1024,
v->BlockHeight256BytesY[k],
v->BlockHeight256BytesC[k],
v->SurfaceTiling[k],
@@ -3460,7 +3287,7 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
- CalculateBytePerPixelAnd256BBlockSizes(
+ dml30_CalculateBytePerPixelAnd256BBlockSizes(
v->SourcePixelFormat[k],
v->SurfaceTiling[k],
&BytePerPixY[k],
@@ -3476,7 +3303,8 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
CalculateSwathAndDETConfiguration(
false,
v->NumberOfActivePlanes,
- v->DETBufferSizeInKByte[0],
+ mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
+ v->DETBufferSizeInKByte,
dummy1,
dummy2,
v->SourceScan,
@@ -3514,94 +3342,6 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
&dummysinglestring);
}
-static bool CalculateBytePerPixelAnd256BBlockSizes(
- enum source_format_class SourcePixelFormat,
- enum dm_swizzle_mode SurfaceTiling,
- unsigned int *BytePerPixelY,
- unsigned int *BytePerPixelC,
- double *BytePerPixelDETY,
- double *BytePerPixelDETC,
- unsigned int *BlockHeight256BytesY,
- unsigned int *BlockHeight256BytesC,
- unsigned int *BlockWidth256BytesY,
- unsigned int *BlockWidth256BytesC)
-{
- if (SourcePixelFormat == dm_444_64) {
- *BytePerPixelDETY = 8;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 8;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
- *BytePerPixelDETY = 4;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 4;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_16) {
- *BytePerPixelDETY = 2;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 2;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_8) {
- *BytePerPixelDETY = 1;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 1;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_rgbe_alpha) {
- *BytePerPixelDETY = 4;
- *BytePerPixelDETC = 1;
- *BytePerPixelY = 4;
- *BytePerPixelC = 1;
- } else if (SourcePixelFormat == dm_420_8) {
- *BytePerPixelDETY = 1;
- *BytePerPixelDETC = 2;
- *BytePerPixelY = 1;
- *BytePerPixelC = 2;
- } else if (SourcePixelFormat == dm_420_12) {
- *BytePerPixelDETY = 2;
- *BytePerPixelDETC = 4;
- *BytePerPixelY = 2;
- *BytePerPixelC = 4;
- } else {
- *BytePerPixelDETY = 4.0 / 3;
- *BytePerPixelDETC = 8.0 / 3;
- *BytePerPixelY = 2;
- *BytePerPixelC = 4;
- }
-
- if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
- || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
- if (SurfaceTiling == dm_sw_linear) {
- *BlockHeight256BytesY = 1;
- } else if (SourcePixelFormat == dm_444_64) {
- *BlockHeight256BytesY = 4;
- } else if (SourcePixelFormat == dm_444_8) {
- *BlockHeight256BytesY = 16;
- } else {
- *BlockHeight256BytesY = 8;
- }
- *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
- *BlockHeight256BytesC = 0;
- *BlockWidth256BytesC = 0;
- } else {
- if (SurfaceTiling == dm_sw_linear) {
- *BlockHeight256BytesY = 1;
- *BlockHeight256BytesC = 1;
- } else if (SourcePixelFormat == dm_rgbe_alpha) {
- *BlockHeight256BytesY = 8;
- *BlockHeight256BytesC = 16;
- } else if (SourcePixelFormat == dm_420_8) {
- *BlockHeight256BytesY = 16;
- *BlockHeight256BytesC = 8;
- } else {
- *BlockHeight256BytesY = 8;
- *BlockHeight256BytesC = 8;
- }
- *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
- *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
- }
- return true;
-}
-
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
if (PrefetchMode == 0) {
@@ -3741,61 +3481,43 @@ static void CalculateRowBandwidth(
static void CalculateFlipSchedule(
struct display_mode_lib *mode_lib,
+ unsigned int k,
double HostVMInefficiencyFactor,
double UrgentExtraLatency,
double UrgentLatency,
- unsigned int GPUVMMaxPageTableLevels,
- bool HostVMEnable,
- unsigned int HostVMMaxNonCachedPageTableLevels,
- bool GPUVMEnable,
- double HostVMMinPageSize,
double PDEAndMetaPTEBytesPerFrame,
double MetaRowBytes,
- double DPTEBytesPerRow,
- double BandwidthAvailableForImmediateFlip,
- unsigned int TotImmediateFlipBytes,
- enum source_format_class SourcePixelFormat,
- double LineTime,
- double VRatio,
- double VRatioChroma,
- double Tno_bw,
- bool DCCEnable,
- unsigned int dpte_row_height,
- unsigned int meta_row_height,
- unsigned int dpte_row_height_chroma,
- unsigned int meta_row_height_chroma,
- double *DestinationLinesToRequestVMInImmediateFlip,
- double *DestinationLinesToRequestRowInImmediateFlip,
- double *final_flip_bw,
- bool *ImmediateFlipSupportedForPipe)
+ double DPTEBytesPerRow)
{
+ struct vba_vars_st *v = &mode_lib->vba;
double min_row_time = 0.0;
unsigned int HostVMDynamicLevelsTrips;
double TimeForFetchingMetaPTEImmediateFlip;
double TimeForFetchingRowInVBlankImmediateFlip;
- double ImmediateFlipBW;
+ double ImmediateFlipBW = 1.0;
+ double LineTime = v->HTotal[k] / v->PixelClock[k];
- if (GPUVMEnable == true && HostVMEnable == true) {
- HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
+ if (v->GPUVMEnable == true && v->HostVMEnable == true) {
+ HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
} else {
HostVMDynamicLevelsTrips = 0;
}
- if (GPUVMEnable == true || DCCEnable == true) {
- ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
+ if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
+ ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
}
- if (GPUVMEnable == true) {
+ if (v->GPUVMEnable == true) {
TimeForFetchingMetaPTEImmediateFlip = dml_max3(
- Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
- UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
+ v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
+ UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
LineTime / 4.0);
} else {
TimeForFetchingMetaPTEImmediateFlip = 0;
}
- *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
- if ((GPUVMEnable == true || DCCEnable == true)) {
+ v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
+ if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
UrgentLatency * (HostVMDynamicLevelsTrips + 1),
@@ -3804,54 +3526,54 @@ static void CalculateFlipSchedule(
TimeForFetchingRowInVBlankImmediateFlip = 0;
}
- *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
+ v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
- if (GPUVMEnable == true) {
- *final_flip_bw = dml_max(
- PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
- (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
- } else if ((GPUVMEnable == true || DCCEnable == true)) {
- *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
+ if (v->GPUVMEnable == true) {
+ v->final_flip_bw[k] = dml_max(
+ PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
+ (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
+ } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
+ v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
} else {
- *final_flip_bw = 0;
+ v->final_flip_bw[k] = 0;
}
- if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
- if (GPUVMEnable == true && DCCEnable != true) {
- min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
- } else if (GPUVMEnable != true && DCCEnable == true) {
- min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
+ if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
+ if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
+ min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
+ } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
+ min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
} else {
min_row_time = dml_min4(
- dpte_row_height * LineTime / VRatio,
- meta_row_height * LineTime / VRatio,
- dpte_row_height_chroma * LineTime / VRatioChroma,
- meta_row_height_chroma * LineTime / VRatioChroma);
+ v->dpte_row_height[k] * LineTime / v->VRatio[k],
+ v->meta_row_height[k] * LineTime / v->VRatio[k],
+ v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
+ v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
}
} else {
- if (GPUVMEnable == true && DCCEnable != true) {
- min_row_time = dpte_row_height * LineTime / VRatio;
- } else if (GPUVMEnable != true && DCCEnable == true) {
- min_row_time = meta_row_height * LineTime / VRatio;
+ if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
+ min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
+ } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
+ min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
} else {
- min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
+ min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
}
}
- if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
+ if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
|| TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
- *ImmediateFlipSupportedForPipe = false;
+ v->ImmediateFlipSupportedForPipe[k] = false;
} else {
- *ImmediateFlipSupportedForPipe = true;
+ v->ImmediateFlipSupportedForPipe[k] = true;
}
#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip);
- dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip);
+ dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
+ dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
- dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
+ dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
#endif
}
@@ -3884,7 +3606,7 @@ static double TruncToValidBPP(
NonDSCBPP1 = 15;
NonDSCBPP2 = 18;
MinDSCBPP = 6;
- MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
+ MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
} else if (Format == dm_444) {
NonDSCBPP0 = 24;
NonDSCBPP1 = 30;
@@ -3946,9 +3668,120 @@ static double TruncToValidBPP(
return DesiredBPP;
}
}
- return BPP_INVALID;
}
+static noinline void CalculatePrefetchSchedulePerPlane(
+ struct display_mode_lib *mode_lib,
+ double HostVMInefficiencyFactor,
+ int i,
+ unsigned j,
+ unsigned k)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ Pipe myPipe;
+
+ myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
+ myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
+ myPipe.PixelClock = v->PixelClock[k];
+ myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
+ myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
+ myPipe.ScalerEnabled = v->ScalerEnabled[k];
+ myPipe.VRatio = mode_lib->vba.VRatio[k];
+ myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
+
+ myPipe.SourceScan = v->SourceScan[k];
+ myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
+ myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
+ myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
+ myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
+ myPipe.InterlaceEnable = v->Interlace[k];
+ myPipe.NumberOfCursors = v->NumberOfCursors[k];
+ myPipe.VBlank = v->VTotal[k] - v->VActive[k];
+ myPipe.HTotal = v->HTotal[k];
+ myPipe.DCCEnable = v->DCCEnable[k];
+ myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
+ || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
+ myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
+ myPipe.BytePerPixelY = v->BytePerPixelY[k];
+ myPipe.BytePerPixelC = v->BytePerPixelC[k];
+ myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
+ v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
+ mode_lib,
+ HostVMInefficiencyFactor,
+ &myPipe,
+ v->DSCDelayPerState[i][k],
+ v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
+ v->DPPCLKDelaySCL,
+ v->DPPCLKDelaySCLLBOnly,
+ v->DPPCLKDelayCNVCCursor,
+ v->DISPCLKDelaySubtotal,
+ v->SwathWidthYThisState[k] / v->HRatio[k],
+ v->OutputFormat[k],
+ v->MaxInterDCNTileRepeaters,
+ dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
+ v->MaximumVStartup[i][j][k],
+ v->GPUVMMaxPageTableLevels,
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->HostVMMaxNonCachedPageTableLevels,
+ v->HostVMMinPageSize,
+ v->DynamicMetadataEnable[k],
+ v->DynamicMetadataVMEnabled,
+ v->DynamicMetadataLinesBeforeActiveRequired[k],
+ v->DynamicMetadataTransmittedBytes[k],
+ v->UrgLatency[i],
+ v->ExtraLatency,
+ v->TimeCalc,
+ v->PDEAndMetaPTEBytesPerFrame[i][j][k],
+ v->MetaRowBytes[i][j][k],
+ v->DPTEBytesPerRow[i][j][k],
+ v->PrefetchLinesY[i][j][k],
+ v->SwathWidthYThisState[k],
+ v->PrefillY[k],
+ v->MaxNumSwY[k],
+ v->PrefetchLinesC[i][j][k],
+ v->SwathWidthCThisState[k],
+ v->PrefillC[k],
+ v->MaxNumSwC[k],
+ v->swath_width_luma_ub_this_state[k],
+ v->swath_width_chroma_ub_this_state[k],
+ v->SwathHeightYThisState[k],
+ v->SwathHeightCThisState[k],
+ v->TWait,
+ &v->DSTXAfterScaler[k],
+ &v->DSTYAfterScaler[k],
+ &v->LineTimesForPrefetch[k],
+ &v->PrefetchBW[k],
+ &v->LinesForMetaPTE[k],
+ &v->LinesForMetaAndDPTERow[k],
+ &v->VRatioPreY[i][j][k],
+ &v->VRatioPreC[i][j][k],
+ &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
+ &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
+ &v->NoTimeForDynamicMetadata[i][j][k],
+ &v->Tno_bw[k],
+ &v->prefetch_vmrow_bw[k],
+ &v->dummy7[k],
+ &v->dummy8[k],
+ &v->dummy13[k],
+ &v->VUpdateOffsetPix[k],
+ &v->VUpdateWidthPix[k],
+ &v->VReadyOffsetPix[k]);
+}
+
+static void PatchDETBufferSizeInKByte(unsigned int NumberOfActivePlanes, int NoOfDPPThisState[], unsigned int config_return_buffer_size_in_kbytes, unsigned int DETBufferSizeInKByte[])
+{
+ int i, total_pipes = 0;
+ for (i = 0; i < NumberOfActivePlanes; i++)
+ total_pipes += NoOfDPPThisState[i];
+ DETBufferSizeInKByte[0] = ((config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB) / 64 / total_pipes) * 64;
+ if (DETBufferSizeInKByte[0] > DCN3_15_MAX_DET_SIZE)
+ DETBufferSizeInKByte[0] = DCN3_15_MAX_DET_SIZE;
+ for (i = 1; i < NumberOfActivePlanes; i++)
+ DETBufferSizeInKByte[i] = DETBufferSizeInKByte[0];
+}
+
+
void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
{
struct vba_vars_st *v = &mode_lib->vba;
@@ -4012,7 +3845,7 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
/*Bandwidth Support Check*/
for (k = 0; k < v->NumberOfActivePlanes; k++) {
- CalculateBytePerPixelAnd256BBlockSizes(
+ dml30_CalculateBytePerPixelAnd256BBlockSizes(
v->SourcePixelFormat[k],
v->SurfaceTiling[k],
&v->BytePerPixelY[k],
@@ -4185,7 +4018,8 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
CalculateSwathAndDETConfiguration(
true,
v->NumberOfActivePlanes,
- v->DETBufferSizeInKByte[0],
+ mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
+ v->DETBufferSizeInKByte,
v->MaximumSwathWidthLuma,
v->MaximumSwathWidthChroma,
v->SourceScan,
@@ -4256,6 +4090,7 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
if (v->ODMCombinePolicy == dm_odm_combine_policy_none
|| !(v->Output[k] == dm_dp ||
+ v->Output[k] == dm_dp2p0 ||
v->Output[k] == dm_edp)) {
v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
@@ -4288,7 +4123,9 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH
&& v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
- if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) {
+ if (v->Output[k] == dm_hdmi) {
+ FMTBufferExceeded = true;
+ } else if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) {
v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
@@ -4324,6 +4161,10 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
|| (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
v->DISPCLK_DPPCLK_Support[i][j] = false;
}
+ if (mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[k] > DCN3_15_MAX_DET_SIZE && v->NoOfDPP[i][j][k] < 2) {
+ v->MPCCombine[i][j][k] = true;
+ v->NoOfDPP[i][j][k] = 2;
+ }
}
v->TotalNumberOfActiveDPP[i][j] = 0;
v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
@@ -4465,11 +4306,11 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
v->AudioSampleRate[k],
v->AudioSampleLayout[k],
v->ODMCombineEnablePerState[i][k]);
- } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
+ } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_dp2p0) {
if (v->DSCEnable[k] == true) {
v->RequiresDSC[i][k] = true;
v->LinkDSCEnable = true;
- if (v->Output[k] == dm_dp) {
+ if (v->Output[k] == dm_dp || v->Output[k] == dm_dp2p0) {
v->RequiresFEC[i][k] = true;
} else {
v->RequiresFEC[i][k] = false;
@@ -4477,107 +4318,201 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
} else {
v->RequiresDSC[i][k] = false;
v->LinkDSCEnable = false;
- v->RequiresFEC[i][k] = false;
- }
-
- v->Outbpp = BPP_INVALID;
- if (v->PHYCLKPerState[i] >= 270.0) {
- v->Outbpp = TruncToValidBPP(
- (1.0 - v->Downspreading / 100.0) * 2700,
- v->OutputLinkDPLanes[k],
- v->HTotal[k],
- v->HActive[k],
- v->PixelClockBackEnd[k],
- v->ForcedOutputLinkBPP[k],
- v->LinkDSCEnable,
- v->Output[k],
- v->OutputFormat[k],
- v->DSCInputBitPerComponent[k],
- v->NumberOfDSCSlices[k],
- v->AudioSampleRate[k],
- v->AudioSampleLayout[k],
- v->ODMCombineEnablePerState[i][k]);
- v->OutputBppPerState[i][k] = v->Outbpp;
- // TODO: Need some other way to handle this nonsense
- // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
- }
- if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
- v->Outbpp = TruncToValidBPP(
- (1.0 - v->Downspreading / 100.0) * 5400,
- v->OutputLinkDPLanes[k],
- v->HTotal[k],
- v->HActive[k],
- v->PixelClockBackEnd[k],
- v->ForcedOutputLinkBPP[k],
- v->LinkDSCEnable,
- v->Output[k],
- v->OutputFormat[k],
- v->DSCInputBitPerComponent[k],
- v->NumberOfDSCSlices[k],
- v->AudioSampleRate[k],
- v->AudioSampleLayout[k],
- v->ODMCombineEnablePerState[i][k]);
- v->OutputBppPerState[i][k] = v->Outbpp;
- // TODO: Need some other way to handle this nonsense
- // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
- }
- if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
- v->Outbpp = TruncToValidBPP(
- (1.0 - v->Downspreading / 100.0) * 8100,
- v->OutputLinkDPLanes[k],
- v->HTotal[k],
- v->HActive[k],
- v->PixelClockBackEnd[k],
- v->ForcedOutputLinkBPP[k],
- v->LinkDSCEnable,
- v->Output[k],
- v->OutputFormat[k],
- v->DSCInputBitPerComponent[k],
- v->NumberOfDSCSlices[k],
- v->AudioSampleRate[k],
- v->AudioSampleLayout[k],
- v->ODMCombineEnablePerState[i][k]);
- v->OutputBppPerState[i][k] = v->Outbpp;
- // TODO: Need some other way to handle this nonsense
- // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
- }
- if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 10000.0 / 18) {
- v->Outbpp = TruncToValidBPP(
- (1.0 - v->Downspreading / 100.0) * 10000,
- 4,
- v->HTotal[k],
- v->HActive[k],
- v->PixelClockBackEnd[k],
- v->ForcedOutputLinkBPP[k],
- v->LinkDSCEnable,
- v->Output[k],
- v->OutputFormat[k],
- v->DSCInputBitPerComponent[k],
- v->NumberOfDSCSlices[k],
- v->AudioSampleRate[k],
- v->AudioSampleLayout[k],
- v->ODMCombineEnablePerState[i][k]);
- v->OutputBppPerState[i][k] = v->Outbpp;
- //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "10x4";
+ if (v->Output[k] == dm_dp2p0) {
+ v->RequiresFEC[i][k] = true;
+ } else {
+ v->RequiresFEC[i][k] = false;
+ }
}
- if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 12000.0 / 18) {
- v->Outbpp = TruncToValidBPP(
- 12000,
- 4,
- v->HTotal[k],
- v->HActive[k],
- v->PixelClockBackEnd[k],
- v->ForcedOutputLinkBPP[k],
- v->LinkDSCEnable,
- v->Output[k],
- v->OutputFormat[k],
- v->DSCInputBitPerComponent[k],
- v->NumberOfDSCSlices[k],
- v->AudioSampleRate[k],
- v->AudioSampleLayout[k],
- v->ODMCombineEnablePerState[i][k]);
- v->OutputBppPerState[i][k] = v->Outbpp;
- //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "12x4";
+ if (v->Output[k] == dm_dp2p0) {
+ v->Outbpp = BPP_INVALID;
+ if ((v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr10) &&
+ v->PHYCLKD18PerState[k] >= 10000.0 / 18.0) {
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 10000,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 13500.0 / 18.0 &&
+ v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
+ v->RequiresDSC[i][k] = true;
+ v->LinkDSCEnable = true;
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 10000,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ }
+ v->OutputBppPerState[i][k] = v->Outbpp;
+ // TODO: Need some other way to handle this nonsense
+ // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR10"
+ }
+ if (v->Outbpp == BPP_INVALID &&
+ (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5) &&
+ v->PHYCLKD18PerState[k] >= 13500.0 / 18.0) {
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 13500,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 20000.0 / 18.0 &&
+ v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
+ v->RequiresDSC[i][k] = true;
+ v->LinkDSCEnable = true;
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 13500,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ }
+ v->OutputBppPerState[i][k] = v->Outbpp;
+ // TODO: Need some other way to handle this nonsense
+ // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR13p5"
+ }
+ if (v->Outbpp == BPP_INVALID &&
+ (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr20) &&
+ v->PHYCLKD18PerState[k] >= 20000.0 / 18.0) {
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 20000,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ if (v->Outbpp == BPP_INVALID && v->DSCEnable[k] == true &&
+ v->ForcedOutputLinkBPP[k] == 0) {
+ v->RequiresDSC[i][k] = true;
+ v->LinkDSCEnable = true;
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 20000,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ }
+ v->OutputBppPerState[i][k] = v->Outbpp;
+ // TODO: Need some other way to handle this nonsense
+ // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR20"
+ }
+ } else {
+ v->Outbpp = BPP_INVALID;
+ if (v->PHYCLKPerState[i] >= 270.0) {
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 2700,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ v->OutputBppPerState[i][k] = v->Outbpp;
+ // TODO: Need some other way to handle this nonsense
+ // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
+ }
+ if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 5400,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ v->OutputBppPerState[i][k] = v->Outbpp;
+ // TODO: Need some other way to handle this nonsense
+ // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
+ }
+ if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 8100,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ v->OutputBppPerState[i][k] = v->Outbpp;
+ // TODO: Need some other way to handle this nonsense
+ // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
+ }
}
}
} else {
@@ -4706,10 +4641,13 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
}
+ if (v->NumberOfActivePlanes > 1 && mode_lib->project == DML_PROJECT_DCN315 && !v->DETSizeOverride[0])
+ PatchDETBufferSizeInKByte(v->NumberOfActivePlanes, v->NoOfDPPThisState, v->ip.config_return_buffer_size_in_kbytes, v->DETBufferSizeInKByte);
CalculateSwathAndDETConfiguration(
false,
v->NumberOfActivePlanes,
- v->DETBufferSizeInKByte[0],
+ mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
+ v->DETBufferSizeInKByte,
v->MaximumSwathWidthLuma,
v->MaximumSwathWidthChroma,
v->SourceScan,
@@ -4945,6 +4883,17 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
&v->meta_row_bandwidth[i][j][k],
&v->dpte_row_bandwidth[i][j][k]);
}
+ /*DCCMetaBufferSizeSupport(i, j) = True
+ For k = 0 To NumberOfActivePlanes - 1
+ If MetaRowBytes(i, j, k) > 24064 Then
+ DCCMetaBufferSizeSupport(i, j) = False
+ End If
+ Next k*/
+ v->DCCMetaBufferSizeSupport[i][j] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->MetaRowBytes[i][j][k] > 24064)
+ v->DCCMetaBufferSizeSupport[i][j] = false;
+ }
v->UrgLatency[i] = CalculateUrgentLatency(
v->UrgentLatencyPixelDataOnly,
v->UrgentLatencyPixelMixedWithVMData,
@@ -5079,66 +5028,8 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
}
- if (v->UseMinimumRequiredDCFCLK == true) {
- UseMinimumDCFCLK(
- mode_lib,
- v->MaxInterDCNTileRepeaters,
- MaxPrefetchMode,
- v->DRAMClockChangeLatency,
- v->SREnterPlusExitTime,
- v->ReturnBusWidth,
- v->RoundTripPingLatencyCycles,
- ReorderingBytes,
- v->PixelChunkSizeInKByte,
- v->MetaChunkSize,
- v->GPUVMEnable,
- v->GPUVMMaxPageTableLevels,
- v->HostVMEnable,
- v->NumberOfActivePlanes,
- v->HostVMMinPageSize,
- v->HostVMMaxNonCachedPageTableLevels,
- v->DynamicMetadataVMEnabled,
- v->ImmediateFlipRequirement[0],
- v->ProgressiveToInterlaceUnitInOPP,
- v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation,
- v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
- v->VTotal,
- v->VActive,
- v->DynamicMetadataTransmittedBytes,
- v->DynamicMetadataLinesBeforeActiveRequired,
- v->Interlace,
- v->RequiredDPPCLK,
- v->RequiredDISPCLK,
- v->UrgLatency,
- v->NoOfDPP,
- v->ProjectedDCFCLKDeepSleep,
- v->MaximumVStartup,
- v->TotalVActivePixelBandwidth,
- v->TotalVActiveCursorBandwidth,
- v->TotalMetaRowBandwidth,
- v->TotalDPTERowBandwidth,
- v->TotalNumberOfActiveDPP,
- v->TotalNumberOfDCCActiveDPP,
- v->dpte_group_bytes,
- v->PrefetchLinesY,
- v->PrefetchLinesC,
- v->swath_width_luma_ub_all_states,
- v->swath_width_chroma_ub_all_states,
- v->BytePerPixelY,
- v->BytePerPixelC,
- v->HTotal,
- v->PixelClock,
- v->PDEAndMetaPTEBytesPerFrame,
- v->DPTEBytesPerRow,
- v->MetaRowBytes,
- v->DynamicMetadataEnable,
- v->VActivePixelBandwidth,
- v->VActiveCursorBandwidth,
- v->ReadBandwidthLuma,
- v->ReadBandwidthChroma,
- v->DCFCLKPerState,
- v->DCFCLKState);
- }
+ if (v->UseMinimumRequiredDCFCLK == true)
+ UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
for (i = 0; i < v->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
@@ -5276,92 +5167,9 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
v->SREnterPlusExitTime);
for (k = 0; k < v->NumberOfActivePlanes; k++) {
- Pipe myPipe;
-
- myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
- myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
- myPipe.PixelClock = v->PixelClock[k];
- myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
- myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
- myPipe.ScalerEnabled = v->ScalerEnabled[k];
- myPipe.SourceScan = v->SourceScan[k];
- myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
- myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
- myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
- myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
- myPipe.InterlaceEnable = v->Interlace[k];
- myPipe.NumberOfCursors = v->NumberOfCursors[k];
- myPipe.VBlank = v->VTotal[k] - v->VActive[k];
- myPipe.HTotal = v->HTotal[k];
- myPipe.DCCEnable = v->DCCEnable[k];
- myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
- || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
- myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
- myPipe.BytePerPixelY = v->BytePerPixelY[k];
- myPipe.BytePerPixelC = v->BytePerPixelC[k];
- myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
- v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
- mode_lib,
- HostVMInefficiencyFactor,
- &myPipe,
- v->DSCDelayPerState[i][k],
- v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
- v->DPPCLKDelaySCL,
- v->DPPCLKDelaySCLLBOnly,
- v->DPPCLKDelayCNVCCursor,
- v->DISPCLKDelaySubtotal,
- v->SwathWidthYThisState[k] / v->HRatio[k],
- v->OutputFormat[k],
- v->MaxInterDCNTileRepeaters,
- dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
- v->MaximumVStartup[i][j][k],
- v->GPUVMMaxPageTableLevels,
- v->GPUVMEnable,
- v->HostVMEnable,
- v->HostVMMaxNonCachedPageTableLevels,
- v->HostVMMinPageSize,
- v->DynamicMetadataEnable[k],
- v->DynamicMetadataVMEnabled,
- v->DynamicMetadataLinesBeforeActiveRequired[k],
- v->DynamicMetadataTransmittedBytes[k],
- v->UrgLatency[i],
- v->ExtraLatency,
- v->TimeCalc,
- v->PDEAndMetaPTEBytesPerFrame[i][j][k],
- v->MetaRowBytes[i][j][k],
- v->DPTEBytesPerRow[i][j][k],
- v->PrefetchLinesY[i][j][k],
- v->SwathWidthYThisState[k],
- v->PrefillY[k],
- v->MaxNumSwY[k],
- v->PrefetchLinesC[i][j][k],
- v->SwathWidthCThisState[k],
- v->PrefillC[k],
- v->MaxNumSwC[k],
- v->swath_width_luma_ub_this_state[k],
- v->swath_width_chroma_ub_this_state[k],
- v->SwathHeightYThisState[k],
- v->SwathHeightCThisState[k],
- v->TWait,
- &v->DSTXAfterScaler[k],
- &v->DSTYAfterScaler[k],
- &v->LineTimesForPrefetch[k],
- &v->PrefetchBW[k],
- &v->LinesForMetaPTE[k],
- &v->LinesForMetaAndDPTERow[k],
- &v->VRatioPreY[i][j][k],
- &v->VRatioPreC[i][j][k],
- &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
- &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
- &v->NoTimeForDynamicMetadata[i][j][k],
- &v->Tno_bw[k],
- &v->prefetch_vmrow_bw[k],
- &v->dummy7[k],
- &v->dummy8[k],
- &v->dummy13[k],
- &v->VUpdateOffsetPix[k],
- &v->VUpdateWidthPix[k],
- &v->VReadyOffsetPix[k]);
+ CalculatePrefetchSchedulePerPlane(mode_lib,
+ HostVMInefficiencyFactor,
+ i, j, k);
}
for (k = 0; k < v->NumberOfActivePlanes; k++) {
@@ -5371,7 +5179,7 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
v->SwathHeightYThisState[k],
v->SwathHeightCThisState[k],
v->HTotal[k] / v->PixelClock[k],
- v->UrgentLatency,
+ v->UrgLatency[i],
v->CursorBufferSize,
v->CursorWidth[k][0],
v->CursorBPP[k][0],
@@ -5383,7 +5191,7 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
v->DETBufferSizeCThisState[k],
&v->UrgentBurstFactorCursorPre[k],
&v->UrgentBurstFactorLumaPre[k],
- &v->UrgentBurstFactorChroma[k],
+ &v->UrgentBurstFactorChromaPre[k],
&v->NotUrgentLatencyHidingPre[k]);
}
@@ -5466,40 +5274,20 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
v->TotImmediateFlipBytes = 0.0;
for (k = 0; k < v->NumberOfActivePlanes; k++) {
v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
- + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
- + v->DPTEBytesPerRow[i][j][k];
+ + v->NoOfDPP[i][j][k] * (v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
+ + v->DPTEBytesPerRow[i][j][k]);
}
for (k = 0; k < v->NumberOfActivePlanes; k++) {
CalculateFlipSchedule(
mode_lib,
+ k,
HostVMInefficiencyFactor,
v->ExtraLatency,
v->UrgLatency[i],
- v->GPUVMMaxPageTableLevels,
- v->HostVMEnable,
- v->HostVMMaxNonCachedPageTableLevels,
- v->GPUVMEnable,
- v->HostVMMinPageSize,
v->PDEAndMetaPTEBytesPerFrame[i][j][k],
v->MetaRowBytes[i][j][k],
- v->DPTEBytesPerRow[i][j][k],
- v->BandwidthAvailableForImmediateFlip,
- v->TotImmediateFlipBytes,
- v->SourcePixelFormat[k],
- v->HTotal[k] / v->PixelClock[k],
- v->VRatio[k],
- v->VRatioChroma[k],
- v->Tno_bw[k],
- v->DCCEnable[k],
- v->dpte_row_height[k],
- v->meta_row_height[k],
- v->dpte_row_height_chroma[k],
- v->meta_row_height_chroma[k],
- &v->DestinationLinesToRequestVMInImmediateFlip[k],
- &v->DestinationLinesToRequestRowInImmediateFlip[k],
- &v->final_flip_bw[k],
- &v->ImmediateFlipSupportedForPipe[k]);
+ v->DPTEBytesPerRow[i][j][k]);
}
v->total_dcn_read_bw_with_flip = 0.0;
for (k = 0; k < v->NumberOfActivePlanes; k++) {
@@ -5557,64 +5345,28 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
CalculateWatermarksAndDRAMSpeedChangeSupport(
mode_lib,
v->PrefetchModePerState[i][j],
- v->NumberOfActivePlanes,
- v->MaxLineBufferLines,
- v->LineBufferSize,
- v->WritebackInterfaceBufferSize,
v->DCFCLKState[i][j],
v->ReturnBWPerState[i][j],
- v->SynchronizedVBlank,
- v->dpte_group_bytes,
- v->MetaChunkSize,
v->UrgLatency[i],
v->ExtraLatency,
- v->WritebackLatency,
- v->WritebackChunkSize,
v->SOCCLKPerState[i],
- v->DRAMClockChangeLatency,
- v->SRExitTime,
- v->SREnterPlusExitTime,
- v->SRExitZ8Time,
- v->SREnterPlusExitZ8Time,
v->ProjectedDCFCLKDeepSleep[i][j],
v->DETBufferSizeYThisState,
v->DETBufferSizeCThisState,
v->SwathHeightYThisState,
v->SwathHeightCThisState,
- v->LBBitPerPixel,
v->SwathWidthYThisState,
v->SwathWidthCThisState,
- v->HRatio,
- v->HRatioChroma,
- v->vtaps,
- v->VTAPsChroma,
- v->VRatio,
- v->VRatioChroma,
- v->HTotal,
- v->PixelClock,
- v->BlendingAndTiming,
v->NoOfDPPThisState,
v->BytePerPixelInDETY,
v->BytePerPixelInDETC,
- v->DSTXAfterScaler,
- v->DSTYAfterScaler,
- v->WritebackEnable,
- v->WritebackPixelFormat,
- v->WritebackDestinationWidth,
- v->WritebackDestinationHeight,
- v->WritebackSourceHeight,
UnboundedRequestEnabledThisState,
CompressedBufferSizeInkByteThisState,
&v->DRAMClockChangeSupport[i][j],
- &v->UrgentWatermark,
- &v->WritebackUrgentWatermark,
- &v->DRAMClockChangeWatermark,
- &v->WritebackDRAMClockChangeWatermark,
&dummy,
&dummy,
&dummy,
- &dummy,
- &v->MinActiveDRAMClockChangeLatencySupported);
+ &dummy);
}
}
@@ -5705,6 +5457,58 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
v->ModeSupport[i][j] = true;
} else {
v->ModeSupport[i][j] = false;
+#ifdef __DML_VBA_DEBUG__
+ if (v->ScaleRatioAndTapsSupport == false)
+ dml_print("DML SUPPORT: ScaleRatioAndTapsSupport failed");
+ if (v->SourceFormatPixelAndScanSupport == false)
+ dml_print("DML SUPPORT: SourceFormatPixelAndScanSupport failed");
+ if (v->ViewportSizeSupport[i][j] == false)
+ dml_print("DML SUPPORT: ViewportSizeSupport failed");
+ if (v->LinkCapacitySupport[i] == false)
+ dml_print("DML SUPPORT: LinkCapacitySupport failed");
+ if (v->ODMCombine4To1SupportCheckOK[i] == false)
+ dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
+ if (v->NotEnoughDSCUnits[i] == true)
+ dml_print("DML SUPPORT: NotEnoughDSCUnits");
+ if (v->DTBCLKRequiredMoreThanSupported[i] == true)
+ dml_print("DML SUPPORT: DTBCLKRequiredMoreThanSupported");
+ if (v->ROBSupport[i][j] == false)
+ dml_print("DML SUPPORT: ROBSupport failed");
+ if (v->DISPCLK_DPPCLK_Support[i][j] == false)
+ dml_print("DML SUPPORT: DISPCLK_DPPCLK_Support failed");
+ if (v->TotalAvailablePipesSupport[i][j] == false)
+ dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
+ if (EnoughWritebackUnits == false)
+ dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
+ if (v->WritebackLatencySupport == false)
+ dml_print("DML SUPPORT: WritebackLatencySupport failed");
+ if (v->WritebackScaleRatioAndTapsSupport == false)
+ dml_print("DML SUPPORT: DSC422NativeNotSupported ");
+ if (v->CursorSupport == false)
+ dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
+ if (v->PitchSupport == false)
+ dml_print("DML SUPPORT: PitchSupport failed");
+ if (ViewportExceedsSurface == true)
+ dml_print("DML SUPPORT: ViewportExceedsSurface failed");
+ if (v->PrefetchSupported[i][j] == false)
+ dml_print("DML SUPPORT: PrefetchSupported failed");
+ if (v->DynamicMetadataSupported[i][j] == false)
+ dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
+ if (v->TotalVerticalActiveBandwidthSupport[i][j] == false)
+ dml_print("DML SUPPORT: TotalVerticalActiveBandwidthSupport failed");
+ if (v->VRatioInPrefetchSupported[i][j] == false)
+ dml_print("DML SUPPORT: VRatioInPrefetchSupported failed");
+ if (v->PTEBufferSizeNotExceeded[i][j] == false)
+ dml_print("DML SUPPORT: PTEBufferSizeNotExceeded failed");
+ if (v->NonsupportedDSCInputBPC == true)
+ dml_print("DML SUPPORT: NonsupportedDSCInputBPC failed");
+ if (!((v->HostVMEnable == false
+ && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
+ || v->ImmediateFlipSupportedForState[i][j] == true))
+ dml_print("DML SUPPORT: ImmediateFlipRequirement failed");
+ if (FMTBufferExceeded == true)
+ dml_print("DML SUPPORT: FMTBufferExceeded failed");
+#endif
}
}
}
@@ -5739,64 +5543,28 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
static void CalculateWatermarksAndDRAMSpeedChangeSupport(
struct display_mode_lib *mode_lib,
unsigned int PrefetchMode,
- unsigned int NumberOfActivePlanes,
- unsigned int MaxLineBufferLines,
- unsigned int LineBufferSize,
- unsigned int WritebackInterfaceBufferSize,
double DCFCLK,
double ReturnBW,
- bool SynchronizedVBlank,
- unsigned int dpte_group_bytes[],
- unsigned int MetaChunkSize,
double UrgentLatency,
double ExtraLatency,
- double WritebackLatency,
- double WritebackChunkSize,
double SOCCLK,
- double DRAMClockChangeLatency,
- double SRExitTime,
- double SREnterPlusExitTime,
- double SRExitZ8Time,
- double SREnterPlusExitZ8Time,
double DCFCLKDeepSleep,
unsigned int DETBufferSizeY[],
unsigned int DETBufferSizeC[],
unsigned int SwathHeightY[],
unsigned int SwathHeightC[],
- unsigned int LBBitPerPixel[],
double SwathWidthY[],
double SwathWidthC[],
- double HRatio[],
- double HRatioChroma[],
- unsigned int vtaps[],
- unsigned int VTAPsChroma[],
- double VRatio[],
- double VRatioChroma[],
- unsigned int HTotal[],
- double PixelClock[],
- unsigned int BlendingAndTiming[],
unsigned int DPPPerPlane[],
double BytePerPixelDETY[],
double BytePerPixelDETC[],
- double DSTXAfterScaler[],
- double DSTYAfterScaler[],
- bool WritebackEnable[],
- enum source_format_class WritebackPixelFormat[],
- double WritebackDestinationWidth[],
- double WritebackDestinationHeight[],
- double WritebackSourceHeight[],
bool UnboundedRequestEnabled,
int unsigned CompressedBufferSizeInkByte,
enum clock_change_support *DRAMClockChangeSupport,
- double *UrgentWatermark,
- double *WritebackUrgentWatermark,
- double *DRAMClockChangeWatermark,
- double *WritebackDRAMClockChangeWatermark,
double *StutterExitWatermark,
double *StutterEnterPlusExitWatermark,
double *Z8StutterExitWatermark,
- double *Z8StutterEnterPlusExitWatermark,
- double *MinActiveDRAMClockChangeLatencySupported)
+ double *Z8StutterEnterPlusExitWatermark)
{
struct vba_vars_st *v = &mode_lib->vba;
double EffectiveLBLatencyHidingY;
@@ -5816,103 +5584,103 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
double TotalPixelBW = 0.0;
int k, j;
- *UrgentWatermark = UrgentLatency + ExtraLatency;
+ v->UrgentWatermark = UrgentLatency + ExtraLatency;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
- dml_print("DML::%s: UrgentWatermark = %f\n", __func__, *UrgentWatermark);
+ dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
#endif
- *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
+ v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, DRAMClockChangeLatency);
- dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, *DRAMClockChangeWatermark);
+ dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
+ dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
#endif
v->TotalActiveWriteback = 0;
- for (k = 0; k < NumberOfActivePlanes; ++k) {
- if (WritebackEnable[k] == true) {
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->WritebackEnable[k] == true) {
v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
}
}
if (v->TotalActiveWriteback <= 1) {
- *WritebackUrgentWatermark = WritebackLatency;
+ v->WritebackUrgentWatermark = v->WritebackLatency;
} else {
- *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+ v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
}
if (v->TotalActiveWriteback <= 1) {
- *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
+ v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
} else {
- *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+ v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
}
- for (k = 0; k < NumberOfActivePlanes; ++k) {
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
TotalPixelBW = TotalPixelBW
- + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k])
- / (HTotal[k] / PixelClock[k]);
+ + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
+ / (v->HTotal[k] / v->PixelClock[k]);
}
- for (k = 0; k < NumberOfActivePlanes; ++k) {
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
double EffectiveDETBufferSizeY = DETBufferSizeY[k];
v->LBLatencyHidingSourceLinesY = dml_min(
- (double) MaxLineBufferLines,
- dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1);
+ (double) v->MaxLineBufferLines,
+ dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
v->LBLatencyHidingSourceLinesC = dml_min(
- (double) MaxLineBufferLines,
- dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1);
+ (double) v->MaxLineBufferLines,
+ dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
- EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]);
+ EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
- EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
+ EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
if (UnboundedRequestEnabled) {
EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
- + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] / (HTotal[k] / PixelClock[k]) / TotalPixelBW;
+ + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
}
LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
- FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
+ FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
if (BytePerPixelDETC[k] > 0) {
LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
- FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k];
+ FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
} else {
LinesInDETC = 0;
FullDETBufferingTimeC = 999999;
}
ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
- - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
+ - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
- if (NumberOfActivePlanes > 1) {
+ if (v->NumberOfActivePlanes > 1) {
ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
- - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
+ - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
}
if (BytePerPixelDETC[k] > 0) {
ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
- - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
+ - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
- if (NumberOfActivePlanes > 1) {
+ if (v->NumberOfActivePlanes > 1) {
ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
- - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k];
+ - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
}
v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
} else {
v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
}
- if (WritebackEnable[k] == true) {
- WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024
- / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
- if (WritebackPixelFormat[k] == dm_444_64) {
+ if (v->WritebackEnable[k] == true) {
+ WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
+ / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
+ if (v->WritebackPixelFormat[k] == dm_444_64) {
WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
}
WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
@@ -5922,14 +5690,14 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
v->MinActiveDRAMClockChangeMargin = 999999;
PlaneWithMinActiveDRAMClockChangeMargin = 0;
- for (k = 0; k < NumberOfActivePlanes; ++k) {
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
- if (BlendingAndTiming[k] == k) {
+ if (v->BlendingAndTiming[k] == k) {
PlaneWithMinActiveDRAMClockChangeMargin = k;
} else {
- for (j = 0; j < NumberOfActivePlanes; ++j) {
- if (BlendingAndTiming[k] == j) {
+ for (j = 0; j < v->NumberOfActivePlanes; ++j) {
+ if (v->BlendingAndTiming[k] == j) {
PlaneWithMinActiveDRAMClockChangeMargin = j;
}
}
@@ -5937,11 +5705,11 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
}
}
- *MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
+ v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
- for (k = 0; k < NumberOfActivePlanes; ++k) {
- if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
&& v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
}
@@ -5949,25 +5717,25 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
v->TotalNumberOfActiveOTG = 0;
- for (k = 0; k < NumberOfActivePlanes; ++k) {
- if (BlendingAndTiming[k] == k) {
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->BlendingAndTiming[k] == k) {
v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
}
}
if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
- } else if ((SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
+ } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
|| SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
} else {
*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
}
- *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
- *StutterEnterPlusExitWatermark = (SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
- *Z8StutterExitWatermark = SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
- *Z8StutterEnterPlusExitWatermark = SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
+ *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
+ *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
+ *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
+ *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
@@ -6843,7 +6611,8 @@ static void CalculateStutterEfficiency(
static void CalculateSwathAndDETConfiguration(
bool ForceSingleDPP,
int NumberOfActivePlanes,
- unsigned int DETBufferSizeInKByte,
+ bool DETSharedByAllDPP,
+ unsigned int DETBufferSizeInKByteA[],
double MaximumSwathWidthLuma[],
double MaximumSwathWidthChroma[],
enum scan_direction_class SourceScan[],
@@ -6927,6 +6696,10 @@ static void CalculateSwathAndDETConfiguration(
*ViewportSizeSupport = true;
for (k = 0; k < NumberOfActivePlanes; ++k) {
+ unsigned int DETBufferSizeInKByte = DETBufferSizeInKByteA[k];
+
+ if (DETSharedByAllDPP && DPPPerPlane[k])
+ DETBufferSizeInKByte /= DPPPerPlane[k];
if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
|| SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
if (SurfaceTiling[k] == dm_sw_linear
@@ -7106,8 +6879,6 @@ static void CalculateSwathWidth(
{
int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
- int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
- int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
@@ -7118,6 +6889,8 @@ static void CalculateSwathWidth(
MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
if (BytePerPixC[k] > 0) {
+ int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
+
swath_width_chroma_ub[k] = dml_min(
surface_width_ub_c,
(int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
@@ -7129,6 +6902,8 @@ static void CalculateSwathWidth(
MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
if (BytePerPixC[k] > 0) {
+ int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
+
swath_width_chroma_ub[k] = dml_min(
surface_height_ub_c,
(int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
@@ -7247,71 +7022,17 @@ static double CalculateUrgentLatency(
return ret;
}
-static void UseMinimumDCFCLK(
+static noinline_for_stack void UseMinimumDCFCLK(
struct display_mode_lib *mode_lib,
- int MaxInterDCNTileRepeaters,
int MaxPrefetchMode,
- double FinalDRAMClockChangeLatency,
- double SREnterPlusExitTime,
- int ReturnBusWidth,
- int RoundTripPingLatencyCycles,
- int ReorderingBytes,
- int PixelChunkSizeInKByte,
- int MetaChunkSize,
- bool GPUVMEnable,
- int GPUVMMaxPageTableLevels,
- bool HostVMEnable,
- int NumberOfActivePlanes,
- double HostVMMinPageSize,
- int HostVMMaxNonCachedPageTableLevels,
- bool DynamicMetadataVMEnabled,
- enum immediate_flip_requirement ImmediateFlipRequirement,
- bool ProgressiveToInterlaceUnitInOPP,
- double MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation,
- double PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
- int VTotal[],
- int VActive[],
- int DynamicMetadataTransmittedBytes[],
- int DynamicMetadataLinesBeforeActiveRequired[],
- bool Interlace[],
- double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
- double RequiredDISPCLK[][2],
- double UrgLatency[],
- unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
- double ProjectedDCFCLKDeepSleep[][2],
- double MaximumVStartup[][2][DC__NUM_DPP__MAX],
- double TotalVActivePixelBandwidth[][2],
- double TotalVActiveCursorBandwidth[][2],
- double TotalMetaRowBandwidth[][2],
- double TotalDPTERowBandwidth[][2],
- unsigned int TotalNumberOfActiveDPP[][2],
- unsigned int TotalNumberOfDCCActiveDPP[][2],
- int dpte_group_bytes[],
- double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
- double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
- int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
- int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
- int BytePerPixelY[],
- int BytePerPixelC[],
- int HTotal[],
- double PixelClock[],
- double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
- double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
- double MetaRowBytes[][2][DC__NUM_DPP__MAX],
- bool DynamicMetadataEnable[],
- double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
- double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
- double ReadBandwidthLuma[],
- double ReadBandwidthChroma[],
- double DCFCLKPerState[],
- double DCFCLKState[][2])
+ int ReorderingBytes)
{
struct vba_vars_st *v = &mode_lib->vba;
int dummy1, i, j, k;
double NormalEfficiency, dummy2, dummy3;
double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
- NormalEfficiency = PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
+ NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
for (i = 0; i < v->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
@@ -7329,61 +7050,61 @@ static void UseMinimumDCFCLK(
double MinimumTvmPlus2Tr0;
TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
- for (k = 0; k < NumberOfActivePlanes; ++k) {
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
- + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] / (15.75 * HTotal[k] / PixelClock[k]);
+ + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
}
- for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
- NoOfDPPState[k] = NoOfDPP[i][j][k];
+ for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
+ NoOfDPPState[k] = v->NoOfDPP[i][j][k];
}
- MinimumTWait = CalculateTWait(MaxPrefetchMode, FinalDRAMClockChangeLatency, UrgLatency[i], SREnterPlusExitTime);
- NonDPTEBandwidth = TotalVActivePixelBandwidth[i][j] + TotalVActiveCursorBandwidth[i][j] + TotalMetaRowBandwidth[i][j];
- DPTEBandwidth = (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) ?
- TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : TotalDPTERowBandwidth[i][j];
+ MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
+ NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
+ DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
+ TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
DCFCLKRequiredForAverageBandwidth = dml_max3(
- ProjectedDCFCLKDeepSleep[i][j],
- (NonDPTEBandwidth + TotalDPTERowBandwidth[i][j]) / ReturnBusWidth
- / (MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
- (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / ReturnBusWidth);
+ v->ProjectedDCFCLKDeepSleep[i][j],
+ (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
+ / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
+ (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
ExtraLatencyBytes = CalculateExtraLatencyBytes(
ReorderingBytes,
- TotalNumberOfActiveDPP[i][j],
- PixelChunkSizeInKByte,
- TotalNumberOfDCCActiveDPP[i][j],
- MetaChunkSize,
- GPUVMEnable,
- HostVMEnable,
- NumberOfActivePlanes,
+ v->TotalNumberOfActiveDPP[i][j],
+ v->PixelChunkSizeInKByte,
+ v->TotalNumberOfDCCActiveDPP[i][j],
+ v->MetaChunkSize,
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->NumberOfActivePlanes,
NoOfDPPState,
- dpte_group_bytes,
+ v->dpte_group_bytes,
1,
- HostVMMinPageSize,
- HostVMMaxNonCachedPageTableLevels);
- ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
- for (k = 0; k < NumberOfActivePlanes; ++k) {
+ v->HostVMMinPageSize,
+ v->HostVMMaxNonCachedPageTableLevels);
+ ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
double DCFCLKCyclesRequiredInPrefetch;
double ExpectedPrefetchBWAcceleration;
double PrefetchTime;
- PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
- + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] * BytePerPixelC[k]) / NormalEfficiency / ReturnBusWidth;
+ PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
+ + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
- + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / ReturnBusWidth * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
- + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / ReturnBusWidth
- + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
- PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) * HTotal[k] / PixelClock[k];
- ExpectedPrefetchBWAcceleration = (VActivePixelBandwidth[i][j][k] + VActiveCursorBandwidth[i][j][k])
- / (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
+ + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
+ + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
+ + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
+ PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
+ ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
+ / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
DynamicMetadataVMExtraLatency[k] =
- (GPUVMEnable == true && DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
- UrgLatency[i] * GPUVMMaxPageTableLevels * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
- PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - MinimumTWait
- - UrgLatency[i]
- * ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels : GPUVMMaxPageTableLevels - 2)
- * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
+ (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
+ v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
+ PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
+ - v->UrgLatency[i]
+ * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
+ * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
- DynamicMetadataVMExtraLatency[k];
if (PrefetchTime > 0) {
@@ -7392,14 +7113,14 @@ static void UseMinimumDCFCLK(
/ (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
* dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
- if (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) {
+ if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
- + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / ReturnBusWidth;
+ + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
}
} else {
- DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
+ DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
}
- if (DynamicMetadataEnable[k] == true) {
+ if (v->DynamicMetadataEnable[k] == true) {
double TSetupPipe;
double TdmbfPipe;
double TdmsksPipe;
@@ -7407,17 +7128,17 @@ static void UseMinimumDCFCLK(
double AllowedTimeForUrgentExtraLatency;
CalculateVupdateAndDynamicMetadataParameters(
- MaxInterDCNTileRepeaters,
- RequiredDPPCLK[i][j][k],
- RequiredDISPCLK[i][j],
- ProjectedDCFCLKDeepSleep[i][j],
- PixelClock[k],
- HTotal[k],
- VTotal[k] - VActive[k],
- DynamicMetadataTransmittedBytes[k],
- DynamicMetadataLinesBeforeActiveRequired[k],
- Interlace[k],
- ProgressiveToInterlaceUnitInOPP,
+ v->MaxInterDCNTileRepeaters,
+ v->RequiredDPPCLK[i][j][k],
+ v->RequiredDISPCLK[i][j],
+ v->ProjectedDCFCLKDeepSleep[i][j],
+ v->PixelClock[k],
+ v->HTotal[k],
+ v->VTotal[k] - v->VActive[k],
+ v->DynamicMetadataTransmittedBytes[k],
+ v->DynamicMetadataLinesBeforeActiveRequired[k],
+ v->Interlace[k],
+ v->ProgressiveToInterlaceUnitInOPP,
&TSetupPipe,
&TdmbfPipe,
&TdmecPipe,
@@ -7425,31 +7146,31 @@ static void UseMinimumDCFCLK(
&dummy1,
&dummy2,
&dummy3);
- AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
+ AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
- TdmsksPipe - DynamicMetadataVMExtraLatency[k];
if (AllowedTimeForUrgentExtraLatency > 0) {
DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
DCFCLKRequiredForPeakBandwidthPerPlane[k],
ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
} else {
- DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
+ DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
}
}
}
DCFCLKRequiredForPeakBandwidth = 0;
- for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
+ for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
}
- MinimumTvmPlus2Tr0 = UrgLatency[i]
- * (GPUVMEnable == true ?
- (HostVMEnable == true ?
- (GPUVMMaxPageTableLevels + 2) * (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) :
+ MinimumTvmPlus2Tr0 = v->UrgLatency[i]
+ * (v->GPUVMEnable == true ?
+ (v->HostVMEnable == true ?
+ (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
0);
- for (k = 0; k < NumberOfActivePlanes; ++k) {
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
double MaximumTvmPlus2Tr0PlusTsw;
- MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
+ MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
- DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
+ DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
} else {
DCFCLKRequiredForPeakBandwidth = dml_max3(
DCFCLKRequiredForPeakBandwidth,
@@ -7457,7 +7178,7 @@ static void UseMinimumDCFCLK(
(2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
}
}
- DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
+ v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
}
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
index e0fecf127bd5..bfeb01477f0c 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
@@ -27,94 +27,7 @@
#include "../display_mode_vba.h"
#include "../dml_inline_defs.h"
#include "display_rq_dlg_calc_31.h"
-
-static bool CalculateBytePerPixelAnd256BBlockSizes(
- enum source_format_class SourcePixelFormat,
- enum dm_swizzle_mode SurfaceTiling,
- unsigned int *BytePerPixelY,
- unsigned int *BytePerPixelC,
- double *BytePerPixelDETY,
- double *BytePerPixelDETC,
- unsigned int *BlockHeight256BytesY,
- unsigned int *BlockHeight256BytesC,
- unsigned int *BlockWidth256BytesY,
- unsigned int *BlockWidth256BytesC)
-{
- if (SourcePixelFormat == dm_444_64) {
- *BytePerPixelDETY = 8;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 8;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
- *BytePerPixelDETY = 4;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 4;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_16) {
- *BytePerPixelDETY = 2;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 2;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_8) {
- *BytePerPixelDETY = 1;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 1;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_rgbe_alpha) {
- *BytePerPixelDETY = 4;
- *BytePerPixelDETC = 1;
- *BytePerPixelY = 4;
- *BytePerPixelC = 1;
- } else if (SourcePixelFormat == dm_420_8) {
- *BytePerPixelDETY = 1;
- *BytePerPixelDETC = 2;
- *BytePerPixelY = 1;
- *BytePerPixelC = 2;
- } else if (SourcePixelFormat == dm_420_12) {
- *BytePerPixelDETY = 2;
- *BytePerPixelDETC = 4;
- *BytePerPixelY = 2;
- *BytePerPixelC = 4;
- } else {
- *BytePerPixelDETY = 4.0 / 3;
- *BytePerPixelDETC = 8.0 / 3;
- *BytePerPixelY = 2;
- *BytePerPixelC = 4;
- }
-
- if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
- || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
- if (SurfaceTiling == dm_sw_linear) {
- *BlockHeight256BytesY = 1;
- } else if (SourcePixelFormat == dm_444_64) {
- *BlockHeight256BytesY = 4;
- } else if (SourcePixelFormat == dm_444_8) {
- *BlockHeight256BytesY = 16;
- } else {
- *BlockHeight256BytesY = 8;
- }
- *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
- *BlockHeight256BytesC = 0;
- *BlockWidth256BytesC = 0;
- } else {
- if (SurfaceTiling == dm_sw_linear) {
- *BlockHeight256BytesY = 1;
- *BlockHeight256BytesC = 1;
- } else if (SourcePixelFormat == dm_rgbe_alpha) {
- *BlockHeight256BytesY = 8;
- *BlockHeight256BytesC = 16;
- } else if (SourcePixelFormat == dm_420_8) {
- *BlockHeight256BytesY = 16;
- *BlockHeight256BytesC = 8;
- } else {
- *BlockHeight256BytesY = 8;
- *BlockHeight256BytesC = 8;
- }
- *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
- *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
- }
- return true;
-}
+#include "../dcn30/display_mode_vba_30.h"
static bool is_dual_plane(enum source_format_class source_format)
{
@@ -467,7 +380,7 @@ static void get_meta_and_pte_attr(
double byte_per_pixel_det_y;
double byte_per_pixel_det_c;
- CalculateBytePerPixelAnd256BBlockSizes(
+ dml30_CalculateBytePerPixelAnd256BBlockSizes(
(enum source_format_class) (source_format),
(enum dm_swizzle_mode) (tiling),
&bytes_per_element_y,
@@ -500,8 +413,6 @@ static void get_meta_and_pte_attr(
log2_blk256_height = dml_log2((double) blk256_height);
blk_bytes = surf_linear ? 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size);
log2_blk_bytes = dml_log2((double) blk_bytes);
- log2_blk_height = 0;
- log2_blk_width = 0;
// remember log rule
// "+" in log is multiply
@@ -568,8 +479,6 @@ static void get_meta_and_pte_attr(
log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element - log2_meta_req_height;
meta_req_width = 1 << log2_meta_req_width;
meta_req_height = 1 << log2_meta_req_height;
- log2_meta_row_height = 0;
- meta_row_width_ub = 0;
// the dimensions of a meta row are meta_row_width x meta_row_height in elements.
// calculate upper bound of the meta_row_width
@@ -706,7 +615,7 @@ static void get_meta_and_pte_attr(
if (hostvm_enable)
rq_sizing_param->dpte_group_bytes = 512;
else {
- if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group
+ if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group
rq_sizing_param->dpte_group_bytes = 512;
else
rq_sizing_param->dpte_group_bytes = 2048;
@@ -742,13 +651,12 @@ static void get_surf_rq_param(
bool is_chroma,
bool is_alpha)
{
- bool mode_422 = 0;
unsigned int vp_width = 0;
unsigned int vp_height = 0;
unsigned int data_pitch = 0;
unsigned int meta_pitch = 0;
unsigned int surface_height = 0;
- unsigned int ppe = mode_422 ? 2 : 1;
+ unsigned int ppe = 1;
// FIXME check if ppe apply for both luma and chroma in 422 case
if (is_chroma | is_alpha) {
@@ -953,7 +861,6 @@ static void dml_rq_dlg_get_dlg_params(
{
const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src;
const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest;
- const display_output_params_st *dout = &e2e_pipe_param[pipe_idx].dout;
const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg;
const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth;
const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps;
@@ -976,7 +883,6 @@ static void dml_rq_dlg_get_dlg_params(
double min_ttu_vblank;
unsigned int dlg_vblank_start;
bool dual_plane;
- bool mode_422;
unsigned int access_dir;
unsigned int vp_height_l;
unsigned int vp_width_l;
@@ -990,7 +896,6 @@ static void dml_rq_dlg_get_dlg_params(
double hratio_c;
double vratio_l;
double vratio_c;
- bool scl_enable;
unsigned int swath_width_ub_l;
unsigned int dpte_groups_per_row_ub_l;
@@ -1003,9 +908,6 @@ static void dml_rq_dlg_get_dlg_params(
unsigned int vupdate_width;
unsigned int vready_offset;
- unsigned int dppclk_delay_subtotal;
- unsigned int dispclk_delay_subtotal;
-
unsigned int vstartup_start;
unsigned int dst_x_after_scaler;
unsigned int dst_y_after_scaler;
@@ -1078,8 +980,8 @@ static void dml_rq_dlg_get_dlg_params(
min_ttu_vblank = get_min_ttu_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start;
-
disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start) * dml_pow(2, 2));
+ disp_dlg_regs->min_dst_y_next_start_us = 0;
ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18));
dml_print("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, min_ttu_vblank);
@@ -1096,7 +998,6 @@ static void dml_rq_dlg_get_dlg_params(
// Prefetch Calc
// Source
dual_plane = is_dual_plane((enum source_format_class) (src->source_format));
- mode_422 = 0;
access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
vp_height_l = src->viewport_height;
vp_width_l = src->viewport_width;
@@ -1110,7 +1011,6 @@ static void dml_rq_dlg_get_dlg_params(
hratio_c = scl->hscl_ratio_c;
vratio_l = scl->vscl_ratio;
vratio_c = scl->vscl_ratio_c;
- scl_enable = scl->scl_enable;
swath_width_ub_l = rq_dlg_param->rq_l.swath_width_ub;
dpte_groups_per_row_ub_l = rq_dlg_param->rq_l.dpte_groups_per_row_ub;
@@ -1123,21 +1023,6 @@ static void dml_rq_dlg_get_dlg_params(
vupdate_width = dst->vupdate_width;
vready_offset = dst->vready_offset;
- dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal;
- dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal;
-
- if (scl_enable)
- dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl;
- else
- dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl_lb_only;
-
- dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_cnvc_formatter + src->num_cursors * mode_lib->ip.dppclk_delay_cnvc_cursor;
-
- if (dout->dsc_enable) {
- double dsc_delay = get_dsc_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // FROM VBA
- dispclk_delay_subtotal += dsc_delay;
- }
-
vstartup_start = dst->vstartup_start;
if (interlaced) {
if (vstartup_start / 2.0 - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal <= vblank_end / 2.0)
@@ -1250,18 +1135,8 @@ static void dml_rq_dlg_get_dlg_params(
dpte_row_height_l = rq_dlg_param->rq_l.dpte_row_height;
dpte_row_height_c = rq_dlg_param->rq_c.dpte_row_height;
- if (mode_422) {
- swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element
- swath_width_pixels_ub_c = swath_width_ub_c * 2;
- } else {
- swath_width_pixels_ub_l = swath_width_ub_l * 1;
- swath_width_pixels_ub_c = swath_width_ub_c * 1;
- }
-
- hscale_pixel_rate_l = 0.;
- hscale_pixel_rate_c = 0.;
- min_hratio_fact_l = 1.0;
- min_hratio_fact_c = 1.0;
+ swath_width_pixels_ub_l = swath_width_ub_l;
+ swath_width_pixels_ub_c = swath_width_ub_c;
if (hratio_l <= 1)
min_hratio_fact_l = 2.0;
@@ -1540,14 +1415,6 @@ static void dml_rq_dlg_get_dlg_params(
dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip);
dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip);
- // hack for FPGA
- if (mode_lib->project == DML_PROJECT_DCN31_FPGA) {
- if (disp_dlg_regs->vratio_prefetch >= (unsigned int) dml_pow(2, 22)) {
- disp_dlg_regs->vratio_prefetch = (unsigned int) dml_pow(2, 22) - 1;
- dml_print("vratio_prefetch exceed the max value, the register field is [21:0]\n");
- }
- }
-
disp_dlg_regs->refcyc_per_pte_group_vblank_l = (unsigned int) (dst_y_per_row_vblank * (double) htotal * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l);
ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)dml_pow(2, 13));
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
new file mode 100644
index 000000000000..df9d50b9b57c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "clk_mgr.h"
+#include "resource.h"
+#include "dcn31/dcn31_hubbub.h"
+#include "dcn314_fpu.h"
+#include "dml/dcn20/dcn20_fpu.h"
+#include "dml/dcn31/dcn31_fpu.h"
+#include "dml/display_mode_vba.h"
+#include "dml/dml_inline_defs.h"
+
+struct _vcs_dpi_ip_params_st dcn3_14_ip = {
+ .VBlankNomDefaultUS = 668,
+ .gpuvm_enable = 1,
+ .gpuvm_max_page_table_levels = 1,
+ .hostvm_enable = 1,
+ .hostvm_max_page_table_levels = 2,
+ .rob_buffer_size_kbytes = 64,
+ .det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE,
+ .config_return_buffer_size_in_kbytes = 1792,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 32,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 4,
+ .maximum_dsc_bits_per_component = 10,
+ .dsc422_native_support = false,
+ .is_line_buffer_bpp_fixed = true,
+ .line_buffer_fixed_bpp = 48,
+ .line_buffer_size_bits = 789504,
+ .max_line_buffer_lines = 12,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .dispclk_ramp_margin_percent = 1,
+ .max_inter_dcn_tile_repeaters = 8,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 46,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 27,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 119,
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+};
+
+static struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = {
+ /*TODO: correct dispclk/dppclk voltage level determination*/
+ .clock_limits = {
+ {
+ .state = 0,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 600.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 186.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 1,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 2,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 3,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 371.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 4,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 417.0,
+ .dtbclk_mhz = 600.0,
+ },
+ },
+ .num_states = 5,
+ .sr_exit_time_us = 16.5,
+ .sr_enter_plus_exit_time_us = 18.5,
+ .sr_exit_z8_time_us = 268.0,
+ .sr_enter_plus_exit_z8_time_us = 393.0,
+ .writeback_latency_us = 12.0,
+ .dram_channel_width_bytes = 4,
+ .round_trip_ping_latency_dcfclk_cycles = 106,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_sdp_bw_after_urgent = 80.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 60.0,
+ .fabric_datapath_to_dcn_data_return_bytes = 32,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.5,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .do_urgent_latency_adjustment = false,
+ .urgent_latency_adjustment_fabric_clock_component_us = 0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+};
+
+
+void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ struct clk_limit_table *clk_table = &bw_params->clk_table;
+ struct _vcs_dpi_voltage_scaling_st *clock_limits =
+ dcn3_14_soc.clock_limits;
+ unsigned int i, closest_clk_lvl;
+ int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
+ int j;
+
+ dc_assert_fp_enabled();
+
+ // Default clock levels are used for diags, which may lead to overclocking.
+ if (dc->config.use_default_clock_table == false) {
+ dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
+ dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count;
+
+ if (bw_params->dram_channel_width_bytes > 0)
+ dcn3_14_soc.dram_channel_width_bytes = bw_params->dram_channel_width_bytes;
+
+ if (bw_params->num_channels > 0)
+ dcn3_14_soc.num_chans = bw_params->num_channels;
+
+ ASSERT(dcn3_14_soc.num_chans);
+ ASSERT(clk_table->num_entries);
+
+ /* Prepass to find max clocks independent of voltage level. */
+ for (i = 0; i < clk_table->num_entries; ++i) {
+ if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
+ if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
+ }
+
+ for (i = 0; i < clk_table->num_entries; i++) {
+ /* loop backwards*/
+ for (closest_clk_lvl = 0, j = dcn3_14_soc.num_states - 1; j >= 0; j--) {
+ if ((unsigned int) dcn3_14_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
+ closest_clk_lvl = j;
+ break;
+ }
+ }
+ if (clk_table->num_entries == 1) {
+ /*smu gives one DPM level, let's take the highest one*/
+ closest_clk_lvl = dcn3_14_soc.num_states - 1;
+ }
+
+ clock_limits[i].state = i;
+
+ /* Clocks dependent on voltage level. */
+ clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ if (clk_table->num_entries == 1 &&
+ clock_limits[i].dcfclk_mhz < dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
+ /*SMU fix not released yet*/
+ clock_limits[i].dcfclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
+ }
+ clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+ clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
+
+ if (clk_table->entries[i].memclk_mhz && clk_table->entries[i].wck_ratio)
+ clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
+
+ /* Clocks independent of voltage level. */
+ clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
+ dcn3_14_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+
+ clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
+ dcn3_14_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+
+ clock_limits[i].dram_bw_per_chan_gbps = dcn3_14_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ clock_limits[i].dscclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ clock_limits[i].dtbclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ clock_limits[i].phyclk_d18_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ clock_limits[i].phyclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+ }
+ for (i = 0; i < clk_table->num_entries; i++)
+ dcn3_14_soc.clock_limits[i] = clock_limits[i];
+ if (clk_table->num_entries) {
+ dcn3_14_soc.num_states = clk_table->num_entries;
+ }
+ }
+
+ if (max_dispclk_mhz) {
+ dcn3_14_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ }
+
+ dcn20_patch_bounding_box(dc, &dcn3_14_soc);
+ dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN314);
+}
+
+static bool is_dual_plane(enum surface_pixel_format format)
+{
+ return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
+}
+
+/*
+ * micro_sec_to_vert_lines () - converts time to number of vertical lines for a given timing
+ *
+ * @param: num_us: number of microseconds
+ * @return: number of vertical lines. If exact number of vertical lines is not found then
+ * it will round up to next number of lines to guarantee num_us
+ */
+static unsigned int micro_sec_to_vert_lines(unsigned int num_us, struct dc_crtc_timing *timing)
+{
+ unsigned int num_lines = 0;
+ unsigned int lines_time_in_ns = 1000.0 *
+ (((float)timing->h_total * 1000.0) /
+ ((float)timing->pix_clk_100hz / 10.0));
+
+ num_lines = dml_ceil(1000.0 * num_us / lines_time_in_ns, 1.0);
+
+ return num_lines;
+}
+
+static unsigned int get_vertical_back_porch(struct dc_crtc_timing *timing)
+{
+ unsigned int v_active = 0, v_blank = 0, v_back_porch = 0;
+
+ v_active = timing->v_border_top + timing->v_addressable + timing->v_border_bottom;
+ v_blank = timing->v_total - v_active;
+ v_back_porch = v_blank - timing->v_front_porch - timing->v_sync_width;
+
+ return v_back_porch;
+}
+
+/*
+ * dcn314_populate_dml_pipes_from_context_fpu() - fill the DML pipe array for DCN3.14
+ *
+ * Calls dcn31x_populate_dml_pipes_from_context() and then walks every pipe_ctx
+ * that has a stream, overriding per-pipe DML source/destination/output
+ * parameters in @pipes. @pipes is indexed by pipe_cnt (the running count of
+ * pipes that have a stream), not by the pipe_ctx index.
+ *
+ * Besides @pipes, this writes context->bw_ctx.dml.ip.det_buffer_size_kbytes and
+ * dc->config.enable_4to1MPC, and may set odm_combine_4to1_supported and
+ * ODMCombinePolicy in context->bw_ctx.dml depending on debug options and the
+ * seamless-boot state of an eDP stream.
+ *
+ * dc_assert_fp_enabled() is called on entry.
+ *
+ * Return: number of pipes that have a stream.
+ */
+int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context,
+					       display_e2e_pipe_params_st *pipes,
+					       enum dc_validate_mode validate_mode)
+{
+	int i, pipe_cnt;
+	struct resource_context *res_ctx = &context->res_ctx;
+	struct pipe_ctx *pipe = NULL;
+	bool upscaled = false;
+	const unsigned int max_allowed_vblank_nom = 1023;
+
+	dc_assert_fp_enabled();
+
+	dcn31x_populate_dml_pipes_from_context(dc, context, pipes, validate_mode);
+
+	for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+		struct dc_crtc_timing *timing;
+		unsigned int num_lines = 0;
+		unsigned int v_back_porch = 0;
+
+		if (!res_ctx->pipe_ctx[i].stream)
+			continue;
+		pipe = &res_ctx->pipe_ctx[i];
+		timing = &pipe->stream->timing;
+
+		num_lines = micro_sec_to_vert_lines(dcn3_14_ip.VBlankNomDefaultUS, timing);
+
+		if (pipe->stream->adjust.v_total_min != 0)
+			pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min;
+		else
+			pipes[pipe_cnt].pipe.dest.vtotal = timing->v_total;
+
+		v_back_porch = get_vertical_back_porch(timing);
+
+		pipes[pipe_cnt].pipe.dest.vblank_nom = timing->v_total - pipes[pipe_cnt].pipe.dest.vactive;
+		pipes[pipe_cnt].pipe.dest.vblank_nom = min(pipes[pipe_cnt].pipe.dest.vblank_nom, num_lines);
+		// vblank_nom should not be smaller than (VSync (timing->v_sync_width + v_back_porch) + 2)
+		// + 2 is because
+		// 1 -> VStartup_start should be 1 line before VSync
+		// 1 -> always reserve 1 line between start of vblank to vstartup signal
+		pipes[pipe_cnt].pipe.dest.vblank_nom =
+			max(pipes[pipe_cnt].pipe.dest.vblank_nom, timing->v_sync_width + v_back_porch + 2);
+		pipes[pipe_cnt].pipe.dest.vblank_nom = min(pipes[pipe_cnt].pipe.dest.vblank_nom, max_allowed_vblank_nom);
+
+		if (pipe->plane_state &&
+				(pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height ||
+				pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width))
+			upscaled = true;
+
+		/*
+		 * Apply HostVM policy - either based on hypervisor globally enabled, or rIOMMU active.
+		 * Indexed by pipe_cnt like every other per-pipe write in this loop: pipes[] only
+		 * holds entries for pipes with a stream, so i and pipe_cnt diverge once a
+		 * stream-less pipe has been skipped.
+		 */
+		if (dc->debug.dml_hostvm_override == DML_HOSTVM_NO_OVERRIDE)
+			pipes[pipe_cnt].pipe.src.hostvm = dc->vm_pa_config.is_hvm_enabled || dc->res_pool->hubbub->riommu_active;
+
+		/*
+		 * Immediate flip can be set dynamically after enabling the plane.
+		 * We need to require support for immediate flip or underflow can be
+		 * intermittently experienced depending on peak b/w requirements.
+		 */
+		pipes[pipe_cnt].pipe.src.immediate_flip = true;
+
+		pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
+		pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+		pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+		pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
+		pipes[pipe_cnt].pipe.src.dcc_rate = 3;
+		pipes[pipe_cnt].dout.dsc_input_bpc = 0;
+
+		/* With DSC enabled, the DSC input bpc follows the stream colour depth. */
+		if (pipes[pipe_cnt].dout.dsc_enable) {
+			switch (timing->display_color_depth) {
+			case COLOR_DEPTH_888:
+				pipes[pipe_cnt].dout.dsc_input_bpc = 8;
+				break;
+			case COLOR_DEPTH_101010:
+				pipes[pipe_cnt].dout.dsc_input_bpc = 10;
+				break;
+			case COLOR_DEPTH_121212:
+				pipes[pipe_cnt].dout.dsc_input_bpc = 12;
+				break;
+			default:
+				ASSERT(0);
+				break;
+			}
+		}
+
+		pipe_cnt++;
+	}
+	context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE;
+
+	dc->config.enable_4to1MPC = false;
+	/* pipe is non-NULL whenever pipe_cnt == 1: it points at the single pipe that has a stream. */
+	if (pipe_cnt == 1 && pipe->plane_state
+		&& pipe->plane_state->rotation == ROTATION_ANGLE_0 && !dc->debug.disable_z9_mpc) {
+		if (is_dual_plane(pipe->plane_state->format)
+				&& pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) {
+			dc->config.enable_4to1MPC = true;
+		} else if (!is_dual_plane(pipe->plane_state->format) && pipe->plane_state->src_rect.width <= 5120) {
+			/* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */
+			context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
+			pipes[0].pipe.src.unbounded_req_mode = true;
+		}
+	} else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count
+			&& dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) {
+		context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64;
+	} else if (context->stream_count >= 3 && upscaled) {
+		context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
+	}
+
+	if (dc->debug.force_odm_combine_4to1)
+		context->bw_ctx.dml.ip.odm_combine_4to1_supported = true;
+
+	/* Carry a 2:1 ODM combine boot mode over from the first matching seamless-boot eDP stream. */
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *cur_pipe = &context->res_ctx.pipe_ctx[i];
+
+		if (!cur_pipe->stream)
+			continue;
+
+		if (cur_pipe->stream->signal == SIGNAL_TYPE_EDP && dc->debug.seamless_boot_odm_combine &&
+				cur_pipe->stream->apply_seamless_boot_optimization) {
+
+			if (cur_pipe->stream->apply_boot_odm_mode == dm_odm_combine_policy_2to1) {
+				context->bw_ctx.dml.vba.ODMCombinePolicy = dm_odm_combine_policy_2to1;
+				break;
+			}
+		}
+	}
+
+	return pipe_cnt;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h
new file mode 100644
index 000000000000..362ac79184ea
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN314_FPU_H__
+#define __DCN314_FPU_H__
+
+#define DCN3_14_DEFAULT_DET_SIZE 384
+#define DCN3_14_MAX_DET_SIZE 384
+#define DCN3_14_MIN_COMPBUF_SIZE_KB 128
+#define DCN3_14_CRB_SEGMENT_SIZE_KB 64
+
+void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params);
+int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ enum dc_validate_mode validate_mode);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
new file mode 100644
index 000000000000..9f3938a50240
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
@@ -0,0 +1,7343 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dc.h"
+#include "../display_mode_lib.h"
+#include "display_mode_vba_314.h"
+#include "../dml_inline_defs.h"
+
+/*
+ * NOTE:
+ * This file is gcc-parsable HW gospel, coming straight from HW engineers.
+ *
+ * It doesn't adhere to Linux kernel style and sometimes will do things in odd
+ * ways. Unless there is something clearly wrong with it the code should
+ * remain as-is as it provides us with a guarantee from HW that it is correct.
+ */
+
+#define BPP_INVALID 0
+#define BPP_BLENDED_PIPE 0xffffffff
+#define DCN314_MAX_DSC_IMAGE_WIDTH 5184
+#define DCN314_MAX_FMT_420_BUFFER_WIDTH 4096
+
+// For DML-C changes that haven't been propagated to VBA yet
+//#define __DML_VBA_ALLOW_DELTA__
+
+// Move these to ip parameters/constant
+
+// At which vstartup the DML start to try if the mode can be supported
+#define __DML_VBA_MIN_VSTARTUP__ 9
+
+// Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
+#define __DML_ARB_TO_RET_DELAY__ (7 + 95)
+
+// fudge factor for min dcfclk calculation
+#define __DML_MIN_DCFCLK_FACTOR__ 1.15
+
+/*
+ * Per-pipe parameters grouped into one struct; CalculatePrefetchSchedule()
+ * takes a pointer to it as its myPipe argument.
+ */
+typedef struct {
+ double DPPCLK;
+ double DISPCLK;
+ double PixelClock;
+ double DCFCLKDeepSleep;
+ unsigned int DPPPerPlane;
+ bool ScalerEnabled;
+ double VRatio;
+ double VRatioChroma;
+ enum scan_direction_class SourceScan;
+ unsigned int BlockWidth256BytesY;
+ unsigned int BlockHeight256BytesY;
+ unsigned int BlockWidth256BytesC;
+ unsigned int BlockHeight256BytesC;
+ unsigned int InterlaceEnable;
+ unsigned int NumberOfCursors;
+ unsigned int VBlank;
+ unsigned int HTotal;
+ unsigned int DCCEnable;
+ bool ODMCombineIsEnabled;
+ enum source_format_class SourcePixelFormat;
+ int BytePerPixelY;
+ int BytePerPixelC;
+ bool ProgressiveToInterlaceUnitInOPP;
+} Pipe;
+
+#define BPP_INVALID 0
+#define BPP_BLENDED_PIPE 0xffffffff
+
+static bool CalculateBytePerPixelAnd256BBlockSizes(
+ enum source_format_class SourcePixelFormat,
+ enum dm_swizzle_mode SurfaceTiling,
+ unsigned int *BytePerPixelY,
+ unsigned int *BytePerPixelC,
+ double *BytePerPixelDETY,
+ double *BytePerPixelDETC,
+ unsigned int *BlockHeight256BytesY,
+ unsigned int *BlockHeight256BytesC,
+ unsigned int *BlockWidth256BytesY,
+ unsigned int *BlockWidth256BytesC);
+static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
+static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
+static unsigned int dscceComputeDelay(
+ unsigned int bpc,
+ double BPP,
+ unsigned int sliceWidth,
+ unsigned int numSlices,
+ enum output_format_class pixelFormat,
+ enum output_encoder_class Output);
+static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
+static bool CalculatePrefetchSchedule(
+ struct display_mode_lib *mode_lib,
+ double HostVMInefficiencyFactor,
+ Pipe *myPipe,
+ unsigned int DSCDelay,
+ double DPPCLKDelaySubtotalPlusCNVCFormater,
+ double DPPCLKDelaySCL,
+ double DPPCLKDelaySCLLBOnly,
+ double DPPCLKDelayCNVCCursor,
+ double DISPCLKDelaySubtotal,
+ unsigned int DPP_RECOUT_WIDTH,
+ enum output_format_class OutputFormat,
+ unsigned int MaxInterDCNTileRepeaters,
+ unsigned int VStartup,
+ unsigned int MaxVStartup,
+ unsigned int GPUVMPageTableLevels,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ double HostVMMinPageSize,
+ bool DynamicMetadataEnable,
+ bool DynamicMetadataVMEnabled,
+ int DynamicMetadataLinesBeforeActiveRequired,
+ unsigned int DynamicMetadataTransmittedBytes,
+ double UrgentLatency,
+ double UrgentExtraLatency,
+ double TCalc,
+ unsigned int PDEAndMetaPTEBytesFrame,
+ unsigned int MetaRowByte,
+ unsigned int PixelPTEBytesPerRow,
+ double PrefetchSourceLinesY,
+ unsigned int SwathWidthY,
+ double VInitPreFillY,
+ unsigned int MaxNumSwathY,
+ double PrefetchSourceLinesC,
+ unsigned int SwathWidthC,
+ double VInitPreFillC,
+ unsigned int MaxNumSwathC,
+ int swath_width_luma_ub,
+ int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double TWait,
+ double *DSTXAfterScaler,
+ double *DSTYAfterScaler,
+ double *DestinationLinesForPrefetch,
+ double *PrefetchBandwidth,
+ double *DestinationLinesToRequestVMInVBlank,
+ double *DestinationLinesToRequestRowInVBlank,
+ double *VRatioPrefetchY,
+ double *VRatioPrefetchC,
+ double *RequiredPrefetchPixDataBWLuma,
+ double *RequiredPrefetchPixDataBWChroma,
+ bool *NotEnoughTimeForDynamicMetadata,
+ double *Tno_bw,
+ double *prefetch_vmrow_bw,
+ double *Tdmdl_vm,
+ double *Tdmdl,
+ double *TSetup,
+ int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix);
+static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
+static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
+static void CalculateDCCConfiguration(
+ bool DCCEnabled,
+ bool DCCProgrammingAssumesScanDirectionUnknown,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceWidthLuma,
+ unsigned int SurfaceWidthChroma,
+ unsigned int SurfaceHeightLuma,
+ unsigned int SurfaceHeightChroma,
+ double DETBufferSize,
+ unsigned int RequestHeight256ByteLuma,
+ unsigned int RequestHeight256ByteChroma,
+ enum dm_swizzle_mode TilingFormat,
+ unsigned int BytePerPixelY,
+ unsigned int BytePerPixelC,
+ double BytePerPixelDETY,
+ double BytePerPixelDETC,
+ enum scan_direction_class ScanOrientation,
+ unsigned int *MaxUncompressedBlockLuma,
+ unsigned int *MaxUncompressedBlockChroma,
+ unsigned int *MaxCompressedBlockLuma,
+ unsigned int *MaxCompressedBlockChroma,
+ unsigned int *IndependentBlockLuma,
+ unsigned int *IndependentBlockChroma);
+static double CalculatePrefetchSourceLines(
+ struct display_mode_lib *mode_lib,
+ double VRatio,
+ double vtaps,
+ bool Interlace,
+ bool ProgressiveToInterlaceUnitInOPP,
+ unsigned int SwathHeight,
+ unsigned int ViewportYStart,
+ double *VInitPreFill,
+ unsigned int *MaxNumSwath);
+static unsigned int CalculateVMAndRowBytes(
+ struct display_mode_lib *mode_lib,
+ bool DCCEnable,
+ unsigned int BlockHeight256Bytes,
+ unsigned int BlockWidth256Bytes,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceTiling,
+ unsigned int BytePerPixel,
+ enum scan_direction_class ScanDirection,
+ unsigned int SwathWidth,
+ unsigned int ViewportHeight,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ unsigned int GPUVMMinPageSize,
+ unsigned int HostVMMinPageSize,
+ unsigned int PTEBufferSizeInRequests,
+ unsigned int Pitch,
+ unsigned int DCCMetaPitch,
+ unsigned int *MacroTileWidth,
+ unsigned int *MetaRowByte,
+ unsigned int *PixelPTEBytesPerRow,
+ bool *PTEBufferSizeNotExceeded,
+ int *dpte_row_width_ub,
+ unsigned int *dpte_row_height,
+ unsigned int *MetaRequestWidth,
+ unsigned int *MetaRequestHeight,
+ unsigned int *meta_row_width,
+ unsigned int *meta_row_height,
+ int *vm_group_bytes,
+ unsigned int *dpte_group_bytes,
+ unsigned int *PixelPTEReqWidth,
+ unsigned int *PixelPTEReqHeight,
+ unsigned int *PTERequestSize,
+ int *DPDE0BytesFrame,
+ int *MetaPTEBytesFrame);
+static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
+static void CalculateRowBandwidth(
+ bool GPUVMEnable,
+ enum source_format_class SourcePixelFormat,
+ double VRatio,
+ double VRatioChroma,
+ bool DCCEnable,
+ double LineTime,
+ unsigned int MetaRowByteLuma,
+ unsigned int MetaRowByteChroma,
+ unsigned int meta_row_height_luma,
+ unsigned int meta_row_height_chroma,
+ unsigned int PixelPTEBytesPerRowLuma,
+ unsigned int PixelPTEBytesPerRowChroma,
+ unsigned int dpte_row_height_luma,
+ unsigned int dpte_row_height_chroma,
+ double *meta_row_bw,
+ double *dpte_row_bw);
+
+static void CalculateFlipSchedule(
+ struct display_mode_lib *mode_lib,
+ unsigned int k,
+ double HostVMInefficiencyFactor,
+ double UrgentExtraLatency,
+ double UrgentLatency,
+ double PDEAndMetaPTEBytesPerFrame,
+ double MetaRowBytes,
+ double DPTEBytesPerRow);
+static double CalculateWriteBackDelay(
+ enum source_format_class WritebackPixelFormat,
+ double WritebackHRatio,
+ double WritebackVRatio,
+ unsigned int WritebackVTaps,
+ int WritebackDestinationWidth,
+ int WritebackDestinationHeight,
+ int WritebackSourceHeight,
+ unsigned int HTotal);
+
+static void CalculateVupdateAndDynamicMetadataParameters(
+ int MaxInterDCNTileRepeaters,
+ double DPPCLK,
+ double DISPCLK,
+ double DCFClkDeepSleep,
+ double PixelClock,
+ int HTotal,
+ int VBlank,
+ int DynamicMetadataTransmittedBytes,
+ int DynamicMetadataLinesBeforeActiveRequired,
+ int InterlaceEnable,
+ bool ProgressiveToInterlaceUnitInOPP,
+ double *TSetup,
+ double *Tdmbf,
+ double *Tdmec,
+ double *Tdmsks,
+ int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix);
+
+static void CalculateWatermarksAndDRAMSpeedChangeSupport(
+ struct display_mode_lib *mode_lib,
+ unsigned int PrefetchMode,
+ double DCFCLK,
+ double ReturnBW,
+ double UrgentLatency,
+ double ExtraLatency,
+ double SOCCLK,
+ double DCFCLKDeepSleep,
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ unsigned int DPPPerPlane[],
+ double BytePerPixelDETY[],
+ double BytePerPixelDETC[],
+ bool UnboundedRequestEnabled,
+ unsigned int CompressedBufferSizeInkByte,
+ enum clock_change_support *DRAMClockChangeSupport,
+ double *StutterExitWatermark,
+ double *StutterEnterPlusExitWatermark,
+ double *Z8StutterExitWatermark,
+ double *Z8StutterEnterPlusExitWatermark);
+
+static void CalculateDCFCLKDeepSleep(
+ struct display_mode_lib *mode_lib,
+ unsigned int NumberOfActivePlanes,
+ int BytePerPixelY[],
+ int BytePerPixelC[],
+ double VRatio[],
+ double VRatioChroma[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ unsigned int DPPPerPlane[],
+ double HRatio[],
+ double HRatioChroma[],
+ double PixelClock[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double DPPCLK[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ int ReturnBusWidth,
+ double *DCFCLKDeepSleep);
+
+static void CalculateUrgentBurstFactor(
+ int swath_width_luma_ub,
+ int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double LineTime,
+ double UrgentLatency,
+ double CursorBufferSize,
+ unsigned int CursorWidth,
+ unsigned int CursorBPP,
+ double VRatio,
+ double VRatioC,
+ double BytePerPixelInDETY,
+ double BytePerPixelInDETC,
+ double DETBufferSizeY,
+ double DETBufferSizeC,
+ double *UrgentBurstFactorCursor,
+ double *UrgentBurstFactorLuma,
+ double *UrgentBurstFactorChroma,
+ bool *NotEnoughUrgentLatencyHiding);
+
+static void UseMinimumDCFCLK(
+ struct display_mode_lib *mode_lib,
+ int MaxPrefetchMode,
+ int ReorderingBytes);
+
+static void CalculatePixelDeliveryTimes(
+ unsigned int NumberOfActivePlanes,
+ double VRatio[],
+ double VRatioChroma[],
+ double VRatioPrefetchY[],
+ double VRatioPrefetchC[],
+ unsigned int swath_width_luma_ub[],
+ unsigned int swath_width_chroma_ub[],
+ unsigned int DPPPerPlane[],
+ double HRatio[],
+ double HRatioChroma[],
+ double PixelClock[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double DPPCLK[],
+ int BytePerPixelC[],
+ enum scan_direction_class SourceScan[],
+ unsigned int NumberOfCursors[],
+ unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
+ unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
+ unsigned int BlockWidth256BytesY[],
+ unsigned int BlockHeight256BytesY[],
+ unsigned int BlockWidth256BytesC[],
+ unsigned int BlockHeight256BytesC[],
+ double DisplayPipeLineDeliveryTimeLuma[],
+ double DisplayPipeLineDeliveryTimeChroma[],
+ double DisplayPipeLineDeliveryTimeLumaPrefetch[],
+ double DisplayPipeLineDeliveryTimeChromaPrefetch[],
+ double DisplayPipeRequestDeliveryTimeLuma[],
+ double DisplayPipeRequestDeliveryTimeChroma[],
+ double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
+ double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
+ double CursorRequestDeliveryTime[],
+ double CursorRequestDeliveryTimePrefetch[]);
+
+static void CalculateMetaAndPTETimes(
+ int NumberOfActivePlanes,
+ bool GPUVMEnable,
+ int MetaChunkSize,
+ int MinMetaChunkSizeBytes,
+ int HTotal[],
+ double VRatio[],
+ double VRatioChroma[],
+ double DestinationLinesToRequestRowInVBlank[],
+ double DestinationLinesToRequestRowInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ int BytePerPixelY[],
+ int BytePerPixelC[],
+ enum scan_direction_class SourceScan[],
+ int dpte_row_height[],
+ int dpte_row_height_chroma[],
+ int meta_row_width[],
+ int meta_row_width_chroma[],
+ int meta_row_height[],
+ int meta_row_height_chroma[],
+ int meta_req_width[],
+ int meta_req_width_chroma[],
+ int meta_req_height[],
+ int meta_req_height_chroma[],
+ int dpte_group_bytes[],
+ int PTERequestSizeY[],
+ int PTERequestSizeC[],
+ int PixelPTEReqWidthY[],
+ int PixelPTEReqHeightY[],
+ int PixelPTEReqWidthC[],
+ int PixelPTEReqHeightC[],
+ int dpte_row_width_luma_ub[],
+ int dpte_row_width_chroma_ub[],
+ double DST_Y_PER_PTE_ROW_NOM_L[],
+ double DST_Y_PER_PTE_ROW_NOM_C[],
+ double DST_Y_PER_META_ROW_NOM_L[],
+ double DST_Y_PER_META_ROW_NOM_C[],
+ double TimePerMetaChunkNominal[],
+ double TimePerChromaMetaChunkNominal[],
+ double TimePerMetaChunkVBlank[],
+ double TimePerChromaMetaChunkVBlank[],
+ double TimePerMetaChunkFlip[],
+ double TimePerChromaMetaChunkFlip[],
+ double time_per_pte_group_nom_luma[],
+ double time_per_pte_group_vblank_luma[],
+ double time_per_pte_group_flip_luma[],
+ double time_per_pte_group_nom_chroma[],
+ double time_per_pte_group_vblank_chroma[],
+ double time_per_pte_group_flip_chroma[]);
+
+static void CalculateVMGroupAndRequestTimes(
+ unsigned int NumberOfActivePlanes,
+ bool GPUVMEnable,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int HTotal[],
+ int BytePerPixelC[],
+ double DestinationLinesToRequestVMInVBlank[],
+ double DestinationLinesToRequestVMInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ int dpte_row_width_luma_ub[],
+ int dpte_row_width_chroma_ub[],
+ int vm_group_bytes[],
+ unsigned int dpde0_bytes_per_frame_ub_l[],
+ unsigned int dpde0_bytes_per_frame_ub_c[],
+ int meta_pte_bytes_per_frame_ub_l[],
+ int meta_pte_bytes_per_frame_ub_c[],
+ double TimePerVMGroupVBlank[],
+ double TimePerVMGroupFlip[],
+ double TimePerVMRequestVBlank[],
+ double TimePerVMRequestFlip[]);
+
+static void CalculateStutterEfficiency(
+ struct display_mode_lib *mode_lib,
+ int CompressedBufferSizeInkByte,
+ bool UnboundedRequestEnabled,
+ int ConfigReturnBufferSizeInKByte,
+ int MetaFIFOSizeInKEntries,
+ int ZeroSizeBufferEntries,
+ int NumberOfActivePlanes,
+ int ROBBufferSizeInKByte,
+ double TotalDataReadBandwidth,
+ double DCFCLK,
+ double ReturnBW,
+ double COMPBUF_RESERVED_SPACE_64B,
+ double COMPBUF_RESERVED_SPACE_ZS,
+ double SRExitTime,
+ double SRExitZ8Time,
+ bool SynchronizedVBlank,
+ double Z8StutterEnterPlusExitWatermark,
+ double StutterEnterPlusExitWatermark,
+ bool ProgressiveToInterlaceUnitInOPP,
+ bool Interlace[],
+ double MinTTUVBlank[],
+ int DPPPerPlane[],
+ unsigned int DETBufferSizeY[],
+ int BytePerPixelY[],
+ double BytePerPixelDETY[],
+ double SwathWidthY[],
+ int SwathHeightY[],
+ int SwathHeightC[],
+ double NetDCCRateLuma[],
+ double NetDCCRateChroma[],
+ double DCCFractionOfZeroSizeRequestsLuma[],
+ double DCCFractionOfZeroSizeRequestsChroma[],
+ int HTotal[],
+ int VTotal[],
+ double PixelClock[],
+ double VRatio[],
+ enum scan_direction_class SourceScan[],
+ int BlockHeight256BytesY[],
+ int BlockWidth256BytesY[],
+ int BlockHeight256BytesC[],
+ int BlockWidth256BytesC[],
+ int DCCYMaxUncompressedBlock[],
+ int DCCCMaxUncompressedBlock[],
+ int VActive[],
+ bool DCCEnable[],
+ bool WritebackEnable[],
+ double ReadBandwidthPlaneLuma[],
+ double ReadBandwidthPlaneChroma[],
+ double meta_row_bw[],
+ double dpte_row_bw[],
+ double *StutterEfficiencyNotIncludingVBlank,
+ double *StutterEfficiency,
+ int *NumberOfStutterBurstsPerFrame,
+ double *Z8StutterEfficiencyNotIncludingVBlank,
+ double *Z8StutterEfficiency,
+ int *Z8NumberOfStutterBurstsPerFrame,
+ double *StutterPeriod);
+
+static void CalculateSwathAndDETConfiguration(
+ bool ForceSingleDPP,
+ int NumberOfActivePlanes,
+ unsigned int DETBufferSizeInKByte,
+ double MaximumSwathWidthLuma[],
+ double MaximumSwathWidthChroma[],
+ enum scan_direction_class SourceScan[],
+ enum source_format_class SourcePixelFormat[],
+ enum dm_swizzle_mode SurfaceTiling[],
+ int ViewportWidth[],
+ int ViewportHeight[],
+ int SurfaceWidthY[],
+ int SurfaceWidthC[],
+ int SurfaceHeightY[],
+ int SurfaceHeightC[],
+ int Read256BytesBlockHeightY[],
+ int Read256BytesBlockHeightC[],
+ int Read256BytesBlockWidthY[],
+ int Read256BytesBlockWidthC[],
+ enum odm_combine_mode ODMCombineEnabled[],
+ int BlendingAndTiming[],
+ int BytePerPixY[],
+ int BytePerPixC[],
+ double BytePerPixDETY[],
+ double BytePerPixDETC[],
+ int HActive[],
+ double HRatio[],
+ double HRatioChroma[],
+ int DPPPerPlane[],
+ int swath_width_luma_ub[],
+ int swath_width_chroma_ub[],
+ double SwathWidth[],
+ double SwathWidthChroma[],
+ int SwathHeightY[],
+ int SwathHeightC[],
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ bool ViewportSizeSupportPerPlane[],
+ bool *ViewportSizeSupport);
+static void CalculateSwathWidth(
+ bool ForceSingleDPP,
+ int NumberOfActivePlanes,
+ enum source_format_class SourcePixelFormat[],
+ enum scan_direction_class SourceScan[],
+ int ViewportWidth[],
+ int ViewportHeight[],
+ int SurfaceWidthY[],
+ int SurfaceWidthC[],
+ int SurfaceHeightY[],
+ int SurfaceHeightC[],
+ enum odm_combine_mode ODMCombineEnabled[],
+ int BytePerPixY[],
+ int BytePerPixC[],
+ int Read256BytesBlockHeightY[],
+ int Read256BytesBlockHeightC[],
+ int Read256BytesBlockWidthY[],
+ int Read256BytesBlockWidthC[],
+ int BlendingAndTiming[],
+ int HActive[],
+ double HRatio[],
+ int DPPPerPlane[],
+ double SwathWidthSingleDPPY[],
+ double SwathWidthSingleDPPC[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ int MaximumSwathHeightY[],
+ int MaximumSwathHeightC[],
+ int swath_width_luma_ub[],
+ int swath_width_chroma_ub[]);
+
+static double CalculateExtraLatency(
+ int RoundTripPingLatencyCycles,
+ int ReorderingBytes,
+ double DCFCLK,
+ int TotalNumberOfActiveDPP,
+ int PixelChunkSizeInKByte,
+ int TotalNumberOfDCCActiveDPP,
+ int MetaChunkSize,
+ double ReturnBW,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ int NumberOfActivePlanes,
+ int NumberOfDPP[],
+ int dpte_group_bytes[],
+ double HostVMInefficiencyFactor,
+ double HostVMMinPageSize,
+ int HostVMMaxNonCachedPageTableLevels);
+
+static double CalculateExtraLatencyBytes(
+ int ReorderingBytes,
+ int TotalNumberOfActiveDPP,
+ int PixelChunkSizeInKByte,
+ int TotalNumberOfDCCActiveDPP,
+ int MetaChunkSize,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ int NumberOfActivePlanes,
+ int NumberOfDPP[],
+ int dpte_group_bytes[],
+ double HostVMInefficiencyFactor,
+ double HostVMMinPageSize,
+ int HostVMMaxNonCachedPageTableLevels);
+
+static double CalculateUrgentLatency(
+ double UrgentLatencyPixelDataOnly,
+ double UrgentLatencyPixelMixedWithVMData,
+ double UrgentLatencyVMDataOnly,
+ bool DoUrgentLatencyAdjustment,
+ double UrgentLatencyAdjustmentFabricClockComponent,
+ double UrgentLatencyAdjustmentFabricClockReference,
+ double FabricClockSingle);
+
+static void CalculateUnboundedRequestAndCompressedBufferSize(
+ unsigned int DETBufferSizeInKByte,
+ int ConfigReturnBufferSizeInKByte,
+ enum unbounded_requesting_policy UseUnboundedRequestingFinal,
+ int TotalActiveDPP,
+ bool NoChromaPlanes,
+ int MaxNumDPP,
+ int CompressedBufferSegmentSizeInkByteFinal,
+ enum output_encoder_class *Output,
+ bool *UnboundedRequestEnabled,
+ int *CompressedBufferSizeInkByte);
+
+static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
+static unsigned int CalculateMaxVStartup(
+ unsigned int VTotal,
+ unsigned int VActive,
+ unsigned int VBlankNom,
+ unsigned int HTotal,
+ double PixelClock,
+ bool ProgressiveTointerlaceUnitinOPP,
+ bool Interlace,
+ unsigned int VBlankNomDefaultUS,
+ double WritebackDelayTime);
+
+/*
+ * dml314_recalculate() - run the full recalculation sequence on @mode_lib.
+ *
+ * Calls, in this order: ModeSupportAndSystemConfiguration(),
+ * PixelClockAdjustmentForProgressiveToInterlaceUnit(),
+ * DisplayPipeConfiguration() and
+ * DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation().
+ * A trace line is printed before the last step when __DML_VBA_DEBUG__ is defined.
+ */
+void dml314_recalculate(struct display_mode_lib *mode_lib)
+{
+ ModeSupportAndSystemConfiguration(mode_lib);
+ PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
+ DisplayPipeConfiguration(mode_lib);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
+#endif
+ DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
+}
+
+/*
+ * dscceComputeDelay() - compute the DSC encoder delay, returned in pixels.
+ *
+ * The delay is built in cycles from the initial transmit delay (derived from
+ * rcModelSize, BPP and pixels per clock), a bpc-dependent constant D, the slice
+ * width as seen by DSC and the number of slices, then converted to pixels as
+ * Delay * 3 * pixelsPerClock.
+ *
+ * NOTE(review): the Output parameter is not referenced in the body.
+ */
+static unsigned int dscceComputeDelay(
+ unsigned int bpc,
+ double BPP,
+ unsigned int sliceWidth,
+ unsigned int numSlices,
+ enum output_format_class pixelFormat,
+ enum output_encoder_class Output)
+{
+ // valid bpc = source bits per component in the set of {8, 10, 12}
+ // valid bpp = increments of 1/16 of a bit
+ // min = 6/7/8 in N420/N422/444, respectively
+ // max = such that compression is 1:1
+ //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
+ //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
+ //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
+
+ // fixed value
+ unsigned int rcModelSize = 8192;
+
+ // N422/N420 operate at 2 pixels per clock
+ unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
+
+ if (pixelFormat == dm_420)
+ pixelsPerClock = 2;
+ else if (pixelFormat == dm_444)
+ pixelsPerClock = 1;
+ else if (pixelFormat == dm_n422)
+ pixelsPerClock = 2;
+ // #all other modes operate at 1 pixel per clock
+ else
+ pixelsPerClock = 1;
+
+ //initial transmit delay as per PPS
+ initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
+
+ //compute ssm delay
+ if (bpc == 8)
+ D = 81;
+ else if (bpc == 10)
+ D = 89;
+ else
+ D = 113;
+
+ //divide by pixel per cycle to compute slice width as seen by DSC
+ /* NOTE(review): w is used as a divisor below (ix / w, ix % w); a sliceWidth smaller than pixelsPerClock would make it zero. */
+ w = sliceWidth / pixelsPerClock;
+
+ //422 mode has an additional cycle of delay
+ if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
+ s = 0;
+ else
+ s = 1;
+
+ //main calculation for the dscce
+ ix = initalXmitDelay + 45;
+ wx = (w + 2) / 3;
+ P = 3 * wx - w;
+ l0 = ix / w;
+ a = ix + P * l0;
+ ax = (a + 2) / 3 + D + 6 + 1;
+ L = (ax + wx - 1) / wx;
+ if ((ix % w) == 0 && P != 0)
+ lstall = 1;
+ else
+ lstall = 0;
+ Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
+
+ //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
+ pixels = Delay * 3 * pixelsPerClock;
+ return pixels;
+}
+
+/*
+ * dscComputeDelay() - return a fixed DSC delay selected by @pixelFormat.
+ *
+ * Each branch sums constant per-stage contributions (named in the comments
+ * below). The totals are 48 for dm_420, 49 for dm_n422 and 30 for every other
+ * format.
+ *
+ * NOTE(review): the Output parameter is not referenced in the body.
+ */
+static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
+{
+ unsigned int Delay = 0;
+
+ if (pixelFormat == dm_420) {
+ // sfr
+ Delay = Delay + 2;
+ // dsccif
+ Delay = Delay + 0;
+ // dscc - input deserializer
+ Delay = Delay + 3;
+ // dscc gets pixels every other cycle
+ Delay = Delay + 2;
+ // dscc - input cdc fifo
+ Delay = Delay + 12;
+ // dscc gets pixels every other cycle
+ Delay = Delay + 13;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output cdc fifo
+ Delay = Delay + 7;
+ // dscc gets pixels every other cycle
+ Delay = Delay + 3;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output serializer
+ Delay = Delay + 1;
+ // sft
+ Delay = Delay + 1;
+ } else if (pixelFormat == dm_n422) {
+ // sfr
+ Delay = Delay + 2;
+ // dsccif
+ Delay = Delay + 1;
+ // dscc - input deserializer
+ Delay = Delay + 5;
+ // dscc - input cdc fifo
+ Delay = Delay + 25;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output cdc fifo
+ Delay = Delay + 10;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output serializer
+ Delay = Delay + 1;
+ // sft
+ Delay = Delay + 1;
+ } else {
+ // sfr
+ Delay = Delay + 2;
+ // dsccif
+ Delay = Delay + 0;
+ // dscc - input deserializer
+ Delay = Delay + 3;
+ // dscc - input cdc fifo
+ Delay = Delay + 12;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output cdc fifo
+ Delay = Delay + 7;
+ // dscc - output serializer
+ Delay = Delay + 1;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // sft
+ Delay = Delay + 1;
+ }
+
+ return Delay;
+}
+
+/*
+ * CalculatePrefetchSchedule - work out the prefetch schedule for one pipe.
+ *
+ * Outline of what the body does, in order:
+ *  1. Calls CalculateVupdateAndDynamicMetadataParameters() to fill *TSetup,
+ *     Tdmbf/Tdmec/Tdmsks and the VUpdate/VReady outputs.
+ *  2. Computes *Tdmdl / *Tdmdl_vm and flags *NotEnoughTimeForDynamicMetadata
+ *     when VStartup * LineTime is smaller than the dynamic metadata budget.
+ *  3. Computes *DSTXAfterScaler / *DSTYAfterScaler from the DPP/DISPCLK cycle
+ *     counts, DSCDelay, ODM combine and DPPPerPlane.
+ *  4. Builds two candidate schedules: the "_oto" one, derived from
+ *     prefetch_bw_oto, and the "_equ" one, derived from the line budget left
+ *     before VStartup (capped at 63.75 lines). The one with fewer destination
+ *     lines is selected.
+ *  5. From the selected schedule derives *DestinationLinesToRequest*InVBlank,
+ *     *VRatioPrefetchY/C, *RequiredPrefetchPixDataBWLuma/Chroma and
+ *     *prefetch_vmrow_bw.
+ *
+ * Return: true when the schedule could not be computed (MyError), false
+ * otherwise. On the error path at the end of the function the bandwidth,
+ * line-count and ratio outputs are reset to 0. The early return taken when
+ * DPPCLK or DISPCLK is zero also returns true, but leaves every output that
+ * has not been written yet untouched.
+ */
+static bool CalculatePrefetchSchedule(
+		struct display_mode_lib *mode_lib,
+		double HostVMInefficiencyFactor,
+		Pipe *myPipe,
+		unsigned int DSCDelay,
+		double DPPCLKDelaySubtotalPlusCNVCFormater,
+		double DPPCLKDelaySCL,
+		double DPPCLKDelaySCLLBOnly,
+		double DPPCLKDelayCNVCCursor,
+		double DISPCLKDelaySubtotal,
+		unsigned int DPP_RECOUT_WIDTH,
+		enum output_format_class OutputFormat,
+		unsigned int MaxInterDCNTileRepeaters,
+		unsigned int VStartup,
+		unsigned int MaxVStartup,
+		unsigned int GPUVMPageTableLevels,
+		bool GPUVMEnable,
+		bool HostVMEnable,
+		unsigned int HostVMMaxNonCachedPageTableLevels,
+		double HostVMMinPageSize,
+		bool DynamicMetadataEnable,
+		bool DynamicMetadataVMEnabled,
+		int DynamicMetadataLinesBeforeActiveRequired,
+		unsigned int DynamicMetadataTransmittedBytes,
+		double UrgentLatency,
+		double UrgentExtraLatency,
+		double TCalc,
+		unsigned int PDEAndMetaPTEBytesFrame,
+		unsigned int MetaRowByte,
+		unsigned int PixelPTEBytesPerRow,
+		double PrefetchSourceLinesY,
+		unsigned int SwathWidthY,
+		double VInitPreFillY,
+		unsigned int MaxNumSwathY,
+		double PrefetchSourceLinesC,
+		unsigned int SwathWidthC,
+		double VInitPreFillC,
+		unsigned int MaxNumSwathC,
+		int swath_width_luma_ub,
+		int swath_width_chroma_ub,
+		unsigned int SwathHeightY,
+		unsigned int SwathHeightC,
+		double TWait,
+		double *DSTXAfterScaler,
+		double *DSTYAfterScaler,
+		double *DestinationLinesForPrefetch,
+		double *PrefetchBandwidth,
+		double *DestinationLinesToRequestVMInVBlank,
+		double *DestinationLinesToRequestRowInVBlank,
+		double *VRatioPrefetchY,
+		double *VRatioPrefetchC,
+		double *RequiredPrefetchPixDataBWLuma,
+		double *RequiredPrefetchPixDataBWChroma,
+		bool *NotEnoughTimeForDynamicMetadata,
+		double *Tno_bw,
+		double *prefetch_vmrow_bw,
+		double *Tdmdl_vm,
+		double *Tdmdl,
+		double *TSetup,
+		int *VUpdateOffsetPix,
+		double *VUpdateWidthPix,
+		double *VReadyOffsetPix)
+{
+	bool MyError = false;
+	unsigned int DPPCycles, DISPCLKCycles;
+	double DSTTotalPixelsAfterScaler;
+	double LineTime;
+	double dst_y_prefetch_equ;
+#ifdef __DML_VBA_DEBUG__
+	double Tsw_oto;
+#endif
+	double prefetch_bw_oto;
+	double prefetch_bw_pr;
+	double Tvm_oto;
+	double Tr0_oto;
+	double Tvm_oto_lines;
+	double Tr0_oto_lines;
+	double dst_y_prefetch_oto;
+	double TimeForFetchingMetaPTE = 0;
+	double TimeForFetchingRowInVBlank = 0;
+	double LinesToRequestPrefetchPixelData = 0;
+	unsigned int HostVMDynamicLevelsTrips;
+	double trip_to_mem;
+	double Tvm_trips;
+	double Tr0_trips;
+	double Tvm_trips_rounded;
+	double Tr0_trips_rounded;
+	double Lsw_oto;
+	double Tpre_rounded;
+	double prefetch_bw_equ;
+	double Tvm_equ;
+	double Tr0_equ;
+	double Tdmbf;
+	double Tdmec;
+	double Tdmsks;
+	double prefetch_sw_bytes;
+	double bytes_pp;
+	double dep_bytes;
+	int max_vratio_pre = 4;
+	double min_Lsw;
+	double Tsw_est1 = 0;
+	double Tsw_est3 = 0;
+	double max_Tsw = 0;
+
+	// Host VM page table trips only count when both GPUVM and HostVM are on.
+	if (GPUVMEnable == true && HostVMEnable == true) {
+		HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
+	} else {
+		HostVMDynamicLevelsTrips = 0;
+	}
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
+#endif
+	CalculateVupdateAndDynamicMetadataParameters(
+			MaxInterDCNTileRepeaters,
+			myPipe->DPPCLK,
+			myPipe->DISPCLK,
+			myPipe->DCFCLKDeepSleep,
+			myPipe->PixelClock,
+			myPipe->HTotal,
+			myPipe->VBlank,
+			DynamicMetadataTransmittedBytes,
+			DynamicMetadataLinesBeforeActiveRequired,
+			myPipe->InterlaceEnable,
+			myPipe->ProgressiveToInterlaceUnitInOPP,
+			TSetup,
+			&Tdmbf,
+			&Tdmec,
+			&Tdmsks,
+			VUpdateOffsetPix,
+			VUpdateWidthPix,
+			VReadyOffsetPix);
+
+	LineTime = myPipe->HTotal / myPipe->PixelClock;
+	trip_to_mem = UrgentLatency;
+	Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
+
+	// The two preprocessor variants only differ in the condition; the braces
+	// opened here are closed by the common code below.
+#ifdef __DML_VBA_ALLOW_DELTA__
+	if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
+#else
+	if (DynamicMetadataVMEnabled == true) {
+#endif
+		*Tdmdl = TWait + Tvm_trips + trip_to_mem;
+	} else {
+		*Tdmdl = TWait + UrgentExtraLatency;
+	}
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+	if (DynamicMetadataEnable == false) {
+		*Tdmdl = 0.0;
+	}
+#endif
+
+	if (DynamicMetadataEnable == true) {
+		if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
+			*NotEnoughTimeForDynamicMetadata = true;
+			dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
+			dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
+			dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
+			dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
+			dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
+		} else {
+			*NotEnoughTimeForDynamicMetadata = false;
+		}
+	} else {
+		*NotEnoughTimeForDynamicMetadata = false;
+	}
+
+	*Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
+
+	if (myPipe->ScalerEnabled)
+		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
+	else
+		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
+
+	DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
+
+	DISPCLKCycles = DISPCLKDelaySubtotal;
+
+	// Both clocks are used as divisors below; bail out with an error instead.
+	if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
+		return true;
+
+	*DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
+	dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
+	dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
+	dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
+	dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
+	dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
+	// *DSTXAfterScaler is a double, so it has to be printed with %f.
+	dml_print("DML::%s: DSTXAfterScaler: %f\n", __func__, *DSTXAfterScaler);
+	dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
+#endif
+
+	*DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
+
+	if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
+		*DSTYAfterScaler = 1;
+	else
+		*DSTYAfterScaler = 0;
+
+	// Re-split the total pixel delay into whole lines (Y) and a remainder (X).
+	DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
+	*DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
+	*DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: DSTXAfterScaler: %f (final)\n", __func__, *DSTXAfterScaler);
+#endif
+
+	MyError = false;
+
+	// The *_rounded values are rounded up to a quarter of a line time.
+	Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
+	Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
+	Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+	if (!myPipe->DCCEnable) {
+		Tr0_trips = 0.0;
+		Tr0_trips_rounded = 0.0;
+	}
+#endif
+
+	if (!GPUVMEnable) {
+		Tvm_trips = 0.0;
+		Tvm_trips_rounded = 0.0;
+	}
+
+	if (GPUVMEnable) {
+		if (GPUVMPageTableLevels >= 3) {
+			*Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
+		} else {
+			*Tno_bw = 0;
+		}
+	} else if (!myPipe->DCCEnable) {
+		*Tno_bw = LineTime;
+	} else {
+		*Tno_bw = LineTime / 4;
+	}
+
+	// NOTE(review): if BytePerPixelC is an integer type, "/ 4" truncates
+	// before the sum is converted to double - confirm this is intended.
+	if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
+		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
+	else
+		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
+	/*rev 99*/
+	prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane;
+	prefetch_bw_pr = dml_min(1, myPipe->VRatio) * prefetch_bw_pr;
+	max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
+	prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
+	prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
+
+	min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
+	Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
+#ifdef __DML_VBA_DEBUG__
+	Tsw_oto = Lsw_oto * LineTime;
+#endif
+
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML: HTotal: %d\n", myPipe->HTotal);
+	dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
+	dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
+	dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
+	dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
+	dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
+#endif
+
+	if (GPUVMEnable == true)
+		Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
+	else
+		Tvm_oto = LineTime / 4.0;
+
+	// NOTE(review): this uses "LineTime - Tvm_oto" while the Tr0_equ
+	// computation further down uses "(LineTime - Tvm_equ) / 2" - confirm
+	// the difference is intended.
+	if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
+		Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
+				LineTime - Tvm_oto,
+				LineTime / 4);
+	} else {
+		Tr0_oto = (LineTime - Tvm_oto) / 2.0;
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
+	dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
+	// Print the value the label names (MetaRowByte has its own line below).
+	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
+	dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
+	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
+	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
+	dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
+	dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
+	dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
+#endif
+
+	Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
+	Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
+	dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
+	dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
+	dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
+	dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
+	Tpre_rounded = dst_y_prefetch_equ * LineTime;
+
+	dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
+
+	if (prefetch_sw_bytes < dep_bytes)
+		prefetch_sw_bytes = 2 * dep_bytes;
+
+	dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
+	dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
+	dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
+	dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
+	dml_print("DML: LineTime: %f\n", LineTime);
+	dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
+
+	dml_print("DML: LineTime: %f\n", LineTime);
+	dml_print("DML: VStartup: %d\n", VStartup);
+	dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
+	dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
+	dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
+	dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
+	dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
+	dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
+	dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
+	dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd\n", *Tdmdl_vm);
+	dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", *Tdmdl);
+	dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler\n", *DSTXAfterScaler);
+	dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler\n", *DSTYAfterScaler);
+
+	*PrefetchBandwidth = 0;
+	*DestinationLinesToRequestVMInVBlank = 0;
+	*DestinationLinesToRequestRowInVBlank = 0;
+	*VRatioPrefetchY = 0;
+	*VRatioPrefetchC = 0;
+	*RequiredPrefetchPixDataBWLuma = 0;
+	if (dst_y_prefetch_equ > 1) {
+		double PrefetchBandwidth1;
+		double PrefetchBandwidth2;
+		double PrefetchBandwidth3;
+		double PrefetchBandwidth4;
+
+		// Four candidate bandwidths; each is 0 when its time budget is not
+		// positive. The Case*OK checks below pick which one is used.
+		if (Tpre_rounded - *Tno_bw > 0) {
+			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
+					+ prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
+			Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
+		} else {
+			PrefetchBandwidth1 = 0;
+		}
+
+		if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
+			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
+					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
+		}
+
+		if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
+			PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
+		else
+			PrefetchBandwidth2 = 0;
+
+		if (Tpre_rounded - Tvm_trips_rounded > 0) {
+			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
+					+ prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
+			Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
+		} else {
+			PrefetchBandwidth3 = 0;
+		}
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
+		dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
+		dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
+#endif
+		if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
+			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
+					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
+		}
+
+		if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
+			PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
+		else
+			PrefetchBandwidth4 = 0;
+
+		{
+			bool Case1OK;
+			bool Case2OK;
+			bool Case3OK;
+
+			if (PrefetchBandwidth1 > 0) {
+				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
+						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
+					Case1OK = true;
+				} else {
+					Case1OK = false;
+				}
+			} else {
+				Case1OK = false;
+			}
+
+			if (PrefetchBandwidth2 > 0) {
+				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
+						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
+					Case2OK = true;
+				} else {
+					Case2OK = false;
+				}
+			} else {
+				Case2OK = false;
+			}
+
+			if (PrefetchBandwidth3 > 0) {
+				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
+						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
+					Case3OK = true;
+				} else {
+					Case3OK = false;
+				}
+			} else {
+				Case3OK = false;
+			}
+
+			// First matching case wins; candidate 4 is the fallback.
+			if (Case1OK) {
+				prefetch_bw_equ = PrefetchBandwidth1;
+			} else if (Case2OK) {
+				prefetch_bw_equ = PrefetchBandwidth2;
+			} else if (Case3OK) {
+				prefetch_bw_equ = PrefetchBandwidth3;
+			} else {
+				prefetch_bw_equ = PrefetchBandwidth4;
+			}
+
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
+			dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
+			dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
+			dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
+#endif
+
+			if (prefetch_bw_equ > 0) {
+				if (GPUVMEnable == true) {
+					Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
+				} else {
+					Tvm_equ = LineTime / 4;
+				}
+
+				if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
+					Tr0_equ = dml_max4(
+							(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
+							Tr0_trips,
+							(LineTime - Tvm_equ) / 2,
+							LineTime / 4);
+				} else {
+					Tr0_equ = (LineTime - Tvm_equ) / 2;
+				}
+			} else {
+				Tvm_equ = 0;
+				Tr0_equ = 0;
+				dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
+			}
+		}
+
+		// Use whichever candidate schedule needs fewer destination lines.
+		if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
+			*DestinationLinesForPrefetch = dst_y_prefetch_oto;
+			TimeForFetchingMetaPTE = Tvm_oto;
+			TimeForFetchingRowInVBlank = Tr0_oto;
+			*PrefetchBandwidth = prefetch_bw_oto;
+		} else {
+			*DestinationLinesForPrefetch = dst_y_prefetch_equ;
+			TimeForFetchingMetaPTE = Tvm_equ;
+			TimeForFetchingRowInVBlank = Tr0_equ;
+			*PrefetchBandwidth = prefetch_bw_equ;
+		}
+
+		*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
+
+		*DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+		LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
+		// See note above dated 5/30/2018
+		// - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
+				- ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
+#else
+		LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
+#endif
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
+		dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
+		dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
+		dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
+		dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
+		dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
+		dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
+#endif
+
+		if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
+
+			*VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
+			*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
+			dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
+			dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
+#endif
+			if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
+				if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
+					*VRatioPrefetchY = dml_max(
+							(double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
+							(double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
+					*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
+				} else {
+					MyError = true;
+					dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
+					*VRatioPrefetchY = 0;
+				}
+#ifdef __DML_VBA_DEBUG__
+				dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
+				dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
+				dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
+#endif
+			}
+
+			*VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
+			*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
+
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
+			dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
+			dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
+#endif
+			// NOTE(review): the chroma condition uses "||" where the luma
+			// condition above uses "&&" - confirm which one is intended.
+			if ((SwathHeightC > 4) || VInitPreFillC > 3) {
+				if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
+					*VRatioPrefetchC = dml_max(
+							*VRatioPrefetchC,
+							(double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
+					*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
+				} else {
+					MyError = true;
+					dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
+					*VRatioPrefetchC = 0;
+				}
+#ifdef __DML_VBA_DEBUG__
+				dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
+				dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
+				dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
+#endif
+			}
+
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
+			dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
+			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
+#endif
+
+			*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
+
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
+#endif
+
+			*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
+					/ LineTime;
+		} else {
+			MyError = true;
+			dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
+			dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
+			*VRatioPrefetchY = 0;
+			*VRatioPrefetchC = 0;
+			*RequiredPrefetchPixDataBWLuma = 0;
+			*RequiredPrefetchPixDataBWChroma = 0;
+		}
+
+		dml_print(
+				"DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
+				(double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
+		dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
+		dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
+		dml_print(
+				"DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
+				(double) LinesToRequestPrefetchPixelData * LineTime);
+		dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
+		dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
+		dml_print(
+				"DML: Tslack(pre): %fus - time left over in schedule\n",
+				VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
+						- (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
+		dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
+
+	} else {
+		// One line or less available for prefetch: no schedule is computed.
+		MyError = true;
+		dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
+	}
+
+	{
+		double prefetch_vm_bw;
+		double prefetch_row_bw;
+
+		// Bytes to fetch but no lines allotted to fetch them in is an error.
+		if (PDEAndMetaPTEBytesFrame == 0) {
+			prefetch_vm_bw = 0;
+		} else if (*DestinationLinesToRequestVMInVBlank > 0) {
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
+			dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
+			dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
+			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
+#endif
+			prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
+#endif
+		} else {
+			prefetch_vm_bw = 0;
+			MyError = true;
+			dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
+		}
+
+		if (MetaRowByte + PixelPTEBytesPerRow == 0) {
+			prefetch_row_bw = 0;
+		} else if (*DestinationLinesToRequestRowInVBlank > 0) {
+			prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
+
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
+			dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
+			dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
+			dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
+#endif
+		} else {
+			prefetch_row_bw = 0;
+			MyError = true;
+			dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
+		}
+
+		*prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
+	}
+
+	// On any error, hand back zeroed results so callers do not use partial values.
+	if (MyError) {
+		*PrefetchBandwidth = 0;
+		*DestinationLinesToRequestVMInVBlank = 0;
+		*DestinationLinesToRequestRowInVBlank = 0;
+		*DestinationLinesForPrefetch = 0;
+		*VRatioPrefetchY = 0;
+		*VRatioPrefetchC = 0;
+		*RequiredPrefetchPixDataBWLuma = 0;
+		*RequiredPrefetchPixDataBWChroma = 0;
+	}
+
+	return MyError;
+}
+
+/*
+ * Return 4 * VCOSpeed divided by the integer divider obtained by rounding
+ * (4 * VCOSpeed / Clock) down; a smaller divider gives a result that is not
+ * below Clock.
+ */
+static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
+{
+	const double scaled_vco = VCOSpeed * 4;
+	const double divider = dml_floor(scaled_vco / Clock, 1);
+
+	return scaled_vco / divider;
+}
+
+/*
+ * Return 4 * VCOSpeed divided by the integer divider obtained by rounding
+ * (4 * VCOSpeed / Clock) up; a larger divider gives a result that is not
+ * above Clock.
+ */
+static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
+{
+	const double scaled_vco = VCOSpeed * 4;
+	const double divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);
+
+	return scaled_vco / divider;
+}
+
+static void CalculateDCCConfiguration(
+ bool DCCEnabled,
+ bool DCCProgrammingAssumesScanDirectionUnknown,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceWidthLuma,
+ unsigned int SurfaceWidthChroma,
+ unsigned int SurfaceHeightLuma,
+ unsigned int SurfaceHeightChroma,
+ double DETBufferSize,
+ unsigned int RequestHeight256ByteLuma,
+ unsigned int RequestHeight256ByteChroma,
+ enum dm_swizzle_mode TilingFormat,
+ unsigned int BytePerPixelY,
+ unsigned int BytePerPixelC,
+ double BytePerPixelDETY,
+ double BytePerPixelDETC,
+ enum scan_direction_class ScanOrientation,
+ unsigned int *MaxUncompressedBlockLuma,
+ unsigned int *MaxUncompressedBlockChroma,
+ unsigned int *MaxCompressedBlockLuma,
+ unsigned int *MaxCompressedBlockChroma,
+ unsigned int *IndependentBlockLuma,
+ unsigned int *IndependentBlockChroma)
+{
+ int yuv420;
+ int horz_div_l;
+ int horz_div_c;
+ int vert_div_l;
+ int vert_div_c;
+
+ int swath_buf_size;
+ double detile_buf_vp_horz_limit;
+ double detile_buf_vp_vert_limit;
+
+ int MAS_vp_horz_limit;
+ int MAS_vp_vert_limit;
+ int max_vp_horz_width;
+ int max_vp_vert_height;
+ int eff_surf_width_l;
+ int eff_surf_width_c;
+ int eff_surf_height_l;
+ int eff_surf_height_c;
+
+ int full_swath_bytes_horz_wc_l;
+ int full_swath_bytes_horz_wc_c;
+ int full_swath_bytes_vert_wc_l;
+ int full_swath_bytes_vert_wc_c;
+ int req128_horz_wc_l;
+ int req128_horz_wc_c;
+ int req128_vert_wc_l;
+ int req128_vert_wc_c;
+ int segment_order_horz_contiguous_luma;
+ int segment_order_horz_contiguous_chroma;
+ int segment_order_vert_contiguous_luma;
+ int segment_order_vert_contiguous_chroma;
+
+ typedef enum {
+ REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
+ } RequestType;
+ RequestType RequestLuma;
+ RequestType RequestChroma;
+
+ yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
+ horz_div_l = 1;
+ horz_div_c = 1;
+ vert_div_l = 1;
+ vert_div_c = 1;
+
+ if (BytePerPixelY == 1)
+ vert_div_l = 0;
+ if (BytePerPixelC == 1)
+ vert_div_c = 0;
+ if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
+ horz_div_l = 0;
+ if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
+ horz_div_c = 0;
+
+ if (BytePerPixelC == 0) {
+ swath_buf_size = DETBufferSize / 2 - 2 * 256;
+ detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
+ detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
+ } else {
+ swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
+ detile_buf_vp_horz_limit = (double) swath_buf_size
+ / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
+ + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
+ detile_buf_vp_vert_limit = (double) swath_buf_size
+ / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
+ }
+
+ if (SourcePixelFormat == dm_420_10) {
+ detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
+ detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
+ }
+
+ detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
+ detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
+
+ MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
+ MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
+ max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
+ max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
+ eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
+ eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
+ eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
+ eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
+
+ full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
+ full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
+ if (BytePerPixelC > 0) {
+ full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
+ full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
+ } else {
+ full_swath_bytes_horz_wc_c = 0;
+ full_swath_bytes_vert_wc_c = 0;
+ }
+
+ if (SourcePixelFormat == dm_420_10) {
+ full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
+ full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
+ full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
+ full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
+ }
+
+ if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
+ req128_horz_wc_l = 0;
+ req128_horz_wc_c = 0;
+ } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
+ req128_horz_wc_l = 0;
+ req128_horz_wc_c = 1;
+ } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
+ req128_horz_wc_l = 1;
+ req128_horz_wc_c = 0;
+ } else {
+ req128_horz_wc_l = 1;
+ req128_horz_wc_c = 1;
+ }
+
+ if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
+ req128_vert_wc_l = 0;
+ req128_vert_wc_c = 0;
+ } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
+ req128_vert_wc_l = 0;
+ req128_vert_wc_c = 1;
+ } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
+ req128_vert_wc_l = 1;
+ req128_vert_wc_c = 0;
+ } else {
+ req128_vert_wc_l = 1;
+ req128_vert_wc_c = 1;
+ }
+
+ if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
+ segment_order_horz_contiguous_luma = 0;
+ } else {
+ segment_order_horz_contiguous_luma = 1;
+ }
+ if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
+ || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
+ segment_order_vert_contiguous_luma = 0;
+ } else {
+ segment_order_vert_contiguous_luma = 1;
+ }
+ if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
+ segment_order_horz_contiguous_chroma = 0;
+ } else {
+ segment_order_horz_contiguous_chroma = 1;
+ }
+ if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
+ || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
+ segment_order_vert_contiguous_chroma = 0;
+ } else {
+ segment_order_vert_contiguous_chroma = 1;
+ }
+
+ if (DCCProgrammingAssumesScanDirectionUnknown == true) {
+ if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
+ RequestLuma = REQ_256Bytes;
+ } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
+ RequestLuma = REQ_128BytesNonContiguous;
+ } else {
+ RequestLuma = REQ_128BytesContiguous;
+ }
+ if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
+ RequestChroma = REQ_256Bytes;
+ } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
+ RequestChroma = REQ_128BytesNonContiguous;
+ } else {
+ RequestChroma = REQ_128BytesContiguous;
+ }
+ } else if (ScanOrientation != dm_vert) {
+ if (req128_horz_wc_l == 0) {
+ RequestLuma = REQ_256Bytes;
+ } else if (segment_order_horz_contiguous_luma == 0) {
+ RequestLuma = REQ_128BytesNonContiguous;
+ } else {
+ RequestLuma = REQ_128BytesContiguous;
+ }
+ if (req128_horz_wc_c == 0) {
+ RequestChroma = REQ_256Bytes;
+ } else if (segment_order_horz_contiguous_chroma == 0) {
+ RequestChroma = REQ_128BytesNonContiguous;
+ } else {
+ RequestChroma = REQ_128BytesContiguous;
+ }
+ } else {
+ if (req128_vert_wc_l == 0) {
+ RequestLuma = REQ_256Bytes;
+ } else if (segment_order_vert_contiguous_luma == 0) {
+ RequestLuma = REQ_128BytesNonContiguous;
+ } else {
+ RequestLuma = REQ_128BytesContiguous;
+ }
+ if (req128_vert_wc_c == 0) {
+ RequestChroma = REQ_256Bytes;
+ } else if (segment_order_vert_contiguous_chroma == 0) {
+ RequestChroma = REQ_128BytesNonContiguous;
+ } else {
+ RequestChroma = REQ_128BytesContiguous;
+ }
+ }
+
+ if (RequestLuma == REQ_256Bytes) {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 256;
+ *IndependentBlockLuma = 0;
+ } else if (RequestLuma == REQ_128BytesContiguous) {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 128;
+ *IndependentBlockLuma = 128;
+ } else {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 64;
+ *IndependentBlockLuma = 64;
+ }
+
+ if (RequestChroma == REQ_256Bytes) {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 256;
+ *IndependentBlockChroma = 0;
+ } else if (RequestChroma == REQ_128BytesContiguous) {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 128;
+ *IndependentBlockChroma = 128;
+ } else {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 64;
+ *IndependentBlockChroma = 64;
+ }
+
+ if (DCCEnabled != true || BytePerPixelC == 0) {
+ *MaxUncompressedBlockChroma = 0;
+ *MaxCompressedBlockChroma = 0;
+ *IndependentBlockChroma = 0;
+ }
+
+ if (DCCEnabled != true) {
+ *MaxUncompressedBlockLuma = 0;
+ *MaxCompressedBlockLuma = 0;
+ *IndependentBlockLuma = 0;
+ }
+}
+
+/*
+ * CalculatePrefetchSourceLines() - number of source lines covered by prefetch.
+ *
+ * Writes the initial vertical prefill to *VInitPreFill and the number of full
+ * swaths to *MaxNumSwath, then returns
+ *	*MaxNumSwath * SwathHeight + <partial swath lines>
+ * (an unsigned product/sum, converted to double on return).
+ *
+ * Two modes, selected by mode_lib->vba.IgnoreViewportPositioning:
+ *  - set:   the viewport is treated as starting at y = 0 (a warning is printed
+ *           when ViewportYStart is non-zero); the swath count is
+ *           ceil(prefill / SwathHeight) and the partial swath may be 0.
+ *  - clear: the swath count is ceil((prefill - 1) / SwathHeight) + 1 and the
+ *           partial swath is clamped to at least 1 line.
+ *
+ * SwathHeight is used both as a divisor and as a modulus, so it must be
+ * non-zero; this function does not check it.
+ */
+ static double CalculatePrefetchSourceLines(
+		struct display_mode_lib *mode_lib,
+		double VRatio,
+		double vtaps,
+		bool Interlace,
+		bool ProgressiveToInterlaceUnitInOPP,
+		unsigned int SwathHeight,
+		unsigned int ViewportYStart,
+		double *VInitPreFill,
+		unsigned int *MaxNumSwath)
+{
+	struct vba_vars_st *vba = &mode_lib->vba;
+	unsigned int partial_swath;
+	unsigned int source_lines;
+	bool ignore_vp_pos;
+	double halved;
+	double prefill;
+
+	/*
+	 * The interlace term (half of VRatio) is only added when
+	 * ProgressiveToInterlaceUnitInOPP is not set.
+	 */
+	if (ProgressiveToInterlaceUnitInOPP)
+		halved = (VRatio + vtaps + 1) / 2.0;
+	else
+		halved = (VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0;
+
+	prefill = dml_floor(halved, 1);
+	*VInitPreFill = prefill;
+
+	ignore_vp_pos = vba->IgnoreViewportPositioning;
+
+	if (ignore_vp_pos) {
+		if (ViewportYStart != 0)
+			dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
+
+		*MaxNumSwath = dml_ceil(prefill / SwathHeight, 1);
+
+		partial_swath = (prefill > 1.0) ?
+				(unsigned int) (prefill - 1) % SwathHeight :
+				(unsigned int) (prefill + SwathHeight - 1) % SwathHeight;
+	} else {
+		*MaxNumSwath = dml_ceil((prefill - 1.0) / SwathHeight, 1) + 1.0;
+
+		partial_swath = (prefill > 1.0) ?
+				(unsigned int) (prefill - 2) % SwathHeight :
+				(unsigned int) (prefill + SwathHeight - 2) % SwathHeight;
+		/* Never report fewer than one partial-swath line in this mode. */
+		partial_swath = dml_max(1U, partial_swath);
+	}
+
+	source_lines = *MaxNumSwath * SwathHeight + partial_swath;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
+	dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
+	dml_print("DML::%s: VInitPreFill = %f\n", __func__, prefill);
+	dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
+	dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, ignore_vp_pos);
+	dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
+	dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, partial_swath);
+	dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
+	dml_print("DML::%s: Prefetch source lines = %d\n", __func__, source_lines);
+#endif
+	return source_lines;
+}
+
+ static unsigned int CalculateVMAndRowBytes(
+ struct display_mode_lib *mode_lib,
+ bool DCCEnable,
+ unsigned int BlockHeight256Bytes,
+ unsigned int BlockWidth256Bytes,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceTiling,
+ unsigned int BytePerPixel,
+ enum scan_direction_class ScanDirection,
+ unsigned int SwathWidth,
+ unsigned int ViewportHeight,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ unsigned int GPUVMMinPageSize,
+ unsigned int HostVMMinPageSize,
+ unsigned int PTEBufferSizeInRequests,
+ unsigned int Pitch,
+ unsigned int DCCMetaPitch,
+ unsigned int *MacroTileWidth,
+ unsigned int *MetaRowByte,
+ unsigned int *PixelPTEBytesPerRow,
+ bool *PTEBufferSizeNotExceeded,
+ int *dpte_row_width_ub,
+ unsigned int *dpte_row_height,
+ unsigned int *MetaRequestWidth,
+ unsigned int *MetaRequestHeight,
+ unsigned int *meta_row_width,
+ unsigned int *meta_row_height,
+ int *vm_group_bytes,
+ unsigned int *dpte_group_bytes,
+ unsigned int *PixelPTEReqWidth,
+ unsigned int *PixelPTEReqHeight,
+ unsigned int *PTERequestSize,
+ int *DPDE0BytesFrame,
+ int *MetaPTEBytesFrame)
+{ /*
+ * CalculateVMAndRowBytes() - page-table and DCC-meta byte counts for one
+ * surface.
+ *
+ * From the 256-byte block dimensions, tiling mode, scan direction, pitches and
+ * the GPUVM/HostVM configuration this computes:
+ *  - the DCC meta request/row geometry and bytes per meta row
+ *    (*MetaRequestWidth, *MetaRequestHeight, *meta_row_width,
+ *    *meta_row_height, *MetaRowByte),
+ *  - the per-frame page-table byte counts (*MetaPTEBytesFrame,
+ *    *DPDE0BytesFrame),
+ *  - the macro tile width (*MacroTileWidth),
+ *  - the PTE request geometry and per-row PTE bytes (*PixelPTEReqWidth,
+ *    *PixelPTEReqHeight, *PTERequestSize, *dpte_row_height,
+ *    *dpte_row_width_ub, *PixelPTEBytesPerRow),
+ *  - whether the per-row PTE bytes fit in the PTE buffer
+ *    (*PTEBufferSizeNotExceeded),
+ *  - the VM/DPTE group sizes (*vm_group_bytes, *dpte_group_bytes).
+ *
+ * Return: PDEAndMetaPTEBytesFrame, the sum of *MetaPTEBytesFrame,
+ * MPDEBytesFrame, *DPDE0BytesFrame and ExtraDPDEBytesFrame, multiplied by
+ * (1 + 8 * HostVMDynamicLevels) when HostVMEnable is set.
+ *
+ * Notes:
+ *  - SourcePixelFormat is accepted but not referenced in the body.
+ *  - BytePerPixel is an integer divisor (MacroTileWidth) and Pitch is a
+ *    divisor in the linear-tiling path; neither is checked for zero here.
+ *  - Several expressions use floating-point literals and the dml_* helpers;
+ *    the results are truncated when stored into the integer outputs.
+ *  - dml_ceil(x, g) / dml_floor(x, g) presumably round x up/down to a
+ *    multiple of g - TODO confirm against the helper definitions.
+ */
+ struct vba_vars_st *v = &mode_lib->vba;
+ unsigned int MPDEBytesFrame;
+ unsigned int DCCMetaSurfaceBytes;
+ unsigned int MacroTileSizeBytes;
+ unsigned int MacroTileHeight;
+ unsigned int ExtraDPDEBytesFrame;
+ unsigned int PDEAndMetaPTEBytesFrame;
+ unsigned int PixelPTEReqHeightPTEs = 0;
+ unsigned int HostVMDynamicLevels = 0;
+ double FractionOfPTEReturnDrop; /* assigned 0 in every branch below */
+ /*
+ * HostVMDynamicLevels stays 0 unless both GPUVM and HostVM are enabled.
+ * It is the max non-cached page-table level count, reduced by 1 or 2
+ * (clamped at 0) as HostVMMinPageSize crosses 2048 and 1048576.
+ * NOTE(review): units of HostVMMinPageSize are not visible here - confirm.
+ */
+ if (GPUVMEnable == true && HostVMEnable == true) {
+ if (HostVMMinPageSize < 2048) {
+ HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
+ } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
+ HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
+ } else {
+ HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
+ }
+ }
+ /*
+ * DCC meta geometry: a meta request is 8 x 8 256-byte blocks. For a
+ * non-vertical scan the meta row is one request high and its width is
+ * derived from SwathWidth in units of MetaRequestWidth; for a vertical scan
+ * the width/height roles of the request are swapped.
+ */
+ *MetaRequestHeight = 8 * BlockHeight256Bytes;
+ *MetaRequestWidth = 8 * BlockWidth256Bytes;
+ if (ScanDirection != dm_vert) {
+ *meta_row_height = *MetaRequestHeight;
+ *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
+ *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
+ } else {
+ *meta_row_height = *MetaRequestWidth;
+ *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
+ *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
+ }
+ DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
+ if (GPUVMEnable == true) {
+ *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
+ MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
+ } else {
+ *MetaPTEBytesFrame = 0;
+ MPDEBytesFrame = 0;
+ }
+ /* With DCC disabled, all meta-related results computed above are cleared. */
+ if (DCCEnable != true) {
+ *MetaPTEBytesFrame = 0;
+ MPDEBytesFrame = 0;
+ *MetaRowByte = 0;
+ }
+ /*
+ * Macro tile: 256 bytes and one block high for linear surfaces, otherwise
+ * 65536 bytes (64 KiB) and 16 blocks high. The width follows by integer
+ * division, so BytePerPixel must be non-zero.
+ */
+ if (SurfaceTiling == dm_sw_linear) {
+ MacroTileSizeBytes = 256;
+ MacroTileHeight = BlockHeight256Bytes;
+ } else {
+ MacroTileSizeBytes = 65536;
+ MacroTileHeight = 16 * BlockHeight256Bytes;
+ }
+ *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
+ /*
+ * DPDE0 bytes per frame are only non-zero with GPUVM enabled and more than
+ * one page-table level. The surface extent uses ViewportHeight for a
+ * non-vertical scan and SwathWidth for a vertical scan. The divisor
+ * 8 * 2097152 is 16 MiB - presumably the span covered per 64 bytes of
+ * DPDE0; TODO confirm.
+ */
+ if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
+ if (ScanDirection != dm_vert) {
+ *DPDE0BytesFrame = 64
+ * (dml_ceil(
+ ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
+ / (8 * 2097152),
+ 1) + 1);
+ } else {
+ *DPDE0BytesFrame = 64
+ * (dml_ceil(
+ ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
+ / (8 * 2097152),
+ 1) + 1);
+ }
+ ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
+ } else {
+ *DPDE0BytesFrame = 0;
+ ExtraDPDEBytesFrame = 0;
+ }
+ /* Total per-frame PDE + meta-PTE bytes; this (after scaling) is the return value. */
+ PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
+ dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
+ dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
+ dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
+#endif
+ /* With HostVM enabled the total is scaled by (1 + 8 * HostVMDynamicLevels). */
+ if (HostVMEnable == true) {
+ PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
+#endif
+ /*
+ * PTE request geometry, chosen by tiling mode:
+ *  - linear: 1 row high, 32768 / BytePerPixel wide, 64-byte requests;
+ *  - GPUVMMinPageSize == 4 with a macro tile larger than 4096 bytes:
+ *    16 x 16 256-byte blocks, 128-byte requests;
+ *  - otherwise: one macro tile high, 8 macro tiles wide, 64-byte requests.
+ */
+ if (SurfaceTiling == dm_sw_linear) {
+ PixelPTEReqHeightPTEs = 1;
+ *PixelPTEReqHeight = 1;
+ *PixelPTEReqWidth = 32768.0 / BytePerPixel;
+ *PTERequestSize = 64;
+ FractionOfPTEReturnDrop = 0;
+ } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
+ PixelPTEReqHeightPTEs = 16;
+ *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
+ *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
+ *PTERequestSize = 128;
+ FractionOfPTEReturnDrop = 0;
+ } else {
+ PixelPTEReqHeightPTEs = 1;
+ *PixelPTEReqHeight = MacroTileHeight;
+ *PixelPTEReqWidth = 8 * *MacroTileWidth;
+ *PTERequestSize = 64;
+ FractionOfPTEReturnDrop = 0;
+ }
+ /*
+ * DPTE row height/width and PTE bytes per row. Linear surfaces pick a
+ * power-of-two row height capped at 128 from the PTE buffer size and Pitch;
+ * tiled surfaces use the request height (non-vertical scan) or the smaller
+ * of request width and macro tile width (vertical scan, where SwathWidth is
+ * measured in units of PixelPTEReqHeight).
+ */
+ if (SurfaceTiling == dm_sw_linear) {
+ *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
+ *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
+ *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
+ } else if (ScanDirection != dm_vert) {
+ *dpte_row_height = *PixelPTEReqHeight;
+ *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
+ *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
+ } else {
+ *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
+ *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
+ *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
+ }
+ /*
+ * The row fits when its PTE bytes do not exceed 64 bytes per buffered
+ * request. FractionOfPTEReturnDrop is always 0 here, so the factor is 1.
+ */
+ if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
+ *PTEBufferSizeNotExceeded = true;
+ } else {
+ *PTEBufferSizeNotExceeded = false;
+ }
+ /* Without GPUVM no PTE bytes are reported and the buffer check is forced to pass. */
+ if (GPUVMEnable != true) {
+ *PixelPTEBytesPerRow = 0;
+ *PTEBufferSizeNotExceeded = true;
+ }
+
+ dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
+ /*
+ * HostVM scaling of the per-row PTE bytes.
+ * NOTE(review): the buffer-size check above used the unscaled value.
+ */
+ if (HostVMEnable == true) {
+ *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
+ }
+ /*
+ * Group sizes: 512/512 with HostVM; with GPUVM only, 2048 for VM and 2048
+ * for DPTE except 512 for tiled, single-PTE-high, vertically scanned
+ * surfaces; 0/0 when neither is enabled.
+ */
+ if (HostVMEnable == true) {
+ *vm_group_bytes = 512;
+ *dpte_group_bytes = 512;
+ } else if (GPUVMEnable == true) {
+ *vm_group_bytes = 2048;
+ if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
+ *dpte_group_bytes = 512;
+ } else {
+ *dpte_group_bytes = 2048;
+ }
+ } else {
+ *vm_group_bytes = 0;
+ *dpte_group_bytes = 0;
+ }
+ return PDEAndMetaPTEBytesFrame;
+}
+
+static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ unsigned int j, k;
+ double HostVMInefficiencyFactor = 1.0;
+ bool NoChromaPlanes = true;
+ int ReorderBytes;
+ double VMDataOnlyReturnBW;
+ double MaxTotalRDBandwidth = 0;
+ int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
+
+ v->WritebackDISPCLK = 0.0;
+ v->DISPCLKWithRamping = 0;
+ v->DISPCLKWithoutRamping = 0;
+ v->GlobalDPPCLK = 0.0;
+ /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
+ {
+ double IdealFabricAndSDPPortBandwidthPerState = dml_min(
+ v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
+ v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
+ double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
+
+ if (v->HostVMEnable != true) {
+ v->ReturnBW = dml_min(
+ IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
+ IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
+ } else {
+ v->ReturnBW = dml_min(
+ IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
+ IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
+ }
+ }
+ /* End DAL custom code */
+
+ // DISPCLK and DPPCLK Calculation
+ //
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->WritebackEnable[k]) {
+ v->WritebackDISPCLK = dml_max(
+ v->WritebackDISPCLK,
+ dml314_CalculateWriteBackDISPCLK(
+ v->WritebackPixelFormat[k],
+ v->PixelClock[k],
+ v->WritebackHRatio[k],
+ v->WritebackVRatio[k],
+ v->WritebackHTaps[k],
+ v->WritebackVTaps[k],
+ v->WritebackSourceWidth[k],
+ v->WritebackDestinationWidth[k],
+ v->HTotal[k],
+ v->WritebackLineBufferSize));
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->HRatio[k] > 1) {
+ v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
+ v->MaxDCHUBToPSCLThroughput,
+ v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
+ } else {
+ v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
+ }
+
+ v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
+ * dml_max(
+ v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
+ dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
+
+ if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
+ v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
+ }
+
+ if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
+ && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
+ v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
+ v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
+ } else {
+ if (v->HRatioChroma[k] > 1) {
+ v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
+ v->MaxDCHUBToPSCLThroughput,
+ v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
+ } else {
+ v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
+ }
+ v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
+ * dml_max3(
+ v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
+ v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
+ 1.0);
+
+ if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
+ v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
+ }
+
+ v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->BlendingAndTiming[k] != k)
+ continue;
+ if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
+ v->DISPCLKWithRamping = dml_max(
+ v->DISPCLKWithRamping,
+ v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
+ * (1 + v->DISPCLKRampingMargin / 100));
+ v->DISPCLKWithoutRamping = dml_max(
+ v->DISPCLKWithoutRamping,
+ v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
+ } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
+ v->DISPCLKWithRamping = dml_max(
+ v->DISPCLKWithRamping,
+ v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
+ * (1 + v->DISPCLKRampingMargin / 100));
+ v->DISPCLKWithoutRamping = dml_max(
+ v->DISPCLKWithoutRamping,
+ v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
+ } else {
+ v->DISPCLKWithRamping = dml_max(
+ v->DISPCLKWithRamping,
+ v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
+ v->DISPCLKWithoutRamping = dml_max(
+ v->DISPCLKWithoutRamping,
+ v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
+ }
+ }
+
+ v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
+ v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
+
+ ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
+ v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
+ v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
+ v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
+ v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
+ v->DISPCLKDPPCLKVCOSpeed);
+ if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
+ v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
+ } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
+ v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
+ } else {
+ v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
+ }
+ v->DISPCLK = v->DISPCLK_calculated;
+ DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
+ }
+ v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
+ DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->DPPCLK[k] = v->DPPCLK_calculated[k];
+ }
+
+ // Urgent and B P-State/DRAM Clock Change Watermark
+ DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
+ DTRACE(" return_bus_bw = %f", v->ReturnBW);
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ CalculateBytePerPixelAnd256BBlockSizes(
+ v->SourcePixelFormat[k],
+ v->SurfaceTiling[k],
+ &v->BytePerPixelY[k],
+ &v->BytePerPixelC[k],
+ &v->BytePerPixelDETY[k],
+ &v->BytePerPixelDETC[k],
+ &v->BlockHeight256BytesY[k],
+ &v->BlockHeight256BytesC[k],
+ &v->BlockWidth256BytesY[k],
+ &v->BlockWidth256BytesC[k]);
+ }
+
+ CalculateSwathWidth(
+ false,
+ v->NumberOfActivePlanes,
+ v->SourcePixelFormat,
+ v->SourceScan,
+ v->ViewportWidth,
+ v->ViewportHeight,
+ v->SurfaceWidthY,
+ v->SurfaceWidthC,
+ v->SurfaceHeightY,
+ v->SurfaceHeightC,
+ v->ODMCombineEnabled,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->BlockHeight256BytesY,
+ v->BlockHeight256BytesC,
+ v->BlockWidth256BytesY,
+ v->BlockWidth256BytesC,
+ v->BlendingAndTiming,
+ v->HActive,
+ v->HRatio,
+ v->DPPPerPlane,
+ v->SwathWidthSingleDPPY,
+ v->SwathWidthSingleDPPC,
+ v->SwathWidthY,
+ v->SwathWidthC,
+ v->dummyinteger3,
+ v->dummyinteger4,
+ v->swath_width_luma_ub,
+ v->swath_width_chroma_ub);
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
+ * v->VRatio[k];
+ v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
+ * v->VRatioChroma[k];
+ DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
+ }
+
+ // DCFCLK Deep Sleep
+ CalculateDCFCLKDeepSleep(
+ mode_lib,
+ v->NumberOfActivePlanes,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->VRatio,
+ v->VRatioChroma,
+ v->SwathWidthY,
+ v->SwathWidthC,
+ v->DPPPerPlane,
+ v->HRatio,
+ v->HRatioChroma,
+ v->PixelClock,
+ v->PSCL_THROUGHPUT_LUMA,
+ v->PSCL_THROUGHPUT_CHROMA,
+ v->DPPCLK,
+ v->ReadBandwidthPlaneLuma,
+ v->ReadBandwidthPlaneChroma,
+ v->ReturnBusWidth,
+ &v->DCFCLKDeepSleep);
+
+ // DSCCLK
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
+ v->DSCCLK_calculated[k] = 0.0;
+ } else {
+ if (v->OutputFormat[k] == dm_420)
+ v->DSCFormatFactor = 2;
+ else if (v->OutputFormat[k] == dm_444)
+ v->DSCFormatFactor = 1;
+ else if (v->OutputFormat[k] == dm_n422)
+ v->DSCFormatFactor = 2;
+ else
+ v->DSCFormatFactor = 1;
+ if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
+ v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
+ / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
+ v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
+ / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ else
+ v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
+ / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ }
+ }
+
+ // DSC Delay
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ double BPP = v->OutputBpp[k];
+
+ if (v->DSCEnabled[k] && BPP != 0) {
+ if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
+ v->DSCDelay[k] = dscceComputeDelay(
+ v->DSCInputBitPerComponent[k],
+ BPP,
+ dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
+ v->NumberOfDSCSlices[k],
+ v->OutputFormat[k],
+ v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
+ } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
+ v->DSCDelay[k] = 2
+ * (dscceComputeDelay(
+ v->DSCInputBitPerComponent[k],
+ BPP,
+ dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
+ v->NumberOfDSCSlices[k] / 2.0,
+ v->OutputFormat[k],
+ v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
+ } else {
+ v->DSCDelay[k] = 4
+ * (dscceComputeDelay(
+ v->DSCInputBitPerComponent[k],
+ BPP,
+ dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
+ v->NumberOfDSCSlices[k] / 4.0,
+ v->OutputFormat[k],
+ v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
+ }
+ v->DSCDelay[k] = v->DSCDelay[k] + (v->HTotal[k] - v->HActive[k]) * dml_ceil((double) v->DSCDelay[k] / v->HActive[k], 1);
+ v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
+ } else {
+ v->DSCDelay[k] = 0;
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k)
+ for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
+ if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
+ v->DSCDelay[k] = v->DSCDelay[j];
+
+ // Prefetch
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ unsigned int PDEAndMetaPTEBytesFrameY;
+ unsigned int PixelPTEBytesPerRowY;
+ unsigned int MetaRowByteY;
+ unsigned int MetaRowByteC;
+ unsigned int PDEAndMetaPTEBytesFrameC;
+ unsigned int PixelPTEBytesPerRowC;
+ bool PTEBufferSizeNotExceededY;
+ bool PTEBufferSizeNotExceededC;
+
+ if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
+ || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
+ if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
+ v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
+ v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
+ } else {
+ v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
+ v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
+ }
+
+ PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
+ mode_lib,
+ v->DCCEnable[k],
+ v->BlockHeight256BytesC[k],
+ v->BlockWidth256BytesC[k],
+ v->SourcePixelFormat[k],
+ v->SurfaceTiling[k],
+ v->BytePerPixelC[k],
+ v->SourceScan[k],
+ v->SwathWidthC[k],
+ v->ViewportHeightChroma[k],
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->HostVMMaxNonCachedPageTableLevels,
+ v->GPUVMMinPageSize,
+ v->HostVMMinPageSize,
+ v->PTEBufferSizeInRequestsForChroma,
+ v->PitchC[k],
+ v->DCCMetaPitchC[k],
+ &v->MacroTileWidthC[k],
+ &MetaRowByteC,
+ &PixelPTEBytesPerRowC,
+ &PTEBufferSizeNotExceededC,
+ &v->dpte_row_width_chroma_ub[k],
+ &v->dpte_row_height_chroma[k],
+ &v->meta_req_width_chroma[k],
+ &v->meta_req_height_chroma[k],
+ &v->meta_row_width_chroma[k],
+ &v->meta_row_height_chroma[k],
+ &v->dummyinteger1,
+ &v->dummyinteger2,
+ &v->PixelPTEReqWidthC[k],
+ &v->PixelPTEReqHeightC[k],
+ &v->PTERequestSizeC[k],
+ &v->dpde0_bytes_per_frame_ub_c[k],
+ &v->meta_pte_bytes_per_frame_ub_c[k]);
+
+ v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
+ mode_lib,
+ v->VRatioChroma[k],
+ v->VTAPsChroma[k],
+ v->Interlace[k],
+ v->ProgressiveToInterlaceUnitInOPP,
+ v->SwathHeightC[k],
+ v->ViewportYStartC[k],
+ &v->VInitPreFillC[k],
+ &v->MaxNumSwathC[k]);
+ } else {
+ v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
+ v->PTEBufferSizeInRequestsForChroma = 0;
+ PixelPTEBytesPerRowC = 0;
+ PDEAndMetaPTEBytesFrameC = 0;
+ MetaRowByteC = 0;
+ v->MaxNumSwathC[k] = 0;
+ v->PrefetchSourceLinesC[k] = 0;
+ }
+
+ PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
+ mode_lib,
+ v->DCCEnable[k],
+ v->BlockHeight256BytesY[k],
+ v->BlockWidth256BytesY[k],
+ v->SourcePixelFormat[k],
+ v->SurfaceTiling[k],
+ v->BytePerPixelY[k],
+ v->SourceScan[k],
+ v->SwathWidthY[k],
+ v->ViewportHeight[k],
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->HostVMMaxNonCachedPageTableLevels,
+ v->GPUVMMinPageSize,
+ v->HostVMMinPageSize,
+ v->PTEBufferSizeInRequestsForLuma,
+ v->PitchY[k],
+ v->DCCMetaPitchY[k],
+ &v->MacroTileWidthY[k],
+ &MetaRowByteY,
+ &PixelPTEBytesPerRowY,
+ &PTEBufferSizeNotExceededY,
+ &v->dpte_row_width_luma_ub[k],
+ &v->dpte_row_height[k],
+ &v->meta_req_width[k],
+ &v->meta_req_height[k],
+ &v->meta_row_width[k],
+ &v->meta_row_height[k],
+ &v->vm_group_bytes[k],
+ &v->dpte_group_bytes[k],
+ &v->PixelPTEReqWidthY[k],
+ &v->PixelPTEReqHeightY[k],
+ &v->PTERequestSizeY[k],
+ &v->dpde0_bytes_per_frame_ub_l[k],
+ &v->meta_pte_bytes_per_frame_ub_l[k]);
+
+ v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
+ mode_lib,
+ v->VRatio[k],
+ v->vtaps[k],
+ v->Interlace[k],
+ v->ProgressiveToInterlaceUnitInOPP,
+ v->SwathHeightY[k],
+ v->ViewportYStartY[k],
+ &v->VInitPreFillY[k],
+ &v->MaxNumSwathY[k]);
+ v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
+ v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
+ v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
+
+ CalculateRowBandwidth(
+ v->GPUVMEnable,
+ v->SourcePixelFormat[k],
+ v->VRatio[k],
+ v->VRatioChroma[k],
+ v->DCCEnable[k],
+ v->HTotal[k] / v->PixelClock[k],
+ MetaRowByteY,
+ MetaRowByteC,
+ v->meta_row_height[k],
+ v->meta_row_height_chroma[k],
+ PixelPTEBytesPerRowY,
+ PixelPTEBytesPerRowC,
+ v->dpte_row_height[k],
+ v->dpte_row_height_chroma[k],
+ &v->meta_row_bw[k],
+ &v->dpte_row_bw[k]);
+ }
+
+ v->TotalDCCActiveDPP = 0;
+ v->TotalActiveDPP = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
+ if (v->DCCEnable[k])
+ v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
+ if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
+ || v->SourcePixelFormat[k] == dm_rgbe_alpha)
+ NoChromaPlanes = false;
+ }
+
+ ReorderBytes = v->NumberOfChannels
+ * dml_max3(
+ v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
+ v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
+ v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
+
+ VMDataOnlyReturnBW = dml_min(
+ dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
+ * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
+ v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
+ * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
+ dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
+ dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
+ dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
+ dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
+ dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
+ dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
+ dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
+ dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
+ dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
+ dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
+#endif
+
+ if (v->GPUVMEnable && v->HostVMEnable)
+ HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
+
+ v->UrgentExtraLatency = CalculateExtraLatency(
+ v->RoundTripPingLatencyCycles,
+ ReorderBytes,
+ v->DCFCLK,
+ v->TotalActiveDPP,
+ v->PixelChunkSizeInKByte,
+ v->TotalDCCActiveDPP,
+ v->MetaChunkSize,
+ v->ReturnBW,
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->NumberOfActivePlanes,
+ v->DPPPerPlane,
+ v->dpte_group_bytes,
+ HostVMInefficiencyFactor,
+ v->HostVMMinPageSize,
+ v->HostVMMaxNonCachedPageTableLevels);
+
+ v->TCalc = 24.0 / v->DCFCLKDeepSleep;
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->BlendingAndTiming[k] == k) {
+ if (v->WritebackEnable[k] == true) {
+ v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
+ + CalculateWriteBackDelay(
+ v->WritebackPixelFormat[k],
+ v->WritebackHRatio[k],
+ v->WritebackVRatio[k],
+ v->WritebackVTaps[k],
+ v->WritebackDestinationWidth[k],
+ v->WritebackDestinationHeight[k],
+ v->WritebackSourceHeight[k],
+ v->HTotal[k]) / v->DISPCLK;
+ } else
+ v->WritebackDelay[v->VoltageLevel][k] = 0;
+ for (j = 0; j < v->NumberOfActivePlanes; ++j) {
+ if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
+ v->WritebackDelay[v->VoltageLevel][k] = dml_max(
+ v->WritebackDelay[v->VoltageLevel][k],
+ v->WritebackLatency
+ + CalculateWriteBackDelay(
+ v->WritebackPixelFormat[j],
+ v->WritebackHRatio[j],
+ v->WritebackVRatio[j],
+ v->WritebackVTaps[j],
+ v->WritebackDestinationWidth[j],
+ v->WritebackDestinationHeight[j],
+ v->WritebackSourceHeight[j],
+ v->HTotal[k]) / v->DISPCLK);
+ }
+ }
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k)
+ for (j = 0; j < v->NumberOfActivePlanes; ++j)
+ if (v->BlendingAndTiming[k] == j)
+ v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->MaxVStartupLines[k] =
+ CalculateMaxVStartup(
+ v->VTotal[k],
+ v->VActive[k],
+ v->VBlankNom[k],
+ v->HTotal[k],
+ v->PixelClock[k],
+ v->ProgressiveToInterlaceUnitInOPP,
+ v->Interlace[k],
+ v->ip.VBlankNomDefaultUS,
+ v->WritebackDelay[v->VoltageLevel][k]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
+ dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
+ dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
+#endif
+ }
+
+ v->MaximumMaxVStartupLines = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k)
+ v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
+
+ // VBA_DELTA
+ // We don't really care to iterate between the various prefetch modes
+ //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
+
+ v->UrgentLatency = CalculateUrgentLatency(
+ v->UrgentLatencyPixelDataOnly,
+ v->UrgentLatencyPixelMixedWithVMData,
+ v->UrgentLatencyVMDataOnly,
+ v->DoUrgentLatencyAdjustment,
+ v->UrgentLatencyAdjustmentFabricClockComponent,
+ v->UrgentLatencyAdjustmentFabricClockReference,
+ v->FabricClock);
+
+ v->FractionOfUrgentBandwidth = 0.0;
+ v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
+
+ v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
+
+ do {
+ double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
+ bool DestinationLineTimesForPrefetchLessThan2 = false;
+ bool VRatioPrefetchMoreThan4 = false;
+ double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
+
+ MaxTotalRDBandwidth = 0;
+
+ dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ Pipe myPipe;
+
+ myPipe.DPPCLK = v->DPPCLK[k];
+ myPipe.DISPCLK = v->DISPCLK;
+ myPipe.PixelClock = v->PixelClock[k];
+ myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
+ myPipe.DPPPerPlane = v->DPPPerPlane[k];
+ myPipe.ScalerEnabled = v->ScalerEnabled[k];
+ myPipe.VRatio = v->VRatio[k];
+ myPipe.VRatioChroma = v->VRatioChroma[k];
+ myPipe.SourceScan = v->SourceScan[k];
+ myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
+ myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
+ myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
+ myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
+ myPipe.InterlaceEnable = v->Interlace[k];
+ myPipe.NumberOfCursors = v->NumberOfCursors[k];
+ myPipe.VBlank = v->VTotal[k] - v->VActive[k];
+ myPipe.HTotal = v->HTotal[k];
+ myPipe.DCCEnable = v->DCCEnable[k];
+ myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
+ || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
+ myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
+ myPipe.BytePerPixelY = v->BytePerPixelY[k];
+ myPipe.BytePerPixelC = v->BytePerPixelC[k];
+ myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
+ v->ErrorResult[k] = CalculatePrefetchSchedule(
+ mode_lib,
+ HostVMInefficiencyFactor,
+ &myPipe,
+ v->DSCDelay[k],
+ v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
+ v->DPPCLKDelaySCL,
+ v->DPPCLKDelaySCLLBOnly,
+ v->DPPCLKDelayCNVCCursor,
+ v->DISPCLKDelaySubtotal,
+ (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
+ v->OutputFormat[k],
+ v->MaxInterDCNTileRepeaters,
+ dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
+ v->MaxVStartupLines[k],
+ v->GPUVMMaxPageTableLevels,
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->HostVMMaxNonCachedPageTableLevels,
+ v->HostVMMinPageSize,
+ v->DynamicMetadataEnable[k],
+ v->DynamicMetadataVMEnabled,
+ v->DynamicMetadataLinesBeforeActiveRequired[k],
+ v->DynamicMetadataTransmittedBytes[k],
+ v->UrgentLatency,
+ v->UrgentExtraLatency,
+ v->TCalc,
+ v->PDEAndMetaPTEBytesFrame[k],
+ v->MetaRowByte[k],
+ v->PixelPTEBytesPerRow[k],
+ v->PrefetchSourceLinesY[k],
+ v->SwathWidthY[k],
+ v->VInitPreFillY[k],
+ v->MaxNumSwathY[k],
+ v->PrefetchSourceLinesC[k],
+ v->SwathWidthC[k],
+ v->VInitPreFillC[k],
+ v->MaxNumSwathC[k],
+ v->swath_width_luma_ub[k],
+ v->swath_width_chroma_ub[k],
+ v->SwathHeightY[k],
+ v->SwathHeightC[k],
+ TWait,
+ &v->DSTXAfterScaler[k],
+ &v->DSTYAfterScaler[k],
+ &v->DestinationLinesForPrefetch[k],
+ &v->PrefetchBandwidth[k],
+ &v->DestinationLinesToRequestVMInVBlank[k],
+ &v->DestinationLinesToRequestRowInVBlank[k],
+ &v->VRatioPrefetchY[k],
+ &v->VRatioPrefetchC[k],
+ &v->RequiredPrefetchPixDataBWLuma[k],
+ &v->RequiredPrefetchPixDataBWChroma[k],
+ &v->NotEnoughTimeForDynamicMetadata[k],
+ &v->Tno_bw[k],
+ &v->prefetch_vmrow_bw[k],
+ &v->Tdmdl_vm[k],
+ &v->Tdmdl[k],
+ &v->TSetup[k],
+ &v->VUpdateOffsetPix[k],
+ &v->VUpdateWidthPix[k],
+ &v->VReadyOffsetPix[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
+#endif
+ v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
+ }
+
+ v->NoEnoughUrgentLatencyHiding = false;
+ v->NoEnoughUrgentLatencyHidingPre = false;
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
+ / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
+ v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
+ / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
+
+ CalculateUrgentBurstFactor(
+ v->swath_width_luma_ub[k],
+ v->swath_width_chroma_ub[k],
+ v->SwathHeightY[k],
+ v->SwathHeightC[k],
+ v->HTotal[k] / v->PixelClock[k],
+ v->UrgentLatency,
+ v->CursorBufferSize,
+ v->CursorWidth[k][0],
+ v->CursorBPP[k][0],
+ v->VRatio[k],
+ v->VRatioChroma[k],
+ v->BytePerPixelDETY[k],
+ v->BytePerPixelDETC[k],
+ v->DETBufferSizeY[k],
+ v->DETBufferSizeC[k],
+ &v->UrgBurstFactorCursor[k],
+ &v->UrgBurstFactorLuma[k],
+ &v->UrgBurstFactorChroma[k],
+ &v->NoUrgentLatencyHiding[k]);
+
+ CalculateUrgentBurstFactor(
+ v->swath_width_luma_ub[k],
+ v->swath_width_chroma_ub[k],
+ v->SwathHeightY[k],
+ v->SwathHeightC[k],
+ v->HTotal[k] / v->PixelClock[k],
+ v->UrgentLatency,
+ v->CursorBufferSize,
+ v->CursorWidth[k][0],
+ v->CursorBPP[k][0],
+ v->VRatioPrefetchY[k],
+ v->VRatioPrefetchC[k],
+ v->BytePerPixelDETY[k],
+ v->BytePerPixelDETC[k],
+ v->DETBufferSizeY[k],
+ v->DETBufferSizeC[k],
+ &v->UrgBurstFactorCursorPre[k],
+ &v->UrgBurstFactorLumaPre[k],
+ &v->UrgBurstFactorChromaPre[k],
+ &v->NoUrgentLatencyHidingPre[k]);
+
+ MaxTotalRDBandwidth = MaxTotalRDBandwidth
+ + dml_max3(
+ v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
+ v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
+ + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
+ + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
+ + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
+ v->DPPPerPlane[k]
+ * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
+ + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
+ + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
+
+ MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
+ + dml_max3(
+ v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
+ v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
+ + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
+ v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
+ + v->cursor_bw_pre[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
+
+ dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
+ dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
+
+ dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
+ dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
+ dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
+ dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
+ dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
+ dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
+ dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
+ dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
+ dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
+ dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
+#endif
+
+ if (v->DestinationLinesForPrefetch[k] < 2)
+ DestinationLineTimesForPrefetchLessThan2 = true;
+
+ if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
+ VRatioPrefetchMoreThan4 = true;
+
+ if (v->NoUrgentLatencyHiding[k] == true)
+ v->NoEnoughUrgentLatencyHiding = true;
+
+ if (v->NoUrgentLatencyHidingPre[k] == true)
+ v->NoEnoughUrgentLatencyHidingPre = true;
+ }
+
+ v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
+ dml_print("DML::%s: ReturnBW=%f\n", __func__, v->ReturnBW);
+ dml_print("DML::%s: FractionOfUrgentBandwidth=%f\n", __func__, v->FractionOfUrgentBandwidth);
+#endif
+
+ if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
+ && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
+ v->PrefetchModeSupported = true;
+ else {
+ v->PrefetchModeSupported = false;
+ dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
+ dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
+ dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
+ dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
+ }
+
+ // PREVIOUS_ERROR
+ // This error result check was done after the PrefetchModeSupported. So we will
+ // still try to calculate flip schedule even prefetch mode not supported
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
+ v->PrefetchModeSupported = false;
+ dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
+ }
+ }
+
+ if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
+ v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
+ - dml_max(
+ v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
+ + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
+ + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
+ v->DPPPerPlane[k]
+ * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
+ + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
+ + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
+ }
+
+ v->TotImmediateFlipBytes = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
+ + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ CalculateFlipSchedule(
+ mode_lib,
+ k,
+ HostVMInefficiencyFactor,
+ v->UrgentExtraLatency,
+ v->UrgentLatency,
+ v->PDEAndMetaPTEBytesFrame[k],
+ v->MetaRowByte[k],
+ v->PixelPTEBytesPerRow[k]);
+ }
+
+ v->total_dcn_read_bw_with_flip = 0.0;
+ v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
+ + dml_max3(
+ v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
+ v->DPPPerPlane[k] * v->final_flip_bw[k]
+ + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
+ + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
+ + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
+ v->DPPPerPlane[k]
+ * (v->final_flip_bw[k]
+ + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
+ + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
+ + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
+ v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
+ + dml_max3(
+ v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
+ v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
+ + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
+ v->DPPPerPlane[k]
+ * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
+ + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
+ }
+ v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
+
+ v->ImmediateFlipSupported = true;
+ if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
+#endif
+ v->ImmediateFlipSupported = false;
+ v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->ImmediateFlipSupportedForPipe[k] == false) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
+#endif
+ v->ImmediateFlipSupported = false;
+ }
+ }
+ } else {
+ v->ImmediateFlipSupported = false;
+ }
+
+ v->PrefetchAndImmediateFlipSupported =
+ (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
+ && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
+ v->ImmediateFlipSupported)) ? true : false;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
+ dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
+ dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
+ dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
+ dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
+ dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
+#endif
+ dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
+
+ v->VStartupLines = v->VStartupLines + 1;
+ } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
+ ASSERT(v->PrefetchAndImmediateFlipSupported);
+
+ // Unbounded Request Enabled
+ CalculateUnboundedRequestAndCompressedBufferSize(
+ v->DETBufferSizeInKByte[0],
+ v->ConfigReturnBufferSizeInKByte,
+ v->UseUnboundedRequesting,
+ v->TotalActiveDPP,
+ NoChromaPlanes,
+ v->MaxNumDPP,
+ v->CompressedBufferSegmentSizeInkByte,
+ v->Output,
+ &v->UnboundedRequestEnabled,
+ &v->CompressedBufferSizeInkByte);
+
+ //Watermarks and NB P-State/DRAM Clock Change Support
+ {
+ enum clock_change_support DRAMClockChangeSupport; // dummy
+
+ CalculateWatermarksAndDRAMSpeedChangeSupport(
+ mode_lib,
+ PrefetchMode,
+ v->DCFCLK,
+ v->ReturnBW,
+ v->UrgentLatency,
+ v->UrgentExtraLatency,
+ v->SOCCLK,
+ v->DCFCLKDeepSleep,
+ v->DETBufferSizeY,
+ v->DETBufferSizeC,
+ v->SwathHeightY,
+ v->SwathHeightC,
+ v->SwathWidthY,
+ v->SwathWidthC,
+ v->DPPPerPlane,
+ v->BytePerPixelDETY,
+ v->BytePerPixelDETC,
+ v->UnboundedRequestEnabled,
+ v->CompressedBufferSizeInkByte,
+ &DRAMClockChangeSupport,
+ &v->StutterExitWatermark,
+ &v->StutterEnterPlusExitWatermark,
+ &v->Z8StutterExitWatermark,
+ &v->Z8StutterEnterPlusExitWatermark);
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->WritebackEnable[k] == true) {
+ v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
+ 0,
+ v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
+ } else {
+ v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
+ }
+ }
+ }
+
+ //Display Pipeline Delivery Time in Prefetch, Groups
+ CalculatePixelDeliveryTimes(
+ v->NumberOfActivePlanes,
+ v->VRatio,
+ v->VRatioChroma,
+ v->VRatioPrefetchY,
+ v->VRatioPrefetchC,
+ v->swath_width_luma_ub,
+ v->swath_width_chroma_ub,
+ v->DPPPerPlane,
+ v->HRatio,
+ v->HRatioChroma,
+ v->PixelClock,
+ v->PSCL_THROUGHPUT_LUMA,
+ v->PSCL_THROUGHPUT_CHROMA,
+ v->DPPCLK,
+ v->BytePerPixelC,
+ v->SourceScan,
+ v->NumberOfCursors,
+ v->CursorWidth,
+ v->CursorBPP,
+ v->BlockWidth256BytesY,
+ v->BlockHeight256BytesY,
+ v->BlockWidth256BytesC,
+ v->BlockHeight256BytesC,
+ v->DisplayPipeLineDeliveryTimeLuma,
+ v->DisplayPipeLineDeliveryTimeChroma,
+ v->DisplayPipeLineDeliveryTimeLumaPrefetch,
+ v->DisplayPipeLineDeliveryTimeChromaPrefetch,
+ v->DisplayPipeRequestDeliveryTimeLuma,
+ v->DisplayPipeRequestDeliveryTimeChroma,
+ v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
+ v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
+ v->CursorRequestDeliveryTime,
+ v->CursorRequestDeliveryTimePrefetch);
+
+ CalculateMetaAndPTETimes(
+ v->NumberOfActivePlanes,
+ v->GPUVMEnable,
+ v->MetaChunkSize,
+ v->MinMetaChunkSizeBytes,
+ v->HTotal,
+ v->VRatio,
+ v->VRatioChroma,
+ v->DestinationLinesToRequestRowInVBlank,
+ v->DestinationLinesToRequestRowInImmediateFlip,
+ v->DCCEnable,
+ v->PixelClock,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->SourceScan,
+ v->dpte_row_height,
+ v->dpte_row_height_chroma,
+ v->meta_row_width,
+ v->meta_row_width_chroma,
+ v->meta_row_height,
+ v->meta_row_height_chroma,
+ v->meta_req_width,
+ v->meta_req_width_chroma,
+ v->meta_req_height,
+ v->meta_req_height_chroma,
+ v->dpte_group_bytes,
+ v->PTERequestSizeY,
+ v->PTERequestSizeC,
+ v->PixelPTEReqWidthY,
+ v->PixelPTEReqHeightY,
+ v->PixelPTEReqWidthC,
+ v->PixelPTEReqHeightC,
+ v->dpte_row_width_luma_ub,
+ v->dpte_row_width_chroma_ub,
+ v->DST_Y_PER_PTE_ROW_NOM_L,
+ v->DST_Y_PER_PTE_ROW_NOM_C,
+ v->DST_Y_PER_META_ROW_NOM_L,
+ v->DST_Y_PER_META_ROW_NOM_C,
+ v->TimePerMetaChunkNominal,
+ v->TimePerChromaMetaChunkNominal,
+ v->TimePerMetaChunkVBlank,
+ v->TimePerChromaMetaChunkVBlank,
+ v->TimePerMetaChunkFlip,
+ v->TimePerChromaMetaChunkFlip,
+ v->time_per_pte_group_nom_luma,
+ v->time_per_pte_group_vblank_luma,
+ v->time_per_pte_group_flip_luma,
+ v->time_per_pte_group_nom_chroma,
+ v->time_per_pte_group_vblank_chroma,
+ v->time_per_pte_group_flip_chroma);
+
+ CalculateVMGroupAndRequestTimes(
+ v->NumberOfActivePlanes,
+ v->GPUVMEnable,
+ v->GPUVMMaxPageTableLevels,
+ v->HTotal,
+ v->BytePerPixelC,
+ v->DestinationLinesToRequestVMInVBlank,
+ v->DestinationLinesToRequestVMInImmediateFlip,
+ v->DCCEnable,
+ v->PixelClock,
+ v->dpte_row_width_luma_ub,
+ v->dpte_row_width_chroma_ub,
+ v->vm_group_bytes,
+ v->dpde0_bytes_per_frame_ub_l,
+ v->dpde0_bytes_per_frame_ub_c,
+ v->meta_pte_bytes_per_frame_ub_l,
+ v->meta_pte_bytes_per_frame_ub_c,
+ v->TimePerVMGroupVBlank,
+ v->TimePerVMGroupFlip,
+ v->TimePerVMRequestVBlank,
+ v->TimePerVMRequestFlip);
+
+ // Min TTUVBlank
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (PrefetchMode == 0) {
+ v->AllowDRAMClockChangeDuringVBlank[k] = true;
+ v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
+ v->MinTTUVBlank[k] = dml_max(
+ v->DRAMClockChangeWatermark,
+ dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
+ } else if (PrefetchMode == 1) {
+ v->AllowDRAMClockChangeDuringVBlank[k] = false;
+ v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
+ v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
+ } else {
+ v->AllowDRAMClockChangeDuringVBlank[k] = false;
+ v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
+ v->MinTTUVBlank[k] = v->UrgentWatermark;
+ }
+ if (!v->DynamicMetadataEnable[k])
+ v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
+ }
+
+ // DCC Configuration
+ v->ActiveDPPs = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
+ v->SourcePixelFormat[k],
+ v->SurfaceWidthY[k],
+ v->SurfaceWidthC[k],
+ v->SurfaceHeightY[k],
+ v->SurfaceHeightC[k],
+ v->DETBufferSizeInKByte[0] * 1024,
+ v->BlockHeight256BytesY[k],
+ v->BlockHeight256BytesC[k],
+ v->SurfaceTiling[k],
+ v->BytePerPixelY[k],
+ v->BytePerPixelC[k],
+ v->BytePerPixelDETY[k],
+ v->BytePerPixelDETC[k],
+ v->SourceScan[k],
+ &v->DCCYMaxUncompressedBlock[k],
+ &v->DCCCMaxUncompressedBlock[k],
+ &v->DCCYMaxCompressedBlock[k],
+ &v->DCCCMaxCompressedBlock[k],
+ &v->DCCYIndependentBlock[k],
+ &v->DCCCIndependentBlock[k]);
+ }
+
+ // VStartup Adjustment
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ bool isInterlaceTiming;
+ double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
+#endif
+
+ v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
+ dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
+ dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
+ dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
+#endif
+
+ v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
+ if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
+ v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
+ }
+
+ isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
+ v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
+ if (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) {
+ v->MIN_DST_Y_NEXT_START[k] = dml_floor((v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k]) / 2.0, 1.0);
+ } else {
+ v->MIN_DST_Y_NEXT_START[k] = v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k];
+ }
+ v->MIN_DST_Y_NEXT_START[k] += dml_floor(4.0 * v->TSetup[k] / ((double)v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
+ if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
+ <= (isInterlaceTiming ?
+ dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
+ (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
+ v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
+ } else {
+ v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
+ dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
+ dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
+ dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
+ dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
+ dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
+ dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
+ dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
+ dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
+ dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
+ dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
+#endif
+ }
+
+ {
+ //Maximum Bandwidth Used
+ double TotalWRBandwidth = 0;
+ double MaxPerPlaneVActiveWRBandwidth = 0;
+ double WRBandwidth = 0;
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
+ WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
+ / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
+ } else if (v->WritebackEnable[k] == true) {
+ WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
+ / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
+ }
+ TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
+ MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
+ }
+
+ v->TotalDataReadBandwidth = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
+ }
+ }
+ // Stutter Efficiency
+ CalculateStutterEfficiency(
+ mode_lib,
+ v->CompressedBufferSizeInkByte,
+ v->UnboundedRequestEnabled,
+ v->ConfigReturnBufferSizeInKByte,
+ v->MetaFIFOSizeInKEntries,
+ v->ZeroSizeBufferEntries,
+ v->NumberOfActivePlanes,
+ v->ROBBufferSizeInKByte,
+ v->TotalDataReadBandwidth,
+ v->DCFCLK,
+ v->ReturnBW,
+ v->COMPBUF_RESERVED_SPACE_64B,
+ v->COMPBUF_RESERVED_SPACE_ZS,
+ v->SRExitTime,
+ v->SRExitZ8Time,
+ v->SynchronizedVBlank,
+ v->StutterEnterPlusExitWatermark,
+ v->Z8StutterEnterPlusExitWatermark,
+ v->ProgressiveToInterlaceUnitInOPP,
+ v->Interlace,
+ v->MinTTUVBlank,
+ v->DPPPerPlane,
+ v->DETBufferSizeY,
+ v->BytePerPixelY,
+ v->BytePerPixelDETY,
+ v->SwathWidthY,
+ v->SwathHeightY,
+ v->SwathHeightC,
+ v->DCCRateLuma,
+ v->DCCRateChroma,
+ v->DCCFractionOfZeroSizeRequestsLuma,
+ v->DCCFractionOfZeroSizeRequestsChroma,
+ v->HTotal,
+ v->VTotal,
+ v->PixelClock,
+ v->VRatio,
+ v->SourceScan,
+ v->BlockHeight256BytesY,
+ v->BlockWidth256BytesY,
+ v->BlockHeight256BytesC,
+ v->BlockWidth256BytesC,
+ v->DCCYMaxUncompressedBlock,
+ v->DCCCMaxUncompressedBlock,
+ v->VActive,
+ v->DCCEnable,
+ v->WritebackEnable,
+ v->ReadBandwidthPlaneLuma,
+ v->ReadBandwidthPlaneChroma,
+ v->meta_row_bw,
+ v->dpte_row_bw,
+ &v->StutterEfficiencyNotIncludingVBlank,
+ &v->StutterEfficiency,
+ &v->NumberOfStutterBurstsPerFrame,
+ &v->Z8StutterEfficiencyNotIncludingVBlank,
+ &v->Z8StutterEfficiency,
+ &v->Z8NumberOfStutterBurstsPerFrame,
+ &v->StutterPeriod);
+}
+
+/*
+ * DisplayPipeConfiguration() - compute per-plane pixel/block sizes and feed
+ * them to the swath and DET configuration step.
+ *
+ * @mode_lib: display mode library instance; every input that is not a local
+ *            scratch array is taken from mode_lib->vba.
+ *
+ * For each of the vba.NumberOfActivePlanes planes the byte-per-pixel values
+ * and 256-byte block dimensions are written into local arrays by
+ * CalculateBytePerPixelAnd256BBlockSizes(). Those arrays are then handed to
+ * CalculateSwathAndDETConfiguration() together with the plane description
+ * held in vba. None of the local arrays is read by this function after that
+ * call.
+ */
+static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
+{
+	struct vba_vars_st *v = &mode_lib->vba;
+	unsigned int plane;
+
+	/* Per-plane results of CalculateBytePerPixelAnd256BBlockSizes(). */
+	int bytes_y[DC__NUM_DPP__MAX], bytes_c[DC__NUM_DPP__MAX];
+	double det_bytes_y[DC__NUM_DPP__MAX], det_bytes_c[DC__NUM_DPP__MAX];
+	int blk_height_y[DC__NUM_DPP__MAX], blk_height_c[DC__NUM_DPP__MAX];
+	int blk_width_y[DC__NUM_DPP__MAX], blk_width_c[DC__NUM_DPP__MAX];
+
+	/*
+	 * Scratch storage passed to CalculateSwathAndDETConfiguration() only
+	 * to satisfy its argument list; never read here afterwards.
+	 */
+	double scratch_d1[DC__NUM_DPP__MAX], scratch_d2[DC__NUM_DPP__MAX];
+	double scratch_d3[DC__NUM_DPP__MAX], scratch_d4[DC__NUM_DPP__MAX];
+	int scratch_i5[DC__NUM_DPP__MAX], scratch_i6[DC__NUM_DPP__MAX];
+	bool scratch_flags[DC__NUM_DPP__MAX];
+	bool scratch_flag;
+
+	/* The return value of the size helper is intentionally not checked. */
+	for (plane = 0; plane < v->NumberOfActivePlanes; ++plane)
+		CalculateBytePerPixelAnd256BBlockSizes(
+				v->SourcePixelFormat[plane],
+				v->SurfaceTiling[plane],
+				&bytes_y[plane], &bytes_c[plane],
+				&det_bytes_y[plane], &det_bytes_c[plane],
+				&blk_height_y[plane], &blk_height_c[plane],
+				&blk_width_y[plane], &blk_width_c[plane]);
+
+	/* Argument order below is positional and must not be changed. */
+	CalculateSwathAndDETConfiguration(
+			false,
+			v->NumberOfActivePlanes,
+			v->DETBufferSizeInKByte[0],
+			scratch_d1, scratch_d2,
+			v->SourceScan, v->SourcePixelFormat, v->SurfaceTiling,
+			v->ViewportWidth, v->ViewportHeight,
+			v->SurfaceWidthY, v->SurfaceWidthC,
+			v->SurfaceHeightY, v->SurfaceHeightC,
+			blk_height_y, blk_height_c,
+			blk_width_y, blk_width_c,
+			v->ODMCombineEnabled, v->BlendingAndTiming,
+			bytes_y, bytes_c,
+			det_bytes_y, det_bytes_c,
+			v->HActive, v->HRatio, v->HRatioChroma,
+			v->DPPPerPlane,
+			scratch_i5, scratch_i6,
+			scratch_d3, scratch_d4,
+			v->SwathHeightY, v->SwathHeightC,
+			v->DETBufferSizeY, v->DETBufferSizeC,
+			scratch_flags,
+			&scratch_flag);
+}
+
+/*
+ * CalculateBytePerPixelAnd256BBlockSizes() - per-format pixel sizes and the
+ * dimensions of a 256-byte block for one surface.
+ *
+ * @SourcePixelFormat:    pixel format of the surface
+ * @SurfaceTiling:        swizzle mode; dm_sw_linear forces block heights of 1
+ * @BytePerPixelY:        out, integer bytes per pixel, first plane
+ * @BytePerPixelC:        out, integer bytes per pixel, second plane (0 if none)
+ * @BytePerPixelDETY:     out, bytes per pixel as a double, first plane
+ * @BytePerPixelDETC:     out, bytes per pixel as a double, second plane
+ * @BlockHeight256BytesY: out, 256-byte block height, first plane
+ * @BlockHeight256BytesC: out, 256-byte block height, second plane (0 if none)
+ * @BlockWidth256BytesY:  out, 256 / bytes-per-pixel / block height, first plane
+ * @BlockWidth256BytesC:  out, same for the second plane (0 if none)
+ *
+ * Formats that are not explicitly listed in the size table fall back to
+ * 2 and 4 integer bytes per pixel with DET sizes of 4/3 and 8/3.
+ *
+ * Return: always true.
+ */
+static bool CalculateBytePerPixelAnd256BBlockSizes(
+		enum source_format_class SourcePixelFormat,
+		enum dm_swizzle_mode SurfaceTiling,
+		unsigned int *BytePerPixelY,
+		unsigned int *BytePerPixelC,
+		double *BytePerPixelDETY,
+		double *BytePerPixelDETC,
+		unsigned int *BlockHeight256BytesY,
+		unsigned int *BlockHeight256BytesC,
+		unsigned int *BlockWidth256BytesY,
+		unsigned int *BlockWidth256BytesC)
+{
+	/*
+	 * Formats whose DET byte size equals the integer byte size. The table
+	 * is searched in order and the first match wins, mirroring an
+	 * if / else-if chain.
+	 */
+	static const struct {
+		enum source_format_class format;
+		unsigned int bytes_y;
+		unsigned int bytes_c;
+	} exact_sizes[] = {
+		{ dm_444_64, 8, 0 },
+		{ dm_444_32, 4, 0 },
+		{ dm_rgbe, 4, 0 },
+		{ dm_444_16, 2, 0 },
+		{ dm_444_8, 1, 0 },
+		{ dm_rgbe_alpha, 4, 1 },
+		{ dm_420_8, 1, 2 },
+		{ dm_420_12, 2, 4 },
+	};
+	const bool linear = (SurfaceTiling == dm_sw_linear);
+	/* Fallback values used when no table entry matches. */
+	unsigned int bytes_y = 2;
+	unsigned int bytes_c = 4;
+	double det_y = 4.0 / 3;
+	double det_c = 8.0 / 3;
+	unsigned int height_y, height_c;
+	unsigned int i;
+	bool single_plane;
+
+	for (i = 0; i < sizeof(exact_sizes) / sizeof(exact_sizes[0]); i++) {
+		if (SourcePixelFormat != exact_sizes[i].format)
+			continue;
+		bytes_y = exact_sizes[i].bytes_y;
+		bytes_c = exact_sizes[i].bytes_c;
+		det_y = bytes_y;
+		det_c = bytes_c;
+		break;
+	}
+
+	*BytePerPixelDETY = det_y;
+	*BytePerPixelDETC = det_c;
+	*BytePerPixelY = bytes_y;
+	*BytePerPixelC = bytes_c;
+
+	single_plane = SourcePixelFormat == dm_444_64
+			|| SourcePixelFormat == dm_444_32
+			|| SourcePixelFormat == dm_444_16
+			|| SourcePixelFormat == dm_444_8
+			|| SourcePixelFormat == dm_mono_16
+			|| SourcePixelFormat == dm_mono_8
+			|| SourcePixelFormat == dm_rgbe;
+
+	if (single_plane) {
+		/* No second plane: its block dimensions are reported as 0. */
+		if (linear)
+			height_y = 1;
+		else if (SourcePixelFormat == dm_444_64)
+			height_y = 4;
+		else if (SourcePixelFormat == dm_444_8)
+			height_y = 16;
+		else
+			height_y = 8;
+
+		*BlockHeight256BytesY = height_y;
+		*BlockWidth256BytesY = 256U / bytes_y / height_y;
+		*BlockHeight256BytesC = 0;
+		*BlockWidth256BytesC = 0;
+		return true;
+	}
+
+	/* Two-plane formats: pick a block height for each plane. */
+	if (linear) {
+		height_y = 1;
+		height_c = 1;
+	} else if (SourcePixelFormat == dm_rgbe_alpha) {
+		height_y = 8;
+		height_c = 16;
+	} else if (SourcePixelFormat == dm_420_8) {
+		height_y = 16;
+		height_c = 8;
+	} else {
+		height_y = 8;
+		height_c = 8;
+	}
+
+	*BlockHeight256BytesY = height_y;
+	*BlockHeight256BytesC = height_c;
+	*BlockWidth256BytesY = 256U / bytes_y / height_y;
+	*BlockWidth256BytesC = 256U / bytes_c / height_c;
+	return true;
+}
+
+ /*
+  * Return the wait time for the given PrefetchMode.
+  *
+  * Mode 0: max(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime,
+  *         UrgentLatency).
+  * Mode 1: max(SREnterPlusExitTime, UrgentLatency).
+  * Other:  UrgentLatency.
+  */
+ static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
+ {
+ 	double self_refresh_wait;
+
+ 	if (PrefetchMode > 1)
+ 		return UrgentLatency;
+
+ 	self_refresh_wait = dml_max(SREnterPlusExitTime, UrgentLatency);
+ 	if (PrefetchMode == 1)
+ 		return self_refresh_wait;
+
+ 	return dml_max(DRAMClockChangeLatency + UrgentLatency, self_refresh_wait);
+ }
+
+ /*
+  * Return the DISPCLK needed for writeback: the largest of three candidate
+  * clocks derived from the horizontal taps/ratio, the vertical taps against
+  * HTotal, and the vertical taps against the writeback line buffer size.
+  *
+  * WritebackPixelFormat and WritebackVRatio are accepted but not used.
+  */
+ double dml314_CalculateWriteBackDISPCLK(
+ 		enum source_format_class WritebackPixelFormat,
+ 		double PixelClock,
+ 		double WritebackHRatio,
+ 		double WritebackVRatio,
+ 		unsigned int WritebackHTaps,
+ 		unsigned int WritebackVTaps,
+ 		long WritebackSourceWidth,
+ 		long WritebackDestinationWidth,
+ 		unsigned int HTotal,
+ 		unsigned int WritebackLineBufferSize)
+ {
+ 	const double clk_h = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
+ 	const double clk_v = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
+ 	const double clk_hb = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
+
+ 	return dml_max3(clk_h, clk_v, clk_hb);
+ }
+
+ /*
+  * Return the writeback delay.
+  *
+  * The delay is 0 when the unclamped count of trailing output lines is
+  * negative; otherwise it is that count times the line length, plus
+  * (HTotal - WritebackDestinationWidth), plus 80.
+  *
+  * WritebackPixelFormat and WritebackHRatio are accepted but not used.
+  */
+ static double CalculateWriteBackDelay(
+ 		enum source_format_class WritebackPixelFormat,
+ 		double WritebackHRatio,
+ 		double WritebackVRatio,
+ 		unsigned int WritebackVTaps,
+ 		int WritebackDestinationWidth,
+ 		int WritebackDestinationHeight,
+ 		int WritebackSourceHeight,
+ 		unsigned int HTotal)
+ {
+ 	const double v_init = (WritebackVRatio + WritebackVTaps + 1) / 2;
+ 	const double line_len = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
+ 	const double last_lines_unclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - v_init) / WritebackVRatio, 1);
+
+ 	if (last_lines_unclamped < 0)
+ 		return 0;
+
+ 	return last_lines_unclamped * line_len + (HTotal - WritebackDestinationWidth) + 80;
+ }
+
+ /*
+  * Compute the VUPDATE/VREADY pixel quantities and dynamic-metadata times.
+  *
+  * Outputs (all written on every call):
+  *   VUpdateWidthPix, VReadyOffsetPix - sums of per-clock delays (in DPPCLK,
+  *       DISPCLK and DCFClkDeepSleep periods, including the inter-tile
+  *       repeater delay) multiplied by PixelClock and passed through
+  *       dml_ceil(x, 1.0).
+  *   VUpdateOffsetPix - dml_ceil(HTotal / 4.0, 1).
+  *   TSetup - sum of the three values above divided by PixelClock.
+  *   Tdmbf  - DynamicMetadataTransmittedBytes / 4.0 / DISPCLK.
+  *   Tdmec  - HTotal / PixelClock (one line time).
+  *   Tdmsks - VBlank * HTotal / PixelClock / 2.0 when
+  *       DynamicMetadataLinesBeforeActiveRequired is 0, otherwise that many
+  *       line times; halved when InterlaceEnable is 1 and
+  *       ProgressiveToInterlaceUnitInOPP is false.
+  */
+ static void CalculateVupdateAndDynamicMetadataParameters(
+ 		int MaxInterDCNTileRepeaters,
+ 		double DPPCLK,
+ 		double DISPCLK,
+ 		double DCFClkDeepSleep,
+ 		double PixelClock,
+ 		int HTotal,
+ 		int VBlank,
+ 		int DynamicMetadataTransmittedBytes,
+ 		int DynamicMetadataLinesBeforeActiveRequired,
+ 		int InterlaceEnable,
+ 		bool ProgressiveToInterlaceUnitInOPP,
+ 		double *TSetup,
+ 		double *Tdmbf,
+ 		double *Tdmec,
+ 		double *Tdmsks,
+ 		int *VUpdateOffsetPix,
+ 		double *VUpdateWidthPix,
+ 		double *VReadyOffsetPix)
+ {
+ 	double TotalRepeaterDelayTime;
+
+ 	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
+ 	*VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
+ 	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
+ 	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
+ 	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
+ 	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
+ 	*Tdmec = HTotal / PixelClock;
+ 	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
+ 		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
+ 	} else {
+ 		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
+ 	}
+ 	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
+ 		*Tdmsks = *Tdmsks / 2;
+ 	}
+ #ifdef __DML_VBA_DEBUG__
+ 	/* VUpdateWidthPix and VReadyOffsetPix are doubles, so print them with %f; only VUpdateOffsetPix is an int. */
+ 	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
+ 	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
+ 	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
+ #endif
+ }
+
+ /*
+  * Compute the meta-row and DPTE-row bandwidth for one plane.
+  *
+  * *meta_row_bw is 0 unless DCCEnable is set; *dpte_row_bw is 0 unless
+  * GPUVMEnable is set. When enabled, each is
+  * VRatio * bytes / (row_height * LineTime) for luma, with the matching
+  * chroma term (using VRatioChroma) added for dm_420_8, dm_420_10, dm_420_12
+  * and dm_rgbe_alpha.
+  */
+ static void CalculateRowBandwidth(
+ 		bool GPUVMEnable,
+ 		enum source_format_class SourcePixelFormat,
+ 		double VRatio,
+ 		double VRatioChroma,
+ 		bool DCCEnable,
+ 		double LineTime,
+ 		unsigned int MetaRowByteLuma,
+ 		unsigned int MetaRowByteChroma,
+ 		unsigned int meta_row_height_luma,
+ 		unsigned int meta_row_height_chroma,
+ 		unsigned int PixelPTEBytesPerRowLuma,
+ 		unsigned int PixelPTEBytesPerRowChroma,
+ 		unsigned int dpte_row_height_luma,
+ 		unsigned int dpte_row_height_chroma,
+ 		double *meta_row_bw,
+ 		double *dpte_row_bw)
+ {
+ 	const bool has_chroma_rows = SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10
+ 			|| SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha;
+ 	double meta_bw = 0;
+ 	double dpte_bw = 0;
+
+ 	if (DCCEnable) {
+ 		meta_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
+ 		if (has_chroma_rows)
+ 			meta_bw += VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
+ 	}
+ 	*meta_row_bw = meta_bw;
+
+ 	if (GPUVMEnable) {
+ 		dpte_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
+ 		if (has_chroma_rows)
+ 			dpte_bw += VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
+ 	}
+ 	*dpte_row_bw = dpte_bw;
+ }
+ /*
+  * CalculateFlipSchedule - immediate-flip timing and support check for plane k.
+  *
+  * Reads the plane's timing and GPUVM/DCC settings from mode_lib->vba and
+  * writes, at index k: DestinationLinesToRequestVMInImmediateFlip,
+  * DestinationLinesToRequestRowInImmediateFlip, final_flip_bw and
+  * ImmediateFlipSupportedForPipe.
+  *
+  * HostVMInefficiencyFactor scales the PDE/meta-PTE and DPTE byte counts.
+  * UrgentExtraLatency and UrgentLatency feed the latency-based lower bounds on
+  * the fetch times. PDEAndMetaPTEBytesPerFrame, MetaRowBytes and
+  * DPTEBytesPerRow are the plane's byte counts used both to apportion the
+  * available flip bandwidth and to compute the fetch times.
+  */
+ static void CalculateFlipSchedule(
+ struct display_mode_lib *mode_lib,
+ unsigned int k,
+ double HostVMInefficiencyFactor,
+ double UrgentExtraLatency,
+ double UrgentLatency,
+ double PDEAndMetaPTEBytesPerFrame,
+ double MetaRowBytes,
+ double DPTEBytesPerRow)
+ {
+ struct vba_vars_st *v = &mode_lib->vba;
+ double min_row_time = 0.0;
+ unsigned int HostVMDynamicLevelsTrips;
+ double TimeForFetchingMetaPTEImmediateFlip;
+ double TimeForFetchingRowInVBlankImmediateFlip;
+ double ImmediateFlipBW = 1.0;
+ double LineTime = v->HTotal[k] / v->PixelClock[k];
+ /* Host VM page-table trips are only counted when both GPUVM and HostVM are enabled. */
+ if (v->GPUVMEnable == true && v->HostVMEnable == true) {
+ HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
+ } else {
+ HostVMDynamicLevelsTrips = 0;
+ }
+ /* This plane's share of BandwidthAvailableForImmediateFlip, weighted by its bytes over TotImmediateFlipBytes; stays 1.0 otherwise. */
+ if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
+ ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
+ }
+ /* Meta-PTE fetch time: largest of the bandwidth-based time, the latency-based time and a quarter line. */
+ if (v->GPUVMEnable == true) {
+ TimeForFetchingMetaPTEImmediateFlip = dml_max3(
+ v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
+ UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
+ LineTime / 4.0);
+ } else {
+ TimeForFetchingMetaPTEImmediateFlip = 0;
+ }
+ /* dml_ceil(4.0 * x, 1) / 4.0: presumably rounds the line count up to a multiple of 0.25 - confirm dml_ceil semantics. */
+ v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
+ if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
+ TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
+ (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
+ UrgentLatency * (HostVMDynamicLevelsTrips + 1),
+ LineTime / 4);
+ } else {
+ TimeForFetchingRowInVBlankImmediateFlip = 0;
+ }
+
+ v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
+ /* NOTE(review): the else-if below is only reached with GPUVMEnable false, so it is effectively the DCC-only case. */
+ if (v->GPUVMEnable == true) {
+ v->final_flip_bw[k] = dml_max(
+ PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
+ (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
+ } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
+ v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
+ } else {
+ v->final_flip_bw[k] = 0;
+ }
+ /* min_row_time: shortest row time among the row heights in use. NOTE(review): dm_420_12 is absent from the chroma list below although CalculateRowBandwidth() includes it - confirm intent. */
+ if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
+ if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
+ min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
+ } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
+ min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
+ } else {
+ min_row_time = dml_min4(
+ v->dpte_row_height[k] * LineTime / v->VRatio[k],
+ v->meta_row_height[k] * LineTime / v->VRatio[k],
+ v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
+ v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
+ }
+ } else {
+ if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
+ min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
+ } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
+ min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
+ } else {
+ min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
+ }
+ }
+ /* Flip is unsupported when either line count reaches its limit (32 for VM, 16 for row) or the fetches exceed min_row_time. */
+ if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
+ || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
+ v->ImmediateFlipSupportedForPipe[k] = false;
+ } else {
+ v->ImmediateFlipSupportedForPipe[k] = true;
+ }
+
+ #ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
+ dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
+ dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
+ dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
+ dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
+ dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
+ #endif
+
+ }
+
+ /*
+  * Select or validate the output bits-per-pixel for a link.
+  *
+  * The link limit is LinkBitRate / 10 * 8 * Lanes / PixelClock, reduced by
+  * 2.4% when DSC is enabled on a dm_dp output, then capped at 16 for 4:1 ODM
+  * combine and at 32 for 2:1 ODM combine.
+  *
+  * DesiredBPP == 0 (auto-select):
+  *   - DSC:     BPP_INVALID if the link limit is below the format's minimum
+  *              DSC bpp, the format's maximum DSC bpp if the link allows it,
+  *              otherwise the link limit floored to a multiple of 1/16.
+  *   - non-DSC: the highest of the format's three uncompressed tiers
+  *              (NonDSCBPP2 > NonDSCBPP1 > NonDSCBPP0) that does not exceed
+  *              the link limit, or BPP_INVALID if none fits.
+  * DesiredBPP != 0: DesiredBPP is returned if it is acceptable for the
+  *   DSC/non-DSC case, otherwise BPP_INVALID.
+  *
+  * HTotal, HActive, DSCSlices, AudioRate and AudioLayout are accepted but not
+  * used by this implementation.
+  */
+ static double TruncToValidBPP(
+ 		double LinkBitRate,
+ 		int Lanes,
+ 		int HTotal,
+ 		int HActive,
+ 		double PixelClock,
+ 		double DesiredBPP,
+ 		bool DSCEnable,
+ 		enum output_encoder_class Output,
+ 		enum output_format_class Format,
+ 		unsigned int DSCInputBitPerComponent,
+ 		int DSCSlices,
+ 		int AudioRate,
+ 		int AudioLayout,
+ 		enum odm_combine_mode ODMCombine)
+ {
+ 	double MaxLinkBPP;
+ 	int MinDSCBPP;
+ 	double MaxDSCBPP;
+ 	int NonDSCBPP0;
+ 	int NonDSCBPP1;
+ 	int NonDSCBPP2;
+
+ 	/* Per-format uncompressed tiers and DSC bpp range. */
+ 	if (Format == dm_420) {
+ 		NonDSCBPP0 = 12;
+ 		NonDSCBPP1 = 15;
+ 		NonDSCBPP2 = 18;
+ 		MinDSCBPP = 6;
+ 		MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
+ 	} else if (Format == dm_444) {
+ 		NonDSCBPP0 = 24;
+ 		NonDSCBPP1 = 30;
+ 		NonDSCBPP2 = 36;
+ 		MinDSCBPP = 8;
+ 		MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
+ 	} else {
+ 		NonDSCBPP0 = 16;
+ 		NonDSCBPP1 = 20;
+ 		NonDSCBPP2 = 24;
+
+ 		if (Format == dm_n422) {
+ 			MinDSCBPP = 7;
+ 			MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
+ 		} else {
+ 			MinDSCBPP = 8;
+ 			MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
+ 		}
+ 	}
+
+ 	/* Link capacity in bits per pixel; DSC over DP loses a further 2.4%. */
+ 	if (DSCEnable && Output == dm_dp) {
+ 		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
+ 	} else {
+ 		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
+ 	}
+
+ 	if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
+ 		MaxLinkBPP = 16;
+ 	} else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
+ 		MaxLinkBPP = 32;
+ 	}
+
+ 	if (DesiredBPP == 0) {
+ 		if (DSCEnable) {
+ 			if (MaxLinkBPP < MinDSCBPP) {
+ 				return BPP_INVALID;
+ 			} else if (MaxLinkBPP >= MaxDSCBPP) {
+ 				return MaxDSCBPP;
+ 			} else {
+ 				return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
+ 			}
+ 		} else {
+ 			if (MaxLinkBPP >= NonDSCBPP2) {
+ 				return NonDSCBPP2;
+ 			} else if (MaxLinkBPP >= NonDSCBPP1) {
+ 				return NonDSCBPP1;
+ 			} else if (MaxLinkBPP >= NonDSCBPP0) {
+ 				/*
+ 				 * Return the tier that was just tested. A literal 16.0
+ 				 * here is only right for the 4:2:2 tiers: for dm_420
+ 				 * (tier 12) it exceeds a link limit in [12, 15), and
+ 				 * for dm_444 (tier 24) it is not one of the 444 tiers.
+ 				 */
+ 				return NonDSCBPP0;
+ 			} else {
+ 				return BPP_INVALID;
+ 			}
+ 		}
+ 	} else {
+ 		if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
+ 				|| (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
+ 			return BPP_INVALID;
+ 		} else {
+ 			return DesiredBPP;
+ 		}
+ 	}
+ }
+ /*
+  * CalculatePrefetchSchedulePerPlane - run CalculatePrefetchSchedule() for
+  * plane k in voltage state i, configuration j.
+  *
+  * Builds a local Pipe descriptor from the per-state ([i][j], [i][j][k]) and
+  * per-plane ([k]) values in mode_lib->vba, passes it with the remaining
+  * per-plane inputs to CalculatePrefetchSchedule(), and stores the return
+  * value in v->NoTimeForPrefetch[i][j][k]. The callee's outputs are written
+  * through the pointers passed at the end of the argument list, into the
+  * per-plane and per-state arrays of mode_lib->vba.
+  *
+  * The call is positional with several dozen arguments; keep the order in sync
+  * with the CalculatePrefetchSchedule() prototype.
+  */
+ static noinline void CalculatePrefetchSchedulePerPlane(
+ struct display_mode_lib *mode_lib,
+ double HostVMInefficiencyFactor,
+ int i,
+ unsigned int j,
+ unsigned int k)
+ {
+ struct vba_vars_st *v = &mode_lib->vba;
+ Pipe myPipe;
+ /* NOTE(review): myPipe has no initializer; only the fields assigned below are set here - confirm Pipe has no other members the callee reads. */
+ myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
+ myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
+ myPipe.PixelClock = v->PixelClock[k];
+ myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
+ myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
+ myPipe.ScalerEnabled = v->ScalerEnabled[k];
+ myPipe.VRatio = mode_lib->vba.VRatio[k]; /* same object as v->VRatio[k] */
+ myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k]; /* same object as v->VRatioChroma[k] */
+
+ myPipe.SourceScan = v->SourceScan[k];
+ myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
+ myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
+ myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
+ myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
+ myPipe.InterlaceEnable = v->Interlace[k];
+ myPipe.NumberOfCursors = v->NumberOfCursors[k];
+ myPipe.VBlank = v->VTotal[k] - v->VActive[k];
+ myPipe.HTotal = v->HTotal[k];
+ myPipe.DCCEnable = v->DCCEnable[k];
+ myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
+ || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1; /* true for either 4:1 or 2:1 combine */
+ myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
+ myPipe.BytePerPixelY = v->BytePerPixelY[k];
+ myPipe.BytePerPixelC = v->BytePerPixelC[k];
+ myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
+ v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
+ mode_lib,
+ HostVMInefficiencyFactor,
+ &myPipe,
+ v->DSCDelayPerState[i][k],
+ v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
+ v->DPPCLKDelaySCL,
+ v->DPPCLKDelaySCLLBOnly,
+ v->DPPCLKDelayCNVCCursor,
+ v->DISPCLKDelaySubtotal,
+ v->SwathWidthYThisState[k] / v->HRatio[k],
+ v->OutputFormat[k],
+ v->MaxInterDCNTileRepeaters,
+ dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
+ v->MaximumVStartup[i][j][k],
+ v->GPUVMMaxPageTableLevels,
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->HostVMMaxNonCachedPageTableLevels,
+ v->HostVMMinPageSize,
+ v->DynamicMetadataEnable[k],
+ v->DynamicMetadataVMEnabled,
+ v->DynamicMetadataLinesBeforeActiveRequired[k],
+ v->DynamicMetadataTransmittedBytes[k],
+ v->UrgLatency[i],
+ v->ExtraLatency,
+ v->TimeCalc,
+ v->PDEAndMetaPTEBytesPerFrame[i][j][k],
+ v->MetaRowBytes[i][j][k],
+ v->DPTEBytesPerRow[i][j][k],
+ v->PrefetchLinesY[i][j][k],
+ v->SwathWidthYThisState[k],
+ v->PrefillY[k],
+ v->MaxNumSwY[k],
+ v->PrefetchLinesC[i][j][k],
+ v->SwathWidthCThisState[k],
+ v->PrefillC[k],
+ v->MaxNumSwC[k],
+ v->swath_width_luma_ub_this_state[k],
+ v->swath_width_chroma_ub_this_state[k],
+ v->SwathHeightYThisState[k],
+ v->SwathHeightCThisState[k],
+ v->TWait,
+ &v->DSTXAfterScaler[k], /* from here on: output pointers filled by the callee */
+ &v->DSTYAfterScaler[k],
+ &v->LineTimesForPrefetch[k],
+ &v->PrefetchBW[k],
+ &v->LinesForMetaPTE[k],
+ &v->LinesForMetaAndDPTERow[k],
+ &v->VRatioPreY[i][j][k],
+ &v->VRatioPreC[i][j][k],
+ &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
+ &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
+ &v->NoTimeForDynamicMetadata[i][j][k],
+ &v->Tno_bw[k],
+ &v->prefetch_vmrow_bw[k],
+ &v->dummy7[k], /* presumably scratch slots for outputs this caller ignores - confirm */
+ &v->dummy8[k],
+ &v->dummy13[k],
+ &v->VUpdateOffsetPix[k],
+ &v->VUpdateWidthPix[k],
+ &v->VReadyOffsetPix[k]);
+ }
+
+void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+
+ int i, j;
+ unsigned int k, m;
+ int ReorderingBytes;
+ int MinPrefetchMode = 0, MaxPrefetchMode = 2;
+ bool NoChroma = true;
+ bool EnoughWritebackUnits = true;
+ bool P2IWith420 = false;
+ bool DSCOnlyIfNecessaryWithBPP = false;
+ bool DSC422NativeNotSupported = false;
+ double MaxTotalVActiveRDBandwidth;
+ bool ViewportExceedsSurface = false;
+ bool FMTBufferExceeded = false;
+
+ /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
+
+ CalculateMinAndMaxPrefetchMode(
+ mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
+ &MinPrefetchMode, &MaxPrefetchMode);
+
+ /*Scale Ratio, taps Support Check*/
+
+ v->ScaleRatioAndTapsSupport = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->ScalerEnabled[k] == false
+ && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
+ && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
+ && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
+ && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
+ || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
+ v->ScaleRatioAndTapsSupport = false;
+ } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
+ || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
+ || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
+ || v->VRatio[k] > v->vtaps[k]
+ || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
+ && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
+ && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
+ && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
+ || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
+ || v->HRatioChroma[k] > v->MaxHSCLRatio
+ || v->VRatioChroma[k] > v->MaxVSCLRatio
+ || v->HRatioChroma[k] > v->HTAPsChroma[k]
+ || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
+ v->ScaleRatioAndTapsSupport = false;
+ }
+ }
+ /*Source Format, Pixel Format and Scan Support Check*/
+
+ v->SourceFormatPixelAndScanSupport = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) {
+ v->SourceFormatPixelAndScanSupport = false;
+ }
+ }
+ /*Bandwidth Support Check*/
+
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ CalculateBytePerPixelAnd256BBlockSizes(
+ v->SourcePixelFormat[k],
+ v->SurfaceTiling[k],
+ &v->BytePerPixelY[k],
+ &v->BytePerPixelC[k],
+ &v->BytePerPixelInDETY[k],
+ &v->BytePerPixelInDETC[k],
+ &v->Read256BlockHeightY[k],
+ &v->Read256BlockHeightC[k],
+ &v->Read256BlockWidthY[k],
+ &v->Read256BlockWidthC[k]);
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->SourceScan[k] != dm_vert) {
+ v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
+ v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
+ } else {
+ v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
+ v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
+ }
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
+ / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
+ v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
+ / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
+ v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
+ / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
+ } else if (v->WritebackEnable[k] == true) {
+ v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
+ / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
+ } else {
+ v->WriteBandwidth[k] = 0.0;
+ }
+ }
+
+ /*Writeback Latency support check*/
+
+ v->WritebackLatencySupport = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
+ v->WritebackLatencySupport = false;
+ }
+ }
+
+ /*Writeback Mode Support Check*/
+
+ v->TotalNumberOfActiveWriteback = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->WritebackEnable[k] == true) {
+ v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
+ }
+ }
+
+ if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
+ EnoughWritebackUnits = false;
+ }
+
+ /*Writeback Scale Ratio and Taps Support Check*/
+
+ v->WritebackScaleRatioAndTapsSupport = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->WritebackEnable[k] == true) {
+ if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
+ || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
+ || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
+ || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
+ || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
+ || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
+ || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
+ v->WritebackScaleRatioAndTapsSupport = false;
+ }
+ if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
+ v->WritebackScaleRatioAndTapsSupport = false;
+ }
+ }
+ }
+ /*Maximum DISPCLK/DPPCLK Support check*/
+
+ v->WritebackRequiredDISPCLK = 0.0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->WritebackEnable[k] == true) {
+ v->WritebackRequiredDISPCLK = dml_max(
+ v->WritebackRequiredDISPCLK,
+ dml314_CalculateWriteBackDISPCLK(
+ v->WritebackPixelFormat[k],
+ v->PixelClock[k],
+ v->WritebackHRatio[k],
+ v->WritebackVRatio[k],
+ v->WritebackHTaps[k],
+ v->WritebackVTaps[k],
+ v->WritebackSourceWidth[k],
+ v->WritebackDestinationWidth[k],
+ v->HTotal[k],
+ v->WritebackLineBufferSize));
+ }
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->HRatio[k] > 1.0) {
+ v->PSCL_FACTOR[k] = dml_min(
+ v->MaxDCHUBToPSCLThroughput,
+ v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
+ } else {
+ v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
+ }
+ if (v->BytePerPixelC[k] == 0.0) {
+ v->PSCL_FACTOR_CHROMA[k] = 0.0;
+ v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
+ * dml_max3(
+ v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
+ v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
+ 1.0);
+ if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
+ v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
+ }
+ } else {
+ if (v->HRatioChroma[k] > 1.0) {
+ v->PSCL_FACTOR_CHROMA[k] = dml_min(
+ v->MaxDCHUBToPSCLThroughput,
+ v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
+ } else {
+ v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
+ }
+ v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
+ * dml_max5(
+ v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
+ v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
+ v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
+ v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
+ 1.0);
+ if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
+ && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
+ v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
+ }
+ }
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ int MaximumSwathWidthSupportLuma;
+ int MaximumSwathWidthSupportChroma;
+
+ if (v->SurfaceTiling[k] == dm_sw_linear) {
+ MaximumSwathWidthSupportLuma = 8192.0;
+ } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
+ MaximumSwathWidthSupportLuma = 2880.0;
+ } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
+ MaximumSwathWidthSupportLuma = 3840.0;
+ } else {
+ MaximumSwathWidthSupportLuma = 5760.0;
+ }
+
+ if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
+ MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
+ } else {
+ MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
+ }
+ v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
+ / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
+ if (v->BytePerPixelC[k] == 0.0) {
+ v->MaximumSwathWidthInLineBufferChroma = 0;
+ } else {
+ v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
+ / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
+ }
+ v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
+ v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
+ }
+
+ CalculateSwathAndDETConfiguration(
+ true,
+ v->NumberOfActivePlanes,
+ v->DETBufferSizeInKByte[0],
+ v->MaximumSwathWidthLuma,
+ v->MaximumSwathWidthChroma,
+ v->SourceScan,
+ v->SourcePixelFormat,
+ v->SurfaceTiling,
+ v->ViewportWidth,
+ v->ViewportHeight,
+ v->SurfaceWidthY,
+ v->SurfaceWidthC,
+ v->SurfaceHeightY,
+ v->SurfaceHeightC,
+ v->Read256BlockHeightY,
+ v->Read256BlockHeightC,
+ v->Read256BlockWidthY,
+ v->Read256BlockWidthC,
+ v->odm_combine_dummy,
+ v->BlendingAndTiming,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->BytePerPixelInDETY,
+ v->BytePerPixelInDETC,
+ v->HActive,
+ v->HRatio,
+ v->HRatioChroma,
+ v->NoOfDPPThisState,
+ v->swath_width_luma_ub_this_state,
+ v->swath_width_chroma_ub_this_state,
+ v->SwathWidthYThisState,
+ v->SwathWidthCThisState,
+ v->SwathHeightYThisState,
+ v->SwathHeightCThisState,
+ v->DETBufferSizeYThisState,
+ v->DETBufferSizeCThisState,
+ v->SingleDPPViewportSizeSupportPerPlane,
+ &v->ViewportSizeSupport[0][0]);
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ for (j = 0; j < 2; j++) {
+ v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
+ v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
+ v->RequiredDISPCLK[i][j] = 0.0;
+ v->DISPCLK_DPPCLK_Support[i][j] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
+ * (1.0 + v->DISPCLKRampingMargin / 100.0);
+ if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
+ && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
+ && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
+ v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
+ * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+ }
+ v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
+ * (1 + v->DISPCLKRampingMargin / 100.0);
+ if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
+ && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
+ && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
+ v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
+ * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+ }
+ v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
+ * (1 + v->DISPCLKRampingMargin / 100.0);
+ if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
+ && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
+ && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
+ v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
+ * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+ }
+
+ if (v->ODMCombinePolicy == dm_odm_combine_policy_none
+ || !(v->Output[k] == dm_dp ||
+ v->Output[k] == dm_dp2p0 ||
+ v->Output[k] == dm_edp)) {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
+
+ if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
+ FMTBufferExceeded = true;
+ } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
+ } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
+ || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
+ } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
+ } else {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
+ }
+ if (v->DSCEnabled[k] && v->HActive[k] > DCN314_MAX_DSC_IMAGE_WIDTH
+ && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
+ if (v->HActive[k] / 2 > DCN314_MAX_DSC_IMAGE_WIDTH) {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
+ } else {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
+ }
+ }
+ if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN314_MAX_FMT_420_BUFFER_WIDTH
+ && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
+ if (v->Output[k] == dm_hdmi) {
+ FMTBufferExceeded = true;
+ } else if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH) {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
+
+ if (v->HActive[k] / 4 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
+ FMTBufferExceeded = true;
+ } else {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
+ }
+ }
+ if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
+ v->MPCCombine[i][j][k] = false;
+ v->NoOfDPP[i][j][k] = 4;
+ v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
+ } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
+ v->MPCCombine[i][j][k] = false;
+ v->NoOfDPP[i][j][k] = 2;
+ v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
+ } else if ((v->WhenToDoMPCCombine == dm_mpc_never
+ || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
+ <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
+ v->MPCCombine[i][j][k] = false;
+ v->NoOfDPP[i][j][k] = 1;
+ v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+ } else {
+ v->MPCCombine[i][j][k] = true;
+ v->NoOfDPP[i][j][k] = 2;
+ v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
+ }
+ v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
+ if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
+ > v->MaxDppclkRoundedDownToDFSGranularity)
+ || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
+ v->DISPCLK_DPPCLK_Support[i][j] = false;
+ }
+ }
+ v->TotalNumberOfActiveDPP[i][j] = 0;
+ v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
+ if (v->NoOfDPP[i][j][k] == 1)
+ v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
+ if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
+ || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
+ NoChroma = false;
+ }
+
+ // UPTO
+ if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
+ && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
+ while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
+ double BWOfNonSplitPlaneOfMaximumBandwidth;
+ unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
+
+ BWOfNonSplitPlaneOfMaximumBandwidth = 0;
+ NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
+ && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
+ BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
+ NumberOfNonSplitPlaneOfMaximumBandwidth = k;
+ }
+ }
+ v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
+ v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
+ v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
+ v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
+ * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
+ v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
+ v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
+ }
+ }
+ if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
+ v->RequiredDISPCLK[i][j] = 0.0;
+ v->DISPCLK_DPPCLK_Support[i][j] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
+ if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
+ v->MPCCombine[i][j][k] = true;
+ v->NoOfDPP[i][j][k] = 2;
+ v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
+ * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
+ } else {
+ v->MPCCombine[i][j][k] = false;
+ v->NoOfDPP[i][j][k] = 1;
+ v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
+ * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+ }
+ if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
+ && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
+ v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
+ * (1.0 + v->DISPCLKRampingMargin / 100.0);
+ } else {
+ v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+ }
+ v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
+ if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
+ > v->MaxDppclkRoundedDownToDFSGranularity)
+ || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
+ v->DISPCLK_DPPCLK_Support[i][j] = false;
+ }
+ }
+ v->TotalNumberOfActiveDPP[i][j] = 0.0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
+ }
+ }
+ v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
+ if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
+ v->DISPCLK_DPPCLK_Support[i][j] = false;
+ }
+ }
+ }
+
+ /*Total Available Pipes Support Check*/
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ for (j = 0; j < 2; j++) {
+ if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
+ v->TotalAvailablePipesSupport[i][j] = true;
+ } else {
+ v->TotalAvailablePipesSupport[i][j] = false;
+ }
+ }
+ }
+ /*Display IO and DSC Support Check*/
+
+ v->NonsupportedDSCInputBPC = false;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
+ || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
+ v->NonsupportedDSCInputBPC = true;
+ }
+ }
+
+ /*Number Of DSC Slices*/
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->BlendingAndTiming[k] == k) {
+ if (v->PixelClockBackEnd[k] > 3200) {
+ v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
+ } else if (v->PixelClockBackEnd[k] > 1360) {
+ v->NumberOfDSCSlices[k] = 8;
+ } else if (v->PixelClockBackEnd[k] > 680) {
+ v->NumberOfDSCSlices[k] = 4;
+ } else if (v->PixelClockBackEnd[k] > 340) {
+ v->NumberOfDSCSlices[k] = 2;
+ } else {
+ v->NumberOfDSCSlices[k] = 1;
+ }
+ } else {
+ v->NumberOfDSCSlices[k] = 0;
+ }
+ }
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->RequiresDSC[i][k] = false;
+ v->RequiresFEC[i][k] = false;
+ if (v->BlendingAndTiming[k] == k) {
+ if (v->Output[k] == dm_hdmi) {
+ v->RequiresDSC[i][k] = false;
+ v->RequiresFEC[i][k] = false;
+ v->OutputBppPerState[i][k] = TruncToValidBPP(
+ dml_min(600.0, v->PHYCLKPerState[i]) * 10,
+ 3,
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ false,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_dp2p0) {
+ if (v->DSCEnable[k] == true) {
+ v->RequiresDSC[i][k] = true;
+ v->LinkDSCEnable = true;
+ if (v->Output[k] == dm_dp || v->Output[k] == dm_dp2p0) {
+ v->RequiresFEC[i][k] = true;
+ } else {
+ v->RequiresFEC[i][k] = false;
+ }
+ } else {
+ v->RequiresDSC[i][k] = false;
+ v->LinkDSCEnable = false;
+ if (v->Output[k] == dm_dp2p0) {
+ v->RequiresFEC[i][k] = true;
+ } else {
+ v->RequiresFEC[i][k] = false;
+ }
+ }
+ if (v->Output[k] == dm_dp2p0) {
+ v->Outbpp = BPP_INVALID;
+ if ((v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr10) &&
+ v->PHYCLKD18PerState[k] >= 10000.0 / 18.0) {
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 10000,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 13500.0 / 18.0 &&
+ v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
+ v->RequiresDSC[i][k] = true;
+ v->LinkDSCEnable = true;
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 10000,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ }
+ v->OutputBppPerState[i][k] = v->Outbpp;
+ // TODO: Need some other way to handle this nonsense
+ // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR10"
+ }
+ if (v->Outbpp == BPP_INVALID &&
+ (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5) &&
+ v->PHYCLKD18PerState[k] >= 13500.0 / 18.0) {
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 13500,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 20000.0 / 18.0 &&
+ v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
+ v->RequiresDSC[i][k] = true;
+ v->LinkDSCEnable = true;
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 13500,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ }
+ v->OutputBppPerState[i][k] = v->Outbpp;
+ // TODO: Need some other way to handle this nonsense
+ // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR13p5"
+ }
+ if (v->Outbpp == BPP_INVALID &&
+ (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr20) &&
+ v->PHYCLKD18PerState[k] >= 20000.0 / 18.0) {
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 20000,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ if (v->Outbpp == BPP_INVALID && v->DSCEnable[k] == true &&
+ v->ForcedOutputLinkBPP[k] == 0) {
+ v->RequiresDSC[i][k] = true;
+ v->LinkDSCEnable = true;
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 20000,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ }
+ v->OutputBppPerState[i][k] = v->Outbpp;
+ // TODO: Need some other way to handle this nonsense
+ // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR20"
+ }
+ } else {
+ v->Outbpp = BPP_INVALID;
+ if (v->PHYCLKPerState[i] >= 270.0) {
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 2700,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ v->OutputBppPerState[i][k] = v->Outbpp;
+ // TODO: Need some other way to handle this nonsense
+ // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
+ }
+ if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 5400,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ v->OutputBppPerState[i][k] = v->Outbpp;
+ // TODO: Need some other way to handle this nonsense
+ // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
+ }
+ if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 8100,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ v->OutputBppPerState[i][k] = v->Outbpp;
+ // TODO: Need some other way to handle this nonsense
+ // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
+ }
+ }
+ }
+ } else {
+ v->OutputBppPerState[i][k] = 0;
+ }
+ }
+ }
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ v->LinkCapacitySupport[i] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->BlendingAndTiming[k] == k
+ && (v->Output[k] == dm_dp ||
+ v->Output[k] == dm_edp ||
+ v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
+ v->LinkCapacitySupport[i] = false;
+ }
+ }
+ }
+
+ // UPTO 2172
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->BlendingAndTiming[k] == k
+ && (v->Output[k] == dm_dp ||
+ v->Output[k] == dm_edp ||
+ v->Output[k] == dm_hdmi)) {
+ if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
+ P2IWith420 = true;
+ }
+ if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
+ && !v->DSC422NativeSupport) {
+ DSC422NativeNotSupported = true;
+ }
+ }
+ }
+
+
+ for (i = 0; i < v->soc.num_states; ++i) {
+ v->ODMCombine4To1SupportCheckOK[i] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
+ && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
+ || v->Output[k] == dm_hdmi)) {
+ v->ODMCombine4To1SupportCheckOK[i] = false;
+ }
+ }
+ }
+
+ /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ v->NotEnoughDSCUnits[i] = false;
+ v->TotalDSCUnitsRequired = 0.0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->RequiresDSC[i][k] == true) {
+ if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
+ v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
+ } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
+ v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
+ } else {
+ v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
+ }
+ }
+ }
+ if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
+ v->NotEnoughDSCUnits[i] = true;
+ }
+ }
+ /*DSC Delay per state*/
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->OutputBppPerState[i][k] == BPP_INVALID) {
+ v->BPP = 0.0;
+ } else {
+ v->BPP = v->OutputBppPerState[i][k];
+ }
+ if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
+ if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
+ v->DSCDelayPerState[i][k] = dscceComputeDelay(
+ v->DSCInputBitPerComponent[k],
+ v->BPP,
+ dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
+ v->NumberOfDSCSlices[k],
+ v->OutputFormat[k],
+ v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
+ } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
+ v->DSCDelayPerState[i][k] = 2.0
+ * (dscceComputeDelay(
+ v->DSCInputBitPerComponent[k],
+ v->BPP,
+ dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
+ v->NumberOfDSCSlices[k] / 2,
+ v->OutputFormat[k],
+ v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
+ } else {
+ v->DSCDelayPerState[i][k] = 4.0
+ * (dscceComputeDelay(
+ v->DSCInputBitPerComponent[k],
+ v->BPP,
+ dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
+ v->NumberOfDSCSlices[k] / 4,
+ v->OutputFormat[k],
+ v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
+ }
+ v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] + (v->HTotal[k] - v->HActive[k]) * dml_ceil((double) v->DSCDelayPerState[i][k] / v->HActive[k], 1.0);
+ v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
+ } else {
+ v->DSCDelayPerState[i][k] = 0.0;
+ }
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ for (m = 0; m < v->NumberOfActivePlanes; m++) {
+ if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
+ v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
+ }
+ }
+ }
+ }
+
+ //Calculate Swath, DET Configuration, DCFCLKDeepSleep
+ //
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
+ v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
+ v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
+ }
+
+ CalculateSwathAndDETConfiguration(
+ false,
+ v->NumberOfActivePlanes,
+ v->DETBufferSizeInKByte[0],
+ v->MaximumSwathWidthLuma,
+ v->MaximumSwathWidthChroma,
+ v->SourceScan,
+ v->SourcePixelFormat,
+ v->SurfaceTiling,
+ v->ViewportWidth,
+ v->ViewportHeight,
+ v->SurfaceWidthY,
+ v->SurfaceWidthC,
+ v->SurfaceHeightY,
+ v->SurfaceHeightC,
+ v->Read256BlockHeightY,
+ v->Read256BlockHeightC,
+ v->Read256BlockWidthY,
+ v->Read256BlockWidthC,
+ v->ODMCombineEnableThisState,
+ v->BlendingAndTiming,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->BytePerPixelInDETY,
+ v->BytePerPixelInDETC,
+ v->HActive,
+ v->HRatio,
+ v->HRatioChroma,
+ v->NoOfDPPThisState,
+ v->swath_width_luma_ub_this_state,
+ v->swath_width_chroma_ub_this_state,
+ v->SwathWidthYThisState,
+ v->SwathWidthCThisState,
+ v->SwathHeightYThisState,
+ v->SwathHeightCThisState,
+ v->DETBufferSizeYThisState,
+ v->DETBufferSizeCThisState,
+ v->dummystring,
+ &v->ViewportSizeSupport[i][j]);
+
+ CalculateDCFCLKDeepSleep(
+ mode_lib,
+ v->NumberOfActivePlanes,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->VRatio,
+ v->VRatioChroma,
+ v->SwathWidthYThisState,
+ v->SwathWidthCThisState,
+ v->NoOfDPPThisState,
+ v->HRatio,
+ v->HRatioChroma,
+ v->PixelClock,
+ v->PSCL_FACTOR,
+ v->PSCL_FACTOR_CHROMA,
+ v->RequiredDPPCLKThisState,
+ v->ReadBandwidthLuma,
+ v->ReadBandwidthChroma,
+ v->ReturnBusWidth,
+ &v->ProjectedDCFCLKDeepSleep[i][j]);
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
+ v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
+ v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
+ v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
+ v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
+ v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
+ v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
+ v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
+ }
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
+ / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
+ }
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ for (j = 0; j < 2; j++) {
+ bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
+
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
+ v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
+ v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
+ v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
+ v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
+ v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
+ v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
+ v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
+ }
+
+ v->TotalNumberOfDCCActiveDPP[i][j] = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->DCCEnable[k] == true) {
+ v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
+ || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
+
+ if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
+ && v->SourceScan[k] != dm_vert) {
+ v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
+ / 2;
+ v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
+ } else {
+ v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
+ v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
+ }
+
+ v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
+ mode_lib,
+ v->DCCEnable[k],
+ v->Read256BlockHeightC[k],
+ v->Read256BlockWidthC[k],
+ v->SourcePixelFormat[k],
+ v->SurfaceTiling[k],
+ v->BytePerPixelC[k],
+ v->SourceScan[k],
+ v->SwathWidthCThisState[k],
+ v->ViewportHeightChroma[k],
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->HostVMMaxNonCachedPageTableLevels,
+ v->GPUVMMinPageSize,
+ v->HostVMMinPageSize,
+ v->PTEBufferSizeInRequestsForChroma,
+ v->PitchC[k],
+ 0.0,
+ &v->MacroTileWidthC[k],
+ &v->MetaRowBytesC,
+ &v->DPTEBytesPerRowC,
+ &v->PTEBufferSizeNotExceededC[i][j][k],
+ &v->dummyinteger7,
+ &v->dpte_row_height_chroma[k],
+ &v->dummyinteger28,
+ &v->dummyinteger26,
+ &v->dummyinteger23,
+ &v->meta_row_height_chroma[k],
+ &v->dummyinteger8,
+ &v->dummyinteger9,
+ &v->dummyinteger19,
+ &v->dummyinteger20,
+ &v->dummyinteger17,
+ &v->dummyinteger10,
+ &v->dummyinteger11);
+
+ v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
+ mode_lib,
+ v->VRatioChroma[k],
+ v->VTAPsChroma[k],
+ v->Interlace[k],
+ v->ProgressiveToInterlaceUnitInOPP,
+ v->SwathHeightCThisState[k],
+ v->ViewportYStartC[k],
+ &v->PrefillC[k],
+ &v->MaxNumSwC[k]);
+ } else {
+ v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
+ v->PTEBufferSizeInRequestsForChroma = 0;
+ v->PDEAndMetaPTEBytesPerFrameC = 0.0;
+ v->MetaRowBytesC = 0.0;
+ v->DPTEBytesPerRowC = 0.0;
+ v->PrefetchLinesC[i][j][k] = 0.0;
+ v->PTEBufferSizeNotExceededC[i][j][k] = true;
+ }
+ v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
+ mode_lib,
+ v->DCCEnable[k],
+ v->Read256BlockHeightY[k],
+ v->Read256BlockWidthY[k],
+ v->SourcePixelFormat[k],
+ v->SurfaceTiling[k],
+ v->BytePerPixelY[k],
+ v->SourceScan[k],
+ v->SwathWidthYThisState[k],
+ v->ViewportHeight[k],
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->HostVMMaxNonCachedPageTableLevels,
+ v->GPUVMMinPageSize,
+ v->HostVMMinPageSize,
+ v->PTEBufferSizeInRequestsForLuma,
+ v->PitchY[k],
+ v->DCCMetaPitchY[k],
+ &v->MacroTileWidthY[k],
+ &v->MetaRowBytesY,
+ &v->DPTEBytesPerRowY,
+ &v->PTEBufferSizeNotExceededY[i][j][k],
+ &v->dummyinteger7,
+ &v->dpte_row_height[k],
+ &v->dummyinteger29,
+ &v->dummyinteger27,
+ &v->dummyinteger24,
+ &v->meta_row_height[k],
+ &v->dummyinteger25,
+ &v->dpte_group_bytes[k],
+ &v->dummyinteger21,
+ &v->dummyinteger22,
+ &v->dummyinteger18,
+ &v->dummyinteger5,
+ &v->dummyinteger6);
+ v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
+ mode_lib,
+ v->VRatio[k],
+ v->vtaps[k],
+ v->Interlace[k],
+ v->ProgressiveToInterlaceUnitInOPP,
+ v->SwathHeightYThisState[k],
+ v->ViewportYStartY[k],
+ &v->PrefillY[k],
+ &v->MaxNumSwY[k]);
+ v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
+ v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
+ v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
+
+ CalculateRowBandwidth(
+ v->GPUVMEnable,
+ v->SourcePixelFormat[k],
+ v->VRatio[k],
+ v->VRatioChroma[k],
+ v->DCCEnable[k],
+ v->HTotal[k] / v->PixelClock[k],
+ v->MetaRowBytesY,
+ v->MetaRowBytesC,
+ v->meta_row_height[k],
+ v->meta_row_height_chroma[k],
+ v->DPTEBytesPerRowY,
+ v->DPTEBytesPerRowC,
+ v->dpte_row_height[k],
+ v->dpte_row_height_chroma[k],
+ &v->meta_row_bandwidth[i][j][k],
+ &v->dpte_row_bandwidth[i][j][k]);
+ }
+ /*
+ * DCCMetaBufferSizeSupport(i, j) = True
+ * For k = 0 To NumberOfActivePlanes - 1
+ * If MetaRowBytes(i, j, k) > 24064 Then
+ * DCCMetaBufferSizeSupport(i, j) = False
+ * End If
+ * Next k
+ */
+ v->DCCMetaBufferSizeSupport[i][j] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->MetaRowBytes[i][j][k] > 24064)
+ v->DCCMetaBufferSizeSupport[i][j] = false;
+ }
+ v->UrgLatency[i] = CalculateUrgentLatency(
+ v->UrgentLatencyPixelDataOnly,
+ v->UrgentLatencyPixelMixedWithVMData,
+ v->UrgentLatencyVMDataOnly,
+ v->DoUrgentLatencyAdjustment,
+ v->UrgentLatencyAdjustmentFabricClockComponent,
+ v->UrgentLatencyAdjustmentFabricClockReference,
+ v->FabricClockPerState[i]);
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ CalculateUrgentBurstFactor(
+ v->swath_width_luma_ub_this_state[k],
+ v->swath_width_chroma_ub_this_state[k],
+ v->SwathHeightYThisState[k],
+ v->SwathHeightCThisState[k],
+ v->HTotal[k] / v->PixelClock[k],
+ v->UrgLatency[i],
+ v->CursorBufferSize,
+ v->CursorWidth[k][0],
+ v->CursorBPP[k][0],
+ v->VRatio[k],
+ v->VRatioChroma[k],
+ v->BytePerPixelInDETY[k],
+ v->BytePerPixelInDETC[k],
+ v->DETBufferSizeYThisState[k],
+ v->DETBufferSizeCThisState[k],
+ &v->UrgentBurstFactorCursor[k],
+ &v->UrgentBurstFactorLuma[k],
+ &v->UrgentBurstFactorChroma[k],
+ &NotUrgentLatencyHiding[k]);
+ }
+
+ v->NotEnoughUrgentLatencyHidingA[i][j] = false;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (NotUrgentLatencyHiding[k]) {
+ v->NotEnoughUrgentLatencyHidingA[i][j] = true;
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
+ + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
+ v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
+ }
+
+ v->TotalVActivePixelBandwidth[i][j] = 0;
+ v->TotalVActiveCursorBandwidth[i][j] = 0;
+ v->TotalMetaRowBandwidth[i][j] = 0;
+ v->TotalDPTERowBandwidth[i][j] = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
+ v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
+ v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
+ v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
+ }
+ }
+ }
+
+ //Calculate Return BW
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->BlendingAndTiming[k] == k) {
+ if (v->WritebackEnable[k] == true) {
+ v->WritebackDelayTime[k] = v->WritebackLatency
+ + CalculateWriteBackDelay(
+ v->WritebackPixelFormat[k],
+ v->WritebackHRatio[k],
+ v->WritebackVRatio[k],
+ v->WritebackVTaps[k],
+ v->WritebackDestinationWidth[k],
+ v->WritebackDestinationHeight[k],
+ v->WritebackSourceHeight[k],
+ v->HTotal[k]) / v->RequiredDISPCLK[i][j];
+ } else {
+ v->WritebackDelayTime[k] = 0.0;
+ }
+ for (m = 0; m < v->NumberOfActivePlanes; m++) {
+ if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
+ v->WritebackDelayTime[k] = dml_max(
+ v->WritebackDelayTime[k],
+ v->WritebackLatency
+ + CalculateWriteBackDelay(
+ v->WritebackPixelFormat[m],
+ v->WritebackHRatio[m],
+ v->WritebackVRatio[m],
+ v->WritebackVTaps[m],
+ v->WritebackDestinationWidth[m],
+ v->WritebackDestinationHeight[m],
+ v->WritebackSourceHeight[m],
+ v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
+ }
+ }
+ }
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ for (m = 0; m < v->NumberOfActivePlanes; m++) {
+ if (v->BlendingAndTiming[k] == m) {
+ v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
+ }
+ }
+ }
+ v->MaxMaxVStartup[i][j] = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->MaximumVStartup[i][j][k] =
+ CalculateMaxVStartup(
+ v->VTotal[k],
+ v->VActive[k],
+ v->VBlankNom[k],
+ v->HTotal[k],
+ v->PixelClock[k],
+ v->ProgressiveToInterlaceUnitInOPP,
+ v->Interlace[k],
+ v->ip.VBlankNomDefaultUS,
+ v->WritebackDelayTime[k]);
+ v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
+ }
+ }
+ }
+
+ ReorderingBytes = v->NumberOfChannels
+ * dml_max3(
+ v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
+ v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
+ v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
+
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
+ }
+ }
+
+ if (v->UseMinimumRequiredDCFCLK == true)
+ UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
+
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ double IdealFabricAndSDPPortBandwidthPerState = dml_min(
+ v->ReturnBusWidth * v->DCFCLKState[i][j],
+ v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
+ double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
+ double PixelDataOnlyReturnBWPerState = dml_min(
+ IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
+ IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
+ double PixelMixedWithVMDataReturnBWPerState = dml_min(
+ IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
+ IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
+
+ if (v->HostVMEnable != true) {
+ v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
+ } else {
+ v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
+ }
+ }
+ }
+
+ //Re-ordering Buffer Support Check
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
+ > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
+ v->ROBSupport[i][j] = true;
+ } else {
+ v->ROBSupport[i][j] = false;
+ }
+ }
+ }
+
+ //Vertical Active BW support check
+
+ MaxTotalVActiveRDBandwidth = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
+ }
+
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
+ dml_min(
+ v->ReturnBusWidth * v->DCFCLKState[i][j],
+ v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
+ * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
+ v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
+ * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
+
+ if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
+ v->TotalVerticalActiveBandwidthSupport[i][j] = true;
+ } else {
+ v->TotalVerticalActiveBandwidthSupport[i][j] = false;
+ }
+ }
+ }
+
+ v->UrgentLatency = CalculateUrgentLatency(
+ v->UrgentLatencyPixelDataOnly,
+ v->UrgentLatencyPixelMixedWithVMData,
+ v->UrgentLatencyVMDataOnly,
+ v->DoUrgentLatencyAdjustment,
+ v->UrgentLatencyAdjustmentFabricClockComponent,
+ v->UrgentLatencyAdjustmentFabricClockReference,
+ v->FabricClock);
+ //Prefetch Check
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ double VMDataOnlyReturnBWPerState;
+ double HostVMInefficiencyFactor = 1;
+ int NextPrefetchModeState = MinPrefetchMode;
+ bool UnboundedRequestEnabledThisState = false;
+ int CompressedBufferSizeInkByteThisState = 0;
+ double dummy;
+
+ v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
+
+ v->BandwidthWithoutPrefetchSupported[i][j] = true;
+ if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
+ + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
+ v->BandwidthWithoutPrefetchSupported[i][j] = false;
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
+ v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
+ v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
+ v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
+ v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
+ v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
+ v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
+ v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
+ v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
+ }
+
+ VMDataOnlyReturnBWPerState = dml_min(
+ dml_min(
+ v->ReturnBusWidth * v->DCFCLKState[i][j],
+ v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
+ * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
+ v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
+ * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
+ if (v->GPUVMEnable && v->HostVMEnable)
+ HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
+
+ v->ExtraLatency = CalculateExtraLatency(
+ v->RoundTripPingLatencyCycles,
+ ReorderingBytes,
+ v->DCFCLKState[i][j],
+ v->TotalNumberOfActiveDPP[i][j],
+ v->PixelChunkSizeInKByte,
+ v->TotalNumberOfDCCActiveDPP[i][j],
+ v->MetaChunkSize,
+ v->ReturnBWPerState[i][j],
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->NumberOfActivePlanes,
+ v->NoOfDPPThisState,
+ v->dpte_group_bytes,
+ HostVMInefficiencyFactor,
+ v->HostVMMinPageSize,
+ v->HostVMMaxNonCachedPageTableLevels);
+
+ v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
+ do {
+ v->PrefetchModePerState[i][j] = NextPrefetchModeState;
+ v->MaxVStartup = v->NextMaxVStartup;
+
+ v->TWait = CalculateTWait(
+ v->PrefetchModePerState[i][j],
+ v->DRAMClockChangeLatency,
+ v->UrgLatency[i],
+ v->SREnterPlusExitTime);
+
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ CalculatePrefetchSchedulePerPlane(mode_lib,
+ HostVMInefficiencyFactor,
+ i, j, k);
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ CalculateUrgentBurstFactor(
+ v->swath_width_luma_ub_this_state[k],
+ v->swath_width_chroma_ub_this_state[k],
+ v->SwathHeightYThisState[k],
+ v->SwathHeightCThisState[k],
+ v->HTotal[k] / v->PixelClock[k],
+ v->UrgLatency[i],
+ v->CursorBufferSize,
+ v->CursorWidth[k][0],
+ v->CursorBPP[k][0],
+ v->VRatioPreY[i][j][k],
+ v->VRatioPreC[i][j][k],
+ v->BytePerPixelInDETY[k],
+ v->BytePerPixelInDETC[k],
+ v->DETBufferSizeYThisState[k],
+ v->DETBufferSizeCThisState[k],
+ &v->UrgentBurstFactorCursorPre[k],
+ &v->UrgentBurstFactorLumaPre[k],
+ &v->UrgentBurstFactorChromaPre[k],
+ &v->NotUrgentLatencyHidingPre[k]);
+ }
+
+ v->MaximumReadBandwidthWithPrefetch = 0.0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
+ / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
+
+ v->MaximumReadBandwidthWithPrefetch =
+ v->MaximumReadBandwidthWithPrefetch
+ + dml_max3(
+ v->VActivePixelBandwidth[i][j][k]
+ + v->VActiveCursorBandwidth[i][j][k]
+ + v->NoOfDPP[i][j][k]
+ * (v->meta_row_bandwidth[i][j][k]
+ + v->dpte_row_bandwidth[i][j][k]),
+ v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
+ v->NoOfDPP[i][j][k]
+ * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
+ * v->UrgentBurstFactorLumaPre[k]
+ + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
+ * v->UrgentBurstFactorChromaPre[k])
+ + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
+ }
+
+ v->NotEnoughUrgentLatencyHidingPre = false;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->NotUrgentLatencyHidingPre[k] == true) {
+ v->NotEnoughUrgentLatencyHidingPre = true;
+ }
+ }
+
+ v->PrefetchSupported[i][j] = true;
+ if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
+ || v->NotEnoughUrgentLatencyHidingPre == 1) {
+ v->PrefetchSupported[i][j] = false;
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
+ || v->NoTimeForPrefetch[i][j][k] == true) {
+ v->PrefetchSupported[i][j] = false;
+ }
+ }
+
+ v->DynamicMetadataSupported[i][j] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
+ v->DynamicMetadataSupported[i][j] = false;
+ }
+ }
+
+ v->VRatioInPrefetchSupported[i][j] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
+ v->VRatioInPrefetchSupported[i][j] = false;
+ }
+ }
+ v->AnyLinesForVMOrRowTooLarge = false;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
+ v->AnyLinesForVMOrRowTooLarge = true;
+ }
+ }
+
+ v->NextPrefetchMode = v->NextPrefetchMode + 1;
+
+ if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
+ v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
+ - dml_max(
+ v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
+ v->NoOfDPP[i][j][k]
+ * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
+ * v->UrgentBurstFactorLumaPre[k]
+ + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
+ * v->UrgentBurstFactorChromaPre[k])
+ + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
+ }
+ v->TotImmediateFlipBytes = 0.0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
+ + v->NoOfDPP[i][j][k] * (v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
+ + v->DPTEBytesPerRow[i][j][k]);
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ CalculateFlipSchedule(
+ mode_lib,
+ k,
+ HostVMInefficiencyFactor,
+ v->ExtraLatency,
+ v->UrgLatency[i],
+ v->PDEAndMetaPTEBytesPerFrame[i][j][k],
+ v->MetaRowBytes[i][j][k],
+ v->DPTEBytesPerRow[i][j][k]);
+ }
+ v->total_dcn_read_bw_with_flip = 0.0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
+ + dml_max3(
+ v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
+ v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
+ + v->VActiveCursorBandwidth[i][j][k],
+ v->NoOfDPP[i][j][k]
+ * (v->final_flip_bw[k]
+ + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
+ * v->UrgentBurstFactorLumaPre[k]
+ + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
+ * v->UrgentBurstFactorChromaPre[k])
+ + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
+ }
+ v->ImmediateFlipSupportedForState[i][j] = true;
+ if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
+ v->ImmediateFlipSupportedForState[i][j] = false;
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->ImmediateFlipSupportedForPipe[k] == false) {
+ v->ImmediateFlipSupportedForState[i][j] = false;
+ }
+ }
+ } else {
+ v->ImmediateFlipSupportedForState[i][j] = false;
+ }
+
+ if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
+ v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
+ NextPrefetchModeState = NextPrefetchModeState + 1;
+ } else {
+ v->NextMaxVStartup = v->NextMaxVStartup - 1;
+ }
+ v->NextPrefetchMode = v->NextPrefetchMode + 1;
+ } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
+ && ((v->HostVMEnable == false &&
+ v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
+ || v->ImmediateFlipSupportedForState[i][j] == true))
+ || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
+
+ CalculateUnboundedRequestAndCompressedBufferSize(
+ v->DETBufferSizeInKByte[0],
+ v->ConfigReturnBufferSizeInKByte,
+ v->UseUnboundedRequesting,
+ v->TotalNumberOfActiveDPP[i][j],
+ NoChroma,
+ v->MaxNumDPP,
+ v->CompressedBufferSegmentSizeInkByte,
+ v->Output,
+ &UnboundedRequestEnabledThisState,
+ &CompressedBufferSizeInkByteThisState);
+
+ CalculateWatermarksAndDRAMSpeedChangeSupport(
+ mode_lib,
+ v->PrefetchModePerState[i][j],
+ v->DCFCLKState[i][j],
+ v->ReturnBWPerState[i][j],
+ v->UrgLatency[i],
+ v->ExtraLatency,
+ v->SOCCLKPerState[i],
+ v->ProjectedDCFCLKDeepSleep[i][j],
+ v->DETBufferSizeYThisState,
+ v->DETBufferSizeCThisState,
+ v->SwathHeightYThisState,
+ v->SwathHeightCThisState,
+ v->SwathWidthYThisState,
+ v->SwathWidthCThisState,
+ v->NoOfDPPThisState,
+ v->BytePerPixelInDETY,
+ v->BytePerPixelInDETC,
+ UnboundedRequestEnabledThisState,
+ CompressedBufferSizeInkByteThisState,
+ &v->DRAMClockChangeSupport[i][j],
+ &dummy,
+ &dummy,
+ &dummy,
+ &dummy);
+ }
+ }
+
+ /*PTE Buffer Size Check*/
+ for (i = 0; i < v->soc.num_states; i++) {
+ for (j = 0; j < 2; j++) {
+ v->PTEBufferSizeNotExceeded[i][j] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
+ v->PTEBufferSizeNotExceeded[i][j] = false;
+ }
+ }
+ }
+ }
+
+ /*Cursor Support Check*/
+ v->CursorSupport = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->CursorWidth[k][0] > 0.0) {
+ if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
+ v->CursorSupport = false;
+ }
+ }
+ }
+
+ /*Valid Pitch Check*/
+ v->PitchSupport = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
+ if (v->DCCEnable[k] == true) {
+ v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
+ } else {
+ v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
+ }
+ if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
+ && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
+ && v->SourcePixelFormat[k] != dm_mono_8) {
+ v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
+ if (v->DCCEnable[k] == true) {
+ v->AlignedDCCMetaPitchC[k] = dml_ceil(
+ dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
+ 64.0 * v->Read256BlockWidthC[k]);
+ } else {
+ v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
+ }
+ } else {
+ v->AlignedCPitch[k] = v->PitchC[k];
+ v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
+ }
+ if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
+ || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
+ v->PitchSupport = false;
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
+ ViewportExceedsSurface = true;
+ if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
+ && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
+ && v->SourcePixelFormat[k] != dm_rgbe) {
+ if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
+ || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
+ ViewportExceedsSurface = true;
+ }
+ }
+ }
+ }
+
+ /*Mode Support, Voltage State and SOC Configuration*/
+ for (i = v->soc.num_states - 1; i >= 0; i--) {
+ for (j = 0; j < 2; j++) {
+ if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
+ && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
+ && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
+ && v->DTBCLKRequiredMoreThanSupported[i] == false
+ && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
+ && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
+ && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
+ && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
+ && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
+ && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
+ && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
+ && ((v->HostVMEnable == false
+ && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
+ || v->ImmediateFlipSupportedForState[i][j] == true)
+ && FMTBufferExceeded == false) {
+ v->ModeSupport[i][j] = true;
+ } else {
+ v->ModeSupport[i][j] = false;
+ }
+ }
+ }
+ for (i = v->soc.num_states; i >= 0; i--) {
+ for (j = 0; j < 2; j++) {
+ enum dm_validation_status status = DML_VALIDATION_OK;
+
+ if (!v->ScaleRatioAndTapsSupport) {
+ status = DML_FAIL_SCALE_RATIO_TAP;
+ } else if (!v->SourceFormatPixelAndScanSupport) {
+ status = DML_FAIL_SOURCE_PIXEL_FORMAT;
+ } else if (!v->ViewportSizeSupport[i][j]) {
+ status = DML_FAIL_VIEWPORT_SIZE;
+ } else if (P2IWith420) {
+ status = DML_FAIL_P2I_WITH_420;
+ } else if (DSCOnlyIfNecessaryWithBPP) {
+ status = DML_FAIL_DSC_ONLY_IF_NECESSARY_WITH_BPP;
+ } else if (DSC422NativeNotSupported) {
+ status = DML_FAIL_NOT_DSC422_NATIVE;
+ } else if (!v->ODMCombine4To1SupportCheckOK[i]) {
+ status = DML_FAIL_ODM_COMBINE4TO1;
+ } else if (v->NotEnoughDSCUnits[i]) {
+ status = DML_FAIL_NOT_ENOUGH_DSC;
+ } else if (!v->ROBSupport[i][j]) {
+ status = DML_FAIL_REORDERING_BUFFER;
+ } else if (!v->DISPCLK_DPPCLK_Support[i][j]) {
+ status = DML_FAIL_DISPCLK_DPPCLK;
+ } else if (!v->TotalAvailablePipesSupport[i][j]) {
+ status = DML_FAIL_TOTAL_AVAILABLE_PIPES;
+ } else if (!EnoughWritebackUnits) {
+ status = DML_FAIL_ENOUGH_WRITEBACK_UNITS;
+ } else if (!v->WritebackLatencySupport) {
+ status = DML_FAIL_WRITEBACK_LATENCY;
+ } else if (!v->WritebackScaleRatioAndTapsSupport) {
+ status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP;
+ } else if (!v->CursorSupport) {
+ status = DML_FAIL_CURSOR_SUPPORT;
+ } else if (!v->PitchSupport) {
+ status = DML_FAIL_PITCH_SUPPORT;
+ } else if (ViewportExceedsSurface) {
+ status = DML_FAIL_VIEWPORT_EXCEEDS_SURFACE;
+ } else if (!v->PrefetchSupported[i][j]) {
+ status = DML_FAIL_PREFETCH_SUPPORT;
+ } else if (!v->DynamicMetadataSupported[i][j]) {
+ status = DML_FAIL_DYNAMIC_METADATA;
+ } else if (!v->TotalVerticalActiveBandwidthSupport[i][j]) {
+ status = DML_FAIL_TOTAL_V_ACTIVE_BW;
+ } else if (!v->VRatioInPrefetchSupported[i][j]) {
+ status = DML_FAIL_V_RATIO_PREFETCH;
+ } else if (!v->PTEBufferSizeNotExceeded[i][j]) {
+ status = DML_FAIL_PTE_BUFFER_SIZE;
+ } else if (v->NonsupportedDSCInputBPC) {
+ status = DML_FAIL_DSC_INPUT_BPC;
+ } else if ((v->HostVMEnable
+ && !v->ImmediateFlipSupportedForState[i][j])) {
+ status = DML_FAIL_HOST_VM_IMMEDIATE_FLIP;
+ } else if (FMTBufferExceeded) {
+ status = DML_FAIL_FMT_BUFFER_EXCEEDED;
+ }
+ mode_lib->vba.ValidationStatus[i] = status;
+ }
+ }
+
+ {
+ unsigned int MaximumMPCCombine = 0;
+
+ for (i = v->soc.num_states; i >= 0; i--) {
+ if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
+ v->VoltageLevel = i;
+ v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
+ if (v->ModeSupport[i][0] == true) {
+ MaximumMPCCombine = 0;
+ } else {
+ MaximumMPCCombine = 1;
+ }
+ }
+ }
+ v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
+ for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
+ v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
+ v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
+ }
+ v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
+ v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
+ v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
+ v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
+ v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
+ v->maxMpcComb = MaximumMPCCombine;
+ }
+}
+
+/*
+ * Compute the urgent, DRAM-clock-change, writeback and stutter watermarks for
+ * one configuration and classify how a DRAM clock change can be supported.
+ *
+ * The per-plane array arguments (DETBufferSizeY/C, SwathHeightY/C,
+ * SwathWidthY/C, DPPPerPlane, BytePerPixelDETY/C) are indexed by plane,
+ * 0 .. v->NumberOfActivePlanes - 1.
+ *
+ * Written to mode_lib->vba:
+ *   UrgentWatermark, DRAMClockChangeWatermark, TotalActiveWriteback,
+ *   WritebackUrgentWatermark, WritebackDRAMClockChangeWatermark,
+ *   LBLatencyHidingSourceLinesY/C (left holding the last plane's value),
+ *   ActiveDRAMClockChangeLatencyMargin[], MinActiveDRAMClockChangeMargin,
+ *   MinActiveDRAMClockChangeLatencySupported, TotalNumberOfActiveOTG.
+ *
+ * Written through the output pointers:
+ *   *DRAMClockChangeSupport - vactive, vblank or unsupported (see the end of
+ *                             the function for the exact conditions),
+ *   *StutterExitWatermark, *StutterEnterPlusExitWatermark,
+ *   *Z8StutterExitWatermark, *Z8StutterEnterPlusExitWatermark.
+ */
+static void CalculateWatermarksAndDRAMSpeedChangeSupport(
+		struct display_mode_lib *mode_lib,
+		unsigned int PrefetchMode,
+		double DCFCLK,
+		double ReturnBW,
+		double UrgentLatency,
+		double ExtraLatency,
+		double SOCCLK,
+		double DCFCLKDeepSleep,
+		unsigned int DETBufferSizeY[],
+		unsigned int DETBufferSizeC[],
+		unsigned int SwathHeightY[],
+		unsigned int SwathHeightC[],
+		double SwathWidthY[],
+		double SwathWidthC[],
+		unsigned int DPPPerPlane[],
+		double BytePerPixelDETY[],
+		double BytePerPixelDETC[],
+		bool UnboundedRequestEnabled,
+		unsigned int CompressedBufferSizeInkByte,
+		enum clock_change_support *DRAMClockChangeSupport,
+		double *StutterExitWatermark,
+		double *StutterEnterPlusExitWatermark,
+		double *Z8StutterExitWatermark,
+		double *Z8StutterEnterPlusExitWatermark)
+{
+	struct vba_vars_st *v = &mode_lib->vba;
+	double EffectiveLBLatencyHidingY;
+	double EffectiveLBLatencyHidingC;
+	double LinesInDETY[DC__NUM_DPP__MAX];
+	double LinesInDETC;
+	unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
+	unsigned int LinesInDETCRoundedDownToSwath;
+	double FullDETBufferingTimeY;
+	double FullDETBufferingTimeC;
+	double ActiveDRAMClockChangeLatencyMarginY;
+	double ActiveDRAMClockChangeLatencyMarginC;
+	double WritebackDRAMClockChangeLatencyMargin;
+	double PlaneWithMinActiveDRAMClockChangeMargin;
+	double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
+	double WritebackDRAMClockChangeLatencyHiding;
+	double TotalPixelBW = 0.0;
+	int k, j;
+
+	v->UrgentWatermark = UrgentLatency + ExtraLatency;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
+	dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
+	dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
+#endif
+
+	v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
+	dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
+#endif
+
+	/* Count planes with writeback enabled. */
+	v->TotalActiveWriteback = 0;
+	for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+		if (v->WritebackEnable[k] == true) {
+			v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
+		}
+	}
+
+	/* With more than one active writeback an extra chunk-transfer term is added. */
+	if (v->TotalActiveWriteback <= 1) {
+		v->WritebackUrgentWatermark = v->WritebackLatency;
+	} else {
+		v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+	}
+
+	if (v->TotalActiveWriteback <= 1) {
+		v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
+	} else {
+		v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+	}
+
+	/* Total pixel bandwidth; used below to apportion the compressed buffer. */
+	for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+		TotalPixelBW = TotalPixelBW
+				+ DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
+						/ (v->HTotal[k] / v->PixelClock[k]);
+	}
+
+	/* Per-plane DRAM clock change latency margin. */
+	for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+		double EffectiveDETBufferSizeY = DETBufferSizeY[k];
+
+		v->LBLatencyHidingSourceLinesY = dml_min(
+				(double) v->MaxLineBufferLines,
+				dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
+
+		v->LBLatencyHidingSourceLinesC = dml_min(
+				(double) v->MaxLineBufferLines,
+				dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
+
+		EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
+
+		EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
+
+		/* Unbounded requesting adds this plane's bandwidth share of the compressed buffer to luma. */
+		if (UnboundedRequestEnabled) {
+			EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
+					+ CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
+		}
+
+		LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
+		LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
+		FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
+		if (BytePerPixelDETC[k] > 0) {
+			/*
+			 * Use the caller-supplied chroma DET size, mirroring the luma
+			 * path above.  Reading v->DETBufferSizeC[] here would ignore
+			 * the per-state array that the mode-support caller passes in.
+			 */
+			LinesInDETC = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
+			LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
+			FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
+		} else {
+			LinesInDETC = 0;
+			FullDETBufferingTimeC = 999999;
+		}
+
+		ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
+				- ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
+
+		if (v->NumberOfActivePlanes > 1) {
+			ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
+					- (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
+		}
+
+		if (BytePerPixelDETC[k] > 0) {
+			ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
+					- ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
+
+			if (v->NumberOfActivePlanes > 1) {
+				ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
+						- (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
+			}
+			v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
+		} else {
+			v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
+		}
+
+		/* A plane with writeback is additionally limited by the writeback margin. */
+		if (v->WritebackEnable[k] == true) {
+			WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
+					/ (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
+			if (v->WritebackPixelFormat[k] == dm_444_64) {
+				WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
+			}
+			WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
+			v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
+		}
+	}
+
+	/*
+	 * Find the smallest margin and remember the plane that BlendingAndTiming[]
+	 * maps the owning plane to.
+	 */
+	v->MinActiveDRAMClockChangeMargin = 999999;
+	PlaneWithMinActiveDRAMClockChangeMargin = 0;
+	for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+		if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
+			v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
+			if (v->BlendingAndTiming[k] == k) {
+				PlaneWithMinActiveDRAMClockChangeMargin = k;
+			} else {
+				for (j = 0; j < v->NumberOfActivePlanes; ++j) {
+					if (v->BlendingAndTiming[k] == j) {
+						PlaneWithMinActiveDRAMClockChangeMargin = j;
+					}
+				}
+			}
+		}
+	}
+
+	v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency;
+
+	/* Smallest margin among planes not tied to the plane found above. */
+	SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
+	for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+		if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
+				&& v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
+			SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
+		}
+	}
+
+	/* Planes whose BlendingAndTiming[] entry is their own index are counted as OTGs. */
+	v->TotalNumberOfActiveOTG = 0;
+
+	for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+		if (v->BlendingAndTiming[k] == k) {
+			v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
+		}
+	}
+
+	/* Any non-zero PrefetchMode yields "unsupported". */
+	if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
+		*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
+	} else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
+			|| SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
+		*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
+	} else {
+		*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
+	}
+
+	*StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
+	*StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
+	*Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
+	*Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
+	dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
+	dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
+	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
+#endif
+}
+
+/*
+ * Derive the deep-sleep DCFCLK.
+ *
+ * For every active plane a clock is computed from its swath width, bytes per
+ * pixel and line delivery time, floored at PixelClock / 16, and stored in
+ * mode_lib->vba.DCFCLKDeepSleepPerPlane[].  *DCFCLKDeepSleep receives the
+ * largest of: 8.0, the total read bandwidth term
+ * (__DML_MIN_DCFCLK_FACTOR__ * sum / ReturnBusWidth) and every per-plane
+ * clock.
+ */
+static void CalculateDCFCLKDeepSleep(
+		struct display_mode_lib *mode_lib,
+		unsigned int NumberOfActivePlanes,
+		int BytePerPixelY[],
+		int BytePerPixelC[],
+		double VRatio[],
+		double VRatioChroma[],
+		double SwathWidthY[],
+		double SwathWidthC[],
+		unsigned int DPPPerPlane[],
+		double HRatio[],
+		double HRatioChroma[],
+		double PixelClock[],
+		double PSCL_THROUGHPUT[],
+		double PSCL_THROUGHPUT_CHROMA[],
+		double DPPCLK[],
+		double ReadBandwidthLuma[],
+		double ReadBandwidthChroma[],
+		int ReturnBusWidth,
+		double *DCFCLKDeepSleep)
+{
+	struct vba_vars_st *v = &mode_lib->vba;
+	double TotalReadBandwidth = 0.0;
+	double DeepSleepClock;
+	int plane;
+
+	for (plane = 0; plane < NumberOfActivePlanes; ++plane) {
+		double *PlaneClock = &v->DCFCLKDeepSleepPerPlane[plane];
+		double LumaLineTime;
+		double ChromaLineTime;
+
+		/* Luma line delivery time: pixel-clock bound up to 1:1, scaler bound above. */
+		LumaLineTime = (VRatio[plane] <= 1)
+				? SwathWidthY[plane] * DPPPerPlane[plane] / HRatio[plane] / PixelClock[plane]
+				: SwathWidthY[plane] / PSCL_THROUGHPUT[plane] / DPPCLK[plane];
+
+		/* Same for chroma; zero when the format has no chroma bytes. */
+		if (BytePerPixelC[plane] == 0)
+			ChromaLineTime = 0;
+		else if (VRatioChroma[plane] <= 1)
+			ChromaLineTime = SwathWidthC[plane] * DPPPerPlane[plane] / HRatioChroma[plane] / PixelClock[plane];
+		else
+			ChromaLineTime = SwathWidthC[plane] / PSCL_THROUGHPUT_CHROMA[plane] / DPPCLK[plane];
+
+		if (BytePerPixelC[plane] > 0) {
+			*PlaneClock = dml_max(
+					__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[plane] * BytePerPixelY[plane] / 32.0 / LumaLineTime,
+					__DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[plane] * BytePerPixelC[plane] / 32.0 / ChromaLineTime);
+		} else {
+			*PlaneClock = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[plane] * BytePerPixelY[plane] / 64.0 / LumaLineTime;
+		}
+		*PlaneClock = dml_max(*PlaneClock, PixelClock[plane] / 16);
+
+		/* Summed here instead of in a separate pass; association order is unchanged. */
+		TotalReadBandwidth = TotalReadBandwidth + ReadBandwidthLuma[plane] + ReadBandwidthChroma[plane];
+	}
+
+	DeepSleepClock = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * TotalReadBandwidth / ReturnBusWidth);
+
+	for (plane = 0; plane < NumberOfActivePlanes; ++plane)
+		DeepSleepClock = dml_max(DeepSleepClock, v->DCFCLKDeepSleepPerPlane[plane]);
+
+	*DCFCLKDeepSleep = DeepSleepClock;
+}
+
+/*
+ * Compute the urgent burst factors for the cursor, luma and chroma buffers of
+ * one plane.
+ *
+ * For each buffer the time it can cover is
+ *     lines held in the buffer * LineTime / vertical ratio
+ * and the burst factor is  time / (time - UrgentLatency).  When that time
+ * does not exceed UrgentLatency the factor is set to 0 and
+ * *NotEnoughUrgentLatencyHiding is set.  A vertical ratio that is not
+ * positive yields a factor of 1.
+ *
+ * Outputs:
+ *   *NotEnoughUrgentLatencyHiding - cleared on entry, set as described above.
+ *   *UrgentBurstFactorLuma        - always written.
+ *   *UrgentBurstFactorCursor      - written only when CursorWidth > 0.
+ *   *UrgentBurstFactorChroma      - written only when BytePerPixelInDETC > 0.
+ */
+static void CalculateUrgentBurstFactor(
+		int swath_width_luma_ub,
+		int swath_width_chroma_ub,
+		unsigned int SwathHeightY,
+		unsigned int SwathHeightC,
+		double LineTime,
+		double UrgentLatency,
+		double CursorBufferSize,
+		unsigned int CursorWidth,
+		unsigned int CursorBPP,
+		double VRatio,
+		double VRatioC,
+		double BytePerPixelInDETY,
+		double BytePerPixelInDETC,
+		double DETBufferSizeY,
+		double DETBufferSizeC,
+		double *UrgentBurstFactorCursor,
+		double *UrgentBurstFactorLuma,
+		double *UrgentBurstFactorChroma,
+		bool *NotEnoughUrgentLatencyHiding)
+{
+	double LinesInDETLuma;
+	double LinesInDETChroma;
+	unsigned int LinesInCursorBuffer;
+	double CursorBufferSizeInTime;
+	double DETBufferSizeInTimeLuma;
+	double DETBufferSizeInTimeChroma;
+
+	*NotEnoughUrgentLatencyHiding = 0;
+
+	if (CursorWidth > 0) {
+		/* Cursor lines that fit in the buffer, rounded down to a power of two. */
+		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
+		if (VRatio > 0) {
+			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
+			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
+				*NotEnoughUrgentLatencyHiding = 1;
+				*UrgentBurstFactorCursor = 0;
+			} else {
+				*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
+			}
+		} else {
+			*UrgentBurstFactorCursor = 1;
+		}
+	}
+
+	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
+	if (VRatio > 0) {
+		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
+		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
+			*NotEnoughUrgentLatencyHiding = 1;
+			*UrgentBurstFactorLuma = 0;
+		} else {
+			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
+		}
+	} else {
+		*UrgentBurstFactorLuma = 1;
+	}
+
+	if (BytePerPixelInDETC > 0) {
+		LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
+		/*
+		 * The chroma buffer is scaled by the chroma vertical ratio.  Using
+		 * the luma VRatio here would leave the VRatioC argument unused and
+		 * give the wrong buffer time whenever the two ratios differ.
+		 */
+		if (VRatioC > 0) {
+			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
+			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
+				*NotEnoughUrgentLatencyHiding = 1;
+				*UrgentBurstFactorChroma = 0;
+			} else {
+				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
+			}
+		} else {
+			*UrgentBurstFactorChroma = 1;
+		}
+	}
+}
+
+ /*
+  * CalculatePixelDeliveryTimes - per-plane line, request and cursor delivery times.
+  *
+  * For each k in [0, NumberOfActivePlanes) this writes:
+  *
+  *  - DisplayPipeLineDeliveryTime{Luma,Chroma}[k] and their *Prefetch variants:
+  *    swath_width_*_ub * DPPPerPlane / HRatio* / PixelClock when the governing
+  *    vertical ratio is <= 1, otherwise swath_width_*_ub / PSCL_THROUGHPUT* /
+  *    DPPCLK.  The governing ratio is VRatio / VRatioChroma for the nominal
+  *    values and VRatioPrefetchY / VRatioPrefetchC for the prefetch values.
+  *
+  *  - DisplayPipeRequestDeliveryTime{Luma,Chroma}[k] and their *Prefetch
+  *    variants: the matching line delivery time divided by the number of
+  *    requests per swath (swath width over the 256-byte block width, or over
+  *    the block height when SourceScan[k] is dm_vert).
+  *
+  *  - CursorRequestDeliveryTime[k] / CursorRequestDeliveryTimePrefetch[k]:
+  *    cursor width over HRatio * PixelClock (ratio <= 1) or over
+  *    PSCL_THROUGHPUT * DPPCLK (ratio > 1), divided by the number of 256-byte
+  *    requests per cursor line.
+  *
+  * All chroma outputs are 0 when BytePerPixelC[k] == 0 and both cursor outputs
+  * are 0 when NumberOfCursors[k] == 0.  Only cursor index 0 of each plane is
+  * looked at.
+  */
+ static void CalculatePixelDeliveryTimes(
+ 		unsigned int NumberOfActivePlanes,
+ 		double VRatio[],
+ 		double VRatioChroma[],
+ 		double VRatioPrefetchY[],
+ 		double VRatioPrefetchC[],
+ 		unsigned int swath_width_luma_ub[],
+ 		unsigned int swath_width_chroma_ub[],
+ 		unsigned int DPPPerPlane[],
+ 		double HRatio[],
+ 		double HRatioChroma[],
+ 		double PixelClock[],
+ 		double PSCL_THROUGHPUT[],
+ 		double PSCL_THROUGHPUT_CHROMA[],
+ 		double DPPCLK[],
+ 		int BytePerPixelC[],
+ 		enum scan_direction_class SourceScan[],
+ 		unsigned int NumberOfCursors[],
+ 		unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
+ 		unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
+ 		unsigned int BlockWidth256BytesY[],
+ 		unsigned int BlockHeight256BytesY[],
+ 		unsigned int BlockWidth256BytesC[],
+ 		unsigned int BlockHeight256BytesC[],
+ 		double DisplayPipeLineDeliveryTimeLuma[],
+ 		double DisplayPipeLineDeliveryTimeChroma[],
+ 		double DisplayPipeLineDeliveryTimeLumaPrefetch[],
+ 		double DisplayPipeLineDeliveryTimeChromaPrefetch[],
+ 		double DisplayPipeRequestDeliveryTimeLuma[],
+ 		double DisplayPipeRequestDeliveryTimeChroma[],
+ 		double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
+ 		double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
+ 		double CursorRequestDeliveryTime[],
+ 		double CursorRequestDeliveryTimePrefetch[])
+ {
+ 	double req_per_swath_ub;
+ 	int k;
+
+ 	/* Pass 1: line delivery times (nominal and prefetch, luma and chroma). */
+ 	for (k = 0; k < NumberOfActivePlanes; ++k) {
+ 		if (VRatio[k] <= 1) {
+ 			DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
+ 		} else {
+ 			DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
+ 		}
+
+ 		if (BytePerPixelC[k] == 0) {
+ 			DisplayPipeLineDeliveryTimeChroma[k] = 0;
+ 		} else {
+ 			if (VRatioChroma[k] <= 1) {
+ 				DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
+ 			} else {
+ 				DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
+ 			}
+ 		}
+
+ 		if (VRatioPrefetchY[k] <= 1) {
+ 			DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
+ 		} else {
+ 			DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
+ 		}
+
+ 		if (BytePerPixelC[k] == 0) {
+ 			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
+ 		} else {
+ 			if (VRatioPrefetchC[k] <= 1) {
+ 				DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
+ 			} else {
+ 				DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
+ 			}
+ 		}
+ 	}
+
+ 	/* Pass 2: request delivery times = line delivery time / requests per swath. */
+ 	for (k = 0; k < NumberOfActivePlanes; ++k) {
+ 		/* Requests are counted along the scan direction of the surface. */
+ 		if (SourceScan[k] != dm_vert) {
+ 			req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
+ 		} else {
+ 			req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
+ 		}
+ 		DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
+ 		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
+ 		if (BytePerPixelC[k] == 0) {
+ 			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
+ 			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
+ 		} else {
+ 			if (SourceScan[k] != dm_vert) {
+ 				req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
+ 			} else {
+ 				req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
+ 			}
+ 			DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
+ 			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
+ 		}
+ #ifdef __DML_VBA_DEBUG__
+ 		dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
+ 		dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
+ 		dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
+ 		dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
+ 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
+ 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
+ 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
+ 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
+ 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
+ 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
+ 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
+ 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
+ #endif
+ 	}
+
+ 	/* Pass 3: cursor request delivery times (cursor 0 of each plane only). */
+ 	for (k = 0; k < NumberOfActivePlanes; ++k) {
+ 		int cursor_req_per_width;
+
+ 		/*
+ 		 * Divide in floating point.  With unsigned operands the quotient
+ 		 * is truncated before dml_ceil() runs, so the ceiling is a no-op
+ 		 * and any cursor line shorter than 256 bytes yields 0 requests,
+ 		 * which turns the delivery times below into infinity.
+ 		 */
+ 		cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] / 256.0 / 8.0, 1.0);
+ 		if (NumberOfCursors[k] > 0) {
+ 			if (VRatio[k] <= 1) {
+ 				CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
+ 			} else {
+ 				CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
+ 			}
+ 			if (VRatioPrefetchY[k] <= 1) {
+ 				CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
+ 			} else {
+ 				CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
+ 			}
+ 		} else {
+ 			CursorRequestDeliveryTime[k] = 0;
+ 			CursorRequestDeliveryTimePrefetch[k] = 0;
+ 		}
+ #ifdef __DML_VBA_DEBUG__
+ 		dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
+ 		dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
+ 		dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
+ #endif
+ 	}
+ }
+
+ /*
+  * CalculateMetaAndPTETimes - per-plane row, meta-chunk and PTE-group timings.
+  *
+  * For each k in [0, NumberOfActivePlanes) this writes:
+  *
+  *  - DST_Y_PER_PTE_ROW_NOM_{L,C}[k] and DST_Y_PER_META_ROW_NOM_{L,C}[k]:
+  *    the row height divided by the matching vertical ratio.
+  *  - TimePer[Chroma]MetaChunk{Nominal,VBlank,Flip}[k]: a number of lines
+  *    times HTotal / PixelClock, divided by an upper bound on the meta chunks
+  *    per row.  All six are 0 when DCCEnable[k] is false.
+  *  - time_per_pte_group_{nom,vblank,flip}_{luma,chroma}[k]: a number of lines
+  *    times HTotal / PixelClock, divided by the PTE groups per row.  All six
+  *    are 0 when GPUVMEnable is false.
+  *
+  * Every chroma output is 0 when BytePerPixelC[k] == 0.
+  */
+ static void CalculateMetaAndPTETimes(
+ 		int NumberOfActivePlanes,
+ 		bool GPUVMEnable,
+ 		int MetaChunkSize,
+ 		int MinMetaChunkSizeBytes,
+ 		int HTotal[],
+ 		double VRatio[],
+ 		double VRatioChroma[],
+ 		double DestinationLinesToRequestRowInVBlank[],
+ 		double DestinationLinesToRequestRowInImmediateFlip[],
+ 		bool DCCEnable[],
+ 		double PixelClock[],
+ 		int BytePerPixelY[],
+ 		int BytePerPixelC[],
+ 		enum scan_direction_class SourceScan[],
+ 		int dpte_row_height[],
+ 		int dpte_row_height_chroma[],
+ 		int meta_row_width[],
+ 		int meta_row_width_chroma[],
+ 		int meta_row_height[],
+ 		int meta_row_height_chroma[],
+ 		int meta_req_width[],
+ 		int meta_req_width_chroma[],
+ 		int meta_req_height[],
+ 		int meta_req_height_chroma[],
+ 		int dpte_group_bytes[],
+ 		int PTERequestSizeY[],
+ 		int PTERequestSizeC[],
+ 		int PixelPTEReqWidthY[],
+ 		int PixelPTEReqHeightY[],
+ 		int PixelPTEReqWidthC[],
+ 		int PixelPTEReqHeightC[],
+ 		int dpte_row_width_luma_ub[],
+ 		int dpte_row_width_chroma_ub[],
+ 		double DST_Y_PER_PTE_ROW_NOM_L[],
+ 		double DST_Y_PER_PTE_ROW_NOM_C[],
+ 		double DST_Y_PER_META_ROW_NOM_L[],
+ 		double DST_Y_PER_META_ROW_NOM_C[],
+ 		double TimePerMetaChunkNominal[],
+ 		double TimePerChromaMetaChunkNominal[],
+ 		double TimePerMetaChunkVBlank[],
+ 		double TimePerChromaMetaChunkVBlank[],
+ 		double TimePerMetaChunkFlip[],
+ 		double TimePerChromaMetaChunkFlip[],
+ 		double time_per_pte_group_nom_luma[],
+ 		double time_per_pte_group_vblank_luma[],
+ 		double time_per_pte_group_flip_luma[],
+ 		double time_per_pte_group_nom_chroma[],
+ 		double time_per_pte_group_vblank_chroma[],
+ 		double time_per_pte_group_flip_chroma[])
+ {
+ 	int k;
+
+ 	/* Destination lines per PTE row and per meta row. */
+ 	for (k = 0; k < NumberOfActivePlanes; ++k) {
+ 		bool has_chroma = BytePerPixelC[k] != 0;
+
+ 		DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
+ 		DST_Y_PER_PTE_ROW_NOM_C[k] = has_chroma ? dpte_row_height_chroma[k] / VRatioChroma[k] : 0;
+ 		DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
+ 		DST_Y_PER_META_ROW_NOM_C[k] = has_chroma ? meta_row_height_chroma[k] / VRatioChroma[k] : 0;
+ 	}
+
+ 	/* Time per DCC meta chunk. */
+ 	for (k = 0; k < NumberOfActivePlanes; ++k) {
+ 		unsigned int chunk_width;
+ 		unsigned int min_chunk_width;
+ 		unsigned int whole_chunks;
+ 		unsigned int leftover;
+ 		unsigned int threshold;
+ 		unsigned int chunks_ub;
+ 		bool horizontal;
+
+ 		if (!DCCEnable[k]) {
+ 			TimePerMetaChunkNominal[k] = 0;
+ 			TimePerMetaChunkVBlank[k] = 0;
+ 			TimePerMetaChunkFlip[k] = 0;
+ 			TimePerChromaMetaChunkNominal[k] = 0;
+ 			TimePerChromaMetaChunkVBlank[k] = 0;
+ 			TimePerChromaMetaChunkFlip[k] = 0;
+ 			continue;
+ 		}
+
+ 		/* The request dimension that matters follows the scan direction. */
+ 		horizontal = SourceScan[k] != dm_vert;
+
+ 		/* Luma: whole chunks in a row, plus 1 or 2 depending on the remainder. */
+ 		chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
+ 		min_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
+ 		whole_chunks = meta_row_width[k] / chunk_width;
+ 		leftover = meta_row_width[k] % chunk_width;
+ 		threshold = 2 * min_chunk_width - (horizontal ? meta_req_width[k] : meta_req_height[k]);
+ 		chunks_ub = whole_chunks + (leftover <= threshold ? 1 : 2);
+
+ 		TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / chunks_ub;
+ 		TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / chunks_ub;
+ 		TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / chunks_ub;
+
+ 		if (BytePerPixelC[k] == 0) {
+ 			TimePerChromaMetaChunkNominal[k] = 0;
+ 			TimePerChromaMetaChunkVBlank[k] = 0;
+ 			TimePerChromaMetaChunkFlip[k] = 0;
+ 			continue;
+ 		}
+
+ 		/* Chroma: same scheme with the chroma surface parameters. */
+ 		chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
+ 		min_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
+ 		whole_chunks = (double) meta_row_width_chroma[k] / chunk_width;
+ 		leftover = meta_row_width_chroma[k] % chunk_width;
+ 		threshold = 2 * min_chunk_width - (horizontal ? meta_req_width_chroma[k] : meta_req_height_chroma[k]);
+ 		chunks_ub = whole_chunks + (leftover <= threshold ? 1 : 2);
+
+ 		TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / chunks_ub;
+ 		TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / chunks_ub;
+ 		TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / chunks_ub;
+ 	}
+
+ 	/* Time per PTE group. */
+ 	for (k = 0; k < NumberOfActivePlanes; ++k) {
+ 		unsigned int group_width;
+ 		unsigned int groups_per_row_ub;
+ 		bool horizontal;
+
+ 		if (!GPUVMEnable) {
+ 			time_per_pte_group_nom_luma[k] = 0;
+ 			time_per_pte_group_vblank_luma[k] = 0;
+ 			time_per_pte_group_flip_luma[k] = 0;
+ 			time_per_pte_group_nom_chroma[k] = 0;
+ 			time_per_pte_group_vblank_chroma[k] = 0;
+ 			time_per_pte_group_flip_chroma[k] = 0;
+ 			continue;
+ 		}
+
+ 		horizontal = SourceScan[k] != dm_vert;
+
+ 		/* Luma: group width = requests per group * span of one request. */
+ 		group_width = dpte_group_bytes[k] / PTERequestSizeY[k] * (horizontal ? PixelPTEReqWidthY[k] : PixelPTEReqHeightY[k]);
+ 		groups_per_row_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / group_width, 1);
+
+ 		time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / groups_per_row_ub;
+ 		time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / groups_per_row_ub;
+ 		time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / groups_per_row_ub;
+
+ 		if (BytePerPixelC[k] == 0) {
+ 			time_per_pte_group_nom_chroma[k] = 0;
+ 			time_per_pte_group_vblank_chroma[k] = 0;
+ 			time_per_pte_group_flip_chroma[k] = 0;
+ 			continue;
+ 		}
+
+ 		/* Chroma: same scheme with the chroma PTE parameters. */
+ 		group_width = dpte_group_bytes[k] / PTERequestSizeC[k] * (horizontal ? PixelPTEReqWidthC[k] : PixelPTEReqHeightC[k]);
+ 		groups_per_row_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / group_width, 1);
+
+ 		time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / groups_per_row_ub;
+ 		time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / groups_per_row_ub;
+ 		time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / groups_per_row_ub;
+ 	}
+ }
+
+ /*
+  * CalculateVMGroupAndRequestTimes - time available per VM group and per VM
+  * request, for the vblank and the immediate-flip case.
+  *
+  * For each k in [0, NumberOfActivePlanes) the four outputs are
+  * DestinationLinesToRequestVMIn{VBlank,ImmediateFlip}[k] * HTotal / PixelClock
+  * divided by the number of groups (byte counts rounded up to vm_group_bytes)
+  * or the number of requests (byte counts divided by 64) of the lower VM stage.
+  * Which byte counts are summed depends on DCCEnable[k], on whether
+  * GPUVMMaxPageTableLevels is 1, and on whether the plane has chroma
+  * (BytePerPixelC[k] > 0).  The results are halved when
+  * GPUVMMaxPageTableLevels > 2.
+  *
+  * All four outputs are 0 when GPUVMEnable is false, or when the plane has
+  * neither DCC enabled nor more than one page table level.
+  */
+ static void CalculateVMGroupAndRequestTimes(
+ 		unsigned int NumberOfActivePlanes,
+ 		bool GPUVMEnable,
+ 		unsigned int GPUVMMaxPageTableLevels,
+ 		unsigned int HTotal[],
+ 		int BytePerPixelC[],
+ 		double DestinationLinesToRequestVMInVBlank[],
+ 		double DestinationLinesToRequestVMInImmediateFlip[],
+ 		bool DCCEnable[],
+ 		double PixelClock[],
+ 		int dpte_row_width_luma_ub[],
+ 		int dpte_row_width_chroma_ub[],
+ 		int vm_group_bytes[],
+ 		unsigned int dpde0_bytes_per_frame_ub_l[],
+ 		unsigned int dpde0_bytes_per_frame_ub_c[],
+ 		int meta_pte_bytes_per_frame_ub_l[],
+ 		int meta_pte_bytes_per_frame_ub_c[],
+ 		double TimePerVMGroupVBlank[],
+ 		double TimePerVMGroupFlip[],
+ 		double TimePerVMRequestVBlank[],
+ 		double TimePerVMRequestFlip[])
+ {
+ 	int k;
+
+ 	for (k = 0; k < NumberOfActivePlanes; ++k) {
+ 		int groups;
+ 		int requests;
+ 		double group_bytes;
+ 		bool has_chroma;
+
+ 		/* Nothing to time when this plane fetches nothing through the VM. */
+ 		if (!GPUVMEnable || (!DCCEnable[k] && GPUVMMaxPageTableLevels <= 1)) {
+ 			TimePerVMGroupVBlank[k] = 0;
+ 			TimePerVMGroupFlip[k] = 0;
+ 			TimePerVMRequestVBlank[k] = 0;
+ 			TimePerVMRequestFlip[k] = 0;
+ 			continue;
+ 		}
+
+ 		group_bytes = (double) (vm_group_bytes[k]);
+ 		has_chroma = BytePerPixelC[k] > 0;
+
+ 		if (!DCCEnable[k]) {
+ 			/* No DCC: only the dpde0 bytes count. */
+ 			if (has_chroma) {
+ 				groups = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / group_bytes, 1)
+ 						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / group_bytes, 1);
+ 				requests = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
+ 			} else {
+ 				groups = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / group_bytes, 1);
+ 				requests = dpde0_bytes_per_frame_ub_l[k] / 64;
+ 			}
+ 		} else if (GPUVMMaxPageTableLevels == 1) {
+ 			/* DCC with a single page table level: only the meta PTE bytes count. */
+ 			if (has_chroma) {
+ 				groups = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / group_bytes, 1)
+ 						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / group_bytes, 1);
+ 				requests = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
+ 			} else {
+ 				groups = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / group_bytes, 1);
+ 				requests = meta_pte_bytes_per_frame_ub_l[k] / 64;
+ 			}
+ 		} else {
+ 			/* DCC with other level counts: dpde0 and meta PTE bytes both count. */
+ 			if (has_chroma) {
+ 				groups = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / group_bytes, 1)
+ 						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / group_bytes, 1)
+ 						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / group_bytes, 1)
+ 						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / group_bytes, 1);
+ 				requests = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
+ 						+ meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
+ 			} else {
+ 				groups = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / group_bytes, 1)
+ 						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / group_bytes, 1);
+ 				requests = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
+ 			}
+ 		}
+
+ 		TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / groups;
+ 		TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / groups;
+ 		TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / requests;
+ 		TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / requests;
+
+ 		/* More than two page table levels: halve every time budget. */
+ 		if (GPUVMMaxPageTableLevels > 2) {
+ 			TimePerVMGroupVBlank[k] /= 2;
+ 			TimePerVMGroupFlip[k] /= 2;
+ 			TimePerVMRequestVBlank[k] /= 2;
+ 			TimePerVMRequestFlip[k] /= 2;
+ 		}
+ 	}
+ }
+
+static void CalculateStutterEfficiency(
+ struct display_mode_lib *mode_lib,
+ int CompressedBufferSizeInkByte,
+ bool UnboundedRequestEnabled,
+ int ConfigReturnBufferSizeInKByte,
+ int MetaFIFOSizeInKEntries,
+ int ZeroSizeBufferEntries,
+ int NumberOfActivePlanes,
+ int ROBBufferSizeInKByte,
+ double TotalDataReadBandwidth,
+ double DCFCLK,
+ double ReturnBW,
+ double COMPBUF_RESERVED_SPACE_64B,
+ double COMPBUF_RESERVED_SPACE_ZS,
+ double SRExitTime,
+ double SRExitZ8Time,
+ bool SynchronizedVBlank,
+ double Z8StutterEnterPlusExitWatermark,
+ double StutterEnterPlusExitWatermark,
+ bool ProgressiveToInterlaceUnitInOPP,
+ bool Interlace[],
+ double MinTTUVBlank[],
+ int DPPPerPlane[],
+ unsigned int DETBufferSizeY[],
+ int BytePerPixelY[],
+ double BytePerPixelDETY[],
+ double SwathWidthY[],
+ int SwathHeightY[],
+ int SwathHeightC[],
+ double NetDCCRateLuma[],
+ double NetDCCRateChroma[],
+ double DCCFractionOfZeroSizeRequestsLuma[],
+ double DCCFractionOfZeroSizeRequestsChroma[],
+ int HTotal[],
+ int VTotal[],
+ double PixelClock[],
+ double VRatio[],
+ enum scan_direction_class SourceScan[],
+ int BlockHeight256BytesY[],
+ int BlockWidth256BytesY[],
+ int BlockHeight256BytesC[],
+ int BlockWidth256BytesC[],
+ int DCCYMaxUncompressedBlock[],
+ int DCCCMaxUncompressedBlock[],
+ int VActive[],
+ bool DCCEnable[],
+ bool WritebackEnable[],
+ double ReadBandwidthPlaneLuma[],
+ double ReadBandwidthPlaneChroma[],
+ double meta_row_bw[],
+ double dpte_row_bw[],
+ double *StutterEfficiencyNotIncludingVBlank,
+ double *StutterEfficiency,
+ int *NumberOfStutterBurstsPerFrame,
+ double *Z8StutterEfficiencyNotIncludingVBlank,
+ double *Z8StutterEfficiency,
+ int *Z8NumberOfStutterBurstsPerFrame,
+ double *StutterPeriod)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+
+ double DETBufferingTimeY;
+ double SwathWidthYCriticalPlane = 0;
+ double VActiveTimeCriticalPlane = 0;
+ double FrameTimeCriticalPlane = 0;
+ int BytePerPixelYCriticalPlane = 0;
+ double LinesToFinishSwathTransferStutterCriticalPlane = 0;
+ double MinTTUVBlankCriticalPlane = 0;
+ double TotalCompressedReadBandwidth;
+ double TotalRowReadBandwidth;
+ double AverageDCCCompressionRate;
+ double EffectiveCompressedBufferSize;
+ double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
+ double StutterBurstTime;
+ int TotalActiveWriteback;
+ double LinesInDETY;
+ double LinesInDETYRoundedDownToSwath;
+ double MaximumEffectiveCompressionLuma;
+ double MaximumEffectiveCompressionChroma;
+ double TotalZeroSizeRequestReadBandwidth;
+ double TotalZeroSizeCompressedReadBandwidth;
+ double AverageDCCZeroSizeFraction;
+ double AverageZeroSizeCompressionRate;
+ int TotalNumberOfActiveOTG = 0;
+ double LastStutterPeriod = 0.0;
+ double LastZ8StutterPeriod = 0.0;
+ int k;
+
+ TotalZeroSizeRequestReadBandwidth = 0;
+ TotalZeroSizeCompressedReadBandwidth = 0;
+ TotalRowReadBandwidth = 0;
+ TotalCompressedReadBandwidth = 0;
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (DCCEnable[k] == true) {
+ if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
+ || DCCYMaxUncompressedBlock[k] < 256) {
+ MaximumEffectiveCompressionLuma = 2;
+ } else {
+ MaximumEffectiveCompressionLuma = 4;
+ }
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
+ TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
+ TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
+ + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
+ if (ReadBandwidthPlaneChroma[k] > 0) {
+ if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
+ || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
+ MaximumEffectiveCompressionChroma = 2;
+ } else {
+ MaximumEffectiveCompressionChroma = 4;
+ }
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
+ + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
+ TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
+ TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
+ + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
+ }
+ } else {
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
+ }
+ TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
+ }
+
+ AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
+ AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
+ dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
+ dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
+ dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
+ dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
+ dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
+ dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
+#endif
+
+ if (AverageDCCZeroSizeFraction == 1) {
+ AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
+ EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
+ } else if (AverageDCCZeroSizeFraction > 0) {
+ AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
+ EffectiveCompressedBufferSize = dml_min(
+ CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
+ MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
+ + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
+ (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
+ dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
+ dml_print(
+ "DML::%s: min 2 = %f\n",
+ __func__,
+ MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
+ dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
+ } else {
+ EffectiveCompressedBufferSize = dml_min(
+ CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
+ MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
+ dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
+ dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
+ dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
+#endif
+
+ *StutterPeriod = 0;
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
+ / BytePerPixelDETY[k] / SwathWidthY[k];
+ LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
+ DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
+ dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
+ dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
+ dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
+ dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
+ dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
+ dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
+ dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
+ dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
+ dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
+ dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
+ dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
+#endif
+
+ if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
+ bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
+
+ *StutterPeriod = DETBufferingTimeY;
+ FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
+ VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
+ BytePerPixelYCriticalPlane = BytePerPixelY[k];
+ SwathWidthYCriticalPlane = SwathWidthY[k];
+ LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
+ MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
+ dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
+ dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
+ dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
+ dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
+ dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
+ dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
+#endif
+ }
+ }
+
+ PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
+ dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
+ dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
+ dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
+ dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
+ dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
+ dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
+ dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
+ dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
+ dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
+#endif
+
+ StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
+ + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
+ + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
+ dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
+ dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
+ dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
+ dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
+#endif
+ StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
+
+ dml_print(
+ "DML::%s: Time to finish residue swath=%f\n",
+ __func__,
+ LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
+
+ TotalActiveWriteback = 0;
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (WritebackEnable[k]) {
+ TotalActiveWriteback = TotalActiveWriteback + 1;
+ }
+ }
+
+ if (TotalActiveWriteback == 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
+ dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
+ dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
+ dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
+#endif
+ *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
+ *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
+ *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
+ *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
+ } else {
+ *StutterEfficiencyNotIncludingVBlank = 0.;
+ *Z8StutterEfficiencyNotIncludingVBlank = 0.;
+ *NumberOfStutterBurstsPerFrame = 0;
+ *Z8NumberOfStutterBurstsPerFrame = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
+ dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
+ dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
+#endif
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (v->BlendingAndTiming[k] == k) {
+ TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
+ }
+ }
+
+ if (*StutterEfficiencyNotIncludingVBlank > 0) {
+ LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
+
+ if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
+ *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
+ / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
+ } else {
+ *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
+ }
+ } else {
+ *StutterEfficiency = 0;
+ }
+
+ if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
+ LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
+ if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
+ *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
+ / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
+ } else {
+ *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
+ }
+ } else {
+ *Z8StutterEfficiency = 0.;
+ }
+
+ dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
+ dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
+ dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
+ dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
+ dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
+ dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
+ dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
+}
+
+static void CalculateSwathAndDETConfiguration( /* For each active plane: pick swath heights that fit the DET buffer, split the DET between luma and chroma, and flag unsupported viewports. */
+ bool ForceSingleDPP, /* forwarded unchanged to CalculateSwathWidth() */
+ int NumberOfActivePlanes, /* loop bound for every per-plane array below */
+ unsigned int DETBufferSizeInKByte, /* multiplied by 1024 below, i.e. used as a size in KiB */
+ double MaximumSwathWidthLuma[], /* in: limit compared against SwathWidth[k] */
+ double MaximumSwathWidthChroma[], /* in: limit compared against SwathWidthChroma[k] when SwathHeightC[k] > 0 */
+ enum scan_direction_class SourceScan[],
+ enum source_format_class SourcePixelFormat[],
+ enum dm_swizzle_mode SurfaceTiling[],
+ int ViewportWidth[],
+ int ViewportHeight[],
+ int SurfaceWidthY[],
+ int SurfaceWidthC[],
+ int SurfaceHeightY[],
+ int SurfaceHeightC[],
+ int Read256BytesBlockHeightY[],
+ int Read256BytesBlockHeightC[],
+ int Read256BytesBlockWidthY[],
+ int Read256BytesBlockWidthC[],
+ enum odm_combine_mode ODMCombineEnabled[],
+ int BlendingAndTiming[],
+ int BytePerPixY[],
+ int BytePerPixC[],
+ double BytePerPixDETY[], /* in: bytes per luma pixel used for the swath byte sizes */
+ double BytePerPixDETC[], /* in: bytes per chroma pixel used for the swath byte sizes */
+ int HActive[],
+ double HRatio[],
+ double HRatioChroma[], /* NOTE(review): not referenced anywhere in this function body */
+ int DPPPerPlane[],
+ int swath_width_luma_ub[], /* filled by CalculateSwathWidth(), then read here */
+ int swath_width_chroma_ub[], /* filled by CalculateSwathWidth(), then read here */
+ double SwathWidth[], /* filled by CalculateSwathWidth() (its SwathWidthY argument) */
+ double SwathWidthChroma[], /* filled by CalculateSwathWidth() (its SwathWidthC argument) */
+ int SwathHeightY[], /* out: chosen luma swath height per plane */
+ int SwathHeightC[], /* out: chosen chroma swath height per plane */
+ unsigned int DETBufferSizeY[], /* out: luma share of the DET buffer in bytes */
+ unsigned int DETBufferSizeC[], /* out: chroma share of the DET buffer in bytes */
+ bool ViewportSizeSupportPerPlane[], /* out: per-plane verdict */
+ bool *ViewportSizeSupport) /* out: true only if every plane is supported */
+{
+ int MaximumSwathHeightY[DC__NUM_DPP__MAX]; /* written by CalculateSwathWidth() */
+ int MaximumSwathHeightC[DC__NUM_DPP__MAX]; /* written by CalculateSwathWidth() */
+ int MinimumSwathHeightY;
+ int MinimumSwathHeightC;
+ int RoundedUpMaxSwathSizeBytesY;
+ int RoundedUpMaxSwathSizeBytesC;
+ int RoundedUpMinSwathSizeBytesY;
+ int RoundedUpMinSwathSizeBytesC;
+ int RoundedUpSwathSizeBytesY;
+ int RoundedUpSwathSizeBytesC;
+ double SwathWidthSingleDPP[DC__NUM_DPP__MAX]; /* only passed to CalculateSwathWidth(); not read here */
+ double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX]; /* only passed to CalculateSwathWidth(); not read here */
+ int k;
+
+ CalculateSwathWidth( /* step 1: swath widths, maximum swath heights and *_ub widths for all planes */
+ ForceSingleDPP,
+ NumberOfActivePlanes,
+ SourcePixelFormat,
+ SourceScan,
+ ViewportWidth,
+ ViewportHeight,
+ SurfaceWidthY,
+ SurfaceWidthC,
+ SurfaceHeightY,
+ SurfaceHeightC,
+ ODMCombineEnabled,
+ BytePerPixY,
+ BytePerPixC,
+ Read256BytesBlockHeightY,
+ Read256BytesBlockHeightC,
+ Read256BytesBlockWidthY,
+ Read256BytesBlockWidthC,
+ BlendingAndTiming,
+ HActive,
+ HRatio,
+ DPPPerPlane,
+ SwathWidthSingleDPP,
+ SwathWidthSingleDPPChroma,
+ SwathWidth,
+ SwathWidthChroma,
+ MaximumSwathHeightY,
+ MaximumSwathHeightC,
+ swath_width_luma_ub,
+ swath_width_chroma_ub);
+
+ *ViewportSizeSupport = true; /* cleared below as soon as one plane fails */
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
+ || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) { /* step 2a: minimum swath heights for the listed formats */
+ if (SurfaceTiling[k] == dm_sw_linear
+ || (SourcePixelFormat[k] == dm_444_64
+ && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
+ && SourceScan[k] != dm_vert)) {
+ MinimumSwathHeightY = MaximumSwathHeightY[k];
+ } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) { /* NOTE(review): dm_444_8 is not in the enclosing format list, so this branch cannot be taken as written - confirm intent */
+ MinimumSwathHeightY = MaximumSwathHeightY[k];
+ } else {
+ MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
+ }
+ MinimumSwathHeightC = MaximumSwathHeightC[k];
+ } else { /* step 2b: every other format */
+ if (SurfaceTiling[k] == dm_sw_linear) {
+ MinimumSwathHeightY = MaximumSwathHeightY[k];
+ MinimumSwathHeightC = MaximumSwathHeightC[k];
+ } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
+ MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
+ MinimumSwathHeightC = MaximumSwathHeightC[k];
+ } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
+ MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
+ MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
+ } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
+ MinimumSwathHeightY = MaximumSwathHeightY[k];
+ MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
+ } else {
+ MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
+ MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
+ }
+ }
+
+ RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; /* step 3: swath sizes in bytes; the double product is truncated when stored in the int */
+ RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
+ if (SourcePixelFormat[k] == dm_420_10) { /* only dm_420_10 sizes go through dml_ceil(..., 256); presumably rounds up to a multiple of 256 (helper not shown here) */
+ RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
+ RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
+ }
+ RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
+ RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
+ if (SourcePixelFormat[k] == dm_420_10) {
+ RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
+ RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
+ }
+
+ if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { /* step 4: both maximum-height swaths fit in half of the DET buffer */
+ SwathHeightY[k] = MaximumSwathHeightY[k];
+ SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
+ } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
+ && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { /* luma swath is at least 1.5x chroma: reduce only luma */
+ SwathHeightY[k] = MinimumSwathHeightY;
+ SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
+ } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
+ && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { /* otherwise reduce only chroma */
+ SwathHeightY[k] = MaximumSwathHeightY[k];
+ SwathHeightC[k] = MinimumSwathHeightC;
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
+ RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
+ } else { /* fall back to both minimum heights; no fit check here, support is decided further down */
+ SwathHeightY[k] = MinimumSwathHeightY;
+ SwathHeightC[k] = MinimumSwathHeightC;
+ RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
+ RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
+ }
+ {
+ double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64); /* the DET split and support check below use this value, not the raw parameter used in step 4 */
+
+ if (SwathHeightC[k] == 0) { /* step 5: no chroma swath, luma gets the whole DET */
+ DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
+ DETBufferSizeC[k] = 0;
+ } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { /* even split */
+ DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
+ DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
+ } else { /* about two thirds luma (passed through dml_floor(..., 1024)) and one third chroma */
+ DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
+ DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
+ }
+
+ if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
+ || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { /* step 6: unsupported if even the minimum swaths exceed half the DET or a swath is wider than its limit */
+ *ViewportSizeSupport = false;
+ ViewportSizeSupportPerPlane[k] = false;
+ } else {
+ ViewportSizeSupportPerPlane[k] = true;
+ }
+ }
+ }
+}
+
+static void CalculateSwathWidth( /* For each active plane: derive single-DPP and per-pipe swath widths, the maximum swath heights and the block-aligned upper-bound swath widths. */
+ bool ForceSingleDPP, /* when true, the per-pipe widths are overwritten with the single-DPP widths */
+ int NumberOfActivePlanes, /* loop bound for every per-plane array below */
+ enum source_format_class SourcePixelFormat[],
+ enum scan_direction_class SourceScan[],
+ int ViewportWidth[],
+ int ViewportHeight[],
+ int SurfaceWidthY[],
+ int SurfaceWidthC[],
+ int SurfaceHeightY[],
+ int SurfaceHeightC[],
+ enum odm_combine_mode ODMCombineEnabled[],
+ int BytePerPixY[], /* NOTE(review): not referenced anywhere in this function body */
+ int BytePerPixC[], /* in: a value > 0 enables the chroma upper-bound width */
+ int Read256BytesBlockHeightY[],
+ int Read256BytesBlockHeightC[],
+ int Read256BytesBlockWidthY[],
+ int Read256BytesBlockWidthC[],
+ int BlendingAndTiming[], /* in: index of the plane whose ODM combine mode applies to plane k */
+ int HActive[],
+ double HRatio[],
+ int DPPPerPlane[],
+ double SwathWidthSingleDPPY[], /* out */
+ double SwathWidthSingleDPPC[], /* out */
+ double SwathWidthY[], /* out */
+ double SwathWidthC[], /* out */
+ int MaximumSwathHeightY[], /* out */
+ int MaximumSwathHeightC[], /* out */
+ int swath_width_luma_ub[], /* out */
+ int swath_width_chroma_ub[]) /* out: 0 when BytePerPixC[k] is not positive */
+{
+ enum odm_combine_mode MainPlaneODMCombine;
+ int j, k;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
+#endif
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (SourceScan[k] != dm_vert) { /* non-vertical scan: the swath runs along the viewport width */
+ SwathWidthSingleDPPY[k] = ViewportWidth[k];
+ } else { /* vertical scan: the swath runs along the viewport height */
+ SwathWidthSingleDPPY[k] = ViewportHeight[k];
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
+ dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
+#endif
+
+ MainPlaneODMCombine = ODMCombineEnabled[k]; /* default: the plane's own mode */
+ for (j = 0; j < NumberOfActivePlanes; ++j) {
+ if (BlendingAndTiming[k] == j) { /* replaced by the mode of the plane BlendingAndTiming[k] refers to, if that index is an active plane */
+ MainPlaneODMCombine = ODMCombineEnabled[j];
+ }
+ }
+
+ if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) /* 4:1 combine: capped at a quarter of HActive scaled by HRatio */
+ SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
+ else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) /* 2:1 combine: capped at half of HActive scaled by HRatio */
+ SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
+ else if (DPPPerPlane[k] == 2) /* two DPPs on the plane: each takes half */
+ SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
+ else
+ SwathWidthY[k] = SwathWidthSingleDPPY[k];
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
+ dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
+#endif
+
+ if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) { /* the three dm_420_* formats use half the luma width for chroma */
+ SwathWidthC[k] = SwathWidthY[k] / 2;
+ SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
+ } else {
+ SwathWidthC[k] = SwathWidthY[k];
+ SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
+ }
+
+ if (ForceSingleDPP == true) { /* discard the ODM / multi-DPP split computed above */
+ SwathWidthY[k] = SwathWidthSingleDPPY[k];
+ SwathWidthC[k] = SwathWidthSingleDPPC[k];
+ }
+ {
+ int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); /* presumably the surface width rounded up to a whole number of 256-byte blocks - dml_ceil() is not shown here */
+ int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
+#endif
+
+ if (SourceScan[k] != dm_vert) { /* non-vertical: swath height follows the block height, widths are bounded using the block width */
+ MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
+ MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
+ swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]); /* never larger than the rounded surface width */
+ if (BytePerPixC[k] > 0) {
+ int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
+
+ swath_width_chroma_ub[k] = dml_min(
+ surface_width_ub_c,
+ (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
+ } else {
+ swath_width_chroma_ub[k] = 0;
+ }
+ } else { /* vertical: block width and block height swap roles */
+ MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
+ MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
+ swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
+ if (BytePerPixC[k] > 0) {
+ int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
+
+ swath_width_chroma_ub[k] = dml_min(
+ surface_height_ub_c,
+ (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
+ } else {
+ swath_width_chroma_ub[k] = 0;
+ }
+ }
+ }
+ }
+}
+
+static double CalculateExtraLatency(
+	int RoundTripPingLatencyCycles,
+	int ReorderingBytes,
+	double DCFCLK,
+	int TotalNumberOfActiveDPP,
+	int PixelChunkSizeInKByte,
+	int TotalNumberOfDCCActiveDPP,
+	int MetaChunkSize,
+	double ReturnBW,
+	bool GPUVMEnable,
+	bool HostVMEnable,
+	int NumberOfActivePlanes,
+	int NumberOfDPP[],
+	int dpte_group_bytes[],
+	double HostVMInefficiencyFactor,
+	double HostVMMinPageSize,
+	int HostVMMaxNonCachedPageTableLevels)
+{
+	/* Byte term: delegated to CalculateExtraLatencyBytes() and later divided by ReturnBW. */
+	const double LatencyBytes = CalculateExtraLatencyBytes(
+			ReorderingBytes,
+			TotalNumberOfActiveDPP,
+			PixelChunkSizeInKByte,
+			TotalNumberOfDCCActiveDPP,
+			MetaChunkSize,
+			GPUVMEnable,
+			HostVMEnable,
+			NumberOfActivePlanes,
+			NumberOfDPP,
+			dpte_group_bytes,
+			HostVMInefficiencyFactor,
+			HostVMMinPageSize,
+			HostVMMaxNonCachedPageTableLevels);
+
+	/* Cycle term: round-trip ping cycles plus __DML_ARB_TO_RET_DELAY__, divided by DCFCLK. */
+	const double FixedLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__;
+	const double Latency = FixedLatencyCycles / DCFCLK + LatencyBytes / ReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
+	dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
+	dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, LatencyBytes);
+	dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
+	dml_print("DML::%s: ExtraLatency=%f\n", __func__, Latency);
+#endif
+
+	return Latency;
+}
+
+static double CalculateExtraLatencyBytes(
+	int ReorderingBytes,
+	int TotalNumberOfActiveDPP,
+	int PixelChunkSizeInKByte,
+	int TotalNumberOfDCCActiveDPP,
+	int MetaChunkSize,
+	bool GPUVMEnable,
+	bool HostVMEnable,
+	int NumberOfActivePlanes,
+	int NumberOfDPP[],
+	int dpte_group_bytes[],
+	double HostVMInefficiencyFactor,
+	double HostVMMinPageSize,
+	int HostVMMaxNonCachedPageTableLevels)
+{
+	/* Base amount: reordering bytes plus the pixel and meta chunks (chunk sizes are scaled by 1024). */
+	double Bytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
+	int DynamicLevels = 0, PerGroupFactor, Plane;
+
+	if (!GPUVMEnable)
+		return Bytes;
+
+	if (HostVMEnable) {
+		/* Larger HostVMMinPageSize thresholds drop one, then two, page table levels (never below 0). */
+		if (HostVMMinPageSize < 2048)
+			DynamicLevels = HostVMMaxNonCachedPageTableLevels;
+		else if (HostVMMinPageSize < 1048576)
+			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
+		else
+			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
+	}
+
+	PerGroupFactor = 1 + 8 * DynamicLevels;
+	for (Plane = 0; Plane < NumberOfActivePlanes; ++Plane)
+		Bytes += NumberOfDPP[Plane] * dpte_group_bytes[Plane] * PerGroupFactor * HostVMInefficiencyFactor;
+	return Bytes;
+}
+
+static double CalculateUrgentLatency(
+	double UrgentLatencyPixelDataOnly,
+	double UrgentLatencyPixelMixedWithVMData,
+	double UrgentLatencyVMDataOnly,
+	bool DoUrgentLatencyAdjustment,
+	double UrgentLatencyAdjustmentFabricClockComponent,
+	double UrgentLatencyAdjustmentFabricClockReference,
+	double FabricClock)
+{
+	/* Largest of the three latencies; optionally adjusted by Component * (Reference / FabricClock - 1). */
+	double Latency = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
+
+	if (DoUrgentLatencyAdjustment)
+		Latency += UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
+	return Latency;
+}
+
+static noinline_for_stack void UseMinimumDCFCLK( /* For every voltage state i and each j in {0, 1}: set v->DCFCLKState[i][j] to 1.05x the larger of the average- and peak-bandwidth DCFCLK requirements, capped at v->DCFCLKPerState[i]. */
+ struct display_mode_lib *mode_lib, /* all inputs and the result live in mode_lib->vba */
+ int MaxPrefetchMode, /* passed to CalculateTWait() */
+ int ReorderingBytes) /* passed to CalculateExtraLatencyBytes() */
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ int dummy1, i, j, k; /* dummy1..dummy3 receive outputs of CalculateVupdateAndDynamicMetadataParameters() that are not used here */
+ double NormalEfficiency, dummy2, dummy3;
+ double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
+
+ NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0; /* percentage converted to a fraction */
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
+ double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
+ double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
+ double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
+ double MinimumTWait;
+ double NonDPTEBandwidth;
+ double DPTEBandwidth;
+ double DCFCLKRequiredForAverageBandwidth;
+ double ExtraLatencyBytes;
+ double ExtraLatencyCycles;
+ double DCFCLKRequiredForPeakBandwidth;
+ int NoOfDPPState[DC__NUM_DPP__MAX];
+ double MinimumTvmPlus2Tr0;
+
+ TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0; /* accumulated over all active planes in the loop below */
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
+ + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
+ }
+
+ for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) /* flat per-plane copy of the DPP counts for this (i, j) */
+ NoOfDPPState[k] = v->NoOfDPP[i][j][k];
+
+ MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
+ NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
+ DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ? /* host VM or required immediate flip selects the prefetch/flip sum computed above */
+ TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
+ DCFCLKRequiredForAverageBandwidth = dml_max3( /* largest of: deep-sleep DCFCLK and two bandwidth-derived clocks */
+ v->ProjectedDCFCLKDeepSleep[i][j],
+ (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
+ / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
+ (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
+
+ ExtraLatencyBytes = CalculateExtraLatencyBytes(
+ ReorderingBytes,
+ v->TotalNumberOfActiveDPP[i][j],
+ v->PixelChunkSizeInKByte,
+ v->TotalNumberOfDCCActiveDPP[i][j],
+ v->MetaChunkSize,
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->NumberOfActivePlanes,
+ NoOfDPPState,
+ v->dpte_group_bytes,
+ 1, /* HostVMInefficiencyFactor argument fixed to 1 */
+ v->HostVMMinPageSize,
+ v->HostVMMaxNonCachedPageTableLevels);
+ ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) { /* per-plane peak-bandwidth requirement */
+ double DCFCLKCyclesRequiredInPrefetch;
+ double ExpectedPrefetchBWAcceleration;
+ double PrefetchTime;
+
+ PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
+ + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
+ DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
+ + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0) /* this term only counts when there are more than 2 GPUVM page table levels */
+ + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
+ + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
+ PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
+ ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
+ / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
+ DynamicMetadataVMExtraLatency[k] =
+ (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ? /* zero unless all three flags are set */
+ v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
+ PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
+ - v->UrgLatency[i]
+ * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
+ * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
+ - DynamicMetadataVMExtraLatency[k];
+
+ if (PrefetchTime > 0) {
+ double ExpectedVRatioPrefetch;
+
+ ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
+ / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
+ DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
+ * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
+ if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) { /* same condition that selected DPTEBandwidth above */
+ DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
+ + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
+ }
+ } else { /* no positive prefetch time: use the state's DCFCLK for this plane */
+ DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
+ }
+ if (v->DynamicMetadataEnable[k] == true) { /* dynamic metadata can raise the requirement further */
+ double TSetupPipe;
+ double TdmbfPipe;
+ double TdmsksPipe;
+ double TdmecPipe;
+ double AllowedTimeForUrgentExtraLatency;
+
+ CalculateVupdateAndDynamicMetadataParameters(
+ v->MaxInterDCNTileRepeaters,
+ v->RequiredDPPCLK[i][j][k],
+ v->RequiredDISPCLK[i][j],
+ v->ProjectedDCFCLKDeepSleep[i][j],
+ v->PixelClock[k],
+ v->HTotal[k],
+ v->VTotal[k] - v->VActive[k],
+ v->DynamicMetadataTransmittedBytes[k],
+ v->DynamicMetadataLinesBeforeActiveRequired[k],
+ v->Interlace[k],
+ v->ProgressiveToInterlaceUnitInOPP,
+ &TSetupPipe,
+ &TdmbfPipe,
+ &TdmecPipe,
+ &TdmsksPipe,
+ &dummy1,
+ &dummy2,
+ &dummy3);
+ AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
+ - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
+ if (AllowedTimeForUrgentExtraLatency > 0) {
+ DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
+ DCFCLKRequiredForPeakBandwidthPerPlane[k],
+ ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
+ } else { /* no time budget left: use the state's DCFCLK for this plane */
+ DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
+ }
+ }
+ }
+ DCFCLKRequiredForPeakBandwidth = 0; /* sum of the per-plane requirements */
+ for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
+ DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
+
+ MinimumTvmPlus2Tr0 = v->UrgLatency[i]
+ * (v->GPUVMEnable == true ?
+ (v->HostVMEnable == true ?
+ (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
+ 0);
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) { /* second pass: each plane may overwrite or raise the summed peak requirement */
+ double MaximumTvmPlus2Tr0PlusTsw;
+
+ MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
+ if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) { /* NOTE(review): this assignment overwrites (rather than maxes with) the running value, so later planes can lower it again - confirm intended */
+ DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
+ } else {
+ DCFCLKRequiredForPeakBandwidth = dml_max3(
+ DCFCLKRequiredForPeakBandwidth,
+ 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
+ (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
+ }
+ }
+ v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); /* 5% margin on the larger requirement, never above the state's DCFCLK */
+ }
+ }
+}
+
+static void CalculateUnboundedRequestAndCompressedBufferSize(
+	unsigned int DETBufferSizeInKByte,
+	int ConfigReturnBufferSizeInKByte,
+	enum unbounded_requesting_policy UseUnboundedRequestingFinal,
+	int TotalActiveDPP,
+	bool NoChromaPlanes,
+	int MaxNumDPP,
+	int CompressedBufferSegmentSizeInkByteFinal,
+	enum output_encoder_class *Output,
+	bool *UnboundedRequestEnabled,
+	int *CompressedBufferSizeInkByte)
+{
+	double RoundedDETSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
+	int DPPsHoldingDET, LeftoverKByte;
+
+	*UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, *Output);
+	/* With unbounded requests only the active DPPs take DET space, otherwise all MaxNumDPP do; the remainder is truncated to int before scaling. */
+	DPPsHoldingDET = *UnboundedRequestEnabled ? TotalActiveDPP : MaxNumDPP;
+	LeftoverKByte = ConfigReturnBufferSizeInKByte - DPPsHoldingDET * RoundedDETSizeInKByte;
+	*CompressedBufferSizeInkByte = LeftoverKByte * CompressedBufferSegmentSizeInkByteFinal / 64;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
+	dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
+	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
+	dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
+	dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, RoundedDETSizeInKByte);
+	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
+	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
+#endif
+}
+
+static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
+{
+	/* Allowed only when the policy permits it (and, for the eDP-only policy, the output is eDP), exactly one DPP is active and there is no chroma. */
+	if (UseUnboundedRequestingFinal == dm_unbounded_requesting_disable)
+		return false;
+	if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
+		return false;
+	return TotalNumberOfActiveDPP == 1 && NoChroma;
+}
+
+static unsigned int CalculateMaxVStartup(
+	unsigned int VTotal,
+	unsigned int VActive,
+	unsigned int VBlankNom,
+	unsigned int HTotal,
+	double PixelClock,
+	bool ProgressiveTointerlaceUnitinOPP,
+	bool Interlace,
+	unsigned int VBlankNomDefaultUS,
+	double WritebackDelayTime)
+{
+	/* Line time is HTotal / PixelClock; the default vblank is VBlankNomDefaultUS expressed in whole lines. */
+	const double LineTime = HTotal / PixelClock;
+	const unsigned int DefaultVBlankLines = dml_floor(VBlankNomDefaultUS / LineTime, 1.0);
+	/* A zero VBlankNom selects the default; the result is further limited to the real vblank (VTotal - VActive). */
+	const unsigned int NominalVBlank = VBlankNom != 0 ? VBlankNom : DefaultVBlankLines;
+	const unsigned int VBlankLines = (unsigned int) dml_min(VTotal - VActive, NominalVBlank);
+	unsigned int Limit;
+
+	/* Interlaced without progressive-to-interlace in OPP: half the vblank; otherwise subtract the writeback delay in lines (at least 1). */
+	if (Interlace && !ProgressiveTointerlaceUnitinOPP)
+		Limit = dml_floor(VBlankLines / 2.0, 1.0);
+	else
+		Limit = VBlankLines - dml_max(1.0, dml_ceil(WritebackDelayTime / LineTime, 1.0));
+
+	/* The returned value never exceeds 1023. */
+	return Limit > 1023 ? 1023 : Limit;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.h b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.h
new file mode 100644
index 000000000000..a8199ab7d26a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DML314_DISPLAY_MODE_VBA_H__
+#define __DML314_DISPLAY_MODE_VBA_H__
+
+void dml314_recalculate(struct display_mode_lib *mode_lib); /* prototype only; the implementation is not part of this header */
+void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib); /* prototype only; the implementation is not part of this header */
+double dml314_CalculateWriteBackDISPCLK( /* prototype only; presumably returns the DISPCLK needed for writeback, judging by the name - confirm against the definition */
+ enum source_format_class WritebackPixelFormat,
+ double PixelClock,
+ double WritebackHRatio,
+ double WritebackVRatio,
+ unsigned int WritebackHTaps,
+ unsigned int WritebackVTaps,
+ long WritebackSourceWidth,
+ long WritebackDestinationWidth,
+ unsigned int HTotal,
+ unsigned int WritebackLineBufferSize);
+
+#endif /* __DML314_DISPLAY_MODE_VBA_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c
new file mode 100644
index 000000000000..04df263ff65e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c
@@ -0,0 +1,1678 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "../display_mode_lib.h"
+#include "../display_mode_vba.h"
+#include "../dml_inline_defs.h"
+#include "display_rq_dlg_calc_314.h"
+
+static bool CalculateBytePerPixelAnd256BBlockSizes( /* Derive bytes-per-pixel and 256-byte block height/width for the Y and C planes of a format/tiling pair. */
+ enum source_format_class SourcePixelFormat,
+ enum dm_swizzle_mode SurfaceTiling,
+ unsigned int *BytePerPixelY, /* out: integer bytes per pixel, Y plane */
+ unsigned int *BytePerPixelC, /* out: integer bytes per pixel, C plane (0 when the branch below sets none) */
+ double *BytePerPixelDETY, /* out: Y bytes per pixel as a double; only differs from BytePerPixelY in the final else branch */
+ double *BytePerPixelDETC, /* out: C counterpart of BytePerPixelDETY */
+ unsigned int *BlockHeight256BytesY,
+ unsigned int *BlockHeight256BytesC,
+ unsigned int *BlockWidth256BytesY,
+ unsigned int *BlockWidth256BytesC)
+{
+ if (SourcePixelFormat == dm_444_64) {
+ *BytePerPixelDETY = 8;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 8;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
+ *BytePerPixelDETY = 4;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 4;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_16) {
+ *BytePerPixelDETY = 2;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_8) {
+ *BytePerPixelDETY = 1;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 1;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_rgbe_alpha) {
+ *BytePerPixelDETY = 4;
+ *BytePerPixelDETC = 1;
+ *BytePerPixelY = 4;
+ *BytePerPixelC = 1;
+ } else if (SourcePixelFormat == dm_420_8) {
+ *BytePerPixelDETY = 1;
+ *BytePerPixelDETC = 2;
+ *BytePerPixelY = 1;
+ *BytePerPixelC = 2;
+ } else if (SourcePixelFormat == dm_420_12) {
+ *BytePerPixelDETY = 2;
+ *BytePerPixelDETC = 4;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 4;
+ } else { /* every format not matched above lands here (dm_420_10, dm_mono_8 and dm_mono_16 have no branch of their own) */
+ *BytePerPixelDETY = 4.0 / 3; /* fractional value, while the integer size below is 2 */
+ *BytePerPixelDETC = 8.0 / 3; /* fractional value, while the integer size below is 4 */
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 4;
+ }
+
+ if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
+ || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) { /* formats for which only Y block dimensions are produced */
+ if (SurfaceTiling == dm_sw_linear)
+ *BlockHeight256BytesY = 1; /* linear surfaces use a block height of 1 */
+ else if (SourcePixelFormat == dm_444_64)
+ *BlockHeight256BytesY = 4;
+ else if (SourcePixelFormat == dm_444_8)
+ *BlockHeight256BytesY = 16;
+ else
+ *BlockHeight256BytesY = 8;
+
+ *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; /* width = 256 / bytes-per-pixel / height (integer division) */
+ *BlockHeight256BytesC = 0; /* C block dimensions are zeroed for the formats listed above */
+ *BlockWidth256BytesC = 0;
+ } else { /* all other formats: both Y and C block dimensions are computed */
+ if (SurfaceTiling == dm_sw_linear) {
+ *BlockHeight256BytesY = 1;
+ *BlockHeight256BytesC = 1;
+ } else if (SourcePixelFormat == dm_rgbe_alpha) {
+ *BlockHeight256BytesY = 8;
+ *BlockHeight256BytesC = 16;
+ } else if (SourcePixelFormat == dm_420_8) {
+ *BlockHeight256BytesY = 16;
+ *BlockHeight256BytesC = 8;
+ } else {
+ *BlockHeight256BytesY = 8;
+ *BlockHeight256BytesC = 8;
+ }
+ *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
+ *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; /* BytePerPixelC is non-zero for every format that can reach this branch */
+ }
+ return true; /* no failure path exists: the result is unconditionally true */
+}
+
+static bool is_dual_plane(enum source_format_class source_format)
+{
+	/* True for dm_420_8, dm_420_10, dm_420_12 and dm_rgbe_alpha only. */
+	const bool is_420 = source_format == dm_420_8
+			|| source_format == dm_420_10
+			|| source_format == dm_420_12;
+
+	return is_420 || source_format == dm_rgbe_alpha;
+}
+
+static double get_refcyc_per_delivery( /* Return refclk_freq_in_mhz scaled by a width/rate ratio and divided by req_per_swath_ub. */
+ struct display_mode_lib *mode_lib,
+ double refclk_freq_in_mhz,
+ double pclk_freq_in_mhz,
+ unsigned int odm_combine,
+ unsigned int recout_width,
+ unsigned int hactive,
+ double vratio,
+ double hscale_pixel_rate,
+ unsigned int delivery_width,
+ unsigned int req_per_swath_ub) /* NOTE(review): used as a divisor in every branch with no zero check */
+{
+ double refcyc_per_delivery = 0.0;
+
+ if (vratio <= 1.0) { /* vratio <= 1.0: formula based on the pixel clock and recout_width */
+ if (odm_combine) /* non-zero odm_combine: width is min(recout_width, hactive / (odm_combine * 2)), then multiplied by (odm_combine * 2) */
+ refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) ((unsigned int) odm_combine * 2)
+ * dml_min((double) recout_width, (double) hactive / ((unsigned int) odm_combine * 2)) / pclk_freq_in_mhz / (double) req_per_swath_ub;
+ else
+ refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) recout_width / pclk_freq_in_mhz / (double) req_per_swath_ub;
+ } else { /* vratio > 1.0: delivery_width and hscale_pixel_rate replace recout_width and the pixel clock */
+ refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) delivery_width / (double) hscale_pixel_rate / (double) req_per_swath_ub;
+ }
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: recout_width = %d\n", __func__, recout_width);
+ dml_print("DML_DLG: %s: vratio = %3.2f\n", __func__, vratio);
+ dml_print("DML_DLG: %s: req_per_swath_ub = %d\n", __func__, req_per_swath_ub);
+ dml_print("DML_DLG: %s: hscale_pixel_rate = %3.2f\n", __func__, hscale_pixel_rate);
+ dml_print("DML_DLG: %s: delivery_width = %d\n", __func__, delivery_width);
+ dml_print("DML_DLG: %s: refcyc_per_delivery= %3.2f\n", __func__, refcyc_per_delivery);
+#endif
+
+ return refcyc_per_delivery;
+
+}
+
+static unsigned int get_blk_size_bytes(const enum source_macro_tile_size tile_size)
+{
+	/* Block size in bytes; any tile size other than 256K/64K yields 4KB. */
+	if (tile_size == dm_256k_tile)
+		return 256 * 1024;
+	if (tile_size == dm_64k_tile)
+		return 64 * 1024;
+	return 4 * 1024;
+}
+
+static void extract_rq_sizing_regs(struct display_mode_lib *mode_lib, display_data_rq_regs_st *rq_regs, const display_data_rq_sizing_params_st *rq_sizing)
+{
+	/* Convert the byte counts in rq_sizing into log2-based register fields. */
+	print__data_rq_sizing_params_st(mode_lib, rq_sizing);
+
+	/* Data chunk: log2(bytes) - 10. */
+	/* Minimum data chunk: log2(bytes) - 8 + 1, or 0 when no minimum is set. */
+	rq_regs->chunk_size = dml_log2(rq_sizing->chunk_bytes) - 10;
+	rq_regs->min_chunk_size = (rq_sizing->min_chunk_bytes != 0) ?
+			dml_log2(rq_sizing->min_chunk_bytes) - 8 + 1 : 0;
+
+	/* Meta chunk: same scheme, with the minimum encoded as log2(bytes) - 6 + 1. */
+	rq_regs->meta_chunk_size = dml_log2(rq_sizing->meta_chunk_bytes) - 10;
+	rq_regs->min_meta_chunk_size = (rq_sizing->min_meta_chunk_bytes != 0) ?
+			dml_log2(rq_sizing->min_meta_chunk_bytes) - 6 + 1 : 0;
+
+	/* Both PTE group sizes are encoded as log2(bytes) - 6. */
+	rq_regs->dpte_group_size = dml_log2(rq_sizing->dpte_group_bytes) - 6;
+	rq_regs->mpte_group_size = dml_log2(rq_sizing->mpte_group_bytes) - 6;
+}
+
+static void extract_rq_regs(struct display_mode_lib *mode_lib, display_rq_regs_st *rq_regs, const display_rq_params_st *rq_param)
+{ /* Fill rq_regs from rq_param: per-plane sizing fields, swath heights, expansion modes and plane1_base_address. */
+ unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024;
+ unsigned int detile_buf_plane1_addr = 0; /* stays 0 unless rq_param->yuv420 is set */
+
+ extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_l), &rq_param->sizing.rq_l);
+
+ rq_regs->rq_regs_l.pte_row_height_linear = dml_floor(dml_log2(rq_param->dlg.rq_l.dpte_row_height), 1) - 3;
+
+ if (rq_param->yuv420) { /* chroma sizing fields and pte_row_height_linear are only derived when yuv420 is set */
+ extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_c), &rq_param->sizing.rq_c);
+ rq_regs->rq_regs_c.pte_row_height_linear = dml_floor(dml_log2(rq_param->dlg.rq_c.dpte_row_height), 1) - 3;
+ }
+
+ rq_regs->rq_regs_l.swath_height = dml_log2(rq_param->dlg.rq_l.swath_height);
+ rq_regs->rq_regs_c.swath_height = dml_log2(rq_param->dlg.rq_c.swath_height); /* written unconditionally, even when yuv420 is not set */
+
+ // FIXME: take the max between luma, chroma chunk size?
+ // okay for now, as we are setting chunk_bytes to 8kb anyways
+ if (rq_param->sizing.rq_l.chunk_bytes >= 32 * 1024 || (rq_param->yuv420 && rq_param->sizing.rq_c.chunk_bytes >= 32 * 1024)) { //32kb
+ rq_regs->drq_expansion_mode = 0;
+ } else {
+ rq_regs->drq_expansion_mode = 2;
+ }
+ rq_regs->prq_expansion_mode = 1; /* the three modes below are fixed at 1 regardless of input */
+ rq_regs->mrq_expansion_mode = 1;
+ rq_regs->crq_expansion_mode = 1;
+
+ // Note: detile_buf_plane1_addr is in unit of 1KB
+ if (rq_param->yuv420) {
+ if ((double) rq_param->misc.rq_l.stored_swath_bytes / (double) rq_param->misc.rq_c.stored_swath_bytes <= 1.5) { /* luma/chroma stored swath ratio decides the split point */
+ detile_buf_plane1_addr = (detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: detile_buf_plane1_addr = %0d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr);
+#endif
+ } else {
+ detile_buf_plane1_addr = dml_round_to_multiple((unsigned int) ((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0; // 2/3 to luma
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: detile_buf_plane1_addr = %0d (1/3 chroma)\n", __func__, detile_buf_plane1_addr);
+#endif
+ }
+ }
+ rq_regs->plane1_base_address = detile_buf_plane1_addr;
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: detile_buf_size_in_bytes = %0d\n", __func__, detile_buf_size_in_bytes);
+ dml_print("DML_DLG: %s: detile_buf_plane1_addr = %0d\n", __func__, detile_buf_plane1_addr);
+ dml_print("DML_DLG: %s: plane1_base_address = %0d\n", __func__, rq_regs->plane1_base_address);
+ dml_print("DML_DLG: %s: rq_l.stored_swath_bytes = %0d\n", __func__, rq_param->misc.rq_l.stored_swath_bytes);
+ dml_print("DML_DLG: %s: rq_c.stored_swath_bytes = %0d\n", __func__, rq_param->misc.rq_c.stored_swath_bytes);
+ dml_print("DML_DLG: %s: rq_l.swath_height = %0d\n", __func__, rq_param->dlg.rq_l.swath_height);
+ dml_print("DML_DLG: %s: rq_c.swath_height = %0d\n", __func__, rq_param->dlg.rq_c.swath_height);
+#endif
+}
+
+static void handle_det_buf_split(struct display_mode_lib *mode_lib, display_rq_params_st *rq_param, const display_pipe_source_params_st *pipe_src_param)
+{ /* Choose stored swath bytes and swath heights per plane by comparing two swaths per plane against the detile buffer size, halving a plane ("req128") when they do not fit. */
+ unsigned int total_swath_bytes = 0;
+ unsigned int swath_bytes_l = 0;
+ unsigned int swath_bytes_c = 0;
+ unsigned int full_swath_bytes_packed_l = 0;
+ unsigned int full_swath_bytes_packed_c = 0;
+ bool req128_l = 0; /* set when the luma swath is halved */
+ bool req128_c = 0; /* set when the chroma swath is halved */
+ bool surf_linear = (pipe_src_param->sw_mode == dm_sw_linear);
+ bool surf_vert = (pipe_src_param->source_scan == dm_vert);
+ unsigned int log2_swath_height_l = 0;
+ unsigned int log2_swath_height_c = 0;
+ unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024;
+
+ full_swath_bytes_packed_l = rq_param->misc.rq_l.full_swath_bytes;
+ full_swath_bytes_packed_c = rq_param->misc.rq_c.full_swath_bytes;
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d\n", __func__, full_swath_bytes_packed_l);
+ dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d\n", __func__, full_swath_bytes_packed_c);
+#endif
+
+ if (rq_param->yuv420_10bpc) { /* 10bpc 4:2:0: scale the full swath by 2/3, round to 256 and add 256 */
+ full_swath_bytes_packed_l = dml_round_to_multiple(rq_param->misc.rq_l.full_swath_bytes * 2.0 / 3.0, 256, 1) + 256;
+ full_swath_bytes_packed_c = dml_round_to_multiple(rq_param->misc.rq_c.full_swath_bytes * 2.0 / 3.0, 256, 1) + 256;
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d (3-2 packing)\n", __func__, full_swath_bytes_packed_l);
+ dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d (3-2 packing)\n", __func__, full_swath_bytes_packed_c);
+#endif
+ }
+
+ if (rq_param->yuv420)
+ total_swath_bytes = 2 * full_swath_bytes_packed_l + 2 * full_swath_bytes_packed_c; /* two swaths per plane */
+ else
+ total_swath_bytes = 2 * full_swath_bytes_packed_l;
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: total_swath_bytes = %0d\n", __func__, total_swath_bytes);
+ dml_print("DML_DLG: %s: detile_buf_size_in_bytes = %0d\n", __func__, detile_buf_size_in_bytes);
+#endif
+
+ if (total_swath_bytes <= detile_buf_size_in_bytes) { //full 256b request
+ req128_l = 0;
+ req128_c = 0;
+ swath_bytes_l = full_swath_bytes_packed_l;
+ swath_bytes_c = full_swath_bytes_packed_c;
+ } else if (!rq_param->yuv420) { /* does not fit and yuv420 is not set: halve luma only */
+ req128_l = 1;
+ req128_c = 0;
+ swath_bytes_c = full_swath_bytes_packed_c;
+ swath_bytes_l = full_swath_bytes_packed_l / 2;
+ } else if ((double) full_swath_bytes_packed_l / (double) full_swath_bytes_packed_c < 1.5) { /* luma/chroma ratio below 1.5: halve chroma first */
+ req128_l = 0;
+ req128_c = 1;
+ swath_bytes_l = full_swath_bytes_packed_l;
+ swath_bytes_c = full_swath_bytes_packed_c / 2;
+
+ total_swath_bytes = 2 * swath_bytes_l + 2 * swath_bytes_c;
+
+ if (total_swath_bytes > detile_buf_size_in_bytes) { /* still too large: halve luma as well */
+ req128_l = 1;
+ swath_bytes_l = full_swath_bytes_packed_l / 2;
+ }
+ } else { /* ratio of 1.5 or more: halve luma first */
+ req128_l = 1;
+ req128_c = 0;
+ swath_bytes_l = full_swath_bytes_packed_l / 2;
+ swath_bytes_c = full_swath_bytes_packed_c;
+
+ total_swath_bytes = 2 * swath_bytes_l + 2 * swath_bytes_c;
+
+ if (total_swath_bytes > detile_buf_size_in_bytes) { /* still too large: halve chroma as well */
+ req128_c = 1;
+ swath_bytes_c = full_swath_bytes_packed_c / 2;
+ }
+ }
+
+ if (rq_param->yuv420) /* recomputed total; within this function it is only read by the debug print below */
+ total_swath_bytes = 2 * swath_bytes_l + 2 * swath_bytes_c;
+ else
+ total_swath_bytes = 2 * swath_bytes_l;
+
+ rq_param->misc.rq_l.stored_swath_bytes = swath_bytes_l;
+ rq_param->misc.rq_c.stored_swath_bytes = swath_bytes_c;
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: total_swath_bytes = %0d\n", __func__, total_swath_bytes);
+ dml_print("DML_DLG: %s: rq_l.stored_swath_bytes = %0d\n", __func__, rq_param->misc.rq_l.stored_swath_bytes);
+ dml_print("DML_DLG: %s: rq_c.stored_swath_bytes = %0d\n", __func__, rq_param->misc.rq_c.stored_swath_bytes);
+#endif
+ if (surf_linear) { /* linear surfaces: both log2 heights are 0, giving a swath height of 1 */
+ log2_swath_height_l = 0;
+ log2_swath_height_c = 0;
+ } else {
+ unsigned int swath_height_l;
+ unsigned int swath_height_c;
+
+ if (!surf_vert) { /* swath height comes from the 256B block height, or from its width when source_scan is dm_vert */
+ swath_height_l = rq_param->misc.rq_l.blk256_height;
+ swath_height_c = rq_param->misc.rq_c.blk256_height;
+ } else {
+ swath_height_l = rq_param->misc.rq_l.blk256_width;
+ swath_height_c = rq_param->misc.rq_c.blk256_width;
+ }
+
+ if (swath_height_l > 0) /* a height of 0 leaves the log2 value at its initial 0 */
+ log2_swath_height_l = dml_log2(swath_height_l);
+
+ if (req128_l && log2_swath_height_l > 0) /* a halved request halves the swath height, never going below log2 = 0 */
+ log2_swath_height_l -= 1;
+
+ if (swath_height_c > 0)
+ log2_swath_height_c = dml_log2(swath_height_c);
+
+ if (req128_c && log2_swath_height_c > 0)
+ log2_swath_height_c -= 1;
+ }
+
+ rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l;
+ rq_param->dlg.rq_c.swath_height = 1 << log2_swath_height_c;
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: req128_l = %0d\n", __func__, req128_l);
+ dml_print("DML_DLG: %s: req128_c = %0d\n", __func__, req128_c);
+ dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d\n", __func__, full_swath_bytes_packed_l);
+ dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d\n", __func__, full_swath_bytes_packed_c);
+ dml_print("DML_DLG: %s: swath_height luma = %0d\n", __func__, rq_param->dlg.rq_l.swath_height);
+ dml_print("DML_DLG: %s: swath_height chroma = %0d\n", __func__, rq_param->dlg.rq_c.swath_height);
+#endif
+}
+
+static void get_meta_and_pte_attr(
+ struct display_mode_lib *mode_lib,
+ display_data_rq_dlg_params_st *rq_dlg_param,
+ display_data_rq_misc_params_st *rq_misc_param,
+ display_data_rq_sizing_params_st *rq_sizing_param,
+ unsigned int vp_width,
+ unsigned int vp_height,
+ unsigned int data_pitch,
+ unsigned int meta_pitch,
+ unsigned int source_format,
+ unsigned int tiling,
+ unsigned int macro_tile_size,
+ unsigned int source_scan,
+ unsigned int hostvm_enable,
+ unsigned int is_chroma,
+ unsigned int surface_height)
+{ /* Fill rq_dlg_param, rq_misc_param and rq_sizing_param->dpte_group_bytes for one plane (Y or C, selected by is_chroma). */
+ bool surf_linear = (tiling == dm_sw_linear);
+ bool surf_vert = (source_scan == dm_vert);
+
+ unsigned int bytes_per_element;
+ unsigned int bytes_per_element_y;
+ unsigned int bytes_per_element_c;
+
+ unsigned int blk256_width = 0;
+ unsigned int blk256_height = 0;
+
+ unsigned int blk256_width_y = 0;
+ unsigned int blk256_height_y = 0;
+ unsigned int blk256_width_c = 0;
+ unsigned int blk256_height_c = 0;
+ unsigned int log2_bytes_per_element;
+ unsigned int log2_blk256_width;
+ unsigned int log2_blk256_height;
+ unsigned int blk_bytes;
+ unsigned int log2_blk_bytes;
+ unsigned int log2_blk_height;
+ unsigned int log2_blk_width;
+ unsigned int log2_meta_req_bytes;
+ unsigned int log2_meta_req_height;
+ unsigned int log2_meta_req_width;
+ unsigned int meta_req_width;
+ unsigned int meta_req_height;
+ unsigned int log2_meta_row_height;
+ unsigned int meta_row_width_ub;
+ unsigned int log2_meta_chunk_bytes;
+ unsigned int log2_meta_chunk_height;
+
+ //full sized meta chunk width in unit of data elements
+ unsigned int log2_meta_chunk_width;
+ unsigned int log2_min_meta_chunk_bytes;
+ unsigned int min_meta_chunk_width;
+ unsigned int meta_chunk_width;
+ unsigned int meta_chunk_per_row_int;
+ unsigned int meta_row_remainder;
+ unsigned int meta_chunk_threshold;
+ unsigned int meta_blk_height;
+ unsigned int meta_surface_bytes;
+ unsigned int vmpg_bytes;
+ unsigned int meta_pte_req_per_frame_ub;
+ unsigned int meta_pte_bytes_per_frame_ub;
+ const unsigned int log2_vmpg_bytes = dml_log2(mode_lib->soc.gpuvm_min_page_size_bytes);
+ const bool dual_plane_en = is_dual_plane((enum source_format_class) (source_format));
+ const unsigned int dpte_buf_in_pte_reqs =
+ dual_plane_en ? (is_chroma ? mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma) : (mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma
+ + mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma); /* dual plane: the per-plane size; otherwise the luma and chroma sizes added together */
+
+ unsigned int log2_vmpg_height = 0;
+ unsigned int log2_vmpg_width = 0;
+ unsigned int log2_dpte_req_height_ptes = 0;
+ unsigned int log2_dpte_req_height = 0;
+ unsigned int log2_dpte_req_width = 0;
+ unsigned int log2_dpte_row_height_linear = 0;
+ unsigned int log2_dpte_row_height = 0;
+ unsigned int log2_dpte_group_width = 0;
+ unsigned int dpte_row_width_ub = 0;
+ unsigned int dpte_req_height = 0;
+ unsigned int dpte_req_width = 0;
+ unsigned int dpte_group_width = 0;
+ unsigned int log2_dpte_group_bytes = 0;
+ unsigned int log2_dpte_group_length = 0;
+ double byte_per_pixel_det_y; /* filled by the call below but not read afterwards in this function */
+ double byte_per_pixel_det_c; /* filled by the call below but not read afterwards in this function */
+
+ CalculateBytePerPixelAnd256BBlockSizes(
+ (enum source_format_class) (source_format),
+ (enum dm_swizzle_mode) (tiling),
+ &bytes_per_element_y,
+ &bytes_per_element_c,
+ &byte_per_pixel_det_y,
+ &byte_per_pixel_det_c,
+ &blk256_height_y,
+ &blk256_height_c,
+ &blk256_width_y,
+ &blk256_width_c);
+
+ if (!is_chroma) { /* keep only the Y or C results for the plane being processed */
+ blk256_width = blk256_width_y;
+ blk256_height = blk256_height_y;
+ bytes_per_element = bytes_per_element_y;
+ } else {
+ blk256_width = blk256_width_c;
+ blk256_height = blk256_height_c;
+ bytes_per_element = bytes_per_element_c;
+ }
+
+ log2_bytes_per_element = dml_log2(bytes_per_element);
+
+ dml_print("DML_DLG: %s: surf_linear = %d\n", __func__, surf_linear); /* NOTE(review): the prints in this function are not wrapped in __DML_RQ_DLG_CALC_DEBUG__, unlike the helpers above */
+ dml_print("DML_DLG: %s: surf_vert = %d\n", __func__, surf_vert);
+ dml_print("DML_DLG: %s: blk256_width = %d\n", __func__, blk256_width);
+ dml_print("DML_DLG: %s: blk256_height = %d\n", __func__, blk256_height);
+
+ log2_blk256_width = dml_log2((double) blk256_width);
+ log2_blk256_height = dml_log2((double) blk256_height);
+ blk_bytes = surf_linear ? 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size); /* linear surfaces use a 256-byte block */
+ log2_blk_bytes = dml_log2((double) blk_bytes);
+
+ // remember log rule
+ // "+" in log is multiply
+ // "-" in log is divide
+ // "/2" is like square root
+ // blk is vertical biased
+ if (tiling != dm_sw_linear)
+ log2_blk_height = log2_blk256_height + dml_ceil((double) (log2_blk_bytes - 8) / 2.0, 1);
+ else
+ log2_blk_height = 0; // blk height of 1
+
+ log2_blk_width = log2_blk_bytes - log2_bytes_per_element - log2_blk_height;
+
+ if (!surf_vert) { /* horizontal scan: swath width follows vp_width, optionally clamped to data_pitch */
+ unsigned int temp;
+
+ temp = dml_round_to_multiple(vp_width - 1, blk256_width, 1) + blk256_width;
+ if (data_pitch < blk256_width) {
+ dml_print("WARNING: DML_DLG: %s: swath_size calculation ignoring data_pitch=%u < blk256_width=%u\n", __func__, data_pitch, blk256_width);
+ } else {
+ if (temp > data_pitch) {
+ if (data_pitch >= vp_width)
+ temp = data_pitch; /* clamp to the pitch only when the pitch still covers the viewport width */
+ else
+ dml_print("WARNING: DML_DLG: %s: swath_size calculation ignoring data_pitch=%u < vp_width=%u\n", __func__, data_pitch, vp_width);
+ }
+ }
+ rq_dlg_param->swath_width_ub = temp;
+ rq_dlg_param->req_per_swath_ub = temp >> log2_blk256_width; /* shift by log2 of the block width, i.e. a division if blk256_width is a power of two — TODO confirm */
+ } else { /* vertical scan: same scheme using vp_height, blk256_height and surface_height */
+ unsigned int temp;
+
+ temp = dml_round_to_multiple(vp_height - 1, blk256_height, 1) + blk256_height;
+ if (surface_height < blk256_height) {
+ dml_print("WARNING: DML_DLG: %s swath_size calculation ignored surface_height=%u < blk256_height=%u\n", __func__, surface_height, blk256_height);
+ } else {
+ if (temp > surface_height) {
+ if (surface_height >= vp_height)
+ temp = surface_height;
+ else
+ dml_print("WARNING: DML_DLG: %s swath_size calculation ignored surface_height=%u < vp_height=%u\n", __func__, surface_height, vp_height);
+ }
+ }
+ rq_dlg_param->swath_width_ub = temp;
+ rq_dlg_param->req_per_swath_ub = temp >> log2_blk256_height;
+ }
+
+ if (!surf_vert) /* full swath bytes = swath width * the other 256B block dimension * bytes per element */
+ rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_height * bytes_per_element;
+ else
+ rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_width * bytes_per_element;
+
+ rq_misc_param->blk256_height = blk256_height;
+ rq_misc_param->blk256_width = blk256_width;
+
+ // -------
+ // meta
+ // -------
+ log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element
+
+ // each 64b meta request for dcn is 8x8 meta elements and
+ // a meta element covers one 256b block of the data surface.
+ log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256
+ log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element - log2_meta_req_height;
+ meta_req_width = 1 << log2_meta_req_width;
+ meta_req_height = 1 << log2_meta_req_height;
+
+ // the dimensions of a meta row are meta_row_width x meta_row_height in elements.
+ // calculate upper bound of the meta_row_width
+ if (!surf_vert) {
+ log2_meta_row_height = log2_meta_req_height;
+ meta_row_width_ub = dml_round_to_multiple(vp_width - 1, meta_req_width, 1) + meta_req_width;
+ rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_width;
+ } else {
+ log2_meta_row_height = log2_meta_req_width;
+ meta_row_width_ub = dml_round_to_multiple(vp_height - 1, meta_req_height, 1) + meta_req_height;
+ rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_height;
+ }
+ rq_dlg_param->meta_bytes_per_row_ub = rq_dlg_param->meta_req_per_row_ub * 64; /* 64 bytes per meta request */
+
+ rq_dlg_param->meta_row_height = 1 << log2_meta_row_height;
+
+ log2_meta_chunk_bytes = dml_log2(rq_sizing_param->meta_chunk_bytes);
+ log2_meta_chunk_height = log2_meta_row_height;
+
+ //full sized meta chunk width in unit of data elements
+ log2_meta_chunk_width = log2_meta_chunk_bytes + 8 - log2_bytes_per_element - log2_meta_chunk_height;
+ log2_min_meta_chunk_bytes = dml_log2(rq_sizing_param->min_meta_chunk_bytes);
+ min_meta_chunk_width = 1 << (log2_min_meta_chunk_bytes + 8 - log2_bytes_per_element - log2_meta_chunk_height);
+ meta_chunk_width = 1 << log2_meta_chunk_width;
+ meta_chunk_per_row_int = (unsigned int) (meta_row_width_ub / meta_chunk_width);
+ meta_row_remainder = meta_row_width_ub % meta_chunk_width;
+ meta_chunk_threshold = 0; /* placeholder; overwritten below depending on surf_vert */
+ meta_blk_height = blk256_height * 64;
+ meta_surface_bytes = meta_pitch * (dml_round_to_multiple(vp_height - 1, meta_blk_height, 1) + meta_blk_height) * bytes_per_element / 256;
+ vmpg_bytes = mode_lib->soc.gpuvm_min_page_size_bytes;
+ meta_pte_req_per_frame_ub = (dml_round_to_multiple(meta_surface_bytes - vmpg_bytes, 8 * vmpg_bytes, 1) + 8 * vmpg_bytes) / (8 * vmpg_bytes); /* NOTE(review): unsigned subtraction wraps when meta_surface_bytes < vmpg_bytes */
+ meta_pte_bytes_per_frame_ub = meta_pte_req_per_frame_ub * 64; //64B mpte request
+ rq_dlg_param->meta_pte_bytes_per_frame_ub = meta_pte_bytes_per_frame_ub;
+
+ dml_print("DML_DLG: %s: meta_blk_height = %d\n", __func__, meta_blk_height);
+ dml_print("DML_DLG: %s: meta_surface_bytes = %d\n", __func__, meta_surface_bytes);
+ dml_print("DML_DLG: %s: meta_pte_req_per_frame_ub = %d\n", __func__, meta_pte_req_per_frame_ub);
+ dml_print("DML_DLG: %s: meta_pte_bytes_per_frame_ub = %d\n", __func__, meta_pte_bytes_per_frame_ub);
+
+ if (!surf_vert)
+ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width;
+ else
+ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height;
+
+ if (meta_row_remainder <= meta_chunk_threshold) /* a remainder above the threshold adds one more chunk to the upper bound */
+ rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
+ else
+ rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
+
+ // ------
+ // dpte
+ // ------
+ if (surf_linear)
+ log2_vmpg_height = 0; // one line high
+ else
+ log2_vmpg_height = (log2_vmpg_bytes - 8) / 2 + log2_blk256_height;
+
+ log2_vmpg_width = log2_vmpg_bytes - log2_bytes_per_element - log2_vmpg_height;
+
+ // only 3 possible shapes for dpte request in dimensions of ptes: 8x1, 4x2, 2x4.
+ if (surf_linear) { //one 64B PTE request returns 8 PTEs
+ log2_dpte_req_height_ptes = 0;
+ log2_dpte_req_width = log2_vmpg_width + 3;
+ log2_dpte_req_height = 0;
+ } else if (log2_blk_bytes == 12) { //4KB tile means 4kB page size
+ //one 64B req gives 8x1 PTEs for 4KB tile
+ log2_dpte_req_height_ptes = 0;
+ log2_dpte_req_width = log2_blk_width + 3;
+ log2_dpte_req_height = log2_blk_height + 0;
+ } else if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) { // tile block >= 64KB
+ //two 64B reqs of 2x4 PTEs give 16 PTEs to cover 64KB
+ log2_dpte_req_height_ptes = 4;
+ log2_dpte_req_width = log2_blk256_width + 4; // log2_64KB_width
+ log2_dpte_req_height = log2_blk256_height + 4; // log2_64KB_height
+ } else { //64KB page size and must 64KB tile block
+ //one 64B req gives 8x1 PTEs for 64KB tile
+ log2_dpte_req_height_ptes = 0;
+ log2_dpte_req_width = log2_blk_width + 3;
+ log2_dpte_req_height = log2_blk_height + 0;
+ }
+
+ // The dpte request dimensions in data elements is dpte_req_width x dpte_req_height
+ // log2_vmpg_width is how much 1 pte represent, now calculating how much a 64b pte req represent
+ // That depends on the pte shape (i.e. 8x1, 4x2, 2x4)
+ //log2_dpte_req_height = log2_vmpg_height + log2_dpte_req_height_ptes;
+ //log2_dpte_req_width = log2_vmpg_width + log2_dpte_req_width_ptes;
+ dpte_req_height = 1 << log2_dpte_req_height;
+ dpte_req_width = 1 << log2_dpte_req_width;
+
+ // calculate pitch dpte row buffer can hold
+ // round the result down to a power of two.
+ if (surf_linear) {
+ unsigned int dpte_row_height;
+
+ log2_dpte_row_height_linear = dml_floor(dml_log2(dpte_buf_in_pte_reqs * dpte_req_width / data_pitch), 1); /* NOTE(review): data_pitch is an integer divisor with no zero check */
+
+ dml_print("DML_DLG: %s: is_chroma = %d\n", __func__, is_chroma);
+ dml_print("DML_DLG: %s: dpte_buf_in_pte_reqs = %d\n", __func__, dpte_buf_in_pte_reqs);
+ dml_print("DML_DLG: %s: log2_dpte_row_height_linear = %d\n", __func__, log2_dpte_row_height_linear);
+
+ ASSERT(log2_dpte_row_height_linear >= 3);
+
+ if (log2_dpte_row_height_linear > 7) /* cap the linear row height at 2^7 lines */
+ log2_dpte_row_height_linear = 7;
+
+ log2_dpte_row_height = log2_dpte_row_height_linear;
+ // For linear, the dpte row is pitch dependent and the pte requests wrap at the pitch boundary.
+ // the dpte_row_width_ub is the upper bound of data_pitch*dpte_row_height in elements with this unique buffering.
+ dpte_row_height = 1 << log2_dpte_row_height;
+ dpte_row_width_ub = dml_round_to_multiple(data_pitch * dpte_row_height - 1, dpte_req_width, 1) + dpte_req_width;
+ rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width;
+ } else {
+ // the upper bound of the dpte_row_width without dependency on viewport position follows.
+ // for tiled mode, row height is the same as req height and row store up to vp size upper bound
+ if (!surf_vert) {
+ log2_dpte_row_height = log2_dpte_req_height;
+ dpte_row_width_ub = dml_round_to_multiple(vp_width - 1, dpte_req_width, 1) + dpte_req_width;
+ rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width;
+ } else {
+ log2_dpte_row_height = (log2_blk_width < log2_dpte_req_width) ? log2_blk_width : log2_dpte_req_width; /* the smaller of the two log2 widths */
+ dpte_row_width_ub = dml_round_to_multiple(vp_height - 1, dpte_req_height, 1) + dpte_req_height;
+ rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_height;
+ }
+ }
+ if (log2_blk_bytes >= 16 && log2_vmpg_bytes == 12) // tile block >= 64KB
+ rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 128; //2*64B dpte request
+ else
+ rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 64; //64B dpte request
+
+ rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height;
+
+ // the dpte_group_bytes is reduced for the specific case of vertical
+ // access of a tile surface that has dpte request of 8x1 ptes.
+ if (hostvm_enable) /* hostvm always selects the reduced 512-byte group */
+ rq_sizing_param->dpte_group_bytes = 512;
+ else {
+ if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group
+ rq_sizing_param->dpte_group_bytes = 512;
+ else
+ rq_sizing_param->dpte_group_bytes = 2048;
+ }
+
+ //since pte request size is 64byte, the number of data pte requests per full sized group is as follows.
+ log2_dpte_group_bytes = dml_log2(rq_sizing_param->dpte_group_bytes);
+ log2_dpte_group_length = log2_dpte_group_bytes - 6; //length in 64b requests
+
+ // full sized data pte group width in elements
+ if (!surf_vert)
+ log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_width;
+ else
+ log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_height;
+
+ //But if the tile block >=64KB and the page size is 4KB, then each dPTE request is 2*64B
+ if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) // tile block >= 64KB
+ log2_dpte_group_width = log2_dpte_group_width - 1;
+
+ dpte_group_width = 1 << log2_dpte_group_width;
+
+ // since dpte groups are only aligned to dpte_req_width and not dpte_group_width,
+ // the upper bound for the dpte groups per row is as follows.
+ rq_dlg_param->dpte_groups_per_row_ub = dml_ceil((double) dpte_row_width_ub / dpte_group_width, 1);
+}
+
+static void get_surf_rq_param(
+		struct display_mode_lib *mode_lib,
+		display_data_rq_sizing_params_st *rq_sizing_param,
+		display_data_rq_dlg_params_st *rq_dlg_param,
+		display_data_rq_misc_params_st *rq_misc_param,
+		const display_pipe_params_st *pipe_param,
+		bool is_chroma,
+		bool is_alpha)
+{
+	/*
+	 * Gather the viewport size, pitches and surface height for one plane,
+	 * set the fixed request sizing values in rq_sizing_param, then let
+	 * get_meta_and_pte_attr() fill rq_dlg_param and rq_misc_param.
+	 *
+	 * The "_c" source fields are used when either is_chroma or is_alpha
+	 * is set; otherwise the plain (luma) fields are used.
+	 */
+	const bool second_plane = is_chroma || is_alpha;
+	const unsigned int ppe = 1;
+	unsigned int vp_width;
+	unsigned int vp_height;
+	unsigned int data_pitch;
+	unsigned int meta_pitch;
+	unsigned int surface_height;
+
+	// FIXME check if ppe apply for both luma and chroma in 422 case
+	if (!second_plane) {
+		vp_width = pipe_param->src.viewport_width / ppe;
+		vp_height = pipe_param->src.viewport_height;
+		data_pitch = pipe_param->src.data_pitch;
+		meta_pitch = pipe_param->src.meta_pitch;
+		surface_height = pipe_param->src.surface_height_y;
+	} else {
+		vp_width = pipe_param->src.viewport_width_c / ppe;
+		vp_height = pipe_param->src.viewport_height_c;
+		data_pitch = pipe_param->src.data_pitch_c;
+		meta_pitch = pipe_param->src.meta_pitch_c;
+		surface_height = pipe_param->src.surface_height_y / 2.0;
+	}
+
+	/* With odm_combine set, the extent along the scan direction is replaced by min(ratio * full_recout_width, ratio * hactive / (odm_combine * 2)). */
+	if (pipe_param->dest.odm_combine) {
+		const bool vert_access = (pipe_param->src.source_scan == dm_vert);
+		unsigned int hactive_odm = pipe_param->dest.hactive / ((unsigned int) pipe_param->dest.odm_combine * 2);
+		unsigned int full_src_vp_width;
+		unsigned int src_hactive_odm;
+
+		if (!is_chroma) {
+			full_src_vp_width = pipe_param->scale_ratio_depth.hscl_ratio * pipe_param->dest.full_recout_width;
+			src_hactive_odm = pipe_param->scale_ratio_depth.hscl_ratio * hactive_odm;
+		} else {
+			full_src_vp_width = pipe_param->scale_ratio_depth.hscl_ratio_c * pipe_param->dest.full_recout_width;
+			src_hactive_odm = pipe_param->scale_ratio_depth.hscl_ratio_c * hactive_odm;
+		}
+
+		if (vert_access) {
+			vp_height = dml_min(full_src_vp_width, src_hactive_odm);
+			dml_print("DML_DLG: %s: vp_height = %d\n", __func__, vp_height);
+		} else {
+			vp_width = dml_min(full_src_vp_width, src_hactive_odm);
+			dml_print("DML_DLG: %s: vp_width = %d\n", __func__, vp_width);
+		}
+		dml_print("DML_DLG: %s: full_src_vp_width = %d\n", __func__, full_src_vp_width);
+		dml_print("DML_DLG: %s: hactive_odm = %d\n", __func__, hactive_odm);
+		dml_print("DML_DLG: %s: src_hactive_odm = %d\n", __func__, src_hactive_odm);
+	}
+
+	/* Alpha planes use a 4096-byte chunk; every other plane uses 8192. */
+	rq_sizing_param->chunk_bytes = is_alpha ? 4096 : 8192;
+	/* The minimum chunk is 0 only for a 64KB chunk, which is never assigned above. */
+	rq_sizing_param->min_chunk_bytes = (rq_sizing_param->chunk_bytes == 64 * 1024) ? 0 : 1024;
+
+	rq_sizing_param->meta_chunk_bytes = 2048;
+	rq_sizing_param->min_meta_chunk_bytes = 256;
+
+	/* hostvm selects a 512-byte mpte group instead of 2048. */
+	rq_sizing_param->mpte_group_bytes = pipe_param->src.hostvm ? 512 : 2048;
+
+	/* Derive the swath, meta and dpte attributes for this plane. */
+	get_meta_and_pte_attr(
+			mode_lib,
+			rq_dlg_param,
+			rq_misc_param,
+			rq_sizing_param,
+			vp_width,
+			vp_height,
+			data_pitch,
+			meta_pitch,
+			pipe_param->src.source_format,
+			pipe_param->src.sw_mode,
+			pipe_param->src.macro_tile_size,
+			pipe_param->src.source_scan,
+			pipe_param->src.hostvm,
+			is_chroma,
+			surface_height);
+}
+
+// Fill rq_param for one pipe:
+//  - record the source-format flags (yuv420, yuv420_10bpc, rgbe_alpha),
+//  - compute the luma surface request parameters,
+//  - compute the chroma surface request parameters for dual-plane formats,
+//  - split the det buffer space between luma and chroma,
+//  - print the resulting structure.
+static void dml_rq_dlg_get_rq_params(struct display_mode_lib *mode_lib, display_rq_params_st *rq_param, const display_pipe_params_st *pipe_param)
+{
+	const enum source_format_class fmt = (enum source_format_class) (pipe_param->src.source_format);
+	const bool fmt_is_rgbe_alpha = (fmt == dm_rgbe_alpha);
+	const bool fmt_is_420 = (fmt == dm_420_8 || fmt == dm_420_10 || fmt == dm_420_12);
+
+	// rgbe_alpha is flagged as yuv420 as well, in addition to its own flag
+	rq_param->yuv420 = fmt_is_420 || fmt_is_rgbe_alpha;
+	rq_param->yuv420_10bpc = (fmt == dm_420_10);
+	rq_param->rgbe_alpha = fmt_is_rgbe_alpha;
+
+	// luma surface: neither chroma nor alpha
+	get_surf_rq_param(
+			mode_lib,
+			&rq_param->sizing.rq_l,
+			&rq_param->dlg.rq_l,
+			&rq_param->misc.rq_l,
+			pipe_param,
+			false,
+			false);
+
+	// chroma surface: only for dual-plane formats; the alpha flag follows rgbe_alpha
+	if (is_dual_plane(fmt)) {
+		get_surf_rq_param(
+				mode_lib,
+				&rq_param->sizing.rq_c,
+				&rq_param->dlg.rq_c,
+				&rq_param->misc.rq_c,
+				pipe_param,
+				true,
+				rq_param->rgbe_alpha);
+	}
+
+	// decide how the det buffer space is divided between luma and chroma
+	handle_det_buf_split(mode_lib, rq_param, &pipe_param->src);
+	print__rq_params_st(mode_lib, rq_param);
+}
+
+// Compute the request (rq) register values for one pipe.
+// rq_regs is cleared first, the intermediate rq parameters are derived from
+// pipe_param, and the register fields are then extracted from them and printed.
+void dml314_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib, display_rq_regs_st *rq_regs, const display_pipe_params_st *pipe_param)
+{
+	// zero-initialised scratch parameters, local to this call
+	display_rq_params_st params = {0};
+
+	// the caller's output always starts from an all-zero register image
+	memset(rq_regs, 0, sizeof(*rq_regs));
+
+	dml_rq_dlg_get_rq_params(mode_lib, &params, pipe_param);
+	extract_rq_regs(mode_lib, rq_regs, &params);
+	print__rq_regs_st(mode_lib, rq_regs);
+}
+
+// Compute the cursor TTU values: reference cycles per cursor request delivery,
+// for prefetch (*refcyc_per_req_delivery_pre_cur) and for active
+// (*refcyc_per_req_delivery_cur).
+//
+// Both outputs are always written; they stay 0.0 when cur_width is 0.
+// The request size is 64 bytes for a 2-bit cursor; otherwise the cursor is
+// treated as 32 bits per pixel and the request size is chosen from the width.
+// For each output, a vratio <= 1.0 selects the formula based on hactive_cur
+// and ref_freq_to_pix_freq; otherwise the formula based on
+// refclk_freq_in_mhz and hscale_pixel_rate_l is used.
+static void calculate_ttu_cursor(
+		struct display_mode_lib *mode_lib,
+		double *refcyc_per_req_delivery_pre_cur,
+		double *refcyc_per_req_delivery_cur,
+		double refclk_freq_in_mhz,
+		double ref_freq_to_pix_freq,
+		double hscale_pixel_rate_l,
+		double hscl_ratio,
+		double vratio_pre_l,
+		double vratio_l,
+		unsigned int cur_width,
+		enum cursor_bpp cur_bpp)
+{
+	unsigned int bits_per_pixel;
+	unsigned int req_bytes;
+	unsigned int req_width;
+	double width_ub;
+	double reqs_per_width;
+	double hactive_cur;
+
+	ASSERT(cur_width <= 256);
+
+	*refcyc_per_req_delivery_pre_cur = 0.0;
+	*refcyc_per_req_delivery_cur = 0.0;
+
+	// no cursor: leave both outputs at zero
+	if (cur_width == 0)
+		return;
+
+	if (cur_bpp == dm_cur_2bit) {
+		bits_per_pixel = 2;
+		req_bytes = 64; // byte
+	} else { // 32bit
+		bits_per_pixel = 32;
+		// width is known to be >= 1 here, so only the upper bounds need checking
+		if (cur_width <= 16)
+			req_bytes = 64;
+		else if (cur_width <= 31)
+			req_bytes = 128;
+		else
+			req_bytes = 256;
+	}
+
+	// pixels covered by one request, then the cursor width rounded up to whole requests
+	req_width = (double) req_bytes / ((double) bits_per_pixel / 8.0);
+	width_ub = dml_ceil((double) cur_width / (double) req_width, 1) * (double) req_width;
+	reqs_per_width = width_ub / (double) req_width;
+	hactive_cur = (double) cur_width / hscl_ratio; // FIXME: oswin to think about what to do for cursor
+
+	// prefetch
+	*refcyc_per_req_delivery_pre_cur = (vratio_pre_l <= 1.0) ?
+			hactive_cur * ref_freq_to_pix_freq / reqs_per_width :
+			refclk_freq_in_mhz * (double) cur_width / hscale_pixel_rate_l / reqs_per_width;
+
+	ASSERT(*refcyc_per_req_delivery_pre_cur < dml_pow(2, 13));
+
+	// active
+	*refcyc_per_req_delivery_cur = (vratio_l <= 1.0) ?
+			hactive_cur * ref_freq_to_pix_freq / reqs_per_width :
+			refclk_freq_in_mhz * (double) cur_width / hscale_pixel_rate_l / reqs_per_width;
+
+	dml_print("DML_DLG: %s: cur_req_width = %d\n", __func__, req_width);
+	dml_print("DML_DLG: %s: cur_width_ub = %3.2f\n", __func__, width_ub);
+	dml_print("DML_DLG: %s: cur_req_per_width = %3.2f\n", __func__, reqs_per_width);
+	dml_print("DML_DLG: %s: hactive_cur = %3.2f\n", __func__, hactive_cur);
+	dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur = %3.2f\n", __func__, *refcyc_per_req_delivery_pre_cur);
+	dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur = %3.2f\n", __func__, *refcyc_per_req_delivery_cur);
+
+	ASSERT(*refcyc_per_req_delivery_cur < dml_pow(2, 13));
+}
+
+// Note: currently taken in as is.
+// It would be nice to decouple this code from the hw register implementation and to extract the code that is repeated for luma and chroma.
+static void dml_rq_dlg_get_dlg_params(
+ struct display_mode_lib *mode_lib,
+ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx,
+ display_dlg_regs_st *disp_dlg_regs,
+ display_ttu_regs_st *disp_ttu_regs,
+ const display_rq_dlg_params_st *rq_dlg_param,
+ const display_dlg_sys_params_st *dlg_sys_param,
+ const bool cstate_en,
+ const bool pstate_en,
+ const bool vm_en,
+ const bool ignore_viewport_pos,
+ const bool immediate_flip_support)
+{
+ const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src;
+ const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest;
+ const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg;
+ const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth;
+ const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps;
+ unsigned int pipe_index_in_combine[DC__NUM_PIPES__MAX];
+
+ // -------------------------
+ // Section 1.15.2.1: OTG dependent Params
+ // -------------------------
+ // Timing
+ unsigned int htotal = dst->htotal;
+ unsigned int hblank_end = dst->hblank_end;
+ unsigned int vblank_start = dst->vblank_start;
+ unsigned int vblank_end = dst->vblank_end;
+
+ double dppclk_freq_in_mhz = clks->dppclk_mhz;
+ double refclk_freq_in_mhz = clks->refclk_mhz;
+ double pclk_freq_in_mhz = dst->pixel_rate_mhz;
+ bool interlaced = dst->interlaced;
+ double ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz;
+ double min_ttu_vblank;
+ unsigned int dlg_vblank_start;
+ bool dual_plane;
+ unsigned int access_dir;
+ unsigned int vp_height_l;
+ unsigned int vp_width_l;
+ unsigned int vp_height_c;
+ unsigned int vp_width_c;
+
+ // Scaling
+ unsigned int htaps_l;
+ unsigned int htaps_c;
+ double hratio_l;
+ double hratio_c;
+ double vratio_l;
+ double vratio_c;
+
+ unsigned int swath_width_ub_l;
+ unsigned int dpte_groups_per_row_ub_l;
+ unsigned int swath_width_ub_c;
+ unsigned int dpte_groups_per_row_ub_c;
+
+ unsigned int meta_chunks_per_row_ub_l;
+ unsigned int meta_chunks_per_row_ub_c;
+ unsigned int vupdate_offset;
+ unsigned int vupdate_width;
+ unsigned int vready_offset;
+
+ unsigned int vstartup_start;
+ unsigned int dst_x_after_scaler;
+ unsigned int dst_y_after_scaler;
+ double dst_y_prefetch;
+ double dst_y_per_vm_vblank;
+ double dst_y_per_row_vblank;
+ double dst_y_per_vm_flip;
+ double dst_y_per_row_flip;
+ double max_dst_y_per_vm_vblank;
+ double max_dst_y_per_row_vblank;
+ double vratio_pre_l;
+ double vratio_pre_c;
+ unsigned int req_per_swath_ub_l;
+ unsigned int req_per_swath_ub_c;
+ unsigned int meta_row_height_l;
+ unsigned int meta_row_height_c;
+ unsigned int swath_width_pixels_ub_l;
+ unsigned int swath_width_pixels_ub_c;
+ unsigned int scaler_rec_in_width_l;
+ unsigned int scaler_rec_in_width_c;
+ unsigned int dpte_row_height_l;
+ unsigned int dpte_row_height_c;
+ double hscale_pixel_rate_l;
+ double hscale_pixel_rate_c;
+ double min_hratio_fact_l;
+ double min_hratio_fact_c;
+ double refcyc_per_line_delivery_pre_l;
+ double refcyc_per_line_delivery_pre_c;
+ double refcyc_per_line_delivery_l;
+ double refcyc_per_line_delivery_c;
+
+ double refcyc_per_req_delivery_pre_l;
+ double refcyc_per_req_delivery_pre_c;
+ double refcyc_per_req_delivery_l;
+ double refcyc_per_req_delivery_c;
+
+ unsigned int full_recout_width;
+ double refcyc_per_req_delivery_pre_cur0;
+ double refcyc_per_req_delivery_cur0;
+ double refcyc_per_req_delivery_pre_cur1;
+ double refcyc_per_req_delivery_cur1;
+ unsigned int vba__min_dst_y_next_start = get_min_dst_y_next_start(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // FROM VBA
+ unsigned int vba__vready_after_vcount0 = get_vready_at_or_after_vsync(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ float vba__refcyc_per_line_delivery_pre_l = get_refcyc_per_line_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ float vba__refcyc_per_line_delivery_l = get_refcyc_per_line_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ float vba__refcyc_per_req_delivery_pre_l = get_refcyc_per_req_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ float vba__refcyc_per_req_delivery_l = get_refcyc_per_req_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs));
+ memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs));
+
+ dml_print("DML_DLG: %s: cstate_en = %d\n", __func__, cstate_en);
+ dml_print("DML_DLG: %s: pstate_en = %d\n", __func__, pstate_en);
+ dml_print("DML_DLG: %s: vm_en = %d\n", __func__, vm_en);
+ dml_print("DML_DLG: %s: ignore_viewport_pos = %d\n", __func__, ignore_viewport_pos);
+ dml_print("DML_DLG: %s: immediate_flip_support = %d\n", __func__, immediate_flip_support);
+
+ dml_print("DML_DLG: %s: dppclk_freq_in_mhz = %3.2f\n", __func__, dppclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced); ASSERT(ref_freq_to_pix_freq < 4.0);
+
+ disp_dlg_regs->ref_freq_to_pix_freq = (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19));
+ disp_dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal * dml_pow(2, 8));
+ disp_dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits
+
+ //set_prefetch_mode(mode_lib, cstate_en, pstate_en, ignore_viewport_pos, immediate_flip_support);
+ min_ttu_vblank = get_min_ttu_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start;
+ disp_dlg_regs->min_dst_y_next_start_us =
+ (vba__min_dst_y_next_start * dst->hactive) / (unsigned int) dst->pixel_rate_mhz;
+ disp_dlg_regs->min_dst_y_next_start = vba__min_dst_y_next_start * dml_pow(2, 2);
+
+ ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18));
+
+ dml_print("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, min_ttu_vblank);
+ dml_print("DML_DLG: %s: min_dst_y_next_start = 0x%0x\n", __func__, disp_dlg_regs->min_dst_y_next_start);
+ dml_print("DML_DLG: %s: dlg_vblank_start = 0x%0x\n", __func__, dlg_vblank_start);
+ dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, ref_freq_to_pix_freq);
+ dml_print("DML_DLG: %s: vba__min_dst_y_next_start = 0x%0x\n", __func__, vba__min_dst_y_next_start);
+
+ //old_impl_vs_vba_impl("min_dst_y_next_start", dlg_vblank_start, vba__min_dst_y_next_start);
+
+ // -------------------------
+ // Section 1.15.2.2: Prefetch, Active and TTU
+ // -------------------------
+ // Prefetch Calc
+ // Source
+ dual_plane = is_dual_plane((enum source_format_class) (src->source_format));
+ access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
+ vp_height_l = src->viewport_height;
+ vp_width_l = src->viewport_width;
+ vp_height_c = src->viewport_height_c;
+ vp_width_c = src->viewport_width_c;
+
+ // Scaling
+ htaps_l = taps->htaps;
+ htaps_c = taps->htaps_c;
+ hratio_l = scl->hscl_ratio;
+ hratio_c = scl->hscl_ratio_c;
+ vratio_l = scl->vscl_ratio;
+ vratio_c = scl->vscl_ratio_c;
+
+ swath_width_ub_l = rq_dlg_param->rq_l.swath_width_ub;
+ dpte_groups_per_row_ub_l = rq_dlg_param->rq_l.dpte_groups_per_row_ub;
+ swath_width_ub_c = rq_dlg_param->rq_c.swath_width_ub;
+ dpte_groups_per_row_ub_c = rq_dlg_param->rq_c.dpte_groups_per_row_ub;
+
+ meta_chunks_per_row_ub_l = rq_dlg_param->rq_l.meta_chunks_per_row_ub;
+ meta_chunks_per_row_ub_c = rq_dlg_param->rq_c.meta_chunks_per_row_ub;
+ vupdate_offset = dst->vupdate_offset;
+ vupdate_width = dst->vupdate_width;
+ vready_offset = dst->vready_offset;
+
+ vstartup_start = dst->vstartup_start;
+ if (interlaced) {
+ if (vstartup_start / 2.0 - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal <= vblank_end / 2.0)
+ disp_dlg_regs->vready_after_vcount0 = 1;
+ else
+ disp_dlg_regs->vready_after_vcount0 = 0;
+ } else {
+ if (vstartup_start - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal <= vblank_end)
+ disp_dlg_regs->vready_after_vcount0 = 1;
+ else
+ disp_dlg_regs->vready_after_vcount0 = 0;
+ }
+
+ dml_print("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0);
+ dml_print("DML_DLG: %s: vba__vready_after_vcount0 = %d\n", __func__, vba__vready_after_vcount0);
+ //old_impl_vs_vba_impl("vready_after_vcount0", disp_dlg_regs->vready_after_vcount0, vba__vready_after_vcount0);
+
+ if (interlaced)
+ vstartup_start = vstartup_start / 2;
+
+ dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ // do some adjustment on the dst_after scaler to account for odm combine mode
+ dml_print("DML_DLG: %s: input dst_x_after_scaler = %d\n", __func__, dst_x_after_scaler);
+ dml_print("DML_DLG: %s: input dst_y_after_scaler = %d\n", __func__, dst_y_after_scaler);
+
+ // need to figure out which side of odm combine we're in
+ if (dst->odm_combine) {
+ // figure out which pipes go together
+ bool visited[DC__NUM_PIPES__MAX];
+ unsigned int i, j, k;
+
+ for (k = 0; k < num_pipes; ++k) {
+ visited[k] = false;
+ pipe_index_in_combine[k] = 0;
+ }
+
+ for (i = 0; i < num_pipes; i++) {
+ if (e2e_pipe_param[i].pipe.src.is_hsplit && !visited[i]) {
+
+ unsigned int grp = e2e_pipe_param[i].pipe.src.hsplit_grp;
+ unsigned int grp_idx = 0;
+
+ for (j = i; j < num_pipes; j++) {
+ if (e2e_pipe_param[j].pipe.src.hsplit_grp == grp && e2e_pipe_param[j].pipe.src.is_hsplit && !visited[j]) {
+ pipe_index_in_combine[j] = grp_idx;
+ dml_print("DML_DLG: %s: pipe[%d] is in grp %d idx %d\n", __func__, j, grp, grp_idx);
+ grp_idx++;
+ visited[j] = true;
+ }
+ }
+ }
+ }
+
+ }
+
+ if (dst->odm_combine == dm_odm_combine_mode_disabled) {
+ disp_dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end * ref_freq_to_pix_freq);
+ } else {
+ unsigned int odm_combine_factor = (dst->odm_combine == dm_odm_combine_mode_2to1 ? 2 : 4); // TODO: We should really check that 4to1 is supported before setting it to 4
+ unsigned int odm_pipe_index = pipe_index_in_combine[pipe_idx];
+
+ disp_dlg_regs->refcyc_h_blank_end = (unsigned int) (((double) hblank_end + odm_pipe_index * (double) dst->hactive / odm_combine_factor) * ref_freq_to_pix_freq);
+ } ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)dml_pow(2, 13));
+
+ dml_print("DML_DLG: %s: htotal = %d\n", __func__, htotal);
+ dml_print("DML_DLG: %s: dst_x_after_scaler[%d] = %d\n", __func__, pipe_idx, dst_x_after_scaler);
+ dml_print("DML_DLG: %s: dst_y_after_scaler[%d] = %d\n", __func__, pipe_idx, dst_y_after_scaler);
+
+ dst_y_prefetch = get_dst_y_prefetch(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ dst_y_per_vm_vblank = get_dst_y_per_vm_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ dst_y_per_row_vblank = get_dst_y_per_row_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ dst_y_per_vm_flip = get_dst_y_per_vm_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ dst_y_per_row_flip = get_dst_y_per_row_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ max_dst_y_per_vm_vblank = 32.0; //U5.2
+ max_dst_y_per_row_vblank = 16.0; //U4.2
+
+ // magic!
+ if (htotal <= 75) {
+ max_dst_y_per_vm_vblank = 100.0;
+ max_dst_y_per_row_vblank = 100.0;
+ }
+
+ dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch);
+ dml_print("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, dst_y_per_vm_flip);
+ dml_print("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, dst_y_per_row_flip);
+ dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank);
+ dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank);
+
+ ASSERT(dst_y_per_vm_vblank < max_dst_y_per_vm_vblank); ASSERT(dst_y_per_row_vblank < max_dst_y_per_row_vblank);
+
+ ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank));
+
+ vratio_pre_l = get_vratio_prefetch_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ vratio_pre_c = get_vratio_prefetch_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ dml_print("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, vratio_pre_l);
+ dml_print("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, vratio_pre_c);
+
+ // Active
+ req_per_swath_ub_l = rq_dlg_param->rq_l.req_per_swath_ub;
+ req_per_swath_ub_c = rq_dlg_param->rq_c.req_per_swath_ub;
+ meta_row_height_l = rq_dlg_param->rq_l.meta_row_height;
+ meta_row_height_c = rq_dlg_param->rq_c.meta_row_height;
+ swath_width_pixels_ub_l = 0;
+ swath_width_pixels_ub_c = 0;
+ scaler_rec_in_width_l = 0;
+ scaler_rec_in_width_c = 0;
+ dpte_row_height_l = rq_dlg_param->rq_l.dpte_row_height;
+ dpte_row_height_c = rq_dlg_param->rq_c.dpte_row_height;
+
+ swath_width_pixels_ub_l = swath_width_ub_l;
+ swath_width_pixels_ub_c = swath_width_ub_c;
+
+ if (hratio_l <= 1)
+ min_hratio_fact_l = 2.0;
+ else if (htaps_l <= 6) {
+ if ((hratio_l * 2.0) > 4.0)
+ min_hratio_fact_l = 4.0;
+ else
+ min_hratio_fact_l = hratio_l * 2.0;
+ } else {
+ if (hratio_l > 4.0)
+ min_hratio_fact_l = 4.0;
+ else
+ min_hratio_fact_l = hratio_l;
+ }
+
+ hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz;
+
+ dml_print("DML_DLG: %s: hratio_l = %3.2f\n", __func__, hratio_l);
+ dml_print("DML_DLG: %s: min_hratio_fact_l = %3.2f\n", __func__, min_hratio_fact_l);
+ dml_print("DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n", __func__, hscale_pixel_rate_l);
+
+ if (hratio_c <= 1)
+ min_hratio_fact_c = 2.0;
+ else if (htaps_c <= 6) {
+ if ((hratio_c * 2.0) > 4.0)
+ min_hratio_fact_c = 4.0;
+ else
+ min_hratio_fact_c = hratio_c * 2.0;
+ } else {
+ if (hratio_c > 4.0)
+ min_hratio_fact_c = 4.0;
+ else
+ min_hratio_fact_c = hratio_c;
+ }
+
+ hscale_pixel_rate_c = min_hratio_fact_c * dppclk_freq_in_mhz;
+
+ refcyc_per_line_delivery_pre_l = 0.;
+ refcyc_per_line_delivery_pre_c = 0.;
+ refcyc_per_line_delivery_l = 0.;
+ refcyc_per_line_delivery_c = 0.;
+
+ refcyc_per_req_delivery_pre_l = 0.;
+ refcyc_per_req_delivery_pre_c = 0.;
+ refcyc_per_req_delivery_l = 0.;
+ refcyc_per_req_delivery_c = 0.;
+
+ full_recout_width = 0;
+ // In ODM
+ if (src->is_hsplit) {
+ // This "hack" is only allowed (and valid) for MPC combine. In ODM
+ // combine, you MUST specify the full_recout_width...according to Oswin
+ if (dst->full_recout_width == 0 && !dst->odm_combine) {
+ dml_print("DML_DLG: %s: Warning: full_recout_width not set in hsplit mode\n", __func__);
+ full_recout_width = dst->recout_width * 2; // assume half split for dcn1
+ } else
+ full_recout_width = dst->full_recout_width;
+ } else
+ full_recout_width = dst->recout_width;
+
+ // As of DCN2, mpc_combine and odm_combine are mutually exclusive
+ refcyc_per_line_delivery_pre_l = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_pre_l,
+ hscale_pixel_rate_l,
+ swath_width_pixels_ub_l,
+ 1); // per line
+
+ refcyc_per_line_delivery_l = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_l,
+ hscale_pixel_rate_l,
+ swath_width_pixels_ub_l,
+ 1); // per line
+
+ dml_print("DML_DLG: %s: full_recout_width = %d\n", __func__, full_recout_width);
+ dml_print("DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n", __func__, hscale_pixel_rate_l);
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, refcyc_per_line_delivery_pre_l);
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, refcyc_per_line_delivery_l);
+ dml_print("DML_DLG: %s: vba__refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, vba__refcyc_per_line_delivery_pre_l);
+ dml_print("DML_DLG: %s: vba__refcyc_per_line_delivery_l = %3.2f\n", __func__, vba__refcyc_per_line_delivery_l);
+
+ //old_impl_vs_vba_impl("refcyc_per_line_delivery_pre_l", refcyc_per_line_delivery_pre_l, vba__refcyc_per_line_delivery_pre_l);
+ //old_impl_vs_vba_impl("refcyc_per_line_delivery_l", refcyc_per_line_delivery_l, vba__refcyc_per_line_delivery_l);
+
+ if (dual_plane) {
+ float vba__refcyc_per_line_delivery_pre_c = get_refcyc_per_line_delivery_pre_c_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ float vba__refcyc_per_line_delivery_c = get_refcyc_per_line_delivery_c_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ refcyc_per_line_delivery_pre_c = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_pre_c,
+ hscale_pixel_rate_c,
+ swath_width_pixels_ub_c,
+ 1); // per line
+
+ refcyc_per_line_delivery_c = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_c,
+ hscale_pixel_rate_c,
+ swath_width_pixels_ub_c,
+ 1); // per line
+
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, refcyc_per_line_delivery_pre_c);
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, refcyc_per_line_delivery_c);
+ dml_print("DML_DLG: %s: vba__refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, vba__refcyc_per_line_delivery_pre_c);
+ dml_print("DML_DLG: %s: vba__refcyc_per_line_delivery_c = %3.2f\n", __func__, vba__refcyc_per_line_delivery_c);
+
+ //old_impl_vs_vba_impl("refcyc_per_line_delivery_pre_c", refcyc_per_line_delivery_pre_c, vba__refcyc_per_line_delivery_pre_c);
+ //old_impl_vs_vba_impl("refcyc_per_line_delivery_c", refcyc_per_line_delivery_c, vba__refcyc_per_line_delivery_c);
+ }
+
+ if (src->dynamic_metadata_enable && src->gpuvm)
+ disp_dlg_regs->refcyc_per_vm_dmdata = get_refcyc_per_vm_dmdata_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ disp_dlg_regs->dmdata_dl_delta = get_dmdata_dl_delta_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ // TTU - Luma / Chroma
+ if (access_dir) { // vertical access
+ scaler_rec_in_width_l = vp_height_l;
+ scaler_rec_in_width_c = vp_height_c;
+ } else {
+ scaler_rec_in_width_l = vp_width_l;
+ scaler_rec_in_width_c = vp_width_c;
+ }
+
+ refcyc_per_req_delivery_pre_l = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_pre_l,
+ hscale_pixel_rate_l,
+ scaler_rec_in_width_l,
+ req_per_swath_ub_l); // per req
+
+ refcyc_per_req_delivery_l = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_l,
+ hscale_pixel_rate_l,
+ scaler_rec_in_width_l,
+ req_per_swath_ub_l); // per req
+
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, refcyc_per_req_delivery_pre_l);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, refcyc_per_req_delivery_l);
+ dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, vba__refcyc_per_req_delivery_pre_l);
+ dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_l = %3.2f\n", __func__, vba__refcyc_per_req_delivery_l);
+
+ //old_impl_vs_vba_impl("refcyc_per_req_delivery_pre_l", refcyc_per_req_delivery_pre_l, vba__refcyc_per_req_delivery_pre_l);
+ //old_impl_vs_vba_impl("refcyc_per_req_delivery_l", refcyc_per_req_delivery_l, vba__refcyc_per_req_delivery_l);
+
+ ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13)); ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13));
+
+ if (dual_plane) {
+ float vba__refcyc_per_req_delivery_pre_c = get_refcyc_per_req_delivery_pre_c_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ float vba__refcyc_per_req_delivery_c = get_refcyc_per_req_delivery_c_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ refcyc_per_req_delivery_pre_c = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_pre_c,
+ hscale_pixel_rate_c,
+ scaler_rec_in_width_c,
+ req_per_swath_ub_c); // per req
+ refcyc_per_req_delivery_c = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_c,
+ hscale_pixel_rate_c,
+ scaler_rec_in_width_c,
+ req_per_swath_ub_c); // per req
+
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, refcyc_per_req_delivery_pre_c);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, refcyc_per_req_delivery_c);
+ dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, vba__refcyc_per_req_delivery_pre_c);
+ dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_c = %3.2f\n", __func__, vba__refcyc_per_req_delivery_c);
+
+ //old_impl_vs_vba_impl("refcyc_per_req_delivery_pre_c", refcyc_per_req_delivery_pre_c, vba__refcyc_per_req_delivery_pre_c);
+ //old_impl_vs_vba_impl("refcyc_per_req_delivery_c", refcyc_per_req_delivery_c, vba__refcyc_per_req_delivery_c);
+
+ ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13)); ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13));
+ }
+
+ // TTU - Cursor
+ refcyc_per_req_delivery_pre_cur0 = 0.0;
+ refcyc_per_req_delivery_cur0 = 0.0;
+
+ ASSERT(src->num_cursors <= 1);
+
+ if (src->num_cursors > 0) {
+ float vba__refcyc_per_req_delivery_pre_cur0;
+ float vba__refcyc_per_req_delivery_cur0;
+
+ calculate_ttu_cursor(
+ mode_lib,
+ &refcyc_per_req_delivery_pre_cur0,
+ &refcyc_per_req_delivery_cur0,
+ refclk_freq_in_mhz,
+ ref_freq_to_pix_freq,
+ hscale_pixel_rate_l,
+ scl->hscl_ratio,
+ vratio_pre_l,
+ vratio_l,
+ src->cur0_src_width,
+ (enum cursor_bpp) (src->cur0_bpp));
+
+ vba__refcyc_per_req_delivery_pre_cur0 = get_refcyc_per_cursor_req_delivery_pre_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ vba__refcyc_per_req_delivery_cur0 = get_refcyc_per_cursor_req_delivery_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur0 = %3.2f\n", __func__, refcyc_per_req_delivery_pre_cur0);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur0 = %3.2f\n", __func__, refcyc_per_req_delivery_cur0);
+ dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_pre_cur0 = %3.2f\n", __func__, vba__refcyc_per_req_delivery_pre_cur0);
+ dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_cur0 = %3.2f\n", __func__, vba__refcyc_per_req_delivery_cur0);
+
+ //old_impl_vs_vba_impl("refcyc_per_req_delivery_pre_cur0", refcyc_per_req_delivery_pre_cur0, vba__refcyc_per_req_delivery_pre_cur0);
+ //old_impl_vs_vba_impl("refcyc_per_req_delivery_cur0", refcyc_per_req_delivery_cur0, vba__refcyc_per_req_delivery_cur0);
+ }
+
+ refcyc_per_req_delivery_pre_cur1 = 0.0;
+ refcyc_per_req_delivery_cur1 = 0.0;
+
+ // TTU - Misc
+ // all hard-coded
+
+ // Assignment to register structures
+ disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line
+ ASSERT(disp_dlg_regs->dst_y_after_scaler < 8);
+ disp_dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; // in terms of refclk
+ ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)dml_pow(2, 13));
+ disp_dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2));
+ disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2));
+ disp_dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2));
+ disp_dlg_regs->dst_y_per_vm_flip = (unsigned int) (dst_y_per_vm_flip * dml_pow(2, 2));
+ disp_dlg_regs->dst_y_per_row_flip = (unsigned int) (dst_y_per_row_flip * dml_pow(2, 2));
+
+ disp_dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19));
+ disp_dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19));
+
+ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank);
+ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank);
+ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip);
+ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip);
+
+ disp_dlg_regs->refcyc_per_pte_group_vblank_l = (unsigned int) (dst_y_per_row_vblank * (double) htotal * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l);
+ ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)dml_pow(2, 13));
+
+ if (dual_plane) {
+ disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int) (dst_y_per_row_vblank * (double) htotal * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_c);
+ ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)dml_pow(2, 13));
+ }
+
+ disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = (unsigned int) (dst_y_per_row_vblank * (double) htotal * ref_freq_to_pix_freq / (double) meta_chunks_per_row_ub_l);
+ ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int)dml_pow(2, 13));
+
+ disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; // dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now
+
+ disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int) (dst_y_per_row_flip * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_l;
+ disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int) (dst_y_per_row_flip * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_l;
+
+ if (dual_plane) {
+ disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int) (dst_y_per_row_flip * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_c;
+ disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int) (dst_y_per_row_flip * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_c;
+ }
+
+ disp_dlg_regs->refcyc_per_vm_group_vblank = get_refcyc_per_vm_group_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ disp_dlg_regs->refcyc_per_vm_group_flip = get_refcyc_per_vm_group_flip_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ disp_dlg_regs->refcyc_per_vm_req_vblank = get_refcyc_per_vm_req_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10); // From VBA
+ disp_dlg_regs->refcyc_per_vm_req_flip = get_refcyc_per_vm_req_flip_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10); // From VBA
+
+ // Clamp to max for now
+ if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_vm_group_vblank = dml_pow(2, 23) - 1;
+
+ if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_vm_group_flip = dml_pow(2, 23) - 1;
+
+ if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_vm_req_vblank = dml_pow(2, 23) - 1;
+
+ if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_vm_req_flip = dml_pow(2, 23) - 1;
+
+ disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int) ((double) dpte_row_height_l / (double) vratio_l * dml_pow(2, 2));
+ ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int)dml_pow(2, 17));
+ if (dual_plane) {
+ disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int) ((double) dpte_row_height_c / (double) vratio_c * dml_pow(2, 2));
+ if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int) dml_pow(2, 17)) {
+ dml_print(
+ "DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u larger than supported by register format U15.2 %u\n",
+ __func__,
+ disp_dlg_regs->dst_y_per_pte_row_nom_c,
+ (unsigned int) dml_pow(2, 17) - 1);
+ }
+ }
+
+ disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int) ((double) meta_row_height_l / (double) vratio_l * dml_pow(2, 2));
+ ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int)dml_pow(2, 17));
+
+ disp_dlg_regs->dst_y_per_meta_row_nom_c = (unsigned int) ((double) meta_row_height_c / (double) vratio_c * dml_pow(2, 2));
+ ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_c < (unsigned int)dml_pow(2, 17));
+
+ disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int) ((double) dpte_row_height_l / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq
+ / (double) dpte_groups_per_row_ub_l);
+ if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1;
+ disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int) ((double) meta_row_height_l / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq
+ / (double) meta_chunks_per_row_ub_l);
+ if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1;
+
+ if (dual_plane) {
+ disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int) ((double) dpte_row_height_c / (double) vratio_c * (double) htotal * ref_freq_to_pix_freq
+ / (double) dpte_groups_per_row_ub_c);
+ if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1;
+
+ // TODO: Is this the right calculation? Does htotal need to be halved?
+ disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (unsigned int) ((double) meta_row_height_c / (double) vratio_c * (double) htotal * ref_freq_to_pix_freq
+ / (double) meta_chunks_per_row_ub_c);
+ if (disp_dlg_regs->refcyc_per_meta_chunk_nom_c >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_meta_chunk_nom_c = dml_pow(2, 23) - 1;
+ }
+
+ disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_l, 1);
+ disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor(refcyc_per_line_delivery_l, 1);
+ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)dml_pow(2, 13)); ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)dml_pow(2, 13));
+
+ disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_c, 1);
+ disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor(refcyc_per_line_delivery_c, 1);
+ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)dml_pow(2, 13)); ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)dml_pow(2, 13));
+
+ disp_dlg_regs->chunk_hdl_adjust_cur0 = 3;
+ disp_dlg_regs->dst_y_offset_cur0 = 0;
+ disp_dlg_regs->chunk_hdl_adjust_cur1 = 3;
+ disp_dlg_regs->dst_y_offset_cur1 = 0;
+
+ disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off
+
+ disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int) (refcyc_per_req_delivery_pre_c * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 = (unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_cur0 = (unsigned int) (refcyc_per_req_delivery_cur0 * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_pre_cur1 = (unsigned int) (refcyc_per_req_delivery_pre_cur1 * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_cur1 = (unsigned int) (refcyc_per_req_delivery_cur1 * dml_pow(2, 10));
+
+ disp_ttu_regs->qos_level_low_wm = 0;
+ ASSERT(disp_ttu_regs->qos_level_low_wm < dml_pow(2, 14));
+
+ disp_ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal * ref_freq_to_pix_freq);
+ ASSERT(disp_ttu_regs->qos_level_high_wm < dml_pow(2, 14));
+
+ disp_ttu_regs->qos_level_flip = 14;
+ disp_ttu_regs->qos_level_fixed_l = 8;
+ disp_ttu_regs->qos_level_fixed_c = 8;
+ disp_ttu_regs->qos_level_fixed_cur0 = 8;
+ disp_ttu_regs->qos_ramp_disable_l = 0;
+ disp_ttu_regs->qos_ramp_disable_c = 0;
+ disp_ttu_regs->qos_ramp_disable_cur0 = 0;
+
+ disp_ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz;
+ ASSERT(disp_ttu_regs->min_ttu_vblank < dml_pow(2, 24));
+
+ print__ttu_regs_st(mode_lib, disp_ttu_regs);
+ print__dlg_regs_st(mode_lib, disp_dlg_regs);
+}
+
+/*
+ * Produce the DLG and TTU register values for the pipe selected by pipe_idx.
+ *
+ * The system-level inputs (watermarks, latencies, immediate-flip totals) are
+ * queried from the mode library first, then the RQ parameters of the selected
+ * pipe are derived, and both are passed to dml_rq_dlg_get_dlg_params(), which
+ * writes the results into dlg_regs and ttu_regs.
+ */
+void dml314_rq_dlg_get_dlg_reg(
+	struct display_mode_lib *mode_lib,
+	display_dlg_regs_st *dlg_regs,
+	display_ttu_regs_st *ttu_regs,
+	const display_e2e_pipe_params_st *e2e_pipe_param,
+	const unsigned int num_pipes,
+	const unsigned int pipe_idx,
+	const bool cstate_en,
+	const bool pstate_en,
+	const bool vm_en,
+	const bool ignore_viewport_pos,
+	const bool immediate_flip_support)
+{
+	display_dlg_sys_params_st sys_params = {0};
+	display_rq_params_st rq_params = {0};
+
+	/* System-wide watermarks and latencies, in the order DML is queried */
+	sys_params.t_urg_wm_us = get_wm_urgent(mode_lib, e2e_pipe_param, num_pipes);
+	sys_params.deepsleep_dcfclk_mhz = get_clk_dcf_deepsleep(mode_lib, e2e_pipe_param, num_pipes);
+	sys_params.t_extra_us = get_urgent_extra_latency(mode_lib, e2e_pipe_param, num_pipes);
+	sys_params.mem_trip_us = get_wm_memory_trip(mode_lib, e2e_pipe_param, num_pipes);
+	sys_params.t_mclk_wm_us = get_wm_dram_clock_change(mode_lib, e2e_pipe_param, num_pipes);
+	sys_params.t_sr_wm_us = get_wm_stutter_enter_exit(mode_lib, e2e_pipe_param, num_pipes);
+
+	/* Immediate-flip totals */
+	sys_params.total_flip_bw = get_total_immediate_flip_bw(mode_lib, e2e_pipe_param, num_pipes);
+	sys_params.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib, e2e_pipe_param, num_pipes);
+
+	print__dlg_sys_params_st(mode_lib, &sys_params);
+
+	/* Per-pipe part: RQ parameters first, then the DLG/TTU registers */
+	dml_print("DML_DLG: Calculation for pipe[%d] start\n\n", pipe_idx);
+	dml_rq_dlg_get_rq_params(mode_lib, &rq_params, &e2e_pipe_param[pipe_idx].pipe);
+	dml_rq_dlg_get_dlg_params(mode_lib,
+				  e2e_pipe_param,
+				  num_pipes,
+				  pipe_idx,
+				  dlg_regs,
+				  ttu_regs,
+				  &rq_params.dlg,
+				  &sys_params,
+				  cstate_en,
+				  pstate_en,
+				  vm_en,
+				  ignore_viewport_pos,
+				  immediate_flip_support);
+	dml_print("DML_DLG: Calculation for pipe[%d] end\n", pipe_idx);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.h b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.h
new file mode 100644
index 000000000000..49cb85d1056c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DML314_DISPLAY_RQ_DLG_CALC_H__
+#define __DML314_DISPLAY_RQ_DLG_CALC_H__
+
+#include "../display_rq_dlg_helpers.h"
+
+struct display_mode_lib;
+
+// Function: dml314_rq_dlg_get_rq_reg
+// Main entry point for test to get the register values out of this DML class.
+// This function calls the <get_rq_param> and <extract_rq_regs> functions to calculate
+// and then populate the rq_regs struct
+// Input:
+// pipe_param - pipe source configuration (e.g. vp, pitch, scaling, dest, etc.)
+// Output:
+// rq_regs - struct that holds all the RQ registers field value.
+// See also: <display_rq_regs_st>
+void dml314_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib,
+ display_rq_regs_st *rq_regs,
+ const display_pipe_params_st *pipe_param);
+
+// Function: dml314_rq_dlg_get_dlg_reg
+// Calculate and return DLG and TTU register struct given the system setting
+// Output:
+// dlg_regs - output DLG register struct
+// ttu_regs - output DLG TTU register struct
+// Input:
+// e2e_pipe_param - "compacted" array of e2e pipe param struct
+// num_pipes - num of active "pipe" or "route"
+// pipe_idx - index that identifies the e2e_pipe_param that corresponds to this dlg
+// cstate_en - 0: when calculating min_ttu_vblank it is assumed cstate is not required. 1: Normal mode, cstate is considered.
+// Added for legacy or unrealistic timing tests.
+void dml314_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx,
+ const bool cstate_en,
+ const bool pstate_en,
+ const bool vm_en,
+ const bool ignore_viewport_pos,
+ const bool immediate_flip_support);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
new file mode 100644
index 000000000000..8a0f128722b0
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -0,0 +1,3613 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "dcn32_fpu.h"
+#include "dcn32/dcn32_resource.h"
+#include "dcn20/dcn20_resource.h"
+#include "display_mode_vba_util_32.h"
+#include "dml/dcn32/display_mode_vba_32.h"
+// We need this include for the WATERMARKS_* defines
+#include "clk_mgr/dcn32/dcn32_smu13_driver_if.h"
+#include "dcn30/dcn30_resource.h"
+#include "link_service.h"
+#include "dc_state_priv.h"
+
+#define DC_LOGGER_INIT(logger)
+
+static const struct subvp_high_refresh_list subvp_high_refresh_list = { /* file-local read-only table: a refresh-rate window plus four resolutions; presumably the high-refresh modes considered for SubVP -- TODO confirm against the code that reads it */
+ .min_refresh = 120, /* lower bound of the window (presumably Hz -- confirm) */
+ .max_refresh = 175, /* upper bound of the window (presumably Hz -- confirm) */
+ .res = {
+ {.width = 3840, .height = 2160, },
+ {.width = 3440, .height = 1440, },
+ {.width = 2560, .height = 1440, },
+ {.width = 1920, .height = 1080, }},
+};
+
+static const struct subvp_active_margin_list subvp_active_margin_list = { /* file-local read-only table: a refresh-rate window plus two resolutions; presumably the modes considered for SubVP with active margin -- TODO confirm against the code that reads it */
+ .min_refresh = 55, /* lower bound of the window (presumably Hz -- confirm) */
+ .max_refresh = 65, /* upper bound of the window (presumably Hz -- confirm) */
+ .res = {
+ {.width = 2560, .height = 1440, },
+ {.width = 1920, .height = 1080, }},
+};
+
+struct _vcs_dpi_ip_params_st dcn3_2_ip = { /* IP parameter table for DCN 3.2 (per the type and variable names); non-static and non-const, so other files may reference or modify it */
+ .gpuvm_enable = 0,
+ .gpuvm_max_page_table_levels = 4,
+ .hostvm_enable = 0,
+ .rob_buffer_size_kbytes = 128,
+ .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE, /* macro is defined outside this part of the file */
+ .config_return_buffer_size_in_kbytes = 1280,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 22,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .alpha_pixel_chunk_size_kbytes = 4,
+ .min_pixel_chunk_size_bytes = 1024,
+ .dcc_meta_buffer_size_bytes = 6272,
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 4,
+ .maximum_dsc_bits_per_component = 12,
+ .maximum_pixels_per_line_per_dsc_unit = 6016,
+ .dsc422_native_support = true,
+ .is_line_buffer_bpp_fixed = true,
+ .line_buffer_fixed_bpp = 57,
+ .line_buffer_size_bits = 1171920,
+ .max_line_buffer_lines = 32,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .dispclk_ramp_margin_percent = 1,
+ .max_inter_dcn_tile_repeaters = 8,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1, /* writeback scaling min and max ratios and taps are all 1 in this table */
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 47,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 28,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 125,
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+ .max_num_dp2p0_outputs = 2,
+ .max_num_dp2p0_streams = 4,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { /* SoC bounding-box table for DCN 3.2 (per the type and variable names); non-static and non-const; read directly by calculate_net_bw_in_kbytes_sec() and get_optimal_ntuple() in this file */
+ .clock_limits = {
+ { /* only one clock state is populated here, matching .num_states = 1 below */
+ .state = 0,
+ .dcfclk_mhz = 1564.0,
+ .fabricclk_mhz = 2500.0,
+ .dispclk_mhz = 2150.0,
+ .dppclk_mhz = 2150.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .phyclk_d32_mhz = 625.0,
+ .socclk_mhz = 1200.0,
+ .dscclk_mhz = 716.667,
+ .dram_speed_mts = 18000.0,
+ .dtbclk_mhz = 1564.0,
+ },
+ },
+ .num_states = 1,
+ .sr_exit_time_us = 42.97,
+ .sr_enter_plus_exit_time_us = 49.94,
+ .sr_exit_z8_time_us = 285.0,
+ .sr_enter_plus_exit_z8_time_us = 320,
+ .writeback_latency_us = 12.0,
+ .round_trip_ping_latency_dcfclk_cycles = 263,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .fclk_change_latency_us = 25,
+ .usr_retraining_latency_us = 2,
+ .smn_latency_us = 2,
+ .mall_allocated_for_dcn_mbytes = 64,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_sdp_bw_after_urgent = 90.0, /* SDP efficiency factor used by the net-bandwidth helpers in this file */
+ .pct_ideal_fabric_bw_after_urgent = 67.0, /* fabric efficiency factor used by the net-bandwidth helpers in this file */
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, /* DRAM efficiency factor used by the net-bandwidth helpers in this file */
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
+ .pct_ideal_dram_bw_after_urgent_strobe = 67.0,
+ .max_avg_sdp_bw_use_normal_percent = 80.0,
+ .max_avg_fabric_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_strobe_percent = 50.0,
+ .max_avg_dram_bw_use_normal_percent = 15.0,
+ .num_chans = 24, /* multiplied with dram_channel_width_bytes in the DRAM bandwidth terms of the helpers in this file */
+ .dram_channel_width_bytes = 2,
+ .fabric_datapath_to_dcn_data_return_bytes = 64,
+ .return_bus_width_bytes = 64, /* used for both the fabric and SDP bandwidth terms of the helpers in this file */
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.5,
+ .dram_clock_change_latency_us = 400,
+ .dispclk_dppclk_vco_speed_mhz = 4300.0,
+ .do_urgent_latency_adjustment = true,
+ .urgent_latency_adjustment_fabric_clock_component_us = 1.0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000,
+};
+
+static bool dcn32_apply_merge_split_flags_helper(struct dc *dc, struct dc_state *context,
+ bool *repopulate_pipes, int *split, bool *merge);
+
+/*
+ * Fill the NV watermark sets A-D in clk_mgr->base.bw_params and, when Set C
+ * is enabled, the four-entry dummy p-state table as well.
+ *
+ * Latency inputs are taken from dc->dml.soc. Minimum clocks come from entry 0
+ * of the clock table, except for Set B, which uses the state/entry 2 values
+ * whenever they are non-zero.
+ */
+void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr)
+{
+	struct clk_bw_params *bw_params = clk_mgr->base.bw_params;
+	struct nv_wm_range_entry *wm_set = bw_params->wm_table.nv_entries;
+	const struct _vcs_dpi_soc_bounding_box_st *soc = &clk_mgr->base.ctx->dc->dml.soc;
+	/* Latency defaults shared by the sets below */
+	double pstate_us = soc->dram_clock_change_latency_us;
+	double fclk_change_us = soc->fclk_change_latency_us;
+	double sr_exit_us = soc->sr_exit_time_us;
+	double sr_enter_exit_us = soc->sr_enter_plus_exit_time_us;
+	/* Min clocks are taken as reported by PM FW and reported back as min */
+	uint16_t min_uclk = bw_params->clk_table.entries[0].memclk_mhz;
+	uint16_t min_dcfclk = bw_params->clk_table.entries[0].dcfclk_mhz;
+	uint16_t dcfclk_state2 = soc->clock_limits[2].dcfclk_mhz;
+	uint16_t setb_min_uclk = min_uclk;
+	unsigned int i;
+
+	dc_assert_fp_enabled();
+
+	/* Set B reports the entry 2 UCLK to PM FW when that entry is populated */
+	if (bw_params->clk_table.entries[2].memclk_mhz)
+		setb_min_uclk = bw_params->clk_table.entries[2].memclk_mhz;
+
+	/* Set A - Normal - default values */
+	wm_set[WM_A].valid = true;
+	wm_set[WM_A].dml_input.pstate_latency_us = pstate_us;
+	wm_set[WM_A].dml_input.fclk_change_latency_us = fclk_change_us;
+	wm_set[WM_A].dml_input.sr_exit_time_us = sr_exit_us;
+	wm_set[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_exit_us;
+	wm_set[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
+	wm_set[WM_A].pmfw_breakdown.min_dcfclk = min_dcfclk;
+	wm_set[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF;
+	wm_set[WM_A].pmfw_breakdown.min_uclk = min_uclk;
+	wm_set[WM_A].pmfw_breakdown.max_uclk = 0xFFFF;
+
+	/* Set B - Performance - higher clocks, using state 2 DCFCLK and UCLK when available */
+	wm_set[WM_B].valid = true;
+	wm_set[WM_B].dml_input.pstate_latency_us = pstate_us;
+	wm_set[WM_B].dml_input.fclk_change_latency_us = fclk_change_us;
+	wm_set[WM_B].dml_input.sr_exit_time_us = sr_exit_us;
+	wm_set[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_exit_us;
+	wm_set[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
+	wm_set[WM_B].pmfw_breakdown.min_dcfclk = dcfclk_state2 ? dcfclk_state2 : min_dcfclk;
+	wm_set[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF;
+	wm_set[WM_B].pmfw_breakdown.min_uclk = setb_min_uclk;
+	wm_set[WM_B].pmfw_breakdown.max_uclk = 0xFFFF;
+
+	/*
+	 * Set C - Dummy P-State - P-State latency set to "dummy p-state" value.
+	 * Setting the 'DalDummyClockChangeLatencyNs' registry key option to
+	 * 0x7FFFFFFF disables Set C for dummy p-state.
+	 */
+	if (clk_mgr->base.ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) {
+		/* Dummy p-state latency paired with clock table entries 0..3 */
+		static const unsigned int dummy_latency_us[4] = {50, 9, 8, 5};
+
+		wm_set[WM_C].valid = true;
+		wm_set[WM_C].dml_input.pstate_latency_us = 50;
+		wm_set[WM_C].dml_input.fclk_change_latency_us = fclk_change_us;
+		wm_set[WM_C].dml_input.sr_exit_time_us = sr_exit_us;
+		wm_set[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_exit_us;
+		wm_set[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE;
+		wm_set[WM_C].pmfw_breakdown.min_dcfclk = min_dcfclk;
+		wm_set[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF;
+		wm_set[WM_C].pmfw_breakdown.min_uclk = min_uclk;
+		wm_set[WM_C].pmfw_breakdown.max_uclk = 0xFFFF;
+
+		for (i = 0; i < 4; i++) {
+			bw_params->dummy_pstate_table[i].dram_speed_mts =
+				bw_params->clk_table.entries[i].memclk_mhz * 16;
+			bw_params->dummy_pstate_table[i].dummy_pstate_latency_us = dummy_latency_us[i];
+		}
+	}
+
+	/*
+	 * Set D - MALL - SR enter and exit time specific to MALL, TBD after
+	 * bringup or later phase; for now use the DRAM values / 2.
+	 * For MALL the DRAM clock change latency is N/A; for watermark
+	 * calculations use the lowest dummy p-state latency. This reads table
+	 * entry 3, which this function only writes when Set C is enabled.
+	 */
+	wm_set[WM_D].valid = true;
+	wm_set[WM_D].dml_input.pstate_latency_us = bw_params->dummy_pstate_table[3].dummy_pstate_latency_us;
+	wm_set[WM_D].dml_input.fclk_change_latency_us = fclk_change_us;
+	wm_set[WM_D].dml_input.sr_exit_time_us = sr_exit_us / 2; // TBD
+	wm_set[WM_D].dml_input.sr_enter_plus_exit_time_us = sr_enter_exit_us / 2; // TBD
+	wm_set[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL;
+	wm_set[WM_D].pmfw_breakdown.min_dcfclk = min_dcfclk;
+	wm_set[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF;
+	wm_set[WM_D].pmfw_breakdown.min_uclk = min_uclk;
+	wm_set[WM_D].pmfw_breakdown.max_uclk = 0xFFFF;
+}
+
+/*
+ * Choose the dummy p-state latency table index to use when MCLK switching
+ * via firmware based vblank stretch is enabled.
+ *
+ * Each table entry is tried in turn: its latency is written to the context's
+ * dram_clock_change_latency_us, bandwidth validation is re-run, and the first
+ * index for which DRAM clock change is reported as supported is returned.
+ */
+int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
+							   struct dc_state *context,
+							   display_e2e_pipe_params_st *pipes,
+							   int pipe_cnt,
+							   int vlevel)
+{
+	const int table_len = 4;
+	struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+	/* Support level on entry; restored before every validation attempt */
+	enum clock_change_support saved_support = vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb];
+	int idx;
+
+	dc_assert_fp_enabled();
+
+	for (idx = 0; idx < table_len; idx++) {
+		if (saved_support != dm_dram_clock_change_unsupported)
+			vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] = saved_support;
+
+		context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+			dc->clk_mgr->bw_params->dummy_pstate_table[idx].dummy_pstate_latency_us;
+		/* May update pipe_cnt and vlevel, which are passed by address */
+		dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, DC_VALIDATE_MODE_AND_PROGRAMMING);
+
+		/* for subvp + DRR case, if subvp pipes are still present we support pstate */
+		if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported &&
+		    dcn32_subvp_in_use(dc, context))
+			vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] = saved_support;
+
+		if (vlevel < vba->soc.num_states &&
+		    vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported)
+			return idx;
+	}
+
+	/*
+	 * Reaching this point means no dummy p-state latency worked. That
+	 * should never happen and would mean something is severely wrong, so
+	 * the assertion below always fires here. Fall back to the last table
+	 * entry, because underflows are better than system crashes.
+	 */
+	ASSERT(idx != table_len);
+
+	return table_len - 1;
+}
+
+/**
+ * dcn32_helper_populate_phantom_dlg_params - Copy DML DLG results into the
+ * pipe_ctx of every phantom pipe.
+ * @dc: [in] current dc state
+ * @context: [in] new dc state
+ * @pipes: [in] DML pipe params array
+ * @pipe_cnt: [in] DML pipe count
+ *
+ * Call this only AFTER the phantom pipes have been added to the context and
+ * run through DML (otherwise there are no DLG params to read back), and
+ * BEFORE the timing for the phantom pipes is programmed.
+ */
+void dcn32_helper_populate_phantom_dlg_params(struct dc *dc,
+					      struct dc_state *context,
+					      display_e2e_pipe_params_st *pipes,
+					      int pipe_cnt)
+{
+	struct display_mode_lib *dml = &context->bw_ctx.dml;
+	uint32_t dc_idx;
+	uint32_t dml_idx = 0;
+
+	dc_assert_fp_enabled();
+
+	for (dc_idx = 0; dc_idx < dc->res_pool->pipe_count; dc_idx++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[dc_idx];
+		uint32_t cur;
+
+		/* Pipes without a stream do not consume a DML pipe slot */
+		if (!pipe->stream)
+			continue;
+
+		cur = dml_idx++;
+
+		if (!pipe->plane_state || dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM)
+			continue;
+
+		pipes[cur].pipe.dest.vstartup_start = get_vstartup(dml, pipes, pipe_cnt, cur);
+		pipes[cur].pipe.dest.vupdate_offset = get_vupdate_offset(dml, pipes, pipe_cnt, cur);
+		pipes[cur].pipe.dest.vupdate_width = get_vupdate_width(dml, pipes, pipe_cnt, cur);
+		pipes[cur].pipe.dest.vready_offset = get_vready_offset(dml, pipes, pipe_cnt, cur);
+		pipe->pipe_dlg_param = pipes[cur].pipe.dest;
+	}
+}
+
+/*
+ * Return the smallest of the DRAM, fabric and SDP bandwidth figures implied
+ * by the clocks in @entry, each scaled by its dcn3_2_soc efficiency factor.
+ */
+static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry)
+{
+	float dram_bw = entry->dram_speed_mts *
+			dcn3_2_soc.num_chans *
+			dcn3_2_soc.dram_channel_width_bytes *
+			((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100);
+	float fabric_bw = entry->fabricclk_mhz *
+			  dcn3_2_soc.return_bus_width_bytes *
+			  ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100);
+	float sdp_bw = entry->dcfclk_mhz *
+		       dcn3_2_soc.return_bus_width_bytes *
+		       ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100);
+	/* DRAM is the starting candidate; a strictly smaller figure replaces it */
+	float lowest = (fabric_bw < dram_bw) ? fabric_bw : dram_bw;
+
+	return (sdp_bw < lowest) ? sdp_bw : lowest;
+}
+
+/*
+ * Derive the two missing clocks of @entry from the one that is set.
+ *
+ * The first positive field among dcfclk_mhz, fabricclk_mhz and
+ * dram_speed_mts (checked in that order) is converted to a bandwidth using
+ * the dcn3_2_soc factors, and the other two fields are overwritten with the
+ * values that give the same bandwidth. If none is positive, @entry is left
+ * untouched.
+ */
+static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry)
+{
+	float bw;
+
+	if (entry->dcfclk_mhz > 0) {
+		/* Bandwidth on SDP drives the fabric and DRAM values */
+		bw = entry->dcfclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100);
+
+		entry->fabricclk_mhz = bw / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100));
+		entry->dram_speed_mts = bw / (dcn3_2_soc.num_chans *
+				dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100));
+		return;
+	}
+
+	if (entry->fabricclk_mhz > 0) {
+		/* Bandwidth on fabric drives the DCFCLK and DRAM values */
+		bw = entry->fabricclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100);
+
+		entry->dcfclk_mhz = bw / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100));
+		entry->dram_speed_mts = bw / (dcn3_2_soc.num_chans *
+				dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100));
+		return;
+	}
+
+	if (entry->dram_speed_mts > 0) {
+		/* Bandwidth on DRAM drives the fabric and DCFCLK values */
+		bw = entry->dram_speed_mts * dcn3_2_soc.num_chans *
+				dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100);
+
+		entry->fabricclk_mhz = bw / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100));
+		entry->dcfclk_mhz = bw / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100));
+	}
+}
+
+/*
+ * Insert a copy of @entry into @table, keeping the table ordered by ascending
+ * net_bw_in_kbytes_sec, and increment *@num_entries.
+ *
+ * The new entry is placed before the first existing entry whose bandwidth is
+ * not lower than its own. No capacity check is done here; @table must have
+ * room for one more element.
+ */
+static void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
+					   unsigned int *num_entries,
+					   struct _vcs_dpi_voltage_scaling_st *entry)
+{
+	unsigned int count = *num_entries;
+	unsigned int pos = 0;
+	unsigned int i;
+
+	dc_assert_fp_enabled();
+
+	/* Skip every existing entry with a strictly lower bandwidth */
+	while (pos < count &&
+	       entry->net_bw_in_kbytes_sec > table[pos].net_bw_in_kbytes_sec)
+		pos++;
+
+	/* Shift the tail up by one slot to open a gap at pos */
+	for (i = count; i > pos; i--)
+		table[i] = table[i - 1];
+
+	table[pos] = *entry;
+	*num_entries = count + 1;
+}
+
+/**
+ * dcn32_set_phantom_stream_timing - Set timing params for the phantom stream
+ * @dc: current dc state
+ * @context: new dc state
+ * @ref_pipe: Main pipe for the phantom stream
+ * @phantom_stream: target phantom stream state
+ * @pipes: DML pipe params
+ * @pipe_cnt: number of DML pipes
+ * @dc_pipe_idx: DC pipe index for the main pipe (i.e. ref_pipe)
+ *
+ * Set timing params of the phantom stream based on calculated output from DML.
+ * This function first gets the DML pipe index using the DC pipe index, then
+ * calls into DML (get_subviewport_lines_needed_in_mall) to get the number of
+ * lines required for SubVP MCLK switching and assigns to the phantom stream
+ * accordingly.
+ *
+ * - The number of SubVP lines calculated in DML does not take into account
+ * FW processing delays and required pstate allow width, so we must include
+ * that separately.
+ *
+ * - Set phantom backporch = vstartup of main pipe
+ *
+ * The function writes pipes[0].clks_cfg (voltage, dcfclk_mhz, socclk_mhz) and
+ * the dst, src and timing fields of @phantom_stream; it returns nothing.
+ */
+void dcn32_set_phantom_stream_timing(struct dc *dc,
+ struct dc_state *context,
+ struct pipe_ctx *ref_pipe,
+ struct dc_stream_state *phantom_stream,
+ display_e2e_pipe_params_st *pipes,
+ unsigned int pipe_cnt,
+ unsigned int dc_pipe_idx)
+{
+ unsigned int i, pipe_idx;
+ struct pipe_ctx *pipe;
+ uint32_t phantom_vactive, phantom_bp, pstate_width_fw_delay_lines;
+ unsigned int num_dpp;
+ unsigned int vlevel = context->bw_ctx.dml.vba.VoltageLevel;
+ unsigned int dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+ unsigned int socclk = context->bw_ctx.dml.vba.SOCCLKPerState[vlevel];
+ struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+ struct dc_stream_state *main_stream = ref_pipe->stream;
+
+ dc_assert_fp_enabled();
+
+ // Find DML pipe index (pipe_idx) using dc_pipe_idx: pipe_idx counts the
+ // pipes that have a stream and sit before dc_pipe_idx. If dc_pipe_idx is
+ // never reached, pipe_idx ends up as the number of pipes with a stream.
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe->stream)
+ continue;
+
+ if (i == dc_pipe_idx)
+ break;
+
+ pipe_idx++;
+ }
+
+ // Calculate lines required for pstate allow width and FW processing delays:
+ // (delay in us / 1e6) * pixel clock in Hz / h_total, truncated to an integer
+ pstate_width_fw_delay_lines = ((double)(dc->caps.subvp_fw_processing_delay_us +
+ dc->caps.subvp_pstate_allow_width_us) / 1000000) *
+ (ref_pipe->stream->timing.pix_clk_100hz * 100) /
+ (double)ref_pipe->stream->timing.h_total;
+
+ // Update clks_cfg for calling into recalculate (must precede the DML
+ // getters below, which are handed the same pipes array)
+ pipes[0].clks_cfg.voltage = vlevel;
+ pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
+ pipes[0].clks_cfg.socclk_mhz = socclk;
+
+ // DML calculation for MALL region doesn't take into account FW delay
+ // and required pstate allow width for multi-display cases
+ /* Add 16 lines margin to the MALL REGION because SUB_VP_START_LINE must be aligned
+ * to 2 swaths (i.e. 16 lines)
+ */
+ phantom_vactive = get_subviewport_lines_needed_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx) +
+ pstate_width_fw_delay_lines + dc->caps.subvp_swath_height_margin_lines;
+
+ // W/A for DCC corruption with certain high resolution timings.
+ // Determine if pipe split is used. If so, add meta_row_height to the phantom vactive.
+ num_dpp = vba->NoOfDPP[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]];
+ phantom_vactive += num_dpp > 1 ? vba->meta_row_height[vba->pipe_plane[pipe_idx]] : 0;
+
+ /* Extra debug-controlled lines (dc->debug.subvp_extra_lines, 0 by default) */
+ phantom_vactive += dc->debug.subvp_extra_lines;
+
+ // For backporch of phantom pipe, use vstartup of the main pipe
+ phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+
+ phantom_stream->dst.y = 0;
+ phantom_stream->dst.height = phantom_vactive;
+ /* When scaling, DML provides the end to end required number of lines for MALL.
+ * dst.height is always correct for this case, but src.height is not which causes a
+ * delta between main and phantom pipe scaling outputs. Need to adjust src.height on
+ * phantom for this case.
+ *
+ * src.height is phantom_vactive scaled by the main stream's src/dst height ratio.
+ * NOTE(review): main_stream->dst.height is used as a divisor without a zero
+ * check - presumably guaranteed non-zero by the caller; confirm.
+ */
+ phantom_stream->src.y = 0;
+ phantom_stream->src.height = (double)phantom_vactive * (double)main_stream->src.height / (double)main_stream->dst.height;
+
+ // v_total = vactive + 1 line of front porch + existing vsync width + backporch
+ phantom_stream->timing.v_addressable = phantom_vactive;
+ phantom_stream->timing.v_front_porch = 1;
+ phantom_stream->timing.v_total = phantom_stream->timing.v_addressable +
+ phantom_stream->timing.v_front_porch +
+ phantom_stream->timing.v_sync_width +
+ phantom_bp;
+ phantom_stream->timing.flags.DSC = 0; // Don't need DSC for phantom timing
+}
+
+/**
+ * dcn32_get_num_free_pipes - Calculate number of free pipes
+ * @dc: current dc state
+ * @context: new dc state
+ *
+ * Every pipe that has a stream and no top pipe is treated as the head of a
+ * chain; the head and each pipe reachable through its bottom_pipe links are
+ * counted as used. Whatever remains of the pool is reported as free.
+ *
+ * Return: Number of free pipes available in the context
+ */
+static unsigned int dcn32_get_num_free_pipes(struct dc *dc, struct dc_state *context)
+{
+	unsigned int idx;
+	unsigned int used = 0;
+
+	for (idx = 0; idx < dc->res_pool->pipe_count; idx++) {
+		struct pipe_ctx *head = &context->res_ctx.pipe_ctx[idx];
+		struct pipe_ctx *cur;
+
+		/* Only start counting from the top of a pipe chain */
+		if (!head->stream || head->top_pipe)
+			continue;
+
+		for (cur = head; cur; cur = cur->bottom_pipe)
+			used++;
+	}
+
+	return dc->res_pool->pipe_count - used;
+}
+
+/**
+ * dcn32_assign_subvp_pipe - Function to decide which pipe will use Sub-VP.
+ * @dc: current dc state
+ * @context: new dc state
+ * @index: [out] dc pipe index for the pipe chosen to have phantom pipes assigned
+ *
+ * We enter this function if we are Sub-VP capable (i.e. enough pipes available)
+ * and regular P-State switching (i.e. VACTIVE/VBLANK) is not supported, or if
+ * we are forcing SubVP P-State switching on the current config.
+ *
+ * The number of pipes used for the chosen surface must be less than or equal to the
+ * number of free pipes available.
+ *
+ * In general we choose surfaces with the longest frame time first (better for SubVP + VBLANK).
+ * For multi-display cases the ActiveDRAMClockChangeMargin doesn't provide enough info on its own
+ * for determining which should be the SubVP pipe (need a way to determine if a pipe / plane doesn't
+ * support MCLK switching naturally [i.e. ACTIVE or VBLANK]).
+ *
+ * @index is only written when a candidate is accepted; it is left untouched
+ * when the function returns false.
+ *
+ * Return: True if a valid pipe assignment was found for Sub-VP. Otherwise false.
+ */
+static bool dcn32_assign_subvp_pipe(struct dc *dc,
+ struct dc_state *context,
+ unsigned int *index)
+{
+ unsigned int i, pipe_idx;
+ unsigned int max_frame_time = 0;
+ bool valid_assignment_found = false;
+ unsigned int free_pipes = dcn32_get_num_free_pipes(dc, context);
+ struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+
+ // i walks the DC pipes; pipe_idx advances only for pipes that have a stream
+ // and is used to index vba->pipe_plane[]
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ unsigned int num_pipes = 0;
+ unsigned int refresh_rate = 0;
+
+ if (!pipe->stream)
+ continue;
+
+ // Round up: (pixel clock in Hz + pixels per frame - 1) / pixels per frame
+ refresh_rate = (pipe->stream->timing.pix_clk_100hz * 100 +
+ pipe->stream->timing.v_total * pipe->stream->timing.h_total - 1)
+ / (double)(pipe->stream->timing.v_total * pipe->stream->timing.h_total);
+ /* SubVP pipe candidate requirements (as checked below):
+ * - Has a plane and is neither a bottom pipe nor a non-first ODM pipe
+ * - dcn32_is_center_timing() is false for the pipe
+ * - No HW cursor request and no stream cursor SubVP limit
+ * - Pixel rate does not exceed DCN3_2_MAX_SUBVP_PIXEL_RATE_MHZ
+ * - Not PSR capable, unless this is the only stream and dmub_caps.subvp_psr is set
+ * - Not already assigned a SubVP type
+ * - Refresh rate < 120hz, or dcn32_allow_subvp_high_refresh_rate() accepts it
+ * - Not TMZ surface
+ * - Not able to switch in vactive naturally (switching in active means the
+ * DET provides enough buffer to hide the P-State switch latency -- trying
+ * to combine this with SubVP can cause issues with the scheduling), unless
+ * dcn32_allow_subvp_with_active_margin() accepts the pipe
+ */
+ if (pipe->plane_state && !pipe->top_pipe && !pipe->prev_odm_pipe && !dcn32_is_center_timing(pipe) &&
+ !pipe->stream->hw_cursor_req &&
+ !dc_state_get_stream_cursor_subvp_limit(pipe->stream, context) &&
+ !(pipe->stream->timing.pix_clk_100hz / 10000 > DCN3_2_MAX_SUBVP_PIXEL_RATE_MHZ) &&
+ (!dcn32_is_psr_capable(pipe) || (context->stream_count == 1 && dc->caps.dmub_caps.subvp_psr)) &&
+ dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_NONE &&
+ (refresh_rate < 120 || dcn32_allow_subvp_high_refresh_rate(dc, context, pipe)) &&
+ !pipe->plane_state->address.tmz_surface &&
+ (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0 ||
+ (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] > 0 &&
+ dcn32_allow_subvp_with_active_margin(pipe)))) {
+ // Count the pipes in this chain (the pipe plus its bottom pipes)
+ while (pipe) {
+ num_pipes++;
+ pipe = pipe->bottom_pipe;
+ }
+
+ // The walk above left pipe NULL; point it back at the chain head
+ pipe = &context->res_ctx.pipe_ctx[i];
+ if (num_pipes <= free_pipes) {
+ struct dc_stream_state *stream = pipe->stream;
+ unsigned int frame_us = (stream->timing.v_total * stream->timing.h_total /
+ (double)(stream->timing.pix_clk_100hz * 100)) * 1000000;
+ // Strictly greater: on equal frame times the earlier pipe is kept
+ if (frame_us > max_frame_time) {
+ *index = i;
+ max_frame_time = frame_us;
+ valid_assignment_found = true;
+ }
+ }
+ }
+ pipe_idx++;
+ }
+ return valid_assignment_found;
+}
+
+/**
+ * dcn32_enough_pipes_for_subvp - Function to check if there are "enough" pipes for SubVP.
+ * @dc: current dc state
+ * @context: new dc state
+ *
+ * Looks for the shortest bottom_pipe chain among OPP head pipes that have no
+ * SubVP type assigned yet, then compares that length with the number of free
+ * pipes.
+ *
+ * e.g. For a 2 stream config where the first stream uses one
+ * pipe and the second stream uses 2 pipes (i.e. pipe split),
+ * this function will return true because there is 1 remaining
+ * pipe which can be used as the phantom pipe for the non pipe
+ * split pipe.
+ *
+ * Return:
+ * True if the free pipes can cover the shortest non-SubVP chain and at least
+ * one pipe is in use. Otherwise false.
+ */
+static bool dcn32_enough_pipes_for_subvp(struct dc *dc, struct dc_state *context)
+{
+	unsigned int idx, chain_len, free_cnt;
+	/* Start above any reachable chain length (max number of pipes + 1) */
+	unsigned int shortest_chain = dc->res_pool->pipe_count + 1;
+
+	for (idx = 0; idx < dc->res_pool->pipe_count; idx++) {
+		struct pipe_ctx *head = &context->res_ctx.pipe_ctx[idx];
+		struct pipe_ctx *cur;
+
+		/* Only OPP heads without a SubVP assignment are of interest */
+		if (!resource_is_pipe_type(head, OPP_HEAD) ||
+				dc_state_get_pipe_subvp_type(context, head) != SUBVP_NONE)
+			continue;
+
+		chain_len = 0;
+		for (cur = head; cur; cur = cur->bottom_pipe)
+			chain_len++;
+
+		if (chain_len < shortest_chain)
+			shortest_chain = chain_len;
+	}
+
+	free_cnt = dcn32_get_num_free_pipes(dc, context);
+
+	/*
+	 * free_cnt == pipe_count means nothing is in use, in which case there
+	 * is no stream to give a phantom pipe to.
+	 */
+	return free_cnt >= shortest_chain && free_cnt < dc->res_pool->pipe_count;
+}
+
+/**
+ * subvp_subvp_schedulable - Determine if SubVP + SubVP config is schedulable
+ * @dc: current dc state
+ * @context: new dc state
+ *
+ * High level algorithm:
+ * 1. Find longest microschedule length (in us) between the two SubVP pipes
+ * 2. Check if the worst case overlap (VBLANK in middle of ACTIVE) for both
+ *    pipes still allows for the maximum microschedule to fit in the active
+ *    region for both pipes.
+ *
+ * Return: True if the SubVP + SubVP config is schedulable, false otherwise.
+ * False is also returned when fewer than two SubVP main pipes with a paired
+ * phantom stream are present in @context.
+ */
+static bool subvp_subvp_schedulable(struct dc *dc, struct dc_state *context)
+{
+	struct pipe_ctx *subvp_pipes[2] = {0};
+	struct dc_stream_state *phantom = NULL;
+	uint32_t microschedule_lines = 0;
+	uint32_t index = 0;
+	uint32_t i;
+	uint32_t max_microschedule_us = 0;
+	int32_t vactive1_us, vactive2_us, vblank1_us, vblank2_us;
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+		uint32_t time_us = 0;
+
+		/* Loop to calculate the maximum microschedule time between the two SubVP pipes,
+		 * and also to store the two main SubVP pipe pointers in subvp_pipes[2].
+		 */
+		phantom = dc_state_get_paired_subvp_stream(context, pipe->stream);
+		if (phantom && pipe->stream && pipe->plane_state && !pipe->top_pipe &&
+				dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
+			microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) +
+					phantom->timing.v_addressable;
+
+			// Round up when calculating microschedule time (+ 1 at the end)
+			time_us = (microschedule_lines * phantom->timing.h_total) /
+					(double)(phantom->timing.pix_clk_100hz * 100) * 1000000 +
+					dc->caps.subvp_prefetch_end_to_mall_start_us +
+					dc->caps.subvp_fw_processing_delay_us + 1;
+			if (time_us > max_microschedule_us)
+				max_microschedule_us = time_us;
+
+			subvp_pipes[index] = pipe;
+			index++;
+
+			// Maximum 2 SubVP pipes
+			if (index == 2)
+				break;
+		}
+	}
+
+	/* Without two SubVP main pipes there is nothing to schedule against;
+	 * bail out rather than dereference a NULL slot below.
+	 */
+	if (!subvp_pipes[0] || !subvp_pipes[1])
+		return false;
+
+	vactive1_us = ((subvp_pipes[0]->stream->timing.v_addressable * subvp_pipes[0]->stream->timing.h_total) /
+			(double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000;
+	vactive2_us = ((subvp_pipes[1]->stream->timing.v_addressable * subvp_pipes[1]->stream->timing.h_total) /
+			(double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000;
+	vblank1_us = (((subvp_pipes[0]->stream->timing.v_total - subvp_pipes[0]->stream->timing.v_addressable) *
+			subvp_pipes[0]->stream->timing.h_total) /
+			(double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000;
+	vblank2_us = (((subvp_pipes[1]->stream->timing.v_total - subvp_pipes[1]->stream->timing.v_addressable) *
+			subvp_pipes[1]->stream->timing.h_total) /
+			(double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000;
+
+	/* Compare in a signed 64-bit domain. Mixing the int32_t difference with
+	 * the uint32_t maximum would convert a negative difference to a huge
+	 * unsigned value and wrongly report the config as schedulable.
+	 */
+	if ((int64_t)((vactive1_us - vblank2_us) / 2) > (int64_t)max_microschedule_us &&
+			(int64_t)((vactive2_us - vblank1_us) / 2) > (int64_t)max_microschedule_us)
+		return true;
+
+	return false;
+}
+
+/**
+ * subvp_drr_schedulable() - Determine if SubVP + DRR config is schedulable
+ * @dc: current dc state
+ * @context: new dc state
+ *
+ * High level algorithm:
+ * 1. Get timing for SubVP pipe, phantom pipe, and DRR pipe
+ * 2. Determine the frame time for the DRR display when adding required margin for MCLK switching
+ *    (the margin is equal to the MALL region + SUBVP_DRR_MARGIN_US)
+ * 3. If (SubVP Active - Prefetch > Stretched DRR frame + max(MALL region, Stretched DRR vblank))
+ *    and the stretched frame is shorter than the frame time at the DRR display's minimum
+ *    refresh rate, then report the configuration as supported
+ *
+ * Return: True if the SubVP + DRR config is schedulable, false otherwise.
+ * False is also returned when no SubVP main pipe, no DRR pipe, or no paired
+ * phantom stream is found.
+ */
+static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context)
+{
+	uint32_t i;
+	struct pipe_ctx *pipe = NULL;
+	struct pipe_ctx *drr_pipe = NULL;
+	struct dc_crtc_timing *main_timing = NULL;
+	struct dc_crtc_timing *phantom_timing = NULL;
+	struct dc_crtc_timing *drr_timing = NULL;
+	/* Microsecond values are kept in 32 bits: a 16-bit signed value tops out
+	 * at 32.767 ms, which a single frame below ~30.5 Hz already exceeds.
+	 */
+	int32_t prefetch_us = 0;
+	int32_t mall_region_us = 0;
+	int32_t drr_frame_us = 0;	// nominal frame time
+	int32_t subvp_active_us = 0;
+	int32_t stretched_drr_us = 0;
+	int32_t drr_stretched_vblank_us = 0;
+	int32_t max_vblank_mallregion = 0;
+	struct dc_stream_state *phantom_stream;
+	bool subvp_found = false;
+	bool drr_found = false;
+
+	// Find SubVP pipe
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		pipe = &context->res_ctx.pipe_ctx[i];
+
+		// We check for master pipe, but it shouldn't matter since we only need
+		// the pipe for timing info (stream should be same for any pipe splits)
+		if (!resource_is_pipe_type(pipe, OTG_MASTER) ||
+				!resource_is_pipe_type(pipe, DPP_PIPE))
+			continue;
+
+		// Find the SubVP pipe
+		if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
+			subvp_found = true;
+			break;
+		}
+	}
+
+	// Find the DRR pipe
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		drr_pipe = &context->res_ctx.pipe_ctx[i];
+
+		// We check for master pipe only
+		if (!resource_is_pipe_type(drr_pipe, OTG_MASTER) ||
+				!resource_is_pipe_type(drr_pipe, DPP_PIPE))
+			continue;
+
+		if (dc_state_get_pipe_subvp_type(context, drr_pipe) == SUBVP_NONE && drr_pipe->stream->ignore_msa_timing_param &&
+				(drr_pipe->stream->allow_freesync || drr_pipe->stream->vrr_active_variable || drr_pipe->stream->vrr_active_fixed)) {
+			drr_found = true;
+			break;
+		}
+	}
+
+	/* When a search fails, pipe / drr_pipe merely point at the last pipe
+	 * visited (or are NULL for an empty pool), so do not touch them.
+	 */
+	if (!subvp_found || !drr_found)
+		return false;
+
+	phantom_stream = dc_state_get_paired_subvp_stream(context, pipe->stream);
+	if (!phantom_stream)
+		return false;
+
+	main_timing = &pipe->stream->timing;
+	phantom_timing = &phantom_stream->timing;
+	drr_timing = &drr_pipe->stream->timing;
+	prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
+			(double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
+			dc->caps.subvp_prefetch_end_to_mall_start_us;
+	subvp_active_us = main_timing->v_addressable * main_timing->h_total /
+			(double)(main_timing->pix_clk_100hz * 100) * 1000000;
+	drr_frame_us = drr_timing->v_total * drr_timing->h_total /
+			(double)(drr_timing->pix_clk_100hz * 100) * 1000000;
+	// P-State allow width and FW delays already included phantom_timing->v_addressable
+	mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total /
+			(double)(phantom_timing->pix_clk_100hz * 100) * 1000000;
+	stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US;
+	drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total /
+			(double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us);
+	max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us;
+
+	/* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the
+	 * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis
+	 * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time,
+	 * and the max of (VBLANK blanking time, MALL region)).
+	 */
+	return stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 &&
+			subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0;
+}
+
+
+/**
+ * subvp_vblank_schedulable - Determine if SubVP + VBLANK config is schedulable
+ * @dc: current dc state
+ * @context: new dc state
+ *
+ * High level algorithm:
+ * 1. Get timing for SubVP pipe, phantom pipe, and VBLANK pipe
+ * 2. If (SubVP Active - Prefetch > Vblank Frame Time + max(MALL region, Vblank blanking time))
+ *    then report the configuration as supported
+ *
+ * The DRR variant of this check lives in subvp_drr_schedulable(); this
+ * function only performs the plain VBLANK analysis.
+ *
+ * Return: True if the SubVP + VBLANK config is schedulable, false otherwise.
+ * False is also returned when no SubVP main pipe, no non-SubVP pipe, or no
+ * paired phantom stream is found.
+ */
+static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context)
+{
+	struct pipe_ctx *pipe = NULL;
+	struct pipe_ctx *subvp_pipe = NULL;
+	bool found = false;
+	uint32_t i = 0;
+	uint8_t vblank_index = 0;
+	/* Signed 32-bit microseconds: wide enough for any frame time, and the
+	 * final subtraction chain must be able to go negative.
+	 */
+	int32_t prefetch_us = 0;
+	int32_t mall_region_us = 0;
+	int32_t vblank_frame_us = 0;
+	int32_t subvp_active_us = 0;
+	int32_t vblank_blank_us = 0;
+	int32_t max_vblank_mallregion = 0;
+	struct dc_crtc_timing *main_timing = NULL;
+	struct dc_crtc_timing *phantom_timing = NULL;
+	struct dc_crtc_timing *vblank_timing = NULL;
+	struct dc_stream_state *phantom_stream;
+	enum mall_stream_type pipe_mall_type;
+
+	/* For SubVP + VBLANK/DRR cases, we assume there can only be
+	 * a single VBLANK/DRR display. If DML outputs SubVP + VBLANK
+	 * is supported, it is either a single VBLANK case or two VBLANK
+	 * displays which are synchronized (in which case they have identical
+	 * timings).
+	 */
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		pipe = &context->res_ctx.pipe_ctx[i];
+		pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
+
+		// We check for master pipe, but it shouldn't matter since we only need
+		// the pipe for timing info (stream should be same for any pipe splits)
+		if (!resource_is_pipe_type(pipe, OTG_MASTER) ||
+				!resource_is_pipe_type(pipe, DPP_PIPE))
+			continue;
+
+		if (!found && pipe_mall_type == SUBVP_NONE) {
+			// Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe).
+			vblank_index = i;
+			found = true;
+		}
+
+		if (!subvp_pipe && pipe_mall_type == SUBVP_MAIN)
+			subvp_pipe = pipe;
+	}
+
+	if (!found || !subvp_pipe)
+		return false;
+
+	phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream);
+	/* The lookup can come back empty (the sibling schedulability checks test
+	 * for it as well); without phantom timing nothing can be evaluated.
+	 */
+	if (!phantom_stream)
+		return false;
+
+	main_timing = &subvp_pipe->stream->timing;
+	phantom_timing = &phantom_stream->timing;
+	vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing;
+	// Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe
+	// Also include the prefetch end to mallstart delay time
+	prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
+			(double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
+			dc->caps.subvp_prefetch_end_to_mall_start_us;
+	// P-State allow width and FW delays already included phantom_timing->v_addressable
+	mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total /
+			(double)(phantom_timing->pix_clk_100hz * 100) * 1000000;
+	vblank_frame_us = vblank_timing->v_total * vblank_timing->h_total /
+			(double)(vblank_timing->pix_clk_100hz * 100) * 1000000;
+	vblank_blank_us = (vblank_timing->v_total - vblank_timing->v_addressable) * vblank_timing->h_total /
+			(double)(vblank_timing->pix_clk_100hz * 100) * 1000000;
+	subvp_active_us = main_timing->v_addressable * main_timing->h_total /
+			(double)(main_timing->pix_clk_100hz * 100) * 1000000;
+	max_vblank_mallregion = vblank_blank_us > mall_region_us ? vblank_blank_us : mall_region_us;
+
+	// Schedulable if VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time,
+	// and the max of (VBLANK blanking time, MALL region)
+	// TODO: Possibly add some margin (i.e. the below conditions should be [...] > X instead of [...] > 0)
+	return subvp_active_us - prefetch_us - vblank_frame_us - max_vblank_mallregion > 0;
+}
+
+/**
+ * subvp_subvp_admissable() - Determine if subvp + subvp config is admissible
+ *
+ * @dc: Current DC state
+ * @context: New DC state to be programmed
+ *
+ * SubVP + SubVP is admissible when exactly two SubVP main pipes exist and
+ * their (rounded up) refresh rates are either
+ * - both below 120Hz, OR
+ * - both inside the [min_refresh, max_refresh] window of
+ *   subvp_high_refresh_list
+ *
+ * Return: True if admissible, false otherwise
+ */
+static bool subvp_subvp_admissable(struct dc *dc,
+				struct dc_state *context)
+{
+	uint32_t idx;
+	uint8_t main_count = 0;
+	/* Seeding the minimum with the list's lower bound means it can only
+	 * stay at that bound when no pipe runs slower than it.
+	 */
+	uint32_t lowest_hz = subvp_high_refresh_list.min_refresh;
+	uint32_t highest_hz = 0;
+
+	for (idx = 0; idx < dc->res_pool->pipe_count; idx++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[idx];
+		struct dc_crtc_timing *timing;
+		uint64_t scaled;
+		uint32_t hz;
+
+		if (!pipe->stream)
+			continue;
+
+		if (!pipe->plane_state || pipe->top_pipe)
+			continue;
+
+		if (dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_MAIN)
+			continue;
+
+		timing = &pipe->stream->timing;
+
+		/* ceil(pixel clock in Hz / (v_total * h_total)) in 64-bit math */
+		scaled = timing->pix_clk_100hz * (uint64_t)100 +
+				timing->v_total * (uint64_t)timing->h_total - (uint64_t)1;
+		scaled = div_u64(scaled, timing->v_total);
+		scaled = div_u64(scaled, timing->h_total);
+		hz = (uint32_t)scaled;
+
+		if (hz < lowest_hz)
+			lowest_hz = hz;
+		if (hz > highest_hz)
+			highest_hz = hz;
+		main_count++;
+	}
+
+	return main_count == 2 &&
+		((lowest_hz < 120 && highest_hz < 120) ||
+		 (lowest_hz >= subvp_high_refresh_list.min_refresh &&
+		  highest_hz <= subvp_high_refresh_list.max_refresh));
+}
+
+/**
+ * subvp_validate_static_schedulability - Check which SubVP case is calculated
+ * and handle static analysis based on the case.
+ * @dc: current dc state
+ * @context: new dc state
+ * @vlevel: Voltage level calculated by DML
+ *
+ * Cases, picked by counting SubVP main pipes and non-SubVP pipes:
+ * 1. SubVP + SubVP (two main pipes)
+ * 2. Single SubVP with nothing else: accepted as is
+ * 3. SubVP + one other pipe: DRR check if admissible, else VBLANK check if
+ *    admissible
+ * 4. SubVP + VACTIVE (currently unsupported) and anything else: rejected
+ *
+ * Return: True if statically schedulable, false otherwise
+ */
+static bool subvp_validate_static_schedulability(struct dc *dc,
+				struct dc_state *context,
+				int vlevel)
+{
+	struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+	uint32_t dc_idx, dml_idx = 0;
+	uint8_t main_cnt = 0;
+	uint8_t vactive_cnt = 0;
+	uint8_t plain_cnt = 0;
+
+	/* dml_idx only advances for pipes that carry a stream */
+	for (dc_idx = 0; dc_idx < dc->res_pool->pipe_count; dc_idx++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[dc_idx];
+		enum mall_stream_type type = dc_state_get_pipe_subvp_type(context, pipe);
+
+		if (!pipe->stream)
+			continue;
+
+		if (pipe->plane_state && !pipe->top_pipe) {
+			if (type == SUBVP_MAIN)
+				main_cnt++;
+			else if (type == SUBVP_NONE)
+				plain_cnt++;
+		}
+
+		// Count how many planes that aren't SubVP/phantom are capable of VACTIVE
+		// switching (SubVP + VACTIVE unsupported). In situations where we force
+		// SubVP for a VACTIVE plane, we don't want to increment the vactive count.
+		if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vlevel][vba->maxMpcComb][vba->pipe_plane[dml_idx]] > 0 &&
+				type == SUBVP_NONE)
+			vactive_cnt++;
+
+		dml_idx++;
+	}
+
+	// Static schedulability check for SubVP + SubVP case
+	if (main_cnt == 2)
+		return subvp_subvp_admissable(dc, context) && subvp_subvp_schedulable(dc, context);
+
+	// Single SubVP configs will be supported by default as long as it's supported by DML
+	if (main_cnt == 1 && plain_cnt == 0)
+		return true;
+
+	if (main_cnt == 1 && plain_cnt == 1) {
+		if (dcn32_subvp_drr_admissable(dc, context))
+			return subvp_drr_schedulable(dc, context);
+		if (dcn32_subvp_vblank_admissable(dc, context, vlevel))
+			return subvp_vblank_schedulable(dc, context);
+		return false;
+	}
+
+	if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vactive_w_mall_sub_vp &&
+			vactive_cnt > 0) {
+		// For single display SubVP cases, DML will output dm_dram_clock_change_vactive_w_mall_sub_vp by default.
+		// We tell the difference between SubVP vs. SubVP + VACTIVE by checking the vactive count.
+		// SubVP + VACTIVE currently unsupported
+		return false;
+	}
+
+	return false;
+}
+
+/*
+ * Number the SubVP main pipes of @context: each OTG master pipe whose SubVP
+ * type is SUBVP_MAIN receives the next index (starting at 0, in pipe order);
+ * every other pipe has its subvp_index reset to 0.
+ */
+static void assign_subvp_index(struct dc *dc, struct dc_state *context)
+{
+	int pipe_no;
+	int next_index = 0;
+
+	for (pipe_no = 0; pipe_no < dc->res_pool->pipe_count; pipe_no++) {
+		struct pipe_ctx *cur = &context->res_ctx.pipe_ctx[pipe_no];
+		bool is_subvp_main = resource_is_pipe_type(cur, OTG_MASTER) &&
+				dc_state_get_pipe_subvp_type(context, cur) == SUBVP_MAIN;
+
+		cur->subvp_index = is_subvp_main ? next_index++ : 0;
+	}
+}
+
+/*
+ * Scratch table of target slice counts, filled per stream (ODM) and per
+ * plane (MPC) before the counts are applied to the pipe topology.
+ */
+struct pipe_slice_table {
+ /* One entry per stream: the stream and its ODM slice count */
+ struct {
+ struct dc_stream_state *stream;
+ int slice_count;
+ } odm_combines[MAX_STREAMS];
+ /* Number of valid entries in odm_combines[] */
+ int odm_combine_count;
+
+ /* One entry per (plane, primary DPP pipe) pair: its MPC slice count */
+ struct {
+ struct pipe_ctx *pri_pipe;
+ struct dc_plane_state *plane;
+ int slice_count;
+ } mpc_combines[MAX_PLANES];
+ /* Number of valid entries in mpc_combines[] */
+ int mpc_combine_count;
+};
+
+
+/*
+ * Adjust the ODM slice count recorded for @stream by @diff. A stream that is
+ * not in the table yet is appended with @diff as its initial count.
+ */
+static void update_slice_table_for_stream(struct pipe_slice_table *table,
+		struct dc_stream_state *stream, int diff)
+{
+	int pos;
+
+	for (pos = 0; pos < table->odm_combine_count; pos++) {
+		if (table->odm_combines[pos].stream != stream)
+			continue;
+
+		table->odm_combines[pos].slice_count += diff;
+		return;
+	}
+
+	/* No match: pos now equals the old count, i.e. the first unused slot */
+	table->odm_combines[pos].stream = stream;
+	table->odm_combines[pos].slice_count = diff;
+	table->odm_combine_count++;
+}
+
+/*
+ * Adjust the MPC slice count recorded for @plane by @diff. Entries are keyed
+ * by the plane together with the primary DPP pipe of @dpp_pipe; a pair that is
+ * not in the table yet is appended with @diff as its initial count.
+ */
+static void update_slice_table_for_plane(struct pipe_slice_table *table,
+		struct pipe_ctx *dpp_pipe, struct dc_plane_state *plane, int diff)
+{
+	struct pipe_ctx *primary = resource_get_primary_dpp_pipe(dpp_pipe);
+	int pos;
+
+	for (pos = 0; pos < table->mpc_combine_count; pos++) {
+		if (table->mpc_combines[pos].plane != plane ||
+				table->mpc_combines[pos].pri_pipe != primary)
+			continue;
+
+		table->mpc_combines[pos].slice_count += diff;
+		return;
+	}
+
+	/* No match: pos now equals the old count, i.e. the first unused slot */
+	table->mpc_combines[pos].plane = plane;
+	table->mpc_combines[pos].pri_pipe = primary;
+	table->mpc_combines[pos].slice_count = diff;
+	table->mpc_combine_count++;
+}
+
+/*
+ * Reset @table and populate it from the current topology in @context: for
+ * each stream that has an OTG master, record its ODM slice count and add one
+ * MPC slice for every DPP pipe of that OPP head that carries a plane.
+ */
+static void init_pipe_slice_table_from_context(
+		struct pipe_slice_table *table,
+		struct dc_state *context)
+{
+	struct pipe_ctx *dpp_pipes[MAX_PIPES];
+	int stream_idx;
+
+	memset(table, 0, sizeof(*table));
+
+	for (stream_idx = 0; stream_idx < context->stream_count; stream_idx++) {
+		struct dc_stream_state *stream = context->streams[stream_idx];
+		struct pipe_ctx *otg_master = resource_get_otg_master_for_stream(
+				&context->res_ctx, stream);
+		int odm_slices, dpp_count, dpp_idx;
+
+		/* Streams without an OTG master contribute nothing */
+		if (!otg_master)
+			continue;
+
+		odm_slices = resource_get_odm_slice_count(otg_master);
+		update_slice_table_for_stream(table, stream, odm_slices);
+
+		dpp_count = resource_get_dpp_pipes_for_opp_head(otg_master,
+				&context->res_ctx, dpp_pipes);
+		for (dpp_idx = 0; dpp_idx < dpp_count; dpp_idx++) {
+			struct pipe_ctx *dpp = dpp_pipes[dpp_idx];
+
+			if (dpp->plane_state)
+				update_slice_table_for_plane(table, dpp,
+						dpp->plane_state, 1);
+		}
+	}
+}
+
+/*
+ * Fold the per-pipe split[] / merge[] flags into the slice counts held in
+ * @table. Both arrays are indexed by DC pipe index; free pipes are skipped.
+ *
+ * Returns true if at least one non-free pipe had its merge or split flag set,
+ * false otherwise.
+ */
+static bool update_pipe_slice_table_with_split_flags(
+ struct pipe_slice_table *table,
+ struct dc *dc,
+ struct dc_state *context,
+ struct vba_vars_st *vba,
+ int split[MAX_PIPES],
+ bool merge[MAX_PIPES])
+{
+ /* NOTE: we are deprecating the support for the concept of pipe splitting
+ * or pipe merging. Instead we append slices to the end and remove
+ * slices from the end. The following code converts a pipe split or
+ * merge to an append or remove operation.
+ *
+ * For example:
+ * When split flags describe the following pipe connection transition
+ *
+ * from:
+ * pipe 0 (split=2) -> pipe 1 (split=2)
+ * to: (old behavior)
+ * pipe 0 -> pipe 2 -> pipe 1 -> pipe 3
+ *
+ * the code below actually does:
+ * pipe 0 -> pipe 1 -> pipe 2 -> pipe 3
+ *
+ * This is the new intended behavior and for future DCNs we will retire
+ * the old concept completely.
+ */
+ struct pipe_ctx *pipe;
+ bool odm;
+ /* dml_pipe_idx advances once per non-free pipe and indexes vba->pipe_plane[] */
+ int dc_pipe_idx, dml_pipe_idx = 0;
+ bool updated = false;
+
+ for (dc_pipe_idx = 0;
+ dc_pipe_idx < dc->res_pool->pipe_count; dc_pipe_idx++) {
+ pipe = &context->res_ctx.pipe_ctx[dc_pipe_idx];
+ if (resource_is_pipe_type(pipe, FREE_PIPE))
+ continue;
+
+ if (merge[dc_pipe_idx]) {
+ if (resource_is_pipe_type(pipe, OPP_HEAD))
+ /* merging OPP head means reducing ODM slice
+ * count by 1
+ */
+ update_slice_table_for_stream(table, pipe->stream, -1);
+ else if (resource_is_pipe_type(pipe, DPP_PIPE) &&
+ resource_get_odm_slice_index(resource_get_opp_head(pipe)) == 0)
+ /* merging DPP pipe of the first ODM slice means
+ * reducing MPC slice count by 1
+ */
+ update_slice_table_for_plane(table, pipe, pipe->plane_state, -1);
+ /* set for every merge flag, even if neither branch above applied */
+ updated = true;
+ }
+
+ if (split[dc_pipe_idx]) {
+ /* ODM vs. MPC is decided by the DML ODM combine mode of this pipe's plane */
+ odm = vba->ODMCombineEnabled[vba->pipe_plane[dml_pipe_idx]] !=
+ dm_odm_combine_mode_disabled;
+ /* split[] holds the requested total, so the table grows by split - 1 */
+ if (odm && resource_is_pipe_type(pipe, OPP_HEAD))
+ update_slice_table_for_stream(
+ table, pipe->stream, split[dc_pipe_idx] - 1);
+ else if (!odm && resource_is_pipe_type(pipe, DPP_PIPE))
+ update_slice_table_for_plane(table, pipe,
+ pipe->plane_state, split[dc_pipe_idx] - 1);
+ /* set for every split flag, even if neither branch above applied */
+ updated = true;
+ }
+ dml_pipe_idx++;
+ }
+ return updated;
+}
+
+/*
+ * Apply the slice counts stored in @table to @context: first every ODM
+ * (per-stream) entry, then every MPC (per-plane) entry, using
+ * dc->current_state and dc->res_pool as the resource helpers' inputs.
+ */
+static void update_pipes_with_slice_table(struct dc *dc, struct dc_state *context,
+		struct pipe_slice_table *table)
+{
+	int odm_idx;
+	int mpc_idx;
+
+	for (odm_idx = 0; odm_idx < table->odm_combine_count; odm_idx++) {
+		struct dc_stream_state *stream = table->odm_combines[odm_idx].stream;
+		int slices = table->odm_combines[odm_idx].slice_count;
+
+		resource_update_pipes_for_stream_with_slice_count(context,
+				dc->current_state, dc->res_pool, stream, slices);
+	}
+
+	for (mpc_idx = 0; mpc_idx < table->mpc_combine_count; mpc_idx++) {
+		struct dc_plane_state *plane = table->mpc_combines[mpc_idx].plane;
+		int slices = table->mpc_combines[mpc_idx].slice_count;
+
+		resource_update_pipes_for_plane_with_slice_count(context,
+				dc->current_state, dc->res_pool, plane, slices);
+	}
+}
+
+/*
+ * Translate split[] / merge[] flags into slice counts and apply them to
+ * @context: snapshot the current topology into a slice table, fold the flags
+ * into it, then update the pipes from the table.
+ *
+ * Returns whatever update_pipe_slice_table_with_split_flags() reported, i.e.
+ * whether any flag was set on a non-free pipe. The pipes are updated from the
+ * table regardless of that result.
+ */
+static bool update_pipes_with_split_flags(struct dc *dc, struct dc_state *context,
+		struct vba_vars_st *vba, int split[MAX_PIPES],
+		bool merge[MAX_PIPES])
+{
+	struct pipe_slice_table slices;
+	bool flags_applied;
+
+	init_pipe_slice_table_from_context(&slices, context);
+
+	flags_applied = update_pipe_slice_table_with_split_flags(&slices, dc,
+			context, vba, split, merge);
+
+	update_pipes_with_slice_table(dc, context, &slices);
+
+	return flags_applied;
+}
+
+/*
+ * Decide whether the single-display 2:1 ODM power optimization may be
+ * attempted for @context. Each check below is a veto; true is returned only
+ * when none of them fires.
+ */
+static bool should_apply_odm_power_optimization(struct dc *dc,
+		struct dc_state *context, struct vba_vars_st *v, int *split,
+		bool *merge)
+{
+	struct dc_stream_state *stream = context->streams[0];
+	struct pipe_slice_table table;
+	int n;
+
+	/*
+	 * Debug switch: clearing it disables the ODM power optimization
+	 * unconditionally.
+	 */
+	if (!dc->debug.enable_single_display_2to1_odm_policy)
+		return false;
+
+	/*
+	 * Design and test coverage currently stop at a single stream. With
+	 * several streams an extra algorithm would be needed to decide how to
+	 * save power when there are not enough free pipes for all of them,
+	 * which in turn needs several revalidation attempts. The current
+	 * version of DML has no revalidation flow.
+	 */
+	if (context->stream_count != 1)
+		return false;
+
+	/* The hardware does not support ODM for HDMI TMDS. */
+	if (dc_is_hdmi_signal(stream->signal))
+		return false;
+
+	/*
+	 * 2:1 ODM combine needs a horizontal timing divisible by 2 so that
+	 * both ODM segments have the same size.
+	 */
+	if (!is_h_timing_divisible_by_2(stream))
+		return false;
+
+	/*
+	 * No power benefit unless the pixel clock is high enough to raise the
+	 * display clock above its minimum power state.
+	 */
+	if (stream->timing.pix_clk_100hz * 100 <= DCN3_2_VMIN_DISPCLK_HZ)
+		return false;
+
+	if (!dc->config.enable_windowed_mpo_odm) {
+		/*
+		 * Without enable_windowed_mpo_odm the older software design
+		 * limits apply: exactly one plane, covering the whole stream
+		 * source, and no downscaling of very wide sources. (The newer
+		 * feature lifts these limits and also works with overlay
+		 * planes.)
+		 */
+		if (context->stream_status[0].plane_count != 1)
+			return false;
+
+		if (memcmp(&context->stream_status[0].plane_states[0]->clip_rect,
+				&stream->src, sizeof(struct rect)) != 0)
+			return false;
+
+		return !(stream->src.width >= 5120 &&
+				stream->src.width > stream->dst.width);
+	}
+
+	/*
+	 * The optimization must be seamlessly removable within one update, so
+	 * it is only applied on top of a minimal state, i.e. a state that
+	 * validates without any pipe split. Then, when the feature is toggled
+	 * off, the same stream and plane configuration fits in the pipe
+	 * resources of the first ODM slice without acquiring anything extra.
+	 */
+	init_pipe_slice_table_from_context(&table, context);
+	update_pipe_slice_table_with_split_flags(&table, dc, context, v,
+			split, merge);
+
+	for (n = 0; n < table.mpc_combine_count; n++) {
+		if (table.mpc_combines[n].slice_count > 1)
+			return false;
+	}
+
+	for (n = 0; n < table.odm_combine_count; n++) {
+		if (table.odm_combines[n].slice_count > 1)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Force the 2:1 ODM combine policy on the first @pipe_cnt DML pipes and ask
+ * DML for a voltage level again.
+ *
+ * If the returned level is below soc.num_states, @split and @merge are
+ * cleared and regenerated for that level, and both *@vlevel and
+ * vba.VoltageLevel are updated. Otherwise the original per-pipe policies
+ * are restored and nothing else is modified.
+ *
+ * NOTE(review): @vlevel is 'unsigned int *' while the visible caller holds an
+ * 'int *' - confirm the signedness mismatch is intentional.
+ */
+static void try_odm_power_optimization_and_revalidate(
+		struct dc *dc,
+		struct dc_state *context,
+		display_e2e_pipe_params_st *pipes,
+		int *split,
+		bool *merge,
+		unsigned int *vlevel,
+		int pipe_cnt)
+{
+	unsigned int saved_policy[MAX_PIPES];
+	unsigned int candidate_vlevel;
+	int p;
+
+	/* Remember each pipe's policy so it can be put back on failure. */
+	for (p = 0; p < pipe_cnt; p++) {
+		saved_policy[p] = pipes[p].pipe.dest.odm_combine_policy;
+		pipes[p].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1;
+	}
+
+	candidate_vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
+
+	if (candidate_vlevel >= context->bw_ctx.dml.soc.num_states) {
+		/* Not supported with 2:1 ODM forced: undo the override. */
+		for (p = 0; p < pipe_cnt; p++)
+			pipes[p].pipe.dest.odm_combine_policy = saved_policy[p];
+		return;
+	}
+
+	memset(split, 0, MAX_PIPES * sizeof(int));
+	memset(merge, 0, MAX_PIPES * sizeof(bool));
+	*vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, candidate_vlevel,
+			split, merge);
+	context->bw_ctx.dml.vba.VoltageLevel = *vlevel;
+}
+
+/*
+ * Return true if at least one stream in @context has a test pattern type
+ * other than DP_TEST_PATTERN_VIDEO_MODE.
+ */
+static bool is_test_pattern_enabled(
+		struct dc_state *context)
+{
+	bool enabled = false;
+	int s;
+
+	/* Stop scanning as soon as one such stream is found. */
+	for (s = 0; s < context->stream_count && !enabled; s++)
+		enabled = context->streams[s]->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE;
+
+	return enabled;
+}
+
+/*
+ * Full (mode-and-programming) bandwidth validation helper for DCN32.
+ *
+ * Sequence, as implemented below:
+ *  1. choose the DML prefetch-mode preference from
+ *     dc->debug.dml_disallow_alternate_prefetch_modes and get a voltage level;
+ *  2. apply the resulting split/merge flags to the pipes, then clear the flags;
+ *  3. if the SubVP preconditions hold, repeatedly assign a SubVP pipe, add
+ *     phantom pipes and re-run DML until a schedulable configuration is found
+ *     or no candidate remains; if none is kept, the phantom pipes are removed
+ *     and DML is re-run without them;
+ *  4. if should_apply_odm_power_optimization() agrees, retry with the 2:1 ODM
+ *     policy forced.
+ *
+ * @vlevel, @split, @merge, @pipe_cnt and @repopulate_pipes are in/out
+ * parameters that are updated along the way.
+ *
+ * Returns false only when dcn32_apply_merge_split_flags_helper() fails.
+ * A true return does not by itself mean a supported voltage level was found:
+ * *vlevel may still equal soc.num_states.
+ */
+static bool dcn32_full_validate_bw_helper(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int *vlevel,
+ int *split,
+ bool *merge,
+ int *pipe_cnt,
+ bool *repopulate_pipes)
+{
+ struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+ unsigned int dc_pipe_idx = 0;
+ int i = 0;
+ bool found_supported_config = false;
+ int vlevel_temp = 0;
+
+ dc_assert_fp_enabled();
+
+ /*
+ * DML favors voltage over p-state, but we're more interested in
+ * supporting p-state over voltage. We can't support p-state in
+ * prefetch mode > 0 so try capping the prefetch mode to start.
+ * Override present for testing.
+ */
+ if (dc->debug.dml_disallow_alternate_prefetch_modes)
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+ dm_prefetch_support_uclk_fclk_and_stutter;
+ else
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+ dm_prefetch_support_uclk_fclk_and_stutter_if_possible;
+
+ *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
+ /* This may adjust vlevel and maxMpcComb */
+ if (*vlevel < context->bw_ctx.dml.soc.num_states) {
+ *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge);
+ vba->VoltageLevel = *vlevel;
+ }
+
+ /* Apply split and merge flags before checking for subvp */
+ if (!dcn32_apply_merge_split_flags_helper(dc, context, repopulate_pipes, split, merge))
+ return false;
+ /* The flags have been consumed; start the following steps from a clean slate. */
+ memset(split, 0, MAX_PIPES * sizeof(int));
+ memset(merge, 0, MAX_PIPES * sizeof(bool));
+
+ /* Conditions for setting up phantom pipes for SubVP:
+ * 1. Not force disable SubVP
+ * 2. Full update (i.e. DC_VALIDATE_MODE_AND_PROGRAMMING)
+ * 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?)
+ * 4. Display configuration passes validation
+ * 5. (Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch)
+ */
+ if (!dc->debug.force_disable_subvp && !dc->caps.dmub_caps.gecc_enable && dcn32_all_pipes_have_stream_and_plane(dc, context) &&
+ !dcn32_mpo_in_use(context) && !dcn32_any_surfaces_rotated(dc, context) && !is_test_pattern_enabled(context) &&
+ (*vlevel == context->bw_ctx.dml.soc.num_states || (vba->DRAMSpeedPerState[*vlevel] != vba->DRAMSpeedPerState[0] &&
+ vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported) ||
+ vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported ||
+ dc->debug.force_subvp_mclk_switch)) {
+
+ /* Remember the pre-SubVP level; it is the baseline for the DRAM speed comparison after the loop. */
+ vlevel_temp = *vlevel;
+
+ while (!found_supported_config && dcn32_enough_pipes_for_subvp(dc, context) &&
+ dcn32_assign_subvp_pipe(dc, context, &dc_pipe_idx)) {
+ /* For the case where *vlevel = num_states, bandwidth validation has failed for this config.
+ * Adding phantom pipes won't change the validation result, so change the DML input param
+ * for P-State support before adding phantom pipes and recalculating the DML result.
+ * However, this case is only applicable for SubVP + DRR cases because the prefetch mode
+ * will not allow for switch in VBLANK. The DRR display must have its VBLANK stretched
+ * enough to support MCLK switching.
+ */
+ if (*vlevel == context->bw_ctx.dml.soc.num_states &&
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final ==
+ dm_prefetch_support_uclk_fclk_and_stutter) {
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+ dm_prefetch_support_fclk_and_stutter;
+ /* There are params (such as FabricClock) that need to be recalculated
+ * after validation fails (otherwise it will be 0). Calculation for
+ * phantom vactive requires call into DML, so we must ensure all the
+ * vba params are valid otherwise we'll get incorrect phantom vactive.
+ */
+ *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
+ }
+
+ dc->res_pool->funcs->add_phantom_pipes(dc, context, pipes, *pipe_cnt, dc_pipe_idx);
+
+ /* The pipe set changed, so the DML pipe array has to be rebuilt. */
+ *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes,
+ DC_VALIDATE_MODE_AND_PROGRAMMING);
+ // Populate dppclk to trigger a recalculate in dml_get_voltage_level
+ // so the phantom pipe DLG params can be assigned correctly.
+ pipes[0].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, *pipe_cnt, 0);
+ *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
+
+ /* Check that vlevel requested supports pstate or not
+ * if not, select the lowest vlevel that supports it
+ */
+ for (i = *vlevel; i < context->bw_ctx.dml.soc.num_states; i++) {
+ if (vba->DRAMClockChangeSupport[i][vba->maxMpcComb] != dm_dram_clock_change_unsupported) {
+ *vlevel = i;
+ break;
+ }
+ }
+
+ if (*vlevel < context->bw_ctx.dml.soc.num_states
+ && subvp_validate_static_schedulability(dc, context, *vlevel))
+ found_supported_config = true;
+ if (found_supported_config) {
+ // For SubVP + DRR cases, we can force the lowest vlevel that supports the mode
+ if (dcn32_subvp_drr_admissable(dc, context) && subvp_drr_schedulable(dc, context)) {
+ /* find lowest vlevel that supports the config */
+ for (i = *vlevel; i >= 0; i--) {
+ if (vba->ModeSupport[i][vba->maxMpcComb]) {
+ *vlevel = i;
+ } else {
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /*
+ * Keep SubVP only if it ends up at a lower DRAM speed than the
+ * pre-SubVP level saved in vlevel_temp.
+ * NOTE(review): *vlevel and vlevel_temp can equal soc.num_states
+ * here - confirm DRAMSpeedPerState[] is meaningful at that index.
+ */
+ if (vba->DRAMSpeedPerState[*vlevel] >= vba->DRAMSpeedPerState[vlevel_temp])
+ found_supported_config = false;
+
+ // If SubVP pipe config is unsupported (or cannot be used for UCLK switching)
+ // remove phantom pipes and repopulate dml pipes
+ if (!found_supported_config) {
+ dc_state_remove_phantom_streams_and_planes(dc, context);
+ dc_state_release_phantom_streams_and_planes(dc, context);
+ vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported;
+ *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes,
+ DC_VALIDATE_MODE_AND_PROGRAMMING);
+
+ *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
+ /* This may adjust vlevel and maxMpcComb */
+ if (*vlevel < context->bw_ctx.dml.soc.num_states) {
+ *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge);
+ vba->VoltageLevel = *vlevel;
+ }
+ } else {
+ // Must populate phantom DLG params before programming hardware / timing for phantom pipe
+ dcn32_helper_populate_phantom_dlg_params(dc, context, pipes, *pipe_cnt);
+
+ /* Call validate_apply_pipe_split flags after calling DML getters for
+ * phantom dlg params, or some of the VBA params indicating pipe split
+ * can be overwritten by the getters.
+ *
+ * When setting up SubVP config, all pipes are merged before attempting to
+ * add phantom pipes. If pipe split (ODM / MPC) is required, both the main
+ * and phantom pipes will be split in the regular pipe splitting sequence.
+ */
+ *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge);
+ vba->VoltageLevel = *vlevel;
+ // Note: We can't apply the phantom pipes to hardware at this time. We have to wait
+ // until driver has acquired the DMCUB lock to do it safely.
+ assign_subvp_index(dc, context);
+ }
+ }
+
+ /* Last step: optionally trade pipes for a lower voltage level via 2:1 ODM. */
+ if (should_apply_odm_power_optimization(dc, context, vba, split, merge))
+ try_odm_power_optimization_and_revalidate(
+ dc, context, pipes, split, merge, vlevel, *pipe_cnt);
+
+ return true;
+}
+
+/*
+ * Return true if any pipe in @context that has a stream is reported by
+ * dc->link_srv->dp_is_128b_132b_signal() as carrying a 128b/132b signal.
+ */
+static bool is_dtbclk_required(struct dc *dc, struct dc_state *context)
+{
+	int idx;
+
+	for (idx = 0; idx < dc->res_pool->pipe_count; idx++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[idx];
+
+		/* Pipes without a stream are never passed to the link service. */
+		if (pipe->stream && dc->link_srv->dp_is_128b_132b_signal(pipe))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Raise *@vstartup_start, if needed, to the value derived from
+ * @dc_crtc_timing: blank_end + (v_total - blank_start), where the front porch
+ * is first clamped to at least 2 lines for interlaced timings and 1 line
+ * otherwise. *@vstartup_start is never lowered and the caller's timing is
+ * not modified.
+ */
+static void dcn20_adjust_freesync_v_startup(const struct dc_crtc_timing *dc_crtc_timing, int *vstartup_start)
+{
+	/* Work on a private copy so the front porch can be clamped. */
+	struct dc_crtc_timing timing = *dc_crtc_timing;
+	const int min_front_porch = (timing.flags.INTERLACE == 1) ? 2 : 1;
+	uint32_t blank_start = 0;
+	uint32_t blank_end = 0;
+	uint32_t min_vstartup = 0;
+
+	if (timing.v_front_porch < min_front_porch)
+		timing.v_front_porch = min_front_porch;
+
+	/* blank_start = frame end - front porch */
+	blank_start = timing.v_total - timing.v_front_porch;
+
+	/* blank_end = blank_start - active (both borders included) */
+	blank_end = blank_start -
+			timing.v_border_bottom -
+			timing.v_addressable -
+			timing.v_border_top;
+
+	min_vstartup = blank_end + (timing.v_total - blank_start);
+
+	if (min_vstartup > *vstartup_start)
+		*vstartup_start = min_vstartup;
+}
+
+/*
+ * Translate the DML results for voltage level @vlevel into @context:
+ * the clock fields in bw_ctx.bw.dcn.clk, the MALL size counters, the
+ * compressed buffer size, and the per-pipe DLG/TTU/RQ registers and
+ * destination parameters.
+ *
+ * @pipes holds @pipe_cnt DML pipe entries. It is indexed with pipe_idx, which
+ * advances only for pipe_ctx entries that have a stream, whereas i walks all
+ * dc->res_pool->pipe_count pipe_ctx entries.
+ */
+static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt, int vlevel)
+{
+ int i, pipe_idx, active_hubp_count = 0;
+ bool usr_retraining_support = false;
+ bool unbounded_req_enabled = false;
+ struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+
+ dc_assert_fp_enabled();
+
+ /* Writeback MCIF_WB arbitration parameters */
+ dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt);
+
+ /* Copy the DML clock results, scaled by 1000 (DRAM speed additionally divided by 16). */
+ context->bw_ctx.bw.dcn.clk.dispclk_khz = context->bw_ctx.dml.vba.DISPCLK * 1000;
+ context->bw_ctx.bw.dcn.clk.dcfclk_khz = context->bw_ctx.dml.vba.DCFCLK * 1000;
+ context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000;
+ context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16;
+ context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000;
+ context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 1000;
+ context->bw_ctx.bw.dcn.clk.p_state_change_support =
+ context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]
+ != dm_dram_clock_change_unsupported;
+
+ /* Pstate change might not be supported by hardware, but it might be
+ * possible with firmware driven vertical blank stretching.
+ */
+ context->bw_ctx.bw.dcn.clk.p_state_change_support |= context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching;
+
+ /* dppclk_khz starts at 0 and becomes the maximum over all pipes in the loop below. */
+ context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
+ context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context);
+ context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = context->bw_ctx.dml.vba.DTBCLKPerState[vlevel] * 1000;
+ if (context->bw_ctx.dml.vba.FCLKChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_fclock_change_unsupported)
+ context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = false;
+ else
+ context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = true;
+
+ usr_retraining_support = context->bw_ctx.dml.vba.USRRetrainingSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+ ASSERT(usr_retraining_support);
+
+ /* Never go below the debug-configured minimum display clock. */
+ if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz)
+ context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz;
+
+ unbounded_req_enabled = get_unbounded_request_enabled(&context->bw_ctx.dml, pipes, pipe_cnt);
+
+ if (unbounded_req_enabled && pipe_cnt > 1) {
+ // Unbounded requesting should not ever be used when more than 1 pipe is enabled.
+ ASSERT(false);
+ unbounded_req_enabled = false;
+ }
+
+ /* MALL counters are accumulated per pipe in the loop below. */
+ context->bw_ctx.bw.dcn.mall_ss_size_bytes = 0;
+ context->bw_ctx.bw.dcn.mall_ss_psr_active_size_bytes = 0;
+ context->bw_ctx.bw.dcn.mall_subvp_size_bytes = 0;
+
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+ if (context->res_ctx.pipe_ctx[i].plane_state)
+ active_hubp_count++;
+ pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt,
+ pipe_idx);
+ pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt,
+ pipe_idx);
+ pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt,
+ pipe_idx);
+ pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt,
+ pipe_idx);
+
+ if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM) {
+ // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests
+ context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0;
+ context->res_ctx.pipe_ctx[i].unbounded_req = false;
+ } else {
+ context->res_ctx.pipe_ctx[i].det_buffer_size_kb = get_det_buffer_size_kbytes(&context->bw_ctx.dml, pipes, pipe_cnt,
+ pipe_idx);
+ context->res_ctx.pipe_ctx[i].unbounded_req = unbounded_req_enabled;
+ }
+
+ /* Global dppclk is the maximum of the per-pipe values. */
+ if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
+ context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
+ if (context->res_ctx.pipe_ctx[i].plane_state)
+ context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
+ else
+ context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = 0;
+ context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest;
+
+ context->res_ctx.pipe_ctx[i].surface_size_in_mall_bytes = get_surface_size_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+
+ if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] > 0)
+ context->res_ctx.pipe_ctx[i].has_vactive_margin = true;
+ else
+ context->res_ctx.pipe_ctx[i].has_vactive_margin = false;
+
+ /* MALL Allocation Sizes */
+ /* count from active, top pipes per plane only */
+ if (context->res_ctx.pipe_ctx[i].stream && context->res_ctx.pipe_ctx[i].plane_state &&
+ (context->res_ctx.pipe_ctx[i].top_pipe == NULL ||
+ context->res_ctx.pipe_ctx[i].plane_state != context->res_ctx.pipe_ctx[i].top_pipe->plane_state) &&
+ context->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) {
+ /* SS: all active surfaces stored in MALL */
+ if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) != SUBVP_PHANTOM) {
+ context->bw_ctx.bw.dcn.mall_ss_size_bytes += context->res_ctx.pipe_ctx[i].surface_size_in_mall_bytes;
+
+ if (context->res_ctx.pipe_ctx[i].stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED) {
+ /* SS PSR On: all active surfaces part of streams not supporting PSR stored in MALL */
+ context->bw_ctx.bw.dcn.mall_ss_psr_active_size_bytes += context->res_ctx.pipe_ctx[i].surface_size_in_mall_bytes;
+ }
+ } else {
+ /* SUBVP: phantom surfaces only stored in MALL */
+ context->bw_ctx.bw.dcn.mall_subvp_size_bytes += context->res_ctx.pipe_ctx[i].surface_size_in_mall_bytes;
+ }
+ }
+
+ /* Adjusts the copy in pipe_dlg_param (taken above), not the DML pipes[] entry. */
+ if (context->res_ctx.pipe_ctx[i].stream->adaptive_sync_infopacket.valid)
+ dcn20_adjust_freesync_v_startup(
+ &context->res_ctx.pipe_ctx[i].stream->timing,
+ &context->res_ctx.pipe_ctx[i].pipe_dlg_param.vstartup_start);
+
+ pipe_idx++;
+ }
+ /* If DCN isn't making memory requests we can allow pstate change and lower clocks */
+ if (!active_hubp_count) {
+ context->bw_ctx.bw.dcn.clk.socclk_khz = 0;
+ context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
+ context->bw_ctx.bw.dcn.clk.dcfclk_khz = 0;
+ context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = 0;
+ context->bw_ctx.bw.dcn.clk.dramclk_khz = 0;
+ context->bw_ctx.bw.dcn.clk.fclk_khz = 0;
+ context->bw_ctx.bw.dcn.clk.p_state_change_support = true;
+ context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = true;
+ }
+ /* save a copy of the original dppclk and dispclk values */
+ context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz;
+ context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz;
+ context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dppclk_mhz
+ * 1000;
+ context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dispclk_mhz
+ * 1000;
+
+ context->bw_ctx.bw.dcn.clk.num_ways = dcn32_helper_calculate_num_ways_for_subvp(dc, context);
+
+ /* compbuf size = configured return buffer size minus the DET size of every pipe with a stream. */
+ context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ if (context->res_ctx.pipe_ctx[i].stream)
+ context->bw_ctx.bw.dcn.compbuf_size_kb -= context->res_ctx.pipe_ctx[i].det_buffer_size_kb;
+ }
+
+ /* Second pass: fill the DLG/TTU and RQ register structs of each pipe with a stream. */
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+
+ context->bw_ctx.dml.funcs.rq_dlg_get_dlg_reg_v2(&context->bw_ctx.dml,
+ &context->res_ctx.pipe_ctx[i].dlg_regs, &context->res_ctx.pipe_ctx[i].ttu_regs, pipes,
+ pipe_cnt, pipe_idx);
+
+ context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg_v2(&context->res_ctx.pipe_ctx[i].rq_regs,
+ &context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+ pipe_idx++;
+ }
+}
+
+/*
+ * Pick a free pipe (one without a stream in @context) to act as the
+ * secondary pipe of a split. Preference order:
+ *   1. @old_index, when it is >= 0 and that pipe is free;
+ *   2. the highest-numbered free pipe that, in dc->current_state, has
+ *      neither a top_pipe nor a prev_odm_pipe;
+ *   3. the highest-numbered free pipe of any kind.
+ *
+ * The chosen pipe gets its pipe_idx set. Returns NULL when no pipe is free.
+ */
+static struct pipe_ctx *dcn32_find_split_pipe(
+		struct dc *dc,
+		struct dc_state *context,
+		int old_index)
+{
+	struct pipe_ctx *candidate;
+	int idx;
+
+	/* 1: reuse the pipe that held this role before, if it is free. */
+	if (old_index >= 0) {
+		candidate = &context->res_ctx.pipe_ctx[old_index];
+		if (candidate->stream == NULL) {
+			candidate->pipe_idx = old_index;
+			return candidate;
+		}
+	}
+
+	/* 2: free pipe with no top/prev-ODM link in the current state. */
+	for (idx = dc->res_pool->pipe_count - 1; idx >= 0; idx--) {
+		struct pipe_ctx *cur = &dc->current_state->res_ctx.pipe_ctx[idx];
+
+		if (cur->top_pipe != NULL || cur->prev_odm_pipe != NULL)
+			continue;
+
+		candidate = &context->res_ctx.pipe_ctx[idx];
+		if (candidate->stream != NULL)
+			continue;
+
+		candidate->pipe_idx = idx;
+		return candidate;
+	}
+
+	/*
+	 * 3: any free pipe.
+	 * May need to fix pipes getting tossed from 1 opp to another on flip.
+	 * Add for debugging transient underflow during topology updates:
+	 * ASSERT(pipe);
+	 */
+	for (idx = dc->res_pool->pipe_count - 1; idx >= 0; idx--) {
+		candidate = &context->res_ctx.pipe_ctx[idx];
+		if (candidate->stream == NULL) {
+			candidate->pipe_idx = idx;
+			return candidate;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Turn @sec_pipe into a copy of @pri_pipe that uses its own per-pipe hardware
+ * resources (taken from dc->res_pool at sec_pipe's index), and link it to
+ * @pri_pipe either as the next ODM pipe (@odm true) or as the bottom MPC pipe
+ * (@odm false).
+ *
+ * In the ODM case, if @pri_pipe has a plane whose clip_rect lies entirely in
+ * the left or the right half of the stream source, nothing is split and true
+ * is returned immediately.
+ *
+ * Returns false only in the ODM case when the stream timing has DSC enabled
+ * and no DSC resource could be acquired for @sec_pipe; true otherwise.
+ */
+static bool dcn32_split_stream_for_mpc_or_odm(
+ const struct dc *dc,
+ struct resource_context *res_ctx,
+ struct pipe_ctx *pri_pipe,
+ struct pipe_ctx *sec_pipe,
+ bool odm)
+{
+ /* Saved up front: the struct copy below overwrites sec_pipe->pipe_idx. */
+ int pipe_idx = sec_pipe->pipe_idx;
+ const struct resource_pool *pool = dc->res_pool;
+
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ if (odm && pri_pipe->plane_state) {
+ /* ODM + window MPO, where MPO window is on left half only */
+ if (pri_pipe->plane_state->clip_rect.x + pri_pipe->plane_state->clip_rect.width <=
+ pri_pipe->stream->src.x + pri_pipe->stream->src.width/2) {
+
+ DC_LOG_SCALER("%s - ODM + window MPO(left). pri_pipe:%d\n",
+ __func__,
+ pri_pipe->pipe_idx);
+ return true;
+ }
+
+ /* ODM + window MPO, where MPO window is on right half only */
+ if (pri_pipe->plane_state->clip_rect.x >= pri_pipe->stream->src.x + pri_pipe->stream->src.width/2) {
+
+ DC_LOG_SCALER("%s - ODM + window MPO(right). pri_pipe:%d\n",
+ __func__,
+ pri_pipe->pipe_idx);
+ return true;
+ }
+ }
+
+ /* Start from a full copy of the primary pipe, then swap in sec_pipe's own resources. */
+ *sec_pipe = *pri_pipe;
+
+ sec_pipe->pipe_idx = pipe_idx;
+ sec_pipe->plane_res.mi = pool->mis[pipe_idx];
+ sec_pipe->plane_res.hubp = pool->hubps[pipe_idx];
+ sec_pipe->plane_res.ipp = pool->ipps[pipe_idx];
+ sec_pipe->plane_res.xfm = pool->transforms[pipe_idx];
+ sec_pipe->plane_res.dpp = pool->dpps[pipe_idx];
+ sec_pipe->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst;
+ /* The DSC pointer copied from pri_pipe is dropped; ODM acquires a separate one below. */
+ sec_pipe->stream_res.dsc = NULL;
+ if (odm) {
+ /* Insert sec_pipe into the ODM chain directly after pri_pipe. */
+ if (pri_pipe->next_odm_pipe) {
+ ASSERT(pri_pipe->next_odm_pipe != sec_pipe);
+ sec_pipe->next_odm_pipe = pri_pipe->next_odm_pipe;
+ sec_pipe->next_odm_pipe->prev_odm_pipe = sec_pipe;
+ }
+ /* Re-attach the MPC neighbours to the matching pipes of the next ODM slice. */
+ if (pri_pipe->top_pipe && pri_pipe->top_pipe->next_odm_pipe) {
+ pri_pipe->top_pipe->next_odm_pipe->bottom_pipe = sec_pipe;
+ sec_pipe->top_pipe = pri_pipe->top_pipe->next_odm_pipe;
+ }
+ if (pri_pipe->bottom_pipe && pri_pipe->bottom_pipe->next_odm_pipe) {
+ pri_pipe->bottom_pipe->next_odm_pipe->top_pipe = sec_pipe;
+ sec_pipe->bottom_pipe = pri_pipe->bottom_pipe->next_odm_pipe;
+ }
+ pri_pipe->next_odm_pipe = sec_pipe;
+ sec_pipe->prev_odm_pipe = pri_pipe;
+ ASSERT(sec_pipe->top_pipe == NULL);
+
+ /* A pipe without a top pipe uses its own OPP; otherwise it shares the top pipe's OPP. */
+ if (!sec_pipe->top_pipe)
+ sec_pipe->stream_res.opp = pool->opps[pipe_idx];
+ else
+ sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp;
+ if (sec_pipe->stream->timing.flags.DSC == 1) {
+ dcn20_acquire_dsc(dc, res_ctx, &sec_pipe->stream_res.dsc, pipe_idx);
+ ASSERT(sec_pipe->stream_res.dsc);
+ if (sec_pipe->stream_res.dsc == NULL)
+ return false;
+ }
+ } else {
+ /* MPC split: insert sec_pipe into the top/bottom chain directly below pri_pipe. */
+ if (pri_pipe->bottom_pipe) {
+ ASSERT(pri_pipe->bottom_pipe != sec_pipe);
+ sec_pipe->bottom_pipe = pri_pipe->bottom_pipe;
+ sec_pipe->bottom_pipe->top_pipe = sec_pipe;
+ }
+ pri_pipe->bottom_pipe = sec_pipe;
+ sec_pipe->top_pipe = pri_pipe;
+
+ ASSERT(pri_pipe->plane_state);
+ }
+
+ return true;
+}
+
+/*
+ * Apply the per-pipe @merge and @split decisions to the pipe topology of
+ * @context.
+ *
+ * With dc->config.enable_windowed_mpo_odm set, the work is delegated to
+ * update_pipes_with_split_flags(). Otherwise the legacy path below:
+ *  - detaches and clears every pipe flagged in @merge;
+ *  - for every pipe with a non-zero @split entry acquires one secondary
+ *    pipe, and two more when the entry equals 4;
+ *  - rebuilds scaling parameters for all pipes with a plane and test pattern
+ *    parameters for every stream's OTG master.
+ *
+ * *@repopulate_pipes is set to true whenever pipes were merged or split; it
+ * is never cleared here.
+ *
+ * Returns false if no free pipe could be found for a split, if a split
+ * failed, or if resource_build_scaling_params() failed; true otherwise.
+ */
+static bool dcn32_apply_merge_split_flags_helper(
+ struct dc *dc,
+ struct dc_state *context,
+ bool *repopulate_pipes,
+ int *split,
+ bool *merge)
+{
+ int i, pipe_idx;
+ /* Marks pipes acquired as split targets in this call so the split loop skips them. */
+ bool newly_split[MAX_PIPES] = { false };
+ struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+
+ if (dc->config.enable_windowed_mpo_odm) {
+ if (update_pipes_with_split_flags(
+ dc, context, vba, split, merge))
+ *repopulate_pipes = true;
+ } else {
+
+ /* the code below will be removed once windowed mpo odm is fully
+ * enabled.
+ */
+ /* merge pipes if necessary */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ /*skip pipes that don't need merging*/
+ if (!merge[i])
+ continue;
+
+ /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */
+ if (pipe->prev_odm_pipe) {
+ /*split off odm pipe*/
+ pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe;
+ if (pipe->next_odm_pipe)
+ pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe;
+
+ /*2:1ODM+MPC Split MPO to Single Pipe + MPC Split MPO*/
+ if (pipe->bottom_pipe) {
+ if (pipe->bottom_pipe->prev_odm_pipe || pipe->bottom_pipe->next_odm_pipe) {
+ /*MPC split rules will handle this case*/
+ pipe->bottom_pipe->top_pipe = NULL;
+ } else {
+ /* when merging an ODM pipes, the bottom MPC pipe must now point to
+ * the previous ODM pipe and its associated stream assets
+ */
+ if (pipe->prev_odm_pipe->bottom_pipe) {
+ /* 3 plane MPO*/
+ pipe->bottom_pipe->top_pipe = pipe->prev_odm_pipe->bottom_pipe;
+ pipe->prev_odm_pipe->bottom_pipe->bottom_pipe = pipe->bottom_pipe;
+ } else {
+ /* 2 plane MPO*/
+ pipe->bottom_pipe->top_pipe = pipe->prev_odm_pipe;
+ pipe->prev_odm_pipe->bottom_pipe = pipe->bottom_pipe;
+ }
+
+ memcpy(&pipe->bottom_pipe->stream_res, &pipe->bottom_pipe->top_pipe->stream_res, sizeof(struct stream_resource));
+ }
+ }
+
+ if (pipe->top_pipe) {
+ pipe->top_pipe->bottom_pipe = NULL;
+ }
+
+ /* The pipe is now unlinked from its neighbours; wipe it so it counts as free. */
+ pipe->bottom_pipe = NULL;
+ pipe->next_odm_pipe = NULL;
+ pipe->plane_state = NULL;
+ pipe->stream = NULL;
+ pipe->top_pipe = NULL;
+ pipe->prev_odm_pipe = NULL;
+ /* Release the DSC before stream_res is zeroed below. */
+ if (pipe->stream_res.dsc)
+ dcn20_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc);
+ memset(&pipe->plane_res, 0, sizeof(pipe->plane_res));
+ memset(&pipe->stream_res, 0, sizeof(pipe->stream_res));
+ memset(&pipe->link_res, 0, sizeof(pipe->link_res));
+ *repopulate_pipes = true;
+ } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) {
+ /* MPC merge: this pipe shares its plane with the pipe above it; unlink it from the chain. */
+ struct pipe_ctx *top_pipe = pipe->top_pipe;
+ struct pipe_ctx *bottom_pipe = pipe->bottom_pipe;
+
+ top_pipe->bottom_pipe = bottom_pipe;
+ if (bottom_pipe)
+ bottom_pipe->top_pipe = top_pipe;
+
+ pipe->top_pipe = NULL;
+ pipe->bottom_pipe = NULL;
+ pipe->plane_state = NULL;
+ pipe->stream = NULL;
+ memset(&pipe->plane_res, 0, sizeof(pipe->plane_res));
+ memset(&pipe->stream_res, 0, sizeof(pipe->stream_res));
+ memset(&pipe->link_res, 0, sizeof(pipe->link_res));
+ *repopulate_pipes = true;
+ } else
+ ASSERT(0); /* Should never try to merge master pipe */
+
+ }
+
+ /*
+ * Split pass. pipe_idx counts the pipes that have a stream and were
+ * not created by a split in this pass; it is the index used into
+ * vba->pipe_plane[].
+ */
+ for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *hsplit_pipe = NULL;
+ bool odm;
+ int old_index = -1;
+
+ if (!pipe->stream || newly_split[i])
+ continue;
+
+ pipe_idx++;
+ odm = vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled;
+
+ if (!pipe->plane_state && !odm)
+ continue;
+
+ if (split[i]) {
+ /* Prefer the pipe index that the current state already uses for this role. */
+ if (odm) {
+ if (split[i] == 4 && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe)
+ old_index = old_pipe->next_odm_pipe->next_odm_pipe->pipe_idx;
+ else if (old_pipe->next_odm_pipe)
+ old_index = old_pipe->next_odm_pipe->pipe_idx;
+ } else {
+ if (split[i] == 4 && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe &&
+ old_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
+ old_index = old_pipe->bottom_pipe->bottom_pipe->pipe_idx;
+ else if (old_pipe->bottom_pipe &&
+ old_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
+ old_index = old_pipe->bottom_pipe->pipe_idx;
+ }
+ hsplit_pipe = dcn32_find_split_pipe(dc, context, old_index);
+ ASSERT(hsplit_pipe);
+ if (!hsplit_pipe)
+ return false;
+
+ if (!dcn32_split_stream_for_mpc_or_odm(
+ dc, &context->res_ctx,
+ pipe, hsplit_pipe, odm))
+ return false;
+
+ newly_split[hsplit_pipe->pipe_idx] = true;
+ *repopulate_pipes = true;
+ }
+ if (split[i] == 4) {
+ /* 4:1 - two further pipes: one split off 'pipe', one split off 'hsplit_pipe'. */
+ struct pipe_ctx *pipe_4to1;
+
+ if (odm && old_pipe->next_odm_pipe)
+ old_index = old_pipe->next_odm_pipe->pipe_idx;
+ else if (!odm && old_pipe->bottom_pipe &&
+ old_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
+ old_index = old_pipe->bottom_pipe->pipe_idx;
+ else
+ old_index = -1;
+ pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index);
+ ASSERT(pipe_4to1);
+ if (!pipe_4to1)
+ return false;
+ if (!dcn32_split_stream_for_mpc_or_odm(
+ dc, &context->res_ctx,
+ pipe, pipe_4to1, odm))
+ return false;
+ newly_split[pipe_4to1->pipe_idx] = true;
+
+ if (odm && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe
+ && old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe)
+ old_index = old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe->pipe_idx;
+ else if (!odm && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe &&
+ old_pipe->bottom_pipe->bottom_pipe->bottom_pipe &&
+ old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
+ old_index = old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx;
+ else
+ old_index = -1;
+ pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index);
+ ASSERT(pipe_4to1);
+ if (!pipe_4to1)
+ return false;
+ /* hsplit_pipe is non-NULL here: split[i] == 4 implies the split[i] branch above ran. */
+ if (!dcn32_split_stream_for_mpc_or_odm(
+ dc, &context->res_ctx,
+ hsplit_pipe, pipe_4to1, odm))
+ return false;
+ newly_split[pipe_4to1->pipe_idx] = true;
+ }
+ if (odm)
+ dcn20_build_mapped_resource(dc, context, pipe->stream);
+ }
+
+ /* Topology is final: rebuild scaling parameters for every pipe that has a plane. */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe->plane_state) {
+ if (!resource_build_scaling_params(pipe))
+ return false;
+ }
+ }
+
+ /* Rebuild test pattern parameters from each stream's OTG master pipe, when one exists. */
+ for (i = 0; i < context->stream_count; i++) {
+ struct pipe_ctx *otg_master = resource_get_otg_master_for_stream(&context->res_ctx,
+ context->streams[i]);
+
+ if (otg_master)
+ resource_build_test_pattern_params(&context->res_ctx, otg_master);
+ }
+ }
+ return true;
+}
+/*
+ * dcn32_internal_validate_bw - Run DML voltage level validation on a state and
+ * apply the resulting pipe split / merge decisions.
+ *
+ * @dc: display core instance; resource pool callbacks, config and debug flags are read from it
+ * @context: state under validation; its phantom streams and planes are removed and
+ * released first, and its bw_ctx.dml and pipe contexts are updated in place
+ * @pipes: caller provided DML pipe parameter array, must not be NULL
+ * @pipe_cnt_out: receives the final DML pipe count (written on the full success path only)
+ * @vlevel_out: receives the final voltage level (written on the full success path only)
+ * @validate_mode: DC_VALIDATE_MODE_AND_PROGRAMMING runs dcn32_full_validate_bw_helper();
+ * any other mode only queries the voltage level with validate_max_state set
+ *
+ * Return: true when validation passes, or when populate_dml_pipes() reports zero
+ * pipes (the two output parameters are left untouched in that case); false when
+ * @pipes is NULL or any validation step fails.
+ */
+bool dcn32_internal_validate_bw(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int *pipe_cnt_out,
+ int *vlevel_out,
+ enum dc_validate_mode validate_mode)
+{
+ bool out = false;
+ bool repopulate_pipes = false;
+ int split[MAX_PIPES] = { 0 };
+ bool merge[MAX_PIPES] = { false };
+ int pipe_cnt, i, pipe_idx;
+ int vlevel = context->bw_ctx.dml.soc.num_states;
+ struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+
+ dc_assert_fp_enabled();
+
+ ASSERT(pipes);
+ if (!pipes)
+ return false;
+
+ /* For each full update, remove all existing phantom pipes first */
+ dc_state_remove_phantom_streams_and_planes(dc, context);
+ dc_state_release_phantom_streams_and_planes(dc, context);
+
+ dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
+
+ for (i = 0; i < context->stream_count; i++)
+ resource_update_pipes_for_stream_with_slice_count(context, dc->current_state, dc->res_pool, context->streams[i], 1);
+ pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, validate_mode);
+
+ if (!pipe_cnt) { /* no DML pipes: report success without touching the output parameters */
+ out = true;
+ goto validate_out;
+ }
+
+ dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt);
+ context->bw_ctx.dml.soc.max_vratio_pre = dcn32_determine_max_vratio_prefetch(dc, context);
+
+ if (validate_mode == DC_VALIDATE_MODE_AND_PROGRAMMING) {
+ if (!dcn32_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge,
+ &pipe_cnt, &repopulate_pipes))
+ goto validate_fail;
+ }
+
+ if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING ||
+ (dc->debug.dml_disallow_alternate_prefetch_modes &&
+ (vlevel == context->bw_ctx.dml.soc.num_states ||
+ vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported))) {
+ /*
+ * If dml_disallow_alternate_prefetch_modes is false, then we have already
+ * tried alternate prefetch modes during full validation.
+ *
+ * If mode is unsupported or there is no p-state support, then
+ * fall back to favouring voltage.
+ *
+ * If Prefetch mode 0 failed for this config, or passed with Max UCLK, then try
+ * to support with Prefetch mode 1 (dm_prefetch_support_fclk_and_stutter == 2)
+ */
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+ dm_prefetch_support_none;
+
+ context->bw_ctx.dml.validate_max_state = (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING);
+ vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
+
+ context->bw_ctx.dml.validate_max_state = false;
+
+ if (vlevel < context->bw_ctx.dml.soc.num_states) {
+ memset(split, 0, sizeof(split));
+ memset(merge, 0, sizeof(merge));
+ vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge);
+ /* dcn20_validate_apply_pipe_split_flags can modify voltage level outside of DML */
+ vba->VoltageLevel = vlevel;
+ }
+ }
+
+ dml_log_mode_support_params(&context->bw_ctx.dml);
+
+ if (vlevel == context->bw_ctx.dml.soc.num_states) /* vlevel == num_states is treated as "no supported state" */
+ goto validate_fail;
+
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *mpo_pipe = pipe->bottom_pipe;
+
+ if (!pipe->stream)
+ continue;
+ /* Unless windowed MPO + ODM is enabled, fail when ODM combine is on and the bottom pipe's plane clip_rect differs from the stream src rect */
+ if (vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled
+ && !dc->config.enable_windowed_mpo_odm
+ && pipe->plane_state && mpo_pipe
+ && memcmp(&mpo_pipe->plane_state->clip_rect,
+ &pipe->stream->src,
+ sizeof(struct rect)) != 0) {
+ ASSERT(mpo_pipe->plane_state != pipe->plane_state);
+ goto validate_fail;
+ }
+ pipe_idx++; /* pipe_idx only counts pipes that have a stream */
+ }
+
+ if (!dcn32_apply_merge_split_flags_helper(dc, context, &repopulate_pipes, split, merge))
+ goto validate_fail;
+
+ /* Actual dsc count per stream dsc validation*/
+ if (!dcn20_validate_dsc(dc, context)) {
+ vba->ValidationStatus[vba->soc.num_states] = DML_FAIL_DSC_VALIDATION_FAILURE;
+ goto validate_fail;
+ }
+
+ if (repopulate_pipes) {
+ int flag_max_mpc_comb = vba->maxMpcComb;
+ int flag_vlevel = vlevel;
+ int i; /* NOTE(review): shadows the function-scope i declared at the top */
+
+ pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, validate_mode);
+ if (!dc->config.enable_windowed_mpo_odm)
+ dcn32_update_dml_pipes_odm_policy_based_on_context(dc, context, pipes);
+
+ /* repopulate_pipes = 1 means the pipes were either split or merged. In this case
+ * we have to re-calculate the DET allocation and run through DML once more to
+ * ensure all the params are calculated correctly. We do not need to run the
+ * pipe split check again after this call (pipes are already split / merged).
+ * */
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+ dm_prefetch_support_uclk_fclk_and_stutter_if_possible;
+
+ vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
+
+ if (vlevel == context->bw_ctx.dml.soc.num_states) {
+ /* failed after DET size changes */
+ goto validate_fail;
+ } else if (flag_max_mpc_comb == 0 &&
+ flag_max_mpc_comb != context->bw_ctx.dml.vba.maxMpcComb) {
+ /* check the context constructed with pipe split flags is still valid*/
+ bool flags_valid = false;
+ for (i = flag_vlevel; i < context->bw_ctx.dml.soc.num_states; i++) {
+ if (vba->ModeSupport[i][flag_max_mpc_comb]) {
+ vba->maxMpcComb = flag_max_mpc_comb;
+ vba->VoltageLevel = i;
+ vlevel = i;
+ flags_valid = true;
+ break;
+ }
+ }
+
+ /* this should never happen */
+ if (!flags_valid)
+ goto validate_fail;
+ }
+ }
+ *vlevel_out = vlevel;
+ *pipe_cnt_out = pipe_cnt;
+
+ out = true;
+ goto validate_out;
+
+validate_fail:
+ out = false;
+
+validate_out:
+ return out;
+}
+
+/*
+ * dcn32_calculate_wm_and_dlg_fpu - Compute the watermark sets, per-pipe display
+ * clocks and DLG parameters for a state.
+ *
+ * @dc: display core instance; clk_mgr bw_params, config and debug flags are read from it
+ * @context: state whose bw_ctx (DML soc latencies, watermarks a/b/c/d, clk flags),
+ * stream status fpo_in_use flags and perf_params are updated in place
+ * @pipes: DML pipe parameter array; clks_cfg of its entries is rewritten
+ * @pipe_cnt: number of entries in @pipes; the local copy may be updated by re-validation
+ * @vlevel: voltage level to start from; the local copy may be replaced by re-validation
+ *
+ * Order of work as written below: optional SubVP re-validation, optional firmware
+ * based mclk switch (FPO) attempt, watermark set B, set C (dummy p-state), set A,
+ * set D copied from set A, per-pipe dispclk / dppclk, then
+ * dcn32_calculate_dlg_params(). Several DML soc latency fields are overwritten
+ * between the watermark queries, so the statement order matters.
+ * dc_assert_fp_enabled() is called on entry.
+ */
+void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel)
+{
+ int i, pipe_idx, vlevel_temp = 0;
+ double dcfclk = dcn3_2_soc.clock_limits[0].dcfclk_mhz;
+ double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+ double dram_speed_from_validation = context->bw_ctx.dml.vba.DRAMSpeed;
+ double dcfclk_from_fw_based_mclk_switching = dcfclk_from_validation;
+ bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] !=
+ dm_dram_clock_change_unsupported;
+ unsigned int dummy_latency_index = 0;
+ int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
+ unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
+ bool subvp_in_use = dcn32_subvp_in_use(dc, context);
+ unsigned int min_dram_speed_mts_margin;
+ bool need_fclk_lat_as_dummy = false;
+ bool is_subvp_p_drr = false;
+ struct dc_stream_state *fpo_candidate_stream = NULL;
+ struct dc_stream_status *stream_status = NULL;
+
+ dc_assert_fp_enabled();
+
+ /* need to find dummy latency index for subvp */
+ if (subvp_in_use) {
+ /* Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching its VBLANK */
+ if (!pstate_en) {
+ context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp;
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = dm_prefetch_support_fclk_and_stutter;
+ pstate_en = true;
+ is_subvp_p_drr = true;
+ }
+ dummy_latency_index = dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(dc,
+ context, pipes, pipe_cnt, vlevel);
+
+ /* For DCN32/321 need to validate with fclk pstate change latency equal to dummy so prefetch is
+ * scheduled correctly to account for dummy pstate.
+ */
+ if (context->bw_ctx.dml.soc.fclk_change_latency_us < dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us) {
+ need_fclk_lat_as_dummy = true;
+ context->bw_ctx.dml.soc.fclk_change_latency_us =
+ dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
+ }
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+ dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
+ dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, DC_VALIDATE_MODE_AND_PROGRAMMING); /* NOTE(review): return value is ignored; pipe_cnt and vlevel are only rewritten when validation succeeds */
+ maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
+ if (is_subvp_p_drr) {
+ context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp;
+ }
+ }
+
+ context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false;
+ for (i = 0; i < context->stream_count; i++) {
+ stream_status = NULL;
+ if (context->streams[i])
+ stream_status = dc_state_get_stream_status(context, context->streams[i]);
+ if (stream_status)
+ stream_status->fpo_in_use = false;
+ }
+
+ if (!pstate_en || (!dc->debug.disable_fpo_optimizations &&
+ pstate_en && vlevel != 0)) {
+ /* only when the mclk switch can not be natural, is the fw based vblank stretch attempted */
+ fpo_candidate_stream = dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(dc, context);
+ if (fpo_candidate_stream) {
+ stream_status = dc_state_get_stream_status(context, fpo_candidate_stream);
+ if (stream_status)
+ stream_status->fpo_in_use = true;
+ context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = true;
+ }
+
+ if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
+ dummy_latency_index = dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(dc,
+ context, pipes, pipe_cnt, vlevel);
+
+ /* After calling dcn32_find_dummy_latency_index_for_fw_based_mclk_switch
+ * we reinstate the original dram_clock_change_latency_us on the context
+ * and all variables that may have changed up to this point, except the
+ * newly found dummy_latency_index
+ */
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+ dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
+ /* For DCN32/321 need to validate with fclk pstate change latency equal to dummy so
+ * prefetch is scheduled correctly to account for dummy pstate.
+ */
+ if (context->bw_ctx.dml.soc.fclk_change_latency_us < dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us) {
+ need_fclk_lat_as_dummy = true;
+ context->bw_ctx.dml.soc.fclk_change_latency_us =
+ dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
+ }
+ dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel_temp,
+ DC_VALIDATE_MODE_AND_PROGRAMMING); /* NOTE(review): return value is ignored; if validation fails vlevel_temp keeps its initial value of 0 */
+ if (vlevel_temp < vlevel) {
+ vlevel = vlevel_temp;
+ maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
+ dcfclk_from_fw_based_mclk_switching = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+ pstate_en = true;
+ context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] = dm_dram_clock_change_vblank;
+ } else {
+ /* Restore FCLK latency and re-run validation to go back to original validation
+ * output if we find that enabling FPO does not give us any benefit (i.e. lower
+ * voltage level)
+ */
+ context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false;
+ for (i = 0; i < context->stream_count; i++) {
+ stream_status = NULL;
+ if (context->streams[i])
+ stream_status = dc_state_get_stream_status(context, context->streams[i]);
+ if (stream_status)
+ stream_status->fpo_in_use = false;
+ }
+ context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us;
+ dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel,
+ DC_VALIDATE_MODE_AND_PROGRAMMING); /* NOTE(review): return value is ignored here as well */
+ }
+ }
+ }
+
+ /* Set B:
+ * For Set B calculations use clocks from clock_limits[2] when available i.e. when SMU is present,
+ * otherwise use arbitrary low value from spreadsheet for DCFCLK as lower is safer for watermark
+ * calculations to cover bootup clocks.
+ * DCFCLK: soc.clock_limits[2] when available
+ * UCLK: soc.clock_limits[2] when available
+ */
+ if (dcn3_2_soc.num_states > 2) {
+ vlevel_temp = 2;
+ dcfclk = dcn3_2_soc.clock_limits[2].dcfclk_mhz;
+ } else /* vlevel_temp keeps whatever value it holds at this point */
+ dcfclk = 615; //DCFCLK Vmin_lv
+
+ pipes[0].clks_cfg.voltage = vlevel_temp;
+ pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz;
+
+ if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) {
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us;
+ context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us;
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us;
+ context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us;
+ }
+ context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+
+ /* Set D:
+ * All clocks min.
+ * DCFCLK: Min, as reported by PM FW when available
+ * UCLK : Min, as reported by PM FW when available
+ * sr_enter_exit/sr_exit should be lower than used for DRAM (TBD after bringup or later, use as decided in Clk Mgr)
+ */
+
+ /* Disabled: set D is not computed separately, it is copied from set A further below.
+ if (dcn3_2_soc.num_states > 2) {
+ vlevel_temp = 0;
+ dcfclk = dc->clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz;
+ } else
+ dcfclk = 615; //DCFCLK Vmin_lv
+
+ pipes[0].clks_cfg.voltage = vlevel_temp;
+ pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz;
+
+ if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) {
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us;
+ context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us;
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us;
+ context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us;
+ }
+ context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ */
+
+ /* Set C, for Dummy P-State:
+ * All clocks min.
+ * DCFCLK: Min, as reported by PM FW, when available
+ * UCLK : Min, as reported by PM FW, when available
+ * pstate latency as per UCLK state dummy pstate latency
+ */
+
+ // For Set A and Set C use values from validation
+ pipes[0].clks_cfg.voltage = vlevel;
+ pipes[0].clks_cfg.dcfclk_mhz = dcfclk_from_validation;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
+
+ if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
+ pipes[0].clks_cfg.dcfclk_mhz = dcfclk_from_fw_based_mclk_switching;
+ }
+
+ if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) {
+ min_dram_speed_mts = dram_speed_from_validation;
+ min_dram_speed_mts_margin = 160;
+
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+ dc->clk_mgr->bw_params->dummy_pstate_table[0].dummy_pstate_latency_us;
+
+ if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] ==
+ dm_dram_clock_change_unsupported) {
+ int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels - 1;
+
+ min_dram_speed_mts =
+ dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16;
+ }
+
+ if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_in_use) {
+ /* find largest table entry that is lower than dram speed,
+ * but lower than DPM0 still uses DPM0
+ */
+ for (dummy_latency_index = 3; dummy_latency_index > 0; dummy_latency_index--) /* scans entries 3..1 and ends at index 0 when none matches */
+ if (min_dram_speed_mts + min_dram_speed_mts_margin >
+ dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dram_speed_mts)
+ break;
+ }
+
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+ dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
+
+ context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us;
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us;
+ context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us;
+ }
+
+ context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ /* On DCN32/321, PMFW will set PSTATE_CHANGE_TYPE = 1 (FCLK) for UCLK dummy p-state.
+ * In this case we must program FCLK WM Set C to use the UCLK dummy p-state WM
+ * value.
+ */
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.fclk_pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+
+ if ((!pstate_en) && (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid)) {
+ /* The only difference between A and C is p-state latency, if p-state is not supported
+ * with full p-state latency we want to calculate DLG based on dummy p-state latency,
+ * Set A p-state watermark set to 0 on DCN30, when p-state unsupported, for now keep as DCN30.
+ */
+ context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0;
+ /* Calculate FCLK p-state change watermark based on FCLK pstate change latency in case
+ * UCLK p-state is not supported, to avoid underflow in case FCLK pstate is supported
+ */
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ } else {
+ /* Set A:
+ * All clocks min.
+ * DCFCLK: Min, as reported by PM FW, when available
+ * UCLK: Min, as reported by PM FW, when available
+ */
+
+ /* For set A set the correct latency values (i.e. non-dummy values) unconditionally
+ */
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us;
+ context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us;
+
+ context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ }
+
+ /* Make set D = set A since we do not optimize watermarks for MALL */
+ context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a;
+
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+
+ pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
+ pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+
+ if (dc->config.forced_clocks) {
+ pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
+ pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
+ }
+ if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000)
+ pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
+ if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
+ pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;
+
+ pipe_idx++;
+ }
+
+ context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod;
+
+ /* for proper prefetch calculations, if dummy lat > fclk lat, use fclk lat = dummy lat */
+ if (need_fclk_lat_as_dummy)
+ context->bw_ctx.dml.soc.fclk_change_latency_us =
+ dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
+
+ dcn32_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
+
+ if (!pstate_en)
+ /* Restore full p-state latency */
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+ dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
+
+ /* revert fclk lat changes if required */
+ if (need_fclk_lat_as_dummy)
+ context->bw_ctx.dml.soc.fclk_change_latency_us =
+ dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us;
+}
+
+/*
+ * dcn32_get_optimal_dcfclk_fclk_for_uclk - Derive the DCFCLK and FCLK values
+ * matching the bandwidth available at a given UCLK.
+ *
+ * @uclk_mts: memory clock the bandwidth is computed from
+ * @optimal_dcfclk: optional output, skipped when NULL
+ * @optimal_fclk: optional output, skipped when NULL
+ *
+ * The bandwidth is computed twice from dcn3_2_soc, once derated by
+ * max_avg_dram_bw_use_normal_percent and once by
+ * max_avg_sdp_bw_use_normal_percent; the smaller result is then divided by the
+ * derated fabric / return bus width to obtain each clock.
+ */
+static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
+		unsigned int *optimal_dcfclk,
+		unsigned int *optimal_fclk)
+{
+	double dram_limited_bw, sdp_limited_bw, usable_bw;
+
+	dram_limited_bw = uclk_mts * dcn3_2_soc.num_chans *
+		dcn3_2_soc.dram_channel_width_bytes * (dcn3_2_soc.max_avg_dram_bw_use_normal_percent / 100);
+	sdp_limited_bw = uclk_mts * dcn3_2_soc.num_chans *
+		dcn3_2_soc.dram_channel_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100);
+
+	/* The tighter of the two limits is what can actually be delivered */
+	usable_bw = sdp_limited_bw;
+	if (dram_limited_bw < sdp_limited_bw)
+		usable_bw = dram_limited_bw;
+
+	if (optimal_dcfclk)
+		*optimal_dcfclk = usable_bw /
+			(dcn3_2_soc.return_bus_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100));
+
+	if (optimal_fclk)
+		*optimal_fclk = usable_bw /
+			(dcn3_2_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100));
+}
+
+/*
+ * remove_entry_from_table_at_index - Drop one entry and close the gap
+ *
+ * @table: array of voltage scaling entries
+ * @num_entries: in/out count of valid entries; decremented by one unless it is already 0
+ * @index: position of the entry to drop
+ *
+ * Entries after @index move down by one slot and the vacated last slot is
+ * zeroed. @index is not range checked: a value at or beyond the last valid
+ * position just clears the last entry.
+ */
+static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries,
+		unsigned int index)
+{
+	unsigned int last, pos;
+
+	if (!*num_entries)
+		return;
+
+	last = *num_entries - 1;
+
+	/* Shift every later entry one slot towards the front */
+	for (pos = index; pos < last; pos++)
+		table[pos] = table[pos + 1];
+
+	/* Shrink the table and scrub the slot that fell off the end */
+	*num_entries = last;
+	memset(&table[last], 0, sizeof(table[last]));
+}
+
+/*
+ * dcn32_patch_dpm_table - Seed level 0 of the clock table for missing clocks
+ *
+ * @bw_params: clock / bandwidth parameters; clk_table.entries[0] may be patched in place
+ *
+ * For each of DCFCLK, DISPCLK, DTBCLK and UCLK: when no entry of the table
+ * carries a non-zero value, entries[0] is filled from dcn3_2_soc.clock_limits[0]
+ * (UCLK as dram_speed_mts / 16).
+ */
+void dcn32_patch_dpm_table(struct clk_bw_params *bw_params)
+{
+	/* Only the four clocks patched below need a maximum; tracking others would be dead state */
+	unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dtbclk_mhz = 0, max_uclk_mhz = 0;
+	int i;
+
+	for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
+		if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
+			max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
+		if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz)
+			max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz;
+		if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
+			max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
+		if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz)
+			max_dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+	}
+
+	/* Scan through clock values we currently have and if they are 0,
+	 * then populate it with dcn3_2_soc.clock_limits[] value.
+	 *
+	 * Do it for DCFCLK, DISPCLK, DTBCLK and UCLK as any of those being
+	 * 0, will cause it to skip building the clock table.
+	 */
+	if (max_dcfclk_mhz == 0)
+		bw_params->clk_table.entries[0].dcfclk_mhz = dcn3_2_soc.clock_limits[0].dcfclk_mhz;
+	if (max_dispclk_mhz == 0)
+		bw_params->clk_table.entries[0].dispclk_mhz = dcn3_2_soc.clock_limits[0].dispclk_mhz;
+	if (max_dtbclk_mhz == 0)
+		bw_params->clk_table.entries[0].dtbclk_mhz = dcn3_2_soc.clock_limits[0].dtbclk_mhz;
+	if (max_uclk_mhz == 0)
+		bw_params->clk_table.entries[0].memclk_mhz = dcn3_2_soc.clock_limits[0].dram_speed_mts / 16;
+}
+
+/*
+ * swap_table_entries - Exchange the contents of two voltage scaling entries
+ *
+ * @first_entry: entry that receives the contents of @second_entry
+ * @second_entry: entry that receives the contents of @first_entry
+ */
+static void swap_table_entries(struct _vcs_dpi_voltage_scaling_st *first_entry,
+		struct _vcs_dpi_voltage_scaling_st *second_entry)
+{
+	struct _vcs_dpi_voltage_scaling_st saved_second = *second_entry;
+
+	*second_entry = *first_entry;
+	*first_entry = saved_second;
+}
+
+/*
+ * sort_entries_with_same_bw - Sort entries sharing the same bandwidth by DCFCLK
+ *
+ * @table: array of voltage scaling entries
+ * @num_entries: pointer to the number of valid entries in @table (not modified)
+ *
+ * Each run of adjacent entries with equal net_bw_in_kbytes_sec is reordered
+ * by ascending dcfclk_mhz; entries with equal dcfclk_mhz keep their order.
+ * Entries outside such runs are left where they are.
+ */
+static void sort_entries_with_same_bw(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries)
+{
+	unsigned int count = *num_entries;
+	unsigned int start, end, pass, k;
+
+	/* count is unsigned: without this guard "count - 1" wraps for an empty table */
+	if (count < 2)
+		return;
+
+	for (start = 0; start < count - 1; start = end + 1) {
+		/*
+		 * Grow [start, end] over every neighbour with the same bandwidth.
+		 * Compare against the entry itself rather than a copy held in a
+		 * narrower integer, so the value cannot be truncated.
+		 */
+		end = start;
+		while (end < count - 1 &&
+		       table[end + 1].net_bw_in_kbytes_sec == table[start].net_bw_in_kbytes_sec)
+			end++;
+
+		/* Bubble sort inside the run; a run of one entry is skipped */
+		for (pass = start; pass < end; pass++) {
+			for (k = start; k < end; k++) {
+				if (table[k].dcfclk_mhz > table[k + 1].dcfclk_mhz)
+					swap_table_entries(&table[k], &table[k + 1]);
+			}
+		}
+	}
+}
+
+/*
+ * remove_inconsistent_entries - Ensure entries with the same bandwidth have MEMCLK and FCLK monotonically increasing
+ * and remove entries that do not
+ *
+ * @table: array of voltage scaling entries
+ * @num_entries: in/out count of valid entries; reduced by one for every entry removed
+ *
+ * For each adjacent pair with equal net_bw_in_kbytes_sec, the earlier entry is
+ * removed when its dram_speed_mts or fabricclk_mhz exceeds that of the later one.
+ */
+static void remove_inconsistent_entries(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries)
+{
+	unsigned int i = 0;
+
+	/* Bound written as i + 1 < n: "n - 1" is unsigned and would wrap for an empty table */
+	while (i + 1 < *num_entries) {
+		if (table[i].net_bw_in_kbytes_sec == table[i + 1].net_bw_in_kbytes_sec &&
+		    (table[i].dram_speed_mts > table[i + 1].dram_speed_mts ||
+		     table[i].fabricclk_mhz > table[i + 1].fabricclk_mhz)) {
+			remove_entry_from_table_at_index(table, num_entries, i);
+			/*
+			 * The successor slid into slot i and now sits next to
+			 * entry i - 1: step back so that new pair is checked too
+			 * instead of being skipped.
+			 */
+			if (i > 0)
+				i--;
+			continue;
+		}
+		i++;
+	}
+}
+
+/*
+ * override_max_clk_values - Replace max clock frequencies with the max DC mode ones
+ *
+ * @max_clk_limit: source of the desired clock values
+ * @curr_clk_limit: destination whose values get overwritten
+ *
+ * A source field equal to 0 counts as uninitialized and leaves the matching
+ * destination field untouched. Only dcfclk, fclk, memclk, socclk, dtbclk and
+ * dispclk are considered.
+ *
+ * Return: 0 upon success, -1 when either pointer is NULL
+ */
+static int override_max_clk_values(struct clk_limit_table_entry *max_clk_limit,
+		struct clk_limit_table_entry *curr_clk_limit)
+{
+	/* Both ends of the copy are required */
+	if (!max_clk_limit || !curr_clk_limit)
+		return -1;
+
+	if (max_clk_limit->dcfclk_mhz)
+		curr_clk_limit->dcfclk_mhz = max_clk_limit->dcfclk_mhz;
+	if (max_clk_limit->fclk_mhz)
+		curr_clk_limit->fclk_mhz = max_clk_limit->fclk_mhz;
+	if (max_clk_limit->memclk_mhz)
+		curr_clk_limit->memclk_mhz = max_clk_limit->memclk_mhz;
+	if (max_clk_limit->socclk_mhz)
+		curr_clk_limit->socclk_mhz = max_clk_limit->socclk_mhz;
+	if (max_clk_limit->dtbclk_mhz)
+		curr_clk_limit->dtbclk_mhz = max_clk_limit->dtbclk_mhz;
+	if (max_clk_limit->dispclk_mhz)
+		curr_clk_limit->dispclk_mhz = max_clk_limit->dispclk_mhz;
+
+	return 0;
+}
+
+static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk_bw_params *bw_params,
+ struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries)
+{
+ int i, j;
+ struct _vcs_dpi_voltage_scaling_st entry = {0};
+ struct clk_limit_table_entry max_clk_data = {0};
+
+ unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299;
+
+ static const unsigned int num_dcfclk_stas = 5;
+ unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564};
+
+ unsigned int num_uclk_dpms = 0;
+ unsigned int num_fclk_dpms = 0;
+ unsigned int num_dcfclk_dpms = 0;
+
+ unsigned int num_dc_uclk_dpms = 0;
+ unsigned int num_dc_fclk_dpms = 0;
+ unsigned int num_dc_dcfclk_dpms = 0;
+
+ for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
+ if (bw_params->clk_table.entries[i].dcfclk_mhz > max_clk_data.dcfclk_mhz)
+ max_clk_data.dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
+ if (bw_params->clk_table.entries[i].fclk_mhz > max_clk_data.fclk_mhz)
+ max_clk_data.fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz;
+ if (bw_params->clk_table.entries[i].memclk_mhz > max_clk_data.memclk_mhz)
+ max_clk_data.memclk_mhz = bw_params->clk_table.entries[i].memclk_mhz;
+ if (bw_params->clk_table.entries[i].dispclk_mhz > max_clk_data.dispclk_mhz)
+ max_clk_data.dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
+ if (bw_params->clk_table.entries[i].dppclk_mhz > max_clk_data.dppclk_mhz)
+ max_clk_data.dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
+ if (bw_params->clk_table.entries[i].phyclk_mhz > max_clk_data.phyclk_mhz)
+ max_clk_data.phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
+ if (bw_params->clk_table.entries[i].dtbclk_mhz > max_clk_data.dtbclk_mhz)
+ max_clk_data.dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+
+ if (bw_params->clk_table.entries[i].memclk_mhz > 0) {
+ num_uclk_dpms++;
+ if (bw_params->clk_table.entries[i].memclk_mhz <= bw_params->dc_mode_limit.memclk_mhz)
+ num_dc_uclk_dpms++;
+ }
+ if (bw_params->clk_table.entries[i].fclk_mhz > 0) {
+ num_fclk_dpms++;
+ if (bw_params->clk_table.entries[i].fclk_mhz <= bw_params->dc_mode_limit.fclk_mhz)
+ num_dc_fclk_dpms++;
+ }
+ if (bw_params->clk_table.entries[i].dcfclk_mhz > 0) {
+ num_dcfclk_dpms++;
+ if (bw_params->clk_table.entries[i].dcfclk_mhz <= bw_params->dc_mode_limit.dcfclk_mhz)
+ num_dc_dcfclk_dpms++;
+ }
+ }
+
+ if (!disable_dc_mode_overwrite) {
+ //Overwrite max frequencies with max DC mode frequencies for DC mode systems
+ override_max_clk_values(&bw_params->dc_mode_limit, &max_clk_data);
+ num_uclk_dpms = num_dc_uclk_dpms;
+ num_fclk_dpms = num_dc_fclk_dpms;
+ num_dcfclk_dpms = num_dc_dcfclk_dpms;
+ bw_params->clk_table.num_entries_per_clk.num_memclk_levels = num_uclk_dpms;
+ bw_params->clk_table.num_entries_per_clk.num_fclk_levels = num_fclk_dpms;
+ }
+
+ if (num_dcfclk_dpms > 0 && bw_params->clk_table.entries[0].fclk_mhz > min_fclk_mhz)
+ min_fclk_mhz = bw_params->clk_table.entries[0].fclk_mhz;
+
+ if (!max_clk_data.dcfclk_mhz || !max_clk_data.dispclk_mhz || !max_clk_data.dtbclk_mhz)
+ return -1;
+
+ if (max_clk_data.dppclk_mhz == 0)
+ max_clk_data.dppclk_mhz = max_clk_data.dispclk_mhz;
+
+ if (max_clk_data.fclk_mhz == 0)
+ max_clk_data.fclk_mhz = max_clk_data.dcfclk_mhz *
+ dcn3_2_soc.pct_ideal_sdp_bw_after_urgent /
+ dcn3_2_soc.pct_ideal_fabric_bw_after_urgent;
+
+ if (max_clk_data.phyclk_mhz == 0)
+ max_clk_data.phyclk_mhz = dcn3_2_soc.clock_limits[0].phyclk_mhz;
+
+ *num_entries = 0;
+ entry.dispclk_mhz = max_clk_data.dispclk_mhz;
+ entry.dscclk_mhz = max_clk_data.dispclk_mhz / 3;
+ entry.dppclk_mhz = max_clk_data.dppclk_mhz;
+ entry.dtbclk_mhz = max_clk_data.dtbclk_mhz;
+ entry.phyclk_mhz = max_clk_data.phyclk_mhz;
+ entry.phyclk_d18_mhz = dcn3_2_soc.clock_limits[0].phyclk_d18_mhz;
+ entry.phyclk_d32_mhz = dcn3_2_soc.clock_limits[0].phyclk_d32_mhz;
+
+ // Insert all the DCFCLK STAs
+ for (i = 0; i < num_dcfclk_stas; i++) {
+ entry.dcfclk_mhz = dcfclk_sta_targets[i];
+ entry.fabricclk_mhz = 0;
+ entry.dram_speed_mts = 0;
+
+ get_optimal_ntuple(&entry);
+ entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry);
+ insert_entry_into_table_sorted(table, num_entries, &entry);
+ }
+
+ // Insert the max DCFCLK
+ entry.dcfclk_mhz = max_clk_data.dcfclk_mhz;
+ entry.fabricclk_mhz = 0;
+ entry.dram_speed_mts = 0;
+
+ get_optimal_ntuple(&entry);
+ entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry);
+ insert_entry_into_table_sorted(table, num_entries, &entry);
+
+ // Insert the UCLK DPMS
+ for (i = 0; i < num_uclk_dpms; i++) {
+ entry.dcfclk_mhz = 0;
+ entry.fabricclk_mhz = 0;
+ entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16;
+
+ get_optimal_ntuple(&entry);
+ entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry);
+ insert_entry_into_table_sorted(table, num_entries, &entry);
+ }
+
+ // If FCLK is coarse grained, insert individual DPMs.
+ if (num_fclk_dpms > 2) {
+ for (i = 0; i < num_fclk_dpms; i++) {
+ entry.dcfclk_mhz = 0;
+ entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz;
+ entry.dram_speed_mts = 0;
+
+ get_optimal_ntuple(&entry);
+ entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry);
+ insert_entry_into_table_sorted(table, num_entries, &entry);
+ }
+ }
+ // If FCLK fine grained, only insert max
+ else {
+ entry.dcfclk_mhz = 0;
+ entry.fabricclk_mhz = max_clk_data.fclk_mhz;
+ entry.dram_speed_mts = 0;
+
+ get_optimal_ntuple(&entry);
+ entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry);
+ insert_entry_into_table_sorted(table, num_entries, &entry);
+ }
+
+ // At this point, the table contains all "points of interest" based on
+ // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock
+ // ratios (by derate) are exact.
+
+ // Remove states that require higher clocks than are supported
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ if (table[i].dcfclk_mhz > max_clk_data.dcfclk_mhz ||
+ table[i].fabricclk_mhz > max_clk_data.fclk_mhz ||
+ table[i].dram_speed_mts > max_clk_data.memclk_mhz * 16)
+ remove_entry_from_table_at_index(table, num_entries, i);
+ }
+
+ // Insert entry with all max dc limits without bandwidth matching
+ if (!disable_dc_mode_overwrite) {
+ struct _vcs_dpi_voltage_scaling_st max_dc_limits_entry = entry;
+
+ max_dc_limits_entry.dcfclk_mhz = max_clk_data.dcfclk_mhz;
+ max_dc_limits_entry.fabricclk_mhz = max_clk_data.fclk_mhz;
+ max_dc_limits_entry.dram_speed_mts = max_clk_data.memclk_mhz * 16;
+
+ max_dc_limits_entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&max_dc_limits_entry);
+ insert_entry_into_table_sorted(table, num_entries, &max_dc_limits_entry);
+
+ sort_entries_with_same_bw(table, num_entries);
+ remove_inconsistent_entries(table, num_entries);
+ }
+
+ // At this point, the table only contains supported points of interest
+ // it could be used as is, but some states may be redundant due to
+ // coarse grained nature of some clocks, so we want to round up to
+ // coarse grained DPMs and remove duplicates.
+
+ // Round up UCLKs
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ for (j = 0; j < num_uclk_dpms; j++) {
+ if (bw_params->clk_table.entries[j].memclk_mhz * 16 >= table[i].dram_speed_mts) {
+ table[i].dram_speed_mts = bw_params->clk_table.entries[j].memclk_mhz * 16;
+ break;
+ }
+ }
+ }
+
+ // If FCLK is coarse grained, round up to next DPMs
+ if (num_fclk_dpms > 2) {
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ for (j = 0; j < num_fclk_dpms; j++) {
+ if (bw_params->clk_table.entries[j].fclk_mhz >= table[i].fabricclk_mhz) {
+ table[i].fabricclk_mhz = bw_params->clk_table.entries[j].fclk_mhz;
+ break;
+ }
+ }
+ }
+ }
+ // Otherwise, round up to minimum.
+ else {
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ if (table[i].fabricclk_mhz < min_fclk_mhz) {
+ table[i].fabricclk_mhz = min_fclk_mhz;
+ }
+ }
+ }
+
+ // Round DCFCLKs up to minimum
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ if (table[i].dcfclk_mhz < min_dcfclk_mhz) {
+ table[i].dcfclk_mhz = min_dcfclk_mhz;
+ }
+ }
+
+ // Remove duplicate states, note duplicate states are always neighbouring since table is sorted.
+ i = 0;
+ while (i < *num_entries - 1) {
+ if (table[i].dcfclk_mhz == table[i + 1].dcfclk_mhz &&
+ table[i].fabricclk_mhz == table[i + 1].fabricclk_mhz &&
+ table[i].dram_speed_mts == table[i + 1].dram_speed_mts)
+ remove_entry_from_table_at_index(table, num_entries, i + 1);
+ else
+ i++;
+ }
+
+ // Fix up the state indices
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ table[i].state = i;
+ }
+
+ return 0;
+}
+
+/*
+ * dcn32_update_bw_bounding_box_fpu
+ *
+ * This would override some dcn3_2 ip_or_soc initial parameters hardcoded from
+ * spreadsheet with actual values as per dGPU SKU:
+ * - with passed few options from dc->config
+ * - with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might
+ *   need to get it from PM FW)
+ * - with passed latency values (passed in ns units) in dc->bb_overrides for
+ *   debugging purposes
+ * - with passed latencies from VBIOS (in 100_ns units) if available for
+ *   certain dGPU SKU
+ * - with number of DRAM channels from VBIOS (which differ for certain dGPU SKU
+ *   of the same ASIC)
+ * - clocks levels with passed clk_table entries from Clk Mgr as reported by PM
+ *   FW for different clocks (which might differ for certain dGPU SKU of the
+ *   same ASIC)
+ *
+ * @dc: display core instance; its config, debug, bb_overrides, VBIOS and
+ *      clock-manager fields are read, and dc->dml, dc->dml2_options and (when
+ *      present) dc->current_state->bw_ctx.dml are updated.
+ * @bw_params: clock table used to rebuild dcn3_2_soc.clock_limits. Note that
+ *      the DML2 override table at the end of the function is filled from
+ *      dc->clk_mgr->bw_params rather than from this argument.
+ *
+ * Must be called with the FPU enabled (checked by dc_assert_fp_enabled()).
+ */
+void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params)
+{
+	dc_assert_fp_enabled();
+
+	/* Overrides from dc->config options */
+	dcn3_2_ip.clamp_min_dcfclk = dc->config.clamp_min_dcfclk;
+
+	/*
+	 * Override from passed dc->bb_overrides if available.
+	 * Each override is applied only when it is non-zero and differs from
+	 * the current SoC value; ns are converted to us with a floating-point
+	 * divide so that sub-microsecond precision is kept.
+	 */
+	if ((int)(dcn3_2_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns
+			&& dc->bb_overrides.sr_exit_time_ns) {
+		dc->dml2_options.bbox_overrides.sr_exit_latency_us =
+		dcn3_2_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
+	}
+
+	if ((int)(dcn3_2_soc.sr_enter_plus_exit_time_us * 1000)
+			!= dc->bb_overrides.sr_enter_plus_exit_time_ns
+			&& dc->bb_overrides.sr_enter_plus_exit_time_ns) {
+		dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us =
+		dcn3_2_soc.sr_enter_plus_exit_time_us =
+				dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+	}
+
+	if ((int)(dcn3_2_soc.urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns
+			&& dc->bb_overrides.urgent_latency_ns) {
+		dcn3_2_soc.urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0;
+		dc->dml2_options.bbox_overrides.urgent_latency_us =
+		dcn3_2_soc.urgent_latency_pixel_data_only_us = dc->bb_overrides.urgent_latency_ns / 1000.0;
+	}
+
+	if ((int)(dcn3_2_soc.dram_clock_change_latency_us * 1000)
+			!= dc->bb_overrides.dram_clock_change_latency_ns
+			&& dc->bb_overrides.dram_clock_change_latency_ns) {
+		dc->dml2_options.bbox_overrides.dram_clock_change_latency_us =
+		dcn3_2_soc.dram_clock_change_latency_us =
+				dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
+	}
+
+	if ((int)(dcn3_2_soc.fclk_change_latency_us * 1000)
+			!= dc->bb_overrides.fclk_clock_change_latency_ns
+			&& dc->bb_overrides.fclk_clock_change_latency_ns) {
+		/* Divide by 1000.0 (not 1000) like the sibling overrides above. */
+		dc->dml2_options.bbox_overrides.fclk_change_latency_us =
+		dcn3_2_soc.fclk_change_latency_us =
+				dc->bb_overrides.fclk_clock_change_latency_ns / 1000.0;
+	}
+
+	if ((int)(dcn3_2_soc.dummy_pstate_latency_us * 1000)
+			!= dc->bb_overrides.dummy_clock_change_latency_ns
+			&& dc->bb_overrides.dummy_clock_change_latency_ns) {
+		dcn3_2_soc.dummy_pstate_latency_us =
+				dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0;
+	}
+
+	/* Override from VBIOS if VBIOS bb_info available */
+	if (dc->ctx->dc_bios->funcs->get_soc_bb_info) {
+		struct bp_soc_bb_info bb_info = {0};
+
+		/*
+		 * NOTE(review): the bb_info field names suggest 100 ns units,
+		 * yet the values are multiplied by 10 before being stored in
+		 * *_us fields - confirm the intended unit conversion.
+		 */
+		if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) {
+			if (bb_info.dram_clock_change_latency_100ns > 0)
+				dc->dml2_options.bbox_overrides.dram_clock_change_latency_us =
+				dcn3_2_soc.dram_clock_change_latency_us =
+					bb_info.dram_clock_change_latency_100ns * 10;
+
+			if (bb_info.dram_sr_enter_exit_latency_100ns > 0)
+				dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us =
+				dcn3_2_soc.sr_enter_plus_exit_time_us =
+					bb_info.dram_sr_enter_exit_latency_100ns * 10;
+
+			if (bb_info.dram_sr_exit_latency_100ns > 0)
+				dc->dml2_options.bbox_overrides.sr_exit_latency_us =
+				dcn3_2_soc.sr_exit_time_us =
+					bb_info.dram_sr_exit_latency_100ns * 10;
+		}
+	}
+
+	/* Override from VBIOS for num_chan */
+	if (dc->ctx->dc_bios->vram_info.num_chans) {
+		dc->dml2_options.bbox_overrides.dram_num_chan =
+		dcn3_2_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans;
+		dcn3_2_soc.mall_allocated_for_dcn_mbytes = (double)(dcn32_calc_num_avail_chans_for_mall(dc,
+			dc->ctx->dc_bios->vram_info.num_chans) * dc->caps.mall_size_per_mem_channel);
+	}
+
+	if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
+		dc->dml2_options.bbox_overrides.dram_chanel_width_bytes =
+		dcn3_2_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
+
+	/* DML DSC delay factor workaround */
+	dcn3_2_ip.dsc_delay_factor_wa = dc->debug.dsc_delay_factor_wa_x1000 / 1000.0;
+
+	dcn3_2_ip.min_prefetch_in_strobe_us = dc->debug.min_prefetch_in_strobe_ns / 1000.0;
+
+	/* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */
+	dcn3_2_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+	dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+	dc->dml2_options.bbox_overrides.disp_pll_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+	dc->dml2_options.bbox_overrides.xtalclk_mhz = dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency / 1000.0;
+	dc->dml2_options.bbox_overrides.dchub_refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0;
+	dc->dml2_options.bbox_overrides.dprefclk_mhz = dc->clk_mgr->dprefclk_khz / 1000.0;
+
+	/* Overrides Clock levels from CLK Mgr table entries as reported by PM FW */
+	if (bw_params->clk_table.entries[0].memclk_mhz) {
+		if (dc->debug.use_legacy_soc_bb_mechanism) {
+			unsigned int i = 0, j = 0, num_states = 0;
+
+			unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0};
+			unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0};
+			unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0};
+			unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0};
+			unsigned int min_dcfclk = UINT_MAX;
+			/* Set 199 as first value in STA target array to have a minimum DCFCLK value.
+			 * For DCN32 we set min to 199 so minimum FCLK DPM0 (300 MHz) can be achieved.
+			 *
+			 * Only the first num_dcfclk_sta_targets (4) values start out active;
+			 * the fifth slot is overwritten below when the max DCFCLK exceeds
+			 * the fourth target.
+			 */
+			unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564};
+			unsigned int num_dcfclk_sta_targets = 4, num_uclk_states = 0;
+			unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0;
+
+			/* Scan the whole DPM table for per-clock maxima and the lowest non-zero DCFCLK */
+			for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
+				if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
+					max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
+				if (bw_params->clk_table.entries[i].dcfclk_mhz != 0 &&
+						bw_params->clk_table.entries[i].dcfclk_mhz < min_dcfclk)
+					min_dcfclk = bw_params->clk_table.entries[i].dcfclk_mhz;
+				if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
+					max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
+				if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
+					max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
+				if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
+					max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
+			}
+			if (min_dcfclk > dcfclk_sta_targets[0])
+				dcfclk_sta_targets[0] = min_dcfclk;
+			/* Fall back to the hardcoded state 0 limits for clocks the table did not report */
+			if (!max_dcfclk_mhz)
+				max_dcfclk_mhz = dcn3_2_soc.clock_limits[0].dcfclk_mhz;
+			if (!max_dispclk_mhz)
+				max_dispclk_mhz = dcn3_2_soc.clock_limits[0].dispclk_mhz;
+			if (!max_dppclk_mhz)
+				max_dppclk_mhz = dcn3_2_soc.clock_limits[0].dppclk_mhz;
+			if (!max_phyclk_mhz)
+				max_phyclk_mhz = dcn3_2_soc.clock_limits[0].phyclk_mhz;
+
+			if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+				// If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array
+				dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz;
+				num_dcfclk_sta_targets++;
+			} else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+				// If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates
+				for (i = 0; i < num_dcfclk_sta_targets; i++) {
+					if (dcfclk_sta_targets[i] > max_dcfclk_mhz) {
+						dcfclk_sta_targets[i] = max_dcfclk_mhz;
+						break;
+					}
+				}
+				// Update size of array since we "removed" duplicates
+				num_dcfclk_sta_targets = i + 1;
+			}
+
+			num_uclk_states = bw_params->clk_table.num_entries;
+
+			// Calculate optimal dcfclk for each uclk
+			for (i = 0; i < num_uclk_states; i++) {
+				dcn32_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16,
+						&optimal_dcfclk_for_uclk[i], NULL);
+				if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) {
+					optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz;
+				}
+			}
+
+			// Calculate optimal uclk for each dcfclk sta target
+			for (i = 0; i < num_dcfclk_sta_targets; i++) {
+				for (j = 0; j < num_uclk_states; j++) {
+					if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) {
+						optimal_uclk_for_dcfclk_sta_targets[i] =
+								bw_params->clk_table.entries[j].memclk_mhz * 16;
+						break;
+					}
+				}
+			}
+
+			i = 0;
+			j = 0;
+			// create the final dcfclk and uclk table
+			while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) {
+				if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) {
+					dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
+					dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
+				} else {
+					if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+						dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+						dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
+					} else {
+						/* Remaining UCLK states need more DCFCLK than is available */
+						j = num_uclk_states;
+					}
+				}
+			}
+
+			/* Drain whichever of the two lists still has entries left */
+			while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) {
+				dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
+				dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
+			}
+
+			while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES &&
+					optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+				dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+				dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
+			}
+
+			/* bw_params->clk_table.entries[MAX_NUM_DPM_LVL].
+			 * MAX_NUM_DPM_LVL is 8.
+			 * dcn3_2_soc.clock_limits[DC__VOLTAGE_STATES].
+			 * DC__VOLTAGE_STATES is 40.
+			 *
+			 * The loop below indexes clk_table.entries[] with the state
+			 * index, so more states than DPM levels would read out of bounds.
+			 */
+			if (num_states > MAX_NUM_DPM_LVL) {
+				ASSERT(0);
+				return;
+			}
+
+			dcn3_2_soc.num_states = num_states;
+			for (i = 0; i < dcn3_2_soc.num_states; i++) {
+				dcn3_2_soc.clock_limits[i].state = i;
+				dcn3_2_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
+				dcn3_2_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
+
+				/* Fill all states with max values of all these clocks */
+				dcn3_2_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
+				dcn3_2_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
+				dcn3_2_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz;
+				dcn3_2_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3;
+
+				/* Populate from bw_params for DTBCLK, SOCCLK */
+				if (i > 0) {
+					if (!bw_params->clk_table.entries[i].dtbclk_mhz) {
+						dcn3_2_soc.clock_limits[i].dtbclk_mhz = dcn3_2_soc.clock_limits[i-1].dtbclk_mhz;
+					} else {
+						dcn3_2_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+					}
+				} else if (bw_params->clk_table.entries[i].dtbclk_mhz) {
+					dcn3_2_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+				}
+
+				if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0)
+					dcn3_2_soc.clock_limits[i].socclk_mhz = dcn3_2_soc.clock_limits[i-1].socclk_mhz;
+				else
+					dcn3_2_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz;
+
+				if (!dram_speed_mts[i] && i > 0)
+					dcn3_2_soc.clock_limits[i].dram_speed_mts = dcn3_2_soc.clock_limits[i-1].dram_speed_mts;
+				else
+					dcn3_2_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];
+
+				/* These clocks cannot come from bw_params, always fill from dcn3_2_soc[0] */
+				/* PHYCLK_D18, PHYCLK_D32 */
+				dcn3_2_soc.clock_limits[i].phyclk_d18_mhz = dcn3_2_soc.clock_limits[0].phyclk_d18_mhz;
+				dcn3_2_soc.clock_limits[i].phyclk_d32_mhz = dcn3_2_soc.clock_limits[0].phyclk_d32_mhz;
+			}
+		} else {
+			/* The return value is not checked here. */
+			build_synthetic_soc_states(dc->debug.disable_dc_mode_overwrite, bw_params,
+					dcn3_2_soc.clock_limits, &dcn3_2_soc.num_states);
+		}
+
+		/* Re-init DML with updated bb */
+		dml_init_instance(&dc->dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32);
+		if (dc->current_state)
+			dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32);
+	}
+
+	/* Mirror the Clk Mgr table into the DML2 bounding-box overrides; zero entries are skipped */
+	if (dc->clk_mgr->bw_params->clk_table.num_entries > 1) {
+		unsigned int i = 0;
+
+		dc->dml2_options.bbox_overrides.clks_table.num_states = dc->clk_mgr->bw_params->clk_table.num_entries;
+
+		dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels =
+			dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dcfclk_levels;
+
+		dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels =
+			dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_fclk_levels;
+
+		dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels =
+			dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels;
+
+		dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels =
+			dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_socclk_levels;
+
+		dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels =
+			dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dtbclk_levels;
+
+		dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels =
+			dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dispclk_levels;
+
+		dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dppclk_levels =
+			dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dppclk_levels;
+
+		for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dcfclk_levels; i++) {
+			if (dc->clk_mgr->bw_params->clk_table.entries[i].dcfclk_mhz)
+				dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz =
+					dc->clk_mgr->bw_params->clk_table.entries[i].dcfclk_mhz;
+		}
+
+		for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_fclk_levels; i++) {
+			if (dc->clk_mgr->bw_params->clk_table.entries[i].fclk_mhz)
+				dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].fclk_mhz =
+					dc->clk_mgr->bw_params->clk_table.entries[i].fclk_mhz;
+		}
+
+		for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels; i++) {
+			if (dc->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz)
+				dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz =
+					dc->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz;
+		}
+
+		for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_socclk_levels; i++) {
+			if (dc->clk_mgr->bw_params->clk_table.entries[i].socclk_mhz)
+				dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].socclk_mhz =
+					dc->clk_mgr->bw_params->clk_table.entries[i].socclk_mhz;
+		}
+
+		for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dtbclk_levels; i++) {
+			if (dc->clk_mgr->bw_params->clk_table.entries[i].dtbclk_mhz)
+				dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz =
+					dc->clk_mgr->bw_params->clk_table.entries[i].dtbclk_mhz;
+		}
+
+		/* DPPCLK override entries are filled from the DISPCLK levels as well */
+		for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dispclk_levels; i++) {
+			if (dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz) {
+				dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz =
+					dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz;
+				dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz =
+					dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz;
+			}
+		}
+	}
+}
+
+/*
+ * dcn32_zero_pipe_dcc_fraction - clear the DCC zero-size request fractions
+ *
+ * @pipes: DML pipe parameter array
+ * @pipe_cnt: index of the entry in @pipes to modify (used directly as the
+ *            array index, not as an element count)
+ *
+ * Sets both the luma and chroma dcc_fraction_of_zs_req fields of the selected
+ * entry to 0. Must be called with the FPU enabled.
+ */
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+				  int pipe_cnt)
+{
+	dc_assert_fp_enabled();
+
+	pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+	pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+}
+
+/*
+ * dcn32_allow_subvp_with_active_margin - check a pipe against the SubVP
+ * active-margin allow list
+ *
+ * @pipe: pipe to check; pipe->stream is dereferenced unconditionally, so the
+ *        caller must pass a pipe that has a stream.
+ *
+ * The stream refresh rate (rounded up, in Hz) must lie within
+ * [subvp_active_margin_list.min_refresh, subvp_active_margin_list.max_refresh]
+ * and dcn32_check_native_scaling_for_res() must accept one of the resolutions
+ * in subvp_active_margin_list.res[].
+ *
+ * Return: true if the pipe qualifies, false otherwise
+ */
+bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe)
+{
+	/*
+	 * Keep the intermediate in 64 bits: pix_clk_100hz * 100 plus the frame
+	 * size can exceed 32 bits before the two divisions bring it back down.
+	 */
+	uint64_t refresh_rate;
+	uint32_t min_refresh = subvp_active_margin_list.min_refresh;
+	uint32_t max_refresh = subvp_active_margin_list.max_refresh;
+	uint32_t i;
+
+	/* The refresh rate does not depend on the list entry, so compute it once. */
+	refresh_rate = pipe->stream->timing.pix_clk_100hz * (uint64_t)100 +
+		(uint64_t)pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1;
+	refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total);
+	refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total);
+
+	if (refresh_rate < min_refresh || refresh_rate > max_refresh)
+		return false;
+
+	for (i = 0; i < SUBVP_ACTIVE_MARGIN_LIST_LEN; i++) {
+		uint32_t width = subvp_active_margin_list.res[i].width;
+		uint32_t height = subvp_active_margin_list.res[i].height;
+
+		if (dcn32_check_native_scaling_for_res(pipe, width, height))
+			return true;
+	}
+	return false;
+}
+
+/*
+ * Refresh rate of @stream in Hz, rounded up, derived from the pixel clock and
+ * the total raster size. The arithmetic is done in double so that
+ * pix_clk_100hz * 100 cannot wrap a 32-bit integer; the result is truncated
+ * to an integer on return.
+ */
+static uint32_t dcn32_subvp_stream_refresh_rate(const struct dc_stream_state *stream)
+{
+	double pixels_per_frame = (double)stream->timing.v_total * stream->timing.h_total;
+	double pix_clk_hz = (double)stream->timing.pix_clk_100hz * 100;
+
+	return (pix_clk_hz + pixels_per_frame - 1) / pixels_per_frame;
+}
+
+/**
+ * dcn32_allow_subvp_high_refresh_rate: Determine if the high refresh rate config will allow subvp
+ *
+ * @dc: Current DC state
+ * @context: New DC state to be programmed
+ * @pipe: Pipe to be considered for use in subvp
+ *
+ * On high refresh rate display configs, we will allow subvp under the following conditions:
+ * 1. Resolution is one of the entries of subvp_high_refresh_list
+ *    (3840x2160, 3440x1440, or 2560x1440)
+ * 2. Refresh rate is within the subvp_high_refresh_list range (120hz - 165hz),
+ *    and no stream in @context refreshes slower than the range minimum
+ * 3. No scaling
+ * 4. Freesync is inactive
+ * 5. For single display cases, freesync must be disabled
+ *
+ * Return: True if pipe can be used for subvp, false otherwise
+ */
+bool dcn32_allow_subvp_high_refresh_rate(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe)
+{
+	bool allow = false;
+	uint32_t refresh_rate = 0;
+	uint32_t subvp_min_refresh = subvp_high_refresh_list.min_refresh;
+	uint32_t subvp_max_refresh = subvp_high_refresh_list.max_refresh;
+	uint32_t min_refresh = subvp_max_refresh;
+	uint32_t i;
+
+	/* Only allow SubVP on high refresh displays if all connected displays
+	 * are considered "high refresh" (i.e. >= 120hz). We do not want to
+	 * allow combinations such as 120hz (SubVP) + 60hz (SubVP).
+	 */
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+		if (!pipe_ctx->stream)
+			continue;
+		refresh_rate = dcn32_subvp_stream_refresh_rate(pipe_ctx->stream);
+
+		if (refresh_rate < min_refresh)
+			min_refresh = refresh_rate;
+	}
+
+	if (!dc->debug.disable_subvp_high_refresh && min_refresh >= subvp_min_refresh && pipe->stream &&
+			pipe->plane_state && !(pipe->stream->vrr_active_variable || pipe->stream->vrr_active_fixed)) {
+		refresh_rate = dcn32_subvp_stream_refresh_rate(pipe->stream);
+		if (refresh_rate >= subvp_min_refresh && refresh_rate <= subvp_max_refresh) {
+			for (i = 0; i < SUBVP_HIGH_REFRESH_LIST_LEN; i++) {
+				uint32_t width = subvp_high_refresh_list.res[i].width;
+				uint32_t height = subvp_high_refresh_list.res[i].height;
+
+				if (dcn32_check_native_scaling_for_res(pipe, width, height)) {
+					/* A lone stream must additionally have freesync disallowed */
+					if ((context->stream_count == 1 && !pipe->stream->allow_freesync) || context->stream_count > 1) {
+						allow = true;
+						break;
+					}
+				}
+			}
+		}
+	}
+	return allow;
+}
+
+/**
+ * dcn32_determine_max_vratio_prefetch: Determine max Vratio for prefetch by driver policy
+ *
+ * @dc: Current DC state (not read by this function)
+ * @context: New DC state to be programmed
+ *
+ * The limit starts at __DML_MAX_BW_RATIO_PRE__ and is raised to
+ * __DML_MAX_VRATIO_PRE__ for single-stream configs with more than one plane
+ * when any plane uses a YUV420 video format.
+ *
+ * Return: Max vratio for prefetch
+ */
+double dcn32_determine_max_vratio_prefetch(struct dc *dc, struct dc_state *context)
+{
+	double vratio_limit = __DML_MAX_BW_RATIO_PRE__;
+	int plane;
+
+	/* Only single display MPO configs (one stream, several planes) qualify */
+	if (context->stream_count != 1 || context->stream_status[0].plane_count <= 1)
+		return vratio_limit;
+
+	for (plane = 0; plane < context->stream_status[0].plane_count; plane++) {
+		switch (context->stream_status[0].plane_states[plane]->format) {
+		case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr:
+		case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb:
+			vratio_limit = __DML_MAX_VRATIO_PRE__;
+			break;
+		default:
+			break;
+		}
+	}
+	return vratio_limit;
+}
+
+/**
+ * dcn32_assign_fpo_vactive_candidate - Assign the FPO stream candidate for FPO + VActive case
+ *
+ * This function chooses the FPO candidate stream for FPO + VActive cases (2 stream config).
+ * For FPO + VActive cases, the assumption is that one display has ActiveMargin > 0, and the
+ * other display has ActiveMargin <= 0. This function will choose the pipe/stream that has
+ * ActiveMargin <= 0 to be the FPO stream candidate if found.
+ *
+ * @dc: current dc state
+ * @context: new dc state
+ * @fpo_candidate_stream: pointer to FPO stream candidate if one is found;
+ *                        left untouched when no pipe qualifies
+ *
+ * Return: void
+ */
+void dcn32_assign_fpo_vactive_candidate(struct dc *dc, const struct dc_state *context, struct dc_stream_state **fpo_candidate_stream)
+{
+	const struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+	unsigned int dml_idx = 0;
+	unsigned int n;
+
+	for (n = 0; n < dc->res_pool->pipe_count; n++) {
+		const struct pipe_ctx *cur = &context->res_ctx.pipe_ctx[n];
+
+		/* In DCN32/321, FPO uses per-pipe P-State force.
+		 * If there's no planes, HUBP is power gated and
+		 * therefore programming UCLK_PSTATE_FORCE does
+		 * nothing (P-State will always be asserted naturally
+		 * on a pipe that has HUBP power gated). Therefore we
+		 * only want to enable FPO if the FPO pipe has both
+		 * a stream and a plane.
+		 */
+		if (cur->stream && cur->plane_state) {
+			if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[dml_idx]] <= 0) {
+				*fpo_candidate_stream = cur->stream;
+				return;
+			}
+			/* dml_idx only advances for pipes that have both a stream and a plane */
+			dml_idx++;
+		}
+	}
+}
+
+/**
+ * dcn32_find_vactive_pipe - Determines if the config has a pipe that can switch in VACTIVE
+ *
+ * @dc: current dc state
+ * @context: new dc state
+ * @fpo_candidate_stream: candidate stream to be chosen for FPO
+ * @vactive_margin_req_us: The vactive margin required for a vactive pipe to be considered "found"
+ *
+ * Return: True if VACTIVE display is found, false otherwise
+ */
+bool dcn32_find_vactive_pipe(struct dc *dc, const struct dc_state *context, struct dc_stream_state *fpo_candidate_stream, uint32_t vactive_margin_req_us)
+{
+	const struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+	unsigned int dml_idx = 0;
+	unsigned int blank_us;
+	unsigned int n;
+
+	for (n = 0; n < dc->res_pool->pipe_count; n++) {
+		const struct pipe_ctx *cur = &context->res_ctx.pipe_ctx[n];
+
+		if (!cur->stream)
+			continue;
+
+		/* Don't need to check for vactive margin on the FPO candidate stream */
+		if (!fpo_candidate_stream || cur->stream != fpo_candidate_stream) {
+			/* Every plane (apart from the ones driven by the FPO pipes) needs to have active margin
+			 * in order for us to have found a valid "vactive" config for FPO + Vactive
+			 */
+			blank_us = ((cur->stream->timing.v_total - cur->stream->timing.v_addressable) * cur->stream->timing.h_total /
+					(double)(cur->stream->timing.pix_clk_100hz * 100)) * 1000000;
+			if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[dml_idx]] < vactive_margin_req_us ||
+					cur->stream->vrr_active_variable || cur->stream->vrr_active_fixed || blank_us >= dc->debug.fpo_vactive_max_blank_us)
+				return false;
+		}
+		/* Every pipe with a stream advances the DML index, checked or not */
+		dml_idx++;
+	}
+	return true;
+}
+
+/*
+ * dcn32_set_clock_limits - set the DCFCLK limit of dcn3_2_soc state 0
+ *
+ * @soc_bb: not read by this function
+ *
+ * Writes 1200.0 into dcn3_2_soc.clock_limits[0].dcfclk_mhz. Must be called
+ * with the FPU enabled.
+ */
+void dcn32_set_clock_limits(const struct _vcs_dpi_soc_bounding_box_st *soc_bb)
+{
+	struct _vcs_dpi_voltage_scaling_st *lowest_state = &dcn3_2_soc.clock_limits[0];
+
+	dc_assert_fp_enabled();
+	lowest_state->dcfclk_mhz = 1200.0;
+}
+
+/*
+ * dcn32_override_min_req_memclk - bump the requested memory clock off DPM 0
+ *
+ * @dc: current dc state
+ * @context: new dc state whose DRAMSpeed / dramclk_khz may be raised
+ *
+ * WA: restrict FPO and SubVP to use first non-strobe mode (DCN32 BW issue).
+ * Applies only when FW-based MCLK switching or SubVP is in use, the SoC has
+ * at most 8 channels, more than one MEMCLK level exists, and the requested
+ * DRAM speed does not exceed that of MEMCLK level 0. In that case the request
+ * is raised to MEMCLK level 1.
+ */
+void dcn32_override_min_req_memclk(struct dc *dc, struct dc_state *context)
+{
+	const struct clk_bw_params *clk_params;
+	int num_mclk_levels;
+
+	if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !dcn32_subvp_in_use(dc, context))
+		return;
+
+	if (dc->dml.soc.num_chans > 8)
+		return;
+
+	clk_params = dc->clk_mgr->bw_params;
+	num_mclk_levels = clk_params->clk_table.num_entries_per_clk.num_memclk_levels;
+
+	if (num_mclk_levels <= 1)
+		return;
+
+	if (context->bw_ctx.dml.vba.DRAMSpeed > clk_params->clk_table.entries[0].memclk_mhz * 16)
+		return;
+
+	/* DRAM speed in MT/s is 16x the memory clock in MHz */
+	context->bw_ctx.dml.vba.DRAMSpeed = clk_params->clk_table.entries[1].memclk_mhz * 16;
+	context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
new file mode 100644
index 000000000000..273d2bd79d85
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN32_FPU_H__
+#define __DCN32_FPU_H__
+
+#include "clk_mgr_internal.h"
+
+void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr);
+
+void dcn32_helper_populate_phantom_dlg_params(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt);
+
+void dcn32_set_phantom_stream_timing(struct dc *dc,
+ struct dc_state *context,
+ struct pipe_ctx *ref_pipe,
+ struct dc_stream_state *phantom_stream,
+ display_e2e_pipe_params_st *pipes,
+ unsigned int pipe_cnt,
+ unsigned int dc_pipe_idx);
+
+bool dcn32_internal_validate_bw(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int *pipe_cnt_out,
+ int *vlevel_out,
+ enum dc_validate_mode validate_mode);
+
+void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel);
+
+void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params);
+
+int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel);
+
+void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
+
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+ int pipe_cnt);
+
+void dcn32_assign_fpo_vactive_candidate(struct dc *dc, const struct dc_state *context, struct dc_stream_state **fpo_candidate_stream);
+
+/* Returns true when every pipe with a stream, other than the FPO candidate, passes the VACTIVE margin checks. */
+bool dcn32_find_vactive_pipe(struct dc *dc, const struct dc_state *context, struct dc_stream_state *fpo_candidate_stream, uint32_t vactive_margin_req_us);
+
+void dcn32_override_min_req_memclk(struct dc *dc, struct dc_state *context);
+
+void dcn32_set_clock_limits(const struct _vcs_dpi_soc_bounding_box_st *soc_bb);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
new file mode 100644
index 000000000000..0782a34689a0
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -0,0 +1,3762 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dc.h"
+#include "../display_mode_lib.h"
+#include "display_mode_vba_32.h"
+#include "../dml_inline_defs.h"
+#include "display_mode_vba_util_32.h"
+
+void dml32_recalculate(struct display_mode_lib *mode_lib);
+static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
+ struct display_mode_lib *mode_lib);
+void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib);
+/* Recompute the DML32 results held in mode_lib->vba by running the steps below, in this order. */
+void dml32_recalculate(struct display_mode_lib *mode_lib)
+{
+ ModeSupportAndSystemConfiguration(mode_lib);
+ /* DET / compressed-buffer limits; the nomDET override arguments are passed as constants (false, 0). */
+ dml32_CalculateMaxDETAndMinCompressedBufferSize(mode_lib->vba.ConfigReturnBufferSizeInKByte,
+ mode_lib->vba.ROBBufferSizeInKByte,
+ DC__NUM_DPP,
+ false, //mode_lib->vba.override_setting.nomDETInKByteOverrideEnable,
+ 0, //mode_lib->vba.override_setting.nomDETInKByteOverrideValue,
+
+ /* Output */
+ &mode_lib->vba.MaxTotalDETInKByte, &mode_lib->vba.nomDETInKByte,
+ &mode_lib->vba.MinCompressedBufferSizeInKByte);
+ /* Pixel-clock adjustment runs before the main clock/prefetch/watermark calculation. */
+ PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
+#endif /* __DML_VBA_DEBUG__ */
+ DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
+}
+
+static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
+ struct display_mode_lib *mode_lib)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ unsigned int j, k;
+ bool ImmediateFlipRequirementFinal;
+ int iteration;
+ double MaxTotalRDBandwidth;
+ unsigned int NextPrefetchMode;
+ double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
+ bool DestinationLineTimesForPrefetchLessThan2 = false;
+ bool VRatioPrefetchMoreThanMax = false;
+ double TWait;
+ double TotalWRBandwidth = 0;
+ double WRBandwidth = 0;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: --- START ---\n", __func__);
+ dml_print("DML::%s: mode_lib->vba.PrefetchMode = %d\n", __func__, mode_lib->vba.PrefetchMode);
+ dml_print("DML::%s: mode_lib->vba.ImmediateFlipSupport = %d\n", __func__, mode_lib->vba.ImmediateFlipSupport);
+ dml_print("DML::%s: mode_lib->vba.VoltageLevel = %d\n", __func__, mode_lib->vba.VoltageLevel);
+#endif
+
+ v->WritebackDISPCLK = 0.0;
+ v->GlobalDPPCLK = 0.0;
+
+ // DISPCLK and DPPCLK Calculation
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.WritebackEnable[k]) {
+ v->WritebackDISPCLK = dml_max(v->WritebackDISPCLK,
+ dml32_CalculateWriteBackDISPCLK(
+ mode_lib->vba.WritebackPixelFormat[k],
+ mode_lib->vba.PixelClock[k], mode_lib->vba.WritebackHRatio[k],
+ mode_lib->vba.WritebackVRatio[k],
+ mode_lib->vba.WritebackHTaps[k],
+ mode_lib->vba.WritebackVTaps[k],
+ mode_lib->vba.WritebackSourceWidth[k],
+ mode_lib->vba.WritebackDestinationWidth[k],
+ mode_lib->vba.HTotal[k], mode_lib->vba.WritebackLineBufferSize,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed));
+ }
+ }
+
+ v->DISPCLK_calculated = v->WritebackDISPCLK;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k) {
+ v->DISPCLK_calculated = dml_max(v->DISPCLK_calculated,
+ dml32_CalculateRequiredDispclk(
+ mode_lib->vba.ODMCombineEnabled[k],
+ mode_lib->vba.PixelClock[k],
+ mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading,
+ mode_lib->vba.DISPCLKRampingMargin,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed,
+ mode_lib->vba.MaxDppclk[v->soc.num_states - 1]));
+ }
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(mode_lib->vba.HRatio[k],
+ mode_lib->vba.HRatioChroma[k],
+ mode_lib->vba.VRatio[k],
+ mode_lib->vba.VRatioChroma[k],
+ mode_lib->vba.MaxDCHUBToPSCLThroughput,
+ mode_lib->vba.MaxPSCLToLBThroughput,
+ mode_lib->vba.PixelClock[k],
+ mode_lib->vba.SourcePixelFormat[k],
+ mode_lib->vba.htaps[k],
+ mode_lib->vba.HTAPsChroma[k],
+ mode_lib->vba.vtaps[k],
+ mode_lib->vba.VTAPsChroma[k],
+
+ /* Output */
+ &v->PSCL_THROUGHPUT_LUMA[k], &v->PSCL_THROUGHPUT_CHROMA[k],
+ &v->DPPCLKUsingSingleDPP[k]);
+ }
+
+ dml32_CalculateDPPCLK(mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed, v->DPPCLKUsingSingleDPP, mode_lib->vba.DPPPerPlane,
+ /* Output */
+ &v->GlobalDPPCLK, v->DPPCLK);
+
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ v->DPPCLK_calculated[k] = v->DPPCLK[k];
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ dml32_CalculateBytePerPixelAndBlockSizes(
+ mode_lib->vba.SourcePixelFormat[k],
+ mode_lib->vba.SurfaceTiling[k],
+
+ /* Output */
+ &v->BytePerPixelY[k],
+ &v->BytePerPixelC[k],
+ &v->BytePerPixelDETY[k],
+ &v->BytePerPixelDETC[k],
+ &v->BlockHeight256BytesY[k],
+ &v->BlockHeight256BytesC[k],
+ &v->BlockWidth256BytesY[k],
+ &v->BlockWidth256BytesC[k],
+ &v->BlockHeightY[k],
+ &v->BlockHeightC[k],
+ &v->BlockWidthY[k],
+ &v->BlockWidthC[k]);
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: %d\n", __func__, __LINE__);
+#endif
+ dml32_CalculateSwathWidth(
+ false, // ForceSingleDPP
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.SourcePixelFormat,
+ mode_lib->vba.SourceRotation,
+ mode_lib->vba.ViewportStationary,
+ mode_lib->vba.ViewportWidth,
+ mode_lib->vba.ViewportHeight,
+ mode_lib->vba.ViewportXStartY,
+ mode_lib->vba.ViewportYStartY,
+ mode_lib->vba.ViewportXStartC,
+ mode_lib->vba.ViewportYStartC,
+ mode_lib->vba.SurfaceWidthY,
+ mode_lib->vba.SurfaceWidthC,
+ mode_lib->vba.SurfaceHeightY,
+ mode_lib->vba.SurfaceHeightC,
+ mode_lib->vba.ODMCombineEnabled,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->BlockHeight256BytesY,
+ v->BlockHeight256BytesC,
+ v->BlockWidth256BytesY,
+ v->BlockWidth256BytesC,
+ mode_lib->vba.BlendingAndTiming,
+ mode_lib->vba.HActive,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.DPPPerPlane,
+
+ /* Output */
+ v->SwathWidthSingleDPPY, v->SwathWidthSingleDPPC, v->SwathWidthY, v->SwathWidthC,
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_integer_array[0], // Integer MaximumSwathHeightY[]
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_integer_array[1], // Integer MaximumSwathHeightC[]
+ v->swath_width_luma_ub, v->swath_width_chroma_ub);
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->ReadBandwidthSurfaceLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k]
+ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
+ v->ReadBandwidthSurfaceChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k]
+ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
+ * mode_lib->vba.VRatioChroma[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ReadBandwidthSurfaceLuma[%i] = %fBps\n",
+ __func__, k, v->ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: ReadBandwidthSurfaceChroma[%i] = %fBps\n",
+ __func__, k, v->ReadBandwidthSurfaceChroma[k]);
+#endif
+ }
+
+ {
+ // VBA_DELTA
+ // Calculate DET size, swath height
+ dml32_CalculateSwathAndDETConfiguration(
+ mode_lib->vba.DETSizeOverride,
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.ConfigReturnBufferSizeInKByte,
+ mode_lib->vba.MaxTotalDETInKByte,
+ mode_lib->vba.MinCompressedBufferSizeInKByte,
+ false, /* ForceSingleDPP */
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.nomDETInKByte,
+ mode_lib->vba.UseUnboundedRequesting,
+ mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
+ mode_lib->vba.ip.pixel_chunk_size_kbytes,
+ mode_lib->vba.ip.rob_buffer_size_kbytes,
+ mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal,
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_output_encoder_array, /* output_encoder_class Output[] */
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_single_array[0], /* Single MaximumSwathWidthLuma[] */
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_single_array[1], /* Single MaximumSwathWidthChroma[] */
+ mode_lib->vba.SourceRotation,
+ mode_lib->vba.ViewportStationary,
+ mode_lib->vba.SourcePixelFormat,
+ mode_lib->vba.SurfaceTiling,
+ mode_lib->vba.ViewportWidth,
+ mode_lib->vba.ViewportHeight,
+ mode_lib->vba.ViewportXStartY,
+ mode_lib->vba.ViewportYStartY,
+ mode_lib->vba.ViewportXStartC,
+ mode_lib->vba.ViewportYStartC,
+ mode_lib->vba.SurfaceWidthY,
+ mode_lib->vba.SurfaceWidthC,
+ mode_lib->vba.SurfaceHeightY,
+ mode_lib->vba.SurfaceHeightC,
+ v->BlockHeight256BytesY,
+ v->BlockHeight256BytesC,
+ v->BlockWidth256BytesY,
+ v->BlockWidth256BytesC,
+ mode_lib->vba.ODMCombineEnabled,
+ mode_lib->vba.BlendingAndTiming,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->BytePerPixelDETY,
+ v->BytePerPixelDETC,
+ mode_lib->vba.HActive,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.HRatioChroma,
+ mode_lib->vba.DPPPerPlane,
+
+ /* Output */
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_long_array[0], /* Long swath_width_luma_ub[] */
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_long_array[1], /* Long swath_width_chroma_ub[] */
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_double_array[0], /* Long SwathWidth[] */
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_double_array[1], /* Long SwathWidthChroma[] */
+ mode_lib->vba.SwathHeightY,
+ mode_lib->vba.SwathHeightC,
+ mode_lib->vba.DETBufferSizeInKByte,
+ mode_lib->vba.DETBufferSizeY,
+ mode_lib->vba.DETBufferSizeC,
+ &v->UnboundedRequestEnabled,
+ &v->CompressedBufferSizeInkByte,
+ &v->CompBufReservedSpaceKBytes,
+ &v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_boolean, /* bool *CompBufReservedSpaceNeedAjustment */
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_boolean_array, /* bool ViewportSizeSupportPerSurface[] */
+ &v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_boolean); /* bool *ViewportSizeSupport */
+ }
+
+ v->CompBufReservedSpaceZs = v->CompBufReservedSpaceKBytes * 1024.0 / 256.0;
+ v->CompBufReservedSpace64B = v->CompBufReservedSpaceKBytes * 1024.0 / 64.0;
+
+ // DCFCLK Deep Sleep
+ dml32_CalculateDCFCLKDeepSleep(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.VRatioChroma,
+ v->SwathWidthY,
+ v->SwathWidthC,
+ mode_lib->vba.DPPPerPlane,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.HRatioChroma,
+ mode_lib->vba.PixelClock,
+ v->PSCL_THROUGHPUT_LUMA,
+ v->PSCL_THROUGHPUT_CHROMA,
+ mode_lib->vba.DPPCLK,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ mode_lib->vba.ReturnBusWidth,
+
+ /* Output */
+ &v->DCFCLKDeepSleep);
+
+ // DSCCLK
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if ((mode_lib->vba.BlendingAndTiming[k] != k) || !mode_lib->vba.DSCEnabled[k]) {
+ v->DSCCLK_calculated[k] = 0.0;
+ } else {
+ if (mode_lib->vba.OutputFormat[k] == dm_420)
+ mode_lib->vba.DSCFormatFactor = 2;
+ else if (mode_lib->vba.OutputFormat[k] == dm_444)
+ mode_lib->vba.DSCFormatFactor = 1;
+ else if (mode_lib->vba.OutputFormat[k] == dm_n422)
+ mode_lib->vba.DSCFormatFactor = 2;
+ else
+ mode_lib->vba.DSCFormatFactor = 1;
+ if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
+ v->DSCCLK_calculated[k] = mode_lib->vba.PixelClockBackEnd[k] / 12
+ / mode_lib->vba.DSCFormatFactor
+ / (1 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ else if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
+ v->DSCCLK_calculated[k] = mode_lib->vba.PixelClockBackEnd[k] / 6
+ / mode_lib->vba.DSCFormatFactor
+ / (1 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ else
+ v->DSCCLK_calculated[k] = mode_lib->vba.PixelClockBackEnd[k] / 3
+ / mode_lib->vba.DSCFormatFactor
+ / (1 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ }
+ }
+
+ // DSC Delay
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->DSCDelay[k] = dml32_DSCDelayRequirement(mode_lib->vba.DSCEnabled[k],
+ mode_lib->vba.ODMCombineEnabled[k], mode_lib->vba.DSCInputBitPerComponent[k],
+ mode_lib->vba.OutputBppPerState[mode_lib->vba.VoltageLevel][k],
+ mode_lib->vba.HActive[k], mode_lib->vba.HTotal[k],
+ mode_lib->vba.NumberOfDSCSlices[k], mode_lib->vba.OutputFormat[k],
+ mode_lib->vba.Output[k], mode_lib->vba.PixelClock[k],
+ mode_lib->vba.PixelClockBackEnd[k], mode_lib->vba.ip.dsc_delay_factor_wa);
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j) // NumberOfSurfaces
+ if (j != k && mode_lib->vba.BlendingAndTiming[k] == j && mode_lib->vba.DSCEnabled[j])
+ v->DSCDelay[k] = v->DSCDelay[j];
+
+ //Immediate Flip
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->ImmediateFlipSupportedSurface[k] = mode_lib->vba.ImmediateFlipSupport
+ && (mode_lib->vba.ImmediateFlipRequirement[k] != dm_immediate_flip_not_required);
+ }
+
+ // Prefetch
+ dml32_CalculateSurfaceSizeInMall(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.MALLAllocatedForDCNFinal,
+ mode_lib->vba.UseMALLForStaticScreen,
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.DCCEnable,
+ mode_lib->vba.ViewportStationary,
+ mode_lib->vba.ViewportXStartY,
+ mode_lib->vba.ViewportYStartY,
+ mode_lib->vba.ViewportXStartC,
+ mode_lib->vba.ViewportYStartC,
+ mode_lib->vba.ViewportWidth,
+ mode_lib->vba.ViewportHeight,
+ v->BytePerPixelY,
+ mode_lib->vba.ViewportWidthChroma,
+ mode_lib->vba.ViewportHeightChroma,
+ v->BytePerPixelC,
+ mode_lib->vba.SurfaceWidthY,
+ mode_lib->vba.SurfaceWidthC,
+ mode_lib->vba.SurfaceHeightY,
+ mode_lib->vba.SurfaceHeightC,
+ v->BlockWidth256BytesY,
+ v->BlockWidth256BytesC,
+ v->BlockHeight256BytesY,
+ v->BlockHeight256BytesC,
+ v->BlockWidthY,
+ v->BlockWidthC,
+ v->BlockHeightY,
+ v->BlockHeightC,
+ mode_lib->vba.DCCMetaPitchY,
+ mode_lib->vba.DCCMetaPitchC,
+
+ /* Output */
+ v->SurfaceSizeInMALL,
+ &v->dummy_vars.
+ DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_boolean2); /* Boolean *ExceededMALLSize */
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].PixelClock = mode_lib->vba.PixelClock[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].DPPPerSurface = mode_lib->vba.DPPPerPlane[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].SourceRotation = mode_lib->vba.SourceRotation[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportHeight = mode_lib->vba.ViewportHeight[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportHeightChroma = mode_lib->vba.ViewportHeightChroma[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockWidth256BytesY = v->BlockWidth256BytesY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockHeight256BytesY = v->BlockHeight256BytesY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockWidth256BytesC = v->BlockWidth256BytesC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockHeight256BytesC = v->BlockHeight256BytesC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockWidthY = v->BlockWidthY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockHeightY = v->BlockHeightY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockWidthC = v->BlockWidthC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockHeightC = v->BlockHeightC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].InterlaceEnable = mode_lib->vba.Interlace[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].HTotal = mode_lib->vba.HTotal[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].DCCEnable = mode_lib->vba.DCCEnable[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].SurfaceTiling = mode_lib->vba.SurfaceTiling[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BytePerPixelY = v->BytePerPixelY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BytePerPixelC = v->BytePerPixelC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].VRatio = mode_lib->vba.VRatio[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].VRatioChroma = mode_lib->vba.VRatioChroma[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].VTaps = mode_lib->vba.vtaps[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].VTapsChroma = mode_lib->vba.VTAPsChroma[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].PitchY = mode_lib->vba.PitchY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].DCCMetaPitchY = mode_lib->vba.DCCMetaPitchY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].PitchC = mode_lib->vba.PitchC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].DCCMetaPitchC = mode_lib->vba.DCCMetaPitchC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportStationary = mode_lib->vba.ViewportStationary[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportXStart = mode_lib->vba.ViewportXStartY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportYStart = mode_lib->vba.ViewportYStartY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportXStartC = mode_lib->vba.ViewportXStartC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportYStartC = mode_lib->vba.ViewportYStartC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->vba.ForceOneRowForFrame[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].SwathHeightY = mode_lib->vba.SwathHeightY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].SwathHeightC = mode_lib->vba.SwathHeightC[k];
+ }
+
+ {
+
+ dml32_CalculateVMRowAndSwath(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters,
+ v->SurfaceSizeInMALL,
+ mode_lib->vba.PTEBufferSizeInRequestsLuma,
+ mode_lib->vba.PTEBufferSizeInRequestsChroma,
+ mode_lib->vba.DCCMetaBufferSizeBytes,
+ mode_lib->vba.UseMALLForStaticScreen,
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.MALLAllocatedForDCNFinal,
+ v->SwathWidthY,
+ v->SwathWidthC,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.HostVMEnable,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels,
+ mode_lib->vba.GPUVMMaxPageTableLevels,
+ mode_lib->vba.GPUVMMinPageSizeKBytes,
+ mode_lib->vba.HostVMMinPageSize,
+
+ /* Output */
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean_array2[0], // Boolean PTEBufferSizeNotExceeded[]
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean_array2[1], // Boolean DCCMetaBufferSizeNotExceeded[]
+ v->dpte_row_width_luma_ub,
+ v->dpte_row_width_chroma_ub,
+ v->dpte_row_height,
+ v->dpte_row_height_chroma,
+ v->dpte_row_height_linear,
+ v->dpte_row_height_linear_chroma,
+ v->meta_req_width,
+ v->meta_req_width_chroma,
+ v->meta_req_height,
+ v->meta_req_height_chroma,
+ v->meta_row_width,
+ v->meta_row_width_chroma,
+ v->meta_row_height,
+ v->meta_row_height_chroma,
+ v->vm_group_bytes,
+ v->dpte_group_bytes,
+ v->PixelPTEReqWidthY,
+ v->PixelPTEReqHeightY,
+ v->PTERequestSizeY,
+ v->PixelPTEReqWidthC,
+ v->PixelPTEReqHeightC,
+ v->PTERequestSizeC,
+ v->dpde0_bytes_per_frame_ub_l,
+ v->meta_pte_bytes_per_frame_ub_l,
+ v->dpde0_bytes_per_frame_ub_c,
+ v->meta_pte_bytes_per_frame_ub_c,
+ v->PrefetchSourceLinesY,
+ v->PrefetchSourceLinesC,
+ v->VInitPreFillY, v->VInitPreFillC,
+ v->MaxNumSwathY,
+ v->MaxNumSwathC,
+ v->meta_row_bw,
+ v->dpte_row_bw,
+ v->PixelPTEBytesPerRow,
+ v->PDEAndMetaPTEBytesFrame,
+ v->MetaRowByte,
+ v->Use_One_Row_For_Frame,
+ v->Use_One_Row_For_Frame_Flip,
+ v->UsesMALLForStaticScreen,
+ v->PTE_BUFFER_MODE,
+ v->BIGK_FRAGMENT_SIZE);
+ }
+
+
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.ReorderBytes = mode_lib->vba.NumberOfChannels
+ * dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
+ mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
+ mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly);
+
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.VMDataOnlyReturnBW = dml32_get_return_bw_mbps_vm_only(
+ &mode_lib->vba.soc,
+ mode_lib->vba.VoltageLevel,
+ mode_lib->vba.DCFCLK,
+ mode_lib->vba.FabricClock,
+ mode_lib->vba.DRAMSpeed);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: mode_lib->vba.ReturnBusWidth = %f\n", __func__, mode_lib->vba.ReturnBusWidth);
+ dml_print("DML::%s: mode_lib->vba.DCFCLK = %f\n", __func__, mode_lib->vba.DCFCLK);
+ dml_print("DML::%s: mode_lib->vba.FabricClock = %f\n", __func__, mode_lib->vba.FabricClock);
+ dml_print("DML::%s: mode_lib->vba.FabricDatapathToDCNDataReturn = %f\n", __func__,
+ mode_lib->vba.FabricDatapathToDCNDataReturn);
+ dml_print("DML::%s: mode_lib->vba.PercentOfIdealSDPPortBWReceivedAfterUrgLatency = %f\n",
+ __func__, mode_lib->vba.PercentOfIdealSDPPortBWReceivedAfterUrgLatency);
+ dml_print("DML::%s: mode_lib->vba.DRAMSpeed = %f\n", __func__, mode_lib->vba.DRAMSpeed);
+ dml_print("DML::%s: mode_lib->vba.NumberOfChannels = %f\n", __func__, mode_lib->vba.NumberOfChannels);
+ dml_print("DML::%s: mode_lib->vba.DRAMChannelWidth = %f\n", __func__, mode_lib->vba.DRAMChannelWidth);
+ dml_print("DML::%s: mode_lib->vba.PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n",
+ __func__, mode_lib->vba.PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
+ dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
+ dml_print("DML::%s: ReturnBW = %f\n", __func__, mode_lib->vba.ReturnBW);
+#endif
+
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor = 1.0;
+
+ if (mode_lib->vba.GPUVMEnable && mode_lib->vba.HostVMEnable)
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .HostVMInefficiencyFactor =
+ mode_lib->vba.ReturnBW / v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .VMDataOnlyReturnBW;
+
+ mode_lib->vba.TotalDCCActiveDPP = 0;
+ mode_lib->vba.TotalActiveDPP = 0;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + mode_lib->vba.DPPPerPlane[k];
+ if (mode_lib->vba.DCCEnable[k])
+ mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP
+ + mode_lib->vba.DPPPerPlane[k];
+ }
+
+ v->UrgentExtraLatency = dml32_CalculateExtraLatency(
+ mode_lib->vba.RoundTripPingLatencyCycles,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.ReorderBytes,
+ mode_lib->vba.DCFCLK,
+ mode_lib->vba.TotalActiveDPP,
+ mode_lib->vba.PixelChunkSizeInKByte,
+ mode_lib->vba.TotalDCCActiveDPP,
+ mode_lib->vba.MetaChunkSize,
+ mode_lib->vba.ReturnBW,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.HostVMEnable,
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.DPPPerPlane,
+ v->dpte_group_bytes,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor,
+ mode_lib->vba.HostVMMinPageSize,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels);
+
+ mode_lib->vba.TCalc = 24.0 / v->DCFCLKDeepSleep;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k) {
+ if (mode_lib->vba.WritebackEnable[k] == true) {
+ v->WritebackDelay[mode_lib->vba.VoltageLevel][k] = mode_lib->vba.WritebackLatency
+ + dml32_CalculateWriteBackDelay(
+ mode_lib->vba.WritebackPixelFormat[k],
+ mode_lib->vba.WritebackHRatio[k],
+ mode_lib->vba.WritebackVRatio[k],
+ mode_lib->vba.WritebackVTaps[k],
+ mode_lib->vba.WritebackDestinationWidth[k],
+ mode_lib->vba.WritebackDestinationHeight[k],
+ mode_lib->vba.WritebackSourceHeight[k],
+ mode_lib->vba.HTotal[k]) / mode_lib->vba.DISPCLK;
+ } else
+ v->WritebackDelay[mode_lib->vba.VoltageLevel][k] = 0;
+ for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j) {
+ if (mode_lib->vba.BlendingAndTiming[j] == k &&
+ mode_lib->vba.WritebackEnable[j] == true) {
+ v->WritebackDelay[mode_lib->vba.VoltageLevel][k] =
+ dml_max(v->WritebackDelay[mode_lib->vba.VoltageLevel][k],
+ mode_lib->vba.WritebackLatency +
+ dml32_CalculateWriteBackDelay(
+ mode_lib->vba.WritebackPixelFormat[j],
+ mode_lib->vba.WritebackHRatio[j],
+ mode_lib->vba.WritebackVRatio[j],
+ mode_lib->vba.WritebackVTaps[j],
+ mode_lib->vba.WritebackDestinationWidth[j],
+ mode_lib->vba.WritebackDestinationHeight[j],
+ mode_lib->vba.WritebackSourceHeight[j],
+ mode_lib->vba.HTotal[k]) / mode_lib->vba.DISPCLK);
+ }
+ }
+ }
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j)
+ if (mode_lib->vba.BlendingAndTiming[k] == j)
+ v->WritebackDelay[mode_lib->vba.VoltageLevel][k] =
+ v->WritebackDelay[mode_lib->vba.VoltageLevel][j];
+
+ v->UrgentLatency = dml32_CalculateUrgentLatency(mode_lib->vba.UrgentLatencyPixelDataOnly,
+ mode_lib->vba.UrgentLatencyPixelMixedWithVMData,
+ mode_lib->vba.UrgentLatencyVMDataOnly,
+ mode_lib->vba.DoUrgentLatencyAdjustment,
+ mode_lib->vba.UrgentLatencyAdjustmentFabricClockComponent,
+ mode_lib->vba.UrgentLatencyAdjustmentFabricClockReference,
+ mode_lib->vba.FabricClock);
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ dml32_CalculateUrgentBurstFactor(mode_lib->vba.UsesMALLForPStateChange[k],
+ v->swath_width_luma_ub[k],
+ v->swath_width_chroma_ub[k],
+ mode_lib->vba.SwathHeightY[k],
+ mode_lib->vba.SwathHeightC[k],
+ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
+ v->UrgentLatency,
+ mode_lib->vba.CursorBufferSize,
+ mode_lib->vba.CursorWidth[k][0],
+ mode_lib->vba.CursorBPP[k][0],
+ mode_lib->vba.VRatio[k],
+ mode_lib->vba.VRatioChroma[k],
+ v->BytePerPixelDETY[k],
+ v->BytePerPixelDETC[k],
+ mode_lib->vba.DETBufferSizeY[k],
+ mode_lib->vba.DETBufferSizeC[k],
+
+ /* output */
+ &v->UrgBurstFactorCursor[k],
+ &v->UrgBurstFactorLuma[k],
+ &v->UrgBurstFactorChroma[k],
+ &v->NoUrgentLatencyHiding[k]);
+
+ v->cursor_bw[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] / 8 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
+ }
+
+ v->NotEnoughDETSwathFillLatencyHiding = dml32_CalculateDETSwathFillLatencyHiding(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBW,
+ v->UrgentLatency,
+ mode_lib->vba.SwathHeightY,
+ mode_lib->vba.SwathHeightC,
+ v->swath_width_luma_ub,
+ v->swath_width_chroma_ub,
+ v->BytePerPixelDETY,
+ v->BytePerPixelDETC,
+ mode_lib->vba.DETBufferSizeY,
+ mode_lib->vba.DETBufferSizeC,
+ mode_lib->vba.DPPPerPlane,
+ mode_lib->vba.HTotal,
+ mode_lib->vba.PixelClock,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.VRatioChroma,
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.UseUnboundedRequesting);
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->MaxVStartupLines[k] = ((mode_lib->vba.Interlace[k] &&
+ !mode_lib->vba.ProgressiveToInterlaceUnitInOPP) ?
+ dml_floor((mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]) / 2.0, 1.0) :
+ mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]) - dml_max(1.0,
+ dml_ceil((double) v->WritebackDelay[mode_lib->vba.VoltageLevel][k]
+ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1));
+
+ // Clamp to max OTG vstartup register limit
+ if (v->MaxVStartupLines[k] > 1023)
+ v->MaxVStartupLines[k] = 1023;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
+ dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, mode_lib->vba.VoltageLevel);
+ dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__,
+ k, v->WritebackDelay[mode_lib->vba.VoltageLevel][k]);
+#endif
+ }
+
+ v->MaximumMaxVStartupLines = 0;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
+
+ ImmediateFlipRequirementFinal = false;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ ImmediateFlipRequirementFinal = ImmediateFlipRequirementFinal
+ || (mode_lib->vba.ImmediateFlipRequirement[k] == dm_immediate_flip_required);
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ImmediateFlipRequirementFinal = %d\n", __func__, ImmediateFlipRequirementFinal);
+#endif
+ // ModeProgramming will not repeat the schedule calculation using different prefetch mode,
+ // it is just calculated once with the given prefetch mode
+ dml32_CalculateMinAndMaxPrefetchMode(
+ mode_lib->vba.AllowForPStateChangeOrStutterInVBlankFinal,
+ &mode_lib->vba.MinPrefetchMode,
+ &mode_lib->vba.MaxPrefetchMode);
+
+ v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
+
+ iteration = 0;
+ MaxTotalRDBandwidth = 0;
+ NextPrefetchMode = mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb];
+
+ do {
+ MaxTotalRDBandwidth = 0;
+ DestinationLineTimesForPrefetchLessThan2 = false;
+ VRatioPrefetchMoreThanMax = false;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, mode_lib->vba.VStartupLines);
+#endif
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ /* NOTE PrefetchMode variable is invalid in DAL as per the input received.
+ * Hence the direction is to use PrefetchModePerState.
+ */
+ TWait = dml32_CalculateTWait(
+ mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
+ mode_lib->vba.UsesMALLForPStateChange[k],
+ mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ mode_lib->vba.DRRDisplay[k],
+ mode_lib->vba.DRAMClockChangeLatency,
+ mode_lib->vba.FCLKChangeLatency, v->UrgentLatency,
+ mode_lib->vba.SREnterPlusExitTime);
+
+ memset(&v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, 0, sizeof(DmlPipe));
+
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.Dppclk = mode_lib->vba.DPPCLK[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.Dispclk = mode_lib->vba.DISPCLK;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.PixelClock = mode_lib->vba.PixelClock[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DCFClkDeepSleep = v->DCFCLKDeepSleep;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DPPPerSurface = mode_lib->vba.DPPPerPlane[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.SourceRotation = mode_lib->vba.SourceRotation[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.HTotal = mode_lib->vba.HTotal[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.HActive = mode_lib->vba.HActive[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DCCEnable = mode_lib->vba.DCCEnable[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ODMMode = mode_lib->vba.ODMCombineEnabled[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelY = v->BytePerPixelY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelC = v->BytePerPixelC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP;
+ v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(
+ v,
+ k,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor,
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe,
+ v->DSCDelay[k],
+ (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
+ dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
+ v->MaxVStartupLines[k],
+ v->UrgentLatency,
+ v->UrgentExtraLatency,
+ v->TCalc,
+ v->PDEAndMetaPTEBytesFrame[k],
+ v->MetaRowByte[k],
+ v->PixelPTEBytesPerRow[k],
+ v->PrefetchSourceLinesY[k],
+ v->SwathWidthY[k],
+ v->VInitPreFillY[k],
+ v->MaxNumSwathY[k],
+ v->PrefetchSourceLinesC[k],
+ v->SwathWidthC[k],
+ v->VInitPreFillC[k],
+ v->MaxNumSwathC[k],
+ v->swath_width_luma_ub[k],
+ v->swath_width_chroma_ub[k],
+ v->SwathHeightY[k],
+ v->SwathHeightC[k],
+ TWait,
+ (v->DRAMSpeedPerState[mode_lib->vba.VoltageLevel] <= MEM_STROBE_FREQ_MHZ ||
+ v->DCFCLKPerState[mode_lib->vba.VoltageLevel] <= DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ) ?
+ mode_lib->vba.ip.min_prefetch_in_strobe_us : 0,
+ mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] > 0 || mode_lib->vba.DRAMClockChangeRequirementFinal == false,
+
+ /* Output */
+ &v->DSTXAfterScaler[k],
+ &v->DSTYAfterScaler[k],
+ &v->DestinationLinesForPrefetch[k],
+ &v->PrefetchBandwidth[k],
+ &v->DestinationLinesToRequestVMInVBlank[k],
+ &v->DestinationLinesToRequestRowInVBlank[k],
+ &v->VRatioPrefetchY[k],
+ &v->VRatioPrefetchC[k],
+ &v->RequiredPrefetchPixDataBWLuma[k],
+ &v->RequiredPrefetchPixDataBWChroma[k],
+ &v->NotEnoughTimeForDynamicMetadata[k],
+ &v->Tno_bw[k], &v->prefetch_vmrow_bw[k],
+ &v->Tdmdl_vm[k],
+ &v->Tdmdl[k],
+ &v->TSetup[k],
+ &v->VUpdateOffsetPix[k],
+ &v->VUpdateWidthPix[k],
+ &v->VReadyOffsetPix[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d Prefetch calculation errResult=%0d\n",
+ __func__, k, mode_lib->vba.ErrorResult[k]);
+#endif
+ v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ dml32_CalculateUrgentBurstFactor(mode_lib->vba.UsesMALLForPStateChange[k],
+ v->swath_width_luma_ub[k],
+ v->swath_width_chroma_ub[k],
+ mode_lib->vba.SwathHeightY[k],
+ mode_lib->vba.SwathHeightC[k],
+ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
+ v->UrgentLatency,
+ mode_lib->vba.CursorBufferSize,
+ mode_lib->vba.CursorWidth[k][0],
+ mode_lib->vba.CursorBPP[k][0],
+ v->VRatioPrefetchY[k],
+ v->VRatioPrefetchC[k],
+ v->BytePerPixelDETY[k],
+ v->BytePerPixelDETC[k],
+ mode_lib->vba.DETBufferSizeY[k],
+ mode_lib->vba.DETBufferSizeC[k],
+ /* Output */
+ &v->UrgBurstFactorCursorPre[k],
+ &v->UrgBurstFactorLumaPre[k],
+ &v->UrgBurstFactorChromaPre[k],
+ &v->NoUrgentLatencyHidingPre[k]);
+
+ v->cursor_bw_pre[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] /
+ 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * v->VRatioPrefetchY[k];
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d DPPPerSurface=%d\n", __func__, k, mode_lib->vba.DPPPerPlane[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k,
+ v->UrgBurstFactorLumaPre[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k,
+ v->UrgBurstFactorChromaPre[k]);
+
+ dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
+ dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, mode_lib->vba.VRatio[k]);
+
+ dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
+ dml_print("DML::%s: k=%0d ReadBandwidthSurfaceLuma=%f\n", __func__, k,
+ v->ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: k=%0d ReadBandwidthSurfaceChroma=%f\n", __func__, k,
+ v->ReadBandwidthSurfaceChroma[k]);
+ dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
+ dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
+ dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
+ dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k,
+ v->RequiredPrefetchPixDataBWLuma[k]);
+ dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k,
+ v->RequiredPrefetchPixDataBWChroma[k]);
+ dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
+ dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k,
+ MaxTotalRDBandwidthNoUrgentBurst);
+#endif
+ if (v->DestinationLinesForPrefetch[k] < 2)
+ DestinationLineTimesForPrefetchLessThan2 = true;
+
+ if (v->VRatioPrefetchY[k] > v->MaxVRatioPre
+ || v->VRatioPrefetchC[k] > v->MaxVRatioPre)
+ VRatioPrefetchMoreThanMax = true;
+
+ //bool DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = false;
+ //bool DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = false;
+ //if (v->DestinationLinesToRequestVMInVBlank[k] >= 32) {
+ // DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = true;
+ //}
+
+ //if (v->DestinationLinesToRequestRowInVBlank[k] >= 16) {
+ // DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = true;
+ //}
+ }
+
+ v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / mode_lib->vba.ReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f\n",
+ __func__, MaxTotalRDBandwidthNoUrgentBurst);
+ dml_print("DML::%s: ReturnBW=%f\n", __func__, mode_lib->vba.ReturnBW);
+ dml_print("DML::%s: FractionOfUrgentBandwidth=%f\n",
+ __func__, mode_lib->vba.FractionOfUrgentBandwidth);
+#endif
+
+ {
+ dml32_CalculatePrefetchBandwithSupport(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBW,
+ v->NoUrgentLatencyHidingPre,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->RequiredPrefetchPixDataBWLuma,
+ v->RequiredPrefetchPixDataBWChroma,
+ v->cursor_bw,
+ v->meta_row_bw,
+ v->dpte_row_bw,
+ v->cursor_bw_pre,
+ v->prefetch_vmrow_bw,
+ mode_lib->vba.DPPPerPlane,
+ v->UrgBurstFactorLuma,
+ v->UrgBurstFactorChroma,
+ v->UrgBurstFactorCursor,
+ v->UrgBurstFactorLumaPre,
+ v->UrgBurstFactorChromaPre,
+ v->UrgBurstFactorCursorPre,
+ v->PrefetchBandwidth,
+ v->VRatio,
+ v->MaxVRatioPre,
+
+ /* output */
+ &MaxTotalRDBandwidth,
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0],
+ &v->PrefetchModeSupported);
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector[k] = 1.0;
+
+ {
+ dml32_CalculatePrefetchBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBW,
+ v->NoUrgentLatencyHidingPre,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->RequiredPrefetchPixDataBWLuma,
+ v->RequiredPrefetchPixDataBWChroma,
+ v->cursor_bw,
+ v->meta_row_bw,
+ v->dpte_row_bw,
+ v->cursor_bw_pre,
+ v->prefetch_vmrow_bw,
+ mode_lib->vba.DPPPerPlane,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->PrefetchBandwidth,
+ v->VRatio,
+ v->MaxVRatioPre,
+
+ /* output */
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0],
+ &v->FractionOfUrgentBandwidth,
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean);
+ }
+
+ if (VRatioPrefetchMoreThanMax != false || DestinationLineTimesForPrefetchLessThan2 != false) {
+ v->PrefetchModeSupported = false;
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k]) {
+ v->PrefetchModeSupported = false;
+ }
+ }
+
+ if (v->PrefetchModeSupported == true && mode_lib->vba.ImmediateFlipSupport == true) {
+ mode_lib->vba.BandwidthAvailableForImmediateFlip = dml32_CalculateBandwidthAvailableForImmediateFlip(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBW,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->RequiredPrefetchPixDataBWLuma,
+ v->RequiredPrefetchPixDataBWChroma,
+ v->cursor_bw,
+ v->cursor_bw_pre,
+ mode_lib->vba.DPPPerPlane,
+ v->UrgBurstFactorLuma,
+ v->UrgBurstFactorChroma,
+ v->UrgBurstFactorCursor,
+ v->UrgBurstFactorLumaPre,
+ v->UrgBurstFactorChromaPre,
+ v->UrgBurstFactorCursorPre);
+
+ mode_lib->vba.TotImmediateFlipBytes = 0;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
+ mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes
+ + mode_lib->vba.DPPPerPlane[k]
+ * (v->PDEAndMetaPTEBytesFrame[k]
+ + v->MetaRowByte[k]);
+ if (v->use_one_row_for_frame_flip[k][0][0]) {
+ mode_lib->vba.TotImmediateFlipBytes =
+ mode_lib->vba.TotImmediateFlipBytes
+ + 2 * v->PixelPTEBytesPerRow[k];
+ } else {
+ mode_lib->vba.TotImmediateFlipBytes =
+ mode_lib->vba.TotImmediateFlipBytes
+ + v->PixelPTEBytesPerRow[k];
+ }
+ }
+ }
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ dml32_CalculateFlipSchedule(v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor,
+ v->UrgentExtraLatency,
+ v->UrgentLatency,
+ mode_lib->vba.GPUVMMaxPageTableLevels,
+ mode_lib->vba.HostVMEnable,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.HostVMMinPageSize,
+ v->PDEAndMetaPTEBytesFrame[k],
+ v->MetaRowByte[k],
+ v->PixelPTEBytesPerRow[k],
+ mode_lib->vba.BandwidthAvailableForImmediateFlip,
+ mode_lib->vba.TotImmediateFlipBytes,
+ mode_lib->vba.SourcePixelFormat[k],
+ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
+ mode_lib->vba.VRatio[k],
+ mode_lib->vba.VRatioChroma[k],
+ v->Tno_bw[k],
+ mode_lib->vba.DCCEnable[k],
+ v->dpte_row_height[k],
+ v->meta_row_height[k],
+ v->dpte_row_height_chroma[k],
+ v->meta_row_height_chroma[k],
+ v->Use_One_Row_For_Frame_Flip[k],
+
+ /* Output */
+ &v->DestinationLinesToRequestVMInImmediateFlip[k],
+ &v->DestinationLinesToRequestRowInImmediateFlip[k],
+ &v->final_flip_bw[k],
+ &v->ImmediateFlipSupportedForPipe[k]);
+ }
+
+ {
+ dml32_CalculateImmediateFlipBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBW,
+ mode_lib->vba.ImmediateFlipRequirement,
+ v->final_flip_bw,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->RequiredPrefetchPixDataBWLuma,
+ v->RequiredPrefetchPixDataBWChroma,
+ v->cursor_bw,
+ v->meta_row_bw,
+ v->dpte_row_bw,
+ v->cursor_bw_pre,
+ v->prefetch_vmrow_bw,
+ mode_lib->vba.DPPPerPlane,
+ v->UrgBurstFactorLuma,
+ v->UrgBurstFactorChroma,
+ v->UrgBurstFactorCursor,
+ v->UrgBurstFactorLumaPre,
+ v->UrgBurstFactorChromaPre,
+ v->UrgBurstFactorCursorPre,
+
+ /* output */
+ &v->total_dcn_read_bw_with_flip, // Single *TotalBandwidth
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], // Single *FractionOfUrgentBandwidth
+ &v->ImmediateFlipSupported); // Boolean *ImmediateFlipBandwidthSupport
+
+ dml32_CalculateImmediateFlipBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBW,
+ mode_lib->vba.ImmediateFlipRequirement,
+ v->final_flip_bw,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->RequiredPrefetchPixDataBWLuma,
+ v->RequiredPrefetchPixDataBWChroma,
+ v->cursor_bw,
+ v->meta_row_bw,
+ v->dpte_row_bw,
+ v->cursor_bw_pre,
+ v->prefetch_vmrow_bw,
+ mode_lib->vba.DPPPerPlane,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+
+ /* output */
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[1], // Single *TotalBandwidth
+ &v->FractionOfUrgentBandwidthImmediateFlip, // Single *FractionOfUrgentBandwidth
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean); // Boolean *ImmediateFlipBandwidthSupport
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.ImmediateFlipRequirement[k] != dm_immediate_flip_not_required && v->ImmediateFlipSupportedForPipe[k] == false) {
+ v->ImmediateFlipSupported = false;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
+#endif
+ }
+ }
+ } else {
+ v->ImmediateFlipSupported = false;
+ }
+
+ /* consider flip support is okay if the flip bw is ok or (when user doesn't require an iflip and there is no host vm) */
+ v->PrefetchAndImmediateFlipSupported = (v->PrefetchModeSupported == true &&
+ ((!mode_lib->vba.ImmediateFlipSupport && !mode_lib->vba.HostVMEnable && !ImmediateFlipRequirementFinal) ||
+ v->ImmediateFlipSupported)) ? true : false;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PrefetchModeSupported = %d\n", __func__, locals->PrefetchModeSupported);
+ for (uint k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ dml_print("DML::%s: ImmediateFlipRequirement[%d] = %d\n", __func__, k, mode_lib->vba.ImmediateFlipRequirement[k] == dm_immediate_flip_required);
+ dml_print("DML::%s: ImmediateFlipSupported = %d\n", __func__, locals->ImmediateFlipSupported);
+ dml_print("DML::%s: ImmediateFlipSupport = %d\n", __func__, mode_lib->vba.ImmediateFlipSupport);
+ dml_print("DML::%s: HostVMEnable = %d\n", __func__, mode_lib->vba.HostVMEnable);
+ dml_print("DML::%s: PrefetchAndImmediateFlipSupported = %d\n", __func__, locals->PrefetchAndImmediateFlipSupported);
+ dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup=%d\n", __func__, locals->VStartupLines, locals->MaximumMaxVStartupLines);
+#endif
+
+ v->VStartupLines = v->VStartupLines + 1;
+
+ if (v->VStartupLines > v->MaximumMaxVStartupLines) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Vstartup exceeds max vstartup, exiting loop\n", __func__);
+#endif
+ break; // VBA_DELTA: Implementation divergence! Gabe is *still* iterating across prefetch modes which we don't care to do
+ }
+ iteration++;
+ if (iteration > 2500) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: too many errors, exit now\n", __func__);
+ assert(0);
+#endif
+ }
+ } while (!(v->PrefetchAndImmediateFlipSupported || NextPrefetchMode > mode_lib->vba.MaxPrefetchMode));
+
+
+ if (v->VStartupLines <= v->MaximumMaxVStartupLines) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Good, Prefetch and flip scheduling found solution at VStartupLines=%d\n", __func__, locals->VStartupLines-1);
+#endif
+ }
+
+
+ //Watermarks and NB P-State/DRAM Clock Change Support
+ {
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.UrgentLatency = v->UrgentLatency;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.ExtraLatency = v->UrgentExtraLatency;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.WritebackLatency = mode_lib->vba.WritebackLatency;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.DRAMClockChangeLatency = mode_lib->vba.DRAMClockChangeLatency;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.FCLKChangeLatency = mode_lib->vba.FCLKChangeLatency;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SRExitTime = mode_lib->vba.SRExitTime;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SREnterPlusExitTime = mode_lib->vba.SREnterPlusExitTime;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SRExitZ8Time = mode_lib->vba.SRExitZ8Time;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->vba.SREnterPlusExitZ8Time;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.USRRetrainingLatency = mode_lib->vba.USRRetrainingLatency;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency;
+
+ dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+ v,
+ v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb],
+ v->DCFCLK,
+ v->ReturnBW,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters,
+ v->SOCCLK,
+ v->DCFCLKDeepSleep,
+ v->DETBufferSizeY,
+ v->DETBufferSizeC,
+ v->SwathHeightY,
+ v->SwathHeightC,
+ v->SwathWidthY,
+ v->SwathWidthC,
+ v->DPPPerPlane,
+ v->BytePerPixelDETY,
+ v->BytePerPixelDETC,
+ v->DSTXAfterScaler,
+ v->DSTYAfterScaler,
+ v->UnboundedRequestEnabled,
+ v->CompressedBufferSizeInkByte,
+
+ /* Output */
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_dramchange_support,
+ v->MaxActiveDRAMClockChangeLatencySupported,
+ v->SubViewportLinesNeededInMALL,
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_fclkchange_support,
+ &v->MinActiveFCLKChangeLatencySupported,
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_USRRetrainingSupport,
+ mode_lib->vba.ActiveDRAMClockChangeLatencyMargin);
+
+ /* DCN32 has a new struct Watermarks (typedef) which is used to store
+ * calculated WM values. Copy over values from struct to vba variables
+ * to ensure that the DCN32 getters return the correct value.
+ */
+ v->UrgentWatermark = v->Watermark.UrgentWatermark;
+ v->WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark;
+ v->DRAMClockChangeWatermark = v->Watermark.DRAMClockChangeWatermark;
+ v->WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark;
+ v->StutterExitWatermark = v->Watermark.StutterExitWatermark;
+ v->StutterEnterPlusExitWatermark = v->Watermark.StutterEnterPlusExitWatermark;
+ v->Z8StutterExitWatermark = v->Watermark.Z8StutterExitWatermark;
+ v->Z8StutterEnterPlusExitWatermark = v->Watermark.Z8StutterEnterPlusExitWatermark;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.WritebackEnable[k] == true) {
+ v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(0,
+ v->VStartup[k] * mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]
+ - v->Watermark.WritebackDRAMClockChangeWatermark);
+ v->WritebackAllowFCLKChangeEndPosition[k] = dml_max(0,
+ v->VStartup[k] * mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]
+ - v->Watermark.WritebackFCLKChangeWatermark);
+ } else {
+ v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
+ v->WritebackAllowFCLKChangeEndPosition[k] = 0;
+ }
+ }
+ }
+
+ //Display Pipeline Delivery Time in Prefetch, Groups
+ dml32_CalculatePixelDeliveryTimes(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.VRatioChroma,
+ v->VRatioPrefetchY,
+ v->VRatioPrefetchC,
+ v->swath_width_luma_ub,
+ v->swath_width_chroma_ub,
+ mode_lib->vba.DPPPerPlane,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.HRatioChroma,
+ mode_lib->vba.PixelClock,
+ v->PSCL_THROUGHPUT_LUMA,
+ v->PSCL_THROUGHPUT_CHROMA,
+ mode_lib->vba.DPPCLK,
+ v->BytePerPixelC,
+ mode_lib->vba.SourceRotation,
+ mode_lib->vba.NumberOfCursors,
+ mode_lib->vba.CursorWidth,
+ mode_lib->vba.CursorBPP,
+ v->BlockWidth256BytesY,
+ v->BlockHeight256BytesY,
+ v->BlockWidth256BytesC,
+ v->BlockHeight256BytesC,
+
+ /* Output */
+ v->DisplayPipeLineDeliveryTimeLuma,
+ v->DisplayPipeLineDeliveryTimeChroma,
+ v->DisplayPipeLineDeliveryTimeLumaPrefetch,
+ v->DisplayPipeLineDeliveryTimeChromaPrefetch,
+ v->DisplayPipeRequestDeliveryTimeLuma,
+ v->DisplayPipeRequestDeliveryTimeChroma,
+ v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
+ v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
+ v->CursorRequestDeliveryTime,
+ v->CursorRequestDeliveryTimePrefetch);
+
+ dml32_CalculateMetaAndPTETimes(v->Use_One_Row_For_Frame,
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.MetaChunkSize,
+ mode_lib->vba.MinMetaChunkSizeBytes,
+ mode_lib->vba.HTotal,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.VRatioChroma,
+ v->DestinationLinesToRequestRowInVBlank,
+ v->DestinationLinesToRequestRowInImmediateFlip,
+ mode_lib->vba.DCCEnable,
+ mode_lib->vba.PixelClock,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ mode_lib->vba.SourceRotation,
+ v->dpte_row_height,
+ v->dpte_row_height_chroma,
+ v->meta_row_width,
+ v->meta_row_width_chroma,
+ v->meta_row_height,
+ v->meta_row_height_chroma,
+ v->meta_req_width,
+ v->meta_req_width_chroma,
+ v->meta_req_height,
+ v->meta_req_height_chroma,
+ v->dpte_group_bytes,
+ v->PTERequestSizeY,
+ v->PTERequestSizeC,
+ v->PixelPTEReqWidthY,
+ v->PixelPTEReqHeightY,
+ v->PixelPTEReqWidthC,
+ v->PixelPTEReqHeightC,
+ v->dpte_row_width_luma_ub,
+ v->dpte_row_width_chroma_ub,
+
+ /* Output */
+ v->DST_Y_PER_PTE_ROW_NOM_L,
+ v->DST_Y_PER_PTE_ROW_NOM_C,
+ v->DST_Y_PER_META_ROW_NOM_L,
+ v->DST_Y_PER_META_ROW_NOM_C,
+ v->TimePerMetaChunkNominal,
+ v->TimePerChromaMetaChunkNominal,
+ v->TimePerMetaChunkVBlank,
+ v->TimePerChromaMetaChunkVBlank,
+ v->TimePerMetaChunkFlip,
+ v->TimePerChromaMetaChunkFlip,
+ v->time_per_pte_group_nom_luma,
+ v->time_per_pte_group_vblank_luma,
+ v->time_per_pte_group_flip_luma,
+ v->time_per_pte_group_nom_chroma,
+ v->time_per_pte_group_vblank_chroma,
+ v->time_per_pte_group_flip_chroma);
+
+ dml32_CalculateVMGroupAndRequestTimes(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.GPUVMMaxPageTableLevels,
+ mode_lib->vba.HTotal,
+ v->BytePerPixelC,
+ v->DestinationLinesToRequestVMInVBlank,
+ v->DestinationLinesToRequestVMInImmediateFlip,
+ mode_lib->vba.DCCEnable,
+ mode_lib->vba.PixelClock,
+ v->dpte_row_width_luma_ub,
+ v->dpte_row_width_chroma_ub,
+ v->vm_group_bytes,
+ v->dpde0_bytes_per_frame_ub_l,
+ v->dpde0_bytes_per_frame_ub_c,
+ v->meta_pte_bytes_per_frame_ub_l,
+ v->meta_pte_bytes_per_frame_ub_c,
+
+ /* Output */
+ v->TimePerVMGroupVBlank,
+ v->TimePerVMGroupFlip,
+ v->TimePerVMRequestVBlank,
+ v->TimePerVMRequestFlip);
+
+ // Min TTUVBlank
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) {
+ v->MinTTUVBlank[k] = dml_max4(v->Watermark.DRAMClockChangeWatermark,
+ v->Watermark.FCLKChangeWatermark, v->Watermark.StutterEnterPlusExitWatermark,
+ v->Watermark.UrgentWatermark);
+ } else if (mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb]
+ == 1) {
+ v->MinTTUVBlank[k] = dml_max3(v->Watermark.FCLKChangeWatermark,
+ v->Watermark.StutterEnterPlusExitWatermark, v->Watermark.UrgentWatermark);
+ } else if (mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb]
+ == 2) {
+ v->MinTTUVBlank[k] = dml_max(v->Watermark.StutterEnterPlusExitWatermark,
+ v->Watermark.UrgentWatermark);
+ } else {
+ v->MinTTUVBlank[k] = v->Watermark.UrgentWatermark;
+ }
+ if (!mode_lib->vba.DynamicMetadataEnable[k])
+ v->MinTTUVBlank[k] = mode_lib->vba.TCalc + v->MinTTUVBlank[k];
+ }
+
+ // DCC Configuration
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Calculate DCC configuration for surface k=%d\n", __func__, k);
+#endif
+ dml32_CalculateDCCConfiguration(
+ mode_lib->vba.DCCEnable[k],
+ mode_lib->vba.DCCProgrammingAssumesScanDirectionUnknownFinal,
+ mode_lib->vba.SourcePixelFormat[k], mode_lib->vba.SurfaceWidthY[k],
+ mode_lib->vba.SurfaceWidthC[k],
+ mode_lib->vba.SurfaceHeightY[k],
+ mode_lib->vba.SurfaceHeightC[k],
+ mode_lib->vba.nomDETInKByte,
+ v->BlockHeight256BytesY[k],
+ v->BlockHeight256BytesC[k],
+ mode_lib->vba.SurfaceTiling[k],
+ v->BytePerPixelY[k],
+ v->BytePerPixelC[k],
+ v->BytePerPixelDETY[k],
+ v->BytePerPixelDETC[k],
+ (enum dm_rotation_angle) mode_lib->vba.SourceScan[k],
+ /* Output */
+ &v->DCCYMaxUncompressedBlock[k],
+ &v->DCCCMaxUncompressedBlock[k],
+ &v->DCCYMaxCompressedBlock[k],
+ &v->DCCCMaxCompressedBlock[k],
+ &v->DCCYIndependentBlock[k],
+ &v->DCCCIndependentBlock[k]);
+ }
+
+ // VStartup Adjustment
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ bool isInterlaceTiming;
+ double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * mode_lib->vba.HTotal[k]
+ / mode_lib->vba.PixelClock[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k,
+ v->MinTTUVBlank[k]);
+#endif
+
+ v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
+ dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
+ dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
+ dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
+#endif
+
+ v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
+ if (mode_lib->vba.DynamicMetadataEnable[k] && mode_lib->vba.DynamicMetadataVMEnabled)
+ v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
+
+ isInterlaceTiming = (mode_lib->vba.Interlace[k] &&
+ !mode_lib->vba.ProgressiveToInterlaceUnitInOPP);
+
+ v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((mode_lib->vba.VTotal[k] -
+ mode_lib->vba.VFrontPorch[k]) / 2.0, 1.0) :
+ mode_lib->vba.VTotal[k]) - mode_lib->vba.VFrontPorch[k])
+ + dml_max(1.0,
+ dml_ceil(v->WritebackDelay[mode_lib->vba.VoltageLevel][k]
+ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0))
+ + dml_floor(4.0 * v->TSetup[k] / (mode_lib->vba.HTotal[k]
+ / mode_lib->vba.PixelClock[k]), 1.0) / 4.0;
+
+ v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
+
+ if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k])
+ / mode_lib->vba.HTotal[k]) <= (isInterlaceTiming ? dml_floor((mode_lib->vba.VTotal[k]
+ - mode_lib->vba.VActive[k] - mode_lib->vba.VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
+ (int) (mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
+ - mode_lib->vba.VFrontPorch[k] - v->VStartup[k]))) {
+ v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
+ } else {
+ v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
+ dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
+ dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
+ dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
+ dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, mode_lib->vba.HTotal[k]);
+ dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, mode_lib->vba.VTotal[k]);
+ dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, mode_lib->vba.VActive[k]);
+ dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, mode_lib->vba.VFrontPorch[k]);
+ dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
+ dml_print("DML::%s: k=%d, TSetup = %f\n", __func__, k, v->TSetup[k]);
+ dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
+ dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k,
+ v->VREADY_AT_OR_AFTER_VSYNC[k]);
+#endif
+ }
+
+ {
+ //Maximum Bandwidth Used
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.WritebackEnable[k] == true
+ && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) {
+ WRBandwidth = mode_lib->vba.WritebackDestinationWidth[k]
+ * mode_lib->vba.WritebackDestinationHeight[k]
+ / (mode_lib->vba.HTotal[k] * mode_lib->vba.WritebackSourceHeight[k]
+ / mode_lib->vba.PixelClock[k]) * 4;
+ } else if (mode_lib->vba.WritebackEnable[k] == true) {
+ WRBandwidth = mode_lib->vba.WritebackDestinationWidth[k]
+ * mode_lib->vba.WritebackDestinationHeight[k]
+ / (mode_lib->vba.HTotal[k] * mode_lib->vba.WritebackSourceHeight[k]
+ / mode_lib->vba.PixelClock[k]) * 8;
+ }
+ TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
+ }
+
+ v->TotalDataReadBandwidth = 0;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthSurfaceLuma[k]
+ + v->ReadBandwidthSurfaceChroma[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, TotalDataReadBandwidth = %f\n",
+ __func__, k, v->TotalDataReadBandwidth);
+ dml_print("DML::%s: k=%d, ReadBandwidthSurfaceLuma = %f\n",
+ __func__, k, v->ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: k=%d, ReadBandwidthSurfaceChroma = %f\n",
+ __func__, k, v->ReadBandwidthSurfaceChroma[k]);
+#endif
+ }
+ }
+
+ // Stutter Efficiency
+ dml32_CalculateStutterEfficiency(v->CompressedBufferSizeInkByte,
+ mode_lib->vba.UsesMALLForPStateChange,
+ v->UnboundedRequestEnabled,
+ mode_lib->vba.MetaFIFOSizeInKEntries,
+ mode_lib->vba.ZeroSizeBufferEntries,
+ mode_lib->vba.PixelChunkSizeInKByte,
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ROBBufferSizeInKByte,
+ v->TotalDataReadBandwidth,
+ mode_lib->vba.DCFCLK,
+ mode_lib->vba.ReturnBW,
+ v->CompbufReservedSpace64B,
+ v->CompbufReservedSpaceZs,
+ mode_lib->vba.SRExitTime,
+ mode_lib->vba.SRExitZ8Time,
+ mode_lib->vba.SynchronizeTimingsFinal,
+ mode_lib->vba.BlendingAndTiming,
+ v->Watermark.StutterEnterPlusExitWatermark,
+ v->Watermark.Z8StutterEnterPlusExitWatermark,
+ mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
+ mode_lib->vba.Interlace,
+ v->MinTTUVBlank, mode_lib->vba.DPPPerPlane,
+ mode_lib->vba.DETBufferSizeY,
+ v->BytePerPixelY,
+ v->BytePerPixelDETY,
+ v->SwathWidthY,
+ mode_lib->vba.SwathHeightY,
+ mode_lib->vba.SwathHeightC,
+ mode_lib->vba.DCCRateLuma,
+ mode_lib->vba.DCCRateChroma,
+ mode_lib->vba.DCCFractionOfZeroSizeRequestsLuma,
+ mode_lib->vba.DCCFractionOfZeroSizeRequestsChroma,
+ mode_lib->vba.HTotal, mode_lib->vba.VTotal,
+ mode_lib->vba.PixelClock,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.SourceRotation,
+ v->BlockHeight256BytesY,
+ v->BlockWidth256BytesY,
+ v->BlockHeight256BytesC,
+ v->BlockWidth256BytesC,
+ v->DCCYMaxUncompressedBlock,
+ v->DCCCMaxUncompressedBlock,
+ mode_lib->vba.VActive,
+ mode_lib->vba.DCCEnable,
+ mode_lib->vba.WritebackEnable,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->meta_row_bw,
+ v->dpte_row_bw,
+ /* Output */
+ &v->StutterEfficiencyNotIncludingVBlank,
+ &v->StutterEfficiency,
+ &v->NumberOfStutterBurstsPerFrame,
+ &v->Z8StutterEfficiencyNotIncludingVBlank,
+ &v->Z8StutterEfficiency,
+ &v->Z8NumberOfStutterBurstsPerFrame,
+ &v->StutterPeriod,
+ &v->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+ {
+ unsigned int dummy_integer[1];
+
+ // Calculate z8 stutter eff assuming 0 reserved space
+ dml32_CalculateStutterEfficiency(v->CompressedBufferSizeInkByte,
+ mode_lib->vba.UsesMALLForPStateChange,
+ v->UnboundedRequestEnabled,
+ mode_lib->vba.MetaFIFOSizeInKEntries,
+ mode_lib->vba.ZeroSizeBufferEntries,
+ mode_lib->vba.PixelChunkSizeInKByte,
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ROBBufferSizeInKByte,
+ v->TotalDataReadBandwidth,
+ mode_lib->vba.DCFCLK,
+ mode_lib->vba.ReturnBW,
+ 0, //CompbufReservedSpace64B,
+ 0, //CompbufReservedSpaceZs,
+ mode_lib->vba.SRExitTime,
+ mode_lib->vba.SRExitZ8Time,
+ mode_lib->vba.SynchronizeTimingsFinal,
+ mode_lib->vba.BlendingAndTiming,
+ v->Watermark.StutterEnterPlusExitWatermark,
+ v->Watermark.Z8StutterEnterPlusExitWatermark,
+ mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
+ mode_lib->vba.Interlace,
+ v->MinTTUVBlank,
+ mode_lib->vba.DPPPerPlane,
+ mode_lib->vba.DETBufferSizeY,
+ v->BytePerPixelY, v->BytePerPixelDETY,
+ v->SwathWidthY, mode_lib->vba.SwathHeightY,
+ mode_lib->vba.SwathHeightC,
+ mode_lib->vba.DCCRateLuma,
+ mode_lib->vba.DCCRateChroma,
+ mode_lib->vba.DCCFractionOfZeroSizeRequestsLuma,
+ mode_lib->vba.DCCFractionOfZeroSizeRequestsChroma,
+ mode_lib->vba.HTotal,
+ mode_lib->vba.VTotal,
+ mode_lib->vba.PixelClock,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.SourceRotation,
+ v->BlockHeight256BytesY,
+ v->BlockWidth256BytesY,
+ v->BlockHeight256BytesC,
+ v->BlockWidth256BytesC,
+ v->DCCYMaxUncompressedBlock,
+ v->DCCCMaxUncompressedBlock,
+ mode_lib->vba.VActive,
+ mode_lib->vba.DCCEnable,
+ mode_lib->vba.WritebackEnable,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->meta_row_bw, v->dpte_row_bw,
+
+ /* Output */
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0],
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[1],
+ &dummy_integer[0],
+ &v->Z8StutterEfficiencyNotIncludingVBlankBestCase,
+ &v->Z8StutterEfficiencyBestCase,
+ &v->Z8NumberOfStutterBurstsPerFrameBestCase,
+ &v->StutterPeriodBestCase,
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean);
+ }
+#else
+ v->Z8StutterEfficiencyNotIncludingVBlankBestCase = v->Z8StutterEfficiencyNotIncludingVBlank;
+ v->Z8StutterEfficiencyBestCase = v->Z8StutterEfficiency;
+ v->Z8NumberOfStutterBurstsPerFrameBestCase = v->Z8NumberOfStutterBurstsPerFrame;
+ v->StutterPeriodBestCase = v->StutterPeriod;
+#endif
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: --- END ---\n", __func__);
+#endif
+}
+
+/*
+ * mode_support_configuration - derive ModeSupport[][] from the individual
+ * support flags already stored in mode_lib->vba.
+ *
+ * @v:        VBA variables; only v->soc.num_states is read through this pointer.
+ * @mode_lib: display mode library. Every support flag is read from
+ *            mode_lib->vba and the result is written to
+ *            mode_lib->vba.ModeSupport[state][idx].
+ *
+ * States are walked from the highest one (num_states - 1) down to the start
+ * state. When mode_lib->validate_max_state is set, only the highest state is
+ * evaluated; otherwise every state down to 0 is.  For each state both values
+ * of the second index (0 and 1) are evaluated.
+ * NOTE(review): the second index presumably selects the MPC-combine
+ * variant - confirm against the callers.
+ *
+ * The flags fall into three tiers, checked in that order:
+ *   - flags that depend on neither index (evaluated once),
+ *   - flags indexed by state only,
+ *   - flags indexed by [state][idx].
+ * None of the reads has a side effect, so the grouping does not change the
+ * value that ends up in ModeSupport[][].
+ */
+static void mode_support_configuration(struct vba_vars_st *v,
+		struct display_mode_lib *mode_lib)
+{
+	struct vba_vars_st *vba = &mode_lib->vba;
+	const int max_state = v->soc.num_states - 1;
+	const int start_state = mode_lib->validate_max_state ? max_state : 0;
+	/*
+	 * Requirements that are the same for every state and index.
+	 * The DSCOnlyIfNecessaryWithBPP flag is deliberately not consulted
+	 * here; that check was already disabled (commented out).
+	 */
+	const bool common_ok =
+		vba->ScaleRatioAndTapsSupport == true
+		&& vba->SourceFormatPixelAndScanSupport == true
+		&& !vba->LinkRateDoesNotMatchDPVersion
+		&& !vba->LinkRateForMultistreamNotIndicated
+		&& !vba->BPPForMultistreamNotIndicated
+		&& !vba->MultistreamWithHDMIOreDP
+		&& !vba->MSOOrODMSplitWithNonDPLink
+		&& !vba->NotEnoughLanesForMSO
+		&& !vba->P2IWith420
+		&& !vba->DSC422NativeNotSupported
+		&& !vba->MPCCombineMethodIncompatible
+		&& !vba->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe
+		&& !vba->InvalidCombinationOfMALLUseForPStateAndStaticScreen
+		&& !vba->InvalidCombinationOfMALLUseForPState
+		&& !vba->ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified
+		&& vba->NumberOfOTGSupport == true
+		&& vba->NumberOfHDMIFRLSupport == true
+		&& vba->EnoughWritebackUnits == true
+		&& vba->WritebackLatencySupport == true
+		&& vba->WritebackScaleRatioAndTapsSupport == true
+		&& vba->CursorSupport == true
+		&& vba->PitchSupport == true
+		&& vba->ViewportExceedsSurface == false
+		&& vba->NonsupportedDSCInputBPC == false
+		&& !vba->ExceededMALLSize;
+	int state, idx;
+
+	for (state = max_state; state >= start_state; state--) {
+		/* Several requirements are waived for the highest state. */
+		const bool is_max_state = (state == max_state);
+		/* Requirements indexed by the state only. */
+		const bool state_ok = common_ok
+			&& !vba->ExceededMultistreamSlots[state]
+			&& vba->LinkCapacitySupport[state] == true
+			&& vba->ODMCombine2To1SupportCheckOK[state] == true
+			&& vba->ODMCombine4To1SupportCheckOK[state] == true
+			&& vba->NotEnoughDSCUnits[state] == false
+			&& !vba->NotEnoughDSCSlices[state]
+			&& vba->DSCCLKRequiredMoreThanSupported[state] == false
+			&& vba->PixelsPerLinePerDSCUnitSupport[state]
+			&& vba->DTBCLKRequiredMoreThanSupported[state] == false;
+
+		for (idx = 0; idx < 2; idx++) {
+			/* Requirements indexed by both state and idx. */
+			const bool supported = state_ok
+				&& vba->ViewportSizeSupport[state][idx] == true
+				&& vba->ROBSupport[state][idx] == true
+				&& vba->DISPCLK_DPPCLK_Support[state][idx] == true
+				&& vba->TotalAvailablePipesSupport[state][idx] == true
+				&& vba->PrefetchSupported[state][idx] == true
+				&& vba->VActiveBandwithSupport[state][idx] == true
+				&& vba->DynamicMetadataSupported[state][idx] == true
+				&& vba->TotalVerticalActiveBandwidthSupport[state][idx] == true
+				&& vba->VRatioInPrefetchSupported[state][idx] == true
+				&& vba->PTEBufferSizeNotExceeded[state][idx] == true
+				&& vba->DCCMetaBufferSizeNotExceeded[state][idx] == true
+				/* DET swath fill latency hiding: not required at max state */
+				&& (is_max_state
+					|| vba->NotEnoughDETSwathFillLatencyHidingPerState[state][idx] == false)
+				/* immediate flip only matters with HostVM or when flip is required */
+				&& (vba->ImmediateFlipSupportedForState[state][idx]
+					|| (vba->HostVMEnable == false
+						&& !vba->ImmediateFlipRequiredFinal))
+				/* DRAM clock change: only when required, never at max state */
+				&& (is_max_state
+					|| !vba->DRAMClockChangeRequirementFinal
+					|| vba->DRAMClockChangeSupport[state][idx]
+						!= dm_dram_clock_change_unsupported)
+				/* FCLK change: only when required, never at max state */
+				&& (is_max_state
+					|| !vba->FCLKChangeRequirementFinal
+					|| vba->FCLKChangeSupport[state][idx]
+						!= dm_fclock_change_unsupported)
+				/* USR retraining: only when required */
+				&& (!vba->USRRetrainingRequiredFinal
+					|| vba->USRRetrainingSupport[state][idx]);
+
+			vba->ModeSupport[state][idx] = supported;
+		}
+	}
+}
+
+void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ int i, j, start_state;
+ unsigned int k, m;
+ unsigned int MaximumMPCCombine;
+ unsigned int NumberOfNonCombinedSurfaceOfMaximumBandwidth;
+ unsigned int TotalSlots;
+ bool CompBufReservedSpaceNeedAdjustment;
+ bool CompBufReservedSpaceNeedAdjustmentSingleDPP;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: called\n", __func__);
+#endif
+
+ /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
+ if (mode_lib->validate_max_state)
+ start_state = v->soc.num_states - 1;
+ else
+ start_state = 0;
+
+ /*Scale Ratio, taps Support Check*/
+
+ mode_lib->vba.ScaleRatioAndTapsSupport = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.ScalerEnabled[k] == false
+ && ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8
+ && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe
+ && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe_alpha)
+ || mode_lib->vba.HRatio[k] != 1.0 || mode_lib->vba.htaps[k] != 1.0
+ || mode_lib->vba.VRatio[k] != 1.0 || mode_lib->vba.vtaps[k] != 1.0)) {
+ mode_lib->vba.ScaleRatioAndTapsSupport = false;
+ } else if (mode_lib->vba.vtaps[k] < 1.0 || mode_lib->vba.vtaps[k] > 8.0 || mode_lib->vba.htaps[k] < 1.0
+ || mode_lib->vba.htaps[k] > 8.0
+ || (mode_lib->vba.htaps[k] > 1.0 && (mode_lib->vba.htaps[k] % 2) == 1)
+ || mode_lib->vba.HRatio[k] > mode_lib->vba.MaxHSCLRatio
+ || mode_lib->vba.VRatio[k] > mode_lib->vba.MaxVSCLRatio
+ || mode_lib->vba.HRatio[k] > mode_lib->vba.htaps[k]
+ || mode_lib->vba.VRatio[k] > mode_lib->vba.vtaps[k]
+ || (mode_lib->vba.SourcePixelFormat[k] != dm_444_64
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8
+ && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe
+ && (mode_lib->vba.VTAPsChroma[k] < 1
+ || mode_lib->vba.VTAPsChroma[k] > 8
+ || mode_lib->vba.HTAPsChroma[k] < 1
+ || mode_lib->vba.HTAPsChroma[k] > 8
+ || (mode_lib->vba.HTAPsChroma[k] > 1
+ && mode_lib->vba.HTAPsChroma[k] % 2
+ == 1)
+ || mode_lib->vba.HRatioChroma[k]
+ > mode_lib->vba.MaxHSCLRatio
+ || mode_lib->vba.VRatioChroma[k]
+ > mode_lib->vba.MaxVSCLRatio
+ || mode_lib->vba.HRatioChroma[k]
+ > mode_lib->vba.HTAPsChroma[k]
+ || mode_lib->vba.VRatioChroma[k]
+ > mode_lib->vba.VTAPsChroma[k]))) {
+ mode_lib->vba.ScaleRatioAndTapsSupport = false;
+ }
+ }
+
+ /*Source Format, Pixel Format and Scan Support Check*/
+ mode_lib->vba.SourceFormatPixelAndScanSupport = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
+ && (!(!IsVertical((enum dm_rotation_angle) mode_lib->vba.SourceScan[k]))
+ || mode_lib->vba.DCCEnable[k] == true)) {
+ mode_lib->vba.SourceFormatPixelAndScanSupport = false;
+ }
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ dml32_CalculateBytePerPixelAndBlockSizes(
+ mode_lib->vba.SourcePixelFormat[k],
+ mode_lib->vba.SurfaceTiling[k],
+
+ /* Output */
+ &mode_lib->vba.BytePerPixelY[k],
+ &mode_lib->vba.BytePerPixelC[k],
+ &mode_lib->vba.BytePerPixelInDETY[k],
+ &mode_lib->vba.BytePerPixelInDETC[k],
+ &mode_lib->vba.Read256BlockHeightY[k],
+ &mode_lib->vba.Read256BlockHeightC[k],
+ &mode_lib->vba.Read256BlockWidthY[k],
+ &mode_lib->vba.Read256BlockWidthC[k],
+ &mode_lib->vba.MacroTileHeightY[k],
+ &mode_lib->vba.MacroTileHeightC[k],
+ &mode_lib->vba.MacroTileWidthY[k],
+ &mode_lib->vba.MacroTileWidthC[k]);
+ }
+
+ /*Bandwidth Support Check*/
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (!IsVertical(mode_lib->vba.SourceRotation[k])) {
+ v->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportWidth[k];
+ v->SwathWidthCSingleDPP[k] = mode_lib->vba.ViewportWidthChroma[k];
+ } else {
+ v->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportHeight[k];
+ v->SwathWidthCSingleDPP[k] = mode_lib->vba.ViewportHeightChroma[k];
+ }
+ }
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
+ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
+ v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
+ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]
+ / 2.0;
+ }
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.WritebackEnable[k] == true && mode_lib->vba.WritebackPixelFormat[k] == dm_444_64) {
+ v->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
+ * mode_lib->vba.WritebackDestinationHeight[k]
+ / (mode_lib->vba.WritebackSourceHeight[k] * mode_lib->vba.HTotal[k]
+ / mode_lib->vba.PixelClock[k]) * 8.0;
+ } else if (mode_lib->vba.WritebackEnable[k] == true) {
+ v->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
+ * mode_lib->vba.WritebackDestinationHeight[k]
+ / (mode_lib->vba.WritebackSourceHeight[k] * mode_lib->vba.HTotal[k]
+ / mode_lib->vba.PixelClock[k]) * 4.0;
+ } else {
+ v->WriteBandwidth[k] = 0.0;
+ }
+ }
+
+ /*Writeback Latency support check*/
+
+ mode_lib->vba.WritebackLatencySupport = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.WritebackEnable[k] == true
+ && (v->WriteBandwidth[k]
+ > mode_lib->vba.WritebackInterfaceBufferSize * 1024
+ / mode_lib->vba.WritebackLatency)) {
+ mode_lib->vba.WritebackLatencySupport = false;
+ }
+ }
+
+ /*Writeback Mode Support Check*/
+ mode_lib->vba.EnoughWritebackUnits = true;
+ mode_lib->vba.TotalNumberOfActiveWriteback = 0;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.WritebackEnable[k] == true)
+ mode_lib->vba.TotalNumberOfActiveWriteback = mode_lib->vba.TotalNumberOfActiveWriteback + 1;
+ }
+
+ if (mode_lib->vba.TotalNumberOfActiveWriteback > mode_lib->vba.MaxNumWriteback)
+ mode_lib->vba.EnoughWritebackUnits = false;
+
+ /*Writeback Scale Ratio and Taps Support Check*/
+ mode_lib->vba.WritebackScaleRatioAndTapsSupport = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.WritebackEnable[k] == true) {
+ if (mode_lib->vba.WritebackHRatio[k] > mode_lib->vba.WritebackMaxHSCLRatio
+ || mode_lib->vba.WritebackVRatio[k] > mode_lib->vba.WritebackMaxVSCLRatio
+ || mode_lib->vba.WritebackHRatio[k] < mode_lib->vba.WritebackMinHSCLRatio
+ || mode_lib->vba.WritebackVRatio[k] < mode_lib->vba.WritebackMinVSCLRatio
+ || mode_lib->vba.WritebackHTaps[k] > mode_lib->vba.WritebackMaxHSCLTaps
+ || mode_lib->vba.WritebackVTaps[k] > mode_lib->vba.WritebackMaxVSCLTaps
+ || mode_lib->vba.WritebackHRatio[k] > mode_lib->vba.WritebackHTaps[k]
+ || mode_lib->vba.WritebackVRatio[k] > mode_lib->vba.WritebackVTaps[k]
+ || (mode_lib->vba.WritebackHTaps[k] > 2.0
+ && ((mode_lib->vba.WritebackHTaps[k] % 2) == 1))) {
+ mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
+ }
+ if (2.0 * mode_lib->vba.WritebackDestinationWidth[k] * (mode_lib->vba.WritebackVTaps[k] - 1)
+ * 57 > mode_lib->vba.WritebackLineBufferSize) {
+ mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
+ }
+ }
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(mode_lib->vba.HRatio[k], mode_lib->vba.HRatioChroma[k],
+ mode_lib->vba.VRatio[k], mode_lib->vba.VRatioChroma[k],
+ mode_lib->vba.MaxDCHUBToPSCLThroughput, mode_lib->vba.MaxPSCLToLBThroughput,
+ mode_lib->vba.PixelClock[k], mode_lib->vba.SourcePixelFormat[k],
+ mode_lib->vba.htaps[k], mode_lib->vba.HTAPsChroma[k], mode_lib->vba.vtaps[k],
+ mode_lib->vba.VTAPsChroma[k],
+ /* Output */
+ &mode_lib->vba.PSCL_FACTOR[k], &mode_lib->vba.PSCL_FACTOR_CHROMA[k],
+ &mode_lib->vba.MinDPPCLKUsingSingleDPP[k]);
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+
+ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 8192;
+ } else if (!IsVertical(mode_lib->vba.SourceRotation[k]) && v->BytePerPixelC[k] > 0
+ && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe_alpha) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 7680;
+ } else if (IsVertical(mode_lib->vba.SourceRotation[k]) && v->BytePerPixelC[k] > 0
+ && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe_alpha) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 4320;
+ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_rgbe_alpha) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 3840;
+ } else if (IsVertical(mode_lib->vba.SourceRotation[k]) && v->BytePerPixelY[k] == 8 &&
+ mode_lib->vba.DCCEnable[k] == true) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 3072;
+ } else {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 6144;
+ }
+
+ if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8 || mode_lib->vba.SourcePixelFormat[k] == dm_420_10
+ || mode_lib->vba.SourcePixelFormat[k] == dm_420_12) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportChroma = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma / 2.0;
+ } else {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportChroma = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma;
+ }
+ v->MaximumSwathWidthInLineBufferLuma = mode_lib->vba.LineBufferSizeFinal
+ * dml_max(mode_lib->vba.HRatio[k], 1.0) / mode_lib->vba.LBBitPerPixel[k]
+ / (mode_lib->vba.vtaps[k] + dml_max(dml_ceil(mode_lib->vba.VRatio[k], 1.0) - 2, 0.0));
+ if (v->BytePerPixelC[k] == 0.0) {
+ v->MaximumSwathWidthInLineBufferChroma = 0;
+ } else {
+ v->MaximumSwathWidthInLineBufferChroma = mode_lib->vba.LineBufferSizeFinal
+ * dml_max(mode_lib->vba.HRatioChroma[k], 1.0) / mode_lib->vba.LBBitPerPixel[k]
+ / (mode_lib->vba.VTAPsChroma[k]
+ + dml_max(dml_ceil(mode_lib->vba.VRatioChroma[k], 1.0) - 2,
+ 0.0));
+ }
+ v->MaximumSwathWidthLuma[k] = dml_min(v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma,
+ v->MaximumSwathWidthInLineBufferLuma);
+ v->MaximumSwathWidthChroma[k] = dml_min(v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportChroma,
+ v->MaximumSwathWidthInLineBufferChroma);
+ }
+
+ dml32_CalculateSwathAndDETConfiguration(
+ mode_lib->vba.DETSizeOverride,
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.ConfigReturnBufferSizeInKByte,
+ mode_lib->vba.MaxTotalDETInKByte,
+ mode_lib->vba.MinCompressedBufferSizeInKByte,
+ 1, /* ForceSingleDPP */
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.nomDETInKByte,
+ mode_lib->vba.UseUnboundedRequesting,
+ mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
+ mode_lib->vba.ip.pixel_chunk_size_kbytes,
+ mode_lib->vba.ip.rob_buffer_size_kbytes,
+ mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal,
+ mode_lib->vba.Output,
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.MaximumSwathWidthLuma,
+ mode_lib->vba.MaximumSwathWidthChroma,
+ mode_lib->vba.SourceRotation,
+ mode_lib->vba.ViewportStationary,
+ mode_lib->vba.SourcePixelFormat,
+ mode_lib->vba.SurfaceTiling,
+ mode_lib->vba.ViewportWidth,
+ mode_lib->vba.ViewportHeight,
+ mode_lib->vba.ViewportXStartY,
+ mode_lib->vba.ViewportYStartY,
+ mode_lib->vba.ViewportXStartC,
+ mode_lib->vba.ViewportYStartC,
+ mode_lib->vba.SurfaceWidthY,
+ mode_lib->vba.SurfaceWidthC,
+ mode_lib->vba.SurfaceHeightY,
+ mode_lib->vba.SurfaceHeightC,
+ mode_lib->vba.Read256BlockHeightY,
+ mode_lib->vba.Read256BlockHeightC,
+ mode_lib->vba.Read256BlockWidthY,
+ mode_lib->vba.Read256BlockWidthC,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_odm_mode,
+ mode_lib->vba.BlendingAndTiming,
+ mode_lib->vba.BytePerPixelY,
+ mode_lib->vba.BytePerPixelC,
+ mode_lib->vba.BytePerPixelInDETY,
+ mode_lib->vba.BytePerPixelInDETC,
+ mode_lib->vba.HActive,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.HRatioChroma,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[0], /* Integer DPPPerSurface[] */
+
+ /* Output */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[1], /* Long swath_width_luma_ub[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[2], /* Long swath_width_chroma_ub[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_double_array[0], /* Long SwathWidth[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_double_array[1], /* Long SwathWidthChroma[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[3], /* Integer SwathHeightY[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[4], /* Integer SwathHeightC[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[5], /* Long DETBufferSizeInKByte[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[6], /* Long DETBufferSizeY[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[7], /* Long DETBufferSizeC[] */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0][0], /* bool *UnboundedRequestEnabled */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[0][0], /* Long *CompressedBufferSizeInkByte */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[1][0], /* Long *CompBufReservedSpaceKBytes */
+ &CompBufReservedSpaceNeedAdjustmentSingleDPP,
+ mode_lib->vba.SingleDPPViewportSizeSupportPerSurface,/* bool ViewportSizeSupportPerSurface[] */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[1][0]); /* bool *ViewportSizeSupport */
+
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage = false;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible = false;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.MPCCombineUse[k] == dm_mpc_reduce_voltage_and_clocks)
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage = true;
+ if (mode_lib->vba.MPCCombineUse[k] == dm_mpc_always_when_possible)
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible = true;
+ }
+ mode_lib->vba.MPCCombineMethodIncompatible = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage
+ && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible;
+
+ for (i = start_state; i < v->soc.num_states; i++) {
+ for (j = 0; j < 2; j++) {
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] = 0;
+ mode_lib->vba.TotalAvailablePipesSupport[i][j] = true;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC = dm_odm_combine_mode_disabled;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC = dm_odm_combine_mode_disabled;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ dml32_CalculateODMMode(
+ mode_lib->vba.MaximumPixelsPerLinePerDSCUnit,
+ mode_lib->vba.HActive[k],
+ mode_lib->vba.OutputFormat[k],
+ mode_lib->vba.Output[k],
+ mode_lib->vba.ODMUse[k],
+ mode_lib->vba.MaxDispclk[i],
+ mode_lib->vba.MaxDispclk[v->soc.num_states - 1],
+ false,
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j],
+ mode_lib->vba.MaxNumDPP,
+ mode_lib->vba.PixelClock[k],
+ mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading,
+ mode_lib->vba.DISPCLKRampingMargin,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed,
+ mode_lib->vba.NumberOfDSCSlices[k],
+
+ /* Output */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportNoDSC,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPNoDSC,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceNoDSC);
+
+ dml32_CalculateODMMode(
+ mode_lib->vba.MaximumPixelsPerLinePerDSCUnit,
+ mode_lib->vba.HActive[k],
+ mode_lib->vba.OutputFormat[k],
+ mode_lib->vba.Output[k],
+ mode_lib->vba.ODMUse[k],
+ mode_lib->vba.MaxDispclk[i],
+ mode_lib->vba.MaxDispclk[v->soc.num_states - 1],
+ true,
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j],
+ mode_lib->vba.MaxNumDPP,
+ mode_lib->vba.PixelClock[k],
+ mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading,
+ mode_lib->vba.DISPCLKRampingMargin,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed,
+ mode_lib->vba.NumberOfDSCSlices[k],
+
+ /* Output */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportDSC,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPDSC,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceDSC);
+
+ dml32_CalculateOutputLink(
+ mode_lib->vba.PHYCLKPerState[i],
+ mode_lib->vba.PHYCLKD18PerState[i],
+ mode_lib->vba.PHYCLKD32PerState[i],
+ mode_lib->vba.Downspreading,
+ (mode_lib->vba.BlendingAndTiming[k] == k),
+ mode_lib->vba.Output[k],
+ mode_lib->vba.OutputFormat[k],
+ mode_lib->vba.HTotal[k],
+ mode_lib->vba.HActive[k],
+ mode_lib->vba.PixelClockBackEnd[k],
+ mode_lib->vba.ForcedOutputLinkBPP[k],
+ mode_lib->vba.DSCInputBitPerComponent[k],
+ mode_lib->vba.NumberOfDSCSlices[k],
+ mode_lib->vba.AudioSampleRate[k],
+ mode_lib->vba.AudioSampleLayout[k],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC,
+ mode_lib->vba.DSCEnable[k],
+ mode_lib->vba.OutputLinkDPLanes[k],
+ mode_lib->vba.OutputLinkDPRate[k],
+
+ /* Output */
+ &mode_lib->vba.RequiresDSC[i][k],
+ &mode_lib->vba.RequiresFEC[i][k],
+ &mode_lib->vba.OutputBppPerState[i][k],
+ &mode_lib->vba.OutputTypePerState[i][k],
+ &mode_lib->vba.OutputRatePerState[i][k],
+ &mode_lib->vba.RequiredSlots[i][k]);
+
+ if (mode_lib->vba.RequiresDSC[i][k] == false) {
+ mode_lib->vba.ODMCombineEnablePerState[i][k] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC;
+ mode_lib->vba.RequiredDISPCLKPerSurface[i][j][k] =
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceNoDSC;
+ if (!v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportNoDSC)
+ mode_lib->vba.TotalAvailablePipesSupport[i][j] = false;
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] =
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPNoDSC;
+ } else {
+ mode_lib->vba.ODMCombineEnablePerState[i][k] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC;
+ mode_lib->vba.RequiredDISPCLKPerSurface[i][j][k] =
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceDSC;
+ if (!v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportDSC)
+ mode_lib->vba.TotalAvailablePipesSupport[i][j] = false;
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] =
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPDSC;
+ }
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
+ mode_lib->vba.MPCCombine[i][j][k] = false;
+ mode_lib->vba.NoOfDPP[i][j][k] = 4;
+ } else if (mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
+ mode_lib->vba.MPCCombine[i][j][k] = false;
+ mode_lib->vba.NoOfDPP[i][j][k] = 2;
+ } else if (mode_lib->vba.MPCCombineUse[k] == dm_mpc_never) {
+ mode_lib->vba.MPCCombine[i][j][k] = false;
+ mode_lib->vba.NoOfDPP[i][j][k] = 1;
+ } else if (dml32_RoundToDFSGranularity(
+ mode_lib->vba.MinDPPCLKUsingSingleDPP[k]
+ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
+ / 100), 1,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed) <= mode_lib->vba.MaxDppclk[i] &&
+ mode_lib->vba.SingleDPPViewportSizeSupportPerSurface[k] == true) {
+ mode_lib->vba.MPCCombine[i][j][k] = false;
+ mode_lib->vba.NoOfDPP[i][j][k] = 1;
+ } else if (mode_lib->vba.TotalNumberOfActiveDPP[i][j] < mode_lib->vba.MaxNumDPP) {
+ mode_lib->vba.MPCCombine[i][j][k] = true;
+ mode_lib->vba.NoOfDPP[i][j][k] = 2;
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] =
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] + 1;
+ } else {
+ mode_lib->vba.MPCCombine[i][j][k] = false;
+ mode_lib->vba.NoOfDPP[i][j][k] = 1;
+ mode_lib->vba.TotalAvailablePipesSupport[i][j] = false;
+ }
+ }
+
+ mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] = 0;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma = true;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.NoOfDPP[i][j][k] == 1)
+ mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] =
+ mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] + 1;
+ if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8
+ || mode_lib->vba.SourcePixelFormat[k] == dm_420_10
+ || mode_lib->vba.SourcePixelFormat[k] == dm_420_12
+ || mode_lib->vba.SourcePixelFormat[k] == dm_rgbe_alpha) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma = false;
+ }
+ }
+
+ // if TotalNumberOfActiveDPP is > 1, then there should be no unbounded req mode (hw limitation), the comp buf reserved adjustment is not needed regardless
+ // if TotalNumberOfActiveDPP is == 1, then will use the SingleDPP version of unbounded_req for the decision
+ CompBufReservedSpaceNeedAdjustment = (mode_lib->vba.TotalNumberOfActiveDPP[i][j] > 1) ? 0 : CompBufReservedSpaceNeedAdjustmentSingleDPP;
+
+
+
+ if (j == 1 && !dml32_UnboundedRequest(mode_lib->vba.UseUnboundedRequesting,
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j], v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma,
+ mode_lib->vba.Output[0],
+ mode_lib->vba.SurfaceTiling[0],
+ CompBufReservedSpaceNeedAdjustment,
+ mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)) {
+ while (!(mode_lib->vba.TotalNumberOfActiveDPP[i][j] >= mode_lib->vba.MaxNumDPP
+ || mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] == 0)) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth = 0;
+ NumberOfNonCombinedSurfaceOfMaximumBandwidth = 0;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.MPCCombineUse[k]
+ != dm_mpc_never &&
+ mode_lib->vba.MPCCombineUse[k] != dm_mpc_reduce_voltage &&
+ mode_lib->vba.ReadBandwidthLuma[k] +
+ mode_lib->vba.ReadBandwidthChroma[k] >
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth &&
+ (mode_lib->vba.ODMCombineEnablePerState[i][k] !=
+ dm_odm_combine_mode_2to1 &&
+ mode_lib->vba.ODMCombineEnablePerState[i][k] !=
+ dm_odm_combine_mode_4to1) &&
+ mode_lib->vba.MPCCombine[i][j][k] == false) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth =
+ mode_lib->vba.ReadBandwidthLuma[k]
+ + mode_lib->vba.ReadBandwidthChroma[k];
+ NumberOfNonCombinedSurfaceOfMaximumBandwidth = k;
+ }
+ }
+ mode_lib->vba.MPCCombine[i][j][NumberOfNonCombinedSurfaceOfMaximumBandwidth] =
+ true;
+ mode_lib->vba.NoOfDPP[i][j][NumberOfNonCombinedSurfaceOfMaximumBandwidth] = 2;
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] =
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] + 1;
+ mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] =
+ mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] - 1;
+ }
+ }
+
+ //DISPCLK/DPPCLK
+ mode_lib->vba.WritebackRequiredDISPCLK = 0;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.WritebackEnable[k]) {
+ mode_lib->vba.WritebackRequiredDISPCLK = dml_max(
+ mode_lib->vba.WritebackRequiredDISPCLK,
+ dml32_CalculateWriteBackDISPCLK(
+ mode_lib->vba.WritebackPixelFormat[k],
+ mode_lib->vba.PixelClock[k],
+ mode_lib->vba.WritebackHRatio[k],
+ mode_lib->vba.WritebackVRatio[k],
+ mode_lib->vba.WritebackHTaps[k],
+ mode_lib->vba.WritebackVTaps[k],
+ mode_lib->vba.WritebackSourceWidth[k],
+ mode_lib->vba.WritebackDestinationWidth[k],
+ mode_lib->vba.HTotal[k],
+ mode_lib->vba.WritebackLineBufferSize,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed));
+ }
+ }
+
+ mode_lib->vba.RequiredDISPCLK[i][j] = mode_lib->vba.WritebackRequiredDISPCLK;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.RequiredDISPCLK[i][j] = dml_max(mode_lib->vba.RequiredDISPCLK[i][j],
+ mode_lib->vba.RequiredDISPCLKPerSurface[i][j][k]);
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ mode_lib->vba.NoOfDPPThisState[k] = mode_lib->vba.NoOfDPP[i][j][k];
+
+ dml32_CalculateDPPCLK(mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed, mode_lib->vba.MinDPPCLKUsingSingleDPP,
+ mode_lib->vba.NoOfDPPThisState,
+ /* Output */
+ &mode_lib->vba.GlobalDPPCLK, mode_lib->vba.RequiredDPPCLKThisState);
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ mode_lib->vba.RequiredDPPCLK[i][j][k] = mode_lib->vba.RequiredDPPCLKThisState[k];
+
+ mode_lib->vba.DISPCLK_DPPCLK_Support[i][j] = !((mode_lib->vba.RequiredDISPCLK[i][j]
+ > mode_lib->vba.MaxDispclk[i])
+ || (mode_lib->vba.GlobalDPPCLK > mode_lib->vba.MaxDppclk[i]));
+
+ if (mode_lib->vba.TotalNumberOfActiveDPP[i][j] > mode_lib->vba.MaxNumDPP)
+ mode_lib->vba.TotalAvailablePipesSupport[i][j] = false;
+ } // j
+ } // i (VOLTAGE_STATE)
+
+ /* Total Available OTG, HDMIFRL, DP Support Check */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG = 0;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveHDMIFRL = 0;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 = 0;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs = 0;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG + 1;
+ if (mode_lib->vba.Output[k] == dm_dp2p0) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 + 1;
+ if (mode_lib->vba.OutputMultistreamId[k]
+ == k || mode_lib->vba.OutputMultistreamEn[k] == false) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs + 1;
+ }
+ }
+ }
+ }
+
+ mode_lib->vba.NumberOfOTGSupport = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG);
+ mode_lib->vba.NumberOfHDMIFRLSupport = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveHDMIFRL <= mode_lib->vba.MaxNumHDMIFRLOutputs);
+ mode_lib->vba.NumberOfDP2p0Support = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 <= mode_lib->vba.MaxNumDP2p0Streams
+ && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs <= mode_lib->vba.MaxNumDP2p0Outputs);
+
+ /* Display IO and DSC Support Check */
+ mode_lib->vba.NonsupportedDSCInputBPC = false;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (!(mode_lib->vba.DSCInputBitPerComponent[k] == 12.0
+ || mode_lib->vba.DSCInputBitPerComponent[k] == 10.0
+ || mode_lib->vba.DSCInputBitPerComponent[k] == 8.0)
+ || mode_lib->vba.DSCInputBitPerComponent[k] > mode_lib->vba.MaximumDSCBitsPerComponent) {
+ mode_lib->vba.NonsupportedDSCInputBPC = true;
+ }
+ }
+
+ for (i = start_state; i < v->soc.num_states; ++i) {
+ mode_lib->vba.ExceededMultistreamSlots[i] = false;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.OutputMultistreamEn[k] == true && mode_lib->vba.OutputMultistreamId[k] == k) {
+ TotalSlots = mode_lib->vba.RequiredSlots[i][k];
+ for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j) {
+ if (mode_lib->vba.OutputMultistreamId[j] == k)
+ TotalSlots = TotalSlots + mode_lib->vba.RequiredSlots[i][j];
+ }
+ if (mode_lib->vba.Output[k] == dm_dp && TotalSlots > 63)
+ mode_lib->vba.ExceededMultistreamSlots[i] = true;
+ if (mode_lib->vba.Output[k] == dm_dp2p0 && TotalSlots > 64)
+ mode_lib->vba.ExceededMultistreamSlots[i] = true;
+ }
+ }
+ mode_lib->vba.LinkCapacitySupport[i] = true;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k
+ && (mode_lib->vba.Output[k] == dm_dp || mode_lib->vba.Output[k] == dm_dp2p0
+ || mode_lib->vba.Output[k] == dm_edp
+ || mode_lib->vba.Output[k] == dm_hdmi)
+ && mode_lib->vba.OutputBppPerState[i][k] == 0 &&
+ (mode_lib->vba.UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)) {
+ /* Phantom pipes don't consider DSC in DML, so it could fail link check.
+ * However, we don't care about the link for phantom pipes.
+ */
+ mode_lib->vba.LinkCapacitySupport[i] = false;
+ }
+ }
+ }
+
+ mode_lib->vba.P2IWith420 = false;
+ mode_lib->vba.DSCOnlyIfNecessaryWithBPP = false;
+ mode_lib->vba.DSC422NativeNotSupported = false;
+ mode_lib->vba.LinkRateDoesNotMatchDPVersion = false;
+ mode_lib->vba.LinkRateForMultistreamNotIndicated = false;
+ mode_lib->vba.BPPForMultistreamNotIndicated = false;
+ mode_lib->vba.MultistreamWithHDMIOreDP = false;
+ mode_lib->vba.MSOOrODMSplitWithNonDPLink = false;
+ mode_lib->vba.NotEnoughLanesForMSO = false;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k
+ && (mode_lib->vba.Output[k] == dm_dp || mode_lib->vba.Output[k] == dm_dp2p0
+ || mode_lib->vba.Output[k] == dm_edp
+ || mode_lib->vba.Output[k] == dm_hdmi)) {
+ if (mode_lib->vba.OutputFormat[k]
+ == dm_420 && mode_lib->vba.Interlace[k] == 1 &&
+ mode_lib->vba.ProgressiveToInterlaceUnitInOPP == true)
+ mode_lib->vba.P2IWith420 = true;
+
+ if (mode_lib->vba.DSCEnable[k] && mode_lib->vba.ForcedOutputLinkBPP[k] != 0)
+ mode_lib->vba.DSCOnlyIfNecessaryWithBPP = true;
+ if (mode_lib->vba.DSCEnable[k] && mode_lib->vba.OutputFormat[k] == dm_n422
+ && !mode_lib->vba.DSC422NativeSupport)
+ mode_lib->vba.DSC422NativeNotSupported = true;
+
+ if (((mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_hbr
+ || mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_hbr2
+ || mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_hbr3)
+ && mode_lib->vba.Output[k] != dm_dp && mode_lib->vba.Output[k] != dm_edp)
+ || ((mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_uhbr10
+ || mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5
+ || mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_uhbr20)
+ && mode_lib->vba.Output[k] != dm_dp2p0))
+ mode_lib->vba.LinkRateDoesNotMatchDPVersion = true;
+
+ if (mode_lib->vba.OutputMultistreamEn[k] == true) {
+ if (mode_lib->vba.OutputMultistreamId[k] == k
+ && mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_na)
+ mode_lib->vba.LinkRateForMultistreamNotIndicated = true;
+ if (mode_lib->vba.OutputMultistreamId[k] == k && mode_lib->vba.ForcedOutputLinkBPP[k] == 0)
+ mode_lib->vba.BPPForMultistreamNotIndicated = true;
+ for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j) {
+ if (mode_lib->vba.OutputMultistreamId[k] == j
+ && mode_lib->vba.ForcedOutputLinkBPP[k] == 0)
+ mode_lib->vba.BPPForMultistreamNotIndicated = true;
+ }
+ }
+
+ if ((mode_lib->vba.Output[k] == dm_edp || mode_lib->vba.Output[k] == dm_hdmi)) {
+ if (mode_lib->vba.OutputMultistreamEn[k] == true && mode_lib->vba.OutputMultistreamId[k] == k)
+ mode_lib->vba.MultistreamWithHDMIOreDP = true;
+ for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j) {
+ if (mode_lib->vba.OutputMultistreamEn[k] == true && mode_lib->vba.OutputMultistreamId[k] == j)
+ mode_lib->vba.MultistreamWithHDMIOreDP = true;
+ }
+ }
+
+ if (mode_lib->vba.Output[k] != dm_dp
+ && (mode_lib->vba.ODMUse[k] == dm_odm_split_policy_1to2
+ || mode_lib->vba.ODMUse[k] == dm_odm_mso_policy_1to2
+ || mode_lib->vba.ODMUse[k] == dm_odm_mso_policy_1to4))
+ mode_lib->vba.MSOOrODMSplitWithNonDPLink = true;
+
+ if ((mode_lib->vba.ODMUse[k] == dm_odm_mso_policy_1to2
+ && mode_lib->vba.OutputLinkDPLanes[k] < 2)
+ || (mode_lib->vba.ODMUse[k] == dm_odm_mso_policy_1to4
+ && mode_lib->vba.OutputLinkDPLanes[k] < 4))
+ mode_lib->vba.NotEnoughLanesForMSO = true;
+ }
+ }
+
+ for (i = start_state; i < v->soc.num_states; ++i) {
+ mode_lib->vba.DTBCLKRequiredMoreThanSupported[i] = false;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k
+ && dml32_RequiredDTBCLK(mode_lib->vba.RequiresDSC[i][k],
+ mode_lib->vba.PixelClockBackEnd[k],
+ mode_lib->vba.OutputFormat[k],
+ mode_lib->vba.OutputBppPerState[i][k],
+ mode_lib->vba.NumberOfDSCSlices[k], mode_lib->vba.HTotal[k],
+ mode_lib->vba.HActive[k], mode_lib->vba.AudioSampleRate[k],
+ mode_lib->vba.AudioSampleLayout[k])
+ > mode_lib->vba.DTBCLKPerState[i]) {
+ mode_lib->vba.DTBCLKRequiredMoreThanSupported[i] = true;
+ }
+ }
+ }
+
+ for (i = start_state; i < v->soc.num_states; ++i) {
+ mode_lib->vba.ODMCombine2To1SupportCheckOK[i] = true;
+ mode_lib->vba.ODMCombine4To1SupportCheckOK[i] = true;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k
+ && mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1
+ && mode_lib->vba.Output[k] == dm_hdmi) {
+ mode_lib->vba.ODMCombine2To1SupportCheckOK[i] = false;
+ }
+ if (mode_lib->vba.BlendingAndTiming[k] == k
+ && mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
+ && (mode_lib->vba.Output[k] == dm_dp || mode_lib->vba.Output[k] == dm_edp
+ || mode_lib->vba.Output[k] == dm_hdmi)) {
+ mode_lib->vba.ODMCombine4To1SupportCheckOK[i] = false;
+ }
+ }
+ }
+
+ for (i = start_state; i < v->soc.num_states; i++) {
+ mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] = false;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k) {
+ if (mode_lib->vba.Output[k] == dm_dp || mode_lib->vba.Output[k] == dm_dp2p0
+ || mode_lib->vba.Output[k] == dm_edp) {
+ if (mode_lib->vba.OutputFormat[k] == dm_420) {
+ mode_lib->vba.DSCFormatFactor = 2;
+ } else if (mode_lib->vba.OutputFormat[k] == dm_444) {
+ mode_lib->vba.DSCFormatFactor = 1;
+ } else if (mode_lib->vba.OutputFormat[k] == dm_n422) {
+ mode_lib->vba.DSCFormatFactor = 2;
+ } else {
+ mode_lib->vba.DSCFormatFactor = 1;
+ }
+ if (mode_lib->vba.RequiresDSC[i][k] == true) {
+ if (mode_lib->vba.ODMCombineEnablePerState[i][k]
+ == dm_odm_combine_mode_4to1) {
+ if (mode_lib->vba.PixelClockBackEnd[k] / 12.0 / mode_lib->vba.DSCFormatFactor > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i])
+ mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] = true;
+ } else if (mode_lib->vba.ODMCombineEnablePerState[i][k]
+ == dm_odm_combine_mode_2to1) {
+ if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i])
+ mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] = true;
+ } else {
+ if (mode_lib->vba.PixelClockBackEnd[k] / 3.0 / mode_lib->vba.DSCFormatFactor > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i])
+ mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] = true;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /* Check DSC Unit and Slices Support */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = 0;
+
+ for (i = start_state; i < v->soc.num_states; ++i) {
+ mode_lib->vba.NotEnoughDSCUnits[i] = false;
+ mode_lib->vba.NotEnoughDSCSlices[i] = false;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = 0;
+ mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = true;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.RequiresDSC[i][k] == true) {
+ if (mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
+ if (mode_lib->vba.HActive[k]
+ > 4 * mode_lib->vba.MaximumPixelsPerLinePerDSCUnit)
+ mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 4;
+ if (mode_lib->vba.NumberOfDSCSlices[k] > 16)
+ mode_lib->vba.NotEnoughDSCSlices[i] = true;
+ } else if (mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
+ if (mode_lib->vba.HActive[k]
+ > 2 * mode_lib->vba.MaximumPixelsPerLinePerDSCUnit)
+ mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 2;
+ if (mode_lib->vba.NumberOfDSCSlices[k] > 8)
+ mode_lib->vba.NotEnoughDSCSlices[i] = true;
+ } else {
+ if (mode_lib->vba.HActive[k] > mode_lib->vba.MaximumPixelsPerLinePerDSCUnit)
+ mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 1;
+ if (mode_lib->vba.NumberOfDSCSlices[k] > 4)
+ mode_lib->vba.NotEnoughDSCSlices[i] = true;
+ }
+ }
+ }
+ if (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC)
+ mode_lib->vba.NotEnoughDSCUnits[i] = true;
+ }
+
+ /*DSC Delay per state*/
+ for (i = start_state; i < v->soc.num_states; ++i) {
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.DSCDelayPerState[i][k] = dml32_DSCDelayRequirement(
+ mode_lib->vba.RequiresDSC[i][k], mode_lib->vba.ODMCombineEnablePerState[i][k],
+ mode_lib->vba.DSCInputBitPerComponent[k],
+ mode_lib->vba.OutputBppPerState[i][k], mode_lib->vba.HActive[k],
+ mode_lib->vba.HTotal[k], mode_lib->vba.NumberOfDSCSlices[k],
+ mode_lib->vba.OutputFormat[k], mode_lib->vba.Output[k],
+ mode_lib->vba.PixelClock[k], mode_lib->vba.PixelClockBackEnd[k],
+ mode_lib->vba.ip.dsc_delay_factor_wa);
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ for (m = 0; m <= mode_lib->vba.NumberOfActiveSurfaces - 1; m++) {
+ for (j = 0; j <= mode_lib->vba.NumberOfActiveSurfaces - 1; j++) {
+ if (mode_lib->vba.BlendingAndTiming[k] == m &&
+ mode_lib->vba.RequiresDSC[i][m] == true) {
+ mode_lib->vba.DSCDelayPerState[i][k] =
+ mode_lib->vba.DSCDelayPerState[i][m];
+ }
+ }
+ }
+ }
+ }
+
+ //Calculate Swath, DET Configuration, DCFCLKDeepSleep
+ //
+ for (i = start_state; i < (int) v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.RequiredDPPCLKThisState[k] = mode_lib->vba.RequiredDPPCLK[i][j][k];
+ mode_lib->vba.NoOfDPPThisState[k] = mode_lib->vba.NoOfDPP[i][j][k];
+ mode_lib->vba.ODMCombineEnableThisState[k] =
+ mode_lib->vba.ODMCombineEnablePerState[i][k];
+ }
+
+ dml32_CalculateSwathAndDETConfiguration(
+ mode_lib->vba.DETSizeOverride,
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.ConfigReturnBufferSizeInKByte,
+ mode_lib->vba.MaxTotalDETInKByte,
+ mode_lib->vba.MinCompressedBufferSizeInKByte,
+ false, /* ForceSingleDPP */
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.nomDETInKByte,
+ mode_lib->vba.UseUnboundedRequesting,
+ mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
+ mode_lib->vba.ip.pixel_chunk_size_kbytes,
+ mode_lib->vba.ip.rob_buffer_size_kbytes,
+ mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal,
+ mode_lib->vba.Output,
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.MaximumSwathWidthLuma,
+ mode_lib->vba.MaximumSwathWidthChroma,
+ mode_lib->vba.SourceRotation,
+ mode_lib->vba.ViewportStationary,
+ mode_lib->vba.SourcePixelFormat,
+ mode_lib->vba.SurfaceTiling,
+ mode_lib->vba.ViewportWidth,
+ mode_lib->vba.ViewportHeight,
+ mode_lib->vba.ViewportXStartY,
+ mode_lib->vba.ViewportYStartY,
+ mode_lib->vba.ViewportXStartC,
+ mode_lib->vba.ViewportYStartC,
+ mode_lib->vba.SurfaceWidthY,
+ mode_lib->vba.SurfaceWidthC,
+ mode_lib->vba.SurfaceHeightY,
+ mode_lib->vba.SurfaceHeightC,
+ mode_lib->vba.Read256BlockHeightY,
+ mode_lib->vba.Read256BlockHeightC,
+ mode_lib->vba.Read256BlockWidthY,
+ mode_lib->vba.Read256BlockWidthC,
+ mode_lib->vba.ODMCombineEnableThisState,
+ mode_lib->vba.BlendingAndTiming,
+ mode_lib->vba.BytePerPixelY,
+ mode_lib->vba.BytePerPixelC,
+ mode_lib->vba.BytePerPixelInDETY,
+ mode_lib->vba.BytePerPixelInDETC,
+ mode_lib->vba.HActive,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.HRatioChroma,
+ mode_lib->vba.NoOfDPPThisState,
+ /* Output */
+ mode_lib->vba.swath_width_luma_ub_this_state,
+ mode_lib->vba.swath_width_chroma_ub_this_state,
+ mode_lib->vba.SwathWidthYThisState,
+ mode_lib->vba.SwathWidthCThisState,
+ mode_lib->vba.SwathHeightYThisState,
+ mode_lib->vba.SwathHeightCThisState,
+ mode_lib->vba.DETBufferSizeInKByteThisState,
+ mode_lib->vba.DETBufferSizeYThisState,
+ mode_lib->vba.DETBufferSizeCThisState,
+ &mode_lib->vba.UnboundedRequestEnabledThisState,
+ &mode_lib->vba.CompressedBufferSizeInkByteThisState,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], /* Long CompBufReservedSpaceKBytes */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean[0], /* bool CompBufReservedSpaceNeedAdjustment */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0],
+ &mode_lib->vba.ViewportSizeSupport[i][j]);
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.swath_width_luma_ub_all_states[i][j][k] =
+ mode_lib->vba.swath_width_luma_ub_this_state[k];
+ mode_lib->vba.swath_width_chroma_ub_all_states[i][j][k] =
+ mode_lib->vba.swath_width_chroma_ub_this_state[k];
+ mode_lib->vba.SwathWidthYAllStates[i][j][k] = mode_lib->vba.SwathWidthYThisState[k];
+ mode_lib->vba.SwathWidthCAllStates[i][j][k] = mode_lib->vba.SwathWidthCThisState[k];
+ mode_lib->vba.SwathHeightYAllStates[i][j][k] = mode_lib->vba.SwathHeightYThisState[k];
+ mode_lib->vba.SwathHeightCAllStates[i][j][k] = mode_lib->vba.SwathHeightCThisState[k];
+ mode_lib->vba.UnboundedRequestEnabledAllStates[i][j] =
+ mode_lib->vba.UnboundedRequestEnabledThisState;
+ mode_lib->vba.CompressedBufferSizeInkByteAllStates[i][j] =
+ mode_lib->vba.CompressedBufferSizeInkByteThisState;
+ mode_lib->vba.DETBufferSizeInKByteAllStates[i][j][k] =
+ mode_lib->vba.DETBufferSizeInKByteThisState[k];
+ mode_lib->vba.DETBufferSizeYAllStates[i][j][k] =
+ mode_lib->vba.DETBufferSizeYThisState[k];
+ mode_lib->vba.DETBufferSizeCAllStates[i][j][k] =
+ mode_lib->vba.DETBufferSizeCThisState[k];
+ }
+ }
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.cursor_bw[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0]
+ * mode_lib->vba.CursorBPP[k][0] / 8.0
+ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
+ }
+
+ dml32_CalculateSurfaceSizeInMall(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.MALLAllocatedForDCNFinal,
+ mode_lib->vba.UseMALLForStaticScreen,
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.DCCEnable,
+ mode_lib->vba.ViewportStationary,
+ mode_lib->vba.ViewportXStartY,
+ mode_lib->vba.ViewportYStartY,
+ mode_lib->vba.ViewportXStartC,
+ mode_lib->vba.ViewportYStartC,
+ mode_lib->vba.ViewportWidth,
+ mode_lib->vba.ViewportHeight,
+ mode_lib->vba.BytePerPixelY,
+ mode_lib->vba.ViewportWidthChroma,
+ mode_lib->vba.ViewportHeightChroma,
+ mode_lib->vba.BytePerPixelC,
+ mode_lib->vba.SurfaceWidthY,
+ mode_lib->vba.SurfaceWidthC,
+ mode_lib->vba.SurfaceHeightY,
+ mode_lib->vba.SurfaceHeightC,
+ mode_lib->vba.Read256BlockWidthY,
+ mode_lib->vba.Read256BlockWidthC,
+ mode_lib->vba.Read256BlockHeightY,
+ mode_lib->vba.Read256BlockHeightC,
+ mode_lib->vba.MacroTileWidthY,
+ mode_lib->vba.MacroTileWidthC,
+ mode_lib->vba.MacroTileHeightY,
+ mode_lib->vba.MacroTileHeightC,
+ mode_lib->vba.DCCMetaPitchY,
+ mode_lib->vba.DCCMetaPitchC,
+
+ /* Output */
+ mode_lib->vba.SurfaceSizeInMALL,
+ &mode_lib->vba.ExceededMALLSize);
+
+ for (i = start_state; i < v->soc.num_states; i++) {
+ for (j = 0; j < 2; j++) {
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ mode_lib->vba.swath_width_luma_ub_this_state[k] =
+ mode_lib->vba.swath_width_luma_ub_all_states[i][j][k];
+ mode_lib->vba.swath_width_chroma_ub_this_state[k] =
+ mode_lib->vba.swath_width_chroma_ub_all_states[i][j][k];
+ mode_lib->vba.SwathWidthYThisState[k] = mode_lib->vba.SwathWidthYAllStates[i][j][k];
+ mode_lib->vba.SwathWidthCThisState[k] = mode_lib->vba.SwathWidthCAllStates[i][j][k];
+ mode_lib->vba.SwathHeightYThisState[k] = mode_lib->vba.SwathHeightYAllStates[i][j][k];
+ mode_lib->vba.SwathHeightCThisState[k] = mode_lib->vba.SwathHeightCAllStates[i][j][k];
+ mode_lib->vba.DETBufferSizeInKByteThisState[k] =
+ mode_lib->vba.DETBufferSizeInKByteAllStates[i][j][k];
+ mode_lib->vba.DETBufferSizeYThisState[k] =
+ mode_lib->vba.DETBufferSizeYAllStates[i][j][k];
+ mode_lib->vba.DETBufferSizeCThisState[k] =
+ mode_lib->vba.DETBufferSizeCAllStates[i][j][k];
+ mode_lib->vba.RequiredDPPCLKThisState[k] = mode_lib->vba.RequiredDPPCLK[i][j][k];
+ mode_lib->vba.NoOfDPPThisState[k] = mode_lib->vba.NoOfDPP[i][j][k];
+ }
+
+ mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j] = 0;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.DCCEnable[k] == true) {
+ mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j] =
+ mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j]
+ + mode_lib->vba.NoOfDPP[i][j][k];
+ }
+ }
+
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].PixelClock = mode_lib->vba.PixelClock[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].DPPPerSurface = mode_lib->vba.NoOfDPP[i][j][k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].SourceRotation = mode_lib->vba.SourceRotation[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportHeight = mode_lib->vba.ViewportHeight[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportHeightChroma = mode_lib->vba.ViewportHeightChroma[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidth256BytesY = mode_lib->vba.Read256BlockWidthY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeight256BytesY = mode_lib->vba.Read256BlockHeightY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidth256BytesC = mode_lib->vba.Read256BlockWidthC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeight256BytesC = mode_lib->vba.Read256BlockHeightC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidthY = mode_lib->vba.MacroTileWidthY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeightY = mode_lib->vba.MacroTileHeightY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidthC = mode_lib->vba.MacroTileWidthC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeightC = mode_lib->vba.MacroTileHeightC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].InterlaceEnable = mode_lib->vba.Interlace[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].HTotal = mode_lib->vba.HTotal[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].DCCEnable = mode_lib->vba.DCCEnable[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].SurfaceTiling = mode_lib->vba.SurfaceTiling[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BytePerPixelY = mode_lib->vba.BytePerPixelY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BytePerPixelC = mode_lib->vba.BytePerPixelC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ProgressiveToInterlaceUnitInOPP =
+ mode_lib->vba.ProgressiveToInterlaceUnitInOPP;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].VRatio = mode_lib->vba.VRatio[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].VRatioChroma = mode_lib->vba.VRatioChroma[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].VTaps = mode_lib->vba.vtaps[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].VTapsChroma = mode_lib->vba.VTAPsChroma[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].PitchY = mode_lib->vba.PitchY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].DCCMetaPitchY = mode_lib->vba.DCCMetaPitchY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].PitchC = mode_lib->vba.PitchC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].DCCMetaPitchC = mode_lib->vba.DCCMetaPitchC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportStationary = mode_lib->vba.ViewportStationary[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportXStart = mode_lib->vba.ViewportXStartY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportYStart = mode_lib->vba.ViewportYStartY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportXStartC = mode_lib->vba.ViewportXStartC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportYStartC = mode_lib->vba.ViewportYStartC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->vba.ForceOneRowForFrame[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].SwathHeightY = mode_lib->vba.SwathHeightYThisState[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].SwathHeightC = mode_lib->vba.SwathHeightCThisState[k];
+ }
+
+ {
+ dml32_CalculateVMRowAndSwath(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters,
+ mode_lib->vba.SurfaceSizeInMALL,
+ mode_lib->vba.PTEBufferSizeInRequestsLuma,
+ mode_lib->vba.PTEBufferSizeInRequestsChroma,
+ mode_lib->vba.DCCMetaBufferSizeBytes,
+ mode_lib->vba.UseMALLForStaticScreen,
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.MALLAllocatedForDCNFinal,
+ mode_lib->vba.SwathWidthYThisState,
+ mode_lib->vba.SwathWidthCThisState,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.HostVMEnable,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels,
+ mode_lib->vba.GPUVMMaxPageTableLevels,
+ mode_lib->vba.GPUVMMinPageSizeKBytes,
+ mode_lib->vba.HostVMMinPageSize,
+
+ /* Output */
+ mode_lib->vba.PTEBufferSizeNotExceededPerState,
+ mode_lib->vba.DCCMetaBufferSizeNotExceededPerState,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[0],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[1],
+ mode_lib->vba.dpte_row_height,
+ mode_lib->vba.dpte_row_height_chroma,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[2],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[3],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[4],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[5],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[6],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[7],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[8],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[9],
+ mode_lib->vba.meta_row_height,
+ mode_lib->vba.meta_row_height_chroma,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[10],
+ mode_lib->vba.dpte_group_bytes,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[11],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[12],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[13],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[14],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[15],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[16],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[17],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[18],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[19],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[20],
+ mode_lib->vba.PrefetchLinesYThisState,
+ mode_lib->vba.PrefetchLinesCThisState,
+ mode_lib->vba.PrefillY,
+ mode_lib->vba.PrefillC,
+ mode_lib->vba.MaxNumSwY,
+ mode_lib->vba.MaxNumSwC,
+ mode_lib->vba.meta_row_bandwidth_this_state,
+ mode_lib->vba.dpte_row_bandwidth_this_state,
+ mode_lib->vba.DPTEBytesPerRowThisState,
+ mode_lib->vba.PDEAndMetaPTEBytesPerFrameThisState,
+ mode_lib->vba.MetaRowBytesThisState,
+ mode_lib->vba.use_one_row_for_frame_this_state,
+ mode_lib->vba.use_one_row_for_frame_flip_this_state,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0], // Boolean UsesMALLForStaticScreen[]
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[1], // Boolean PTE_BUFFER_MODE[]
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[21]); // Long BIGK_FRAGMENT_SIZE[]
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.PrefetchLinesY[i][j][k] = mode_lib->vba.PrefetchLinesYThisState[k];
+ mode_lib->vba.PrefetchLinesC[i][j][k] = mode_lib->vba.PrefetchLinesCThisState[k];
+ mode_lib->vba.meta_row_bandwidth[i][j][k] =
+ mode_lib->vba.meta_row_bandwidth_this_state[k];
+ mode_lib->vba.dpte_row_bandwidth[i][j][k] =
+ mode_lib->vba.dpte_row_bandwidth_this_state[k];
+ mode_lib->vba.DPTEBytesPerRow[i][j][k] = mode_lib->vba.DPTEBytesPerRowThisState[k];
+ mode_lib->vba.PDEAndMetaPTEBytesPerFrame[i][j][k] =
+ mode_lib->vba.PDEAndMetaPTEBytesPerFrameThisState[k];
+ mode_lib->vba.MetaRowBytes[i][j][k] = mode_lib->vba.MetaRowBytesThisState[k];
+ mode_lib->vba.use_one_row_for_frame[i][j][k] =
+ mode_lib->vba.use_one_row_for_frame_this_state[k];
+ mode_lib->vba.use_one_row_for_frame_flip[i][j][k] =
+ mode_lib->vba.use_one_row_for_frame_flip_this_state[k];
+ }
+
+ mode_lib->vba.PTEBufferSizeNotExceeded[i][j] = true;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.PTEBufferSizeNotExceededPerState[k] == false)
+ mode_lib->vba.PTEBufferSizeNotExceeded[i][j] = false;
+ }
+
+ mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] = true;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.DCCMetaBufferSizeNotExceededPerState[k] == false)
+ mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] = false;
+ }
+
+ mode_lib->vba.UrgLatency[i] = dml32_CalculateUrgentLatency(
+ mode_lib->vba.UrgentLatencyPixelDataOnly,
+ mode_lib->vba.UrgentLatencyPixelMixedWithVMData,
+ mode_lib->vba.UrgentLatencyVMDataOnly, mode_lib->vba.DoUrgentLatencyAdjustment,
+ mode_lib->vba.UrgentLatencyAdjustmentFabricClockComponent,
+ mode_lib->vba.UrgentLatencyAdjustmentFabricClockReference,
+ mode_lib->vba.FabricClockPerState[i]);
+
+ //bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ dml32_CalculateUrgentBurstFactor(
+ mode_lib->vba.UsesMALLForPStateChange[k],
+ mode_lib->vba.swath_width_luma_ub_this_state[k],
+ mode_lib->vba.swath_width_chroma_ub_this_state[k],
+ mode_lib->vba.SwathHeightYThisState[k],
+ mode_lib->vba.SwathHeightCThisState[k],
+ (double) mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
+ mode_lib->vba.UrgLatency[i],
+ mode_lib->vba.CursorBufferSize,
+ mode_lib->vba.CursorWidth[k][0],
+ mode_lib->vba.CursorBPP[k][0],
+ mode_lib->vba.VRatio[k],
+ mode_lib->vba.VRatioChroma[k],
+ mode_lib->vba.BytePerPixelInDETY[k],
+ mode_lib->vba.BytePerPixelInDETC[k],
+ mode_lib->vba.DETBufferSizeYThisState[k],
+ mode_lib->vba.DETBufferSizeCThisState[k],
+ /* Output */
+ &mode_lib->vba.UrgentBurstFactorCursor[k],
+ &mode_lib->vba.UrgentBurstFactorLuma[k],
+ &mode_lib->vba.UrgentBurstFactorChroma[k],
+ &mode_lib->vba.NoUrgentLatencyHiding[k]);
+ }
+
+ dml32_CalculateDCFCLKDeepSleep(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.BytePerPixelY,
+ mode_lib->vba.BytePerPixelC,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.VRatioChroma,
+ mode_lib->vba.SwathWidthYThisState,
+ mode_lib->vba.SwathWidthCThisState,
+ mode_lib->vba.NoOfDPPThisState,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.HRatioChroma,
+ mode_lib->vba.PixelClock,
+ mode_lib->vba.PSCL_FACTOR,
+ mode_lib->vba.PSCL_FACTOR_CHROMA,
+ mode_lib->vba.RequiredDPPCLKThisState,
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.ReturnBusWidth,
+
+ /* Output */
+ &mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j]);
+ }
+ }
+
+ //Calculate Return BW
+ for (i = start_state; i < (int) v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k) {
+ if (mode_lib->vba.WritebackEnable[k] == true) {
+ mode_lib->vba.WritebackDelayTime[k] =
+ mode_lib->vba.WritebackLatency
+ + dml32_CalculateWriteBackDelay(
+ mode_lib->vba.WritebackPixelFormat[k],
+ mode_lib->vba.WritebackHRatio[k],
+ mode_lib->vba.WritebackVRatio[k],
+ mode_lib->vba.WritebackVTaps[k],
+ mode_lib->vba.WritebackDestinationWidth[k],
+ mode_lib->vba.WritebackDestinationHeight[k],
+ mode_lib->vba.WritebackSourceHeight[k],
+ mode_lib->vba.HTotal[k])
+ / mode_lib->vba.RequiredDISPCLK[i][j];
+ } else {
+ mode_lib->vba.WritebackDelayTime[k] = 0.0;
+ }
+ for (m = 0; m <= mode_lib->vba.NumberOfActiveSurfaces - 1; m++) {
+ if (mode_lib->vba.BlendingAndTiming[m]
+ == k && mode_lib->vba.WritebackEnable[m] == true) {
+ mode_lib->vba.WritebackDelayTime[k] =
+ dml_max(mode_lib->vba.WritebackDelayTime[k],
+ mode_lib->vba.WritebackLatency
+ + dml32_CalculateWriteBackDelay(
+ mode_lib->vba.WritebackPixelFormat[m],
+ mode_lib->vba.WritebackHRatio[m],
+ mode_lib->vba.WritebackVRatio[m],
+ mode_lib->vba.WritebackVTaps[m],
+ mode_lib->vba.WritebackDestinationWidth[m],
+ mode_lib->vba.WritebackDestinationHeight[m],
+ mode_lib->vba.WritebackSourceHeight[m],
+ mode_lib->vba.HTotal[m]) /
+ mode_lib->vba.RequiredDISPCLK[i][j]);
+ }
+ }
+ }
+ }
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ for (m = 0; m <= mode_lib->vba.NumberOfActiveSurfaces - 1; m++) {
+ if (mode_lib->vba.BlendingAndTiming[k] == m) {
+ mode_lib->vba.WritebackDelayTime[k] =
+ mode_lib->vba.WritebackDelayTime[m];
+ }
+ }
+ }
+ mode_lib->vba.MaxMaxVStartup[i][j] = 0;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ mode_lib->vba.MaximumVStartup[i][j][k] = ((mode_lib->vba.Interlace[k] &&
+ !mode_lib->vba.ProgressiveToInterlaceUnitInOPP) ?
+ dml_floor((mode_lib->vba.VTotal[k] -
+ mode_lib->vba.VActive[k]) / 2.0, 1.0) :
+ mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k])
+ - dml_max(1.0, dml_ceil(1.0 *
+ mode_lib->vba.WritebackDelayTime[k] /
+ (mode_lib->vba.HTotal[k] /
+ mode_lib->vba.PixelClock[k]), 1.0));
+
+ // Clamp to max OTG vstartup register limit
+ if (mode_lib->vba.MaximumVStartup[i][j][k] > 1023)
+ mode_lib->vba.MaximumVStartup[i][j][k] = 1023;
+
+ mode_lib->vba.MaxMaxVStartup[i][j] = dml_max(mode_lib->vba.MaxMaxVStartup[i][j],
+ mode_lib->vba.MaximumVStartup[i][j][k]);
+ }
+ }
+ }
+
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes = mode_lib->vba.NumberOfChannels
+ * dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
+ mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
+ mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly);
+
+ dml32_CalculateMinAndMaxPrefetchMode(mode_lib->vba.AllowForPStateChangeOrStutterInVBlankFinal,
+ &mode_lib->vba.MinPrefetchMode,
+ &mode_lib->vba.MaxPrefetchMode);
+
+ for (i = start_state; i < (int) v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j)
+ mode_lib->vba.DCFCLKState[i][j] = mode_lib->vba.DCFCLKPerState[i];
+ }
+
+ /* Immediate Flip and MALL parameters */
+ mode_lib->vba.ImmediateFlipRequiredFinal = false;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.ImmediateFlipRequiredFinal = mode_lib->vba.ImmediateFlipRequiredFinal
+ || (mode_lib->vba.ImmediateFlipRequirement[k] == dm_immediate_flip_required);
+ }
+
+ mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified =
+ mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified
+ || ((mode_lib->vba.ImmediateFlipRequirement[k]
+ != dm_immediate_flip_required)
+ && (mode_lib->vba.ImmediateFlipRequirement[k]
+ != dm_immediate_flip_not_required));
+ }
+ mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified =
+ mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified
+ && mode_lib->vba.ImmediateFlipRequiredFinal;
+
+ mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe =
+ mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe ||
+ ((mode_lib->vba.HostVMEnable == true || mode_lib->vba.ImmediateFlipRequirement[k] !=
+ dm_immediate_flip_not_required) &&
+ (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame ||
+ mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe));
+ }
+
+ mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen =
+ mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen
+ || ((mode_lib->vba.UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable
+ || mode_lib->vba.UseMALLForStaticScreen[k] == dm_use_mall_static_screen_optimize)
+ && (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe))
+ || ((mode_lib->vba.UseMALLForStaticScreen[k] == dm_use_mall_static_screen_disable
+ || mode_lib->vba.UseMALLForStaticScreen[k] == dm_use_mall_static_screen_optimize)
+ && (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame));
+ }
+
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod = false;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod = false;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod = false;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod = true;
+ if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod = true;
+ if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe)
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod = true;
+ }
+ mode_lib->vba.InvalidCombinationOfMALLUseForPState = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod
+ != v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod) || (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod);
+
+ if (mode_lib->vba.UseMinimumRequiredDCFCLK == true) {
+ dml32_UseMinimumDCFCLK(
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.DRRDisplay,
+ mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ mode_lib->vba.MaxInterDCNTileRepeaters,
+ mode_lib->vba.MaxPrefetchMode,
+ mode_lib->vba.DRAMClockChangeLatency,
+ mode_lib->vba.FCLKChangeLatency,
+ mode_lib->vba.SREnterPlusExitTime,
+ mode_lib->vba.ReturnBusWidth,
+ mode_lib->vba.RoundTripPingLatencyCycles,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes,
+ mode_lib->vba.PixelChunkSizeInKByte,
+ mode_lib->vba.MetaChunkSize,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.GPUVMMaxPageTableLevels,
+ mode_lib->vba.HostVMEnable,
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.HostVMMinPageSize,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels,
+ mode_lib->vba.DynamicMetadataVMEnabled,
+ mode_lib->vba.ImmediateFlipRequiredFinal,
+ mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
+ mode_lib->vba.MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
+ mode_lib->vba.PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
+ mode_lib->vba.VTotal,
+ mode_lib->vba.VActive,
+ mode_lib->vba.DynamicMetadataTransmittedBytes,
+ mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired,
+ mode_lib->vba.Interlace,
+ mode_lib->vba.RequiredDPPCLK,
+ mode_lib->vba.RequiredDISPCLK,
+ mode_lib->vba.UrgLatency,
+ mode_lib->vba.NoOfDPP,
+ mode_lib->vba.ProjectedDCFCLKDeepSleep,
+ mode_lib->vba.MaximumVStartup,
+ mode_lib->vba.TotalNumberOfActiveDPP,
+ mode_lib->vba.TotalNumberOfDCCActiveDPP,
+ mode_lib->vba.dpte_group_bytes,
+ mode_lib->vba.PrefetchLinesY,
+ mode_lib->vba.PrefetchLinesC,
+ mode_lib->vba.swath_width_luma_ub_all_states,
+ mode_lib->vba.swath_width_chroma_ub_all_states,
+ mode_lib->vba.BytePerPixelY,
+ mode_lib->vba.BytePerPixelC,
+ mode_lib->vba.HTotal,
+ mode_lib->vba.PixelClock,
+ mode_lib->vba.PDEAndMetaPTEBytesPerFrame,
+ mode_lib->vba.DPTEBytesPerRow,
+ mode_lib->vba.MetaRowBytes,
+ mode_lib->vba.DynamicMetadataEnable,
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.DCFCLKPerState,
+
+ /* Output */
+ mode_lib->vba.DCFCLKState);
+ } // UseMinimumRequiredDCFCLK == true
+
+ for (i = start_state; i < (int) v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ mode_lib->vba.ReturnBWPerState[i][j] = dml32_get_return_bw_mbps(&mode_lib->vba.soc, i,
+ mode_lib->vba.HostVMEnable, mode_lib->vba.DCFCLKState[i][j],
+ mode_lib->vba.FabricClockPerState[i], mode_lib->vba.DRAMSpeedPerState[i]);
+ }
+ }
+
+ //Re-ordering Buffer Support Check
+ for (i = start_state; i < (int) v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024
+ / mode_lib->vba.ReturnBWPerState[i][j]
+ > (mode_lib->vba.RoundTripPingLatencyCycles + 32)
+ / mode_lib->vba.DCFCLKState[i][j]
+ + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes / mode_lib->vba.ReturnBWPerState[i][j]) {
+ mode_lib->vba.ROBSupport[i][j] = true;
+ } else {
+ mode_lib->vba.ROBSupport[i][j] = false;
+ }
+ }
+ }
+
+ //Vertical Active BW support check
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaxTotalVActiveRDBandwidth = 0;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaxTotalVActiveRDBandwidth += mode_lib->vba.ReadBandwidthLuma[k]
+ + mode_lib->vba.ReadBandwidthChroma[k];
+ }
+
+ for (i = start_state; i < (int) v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][j] =
+ dml_min3(mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKState[i][j]
+ * mode_lib->vba.MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
+ mode_lib->vba.FabricClockPerState[i]
+ * mode_lib->vba.FabricDatapathToDCNDataReturn
+ * mode_lib->vba.MaxAveragePercentOfIdealFabricBWDisplayCanUseInNormalSystemOperation / 100,
+ mode_lib->vba.DRAMSpeedPerState[i]
+ * mode_lib->vba.NumberOfChannels
+ * mode_lib->vba.DRAMChannelWidth
+ * (i < 2 ? mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperationSTROBE : mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation) / 100);
+
+ if (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaxTotalVActiveRDBandwidth
+ <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
+ mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] = true;
+ } else {
+ mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] = false;
+ }
+ }
+ }
+
+ /* Prefetch Check */
+
+ for (i = start_state; i < (int) v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+
+ mode_lib->vba.TimeCalc = 24 / mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j];
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.NoOfDPPThisState[k] = mode_lib->vba.NoOfDPP[i][j][k];
+ mode_lib->vba.swath_width_luma_ub_this_state[k] =
+ mode_lib->vba.swath_width_luma_ub_all_states[i][j][k];
+ mode_lib->vba.swath_width_chroma_ub_this_state[k] =
+ mode_lib->vba.swath_width_chroma_ub_all_states[i][j][k];
+ mode_lib->vba.SwathWidthYThisState[k] = mode_lib->vba.SwathWidthYAllStates[i][j][k];
+ mode_lib->vba.SwathWidthCThisState[k] = mode_lib->vba.SwathWidthCAllStates[i][j][k];
+ mode_lib->vba.SwathHeightYThisState[k] = mode_lib->vba.SwathHeightYAllStates[i][j][k];
+ mode_lib->vba.SwathHeightCThisState[k] = mode_lib->vba.SwathHeightCAllStates[i][j][k];
+ mode_lib->vba.UnboundedRequestEnabledThisState =
+ mode_lib->vba.UnboundedRequestEnabledAllStates[i][j];
+ mode_lib->vba.CompressedBufferSizeInkByteThisState =
+ mode_lib->vba.CompressedBufferSizeInkByteAllStates[i][j];
+ mode_lib->vba.DETBufferSizeInKByteThisState[k] =
+ mode_lib->vba.DETBufferSizeInKByteAllStates[i][j][k];
+ mode_lib->vba.DETBufferSizeYThisState[k] =
+ mode_lib->vba.DETBufferSizeYAllStates[i][j][k];
+ mode_lib->vba.DETBufferSizeCThisState[k] =
+ mode_lib->vba.DETBufferSizeCAllStates[i][j][k];
+ }
+
+ mode_lib->vba.VActiveBandwithSupport[i][j] = dml32_CalculateVActiveBandwithSupport(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBWPerState[i][j],
+ mode_lib->vba.NoUrgentLatencyHiding,
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.cursor_bw,
+ mode_lib->vba.meta_row_bandwidth_this_state,
+ mode_lib->vba.dpte_row_bandwidth_this_state,
+ mode_lib->vba.NoOfDPPThisState,
+ mode_lib->vba.UrgentBurstFactorLuma,
+ mode_lib->vba.UrgentBurstFactorChroma,
+ mode_lib->vba.UrgentBurstFactorCursor);
+
+ mode_lib->vba.NotEnoughDETSwathFillLatencyHidingPerState[i][j] = dml32_CalculateDETSwathFillLatencyHiding(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBWPerState[i][j],
+ mode_lib->vba.UrgLatency[i],
+ mode_lib->vba.SwathHeightYThisState,
+ mode_lib->vba.SwathHeightCThisState,
+ mode_lib->vba.swath_width_luma_ub_this_state,
+ mode_lib->vba.swath_width_chroma_ub_this_state,
+ mode_lib->vba.BytePerPixelInDETY,
+ mode_lib->vba.BytePerPixelInDETC,
+ mode_lib->vba.DETBufferSizeYThisState,
+ mode_lib->vba.DETBufferSizeCThisState,
+ mode_lib->vba.NoOfDPPThisState,
+ mode_lib->vba.HTotal,
+ mode_lib->vba.PixelClock,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.VRatioChroma,
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.UseUnboundedRequesting);
+
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState = dml32_get_return_bw_mbps_vm_only(&mode_lib->vba.soc, i,
+ mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.FabricClockPerState[i],
+ mode_lib->vba.DRAMSpeedPerState[i]);
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor = 1;
+
+ if (mode_lib->vba.GPUVMEnable && mode_lib->vba.HostVMEnable)
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor = mode_lib->vba.ReturnBWPerState[i][j]
+ / v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState;
+
+ mode_lib->vba.ExtraLatency = dml32_CalculateExtraLatency(
+ mode_lib->vba.RoundTripPingLatencyCycles, v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes,
+ mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.TotalNumberOfActiveDPP[i][j],
+ mode_lib->vba.PixelChunkSizeInKByte,
+ mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j], mode_lib->vba.MetaChunkSize,
+ mode_lib->vba.ReturnBWPerState[i][j], mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.HostVMEnable, mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.NoOfDPPThisState, mode_lib->vba.dpte_group_bytes,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, mode_lib->vba.HostVMMinPageSize,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels);
+
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState = mode_lib->vba.MinPrefetchMode;
+
+ mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[i][j];
+
+ do {
+ mode_lib->vba.PrefetchModePerState[i][j] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState;
+ mode_lib->vba.MaxVStartup = mode_lib->vba.NextMaxVStartup;
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ mode_lib->vba.TWait = dml32_CalculateTWait(
+ mode_lib->vba.PrefetchModePerState[i][j],
+ mode_lib->vba.UsesMALLForPStateChange[k],
+ mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ mode_lib->vba.DRRDisplay[k],
+ mode_lib->vba.DRAMClockChangeLatency,
+ mode_lib->vba.FCLKChangeLatency, mode_lib->vba.UrgLatency[i],
+ mode_lib->vba.SREnterPlusExitTime);
+
+ memset(&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull, 0, sizeof(DmlPipe));
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.Dppclk = mode_lib->vba.RequiredDPPCLK[i][j][k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.Dispclk = mode_lib->vba.RequiredDISPCLK[i][j];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.PixelClock = mode_lib->vba.PixelClock[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DCFClkDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DPPPerSurface = mode_lib->vba.NoOfDPP[i][j][k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.SourceRotation = mode_lib->vba.SourceRotation[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockWidth256BytesY = mode_lib->vba.Read256BlockWidthY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockHeight256BytesY = mode_lib->vba.Read256BlockHeightY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockWidth256BytesC = mode_lib->vba.Read256BlockWidthC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockHeight256BytesC = mode_lib->vba.Read256BlockHeightC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.HTotal = mode_lib->vba.HTotal[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.HActive = mode_lib->vba.HActive[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DCCEnable = mode_lib->vba.DCCEnable[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ODMMode = mode_lib->vba.ODMCombineEnablePerState[i][k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BytePerPixelY = mode_lib->vba.BytePerPixelY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BytePerPixelC = mode_lib->vba.BytePerPixelC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ProgressiveToInterlaceUnitInOPP =
+ mode_lib->vba.ProgressiveToInterlaceUnitInOPP;
+
+ mode_lib->vba.NoTimeForPrefetch[i][j][k] =
+ dml32_CalculatePrefetchSchedule(
+ v,
+ k,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe,
+ v->DSCDelayPerState[i][k],
+ v->SwathWidthYThisState[k] / v->HRatio[k],
+ dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
+ v->MaximumVStartup[i][j][k],
+ v->UrgLatency[i],
+ v->ExtraLatency,
+ v->TimeCalc,
+ v->PDEAndMetaPTEBytesPerFrame[i][j][k],
+ v->MetaRowBytes[i][j][k],
+ v->DPTEBytesPerRow[i][j][k],
+ v->PrefetchLinesY[i][j][k],
+ v->SwathWidthYThisState[k],
+ v->PrefillY[k],
+ v->MaxNumSwY[k],
+ v->PrefetchLinesC[i][j][k],
+ v->SwathWidthCThisState[k],
+ v->PrefillC[k],
+ v->MaxNumSwC[k],
+ v->swath_width_luma_ub_this_state[k],
+ v->swath_width_chroma_ub_this_state[k],
+ v->SwathHeightYThisState[k],
+ v->SwathHeightCThisState[k], v->TWait,
+ (v->DRAMSpeedPerState[i] <= MEM_STROBE_FREQ_MHZ || v->DCFCLKState[i][j] <= DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ) ?
+ mode_lib->vba.ip.min_prefetch_in_strobe_us : 0,
+ mode_lib->vba.PrefetchModePerState[i][j] > 0 || mode_lib->vba.DRAMClockChangeRequirementFinal == false,
+
+ /* Output */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler[k],
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTYAfterScaler[k],
+ &v->LineTimesForPrefetch[k],
+ &v->PrefetchBW[k],
+ &v->LinesForMetaPTE[k],
+ &v->LinesForMetaAndDPTERow[k],
+ &v->VRatioPreY[i][j][k],
+ &v->VRatioPreC[i][j][k],
+ &v->RequiredPrefetchPixelDataBWLuma[0][0][k],
+ &v->RequiredPrefetchPixelDataBWChroma[0][0][k],
+ &v->NoTimeForDynamicMetadata[i][j][k],
+ &v->Tno_bw[k],
+ &v->prefetch_vmrow_bw[k],
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[0], // double *Tdmdl_vm
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[1], // double *Tdmdl
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[2], // double *TSetup
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], // unsigned int *VUpdateOffsetPix
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[3], // unsigned int *VUpdateWidthPix
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[4]); // unsigned int *VReadyOffsetPix
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ dml32_CalculateUrgentBurstFactor(
+ mode_lib->vba.UsesMALLForPStateChange[k],
+ mode_lib->vba.swath_width_luma_ub_this_state[k],
+ mode_lib->vba.swath_width_chroma_ub_this_state[k],
+ mode_lib->vba.SwathHeightYThisState[k],
+ mode_lib->vba.SwathHeightCThisState[k],
+ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
+ mode_lib->vba.UrgLatency[i], mode_lib->vba.CursorBufferSize,
+ mode_lib->vba.CursorWidth[k][0], mode_lib->vba.CursorBPP[k][0],
+ mode_lib->vba.VRatioPreY[i][j][k],
+ mode_lib->vba.VRatioPreC[i][j][k],
+ mode_lib->vba.BytePerPixelInDETY[k],
+ mode_lib->vba.BytePerPixelInDETC[k],
+ mode_lib->vba.DETBufferSizeYThisState[k],
+ mode_lib->vba.DETBufferSizeCThisState[k],
+ /* Output */
+ &mode_lib->vba.UrgentBurstFactorCursorPre[k],
+ &mode_lib->vba.UrgentBurstFactorLumaPre[k],
+ &mode_lib->vba.UrgentBurstFactorChromaPre[k],
+ &mode_lib->vba.NotUrgentLatencyHidingPre[k]);
+
+ v->cursor_bw_pre[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] /
+ 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * v->VRatioPreY[i][j][k];
+ }
+
+ {
+ dml32_CalculatePrefetchBandwithSupport(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBWPerState[i][j],
+ mode_lib->vba.NotUrgentLatencyHidingPre,
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.RequiredPrefetchPixelDataBWLuma[0][0],
+ mode_lib->vba.RequiredPrefetchPixelDataBWChroma[0][0],
+ mode_lib->vba.cursor_bw,
+ mode_lib->vba.meta_row_bandwidth_this_state,
+ mode_lib->vba.dpte_row_bandwidth_this_state,
+ mode_lib->vba.cursor_bw_pre,
+ mode_lib->vba.prefetch_vmrow_bw,
+ mode_lib->vba.NoOfDPPThisState,
+ mode_lib->vba.UrgentBurstFactorLuma,
+ mode_lib->vba.UrgentBurstFactorChroma,
+ mode_lib->vba.UrgentBurstFactorCursor,
+ mode_lib->vba.UrgentBurstFactorLumaPre,
+ mode_lib->vba.UrgentBurstFactorChromaPre,
+ mode_lib->vba.UrgentBurstFactorCursorPre,
+ v->PrefetchBW,
+ v->VRatio,
+ v->MaxVRatioPre,
+
+ /* output */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[0], // Single *PrefetchBandwidth
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[1], // Single *FractionOfUrgentBandwidth
+ &mode_lib->vba.PrefetchSupported[i][j]);
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.LineTimesForPrefetch[k]
+ < 2.0 || mode_lib->vba.LinesForMetaPTE[k] >= 32.0
+ || mode_lib->vba.LinesForMetaAndDPTERow[k] >= 16.0
+ || mode_lib->vba.NoTimeForPrefetch[i][j][k] == true) {
+ mode_lib->vba.PrefetchSupported[i][j] = false;
+ }
+ }
+
+ mode_lib->vba.DynamicMetadataSupported[i][j] = true;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.NoTimeForDynamicMetadata[i][j][k] == true)
+ mode_lib->vba.DynamicMetadataSupported[i][j] = false;
+ }
+
+ mode_lib->vba.VRatioInPrefetchSupported[i][j] = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.VRatioPreY[i][j][k] > mode_lib->vba.MaxVRatioPre
+ || mode_lib->vba.VRatioPreC[i][j][k] > mode_lib->vba.MaxVRatioPre
+ || mode_lib->vba.NoTimeForPrefetch[i][j][k] == true) {
+ mode_lib->vba.VRatioInPrefetchSupported[i][j] = false;
+ }
+ }
+ mode_lib->vba.AnyLinesForVMOrRowTooLarge = false;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.LinesForMetaAndDPTERow[k] >= 16
+ || mode_lib->vba.LinesForMetaPTE[k] >= 32) {
+ mode_lib->vba.AnyLinesForVMOrRowTooLarge = true;
+ }
+ }
+
+ if (mode_lib->vba.PrefetchSupported[i][j] == true
+ && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true) {
+ mode_lib->vba.BandwidthAvailableForImmediateFlip =
+ dml32_CalculateBandwidthAvailableForImmediateFlip(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBWPerState[i][j],
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.RequiredPrefetchPixelDataBWLuma[0][0],
+ mode_lib->vba.RequiredPrefetchPixelDataBWChroma[0][0],
+ mode_lib->vba.cursor_bw,
+ mode_lib->vba.cursor_bw_pre,
+ mode_lib->vba.NoOfDPPThisState,
+ mode_lib->vba.UrgentBurstFactorLuma,
+ mode_lib->vba.UrgentBurstFactorChroma,
+ mode_lib->vba.UrgentBurstFactorCursor,
+ mode_lib->vba.UrgentBurstFactorLumaPre,
+ mode_lib->vba.UrgentBurstFactorChromaPre,
+ mode_lib->vba.UrgentBurstFactorCursorPre);
+
+ mode_lib->vba.TotImmediateFlipBytes = 0.0;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (!(mode_lib->vba.ImmediateFlipRequirement[k] ==
+ dm_immediate_flip_not_required)) {
+ mode_lib->vba.TotImmediateFlipBytes =
+ mode_lib->vba.TotImmediateFlipBytes
+ + mode_lib->vba.NoOfDPP[i][j][k]
+ * mode_lib->vba.PDEAndMetaPTEBytesPerFrame[i][j][k]
+ + mode_lib->vba.MetaRowBytes[i][j][k];
+ if (mode_lib->vba.use_one_row_for_frame_flip[i][j][k]) {
+ mode_lib->vba.TotImmediateFlipBytes =
+ mode_lib->vba.TotImmediateFlipBytes + 2
+ * mode_lib->vba.DPTEBytesPerRow[i][j][k];
+ } else {
+ mode_lib->vba.TotImmediateFlipBytes =
+ mode_lib->vba.TotImmediateFlipBytes
+ + mode_lib->vba.DPTEBytesPerRow[i][j][k];
+ }
+ }
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ dml32_CalculateFlipSchedule(v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor,
+ mode_lib->vba.ExtraLatency,
+ mode_lib->vba.UrgLatency[i],
+ mode_lib->vba.GPUVMMaxPageTableLevels,
+ mode_lib->vba.HostVMEnable,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.HostVMMinPageSize,
+ mode_lib->vba.PDEAndMetaPTEBytesPerFrame[i][j][k],
+ mode_lib->vba.MetaRowBytes[i][j][k],
+ mode_lib->vba.DPTEBytesPerRow[i][j][k],
+ mode_lib->vba.BandwidthAvailableForImmediateFlip,
+ mode_lib->vba.TotImmediateFlipBytes,
+ mode_lib->vba.SourcePixelFormat[k],
+ (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]),
+ mode_lib->vba.VRatio[k],
+ mode_lib->vba.VRatioChroma[k],
+ mode_lib->vba.Tno_bw[k],
+ mode_lib->vba.DCCEnable[k],
+ mode_lib->vba.dpte_row_height[k],
+ mode_lib->vba.meta_row_height[k],
+ mode_lib->vba.dpte_row_height_chroma[k],
+ mode_lib->vba.meta_row_height_chroma[k],
+ mode_lib->vba.use_one_row_for_frame_flip[i][j][k], // 24
+
+ /* Output */
+ &mode_lib->vba.DestinationLinesToRequestVMInImmediateFlip[k],
+ &mode_lib->vba.DestinationLinesToRequestRowInImmediateFlip[k],
+ &mode_lib->vba.final_flip_bw[k],
+ &mode_lib->vba.ImmediateFlipSupportedForPipe[k]);
+ }
+
+ {
+ dml32_CalculateImmediateFlipBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBWPerState[i][j],
+ mode_lib->vba.ImmediateFlipRequirement,
+ mode_lib->vba.final_flip_bw,
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.RequiredPrefetchPixelDataBWLuma[0][0],
+ mode_lib->vba.RequiredPrefetchPixelDataBWChroma[0][0],
+ mode_lib->vba.cursor_bw,
+ mode_lib->vba.meta_row_bandwidth_this_state,
+ mode_lib->vba.dpte_row_bandwidth_this_state,
+ mode_lib->vba.cursor_bw_pre,
+ mode_lib->vba.prefetch_vmrow_bw,
+ mode_lib->vba.DPPPerPlane,
+ mode_lib->vba.UrgentBurstFactorLuma,
+ mode_lib->vba.UrgentBurstFactorChroma,
+ mode_lib->vba.UrgentBurstFactorCursor,
+ mode_lib->vba.UrgentBurstFactorLumaPre,
+ mode_lib->vba.UrgentBurstFactorChromaPre,
+ mode_lib->vba.UrgentBurstFactorCursorPre,
+
+ /* output */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[0], // Single *TotalBandwidth
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[1], // Single *FractionOfUrgentBandwidth
+ &mode_lib->vba.ImmediateFlipSupportedForState[i][j]); // Boolean *ImmediateFlipBandwidthSupport
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (!(mode_lib->vba.ImmediateFlipRequirement[k]
+ == dm_immediate_flip_not_required)
+ && (mode_lib->vba.ImmediateFlipSupportedForPipe[k]
+ == false))
+ mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false;
+ }
+ } else { // if prefetch not support, assume iflip not supported
+ mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false;
+ }
+
+ if (mode_lib->vba.MaxVStartup <= __DML_VBA_MIN_VSTARTUP__
+ || mode_lib->vba.AnyLinesForVMOrRowTooLarge == false) {
+ mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[i][j];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState + 1;
+ } else {
+ mode_lib->vba.NextMaxVStartup = mode_lib->vba.NextMaxVStartup - 1;
+ }
+ } while (!((mode_lib->vba.PrefetchSupported[i][j] == true
+ && mode_lib->vba.DynamicMetadataSupported[i][j] == true
+ && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true &&
+ // consider flip support is okay if when there is no hostvm and the
+ // user does't require a iflip OR the flip bw is ok
+ // If there is hostvm, DCN needs to support iflip for invalidation
+ ((mode_lib->vba.HostVMEnable == false
+ && !mode_lib->vba.ImmediateFlipRequiredFinal)
+ || mode_lib->vba.ImmediateFlipSupportedForState[i][j] == true))
+ || (mode_lib->vba.NextMaxVStartup == mode_lib->vba.MaxMaxVStartup[i][j]
+ && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState > mode_lib->vba.MaxPrefetchMode)));
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.use_one_row_for_frame_this_state[k] =
+ mode_lib->vba.use_one_row_for_frame[i][j][k];
+ }
+
+
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.UrgentLatency = mode_lib->vba.UrgLatency[i];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.ExtraLatency = mode_lib->vba.ExtraLatency;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.WritebackLatency = mode_lib->vba.WritebackLatency;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.DRAMClockChangeLatency = mode_lib->vba.DRAMClockChangeLatency;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.FCLKChangeLatency = mode_lib->vba.FCLKChangeLatency;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.SRExitTime = mode_lib->vba.SRExitTime;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.SREnterPlusExitTime = mode_lib->vba.SREnterPlusExitTime;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.SRExitZ8Time = mode_lib->vba.SRExitZ8Time;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.SREnterPlusExitZ8Time = mode_lib->vba.SREnterPlusExitZ8Time;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.USRRetrainingLatency = mode_lib->vba.USRRetrainingLatency;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.SMNLatency = mode_lib->vba.SMNLatency;
+
+ {
+ dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+ v,
+ v->PrefetchModePerState[i][j],
+ v->DCFCLKState[i][j],
+ v->ReturnBWPerState[i][j],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters,
+ v->SOCCLKPerState[i],
+ v->ProjectedDCFCLKDeepSleep[i][j],
+ v->DETBufferSizeYThisState,
+ v->DETBufferSizeCThisState,
+ v->SwathHeightYThisState,
+ v->SwathHeightCThisState,
+ v->SwathWidthYThisState, // 24
+ v->SwathWidthCThisState,
+ v->NoOfDPPThisState,
+ v->BytePerPixelInDETY,
+ v->BytePerPixelInDETC,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTYAfterScaler,
+ v->UnboundedRequestEnabledThisState,
+ v->CompressedBufferSizeInkByteThisState,
+
+ /* Output */
+ &v->DRAMClockChangeSupport[i][j],
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[0], // double *MaxActiveDRAMClockChangeLatencySupported
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], // Long SubViewportLinesNeededInMALL[]
+ &v->FCLKChangeSupport[i][j],
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[1], // double *MinActiveFCLKChangeLatencySupported
+ &mode_lib->vba.USRRetrainingSupport[i][j],
+ mode_lib->vba.ActiveDRAMClockChangeLatencyMarginPerState[i][j]);
+ }
+ }
+ } // End of Prefetch Check
+
+ /*Cursor Support Check*/
+ mode_lib->vba.CursorSupport = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.CursorWidth[k][0] > 0.0) {
+ if (mode_lib->vba.CursorBPP[k][0] == 64 && mode_lib->vba.Cursor64BppSupport == false)
+ mode_lib->vba.CursorSupport = false;
+ }
+ }
+
+ /*Valid Pitch Check*/
+ mode_lib->vba.PitchSupport = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ mode_lib->vba.AlignedYPitch[k] = dml_ceil(
+ dml_max(mode_lib->vba.PitchY[k], mode_lib->vba.SurfaceWidthY[k]),
+ mode_lib->vba.MacroTileWidthY[k]);
+ if (mode_lib->vba.DCCEnable[k] == true) {
+ mode_lib->vba.AlignedDCCMetaPitchY[k] = dml_ceil(
+ dml_max(mode_lib->vba.DCCMetaPitchY[k], mode_lib->vba.SurfaceWidthY[k]),
+ 64.0 * mode_lib->vba.Read256BlockWidthY[k]);
+ } else {
+ mode_lib->vba.AlignedDCCMetaPitchY[k] = mode_lib->vba.DCCMetaPitchY[k];
+ }
+ if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe
+ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) {
+ mode_lib->vba.AlignedCPitch[k] = dml_ceil(
+ dml_max(mode_lib->vba.PitchC[k], mode_lib->vba.SurfaceWidthC[k]),
+ mode_lib->vba.MacroTileWidthC[k]);
+ if (mode_lib->vba.DCCEnable[k] == true) {
+ mode_lib->vba.AlignedDCCMetaPitchC[k] = dml_ceil(
+ dml_max(mode_lib->vba.DCCMetaPitchC[k],
+ mode_lib->vba.SurfaceWidthC[k]),
+ 64.0 * mode_lib->vba.Read256BlockWidthC[k]);
+ } else {
+ mode_lib->vba.AlignedDCCMetaPitchC[k] = mode_lib->vba.DCCMetaPitchC[k];
+ }
+ } else {
+ mode_lib->vba.AlignedCPitch[k] = mode_lib->vba.PitchC[k];
+ mode_lib->vba.AlignedDCCMetaPitchC[k] = mode_lib->vba.DCCMetaPitchC[k];
+ }
+ if (mode_lib->vba.AlignedYPitch[k] > mode_lib->vba.PitchY[k]
+ || mode_lib->vba.AlignedCPitch[k] > mode_lib->vba.PitchC[k]
+ || mode_lib->vba.AlignedDCCMetaPitchY[k] > mode_lib->vba.DCCMetaPitchY[k]
+ || mode_lib->vba.AlignedDCCMetaPitchC[k] > mode_lib->vba.DCCMetaPitchC[k]) {
+ mode_lib->vba.PitchSupport = false;
+ }
+ }
+
+ mode_lib->vba.ViewportExceedsSurface = false;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.ViewportWidth[k] > mode_lib->vba.SurfaceWidthY[k]
+ || mode_lib->vba.ViewportHeight[k] > mode_lib->vba.SurfaceHeightY[k]) {
+ mode_lib->vba.ViewportExceedsSurface = true;
+ if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_8
+ && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe) {
+ if (mode_lib->vba.ViewportWidthChroma[k] > mode_lib->vba.SurfaceWidthC[k]
+ || mode_lib->vba.ViewportHeightChroma[k]
+ > mode_lib->vba.SurfaceHeightC[k]) {
+ mode_lib->vba.ViewportExceedsSurface = true;
+ }
+ }
+ }
+ }
+
+ /*Mode Support, Voltage State and SOC Configuration*/
+ mode_support_configuration(v, mode_lib);
+
+ MaximumMPCCombine = 0;
+
+ for (i = v->soc.num_states; i >= start_state; i--) {
+ if (i == v->soc.num_states || mode_lib->vba.ModeSupport[i][0] == true ||
+ mode_lib->vba.ModeSupport[i][1] == true) {
+ mode_lib->vba.VoltageLevel = i;
+ mode_lib->vba.ModeIsSupported = mode_lib->vba.ModeSupport[i][0] == true
+ || mode_lib->vba.ModeSupport[i][1] == true;
+
+ if (mode_lib->vba.ModeSupport[i][0] == true)
+ MaximumMPCCombine = 0;
+ else
+ MaximumMPCCombine = 1;
+ }
+ }
+
+ mode_lib->vba.ImmediateFlipSupport =
+ mode_lib->vba.ImmediateFlipSupportedForState[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
+ mode_lib->vba.UnboundedRequestEnabled =
+ mode_lib->vba.UnboundedRequestEnabledAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
+ mode_lib->vba.CompressedBufferSizeInkByte =
+ mode_lib->vba.CompressedBufferSizeInkByteAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; // Not used, informational
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ mode_lib->vba.MPCCombineEnable[k] =
+ mode_lib->vba.MPCCombine[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
+ mode_lib->vba.DPPPerPlane[k] = mode_lib->vba.NoOfDPP[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
+ mode_lib->vba.SwathHeightY[k] =
+ mode_lib->vba.SwathHeightYAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
+ mode_lib->vba.SwathHeightC[k] =
+ mode_lib->vba.SwathHeightCAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
+ mode_lib->vba.DETBufferSizeInKByte[k] =
+ mode_lib->vba.DETBufferSizeInKByteAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
+ mode_lib->vba.DETBufferSizeY[k] =
+ mode_lib->vba.DETBufferSizeYAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
+ mode_lib->vba.DETBufferSizeC[k] =
+ mode_lib->vba.DETBufferSizeCAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
+ mode_lib->vba.OutputType[k] = mode_lib->vba.OutputTypePerState[mode_lib->vba.VoltageLevel][k];
+ mode_lib->vba.OutputRate[k] = mode_lib->vba.OutputRatePerState[mode_lib->vba.VoltageLevel][k];
+ }
+
+ mode_lib->vba.DCFCLK = mode_lib->vba.DCFCLKState[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
+ mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel];
+ mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel];
+ mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel];
+ mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBWPerState[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
+ mode_lib->vba.DISPCLK = mode_lib->vba.RequiredDISPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
+ mode_lib->vba.maxMpcComb = MaximumMPCCombine;
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k) {
+ mode_lib->vba.ODMCombineEnabled[k] =
+ mode_lib->vba.ODMCombineEnablePerState[mode_lib->vba.VoltageLevel][k];
+ } else {
+ mode_lib->vba.ODMCombineEnabled[k] = dm_odm_combine_mode_disabled;
+ }
+
+ mode_lib->vba.DSCEnabled[k] = mode_lib->vba.RequiresDSC[mode_lib->vba.VoltageLevel][k];
+ mode_lib->vba.FECEnable[k] = mode_lib->vba.RequiresFEC[mode_lib->vba.VoltageLevel][k];
+ mode_lib->vba.OutputBpp[k] = mode_lib->vba.OutputBppPerState[mode_lib->vba.VoltageLevel][k];
+ }
+
+ mode_lib->vba.UrgentWatermark = mode_lib->vba.Watermark.UrgentWatermark;
+ mode_lib->vba.StutterEnterPlusExitWatermark = mode_lib->vba.Watermark.StutterEnterPlusExitWatermark;
+ mode_lib->vba.StutterExitWatermark = mode_lib->vba.Watermark.StutterExitWatermark;
+ mode_lib->vba.WritebackDRAMClockChangeWatermark = mode_lib->vba.Watermark.WritebackDRAMClockChangeWatermark;
+ mode_lib->vba.DRAMClockChangeWatermark = mode_lib->vba.Watermark.DRAMClockChangeWatermark;
+ mode_lib->vba.UrgentLatency = mode_lib->vba.UrgLatency[mode_lib->vba.VoltageLevel];
+ mode_lib->vba.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
+
+ /* VBA has Error type to Error Msg output here, but not necessary for DML-C */
+} // ModeSupportAndSystemConfigurationFull
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h
new file mode 100644
index 000000000000..c4745d63039b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DML32_DISPLAY_MODE_VBA_H__
+#define __DML32_DISPLAY_MODE_VBA_H__
+
+#include "../display_mode_enums.h"
+
+// To enable a lot of debug msg
+//#define __DML_VBA_DEBUG__
+// For DML-C changes that haven't been propagated to VBA yet
+//#define __DML_VBA_ALLOW_DELTA__
+
+// Move these to ip parameters/constant
+// At which vstartup the DML start to try if the mode can be supported
+#define __DML_VBA_MIN_VSTARTUP__ 9
+
+// Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
+// Parenthesized so the sum expands as one operand when used inside a larger expression
+// (e.g. multiplied or divided); purely additive uses are unaffected.
+#define __DML_ARB_TO_RET_DELAY__ (7 + 95)
+
+// fudge factor for min dcfclk calculation
+#define __DML_MIN_DCFCLK_FACTOR__ 1.15
+
+// Prefetch schedule max vratio
+#define __DML_MAX_VRATIO_PRE__ 7.9
+#define __DML_MAX_BW_RATIO_PRE__ 4.0
+
+#define __DML_VBA_MAX_DST_Y_PRE__ 63.75
+
+#define BPP_INVALID 0
+#define BPP_BLENDED_PIPE 0xffffffff
+
+#define MEM_STROBE_FREQ_MHZ 1600
+#define DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ 300
+#define MEM_STROBE_MAX_DELIVERY_TIME_US 60.0
+
+// Only pointers to the mode library are needed here, so a forward declaration suffices
+struct display_mode_lib;
+
+void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib);
+void dml32_recalculate(struct display_mode_lib *mode_lib);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
new file mode 100644
index 000000000000..0748ef36a16a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -0,0 +1,6350 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "display_mode_vba_util_32.h"
+#include "../dml_inline_defs.h"
+#include "display_mode_vba_32.h"
+#include "../display_mode_lib.h"
+
+#define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096
+
+unsigned int dml32_dscceComputeDelay(
+		unsigned int bpc,
+		double BPP,
+		unsigned int sliceWidth,
+		unsigned int numSlices,
+		enum output_format_class pixelFormat,
+		enum output_encoder_class Output)
+{
+	/*
+	 * Returns the DSC encoder (dscce) delay expressed in pixels.
+	 *
+	 * Expected inputs:
+	 *   bpc         - source bits per component, one of {8, 10, 12}
+	 *   BPP         - increments of 1/16 of a bit; min is 6/7/8 in
+	 *                 N420/N422/444 respectively, max is such that
+	 *                 compression is 1:1
+	 *   sliceWidth  - pixels per slice line, at most 5184/numSlices
+	 *                 (or 4096/numSlices in 420 mode)
+	 *   numSlices   - horizontal slices per DSC engine, one of {1, 2, 3, 4}
+	 *   pixelFormat - one of {:N444_RGB, :S422, :N422, :N420}
+	 *   Output      - only reported in the debug trace below
+	 */
+
+	/* fixed value */
+	const unsigned int rc_model_size = 8192;
+
+	unsigned int px_per_clk, xmit_delay, ssm_delay, slice_w, extra_cycle;
+	unsigned int ix, groups, pad, whole_lines, a, ax, lines, stall;
+	unsigned int delay_cycles, total_pixels;
+
+	/* N422/N420 operate at 2 pixels per clock, all other modes at 1 */
+	px_per_clk = (pixelFormat == dm_420 || pixelFormat == dm_n422) ? 2 : 1;
+
+	/* initial transmit delay as per PPS */
+	xmit_delay = dml_round(rc_model_size / 2.0 / BPP / px_per_clk);
+
+	/* ssm delay depends on the component depth */
+	switch (bpc) {
+	case 8:
+		ssm_delay = 81;
+		break;
+	case 10:
+		ssm_delay = 89;
+		break;
+	default:
+		ssm_delay = 113;
+		break;
+	}
+
+	/* slice width as seen by DSC: divide by pixels per cycle */
+	slice_w = sliceWidth / px_per_clk;
+
+	/* 422 mode has an additional cycle of delay */
+	extra_cycle = (pixelFormat != dm_420 && pixelFormat != dm_444 && pixelFormat != dm_n422) ? 1 : 0;
+
+	/* main calculation for the dscce */
+	ix = xmit_delay + 45;
+	groups = (slice_w + 2) / 3;
+	pad = 3 * groups - slice_w;
+	whole_lines = ix / slice_w;
+	a = ix + pad * whole_lines;
+	ax = (a + 2) / 3 + ssm_delay + 6 + 1;
+	lines = (ax + groups - 1) / groups;
+	stall = ((ix % slice_w) == 0 && pad != 0) ? 1 : 0;
+	delay_cycles = lines * groups * (numSlices - 1) + ax + extra_cycle + stall + 22;
+
+	/* dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels */
+	total_pixels = delay_cycles * 3 * px_per_clk;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: bpc: %d\n", __func__, bpc);
+	dml_print("DML::%s: BPP: %f\n", __func__, BPP);
+	dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
+	dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
+	dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
+	dml_print("DML::%s: Output: %d\n", __func__, Output);
+	dml_print("DML::%s: pixels: %d\n", __func__, total_pixels);
+#endif
+
+	return total_pixels;
+}
+
+unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
+{
+	/*
+	 * Returns the fixed DSC pipeline delay for the given pixel format as the
+	 * sum of the per-stage contributions listed in each branch.
+	 * Output is accepted for interface compatibility and is not read here.
+	 */
+	unsigned int total;
+
+	if (pixelFormat == dm_420) {
+		total = 2	/* sfr */
+			+ 0	/* dsccif */
+			+ 3	/* dscc - input deserializer */
+			+ 2	/* dscc gets pixels every other cycle */
+			+ 12	/* dscc - input cdc fifo */
+			+ 13	/* dscc gets pixels every other cycle */
+			+ 2	/* dscc - cdc uncertainty */
+			+ 7	/* dscc - output cdc fifo */
+			+ 3	/* dscc gets pixels every other cycle */
+			+ 2	/* dscc - cdc uncertainty */
+			+ 1	/* dscc - output serializer */
+			+ 1;	/* sft */
+	} else if (pixelFormat != dm_n422 && pixelFormat == dm_444) {
+		total = 2	/* sfr */
+			+ 0	/* dsccif */
+			+ 3	/* dscc - input deserializer */
+			+ 12	/* dscc - input cdc fifo */
+			+ 2	/* dscc - cdc uncertainty */
+			+ 7	/* dscc - output cdc fifo */
+			+ 1	/* dscc - output serializer */
+			+ 2	/* dscc - cdc uncertainty */
+			+ 1;	/* sft */
+	} else {
+		/* dm_n422 and every remaining format other than dm_444 */
+		total = 2	/* sfr */
+			+ 1	/* dsccif */
+			+ 5	/* dscc - input deserializer */
+			+ 25	/* dscc - input cdc fifo */
+			+ 2	/* dscc - cdc uncertainty */
+			+ 10	/* dscc - output cdc fifo */
+			+ 2	/* dscc - cdc uncertainty */
+			+ 1	/* dscc - output serializer */
+			+ 1;	/* sft */
+	}
+
+	return total;
+}
+
+
+bool IsVertical(enum dm_rotation_angle Scan)
+{
+	/* True for the 90 and 270 degree rotation values (plain and "m"-suffixed), false otherwise */
+	return Scan == dm_rotation_90 || Scan == dm_rotation_90m ||
+			Scan == dm_rotation_270 || Scan == dm_rotation_270m;
+}
+
+/*
+ * Compute the scaler (PSCL) throughput for luma and chroma and the DPPCLK that
+ * would be needed if the surface were handled by a single DPP.
+ *
+ * Results are written through PSCL_THROUGHPUT, PSCL_THROUGHPUT_CHROMA and
+ * DPPCLKUsingSingleDPP. For formats other than dm_420_8/10/12 and
+ * dm_rgbe_alpha the chroma throughput is reported as 0 and only the luma
+ * requirement determines the clock.
+ */
+void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
+		double HRatio,
+		double HRatioChroma,
+		double VRatio,
+		double VRatioChroma,
+		double MaxDCHUBToPSCLThroughput,
+		double MaxPSCLToLBThroughput,
+		double PixelClock,
+		enum source_format_class SourcePixelFormat,
+		unsigned int HTaps,
+		unsigned int HTapsChroma,
+		unsigned int VTaps,
+		unsigned int VTapsChroma,
+
+		/* output */
+		double *PSCL_THROUGHPUT,
+		double *PSCL_THROUGHPUT_CHROMA,
+		double *DPPCLKUsingSingleDPP)
+{
+	double DPPCLKUsingSingleDPPLuma;
+	double DPPCLKUsingSingleDPPChroma;
+
+	/* When downscaling horizontally the throughput is also limited by the tap count */
+	if (HRatio > 1) {
+		*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
+				dml_ceil((double) HTaps / 6.0, 1.0));
+	} else {
+		*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
+	}
+
+	/*
+	 * VTaps is an unsigned int: divide by 6.0 so the tap ratio is evaluated
+	 * in floating point instead of being truncated by integer division.
+	 */
+	DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6.0 * dml_min(1.0, HRatio), HRatio * VRatio /
+			*PSCL_THROUGHPUT, 1);
+
+	/* More than 6 taps in either direction requires at least twice the pixel clock */
+	if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
+		DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
+
+	if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
+			SourcePixelFormat != dm_rgbe_alpha)) {
+		*PSCL_THROUGHPUT_CHROMA = 0;
+		*DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
+	} else {
+		if (HRatioChroma > 1) {
+			*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
+					HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
+		} else {
+			*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
+		}
+		/* Same floating-point tap ratio as for luma (VTapsChroma is an unsigned int) */
+		DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6.0 * dml_min(1.0, HRatioChroma),
+				HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
+		if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
+			DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
+		/* The single DPP must satisfy whichever plane is more demanding */
+		*DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
+	}
+}
+
+/*
+ * Derive, from the source pixel format and the surface tiling mode, the bytes
+ * per pixel of each plane, the dimensions of a 256-byte block and the
+ * dimensions of a macro tile. Every result is written through the output
+ * pointers; a chroma value of 0 denotes "no chroma plane".
+ */
+void dml32_CalculateBytePerPixelAndBlockSizes(
+ enum source_format_class SourcePixelFormat,
+ enum dm_swizzle_mode SurfaceTiling,
+
+ /* Output */
+ unsigned int *BytePerPixelY,
+ unsigned int *BytePerPixelC,
+ double *BytePerPixelDETY,
+ double *BytePerPixelDETC,
+ unsigned int *BlockHeight256BytesY,
+ unsigned int *BlockHeight256BytesC,
+ unsigned int *BlockWidth256BytesY,
+ unsigned int *BlockWidth256BytesC,
+ unsigned int *MacroTileHeightY,
+ unsigned int *MacroTileHeightC,
+ unsigned int *MacroTileWidthY,
+ unsigned int *MacroTileWidthC)
+{
+ // Step 1: bytes per pixel, both as stored (integer) and as held in the DET (double)
+ if (SourcePixelFormat == dm_444_64) {
+ *BytePerPixelDETY = 8;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 8;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
+ *BytePerPixelDETY = 4;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 4;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_16) {
+ *BytePerPixelDETY = 2;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_8) {
+ *BytePerPixelDETY = 1;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 1;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_rgbe_alpha) {
+ *BytePerPixelDETY = 4;
+ *BytePerPixelDETC = 1;
+ *BytePerPixelY = 4;
+ *BytePerPixelC = 1;
+ } else if (SourcePixelFormat == dm_420_8) {
+ *BytePerPixelDETY = 1;
+ *BytePerPixelDETC = 2;
+ *BytePerPixelY = 1;
+ *BytePerPixelC = 2;
+ } else if (SourcePixelFormat == dm_420_12) {
+ *BytePerPixelDETY = 2;
+ *BytePerPixelDETC = 4;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 4;
+ } else {
+ // Default for every format not listed above (dm_420_10 is not listed, so it lands here).
+ // NOTE(review): dm_mono_8 / dm_mono_16 also take this branch and get a non-zero
+ // BytePerPixelC, while the block-size step below treats them as single-plane - confirm intended.
+ *BytePerPixelDETY = 4.0 / 3;
+ *BytePerPixelDETC = 8.0 / 3;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 4;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
+ dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
+ dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
+ dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY);
+ dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC);
+#endif
+ // Step 2: 256-byte block height per plane; width follows as 256 / bytes-per-pixel / height
+ if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
+ || SourcePixelFormat == dm_444_16
+ || SourcePixelFormat == dm_444_8
+ || SourcePixelFormat == dm_mono_16
+ || SourcePixelFormat == dm_mono_8
+ || SourcePixelFormat == dm_rgbe)) {
+ // Formats handled as a single plane: only the luma block is computed
+ if (SurfaceTiling == dm_sw_linear)
+ *BlockHeight256BytesY = 1;
+ else if (SourcePixelFormat == dm_444_64)
+ *BlockHeight256BytesY = 4;
+ else if (SourcePixelFormat == dm_444_8)
+ *BlockHeight256BytesY = 16;
+ else
+ *BlockHeight256BytesY = 8;
+
+ *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
+ *BlockHeight256BytesC = 0;
+ *BlockWidth256BytesC = 0;
+ } else {
+ // Formats with a second plane: both luma and chroma blocks are computed
+ if (SurfaceTiling == dm_sw_linear) {
+ *BlockHeight256BytesY = 1;
+ *BlockHeight256BytesC = 1;
+ } else if (SourcePixelFormat == dm_rgbe_alpha) {
+ *BlockHeight256BytesY = 8;
+ *BlockHeight256BytesC = 16;
+ } else if (SourcePixelFormat == dm_420_8) {
+ *BlockHeight256BytesY = 16;
+ *BlockHeight256BytesC = 8;
+ } else {
+ *BlockHeight256BytesY = 8;
+ *BlockHeight256BytesC = 8;
+ }
+ *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
+ *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY);
+ dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
+ dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC);
+ dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
+#endif
+
+ // Step 3: macro tile dimensions. The height is a multiple of the 256-byte block height
+ // and the width is derived from a byte budget (256, 65536 or 65536 * 4) per tile.
+ // A chroma height of 0 (no chroma block) yields a chroma width of 0, which also
+ // avoids dividing by zero.
+ if (SurfaceTiling == dm_sw_linear) {
+ *MacroTileHeightY = *BlockHeight256BytesY;
+ *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
+ *MacroTileHeightC = *BlockHeight256BytesC;
+ if (*MacroTileHeightC == 0)
+ *MacroTileWidthC = 0;
+ else
+ *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
+ } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
+ SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
+ *MacroTileHeightY = 16 * *BlockHeight256BytesY;
+ *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
+ *MacroTileHeightC = 16 * *BlockHeight256BytesC;
+ if (*MacroTileHeightC == 0)
+ *MacroTileWidthC = 0;
+ else
+ *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
+ } else {
+ // Every other tiling mode uses the largest budget (65536 * 4 bytes)
+ *MacroTileHeightY = 32 * *BlockHeight256BytesY;
+ *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
+ *MacroTileHeightC = 32 * *BlockHeight256BytesC;
+ if (*MacroTileHeightC == 0)
+ *MacroTileWidthC = 0;
+ else
+ *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY);
+ dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
+ dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC);
+ dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
+#endif
+} // CalculateBytePerPixelAndBlockSizes
+
+/*
+ * dml32_CalculateSwathAndDETConfiguration - choose swath heights and split the
+ * DET buffer between luma and chroma for every active surface.
+ *
+ * Steps, in order:
+ *  1. dml32_CalculateSwathWidth() fills the swath widths, the *_ub widths and
+ *     the maximum swath heights.
+ *  2. The maximum swath size in bytes is computed per plane (rounded up to a
+ *     multiple of 256 for dm_420_10).
+ *  3. The compressed-buffer reserved space is derived from
+ *     PixelChunkSizeKBytes, ROBSizeKBytes and the luma swath size of
+ *     surface 0, then dml32_UnboundedRequest() decides whether unbounded
+ *     requesting is enabled.
+ *  4. dml32_CalculateDETBufferSize() fills DETBufferSizeInKByte[] and
+ *     *CompressedBufferSizeInkByte.
+ *  5. Per surface: the luma and/or chroma swath height is halved until the
+ *     two swaths fit in half of the DET; the viewport is flagged unsupported
+ *     when even half-height luma plus half-height chroma do not fit, or when
+ *     a swath width exceeds its maximum; finally the DET is split between
+ *     the two planes.
+ *
+ * Everything after the "Output" marker in the parameter list is written by
+ * this function. All arrays are indexed by surface,
+ * 0 .. NumberOfActiveSurfaces - 1.
+ */
+void dml32_CalculateSwathAndDETConfiguration(
+		unsigned int DETSizeOverride[],
+		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+		unsigned int ConfigReturnBufferSizeInKByte,
+		unsigned int MaxTotalDETInKByte,
+		unsigned int MinCompressedBufferSizeInKByte,
+		double ForceSingleDPP,
+		unsigned int NumberOfActiveSurfaces,
+		unsigned int nomDETInKByte,
+		enum unbounded_requesting_policy UseUnboundedRequestingFinal,
+		bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
+		unsigned int PixelChunkSizeKBytes,
+		unsigned int ROBSizeKBytes,
+		unsigned int CompressedBufferSegmentSizeInkByteFinal,
+		enum output_encoder_class Output[],
+		double ReadBandwidthLuma[],
+		double ReadBandwidthChroma[],
+		double MaximumSwathWidthLuma[],
+		double MaximumSwathWidthChroma[],
+		enum dm_rotation_angle SourceRotation[],
+		bool ViewportStationary[],
+		enum source_format_class SourcePixelFormat[],
+		enum dm_swizzle_mode SurfaceTiling[],
+		unsigned int ViewportWidth[],
+		unsigned int ViewportHeight[],
+		unsigned int ViewportXStart[],
+		unsigned int ViewportYStart[],
+		unsigned int ViewportXStartC[],
+		unsigned int ViewportYStartC[],
+		unsigned int SurfaceWidthY[],
+		unsigned int SurfaceWidthC[],
+		unsigned int SurfaceHeightY[],
+		unsigned int SurfaceHeightC[],
+		unsigned int Read256BytesBlockHeightY[],
+		unsigned int Read256BytesBlockHeightC[],
+		unsigned int Read256BytesBlockWidthY[],
+		unsigned int Read256BytesBlockWidthC[],
+		enum odm_combine_mode ODMMode[],
+		unsigned int BlendingAndTiming[],
+		unsigned int BytePerPixY[],
+		unsigned int BytePerPixC[],
+		double BytePerPixDETY[],
+		double BytePerPixDETC[],
+		unsigned int HActive[],
+		double HRatio[],
+		double HRatioChroma[],
+		unsigned int DPPPerSurface[],
+
+		/* Output */
+		unsigned int swath_width_luma_ub[],
+		unsigned int swath_width_chroma_ub[],
+		double SwathWidth[],
+		double SwathWidthChroma[],
+		unsigned int SwathHeightY[],
+		unsigned int SwathHeightC[],
+		unsigned int DETBufferSizeInKByte[],
+		unsigned int DETBufferSizeY[],
+		unsigned int DETBufferSizeC[],
+		bool *UnboundedRequestEnabled,
+		unsigned int *CompressedBufferSizeInkByte,
+		unsigned int *CompBufReservedSpaceKBytes,
+		bool *CompBufReservedSpaceNeedAdjustment,
+		bool ViewportSizeSupportPerSurface[],
+		bool *ViewportSizeSupport)
+{
+	unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
+	unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
+	unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
+	unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
+	unsigned int RoundedUpSwathSizeBytesY;
+	unsigned int RoundedUpSwathSizeBytesC;
+	double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
+	double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
+	unsigned int k;
+	unsigned int TotalActiveDPP = 0;
+	bool NoChromaSurfaces = true;
+	unsigned int DETBufferSizeInKByteForSwathCalculation;
+
+#ifdef __DML_VBA_DEBUG__
+	/* ForceSingleDPP is a double in this signature, so it is printed with %f. */
+	dml_print("DML::%s: ForceSingleDPP = %f\n", __func__, ForceSingleDPP);
+	dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
+	dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
+#endif
+	dml32_CalculateSwathWidth(ForceSingleDPP,
+			NumberOfActiveSurfaces,
+			SourcePixelFormat,
+			SourceRotation,
+			ViewportStationary,
+			ViewportWidth,
+			ViewportHeight,
+			ViewportXStart,
+			ViewportYStart,
+			ViewportXStartC,
+			ViewportYStartC,
+			SurfaceWidthY,
+			SurfaceWidthC,
+			SurfaceHeightY,
+			SurfaceHeightC,
+			ODMMode,
+			BytePerPixY,
+			BytePerPixC,
+			Read256BytesBlockHeightY,
+			Read256BytesBlockHeightC,
+			Read256BytesBlockWidthY,
+			Read256BytesBlockWidthC,
+			BlendingAndTiming,
+			HActive,
+			HRatio,
+			DPPPerSurface,
+
+			/* Output */
+			SwathWidthdoubleDPP,
+			SwathWidthdoubleDPPChroma,
+			SwathWidth,
+			SwathWidthChroma,
+			MaximumSwathHeightY,
+			MaximumSwathHeightC,
+			swath_width_luma_ub,
+			swath_width_chroma_ub);
+
+	/* Size in bytes of one maximum-height swath, per plane. */
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
+		RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
+		dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
+		dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
+		dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
+		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
+				RoundedUpMaxSwathSizeBytesY[k]);
+		dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
+		dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
+		dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
+		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
+				RoundedUpMaxSwathSizeBytesC[k]);
+#endif
+
+		/* Only dm_420_10 swath sizes are rounded up to a multiple of 256 bytes. */
+		if (SourcePixelFormat[k] == dm_420_10) {
+			RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
+			RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
+		}
+	}
+
+	/* Count the DPPs in use and note whether any surface carries a second plane. */
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
+		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
+				SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
+			NoChromaSurfaces = false;
+		}
+	}
+
+	// By default, just set the reserved space to 2 pixel chunks size
+	*CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
+
+	// if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
+	// - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
+	// - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
+	// (swath_bytes / 512 is 8 * swath_bytes / 4, converted from bytes to KBytes)
+	*CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) >
+			(int) (RoundedUpMaxSwathSizeBytesY[0]/512);
+
+	if (*CompBufReservedSpaceNeedAdjustment == 1) {
+		*CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes);
+	dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment);
+#endif
+
+	/* Only surface 0's output encoder and tiling are considered here. */
+	*UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP,
+			NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment,
+			DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
+
+	dml32_CalculateDETBufferSize(DETSizeOverride,
+			UseMALLForPStateChange,
+			ForceSingleDPP,
+			NumberOfActiveSurfaces,
+			*UnboundedRequestEnabled,
+			nomDETInKByte,
+			MaxTotalDETInKByte,
+			ConfigReturnBufferSizeInKByte,
+			MinCompressedBufferSizeInKByte,
+			CompressedBufferSegmentSizeInkByteFinal,
+			SourcePixelFormat,
+			ReadBandwidthLuma,
+			ReadBandwidthChroma,
+			RoundedUpMaxSwathSizeBytesY,
+			RoundedUpMaxSwathSizeBytesC,
+			DPPPerSurface,
+
+			/* Output */
+			DETBufferSizeInKByte, // per hubp pipe
+			CompressedBufferSizeInkByte);
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
+	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
+	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
+	dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
+	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
+	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
+#endif
+
+	*ViewportSizeSupport = true;
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+		/* Phantom pipes are sized against a fixed 1024 KByte DET for the swath fit test. */
+		DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
+				dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
+				DETBufferSizeInKByteForSwathCalculation);
+#endif
+
+		/*
+		 * Luma + chroma swaths must fit in half of the DET. Try, in order:
+		 * both at full height, luma halved, chroma halved, both halved.
+		 */
+		if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
+				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+			SwathHeightY[k] = MaximumSwathHeightY[k];
+			SwathHeightC[k] = MaximumSwathHeightC[k];
+			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
+			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
+		} else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
+				RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
+				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+			SwathHeightC[k] = MaximumSwathHeightC[k];
+			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
+			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
+		} else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
+				RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
+				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+			SwathHeightY[k] = MaximumSwathHeightY[k];
+			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
+			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
+		} else {
+			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
+			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
+		}
+
+		/*
+		 * Unsupported when even two half-height swaths overflow half of the
+		 * DET, or when a swath is wider than the allowed maximum.
+		 */
+		if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
+				DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
+				|| SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
+				SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
+			*ViewportSizeSupport = false;
+			ViewportSizeSupportPerSurface[k] = false;
+		} else {
+			ViewportSizeSupportPerSurface[k] = true;
+		}
+
+		/* Split the surface's DET between plane 0 (luma) and plane 1 (chroma). */
+		if (SwathHeightC[k] == 0) {
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
+#endif
+			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
+			DETBufferSizeC[k] = 0;
+		} else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
+#endif
+			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
+			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
+		} else {
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
+#endif
+			DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
+			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
+		}
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
+		dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
+		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
+				k, RoundedUpMaxSwathSizeBytesY[k]);
+		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
+				k, RoundedUpMaxSwathSizeBytesC[k]);
+		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
+		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
+		dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
+		dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
+		dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
+		dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
+				ViewportSizeSupportPerSurface[k]);
+#endif
+
+	}
+} // CalculateSwathAndDETConfiguration
+
+/*
+ * dml32_CalculateSwathWidth - derive swath widths and maximum swath heights
+ * for every active surface.
+ *
+ * For each surface k:
+ *  - SwathWidthdoubleDPPY[k] is the viewport width, or the viewport height
+ *    when IsVertical(SourceRotation[k]).
+ *  - SwathWidthY[k] equals that value when ForceSingleDPP is set. Otherwise
+ *    it is capped to HActive[k] / 4 * HRatio[k] (4:1 ODM) or
+ *    HActive[k] / 2 * HRatio[k] (2:1 ODM), using the ODM mode of the surface
+ *    named by BlendingAndTiming[k]; halved when DPPPerSurface[k] == 2; or
+ *    left unchanged.
+ *  - Chroma widths are half the luma widths for dm_420_8/10/12 and equal to
+ *    them for every other format.
+ *  - MaximumSwathHeightY/C are the 256-byte block heights, or the block
+ *    widths when the rotation is vertical.
+ *  - swath_width_luma_ub/chroma_ub are the swath widths rounded out to whole
+ *    256-byte blocks and clamped to the block-aligned surface dimension;
+ *    the chroma value is 0 when BytePerPixC[k] is 0.
+ *
+ * Everything after the "Output" marker in the parameter list is written by
+ * this function.
+ */
+void dml32_CalculateSwathWidth(
+		bool ForceSingleDPP,
+		unsigned int NumberOfActiveSurfaces,
+		enum source_format_class SourcePixelFormat[],
+		enum dm_rotation_angle SourceRotation[],
+		bool ViewportStationary[],
+		unsigned int ViewportWidth[],
+		unsigned int ViewportHeight[],
+		unsigned int ViewportXStart[],
+		unsigned int ViewportYStart[],
+		unsigned int ViewportXStartC[],
+		unsigned int ViewportYStartC[],
+		unsigned int SurfaceWidthY[],
+		unsigned int SurfaceWidthC[],
+		unsigned int SurfaceHeightY[],
+		unsigned int SurfaceHeightC[],
+		enum odm_combine_mode ODMMode[],
+		unsigned int BytePerPixY[],
+		unsigned int BytePerPixC[],
+		unsigned int Read256BytesBlockHeightY[],
+		unsigned int Read256BytesBlockHeightC[],
+		unsigned int Read256BytesBlockWidthY[],
+		unsigned int Read256BytesBlockWidthC[],
+		unsigned int BlendingAndTiming[],
+		unsigned int HActive[],
+		double HRatio[],
+		unsigned int DPPPerSurface[],
+
+		/* Output */
+		double SwathWidthdoubleDPPY[],
+		double SwathWidthdoubleDPPC[],
+		double SwathWidthY[], // per-pipe
+		double SwathWidthC[], // per-pipe
+		unsigned int MaximumSwathHeightY[],
+		unsigned int MaximumSwathHeightC[],
+		unsigned int swath_width_luma_ub[], // per-pipe
+		unsigned int swath_width_chroma_ub[]) // per-pipe
+{
+	unsigned int k, j;
+	enum odm_combine_mode MainSurfaceODMMode;
+
+	unsigned int surface_width_ub_l;
+	unsigned int surface_height_ub_l;
+	unsigned int surface_width_ub_c = 0;
+	unsigned int surface_height_ub_c = 0;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
+	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
+#endif
+
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		/* A vertical rotation swaps which viewport dimension the swath runs along. */
+		if (!IsVertical(SourceRotation[k]))
+			SwathWidthdoubleDPPY[k] = ViewportWidth[k];
+		else
+			SwathWidthdoubleDPPY[k] = ViewportHeight[k];
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
+		dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
+#endif
+
+		/* Use the ODM mode of the surface this one is blended with, if it is active. */
+		MainSurfaceODMMode = ODMMode[k];
+		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
+			if (BlendingAndTiming[k] == j)
+				MainSurfaceODMMode = ODMMode[j];
+		}
+
+		if (ForceSingleDPP) {
+			SwathWidthY[k] = SwathWidthdoubleDPPY[k];
+		} else {
+			if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
+				SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
+						dml_round(HActive[k] / 4.0 * HRatio[k]));
+			} else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
+				SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
+						dml_round(HActive[k] / 2.0 * HRatio[k]));
+			} else if (DPPPerSurface[k] == 2) {
+				SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
+			} else {
+				SwathWidthY[k] = SwathWidthdoubleDPPY[k];
+			}
+		}
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
+		dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
+		dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
+		/* Both swath widths are doubles, so they are printed with %f. */
+		dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%f\n", __func__, k, SwathWidthdoubleDPPY[k]);
+		dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
+#endif
+
+		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
+				SourcePixelFormat[k] == dm_420_12) {
+			SwathWidthC[k] = SwathWidthY[k] / 2;
+			SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
+		} else {
+			SwathWidthC[k] = SwathWidthY[k];
+			SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
+		}
+
+		/*
+		 * Re-asserts the single-DPP widths. With ForceSingleDPP set these
+		 * already equal the values computed above.
+		 */
+		if (ForceSingleDPP == true) {
+			SwathWidthY[k] = SwathWidthdoubleDPPY[k];
+			SwathWidthC[k] = SwathWidthdoubleDPPC[k];
+		}
+
+		/* Surface dimensions rounded up to whole 256-byte blocks. */
+		surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
+		surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
+
+		if (!IsVertical(SourceRotation[k])) {
+			MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
+			MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
+			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+				/* Known start position: count only the blocks actually touched. */
+				swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
+						dml_floor(ViewportXStart[k] +
+								SwathWidthY[k] +
+								Read256BytesBlockWidthY[k] - 1,
+								Read256BytesBlockWidthY[k]) -
+						dml_floor(ViewportXStart[k],
+								Read256BytesBlockWidthY[k]));
+			} else {
+				/* Otherwise round up and add one extra block. */
+				swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
+						dml_ceil(SwathWidthY[k] - 1,
+								Read256BytesBlockWidthY[k]) +
+						Read256BytesBlockWidthY[k]);
+			}
+			if (BytePerPixC[k] > 0) {
+				surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
+				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+					swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
+							dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
+									Read256BytesBlockWidthC[k] - 1,
+									Read256BytesBlockWidthC[k]) -
+							dml_floor(ViewportXStartC[k],
+									Read256BytesBlockWidthC[k]));
+				} else {
+					swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
+							dml_ceil(SwathWidthC[k] - 1,
+									Read256BytesBlockWidthC[k]) +
+							Read256BytesBlockWidthC[k]);
+				}
+			} else {
+				swath_width_chroma_ub[k] = 0;
+			}
+		} else {
+			/* Vertical rotation: block width/height and X/Y roles are swapped. */
+			MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
+			MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
+
+			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+				swath_width_luma_ub[k] = dml_min(surface_height_ub_l,
+						dml_floor(ViewportYStart[k] +
+								SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
+								Read256BytesBlockHeightY[k]) -
+						dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
+			} else {
+				swath_width_luma_ub[k] = dml_min(surface_height_ub_l,
+						dml_ceil(SwathWidthY[k] - 1,
+								Read256BytesBlockHeightY[k]) +
+						Read256BytesBlockHeightY[k]);
+			}
+			if (BytePerPixC[k] > 0) {
+				surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
+				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+					swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
+							dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
+									Read256BytesBlockHeightC[k] - 1,
+									Read256BytesBlockHeightC[k]) -
+							dml_floor(ViewportYStartC[k],
+									Read256BytesBlockHeightC[k]));
+				} else {
+					swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
+							dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
+							Read256BytesBlockHeightC[k]);
+				}
+			} else {
+				swath_width_chroma_ub[k] = 0;
+			}
+		}
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
+		dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
+		dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
+		dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
+		dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
+		dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
+		dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
+		dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
+		dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
+		dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
+		dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
+		dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
+		dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
+		dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
+#endif
+
+	}
+} // CalculateSwathWidth
+
+/*
+ * dml32_UnboundedRequest - decide whether unbounded requesting may be used.
+ *
+ * Returns true only when the policy is not dm_unbounded_requesting_disable,
+ * exactly one DPP is active, there is no chroma, and none of these
+ * disqualifiers applies:
+ *  - the policy is dm_unbounded_requesting_edp_only and Output is not dm_edp,
+ *  - SurfaceTiling is dm_sw_linear,
+ *  - the compressed-buffer reserved space needs adjustment and the caller
+ *    asked to disable unbounded requesting in that case.
+ */
+bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
+		unsigned int TotalNumberOfActiveDPP,
+		bool NoChroma,
+		enum output_encoder_class Output,
+		enum dm_swizzle_mode SurfaceTiling,
+		bool CompBufReservedSpaceNeedAdjustment,
+		bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
+{
+	bool allowed = true;
+
+	/* Each branch is one independent reason to refuse; any of them is enough. */
+	if (UseUnboundedRequestingFinal == dm_unbounded_requesting_disable)
+		allowed = false;
+	else if (TotalNumberOfActiveDPP != 1 || !NoChroma)
+		allowed = false;
+	else if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
+		allowed = false;
+	else if (SurfaceTiling == dm_sw_linear)
+		allowed = false;
+	else if (CompBufReservedSpaceNeedAdjustment &&
+			DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
+		allowed = false;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment);
+	dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
+	dml_print("DML::%s: ret_val = %d\n", __func__, allowed);
+#endif
+
+	return allowed;
+}
+
+/*
+ * dml32_CalculateDETBufferSize - assign a DET buffer size to every active
+ * surface and compute the compressed buffer size.
+ *
+ * With unbounded requesting enabled only DETBufferSizeInKByte[0] is written:
+ * the override when non-zero, otherwise the larger of nomDETInKByte and two
+ * full swaths rounded up to a multiple of 64. The compressed buffer gets the
+ * rest of ConfigReturnBufferSizeInKByte.
+ *
+ * Otherwise:
+ *  1. Each surface gets the smallest size in 128..max_minDET (steps of 64)
+ *     that holds two full swaths, with a one-swath fallback. Phantom pipes
+ *     get 0 and non-zero overrides are honoured. Sizes are charged against a
+ *     pool that starts at MaxTotalDETInKByte.
+ *  2. Phantom, overridden and already-sufficient surfaces (DET share >=
+ *     bandwidth share) are marked as done.
+ *  3. The remaining surfaces are visited lowest bandwidth first. Each
+ *     receives a piece of the remaining pool proportional to its share of
+ *     the still-unassigned bandwidth, in multiples of 64 per DPP, capped at
+ *     the nominal per-pipe size.
+ *
+ * Finally *CompressedBufferSizeInkByte is scaled by
+ * CompressedBufferSegmentSizeInkByteFinal / 64.
+ */
+void dml32_CalculateDETBufferSize(
+		unsigned int DETSizeOverride[],
+		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+		bool ForceSingleDPP,
+		unsigned int NumberOfActiveSurfaces,
+		bool UnboundedRequestEnabled,
+		unsigned int nomDETInKByte,
+		unsigned int MaxTotalDETInKByte,
+		unsigned int ConfigReturnBufferSizeInKByte,
+		unsigned int MinCompressedBufferSizeInKByte,
+		unsigned int CompressedBufferSegmentSizeInkByteFinal,
+		enum source_format_class SourcePixelFormat[],
+		double ReadBandwidthLuma[],
+		double ReadBandwidthChroma[],
+		unsigned int RoundedUpMaxSwathSizeBytesY[],
+		unsigned int RoundedUpMaxSwathSizeBytesC[],
+		unsigned int DPPPerSurface[],
+		/* Output */
+		unsigned int DETBufferSizeInKByte[],
+		unsigned int *CompressedBufferSizeInkByte)
+{
+	unsigned int DETBufferSizePoolInKByte;
+	unsigned int NextDETBufferPieceInKByte;
+	bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
+	bool NextPotentialSurfaceToAssignDETPieceFound;
+	unsigned int NextSurfaceToAssignDETPiece;
+	double TotalBandwidth;
+	double BandwidthOfSurfacesNotAssignedDETPiece;
+	unsigned int max_minDET;
+	unsigned int minDET;
+	unsigned int minDET_pipe;
+	unsigned int j, k;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
+	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
+	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
+	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
+	dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
+	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
+	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
+	dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
+			CompressedBufferSegmentSizeInkByteFinal);
+#endif
+
+	// Note: Will use default det size if that fits 2 swaths
+	if (UnboundedRequestEnabled) {
+		if (DETSizeOverride[0] > 0) {
+			DETBufferSizeInKByte[0] = DETSizeOverride[0];
+		} else {
+			DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
+					((double) RoundedUpMaxSwathSizeBytesY[0] +
+							(double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
+		}
+		*CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
+	} else {
+		DETBufferSizePoolInKByte = MaxTotalDETInKByte;
+		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+			/* Default; kept when none of the assignments below applies. */
+			DETBufferSizeInKByte[k] = nomDETInKByte;
+			if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
+					SourcePixelFormat[k] == dm_420_12) {
+				max_minDET = nomDETInKByte - 64;
+			} else {
+				max_minDET = nomDETInKByte;
+			}
+			minDET = 128;
+			minDET_pipe = 0;
+
+			// add DET resource until can hold 2 full swaths
+			while (minDET <= max_minDET && minDET_pipe == 0) {
+				if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
+						(double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
+					minDET_pipe = minDET;
+				minDET = minDET + 64;
+			}
+
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET);
+			dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET);
+			dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe);
+			dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
+					RoundedUpMaxSwathSizeBytesY[k]);
+			dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
+					RoundedUpMaxSwathSizeBytesC[k]);
+#endif
+
+			/* Two swaths never fit: fall back to one swath, at least 128. */
+			if (minDET_pipe == 0) {
+				minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
+						(double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
+#ifdef __DML_VBA_DEBUG__
+				dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
+						__func__, k, minDET_pipe);
+#endif
+			}
+
+			if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
+				DETBufferSizeInKByte[k] = 0;
+			} else if (DETSizeOverride[k] > 0) {
+				DETBufferSizeInKByte[k] = DETSizeOverride[k];
+				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
+					(ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
+			} else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
+				DETBufferSizeInKByte[k] = minDET_pipe;
+				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
+						(ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
+			}
+
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
+			dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
+			dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
+			dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
+#endif
+		}
+
+		/* Phantom pipes do not take part in the bandwidth-weighted split. */
+		TotalBandwidth = 0;
+		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+			if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
+				TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
+		}
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
+		/* Reuses the function-scope k; the next loop re-initialises it. */
+		for (k = 0; k < NumberOfActiveSurfaces; ++k)
+			dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
+		dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
+		dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
+#endif
+		BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
+		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+			if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
+				DETPieceAssignedToThisSurfaceAlready[k] = true;
+			} else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
+					(double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
+					((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
+				/* Overridden, or the DET share already covers the bandwidth share. */
+				DETPieceAssignedToThisSurfaceAlready[k] = true;
+				BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
+						ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
+			} else {
+				DETPieceAssignedToThisSurfaceAlready[k] = false;
+			}
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
+					DETPieceAssignedToThisSurfaceAlready[k]);
+			dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
+					BandwidthOfSurfacesNotAssignedDETPiece);
+#endif
+		}
+
+		/* At most one surface is served per outer iteration. */
+		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
+			NextPotentialSurfaceToAssignDETPieceFound = false;
+			NextSurfaceToAssignDETPiece = 0;
+
+			/* Pick the unassigned surface with the lowest total read bandwidth. */
+			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+#ifdef __DML_VBA_DEBUG__
+				dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
+						ReadBandwidthLuma[k]);
+				dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
+						ReadBandwidthChroma[k]);
+				dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
+						ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
+				dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
+						ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
+				dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
+						NextSurfaceToAssignDETPiece);
+#endif
+				if (!DETPieceAssignedToThisSurfaceAlready[k] &&
+						(!NextPotentialSurfaceToAssignDETPieceFound ||
+						ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
+						ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
+						ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
+					NextSurfaceToAssignDETPiece = k;
+					NextPotentialSurfaceToAssignDETPieceFound = true;
+				}
+#ifdef __DML_VBA_DEBUG__
+				dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
+						__func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
+				dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
+						__func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
+#endif
+			}
+
+			if (NextPotentialSurfaceToAssignDETPieceFound) {
+				// Note: To show the banker's rounding behavior in VBA and also the fact
+				// that the DET buffer size varies due to precision issue
+				//
+				//double tmp1 = ((double) DETBufferSizePoolInKByte *
+				// (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
+				// ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
+				// BandwidthOfSurfacesNotAssignedDETPiece /
+				// ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
+				//double tmp2 = dml_round((double) DETBufferSizePoolInKByte *
+				// (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
+				// ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
+				//BandwidthOfSurfacesNotAssignedDETPiece /
+				// ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
+				//
+				//dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
+				//dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
+
+				/*
+				 * Bandwidth-proportional share of the pool, rounded to a
+				 * multiple of (DPP count * 64) and never more than the pool
+				 * floored to that same granularity.
+				 */
+				NextDETBufferPieceInKByte = dml_min(
+						dml_round((double) DETBufferSizePoolInKByte *
+								(ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
+								ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
+								BandwidthOfSurfacesNotAssignedDETPiece /
+								((ForceSingleDPP ? 1 :
+								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
+								(ForceSingleDPP ? 1 :
+								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
+						dml_floor((double) DETBufferSizePoolInKByte,
+								(ForceSingleDPP ? 1 :
+								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
+
+				// Above calculation can assign the entire DET buffer allocation to a single pipe.
+				// We should limit the per-pipe DET size to the nominal / max per pipe.
+				//
+				// The DPP count must come from the surface being served. The loop
+				// index k has already run to NumberOfActiveSurfaces at this point,
+				// so indexing DPPPerSurface[] with it reads past the last active
+				// surface.
+				if (NextDETBufferPieceInKByte > nomDETInKByte *
+						(ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece])) {
+					if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
+							nomDETInKByte *
+							(ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece])) {
+						NextDETBufferPieceInKByte = nomDETInKByte *
+								(ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) -
+								DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
+					} else {
+						// Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
+						// already has the max per-pipe value
+						NextDETBufferPieceInKByte = 0;
+					}
+				}
+
+#ifdef __DML_VBA_DEBUG__
+				dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
+					DETBufferSizePoolInKByte);
+				dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
+					NextSurfaceToAssignDETPiece);
+				dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
+					NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
+				dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
+					NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
+				dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
+					__func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
+				dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
+					NextDETBufferPieceInKByte);
+				dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
+					__func__, j, NextSurfaceToAssignDETPiece,
+					DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
+#endif
+
+				/* The piece is a per-surface total; store the per-pipe share. */
+				DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
+						DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
+						+ NextDETBufferPieceInKByte
+						/ (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
+#ifdef __DML_VBA_DEBUG__
+				dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
+#endif
+
+				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
+				DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
+				BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
+						(ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
+								ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
+			}
+		}
+		*CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
+	}
+	*CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
+	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
+				__func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
+	}
+#endif
+} // CalculateDETBufferSize
+
+/*
+ * dml32_CalculateODMMode - pick the ODM combine mode, DPP count and required
+ * DISPCLK for one surface.
+ *
+ * The required DISPCLK is first computed for the three candidate modes
+ * (no combine, 2:1, 4:1) with dml32_CalculateRequiredDispclk(). The outputs
+ * start as "supported, ODM disabled, no-combine DISPCLK, 0 DPPs" and are then
+ * refined in this order:
+ * 1. 4:1 / 2:1 / single-pipe selection based on the policy (ODMUse), the
+ * state DISPCLK, the DSC line-width limit and the DSC slice count. Each
+ * choice needs 4, 2 or 1 free DPPs (TotalNumberOfActiveDPP + n <= MaxNumDPP),
+ * otherwise *TotalAvailablePipesSupport is cleared.
+ * 2. A 4:2:0 override keyed on DCN32_MAX_FMT_420_BUFFER_WIDTH, which may force
+ * 2:1 or 4:1 or mark the mode unsupported. This override does not re-check
+ * MaxNumDPP and does not set *TotalAvailablePipesSupport back to true.
+ * 3. HDMI with 4:2:0 wider than DCN32_MAX_FMT_420_BUFFER_WIDTH is always
+ * marked unsupported.
+ *
+ * Outputs: *TotalAvailablePipesSupport, *NumberOfDPP, *ODMMode,
+ * *RequiredDISPCLKPerSurface.
+ */
+void dml32_CalculateODMMode(
+ unsigned int MaximumPixelsPerLinePerDSCUnit,
+ unsigned int HActive,
+ enum output_format_class OutFormat,
+ enum output_encoder_class Output,
+ enum odm_combine_policy ODMUse,
+ double StateDispclk,
+ double MaxDispclk,
+ bool DSCEnable,
+ unsigned int TotalNumberOfActiveDPP,
+ unsigned int MaxNumDPP,
+ double PixelClock,
+ double DISPCLKDPPCLKDSCCLKDownSpreading,
+ double DISPCLKRampingMargin,
+ double DISPCLKDPPCLKVCOSpeed,
+ unsigned int NumberOfDSCSlices,
+
+ /* Output */
+ bool *TotalAvailablePipesSupport,
+ unsigned int *NumberOfDPP,
+ enum odm_combine_mode *ODMMode,
+ double *RequiredDISPCLKPerSurface)
+{
+
+ double SurfaceRequiredDISPCLKWithoutODMCombine;
+ double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
+ double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
+
+ /* Required DISPCLK for each candidate ODM mode (same inputs, only the mode differs). */
+ SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
+ PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
+ MaxDispclk);
+ SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
+ PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
+ MaxDispclk);
+ SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
+ PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
+ MaxDispclk);
+ *TotalAvailablePipesSupport = true;
+ *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
+
+ /* NOTE(review): redundant with the initialization just above; kept as in the original. */
+ if (ODMUse == dm_odm_combine_policy_none)
+ *ODMMode = dm_odm_combine_mode_disabled;
+
+ *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
+ *NumberOfDPP = 0;
+
+ // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
+ // (ODMUse == "" || ODMUse == "CombineAsNeeded")
+
+ /*
+ * 4:1 is considered only for outputs other than HDMI/DP/eDP, when the
+ * policy forces it, when even 2:1 needs more DISPCLK than the state
+ * provides, when DSC is on and the line is wider than two DSC units, or
+ * when more than 8 DSC slices are used.
+ */
+ if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
+ ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
+ (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit))
+ || NumberOfDSCSlices > 8)))) {
+ if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
+ *ODMMode = dm_odm_combine_mode_4to1;
+ *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
+ *NumberOfDPP = 4;
+ } else {
+ *TotalAvailablePipesSupport = false;
+ }
+ /*
+ * 2:1 is considered for any non-HDMI output, when the policy forces it,
+ * when no-combine exceeds the state DISPCLK but 2:1 fits, when DSC is on
+ * and the line is wider than one DSC unit, or when 5..8 DSC slices are used.
+ */
+ } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
+ (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
+ SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
+ (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit))
+ || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
+ if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
+ *ODMMode = dm_odm_combine_mode_2to1;
+ *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
+ *NumberOfDPP = 2;
+ } else {
+ *TotalAvailablePipesSupport = false;
+ }
+ } else {
+ /* No combine: a single DPP is enough if one is still free. */
+ if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
+ *NumberOfDPP = 1;
+ else
+ *TotalAvailablePipesSupport = false;
+ }
+ /*
+ * 4:2:0 override: lines wider than the FMT 4:2:0 buffer need ODM combine.
+ * More than 4x the buffer width is unsupported, more than 2x (or an
+ * already selected 4:1) forces 4:1, anything else forces 2:1.
+ * NOTE(review): the forced modes are applied without checking MaxNumDPP.
+ */
+ if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH &&
+ ODMUse != dm_odm_combine_policy_4to1) {
+ if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) {
+ *ODMMode = dm_odm_combine_mode_disabled;
+ *NumberOfDPP = 0;
+ *TotalAvailablePipesSupport = false;
+ } else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 ||
+ *ODMMode == dm_odm_combine_mode_4to1) {
+ *ODMMode = dm_odm_combine_mode_4to1;
+ *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
+ *NumberOfDPP = 4;
+ } else {
+ *ODMMode = dm_odm_combine_mode_2to1;
+ *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
+ *NumberOfDPP = 2;
+ }
+ }
+ /* HDMI 4:2:0 wider than the FMT buffer is rejected outright, undoing any mode chosen above. */
+ if (Output == dm_hdmi && OutFormat == dm_420 &&
+ HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) {
+ *ODMMode = dm_odm_combine_mode_disabled;
+ *NumberOfDPP = 0;
+ *TotalAvailablePipesSupport = false;
+ }
+}
+
+/*
+ * dml32_CalculateRequiredDispclk - DISPCLK needed for a pixel clock under a
+ * given ODM combine mode.
+ *
+ * The pixel clock is divided by 4 (4:1), 2 (2:1) or left as is, scaled by the
+ * down-spreading percentage and, optionally, the ramping-margin percentage,
+ * and rounded up with dml32_RoundToDFSGranularity(). MaxDispclk is rounded
+ * down the same way and used as the ceiling:
+ * - the clock without ramping margin already exceeds the ceiling: return it
+ *   unchanged (the caller sees a value above the maximum);
+ * - only the clock with ramping margin exceeds the ceiling: return the ceiling;
+ * - otherwise: return the clock with ramping margin.
+ */
+double dml32_CalculateRequiredDispclk(
+		enum odm_combine_mode ODMMode,
+		double PixelClock,
+		double DISPCLKDPPCLKDSCCLKDownSpreading,
+		double DISPCLKRampingMargin,
+		double DISPCLKDPPCLKVCOSpeed,
+		double MaxDispclk)
+{
+	double odm_pixel_clock;
+	double clk_with_ramping;
+	double clk_without_ramping;
+	double clk_ceiling;
+
+	/* Each ODM segment only has to carry its share of the pixel rate. */
+	switch (ODMMode) {
+	case dm_odm_combine_mode_4to1:
+		odm_pixel_clock = PixelClock / 4;
+		break;
+	case dm_odm_combine_mode_2to1:
+		odm_pixel_clock = PixelClock / 2;
+		break;
+	default:
+		odm_pixel_clock = PixelClock;
+		break;
+	}
+
+	clk_with_ramping = dml32_RoundToDFSGranularity(
+			odm_pixel_clock * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
+					* (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
+
+	clk_without_ramping = dml32_RoundToDFSGranularity(
+			odm_pixel_clock * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
+
+	clk_ceiling = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
+
+	/* Even without the ramping margin the maximum is exceeded. */
+	if (clk_without_ramping > clk_ceiling)
+		return clk_without_ramping;
+
+	/* Only the ramping margin pushes past the maximum: clamp to it. */
+	if (clk_with_ramping > clk_ceiling)
+		return clk_ceiling;
+
+	return clk_with_ramping;
+}
+
+/*
+ * dml32_RoundToDFSGranularity - snap a clock to a value of the form
+ * VCOSpeed * 4 / N with N a whole number.
+ *
+ * round_up selects the next value at or above Clock (N is rounded down);
+ * otherwise the next value at or below Clock is returned (N is rounded up).
+ * A non-positive Clock yields 0.
+ */
+double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
+{
+	double divider;
+
+	if (Clock <= 0.0)
+		return 0.0;
+
+	/* A smaller divider gives a higher clock, hence floor for round-up. */
+	if (round_up)
+		divider = dml_floor(VCOSpeed * 4.0 / Clock, 1.0);
+	else
+		divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);
+
+	return VCOSpeed * 4.0 / divider;
+}
+
+/*
+ * dml32_CalculateOutputLink - determine output bpp, DSC/FEC requirement and the
+ * output type/rate for one timing.
+ *
+ * Outputs are first reset (*RequiresDSC = false, *RequiresFEC = false,
+ * *OutBpp = 0, type/rate unknown). Nothing else happens unless
+ * IsMainSurfaceUsingTheIndicatedTiming is set.
+ *
+ * - dm_hdmi: no DSC/FEC; bpp comes from dml32_TruncToValidBPP() using
+ *   min(600, PHYCLKPerState) * 10 as link rate on 3 lanes. *RequiredSlots is
+ *   not written on this path (a local dummy receives the value).
+ * - dm_dp2p0: UHBR10, UHBR13.5 and UHBR20 are tried in that order, gated on
+ *   PHYCLKD32PerState.
+ * - dm_dp / dm_edp: HBR, HBR2 and HBR3 are tried in that order, gated on
+ *   PHYCLKPerState.
+ *   A tier is tried only if OutputLinkDPRate is dm_dp_rate_na or names that
+ *   tier, and (after the first tier) only while *OutBpp is still 0.
+ *
+ * NOTE(review): *RequiresFEC is declared double * but only ever receives
+ * true/false. PHYCLKD18PerState is not referenced in this function.
+ * NOTE(review): the in-tier "retry with DSC" blocks require DSCEnable == true,
+ * in which case LinkDSCEnable is already true from the setup, so the second
+ * dml32_TruncToValidBPP() call receives the same arguments as the first.
+ */
+void dml32_CalculateOutputLink(
+ double PHYCLKPerState,
+ double PHYCLKD18PerState,
+ double PHYCLKD32PerState,
+ double Downspreading,
+ bool IsMainSurfaceUsingTheIndicatedTiming,
+ enum output_encoder_class Output,
+ enum output_format_class OutputFormat,
+ unsigned int HTotal,
+ unsigned int HActive,
+ double PixelClockBackEnd,
+ double ForcedOutputLinkBPP,
+ unsigned int DSCInputBitPerComponent,
+ unsigned int NumberOfDSCSlices,
+ double AudioSampleRate,
+ unsigned int AudioSampleLayout,
+ enum odm_combine_mode ODMModeNoDSC,
+ enum odm_combine_mode ODMModeDSC,
+ bool DSCEnable,
+ unsigned int OutputLinkDPLanes,
+ enum dm_output_link_dp_rate OutputLinkDPRate,
+
+ /* Output */
+ bool *RequiresDSC,
+ double *RequiresFEC,
+ double *OutBpp,
+ enum dm_output_type *OutputType,
+ enum dm_output_rate *OutputRate,
+ unsigned int *RequiredSlots)
+{
+ bool LinkDSCEnable;
+ unsigned int dummy;
+ *RequiresDSC = false;
+ *RequiresFEC = false;
+ *OutBpp = 0;
+ *OutputType = dm_output_type_unknown;
+ *OutputRate = dm_output_rate_unknown;
+
+ if (IsMainSurfaceUsingTheIndicatedTiming) {
+ if (Output == dm_hdmi) {
+ /* HDMI: DSC is never used here; link rate is capped at 600 * 10 and 3 lanes are assumed. */
+ *RequiresDSC = false;
+ *RequiresFEC = false;
+ *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
+ PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, &dummy);
+ //OutputTypeAndRate = "HDMI";
+ *OutputType = dm_output_type_hdmi;
+
+ } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
+ /*
+ * Initial DSC/FEC state for the DP family. With DSC, FEC is required
+ * for dm_dp and dm_dp2p0 but not dm_edp; without DSC only dm_dp2p0
+ * sets FEC.
+ */
+ if (DSCEnable == true) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dm_dp || Output == dm_dp2p0)
+ *RequiresFEC = true;
+ else
+ *RequiresFEC = false;
+ } else {
+ *RequiresDSC = false;
+ LinkDSCEnable = false;
+ if (Output == dm_dp2p0)
+ *RequiresFEC = true;
+ else
+ *RequiresFEC = false;
+ }
+ if (Output == dm_dp2p0) {
+ *OutBpp = 0;
+ /* UHBR10 tier: link rate 10000 scaled by down-spreading; needs PHYCLKD32 >= 10000 / 32. */
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
+ PHYCLKD32PerState >= 10000.0 / 32) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
+ AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ /* No valid bpp and the next tier is out of reach at this PHY clock: retry with DSC. */
+ if (*OutBpp == 0 && PHYCLKD32PerState < 13500.0 / 32 && DSCEnable == true &&
+ ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR10";
+ *OutputType = dm_output_type_dp2p0;
+ *OutputRate = dm_output_rate_dp_rate_uhbr10;
+ }
+ /* UHBR13.5 tier: only if the previous tier produced no bpp. */
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
+ *OutBpp == 0 && PHYCLKD32PerState >= 13500.0 / 32) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
+ AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+
+ /* 20000 / 32 is an integer division; it is exact (625). */
+ if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
+ ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR13p5";
+ *OutputType = dm_output_type_dp2p0;
+ *OutputRate = dm_output_rate_dp_rate_uhbr13p5;
+ }
+ /* UHBR20 tier: last resort, so the DSC retry has no PHY clock upper bound. */
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
+ *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
+ AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR20";
+ *OutputType = dm_output_type_dp2p0;
+ *OutputRate = dm_output_rate_dp_rate_uhbr20;
+ }
+ } else {
+ /* dm_dp / dm_edp: same cascade over HBR (2700), HBR2 (5400) and HBR3 (8100). */
+ *OutBpp = 0;
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
+ PHYCLKPerState >= 270) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
+ AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
+ ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ /* FEC accompanies DSC on dm_dp only, not dm_edp. */
+ if (Output == dm_dp)
+ *RequiresFEC = true;
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR";
+ *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
+ *OutputRate = dm_output_rate_dp_rate_hbr;
+ }
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
+ *OutBpp == 0 && PHYCLKPerState >= 540) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
+ AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+
+ if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
+ ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dm_dp)
+ *RequiresFEC = true;
+
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR2";
+ *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
+ *OutputRate = dm_output_rate_dp_rate_hbr2;
+ }
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
+ AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
+ RequiredSlots);
+
+ if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dm_dp)
+ *RequiresFEC = true;
+
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR3";
+ *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
+ *OutputRate = dm_output_rate_dp_rate_hbr3;
+ }
+ }
+ }
+ }
+}
+
+/*
+ * dml32_CalculateDPPCLK - derive the global DPPCLK and the per-surface DPPCLKs.
+ *
+ * Pass 1: each surface's single-DPP clock is divided by its DPP count and
+ * scaled by the down-spreading percentage; the largest result, rounded up
+ * with dml32_RoundToDFSGranularity(), becomes *GlobalDPPCLK.
+ * Pass 2: each per-surface clock is rounded up to a multiple of
+ * *GlobalDPPCLK / 255.
+ */
+void dml32_CalculateDPPCLK(
+		unsigned int NumberOfActiveSurfaces,
+		double DISPCLKDPPCLKDSCCLKDownSpreading,
+		double DISPCLKDPPCLKVCOSpeed,
+		double DPPCLKUsingSingleDPP[],
+		unsigned int DPPPerSurface[],
+
+		/* output */
+		double *GlobalDPPCLK,
+		double Dppclk[])
+{
+	unsigned int surf;
+
+	*GlobalDPPCLK = 0;
+
+	/* Pass 1: raw per-surface clocks and their maximum. */
+	surf = 0;
+	while (surf < NumberOfActiveSurfaces) {
+		Dppclk[surf] = DPPCLKUsingSingleDPP[surf] / DPPPerSurface[surf] *
+				(1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+		*GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[surf]);
+		++surf;
+	}
+
+	*GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed);
+
+	/* Pass 2: quantize every surface clock to 1/255 steps of the global clock. */
+	surf = 0;
+	while (surf < NumberOfActiveSurfaces) {
+		Dppclk[surf] = *GlobalDPPCLK / 255 *
+				dml_ceil(Dppclk[surf] * 255.0 / *GlobalDPPCLK, 1.0);
+		++surf;
+	}
+}
+
+/*
+ * dml32_TruncToValidBPP - choose (or validate) the output bits-per-pixel that
+ * fits the link.
+ *
+ * The per-format candidate depths (NonDSCBPP0..2, plus NonDSCBPP3 = 18 for
+ * 4:4:4) and the DSC range [MinDSCBPP, MaxDSCBPP] are set up first. The link
+ * capacity MaxLinkBPP is derived from LinkBitRate, Lanes and PixelClock with an
+ * encoding factor that depends on Output, then adjusted for the ODM mode that
+ * matches DSCEnable (capped at 16 for 4:1, at 32 for 2:1, doubled for 1:2
+ * split).
+ *
+ * DesiredBPP == 0 means "pick for me":
+ * - with DSC: BPP_INVALID below MinDSCBPP, MaxDSCBPP when the link allows it,
+ *   otherwise MaxLinkBPP rounded down to 1/16 bpp;
+ * - without DSC: the deepest of NonDSCBPP2/1/0 that fits, then the 18 bpp
+ *   fallback for dm_dp / dm_dp2p0 4:4:4, else BPP_INVALID.
+ * DesiredBPP != 0 is returned as is when it is acceptable for the DSC setting,
+ * otherwise BPP_INVALID.
+ *
+ * *RequiredSlots is always written as ceil(DesiredBPP / MaxLinkBPP * 64).
+ * HTotal, HActive, DSCSlices, AudioRate and AudioLayout are not used here.
+ *
+ * Fix: the non-DSC auto-select path used to return a hard-coded 16.0 when the
+ * link fits NonDSCBPP0. That only matches non-HDMI 4:2:2 (NonDSCBPP0 == 16);
+ * for 4:2:0 (12) it reported more than the link can carry, and for 4:4:4 / HDMI
+ * (24) it reported less than the 18 bpp fallback below it. It now returns
+ * NonDSCBPP0.
+ */
+double dml32_TruncToValidBPP(
+		double LinkBitRate,
+		unsigned int Lanes,
+		unsigned int HTotal,
+		unsigned int HActive,
+		double PixelClock,
+		double DesiredBPP,
+		bool DSCEnable,
+		enum output_encoder_class Output,
+		enum output_format_class Format,
+		unsigned int DSCInputBitPerComponent,
+		unsigned int DSCSlices,
+		unsigned int AudioRate,
+		unsigned int AudioLayout,
+		enum odm_combine_mode ODMModeNoDSC,
+		enum odm_combine_mode ODMModeDSC,
+		/* Output */
+		unsigned int *RequiredSlots)
+{
+	double MaxLinkBPP;
+	unsigned int MinDSCBPP;
+	double MaxDSCBPP;
+	unsigned int NonDSCBPP0;
+	unsigned int NonDSCBPP1;
+	unsigned int NonDSCBPP2;
+	unsigned int NonDSCBPP3 = BPP_INVALID;
+
+	/* Candidate uncompressed depths and the DSC bpp range for the pixel format. */
+	if (Format == dm_420) {
+		NonDSCBPP0 = 12;
+		NonDSCBPP1 = 15;
+		NonDSCBPP2 = 18;
+		MinDSCBPP = 6;
+		MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
+	} else if (Format == dm_444) {
+		NonDSCBPP3 = 18;
+		NonDSCBPP0 = 24;
+		NonDSCBPP1 = 30;
+		NonDSCBPP2 = 36;
+		MinDSCBPP = 8;
+		MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
+	} else {
+		if (Output == dm_hdmi) {
+			NonDSCBPP0 = 24;
+			NonDSCBPP1 = 24;
+			NonDSCBPP2 = 24;
+		} else {
+			NonDSCBPP0 = 16;
+			NonDSCBPP1 = 20;
+			NonDSCBPP2 = 24;
+		}
+		if (Format == dm_n422) {
+			MinDSCBPP = 7;
+			MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
+		} else {
+			MinDSCBPP = 8;
+			MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
+		}
+	}
+
+	/* Link capacity in bits per pixel; the efficiency factor depends on the encoder. */
+	if (Output == dm_dp2p0) {
+		MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
+	} else if (DSCEnable && Output == dm_dp) {
+		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
+	} else {
+		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
+	}
+
+	/* ODM adjustment, using the ODM mode that corresponds to the DSC setting. */
+	if (DSCEnable) {
+		if (ODMModeDSC == dm_odm_combine_mode_4to1)
+			MaxLinkBPP = dml_min(MaxLinkBPP, 16);
+		else if (ODMModeDSC == dm_odm_combine_mode_2to1)
+			MaxLinkBPP = dml_min(MaxLinkBPP, 32);
+		else if (ODMModeDSC == dm_odm_split_mode_1to2)
+			MaxLinkBPP = 2 * MaxLinkBPP;
+	} else {
+		if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
+			MaxLinkBPP = dml_min(MaxLinkBPP, 16);
+		else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
+			MaxLinkBPP = dml_min(MaxLinkBPP, 32);
+		else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
+			MaxLinkBPP = 2 * MaxLinkBPP;
+	}
+
+	*RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
+
+	if (DesiredBPP == 0) {
+		if (DSCEnable) {
+			if (MaxLinkBPP < MinDSCBPP)
+				return BPP_INVALID;
+			else if (MaxLinkBPP >= MaxDSCBPP)
+				return MaxDSCBPP;
+			else
+				/* Compressed bpp is expressed in 1/16 bpp steps. */
+				return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
+		} else {
+			if (MaxLinkBPP >= NonDSCBPP2)
+				return NonDSCBPP2;
+			else if (MaxLinkBPP >= NonDSCBPP1)
+				return NonDSCBPP1;
+			else if (MaxLinkBPP >= NonDSCBPP0)
+				/* Was a hard-coded 16.0, which is only right for non-HDMI 4:2:2. */
+				return NonDSCBPP0;
+			else if ((Output == dm_dp2p0 || Output == dm_dp) && NonDSCBPP3 != BPP_INVALID && MaxLinkBPP >= NonDSCBPP3)
+				return NonDSCBPP3; // Special case to allow 6bpc RGB for DP connections.
+			else
+				return BPP_INVALID;
+		}
+	} else {
+		/*
+		 * A forced bpp is accepted without DSC if it equals NonDSCBPP2 or
+		 * NonDSCBPP1 or is at most NonDSCBPP0, and with DSC if it lies in
+		 * [MinDSCBPP, MaxDSCBPP]. It is not checked against MaxLinkBPP here.
+		 */
+		if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
+				DesiredBPP <= NonDSCBPP0)) ||
+				(DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
+			return BPP_INVALID;
+		else
+			return DesiredBPP;
+	}
+} // TruncToValidBPP
+
+/*
+ * dml32_RequiredDTBCLK - DTBCLK needed for an output stream.
+ *
+ * Without DSC the result is PixelClock / 4 * OutputBpp / 24, with a floor of 25.
+ * With DSC it is the largest of the pixel word rate, the average tribyte rate
+ * and the active-region tribyte rate (each divided by 4) and 25, scaled by
+ * 1.002.
+ *
+ * NOTE(review): HActive / DSCSlices is an unsigned integer division, so the
+ * dml_ceil() wrapped around it has nothing left to round.
+ */
+double dml32_RequiredDTBCLK(
+		bool DSCEnable,
+		double PixelClock,
+		enum output_format_class OutputFormat,
+		double OutputBpp,
+		unsigned int DSCSlices,
+		unsigned int HTotal,
+		unsigned int HActive,
+		unsigned int AudioRate,
+		unsigned int AudioLayout)
+{
+	double word_rate;
+	double slice_width;
+	double slice_bytes;
+	double hc_active;
+	double hc_blank;
+	double avg_tribyte_rate;
+	double active_tribyte_rate;
+
+	/* Uncompressed stream: simple proportional rule with a lower bound. */
+	if (!DSCEnable)
+		return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
+
+	/* Every format other than 4:4:4 carries two pixels per word. */
+	word_rate = PixelClock;
+	if (OutputFormat != dm_444)
+		word_rate = PixelClock / 2;
+
+	/* Active line payload: bytes per slice, then all slices in 3-byte units. */
+	slice_width = dml_ceil(HActive / DSCSlices, 1);
+	slice_bytes = dml_ceil(OutputBpp * slice_width / 8.0, 1);
+	hc_active = dml_ceil(DSCSlices * slice_bytes / 3.0, 1);
+
+	/* Blanking payload: fixed 64 plus 32 per audio unit carried in one line. */
+	hc_blank = 64 + 32 *
+			dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
+
+	avg_tribyte_rate = word_rate * (hc_active + hc_blank) / HTotal;
+	active_tribyte_rate = word_rate * hc_active / HActive;
+
+	return dml_max4(word_rate / 4.0, avg_tribyte_rate / 4.0, active_tribyte_rate / 4.0, 25.0) * 1.002;
+}
+
+/*
+ * dml32_DSCDelayRequirement - DSC pipeline delay for one output.
+ *
+ * When DSC is enabled and OutputBpp is non-zero, the delay is the sum of
+ * dml32_dscceComputeDelay() and dml32_dscComputeDelay(), evaluated with the
+ * slice count divided by the ODM factor (4, 2 or 1) and multiplied by that
+ * same factor. It is then extended by one horizontal blank per started active
+ * line and rescaled by PixelClock / PixelClockBackEnd. Otherwise the delay is 0.
+ * The return value is the delay times dsc_delay_factor_wa, rounded up.
+ *
+ * NOTE(review): HActive / NumberOfDSCSlices is an unsigned integer division,
+ * so the dml_ceil() around it has nothing left to round.
+ */
+unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
+		enum odm_combine_mode ODMMode,
+		unsigned int DSCInputBitPerComponent,
+		double OutputBpp,
+		unsigned int HActive,
+		unsigned int HTotal,
+		unsigned int NumberOfDSCSlices,
+		enum output_format_class OutputFormat,
+		enum output_encoder_class Output,
+		double PixelClock,
+		double PixelClockBackEnd,
+		double dsc_delay_factor_wa)
+{
+	unsigned int delay = 0;
+	unsigned int odm_factor;
+
+	if (DSCEnabled == true && OutputBpp != 0) {
+		/* Number of ODM segments sharing the DSC slices. */
+		if (ODMMode == dm_odm_combine_mode_4to1)
+			odm_factor = 4;
+		else if (ODMMode == dm_odm_combine_mode_2to1)
+			odm_factor = 2;
+		else
+			odm_factor = 1;
+
+		delay = odm_factor * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
+				dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / odm_factor,
+				OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
+
+		/* Add one horizontal blank for every (started) active line of delay. */
+		delay = delay + (HTotal - HActive) *
+				dml_ceil((double)delay / HActive, 1);
+
+		/* Convert from back-end pixel clocks to front-end pixel clocks. */
+		delay = delay * PixelClock / PixelClockBackEnd;
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: DSCEnabled = %d\n", __func__, DSCEnabled);
+	dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
+	dml_print("DML::%s: HActive = %d\n", __func__, HActive);
+	dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat);
+	dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
+	dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices);
+	dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, delay);
+#endif
+
+	return dml_ceil(delay * dsc_delay_factor_wa, 1);
+}
+
+/*
+ * dml32_CalculateSurfaceSizeInMall - compute the MALL footprint of every active
+ * surface and check it against the MALL allocation.
+ *
+ * For each surface k, SurfaceSizeInMALL[k] is the sum of:
+ * - the luma area, rounded to read-block granularity, times BytesPerPixelY;
+ * - the chroma area (same formula) when ReadBlockWidthC[k] > 0;
+ * - when DCCEnable[k]: the luma meta area, rounded to 8x the 256-byte block
+ *   size, times BytesPerPixelY / 256, plus a fixed 64 * 1024; and the chroma
+ *   meta area when Read256BytesBlockWidthC[k] > 0 (no extra fixed term).
+ *
+ * With a stationary viewport the area is derived from the block-aligned start
+ * and end of the viewport, capped at the block-aligned surface size. Otherwise
+ * only the viewport size is used, padded by (block - 1) and capped at the
+ * surface size before rounding up.
+ *
+ * Surfaces flagged dm_use_mall_pstate_change_phantom_pipe are summed into a
+ * SubVP total; the remaining ones with dm_use_mall_static_screen_enable into a
+ * static-screen total. *ExceededMALLSize is set when either total is larger
+ * than MALLAllocatedForDCN * 1024 * 1024.
+ *
+ * NOTE(review): MALLAllocatedForDCN is presumably in MiB, given the
+ * 1024 * 1024 scaling into "InBytes"; confirm against the caller.
+ */
+void dml32_CalculateSurfaceSizeInMall(
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int MALLAllocatedForDCN,
+ enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
+ enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
+ bool DCCEnable[],
+ bool ViewportStationary[],
+ unsigned int ViewportXStartY[],
+ unsigned int ViewportYStartY[],
+ unsigned int ViewportXStartC[],
+ unsigned int ViewportYStartC[],
+ unsigned int ViewportWidthY[],
+ unsigned int ViewportHeightY[],
+ unsigned int BytesPerPixelY[],
+ unsigned int ViewportWidthC[],
+ unsigned int ViewportHeightC[],
+ unsigned int BytesPerPixelC[],
+ unsigned int SurfaceWidthY[],
+ unsigned int SurfaceWidthC[],
+ unsigned int SurfaceHeightY[],
+ unsigned int SurfaceHeightC[],
+ unsigned int Read256BytesBlockWidthY[],
+ unsigned int Read256BytesBlockWidthC[],
+ unsigned int Read256BytesBlockHeightY[],
+ unsigned int Read256BytesBlockHeightC[],
+ unsigned int ReadBlockWidthY[],
+ unsigned int ReadBlockWidthC[],
+ unsigned int ReadBlockHeightY[],
+ unsigned int ReadBlockHeightC[],
+ unsigned int DCCMetaPitchY[],
+ unsigned int DCCMetaPitchC[],
+
+ /* Output */
+ unsigned int SurfaceSizeInMALL[],
+ bool *ExceededMALLSize)
+{
+ unsigned int k;
+ unsigned int TotalSurfaceSizeInMALLForSS = 0;
+ unsigned int TotalSurfaceSizeInMALLForSubVP = 0;
+ unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (ViewportStationary[k]) {
+ /* Luma: block-aligned viewport extent (end minus start), capped at the aligned surface. */
+ SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
+ dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
+ ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
+ ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
+ ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
+ ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
+ dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
+
+ /* Chroma plane, present only when a chroma read block width is given. */
+ if (ReadBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
+ dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
+ ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
+ dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
+ dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
+ dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
+ ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
+ dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
+ BytesPerPixelC[k];
+ }
+ /* DCC meta data: same extent logic at 8x the 256-byte block size, 1/256 of the pixel bytes. */
+ if (DCCEnable[k] == true) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ (dml_min(dml_ceil(DCCMetaPitchY[k], 8 * Read256BytesBlockWidthY[k]),
+ dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
+ Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
+ - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
+ * dml_min(dml_ceil(SurfaceHeightY[k], 8 *
+ Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
+ ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
+ Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 *
+ Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256) + (64 * 1024);
+ if (Read256BytesBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ dml_min(dml_ceil(DCCMetaPitchC[k], 8 *
+ Read256BytesBlockWidthC[k]),
+ dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
+ * Read256BytesBlockWidthC[k] - 1, 8 *
+ Read256BytesBlockWidthC[k]) -
+ dml_floor(ViewportXStartC[k], 8 *
+ Read256BytesBlockWidthC[k])) *
+ dml_min(dml_ceil(SurfaceHeightC[k], 8 *
+ Read256BytesBlockHeightC[k]),
+ dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
+ 8 * Read256BytesBlockHeightC[k] - 1, 8 *
+ Read256BytesBlockHeightC[k]) -
+ dml_floor(ViewportYStartC[k], 8 *
+ Read256BytesBlockHeightC[k])) *
+ BytesPerPixelC[k] / 256;
+ }
+ }
+ } else {
+ /* Moving viewport: start offsets are ignored; pad the size by (block - 1) and cap at the surface. */
+ SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
+ ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
+ dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
+ ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
+ BytesPerPixelY[k];
+ if (ReadBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
+ ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
+ dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
+ ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
+ BytesPerPixelC[k];
+ }
+ if (DCCEnable[k] == true) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ (dml_ceil(dml_min(DCCMetaPitchY[k], ViewportWidthY[k] + 8 *
+ Read256BytesBlockWidthY[k] - 1), 8 *
+ Read256BytesBlockWidthY[k]) *
+ dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
+ Read256BytesBlockHeightY[k] - 1), 8 *
+ Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256) + (64 * 1024);
+
+ if (Read256BytesBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ dml_ceil(dml_min(DCCMetaPitchC[k], ViewportWidthC[k] + 8 *
+ Read256BytesBlockWidthC[k] - 1), 8 *
+ Read256BytesBlockWidthC[k]) *
+ dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
+ Read256BytesBlockHeightC[k] - 1), 8 *
+ Read256BytesBlockHeightC[k]) *
+ BytesPerPixelC[k] / 256;
+ }
+ }
+ }
+ }
+
+ /* Accumulate the two usage classes; a phantom-pipe surface is never counted as static screen. */
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ /* SS and Subvp counted separate as they are never used at the same time */
+ if (UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe)
+ TotalSurfaceSizeInMALLForSubVP = TotalSurfaceSizeInMALLForSubVP + SurfaceSizeInMALL[k];
+ else if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
+ TotalSurfaceSizeInMALLForSS = TotalSurfaceSizeInMALLForSS + SurfaceSizeInMALL[k];
+ }
+ *ExceededMALLSize = (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) ||
+ (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes);
+} // CalculateSurfaceSizeInMall
+
+void dml32_CalculateVMRowAndSwath(
+ unsigned int NumberOfActiveSurfaces,
+ DmlPipe myPipe[],
+ unsigned int SurfaceSizeInMALL[],
+ unsigned int PTEBufferSizeInRequestsLuma,
+ unsigned int PTEBufferSizeInRequestsChroma,
+ unsigned int DCCMetaBufferSizeBytes,
+ enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ unsigned int MALLAllocatedForDCN,
+ double SwathWidthY[],
+ double SwathWidthC[],
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int GPUVMMinPageSizeKBytes[],
+ unsigned int HostVMMinPageSize,
+
+ /* Output */
+ bool PTEBufferSizeNotExceeded[],
+ bool DCCMetaBufferSizeNotExceeded[],
+ unsigned int dpte_row_width_luma_ub[],
+ unsigned int dpte_row_width_chroma_ub[],
+ unsigned int dpte_row_height_luma[],
+ unsigned int dpte_row_height_chroma[],
+ unsigned int dpte_row_height_linear_luma[], // VBA_DELTA
+ unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA
+ unsigned int meta_req_width[],
+ unsigned int meta_req_width_chroma[],
+ unsigned int meta_req_height[],
+ unsigned int meta_req_height_chroma[],
+ unsigned int meta_row_width[],
+ unsigned int meta_row_width_chroma[],
+ unsigned int meta_row_height[],
+ unsigned int meta_row_height_chroma[],
+ unsigned int vm_group_bytes[],
+ unsigned int dpte_group_bytes[],
+ unsigned int PixelPTEReqWidthY[],
+ unsigned int PixelPTEReqHeightY[],
+ unsigned int PTERequestSizeY[],
+ unsigned int PixelPTEReqWidthC[],
+ unsigned int PixelPTEReqHeightC[],
+ unsigned int PTERequestSizeC[],
+ unsigned int dpde0_bytes_per_frame_ub_l[],
+ unsigned int meta_pte_bytes_per_frame_ub_l[],
+ unsigned int dpde0_bytes_per_frame_ub_c[],
+ unsigned int meta_pte_bytes_per_frame_ub_c[],
+ double PrefetchSourceLinesY[],
+ double PrefetchSourceLinesC[],
+ double VInitPreFillY[],
+ double VInitPreFillC[],
+ unsigned int MaxNumSwathY[],
+ unsigned int MaxNumSwathC[],
+ double meta_row_bw[],
+ double dpte_row_bw[],
+ double PixelPTEBytesPerRow[],
+ double PDEAndMetaPTEBytesFrame[],
+ double MetaRowByte[],
+ bool use_one_row_for_frame[],
+ bool use_one_row_for_frame_flip[],
+ bool UsesMALLForStaticScreen[],
+ bool PTE_BUFFER_MODE[],
+ unsigned int BIGK_FRAGMENT_SIZE[])
+{
+ unsigned int k;
+ unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
+ unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
+ unsigned int PDEAndMetaPTEBytesFrameY;
+ unsigned int PDEAndMetaPTEBytesFrameC;
+ unsigned int MetaRowByteY[DC__NUM_DPP__MAX] = {0};
+ unsigned int MetaRowByteC[DC__NUM_DPP__MAX] = {0};
+ unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
+ bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (HostVMEnable == true) {
+ vm_group_bytes[k] = 512;
+ dpte_group_bytes[k] = 512;
+ } else if (GPUVMEnable == true) {
+ vm_group_bytes[k] = 2048;
+ if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
+ dpte_group_bytes[k] = 512;
+ else
+ dpte_group_bytes[k] = 2048;
+ } else {
+ vm_group_bytes[k] = 0;
+ dpte_group_bytes[k] = 0;
+ }
+
+ if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
+ myPipe[k].SourcePixelFormat == dm_420_12 ||
+ myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
+ if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
+ !IsVertical(myPipe[k].SourceRotation)) {
+ PTEBufferSizeInRequestsForLuma[k] =
+ (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
+ PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
+ } else {
+ PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
+ PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
+ }
+
+ PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
+ myPipe[k].ViewportStationary,
+ myPipe[k].DCCEnable,
+ myPipe[k].DPPPerSurface,
+ myPipe[k].BlockHeight256BytesC,
+ myPipe[k].BlockWidth256BytesC,
+ myPipe[k].SourcePixelFormat,
+ myPipe[k].SurfaceTiling,
+ myPipe[k].BytePerPixelC,
+ myPipe[k].SourceRotation,
+ SwathWidthC[k],
+ myPipe[k].ViewportHeightChroma,
+ myPipe[k].ViewportXStartC,
+ myPipe[k].ViewportYStartC,
+ GPUVMEnable,
+ HostVMEnable,
+ HostVMMaxNonCachedPageTableLevels,
+ GPUVMMaxPageTableLevels,
+ GPUVMMinPageSizeKBytes[k],
+ HostVMMinPageSize,
+ PTEBufferSizeInRequestsForChroma[k],
+ myPipe[k].PitchC,
+ myPipe[k].DCCMetaPitchC,
+ myPipe[k].BlockWidthC,
+ myPipe[k].BlockHeightC,
+
+ /* Output */
+ &MetaRowByteC[k],
+ &PixelPTEBytesPerRowC[k],
+ &dpte_row_width_chroma_ub[k],
+ &dpte_row_height_chroma[k],
+ &dpte_row_height_linear_chroma[k],
+ &PixelPTEBytesPerRowC_one_row_per_frame[k],
+ &dpte_row_width_chroma_ub_one_row_per_frame[k],
+ &dpte_row_height_chroma_one_row_per_frame[k],
+ &meta_req_width_chroma[k],
+ &meta_req_height_chroma[k],
+ &meta_row_width_chroma[k],
+ &meta_row_height_chroma[k],
+ &PixelPTEReqWidthC[k],
+ &PixelPTEReqHeightC[k],
+ &PTERequestSizeC[k],
+ &dpde0_bytes_per_frame_ub_c[k],
+ &meta_pte_bytes_per_frame_ub_c[k]);
+
+ PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
+ myPipe[k].VRatioChroma,
+ myPipe[k].VTapsChroma,
+ myPipe[k].InterlaceEnable,
+ myPipe[k].ProgressiveToInterlaceUnitInOPP,
+ myPipe[k].SwathHeightC,
+ myPipe[k].SourceRotation,
+ myPipe[k].ViewportStationary,
+ SwathWidthC[k],
+ myPipe[k].ViewportHeightChroma,
+ myPipe[k].ViewportXStartC,
+ myPipe[k].ViewportYStartC,
+
+ /* Output */
+ &VInitPreFillC[k],
+ &MaxNumSwathC[k]);
+ } else {
+ PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
+ PTEBufferSizeInRequestsForChroma[k] = 0;
+ PixelPTEBytesPerRowC[k] = 0;
+ PDEAndMetaPTEBytesFrameC = 0;
+ MetaRowByteC[k] = 0;
+ MaxNumSwathC[k] = 0;
+ PrefetchSourceLinesC[k] = 0;
+ dpte_row_height_chroma_one_row_per_frame[k] = 0;
+ dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
+ PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
+ }
+
+ PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
+ myPipe[k].ViewportStationary,
+ myPipe[k].DCCEnable,
+ myPipe[k].DPPPerSurface,
+ myPipe[k].BlockHeight256BytesY,
+ myPipe[k].BlockWidth256BytesY,
+ myPipe[k].SourcePixelFormat,
+ myPipe[k].SurfaceTiling,
+ myPipe[k].BytePerPixelY,
+ myPipe[k].SourceRotation,
+ SwathWidthY[k],
+ myPipe[k].ViewportHeight,
+ myPipe[k].ViewportXStart,
+ myPipe[k].ViewportYStart,
+ GPUVMEnable,
+ HostVMEnable,
+ HostVMMaxNonCachedPageTableLevels,
+ GPUVMMaxPageTableLevels,
+ GPUVMMinPageSizeKBytes[k],
+ HostVMMinPageSize,
+ PTEBufferSizeInRequestsForLuma[k],
+ myPipe[k].PitchY,
+ myPipe[k].DCCMetaPitchY,
+ myPipe[k].BlockWidthY,
+ myPipe[k].BlockHeightY,
+
+ /* Output */
+ &MetaRowByteY[k],
+ &PixelPTEBytesPerRowY[k],
+ &dpte_row_width_luma_ub[k],
+ &dpte_row_height_luma[k],
+ &dpte_row_height_linear_luma[k],
+ &PixelPTEBytesPerRowY_one_row_per_frame[k],
+ &dpte_row_width_luma_ub_one_row_per_frame[k],
+ &dpte_row_height_luma_one_row_per_frame[k],
+ &meta_req_width[k],
+ &meta_req_height[k],
+ &meta_row_width[k],
+ &meta_row_height[k],
+ &PixelPTEReqWidthY[k],
+ &PixelPTEReqHeightY[k],
+ &PTERequestSizeY[k],
+ &dpde0_bytes_per_frame_ub_l[k],
+ &meta_pte_bytes_per_frame_ub_l[k]);
+
+ PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
+ myPipe[k].VRatio,
+ myPipe[k].VTaps,
+ myPipe[k].InterlaceEnable,
+ myPipe[k].ProgressiveToInterlaceUnitInOPP,
+ myPipe[k].SwathHeightY,
+ myPipe[k].SourceRotation,
+ myPipe[k].ViewportStationary,
+ SwathWidthY[k],
+ myPipe[k].ViewportHeight,
+ myPipe[k].ViewportXStart,
+ myPipe[k].ViewportYStart,
+
+ /* Output */
+ &VInitPreFillY[k],
+ &MaxNumSwathY[k]);
+
+ PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
+ MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
+
+ if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
+ PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
+ PTEBufferSizeNotExceeded[k] = true;
+ } else {
+ PTEBufferSizeNotExceeded[k] = false;
+ }
+
+ one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
+ PTEBufferSizeInRequestsForLuma[k] &&
+ PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
+ }
+
+ dml32_CalculateMALLUseForStaticScreen(
+ NumberOfActiveSurfaces,
+ MALLAllocatedForDCN,
+ UseMALLForStaticScreen, // mode
+ SurfaceSizeInMALL,
+ one_row_per_frame_fits_in_buffer,
+ /* Output */
+ UsesMALLForStaticScreen); // boolen
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
+ (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
+ (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
+ (GPUVMMinPageSizeKBytes[k] > 64);
+ BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]);
+ dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
+#endif
+ use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
+ (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
+ (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
+ (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
+
+ use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
+ !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
+
+ if (use_one_row_for_frame[k]) {
+ dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
+ dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
+ PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
+ dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
+ dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
+ PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
+ PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
+ }
+
+ if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
+ DCCMetaBufferSizeNotExceeded[k] = true;
+ else
+ DCCMetaBufferSizeNotExceeded[k] = false;
+
+ PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
+ if (use_one_row_for_frame[k])
+ PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
+
+ dml32_CalculateRowBandwidth(
+ GPUVMEnable,
+ myPipe[k].SourcePixelFormat,
+ myPipe[k].VRatio,
+ myPipe[k].VRatioChroma,
+ myPipe[k].DCCEnable,
+ myPipe[k].HTotal / myPipe[k].PixelClock,
+ MetaRowByteY[k], MetaRowByteC[k],
+ meta_row_height[k],
+ meta_row_height_chroma[k],
+ PixelPTEBytesPerRowY[k],
+ PixelPTEBytesPerRowC[k],
+ dpte_row_height_luma[k],
+ dpte_row_height_chroma[k],
+
+ /* Output */
+ &meta_row_bw[k],
+ &dpte_row_bw[k]);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]);
+ dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n",
+ __func__, k, use_one_row_for_frame_flip[k]);
+ dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n",
+ __func__, k, UseMALLForPStateChange[k]);
+ dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]);
+ dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n",
+ __func__, k, dpte_row_width_luma_ub[k]);
+ dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]);
+ dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n",
+ __func__, k, dpte_row_height_chroma[k]);
+ dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n",
+ __func__, k, dpte_row_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]);
+ dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]);
+ dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n",
+ __func__, k, PTEBufferSizeNotExceeded[k]);
+ dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
+ dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
+#endif
+ }
+} // CalculateVMRowAndSwath
+
+/*
+ * dml32_CalculateVMAndRowBytes() - size the meta-row, PTE-row and per-frame
+ * page-table quantities for one plane of one surface.
+ *
+ * The inputs describe a single plane (viewport, tiling, rotation, pitch,
+ * 256-byte block and macro-tile dimensions) together with the GPUVM / HostVM
+ * configuration.
+ *
+ * Outputs, all written through pointers:
+ *   MetaRequestWidth/Height - 8x the 256-byte block width/height.
+ *   meta_row_width/height   - chosen per tiling mode and rotation.
+ *   MetaRowByte             - 0 for linear tiling or when DCCEnable is false.
+ *   PixelPTEReqWidth/Height, PTERequestSize
+ *                           - chosen from tiling mode and
+ *                             GPUVMMinPageSizeKBytes.
+ *   dpte_row_height, dpte_row_width_ub, PixelPTEBytesPerRow
+ *                           - regular PTE row sizing; PixelPTEBytesPerRow is
+ *                             forced to 0 when GPUVMEnable is false and is
+ *                             scaled by (1 + 8 * HostVMDynamicLevels) when
+ *                             HostVMEnable is true.
+ *   *_one_row_per_frame     - the same three quantities computed with the row
+ *                             height set to vp_height_dpte_ub.
+ *   dpte_row_height_linear  - written ONLY for linear tiling; left untouched
+ *                             otherwise (and still read by the debug print).
+ *   DPDE0BytesFrame, MetaPTEBytesFrame
+ *                           - per-frame byte counts, 0 when the matching
+ *                             feature is disabled.
+ *
+ * Return: *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame +
+ * ExtraDPDEBytesFrame, multiplied by (1 + 8 * HostVMDynamicLevels) when
+ * HostVMEnable is true.
+ */
+unsigned int dml32_CalculateVMAndRowBytes(
+		bool ViewportStationary,
+		bool DCCEnable,
+		unsigned int NumberOfDPPs,
+		unsigned int BlockHeight256Bytes,
+		unsigned int BlockWidth256Bytes,
+		enum source_format_class SourcePixelFormat,
+		unsigned int SurfaceTiling,
+		unsigned int BytePerPixel,
+		enum dm_rotation_angle SourceRotation,
+		double SwathWidth,
+		unsigned int ViewportHeight,
+		unsigned int ViewportXStart,
+		unsigned int ViewportYStart,
+		bool GPUVMEnable,
+		bool HostVMEnable,
+		unsigned int HostVMMaxNonCachedPageTableLevels,
+		unsigned int GPUVMMaxPageTableLevels,
+		unsigned int GPUVMMinPageSizeKBytes,
+		unsigned int HostVMMinPageSize,
+		unsigned int PTEBufferSizeInRequests,
+		unsigned int Pitch,
+		unsigned int DCCMetaPitch,
+		unsigned int MacroTileWidth,
+		unsigned int MacroTileHeight,
+
+		/* Output */
+		unsigned int *MetaRowByte,
+		unsigned int *PixelPTEBytesPerRow,
+		unsigned int *dpte_row_width_ub,
+		unsigned int *dpte_row_height,
+		unsigned int *dpte_row_height_linear,
+		unsigned int *PixelPTEBytesPerRow_one_row_per_frame,
+		unsigned int *dpte_row_width_ub_one_row_per_frame,
+		unsigned int *dpte_row_height_one_row_per_frame,
+		unsigned int *MetaRequestWidth,
+		unsigned int *MetaRequestHeight,
+		unsigned int *meta_row_width,
+		unsigned int *meta_row_height,
+		unsigned int *PixelPTEReqWidth,
+		unsigned int *PixelPTEReqHeight,
+		unsigned int *PTERequestSize,
+		unsigned int *DPDE0BytesFrame,
+		unsigned int *MetaPTEBytesFrame)
+{
+	unsigned int MPDEBytesFrame;
+	unsigned int DCCMetaSurfaceBytes;
+	unsigned int ExtraDPDEBytesFrame;
+	unsigned int PDEAndMetaPTEBytesFrame;
+	unsigned int HostVMDynamicLevels = 0;
+	unsigned int MacroTileSizeBytes;
+	unsigned int vp_height_meta_ub;
+	unsigned int vp_height_dpte_ub;
+	unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
+
+	/*
+	 * HostVMDynamicLevels stays 0 unless both GPUVM and HostVM are on; a
+	 * larger HostVMMinPageSize removes one or two levels, clamped at 0.
+	 */
+	if (GPUVMEnable == true && HostVMEnable == true) {
+		if (HostVMMinPageSize < 2048)
+			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
+		else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
+			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
+		else
+			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
+	}
+
+	/* Meta request and meta row geometry. */
+	*MetaRequestHeight = 8 * BlockHeight256Bytes;
+	*MetaRequestWidth = 8 * BlockWidth256Bytes;
+	if (SurfaceTiling == dm_sw_linear) {
+		*meta_row_height = 32;
+		*meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
+				- dml_floor(ViewportXStart, *MetaRequestWidth);
+		/*
+		 * This branch previously left *MetaRowByte unwritten, so with
+		 * DCC enabled the output was whatever the caller's storage
+		 * held. Write 0 explicitly so the result no longer depends on
+		 * the caller pre-zeroing the destination.
+		 */
+		*MetaRowByte = 0;
+	} else if (!IsVertical(SourceRotation)) {
+		*meta_row_height = *MetaRequestHeight;
+		if (ViewportStationary && NumberOfDPPs == 1) {
+			*meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
+					*MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
+		} else {
+			*meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
+		}
+		*MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
+	} else {
+		*meta_row_height = *MetaRequestWidth;
+		if (ViewportStationary && NumberOfDPPs == 1) {
+			*meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
+					*MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
+		} else {
+			*meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
+		}
+		*MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
+	}
+
+	/* Viewport height rounded up to a multiple of 64 * BlockHeight256Bytes. */
+	if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
+		vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
+				64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
+	} else if (!IsVertical(SourceRotation)) {
+		vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
+	} else {
+		vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
+	}
+
+	DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
+
+	if (GPUVMEnable == true) {
+		*MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
+				(8 * 4.0 * 1024), 1) + 1) * 64;
+		MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
+	} else {
+		*MetaPTEBytesFrame = 0;
+		MPDEBytesFrame = 0;
+	}
+
+	/* Without DCC none of the meta quantities apply. */
+	if (DCCEnable != true) {
+		*MetaPTEBytesFrame = 0;
+		MPDEBytesFrame = 0;
+		*MetaRowByte = 0;
+	}
+
+	MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
+
+	if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
+		if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
+			vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
+					MacroTileHeight - 1, MacroTileHeight) -
+					dml_floor(ViewportYStart, MacroTileHeight);
+		} else if (!IsVertical(SourceRotation)) {
+			vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
+		} else {
+			vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
+		}
+		/*
+		 * NOTE(review): every operand of the dml_ceil() argument is an
+		 * unsigned int, so the subtraction wraps if the product is
+		 * smaller than MacroTileSizeBytes and the division truncates
+		 * before dml_ceil() sees it - confirm this is intended.
+		 */
+		*DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
+				(8 * 2097152), 1) + 1);
+		ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
+	} else {
+		*DPDE0BytesFrame = 0;
+		ExtraDPDEBytesFrame = 0;
+		vp_height_dpte_ub = 0;
+	}
+
+	PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
+	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
+	dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
+	dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
+	dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
+	dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
+	dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
+	dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
+	dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
+	dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
+	dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
+	dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
+	dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
+	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
+	dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
+	/* SwathWidth is a double, so it needs a floating-point conversion. */
+	dml_print("DML::%s: SwathWidth = %f\n", __func__, SwathWidth);
+	dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
+#endif
+
+	if (HostVMEnable == true)
+		PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
+
+	/* PTE request geometry depends on tiling mode and minimum page size. */
+	if (SurfaceTiling == dm_sw_linear) {
+		*PixelPTEReqHeight = 1;
+		*PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
+		PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
+		*PTERequestSize = 64;
+	} else if (GPUVMMinPageSizeKBytes == 4) {
+		*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
+		*PixelPTEReqWidth = 16 * BlockWidth256Bytes;
+		*PTERequestSize = 128;
+	} else {
+		*PixelPTEReqHeight = MacroTileHeight;
+		*PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
+		*PTERequestSize = 64;
+	}
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
+	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
+	dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
+	dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
+	dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
+	dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
+	dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
+#endif
+
+	/* "One row per frame" variant: the row height is vp_height_dpte_ub. */
+	*dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
+	*dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
+			(double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
+			(double) *PixelPTEReqWidth;
+	*PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
+			*PTERequestSize;
+
+	if (SurfaceTiling == dm_sw_linear) {
+		/* Power-of-two row height, capped at 128. */
+		*dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
+				*PixelPTEReqWidth / Pitch), 1));
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
+				PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
+		dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
+				dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
+		dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
+				dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
+		dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
+				1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
+						*PixelPTEReqWidth / Pitch), 1));
+		dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
+#endif
+		*dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
+				(double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
+		*PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
+
+		// VBA_DELTA, VBA doesn't have programming value for pte row height linear.
+		*dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
+				PixelPTEReqWidth_linear / Pitch), 1);
+		if (*dpte_row_height_linear > 128)
+			*dpte_row_height_linear = 128;
+
+	} else if (!IsVertical(SourceRotation)) {
+		*dpte_row_height = *PixelPTEReqHeight;
+
+		if (GPUVMMinPageSizeKBytes > 64) {
+			/*
+			 * NOTE(review): unlike the one-row-per-frame formula
+			 * above, this one runs in unsigned integer arithmetic,
+			 * so the divisions truncate before dml_ceil() -
+			 * confirm this is intended.
+			 */
+			*dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
+					*PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
+		} else if (ViewportStationary && (NumberOfDPPs == 1)) {
+			*dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
+					*PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
+					dml_floor(ViewportXStart, *PixelPTEReqWidth);
+		} else {
+			*dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
+					*PixelPTEReqWidth;
+		}
+
+		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
+	} else {
+		*dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
+
+		if (ViewportStationary && (NumberOfDPPs == 1)) {
+			*dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
+					*PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
+		} else {
+			*dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
+					* *PixelPTEReqHeight;
+		}
+
+		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
+	}
+
+	if (GPUVMEnable != true)
+		*PixelPTEBytesPerRow = 0;
+	if (HostVMEnable == true)
+		*PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
+	dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
+	dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
+	dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
+	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
+	dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
+	dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
+	dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
+			__func__, *dpte_row_width_ub_one_row_per_frame);
+	dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
+			__func__, *PixelPTEBytesPerRow_one_row_per_frame);
+	dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
+			*MetaPTEBytesFrame);
+#endif
+
+	return PDEAndMetaPTEBytesFrame;
+} // CalculateVMAndRowBytes
+
+/*
+ * dml32_CalculatePrefetchSourceLines() - number of source lines to prefetch
+ * for one plane.
+ *
+ * Outputs:
+ *   VInitPreFill - floor((VRatio + VTaps + 1) / 2); when
+ *                  ProgressiveToInterlaceUnitInOPP is false an extra
+ *                  Interlace * 0.5 * VRatio term is added inside the floor.
+ *   MaxNumSwath  - number of whole swaths counted towards the result.
+ *
+ * Return: *MaxNumSwath * SwathHeight + MaxPartialSwath.
+ *
+ * SwathHeight is used as a divisor and as a modulus, so it must be non-zero.
+ */
+double dml32_CalculatePrefetchSourceLines(
+		double VRatio,
+		unsigned int VTaps,
+		bool Interlace,
+		bool ProgressiveToInterlaceUnitInOPP,
+		unsigned int SwathHeight,
+		enum dm_rotation_angle SourceRotation,
+		bool ViewportStationary,
+		double SwathWidth,
+		unsigned int ViewportHeight,
+		unsigned int ViewportXStart,
+		unsigned int ViewportYStart,
+
+		/* Output */
+		double *VInitPreFill,
+		unsigned int *MaxNumSwath)
+{
+
+	/*
+	 * Only the stationary-viewport path assigns vp_start_rot, but the debug
+	 * print at the end reads it unconditionally; start from 0 so that print
+	 * never reads an indeterminate value.
+	 */
+	unsigned int vp_start_rot = 0;
+	unsigned int sw0_tmp;
+	unsigned int MaxPartialSwath;
+	double numLines;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
+	dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
+	dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
+	dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
+	dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
+	dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
+#endif
+	if (ProgressiveToInterlaceUnitInOPP)
+		*VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
+	else
+		*VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
+
+	if (ViewportStationary) {
+		/* Start position within the swath grid, chosen per rotation. */
+		if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
+			vp_start_rot = SwathHeight -
+					(((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
+		} else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
+			vp_start_rot = ViewportXStart;
+		} else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
+			vp_start_rot = SwathHeight -
+					(((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
+		} else {
+			vp_start_rot = ViewportYStart;
+		}
+		/* Lines left in the first swath after the start position. */
+		sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
+		if (sw0_tmp < *VInitPreFill)
+			*MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
+		else
+			*MaxNumSwath = 1;
+		MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
+	} else {
+		*MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
+		if (*VInitPreFill > 1)
+			MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
+		else
+			MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
+	}
+	numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
+	/* *VInitPreFill is a double, so it needs a floating-point conversion. */
+	dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
+	dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
+	dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
+	dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
+#endif
+	return numLines;
+
+} // CalculatePrefetchSourceLines
+
+/*
+ * dml32_CalculateMALLUseForStaticScreen() - choose which surfaces use MALL
+ * for static screen.
+ *
+ * Surfaces whose mode is dm_use_mall_static_screen_enable are always
+ * selected and their sizes summed. After that, surfaces that are neither
+ * selected nor set to dm_use_mall_static_screen_disable, and whose
+ * one_row_per_frame_fits_in_buffer[] entry is true, are added one at a time,
+ * smallest SurfaceSizeInMALL first, for as long as the running total stays
+ * within MALLAllocatedForDCNFinal * 1024 * 1024.
+ *
+ * The result is written to UsesMALLForStaticScreen[0..NumberOfActiveSurfaces).
+ */
+void dml32_CalculateMALLUseForStaticScreen(
+		unsigned int NumberOfActiveSurfaces,
+		unsigned int MALLAllocatedForDCNFinal,
+		enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
+		unsigned int SurfaceSizeInMALL[],
+		bool one_row_per_frame_fits_in_buffer[],
+
+		/* output */
+		bool UsesMALLForStaticScreen[])
+{
+	unsigned int k;
+	unsigned int candidate = 0;
+	unsigned int used_size = 0;
+	bool found;
+
+	/* Pass 1: surfaces that are explicitly enabled. */
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
+		if (UsesMALLForStaticScreen[k])
+			used_size += SurfaceSizeInMALL[k];
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
+		dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, used_size);
+#endif
+	}
+
+	/* Pass 2: repeatedly add the smallest remaining surface that fits. */
+	do {
+		found = false;
+		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+			if (used_size + SurfaceSizeInMALL[k] > MALLAllocatedForDCNFinal * 1024 * 1024)
+				continue;
+			if (UsesMALLForStaticScreen[k])
+				continue;
+			if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_disable)
+				continue;
+			if (!one_row_per_frame_fits_in_buffer[k])
+				continue;
+			/* Keep the current candidate unless this one is strictly smaller. */
+			if (found && SurfaceSizeInMALL[k] >= SurfaceSizeInMALL[candidate])
+				continue;
+
+			found = true;
+			candidate = k;
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
+					__func__, k, UseMALLForStaticScreen[k]);
+#endif
+		}
+
+		if (found) {
+			UsesMALLForStaticScreen[candidate] = true;
+			used_size += SurfaceSizeInMALL[candidate];
+
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, candidate);
+			dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, used_size);
+#endif
+
+		}
+	} while (found);
+}
+
+/*
+ * dml32_CalculateRowBandwidth() - meta-row and PTE-row bandwidth of one
+ * surface.
+ *
+ * Each plane contributes VRatio * bytes_per_row / (row_height * LineTime).
+ * The chroma term is added only for dm_420_8, dm_420_10, dm_420_12 and
+ * dm_rgbe_alpha. *meta_row_bw is 0 when DCCEnable is false and *dpte_row_bw
+ * is 0 when GPUVMEnable is false.
+ */
+void dml32_CalculateRowBandwidth(
+		bool GPUVMEnable,
+		enum source_format_class SourcePixelFormat,
+		double VRatio,
+		double VRatioChroma,
+		bool DCCEnable,
+		double LineTime,
+		unsigned int MetaRowByteLuma,
+		unsigned int MetaRowByteChroma,
+		unsigned int meta_row_height_luma,
+		unsigned int meta_row_height_chroma,
+		unsigned int PixelPTEBytesPerRowLuma,
+		unsigned int PixelPTEBytesPerRowChroma,
+		unsigned int dpte_row_height_luma,
+		unsigned int dpte_row_height_chroma,
+		/* Output */
+		double *meta_row_bw,
+		double *dpte_row_bw)
+{
+	/* Formats for which the chroma row traffic is counted as well. */
+	bool count_chroma = SourcePixelFormat == dm_420_8 ||
+			SourcePixelFormat == dm_420_10 ||
+			SourcePixelFormat == dm_420_12 ||
+			SourcePixelFormat == dm_rgbe_alpha;
+
+	if (!DCCEnable)
+		*meta_row_bw = 0;
+	else if (!count_chroma)
+		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
+	else
+		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
+				MetaRowByteChroma / (meta_row_height_chroma * LineTime);
+
+	if (!GPUVMEnable)
+		*dpte_row_bw = 0;
+	else if (!count_chroma)
+		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
+	else
+		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
+				VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
+}
+
+/*
+ * dml32_CalculateUrgentLatency() - largest of the three urgent latencies,
+ * optionally adjusted for the fabric clock.
+ *
+ * Return: dml_max3() of the three latency inputs. When
+ * DoUrgentLatencyAdjustment is true,
+ * UrgentLatencyAdjustmentFabricClockComponent *
+ * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1) is added.
+ */
+double dml32_CalculateUrgentLatency(
+		double UrgentLatencyPixelDataOnly,
+		double UrgentLatencyPixelMixedWithVMData,
+		double UrgentLatencyVMDataOnly,
+		bool DoUrgentLatencyAdjustment,
+		double UrgentLatencyAdjustmentFabricClockComponent,
+		double UrgentLatencyAdjustmentFabricClockReference,
+		double FabricClock)
+{
+	double latency = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData,
+			UrgentLatencyVMDataOnly);
+
+	if (!DoUrgentLatencyAdjustment)
+		return latency;
+
+	return latency + UrgentLatencyAdjustmentFabricClockComponent *
+			(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
+}
+
+/*
+ * dml32_CalculateUrgentBurstFactor() - urgent burst factors for the cursor,
+ * luma and chroma buffers of one surface.
+ *
+ * For each buffer the time it can cover is computed as
+ * lines_in_buffer * LineTime / vertical_ratio, and the factor is
+ *	buffer_time / (buffer_time - UrgentLatency).
+ * If buffer_time does not exceed UrgentLatency the factor is set to 0 and
+ * *NotEnoughUrgentLatencyHiding is set. If the vertical ratio is not
+ * positive the factor is 1.
+ *
+ * For dm_use_mall_pstate_change_phantom_pipe, 1024 * 1024 is used in place of
+ * the DET buffer size.
+ *
+ * Outputs that are not always written:
+ *   *UrgentBurstFactorCursor is left untouched when CursorWidth is 0.
+ *   *UrgentBurstFactorChroma is left untouched when BytePerPixelInDETC is
+ *   not positive.
+ */
+void dml32_CalculateUrgentBurstFactor(
+		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
+		unsigned int swath_width_luma_ub,
+		unsigned int swath_width_chroma_ub,
+		unsigned int SwathHeightY,
+		unsigned int SwathHeightC,
+		double LineTime,
+		double UrgentLatency,
+		double CursorBufferSize,
+		unsigned int CursorWidth,
+		unsigned int CursorBPP,
+		double VRatio,
+		double VRatioC,
+		double BytePerPixelInDETY,
+		double BytePerPixelInDETC,
+		unsigned int DETBufferSizeY,
+		unsigned int DETBufferSizeC,
+		/* Output */
+		double *UrgentBurstFactorCursor,
+		double *UrgentBurstFactorLuma,
+		double *UrgentBurstFactorChroma,
+		bool *NotEnoughUrgentLatencyHiding)
+{
+	double LinesInDETLuma;
+	double LinesInDETChroma;
+	unsigned int LinesInCursorBuffer;
+	double CursorBufferSizeInTime;
+	double DETBufferSizeInTimeLuma;
+	double DETBufferSizeInTimeChroma;
+
+	*NotEnoughUrgentLatencyHiding = false;
+
+	if (CursorWidth > 0) {
+		/* Cursor lines held by the buffer, rounded down to a power of two. */
+		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
+				(CursorWidth * CursorBPP / 8.0)), 1.0);
+		if (VRatio > 0) {
+			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
+			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
+				*NotEnoughUrgentLatencyHiding = true;
+				*UrgentBurstFactorCursor = 0;
+			} else {
+				*UrgentBurstFactorCursor = CursorBufferSizeInTime /
+						(CursorBufferSizeInTime - UrgentLatency);
+			}
+		} else {
+			*UrgentBurstFactorCursor = 1;
+		}
+	}
+
+	LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
+			DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
+
+	if (VRatio > 0) {
+		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
+		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
+			*NotEnoughUrgentLatencyHiding = true;
+			*UrgentBurstFactorLuma = 0;
+		} else {
+			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
+		}
+	} else {
+		*UrgentBurstFactorLuma = 1;
+	}
+
+	if (BytePerPixelInDETC > 0) {
+		LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
+				1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
+				/ swath_width_chroma_ub;
+
+		/*
+		 * The chroma buffer time is based on the chroma vertical ratio.
+		 * This section previously tested and divided by the luma
+		 * VRatio and never used the VRatioC parameter, although every
+		 * other quantity here is the chroma variant.
+		 */
+		if (VRatioC > 0) {
+			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
+			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
+				*NotEnoughUrgentLatencyHiding = true;
+				*UrgentBurstFactorChroma = 0;
+			} else {
+				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
+						/ (DETBufferSizeInTimeChroma - UrgentLatency);
+			}
+		} else {
+			*UrgentBurstFactorChroma = 1;
+		}
+	}
+} // CalculateUrgentBurstFactor
+
+/*
+ * dml32_CalculateDCFCLKDeepSleep() - DCFCLK value reported for deep sleep.
+ *
+ * A per-surface requirement is derived from the swath width, bytes per pixel
+ * and line delivery time of each plane, with a floor of PixelClock / 16. The
+ * result written to *DCFClkDeepSleep is the maximum of 8.0,
+ * __DML_MIN_DCFCLK_FACTOR__ * total read bandwidth / ReturnBusWidth, and
+ * every per-surface requirement.
+ */
+void dml32_CalculateDCFCLKDeepSleep(
+		unsigned int NumberOfActiveSurfaces,
+		unsigned int BytePerPixelY[],
+		unsigned int BytePerPixelC[],
+		double VRatio[],
+		double VRatioChroma[],
+		double SwathWidthY[],
+		double SwathWidthC[],
+		unsigned int DPPPerSurface[],
+		double HRatio[],
+		double HRatioChroma[],
+		double PixelClock[],
+		double PSCL_THROUGHPUT[],
+		double PSCL_THROUGHPUT_CHROMA[],
+		double Dppclk[],
+		double ReadBandwidthLuma[],
+		double ReadBandwidthChroma[],
+		unsigned int ReturnBusWidth,
+
+		/* Output */
+		double *DCFClkDeepSleep)
+{
+	unsigned int k;
+	double luma_time;
+	double chroma_time;
+	double per_surface[DC__NUM_DPP__MAX];
+	double total_read_bw = 0.0;
+
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		/* Line delivery time of the luma plane. */
+		luma_time = (VRatio[k] <= 1) ?
+				SwathWidthY[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k] :
+				SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+
+		if (BytePerPixelC[k] > 0) {
+			/* Chroma plane present: take the larger of the two planes. */
+			chroma_time = (VRatioChroma[k] <= 1) ?
+					SwathWidthC[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k] :
+					SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+
+			per_surface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
+					BytePerPixelY[k] / 32.0 / luma_time,
+					__DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
+					32.0 / chroma_time);
+		} else {
+			per_surface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
+					64.0 / luma_time;
+		}
+
+		/* Never go below PixelClock / 16. */
+		per_surface[k] = dml_max(per_surface[k], PixelClock[k] / 16);
+
+		total_read_bw = total_read_bw + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
+		dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, per_surface[k]);
+#endif
+	}
+
+	*DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * total_read_bw / (double) ReturnBusWidth);
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
+	dml_print("DML::%s: ReadBandwidth = %f\n", __func__, total_read_bw);
+	dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
+	dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
+#endif
+
+	/* Raise the result to the most demanding surface. */
+	for (k = 0; k < NumberOfActiveSurfaces; ++k)
+		*DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, per_surface[k]);
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
+#endif
+} // CalculateDCFCLKDeepSleep
+
+/*
+ * dml32_CalculateWriteBackDelay - writeback delay for one writeback surface.
+ *
+ * Computes the index of the last output line before clamping; when that index
+ * is negative the delay is 0, otherwise it is
+ *   last_line * line_length + (HTotal - WritebackDestinationWidth) + 80.
+ *
+ * WritebackPixelFormat and WritebackHRatio are accepted but not used by this
+ * calculation.
+ */
+double dml32_CalculateWriteBackDelay(
+		enum source_format_class WritebackPixelFormat,
+		double WritebackHRatio,
+		double WritebackVRatio,
+		unsigned int WritebackVTaps,
+		unsigned int WritebackDestinationWidth,
+		unsigned int WritebackDestinationHeight,
+		unsigned int WritebackSourceHeight,
+		unsigned int HTotal)
+{
+	double VInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
+	/* A line is at least the destination width, or ceil(width / 6) per vertical tap. */
+	double LineLength = dml_max((double) WritebackDestinationWidth,
+			dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
+	double LastOutputLine = WritebackDestinationHeight - 1 -
+			dml_ceil(((double)WritebackSourceHeight -
+					(double) VInit) / (double)WritebackVRatio, 1.0);
+
+	if (LastOutputLine < 0)
+		return 0;
+
+	return LastOutputLine * LineLength +
+			(HTotal - WritebackDestinationWidth) + 80;
+}
+
+/*
+ * dml32_UseMinimumDCFCLK - compute the DCFCLK to use for every voltage state.
+ *
+ * For each voltage state i (0 .. DC__VOLTAGE_STATES - 1) and each j in {0, 1}
+ * this estimates the DCFCLK required for average bandwidth and for peak
+ * (prefetch) bandwidth, then stores
+ *   min(DCFCLKPerState[i], 1.05 * max(average, peak))
+ * in DCFCLKState[i][j].  Whenever the time available for prefetch or for
+ * urgent extra latency is not positive, the requirement falls back to
+ * DCFCLKPerState[i].
+ *
+ * Per-surface arrays are indexed 0 .. NumberOfActiveSurfaces - 1; arrays
+ * declared [][2][DC__NUM_DPP__MAX] are indexed [i][j][k].  DCFCLKState is the
+ * only output.
+ *
+ * All surface loops use "k < NumberOfActiveSurfaces".  The count is unsigned,
+ * so the former "k <= NumberOfActiveSurfaces - 1" form wrapped to UINT_MAX for
+ * a count of zero and indexed past the DC__NUM_DPP__MAX-sized local arrays.
+ */
+void dml32_UseMinimumDCFCLK(
+		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+		bool DRRDisplay[],
+		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+		unsigned int MaxInterDCNTileRepeaters,
+		unsigned int MaxPrefetchMode,
+		double DRAMClockChangeLatencyFinal,
+		double FCLKChangeLatency,
+		double SREnterPlusExitTime,
+		unsigned int ReturnBusWidth,
+		unsigned int RoundTripPingLatencyCycles,
+		unsigned int ReorderingBytes,
+		unsigned int PixelChunkSizeInKByte,
+		unsigned int MetaChunkSize,
+		bool GPUVMEnable,
+		unsigned int GPUVMMaxPageTableLevels,
+		bool HostVMEnable,
+		unsigned int NumberOfActiveSurfaces,
+		double HostVMMinPageSize,
+		unsigned int HostVMMaxNonCachedPageTableLevels,
+		bool DynamicMetadataVMEnabled,
+		bool ImmediateFlipRequirement,
+		bool ProgressiveToInterlaceUnitInOPP,
+		double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
+		double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
+		unsigned int VTotal[],
+		unsigned int VActive[],
+		unsigned int DynamicMetadataTransmittedBytes[],
+		unsigned int DynamicMetadataLinesBeforeActiveRequired[],
+		bool Interlace[],
+		double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
+		double RequiredDISPCLK[][2],
+		double UrgLatency[],
+		unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
+		double ProjectedDCFClkDeepSleep[][2],
+		double MaximumVStartup[][2][DC__NUM_DPP__MAX],
+		unsigned int TotalNumberOfActiveDPP[][2],
+		unsigned int TotalNumberOfDCCActiveDPP[][2],
+		unsigned int dpte_group_bytes[],
+		double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
+		double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
+		unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
+		unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
+		unsigned int BytePerPixelY[],
+		unsigned int BytePerPixelC[],
+		unsigned int HTotal[],
+		double PixelClock[],
+		double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
+		double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
+		double MetaRowBytes[][2][DC__NUM_DPP__MAX],
+		bool DynamicMetadataEnable[],
+		double ReadBandwidthLuma[],
+		double ReadBandwidthChroma[],
+		double DCFCLKPerState[],
+		/* Output */
+		double DCFCLKState[][2])
+{
+	unsigned int i, j, k;
+	/* Sinks for outputs of the VUpdate helper that are not needed here. */
+	unsigned int dummy1;
+	double dummy2, dummy3;
+	double NormalEfficiency;
+	double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
+
+	NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
+	for (i = 0; i < DC__VOLTAGE_STATES; ++i) {
+		for (j = 0; j <= 1; ++j) {
+			double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
+			double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
+			double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
+			double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
+			double MinimumTWait = 0.0;
+			double DPTEBandwidth;
+			double DCFCLKRequiredForAverageBandwidth;
+			unsigned int ExtraLatencyBytes;
+			double ExtraLatencyCycles;
+			double DCFCLKRequiredForPeakBandwidth;
+			unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
+			double MinimumTvmPlus2Tr0;
+
+			TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
+			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+				TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
+						+ NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
+								/ (15.75 * HTotal[k] / PixelClock[k]);
+			}
+
+			/* Flatten NoOfDPP[i][j][] into a plain per-surface array for the helper call below. */
+			for (k = 0; k < NumberOfActiveSurfaces; ++k)
+				NoOfDPPState[k] = NoOfDPP[i][j][k];
+
+			DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
+			DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
+
+			ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
+					TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
+					TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
+					NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
+					HostVMMaxNonCachedPageTableLevels);
+			ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
+					+ ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
+			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+				double DCFCLKCyclesRequiredInPrefetch;
+				double PrefetchTime;
+
+				PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
+						* swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
+						+ PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
+								* BytePerPixelC[k]) / NormalEfficiency
+						/ ReturnBusWidth;
+				DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
+						+ PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
+								/ NormalEfficiency / ReturnBusWidth
+								* (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
+						+ 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
+								/ ReturnBusWidth
+						+ 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
+						+ PixelDCFCLKCyclesRequiredInPrefetch[k];
+				PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
+						* HTotal[k] / PixelClock[k];
+				DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
+						DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
+						UrgLatency[i] * GPUVMMaxPageTableLevels *
+						(HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
+
+				MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
+						UseMALLForPStateChange[k],
+						SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+						DRRDisplay[k],
+						DRAMClockChangeLatencyFinal,
+						FCLKChangeLatency,
+						UrgLatency[i],
+						SREnterPlusExitTime);
+
+				PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
+						MinimumTWait - UrgLatency[i] *
+						((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
+						GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ?
+						HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
+						DynamicMetadataVMExtraLatency[k];
+
+				if (PrefetchTime > 0) {
+					double ExpectedVRatioPrefetch;
+
+					ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
+							PixelDCFCLKCyclesRequiredInPrefetch[k] /
+							DCFCLKCyclesRequiredInPrefetch);
+					DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
+							PixelDCFCLKCyclesRequiredInPrefetch[k] /
+							PrefetchPixelLinesTime[k] *
+							dml_max(1.0, ExpectedVRatioPrefetch) *
+							dml_max(1.0, ExpectedVRatioPrefetch / 4);
+					if (HostVMEnable == true || ImmediateFlipRequirement == true) {
+						DCFCLKRequiredForPeakBandwidthPerSurface[k] =
+								DCFCLKRequiredForPeakBandwidthPerSurface[k] +
+								NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
+								NormalEfficiency / ReturnBusWidth;
+					}
+				} else {
+					/* No time left to prefetch: demand the full clock of this state. */
+					DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
+				}
+				if (DynamicMetadataEnable[k] == true) {
+					double TSetupPipe;
+					double TdmbfPipe;
+					double TdmsksPipe;
+					double TdmecPipe;
+					double AllowedTimeForUrgentExtraLatency;
+
+					dml32_CalculateVUpdateAndDynamicMetadataParameters(
+							MaxInterDCNTileRepeaters,
+							RequiredDPPCLKPerSurface[i][j][k],
+							RequiredDISPCLK[i][j],
+							ProjectedDCFClkDeepSleep[i][j],
+							PixelClock[k],
+							HTotal[k],
+							VTotal[k] - VActive[k],
+							DynamicMetadataTransmittedBytes[k],
+							DynamicMetadataLinesBeforeActiveRequired[k],
+							Interlace[k],
+							ProgressiveToInterlaceUnitInOPP,
+
+							/* output */
+							&TSetupPipe,
+							&TdmbfPipe,
+							&TdmecPipe,
+							&TdmsksPipe,
+							&dummy1,
+							&dummy2,
+							&dummy3);
+					AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
+							PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
+							TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
+					if (AllowedTimeForUrgentExtraLatency > 0)
+						DCFCLKRequiredForPeakBandwidthPerSurface[k] =
+								dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
+								ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
+					else
+						DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
+				}
+			}
+			DCFCLKRequiredForPeakBandwidth = 0;
+			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+				DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
+						DCFCLKRequiredForPeakBandwidthPerSurface[k];
+			}
+			MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
+					(HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
+					(HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
+			/*
+			 * NOTE(review): MinimumTWait below holds the value computed for the
+			 * last surface of the loop above, not for surface k - confirm that
+			 * this is intended.
+			 */
+			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+				double MaximumTvmPlus2Tr0PlusTsw;
+
+				MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
+						PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
+				if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
+					DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
+				} else {
+					DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
+							2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
+								MinimumTvmPlus2Tr0 -
+								PrefetchPixelLinesTime[k] / 4),
+							(2 * ExtraLatencyCycles +
+								PixelDCFCLKCyclesRequiredInPrefetch[k]) /
+								(MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
+				}
+			}
+			/* 5% margin over the larger requirement, capped at the state's own DCFCLK. */
+			DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
+					dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
+		}
+	}
+}
+
+/*
+ * dml32_CalculateExtraLatencyBytes - byte count behind the extra latency term.
+ *
+ * The total is ReorderingBytes plus the pixel and meta chunk sizes (given in
+ * KByte, hence the 1024.0 factor) of all active DPPs.  When GPUVM is enabled,
+ * each surface additionally contributes its DPTE group bytes per DPP, scaled
+ * by (1 + 8 * host VM dynamic levels) and HostVMInefficiencyFactor.
+ *
+ * The host VM dynamic level count is only non-zero when both GPUVM and HostVM
+ * are enabled, and shrinks by one or two levels as HostVMMinPageSize grows.
+ *
+ * The sum is accumulated as a double and converted to unsigned int on return.
+ */
+unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
+		unsigned int TotalNumberOfActiveDPP,
+		unsigned int PixelChunkSizeInKByte,
+		unsigned int TotalNumberOfDCCActiveDPP,
+		unsigned int MetaChunkSize,
+		bool GPUVMEnable,
+		bool HostVMEnable,
+		unsigned int NumberOfActiveSurfaces,
+		unsigned int NumberOfDPP[],
+		unsigned int dpte_group_bytes[],
+		double HostVMInefficiencyFactor,
+		double HostVMMinPageSize,
+		unsigned int HostVMMaxNonCachedPageTableLevels)
+{
+	unsigned int Surface;
+	unsigned int DynamicLevels = 0;
+	unsigned int LevelMultiplier;
+	double TotalBytes;
+
+	if (GPUVMEnable && HostVMEnable) {
+		if (HostVMMinPageSize < 2048)
+			DynamicLevels = HostVMMaxNonCachedPageTableLevels;
+		else if (HostVMMinPageSize < 1048576)
+			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
+		else
+			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
+	}
+
+	TotalBytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
+			TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
+
+	if (!GPUVMEnable)
+		return TotalBytes;
+
+	/* Loop invariant: page table overhead multiplier applied to every DPTE group. */
+	LevelMultiplier = 1 + 8 * DynamicLevels;
+	for (Surface = 0; Surface < NumberOfActiveSurfaces; ++Surface) {
+		TotalBytes = TotalBytes + NumberOfDPP[Surface] * dpte_group_bytes[Surface] *
+				LevelMultiplier * HostVMInefficiencyFactor;
+	}
+
+	return TotalBytes;
+}
+
+/*
+ * dml32_CalculateVUpdateAndDynamicMetadataParameters - VUpdate pixel values
+ * and dynamic metadata timing terms for one pipe.
+ *
+ * Outputs:
+ *   *VUpdateWidthPix  - (14 / DCFClkDeepSleep + 12 / Dppclk + repeater delay)
+ *                       * PixelClock, rounded up to a whole pixel
+ *   *VReadyOffsetPix  - max(150 / Dppclk, repeater delay
+ *                       + 20 / DCFClkDeepSleep + 10 / Dppclk) * PixelClock,
+ *                       rounded up to a whole pixel
+ *   *VUpdateOffsetPix - ceil(HTotal / 4)
+ *   *TSetup           - sum of the three pixel values above / PixelClock
+ *   *Tdmbf            - DynamicMetadataTransmittedBytes / 4 / Dispclk
+ *   *Tdmec            - one line time, HTotal / PixelClock
+ *   *Tdmsks           - half of VBlank expressed in line times when
+ *                       DynamicMetadataLinesBeforeActiveRequired is 0,
+ *                       otherwise that many line times; halved once more when
+ *                       InterlaceEnable is 1 and
+ *                       ProgressiveToInterlaceUnitInOPP is false
+ *
+ * The repeater delay is MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk).
+ * Dppclk, Dispclk, DCFClkDeepSleep and PixelClock are used as divisors without
+ * a zero check here.
+ */
+void dml32_CalculateVUpdateAndDynamicMetadataParameters(
+		unsigned int MaxInterDCNTileRepeaters,
+		double Dppclk,
+		double Dispclk,
+		double DCFClkDeepSleep,
+		double PixelClock,
+		unsigned int HTotal,
+		unsigned int VBlank,
+		unsigned int DynamicMetadataTransmittedBytes,
+		unsigned int DynamicMetadataLinesBeforeActiveRequired,
+		unsigned int InterlaceEnable,
+		bool ProgressiveToInterlaceUnitInOPP,
+
+		/* output */
+		double *TSetup,
+		double *Tdmbf,
+		double *Tdmec,
+		double *Tdmsks,
+		unsigned int *VUpdateOffsetPix,
+		double *VUpdateWidthPix,
+		double *VReadyOffsetPix)
+{
+	double TotalRepeaterDelayTime;
+
+	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
+	*VUpdateWidthPix =
+			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
+	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / Dppclk,
+			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
+	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
+	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
+	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
+	*Tdmec = HTotal / PixelClock;
+
+	if (DynamicMetadataLinesBeforeActiveRequired == 0)
+		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
+	else
+		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
+
+	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
+		*Tdmsks = *Tdmsks / 2;
+#ifdef __DML_VBA_DEBUG__
+	/* VUpdateWidthPix and VReadyOffsetPix are doubles, so they need %f; %d would mismatch the argument type. */
+	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
+	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
+	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
+
+	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
+			__func__, DynamicMetadataLinesBeforeActiveRequired);
+	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
+	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
+	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
+	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
+#endif
+}
+
+/*
+ * dml32_CalculateTWait - wait time to budget for, based on the prefetch mode.
+ *
+ * Selection, first match wins:
+ *   1. PrefetchMode == 0, no MALL use of any kind (full frame, sub viewport or
+ *      phantom pipe) and not a synchronized DRR display:
+ *        max(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime,
+ *            UrgentLatency)
+ *   2. PrefetchMode <= 1 and not a phantom pipe:
+ *        max(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime,
+ *            UrgentLatency)
+ *   3. PrefetchMode <= 2 and not a phantom pipe:
+ *        max(SREnterPlusExitTime, UrgentLatency)
+ *   4. otherwise: UrgentLatency
+ */
+double dml32_CalculateTWait(
+		unsigned int PrefetchMode,
+		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
+		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+		bool DRRDisplay,
+		double DRAMClockChangeLatency,
+		double FCLKChangeLatency,
+		double UrgentLatency,
+		double SREnterPlusExitTime)
+{
+	bool IsPhantomPipe = UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe;
+	bool UsesMALL = UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame ||
+			UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport ||
+			IsPhantomPipe;
+	bool IsSynchronizedDRR = SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay;
+	double WaitTime;
+
+	if (PrefetchMode == 0 && !UsesMALL && !IsSynchronizedDRR)
+		WaitTime = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
+	else if (!IsPhantomPipe && PrefetchMode <= 1)
+		WaitTime = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
+	else if (!IsPhantomPipe && PrefetchMode <= 2)
+		WaitTime = dml_max(SREnterPlusExitTime, UrgentLatency);
+	else
+		WaitTime = UrgentLatency;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
+	dml_print("DML::%s: TWait = %f\n", __func__, WaitTime);
+#endif
+	return WaitTime;
+} // CalculateTWait
+
+/*
+ * dml32_get_return_bw_mbps - return bandwidth, in megabytes per second.
+ *
+ * The result is the smallest of three derated ideal bandwidths:
+ *   - SDP port: return_bus_width_bytes * DCFCLK, derated by
+ *     pct_ideal_sdp_bw_after_urgent
+ *   - fabric:   FabricClock * fabric_datapath_to_dcn_data_return_bytes,
+ *     derated by pct_ideal_fabric_bw_after_urgent
+ *   - DRAM:     DRAMSpeed * num_chans * dram_channel_width_bytes, derated by
+ *     pct_ideal_dram_bw_after_urgent_strobe when VoltageLevel < 2 and by
+ *     pct_ideal_dram_sdp_bw_after_urgent_pixel_only otherwise
+ *
+ * NOTE(review): the "pixel data only" and "pixel mixed with VM data" values
+ * are computed from identical expressions, so HostVMEnable currently has no
+ * effect on the result - confirm whether the mixed case was meant to use a
+ * different DRAM percentage.
+ */
+double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
+		const int VoltageLevel,
+		const bool HostVMEnable,
+		const double DCFCLK,
+		const double FabricClock,
+		const double DRAMSpeed)
+{
+	double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
+	double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
+	double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
+	/* Each ideal bandwidth derated by its "after urgent" percentage. */
+	double SDPPortLimit = IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100;
+	double FabricLimit = IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100;
+	double DRAMLimit = IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
+			soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100;
+	double PixelDataOnlyReturnBW = dml_min3(SDPPortLimit, FabricLimit, DRAMLimit);
+	double PixelMixedWithVMDataReturnBW = dml_min3(SDPPortLimit, FabricLimit, DRAMLimit);
+	double ReturnBW = HostVMEnable ? PixelMixedWithVMDataReturnBW : PixelDataOnlyReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
+	dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
+	dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
+	dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
+	dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
+	dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth);
+	dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth);
+	dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth);
+	dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW);
+	dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
+	dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW);
+#endif
+	return ReturnBW;
+}
+
+/*
+ * dml32_get_return_bw_mbps_vm_only - return bandwidth for VM data only, in
+ * megabytes per second.
+ *
+ * The result is the smallest of the derated SDP port, fabric and DRAM
+ * bandwidths.  The DRAM term uses pct_ideal_dram_bw_after_urgent_strobe when
+ * VoltageLevel < 2 and pct_ideal_dram_sdp_bw_after_urgent_vm_only otherwise.
+ *
+ * NOTE(review): the fabric term is derated by pct_ideal_sdp_bw_after_urgent
+ * here, whereas dml32_get_return_bw_mbps() derates it by
+ * pct_ideal_fabric_bw_after_urgent - confirm which one is intended.
+ */
+double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
+		const int VoltageLevel,
+		const double DCFCLK,
+		const double FabricClock,
+		const double DRAMSpeed)
+{
+	double SDPPortLimit = soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0;
+	double FabricLimit = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
+			* soc->pct_ideal_sdp_bw_after_urgent / 100.0;
+	double DRAMLimit = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
+			* (VoltageLevel < 2 ?
+					soc->pct_ideal_dram_bw_after_urgent_strobe :
+					soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0;
+	double VMDataOnlyReturnBW = dml_min3(SDPPortLimit, FabricLimit, DRAMLimit);
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
+	dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
+	dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
+	dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
+	dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
+#endif
+	return VMDataOnlyReturnBW;
+}
+
+/*
+ * dml32_CalculateExtraLatency - extra latency as a time value.
+ *
+ * Adds two terms:
+ *   - the fixed cycle count (RoundTripPingLatencyCycles plus
+ *     __DML_ARB_TO_RET_DELAY__) divided by DCFCLK
+ *   - the byte count from dml32_CalculateExtraLatencyBytes() divided by
+ *     ReturnBW
+ *
+ * Every argument other than RoundTripPingLatencyCycles, DCFCLK and ReturnBW is
+ * forwarded unchanged to dml32_CalculateExtraLatencyBytes().
+ */
+double dml32_CalculateExtraLatency(
+		unsigned int RoundTripPingLatencyCycles,
+		unsigned int ReorderingBytes,
+		double DCFCLK,
+		unsigned int TotalNumberOfActiveDPP,
+		unsigned int PixelChunkSizeInKByte,
+		unsigned int TotalNumberOfDCCActiveDPP,
+		unsigned int MetaChunkSize,
+		double ReturnBW,
+		bool GPUVMEnable,
+		bool HostVMEnable,
+		unsigned int NumberOfActiveSurfaces,
+		unsigned int NumberOfDPP[],
+		unsigned int dpte_group_bytes[],
+		double HostVMInefficiencyFactor,
+		double HostVMMinPageSize,
+		unsigned int HostVMMaxNonCachedPageTableLevels)
+{
+	double LatencyBytes = dml32_CalculateExtraLatencyBytes(
+			ReorderingBytes, TotalNumberOfActiveDPP, PixelChunkSizeInKByte,
+			TotalNumberOfDCCActiveDPP, MetaChunkSize,
+			GPUVMEnable, HostVMEnable,
+			NumberOfActiveSurfaces, NumberOfDPP, dpte_group_bytes,
+			HostVMInefficiencyFactor, HostVMMinPageSize,
+			HostVMMaxNonCachedPageTableLevels);
+	double FixedCycleTime = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK;
+	double ByteTransferTime = LatencyBytes / ReturnBW;
+	double Latency = FixedCycleTime + ByteTransferTime;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
+	dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
+	dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, LatencyBytes);
+	dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
+	dml_print("DML::%s: ExtraLatency=%f\n", __func__, Latency);
+#endif
+
+	return Latency;
+} // CalculateExtraLatency
+
+bool dml32_CalculatePrefetchSchedule(
+ struct vba_vars_st *v,
+ unsigned int k,
+ double HostVMInefficiencyFactor,
+ DmlPipe *myPipe,
+ unsigned int DSCDelay,
+ unsigned int DPP_RECOUT_WIDTH,
+ unsigned int VStartup,
+ unsigned int MaxVStartup,
+ double UrgentLatency,
+ double UrgentExtraLatency,
+ double TCalc,
+ unsigned int PDEAndMetaPTEBytesFrame,
+ unsigned int MetaRowByte,
+ unsigned int PixelPTEBytesPerRow,
+ double PrefetchSourceLinesY,
+ unsigned int SwathWidthY,
+ unsigned int VInitPreFillY,
+ unsigned int MaxNumSwathY,
+ double PrefetchSourceLinesC,
+ unsigned int SwathWidthC,
+ unsigned int VInitPreFillC,
+ unsigned int MaxNumSwathC,
+ unsigned int swath_width_luma_ub,
+ unsigned int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double TWait,
+ double TPreReq,
+ bool ExtendPrefetchIfPossible,
+ /* Output */
+ double *DSTXAfterScaler,
+ double *DSTYAfterScaler,
+ double *DestinationLinesForPrefetch,
+ double *PrefetchBandwidth,
+ double *DestinationLinesToRequestVMInVBlank,
+ double *DestinationLinesToRequestRowInVBlank,
+ double *VRatioPrefetchY,
+ double *VRatioPrefetchC,
+ double *RequiredPrefetchPixDataBWLuma,
+ double *RequiredPrefetchPixDataBWChroma,
+ bool *NotEnoughTimeForDynamicMetadata,
+ double *Tno_bw,
+ double *prefetch_vmrow_bw,
+ double *Tdmdl_vm,
+ double *Tdmdl,
+ double *TSetup,
+ unsigned int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix)
+{
+ double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater;
+ bool MyError = false;
+ unsigned int DPPCycles, DISPCLKCycles;
+ double DSTTotalPixelsAfterScaler;
+ double LineTime;
+ double dst_y_prefetch_equ;
+ double prefetch_bw_oto;
+ double Tvm_oto;
+ double Tr0_oto;
+ double Tvm_oto_lines;
+ double Tr0_oto_lines;
+ double dst_y_prefetch_oto;
+ double TimeForFetchingMetaPTE = 0;
+ double TimeForFetchingRowInVBlank = 0;
+ double LinesToRequestPrefetchPixelData = 0;
+ double LinesForPrefetchBandwidth = 0;
+ unsigned int HostVMDynamicLevelsTrips;
+ double trip_to_mem;
+ double Tvm_trips;
+ double Tr0_trips;
+ double Tvm_trips_rounded;
+ double Tr0_trips_rounded;
+ double Lsw_oto;
+ double Tpre_rounded;
+ double prefetch_bw_equ;
+ double Tvm_equ;
+ double Tr0_equ;
+ double Tdmbf;
+ double Tdmec;
+ double Tdmsks;
+ double prefetch_sw_bytes;
+ double bytes_pp;
+ double dep_bytes;
+ unsigned int max_vratio_pre = v->MaxVRatioPre;
+ double min_Lsw;
+ double Tsw_est1 = 0;
+ double Tsw_est3 = 0;
+
+ if (v->GPUVMEnable == true && v->HostVMEnable == true)
+ HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
+ else
+ HostVMDynamicLevelsTrips = 0;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable);
+ dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels);
+ dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
+ dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
+ __func__, v->HostVMEnable, HostVMInefficiencyFactor);
+#endif
+ dml32_CalculateVUpdateAndDynamicMetadataParameters(
+ v->MaxInterDCNTileRepeaters,
+ myPipe->Dppclk,
+ myPipe->Dispclk,
+ myPipe->DCFClkDeepSleep,
+ myPipe->PixelClock,
+ myPipe->HTotal,
+ myPipe->VBlank,
+ v->DynamicMetadataTransmittedBytes[k],
+ v->DynamicMetadataLinesBeforeActiveRequired[k],
+ myPipe->InterlaceEnable,
+ myPipe->ProgressiveToInterlaceUnitInOPP,
+ TSetup,
+
+ /* output */
+ &Tdmbf,
+ &Tdmec,
+ &Tdmsks,
+ VUpdateOffsetPix,
+ VUpdateWidthPix,
+ VReadyOffsetPix);
+
+ LineTime = myPipe->HTotal / myPipe->PixelClock;
+ trip_to_mem = UrgentLatency;
+ Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
+
+ if (v->DynamicMetadataVMEnabled == true)
+ *Tdmdl = TWait + Tvm_trips + trip_to_mem;
+ else
+ *Tdmdl = TWait + UrgentExtraLatency;
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+ if (v->DynamicMetadataEnable[k] == false)
+ *Tdmdl = 0.0;
+#endif
+
+ if (v->DynamicMetadataEnable[k] == true) {
+ if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
+ *NotEnoughTimeForDynamicMetadata = true;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
+ dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
+ __func__, Tdmbf);
+ dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
+ dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
+ __func__, Tdmsks);
+ dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
+ __func__, *Tdmdl);
+#endif
+ } else {
+ *NotEnoughTimeForDynamicMetadata = false;
+ }
+ } else {
+ *NotEnoughTimeForDynamicMetadata = false;
+ }
+
+ *Tdmdl_vm = (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true &&
+ v->GPUVMEnable == true ? TWait + Tvm_trips : 0);
+
+ if (myPipe->ScalerEnabled)
+ DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL;
+ else
+ DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly;
+
+ DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor;
+
+ DISPCLKCycles = v->DISPCLKDelaySubtotal;
+
+ if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
+ return true;
+
+ *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
+ myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
+
+ *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
+ + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
+ + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
+ myPipe->HActive / 2 : 0)
+ + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
+ dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
+ dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
+ dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
+ dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk);
+ dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
+ dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode);
+ dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
+ dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
+#endif
+
+ if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
+ *DSTYAfterScaler = 1;
+ else
+ *DSTYAfterScaler = 0;
+
+ DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
+ *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
+ *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
+ dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
+#endif
+
+ MyError = false;
+
+ Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
+
+ if (v->GPUVMEnable == true) {
+ Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
+ Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
+ if (v->GPUVMMaxPageTableLevels >= 3) {
+ *Tno_bw = UrgentExtraLatency + trip_to_mem *
+ (double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
+ } else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) {
+ Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
+ 4.0 * LineTime; // VBA_ERROR
+ *Tno_bw = UrgentExtraLatency;
+ } else {
+ *Tno_bw = 0;
+ }
+ } else if (myPipe->DCCEnable == true) {
+ Tvm_trips_rounded = LineTime / 4.0;
+ Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
+ *Tno_bw = 0;
+ } else {
+ Tvm_trips_rounded = LineTime / 4.0;
+ Tr0_trips_rounded = LineTime / 2.0;
+ *Tno_bw = 0;
+ }
+ Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
+ Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
+
+ if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
+ || myPipe->SourcePixelFormat == dm_420_12) {
+ bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
+ } else {
+ bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
+ }
+
+ prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
+ + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
+ prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
+ prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
+
+ min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
+ min_Lsw = dml_max(min_Lsw, 1.0);
+ Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
+
+ if (v->GPUVMEnable == true) {
+ Tvm_oto = dml_max3(
+ Tvm_trips,
+ *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
+ LineTime / 4.0);
+ } else
+ Tvm_oto = LineTime / 4.0;
+
+ if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
+ Tr0_oto = dml_max4(
+ Tr0_trips,
+ (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
+ (LineTime - Tvm_oto)/2.0,
+ LineTime / 4.0);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
+ (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
+ dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
+ dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
+ dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
+#endif
+ } else
+ Tr0_oto = (LineTime - Tvm_oto) / 2.0;
+
+ Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
+ Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
+ dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
+
+ dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
+ (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
+
+ dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
+ dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
+ dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
+ dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
+ dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
+ dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
+ dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
+ dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
+ dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
+ dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
+ dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
+ dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
+ dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
+ dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
+ dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
+ dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
+ dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
+ dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
+ dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
+ dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
+ dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
+ dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
+ dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
+ dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
+#endif
+
+ dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
+ Tpre_rounded = dst_y_prefetch_equ * LineTime;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
+ dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
+ dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
+ dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
+ __func__, VStartup * LineTime);
+ dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
+ dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
+ dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
+ dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
+ dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
+ dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
+ dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
+ __func__, *DSTYAfterScaler);
+#endif
+ dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
+ MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
+
+ if (prefetch_sw_bytes < dep_bytes)
+ prefetch_sw_bytes = 2 * dep_bytes;
+
+ *PrefetchBandwidth = 0;
+ *DestinationLinesToRequestVMInVBlank = 0;
+ *DestinationLinesToRequestRowInVBlank = 0;
+ *VRatioPrefetchY = 0;
+ *VRatioPrefetchC = 0;
+ *RequiredPrefetchPixDataBWLuma = 0;
+ if (dst_y_prefetch_equ > 1 &&
+ (Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) {
+ double PrefetchBandwidth1;
+ double PrefetchBandwidth2;
+ double PrefetchBandwidth3;
+ double PrefetchBandwidth4;
+
+ if (Tpre_rounded - *Tno_bw > 0) {
+ PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
+ + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
+ + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
+ Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
+ } else
+ PrefetchBandwidth1 = 0;
+
+ if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
+ && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
+ PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
+ + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
+ / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
+ }
+
+ if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
+ PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
+ (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
+ else
+ PrefetchBandwidth2 = 0;
+
+ if (Tpre_rounded - Tvm_trips_rounded > 0) {
+ PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
+ + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
+ Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
+ } else
+ PrefetchBandwidth3 = 0;
+
+
+ if (VStartup == MaxVStartup &&
+ (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
+ LineTime - Tvm_trips_rounded > 0) {
+ PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
+ / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
+ }
+
+ if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
+ PrefetchBandwidth4 = prefetch_sw_bytes /
+ (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
+ } else {
+ PrefetchBandwidth4 = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
+ dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
+ dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
+ dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
+ dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
+ dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
+ dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
+ dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
+ dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
+#endif
+ {
+ bool Case1OK;
+ bool Case2OK;
+ bool Case3OK;
+
+ if (PrefetchBandwidth1 > 0) {
+ if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
+ >= Tvm_trips_rounded
+ && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
+ / PrefetchBandwidth1 >= Tr0_trips_rounded) {
+ Case1OK = true;
+ } else {
+ Case1OK = false;
+ }
+ } else {
+ Case1OK = false;
+ }
+
+ if (PrefetchBandwidth2 > 0) {
+ if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
+ >= Tvm_trips_rounded
+ && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
+ / PrefetchBandwidth2 < Tr0_trips_rounded) {
+ Case2OK = true;
+ } else {
+ Case2OK = false;
+ }
+ } else {
+ Case2OK = false;
+ }
+
+ if (PrefetchBandwidth3 > 0) {
+ if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
+ Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
+ HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
+ Tr0_trips_rounded) {
+ Case3OK = true;
+ } else {
+ Case3OK = false;
+ }
+ } else {
+ Case3OK = false;
+ }
+
+ if (Case1OK)
+ prefetch_bw_equ = PrefetchBandwidth1;
+ else if (Case2OK)
+ prefetch_bw_equ = PrefetchBandwidth2;
+ else if (Case3OK)
+ prefetch_bw_equ = PrefetchBandwidth3;
+ else
+ prefetch_bw_equ = PrefetchBandwidth4;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
+ dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
+ dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
+ dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
+#endif
+
+ if (prefetch_bw_equ > 0) {
+ if (v->GPUVMEnable == true) {
+ Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
+ HostVMInefficiencyFactor / prefetch_bw_equ,
+ Tvm_trips, LineTime / 4);
+ } else {
+ Tvm_equ = LineTime / 4;
+ }
+
+ if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
+ Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
+ HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
+ (LineTime - Tvm_equ) / 2, LineTime / 4);
+ } else {
+ Tr0_equ = (LineTime - Tvm_equ) / 2;
+ }
+ } else {
+ Tvm_equ = 0;
+ Tr0_equ = 0;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
+#endif
+ }
+ }
+
+ if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
+ if (dst_y_prefetch_oto * LineTime < TPreReq) {
+ *DestinationLinesForPrefetch = dst_y_prefetch_equ;
+ } else {
+ *DestinationLinesForPrefetch = dst_y_prefetch_oto;
+ }
+ TimeForFetchingMetaPTE = Tvm_oto;
+ TimeForFetchingRowInVBlank = Tr0_oto;
+ *PrefetchBandwidth = prefetch_bw_oto;
+ /* Clamp to oto for bandwidth calculation */
+ LinesForPrefetchBandwidth = dst_y_prefetch_oto;
+ } else {
+ /* For mode programming we want to extend the prefetch as much as possible
+ * (up to oto, or as long as we can for equ) if we're not already applying
+ * the 60us prefetch requirement. This is to avoid intermittent underflow
+ * issues during prefetch.
+ *
+ * The prefetch extension is applied under the following scenarios:
+ * 1. We're in prefetch mode > 0 (i.e. we don't support MCLK switch in blank)
+ * 2. We're using subvp or drr methods of p-state switch, in which case we
+ * we don't care if prefetch takes up more of the blanking time
+ *
+ * Mode programming typically chooses the smallest prefetch time possible
+ * (i.e. highest bandwidth during prefetch) presumably to create margin between
+ * p-states / c-states that happen in vblank and prefetch. Therefore we only
+ * apply this prefetch extension when p-state in vblank is not required (UCLK
+ * p-states take up the most vblank time).
+ */
+ if (ExtendPrefetchIfPossible && TPreReq == 0 && VStartup < MaxVStartup) {
+ MyError = true;
+ } else {
+ *DestinationLinesForPrefetch = dst_y_prefetch_equ;
+ TimeForFetchingMetaPTE = Tvm_equ;
+ TimeForFetchingRowInVBlank = Tr0_equ;
+ *PrefetchBandwidth = prefetch_bw_equ;
+ /* Clamp to equ for bandwidth calculation */
+ LinesForPrefetchBandwidth = dst_y_prefetch_equ;
+ }
+ }
+
+ *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
+
+ *DestinationLinesToRequestRowInVBlank =
+ dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
+
+ LinesToRequestPrefetchPixelData = LinesForPrefetchBandwidth -
+ *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
+ dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
+ __func__, *DestinationLinesToRequestVMInVBlank);
+ dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
+ dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
+ __func__, *DestinationLinesToRequestRowInVBlank);
+ dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
+ dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
+#endif
+
+ if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
+ *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
+ *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
+ dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
+ dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
+#endif
+ if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
+ if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
+ *VRatioPrefetchY =
+ dml_max((double) PrefetchSourceLinesY /
+ LinesToRequestPrefetchPixelData,
+ (double) MaxNumSwathY * SwathHeightY /
+ (LinesToRequestPrefetchPixelData -
+ (VInitPreFillY - 3.0) / 2.0));
+ *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
+ } else {
+ MyError = true;
+ *VRatioPrefetchY = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
+ dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
+ dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
+#endif
+ }
+
+ *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
+ *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
+ dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
+ dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
+#endif
+ if ((SwathHeightC > 4)) {
+ if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
+ *VRatioPrefetchC =
+ dml_max(*VRatioPrefetchC,
+ (double) MaxNumSwathC * SwathHeightC /
+ (LinesToRequestPrefetchPixelData -
+ (VInitPreFillC - 3.0) / 2.0));
+ *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
+ } else {
+ MyError = true;
+ *VRatioPrefetchC = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
+ dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
+ dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
+#endif
+ }
+
+ *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
+ / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
+ / LineTime;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
+ dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
+ dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
+ __func__, *RequiredPrefetchPixDataBWLuma);
+#endif
+ *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
+ LinesToRequestPrefetchPixelData
+ * myPipe->BytePerPixelC
+ * swath_width_chroma_ub / LineTime;
+ } else {
+ MyError = true;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
+ __func__, LinesToRequestPrefetchPixelData);
+#endif
+ *VRatioPrefetchY = 0;
+ *VRatioPrefetchC = 0;
+ *RequiredPrefetchPixDataBWLuma = 0;
+ *RequiredPrefetchPixDataBWChroma = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
+ (double)LinesToRequestPrefetchPixelData * LineTime +
+ 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
+ dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
+ dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
+ (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
+ dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
+ dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
+ TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
+ ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
+ dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
+ PixelPTEBytesPerRow);
+#endif
+ } else {
+ MyError = true;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
+ __func__, dst_y_prefetch_equ);
+#endif
+ }
+
+ {
+ double prefetch_vm_bw;
+ double prefetch_row_bw;
+
+ if (PDEAndMetaPTEBytesFrame == 0) {
+ prefetch_vm_bw = 0;
+ } else if (*DestinationLinesToRequestVMInVBlank > 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
+ dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
+ __func__, *DestinationLinesToRequestVMInVBlank);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
+#endif
+ prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
+ (*DestinationLinesToRequestVMInVBlank * LineTime);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
+#endif
+ } else {
+ prefetch_vm_bw = 0;
+ MyError = true;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
+ __func__, *DestinationLinesToRequestVMInVBlank);
+#endif
+ }
+
+ if (MetaRowByte + PixelPTEBytesPerRow == 0) {
+ prefetch_row_bw = 0;
+ } else if (*DestinationLinesToRequestRowInVBlank > 0) {
+ prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
+ (*DestinationLinesToRequestRowInVBlank * LineTime);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
+ dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
+ __func__, *DestinationLinesToRequestRowInVBlank);
+ dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
+#endif
+ } else {
+ prefetch_row_bw = 0;
+ MyError = true;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
+ __func__, *DestinationLinesToRequestRowInVBlank);
+#endif
+ }
+
+ *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
+ }
+
+ if (MyError) {
+ *PrefetchBandwidth = 0;
+ *DestinationLinesToRequestVMInVBlank = 0;
+ *DestinationLinesToRequestRowInVBlank = 0;
+ *DestinationLinesForPrefetch = 0;
+ *VRatioPrefetchY = 0;
+ *VRatioPrefetchC = 0;
+ *RequiredPrefetchPixDataBWLuma = 0;
+ *RequiredPrefetchPixDataBWChroma = 0;
+ }
+
+ return MyError;
+} // CalculatePrefetchSchedule
+
+/*
+ * Work out the immediate-flip schedule for one pipe.
+ *
+ * The pipe is given a share of BandwidthAvailableForImmediateFlip that is
+ * proportional to its own flip bytes (PDE/meta PTE bytes plus meta row bytes
+ * plus one DPTE row, or two DPTE rows when use_one_row_for_frame_flip is set)
+ * relative to TotImmediateFlipBytes.  The VM and row fetch times are derived
+ * from that share, rounded up to quarter lines and written to the
+ * DestinationLinesToRequest*InImmediateFlip outputs; *final_flip_bw receives
+ * the bandwidth implied by those rounded line counts.
+ *
+ * *ImmediateFlipSupportedForPipe is set to false when the VM request needs 32
+ * or more lines, the row request needs 16 or more lines, or the VM fetch plus
+ * two row fetches exceed the smallest applicable row time; otherwise true.
+ *
+ * When TotImmediateFlipBytes is zero the fetch times, both line counts and
+ * *final_flip_bw are all reported as zero.
+ *
+ * HostVMMinPageSize is accepted but not read by this function.
+ */
+void dml32_CalculateFlipSchedule(
+		double HostVMInefficiencyFactor,
+		double UrgentExtraLatency,
+		double UrgentLatency,
+		unsigned int GPUVMMaxPageTableLevels,
+		bool HostVMEnable,
+		unsigned int HostVMMaxNonCachedPageTableLevels,
+		bool GPUVMEnable,
+		double HostVMMinPageSize,
+		double PDEAndMetaPTEBytesPerFrame,
+		double MetaRowBytes,
+		double DPTEBytesPerRow,
+		double BandwidthAvailableForImmediateFlip,
+		unsigned int TotImmediateFlipBytes,
+		enum source_format_class SourcePixelFormat,
+		double LineTime,
+		double VRatio,
+		double VRatioChroma,
+		double Tno_bw,
+		bool DCCEnable,
+		unsigned int dpte_row_height,
+		unsigned int meta_row_height,
+		unsigned int dpte_row_height_chroma,
+		unsigned int meta_row_height_chroma,
+		bool use_one_row_for_frame_flip,
+
+		/* Output */
+		double *DestinationLinesToRequestVMInImmediateFlip,
+		double *DestinationLinesToRequestRowInImmediateFlip,
+		double *final_flip_bw,
+		bool *ImmediateFlipSupportedForPipe)
+{
+	/* Formats for which the chroma row heights take part in the row time. */
+	const bool uses_chroma_rows = SourcePixelFormat == dm_420_8 ||
+			SourcePixelFormat == dm_420_10 ||
+			SourcePixelFormat == dm_rgbe_alpha;
+	/* Exactly one of GPUVM / DCC enabled selects a single row-height source. */
+	const bool dpte_rows_only = GPUVMEnable == true && DCCEnable != true;
+	const bool meta_rows_only = GPUVMEnable != true && DCCEnable == true;
+	/* Extra host VM trips only apply when both GPUVM and host VM are on. */
+	unsigned int host_vm_trips = (GPUVMEnable == true && HostVMEnable == true) ?
+			HostVMMaxNonCachedPageTableLevels : 0;
+	double flip_bw = 1.0;
+	double vm_fetch_time = 0;
+	double row_fetch_time = 0;
+	double row_time_limit = 0.0;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
+	dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
+#endif
+
+	if (TotImmediateFlipBytes > 0) {
+		double pipe_flip_bytes;
+
+		/* This pipe's bytes; the DPTE row is counted twice in one-row mode. */
+		if (use_one_row_for_frame_flip)
+			pipe_flip_bytes = PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow;
+		else
+			pipe_flip_bytes = PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow;
+
+		flip_bw = pipe_flip_bytes * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
+
+		/* The fetch times stay at their zero defaults when the feature is off. */
+		if (GPUVMEnable == true)
+			vm_fetch_time = dml_max3(
+					Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / flip_bw,
+					UrgentExtraLatency + UrgentLatency *
+					(GPUVMMaxPageTableLevels * (host_vm_trips + 1) - 1),
+					LineTime / 4.0);
+
+		if (GPUVMEnable == true || DCCEnable == true)
+			row_fetch_time = dml_max3(
+					(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / flip_bw,
+					UrgentLatency * (host_vm_trips + 1),
+					LineTime / 4.0);
+
+		/* Round both fetch times up to the next quarter line. */
+		*DestinationLinesToRequestVMInImmediateFlip =
+				dml_ceil(4.0 * (vm_fetch_time / LineTime), 1.0) / 4.0;
+		*DestinationLinesToRequestRowInImmediateFlip =
+				dml_ceil(4.0 * (row_fetch_time / LineTime), 1.0) / 4.0;
+
+		if (GPUVMEnable == true) {
+			*final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
+					(*DestinationLinesToRequestVMInImmediateFlip * LineTime),
+					(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
+					(*DestinationLinesToRequestRowInImmediateFlip * LineTime));
+		} else if (DCCEnable == true) {
+			/* GPUVM is off on this path, so only the row fetch contributes. */
+			*final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
+					(*DestinationLinesToRequestRowInImmediateFlip * LineTime);
+		} else {
+			*final_flip_bw = 0;
+		}
+	} else {
+		*DestinationLinesToRequestVMInImmediateFlip = 0;
+		*DestinationLinesToRequestRowInImmediateFlip = 0;
+		*final_flip_bw = 0;
+	}
+
+	/* Smallest row time among the row heights relevant to this configuration. */
+	if (dpte_rows_only) {
+		row_time_limit = dpte_row_height * LineTime / VRatio;
+		if (uses_chroma_rows)
+			row_time_limit = dml_min(row_time_limit,
+					dpte_row_height_chroma * LineTime / VRatioChroma);
+	} else if (meta_rows_only) {
+		row_time_limit = meta_row_height * LineTime / VRatio;
+		if (uses_chroma_rows)
+			row_time_limit = dml_min(row_time_limit,
+					meta_row_height_chroma * LineTime / VRatioChroma);
+	} else if (uses_chroma_rows) {
+		row_time_limit = dml_min4(dpte_row_height * LineTime / VRatio,
+				meta_row_height * LineTime / VRatio,
+				dpte_row_height_chroma * LineTime / VRatioChroma,
+				meta_row_height_chroma * LineTime / VRatioChroma);
+	} else {
+		row_time_limit = dml_min(dpte_row_height * LineTime / VRatio,
+				meta_row_height * LineTime / VRatio);
+	}
+
+	/*
+	 * Kept as a negated "unsupported" test so that comparisons involving
+	 * NaN resolve the same way as an if/else on the unsupported condition.
+	 */
+	*ImmediateFlipSupportedForPipe = !(*DestinationLinesToRequestVMInImmediateFlip >= 32
+			|| *DestinationLinesToRequestRowInImmediateFlip >= 16
+			|| vm_fetch_time + 2 * row_fetch_time > row_time_limit);
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
+	dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
+	dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
+			__func__, *DestinationLinesToRequestVMInImmediateFlip);
+	dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
+			__func__, *DestinationLinesToRequestRowInImmediateFlip);
+	dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, vm_fetch_time);
+	dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
+			__func__, row_fetch_time);
+	dml_print("DML::%s: min_row_time = %f\n", __func__, row_time_limit);
+	dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
+#endif
+} // CalculateFlipSchedule
+
+void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+ struct vba_vars_st *v,
+ unsigned int PrefetchMode,
+ double DCFCLK,
+ double ReturnBW,
+ SOCParametersList mmSOCParameters,
+ double SOCCLK,
+ double DCFClkDeepSleep,
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ unsigned int DPPPerSurface[],
+ double BytePerPixelDETY[],
+ double BytePerPixelDETC[],
+ double DSTXAfterScaler[],
+ double DSTYAfterScaler[],
+ bool UnboundedRequestEnabled,
+ unsigned int CompressedBufferSizeInkByte,
+
+ /* Output */
+ enum clock_change_support *DRAMClockChangeSupport,
+ double MaxActiveDRAMClockChangeLatencySupported[],
+ unsigned int SubViewportLinesNeededInMALL[],
+ enum dm_fclock_change_support *FCLKChangeSupport,
+ double *MinActiveFCLKChangeLatencySupported,
+ bool *USRRetrainingSupport,
+ double ActiveDRAMClockChangeLatencyMargin[])
+{
+ unsigned int i, j, k;
+ unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
+ unsigned int DRAMClockChangeSupportNumber = 0;
+ unsigned int LastSurfaceWithoutMargin = 0;
+ unsigned int DRAMClockChangeMethod = 0;
+ bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
+ double MinActiveFCLKChangeMargin = 0.;
+ double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
+ double ActiveClockChangeLatencyHidingY;
+ double ActiveClockChangeLatencyHidingC;
+ double ActiveClockChangeLatencyHiding;
+ double EffectiveDETBufferSizeY;
+ double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
+ double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
+ double TotalPixelBW = 0.0;
+ bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
+ double EffectiveLBLatencyHidingY;
+ double EffectiveLBLatencyHidingC;
+ double LinesInDETY[DC__NUM_DPP__MAX];
+ double LinesInDETC[DC__NUM_DPP__MAX];
+ unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
+ unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
+ double FullDETBufferingTimeY;
+ double FullDETBufferingTimeC;
+ double WritebackDRAMClockChangeLatencyMargin;
+ double WritebackFCLKChangeLatencyMargin;
+ double WritebackLatencyHiding;
+ bool SameTimingForFCLKChange;
+
+ unsigned int TotalActiveWriteback = 0;
+ unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
+ unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
+
+ v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
+ v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
+ + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
+ v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark;
+ v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark;
+ v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
+ + 10 / DCFClkDeepSleep;
+ v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
+ + 10 / DCFClkDeepSleep;
+ v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
+ + 10 / DCFClkDeepSleep;
+ v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
+ + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
+ dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
+ dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
+ dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark);
+ dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark);
+ dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark);
+ dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark);
+ dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark);
+ dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark);
+ dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark);
+ dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
+ __func__, v->Watermark.Z8StutterEnterPlusExitWatermark);
+#endif
+
+
+ TotalActiveWriteback = 0;
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ if (v->WritebackEnable[k] == true)
+ TotalActiveWriteback = TotalActiveWriteback + 1;
+ }
+
+ if (TotalActiveWriteback <= 1) {
+ v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
+ } else {
+ v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
+ + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+ }
+ if (v->USRRetrainingRequiredFinal)
+ v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
+ + mmSOCParameters.USRRetrainingLatency;
+
+ if (TotalActiveWriteback <= 1) {
+ v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
+ + mmSOCParameters.WritebackLatency;
+ v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
+ + mmSOCParameters.WritebackLatency;
+ } else {
+ v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
+ + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+ v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
+ + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK;
+ }
+
+ if (v->USRRetrainingRequiredFinal)
+ v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
+ + mmSOCParameters.USRRetrainingLatency;
+
+ if (v->USRRetrainingRequiredFinal)
+ v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark
+ + mmSOCParameters.USRRetrainingLatency;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
+ __func__, v->Watermark.WritebackDRAMClockChangeWatermark);
+ dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark);
+ dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark);
+ dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal);
+ dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
+#endif
+
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] +
+ SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]);
+ }
+
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+
+ LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
+ LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
+
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines);
+ dml_print("DML::%s: k=%d, v->LineBufferSizeFinal = %d\n", __func__, k, v->LineBufferSizeFinal);
+ dml_print("DML::%s: k=%d, v->LBBitPerPixel = %d\n", __func__, k, v->LBBitPerPixel[k]);
+ dml_print("DML::%s: k=%d, v->HRatio = %f\n", __func__, k, v->HRatio[k]);
+ dml_print("DML::%s: k=%d, v->vtaps = %d\n", __func__, k, v->vtaps[k]);
+#endif
+
+ EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
+ EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
+ EffectiveDETBufferSizeY = DETBufferSizeY[k];
+
+ if (UnboundedRequestEnabled) {
+ EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
+ + CompressedBufferSizeInkByte * 1024
+ * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k])
+ / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
+ }
+
+ LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
+ LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
+ FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
+
+ ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
+ - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k];
+
+ if (v->NumberOfActiveSurfaces > 1) {
+ ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
+ - (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k]
+ / v->PixelClock[k] / v->VRatio[k];
+ }
+
+ if (BytePerPixelDETC[k] > 0) {
+ LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
+ LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
+ FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k])
+ / v->VRatioChroma[k];
+ ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
+ - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k]
+ / v->PixelClock[k];
+ if (v->NumberOfActiveSurfaces > 1) {
+ ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
+ - (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k]
+ / v->PixelClock[k] / v->VRatioChroma[k];
+ }
+ ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
+ ActiveClockChangeLatencyHidingC);
+ } else {
+ ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
+ }
+
+ ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
+ - v->Watermark.DRAMClockChangeWatermark;
+ ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
+ - v->Watermark.FCLKChangeWatermark;
+ USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark;
+
+ if (v->WritebackEnable[k]) {
+ WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024
+ / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
+ / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
+ if (v->WritebackPixelFormat[k] == dm_444_64)
+ WritebackLatencyHiding = WritebackLatencyHiding / 2;
+
+ WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
+ - v->Watermark.WritebackDRAMClockChangeWatermark;
+
+ WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
+ - v->Watermark.WritebackFCLKChangeWatermark;
+
+ ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
+ WritebackFCLKChangeLatencyMargin);
+ ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
+ WritebackDRAMClockChangeLatencyMargin);
+ }
+ MaxActiveDRAMClockChangeLatencySupported[k] =
+ (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
+ 0 :
+ (ActiveDRAMClockChangeLatencyMargin[k]
+ + mmSOCParameters.DRAMClockChangeLatency);
+ }
+
+ for (i = 0; i < v->NumberOfActiveSurfaces; ++i) {
+ for (j = 0; j < v->NumberOfActiveSurfaces; ++j) {
+ if (i == j ||
+ (v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) ||
+ (v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) ||
+ (v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) ||
+ (v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] &&
+ v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] &&
+ v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
+ (v->DRRDisplay[i] || v->DRRDisplay[j]))) {
+ SynchronizedSurfaces[i][j] = true;
+ } else {
+ SynchronizedSurfaces[i][j] = false;
+ }
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
+ (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
+ ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
+ FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
+ MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
+ SurfaceWithMinActiveFCLKChangeMargin = k;
+ }
+ }
+
+ *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
+
+ SameTimingForFCLKChange = true;
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
+ if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
+ (SameTimingForFCLKChange ||
+ ActiveFCLKChangeLatencyMargin[k] <
+ SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
+ SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
+ }
+ SameTimingForFCLKChange = false;
+ }
+ }
+
+ if (MinActiveFCLKChangeMargin > 0) {
+ *FCLKChangeSupport = dm_fclock_change_vactive;
+ } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
+ (PrefetchMode <= 1)) {
+ *FCLKChangeSupport = dm_fclock_change_vblank;
+ } else {
+ *FCLKChangeSupport = dm_fclock_change_unsupported;
+ }
+
+ *USRRetrainingSupport = true;
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
+ (USRRetrainingLatencyMargin[k] < 0)) {
+ *USRRetrainingSupport = false;
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
+ v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
+ v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
+ ActiveDRAMClockChangeLatencyMargin[k] < 0) {
+ if (PrefetchMode > 0) {
+ DRAMClockChangeSupportNumber = 2;
+ } else if (DRAMClockChangeSupportNumber == 0) {
+ DRAMClockChangeSupportNumber = 1;
+ LastSurfaceWithoutMargin = k;
+ } else if (DRAMClockChangeSupportNumber == 1 &&
+ !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
+ DRAMClockChangeSupportNumber = 2;
+ }
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
+ DRAMClockChangeMethod = 1;
+ else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
+ DRAMClockChangeMethod = 2;
+ }
+
+ if (DRAMClockChangeMethod == 0) {
+ if (DRAMClockChangeSupportNumber == 0)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
+ else if (DRAMClockChangeSupportNumber == 1)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
+ else
+ *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
+ } else if (DRAMClockChangeMethod == 1) {
+ if (DRAMClockChangeSupportNumber == 0)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
+ else if (DRAMClockChangeSupportNumber == 1)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
+ else
+ *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
+ } else {
+ if (DRAMClockChangeSupportNumber == 0)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
+ else if (DRAMClockChangeSupportNumber == 1)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
+ else
+ *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
+ }
+
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ unsigned int dst_y_pstate;
+ unsigned int src_y_pstate_l;
+ unsigned int src_y_pstate_c;
+ unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
+
+ dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1);
+ src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]);
+ src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
+ sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k];
+
+#ifdef __DML_VBA_DEBUG__
+dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
+dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
+dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
+dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
+dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
+dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate);
+dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l);
+dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l);
+dml_print("DML::%s: k=%d, v->meta_row_height = %d\n", __func__, k, v->meta_row_height[k]);
+dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l);
+#endif
+ SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
+
+ if (BytePerPixelDETC[k] > 0) {
+ src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]);
+ src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
+ sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k];
+ SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
+
+#ifdef __DML_VBA_DEBUG__
+dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c);
+dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c);
+dml_print("DML::%s: k=%d, v->meta_row_height_chroma = %d\n", __func__, k, v->meta_row_height_chroma[k]);
+dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c);
+#endif
+ }
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
+ dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
+ dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
+ __func__, *MinActiveFCLKChangeLatencySupported);
+ dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
+#endif
+} // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
+
+/*
+ * Return the display clock required by a writeback path.
+ *
+ * Three candidate clocks are derived from the pixel clock and the writeback
+ * scaler geometry (horizontal taps, vertical taps, and a line-buffer term).
+ * The largest candidate is handed to dml32_RoundToDFSGranularity() together
+ * with the constant 1 and DISPCLKDPPCLKVCOSpeed, and that helper's result is
+ * returned unchanged.
+ *
+ * WritebackPixelFormat and WritebackVRatio are accepted but not referenced
+ * by this function.
+ */
+double dml32_CalculateWriteBackDISPCLK(
+		enum source_format_class WritebackPixelFormat,
+		double PixelClock,
+		double WritebackHRatio,
+		double WritebackVRatio,
+		unsigned int WritebackHTaps,
+		unsigned int WritebackVTaps,
+		unsigned int WritebackSourceWidth,
+		unsigned int WritebackDestinationWidth,
+		unsigned int HTotal,
+		unsigned int WritebackLineBufferSize,
+		double DISPCLKDPPCLKVCOSpeed)
+{
+	/* Candidate driven by the horizontal tap count (taps taken in groups of 8). */
+	const double clk_from_htaps =
+		PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
+
+	/* Candidate driven by the vertical taps and the destination width in groups of 6. */
+	const double clk_from_vtaps =
+		PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
+
+	/* Candidate that also accounts for the writeback line buffer size. */
+	const double clk_from_linebuf =
+		PixelClock * WritebackVTaps * (WritebackDestinationWidth *
+		WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
+
+	/* The most demanding of the three candidates decides the result. */
+	const double clk_required = dml_max3(clk_from_htaps, clk_from_vtaps, clk_from_linebuf);
+
+	return dml32_RoundToDFSGranularity(clk_required, 1, DISPCLKDPPCLKVCOSpeed);
+}
+
+/*
+ * Translate the prefetch support setting into a [min, max] prefetch mode
+ * range written through MinPrefetchMode and MaxPrefetchMode.
+ *
+ * Each of the four explicitly handled settings pins both bounds to a single
+ * mode (3, 2, 1 or 0); any other value yields the full range 0..3.
+ */
+void dml32_CalculateMinAndMaxPrefetchMode(
+		enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,
+		unsigned int *MinPrefetchMode,
+		unsigned int *MaxPrefetchMode)
+{
+	unsigned int lowest_mode;
+	unsigned int highest_mode;
+
+	switch (AllowForPStateChangeOrStutterInVBlankFinal) {
+	case dm_prefetch_support_none:
+		lowest_mode = 3;
+		highest_mode = 3;
+		break;
+	case dm_prefetch_support_stutter:
+		lowest_mode = 2;
+		highest_mode = 2;
+		break;
+	case dm_prefetch_support_fclk_and_stutter:
+		lowest_mode = 1;
+		highest_mode = 1;
+		break;
+	case dm_prefetch_support_uclk_fclk_and_stutter:
+		lowest_mode = 0;
+		highest_mode = 0;
+		break;
+	default:
+		/* Any remaining setting leaves every mode selectable. */
+		lowest_mode = 0;
+		highest_mode = 3;
+		break;
+	}
+
+	*MinPrefetchMode = lowest_mode;
+	*MaxPrefetchMode = highest_mode;
+} // CalculateMinAndMaxPrefetchMode
+
+/*
+ * Fill the per-surface delivery-time output arrays for the first
+ * NumberOfActiveSurfaces surfaces.
+ *
+ * Three independent passes are made over the surfaces:
+ *   1. Line delivery times (luma/chroma, normal/prefetch). A ratio <= 1
+ *      selects the formula based on HRatio and PixelClock, otherwise the one
+ *      based on PSCL throughput and Dppclk is used.
+ *   2. Request delivery times: each line delivery time divided by the number
+ *      of requests per swath.
+ *   3. Cursor request delivery times, using cursor index 0 only.
+ *
+ * Chroma outputs are set to 0 when BytePerPixelC[k] is 0, and cursor outputs
+ * are set to 0 when NumberOfCursors[k] is 0. Nothing is returned.
+ */
+void dml32_CalculatePixelDeliveryTimes(
+		unsigned int NumberOfActiveSurfaces,
+		double VRatio[],
+		double VRatioChroma[],
+		double VRatioPrefetchY[],
+		double VRatioPrefetchC[],
+		unsigned int swath_width_luma_ub[],
+		unsigned int swath_width_chroma_ub[],
+		unsigned int DPPPerSurface[],
+		double HRatio[],
+		double HRatioChroma[],
+		double PixelClock[],
+		double PSCL_THROUGHPUT[],
+		double PSCL_THROUGHPUT_CHROMA[],
+		double Dppclk[],
+		unsigned int BytePerPixelC[],
+		enum dm_rotation_angle SourceRotation[],
+		unsigned int NumberOfCursors[],
+		unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
+		unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
+		unsigned int BlockWidth256BytesY[],
+		unsigned int BlockHeight256BytesY[],
+		unsigned int BlockWidth256BytesC[],
+		unsigned int BlockHeight256BytesC[],
+
+		/* Output */
+		double DisplayPipeLineDeliveryTimeLuma[],
+		double DisplayPipeLineDeliveryTimeChroma[],
+		double DisplayPipeLineDeliveryTimeLumaPrefetch[],
+		double DisplayPipeLineDeliveryTimeChromaPrefetch[],
+		double DisplayPipeRequestDeliveryTimeLuma[],
+		double DisplayPipeRequestDeliveryTimeChroma[],
+		double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
+		double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
+		double CursorRequestDeliveryTime[],
+		double CursorRequestDeliveryTimePrefetch[])
+{
+	unsigned int k;
+	double requests_per_swath;
+
+	/* Pass 1: per-line delivery times. */
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
+		dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
+		dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
+		dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
+		dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
+		dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
+		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
+		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
+		dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
+		dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
+		dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
+#endif
+
+		/* Luma, active: the vertical ratio picks the formula. */
+		DisplayPipeLineDeliveryTimeLuma[k] = (VRatio[k] <= 1) ?
+			swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k] :
+			swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+
+		/* Chroma, active: only meaningful when the surface has a chroma plane. */
+		if (BytePerPixelC[k] != 0) {
+			DisplayPipeLineDeliveryTimeChroma[k] = (VRatioChroma[k] <= 1) ?
+				swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k] :
+				swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+		} else {
+			DisplayPipeLineDeliveryTimeChroma[k] = 0;
+		}
+
+		/* Luma, prefetch: same formulas, selected by the prefetch ratio. */
+		DisplayPipeLineDeliveryTimeLumaPrefetch[k] = (VRatioPrefetchY[k] <= 1) ?
+			swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k] :
+			swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+
+		/* Chroma, prefetch. */
+		if (BytePerPixelC[k] != 0) {
+			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = (VRatioPrefetchC[k] <= 1) ?
+				swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k] :
+				swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+		} else {
+			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
+		}
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
+				__func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
+		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
+				__func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
+		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
+				__func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
+		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
+				__func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
+#endif
+	}
+
+	/* Pass 2: per-request delivery times. */
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		/*
+		 * Both operands are unsigned int, so this is an integer division
+		 * whose quotient is then stored in a double. The 256-byte block
+		 * height is the divisor when IsVertical() is true, the block
+		 * width otherwise.
+		 */
+		if (IsVertical(SourceRotation[k]))
+			requests_per_swath = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
+		else
+			requests_per_swath = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, requests_per_swath);
+#endif
+
+		DisplayPipeRequestDeliveryTimeLuma[k] =
+			DisplayPipeLineDeliveryTimeLuma[k] / requests_per_swath;
+		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
+			DisplayPipeLineDeliveryTimeLumaPrefetch[k] / requests_per_swath;
+
+		if (BytePerPixelC[k] != 0) {
+			/* Same computation again, now with the chroma swath and block sizes. */
+			if (IsVertical(SourceRotation[k]))
+				requests_per_swath = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
+			else
+				requests_per_swath = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, requests_per_swath);
+#endif
+			DisplayPipeRequestDeliveryTimeChroma[k] =
+				DisplayPipeLineDeliveryTimeChroma[k] / requests_per_swath;
+			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
+				DisplayPipeLineDeliveryTimeChromaPrefetch[k] / requests_per_swath;
+		} else {
+			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
+			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
+		}
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
+				__func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
+		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
+				__func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
+		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
+				__func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
+		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
+				__func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
+#endif
+	}
+
+	/* Pass 3: cursor request delivery times (cursor index 0 only). */
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		/* Evaluated for every surface, including those without a cursor. */
+		unsigned int requests_per_cursor_line;
+
+		requests_per_cursor_line = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
+				256.0 / 8.0, 1.0);
+
+		if (NumberOfCursors[k] == 0) {
+			CursorRequestDeliveryTime[k] = 0;
+			CursorRequestDeliveryTimePrefetch[k] = 0;
+		} else {
+			CursorRequestDeliveryTime[k] = (VRatio[k] <= 1) ?
+				(double) CursorWidth[k][0] /
+					HRatio[k] / PixelClock[k] / requests_per_cursor_line :
+				(double) CursorWidth[k][0] /
+					PSCL_THROUGHPUT[k] / Dppclk[k] / requests_per_cursor_line;
+
+			CursorRequestDeliveryTimePrefetch[k] = (VRatioPrefetchY[k] <= 1) ?
+				(double) CursorWidth[k][0] /
+					HRatio[k] / PixelClock[k] / requests_per_cursor_line :
+				(double) CursorWidth[k][0] /
+					PSCL_THROUGHPUT[k] / Dppclk[k] / requests_per_cursor_line;
+		}
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
+				__func__, k, NumberOfCursors[k]);
+		dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
+				__func__, k, CursorRequestDeliveryTime[k]);
+		dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
+				__func__, k, CursorRequestDeliveryTimePrefetch[k]);
+#endif
+	}
+} // CalculatePixelDeliveryTimes
+
+void dml32_CalculateMetaAndPTETimes( /*
+ * For each of the first NumberOfActiveSurfaces surfaces, fill the arrays
+ * listed under "Output" below. Nothing is returned.
+ *
+ *  - DST_Y_PER_{PTE,META}_ROW_NOM_{L,C}: the matching row height divided by
+ *    the matching vertical ratio; chroma entries are 0 when
+ *    BytePerPixelC[k] == 0.
+ *  - TimePer[Chroma]MetaChunk{Nominal,VBlank,Flip}: computed only when
+ *    DCCEnable[k] is true, otherwise all six are set to 0.
+ *  - time_per_pte_group_{nom,vblank,flip}_{luma,chroma}: computed only when
+ *    GPUVMEnable is true, otherwise all six are set to 0.
+ *
+ * Many intermediates are unsigned int, so divisions truncate and double
+ * results are truncated when stored into them.
+ */
+ bool use_one_row_for_frame[],
+ unsigned int NumberOfActiveSurfaces,
+ bool GPUVMEnable,
+ unsigned int MetaChunkSize, /* multiplied by 1024 below; presumably given in KB - TODO confirm */
+ unsigned int MinMetaChunkSizeBytes,
+ unsigned int HTotal[],
+ double VRatio[],
+ double VRatioChroma[],
+ double DestinationLinesToRequestRowInVBlank[],
+ double DestinationLinesToRequestRowInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ unsigned int BytePerPixelY[],
+ unsigned int BytePerPixelC[], /* 0 is treated as "no chroma": every chroma output becomes 0 */
+ enum dm_rotation_angle SourceRotation[],
+ unsigned int dpte_row_height[],
+ unsigned int dpte_row_height_chroma[],
+ unsigned int meta_row_width[],
+ unsigned int meta_row_width_chroma[],
+ unsigned int meta_row_height[],
+ unsigned int meta_row_height_chroma[],
+ unsigned int meta_req_width[],
+ unsigned int meta_req_width_chroma[],
+ unsigned int meta_req_height[],
+ unsigned int meta_req_height_chroma[],
+ unsigned int dpte_group_bytes[],
+ unsigned int PTERequestSizeY[],
+ unsigned int PTERequestSizeC[],
+ unsigned int PixelPTEReqWidthY[],
+ unsigned int PixelPTEReqHeightY[],
+ unsigned int PixelPTEReqWidthC[],
+ unsigned int PixelPTEReqHeightC[],
+ unsigned int dpte_row_width_luma_ub[],
+ unsigned int dpte_row_width_chroma_ub[],
+
+ /* Output */
+ double DST_Y_PER_PTE_ROW_NOM_L[],
+ double DST_Y_PER_PTE_ROW_NOM_C[],
+ double DST_Y_PER_META_ROW_NOM_L[],
+ double DST_Y_PER_META_ROW_NOM_C[],
+ double TimePerMetaChunkNominal[],
+ double TimePerChromaMetaChunkNominal[],
+ double TimePerMetaChunkVBlank[],
+ double TimePerChromaMetaChunkVBlank[],
+ double TimePerMetaChunkFlip[],
+ double TimePerChromaMetaChunkFlip[],
+ double time_per_pte_group_nom_luma[],
+ double time_per_pte_group_vblank_luma[],
+ double time_per_pte_group_flip_luma[],
+ double time_per_pte_group_nom_chroma[],
+ double time_per_pte_group_vblank_chroma[],
+ double time_per_pte_group_flip_chroma[])
+{
+ unsigned int meta_chunk_width;
+ unsigned int min_meta_chunk_width;
+ unsigned int meta_chunk_per_row_int;
+ unsigned int meta_row_remainder;
+ unsigned int meta_chunk_threshold;
+ unsigned int meta_chunks_per_row_ub;
+ unsigned int meta_chunk_width_chroma;
+ unsigned int min_meta_chunk_width_chroma;
+ unsigned int meta_chunk_per_row_int_chroma;
+ unsigned int meta_row_remainder_chroma;
+ unsigned int meta_chunk_threshold_chroma;
+ unsigned int meta_chunks_per_row_ub_chroma;
+ unsigned int dpte_group_width_luma;
+ unsigned int dpte_groups_per_row_luma_ub;
+ unsigned int dpte_group_width_chroma;
+ unsigned int dpte_groups_per_row_chroma_ub;
+ unsigned int k;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) { /* pass 1: DST_Y_PER_*_ROW_NOM_* = row height / vertical ratio */
+ DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
+ if (BytePerPixelC[k] == 0) /* no chroma bytes per pixel: chroma value forced to 0 */
+ DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
+ else
+ DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
+ DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
+ if (BytePerPixelC[k] == 0)
+ DST_Y_PER_META_ROW_NOM_C[k] = 0;
+ else
+ DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) { /* pass 2: time per meta chunk, only when DCC is enabled for the surface */
+ if (DCCEnable[k] == true) {
+ meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k]; /* all operands unsigned: integer division, truncates */
+ min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
+ meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; /* whole chunks that fit in one meta row */
+ meta_row_remainder = meta_row_width[k] % meta_chunk_width; /* width left over after the whole chunks */
+ if (!IsVertical(SourceRotation[k])) /* IsVertical() false: use the meta request width, otherwise its height */
+ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; /* NOTE(review): unsigned subtraction, wraps if meta_req_width[k] > 2 * min_meta_chunk_width - confirm callers rule this out */
+ else
+ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k]; /* NOTE(review): same unsigned wrap concern as above */
+
+ if (meta_row_remainder <= meta_chunk_threshold) /* remainder within threshold: one extra chunk, otherwise two */
+ meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
+ else
+ meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
+
+ TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * /* lines per row * HTotal / PixelClock, split across the chunks of a row */
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+ TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+ TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+ if (BytePerPixelC[k] == 0) {
+ TimePerChromaMetaChunkNominal[k] = 0;
+ TimePerChromaMetaChunkVBlank[k] = 0;
+ TimePerChromaMetaChunkFlip[k] = 0;
+ } else { /* chroma repeats the luma computation with the chroma inputs */
+ meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
+ meta_row_height_chroma[k];
+ min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
+ meta_row_height_chroma[k];
+ meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / /* unlike luma, divided in double; quotient is truncated when stored into the unsigned int */
+ meta_chunk_width_chroma;
+ meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
+ if (!IsVertical(SourceRotation[k])) {
+ meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - /* NOTE(review): unsigned subtraction, may wrap - confirm */
+ meta_req_width_chroma[k];
+ } else {
+ meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - /* NOTE(review): unsigned subtraction, may wrap - confirm */
+ meta_req_height_chroma[k];
+ }
+ if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
+ meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
+ else
+ meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
+
+ TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+ TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+ TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+ }
+ } else { /* DCC disabled for this surface: every meta chunk time is 0 */
+ TimePerMetaChunkNominal[k] = 0;
+ TimePerMetaChunkVBlank[k] = 0;
+ TimePerMetaChunkFlip[k] = 0;
+ TimePerChromaMetaChunkNominal[k] = 0;
+ TimePerChromaMetaChunkVBlank[k] = 0;
+ TimePerChromaMetaChunkFlip[k] = 0;
+ }
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) { /* pass 3: time per PTE group, only when GPUVM is enabled */
+ if (GPUVMEnable == true) {
+ if (!IsVertical(SourceRotation[k])) { /* IsVertical() false: scale by the PTE request width, otherwise by its height */
+ dpte_group_width_luma = (double) dpte_group_bytes[k] / /* computed in double, truncated on assignment to unsigned int */
+ (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
+ } else {
+ dpte_group_width_luma = (double) dpte_group_bytes[k] /
+ (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
+ }
+
+ if (use_one_row_for_frame[k]) { /* in this mode the group count is halved before the ceiling is taken */
+ dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
+ (double) dpte_group_width_luma / 2.0, 1.0);
+ } else {
+ dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
+ (double) dpte_group_width_luma, 1.0);
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n",
+ __func__, k, use_one_row_for_frame[k]);
+ dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n",
+ __func__, k, dpte_group_bytes[k]);
+ dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n",
+ __func__, k, PTERequestSizeY[k]);
+ dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n",
+ __func__, k, PixelPTEReqWidthY[k]);
+ dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n",
+ __func__, k, PixelPTEReqHeightY[k]);
+ dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n",
+ __func__, k, dpte_row_width_luma_ub[k]);
+ dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n",
+ __func__, k, dpte_group_width_luma);
+ dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n",
+ __func__, k, dpte_groups_per_row_luma_ub);
+#endif
+
+ time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * /* reuses the pass-1 result for this surface */
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+ time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+ time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+ if (BytePerPixelC[k] == 0) {
+ time_per_pte_group_nom_chroma[k] = 0;
+ time_per_pte_group_vblank_chroma[k] = 0;
+ time_per_pte_group_flip_chroma[k] = 0;
+ } else { /* chroma repeats the luma computation with the chroma inputs */
+ if (!IsVertical(SourceRotation[k])) {
+ dpte_group_width_chroma = (double) dpte_group_bytes[k] /
+ (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
+ } else {
+ dpte_group_width_chroma = (double) dpte_group_bytes[k] /
+ (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
+ }
+
+ if (use_one_row_for_frame[k]) {
+ dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
+ (double) dpte_group_width_chroma / 2.0, 1.0);
+ } else {
+ dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
+ (double) dpte_group_width_chroma, 1.0);
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n",
+ __func__, k, dpte_row_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n",
+ __func__, k, dpte_group_width_chroma);
+ dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n",
+ __func__, k, dpte_groups_per_row_chroma_ub);
+#endif
+ time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+ time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+ time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+ }
+ } else { /* GPUVM disabled: every PTE group time is 0 */
+ time_per_pte_group_nom_luma[k] = 0;
+ time_per_pte_group_vblank_luma[k] = 0;
+ time_per_pte_group_flip_luma[k] = 0;
+ time_per_pte_group_nom_chroma[k] = 0;
+ time_per_pte_group_vblank_chroma[k] = 0;
+ time_per_pte_group_flip_chroma[k] = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n",
+ __func__, k, DestinationLinesToRequestRowInVBlank[k]);
+ dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip = %f\n",
+ __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
+ dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n",
+ __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
+ dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n",
+ __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
+ dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n",
+ __func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
+ dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n",
+ __func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
+ dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n",
+ __func__, k, TimePerMetaChunkNominal[k]);
+ dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n",
+ __func__, k, TimePerMetaChunkVBlank[k]);
+ dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n",
+ __func__, k, TimePerMetaChunkFlip[k]);
+ dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n",
+ __func__, k, TimePerChromaMetaChunkNominal[k]);
+ dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n",
+ __func__, k, TimePerChromaMetaChunkVBlank[k]);
+ dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n",
+ __func__, k, TimePerChromaMetaChunkFlip[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n",
+ __func__, k, time_per_pte_group_nom_luma[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n",
+ __func__, k, time_per_pte_group_vblank_luma[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n",
+ __func__, k, time_per_pte_group_flip_luma[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n",
+ __func__, k, time_per_pte_group_nom_chroma[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
+ __func__, k, time_per_pte_group_vblank_chroma[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma = %f\n",
+ __func__, k, time_per_pte_group_flip_chroma[k]);
+#endif
+ }
+} // CalculateMetaAndPTETimes
+
+/*
+ * dml32_CalculateVMGroupAndRequestTimes() - per-surface VM group/request times.
+ *
+ * For every surface k below NumberOfActiveSurfaces, fills the four output
+ * arrays (TimePerVMGroupVBlank, TimePerVMGroupFlip, TimePerVMRequestVBlank,
+ * TimePerVMRequestFlip).
+ *
+ * A surface gets all-zero outputs when GPUVMEnable is false, or when it has
+ * neither DCC enabled nor more than one page table level.  Otherwise:
+ *
+ *   - a group count is built from the dpde0 and/or meta_pte byte counts, each
+ *     divided by vm_group_bytes[k] and rounded up with dml_ceil();
+ *   - a request count is built from the same byte counts divided by 64
+ *     (integer division);
+ *   - DestinationLines... * HTotal / PixelClock is divided by each count;
+ *   - all four results are halved when GPUVMMaxPageTableLevels > 2.
+ *
+ * Which byte counts contribute:
+ *   DCC off                       -> dpde0 only
+ *   DCC on, exactly one level     -> meta_pte only
+ *   DCC on, any other level count -> both, plus a constant of 1 (luma only)
+ *                                    or 2 (luma + chroma) added to the groups
+ * The chroma ("_c") byte counts are included only when BytePerPixelC[k] > 0.
+ *
+ * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are not read here.
+ *
+ * NOTE(review): both counts can evaluate to 0 (for example byte counts below
+ * 64 give a request count of 0), which makes the divisions below
+ * floating-point divisions by zero - confirm callers tolerate that.
+ */
+void dml32_CalculateVMGroupAndRequestTimes(
+		unsigned int NumberOfActiveSurfaces,
+		bool GPUVMEnable,
+		unsigned int GPUVMMaxPageTableLevels,
+		unsigned int HTotal[],
+		unsigned int BytePerPixelC[],
+		double DestinationLinesToRequestVMInVBlank[],
+		double DestinationLinesToRequestVMInImmediateFlip[],
+		bool DCCEnable[],
+		double PixelClock[],
+		unsigned int dpte_row_width_luma_ub[],
+		unsigned int dpte_row_width_chroma_ub[],
+		unsigned int vm_group_bytes[],
+		unsigned int dpde0_bytes_per_frame_ub_l[],
+		unsigned int dpde0_bytes_per_frame_ub_c[],
+		unsigned int meta_pte_bytes_per_frame_ub_l[],
+		unsigned int meta_pte_bytes_per_frame_ub_c[],
+
+		/* Output */
+		double TimePerVMGroupVBlank[],
+		double TimePerVMGroupFlip[],
+		double TimePerVMRequestVBlank[],
+		double TimePerVMRequestFlip[])
+{
+	unsigned int k;
+	unsigned int group_count;
+	unsigned int request_count;
+	double group_sum;
+	double vblank_time;
+	double flip_time;
+	bool has_chroma;
+	bool use_dpde0;
+	bool use_meta_pte;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
+	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
+#endif
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
+		dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
+		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
+				__func__, k, dpde0_bytes_per_frame_ub_l[k]);
+		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
+				__func__, k, dpde0_bytes_per_frame_ub_c[k]);
+		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
+				__func__, k, meta_pte_bytes_per_frame_ub_l[k]);
+		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
+				__func__, k, meta_pte_bytes_per_frame_ub_c[k]);
+#endif
+
+		if (!GPUVMEnable || (!DCCEnable[k] && GPUVMMaxPageTableLevels <= 1)) {
+			/* Nothing to request for this surface. */
+			TimePerVMGroupVBlank[k] = 0;
+			TimePerVMGroupFlip[k] = 0;
+			TimePerVMRequestVBlank[k] = 0;
+			TimePerVMRequestFlip[k] = 0;
+		} else {
+			has_chroma = BytePerPixelC[k] > 0;
+			use_dpde0 = !DCCEnable[k] || GPUVMMaxPageTableLevels != 1;
+			use_meta_pte = DCCEnable[k];
+
+			/*
+			 * When both kinds of byte counts contribute, the group
+			 * count starts from a constant: 2 with chroma, 1 without.
+			 */
+			if (use_dpde0 && use_meta_pte)
+				group_sum = has_chroma ? 2 : 1;
+			else
+				group_sum = 0;
+			request_count = 0;
+
+			if (use_dpde0) {
+				group_sum += dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) /
+						(double) (vm_group_bytes[k]), 1.0);
+				request_count += dpde0_bytes_per_frame_ub_l[k] / 64;
+				if (has_chroma) {
+					group_sum += dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
+							(double) (vm_group_bytes[k]), 1.0);
+					request_count += dpde0_bytes_per_frame_ub_c[k] / 64;
+				}
+			}
+
+			if (use_meta_pte) {
+				group_sum += dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
+						(double) (vm_group_bytes[k]), 1.0);
+				request_count += meta_pte_bytes_per_frame_ub_l[k] / 64;
+				if (has_chroma) {
+					group_sum += dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
+							(double) (vm_group_bytes[k]), 1.0);
+					request_count += meta_pte_bytes_per_frame_ub_c[k] / 64;
+				}
+			}
+
+			/* The divisor is the sum truncated to an unsigned integer. */
+			group_count = group_sum;
+
+			vblank_time = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k];
+			flip_time = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k];
+
+			TimePerVMGroupVBlank[k] = vblank_time / group_count;
+			TimePerVMGroupFlip[k] = flip_time / group_count;
+			TimePerVMRequestVBlank[k] = vblank_time / request_count;
+			TimePerVMRequestFlip[k] = flip_time / request_count;
+
+			if (GPUVMMaxPageTableLevels > 2) {
+				TimePerVMGroupVBlank[k] /= 2;
+				TimePerVMGroupFlip[k] /= 2;
+				TimePerVMRequestVBlank[k] /= 2;
+				TimePerVMRequestFlip[k] /= 2;
+			}
+		}
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
+		dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
+		dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
+		dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
+#endif
+	}
+} // CalculateVMGroupAndRequestTimes
+
+/*
+ * Decide, for one scan direction, which planes are flagged req128.
+ *
+ * Both flags are cleared when twice the luma plus twice the chroma swath bytes
+ * fit in det_bytes.  Otherwise only one plane keeps a doubled share: luma when
+ * it is smaller than 1.5x chroma, chroma when it is not.  The flag of that
+ * plane is cleared if the resulting sum fits; every other flag stays at 1.
+ */
+static void dml32_pick_req128(
+		unsigned int swath_bytes_l,
+		unsigned int swath_bytes_c,
+		unsigned int det_bytes,
+		unsigned int *req128_l,
+		unsigned int *req128_c)
+{
+	*req128_l = 1;
+	*req128_c = 1;
+
+	if (2 * swath_bytes_l + 2 * swath_bytes_c <= det_bytes) {
+		*req128_l = 0;
+		*req128_c = 0;
+	} else if (swath_bytes_l < 1.5 * swath_bytes_c) {
+		if (2 * swath_bytes_l + swath_bytes_c <= det_bytes)
+			*req128_l = 0;
+	} else if (swath_bytes_l + 2 * swath_bytes_c <= det_bytes) {
+		*req128_c = 0;
+	}
+}
+
+/*
+ * dml32_CalculateDCCConfiguration() - choose DCC block sizes for one surface.
+ *
+ * Estimates the full-swath byte counts of the luma and chroma planes for a
+ * horizontal and a vertical scan, after clamping the surface size to a
+ * maximum viewport, and compares them against nomDETInKByte * 1024 bytes.
+ * The outcome selects a request type per plane, which in turn sets the six
+ * outputs:
+ *
+ *   request type               MaxUncompressed  MaxCompressed  Independent
+ *   REQ_256Bytes                     256             256            0
+ *   REQ_128BytesContiguous           256             128          128
+ *   REQ_128BytesNonContiguous        256              64           64
+ *
+ * Scan direction: both directions are considered when
+ * DCCProgrammingAssumesScanDirectionUnknown is set; otherwise the horizontal
+ * result is used unless IsVertical(SourceRotation).
+ *
+ * All luma outputs are 0 when DCCEnabled is false.  All chroma outputs are 0
+ * when DCCEnabled is false or BytePerPixelC is 0.
+ *
+ * SurfaceWidthChroma, SurfaceHeightChroma, TilingFormat, BytePerPixelDETY and
+ * BytePerPixelDETC are accepted but not read by this function.
+ */
+void dml32_CalculateDCCConfiguration(
+		bool DCCEnabled,
+		bool DCCProgrammingAssumesScanDirectionUnknown,
+		enum source_format_class SourcePixelFormat,
+		unsigned int SurfaceWidthLuma,
+		unsigned int SurfaceWidthChroma,
+		unsigned int SurfaceHeightLuma,
+		unsigned int SurfaceHeightChroma,
+		unsigned int nomDETInKByte,
+		unsigned int RequestHeight256ByteLuma,
+		unsigned int RequestHeight256ByteChroma,
+		enum dm_swizzle_mode TilingFormat,
+		unsigned int BytePerPixelY,
+		unsigned int BytePerPixelC,
+		double BytePerPixelDETY,
+		double BytePerPixelDETC,
+		enum dm_rotation_angle SourceRotation,
+		/* Output */
+		unsigned int *MaxUncompressedBlockLuma,
+		unsigned int *MaxUncompressedBlockChroma,
+		unsigned int *MaxCompressedBlockLuma,
+		unsigned int *MaxCompressedBlockChroma,
+		unsigned int *IndependentBlockLuma,
+		unsigned int *IndependentBlockChroma)
+{
+	typedef enum {
+		REQ_256Bytes,
+		REQ_128BytesNonContiguous,
+		REQ_128BytesContiguous,
+		REQ_NA
+	} RequestType;
+
+	RequestType RequestLuma;
+	RequestType RequestChroma;
+
+	const unsigned int det_bytes = nomDETInKByte * 1024;
+	const bool has_chroma = (BytePerPixelC != 0);
+	const bool is_420_10 = (SourcePixelFormat == dm_420_10);
+	/* 2 for the dm_420_8/10/12 formats, 1 for everything else. */
+	const unsigned int chroma_div = (SourcePixelFormat == dm_420_8 || is_420_10 ||
+			SourcePixelFormat == dm_420_12) ? 2 : 1;
+	/* Vertical divisors are 1 for one-byte pixels and 2 otherwise. */
+	const unsigned int vert_divisor_l = (BytePerPixelY == 1) ? 1 : 2;
+	const unsigned int vert_divisor_c = (BytePerPixelC == 1) ? 1 : 2;
+
+	unsigned int swath_buf_bytes;
+	double horz_denom;
+	double vert_denom;
+	double vp_horz_limit;
+	double vp_vert_limit;
+	unsigned int mas_horz_limit;
+	unsigned int mas_vert_limit;
+	unsigned int max_width;
+	unsigned int max_height;
+	unsigned int width_l;
+	unsigned int width_c;
+	unsigned int height_l;
+	unsigned int height_c;
+	unsigned int horz_bytes_l;
+	unsigned int horz_bytes_c;
+	unsigned int vert_bytes_l;
+	unsigned int vert_bytes_c;
+	unsigned int req128_horz_l;
+	unsigned int req128_horz_c;
+	unsigned int req128_vert_l;
+	unsigned int req128_vert_c;
+	unsigned int seg_horz_l;
+	unsigned int seg_horz_c;
+	unsigned int seg_vert_l;
+	unsigned int seg_vert_c;
+	bool need128_l;
+	bool need128_c;
+	bool noncontig_l;
+	bool noncontig_c;
+	unsigned int comp_l;
+	unsigned int comp_c;
+	unsigned int indep_l;
+	unsigned int indep_c;
+
+	/*
+	 * Viewport limits derived from half the buffer minus a fixed
+	 * reservation (twice as large when a chroma plane is present).
+	 */
+	swath_buf_bytes = det_bytes / 2 - (has_chroma ? 2 * 2 * 256 : 2 * 256);
+
+	horz_denom = (double) RequestHeight256ByteLuma * BytePerPixelY / 2;
+	vert_denom = 256.0 / RequestHeight256ByteLuma / vert_divisor_l;
+	if (has_chroma) {
+		horz_denom += (double) RequestHeight256ByteChroma * BytePerPixelC / 2 / chroma_div;
+		vert_denom += 256.0 / RequestHeight256ByteChroma / vert_divisor_c / chroma_div;
+	}
+
+	vp_horz_limit = (double) swath_buf_bytes / horz_denom;
+	vp_vert_limit = (double) swath_buf_bytes / vert_denom;
+
+	if (is_420_10) {
+		vp_horz_limit *= 1.5;
+		vp_vert_limit *= 1.5;
+	}
+
+	vp_horz_limit = dml_floor(vp_horz_limit - 1, 16);
+	vp_vert_limit = dml_floor(vp_vert_limit - 1, 16);
+
+	/* Fixed per-format caps on the viewport. */
+	if (SourcePixelFormat == dm_rgbe_alpha) {
+		mas_horz_limit = 3840;
+		mas_vert_limit = 3840;
+	} else {
+		mas_horz_limit = 6144;
+		mas_vert_limit = (BytePerPixelY == 8) ? 3072 : 6144;
+	}
+
+	max_width = dml_min((double) mas_horz_limit, vp_horz_limit);
+	max_height = dml_min((double) mas_vert_limit, vp_vert_limit);
+
+	/* Clamp the luma surface to the viewport; chroma follows luma. */
+	width_l = SurfaceWidthLuma;
+	if (width_l > max_width)
+		width_l = max_width;
+	width_c = width_l / chroma_div;
+
+	height_l = SurfaceHeightLuma;
+	if (height_l > max_height)
+		height_l = max_height;
+	height_c = height_l / chroma_div;
+
+	/* Full-swath byte counts per plane and scan direction. */
+	horz_bytes_l = width_l * RequestHeight256ByteLuma * BytePerPixelY;
+	vert_bytes_l = height_l * 256 / RequestHeight256ByteLuma;
+	horz_bytes_c = 0;
+	vert_bytes_c = 0;
+	if (has_chroma) {
+		horz_bytes_c = width_c * RequestHeight256ByteChroma * BytePerPixelC;
+		vert_bytes_c = height_c * 256 / RequestHeight256ByteChroma;
+	}
+
+	if (is_420_10) {
+		horz_bytes_l = dml_ceil((double) horz_bytes_l * 2.0 / 3.0, 256.0);
+		horz_bytes_c = dml_ceil((double) horz_bytes_c * 2.0 / 3.0, 256.0);
+		vert_bytes_l = dml_ceil((double) vert_bytes_l * 2.0 / 3.0, 256.0);
+		vert_bytes_c = dml_ceil((double) vert_bytes_c * 2.0 / 3.0, 256.0);
+	}
+
+	dml32_pick_req128(horz_bytes_l, horz_bytes_c, det_bytes, &req128_horz_l, &req128_horz_c);
+	dml32_pick_req128(vert_bytes_l, vert_bytes_c, det_bytes, &req128_vert_l, &req128_vert_c);
+
+	/* Two-byte pixels swap which direction is flagged as contiguous. */
+	seg_horz_l = (BytePerPixelY == 2) ? 0 : 1;
+	seg_vert_l = 1 - seg_horz_l;
+	seg_horz_c = (BytePerPixelC == 2) ? 0 : 1;
+	seg_vert_c = 1 - seg_horz_c;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
+	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
+	dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, det_bytes);
+	dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_l);
+	dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_c);
+	dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, horz_bytes_l);
+	dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, vert_bytes_c);
+	dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, seg_horz_l);
+	dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
+			__func__, seg_horz_c);
+#endif
+
+	/*
+	 * Reduce the per-direction flags to two facts per plane: is a
+	 * 128-byte request needed at all, and if so is it non-contiguous.
+	 */
+	if (DCCProgrammingAssumesScanDirectionUnknown) {
+		need128_l = req128_horz_l || req128_vert_l;
+		noncontig_l = (req128_horz_l && !seg_horz_l) || (req128_vert_l && !seg_vert_l);
+		need128_c = req128_horz_c || req128_vert_c;
+		noncontig_c = (req128_horz_c && !seg_horz_c) || (req128_vert_c && !seg_vert_c);
+	} else if (!IsVertical(SourceRotation)) {
+		need128_l = req128_horz_l;
+		noncontig_l = !seg_horz_l;
+		need128_c = req128_horz_c;
+		noncontig_c = !seg_horz_c;
+	} else {
+		need128_l = req128_vert_l;
+		noncontig_l = !seg_vert_l;
+		need128_c = req128_vert_c;
+		noncontig_c = !seg_vert_c;
+	}
+
+	if (!need128_l)
+		RequestLuma = REQ_256Bytes;
+	else
+		RequestLuma = noncontig_l ? REQ_128BytesNonContiguous : REQ_128BytesContiguous;
+
+	if (!need128_c)
+		RequestChroma = REQ_256Bytes;
+	else
+		RequestChroma = noncontig_c ? REQ_128BytesNonContiguous : REQ_128BytesContiguous;
+
+	switch (RequestLuma) {
+	case REQ_256Bytes:
+		comp_l = 256;
+		indep_l = 0;
+		break;
+	case REQ_128BytesContiguous:
+		comp_l = 128;
+		indep_l = 128;
+		break;
+	default:
+		comp_l = 64;
+		indep_l = 64;
+		break;
+	}
+
+	switch (RequestChroma) {
+	case REQ_256Bytes:
+		comp_c = 256;
+		indep_c = 0;
+		break;
+	case REQ_128BytesContiguous:
+		comp_c = 128;
+		indep_c = 128;
+		break;
+	default:
+		comp_c = 64;
+		indep_c = 64;
+		break;
+	}
+
+	if (DCCEnabled) {
+		*MaxUncompressedBlockLuma = 256;
+		*MaxCompressedBlockLuma = comp_l;
+		*IndependentBlockLuma = indep_l;
+	} else {
+		*MaxUncompressedBlockLuma = 0;
+		*MaxCompressedBlockLuma = 0;
+		*IndependentBlockLuma = 0;
+	}
+
+	if (DCCEnabled && has_chroma) {
+		*MaxUncompressedBlockChroma = 256;
+		*MaxCompressedBlockChroma = comp_c;
+		*IndependentBlockChroma = indep_c;
+	} else {
+		*MaxUncompressedBlockChroma = 0;
+		*MaxCompressedBlockChroma = 0;
+		*IndependentBlockChroma = 0;
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
+	dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
+	dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
+	dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
+	dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
+	dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
+#endif
+
+} // CalculateDCCConfiguration
+
+void dml32_CalculateStutterEfficiency(
+ unsigned int CompressedBufferSizeInkByte,
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ bool UnboundedRequestEnabled,
+ unsigned int MetaFIFOSizeInKEntries,
+ unsigned int ZeroSizeBufferEntries,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int ROBBufferSizeInKByte,
+ double TotalDataReadBandwidth,
+ double DCFCLK,
+ double ReturnBW,
+ unsigned int CompbufReservedSpace64B,
+ unsigned int CompbufReservedSpaceZs,
+ double SRExitTime,
+ double SRExitZ8Time,
+ bool SynchronizeTimingsFinal,
+ unsigned int BlendingAndTiming[],
+ double StutterEnterPlusExitWatermark,
+ double Z8StutterEnterPlusExitWatermark,
+ bool ProgressiveToInterlaceUnitInOPP,
+ bool Interlace[],
+ double MinTTUVBlank[],
+ unsigned int DPPPerSurface[],
+ unsigned int DETBufferSizeY[],
+ unsigned int BytePerPixelY[],
+ double BytePerPixelDETY[],
+ double SwathWidthY[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ double NetDCCRateLuma[],
+ double NetDCCRateChroma[],
+ double DCCFractionOfZeroSizeRequestsLuma[],
+ double DCCFractionOfZeroSizeRequestsChroma[],
+ unsigned int HTotal[],
+ unsigned int VTotal[],
+ double PixelClock[],
+ double VRatio[],
+ enum dm_rotation_angle SourceRotation[],
+ unsigned int BlockHeight256BytesY[],
+ unsigned int BlockWidth256BytesY[],
+ unsigned int BlockHeight256BytesC[],
+ unsigned int BlockWidth256BytesC[],
+ unsigned int DCCYMaxUncompressedBlock[],
+ unsigned int DCCCMaxUncompressedBlock[],
+ unsigned int VActive[],
+ bool DCCEnable[],
+ bool WritebackEnable[],
+ double ReadBandwidthSurfaceLuma[],
+ double ReadBandwidthSurfaceChroma[],
+ double meta_row_bw[],
+ double dpte_row_bw[],
+
+ /* Output */
+ double *StutterEfficiencyNotIncludingVBlank,
+ double *StutterEfficiency,
+ unsigned int *NumberOfStutterBurstsPerFrame,
+ double *Z8StutterEfficiencyNotIncludingVBlank,
+ double *Z8StutterEfficiency,
+ unsigned int *Z8NumberOfStutterBurstsPerFrame,
+ double *StutterPeriod,
+ bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
+{
+
+ bool FoundCriticalSurface = false;
+ unsigned int SwathSizeCriticalSurface = 0;
+ unsigned int LastChunkOfSwathSize;
+ unsigned int MissingPartOfLastSwathOfDETSize;
+ double LastZ8StutterPeriod = 0.0;
+ double LastStutterPeriod = 0.0;
+ unsigned int TotalNumberOfActiveOTG = 0;
+ double doublePixelClock = 0;
+ unsigned int doubleHTotal = 0;
+ unsigned int doubleVTotal = 0;
+ bool SameTiming = true;
+ double DETBufferingTimeY;
+ double SwathWidthYCriticalSurface = 0.0;
+ double SwathHeightYCriticalSurface = 0.0;
+ double VActiveTimeCriticalSurface = 0.0;
+ double FrameTimeCriticalSurface = 0.0;
+ unsigned int BytePerPixelYCriticalSurface = 0;
+ double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
+ unsigned int DETBufferSizeYCriticalSurface = 0;
+ double MinTTUVBlankCriticalSurface = 0.0;
+ unsigned int BlockWidth256BytesYCriticalSurface = 0;
+ bool doublePlaneCriticalSurface = 0;
+ bool doublePipeCriticalSurface = 0;
+ double TotalCompressedReadBandwidth;
+ double TotalRowReadBandwidth;
+ double AverageDCCCompressionRate;
+ double EffectiveCompressedBufferSize;
+ double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
+ double StutterBurstTime;
+ unsigned int TotalActiveWriteback;
+ double LinesInDETY;
+ double LinesInDETYRoundedDownToSwath;
+ double MaximumEffectiveCompressionLuma;
+ double MaximumEffectiveCompressionChroma;
+ double TotalZeroSizeRequestReadBandwidth;
+ double TotalZeroSizeCompressedReadBandwidth;
+ double AverageDCCZeroSizeFraction;
+ double AverageZeroSizeCompressionRate;
+ unsigned int k;
+
+ TotalZeroSizeRequestReadBandwidth = 0;
+ TotalZeroSizeCompressedReadBandwidth = 0;
+ TotalRowReadBandwidth = 0;
+ TotalCompressedReadBandwidth = 0;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
+ if (DCCEnable[k] == true) {
+ if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
+ || (!IsVertical(SourceRotation[k])
+ && BlockHeight256BytesY[k] > SwathHeightY[k])
+ || DCCYMaxUncompressedBlock[k] < 256) {
+ MaximumEffectiveCompressionLuma = 2;
+ } else {
+ MaximumEffectiveCompressionLuma = 4;
+ }
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
+ + ReadBandwidthSurfaceLuma[k]
+ / dml_min(NetDCCRateLuma[k],
+ MaximumEffectiveCompressionLuma);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
+ __func__, k, ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
+ __func__, k, NetDCCRateLuma[k]);
+ dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
+ __func__, k, MaximumEffectiveCompressionLuma);
+#endif
+ TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
+ + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
+ TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
+ + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
+ / MaximumEffectiveCompressionLuma;
+
+ if (ReadBandwidthSurfaceChroma[k] > 0) {
+ if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
+ || (!IsVertical(SourceRotation[k])
+ && BlockHeight256BytesC[k] > SwathHeightC[k])
+ || DCCCMaxUncompressedBlock[k] < 256) {
+ MaximumEffectiveCompressionChroma = 2;
+ } else {
+ MaximumEffectiveCompressionChroma = 4;
+ }
+ TotalCompressedReadBandwidth =
+ TotalCompressedReadBandwidth
+ + ReadBandwidthSurfaceChroma[k]
+ / dml_min(NetDCCRateChroma[k],
+ MaximumEffectiveCompressionChroma);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
+ __func__, k, ReadBandwidthSurfaceChroma[k]);
+ dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
+ __func__, k, NetDCCRateChroma[k]);
+ dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
+ __func__, k, MaximumEffectiveCompressionChroma);
+#endif
+ TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
+ + ReadBandwidthSurfaceChroma[k]
+ * DCCFractionOfZeroSizeRequestsChroma[k];
+ TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
+ + ReadBandwidthSurfaceChroma[k]
+ * DCCFractionOfZeroSizeRequestsChroma[k]
+ / MaximumEffectiveCompressionChroma;
+ }
+ } else {
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
+ + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
+ }
+ TotalRowReadBandwidth = TotalRowReadBandwidth
+ + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
+ }
+ }
+
+ AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
+ AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
+ dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
+ dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
+ dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
+ __func__, TotalZeroSizeCompressedReadBandwidth);
+ dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
+ dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
+ dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
+ dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
+ dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
+ dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
+#endif
+ if (AverageDCCZeroSizeFraction == 1) {
+ AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
+ / TotalZeroSizeCompressedReadBandwidth;
+ EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
+ * AverageZeroSizeCompressionRate
+ + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
+ * AverageZeroSizeCompressionRate;
+ } else if (AverageDCCZeroSizeFraction > 0) {
+ AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
+ / TotalZeroSizeCompressedReadBandwidth;
+ EffectiveCompressedBufferSize = dml_min(
+ (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
+ (double) MetaFIFOSizeInKEntries * 1024 * 64
+ / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
+ + 1 / AverageDCCCompressionRate))
+ + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
+ * AverageDCCCompressionRate,
+ ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
+ / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: min 1 = %f\n", __func__,
+ CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
+ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
+ AverageDCCCompressionRate));
+ dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
+ CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
+ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
+#endif
+ } else {
+ EffectiveCompressedBufferSize = dml_min(
+ (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
+ (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
+ + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
+ * AverageDCCCompressionRate;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: min 1 = %f\n", __func__,
+ CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 2 = %f\n", __func__,
+ MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
+#endif
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
+ dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
+ dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
+#endif
+
+ *StutterPeriod = 0;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
+ LinesInDETY = ((double) DETBufferSizeY[k]
+ + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
+ * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
+ / BytePerPixelDETY[k] / SwathWidthY[k];
+ LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
+ DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
+ / VRatio[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
+ dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
+ dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
+ dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
+ __func__, k, ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
+ dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
+ dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
+ __func__, k, LinesInDETYRoundedDownToSwath);
+ dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
+ dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
+ dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
+ dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
+ dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
+#endif
+
+ if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
+ bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
+
+ FoundCriticalSurface = true;
+ *StutterPeriod = DETBufferingTimeY;
+ FrameTimeCriticalSurface = (
+ isInterlaceTiming ?
+ dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
+ * (double) HTotal[k] / PixelClock[k];
+ VActiveTimeCriticalSurface = (
+ isInterlaceTiming ?
+ dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
+ * (double) HTotal[k] / PixelClock[k];
+ BytePerPixelYCriticalSurface = BytePerPixelY[k];
+ SwathWidthYCriticalSurface = SwathWidthY[k];
+ SwathHeightYCriticalSurface = SwathHeightY[k];
+ BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
+ LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
+ - (LinesInDETY - LinesInDETYRoundedDownToSwath);
+ DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
+ MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
+ doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
+ doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n",
+ __func__, k, FoundCriticalSurface);
+ dml_print("DML::%s: k=%0d, StutterPeriod = %f\n",
+ __func__, k, *StutterPeriod);
+ dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n",
+ __func__, k, MinTTUVBlankCriticalSurface);
+ dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = %f\n",
+ __func__, k, FrameTimeCriticalSurface);
+ dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n",
+ __func__, k, VActiveTimeCriticalSurface);
+ dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n",
+ __func__, k, BytePerPixelYCriticalSurface);
+ dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n",
+ __func__, k, SwathWidthYCriticalSurface);
+ dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n",
+ __func__, k, SwathHeightYCriticalSurface);
+ dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n",
+ __func__, k, BlockWidth256BytesYCriticalSurface);
+ dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n",
+ __func__, k, doublePlaneCriticalSurface);
+ dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n",
+ __func__, k, doublePipeCriticalSurface);
+ dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
+ __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
+#endif
+ }
+ }
+ }
+
+ PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
+ EffectiveCompressedBufferSize);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
+ dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
+ dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
+ __func__, *StutterPeriod * TotalDataReadBandwidth);
+ dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
+ dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
+ PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
+ dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
+ dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
+ dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
+ dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
+#endif
+
+ StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
+ / ReturnBW
+ + (*StutterPeriod * TotalDataReadBandwidth
+ - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
+ + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
+ AverageDCCCompressionRate / ReturnBW);
+ dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
+ __func__, (*StutterPeriod * TotalDataReadBandwidth));
+ dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
+ PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
+ dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
+ dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
+#endif
+ StutterBurstTime = dml_max(StutterBurstTime,
+ LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
+ * SwathWidthYCriticalSurface / ReturnBW);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Time to finish residue swath=%f\n",
+ __func__,
+ LinesToFinishSwathTransferStutterCriticalSurface *
+ BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
+#endif
+
+ TotalActiveWriteback = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (WritebackEnable[k])
+ TotalActiveWriteback = TotalActiveWriteback + 1;
+ }
+
+ if (TotalActiveWriteback == 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
+ dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
+ dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
+ dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
+#endif
+ *StutterEfficiencyNotIncludingVBlank = dml_max(0.,
+ 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
+ *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
+ 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
+ *NumberOfStutterBurstsPerFrame = (
+ *StutterEfficiencyNotIncludingVBlank > 0 ?
+ dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
+ *Z8NumberOfStutterBurstsPerFrame = (
+ *Z8StutterEfficiencyNotIncludingVBlank > 0 ?
+ dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
+ } else {
+ *StutterEfficiencyNotIncludingVBlank = 0.;
+ *Z8StutterEfficiencyNotIncludingVBlank = 0.;
+ *NumberOfStutterBurstsPerFrame = 0;
+ *Z8NumberOfStutterBurstsPerFrame = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
+ dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
+ __func__, *StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
+ __func__, *Z8StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
+ dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
+#endif
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
+ if (BlendingAndTiming[k] == k) {
+ if (TotalNumberOfActiveOTG == 0) {
+ doublePixelClock = PixelClock[k];
+ doubleHTotal = HTotal[k];
+ doubleVTotal = VTotal[k];
+ } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
+ || doubleVTotal != VTotal[k]) {
+ SameTiming = false;
+ }
+ TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
+ }
+ }
+ }
+
+ if (*StutterEfficiencyNotIncludingVBlank > 0) {
+ LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
+
+ if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
+ && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
+ *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
+ + StutterBurstTime * VActiveTimeCriticalSurface
+ / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
+ } else {
+ *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
+ }
+ } else {
+ *StutterEfficiency = 0;
+ }
+
+ if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
+ LastZ8StutterPeriod = VActiveTimeCriticalSurface
+ - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
+ if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
+ MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
+ *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
+ * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
+ } else {
+ *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
+ }
+ } else {
+ *Z8StutterEfficiency = 0.;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
+ dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
+ dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
+ dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
+ dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
+ dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
+ dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
+ __func__, *StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
+#endif
+
+ SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
+ * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
+ LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
+ MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
+ - DETBufferSizeYCriticalSurface;
+
+ *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
+ && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
+ && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
+ && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
+ dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
+ dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
+ dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
+#endif
+} // CalculateStutterEfficiency
+
+void dml32_CalculateMaxDETAndMinCompressedBufferSize(
+ unsigned int ConfigReturnBufferSizeInKByte,
+ unsigned int ROBBufferSizeInKByte,
+ unsigned int MaxNumDPP,
+ bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
+ unsigned int nomDETInKByteOverrideValue, // VBA_DELTA
+
+ /* Output */
+ unsigned int *MaxTotalDETInKByte,
+ unsigned int *nomDETInKByte,
+ unsigned int *MinCompressedBufferSizeInKByte)
+{ /* Fills the three outputs from the return-buffer and ROB sizes; *nomDETInKByte may then be overridden. */
+ bool det_buff_size_override_en = nomDETInKByteOverrideEnable;
+ unsigned int det_buff_size_override_val = nomDETInKByteOverrideValue;
+
+ *MaxTotalDETInKByte = dml_ceil(((double)ConfigReturnBufferSizeInKByte +
+ (double) ROBBufferSizeInKByte) * 4.0 / 5.0, 64); /* 4/5 of (return buffer + ROB); dml_ceil(x, 64) presumably rounds up to a multiple of 64 - confirm */
+ *nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64); /* per-DPP share; NOTE(review): no guard for MaxNumDPP == 0 */
+ *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; /* unsigned subtraction: wraps if *MaxTotalDETInKByte > ConfigReturnBufferSizeInKByte */
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
+ dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
+ dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
+ dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
+ dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
+ dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
+#endif
+
+ if (det_buff_size_override_en) { /* override replaces only *nomDETInKByte; the other two outputs keep the computed values */
+ *nomDETInKByte = det_buff_size_override_val;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
+#endif
+ }
+} // CalculateMaxDETAndMinCompressedBufferSize
+
+bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ bool NotUrgentLatencyHiding[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double cursor_bw[],
+ double meta_row_bandwidth[],
+ double dpte_row_bandwidth[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[])
+{ /* Returns true when the summed active bandwidth is <= ReturnBW and no surface sets NotUrgentLatencyHiding. */
+ unsigned int k;
+ bool NotEnoughUrgentLatencyHiding = false;
+ bool CalculateVActiveBandwithSupport_val = false;
+ double VActiveBandwith = 0;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) { /* a single flagged surface is enough to fail the check */
+ if (NotUrgentLatencyHiding[k]) {
+ NotEnoughUrgentLatencyHiding = true;
+ }
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ VActiveBandwith = VActiveBandwith + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * meta_row_bandwidth[k] + NumberOfDPP[k] * dpte_row_bandwidth[k]; /* burst-factor-weighted luma/chroma/cursor reads plus per-DPP meta and dpte row bandwidth */
+ }
+
+ CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
+
+#ifdef __DML_VBA_DEBUG__
+dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, NotEnoughUrgentLatencyHiding);
+dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith);
+dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
+dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, CalculateVActiveBandwithSupport_val);
+#endif
+ return CalculateVActiveBandwithSupport_val;
+}
+
+void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ bool NotUrgentLatencyHiding[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double PrefetchBandwidthLuma[],
+ double PrefetchBandwidthChroma[],
+ double cursor_bw[],
+ double meta_row_bandwidth[],
+ double dpte_row_bandwidth[],
+ double cursor_bw_pre[],
+ double prefetch_vmrow_bw[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[],
+ double UrgentBurstFactorLumaPre[],
+ double UrgentBurstFactorChromaPre[],
+ double UrgentBurstFactorCursorPre[],
+ double PrefetchBW[],
+ double VRatio[],
+ double MaxVRatioPre,
+
+ /* output */
+ double *MaxPrefetchBandwidth,
+ double *FractionOfUrgentBandwidth,
+ bool *PrefetchBandwidthSupport)
+{ /* Sums per-surface worst-case bandwidth into *MaxPrefetchBandwidth and compares it with ReturnBW. */
+ unsigned int k;
+ double ActiveBandwidthPerSurface;
+ bool NotEnoughUrgentLatencyHiding = false;
+ double TotalActiveBandwidth = 0;
+ double TotalPrefetchBandwidth = 0;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) { /* a single flagged surface is enough to fail the check */
+ if (NotUrgentLatencyHiding[k]) {
+ NotEnoughUrgentLatencyHiding = true;
+ }
+ }
+
+ *MaxPrefetchBandwidth = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ ActiveBandwidthPerSurface = ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]); /* burst-factor-weighted reads plus per-DPP meta and dpte row bandwidth */
+
+ TotalActiveBandwidth += ActiveBandwidthPerSurface;
+
+ TotalPrefetchBandwidth = TotalPrefetchBandwidth + PrefetchBW[k] * VRatio[k];
+
+ *MaxPrefetchBandwidth = *MaxPrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], /* per surface: largest of VM/row prefetch, active, and pixel prefetch demand */
+ ActiveBandwidthPerSurface,
+ NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
+ }
+
+ if (MaxVRatioPre == __DML_MAX_VRATIO_PRE__) /* exact floating-point compare; the total-prefetch ratio limit is applied only in this case */
+ *PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && (TotalPrefetchBandwidth <= TotalActiveBandwidth * __DML_MAX_BW_RATIO_PRE__) && !NotEnoughUrgentLatencyHiding;
+ else
+ *PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
+
+ *FractionOfUrgentBandwidth = *MaxPrefetchBandwidth / ReturnBW; /* NOTE(review): no guard for ReturnBW == 0 */
+}
+
+double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double PrefetchBandwidthLuma[],
+ double PrefetchBandwidthChroma[],
+ double cursor_bw[],
+ double cursor_bw_pre[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[],
+ double UrgentBurstFactorLumaPre[],
+ double UrgentBurstFactorChromaPre[],
+ double UrgentBurstFactorCursorPre[])
+{ /* Returns ReturnBW minus, for each surface, the larger of its active and prefetch bandwidth demand. */
+ unsigned int k;
+ double CalculateBandwidthAvailableForImmediateFlip_val = ReturnBW; /* running remainder; not clamped, so it can go negative */
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ CalculateBandwidthAvailableForImmediateFlip_val = CalculateBandwidthAvailableForImmediateFlip_val - dml_max(ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], /* active demand; meta/dpte row bandwidth is not an input to this function */
+ NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); /* prefetch demand */
+ }
+
+ return CalculateBandwidthAvailableForImmediateFlip_val;
+}
+
+void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ enum immediate_flip_requirement ImmediateFlipRequirement[],
+ double final_flip_bw[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double PrefetchBandwidthLuma[],
+ double PrefetchBandwidthChroma[],
+ double cursor_bw[],
+ double meta_row_bandwidth[],
+ double dpte_row_bandwidth[],
+ double cursor_bw_pre[],
+ double prefetch_vmrow_bw[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[],
+ double UrgentBurstFactorLumaPre[],
+ double UrgentBurstFactorChromaPre[],
+ double UrgentBurstFactorCursorPre[],
+
+ /* output */
+ double *TotalBandwidth,
+ double *FractionOfUrgentBandwidth,
+ bool *ImmediateFlipBandwidthSupport)
+{ /* Sums per-surface worst-case bandwidth into *TotalBandwidth and reports whether it is <= ReturnBW. */
+ unsigned int k;
+ *TotalBandwidth = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) { /* flip case: final_flip_bw stands in for meta/dpte row bandwidth in the active term and is added to the prefetch term */
+ *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
+ NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
+ NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
+ } else { /* no flip: active term carries per-DPP meta and dpte row bandwidth */
+ *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
+ NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
+ NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
+ }
+ }
+ *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
+ *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW; /* NOTE(review): no guard for ReturnBW == 0 */
+}
+
+bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ double UrgentLatency,
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ unsigned int SwathWidthY[],
+ unsigned int SwathWidthC[],
+ double BytePerPixelInDETY[],
+ double BytePerPixelInDETC[],
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ unsigned int NumOfDPP[],
+ unsigned int HTotal[],
+ double PixelClock[],
+ double VRatioY[],
+ double VRatioC[],
+ enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
+ enum unbounded_requesting_policy UseUnboundedRequesting)
+{ /* Returns true when some non-phantom surface's DET cannot cover the time to fetch one swath of every surface. */
+ int k; /* NOTE(review): signed index compared with unsigned NumberOfActiveSurfaces; sibling functions use unsigned int */
+ double SwathSizeAllSurfaces = 0;
+ double SwathSizeAllSurfacesInFetchTimeUs;
+ double DETSwathLatencyHidingUs;
+ double DETSwathLatencyHidingYUs;
+ double DETSwathLatencyHidingCUs;
+ double SwathSizePerSurfaceY[DC__NUM_DPP__MAX]; /* NOTE(review): indexed by k < NumberOfActiveSurfaces; assumes that never exceeds DC__NUM_DPP__MAX - confirm */
+ double SwathSizePerSurfaceC[DC__NUM_DPP__MAX]; /* only read back in the summation loop below */
+ bool NotEnoughDETSwathFillLatencyHiding = false;
+
+ if (UseUnboundedRequesting == dm_unbounded_requesting) /* check is skipped entirely for this policy value */
+ return false;
+
+ /* calculate sum of single swath size for all pipes in bytes */
+ for (k = 0; k < NumberOfActiveSurfaces; k++) {
+ SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k];
+
+ if (SwathHeightC[k] != 0) /* a zero chroma swath height contributes no chroma bytes */
+ SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k];
+ else
+ SwathSizePerSurfaceC[k] = 0;
+
+ SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k];
+ }
+
+ SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency; /* NOTE(review): no guard for ReturnBW == 0 */
+
+ /* ensure all DET - 1 swath can hide a fetch for all surfaces */
+ for (k = 0; k < NumberOfActiveSurfaces; k++) {
+ double LineTime = HTotal[k] / PixelClock[k]; /* computed for every surface, including phantom ones skipped below */
+
+ /* only care if surface is not phantom */
+ if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
+ DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime; /* (DET bytes / bytes-per-pixel / swath width, floored, minus one swath height) * LineTime / VRatio */
+
+ if (SwathHeightC[k] != 0) {
+ DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime;
+
+ DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs); /* the smaller plane budget decides */
+ } else {
+ DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs;
+ }
+
+ /* DET must be able to hide time to fetch 1 swath for each surface */
+ if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) {
+ NotEnoughDETSwathFillLatencyHiding = true;
+ break;
+ }
+ }
+ }
+
+ return NotEnoughDETSwathFillLatencyHiding;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
new file mode 100644
index 000000000000..5d34735df83d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
@@ -0,0 +1,1170 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DML_DCN32_DISPLAY_MODE_VBA_UTIL_32_H__
+#define __DML_DCN32_DISPLAY_MODE_VBA_UTIL_32_H__
+
+#include "../display_mode_enums.h"
+#include "os_types.h"
+#include "../dc_features.h"
+#include "../display_mode_structs.h"
+#include "../display_mode_vba.h"
+
+unsigned int dml32_dscceComputeDelay(
+ unsigned int bpc,
+ double BPP,
+ unsigned int sliceWidth,
+ unsigned int numSlices,
+ enum output_format_class pixelFormat,
+ enum output_encoder_class Output);
+
+unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
+
+bool IsVertical(enum dm_rotation_angle Scan); /* NOTE(review): unlike the neighbouring declarations, this name has no dml32_ prefix - confirm it cannot clash with another definition */
+
+void dml32_CalculateBytePerPixelAndBlockSizes(
+ enum source_format_class SourcePixelFormat,
+ enum dm_swizzle_mode SurfaceTiling,
+
+ /*Output*/
+ unsigned int *BytePerPixelY,
+ unsigned int *BytePerPixelC,
+ double *BytePerPixelDETY,
+ double *BytePerPixelDETC,
+ unsigned int *BlockHeight256BytesY,
+ unsigned int *BlockHeight256BytesC,
+ unsigned int *BlockWidth256BytesY,
+ unsigned int *BlockWidth256BytesC,
+ unsigned int *MacroTileHeightY,
+ unsigned int *MacroTileHeightC,
+ unsigned int *MacroTileWidthY,
+ unsigned int *MacroTileWidthC);
+
+void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
+ double HRatio,
+ double HRatioChroma,
+ double VRatio,
+ double VRatioChroma,
+ double MaxDCHUBToPSCLThroughput,
+ double MaxPSCLToLBThroughput,
+ double PixelClock,
+ enum source_format_class SourcePixelFormat,
+ unsigned int HTaps,
+ unsigned int HTapsChroma,
+ unsigned int VTaps,
+ unsigned int VTapsChroma,
+
+ /* output */
+ double *PSCL_THROUGHPUT,
+ double *PSCL_THROUGHPUT_CHROMA,
+ double *DPPCLKUsingSingleDPP);
+
+void dml32_CalculateSwathAndDETConfiguration(
+ unsigned int DETSizeOverride[],
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ unsigned int ConfigReturnBufferSizeInKByte,
+ unsigned int MaxTotalDETInKByte,
+ unsigned int MinCompressedBufferSizeInKByte,
+ double ForceSingleDPP, /* NOTE(review): double here, but bool in dml32_CalculateSwathWidth and dml32_CalculateDETBufferSize - confirm intended type */
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int nomDETInKByte,
+ enum unbounded_requesting_policy UseUnboundedRequestingFinal,
+ bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
+ unsigned int PixelChunkSizeKBytes,
+ unsigned int ROBSizeKBytes,
+ unsigned int CompressedBufferSegmentSizeInkByteFinal,
+ enum output_encoder_class Output[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double MaximumSwathWidthLuma[],
+ double MaximumSwathWidthChroma[],
+ enum dm_rotation_angle SourceRotation[],
+ bool ViewportStationary[],
+ enum source_format_class SourcePixelFormat[],
+ enum dm_swizzle_mode SurfaceTiling[],
+ unsigned int ViewportWidth[],
+ unsigned int ViewportHeight[],
+ unsigned int ViewportXStart[],
+ unsigned int ViewportYStart[],
+ unsigned int ViewportXStartC[],
+ unsigned int ViewportYStartC[],
+ unsigned int SurfaceWidthY[],
+ unsigned int SurfaceWidthC[],
+ unsigned int SurfaceHeightY[],
+ unsigned int SurfaceHeightC[],
+ unsigned int Read256BytesBlockHeightY[],
+ unsigned int Read256BytesBlockHeightC[],
+ unsigned int Read256BytesBlockWidthY[],
+ unsigned int Read256BytesBlockWidthC[],
+ enum odm_combine_mode ODMMode[],
+ unsigned int BlendingAndTiming[],
+ unsigned int BytePerPixY[],
+ unsigned int BytePerPixC[],
+ double BytePerPixDETY[],
+ double BytePerPixDETC[],
+ unsigned int HActive[],
+ double HRatio[],
+ double HRatioChroma[],
+ unsigned int DPPPerSurface[],
+
+ /* Output */
+ unsigned int swath_width_luma_ub[],
+ unsigned int swath_width_chroma_ub[],
+ double SwathWidth[],
+ double SwathWidthChroma[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ unsigned int DETBufferSizeInKByte[],
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ bool *UnboundedRequestEnabled,
+ unsigned int *CompressedBufferSizeInkByte,
+ unsigned int *CompBufReservedSpaceKBytes,
+ bool *CompBufReservedSpaceNeedAdjustment,
+ bool ViewportSizeSupportPerSurface[],
+ bool *ViewportSizeSupport);
+
+void dml32_CalculateSwathWidth(
+ bool ForceSingleDPP,
+ unsigned int NumberOfActiveSurfaces,
+ enum source_format_class SourcePixelFormat[],
+ enum dm_rotation_angle SourceScan[],
+ bool ViewportStationary[],
+ unsigned int ViewportWidth[],
+ unsigned int ViewportHeight[],
+ unsigned int ViewportXStart[],
+ unsigned int ViewportYStart[],
+ unsigned int ViewportXStartC[],
+ unsigned int ViewportYStartC[],
+ unsigned int SurfaceWidthY[],
+ unsigned int SurfaceWidthC[],
+ unsigned int SurfaceHeightY[],
+ unsigned int SurfaceHeightC[],
+ enum odm_combine_mode ODMMode[],
+ unsigned int BytePerPixY[],
+ unsigned int BytePerPixC[],
+ unsigned int Read256BytesBlockHeightY[],
+ unsigned int Read256BytesBlockHeightC[],
+ unsigned int Read256BytesBlockWidthY[],
+ unsigned int Read256BytesBlockWidthC[],
+ unsigned int BlendingAndTiming[],
+ unsigned int HActive[],
+ double HRatio[],
+ unsigned int DPPPerSurface[],
+
+ /* Output */
+ double SwathWidthdoubleDPPY[], /* NOTE(review): "double" inside the name looks like a type-rename artifact (possibly SingleDPP) - confirm */
+ double SwathWidthdoubleDPPC[], /* NOTE(review): same naming question as the Y variant above */
+ double SwathWidthY[], // per-pipe
+ double SwathWidthC[], // per-pipe
+ unsigned int MaximumSwathHeightY[],
+ unsigned int MaximumSwathHeightC[],
+ unsigned int swath_width_luma_ub[], // per-pipe
+ unsigned int swath_width_chroma_ub[]);
+
+bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
+ unsigned int TotalNumberOfActiveDPP,
+ bool NoChroma,
+ enum output_encoder_class Output,
+ enum dm_swizzle_mode SurfaceTiling,
+ bool CompBufReservedSpaceNeedAdjustment,
+ bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
+
+void dml32_CalculateDETBufferSize(
+ unsigned int DETSizeOverride[],
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ bool ForceSingleDPP,
+ unsigned int NumberOfActiveSurfaces,
+ bool UnboundedRequestEnabled,
+ unsigned int nomDETInKByte,
+ unsigned int MaxTotalDETInKByte,
+ unsigned int ConfigReturnBufferSizeInKByte,
+ unsigned int MinCompressedBufferSizeInKByte,
+ unsigned int CompressedBufferSegmentSizeInkByteFinal,
+ enum source_format_class SourcePixelFormat[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ unsigned int RoundedUpMaxSwathSizeBytesY[],
+ unsigned int RoundedUpMaxSwathSizeBytesC[],
+ unsigned int DPPPerSurface[],
+ /* Output */
+ unsigned int DETBufferSizeInKByte[],
+ unsigned int *CompressedBufferSizeInkByte);
+
+void dml32_CalculateODMMode(
+ unsigned int MaximumPixelsPerLinePerDSCUnit,
+ unsigned int HActive,
+ enum output_format_class OutFormat,
+ enum output_encoder_class Output,
+ enum odm_combine_policy ODMUse,
+ double StateDispclk,
+ double MaxDispclk,
+ bool DSCEnable,
+ unsigned int TotalNumberOfActiveDPP,
+ unsigned int MaxNumDPP,
+ double PixelClock,
+ double DISPCLKDPPCLKDSCCLKDownSpreading,
+ double DISPCLKRampingMargin,
+ double DISPCLKDPPCLKVCOSpeed,
+ unsigned int NumberOfDSCSlices,
+
+ /* Output */
+ bool *TotalAvailablePipesSupport,
+ unsigned int *NumberOfDPP,
+ enum odm_combine_mode *ODMMode,
+ double *RequiredDISPCLKPerSurface);
+
+double dml32_CalculateRequiredDispclk(
+ enum odm_combine_mode ODMMode,
+ double PixelClock,
+ double DISPCLKDPPCLKDSCCLKDownSpreading,
+ double DISPCLKRampingMargin,
+ double DISPCLKDPPCLKVCOSpeed,
+ double MaxDispclk);
+
+double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed);
+
+void dml32_CalculateOutputLink(
+ double PHYCLKPerState,
+ double PHYCLKD18PerState,
+ double PHYCLKD32PerState,
+ double Downspreading,
+ bool IsMainSurfaceUsingTheIndicatedTiming,
+ enum output_encoder_class Output,
+ enum output_format_class OutputFormat,
+ unsigned int HTotal,
+ unsigned int HActive,
+ double PixelClockBackEnd,
+ double ForcedOutputLinkBPP,
+ unsigned int DSCInputBitPerComponent,
+ unsigned int NumberOfDSCSlices,
+ double AudioSampleRate,
+ unsigned int AudioSampleLayout,
+ enum odm_combine_mode ODMModeNoDSC,
+ enum odm_combine_mode ODMModeDSC,
+ bool DSCEnable,
+ unsigned int OutputLinkDPLanes,
+ enum dm_output_link_dp_rate OutputLinkDPRate,
+
+ /* Output */
+ bool *RequiresDSC,
+ double *RequiresFEC, /* NOTE(review): double, while the adjacent RequiresDSC output is bool - confirm intended type */
+ double *OutBpp,
+ enum dm_output_type *OutputType,
+ enum dm_output_rate *OutputRate,
+ unsigned int *RequiredSlots);
+
+void dml32_CalculateDPPCLK(
+ unsigned int NumberOfActiveSurfaces,
+ double DISPCLKDPPCLKDSCCLKDownSpreading,
+ double DISPCLKDPPCLKVCOSpeed,
+ double DPPCLKUsingSingleDPP[],
+ unsigned int DPPPerSurface[],
+
+ /* output */
+ double *GlobalDPPCLK,
+ double Dppclk[]);
+
+double dml32_TruncToValidBPP(
+ double LinkBitRate,
+ unsigned int Lanes,
+ unsigned int HTotal,
+ unsigned int HActive,
+ double PixelClock,
+ double DesiredBPP,
+ bool DSCEnable,
+ enum output_encoder_class Output,
+ enum output_format_class Format,
+ unsigned int DSCInputBitPerComponent,
+ unsigned int DSCSlices,
+ unsigned int AudioRate,
+ unsigned int AudioLayout,
+ enum odm_combine_mode ODMModeNoDSC,
+ enum odm_combine_mode ODMModeDSC,
+ /* Output */
+ unsigned int *RequiredSlots);
+
+double dml32_RequiredDTBCLK(
+ bool DSCEnable,
+ double PixelClock,
+ enum output_format_class OutputFormat,
+ double OutputBpp,
+ unsigned int DSCSlices,
+ unsigned int HTotal,
+ unsigned int HActive,
+ unsigned int AudioRate,
+ unsigned int AudioLayout);
+
+unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
+ enum odm_combine_mode ODMMode,
+ unsigned int DSCInputBitPerComponent,
+ double OutputBpp,
+ unsigned int HActive,
+ unsigned int HTotal,
+ unsigned int NumberOfDSCSlices,
+ enum output_format_class OutputFormat,
+ enum output_encoder_class Output,
+ double PixelClock,
+ double PixelClockBackEnd,
+ double dsc_delay_factor_wa);
+
+void dml32_CalculateSurfaceSizeInMall(
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int MALLAllocatedForDCN,
+ enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
+ enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
+ bool DCCEnable[],
+ bool ViewportStationary[],
+ unsigned int ViewportXStartY[],
+ unsigned int ViewportYStartY[],
+ unsigned int ViewportXStartC[],
+ unsigned int ViewportYStartC[],
+ unsigned int ViewportWidthY[],
+ unsigned int ViewportHeightY[],
+ unsigned int BytesPerPixelY[],
+ unsigned int ViewportWidthC[],
+ unsigned int ViewportHeightC[],
+ unsigned int BytesPerPixelC[],
+ unsigned int SurfaceWidthY[],
+ unsigned int SurfaceWidthC[],
+ unsigned int SurfaceHeightY[],
+ unsigned int SurfaceHeightC[],
+ unsigned int Read256BytesBlockWidthY[],
+ unsigned int Read256BytesBlockWidthC[],
+ unsigned int Read256BytesBlockHeightY[],
+ unsigned int Read256BytesBlockHeightC[],
+ unsigned int ReadBlockWidthY[],
+ unsigned int ReadBlockWidthC[],
+ unsigned int ReadBlockHeightY[],
+ unsigned int ReadBlockHeightC[],
+ unsigned int DCCMetaPitchY[],
+ unsigned int DCCMetaPitchC[],
+
+ /* Output */
+ unsigned int SurfaceSizeInMALL[],
+ bool *ExceededMALLSize);
+
+void dml32_CalculateVMRowAndSwath(
+ unsigned int NumberOfActiveSurfaces,
+ DmlPipe myPipe[],
+ unsigned int SurfaceSizeInMALL[],
+ unsigned int PTEBufferSizeInRequestsLuma,
+ unsigned int PTEBufferSizeInRequestsChroma,
+ unsigned int DCCMetaBufferSizeBytes,
+ enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ unsigned int MALLAllocatedForDCN,
+ double SwathWidthY[],
+ double SwathWidthC[],
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int GPUVMMinPageSizeKBytes[],
+ unsigned int HostVMMinPageSize,
+
+ /* Output */
+ bool PTEBufferSizeNotExceeded[],
+ bool DCCMetaBufferSizeNotExceeded[],
+ unsigned int dpte_row_width_luma_ub[],
+ unsigned int dpte_row_width_chroma_ub[],
+ unsigned int dpte_row_height_luma[],
+ unsigned int dpte_row_height_chroma[],
+ unsigned int dpte_row_height_linear_luma[], // VBA_DELTA
+ unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA
+ unsigned int meta_req_width[],
+ unsigned int meta_req_width_chroma[],
+ unsigned int meta_req_height[],
+ unsigned int meta_req_height_chroma[],
+ unsigned int meta_row_width[],
+ unsigned int meta_row_width_chroma[],
+ unsigned int meta_row_height[],
+ unsigned int meta_row_height_chroma[],
+ unsigned int vm_group_bytes[],
+ unsigned int dpte_group_bytes[],
+ unsigned int PixelPTEReqWidthY[],
+ unsigned int PixelPTEReqHeightY[],
+ unsigned int PTERequestSizeY[],
+ unsigned int PixelPTEReqWidthC[],
+ unsigned int PixelPTEReqHeightC[],
+ unsigned int PTERequestSizeC[],
+ unsigned int dpde0_bytes_per_frame_ub_l[],
+ unsigned int meta_pte_bytes_per_frame_ub_l[],
+ unsigned int dpde0_bytes_per_frame_ub_c[],
+ unsigned int meta_pte_bytes_per_frame_ub_c[],
+ double PrefetchSourceLinesY[],
+ double PrefetchSourceLinesC[],
+ double VInitPreFillY[],
+ double VInitPreFillC[],
+ unsigned int MaxNumSwathY[],
+ unsigned int MaxNumSwathC[],
+ double meta_row_bw[],
+ double dpte_row_bw[],
+ double PixelPTEBytesPerRow[],
+ double PDEAndMetaPTEBytesFrame[],
+ double MetaRowByte[],
+ bool use_one_row_for_frame[],
+ bool use_one_row_for_frame_flip[],
+ bool UsesMALLForStaticScreen[],
+ bool PTE_BUFFER_MODE[],
+ unsigned int BIGK_FRAGMENT_SIZE[]);
+
+unsigned int dml32_CalculateVMAndRowBytes(
+ bool ViewportStationary,
+ bool DCCEnable,
+ unsigned int NumberOfDPPs,
+ unsigned int BlockHeight256Bytes,
+ unsigned int BlockWidth256Bytes,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceTiling,
+ unsigned int BytePerPixel,
+ enum dm_rotation_angle SourceScan,
+ double SwathWidth,
+ unsigned int ViewportHeight,
+ unsigned int ViewportXStart,
+ unsigned int ViewportYStart,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int GPUVMMinPageSizeKBytes,
+ unsigned int HostVMMinPageSize,
+ unsigned int PTEBufferSizeInRequests,
+ unsigned int Pitch,
+ unsigned int DCCMetaPitch,
+ unsigned int MacroTileWidth,
+ unsigned int MacroTileHeight,
+
+ /* Output */
+ unsigned int *MetaRowByte,
+ unsigned int *PixelPTEBytesPerRow,
+ unsigned int *dpte_row_width_ub,
+ unsigned int *dpte_row_height,
+ unsigned int *dpte_row_height_linear,
+ unsigned int *PixelPTEBytesPerRow_one_row_per_frame,
+ unsigned int *dpte_row_width_ub_one_row_per_frame,
+ unsigned int *dpte_row_height_one_row_per_frame,
+ unsigned int *MetaRequestWidth,
+ unsigned int *MetaRequestHeight,
+ unsigned int *meta_row_width,
+ unsigned int *meta_row_height,
+ unsigned int *PixelPTEReqWidth,
+ unsigned int *PixelPTEReqHeight,
+ unsigned int *PTERequestSize,
+ unsigned int *DPDE0BytesFrame,
+ unsigned int *MetaPTEBytesFrame);
+
+/*
+ * Declaration only; the definition is not part of this header.
+ *
+ * Returns a double and, per the Output marker below, also reports two values
+ * through pointers: *VInitPreFill (double) and *MaxNumSwath (unsigned int).
+ *
+ * NOTE(review): VInitPreFill is a double here, while
+ * dml32_CalculatePrefetchSchedule() in this header declares VInitPreFillY and
+ * VInitPreFillC as unsigned int - confirm the conversion is intended.
+ */
+double dml32_CalculatePrefetchSourceLines(
+ double VRatio,
+ unsigned int VTaps,
+ bool Interlace,
+ bool ProgressiveToInterlaceUnitInOPP,
+ unsigned int SwathHeight,
+ enum dm_rotation_angle SourceRotation,
+ bool ViewportStationary,
+ double SwathWidth,
+ unsigned int ViewportHeight,
+ unsigned int ViewportXStart,
+ unsigned int ViewportYStart,
+
+ /* Output */
+ double *VInitPreFill,
+ unsigned int *MaxNumSwath);
+
+/*
+ * Declaration only; the definition is not part of this header.
+ *
+ * Returns nothing. Per the Output marker below, the result is delivered
+ * through the UsesMALLForStaticScreen[] array.
+ */
+void dml32_CalculateMALLUseForStaticScreen(
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int MALLAllocatedForDCNFinal,
+ enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
+ unsigned int SurfaceSizeInMALL[],
+ bool one_row_per_frame_fits_in_buffer[],
+
+ /* Output */
+ bool UsesMALLForStaticScreen[]);
+
+void dml32_CalculateRowBandwidth(
+ bool GPUVMEnable,
+ enum source_format_class SourcePixelFormat,
+ double VRatio,
+ double VRatioChroma,
+ bool DCCEnable,
+ double LineTime,
+ unsigned int MetaRowByteLuma,
+ unsigned int MetaRowByteChroma,
+ unsigned int meta_row_height_luma,
+ unsigned int meta_row_height_chroma,
+ unsigned int PixelPTEBytesPerRowLuma,
+ unsigned int PixelPTEBytesPerRowChroma,
+ unsigned int dpte_row_height_luma,
+ unsigned int dpte_row_height_chroma,
+ /* Output */
+ double *meta_row_bw,
+ double *dpte_row_bw);
+
+double dml32_CalculateUrgentLatency(
+ double UrgentLatencyPixelDataOnly,
+ double UrgentLatencyPixelMixedWithVMData,
+ double UrgentLatencyVMDataOnly,
+ bool DoUrgentLatencyAdjustment,
+ double UrgentLatencyAdjustmentFabricClockComponent,
+ double UrgentLatencyAdjustmentFabricClockReference,
+ double FabricClock);
+
+void dml32_CalculateUrgentBurstFactor(
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
+ unsigned int swath_width_luma_ub,
+ unsigned int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double LineTime,
+ double UrgentLatency,
+ double CursorBufferSize,
+ unsigned int CursorWidth,
+ unsigned int CursorBPP,
+ double VRatio,
+ double VRatioC,
+ double BytePerPixelInDETY,
+ double BytePerPixelInDETC,
+ unsigned int DETBufferSizeY,
+ unsigned int DETBufferSizeC,
+ /* Output */
+ double *UrgentBurstFactorCursor,
+ double *UrgentBurstFactorLuma,
+ double *UrgentBurstFactorChroma,
+ bool *NotEnoughUrgentLatencyHiding);
+
+void dml32_CalculateDCFCLKDeepSleep(
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int BytePerPixelY[],
+ unsigned int BytePerPixelC[],
+ double VRatio[],
+ double VRatioChroma[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ unsigned int DPPPerSurface[],
+ double HRatio[],
+ double HRatioChroma[],
+ double PixelClock[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double Dppclk[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ unsigned int ReturnBusWidth,
+
+ /* Output */
+ double *DCFClkDeepSleep);
+
+double dml32_CalculateWriteBackDelay(
+ enum source_format_class WritebackPixelFormat,
+ double WritebackHRatio,
+ double WritebackVRatio,
+ unsigned int WritebackVTaps,
+ unsigned int WritebackDestinationWidth,
+ unsigned int WritebackDestinationHeight,
+ unsigned int WritebackSourceHeight,
+ unsigned int HTotal);
+
+void dml32_UseMinimumDCFCLK(
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ bool DRRDisplay[],
+ bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ unsigned int MaxInterDCNTileRepeaters,
+ unsigned int MaxPrefetchMode,
+ double DRAMClockChangeLatencyFinal,
+ double FCLKChangeLatency,
+ double SREnterPlusExitTime,
+ unsigned int ReturnBusWidth,
+ unsigned int RoundTripPingLatencyCycles,
+ unsigned int ReorderingBytes,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int MetaChunkSize,
+ bool GPUVMEnable,
+ unsigned int GPUVMMaxPageTableLevels,
+ bool HostVMEnable,
+ unsigned int NumberOfActiveSurfaces,
+ double HostVMMinPageSize,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ bool DynamicMetadataVMEnabled,
+ bool ImmediateFlipRequirement,
+ bool ProgressiveToInterlaceUnitInOPP,
+ double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
+ double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
+ unsigned int VTotal[],
+ unsigned int VActive[],
+ unsigned int DynamicMetadataTransmittedBytes[],
+ unsigned int DynamicMetadataLinesBeforeActiveRequired[],
+ bool Interlace[],
+ double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
+ double RequiredDISPCLK[][2],
+ double UrgLatency[],
+ unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
+ double ProjectedDCFClkDeepSleep[][2],
+ double MaximumVStartup[][2][DC__NUM_DPP__MAX],
+ unsigned int TotalNumberOfActiveDPP[][2],
+ unsigned int TotalNumberOfDCCActiveDPP[][2],
+ unsigned int dpte_group_bytes[],
+ double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
+ double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
+ unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
+ unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
+ unsigned int BytePerPixelY[],
+ unsigned int BytePerPixelC[],
+ unsigned int HTotal[],
+ double PixelClock[],
+ double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
+ double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
+ double MetaRowBytes[][2][DC__NUM_DPP__MAX],
+ bool DynamicMetadataEnable[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double DCFCLKPerState[],
+ /* Output */
+ double DCFCLKState[][2]);
+
+/*
+ * Declaration only; the definition is not part of this header.
+ *
+ * Returns an unsigned int; no output parameters are marked.
+ * dml32_CalculateExtraLatency(), declared further down, takes this same
+ * parameter list plus RoundTripPingLatencyCycles, DCFCLK and ReturnBW.
+ */
+unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
+ unsigned int TotalNumberOfActiveDPP,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int TotalNumberOfDCCActiveDPP,
+ unsigned int MetaChunkSize,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int NumberOfDPP[],
+ unsigned int dpte_group_bytes[],
+ double HostVMInefficiencyFactor,
+ double HostVMMinPageSize,
+ unsigned int HostVMMaxNonCachedPageTableLevels);
+
+/*
+ * Declaration only; the definition is not part of this header.
+ *
+ * Returns nothing. Unlike most declarations in this header, the parameter
+ * list has no Output marker.
+ *
+ * NOTE(review): InterlaceEnable is declared unsigned int here, while other
+ * declarations in this header pass interlace state as bool - confirm.
+ */
+void dml32_CalculateVUpdateAndDynamicMetadataParameters(
+ unsigned int MaxInterDCNTileRepeaters,
+ double Dppclk,
+ double Dispclk,
+ double DCFClkDeepSleep,
+ double PixelClock,
+ unsigned int HTotal,
+ unsigned int VBlank,
+ unsigned int DynamicMetadataTransmittedBytes,
+ unsigned int DynamicMetadataLinesBeforeActiveRequired,
+ unsigned int InterlaceEnable,
+ bool ProgressiveToInterlaceUnitInOPP,
+ /* NOTE(review): the non-const pointers below are presumably outputs - confirm against the definition. */
+ double *TSetup,
+ double *Tdmbf,
+ double *Tdmec,
+ double *Tdmsks,
+ unsigned int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix);
+
+double dml32_CalculateTWait(
+ unsigned int PrefetchMode,
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
+ bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ bool DRRDisplay,
+ double DRAMClockChangeLatency,
+ double FCLKChangeLatency,
+ double UrgentLatency,
+ double SREnterPlusExitTime);
+
+/*
+ * Declaration only; the definition is not part of this header.
+ *
+ * Returns a double. Every parameter is const-qualified and soc is a pointer
+ * to const, so the declaration promises not to modify *soc.
+ * NOTE(review): the "_mbps" suffix suggests the unit of the result - confirm
+ * against the definition.
+ */
+double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
+ const int VoltageLevel,
+ const bool HostVMEnable,
+ const double DCFCLK,
+ const double FabricClock,
+ const double DRAMSpeed);
+
+/*
+ * Declaration only; the definition is not part of this header.
+ *
+ * Returns a double. Takes the same parameters as dml32_get_return_bw_mbps()
+ * except that there is no HostVMEnable argument.
+ */
+double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
+ const int VoltageLevel,
+ const double DCFCLK,
+ const double FabricClock,
+ const double DRAMSpeed);
+
+/*
+ * Declaration only; the definition is not part of this header.
+ *
+ * Returns a double; no output parameters are marked. The parameter list is
+ * that of dml32_CalculateExtraLatencyBytes() with RoundTripPingLatencyCycles,
+ * DCFCLK and ReturnBW added.
+ */
+double dml32_CalculateExtraLatency(
+ unsigned int RoundTripPingLatencyCycles,
+ unsigned int ReorderingBytes,
+ double DCFCLK,
+ unsigned int TotalNumberOfActiveDPP,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int TotalNumberOfDCCActiveDPP,
+ unsigned int MetaChunkSize,
+ double ReturnBW,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int NumberOfDPP[],
+ unsigned int dpte_group_bytes[],
+ double HostVMInefficiencyFactor,
+ double HostVMMinPageSize,
+ unsigned int HostVMMaxNonCachedPageTableLevels);
+
+/*
+ * Declaration only; the definition is not part of this header.
+ *
+ * Returns a bool (its meaning is not visible from this declaration) and, per
+ * the Output marker below, reports its results through the trailing pointer
+ * parameters. v and myPipe are non-const pointers listed among the inputs.
+ *
+ * NOTE(review): SwathWidthY/SwathWidthC and VInitPreFillY/VInitPreFillC are
+ * unsigned int here, whereas dml32_CalculatePrefetchSourceLines() in this
+ * header uses double for SwathWidth and for its VInitPreFill output. With this
+ * prototype in scope a double argument is implicitly converted to unsigned
+ * int, dropping any fractional part - confirm that is intended.
+ */
+bool dml32_CalculatePrefetchSchedule(
+ struct vba_vars_st *v,
+ unsigned int k,
+ double HostVMInefficiencyFactor,
+ DmlPipe *myPipe,
+ unsigned int DSCDelay,
+ unsigned int DPP_RECOUT_WIDTH,
+ unsigned int VStartup,
+ unsigned int MaxVStartup,
+ double UrgentLatency,
+ double UrgentExtraLatency,
+ double TCalc,
+ unsigned int PDEAndMetaPTEBytesFrame,
+ unsigned int MetaRowByte,
+ unsigned int PixelPTEBytesPerRow,
+ double PrefetchSourceLinesY,
+ unsigned int SwathWidthY,
+ unsigned int VInitPreFillY,
+ unsigned int MaxNumSwathY,
+ double PrefetchSourceLinesC,
+ unsigned int SwathWidthC,
+ unsigned int VInitPreFillC,
+ unsigned int MaxNumSwathC,
+ unsigned int swath_width_luma_ub,
+ unsigned int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double TWait,
+ double TPreReq,
+ bool ExtendPrefetchIfPossible,
+ /* Output */
+ double *DSTXAfterScaler,
+ double *DSTYAfterScaler,
+ double *DestinationLinesForPrefetch,
+ double *PrefetchBandwidth,
+ double *DestinationLinesToRequestVMInVBlank,
+ double *DestinationLinesToRequestRowInVBlank,
+ double *VRatioPrefetchY,
+ double *VRatioPrefetchC,
+ double *RequiredPrefetchPixDataBWLuma,
+ double *RequiredPrefetchPixDataBWChroma,
+ bool *NotEnoughTimeForDynamicMetadata,
+ double *Tno_bw,
+ double *prefetch_vmrow_bw,
+ double *Tdmdl_vm,
+ double *Tdmdl,
+ double *TSetup,
+ unsigned int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix);
+
+void dml32_CalculateFlipSchedule(
+ double HostVMInefficiencyFactor,
+ double UrgentExtraLatency,
+ double UrgentLatency,
+ unsigned int GPUVMMaxPageTableLevels,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ bool GPUVMEnable,
+ double HostVMMinPageSize,
+ double PDEAndMetaPTEBytesPerFrame,
+ double MetaRowBytes,
+ double DPTEBytesPerRow,
+ double BandwidthAvailableForImmediateFlip,
+ unsigned int TotImmediateFlipBytes,
+ enum source_format_class SourcePixelFormat,
+ double LineTime,
+ double VRatio,
+ double VRatioChroma,
+ double Tno_bw,
+ bool DCCEnable,
+ unsigned int dpte_row_height,
+ unsigned int meta_row_height,
+ unsigned int dpte_row_height_chroma,
+ unsigned int meta_row_height_chroma,
+ bool use_one_row_for_frame_flip,
+
+ /* Output */
+ double *DestinationLinesToRequestVMInImmediateFlip,
+ double *DestinationLinesToRequestRowInImmediateFlip,
+ double *final_flip_bw,
+ bool *ImmediateFlipSupportedForPipe);
+
+void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+ struct vba_vars_st *v,
+ unsigned int PrefetchMode,
+ double DCFCLK,
+ double ReturnBW,
+ SOCParametersList mmSOCParameters,
+ double SOCCLK,
+ double DCFClkDeepSleep,
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ unsigned int DPPPerSurface[],
+ double BytePerPixelDETY[],
+ double BytePerPixelDETC[],
+ double DSTXAfterScaler[],
+ double DSTYAfterScaler[],
+ bool UnboundedRequestEnabled,
+ unsigned int CompressedBufferSizeInkByte,
+
+ /* Output */
+ enum clock_change_support *DRAMClockChangeSupport,
+ double MaxActiveDRAMClockChangeLatencySupported[],
+ unsigned int SubViewportLinesNeededInMALL[],
+ enum dm_fclock_change_support *FCLKChangeSupport,
+ double *MinActiveFCLKChangeLatencySupported,
+ bool *USRRetrainingSupport,
+ double ActiveDRAMClockChangeLatencyMargin[]);
+
+double dml32_CalculateWriteBackDISPCLK(
+ enum source_format_class WritebackPixelFormat,
+ double PixelClock,
+ double WritebackHRatio,
+ double WritebackVRatio,
+ unsigned int WritebackHTaps,
+ unsigned int WritebackVTaps,
+ unsigned int WritebackSourceWidth,
+ unsigned int WritebackDestinationWidth,
+ unsigned int HTotal,
+ unsigned int WritebackLineBufferSize,
+ double DISPCLKDPPCLKVCOSpeed);
+
+/*
+ * Declaration only; the definition is not part of this header.
+ *
+ * Returns nothing and carries no Output marker.
+ * NOTE(review): MinPrefetchMode and MaxPrefetchMode are non-const pointers
+ * and are presumably the outputs - confirm against the definition.
+ */
+void dml32_CalculateMinAndMaxPrefetchMode(
+ enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,
+ unsigned int *MinPrefetchMode,
+ unsigned int *MaxPrefetchMode);
+
+void dml32_CalculatePixelDeliveryTimes(
+ unsigned int NumberOfActiveSurfaces,
+ double VRatio[],
+ double VRatioChroma[],
+ double VRatioPrefetchY[],
+ double VRatioPrefetchC[],
+ unsigned int swath_width_luma_ub[],
+ unsigned int swath_width_chroma_ub[],
+ unsigned int DPPPerSurface[],
+ double HRatio[],
+ double HRatioChroma[],
+ double PixelClock[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double Dppclk[],
+ unsigned int BytePerPixelC[],
+ enum dm_rotation_angle SourceRotation[],
+ unsigned int NumberOfCursors[],
+ unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
+ unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
+ unsigned int BlockWidth256BytesY[],
+ unsigned int BlockHeight256BytesY[],
+ unsigned int BlockWidth256BytesC[],
+ unsigned int BlockHeight256BytesC[],
+
+ /* Output */
+ double DisplayPipeLineDeliveryTimeLuma[],
+ double DisplayPipeLineDeliveryTimeChroma[],
+ double DisplayPipeLineDeliveryTimeLumaPrefetch[],
+ double DisplayPipeLineDeliveryTimeChromaPrefetch[],
+ double DisplayPipeRequestDeliveryTimeLuma[],
+ double DisplayPipeRequestDeliveryTimeChroma[],
+ double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
+ double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
+ double CursorRequestDeliveryTime[],
+ double CursorRequestDeliveryTimePrefetch[]);
+
+void dml32_CalculateMetaAndPTETimes(
+ bool use_one_row_for_frame[],
+ unsigned int NumberOfActiveSurfaces,
+ bool GPUVMEnable,
+ unsigned int MetaChunkSize,
+ unsigned int MinMetaChunkSizeBytes,
+ unsigned int HTotal[],
+ double VRatio[],
+ double VRatioChroma[],
+ double DestinationLinesToRequestRowInVBlank[],
+ double DestinationLinesToRequestRowInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ unsigned int BytePerPixelY[],
+ unsigned int BytePerPixelC[],
+ enum dm_rotation_angle SourceRotation[],
+ unsigned int dpte_row_height[],
+ unsigned int dpte_row_height_chroma[],
+ unsigned int meta_row_width[],
+ unsigned int meta_row_width_chroma[],
+ unsigned int meta_row_height[],
+ unsigned int meta_row_height_chroma[],
+ unsigned int meta_req_width[],
+ unsigned int meta_req_width_chroma[],
+ unsigned int meta_req_height[],
+ unsigned int meta_req_height_chroma[],
+ unsigned int dpte_group_bytes[],
+ unsigned int PTERequestSizeY[],
+ unsigned int PTERequestSizeC[],
+ unsigned int PixelPTEReqWidthY[],
+ unsigned int PixelPTEReqHeightY[],
+ unsigned int PixelPTEReqWidthC[],
+ unsigned int PixelPTEReqHeightC[],
+ unsigned int dpte_row_width_luma_ub[],
+ unsigned int dpte_row_width_chroma_ub[],
+
+ /* Output */
+ double DST_Y_PER_PTE_ROW_NOM_L[],
+ double DST_Y_PER_PTE_ROW_NOM_C[],
+ double DST_Y_PER_META_ROW_NOM_L[],
+ double DST_Y_PER_META_ROW_NOM_C[],
+ double TimePerMetaChunkNominal[],
+ double TimePerChromaMetaChunkNominal[],
+ double TimePerMetaChunkVBlank[],
+ double TimePerChromaMetaChunkVBlank[],
+ double TimePerMetaChunkFlip[],
+ double TimePerChromaMetaChunkFlip[],
+ double time_per_pte_group_nom_luma[],
+ double time_per_pte_group_vblank_luma[],
+ double time_per_pte_group_flip_luma[],
+ double time_per_pte_group_nom_chroma[],
+ double time_per_pte_group_vblank_chroma[],
+ double time_per_pte_group_flip_chroma[]);
+
+void dml32_CalculateVMGroupAndRequestTimes(
+ unsigned int NumberOfActiveSurfaces,
+ bool GPUVMEnable,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int HTotal[],
+ unsigned int BytePerPixelC[],
+ double DestinationLinesToRequestVMInVBlank[],
+ double DestinationLinesToRequestVMInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ unsigned int dpte_row_width_luma_ub[],
+ unsigned int dpte_row_width_chroma_ub[],
+ unsigned int vm_group_bytes[],
+ unsigned int dpde0_bytes_per_frame_ub_l[],
+ unsigned int dpde0_bytes_per_frame_ub_c[],
+ unsigned int meta_pte_bytes_per_frame_ub_l[],
+ unsigned int meta_pte_bytes_per_frame_ub_c[],
+
+ /* Output */
+ double TimePerVMGroupVBlank[],
+ double TimePerVMGroupFlip[],
+ double TimePerVMRequestVBlank[],
+ double TimePerVMRequestFlip[]);
+
+void dml32_CalculateDCCConfiguration(
+ bool DCCEnabled,
+ bool DCCProgrammingAssumesScanDirectionUnknown,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceWidthLuma,
+ unsigned int SurfaceWidthChroma,
+ unsigned int SurfaceHeightLuma,
+ unsigned int SurfaceHeightChroma,
+ unsigned int nomDETInKByte,
+ unsigned int RequestHeight256ByteLuma,
+ unsigned int RequestHeight256ByteChroma,
+ enum dm_swizzle_mode TilingFormat,
+ unsigned int BytePerPixelY,
+ unsigned int BytePerPixelC,
+ double BytePerPixelDETY,
+ double BytePerPixelDETC,
+ enum dm_rotation_angle SourceRotation,
+ /* Output */
+ unsigned int *MaxUncompressedBlockLuma,
+ unsigned int *MaxUncompressedBlockChroma,
+ unsigned int *MaxCompressedBlockLuma,
+ unsigned int *MaxCompressedBlockChroma,
+ unsigned int *IndependentBlockLuma,
+ unsigned int *IndependentBlockChroma);
+
+void dml32_CalculateStutterEfficiency(
+ unsigned int CompressedBufferSizeInkByte,
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ bool UnboundedRequestEnabled,
+ unsigned int MetaFIFOSizeInKEntries,
+ unsigned int ZeroSizeBufferEntries,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int ROBBufferSizeInKByte,
+ double TotalDataReadBandwidth,
+ double DCFCLK,
+ double ReturnBW,
+ unsigned int CompbufReservedSpace64B,
+ unsigned int CompbufReservedSpaceZs,
+ double SRExitTime,
+ double SRExitZ8Time,
+ bool SynchronizeTimingsFinal,
+ unsigned int BlendingAndTiming[],
+ double StutterEnterPlusExitWatermark,
+ double Z8StutterEnterPlusExitWatermark,
+ bool ProgressiveToInterlaceUnitInOPP,
+ bool Interlace[],
+ double MinTTUVBlank[],
+ unsigned int DPPPerSurface[],
+ unsigned int DETBufferSizeY[],
+ unsigned int BytePerPixelY[],
+ double BytePerPixelDETY[],
+ double SwathWidthY[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ double NetDCCRateLuma[],
+ double NetDCCRateChroma[],
+ double DCCFractionOfZeroSizeRequestsLuma[],
+ double DCCFractionOfZeroSizeRequestsChroma[],
+ unsigned int HTotal[],
+ unsigned int VTotal[],
+ double PixelClock[],
+ double VRatio[],
+ enum dm_rotation_angle SourceRotation[],
+ unsigned int BlockHeight256BytesY[],
+ unsigned int BlockWidth256BytesY[],
+ unsigned int BlockHeight256BytesC[],
+ unsigned int BlockWidth256BytesC[],
+ unsigned int DCCYMaxUncompressedBlock[],
+ unsigned int DCCCMaxUncompressedBlock[],
+ unsigned int VActive[],
+ bool DCCEnable[],
+ bool WritebackEnable[],
+ double ReadBandwidthSurfaceLuma[],
+ double ReadBandwidthSurfaceChroma[],
+ double meta_row_bw[],
+ double dpte_row_bw[],
+
+ /* Output */
+ double *StutterEfficiencyNotIncludingVBlank,
+ double *StutterEfficiency,
+ unsigned int *NumberOfStutterBurstsPerFrame,
+ double *Z8StutterEfficiencyNotIncludingVBlank,
+ double *Z8StutterEfficiency,
+ unsigned int *Z8NumberOfStutterBurstsPerFrame,
+ double *StutterPeriod,
+ bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
+
+void dml32_CalculateMaxDETAndMinCompressedBufferSize(
+ unsigned int ConfigReturnBufferSizeInKByte,
+ unsigned int ROBBufferSizeInKByte,
+ unsigned int MaxNumDPP,
+ bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
+ unsigned int nomDETInKByteOverrideValue, // VBA_DELTA
+
+ /* Output */
+ unsigned int *MaxTotalDETInKByte,
+ unsigned int *nomDETInKByte,
+ unsigned int *MinCompressedBufferSizeInKByte);
+
+/*
+ * Declaration only; the definition is not part of this header.
+ *
+ * Returns a bool; no output parameters are marked.
+ *
+ * NOTE: "Bandwith" (sic) is the identifier's actual spelling and callers must
+ * use it as declared. dml32_CalculateBandwidthAvailableForImmediateFlip() in
+ * this header spells the word "Bandwidth".
+ */
+bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ bool NotUrgentLatencyHiding[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double cursor_bw[],
+ double meta_row_bandwidth[],
+ double dpte_row_bandwidth[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[]);
+
+/*
+ * Declaration only; the definition is not part of this header.
+ *
+ * Returns nothing. Per the Output marker below, results are reported through
+ * *MaxPrefetchBandwidth, *FractionOfUrgentBandwidth and
+ * *PrefetchBandwidthSupport.
+ *
+ * NOTE: "Bandwith" (sic) in the function name is the identifier's actual
+ * spelling; callers must use it as declared.
+ */
+void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ bool NotUrgentLatencyHiding[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double PrefetchBandwidthLuma[],
+ double PrefetchBandwidthChroma[],
+ double cursor_bw[],
+ double meta_row_bandwidth[],
+ double dpte_row_bandwidth[],
+ double cursor_bw_pre[],
+ double prefetch_vmrow_bw[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[],
+ double UrgentBurstFactorLumaPre[],
+ double UrgentBurstFactorChromaPre[],
+ double UrgentBurstFactorCursorPre[],
+ double PrefetchBW[],
+ double VRatio[],
+ double MaxVRatioPre,
+
+ /* Output */
+ double *MaxPrefetchBandwidth,
+ double *FractionOfUrgentBandwidth,
+ bool *PrefetchBandwidthSupport);
+
+double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double PrefetchBandwidthLuma[],
+ double PrefetchBandwidthChroma[],
+ double cursor_bw[],
+ double cursor_bw_pre[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[],
+ double UrgentBurstFactorLumaPre[],
+ double UrgentBurstFactorChromaPre[],
+ double UrgentBurstFactorCursorPre[]);
+
+/*
+ * Declaration only; the definition is not part of this header.
+ *
+ * Returns nothing. Per the Output marker below, results are reported through
+ * *TotalBandwidth, *FractionOfUrgentBandwidth and
+ * *ImmediateFlipBandwidthSupport.
+ *
+ * NOTE: "Bandwith" (sic) in the function name is the identifier's actual
+ * spelling; callers must use it as declared.
+ */
+void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ enum immediate_flip_requirement ImmediateFlipRequirement[],
+ double final_flip_bw[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double PrefetchBandwidthLuma[],
+ double PrefetchBandwidthChroma[],
+ double cursor_bw[],
+ double meta_row_bandwidth[],
+ double dpte_row_bandwidth[],
+ double cursor_bw_pre[],
+ double prefetch_vmrow_bw[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[],
+ double UrgentBurstFactorLumaPre[],
+ double UrgentBurstFactorChromaPre[],
+ double UrgentBurstFactorCursorPre[],
+
+ /* Output */
+ double *TotalBandwidth,
+ double *FractionOfUrgentBandwidth,
+ bool *ImmediateFlipBandwidthSupport);
+
+/*
+ * Declaration only; the definition is not part of this header.
+ *
+ * Returns a bool; no output parameters are marked.
+ *
+ * NOTE(review): SwathWidthY[] and SwathWidthC[] are unsigned int arrays here,
+ * while other declarations in this header (for example
+ * dml32_CalculateDCFCLKDeepSleep()) take them as double arrays. An array of
+ * double cannot be passed where unsigned int[] is expected, so confirm which
+ * storage the callers actually hand in.
+ */
+bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ double UrgentLatency,
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ unsigned int SwathWidthY[],
+ unsigned int SwathWidthC[],
+ double BytePerPixelInDETY[],
+ double BytePerPixelInDETC[],
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ unsigned int NumOfDPP[],
+ unsigned int HTotal[],
+ double PixelClock[],
+ double VRatioY[],
+ double VRatioC[],
+ enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
+ enum unbounded_requesting_policy UseUnboundedRequesting);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c
new file mode 100644
index 000000000000..6c75aa82327a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c
@@ -0,0 +1,614 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+#include "../display_mode_lib.h"
+#include "../display_mode_vba.h"
+#include "../dml_inline_defs.h"
+#include "display_rq_dlg_calc_32.h"
+
+/*
+ * Report whether source_format is one of the formats this file handles as
+ * dual plane: dm_420_8, dm_420_10, dm_420_12 or dm_rgbe_alpha.
+ */
+static bool is_dual_plane(enum source_format_class source_format)
+{
+	const bool is_420 = source_format == dm_420_8 ||
+			    source_format == dm_420_10 ||
+			    source_format == dm_420_12;
+
+	return is_420 || source_format == dm_rgbe_alpha;
+}
+
+/*
+ * Fill *rq_regs for the pipe at index pipe_idx.
+ *
+ * *rq_regs is zeroed first, then populated from values returned by the
+ * get_*() accessors: chunk and group size fields (stored as dml_log2() of a
+ * byte count minus a per-field offset), the linear PTE row height and swath
+ * height fields, the expansion modes, and plane1_base_address.
+ *
+ * The rq_regs_c (p1_*) sizes are copies of the rq_regs_l ones, except for the
+ * pixel chunk size when the source format is dm_rgbe_alpha.
+ * plane1_base_address is only non-zero for dual-plane source formats.
+ */
+void dml32_rq_dlg_get_rq_reg(display_rq_regs_st *rq_regs,
+ struct display_mode_lib *mode_lib,
+ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx)
+{
+ const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src;
+ bool dual_plane = is_dual_plane((enum source_format_class) (src->source_format));
+ double stored_swath_l_bytes;
+ double stored_swath_c_bytes;
+ bool is_phantom_pipe;
+ uint32_t pixel_chunk_bytes = 0;
+ uint32_t min_pixel_chunk_bytes = 0;
+ uint32_t meta_chunk_bytes = 0;
+ uint32_t min_meta_chunk_bytes = 0;
+ uint32_t dpte_group_bytes = 0;
+ uint32_t mpte_group_bytes = 0;
+
+ uint32_t p1_pixel_chunk_bytes = 0;
+ uint32_t p1_min_pixel_chunk_bytes = 0;
+ uint32_t p1_meta_chunk_bytes = 0;
+ uint32_t p1_min_meta_chunk_bytes = 0;
+ uint32_t p1_dpte_group_bytes = 0;
+ uint32_t p1_mpte_group_bytes = 0;
+
+ unsigned int detile_buf_size_in_bytes;
+ unsigned int detile_buf_plane1_addr;
+ unsigned int pte_row_height_linear;
+
+ /* Start from an all-zero register set; fields not assigned below stay 0. */
+ memset(rq_regs, 0, sizeof(*rq_regs));
+
+ dml_print("DML_DLG::%s: Calculation for pipe[%d] start, num_pipes=%d\n", __func__, pipe_idx, num_pipes);
+
+ pixel_chunk_bytes = get_pixel_chunk_size_in_kbyte(mode_lib, e2e_pipe_param, num_pipes) * 1024; // From VBA
+ min_pixel_chunk_bytes = get_min_pixel_chunk_size_in_byte(mode_lib, e2e_pipe_param, num_pipes); // From VBA
+
+ /* With a 64 KiB pixel chunk the minimum pixel chunk size is forced to 0. */
+ if (pixel_chunk_bytes == 64 * 1024)
+ min_pixel_chunk_bytes = 0;
+
+ meta_chunk_bytes = get_meta_chunk_size_in_kbyte(mode_lib, e2e_pipe_param, num_pipes) * 1024; // From VBA
+ min_meta_chunk_bytes = get_min_meta_chunk_size_in_byte(mode_lib, e2e_pipe_param, num_pipes); // From VBA
+
+ dpte_group_bytes = get_dpte_group_size_in_bytes(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ mpte_group_bytes = get_vm_group_size_in_bytes(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ /* Plane 1 (p1_*) starts out with the same sizes as plane 0. */
+ p1_pixel_chunk_bytes = pixel_chunk_bytes;
+ p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes;
+ p1_meta_chunk_bytes = meta_chunk_bytes;
+ p1_min_meta_chunk_bytes = min_meta_chunk_bytes;
+ p1_dpte_group_bytes = dpte_group_bytes;
+ p1_mpte_group_bytes = mpte_group_bytes;
+
+ /* dm_rgbe_alpha is the only format that gets its own plane-1 pixel chunk size. */
+ if ((enum source_format_class) src->source_format == dm_rgbe_alpha)
+ p1_pixel_chunk_bytes = get_alpha_pixel_chunk_size_in_kbyte(mode_lib, e2e_pipe_param, num_pipes) * 1024;
+
+ /* Size fields hold dml_log2(bytes) minus a fixed per-field offset. */
+ rq_regs->rq_regs_l.chunk_size = dml_log2(pixel_chunk_bytes) - 10;
+ rq_regs->rq_regs_c.chunk_size = dml_log2(p1_pixel_chunk_bytes) - 10;
+
+ /* A zero minimum size is written as 0 instead of being passed to dml_log2(). */
+ if (min_pixel_chunk_bytes == 0)
+ rq_regs->rq_regs_l.min_chunk_size = 0;
+ else
+ rq_regs->rq_regs_l.min_chunk_size = dml_log2(min_pixel_chunk_bytes) - 8 + 1;
+
+ if (p1_min_pixel_chunk_bytes == 0)
+ rq_regs->rq_regs_c.min_chunk_size = 0;
+ else
+ rq_regs->rq_regs_c.min_chunk_size = dml_log2(p1_min_pixel_chunk_bytes) - 8 + 1;
+
+ rq_regs->rq_regs_l.meta_chunk_size = dml_log2(meta_chunk_bytes) - 10;
+ rq_regs->rq_regs_c.meta_chunk_size = dml_log2(p1_meta_chunk_bytes) - 10;
+
+ if (min_meta_chunk_bytes == 0)
+ rq_regs->rq_regs_l.min_meta_chunk_size = 0;
+ else
+ rq_regs->rq_regs_l.min_meta_chunk_size = dml_log2(min_meta_chunk_bytes) - 6 + 1;
+
+ if (p1_min_meta_chunk_bytes == 0)
+ rq_regs->rq_regs_c.min_meta_chunk_size = 0;
+ else
+ rq_regs->rq_regs_c.min_meta_chunk_size = dml_log2(p1_min_meta_chunk_bytes) - 6 + 1;
+
+ rq_regs->rq_regs_l.dpte_group_size = dml_log2(dpte_group_bytes) - 6;
+ rq_regs->rq_regs_l.mpte_group_size = dml_log2(mpte_group_bytes) - 6;
+ rq_regs->rq_regs_c.dpte_group_size = dml_log2(p1_dpte_group_bytes) - 6;
+ rq_regs->rq_regs_c.mpte_group_size = dml_log2(p1_mpte_group_bytes) - 6;
+
+ detile_buf_size_in_bytes = get_det_buffer_size_kbytes(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * 1024;
+ detile_buf_plane1_addr = 0;
+ pte_row_height_linear = get_dpte_row_height_linear_l(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx);
+
+ /* The minimum row height of 8 is only asserted for linear surfaces. */
+ if (src->sw_mode == dm_sw_linear)
+ ASSERT(pte_row_height_linear >= 8);
+
+ rq_regs->rq_regs_l.pte_row_height_linear = dml_floor(dml_log2(pte_row_height_linear), 1) - 3;
+
+ /* rq_regs_c.pte_row_height_linear is only computed for dual-plane formats. */
+ if (dual_plane) {
+ unsigned int p1_pte_row_height_linear = get_dpte_row_height_linear_c(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx);
+ if (src->sw_mode == dm_sw_linear)
+ ASSERT(p1_pte_row_height_linear >= 8);
+
+ rq_regs->rq_regs_c.pte_row_height_linear = dml_floor(dml_log2(p1_pte_row_height_linear), 1) - 3;
+ }
+
+ rq_regs->rq_regs_l.swath_height = dml_log2(get_swath_height_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx));
+ rq_regs->rq_regs_c.swath_height = dml_log2(get_swath_height_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx));
+
+ // FIXME: take the max between luma, chroma chunk size?
+ // okay for now, as we are setting pixel_chunk_bytes to 8kb anyways
+ /* drq mode is 0 if the plane-0 chunk, or the plane-1 chunk of a dual-plane format, is >= 32 KiB; else 2. */
+ if (pixel_chunk_bytes >= 32 * 1024 || (dual_plane && p1_pixel_chunk_bytes >= 32 * 1024)) { //32kb
+ rq_regs->drq_expansion_mode = 0;
+ } else {
+ rq_regs->drq_expansion_mode = 2;
+ }
+ rq_regs->prq_expansion_mode = 1;
+ rq_regs->mrq_expansion_mode = 1;
+ rq_regs->crq_expansion_mode = 1;
+
+ stored_swath_l_bytes = get_det_stored_buffer_size_l_bytes(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx);
+ stored_swath_c_bytes = get_det_stored_buffer_size_c_bytes(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx);
+ is_phantom_pipe = get_is_phantom_pipe(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+
+ // Note: detile_buf_plane1_addr is in unit of 1KB
+ if (dual_plane) {
+ if (is_phantom_pipe) {
+ /* Fixed value (512) that does not depend on detile_buf_size_in_bytes. */
+ detile_buf_plane1_addr = ((1024.0 * 1024.0) / 2.0 / 1024.0); // half to chroma
+ } else {
+ /*
+ * Luma stored size at most 1.5x the chroma stored size: plane 1 starts
+ * at the midpoint. Otherwise it starts at 2/3 of the buffer, after the
+ * byte offset is passed through dml_round_to_multiple(..., 1024, 0).
+ */
+ if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) {
+ detile_buf_plane1_addr = (detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n",
+ __func__, detile_buf_plane1_addr);
+#endif
+ } else {
+ detile_buf_plane1_addr =
+ dml_round_to_multiple(
+ (unsigned int) ((2.0 * detile_buf_size_in_bytes) / 3.0),
+ 1024, 0) / 1024.0; // 2/3 to luma
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n",
+ __func__, detile_buf_plane1_addr);
+#endif
+ }
+ }
+ }
+ /* Stays 0 (its initial value above) for single-plane formats. */
+ rq_regs->plane1_base_address = detile_buf_plane1_addr;
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe);
+ dml_print("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes);
+ dml_print("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes);
+ dml_print("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes);
+ dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr);
+ dml_print("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address);
+#endif
+ print__rq_regs_st(mode_lib, rq_regs);
+ dml_print("DML_DLG::%s: Calculation for pipe[%d] done, num_pipes=%d\n", __func__, pipe_idx, num_pipes);
+}
+
+void dml32_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+ display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx)
+{
+ const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src;
+ const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest;
+ const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg;
+ double refcyc_per_req_delivery_pre_cur0 = 0.;
+ double refcyc_per_req_delivery_cur0 = 0.;
+ double refcyc_per_req_delivery_pre_c = 0.;
+ double refcyc_per_req_delivery_c = 0.;
+ double refcyc_per_req_delivery_pre_l;
+ double refcyc_per_req_delivery_l;
+ double refcyc_per_line_delivery_pre_c = 0.;
+ double refcyc_per_line_delivery_c = 0.;
+ double refcyc_per_line_delivery_pre_l;
+ double refcyc_per_line_delivery_l;
+ double min_ttu_vblank;
+ double vratio_pre_l;
+ double vratio_pre_c;
+ unsigned int min_dst_y_next_start;
+ unsigned int htotal = dst->htotal;
+ unsigned int hblank_end = dst->hblank_end;
+ unsigned int vblank_end = dst->vblank_end;
+ bool interlaced = dst->interlaced;
+ double pclk_freq_in_mhz = dst->pixel_rate_mhz;
+ unsigned int vready_after_vcount0;
+ double refclk_freq_in_mhz = clks->refclk_mhz;
+ double ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz;
+ bool dual_plane = 0;
+ unsigned int pipe_index_in_combine[DC__NUM_PIPES__MAX]; // only written and read under the 2to1/4to1 ODM combine branches below
+ unsigned int dst_x_after_scaler;
+ unsigned int dst_y_after_scaler;
+ double dst_y_prefetch;
+ double dst_y_per_vm_vblank;
+ double dst_y_per_row_vblank;
+ double dst_y_per_vm_flip;
+ double dst_y_per_row_flip;
+ double max_dst_y_per_vm_vblank = 32.0;
+ double max_dst_y_per_row_vblank = 16.0;
+ double dst_y_per_pte_row_nom_l;
+ double dst_y_per_pte_row_nom_c;
+ double dst_y_per_meta_row_nom_l;
+ double dst_y_per_meta_row_nom_c;
+ double refcyc_per_pte_group_nom_l;
+ double refcyc_per_pte_group_nom_c;
+ double refcyc_per_pte_group_vblank_l;
+ double refcyc_per_pte_group_vblank_c;
+ double refcyc_per_pte_group_flip_l;
+ double refcyc_per_pte_group_flip_c;
+ double refcyc_per_meta_chunk_nom_l;
+ double refcyc_per_meta_chunk_nom_c;
+ double refcyc_per_meta_chunk_vblank_l;
+ double refcyc_per_meta_chunk_vblank_c;
+ double refcyc_per_meta_chunk_flip_l;
+ double refcyc_per_meta_chunk_flip_c;
+ // Compute the DLG and TTU register values for pipe[pipe_idx]: both output structs are zeroed, then filled from the get_*() helper results
+ memset(dlg_regs, 0, sizeof(*dlg_regs));
+ memset(ttu_regs, 0, sizeof(*ttu_regs));
+ dml_print("DML_DLG::%s: Calculation for pipe[%d] starts, num_pipes=%d\n", __func__, pipe_idx, num_pipes);
+ dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, ref_freq_to_pix_freq);
+ dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced);
+ ASSERT(ref_freq_to_pix_freq < 4.0);
+
+ dlg_regs->ref_freq_to_pix_freq = (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19));
+ dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal * dml_pow(2, 8));
+ dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits
+
+ min_ttu_vblank = get_min_ttu_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ min_dst_y_next_start = get_min_dst_y_next_start(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+
+ dml_print("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, min_ttu_vblank);
+ dml_print("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, min_dst_y_next_start);
+ dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, ref_freq_to_pix_freq);
+
+ dual_plane = is_dual_plane((enum source_format_class) (src->source_format));
+
+ vready_after_vcount0 = get_vready_at_or_after_vsync(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx); // From VBA
+ dlg_regs->vready_after_vcount0 = vready_after_vcount0;
+
+ dml_print("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, dlg_regs->vready_after_vcount0);
+
+ dst_x_after_scaler = dml_ceil(get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx), 1);
+ dst_y_after_scaler = dml_ceil(get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx), 1);
+
+ // NOTE(review): original comment says "adjust dst_after scaler for odm combine mode", but below only refcyc_h_blank_end is offset per ODM pipe; dst_x/y_after_scaler are used as read - confirm
+ dml_print("DML_DLG: %s: input dst_x_after_scaler = %d\n", __func__, dst_x_after_scaler);
+ dml_print("DML_DLG: %s: input dst_y_after_scaler = %d\n", __func__, dst_y_after_scaler);
+
+ // need to figure out which side of odm combine we're in
+ if (dst->odm_combine == dm_odm_combine_mode_2to1 || dst->odm_combine == dm_odm_combine_mode_4to1) {
+ // figure out which pipes go together: each hsplit pipe gets its running index within its hsplit_grp (stored in pipe_index_in_combine[])
+ bool visited[DC__NUM_PIPES__MAX];
+ unsigned int i, j, k;
+
+ for (k = 0; k < num_pipes; ++k) {
+ visited[k] = false;
+ pipe_index_in_combine[k] = 0;
+ }
+
+ for (i = 0; i < num_pipes; i++) {
+ if (e2e_pipe_param[i].pipe.src.is_hsplit && !visited[i]) {
+
+ unsigned int grp = e2e_pipe_param[i].pipe.src.hsplit_grp;
+ unsigned int grp_idx = 0;
+
+ for (j = i; j < num_pipes; j++) {
+ if (e2e_pipe_param[j].pipe.src.hsplit_grp == grp
+ && e2e_pipe_param[j].pipe.src.is_hsplit && !visited[j]) {
+ pipe_index_in_combine[j] = grp_idx;
+ dml_print("DML_DLG: %s: pipe[%d] is in grp %d idx %d\n",
+ __func__, j, grp, grp_idx);
+ grp_idx++;
+ visited[j] = true;
+ }
+ }
+ }
+ }
+ }
+
+ if (dst->odm_combine == dm_odm_combine_mode_disabled) {
+ // FIXME how about ODM split??
+ dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end * ref_freq_to_pix_freq);
+ } else {
+ if (dst->odm_combine == dm_odm_combine_mode_2to1 || dst->odm_combine == dm_odm_combine_mode_4to1) {
+ // TODO: We should really check that 4to1 is supported before setting it to 4
+ unsigned int odm_combine_factor = (dst->odm_combine == dm_odm_combine_mode_2to1 ? 2 : 4);
+ unsigned int odm_pipe_index = pipe_index_in_combine[pipe_idx];
+
+ dlg_regs->refcyc_h_blank_end = (unsigned int) (((double) hblank_end
+ + odm_pipe_index * (double) dst->hactive / odm_combine_factor) * ref_freq_to_pix_freq); // hblank_end plus odm_pipe_index slices of hactive / odm_combine_factor, converted to refclk cycles
+ }
+ }
+ ASSERT(dlg_regs->refcyc_h_blank_end < (unsigned int)dml_pow(2, 13));
+
+ dml_print("DML_DLG: %s: htotal= %d\n", __func__, htotal);
+ dml_print("DML_DLG: %s: dst_x_after_scaler[%d]= %d\n", __func__, pipe_idx, dst_x_after_scaler);
+ dml_print("DML_DLG: %s: dst_y_after_scaler[%d] = %d\n", __func__, pipe_idx, dst_y_after_scaler);
+
+ dst_y_prefetch = get_dst_y_prefetch(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ // From VBA
+ dst_y_per_vm_vblank = get_dst_y_per_vm_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+ // From VBA
+ dst_y_per_row_vblank = get_dst_y_per_row_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+ dst_y_per_vm_flip = get_dst_y_per_vm_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ dst_y_per_row_flip = get_dst_y_per_row_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ // magic! for htotal <= 75 the limits checked by the two vblank ASSERTs below are raised to 100 (rationale not documented here)
+ if (htotal <= 75) {
+ max_dst_y_per_vm_vblank = 100.0;
+ max_dst_y_per_row_vblank = 100.0;
+ }
+
+ dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch);
+ dml_print("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, dst_y_per_vm_flip);
+ dml_print("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, dst_y_per_row_flip);
+ dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank);
+ dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank);
+
+ ASSERT(dst_y_per_vm_vblank < max_dst_y_per_vm_vblank);
+ ASSERT(dst_y_per_row_vblank < max_dst_y_per_row_vblank);
+ ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank));
+
+ vratio_pre_l = get_vratio_prefetch_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ vratio_pre_c = get_vratio_prefetch_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ dml_print("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, vratio_pre_l);
+ dml_print("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, vratio_pre_c);
+
+ // Active
+ refcyc_per_line_delivery_pre_l = get_refcyc_per_line_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_line_delivery_l = get_refcyc_per_line_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, refcyc_per_line_delivery_pre_l);
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, refcyc_per_line_delivery_l);
+
+ if (dual_plane) {
+ refcyc_per_line_delivery_pre_c = get_refcyc_per_line_delivery_pre_c_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_line_delivery_c = get_refcyc_per_line_delivery_c_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n",
+ __func__, refcyc_per_line_delivery_pre_c);
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n",
+ __func__, refcyc_per_line_delivery_c);
+ }
+
+ if (src->dynamic_metadata_enable && src->gpuvm)
+ dlg_regs->refcyc_per_vm_dmdata = get_refcyc_per_vm_dmdata_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dlg_regs->dmdata_dl_delta = get_dmdata_dl_delta_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx)
+ * refclk_freq_in_mhz; // From VBA
+
+ refcyc_per_req_delivery_pre_l = get_refcyc_per_req_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_req_delivery_l = get_refcyc_per_req_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, refcyc_per_req_delivery_pre_l);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, refcyc_per_req_delivery_l);
+
+ if (dual_plane) {
+ refcyc_per_req_delivery_pre_c = get_refcyc_per_req_delivery_pre_c_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_req_delivery_c = get_refcyc_per_req_delivery_c_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n",
+ __func__, refcyc_per_req_delivery_pre_c);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, refcyc_per_req_delivery_c);
+ }
+
+ // TTU - Cursor: only cursor 0 is computed (the ASSERT expects at most one cursor); the cur1 delivery fields are written as 0 further down
+ ASSERT(src->num_cursors <= 1);
+ if (src->num_cursors > 0) {
+ refcyc_per_req_delivery_pre_cur0 = get_refcyc_per_cursor_req_delivery_pre_in_us(mode_lib,
+ e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_req_delivery_cur0 = get_refcyc_per_cursor_req_delivery_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur0 = %3.2f\n",
+ __func__, refcyc_per_req_delivery_pre_cur0);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur0 = %3.2f\n",
+ __func__, refcyc_per_req_delivery_cur0);
+ }
+
+ // Assign to register structures; values scaled by dml_pow(2, N) are presumably fixed point with N fractional bits (a warning below names U15.2) - TODO confirm
+ dlg_regs->min_dst_y_next_start = min_dst_y_next_start * dml_pow(2, 2);
+ ASSERT(dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18));
+
+ dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line
+ dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; // in terms of refclk
+ dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2));
+ dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2));
+ dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2));
+ dlg_regs->dst_y_per_vm_flip = (unsigned int) (dst_y_per_vm_flip * dml_pow(2, 2));
+ dlg_regs->dst_y_per_row_flip = (unsigned int) (dst_y_per_row_flip * dml_pow(2, 2));
+
+ dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19));
+ dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19));
+
+ dml_print("DML_DLG: %s: dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, dlg_regs->dst_y_per_vm_vblank);
+ dml_print("DML_DLG: %s: dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, dlg_regs->dst_y_per_row_vblank);
+ dml_print("DML_DLG: %s: dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, dlg_regs->dst_y_per_vm_flip);
+ dml_print("DML_DLG: %s: dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, dlg_regs->dst_y_per_row_flip);
+
+ dlg_regs->refcyc_per_vm_group_vblank = get_refcyc_per_vm_group_vblank_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ dlg_regs->refcyc_per_vm_group_flip = get_refcyc_per_vm_group_flip_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+ dlg_regs->refcyc_per_vm_req_vblank = get_refcyc_per_vm_req_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10); // From VBA
+ dlg_regs->refcyc_per_vm_req_flip = get_refcyc_per_vm_req_flip_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10); // From VBA
+
+ // From VBA
+ dst_y_per_pte_row_nom_l = get_dst_y_per_pte_row_nom_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+ // From VBA
+ dst_y_per_pte_row_nom_c = get_dst_y_per_pte_row_nom_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+ // From VBA
+ dst_y_per_meta_row_nom_l = get_dst_y_per_meta_row_nom_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+ // From VBA
+ dst_y_per_meta_row_nom_c = get_dst_y_per_meta_row_nom_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+
+ refcyc_per_pte_group_nom_l = get_refcyc_per_pte_group_nom_l_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_pte_group_nom_c = get_refcyc_per_pte_group_nom_c_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_pte_group_vblank_l = get_refcyc_per_pte_group_vblank_l_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_pte_group_vblank_c = get_refcyc_per_pte_group_vblank_c_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_pte_group_flip_l = get_refcyc_per_pte_group_flip_l_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_pte_group_flip_c = get_refcyc_per_pte_group_flip_c_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ refcyc_per_meta_chunk_nom_l = get_refcyc_per_meta_chunk_nom_l_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_meta_chunk_nom_c = get_refcyc_per_meta_chunk_nom_c_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_meta_chunk_vblank_l = get_refcyc_per_meta_chunk_vblank_l_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_meta_chunk_vblank_c = get_refcyc_per_meta_chunk_vblank_c_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_meta_chunk_flip_l = get_refcyc_per_meta_chunk_flip_l_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_meta_chunk_flip_c = get_refcyc_per_meta_chunk_flip_c_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dlg_regs->dst_y_per_pte_row_nom_l = dst_y_per_pte_row_nom_l * dml_pow(2, 2);
+ dlg_regs->dst_y_per_pte_row_nom_c = dst_y_per_pte_row_nom_c * dml_pow(2, 2);
+ dlg_regs->dst_y_per_meta_row_nom_l = dst_y_per_meta_row_nom_l * dml_pow(2, 2);
+ dlg_regs->dst_y_per_meta_row_nom_c = dst_y_per_meta_row_nom_c * dml_pow(2, 2);
+ dlg_regs->refcyc_per_pte_group_nom_l = refcyc_per_pte_group_nom_l;
+ dlg_regs->refcyc_per_pte_group_nom_c = refcyc_per_pte_group_nom_c;
+ dlg_regs->refcyc_per_pte_group_vblank_l = refcyc_per_pte_group_vblank_l;
+ dlg_regs->refcyc_per_pte_group_vblank_c = refcyc_per_pte_group_vblank_c;
+ dlg_regs->refcyc_per_pte_group_flip_l = refcyc_per_pte_group_flip_l;
+ dlg_regs->refcyc_per_pte_group_flip_c = refcyc_per_pte_group_flip_c;
+ dlg_regs->refcyc_per_meta_chunk_nom_l = refcyc_per_meta_chunk_nom_l;
+ dlg_regs->refcyc_per_meta_chunk_nom_c = refcyc_per_meta_chunk_nom_c;
+ dlg_regs->refcyc_per_meta_chunk_vblank_l = refcyc_per_meta_chunk_vblank_l;
+ dlg_regs->refcyc_per_meta_chunk_vblank_c = refcyc_per_meta_chunk_vblank_c;
+ dlg_regs->refcyc_per_meta_chunk_flip_l = refcyc_per_meta_chunk_flip_l;
+ dlg_regs->refcyc_per_meta_chunk_flip_c = refcyc_per_meta_chunk_flip_c;
+ dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_l, 1);
+ dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor(refcyc_per_line_delivery_l, 1);
+ dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_c, 1);
+ dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor(refcyc_per_line_delivery_c, 1);
+
+ dlg_regs->chunk_hdl_adjust_cur0 = 3;
+ dlg_regs->dst_y_offset_cur0 = 0;
+ dlg_regs->chunk_hdl_adjust_cur1 = 3;
+ dlg_regs->dst_y_offset_cur1 = 0;
+
+ dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off
+
+ ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l * dml_pow(2, 10));
+ ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l * dml_pow(2, 10));
+ ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int) (refcyc_per_req_delivery_pre_c * dml_pow(2, 10));
+ ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c * dml_pow(2, 10));
+ ttu_regs->refcyc_per_req_delivery_pre_cur0 =
+ (unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10));
+ ttu_regs->refcyc_per_req_delivery_cur0 = (unsigned int) (refcyc_per_req_delivery_cur0 * dml_pow(2, 10));
+ ttu_regs->refcyc_per_req_delivery_pre_cur1 = 0;
+ ttu_regs->refcyc_per_req_delivery_cur1 = 0;
+ ttu_regs->qos_level_low_wm = 0;
+
+ ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal * ref_freq_to_pix_freq);
+
+ ttu_regs->qos_level_flip = 14;
+ ttu_regs->qos_level_fixed_l = 8;
+ ttu_regs->qos_level_fixed_c = 8;
+ ttu_regs->qos_level_fixed_cur0 = 8;
+ ttu_regs->qos_ramp_disable_l = 0;
+ ttu_regs->qos_ramp_disable_c = 0;
+ ttu_regs->qos_ramp_disable_cur0 = 0;
+ ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz;
+
+ // CHECK for HW registers' range, assert or clamp: the refcyc_per_vm_* and refcyc_per_*_nom_* fields are clamped to 2^23 - 1, the remaining fields are only ASSERTed (or warned about)
+ ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13));
+ ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13));
+ ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13));
+ ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13));
+ if (dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_vm_group_vblank = dml_pow(2, 23) - 1;
+
+ if (dlg_regs->refcyc_per_vm_group_flip >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_vm_group_flip = dml_pow(2, 23) - 1;
+
+ if (dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_vm_req_vblank = dml_pow(2, 23) - 1;
+
+ if (dlg_regs->refcyc_per_vm_req_flip >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_vm_req_flip = dml_pow(2, 23) - 1;
+
+ ASSERT(dlg_regs->dst_y_after_scaler < (unsigned int) 8);
+ ASSERT(dlg_regs->refcyc_x_after_scaler < (unsigned int)dml_pow(2, 13));
+ ASSERT(dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int)dml_pow(2, 17));
+ if (dual_plane) {
+ if (dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int) dml_pow(2, 17)) {
+ // FIXME what so special about chroma, can we just assert? (as written, an out-of-range chroma value is only logged, not clamped)
+ dml_print("DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u > register max U15.2 %u\n",
+ __func__, dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)dml_pow(2, 17) - 1);
+ }
+ }
+ ASSERT(dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int)dml_pow(2, 17));
+ ASSERT(dlg_regs->dst_y_per_meta_row_nom_c < (unsigned int)dml_pow(2, 17));
+
+ if (dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1;
+ if (dual_plane) {
+ if (dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1;
+ }
+ ASSERT(dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)dml_pow(2, 13));
+ if (dual_plane) {
+ ASSERT(dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)dml_pow(2, 13));
+ }
+
+ if (dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1;
+ if (dual_plane) {
+ if (dlg_regs->refcyc_per_meta_chunk_nom_c >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_meta_chunk_nom_c = dml_pow(2, 23) - 1;
+ }
+ ASSERT(dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int)dml_pow(2, 13));
+ ASSERT(dlg_regs->refcyc_per_meta_chunk_vblank_c < (unsigned int)dml_pow(2, 13));
+ ASSERT(dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)dml_pow(2, 13));
+ ASSERT(dlg_regs->refcyc_per_line_delivery_l < (unsigned int)dml_pow(2, 13));
+ ASSERT(dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)dml_pow(2, 13));
+ ASSERT(dlg_regs->refcyc_per_line_delivery_c < (unsigned int)dml_pow(2, 13));
+ ASSERT(ttu_regs->qos_level_low_wm < dml_pow(2, 14));
+ ASSERT(ttu_regs->qos_level_high_wm < dml_pow(2, 14));
+ ASSERT(ttu_regs->min_ttu_vblank < dml_pow(2, 24));
+
+ print__ttu_regs_st(mode_lib, ttu_regs);
+ print__dlg_regs_st(mode_lib, dlg_regs);
+ dml_print("DML_DLG::%s: Calculation for pipe[%d] done, num_pipes=%d\n", __func__, pipe_idx, num_pipes);
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.h
new file mode 100644
index 000000000000..ebee365293cd
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DML32_DISPLAY_RQ_DLG_CALC_H__
+#define __DML32_DISPLAY_RQ_DLG_CALC_H__
+
+#include "../display_rq_dlg_helpers.h"
+
+struct display_mode_lib;
+
+/*
+* Function: dml32_rq_dlg_get_rq_reg
+* Main entry point for test to get the register values out of this DML class.
+* NOTE(review): this text names <get_rq_param> and <extract_rq_regs> as the helpers used
+* to calculate and then populate the rq_regs struct - confirm against the implementation.
+* Input:
+* e2e_pipe_param - pipe source configuration (e.g. vp, pitch, scaling, dest, etc.)
+* Output:
+* rq_regs - struct that holds all the RQ registers field value.
+* See also: <display_rq_regs_st>
+*/
+void dml32_rq_dlg_get_rq_reg(display_rq_regs_st *rq_regs,
+ struct display_mode_lib *mode_lib,
+ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx);
+
+/*
+* Function: dml32_rq_dlg_get_dlg_reg
+* Calculate and return DLG and TTU register struct given the system setting
+* Output:
+* dlg_regs - output DLG register struct
+* ttu_regs - output DLG TTU register struct
+* Input:
+* e2e_pipe_param - "compacted" array of e2e pipe param struct
+* num_pipes - num of active "pipe" or "route"
+* pipe_idx - index that identifies the e2e_pipe_param entry that corresponds to this dlg
+* NOTE(review): older text listed a "cstate" input (0: cstate not required for min_ttu_vblank, 1: normal mode);
+* the prototype below has no such parameter, so that entry looks stale - confirm.
+*/
+void dml32_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+ display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
new file mode 100644
index 000000000000..e0a1dc89ce43
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
@@ -0,0 +1,931 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "clk_mgr.h"
+#include "resource.h"
+#include "dcn321_fpu.h"
+#include "dcn32/dcn32_resource.h"
+#include "dcn321/dcn321_resource.h"
+#include "dml/dcn32/display_mode_vba_util_32.h"
+
+/* DET buffer size used to initialise dcn3_21_ip.det_buffer_size_kbytes below. */
+#define DCN3_2_DEFAULT_DET_SIZE 256
+
+/*
+ * Hard-coded IP parameter defaults for DCN 3.2.1.
+ *
+ * dcn321_update_bw_bounding_box_fpu() fills in clamp_min_dcfclk,
+ * dsc_delay_factor_wa and min_prefetch_in_strobe_us at runtime and then
+ * passes this struct to dml_init_instance().
+ */
+struct _vcs_dpi_ip_params_st dcn3_21_ip = {
+ .gpuvm_enable = 0,
+ .gpuvm_max_page_table_levels = 4,
+ .hostvm_enable = 0,
+ .rob_buffer_size_kbytes = 128,
+ .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE,
+ .config_return_buffer_size_in_kbytes = 1280,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 22,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .alpha_pixel_chunk_size_kbytes = 4,
+ .min_pixel_chunk_size_bytes = 1024,
+ .dcc_meta_buffer_size_bytes = 6272,
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 4,
+ .maximum_dsc_bits_per_component = 12,
+ .maximum_pixels_per_line_per_dsc_unit = 6016,
+ .dsc422_native_support = true,
+ .is_line_buffer_bpp_fixed = true,
+ .line_buffer_fixed_bpp = 57,
+ .line_buffer_size_bits = 1171920,
+ .max_line_buffer_lines = 32,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .dispclk_ramp_margin_percent = 1,
+ .max_inter_dcn_tile_repeaters = 8,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 47,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 28,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 125,
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+ .max_num_dp2p0_outputs = 2,
+ .max_num_dp2p0_streams = 4,
+};
+
+/*
+ * Hard-coded SoC bounding box defaults for DCN 3.2.1.
+ *
+ * Only one clock state is listed here (num_states = 1).
+ * dcn321_update_bw_bounding_box_fpu() overrides the latency, DRAM channel and
+ * VCO speed fields, rebuilds clock_limits[] / num_states, and then passes this
+ * struct to dml_init_instance().
+ *
+ * The pct_ideal_*_bw_after_urgent derates, return_bus_width_bytes, num_chans
+ * and dram_channel_width_bytes are also read by get_optimal_ntuple() and
+ * calculate_net_bw_in_kbytes_sec() below.
+ */
+struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = {
+ .clock_limits = {
+ {
+ .state = 0,
+ .dcfclk_mhz = 1434.0,
+ .fabricclk_mhz = 2250.0,
+ .dispclk_mhz = 1720.0,
+ .dppclk_mhz = 1720.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .phyclk_d32_mhz = 313.0,
+ .socclk_mhz = 1200.0,
+ .dscclk_mhz = 573.333,
+ .dram_speed_mts = 16000.0,
+ .dtbclk_mhz = 1564.0,
+ },
+ },
+ .num_states = 1,
+ .sr_exit_time_us = 19.95,
+ .sr_enter_plus_exit_time_us = 24.36,
+ .sr_exit_z8_time_us = 285.0,
+ .sr_enter_plus_exit_z8_time_us = 320,
+ .writeback_latency_us = 12.0,
+ .round_trip_ping_latency_dcfclk_cycles = 207,
+ .urgent_latency_pixel_data_only_us = 4,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4,
+ .urgent_latency_vm_data_only_us = 4,
+ .fclk_change_latency_us = 7,
+ .usr_retraining_latency_us = 0,
+ .smn_latency_us = 0,
+ .mall_allocated_for_dcn_mbytes = 32,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_sdp_bw_after_urgent = 90.0,
+ .pct_ideal_fabric_bw_after_urgent = 67.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
+ .pct_ideal_dram_bw_after_urgent_strobe = 67.0,
+ .max_avg_sdp_bw_use_normal_percent = 80.0,
+ .max_avg_fabric_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_strobe_percent = 50.0,
+ .max_avg_dram_bw_use_normal_percent = 15.0,
+ .num_chans = 8,
+ .dram_channel_width_bytes = 2,
+ .fabric_datapath_to_dcn_data_return_bytes = 64,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.5,
+ .dram_clock_change_latency_us = 400,
+ .dispclk_dppclk_vco_speed_mhz = 4300.0,
+ .do_urgent_latency_adjustment = true,
+ .urgent_latency_adjustment_fabric_clock_component_us = 1.0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000,
+};
+
+/*
+ * get_optimal_ntuple - complete a (DCFCLK, FCLK, DRAM speed) tuple
+ *
+ * The first field of @entry that is greater than zero, checked in the order
+ * dcfclk_mhz, fabricclk_mhz, dram_speed_mts, is turned into a bandwidth using
+ * the dcn3_21_soc derates. The other two fields are then overwritten with the
+ * clocks that yield the same bandwidth. @entry is left untouched when none of
+ * the three fields is greater than zero.
+ */
+static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry)
+{
+	const float sdp_derate = (float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100;
+	const float fabric_derate = (float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100;
+	const float dram_derate = (float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100;
+	float bw;
+
+	if (entry->dcfclk_mhz > 0) {
+		/* Seeded by DCFCLK: match FCLK and DRAM speed to the SDP bandwidth. */
+		bw = entry->dcfclk_mhz * dcn3_21_soc.return_bus_width_bytes * sdp_derate;
+
+		entry->fabricclk_mhz = bw / (dcn3_21_soc.return_bus_width_bytes * fabric_derate);
+		entry->dram_speed_mts = bw / (dcn3_21_soc.num_chans *
+				dcn3_21_soc.dram_channel_width_bytes * dram_derate);
+		return;
+	}
+
+	if (entry->fabricclk_mhz > 0) {
+		/* Seeded by FCLK: match DCFCLK and DRAM speed to the fabric bandwidth. */
+		bw = entry->fabricclk_mhz * dcn3_21_soc.return_bus_width_bytes * fabric_derate;
+
+		entry->dcfclk_mhz = bw / (dcn3_21_soc.return_bus_width_bytes * sdp_derate);
+		entry->dram_speed_mts = bw / (dcn3_21_soc.num_chans *
+				dcn3_21_soc.dram_channel_width_bytes * dram_derate);
+		return;
+	}
+
+	if (entry->dram_speed_mts > 0) {
+		/* Seeded by DRAM speed: match FCLK and DCFCLK to the DRAM bandwidth. */
+		bw = entry->dram_speed_mts * dcn3_21_soc.num_chans *
+				dcn3_21_soc.dram_channel_width_bytes * dram_derate;
+
+		entry->fabricclk_mhz = bw / (dcn3_21_soc.return_bus_width_bytes * fabric_derate);
+		entry->dcfclk_mhz = bw / (dcn3_21_soc.return_bus_width_bytes * sdp_derate);
+	}
+}
+
+/*
+ * calculate_net_bw_in_kbytes_sec - bandwidth the clocks in @entry can sustain
+ *
+ * Computes the derated DRAM, fabric and SDP bandwidths implied by
+ * dram_speed_mts, fabricclk_mhz and dcfclk_mhz (using the dcn3_21_soc
+ * parameters) and returns the smallest of the three.
+ */
+static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry)
+{
+	float dram_bw = entry->dram_speed_mts * dcn3_21_soc.num_chans *
+			dcn3_21_soc.dram_channel_width_bytes *
+			((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100);
+	float fabric_bw = entry->fabricclk_mhz * dcn3_21_soc.return_bus_width_bytes *
+			((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100);
+	float sdp_bw = entry->dcfclk_mhz * dcn3_21_soc.return_bus_width_bytes *
+			((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100);
+	float bottleneck;
+
+	/* The slowest of the three paths limits the whole chain. */
+	bottleneck = (fabric_bw < dram_bw) ? fabric_bw : dram_bw;
+
+	return (sdp_bw < bottleneck) ? sdp_bw : bottleneck;
+}
+
+/*
+ * dcn321_insert_entry_into_table_sorted - insert @entry keeping @table ordered
+ * by ascending net_bw_in_kbytes_sec
+ *
+ * A copy of @entry is placed in front of the first existing entry whose
+ * bandwidth is not lower than its own, and *@num_entries is incremented.
+ * No capacity check is done here: the caller must make sure @table has room
+ * for one more element.
+ */
+static void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
+		unsigned int *num_entries,
+		struct _vcs_dpi_voltage_scaling_st *entry)
+{
+	unsigned int count = *num_entries;
+	unsigned int pos = 0;
+	unsigned int slot;
+
+	dc_assert_fp_enabled();
+
+	/* Skip every entry with a strictly lower bandwidth than the new one. */
+	while (pos < count &&
+	       entry->net_bw_in_kbytes_sec > table[pos].net_bw_in_kbytes_sec)
+		pos++;
+
+	/* Open a gap at pos by moving the tail up by one slot. */
+	for (slot = count; slot > pos; slot--)
+		table[slot] = table[slot - 1];
+
+	table[pos] = *entry;
+	*num_entries = count + 1;
+}
+
+/*
+ * remove_entry_from_table_at_index - drop the entry at @index from @table
+ *
+ * Entries after @index move down by one, the slot that becomes free at the end
+ * is zeroed and *@num_entries is decremented. Does nothing on an empty table.
+ */
+static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries,
+		unsigned int index)
+{
+	unsigned int last;
+	unsigned int pos;
+
+	if (!*num_entries)
+		return;
+
+	last = *num_entries - 1;
+
+	for (pos = index; pos < last; pos++)
+		table[pos] = table[pos + 1];
+
+	/* Clear the now-unused tail slot so no stale state is left behind. */
+	memset(&table[last], 0, sizeof(struct _vcs_dpi_voltage_scaling_st));
+	*num_entries = last;
+}
+
+/* swap_table_entries - exchange the contents of two table entries */
+static void swap_table_entries(struct _vcs_dpi_voltage_scaling_st *first_entry,
+		struct _vcs_dpi_voltage_scaling_st *second_entry)
+{
+	struct _vcs_dpi_voltage_scaling_st saved_second = *second_entry;
+
+	*second_entry = *first_entry;
+	*first_entry = saved_second;
+}
+
+/*
+ * sort_entries_with_same_bw - Sort entries sharing the same bandwidth by DCFCLK
+ *
+ * @table is expected to be ordered by net_bw_in_kbytes_sec already. Every run
+ * of neighbouring entries with an identical bandwidth is reordered in place so
+ * that dcfclk_mhz is ascending within the run. *@num_entries is only read.
+ */
+static void sort_entries_with_same_bw(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries)
+{
+	unsigned int count = *num_entries;
+	unsigned int start, end, pass, k;
+
+	/* Nothing to order; this also keeps "count - 1" below from wrapping. */
+	if (count < 2)
+		return;
+
+	for (start = 0; start < count - 1; start = end + 1) {
+		/*
+		 * Extend [start, end] over the run of equal bandwidths. Compare
+		 * against the table value itself: copying it into an integer
+		 * first would truncate fractional bandwidths and end the run
+		 * too early.
+		 */
+		end = start;
+		while (end < count - 1 &&
+		       table[end + 1].net_bw_in_kbytes_sec == table[start].net_bw_in_kbytes_sec)
+			end++;
+
+		/* Bubble sort the run into ascending DCFCLK order. */
+		for (pass = start; pass < end; pass++) {
+			for (k = start; k < end; k++) {
+				if (table[k].dcfclk_mhz > table[k + 1].dcfclk_mhz)
+					swap_table_entries(&table[k], &table[k + 1]);
+			}
+		}
+	}
+}
+
+/*
+ * remove_inconsistent_entries - Ensure entries with the same bandwidth have MEMCLK and FCLK monotonically increasing
+ * and remove entries that do not follow this order
+ *
+ * For each pair of neighbouring entries with equal net_bw_in_kbytes_sec, the
+ * first one is removed when its dram_speed_mts or fabricclk_mhz is higher than
+ * the second one's. *@num_entries is updated accordingly.
+ */
+static void remove_inconsistent_entries(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries)
+{
+	unsigned int i = 0;
+
+	/* "i + 1 < count" rather than "i < count - 1": safe for an empty table. */
+	while (i + 1 < *num_entries) {
+		if (table[i].net_bw_in_kbytes_sec == table[i + 1].net_bw_in_kbytes_sec &&
+		    (table[i].dram_speed_mts > table[i + 1].dram_speed_mts ||
+		     table[i].fabricclk_mhz > table[i + 1].fabricclk_mhz)) {
+			/*
+			 * The tail shifts down into slot i, so examine the same
+			 * index again instead of stepping over the entry that
+			 * just moved in.
+			 */
+			remove_entry_from_table_at_index(table, num_entries, i);
+			continue;
+		}
+		i++;
+	}
+}
+
+/*
+ * override_max_clk_values - Overwrite the max clock frequencies with the max DC mode timings
+ * Input:
+ * max_clk_limit - struct containing the desired clock timings; a field that
+ *                 is 0 is treated as uninitialized and is skipped
+ * Output:
+ * curr_clk_limit - struct containing the timings that need to be overwritten
+ *
+ * Only DCFCLK, FCLK, MEMCLK, SOCCLK, DTBCLK and DISPCLK are considered.
+ *
+ * Return: 0 upon success, -1 when either pointer is NULL
+ */
+static int override_max_clk_values(struct clk_limit_table_entry *max_clk_limit,
+		struct clk_limit_table_entry *curr_clk_limit)
+{
+	if (!max_clk_limit || !curr_clk_limit)
+		return -1; /* invalid parameters */
+
+	/* Each clock is independent; copy only the limits that were set. */
+	if (max_clk_limit->dispclk_mhz)
+		curr_clk_limit->dispclk_mhz = max_clk_limit->dispclk_mhz;
+
+	if (max_clk_limit->dtbclk_mhz)
+		curr_clk_limit->dtbclk_mhz = max_clk_limit->dtbclk_mhz;
+
+	if (max_clk_limit->socclk_mhz)
+		curr_clk_limit->socclk_mhz = max_clk_limit->socclk_mhz;
+
+	if (max_clk_limit->memclk_mhz)
+		curr_clk_limit->memclk_mhz = max_clk_limit->memclk_mhz;
+
+	if (max_clk_limit->fclk_mhz)
+		curr_clk_limit->fclk_mhz = max_clk_limit->fclk_mhz;
+
+	if (max_clk_limit->dcfclk_mhz)
+		curr_clk_limit->dcfclk_mhz = max_clk_limit->dcfclk_mhz;
+
+	return 0;
+}
+
+/*
+ * dcn321_insert_optimal_entry - derive the missing clocks of @entry from the
+ * one that is set, compute its net bandwidth and insert it into @table in
+ * bandwidth order
+ */
+static void dcn321_insert_optimal_entry(struct _vcs_dpi_voltage_scaling_st *table,
+		unsigned int *num_entries,
+		struct _vcs_dpi_voltage_scaling_st *entry)
+{
+	get_optimal_ntuple(entry);
+	entry->net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(entry);
+	dcn321_insert_entry_into_table_sorted(table, num_entries, entry);
+}
+
+/*
+ * build_synthetic_soc_states - build the SoC clock state table from the DPM
+ * levels in @bw_params and a fixed list of DCFCLK targets
+ * Input:
+ * disable_dc_mode_overwrite - when false, the maximum clocks are capped to
+ *                             bw_params->dc_mode_limit, only DPM levels within
+ *                             that limit are used, and the resulting MEMCLK
+ *                             and FCLK level counts are written back to
+ *                             bw_params->clk_table.num_entries_per_clk
+ * bw_params - clock table to read (and update, see above)
+ * Output:
+ * table - receives the states, ordered by bandwidth, with .state set to
+ *         the array index
+ * num_entries - receives the number of states written to table
+ * Return: 0 upon success, -1 when the maximum DCFCLK, DISPCLK or DTBCLK is
+ *         zero (table and num_entries are not written in that case)
+ */
+static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk_bw_params *bw_params,
+		struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries)
+{
+	int i, j;
+	struct _vcs_dpi_voltage_scaling_st entry = {0};
+	struct clk_limit_table_entry max_clk_data = {0};
+
+	unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299;
+
+	static const unsigned int num_dcfclk_stas = 5;
+	unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564};
+
+	unsigned int num_uclk_dpms = 0;
+	unsigned int num_fclk_dpms = 0;
+	unsigned int num_dcfclk_dpms = 0;
+
+	unsigned int num_dc_uclk_dpms = 0;
+	unsigned int num_dc_fclk_dpms = 0;
+	unsigned int num_dc_dcfclk_dpms = 0;
+
+	// Find the max of every clock and count the populated DPM levels,
+	// both overall and within the DC mode limit.
+	for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
+		if (bw_params->clk_table.entries[i].dcfclk_mhz > max_clk_data.dcfclk_mhz)
+			max_clk_data.dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
+		if (bw_params->clk_table.entries[i].fclk_mhz > max_clk_data.fclk_mhz)
+			max_clk_data.fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz;
+		if (bw_params->clk_table.entries[i].memclk_mhz > max_clk_data.memclk_mhz)
+			max_clk_data.memclk_mhz = bw_params->clk_table.entries[i].memclk_mhz;
+		if (bw_params->clk_table.entries[i].dispclk_mhz > max_clk_data.dispclk_mhz)
+			max_clk_data.dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
+		if (bw_params->clk_table.entries[i].dppclk_mhz > max_clk_data.dppclk_mhz)
+			max_clk_data.dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
+		if (bw_params->clk_table.entries[i].phyclk_mhz > max_clk_data.phyclk_mhz)
+			max_clk_data.phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
+		if (bw_params->clk_table.entries[i].dtbclk_mhz > max_clk_data.dtbclk_mhz)
+			max_clk_data.dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+
+		if (bw_params->clk_table.entries[i].memclk_mhz > 0) {
+			num_uclk_dpms++;
+			if (bw_params->clk_table.entries[i].memclk_mhz <= bw_params->dc_mode_limit.memclk_mhz)
+				num_dc_uclk_dpms++;
+		}
+		if (bw_params->clk_table.entries[i].fclk_mhz > 0) {
+			num_fclk_dpms++;
+			if (bw_params->clk_table.entries[i].fclk_mhz <= bw_params->dc_mode_limit.fclk_mhz)
+				num_dc_fclk_dpms++;
+		}
+		if (bw_params->clk_table.entries[i].dcfclk_mhz > 0) {
+			num_dcfclk_dpms++;
+			if (bw_params->clk_table.entries[i].dcfclk_mhz <= bw_params->dc_mode_limit.dcfclk_mhz)
+				num_dc_dcfclk_dpms++;
+		}
+	}
+
+	if (!disable_dc_mode_overwrite) {
+		//Overwrite max frequencies with max DC mode frequencies for DC mode systems
+		override_max_clk_values(&bw_params->dc_mode_limit, &max_clk_data);
+		num_uclk_dpms = num_dc_uclk_dpms;
+		num_fclk_dpms = num_dc_fclk_dpms;
+		num_dcfclk_dpms = num_dc_dcfclk_dpms;
+		bw_params->clk_table.num_entries_per_clk.num_memclk_levels = num_uclk_dpms;
+		bw_params->clk_table.num_entries_per_clk.num_fclk_levels = num_fclk_dpms;
+	}
+
+	if (num_dcfclk_dpms > 0 && bw_params->clk_table.entries[0].fclk_mhz > min_fclk_mhz)
+		min_fclk_mhz = bw_params->clk_table.entries[0].fclk_mhz;
+
+	if (!max_clk_data.dcfclk_mhz || !max_clk_data.dispclk_mhz || !max_clk_data.dtbclk_mhz)
+		return -1;
+
+	if (max_clk_data.dppclk_mhz == 0)
+		max_clk_data.dppclk_mhz = max_clk_data.dispclk_mhz;
+
+	if (max_clk_data.fclk_mhz == 0)
+		max_clk_data.fclk_mhz = max_clk_data.dcfclk_mhz *
+				dcn3_21_soc.pct_ideal_sdp_bw_after_urgent /
+				dcn3_21_soc.pct_ideal_fabric_bw_after_urgent;
+
+	if (max_clk_data.phyclk_mhz == 0)
+		max_clk_data.phyclk_mhz = dcn3_21_soc.clock_limits[0].phyclk_mhz;
+
+	// These clocks are the same in every generated state.
+	*num_entries = 0;
+	entry.dispclk_mhz = max_clk_data.dispclk_mhz;
+	entry.dscclk_mhz = max_clk_data.dispclk_mhz / 3;
+	entry.dppclk_mhz = max_clk_data.dppclk_mhz;
+	entry.dtbclk_mhz = max_clk_data.dtbclk_mhz;
+	entry.phyclk_mhz = max_clk_data.phyclk_mhz;
+	entry.phyclk_d18_mhz = dcn3_21_soc.clock_limits[0].phyclk_d18_mhz;
+	entry.phyclk_d32_mhz = dcn3_21_soc.clock_limits[0].phyclk_d32_mhz;
+
+	// Insert all the DCFCLK STAs
+	for (i = 0; i < num_dcfclk_stas; i++) {
+		entry.dcfclk_mhz = dcfclk_sta_targets[i];
+		entry.fabricclk_mhz = 0;
+		entry.dram_speed_mts = 0;
+
+		dcn321_insert_optimal_entry(table, num_entries, &entry);
+	}
+
+	// Insert the max DCFCLK
+	entry.dcfclk_mhz = max_clk_data.dcfclk_mhz;
+	entry.fabricclk_mhz = 0;
+	entry.dram_speed_mts = 0;
+
+	dcn321_insert_optimal_entry(table, num_entries, &entry);
+
+	// Insert the UCLK DPMS
+	for (i = 0; i < num_uclk_dpms; i++) {
+		entry.dcfclk_mhz = 0;
+		entry.fabricclk_mhz = 0;
+		entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16;
+
+		dcn321_insert_optimal_entry(table, num_entries, &entry);
+	}
+
+	// If FCLK is coarse grained, insert individual DPMs.
+	if (num_fclk_dpms > 2) {
+		for (i = 0; i < num_fclk_dpms; i++) {
+			entry.dcfclk_mhz = 0;
+			entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz;
+			entry.dram_speed_mts = 0;
+
+			dcn321_insert_optimal_entry(table, num_entries, &entry);
+		}
+	}
+	// If FCLK fine grained, only insert max
+	else {
+		entry.dcfclk_mhz = 0;
+		entry.fabricclk_mhz = max_clk_data.fclk_mhz;
+		entry.dram_speed_mts = 0;
+
+		dcn321_insert_optimal_entry(table, num_entries, &entry);
+	}
+
+	// At this point, the table contains all "points of interest" based on
+	// DPMs from PMFW, and STAs. Table is sorted by BW, and all clock
+	// ratios (by derate, are exact).
+
+	// Remove states that require higher clocks than are supported
+	for (i = (int)*num_entries - 1; i >= 0 ; i--) {
+		if (table[i].dcfclk_mhz > max_clk_data.dcfclk_mhz ||
+				table[i].fabricclk_mhz > max_clk_data.fclk_mhz ||
+				table[i].dram_speed_mts > max_clk_data.memclk_mhz * 16)
+			remove_entry_from_table_at_index(table, num_entries, i);
+	}
+
+	// Insert entry with all max dc limits without bandwidth matching
+	if (!disable_dc_mode_overwrite) {
+		struct _vcs_dpi_voltage_scaling_st max_dc_limits_entry = entry;
+
+		max_dc_limits_entry.dcfclk_mhz = max_clk_data.dcfclk_mhz;
+		max_dc_limits_entry.fabricclk_mhz = max_clk_data.fclk_mhz;
+		max_dc_limits_entry.dram_speed_mts = max_clk_data.memclk_mhz * 16;
+
+		max_dc_limits_entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&max_dc_limits_entry);
+		dcn321_insert_entry_into_table_sorted(table, num_entries, &max_dc_limits_entry);
+
+		sort_entries_with_same_bw(table, num_entries);
+		remove_inconsistent_entries(table, num_entries);
+	}
+
+	// At this point, the table only contains supported points of interest
+	// it could be used as is, but some states may be redundant due to
+	// coarse grained nature of some clocks, so we want to round up to
+	// coarse grained DPMs and remove duplicates.
+
+	// Round up UCLKs
+	for (i = (int)*num_entries - 1; i >= 0 ; i--) {
+		for (j = 0; j < num_uclk_dpms; j++) {
+			if (bw_params->clk_table.entries[j].memclk_mhz * 16 >= table[i].dram_speed_mts) {
+				table[i].dram_speed_mts = bw_params->clk_table.entries[j].memclk_mhz * 16;
+				break;
+			}
+		}
+	}
+
+	// If FCLK is coarse grained, round up to next DPMs
+	if (num_fclk_dpms > 2) {
+		for (i = (int)*num_entries - 1; i >= 0 ; i--) {
+			for (j = 0; j < num_fclk_dpms; j++) {
+				if (bw_params->clk_table.entries[j].fclk_mhz >= table[i].fabricclk_mhz) {
+					table[i].fabricclk_mhz = bw_params->clk_table.entries[j].fclk_mhz;
+					break;
+				}
+			}
+		}
+	}
+	// Otherwise, round up to minimum.
+	else {
+		for (i = (int)*num_entries - 1; i >= 0 ; i--) {
+			if (table[i].fabricclk_mhz < min_fclk_mhz)
+				table[i].fabricclk_mhz = min_fclk_mhz;
+		}
+	}
+
+	// Round DCFCLKs up to minimum
+	for (i = (int)*num_entries - 1; i >= 0 ; i--) {
+		if (table[i].dcfclk_mhz < min_dcfclk_mhz)
+			table[i].dcfclk_mhz = min_dcfclk_mhz;
+	}
+
+	// Remove duplicate states, note duplicate states are always neighbouring since table is sorted.
+	// "i + 1 < count" rather than "i < count - 1": the pruning above can
+	// leave the table empty, and the unsigned "count - 1" would then wrap
+	// and let this loop run past the table without ever terminating.
+	i = 0;
+	while (i + 1 < *num_entries) {
+		if (table[i].dcfclk_mhz == table[i + 1].dcfclk_mhz &&
+				table[i].fabricclk_mhz == table[i + 1].fabricclk_mhz &&
+				table[i].dram_speed_mts == table[i + 1].dram_speed_mts)
+			remove_entry_from_table_at_index(table, num_entries, i + 1);
+		else
+			i++;
+	}
+
+	// Fix up the state indices
+	for (i = (int)*num_entries - 1; i >= 0 ; i--)
+		table[i].state = i;
+
+	return 0;
+}
+
+/*
+ * dcn321_get_optimal_dcfclk_fclk_for_uclk - DCFCLK / FCLK matching a DRAM speed
+ * Input:
+ * uclk_mts - DRAM speed to match
+ * Output:
+ * optimal_dcfclk - if not NULL, receives the matching DCFCLK
+ * optimal_fclk - if not NULL, receives the matching FCLK
+ *
+ * The DRAM bandwidth is taken as the lower of the values obtained with
+ * max_avg_dram_bw_use_normal_percent and max_avg_sdp_bw_use_normal_percent.
+ * Both outputs are that bandwidth divided by the respective bus width scaled
+ * by max_avg_sdp_bw_use_normal_percent.
+ */
+static void dcn321_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
+		unsigned int *optimal_dcfclk,
+		unsigned int *optimal_fclk)
+{
+	const double dram_use = dcn3_21_soc.max_avg_dram_bw_use_normal_percent / 100;
+	const double sdp_use = dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100;
+	double bw_dram_limited, bw_sdp_limited, bw;
+
+	bw_dram_limited = uclk_mts * dcn3_21_soc.num_chans *
+			dcn3_21_soc.dram_channel_width_bytes * dram_use;
+	bw_sdp_limited = uclk_mts * dcn3_21_soc.num_chans *
+			dcn3_21_soc.dram_channel_width_bytes * sdp_use;
+
+	/* Use whichever utilisation limit yields the lower bandwidth. */
+	bw = bw_sdp_limited;
+	if (bw_dram_limited < bw_sdp_limited)
+		bw = bw_dram_limited;
+
+	/* NOTE(review): FCLK is scaled by the SDP percentage, not the fabric one - confirm this is intended. */
+	if (optimal_fclk)
+		*optimal_fclk = bw /
+			(dcn3_21_soc.fabric_datapath_to_dcn_data_return_bytes * sdp_use);
+
+	if (optimal_dcfclk)
+		*optimal_dcfclk = bw /
+			(dcn3_21_soc.return_bus_width_bytes * sdp_use);
+}
+
+/** dcn321_update_bw_bounding_box_fpu
+ * This overrides some of the initial dcn3_21_ip / dcn3_21_soc parameters hardcoded from spreadsheet
+ * with actual values as per dGPU SKU:
+ * -with a few options passed from dc->config
+ * -with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might need to get it from PM FW)
+ * -with latency values (in ns units) passed in dc->bb_overrides for debugging purposes
+ * -with latencies passed from VBIOS (in 100_ns units) if available for certain dGPU SKU
+ * -with number of DRAM channels from VBIOS (which differ for certain dGPU SKU of the same ASIC)
+ * -with clock levels from the clk_table entries passed from Clk Mgr as reported by PM FW for different
+ * clocks (which might differ for certain dGPU SKU of the same ASIC)
+ */
+void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ dc_assert_fp_enabled();
+ /* Overrides from dc->config options */
+ dcn3_21_ip.clamp_min_dcfclk = dc->config.clamp_min_dcfclk;
+
+ /* Override from passed dc->bb_overrides if available*/
+ if ((int)(dcn3_21_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns
+ && dc->bb_overrides.sr_exit_time_ns) {
+ dc->dml2_options.bbox_overrides.sr_exit_latency_us =
+ dcn3_21_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
+ }
+
+ if ((int)(dcn3_21_soc.sr_enter_plus_exit_time_us * 1000)
+ != dc->bb_overrides.sr_enter_plus_exit_time_ns
+ && dc->bb_overrides.sr_enter_plus_exit_time_ns) {
+ dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us =
+ dcn3_21_soc.sr_enter_plus_exit_time_us =
+ dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+ }
+
+ if ((int)(dcn3_21_soc.urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns
+ && dc->bb_overrides.urgent_latency_ns) {
+ dcn3_21_soc.urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0;
+ dc->dml2_options.bbox_overrides.urgent_latency_us =
+ dcn3_21_soc.urgent_latency_pixel_data_only_us = dc->bb_overrides.urgent_latency_ns / 1000.0;
+ }
+
+ if ((int)(dcn3_21_soc.dram_clock_change_latency_us * 1000)
+ != dc->bb_overrides.dram_clock_change_latency_ns
+ && dc->bb_overrides.dram_clock_change_latency_ns) {
+ dc->dml2_options.bbox_overrides.dram_clock_change_latency_us =
+ dcn3_21_soc.dram_clock_change_latency_us =
+ dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
+ }
+
+ if ((int)(dcn3_21_soc.fclk_change_latency_us * 1000)
+ != dc->bb_overrides.fclk_clock_change_latency_ns
+ && dc->bb_overrides.fclk_clock_change_latency_ns) {
+ dc->dml2_options.bbox_overrides.fclk_change_latency_us =
+ dcn3_21_soc.fclk_change_latency_us =
+ dc->bb_overrides.fclk_clock_change_latency_ns / 1000;
+ }
+
+ if ((int)(dcn3_21_soc.dummy_pstate_latency_us * 1000)
+ != dc->bb_overrides.dummy_clock_change_latency_ns
+ && dc->bb_overrides.dummy_clock_change_latency_ns) {
+ dcn3_21_soc.dummy_pstate_latency_us =
+ dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0;
+ }
+
+ /* Override from VBIOS if VBIOS bb_info available */
+ if (dc->ctx->dc_bios->funcs->get_soc_bb_info) {
+ struct bp_soc_bb_info bb_info = {0};
+
+ if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) {
+ if (bb_info.dram_clock_change_latency_100ns > 0)
+ dc->dml2_options.bbox_overrides.dram_clock_change_latency_us =
+ dcn3_21_soc.dram_clock_change_latency_us =
+ bb_info.dram_clock_change_latency_100ns * 10;
+
+ if (bb_info.dram_sr_enter_exit_latency_100ns > 0)
+ dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us =
+ dcn3_21_soc.sr_enter_plus_exit_time_us =
+ bb_info.dram_sr_enter_exit_latency_100ns * 10;
+
+ if (bb_info.dram_sr_exit_latency_100ns > 0)
+ dc->dml2_options.bbox_overrides.sr_exit_latency_us =
+ dcn3_21_soc.sr_exit_time_us =
+ bb_info.dram_sr_exit_latency_100ns * 10;
+ }
+ }
+
+ /* Override from VBIOS for num_chan */
+ if (dc->ctx->dc_bios->vram_info.num_chans) {
+ dc->dml2_options.bbox_overrides.dram_num_chan =
+ dcn3_21_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans;
+ dcn3_21_soc.mall_allocated_for_dcn_mbytes = (double)(dcn32_calc_num_avail_chans_for_mall(dc,
+ dc->ctx->dc_bios->vram_info.num_chans) * dc->caps.mall_size_per_mem_channel);
+ }
+
+ if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
+ dc->dml2_options.bbox_overrides.dram_chanel_width_bytes =
+ dcn3_21_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
+
+ /* DML DSC delay factor workaround */
+ dcn3_21_ip.dsc_delay_factor_wa = dc->debug.dsc_delay_factor_wa_x1000 / 1000.0;
+
+ dcn3_21_ip.min_prefetch_in_strobe_us = dc->debug.min_prefetch_in_strobe_ns / 1000.0;
+
+ /* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */
+ dcn3_21_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+ dc->dml2_options.bbox_overrides.disp_pll_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+ dc->dml2_options.bbox_overrides.xtalclk_mhz = dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency / 1000.0;
+ dc->dml2_options.bbox_overrides.dchub_refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0;
+ dc->dml2_options.bbox_overrides.dprefclk_mhz = dc->clk_mgr->dprefclk_khz / 1000.0;
+
+ /* Overrides Clock levelsfrom CLK Mgr table entries as reported by PM FW */
+ if (dc->debug.use_legacy_soc_bb_mechanism) {
+ unsigned int i = 0, j = 0, num_states = 0;
+
+ unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0};
+ unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0};
+ unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0};
+ unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0};
+
+ unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {615, 906, 1324, 1564};
+ unsigned int num_dcfclk_sta_targets = 4, num_uclk_states = 0;
+ unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0;
+
+ for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
+ if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
+ max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
+ if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
+ if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
+ if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
+ max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
+ }
+ if (!max_dcfclk_mhz)
+ max_dcfclk_mhz = dcn3_21_soc.clock_limits[0].dcfclk_mhz;
+ if (!max_dispclk_mhz)
+ max_dispclk_mhz = dcn3_21_soc.clock_limits[0].dispclk_mhz;
+ if (!max_dppclk_mhz)
+ max_dppclk_mhz = dcn3_21_soc.clock_limits[0].dppclk_mhz;
+ if (!max_phyclk_mhz)
+ max_phyclk_mhz = dcn3_21_soc.clock_limits[0].phyclk_mhz;
+
+ if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+ // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array
+ dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz;
+ num_dcfclk_sta_targets++;
+ } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+ // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates
+ for (i = 0; i < num_dcfclk_sta_targets; i++) {
+ if (dcfclk_sta_targets[i] > max_dcfclk_mhz) {
+ dcfclk_sta_targets[i] = max_dcfclk_mhz;
+ break;
+ }
+ }
+ // Update size of array since we "removed" duplicates
+ num_dcfclk_sta_targets = i + 1;
+ }
+
+ num_uclk_states = bw_params->clk_table.num_entries;
+
+ // Calculate optimal dcfclk for each uclk
+ for (i = 0; i < num_uclk_states; i++) {
+ dcn321_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16,
+ &optimal_dcfclk_for_uclk[i], NULL);
+ if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) {
+ optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz;
+ }
+ }
+
+ // Calculate optimal uclk for each dcfclk sta target
+ for (i = 0; i < num_dcfclk_sta_targets; i++) {
+ for (j = 0; j < num_uclk_states; j++) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) {
+ optimal_uclk_for_dcfclk_sta_targets[i] =
+ bw_params->clk_table.entries[j].memclk_mhz * 16;
+ break;
+ }
+ }
+ }
+
+ i = 0;
+ j = 0;
+ // create the final dcfclk and uclk table
+ while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) {
+ dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
+ dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
+ } else {
+ if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+ dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+ dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
+ } else {
+ j = num_uclk_states;
+ }
+ }
+ }
+
+ while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) {
+ dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
+ dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
+ }
+
+ while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES &&
+ optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+ dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+ dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
+ }
+
+ /* bw_params->clk_table.entries[MAX_NUM_DPM_LVL].
+ * MAX_NUM_DPM_LVL is 8.
+ * dcn3_02_soc.clock_limits[DC__VOLTAGE_STATES].
+ * DC__VOLTAGE_STATES is 40.
+ */
+ if (num_states > MAX_NUM_DPM_LVL) {
+ ASSERT(0);
+ return;
+ }
+
+ dcn3_21_soc.num_states = num_states;
+ for (i = 0; i < dcn3_21_soc.num_states; i++) {
+ dcn3_21_soc.clock_limits[i].state = i;
+ dcn3_21_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
+ dcn3_21_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
+
+ /* Fill all states with max values of all these clocks */
+ dcn3_21_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
+ dcn3_21_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
+ dcn3_21_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz;
+ dcn3_21_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3;
+
+ /* Populate from bw_params for DTBCLK, SOCCLK */
+ if (i > 0) {
+ if (!bw_params->clk_table.entries[i].dtbclk_mhz) {
+ dcn3_21_soc.clock_limits[i].dtbclk_mhz = dcn3_21_soc.clock_limits[i-1].dtbclk_mhz;
+ } else {
+ dcn3_21_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+ }
+ } else if (bw_params->clk_table.entries[i].dtbclk_mhz) {
+ dcn3_21_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+ }
+
+ if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0)
+ dcn3_21_soc.clock_limits[i].socclk_mhz = dcn3_21_soc.clock_limits[i-1].socclk_mhz;
+ else
+ dcn3_21_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz;
+
+ if (!dram_speed_mts[i] && i > 0)
+ dcn3_21_soc.clock_limits[i].dram_speed_mts = dcn3_21_soc.clock_limits[i-1].dram_speed_mts;
+ else
+ dcn3_21_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];
+
+ /* These clocks cannot come from bw_params, always fill from dcn3_21_soc[0] */
+ /* PHYCLK_D18, PHYCLK_D32 */
+ dcn3_21_soc.clock_limits[i].phyclk_d18_mhz = dcn3_21_soc.clock_limits[0].phyclk_d18_mhz;
+ dcn3_21_soc.clock_limits[i].phyclk_d32_mhz = dcn3_21_soc.clock_limits[0].phyclk_d32_mhz;
+ }
+ } else {
+ build_synthetic_soc_states(dc->debug.disable_dc_mode_overwrite, bw_params,
+ dcn3_21_soc.clock_limits, &dcn3_21_soc.num_states);
+ }
+
+ /* Re-init DML with updated bb */
+ dml_init_instance(&dc->dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32);
+ if (dc->current_state)
+ dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32);
+
+ if (dc->clk_mgr->bw_params->clk_table.num_entries > 1) {
+ unsigned int i = 0;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_states = dc->clk_mgr->bw_params->clk_table.num_entries;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels =
+ dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dcfclk_levels;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels =
+ dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_fclk_levels;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels =
+ dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels =
+ dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_socclk_levels;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels =
+ dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dtbclk_levels;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels =
+ dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dispclk_levels;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dppclk_levels =
+ dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dppclk_levels;
+
+
+ for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dcfclk_levels; i++) {
+ if (dc->clk_mgr->bw_params->clk_table.entries[i].dcfclk_mhz)
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz =
+ dc->clk_mgr->bw_params->clk_table.entries[i].dcfclk_mhz;
+ }
+
+ for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_fclk_levels; i++) {
+ if (dc->clk_mgr->bw_params->clk_table.entries[i].fclk_mhz)
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].fclk_mhz =
+ dc->clk_mgr->bw_params->clk_table.entries[i].fclk_mhz;
+ }
+
+ for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels; i++) {
+ if (dc->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz)
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz =
+ dc->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz;
+ }
+
+ for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_socclk_levels; i++) {
+ if (dc->clk_mgr->bw_params->clk_table.entries[i].socclk_mhz)
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].socclk_mhz =
+ dc->clk_mgr->bw_params->clk_table.entries[i].socclk_mhz;
+ }
+
+ for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dtbclk_levels; i++) {
+ if (dc->clk_mgr->bw_params->clk_table.entries[i].dtbclk_mhz)
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz =
+ dc->clk_mgr->bw_params->clk_table.entries[i].dtbclk_mhz;
+ }
+
+ for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dispclk_levels; i++) {
+ if (dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz) {
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz =
+ dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz =
+ dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz;
+ }
+ }
+ }
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h
new file mode 100644
index 000000000000..c6623b3705ca
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+/* Guard is named after this header (DCN321) so it stays distinct from the guards of other dcn*_fpu.h headers. */
+#ifndef __DCN321_FPU_H__
+#define __DCN321_FPU_H__
+
+#include "dml/display_mode_vba.h"
+
+void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params);
+
+#endif /* __DCN321_FPU_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
new file mode 100644
index 000000000000..817a370e80a7
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
@@ -0,0 +1,620 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "resource.h"
+#include "dcn35_fpu.h"
+#include "dcn31/dcn31_resource.h"
+#include "dcn32/dcn32_resource.h"
+#include "dcn35/dcn35_resource.h"
+#include "dml/dcn31/dcn31_fpu.h"
+#include "dml/dml_inline_defs.h"
+
+#include "link_service.h"
+
+#define DC_LOGGER_INIT(logger)
+
+/*
+ * Default DML IP parameters for DCN 3.5, handed to dml_init_instance().
+ * max_num_otg and max_num_dpp are overwritten at runtime by
+ * dcn35_update_bw_bounding_box_fpu(); the entries tagged "new in dml2" /
+ * "new to dml2" are left commented out in this table.
+ */
+struct _vcs_dpi_ip_params_st dcn3_5_ip = {
+ .VBlankNomDefaultUS = 668,
+ .gpuvm_enable = 1,
+ .gpuvm_max_page_table_levels = 1,
+ .hostvm_enable = 1,
+ .hostvm_max_page_table_levels = 2,
+ .rob_buffer_size_kbytes = 64,
+ .det_buffer_size_kbytes = 1536,
+ .config_return_buffer_size_in_kbytes = 1792,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 32,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,/*not used*/
+ .opp_output_buffer_lines = 1,/*not used*/
+ .pixel_chunk_size_kbytes = 8,
+ //.alpha_pixel_chunk_size_kbytes = 4;/*new*/
+ //.min_pixel_chunk_size_bytes = 1024;/*new*/
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 4,
+ .maximum_dsc_bits_per_component = 12,/*delta from 10*/
+ .dsc422_native_support = true,/*delta from false*/
+ .is_line_buffer_bpp_fixed = true,/*new*/
+ .line_buffer_fixed_bpp = 32,/*delta from 48*/
+ .line_buffer_size_bits = 986880,/*delta from 789504*/
+ .max_line_buffer_lines = 32,/*delta from 12*/
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ /*.max_num_hdmi_frl_outputs = 1; new in dml2*/
+ /*.max_num_dp2p0_outputs = 2; new in dml2*/
+ /*.max_num_dp2p0_streams = 4; new in dml2*/
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 68,/*changed from 64,*/
+ .dpte_buffer_size_in_pte_reqs_chroma = 36,/*changed from 34*/
+ /*.dcc_meta_buffer_size_bytes = 6272; new to dml2*/
+ .dispclk_ramp_margin_percent = 1.11,/*delta from 1*/
+ /*.dppclk_delay_subtotal = 47;
+ .dppclk_delay_scl = 50;
+ .dppclk_delay_scl_lb_only = 16;
+ .dppclk_delay_cnvc_formatter = 28;
+ .dppclk_delay_cnvc_cursor = 6;
+ .dispclk_delay_subtotal = 125;*/ /*new to dml2*/
+ .max_inter_dcn_tile_repeaters = 8,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 47, /* changed from 46,*/
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 28,/*changed from 27,*/
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 125, /*changed from 119,*/
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+// .config_return_buffer_segment_size_in_kbytes = 64;/*required, hard coded in dml2_translate_ip_params*/
+
+};
+
+/*
+ * Default SoC bounding box for DCN 3.5, handed to dml_init_instance().
+ * clock_limits, num_states, num_chans, dispclk_dppclk_vco_speed_mhz and the
+ * dram-clock-change / self-refresh latency fields can be replaced at runtime
+ * by dcn35_update_bw_bounding_box_fpu(). Note that the static clock_limits
+ * rows below do not set dcfclk_mhz.
+ */
+struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
+ /*TODO: correct dispclk/dppclk voltage level determination*/
+ .clock_limits = {
+ {
+ .state = 0,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 600.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 186.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 1,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 2,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 3,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 371.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 4,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 400.0,
+ .dtbclk_mhz = 600.0,
+ },
+ },
+ .num_states = 5,
+ .sr_exit_time_us = 28.0,
+ .sr_enter_plus_exit_time_us = 30.0,
+ .sr_exit_z8_time_us = 250.0,
+ .sr_enter_plus_exit_z8_time_us = 350.0,
+ .fclk_change_latency_us = 24.0,
+ .usr_retraining_latency_us = 2,
+ .writeback_latency_us = 12.0,
+
+ .dram_channel_width_bytes = 4,/*not exist in dml2*/
+ .round_trip_ping_latency_dcfclk_cycles = 106,/*not exist in dml2*/
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .dram_clock_change_latency_us = 34.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+
+ .pct_ideal_sdp_bw_after_urgent = 80.0,
+ .pct_ideal_fabric_bw_after_urgent = 80.0, /*new to dml2*/
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 60.0,
+ .fabric_datapath_to_dcn_data_return_bytes = 32,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.5,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .do_urgent_latency_adjustment = 0,
+ .urgent_latency_adjustment_fabric_clock_component_us = 0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+};
+
+/*
+ * Placeholder: no watermark range table is built yet, so @clk_mgr is
+ * currently unused and the call has no effect.
+ */
+void dcn35_build_wm_range_table_fpu(struct clk_mgr *clk_mgr)
+{
+ //TODO
+}
+
+
+/*
+ * dcn35_update_bw_bounding_box_fpu
+ *
+ * This would override some dcn3_5 ip_or_soc initial parameters hardcoded from
+ * spreadsheet with actual values as per dGPU SKU:
+ * - with passed few options from dc->config
+ * - with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might
+ * need to get it from PM FW)
+ * - with passed latency values (passed in ns units) in dc-> bb override for
+ * debugging purposes
+ * - with passed latencies from VBIOS (in 100_ns units) if available for
+ * certain dGPU SKU
+ * - with number of DRAM channels from VBIOS (which differ for certain dGPU SKU
+ * of the same ASIC)
+ * - clocks levels with passed clk_table entries from Clk Mgr as reported by PM
+ * FW for different clocks (which might differ for certain dGPU SKU of the
+ * same ASIC)
+ *
+ * @dc: display core instance; its res_pool, debug and bb_overrides fields are
+ * read, and dc->dml plus dc->dml2_options.bbox_overrides are written
+ * @bw_params: clock table and DRAM channel count used to rebuild the table
+ *
+ * Side effects: rewrites the file-scope dcn3_5_ip / dcn3_5_soc tables and
+ * re-initialises dc->dml from them. Asserts that FPU use is enabled.
+ */
+void dcn35_update_bw_bounding_box_fpu(struct dc *dc,
+ struct clk_bw_params *bw_params)
+{
+ unsigned int i, closest_clk_lvl;
+ int j;
+ struct clk_limit_table *clk_table = &bw_params->clk_table;
+ /* Per-dc scratch array the new clock_limits rows are assembled in. */
+ struct _vcs_dpi_voltage_scaling_st *clock_limits =
+ dc->scratch.update_bw_bounding_box.clock_limits;
+ int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
+
+ dc_assert_fp_enabled();
+
+ dcn3_5_ip.max_num_otg =
+ dc->res_pool->res_cap->num_timing_generator;
+ dcn3_5_ip.max_num_dpp = dc->res_pool->pipe_count;
+ dcn3_5_soc.num_chans = bw_params->num_channels;
+
+ ASSERT(clk_table->num_entries);
+
+ /* Prepass to find max clocks independent of voltage level. */
+ for (i = 0; i < clk_table->num_entries; ++i) {
+ if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
+ if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
+ }
+
+ for (i = 0; i < clk_table->num_entries; i++) {
+ /* loop backwards*/
+ /*
+ * Pick the highest existing soc state whose dcfclk does not exceed
+ * this entry's dcfclk; falls back to state 0 when none qualifies.
+ */
+ for (closest_clk_lvl = 0, j = dcn3_5_soc.num_states - 1;
+ j >= 0; j--) {
+ if (dcn3_5_soc.clock_limits[j].dcfclk_mhz <=
+ clk_table->entries[i].dcfclk_mhz) {
+ closest_clk_lvl = j;
+ break;
+ }
+ }
+ if (clk_table->num_entries == 1) {
+ /*smu gives one DPM level, let's take the highest one*/
+ closest_clk_lvl = dcn3_5_soc.num_states - 1;
+ }
+
+ clock_limits[i].state = i;
+
+ /* Clocks dependent on voltage level. */
+ clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ if (clk_table->num_entries == 1 &&
+ clock_limits[i].dcfclk_mhz <
+ dcn3_5_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
+ /*SMU fix not released yet*/
+ clock_limits[i].dcfclk_mhz =
+ dcn3_5_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
+ }
+
+ clock_limits[i].fabricclk_mhz =
+ clk_table->entries[i].fclk_mhz;
+ clock_limits[i].socclk_mhz =
+ clk_table->entries[i].socclk_mhz;
+
+ /*
+ * dram_speed_mts is written only when both memclk_mhz and wck_ratio
+ * are non-zero; otherwise the scratch row keeps whatever it held.
+ */
+ if (clk_table->entries[i].memclk_mhz &&
+ clk_table->entries[i].wck_ratio)
+ clock_limits[i].dram_speed_mts =
+ clk_table->entries[i].memclk_mhz * 2 *
+ clk_table->entries[i].wck_ratio;
+
+ /* Clocks independent of voltage level. */
+ clock_limits[i].dispclk_mhz = max_dispclk_mhz ?
+ max_dispclk_mhz :
+ dcn3_5_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+
+ clock_limits[i].dppclk_mhz = max_dppclk_mhz ?
+ max_dppclk_mhz :
+ dcn3_5_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+
+ /* Remaining clocks are inherited from the closest existing soc state. */
+ clock_limits[i].dram_bw_per_chan_gbps =
+ dcn3_5_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ clock_limits[i].dscclk_mhz =
+ dcn3_5_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ clock_limits[i].dtbclk_mhz =
+ dcn3_5_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ clock_limits[i].phyclk_d18_mhz =
+ dcn3_5_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ clock_limits[i].phyclk_mhz =
+ dcn3_5_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+ }
+
+ /*
+ * Publish the rebuilt rows into the global bounding box. The copy covers
+ * the whole clock_limits array, not only the num_entries rows written above.
+ */
+ memcpy(dcn3_5_soc.clock_limits, clock_limits,
+ sizeof(dcn3_5_soc.clock_limits));
+
+ if (clk_table->num_entries)
+ dcn3_5_soc.num_states = clk_table->num_entries;
+
+ if (max_dispclk_mhz) {
+ dcn3_5_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ }
+ /* Debug override of the DRAM clock change latency (ns -> us), if set and different. */
+ if ((int)(dcn3_5_soc.dram_clock_change_latency_us * 1000)
+ != dc->debug.dram_clock_change_latency_ns
+ && dc->debug.dram_clock_change_latency_ns) {
+ dcn3_5_soc.dram_clock_change_latency_us =
+ dc->debug.dram_clock_change_latency_ns / 1000.0;
+ }
+
+ /* bb_overrides are applied after the debug value, so they win when both are set. */
+ if (dc->bb_overrides.dram_clock_change_latency_ns > 0)
+ dcn3_5_soc.dram_clock_change_latency_us =
+ dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
+
+ if (dc->bb_overrides.sr_exit_time_ns > 0)
+ dcn3_5_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
+
+ if (dc->bb_overrides.sr_enter_plus_exit_time_ns > 0)
+ dcn3_5_soc.sr_enter_plus_exit_time_us =
+ dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+
+ if (dc->bb_overrides.sr_exit_z8_time_ns > 0)
+ dcn3_5_soc.sr_exit_z8_time_us = dc->bb_overrides.sr_exit_z8_time_ns / 1000.0;
+
+ if (dc->bb_overrides.sr_enter_plus_exit_z8_time_ns > 0)
+ dcn3_5_soc.sr_enter_plus_exit_z8_time_us =
+ dc->bb_overrides.sr_enter_plus_exit_z8_time_ns / 1000.0;
+
+ /*temp till dml2 fully work without dml1*/
+ dml_init_instance(&dc->dml, &dcn3_5_soc, &dcn3_5_ip,
+ DML_PROJECT_DCN31);
+
+ /*copy to dml2, before dml2_create*/
+ /* Only done when the clock table has more than two entries. */
+ if (clk_table->num_entries > 2) {
+
+ /*
+ * num_states and the num_*_levels counts do not depend on i; they are
+ * simply re-stored with the same value on every iteration.
+ */
+ for (i = 0; i < clk_table->num_entries; i++) {
+ dc->dml2_options.bbox_overrides.clks_table.num_states =
+ clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz =
+ clock_limits[i].dcfclk_mhz;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].fclk_mhz =
+ clock_limits[i].fabricclk_mhz;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz =
+ clock_limits[i].dispclk_mhz;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz =
+ clock_limits[i].dppclk_mhz;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].socclk_mhz =
+ clock_limits[i].socclk_mhz;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz =
+ clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio;
+
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dram_speed_mts = clock_limits[i].dram_speed_mts;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz =
+ clock_limits[i].dtbclk_mhz;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels =
+ clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels =
+ clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels =
+ clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dppclk_levels =
+ clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels =
+ clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels =
+ clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels =
+ clk_table->num_entries;
+ }
+ }
+
+ /* Update latency values */
+ dc->dml2_options.bbox_overrides.dram_clock_change_latency_us = dcn3_5_soc.dram_clock_change_latency_us;
+
+ dc->dml2_options.bbox_overrides.sr_exit_latency_us = dcn3_5_soc.sr_exit_time_us;
+ dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us = dcn3_5_soc.sr_enter_plus_exit_time_us;
+
+ dc->dml2_options.bbox_overrides.sr_exit_z8_time_us = dcn3_5_soc.sr_exit_z8_time_us;
+ dc->dml2_options.bbox_overrides.sr_enter_plus_exit_z8_time_us = dcn3_5_soc.sr_enter_plus_exit_z8_time_us;
+}
+
+/*
+ * Report whether @format is treated as a dual-plane surface: true for
+ * SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA and for every format value at or
+ * above SURFACE_PIXEL_FORMAT_VIDEO_BEGIN.
+ */
+static bool is_dual_plane(enum surface_pixel_format format)
+{
+	if (format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA)
+		return true;
+
+	return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN;
+}
+
+/*
+ * micro_sec_to_vert_lines() - convert a duration into vertical lines of a timing
+ *
+ * @num_us: duration in microseconds
+ * @timing: CRTC timing supplying h_total and pix_clk_100hz
+ *
+ * Return: number of vertical lines; a fractional result is rounded up to the
+ * next whole line.
+ */
+static unsigned int micro_sec_to_vert_lines(unsigned int num_us, struct dc_crtc_timing *timing)
+{
+	/* The per-line period is truncated to whole nanoseconds before the divide. */
+	unsigned int line_period_ns = 1000.0 *
+			(((float)timing->h_total * 1000.0) /
+			((float)timing->pix_clk_100hz / 10.0));
+
+	return dml_ceil(1000.0 * num_us / line_period_ns, 1.0);
+}
+
+/*
+ * Derive the vertical back porch of @timing: the blanking lines that remain
+ * after removing the front porch and the sync width, where blanking is
+ * v_total minus the active lines (addressable lines plus both borders).
+ */
+static unsigned int get_vertical_back_porch(struct dc_crtc_timing *timing)
+{
+	unsigned int active_lines = timing->v_border_top + timing->v_addressable +
+			timing->v_border_bottom;
+	unsigned int blank_lines = timing->v_total - active_lines;
+
+	return blank_lines - timing->v_front_porch - timing->v_sync_width;
+}
+
+/*
+ * dcn35_populate_dml_pipes_from_context_fpu() - fill DML pipe parameters for DCN 3.5
+ *
+ * @dc: display core instance; dc->config.enable_4to1MPC is updated here
+ * @context: state being validated; its DML ip.det_buffer_size_kbytes and, for a
+ *           seamless-boot eDP stream, vba.ODMCombinePolicy may be updated
+ * @pipes: DML pipe array, first populated by dcn31_populate_dml_pipes_from_context()
+ * @validate_mode: forwarded unchanged to the DCN 3.1 helper
+ *
+ * Asserts that FPU use is enabled.
+ *
+ * Return: number of pipes in @context that have a stream attached.
+ */
+int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc,
+		struct dc_state *context,
+		display_e2e_pipe_params_st *pipes,
+		enum dc_validate_mode validate_mode)
+{
+	int i, pipe_cnt;
+	struct resource_context *res_ctx = &context->res_ctx;
+	/* Last pipe with a stream seen by the first loop; used after it when pipe_cnt == 1. */
+	struct pipe_ctx *pipe = NULL;
+	bool upscaled = false;
+	const unsigned int max_allowed_vblank_nom = 1023;
+
+	dc_assert_fp_enabled();
+
+	dcn31_populate_dml_pipes_from_context(dc, context, pipes,
+			validate_mode);
+
+	for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+		struct dc_crtc_timing *timing;
+		unsigned int num_lines = 0;
+		unsigned int v_back_porch = 0;
+
+		if (!res_ctx->pipe_ctx[i].stream)
+			continue;
+
+		pipe = &res_ctx->pipe_ctx[i];
+		timing = &pipe->stream->timing;
+
+		num_lines = micro_sec_to_vert_lines(dcn3_5_ip.VBlankNomDefaultUS, timing);
+		v_back_porch = get_vertical_back_porch(timing);
+
+		/* A fixed (min == max) adjusted v_total above the nominal one extends vtotal. */
+		if (pipe->stream->adjust.v_total_max ==
+				pipe->stream->adjust.v_total_min &&
+				pipe->stream->adjust.v_total_min > timing->v_total)
+			pipes[pipe_cnt].pipe.dest.vtotal =
+				pipe->stream->adjust.v_total_min;
+
+		/* vblank_nom always starts from the nominal v_total and is then clamped. */
+		pipes[pipe_cnt].pipe.dest.vblank_nom = timing->v_total - pipes[pipe_cnt].pipe.dest.vactive;
+		pipes[pipe_cnt].pipe.dest.vblank_nom = min(pipes[pipe_cnt].pipe.dest.vblank_nom, num_lines);
+		// vblank_nom should not smaller than (VSync (timing->v_sync_width + v_back_porch) + 2)
+		// + 2 is because
+		// 1 -> VStartup_start should be 1 line before VSync
+		// 1 -> always reserve 1 line between start of vblank to vstartup signal
+		pipes[pipe_cnt].pipe.dest.vblank_nom =
+			max(pipes[pipe_cnt].pipe.dest.vblank_nom, timing->v_sync_width + v_back_porch + 2);
+		pipes[pipe_cnt].pipe.dest.vblank_nom = min(pipes[pipe_cnt].pipe.dest.vblank_nom, max_allowed_vblank_nom);
+
+		if (pipe->plane_state &&
+				(pipe->plane_state->src_rect.height <
+				pipe->plane_state->dst_rect.height ||
+				pipe->plane_state->src_rect.width <
+				pipe->plane_state->dst_rect.width))
+			upscaled = true;
+
+		/*
+		 * Immediate flip can be set dynamically after enabling the
+		 * plane. We need to require support for immediate flip or
+		 * underflow can be intermittently experienced depending on peak
+		 * b/w requirements.
+		 */
+		pipes[pipe_cnt].pipe.src.immediate_flip = true;
+
+		pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
+
+		dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+
+		pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
+		pipes[pipe_cnt].pipe.src.dcc_rate = 3;
+		pipes[pipe_cnt].dout.dsc_input_bpc = 0;
+		pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256;
+
+		if (pipes[pipe_cnt].dout.dsc_enable) {
+			switch (timing->display_color_depth) {
+			case COLOR_DEPTH_888:
+				pipes[pipe_cnt].dout.dsc_input_bpc = 8;
+				break;
+			case COLOR_DEPTH_101010:
+				pipes[pipe_cnt].dout.dsc_input_bpc = 10;
+				break;
+			case COLOR_DEPTH_121212:
+				pipes[pipe_cnt].dout.dsc_input_bpc = 12;
+				break;
+			default:
+				ASSERT(0);
+				break;
+			}
+		}
+
+		pipe_cnt++;
+	}
+
+	context->bw_ctx.dml.ip.det_buffer_size_kbytes = 384;/*per guide*/
+	dc->config.enable_4to1MPC = false;
+
+	/* pipe_cnt == 1 guarantees pipe was assigned in the loop above. */
+	if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) {
+		if (is_dual_plane(pipe->plane_state->format)
+				&& pipe->plane_state->src_rect.width <= 1920 &&
+				pipe->plane_state->src_rect.height <= 1080) {
+			dc->config.enable_4to1MPC = true;
+		} else if (!is_dual_plane(pipe->plane_state->format) &&
+				pipe->plane_state->src_rect.width <= 5120) {
+			/*
+			 * Limit to 5k max to avoid forced pipe split when there
+			 * is not enough detile for swath
+			 */
+			context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
+			pipes[0].pipe.src.unbounded_req_mode = true;
+		}
+	} else if (context->stream_count >=
+			dc->debug.crb_alloc_policy_min_disp_count &&
+			dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) {
+		context->bw_ctx.dml.ip.det_buffer_size_kbytes =
+			dc->debug.crb_alloc_policy * 64;
+	} else if (context->stream_count >= 3 && upscaled) {
+		context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
+	}
+
+	/* The first seamless-boot eDP stream requesting 2:1 ODM sets the policy. */
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *cur_pipe = &context->res_ctx.pipe_ctx[i];
+
+		if (!cur_pipe->stream)
+			continue;
+
+		if (cur_pipe->stream->signal == SIGNAL_TYPE_EDP &&
+				dc->debug.seamless_boot_odm_combine &&
+				cur_pipe->stream->apply_seamless_boot_optimization) {
+
+			if (cur_pipe->stream->apply_boot_odm_mode ==
+					dm_odm_combine_policy_2to1) {
+				context->bw_ctx.dml.vba.ODMCombinePolicy =
+					dm_odm_combine_policy_2to1;
+				break;
+			}
+		}
+	}
+
+	return pipe_cnt;
+}
+
+/*
+ * dcn35_decide_zstate_support() - pick the Z-state support level for @context
+ *
+ * Stores the result in context->bw_ctx.bw.dcn.clk.zstate_support:
+ *  - no streams or no planes: DCN_ZSTATE_SUPPORT_ALLOW
+ *  - exactly one stream and it is eDP: decided from the link index, PSR/Replay
+ *    state and the DML StutterPeriod compared against the Z8/Z10 residency
+ *    thresholds (dc->debug values when positive, otherwise 1000 and 5000)
+ *  - anything else: DCN_ZSTATE_SUPPORT_DISALLOW
+ *
+ * Asserts that FPU use is enabled.
+ */
+void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context)
+{
+	enum dcn_zstate_support_state support = DCN_ZSTATE_SUPPORT_DISALLOW;
+	unsigned int pipe_idx, plane_count = 0;
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	dc_assert_fp_enabled();
+
+	/* Count pipes that currently carry a plane. */
+	for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++)
+		if (context->res_ctx.pipe_ctx[pipe_idx].plane_state)
+			plane_count++;
+
+	if (context->stream_count == 0 || plane_count == 0) {
+		support = DCN_ZSTATE_SUPPORT_ALLOW;
+	} else if (context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) {
+		struct dc_link *link = context->streams[0]->sink->link;
+		bool on_pwrseq0 = link && link->link_index == 0;
+		bool psr_usable = link &&
+			(link->psr_settings.psr_version == DC_PSR_VERSION_1 ||
+			 link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) &&
+			!link->panel_config.psr.disable_psr;
+		bool replay_enabled = link && link->replay_settings.replay_feature_enabled;
+		int z8_residency_min = 1000;
+		int z10_residency_min = 5000;
+		bool z8_ok, z10_ok;
+
+		/* Positive debug values replace the default residency thresholds. */
+		if (dc->debug.minimum_z8_residency_time > 0)
+			z8_residency_min = dc->debug.minimum_z8_residency_time;
+		if (dc->debug.minimum_z10_residency_time > 0)
+			z10_residency_min = dc->debug.minimum_z10_residency_time;
+
+		z8_ok = context->bw_ctx.dml.vba.StutterPeriod > (double)z8_residency_min;
+		z10_ok = context->bw_ctx.dml.vba.StutterPeriod > (double)z10_residency_min;
+
+		/*for psr1/psr-su, we allow z8 and z10 based on latency, for replay with IPS enabled, it will enter ips2*/
+		if (on_pwrseq0 && z10_ok)
+			support = DCN_ZSTATE_SUPPORT_ALLOW;
+		else if (on_pwrseq0 && (psr_usable || replay_enabled))
+			support = DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY;
+		else if (z8_ok)
+			support = DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY;
+	}
+
+	DC_LOG_SMU("zstate_support: %d, StutterPeriod: %d\n", support,
+			(int)context->bw_ctx.dml.vba.StutterPeriod);
+
+	context->bw_ctx.bw.dcn.clk.zstate_support = support;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h
new file mode 100644
index 000000000000..d121c5afce71
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN35_FPU_H__
+#define __DCN35_FPU_H__
+
+#include "clk_mgr.h"
+
+/* Currently an empty stub in dcn35_fpu.c (body is only a TODO). */
+void dcn35_build_wm_range_table_fpu(struct clk_mgr *clk_mgr);
+
+/*
+ * Rebuild the DCN 3.5 soc/ip bounding box from bw_params, re-init dc->dml
+ * and fill dc->dml2_options.bbox_overrides.
+ */
+void dcn35_update_bw_bounding_box_fpu(struct dc *dc,
+ struct clk_bw_params *bw_params);
+
+/*
+ * Populate the DML pipe array for context (on top of the DCN 3.1 helper);
+ * returns the number of pipes that have a stream.
+ */
+int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ enum dc_validate_mode validate_mode);
+
+/* Store the chosen Z-state support level in context->bw_ctx.bw.dcn.clk.zstate_support. */
+void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c
new file mode 100644
index 000000000000..77023b619f1e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c
@@ -0,0 +1,639 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+#include "resource.h"
+#include "dcn351_fpu.h"
+#include "dcn31/dcn31_resource.h"
+#include "dcn32/dcn32_resource.h"
+#include "dcn35/dcn35_resource.h"
+#include "dcn351/dcn351_resource.h"
+#include "dml/dcn31/dcn31_fpu.h"
+#include "dml/dcn35/dcn35_fpu.h"
+#include "dml/dml_inline_defs.h"
+
+#include "link_service.h"
+
+#define DC_LOGGER_INIT(logger)
+
+/*
+ * Default DML IP parameters for DCN3.51.
+ *
+ * Not const: dcn351_update_bw_bounding_box_fpu() overwrites max_num_otg and
+ * max_num_dpp with the resource pool's timing generator and pipe counts
+ * before passing this table to dml_init_instance().
+ *
+ * The trailing per-field remarks ("delta from ...", "changed from ...",
+ * "new") and the commented-out fields are the original author's notes; the
+ * tables they compare against are not part of this file.
+ */
+struct _vcs_dpi_ip_params_st dcn3_51_ip = {
+ .VBlankNomDefaultUS = 668,
+ .gpuvm_enable = 1,
+ .gpuvm_max_page_table_levels = 1,
+ .hostvm_enable = 1,
+ .hostvm_max_page_table_levels = 2,
+ .rob_buffer_size_kbytes = 64,
+ .det_buffer_size_kbytes = 1536,
+ .config_return_buffer_size_in_kbytes = 1792,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 32,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,/*not used*/
+ .opp_output_buffer_lines = 1,/*not used*/
+ .pixel_chunk_size_kbytes = 8,
+ //.alpha_pixel_chunk_size_kbytes = 4;/*new*/
+ //.min_pixel_chunk_size_bytes = 1024;/*new*/
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 4,
+ .maximum_dsc_bits_per_component = 12,/*delta from 10*/
+ .dsc422_native_support = true,/*delta from false*/
+ .is_line_buffer_bpp_fixed = true,/*new*/
+ .line_buffer_fixed_bpp = 32,/*delta from 48*/
+ .line_buffer_size_bits = 986880,/*delta from 789504*/
+ .max_line_buffer_lines = 32,/*delta from 12*/
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ /*.max_num_hdmi_frl_outputs = 1; new in dml2*/
+ /*.max_num_dp2p0_outputs = 2; new in dml2*/
+ /*.max_num_dp2p0_streams = 4; new in dml2*/
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 68,/*changed from 64,*/
+ .dpte_buffer_size_in_pte_reqs_chroma = 36,/*changed from 34*/
+ /*.dcc_meta_buffer_size_bytes = 6272; new to dml2*/
+ .dispclk_ramp_margin_percent = 1.11,/*delta from 1*/
+ /*.dppclk_delay_subtotal = 47;
+ .dppclk_delay_scl = 50;
+ .dppclk_delay_scl_lb_only = 16;
+ .dppclk_delay_cnvc_formatter = 28;
+ .dppclk_delay_cnvc_cursor = 6;
+ .dispclk_delay_subtotal = 125;*/ /*new to dml2*/
+ .max_inter_dcn_tile_repeaters = 8,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 47, /* changed from 46,*/
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 28,/*changed from 27,*/
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 125, /*changed from 119,*/
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+// .config_return_buffer_segment_size_in_kbytes = 64;/*required, hard coded in dml2_translate_ip_params*/
+
+};
+
+/*
+ * Default SoC bounding box for DCN3.51: eight clock states (state 0-7)
+ * followed by latency, bandwidth-efficiency and memory-channel parameters.
+ *
+ * Not const: dcn351_update_bw_bounding_box_fpu() replaces clock_limits,
+ * num_states, num_chans, dispclk_dppclk_vco_speed_mhz and several of the
+ * latency fields with values taken from bw_params and the dc overrides.
+ */
+struct _vcs_dpi_soc_bounding_box_st dcn3_51_soc = {
+ /*TODO: correct dispclk/dppclk voltage level determination*/
+ .clock_limits = {
+ {
+ .state = 0,
+ .dcfclk_mhz = 400.0,
+ .fabricclk_mhz = 400.0,
+ .socclk_mhz = 600.0,
+ .dram_speed_mts = 3200.0,
+ .dispclk_mhz = 600.0,
+ .dppclk_mhz = 600.0,
+ .phyclk_mhz = 600.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 200.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 1,
+ .dcfclk_mhz = 600.0,
+ .fabricclk_mhz = 1000.0,
+ .socclk_mhz = 733.0,
+ .dram_speed_mts = 6400.0,
+ .dispclk_mhz = 800.0,
+ .dppclk_mhz = 800.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 266.7,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 2,
+ .dcfclk_mhz = 738.0,
+ .fabricclk_mhz = 1200.0,
+ .socclk_mhz = 880.0,
+ .dram_speed_mts = 7500.0,
+ .dispclk_mhz = 800.0,
+ .dppclk_mhz = 800.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 266.7,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 3,
+ .dcfclk_mhz = 800.0,
+ .fabricclk_mhz = 1400.0,
+ .socclk_mhz = 978.0,
+ .dram_speed_mts = 7500.0,
+ .dispclk_mhz = 960.0,
+ .dppclk_mhz = 960.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 320.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 4,
+ .dcfclk_mhz = 873.0,
+ .fabricclk_mhz = 1600.0,
+ .socclk_mhz = 1100.0,
+ .dram_speed_mts = 8533.0,
+ .dispclk_mhz = 1066.7,
+ .dppclk_mhz = 1066.7,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 355.6,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 5,
+ .dcfclk_mhz = 960.0,
+ .fabricclk_mhz = 1700.0,
+ .socclk_mhz = 1257.0,
+ .dram_speed_mts = 8533.0,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 400.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 6,
+ .dcfclk_mhz = 1067.0,
+ .fabricclk_mhz = 1850.0,
+ .socclk_mhz = 1257.0,
+ .dram_speed_mts = 8533.0,
+ .dispclk_mhz = 1371.4,
+ .dppclk_mhz = 1371.4,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 457.1,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 7,
+ .dcfclk_mhz = 1200.0,
+ .fabricclk_mhz = 2000.0,
+ .socclk_mhz = 1467.0,
+ .dram_speed_mts = 8533.0,
+ .dispclk_mhz = 1600.0,
+ .dppclk_mhz = 1600.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 533.3,
+ .dtbclk_mhz = 600.0,
+ },
+ },
+ .num_states = 8,
+ .sr_exit_time_us = 28.0,
+ .sr_enter_plus_exit_time_us = 30.0,
+ .sr_exit_z8_time_us = 263.0,
+ .sr_enter_plus_exit_z8_time_us = 363.0,
+ .fclk_change_latency_us = 24.0,
+ .usr_retraining_latency_us = 2,
+ .writeback_latency_us = 12.0,
+
+ .dram_channel_width_bytes = 4,/*not exist in dml2*/
+ .round_trip_ping_latency_dcfclk_cycles = 106,/*not exist in dml2*/
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .dram_clock_change_latency_us = 34,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+
+ .pct_ideal_sdp_bw_after_urgent = 80.0,
+ .pct_ideal_fabric_bw_after_urgent = 80.0, /*new to dml2*/
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 60.0,
+ .fabric_datapath_to_dcn_data_return_bytes = 32,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.5,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .do_urgent_latency_adjustment = 0,
+ .urgent_latency_adjustment_fabric_clock_component_us = 0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+ .num_chans = 4,
+ .dispclk_dppclk_vco_speed_mhz = 2400.0,
+};
+
+/*
+ * dcn351_update_bw_bounding_box_fpu() - refresh the DCN3.51 bounding box
+ * @dc: display core instance; supplies the resource pool, the debug and
+ *      bb_overrides latencies and the scratch clock_limits buffer, and
+ *      receives the updated dml / dml2_options state
+ * @bw_params: clock table and DRAM channel count to apply
+ *
+ * Overrides the hard-coded dcn3_51_ip / dcn3_51_soc defaults with:
+ * - max_num_otg and max_num_dpp from the resource pool
+ * - num_chans from @bw_params
+ * - one clock state per clk_table entry: dcfclk, fabricclk, socclk and the
+ *   DRAM speed come from the entry, dispclk/dppclk use the highest value
+ *   found anywhere in the table, and dram_bw_per_chan_gbps, dscclk, dtbclk
+ *   and the phy clocks are copied from the closest default state
+ * - dispclk_dppclk_vco_speed_mhz set to twice the highest dispclk
+ * - latencies from dc->debug and dc->bb_overrides (both supplied in ns)
+ *
+ * The result is used to re-initialise dc->dml and is mirrored into
+ * dc->dml2_options.bbox_overrides.
+ */
+void dcn351_update_bw_bounding_box_fpu(struct dc *dc,
+				       struct clk_bw_params *bw_params)
+{
+	unsigned int i, closest_clk_lvl;
+	int j;
+	struct clk_limit_table *clk_table = &bw_params->clk_table;
+	struct _vcs_dpi_voltage_scaling_st *clock_limits =
+		dc->scratch.update_bw_bounding_box.clock_limits;
+	int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
+
+	dc_assert_fp_enabled();
+
+	dcn3_51_ip.max_num_otg =
+		dc->res_pool->res_cap->num_timing_generator;
+	dcn3_51_ip.max_num_dpp = dc->res_pool->pipe_count;
+	dcn3_51_soc.num_chans = bw_params->num_channels;
+
+	ASSERT(clk_table->num_entries);
+
+	/* Prepass to find max clocks independent of voltage level. */
+	for (i = 0; i < clk_table->num_entries; ++i) {
+		if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
+			max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
+		if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
+			max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
+	}
+
+	for (i = 0; i < clk_table->num_entries; i++) {
+		/*
+		 * Walk the default states from the top and stop at the first
+		 * one whose dcfclk does not exceed this entry's dcfclk; fall
+		 * back to state 0 when none qualifies.
+		 */
+		for (closest_clk_lvl = 0, j = dcn3_51_soc.num_states - 1;
+		     j >= 0; j--) {
+			if (dcn3_51_soc.clock_limits[j].dcfclk_mhz <=
+			    clk_table->entries[i].dcfclk_mhz) {
+				closest_clk_lvl = j;
+				break;
+			}
+		}
+		if (clk_table->num_entries == 1) {
+			/*smu gives one DPM level, let's take the highest one*/
+			closest_clk_lvl = dcn3_51_soc.num_states - 1;
+		}
+
+		clock_limits[i].state = i;
+
+		/* Clocks dependent on voltage level. */
+		clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+		if (clk_table->num_entries == 1 &&
+		    clock_limits[i].dcfclk_mhz <
+		    dcn3_51_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
+			/*SMU fix not released yet*/
+			clock_limits[i].dcfclk_mhz =
+				dcn3_51_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
+		}
+
+		clock_limits[i].fabricclk_mhz =
+			clk_table->entries[i].fclk_mhz;
+		clock_limits[i].socclk_mhz =
+			clk_table->entries[i].socclk_mhz;
+
+		/*
+		 * NOTE(review): when memclk_mhz or wck_ratio is 0 this leaves
+		 * dram_speed_mts at whatever the scratch buffer already
+		 * held - confirm that is intended.
+		 */
+		if (clk_table->entries[i].memclk_mhz &&
+		    clk_table->entries[i].wck_ratio)
+			clock_limits[i].dram_speed_mts =
+				clk_table->entries[i].memclk_mhz * 2 *
+				clk_table->entries[i].wck_ratio;
+
+		/* Clocks independent of voltage level. */
+		clock_limits[i].dispclk_mhz = max_dispclk_mhz ?
+			max_dispclk_mhz :
+			dcn3_51_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+
+		clock_limits[i].dppclk_mhz = max_dppclk_mhz ?
+			max_dppclk_mhz :
+			dcn3_51_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+
+		clock_limits[i].dram_bw_per_chan_gbps =
+			dcn3_51_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+		clock_limits[i].dscclk_mhz =
+			dcn3_51_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+		clock_limits[i].dtbclk_mhz =
+			dcn3_51_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+		clock_limits[i].phyclk_d18_mhz =
+			dcn3_51_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+		clock_limits[i].phyclk_mhz =
+			dcn3_51_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+	}
+
+	/* Publish the freshly built states; the defaults were only read above. */
+	memcpy(dcn3_51_soc.clock_limits, clock_limits,
+	       sizeof(dcn3_51_soc.clock_limits));
+
+	if (clk_table->num_entries)
+		dcn3_51_soc.num_states = clk_table->num_entries;
+
+	if (max_dispclk_mhz) {
+		dcn3_51_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+		dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+	}
+	if ((int)(dcn3_51_soc.dram_clock_change_latency_us * 1000)
+				!= dc->debug.dram_clock_change_latency_ns
+			&& dc->debug.dram_clock_change_latency_ns) {
+		dcn3_51_soc.dram_clock_change_latency_us =
+			dc->debug.dram_clock_change_latency_ns / 1000.0;
+	}
+
+	/* bb_overrides are applied after dc->debug, so they win when both are set. */
+	if (dc->bb_overrides.dram_clock_change_latency_ns > 0)
+		dcn3_51_soc.dram_clock_change_latency_us =
+			dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
+
+	if (dc->bb_overrides.sr_exit_time_ns > 0)
+		dcn3_51_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
+
+	if (dc->bb_overrides.sr_enter_plus_exit_time_ns > 0)
+		dcn3_51_soc.sr_enter_plus_exit_time_us =
+			dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+
+	if (dc->bb_overrides.sr_exit_z8_time_ns > 0)
+		dcn3_51_soc.sr_exit_z8_time_us = dc->bb_overrides.sr_exit_z8_time_ns / 1000.0;
+
+	if (dc->bb_overrides.sr_enter_plus_exit_z8_time_ns > 0)
+		dcn3_51_soc.sr_enter_plus_exit_z8_time_us =
+			dc->bb_overrides.sr_enter_plus_exit_z8_time_ns / 1000.0;
+
+	/*temp till dml2 fully work without dml1*/
+	dml_init_instance(&dc->dml, &dcn3_51_soc, &dcn3_51_ip,
+			  DML_PROJECT_DCN31);
+
+	/*copy to dml2, before dml2_create*/
+	if (clk_table->num_entries > 2) {
+		/* Level counts do not depend on the entry index: set them once. */
+		dc->dml2_options.bbox_overrides.clks_table.num_states =
+			clk_table->num_entries;
+		dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels =
+			clk_table->num_entries;
+		dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels =
+			clk_table->num_entries;
+		dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels =
+			clk_table->num_entries;
+		dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dppclk_levels =
+			clk_table->num_entries;
+		dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels =
+			clk_table->num_entries;
+		dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels =
+			clk_table->num_entries;
+		dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels =
+			clk_table->num_entries;
+
+		for (i = 0; i < clk_table->num_entries; i++) {
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz =
+				clock_limits[i].dcfclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].fclk_mhz =
+				clock_limits[i].fabricclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz =
+				clock_limits[i].dispclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz =
+				clock_limits[i].dppclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].socclk_mhz =
+				clock_limits[i].socclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz =
+				clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dram_speed_mts =
+				clock_limits[i].dram_speed_mts;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz =
+				clock_limits[i].dtbclk_mhz;
+		}
+	}
+
+	/* Update latency values */
+	dc->dml2_options.bbox_overrides.dram_clock_change_latency_us = dcn3_51_soc.dram_clock_change_latency_us;
+
+	dc->dml2_options.bbox_overrides.sr_exit_latency_us = dcn3_51_soc.sr_exit_time_us;
+	dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us = dcn3_51_soc.sr_enter_plus_exit_time_us;
+
+	dc->dml2_options.bbox_overrides.sr_exit_z8_time_us = dcn3_51_soc.sr_exit_z8_time_us;
+	dc->dml2_options.bbox_overrides.sr_enter_plus_exit_z8_time_us = dcn3_51_soc.sr_enter_plus_exit_z8_time_us;
+}
+
+/*
+ * is_dual_plane() - true for SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA and for
+ * every format at or beyond SURFACE_PIXEL_FORMAT_VIDEO_BEGIN.
+ */
+static bool is_dual_plane(enum surface_pixel_format format)
+{
+	if (format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA)
+		return true;
+
+	return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN;
+}
+
+/*
+ * micro_sec_to_vert_lines() - convert a duration to vertical lines of a timing
+ * @num_us: duration in microseconds
+ * @timing: CRTC timing supplying h_total and pix_clk_100hz
+ *
+ * The line period is computed in nanoseconds and truncated to an integer
+ * before the division; the quotient is then rounded up with dml_ceil(), so a
+ * duration that is not a whole number of lines yields the next line count.
+ *
+ * Return: number of vertical lines.
+ */
+static unsigned int micro_sec_to_vert_lines(unsigned int num_us, struct dc_crtc_timing *timing)
+{
+	/* pix_clk_100hz / 10 expresses the pixel clock in kHz */
+	const double pix_clk_khz = (float)timing->pix_clk_100hz / 10.0;
+	/* pixels per line * 1000 / kHz gives the line period in microseconds */
+	const double line_time_us = ((float)timing->h_total * 1000.0) / pix_clk_khz;
+	/* deliberately truncated to whole nanoseconds, as a plain integer */
+	const unsigned int line_time_ns = 1000.0 * line_time_us;
+
+	return dml_ceil(1000.0 * num_us / line_time_ns, 1.0);
+}
+
+/*
+ * get_vertical_back_porch() - vertical back porch of @timing, in lines
+ *
+ * Active height is the addressable area plus both borders; the blanking
+ * interval is v_total minus that height; the back porch is what remains of
+ * the blanking interval after the front porch and the sync width.
+ */
+static unsigned int get_vertical_back_porch(struct dc_crtc_timing *timing)
+{
+	const unsigned int active_lines = timing->v_border_top +
+					  timing->v_addressable +
+					  timing->v_border_bottom;
+	const unsigned int blank_lines = timing->v_total - active_lines;
+
+	return blank_lines - timing->v_front_porch - timing->v_sync_width;
+}
+
+/*
+ * dcn351_populate_dml_pipes_from_context_fpu() - build DML pipe parameters
+ * @dc: display core instance; dc->config.enable_4to1MPC is rewritten here
+ * @context: state whose pipes are translated; its dml.ip.det_buffer_size_kbytes
+ *           and, for seamless-boot eDP, dml.vba.ODMCombinePolicy are updated
+ * @pipes: DML pipe array; entry pipe_cnt is adjusted for each pipe that has
+ *         a stream
+ * @validate_mode: forwarded to dcn31_populate_dml_pipes_from_context()
+ *
+ * Runs the DCN3.1 population first and then applies the DCN3.51 settings
+ * for vblank_nom, immediate flip, DCC rate, DSC input bpc, DET buffer size
+ * and 4:1 MPC.
+ *
+ * Return: number of pipes that have a stream.
+ */
+int dcn351_populate_dml_pipes_from_context_fpu(struct dc *dc,
+					       struct dc_state *context,
+					       display_e2e_pipe_params_st *pipes,
+					       enum dc_validate_mode validate_mode)
+{
+	int i, pipe_cnt;
+	struct resource_context *res_ctx = &context->res_ctx;
+	/* Last pipe with a stream; only read below when pipe_cnt == 1. */
+	struct pipe_ctx *pipe = NULL;
+	bool upscaled = false;
+	const unsigned int max_allowed_vblank_nom = 1023;
+
+	dc_assert_fp_enabled();
+
+	dcn31_populate_dml_pipes_from_context(dc, context, pipes,
+					      validate_mode);
+
+	for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+		struct dc_crtc_timing *timing;
+		unsigned int num_lines = 0;
+		unsigned int v_back_porch = 0;
+
+		if (!res_ctx->pipe_ctx[i].stream)
+			continue;
+
+		pipe = &res_ctx->pipe_ctx[i];
+		timing = &pipe->stream->timing;
+
+		num_lines = micro_sec_to_vert_lines(dcn3_51_ip.VBlankNomDefaultUS, timing);
+		v_back_porch = get_vertical_back_porch(timing);
+
+		/* A fixed, stretched v_total (min == max > nominal) replaces vtotal. */
+		if (pipe->stream->adjust.v_total_max ==
+			    pipe->stream->adjust.v_total_min &&
+		    pipe->stream->adjust.v_total_min > timing->v_total)
+			pipes[pipe_cnt].pipe.dest.vtotal =
+				pipe->stream->adjust.v_total_min;
+
+		/*
+		 * vblank_nom always starts from the nominal v_total, whether or
+		 * not vtotal was stretched above, and is then clamped.
+		 */
+		pipes[pipe_cnt].pipe.dest.vblank_nom = timing->v_total - pipes[pipe_cnt].pipe.dest.vactive;
+		pipes[pipe_cnt].pipe.dest.vblank_nom = min(pipes[pipe_cnt].pipe.dest.vblank_nom, num_lines);
+		// vblank_nom should not be smaller than (VSync (timing->v_sync_width + v_back_porch) + 2)
+		// + 2 is because
+		// 1 -> VStartup_start should be 1 line before VSync
+		// 1 -> always reserve 1 line between start of vblank to vstartup signal
+		pipes[pipe_cnt].pipe.dest.vblank_nom =
+			max(pipes[pipe_cnt].pipe.dest.vblank_nom, timing->v_sync_width + v_back_porch + 2);
+		pipes[pipe_cnt].pipe.dest.vblank_nom = min(pipes[pipe_cnt].pipe.dest.vblank_nom, max_allowed_vblank_nom);
+
+		if (pipe->plane_state &&
+		    (pipe->plane_state->src_rect.height <
+				pipe->plane_state->dst_rect.height ||
+		     pipe->plane_state->src_rect.width <
+				pipe->plane_state->dst_rect.width))
+			upscaled = true;
+
+		/*
+		 * Immediate flip can be set dynamically after enabling the
+		 * plane. We need to require support for immediate flip or
+		 * underflow can be intermittently experienced depending on peak
+		 * b/w requirements.
+		 */
+		pipes[pipe_cnt].pipe.src.immediate_flip = true;
+
+		pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
+
+		dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+
+		pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
+		pipes[pipe_cnt].pipe.src.dcc_rate = 3;
+		pipes[pipe_cnt].dout.dsc_input_bpc = 0;
+		pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256;
+
+		if (pipes[pipe_cnt].dout.dsc_enable) {
+			switch (timing->display_color_depth) {
+			case COLOR_DEPTH_888:
+				pipes[pipe_cnt].dout.dsc_input_bpc = 8;
+				break;
+			case COLOR_DEPTH_101010:
+				pipes[pipe_cnt].dout.dsc_input_bpc = 10;
+				break;
+			case COLOR_DEPTH_121212:
+				pipes[pipe_cnt].dout.dsc_input_bpc = 12;
+				break;
+			default:
+				ASSERT(0);
+				break;
+			}
+		}
+
+		pipe_cnt++;
+	}
+
+	context->bw_ctx.dml.ip.det_buffer_size_kbytes = 384;/*per guide*/
+	dc->config.enable_4to1MPC = false;
+
+	if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) {
+		if (is_dual_plane(pipe->plane_state->format)
+				&& pipe->plane_state->src_rect.width <= 1920 &&
+				pipe->plane_state->src_rect.height <= 1080) {
+			dc->config.enable_4to1MPC = true;
+		} else if (!is_dual_plane(pipe->plane_state->format) &&
+			   pipe->plane_state->src_rect.width <= 5120) {
+			/*
+			 * Limit to 5k max to avoid forced pipe split when there
+			 * is not enough detile for swath
+			 */
+			context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
+			pipes[0].pipe.src.unbounded_req_mode = true;
+		}
+	} else if (context->stream_count >=
+			   dc->debug.crb_alloc_policy_min_disp_count &&
+		   dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) {
+		context->bw_ctx.dml.ip.det_buffer_size_kbytes =
+			dc->debug.crb_alloc_policy * 64;
+	} else if (context->stream_count >= 3 && upscaled) {
+		context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
+	}
+
+	/* Seamless-boot eDP that booted in 2:1 ODM keeps that policy. */
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *boot_pipe = &context->res_ctx.pipe_ctx[i];
+
+		if (!boot_pipe->stream)
+			continue;
+
+		if (boot_pipe->stream->signal == SIGNAL_TYPE_EDP &&
+		    dc->debug.seamless_boot_odm_combine &&
+		    boot_pipe->stream->apply_seamless_boot_optimization) {
+
+			if (boot_pipe->stream->apply_boot_odm_mode ==
+			    dm_odm_combine_policy_2to1) {
+				context->bw_ctx.dml.vba.ODMCombinePolicy =
+					dm_odm_combine_policy_2to1;
+				break;
+			}
+		}
+	}
+
+	return pipe_cnt;
+}
+
+/*
+ * dcn351_decide_zstate_support() - choose the Z-state support for @context
+ * @dc: display core instance (pipe count and debug residency override)
+ * @context: state to evaluate; the decision is stored in
+ *           context->bw_ctx.bw.dcn.clk.zstate_support
+ *
+ * The result is DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY when there is no stream or
+ * no plane, or when the only stream is eDP on link index 0 with PSR or
+ * replay active and the stutter period exceeds the minimum Z8 residency.
+ * Every other configuration yields DCN_ZSTATE_SUPPORT_DISALLOW.
+ */
+void dcn351_decide_zstate_support(struct dc *dc, struct dc_state *context)
+{
+	enum dcn_zstate_support_state support = DCN_ZSTATE_SUPPORT_DISALLOW;
+	unsigned int i, plane_count = 0;
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		if (context->res_ctx.pipe_ctx[i].plane_state)
+			plane_count++;
+	}
+
+	/*dcn351 does not support z9/z10*/
+	if (context->stream_count == 0 || plane_count == 0) {
+		support = DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY;
+	} else if (context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) {
+		struct dc_link *link = context->streams[0]->sink->link;
+		bool is_pwrseq0 = link && link->link_index == 0;
+		bool is_psr = (link && (link->psr_settings.psr_version == DC_PSR_VERSION_1 ||
+					link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) &&
+			       !link->panel_config.psr.disable_psr);
+		bool is_replay = link && link->replay_settings.replay_feature_enabled;
+		/* A positive debug value overrides the default residency of 1000. */
+		int minimum_z8_residency =
+			dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000;
+		bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minimum_z8_residency;
+
+		/*
+		 * For psr1/psr-su and replay, z8 is allowed based on latency.
+		 * Assign the enumerator explicitly: storing the bool allow_z8
+		 * in the enum would select whichever state has the value 1.
+		 */
+		if (is_pwrseq0 && (is_psr || is_replay) && allow_z8)
+			support = DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY;
+	}
+	context->bw_ctx.bw.dcn.clk.zstate_support = support;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.h
new file mode 100644
index 000000000000..f71d9d8d0759
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+
+#ifndef __DCN351_FPU_H__
+#define __DCN351_FPU_H__
+
+#include "clk_mgr.h"
+
+void dcn351_update_bw_bounding_box_fpu(struct dc *dc,
+ struct clk_bw_params *bw_params);
+
+int dcn351_populate_dml_pipes_from_context_fpu(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ enum dc_validate_mode validate_mode);
+
+void dcn351_decide_zstate_support(struct dc *dc, struct dc_state *context);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
index edb9f7567d6d..d5831a34f5a1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
@@ -26,7 +26,11 @@
#define __DISPLAY_MODE_ENUMS_H__
enum output_encoder_class {
- dm_dp = 0, dm_hdmi = 1, dm_wb = 2, dm_edp
+ dm_dp = 0,
+ dm_hdmi = 1,
+ dm_wb = 2,
+ dm_edp = 3,
+ dm_dp2p0 = 5,
};
enum output_format_class {
dm_444 = 0, dm_420 = 1, dm_n422, dm_s422
@@ -101,10 +105,39 @@ enum source_macro_tile_size {
enum cursor_bpp {
dm_cur_2bit = 0, dm_cur_32bit = 1, dm_cur_64bit = 2
};
+
+/**
+ * enum clock_change_support - Ways in which a DRAM clock change can be supported.
+ *
+ * DC may change the DRAM clock during its execution, and this enum tracks all
+ * the available methods. Note that each ASIC has its own way of dealing with
+ * these clock switches.
+ */
enum clock_change_support {
+ /**
+ * @dm_dram_clock_change_uninitialized: If you see this, we might have
+ * a code initialization issue
+ */
dm_dram_clock_change_uninitialized = 0,
+
+ /**
+ * @dm_dram_clock_change_vactive: Support DRAM switch in VActive
+ */
dm_dram_clock_change_vactive,
+
+ /**
+ * @dm_dram_clock_change_vblank: Support DRAM switch in VBlank
+ */
dm_dram_clock_change_vblank,
+
+ dm_dram_clock_change_vactive_w_mall_full_frame,
+ dm_dram_clock_change_vactive_w_mall_sub_vp,
+ dm_dram_clock_change_vblank_w_mall_full_frame,
+ dm_dram_clock_change_vblank_w_mall_sub_vp,
+
+ /**
+ * @dm_dram_clock_change_unsupported: Do not support DRAM switch
+ */
dm_dram_clock_change_unsupported
};
@@ -157,6 +190,14 @@ enum dm_validation_status {
DML_FAIL_DSC_INPUT_BPC,
DML_FAIL_PREFETCH_SUPPORT,
DML_FAIL_V_RATIO_PREFETCH,
+ DML_FAIL_P2I_WITH_420,
+ DML_FAIL_DSC_ONLY_IF_NECESSARY_WITH_BPP,
+ DML_FAIL_NOT_DSC422_NATIVE,
+ DML_FAIL_ODM_COMBINE4TO1,
+ DML_FAIL_ENOUGH_WRITEBACK_UNITS,
+ DML_FAIL_VIEWPORT_EXCEEDS_SURFACE,
+ DML_FAIL_DYNAMIC_METADATA,
+ DML_FAIL_FMT_BUFFER_EXCEEDED,
};
enum writeback_config {
@@ -169,6 +210,9 @@ enum odm_combine_mode {
dm_odm_combine_mode_disabled,
dm_odm_combine_mode_2to1,
dm_odm_combine_mode_4to1,
+ dm_odm_split_mode_1to2,
+ dm_odm_mode_mso_1to2,
+ dm_odm_mode_mso_1to4
};
enum odm_combine_policy {
@@ -176,11 +220,15 @@ enum odm_combine_policy {
dm_odm_combine_policy_none,
dm_odm_combine_policy_2to1,
dm_odm_combine_policy_4to1,
+ dm_odm_split_policy_1to2,
+ dm_odm_mso_policy_1to2,
+ dm_odm_mso_policy_1to4,
};
enum immediate_flip_requirement {
dm_immediate_flip_not_required,
dm_immediate_flip_required,
+ dm_immediate_flip_opportunistic,
};
enum unbounded_requesting_policy {
@@ -189,4 +237,75 @@ enum unbounded_requesting_policy {
dm_unbounded_requesting_disable
};
+enum dm_rotation_angle {
+ dm_rotation_0,
+ dm_rotation_90,
+ dm_rotation_180,
+ dm_rotation_270,
+ dm_rotation_0m,
+ dm_rotation_90m,
+ dm_rotation_180m,
+ dm_rotation_270m,
+};
+
+enum dm_use_mall_for_pstate_change_mode {
+ dm_use_mall_pstate_change_disable,
+ dm_use_mall_pstate_change_full_frame,
+ dm_use_mall_pstate_change_sub_viewport,
+ dm_use_mall_pstate_change_phantom_pipe
+};
+
+enum dm_use_mall_for_static_screen_mode {
+ dm_use_mall_static_screen_disable,
+ dm_use_mall_static_screen_optimize,
+ dm_use_mall_static_screen_enable,
+};
+
+enum dm_output_link_dp_rate {
+ dm_dp_rate_na,
+ dm_dp_rate_hbr,
+ dm_dp_rate_hbr2,
+ dm_dp_rate_hbr3,
+ dm_dp_rate_uhbr10,
+ dm_dp_rate_uhbr13p5,
+ dm_dp_rate_uhbr20,
+};
+
+enum dm_fclock_change_support {
+ dm_fclock_change_vactive,
+ dm_fclock_change_vblank,
+ dm_fclock_change_unsupported,
+};
+
+enum dm_prefetch_modes {
+ dm_prefetch_support_uclk_fclk_and_stutter_if_possible,
+ dm_prefetch_support_uclk_fclk_and_stutter,
+ dm_prefetch_support_fclk_and_stutter,
+ dm_prefetch_support_stutter,
+ dm_prefetch_support_none,
+};
+enum dm_output_type {
+ dm_output_type_unknown,
+ dm_output_type_dp,
+ dm_output_type_edp,
+ dm_output_type_dp2p0,
+ dm_output_type_hdmi,
+ dm_output_type_hdmifrl,
+};
+
+enum dm_output_rate {
+ dm_output_rate_unknown,
+ dm_output_rate_dp_rate_hbr,
+ dm_output_rate_dp_rate_hbr2,
+ dm_output_rate_dp_rate_hbr3,
+ dm_output_rate_dp_rate_uhbr10,
+ dm_output_rate_dp_rate_uhbr13p5,
+ dm_output_rate_dp_rate_uhbr20,
+ dm_output_rate_hdmi_rate_3x3,
+ dm_output_rate_hdmi_rate_6x3,
+ dm_output_rate_hdmi_rate_6x4,
+ dm_output_rate_hdmi_rate_8x4,
+ dm_output_rate_hdmi_rate_10x4,
+ dm_output_rate_hdmi_rate_12x4,
+};
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
index 30db51fbd8cd..da0cfbb071e6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
@@ -35,43 +35,61 @@
#include "dcn30/display_rq_dlg_calc_30.h"
#include "dcn31/display_mode_vba_31.h"
#include "dcn31/display_rq_dlg_calc_31.h"
+#include "dcn314/display_mode_vba_314.h"
+#include "dcn314/display_rq_dlg_calc_314.h"
+#include "dcn32/display_mode_vba_32.h"
+#include "dcn32/display_rq_dlg_calc_32.h"
#include "dml_logger.h"
-const struct dml_funcs dml20_funcs = {
+static const struct dml_funcs dml20_funcs = {
.validate = dml20_ModeSupportAndSystemConfigurationFull,
.recalculate = dml20_recalculate,
.rq_dlg_get_dlg_reg = dml20_rq_dlg_get_dlg_reg,
.rq_dlg_get_rq_reg = dml20_rq_dlg_get_rq_reg
};
-const struct dml_funcs dml20v2_funcs = {
+static const struct dml_funcs dml20v2_funcs = {
.validate = dml20v2_ModeSupportAndSystemConfigurationFull,
.recalculate = dml20v2_recalculate,
.rq_dlg_get_dlg_reg = dml20v2_rq_dlg_get_dlg_reg,
.rq_dlg_get_rq_reg = dml20v2_rq_dlg_get_rq_reg
};
-const struct dml_funcs dml21_funcs = {
- .validate = dml21_ModeSupportAndSystemConfigurationFull,
- .recalculate = dml21_recalculate,
- .rq_dlg_get_dlg_reg = dml21_rq_dlg_get_dlg_reg,
- .rq_dlg_get_rq_reg = dml21_rq_dlg_get_rq_reg
+static const struct dml_funcs dml21_funcs = {
+ .validate = dml21_ModeSupportAndSystemConfigurationFull,
+ .recalculate = dml21_recalculate,
+ .rq_dlg_get_dlg_reg = dml21_rq_dlg_get_dlg_reg,
+ .rq_dlg_get_rq_reg = dml21_rq_dlg_get_rq_reg
};
-const struct dml_funcs dml30_funcs = {
+static const struct dml_funcs dml30_funcs = {
.validate = dml30_ModeSupportAndSystemConfigurationFull,
.recalculate = dml30_recalculate,
.rq_dlg_get_dlg_reg = dml30_rq_dlg_get_dlg_reg,
.rq_dlg_get_rq_reg = dml30_rq_dlg_get_rq_reg
};
-const struct dml_funcs dml31_funcs = {
+static const struct dml_funcs dml31_funcs = {
.validate = dml31_ModeSupportAndSystemConfigurationFull,
.recalculate = dml31_recalculate,
.rq_dlg_get_dlg_reg = dml31_rq_dlg_get_dlg_reg,
.rq_dlg_get_rq_reg = dml31_rq_dlg_get_rq_reg
};
+static const struct dml_funcs dml314_funcs = {
+ .validate = dml314_ModeSupportAndSystemConfigurationFull,
+ .recalculate = dml314_recalculate,
+ .rq_dlg_get_dlg_reg = dml314_rq_dlg_get_dlg_reg,
+ .rq_dlg_get_rq_reg = dml314_rq_dlg_get_rq_reg
+};
+
+static const struct dml_funcs dml32_funcs = {
+ .validate = dml32_ModeSupportAndSystemConfigurationFull,
+ .recalculate = dml32_recalculate,
+ .rq_dlg_get_dlg_reg_v2 = dml32_rq_dlg_get_dlg_reg,
+ .rq_dlg_get_rq_reg_v2 = dml32_rq_dlg_get_rq_reg
+};
+
void dml_init_instance(struct display_mode_lib *lib,
const struct _vcs_dpi_soc_bounding_box_st *soc_bb,
const struct _vcs_dpi_ip_params_st *ip_params,
@@ -95,9 +113,15 @@ void dml_init_instance(struct display_mode_lib *lib,
lib->funcs = dml30_funcs;
break;
case DML_PROJECT_DCN31:
- case DML_PROJECT_DCN31_FPGA:
+ case DML_PROJECT_DCN315:
lib->funcs = dml31_funcs;
break;
+ case DML_PROJECT_DCN314:
+ lib->funcs = dml314_funcs;
+ break;
+ case DML_PROJECT_DCN32:
+ lib->funcs = dml32_funcs;
+ break;
default:
break;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
index 6905ef1e75a6..5edf69fa40d1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
@@ -40,7 +40,9 @@ enum dml_project {
DML_PROJECT_DCN21,
DML_PROJECT_DCN30,
DML_PROJECT_DCN31,
- DML_PROJECT_DCN31_FPGA,
+ DML_PROJECT_DCN315,
+ DML_PROJECT_DCN314,
+ DML_PROJECT_DCN32,
};
struct display_mode_lib;
@@ -62,6 +64,20 @@ struct dml_funcs {
struct display_mode_lib *mode_lib,
display_rq_regs_st *rq_regs,
const display_pipe_params_st *pipe_param);
+ // DLG interfaces have different function parameters in DCN32.
+ // Create new function pointers to address the changes
+ void (*rq_dlg_get_dlg_reg_v2)(
+ struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+ display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx);
+ void (*rq_dlg_get_rq_reg_v2)(display_rq_regs_st *rq_regs,
+ struct display_mode_lib *mode_lib,
+ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx);
void (*recalculate)(struct display_mode_lib *mode_lib);
void (*validate)(struct display_mode_lib *mode_lib);
};
@@ -73,6 +89,8 @@ struct display_mode_lib {
struct vba_vars_st vba;
struct dal_logger *logger;
struct dml_funcs funcs;
+ struct _vcs_dpi_display_e2e_pipe_params_st dml_pipe_state[6];
+ bool validate_max_state;
};
void dml_init_instance(struct display_mode_lib *lib,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
index d46a2733024c..641a8cd019cd 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
@@ -26,6 +26,16 @@
#include "dc_features.h"
#include "display_mode_enums.h"
+/**
+ * DOC: overview
+ *
+ * Most of the DML code is automatically generated and tested via hardware
+ * description language. Usually, we use the reference _vcs_dpi in the code
+ * where VCS means "Verilog Compiled Simulator" and DPI stands for "Direct
+ * Programming Interface". In other words, those structs can be used to
+ * interface Verilog with other languages such as C.
+ */
+
#ifndef __DISPLAY_MODE_STRUCTS_H__
#define __DISPLAY_MODE_STRUCTS_H__
@@ -54,12 +64,102 @@ typedef struct _vcs_dpi_display_rq_regs_st display_rq_regs_st;
typedef struct _vcs_dpi_display_dlg_sys_params_st display_dlg_sys_params_st;
typedef struct _vcs_dpi_display_arb_params_st display_arb_params_st;
+typedef struct {
+ double UrgentWatermark;
+ double WritebackUrgentWatermark;
+ double DRAMClockChangeWatermark;
+ double FCLKChangeWatermark;
+ double WritebackDRAMClockChangeWatermark;
+ double WritebackFCLKChangeWatermark;
+ double StutterExitWatermark;
+ double StutterEnterPlusExitWatermark;
+ double Z8StutterExitWatermark;
+ double Z8StutterEnterPlusExitWatermark;
+ double USRRetrainingWatermark;
+} Watermarks;
+
+typedef struct {
+ double UrgentLatency;
+ double ExtraLatency;
+ double WritebackLatency;
+ double DRAMClockChangeLatency;
+ double FCLKChangeLatency;
+ double SRExitTime;
+ double SREnterPlusExitTime;
+ double SRExitZ8Time;
+ double SREnterPlusExitZ8Time;
+ double USRRetrainingLatencyPlusSMNLatency;
+} Latencies;
+
+typedef struct {
+ double Dppclk;
+ double Dispclk;
+ double PixelClock;
+ double DCFClkDeepSleep;
+ unsigned int DPPPerSurface;
+ bool ScalerEnabled;
+ enum dm_rotation_angle SourceRotation;
+ unsigned int ViewportHeight;
+ unsigned int ViewportHeightChroma;
+ unsigned int BlockWidth256BytesY;
+ unsigned int BlockHeight256BytesY;
+ unsigned int BlockWidth256BytesC;
+ unsigned int BlockHeight256BytesC;
+ unsigned int BlockWidthY;
+ unsigned int BlockHeightY;
+ unsigned int BlockWidthC;
+ unsigned int BlockHeightC;
+ unsigned int InterlaceEnable;
+ unsigned int NumberOfCursors;
+ unsigned int VBlank;
+ unsigned int HTotal;
+ unsigned int HActive;
+ bool DCCEnable;
+ enum odm_combine_mode ODMMode;
+ enum source_format_class SourcePixelFormat;
+ enum dm_swizzle_mode SurfaceTiling;
+ unsigned int BytePerPixelY;
+ unsigned int BytePerPixelC;
+ bool ProgressiveToInterlaceUnitInOPP;
+ double VRatio;
+ double VRatioChroma;
+ unsigned int VTaps;
+ unsigned int VTapsChroma;
+ unsigned int PitchY;
+ unsigned int DCCMetaPitchY;
+ unsigned int PitchC;
+ unsigned int DCCMetaPitchC;
+ bool ViewportStationary;
+ unsigned int ViewportXStart;
+ unsigned int ViewportYStart;
+ unsigned int ViewportXStartC;
+ unsigned int ViewportYStartC;
+ bool FORCE_ONE_ROW_FOR_FRAME;
+ unsigned int SwathHeightY;
+ unsigned int SwathHeightC;
+} DmlPipe;
+
+typedef struct {
+ double UrgentLatency;
+ double ExtraLatency;
+ double WritebackLatency;
+ double DRAMClockChangeLatency;
+ double FCLKChangeLatency;
+ double SRExitTime;
+ double SREnterPlusExitTime;
+ double SRExitZ8Time;
+ double SREnterPlusExitZ8Time;
+ double USRRetrainingLatency;
+ double SMNLatency;
+} SOCParametersList;
+
struct _vcs_dpi_voltage_scaling_st {
int state;
double dscclk_mhz;
double dcfclk_mhz;
double socclk_mhz;
double phyclk_d18_mhz;
+ double phyclk_d32_mhz;
double dram_speed_mts;
double fabricclk_mhz;
double dispclk_mhz;
@@ -67,10 +167,23 @@ struct _vcs_dpi_voltage_scaling_st {
double phyclk_mhz;
double dppclk_mhz;
double dtbclk_mhz;
+ float net_bw_in_kbytes_sec;
};
+/**
+ * _vcs_dpi_soc_bounding_box_st: SOC definitions
+ *
+ * This struct maintains the SOC Bounding Box information for the ASIC; it
+ * defines things such as clock, voltage, performance, etc. Usually, we load
+ * these values from VBIOS; if something goes wrong, we use some hard-coded
+ * values, which will enable the ASIC to light up with limitations.
+ */
struct _vcs_dpi_soc_bounding_box_st {
struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
+ /**
+ * @num_states: It represents the total number of Display Power Management
+ * (DPM) states supported by the specific ASIC.
+ */
unsigned int num_states;
double sr_exit_time_us;
double sr_enter_plus_exit_time_us;
@@ -80,6 +193,16 @@ struct _vcs_dpi_soc_bounding_box_st {
double urgent_latency_pixel_data_only_us;
double urgent_latency_pixel_mixed_with_vm_data_us;
double urgent_latency_vm_data_only_us;
+ double usr_retraining_latency_us;
+ double smn_latency_us;
+ double fclk_change_latency_us;
+ double mall_allocated_for_dcn_mbytes;
+ double pct_ideal_fabric_bw_after_urgent;
+ double pct_ideal_dram_bw_after_urgent_strobe;
+ double max_avg_fabric_bw_use_normal_percent;
+ double max_avg_dram_bw_use_normal_strobe_percent;
+ enum dm_prefetch_modes allow_for_pstate_or_stutter_in_vblank_final;
+ bool dram_clock_change_requirement_final;
double writeback_latency_us;
double ideal_dram_bw_after_urgent_percent;
double pct_ideal_dram_sdp_bw_after_urgent_pixel_only; // PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly
@@ -124,8 +247,17 @@ struct _vcs_dpi_soc_bounding_box_st {
bool disable_dram_clock_change_vactive_support;
bool allow_dram_clock_one_display_vactive;
enum self_refresh_affinity allow_dram_self_refresh_or_dram_clock_change_in_vblank;
+ double max_vratio_pre;
};
+/**
+ * @_vcs_dpi_ip_params_st: IP configuration for DCN blocks
+ *
+ * In this struct you can find the DCN configuration associated to the specific
+ * ASIC. For example, here we can save how many DPPs the ASIC is using and it
+ * is available.
+ *
+ */
struct _vcs_dpi_ip_params_st {
bool use_min_dcfclk;
bool clamp_min_dcfclk;
@@ -141,12 +273,16 @@ struct _vcs_dpi_ip_params_st {
unsigned int odm_capable;
unsigned int rob_buffer_size_kbytes;
unsigned int det_buffer_size_kbytes;
+ unsigned int min_comp_buffer_size_kbytes;
unsigned int dpte_buffer_size_in_pte_reqs_luma;
unsigned int dpte_buffer_size_in_pte_reqs_chroma;
unsigned int pde_proc_buffer_size_64k_reqs;
unsigned int dpp_output_buffer_pixels;
unsigned int opp_output_buffer_lines;
unsigned int pixel_chunk_size_kbytes;
+ unsigned int alpha_pixel_chunk_size_kbytes;
+ unsigned int min_pixel_chunk_size_bytes;
+ unsigned int dcc_meta_buffer_size_bytes;
unsigned char pte_enable;
unsigned int pte_chunk_size_kbytes;
unsigned int meta_chunk_size_kbytes;
@@ -167,12 +303,16 @@ struct _vcs_dpi_ip_params_st {
double writeback_min_hscl_ratio;
double writeback_min_vscl_ratio;
unsigned int maximum_dsc_bits_per_component;
+ unsigned int maximum_pixels_per_line_per_dsc_unit;
unsigned int writeback_max_hscl_taps;
unsigned int writeback_max_vscl_taps;
unsigned int writeback_line_buffer_luma_buffer_size;
unsigned int writeback_line_buffer_chroma_buffer_size;
unsigned int max_page_table_levels;
+ /**
+ * @max_num_dpp: Maximum number of DPP supported in the target ASIC.
+ */
unsigned int max_num_dpp;
unsigned int max_num_otg;
unsigned int cursor_chunk_size;
@@ -223,6 +363,13 @@ struct _vcs_dpi_ip_params_st {
unsigned int can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one;
unsigned int bug_forcing_LC_req_same_size_fixed;
unsigned int number_of_cursors;
+ unsigned int max_num_dp2p0_outputs;
+ unsigned int max_num_dp2p0_streams;
+ unsigned int VBlankNomDefaultUS;
+
+ /* DM workarounds */
+ double dsc_delay_factor_wa; // TODO: Remove after implementing root cause fix
+ double min_prefetch_in_strobe_us;
};
struct _vcs_dpi_display_xfc_params_st {
@@ -249,6 +396,8 @@ struct _vcs_dpi_display_pipe_source_params_st {
bool hostvm_levels_force_en;
unsigned int hostvm_levels_force;
int source_scan;
+ int source_rotation; // new in dml32
+ unsigned int det_size_override; // used to populate DETSizeOverride in vba struct
int sw_mode;
int macro_tile_size;
unsigned int surface_width_y;
@@ -263,6 +412,15 @@ struct _vcs_dpi_display_pipe_source_params_st {
unsigned int viewport_height_c;
unsigned int viewport_width_max;
unsigned int viewport_height_max;
+ unsigned int viewport_x_y;
+ unsigned int viewport_x_c;
+ bool viewport_stationary;
+ unsigned int dcc_rate_luma;
+ unsigned int gpuvm_min_page_size_kbytes;
+ unsigned int use_mall_for_pstate_change;
+ unsigned int use_mall_for_static_screen;
+ bool force_one_row_for_frame;
+ bool pte_buffer_mode;
unsigned int data_pitch;
unsigned int data_pitch_c;
unsigned int meta_pitch;
@@ -295,10 +453,17 @@ struct writeback_st {
int wb_vtaps_luma;
int wb_htaps_chroma;
int wb_vtaps_chroma;
+ unsigned int wb_htaps;
+ unsigned int wb_vtaps;
double wb_hratio;
double wb_vratio;
};
+struct display_audio_params_st {
+ unsigned int audio_sample_rate_khz;
+ int audio_sample_layout;
+};
+
struct _vcs_dpi_display_output_params_st {
int dp_lanes;
double output_bpp;
@@ -312,6 +477,11 @@ struct _vcs_dpi_display_output_params_st {
int dsc_slices;
int max_audio_sample_rate;
struct writeback_st wb;
+ struct display_audio_params_st audio;
+ unsigned int output_bpc;
+ int dp_rate;
+ unsigned int dp_multistream_id;
+ bool dp_multistream_en;
};
struct _vcs_dpi_scaler_ratio_depth_st {
@@ -346,12 +516,14 @@ struct _vcs_dpi_display_pipe_dest_params_st {
unsigned int htotal;
unsigned int vtotal;
unsigned int vfront_porch;
+ unsigned int vblank_nom;
unsigned int vactive;
unsigned int hactive;
unsigned int vstartup_start;
unsigned int vupdate_offset;
unsigned int vupdate_width;
unsigned int vready_offset;
+ unsigned int pstate_keepout;
unsigned char interlaced;
double pixel_rate_mhz;
unsigned char synchronized_vblank_all_planes;
@@ -360,6 +532,10 @@ struct _vcs_dpi_display_pipe_dest_params_st {
unsigned char use_maximum_vstartup;
unsigned int vtotal_max;
unsigned int vtotal_min;
+ unsigned int refresh_rate;
+ bool synchronize_timings;
+ unsigned int odm_combine_policy;
+ bool drr_display;
};
struct _vcs_dpi_display_pipe_params_st {
@@ -445,6 +621,7 @@ struct _vcs_dpi_display_dlg_regs_st {
unsigned int refcyc_h_blank_end;
unsigned int dlg_vblank_end;
unsigned int min_dst_y_next_start;
+ unsigned int min_dst_y_next_start_us;
unsigned int refcyc_per_htotal;
unsigned int refcyc_x_after_scaler;
unsigned int dst_y_after_scaler;
@@ -456,6 +633,7 @@ struct _vcs_dpi_display_dlg_regs_st {
unsigned int ref_freq_to_pix_freq;
unsigned int vratio_prefetch;
unsigned int vratio_prefetch_c;
+ unsigned int refcyc_per_tdlut_group;
unsigned int refcyc_per_pte_group_vblank_l;
unsigned int refcyc_per_pte_group_vblank_c;
unsigned int refcyc_per_meta_chunk_vblank_l;
@@ -546,7 +724,6 @@ struct _vcs_dpi_display_dlg_sys_params_st {
double t_sr_wm_us;
double t_extra_us;
double mem_trip_us;
- double t_srx_delay_us;
double deepsleep_dcfclk_mhz;
double total_flip_bw;
unsigned int total_flip_bytes;
@@ -556,6 +733,9 @@ struct _vcs_dpi_display_arb_params_st {
int max_req_outstanding;
int min_req_outstanding;
int sat_level_us;
+ int hvm_min_req_outstand_commit_threshold;
+ int hvm_max_qos_commit_threshold;
+ int compbuf_reserved_space_kbytes;
};
#endif /*__DISPLAY_MODE_STRUCTS_H__*/
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
index 0fad15020c74..85453bbb4f9b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
@@ -47,6 +47,7 @@ static void recalculate_params(
unsigned int num_pipes);
static unsigned int CursorBppEnumToBits(enum cursor_bpp ebpp);
+static void cache_debug_params(struct display_mode_lib *mode_lib);
unsigned int dml_get_voltage_level(
struct display_mode_lib *mode_lib,
@@ -73,6 +74,7 @@ unsigned int dml_get_voltage_level(
PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
}
mode_lib->funcs.validate(mode_lib);
+ cache_debug_params(mode_lib);
return mode_lib->vba.VoltageLevel;
}
@@ -109,6 +111,22 @@ dml_get_attr_func(tcalc, mode_lib->vba.TCalc);
dml_get_attr_func(fraction_of_urgent_bandwidth, mode_lib->vba.FractionOfUrgentBandwidth);
dml_get_attr_func(fraction_of_urgent_bandwidth_imm_flip, mode_lib->vba.FractionOfUrgentBandwidthImmediateFlip);
+
+dml_get_attr_func(cstate_max_cap_mode, mode_lib->vba.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
+dml_get_attr_func(comp_buffer_size_kbytes, mode_lib->vba.CompressedBufferSizeInkByte);
+dml_get_attr_func(pixel_chunk_size_in_kbyte, mode_lib->vba.PixelChunkSizeInKByte);
+dml_get_attr_func(alpha_pixel_chunk_size_in_kbyte, mode_lib->vba.AlphaPixelChunkSizeInKByte);
+dml_get_attr_func(meta_chunk_size_in_kbyte, mode_lib->vba.MetaChunkSize);
+dml_get_attr_func(min_pixel_chunk_size_in_byte, mode_lib->vba.MinPixelChunkSizeBytes);
+dml_get_attr_func(min_meta_chunk_size_in_byte, mode_lib->vba.MinMetaChunkSizeBytes);
+dml_get_attr_func(fclk_watermark, mode_lib->vba.Watermark.FCLKChangeWatermark);
+dml_get_attr_func(usr_retraining_watermark, mode_lib->vba.Watermark.USRRetrainingWatermark);
+
+dml_get_attr_func(comp_buffer_reserved_space_kbytes, mode_lib->vba.CompBufReservedSpaceKBytes);
+dml_get_attr_func(comp_buffer_reserved_space_64bytes, mode_lib->vba.CompBufReservedSpace64B);
+dml_get_attr_func(comp_buffer_reserved_space_zs, mode_lib->vba.CompBufReservedSpaceZs);
+dml_get_attr_func(unbounded_request_enabled, mode_lib->vba.UnboundedRequestEnabled);
+
#define dml_get_pipe_attr_func(attr, var) double get_##attr(struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes, unsigned int num_pipes, unsigned int which_pipe) \
{\
unsigned int which_plane; \
@@ -163,6 +181,28 @@ dml_get_pipe_attr_func(vupdate_width, mode_lib->vba.VUpdateWidthPix);
dml_get_pipe_attr_func(vready_offset, mode_lib->vba.VReadyOffsetPix);
dml_get_pipe_attr_func(vready_at_or_after_vsync, mode_lib->vba.VREADY_AT_OR_AFTER_VSYNC);
dml_get_pipe_attr_func(min_dst_y_next_start, mode_lib->vba.MIN_DST_Y_NEXT_START);
+dml_get_pipe_attr_func(dst_y_per_pte_row_nom_l, mode_lib->vba.DST_Y_PER_PTE_ROW_NOM_L);
+dml_get_pipe_attr_func(dst_y_per_pte_row_nom_c, mode_lib->vba.DST_Y_PER_PTE_ROW_NOM_C);
+dml_get_pipe_attr_func(dst_y_per_meta_row_nom_l, mode_lib->vba.DST_Y_PER_META_ROW_NOM_L);
+dml_get_pipe_attr_func(dst_y_per_meta_row_nom_c, mode_lib->vba.DST_Y_PER_META_ROW_NOM_C);
+dml_get_pipe_attr_func(refcyc_per_pte_group_nom_l_in_us, mode_lib->vba.time_per_pte_group_nom_luma);
+dml_get_pipe_attr_func(refcyc_per_pte_group_nom_c_in_us, mode_lib->vba.time_per_pte_group_nom_chroma);
+dml_get_pipe_attr_func(refcyc_per_pte_group_vblank_l_in_us, mode_lib->vba.time_per_pte_group_vblank_luma);
+dml_get_pipe_attr_func(refcyc_per_pte_group_vblank_c_in_us, mode_lib->vba.time_per_pte_group_vblank_chroma);
+dml_get_pipe_attr_func(refcyc_per_pte_group_flip_l_in_us, mode_lib->vba.time_per_pte_group_flip_luma);
+dml_get_pipe_attr_func(refcyc_per_pte_group_flip_c_in_us, mode_lib->vba.time_per_pte_group_flip_chroma);
+dml_get_pipe_attr_func(vstartup_calculated, mode_lib->vba.VStartup);
+dml_get_pipe_attr_func(dpte_row_height_linear_c, mode_lib->vba.dpte_row_height_linear_chroma);
+dml_get_pipe_attr_func(swath_height_l, mode_lib->vba.SwathHeightY);
+dml_get_pipe_attr_func(swath_height_c, mode_lib->vba.SwathHeightC);
+dml_get_pipe_attr_func(det_stored_buffer_size_l_bytes, mode_lib->vba.DETBufferSizeY);
+dml_get_pipe_attr_func(det_stored_buffer_size_c_bytes, mode_lib->vba.DETBufferSizeC);
+dml_get_pipe_attr_func(dpte_group_size_in_bytes, mode_lib->vba.dpte_group_bytes);
+dml_get_pipe_attr_func(vm_group_size_in_bytes, mode_lib->vba.vm_group_bytes);
+dml_get_pipe_attr_func(dpte_row_height_linear_l, mode_lib->vba.dpte_row_height_linear);
+dml_get_pipe_attr_func(pte_buffer_mode, mode_lib->vba.PTE_BUFFER_MODE);
+dml_get_pipe_attr_func(subviewport_lines_needed_in_mall, mode_lib->vba.SubViewportLinesNeededInMALL);
+dml_get_pipe_attr_func(surface_size_in_mall, mode_lib->vba.SurfaceSizeInMALL)
double get_total_immediate_flip_bytes(
struct display_mode_lib *mode_lib,
@@ -200,6 +240,67 @@ double get_total_prefetch_bw(
return total_prefetch_bw;
}
+unsigned int get_total_surface_size_in_mall_bytes(
+ struct display_mode_lib *mode_lib,
+ const display_e2e_pipe_params_st *pipes,
+ unsigned int num_pipes)
+{
+ unsigned int k;
+ unsigned int size = 0.0;
+ recalculate_params(mode_lib, pipes, num_pipes);
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ size += mode_lib->vba.SurfaceSizeInMALL[k];
+ return size;
+}
+
+static unsigned int get_pipe_idx(struct display_mode_lib *mode_lib, unsigned int plane_idx)
+{
+ int pipe_idx = -1;
+ int i;
+
+ ASSERT(plane_idx < DC__NUM_DPP__MAX);
+
+ for (i = 0; i < DC__NUM_DPP__MAX ; i++) {
+ if (plane_idx == mode_lib->vba.pipe_plane[i]) {
+ pipe_idx = i;
+ break;
+ }
+ }
+ ASSERT(pipe_idx >= 0);
+
+ return pipe_idx;
+}
+
+
+double get_det_buffer_size_kbytes(struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes,
+ unsigned int num_pipes, unsigned int pipe_idx)
+{
+ unsigned int plane_idx;
+ double det_buf_size_kbytes;
+
+ recalculate_params(mode_lib, pipes, num_pipes);
+ plane_idx = mode_lib->vba.pipe_plane[pipe_idx];
+
+ dml_print("DML::%s: num_pipes=%d pipe_idx=%d plane_idx=%0d\n", __func__, num_pipes, pipe_idx, plane_idx);
+ det_buf_size_kbytes = mode_lib->vba.DETBufferSizeInKByte[plane_idx]; // per hubp DET buffer size
+
+ dml_print("DML::%s: det_buf_size_kbytes=%3.2f\n", __func__, det_buf_size_kbytes);
+
+ return det_buf_size_kbytes;
+}
+
+bool get_is_phantom_pipe(struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes,
+ unsigned int num_pipes, unsigned int pipe_idx)
+{
+ unsigned int plane_idx;
+
+ recalculate_params(mode_lib, pipes, num_pipes);
+ plane_idx = mode_lib->vba.pipe_plane[pipe_idx];
+ dml_print("DML::%s: num_pipes=%d pipe_idx=%d UseMALLForPStateChange=%0d\n", __func__, num_pipes, pipe_idx,
+ mode_lib->vba.UsesMALLForPStateChange[plane_idx]);
+ return (mode_lib->vba.UsesMALLForPStateChange[plane_idx] == dm_use_mall_pstate_change_phantom_pipe);
+}
+
static void fetch_socbb_params(struct display_mode_lib *mode_lib)
{
soc_bounding_box_st *soc = &mode_lib->vba.soc;
@@ -239,6 +340,21 @@ static void fetch_socbb_params(struct display_mode_lib *mode_lib)
soc->max_avg_sdp_bw_use_normal_percent;
mode_lib->vba.SRExitZ8Time = soc->sr_exit_z8_time_us;
mode_lib->vba.SREnterPlusExitZ8Time = soc->sr_enter_plus_exit_z8_time_us;
+ mode_lib->vba.FCLKChangeLatency = soc->fclk_change_latency_us;
+ mode_lib->vba.USRRetrainingLatency = soc->usr_retraining_latency_us;
+ mode_lib->vba.SMNLatency = soc->smn_latency_us;
+ mode_lib->vba.MALLAllocatedForDCNFinal = soc->mall_allocated_for_dcn_mbytes;
+
+ mode_lib->vba.PercentOfIdealDRAMBWReceivedAfterUrgLatencySTROBE = soc->pct_ideal_dram_bw_after_urgent_strobe;
+ mode_lib->vba.MaxAveragePercentOfIdealFabricBWDisplayCanUseInNormalSystemOperation =
+ soc->max_avg_fabric_bw_use_normal_percent;
+ mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperationSTROBE =
+ soc->max_avg_dram_bw_use_normal_strobe_percent;
+
+ mode_lib->vba.DRAMClockChangeRequirementFinal = soc->dram_clock_change_requirement_final;
+ mode_lib->vba.FCLKChangeRequirementFinal = 1;
+ mode_lib->vba.USRRetrainingRequiredFinal = 1;
+ mode_lib->vba.AllowForPStateChangeOrStutterInVBlankFinal = soc->allow_for_pstate_or_stutter_in_vblank_final;
mode_lib->vba.DRAMClockChangeLatency = soc->dram_clock_change_latency_us;
mode_lib->vba.DummyPStateCheck = soc->dram_clock_change_latency_us == soc->dummy_pstate_latency_us;
mode_lib->vba.DRAMClockChangeSupportsVActive = !soc->disable_dram_clock_change_vactive_support ||
@@ -281,6 +397,7 @@ static void fetch_socbb_params(struct display_mode_lib *mode_lib)
mode_lib->vba.SOCCLKPerState[i] = soc->clock_limits[i].socclk_mhz;
mode_lib->vba.PHYCLKPerState[i] = soc->clock_limits[i].phyclk_mhz;
mode_lib->vba.PHYCLKD18PerState[i] = soc->clock_limits[i].phyclk_d18_mhz;
+ mode_lib->vba.PHYCLKD32PerState[i] = soc->clock_limits[i].phyclk_d32_mhz;
mode_lib->vba.MaxDppclk[i] = soc->clock_limits[i].dppclk_mhz;
mode_lib->vba.MaxDSCCLK[i] = soc->clock_limits[i].dscclk_mhz;
mode_lib->vba.DRAMSpeedPerState[i] = soc->clock_limits[i].dram_speed_mts;
@@ -295,6 +412,7 @@ static void fetch_socbb_params(struct display_mode_lib *mode_lib)
soc->urgent_latency_adjustment_fabric_clock_component_us;
mode_lib->vba.UrgentLatencyAdjustmentFabricClockReference =
soc->urgent_latency_adjustment_fabric_clock_reference_mhz;
+ mode_lib->vba.MaxVRatioPre = soc->max_vratio_pre;
}
static void fetch_ip_params(struct display_mode_lib *mode_lib)
@@ -323,6 +441,18 @@ static void fetch_ip_params(struct display_mode_lib *mode_lib)
mode_lib->vba.COMPBUF_RESERVED_SPACE_ZS = ip->compbuf_reserved_space_zs;
mode_lib->vba.MaximumDSCBitsPerComponent = ip->maximum_dsc_bits_per_component;
mode_lib->vba.DSC422NativeSupport = ip->dsc422_native_support;
+ /* In DCN3.2, nomDETInKByte should be initialized correctly. */
+ mode_lib->vba.nomDETInKByte = ip->det_buffer_size_kbytes;
+ mode_lib->vba.CompbufReservedSpace64B = ip->compbuf_reserved_space_64b;
+ mode_lib->vba.CompbufReservedSpaceZs = ip->compbuf_reserved_space_zs;
+ mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal = ip->compressed_buffer_segment_size_in_kbytes;
+ mode_lib->vba.LineBufferSizeFinal = ip->line_buffer_size_bits;
+ mode_lib->vba.AlphaPixelChunkSizeInKByte = ip->alpha_pixel_chunk_size_kbytes; // not used
+ mode_lib->vba.MinPixelChunkSizeBytes = ip->min_pixel_chunk_size_bytes; // not used
+ mode_lib->vba.MaximumPixelsPerLinePerDSCUnit = ip->maximum_pixels_per_line_per_dsc_unit;
+ mode_lib->vba.MaxNumDP2p0Outputs = ip->max_num_dp2p0_outputs;
+ mode_lib->vba.MaxNumDP2p0Streams = ip->max_num_dp2p0_streams;
+ mode_lib->vba.DCCMetaBufferSizeBytes = ip->dcc_meta_buffer_size_bytes;
mode_lib->vba.PixelChunkSizeInKByte = ip->pixel_chunk_size_kbytes;
mode_lib->vba.MetaChunkSize = ip->meta_chunk_size_kbytes;
@@ -397,6 +527,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
visited[k] = false;
mode_lib->vba.NumberOfActivePlanes = 0;
+ mode_lib->vba.NumberOfActiveSurfaces = 0;
mode_lib->vba.ImmediateFlipSupport = false;
for (j = 0; j < mode_lib->vba.cache_num_pipes; ++j) {
display_pipe_source_params_st *src = &pipes[j].pipe.src;
@@ -427,6 +558,26 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
src->viewport_y_y;
mode_lib->vba.ViewportYStartC[mode_lib->vba.NumberOfActivePlanes] =
src->viewport_y_c;
+ mode_lib->vba.SourceRotation[mode_lib->vba.NumberOfActiveSurfaces] = src->source_rotation;
+ mode_lib->vba.ViewportXStartY[mode_lib->vba.NumberOfActiveSurfaces] = src->viewport_x_y;
+ mode_lib->vba.ViewportXStartC[mode_lib->vba.NumberOfActiveSurfaces] = src->viewport_x_c;
+ // TODO: Assign correct value to viewport_stationary
+ mode_lib->vba.ViewportStationary[mode_lib->vba.NumberOfActivePlanes] =
+ src->viewport_stationary;
+ mode_lib->vba.UsesMALLForPStateChange[mode_lib->vba.NumberOfActivePlanes] = src->use_mall_for_pstate_change;
+ mode_lib->vba.UseMALLForStaticScreen[mode_lib->vba.NumberOfActivePlanes] = src->use_mall_for_static_screen;
+ mode_lib->vba.GPUVMMinPageSizeKBytes[mode_lib->vba.NumberOfActivePlanes] = src->gpuvm_min_page_size_kbytes;
+ mode_lib->vba.RefreshRate[mode_lib->vba.NumberOfActivePlanes] = dst->refresh_rate; //todo remove this
+ mode_lib->vba.OutputLinkDPRate[mode_lib->vba.NumberOfActivePlanes] = dout->dp_rate;
+ mode_lib->vba.ODMUse[mode_lib->vba.NumberOfActivePlanes] = dst->odm_combine_policy;
+ mode_lib->vba.DETSizeOverride[mode_lib->vba.NumberOfActivePlanes] = src->det_size_override;
+ if (src->det_size_override)
+ mode_lib->vba.DETBufferSizeInKByte[mode_lib->vba.NumberOfActivePlanes] = src->det_size_override;
+ else
+ mode_lib->vba.DETBufferSizeInKByte[mode_lib->vba.NumberOfActivePlanes] = ip->det_buffer_size_kbytes;
+ //TODO: Need to assign correct values to dp_multistream vars
+ mode_lib->vba.OutputMultistreamEn[mode_lib->vba.NumberOfActiveSurfaces] = dout->dp_multistream_en;
+ mode_lib->vba.OutputMultistreamId[mode_lib->vba.NumberOfActiveSurfaces] = dout->dp_multistream_id;
mode_lib->vba.PitchY[mode_lib->vba.NumberOfActivePlanes] = src->data_pitch;
mode_lib->vba.SurfaceWidthY[mode_lib->vba.NumberOfActivePlanes] = src->surface_width_y;
mode_lib->vba.SurfaceHeightY[mode_lib->vba.NumberOfActivePlanes] = src->surface_height_y;
@@ -452,6 +603,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
mode_lib->vba.HTotal[mode_lib->vba.NumberOfActivePlanes] = dst->htotal;
mode_lib->vba.VTotal[mode_lib->vba.NumberOfActivePlanes] = dst->vtotal;
mode_lib->vba.VFrontPorch[mode_lib->vba.NumberOfActivePlanes] = dst->vfront_porch;
+ mode_lib->vba.VBlankNom[mode_lib->vba.NumberOfActivePlanes] = dst->vblank_nom;
mode_lib->vba.DCCFractionOfZeroSizeRequestsLuma[mode_lib->vba.NumberOfActivePlanes] = src->dcc_fraction_of_zs_req_luma;
mode_lib->vba.DCCFractionOfZeroSizeRequestsChroma[mode_lib->vba.NumberOfActivePlanes] = src->dcc_fraction_of_zs_req_chroma;
mode_lib->vba.DCCEnable[mode_lib->vba.NumberOfActivePlanes] =
@@ -479,7 +631,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
mode_lib->vba.skip_dio_check[mode_lib->vba.NumberOfActivePlanes] =
dout->is_virtual;
- if (!dout->dsc_enable)
+ if (dout->dsc_enable)
mode_lib->vba.ForcedOutputLinkBPP[mode_lib->vba.NumberOfActivePlanes] = dout->output_bpp;
else
mode_lib->vba.ForcedOutputLinkBPP[mode_lib->vba.NumberOfActivePlanes] = 0.0;
@@ -553,6 +705,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
mode_lib->vba.PixelClock[mode_lib->vba.NumberOfActivePlanes] = dst->pixel_rate_mhz;
mode_lib->vba.PixelClockBackEnd[mode_lib->vba.NumberOfActivePlanes] = dst->pixel_rate_mhz;
mode_lib->vba.DPPCLK[mode_lib->vba.NumberOfActivePlanes] = clks->dppclk_mhz;
+ mode_lib->vba.DRRDisplay[mode_lib->vba.NumberOfActiveSurfaces] = dst->drr_display;
if (ip->is_line_buffer_bpp_fixed)
mode_lib->vba.LBBitPerPixel[mode_lib->vba.NumberOfActivePlanes] =
ip->line_buffer_fixed_bpp;
@@ -636,6 +789,8 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
mode_lib->vba.pipe_plane[k] =
mode_lib->vba.NumberOfActivePlanes;
mode_lib->vba.DPPPerPlane[mode_lib->vba.NumberOfActivePlanes]++;
+ if (src_k->det_size_override)
+ mode_lib->vba.DETBufferSizeInKByte[mode_lib->vba.NumberOfActivePlanes] = src_k->det_size_override;
if (mode_lib->vba.SourceScan[mode_lib->vba.NumberOfActivePlanes]
== dm_horz) {
mode_lib->vba.ViewportWidth[mode_lib->vba.NumberOfActivePlanes] +=
@@ -675,6 +830,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
}
mode_lib->vba.NumberOfActivePlanes++;
+ mode_lib->vba.NumberOfActiveSurfaces++;
}
// handle overlays through BlendingAndTiming
@@ -700,6 +856,11 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
}
}
+ mode_lib->vba.SynchronizeTimingsFinal = pipes[0].pipe.dest.synchronize_timings;
+ mode_lib->vba.DCCProgrammingAssumesScanDirectionUnknownFinal = false;
+
+ mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = 0;
+
mode_lib->vba.UseUnboundedRequesting = dm_unbounded_requesting;
for (k = 0; k < mode_lib->vba.cache_num_pipes; ++k) {
if (pipes[k].pipe.src.unbounded_req_mode == 0)
@@ -743,6 +904,52 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
mode_lib->vba.GPUVMEnable = mode_lib->vba.GPUVMEnable && !!ip->gpuvm_enable;
mode_lib->vba.HostVMEnable = mode_lib->vba.HostVMEnable && !!ip->hostvm_enable;
+
+ for (k = 0; k < mode_lib->vba.cache_num_pipes; ++k) {
+ mode_lib->vba.ForceOneRowForFrame[k] = pipes[k].pipe.src.force_one_row_for_frame;
+ mode_lib->vba.PteBufferMode[k] = pipes[k].pipe.src.pte_buffer_mode;
+
+ if (mode_lib->vba.PteBufferMode[k] == 0 && mode_lib->vba.GPUVMEnable) {
+ if (mode_lib->vba.ForceOneRowForFrame[k] ||
+ (mode_lib->vba.GPUVMMinPageSizeKBytes[k] > 64*1024) ||
+ (mode_lib->vba.UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_disable) ||
+ (mode_lib->vba.UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable)) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ERROR: Invalid PteBufferMode=%d for plane %0d!\n",
+ __func__, mode_lib->vba.PteBufferMode[k], k);
+ dml_print("DML::%s: - ForceOneRowForFrame = %d\n",
+ __func__, mode_lib->vba.ForceOneRowForFrame[k]);
+ dml_print("DML::%s: - GPUVMMinPageSizeKBytes = %d\n",
+ __func__, mode_lib->vba.GPUVMMinPageSizeKBytes[k]);
+ dml_print("DML::%s: - UseMALLForPStateChange = %d\n",
+ __func__, (int) mode_lib->vba.UsesMALLForPStateChange[k]);
+ dml_print("DML::%s: - UseMALLForStaticScreen = %d\n",
+ __func__, (int) mode_lib->vba.UseMALLForStaticScreen[k]);
+#endif
+ ASSERT(0);
+ }
+ }
+ }
+}
+
+/**
+ * cache_debug_params: Cache any params that need to be maintained from the initial validation
+ * for debug purposes.
+ *
+ * The DML getters can modify some of the VBA params that we are interested in (for example when
+ * calculating with dummy p-state latency), so cache any params here that we want for debugging
+ *
+ * @mode_lib: mode_lib input/output of validate call
+ *
+ * Return: void
+ *
+ */
+static void cache_debug_params(struct display_mode_lib *mode_lib)
+{
+ int k = 0;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; k++)
+ mode_lib->vba.CachedActiveDRAMClockChangeLatencyMargin[k] = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
}
// in wm mode we pull the parameters needed from the display_e2e_pipe_params_st structs
@@ -768,7 +975,7 @@ static void recalculate_params(
}
}
-bool Calculate256BBlockSizes(
+void Calculate256BBlockSizes(
enum source_format_class SourcePixelFormat,
enum dm_swizzle_mode SurfaceTiling,
unsigned int BytePerPixelY,
@@ -806,7 +1013,6 @@ bool Calculate256BBlockSizes(
*BlockWidth256BytesY = 256 / BytePerPixelY / *BlockHeight256BytesY;
*BlockWidth256BytesC = 256 / BytePerPixelC / *BlockHeight256BytesC;
}
- return true;
}
bool CalculateMinAndMaxPrefetchMode(
@@ -872,6 +1078,7 @@ void ModeSupportAndSystemConfiguration(struct display_mode_lib *mode_lib)
soc_bounding_box_st *soc = &mode_lib->vba.soc;
unsigned int k;
unsigned int total_pipes = 0;
+ unsigned int pipe_idx = 0;
mode_lib->vba.VoltageLevel = mode_lib->vba.cache_pipes[0].clks_cfg.voltage;
mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBWPerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb];
@@ -892,7 +1099,17 @@ void ModeSupportAndSystemConfiguration(struct display_mode_lib *mode_lib)
// Total Available Pipes Support Check
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
+ pipe_idx = get_pipe_idx(mode_lib, k);
+ if (pipe_idx == -1) {
+ ASSERT(0);
+ continue; // skip inactive planes
+ }
total_pipes += mode_lib->vba.DPPPerPlane[k];
+
+ if (mode_lib->vba.cache_pipes[pipe_idx].clks_cfg.dppclk_mhz > 0.0)
+ mode_lib->vba.DPPCLK[k] = mode_lib->vba.cache_pipes[pipe_idx].clks_cfg.dppclk_mhz;
+ else
+ mode_lib->vba.DPPCLK[k] = soc->clock_limits[mode_lib->vba.VoltageLevel].dppclk_mhz;
}
ASSERT(total_pipes <= DC__NUM_DPP__MAX);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
index 90e87961fe3e..07993741f5e6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
@@ -58,6 +58,19 @@ dml_get_attr_decl(return_bw);
dml_get_attr_decl(tcalc);
dml_get_attr_decl(fraction_of_urgent_bandwidth);
dml_get_attr_decl(fraction_of_urgent_bandwidth_imm_flip);
+dml_get_attr_decl(cstate_max_cap_mode);
+dml_get_attr_decl(comp_buffer_size_kbytes);
+dml_get_attr_decl(pixel_chunk_size_in_kbyte);
+dml_get_attr_decl(alpha_pixel_chunk_size_in_kbyte);
+dml_get_attr_decl(meta_chunk_size_in_kbyte);
+dml_get_attr_decl(min_pixel_chunk_size_in_byte);
+dml_get_attr_decl(min_meta_chunk_size_in_byte);
+dml_get_attr_decl(fclk_watermark);
+dml_get_attr_decl(usr_retraining_watermark);
+dml_get_attr_decl(comp_buffer_reserved_space_kbytes);
+dml_get_attr_decl(comp_buffer_reserved_space_64bytes);
+dml_get_attr_decl(comp_buffer_reserved_space_zs);
+dml_get_attr_decl(unbounded_request_enabled);
#define dml_get_pipe_attr_decl(attr) double get_##attr(struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes, unsigned int num_pipes, unsigned int which_pipe)
@@ -75,6 +88,26 @@ dml_get_pipe_attr_decl(dst_y_per_row_vblank);
dml_get_pipe_attr_decl(dst_y_prefetch);
dml_get_pipe_attr_decl(dst_y_per_vm_flip);
dml_get_pipe_attr_decl(dst_y_per_row_flip);
+dml_get_pipe_attr_decl(dst_y_per_pte_row_nom_l);
+dml_get_pipe_attr_decl(dst_y_per_pte_row_nom_c);
+dml_get_pipe_attr_decl(dst_y_per_meta_row_nom_l);
+dml_get_pipe_attr_decl(dst_y_per_meta_row_nom_c);
+dml_get_pipe_attr_decl(dpte_row_height_linear_c);
+dml_get_pipe_attr_decl(swath_height_l);
+dml_get_pipe_attr_decl(swath_height_c);
+dml_get_pipe_attr_decl(det_stored_buffer_size_l_bytes);
+dml_get_pipe_attr_decl(det_stored_buffer_size_c_bytes);
+dml_get_pipe_attr_decl(dpte_group_size_in_bytes);
+dml_get_pipe_attr_decl(vm_group_size_in_bytes);
+dml_get_pipe_attr_decl(det_buffer_size_kbytes);
+dml_get_pipe_attr_decl(dpte_row_height_linear_l);
+dml_get_pipe_attr_decl(refcyc_per_pte_group_nom_l_in_us);
+dml_get_pipe_attr_decl(refcyc_per_pte_group_nom_c_in_us);
+dml_get_pipe_attr_decl(refcyc_per_pte_group_vblank_l_in_us);
+dml_get_pipe_attr_decl(refcyc_per_pte_group_vblank_c_in_us);
+dml_get_pipe_attr_decl(refcyc_per_pte_group_flip_l_in_us);
+dml_get_pipe_attr_decl(refcyc_per_pte_group_flip_c_in_us);
+dml_get_pipe_attr_decl(pte_buffer_mode);
dml_get_pipe_attr_decl(refcyc_per_vm_group_vblank);
dml_get_pipe_attr_decl(refcyc_per_vm_group_flip);
dml_get_pipe_attr_decl(refcyc_per_vm_req_vblank);
@@ -108,6 +141,9 @@ dml_get_pipe_attr_decl(vupdate_width);
dml_get_pipe_attr_decl(vready_offset);
dml_get_pipe_attr_decl(vready_at_or_after_vsync);
dml_get_pipe_attr_decl(min_dst_y_next_start);
+dml_get_pipe_attr_decl(vstartup_calculated);
+dml_get_pipe_attr_decl(subviewport_lines_needed_in_mall);
+dml_get_pipe_attr_decl(surface_size_in_mall);
double get_total_immediate_flip_bytes(
struct display_mode_lib *mode_lib,
@@ -126,9 +162,18 @@ unsigned int dml_get_voltage_level(
const display_e2e_pipe_params_st *pipes,
unsigned int num_pipes);
+unsigned int get_total_surface_size_in_mall_bytes(
+ struct display_mode_lib *mode_lib,
+ const display_e2e_pipe_params_st *pipes,
+ unsigned int num_pipes);
+
+bool get_is_phantom_pipe(struct display_mode_lib *mode_lib,
+ const display_e2e_pipe_params_st *pipes,
+ unsigned int num_pipes,
+ unsigned int pipe_idx);
void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct display_mode_lib *mode_lib);
-bool Calculate256BBlockSizes(
+void Calculate256BBlockSizes(
enum source_format_class SourcePixelFormat,
enum dm_swizzle_mode SurfaceTiling,
unsigned int BytePerPixelY,
@@ -138,6 +183,79 @@ bool Calculate256BBlockSizes(
unsigned int *BlockWidth256BytesY,
unsigned int *BlockWidth256BytesC);
+struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation {
+ unsigned int dummy_integer_array[2][DC__NUM_DPP__MAX];
+ double dummy_single_array[2][DC__NUM_DPP__MAX];
+ unsigned int dummy_long_array[2][DC__NUM_DPP__MAX];
+ double dummy_double_array[2][DC__NUM_DPP__MAX];
+ bool dummy_boolean_array[DC__NUM_DPP__MAX];
+ bool dummy_boolean;
+ bool dummy_boolean2;
+ enum output_encoder_class dummy_output_encoder_array[DC__NUM_DPP__MAX];
+ DmlPipe SurfaceParameters[DC__NUM_DPP__MAX];
+ bool dummy_boolean_array2[2][DC__NUM_DPP__MAX];
+ unsigned int ReorderBytes;
+ unsigned int VMDataOnlyReturnBW;
+ double HostVMInefficiencyFactor;
+ DmlPipe myPipe;
+ SOCParametersList mmSOCParameters;
+ double dummy_unit_vector[DC__NUM_DPP__MAX];
+ double dummy_single[2];
+ enum clock_change_support dummy_dramchange_support;
+ enum dm_fclock_change_support dummy_fclkchange_support;
+ bool dummy_USRRetrainingSupport;
+};
+
+struct dml32_ModeSupportAndSystemConfigurationFull {
+ unsigned int dummy_integer_array[22][DC__NUM_DPP__MAX];
+ double dummy_double_array[2][DC__NUM_DPP__MAX];
+ DmlPipe SurfParameters[DC__NUM_DPP__MAX];
+ double dummy_single[5];
+ double dummy_single2[5];
+ SOCParametersList mSOCParameters;
+ unsigned int MaximumSwathWidthSupportLuma;
+ unsigned int MaximumSwathWidthSupportChroma;
+ double DSTYAfterScaler[DC__NUM_DPP__MAX];
+ double DSTXAfterScaler[DC__NUM_DPP__MAX];
+ double MaxTotalVActiveRDBandwidth;
+ bool dummy_boolean_array[2][DC__NUM_DPP__MAX];
+ enum odm_combine_mode dummy_odm_mode[DC__NUM_DPP__MAX];
+ DmlPipe myPipe;
+ unsigned int dummy_integer[4];
+ unsigned int TotalNumberOfActiveOTG;
+ unsigned int TotalNumberOfActiveHDMIFRL;
+ unsigned int TotalNumberOfActiveDP2p0;
+ unsigned int TotalNumberOfActiveDP2p0Outputs;
+ unsigned int TotalDSCUnitsRequired;
+ unsigned int ReorderingBytes;
+ unsigned int TotalSlots;
+ unsigned int NumberOfDPPDSC;
+ unsigned int NumberOfDPPNoDSC;
+ unsigned int NextPrefetchModeState;
+ bool MPCCombineMethodAsNeededForPStateChangeAndVoltage;
+ bool MPCCombineMethodAsPossible;
+ bool FullFrameMALLPStateMethod;
+ bool SubViewportMALLPStateMethod;
+ bool PhantomPipeMALLPStateMethod;
+ bool NoChroma;
+ bool TotalAvailablePipesSupportNoDSC;
+ bool TotalAvailablePipesSupportDSC;
+ enum odm_combine_mode ODMModeNoDSC;
+ enum odm_combine_mode ODMModeDSC;
+ double RequiredDISPCLKPerSurfaceNoDSC;
+ double RequiredDISPCLKPerSurfaceDSC;
+ double BWOfNonCombinedSurfaceOfMaximumBandwidth;
+ double VMDataOnlyReturnBWPerState;
+ double HostVMInefficiencyFactor;
+ bool dummy_boolean[2];
+};
+
+struct dummy_vars {
+ struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation;
+ struct dml32_ModeSupportAndSystemConfigurationFull dml32_ModeSupportAndSystemConfigurationFull;
+};
+
struct vba_vars_st {
ip_params_st ip;
soc_bounding_box_st soc;
@@ -145,6 +263,7 @@ struct vba_vars_st {
int maxMpcComb;
bool UseMaximumVStartup;
+ double MaxVRatioPre;
double WritebackDISPCLK;
double DPPCLKUsingSingleDPPLuma;
double DPPCLKUsingSingleDPPChroma;
@@ -169,6 +288,7 @@ struct vba_vars_st {
double NextMaxVStartup;
double VBlankTime;
double SmallestVBlank;
+ enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal; // Mode Support only
double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX];
double EffectiveDETPlusLBLinesLuma;
double EffectiveDETPlusLBLinesChroma;
@@ -194,7 +314,9 @@ struct vba_vars_st {
unsigned int ActiveDPPs;
unsigned int LBLatencyHidingSourceLinesY;
unsigned int LBLatencyHidingSourceLinesC;
+ double ActiveDRAMClockChangeLatencyMarginPerState[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];// DML doesn't save active margin per state
double ActiveDRAMClockChangeLatencyMargin[DC__NUM_DPP__MAX];
+ double CachedActiveDRAMClockChangeLatencyMargin[DC__NUM_DPP__MAX]; // Cache in dml_get_voltage_level for debug purposes only
double MinActiveDRAMClockChangeMargin;
double InitFillLevel;
double FinalFillMargin;
@@ -211,6 +333,13 @@ struct vba_vars_st {
double UrgentLatencyPixelMixedWithVMData;
double UrgentLatencyVMDataOnly;
double UrgentLatency; // max of the above three
+ double USRRetrainingLatency;
+ double SMNLatency;
+ double FCLKChangeLatency;
+ unsigned int MALLAllocatedForDCNFinal;
+ double MaxAveragePercentOfIdealFabricBWDisplayCanUseInNormalSystemOperation;
+ double MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperationSTROBE;
+ double PercentOfIdealDRAMBWReceivedAfterUrgLatencySTROBE;
double WritebackLatency;
double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly; // Mode Support
double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData; // Mode Support
@@ -283,13 +412,47 @@ struct vba_vars_st {
double DPPCLKDelayCNVCCursor;
double DISPCLKDelaySubtotal;
bool ProgressiveToInterlaceUnitInOPP;
+ unsigned int CompressedBufferSegmentSizeInkByteFinal;
+ unsigned int CompbufReservedSpace64B;
+ unsigned int CompbufReservedSpaceZs;
+ unsigned int LineBufferSizeFinal;
+ unsigned int MaximumPixelsPerLinePerDSCUnit;
+ unsigned int AlphaPixelChunkSizeInKByte;
+ double MinPixelChunkSizeBytes;
+ unsigned int DCCMetaBufferSizeBytes;
// Pipe/Plane Parameters
+
+ /** @VoltageLevel:
+ * Every ASIC has a fixed number of DPM states, and some devices might
+ * have some particular voltage configuration that does not map
+ * directly to the DPM states. This field tells how many states the
+ * target device supports; even though this field combines the DPM and
+ * special SOC voltages, it mostly matches the total number of DPM
+ * states.
+ */
int VoltageLevel;
double FabricClock;
double DRAMSpeed;
double DISPCLK;
double SOCCLK;
double DCFCLK;
+ unsigned int MaxTotalDETInKByte;
+ unsigned int MinCompressedBufferSizeInKByte;
+ unsigned int NumberOfActiveSurfaces;
+ bool ViewportStationary[DC__NUM_DPP__MAX];
+ unsigned int RefreshRate[DC__NUM_DPP__MAX];
+ double OutputBPP[DC__NUM_DPP__MAX];
+ unsigned int GPUVMMinPageSizeKBytes[DC__NUM_DPP__MAX];
+ bool SynchronizeTimingsFinal;
+ bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
+ bool ForceOneRowForFrame[DC__NUM_DPP__MAX];
+ unsigned int ViewportXStartY[DC__NUM_DPP__MAX];
+ unsigned int ViewportXStartC[DC__NUM_DPP__MAX];
+ enum dm_rotation_angle SourceRotation[DC__NUM_DPP__MAX];
+ bool DRRDisplay[DC__NUM_DPP__MAX];
+ bool PteBufferMode[DC__NUM_DPP__MAX];
+ enum dm_output_type OutputType[DC__NUM_DPP__MAX];
+ enum dm_output_rate OutputRate[DC__NUM_DPP__MAX];
unsigned int NumberOfActivePlanes;
unsigned int NumberOfDSCSlices[DC__NUM_DPP__MAX];
@@ -354,6 +517,8 @@ struct vba_vars_st {
unsigned int CursorBPP[DC__NUM_DPP__MAX][DC__NUM_CURSOR__MAX];
bool XFCEnabled[DC__NUM_DPP__MAX];
bool ScalerEnabled[DC__NUM_DPP__MAX];
+ unsigned int VBlankNom[DC__NUM_DPP__MAX];
+ bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment;
// Intermediates/Informational
bool ImmediateFlipSupport;
@@ -391,6 +556,16 @@ struct vba_vars_st {
double StutterEfficiencyNotIncludingVBlank;
double NonUrgentLatencyTolerance;
double MinActiveDRAMClockChangeLatencySupported;
+ double Z8StutterEfficiencyBestCase;
+ unsigned int Z8NumberOfStutterBurstsPerFrameBestCase;
+ double Z8StutterEfficiencyNotIncludingVBlankBestCase;
+ double StutterPeriodBestCase;
+ Watermarks Watermark;
+ bool DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
+ unsigned int CompBufReservedSpaceKBytes;
+ unsigned int CompBufReservedSpace64B;
+ unsigned int CompBufReservedSpaceZs;
+ bool CompBufReservedSpaceNeedAdjustment;
// These are the clocks calcuated by the library but they are not actually
// used explicitly. They are fetched by tests and then possibly used. The
@@ -398,6 +573,10 @@ struct vba_vars_st {
double DISPCLK_calculated;
double DPPCLK_calculated[DC__NUM_DPP__MAX];
+ bool ImmediateFlipSupportedSurface[DC__NUM_DPP__MAX];
+
+ bool Use_One_Row_For_Frame[DC__NUM_DPP__MAX];
+ bool Use_One_Row_For_Frame_Flip[DC__NUM_DPP__MAX];
unsigned int VUpdateOffsetPix[DC__NUM_DPP__MAX];
double VUpdateWidthPix[DC__NUM_DPP__MAX];
double VReadyOffsetPix[DC__NUM_DPP__MAX];
@@ -428,6 +607,7 @@ struct vba_vars_st {
double DRAMSpeedPerState[DC__VOLTAGE_STATES];
double MaxDispclk[DC__VOLTAGE_STATES];
int VoltageOverrideLevel;
+ double PHYCLKD32PerState[DC__VOLTAGE_STATES];
/*outputs*/
bool ScaleRatioAndTapsSupport;
@@ -451,6 +631,51 @@ struct vba_vars_st {
bool PitchSupport;
enum dm_validation_status ValidationStatus[DC__VOLTAGE_STATES];
+ /* Mode Support Reason */
+ bool P2IWith420;
+ bool DSCOnlyIfNecessaryWithBPP;
+ bool DSC422NativeNotSupported;
+ bool LinkRateDoesNotMatchDPVersion;
+ bool LinkRateForMultistreamNotIndicated;
+ bool BPPForMultistreamNotIndicated;
+ bool MultistreamWithHDMIOreDP;
+ bool MSOOrODMSplitWithNonDPLink;
+ bool NotEnoughLanesForMSO;
+ bool ViewportExceedsSurface;
+
+ bool ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified;
+ bool ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe;
+ bool InvalidCombinationOfMALLUseForPStateAndStaticScreen;
+ bool InvalidCombinationOfMALLUseForPState;
+
+ enum dm_output_link_dp_rate OutputLinkDPRate[DC__NUM_DPP__MAX];
+ double PrefetchLinesYThisState[DC__NUM_DPP__MAX];
+ double PrefetchLinesCThisState[DC__NUM_DPP__MAX];
+ double meta_row_bandwidth_this_state[DC__NUM_DPP__MAX];
+ double dpte_row_bandwidth_this_state[DC__NUM_DPP__MAX];
+ double DPTEBytesPerRowThisState[DC__NUM_DPP__MAX];
+ double PDEAndMetaPTEBytesPerFrameThisState[DC__NUM_DPP__MAX];
+ double MetaRowBytesThisState[DC__NUM_DPP__MAX];
+ bool use_one_row_for_frame[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
+ bool use_one_row_for_frame_flip[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
+ bool use_one_row_for_frame_this_state[DC__NUM_DPP__MAX];
+ bool use_one_row_for_frame_flip_this_state[DC__NUM_DPP__MAX];
+
+ unsigned int OutputTypeAndRatePerState[DC__VOLTAGE_STATES][DC__NUM_DPP__MAX];
+ double RequiredDISPCLKPerSurface[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
+ unsigned int MacroTileHeightY[DC__NUM_DPP__MAX];
+ unsigned int MacroTileHeightC[DC__NUM_DPP__MAX];
+ unsigned int MacroTileWidthY[DC__NUM_DPP__MAX];
+ unsigned int MacroTileWidthC[DC__NUM_DPP__MAX];
+ bool ImmediateFlipRequiredFinal;
+ bool DCCProgrammingAssumesScanDirectionUnknownFinal;
+ bool EnoughWritebackUnits;
+ bool ODMCombine2To1SupportCheckOK[DC__VOLTAGE_STATES];
+ bool NumberOfDP2p0Support;
+ unsigned int MaxNumDP2p0Streams;
+ unsigned int MaxNumDP2p0Outputs;
+ enum dm_output_type OutputTypePerState[DC__VOLTAGE_STATES][DC__NUM_DPP__MAX];
+ enum dm_output_rate OutputRatePerState[DC__VOLTAGE_STATES][DC__NUM_DPP__MAX];
double WritebackLineBufferLumaBufferSize;
double WritebackLineBufferChromaBufferSize;
double WritebackMinHSCLRatio;
@@ -544,6 +769,8 @@ struct vba_vars_st {
bool DTBCLKRequiredMoreThanSupported[DC__VOLTAGE_STATES];
double UrgentRoundTripAndOutOfOrderLatencyPerState[DC__VOLTAGE_STATES];
bool ROBSupport[DC__VOLTAGE_STATES][2];
+ //based on rev 99: Dim DCCMetaBufferSizeSupport(NumberOfStates, 1) As Boolean
+ bool DCCMetaBufferSizeSupport[DC__VOLTAGE_STATES][2];
bool PTEBufferSizeNotExceeded[DC__VOLTAGE_STATES][2];
bool TotalVerticalActiveBandwidthSupport[DC__VOLTAGE_STATES][2];
double MaxTotalVerticalActiveAvailableBandwidth[DC__VOLTAGE_STATES][2];
@@ -585,8 +812,6 @@ struct vba_vars_st {
double PSCL_FACTOR[DC__NUM_DPP__MAX];
double PSCL_FACTOR_CHROMA[DC__NUM_DPP__MAX];
double MaximumVStartup[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
- unsigned int MacroTileWidthY[DC__NUM_DPP__MAX];
- unsigned int MacroTileWidthC[DC__NUM_DPP__MAX];
double AlignedDCCMetaPitch[DC__NUM_DPP__MAX];
double AlignedYPitch[DC__NUM_DPP__MAX];
double AlignedCPitch[DC__NUM_DPP__MAX];
@@ -644,8 +869,7 @@ struct vba_vars_st {
double dummy7[DC__NUM_DPP__MAX];
double dummy8[DC__NUM_DPP__MAX];
double dummy13[DC__NUM_DPP__MAX];
- unsigned int dummyinteger1ms[DC__NUM_DPP__MAX];
- double dummyinteger2ms[DC__NUM_DPP__MAX];
+ double dummy_double_array[2][DC__NUM_DPP__MAX];
unsigned int dummyinteger3[DC__NUM_DPP__MAX];
unsigned int dummyinteger4[DC__NUM_DPP__MAX];
unsigned int dummyinteger5;
@@ -655,14 +879,8 @@ struct vba_vars_st {
unsigned int dummyinteger9;
unsigned int dummyinteger10;
unsigned int dummyinteger11;
- unsigned int dummyinteger12;
- unsigned int dummyinteger30;
- unsigned int dummyinteger31;
- unsigned int dummyinteger32;
- unsigned int dummyintegerarr1[DC__NUM_DPP__MAX];
- unsigned int dummyintegerarr2[DC__NUM_DPP__MAX];
- unsigned int dummyintegerarr3[DC__NUM_DPP__MAX];
- unsigned int dummyintegerarr4[DC__NUM_DPP__MAX];
+ unsigned int dummy_integer_array[8][DC__NUM_DPP__MAX];
+
bool dummysinglestring;
bool SingleDPPViewportSizeSupportPerPlane[DC__NUM_DPP__MAX];
double PlaneRequiredDISPCLKWithODMCombine2To1;
@@ -802,7 +1020,6 @@ struct vba_vars_st {
double TimePerChromaMetaChunkFlip[DC__NUM_DPP__MAX];
unsigned int DCCCMaxUncompressedBlock[DC__NUM_DPP__MAX];
unsigned int DCCCMaxCompressedBlock[DC__NUM_DPP__MAX];
- unsigned int DCCCIndependent64ByteBlock[DC__NUM_DPP__MAX];
double VStartupMargin;
bool NotEnoughTimeForDynamicMetadata[DC__NUM_DPP__MAX];
@@ -835,6 +1052,7 @@ struct vba_vars_st {
double MinFullDETBufferingTime;
double AverageReadBandwidthGBytePerSecond;
bool FirstMainPlane;
+ bool NotEnoughDETSwathFillLatencyHiding;
unsigned int ViewportWidthChroma[DC__NUM_DPP__MAX];
unsigned int ViewportHeightChroma[DC__NUM_DPP__MAX];
@@ -893,8 +1111,8 @@ struct vba_vars_st {
double meta_row_bandwidth[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
double DETBufferSizeYAllStates[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
double DETBufferSizeCAllStates[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
- int swath_width_luma_ub_all_states[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
- int swath_width_chroma_ub_all_states[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
+ unsigned int swath_width_luma_ub_all_states[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
+ unsigned int swath_width_chroma_ub_all_states[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
bool NotUrgentLatencyHiding[DC__VOLTAGE_STATES][2];
unsigned int SwathHeightYAllStates[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
unsigned int SwathHeightCAllStates[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
@@ -907,8 +1125,6 @@ struct vba_vars_st {
double WritebackDelayTime[DC__NUM_DPP__MAX];
unsigned int DCCYIndependentBlock[DC__NUM_DPP__MAX];
unsigned int DCCCIndependentBlock[DC__NUM_DPP__MAX];
- unsigned int dummyinteger15;
- unsigned int dummyinteger16;
unsigned int dummyinteger17;
unsigned int dummyinteger18;
unsigned int dummyinteger19;
@@ -949,7 +1165,7 @@ struct vba_vars_st {
double UrgBurstFactorLumaPre[DC__NUM_DPP__MAX];
double UrgBurstFactorChromaPre[DC__NUM_DPP__MAX];
bool NotUrgentLatencyHidingPre[DC__NUM_DPP__MAX];
- bool LinkCapacitySupport[DC__NUM_DPP__MAX];
+ bool LinkCapacitySupport[DC__VOLTAGE_STATES];
bool VREADY_AT_OR_AFTER_VSYNC[DC__NUM_DPP__MAX];
unsigned int MIN_DST_Y_NEXT_START[DC__NUM_DPP__MAX];
unsigned int VFrontPorch[DC__NUM_DPP__MAX];
@@ -969,6 +1185,59 @@ struct vba_vars_st {
int Z8NumberOfStutterBurstsPerFrame;
unsigned int MaximumDSCBitsPerComponent;
unsigned int NotEnoughUrgentLatencyHidingA[DC__VOLTAGE_STATES][2];
+ double ReadBandwidthSurfaceLuma[DC__NUM_DPP__MAX];
+ double ReadBandwidthSurfaceChroma[DC__NUM_DPP__MAX];
+ double SurfaceRequiredDISPCLKWithoutODMCombine;
+ double SurfaceRequiredDISPCLK;
+ double MinActiveFCLKChangeLatencySupported;
+ int MinVoltageLevel;
+ int MaxVoltageLevel;
+ unsigned int TotalNumberOfSingleDPPSurfaces[DC__VOLTAGE_STATES][2];
+ unsigned int CompressedBufferSizeInkByteAllStates[DC__VOLTAGE_STATES][2];
+ unsigned int DETBufferSizeInKByteAllStates[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
+ unsigned int DETBufferSizeInKByteThisState[DC__NUM_DPP__MAX];
+ unsigned int SurfaceSizeInMALL[DC__NUM_DPP__MAX];
+ bool ExceededMALLSize;
+ bool PTE_BUFFER_MODE[DC__NUM_DPP__MAX];
+ unsigned int BIGK_FRAGMENT_SIZE[DC__NUM_DPP__MAX];
+ unsigned int CompressedBufferSizeInkByteThisState;
+ enum dm_fclock_change_support FCLKChangeSupport[DC__VOLTAGE_STATES][2];
+ bool USRRetrainingSupport[DC__VOLTAGE_STATES][2];
+ enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[DC__NUM_DPP__MAX];
+ bool UnboundedRequestEnabledAllStates[DC__VOLTAGE_STATES][2];
+ bool SingleDPPViewportSizeSupportPerSurface[DC__NUM_DPP__MAX];
+ enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[DC__NUM_DPP__MAX];
+ bool UnboundedRequestEnabledThisState;
+ bool DRAMClockChangeRequirementFinal;
+ bool FCLKChangeRequirementFinal;
+ bool USRRetrainingRequiredFinal;
+ unsigned int DETSizeOverride[DC__NUM_DPP__MAX];
+ unsigned int nomDETInKByte;
+ enum mpc_combine_affinity MPCCombineUse[DC__NUM_DPP__MAX];
+ bool MPCCombineMethodIncompatible;
+ unsigned int RequiredSlots[DC__VOLTAGE_STATES][DC__NUM_DPP__MAX];
+ bool ExceededMultistreamSlots[DC__VOLTAGE_STATES];
+ enum odm_combine_policy ODMUse[DC__NUM_DPP__MAX];
+ unsigned int OutputMultistreamId[DC__NUM_DPP__MAX];
+ bool OutputMultistreamEn[DC__NUM_DPP__MAX];
+ bool UsesMALLForStaticScreen[DC__NUM_DPP__MAX];
+ double MaxActiveDRAMClockChangeLatencySupported[DC__NUM_DPP__MAX];
+ double WritebackAllowFCLKChangeEndPosition[DC__NUM_DPP__MAX];
+ bool PTEBufferSizeNotExceededPerState[DC__NUM_DPP__MAX]; // new in DML32
+ bool DCCMetaBufferSizeNotExceededPerState[DC__NUM_DPP__MAX]; // new in DML32
+ bool NotEnoughDSCSlices[DC__VOLTAGE_STATES];
+ bool PixelsPerLinePerDSCUnitSupport[DC__VOLTAGE_STATES];
+ bool DCCMetaBufferSizeNotExceeded[DC__VOLTAGE_STATES][2];
+ unsigned int dpte_row_height_linear[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_height_linear_chroma[DC__NUM_DPP__MAX];
+ unsigned int BlockHeightY[DC__NUM_DPP__MAX];
+ unsigned int BlockHeightC[DC__NUM_DPP__MAX];
+ unsigned int BlockWidthY[DC__NUM_DPP__MAX];
+ unsigned int BlockWidthC[DC__NUM_DPP__MAX];
+ unsigned int SubViewportLinesNeededInMALL[DC__NUM_DPP__MAX];
+ bool VActiveBandwithSupport[DC__VOLTAGE_STATES][2];
+ bool NotEnoughDETSwathFillLatencyHidingPerState[DC__VOLTAGE_STATES][2];
+ struct dummy_vars dummy_vars;
};
bool CalculateMinAndMaxPrefetchMode(
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c
index 71ea503cb32f..12ff65b6a7e5 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c
@@ -122,17 +122,6 @@ void print__data_rq_misc_params_st(struct display_mode_lib *mode_lib, const stru
dml_print("DML_RQ_DLG_CALC: =====================================\n");
}
-void print__rq_dlg_params_st(struct display_mode_lib *mode_lib, const struct _vcs_dpi_display_rq_dlg_params_st *rq_dlg_param)
-{
- dml_print("DML_RQ_DLG_CALC: =====================================\n");
- dml_print("DML_RQ_DLG_CALC: DISPLAY_RQ_DLG_PARAM_ST\n");
- dml_print("DML_RQ_DLG_CALC: <LUMA>\n");
- print__data_rq_dlg_params_st(mode_lib, &rq_dlg_param->rq_l);
- dml_print("DML_RQ_DLG_CALC: <CHROMA>\n");
- print__data_rq_dlg_params_st(mode_lib, &rq_dlg_param->rq_c);
- dml_print("DML_RQ_DLG_CALC: =====================================\n");
-}
-
void print__dlg_sys_params_st(struct display_mode_lib *mode_lib, const struct _vcs_dpi_display_dlg_sys_params_st *dlg_sys_param)
{
dml_print("DML_RQ_DLG_CALC: =====================================\n");
@@ -142,9 +131,6 @@ void print__dlg_sys_params_st(struct display_mode_lib *mode_lib, const struct _v
dml_print("DML_RQ_DLG_CALC: t_sr_wm_us = %3.2f\n", dlg_sys_param->t_sr_wm_us);
dml_print("DML_RQ_DLG_CALC: t_extra_us = %3.2f\n", dlg_sys_param->t_extra_us);
dml_print(
- "DML_RQ_DLG_CALC: t_srx_delay_us = %3.2f\n",
- dlg_sys_param->t_srx_delay_us);
- dml_print(
"DML_RQ_DLG_CALC: deepsleep_dcfclk_mhz = %3.2f\n",
dlg_sys_param->deepsleep_dcfclk_mhz);
dml_print(
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.h b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.h
index ebcd717744e5..2bc64c4081dc 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.h
@@ -35,7 +35,6 @@ void print__rq_params_st(struct display_mode_lib *mode_lib, const struct _vcs_dp
void print__data_rq_sizing_params_st(struct display_mode_lib *mode_lib, const struct _vcs_dpi_display_data_rq_sizing_params_st *rq_sizing);
void print__data_rq_dlg_params_st(struct display_mode_lib *mode_lib, const struct _vcs_dpi_display_data_rq_dlg_params_st *rq_dlg_param);
void print__data_rq_misc_params_st(struct display_mode_lib *mode_lib, const struct _vcs_dpi_display_data_rq_misc_params_st *rq_misc_param);
-void print__rq_dlg_params_st(struct display_mode_lib *mode_lib, const struct _vcs_dpi_display_rq_dlg_params_st *rq_dlg_param);
void print__dlg_sys_params_st(struct display_mode_lib *mode_lib, const struct _vcs_dpi_display_dlg_sys_params_st *dlg_sys_param);
void print__data_rq_regs_st(struct display_mode_lib *mode_lib, const struct _vcs_dpi_display_data_rq_regs_st *rq_regs);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c
index 59dc2c5b58dd..88dc2b97e7bf 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c
@@ -39,7 +39,7 @@
static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma)
{
- unsigned int ret_val = 0;
+ unsigned int ret_val = 1;
if (source_format == dm_444_16) {
if (!is_chroma)
@@ -559,12 +559,11 @@ static void get_surf_rq_param(
const struct _vcs_dpi_display_pipe_source_params_st *pipe_src_param,
bool is_chroma)
{
- bool mode_422 = 0;
unsigned int vp_width = 0;
unsigned int vp_height = 0;
unsigned int data_pitch = 0;
unsigned int meta_pitch = 0;
- unsigned int ppe = mode_422 ? 2 : 1;
+ unsigned int ppe = 1;
bool surf_linear;
bool surf_vert;
unsigned int bytes_per_element;
@@ -1331,10 +1330,6 @@ void dml1_rq_dlg_get_dlg_params(
if (dual_plane)
DTRACE("DLG: %s: swath_height_c = %d", __func__, swath_height_c);
- DTRACE(
- "DLG: %s: t_srx_delay_us = %3.2f",
- __func__,
- (double) dlg_sys_param->t_srx_delay_us);
DTRACE("DLG: %s: line_time_in_us = %3.2f", __func__, (double) line_time_in_us);
DTRACE("DLG: %s: vupdate_offset = %d", __func__, vupdate_offset);
DTRACE("DLG: %s: vupdate_width = %d", __func__, vupdate_width);
@@ -1600,11 +1595,6 @@ void dml1_rq_dlg_get_dlg_params(
swath_width_pixels_ub_c = swath_width_ub_c * 1;
}
- hscale_pixel_rate_l = 0.;
- hscale_pixel_rate_c = 0.;
- min_hratio_fact_l = 1.0;
- min_hratio_fact_c = 1.0;
-
if (htaps_l <= 1)
min_hratio_fact_l = 2.0;
else if (htaps_l <= 6) {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
index 479d7d83220c..6b2ab4ec2b5f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
@@ -66,24 +66,23 @@ static inline double dml_max5(double a, double b, double c, double d, double e)
static inline double dml_ceil(double a, double granularity)
{
+ if (granularity == 0)
+ return 0;
return (double) dcn_bw_ceil2(a, granularity);
}
static inline double dml_floor(double a, double granularity)
{
+ if (granularity == 0)
+ return 0;
return (double) dcn_bw_floor2(a, granularity);
}
static inline double dml_round(double a)
{
- double round_pt = 0.5;
- double ceil = dml_ceil(a, 1);
- double floor = dml_floor(a, 1);
+ const double round_pt = 0.5;
- if (a - floor >= round_pt)
- return ceil;
- else
- return floor;
+ return dml_floor(a + round_pt, 1);
}
/* float
@@ -119,11 +118,15 @@ static inline double dml_ceil_2(double f)
static inline double dml_ceil_ex(double x, double granularity)
{
+ if (granularity == 0)
+ return 0;
return (double) dcn_bw_ceil2(x, granularity);
}
static inline double dml_floor_ex(double x, double granularity)
{
+ if (granularity == 0)
+ return 0;
return (double) dcn_bw_floor2(x, granularity);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dsc/qp_tables.h b/drivers/gpu/drm/amd/display/dc/dml/dsc/qp_tables.h
index e5fac9f4181d..dcff0dd2b6a1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dsc/qp_tables.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dsc/qp_tables.h
@@ -25,7 +25,7 @@
*/
-const qp_table qp_table_422_10bpc_min = {
+static const qp_table qp_table_422_10bpc_min = {
{ 6, { 0, 4, 5, 6, 6, 6, 6, 7, 7, 8, 9, 9, 9, 12, 16} },
{ 6.5, { 0, 4, 5, 6, 6, 6, 6, 7, 7, 8, 9, 9, 9, 12, 16} },
{ 7, { 0, 4, 5, 6, 6, 6, 6, 7, 7, 7, 9, 9, 9, 11, 15} },
@@ -58,7 +58,7 @@ const qp_table qp_table_422_10bpc_min = {
};
-const qp_table qp_table_444_8bpc_max = {
+static const qp_table qp_table_444_8bpc_max = {
{ 6, { 4, 6, 8, 8, 9, 9, 9, 10, 11, 12, 12, 12, 12, 13, 15} },
{ 6.5, { 4, 6, 7, 8, 8, 8, 9, 10, 11, 11, 12, 12, 12, 13, 15} },
{ 7, { 4, 5, 7, 7, 8, 8, 8, 9, 10, 11, 11, 12, 12, 13, 14} },
@@ -99,7 +99,7 @@ const qp_table qp_table_444_8bpc_max = {
};
-const qp_table qp_table_420_12bpc_max = {
+static const qp_table qp_table_420_12bpc_max = {
{ 4, {11, 12, 13, 14, 15, 15, 15, 16, 17, 18, 18, 19, 19, 21, 22} },
{ 4.5, {10, 11, 12, 13, 14, 15, 15, 16, 17, 18, 18, 19, 19, 20, 21} },
{ 5, { 9, 11, 12, 13, 14, 15, 15, 16, 17, 17, 18, 18, 19, 20, 21} },
@@ -132,7 +132,7 @@ const qp_table qp_table_420_12bpc_max = {
};
-const qp_table qp_table_444_10bpc_min = {
+static const qp_table qp_table_444_10bpc_min = {
{ 6, { 0, 4, 7, 7, 9, 9, 9, 9, 9, 10, 10, 10, 10, 12, 18} },
{ 6.5, { 0, 4, 6, 7, 8, 8, 9, 9, 9, 9, 10, 10, 10, 12, 18} },
{ 7, { 0, 4, 6, 6, 8, 8, 8, 8, 8, 9, 9, 10, 10, 12, 17} },
@@ -185,7 +185,7 @@ const qp_table qp_table_444_10bpc_min = {
};
-const qp_table qp_table_420_8bpc_max = {
+static const qp_table qp_table_420_8bpc_max = {
{ 4, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 13, 14} },
{ 4.5, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 12, 13} },
{ 5, { 3, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 10, 11, 12, 13} },
@@ -206,7 +206,7 @@ const qp_table qp_table_420_8bpc_max = {
};
-const qp_table qp_table_444_8bpc_min = {
+static const qp_table qp_table_444_8bpc_min = {
{ 6, { 0, 1, 3, 3, 5, 5, 5, 5, 5, 6, 6, 6, 6, 9, 14} },
{ 6.5, { 0, 1, 2, 3, 4, 4, 5, 5, 5, 5, 6, 6, 6, 9, 14} },
{ 7, { 0, 0, 2, 2, 4, 4, 4, 4, 4, 5, 5, 6, 6, 9, 13} },
@@ -247,7 +247,7 @@ const qp_table qp_table_444_8bpc_min = {
};
-const qp_table qp_table_444_12bpc_min = {
+static const qp_table qp_table_444_12bpc_min = {
{ 6, { 0, 5, 11, 11, 13, 13, 13, 13, 13, 14, 14, 14, 14, 17, 22} },
{ 6.5, { 0, 5, 10, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 17, 22} },
{ 7, { 0, 5, 10, 10, 12, 12, 12, 12, 12, 13, 13, 14, 14, 17, 21} },
@@ -312,7 +312,7 @@ const qp_table qp_table_444_12bpc_min = {
};
-const qp_table qp_table_420_12bpc_min = {
+static const qp_table qp_table_420_12bpc_min = {
{ 4, { 0, 4, 9, 10, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 21} },
{ 4.5, { 0, 4, 8, 9, 10, 11, 11, 11, 11, 11, 13, 13, 13, 15, 20} },
{ 5, { 0, 4, 8, 9, 10, 11, 11, 11, 11, 11, 13, 13, 13, 15, 20} },
@@ -345,7 +345,7 @@ const qp_table qp_table_420_12bpc_min = {
};
-const qp_table qp_table_422_12bpc_min = {
+static const qp_table qp_table_422_12bpc_min = {
{ 6, { 0, 4, 9, 10, 11, 11, 11, 11, 11, 11, 13, 13, 13, 16, 20} },
{ 6.5, { 0, 4, 9, 10, 11, 11, 11, 11, 11, 11, 13, 13, 13, 16, 20} },
{ 7, { 0, 4, 9, 10, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 19} },
@@ -386,7 +386,7 @@ const qp_table qp_table_422_12bpc_min = {
};
-const qp_table qp_table_422_12bpc_max = {
+static const qp_table qp_table_422_12bpc_max = {
{ 6, {12, 12, 13, 14, 15, 15, 15, 16, 17, 18, 18, 19, 19, 20, 21} },
{ 6.5, {12, 12, 13, 14, 15, 15, 15, 16, 17, 18, 18, 19, 19, 20, 21} },
{ 7, {11, 12, 13, 14, 15, 15, 15, 16, 17, 17, 18, 18, 19, 19, 20} },
@@ -427,7 +427,7 @@ const qp_table qp_table_422_12bpc_max = {
};
-const qp_table qp_table_444_12bpc_max = {
+static const qp_table qp_table_444_12bpc_max = {
{ 6, {12, 14, 16, 16, 17, 17, 17, 18, 19, 20, 20, 20, 20, 21, 23} },
{ 6.5, {12, 14, 15, 16, 16, 16, 17, 18, 19, 19, 20, 20, 20, 21, 23} },
{ 7, {12, 13, 15, 15, 16, 16, 16, 17, 18, 19, 19, 20, 20, 21, 22} },
@@ -492,7 +492,7 @@ const qp_table qp_table_444_12bpc_max = {
};
-const qp_table qp_table_420_8bpc_min = {
+static const qp_table qp_table_420_8bpc_min = {
{ 4, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 9, 13} },
{ 4.5, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 8, 12} },
{ 5, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 8, 12} },
@@ -513,7 +513,7 @@ const qp_table qp_table_420_8bpc_min = {
};
-const qp_table qp_table_422_8bpc_min = {
+static const qp_table qp_table_422_8bpc_min = {
{ 6, { 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 5, 5, 5, 8, 12} },
{ 6.5, { 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 5, 5, 5, 8, 12} },
{ 7, { 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 11} },
@@ -538,7 +538,7 @@ const qp_table qp_table_422_8bpc_min = {
};
-const qp_table qp_table_422_10bpc_max = {
+static const qp_table qp_table_422_10bpc_max = {
{ 6, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 16, 17} },
{ 6.5, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 16, 17} },
{ 7, { 7, 8, 9, 10, 11, 11, 11, 12, 13, 13, 14, 14, 15, 15, 16} },
@@ -571,7 +571,7 @@ const qp_table qp_table_422_10bpc_max = {
};
-const qp_table qp_table_420_10bpc_max = {
+static const qp_table qp_table_420_10bpc_max = {
{ 4, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 17, 18} },
{ 4.5, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 16, 17} },
{ 5, { 7, 8, 9, 10, 11, 11, 11, 12, 13, 13, 14, 14, 15, 16, 17} },
@@ -598,7 +598,7 @@ const qp_table qp_table_420_10bpc_max = {
};
-const qp_table qp_table_420_10bpc_min = {
+static const qp_table qp_table_420_10bpc_min = {
{ 4, { 0, 4, 4, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 13, 17} },
{ 4.5, { 0, 4, 4, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 12, 16} },
{ 5, { 0, 4, 4, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 12, 16} },
@@ -625,7 +625,7 @@ const qp_table qp_table_420_10bpc_min = {
};
-const qp_table qp_table_444_10bpc_max = {
+static const qp_table qp_table_444_10bpc_max = {
{ 6, { 8, 10, 12, 12, 13, 13, 13, 14, 15, 16, 16, 16, 16, 17, 19} },
{ 6.5, { 8, 10, 11, 12, 12, 12, 13, 14, 15, 15, 16, 16, 16, 17, 19} },
{ 7, { 8, 9, 11, 11, 12, 12, 12, 13, 14, 15, 15, 16, 16, 17, 18} },
@@ -678,7 +678,7 @@ const qp_table qp_table_444_10bpc_max = {
};
-const qp_table qp_table_422_8bpc_max = {
+static const qp_table qp_table_422_8bpc_max = {
{ 6, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 12, 13} },
{ 6.5, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 12, 13} },
{ 7, { 3, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 10, 11, 11, 12} },
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c
index 3ee858f311d1..ef75eb7d5adc 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c
@@ -61,16 +61,6 @@ static double dsc_roundf(double num)
return (int)(num);
}
-static double dsc_ceil(double num)
-{
- double retval = (int)num;
-
- if (retval != num && num > 0)
- retval = num + 1;
-
- return (int)retval;
-}
-
static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc,
enum max_min max_min, float bpp)
{
@@ -78,7 +68,7 @@ static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc,
int sel = table_hash(mode, bpc, max_min);
int table_size = 0;
int index;
- const struct qp_entry *table = 0L;
+ const struct qp_entry *table = NULL;
// alias enum
enum { min = DAL_MM_MIN, max = DAL_MM_MAX };
@@ -103,7 +93,7 @@ static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc,
TABLE_CASE(420, 12, min);
}
- if (table == 0)
+ if (!table)
return;
index = (bpp - table[0].bpp) * 2;
@@ -268,24 +258,3 @@ void _do_calc_rc_params(struct rc_params *rc,
rc->rc_buf_thresh[13] = 8064;
}
-u32 _do_bytes_per_pixel_calc(int slice_width,
- u16 drm_bpp,
- bool is_navite_422_or_420)
-{
- float bpp;
- u32 bytes_per_pixel;
- double d_bytes_per_pixel;
-
- dc_assert_fp_enabled();
-
- bpp = ((float)drm_bpp / 16.0);
- d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width;
- // TODO: Make sure the formula for calculating this is precise (ceiling
- // vs. floor, and at what point they should be applied)
- if (is_navite_422_or_420)
- d_bytes_per_pixel /= 2;
-
- bytes_per_pixel = (u32)dsc_ceil(d_bytes_per_pixel * 0x10000000);
-
- return bytes_per_pixel;
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h
index b93b95409fbe..d7cd8cc24758 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h
@@ -27,7 +27,7 @@
#define __RC_CALC_FPU_H__
#include "os_types.h"
-#include <drm/drm_dsc.h>
+#include <drm/display/drm_dsc.h>
#define QP_SET_SIZE 15
@@ -78,10 +78,6 @@ struct qp_entry {
typedef struct qp_entry qp_table[];
-u32 _do_bytes_per_pixel_calc(int slice_width,
- u16 drm_bpp,
- bool is_navite_422_or_420);
-
void _do_calc_rc_params(struct rc_params *rc,
enum colour_mode cm,
enum bits_per_comp bpc,
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/Makefile b/drivers/gpu/drm/amd/display/dc/dml2_0/Makefile
new file mode 100644
index 000000000000..97e068b6bf6b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/Makefile
@@ -0,0 +1,140 @@
+# SPDX-License-Identifier: MIT */
+#
+# Copyright 2023 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Authors: AMD
+#
+# Makefile for dml2.
+
+dml2_ccflags := $(CC_FLAGS_FPU)
+dml2_rcflags := $(CC_FLAGS_NO_FPU)
+
+ifneq ($(CONFIG_FRAME_WARN),0)
+ ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y)
+ ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_COMPILE_TEST),yy)
+ frame_warn_limit := 4096
+ else
+ frame_warn_limit := 3072
+ endif
+ else
+ frame_warn_limit := 2056
+ endif
+
+ ifeq ($(call test-lt, $(CONFIG_FRAME_WARN), $(frame_warn_limit)),y)
+ frame_warn_flag := -Wframe-larger-than=$(frame_warn_limit)
+ endif
+endif
+
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/src/dml2_core
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/src/dml2_mcg/
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/src/dml2_dpmm/
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/src/dml2_pmo/
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/src/dml2_standalone_libraries/
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/src/inc
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/inc
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/
+
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/display_mode_core.o := $(dml2_ccflags) $(frame_warn_flag)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/display_mode_util.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml2_wrapper.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml2_utils.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml2_policy.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml2_translation_helper.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml2_mall_phantom.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml_display_rq_dlg_calc.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml2_dc_resource_mgmt.o := $(dml2_ccflags)
+
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/display_mode_core.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/display_mode_util.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml2_wrapper.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml2_utils.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml2_policy.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml2_translation_helper.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml2_mall_phantom.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml_display_rq_dlg_calc.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml2_dc_resource_mgmt.o := $(dml2_rcflags)
+
+DML2 = display_mode_core.o display_mode_util.o dml2_wrapper.o \
+ dml2_utils.o dml2_policy.o dml2_translation_helper.o dml2_dc_resource_mgmt.o dml2_mall_phantom.o \
+ dml_display_rq_dlg_calc.o
+
+AMD_DAL_DML2 = $(addprefix $(AMDDALPATH)/dc/dml2_0/,$(DML2))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DML2)
+
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_ccflags) $(frame_warn_flag)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.o := $(dml2_ccflags) $(frame_warn_flag)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml21_wrapper.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/dml21_translation_helper.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/dml21_utils.o := $(dml2_ccflags)
+
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml21_wrapper.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/dml21_translation_helper.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/dml21_utils.o := $(dml2_rcflags)
+
+DML21 := src/dml2_top/dml2_top_interfaces.o
+DML21 += src/dml2_top/dml2_top_soc15.o
+DML21 += src/dml2_core/dml2_core_dcn4.o
+DML21 += src/dml2_core/dml2_core_utils.o
+DML21 += src/dml2_core/dml2_core_factory.o
+DML21 += src/dml2_core/dml2_core_dcn4_calcs.o
+DML21 += src/dml2_dpmm/dml2_dpmm_dcn4.o
+DML21 += src/dml2_dpmm/dml2_dpmm_factory.o
+DML21 += src/dml2_mcg/dml2_mcg_dcn4.o
+DML21 += src/dml2_mcg/dml2_mcg_factory.o
+DML21 += src/dml2_pmo/dml2_pmo_dcn3.o
+DML21 += src/dml2_pmo/dml2_pmo_factory.o
+DML21 += src/dml2_pmo/dml2_pmo_dcn4_fams2.o
+DML21 += src/dml2_standalone_libraries/lib_float_math.o
+DML21 += dml21_translation_helper.o
+DML21 += dml21_wrapper.o
+DML21 += dml21_utils.o
+
+AMD_DAL_DML21 = $(addprefix $(AMDDALPATH)/dc/dml2_0/dml21/,$(DML21))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DML21)
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/cmntypes.h b/drivers/gpu/drm/amd/display/dc/dml2_0/cmntypes.h
new file mode 100644
index 000000000000..b954c9648fbe
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/cmntypes.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __CMNTYPES_H__
+#define __CMNTYPES_H__
+
+#ifdef __GNUC__
+#if __GNUC__ == 4 && __GNUC_MINOR__ > 7
+typedef unsigned int uint;
+#endif
+#endif
+
+typedef signed char int8, *pint8;
+typedef signed short int16, *pint16;
+typedef signed int int32, *pint32;
+typedef signed int64, *pint64;
+
+typedef unsigned char uint8, *puint8;
+typedef unsigned short uint16, *puint16;
+typedef unsigned int uint32, *puint32;
+typedef unsigned uint64, *puint64;
+
+typedef unsigned long int ulong;
+typedef unsigned char uchar;
+typedef unsigned int uint;
+
+typedef void *pvoid;
+typedef char *pchar;
+typedef const void *const_pvoid;
+typedef const char *const_pchar;
+
+typedef struct rgba_struct {
+ uint8 a;
+ uint8 r;
+ uint8 g;
+ uint8 b;
+} rgba_t;
+
+typedef struct {
+ uint8 blue;
+ uint8 green;
+ uint8 red;
+ uint8 alpha;
+} gen_color_t;
+
+typedef union {
+ uint32 val;
+ gen_color_t f;
+} gen_color_u;
+
+//
+// Types to make it easy to get or set the bits of a float/double.
+// Avoids automatic casting from int to float and back.
+//
+#if 0
+typedef union {
+ uint32 i;
+ float f;
+} uintfloat32;
+
+typedef union {
+ uint64 i;
+ double f;
+} uintfloat64;
+
+#ifndef UNREFERENCED_PARAMETER
+#define UNREFERENCED_PARAMETER(x) (x = x)
+#endif
+#endif
+
+#endif //__CMNTYPES_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.c
new file mode 100644
index 000000000000..c468f492b876
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.c
@@ -0,0 +1,10337 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "display_mode_core.h"
+#include "display_mode_util.h"
+#include "display_mode_lib_defines.h"
+
+#include "dml_assert.h"
+
+#define DML2_MAX_FMT_420_BUFFER_WIDTH 4096
+#define TB_BORROWED_MAX 400
+#define DML_MAX_VSTARTUP_START 1023
+
+// ---------------------------
+// Declaration Begins
+// ---------------------------
+static void CalculateBytePerPixelAndBlockSizes(
+ enum dml_source_format_class SourcePixelFormat,
+ enum dml_swizzle_mode SurfaceTiling,
+ // Output
+ dml_uint_t *BytePerPixelY,
+ dml_uint_t *BytePerPixelC,
+ dml_float_t *BytePerPixelDETY,
+ dml_float_t *BytePerPixelDETC,
+ dml_uint_t *BlockHeight256BytesY,
+ dml_uint_t *BlockHeight256BytesC,
+ dml_uint_t *BlockWidth256BytesY,
+ dml_uint_t *BlockWidth256BytesC,
+ dml_uint_t *MacroTileHeightY,
+ dml_uint_t *MacroTileHeightC,
+ dml_uint_t *MacroTileWidthY,
+ dml_uint_t *MacroTileWidthC);
+
+static dml_float_t CalculateWriteBackDISPCLK(
+ enum dml_source_format_class WritebackPixelFormat,
+ dml_float_t PixelClock,
+ dml_float_t WritebackHRatio,
+ dml_float_t WritebackVRatio,
+ dml_uint_t WritebackHTaps,
+ dml_uint_t WritebackVTaps,
+ dml_uint_t WritebackSourceWidth,
+ dml_uint_t WritebackDestinationWidth,
+ dml_uint_t HTotal,
+ dml_uint_t WritebackLineBufferSize,
+ dml_float_t DISPCLKDPPCLKVCOSpeed);
+
+static void CalculateVMRowAndSwath(
+ struct display_mode_lib_scratch_st *s,
+ struct CalculateVMRowAndSwath_params_st *p);
+
+static void CalculateOutputLink(
+ dml_float_t PHYCLKPerState,
+ dml_float_t PHYCLKD18PerState,
+ dml_float_t PHYCLKD32PerState,
+ dml_float_t Downspreading,
+ dml_bool_t IsMainSurfaceUsingTheIndicatedTiming,
+ enum dml_output_encoder_class Output,
+ enum dml_output_format_class OutputFormat,
+ dml_uint_t HTotal,
+ dml_uint_t HActive,
+ dml_float_t PixelClockBackEnd,
+ dml_float_t ForcedOutputLinkBPP,
+ dml_uint_t DSCInputBitPerComponent,
+ dml_uint_t NumberOfDSCSlices,
+ dml_float_t AudioSampleRate,
+ dml_uint_t AudioSampleLayout,
+ enum dml_odm_mode ODMModeNoDSC,
+ enum dml_odm_mode ODMModeDSC,
+ enum dml_dsc_enable DSCEnable,
+ dml_uint_t OutputLinkDPLanes,
+ enum dml_output_link_dp_rate OutputLinkDPRate,
+
+ // Output
+ dml_bool_t *RequiresDSC,
+ dml_bool_t *RequiresFEC,
+ dml_float_t *OutBpp,
+ enum dml_output_type_and_rate__type *OutputType,
+ enum dml_output_type_and_rate__rate *OutputRate,
+ dml_uint_t *RequiredSlots);
+
+static void CalculateODMMode(
+ dml_uint_t MaximumPixelsPerLinePerDSCUnit,
+ dml_uint_t HActive,
+ enum dml_output_encoder_class Output,
+ enum dml_output_format_class OutputFormat,
+ enum dml_odm_use_policy ODMUse,
+ dml_float_t StateDispclk,
+ dml_float_t MaxDispclk,
+ dml_bool_t DSCEnable,
+ dml_uint_t TotalNumberOfActiveDPP,
+ dml_uint_t MaxNumDPP,
+ dml_float_t PixelClock,
+ dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
+ dml_float_t DISPCLKRampingMargin,
+ dml_float_t DISPCLKDPPCLKVCOSpeed,
+ dml_uint_t NumberOfDSCSlices,
+
+ // Output
+ dml_bool_t *TotalAvailablePipesSupport,
+ dml_uint_t *NumberOfDPP,
+ enum dml_odm_mode *ODMMode,
+ dml_float_t *RequiredDISPCLKPerSurface);
+
+static dml_float_t CalculateRequiredDispclk(
+ enum dml_odm_mode ODMMode,
+ dml_float_t PixelClock,
+ dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
+ dml_float_t DISPCLKRampingMargin,
+ dml_float_t DISPCLKDPPCLKVCOSpeed,
+ dml_float_t MaxDispclkSingle);
+
+static void CalculateSinglePipeDPPCLKAndSCLThroughput(
+ dml_float_t HRatio,
+ dml_float_t HRatioChroma,
+ dml_float_t VRatio,
+ dml_float_t VRatioChroma,
+ dml_float_t MaxDCHUBToPSCLThroughput,
+ dml_float_t MaxPSCLToLBThroughput,
+ dml_float_t PixelClock,
+ enum dml_source_format_class SourcePixelFormat,
+ dml_uint_t HTaps,
+ dml_uint_t HTapsChroma,
+ dml_uint_t VTaps,
+ dml_uint_t VTapsChroma,
+
+ // Output
+ dml_float_t *PSCL_THROUGHPUT,
+ dml_float_t *PSCL_THROUGHPUT_CHROMA,
+ dml_float_t *DPPCLKUsingSingleDPP);
+
+static void CalculateDPPCLK(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
+ dml_float_t DISPCLKDPPCLKVCOSpeed,
+ dml_float_t DPPCLKUsingSingleDPP[],
+ dml_uint_t DPPPerSurface[],
+
+ // Output
+ dml_float_t *GlobalDPPCLK,
+ dml_float_t Dppclk[]);
+
+static void CalculateMALLUseForStaticScreen(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_uint_t MALLAllocatedForDCNFinal,
+ enum dml_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
+ dml_uint_t SurfaceSizeInMALL[],
+ dml_bool_t one_row_per_frame_fits_in_buffer[],
+
+ // Output
+ dml_bool_t UsesMALLForStaticScreen[]);
+
+static dml_uint_t dscceComputeDelay(
+ dml_uint_t bpc,
+ dml_float_t BPP,
+ dml_uint_t sliceWidth,
+ dml_uint_t numSlices,
+ enum dml_output_format_class pixelFormat,
+ enum dml_output_encoder_class Output);
+
+static dml_uint_t dscComputeDelay(enum dml_output_format_class pixelFormat,
+ enum dml_output_encoder_class Output);
+
+static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st *scratch,
+ struct CalculatePrefetchSchedule_params_st *p);
+
+static dml_float_t RoundToDFSGranularity(dml_float_t Clock, dml_bool_t round_up, dml_float_t VCOSpeed);
+
+static void CalculateDCCConfiguration(
+ dml_bool_t DCCEnabled,
+ dml_bool_t DCCProgrammingAssumesScanDirectionUnknown,
+ enum dml_source_format_class SourcePixelFormat,
+ dml_uint_t SurfaceWidthLuma,
+ dml_uint_t SurfaceWidthChroma,
+ dml_uint_t SurfaceHeightLuma,
+ dml_uint_t SurfaceHeightChroma,
+ dml_uint_t nomDETInKByte,
+ dml_uint_t RequestHeight256ByteLuma,
+ dml_uint_t RequestHeight256ByteChroma,
+ enum dml_swizzle_mode TilingFormat,
+ dml_uint_t BytePerPixelY,
+ dml_uint_t BytePerPixelC,
+ dml_float_t BytePerPixelDETY,
+ dml_float_t BytePerPixelDETC,
+ enum dml_rotation_angle SourceScan,
+ // Output
+ dml_uint_t *MaxUncompressedBlockLuma,
+ dml_uint_t *MaxUncompressedBlockChroma,
+ dml_uint_t *MaxCompressedBlockLuma,
+ dml_uint_t *MaxCompressedBlockChroma,
+ dml_uint_t *IndependentBlockLuma,
+ dml_uint_t *IndependentBlockChroma);
+
+static dml_uint_t CalculatePrefetchSourceLines(
+ dml_float_t VRatio,
+ dml_uint_t VTaps,
+ dml_bool_t Interlace,
+ dml_bool_t ProgressiveToInterlaceUnitInOPP,
+ dml_uint_t SwathHeight,
+ enum dml_rotation_angle SourceScan,
+ dml_bool_t ViewportStationary,
+ dml_uint_t SwathWidth,
+ dml_uint_t ViewportHeight,
+ dml_uint_t ViewportXStart,
+ dml_uint_t ViewportYStart,
+
+ // Output
+ dml_uint_t *VInitPreFill,
+ dml_uint_t *MaxNumSwath);
+
+static dml_uint_t CalculateVMAndRowBytes(
+ dml_bool_t ViewportStationary,
+ dml_bool_t DCCEnable,
+ dml_uint_t NumberOfDPPs,
+ dml_uint_t BlockHeight256Bytes,
+ dml_uint_t BlockWidth256Bytes,
+ enum dml_source_format_class SourcePixelFormat,
+ dml_uint_t SurfaceTiling,
+ dml_uint_t BytePerPixel,
+ enum dml_rotation_angle SourceScan,
+ dml_uint_t SwathWidth,
+ dml_uint_t ViewportHeight,
+ dml_uint_t ViewportXStart,
+ dml_uint_t ViewportYStart,
+ dml_bool_t GPUVMEnable,
+ dml_uint_t GPUVMMaxPageTableLevels,
+ dml_uint_t GPUVMMinPageSizeKBytes,
+ dml_uint_t PTEBufferSizeInRequests,
+ dml_uint_t Pitch,
+ dml_uint_t DCCMetaPitch,
+ dml_uint_t MacroTileWidth,
+ dml_uint_t MacroTileHeight,
+
+ // Output
+ dml_uint_t *MetaRowByte,
+ dml_uint_t *PixelPTEBytesPerRow,
+ dml_uint_t *PixelPTEBytesPerRowStorage, // for PTE buffer size check
+ dml_uint_t *dpte_row_width_ub,
+ dml_uint_t *dpte_row_height,
+ dml_uint_t *dpte_row_height_linear,
+ dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame,
+ dml_uint_t *dpte_row_width_ub_one_row_per_frame,
+ dml_uint_t *dpte_row_height_one_row_per_frame,
+ dml_uint_t *MetaRequestWidth,
+ dml_uint_t *MetaRequestHeight,
+ dml_uint_t *meta_row_width,
+ dml_uint_t *meta_row_height,
+ dml_uint_t *PixelPTEReqWidth,
+ dml_uint_t *PixelPTEReqHeight,
+ dml_uint_t *PTERequestSize,
+ dml_uint_t *DPDE0BytesFrame,
+ dml_uint_t *MetaPTEBytesFrame);
+
+static dml_float_t CalculateTWait(
+ dml_uint_t PrefetchMode,
+ enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
+ dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ dml_bool_t DRRDisplay,
+ dml_float_t DRAMClockChangeLatency,
+ dml_float_t FCLKChangeLatency,
+ dml_float_t UrgentLatency,
+ dml_float_t SREnterPlusExitTime);
+
+static void CalculatePrefetchMode(
+ enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank,
+ dml_uint_t *MinPrefetchMode,
+ dml_uint_t *MaxPrefetchMode);
+
+static void CalculateRowBandwidth(
+ dml_bool_t GPUVMEnable,
+ enum dml_source_format_class SourcePixelFormat,
+ dml_float_t VRatio,
+ dml_float_t VRatioChroma,
+ dml_bool_t DCCEnable,
+ dml_float_t LineTime,
+ dml_uint_t MetaRowByteLuma,
+ dml_uint_t MetaRowByteChroma,
+ dml_uint_t meta_row_height_luma,
+ dml_uint_t meta_row_height_chroma,
+ dml_uint_t PixelPTEBytesPerRowLuma,
+ dml_uint_t PixelPTEBytesPerRowChroma,
+ dml_uint_t dpte_row_height_luma,
+ dml_uint_t dpte_row_height_chroma,
+ // Output
+ dml_float_t *meta_row_bw,
+ dml_float_t *dpte_row_bw);
+
+static void CalculateFlipSchedule(
+ dml_float_t HostVMInefficiencyFactor,
+ dml_float_t UrgentExtraLatency,
+ dml_float_t UrgentLatency,
+ dml_uint_t GPUVMMaxPageTableLevels,
+ dml_bool_t HostVMEnable,
+ dml_uint_t HostVMMaxNonCachedPageTableLevels,
+ dml_bool_t GPUVMEnable,
+ dml_uint_t HostVMMinPageSize,
+ dml_float_t PDEAndMetaPTEBytesPerFrame,
+ dml_float_t MetaRowBytes,
+ dml_float_t DPTEBytesPerRow,
+ dml_float_t BandwidthAvailableForImmediateFlip,
+ dml_uint_t TotImmediateFlipBytes,
+ enum dml_source_format_class SourcePixelFormat,
+ dml_float_t LineTime,
+ dml_float_t VRatio,
+ dml_float_t VRatioChroma,
+ dml_float_t Tno_bw,
+ dml_bool_t DCCEnable,
+ dml_uint_t dpte_row_height,
+ dml_uint_t meta_row_height,
+ dml_uint_t dpte_row_height_chroma,
+ dml_uint_t meta_row_height_chroma,
+ dml_bool_t use_one_row_for_frame_flip,
+
+ // Output
+ dml_float_t *DestinationLinesToRequestVMInImmediateFlip,
+ dml_float_t *DestinationLinesToRequestRowInImmediateFlip,
+ dml_float_t *final_flip_bw,
+ dml_bool_t *ImmediateFlipSupportedForPipe);
+
+static dml_float_t CalculateWriteBackDelay(
+ enum dml_source_format_class WritebackPixelFormat,
+ dml_float_t WritebackHRatio,
+ dml_float_t WritebackVRatio,
+ dml_uint_t WritebackVTaps,
+ dml_uint_t WritebackDestinationWidth,
+ dml_uint_t WritebackDestinationHeight,
+ dml_uint_t WritebackSourceHeight,
+ dml_uint_t HTotal);
+
+static void CalculateVUpdateAndDynamicMetadataParameters(
+ dml_uint_t MaxInterDCNTileRepeaters,
+ dml_float_t Dppclk,
+ dml_float_t DISPCLK,
+ dml_float_t DCFClkDeepSleep,
+ dml_float_t PixelClock,
+ dml_uint_t HTotal,
+ dml_uint_t VBlank,
+ dml_uint_t DynamicMetadataTransmittedBytes,
+ dml_uint_t DynamicMetadataLinesBeforeActiveRequired,
+ dml_uint_t InterlaceEnable,
+ dml_bool_t ProgressiveToInterlaceUnitInOPP,
+ dml_float_t *TSetup,
+ dml_float_t *Tdmbf,
+ dml_float_t *Tdmec,
+ dml_float_t *Tdmsks,
+ dml_uint_t *VUpdateOffsetPix,
+ dml_uint_t *VUpdateWidthPix,
+ dml_uint_t *VReadyOffsetPix);
+
+static void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct dml_display_cfg_st *display_cfg, dml_bool_t ptoi_supported);
+
+static dml_float_t TruncToValidBPP(
+ dml_float_t LinkBitRate,
+ dml_uint_t Lanes,
+ dml_uint_t HTotal,
+ dml_uint_t HActive,
+ dml_float_t PixelClock,
+ dml_float_t DesiredBPP,
+ dml_bool_t DSCEnable,
+ enum dml_output_encoder_class Output,
+ enum dml_output_format_class Format,
+ dml_uint_t DSCInputBitPerComponent,
+ dml_uint_t DSCSlices,
+ dml_uint_t AudioRate,
+ dml_uint_t AudioLayout,
+ enum dml_odm_mode ODMModeNoDSC,
+ enum dml_odm_mode ODMModeDSC,
+ // Output
+ dml_uint_t *RequiredSlotsSingle);
+
+static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+ struct display_mode_lib_scratch_st *s,
+ struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *p);
+
+static void CalculateDCFCLKDeepSleep(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_uint_t BytePerPixelY[],
+ dml_uint_t BytePerPixelC[],
+ dml_float_t VRatio[],
+ dml_float_t VRatioChroma[],
+ dml_uint_t SwathWidthY[],
+ dml_uint_t SwathWidthC[],
+ dml_uint_t DPPPerSurface[],
+ dml_float_t HRatio[],
+ dml_float_t HRatioChroma[],
+ dml_float_t PixelClock[],
+ dml_float_t PSCL_THROUGHPUT[],
+ dml_float_t PSCL_THROUGHPUT_CHROMA[],
+ dml_float_t Dppclk[],
+ dml_float_t ReadBandwidthLuma[],
+ dml_float_t ReadBandwidthChroma[],
+ dml_uint_t ReturnBusWidth,
+
+ // Output
+ dml_float_t *DCFCLKDeepSleep);
+
+static void CalculateUrgentBurstFactor(
+ enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
+ dml_uint_t swath_width_luma_ub,
+ dml_uint_t swath_width_chroma_ub,
+ dml_uint_t SwathHeightY,
+ dml_uint_t SwathHeightC,
+ dml_float_t LineTime,
+ dml_float_t UrgentLatency,
+ dml_float_t CursorBufferSize,
+ dml_uint_t CursorWidth,
+ dml_uint_t CursorBPP,
+ dml_float_t VRatio,
+ dml_float_t VRatioC,
+ dml_float_t BytePerPixelInDETY,
+ dml_float_t BytePerPixelInDETC,
+ dml_uint_t DETBufferSizeY,
+ dml_uint_t DETBufferSizeC,
+ // Output
+ dml_float_t *UrgentBurstFactorCursor,
+ dml_float_t *UrgentBurstFactorLuma,
+ dml_float_t *UrgentBurstFactorChroma,
+ dml_bool_t *NotEnoughUrgentLatencyHiding);
+
+static dml_float_t RequiredDTBCLK(
+ dml_bool_t DSCEnable,
+ dml_float_t PixelClock,
+ enum dml_output_format_class OutputFormat,
+ dml_float_t OutputBpp,
+ dml_uint_t DSCSlices,
+ dml_uint_t HTotal,
+ dml_uint_t HActive,
+ dml_uint_t AudioRate,
+ dml_uint_t AudioLayoutSingle);
+
+static void UseMinimumDCFCLK(
+ struct display_mode_lib_scratch_st *scratch,
+ struct UseMinimumDCFCLK_params_st *p);
+
+static void CalculatePixelDeliveryTimes(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_float_t VRatio[],
+ dml_float_t VRatioChroma[],
+ dml_float_t VRatioPrefetchY[],
+ dml_float_t VRatioPrefetchC[],
+ dml_uint_t swath_width_luma_ub[],
+ dml_uint_t swath_width_chroma_ub[],
+ dml_uint_t DPPPerSurface[],
+ dml_float_t HRatio[],
+ dml_float_t HRatioChroma[],
+ dml_float_t PixelClock[],
+ dml_float_t PSCL_THROUGHPUT[],
+ dml_float_t PSCL_THROUGHPUT_CHROMA[],
+ dml_float_t Dppclk[],
+ dml_uint_t BytePerPixelC[],
+ enum dml_rotation_angle SourceScan[],
+ dml_uint_t NumberOfCursors[],
+ dml_uint_t CursorWidth[],
+ dml_uint_t CursorBPP[],
+ dml_uint_t BlockWidth256BytesY[],
+ dml_uint_t BlockHeight256BytesY[],
+ dml_uint_t BlockWidth256BytesC[],
+ dml_uint_t BlockHeight256BytesC[],
+
+ // Output
+ dml_float_t DisplayPipeLineDeliveryTimeLuma[],
+ dml_float_t DisplayPipeLineDeliveryTimeChroma[],
+ dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[],
+ dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[],
+ dml_float_t DisplayPipeRequestDeliveryTimeLuma[],
+ dml_float_t DisplayPipeRequestDeliveryTimeChroma[],
+ dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[],
+ dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[],
+ dml_float_t CursorRequestDeliveryTime[],
+ dml_float_t CursorRequestDeliveryTimePrefetch[]);
+
+static void CalculateMetaAndPTETimes(
+ dml_bool_t use_one_row_for_frame[],
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_bool_t GPUVMEnable,
+ dml_uint_t MetaChunkSize,
+ dml_uint_t MinMetaChunkSizeBytes,
+ dml_uint_t HTotal[],
+ dml_float_t VRatio[],
+ dml_float_t VRatioChroma[],
+ dml_float_t DestinationLinesToRequestRowInVBlank[],
+ dml_float_t DestinationLinesToRequestRowInImmediateFlip[],
+ dml_bool_t DCCEnable[],
+ dml_float_t PixelClock[],
+ dml_uint_t BytePerPixelY[],
+ dml_uint_t BytePerPixelC[],
+ enum dml_rotation_angle SourceScan[],
+ dml_uint_t dpte_row_height[],
+ dml_uint_t dpte_row_height_chroma[],
+ dml_uint_t meta_row_width[],
+ dml_uint_t meta_row_width_chroma[],
+ dml_uint_t meta_row_height[],
+ dml_uint_t meta_row_height_chroma[],
+ dml_uint_t meta_req_width[],
+ dml_uint_t meta_req_width_chroma[],
+ dml_uint_t meta_req_height[],
+ dml_uint_t meta_req_height_chroma[],
+ dml_uint_t dpte_group_bytes[],
+ dml_uint_t PTERequestSizeY[],
+ dml_uint_t PTERequestSizeC[],
+ dml_uint_t PixelPTEReqWidthY[],
+ dml_uint_t PixelPTEReqHeightY[],
+ dml_uint_t PixelPTEReqWidthC[],
+ dml_uint_t PixelPTEReqHeightC[],
+ dml_uint_t dpte_row_width_luma_ub[],
+ dml_uint_t dpte_row_width_chroma_ub[],
+
+ // Output
+ dml_float_t DST_Y_PER_PTE_ROW_NOM_L[],
+ dml_float_t DST_Y_PER_PTE_ROW_NOM_C[],
+ dml_float_t DST_Y_PER_META_ROW_NOM_L[],
+ dml_float_t DST_Y_PER_META_ROW_NOM_C[],
+ dml_float_t TimePerMetaChunkNominal[],
+ dml_float_t TimePerChromaMetaChunkNominal[],
+ dml_float_t TimePerMetaChunkVBlank[],
+ dml_float_t TimePerChromaMetaChunkVBlank[],
+ dml_float_t TimePerMetaChunkFlip[],
+ dml_float_t TimePerChromaMetaChunkFlip[],
+ dml_float_t time_per_pte_group_nom_luma[],
+ dml_float_t time_per_pte_group_vblank_luma[],
+ dml_float_t time_per_pte_group_flip_luma[],
+ dml_float_t time_per_pte_group_nom_chroma[],
+ dml_float_t time_per_pte_group_vblank_chroma[],
+ dml_float_t time_per_pte_group_flip_chroma[]);
+
+static void CalculateVMGroupAndRequestTimes(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_bool_t GPUVMEnable,
+ dml_uint_t GPUVMMaxPageTableLevels,
+ dml_uint_t HTotal[],
+ dml_uint_t BytePerPixelC[],
+ dml_float_t DestinationLinesToRequestVMInVBlank[],
+ dml_float_t DestinationLinesToRequestVMInImmediateFlip[],
+ dml_bool_t DCCEnable[],
+ dml_float_t PixelClock[],
+ dml_uint_t dpte_row_width_luma_ub[],
+ dml_uint_t dpte_row_width_chroma_ub[],
+ dml_uint_t vm_group_bytes[],
+ dml_uint_t dpde0_bytes_per_frame_ub_l[],
+ dml_uint_t dpde0_bytes_per_frame_ub_c[],
+ dml_uint_t meta_pte_bytes_per_frame_ub_l[],
+ dml_uint_t meta_pte_bytes_per_frame_ub_c[],
+
+ // Output
+ dml_float_t TimePerVMGroupVBlank[],
+ dml_float_t TimePerVMGroupFlip[],
+ dml_float_t TimePerVMRequestVBlank[],
+ dml_float_t TimePerVMRequestFlip[]);
+
+static void CalculateStutterEfficiency(
+ struct display_mode_lib_scratch_st *scratch,
+ struct CalculateStutterEfficiency_params_st *p);
+
+static void CalculateSwathAndDETConfiguration(
+ struct display_mode_lib_scratch_st *scratch,
+ struct CalculateSwathAndDETConfiguration_params_st *p);
+
+static void CalculateSwathWidth(
+ dml_bool_t ForceSingleDPP,
+ dml_uint_t NumberOfActiveSurfaces,
+ enum dml_source_format_class SourcePixelFormat[],
+ enum dml_rotation_angle SourceScan[],
+ dml_bool_t ViewportStationary[],
+ dml_uint_t ViewportWidth[],
+ dml_uint_t ViewportHeight[],
+ dml_uint_t ViewportXStart[],
+ dml_uint_t ViewportYStart[],
+ dml_uint_t ViewportXStartC[],
+ dml_uint_t ViewportYStartC[],
+ dml_uint_t SurfaceWidthY[],
+ dml_uint_t SurfaceWidthC[],
+ dml_uint_t SurfaceHeightY[],
+ dml_uint_t SurfaceHeightC[],
+ enum dml_odm_mode ODMMode[],
+ dml_uint_t BytePerPixY[],
+ dml_uint_t BytePerPixC[],
+ dml_uint_t Read256BytesBlockHeightY[],
+ dml_uint_t Read256BytesBlockHeightC[],
+ dml_uint_t Read256BytesBlockWidthY[],
+ dml_uint_t Read256BytesBlockWidthC[],
+ dml_uint_t BlendingAndTiming[],
+ dml_uint_t HActive[],
+ dml_float_t HRatio[],
+ dml_uint_t DPPPerSurface[],
+
+ // Output
+ dml_uint_t SwathWidthSingleDPPY[],
+ dml_uint_t SwathWidthSingleDPPC[],
+ dml_uint_t SwathWidthY[],
+ dml_uint_t SwathWidthC[],
+ dml_uint_t MaximumSwathHeightY[],
+ dml_uint_t MaximumSwathHeightC[],
+ dml_uint_t swath_width_luma_ub[],
+ dml_uint_t swath_width_chroma_ub[]);
+
+static dml_float_t CalculateExtraLatency(
+ dml_uint_t RoundTripPingLatencyCycles,
+ dml_uint_t ReorderingBytes,
+ dml_float_t DCFCLK,
+ dml_uint_t TotalNumberOfActiveDPP,
+ dml_uint_t PixelChunkSizeInKByte,
+ dml_uint_t TotalNumberOfDCCActiveDPP,
+ dml_uint_t MetaChunkSize,
+ dml_float_t ReturnBW,
+ dml_bool_t GPUVMEnable,
+ dml_bool_t HostVMEnable,
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_uint_t NumberOfDPP[],
+ dml_uint_t dpte_group_bytes[],
+ dml_float_t HostVMInefficiencyFactor,
+ dml_uint_t HostVMMinPageSize,
+ dml_uint_t HostVMMaxNonCachedPageTableLevels);
+
+static dml_uint_t CalculateExtraLatencyBytes(
+ dml_uint_t ReorderingBytes,
+ dml_uint_t TotalNumberOfActiveDPP,
+ dml_uint_t PixelChunkSizeInKByte,
+ dml_uint_t TotalNumberOfDCCActiveDPP,
+ dml_uint_t MetaChunkSize,
+ dml_bool_t GPUVMEnable,
+ dml_bool_t HostVMEnable,
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_uint_t NumberOfDPP[],
+ dml_uint_t dpte_group_bytes[],
+ dml_float_t HostVMInefficiencyFactor,
+ dml_uint_t HostVMMinPageSize,
+ dml_uint_t HostVMMaxNonCachedPageTableLevels);
+
+static dml_float_t CalculateUrgentLatency(
+ dml_float_t UrgentLatencyPixelDataOnly,
+ dml_float_t UrgentLatencyPixelMixedWithVMData,
+ dml_float_t UrgentLatencyVMDataOnly,
+ dml_bool_t DoUrgentLatencyAdjustment,
+ dml_float_t UrgentLatencyAdjustmentFabricClockComponent,
+ dml_float_t UrgentLatencyAdjustmentFabricClockReference,
+ dml_float_t FabricClockSingle);
+
+static dml_bool_t UnboundedRequest(
+ enum dml_unbounded_requesting_policy UseUnboundedRequestingFinal,
+ dml_uint_t TotalNumberOfActiveDPP,
+ dml_bool_t NoChromaOrLinear,
+ enum dml_output_encoder_class Output);
+
+static void CalculateSurfaceSizeInMall(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_uint_t MALLAllocatedForDCN,
+ enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
+ dml_bool_t DCCEnable[],
+ dml_bool_t ViewportStationary[],
+ dml_uint_t ViewportXStartY[],
+ dml_uint_t ViewportYStartY[],
+ dml_uint_t ViewportXStartC[],
+ dml_uint_t ViewportYStartC[],
+ dml_uint_t ViewportWidthY[],
+ dml_uint_t ViewportHeightY[],
+ dml_uint_t BytesPerPixelY[],
+ dml_uint_t ViewportWidthC[],
+ dml_uint_t ViewportHeightC[],
+ dml_uint_t BytesPerPixelC[],
+ dml_uint_t SurfaceWidthY[],
+ dml_uint_t SurfaceWidthC[],
+ dml_uint_t SurfaceHeightY[],
+ dml_uint_t SurfaceHeightC[],
+ dml_uint_t Read256BytesBlockWidthY[],
+ dml_uint_t Read256BytesBlockWidthC[],
+ dml_uint_t Read256BytesBlockHeightY[],
+ dml_uint_t Read256BytesBlockHeightC[],
+ dml_uint_t ReadBlockWidthY[],
+ dml_uint_t ReadBlockWidthC[],
+ dml_uint_t ReadBlockHeightY[],
+ dml_uint_t ReadBlockHeightC[],
+
+ // Output
+ dml_uint_t SurfaceSizeInMALL[],
+ dml_bool_t *ExceededMALLSize);
+
+/*
+ * Forward declaration of CalculateDETBufferSize().
+ * Parameters listed before the "// Output" marker are the inputs; the ones
+ * after it are, per that marker, the outputs.  The rounded-up maximum swath
+ * size arrays come as a luma (Y) / chroma (C) pair.
+ */
+static void CalculateDETBufferSize(
+ dml_uint_t DETSizeOverride[],
+ enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ dml_bool_t ForceSingleDPP,
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_bool_t UnboundedRequestEnabled,
+ dml_uint_t nomDETInKByte,
+ dml_uint_t MaxTotalDETInKByte,
+ dml_uint_t ConfigReturnBufferSizeInKByte,
+ dml_uint_t MinCompressedBufferSizeInKByte,
+ dml_uint_t ConfigReturnBufferSegmentSizeInkByte,
+ dml_uint_t CompressedBufferSegmentSizeInkByteFinal,
+ enum dml_source_format_class SourcePixelFormat[],
+ dml_float_t ReadBandwidthLuma[],
+ dml_float_t ReadBandwidthChroma[],
+ dml_uint_t RoundedUpMaxSwathSizeBytesY[],
+ dml_uint_t RoundedUpMaxSwathSizeBytesC[],
+ dml_uint_t DPPPerSurface[],
+ // Output
+ dml_uint_t DETBufferSizeInKByte[],
+ dml_uint_t *CompressedBufferSizeInkByte);
+
+static void CalculateMaxDETAndMinCompressedBufferSize(
+ dml_uint_t ConfigReturnBufferSizeInKByte,
+ dml_uint_t ConfigReturnBufferSegmentSizeInKByte,
+ dml_uint_t ROBBufferSizeInKByte,
+ dml_uint_t MaxNumDPP,
+ dml_bool_t nomDETInKByteOverrideEnable,
+ dml_uint_t nomDETInKByteOverrideValue,
+
+ // Output
+ dml_uint_t *MaxTotalDETInKByte,
+ dml_uint_t *nomDETInKByte,
+ dml_uint_t *MinCompressedBufferSizeInKByte);
+
+static dml_uint_t DSCDelayRequirement(
+ dml_bool_t DSCEnabled,
+ enum dml_odm_mode ODMMode,
+ dml_uint_t DSCInputBitPerComponent,
+ dml_float_t OutputBpp,
+ dml_uint_t HActive,
+ dml_uint_t HTotal,
+ dml_uint_t NumberOfDSCSlices,
+ enum dml_output_format_class OutputFormat,
+ enum dml_output_encoder_class Output,
+ dml_float_t PixelClock,
+ dml_float_t PixelClockBackEnd);
+
+static dml_bool_t CalculateVActiveBandwithSupport(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_float_t ReturnBW,
+ dml_bool_t NotUrgentLatencyHiding[],
+ dml_float_t ReadBandwidthLuma[],
+ dml_float_t ReadBandwidthChroma[],
+ dml_float_t cursor_bw[],
+ dml_float_t meta_row_bandwidth[],
+ dml_float_t dpte_row_bandwidth[],
+ dml_uint_t NumberOfDPP[],
+ dml_float_t UrgentBurstFactorLuma[],
+ dml_float_t UrgentBurstFactorChroma[],
+ dml_float_t UrgentBurstFactorCursor[]);
+
+static void CalculatePrefetchBandwithSupport(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_float_t ReturnBW,
+ enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ dml_bool_t NotUrgentLatencyHiding[],
+ dml_float_t ReadBandwidthLuma[],
+ dml_float_t ReadBandwidthChroma[],
+ dml_float_t PrefetchBandwidthLuma[],
+ dml_float_t PrefetchBandwidthChroma[],
+ dml_float_t cursor_bw[],
+ dml_float_t meta_row_bandwidth[],
+ dml_float_t dpte_row_bandwidth[],
+ dml_float_t cursor_bw_pre[],
+ dml_float_t prefetch_vmrow_bw[],
+ dml_uint_t NumberOfDPP[],
+ dml_float_t UrgentBurstFactorLuma[],
+ dml_float_t UrgentBurstFactorChroma[],
+ dml_float_t UrgentBurstFactorCursor[],
+ dml_float_t UrgentBurstFactorLumaPre[],
+ dml_float_t UrgentBurstFactorChromaPre[],
+ dml_float_t UrgentBurstFactorCursorPre[],
+
+ // Output
+ dml_float_t *PrefetchBandwidth,
+ dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch,
+ dml_float_t *FractionOfUrgentBandwidth,
+ dml_bool_t *PrefetchBandwidthSupport);
+
+static dml_float_t CalculateBandwidthAvailableForImmediateFlip(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_float_t ReturnBW,
+ dml_float_t ReadBandwidthLuma[],
+ dml_float_t ReadBandwidthChroma[],
+ dml_float_t PrefetchBandwidthLuma[],
+ dml_float_t PrefetchBandwidthChroma[],
+ dml_float_t cursor_bw[],
+ dml_float_t cursor_bw_pre[],
+ dml_uint_t NumberOfDPP[],
+ dml_float_t UrgentBurstFactorLuma[],
+ dml_float_t UrgentBurstFactorChroma[],
+ dml_float_t UrgentBurstFactorCursor[],
+ dml_float_t UrgentBurstFactorLumaPre[],
+ dml_float_t UrgentBurstFactorChromaPre[],
+ dml_float_t UrgentBurstFactorCursorPre[]);
+
+static void CalculateImmediateFlipBandwithSupport(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_float_t ReturnBW,
+ enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ enum dml_immediate_flip_requirement ImmediateFlipRequirement[],
+ dml_float_t final_flip_bw[],
+ dml_float_t ReadBandwidthLuma[],
+ dml_float_t ReadBandwidthChroma[],
+ dml_float_t PrefetchBandwidthLuma[],
+ dml_float_t PrefetchBandwidthChroma[],
+ dml_float_t cursor_bw[],
+ dml_float_t meta_row_bandwidth[],
+ dml_float_t dpte_row_bandwidth[],
+ dml_float_t cursor_bw_pre[],
+ dml_float_t prefetch_vmrow_bw[],
+ dml_uint_t NumberOfDPP[],
+ dml_float_t UrgentBurstFactorLuma[],
+ dml_float_t UrgentBurstFactorChroma[],
+ dml_float_t UrgentBurstFactorCursor[],
+ dml_float_t UrgentBurstFactorLumaPre[],
+ dml_float_t UrgentBurstFactorChromaPre[],
+ dml_float_t UrgentBurstFactorCursorPre[],
+
+ // Output
+ dml_float_t *TotalBandwidth,
+ dml_float_t *TotalBandwidthNotIncludingMALLPrefetch,
+ dml_float_t *FractionOfUrgentBandwidth,
+ dml_bool_t *ImmediateFlipBandwidthSupport);
+
+// ---------------------------
+// Declaration Ends
+// ---------------------------
+
+/*
+ * dscceComputeDelay - delay of the dscce stage, expressed in pixels.
+ *
+ * bpc:         source bits per component; 8 and 10 have their own ssm delay,
+ *              every other value falls back to the largest one
+ * BPP:         bits per pixel, used to derive the initial transmit delay
+ * sliceWidth:  number of pixels per slice line
+ * numSlices:   number of slices in the horizontal direction per DSC engine
+ * pixelFormat: pixel/color format; selects pixels per clock and whether one
+ *              extra cycle is added
+ * Output:      only reported in the debug trace
+ *
+ * Returns the delay in cycles multiplied by 3 containers per cycle and by the
+ * number of pixels per clock.
+ */
+static dml_uint_t dscceComputeDelay(
+	dml_uint_t bpc,
+	dml_float_t BPP,
+	dml_uint_t sliceWidth,
+	dml_uint_t numSlices,
+	enum dml_output_format_class pixelFormat,
+	enum dml_output_encoder_class Output)
+{
+	// fixed value
+	const dml_uint_t rcModelSize = 8192;
+	dml_uint_t ppc, xmitDelay, ssmDelay, dscWidth, extraCycle, stall;
+	dml_uint_t ix, thirds, pad, wholeLines, a, ax, linesPerSlice;
+	dml_uint_t cycles, pixels;
+
+	// 420 and native 422 operate at 2 pixels per clock, all other modes at 1
+	ppc = (pixelFormat == dml_420 || pixelFormat == dml_n422) ? 2 : 1;
+
+	// initial transmit delay as per PPS
+	xmitDelay = (dml_uint_t)(dml_round(rcModelSize / 2.0 / BPP / ppc, 1));
+
+	// ssm delay, selected by the source bit depth
+	switch (bpc) {
+	case 8:
+		ssmDelay = 81;
+		break;
+	case 10:
+		ssmDelay = 89;
+		break;
+	default:
+		ssmDelay = 113;
+		break;
+	}
+
+	// divide by pixels per clock to get the slice width as seen by DSC
+	dscWidth = sliceWidth / ppc;
+
+	// every format other than 420, 444 and native 422 has an additional cycle of delay
+	extraCycle = (pixelFormat != dml_420 && pixelFormat != dml_444 && pixelFormat != dml_n422) ? 1 : 0;
+
+	// main calculation for the dscce
+	ix = xmitDelay + 45;
+	thirds = (dscWidth + 2) / 3;		// dscWidth / 3, rounded up
+	pad = 3 * thirds - dscWidth;		// distance to the next multiple of 3
+	wholeLines = ix / dscWidth;
+	a = ix + pad * wholeLines;
+	ax = (a + 2) / 3 + ssmDelay + 6 + 1;
+	linesPerSlice = (ax + thirds - 1) / thirds;
+	stall = ((ix % dscWidth) == 0 && pad != 0) ? 1 : 0;
+	cycles = linesPerSlice * thirds * (numSlices - 1) + ax + extraCycle + stall + 22;
+
+	// dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
+	pixels = cycles * 3 * ppc;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: bpc: %u\n", __func__, bpc);
+	dml_print("DML::%s: BPP: %f\n", __func__, BPP);
+	dml_print("DML::%s: sliceWidth: %u\n", __func__, sliceWidth);
+	dml_print("DML::%s: numSlices: %u\n", __func__, numSlices);
+	dml_print("DML::%s: pixelFormat: %u\n", __func__, pixelFormat);
+	dml_print("DML::%s: Output: %u\n", __func__, Output);
+	dml_print("DML::%s: pixels: %u\n", __func__, pixels);
+#endif
+	return pixels;
+}
+
+/*
+ * dscComputeDelay - fixed DSC delay for a given pixel format.
+ *
+ * The result is the sum of constant per-stage contributions; each term below
+ * is tagged with the stage it stands for.  420 and native 422 have their own
+ * tables, every other format shares the last one.
+ *
+ * pixelFormat: selects which table of stage delays is summed
+ * Output:      not used by this function
+ *
+ * Returns the summed delay.
+ */
+static dml_uint_t dscComputeDelay(enum dml_output_format_class pixelFormat, enum dml_output_encoder_class Output)
+{
+	dml_uint_t Delay;
+
+	switch (pixelFormat) {
+	case dml_420:
+		Delay = 2	/* sfr */
+			+ 0	/* dsccif */
+			+ 3	/* dscc - input deserializer */
+			+ 2	/* dscc gets pixels every other cycle */
+			+ 12	/* dscc - input cdc fifo */
+			+ 13	/* dscc gets pixels every other cycle */
+			+ 2	/* dscc - cdc uncertainty */
+			+ 7	/* dscc - output cdc fifo */
+			+ 3	/* dscc gets pixels every other cycle */
+			+ 2	/* dscc - cdc uncertainty */
+			+ 1	/* dscc - output serializer */
+			+ 1;	/* sft */
+		break;
+	case dml_n422:
+		Delay = 2	/* sfr */
+			+ 1	/* dsccif */
+			+ 5	/* dscc - input deserializer */
+			+ 25	/* dscc - input cdc fifo */
+			+ 2	/* dscc - cdc uncertainty */
+			+ 10	/* dscc - output cdc fifo */
+			+ 2	/* dscc - cdc uncertainty */
+			+ 1	/* dscc - output serializer */
+			+ 1;	/* sft */
+		break;
+	default:
+		Delay = 2	/* sfr */
+			+ 0	/* dsccif */
+			+ 3	/* dscc - input deserializer */
+			+ 12	/* dscc - input cdc fifo */
+			+ 2	/* dscc - cdc uncertainty */
+			+ 7	/* dscc - output cdc fifo */
+			+ 1	/* dscc - output serializer */
+			+ 2	/* dscc - cdc uncertainty */
+			+ 1;	/* sft */
+		break;
+	}
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: pixelFormat = %u\n", __func__, pixelFormat);
+	dml_print("DML::%s: Delay = %u\n", __func__, Delay);
+#endif
+
+	return Delay;
+}
+
+static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st *scratch,
+ struct CalculatePrefetchSchedule_params_st *p)
+{
+ struct CalculatePrefetchSchedule_locals_st *s = &scratch->CalculatePrefetchSchedule_locals;
+
+ s->MyError = false;
+ s->DPPCycles = 0;
+ s->DISPCLKCycles = 0;
+ s->DSTTotalPixelsAfterScaler = 0.0;
+ s->LineTime = 0.0;
+ s->dst_y_prefetch_equ = 0.0;
+ s->prefetch_bw_oto = 0.0;
+ s->Tvm_oto = 0.0;
+ s->Tr0_oto = 0.0;
+ s->Tvm_oto_lines = 0.0;
+ s->Tr0_oto_lines = 0.0;
+ s->dst_y_prefetch_oto = 0.0;
+ s->TimeForFetchingMetaPTE = 0.0;
+ s->TimeForFetchingRowInVBlank = 0.0;
+ s->LinesToRequestPrefetchPixelData = 0.0;
+ s->HostVMDynamicLevelsTrips = 0;
+ s->trip_to_mem = 0.0;
+ s->Tvm_trips = 0.0;
+ s->Tr0_trips = 0.0;
+ s->Tvm_trips_rounded = 0.0;
+ s->Tr0_trips_rounded = 0.0;
+ s->max_Tsw = 0.0;
+ s->Lsw_oto = 0.0;
+ s->Tpre_rounded = 0.0;
+ s->prefetch_bw_equ = 0.0;
+ s->Tvm_equ = 0.0;
+ s->Tr0_equ = 0.0;
+ s->Tdmbf = 0.0;
+ s->Tdmec = 0.0;
+ s->Tdmsks = 0.0;
+ s->prefetch_sw_bytes = 0.0;
+ s->prefetch_bw_pr = 0.0;
+ s->bytes_pp = 0.0;
+ s->dep_bytes = 0.0;
+ s->min_Lsw_oto = 0.0;
+ s->Tsw_est1 = 0.0;
+ s->Tsw_est3 = 0.0;
+
+ if (p->GPUVMEnable == true && p->HostVMEnable == true) {
+ s->HostVMDynamicLevelsTrips = p->HostVMMaxNonCachedPageTableLevels;
+ } else {
+ s->HostVMDynamicLevelsTrips = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
+ dml_print("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->GPUVMPageTableLevels);
+ dml_print("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable);
+ dml_print("DML::%s: VStartup = %u\n", __func__, p->VStartup);
+ dml_print("DML::%s: MaxVStartup = %u\n", __func__, p->MaxVStartup);
+ dml_print("DML::%s: HostVMEnable = %u\n", __func__, p->HostVMEnable);
+ dml_print("DML::%s: HostVMInefficiencyFactor= %f\n", __func__, p->HostVMInefficiencyFactor);
+ dml_print("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk);
+#endif
+ CalculateVUpdateAndDynamicMetadataParameters(
+ p->MaxInterDCNTileRepeaters,
+ p->myPipe->Dppclk,
+ p->myPipe->Dispclk,
+ p->myPipe->DCFClkDeepSleep,
+ p->myPipe->PixelClock,
+ p->myPipe->HTotal,
+ p->myPipe->VBlank,
+ p->DynamicMetadataTransmittedBytes,
+ p->DynamicMetadataLinesBeforeActiveRequired,
+ p->myPipe->InterlaceEnable,
+ p->myPipe->ProgressiveToInterlaceUnitInOPP,
+ p->TSetup,
+
+ // Output
+ &s->Tdmbf,
+ &s->Tdmec,
+ &s->Tdmsks,
+ p->VUpdateOffsetPix,
+ p->VUpdateWidthPix,
+ p->VReadyOffsetPix);
+
+ s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock;
+ s->trip_to_mem = p->UrgentLatency;
+ s->Tvm_trips = p->UrgentExtraLatency + s->trip_to_mem * (p->GPUVMPageTableLevels * (s->HostVMDynamicLevelsTrips + 1) - 1);
+
+ if (p->DynamicMetadataVMEnabled == true) {
+ *p->Tdmdl = p->TWait + s->Tvm_trips + s->trip_to_mem;
+ } else {
+ *p->Tdmdl = p->TWait + p->UrgentExtraLatency;
+ }
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+ if (DynamicMetadataEnable == false) {
+ *Tdmdl = 0.0;
+ }
+#endif
+
+ if (p->DynamicMetadataEnable == true) {
+ if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) {
+ *p->NotEnoughTimeForDynamicMetadata = true;
+ dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
+ dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
+ dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
+ dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
+ dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
+ } else {
+ *p->NotEnoughTimeForDynamicMetadata = false;
+ }
+ } else {
+ *p->NotEnoughTimeForDynamicMetadata = false;
+ }
+
+ *p->Tdmdl_vm = (p->DynamicMetadataEnable == true && p->DynamicMetadataVMEnabled == true && p->GPUVMEnable == true ? p->TWait + s->Tvm_trips : 0);
+
+ if (p->myPipe->ScalerEnabled)
+ s->DPPCycles = (dml_uint_t)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL);
+ else
+ s->DPPCycles = (dml_uint_t)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly);
+
+ s->DPPCycles = (dml_uint_t)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor);
+
+ s->DISPCLKCycles = (dml_uint_t)p->DISPCLKDelaySubtotal;
+
+ if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0)
+ return true;
+
+ *p->DSTXAfterScaler = (dml_uint_t) dml_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay, 1.0);
+ *p->DSTXAfterScaler = (dml_uint_t) dml_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH +
+ ((p->myPipe->ODMMode == dml_odm_mode_split_1to2 || p->myPipe->ODMMode == dml_odm_mode_mso_1to2) ? (dml_float_t)p->myPipe->HActive / 2.0 : 0) +
+ ((p->myPipe->ODMMode == dml_odm_mode_mso_1to4) ? (dml_float_t)p->myPipe->HActive * 3.0 / 4.0 : 0), 1.0);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles);
+ dml_print("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock);
+ dml_print("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk);
+ dml_print("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles);
+ dml_print("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk);
+ dml_print("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay);
+ dml_print("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode);
+ dml_print("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH);
+ dml_print("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler);
+#endif
+
+ if (p->OutputFormat == dml_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP))
+ *p->DSTYAfterScaler = 1;
+ else
+ *p->DSTYAfterScaler = 0;
+
+ s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler;
+ *p->DSTYAfterScaler = (dml_uint_t)(dml_floor(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1));
+ *p->DSTXAfterScaler = (dml_uint_t)(s->DSTTotalPixelsAfterScaler - ((dml_float_t) (*p->DSTYAfterScaler * p->myPipe->HTotal)));
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler);
+ dml_print("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler);
+#endif
+
+ s->MyError = false;
+
+ s->Tr0_trips = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1);
+
+ if (p->GPUVMEnable == true) {
+ s->Tvm_trips_rounded = dml_ceil(4.0 * s->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
+ s->Tr0_trips_rounded = dml_ceil(4.0 * s->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
+ if (p->GPUVMPageTableLevels >= 3) {
+ *p->Tno_bw = p->UrgentExtraLatency + s->trip_to_mem * (dml_float_t) ((p->GPUVMPageTableLevels - 2) * (s->HostVMDynamicLevelsTrips + 1) - 1);
+ } else if (p->GPUVMPageTableLevels == 1 && p->myPipe->DCCEnable != true) {
+ s->Tr0_trips_rounded = dml_ceil(4.0 * p->UrgentExtraLatency / s->LineTime, 1.0) / 4.0 * s->LineTime;
+ *p->Tno_bw = p->UrgentExtraLatency;
+ } else {
+ *p->Tno_bw = 0;
+ }
+ } else if (p->myPipe->DCCEnable == true) {
+ s->Tvm_trips_rounded = s->LineTime / 4.0;
+ s->Tr0_trips_rounded = dml_ceil(4.0 * s->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
+ *p->Tno_bw = 0;
+ } else {
+ s->Tvm_trips_rounded = s->LineTime / 4.0;
+ s->Tr0_trips_rounded = s->LineTime / 2.0;
+ *p->Tno_bw = 0;
+ }
+ s->Tvm_trips_rounded = dml_max(s->Tvm_trips_rounded, s->LineTime / 4.0);
+ s->Tr0_trips_rounded = dml_max(s->Tr0_trips_rounded, s->LineTime / 4.0);
+
+ if (p->myPipe->SourcePixelFormat == dml_420_8 || p->myPipe->SourcePixelFormat == dml_420_10 || p->myPipe->SourcePixelFormat == dml_420_12) {
+ s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4;
+ } else {
+ s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC;
+ }
+
+ s->prefetch_bw_pr = s->bytes_pp * p->myPipe->PixelClock / (dml_float_t)p->myPipe->DPPPerSurface;
+ if (p->myPipe->VRatio < 1.0)
+ s->prefetch_bw_pr = p->myPipe->VRatio * s->prefetch_bw_pr;
+
+ s->max_Tsw = (dml_max(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime);
+
+ s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC;
+ s->prefetch_bw_oto = dml_max(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw);
+
+ s->min_Lsw_oto = dml_max(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML_MAX_VRATIO_PRE_OTO__;
+ s->min_Lsw_oto = dml_max(s->min_Lsw_oto, 1.0);
+ s->Lsw_oto = dml_ceil(4.0 * dml_max(s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, s->min_Lsw_oto), 1.0) / 4.0;
+
+ if (p->GPUVMEnable == true) {
+ s->Tvm_oto = dml_max3(
+ s->Tvm_trips,
+ *p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->prefetch_bw_oto,
+ s->LineTime / 4.0);
+ } else
+ s->Tvm_oto = s->LineTime / 4.0;
+
+ if ((p->GPUVMEnable == true || p->myPipe->DCCEnable == true)) {
+ s->Tr0_oto = dml_max4(
+ s->Tr0_trips,
+ (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_oto,
+ (s->LineTime - s->Tvm_oto)/2.0,
+ s->LineTime / 4.0);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_oto);
+ dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, s->Tr0_trips);
+ dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime - s->Tvm_oto);
+ dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, s->LineTime / 4);
+#endif
+ } else
+ s->Tr0_oto = (s->LineTime - s->Tvm_oto) / 2.0;
+
+ s->Tvm_oto_lines = dml_ceil(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0;
+ s->Tr0_oto_lines = dml_ceil(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0;
+ s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto;
+
+ s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + dml_max(p->TWait + p->TCalc, *p->Tdmdl)) / s->LineTime - (*p->DSTYAfterScaler + (dml_float_t) *p->DSTXAfterScaler / (dml_float_t)p->myPipe->HTotal);
+ s->dst_y_prefetch_equ = dml_min(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal);
+ dml_print("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto);
+ dml_print("DML::%s: *Tno_bw = %f\n", __func__, *p->Tno_bw);
+ dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, p->UrgentExtraLatency);
+ dml_print("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
+ dml_print("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
+ dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
+ dml_print("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
+ dml_print("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC);
+ dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
+ dml_print("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub);
+ dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, s->prefetch_sw_bytes);
+ dml_print("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, p->PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: MetaRowByte = %u\n", __func__, p->MetaRowByte);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
+ dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
+ dml_print("DML::%s: Tvm_trips = %f\n", __func__, s->Tvm_trips);
+ dml_print("DML::%s: Tr0_trips = %f\n", __func__, s->Tr0_trips);
+ dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto);
+ dml_print("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto);
+ dml_print("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto);
+ dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines);
+ dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines);
+ dml_print("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto);
+ dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
+ dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ);
+#endif
+
+ s->dst_y_prefetch_equ = dml_floor(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0;
+ s->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime;
+
+ dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ);
+
+ dml_print("DML::%s: LineTime: %f\n", __func__, s->LineTime);
+ dml_print("DML::%s: VStartup: %u\n", __func__, p->VStartup);
+ dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime);
+ dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup);
+ dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc);
+ dml_print("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait);
+ dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
+ dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
+ dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
+ dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm);
+ dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
+ dml_print("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler);
+ dml_print("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler);
+
+ s->dep_bytes = dml_max(p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor, p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor);
+
+ if (s->prefetch_sw_bytes < s->dep_bytes) {
+ s->prefetch_sw_bytes = 2 * s->dep_bytes;
+ }
+
+ *p->DestinationLinesToRequestVMInVBlank = 0;
+ *p->DestinationLinesToRequestRowInVBlank = 0;
+ *p->VRatioPrefetchY = 0;
+ *p->VRatioPrefetchC = 0;
+ *p->RequiredPrefetchPixDataBWLuma = 0;
+ if (s->dst_y_prefetch_equ > 1) {
+
+ if (s->Tpre_rounded - *p->Tno_bw > 0) {
+ s->PrefetchBandwidth1 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + 2 * p->MetaRowByte
+ + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor
+ + s->prefetch_sw_bytes)
+ / (s->Tpre_rounded - *p->Tno_bw);
+ s->Tsw_est1 = s->prefetch_sw_bytes / s->PrefetchBandwidth1;
+ } else
+ s->PrefetchBandwidth1 = 0;
+
+ if (p->VStartup == p->MaxVStartup && (s->Tsw_est1 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0) {
+ s->PrefetchBandwidth1 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + 2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) /
+ (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw);
+ }
+
+ if (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded > 0)
+ s->PrefetchBandwidth2 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) /
+ (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded);
+ else
+ s->PrefetchBandwidth2 = 0;
+
+ if (s->Tpre_rounded - s->Tvm_trips_rounded > 0) {
+ s->PrefetchBandwidth3 = (2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) /
+ (s->Tpre_rounded - s->Tvm_trips_rounded);
+ s->Tsw_est3 = s->prefetch_sw_bytes / s->PrefetchBandwidth3;
+ }
+ else
+ s->PrefetchBandwidth3 = 0;
+
+
+ if (p->VStartup == p->MaxVStartup && (s->Tsw_est3 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded > 0) {
+ s->PrefetchBandwidth3 = (2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
+ }
+
+ if (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0)
+ s->PrefetchBandwidth4 = s->prefetch_sw_bytes / (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded);
+ else
+ s->PrefetchBandwidth4 = 0;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Tpre_rounded: %f\n", __func__, s->Tpre_rounded);
+ dml_print("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
+ dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, s->Tvm_trips_rounded);
+ dml_print("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1);
+ dml_print("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3);
+ dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, s->PrefetchBandwidth1);
+ dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, s->PrefetchBandwidth2);
+ dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, s->PrefetchBandwidth3);
+ dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, s->PrefetchBandwidth4);
+#endif
+ {
+ dml_bool_t Case1OK;
+ dml_bool_t Case2OK;
+ dml_bool_t Case3OK;
+
+ if (s->PrefetchBandwidth1 > 0) {
+ if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth1 >= s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth1 >= s->Tr0_trips_rounded) {
+ Case1OK = true;
+ } else {
+ Case1OK = false;
+ }
+ } else {
+ Case1OK = false;
+ }
+
+ if (s->PrefetchBandwidth2 > 0) {
+ if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth2 >= s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth2 < s->Tr0_trips_rounded) {
+ Case2OK = true;
+ } else {
+ Case2OK = false;
+ }
+ } else {
+ Case2OK = false;
+ }
+
+ if (s->PrefetchBandwidth3 > 0) {
+ if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth3 < s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth3 >= s->Tr0_trips_rounded) {
+ Case3OK = true;
+ } else {
+ Case3OK = false;
+ }
+ } else {
+ Case3OK = false;
+ }
+
+ if (Case1OK) {
+ s->prefetch_bw_equ = s->PrefetchBandwidth1;
+ } else if (Case2OK) {
+ s->prefetch_bw_equ = s->PrefetchBandwidth2;
+ } else if (Case3OK) {
+ s->prefetch_bw_equ = s->PrefetchBandwidth3;
+ } else {
+ s->prefetch_bw_equ = s->PrefetchBandwidth4;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Case1OK: %u\n", __func__, Case1OK);
+ dml_print("DML::%s: Case2OK: %u\n", __func__, Case2OK);
+ dml_print("DML::%s: Case3OK: %u\n", __func__, Case3OK);
+ dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ);
+#endif
+
+ if (s->prefetch_bw_equ > 0) {
+ if (p->GPUVMEnable == true) {
+ s->Tvm_equ = dml_max3(*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->prefetch_bw_equ, s->Tvm_trips, s->LineTime / 4);
+ } else {
+ s->Tvm_equ = s->LineTime / 4;
+ }
+
+ if ((p->GPUVMEnable == true || p->myPipe->DCCEnable == true)) {
+ s->Tr0_equ = dml_max4((p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_equ, s->Tr0_trips, (s->LineTime - s->Tvm_equ) / 2, s->LineTime / 4);
+ } else {
+ s->Tr0_equ = (s->LineTime - s->Tvm_equ) / 2;
+ }
+ } else {
+ s->Tvm_equ = 0;
+ s->Tr0_equ = 0;
+ dml_print("DML::%s: prefetch_bw_equ equals 0!\n", __func__);
+ }
+ }
+
+
+ if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) {
+ *p->DestinationLinesForPrefetch = s->dst_y_prefetch_oto;
+ s->TimeForFetchingMetaPTE = s->Tvm_oto;
+ s->TimeForFetchingRowInVBlank = s->Tr0_oto;
+
+ *p->DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0;
+ *p->DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
+ } else {
+ *p->DestinationLinesForPrefetch = s->dst_y_prefetch_equ;
+ s->TimeForFetchingMetaPTE = s->Tvm_equ;
+ s->TimeForFetchingRowInVBlank = s->Tr0_equ;
+
+ if (p->VStartup == p->MaxVStartup && p->EnhancedPrefetchScheduleAccelerationFinal != 0) {
+ *p->DestinationLinesToRequestVMInVBlank = dml_floor(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0;
+ *p->DestinationLinesToRequestRowInVBlank = dml_floor(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
+ } else {
+ *p->DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0;
+ *p->DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
+ }
+ }
+
+ s->LinesToRequestPrefetchPixelData = *p->DestinationLinesForPrefetch - *p->DestinationLinesToRequestVMInVBlank - 2 * *p->DestinationLinesToRequestRowInVBlank;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *p->DestinationLinesForPrefetch);
+ dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
+ dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank);
+ dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
+ dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
+ dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
+ dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData);
+#endif
+
+ if (s->LinesToRequestPrefetchPixelData >= 1 && s->prefetch_bw_equ > 0) {
+ *p->VRatioPrefetchY = (dml_float_t)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData;
+ *p->VRatioPrefetchY = dml_max(*p->VRatioPrefetchY, 1.0);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
+ dml_print("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY);
+ dml_print("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY);
+#endif
+ if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) {
+ if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) {
+ *p->VRatioPrefetchY = dml_max(*p->VRatioPrefetchY,
+ (dml_float_t)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0));
+ } else {
+ s->MyError = true;
+ dml_print("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY);
+ *p->VRatioPrefetchY = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
+ dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
+ dml_print("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY);
+#endif
+ }
+
+ *p->VRatioPrefetchC = (dml_float_t)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData;
+ *p->VRatioPrefetchC = dml_max(*p->VRatioPrefetchC, 1.0);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
+ dml_print("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC);
+ dml_print("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC);
+#endif
+ if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) {
+ if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) {
+ *p->VRatioPrefetchC = dml_max(*p->VRatioPrefetchC, (dml_float_t)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0));
+ } else {
+ s->MyError = true;
+ dml_print("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC);
+ *p->VRatioPrefetchC = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
+ dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
+ dml_print("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC);
+#endif
+ }
+
+ *p->RequiredPrefetchPixDataBWLuma = (dml_float_t)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData
+ * p->myPipe->BytePerPixelY
+ * p->swath_width_luma_ub / s->LineTime;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
+ dml_print("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
+ dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
+ dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixDataBWLuma);
+#endif
+ *p->RequiredPrefetchPixDataBWChroma = (dml_float_t)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData
+ *p->myPipe->BytePerPixelC
+ *p->swath_width_chroma_ub / s->LineTime;
+ } else {
+ s->MyError = true;
+ dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n", __func__, s->LinesToRequestPrefetchPixelData);
+ *p->VRatioPrefetchY = 0;
+ *p->VRatioPrefetchC = 0;
+ *p->RequiredPrefetchPixDataBWLuma = 0;
+ *p->RequiredPrefetchPixDataBWChroma = 0;
+ }
+
+ dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", (dml_float_t)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingMetaPTE);
+ dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", s->TimeForFetchingMetaPTE);
+ dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", s->TimeForFetchingRowInVBlank);
+ dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (dml_float_t)s->LinesToRequestPrefetchPixelData * s->LineTime);
+ dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime);
+ dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
+ dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingMetaPTE - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup);
+ dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow);
+
+ } else {
+ s->MyError = true;
+ dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ);
+ s->TimeForFetchingMetaPTE = 0;
+ s->TimeForFetchingRowInVBlank = 0;
+ *p->DestinationLinesToRequestVMInVBlank = 0;
+ *p->DestinationLinesToRequestRowInVBlank = 0;
+ s->LinesToRequestPrefetchPixelData = 0;
+ *p->VRatioPrefetchY = 0;
+ *p->VRatioPrefetchC = 0;
+ *p->RequiredPrefetchPixDataBWLuma = 0;
+ *p->RequiredPrefetchPixDataBWChroma = 0;
+ }
+
+ {
+ dml_float_t prefetch_vm_bw;
+ dml_float_t prefetch_row_bw;
+
+ if (p->PDEAndMetaPTEBytesFrame == 0) {
+ prefetch_vm_bw = 0;
+ } else if (*p->DestinationLinesToRequestVMInVBlank > 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, p->PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
+ dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
+ dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
+#endif
+ prefetch_vm_bw = p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / (*p->DestinationLinesToRequestVMInVBlank * s->LineTime);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
+#endif
+ } else {
+ prefetch_vm_bw = 0;
+ s->MyError = true;
+ dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
+ }
+
+ if (p->MetaRowByte + p->PixelPTEBytesPerRow == 0) {
+ prefetch_row_bw = 0;
+ } else if (*p->DestinationLinesToRequestRowInVBlank > 0) {
+ prefetch_row_bw = (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / (*p->DestinationLinesToRequestRowInVBlank * s->LineTime);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MetaRowByte = %u\n", __func__, p->MetaRowByte);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
+ dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
+ dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
+#endif
+ } else {
+ prefetch_row_bw = 0;
+ s->MyError = true;
+ dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
+ }
+
+ *p->prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
+ }
+
+ if (s->MyError) {
+ s->TimeForFetchingMetaPTE = 0;
+ s->TimeForFetchingRowInVBlank = 0;
+ *p->DestinationLinesToRequestVMInVBlank = 0;
+ *p->DestinationLinesToRequestRowInVBlank = 0;
+ *p->DestinationLinesForPrefetch = 0;
+ s->LinesToRequestPrefetchPixelData = 0;
+ *p->VRatioPrefetchY = 0;
+ *p->VRatioPrefetchC = 0;
+ *p->RequiredPrefetchPixDataBWLuma = 0;
+ *p->RequiredPrefetchPixDataBWChroma = 0;
+ }
+
+ return s->MyError;
+} // CalculatePrefetchSchedule
+
+/// @brief Derive bytes per pixel, 256-byte block dimensions and macro tile dimensions for both planes
+/// @param SourcePixelFormat Pixel format; selects the byte sizes and whether the chroma outputs are populated
+/// @param SurfaceTiling Swizzle mode; linear surfaces use a block height of 1 and a 256-byte macro tile,
+///        dml_sw_64kb_d / _d_t / _d_x / _r_x use a 65536-byte macro tile, every other mode 4 * 65536 bytes
+/// All remaining parameters are outputs. For the formats whose BytePerPixelC is 0 the chroma block and
+/// macro tile outputs are all written as 0.
+static void CalculateBytePerPixelAndBlockSizes(
+	enum dml_source_format_class SourcePixelFormat,
+	enum dml_swizzle_mode SurfaceTiling,
+
+	// Output
+	dml_uint_t *BytePerPixelY,
+	dml_uint_t *BytePerPixelC,
+	dml_float_t *BytePerPixelDETY,
+	dml_float_t *BytePerPixelDETC,
+	dml_uint_t *BlockHeight256BytesY,
+	dml_uint_t *BlockHeight256BytesC,
+	dml_uint_t *BlockWidth256BytesY,
+	dml_uint_t *BlockWidth256BytesC,
+	dml_uint_t *MacroTileHeightY,
+	dml_uint_t *MacroTileHeightC,
+	dml_uint_t *MacroTileWidthY,
+	dml_uint_t *MacroTileWidthC)
+{
+	dml_bool_t single_plane;
+	dml_uint_t tile_bytes; // width * height * bytes-per-pixel of one macro tile
+	dml_uint_t tile_height_scale; // macro tile height expressed in 256-byte block heights
+
+	// Step 1: bytes per pixel. Any format not listed explicitly takes the default entry.
+	switch (SourcePixelFormat) {
+	case dml_444_64:
+		*BytePerPixelDETY = 8;
+		*BytePerPixelDETC = 0;
+		*BytePerPixelY = 8;
+		*BytePerPixelC = 0;
+		break;
+	case dml_444_32:
+	case dml_rgbe:
+		*BytePerPixelDETY = 4;
+		*BytePerPixelDETC = 0;
+		*BytePerPixelY = 4;
+		*BytePerPixelC = 0;
+		break;
+	case dml_444_16:
+	case dml_mono_16:
+		*BytePerPixelDETY = 2;
+		*BytePerPixelDETC = 0;
+		*BytePerPixelY = 2;
+		*BytePerPixelC = 0;
+		break;
+	case dml_444_8:
+	case dml_mono_8:
+		*BytePerPixelDETY = 1;
+		*BytePerPixelDETC = 0;
+		*BytePerPixelY = 1;
+		*BytePerPixelC = 0;
+		break;
+	case dml_rgbe_alpha:
+		*BytePerPixelDETY = 4;
+		*BytePerPixelDETC = 1;
+		*BytePerPixelY = 4;
+		*BytePerPixelC = 1;
+		break;
+	case dml_420_8:
+		*BytePerPixelDETY = 1;
+		*BytePerPixelDETC = 2;
+		*BytePerPixelY = 1;
+		*BytePerPixelC = 2;
+		break;
+	case dml_420_12:
+		*BytePerPixelDETY = 2;
+		*BytePerPixelDETC = 4;
+		*BytePerPixelY = 2;
+		*BytePerPixelC = 4;
+		break;
+	default:
+		// Fractional DET sizes (4/3 and 8/3) paired with whole-byte 2 / 4 sizes.
+		*BytePerPixelDETY = (dml_float_t) (4.0 / 3);
+		*BytePerPixelDETC = (dml_float_t) (8.0 / 3);
+		*BytePerPixelY = 2;
+		*BytePerPixelC = 4;
+		break;
+	}
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat);
+	dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
+	dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
+	dml_print("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY);
+	dml_print("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC);
+#endif
+
+	// Step 2: 256-byte block height per plane; the width follows as 256 / bytes-per-pixel / height.
+	single_plane = (SourcePixelFormat == dml_444_8 || SourcePixelFormat == dml_444_16
+			|| SourcePixelFormat == dml_444_32 || SourcePixelFormat == dml_444_64
+			|| SourcePixelFormat == dml_mono_8 || SourcePixelFormat == dml_mono_16
+			|| SourcePixelFormat == dml_rgbe);
+
+	if (single_plane) {
+		if (SurfaceTiling == dml_sw_linear)
+			*BlockHeight256BytesY = 1;
+		else if (SourcePixelFormat == dml_444_64)
+			*BlockHeight256BytesY = 4;
+		else if (SourcePixelFormat == dml_444_8)
+			*BlockHeight256BytesY = 16;
+		else
+			*BlockHeight256BytesY = 8;
+
+		// No second plane: chroma block dimensions are reported as 0.
+		*BlockHeight256BytesC = 0;
+		*BlockWidth256BytesC = 0;
+	} else {
+		if (SurfaceTiling == dml_sw_linear) {
+			*BlockHeight256BytesY = 1;
+			*BlockHeight256BytesC = 1;
+		} else if (SourcePixelFormat == dml_rgbe_alpha) {
+			*BlockHeight256BytesY = 8;
+			*BlockHeight256BytesC = 16;
+		} else if (SourcePixelFormat == dml_420_8) {
+			*BlockHeight256BytesY = 16;
+			*BlockHeight256BytesC = 8;
+		} else {
+			*BlockHeight256BytesY = 8;
+			*BlockHeight256BytesC = 8;
+		}
+		*BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
+	}
+	*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY);
+	dml_print("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY);
+	dml_print("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC);
+	dml_print("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC);
+#endif
+
+	// Step 3: macro tile size. Only the byte budget and the height multiplier depend on the swizzle mode.
+	if (SurfaceTiling == dml_sw_linear) {
+		tile_height_scale = 1;
+		tile_bytes = 256;
+	} else if (SurfaceTiling == dml_sw_64kb_d || SurfaceTiling == dml_sw_64kb_d_t || SurfaceTiling == dml_sw_64kb_d_x || SurfaceTiling == dml_sw_64kb_r_x) {
+		tile_height_scale = 16;
+		tile_bytes = 65536;
+	} else {
+		tile_height_scale = 32;
+		tile_bytes = 65536 * 4;
+	}
+
+	*MacroTileHeightY = tile_height_scale * *BlockHeight256BytesY;
+	*MacroTileWidthY = tile_bytes / *BytePerPixelY / *MacroTileHeightY;
+	*MacroTileHeightC = tile_height_scale * *BlockHeight256BytesC;
+	if (*MacroTileHeightC == 0) {
+		// Chroma height of 0 means there is no chroma plane; avoid dividing by it.
+		*MacroTileWidthC = 0;
+	} else {
+		*MacroTileWidthC = tile_bytes / *BytePerPixelC / *MacroTileHeightC;
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY);
+	dml_print("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY);
+	dml_print("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC);
+	dml_print("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC);
+#endif
+} // CalculateBytePerPixelAndBlockSizes
+
+/// @brief Pick the wait time for a prefetch mode from the clock change, self-refresh and urgent latencies
+/// @param PrefetchMode 0 may include the DRAM clock change latency, <= 1 the FCLK change latency,
+///        <= 2 only self-refresh enter/exit; anything else yields UrgentLatency alone
+/// @param UseMALLForPStateChange phantom pipe always yields UrgentLatency alone; full frame and
+///        sub viewport only rule out the DRAM clock change term
+/// @return The selected wait time (same unit as the latency inputs)
+static noinline_for_stack dml_float_t CalculateTWait(
+	dml_uint_t PrefetchMode,
+	enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
+	dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+	dml_bool_t DRRDisplay,
+	dml_float_t DRAMClockChangeLatency,
+	dml_float_t FCLKChangeLatency,
+	dml_float_t UrgentLatency,
+	dml_float_t SREnterPlusExitTime)
+{
+	const dml_bool_t phantom_pipe = (UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe);
+	const dml_bool_t mall_full_sub_or_phantom = (UseMALLForPStateChange == dml_use_mall_pstate_change_full_frame) ||
+			(UseMALLForPStateChange == dml_use_mall_pstate_change_sub_viewport) ||
+			phantom_pipe;
+	const dml_bool_t drr_synchronized = SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay;
+	dml_float_t wait_time;
+
+	if (PrefetchMode == 0 && !mall_full_sub_or_phantom && !drr_synchronized) {
+		// Only case that accounts for the DRAM clock change latency.
+		wait_time = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
+	} else if (!phantom_pipe && PrefetchMode <= 1) {
+		wait_time = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
+	} else if (!phantom_pipe && PrefetchMode <= 2) {
+		wait_time = dml_max(SREnterPlusExitTime, UrgentLatency);
+	} else {
+		wait_time = UrgentLatency;
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: PrefetchMode = %u\n", __func__, PrefetchMode);
+	dml_print("DML::%s: TWait = %f\n", __func__, wait_time);
+#endif
+	return wait_time;
+} // CalculateTWait
+
+
+/// @brief Calculate the "starting point" (range of prefetch modes) for the prefetch calculation.
+/// If AllowForPStateChangeOrStutterInVBlank is set to a particular requirement, the mode evaluation
+/// is done only at that mode (Min == Max). If there is no specific requirement (i.e. *_if_possible),
+/// all prefetch modes are tried in decreasing order of "difficulty" (starting from 0, which means all
+/// power saving features).
+/// An unrecognized setting is reported and asserted on, then handled like *_if_possible so that both
+/// outputs are always written even when ASSERT compiles to nothing.
+/// @param AllowForPStateChangeOrStutterInVBlank Requested policy
+/// @param MinPrefetchMode Output: first prefetch mode to evaluate
+/// @param MaxPrefetchMode Output: last prefetch mode to evaluate
+static void CalculatePrefetchMode(
+	enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank,
+	dml_uint_t *MinPrefetchMode,
+	dml_uint_t *MaxPrefetchMode)
+{
+	switch (AllowForPStateChangeOrStutterInVBlank) {
+	case dml_prefetch_support_uclk_fclk_and_stutter_if_possible:
+		*MinPrefetchMode = 0; // consider all pwr saving features
+		*MaxPrefetchMode = 3; // consider just urgent latency
+		return;
+	case dml_prefetch_support_none:
+		*MinPrefetchMode = 3;
+		break;
+	case dml_prefetch_support_stutter:
+		*MinPrefetchMode = 2;
+		break;
+	case dml_prefetch_support_fclk_and_stutter:
+		*MinPrefetchMode = 1;
+		break;
+	case dml_prefetch_support_uclk_fclk_and_stutter:
+		*MinPrefetchMode = 0;
+		break;
+	default:
+		dml_print("ERROR: Invalid AllowForPStateChangeOrStutterInVBlank setting! val=%u\n", AllowForPStateChangeOrStutterInVBlank);
+		ASSERT(0);
+		// Previously *MinPrefetchMode was left unwritten here and its stale value was copied
+		// into *MaxPrefetchMode. Fall back to the full range instead of an undefined one.
+		*MinPrefetchMode = 0;
+		*MaxPrefetchMode = 3;
+		return;
+	}
+
+	// A specific requirement pins the evaluation to a single mode.
+	*MaxPrefetchMode = *MinPrefetchMode;
+} // CalculatePrefetchMode
+
+/// @brief Compute the DISPCLK needed for writeback as the largest of three candidate clocks
+/// The three candidates are derived from the horizontal taps/ratio, from the vertical taps and
+/// destination width against HTotal, and from the line buffer size against the source width.
+/// WritebackPixelFormat and WritebackVRatio are accepted but not referenced by this function.
+/// @return The maximum candidate passed through RoundToDFSGranularity() with round_up = 1
+static dml_float_t CalculateWriteBackDISPCLK(
+	enum dml_source_format_class WritebackPixelFormat,
+	dml_float_t PixelClock,
+	dml_float_t WritebackHRatio,
+	dml_float_t WritebackVRatio,
+	dml_uint_t WritebackHTaps,
+	dml_uint_t WritebackVTaps,
+	dml_uint_t WritebackSourceWidth,
+	dml_uint_t WritebackDestinationWidth,
+	dml_uint_t HTotal,
+	dml_uint_t WritebackLineBufferSize,
+	dml_float_t DISPCLKDPPCLKVCOSpeed)
+{
+	// Horizontal candidate: taps are consumed in groups of 8.
+	const dml_float_t clk_h = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
+	// Vertical candidate: destination width is consumed in groups of 6, per vertical tap.
+	const dml_float_t clk_v = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / (dml_float_t) HTotal;
+	// Line buffer candidate.
+	const dml_float_t clk_hb = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / (dml_float_t) WritebackSourceWidth;
+
+	return RoundToDFSGranularity(dml_max3(clk_h, clk_v, clk_hb), 1, DISPCLKDPPCLKVCOSpeed);
+}
+
+/// @brief Compute the writeback delay from the number of output lines left after the last source line
+/// WritebackPixelFormat and WritebackHRatio are accepted but not referenced by this function.
+/// @return 0 when the unclamped last-output-line count is negative, otherwise
+///         lines * line length + (HTotal - WritebackDestinationWidth) + 80
+static dml_float_t CalculateWriteBackDelay(
+	enum dml_source_format_class WritebackPixelFormat,
+	dml_float_t WritebackHRatio,
+	dml_float_t WritebackVRatio,
+	dml_uint_t WritebackVTaps,
+	dml_uint_t WritebackDestinationWidth,
+	dml_uint_t WritebackDestinationHeight,
+	dml_uint_t WritebackSourceHeight,
+	dml_uint_t HTotal)
+{
+	const dml_float_t v_init = (WritebackVRatio + WritebackVTaps + 1) / 2;
+	// Line length is at least the destination width; wider when (width / 6 rounded up) * VTaps exceeds it.
+	const dml_float_t line_length = dml_max((dml_float_t) WritebackDestinationWidth, dml_ceil((dml_float_t)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
+	const dml_float_t lines_after_last = WritebackDestinationHeight - 1 - dml_ceil(((dml_float_t)WritebackSourceHeight - (dml_float_t) v_init) / (dml_float_t)WritebackVRatio, 1.0);
+
+	// A negative count is clamped: no delay is reported.
+	if (lines_after_last < 0)
+		return 0;
+
+	return lines_after_last * line_length + (HTotal - WritebackDestinationWidth) + 80;
+}
+
+/// @brief Compute the VUpdate/VReady pixel offsets, TSetup and the dynamic metadata timing terms
+/// Outputs:
+///  - VUpdateOffsetPix / VUpdateWidthPix / VReadyOffsetPix: pixel counts, each rounded up to a whole pixel
+///  - TSetup: the sum of those three pixel counts divided by PixelClock
+///  - Tdmbf: DynamicMetadataTransmittedBytes / 4 / Dispclk
+///  - Tdmec: HTotal / PixelClock (one line)
+///  - Tdmsks: DynamicMetadataLinesBeforeActiveRequired lines, or half of VBlank when that is 0;
+///    halved again when InterlaceEnable == 1 and ProgressiveToInterlaceUnitInOPP is false
+static void CalculateVUpdateAndDynamicMetadataParameters(
+	dml_uint_t MaxInterDCNTileRepeaters,
+	dml_float_t Dppclk,
+	dml_float_t Dispclk,
+	dml_float_t DCFClkDeepSleep,
+	dml_float_t PixelClock,
+	dml_uint_t HTotal,
+	dml_uint_t VBlank,
+	dml_uint_t DynamicMetadataTransmittedBytes,
+	dml_uint_t DynamicMetadataLinesBeforeActiveRequired,
+	dml_uint_t InterlaceEnable,
+	dml_bool_t ProgressiveToInterlaceUnitInOPP,
+
+	// Output
+	dml_float_t *TSetup,
+	dml_float_t *Tdmbf,
+	dml_float_t *Tdmec,
+	dml_float_t *Tdmsks,
+	dml_uint_t *VUpdateOffsetPix,
+	dml_uint_t *VUpdateWidthPix,
+	dml_uint_t *VReadyOffsetPix)
+{
+	// Each repeater costs 2 DPPCLK cycles plus 3 DISPCLK cycles.
+	const dml_float_t repeater_delay = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
+
+	*VUpdateOffsetPix = (dml_uint_t)(dml_ceil(HTotal / 4.0, 1.0));
+	*VUpdateWidthPix = (dml_uint_t)(dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + repeater_delay) * PixelClock, 1.0));
+	*VReadyOffsetPix = (dml_uint_t)(dml_ceil(dml_max(150.0 / Dppclk, repeater_delay + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0));
+	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
+
+	*Tdmec = HTotal / PixelClock;
+	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
+
+	if (DynamicMetadataLinesBeforeActiveRequired != 0) {
+		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
+	} else {
+		// No explicit line requirement: use half of the vertical blank.
+		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
+	}
+	if (InterlaceEnable == 1 && !ProgressiveToInterlaceUnitInOPP)
+		*Tdmsks /= 2;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired);
+	dml_print("DML::%s: VBlank = %u\n", __func__, VBlank);
+	dml_print("DML::%s: HTotal = %u\n", __func__, HTotal);
+	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
+	dml_print("DML::%s: Dppclk = %f\n", __func__, Dppclk);
+	dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep);
+	dml_print("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters);
+	dml_print("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, repeater_delay);
+
+	dml_print("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix);
+	dml_print("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix);
+	dml_print("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix);
+
+	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
+#endif
+}
+
+/// @brief Compute the meta row and dpte row bandwidth of a surface
+/// Each bandwidth is VRatio * row bytes / (row height * LineTime); for dml_420_8, dml_420_10,
+/// dml_420_12 and dml_rgbe_alpha the equivalent chroma term (using VRatioChroma) is added.
+/// @param meta_row_bw Output: 0 unless DCCEnable is true
+/// @param dpte_row_bw Output: 0 unless GPUVMEnable is true
+static void CalculateRowBandwidth(
+	dml_bool_t GPUVMEnable,
+	enum dml_source_format_class SourcePixelFormat,
+	dml_float_t VRatio,
+	dml_float_t VRatioChroma,
+	dml_bool_t DCCEnable,
+	dml_float_t LineTime,
+	dml_uint_t MetaRowByteLuma,
+	dml_uint_t MetaRowByteChroma,
+	dml_uint_t meta_row_height_luma,
+	dml_uint_t meta_row_height_chroma,
+	dml_uint_t PixelPTEBytesPerRowLuma,
+	dml_uint_t PixelPTEBytesPerRowChroma,
+	dml_uint_t dpte_row_height_luma,
+	dml_uint_t dpte_row_height_chroma,
+	// Output
+	dml_float_t *meta_row_bw,
+	dml_float_t *dpte_row_bw)
+{
+	// Formats for which the chroma rows contribute as well.
+	const dml_bool_t with_chroma = (SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_420_12 || SourcePixelFormat == dml_rgbe_alpha);
+
+	if (DCCEnable == true) {
+		if (with_chroma) {
+			*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime)
+					+ VRatioChroma * MetaRowByteChroma
+					/ (meta_row_height_chroma * LineTime);
+		} else {
+			*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
+		}
+	} else {
+		*meta_row_bw = 0;
+	}
+
+	if (GPUVMEnable == true) {
+		if (with_chroma) {
+			*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
+					+ VRatioChroma * PixelPTEBytesPerRowChroma
+					/ (dpte_row_height_chroma * LineTime);
+		} else {
+			*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
+		}
+	} else {
+		*dpte_row_bw = 0;
+	}
+}
+
+/// @brief Determine the immediate flip schedule given the bandwidth remaining after considering the prefetch schedule
+/// @param BandwidthAvailableForImmediateFlip Bandwidth available for iflip for all planes
+/// @param TotImmediateFlipBytes Divisor used to scale the available bandwidth down to this pipe's share;
+///        when it is 0 both line counts and final_flip_bw are written as 0
+/// @param DestinationLinesToRequestVMInImmediateFlip Output: VM fetch time in lines, rounded up to a quarter line
+/// @param DestinationLinesToRequestRowInImmediateFlip Output: row fetch time in lines, rounded up to a quarter line
+/// @param final_flip_bw Output: bandwidth implied by the rounded line counts
+/// @param ImmediateFlipSupportedForPipe Output: false when the VM lines reach 32, the row lines reach 16,
+///        or the VM time plus twice the row time exceeds the minimum row time
+/// HostVMMinPageSize is accepted but not referenced by this function.
+static void CalculateFlipSchedule(
+	dml_float_t HostVMInefficiencyFactor,
+	dml_float_t UrgentExtraLatency,
+	dml_float_t UrgentLatency,
+	dml_uint_t GPUVMMaxPageTableLevels,
+	dml_bool_t HostVMEnable,
+	dml_uint_t HostVMMaxNonCachedPageTableLevels,
+	dml_bool_t GPUVMEnable,
+	dml_uint_t HostVMMinPageSize,
+	dml_float_t PDEAndMetaPTEBytesPerFrame,
+	dml_float_t MetaRowBytes,
+	dml_float_t DPTEBytesPerRow,
+	dml_float_t BandwidthAvailableForImmediateFlip,
+	dml_uint_t TotImmediateFlipBytes,
+	enum dml_source_format_class SourcePixelFormat,
+	dml_float_t LineTime,
+	dml_float_t VRatio,
+	dml_float_t VRatioChroma,
+	dml_float_t Tno_bw,
+	dml_bool_t DCCEnable,
+	dml_uint_t dpte_row_height,
+	dml_uint_t meta_row_height,
+	dml_uint_t dpte_row_height_chroma,
+	dml_uint_t meta_row_height_chroma,
+	dml_bool_t use_one_row_for_frame_flip,
+
+	// Output
+	dml_float_t *DestinationLinesToRequestVMInImmediateFlip,
+	dml_float_t *DestinationLinesToRequestRowInImmediateFlip,
+	dml_float_t *final_flip_bw,
+	dml_bool_t *ImmediateFlipSupportedForPipe)
+{
+	const dml_bool_t gpuvm_on = (GPUVMEnable == true);
+	const dml_bool_t dcc_on = (DCCEnable == true);
+	// Extra page table trips only apply when both GPUVM and HostVM are enabled.
+	const dml_uint_t hostvm_trips = (gpuvm_on && HostVMEnable == true) ? HostVMMaxNonCachedPageTableLevels : 0;
+	// NOTE(review): dml_420_12 is not in this list although CalculateRowBandwidth() includes it
+	// in its equivalent format check - confirm whether that is intentional.
+	const dml_bool_t with_chroma = (SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_rgbe_alpha);
+	dml_float_t row_time_limit = 0.0;
+	dml_float_t vm_fetch_time = 0;
+	dml_float_t row_fetch_time = 0;
+	dml_float_t pipe_flip_bw = 0; // @brief The immediate flip bandwidth for this pipe
+	dml_bool_t schedule_exceeded;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes);
+	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
+	dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
+	dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
+#endif
+
+	if (TotImmediateFlipBytes > 0) {
+		// This pipe's share of the available bandwidth; the dpte row counts twice in one-row mode.
+		if (use_one_row_for_frame_flip)
+			pipe_flip_bw = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2.0 * DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / (dml_float_t) TotImmediateFlipBytes;
+		else
+			pipe_flip_bw = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / (dml_float_t) TotImmediateFlipBytes;
+
+		// Fetch times stay 0 when the corresponding feature is off; otherwise at least a quarter line.
+		if (gpuvm_on) {
+			vm_fetch_time = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / pipe_flip_bw,
+					UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (hostvm_trips + 1) - 1),
+					LineTime / 4.0);
+		}
+		if (gpuvm_on || dcc_on) {
+			row_fetch_time = dml_max3((MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / pipe_flip_bw, UrgentLatency * (hostvm_trips + 1), LineTime / 4.0);
+		}
+
+		*DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (vm_fetch_time / LineTime), 1.0) / 4.0;
+		*DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (row_fetch_time / LineTime), 1.0) / 4.0;
+
+		if (gpuvm_on) {
+			*final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
+					(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
+		} else if (dcc_on) {
+			// GPUVM is off on this path, so only the row fetch contributes.
+			*final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
+		} else {
+			*final_flip_bw = 0;
+		}
+	} else {
+		// Nothing to fetch for immediate flip; the fetch times keep their initial value of 0.
+		*DestinationLinesToRequestVMInImmediateFlip = 0;
+		*DestinationLinesToRequestRowInImmediateFlip = 0;
+		*final_flip_bw = 0;
+	}
+
+	// Shortest row time among the row types in use: dpte rows only (GPUVM without DCC),
+	// meta rows only (DCC without GPUVM), otherwise both kinds.
+	if (gpuvm_on && !dcc_on) {
+		if (with_chroma)
+			row_time_limit = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
+		else
+			row_time_limit = dpte_row_height * LineTime / VRatio;
+	} else if (!gpuvm_on && dcc_on) {
+		if (with_chroma)
+			row_time_limit = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
+		else
+			row_time_limit = meta_row_height * LineTime / VRatio;
+	} else {
+		if (with_chroma)
+			row_time_limit = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
+		else
+			row_time_limit = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
+	}
+
+	// Kept as a single "exceeded" test (not its De Morgan inverse) so comparisons involving NaN behave as before.
+	schedule_exceeded = (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 || vm_fetch_time + 2 * row_fetch_time > row_time_limit);
+	if (schedule_exceeded)
+		*ImmediateFlipSupportedForPipe = false;
+	else
+		*ImmediateFlipSupportedForPipe = true;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
+	dml_print("DML::%s: DCCEnable = %u\n", __func__, DCCEnable);
+
+	dml_print("DML::%s: MetaRowBytes = %f\n", __func__, MetaRowBytes);
+	dml_print("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow);
+	dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
+	dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes);
+	dml_print("DML::%s: ImmediateFlipBW = %f\n", __func__, pipe_flip_bw);
+	dml_print("DML::%s: PDEAndMetaPTEBytesPerFrame = %f\n", __func__, PDEAndMetaPTEBytesPerFrame);
+	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
+	dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
+	dml_print("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw);
+
+	dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip);
+	dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip);
+	dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, vm_fetch_time);
+	dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, row_fetch_time);
+	dml_print("DML::%s: min_row_time = %f\n", __func__, row_time_limit);
+	dml_print("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe);
+#endif
+} // CalculateFlipSchedule
+
+static dml_float_t RoundToDFSGranularity(dml_float_t Clock, dml_bool_t round_up, dml_float_t VCOSpeed) // Returns (VCOSpeed * 4.0) / N, where N is (VCOSpeed * 4.0 / Clock) passed through dml_floor or dml_ceil with granularity 1.0; returns 0.0 when Clock <= 0.
+{
+ if (Clock <= 0.0)
+ return 0.0;
+ else {
+ if (round_up)
+ return VCOSpeed * 4.0 / dml_floor(VCOSpeed * 4.0 / Clock, 1.0); // smaller divider, so result >= Clock (assuming dml_floor rounds down). NOTE(review): divider is 0 when Clock > VCOSpeed * 4.0 - confirm callers never pass that
+ else
+ return VCOSpeed * 4.0 / dml_ceil(VCOSpeed * 4.0 / Clock, 1.0); // larger divider, so result <= Clock (assuming dml_ceil rounds up)
+ }
+}
+
+static void CalculateDCCConfiguration( // Chooses a request mode (256B / 128B contiguous / 128B non-contiguous) per plane from how the full swaths fit in the DET buffer, then writes the six block-size outputs.
+ dml_bool_t DCCEnabled,
+ dml_bool_t DCCProgrammingAssumesScanDirectionUnknown,
+ enum dml_source_format_class SourcePixelFormat,
+ dml_uint_t SurfaceWidthLuma,
+ dml_uint_t SurfaceWidthChroma, // not referenced in this function
+ dml_uint_t SurfaceHeightLuma,
+ dml_uint_t SurfaceHeightChroma, // not referenced in this function
+ dml_uint_t nomDETInKByte,
+ dml_uint_t RequestHeight256ByteLuma,
+ dml_uint_t RequestHeight256ByteChroma,
+ enum dml_swizzle_mode TilingFormat, // not referenced in this function
+ dml_uint_t BytePerPixelY,
+ dml_uint_t BytePerPixelC,
+ dml_float_t BytePerPixelDETY, // not referenced in this function
+ dml_float_t BytePerPixelDETC, // not referenced in this function
+ enum dml_rotation_angle SourceScan,
+ // Output
+ dml_uint_t *MaxUncompressedBlockLuma,
+ dml_uint_t *MaxUncompressedBlockChroma,
+ dml_uint_t *MaxCompressedBlockLuma,
+ dml_uint_t *MaxCompressedBlockChroma,
+ dml_uint_t *IndependentBlockLuma,
+ dml_uint_t *IndependentBlockChroma)
+{
+ dml_uint_t DETBufferSizeForDCC = nomDETInKByte * 1024; // nomDETInKByte scaled by 1024
+
+ dml_uint_t yuv420;
+ dml_uint_t horz_div_l;
+ dml_uint_t horz_div_c;
+ dml_uint_t vert_div_l;
+ dml_uint_t vert_div_c;
+
+ dml_uint_t swath_buf_size;
+ dml_float_t detile_buf_vp_horz_limit;
+ dml_float_t detile_buf_vp_vert_limit;
+
+ dml_uint_t MAS_vp_horz_limit;
+ dml_uint_t MAS_vp_vert_limit;
+ dml_uint_t max_vp_horz_width;
+ dml_uint_t max_vp_vert_height;
+ dml_uint_t eff_surf_width_l;
+ dml_uint_t eff_surf_width_c;
+ dml_uint_t eff_surf_height_l;
+ dml_uint_t eff_surf_height_c;
+
+ dml_uint_t full_swath_bytes_horz_wc_l;
+ dml_uint_t full_swath_bytes_horz_wc_c;
+ dml_uint_t full_swath_bytes_vert_wc_l;
+ dml_uint_t full_swath_bytes_vert_wc_c;
+
+ dml_uint_t req128_horz_wc_l;
+ dml_uint_t req128_horz_wc_c;
+ dml_uint_t req128_vert_wc_l;
+ dml_uint_t req128_vert_wc_c;
+
+ dml_uint_t segment_order_horz_contiguous_luma;
+ dml_uint_t segment_order_horz_contiguous_chroma;
+ dml_uint_t segment_order_vert_contiguous_luma;
+ dml_uint_t segment_order_vert_contiguous_chroma;
+ // Request mode selected per plane further down; REQ_NA is never assigned in this function.
+ typedef enum{
+ REQ_256Bytes,
+ REQ_128BytesNonContiguous,
+ REQ_128BytesContiguous,
+ REQ_NA
+ } RequestType;
+
+ RequestType RequestLuma;
+ RequestType RequestChroma;
+
+ yuv420 = ((SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_420_12) ? 1 : 0); // 1 for dml_420_8/10/12, else 0
+ horz_div_l = 1;
+ horz_div_c = 1;
+ vert_div_l = 1;
+ vert_div_c = 1;
+ // A plane with 1 byte per pixel gets its vertical divider set to 0.
+ if (BytePerPixelY == 1)
+ vert_div_l = 0;
+ if (BytePerPixelC == 1)
+ vert_div_c = 0;
+ // Viewport limits derived from the swath buffer: luma terms only when BytePerPixelC == 0, luma + chroma terms otherwise.
+ if (BytePerPixelC == 0) {
+ swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
+ detile_buf_vp_horz_limit = (dml_float_t) swath_buf_size / ((dml_float_t) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
+ detile_buf_vp_vert_limit = (dml_float_t) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
+ } else {
+ swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
+ detile_buf_vp_horz_limit = (dml_float_t) swath_buf_size / ((dml_float_t) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (dml_float_t) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
+ detile_buf_vp_vert_limit = (dml_float_t) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
+ }
+ // dml_420_10 scales both limits by 1.5.
+ if (SourcePixelFormat == dml_420_10) {
+ detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
+ detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
+ }
+
+ detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16); // presumably rounds down to a multiple of 16 - confirm dml_floor granularity semantics
+ detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
+ // Effective luma surface size is the surface size capped by min(MAS_vp_*_limit, detile_buf_vp_*_limit); chroma is the luma value divided by (1 + yuv420).
+ MAS_vp_horz_limit = SourcePixelFormat == dml_rgbe_alpha ? 3840 : 6144;
+ MAS_vp_vert_limit = SourcePixelFormat == dml_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
+ max_vp_horz_width = (dml_uint_t)(dml_min((dml_float_t) MAS_vp_horz_limit, detile_buf_vp_horz_limit));
+ max_vp_vert_height = (dml_uint_t)(dml_min((dml_float_t) MAS_vp_vert_limit, detile_buf_vp_vert_limit));
+ eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
+ eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
+ eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
+ eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
+ // Full-swath byte counts per plane for the horz and vert variants; chroma counts are 0 when BytePerPixelC == 0.
+ full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
+ full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
+ if (BytePerPixelC > 0) {
+ full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
+ full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
+ } else {
+ full_swath_bytes_horz_wc_c = 0;
+ full_swath_bytes_vert_wc_c = 0;
+ }
+ // dml_420_10: scale each byte count by 2/3 and pass it through dml_ceil with granularity 256.
+ if (SourcePixelFormat == dml_420_10) {
+ full_swath_bytes_horz_wc_l = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0));
+ full_swath_bytes_horz_wc_c = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0));
+ full_swath_bytes_vert_wc_l = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0));
+ full_swath_bytes_vert_wc_c = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0));
+ }
+ // Fit test (horz): no flag if 2x luma + 2x chroma fit in DETBufferSizeForDCC; else flag the plane counted only once in the passing test; flag both if nothing fits.
+ if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
+ req128_horz_wc_l = 0;
+ req128_horz_wc_c = 0;
+ } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
+ req128_horz_wc_l = 0;
+ req128_horz_wc_c = 1;
+ } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
+ req128_horz_wc_l = 1;
+ req128_horz_wc_c = 0;
+ } else {
+ req128_horz_wc_l = 1;
+ req128_horz_wc_c = 1;
+ }
+ // Same fit test applied to the vert byte counts.
+ if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
+ req128_vert_wc_l = 0;
+ req128_vert_wc_c = 0;
+ } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
+ req128_vert_wc_l = 0;
+ req128_vert_wc_c = 1;
+ } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
+ req128_vert_wc_l = 1;
+ req128_vert_wc_c = 0;
+ } else {
+ req128_vert_wc_l = 1;
+ req128_vert_wc_c = 1;
+ }
+ // Bytes-per-pixel == 2 sets the vert-contiguous flag; any other value sets the horz-contiguous flag (per plane).
+ if (BytePerPixelY == 2) {
+ segment_order_horz_contiguous_luma = 0;
+ segment_order_vert_contiguous_luma = 1;
+ } else {
+ segment_order_horz_contiguous_luma = 1;
+ segment_order_vert_contiguous_luma = 0;
+ }
+
+ if (BytePerPixelC == 2) {
+ segment_order_horz_contiguous_chroma = 0;
+ segment_order_vert_contiguous_chroma = 1;
+ } else {
+ segment_order_horz_contiguous_chroma = 1;
+ segment_order_vert_contiguous_chroma = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled);
+ dml_print("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
+ dml_print("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC);
+ dml_print("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l);
+ dml_print("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c);
+ dml_print("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l);
+ dml_print("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c);
+ dml_print("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma);
+ dml_print("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma);
+#endif
+ // Request mode: with scan direction unknown both horz and vert flags are considered; otherwise horz flags for non-vertical rotation and vert flags for vertical rotation.
+ if (DCCProgrammingAssumesScanDirectionUnknown == true) {
+ if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
+ RequestLuma = REQ_256Bytes;
+ } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
+ RequestLuma = REQ_128BytesNonContiguous;
+ } else {
+ RequestLuma = REQ_128BytesContiguous;
+ }
+ if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
+ RequestChroma = REQ_256Bytes;
+ } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
+ RequestChroma = REQ_128BytesNonContiguous;
+ } else {
+ RequestChroma = REQ_128BytesContiguous;
+ }
+ } else if (!dml_is_vertical_rotation(SourceScan)) {
+ if (req128_horz_wc_l == 0) {
+ RequestLuma = REQ_256Bytes;
+ } else if (segment_order_horz_contiguous_luma == 0) {
+ RequestLuma = REQ_128BytesNonContiguous;
+ } else {
+ RequestLuma = REQ_128BytesContiguous;
+ }
+ if (req128_horz_wc_c == 0) {
+ RequestChroma = REQ_256Bytes;
+ } else if (segment_order_horz_contiguous_chroma == 0) {
+ RequestChroma = REQ_128BytesNonContiguous;
+ } else {
+ RequestChroma = REQ_128BytesContiguous;
+ }
+ } else {
+ if (req128_vert_wc_l == 0) {
+ RequestLuma = REQ_256Bytes;
+ } else if (segment_order_vert_contiguous_luma == 0) {
+ RequestLuma = REQ_128BytesNonContiguous;
+ } else {
+ RequestLuma = REQ_128BytesContiguous;
+ }
+ if (req128_vert_wc_c == 0) {
+ RequestChroma = REQ_256Bytes;
+ } else if (segment_order_vert_contiguous_chroma == 0) {
+ RequestChroma = REQ_128BytesNonContiguous;
+ } else {
+ RequestChroma = REQ_128BytesContiguous;
+ }
+ }
+ // Outputs (MaxUncompressed / MaxCompressed / Independent): 256/256/0 for REQ_256Bytes, 256/128/128 for contiguous 128B, 256/64/64 otherwise.
+ if (RequestLuma == REQ_256Bytes) {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 256;
+ *IndependentBlockLuma = 0;
+ } else if (RequestLuma == REQ_128BytesContiguous) {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 128;
+ *IndependentBlockLuma = 128;
+ } else {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 64;
+ *IndependentBlockLuma = 64;
+ }
+
+ if (RequestChroma == REQ_256Bytes) {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 256;
+ *IndependentBlockChroma = 0;
+ } else if (RequestChroma == REQ_128BytesContiguous) {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 128;
+ *IndependentBlockChroma = 128;
+ } else {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 64;
+ *IndependentBlockChroma = 64;
+ }
+ // Overrides: chroma outputs are zeroed when DCC is disabled or BytePerPixelC == 0; luma outputs are zeroed when DCC is disabled.
+ if (DCCEnabled != true || BytePerPixelC == 0) {
+ *MaxUncompressedBlockChroma = 0;
+ *MaxCompressedBlockChroma = 0;
+ *IndependentBlockChroma = 0;
+ }
+
+ if (DCCEnabled != true) {
+ *MaxUncompressedBlockLuma = 0;
+ *MaxCompressedBlockLuma = 0;
+ *IndependentBlockLuma = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma);
+ dml_print("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma);
+ dml_print("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma);
+ dml_print("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma);
+ dml_print("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma);
+ dml_print("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma);
+#endif
+
+} // CalculateDCCConfiguration
+
+static dml_uint_t CalculatePrefetchSourceLines( // Returns *MaxNumSwath * SwathHeight + MaxPartialSwath (truncated to dml_uint_t); also writes *VInitPreFill and *MaxNumSwath.
+ dml_float_t VRatio,
+ dml_uint_t VTaps,
+ dml_bool_t Interlace,
+ dml_bool_t ProgressiveToInterlaceUnitInOPP,
+ dml_uint_t SwathHeight,
+ enum dml_rotation_angle SourceScan,
+ dml_bool_t ViewportStationary,
+ dml_uint_t SwathWidth,
+ dml_uint_t ViewportHeight,
+ dml_uint_t ViewportXStart,
+ dml_uint_t ViewportYStart,
+
+ // Output
+ dml_uint_t *VInitPreFill,
+ dml_uint_t *MaxNumSwath)
+{
+
+ dml_uint_t vp_start_rot = 0;
+ dml_uint_t sw0_tmp = 0;
+ dml_uint_t MaxPartialSwath = 0;
+ dml_float_t numLines = 0;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
+ dml_print("DML::%s: VTaps = %u\n", __func__, VTaps);
+ dml_print("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart);
+ dml_print("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart);
+ dml_print("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary);
+ dml_print("DML::%s: SwathHeight = %u\n", __func__, SwathHeight);
+#endif
+ if (ProgressiveToInterlaceUnitInOPP) // this path leaves out the Interlace * 0.5 * VRatio term used in the else path
+ *VInitPreFill = (dml_uint_t)(dml_floor((VRatio + (dml_float_t) VTaps + 1) / 2.0, 1));
+ else
+ *VInitPreFill = (dml_uint_t)(dml_floor((VRatio + (dml_float_t) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1));
+ // Stationary viewport: pick a start position per rotation, then size the swath counts from where that position falls inside a swath.
+ if (ViewportStationary) {
+ if (SourceScan == dml_rotation_180 || SourceScan == dml_rotation_180m) {
+ vp_start_rot = SwathHeight - (((dml_uint_t) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
+ } else if (SourceScan == dml_rotation_270 || SourceScan == dml_rotation_90m) {
+ vp_start_rot = ViewportXStart;
+ } else if (SourceScan == dml_rotation_90 || SourceScan == dml_rotation_270m) {
+ vp_start_rot = SwathHeight - (((dml_uint_t)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
+ } else {
+ vp_start_rot = ViewportYStart;
+ }
+ sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight); // lines from vp_start_rot to the end of the swath that contains it
+ if (sw0_tmp < *VInitPreFill) {
+ *MaxNumSwath = (dml_uint_t)(dml_ceil((*VInitPreFill - sw0_tmp) / (dml_float_t) SwathHeight, 1) + 1);
+ } else {
+ *MaxNumSwath = 1;
+ }
+ MaxPartialSwath = (dml_uint_t)(dml_max(1, (dml_uint_t) (vp_start_rot + *VInitPreFill - 1) % SwathHeight)); // never less than 1
+ } else { // non-stationary viewport: computed from *VInitPreFill and SwathHeight only
+ *MaxNumSwath = (dml_uint_t)(dml_ceil((*VInitPreFill - 1.0) / (dml_float_t) SwathHeight, 1) + 1);
+ if (*VInitPreFill > 1) {
+ MaxPartialSwath = (dml_uint_t)(dml_max(1, (dml_uint_t) (*VInitPreFill - 2) % SwathHeight));
+ } else {
+ MaxPartialSwath = (dml_uint_t)(dml_max(1, (dml_uint_t) (*VInitPreFill + SwathHeight - 2) % SwathHeight)); // SwathHeight is added before subtracting 2, which keeps the unsigned expression from wrapping when *VInitPreFill <= 1
+ }
+ }
+ numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot);
+ dml_print("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill);
+ dml_print("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath);
+ dml_print("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath);
+ dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
+#endif
+ return (dml_uint_t)(numLines);
+
+} // CalculatePrefetchSourceLines
+
+static dml_uint_t CalculateVMAndRowBytes( // Fills the meta-row and DPTE-row outputs through the pointer parameters and returns PDEAndMetaPTEBytesFrame (MetaPTE + MPDE + DPDE0 + extra DPDE bytes per frame).
+ dml_bool_t ViewportStationary,
+ dml_bool_t DCCEnable,
+ dml_uint_t NumberOfDPPs,
+ dml_uint_t BlockHeight256Bytes,
+ dml_uint_t BlockWidth256Bytes,
+ enum dml_source_format_class SourcePixelFormat, // not referenced in this function
+ dml_uint_t SurfaceTiling,
+ dml_uint_t BytePerPixel,
+ enum dml_rotation_angle SourceScan,
+ dml_uint_t SwathWidth,
+ dml_uint_t ViewportHeight,
+ dml_uint_t ViewportXStart,
+ dml_uint_t ViewportYStart,
+ dml_bool_t GPUVMEnable,
+ dml_uint_t GPUVMMaxPageTableLevels,
+ dml_uint_t GPUVMMinPageSizeKBytes,
+ dml_uint_t PTEBufferSizeInRequests,
+ dml_uint_t Pitch,
+ dml_uint_t DCCMetaPitch,
+ dml_uint_t MacroTileWidth,
+ dml_uint_t MacroTileHeight,
+
+ // Output
+ dml_uint_t *MetaRowByte,
+ dml_uint_t *PixelPTEBytesPerRow, // for bandwidth calculation
+ dml_uint_t *PixelPTEBytesPerRowStorage, // for PTE buffer size check
+ dml_uint_t *dpte_row_width_ub,
+ dml_uint_t *dpte_row_height,
+ dml_uint_t *dpte_row_height_linear,
+ dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame,
+ dml_uint_t *dpte_row_width_ub_one_row_per_frame,
+ dml_uint_t *dpte_row_height_one_row_per_frame,
+ dml_uint_t *MetaRequestWidth,
+ dml_uint_t *MetaRequestHeight,
+ dml_uint_t *meta_row_width,
+ dml_uint_t *meta_row_height,
+ dml_uint_t *PixelPTEReqWidth,
+ dml_uint_t *PixelPTEReqHeight,
+ dml_uint_t *PTERequestSize,
+ dml_uint_t *DPDE0BytesFrame,
+ dml_uint_t *MetaPTEBytesFrame)
+{
+ dml_uint_t MPDEBytesFrame;
+ dml_uint_t DCCMetaSurfaceBytes;
+ dml_uint_t ExtraDPDEBytesFrame;
+ dml_uint_t PDEAndMetaPTEBytesFrame;
+ dml_uint_t MacroTileSizeBytes;
+ dml_uint_t vp_height_meta_ub;
+ dml_uint_t vp_height_dpte_ub;
+
+ dml_uint_t PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
+ // Meta request dimensions are 8x the 256-byte block dimensions.
+ *MetaRequestHeight = 8 * BlockHeight256Bytes;
+ *MetaRequestWidth = 8 * BlockWidth256Bytes;
+ if (SurfaceTiling == dml_sw_linear) { // NOTE(review): *MetaRowByte is not written in this branch; it is only zeroed further down when DCCEnable != true - confirm callers never use linear with DCC
+ *meta_row_height = 32;
+ *meta_row_width = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth));
+ } else if (!dml_is_vertical_rotation(SourceScan)) {
+ *meta_row_height = *MetaRequestHeight;
+ if (ViewportStationary && NumberOfDPPs == 1) {
+ *meta_row_width = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth));
+ } else {
+ *meta_row_width = (dml_uint_t)(dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth);
+ }
+ *MetaRowByte = (dml_uint_t)(*meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0);
+ } else { // vertical rotation: the roles of MetaRequestWidth and MetaRequestHeight are swapped relative to the branch above
+ *meta_row_height = *MetaRequestWidth;
+ if (ViewportStationary && NumberOfDPPs == 1) {
+ *meta_row_width = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1, *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight));
+ } else {
+ *meta_row_width = (dml_uint_t)(dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight);
+ }
+ *MetaRowByte = (dml_uint_t)(*meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0);
+ }
+ // Viewport height bound used for the meta surface, at a granularity of 64 * BlockHeight256Bytes.
+ if (ViewportStationary && (NumberOfDPPs == 1 || !dml_is_vertical_rotation(SourceScan))) {
+ vp_height_meta_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1, 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes));
+ } else if (!dml_is_vertical_rotation(SourceScan)) {
+ vp_height_meta_ub = (dml_uint_t)(dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes);
+ } else {
+ vp_height_meta_ub = (dml_uint_t)(dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes);
+ }
+
+ DCCMetaSurfaceBytes = (dml_uint_t)(DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0);
+ // Meta PTE and MPDE bytes per frame are nonzero only with GPUVM enabled.
+ if (GPUVMEnable == true) {
+ *MetaPTEBytesFrame = (dml_uint_t)((dml_ceil((dml_float_t) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64);
+ MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
+ } else {
+ *MetaPTEBytesFrame = 0;
+ MPDEBytesFrame = 0;
+ }
+ // With DCC disabled every meta byte count is forced to 0.
+ if (DCCEnable != true) {
+ *MetaPTEBytesFrame = 0;
+ MPDEBytesFrame = 0;
+ *MetaRowByte = 0;
+ }
+
+ MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
+ // Same viewport height bound as above, at MacroTileHeight granularity.
+ if (ViewportStationary && (NumberOfDPPs == 1 || !dml_is_vertical_rotation(SourceScan))) {
+ vp_height_dpte_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + MacroTileHeight - 1, MacroTileHeight) - dml_floor(ViewportYStart, MacroTileHeight));
+ } else if (!dml_is_vertical_rotation(SourceScan)) {
+ vp_height_dpte_ub = (dml_uint_t)(dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight);
+ } else {
+ vp_height_dpte_ub = (dml_uint_t)(dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight);
+ }
+ // DPDE0 and extra DPDE bytes are nonzero only with GPUVM enabled and more than one page table level.
+ if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
+ *DPDE0BytesFrame = (dml_uint_t)(64 * (dml_ceil((dml_float_t) (Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) / (dml_float_t) (8 * 2097152), 1) + 1));
+ ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
+ } else {
+ *DPDE0BytesFrame = 0;
+ ExtraDPDEBytesFrame = 0;
+ }
+
+ PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; // this sum is the function's return value
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DCCEnable = %u\n", __func__, DCCEnable);
+ dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
+ dml_print("DML::%s: SwModeLinear = %u\n", __func__, SurfaceTiling == dml_sw_linear);
+ dml_print("DML::%s: BytePerPixel = %u\n", __func__, BytePerPixel);
+ dml_print("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, GPUVMMaxPageTableLevels);
+ dml_print("DML::%s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes);
+ dml_print("DML::%s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes);
+ dml_print("DML::%s: MacroTileHeight = %u\n", __func__, MacroTileHeight);
+ dml_print("DML::%s: MacroTileWidth = %u\n", __func__, MacroTileWidth);
+ dml_print("DML::%s: MetaPTEBytesFrame = %u\n", __func__, *MetaPTEBytesFrame);
+ dml_print("DML::%s: MPDEBytesFrame = %u\n", __func__, MPDEBytesFrame);
+ dml_print("DML::%s: DPDE0BytesFrame = %u\n", __func__, *DPDE0BytesFrame);
+ dml_print("DML::%s: ExtraDPDEBytesFrame= %u\n", __func__, ExtraDPDEBytesFrame);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: ViewportHeight = %u\n", __func__, ViewportHeight);
+ dml_print("DML::%s: SwathWidth = %u\n", __func__, SwathWidth);
+ dml_print("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub);
+#endif
+ // PTE request shape and size: one case for linear surfaces, one for a 4 KB minimum page size, one for every other page size.
+ if (SurfaceTiling == dml_sw_linear) {
+ *PixelPTEReqHeight = 1;
+ *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
+ PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
+ *PTERequestSize = 64;
+ } else if (GPUVMMinPageSizeKBytes == 4) {
+ *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
+ *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
+ *PTERequestSize = 128;
+ } else {
+ *PixelPTEReqHeight = MacroTileHeight;
+ *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
+ *PTERequestSize = 64;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame); // NOTE(review): no HostVM factor is applied anywhere in this function despite the label
+ dml_print("DML::%s: PixelPTEReqHeight = %u\n", __func__, *PixelPTEReqHeight);
+ dml_print("DML::%s: PixelPTEReqWidth = %u\n", __func__, *PixelPTEReqWidth);
+ dml_print("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear);
+ dml_print("DML::%s: PTERequestSize = %u\n", __func__, *PTERequestSize);
+ dml_print("DML::%s: Pitch = %u\n", __func__, Pitch);
+#endif
+ // "one row per frame" outputs: the row height is the whole viewport height bound computed above.
+ *dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
+ *dpte_row_width_ub_one_row_per_frame = (dml_uint_t)((dml_ceil(((dml_float_t)Pitch * (dml_float_t) *dpte_row_height_one_row_per_frame / (dml_float_t) *PixelPTEReqHeight - 1) / (dml_float_t) *PixelPTEReqWidth, 1) + 1) * (dml_float_t) *PixelPTEReqWidth);
+ *PixelPTEBytesPerRow_one_row_per_frame = (dml_uint_t)((dml_float_t) *dpte_row_width_ub_one_row_per_frame / (dml_float_t) *PixelPTEReqWidth * *PTERequestSize);
+ // Regular DPTE row outputs, again split by linear / non-vertical rotation / vertical rotation.
+ if (SurfaceTiling == dml_sw_linear) {
+ *dpte_row_height = (dml_uint_t)(dml_min(128, 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1))); // a power of two, capped at 128
+ dml_print("DML::%s: dpte_row_height term 1 = %u\n", __func__, PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch); // NOTE(review): these five prints are outside the __DML_VBA_DEBUG__ guard
+ dml_print("DML::%s: dpte_row_height term 2 = %f\n", __func__, dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
+ dml_print("DML::%s: dpte_row_height term 3 = %f\n", __func__, dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
+ dml_print("DML::%s: dpte_row_height term 4 = %u\n", __func__, 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
+ dml_print("DML::%s: dpte_row_height = %u\n", __func__, *dpte_row_height);
+
+ *dpte_row_width_ub = (dml_uint_t)(dml_ceil(((dml_float_t) Pitch * (dml_float_t) *dpte_row_height - 1), (dml_float_t) *PixelPTEReqWidth) + *PixelPTEReqWidth);
+ *PixelPTEBytesPerRow = (dml_uint_t)((dml_float_t) *dpte_row_width_ub / (dml_float_t) *PixelPTEReqWidth * *PTERequestSize);
+
+ // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
+ *dpte_row_height_linear = 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * PixelPTEReqWidth_linear / Pitch), 1);
+ if (*dpte_row_height_linear > 128)
+ *dpte_row_height_linear = 128;
+ // NOTE(review): *dpte_row_height_linear is written only in this linear branch; the tiled branches below leave it untouched.
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *dpte_row_width_ub);
+#endif
+
+ } else if (!dml_is_vertical_rotation(SourceScan)) {
+ *dpte_row_height = *PixelPTEReqHeight;
+
+ if (GPUVMMinPageSizeKBytes > 64) {
+ *dpte_row_width_ub = (dml_uint_t)((dml_ceil(((dml_float_t) Pitch * (dml_float_t) *dpte_row_height / (dml_float_t) *PixelPTEReqHeight - 1) / (dml_float_t) *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth);
+ } else if (ViewportStationary && (NumberOfDPPs == 1)) {
+ *dpte_row_width_ub = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *PixelPTEReqWidth - 1, *PixelPTEReqWidth) - dml_floor(ViewportXStart, *PixelPTEReqWidth));
+ } else {
+ *dpte_row_width_ub = (dml_uint_t)((dml_ceil((dml_float_t) (SwathWidth - 1) / (dml_float_t)*PixelPTEReqWidth, 1) + 1.0) * *PixelPTEReqWidth);
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *dpte_row_width_ub);
+#endif
+
+ ASSERT(*PixelPTEReqWidth); // NOTE(review): the value was already used as a floating-point divisor above; if it is 0, *PixelPTEBytesPerRow is left unwritten by this branch
+ if (*PixelPTEReqWidth != 0)
+ *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
+ } else {
+ *dpte_row_height = (dml_uint_t)(dml_min(*PixelPTEReqWidth, MacroTileWidth));
+
+ if (ViewportStationary && (NumberOfDPPs == 1)) {
+ *dpte_row_width_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1, *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight));
+ } else {
+ *dpte_row_width_ub = (dml_uint_t)((dml_ceil((dml_float_t) (SwathWidth - 1) / (dml_float_t) *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight);
+ }
+
+ *PixelPTEBytesPerRow = (dml_uint_t)((dml_float_t) *dpte_row_width_ub / (dml_float_t) *PixelPTEReqHeight * *PTERequestSize);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *dpte_row_width_ub);
+#endif
+ }
+ // GPUVM disabled: report 0 PTE bytes per row.
+ if (GPUVMEnable != true)
+ *PixelPTEBytesPerRow = 0;
+ // The storage output is a plain copy of the bandwidth output here.
+ *PixelPTEBytesPerRowStorage = *PixelPTEBytesPerRow;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
+ dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
+ dml_print("DML::%s: dpte_row_height = %u\n", __func__, *dpte_row_height);
+ dml_print("DML::%s: dpte_row_height_linear = %u\n", __func__, *dpte_row_height_linear);
+ dml_print("DML::%s: dpte_row_width_ub = %u\n", __func__, *dpte_row_width_ub);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *PixelPTEBytesPerRow);
+ dml_print("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *PixelPTEBytesPerRowStorage);
+ dml_print("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests);
+ dml_print("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *dpte_row_height_one_row_per_frame);
+ dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *dpte_row_width_ub_one_row_per_frame);
+ dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *PixelPTEBytesPerRow_one_row_per_frame);
+#endif
+
+ dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame); // printed outside the __DML_VBA_DEBUG__ guard
+
+ return PDEAndMetaPTEBytesFrame;
+} // CalculateVMAndRowBytes
+
+static void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct dml_display_cfg_st *display_cfg, dml_bool_t ptoi_supported) // For each active plane: saves PixelClock into PixelClockBackEnd, then doubles PixelClock when the plane is interlaced and ptoi_supported is true. Modifies display_cfg in place.
+{
+ dml_uint_t num_active_planes = dml_get_num_active_planes(display_cfg);
+
+ //Progressive To Interlace Unit Effect
+ for (dml_uint_t k = 0; k < num_active_planes; ++k) {
+ display_cfg->output.PixelClockBackEnd[k] = display_cfg->timing.PixelClock[k]; // copy taken before any doubling, so the back-end value is the unmodified clock
+ if (display_cfg->timing.Interlace[k] == 1 && ptoi_supported == true) {
+ display_cfg->timing.PixelClock[k] = 2 * display_cfg->timing.PixelClock[k]; // NOTE(review): calling this twice on the same display_cfg would double the clock again - confirm it runs once per config
+ }
+ }
+}
+
+static dml_float_t TruncToValidBPP( // Returns the bpp to use: DesiredBPP if it is valid, the largest supported bpp when DesiredBPP == 0, or __DML_DPP_INVALID__. Also writes *RequiredSlots.
+ dml_float_t LinkBitRate,
+ dml_uint_t Lanes,
+ dml_uint_t HTotal, // not referenced in this function
+ dml_uint_t HActive, // not referenced in this function
+ dml_float_t PixelClock,
+ dml_float_t DesiredBPP,
+ dml_bool_t DSCEnable,
+ enum dml_output_encoder_class Output,
+ enum dml_output_format_class Format,
+ dml_uint_t DSCInputBitPerComponent,
+ dml_uint_t DSCSlices, // not referenced in this function
+ dml_uint_t AudioRate, // not referenced in this function
+ dml_uint_t AudioLayout, // not referenced in this function
+ enum dml_odm_mode ODMModeNoDSC,
+ enum dml_odm_mode ODMModeDSC,
+
+ // Output
+ dml_uint_t *RequiredSlots)
+{
+ dml_float_t MaxLinkBPP;
+ dml_uint_t MinDSCBPP;
+ dml_float_t MaxDSCBPP;
+ dml_uint_t NonDSCBPP0;
+ dml_uint_t NonDSCBPP1;
+ dml_uint_t NonDSCBPP2;
+ // Per-format table: three non-DSC bpp candidates (ascending) plus the DSC bpp range [MinDSCBPP, MaxDSCBPP].
+ if (Format == dml_420) {
+ NonDSCBPP0 = 12;
+ NonDSCBPP1 = 15;
+ NonDSCBPP2 = 18;
+ MinDSCBPP = 6;
+ MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
+ } else if (Format == dml_444) {
+ NonDSCBPP0 = 24;
+ NonDSCBPP1 = 30;
+ NonDSCBPP2 = 36;
+ MinDSCBPP = 8;
+ MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
+ } else {
+ if (Output == dml_hdmi) { // all three candidates collapse to 24 for dml_hdmi in this format group
+ NonDSCBPP0 = 24;
+ NonDSCBPP1 = 24;
+ NonDSCBPP2 = 24;
+ } else {
+ NonDSCBPP0 = 16;
+ NonDSCBPP1 = 20;
+ NonDSCBPP2 = 24;
+ }
+ if (Format == dml_n422) {
+ MinDSCBPP = 7;
+ MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
+ } else {
+ MinDSCBPP = 8;
+ MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
+ }
+ }
+ // Link capacity expressed in bits per pixel; dml_dp2p0 and DSC over dml_dp each apply their own scaling factors.
+ if (Output == dml_dp2p0) {
+ MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 65540.0;
+ } else if (DSCEnable && Output == dml_dp) {
+ MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100);
+ } else {
+ MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock;
+ }
+ // ODM adjustment (ODMModeDSC or ODMModeNoDSC depending on DSCEnable): 4to1 caps at 16, 2to1 caps at 32, split_1to2 doubles MaxLinkBPP.
+ if (DSCEnable) {
+ if (ODMModeDSC == dml_odm_mode_combine_4to1) {
+ MaxLinkBPP = dml_min(MaxLinkBPP, 16);
+ } else if (ODMModeDSC == dml_odm_mode_combine_2to1) {
+ MaxLinkBPP = dml_min(MaxLinkBPP, 32);
+ } else if (ODMModeDSC == dml_odm_mode_split_1to2) {
+ MaxLinkBPP = 2 * MaxLinkBPP;
+ }
+ } else {
+ if (ODMModeNoDSC == dml_odm_mode_combine_4to1) {
+ MaxLinkBPP = dml_min(MaxLinkBPP, 16);
+ } else if (ODMModeNoDSC == dml_odm_mode_combine_2to1) {
+ MaxLinkBPP = dml_min(MaxLinkBPP, 32);
+ } else if (ODMModeNoDSC == dml_odm_mode_split_1to2) {
+ MaxLinkBPP = 2 * MaxLinkBPP;
+ }
+ }
+
+ *RequiredSlots = (dml_uint_t)(dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1)); // written on every call, before DesiredBPP is validated below
+ // DesiredBPP == 0 means "pick for me": choose the largest bpp the link supports. Any other value is only validated.
+ if (DesiredBPP == 0) {
+ if (DSCEnable) {
+ if (MaxLinkBPP < MinDSCBPP) {
+ return __DML_DPP_INVALID__;
+ } else if (MaxLinkBPP >= MaxDSCBPP) {
+ return MaxDSCBPP;
+ } else {
+ return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; // MaxLinkBPP taken down to a 1/16 bpp step (assuming dml_floor rounds down)
+ }
+ } else {
+ if (MaxLinkBPP >= NonDSCBPP2) {
+ return NonDSCBPP2;
+ } else if (MaxLinkBPP >= NonDSCBPP1) {
+ return NonDSCBPP1;
+ } else if (MaxLinkBPP >= NonDSCBPP0) {
+ return NonDSCBPP0;
+ } else {
+ return __DML_DPP_INVALID__;
+ }
+ }
+ } else { // NOTE(review): this branch checks DesiredBPP against the format tables only, never against MaxLinkBPP - confirm the caller checks link capacity (e.g. via *RequiredSlots)
+ if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0)) ||
+ (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
+ return __DML_DPP_INVALID__;
+ } else {
+ return DesiredBPP;
+ }
+ }
+} // TruncToValidBPP
+
+/*
+ * CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport - compute the watermark
+ * set, the per-surface clock-change latency margins and the resulting
+ * DRAM clock / FCLK / USR retraining support classification.
+ *
+ * @scratch: scratch storage; only its
+ *           CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals member
+ *           is used, to hold intermediate values.
+ * @p:       inputs (SOC latencies in p->mmSOCParameters, per-surface geometry
+ *           and timing arrays) together with the output pointers.
+ *
+ * Written through @p:
+ *   - every field of p->Watermark assigned below (urgent, USR retraining,
+ *     DRAM clock change, FCLK change, stutter, Z8 stutter and the three
+ *     writeback watermarks);
+ *   - per surface k: p->MaxActiveDRAMClockChangeLatencySupported[k],
+ *     p->ActiveDRAMClockChangeLatencyMargin[k] and
+ *     p->SubViewportLinesNeededInMALL[k];
+ *   - *p->USRRetrainingSupport, *p->FCLKChangeSupport and
+ *     *p->DRAMClockChangeSupport;
+ *   - *p->MaxActiveFCLKChangeLatencySupported, but only when at least one
+ *     surface is not a phantom pipe (otherwise it is left untouched).
+ *
+ * All per-surface arrays are indexed 0 .. p->NumberOfActiveSurfaces - 1.
+ */
+static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+		struct display_mode_lib_scratch_st *scratch,
+		struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *p)
+{
+	struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals_st *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals;
+
+	/* Display-side watermarks: sums of the SOC latencies they must cover. */
+	p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency;
+	p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency;
+	p->Watermark->DRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->Watermark->UrgentWatermark;
+	p->Watermark->FCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->Watermark->UrgentWatermark;
+	p->Watermark->StutterExitWatermark = p->mmSOCParameters.SRExitTime + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep;
+	p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep;
+	p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep;
+	p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency);
+	dml_print("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency);
+	dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency);
+	dml_print("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark);
+	dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark);
+	dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark);
+	dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark);
+	dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark);
+	dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark);
+	dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark);
+	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark);
+#endif
+
+	/* Count the surfaces that have writeback enabled. */
+	s->TotalActiveWriteback = 0;
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		if (p->WritebackEnable[k] == true) {
+			s->TotalActiveWriteback = s->TotalActiveWriteback + 1;
+		}
+	}
+
+	/*
+	 * Writeback watermarks.  With more than one active writeback an extra
+	 * WritebackChunkSize * 1024 / 32 / SOCCLK term is added to each of them.
+	 */
+	if (s->TotalActiveWriteback <= 1) {
+		p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency;
+	} else {
+		p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
+	}
+	if (p->USRRetrainingRequiredFinal)
+		p->Watermark->WritebackUrgentWatermark = p->Watermark->WritebackUrgentWatermark + p->mmSOCParameters.USRRetrainingLatency;
+
+	if (s->TotalActiveWriteback <= 1) {
+		p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency;
+		p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency;
+	} else {
+		p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
+		/* Same floating-point chunk term as the two writeback watermarks above. */
+		p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
+	}
+
+	if (p->USRRetrainingRequiredFinal)
+		p->Watermark->WritebackDRAMClockChangeWatermark = p->Watermark->WritebackDRAMClockChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
+
+	if (p->USRRetrainingRequiredFinal)
+		p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark);
+	dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark);
+	dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark);
+	dml_print("DML::%s: USRRetrainingRequiredFinal = %u\n", __func__, p->USRRetrainingRequiredFinal);
+	dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency);
+#endif
+
+	/*
+	 * Total pixel bandwidth over all surfaces; used below to share the
+	 * compressed buffer between surfaces when unbounded request is enabled.
+	 */
+	s->TotalPixelBW = 0.0;
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		s->TotalPixelBW = s->TotalPixelBW + p->DPPPerSurface[k]
+				* (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * p->VRatio[k] + p->SwathWidthC[k] * p->BytePerPixelDETC[k] * p->VRatioChroma[k]) / (p->HTotal[k] / p->PixelClock[k]);
+	}
+
+	/* Per-surface latency hiding and the margins derived from it. */
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+
+		/* Source lines the line buffer can hold, minus the lines the vertical taps consume. */
+		s->LBLatencyHidingSourceLinesY[k] = (dml_uint_t)(dml_min((dml_float_t)p->MaxLineBufferLines, dml_floor((dml_float_t)p->LineBufferSize / (dml_float_t)p->LBBitPerPixel[k] / ((dml_float_t)p->SwathWidthY[k] / dml_max(p->HRatio[k], 1.0)), 1)) - (p->VTaps[k] - 1));
+		s->LBLatencyHidingSourceLinesC[k] = (dml_uint_t)(dml_min((dml_float_t)p->MaxLineBufferLines, dml_floor((dml_float_t)p->LineBufferSize / (dml_float_t)p->LBBitPerPixel[k] / ((dml_float_t)p->SwathWidthC[k] / dml_max(p->HRatioChroma[k], 1.0)), 1)) - (p->VTapsChroma[k] - 1));
+
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines);
+		dml_print("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize);
+		dml_print("DML::%s: k=%u, LBBitPerPixel = %u\n", __func__, k, p->LBBitPerPixel[k]);
+		dml_print("DML::%s: k=%u, HRatio = %f\n", __func__, k, p->HRatio[k]);
+		dml_print("DML::%s: k=%u, VTaps = %u\n", __func__, k, p->VTaps[k]);
+#endif
+
+		/* Convert the buffered source lines to time: lines / VRatio * line time. */
+		s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / p->VRatio[k] * (p->HTotal[k] / p->PixelClock[k]);
+		s->EffectiveLBLatencyHidingC = s->LBLatencyHidingSourceLinesC[k] / p->VRatioChroma[k] * (p->HTotal[k] / p->PixelClock[k]);
+
+		/*
+		 * With unbounded request the luma DET is extended by this surface's
+		 * luma-bandwidth share of the compressed buffer.
+		 */
+		s->EffectiveDETBufferSizeY = p->DETBufferSizeY[k];
+		if (p->UnboundedRequestEnabled) {
+			s->EffectiveDETBufferSizeY = s->EffectiveDETBufferSizeY + p->CompressedBufferSizeInkByte * 1024 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * p->VRatio[k]) / (p->HTotal[k] / p->PixelClock[k]) / s->TotalPixelBW;
+		}
+
+		/* Whole swaths held in the DET, expressed as display time. */
+		s->LinesInDETY[k] = (dml_float_t)s->EffectiveDETBufferSizeY / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
+		s->LinesInDETYRoundedDownToSwath[k] = (dml_uint_t)(dml_floor(s->LinesInDETY[k], p->SwathHeightY[k]));
+		s->FullDETBufferingTimeY = s->LinesInDETYRoundedDownToSwath[k] * (p->HTotal[k] / p->PixelClock[k]) / p->VRatio[k];
+
+		s->ActiveClockChangeLatencyHidingY = s->EffectiveLBLatencyHidingY + s->FullDETBufferingTimeY - ((dml_float_t)p->DSTXAfterScaler[k] / (dml_float_t)p->HTotal[k] + (dml_float_t)p->DSTYAfterScaler[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k];
+
+		if (p->NumberOfActiveSurfaces > 1) {
+			s->ActiveClockChangeLatencyHidingY = s->ActiveClockChangeLatencyHidingY - (1.0 - 1.0 / (dml_float_t)p->NumberOfActiveSurfaces) * (dml_float_t)p->SwathHeightY[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k] / p->VRatio[k];
+		}
+
+		/* Same computation for chroma when present; the surface is limited by the smaller plane. */
+		if (p->BytePerPixelDETC[k] > 0) {
+			s->LinesInDETC[k] = p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k];
+			s->LinesInDETCRoundedDownToSwath[k] = (dml_uint_t)(dml_floor(s->LinesInDETC[k], p->SwathHeightC[k]));
+			s->FullDETBufferingTimeC = s->LinesInDETCRoundedDownToSwath[k] * (p->HTotal[k] / p->PixelClock[k]) / p->VRatioChroma[k];
+			s->ActiveClockChangeLatencyHidingC = s->EffectiveLBLatencyHidingC + s->FullDETBufferingTimeC - ((dml_float_t)p->DSTXAfterScaler[k] / (dml_float_t)p->HTotal[k] + (dml_float_t)p->DSTYAfterScaler[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k];
+			if (p->NumberOfActiveSurfaces > 1) {
+				s->ActiveClockChangeLatencyHidingC = s->ActiveClockChangeLatencyHidingC - (1.0 - 1.0 / (dml_float_t)p->NumberOfActiveSurfaces) * (dml_float_t)p->SwathHeightC[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k] / p->VRatioChroma[k];
+			}
+			s->ActiveClockChangeLatencyHiding = dml_min(s->ActiveClockChangeLatencyHidingY, s->ActiveClockChangeLatencyHidingC);
+		} else {
+			s->ActiveClockChangeLatencyHiding = s->ActiveClockChangeLatencyHidingY;
+		}
+
+		/* Margin = latency hiding available minus the watermark(s) that must be covered. */
+		s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->UrgentWatermark - p->Watermark->DRAMClockChangeWatermark;
+		s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->UrgentWatermark - p->Watermark->FCLKChangeWatermark;
+		s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark;
+
+		if (p->WritebackEnable[k]) {
+			s->WritebackLatencyHiding = (dml_float_t)p->WritebackInterfaceBufferSize * 1024.0 / ((dml_float_t)p->WritebackDestinationWidth[k] * (dml_float_t)p->WritebackDestinationHeight[k] / ((dml_float_t)p->WritebackSourceHeight[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k]) * 4.0);
+			if (p->WritebackPixelFormat[k] == dml_444_64) {
+				s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2;
+			}
+			s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark;
+
+			s->WritebackFCLKChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackFCLKChangeWatermark;
+
+			/*
+			 * A surface with writeback is limited by the tighter of its
+			 * display margin and its writeback margin.  Each clock domain
+			 * must be clamped with its own writeback margin: DRAM with the
+			 * writeback DRAM margin, FCLK with the writeback FCLK margin.
+			 */
+			s->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(s->ActiveDRAMClockChangeLatencyMargin[k], s->WritebackDRAMClockChangeLatencyMargin);
+			s->ActiveFCLKChangeLatencyMargin[k] = dml_min(s->ActiveFCLKChangeLatencyMargin[k], s->WritebackFCLKChangeLatencyMargin);
+		}
+		/* Phantom pipes report 0 as their supported DRAM clock change latency. */
+		p->MaxActiveDRAMClockChangeLatencySupported[k] = (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) ? 0 : (s->ActiveDRAMClockChangeLatencyMargin[k] + p->mmSOCParameters.DRAMClockChangeLatency);
+		p->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k];
+	}
+
+	/* USR retraining is supported unless a non-phantom surface has a negative margin. */
+	*p->USRRetrainingSupport = true;
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && (s->USRRetrainingLatencyMargin[k] < 0)) {
+			*p->USRRetrainingSupport = false;
+		}
+	}
+
+	/*
+	 * Smallest (margin + FCLKChangeLatency) over the non-phantom surfaces.
+	 * The first such surface seeds the minimum; if there is none the output
+	 * is not written.
+	 */
+	s->FoundCriticalSurface = false;
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && ((!s->FoundCriticalSurface)
+			|| ((s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency) < *p->MaxActiveFCLKChangeLatencySupported))) {
+			s->FoundCriticalSurface = true;
+			*p->MaxActiveFCLKChangeLatencySupported = s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency;
+		}
+	}
+
+	/*
+	 * SynchronizedSurfaces[i][j] is true when i == j, when the two surfaces
+	 * share a BlendingAndTiming master, when timing synchronization is
+	 * requested and their PixelClock/HTotal/VTotal/VActive all match, or when
+	 * DRR synchronization is requested and either of them is a DRR display.
+	 */
+	for (dml_uint_t i = 0; i < p->NumberOfActiveSurfaces; ++i) {
+		for (dml_uint_t j = 0; j < p->NumberOfActiveSurfaces; ++j) {
+			if (i == j ||
+				(p->BlendingAndTiming[i] == i && p->BlendingAndTiming[j] == i) ||
+				(p->BlendingAndTiming[j] == j && p->BlendingAndTiming[i] == j) ||
+				(p->BlendingAndTiming[i] == p->BlendingAndTiming[j] && p->BlendingAndTiming[i] != i) ||
+				(p->SynchronizeTimingsFinal && p->PixelClock[i] == p->PixelClock[j] && p->HTotal[i] == p->HTotal[j] && p->VTotal[i] == p->VTotal[j] && p->VActive[i] == p->VActive[j]) ||
+				(p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && (p->DRRDisplay[i] || p->DRRDisplay[j]))) {
+				s->SynchronizedSurfaces[i][j] = true;
+			} else {
+				s->SynchronizedSurfaces[i][j] = false;
+			}
+		}
+	}
+
+	/*
+	 * FCLKChangeSupportNumber, as mapped right after this loop:
+	 *   0 -> vactive, 1 or 2 -> vblank, anything else -> unsupported.
+	 * It is raised for every non-phantom surface with a negative FCLK margin.
+	 */
+	s->FCLKChangeSupportNumber = 0;
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && (s->ActiveFCLKChangeLatencyMargin[k] < 0)) {
+			if (!(p->PrefetchMode[k] <= 1)) {
+				s->FCLKChangeSupportNumber = 3;
+			} else if (s->FCLKChangeSupportNumber == 0) {
+				s->FCLKChangeSupportNumber = ((p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && p->DRRDisplay[k]) ? 2 : 1);
+				s->LastSurfaceWithoutMargin = k;
+			} else if (((s->FCLKChangeSupportNumber == 1) && (p->DRRDisplay[k] || (!s->SynchronizedSurfaces[s->LastSurfaceWithoutMargin][k]))) || (s->FCLKChangeSupportNumber == 2))
+				s->FCLKChangeSupportNumber = 3;
+		}
+	}
+
+	if (s->FCLKChangeSupportNumber == 0) {
+		*p->FCLKChangeSupport = dml_fclock_change_vactive;
+	} else if ((s->FCLKChangeSupportNumber == 1) || (s->FCLKChangeSupportNumber == 2)) {
+		*p->FCLKChangeSupport = dml_fclock_change_vblank;
+	} else {
+		*p->FCLKChangeSupport = dml_fclock_change_unsupported;
+	}
+
+	/*
+	 * DRAMClockChangeMethod: 0 = no MALL usage, 1 = MALL full frame,
+	 * 2 = MALL sub-viewport.  The last surface using either MALL mode
+	 * determines the value.
+	 */
+	s->DRAMClockChangeMethod = 0;
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		if (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame)
+			s->DRAMClockChangeMethod = 1;
+		else if (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport)
+			s->DRAMClockChangeMethod = 2;
+	}
+
+	/*
+	 * DRAMClockChangeSupportNumber, as mapped after this loop:
+	 *   0 -> vactive, 1 -> vblank, 2 -> vblank with DRR, else unsupported.
+	 * A surface counts against support when it has a negative DRAM margin
+	 * (method 0) or does not use the MALL mode the method requires.
+	 */
+	s->DRAMClockChangeSupportNumber = 0;
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		if (((s->DRAMClockChangeMethod == 0) && (s->ActiveDRAMClockChangeLatencyMargin[k] < 0)) ||
+			((s->DRAMClockChangeMethod == 1) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_full_frame)) ||
+			((s->DRAMClockChangeMethod == 2) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_sub_viewport) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe))) {
+			if (p->PrefetchMode[k] != 0) { // Don't need to support DRAM clock change, PrefetchMode 0 means needs DRAM clock change support
+				s->DRAMClockChangeSupportNumber = 3;
+			} else if (s->DRAMClockChangeSupportNumber == 0) {
+				s->DRAMClockChangeSupportNumber = (p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && p->DRRDisplay[k]) ? 2 : 1;
+				s->LastSurfaceWithoutMargin = k;
+			} else if (((s->DRAMClockChangeSupportNumber == 1) && (p->DRRDisplay[k] || !s->SynchronizedSurfaces[s->LastSurfaceWithoutMargin][k])) || (s->DRAMClockChangeSupportNumber == 2)) {
+				s->DRAMClockChangeSupportNumber = 3;
+			}
+		}
+	}
+
+	if (s->DRAMClockChangeMethod == 0) { // No MALL usage
+		if (s->DRAMClockChangeSupportNumber == 0) {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_vactive;
+		} else if (s->DRAMClockChangeSupportNumber == 1) {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_vblank;
+		} else if (s->DRAMClockChangeSupportNumber == 2) {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr;
+		} else {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported;
+		}
+	} else if (s->DRAMClockChangeMethod == 1) { // Any pipe using MALL full frame
+		if (s->DRAMClockChangeSupportNumber == 0) {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_vactive_w_mall_full_frame;
+		} else if (s->DRAMClockChangeSupportNumber == 1) {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_w_mall_full_frame;
+		} else if (s->DRAMClockChangeSupportNumber == 2) {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr_w_mall_full_frame;
+		} else {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported;
+		}
+	} else { // Any pipe using MALL subviewport
+		if (s->DRAMClockChangeSupportNumber == 0) {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_vactive_w_mall_sub_vp;
+		} else if (s->DRAMClockChangeSupportNumber == 1) {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_w_mall_sub_vp;
+		} else if (s->DRAMClockChangeSupportNumber == 2) {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr_w_mall_sub_vp;
+		} else {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported;
+		}
+	}
+
+	/*
+	 * Sub-viewport lines needed in MALL per surface: source lines consumed
+	 * during the p-state change, plus lines already buffered ahead (DET and
+	 * line buffer), plus one meta row; the larger of luma and chroma.
+	 */
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		s->dst_y_pstate = (dml_uint_t)(dml_ceil((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (p->HTotal[k] / p->PixelClock[k]), 1));
+		s->src_y_pstate_l = (dml_uint_t)(dml_ceil(s->dst_y_pstate * p->VRatio[k], p->SwathHeightY[k]));
+		s->src_y_ahead_l = (dml_uint_t)(dml_floor(p->DETBufferSizeY[k] / p->BytePerPixelDETY[k] / p->SwathWidthY[k], p->SwathHeightY[k]) + s->LBLatencyHidingSourceLinesY[k]);
+		s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height[k];
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
+		dml_print("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
+		dml_print("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
+		dml_print("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
+		dml_print("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]);
+		dml_print("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate);
+		dml_print("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l);
+		dml_print("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l);
+		dml_print("DML::%s: k=%u, meta_row_height = %u\n", __func__, k, p->meta_row_height[k]);
+		dml_print("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l);
+#endif
+		p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l;
+
+		if (p->BytePerPixelDETC[k] > 0) {
+			s->src_y_pstate_c = (dml_uint_t)(dml_ceil(s->dst_y_pstate * p->VRatioChroma[k], p->SwathHeightC[k]));
+			s->src_y_ahead_c = (dml_uint_t)(dml_floor(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]);
+			s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_chroma[k];
+			p->SubViewportLinesNeededInMALL[k] = (dml_uint_t)(dml_max(s->sub_vp_lines_l, s->sub_vp_lines_c));
+
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c);
+			dml_print("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c);
+			dml_print("DML::%s: k=%u, meta_row_height_chroma = %u\n", __func__, k, p->meta_row_height_chroma[k]);
+			dml_print("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c);
+#endif
+		}
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->DRAMClockChangeSupport);
+	dml_print("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->FCLKChangeSupport);
+	dml_print("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported);
+	dml_print("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport);
+#endif
+} // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
+
+/*
+ * CalculateDCFCLKDeepSleep - compute the DCFCLK deep-sleep value.
+ *
+ * For every active surface a per-surface requirement is derived from the
+ * luma (and, when BytePerPixelC[k] is non-zero, chroma) line delivery time
+ * and is floored at PixelClock[k] / 16.  The value stored in
+ * *DCFClkDeepSleep is the largest of:
+ *   - 8.0,
+ *   - __DML_MIN_DCFCLK_FACTOR__ * (sum of luma + chroma read bandwidth)
+ *     / ReturnBusWidth,
+ *   - every per-surface requirement.
+ *
+ * All array arguments are indexed by surface,
+ * 0 .. NumberOfActiveSurfaces - 1.
+ *
+ * @DCFClkDeepSleep: output, always written.
+ */
+static void CalculateDCFCLKDeepSleep(
+		dml_uint_t NumberOfActiveSurfaces,
+		dml_uint_t BytePerPixelY[],
+		dml_uint_t BytePerPixelC[],
+		dml_float_t VRatio[],
+		dml_float_t VRatioChroma[],
+		dml_uint_t SwathWidthY[],
+		dml_uint_t SwathWidthC[],
+		dml_uint_t DPPPerSurface[],
+		dml_float_t HRatio[],
+		dml_float_t HRatioChroma[],
+		dml_float_t PixelClock[],
+		dml_float_t PSCL_THROUGHPUT[],
+		dml_float_t PSCL_THROUGHPUT_CHROMA[],
+		dml_float_t Dppclk[],
+		dml_float_t ReadBandwidthLuma[],
+		dml_float_t ReadBandwidthChroma[],
+		dml_uint_t ReturnBusWidth,
+
+		// Output
+		dml_float_t *DCFClkDeepSleep)
+{
+	dml_float_t DisplayPipeLineDeliveryTimeLuma;
+	dml_float_t DisplayPipeLineDeliveryTimeChroma;
+	dml_float_t DCFClkDeepSleepPerSurface[__DML_NUM_PLANES__];
+	dml_float_t ReadBandwidth = 0.0;
+
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+		/*
+		 * Line delivery time: paced by the pixel clock when not
+		 * down-scaling vertically (ratio <= 1), otherwise by the scaler
+		 * throughput at Dppclk.
+		 */
+		if (VRatio[k] <= 1) {
+			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
+		} else {
+			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+		}
+		if (BytePerPixelC[k] == 0) {
+			DisplayPipeLineDeliveryTimeChroma = 0;
+		} else {
+			if (VRatioChroma[k] <= 1) {
+				DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
+			} else {
+				DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+			}
+		}
+
+		/*
+		 * Per-surface requirement from bytes per line over delivery time.
+		 * NOTE(review): the dual-plane case divides by 32.0 while the
+		 * single-plane case divides by 64.0 - confirm this is intended.
+		 */
+		if (BytePerPixelC[k] > 0) {
+			DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
+					__DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
+		} else {
+			DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
+		}
+		/* Never go below one sixteenth of the surface's pixel clock. */
+		DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u, PixelClock = %f\n", __func__, k, PixelClock[k]);
+		dml_print("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
+#endif
+	}
+
+	/* Aggregate read bandwidth of all surfaces. */
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+		ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
+	}
+
+	/* Bandwidth-based requirement with a fixed lower bound of 8.0. */
+	*DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (dml_float_t) ReturnBusWidth);
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
+	dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
+	dml_print("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth);
+	dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
+#endif
+
+	/* The final value must also satisfy every individual surface. */
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+		*DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
+	}
+	/* Guarded like every other trace in this function. */
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
+#endif
+} // CalculateDCFCLKDeepSleep
+
+/*
+ * Turn a buffer depth expressed in time into an urgent burst factor.
+ *
+ * When the buffer does not outlast UrgentLatency the shared
+ * *NotEnoughUrgentLatencyHiding flag is set and 0 is returned; otherwise the
+ * factor is BufferSizeInTime / (BufferSizeInTime - UrgentLatency).
+ */
+static dml_float_t UrgentBurstFactorFromBufferTime(
+		dml_float_t BufferSizeInTime,
+		dml_float_t UrgentLatency,
+		dml_bool_t *NotEnoughUrgentLatencyHiding)
+{
+	if (BufferSizeInTime - UrgentLatency <= 0) {
+		*NotEnoughUrgentLatencyHiding = 1;
+		return 0;
+	}
+
+	return BufferSizeInTime / (BufferSizeInTime - UrgentLatency);
+}
+
+/*
+ * CalculateUrgentBurstFactor - urgent burst factors for the cursor, luma and
+ * chroma buffers of one surface.
+ *
+ * Each factor is 1 when the matching vertical ratio is not positive, and
+ * otherwise comes from the time the buffer can cover (whole lines or swaths
+ * held * LineTime / ratio) compared against UrgentLatency.
+ *
+ * Outputs:
+ *   *NotEnoughUrgentLatencyHiding - cleared on entry, set to 1 as soon as
+ *       any evaluated buffer cannot cover UrgentLatency.
+ *   *UrgentBurstFactorCursor      - written only when CursorWidth > 0.
+ *   *UrgentBurstFactorLuma        - always written.
+ *   *UrgentBurstFactorChroma      - written only when BytePerPixelInDETC > 0.
+ *
+ * For a phantom pipe the DET size is replaced by the constant 1024*1024.
+ */
+static void CalculateUrgentBurstFactor(
+		enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
+		dml_uint_t swath_width_luma_ub,
+		dml_uint_t swath_width_chroma_ub,
+		dml_uint_t SwathHeightY,
+		dml_uint_t SwathHeightC,
+		dml_float_t LineTime,
+		dml_float_t UrgentLatency,
+		dml_float_t CursorBufferSize,
+		dml_uint_t CursorWidth,
+		dml_uint_t CursorBPP,
+		dml_float_t VRatio,
+		dml_float_t VRatioC,
+		dml_float_t BytePerPixelInDETY,
+		dml_float_t BytePerPixelInDETC,
+		dml_uint_t DETBufferSizeY,
+		dml_uint_t DETBufferSizeC,
+		// Output
+		dml_float_t *UrgentBurstFactorCursor,
+		dml_float_t *UrgentBurstFactorLuma,
+		dml_float_t *UrgentBurstFactorChroma,
+		dml_bool_t *NotEnoughUrgentLatencyHiding)
+{
+	dml_float_t DETLinesLuma;
+
+	*NotEnoughUrgentLatencyHiding = 0;
+
+	/* Cursor: the buffer holds a power-of-two number of cursor lines. */
+	if (CursorWidth > 0) {
+		dml_uint_t CursorLines = 1 << (dml_uint_t) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
+
+		if (VRatio > 0)
+			*UrgentBurstFactorCursor = UrgentBurstFactorFromBufferTime(CursorLines * LineTime / VRatio, UrgentLatency, NotEnoughUrgentLatencyHiding);
+		else
+			*UrgentBurstFactorCursor = 1;
+	}
+
+	/* Luma: lines in the DET, rounded down to whole swaths. */
+	DETLinesLuma = (UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe ? 1024*1024 : DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
+
+	if (VRatio > 0)
+		*UrgentBurstFactorLuma = UrgentBurstFactorFromBufferTime(dml_floor(DETLinesLuma, SwathHeightY) * LineTime / VRatio, UrgentLatency, NotEnoughUrgentLatencyHiding);
+	else
+		*UrgentBurstFactorLuma = 1;
+
+	/* Chroma: same computation, only for surfaces that have a chroma plane. */
+	if (BytePerPixelInDETC > 0) {
+		dml_float_t DETLinesChroma = (UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe ? 1024*1024 : DETBufferSizeC) / BytePerPixelInDETC / swath_width_chroma_ub;
+
+		if (VRatioC > 0)
+			*UrgentBurstFactorChroma = UrgentBurstFactorFromBufferTime(dml_floor(DETLinesChroma, SwathHeightC) * LineTime / VRatioC, UrgentLatency, NotEnoughUrgentLatencyHiding);
+		else
+			*UrgentBurstFactorChroma = 1;
+	}
+} // CalculateUrgentBurstFactor
+
+/*
+ * CalculatePixelDeliveryTimes - per-surface line, request and cursor request
+ * delivery times, for both active display and prefetch.
+ *
+ * Three passes over the active surfaces (all arrays are indexed
+ * 0 .. NumberOfActiveSurfaces - 1):
+ *   1. Line delivery time per plane.  When the relevant vertical ratio is
+ *      <= 1 it is swath width * DPPPerSurface / HRatio / PixelClock,
+ *      otherwise swath width / PSCL throughput / Dppclk.  The prefetch
+ *      variants use VRatioPrefetchY/C for that decision.  Chroma times are 0
+ *      when BytePerPixelC[k] is 0.
+ *   2. Request delivery time = line delivery time / requests per swath,
+ *      where requests per swath is the swath width divided by the 256-byte
+ *      block width (or block height for vertically rotated surfaces).
+ *   3. Cursor request delivery time, 0 when the surface has no cursor.
+ *
+ * The ten trailing array parameters are outputs.
+ */
+static void CalculatePixelDeliveryTimes(
+		dml_uint_t NumberOfActiveSurfaces,
+		dml_float_t VRatio[],
+		dml_float_t VRatioChroma[],
+		dml_float_t VRatioPrefetchY[],
+		dml_float_t VRatioPrefetchC[],
+		dml_uint_t swath_width_luma_ub[],
+		dml_uint_t swath_width_chroma_ub[],
+		dml_uint_t DPPPerSurface[],
+		dml_float_t HRatio[],
+		dml_float_t HRatioChroma[],
+		dml_float_t PixelClock[],
+		dml_float_t PSCL_THROUGHPUT[],
+		dml_float_t PSCL_THROUGHPUT_CHROMA[],
+		dml_float_t Dppclk[],
+		dml_uint_t BytePerPixelC[],
+		enum dml_rotation_angle SourceScan[],
+		dml_uint_t NumberOfCursors[],
+		dml_uint_t CursorWidth[],
+		dml_uint_t CursorBPP[],
+		dml_uint_t BlockWidth256BytesY[],
+		dml_uint_t BlockHeight256BytesY[],
+		dml_uint_t BlockWidth256BytesC[],
+		dml_uint_t BlockHeight256BytesC[],
+
+		// Output
+		dml_float_t DisplayPipeLineDeliveryTimeLuma[],
+		dml_float_t DisplayPipeLineDeliveryTimeChroma[],
+		dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[],
+		dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[],
+		dml_float_t DisplayPipeRequestDeliveryTimeLuma[],
+		dml_float_t DisplayPipeRequestDeliveryTimeChroma[],
+		dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[],
+		dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[],
+		dml_float_t CursorRequestDeliveryTime[],
+		dml_float_t CursorRequestDeliveryTimePrefetch[])
+{
+	dml_uint_t k;
+
+	/* Pass 1: line delivery times. */
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u : HRatio = %f\n", __func__, k, HRatio[k]);
+		dml_print("DML::%s: k=%u : VRatio = %f\n", __func__, k, VRatio[k]);
+		dml_print("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
+		dml_print("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
+		dml_print("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]);
+		dml_print("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]);
+		dml_print("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
+		dml_print("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
+		dml_print("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]);
+		dml_print("DML::%s: k=%u : PixelClock = %f\n", __func__, k, PixelClock[k]);
+		dml_print("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]);
+#endif
+
+		/* Luma, active and prefetch; only the selected formula is evaluated. */
+		DisplayPipeLineDeliveryTimeLuma[k] = (VRatio[k] <= 1) ?
+				swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k] :
+				swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+
+		DisplayPipeLineDeliveryTimeLumaPrefetch[k] = (VRatioPrefetchY[k] <= 1) ?
+				swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k] :
+				swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+
+		/* Chroma, active and prefetch; zero when there is no chroma plane. */
+		if (BytePerPixelC[k] == 0) {
+			DisplayPipeLineDeliveryTimeChroma[k] = 0;
+			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
+		} else {
+			DisplayPipeLineDeliveryTimeChroma[k] = (VRatioChroma[k] <= 1) ?
+					swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k] :
+					swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+
+			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = (VRatioPrefetchC[k] <= 1) ?
+					swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k] :
+					swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+		}
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
+		dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
+		dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
+		dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
+#endif
+	}
+
+	/* Pass 2: request delivery times. */
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		dml_float_t requests_per_swath;
+
+		/* Unsigned integer division, as the operands are both dml_uint_t. */
+		requests_per_swath = swath_width_luma_ub[k] /
+				(dml_is_vertical_rotation(SourceScan[k]) ? BlockHeight256BytesY[k] : BlockWidth256BytesY[k]);
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u : req_per_swath_ub = %f (Luma)\n", __func__, k, requests_per_swath);
+#endif
+
+		DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / requests_per_swath;
+		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / requests_per_swath;
+
+		if (BytePerPixelC[k] != 0) {
+			requests_per_swath = swath_width_chroma_ub[k] /
+					(dml_is_vertical_rotation(SourceScan[k]) ? BlockHeight256BytesC[k] : BlockWidth256BytesC[k]);
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: k=%u : req_per_swath_ub = %f (Chroma)\n", __func__, k, requests_per_swath);
+#endif
+			DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / requests_per_swath;
+			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / requests_per_swath;
+		} else {
+			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
+			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
+		}
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
+		dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
+		dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
+		dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
+#endif
+	}
+
+	/* Pass 3: cursor request delivery times. */
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		/* Cursor line size in bits divided by 256 and by 8, rounded up. */
+		dml_uint_t cursor_req_per_width = (dml_uint_t)(dml_ceil((dml_float_t) CursorWidth[k] * (dml_float_t) CursorBPP[k] / 256.0 / 8.0, 1.0));
+
+		if (NumberOfCursors[k] == 0) {
+			CursorRequestDeliveryTime[k] = 0;
+			CursorRequestDeliveryTimePrefetch[k] = 0;
+		} else {
+			CursorRequestDeliveryTime[k] = (VRatio[k] <= 1) ?
+					(dml_float_t) CursorWidth[k] / HRatio[k] / PixelClock[k] / cursor_req_per_width :
+					(dml_float_t) CursorWidth[k] / PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
+
+			CursorRequestDeliveryTimePrefetch[k] = (VRatioPrefetchY[k] <= 1) ?
+					(dml_float_t) CursorWidth[k] / HRatio[k] / PixelClock[k] / cursor_req_per_width :
+					(dml_float_t) CursorWidth[k] / PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
+		}
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u : NumberOfCursors = %u\n", __func__, k, NumberOfCursors[k]);
+		dml_print("DML::%s: k=%u : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
+		dml_print("DML::%s: k=%u : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
+#endif
+	}
+} // CalculatePixelDeliveryTimes
+
+static void CalculateMetaAndPTETimes(
+ dml_bool_t use_one_row_for_frame[],
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_bool_t GPUVMEnable,
+ dml_uint_t MetaChunkSize,
+ dml_uint_t MinMetaChunkSizeBytes,
+ dml_uint_t HTotal[],
+ dml_float_t VRatio[],
+ dml_float_t VRatioChroma[],
+ dml_float_t DestinationLinesToRequestRowInVBlank[],
+ dml_float_t DestinationLinesToRequestRowInImmediateFlip[],
+ dml_bool_t DCCEnable[],
+ dml_float_t PixelClock[],
+ dml_uint_t BytePerPixelY[],
+ dml_uint_t BytePerPixelC[],
+ enum dml_rotation_angle SourceScan[],
+ dml_uint_t dpte_row_height[],
+ dml_uint_t dpte_row_height_chroma[],
+ dml_uint_t meta_row_width[],
+ dml_uint_t meta_row_width_chroma[],
+ dml_uint_t meta_row_height[],
+ dml_uint_t meta_row_height_chroma[],
+ dml_uint_t meta_req_width[],
+ dml_uint_t meta_req_width_chroma[],
+ dml_uint_t meta_req_height[],
+ dml_uint_t meta_req_height_chroma[],
+ dml_uint_t dpte_group_bytes[],
+ dml_uint_t PTERequestSizeY[],
+ dml_uint_t PTERequestSizeC[],
+ dml_uint_t PixelPTEReqWidthY[],
+ dml_uint_t PixelPTEReqHeightY[],
+ dml_uint_t PixelPTEReqWidthC[],
+ dml_uint_t PixelPTEReqHeightC[],
+ dml_uint_t dpte_row_width_luma_ub[],
+ dml_uint_t dpte_row_width_chroma_ub[],
+
+ // Output
+ dml_float_t DST_Y_PER_PTE_ROW_NOM_L[],
+ dml_float_t DST_Y_PER_PTE_ROW_NOM_C[],
+ dml_float_t DST_Y_PER_META_ROW_NOM_L[],
+ dml_float_t DST_Y_PER_META_ROW_NOM_C[],
+ dml_float_t TimePerMetaChunkNominal[],
+ dml_float_t TimePerChromaMetaChunkNominal[],
+ dml_float_t TimePerMetaChunkVBlank[],
+ dml_float_t TimePerChromaMetaChunkVBlank[],
+ dml_float_t TimePerMetaChunkFlip[],
+ dml_float_t TimePerChromaMetaChunkFlip[],
+ dml_float_t time_per_pte_group_nom_luma[],
+ dml_float_t time_per_pte_group_vblank_luma[],
+ dml_float_t time_per_pte_group_flip_luma[],
+ dml_float_t time_per_pte_group_nom_chroma[],
+ dml_float_t time_per_pte_group_vblank_chroma[],
+ dml_float_t time_per_pte_group_flip_chroma[])
+{ /*
+ * CalculateMetaAndPTETimes - fill the per-surface outputs listed under "Output" for every k in [0, NumberOfActiveSurfaces).
+ *
+ * The work is done in three separate passes over the surfaces:
+ *  1. DST_Y_PER_PTE_ROW_NOM_L/C and DST_Y_PER_META_ROW_NOM_L/C: a row height divided by the matching vertical ratio.
+ *     The chroma entries are written as 0 when BytePerPixelC[k] == 0.
+ *  2. TimePerMetaChunk* and TimePerChromaMetaChunk*: computed only when DCCEnable[k] is true, otherwise written as 0.
+ *  3. time_per_pte_group_*: computed only when GPUVMEnable is true, otherwise written as 0.
+ *
+ * Nothing is returned by value; every result is stored through the output array parameters.
+ * Every time value has the form lines * HTotal[k] / PixelClock[k] / count, evaluated left to right.
+ * NOTE(review): the resulting time unit depends on the unit of PixelClock, which is not visible here - confirm with callers.
+ */
+ dml_uint_t meta_chunk_width; /* pass 2, luma: derived from MetaChunkSize */
+ dml_uint_t min_meta_chunk_width; /* pass 2, luma: derived from MinMetaChunkSizeBytes */
+ dml_uint_t meta_chunk_per_row_int; /* pass 2, luma: quotient of meta_row_width by meta_chunk_width */
+ dml_uint_t meta_row_remainder; /* pass 2, luma: remainder of the same division */
+ dml_uint_t meta_chunk_threshold; /* pass 2, luma: remainder limit that selects +1 or +2 below */
+ dml_uint_t meta_chunks_per_row_ub; /* pass 2, luma: divisor used for the three luma meta chunk times */
+ dml_uint_t meta_chunk_width_chroma; /* chroma counterparts of the six locals above */
+ dml_uint_t min_meta_chunk_width_chroma;
+ dml_uint_t meta_chunk_per_row_int_chroma;
+ dml_uint_t meta_row_remainder_chroma;
+ dml_uint_t meta_chunk_threshold_chroma;
+ dml_uint_t meta_chunks_per_row_ub_chroma;
+ dml_uint_t dpte_group_width_luma; /* pass 3, luma: group width derived from dpte_group_bytes and the PTE request size */
+ dml_uint_t dpte_groups_per_row_luma_ub; /* pass 3, luma: divisor used for the three luma PTE group times */
+ dml_uint_t dpte_group_width_chroma; /* pass 3, chroma counterparts */
+ dml_uint_t dpte_groups_per_row_chroma_ub;
+
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { /* pass 1: row heights scaled by 1 / vertical ratio */
+ DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; /* integer height divided by a dml_float_t ratio */
+ if (BytePerPixelC[k] == 0) { /* no chroma bytes per pixel: chroma output is 0 */
+ DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
+ } else {
+ DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
+ }
+ DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
+ if (BytePerPixelC[k] == 0) {
+ DST_Y_PER_META_ROW_NOM_C[k] = 0;
+ } else {
+ DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
+ }
+ }
+
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { /* pass 2: meta chunk times, only meaningful with DCC */
+ if (DCCEnable[k] == true) {
+ meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k]; /* all operands are dml_uint_t: each division truncates */
+ min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k]; /* same shape, without the * 1024 factor */
+ meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; /* NOTE(review): no guard here against meta_chunk_width == 0 */
+ meta_row_remainder = meta_row_width[k] % meta_chunk_width;
+ if (!dml_is_vertical_rotation(SourceScan[k])) { /* request width is subtracted unless the helper reports vertical rotation, then request height */
+ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; /* NOTE(review): dml_uint_t is presumably unsigned, so this would wrap if the request size exceeded 2 * min_meta_chunk_width - confirm */
+ } else {
+ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
+ }
+ if (meta_row_remainder <= meta_chunk_threshold) { /* chunk count is the whole-chunk quotient plus 1, or plus 2 when the remainder exceeds the threshold */
+ meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
+ } else {
+ meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
+ }
+ TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; /* same leading term as DST_Y_PER_META_ROW_NOM_L[k] */
+ TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+ TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+ if (BytePerPixelC[k] == 0) { /* no chroma bytes per pixel: chroma meta chunk times are 0 */
+ TimePerChromaMetaChunkNominal[k] = 0;
+ TimePerChromaMetaChunkVBlank[k] = 0;
+ TimePerChromaMetaChunkFlip[k] = 0;
+ } else {
+ meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
+ min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
+ meta_chunk_per_row_int_chroma = (dml_uint_t)((dml_float_t) meta_row_width_chroma[k] / meta_chunk_width_chroma); /* float divide then cast back; the luma path above uses plain integer division */
+ meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
+ if (!dml_is_vertical_rotation(SourceScan[k])) {
+ meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
+ } else {
+ meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
+ }
+ if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
+ meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
+ } else {
+ meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
+ }
+ TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+ TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; /* same vblank/flip line inputs as luma, divided by the chroma chunk count */
+ TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+ }
+ } else { /* DCC disabled for this surface: all six meta chunk times are 0 */
+ TimePerMetaChunkNominal[k] = 0;
+ TimePerMetaChunkVBlank[k] = 0;
+ TimePerMetaChunkFlip[k] = 0;
+ TimePerChromaMetaChunkNominal[k] = 0;
+ TimePerChromaMetaChunkVBlank[k] = 0;
+ TimePerChromaMetaChunkFlip[k] = 0;
+ }
+ }
+
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { /* pass 3: PTE group times, only meaningful with GPUVM */
+ if (GPUVMEnable == true) {
+ if (!dml_is_vertical_rotation(SourceScan[k])) { /* PTE request width is used unless the helper reports vertical rotation, then request height */
+ dpte_group_width_luma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeY[k] * PixelPTEReqWidthY[k]); /* float divide and multiply, then cast back to dml_uint_t */
+ } else {
+ dpte_group_width_luma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeY[k] * PixelPTEReqHeightY[k]);
+ }
+
+ if (use_one_row_for_frame[k]) { /* this branch divides the ratio by a further 2.0 before dml_ceil */
+ dpte_groups_per_row_luma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_luma_ub[k] / (dml_float_t) dpte_group_width_luma / 2.0, 1.0)); /* presumably dml_ceil(x, 1.0) rounds x up to a whole number - confirm against the helper */
+ } else {
+ dpte_groups_per_row_luma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_luma_ub[k] / (dml_float_t) dpte_group_width_luma, 1.0));
+ }
+
+ dml_print("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, use_one_row_for_frame[k]); /* NOTE(review): this run of prints is not inside a __DML_VBA_DEBUG__ guard, unlike the prints at the end of this loop */
+ dml_print("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, dpte_group_bytes[k]);
+ dml_print("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, PTERequestSizeY[k]);
+ dml_print("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, PixelPTEReqWidthY[k]);
+ dml_print("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, PixelPTEReqHeightY[k]);
+ dml_print("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, dpte_row_width_luma_ub[k]);
+ dml_print("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma);
+ dml_print("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub);
+
+ time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; /* reads the value stored for this k in pass 1 */
+ time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+ time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+ if (BytePerPixelC[k] == 0) { /* no chroma bytes per pixel: chroma PTE group times are 0 */
+ time_per_pte_group_nom_chroma[k] = 0;
+ time_per_pte_group_vblank_chroma[k] = 0;
+ time_per_pte_group_flip_chroma[k] = 0;
+ } else {
+ if (!dml_is_vertical_rotation(SourceScan[k])) {
+ dpte_group_width_chroma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeC[k] * PixelPTEReqWidthC[k]);
+ } else {
+ dpte_group_width_chroma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeC[k] * PixelPTEReqHeightC[k]);
+ }
+
+ if (use_one_row_for_frame[k]) {
+ dpte_groups_per_row_chroma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_chroma_ub[k] / (dml_float_t) dpte_group_width_chroma / 2.0, 1.0));
+ } else {
+ dpte_groups_per_row_chroma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_chroma_ub[k] / (dml_float_t) dpte_group_width_chroma, 1.0));
+ }
+ dml_print("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, dpte_row_width_chroma_ub[k]); /* also outside any __DML_VBA_DEBUG__ guard */
+ dml_print("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma);
+ dml_print("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub);
+
+ time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+ time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+ time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+ }
+ } else { /* GPUVM disabled: all six PTE group times are 0 */
+ time_per_pte_group_nom_luma[k] = 0;
+ time_per_pte_group_vblank_luma[k] = 0;
+ time_per_pte_group_flip_luma[k] = 0;
+ time_per_pte_group_nom_chroma[k] = 0;
+ time_per_pte_group_vblank_chroma[k] = 0;
+ time_per_pte_group_flip_chroma[k] = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, DestinationLinesToRequestRowInVBlank = %f\n", __func__, k, DestinationLinesToRequestRowInVBlank[k]);
+ dml_print("DML::%s: k=%u, DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
+
+ dml_print("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
+ dml_print("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
+ dml_print("DML::%s: k=%u, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
+ dml_print("DML::%s: k=%u, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
+ dml_print("DML::%s: k=%u, TimePerMetaChunkNominal = %f\n", __func__, k, TimePerMetaChunkNominal[k]);
+ dml_print("DML::%s: k=%u, TimePerMetaChunkVBlank = %f\n", __func__, k, TimePerMetaChunkVBlank[k]);
+ dml_print("DML::%s: k=%u, TimePerMetaChunkFlip = %f\n", __func__, k, TimePerMetaChunkFlip[k]);
+ dml_print("DML::%s: k=%u, TimePerChromaMetaChunkNominal = %f\n", __func__, k, TimePerChromaMetaChunkNominal[k]);
+ dml_print("DML::%s: k=%u, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, TimePerChromaMetaChunkVBlank[k]);
+ dml_print("DML::%s: k=%u, TimePerChromaMetaChunkFlip = %f\n", __func__, k, TimePerChromaMetaChunkFlip[k]);
+ dml_print("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, time_per_pte_group_nom_luma[k]);
+ dml_print("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, time_per_pte_group_vblank_luma[k]);
+ dml_print("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, time_per_pte_group_flip_luma[k]);
+ dml_print("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, time_per_pte_group_nom_chroma[k]);
+ dml_print("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, time_per_pte_group_vblank_chroma[k]);
+ dml_print("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, time_per_pte_group_flip_chroma[k]);
+#endif
+ }
+} // CalculateMetaAndPTETimes
+
+static void CalculateVMGroupAndRequestTimes(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_bool_t GPUVMEnable,
+ dml_uint_t GPUVMMaxPageTableLevels,
+ dml_uint_t HTotal[],
+ dml_uint_t BytePerPixelC[],
+ dml_float_t DestinationLinesToRequestVMInVBlank[],
+ dml_float_t DestinationLinesToRequestVMInImmediateFlip[],
+ dml_bool_t DCCEnable[],
+ dml_float_t PixelClock[],
+ dml_uint_t dpte_row_width_luma_ub[], /* not referenced in this function body */
+ dml_uint_t dpte_row_width_chroma_ub[], /* not referenced in this function body */
+ dml_uint_t vm_group_bytes[],
+ dml_uint_t dpde0_bytes_per_frame_ub_l[],
+ dml_uint_t dpde0_bytes_per_frame_ub_c[],
+ dml_uint_t meta_pte_bytes_per_frame_ub_l[],
+ dml_uint_t meta_pte_bytes_per_frame_ub_c[],
+
+ // Output
+ dml_float_t TimePerVMGroupVBlank[],
+ dml_float_t TimePerVMGroupFlip[],
+ dml_float_t TimePerVMRequestVBlank[],
+ dml_float_t TimePerVMRequestFlip[])
+{ /*
+ * CalculateVMGroupAndRequestTimes - fill the four per-surface outputs for every k in [0, NumberOfActiveSurfaces).
+ *
+ * For surface k the outputs are computed only when GPUVMEnable is true and either DCCEnable[k] is true or
+ * GPUVMMaxPageTableLevels > 1; in every other case all four outputs are written as 0.
+ *
+ * When computed, a group count and a request count are assembled from the dpde0 and/or meta PTE byte counts,
+ * selected by DCCEnable[k], GPUVMMaxPageTableLevels and BytePerPixelC[k] > 0, and each output is
+ * lines * HTotal[k] / PixelClock[k] / count, evaluated left to right.
+ * Nothing is returned by value.
+ */
+ dml_uint_t num_group_per_lower_vm_stage; /* assigned on every path of the enabled branch before it is read */
+ dml_uint_t num_req_per_lower_vm_stage; /* likewise assigned on every path of the enabled branch */
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
+ dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
+#endif
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, DCCEnable = %u\n", __func__, k, DCCEnable[k]);
+ dml_print("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]);
+ dml_print("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]);
+ dml_print("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]);
+ dml_print("DML::%s: k=%u, meta_pte_bytes_per_frame_ub_l = %u\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
+ dml_print("DML::%s: k=%u, meta_pte_bytes_per_frame_ub_c = %u\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
+#endif
+
+ if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) { /* otherwise the else branch at the bottom zeroes the outputs */
+ if (DCCEnable[k] == false) { /* group count, DCC off: only dpde0 byte counts contribute */
+ if (BytePerPixelC[k] > 0) { /* chroma present: luma and chroma terms are passed through dml_ceil separately, then summed */
+ num_group_per_lower_vm_stage = (dml_uint_t) (dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_l[k] / (dml_float_t) vm_group_bytes[k], 1.0) +
+ dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_c[k] / (dml_float_t) vm_group_bytes[k], 1.0));
+ } else {
+ num_group_per_lower_vm_stage = (dml_uint_t) (dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_l[k] / (dml_float_t) vm_group_bytes[k], 1.0));
+ }
+ } else {
+ if (GPUVMMaxPageTableLevels == 1) { /* group count, DCC on with one page table level: only meta PTE byte counts contribute */
+ if (BytePerPixelC[k] > 0) {
+ num_group_per_lower_vm_stage = (dml_uint_t)(dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1.0) +
+ dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_c[k]) / (dml_float_t) (vm_group_bytes[k]), 1.0));
+ } else {
+ num_group_per_lower_vm_stage = (dml_uint_t)(dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1.0));
+ }
+ } else { /* group count, DCC on with other level counts: dpde0 and meta PTE terms plus a constant (2.0 with chroma, 1.0 without) */
+ if (BytePerPixelC[k] > 0) {
+ num_group_per_lower_vm_stage = (dml_uint_t)(2.0 + dml_ceil((dml_float_t) (dpde0_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1) +
+ dml_ceil((dml_float_t) (dpde0_bytes_per_frame_ub_c[k]) / (dml_float_t) (vm_group_bytes[k]), 1) +
+ dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1) +
+ dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_c[k]) / (dml_float_t) (vm_group_bytes[k]), 1));
+ } else {
+ num_group_per_lower_vm_stage = (dml_uint_t)(1.0 + dml_ceil((dml_float_t) (dpde0_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1) +
+ dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1));
+ }
+ }
+ }
+
+ if (DCCEnable[k] == false) { /* request count: same branch structure as the group count, but each term is an integer division by 64 */
+ if (BytePerPixelC[k] > 0) {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64; /* each term truncates before the sum */
+ } else {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
+ }
+ } else {
+ if (GPUVMMaxPageTableLevels == 1) {
+ if (BytePerPixelC[k] > 0) {
+ num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
+ } else {
+ num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
+ }
+ } else { /* unlike the group count, no constant is added here */
+ if (BytePerPixelC[k] > 0) {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
+ } else {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
+ }
+ }
+ }
+
+ TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
+ TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
+ TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; /* NOTE(review): num_req_per_lower_vm_stage is 0 when every contributing byte count is below 64; no guard here */
+ TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
+
+ if (GPUVMMaxPageTableLevels > 2) { /* more than two page table levels: all four times are halved */
+ TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
+ TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
+ TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
+ TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
+ }
+
+ } else { /* GPUVM off, or neither DCC nor more than one page table level: outputs are 0 */
+ TimePerVMGroupVBlank[k] = 0;
+ TimePerVMGroupFlip[k] = 0;
+ TimePerVMRequestVBlank[k] = 0;
+ TimePerVMRequestFlip[k] = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
+ dml_print("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
+ dml_print("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
+ dml_print("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
+#endif
+ }
+} // CalculateVMGroupAndRequestTimes
+
+static void CalculateStutterEfficiency(struct display_mode_lib_scratch_st *scratch,
+ struct CalculateStutterEfficiency_params_st *p)
+{
+ dml_float_t DETBufferingTimeY = 0;
+ dml_float_t SwathWidthYCriticalSurface = 0;
+ dml_float_t SwathHeightYCriticalSurface = 0;
+ dml_float_t VActiveTimeCriticalSurface = 0;
+ dml_float_t FrameTimeCriticalSurface = 0;
+ dml_uint_t BytePerPixelYCriticalSurface = 0;
+ dml_float_t LinesToFinishSwathTransferStutterCriticalSurface = 0;
+ dml_uint_t DETBufferSizeYCriticalSurface = 0;
+ dml_float_t MinTTUVBlankCriticalSurface = 0;
+ dml_uint_t BlockWidth256BytesYCriticalSurface = 0;
+ dml_bool_t SinglePlaneCriticalSurface = 0;
+ dml_bool_t SinglePipeCriticalSurface = 0;
+ dml_float_t TotalCompressedReadBandwidth = 0;
+ dml_float_t TotalRowReadBandwidth = 0;
+ dml_float_t AverageDCCCompressionRate = 0;
+ dml_float_t EffectiveCompressedBufferSize = 0;
+ dml_float_t PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = 0;
+ dml_float_t StutterBurstTime = 0;
+ dml_uint_t TotalActiveWriteback = 0;
+ dml_float_t LinesInDETY = 0;
+ dml_float_t LinesInDETYRoundedDownToSwath = 0;
+ dml_float_t MaximumEffectiveCompressionLuma = 0;
+ dml_float_t MaximumEffectiveCompressionChroma = 0;
+ dml_float_t TotalZeroSizeRequestReadBandwidth = 0;
+ dml_float_t TotalZeroSizeCompressedReadBandwidth = 0;
+ dml_float_t AverageDCCZeroSizeFraction = 0;
+ dml_float_t AverageZeroSizeCompressionRate = 0;
+
+ dml_bool_t FoundCriticalSurface = false;
+
+ dml_uint_t TotalNumberOfActiveOTG = 0;
+ dml_float_t SinglePixelClock = 0;
+ dml_uint_t SingleHTotal = 0;
+ dml_uint_t SingleVTotal = 0;
+ dml_bool_t SameTiming = true;
+
+ dml_float_t LastStutterPeriod = 0.0;
+ dml_float_t LastZ8StutterPeriod = 0.0;
+
+ dml_uint_t SwathSizeCriticalSurface;
+ dml_uint_t LastChunkOfSwathSize;
+ dml_uint_t MissingPartOfLastSwathOfDETSize;
+
+ TotalZeroSizeRequestReadBandwidth = 0;
+ TotalZeroSizeCompressedReadBandwidth = 0;
+ TotalRowReadBandwidth = 0;
+ TotalCompressedReadBandwidth = 0;
+
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
+ if (p->DCCEnable[k] == true) {
+ if ((dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockWidth256BytesY[k] > p->SwathHeightY[k]) || (!dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockHeight256BytesY[k] > p->SwathHeightY[k]) || p->DCCYMaxUncompressedBlock[k] < 256) {
+ MaximumEffectiveCompressionLuma = 2;
+ } else {
+ MaximumEffectiveCompressionLuma = 4;
+ }
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / dml_min(p->NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->NetDCCRateLuma[k]);
+ dml_print("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, MaximumEffectiveCompressionLuma);
+#endif
+ TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->DCCFractionOfZeroSizeRequestsLuma[k];
+ TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
+
+ if (p->ReadBandwidthSurfaceChroma[k] > 0) {
+ if ((dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockWidth256BytesC[k] > p->SwathHeightC[k]) || (!dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockHeight256BytesC[k] > p->SwathHeightC[k]) || p->DCCCMaxUncompressedBlock[k] < 256) {
+ MaximumEffectiveCompressionChroma = 2;
+ } else {
+ MaximumEffectiveCompressionChroma = 4;
+ }
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / dml_min(p->NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]);
+ dml_print("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->NetDCCRateChroma[k]);
+ dml_print("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, MaximumEffectiveCompressionChroma);
+#endif
+ TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->DCCFractionOfZeroSizeRequestsChroma[k];
+ TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
+ }
+ } else {
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] + p->ReadBandwidthSurfaceChroma[k];
+ }
+ TotalRowReadBandwidth = TotalRowReadBandwidth + p->DPPPerSurface[k] * (p->meta_row_bw[k] + p->dpte_row_bw[k]);
+ }
+ }
+
+ AverageDCCCompressionRate = p->TotalDataReadBandwidth / TotalCompressedReadBandwidth;
+ AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled);
+ dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
+ dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
+ dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
+ dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
+ dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
+ dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
+ dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
+ dml_print("DML::%s: CompbufReservedSpace64B = %u\n", __func__, p->CompbufReservedSpace64B);
+ dml_print("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, p->CompressedBufferSizeInkByte);
+#endif
+ if (AverageDCCZeroSizeFraction == 1) {
+ AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
+ EffectiveCompressedBufferSize = (dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + ((dml_float_t)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 * AverageZeroSizeCompressionRate;
+ } else if (AverageDCCZeroSizeFraction > 0) {
+ AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
+ EffectiveCompressedBufferSize = dml_min((dml_float_t)p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
+ (dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)) +
+ dml_min(((dml_float_t)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate,
+ ((dml_float_t)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
+ dml_print("DML::%s: min 3 = %f\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
+#endif
+ } else {
+ EffectiveCompressedBufferSize = dml_min((dml_float_t)p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
+ (dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) +
+ ((dml_float_t)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
+#endif
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries);
+ dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
+ dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
+#endif
+
+ *p->StutterPeriod = 0;
+
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
+ LinesInDETY = ((dml_float_t)p->DETBufferSizeY[k] + (p->UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * p->ReadBandwidthSurfaceLuma[k] / p->TotalDataReadBandwidth) / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
+ LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, p->SwathHeightY[k]);
+ DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((dml_float_t)p->HTotal[k] / p->PixelClock[k]) / p->VRatio[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
+ dml_print("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
+ dml_print("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
+ dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth);
+ dml_print("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, LinesInDETY);
+ dml_print("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
+ dml_print("DML::%s: k=%u, HTotal = %u\n", __func__, k, p->HTotal[k]);
+ dml_print("DML::%s: k=%u, PixelClock = %f\n", __func__, k, p->PixelClock[k]);
+ dml_print("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->VRatio[k]);
+ dml_print("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
+ dml_print("DML::%s: k=%u,PixelClock = %f\n", __func__, k, p->PixelClock[k]);
+#endif
+
+ if (!FoundCriticalSurface || DETBufferingTimeY < *p->StutterPeriod) {
+ dml_bool_t isInterlaceTiming = p->Interlace[k] && !p->ProgressiveToInterlaceUnitInOPP;
+
+ FoundCriticalSurface = true;
+ *p->StutterPeriod = DETBufferingTimeY;
+ FrameTimeCriticalSurface = (isInterlaceTiming ? dml_floor((dml_float_t)p->VTotal[k]/2.0, 1.0) : p->VTotal[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k];
+ VActiveTimeCriticalSurface = (isInterlaceTiming ? dml_floor((dml_float_t)p->VActive[k]/2.0, 1.0) : p->VActive[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k];
+ BytePerPixelYCriticalSurface = p->BytePerPixelY[k];
+ SwathWidthYCriticalSurface = p->SwathWidthY[k];
+ SwathHeightYCriticalSurface = p->SwathHeightY[k];
+ BlockWidth256BytesYCriticalSurface = p->BlockWidth256BytesY[k];
+ LinesToFinishSwathTransferStutterCriticalSurface = p->SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
+ DETBufferSizeYCriticalSurface = p->DETBufferSizeY[k];
+ MinTTUVBlankCriticalSurface = p->MinTTUVBlank[k];
+ SinglePlaneCriticalSurface = (p->ReadBandwidthSurfaceChroma[k] == 0);
+ SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface);
+ dml_print("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod);
+ dml_print("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, MinTTUVBlankCriticalSurface);
+ dml_print("DML::%s: k=%u, FrameTimeCriticalSurface = %f\n", __func__, k, FrameTimeCriticalSurface);
+ dml_print("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, VActiveTimeCriticalSurface);
+ dml_print("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, BytePerPixelYCriticalSurface);
+ dml_print("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, SwathWidthYCriticalSurface);
+ dml_print("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, SwathHeightYCriticalSurface);
+ dml_print("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, BlockWidth256BytesYCriticalSurface);
+ dml_print("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, SinglePlaneCriticalSurface);
+ dml_print("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, SinglePipeCriticalSurface);
+ dml_print("DML::%s: k=%u, LinesToFinishSwathTransferStutterCriticalSurface = %f\n", __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
+#endif
+ }
+ }
+ }
+
+ PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*p->StutterPeriod * p->TotalDataReadBandwidth, EffectiveCompressedBufferSize);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, p->ROBBufferSizeInKByte);
+ dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
+ dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth);
+ dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, p->ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
+ dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
+ dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
+ dml_print("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW);
+ dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth);
+ dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
+ dml_print("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK);
+#endif
+
+ StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / p->ReturnBW + (*p->StutterPeriod * p->TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64) + *p->StutterPeriod * TotalRowReadBandwidth / p->ReturnBW;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / p->ReturnBW);
+ dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth));
+ dml_print("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64));
+ dml_print("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * TotalRowReadBandwidth / p->ReturnBW);
+ dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
+#endif
+ StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / p->ReturnBW);
+
+ dml_print("DML::%s: Time to finish residue swath=%f\n", __func__, LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / p->ReturnBW);
+
+ TotalActiveWriteback = 0;
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->WritebackEnable[k]) {
+ TotalActiveWriteback = TotalActiveWriteback + 1;
+ }
+ }
+
+ if (TotalActiveWriteback == 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime);
+ dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time);
+ dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
+ dml_print("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
+#endif
+ *p->StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (p->SRExitTime + StutterBurstTime) / *p->StutterPeriod) * 100;
+ *p->Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (p->SRExitZ8Time + StutterBurstTime) / *p->StutterPeriod) * 100;
+ *p->NumberOfStutterBurstsPerFrame = (*p->StutterEfficiencyNotIncludingVBlank > 0 ? (dml_uint_t)(dml_ceil(VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
+ *p->Z8NumberOfStutterBurstsPerFrame = (*p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? (dml_uint_t)(dml_ceil(VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
+ } else {
+ *p->StutterEfficiencyNotIncludingVBlank = 0.;
+ *p->Z8StutterEfficiencyNotIncludingVBlank = 0.;
+ *p->NumberOfStutterBurstsPerFrame = 0;
+ *p->Z8NumberOfStutterBurstsPerFrame = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
+ dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame);
+ dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
+#endif
+
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
+ if (p->BlendingAndTiming[k] == k) {
+ if (TotalNumberOfActiveOTG == 0) {
+ SinglePixelClock = p->PixelClock[k];
+ SingleHTotal = p->HTotal[k];
+ SingleVTotal = p->VTotal[k];
+ } else if (SinglePixelClock != p->PixelClock[k] || SingleHTotal != p->HTotal[k] || SingleVTotal != p->VTotal[k]) {
+ SameTiming = false;
+ }
+ TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
+ }
+ }
+ }
+
+ if (*p->StutterEfficiencyNotIncludingVBlank > 0) {
+ LastStutterPeriod = VActiveTimeCriticalSurface - (*p->NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod;
+
+ if ((p->SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming &&
+ LastStutterPeriod + MinTTUVBlankCriticalSurface > p->StutterEnterPlusExitWatermark) {
+ *p->StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitTime + StutterBurstTime * VActiveTimeCriticalSurface / *p->StutterPeriod) / FrameTimeCriticalSurface) * 100;
+ } else {
+ *p->StutterEfficiency = *p->StutterEfficiencyNotIncludingVBlank;
+ }
+ } else {
+ *p->StutterEfficiency = 0;
+ }
+
+ if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) {
+ LastZ8StutterPeriod = VActiveTimeCriticalSurface - (*p->NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod;
+ if ((p->SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod + MinTTUVBlankCriticalSurface > p->Z8StutterEnterPlusExitWatermark) {
+ *p->Z8StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalSurface / *p->StutterPeriod) / FrameTimeCriticalSurface) * 100;
+ } else {
+ *p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank;
+ }
+ } else {
+ *p->Z8StutterEfficiency = 0.;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
+ dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark);
+ dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
+ dml_print("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
+ dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency);
+ dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency);
+ dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
+#endif
+
+ SwathSizeCriticalSurface = (dml_uint_t)(BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface));
+ LastChunkOfSwathSize = SwathSizeCriticalSurface % (p->PixelChunkSizeInKByte * 1024);
+ MissingPartOfLastSwathOfDETSize = (dml_uint_t)(dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) - DETBufferSizeYCriticalSurface);
+
+ *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && SinglePlaneCriticalSurface && SinglePipeCriticalSurface && (LastChunkOfSwathSize > 0) &&
+ (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: SwathSizeCriticalSurface = %u\n", __func__, SwathSizeCriticalSurface);
+ dml_print("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, DETBufferSizeYCriticalSurface);
+ dml_print("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte);
+ dml_print("DML::%s: LastChunkOfSwathSize = %u\n", __func__, LastChunkOfSwathSize);
+ dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %u\n", __func__, MissingPartOfLastSwathOfDETSize);
+ dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
+#endif
+} // CalculateStutterEfficiency
+
+/// \CalculateSwathAndDETConfiguration
+/** @brief Calculates swath width and different return buffers sizing (DET, CDB, etc.)
+ *
+ * Steps, in the order they appear in the body:
+ *  1. CalculateSwathWidth() fills p->SwathWidth, p->SwathWidthChroma, the two
+ *     swath_width_*_ub arrays and the local maximum swath heights.
+ *  2. The swath size in bytes at maximum swath height is derived per surface.
+ *  3. UnboundedRequest() sets *p->UnboundedRequestEnabled and CalculateDETBufferSize()
+ *     fills p->DETBufferSizeInKByte[] and *p->CompressedBufferSizeInkByte.
+ *  4. Per surface, p->SwathHeightY/p->SwathHeightC, p->ViewportSizeSupportPerSurface and
+ *     p->DETBufferSizeY/p->DETBufferSizeC are chosen; *p->ViewportSizeSupport is cleared
+ *     if any surface fails the check.
+ *  5. *p->compbuf_reserved_space_64b and *p->compbuf_reserved_space_zs are written.
+ *
+ * @param scratch  Not referenced anywhere in this function body.
+ * @param p        Carries both the inputs and the output pointers/arrays listed above.
+ */
+static void CalculateSwathAndDETConfiguration(struct display_mode_lib_scratch_st *scratch,
+ struct CalculateSwathAndDETConfiguration_params_st *p)
+{
+ dml_uint_t MaximumSwathHeightY[__DML_NUM_PLANES__];
+ dml_uint_t MaximumSwathHeightC[__DML_NUM_PLANES__];
+ dml_uint_t RoundedUpMaxSwathSizeBytesY[__DML_NUM_PLANES__];
+ dml_uint_t RoundedUpMaxSwathSizeBytesC[__DML_NUM_PLANES__];
+ dml_uint_t RoundedUpSwathSizeBytesY[__DML_NUM_PLANES__];
+ dml_uint_t RoundedUpSwathSizeBytesC[__DML_NUM_PLANES__];
+ dml_uint_t SwathWidthSingleDPP[__DML_NUM_PLANES__];
+ dml_uint_t SwathWidthSingleDPPChroma[__DML_NUM_PLANES__];
+
+ dml_uint_t TotalActiveDPP = 0;
+ dml_bool_t NoChromaOrLinearSurfaces = true;
+ dml_uint_t SurfaceDoingUnboundedRequest = 0;
+
+ dml_uint_t DETBufferSizeInKByteForSwathCalculation;
+ /* The two constants below are only used in the compbuf_reserved_space_64b computation at the end of this function. */
+ const long TTUFIFODEPTH = 8;
+ const long MAXIMUMCOMPRESSION = 4;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP);
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ dml_print("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]);
+ }
+#endif
+ CalculateSwathWidth(p->ForceSingleDPP,
+ p->NumberOfActiveSurfaces,
+ p->SourcePixelFormat,
+ p->SourceScan,
+ p->ViewportStationary,
+ p->ViewportWidth,
+ p->ViewportHeight,
+ p->ViewportXStart,
+ p->ViewportYStart,
+ p->ViewportXStartC,
+ p->ViewportYStartC,
+ p->SurfaceWidthY,
+ p->SurfaceWidthC,
+ p->SurfaceHeightY,
+ p->SurfaceHeightC,
+ p->ODMMode,
+ p->BytePerPixY,
+ p->BytePerPixC,
+ p->Read256BytesBlockHeightY,
+ p->Read256BytesBlockHeightC,
+ p->Read256BytesBlockWidthY,
+ p->Read256BytesBlockWidthC,
+ p->BlendingAndTiming,
+ p->HActive,
+ p->HRatio,
+ p->DPPPerSurface,
+
+ // Output
+ SwathWidthSingleDPP,
+ SwathWidthSingleDPPChroma,
+ p->SwathWidth,
+ p->SwathWidthChroma,
+ MaximumSwathHeightY,
+ MaximumSwathHeightC,
+ p->swath_width_luma_ub,
+ p->swath_width_chroma_ub);
+ /*
+ * Swath size in bytes at maximum swath height:
+ * upper-bound swath width * BytePerPixDET * maximum swath height.
+ * For dml_420_10 both sizes are additionally passed through dml_ceil(..., 256)
+ * (presumably rounding up to a multiple of 256 - confirm against dml_ceil).
+ */
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ RoundedUpMaxSwathSizeBytesY[k] = (dml_uint_t)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]);
+ RoundedUpMaxSwathSizeBytesC[k] = (dml_uint_t)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]);
+ dml_print("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]);
+ dml_print("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]);
+ dml_print("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]);
+ dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]);
+ dml_print("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]);
+ dml_print("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]);
+ dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]);
+#endif
+ if (p->SourcePixelFormat[k] == dml_420_10) {
+ RoundedUpMaxSwathSizeBytesY[k] = (dml_uint_t)(dml_ceil((dml_float_t) RoundedUpMaxSwathSizeBytesY[k], 256));
+ RoundedUpMaxSwathSizeBytesC[k] = (dml_uint_t)(dml_ceil((dml_float_t) RoundedUpMaxSwathSizeBytesC[k], 256));
+ }
+ }
+ /*
+ * Count DPPs (one per surface when ForceSingleDPP is set) and clear
+ * NoChromaOrLinearSurfaces if any surface is dml_420_8/10/12, dml_rgbe_alpha or
+ * dml_sw_linear. SurfaceDoingUnboundedRequest ends up as the highest k whose
+ * DPPPerSurface[k] is non-zero.
+ */
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ TotalActiveDPP = TotalActiveDPP + (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]);
+ if (p->DPPPerSurface[k] > 0)
+ SurfaceDoingUnboundedRequest = k;
+ if (p->SourcePixelFormat[k] == dml_420_8 || p->SourcePixelFormat[k] == dml_420_10 ||
+ p->SourcePixelFormat[k] == dml_420_12 || p->SourcePixelFormat[k] == dml_rgbe_alpha
+ || p->SurfaceTiling[k] == dml_sw_linear) {
+ NoChromaOrLinearSurfaces = false;
+ }
+ }
+ /* Only p->Output[0], the first surface's entry, is passed to UnboundedRequest(). */
+ *p->UnboundedRequestEnabled = UnboundedRequest(p->UseUnboundedRequestingFinal, TotalActiveDPP,
+ NoChromaOrLinearSurfaces, p->Output[0]);
+
+ CalculateDETBufferSize(p->DETSizeOverride,
+ p->UseMALLForPStateChange,
+ p->ForceSingleDPP,
+ p->NumberOfActiveSurfaces,
+ *p->UnboundedRequestEnabled,
+ p->nomDETInKByte,
+ p->MaxTotalDETInKByte,
+ p->ConfigReturnBufferSizeInKByte,
+ p->MinCompressedBufferSizeInKByte,
+ p->ConfigReturnBufferSegmentSizeInkByte,
+ p->CompressedBufferSegmentSizeInkByteFinal,
+ p->SourcePixelFormat,
+ p->ReadBandwidthLuma,
+ p->ReadBandwidthChroma,
+ RoundedUpMaxSwathSizeBytesY,
+ RoundedUpMaxSwathSizeBytesC,
+ p->DPPPerSurface,
+
+ // Output
+ p->DETBufferSizeInKByte, // per hubp pipe
+ p->CompressedBufferSizeInkByte);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP);
+ dml_print("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte);
+ dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte);
+ dml_print("DML::%s: UseUnboundedRequestingFinal = %u\n", __func__, p->UseUnboundedRequestingFinal);
+ dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte);
+#endif
+ /* Start optimistic; the per-surface loop below clears the flag on the first failing surface. */
+ *p->ViewportSizeSupport = true;
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ /* Phantom pipes use the constant 1024 (same KByte units) instead of p->DETBufferSizeInKByte[k] for the fit checks. */
+ DETBufferSizeInKByteForSwathCalculation = (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe ? 1024 : p->DETBufferSizeInKByte[k]);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation);
+#endif
+ /*
+ * Luma + chroma swath bytes must fit in half of the buffer size used for this check.
+ * Tried in order: both planes at maximum height; luma halved (when luma >= 1.5x chroma);
+ * chroma halved (when luma < 1.5x chroma); otherwise both halved without a further fit test.
+ */
+ if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ p->SwathHeightY[k] = MaximumSwathHeightY[k];
+ p->SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k];
+ RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k];
+ } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+ p->SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k] / 2;
+ RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k];
+ } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ p->SwathHeightY[k] = MaximumSwathHeightY[k];
+ p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+ RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k];
+ RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k] / 2;
+ } else {
+ p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+ p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+ RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k] / 2;
+ RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k] / 2;
+ }
+ /*
+ * The viewport is unsupported when even the half-height swaths do not fit, or when a
+ * swath width exceeds its maximum (chroma width is only checked if SwathHeightC > 0).
+ */
+ if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) ||
+ p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) {
+ *p->ViewportSizeSupport = false;
+ p->ViewportSizeSupportPerSurface[k] = false;
+ } else {
+ p->ViewportSizeSupportPerSurface[k] = true;
+ }
+ /*
+ * Split p->DETBufferSizeInKByte[k] (not the ...ForSwathCalculation value) between planes:
+ * everything to luma when SwathHeightC is 0, an even split when the luma swath is at most
+ * 1.5x the chroma swath, otherwise 2/3 to luma via dml_floor(..., 1024) and the rest to chroma.
+ */
+ if (p->SwathHeightC[k] == 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u All DET for plane0\n", __func__, k);
+#endif
+ p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024;
+ p->DETBufferSizeC[k] = 0;
+ } else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u Half DET for plane0, half for plane1\n", __func__, k);
+#endif
+ p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
+ p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
+ } else {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
+#endif
+ p->DETBufferSizeY[k] = (dml_uint_t)(dml_floor(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024));
+ p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k];
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
+ dml_print("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]);
+ dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]);
+ dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]);
+ dml_print("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]);
+ dml_print("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]);
+ dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]);
+ dml_print("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
+ dml_print("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]);
+ dml_print("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]);
+#endif
+
+ }
+ /*
+ * Reserved compressed-buffer space: two pixel chunks expressed in bytes / 64. With
+ * unbounded requesting enabled it is raised (dml_max) to the ROB size in bytes / 64 minus
+ * a term built from the luma swath size of SurfaceDoingUnboundedRequest. The _zs value
+ * is the same two pixel chunks expressed in bytes / 256.
+ */
+ *p->compbuf_reserved_space_64b = 2 * p->PixelChunkSizeInKByte * 1024 / 64;
+ if (*p->UnboundedRequestEnabled) {
+ *p->compbuf_reserved_space_64b = dml_max(*p->compbuf_reserved_space_64b,
+ (dml_float_t)(p->ROBBufferSizeInKByte * 1024/64)
+ - (dml_float_t)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / MAXIMUMCOMPRESSION/64));
+ }
+ *p->compbuf_reserved_space_zs = 2 * p->PixelChunkSizeInKByte * 1024 / 256;
+} // CalculateSwathAndDETConfiguration
+/**
+ * @brief Computes per-surface swath widths, their upper bounds and the maximum swath heights.
+ *
+ * Every array argument is indexed by surface k, for k in [0, NumberOfActiveSurfaces).
+ * Arguments listed before the "Output" marker are only read; the arrays after it are
+ * written for every k. BytePerPixY is accepted but not referenced in the body.
+ *
+ * For each surface the function:
+ *  - takes the single-DPP luma width from the viewport width, or from the viewport height
+ *    when dml_is_vertical_rotation(SourceScan[k]) is true;
+ *  - derives the per-pipe luma width from ForceSingleDPP, the ODM mode and DPPPerSurface[k];
+ *  - derives the chroma widths (halved for dml_420_8/10/12);
+ *  - computes swath_width_luma_ub / swath_width_chroma_ub from the 256-byte block
+ *    dimensions, limited by the block-aligned surface dimension.
+ */
+static void CalculateSwathWidth(
+ dml_bool_t ForceSingleDPP,
+ dml_uint_t NumberOfActiveSurfaces,
+ enum dml_source_format_class SourcePixelFormat[],
+ enum dml_rotation_angle SourceScan[],
+ dml_bool_t ViewportStationary[],
+ dml_uint_t ViewportWidth[],
+ dml_uint_t ViewportHeight[],
+ dml_uint_t ViewportXStart[],
+ dml_uint_t ViewportYStart[],
+ dml_uint_t ViewportXStartC[],
+ dml_uint_t ViewportYStartC[],
+ dml_uint_t SurfaceWidthY[],
+ dml_uint_t SurfaceWidthC[],
+ dml_uint_t SurfaceHeightY[],
+ dml_uint_t SurfaceHeightC[],
+ enum dml_odm_mode ODMMode[],
+ dml_uint_t BytePerPixY[],
+ dml_uint_t BytePerPixC[],
+ dml_uint_t Read256BytesBlockHeightY[],
+ dml_uint_t Read256BytesBlockHeightC[],
+ dml_uint_t Read256BytesBlockWidthY[],
+ dml_uint_t Read256BytesBlockWidthC[],
+ dml_uint_t BlendingAndTiming[],
+ dml_uint_t HActive[],
+ dml_float_t HRatio[],
+ dml_uint_t DPPPerSurface[],
+
+ // Output
+ dml_uint_t SwathWidthSingleDPPY[],
+ dml_uint_t SwathWidthSingleDPPC[],
+ dml_uint_t SwathWidthY[], // per-pipe
+ dml_uint_t SwathWidthC[], // per-pipe
+ dml_uint_t MaximumSwathHeightY[],
+ dml_uint_t MaximumSwathHeightC[],
+ dml_uint_t swath_width_luma_ub[], // per-pipe
+ dml_uint_t swath_width_chroma_ub[]) // per-pipe
+{
+ enum dml_odm_mode MainSurfaceODMMode;
+ dml_uint_t surface_width_ub_l;
+ dml_uint_t surface_height_ub_l;
+ dml_uint_t surface_width_ub_c = 0;
+ dml_uint_t surface_height_ub_c = 0;
+ /*
+ * surface_width_ub_c and surface_height_ub_c are only assigned inside the chroma branches
+ * below; on other iterations they keep their previous value, which is what the debug
+ * prints at the end of the loop then show.
+ */
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
+ dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
+#endif
+
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (!dml_is_vertical_rotation(SourceScan[k])) {
+ SwathWidthSingleDPPY[k] = ViewportWidth[k];
+ } else {
+ SwathWidthSingleDPPY[k] = ViewportHeight[k];
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u ViewportWidth=%u\n", __func__, k, ViewportWidth[k]);
+ dml_print("DML::%s: k=%u ViewportHeight=%u\n", __func__, k, ViewportHeight[k]);
+ dml_print("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
+#endif
+ /*
+ * Use the ODM mode of the surface whose index equals BlendingAndTiming[k]; if that
+ * index is not below NumberOfActiveSurfaces, surface k's own mode is kept.
+ */
+ MainSurfaceODMMode = ODMMode[k];
+ for (dml_uint_t j = 0; j < NumberOfActiveSurfaces; ++j) {
+ if (BlendingAndTiming[k] == j) {
+ MainSurfaceODMMode = ODMMode[j];
+ }
+ }
+ /*
+ * Per-pipe luma width: with ODM combine it is the smaller of the single-DPP width and
+ * HActive / 4 (or / 2) scaled by HRatio; with two DPPs it is half the single-DPP
+ * width; otherwise the single-DPP width is used as is.
+ */
+ if (ForceSingleDPP) {
+ SwathWidthY[k] = SwathWidthSingleDPPY[k];
+ } else {
+ if (MainSurfaceODMMode == dml_odm_mode_combine_4to1) {
+ SwathWidthY[k] = (dml_uint_t)(dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k], true)));
+ } else if (MainSurfaceODMMode == dml_odm_mode_combine_2to1) {
+ SwathWidthY[k] = (dml_uint_t)(dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k], true)));
+ } else if (DPPPerSurface[k] == 2) {
+ SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
+ } else {
+ SwathWidthY[k] = SwathWidthSingleDPPY[k];
+ }
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u HActive=%u\n", __func__, k, HActive[k]);
+ dml_print("DML::%s: k=%u HRatio=%f\n", __func__, k, HRatio[k]);
+ dml_print("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode);
+ dml_print("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]);
+ dml_print("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]);
+#endif
+ /* dml_420_8/10/12 use half the luma width for chroma; every other format reuses the luma width. */
+ if (SourcePixelFormat[k] == dml_420_8 || SourcePixelFormat[k] == dml_420_10 || SourcePixelFormat[k] == dml_420_12) {
+ SwathWidthC[k] = SwathWidthY[k] / 2;
+ SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
+ } else {
+ SwathWidthC[k] = SwathWidthY[k];
+ SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
+ }
+ /* With ForceSingleDPP the per-pipe widths equal the single-DPP widths; the luma assignment repeats the one made above. */
+ if (ForceSingleDPP == true) {
+ SwathWidthY[k] = SwathWidthSingleDPPY[k];
+ SwathWidthC[k] = SwathWidthSingleDPPC[k];
+ }
+ /* Surface dimensions passed through dml_ceil() with the 256-byte block dimension (presumably rounding up to a block multiple - confirm against dml_ceil). */
+ surface_width_ub_l = (dml_uint_t)dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
+ surface_height_ub_l = (dml_uint_t)dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
+ /*
+ * Non-rotated branch: block heights become the maximum swath heights and the upper
+ * bounds use X starts, block widths and the surface width. The vertical-rotation
+ * branch mirrors it with block widths as heights, Y starts, block heights and the
+ * surface height. A stationary viewport on a single DPP uses the viewport start in
+ * the bound; otherwise the bound is built from the swath width plus one extra block.
+ */
+ if (!dml_is_vertical_rotation(SourceScan[k])) {
+ MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
+ MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
+ if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+ swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_l, dml_floor(ViewportXStart[k] + SwathWidthY[k] + Read256BytesBlockWidthY[k] - 1, Read256BytesBlockWidthY[k]) - dml_floor(ViewportXStart[k], Read256BytesBlockWidthY[k])));
+ } else {
+ swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_l, dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]));
+ }
+ if (BytePerPixC[k] > 0) {
+ surface_width_ub_c = (dml_uint_t)dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
+ if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+ swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_c, dml_floor(ViewportXStartC[k] + SwathWidthC[k] + Read256BytesBlockWidthC[k] - 1, Read256BytesBlockWidthC[k]) - dml_floor(ViewportXStartC[k], Read256BytesBlockWidthC[k])));
+ } else {
+ swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_c, dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]));
+ }
+ } else {
+ swath_width_chroma_ub[k] = 0;
+ }
+ } else {
+ MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
+ MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
+
+ if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+ swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k])));
+ } else {
+ swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]));
+ }
+ if (BytePerPixC[k] > 0) {
+ surface_height_ub_c = (dml_uint_t)dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
+ if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+ swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_c, dml_floor(ViewportYStartC[k] + SwathWidthC[k] + Read256BytesBlockHeightC[k] - 1, Read256BytesBlockHeightC[k]) - dml_floor(ViewportYStartC[k], Read256BytesBlockHeightC[k])));
+ } else {
+ swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_c, dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]));
+ }
+ } else {
+ swath_width_chroma_ub[k] = 0;
+ }
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l);
+ dml_print("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l);
+ dml_print("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c);
+ dml_print("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c);
+ dml_print("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]);
+ dml_print("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]);
+ dml_print("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]);
+ dml_print("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]);
+ dml_print("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, ViewportStationary[k]);
+ dml_print("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
+ dml_print("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]);
+ dml_print("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]);
+ dml_print("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]);
+#endif
+
+ }
+} // CalculateSwathWidth
+
+/*
+ * CalculateExtraLatency - latency from fixed round-trip cycles plus extra bytes in flight.
+ *
+ * The result is the sum of two terms:
+ *   (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK
+ *   CalculateExtraLatencyBytes(...) / ReturnBW
+ * Every parameter other than RoundTripPingLatencyCycles, DCFCLK and ReturnBW is forwarded
+ * unchanged to CalculateExtraLatencyBytes().
+ */
+static noinline_for_stack dml_float_t CalculateExtraLatency(
+	dml_uint_t RoundTripPingLatencyCycles,
+	dml_uint_t ReorderingBytes,
+	dml_float_t DCFCLK,
+	dml_uint_t TotalNumberOfActiveDPP,
+	dml_uint_t PixelChunkSizeInKByte,
+	dml_uint_t TotalNumberOfDCCActiveDPP,
+	dml_uint_t MetaChunkSize,
+	dml_float_t ReturnBW,
+	dml_bool_t GPUVMEnable,
+	dml_bool_t HostVMEnable,
+	dml_uint_t NumberOfActiveSurfaces,
+	dml_uint_t NumberOfDPP[],
+	dml_uint_t dpte_group_bytes[],
+	dml_float_t HostVMInefficiencyFactor,
+	dml_uint_t HostVMMinPageSize,
+	dml_uint_t HostVMMaxNonCachedPageTableLevels)
+{
+	/* The helper returns dml_uint_t; the value is widened to dml_float_t before dividing. */
+	const dml_float_t extra_bytes = CalculateExtraLatencyBytes(
+					ReorderingBytes,
+					TotalNumberOfActiveDPP,
+					PixelChunkSizeInKByte,
+					TotalNumberOfDCCActiveDPP,
+					MetaChunkSize,
+					GPUVMEnable,
+					HostVMEnable,
+					NumberOfActiveSurfaces,
+					NumberOfDPP,
+					dpte_group_bytes,
+					HostVMInefficiencyFactor,
+					HostVMMinPageSize,
+					HostVMMaxNonCachedPageTableLevels);
+	const dml_float_t cycles_term = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK;
+	const dml_float_t bytes_term = extra_bytes / ReturnBW;
+	const dml_float_t latency = cycles_term + bytes_term;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles);
+	dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
+	dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, extra_bytes);
+	dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
+	dml_print("DML::%s: ExtraLatency=%f\n", __func__, latency);
+#endif
+
+	return latency;
+} // CalculateExtraLatency
+
+/*
+ * CalculateHostVMDynamicLevels - host VM page-table level count derived from the page size.
+ *
+ * Returns 0 unless both GPUVMEnable and HostVMEnable are set. Otherwise the result is
+ * HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 depending on whether
+ * HostVMMinPageSize is below 2048, below 1048576, or at least 1048576; the reduced value
+ * is passed through dml_max(0, ...) before being converted back to dml_uint_t.
+ */
+static dml_uint_t CalculateHostVMDynamicLevels(
+	dml_bool_t GPUVMEnable,
+	dml_bool_t HostVMEnable,
+	dml_uint_t HostVMMinPageSize,
+	dml_uint_t HostVMMaxNonCachedPageTableLevels)
+{
+	if (!GPUVMEnable || !HostVMEnable)
+		return 0;
+
+	if (HostVMMinPageSize < 2048)
+		return HostVMMaxNonCachedPageTableLevels;
+
+	/* The subtraction is done in dml_float_t so it may go negative before dml_max() is applied. */
+	if (HostVMMinPageSize < 1048576)
+		return (dml_uint_t) dml_max(0, (dml_float_t) HostVMMaxNonCachedPageTableLevels - 1);
+
+	return (dml_uint_t) dml_max(0, (dml_float_t) HostVMMaxNonCachedPageTableLevels - 2);
+}
+
+/*
+ * CalculateExtraLatencyBytes - byte count used by the extra-latency calculations.
+ *
+ * Starts from ReorderingBytes plus 1024.0 times
+ * (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize).
+ * When GPUVMEnable is true, each surface k additionally contributes
+ * NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor,
+ * where HostVMDynamicLevels comes from CalculateHostVMDynamicLevels().
+ * The floating-point total is converted to dml_uint_t on return.
+ */
+static dml_uint_t CalculateExtraLatencyBytes(dml_uint_t ReorderingBytes,
+	dml_uint_t TotalNumberOfActiveDPP,
+	dml_uint_t PixelChunkSizeInKByte,
+	dml_uint_t TotalNumberOfDCCActiveDPP,
+	dml_uint_t MetaChunkSize,
+	dml_bool_t GPUVMEnable,
+	dml_bool_t HostVMEnable,
+	dml_uint_t NumberOfActiveSurfaces,
+	dml_uint_t NumberOfDPP[],
+	dml_uint_t dpte_group_bytes[],
+	dml_float_t HostVMInefficiencyFactor,
+	dml_uint_t HostVMMinPageSize,
+	dml_uint_t HostVMMaxNonCachedPageTableLevels)
+{
+	dml_float_t total_bytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
+	dml_uint_t host_vm_levels;
+	dml_uint_t k;
+
+	/* Without GPUVM there is no per-surface page-table contribution. */
+	if (GPUVMEnable != true)
+		return (dml_uint_t)(total_bytes);
+
+	host_vm_levels = CalculateHostVMDynamicLevels(GPUVMEnable, HostVMEnable, HostVMMinPageSize, HostVMMaxNonCachedPageTableLevels);
+
+	/* The integer factors are multiplied first, then scaled by the floating-point factor. */
+	for (k = 0; k < NumberOfActiveSurfaces; ++k)
+		total_bytes += NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * host_vm_levels) * HostVMInefficiencyFactor;
+
+	return (dml_uint_t)(total_bytes);
+}
+
+/*
+ * CalculateUrgentLatency - urgent latency taken from the three candidate latencies.
+ *
+ * The base value is dml_max3() of the pixel-only, pixel-mixed-with-VM and VM-only
+ * latencies. When DoUrgentLatencyAdjustment is true,
+ * UrgentLatencyAdjustmentFabricClockComponent *
+ * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1) is added to it.
+ */
+static dml_float_t CalculateUrgentLatency(
+	dml_float_t UrgentLatencyPixelDataOnly,
+	dml_float_t UrgentLatencyPixelMixedWithVMData,
+	dml_float_t UrgentLatencyVMDataOnly,
+	dml_bool_t DoUrgentLatencyAdjustment,
+	dml_float_t UrgentLatencyAdjustmentFabricClockComponent,
+	dml_float_t UrgentLatencyAdjustmentFabricClockReference,
+	dml_float_t FabricClock)
+{
+	const dml_float_t base_latency = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
+
+	if (DoUrgentLatencyAdjustment != true)
+		return base_latency;
+
+	return base_latency + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
+}
+
+/*
+ * RequiredDTBCLK - DTBCLK value required for the given output configuration.
+ *
+ * Without DSC the result is dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0).
+ * With DSC it is 1.002 times the dml_max4() of a quarter of the pixel word rate, a quarter
+ * of the average tribyte rate, a quarter of the active tribyte rate, and 25.0.
+ * The pixel word rate is PixelClock for dml_444 output and PixelClock / 2 otherwise.
+ */
+static dml_float_t RequiredDTBCLK(
+	dml_bool_t DSCEnable,
+	dml_float_t PixelClock,
+	enum dml_output_format_class OutputFormat,
+	dml_float_t OutputBpp,
+	dml_uint_t DSCSlices,
+	dml_uint_t HTotal,
+	dml_uint_t HActive,
+	dml_uint_t AudioRate,
+	dml_uint_t AudioLayout)
+{
+	dml_float_t word_rate;
+	dml_float_t hc_active;
+	dml_float_t hc_blank;
+	dml_float_t avg_tribyte_rate;
+	dml_float_t active_tribyte_rate;
+
+	if (DSCEnable != true)
+		return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
+
+	if (OutputFormat == dml_444)
+		word_rate = PixelClock / 1;
+	else
+		word_rate = PixelClock / 2;
+
+	/*
+	 * NOTE(review): HActive / DSCSlices divides two dml_uint_t values, so the innermost
+	 * dml_ceil() presumably receives an already truncated quotient - confirm this is intended.
+	 */
+	hc_active = dml_ceil(DSCSlices * dml_ceil(OutputBpp * dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
+	hc_blank = 64 + 32 * dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
+	avg_tribyte_rate = word_rate * (hc_active + hc_blank) / HTotal;
+	active_tribyte_rate = word_rate * hc_active / HActive;
+
+	return dml_max4(word_rate / 4.0, avg_tribyte_rate / 4.0, active_tribyte_rate / 4.0, 25.0) * 1.002;
+}
+
+/// @brief Derive the DCFCLK to run at for each of the two j-indexed states.
+///
+/// For every j the function evaluates an average-bandwidth requirement and a
+/// peak (prefetch-time) requirement, takes the larger of the two with a 5%
+/// margin and clamps it to p->DCFCLKPerState. The result is written to
+/// p->DCFCLKState[j]; every intermediate value is kept in
+/// scratch->UseMinimumDCFCLK_locals.
+///
+/// @param scratch Scratch area that provides the UseMinimumDCFCLK_locals workspace.
+/// @param p       Inputs (per-surface and per-state arrays) plus the DCFCLKState[] output.
+static void UseMinimumDCFCLK(struct display_mode_lib_scratch_st *scratch, struct UseMinimumDCFCLK_params_st *p)
+{
+	struct UseMinimumDCFCLK_locals_st *s = &scratch->UseMinimumDCFCLK_locals;
+
+	// The input is a percentage; everything below works with a fraction.
+	s->NormalEfficiency = p->PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
+	for (dml_uint_t j = 0; j < 2; ++j) {
+		// Sum of the per-surface DPTE row bandwidth for this state.
+		s->TotalMaxPrefetchFlipDPTERowBandwidth[j] = 0;
+		for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+			s->TotalMaxPrefetchFlipDPTERowBandwidth[j] = s->TotalMaxPrefetchFlipDPTERowBandwidth[j] + p->NoOfDPP[j][k] * p->DPTEBytesPerRow[j][k] / (15.75 * p->HTotal[k] / p->PixelClock[k]);
+		}
+
+		// Flatten NoOfDPP[j][*] into a 1-D array for CalculateExtraLatencyBytes().
+		// Bound written as "k < N" (not "k <= N - 1") so that a surface count of 0
+		// cannot wrap around when dml_uint_t is unsigned.
+		for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+			s->NoOfDPPState[k] = p->NoOfDPP[j][k];
+		}
+
+		s->DPTEBandwidth = s->TotalMaxPrefetchFlipDPTERowBandwidth[j];
+
+		s->DCFCLKRequiredForAverageBandwidth = dml_max(p->ProjectedDCFCLKDeepSleep[j], s->DPTEBandwidth / s->NormalEfficiency / p->ReturnBusWidth);
+
+		s->ExtraLatencyBytes = CalculateExtraLatencyBytes(p->ReorderingBytes, p->TotalNumberOfActiveDPP[j], p->PixelChunkSizeInKByte, p->TotalNumberOfDCCActiveDPP[j],
+			p->MetaChunkSize, p->GPUVMEnable, p->HostVMEnable, p->NumberOfActiveSurfaces, s->NoOfDPPState, p->dpte_group_bytes,
+			1, p->HostVMMinPageSize, p->HostVMMaxNonCachedPageTableLevels);
+		s->ExtraLatencyCycles = p->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + s->ExtraLatencyBytes / s->NormalEfficiency / p->ReturnBusWidth;
+
+		// Per-surface peak requirement.
+		for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+			dml_float_t DCFCLKCyclesRequiredInPrefetch;
+			dml_float_t PrefetchTime;
+
+			s->PixelDCFCLKCyclesRequiredInPrefetch[k] = (p->PrefetchLinesY[j][k] * p->swath_width_luma_ub_all_states[j][k] * p->BytePerPixelY[k] + p->PrefetchLinesC[j][k] * p->swath_width_chroma_ub_all_states[j][k] * p->BytePerPixelC[k]) / s->NormalEfficiency / p->ReturnBusWidth;
+			DCFCLKCyclesRequiredInPrefetch = 2 * s->ExtraLatencyCycles / s->NoOfDPPState[k] + p->PDEAndMetaPTEBytesPerFrame[j][k] / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth * (p->GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * p->DPTEBytesPerRow[j][k] / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth + 2 * p->MetaRowBytes[j][k] / s->NormalEfficiency / p->ReturnBusWidth + s->PixelDCFCLKCyclesRequiredInPrefetch[k];
+			s->PrefetchPixelLinesTime[k] = dml_max(p->PrefetchLinesY[j][k], p->PrefetchLinesC[j][k]) * p->HTotal[k] / p->PixelClock[k];
+			s->DynamicMetadataVMExtraLatency[k] = (p->GPUVMEnable == true && p->DynamicMetadataEnable[k] == true && p->DynamicMetadataVMEnabled == true) ? p->UrgLatency * p->GPUVMMaxPageTableLevels * (p->HostVMEnable == true ? p->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
+
+			s->MinimumTWait = CalculateTWait(p->MaxPrefetchMode,
+				p->UseMALLForPStateChange[k],
+				p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+				p->DRRDisplay[k],
+				p->DRAMClockChangeLatencyFinal,
+				p->FCLKChangeLatency,
+				p->UrgLatency,
+				p->SREnterPlusExitTime);
+
+			PrefetchTime = (p->MaximumVStartup[j][k] - 1) * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - p->UrgLatency * ((p->GPUVMMaxPageTableLevels <= 2 ? p->GPUVMMaxPageTableLevels : p->GPUVMMaxPageTableLevels - 2) * (p->HostVMEnable == true ? p->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - s->DynamicMetadataVMExtraLatency[k];
+
+			if (PrefetchTime > 0) {
+				dml_float_t ExpectedVRatioPrefetch;
+				ExpectedVRatioPrefetch = s->PrefetchPixelLinesTime[k] / (PrefetchTime * s->PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
+				s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = s->NoOfDPPState[k] * s->PixelDCFCLKCyclesRequiredInPrefetch[k] / s->PrefetchPixelLinesTime[k] * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4);
+				if (p->HostVMEnable == true || p->ImmediateFlipRequirement == true) {
+					s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = s->DCFCLKRequiredForPeakBandwidthPerSurface[k] + s->NoOfDPPState[k] * s->DPTEBandwidth / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth;
+				}
+			} else {
+				// No time left to prefetch: request the full per-state DCFCLK.
+				s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = p->DCFCLKPerState;
+			}
+			if (p->DynamicMetadataEnable[k] == true) {
+				dml_float_t TSetupPipe;
+				dml_float_t TdmbfPipe;
+				dml_float_t TdmsksPipe;
+				dml_float_t TdmecPipe;
+				dml_float_t AllowedTimeForUrgentExtraLatency;
+
+				CalculateVUpdateAndDynamicMetadataParameters(
+					p->MaxInterDCNTileRepeaters,
+					p->RequiredDPPCLKPerSurface[j][k],
+					p->RequiredDISPCLK[j],
+					p->ProjectedDCFCLKDeepSleep[j],
+					p->PixelClock[k],
+					p->HTotal[k],
+					p->VTotal[k] - p->VActive[k],
+					p->DynamicMetadataTransmittedBytes[k],
+					p->DynamicMetadataLinesBeforeActiveRequired[k],
+					p->Interlace[k],
+					p->ProgressiveToInterlaceUnitInOPP,
+
+					// Output
+					&TSetupPipe,
+					&TdmbfPipe,
+					&TdmecPipe,
+					&TdmsksPipe,
+					&s->dummy1,
+					&s->dummy2,
+					&s->dummy3);
+
+				AllowedTimeForUrgentExtraLatency = p->MaximumVStartup[j][k] * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe - TdmsksPipe - s->DynamicMetadataVMExtraLatency[k];
+				if (AllowedTimeForUrgentExtraLatency > 0) {
+					s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = dml_max(s->DCFCLKRequiredForPeakBandwidthPerSurface[k], s->ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
+				} else {
+					s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = p->DCFCLKPerState;
+				}
+			}
+		}
+
+		// Total peak requirement is the sum over all surfaces (same "k < N" bound as above).
+		s->DCFCLKRequiredForPeakBandwidth = 0;
+		for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+			s->DCFCLKRequiredForPeakBandwidth = s->DCFCLKRequiredForPeakBandwidth + s->DCFCLKRequiredForPeakBandwidthPerSurface[k];
+		}
+		s->MinimumTvmPlus2Tr0 = p->UrgLatency * (p->GPUVMEnable == true ? (p->HostVMEnable == true ? (p->GPUVMMaxPageTableLevels + 2) * (p->HostVMMaxNonCachedPageTableLevels + 1) - 1 : p->GPUVMMaxPageTableLevels + 1) : 0);
+
+		// NOTE(review): s->MinimumTWait is recomputed for every surface in the loop
+		// above, so this loop sees the value left behind by the last surface.
+		for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+			dml_float_t MaximumTvmPlus2Tr0PlusTsw;
+			MaximumTvmPlus2Tr0PlusTsw = (p->MaximumVStartup[j][k] - 2) * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - s->DynamicMetadataVMExtraLatency[k];
+			if (MaximumTvmPlus2Tr0PlusTsw <= s->MinimumTvmPlus2Tr0 + s->PrefetchPixelLinesTime[k] / 4) {
+				s->DCFCLKRequiredForPeakBandwidth = p->DCFCLKPerState;
+			} else {
+				s->DCFCLKRequiredForPeakBandwidth = dml_max3(s->DCFCLKRequiredForPeakBandwidth,
+					2 * s->ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - s->MinimumTvmPlus2Tr0 - s->PrefetchPixelLinesTime[k] / 4),
+					(2 * s->ExtraLatencyCycles + s->PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - s->MinimumTvmPlus2Tr0));
+			}
+		}
+
+		// Larger of the two requirements plus 5%, never above the per-state DCFCLK.
+		p->DCFCLKState[j] = dml_min(p->DCFCLKPerState, 1.05 * dml_max(s->DCFCLKRequiredForAverageBandwidth, s->DCFCLKRequiredForPeakBandwidth));
+	}
+}
+
+
+/// @brief Decide whether unbounded requesting may be used.
+///
+/// The answer is true only when the policy is not
+/// dml_unbounded_requesting_disable, exactly one DPP is active and
+/// NoChromaOrLinear is set. Under the dml_unbounded_requesting_edp_only policy
+/// the output must additionally be dml_edp.
+static dml_bool_t UnboundedRequest(enum dml_unbounded_requesting_policy UseUnboundedRequestingFinal,
+	dml_uint_t TotalNumberOfActiveDPP,
+	dml_bool_t NoChromaOrLinear,
+	enum dml_output_encoder_class Output)
+{
+	// Policy switched off entirely.
+	if (UseUnboundedRequestingFinal == dml_unbounded_requesting_disable)
+		return false;
+
+	// The eDP-only policy rules out every other encoder.
+	if (UseUnboundedRequestingFinal == dml_unbounded_requesting_edp_only && Output != dml_edp)
+		return false;
+
+	return (TotalNumberOfActiveDPP == 1 && NoChromaOrLinear);
+}
+
+/// @brief Compute the MALL footprint of every active surface and check it against the MALL allocation.
+///
+/// For each surface k, SurfaceSizeInMALL[k] is built up from:
+///  - the luma area (width x height x BytesPerPixelY), using ReadBlock{Width,Height}Y as granularity;
+///  - the chroma area, only when ReadBlockWidthC[k] > 0;
+///  - when DCCEnable[k] is set, an extra luma term (and a chroma term when
+///    Read256BytesBlockWidthC[k] > 0) that uses 8x the 256-byte block dimensions as
+///    granularity and is divided by 256.
+/// A stationary viewport uses the viewport start position in the extent; otherwise
+/// only the viewport size (padded by one block minus one) is used, limited by the surface size.
+///
+/// *ExceededMALLSize is set when the sum over surfaces whose UseMALLForStaticScreen[k]
+/// equals dml_use_mall_static_screen_enable is larger than MALLAllocatedForDCN * 1024 * 1024
+/// (presumably MALLAllocatedForDCN is in MBytes and the sizes in bytes -- TODO confirm).
+static void CalculateSurfaceSizeInMall(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_uint_t MALLAllocatedForDCN,
+ enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
+ dml_bool_t DCCEnable[],
+ dml_bool_t ViewportStationary[],
+ dml_uint_t ViewportXStartY[],
+ dml_uint_t ViewportYStartY[],
+ dml_uint_t ViewportXStartC[],
+ dml_uint_t ViewportYStartC[],
+ dml_uint_t ViewportWidthY[],
+ dml_uint_t ViewportHeightY[],
+ dml_uint_t BytesPerPixelY[],
+ dml_uint_t ViewportWidthC[],
+ dml_uint_t ViewportHeightC[],
+ dml_uint_t BytesPerPixelC[],
+ dml_uint_t SurfaceWidthY[],
+ dml_uint_t SurfaceWidthC[],
+ dml_uint_t SurfaceHeightY[],
+ dml_uint_t SurfaceHeightC[],
+ dml_uint_t Read256BytesBlockWidthY[],
+ dml_uint_t Read256BytesBlockWidthC[],
+ dml_uint_t Read256BytesBlockHeightY[],
+ dml_uint_t Read256BytesBlockHeightC[],
+ dml_uint_t ReadBlockWidthY[],
+ dml_uint_t ReadBlockWidthC[],
+ dml_uint_t ReadBlockHeightY[],
+ dml_uint_t ReadBlockHeightC[],
+
+ // Output
+ dml_uint_t SurfaceSizeInMALL[],
+ dml_bool_t *ExceededMALLSize)
+{
+ dml_uint_t TotalSurfaceSizeInMALL = 0;
+
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (ViewportStationary[k]) {
+ // Stationary viewport: extent = block-aligned end minus block-aligned start, limited by the block-aligned surface size.
+ SurfaceSizeInMALL[k] = (dml_uint_t)(dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]), dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1, ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k], ReadBlockWidthY[k])) *
+ dml_min(dml_ceil(SurfaceHeightY[k], ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] + ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) *
+ BytesPerPixelY[k]);
+
+ // Chroma contribution, skipped when there is no chroma read block.
+ if (ReadBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
+ dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]), dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
+ dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]), dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) * BytesPerPixelC[k]);
+ }
+ // DCC contribution: same shape of formula with 8 x 256-byte-block granularity, scaled by 1/256.
+ if (DCCEnable[k] == true) {
+ SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
+ dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]), dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 * Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k]) - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k])) *
+ dml_min(dml_ceil(SurfaceHeightY[k], 8 * Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] + ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 * Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256);
+ if (Read256BytesBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
+ dml_min(dml_ceil(SurfaceWidthC[k], 8 * Read256BytesBlockWidthC[k]), dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8 * Read256BytesBlockWidthC[k] - 1, 8 * Read256BytesBlockWidthC[k]) - dml_floor(ViewportXStartC[k], 8 * Read256BytesBlockWidthC[k])) *
+ dml_min(dml_ceil(SurfaceHeightC[k], 8 * Read256BytesBlockHeightC[k]), dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 8 * Read256BytesBlockHeightC[k] - 1, 8 * Read256BytesBlockHeightC[k]) - dml_floor(ViewportYStartC[k], 8 * Read256BytesBlockHeightC[k])) * BytesPerPixelC[k] / 256);
+ }
+ }
+ } else {
+ // Non-stationary viewport: the start position is not used; the viewport size is padded by (block - 1) and limited by the surface size.
+ SurfaceSizeInMALL[k] = (dml_uint_t)(dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * BytesPerPixelY[k]);
+ if (ReadBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
+ dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
+ dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * BytesPerPixelC[k]);
+ }
+ if (DCCEnable[k] == true) {
+ SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
+ dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 * Read256BytesBlockWidthY[k] - 1), 8 * Read256BytesBlockWidthY[k]) *
+ dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1), 8 * Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256);
+
+ if (Read256BytesBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
+ dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 * Read256BytesBlockWidthC[k] - 1), 8 * Read256BytesBlockWidthC[k]) *
+ dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 * Read256BytesBlockHeightC[k] - 1), 8 * Read256BytesBlockHeightC[k]) * BytesPerPixelC[k] / 256);
+ }
+ }
+ }
+ }
+
+ // Only surfaces that request MALL for static screen count towards the total.
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable)
+ TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
+ }
+ *ExceededMALLSize = (TotalSurfaceSizeInMALL > MALLAllocatedForDCN * 1024 * 1024);
+} // CalculateSurfaceSizeInMall
+
+/// @brief Distribute the DET buffer among the active surfaces and size the compressed buffer.
+///
+/// Two modes:
+///  - UnboundedRequestEnabled: only DETBufferSizeInKByte[0] is written (override value, or at
+///    least 128 and enough for two swaths); the compressed buffer gets the remainder of the
+///    return buffer.
+///  - Otherwise: every surface first receives a minimum allocation taken from a pool of
+///    MaxTotalDETInKByte, then the rest of the pool is handed out in order of increasing
+///    read bandwidth, each piece proportional to the surface's bandwidth share; the
+///    compressed buffer is set to MinCompressedBufferSizeInKByte.
+/// In both modes *CompressedBufferSizeInkByte is finally rescaled by
+/// CompressedBufferSegmentSizeInkByteFinal / ConfigReturnBufferSegmentSizeInkByte.
+///
+/// Outputs: DETBufferSizeInKByte[] (per surface) and *CompressedBufferSizeInkByte.
+static void CalculateDETBufferSize(
+ dml_uint_t DETSizeOverride[],
+ enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ dml_bool_t ForceSingleDPP,
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_bool_t UnboundedRequestEnabled,
+ dml_uint_t nomDETInKByte,
+ dml_uint_t MaxTotalDETInKByte,
+ dml_uint_t ConfigReturnBufferSizeInKByte,
+ dml_uint_t MinCompressedBufferSizeInKByte,
+ dml_uint_t ConfigReturnBufferSegmentSizeInkByte,
+ dml_uint_t CompressedBufferSegmentSizeInkByteFinal,
+ enum dml_source_format_class SourcePixelFormat[],
+ dml_float_t ReadBandwidthLuma[],
+ dml_float_t ReadBandwidthChroma[],
+ dml_uint_t RoundedUpMaxSwathSizeBytesY[],
+ dml_uint_t RoundedUpMaxSwathSizeBytesC[],
+ dml_uint_t DPPPerSurface[],
+ // Output
+ dml_uint_t DETBufferSizeInKByte[],
+ dml_uint_t *CompressedBufferSizeInkByte)
+{
+ dml_uint_t DETBufferSizePoolInKByte;
+ dml_uint_t NextDETBufferPieceInKByte;
+ dml_bool_t DETPieceAssignedToThisSurfaceAlready[__DML_NUM_PLANES__];
+ dml_bool_t NextPotentialSurfaceToAssignDETPieceFound;
+ dml_uint_t NextSurfaceToAssignDETPiece;
+ dml_float_t TotalBandwidth;
+ dml_float_t BandwidthOfSurfacesNotAssignedDETPiece;
+ dml_uint_t max_minDET;
+ dml_uint_t minDET;
+ dml_uint_t minDET_pipe;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
+ dml_print("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
+ dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
+ dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled);
+ dml_print("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte);
+ dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
+ dml_print("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte);
+ dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %u\n", __func__, CompressedBufferSegmentSizeInkByteFinal);
+#endif
+
+ // Note: Will use default det size if that fits 2 swaths
+ if (UnboundedRequestEnabled) {
+ // Unbounded request: only index 0 is sized; everything else in the return buffer goes to the compressed buffer.
+ if (DETSizeOverride[0] > 0) {
+ DETBufferSizeInKByte[0] = DETSizeOverride[0];
+ } else {
+ DETBufferSizeInKByte[0] = (dml_uint_t) dml_max(128.0, dml_ceil(2.0 * ((dml_float_t) RoundedUpMaxSwathSizeBytesY[0] + (dml_float_t) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte));
+ }
+ *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
+ } else {
+ // Step 1: give every surface its minimum allocation out of the pool.
+ DETBufferSizePoolInKByte = MaxTotalDETInKByte;
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+ DETBufferSizeInKByte[k] = 0;
+ // 4:2:0 formats get an upper search limit that is one segment below nomDETInKByte.
+ if (SourcePixelFormat[k] == dml_420_8 || SourcePixelFormat[k] == dml_420_10 || SourcePixelFormat[k] == dml_420_12) {
+ max_minDET = nomDETInKByte - ConfigReturnBufferSegmentSizeInkByte;
+ } else {
+ max_minDET = nomDETInKByte;
+ }
+ minDET = 128;
+ minDET_pipe = 0;
+
+ // add DET resource until can hold 2 full swaths
+ while (minDET <= max_minDET && minDET_pipe == 0) {
+ if (2.0 * ((dml_float_t) RoundedUpMaxSwathSizeBytesY[k] + (dml_float_t) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
+ minDET_pipe = minDET;
+ minDET = minDET + ConfigReturnBufferSegmentSizeInkByte;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u minDET = %u\n", __func__, k, minDET);
+ dml_print("DML::%s: k=%u max_minDET = %u\n", __func__, k, max_minDET);
+ dml_print("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, minDET_pipe);
+ dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]);
+ dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]);
+#endif
+
+ // Nothing up to max_minDET holds two swaths: fall back to one swath's worth (at least 128).
+ if (minDET_pipe == 0) {
+ minDET_pipe = (dml_uint_t)(dml_max(128, dml_ceil(((dml_float_t)RoundedUpMaxSwathSizeBytesY[k] + (dml_float_t)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte)));
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, minDET_pipe);
+#endif
+ }
+
+ // Phantom pipes get no DET; overrides are taken as-is; otherwise the minimum is granted
+ // only if the pool can cover it for all DPPs of the surface (the size stays 0 if it cannot).
+ if (UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) {
+ DETBufferSizeInKByte[k] = 0;
+ } else if (DETSizeOverride[k] > 0) {
+ DETBufferSizeInKByte[k] = DETSizeOverride[k];
+ DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
+ } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
+ DETBufferSizeInKByte[k] = minDET_pipe;
+ DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]);
+ dml_print("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, DETSizeOverride[k]);
+ dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
+ dml_print("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, DETBufferSizePoolInKByte);
+#endif
+ }
+
+ // Step 2: total read bandwidth of all non-phantom surfaces.
+ TotalBandwidth = 0;
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe)
+ TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+ dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
+ }
+ dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
+#endif
+ // NOTE(review): this print sits outside the __DML_VBA_DEBUG__ guard, unlike its neighbours -- confirm that is intended.
+ dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
+ BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
+ // Step 3: a surface needs no further piece if it is a phantom pipe, has an override, or its
+ // current share of MaxTotalDETInKByte already matches or exceeds its bandwidth share.
+ // NOTE(review): TotalBandwidth is used as a divisor here without a zero check.
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+ if (UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) {
+ DETPieceAssignedToThisSurfaceAlready[k] = true;
+ } else if (DETSizeOverride[k] > 0 || (((dml_float_t) (ForceSingleDPP ? 1 : DPPPerSurface[k]) * (dml_float_t) DETBufferSizeInKByte[k] / (dml_float_t) MaxTotalDETInKByte) >= ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
+ DETPieceAssignedToThisSurfaceAlready[k] = true;
+ BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
+ } else {
+ DETPieceAssignedToThisSurfaceAlready[k] = false;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]);
+ dml_print("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, BandwidthOfSurfacesNotAssignedDETPiece);
+#endif
+ }
+
+ // Step 4: at most one surface is served per outer iteration, lowest read bandwidth first.
+ for (dml_uint_t j = 0; j < NumberOfActiveSurfaces; ++j) {
+ NextPotentialSurfaceToAssignDETPieceFound = false;
+ NextSurfaceToAssignDETPiece = 0;
+
+ // Find the not-yet-served surface with the smallest luma + chroma read bandwidth.
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]);
+ dml_print("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]);
+ dml_print("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
+ dml_print("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
+ dml_print("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, NextSurfaceToAssignDETPiece);
+#endif
+ if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound ||
+ ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
+ NextSurfaceToAssignDETPiece = k;
+ NextPotentialSurfaceToAssignDETPieceFound = true;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
+ dml_print("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
+#endif
+ }
+
+ if (NextPotentialSurfaceToAssignDETPieceFound) {
+ // Note: To show the banker's rounding behavior in VBA and also the fact that the DET buffer size varies due to precision issue
+ //
+ //dml_float_t tmp1 = ((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece /
+ // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
+ //dml_float_t tmp2 = dml_round((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece /
+ // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
+ //
+ //dml_print("DML::%s: j=%u, tmp1 = %f\n", __func__, j, tmp1);
+ //dml_print("DML::%s: j=%u, tmp2 = %f\n", __func__, j, tmp2);
+
+ // Piece = remaining pool scaled by this surface's share of the still-unserved bandwidth,
+ // rounded in units of (DPP count x segment size) and never larger than the pool floored to that unit.
+ NextDETBufferPieceInKByte = (dml_uint_t)(dml_min(
+ dml_round((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece /
+ ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte), true)
+ * (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte,
+ dml_floor((dml_float_t) DETBufferSizePoolInKByte, (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte)));
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, DETBufferSizePoolInKByte);
+ dml_print("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, NextSurfaceToAssignDETPiece);
+ dml_print("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
+ dml_print("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
+ dml_print("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
+ dml_print("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, NextDETBufferPieceInKByte);
+ dml_print("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
+#endif
+
+ // The piece covers all DPPs of the surface; the stored size is per DPP.
+ DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] = DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] + NextDETBufferPieceInKByte / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("to %u\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
+#endif
+
+ // Book-keeping: shrink the pool and drop this surface from the unserved set.
+ DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
+ DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
+ BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
+ }
+ }
+ *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
+ }
+ // Convert from return-buffer segments to compressed-buffer segments (multiply first, then divide).
+ *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / ConfigReturnBufferSegmentSizeInkByte;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte);
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+ dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
+ }
+#endif
+} // CalculateDETBufferSize
+
+
+/// @brief Derive the limits used when sizing the return buffer.
+///
+/// Outputs:
+///  - *MaxTotalDETInKByte: the configured return buffer size minus one segment.
+///  - *nomDETInKByte: that total divided by MaxNumDPP and passed through dml_floor()
+///    with the segment size, or nomDETInKByteOverrideValue when the override is enabled.
+///  - *MinCompressedBufferSizeInKByte: what remains of the return buffer after
+///    MaxTotalDETInKByte.
+/// ROBBufferSizeInKByte is only used for debug output.
+static void CalculateMaxDETAndMinCompressedBufferSize(
+	dml_uint_t ConfigReturnBufferSizeInKByte,
+	dml_uint_t ConfigReturnBufferSegmentSizeInKByte,
+	dml_uint_t ROBBufferSizeInKByte,
+	dml_uint_t MaxNumDPP,
+	dml_bool_t nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
+	dml_uint_t nomDETInKByteOverrideValue, // VBA_DELTA
+
+	// Output
+	dml_uint_t *MaxTotalDETInKByte,
+	dml_uint_t *nomDETInKByte,
+	dml_uint_t *MinCompressedBufferSizeInKByte)
+{
+	// One segment of the return buffer is held back from the DET.
+	const dml_uint_t max_total_det = ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte;
+
+	*MaxTotalDETInKByte = max_total_det;
+	*nomDETInKByte = (dml_uint_t)(dml_floor((dml_float_t) max_total_det / (dml_float_t) MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte));
+	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - max_total_det;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
+	dml_print("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte);
+	dml_print("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP);
+	dml_print("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte);
+	dml_print("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte);
+	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte);
+#endif
+
+	// Without an override the computed nominal size stands.
+	if (!nomDETInKByteOverrideEnable)
+		return;
+
+	*nomDETInKByte = nomDETInKByteOverrideValue;
+	dml_print("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte);
+} // CalculateMaxDETAndMinCompressedBufferSize
+
+/// @brief Calculate all the RQ request attributes, like row height and # swath
+static void CalculateVMRowAndSwath(struct display_mode_lib_scratch_st *scratch,
+ struct CalculateVMRowAndSwath_params_st *p)
+{
+ struct CalculateVMRowAndSwath_locals_st *s = &scratch->CalculateVMRowAndSwath_locals;
+
+ s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->GPUVMEnable, p->HostVMEnable, p->HostVMMinPageSize, p->HostVMMaxNonCachedPageTableLevels);
+
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->HostVMEnable == true) {
+ p->vm_group_bytes[k] = 512;
+ p->dpte_group_bytes[k] = 512;
+ } else if (p->GPUVMEnable == true) {
+ p->vm_group_bytes[k] = 2048;
+ if (p->GPUVMMinPageSizeKBytes[k] >= 64 && dml_is_vertical_rotation(p->myPipe[k].SourceScan)) {
+ p->dpte_group_bytes[k] = 512;
+ } else {
+ p->dpte_group_bytes[k] = 2048;
+ }
+ } else {
+ p->vm_group_bytes[k] = 0;
+ p->dpte_group_bytes[k] = 0;
+ }
+
+ if (p->myPipe[k].SourcePixelFormat == dml_420_8 || p->myPipe[k].SourcePixelFormat == dml_420_10 ||
+ p->myPipe[k].SourcePixelFormat == dml_420_12 || p->myPipe[k].SourcePixelFormat == dml_rgbe_alpha) {
+ if ((p->myPipe[k].SourcePixelFormat == dml_420_10 || p->myPipe[k].SourcePixelFormat == dml_420_12) && !dml_is_vertical_rotation(p->myPipe[k].SourceScan)) {
+ s->PTEBufferSizeInRequestsForLuma[k] = (p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma) / 2;
+ s->PTEBufferSizeInRequestsForChroma[k] = s->PTEBufferSizeInRequestsForLuma[k];
+ } else {
+ s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma;
+ s->PTEBufferSizeInRequestsForChroma[k] = p->PTEBufferSizeInRequestsChroma;
+ }
+
+ s->PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
+ p->myPipe[k].ViewportStationary,
+ p->myPipe[k].DCCEnable,
+ p->myPipe[k].DPPPerSurface,
+ p->myPipe[k].BlockHeight256BytesC,
+ p->myPipe[k].BlockWidth256BytesC,
+ p->myPipe[k].SourcePixelFormat,
+ p->myPipe[k].SurfaceTiling,
+ p->myPipe[k].BytePerPixelC,
+ p->myPipe[k].SourceScan,
+ p->SwathWidthC[k],
+ p->myPipe[k].ViewportHeightChroma,
+ p->myPipe[k].ViewportXStartC,
+ p->myPipe[k].ViewportYStartC,
+ p->GPUVMEnable,
+ p->GPUVMMaxPageTableLevels,
+ p->GPUVMMinPageSizeKBytes[k],
+ s->PTEBufferSizeInRequestsForChroma[k],
+ p->myPipe[k].PitchC,
+ p->myPipe[k].DCCMetaPitchC,
+ p->myPipe[k].BlockWidthC,
+ p->myPipe[k].BlockHeightC,
+
+ // Output
+ &s->MetaRowByteC[k],
+ &s->PixelPTEBytesPerRowC[k],
+ &s->PixelPTEBytesPerRowStorageC[k],
+ &p->dpte_row_width_chroma_ub[k],
+ &p->dpte_row_height_chroma[k],
+ &p->dpte_row_height_linear_chroma[k],
+ &s->PixelPTEBytesPerRowC_one_row_per_frame[k],
+ &s->dpte_row_width_chroma_ub_one_row_per_frame[k],
+ &s->dpte_row_height_chroma_one_row_per_frame[k],
+ &p->meta_req_width_chroma[k],
+ &p->meta_req_height_chroma[k],
+ &p->meta_row_width_chroma[k],
+ &p->meta_row_height_chroma[k],
+ &p->PixelPTEReqWidthC[k],
+ &p->PixelPTEReqHeightC[k],
+ &p->PTERequestSizeC[k],
+ &p->dpde0_bytes_per_frame_ub_c[k],
+ &p->meta_pte_bytes_per_frame_ub_c[k]);
+
+ p->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines (
+ p->myPipe[k].VRatioChroma,
+ p->myPipe[k].VTapsChroma,
+ p->myPipe[k].InterlaceEnable,
+ p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
+ p->myPipe[k].SwathHeightC,
+ p->myPipe[k].SourceScan,
+ p->myPipe[k].ViewportStationary,
+ p->SwathWidthC[k],
+ p->myPipe[k].ViewportHeightChroma,
+ p->myPipe[k].ViewportXStartC,
+ p->myPipe[k].ViewportYStartC,
+
+ // Output
+ &p->VInitPreFillC[k],
+ &p->MaxNumSwathC[k]);
+ } else {
+ s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma;
+ s->PTEBufferSizeInRequestsForChroma[k] = 0;
+ s->PixelPTEBytesPerRowC[k] = 0;
+ s->PixelPTEBytesPerRowStorageC[k] = 0;
+ s->PDEAndMetaPTEBytesFrameC = 0;
+ s->MetaRowByteC[k] = 0;
+ p->MaxNumSwathC[k] = 0;
+ p->PrefetchSourceLinesC[k] = 0;
+ s->dpte_row_height_chroma_one_row_per_frame[k] = 0;
+ s->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
+ s->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
+ }
+
+ s->PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
+ p->myPipe[k].ViewportStationary,
+ p->myPipe[k].DCCEnable,
+ p->myPipe[k].DPPPerSurface,
+ p->myPipe[k].BlockHeight256BytesY,
+ p->myPipe[k].BlockWidth256BytesY,
+ p->myPipe[k].SourcePixelFormat,
+ p->myPipe[k].SurfaceTiling,
+ p->myPipe[k].BytePerPixelY,
+ p->myPipe[k].SourceScan,
+ p->SwathWidthY[k],
+ p->myPipe[k].ViewportHeight,
+ p->myPipe[k].ViewportXStart,
+ p->myPipe[k].ViewportYStart,
+ p->GPUVMEnable,
+ p->GPUVMMaxPageTableLevels,
+ p->GPUVMMinPageSizeKBytes[k],
+ s->PTEBufferSizeInRequestsForLuma[k],
+ p->myPipe[k].PitchY,
+ p->myPipe[k].DCCMetaPitchY,
+ p->myPipe[k].BlockWidthY,
+ p->myPipe[k].BlockHeightY,
+
+ // Output
+ &s->MetaRowByteY[k],
+ &s->PixelPTEBytesPerRowY[k],
+ &s->PixelPTEBytesPerRowStorageY[k],
+ &p->dpte_row_width_luma_ub[k],
+ &p->dpte_row_height_luma[k],
+ &p->dpte_row_height_linear_luma[k],
+ &s->PixelPTEBytesPerRowY_one_row_per_frame[k],
+ &s->dpte_row_width_luma_ub_one_row_per_frame[k],
+ &s->dpte_row_height_luma_one_row_per_frame[k],
+ &p->meta_req_width[k],
+ &p->meta_req_height[k],
+ &p->meta_row_width[k],
+ &p->meta_row_height[k],
+ &p->PixelPTEReqWidthY[k],
+ &p->PixelPTEReqHeightY[k],
+ &p->PTERequestSizeY[k],
+ &p->dpde0_bytes_per_frame_ub_l[k],
+ &p->meta_pte_bytes_per_frame_ub_l[k]);
+
+ p->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
+ p->myPipe[k].VRatio,
+ p->myPipe[k].VTaps,
+ p->myPipe[k].InterlaceEnable,
+ p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
+ p->myPipe[k].SwathHeightY,
+ p->myPipe[k].SourceScan,
+ p->myPipe[k].ViewportStationary,
+ p->SwathWidthY[k],
+ p->myPipe[k].ViewportHeight,
+ p->myPipe[k].ViewportXStart,
+ p->myPipe[k].ViewportYStart,
+
+ // Output
+ &p->VInitPreFillY[k],
+ &p->MaxNumSwathY[k]);
+
+ p->PDEAndMetaPTEBytesFrame[k] = (s->PDEAndMetaPTEBytesFrameY + s->PDEAndMetaPTEBytesFrameC) * (1 + 8 * s->HostVMDynamicLevels);
+ p->MetaRowByte[k] = s->MetaRowByteY[k] + s->MetaRowByteC[k];
+
+ if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) {
+ p->PTEBufferSizeNotExceeded[k] = true;
+ } else {
+ p->PTEBufferSizeNotExceeded[k] = false;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY = %u\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
+ dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC = %u\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
+ dml_print("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]);
+ dml_print("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]);
+ dml_print("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]);
+ dml_print("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]);
+ dml_print("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
+#endif
+ }
+ s->one_row_per_frame_fits_in_buffer[k] = (s->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForLuma[k] &&
+ s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrame = %u\n", __func__, k, p->PDEAndMetaPTEBytesFrame[k]);
+ dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrameY = %u\n", __func__, k, s->PDEAndMetaPTEBytesFrameY);
+ dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrameC = %u\n", __func__, k, s->PDEAndMetaPTEBytesFrameC);
+ dml_print("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels);
+ dml_print("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]);
+ dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]);
+ dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]);
+#endif
+ }
+
+ CalculateMALLUseForStaticScreen(
+ p->NumberOfActiveSurfaces,
+ p->MALLAllocatedForDCN,
+ p->UseMALLForStaticScreen, // mode
+ p->SurfaceSizeInMALL,
+ s->one_row_per_frame_fits_in_buffer,
+ // Output
+ p->UsesMALLForStaticScreen); // boolen
+
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->PTEBufferModeOverrideEn[k] == 1) {
+ p->PTE_BUFFER_MODE[k] = p->PTEBufferModeOverrideVal[k];
+ }
+ p->PTE_BUFFER_MODE[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->UsesMALLForStaticScreen[k] || (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) ||
+ (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) || (p->GPUVMMinPageSizeKBytes[k] > 64);
+ p->BIGK_FRAGMENT_SIZE[k] = (dml_uint_t)(dml_log2(p->GPUVMMinPageSizeKBytes[k] * 1024) - 12);
+ }
+
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]);
+ dml_print("DML::%s: k=%u, UsesMALLForStaticScreen = %u\n", __func__, k, p->UsesMALLForStaticScreen[k]);
+#endif
+ p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->UsesMALLForStaticScreen[k] || (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) ||
+ (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) || (p->GPUVMMinPageSizeKBytes[k] > 64 && dml_is_vertical_rotation(p->myPipe[k].SourceScan));
+
+ p->use_one_row_for_frame_flip[k] = p->use_one_row_for_frame[k] && !(p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame);
+
+ if (p->use_one_row_for_frame[k]) {
+ p->dpte_row_height_luma[k] = s->dpte_row_height_luma_one_row_per_frame[k];
+ p->dpte_row_width_luma_ub[k] = s->dpte_row_width_luma_ub_one_row_per_frame[k];
+ s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY_one_row_per_frame[k];
+ p->dpte_row_height_chroma[k] = s->dpte_row_height_chroma_one_row_per_frame[k];
+ p->dpte_row_width_chroma_ub[k] = s->dpte_row_width_chroma_ub_one_row_per_frame[k];
+ s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC_one_row_per_frame[k];
+ p->PTEBufferSizeNotExceeded[k] = s->one_row_per_frame_fits_in_buffer[k];
+ }
+
+ if (p->MetaRowByte[k] <= p->DCCMetaBufferSizeBytes) {
+ p->DCCMetaBufferSizeNotExceeded[k] = true;
+ } else {
+ p->DCCMetaBufferSizeNotExceeded[k] = false;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, MetaRowByte = %u\n", __func__, k, p->MetaRowByte[k]);
+ dml_print("DML::%s: k=%u, DCCMetaBufferSizeBytes = %u\n", __func__, k, p->DCCMetaBufferSizeBytes);
+ dml_print("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]);
+#endif
+ }
+ s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY[k] * (1 + 8 * s->HostVMDynamicLevels);
+ s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC[k] * (1 + 8 * s->HostVMDynamicLevels);
+ p->PixelPTEBytesPerRow[k] = s->PixelPTEBytesPerRowY[k] + s->PixelPTEBytesPerRowC[k];
+ if (p->use_one_row_for_frame[k])
+ p->PixelPTEBytesPerRow[k] = p->PixelPTEBytesPerRow[k] / 2;
+
+ CalculateRowBandwidth(
+ p->GPUVMEnable,
+ p->myPipe[k].SourcePixelFormat,
+ p->myPipe[k].VRatio,
+ p->myPipe[k].VRatioChroma,
+ p->myPipe[k].DCCEnable,
+ p->myPipe[k].HTotal / p->myPipe[k].PixelClock,
+ s->MetaRowByteY[k],
+ s->MetaRowByteC[k],
+ p->meta_row_height[k],
+ p->meta_row_height_chroma[k],
+ s->PixelPTEBytesPerRowY[k],
+ s->PixelPTEBytesPerRowC[k],
+ p->dpte_row_height_luma[k],
+ p->dpte_row_height_chroma[k],
+
+ // Output
+ &p->meta_row_bw[k],
+ &p->dpte_row_bw[k]);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
+ dml_print("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]);
+ dml_print("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->UseMALLForPStateChange[k]);
+ dml_print("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]);
+ dml_print("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]);
+ dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY = %u\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
+ dml_print("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]);
+ dml_print("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC = %u\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
+ dml_print("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]);
+ dml_print("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
+ dml_print("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]);
+ dml_print("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]);
+#endif
+ }
+}
+ /*
+  * CalculateOutputLink - work out DSC/FEC need, output bpp and link type/rate for one surface.
+  *
+  * All outputs except *RequiredSlots are first reset (false / 0 / unknown). Nothing else
+  * happens unless IsMainSurfaceUsingTheIndicatedTiming is set.
+  *
+  * HDMI: DSC and FEC are never required; the bpp comes from a single TruncToValidBPP() call
+  * that is given dml_min(600, PHYCLKPerState) * 10 and 3 as its first two arguments. The
+  * slot count of that call goes to a local dummy, so *RequiredSlots is not written.
+  *
+  * DP / eDP / DP2.0: DSCEnable == dml_dsc_enable forces DSC on; otherwise the link is first
+  * tried without DSC. FEC is required for DP2.0 always, and for DP whenever DSC ends up on.
+  * Link rates are tried from lowest to highest (UHBR10, UHBR13.5, UHBR20 for DP2.0, gated on
+  * PHYCLKD32PerState; HBR, HBR2, HBR3 otherwise, gated on PHYCLKPerState). A rate is
+  * considered only when OutputLinkDPRate is dml_dp_rate_na or names that rate, and a higher
+  * rate only while *OutBpp is still 0. When a rate yields 0 bpp, DSCEnable is
+  * dml_dsc_enable_if_necessary, no bpp is forced (ForcedOutputLinkBPP == 0) and the PHY clock
+  * is below the next rate's gate (no such limit for the top rate), the same rate is retried
+  * with DSC enabled.
+  * *OutputType / *OutputRate record the last rate attempted, even if *OutBpp stays 0.
+  *
+  * PHYCLKD18PerState is accepted but not referenced in this function.
+  * NOTE(review): clock/rate units are not visible here - presumably MHz / Mbps, confirm.
+  */
+ static void CalculateOutputLink(
+ dml_float_t PHYCLKPerState,
+ dml_float_t PHYCLKD18PerState,
+ dml_float_t PHYCLKD32PerState,
+ dml_float_t Downspreading,
+ dml_bool_t IsMainSurfaceUsingTheIndicatedTiming,
+ enum dml_output_encoder_class Output,
+ enum dml_output_format_class OutputFormat,
+ dml_uint_t HTotal,
+ dml_uint_t HActive,
+ dml_float_t PixelClockBackEnd,
+ dml_float_t ForcedOutputLinkBPP,
+ dml_uint_t DSCInputBitPerComponent,
+ dml_uint_t NumberOfDSCSlices,
+ dml_float_t AudioSampleRate,
+ dml_uint_t AudioSampleLayout,
+ enum dml_odm_mode ODMModeNoDSC,
+ enum dml_odm_mode ODMModeDSC,
+ enum dml_dsc_enable DSCEnable,
+ dml_uint_t OutputLinkDPLanes,
+ enum dml_output_link_dp_rate OutputLinkDPRate,
+
+ // Output
+ dml_bool_t *RequiresDSC,
+ dml_bool_t *RequiresFEC,
+ dml_float_t *OutBpp,
+ enum dml_output_type_and_rate__type *OutputType,
+ enum dml_output_type_and_rate__rate *OutputRate,
+ dml_uint_t *RequiredSlots)
+{
+ dml_bool_t LinkDSCEnable; // DSC flag handed to TruncToValidBPP(); assigned in the DP/eDP/DP2.0 branch before any use
+ dml_uint_t dummy; // sink for the slot count of the HDMI TruncToValidBPP() call
+ *RequiresDSC = false;
+ *RequiresFEC = false;
+ *OutBpp = 0;
+
+ *OutputType = dml_output_type_unknown;
+ *OutputRate = dml_output_rate_unknown;
+
+ if (IsMainSurfaceUsingTheIndicatedTiming) {
+ if (Output == dml_hdmi) {
+ *RequiresDSC = false;
+ *RequiresFEC = false;
+ *OutBpp = TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, false, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
+ //OutputTypeAndRate = "HDMI";
+ *OutputType = dml_output_type_hdmi;
+
+ } else if (Output == dml_dp || Output == dml_dp2p0 || Output == dml_edp) {
+ if (DSCEnable == dml_dsc_enable) { // DSC explicitly requested
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dml_dp || Output == dml_dp2p0) {
+ *RequiresFEC = true;
+ } else {
+ *RequiresFEC = false;
+ }
+ } else { // DSC off, or only "if necessary": start without DSC
+ *RequiresDSC = false;
+ LinkDSCEnable = false;
+ if (Output == dml_dp2p0) {
+ *RequiresFEC = true;
+ } else {
+ *RequiresFEC = false;
+ }
+ }
+ if (Output == dml_dp2p0) {
+ *OutBpp = 0;
+ if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_uhbr10) && PHYCLKD32PerState >= 10000 / 32.0) {
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32.0 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { // PHY clock is below the UHBR13.5 gate: retry UHBR10 with DSC
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR10";
+ *OutputType = dml_output_type_dp2p0;
+ *OutputRate = dml_output_rate_dp_rate_uhbr10;
+ }
+ if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_uhbr13p5) && *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32.0) {
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+
+ if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { // 20000 / 32 is an exact integer division (625)
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR13p5";
+ *OutputType = dml_output_type_dp2p0;
+ *OutputRate = dml_output_rate_dp_rate_uhbr13p5;
+ }
+ if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_uhbr20) && *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { // top DP2.0 rate: no PHY clock limit on the DSC retry
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR20";
+ *OutputType = dml_output_type_dp2p0;
+ *OutputRate = dml_output_rate_dp_rate_uhbr20;
+ }
+ } else { // output is dp or edp
+ *OutBpp = 0;
+ if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_hbr) && PHYCLKPerState >= 270) {
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { // PHY clock is below the HBR2 gate: retry HBR with DSC
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dml_dp) { // eDP keeps RequiresFEC false
+ *RequiresFEC = true;
+ }
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR";
+ *OutputType = (Output == dml_dp) ? dml_output_type_dp : dml_output_type_edp;
+ *OutputRate = dml_output_rate_dp_rate_hbr;
+ }
+ if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_hbr2) && *OutBpp == 0 && PHYCLKPerState >= 540) {
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+
+ if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { // PHY clock is below the HBR3 gate: retry HBR2 with DSC
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dml_dp) {
+ *RequiresFEC = true;
+ }
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR2";
+ *OutputType = (Output == dml_dp) ? dml_output_type_dp : dml_output_type_edp;
+ *OutputRate = dml_output_rate_dp_rate_hbr2;
+ }
+ if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) { // VBA_ERROR, vba code doesn't have hbr3 check
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+
+ if (*OutBpp == 0 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { // top DP/eDP rate: no PHY clock limit on the DSC retry
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dml_dp) {
+ *RequiresFEC = true;
+ }
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR3";
+ *OutputType = (Output == dml_dp) ? dml_output_type_dp : dml_output_type_edp;
+ *OutputRate = dml_output_rate_dp_rate_hbr3;
+ }
+ }
+ }
+ }
+}
+
+ /**
+  * @brief Determine the ODM mode and number of DPP used per plane based on dispclk, dsc usage, odm usage policy
+  *
+  * Steps, in the order implemented below:
+  *  1. The DISPCLK this surface would need in bypass, 2:1 combine and 4:1 combine is computed
+  *     with CalculateRequiredDispclk().
+  *  2. For dml_420 output only, HActive is compared with 1x / 2x / 4x
+  *     DML2_MAX_FMT_420_BUFFER_WIDTH: above 4x the configuration is reported unsupported,
+  *     otherwise the local copy of ODMUse is raised to 2:1 or 4:1 combine. Still inside the
+  *     dml_420 block, HDMI with 2:1 and HDMI/DP/eDP with 4:1 are reported unsupported.
+  *  3. ODMUse is mapped one-to-one onto *ODMMode (bypass and combine_as_needed both start as
+  *     bypass). *ODMMode is left unwritten if ODMUse matches none of the listed policies.
+  *  4. *RequiredDISPCLKPerSurface defaults to the bypass clock and *NumberOfDPP to 0; then
+  *     exactly one of three cases runs: 4:1 combine (4 DPPs), 2:1 combine (2 DPPs) or a single
+  *     DPP. Each case clears *TotalAvailablePipesSupport instead of assigning DPPs when
+  *     TotalNumberOfActiveDPP plus the DPPs it needs would exceed MaxNumDPP.
+  *
+  * ODMUse is passed by value, so the overrides in step 2 are not visible to the caller.
+  */
+ static void CalculateODMMode(
+ dml_uint_t MaximumPixelsPerLinePerDSCUnit,
+ dml_uint_t HActive,
+ enum dml_output_encoder_class Output,
+ enum dml_output_format_class OutputFormat,
+ enum dml_odm_use_policy ODMUse,
+ dml_float_t StateDispclk,
+ dml_float_t MaxDispclk,
+ dml_bool_t DSCEnable,
+ dml_uint_t TotalNumberOfActiveDPP,
+ dml_uint_t MaxNumDPP,
+ dml_float_t PixelClock,
+ dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
+ dml_float_t DISPCLKRampingMargin,
+ dml_float_t DISPCLKDPPCLKVCOSpeed,
+ dml_uint_t NumberOfDSCSlices,
+
+ // Output
+ dml_bool_t *TotalAvailablePipesSupport,
+ dml_uint_t *NumberOfDPP,
+ enum dml_odm_mode *ODMMode,
+ dml_float_t *RequiredDISPCLKPerSurface)
+{
+
+ dml_float_t SurfaceRequiredDISPCLKWithoutODMCombine;
+ dml_float_t SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
+ dml_float_t SurfaceRequiredDISPCLKWithODMCombineFourToOne;
+
+ SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml_odm_mode_bypass, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclk);
+ SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(dml_odm_mode_combine_2to1, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclk);
+ SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml_odm_mode_combine_4to1, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclk);
+ *TotalAvailablePipesSupport = true;
+
+ if (OutputFormat == dml_420) { // 4:2:0 output: active width may force an ODM combine policy
+ if (HActive > 4 * DML2_MAX_FMT_420_BUFFER_WIDTH)
+ *TotalAvailablePipesSupport = false;
+ else if (HActive > 2 * DML2_MAX_FMT_420_BUFFER_WIDTH)
+ ODMUse = dml_odm_use_policy_combine_4to1;
+ else if (HActive > DML2_MAX_FMT_420_BUFFER_WIDTH && ODMUse != dml_odm_use_policy_combine_4to1)
+ ODMUse = dml_odm_use_policy_combine_2to1;
+ if (Output == dml_hdmi && ODMUse == dml_odm_use_policy_combine_2to1)
+ *TotalAvailablePipesSupport = false;
+ if ((Output == dml_hdmi || Output == dml_dp || Output == dml_edp) && ODMUse == dml_odm_use_policy_combine_4to1)
+ *TotalAvailablePipesSupport = false;
+ }
+
+ if (ODMUse == dml_odm_use_policy_bypass || ODMUse == dml_odm_use_policy_combine_as_needed) // "as needed" starts as bypass and may be upgraded below
+ *ODMMode = dml_odm_mode_bypass;
+ else if (ODMUse == dml_odm_use_policy_combine_2to1)
+ *ODMMode = dml_odm_mode_combine_2to1;
+ else if (ODMUse == dml_odm_use_policy_combine_4to1)
+ *ODMMode = dml_odm_mode_combine_4to1;
+ else if (ODMUse == dml_odm_use_policy_split_1to2)
+ *ODMMode = dml_odm_mode_split_1to2;
+ else if (ODMUse == dml_odm_use_policy_mso_1to2)
+ *ODMMode = dml_odm_mode_mso_1to2;
+ else if (ODMUse == dml_odm_use_policy_mso_1to4)
+ *ODMMode = dml_odm_mode_mso_1to4;
+
+ *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
+ *NumberOfDPP = 0;
+
+ if (!(Output == dml_hdmi || Output == dml_dp || Output == dml_edp) && (ODMUse == dml_odm_use_policy_combine_4to1 || (ODMUse == dml_odm_use_policy_combine_as_needed && // 4:1 case: never taken for HDMI, DP or eDP
+ (SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk || (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)) || NumberOfDSCSlices > 8)))) {
+ if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
+ *ODMMode = dml_odm_mode_combine_4to1;
+ *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
+ *NumberOfDPP = 4;
+ } else {
+ *TotalAvailablePipesSupport = false;
+ }
+ } else if (Output != dml_hdmi && (ODMUse == dml_odm_use_policy_combine_2to1 || (ODMUse == dml_odm_use_policy_combine_as_needed && // 2:1 case: never taken for HDMI
+ ((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk && SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
+ (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)) || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
+ if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
+ *ODMMode = dml_odm_mode_combine_2to1;
+ *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
+ *NumberOfDPP = 2;
+ } else {
+ *TotalAvailablePipesSupport = false;
+ }
+ } else { // single DPP: *ODMMode keeps the value mapped from ODMUse above
+ if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP) {
+ *NumberOfDPP = 1;
+ } else {
+ *TotalAvailablePipesSupport = false;
+ }
+ }
+}
+
+ /**
+  * @brief Calculate the DISPCLK one surface requires for a given ODM mode and pixel clock.
+  *
+  * The pixel clock is divided by 4 for 4:1 ODM combine, by 2 for 2:1 ODM combine and used
+  * unchanged for every other mode. Two candidates are then passed through
+  * RoundToDFSGranularity(..., 1, VCO): one with down-spread margin only and one with
+  * down-spread plus ramping margin. MaxDispclk goes through RoundToDFSGranularity(..., 0, VCO).
+  *
+  * @return the down-spread-only clock if even that exceeds the rounded maximum; the rounded
+  *         maximum if only the ramped clock exceeds it; otherwise the ramped clock.
+  */
+ static dml_float_t CalculateRequiredDispclk(
+		enum dml_odm_mode ODMMode,
+		dml_float_t PixelClock,
+		dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
+		dml_float_t DISPCLKRampingMargin,
+		dml_float_t DISPCLKDPPCLKVCOSpeed,
+		dml_float_t MaxDispclk)
+{
+	dml_float_t pixclk_per_segment = PixelClock;
+	dml_float_t pixclk_with_spread;
+	dml_float_t dispclk_ramped;
+	dml_float_t dispclk_unramped;
+	dml_float_t dispclk_ceiling;
+
+	// Each ODM segment only carries its share of the pixel rate.
+	switch (ODMMode) {
+	case dml_odm_mode_combine_4to1:
+		pixclk_per_segment = PixelClock / 4;
+		break;
+	case dml_odm_mode_combine_2to1:
+		pixclk_per_segment = PixelClock / 2;
+		break;
+	default:
+		break;
+	}
+
+	pixclk_with_spread = pixclk_per_segment * (1.0 + DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+
+	dispclk_ramped = RoundToDFSGranularity(pixclk_with_spread * (1 + DISPCLKRampingMargin / 100.0), 1, DISPCLKDPPCLKVCOSpeed);
+	dispclk_unramped = RoundToDFSGranularity(pixclk_with_spread, 1, DISPCLKDPPCLKVCOSpeed);
+	dispclk_ceiling = RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
+
+	// Even without ramping margin the clock is above the ceiling: report the real need.
+	if (dispclk_unramped > dispclk_ceiling)
+		return dispclk_unramped;
+
+	// Only the ramping margin pushes it over: clamp to the ceiling.
+	if (dispclk_ramped > dispclk_ceiling)
+		return dispclk_ceiling;
+
+	return dispclk_ramped;
+}
+
+ /**
+  * @brief Determine DPPCLK if there is only one DPP per plane; the main factors are the pixel
+  *        rate and the DPP scaling parameters.
+  *
+  * Luma (and, for the dml_420_8 / dml_420_10 / dml_420_12 / dml_rgbe_alpha formats, chroma)
+  * are evaluated the same way:
+  *  - scaler throughput is min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput), with the
+  *    second term scaled by HRatio / ceil(HTaps / 6) when the horizontal ratio is above 1;
+  *  - the clock is PixelClock * max(VTaps / 6 * min(1, HRatio), HRatio * VRatio / throughput, 1),
+  *    raised to at least 2 * PixelClock when either tap count is above 6.
+  * For all other formats *PSCL_THROUGHPUT_CHROMA is 0 and only the luma clock is used.
+  *
+  * The tap counts are unsigned integers, so they are converted to floating point before the
+  * division by 6 (as is already done for HTaps); otherwise VTaps / 6 truncates to a whole
+  * number and the vertical-tap term is understated.
+  *
+  * @param[out] PSCL_THROUGHPUT        luma scaler throughput
+  * @param[out] PSCL_THROUGHPUT_CHROMA chroma scaler throughput, 0 when the format has no
+  *                                    separately scaled chroma plane
+  * @param[out] DPPCLKUsingSingleDPP   larger of the luma and chroma clock requirements
+  */
+ static void CalculateSinglePipeDPPCLKAndSCLThroughput(
+		dml_float_t HRatio,
+		dml_float_t HRatioChroma,
+		dml_float_t VRatio,
+		dml_float_t VRatioChroma,
+		dml_float_t MaxDCHUBToPSCLThroughput,
+		dml_float_t MaxPSCLToLBThroughput,
+		dml_float_t PixelClock,
+		enum dml_source_format_class SourcePixelFormat,
+		dml_uint_t HTaps,
+		dml_uint_t HTapsChroma,
+		dml_uint_t VTaps,
+		dml_uint_t VTapsChroma,
+
+		// Output
+		dml_float_t *PSCL_THROUGHPUT,
+		dml_float_t *PSCL_THROUGHPUT_CHROMA,
+		dml_float_t *DPPCLKUsingSingleDPP)
+{
+	dml_float_t DPPCLKUsingSingleDPPLuma;
+	dml_float_t DPPCLKUsingSingleDPPChroma;
+
+	if (HRatio > 1) {
+		*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / dml_ceil((dml_float_t) HTaps / 6.0, 1.0));
+	} else {
+		*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
+	}
+
+	// Floating-point division: VTaps / 6 on the unsigned tap count would truncate.
+	DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3((dml_float_t) VTaps / 6.0 * dml_min(1, HRatio), HRatio * VRatio / *PSCL_THROUGHPUT, 1);
+
+	// More than 6 taps in either direction needs at least twice the pixel clock.
+	if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
+		DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
+
+	if ((SourcePixelFormat != dml_420_8 && SourcePixelFormat != dml_420_10 && SourcePixelFormat != dml_420_12 && SourcePixelFormat != dml_rgbe_alpha)) {
+		// No separately scaled chroma plane: the luma requirement is the answer.
+		*PSCL_THROUGHPUT_CHROMA = 0;
+		*DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
+	} else {
+		if (HRatioChroma > 1) {
+			*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatioChroma / dml_ceil((dml_float_t) HTapsChroma / 6.0, 1.0));
+		} else {
+			*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
+		}
+		DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3((dml_float_t) VTapsChroma / 6.0 * dml_min(1, HRatioChroma),
+				HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
+		if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
+			DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
+		*DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
+	}
+}
+
+ /// @brief Calculate the actual dppclk freq
+ /// @param DPPCLKUsingSingleDPP DppClk freq required if there is only 1 DPP per plane
+ /// @param DPPPerSurface Number of DPP for each plane
+ /// @param GlobalDPPCLK [out] Largest per-surface DPPCLK, passed through RoundToDFSGranularity(..., 1, VCO)
+ /// @param Dppclk [out] Per-surface DPPCLK, rounded up to a multiple of GlobalDPPCLK / 255
+ ///
+ /// Each surface first gets DPPCLKUsingSingleDPP / DPPPerSurface plus the down-spread margin.
+ /// The maximum over all surfaces becomes the global clock; every surface clock is then
+ /// re-expressed as ceil(Dppclk * 255 / GlobalDPPCLK) steps of GlobalDPPCLK / 255.
+ /// NOTE(review): DPPPerSurface[k] == 0 or a resulting GlobalDPPCLK of 0 would divide by zero;
+ /// callers are presumably expected to pass at least one DPP per active surface - confirm.
+ static void CalculateDPPCLK(
+		dml_uint_t NumberOfActiveSurfaces,
+		dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
+		dml_float_t DISPCLKDPPCLKVCOSpeed,
+		dml_float_t DPPCLKUsingSingleDPP[],
+		dml_uint_t DPPPerSurface[],
+
+		// Output
+		dml_float_t *GlobalDPPCLK,
+		dml_float_t Dppclk[])
+{
+	*GlobalDPPCLK = 0;
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+		Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+		*GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[k]);
+	}
+	*GlobalDPPCLK = RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed);
+
+	dml_print("DML::%s: GlobalDPPCLK = %f\n", __func__, *GlobalDPPCLK);
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+		// Quantize each surface clock to 1/255 steps of the global clock, rounding up.
+		Dppclk[k] = *GlobalDPPCLK / 255.0 * dml_ceil(Dppclk[k] * 255.0 / *GlobalDPPCLK, 1.0);
+		// k is unsigned, so print it with %u like the other traces in this file.
+		dml_print("DML::%s: Dppclk[%u] = %f\n", __func__, k, Dppclk[k]);
+	}
+}
+
+ /*
+  * CalculateMALLUseForStaticScreen - choose which surfaces use MALL for static screen.
+  *
+  * Pass 1 marks every surface whose mode is dml_use_mall_static_screen_enable and sums their
+  * SurfaceSizeInMALL. Pass 2 then repeatedly adds the smallest remaining surface that
+  *   - is not already marked,
+  *   - is not set to dml_use_mall_static_screen_disable,
+  *   - has one_row_per_frame_fits_in_buffer set, and
+  *   - still fits: running total + its size <= MALLAllocatedForDCNFinal * 1024 * 1024,
+  * until no surface qualifies. Ties on size keep the lowest index.
+  *
+  * Result is written to UsesMALLForStaticScreen[0 .. NumberOfActiveSurfaces - 1].
+  */
+ static void CalculateMALLUseForStaticScreen(
+		dml_uint_t NumberOfActiveSurfaces,
+		dml_uint_t MALLAllocatedForDCNFinal,
+		enum dml_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
+		dml_uint_t SurfaceSizeInMALL[],
+		dml_bool_t one_row_per_frame_fits_in_buffer[],
+
+		// Output
+		dml_bool_t UsesMALLForStaticScreen[])
+{
+	dml_uint_t mall_bytes_used = 0;
+	dml_uint_t candidate = 0;
+	dml_bool_t have_candidate;
+
+	// Pass 1: surfaces with MALL explicitly enabled are always taken.
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+		UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable);
+		if (UsesMALLForStaticScreen[k])
+			mall_bytes_used += SurfaceSizeInMALL[k];
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u, UsesMALLForStaticScreen = %u\n", __func__, k, UsesMALLForStaticScreen[k]);
+		dml_print("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, mall_bytes_used);
+#endif
+	}
+
+	// Pass 2: greedily add the smallest eligible surface until nothing more fits.
+	do {
+		have_candidate = false;
+		for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+			if (mall_bytes_used + SurfaceSizeInMALL[k] > MALLAllocatedForDCNFinal * 1024 * 1024)
+				continue;
+			if (UsesMALLForStaticScreen[k] || UseMALLForStaticScreen[k] == dml_use_mall_static_screen_disable || !one_row_per_frame_fits_in_buffer[k])
+				continue;
+			// Only replace the current pick with a strictly smaller surface.
+			if (have_candidate && SurfaceSizeInMALL[k] >= SurfaceSizeInMALL[candidate])
+				continue;
+
+			have_candidate = true;
+			candidate = k;
+			dml_print("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, UseMALLForStaticScreen[k]);
+		}
+
+		if (have_candidate) {
+			UsesMALLForStaticScreen[candidate] = true;
+			mall_bytes_used += SurfaceSizeInMALL[candidate];
+
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: SurfaceToAddToMALL = %u\n", __func__, candidate);
+			dml_print("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, mall_bytes_used);
+#endif
+		}
+	} while (have_candidate);
+}
+
+ /*
+  * dml_get_return_bw_mbps_vm_only - return bandwidth when only VM data is being returned.
+  *
+  * The result is the smallest of three derated limits:
+  *   SDP port: return_bus_width_bytes * DCFCLK            * pct_ideal_sdp_bw_after_urgent / 100
+  *   fabric:   FabricClock * fabric_datapath_to_dcn_data_return_bytes
+  *                                                        * pct_ideal_sdp_bw_after_urgent / 100
+  *   DRAM:     DRAMSpeed * num_chans * dram_channel_width_bytes * <dram pct> / 100
+  * where <dram pct> is pct_ideal_dram_bw_after_urgent_strobe when use_ideal_dram_bw_strobe is
+  * set and HostVMEnable is clear, and pct_ideal_dram_bw_after_urgent_vm_only otherwise.
+  *
+  * NOTE(review): the fabric limit is derated with the SDP percentage here, whereas
+  * dml_get_return_bw_mbps() derates the same fabric term with
+  * pct_ideal_fabric_bw_after_urgent. Behaviour is kept as-is; confirm this is intended.
+  */
+ dml_float_t dml_get_return_bw_mbps_vm_only(
+		const struct soc_bounding_box_st *soc,
+		dml_bool_t use_ideal_dram_bw_strobe,
+		dml_bool_t HostVMEnable,
+		dml_float_t DCFCLK,
+		dml_float_t FabricClock,
+		dml_float_t DRAMSpeed)
+{
+	dml_float_t dram_pct = (use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe : soc->pct_ideal_dram_bw_after_urgent_vm_only;
+	dml_float_t sdp_limit = soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0;
+	dml_float_t fabric_limit = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes * soc->pct_ideal_sdp_bw_after_urgent / 100.0;
+	dml_float_t dram_limit = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes * dram_pct / 100.0;
+	dml_float_t vm_only_bw = dml_min3(sdp_limit, fabric_limit, dram_limit);
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe);
+	dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable);
+	dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
+	dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
+	dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
+	dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, vm_only_bw);
+#endif
+	return vm_only_bw;
+}
+
+ // Function: dml_get_return_bw_mbps
+ // Megabyte per second
+ //
+ // Returns the smallest of the derated SDP port, fabric and DRAM bandwidths. The SDP and
+ // fabric terms use pct_ideal_sdp_bw_after_urgent and pct_ideal_fabric_bw_after_urgent.
+ // The DRAM term uses pct_ideal_dram_bw_after_urgent_strobe when use_ideal_dram_bw_strobe is
+ // set and HostVMEnable is clear; otherwise it uses the pixel-only percentage for the
+ // pixel-only figure and the pixel-and-VM percentage for the mixed figure. The pixel-only
+ // figure is returned unless HostVMEnable is true, in which case the mixed figure is returned.
+ dml_float_t dml_get_return_bw_mbps(
+		const struct soc_bounding_box_st *soc,
+		dml_bool_t use_ideal_dram_bw_strobe,
+		dml_bool_t HostVMEnable,
+		dml_float_t DCFCLK,
+		dml_float_t FabricClock,
+		dml_float_t DRAMSpeed)
+{
+	// Undamped ("ideal") capacity of each stage of the return path.
+	dml_float_t sdp_ideal = soc->return_bus_width_bytes * DCFCLK;
+	dml_float_t fabric_ideal = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
+	dml_float_t dram_ideal = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
+
+	dml_bool_t strobe_derate = use_ideal_dram_bw_strobe && !HostVMEnable;
+	dml_float_t sdp_limit = sdp_ideal * soc->pct_ideal_sdp_bw_after_urgent / 100;
+	dml_float_t fabric_limit = fabric_ideal * soc->pct_ideal_fabric_bw_after_urgent / 100;
+
+	dml_float_t pixel_only_bw = dml_min3(sdp_limit, fabric_limit,
+			dram_ideal * (strobe_derate ? soc->pct_ideal_dram_bw_after_urgent_strobe : soc->pct_ideal_dram_bw_after_urgent_pixel_only) / 100);
+	dml_float_t pixel_and_vm_bw = dml_min3(sdp_limit, fabric_limit,
+			dram_ideal * (strobe_derate ? soc->pct_ideal_dram_bw_after_urgent_strobe : soc->pct_ideal_dram_bw_after_urgent_pixel_and_vm) / 100);
+
+	dml_float_t return_bw = (HostVMEnable != true) ? pixel_only_bw : pixel_and_vm_bw;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe);
+	dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable);
+	dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
+	dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
+	dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
+	dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, sdp_ideal);
+	dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, fabric_ideal);
+	dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, dram_ideal);
+	dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, pixel_only_bw);
+	dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, pixel_and_vm_bw);
+	dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, return_bw);
+#endif
+	return return_bw;
+}
+
+ // Function: dml_get_return_dram_bw_mbps
+ // Megabyte per second
+ //
+ // DRAM-only counterpart of dml_get_return_bw_mbps: the ideal DRAM bandwidth
+ // (DRAMSpeed * num_chans * dram_channel_width_bytes) is derated by the strobe percentage
+ // when use_ideal_dram_bw_strobe is set and HostVMEnable is clear, otherwise by the
+ // pixel-only percentage (HostVMEnable not true) or the pixel-and-VM percentage
+ // (HostVMEnable true). No SDP or fabric limit is applied here.
+ static dml_float_t dml_get_return_dram_bw_mbps(
+		const struct soc_bounding_box_st *soc,
+		dml_bool_t use_ideal_dram_bw_strobe,
+		dml_bool_t HostVMEnable,
+		dml_float_t DRAMSpeed)
+{
+	dml_float_t dram_ideal = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
+	dml_bool_t strobe_derate = use_ideal_dram_bw_strobe && !HostVMEnable;
+
+	dml_float_t pixel_only_bw = dram_ideal * (strobe_derate ? soc->pct_ideal_dram_bw_after_urgent_strobe : soc->pct_ideal_dram_bw_after_urgent_pixel_only) / 100;
+	dml_float_t pixel_and_vm_bw = dram_ideal * (strobe_derate ? soc->pct_ideal_dram_bw_after_urgent_strobe : soc->pct_ideal_dram_bw_after_urgent_pixel_and_vm) / 100;
+
+	dml_float_t return_dram_bw = (HostVMEnable != true) ? pixel_only_bw : pixel_and_vm_bw;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe);
+	dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable);
+	dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
+	dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, dram_ideal);
+	dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, pixel_only_bw);
+	dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, pixel_and_vm_bw);
+	dml_print("DML::%s: ReturnDRAMBW = %f MBps\n", __func__, return_dram_bw);
+#endif
+	return return_dram_bw;
+}
+
+ /// @brief BACKEND - DSC delay for one output.
+ ///
+ /// Returns 0 unless DSC is enabled and OutputBpp is non-zero. Otherwise the delay is
+ /// dscceComputeDelay() + dscComputeDelay(), where dscceComputeDelay() is given the slice
+ /// width ceil(HActive / NumberOfDSCSlices) and the slice count divided by 4 (4:1 ODM
+ /// combine), by 2 (2:1 ODM combine) or unchanged (any other mode). The sum is then extended
+ /// by (HTotal - HActive) for every started HActive-sized chunk of the delay, and finally
+ /// scaled by PixelClock / PixelClockBackEnd.
+ static dml_uint_t DSCDelayRequirement(
+		dml_bool_t DSCEnabled,
+		enum dml_odm_mode ODMMode,
+		dml_uint_t DSCInputBitPerComponent,
+		dml_float_t OutputBpp,
+		dml_uint_t HActive,
+		dml_uint_t HTotal,
+		dml_uint_t NumberOfDSCSlices,
+		enum dml_output_format_class OutputFormat,
+		enum dml_output_encoder_class Output,
+		dml_float_t PixelClock,
+		dml_float_t PixelClockBackEnd)
+{
+	dml_uint_t delay = 0;
+
+	if (DSCEnabled == true && OutputBpp != 0) {
+		dml_uint_t slice_width = (dml_uint_t)(dml_ceil((dml_float_t) HActive / (dml_float_t) NumberOfDSCSlices, 1.0));
+		dml_uint_t slices_per_segment;
+
+		// ODM combine spreads the slices evenly over the combined segments.
+		if (ODMMode == dml_odm_mode_combine_4to1)
+			slices_per_segment = (dml_uint_t) (NumberOfDSCSlices / 4.0);
+		else if (ODMMode == dml_odm_mode_combine_2to1)
+			slices_per_segment = (dml_uint_t) (NumberOfDSCSlices / 2.0);
+		else
+			slices_per_segment = NumberOfDSCSlices;
+
+		delay = dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, slice_width,
+				slices_per_segment, OutputFormat, Output) + dscComputeDelay(OutputFormat, Output);
+
+		// Add one horizontal blank period per (started) active line of delay.
+		delay = (dml_uint_t)(delay + (HTotal - HActive) * dml_ceil((dml_float_t) delay / (dml_float_t) HActive, 1.0));
+		// Rescale by the ratio of the two pixel clocks.
+		delay = (dml_uint_t)(delay * PixelClock / PixelClockBackEnd);
+	}
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: DSCEnabled = %u\n", __func__, DSCEnabled);
+	dml_print("DML::%s: ODMMode = %u\n", __func__, ODMMode);
+	dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
+	dml_print("DML::%s: HActive = %u\n", __func__, HActive);
+	dml_print("DML::%s: HTotal = %u\n", __func__, HTotal);
+	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
+	dml_print("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd);
+	dml_print("DML::%s: OutputFormat = %u\n", __func__, OutputFormat);
+	dml_print("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent);
+	dml_print("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices);
+	dml_print("DML::%s: DSCDelayRequirement_val = %u\n", __func__, delay);
+#endif
+
+	return delay;
+}
+
+/// @brief Checks whether the summed active-display bandwidth of all surfaces fits in ReturnBW.
+/// Returns true only when the total does not exceed ReturnBW and no active surface has its
+/// NotUrgentLatencyHiding flag set.
+static noinline_for_stack dml_bool_t CalculateVActiveBandwithSupport(dml_uint_t NumberOfActiveSurfaces,
+	dml_float_t ReturnBW,
+	dml_bool_t NotUrgentLatencyHiding[],
+	dml_float_t ReadBandwidthLuma[],
+	dml_float_t ReadBandwidthChroma[],
+	dml_float_t cursor_bw[],
+	dml_float_t meta_row_bandwidth[],
+	dml_float_t dpte_row_bandwidth[],
+	dml_uint_t NumberOfDPP[],
+	dml_float_t UrgentBurstFactorLuma[],
+	dml_float_t UrgentBurstFactorChroma[],
+	dml_float_t UrgentBurstFactorCursor[])
+{
+	dml_bool_t hiding_insufficient = false;
+	dml_float_t demand = 0;
+	dml_bool_t supported;
+	dml_uint_t k;
+
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		// One surface that cannot hide urgent latency fails the whole check.
+		if (NotUrgentLatencyHiding[k])
+			hiding_insufficient = true;
+
+		// Pixel and cursor reads are scaled by their urgent burst factors;
+		// meta and dpte row bandwidth are counted once per DPP.
+		demand = demand
+			+ ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k]
+			+ ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
+			+ cursor_bw[k] * UrgentBurstFactorCursor[k]
+			+ NumberOfDPP[k] * meta_row_bandwidth[k]
+			+ NumberOfDPP[k] * dpte_row_bandwidth[k];
+	}
+
+	supported = (demand <= ReturnBW) && !hiding_insufficient;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %u\n", __func__, hiding_insufficient);
+	dml_print("DML::%s: VActiveBandwith = %f\n", __func__, demand);
+	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
+	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %u\n", __func__, supported);
+#endif
+	return supported;
+}
+
+/// @brief Sums the per-surface bandwidth demand during prefetch and compares it with ReturnBW.
+/// Outputs the total, the same total without phantom-pipe surfaces, the ratio of the total to
+/// ReturnBW, and a support flag. The flag is true only when the total fits in ReturnBW and no
+/// surface has its NotUrgentLatencyHiding flag set.
+static void CalculatePrefetchBandwithSupport(
+	dml_uint_t NumberOfActiveSurfaces,
+	dml_float_t ReturnBW,
+	enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+	dml_bool_t NotUrgentLatencyHiding[],
+	dml_float_t ReadBandwidthLuma[],
+	dml_float_t ReadBandwidthChroma[],
+	dml_float_t PrefetchBandwidthLuma[],
+	dml_float_t PrefetchBandwidthChroma[],
+	dml_float_t cursor_bw[],
+	dml_float_t meta_row_bandwidth[],
+	dml_float_t dpte_row_bandwidth[],
+	dml_float_t cursor_bw_pre[],
+	dml_float_t prefetch_vmrow_bw[],
+	dml_uint_t NumberOfDPP[],
+	dml_float_t UrgentBurstFactorLuma[],
+	dml_float_t UrgentBurstFactorChroma[],
+	dml_float_t UrgentBurstFactorCursor[],
+	dml_float_t UrgentBurstFactorLumaPre[],
+	dml_float_t UrgentBurstFactorChromaPre[],
+	dml_float_t UrgentBurstFactorCursorPre[],
+
+	// Output
+	dml_float_t *PrefetchBandwidth,
+	dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch,
+	dml_float_t *FractionOfUrgentBandwidth,
+	dml_bool_t *PrefetchBandwidthSupport)
+{
+	dml_bool_t hiding_insufficient = false;
+	dml_float_t total_bw = 0;
+	dml_float_t total_bw_without_mall = 0;
+	dml_uint_t k;
+
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		// Per-surface demand: the largest of the vm/row prefetch bandwidth, the
+		// active-display traffic, and the pixel prefetch traffic.
+		dml_float_t surface_bw = dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
+			ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
+				+ cursor_bw[k] * UrgentBurstFactorCursor[k]
+				+ NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]),
+			NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k]
+				+ PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
+				+ cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
+
+		if (NotUrgentLatencyHiding[k])
+			hiding_insufficient = true;
+
+		total_bw = total_bw + surface_bw;
+
+		// Phantom-pipe surfaces are left out of the "not including MALL prefetch" total.
+		if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe)
+			total_bw_without_mall = total_bw_without_mall + surface_bw;
+	}
+
+	*PrefetchBandwidth = total_bw;
+	*PrefetchBandwidthNotIncludingMALLPrefetch = total_bw_without_mall;
+
+	*PrefetchBandwidthSupport = (*PrefetchBandwidth <= ReturnBW) && !hiding_insufficient;
+	*FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
+	dml_print("DML::%s: PrefetchBandwidth = %f\n", __func__, *PrefetchBandwidth);
+	dml_print("DML::%s: FractionOfUrgentBandwidth = %f\n", __func__, *FractionOfUrgentBandwidth);
+	dml_print("DML::%s: PrefetchBandwidthSupport = %u\n", __func__, *PrefetchBandwidthSupport);
+#endif
+}
+
+/// @brief Returns ReturnBW minus, for every active surface, the larger of its active-display
+/// traffic and its prefetch traffic (each term scaled by its urgent burst factor).
+static noinline_for_stack dml_float_t CalculateBandwidthAvailableForImmediateFlip(
+	dml_uint_t NumberOfActiveSurfaces,
+	dml_float_t ReturnBW,
+	dml_float_t ReadBandwidthLuma[],
+	dml_float_t ReadBandwidthChroma[],
+	dml_float_t PrefetchBandwidthLuma[],
+	dml_float_t PrefetchBandwidthChroma[],
+	dml_float_t cursor_bw[],
+	dml_float_t cursor_bw_pre[],
+	dml_uint_t NumberOfDPP[],
+	dml_float_t UrgentBurstFactorLuma[],
+	dml_float_t UrgentBurstFactorChroma[],
+	dml_float_t UrgentBurstFactorCursor[],
+	dml_float_t UrgentBurstFactorLumaPre[],
+	dml_float_t UrgentBurstFactorChromaPre[],
+	dml_float_t UrgentBurstFactorCursorPre[])
+{
+	dml_float_t remaining_bw = ReturnBW;
+	dml_uint_t k;
+
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		// Traffic of the surface while it is being displayed.
+		dml_float_t active_bw = ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k];
+		// Traffic of the surface while it is prefetching; pixel prefetch is per DPP.
+		dml_float_t prefetch_bw = NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k];
+
+		remaining_bw = remaining_bw - dml_max(active_bw, prefetch_bw);
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u\n", __func__, k);
+		dml_print("DML::%s: NumberOfDPP = %u\n", __func__, NumberOfDPP[k]);
+		dml_print("DML::%s: ReadBandwidthLuma = %f\n", __func__, ReadBandwidthLuma[k]);
+		dml_print("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, UrgentBurstFactorLuma[k]);
+		dml_print("DML::%s: ReadBandwidthChroma = %f\n", __func__, ReadBandwidthChroma[k]);
+		dml_print("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, UrgentBurstFactorChroma[k]);
+		dml_print("DML::%s: cursor_bw = %f\n", __func__, cursor_bw[k]);
+		dml_print("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, UrgentBurstFactorCursor[k]);
+
+		dml_print("DML::%s: PrefetchBandwidthLuma = %f\n", __func__, PrefetchBandwidthLuma[k]);
+		dml_print("DML::%s: UrgentBurstFactorLumaPre = %f\n", __func__, UrgentBurstFactorLumaPre[k]);
+		dml_print("DML::%s: PrefetchBandwidthChroma = %f\n", __func__, PrefetchBandwidthChroma[k]);
+		dml_print("DML::%s: UrgentBurstFactorChromaPre = %f\n", __func__, UrgentBurstFactorChromaPre[k]);
+		dml_print("DML::%s: cursor_bw_pre = %f\n", __func__, cursor_bw_pre[k]);
+		dml_print("DML::%s: UrgentBurstFactorCursorPre = %f\n", __func__, UrgentBurstFactorCursorPre[k]);
+		dml_print("DML::%s: ret_val = %f\n", __func__, remaining_bw);
+#endif
+	}
+
+	return remaining_bw;
+}
+
+/// @brief Sums the per-surface bandwidth demand with immediate flip taken into account and
+/// compares the total with ReturnBW.
+/// Surfaces whose ImmediateFlipRequirement is not dml_immediate_flip_not_required carry
+/// final_flip_bw per DPP in both the active-display and the prefetch term. All other surfaces
+/// carry meta and dpte row bandwidth per DPP in the active-display term only.
+static void CalculateImmediateFlipBandwithSupport(
+	dml_uint_t NumberOfActiveSurfaces,
+	dml_float_t ReturnBW,
+	enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+	enum dml_immediate_flip_requirement ImmediateFlipRequirement[],
+	dml_float_t final_flip_bw[],
+	dml_float_t ReadBandwidthLuma[],
+	dml_float_t ReadBandwidthChroma[],
+	dml_float_t PrefetchBandwidthLuma[],
+	dml_float_t PrefetchBandwidthChroma[],
+	dml_float_t cursor_bw[],
+	dml_float_t meta_row_bandwidth[],
+	dml_float_t dpte_row_bandwidth[],
+	dml_float_t cursor_bw_pre[],
+	dml_float_t prefetch_vmrow_bw[],
+	dml_uint_t NumberOfDPP[],
+	dml_float_t UrgentBurstFactorLuma[],
+	dml_float_t UrgentBurstFactorChroma[],
+	dml_float_t UrgentBurstFactorCursor[],
+	dml_float_t UrgentBurstFactorLumaPre[],
+	dml_float_t UrgentBurstFactorChromaPre[],
+	dml_float_t UrgentBurstFactorCursorPre[],
+
+	// Output
+	dml_float_t *TotalBandwidth,
+	dml_float_t *TotalBandwidthNotIncludingMALLPrefetch,
+	dml_float_t *FractionOfUrgentBandwidth,
+	dml_bool_t *ImmediateFlipBandwidthSupport)
+{
+	dml_float_t total_bw = 0;
+	dml_float_t total_bw_without_mall = 0;
+	dml_uint_t k;
+
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		dml_float_t active_bw;
+		dml_float_t prefetch_bw;
+		dml_float_t surface_bw;
+
+		if (ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) {
+			// Flipping surface: flip bandwidth is added per DPP to both terms.
+			active_bw = NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k];
+			prefetch_bw = NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k];
+		} else {
+			// Non-flipping surface: row bandwidth is added per DPP to the active term.
+			active_bw = NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k];
+			prefetch_bw = NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k];
+		}
+
+		surface_bw = dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], active_bw, prefetch_bw);
+		total_bw = total_bw + surface_bw;
+
+		// Phantom-pipe surfaces are left out of the "not including MALL prefetch" total.
+		if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe)
+			total_bw_without_mall = total_bw_without_mall + surface_bw;
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k = %u\n", __func__, k);
+		dml_print("DML::%s: ImmediateFlipRequirement = %u\n", __func__, ImmediateFlipRequirement[k]);
+		dml_print("DML::%s: TotalBandwidth = %f\n", __func__, total_bw);
+		dml_print("DML::%s: NumberOfDPP = %u\n", __func__, NumberOfDPP[k]);
+		dml_print("DML::%s: prefetch_vmrow_bw = %f\n", __func__, prefetch_vmrow_bw[k]);
+		dml_print("DML::%s: final_flip_bw = %f\n", __func__, final_flip_bw[k]);
+		dml_print("DML::%s: ReadBandwidthLuma = %f\n", __func__, ReadBandwidthLuma[k]);
+		dml_print("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, UrgentBurstFactorLuma[k]);
+		dml_print("DML::%s: ReadBandwidthChroma = %f\n", __func__, ReadBandwidthChroma[k]);
+		dml_print("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, UrgentBurstFactorChroma[k]);
+		dml_print("DML::%s: cursor_bw = %f\n", __func__, cursor_bw[k]);
+		dml_print("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, UrgentBurstFactorCursor[k]);
+		dml_print("DML::%s: PrefetchBandwidthLuma = %f\n", __func__, PrefetchBandwidthLuma[k]);
+		dml_print("DML::%s: UrgentBurstFactorLumaPre = %f\n", __func__, UrgentBurstFactorLumaPre[k]);
+		dml_print("DML::%s: PrefetchBandwidthChroma = %f\n", __func__, PrefetchBandwidthChroma[k]);
+		dml_print("DML::%s: UrgentBurstFactorChromaPre = %f\n", __func__, UrgentBurstFactorChromaPre[k]);
+		dml_print("DML::%s: cursor_bw_pre = %f\n", __func__, cursor_bw_pre[k]);
+		dml_print("DML::%s: UrgentBurstFactorCursorPre = %f\n", __func__, UrgentBurstFactorCursorPre[k]);
+		dml_print("DML::%s: meta_row_bandwidth = %f\n", __func__, meta_row_bandwidth[k]);
+		dml_print("DML::%s: dpte_row_bandwidth = %f\n", __func__, dpte_row_bandwidth[k]);
+#endif
+	}
+
+	*TotalBandwidth = total_bw;
+	*TotalBandwidthNotIncludingMALLPrefetch = total_bw_without_mall;
+
+	*ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
+	*FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
+	dml_print("DML::%s: TotalBandwidth = %f\n", __func__, *TotalBandwidth);
+	dml_print("DML::%s: ImmediateFlipBandwidthSupport = %u\n", __func__, *ImmediateFlipBandwidthSupport);
+#endif
+}
+
+/// @brief Converts a duration in microseconds into a number of display lines, rounded up.
+/// The duration of one line is stored in a dml_uint_t before the division, so any
+/// fractional part of it is dropped first.
+static dml_uint_t MicroSecToVertLines(dml_uint_t num_us, dml_uint_t h_total, dml_float_t pixel_clock)
+{
+	// Duration of one line, in ns. Presumably pixel_clock is in MHz -- TODO confirm against callers.
+	const dml_uint_t ns_per_line = 1000.0 * (h_total * 1000.0) / (pixel_clock * 1000.0);
+
+	// num_us is scaled to ns before dividing by the per-line duration.
+	return dml_ceil(1000.0 * num_us / ns_per_line, 1.0);
+}
+
+/// @brief Calculate the maximum vstartup for mode support and mode programming consideration
+/// Bounded by min of actual vblank and input vblank_nom, don't want vstartup/ready to start too early if actual vblank is huge
+/// The returned line count is additionally clamped to DML_MAX_VSTARTUP_START.
+/// plane_idx selects the per-plane entries of *timing; vblank_nom_default_us is converted to
+/// lines with MicroSecToVertLines(); write_back_delay_us is converted to lines with line_time_us.
+static dml_uint_t CalculateMaxVStartup(
+ dml_uint_t plane_idx,
+ dml_bool_t ptoi_supported,
+ dml_uint_t vblank_nom_default_us,
+ struct dml_timing_cfg_st *timing,
+ dml_float_t write_back_delay_us)
+{
+ dml_uint_t vblank_size = 0;
+ dml_uint_t max_vstartup_lines = 0;
+ // Upper bound applied to the nominal vblank line count below.
+ const dml_uint_t max_allowed_vblank_nom = 1023;
+
+ // HTotal / PixelClock; presumably microseconds given the variable name -- TODO confirm PixelClock unit.
+ dml_float_t line_time_us = (dml_float_t) timing->HTotal[plane_idx] / timing->PixelClock[plane_idx];
+ // Number of lines in the real vertical blank of this timing.
+ dml_uint_t vblank_actual = timing->VTotal[plane_idx] - timing->VActive[plane_idx];
+
+ // Default nominal vblank expressed in lines, then limited to the real vblank.
+ dml_uint_t vblank_nom_default_in_line = MicroSecToVertLines(vblank_nom_default_us, timing->HTotal[plane_idx],
+ timing->PixelClock[plane_idx]);
+ dml_uint_t vblank_nom_input = (dml_uint_t)dml_min(vblank_actual, vblank_nom_default_in_line);
+
+ // vblank_nom should not be smaller than (VSync (VTotal - VActive - VFrontPorch) + 2)
+ // + 2 is because
+ // 1 -> VStartup_start should be 1 line before VSync
+ // 1 -> always reserve 1 line between start of VBlank to VStartup signal
+ dml_uint_t vblank_nom_vsync_capped = dml_max(vblank_nom_input,
+ timing->VTotal[plane_idx] - timing->VActive[plane_idx] - timing->VFrontPorch[plane_idx] + 2);
+ dml_uint_t vblank_nom_max_allowed_capped = dml_min(vblank_nom_vsync_capped, max_allowed_vblank_nom);
+ // A capped value of 0 falls back to the default nominal vblank in lines.
+ dml_uint_t vblank_avail = (vblank_nom_max_allowed_capped == 0) ?
+ vblank_nom_default_in_line : vblank_nom_max_allowed_capped;
+
+ // Never use more lines than the timing really has in its vblank.
+ vblank_size = (dml_uint_t) dml_min(vblank_actual, vblank_avail);
+
+ // Interlaced timing without progressive-to-interlace support: only half of the lines are usable.
+ // Otherwise reserve at least one line, or the write-back delay rounded up to whole lines.
+ // NOTE(review): if dml_uint_t is unsigned (presumably), the subtraction wraps when the
+ // reserved line count exceeds vblank_size -- confirm callers guarantee enough vblank.
+ if (timing->Interlace[plane_idx] && !ptoi_supported)
+ max_vstartup_lines = (dml_uint_t) (dml_floor(vblank_size/2.0, 1.0));
+ else
+ max_vstartup_lines = vblank_size - (dml_uint_t) dml_max(1.0, dml_ceil(write_back_delay_us/line_time_us, 1.0));
+#ifdef __DML_VBA_DEBUG__
+ // The dump shows max_vstartup_lines before the final clamp; VBlankNom is printed for reference only.
+ dml_print("DML::%s: plane_idx = %u\n", __func__, plane_idx);
+ dml_print("DML::%s: VBlankNom = %u\n", __func__, timing->VBlankNom[plane_idx]);
+ dml_print("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us);
+ dml_print("DML::%s: line_time_us = %f\n", __func__, line_time_us);
+ dml_print("DML::%s: vblank_actual = %u\n", __func__, vblank_actual);
+ dml_print("DML::%s: vblank_avail = %u\n", __func__, vblank_avail);
+ dml_print("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines);
+#endif
+ // Final clamp to the DML_MAX_VSTARTUP_START limit.
+ max_vstartup_lines = (dml_uint_t) dml_min(max_vstartup_lines, DML_MAX_VSTARTUP_START);
+ return max_vstartup_lines;
+}
+
+/// @brief Fills the CalculatePrefetchSchedule parameter block for plane k from mode_lib->ms.
+/// j is the first index of the two-dimensional [j][k] arrays read and referenced here.
+/// Inputs are copied by value; the result fields are set to point at the mode_lib->ms
+/// entries for this plane.
+static noinline_for_stack void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *mode_lib,
+	struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params,
+	dml_uint_t j,
+	dml_uint_t k)
+{
+	struct CalculatePrefetchSchedule_params_st *p = CalculatePrefetchSchedule_params;
+
+	// Pipeline delays and output configuration.
+	p->DSCDelay = mode_lib->ms.DSCDelayPerState[k];
+	p->EnhancedPrefetchScheduleAccelerationFinal = mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal;
+	p->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ms.ip.dppclk_delay_subtotal + mode_lib->ms.ip.dppclk_delay_cnvc_formatter;
+	p->DPPCLKDelaySCL = mode_lib->ms.ip.dppclk_delay_scl;
+	p->DPPCLKDelaySCLLBOnly = mode_lib->ms.ip.dppclk_delay_scl_lb_only;
+	p->DPPCLKDelayCNVCCursor = mode_lib->ms.ip.dppclk_delay_cnvc_cursor;
+	p->DISPCLKDelaySubtotal = mode_lib->ms.ip.dispclk_delay_subtotal;
+	p->DPP_RECOUT_WIDTH = (dml_uint_t)(mode_lib->ms.SwathWidthYThisState[k] / mode_lib->ms.cache_display_cfg.plane.HRatio[k]);
+	p->OutputFormat = mode_lib->ms.cache_display_cfg.output.OutputFormat[k];
+	p->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters;
+
+	// GPUVM / HostVM configuration; the min page size is converted from kbytes by * 1024.
+	p->GPUVMPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
+	p->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
+	p->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
+	p->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
+	p->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
+
+	// Dynamic metadata.
+	p->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k];
+	p->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
+	p->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k];
+	p->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes[k];
+
+	// Latencies and calculation time.
+	p->UrgentLatency = mode_lib->ms.UrgLatency;
+	p->UrgentExtraLatency = mode_lib->ms.ExtraLatency;
+	p->TCalc = mode_lib->ms.TimeCalc;
+
+	// Page-table / metadata byte counts and prefetch line counts.
+	p->PDEAndMetaPTEBytesFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k];
+	p->MetaRowByte = mode_lib->ms.MetaRowBytes[j][k];
+	p->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[j][k];
+	p->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[j][k];
+	p->VInitPreFillY = mode_lib->ms.PrefillY[k];
+	p->MaxNumSwathY = mode_lib->ms.MaxNumSwY[k];
+	p->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[j][k];
+	p->VInitPreFillC = mode_lib->ms.PrefillC[k];
+	p->MaxNumSwathC = mode_lib->ms.MaxNumSwC[k];
+
+	// Swath geometry and wait time.
+	p->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub_this_state[k];
+	p->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub_this_state[k];
+	p->SwathHeightY = mode_lib->ms.SwathHeightYThisState[k];
+	p->SwathHeightC = mode_lib->ms.SwathHeightCThisState[k];
+	p->TWait = mode_lib->ms.TWait;
+
+	// Result locations: pointers to the mode_lib->ms entries for this plane.
+	p->DestinationLinesForPrefetch = &mode_lib->ms.LineTimesForPrefetch[k];
+	p->DestinationLinesToRequestVMInVBlank = &mode_lib->ms.LinesForMetaPTE[k];
+	p->DestinationLinesToRequestRowInVBlank = &mode_lib->ms.LinesForMetaAndDPTERow[k];
+	p->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[j][k];
+	p->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[j][k];
+	p->RequiredPrefetchPixDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k];
+	p->RequiredPrefetchPixDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k];
+	p->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.support.NoTimeForDynamicMetadata[j][k];
+	p->Tno_bw = &mode_lib->ms.Tno_bw[k];
+}
+
+static noinline_for_stack void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
+{
+ struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals;
+ struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
+ struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
+ struct DmlPipe *myPipe;
+ dml_uint_t j, k;
+
+ for (j = 0; j < 2; ++j) {
+ mode_lib->ms.TimeCalc = 24 / mode_lib->ms.ProjectedDCFCLKDeepSleep[j];
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
+ mode_lib->ms.swath_width_luma_ub_this_state[k] = mode_lib->ms.swath_width_luma_ub_all_states[j][k];
+ mode_lib->ms.swath_width_chroma_ub_this_state[k] = mode_lib->ms.swath_width_chroma_ub_all_states[j][k];
+ mode_lib->ms.SwathWidthYThisState[k] = mode_lib->ms.SwathWidthYAllStates[j][k];
+ mode_lib->ms.SwathWidthCThisState[k] = mode_lib->ms.SwathWidthCAllStates[j][k];
+ mode_lib->ms.SwathHeightYThisState[k] = mode_lib->ms.SwathHeightYAllStates[j][k];
+ mode_lib->ms.SwathHeightCThisState[k] = mode_lib->ms.SwathHeightCAllStates[j][k];
+ mode_lib->ms.UnboundedRequestEnabledThisState = mode_lib->ms.UnboundedRequestEnabledAllStates[j];
+ mode_lib->ms.CompressedBufferSizeInkByteThisState = mode_lib->ms.CompressedBufferSizeInkByteAllStates[j];
+ mode_lib->ms.DETBufferSizeInKByteThisState[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[j][k];
+ mode_lib->ms.DETBufferSizeYThisState[k] = mode_lib->ms.DETBufferSizeYAllStates[j][k];
+ mode_lib->ms.DETBufferSizeCThisState[k] = mode_lib->ms.DETBufferSizeCAllStates[j][k];
+ }
+
+ mode_lib->ms.support.VActiveBandwithSupport[j] = CalculateVActiveBandwithSupport(
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.ReturnBWPerState[j],
+ mode_lib->ms.NotUrgentLatencyHiding,
+ mode_lib->ms.ReadBandwidthLuma,
+ mode_lib->ms.ReadBandwidthChroma,
+ mode_lib->ms.cursor_bw,
+ mode_lib->ms.meta_row_bandwidth_this_state,
+ mode_lib->ms.dpte_row_bandwidth_this_state,
+ mode_lib->ms.NoOfDPPThisState,
+ mode_lib->ms.UrgentBurstFactorLuma[j],
+ mode_lib->ms.UrgentBurstFactorChroma[j],
+ mode_lib->ms.UrgentBurstFactorCursor[j]);
+
+ s->VMDataOnlyReturnBWPerState = dml_get_return_bw_mbps_vm_only(
+ &mode_lib->ms.soc,
+ mode_lib->ms.state.use_ideal_dram_bw_strobe,
+ mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
+ mode_lib->ms.DCFCLKState[j],
+ mode_lib->ms.state.fabricclk_mhz,
+ mode_lib->ms.state.dram_speed_mts);
+
+ s->HostVMInefficiencyFactor = 1;
+ if (mode_lib->ms.cache_display_cfg.plane.GPUVMEnable && mode_lib->ms.cache_display_cfg.plane.HostVMEnable)
+ s->HostVMInefficiencyFactor = mode_lib->ms.ReturnBWPerState[j] / s->VMDataOnlyReturnBWPerState;
+
+ mode_lib->ms.ExtraLatency = CalculateExtraLatency(
+ mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles,
+ s->ReorderingBytes,
+ mode_lib->ms.DCFCLKState[j],
+ mode_lib->ms.TotalNumberOfActiveDPP[j],
+ mode_lib->ms.ip.pixel_chunk_size_kbytes,
+ mode_lib->ms.TotalNumberOfDCCActiveDPP[j],
+ mode_lib->ms.ip.meta_chunk_size_kbytes,
+ mode_lib->ms.ReturnBWPerState[j],
+ mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
+ mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.NoOfDPPThisState,
+ mode_lib->ms.dpte_group_bytes,
+ s->HostVMInefficiencyFactor,
+ mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
+ mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels);
+
+ s->NextMaxVStartup = s->MaxVStartupAllPlanes[j];
+ s->MaxVStartup = 0;
+ s->AllPrefetchModeTested = true;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ CalculatePrefetchMode(mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k], &s->MinPrefetchMode[k], &s->MaxPrefetchMode[k]);
+ s->NextPrefetchMode[k] = s->MinPrefetchMode[k];
+ }
+
+ do {
+ s->MaxVStartup = s->NextMaxVStartup;
+ s->AllPrefetchModeTested = true;
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ mode_lib->ms.PrefetchMode[k] = s->NextPrefetchMode[k];
+ mode_lib->ms.TWait = CalculateTWait(
+ mode_lib->ms.PrefetchMode[k],
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
+ mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ mode_lib->ms.cache_display_cfg.timing.DRRDisplay[k],
+ mode_lib->ms.state.dram_clock_change_latency_us,
+ mode_lib->ms.state.fclk_change_latency_us,
+ mode_lib->ms.UrgLatency,
+ mode_lib->ms.state.sr_enter_plus_exit_time_us);
+
+ myPipe = &s->myPipe;
+ myPipe->Dppclk = mode_lib->ms.RequiredDPPCLKPerSurface[j][k];
+ myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK[j];
+ myPipe->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
+ myPipe->DCFClkDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep[j];
+ myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[j][k];
+ myPipe->ScalerEnabled = mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k];
+ myPipe->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
+ myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
+ myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
+ myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
+ myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
+ myPipe->InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
+ myPipe->NumberOfCursors = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k];
+ myPipe->VBlank = mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k];
+ myPipe->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
+ myPipe->HActive = mode_lib->ms.cache_display_cfg.timing.HActive[k];
+ myPipe->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
+ myPipe->ODMMode = mode_lib->ms.ODMModePerState[k];
+ myPipe->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
+ myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
+ myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
+ myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Calling CalculatePrefetchSchedule for j=%u, k=%u\n", __func__, j, k);
+ dml_print("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[j][k]);
+ dml_print("DML::%s: MaxVStartup = %u\n", __func__, s->MaxVStartup);
+ dml_print("DML::%s: NextPrefetchMode = %u\n", __func__, s->NextPrefetchMode[k]);
+ dml_print("DML::%s: AllowForPStateChangeOrStutterInVBlank = %u\n", __func__, mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k]);
+ dml_print("DML::%s: PrefetchMode = %u\n", __func__, mode_lib->ms.PrefetchMode[k]);
+#endif
+
+ CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactor;
+ CalculatePrefetchSchedule_params->myPipe = myPipe;
+ CalculatePrefetchSchedule_params->VStartup = (dml_uint_t)(dml_min(s->MaxVStartup, s->MaximumVStartup[j][k]));
+ CalculatePrefetchSchedule_params->MaxVStartup = s->MaximumVStartup[j][k];
+ CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k];
+ CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k];
+ CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k];
+ CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0];
+ CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1];
+ CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2];
+ CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0];
+ CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1];
+ CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2];
+
+ set_calculate_prefetch_schedule_params(mode_lib, CalculatePrefetchSchedule_params, j, k);
+
+ mode_lib->ms.support.NoTimeForPrefetch[j][k] =
+ CalculatePrefetchSchedule(&mode_lib->scratch,
+ CalculatePrefetchSchedule_params);
+ }
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ CalculateUrgentBurstFactor(
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
+ mode_lib->ms.swath_width_luma_ub_this_state[k],
+ mode_lib->ms.swath_width_chroma_ub_this_state[k],
+ mode_lib->ms.SwathHeightYThisState[k],
+ mode_lib->ms.SwathHeightCThisState[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.UrgLatency,
+ mode_lib->ms.ip.cursor_buffer_size,
+ mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
+ mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
+ mode_lib->ms.VRatioPreY[j][k],
+ mode_lib->ms.VRatioPreC[j][k],
+ mode_lib->ms.BytePerPixelInDETY[k],
+ mode_lib->ms.BytePerPixelInDETC[k],
+ mode_lib->ms.DETBufferSizeYThisState[k],
+ mode_lib->ms.DETBufferSizeCThisState[k],
+ /* Output */
+ &mode_lib->ms.UrgentBurstFactorCursorPre[k],
+ &mode_lib->ms.UrgentBurstFactorLumaPre[k],
+ &mode_lib->ms.UrgentBurstFactorChromaPre[k],
+ &mode_lib->ms.NotUrgentLatencyHidingPre[k]);
+
+ mode_lib->ms.cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] *
+ mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] /
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.VRatioPreY[j][k];
+ }
+
+ {
+ CalculatePrefetchBandwithSupport(
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.ReturnBWPerState[j],
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
+ mode_lib->ms.NotUrgentLatencyHidingPre,
+ mode_lib->ms.ReadBandwidthLuma,
+ mode_lib->ms.ReadBandwidthChroma,
+ mode_lib->ms.RequiredPrefetchPixelDataBWLuma,
+ mode_lib->ms.RequiredPrefetchPixelDataBWChroma,
+ mode_lib->ms.cursor_bw,
+ mode_lib->ms.meta_row_bandwidth_this_state,
+ mode_lib->ms.dpte_row_bandwidth_this_state,
+ mode_lib->ms.cursor_bw_pre,
+ mode_lib->ms.prefetch_vmrow_bw,
+ mode_lib->ms.NoOfDPPThisState,
+ mode_lib->ms.UrgentBurstFactorLuma[j],
+ mode_lib->ms.UrgentBurstFactorChroma[j],
+ mode_lib->ms.UrgentBurstFactorCursor[j],
+ mode_lib->ms.UrgentBurstFactorLumaPre,
+ mode_lib->ms.UrgentBurstFactorChromaPre,
+ mode_lib->ms.UrgentBurstFactorCursorPre,
+
+ /* output */
+ &s->dummy_single[0], // dml_float_t *PrefetchBandwidth
+ &s->dummy_single[1], // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch
+ &mode_lib->mp.FractionOfUrgentBandwidth, // dml_float_t *FractionOfUrgentBandwidth
+ &mode_lib->ms.support.PrefetchSupported[j]);
+ }
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.LineTimesForPrefetch[k] < 2.0
+ || mode_lib->ms.LinesForMetaPTE[k] >= 32.0
+ || mode_lib->ms.LinesForMetaAndDPTERow[k] >= 16.0
+ || mode_lib->ms.support.NoTimeForPrefetch[j][k] == true) {
+ mode_lib->ms.support.PrefetchSupported[j] = false;
+ }
+ }
+
+ mode_lib->ms.support.DynamicMetadataSupported[j] = true;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.support.NoTimeForDynamicMetadata[j][k] == true) {
+ mode_lib->ms.support.DynamicMetadataSupported[j] = false;
+ }
+ }
+
+ mode_lib->ms.support.VRatioInPrefetchSupported[j] = true;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.support.NoTimeForPrefetch[j][k] == true ||
+ mode_lib->ms.VRatioPreY[j][k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
+ mode_lib->ms.VRatioPreC[j][k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
+ ((s->MaxVStartup < s->MaximumVStartup[j][k] || mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal == 0) &&
+ (mode_lib->ms.VRatioPreY[j][k] > __DML_MAX_VRATIO_PRE__ || mode_lib->ms.VRatioPreC[j][k] > __DML_MAX_VRATIO_PRE__))) {
+ mode_lib->ms.support.VRatioInPrefetchSupported[j] = false;
+ }
+ }
+
+ s->AnyLinesForVMOrRowTooLarge = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.LinesForMetaAndDPTERow[k] >= 16 || mode_lib->ms.LinesForMetaPTE[k] >= 32) {
+ s->AnyLinesForVMOrRowTooLarge = true;
+ }
+ }
+
+ if (mode_lib->ms.support.PrefetchSupported[j] == true && mode_lib->ms.support.VRatioInPrefetchSupported[j] == true) {
+ mode_lib->ms.BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip(
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.ReturnBWPerState[j],
+ mode_lib->ms.ReadBandwidthLuma,
+ mode_lib->ms.ReadBandwidthChroma,
+ mode_lib->ms.RequiredPrefetchPixelDataBWLuma,
+ mode_lib->ms.RequiredPrefetchPixelDataBWChroma,
+ mode_lib->ms.cursor_bw,
+ mode_lib->ms.cursor_bw_pre,
+ mode_lib->ms.NoOfDPPThisState,
+ mode_lib->ms.UrgentBurstFactorLuma[j],
+ mode_lib->ms.UrgentBurstFactorChroma[j],
+ mode_lib->ms.UrgentBurstFactorCursor[j],
+ mode_lib->ms.UrgentBurstFactorLumaPre,
+ mode_lib->ms.UrgentBurstFactorChromaPre,
+ mode_lib->ms.UrgentBurstFactorCursorPre);
+
+ mode_lib->ms.TotImmediateFlipBytes = 0;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required)) {
+ mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * (mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] + mode_lib->ms.MetaRowBytes[j][k]);
+ if (mode_lib->ms.use_one_row_for_frame_flip[j][k]) {
+ mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * (2 * mode_lib->ms.DPTEBytesPerRow[j][k]);
+ } else {
+ mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * mode_lib->ms.DPTEBytesPerRow[j][k];
+ }
+ }
+ }
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ CalculateFlipSchedule(
+ s->HostVMInefficiencyFactor,
+ mode_lib->ms.ExtraLatency,
+ mode_lib->ms.UrgLatency,
+ mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels,
+ mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
+ mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels,
+ mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
+ mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
+ mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k],
+ mode_lib->ms.MetaRowBytes[j][k],
+ mode_lib->ms.DPTEBytesPerRow[j][k],
+ mode_lib->ms.BandwidthAvailableForImmediateFlip,
+ mode_lib->ms.TotImmediateFlipBytes,
+ mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
+ (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]),
+ mode_lib->ms.cache_display_cfg.plane.VRatio[k],
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
+ mode_lib->ms.Tno_bw[k],
+ mode_lib->ms.cache_display_cfg.surface.DCCEnable[k],
+ mode_lib->ms.dpte_row_height[k],
+ mode_lib->ms.meta_row_height[k],
+ mode_lib->ms.dpte_row_height_chroma[k],
+ mode_lib->ms.meta_row_height_chroma[k],
+ mode_lib->ms.use_one_row_for_frame_flip[j][k], // 24
+
+ /* Output */
+ &mode_lib->ms.DestinationLinesToRequestVMInImmediateFlip[k],
+ &mode_lib->ms.DestinationLinesToRequestRowInImmediateFlip[k],
+ &mode_lib->ms.final_flip_bw[k],
+ &mode_lib->ms.ImmediateFlipSupportedForPipe[k]);
+ }
+
+ {
+ CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes,
+ mode_lib->ms.ReturnBWPerState[j],
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
+ mode_lib->ms.policy.ImmediateFlipRequirement,
+ mode_lib->ms.final_flip_bw,
+ mode_lib->ms.ReadBandwidthLuma,
+ mode_lib->ms.ReadBandwidthChroma,
+ mode_lib->ms.RequiredPrefetchPixelDataBWLuma,
+ mode_lib->ms.RequiredPrefetchPixelDataBWChroma,
+ mode_lib->ms.cursor_bw,
+ mode_lib->ms.meta_row_bandwidth_this_state,
+ mode_lib->ms.dpte_row_bandwidth_this_state,
+ mode_lib->ms.cursor_bw_pre,
+ mode_lib->ms.prefetch_vmrow_bw,
+ mode_lib->ms.NoOfDPP[j], // VBA_ERROR DPPPerSurface is not assigned at this point, should use NoOfDpp here
+ mode_lib->ms.UrgentBurstFactorLuma[j],
+ mode_lib->ms.UrgentBurstFactorChroma[j],
+ mode_lib->ms.UrgentBurstFactorCursor[j],
+ mode_lib->ms.UrgentBurstFactorLumaPre,
+ mode_lib->ms.UrgentBurstFactorChromaPre,
+ mode_lib->ms.UrgentBurstFactorCursorPre,
+
+ /* output */
+ &s->dummy_single[0], // dml_float_t *TotalBandwidth
+ &s->dummy_single[1], // dml_float_t *TotalBandwidthNotIncludingMALLPrefetch
+ &s->dummy_single[2], // dml_float_t *FractionOfUrgentBandwidth
+ &mode_lib->ms.support.ImmediateFlipSupportedForState[j]); // dml_bool_t *ImmediateFlipBandwidthSupport
+ }
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required) && (mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false))
+ mode_lib->ms.support.ImmediateFlipSupportedForState[j] = false;
+ }
+
+ } else { // if prefetch not support, assume iflip not supported
+ mode_lib->ms.support.ImmediateFlipSupportedForState[j] = false;
+ }
+
+ if (s->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || s->AnyLinesForVMOrRowTooLarge == false) {
+ s->NextMaxVStartup = s->MaxVStartupAllPlanes[j];
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ s->NextPrefetchMode[k] = s->NextPrefetchMode[k] + 1;
+
+ if (s->NextPrefetchMode[k] <= s->MaxPrefetchMode[k])
+ s->AllPrefetchModeTested = false;
+ }
+ } else {
+ s->NextMaxVStartup = s->NextMaxVStartup - 1;
+ }
+ } while (!((mode_lib->ms.support.PrefetchSupported[j] == true && mode_lib->ms.support.DynamicMetadataSupported[j] == true &&
+ mode_lib->ms.support.VRatioInPrefetchSupported[j] == true &&
+ // consider flip support okay when there is no hostvm and the user doesn't require an iflip, OR when the flip bw is ok
+ // If there is hostvm, DCN needs to support iflip for invalidation
+ ((s->ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j] == true)) ||
+ (s->NextMaxVStartup == s->MaxVStartupAllPlanes[j] && s->AllPrefetchModeTested)));
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.use_one_row_for_frame_this_state[k] = mode_lib->ms.use_one_row_for_frame[j][k];
+ }
+
+ s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency;
+ s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency;
+ s->mSOCParameters.WritebackLatency = mode_lib->ms.state.writeback_latency_us;
+ s->mSOCParameters.DRAMClockChangeLatency = mode_lib->ms.state.dram_clock_change_latency_us;
+ s->mSOCParameters.FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us;
+ s->mSOCParameters.SRExitTime = mode_lib->ms.state.sr_exit_time_us;
+ s->mSOCParameters.SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us;
+ s->mSOCParameters.SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us;
+ s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->ms.state.sr_enter_plus_exit_z8_time_us;
+ s->mSOCParameters.USRRetrainingLatency = mode_lib->ms.state.usr_retraining_latency_us;
+ s->mSOCParameters.SMNLatency = mode_lib->ms.soc.smn_latency_us;
+
+ CalculateWatermarks_params->USRRetrainingRequiredFinal = mode_lib->ms.policy.USRRetrainingRequiredFinal;
+ CalculateWatermarks_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
+ CalculateWatermarks_params->PrefetchMode = mode_lib->ms.PrefetchMode;
+ CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ms.ip.max_line_buffer_lines;
+ CalculateWatermarks_params->LineBufferSize = mode_lib->ms.ip.line_buffer_size_bits;
+ CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ms.ip.writeback_interface_buffer_size_kbytes;
+ CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLKState[j];
+ CalculateWatermarks_params->ReturnBW = mode_lib->ms.ReturnBWPerState[j];
+ CalculateWatermarks_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal;
+ CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
+ CalculateWatermarks_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay;
+ CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
+ CalculateWatermarks_params->meta_row_height = mode_lib->ms.meta_row_height;
+ CalculateWatermarks_params->meta_row_height_chroma = mode_lib->ms.meta_row_height_chroma;
+ CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters;
+ CalculateWatermarks_params->WritebackChunkSize = mode_lib->ms.ip.writeback_chunk_size_kbytes;
+ CalculateWatermarks_params->SOCCLK = mode_lib->ms.state.socclk_mhz;
+ CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep[j];
+ CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeYThisState;
+ CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeCThisState;
+ CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState;
+ CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState;
+ CalculateWatermarks_params->LBBitPerPixel = mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel;
+ CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthYThisState;
+ CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthCThisState;
+ CalculateWatermarks_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
+ CalculateWatermarks_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
+ CalculateWatermarks_params->VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps;
+ CalculateWatermarks_params->VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma;
+ CalculateWatermarks_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio;
+ CalculateWatermarks_params->VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma;
+ CalculateWatermarks_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
+ CalculateWatermarks_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
+ CalculateWatermarks_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
+ CalculateWatermarks_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
+ CalculateWatermarks_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
+ CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPPThisState;
+ CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY;
+ CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC;
+ CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler;
+ CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler;
+ CalculateWatermarks_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable;
+ CalculateWatermarks_params->WritebackPixelFormat = mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat;
+ CalculateWatermarks_params->WritebackDestinationWidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth;
+ CalculateWatermarks_params->WritebackDestinationHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight;
+ CalculateWatermarks_params->WritebackSourceHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight;
+ CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabledThisState;
+ CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByteThisState;
+
+ // Output
+ CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark
+ CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[j];
+ CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[]
+ CalculateWatermarks_params->SubViewportLinesNeededInMALL = &mode_lib->ms.SubViewportLinesNeededInMALL[j]; // dml_uint_t SubViewportLinesNeededInMALL[]
+ CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[j];
+ CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // dml_float_t *MaxActiveFCLKChangeLatencySupported
+ CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[j];
+ CalculateWatermarks_params->ActiveDRAMClockChangeLatencyMargin = mode_lib->ms.support.ActiveDRAMClockChangeLatencyMargin;
+
+ CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch,
+ CalculateWatermarks_params);
+
+ } // for j
+}
+
+/// @brief The Mode Support function.
+dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib)
+{
+ struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals;
+ struct UseMinimumDCFCLK_params_st *UseMinimumDCFCLK_params = &mode_lib->scratch.UseMinimumDCFCLK_params;
+ struct CalculateSwathAndDETConfiguration_params_st *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
+ struct CalculateVMRowAndSwath_params_st *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
+
+ dml_uint_t j, k, m;
+
+ mode_lib->ms.num_active_planes = dml_get_num_active_planes(&mode_lib->ms.cache_display_cfg);
+ dml_print("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes);
+
+ CalculateMaxDETAndMinCompressedBufferSize(
+ mode_lib->ms.ip.config_return_buffer_size_in_kbytes,
+ mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes,
+ mode_lib->ms.ip.rob_buffer_size_kbytes,
+ mode_lib->ms.ip.max_num_dpp,
+ mode_lib->ms.policy.NomDETInKByteOverrideEnable, // VBA_DELTA
+ mode_lib->ms.policy.NomDETInKByteOverrideValue, // VBA_DELTA
+
+ /* Output */
+ &mode_lib->ms.MaxTotalDETInKByte,
+ &mode_lib->ms.NomDETInKByte,
+ &mode_lib->ms.MinCompressedBufferSizeInKByte);
+
+ PixelClockAdjustmentForProgressiveToInterlaceUnit(&mode_lib->ms.cache_display_cfg, mode_lib->ms.ip.ptoi_supported);
+
+
+ /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
+
+ /*Scale Ratio, taps Support Check*/
+ mode_lib->ms.support.ScaleRatioAndTapsSupport = true;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k] == false
+ && ((mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha)
+ || mode_lib->ms.cache_display_cfg.plane.HRatio[k] != 1.0
+ || mode_lib->ms.cache_display_cfg.plane.HTaps[k] != 1.0
+ || mode_lib->ms.cache_display_cfg.plane.VRatio[k] != 1.0
+ || mode_lib->ms.cache_display_cfg.plane.VTaps[k] != 1.0)) {
+ mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
+ } else if (mode_lib->ms.cache_display_cfg.plane.VTaps[k] < 1.0 || mode_lib->ms.cache_display_cfg.plane.VTaps[k] > 8.0
+ || mode_lib->ms.cache_display_cfg.plane.HTaps[k] < 1.0 || mode_lib->ms.cache_display_cfg.plane.HTaps[k] > 8.0
+ || (mode_lib->ms.cache_display_cfg.plane.HTaps[k] > 1.0 && (mode_lib->ms.cache_display_cfg.plane.HTaps[k] % 2) == 1)
+ || mode_lib->ms.cache_display_cfg.plane.HRatio[k] > mode_lib->ms.ip.max_hscl_ratio
+ || mode_lib->ms.cache_display_cfg.plane.VRatio[k] > mode_lib->ms.ip.max_vscl_ratio
+ || mode_lib->ms.cache_display_cfg.plane.HRatio[k] > mode_lib->ms.cache_display_cfg.plane.HTaps[k]
+ || mode_lib->ms.cache_display_cfg.plane.VRatio[k] > mode_lib->ms.cache_display_cfg.plane.VTaps[k]
+ || (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe
+ && (mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k] < 1 || mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k] > 8 || mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] < 1 || mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] > 8 ||
+ (mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] > 1 && mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] % 2 == 1) ||
+ mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k] > mode_lib->ms.ip.max_hscl_ratio ||
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k] > mode_lib->ms.ip.max_vscl_ratio ||
+ mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k] > mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] ||
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k] > mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k]))) {
+ mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
+ }
+ }
+
+ /*Source Format, Pixel Format and Scan Support Check*/
+ mode_lib->ms.support.SourceFormatPixelAndScanSupport = true;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear && (!(!dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k])) || mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true)) {
+ mode_lib->ms.support.SourceFormatPixelAndScanSupport = false;
+ }
+ }
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ CalculateBytePerPixelAndBlockSizes(
+ mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
+ mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k],
+
+ /* Output */
+ &mode_lib->ms.BytePerPixelY[k],
+ &mode_lib->ms.BytePerPixelC[k],
+ &mode_lib->ms.BytePerPixelInDETY[k],
+ &mode_lib->ms.BytePerPixelInDETC[k],
+ &mode_lib->ms.Read256BlockHeightY[k],
+ &mode_lib->ms.Read256BlockHeightC[k],
+ &mode_lib->ms.Read256BlockWidthY[k],
+ &mode_lib->ms.Read256BlockWidthC[k],
+ &mode_lib->ms.MacroTileHeightY[k],
+ &mode_lib->ms.MacroTileHeightC[k],
+ &mode_lib->ms.MacroTileWidthY[k],
+ &mode_lib->ms.MacroTileWidthC[k]);
+ }
+
+ /*Bandwidth Support Check*/
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (!dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k])) {
+ mode_lib->ms.SwathWidthYSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportWidth[k];
+ mode_lib->ms.SwathWidthCSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma[k];
+ } else {
+ mode_lib->ms.SwathWidthYSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k];
+ mode_lib->ms.SwathWidthCSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k];
+ }
+ }
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ mode_lib->ms.ReadBandwidthLuma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * dml_ceil(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
+ mode_lib->ms.ReadBandwidthChroma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * dml_ceil(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k] / 2.0;
+ }
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true
+ && mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k] == dml_444_64) {
+ mode_lib->ms.WriteBandwidth[k] = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k]
+ * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k]
+ / (mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k]
+ * mode_lib->ms.cache_display_cfg.timing.HTotal[k]
+ / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 8.0;
+ } else if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
+ mode_lib->ms.WriteBandwidth[k] = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k]
+ * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k]
+ / (mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k]
+ * mode_lib->ms.cache_display_cfg.timing.HTotal[k]
+ / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 4.0;
+ } else {
+ mode_lib->ms.WriteBandwidth[k] = 0.0;
+ }
+ }
+
+ /*Writeback Latency support check*/
+ mode_lib->ms.support.WritebackLatencySupport = true;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true &&
+ (mode_lib->ms.WriteBandwidth[k] > mode_lib->ms.ip.writeback_interface_buffer_size_kbytes * 1024 / mode_lib->ms.state.writeback_latency_us)) {
+ mode_lib->ms.support.WritebackLatencySupport = false;
+ }
+ }
+
+ /*Writeback Mode Support Check*/
+ s->TotalNumberOfActiveWriteback = 0;
+ for (k = 0; k <= (dml_uint_t) mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
+ s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1;
+ }
+ }
+
+ mode_lib->ms.support.EnoughWritebackUnits = 1;
+ if (s->TotalNumberOfActiveWriteback > (dml_uint_t) mode_lib->ms.ip.max_num_wb) {
+ mode_lib->ms.support.EnoughWritebackUnits = false;
+ }
+
+ /*Writeback Scale Ratio and Taps Support Check*/
+ mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
+ if (mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] > mode_lib->ms.ip.writeback_max_hscl_ratio
+ || mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] > mode_lib->ms.ip.writeback_max_vscl_ratio
+ || mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] < mode_lib->ms.ip.writeback_min_hscl_ratio
+ || mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] < mode_lib->ms.ip.writeback_min_vscl_ratio
+ || mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] > (dml_uint_t) mode_lib->ms.ip.writeback_max_hscl_taps
+ || mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k] > (dml_uint_t) mode_lib->ms.ip.writeback_max_vscl_taps
+ || mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] > (dml_uint_t) mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k]
+ || mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] > (dml_uint_t) mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k]
+ || (mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] > 2.0 && ((mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] % 2) == 1))) {
+ mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
+ }
+ if (2.0 * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * (mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k] - 1) * 57 > mode_lib->ms.ip.writeback_line_buffer_buffer_size) {
+ mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
+ }
+ }
+ }
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ CalculateSinglePipeDPPCLKAndSCLThroughput(
+ mode_lib->ms.cache_display_cfg.plane.HRatio[k],
+ mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k],
+ mode_lib->ms.cache_display_cfg.plane.VRatio[k],
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
+ mode_lib->ms.ip.max_dchub_pscl_bw_pix_per_clk,
+ mode_lib->ms.ip.max_pscl_lb_bw_pix_per_clk,
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
+ mode_lib->ms.cache_display_cfg.plane.HTaps[k],
+ mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k],
+ mode_lib->ms.cache_display_cfg.plane.VTaps[k],
+ mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k],
+ /* Output */
+ &mode_lib->ms.PSCL_FACTOR[k],
+ &mode_lib->ms.PSCL_FACTOR_CHROMA[k],
+ &mode_lib->ms.MinDPPCLKUsingSingleDPP[k]);
+ }
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear) {
+ s->MaximumSwathWidthSupportLuma = 8192;
+ } else if (!dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelC[k] > 0 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha) {
+ s->MaximumSwathWidthSupportLuma = 7680;
+ } else if (dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelC[k] > 0 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha) {
+ s->MaximumSwathWidthSupportLuma = 4320;
+ } else if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_rgbe_alpha) {
+ s->MaximumSwathWidthSupportLuma = 3840;
+ } else if (dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelY[k] == 8 && mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
+ s->MaximumSwathWidthSupportLuma = 3072;
+ } else {
+ s->MaximumSwathWidthSupportLuma = 6144;
+ }
+
+ if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_8 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_10 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_12) {
+ s->MaximumSwathWidthSupportChroma = (dml_uint_t)(s->MaximumSwathWidthSupportLuma / 2.0);
+ } else {
+ s->MaximumSwathWidthSupportChroma = s->MaximumSwathWidthSupportLuma;
+ }
+ mode_lib->ms.MaximumSwathWidthInLineBufferLuma = mode_lib->ms.ip.line_buffer_size_bits * dml_max(mode_lib->ms.cache_display_cfg.plane.HRatio[k], 1.0) / mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel[k] /
+ (mode_lib->ms.cache_display_cfg.plane.VTaps[k] + dml_max(dml_ceil(mode_lib->ms.cache_display_cfg.plane.VRatio[k], 1.0) - 2, 0.0));
+ if (mode_lib->ms.BytePerPixelC[k] == 0.0) {
+ mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0;
+ } else {
+ mode_lib->ms.MaximumSwathWidthInLineBufferChroma =
+ mode_lib->ms.ip.line_buffer_size_bits
+ * dml_max(mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k], 1.0)
+ / mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel[k]
+ / (mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k]
+ + dml_max(dml_ceil(mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k], 1.0) - 2, 0.0));
+ }
+ mode_lib->ms.MaximumSwathWidthLuma[k] = dml_min(s->MaximumSwathWidthSupportLuma, mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
+ mode_lib->ms.MaximumSwathWidthChroma[k] = dml_min(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
+ }
+
+ /*Number Of DSC Slices*/
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k &&
+ mode_lib->ms.cache_display_cfg.output.DSCEnable[k] != dml_dsc_disable) {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = mode_lib->ms.cache_display_cfg.output.DSCSlices[k];
+
+ if (mode_lib->ms.support.NumberOfDSCSlices[k] == 0) {
+ if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 4800) {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = (dml_uint_t)(dml_ceil(mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 600, 4));
+ } else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 2400) {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 8;
+ } else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 1200) {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 4;
+ } else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 340) {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 2;
+ } else {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 1;
+ }
+ }
+ } else {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 1;
+ }
+ }
+
+ CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride;
+ CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
+ CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
+ CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
+ CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->ForceSingleDPP = 1;
+ CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
+ CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting;
+ CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->Output = mode_lib->ms.cache_display_cfg.output.OutputEncoder;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma;
+ CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma;
+ CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma;
+ CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
+ CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary;
+ CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat;
+ CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling;
+ CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth;
+ CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight;
+ CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart;
+ CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart;
+ CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC;
+ CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC;
+ CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY;
+ CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC;
+ CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY;
+ CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC;
+ CalculateSwathAndDETConfiguration_params->ODMMode = s->dummy_odm_mode;
+ CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
+ CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY;
+ CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC;
+ CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY;
+ CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC;
+ CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive;
+ CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
+ CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
+ CalculateSwathAndDETConfiguration_params->DPPPerSurface = s->dummy_integer_array[0];
+ CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_integer_array[1];
+ CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_integer_array[2];
+ CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_integer_array[3];
+ CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_integer_array[4];
+ CalculateSwathAndDETConfiguration_params->SwathHeightY = s->dummy_integer_array[5];
+ CalculateSwathAndDETConfiguration_params->SwathHeightC = s->dummy_integer_array[6];
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = s->dummy_integer_array[7];
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
+ CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &s->dummy_boolean[0];
+ CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[2];
+ CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &s->dummy_integer[1];
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &s->dummy_integer[0];
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = mode_lib->ms.SingleDPPViewportSizeSupportPerSurface;
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[1];
+
+ CalculateSwathAndDETConfiguration(&mode_lib->scratch,
+ CalculateSwathAndDETConfiguration_params); /* dml_bool_t *ViewportSizeSupport */
+
+ s->MPCCombineMethodAsNeededForPStateChangeAndVoltage = false;
+ s->MPCCombineMethodAsPossible = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_as_needed_for_pstate_and_voltage)
+ s->MPCCombineMethodAsNeededForPStateChangeAndVoltage = true;
+ if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_as_possible)
+ s->MPCCombineMethodAsPossible = true;
+ }
+ mode_lib->ms.support.MPCCombineMethodIncompatible = s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && s->MPCCombineMethodAsPossible;
+
+ for (j = 0; j < 2; j++) {
+ mode_lib->ms.TotalNumberOfActiveDPP[j] = 0;
+ mode_lib->ms.support.TotalAvailablePipesSupport[j] = true;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ CalculateODMMode(
+ mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit,
+ mode_lib->ms.cache_display_cfg.timing.HActive[k],
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
+ mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
+ mode_lib->ms.policy.ODMUse[k],
+ mode_lib->ms.state.dispclk_mhz,
+ mode_lib->ms.max_state.dispclk_mhz,
+ false, // DSCEnable
+ mode_lib->ms.TotalNumberOfActiveDPP[j],
+ mode_lib->ms.ip.max_num_dpp,
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.soc.dcn_downspread_percent,
+ mode_lib->ms.ip.dispclk_ramp_margin_percent,
+ mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+
+ /* Output */
+ &s->TotalAvailablePipesSupportNoDSC,
+ &s->NumberOfDPPNoDSC,
+ &s->ODMModeNoDSC,
+ &s->RequiredDISPCLKPerSurfaceNoDSC);
+
+ CalculateODMMode(
+ mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit,
+ mode_lib->ms.cache_display_cfg.timing.HActive[k],
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
+ mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
+ mode_lib->ms.policy.ODMUse[k],
+ mode_lib->ms.state.dispclk_mhz,
+ mode_lib->ms.max_state.dispclk_mhz,
+ true, // DSCEnable
+ mode_lib->ms.TotalNumberOfActiveDPP[j],
+ mode_lib->ms.ip.max_num_dpp,
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.soc.dcn_downspread_percent,
+ mode_lib->ms.ip.dispclk_ramp_margin_percent,
+ mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+
+ /* Output */
+ &s->TotalAvailablePipesSupportDSC,
+ &s->NumberOfDPPDSC,
+ &s->ODMModeDSC,
+ &s->RequiredDISPCLKPerSurfaceDSC);
+
+ CalculateOutputLink(
+ mode_lib->ms.state.phyclk_mhz,
+ mode_lib->ms.state.phyclk_d18_mhz,
+ mode_lib->ms.state.phyclk_d32_mhz,
+ mode_lib->ms.soc.phy_downspread_percent,
+ (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k),
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
+ mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k],
+ mode_lib->ms.cache_display_cfg.timing.HActive[k],
+ mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k],
+ mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k],
+ mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k],
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+ mode_lib->ms.cache_display_cfg.output.AudioSampleRate[k],
+ mode_lib->ms.cache_display_cfg.output.AudioSampleLayout[k],
+ s->ODMModeNoDSC,
+ s->ODMModeDSC,
+ mode_lib->ms.cache_display_cfg.output.DSCEnable[k],
+ mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k],
+ mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k],
+
+ /* Output */
+ &mode_lib->ms.RequiresDSC[k],
+ &mode_lib->ms.RequiresFEC[k],
+ &mode_lib->ms.OutputBppPerState[k],
+ &mode_lib->ms.OutputTypePerState[k], // VBA_DELTA: VBA uses a string to represent type and rate, but DML uses an enum; we don't want to rely on strings
+ &mode_lib->ms.OutputRatePerState[k],
+ &mode_lib->ms.RequiredSlots[k]);
+
+ if (mode_lib->ms.RequiresDSC[k] == false) {
+ mode_lib->ms.ODMModePerState[k] = s->ODMModeNoDSC;
+ mode_lib->ms.RequiredDISPCLKPerSurface[j][k] = s->RequiredDISPCLKPerSurfaceNoDSC;
+ if (!s->TotalAvailablePipesSupportNoDSC)
+ mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
+ mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + s->NumberOfDPPNoDSC;
+ } else {
+ mode_lib->ms.ODMModePerState[k] = s->ODMModeDSC;
+ mode_lib->ms.RequiredDISPCLKPerSurface[j][k] = s->RequiredDISPCLKPerSurfaceDSC;
+ if (!s->TotalAvailablePipesSupportDSC)
+ mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
+ mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + s->NumberOfDPPDSC;
+ }
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) {
+ mode_lib->ms.MPCCombine[j][k] = false;
+ mode_lib->ms.NoOfDPP[j][k] = 4;
+ } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) {
+ mode_lib->ms.MPCCombine[j][k] = false;
+ mode_lib->ms.NoOfDPP[j][k] = 2;
+ } else if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_disabled) {
+ mode_lib->ms.MPCCombine[j][k] = false;
+ mode_lib->ms.NoOfDPP[j][k] = 1;
+ } else if (RoundToDFSGranularity(mode_lib->ms.MinDPPCLKUsingSingleDPP[k] * (1 + mode_lib->ms.soc.dcn_downspread_percent / 100),
+ 1, mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz) <= mode_lib->ms.state.dppclk_mhz &&
+ mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k] == true) {
+ mode_lib->ms.MPCCombine[j][k] = false;
+ mode_lib->ms.NoOfDPP[j][k] = 1;
+ } else if (mode_lib->ms.TotalNumberOfActiveDPP[j] < (dml_uint_t) mode_lib->ms.ip.max_num_dpp) {
+ mode_lib->ms.MPCCombine[j][k] = true;
+ mode_lib->ms.NoOfDPP[j][k] = 2;
+ mode_lib->ms.TotalNumberOfActiveDPP[j] = (dml_uint_t) mode_lib->ms.TotalNumberOfActiveDPP[j] + 1;
+ } else {
+ mode_lib->ms.MPCCombine[j][k] = false;
+ mode_lib->ms.NoOfDPP[j][k] = 1;
+ mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
+ }
+ }
+
+ mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = 0;
+ s->NoChromaOrLinear = true;
+ for (k = 0; k < (dml_uint_t) mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.NoOfDPP[j][k] == 1)
+ mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] + 1;
+ if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_8
+ || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_10
+ || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_12
+ || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_rgbe_alpha
+ || mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear) {
+ s->NoChromaOrLinear = false;
+ }
+ }
+
+ if (j == 1 && !UnboundedRequest(mode_lib->ms.policy.UseUnboundedRequesting,
+ mode_lib->ms.TotalNumberOfActiveDPP[j], s->NoChromaOrLinear,
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[0])) {
+ while (!(mode_lib->ms.TotalNumberOfActiveDPP[j] >= (dml_uint_t) mode_lib->ms.ip.max_num_dpp || mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] == 0)) {
+ s->BWOfNonCombinedSurfaceOfMaximumBandwidth = 0;
+ s->NumberOfNonCombinedSurfaceOfMaximumBandwidth = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.policy.MPCCombineUse[k] != dml_mpc_disabled && mode_lib->ms.policy.MPCCombineUse[k] != dml_mpc_as_needed_for_voltage &&
+ mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k] > s->BWOfNonCombinedSurfaceOfMaximumBandwidth &&
+ (mode_lib->ms.ODMModePerState[k] != dml_odm_mode_combine_2to1 && mode_lib->ms.ODMModePerState[k] != dml_odm_mode_combine_4to1) &&
+ mode_lib->ms.MPCCombine[j][k] == false) {
+ s->BWOfNonCombinedSurfaceOfMaximumBandwidth = mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k];
+ s->NumberOfNonCombinedSurfaceOfMaximumBandwidth = k;
+ }
+ }
+ mode_lib->ms.MPCCombine[j][s->NumberOfNonCombinedSurfaceOfMaximumBandwidth] = true;
+ mode_lib->ms.NoOfDPP[j][s->NumberOfNonCombinedSurfaceOfMaximumBandwidth] = 2;
+ mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + 1;
+ mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] - 1;
+ }
+ }
+
+ //DISPCLK/DPPCLK
+ mode_lib->ms.WritebackRequiredDISPCLK = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k]) {
+ mode_lib->ms.WritebackRequiredDISPCLK = dml_max(mode_lib->ms.WritebackRequiredDISPCLK,
+ CalculateWriteBackDISPCLK(mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackSourceWidth[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k],
+ mode_lib->ms.ip.writeback_line_buffer_buffer_size,
+ mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz));
+ }
+ }
+
+ mode_lib->ms.RequiredDISPCLK[j] = mode_lib->ms.WritebackRequiredDISPCLK;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.RequiredDISPCLK[j] = dml_max(mode_lib->ms.RequiredDISPCLK[j], mode_lib->ms.RequiredDISPCLKPerSurface[j][k]);
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
+ }
+
+ CalculateDPPCLK(mode_lib->ms.num_active_planes,
+ mode_lib->ms.soc.dcn_downspread_percent,
+ mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
+ mode_lib->ms.MinDPPCLKUsingSingleDPP,
+ mode_lib->ms.NoOfDPPThisState,
+ /* Output */
+ &mode_lib->ms.GlobalDPPCLK,
+ mode_lib->ms.RequiredDPPCLKThisState);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.RequiredDPPCLKPerSurface[j][k] = mode_lib->ms.RequiredDPPCLKThisState[k];
+ }
+
+ mode_lib->ms.support.DISPCLK_DPPCLK_Support[j] = !((mode_lib->ms.RequiredDISPCLK[j] > mode_lib->ms.state.dispclk_mhz) || (mode_lib->ms.GlobalDPPCLK > mode_lib->ms.state.dppclk_mhz));
+
+ if (mode_lib->ms.TotalNumberOfActiveDPP[j] > (dml_uint_t) mode_lib->ms.ip.max_num_dpp) {
+ mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
+ }
+ } // j
+
+ /* Total Available OTG, HDMIFRL, DP Support Check */
+ s->TotalNumberOfActiveOTG = 0;
+ s->TotalNumberOfActiveHDMIFRL = 0;
+ s->TotalNumberOfActiveDP2p0 = 0;
+ s->TotalNumberOfActiveDP2p0Outputs = 0;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
+ s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1;
+ if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)
+ s->TotalNumberOfActiveHDMIFRL = s->TotalNumberOfActiveHDMIFRL + 1;
+ if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0) {
+ s->TotalNumberOfActiveDP2p0 = s->TotalNumberOfActiveDP2p0 + 1;
+ if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k || mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == false) {
+ s->TotalNumberOfActiveDP2p0Outputs = s->TotalNumberOfActiveDP2p0Outputs + 1;
+ }
+ }
+ }
+ }
+
+ mode_lib->ms.support.NumberOfOTGSupport = (s->TotalNumberOfActiveOTG <= (dml_uint_t) mode_lib->ms.ip.max_num_otg);
+ mode_lib->ms.support.NumberOfHDMIFRLSupport = (s->TotalNumberOfActiveHDMIFRL <= (dml_uint_t) mode_lib->ms.ip.max_num_hdmi_frl_outputs);
+ mode_lib->ms.support.NumberOfDP2p0Support = (s->TotalNumberOfActiveDP2p0 <= (dml_uint_t) mode_lib->ms.ip.max_num_dp2p0_streams && s->TotalNumberOfActiveDP2p0Outputs <= (dml_uint_t) mode_lib->ms.ip.max_num_dp2p0_outputs);
+
+ /* Display IO and DSC Support Check */
+ mode_lib->ms.support.NonsupportedDSCInputBPC = false;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.output.OutputDisabled[k] == false &&
+ !(mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 12.0
+ || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 10.0
+ || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 8.0
+ || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] > (dml_uint_t) mode_lib->ms.ip.maximum_dsc_bits_per_component
+ )) {
+ mode_lib->ms.support.NonsupportedDSCInputBPC = true;
+ }
+ }
+
+ mode_lib->ms.support.ExceededMultistreamSlots = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k) {
+ s->TotalSlots = mode_lib->ms.RequiredSlots[k];
+ for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
+ if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[j] == k)
+ s->TotalSlots = s->TotalSlots + mode_lib->ms.RequiredSlots[j];
+ }
+ if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp && s->TotalSlots > 63)
+ mode_lib->ms.support.ExceededMultistreamSlots = true;
+ if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 && s->TotalSlots > 64)
+ mode_lib->ms.support.ExceededMultistreamSlots = true;
+ }
+ }
+ mode_lib->ms.support.LinkCapacitySupport = true;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.output.OutputDisabled[k] == false &&
+ mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp ||
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) && mode_lib->ms.OutputBppPerState[k] == 0) {
+ mode_lib->ms.support.LinkCapacitySupport = false;
+ }
+ }
+
+ mode_lib->ms.support.P2IWith420 = false;
+ mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = false;
+ mode_lib->ms.support.DSC422NativeNotSupported = false;
+ mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = false;
+ mode_lib->ms.support.LinkRateForMultistreamNotIndicated = false;
+ mode_lib->ms.support.BPPForMultistreamNotIndicated = false;
+ mode_lib->ms.support.MultistreamWithHDMIOreDP = false;
+ mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = false;
+ mode_lib->ms.support.NotEnoughLanesForMSO = false;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp ||
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)) {
+ if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420 && mode_lib->ms.cache_display_cfg.timing.Interlace[k] == 1 && mode_lib->ms.ip.ptoi_supported == true)
+ mode_lib->ms.support.P2IWith420 = true;
+
+ if (mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable_if_necessary && mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] != 0)
+ mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = true;
+ if ((mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable || mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable_if_necessary) && mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 && !mode_lib->ms.ip.dsc422_native_support)
+ mode_lib->ms.support.DSC422NativeNotSupported = true;
+
+ if (((mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr2 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr3) &&
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp && mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_edp) ||
+ ((mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr10 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr13p5 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr20) &&
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp2p0))
+ mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = true;
+
+ if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1) {
+ if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k && mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_na)
+ mode_lib->ms.support.LinkRateForMultistreamNotIndicated = true;
+ if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k && mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] == 0)
+ mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
+ for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
+ if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == j && mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] == 0)
+ mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
+ }
+ }
+
+ if ((mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)) {
+ if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1 && mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k)
+ mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
+ for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
+ if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1 && mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == j)
+ mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
+ }
+ }
+ if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp && (mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_split_1to2 ||
+ mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to2 || mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to4))
+ mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = true;
+
+ if ((mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to2 && mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k] < 2) ||
+ (mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to4 && mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k] < 4))
+ mode_lib->ms.support.NotEnoughLanesForMSO = true;
+ }
+ }
+
+ mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k &&
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl &&
+ RequiredDTBCLK(
+ mode_lib->ms.RequiresDSC[k],
+ mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k],
+ mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
+ mode_lib->ms.OutputBppPerState[k],
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k],
+ mode_lib->ms.cache_display_cfg.timing.HActive[k],
+ mode_lib->ms.cache_display_cfg.output.AudioSampleRate[k],
+ mode_lib->ms.cache_display_cfg.output.AudioSampleLayout[k]) > mode_lib->ms.state.dtbclk_mhz) {
+ mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true;
+ }
+ }
+
+ mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK = true;
+ mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK = true;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1 && mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi) {
+ mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK = false;
+ }
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1 && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp ||
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi)) {
+ mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK = false;
+ }
+ }
+
+ mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = false;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
+ if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp ||
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 ||
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp ||
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) {
+ if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420) {
+ s->DSCFormatFactor = 2;
+ } else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_444) {
+ s->DSCFormatFactor = 1;
+ } else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) {
+ s->DSCFormatFactor = 2;
+ } else {
+ s->DSCFormatFactor = 1;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
+#endif
+ if (mode_lib->ms.RequiresDSC[k] == true) {
+ if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) {
+ if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 12.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]);
+ dml_print("DML::%s: k=%u, DSCCLKPerState = %f\n", __func__, k, mode_lib->ms.state.dscclk_mhz);
+ dml_print("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor);
+#endif
+ mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
+ }
+ } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) {
+ if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 6.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) {
+ mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
+ }
+ } else {
+ if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 3.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) {
+ mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
+ }
+ }
+ }
+ }
+ }
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DSCCLKRequiredMoreThanSupported = %u\n", __func__, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported);
+#endif
+
+ /* Check DSC Unit and Slices Support */
+ mode_lib->ms.support.NotEnoughDSCUnits = false;
+ mode_lib->ms.support.NotEnoughDSCSlices = false;
+ s->TotalDSCUnitsRequired = 0;
+ mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = true;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.RequiresDSC[k] == true) {
+ if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) {
+ if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > 4 * (dml_uint_t) mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit)
+ mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
+ s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 4;
+ if (mode_lib->ms.support.NumberOfDSCSlices[k] > 16)
+ mode_lib->ms.support.NotEnoughDSCSlices = true;
+ } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) {
+ if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > 2 * (dml_uint_t) mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit)
+ mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
+ s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 2;
+ if (mode_lib->ms.support.NumberOfDSCSlices[k] > 8)
+ mode_lib->ms.support.NotEnoughDSCSlices = true;
+ } else {
+ if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > (dml_uint_t) mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit)
+ mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
+ s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 1;
+ if (mode_lib->ms.support.NumberOfDSCSlices[k] > 4)
+ mode_lib->ms.support.NotEnoughDSCSlices = true;
+ }
+ }
+ }
+ if (s->TotalDSCUnitsRequired > (dml_uint_t) mode_lib->ms.ip.num_dsc) {
+ mode_lib->ms.support.NotEnoughDSCUnits = true;
+ }
+
+ /*DSC Delay per state*/
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.DSCDelayPerState[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k],
+ mode_lib->ms.ODMModePerState[k],
+ mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k],
+ mode_lib->ms.OutputBppPerState[k],
+ mode_lib->ms.cache_display_cfg.timing.HActive[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k],
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+ mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]);
+ }
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) {
+ for (j = 0; j <= mode_lib->ms.num_active_planes - 1; j++) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == m && mode_lib->ms.RequiresDSC[m] == true) {
+ mode_lib->ms.DSCDelayPerState[k] = mode_lib->ms.DSCDelayPerState[m];
+ }
+ }
+ }
+ }
+
+ //Calculate Swath, DET Configuration, DCFCLKDeepSleep
+ //
+ for (j = 0; j < 2; ++j) {
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.RequiredDPPCLKThisState[k] = mode_lib->ms.RequiredDPPCLKPerSurface[j][k];
+ mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
+ mode_lib->ms.ODMModeThisState[k] = mode_lib->ms.ODMModePerState[k];
+ }
+
+ CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride;
+ CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
+ CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
+ CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
+ CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
+ CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
+ CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting;
+ CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->Output = mode_lib->ms.cache_display_cfg.output.OutputEncoder;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma;
+ CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma;
+ CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma;
+ CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
+ CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary;
+ CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat;
+ CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling;
+ CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth;
+ CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight;
+ CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart;
+ CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart;
+ CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC;
+ CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC;
+ CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY;
+ CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC;
+ CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY;
+ CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC;
+ CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.ODMModeThisState;
+ CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
+ CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY;
+ CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC;
+ CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY;
+ CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC;
+ CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive;
+ CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
+ CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
+ CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.NoOfDPPThisState;
+ CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub_this_state;
+ CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub_this_state;
+ CalculateSwathAndDETConfiguration_params->SwathWidth = mode_lib->ms.SwathWidthYThisState;
+ CalculateSwathAndDETConfiguration_params->SwathWidthChroma = mode_lib->ms.SwathWidthCThisState;
+ CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState;
+ CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->ms.DETBufferSizeInKByteThisState;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeYThisState;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeCThisState;
+ CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->ms.UnboundedRequestEnabledThisState;
+ CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[2];
+ CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &s->dummy_integer[1];
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->ms.CompressedBufferSizeInkByteThisState;
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = s->dummy_boolean_array[0];
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &mode_lib->ms.support.ViewportSizeSupport[j];
+
+ CalculateSwathAndDETConfiguration(&mode_lib->scratch,
+ CalculateSwathAndDETConfiguration_params);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.swath_width_luma_ub_all_states[j][k] = mode_lib->ms.swath_width_luma_ub_this_state[k];
+ mode_lib->ms.swath_width_chroma_ub_all_states[j][k] = mode_lib->ms.swath_width_chroma_ub_this_state[k];
+ mode_lib->ms.SwathWidthYAllStates[j][k] = mode_lib->ms.SwathWidthYThisState[k];
+ mode_lib->ms.SwathWidthCAllStates[j][k] = mode_lib->ms.SwathWidthCThisState[k];
+ mode_lib->ms.SwathHeightYAllStates[j][k] = mode_lib->ms.SwathHeightYThisState[k];
+ mode_lib->ms.SwathHeightCAllStates[j][k] = mode_lib->ms.SwathHeightCThisState[k];
+ mode_lib->ms.UnboundedRequestEnabledAllStates[j] = mode_lib->ms.UnboundedRequestEnabledThisState;
+ mode_lib->ms.CompressedBufferSizeInkByteAllStates[j] = mode_lib->ms.CompressedBufferSizeInkByteThisState;
+ mode_lib->ms.DETBufferSizeInKByteAllStates[j][k] = mode_lib->ms.DETBufferSizeInKByteThisState[k];
+ mode_lib->ms.DETBufferSizeYAllStates[j][k] = mode_lib->ms.DETBufferSizeYThisState[k];
+ mode_lib->ms.DETBufferSizeCAllStates[j][k] = mode_lib->ms.DETBufferSizeCThisState[k];
+ }
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.cursor_bw[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
+ }
+
+ CalculateSurfaceSizeInMall(
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.soc.mall_allocated_for_dcn_mbytes,
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen,
+ mode_lib->ms.cache_display_cfg.surface.DCCEnable,
+ mode_lib->ms.cache_display_cfg.plane.ViewportStationary,
+ mode_lib->ms.cache_display_cfg.plane.ViewportXStart,
+ mode_lib->ms.cache_display_cfg.plane.ViewportYStart,
+ mode_lib->ms.cache_display_cfg.plane.ViewportXStartC,
+ mode_lib->ms.cache_display_cfg.plane.ViewportYStartC,
+ mode_lib->ms.cache_display_cfg.plane.ViewportWidth,
+ mode_lib->ms.cache_display_cfg.plane.ViewportHeight,
+ mode_lib->ms.BytePerPixelY,
+ mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma,
+ mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma,
+ mode_lib->ms.BytePerPixelC,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC,
+ mode_lib->ms.Read256BlockWidthY,
+ mode_lib->ms.Read256BlockWidthC,
+ mode_lib->ms.Read256BlockHeightY,
+ mode_lib->ms.Read256BlockHeightC,
+ mode_lib->ms.MacroTileWidthY,
+ mode_lib->ms.MacroTileWidthC,
+ mode_lib->ms.MacroTileHeightY,
+ mode_lib->ms.MacroTileHeightC,
+
+ /* Output */
+ mode_lib->ms.SurfaceSizeInMALL,
+ &mode_lib->ms.support.ExceededMALLSize);
+
+ for (j = 0; j < 2; j++) {
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ mode_lib->ms.swath_width_luma_ub_this_state[k] = mode_lib->ms.swath_width_luma_ub_all_states[j][k];
+ mode_lib->ms.swath_width_chroma_ub_this_state[k] = mode_lib->ms.swath_width_chroma_ub_all_states[j][k];
+ mode_lib->ms.SwathWidthYThisState[k] = mode_lib->ms.SwathWidthYAllStates[j][k];
+ mode_lib->ms.SwathWidthCThisState[k] = mode_lib->ms.SwathWidthCAllStates[j][k];
+ mode_lib->ms.SwathHeightYThisState[k] = mode_lib->ms.SwathHeightYAllStates[j][k];
+ mode_lib->ms.SwathHeightCThisState[k] = mode_lib->ms.SwathHeightCAllStates[j][k];
+ mode_lib->ms.DETBufferSizeInKByteThisState[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[j][k];
+ mode_lib->ms.DETBufferSizeYThisState[k] = mode_lib->ms.DETBufferSizeYAllStates[j][k];
+ mode_lib->ms.DETBufferSizeCThisState[k] = mode_lib->ms.DETBufferSizeCAllStates[j][k];
+ mode_lib->ms.RequiredDPPCLKThisState[k] = mode_lib->ms.RequiredDPPCLKPerSurface[j][k];
+ mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
+ }
+
+ mode_lib->ms.TotalNumberOfDCCActiveDPP[j] = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
+ mode_lib->ms.TotalNumberOfDCCActiveDPP[j] = mode_lib->ms.TotalNumberOfDCCActiveDPP[j] + mode_lib->ms.NoOfDPP[j][k];
+ }
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ s->SurfParameters[k].PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
+ s->SurfParameters[k].DPPPerSurface = mode_lib->ms.NoOfDPP[j][k];
+ s->SurfParameters[k].SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
+ s->SurfParameters[k].ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k];
+ s->SurfParameters[k].ViewportHeightChroma = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k];
+ s->SurfParameters[k].BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
+ s->SurfParameters[k].BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
+ s->SurfParameters[k].BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
+ s->SurfParameters[k].BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
+ s->SurfParameters[k].BlockWidthY = mode_lib->ms.MacroTileWidthY[k];
+ s->SurfParameters[k].BlockHeightY = mode_lib->ms.MacroTileHeightY[k];
+ s->SurfParameters[k].BlockWidthC = mode_lib->ms.MacroTileWidthC[k];
+ s->SurfParameters[k].BlockHeightC = mode_lib->ms.MacroTileHeightC[k];
+ s->SurfParameters[k].InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
+ s->SurfParameters[k].HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
+ s->SurfParameters[k].DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
+ s->SurfParameters[k].SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
+ s->SurfParameters[k].SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k];
+ s->SurfParameters[k].BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
+ s->SurfParameters[k].BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
+ s->SurfParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
+ s->SurfParameters[k].VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio[k];
+ s->SurfParameters[k].VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k];
+ s->SurfParameters[k].VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps[k];
+ s->SurfParameters[k].VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k];
+ s->SurfParameters[k].PitchY = mode_lib->ms.cache_display_cfg.surface.PitchY[k];
+ s->SurfParameters[k].DCCMetaPitchY = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k];
+ s->SurfParameters[k].PitchC = mode_lib->ms.cache_display_cfg.surface.PitchC[k];
+ s->SurfParameters[k].DCCMetaPitchC = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
+ s->SurfParameters[k].ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary[k];
+ s->SurfParameters[k].ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart[k];
+ s->SurfParameters[k].ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart[k];
+ s->SurfParameters[k].ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC[k];
+ s->SurfParameters[k].ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC[k];
+ s->SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->ms.cache_display_cfg.plane.ForceOneRowForFrame[k];
+ s->SurfParameters[k].SwathHeightY = mode_lib->ms.SwathHeightYThisState[k];
+ s->SurfParameters[k].SwathHeightC = mode_lib->ms.SwathHeightCThisState[k];
+ }
+
+ CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ CalculateVMRowAndSwath_params->myPipe = s->SurfParameters;
+ CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->ms.SurfaceSizeInMALL;
+ CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_luma;
+ CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_chroma;
+ CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ms.ip.dcc_meta_buffer_size_bytes;
+ CalculateVMRowAndSwath_params->UseMALLForStaticScreen = mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen;
+ CalculateVMRowAndSwath_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
+ CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->ms.soc.mall_allocated_for_dcn_mbytes;
+ CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthYThisState;
+ CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthCThisState;
+ CalculateVMRowAndSwath_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
+ CalculateVMRowAndSwath_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
+ CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
+ CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
+ CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
+ CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
+ CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
+ CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
+ CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState;
+ CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = mode_lib->ms.DCCMetaBufferSizeNotExceededPerState;
+ CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = s->dummy_integer_array[0];
+ CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = s->dummy_integer_array[1];
+ CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->ms.dpte_row_height;
+ CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->ms.dpte_row_height_chroma;
+ CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = s->dummy_integer_array[2]; // VBA_DELTA
+ CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = s->dummy_integer_array[3]; // VBA_DELTA
+ CalculateVMRowAndSwath_params->meta_req_width = s->dummy_integer_array[4];
+ CalculateVMRowAndSwath_params->meta_req_width_chroma = s->dummy_integer_array[5];
+ CalculateVMRowAndSwath_params->meta_req_height = s->dummy_integer_array[6];
+ CalculateVMRowAndSwath_params->meta_req_height_chroma = s->dummy_integer_array[7];
+ CalculateVMRowAndSwath_params->meta_row_width = s->dummy_integer_array[8];
+ CalculateVMRowAndSwath_params->meta_row_width_chroma = s->dummy_integer_array[9];
+ CalculateVMRowAndSwath_params->meta_row_height = mode_lib->ms.meta_row_height;
+ CalculateVMRowAndSwath_params->meta_row_height_chroma = mode_lib->ms.meta_row_height_chroma;
+ CalculateVMRowAndSwath_params->vm_group_bytes = s->dummy_integer_array[10];
+ CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
+ CalculateVMRowAndSwath_params->PixelPTEReqWidthY = s->dummy_integer_array[11];
+ CalculateVMRowAndSwath_params->PixelPTEReqHeightY = s->dummy_integer_array[12];
+ CalculateVMRowAndSwath_params->PTERequestSizeY = s->dummy_integer_array[13];
+ CalculateVMRowAndSwath_params->PixelPTEReqWidthC = s->dummy_integer_array[14];
+ CalculateVMRowAndSwath_params->PixelPTEReqHeightC = s->dummy_integer_array[15];
+ CalculateVMRowAndSwath_params->PTERequestSizeC = s->dummy_integer_array[16];
+ CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = s->dummy_integer_array[17];
+ CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = s->dummy_integer_array[18];
+ CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = s->dummy_integer_array[19];
+ CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = s->dummy_integer_array[20];
+ CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesYThisState;
+ CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesCThisState;
+ CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->ms.PrefillY;
+ CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->ms.PrefillC;
+ CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->ms.MaxNumSwY;
+ CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->ms.MaxNumSwC;
+ CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->ms.meta_row_bandwidth_this_state;
+ CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->ms.dpte_row_bandwidth_this_state;
+ CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRowThisState;
+ CalculateVMRowAndSwath_params->PDEAndMetaPTEBytesFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrameThisState;
+ CalculateVMRowAndSwath_params->MetaRowByte = mode_lib->ms.MetaRowBytesThisState;
+ CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->ms.use_one_row_for_frame_this_state;
+ CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->ms.use_one_row_for_frame_flip_this_state;
+ CalculateVMRowAndSwath_params->UsesMALLForStaticScreen = s->dummy_boolean_array[0];
+ CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = s->dummy_boolean_array[1];
+ CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = s->dummy_integer_array[21];
+
+ CalculateVMRowAndSwath(&mode_lib->scratch,
+ CalculateVMRowAndSwath_params);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.PrefetchLinesY[j][k] = mode_lib->ms.PrefetchLinesYThisState[k];
+ mode_lib->ms.PrefetchLinesC[j][k] = mode_lib->ms.PrefetchLinesCThisState[k];
+ mode_lib->ms.meta_row_bandwidth[j][k] = mode_lib->ms.meta_row_bandwidth_this_state[k];
+ mode_lib->ms.dpte_row_bandwidth[j][k] = mode_lib->ms.dpte_row_bandwidth_this_state[k];
+ mode_lib->ms.DPTEBytesPerRow[j][k] = mode_lib->ms.DPTEBytesPerRowThisState[k];
+ mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] = mode_lib->ms.PDEAndMetaPTEBytesPerFrameThisState[k];
+ mode_lib->ms.MetaRowBytes[j][k] = mode_lib->ms.MetaRowBytesThisState[k];
+ mode_lib->ms.use_one_row_for_frame[j][k] = mode_lib->ms.use_one_row_for_frame_this_state[k];
+ mode_lib->ms.use_one_row_for_frame_flip[j][k] = mode_lib->ms.use_one_row_for_frame_flip_this_state[k];
+ }
+
+ mode_lib->ms.support.PTEBufferSizeNotExceeded[j] = true;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.PTEBufferSizeNotExceededPerState[k] == false)
+ mode_lib->ms.support.PTEBufferSizeNotExceeded[j] = false;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: j=%u k=%u, PTEBufferSizeNotExceededPerState[%u] = %u\n", __func__, j, k, k, mode_lib->ms.PTEBufferSizeNotExceededPerState[k]);
+#endif
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PTEBufferSizeNotExceeded[%u] = %u\n", __func__, j, mode_lib->ms.support.PTEBufferSizeNotExceeded[j]);
+#endif
+
+ mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] = true;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.DCCMetaBufferSizeNotExceededPerState[k] == false)
+ mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] = false;
+ }
+
+ mode_lib->ms.UrgLatency = CalculateUrgentLatency(mode_lib->ms.state.urgent_latency_pixel_data_only_us,
+ mode_lib->ms.state.urgent_latency_pixel_mixed_with_vm_data_us,
+ mode_lib->ms.state.urgent_latency_vm_data_only_us,
+ mode_lib->ms.soc.do_urgent_latency_adjustment,
+ mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_component_us,
+ mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_reference_mhz,
+ mode_lib->ms.state.fabricclk_mhz);
+
+ /* Getter functions work at the mp interface, so copy the urgent latency to mp. */
+ mode_lib->mp.UrgentLatency = mode_lib->ms.UrgLatency;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ CalculateUrgentBurstFactor(
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
+ mode_lib->ms.swath_width_luma_ub_this_state[k],
+ mode_lib->ms.swath_width_chroma_ub_this_state[k],
+ mode_lib->ms.SwathHeightYThisState[k],
+ mode_lib->ms.SwathHeightCThisState[k],
+ (dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.UrgLatency,
+ mode_lib->ms.ip.cursor_buffer_size,
+ mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
+ mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
+ mode_lib->ms.cache_display_cfg.plane.VRatio[k],
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
+ mode_lib->ms.BytePerPixelInDETY[k],
+ mode_lib->ms.BytePerPixelInDETC[k],
+ mode_lib->ms.DETBufferSizeYThisState[k],
+ mode_lib->ms.DETBufferSizeCThisState[k],
+ /* Output */
+ &mode_lib->ms.UrgentBurstFactorCursor[j][k],
+ &mode_lib->ms.UrgentBurstFactorLuma[j][k],
+ &mode_lib->ms.UrgentBurstFactorChroma[j][k],
+ &mode_lib->ms.NotUrgentLatencyHiding[k]);
+ }
+
+ CalculateDCFCLKDeepSleep(
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.BytePerPixelY,
+ mode_lib->ms.BytePerPixelC,
+ mode_lib->ms.cache_display_cfg.plane.VRatio,
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
+ mode_lib->ms.SwathWidthYThisState,
+ mode_lib->ms.SwathWidthCThisState,
+ mode_lib->ms.NoOfDPPThisState,
+ mode_lib->ms.cache_display_cfg.plane.HRatio,
+ mode_lib->ms.cache_display_cfg.plane.HRatioChroma,
+ mode_lib->ms.cache_display_cfg.timing.PixelClock,
+ mode_lib->ms.PSCL_FACTOR,
+ mode_lib->ms.PSCL_FACTOR_CHROMA,
+ mode_lib->ms.RequiredDPPCLKThisState,
+ mode_lib->ms.ReadBandwidthLuma,
+ mode_lib->ms.ReadBandwidthChroma,
+ mode_lib->ms.soc.return_bus_width_bytes,
+
+ /* Output */
+ &mode_lib->ms.ProjectedDCFCLKDeepSleep[j]);
+ }
+
+ //Calculate writeback delay time and maximum VStartup for each state
+ for (j = 0; j < 2; ++j) {
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
+ if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
+ mode_lib->ms.WritebackDelayTime[k] = mode_lib->ms.state.writeback_latency_us + CalculateWriteBackDelay(
+ mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / mode_lib->ms.RequiredDISPCLK[j];
+ } else {
+ mode_lib->ms.WritebackDelayTime[k] = 0.0;
+ }
+ for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[m] == k && mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[m] == true) {
+ mode_lib->ms.WritebackDelayTime[k] = dml_max(mode_lib->ms.WritebackDelayTime[k],
+ mode_lib->ms.state.writeback_latency_us + CalculateWriteBackDelay(
+ mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[m],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[m],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[m],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[m],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[m],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[m],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[m],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[m]) / mode_lib->ms.RequiredDISPCLK[j]);
+ }
+ }
+ }
+ }
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == m) {
+ mode_lib->ms.WritebackDelayTime[k] = mode_lib->ms.WritebackDelayTime[m];
+ }
+ }
+ }
+ s->MaxVStartupAllPlanes[j] = 0; // max vstartup among all planes
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ s->MaximumVStartup[j][k] = CalculateMaxVStartup(k,
+ mode_lib->ms.ip.ptoi_supported,
+ mode_lib->ms.ip.vblank_nom_default_us,
+ &mode_lib->ms.cache_display_cfg.timing,
+ mode_lib->ms.WritebackDelayTime[k]);
+
+ s->MaxVStartupAllPlanes[j] = (dml_uint_t)(dml_max(s->MaxVStartupAllPlanes[j], s->MaximumVStartup[j][k]));
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, MaxVStartupAllPlanes[%u] = %u\n", __func__, k, j, s->MaxVStartupAllPlanes[j]);
+ dml_print("DML::%s: k=%u, MaximumVStartup[%u][%u] = %u\n", __func__, k, j, k, s->MaximumVStartup[j][k]);
+#endif
+ }
+ }
+
+ s->ReorderingBytes = (dml_uint_t)(mode_lib->ms.soc.num_chans * dml_max3(mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_only_bytes,
+ mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
+ mode_lib->ms.soc.urgent_out_of_order_return_per_channel_vm_only_bytes));
+
+ for (j = 0; j < 2; ++j) {
+ mode_lib->ms.DCFCLKState[j] = mode_lib->ms.state.dcfclk_mhz;
+ }
+
+ /* Immediate Flip and MALL parameters */
+ s->ImmediateFlipRequiredFinal = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ s->ImmediateFlipRequiredFinal = s->ImmediateFlipRequiredFinal || (mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required);
+ }
+
+ mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified ||
+ ((mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_required) &&
+ (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required));
+ }
+ mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified && s->ImmediateFlipRequiredFinal;
+
+ mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe =
+ mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe || ((mode_lib->ms.cache_display_cfg.plane.HostVMEnable == true || mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) &&
+ (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame || mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe));
+ }
+
+ mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen ||
+ ((mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable || mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_optimize) && (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe)) ||
+ ((mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_disable || mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_optimize) && (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame));
+ }
+
+ s->FullFrameMALLPStateMethod = false;
+ s->SubViewportMALLPStateMethod = false;
+ s->PhantomPipeMALLPStateMethod = false;
+ s->SubViewportMALLRefreshGreaterThan120Hz = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame)
+ s->FullFrameMALLPStateMethod = true;
+ if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) {
+ s->SubViewportMALLPStateMethod = true;
+ if (mode_lib->ms.cache_display_cfg.timing.RefreshRate[k] > 120)
+ s->SubViewportMALLRefreshGreaterThan120Hz = true;
+ }
+ if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe)
+ s->PhantomPipeMALLPStateMethod = true;
+ }
+ mode_lib->ms.support.InvalidCombinationOfMALLUseForPState = (s->SubViewportMALLPStateMethod != s->PhantomPipeMALLPStateMethod)
+ || (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz;
+
+ if (mode_lib->ms.policy.UseMinimumRequiredDCFCLK == true) {
+ UseMinimumDCFCLK_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
+ UseMinimumDCFCLK_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay;
+ UseMinimumDCFCLK_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
+ UseMinimumDCFCLK_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters;
+ UseMinimumDCFCLK_params->MaxPrefetchMode = dml_prefetch_support_stutter;
+ UseMinimumDCFCLK_params->DRAMClockChangeLatencyFinal = mode_lib->ms.state.dram_clock_change_latency_us;
+ UseMinimumDCFCLK_params->FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us;
+ UseMinimumDCFCLK_params->SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us;
+ UseMinimumDCFCLK_params->ReturnBusWidth = mode_lib->ms.soc.return_bus_width_bytes;
+ UseMinimumDCFCLK_params->RoundTripPingLatencyCycles = mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles;
+ UseMinimumDCFCLK_params->ReorderingBytes = s->ReorderingBytes;
+ UseMinimumDCFCLK_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
+ UseMinimumDCFCLK_params->MetaChunkSize = mode_lib->ms.ip.meta_chunk_size_kbytes;
+ UseMinimumDCFCLK_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
+ UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
+ UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
+ UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
+ UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
+ UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
+ UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal;
+ UseMinimumDCFCLK_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
+ UseMinimumDCFCLK_params->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation = mode_lib->ms.soc.max_avg_sdp_bw_use_normal_percent;
+ UseMinimumDCFCLK_params->PercentOfIdealSDPPortBWReceivedAfterUrgLatency = mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent;
+ UseMinimumDCFCLK_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
+ UseMinimumDCFCLK_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
+ UseMinimumDCFCLK_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes;
+ UseMinimumDCFCLK_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired;
+ UseMinimumDCFCLK_params->Interlace = mode_lib->ms.cache_display_cfg.timing.Interlace;
+ UseMinimumDCFCLK_params->RequiredDPPCLKPerSurface = mode_lib->ms.RequiredDPPCLKPerSurface;
+ UseMinimumDCFCLK_params->RequiredDISPCLK = mode_lib->ms.RequiredDISPCLK;
+ UseMinimumDCFCLK_params->UrgLatency = mode_lib->ms.UrgLatency;
+ UseMinimumDCFCLK_params->NoOfDPP = mode_lib->ms.NoOfDPP;
+ UseMinimumDCFCLK_params->ProjectedDCFCLKDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep;
+ UseMinimumDCFCLK_params->MaximumVStartup = s->MaximumVStartup;
+ UseMinimumDCFCLK_params->TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP;
+ UseMinimumDCFCLK_params->TotalNumberOfDCCActiveDPP = mode_lib->ms.TotalNumberOfDCCActiveDPP;
+ UseMinimumDCFCLK_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
+ UseMinimumDCFCLK_params->PrefetchLinesY = mode_lib->ms.PrefetchLinesY;
+ UseMinimumDCFCLK_params->PrefetchLinesC = mode_lib->ms.PrefetchLinesC;
+ UseMinimumDCFCLK_params->swath_width_luma_ub_all_states = mode_lib->ms.swath_width_luma_ub_all_states;
+ UseMinimumDCFCLK_params->swath_width_chroma_ub_all_states = mode_lib->ms.swath_width_chroma_ub_all_states;
+ UseMinimumDCFCLK_params->BytePerPixelY = mode_lib->ms.BytePerPixelY;
+ UseMinimumDCFCLK_params->BytePerPixelC = mode_lib->ms.BytePerPixelC;
+ UseMinimumDCFCLK_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
+ UseMinimumDCFCLK_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
+ UseMinimumDCFCLK_params->PDEAndMetaPTEBytesPerFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrame;
+ UseMinimumDCFCLK_params->DPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow;
+ UseMinimumDCFCLK_params->MetaRowBytes = mode_lib->ms.MetaRowBytes;
+ UseMinimumDCFCLK_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable;
+ UseMinimumDCFCLK_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma;
+ UseMinimumDCFCLK_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma;
+ UseMinimumDCFCLK_params->DCFCLKPerState = mode_lib->ms.state.dcfclk_mhz;
+ UseMinimumDCFCLK_params->DCFCLKState = mode_lib->ms.DCFCLKState;
+
+ UseMinimumDCFCLK(&mode_lib->scratch,
+ UseMinimumDCFCLK_params);
+
+ } // UseMinimumRequiredDCFCLK == true
+
+ for (j = 0; j < 2; ++j) {
+ mode_lib->ms.ReturnBWPerState[j] = dml_get_return_bw_mbps(&mode_lib->ms.soc, mode_lib->ms.state.use_ideal_dram_bw_strobe,
+ mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.DCFCLKState[j], mode_lib->ms.state.fabricclk_mhz,
+ mode_lib->ms.state.dram_speed_mts);
+ mode_lib->ms.ReturnDRAMBWPerState[j] = dml_get_return_dram_bw_mbps(&mode_lib->ms.soc, mode_lib->ms.state.use_ideal_dram_bw_strobe,
+ mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
+ mode_lib->ms.state.dram_speed_mts);
+ }
+
+ //Re-ordering Buffer Support Check
+ for (j = 0; j < 2; ++j) {
+ if ((mode_lib->ms.ip.rob_buffer_size_kbytes - mode_lib->ms.ip.pixel_chunk_size_kbytes) * 1024 / mode_lib->ms.ReturnBWPerState[j] >
+ (mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles + 32) / mode_lib->ms.DCFCLKState[j] + s->ReorderingBytes / mode_lib->ms.ReturnBWPerState[j]) {
+ mode_lib->ms.support.ROBSupport[j] = true;
+ } else {
+ mode_lib->ms.support.ROBSupport[j] = false;
+ }
+ dml_print("DML::%s: DEBUG ROBSupport[%u] = %u (%u)\n", __func__, j, mode_lib->ms.support.ROBSupport[j], __LINE__);
+ }
+
+ //Vertical Active BW support check
+ s->MaxTotalVActiveRDBandwidth = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ s->MaxTotalVActiveRDBandwidth = s->MaxTotalVActiveRDBandwidth + mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k];
+ }
+
+ for (j = 0; j < 2; ++j) {
+ mode_lib->ms.support.MaxTotalVerticalActiveAvailableBandwidth[j] = dml_min3(mode_lib->ms.soc.return_bus_width_bytes * mode_lib->ms.DCFCLKState[j] * mode_lib->ms.soc.max_avg_sdp_bw_use_normal_percent / 100.0,
+ mode_lib->ms.state.fabricclk_mhz * mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes * mode_lib->ms.soc.max_avg_fabric_bw_use_normal_percent / 100.0,
+ mode_lib->ms.state.dram_speed_mts * mode_lib->ms.soc.num_chans * mode_lib->ms.soc.dram_channel_width_bytes *
+ ((mode_lib->ms.state.use_ideal_dram_bw_strobe && !mode_lib->ms.cache_display_cfg.plane.HostVMEnable) ?
+ mode_lib->ms.soc.max_avg_dram_bw_use_normal_strobe_percent : mode_lib->ms.soc.max_avg_dram_bw_use_normal_percent) / 100.0);
+
+ if (s->MaxTotalVActiveRDBandwidth <= mode_lib->ms.support.MaxTotalVerticalActiveAvailableBandwidth[j]) {
+ mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] = true;
+ } else {
+ mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] = false;
+ }
+ }
+
+ /* Prefetch Check */
+ dml_prefetch_check(mode_lib);
+
+ // End of Prefetch Check
+ dml_print("DML::%s: Done prefetch calculation\n", __func__);
+
+ /*Cursor Support Check*/
+ mode_lib->ms.support.CursorSupport = true;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] > 0.0) {
+ if (mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] == 64 && mode_lib->ms.ip.cursor_64bpp_support == false) {
+ mode_lib->ms.support.CursorSupport = false;
+ }
+ }
+ }
+
+ /*Valid Pitch Check*/
+ mode_lib->ms.support.PitchSupport = true;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ mode_lib->ms.support.AlignedYPitch[k] = dml_ceil(
+ dml_max(mode_lib->ms.cache_display_cfg.surface.PitchY[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k]),
+ mode_lib->ms.MacroTileWidthY[k]);
+ if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
+ mode_lib->ms.support.AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k]), 64.0 * mode_lib->ms.Read256BlockWidthY[k]);
+ } else {
+ mode_lib->ms.support.AlignedDCCMetaPitchY[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k];
+ }
+ if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8) {
+ mode_lib->ms.support.AlignedCPitch[k] = dml_ceil(dml_max(mode_lib->ms.cache_display_cfg.surface.PitchC[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k]), mode_lib->ms.MacroTileWidthC[k]);
+ if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
+ mode_lib->ms.support.AlignedDCCMetaPitchC[k] = dml_ceil(dml_max(mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k]), 64.0 * mode_lib->ms.Read256BlockWidthC[k]);
+ } else {
+ mode_lib->ms.support.AlignedDCCMetaPitchC[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
+ }
+ } else {
+ mode_lib->ms.support.AlignedCPitch[k] = mode_lib->ms.cache_display_cfg.surface.PitchC[k];
+ mode_lib->ms.support.AlignedDCCMetaPitchC[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
+ }
+ if (mode_lib->ms.support.AlignedYPitch[k] > mode_lib->ms.cache_display_cfg.surface.PitchY[k] || mode_lib->ms.support.AlignedCPitch[k] > mode_lib->ms.cache_display_cfg.surface.PitchC[k] ||
+ mode_lib->ms.support.AlignedDCCMetaPitchY[k] > mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k] || mode_lib->ms.support.AlignedDCCMetaPitchC[k] > mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k]) {
+ mode_lib->ms.support.PitchSupport = false;
+ }
+ }
+
+ mode_lib->ms.support.ViewportExceedsSurface = false;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.plane.ViewportWidth[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k] || mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY[k]) {
+ mode_lib->ms.support.ViewportExceedsSurface = true;
+ if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32 &&
+ mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_8 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe) {
+ if (mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k] || mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC[k]) {
+ mode_lib->ms.support.ViewportExceedsSurface = true;
+ }
+ }
+ }
+ }
+
+ /*Mode Support, Voltage State and SOC Configuration*/
+ for (j = 0; j < 2; j++) { // j iterator is for the combine mode off or on
+ dml_print("DML::%s: checking support for j=%u\n", __func__, j);
+ dml_print("DML::%s: state_idx=%0d max_state_idx=%0d\n", __func__, mode_lib->ms.state_idx, mode_lib->ms.max_state_idx);
+
+ s->is_max_pwr_state = (mode_lib->ms.max_state_idx == mode_lib->ms.state_idx);
+ s->is_max_dram_pwr_state = (mode_lib->ms.max_state.dram_speed_mts == mode_lib->ms.state.dram_speed_mts);
+
+ s->dram_clock_change_support = (!mode_lib->ms.policy.DRAMClockChangeRequirementFinal ||
+ (s->is_max_dram_pwr_state && mode_lib->policy.AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported) ||
+ mode_lib->ms.support.DRAMClockChangeSupport[j] != dml_dram_clock_change_unsupported);
+ s->f_clock_change_support = (!mode_lib->ms.policy.FCLKChangeRequirementFinal ||
+ (s->is_max_pwr_state && mode_lib->policy.AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported) ||
+ mode_lib->ms.support.FCLKChangeSupport[j] != dml_fclock_change_unsupported);
+
+ if (mode_lib->ms.support.ScaleRatioAndTapsSupport == true
+ && mode_lib->ms.support.SourceFormatPixelAndScanSupport == true
+ && mode_lib->ms.support.ViewportSizeSupport[j] == true
+ && !mode_lib->ms.support.LinkRateDoesNotMatchDPVersion
+ && !mode_lib->ms.support.LinkRateForMultistreamNotIndicated
+ && !mode_lib->ms.support.BPPForMultistreamNotIndicated
+ && !mode_lib->ms.support.MultistreamWithHDMIOreDP
+ && !mode_lib->ms.support.ExceededMultistreamSlots
+ && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink
+ && !mode_lib->ms.support.NotEnoughLanesForMSO
+ && mode_lib->ms.support.LinkCapacitySupport == true
+ && !mode_lib->ms.support.P2IWith420
+ && !mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP
+ && !mode_lib->ms.support.DSC422NativeNotSupported
+ && !mode_lib->ms.support.MPCCombineMethodIncompatible
+ && mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK == true
+ && mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK == true
+ && mode_lib->ms.support.NotEnoughDSCUnits == false
+ && !mode_lib->ms.support.NotEnoughDSCSlices
+ && !mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe
+ && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen
+ && mode_lib->ms.support.DSCCLKRequiredMoreThanSupported == false
+ && mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport
+ && mode_lib->ms.support.DTBCLKRequiredMoreThanSupported == false
+ && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPState
+ && !mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified
+ && mode_lib->ms.support.ROBSupport[j] == true
+ && mode_lib->ms.support.DISPCLK_DPPCLK_Support[j] == true
+ && mode_lib->ms.support.TotalAvailablePipesSupport[j] == true
+ && mode_lib->ms.support.NumberOfOTGSupport == true
+ && mode_lib->ms.support.NumberOfHDMIFRLSupport == true
+ && mode_lib->ms.support.NumberOfDP2p0Support == true
+ && mode_lib->ms.support.EnoughWritebackUnits == true
+ && mode_lib->ms.support.WritebackLatencySupport == true
+ && mode_lib->ms.support.WritebackScaleRatioAndTapsSupport == true
+ && mode_lib->ms.support.CursorSupport == true
+ && mode_lib->ms.support.PitchSupport == true
+ && mode_lib->ms.support.ViewportExceedsSurface == false
+ && mode_lib->ms.support.PrefetchSupported[j] == true
+ && mode_lib->ms.support.VActiveBandwithSupport[j] == true
+ && mode_lib->ms.support.DynamicMetadataSupported[j] == true
+ && mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] == true
+ && mode_lib->ms.support.VRatioInPrefetchSupported[j] == true
+ && mode_lib->ms.support.PTEBufferSizeNotExceeded[j] == true
+ && mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] == true
+ && mode_lib->ms.support.NonsupportedDSCInputBPC == false
+ && !mode_lib->ms.support.ExceededMALLSize
+ && ((mode_lib->ms.cache_display_cfg.plane.HostVMEnable == false && !s->ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j])
+ && s->dram_clock_change_support == true
+ && s->f_clock_change_support == true
+ && (!mode_lib->ms.policy.USRRetrainingRequiredFinal || mode_lib->ms.support.USRRetrainingSupport[j])) {
+ dml_print("DML::%s: mode is supported\n", __func__);
+ mode_lib->ms.support.ModeSupport[j] = true;
+ } else {
+ dml_print("DML::%s: mode is NOT supported\n", __func__);
+ mode_lib->ms.support.ModeSupport[j] = false;
+ dml_print_mode_support(mode_lib, j);
+ }
+ }
+
+ mode_lib->ms.support.MaximumMPCCombine = 0;
+ mode_lib->ms.support.ModeIsSupported = 0;
+ if (mode_lib->ms.support.ModeSupport[0] == true || mode_lib->ms.support.ModeSupport[1] == true) { // if the mode is supported by either no combine or mpccombine
+ mode_lib->ms.support.ModeIsSupported = mode_lib->ms.support.ModeSupport[0] == true || mode_lib->ms.support.ModeSupport[1] == true;
+
+ // Determine if MPC combine is necessary, depends on if using MPC combine will help dram clock change or fclk change, etc.
+ if ((mode_lib->ms.support.ModeSupport[0] == false && mode_lib->ms.support.ModeSupport[1] == true) || s->MPCCombineMethodAsPossible ||
+ (s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && mode_lib->ms.policy.DRAMClockChangeRequirementFinal &&
+ (((mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive_w_mall_sub_vp) &&
+ !(mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive || mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive_w_mall_sub_vp)) ||
+ ((mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr
+ || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr_w_mall_full_frame
+ || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_w_mall_sub_vp || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr_w_mall_sub_vp
+ ) &&
+ mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_unsupported)))
+ || (s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && mode_lib->ms.policy.FCLKChangeRequirementFinal &&
+ ((mode_lib->ms.support.FCLKChangeSupport[1] == dml_fclock_change_vactive && mode_lib->ms.support.FCLKChangeSupport[0] != dml_fclock_change_vactive) ||
+ (mode_lib->ms.support.FCLKChangeSupport[1] == dml_fclock_change_vblank && mode_lib->ms.support.FCLKChangeSupport[0] == dml_fclock_change_unsupported)))) {
+ mode_lib->ms.support.MaximumMPCCombine = 1;
+ } else {
+ mode_lib->ms.support.MaximumMPCCombine = 0;
+ }
+ }
+
+ // Since now the mode_support work on 1 particular power state, so there is only 1 state idx (index 0).
+ mode_lib->ms.support.ImmediateFlipSupport = mode_lib->ms.support.ImmediateFlipSupportedForState[mode_lib->ms.support.MaximumMPCCombine]; // Consider flip support if max combine support imm flip
+ mode_lib->ms.support.UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabledAllStates[mode_lib->ms.support.MaximumMPCCombine]; // Not used, informational
+ mode_lib->ms.support.CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByteAllStates[mode_lib->ms.support.MaximumMPCCombine]; // Not used, informational
+
+ dml_print("DML::%s: ModeIsSupported = %u\n", __func__, mode_lib->ms.support.ModeIsSupported);
+ dml_print("DML::%s: MaximumMPCCombine = %u\n", __func__, mode_lib->ms.support.MaximumMPCCombine);
+ dml_print("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
+ dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, mode_lib->ms.support.UnboundedRequestEnabled);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, mode_lib->ms.support.CompressedBufferSizeInkByte);
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[mode_lib->ms.support.MaximumMPCCombine][k];
+ mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[mode_lib->ms.support.MaximumMPCCombine][k];
+ mode_lib->ms.SwathHeightY[k] = mode_lib->ms.SwathHeightYAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
+ mode_lib->ms.SwathHeightC[k] = mode_lib->ms.SwathHeightCAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
+ mode_lib->ms.DETBufferSizeInKByte[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
+ mode_lib->ms.DETBufferSizeY[k] = mode_lib->ms.DETBufferSizeYAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
+ mode_lib->ms.DETBufferSizeC[k] = mode_lib->ms.DETBufferSizeCAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
+ }
+
+ mode_lib->ms.DRAMSpeed = mode_lib->ms.state.dram_speed_mts;
+ mode_lib->ms.FabricClock = mode_lib->ms.state.fabricclk_mhz;
+ mode_lib->ms.SOCCLK = mode_lib->ms.state.socclk_mhz;
+ mode_lib->ms.DCFCLK = mode_lib->ms.DCFCLKState[mode_lib->ms.support.MaximumMPCCombine];
+ mode_lib->ms.ReturnBW = mode_lib->ms.ReturnBWPerState[mode_lib->ms.support.MaximumMPCCombine];
+ mode_lib->ms.ReturnDRAMBW = mode_lib->ms.ReturnDRAMBWPerState[mode_lib->ms.support.MaximumMPCCombine];
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
+ mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMModePerState[k];
+ } else {
+ mode_lib->ms.support.ODMMode[k] = dml_odm_mode_bypass;
+ }
+
+ mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k];
+ mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k];
+ mode_lib->ms.support.OutputBpp[k] = mode_lib->ms.OutputBppPerState[k];
+ mode_lib->ms.support.OutputType[k] = mode_lib->ms.OutputTypePerState[k];
+ mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRatePerState[k];
+ mode_lib->ms.support.SubViewportLinesNeededInMALL[k] = mode_lib->ms.SubViewportLinesNeededInMALL[k];
+ }
+
+ return mode_lib->ms.support.ModeIsSupported;
+} // dml_core_mode_support
+
+/// @brief This function calculates some parameters that are needed ahead of the mode programming function call
+void dml_core_mode_support_partial(struct display_mode_lib_st *mode_lib)
+{ // Takes only mode_lib and returns nothing: every argument below is read from mode_lib->ms and every result is stored back into mode_lib->ms.
+ CalculateMaxDETAndMinCompressedBufferSize( // step 1: fills the three mode_lib->ms fields passed under "Output" below
+ mode_lib->ms.ip.config_return_buffer_size_in_kbytes,
+ mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes,
+ mode_lib->ms.ip.rob_buffer_size_kbytes,
+ mode_lib->ms.ip.max_num_dpp,
+ mode_lib->ms.policy.NomDETInKByteOverrideEnable,
+ mode_lib->ms.policy.NomDETInKByteOverrideValue,
+ // NOTE: dml_core_mode_programming later copies MaxTotalDETInKByte, NomDETInKByte and MinCompressedBufferSizeInKByte into its CalculateSwathAndDETConfiguration params.
+ /* Output */
+ &mode_lib->ms.MaxTotalDETInKByte,
+ &mode_lib->ms.NomDETInKByte,
+ &mode_lib->ms.MinCompressedBufferSizeInKByte);
+ // step 2: the cached display cfg is passed by pointer, so presumably the helper adjusts pixel clocks in place for progressive-to-interlace -- TODO confirm against the helper.
+ PixelClockAdjustmentForProgressiveToInterlaceUnit(&mode_lib->ms.cache_display_cfg, mode_lib->ms.ip.ptoi_supported);
+ // step 3: recompute ms.ReturnBW from ms.DCFCLK / ms.FabricClock / ms.DRAMSpeed, the fields dml_core_mode_support assigns just before it returns.
+ mode_lib->ms.ReturnBW = dml_get_return_bw_mbps(&mode_lib->ms.soc,
+ mode_lib->ms.state.use_ideal_dram_bw_strobe,
+ mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
+ mode_lib->ms.DCFCLK,
+ mode_lib->ms.FabricClock,
+ mode_lib->ms.DRAMSpeed);
+ dml_print("DML::%s: ReturnBW = %f\n", __func__, mode_lib->ms.ReturnBW);
+
+} // dml_core_mode_support_partial
+
+/// @brief This is the mode programming function. It is assumed the display cfg is supported at the given power state
+void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struct dml_clk_cfg_st *clk_cfg)
+{
+ struct dml_core_mode_programming_locals_st *s = &mode_lib->scratch.dml_core_mode_programming_locals;
+ struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
+ struct CalculateVMRowAndSwath_params_st *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
+ struct CalculateSwathAndDETConfiguration_params_st *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
+ struct CalculateStutterEfficiency_params_st *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params;
+ struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
+
+ struct mode_program_st *locals = &mode_lib->mp;
+ struct DmlPipe *myPipe;
+ dml_uint_t j = 0, k = 0;
+ dml_float_t TWait;
+ dml_bool_t isInterlaceTiming;
+
+ mode_lib->ms.num_active_planes = dml_get_num_active_planes(&mode_lib->ms.cache_display_cfg);
+ mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(&mode_lib->ms.cache_display_cfg);
+ dml_calc_pipe_plane_mapping(&mode_lib->ms.cache_display_cfg.hw, mode_lib->mp.pipe_plane);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: --- START --- \n", __func__);
+ dml_print("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes);
+ dml_print("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes);
+#endif
+
+ s->DSCFormatFactor = 0;
+
+ // Unlike dppclk and dispclk which can be calculated in mode_programming
+ // DCFCLK is calculated in mode_support (which is the state bbox dcfclk or min dcfclk if min dcfclk option is used in mode support calculation)
+ if (clk_cfg->dcfclk_option != dml_use_override_freq)
+ locals->Dcfclk = mode_lib->ms.DCFCLK;
+ else
+ locals->Dcfclk = clk_cfg->dcfclk_mhz;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print_dml_policy(&mode_lib->ms.policy);
+ dml_print_soc_state_bounding_box(&mode_lib->ms.state);
+ dml_print_soc_bounding_box(&mode_lib->ms.soc);
+ dml_print_clk_cfg(clk_cfg);
+
+ dml_print("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
+ dml_print("DML::%s: Using DCFCLK = %f\n", __func__, locals->Dcfclk);
+ dml_print("DML::%s: Using SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK);
+#endif
+
+ locals->WritebackDISPCLK = 0.0;
+ locals->GlobalDPPCLK = 0.0;
+
+ // DISPCLK and DPPCLK Calculation
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k]) {
+ locals->WritebackDISPCLK =
+ dml_max(
+ locals->WritebackDISPCLK,
+ CalculateWriteBackDISPCLK(
+ mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackSourceWidth[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k],
+ mode_lib->ms.ip.writeback_line_buffer_buffer_size,
+ mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz));
+ }
+ }
+
+ locals->Dispclk_calculated = locals->WritebackDISPCLK;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
+ locals->Dispclk_calculated = dml_max(locals->Dispclk_calculated, CalculateRequiredDispclk(
+ mode_lib->ms.cache_display_cfg.hw.ODMMode[k],
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.soc.dcn_downspread_percent,
+ mode_lib->ms.ip.dispclk_ramp_margin_percent,
+ mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
+ mode_lib->ms.max_state.dispclk_mhz));
+ }
+ }
+ if (clk_cfg->dispclk_option == dml_use_required_freq)
+ locals->Dispclk = locals->Dispclk_calculated;
+ else if (clk_cfg->dispclk_option == dml_use_override_freq)
+ locals->Dispclk = clk_cfg->dispclk_mhz;
+ else
+ locals->Dispclk = mode_lib->ms.state.dispclk_mhz;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Using Dispclk = %f\n", __func__, locals->Dispclk);
+#endif
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ CalculateSinglePipeDPPCLKAndSCLThroughput(
+ mode_lib->ms.cache_display_cfg.plane.HRatio[k],
+ mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k],
+ mode_lib->ms.cache_display_cfg.plane.VRatio[k],
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
+ mode_lib->ms.ip.max_dchub_pscl_bw_pix_per_clk,
+ mode_lib->ms.ip.max_pscl_lb_bw_pix_per_clk,
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
+ mode_lib->ms.cache_display_cfg.plane.HTaps[k],
+ mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k],
+ mode_lib->ms.cache_display_cfg.plane.VTaps[k],
+ mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k],
+
+ /* Output */
+ &locals->PSCL_THROUGHPUT[k],
+ &locals->PSCL_THROUGHPUT_CHROMA[k],
+ &locals->DPPCLKUsingSingleDPP[k]);
+ }
+
+ CalculateDPPCLK(mode_lib->ms.num_active_planes,
+ mode_lib->ms.soc.dcn_downspread_percent,
+ mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
+ locals->DPPCLKUsingSingleDPP,
+ mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+ /* Output */
+ &locals->GlobalDPPCLK,
+ locals->Dppclk_calculated);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (clk_cfg->dppclk_option[k] == dml_use_required_freq)
+ locals->Dppclk[k] = locals->Dppclk_calculated[k];
+ else if (clk_cfg->dppclk_option[k] == dml_use_override_freq)
+ locals->Dppclk[k] = clk_cfg->dppclk_mhz[k];
+ else
+ locals->Dppclk[k] = mode_lib->ms.state.dppclk_mhz;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Using Dppclk[%0d] = %f\n", __func__, k, locals->Dppclk[k]);
+#endif
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ CalculateBytePerPixelAndBlockSizes(
+ mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
+ mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k],
+
+ /* Output */
+ &locals->BytePerPixelY[k],
+ &locals->BytePerPixelC[k],
+ &locals->BytePerPixelDETY[k],
+ &locals->BytePerPixelDETC[k],
+ &locals->BlockHeight256BytesY[k],
+ &locals->BlockHeight256BytesC[k],
+ &locals->BlockWidth256BytesY[k],
+ &locals->BlockWidth256BytesC[k],
+ &locals->BlockHeightY[k],
+ &locals->BlockHeightC[k],
+ &locals->BlockWidthY[k],
+ &locals->BlockWidthC[k]);
+ }
+
+
+ dml_print("DML::%s: %u\n", __func__, __LINE__);
+ CalculateSwathWidth(
+ false, // ForceSingleDPP
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat,
+ mode_lib->ms.cache_display_cfg.plane.SourceScan,
+ mode_lib->ms.cache_display_cfg.plane.ViewportStationary,
+ mode_lib->ms.cache_display_cfg.plane.ViewportWidth,
+ mode_lib->ms.cache_display_cfg.plane.ViewportHeight,
+ mode_lib->ms.cache_display_cfg.plane.ViewportXStart,
+ mode_lib->ms.cache_display_cfg.plane.ViewportYStart,
+ mode_lib->ms.cache_display_cfg.plane.ViewportXStartC,
+ mode_lib->ms.cache_display_cfg.plane.ViewportYStartC,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC,
+ mode_lib->ms.cache_display_cfg.hw.ODMMode,
+ locals->BytePerPixelY,
+ locals->BytePerPixelC,
+ locals->BlockHeight256BytesY,
+ locals->BlockHeight256BytesC,
+ locals->BlockWidth256BytesY,
+ locals->BlockWidth256BytesC,
+ mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming,
+ mode_lib->ms.cache_display_cfg.timing.HActive,
+ mode_lib->ms.cache_display_cfg.plane.HRatio,
+ mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+
+ /* Output */
+ locals->SwathWidthSingleDPPY,
+ locals->SwathWidthSingleDPPC,
+ locals->SwathWidthY,
+ locals->SwathWidthC,
+ s->dummy_integer_array[0], // dml_uint_t MaximumSwathHeightY[]
+ s->dummy_integer_array[1], // dml_uint_t MaximumSwathHeightC[]
+ locals->swath_width_luma_ub,
+ locals->swath_width_chroma_ub);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ locals->ReadBandwidthSurfaceLuma[k] = locals->SwathWidthSingleDPPY[k] * locals->BytePerPixelY[k] / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
+ locals->ReadBandwidthSurfaceChroma[k] = locals->SwathWidthSingleDPPC[k] * locals->BytePerPixelC[k] / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k];
+ dml_print("DML::%s: ReadBandwidthSurfaceLuma[%i] = %fBps\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: ReadBandwidthSurfaceChroma[%i] = %fBps\n", __func__, k, locals->ReadBandwidthSurfaceChroma[k]);
+ }
+
+ CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride;
+ CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
+ CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
+ CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
+ CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
+ CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
+ CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting;
+ CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->Output = s->dummy_output_encoder_array;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = locals->ReadBandwidthSurfaceLuma;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = locals->ReadBandwidthSurfaceChroma;
+ CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0];
+ CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1];
+ CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
+ CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary;
+ CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat;
+ CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling;
+ CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth;
+ CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight;
+ CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart;
+ CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart;
+ CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC;
+ CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC;
+ CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY;
+ CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC;
+ CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY;
+ CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = locals->BlockHeight256BytesY;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = locals->BlockHeight256BytesC;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = locals->BlockWidth256BytesY;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = locals->BlockWidth256BytesC;
+ CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.cache_display_cfg.hw.ODMMode;
+ CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
+ CalculateSwathAndDETConfiguration_params->BytePerPixY = locals->BytePerPixelY;
+ CalculateSwathAndDETConfiguration_params->BytePerPixC = locals->BytePerPixelC;
+ CalculateSwathAndDETConfiguration_params->BytePerPixDETY = locals->BytePerPixelDETY;
+ CalculateSwathAndDETConfiguration_params->BytePerPixDETC = locals->BytePerPixelDETC;
+ CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive;
+ CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
+ CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
+ CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface;
+ CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_long_array[0];
+ CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_long_array[1];
+ CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_long_array[2];
+ CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_long_array[3];
+ CalculateSwathAndDETConfiguration_params->SwathHeightY = locals->SwathHeightY;
+ CalculateSwathAndDETConfiguration_params->SwathHeightC = locals->SwathHeightC;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = locals->DETBufferSizeInKByte;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeY = locals->DETBufferSizeY;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeC = locals->DETBufferSizeC;
+ CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &locals->UnboundedRequestEnabled;
+ CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &locals->compbuf_reserved_space_64b;
+ CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &locals->compbuf_reserved_space_zs;
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &locals->CompressedBufferSizeInkByte;
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = &s->dummy_boolean_array[0][0];
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[0];
+
+ // VBA_DELTA
+ // Calculate DET size, swath height here. In VBA, they are calculated in mode check stage
+ CalculateSwathAndDETConfiguration(&mode_lib->scratch,
+ CalculateSwathAndDETConfiguration_params);
+
+ // DCFCLK Deep Sleep
+ CalculateDCFCLKDeepSleep(
+ mode_lib->ms.num_active_planes,
+ locals->BytePerPixelY,
+ locals->BytePerPixelC,
+ mode_lib->ms.cache_display_cfg.plane.VRatio,
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
+ locals->SwathWidthY,
+ locals->SwathWidthC,
+ mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+ mode_lib->ms.cache_display_cfg.plane.HRatio,
+ mode_lib->ms.cache_display_cfg.plane.HRatioChroma,
+ mode_lib->ms.cache_display_cfg.timing.PixelClock,
+ locals->PSCL_THROUGHPUT,
+ locals->PSCL_THROUGHPUT_CHROMA,
+ locals->Dppclk,
+ locals->ReadBandwidthSurfaceLuma,
+ locals->ReadBandwidthSurfaceChroma,
+ mode_lib->ms.soc.return_bus_width_bytes,
+
+ /* Output */
+ &locals->DCFCLKDeepSleep);
+
+ // DSCCLK
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if ((mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] != k) || !mode_lib->ms.cache_display_cfg.hw.DSCEnabled[k]) {
+ locals->DSCCLK_calculated[k] = 0.0;
+ } else {
+ if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420)
+ s->DSCFormatFactor = 2;
+ else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_444)
+ s->DSCFormatFactor = 1;
+ else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)
+ s->DSCFormatFactor = 2;
+ else
+ s->DSCFormatFactor = 1;
+ if (mode_lib->ms.cache_display_cfg.hw.ODMMode[k] == dml_odm_mode_combine_4to1)
+ locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 12 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100);
+ else if (mode_lib->ms.cache_display_cfg.hw.ODMMode[k] == dml_odm_mode_combine_2to1)
+ locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 6 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100);
+ else
+ locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 3 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100);
+ }
+ }
+
+ // DSC Delay
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ locals->DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.cache_display_cfg.hw.DSCEnabled[k],
+ mode_lib->ms.cache_display_cfg.hw.ODMMode[k],
+ mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k],
+ mode_lib->ms.cache_display_cfg.output.OutputBpp[k],
+ mode_lib->ms.cache_display_cfg.timing.HActive[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k],
+ mode_lib->ms.cache_display_cfg.hw.NumberOfDSCSlices[k],
+ mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]);
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
+ for (j = 0; j < mode_lib->ms.num_active_planes; ++j) // NumberOfSurfaces
+ if (j != k && mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == j && mode_lib->ms.cache_display_cfg.hw.DSCEnabled[j])
+ locals->DSCDelay[k] = locals->DSCDelay[j];
+
+ // Prefetch
+ CalculateSurfaceSizeInMall(
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.soc.mall_allocated_for_dcn_mbytes,
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen,
+ mode_lib->ms.cache_display_cfg.surface.DCCEnable,
+ mode_lib->ms.cache_display_cfg.plane.ViewportStationary,
+ mode_lib->ms.cache_display_cfg.plane.ViewportXStart,
+ mode_lib->ms.cache_display_cfg.plane.ViewportYStart,
+ mode_lib->ms.cache_display_cfg.plane.ViewportXStartC,
+ mode_lib->ms.cache_display_cfg.plane.ViewportYStartC,
+ mode_lib->ms.cache_display_cfg.plane.ViewportWidth,
+ mode_lib->ms.cache_display_cfg.plane.ViewportHeight,
+ locals->BytePerPixelY,
+ mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma,
+ mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma,
+ locals->BytePerPixelC,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC,
+ locals->BlockWidth256BytesY,
+ locals->BlockWidth256BytesC,
+ locals->BlockHeight256BytesY,
+ locals->BlockHeight256BytesC,
+ locals->BlockWidthY,
+ locals->BlockWidthC,
+ locals->BlockHeightY,
+ locals->BlockHeightC,
+
+ /* Output */
+ locals->SurfaceSizeInTheMALL,
+ &s->dummy_boolean[0]); /* dml_bool_t *ExceededMALLSize */
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ s->SurfaceParameters[k].PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
+ s->SurfaceParameters[k].DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
+ s->SurfaceParameters[k].SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
+ s->SurfaceParameters[k].ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k];
+ s->SurfaceParameters[k].ViewportHeightChroma = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k];
+ s->SurfaceParameters[k].BlockWidth256BytesY = locals->BlockWidth256BytesY[k];
+ s->SurfaceParameters[k].BlockHeight256BytesY = locals->BlockHeight256BytesY[k];
+ s->SurfaceParameters[k].BlockWidth256BytesC = locals->BlockWidth256BytesC[k];
+ s->SurfaceParameters[k].BlockHeight256BytesC = locals->BlockHeight256BytesC[k];
+ s->SurfaceParameters[k].BlockWidthY = locals->BlockWidthY[k];
+ s->SurfaceParameters[k].BlockHeightY = locals->BlockHeightY[k];
+ s->SurfaceParameters[k].BlockWidthC = locals->BlockWidthC[k];
+ s->SurfaceParameters[k].BlockHeightC = locals->BlockHeightC[k];
+ s->SurfaceParameters[k].InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
+ s->SurfaceParameters[k].HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
+ s->SurfaceParameters[k].DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
+ s->SurfaceParameters[k].SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
+ s->SurfaceParameters[k].SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k];
+ s->SurfaceParameters[k].BytePerPixelY = locals->BytePerPixelY[k];
+ s->SurfaceParameters[k].BytePerPixelC = locals->BytePerPixelC[k];
+ s->SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
+ s->SurfaceParameters[k].VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio[k];
+ s->SurfaceParameters[k].VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k];
+ s->SurfaceParameters[k].VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps[k];
+ s->SurfaceParameters[k].VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k];
+ s->SurfaceParameters[k].PitchY = mode_lib->ms.cache_display_cfg.surface.PitchY[k];
+ s->SurfaceParameters[k].DCCMetaPitchY = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k];
+ s->SurfaceParameters[k].PitchC = mode_lib->ms.cache_display_cfg.surface.PitchC[k];
+ s->SurfaceParameters[k].DCCMetaPitchC = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
+ s->SurfaceParameters[k].ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary[k];
+ s->SurfaceParameters[k].ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart[k];
+ s->SurfaceParameters[k].ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart[k];
+ s->SurfaceParameters[k].ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC[k];
+ s->SurfaceParameters[k].ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC[k];
+ s->SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->ms.cache_display_cfg.plane.ForceOneRowForFrame[k];
+ s->SurfaceParameters[k].SwathHeightY = locals->SwathHeightY[k];
+ s->SurfaceParameters[k].SwathHeightC = locals->SwathHeightC[k];
+ }
+
+ CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ CalculateVMRowAndSwath_params->myPipe = s->SurfaceParameters;
+ CalculateVMRowAndSwath_params->SurfaceSizeInMALL = locals->SurfaceSizeInTheMALL;
+ CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_luma;
+ CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_chroma;
+ CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ms.ip.dcc_meta_buffer_size_bytes;
+ CalculateVMRowAndSwath_params->UseMALLForStaticScreen = mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen;
+ CalculateVMRowAndSwath_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
+ CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->ms.soc.mall_allocated_for_dcn_mbytes;
+ CalculateVMRowAndSwath_params->SwathWidthY = locals->SwathWidthY;
+ CalculateVMRowAndSwath_params->SwathWidthC = locals->SwathWidthC;
+ CalculateVMRowAndSwath_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
+ CalculateVMRowAndSwath_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
+ CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
+ CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
+ CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
+ CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
+ CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
+ CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
+ CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0];
+ CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = s->dummy_boolean_array[1];
+ CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = locals->dpte_row_width_luma_ub;
+ CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = locals->dpte_row_width_chroma_ub;
+ CalculateVMRowAndSwath_params->dpte_row_height_luma = locals->dpte_row_height;
+ CalculateVMRowAndSwath_params->dpte_row_height_chroma = locals->dpte_row_height_chroma;
+ CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = locals->dpte_row_height_linear;
+ CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = locals->dpte_row_height_linear_chroma;
+ CalculateVMRowAndSwath_params->meta_req_width = locals->meta_req_width;
+ CalculateVMRowAndSwath_params->meta_req_width_chroma = locals->meta_req_width_chroma;
+ CalculateVMRowAndSwath_params->meta_req_height = locals->meta_req_height;
+ CalculateVMRowAndSwath_params->meta_req_height_chroma = locals->meta_req_height_chroma;
+ CalculateVMRowAndSwath_params->meta_row_width = locals->meta_row_width;
+ CalculateVMRowAndSwath_params->meta_row_width_chroma = locals->meta_row_width_chroma;
+ CalculateVMRowAndSwath_params->meta_row_height = locals->meta_row_height;
+ CalculateVMRowAndSwath_params->meta_row_height_chroma = locals->meta_row_height_chroma;
+ CalculateVMRowAndSwath_params->vm_group_bytes = locals->vm_group_bytes;
+ CalculateVMRowAndSwath_params->dpte_group_bytes = locals->dpte_group_bytes;
+ CalculateVMRowAndSwath_params->PixelPTEReqWidthY = locals->PixelPTEReqWidthY;
+ CalculateVMRowAndSwath_params->PixelPTEReqHeightY = locals->PixelPTEReqHeightY;
+ CalculateVMRowAndSwath_params->PTERequestSizeY = locals->PTERequestSizeY;
+ CalculateVMRowAndSwath_params->PixelPTEReqWidthC = locals->PixelPTEReqWidthC;
+ CalculateVMRowAndSwath_params->PixelPTEReqHeightC = locals->PixelPTEReqHeightC;
+ CalculateVMRowAndSwath_params->PTERequestSizeC = locals->PTERequestSizeC;
+ CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = locals->dpde0_bytes_per_frame_ub_l;
+ CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = locals->meta_pte_bytes_per_frame_ub_l;
+ CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = locals->dpde0_bytes_per_frame_ub_c;
+ CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = locals->meta_pte_bytes_per_frame_ub_c;
+ CalculateVMRowAndSwath_params->PrefetchSourceLinesY = locals->PrefetchSourceLinesY;
+ CalculateVMRowAndSwath_params->PrefetchSourceLinesC = locals->PrefetchSourceLinesC;
+ CalculateVMRowAndSwath_params->VInitPreFillY = locals->VInitPreFillY;
+ CalculateVMRowAndSwath_params->VInitPreFillC = locals->VInitPreFillC;
+ CalculateVMRowAndSwath_params->MaxNumSwathY = locals->MaxNumSwathY;
+ CalculateVMRowAndSwath_params->MaxNumSwathC = locals->MaxNumSwathC;
+ CalculateVMRowAndSwath_params->meta_row_bw = locals->meta_row_bw;
+ CalculateVMRowAndSwath_params->dpte_row_bw = locals->dpte_row_bw;
+ CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = locals->PixelPTEBytesPerRow;
+ CalculateVMRowAndSwath_params->PDEAndMetaPTEBytesFrame = locals->PDEAndMetaPTEBytesFrame;
+ CalculateVMRowAndSwath_params->MetaRowByte = locals->MetaRowByte;
+ CalculateVMRowAndSwath_params->use_one_row_for_frame = locals->use_one_row_for_frame;
+ CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = locals->use_one_row_for_frame_flip;
+ CalculateVMRowAndSwath_params->UsesMALLForStaticScreen = locals->UsesMALLForStaticScreen;
+ CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = locals->PTE_BUFFER_MODE;
+ CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = locals->BIGK_FRAGMENT_SIZE;
+
+ CalculateVMRowAndSwath(&mode_lib->scratch,
+ CalculateVMRowAndSwath_params);
+
+ s->ReorderBytes = (dml_uint_t)(mode_lib->ms.soc.num_chans * dml_max3(
+ mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_only_bytes,
+ mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
+ mode_lib->ms.soc.urgent_out_of_order_return_per_channel_vm_only_bytes));
+
+ s->VMDataOnlyReturnBW = dml_get_return_bw_mbps_vm_only(&mode_lib->ms.soc,
+ mode_lib->ms.state.use_ideal_dram_bw_strobe,
+ mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
+ locals->Dcfclk,
+ mode_lib->ms.FabricClock,
+ mode_lib->ms.DRAMSpeed);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: locals->Dcfclk = %f\n", __func__, locals->Dcfclk);
+ dml_print("DML::%s: mode_lib->ms.soc.return_bus_width_bytes = %u\n", __func__, mode_lib->ms.soc.return_bus_width_bytes);
+ dml_print("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
+ dml_print("DML::%s: mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes = %u\n", __func__, mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes);
+ dml_print("DML::%s: mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent = %f\n", __func__, mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent);
+ dml_print("DML::%s: mode_lib->ms.DRAMSpeed = %f\n", __func__, mode_lib->ms.DRAMSpeed);
+ dml_print("DML::%s: mode_lib->ms.soc.num_chans = %u\n", __func__, mode_lib->ms.soc.num_chans);
+ dml_print("DML::%s: mode_lib->ms.soc.dram_channel_width_bytes = %u\n", __func__, mode_lib->ms.soc.dram_channel_width_bytes);
+ dml_print("DML::%s: mode_lib->ms.state_idx = %u\n", __func__, mode_lib->ms.state_idx);
+ dml_print("DML::%s: mode_lib->ms.max_state_idx = %u\n", __func__, mode_lib->ms.max_state_idx);
+ dml_print("DML::%s: mode_lib->ms.state.use_ideal_dram_bw_strobe = %u\n", __func__, mode_lib->ms.state.use_ideal_dram_bw_strobe);
+ dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, s->VMDataOnlyReturnBW);
+ dml_print("DML::%s: ReturnBW = %f\n", __func__, mode_lib->ms.ReturnBW);
+#endif
+
+ s->HostVMInefficiencyFactor = 1.0;
+ if (mode_lib->ms.cache_display_cfg.plane.GPUVMEnable && mode_lib->ms.cache_display_cfg.plane.HostVMEnable)
+ s->HostVMInefficiencyFactor = mode_lib->ms.ReturnBW / s->VMDataOnlyReturnBW;
+
+ s->TotalDCCActiveDPP = 0;
+ s->TotalActiveDPP = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ s->TotalActiveDPP = s->TotalActiveDPP + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
+ if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k])
+ s->TotalDCCActiveDPP = s->TotalDCCActiveDPP + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
+ }
+
+ locals->UrgentExtraLatency = CalculateExtraLatency(
+ mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles,
+ s->ReorderBytes,
+ locals->Dcfclk,
+ s->TotalActiveDPP,
+ mode_lib->ms.ip.pixel_chunk_size_kbytes,
+ s->TotalDCCActiveDPP,
+ mode_lib->ms.ip.meta_chunk_size_kbytes,
+ mode_lib->ms.ReturnBW,
+ mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
+ mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+ locals->dpte_group_bytes,
+ s->HostVMInefficiencyFactor,
+ mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
+ mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels);
+
+ locals->TCalc = 24.0 / locals->DCFCLKDeepSleep;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
+ if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
+ locals->WritebackDelay[k] =
+ mode_lib->ms.state.writeback_latency_us
+ + CalculateWriteBackDelay(
+ mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / locals->Dispclk;
+ } else
+ locals->WritebackDelay[k] = 0;
+ for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[j] == k
+ && mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[j] == true) {
+ locals->WritebackDelay[k] =
+ dml_max(
+ locals->WritebackDelay[k],
+ mode_lib->ms.state.writeback_latency_us
+ + CalculateWriteBackDelay(
+ mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[j],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[j],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[j],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[j],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[j],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[j],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[j],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / locals->Dispclk);
+ }
+ }
+ }
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
+ for (j = 0; j < mode_lib->ms.num_active_planes; ++j)
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == j)
+ locals->WritebackDelay[k] = locals->WritebackDelay[j];
+
+ locals->UrgentLatency = CalculateUrgentLatency(mode_lib->ms.state.urgent_latency_pixel_data_only_us,
+ mode_lib->ms.state.urgent_latency_pixel_mixed_with_vm_data_us,
+ mode_lib->ms.state.urgent_latency_vm_data_only_us,
+ mode_lib->ms.soc.do_urgent_latency_adjustment,
+ mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_component_us,
+ mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_reference_mhz,
+ mode_lib->ms.FabricClock);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ CalculateUrgentBurstFactor(mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
+ locals->swath_width_luma_ub[k],
+ locals->swath_width_chroma_ub[k],
+ locals->SwathHeightY[k],
+ locals->SwathHeightC[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ locals->UrgentLatency,
+ mode_lib->ms.ip.cursor_buffer_size,
+ mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
+ mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
+ mode_lib->ms.cache_display_cfg.plane.VRatio[k],
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
+ locals->BytePerPixelDETY[k],
+ locals->BytePerPixelDETC[k],
+ locals->DETBufferSizeY[k],
+ locals->DETBufferSizeC[k],
+
+ /* output */
+ &locals->UrgBurstFactorCursor[k],
+ &locals->UrgBurstFactorLuma[k],
+ &locals->UrgBurstFactorChroma[k],
+ &locals->NoUrgentLatencyHiding[k]);
+
+ locals->cursor_bw[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 /
+ ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
+ }
+
+ s->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
+ s->MaxVStartupAllPlanes = 0;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ s->MaxVStartupLines[k] = CalculateMaxVStartup(k,
+ mode_lib->ms.ip.ptoi_supported,
+ mode_lib->ms.ip.vblank_nom_default_us,
+ &mode_lib->ms.cache_display_cfg.timing,
+ locals->WritebackDelay[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
+ dml_print("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, locals->WritebackDelay[k]);
+#endif
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
+ s->MaxVStartupAllPlanes = (dml_uint_t)(dml_max(s->MaxVStartupAllPlanes, s->MaxVStartupLines[k]));
+
+ s->ImmediateFlipRequirementFinal = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ s->ImmediateFlipRequirementFinal = s->ImmediateFlipRequirementFinal || (mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required);
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ImmediateFlipRequirementFinal = %u\n", __func__, s->ImmediateFlipRequirementFinal);
+#endif
+
+ // The prefetch scheduling should only be calculated once as per the AllowForPStateChangeOrStutterInVBlank requirement.
+ // If the AllowForPStateChangeOrStutterInVBlank requirement is not strict (i.e. only try those power saving features
+ // if possible), then it will try to program for the best power saving features in order of difficulty (dram, fclk, stutter).
+ s->iteration = 0;
+ s->MaxTotalRDBandwidth = 0;
+ s->AllPrefetchModeTested = false;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ CalculatePrefetchMode(mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k], &s->MinPrefetchMode[k], &s->MaxPrefetchMode[k]);
+ s->NextPrefetchMode[k] = s->MinPrefetchMode[k];
+ }
+
+ do {
+ s->MaxTotalRDBandwidthNoUrgentBurst = 0.0;
+ s->DestinationLineTimesForPrefetchLessThan2 = false;
+ s->VRatioPrefetchMoreThanMax = false;
+
+ dml_print("DML::%s: Start one iteration: VStartupLines = %u\n", __func__, s->VStartupLines);
+
+ s->AllPrefetchModeTested = true;
+ s->MaxTotalRDBandwidth = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ locals->PrefetchMode[k] = s->NextPrefetchMode[k];
+ TWait = CalculateTWait(
+ locals->PrefetchMode[k],
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
+ mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ mode_lib->ms.cache_display_cfg.timing.DRRDisplay[k],
+ mode_lib->ms.state.dram_clock_change_latency_us,
+ mode_lib->ms.state.fclk_change_latency_us,
+ locals->UrgentLatency,
+ mode_lib->ms.state.sr_enter_plus_exit_time_us);
+
+ myPipe = &s->myPipe;
+ myPipe->Dppclk = locals->Dppclk[k];
+ myPipe->Dispclk = locals->Dispclk;
+ myPipe->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
+ myPipe->DCFClkDeepSleep = locals->DCFCLKDeepSleep;
+ myPipe->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
+ myPipe->ScalerEnabled = mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k];
+ myPipe->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
+ myPipe->BlockWidth256BytesY = locals->BlockWidth256BytesY[k];
+ myPipe->BlockHeight256BytesY = locals->BlockHeight256BytesY[k];
+ myPipe->BlockWidth256BytesC = locals->BlockWidth256BytesC[k];
+ myPipe->BlockHeight256BytesC = locals->BlockHeight256BytesC[k];
+ myPipe->InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
+ myPipe->NumberOfCursors = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k];
+ myPipe->VBlank = mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k];
+ myPipe->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
+ myPipe->HActive = mode_lib->ms.cache_display_cfg.timing.HActive[k];
+ myPipe->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
+ myPipe->ODMMode = mode_lib->ms.cache_display_cfg.hw.ODMMode[k];
+ myPipe->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
+ myPipe->BytePerPixelY = locals->BytePerPixelY[k];
+ myPipe->BytePerPixelC = locals->BytePerPixelC[k];
+ myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
+ dml_print("DML::%s: AllowForPStateChangeOrStutterInVBlank = %u\n", __func__, mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k]);
+ dml_print("DML::%s: PrefetchMode[k] = %u (Min=%u Max=%u)\n", __func__, locals->PrefetchMode[k], s->MinPrefetchMode[k], s->MaxPrefetchMode[k]);
+#endif
+
+ CalculatePrefetchSchedule_params->EnhancedPrefetchScheduleAccelerationFinal = mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal;
+ CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactor;
+ CalculatePrefetchSchedule_params->myPipe = myPipe;
+ CalculatePrefetchSchedule_params->DSCDelay = locals->DSCDelay[k];
+ CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ms.ip.dppclk_delay_subtotal + mode_lib->ms.ip.dppclk_delay_cnvc_formatter;
+ CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ms.ip.dppclk_delay_scl;
+ CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ms.ip.dppclk_delay_scl_lb_only;
+ CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ms.ip.dppclk_delay_cnvc_cursor;
+ CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ms.ip.dispclk_delay_subtotal;
+ CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (dml_uint_t)(locals->SwathWidthY[k] / mode_lib->ms.cache_display_cfg.plane.HRatio[k]);
+ CalculatePrefetchSchedule_params->OutputFormat = mode_lib->ms.cache_display_cfg.output.OutputFormat[k];
+ CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters;
+ CalculatePrefetchSchedule_params->VStartup = (dml_uint_t)(dml_min(s->VStartupLines, s->MaxVStartupLines[k]));
+ CalculatePrefetchSchedule_params->MaxVStartup = s->MaxVStartupLines[k];
+ CalculatePrefetchSchedule_params->GPUVMPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
+ CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
+ CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
+ CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
+ CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
+ CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k];
+ CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
+ CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k];
+ CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes[k];
+ CalculatePrefetchSchedule_params->UrgentLatency = locals->UrgentLatency;
+ CalculatePrefetchSchedule_params->UrgentExtraLatency = locals->UrgentExtraLatency;
+ CalculatePrefetchSchedule_params->TCalc = locals->TCalc;
+ CalculatePrefetchSchedule_params->PDEAndMetaPTEBytesFrame = locals->PDEAndMetaPTEBytesFrame[k];
+ CalculatePrefetchSchedule_params->MetaRowByte = locals->MetaRowByte[k];
+ CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = locals->PixelPTEBytesPerRow[k];
+ CalculatePrefetchSchedule_params->PrefetchSourceLinesY = locals->PrefetchSourceLinesY[k];
+ CalculatePrefetchSchedule_params->VInitPreFillY = locals->VInitPreFillY[k];
+ CalculatePrefetchSchedule_params->MaxNumSwathY = locals->MaxNumSwathY[k];
+ CalculatePrefetchSchedule_params->PrefetchSourceLinesC = locals->PrefetchSourceLinesC[k];
+ CalculatePrefetchSchedule_params->VInitPreFillC = locals->VInitPreFillC[k];
+ CalculatePrefetchSchedule_params->MaxNumSwathC = locals->MaxNumSwathC[k];
+ CalculatePrefetchSchedule_params->swath_width_luma_ub = locals->swath_width_luma_ub[k];
+ CalculatePrefetchSchedule_params->swath_width_chroma_ub = locals->swath_width_chroma_ub[k];
+ CalculatePrefetchSchedule_params->SwathHeightY = locals->SwathHeightY[k];
+ CalculatePrefetchSchedule_params->SwathHeightC = locals->SwathHeightC[k];
+ CalculatePrefetchSchedule_params->TWait = TWait;
+ CalculatePrefetchSchedule_params->DSTXAfterScaler = &locals->DSTXAfterScaler[k];
+ CalculatePrefetchSchedule_params->DSTYAfterScaler = &locals->DSTYAfterScaler[k];
+ CalculatePrefetchSchedule_params->DestinationLinesForPrefetch = &locals->DestinationLinesForPrefetch[k];
+ CalculatePrefetchSchedule_params->DestinationLinesToRequestVMInVBlank = &locals->DestinationLinesToRequestVMInVBlank[k];
+ CalculatePrefetchSchedule_params->DestinationLinesToRequestRowInVBlank = &locals->DestinationLinesToRequestRowInVBlank[k];
+ CalculatePrefetchSchedule_params->VRatioPrefetchY = &locals->VRatioPrefetchY[k];
+ CalculatePrefetchSchedule_params->VRatioPrefetchC = &locals->VRatioPrefetchC[k];
+ CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWLuma = &locals->RequiredPrefetchPixDataBWLuma[k];
+ CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWChroma = &locals->RequiredPrefetchPixDataBWChroma[k];
+ CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &locals->NotEnoughTimeForDynamicMetadata[k];
+ CalculatePrefetchSchedule_params->Tno_bw = &locals->Tno_bw[k];
+ CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &locals->prefetch_vmrow_bw[k];
+ CalculatePrefetchSchedule_params->Tdmdl_vm = &locals->Tdmdl_vm[k];
+ CalculatePrefetchSchedule_params->Tdmdl = &locals->Tdmdl[k];
+ CalculatePrefetchSchedule_params->TSetup = &locals->TSetup[k];
+ CalculatePrefetchSchedule_params->VUpdateOffsetPix = &locals->VUpdateOffsetPix[k];
+ CalculatePrefetchSchedule_params->VUpdateWidthPix = &locals->VUpdateWidthPix[k];
+ CalculatePrefetchSchedule_params->VReadyOffsetPix = &locals->VReadyOffsetPix[k];
+
+ locals->NoTimeToPrefetch[k] =
+ CalculatePrefetchSchedule(&mode_lib->scratch,
+ CalculatePrefetchSchedule_params);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, locals->NoTimeToPrefetch[k]);
+#endif
+ locals->VStartup[k] = (dml_uint_t)(dml_min(s->VStartupLines, s->MaxVStartupLines[k]));
+ locals->VStartupMin[k] = locals->VStartup[k];
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ CalculateUrgentBurstFactor(
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
+ locals->swath_width_luma_ub[k],
+ locals->swath_width_chroma_ub[k],
+ locals->SwathHeightY[k],
+ locals->SwathHeightC[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ locals->UrgentLatency,
+ mode_lib->ms.ip.cursor_buffer_size,
+ mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
+ mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
+ locals->VRatioPrefetchY[k],
+ locals->VRatioPrefetchC[k],
+ locals->BytePerPixelDETY[k],
+ locals->BytePerPixelDETC[k],
+ locals->DETBufferSizeY[k],
+ locals->DETBufferSizeC[k],
+ /* Output */
+ &locals->UrgBurstFactorCursorPre[k],
+ &locals->UrgBurstFactorLumaPre[k],
+ &locals->UrgBurstFactorChromaPre[k],
+ &locals->NoUrgentLatencyHidingPre[k]);
+
+ locals->cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * locals->VRatioPrefetchY[k];
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k]);
+ dml_print("DML::%s: k=%0u UrgBurstFactorLuma=%f\n", __func__, k, locals->UrgBurstFactorLuma[k]);
+ dml_print("DML::%s: k=%0u UrgBurstFactorChroma=%f\n", __func__, k, locals->UrgBurstFactorChroma[k]);
+ dml_print("DML::%s: k=%0u UrgBurstFactorLumaPre=%f\n", __func__, k, locals->UrgBurstFactorLumaPre[k]);
+ dml_print("DML::%s: k=%0u UrgBurstFactorChromaPre=%f\n", __func__, k, locals->UrgBurstFactorChromaPre[k]);
+
+ dml_print("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, locals->VRatioPrefetchY[k]);
+ dml_print("DML::%s: k=%0u VRatioY=%f\n", __func__, k, mode_lib->ms.cache_display_cfg.plane.VRatio[k]);
+
+ dml_print("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, locals->prefetch_vmrow_bw[k]);
+ dml_print("DML::%s: k=%0u ReadBandwidthSurfaceLuma=%f\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: k=%0u ReadBandwidthSurfaceChroma=%f\n", __func__, k, locals->ReadBandwidthSurfaceChroma[k]);
+ dml_print("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, locals->cursor_bw[k]);
+ dml_print("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, locals->meta_row_bw[k]);
+ dml_print("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, locals->dpte_row_bw[k]);
+ dml_print("DML::%s: k=%0u RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, locals->RequiredPrefetchPixDataBWLuma[k]);
+ dml_print("DML::%s: k=%0u RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, locals->RequiredPrefetchPixDataBWChroma[k]);
+ dml_print("DML::%s: k=%0u cursor_bw_pre=%f\n", __func__, k, locals->cursor_bw_pre[k]);
+ dml_print("DML::%s: k=%0u MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, s->MaxTotalRDBandwidthNoUrgentBurst);
+#endif
+ if (locals->DestinationLinesForPrefetch[k] < 2)
+ s->DestinationLineTimesForPrefetchLessThan2 = true;
+
+ if (locals->VRatioPrefetchY[k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
+ locals->VRatioPrefetchC[k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
+ ((s->VStartupLines < s->MaxVStartupLines[k] || mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal == 0) &&
+ (locals->VRatioPrefetchY[k] > __DML_MAX_VRATIO_PRE__ || locals->VRatioPrefetchC[k] > __DML_MAX_VRATIO_PRE__)))
+ s->VRatioPrefetchMoreThanMax = true;
+
+ //dml_bool_t DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = false;
+ //dml_bool_t DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = false;
+ //if (locals->DestinationLinesToRequestVMInVBlank[k] >= 32) {
+ // DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = true;
+ //}
+
+ //if (locals->DestinationLinesToRequestRowInVBlank[k] >= 16) {
+ // DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = true;
+ //}
+ }
+
+ locals->FractionOfUrgentBandwidth = s->MaxTotalRDBandwidthNoUrgentBurst / mode_lib->ms.ReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, s->MaxTotalRDBandwidthNoUrgentBurst);
+ dml_print("DML::%s: ReturnBW=%f \n", __func__, mode_lib->ms.ReturnBW);
+ dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, locals->FractionOfUrgentBandwidth);
+#endif
+
+ CalculatePrefetchBandwithSupport(
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.ReturnBW,
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
+ locals->NoUrgentLatencyHidingPre,
+ locals->ReadBandwidthSurfaceLuma,
+ locals->ReadBandwidthSurfaceChroma,
+ locals->RequiredPrefetchPixDataBWLuma,
+ locals->RequiredPrefetchPixDataBWChroma,
+ locals->cursor_bw,
+ locals->meta_row_bw,
+ locals->dpte_row_bw,
+ locals->cursor_bw_pre,
+ locals->prefetch_vmrow_bw,
+ mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+ locals->UrgBurstFactorLuma,
+ locals->UrgBurstFactorChroma,
+ locals->UrgBurstFactorCursor,
+ locals->UrgBurstFactorLumaPre,
+ locals->UrgBurstFactorChromaPre,
+ locals->UrgBurstFactorCursorPre,
+
+ /* output */
+ &s->MaxTotalRDBandwidth, // dml_float_t *PrefetchBandwidth
+ &s->MaxTotalRDBandwidthNotIncludingMALLPrefetch, // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch
+ &s->dummy_single[0], // dml_float_t *FractionOfUrgentBandwidth
+ &locals->PrefetchModeSupported);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
+ s->dummy_unit_vector[k] = 1.0;
+
+ CalculatePrefetchBandwithSupport(mode_lib->ms.num_active_planes,
+ mode_lib->ms.ReturnBW,
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
+ locals->NoUrgentLatencyHidingPre,
+ locals->ReadBandwidthSurfaceLuma,
+ locals->ReadBandwidthSurfaceChroma,
+ locals->RequiredPrefetchPixDataBWLuma,
+ locals->RequiredPrefetchPixDataBWChroma,
+ locals->cursor_bw,
+ locals->meta_row_bw,
+ locals->dpte_row_bw,
+ locals->cursor_bw_pre,
+ locals->prefetch_vmrow_bw,
+ mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+ s->dummy_unit_vector,
+ s->dummy_unit_vector,
+ s->dummy_unit_vector,
+ s->dummy_unit_vector,
+ s->dummy_unit_vector,
+ s->dummy_unit_vector,
+
+ /* output */
+ &s->NonUrgentMaxTotalRDBandwidth, // dml_float_t *PrefetchBandwidth
+ &s->NonUrgentMaxTotalRDBandwidthNotIncludingMALLPrefetch, // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch
+ &locals->FractionOfUrgentBandwidth,
+ &s->dummy_boolean[0]); // dml_bool_t *PrefetchBandwidthSupport
+
+
+
+ if (s->VRatioPrefetchMoreThanMax != false || s->DestinationLineTimesForPrefetchLessThan2 != false) {
+ dml_print("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
+ dml_print("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2);
+ locals->PrefetchModeSupported = false;
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (locals->NoTimeToPrefetch[k] == true || locals->NotEnoughTimeForDynamicMetadata[k]) {
+ dml_print("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, locals->NoTimeToPrefetch[k]);
+ dml_print("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, locals->NotEnoughTimeForDynamicMetadata[k]);
+ locals->PrefetchModeSupported = false;
+ }
+ }
+
+
+ if (locals->PrefetchModeSupported == true && mode_lib->ms.support.ImmediateFlipSupport == true) {
+ locals->BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip(
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.ReturnBW,
+ locals->ReadBandwidthSurfaceLuma,
+ locals->ReadBandwidthSurfaceChroma,
+ locals->RequiredPrefetchPixDataBWLuma,
+ locals->RequiredPrefetchPixDataBWChroma,
+ locals->cursor_bw,
+ locals->cursor_bw_pre,
+ mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+ locals->UrgBurstFactorLuma,
+ locals->UrgBurstFactorChroma,
+ locals->UrgBurstFactorCursor,
+ locals->UrgBurstFactorLumaPre,
+ locals->UrgBurstFactorChromaPre,
+ locals->UrgBurstFactorCursorPre);
+
+ locals->TotImmediateFlipBytes = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) {
+ locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * (locals->PDEAndMetaPTEBytesFrame[k] + locals->MetaRowByte[k]);
+ if (locals->use_one_row_for_frame_flip[k]) {
+ locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * (2 * locals->PixelPTEBytesPerRow[k]);
+ } else {
+ locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * locals->PixelPTEBytesPerRow[k];
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k = %u\n", __func__, k);
+ dml_print("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k]);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, locals->PDEAndMetaPTEBytesFrame[k]);
+ dml_print("DML::%s: MetaRowByte = %u\n", __func__, locals->MetaRowByte[k]);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, locals->PixelPTEBytesPerRow[k]);
+ dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, locals->TotImmediateFlipBytes);
+#endif
+ }
+ }
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ CalculateFlipSchedule(
+ s->HostVMInefficiencyFactor,
+ locals->UrgentExtraLatency,
+ locals->UrgentLatency,
+ mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels,
+ mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
+ mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels,
+ mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
+ mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
+ locals->PDEAndMetaPTEBytesFrame[k],
+ locals->MetaRowByte[k],
+ locals->PixelPTEBytesPerRow[k],
+ locals->BandwidthAvailableForImmediateFlip,
+ locals->TotImmediateFlipBytes,
+ mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.cache_display_cfg.plane.VRatio[k],
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
+ locals->Tno_bw[k],
+ mode_lib->ms.cache_display_cfg.surface.DCCEnable[k],
+ locals->dpte_row_height[k],
+ locals->meta_row_height[k],
+ locals->dpte_row_height_chroma[k],
+ locals->meta_row_height_chroma[k],
+ locals->use_one_row_for_frame_flip[k],
+
+ /* Output */
+ &locals->DestinationLinesToRequestVMInImmediateFlip[k],
+ &locals->DestinationLinesToRequestRowInImmediateFlip[k],
+ &locals->final_flip_bw[k],
+ &locals->ImmediateFlipSupportedForPipe[k]);
+ }
+
+ CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes,
+ mode_lib->ms.ReturnBW,
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
+ mode_lib->ms.policy.ImmediateFlipRequirement,
+ locals->final_flip_bw,
+ locals->ReadBandwidthSurfaceLuma,
+ locals->ReadBandwidthSurfaceChroma,
+ locals->RequiredPrefetchPixDataBWLuma,
+ locals->RequiredPrefetchPixDataBWChroma,
+ locals->cursor_bw,
+ locals->meta_row_bw,
+ locals->dpte_row_bw,
+ locals->cursor_bw_pre,
+ locals->prefetch_vmrow_bw,
+ mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+ locals->UrgBurstFactorLuma,
+ locals->UrgBurstFactorChroma,
+ locals->UrgBurstFactorCursor,
+ locals->UrgBurstFactorLumaPre,
+ locals->UrgBurstFactorChromaPre,
+ locals->UrgBurstFactorCursorPre,
+
+ /* output */
+ &locals->total_dcn_read_bw_with_flip, // dml_float_t *TotalBandwidth
+ &locals->total_dcn_read_bw_with_flip_not_including_MALL_prefetch, // dml_float_t TotalBandwidthNotIncludingMALLPrefetch
+ &s->dummy_single[0], // dml_float_t *FractionOfUrgentBandwidth
+ &locals->ImmediateFlipSupported); // dml_bool_t *ImmediateFlipBandwidthSupport
+
+ CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes,
+ mode_lib->ms.ReturnBW,
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
+ mode_lib->ms.policy.ImmediateFlipRequirement,
+ locals->final_flip_bw,
+ locals->ReadBandwidthSurfaceLuma,
+ locals->ReadBandwidthSurfaceChroma,
+ locals->RequiredPrefetchPixDataBWLuma,
+ locals->RequiredPrefetchPixDataBWChroma,
+ locals->cursor_bw,
+ locals->meta_row_bw,
+ locals->dpte_row_bw,
+ locals->cursor_bw_pre,
+ locals->prefetch_vmrow_bw,
+ mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+ s->dummy_unit_vector,
+ s->dummy_unit_vector,
+ s->dummy_unit_vector,
+ s->dummy_unit_vector,
+ s->dummy_unit_vector,
+ s->dummy_unit_vector,
+
+ /* output */
+ &locals->non_urgent_total_dcn_read_bw_with_flip, // dml_float_t *TotalBandwidth
+ &locals->non_urgent_total_dcn_read_bw_with_flip_not_including_MALL_prefetch, // dml_float_t TotalBandwidthNotIncludingMALLPrefetch
+ &locals->FractionOfUrgentBandwidthImmediateFlip, // dml_float_t *FractionOfUrgentBandwidth
+ &s->dummy_boolean[0]); // dml_bool_t *ImmediateFlipBandwidthSupport
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required && locals->ImmediateFlipSupportedForPipe[k] == false) {
+ locals->ImmediateFlipSupported = false;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
+#endif
+ }
+ }
+ } else {
+ locals->ImmediateFlipSupported = false;
+ locals->total_dcn_read_bw_with_flip = s->MaxTotalRDBandwidth;
+ locals->total_dcn_read_bw_with_flip_not_including_MALL_prefetch = s->MaxTotalRDBandwidthNotIncludingMALLPrefetch;
+ locals->non_urgent_total_dcn_read_bw_with_flip = s->NonUrgentMaxTotalRDBandwidth;
+ locals->non_urgent_total_dcn_read_bw_with_flip_not_including_MALL_prefetch = s->NonUrgentMaxTotalRDBandwidthNotIncludingMALLPrefetch;
+ }
+
+ /* consider flip support is okay if the flip bw is ok or (when user doesn't require an iflip and there is no host vm) */
+ locals->PrefetchAndImmediateFlipSupported = (locals->PrefetchModeSupported == true &&
+ ((!mode_lib->ms.support.ImmediateFlipSupport && !mode_lib->ms.cache_display_cfg.plane.HostVMEnable && !s->ImmediateFlipRequirementFinal) ||
+ locals->ImmediateFlipSupported)) ? true : false;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PrefetchModeSupported = %u\n", __func__, locals->PrefetchModeSupported);
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
+ dml_print("DML::%s: ImmediateFlipRequirement[%u] = %u\n", __func__, k, mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required);
+ dml_print("DML::%s: HostVMEnable = %u\n", __func__, mode_lib->ms.cache_display_cfg.plane.HostVMEnable);
+ dml_print("DML::%s: ImmediateFlipSupport = %u (from mode_support)\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
+ dml_print("DML::%s: ImmediateFlipSupported = %u\n", __func__, locals->ImmediateFlipSupported);
+ dml_print("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, locals->PrefetchAndImmediateFlipSupported);
+#endif
+ dml_print("DML::%s: Done one iteration: VStartupLines=%u, MaxVStartupAllPlanes=%u\n", __func__, s->VStartupLines, s->MaxVStartupAllPlanes);
+
+ s->VStartupLines = s->VStartupLines + 1;
+
+ if (s->VStartupLines > s->MaxVStartupAllPlanes) {
+ s->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ s->NextPrefetchMode[k] = s->NextPrefetchMode[k] + 1;
+
+ if (s->NextPrefetchMode[k] <= s->MaxPrefetchMode[k])
+ s->AllPrefetchModeTested = false;
+ dml_print("DML::%s: VStartupLines=%u, reaches max vstartup, try next prefetch mode=%u\n", __func__, s->VStartupLines-1, s->AllPrefetchModeTested);
+ }
+ } else {
+ s->AllPrefetchModeTested = false;
+ }
+ s->iteration++;
+ if (s->iteration > 2500) {
+ dml_print("ERROR: DML::%s: Too many errors, exit now\n", __func__);
+ ASSERT(0);
+ }
+ } while (!(locals->PrefetchAndImmediateFlipSupported || s->AllPrefetchModeTested));
+
+ if (locals->PrefetchAndImmediateFlipSupported) {
+ dml_print("DML::%s: Good, Prefetch and flip scheduling solution found at VStartupLines=%u (MaxVStartupAllPlanes=%u)\n", __func__, s->VStartupLines-1, s->MaxVStartupAllPlanes);
+ } else {
+ dml_print("DML::%s: Bad, Prefetch and flip scheduling solution did NOT find solution! (MaxVStartupAllPlanes=%u)\n", __func__, s->MaxVStartupAllPlanes);
+ }
+
+ //Watermarks and NB P-State/DRAM Clock Change Support
+ {
+ s->mmSOCParameters.UrgentLatency = locals->UrgentLatency;
+ s->mmSOCParameters.ExtraLatency = locals->UrgentExtraLatency;
+ s->mmSOCParameters.WritebackLatency = mode_lib->ms.state.writeback_latency_us;
+ s->mmSOCParameters.DRAMClockChangeLatency = mode_lib->ms.state.dram_clock_change_latency_us;
+ s->mmSOCParameters.FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us;
+ s->mmSOCParameters.SRExitTime = mode_lib->ms.state.sr_exit_time_us;
+ s->mmSOCParameters.SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us;
+ s->mmSOCParameters.SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us;
+ s->mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->ms.state.sr_enter_plus_exit_z8_time_us;
+ s->mmSOCParameters.USRRetrainingLatency = mode_lib->ms.state.usr_retraining_latency_us;
+ s->mmSOCParameters.SMNLatency = mode_lib->ms.soc.smn_latency_us;
+
+ CalculateWatermarks_params->USRRetrainingRequiredFinal = mode_lib->ms.policy.USRRetrainingRequiredFinal;
+ CalculateWatermarks_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
+ CalculateWatermarks_params->PrefetchMode = locals->PrefetchMode;
+ CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ms.ip.max_line_buffer_lines;
+ CalculateWatermarks_params->LineBufferSize = mode_lib->ms.ip.line_buffer_size_bits;
+ CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ms.ip.writeback_interface_buffer_size_kbytes;
+ CalculateWatermarks_params->DCFCLK = locals->Dcfclk;
+ CalculateWatermarks_params->ReturnBW = mode_lib->ms.ReturnBW;
+ CalculateWatermarks_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal;
+ CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
+ CalculateWatermarks_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay;
+ CalculateWatermarks_params->dpte_group_bytes = locals->dpte_group_bytes;
+ CalculateWatermarks_params->meta_row_height = locals->meta_row_height;
+ CalculateWatermarks_params->meta_row_height_chroma = locals->meta_row_height_chroma;
+ CalculateWatermarks_params->mmSOCParameters = s->mmSOCParameters;
+ CalculateWatermarks_params->WritebackChunkSize = mode_lib->ms.ip.writeback_chunk_size_kbytes;
+ CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK;
+ CalculateWatermarks_params->DCFClkDeepSleep = locals->DCFCLKDeepSleep;
+ CalculateWatermarks_params->DETBufferSizeY = locals->DETBufferSizeY;
+ CalculateWatermarks_params->DETBufferSizeC = locals->DETBufferSizeC;
+ CalculateWatermarks_params->SwathHeightY = locals->SwathHeightY;
+ CalculateWatermarks_params->SwathHeightC = locals->SwathHeightC;
+ CalculateWatermarks_params->LBBitPerPixel = mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel;
+ CalculateWatermarks_params->SwathWidthY = locals->SwathWidthY;
+ CalculateWatermarks_params->SwathWidthC = locals->SwathWidthC;
+ CalculateWatermarks_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
+ CalculateWatermarks_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
+ CalculateWatermarks_params->VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps;
+ CalculateWatermarks_params->VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma;
+ CalculateWatermarks_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio;
+ CalculateWatermarks_params->VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma;
+ CalculateWatermarks_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
+ CalculateWatermarks_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
+ CalculateWatermarks_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
+ CalculateWatermarks_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
+ CalculateWatermarks_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
+ CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface;
+ CalculateWatermarks_params->BytePerPixelDETY = locals->BytePerPixelDETY;
+ CalculateWatermarks_params->BytePerPixelDETC = locals->BytePerPixelDETC;
+ CalculateWatermarks_params->DSTXAfterScaler = locals->DSTXAfterScaler;
+ CalculateWatermarks_params->DSTYAfterScaler = locals->DSTYAfterScaler;
+ CalculateWatermarks_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable;
+ CalculateWatermarks_params->WritebackPixelFormat = mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat;
+ CalculateWatermarks_params->WritebackDestinationWidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth;
+ CalculateWatermarks_params->WritebackDestinationHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight;
+ CalculateWatermarks_params->WritebackSourceHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight;
+ CalculateWatermarks_params->UnboundedRequestEnabled = locals->UnboundedRequestEnabled;
+ CalculateWatermarks_params->CompressedBufferSizeInkByte = locals->CompressedBufferSizeInkByte;
+
+ // Output
+ CalculateWatermarks_params->Watermark = &locals->Watermark; // Watermarks *Watermark
+ CalculateWatermarks_params->DRAMClockChangeSupport = &locals->DRAMClockChangeSupport;
+ CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = locals->MaxActiveDRAMClockChangeLatencySupported; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[]
+ CalculateWatermarks_params->SubViewportLinesNeededInMALL = locals->SubViewportLinesNeededInMALL; // dml_uint_t SubViewportLinesNeededInMALL[]
+ CalculateWatermarks_params->FCLKChangeSupport = &locals->FCLKChangeSupport;
+ CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &locals->MaxActiveFCLKChangeLatencySupported; // dml_float_t *MaxActiveFCLKChangeLatencySupported
+ CalculateWatermarks_params->USRRetrainingSupport = &locals->USRRetrainingSupport;
+
+ CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+ &mode_lib->scratch,
+ CalculateWatermarks_params);
+
+ /* Copy the calculated watermarks to mp.Watermark as the getter functions are
+ * implemented by the DML team to copy the calculated values from the mp.Watermark interface.
+ * &mode_lib->mp.Watermark and &locals->Watermark are the same address, so memcpy may lead to
+ * unexpected behavior; memmove should be used.
+ */
+ memmove(&mode_lib->mp.Watermark, CalculateWatermarks_params->Watermark, sizeof(struct Watermarks));
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
+ locals->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(0, locals->VStartupMin[k] * mode_lib->ms.cache_display_cfg.timing.HTotal[k] /
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k] - locals->Watermark.WritebackDRAMClockChangeWatermark);
+ locals->WritebackAllowFCLKChangeEndPosition[k] = dml_max(0, locals->VStartupMin[k] * mode_lib->ms.cache_display_cfg.timing.HTotal[k] /
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k] - locals->Watermark.WritebackFCLKChangeWatermark);
+ } else {
+ locals->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
+ locals->WritebackAllowFCLKChangeEndPosition[k] = 0;
+ }
+ }
+ }
+
+ //Display Pipeline Delivery Time in Prefetch, Groups
+ CalculatePixelDeliveryTimes(
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.cache_display_cfg.plane.VRatio,
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
+ locals->VRatioPrefetchY,
+ locals->VRatioPrefetchC,
+ locals->swath_width_luma_ub,
+ locals->swath_width_chroma_ub,
+ mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+ mode_lib->ms.cache_display_cfg.plane.HRatio,
+ mode_lib->ms.cache_display_cfg.plane.HRatioChroma,
+ mode_lib->ms.cache_display_cfg.timing.PixelClock,
+ locals->PSCL_THROUGHPUT,
+ locals->PSCL_THROUGHPUT_CHROMA,
+ locals->Dppclk,
+ locals->BytePerPixelC,
+ mode_lib->ms.cache_display_cfg.plane.SourceScan,
+ mode_lib->ms.cache_display_cfg.plane.NumberOfCursors,
+ mode_lib->ms.cache_display_cfg.plane.CursorWidth,
+ mode_lib->ms.cache_display_cfg.plane.CursorBPP,
+ locals->BlockWidth256BytesY,
+ locals->BlockHeight256BytesY,
+ locals->BlockWidth256BytesC,
+ locals->BlockHeight256BytesC,
+
+ /* Output */
+ locals->DisplayPipeLineDeliveryTimeLuma,
+ locals->DisplayPipeLineDeliveryTimeChroma,
+ locals->DisplayPipeLineDeliveryTimeLumaPrefetch,
+ locals->DisplayPipeLineDeliveryTimeChromaPrefetch,
+ locals->DisplayPipeRequestDeliveryTimeLuma,
+ locals->DisplayPipeRequestDeliveryTimeChroma,
+ locals->DisplayPipeRequestDeliveryTimeLumaPrefetch,
+ locals->DisplayPipeRequestDeliveryTimeChromaPrefetch,
+ locals->CursorRequestDeliveryTime,
+ locals->CursorRequestDeliveryTimePrefetch);
+
+ CalculateMetaAndPTETimes(
+ locals->use_one_row_for_frame,
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
+ mode_lib->ms.ip.meta_chunk_size_kbytes,
+ mode_lib->ms.ip.min_meta_chunk_size_bytes,
+ mode_lib->ms.cache_display_cfg.timing.HTotal,
+ mode_lib->ms.cache_display_cfg.plane.VRatio,
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
+ locals->DestinationLinesToRequestRowInVBlank,
+ locals->DestinationLinesToRequestRowInImmediateFlip,
+ mode_lib->ms.cache_display_cfg.surface.DCCEnable,
+ mode_lib->ms.cache_display_cfg.timing.PixelClock,
+ locals->BytePerPixelY,
+ locals->BytePerPixelC,
+ mode_lib->ms.cache_display_cfg.plane.SourceScan,
+ locals->dpte_row_height,
+ locals->dpte_row_height_chroma,
+ locals->meta_row_width,
+ locals->meta_row_width_chroma,
+ locals->meta_row_height,
+ locals->meta_row_height_chroma,
+ locals->meta_req_width,
+ locals->meta_req_width_chroma,
+ locals->meta_req_height,
+ locals->meta_req_height_chroma,
+ locals->dpte_group_bytes,
+ locals->PTERequestSizeY,
+ locals->PTERequestSizeC,
+ locals->PixelPTEReqWidthY,
+ locals->PixelPTEReqHeightY,
+ locals->PixelPTEReqWidthC,
+ locals->PixelPTEReqHeightC,
+ locals->dpte_row_width_luma_ub,
+ locals->dpte_row_width_chroma_ub,
+
+ /* Output */
+ locals->DST_Y_PER_PTE_ROW_NOM_L,
+ locals->DST_Y_PER_PTE_ROW_NOM_C,
+ locals->DST_Y_PER_META_ROW_NOM_L,
+ locals->DST_Y_PER_META_ROW_NOM_C,
+ locals->TimePerMetaChunkNominal,
+ locals->TimePerChromaMetaChunkNominal,
+ locals->TimePerMetaChunkVBlank,
+ locals->TimePerChromaMetaChunkVBlank,
+ locals->TimePerMetaChunkFlip,
+ locals->TimePerChromaMetaChunkFlip,
+ locals->time_per_pte_group_nom_luma,
+ locals->time_per_pte_group_vblank_luma,
+ locals->time_per_pte_group_flip_luma,
+ locals->time_per_pte_group_nom_chroma,
+ locals->time_per_pte_group_vblank_chroma,
+ locals->time_per_pte_group_flip_chroma);
+
+ CalculateVMGroupAndRequestTimes(
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
+ mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels,
+ mode_lib->ms.cache_display_cfg.timing.HTotal,
+ locals->BytePerPixelC,
+ locals->DestinationLinesToRequestVMInVBlank,
+ locals->DestinationLinesToRequestVMInImmediateFlip,
+ mode_lib->ms.cache_display_cfg.surface.DCCEnable,
+ mode_lib->ms.cache_display_cfg.timing.PixelClock,
+ locals->dpte_row_width_luma_ub,
+ locals->dpte_row_width_chroma_ub,
+ locals->vm_group_bytes,
+ locals->dpde0_bytes_per_frame_ub_l,
+ locals->dpde0_bytes_per_frame_ub_c,
+ locals->meta_pte_bytes_per_frame_ub_l,
+ locals->meta_pte_bytes_per_frame_ub_c,
+
+ /* Output */
+ locals->TimePerVMGroupVBlank,
+ locals->TimePerVMGroupFlip,
+ locals->TimePerVMRequestVBlank,
+ locals->TimePerVMRequestFlip);
+
+ // Min TTUVBlank
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (locals->PrefetchMode[k] == 0) {
+ locals->MinTTUVBlank[k] = dml_max4(
+ locals->Watermark.DRAMClockChangeWatermark,
+ locals->Watermark.FCLKChangeWatermark,
+ locals->Watermark.StutterEnterPlusExitWatermark,
+ locals->Watermark.UrgentWatermark);
+ } else if (locals->PrefetchMode[k] == 1) {
+ locals->MinTTUVBlank[k] = dml_max3(
+ locals->Watermark.FCLKChangeWatermark,
+ locals->Watermark.StutterEnterPlusExitWatermark,
+ locals->Watermark.UrgentWatermark);
+ } else if (locals->PrefetchMode[k] == 2) {
+ locals->MinTTUVBlank[k] = dml_max(
+ locals->Watermark.StutterEnterPlusExitWatermark,
+ locals->Watermark.UrgentWatermark);
+ } else {
+ locals->MinTTUVBlank[k] = locals->Watermark.UrgentWatermark;
+ }
+ if (!mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k])
+ locals->MinTTUVBlank[k] = locals->TCalc + locals->MinTTUVBlank[k];
+ }
+
+ // DCC Configuration
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k);
+#endif
+ CalculateDCCConfiguration(
+ mode_lib->ms.cache_display_cfg.surface.DCCEnable[k],
+ mode_lib->ms.policy.DCCProgrammingAssumesScanDirectionUnknownFinal,
+ mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
+ mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k],
+ mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k],
+ mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY[k],
+ mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC[k],
+ mode_lib->ms.NomDETInKByte,
+ locals->BlockHeight256BytesY[k],
+ locals->BlockHeight256BytesC[k],
+ mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k],
+ locals->BytePerPixelY[k],
+ locals->BytePerPixelC[k],
+ locals->BytePerPixelDETY[k],
+ locals->BytePerPixelDETC[k],
+ mode_lib->ms.cache_display_cfg.plane.SourceScan[k],
+ /* Output */
+ &locals->DCCYMaxUncompressedBlock[k],
+ &locals->DCCCMaxUncompressedBlock[k],
+ &locals->DCCYMaxCompressedBlock[k],
+ &locals->DCCCMaxCompressedBlock[k],
+ &locals->DCCYIndependentBlock[k],
+ &locals->DCCCIndependentBlock[k]);
+ }
+
+ // VStartup Adjustment
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ s->Tvstartup_margin = (s->MaxVStartupLines[k] - locals->VStartupMin[k]) * mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, locals->MinTTUVBlank[k]);
+#endif
+
+ locals->MinTTUVBlank[k] = locals->MinTTUVBlank[k] + s->Tvstartup_margin;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin);
+ dml_print("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
+ dml_print("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, locals->MinTTUVBlank[k]);
+#endif
+
+ locals->Tdmdl[k] = locals->Tdmdl[k] + s->Tvstartup_margin;
+ if (mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k] && mode_lib->ms.ip.dynamic_metadata_vm_enabled) {
+ locals->Tdmdl_vm[k] = locals->Tdmdl_vm[k] + s->Tvstartup_margin;
+ }
+
+ isInterlaceTiming = (mode_lib->ms.cache_display_cfg.timing.Interlace[k] && !mode_lib->ms.ip.ptoi_supported);
+
+ // The actual positioning of the vstartup
+ locals->VStartup[k] = (isInterlaceTiming ? (2 * s->MaxVStartupLines[k]) : s->MaxVStartupLines[k]);
+
+ s->dlg_vblank_start = ((isInterlaceTiming ? dml_floor((mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]) / 2.0, 1.0) :
+ mode_lib->ms.cache_display_cfg.timing.VTotal[k]) - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]);
+ s->LSetup = dml_floor(4.0 * locals->TSetup[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), 1.0) / 4.0;
+ s->blank_lines_remaining = (mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k]) - locals->VStartup[k];
+
+ if (s->blank_lines_remaining < 0) {
+ dml_print("ERROR: Vstartup is larger than vblank!?\n");
+ s->blank_lines_remaining = 0;
+ ASSERT(0);
+ }
+ locals->MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup;
+
+ // debug only
+ s->old_MIN_DST_Y_NEXT_START = ((isInterlaceTiming ? dml_floor((mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]) / 2.0, 1.0) :
+ mode_lib->ms.cache_display_cfg.timing.VTotal[k]) - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k])
+ + dml_max(1.0, dml_ceil((dml_float_t) locals->WritebackDelay[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), 1.0))
+ + dml_floor(4.0 * locals->TSetup[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), 1.0) / 4.0;
+
+ if (((locals->VUpdateOffsetPix[k] + locals->VUpdateWidthPix[k] + locals->VReadyOffsetPix[k]) / (double) mode_lib->ms.cache_display_cfg.timing.HTotal[k]) <=
+ (isInterlaceTiming ?
+ dml_floor((mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k] - locals->VStartup[k]) / 2.0, 1.0) :
+ (int) (mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k] - locals->VStartup[k]))) {
+ locals->VREADY_AT_OR_AFTER_VSYNC[k] = true;
+ } else {
+ locals->VREADY_AT_OR_AFTER_VSYNC[k] = false;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, locals->VStartup[k]);
+ dml_print("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, locals->VStartupMin[k]);
+ dml_print("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, locals->VUpdateOffsetPix[k]);
+ dml_print("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, locals->VUpdateWidthPix[k]);
+ dml_print("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, locals->VReadyOffsetPix[k]);
+ dml_print("DML::%s: k=%u, HTotal = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.HTotal[k]);
+ dml_print("DML::%s: k=%u, VTotal = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VTotal[k]);
+ dml_print("DML::%s: k=%u, VActive = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VActive[k]);
+ dml_print("DML::%s: k=%u, VFrontPorch = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]);
+ dml_print("DML::%s: k=%u, TSetup = %f\n", __func__, k, locals->TSetup[k]);
+ dml_print("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, locals->MIN_DST_Y_NEXT_START[k]);
+ dml_print("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f (old)\n", __func__, k, s->old_MIN_DST_Y_NEXT_START);
+ dml_print("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, locals->VREADY_AT_OR_AFTER_VSYNC[k]);
+#endif
+ }
+
+ //Maximum Bandwidth Used
+ s->TotalWRBandwidth = 0;
+ s->WRBandwidth = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true && mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k] == dml_444_32) {
+ s->WRBandwidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k] /
+ (mode_lib->ms.cache_display_cfg.timing.HTotal[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 4;
+ } else if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
+ s->WRBandwidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k] /
+ (mode_lib->ms.cache_display_cfg.timing.HTotal[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 8;
+ }
+ s->TotalWRBandwidth = s->TotalWRBandwidth + s->WRBandwidth;
+ }
+
+ locals->TotalDataReadBandwidth = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ locals->TotalDataReadBandwidth = locals->TotalDataReadBandwidth + locals->ReadBandwidthSurfaceLuma[k] + locals->ReadBandwidthSurfaceChroma[k];
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, locals->TotalDataReadBandwidth);
+ dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, locals->ReadBandwidthSurfaceChroma[k]);
+#endif
+ }
+
+ locals->TotalDataReadBandwidthNotIncludingMALLPrefetch = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
+ locals->TotalDataReadBandwidthNotIncludingMALLPrefetch = locals->TotalDataReadBandwidthNotIncludingMALLPrefetch
+ + locals->ReadBandwidthSurfaceLuma[k] + locals->ReadBandwidthSurfaceChroma[k];
+ }
+ }
+
+ CalculateStutterEfficiency_params->CompressedBufferSizeInkByte = locals->CompressedBufferSizeInkByte;
+ CalculateStutterEfficiency_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
+ CalculateStutterEfficiency_params->UnboundedRequestEnabled = locals->UnboundedRequestEnabled;
+ CalculateStutterEfficiency_params->MetaFIFOSizeInKEntries = mode_lib->ms.ip.meta_fifo_size_in_kentries;
+ CalculateStutterEfficiency_params->ZeroSizeBufferEntries = mode_lib->ms.ip.zero_size_buffer_entries;
+ CalculateStutterEfficiency_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
+ CalculateStutterEfficiency_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ CalculateStutterEfficiency_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
+ CalculateStutterEfficiency_params->TotalDataReadBandwidth = locals->TotalDataReadBandwidth;
+ CalculateStutterEfficiency_params->DCFCLK = locals->Dcfclk;
+ CalculateStutterEfficiency_params->ReturnBW = mode_lib->ms.ReturnBW;
+ CalculateStutterEfficiency_params->CompbufReservedSpace64B = locals->compbuf_reserved_space_64b;
+ CalculateStutterEfficiency_params->CompbufReservedSpaceZs = locals->compbuf_reserved_space_zs;
+ CalculateStutterEfficiency_params->SRExitTime = mode_lib->ms.state.sr_exit_time_us;
+ CalculateStutterEfficiency_params->SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us;
+ CalculateStutterEfficiency_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal;
+ CalculateStutterEfficiency_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
+ CalculateStutterEfficiency_params->StutterEnterPlusExitWatermark = locals->Watermark.StutterEnterPlusExitWatermark;
+ CalculateStutterEfficiency_params->Z8StutterEnterPlusExitWatermark = locals->Watermark.Z8StutterEnterPlusExitWatermark;
+ CalculateStutterEfficiency_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
+ CalculateStutterEfficiency_params->Interlace = mode_lib->ms.cache_display_cfg.timing.Interlace;
+ CalculateStutterEfficiency_params->MinTTUVBlank = locals->MinTTUVBlank;
+ CalculateStutterEfficiency_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface;
+ CalculateStutterEfficiency_params->DETBufferSizeY = locals->DETBufferSizeY;
+ CalculateStutterEfficiency_params->BytePerPixelY = locals->BytePerPixelY;
+ CalculateStutterEfficiency_params->BytePerPixelDETY = locals->BytePerPixelDETY;
+ CalculateStutterEfficiency_params->SwathWidthY = locals->SwathWidthY;
+ CalculateStutterEfficiency_params->SwathHeightY = locals->SwathHeightY;
+ CalculateStutterEfficiency_params->SwathHeightC = locals->SwathHeightC;
+ CalculateStutterEfficiency_params->NetDCCRateLuma = mode_lib->ms.cache_display_cfg.surface.DCCRateLuma;
+ CalculateStutterEfficiency_params->NetDCCRateChroma = mode_lib->ms.cache_display_cfg.surface.DCCRateChroma;
+ CalculateStutterEfficiency_params->DCCFractionOfZeroSizeRequestsLuma = mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsLuma;
+ CalculateStutterEfficiency_params->DCCFractionOfZeroSizeRequestsChroma = mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsChroma;
+ CalculateStutterEfficiency_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
+ CalculateStutterEfficiency_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
+ CalculateStutterEfficiency_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
+ CalculateStutterEfficiency_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio;
+ CalculateStutterEfficiency_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
+ CalculateStutterEfficiency_params->BlockHeight256BytesY = locals->BlockHeight256BytesY;
+ CalculateStutterEfficiency_params->BlockWidth256BytesY = locals->BlockWidth256BytesY;
+ CalculateStutterEfficiency_params->BlockHeight256BytesC = locals->BlockHeight256BytesC;
+ CalculateStutterEfficiency_params->BlockWidth256BytesC = locals->BlockWidth256BytesC;
+ CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = locals->DCCYMaxUncompressedBlock;
+ CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = locals->DCCCMaxUncompressedBlock;
+ CalculateStutterEfficiency_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
+ CalculateStutterEfficiency_params->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable;
+ CalculateStutterEfficiency_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable;
+ CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = locals->ReadBandwidthSurfaceLuma;
+ CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = locals->ReadBandwidthSurfaceChroma;
+ CalculateStutterEfficiency_params->meta_row_bw = locals->meta_row_bw;
+ CalculateStutterEfficiency_params->dpte_row_bw = locals->dpte_row_bw;
+ CalculateStutterEfficiency_params->StutterEfficiencyNotIncludingVBlank = &locals->StutterEfficiencyNotIncludingVBlank;
+ CalculateStutterEfficiency_params->StutterEfficiency = &locals->StutterEfficiency;
+ CalculateStutterEfficiency_params->NumberOfStutterBurstsPerFrame = &locals->NumberOfStutterBurstsPerFrame;
+ CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &locals->Z8StutterEfficiencyNotIncludingVBlank;
+ CalculateStutterEfficiency_params->Z8StutterEfficiency = &locals->Z8StutterEfficiency;
+ CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &locals->Z8NumberOfStutterBurstsPerFrame;
+ CalculateStutterEfficiency_params->StutterPeriod = &locals->StutterPeriod;
+ CalculateStutterEfficiency_params->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = &locals->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
+
+ // Stutter Efficiency
+ CalculateStutterEfficiency(&mode_lib->scratch,
+ CalculateStutterEfficiency_params);
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+ {
+ dml_float_t dummy_single[2];
+ dml_uint_t dummy_integer[1];
+ dml_bool_t dummy_boolean[1];
+
+ // Calculate z8 stutter eff assuming 0 reserved space
+ CalculateStutterEfficiency(
+ locals->CompressedBufferSizeInkByte,
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
+ locals->UnboundedRequestEnabled,
+ mode_lib->ms.ip.meta_fifo_size_in_kentries,
+ mode_lib->ms.ip.zero_size_buffer_entries,
+ mode_lib->ms.ip.pixel_chunk_size_kbytes,
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.ip.rob_buffer_size_kbytes,
+ locals->TotalDataReadBandwidth,
+ locals->Dcfclk,
+ mode_lib->ms.ReturnBW,
+ 0, //mode_lib->ms.ip.compbuf_reserved_space_64b,
+ 0, //mode_lib->ms.ip.compbuf_reserved_space_zs,
+ mode_lib->ms.state.sr_exit_time_us,
+ mode_lib->ms.state.sr_exit_z8_time_us,
+ mode_lib->ms.policy.SynchronizeTimingsFinal,
+ mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming,
+ locals->Watermark.StutterEnterPlusExitWatermark,
+ locals->Watermark.Z8StutterEnterPlusExitWatermark,
+ mode_lib->ms.ip.ptoi_supported,
+ mode_lib->ms.cache_display_cfg.timing.Interlace,
+ locals->MinTTUVBlank,
+ mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+ mode_lib->ms.DETBufferSizeY,
+ locals->BytePerPixelY,
+ locals->BytePerPixelDETY,
+ locals->SwathWidthY,
+ mode_lib->ms.SwathHeightY,
+ mode_lib->ms.SwathHeightC,
+ mode_lib->ms.cache_display_cfg.surface.DCCRateLuma,
+ mode_lib->ms.cache_display_cfg.surface.DCCRateChroma,
+ mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsLuma,
+ mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsChroma,
+ mode_lib->ms.cache_display_cfg.timing.HTotal,
+ mode_lib->ms.cache_display_cfg.timing.VTotal,
+ mode_lib->ms.cache_display_cfg.timing.PixelClock,
+ mode_lib->ms.cache_display_cfg.plane.VRatio,
+ mode_lib->ms.cache_display_cfg.plane.SourceScan,
+ locals->BlockHeight256BytesY,
+ locals->BlockWidth256BytesY,
+ locals->BlockHeight256BytesC,
+ locals->BlockWidth256BytesC,
+ locals->DCCYMaxUncompressedBlock,
+ locals->DCCCMaxUncompressedBlock,
+ mode_lib->ms.cache_display_cfg.timing.VActive,
+ mode_lib->ms.cache_display_cfg.surface.DCCEnable,
+ mode_lib->ms.cache_display_cfg.writeback.WritebackEnable,
+ locals->ReadBandwidthSurfaceLuma,
+ locals->ReadBandwidthSurfaceChroma,
+ locals->meta_row_bw,
+ locals->dpte_row_bw,
+
+ /* Output */
+ &dummy_single[0],
+ &dummy_single[1],
+ &dummy_integer[0],
+ &locals->Z8StutterEfficiencyNotIncludingVBlankBestCase,
+ &locals->Z8StutterEfficiencyBestCase,
+ &locals->Z8NumberOfStutterBurstsPerFrameBestCase,
+ &locals->StutterPeriodBestCase,
+ &dummy_boolean[0]);
+ }
+#else
+ locals->Z8StutterEfficiencyNotIncludingVBlankBestCase = locals->Z8StutterEfficiencyNotIncludingVBlank;
+ locals->Z8StutterEfficiencyBestCase = locals->Z8StutterEfficiency;
+ locals->Z8NumberOfStutterBurstsPerFrameBestCase = locals->Z8NumberOfStutterBurstsPerFrame;
+ locals->StutterPeriodBestCase = locals->StutterPeriod;
+#endif
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: --- END --- \n", __func__);
+#endif
+} // dml_core_mode_programming
+
+/// Function: dml_core_get_row_heights
+/** @brief Get row height for DPTE and META with minimal input.
+ *
+ * Steps performed here:
+ *  1. CalculateBytePerPixelAndBlockSizes() is called with SourcePixelFormat and
+ *     SurfaceTiling to obtain the luma (Y) and chroma (C) byte-per-pixel,
+ *     256-byte block and macro tile dimensions.
+ *  2. The Y or C set is selected according to is_plane1, together with the
+ *     matching PTE buffer size from mode_lib->ip.
+ *  3. CalculateVMAndRowBytes() is called with those values plus fixed constants
+ *     for the remaining inputs; every output except the two row heights is
+ *     written to a local scratch array and discarded.
+ *
+ * @param dpte_row_height [out] passed straight through as an output of CalculateVMAndRowBytes()
+ * @param meta_row_height [out] passed straight through as an output of CalculateVMAndRowBytes()
+ * @param mode_lib only ip.dpte_buffer_size_in_pte_reqs_luma / _chroma are read in this function
+ * @param is_plane1 when true the chroma (C) values are used, otherwise the luma (Y) values
+ * @param SourcePixelFormat forwarded to both helper calls
+ * @param SurfaceTiling forwarded to both helper calls
+ * @param ScanDirection forwarded to CalculateVMAndRowBytes()
+ * @param pitch forwarded to CalculateVMAndRowBytes()
+ * @param GPUVMMinPageSizeKBytes forwarded to CalculateVMAndRowBytes()
+ */
+void dml_core_get_row_heights(
+ dml_uint_t *dpte_row_height,
+ dml_uint_t *meta_row_height,
+ const struct display_mode_lib_st *mode_lib,
+ dml_bool_t is_plane1,
+ enum dml_source_format_class SourcePixelFormat,
+ enum dml_swizzle_mode SurfaceTiling,
+ enum dml_rotation_angle ScanDirection,
+ dml_uint_t pitch,
+ dml_uint_t GPUVMMinPageSizeKBytes)
+{
+ dml_uint_t BytePerPixelY;
+ dml_uint_t BytePerPixelC;
+ dml_float_t BytePerPixelInDETY; // written by CalculateBytePerPixelAndBlockSizes(), not read afterwards
+ dml_float_t BytePerPixelInDETC; // written by CalculateBytePerPixelAndBlockSizes(), not read afterwards
+ dml_uint_t BlockHeight256BytesY;
+ dml_uint_t BlockHeight256BytesC;
+ dml_uint_t BlockWidth256BytesY;
+ dml_uint_t BlockWidth256BytesC;
+ dml_uint_t MacroTileWidthY;
+ dml_uint_t MacroTileWidthC;
+ dml_uint_t MacroTileHeightY;
+ dml_uint_t MacroTileHeightC;
+
+ dml_uint_t BytePerPixel; // from here on: the Y or C value picked by is_plane1
+ dml_uint_t BlockHeight256Bytes;
+ dml_uint_t BlockWidth256Bytes;
+ dml_uint_t MacroTileWidth;
+ dml_uint_t MacroTileHeight;
+ dml_uint_t PTEBufferSizeInRequests;
+
+ dml_uint_t dummy_integer[16]; // scratch slots for the 16 CalculateVMAndRowBytes() outputs that are discarded
+
+ CalculateBytePerPixelAndBlockSizes(
+ SourcePixelFormat,
+ SurfaceTiling,
+
+ /* Output */
+ &BytePerPixelY,
+ &BytePerPixelC,
+ &BytePerPixelInDETY,
+ &BytePerPixelInDETC,
+ &BlockHeight256BytesY,
+ &BlockHeight256BytesC,
+ &BlockWidth256BytesY,
+ &BlockWidth256BytesC,
+ &MacroTileHeightY,
+ &MacroTileHeightC,
+ &MacroTileWidthY,
+ &MacroTileWidthC);
+
+ BytePerPixel = is_plane1 ? BytePerPixelC : BytePerPixelY;
+ BlockHeight256Bytes = is_plane1 ? BlockHeight256BytesC : BlockHeight256BytesY;
+ BlockWidth256Bytes = is_plane1 ? BlockWidth256BytesC : BlockWidth256BytesY;
+ MacroTileWidth = is_plane1 ? MacroTileWidthC : MacroTileWidthY;
+ MacroTileHeight = is_plane1 ? MacroTileHeightC : MacroTileHeightY;
+ PTEBufferSizeInRequests = is_plane1 ? mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: is_plane1 = %u\n", __func__, is_plane1);
+ dml_print("DML_DLG: %s: BytePerPixel = %u\n", __func__, BytePerPixel);
+ dml_print("DML_DLG: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes);
+ dml_print("DML_DLG: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes);
+ dml_print("DML_DLG: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth);
+ dml_print("DML_DLG: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight);
+ dml_print("DML_DLG: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests);
+ dml_print("DML_DLG: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma);
+ dml_print("DML_DLG: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma);
+ dml_print("DML_DLG: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
+#endif
+
+ // Supply just enough parameters to calculate the meta and dpte row heights; the literal arguments below are fixed placeholders (their meaning is given by the inline labels)
+ CalculateVMAndRowBytes(
+ 0, // dml_bool_t ViewportStationary,
+ 1, // dml_bool_t DCCEnable,
+ 1, // dml_uint_t NumberOfDPPs,
+ BlockHeight256Bytes,
+ BlockWidth256Bytes,
+ SourcePixelFormat,
+ SurfaceTiling,
+ BytePerPixel,
+ ScanDirection,
+ 0, // dml_uint_t SwathWidth,
+ 0, // dml_uint_t ViewportHeight, (Note: DML calculates one_row_for_frame height regardless, would need test input if that height is useful)
+ 0, // dml_uint_t ViewportXStart,
+ 0, // dml_uint_t ViewportYStart,
+ 1, // dml_bool_t GPUVMEnable,
+ 4, // dml_uint_t GPUVMMaxPageTableLevels,
+ GPUVMMinPageSizeKBytes,
+ PTEBufferSizeInRequests,
+ pitch,
+ 0, // dml_uint_t DCCMetaPitch,
+ MacroTileWidth,
+ MacroTileHeight,
+
+ /* Output: only dpte_row_height and meta_row_height reach the caller, the rest go to dummy_integer[] */
+ &dummy_integer[0], // dml_uint_t *MetaRowByte,
+ &dummy_integer[1], // dml_uint_t *PixelPTEBytesPerRow,
+ &dummy_integer[2], // dml_uint_t *PixelPTEBytesPerRowStorage,
+ &dummy_integer[3], // dml_uint_t *dpte_row_width_ub,
+ dpte_row_height,
+ &dummy_integer[4], // dml_uint_t *dpte_row_height_linear
+ &dummy_integer[5], // dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame,
+ &dummy_integer[6], // dml_uint_t *dpte_row_width_ub_one_row_per_frame,
+ &dummy_integer[7], // dml_uint_t *dpte_row_height_one_row_per_frame,
+ &dummy_integer[8], // dml_uint_t *MetaRequestWidth,
+ &dummy_integer[9], // dml_uint_t *MetaRequestHeight,
+ &dummy_integer[10], // dml_uint_t *meta_row_width,
+ meta_row_height,
+ &dummy_integer[11], // dml_uint_t *PixelPTEReqWidth,
+ &dummy_integer[12], // dml_uint_t *PixelPTEReqHeight,
+ &dummy_integer[13], // dml_uint_t *PTERequestSize,
+ &dummy_integer[14], // dml_uint_t *DPDE0BytesFrame,
+ &dummy_integer[15]); // dml_uint_t *MetaPTEBytesFrame
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: dpte_row_height = %u\n", __func__, *dpte_row_height);
+ dml_print("DML_DLG: %s: meta_row_height = %u\n", __func__, *meta_row_height);
+#endif
+}
+
+/// @brief Return, by value, the bounding box entry of one power state.
+/// Logs the requested index; when the index is not below states->num_states it also logs an error and asserts.
+/// NOTE(review): state_array[state_idx] is read even after the assert fires; if ASSERT() can compile to
+/// nothing in some build, an invalid index is not rejected here - confirm that callers validate it.
+/// @param states Table of power states (num_states entries are considered valid)
+/// @param state_idx Index of the requested power state
+/// @return Copy of states->state_array[state_idx]
+static struct soc_state_bounding_box_st dml_get_soc_state_bounding_box(
+	const struct soc_states_st *states,
+	dml_uint_t state_idx)
+{
+	const dml_bool_t idx_in_range = (state_idx < (dml_uint_t)states->num_states);
+
+	dml_print("DML::%s: state_idx=%u (num_states=%u)\n", __func__, state_idx, states->num_states);
+
+	if (!idx_in_range) {
+		dml_print("DML::%s: ERROR: Invalid state_idx=%u! num_states=%u\n", __func__, state_idx, states->num_states);
+		ASSERT(0);
+	}
+
+	return states->state_array[state_idx];
+}
+
+/// @brief Copy the soc, ip and policy settings plus the selected and the highest power state into mode_lib->ms.
+/// Per the original note, this copy is only really needed when DML must be able to tell whether the
+/// display cfg, bbox or policy changed since the last calculation.
+/// @param mode_lib Source of soc/ip/policy/states and owner of the ms cache that is filled in
+/// @param state_idx Power state whose bounding box is cached in mode_lib->ms.state
+static void cache_ip_soc_cfg(struct display_mode_lib_st *mode_lib,
+	dml_uint_t state_idx)
+{
+	const struct soc_states_st *states = &mode_lib->states;
+
+	// Plain struct copies of the static settings
+	mode_lib->ms.soc = mode_lib->soc;
+	mode_lib->ms.ip = mode_lib->ip;
+	mode_lib->ms.policy = mode_lib->policy;
+
+	// Requested index and the index of the last entry in the state table
+	mode_lib->ms.state_idx = state_idx;
+	mode_lib->ms.max_state_idx = states->num_states - 1;
+
+	// Both lookups log; keep "requested state first, last state second"
+	mode_lib->ms.state = dml_get_soc_state_bounding_box(states, state_idx);
+	mode_lib->ms.max_state = dml_get_soc_state_bounding_box(states, states->num_states - 1);
+}
+/// @brief Snapshot the caller's display configuration into mode_lib->ms.cache_display_cfg (whole-struct copy by value).
+static void cache_display_cfg(struct display_mode_lib_st *mode_lib,
+ const struct dml_display_cfg_st *display_cfg)
+{
+ mode_lib->ms.cache_display_cfg = *display_cfg;
+}
+
+/// @brief Seed the working clock values in mode_lib->ms from the cached power state (mode_lib->ms.state).
+/// These are default values: per the original note, SOCCLK, DRAMSpeed and FabricClock will be reassigned
+/// to the same state value in the mode_check step, and if UseMinimumRequiredDCFCLK is used the DCFCLK
+/// will be the min dcfclk for the mode support.
+static void fetch_socbb_params(struct display_mode_lib_st *mode_lib)
+{
+	mode_lib->ms.DCFCLK = (dml_float_t)mode_lib->ms.state.dcfclk_mhz;
+	mode_lib->ms.FabricClock = (dml_float_t)mode_lib->ms.state.fabricclk_mhz;
+	mode_lib->ms.DRAMSpeed = (dml_float_t)mode_lib->ms.state.dram_speed_mts;
+	mode_lib->ms.SOCCLK = (dml_float_t)mode_lib->ms.state.socclk_mhz;
+}
+
+/// @brief Run the mode support check for one power state, using display_cfg directly.
+/// The inputs are first cached in mode_lib->ms (ip/soc/policy and state via cache_ip_soc_cfg, the display
+/// configuration via cache_display_cfg, the clocks via fetch_socbb_params), then dml_core_mode_support() is called.
+/// Per the original note, this entry point is meant for the case where the physical resource mapping is not
+/// finalized (for example, it is not yet known how many pipes represent a surface).
+/// @param mode_lib Contains the bounding box and policy setting; its ms cache is overwritten by this call.
+/// @param state_idx Power state index
+/// @param display_cfg Display configuration to evaluate
+/// @return The value returned by dml_core_mode_support()
+dml_bool_t dml_mode_support(
+	struct display_mode_lib_st *mode_lib,
+	dml_uint_t state_idx,
+	const struct dml_display_cfg_st *display_cfg)
+{
+	dml_bool_t supported;
+
+	dml_print("DML::%s: ------------- START ----------\n", __func__);
+
+	// Refresh every cached input before running the core check
+	cache_ip_soc_cfg(mode_lib, state_idx);
+	cache_display_cfg(mode_lib, display_cfg);
+	fetch_socbb_params(mode_lib);
+
+	dml_print("DML::%s: state_idx = %u\n", __func__, state_idx);
+
+	supported = dml_core_mode_support(mode_lib);
+
+	dml_print("DML::%s: is_mode_support = %u\n", __func__, supported);
+	dml_print("DML::%s: ------------- DONE ----------\n", __func__);
+
+	return supported;
+}
+
+/// @brief Calculate the programming values for DCN DCHUB (the mode is assumed to be supported).
+/// The inputs are cached in mode_lib->ms in the same way as dml_mode_support() does, then
+/// dml_core_mode_programming() is run with a clock configuration in which dcfclk, dispclk and every
+/// dppclk option is set to dml_use_required_freq. Results are read back from mode_lib->mp.
+/// Per the original note, the calculated values include watermarks, dlg, rq reg and clock frequencies, and
+/// DCFCLK/SOCCLK are assumed to be the state values.
+/// @param mode_lib mode_lib data struct that houses all the input/output/bbox and calculation values.
+/// @param state_idx Power state idx chosen
+/// @param display_cfg Display Configuration
+/// @param call_standalone Calling mode_programming without calling mode support. In that case
+///        ms.support.ImmediateFlipSupport is forced to 1 and dml_core_mode_support_partial() is run first.
+/// @return mode_lib->mp.PrefetchAndImmediateFlipSupported after the programming pass
+/// TODO: Add clk_cfg input, could be useful for standalone mode
+dml_bool_t dml_mode_programming(
+	struct display_mode_lib_st *mode_lib,
+	dml_uint_t state_idx,
+	const struct dml_display_cfg_st *display_cfg,
+	bool call_standalone)
+{
+	struct dml_clk_cfg_st clk_cfg;
+	dml_uint_t plane;
+
+	// Start from an all-zero clock config, then request the "required" frequency everywhere
+	memset(&clk_cfg, 0, sizeof(clk_cfg));
+	clk_cfg.dispclk_option = dml_use_required_freq;
+	clk_cfg.dcfclk_option = dml_use_required_freq;
+	for (plane = 0; plane < __DML_NUM_PLANES__; plane++)
+		clk_cfg.dppclk_option[plane] = dml_use_required_freq;
+
+	dml_print("DML::%s: ------------- START ----------\n", __func__);
+	dml_print("DML::%s: state_idx = %u\n", __func__, state_idx);
+	dml_print("DML::%s: call_standalone = %u\n", __func__, call_standalone);
+
+	cache_ip_soc_cfg(mode_lib, state_idx);
+	cache_display_cfg(mode_lib, display_cfg);
+	fetch_socbb_params(mode_lib);
+
+	if (call_standalone) {
+		// assume mode support say immediate flip ok at max state/combine
+		mode_lib->ms.support.ImmediateFlipSupport = 1;
+		dml_core_mode_support_partial(mode_lib);
+	}
+
+	dml_core_mode_programming(mode_lib, &clk_cfg);
+
+	dml_print("DML::%s: ------------- DONE ----------\n", __func__);
+	dml_print("DML::%s: PrefetchAndImmediateFlipSupported = %0d\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported);
+
+	return mode_lib->mp.PrefetchAndImmediateFlipSupported;
+}
+
+/// @brief Find the first power state in [start_state_idx, end_state_idx] for which dml_mode_support() succeeds.
+/// States are tried in increasing index order and the search stops at the first supported one.
+/// Asserts when end_state_idx is below start_state_idx or not below mode_lib->states.num_states.
+/// @param lowest_state_idx [out] Index of the first supported state; left at end_state_idx when none is supported
+/// @param mode_lib Passed to dml_mode_support() for every state tried
+/// @param display_cfg Display configuration to evaluate
+/// @param start_state_idx First state index to try
+/// @param end_state_idx Last state index to try (inclusive, 0-based)
+/// @return 1 when a supported state was found, 0 otherwise
+static dml_uint_t mode_support_pwr_states(
+	dml_uint_t *lowest_state_idx,
+	struct display_mode_lib_st *mode_lib,
+	const struct dml_display_cfg_st *display_cfg,
+	dml_uint_t start_state_idx,
+	dml_uint_t end_state_idx)
+{
+	dml_bool_t found = 0;
+	dml_uint_t idx;
+
+	// Default answer when nothing in the range is supported
+	*lowest_state_idx = end_state_idx;
+
+	if (end_state_idx < start_state_idx)
+		ASSERT(0);
+
+	if (end_state_idx >= mode_lib->states.num_states) // idx is 0-based
+		ASSERT(0);
+
+	for (idx = start_state_idx; idx <= end_state_idx && !found; idx++) {
+		if (!dml_mode_support(mode_lib, idx, display_cfg))
+			continue;
+
+		dml_print("DML::%s: Mode is supported at power state_idx = %u\n", __func__, idx);
+		*lowest_state_idx = idx;
+		found = 1;
+	}
+
+	return found;
+}
+
+/// @brief Search for the lowest supported power state, from in_start_state_idx up to the last state of the mode_lib.
+/// The index found is written to in_out_params->out_lowest_state_idx; on success the mode_lib's ms.support
+/// struct is additionally copied to *in_out_params->out_evaluation_info.
+/// @param in_out_params Bundle of inputs (mode_lib, in_display_cfg, in_start_state_idx) and outputs
+/// @return Result of mode_support_pwr_states(): non-zero when a supported state was found
+dml_uint_t dml_mode_support_ex(struct dml_mode_support_ex_params_st *in_out_params)
+{
+	struct display_mode_lib_st *mode_lib = in_out_params->mode_lib;
+	dml_uint_t supported;
+
+	supported = mode_support_pwr_states(&in_out_params->out_lowest_state_idx,
+		mode_lib,
+		in_out_params->in_display_cfg,
+		in_out_params->in_start_state_idx,
+		mode_lib->states.num_states - 1);
+
+	// Nothing to report back when no state supports the mode
+	if (!supported)
+		return supported;
+
+	*in_out_params->out_evaluation_info = mode_lib->ms.support;
+
+	return supported;
+}
+
+/// @brief Tell whether the plane mapped to a pipe is configured as a phantom pipe.
+/// The pipe index is translated to a plane index through mode_lib->mp.pipe_plane[], then the cached
+/// UseMALLForPStateChange setting of that plane is compared with dml_use_mall_pstate_change_phantom_pipe.
+/// NOTE(review): pipe_idx is used as an array index without a range check here - presumably callers
+/// only pass valid pipe indices; confirm.
+/// @param mode_lib Holds the pipe-to-plane mapping (mp) and the cached display configuration (ms)
+/// @param pipe_idx Index of the pipe to query
+/// @return true when the plane's UseMALLForPStateChange equals dml_use_mall_pstate_change_phantom_pipe
+dml_bool_t dml_get_is_phantom_pipe(struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx)
+{
+	dml_uint_t plane_idx = mode_lib->mp.pipe_plane[pipe_idx];
+
+	// pipe_idx is a dml_uint_t, so it is printed with %u like the other dml_uint_t values in this file
+	dml_print("DML::%s: pipe_idx=%u UseMALLForPStateChange=%0d\n", __func__, pipe_idx, mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[plane_idx]);
+
+	return (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[plane_idx] == dml_use_mall_pstate_change_phantom_pipe);
+}
+
+
+/// Generate "type dml_get_<variable>(mode_lib, surface_idx)": the surface index is mapped to a plane index
+/// through mode_lib->mp.pipe_plane[], and internal_var[plane_idx] is returned cast to type.
+/// internal_var is expanded inside the generated body, so it may refer to the mode_lib parameter.
+#define dml_get_per_surface_var_func(variable, type, internal_var) type dml_get_##variable(struct display_mode_lib_st *mode_lib, dml_uint_t surface_idx) \
+{ \
+	const dml_uint_t plane_idx = mode_lib->mp.pipe_plane[surface_idx]; \
+	return (type) internal_var[plane_idx]; \
+}
+
+/// Generate "type dml_get_<name>(mode_lib)": returns internal_var cast to type.
+/// internal_var is expanded inside the generated body, so it may refer to the mode_lib parameter.
+#define dml_get_var_func(name, type, internal_var) type dml_get_##name(struct display_mode_lib_st *mode_lib) \
+{ \
+	return (type)(internal_var); \
+}
+
+dml_get_var_func(wm_urgent, dml_float_t, mode_lib->mp.Watermark.UrgentWatermark); /* Whole-configuration getters: each line defines dml_get_<name>(mode_lib) returning the listed field cast to the given type. */
+dml_get_var_func(wm_stutter_exit, dml_float_t, mode_lib->mp.Watermark.StutterExitWatermark);
+dml_get_var_func(wm_stutter_enter_exit, dml_float_t, mode_lib->mp.Watermark.StutterEnterPlusExitWatermark);
+dml_get_var_func(wm_memory_trip, dml_float_t, mode_lib->mp.UrgentLatency); /* NOTE(review): reads mp.UrgentLatency, the same field as dml_get_urgent_latency below - confirm this is intended */
+dml_get_var_func(wm_fclk_change, dml_float_t, mode_lib->mp.Watermark.FCLKChangeWatermark);
+dml_get_var_func(wm_usr_retraining, dml_float_t, mode_lib->mp.Watermark.USRRetrainingWatermark);
+dml_get_var_func(wm_dram_clock_change, dml_float_t, mode_lib->mp.Watermark.DRAMClockChangeWatermark);
+dml_get_var_func(wm_z8_stutter_enter_exit, dml_float_t, mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark);
+dml_get_var_func(wm_z8_stutter, dml_float_t, mode_lib->mp.Watermark.Z8StutterExitWatermark);
+dml_get_var_func(fraction_of_urgent_bandwidth, dml_float_t, mode_lib->mp.FractionOfUrgentBandwidth);
+dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, dml_float_t, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip);
+dml_get_var_func(urgent_latency, dml_float_t, mode_lib->mp.UrgentLatency);
+dml_get_var_func(clk_dcf_deepsleep, dml_float_t, mode_lib->mp.DCFCLKDeepSleep);
+dml_get_var_func(wm_writeback_dram_clock_change, dml_float_t, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
+dml_get_var_func(wm_writeback_urgent, dml_float_t, mode_lib->mp.Watermark.WritebackUrgentWatermark);
+dml_get_var_func(stutter_efficiency, dml_float_t, mode_lib->mp.StutterEfficiency);
+dml_get_var_func(stutter_efficiency_no_vblank, dml_float_t, mode_lib->mp.StutterEfficiencyNotIncludingVBlank);
+dml_get_var_func(stutter_efficiency_z8, dml_float_t, mode_lib->mp.Z8StutterEfficiency);
+dml_get_var_func(stutter_num_bursts_z8, dml_float_t, mode_lib->mp.Z8NumberOfStutterBurstsPerFrame);
+dml_get_var_func(stutter_period, dml_float_t, mode_lib->mp.StutterPeriod);
+dml_get_var_func(stutter_efficiency_z8_bestcase, dml_float_t, mode_lib->mp.Z8StutterEfficiencyBestCase);
+dml_get_var_func(stutter_num_bursts_z8_bestcase, dml_float_t, mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase);
+dml_get_var_func(stutter_period_bestcase, dml_float_t, mode_lib->mp.StutterPeriodBestCase);
+dml_get_var_func(urgent_extra_latency, dml_float_t, mode_lib->mp.UrgentExtraLatency);
+dml_get_var_func(fclk_change_latency, dml_float_t, mode_lib->mp.MaxActiveFCLKChangeLatencySupported);
+dml_get_var_func(dispclk_calculated, dml_float_t, mode_lib->mp.Dispclk_calculated);
+dml_get_var_func(total_data_read_bw, dml_float_t, mode_lib->mp.TotalDataReadBandwidth);
+dml_get_var_func(return_bw, dml_float_t, mode_lib->ms.ReturnBW); /* return_bw and return_dram_bw are read from ms, not mp */
+dml_get_var_func(return_dram_bw, dml_float_t, mode_lib->ms.ReturnDRAMBW);
+dml_get_var_func(tcalc, dml_float_t, mode_lib->mp.TCalc);
+dml_get_var_func(comp_buffer_size_kbytes, dml_uint_t, mode_lib->mp.CompressedBufferSizeInkByte);
+dml_get_var_func(pixel_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.pixel_chunk_size_kbytes); /* this and the next four chunk-size getters read ms.ip */
+dml_get_var_func(alpha_pixel_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.alpha_pixel_chunk_size_kbytes);
+dml_get_var_func(meta_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.meta_chunk_size_kbytes);
+dml_get_var_func(min_pixel_chunk_size_in_byte, dml_uint_t, mode_lib->ms.ip.min_pixel_chunk_size_bytes);
+dml_get_var_func(min_meta_chunk_size_in_byte, dml_uint_t, mode_lib->ms.ip.min_meta_chunk_size_bytes);
+dml_get_var_func(total_immediate_flip_bytes, dml_uint_t, mode_lib->mp.TotImmediateFlipBytes);
+
+dml_get_per_surface_var_func(dsc_delay, dml_uint_t, mode_lib->mp.DSCDelay); // DSC latency. This list defines the per-surface getters dml_get_<name>(mode_lib, surface_idx); each indexes its array by mp.pipe_plane[surface_idx].
+dml_get_per_surface_var_func(dppclk_calculated, dml_float_t, mode_lib->mp.Dppclk_calculated);
+dml_get_per_surface_var_func(dscclk_calculated, dml_float_t, mode_lib->mp.DSCCLK_calculated);
+dml_get_per_surface_var_func(min_ttu_vblank_in_us, dml_float_t, mode_lib->mp.MinTTUVBlank);
+dml_get_per_surface_var_func(vratio_prefetch_l, dml_float_t, mode_lib->mp.VRatioPrefetchY);
+dml_get_per_surface_var_func(vratio_prefetch_c, dml_float_t, mode_lib->mp.VRatioPrefetchC);
+dml_get_per_surface_var_func(dst_x_after_scaler, dml_uint_t, mode_lib->mp.DSTXAfterScaler);
+dml_get_per_surface_var_func(dst_y_after_scaler, dml_uint_t, mode_lib->mp.DSTYAfterScaler);
+dml_get_per_surface_var_func(dst_y_per_vm_vblank, dml_float_t, mode_lib->mp.DestinationLinesToRequestVMInVBlank);
+dml_get_per_surface_var_func(dst_y_per_row_vblank, dml_float_t, mode_lib->mp.DestinationLinesToRequestRowInVBlank);
+dml_get_per_surface_var_func(dst_y_prefetch, dml_float_t, mode_lib->mp.DestinationLinesForPrefetch);
+dml_get_per_surface_var_func(dst_y_per_vm_flip, dml_float_t, mode_lib->mp.DestinationLinesToRequestVMInImmediateFlip);
+dml_get_per_surface_var_func(dst_y_per_row_flip, dml_float_t, mode_lib->mp.DestinationLinesToRequestRowInImmediateFlip);
+dml_get_per_surface_var_func(dst_y_per_pte_row_nom_l, dml_float_t, mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L);
+dml_get_per_surface_var_func(dst_y_per_pte_row_nom_c, dml_float_t, mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C);
+dml_get_per_surface_var_func(dst_y_per_meta_row_nom_l, dml_float_t, mode_lib->mp.DST_Y_PER_META_ROW_NOM_L);
+dml_get_per_surface_var_func(dst_y_per_meta_row_nom_c, dml_float_t, mode_lib->mp.DST_Y_PER_META_ROW_NOM_C);
+dml_get_per_surface_var_func(refcyc_per_vm_group_vblank_in_us, dml_float_t, mode_lib->mp.TimePerVMGroupVBlank);
+dml_get_per_surface_var_func(refcyc_per_vm_group_flip_in_us, dml_float_t, mode_lib->mp.TimePerVMGroupFlip);
+dml_get_per_surface_var_func(refcyc_per_vm_req_vblank_in_us, dml_float_t, mode_lib->mp.TimePerVMRequestVBlank);
+dml_get_per_surface_var_func(refcyc_per_vm_req_flip_in_us, dml_float_t, mode_lib->mp.TimePerVMRequestFlip);
+dml_get_per_surface_var_func(refcyc_per_vm_dmdata_in_us, dml_float_t, mode_lib->mp.Tdmdl_vm);
+dml_get_per_surface_var_func(dmdata_dl_delta_in_us, dml_float_t, mode_lib->mp.Tdmdl);
+dml_get_per_surface_var_func(refcyc_per_line_delivery_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeLuma);
+dml_get_per_surface_var_func(refcyc_per_line_delivery_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeChroma);
+dml_get_per_surface_var_func(refcyc_per_line_delivery_pre_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch);
+dml_get_per_surface_var_func(refcyc_per_line_delivery_pre_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch);
+dml_get_per_surface_var_func(refcyc_per_req_delivery_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma);
+dml_get_per_surface_var_func(refcyc_per_req_delivery_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma);
+dml_get_per_surface_var_func(refcyc_per_req_delivery_pre_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch);
+dml_get_per_surface_var_func(refcyc_per_req_delivery_pre_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch);
+dml_get_per_surface_var_func(refcyc_per_cursor_req_delivery_in_us, dml_float_t, mode_lib->mp.CursorRequestDeliveryTime);
+dml_get_per_surface_var_func(refcyc_per_cursor_req_delivery_pre_in_us, dml_float_t, mode_lib->mp.CursorRequestDeliveryTimePrefetch);
+dml_get_per_surface_var_func(refcyc_per_meta_chunk_nom_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkNominal);
+dml_get_per_surface_var_func(refcyc_per_meta_chunk_nom_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkNominal);
+dml_get_per_surface_var_func(refcyc_per_meta_chunk_vblank_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkVBlank);
+dml_get_per_surface_var_func(refcyc_per_meta_chunk_vblank_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkVBlank);
+dml_get_per_surface_var_func(refcyc_per_meta_chunk_flip_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkFlip);
+dml_get_per_surface_var_func(refcyc_per_meta_chunk_flip_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkFlip);
+dml_get_per_surface_var_func(refcyc_per_pte_group_nom_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_nom_luma);
+dml_get_per_surface_var_func(refcyc_per_pte_group_nom_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_nom_chroma);
+dml_get_per_surface_var_func(refcyc_per_pte_group_vblank_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_vblank_luma);
+dml_get_per_surface_var_func(refcyc_per_pte_group_vblank_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_vblank_chroma);
+dml_get_per_surface_var_func(refcyc_per_pte_group_flip_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_flip_luma);
+dml_get_per_surface_var_func(refcyc_per_pte_group_flip_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_flip_chroma);
+dml_get_per_surface_var_func(dpte_group_size_in_bytes, dml_uint_t, mode_lib->mp.dpte_group_bytes);
+dml_get_per_surface_var_func(vm_group_size_in_bytes, dml_uint_t, mode_lib->mp.vm_group_bytes);
+dml_get_per_surface_var_func(swath_height_l, dml_uint_t, mode_lib->ms.SwathHeightY); /* both swath heights are read from ms, not mp */
+dml_get_per_surface_var_func(swath_height_c, dml_uint_t, mode_lib->ms.SwathHeightC);
+dml_get_per_surface_var_func(dpte_row_height_l, dml_uint_t, mode_lib->mp.dpte_row_height);
+dml_get_per_surface_var_func(dpte_row_height_c, dml_uint_t, mode_lib->mp.dpte_row_height_chroma);
+dml_get_per_surface_var_func(dpte_row_height_linear_l, dml_uint_t, mode_lib->mp.dpte_row_height_linear);
+dml_get_per_surface_var_func(dpte_row_height_linear_c, dml_uint_t, mode_lib->mp.dpte_row_height_linear_chroma);
+dml_get_per_surface_var_func(meta_row_height_l, dml_uint_t, mode_lib->mp.meta_row_height);
+dml_get_per_surface_var_func(meta_row_height_c, dml_uint_t, mode_lib->mp.meta_row_height_chroma);
+
+dml_get_per_surface_var_func(vstartup_calculated, dml_uint_t, mode_lib->mp.VStartup);
+dml_get_per_surface_var_func(vupdate_offset, dml_uint_t, mode_lib->mp.VUpdateOffsetPix);
+dml_get_per_surface_var_func(vupdate_width, dml_uint_t, mode_lib->mp.VUpdateWidthPix);
+dml_get_per_surface_var_func(vready_offset, dml_uint_t, mode_lib->mp.VReadyOffsetPix);
+dml_get_per_surface_var_func(vready_at_or_after_vsync, dml_uint_t, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC);
+dml_get_per_surface_var_func(min_dst_y_next_start, dml_uint_t, mode_lib->mp.MIN_DST_Y_NEXT_START);
+dml_get_per_surface_var_func(det_stored_buffer_size_l_bytes, dml_uint_t, mode_lib->ms.DETBufferSizeY); /* both stored DET buffer sizes are read from ms, not mp */
+dml_get_per_surface_var_func(det_stored_buffer_size_c_bytes, dml_uint_t, mode_lib->ms.DETBufferSizeC);
+dml_get_per_surface_var_func(use_mall_for_static_screen, dml_uint_t, mode_lib->mp.UsesMALLForStaticScreen);
+dml_get_per_surface_var_func(surface_size_for_mall, dml_uint_t, mode_lib->mp.SurfaceSizeInTheMALL);
+dml_get_per_surface_var_func(dcc_max_uncompressed_block_l, dml_uint_t, mode_lib->mp.DCCYMaxUncompressedBlock);
+dml_get_per_surface_var_func(dcc_max_compressed_block_l, dml_uint_t, mode_lib->mp.DCCYMaxCompressedBlock);
+dml_get_per_surface_var_func(dcc_independent_block_l, dml_uint_t, mode_lib->mp.DCCYIndependentBlock);
+dml_get_per_surface_var_func(dcc_max_uncompressed_block_c, dml_uint_t, mode_lib->mp.DCCCMaxUncompressedBlock);
+dml_get_per_surface_var_func(dcc_max_compressed_block_c, dml_uint_t, mode_lib->mp.DCCCMaxCompressedBlock);
+dml_get_per_surface_var_func(dcc_independent_block_c, dml_uint_t, mode_lib->mp.DCCCIndependentBlock);
+dml_get_per_surface_var_func(max_active_dram_clock_change_latency_supported, dml_uint_t, mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported);
+dml_get_per_surface_var_func(pte_buffer_mode, dml_uint_t, mode_lib->mp.PTE_BUFFER_MODE);
+dml_get_per_surface_var_func(bigk_fragment_size, dml_uint_t, mode_lib->mp.BIGK_FRAGMENT_SIZE);
+dml_get_per_surface_var_func(dpte_bytes_per_row, dml_uint_t, mode_lib->mp.PixelPTEBytesPerRow);
+dml_get_per_surface_var_func(meta_bytes_per_row, dml_uint_t, mode_lib->mp.MetaRowByte);
+dml_get_per_surface_var_func(det_buffer_size_kbytes, dml_uint_t, mode_lib->ms.DETBufferSizeInKByte); /* read from ms, not mp */
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.h b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.h
new file mode 100644
index 000000000000..a38ed89c47a9
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.h
@@ -0,0 +1,204 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DISPLAY_MODE_CORE_H__
+#define __DISPLAY_MODE_CORE_H__
+
+#include "display_mode_core_structs.h"
+
+struct display_mode_lib_st; /* forward declaration; the prototypes below only take pointers to it */
+
+dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib); /* presumably reports whether the configuration held in mode_lib is supported - confirm against the implementation */
+void dml_core_mode_support_partial(struct display_mode_lib_st *mode_lib); /* unlike dml_core_mode_support, returns nothing */
+void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struct dml_clk_cfg_st *clk_cfg); /* clk_cfg is read-only (const) */
+
+void dml_core_get_row_heights( /* the two leading non-const pointers are presumably outputs - confirm against the implementation */
+ dml_uint_t *dpte_row_height,
+ dml_uint_t *meta_row_height,
+ const struct display_mode_lib_st *mode_lib,
+ dml_bool_t is_plane1,
+ enum dml_source_format_class SourcePixelFormat,
+ enum dml_swizzle_mode SurfaceTiling,
+ enum dml_rotation_angle ScanDirection,
+ dml_uint_t pitch,
+ dml_uint_t GPUVMMinPageSizeKBytes);
+
+dml_float_t dml_get_return_bw_mbps_vm_only( /* takes the same parameter list as dml_get_return_bw_mbps below */
+ const struct soc_bounding_box_st *soc,
+ dml_bool_t use_ideal_dram_bw_strobe,
+ dml_bool_t HostVMEnable,
+ dml_float_t DCFCLK,
+ dml_float_t FabricClock,
+ dml_float_t DRAMSpeed);
+
+dml_float_t dml_get_return_bw_mbps( /* the function name indicates the result is in MB/s */
+ const struct soc_bounding_box_st *soc,
+ dml_bool_t use_ideal_dram_bw_strobe,
+ dml_bool_t HostVMEnable,
+ dml_float_t DCFCLK,
+ dml_float_t FabricClock,
+ dml_float_t DRAMSpeed);
+
+dml_bool_t dml_mode_support( /* display_cfg is read-only (const) */
+ struct display_mode_lib_st *mode_lib,
+ dml_uint_t state_idx,
+ const struct dml_display_cfg_st *display_cfg);
+
+dml_bool_t dml_mode_programming( /* same leading parameters as dml_mode_support, plus call_standalone */
+ struct display_mode_lib_st *mode_lib,
+ dml_uint_t state_idx,
+ const struct dml_display_cfg_st *display_cfg,
+ bool call_standalone); /* NOTE(review): plain bool here, dml_bool_t for every other flag in this header */
+
+dml_uint_t dml_mode_support_ex( /* NOTE(review): returns dml_uint_t where dml_mode_support returns dml_bool_t - confirm the meaning of the value */
+ struct dml_mode_support_ex_params_st *in_out_params);
+
+dml_bool_t dml_get_is_phantom_pipe(struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx);
+
+#define dml_get_per_surface_var_decl(variable, type) type dml_get_##variable(struct display_mode_lib_st *mode_lib, dml_uint_t surface_idx) /* expands to a prototype without the trailing ';' - the use site supplies it */
+#define dml_get_var_decl(var, type) type dml_get_##var(struct display_mode_lib_st *mode_lib) /* expands to a prototype without the trailing ';' - the use site supplies it */
+
+dml_get_var_decl(wm_urgent, dml_float_t); /* Prototypes of the whole-configuration getters; each line declares "type dml_get_<name>(struct display_mode_lib_st *mode_lib)". */
+dml_get_var_decl(wm_stutter_exit, dml_float_t);
+dml_get_var_decl(wm_stutter_enter_exit, dml_float_t);
+dml_get_var_decl(wm_memory_trip, dml_float_t);
+dml_get_var_decl(wm_dram_clock_change, dml_float_t);
+dml_get_var_decl(wm_z8_stutter_enter_exit, dml_float_t);
+dml_get_var_decl(wm_z8_stutter, dml_float_t);
+dml_get_var_decl(urgent_latency, dml_float_t);
+dml_get_var_decl(clk_dcf_deepsleep, dml_float_t);
+dml_get_var_decl(wm_fclk_change, dml_float_t);
+dml_get_var_decl(wm_usr_retraining, dml_float_t);
+
+/* Writeback watermarks and stutter figures */
+dml_get_var_decl(wm_writeback_dram_clock_change, dml_float_t);
+dml_get_var_decl(wm_writeback_urgent, dml_float_t);
+dml_get_var_decl(stutter_efficiency_no_vblank, dml_float_t);
+dml_get_var_decl(stutter_efficiency, dml_float_t);
+dml_get_var_decl(stutter_efficiency_z8, dml_float_t);
+dml_get_var_decl(stutter_num_bursts_z8, dml_float_t);
+dml_get_var_decl(stutter_period, dml_float_t);
+dml_get_var_decl(stutter_efficiency_z8_bestcase, dml_float_t);
+dml_get_var_decl(stutter_num_bursts_z8_bestcase, dml_float_t);
+dml_get_var_decl(stutter_period_bestcase, dml_float_t);
+/* Latencies, clocks, bandwidths, then buffer and chunk sizes */
+dml_get_var_decl(urgent_extra_latency, dml_float_t);
+dml_get_var_decl(fclk_change_latency, dml_float_t);
+dml_get_var_decl(nonurgent_latency, dml_float_t); /* NOTE(review): no matching dml_get_var_func(nonurgent_latency, ...) appears in the getter definitions shown in this patch - confirm it is defined elsewhere */
+dml_get_var_decl(dispclk_calculated, dml_float_t);
+dml_get_var_decl(total_data_read_bw, dml_float_t);
+dml_get_var_decl(return_bw, dml_float_t);
+dml_get_var_decl(return_dram_bw, dml_float_t);
+dml_get_var_decl(tcalc, dml_float_t);
+dml_get_var_decl(fraction_of_urgent_bandwidth, dml_float_t);
+dml_get_var_decl(fraction_of_urgent_bandwidth_imm_flip, dml_float_t);
+dml_get_var_decl(comp_buffer_size_kbytes, dml_uint_t);
+dml_get_var_decl(pixel_chunk_size_in_kbyte, dml_uint_t);
+dml_get_var_decl(alpha_pixel_chunk_size_in_kbyte, dml_uint_t);
+dml_get_var_decl(meta_chunk_size_in_kbyte, dml_uint_t);
+dml_get_var_decl(min_pixel_chunk_size_in_byte, dml_uint_t);
+dml_get_var_decl(min_meta_chunk_size_in_byte, dml_uint_t);
+dml_get_var_decl(total_immediate_flip_bytes, dml_uint_t);
+
+dml_get_per_surface_var_decl(dsc_delay, dml_uint_t); /* Prototypes of the per-surface getters; each line declares "type dml_get_<name>(struct display_mode_lib_st *mode_lib, dml_uint_t surface_idx)". */
+dml_get_per_surface_var_decl(dppclk_calculated, dml_float_t);
+dml_get_per_surface_var_decl(dscclk_calculated, dml_float_t);
+dml_get_per_surface_var_decl(min_ttu_vblank_in_us, dml_float_t);
+dml_get_per_surface_var_decl(vratio_prefetch_l, dml_float_t);
+dml_get_per_surface_var_decl(vratio_prefetch_c, dml_float_t);
+dml_get_per_surface_var_decl(dst_x_after_scaler, dml_uint_t);
+dml_get_per_surface_var_decl(dst_y_after_scaler, dml_uint_t);
+dml_get_per_surface_var_decl(dst_y_per_vm_vblank, dml_float_t);
+dml_get_per_surface_var_decl(dst_y_per_row_vblank, dml_float_t);
+dml_get_per_surface_var_decl(dst_y_prefetch, dml_float_t);
+dml_get_per_surface_var_decl(dst_y_per_vm_flip, dml_float_t);
+dml_get_per_surface_var_decl(dst_y_per_row_flip, dml_float_t);
+dml_get_per_surface_var_decl(dst_y_per_pte_row_nom_l, dml_float_t);
+dml_get_per_surface_var_decl(dst_y_per_pte_row_nom_c, dml_float_t);
+dml_get_per_surface_var_decl(dst_y_per_meta_row_nom_l, dml_float_t);
+dml_get_per_surface_var_decl(dst_y_per_meta_row_nom_c, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_vm_group_vblank_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_vm_group_flip_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_vm_req_vblank_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_vm_req_flip_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_vm_dmdata_in_us, dml_float_t);
+dml_get_per_surface_var_decl(dmdata_dl_delta_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_line_delivery_l_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_line_delivery_c_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_line_delivery_pre_l_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_line_delivery_pre_c_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_req_delivery_l_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_req_delivery_c_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_req_delivery_pre_l_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_req_delivery_pre_c_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_cursor_req_delivery_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_cursor_req_delivery_pre_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_meta_chunk_nom_l_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_meta_chunk_nom_c_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_meta_chunk_vblank_l_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_meta_chunk_vblank_c_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_meta_chunk_flip_l_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_meta_chunk_flip_c_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_pte_group_nom_l_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_pte_group_nom_c_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_pte_group_vblank_l_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_pte_group_vblank_c_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_pte_group_flip_l_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_pte_group_flip_c_in_us, dml_float_t);
+
+dml_get_per_surface_var_decl(dpte_group_size_in_bytes, dml_uint_t); /* from here to the end of the list every getter returns dml_uint_t */
+dml_get_per_surface_var_decl(vm_group_size_in_bytes, dml_uint_t);
+dml_get_per_surface_var_decl(swath_height_l, dml_uint_t);
+dml_get_per_surface_var_decl(swath_height_c, dml_uint_t);
+dml_get_per_surface_var_decl(dpte_row_height_l, dml_uint_t);
+dml_get_per_surface_var_decl(dpte_row_height_c, dml_uint_t);
+dml_get_per_surface_var_decl(dpte_row_height_linear_l, dml_uint_t);
+dml_get_per_surface_var_decl(dpte_row_height_linear_c, dml_uint_t);
+dml_get_per_surface_var_decl(meta_row_height_l, dml_uint_t);
+dml_get_per_surface_var_decl(meta_row_height_c, dml_uint_t);
+dml_get_per_surface_var_decl(vstartup_calculated, dml_uint_t);
+dml_get_per_surface_var_decl(vupdate_offset, dml_uint_t);
+dml_get_per_surface_var_decl(vupdate_width, dml_uint_t);
+dml_get_per_surface_var_decl(vready_offset, dml_uint_t);
+dml_get_per_surface_var_decl(vready_at_or_after_vsync, dml_uint_t);
+dml_get_per_surface_var_decl(min_dst_y_next_start, dml_uint_t);
+dml_get_per_surface_var_decl(det_stored_buffer_size_l_bytes, dml_uint_t);
+dml_get_per_surface_var_decl(det_stored_buffer_size_c_bytes, dml_uint_t);
+dml_get_per_surface_var_decl(use_mall_for_static_screen, dml_uint_t);
+dml_get_per_surface_var_decl(surface_size_for_mall, dml_uint_t);
+dml_get_per_surface_var_decl(dcc_max_uncompressed_block_l, dml_uint_t);
+dml_get_per_surface_var_decl(dcc_max_uncompressed_block_c, dml_uint_t);
+dml_get_per_surface_var_decl(dcc_max_compressed_block_l, dml_uint_t);
+dml_get_per_surface_var_decl(dcc_max_compressed_block_c, dml_uint_t);
+dml_get_per_surface_var_decl(dcc_independent_block_l, dml_uint_t);
+dml_get_per_surface_var_decl(dcc_independent_block_c, dml_uint_t);
+dml_get_per_surface_var_decl(max_active_dram_clock_change_latency_supported, dml_uint_t);
+dml_get_per_surface_var_decl(pte_buffer_mode, dml_uint_t);
+dml_get_per_surface_var_decl(bigk_fragment_size, dml_uint_t);
+dml_get_per_surface_var_decl(dpte_bytes_per_row, dml_uint_t);
+dml_get_per_surface_var_decl(meta_bytes_per_row, dml_uint_t);
+dml_get_per_surface_var_decl(det_buffer_size_kbytes, dml_uint_t);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core_structs.h b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core_structs.h
new file mode 100644
index 000000000000..5b40dcdc4406
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core_structs.h
@@ -0,0 +1,2032 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DISPLAY_MODE_CORE_STRUCT_H__
+#define __DISPLAY_MODE_CORE_STRUCT_H__
+
+#include "display_mode_lib_defines.h"
+#include "dml_top_display_cfg_types.h"
+
+enum dml_project_id { /* 0 is the invalid value; numbering is not in DCN-version order (dcn401 = 5 comes before dcn36 = 6) */
+ dml_project_invalid = 0,
+ dml_project_default = 1,
+ dml_project_dcn32 = dml_project_default, /* alias: same value (1) as dml_project_default */
+ dml_project_dcn321 = 2,
+ dml_project_dcn35 = 3,
+ dml_project_dcn351 = 4,
+ dml_project_dcn401 = 5,
+ dml_project_dcn36 = 6,
+};
+enum dml_prefetch_modes { /* five modes, explicitly numbered 0..4; dml_prefetch_support_none is the highest value, not 0 */
+ dml_prefetch_support_uclk_fclk_and_stutter_if_possible = 0,
+ dml_prefetch_support_uclk_fclk_and_stutter = 1,
+ dml_prefetch_support_fclk_and_stutter = 2,
+ dml_prefetch_support_stutter = 3,
+ dml_prefetch_support_none = 4
+};
+enum dml_use_mall_for_pstate_change_mode { /* explicitly numbered 0..5; 0 is the "disable" value */
+ dml_use_mall_pstate_change_disable = 0,
+ dml_use_mall_pstate_change_full_frame = 1,
+ dml_use_mall_pstate_change_sub_viewport = 2,
+ dml_use_mall_pstate_change_phantom_pipe = 3,
+ dml_use_mall_pstate_change_phantom_pipe_no_data_return = 4,
+ dml_use_mall_pstate_change_imall = 5
+};
+enum dml_use_mall_for_static_screen_mode { /* explicitly numbered 0..2; 0 is the "disable" value */
+ dml_use_mall_static_screen_disable = 0,
+ dml_use_mall_static_screen_enable = 1,
+ dml_use_mall_static_screen_optimize = 2
+};
+enum dml_output_encoder_class { /* note: dml_none is the last value (5); a zero-initialized variable of this type equals dml_dp */
+ dml_dp = 0,
+ dml_edp = 1,
+ dml_dp2p0 = 2,
+ dml_hdmi = 3,
+ dml_hdmifrl = 4,
+ dml_none = 5
+};
+enum dml_output_link_dp_rate{ /* 0 is "n/a"; values 1..6 equal the dml_output_rate_dp_rate_* values of enum dml_output_type_and_rate__rate */
+ dml_dp_rate_na = 0,
+ dml_dp_rate_hbr = 1,
+ dml_dp_rate_hbr2 = 2,
+ dml_dp_rate_hbr3 = 3,
+ dml_dp_rate_uhbr10 = 4,
+ dml_dp_rate_uhbr13p5 = 5,
+ dml_dp_rate_uhbr20 = 6
+};
+enum dml_output_type_and_rate__type{ /* 0 is "unknown"; same order as enum dml_output_encoder_class but shifted by one (dp is 1 here, 0 there) - the two are not interchangeable */
+ dml_output_type_unknown = 0,
+ dml_output_type_dp = 1,
+ dml_output_type_edp = 2,
+ dml_output_type_dp2p0 = 3,
+ dml_output_type_hdmi = 4,
+ dml_output_type_hdmifrl = 5
+};
+enum dml_output_type_and_rate__rate { /* 0 is "unknown"; 1..6 are the dp_rate entries, 7..12 the hdmi_rate entries */
+ dml_output_rate_unknown = 0,
+ dml_output_rate_dp_rate_hbr = 1,
+ dml_output_rate_dp_rate_hbr2 = 2,
+ dml_output_rate_dp_rate_hbr3 = 3,
+ dml_output_rate_dp_rate_uhbr10 = 4,
+ dml_output_rate_dp_rate_uhbr13p5 = 5,
+ dml_output_rate_dp_rate_uhbr20 = 6,
+ dml_output_rate_hdmi_rate_3x3 = 7,
+ dml_output_rate_hdmi_rate_6x3 = 8,
+ dml_output_rate_hdmi_rate_6x4 = 9,
+ dml_output_rate_hdmi_rate_8x4 = 10,
+ dml_output_rate_hdmi_rate_10x4 = 11,
+ dml_output_rate_hdmi_rate_12x4 = 12
+};
+enum dml_output_format_class { /* explicitly numbered 0..3; the enumerator value is an index, not the number in its name */
+ dml_444 = 0,
+ dml_s422 = 1,
+ dml_n422 = 2,
+ dml_420 = 3
+};
+enum dml_source_format_class { /* explicitly numbered 0..12; used as the SourcePixelFormat parameter type of dml_core_get_row_heights() */
+ dml_444_8 = 0,
+ dml_444_16 = 1,
+ dml_444_32 = 2,
+ dml_444_64 = 3,
+ dml_420_8 = 4,
+ dml_420_10 = 5,
+ dml_420_12 = 6,
+ dml_422_8 = 7,
+ dml_422_10 = 8,
+ dml_rgbe_alpha = 9,
+ dml_rgbe = 10,
+ dml_mono_8 = 11,
+ dml_mono_16 = 12
+};
+enum dml_output_bpc_class { /* the enumerator value is an index 0..4, not the number in its name (dml_out_6 is 0) */
+ dml_out_6 = 0,
+ dml_out_8 = 1,
+ dml_out_10 = 2,
+ dml_out_12 = 3,
+ dml_out_16 = 4
+};
+enum dml_output_standard_class { /* explicitly numbered 0..2 */
+ dml_std_cvt = 0,
+ dml_std_cea = 1,
+ dml_std_cvtr2 = 2
+};
+enum dml_rotation_angle { /* 0..3 are the plain angles; 4..7 are the "m"-suffixed variants, each equal to its plain counterpart + 4 (presumably mirrored - confirm) */
+ dml_rotation_0 = 0,
+ dml_rotation_90 = 1,
+ dml_rotation_180 = 2,
+ dml_rotation_270 = 3,
+ dml_rotation_0m = 4,
+ dml_rotation_90m = 5,
+ dml_rotation_180m = 6,
+ dml_rotation_270m = 7
+};
+enum dml_swizzle_mode { /* explicitly numbered 0..35; dml_sw_linear is 0; used as the SurfaceTiling parameter type of dml_core_get_row_heights() */
+ dml_sw_linear = 0,
+ dml_sw_256b_s = 1,
+ dml_sw_256b_d = 2,
+ dml_sw_256b_r = 3,
+ dml_sw_4kb_z = 4,
+ dml_sw_4kb_s = 5,
+ dml_sw_4kb_d = 6,
+ dml_sw_4kb_r = 7,
+ dml_sw_64kb_z = 8,
+ dml_sw_64kb_s = 9,
+ dml_sw_64kb_d = 10,
+ dml_sw_64kb_r = 11,
+ dml_sw_256kb_z = 12,
+ dml_sw_256kb_s = 13,
+ dml_sw_256kb_d = 14,
+ dml_sw_256kb_r = 15,
+ dml_sw_64kb_z_t = 16,
+ dml_sw_64kb_s_t = 17,
+ dml_sw_64kb_d_t = 18,
+ dml_sw_64kb_r_t = 19,
+ dml_sw_4kb_z_x = 20,
+ dml_sw_4kb_s_x = 21,
+ dml_sw_4kb_d_x = 22,
+ dml_sw_4kb_r_x = 23,
+ dml_sw_64kb_z_x = 24,
+ dml_sw_64kb_s_x = 25,
+ dml_sw_64kb_d_x = 26,
+ dml_sw_64kb_r_x = 27,
+ dml_sw_256kb_z_x = 28,
+ dml_sw_256kb_s_x = 29,
+ dml_sw_256kb_d_x = 30,
+ dml_sw_256kb_r_x = 31,
+ dml_sw_256b_2d = 32,
+ dml_sw_4kb_2d = 33,
+ dml_sw_64kb_2d = 34,
+ dml_sw_256kb_2d = 35
+};
+enum dml_lb_depth { /* the enumerator value is an index 0..4, not the number in its name (dml_lb_6 is 0) */
+ dml_lb_6 = 0,
+ dml_lb_8 = 1,
+ dml_lb_10 = 2,
+ dml_lb_12 = 3,
+ dml_lb_16 = 4
+};
+enum dml_voltage_state { /* explicitly numbered 0..4, from dml_vmin_lv up to dml_vmax */
+ dml_vmin_lv = 0,
+ dml_vmin = 1,
+ dml_vmid = 2,
+ dml_vnom = 3,
+ dml_vmax = 4
+};
+enum dml_source_macro_tile_size { /* the enumerator value is an index 0..2, not the size in its name */
+ dml_4k_tile = 0,
+ dml_64k_tile = 1,
+ dml_256k_tile = 2
+};
+enum dml_cursor_bpp { /* the enumerator value is an index 0..2, not the bit count in its name */
+ dml_cur_2bit = 0,
+ dml_cur_32bit = 1,
+ dml_cur_64bit = 2
+};
+enum dml_dram_clock_change_support { /* explicitly numbered 0..9; "unsupported" is the highest value (9), so a zero-initialized variable equals dml_dram_clock_change_vactive */
+ dml_dram_clock_change_vactive = 0,
+ dml_dram_clock_change_vblank = 1,
+ dml_dram_clock_change_vblank_drr = 2,
+ dml_dram_clock_change_vactive_w_mall_full_frame = 3,
+ dml_dram_clock_change_vactive_w_mall_sub_vp = 4,
+ dml_dram_clock_change_vblank_w_mall_full_frame = 5,
+ dml_dram_clock_change_vblank_drr_w_mall_full_frame = 6,
+ dml_dram_clock_change_vblank_w_mall_sub_vp = 7,
+ dml_dram_clock_change_vblank_drr_w_mall_sub_vp = 8,
+ dml_dram_clock_change_unsupported = 9
+};
+enum dml_fclock_change_support { /* explicitly numbered 0..2; "unsupported" is the highest value (2), not 0 */
+ dml_fclock_change_vactive = 0,
+ dml_fclock_change_vblank = 1,
+ dml_fclock_change_unsupported = 2
+};
+enum dml_dsc_enable { /* the enumerator dml_dsc_enable (1) shares its spelling with the enum tag; legal in C because tags live in their own name space */
+ dml_dsc_disable = 0,
+ dml_dsc_enable = 1,
+ dml_dsc_enable_if_necessary = 2
+};
+enum dml_mpc_use_policy { /* explicitly numbered 0..5; 0 is the "disabled" value */
+ dml_mpc_disabled = 0,
+ dml_mpc_as_possible = 1,
+ dml_mpc_as_needed_for_voltage = 2,
+ dml_mpc_as_needed_for_pstate_and_voltage = 3,
+ dml_mpc_as_needed = 4,
+ dml_mpc_2to1 = 5
+};
+enum dml_odm_use_policy { /* not value-compatible with enum dml_odm_mode: combine_as_needed (1) exists only here, so every later entry is one higher than its dml_odm_mode namesake */
+ dml_odm_use_policy_bypass = 0,
+ dml_odm_use_policy_combine_as_needed = 1,
+ dml_odm_use_policy_combine_2to1 = 2,
+ dml_odm_use_policy_combine_3to1 = 3,
+ dml_odm_use_policy_combine_4to1 = 4,
+ dml_odm_use_policy_split_1to2 = 5,
+ dml_odm_use_policy_mso_1to2 = 6,
+ dml_odm_use_policy_mso_1to4 = 7
+};
+enum dml_odm_mode { /* explicitly numbered 0..6; apart from bypass (0 in both), each value is one lower than its namesake in enum dml_odm_use_policy */
+ dml_odm_mode_bypass = 0,
+ dml_odm_mode_combine_2to1 = 1,
+ dml_odm_mode_combine_3to1 = 2,
+ dml_odm_mode_combine_4to1 = 3,
+ dml_odm_mode_split_1to2 = 4,
+ dml_odm_mode_mso_1to2 = 5,
+ dml_odm_mode_mso_1to4 = 6
+};
+enum dml_writeback_configuration { /* two values: 0 = no interleave, 1 = interleave */
+ dml_whole_buffer_for_single_stream_no_interleave = 0,
+ dml_whole_buffer_for_single_stream_interleave = 1
+};
+enum dml_immediate_flip_requirement { /* explicitly numbered 0..2; 0 is "not required" */
+ dml_immediate_flip_not_required = 0,
+ dml_immediate_flip_required = 1,
+ dml_immediate_flip_if_possible = 2
+};
+enum dml_unbounded_requesting_policy { /* note: "enable" is 0 and "disable" is 2, so a zero-initialized variable of this type means enable */
+ dml_unbounded_requesting_enable = 0,
+ dml_unbounded_requesting_edp_only = 1,
+ dml_unbounded_requesting_disable = 2
+};
+enum dml_clk_cfg_policy { /* explicitly numbered 0..2; selects between required, override and state frequency per the enumerator names */
+ dml_use_required_freq = 0,
+ dml_use_override_freq = 1,
+ dml_use_state_freq = 2
+};
+
+struct soc_state_bounding_box_st { /* judging by the name, the SoC limits for one state - TODO confirm; field-name suffixes carry the units (_mhz, _mts, _us) */
+ dml_float_t socclk_mhz;
+ dml_float_t dscclk_mhz;
+ dml_float_t phyclk_mhz;
+ dml_float_t phyclk_d18_mhz;
+ dml_float_t phyclk_d32_mhz;
+ dml_float_t dtbclk_mhz;
+ dml_float_t fabricclk_mhz;
+ dml_float_t dcfclk_mhz;
+ dml_float_t dispclk_mhz;
+ dml_float_t dppclk_mhz;
+ dml_float_t dram_speed_mts;
+ dml_float_t urgent_latency_pixel_data_only_us;
+ dml_float_t urgent_latency_pixel_mixed_with_vm_data_us;
+ dml_float_t urgent_latency_vm_data_only_us;
+ dml_float_t writeback_latency_us;
+ dml_float_t urgent_latency_adjustment_fabric_clock_component_us;
+ dml_float_t urgent_latency_adjustment_fabric_clock_reference_mhz;
+ dml_float_t sr_exit_time_us;
+ dml_float_t sr_enter_plus_exit_time_us;
+ dml_float_t sr_exit_z8_time_us;
+ dml_float_t sr_enter_plus_exit_z8_time_us;
+ dml_float_t dram_clock_change_latency_us;
+ dml_float_t fclk_change_latency_us;
+ dml_float_t usr_retraining_latency_us;
+ dml_bool_t use_ideal_dram_bw_strobe; /* the only boolean field in this struct */
+ dml_float_t g6_temp_read_blackout_us;
+
+ struct { /* unnamed struct type; all of its fields are integer counts named *_uclk_cycles */
+ dml_uint_t urgent_ramp_uclk_cycles;
+ dml_uint_t trip_to_memory_uclk_cycles;
+ dml_uint_t meta_trip_to_memory_uclk_cycles;
+ dml_uint_t maximum_latency_when_urgent_uclk_cycles;
+ dml_uint_t average_latency_when_urgent_uclk_cycles;
+ dml_uint_t maximum_latency_when_non_urgent_uclk_cycles;
+ dml_uint_t average_latency_when_non_urgent_uclk_cycles;
+ } dml_dcn401_uclk_dpm_dependent_soc_qos_params; /* member through which the fields above are accessed */
+};
+
+struct soc_bounding_box_st { /* judging by the name, SoC parameters that do not vary per state - TODO confirm; passed as the const soc argument of dml_get_return_bw_mbps() */
+ dml_float_t dprefclk_mhz;
+ dml_float_t xtalclk_mhz;
+ dml_float_t pcierefclk_mhz;
+ dml_float_t refclk_mhz;
+ dml_float_t amclk_mhz;
+ dml_uint_t max_outstanding_reqs;
+ dml_float_t pct_ideal_sdp_bw_after_urgent;
+ dml_float_t pct_ideal_fabric_bw_after_urgent;
+ dml_float_t pct_ideal_dram_bw_after_urgent_pixel_only;
+ dml_float_t pct_ideal_dram_bw_after_urgent_pixel_and_vm;
+ dml_float_t pct_ideal_dram_bw_after_urgent_vm_only;
+ dml_float_t pct_ideal_dram_bw_after_urgent_strobe;
+ dml_float_t max_avg_sdp_bw_use_normal_percent;
+ dml_float_t max_avg_fabric_bw_use_normal_percent;
+ dml_float_t max_avg_dram_bw_use_normal_percent;
+ dml_float_t max_avg_dram_bw_use_normal_strobe_percent;
+
+ dml_float_t svp_prefetch_pct_ideal_sdp_bw_after_urgent; /* svp_prefetch_* repeats the fields above, minus the two *_strobe ones */
+ dml_float_t svp_prefetch_pct_ideal_fabric_bw_after_urgent;
+ dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_pixel_only;
+ dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_pixel_and_vm;
+ dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_vm_only;
+ dml_float_t svp_prefetch_max_avg_sdp_bw_use_normal_percent;
+ dml_float_t svp_prefetch_max_avg_fabric_bw_use_normal_percent;
+ dml_float_t svp_prefetch_max_avg_dram_bw_use_normal_percent;
+
+ dml_uint_t round_trip_ping_latency_dcfclk_cycles;
+ dml_uint_t urgent_out_of_order_return_per_channel_pixel_only_bytes;
+ dml_uint_t urgent_out_of_order_return_per_channel_pixel_and_vm_bytes;
+ dml_uint_t urgent_out_of_order_return_per_channel_vm_only_bytes;
+ dml_uint_t num_chans;
+ dml_uint_t return_bus_width_bytes;
+ dml_uint_t dram_channel_width_bytes;
+ dml_uint_t fabric_datapath_to_dcn_data_return_bytes;
+ dml_uint_t hostvm_min_page_size_kbytes;
+ dml_uint_t gpuvm_min_page_size_kbytes;
+ dml_float_t phy_downspread_percent;
+ dml_float_t dcn_downspread_percent;
+ dml_float_t smn_latency_us;
+ dml_uint_t mall_allocated_for_dcn_mbytes;
+ dml_float_t dispclk_dppclk_vco_speed_mhz;
+ dml_bool_t do_urgent_latency_adjustment;
+
+ dml_uint_t mem_word_bytes;
+ dml_uint_t num_dcc_mcaches;
+ dml_uint_t mcache_size_bytes;
+ dml_uint_t mcache_line_size_bytes;
+
+ struct { /* unnamed struct type: one flag, five *_fclk_cycles counts, five *_margin floats */
+ dml_bool_t UseNewDCN401SOCParameters;
+ dml_uint_t df_qos_response_time_fclk_cycles;
+ dml_uint_t max_round_trip_to_furthest_cs_fclk_cycles;
+ dml_uint_t mall_overhead_fclk_cycles;
+ dml_uint_t meta_trip_adder_fclk_cycles;
+ dml_uint_t average_transport_distance_fclk_cycles;
+ dml_float_t umc_urgent_ramp_latency_margin;
+ dml_float_t umc_max_latency_margin;
+ dml_float_t umc_average_latency_margin;
+ dml_float_t fabric_max_transport_latency_margin;
+ dml_float_t fabric_average_transport_latency_margin;
+ } dml_dcn401_soc_qos_params; /* member through which the fields above are accessed */
+
+};
+
+struct ip_params_st { // Display IP parameters: buffer/chunk sizes, unit counts, scaler limits, pipeline delays and feature flags (embedded as mode_support_st::ip)
+ dml_uint_t vblank_nom_default_us;
+ dml_uint_t rob_buffer_size_kbytes;
+ dml_uint_t config_return_buffer_size_in_kbytes;
+ dml_uint_t config_return_buffer_segment_size_in_kbytes;
+ dml_uint_t compressed_buffer_segment_size_in_kbytes;
+ dml_uint_t meta_fifo_size_in_kentries;
+ dml_uint_t zero_size_buffer_entries;
+ dml_uint_t dpte_buffer_size_in_pte_reqs_luma;
+ dml_uint_t dpte_buffer_size_in_pte_reqs_chroma;
+ dml_uint_t dcc_meta_buffer_size_bytes;
+ dml_bool_t gpuvm_enable;
+ dml_bool_t hostvm_enable;
+ dml_uint_t gpuvm_max_page_table_levels;
+ dml_uint_t hostvm_max_page_table_levels;
+ dml_uint_t pixel_chunk_size_kbytes;
+ dml_uint_t alpha_pixel_chunk_size_kbytes;
+ dml_uint_t min_pixel_chunk_size_bytes;
+ dml_uint_t meta_chunk_size_kbytes;
+ dml_uint_t min_meta_chunk_size_bytes;
+ dml_uint_t writeback_chunk_size_kbytes;
+ dml_uint_t line_buffer_size_bits;
+ dml_uint_t max_line_buffer_lines;
+ dml_uint_t writeback_interface_buffer_size_kbytes;
+ dml_uint_t max_num_dpp;
+ dml_uint_t max_num_otg;
+ dml_uint_t max_num_wb;
+ dml_uint_t max_dchub_pscl_bw_pix_per_clk;
+ dml_uint_t max_pscl_lb_bw_pix_per_clk;
+ dml_uint_t max_lb_vscl_bw_pix_per_clk;
+ dml_uint_t max_vscl_hscl_bw_pix_per_clk;
+ dml_float_t max_hscl_ratio;
+ dml_float_t max_vscl_ratio;
+ dml_uint_t max_hscl_taps;
+ dml_uint_t max_vscl_taps;
+ dml_uint_t num_dsc;
+ dml_uint_t maximum_dsc_bits_per_component;
+ dml_uint_t maximum_pixels_per_line_per_dsc_unit;
+ dml_bool_t dsc422_native_support;
+ dml_bool_t cursor_64bpp_support;
+ dml_float_t dispclk_ramp_margin_percent;
+ dml_uint_t dppclk_delay_subtotal; // NOTE(review): the *_delay_* fields are presumably counted in cycles of the named clock - confirm
+ dml_uint_t dppclk_delay_scl;
+ dml_uint_t dppclk_delay_scl_lb_only;
+ dml_uint_t dppclk_delay_cnvc_formatter;
+ dml_uint_t dppclk_delay_cnvc_cursor;
+ dml_uint_t cursor_buffer_size;
+ dml_uint_t cursor_chunk_size;
+ dml_uint_t dispclk_delay_subtotal;
+ dml_bool_t dynamic_metadata_vm_enabled;
+ dml_uint_t max_inter_dcn_tile_repeaters;
+ dml_uint_t max_num_hdmi_frl_outputs;
+ dml_uint_t max_num_dp2p0_outputs;
+ dml_uint_t max_num_dp2p0_streams;
+ dml_bool_t dcc_supported;
+ dml_bool_t ptoi_supported;
+ dml_float_t writeback_max_hscl_ratio;
+ dml_float_t writeback_max_vscl_ratio;
+ dml_float_t writeback_min_hscl_ratio;
+ dml_float_t writeback_min_vscl_ratio;
+ dml_uint_t writeback_max_hscl_taps;
+ dml_uint_t writeback_max_vscl_taps;
+ dml_uint_t writeback_line_buffer_buffer_size;
+};
+
+struct DmlPipe { // Scalar clock, viewport, surface-format, scaling and timing values; NOTE(review): presumably describes one pipe, in contrast to the per-plane arrays of the dml_*_cfg_st structs - confirm
+ dml_float_t Dppclk;
+ dml_float_t Dispclk;
+ dml_float_t PixelClock;
+ dml_float_t DCFClkDeepSleep;
+ dml_uint_t DPPPerSurface;
+ dml_bool_t ScalerEnabled;
+ enum dml_rotation_angle SourceScan;
+ dml_uint_t ViewportHeight;
+ dml_uint_t ViewportHeightChroma;
+ dml_uint_t BlockWidth256BytesY; // Y / C suffixes: NOTE(review) presumably luma / chroma - confirm
+ dml_uint_t BlockHeight256BytesY;
+ dml_uint_t BlockWidth256BytesC;
+ dml_uint_t BlockHeight256BytesC;
+ dml_uint_t BlockWidthY;
+ dml_uint_t BlockHeightY;
+ dml_uint_t BlockWidthC;
+ dml_uint_t BlockHeightC;
+ dml_uint_t InterlaceEnable; // Declared dml_uint_t although the sibling enable flags in this struct are dml_bool_t
+ dml_uint_t NumberOfCursors;
+ dml_uint_t VBlank;
+ dml_uint_t HTotal;
+ dml_uint_t HActive;
+ dml_bool_t DCCEnable;
+ enum dml_odm_mode ODMMode;
+ enum dml_source_format_class SourcePixelFormat;
+ enum dml_swizzle_mode SurfaceTiling;
+ dml_uint_t BytePerPixelY;
+ dml_uint_t BytePerPixelC;
+ dml_bool_t ProgressiveToInterlaceUnitInOPP;
+ dml_float_t VRatio;
+ dml_float_t VRatioChroma;
+ dml_uint_t VTaps;
+ dml_uint_t VTapsChroma;
+ dml_uint_t PitchY;
+ dml_uint_t DCCMetaPitchY;
+ dml_uint_t PitchC;
+ dml_uint_t DCCMetaPitchC;
+ dml_bool_t ViewportStationary;
+ dml_uint_t ViewportXStart;
+ dml_uint_t ViewportYStart;
+ dml_uint_t ViewportXStartC;
+ dml_uint_t ViewportYStartC;
+ dml_bool_t FORCE_ONE_ROW_FOR_FRAME;
+ dml_uint_t SwathHeightY;
+ dml_uint_t SwathHeightC;
+};
+
+struct Watermarks { // One dml_float_t per watermark kind; NOTE(review): units are not stated in this header - confirm with the code that fills it
+ dml_float_t UrgentWatermark;
+ dml_float_t WritebackUrgentWatermark;
+ dml_float_t DRAMClockChangeWatermark;
+ dml_float_t FCLKChangeWatermark;
+ dml_float_t WritebackDRAMClockChangeWatermark;
+ dml_float_t WritebackFCLKChangeWatermark;
+ dml_float_t StutterExitWatermark;
+ dml_float_t StutterEnterPlusExitWatermark;
+ dml_float_t Z8StutterExitWatermark;
+ dml_float_t Z8StutterEnterPlusExitWatermark;
+ dml_float_t USRRetrainingWatermark;
+};
+
+struct SOCParametersList { // Bundle of SoC latency / self-refresh timing values; NOTE(review): presumably populated from the bounding-box *_us fields - confirm against the caller
+ dml_float_t UrgentLatency;
+ dml_float_t ExtraLatency;
+ dml_float_t WritebackLatency;
+ dml_float_t DRAMClockChangeLatency;
+ dml_float_t FCLKChangeLatency;
+ dml_float_t SRExitTime;
+ dml_float_t SREnterPlusExitTime;
+ dml_float_t SRExitZ8Time;
+ dml_float_t SREnterPlusExitZ8Time;
+ dml_float_t USRRetrainingLatency;
+ dml_float_t SMNLatency;
+};
+
+/// @brief Struct that represents the plane configuration of a display cfg
+struct dml_plane_cfg_st { // Array members hold one entry per plane (size __DML_NUM_PLANES__)
+ //
+ // Pipe/Surface Parameters
+ //
+ dml_bool_t GPUVMEnable; /// <brief Set if any pipe has GPUVM enabled
+ dml_bool_t HostVMEnable; /// <brief Set if any pipe has HostVM enabled
+
+ dml_uint_t GPUVMMaxPageTableLevels; /// <brief GPUVM level; max of all pipes'
+ dml_uint_t HostVMMaxPageTableLevels; /// <brief HostVM level; max of all pipes'; that is the number of non-cache HVM levels
+
+ dml_uint_t GPUVMMinPageSizeKBytes[__DML_NUM_PLANES__];
+ dml_bool_t ForceOneRowForFrame[__DML_NUM_PLANES__];
+ dml_bool_t PTEBufferModeOverrideEn[__DML_NUM_PLANES__]; //< brief when override is enabled; the DML will only check the given pte buffer and will use the pte buffer mode as is
+ dml_bool_t PTEBufferMode[__DML_NUM_PLANES__];
+ dml_uint_t ViewportWidth[__DML_NUM_PLANES__];
+ dml_uint_t ViewportHeight[__DML_NUM_PLANES__];
+ dml_uint_t ViewportWidthChroma[__DML_NUM_PLANES__];
+ dml_uint_t ViewportHeightChroma[__DML_NUM_PLANES__];
+ dml_uint_t ViewportXStart[__DML_NUM_PLANES__];
+ dml_uint_t ViewportXStartC[__DML_NUM_PLANES__];
+ dml_uint_t ViewportYStart[__DML_NUM_PLANES__];
+ dml_uint_t ViewportYStartC[__DML_NUM_PLANES__];
+ dml_bool_t ViewportStationary[__DML_NUM_PLANES__];
+
+ dml_bool_t ScalerEnabled[__DML_NUM_PLANES__];
+ dml_float_t HRatio[__DML_NUM_PLANES__];
+ dml_float_t VRatio[__DML_NUM_PLANES__];
+ dml_float_t HRatioChroma[__DML_NUM_PLANES__];
+ dml_float_t VRatioChroma[__DML_NUM_PLANES__];
+ dml_uint_t HTaps[__DML_NUM_PLANES__];
+ dml_uint_t VTaps[__DML_NUM_PLANES__];
+ dml_uint_t HTapsChroma[__DML_NUM_PLANES__];
+ dml_uint_t VTapsChroma[__DML_NUM_PLANES__];
+ dml_uint_t LBBitPerPixel[__DML_NUM_PLANES__];
+
+ enum dml_rotation_angle SourceScan[__DML_NUM_PLANES__];
+ dml_uint_t ScalerRecoutWidth[__DML_NUM_PLANES__];
+
+ dml_bool_t DynamicMetadataEnable[__DML_NUM_PLANES__];
+ dml_uint_t DynamicMetadataLinesBeforeActiveRequired[__DML_NUM_PLANES__];
+ dml_uint_t DynamicMetadataTransmittedBytes[__DML_NUM_PLANES__];
+ dml_uint_t DETSizeOverride[__DML_NUM_PLANES__]; /// <brief user can specify the desired DET buffer usage per-plane
+
+ dml_uint_t NumberOfCursors[__DML_NUM_PLANES__];
+ dml_uint_t CursorWidth[__DML_NUM_PLANES__];
+ dml_uint_t CursorBPP[__DML_NUM_PLANES__];
+
+ dml_bool_t setup_for_tdlut[__DML_NUM_PLANES__];
+ enum dml2_tdlut_addressing_mode tdlut_addressing_mode[__DML_NUM_PLANES__];
+ enum dml2_tdlut_width_mode tdlut_width_mode[__DML_NUM_PLANES__];
+
+ enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[__DML_NUM_PLANES__];
+ enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[__DML_NUM_PLANES__];
+
+ dml_uint_t BlendingAndTiming[__DML_NUM_PLANES__]; /// <brief The timing group (like OTG) that this plane gets its timing from. Mode check also needs this info; for example to check num OTG; encoder; dsc etc.
+}; // dml_plane_cfg_st;
+
+/// @brief Surface parameters; every member holds one entry per plane (size __DML_NUM_PLANES__)
+struct dml_surface_cfg_st {
+ enum dml_swizzle_mode SurfaceTiling[__DML_NUM_PLANES__];
+ enum dml_source_format_class SourcePixelFormat[__DML_NUM_PLANES__];
+ dml_uint_t PitchY[__DML_NUM_PLANES__]; // Y / C suffixes: NOTE(review) presumably luma / chroma - confirm
+ dml_uint_t SurfaceWidthY[__DML_NUM_PLANES__];
+ dml_uint_t SurfaceHeightY[__DML_NUM_PLANES__];
+ dml_uint_t PitchC[__DML_NUM_PLANES__];
+ dml_uint_t SurfaceWidthC[__DML_NUM_PLANES__];
+ dml_uint_t SurfaceHeightC[__DML_NUM_PLANES__];
+
+ dml_bool_t DCCEnable[__DML_NUM_PLANES__];
+ dml_uint_t DCCMetaPitchY[__DML_NUM_PLANES__];
+ dml_uint_t DCCMetaPitchC[__DML_NUM_PLANES__];
+
+ dml_float_t DCCRateLuma[__DML_NUM_PLANES__];
+ dml_float_t DCCRateChroma[__DML_NUM_PLANES__];
+ dml_float_t DCCFractionOfZeroSizeRequestsLuma[__DML_NUM_PLANES__];
+ dml_float_t DCCFractionOfZeroSizeRequestsChroma[__DML_NUM_PLANES__];
+}; // dml_surface_cfg_st
+
+/// @brief Structure that represents the timing configuration; every member is an array of size __DML_NUM_PLANES__
+struct dml_timing_cfg_st {
+ dml_uint_t HTotal[__DML_NUM_PLANES__];
+ dml_uint_t VTotal[__DML_NUM_PLANES__];
+ dml_uint_t HBlankEnd[__DML_NUM_PLANES__];
+ dml_uint_t VBlankEnd[__DML_NUM_PLANES__];
+ dml_uint_t RefreshRate[__DML_NUM_PLANES__];
+ dml_uint_t VFrontPorch[__DML_NUM_PLANES__];
+ dml_float_t PixelClock[__DML_NUM_PLANES__]; // NOTE(review): unit not stated in this header (presumably MHz) - confirm
+ dml_uint_t HActive[__DML_NUM_PLANES__];
+ dml_uint_t VActive[__DML_NUM_PLANES__];
+ dml_bool_t Interlace[__DML_NUM_PLANES__];
+ dml_bool_t DRRDisplay[__DML_NUM_PLANES__];
+ dml_uint_t VBlankNom[__DML_NUM_PLANES__];
+}; // dml_timing_cfg_st;
+
+/// @brief Structure that represents the output stream; every member is an array of size __DML_NUM_PLANES__
+struct dml_output_cfg_st {
+ // Output Setting
+ dml_uint_t DSCInputBitPerComponent[__DML_NUM_PLANES__];
+ enum dml_output_format_class OutputFormat[__DML_NUM_PLANES__];
+ enum dml_output_encoder_class OutputEncoder[__DML_NUM_PLANES__];
+ dml_uint_t OutputMultistreamId[__DML_NUM_PLANES__];
+ dml_bool_t OutputMultistreamEn[__DML_NUM_PLANES__];
+ dml_float_t OutputBpp[__DML_NUM_PLANES__]; //< brief Used by mode_programming to specify an output bpp; user can use the output from mode_support (support.OutputBpp)
+ dml_float_t PixelClockBackEnd[__DML_NUM_PLANES__];
+ enum dml_dsc_enable DSCEnable[__DML_NUM_PLANES__]; //< brief for mode support check; used to determine if dsc is required
+ dml_uint_t OutputLinkDPLanes[__DML_NUM_PLANES__];
+ enum dml_output_link_dp_rate OutputLinkDPRate[__DML_NUM_PLANES__];
+ dml_float_t ForcedOutputLinkBPP[__DML_NUM_PLANES__];
+ dml_uint_t AudioSampleRate[__DML_NUM_PLANES__];
+ dml_uint_t AudioSampleLayout[__DML_NUM_PLANES__];
+ dml_bool_t OutputDisabled[__DML_NUM_PLANES__];
+ dml_uint_t DSCSlices[__DML_NUM_PLANES__];
+}; // dml_output_cfg_st
+
+/// @brief Writeback settings; every member is an array of size __DML_NUM_PLANES__
+struct dml_writeback_cfg_st {
+ enum dml_source_format_class WritebackPixelFormat[__DML_NUM_PLANES__];
+ dml_bool_t WritebackEnable[__DML_NUM_PLANES__];
+ dml_uint_t ActiveWritebacksPerSurface[__DML_NUM_PLANES__];
+ dml_uint_t WritebackDestinationWidth[__DML_NUM_PLANES__];
+ dml_uint_t WritebackDestinationHeight[__DML_NUM_PLANES__];
+ dml_uint_t WritebackSourceWidth[__DML_NUM_PLANES__];
+ dml_uint_t WritebackSourceHeight[__DML_NUM_PLANES__];
+ dml_uint_t WritebackHTaps[__DML_NUM_PLANES__];
+ dml_uint_t WritebackVTaps[__DML_NUM_PLANES__];
+ dml_float_t WritebackHRatio[__DML_NUM_PLANES__];
+ dml_float_t WritebackVRatio[__DML_NUM_PLANES__];
+}; // dml_writeback_cfg_st;
+
+/// @brief Hardware resource specific; mainly used by mode_programming when test/sw wants to apply specific settings
+/// which are not the same as what the mode support stage derives. When mode_support is called together with mode_programming; the hw-specific
+/// resources will be set to what the mode_support layer recommends
+struct dml_hw_resource_st {
+ enum dml_odm_mode ODMMode[__DML_NUM_PLANES__]; /// <brief ODM mode that is chosen in the mode check stage and will be used in mode programming stage
+ dml_uint_t DPPPerSurface[__DML_NUM_PLANES__]; /// <brief How many DPPs are needed to drive the surface to output. If MPCC or ODMC could be 2 or 4.
+ dml_bool_t DSCEnabled[__DML_NUM_PLANES__]; /// <brief Indicate if the DSC is enabled; used in mode_programming
+ dml_uint_t NumberOfDSCSlices[__DML_NUM_PLANES__]; /// <brief Indicate how many slices are needed to support the given mode
+ dml_float_t DLGRefClkFreqMHz; /// <brief DLG Global Reference timer
+};
+
+/// @brief To control the clk usage for mode programming
+struct dml_clk_cfg_st {
+ enum dml_clk_cfg_policy dcfclk_option; ///< brief Used for mode_program; user can select between the min required clk as calculated by DML or the test-specific freq
+ enum dml_clk_cfg_policy dispclk_option; ///< brief Used for mode_program; user can select between the min required clk as calculated by DML or the test-specific freq
+ enum dml_clk_cfg_policy dppclk_option[__DML_NUM_PLANES__]; // Per-plane array, unlike the two scalar options above
+
+ dml_float_t dcfclk_mhz; // NOTE(review): presumably the test-specific frequencies consulted when the matching *_option selects them - confirm
+ dml_float_t dispclk_mhz;
+ dml_float_t dppclk_mhz[__DML_NUM_PLANES__];
+}; // dml_clk_cfg_st
+
+/// @brief DML display configuration.
+/// Describes how to display a surface in a multi-plane setup and output to different outputs and writeback using the specified timing
+struct dml_display_cfg_st {
+ struct dml_surface_cfg_st surface;
+ struct dml_plane_cfg_st plane;
+ struct dml_timing_cfg_st timing;
+ struct dml_output_cfg_st output;
+ struct dml_writeback_cfg_st writeback;
+ unsigned int num_surfaces; // NOTE(review): presumably the number of valid entries in the per-plane arrays above - confirm
+ unsigned int num_timings;
+
+ struct dml_hw_resource_st hw; //< brief for mode programming
+ struct dml_clk_cfg_st clk_overrides; //< brief for mode programming clk override
+}; // dml_display_cfg_st
+
+/// @brief DML mode evaluation and programming policy
+/// The knobs that affect mode support and mode programming
+struct dml_mode_eval_policy_st {
+ // -------------------
+ // Policy
+ // -------------------
+ enum dml_mpc_use_policy MPCCombineUse[__DML_NUM_PLANES__]; /// <brief MPC Combine mode as selected by the user; used in mode check stage
+ enum dml_odm_use_policy ODMUse[__DML_NUM_PLANES__]; /// <brief ODM mode as selected by the user; used in mode check stage
+ enum dml_unbounded_requesting_policy UseUnboundedRequesting; ///< brief Unbounded request mode preference
+ enum dml_immediate_flip_requirement ImmediateFlipRequirement[__DML_NUM_PLANES__]; /// <brief Is immediate flip a requirement for this plane. When host vm is present iflip is needed regardless
+ enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank[__DML_NUM_PLANES__]; /// <brief To specify if the DML should calculate the values to support different pwr saving features (cstate; pstate; etc.) during vblank
+
+ enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal;
+ bool UseOnlyMaxPrefetchModes; // NOTE(review): plain bool here, while the sibling flags in this struct use dml_bool_t
+ dml_bool_t UseMinimumRequiredDCFCLK; //<brief When set the mode_check stage will figure out the min DCFCLK freq to support the given display configuration. User can then use the output DCFCLK for mode programming.
+ dml_bool_t DRAMClockChangeRequirementFinal;
+ dml_bool_t FCLKChangeRequirementFinal;
+ dml_bool_t USRRetrainingRequiredFinal;
+ dml_bool_t EnhancedPrefetchScheduleAccelerationFinal;
+
+ dml_bool_t NomDETInKByteOverrideEnable; //<brief Nominal DET buffer size for a pipe. If this size fits the required 2 swaths; DML will use this DET size
+ dml_uint_t NomDETInKByteOverrideValue;
+
+ dml_bool_t DCCProgrammingAssumesScanDirectionUnknownFinal;
+ dml_bool_t SynchronizeTimingsFinal;
+ dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
+ dml_bool_t AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported; //<brief if set; the mode support will say mode is supported even though the DRAM clock change is not supported (assuming the soc will stay in max power state)
+ dml_bool_t AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported; //<brief if set; the mode support will say mode is supported even though the Fabric clock change is not supported (assuming the soc will stay in max power state)
+};
+
+/// @brief Contains important information after the mode support steps. Also why a mode is not supported.
+struct dml_mode_support_info_st { // NOTE(review): the [2] dimension on many members presumably indexes the MPC-combine option - confirm against the code that fills it
+ //-----------------
+ // Mode Support Information
+ //-----------------
+ dml_bool_t ModeIsSupported; //<brief Is the mode supported at any voltage and combine setting
+ dml_bool_t ImmediateFlipSupport; //<brief Means the mode supports immediate flip at the max combine setting; determined in mode support and used in mode programming
+ dml_uint_t MaximumMPCCombine; //<brief If using MPC combine helps the power saving support; then this will be set to 1
+ dml_bool_t UnboundedRequestEnabled;
+ dml_uint_t CompressedBufferSizeInkByte;
+
+ /* Mode Support Reason */
+ dml_bool_t WritebackLatencySupport; // NOTE(review): polarity differs per flag below (e.g. *Support vs Not*/Exceeded*/Invalid* names) - confirm each before testing it
+ dml_bool_t ScaleRatioAndTapsSupport;
+ dml_bool_t SourceFormatPixelAndScanSupport;
+ dml_bool_t MPCCombineMethodIncompatible;
+ dml_bool_t P2IWith420;
+ dml_bool_t DSCOnlyIfNecessaryWithBPP;
+ dml_bool_t DSC422NativeNotSupported;
+ dml_bool_t LinkRateDoesNotMatchDPVersion;
+ dml_bool_t LinkRateForMultistreamNotIndicated;
+ dml_bool_t BPPForMultistreamNotIndicated;
+ dml_bool_t MultistreamWithHDMIOreDP;
+ dml_bool_t MSOOrODMSplitWithNonDPLink;
+ dml_bool_t NotEnoughLanesForMSO;
+ dml_bool_t NumberOfOTGSupport;
+ dml_bool_t NumberOfHDMIFRLSupport;
+ dml_bool_t NumberOfDP2p0Support;
+ dml_bool_t NonsupportedDSCInputBPC;
+ dml_bool_t WritebackScaleRatioAndTapsSupport;
+ dml_bool_t CursorSupport;
+ dml_bool_t PitchSupport;
+ dml_bool_t ViewportExceedsSurface;
+ dml_bool_t ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified;
+ dml_bool_t ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe;
+ dml_bool_t InvalidCombinationOfMALLUseForPStateAndStaticScreen;
+ dml_bool_t InvalidCombinationOfMALLUseForPState;
+ dml_bool_t ExceededMALLSize;
+ dml_bool_t EnoughWritebackUnits;
+
+ dml_bool_t ExceededMultistreamSlots;
+ dml_bool_t ODMCombineTwoToOneSupportCheckOK;
+ dml_bool_t ODMCombineFourToOneSupportCheckOK;
+ dml_bool_t NotEnoughDSCUnits;
+ dml_bool_t NotEnoughDSCSlices;
+ dml_bool_t PixelsPerLinePerDSCUnitSupport;
+ dml_bool_t DSCCLKRequiredMoreThanSupported;
+ dml_bool_t DTBCLKRequiredMoreThanSupported;
+ dml_bool_t LinkCapacitySupport;
+
+ dml_bool_t ROBSupport[2];
+ dml_bool_t PTEBufferSizeNotExceeded[2];
+ dml_bool_t DCCMetaBufferSizeNotExceeded[2];
+ dml_bool_t TotalVerticalActiveBandwidthSupport[2];
+ enum dml_dram_clock_change_support DRAMClockChangeSupport[2];
+ dml_float_t ActiveDRAMClockChangeLatencyMargin[__DML_NUM_PLANES__];
+ dml_uint_t SubViewportLinesNeededInMALL[__DML_NUM_PLANES__];
+ enum dml_fclock_change_support FCLKChangeSupport[2];
+ dml_bool_t USRRetrainingSupport[2];
+ dml_bool_t VActiveBandwithSupport[2];
+ dml_bool_t PrefetchSupported[2];
+ dml_bool_t DynamicMetadataSupported[2];
+ dml_bool_t VRatioInPrefetchSupported[2];
+ dml_bool_t DISPCLK_DPPCLK_Support[2];
+ dml_bool_t TotalAvailablePipesSupport[2];
+ dml_bool_t ModeSupport[2];
+ dml_bool_t ViewportSizeSupport[2];
+ dml_bool_t ImmediateFlipSupportedForState[2];
+
+ dml_bool_t NoTimeForPrefetch[2][__DML_NUM_PLANES__];
+ dml_bool_t NoTimeForDynamicMetadata[2][__DML_NUM_PLANES__];
+
+ dml_bool_t MPCCombineEnable[__DML_NUM_PLANES__]; /// <brief Indicate if MPC Combine is enabled in the given state and optimized mpc combine setting
+ enum dml_odm_mode ODMMode[__DML_NUM_PLANES__]; /// <brief ODM mode that is chosen in the mode check stage and will be used in mode programming stage
+ dml_uint_t DPPPerSurface[__DML_NUM_PLANES__]; /// <brief How many DPPs are needed to drive the surface to output. If MPCC or ODMC could be 2 or 4.
+ dml_bool_t DSCEnabled[__DML_NUM_PLANES__]; /// <brief Indicate if the DSC is actually required; used in mode_programming
+ dml_bool_t FECEnabled[__DML_NUM_PLANES__]; /// <brief Indicate if the FEC is actually required
+ dml_uint_t NumberOfDSCSlices[__DML_NUM_PLANES__]; /// <brief Indicate how many slices are needed to support the given mode
+
+ dml_float_t OutputBpp[__DML_NUM_PLANES__];
+ enum dml_output_type_and_rate__type OutputType[__DML_NUM_PLANES__];
+ enum dml_output_type_and_rate__rate OutputRate[__DML_NUM_PLANES__];
+
+ dml_float_t AlignedDCCMetaPitchY[__DML_NUM_PLANES__]; /// <brief Pitch value that is aligned to tiling setting
+ dml_float_t AlignedDCCMetaPitchC[__DML_NUM_PLANES__];
+ dml_float_t AlignedYPitch[__DML_NUM_PLANES__];
+ dml_float_t AlignedCPitch[__DML_NUM_PLANES__];
+ dml_float_t MaxTotalVerticalActiveAvailableBandwidth[2]; /// <brief nominal bw available for display
+}; // dml_mode_support_info_st
+
+/// @brief Treat this as the intermediate values and outputs of the mode check function. User can query the content of the struct to know more about the result of mode evaluation.
+struct mode_support_st { // NOTE(review): the [2] dimension on many members presumably indexes the MPC-combine option - confirm against the code that fills it
+ struct ip_params_st ip;
+ struct soc_bounding_box_st soc;
+ struct soc_state_bounding_box_st state; //<brief Per-state bbox values; only 1 state per compute
+ struct dml_mode_eval_policy_st policy;
+
+ dml_uint_t state_idx; //<brief The power state idx for the power state under this computation
+ dml_uint_t max_state_idx; //<brief The MAX power state idx
+ struct soc_state_bounding_box_st max_state; //<brief The MAX power state; some algo needs to know the max state info to determine if (NOTE(review): sentence is truncated in the original source)
+ struct dml_display_cfg_st cache_display_cfg; // <brief A copy of the current display cfg in consideration
+
+ // Physical info; only used for programming
+ dml_uint_t num_active_planes; // <brief As determined by either e2e_pipe_param or display_cfg
+
+ // Calculated Clocks
+ dml_float_t RequiredDISPCLK[2]; /// <brief Required DISPCLK; depends on pixel rate; odm mode etc.
+ dml_float_t RequiredDPPCLKThisState[__DML_NUM_PLANES__];
+ dml_float_t DCFCLKState[2]; /// <brief recommended DCFCLK freq; calculated by DML. If UseMinimumRequiredDCFCLK is not set; then it will be just the state DCFCLK; else it will be the min DCFCLK for support
+ dml_float_t RequiredDISPCLKPerSurface[2][__DML_NUM_PLANES__];
+ dml_float_t RequiredDPPCLKPerSurface[2][__DML_NUM_PLANES__];
+
+ dml_float_t FabricClock; /// <brief Basically just the clock freq at the min (or given) state
+ dml_float_t DRAMSpeed; /// <brief Basically just the clock freq at the min (or given) state
+ dml_float_t SOCCLK; /// <brief Basically just the clock freq at the min (or given) state
+ dml_float_t DCFCLK; /// <brief Basically just the clock freq at the min (or given) state and max combine setting
+ dml_float_t GlobalDPPCLK; /// <brief the Max DPPCLK freq out of all pipes
+
+ // ----------------------------------
+ // Mode Support Info and fail reason
+ // ----------------------------------
+ struct dml_mode_support_info_st support;
+
+ // These are calculated before the ModeSupport and ModeProgram step
+ // They represent the bound for the return buffer sizing
+ dml_uint_t MaxTotalDETInKByte;
+ dml_uint_t NomDETInKByte;
+ dml_uint_t MinCompressedBufferSizeInKByte;
+
+ // Info obtained at the end of mode support calculations
+ // The reported info is at the "optimal" state and combine setting
+ dml_float_t ReturnBW;
+ dml_float_t ReturnDRAMBW;
+ dml_uint_t DETBufferSizeInKByte[__DML_NUM_PLANES__]; // <brief Recommended DET size configuration for this plane. All pipes under this plane should program the DET buffer size to the calculated value.
+ dml_uint_t DETBufferSizeY[__DML_NUM_PLANES__];
+ dml_uint_t DETBufferSizeC[__DML_NUM_PLANES__];
+ dml_uint_t SwathHeightY[__DML_NUM_PLANES__];
+ dml_uint_t SwathHeightC[__DML_NUM_PLANES__];
+
+ // ----------------------------------
+ // Intermediates/Informational
+ // ----------------------------------
+ dml_uint_t TotImmediateFlipBytes;
+ dml_bool_t DCCEnabledInAnySurface;
+ dml_float_t WritebackRequiredDISPCLK;
+ dml_float_t TimeCalc;
+ dml_float_t TWait;
+
+ dml_uint_t SwathWidthYAllStates[2][__DML_NUM_PLANES__]; // Many values below come as an *AllStates / [2][...] member plus a matching *ThisState / *_this_state member
+ dml_uint_t SwathWidthCAllStates[2][__DML_NUM_PLANES__];
+ dml_uint_t SwathHeightYAllStates[2][__DML_NUM_PLANES__];
+ dml_uint_t SwathHeightCAllStates[2][__DML_NUM_PLANES__];
+ dml_uint_t SwathWidthYThisState[__DML_NUM_PLANES__];
+ dml_uint_t SwathWidthCThisState[__DML_NUM_PLANES__];
+ dml_uint_t SwathHeightYThisState[__DML_NUM_PLANES__];
+ dml_uint_t SwathHeightCThisState[__DML_NUM_PLANES__];
+ dml_uint_t DETBufferSizeInKByteAllStates[2][__DML_NUM_PLANES__];
+ dml_uint_t DETBufferSizeYAllStates[2][__DML_NUM_PLANES__];
+ dml_uint_t DETBufferSizeCAllStates[2][__DML_NUM_PLANES__];
+ dml_bool_t UnboundedRequestEnabledAllStates[2];
+ dml_uint_t CompressedBufferSizeInkByteAllStates[2];
+ dml_bool_t UnboundedRequestEnabledThisState;
+ dml_uint_t CompressedBufferSizeInkByteThisState;
+ dml_uint_t DETBufferSizeInKByteThisState[__DML_NUM_PLANES__];
+ dml_uint_t DETBufferSizeYThisState[__DML_NUM_PLANES__];
+ dml_uint_t DETBufferSizeCThisState[__DML_NUM_PLANES__];
+ dml_float_t VRatioPreY[2][__DML_NUM_PLANES__];
+ dml_float_t VRatioPreC[2][__DML_NUM_PLANES__];
+ dml_uint_t swath_width_luma_ub_all_states[2][__DML_NUM_PLANES__];
+ dml_uint_t swath_width_chroma_ub_all_states[2][__DML_NUM_PLANES__];
+ dml_uint_t swath_width_luma_ub_this_state[__DML_NUM_PLANES__];
+ dml_uint_t swath_width_chroma_ub_this_state[__DML_NUM_PLANES__];
+ dml_uint_t RequiredSlots[__DML_NUM_PLANES__];
+ dml_uint_t PDEAndMetaPTEBytesPerFrame[2][__DML_NUM_PLANES__];
+ dml_uint_t MetaRowBytes[2][__DML_NUM_PLANES__];
+ dml_uint_t DPTEBytesPerRow[2][__DML_NUM_PLANES__];
+ dml_uint_t PrefetchLinesY[2][__DML_NUM_PLANES__];
+ dml_uint_t PrefetchLinesC[2][__DML_NUM_PLANES__];
+ dml_uint_t MaxNumSwY[__DML_NUM_PLANES__]; /// <brief Max number of swaths for prefetch
+ dml_uint_t MaxNumSwC[__DML_NUM_PLANES__]; /// <brief Max number of swaths for prefetch
+ dml_uint_t PrefillY[__DML_NUM_PLANES__];
+ dml_uint_t PrefillC[__DML_NUM_PLANES__];
+
+ dml_uint_t PrefetchLinesYThisState[__DML_NUM_PLANES__];
+ dml_uint_t PrefetchLinesCThisState[__DML_NUM_PLANES__];
+ dml_uint_t DPTEBytesPerRowThisState[__DML_NUM_PLANES__];
+ dml_uint_t PDEAndMetaPTEBytesPerFrameThisState[__DML_NUM_PLANES__];
+ dml_uint_t MetaRowBytesThisState[__DML_NUM_PLANES__];
+ dml_bool_t use_one_row_for_frame[2][__DML_NUM_PLANES__];
+ dml_bool_t use_one_row_for_frame_flip[2][__DML_NUM_PLANES__];
+ dml_bool_t use_one_row_for_frame_this_state[__DML_NUM_PLANES__];
+ dml_bool_t use_one_row_for_frame_flip_this_state[__DML_NUM_PLANES__];
+
+ dml_float_t LineTimesForPrefetch[__DML_NUM_PLANES__];
+ dml_float_t LinesForMetaPTE[__DML_NUM_PLANES__];
+ dml_float_t LinesForMetaAndDPTERow[__DML_NUM_PLANES__];
+ dml_float_t SwathWidthYSingleDPP[__DML_NUM_PLANES__];
+ dml_float_t SwathWidthCSingleDPP[__DML_NUM_PLANES__];
+ dml_uint_t BytePerPixelY[__DML_NUM_PLANES__];
+ dml_uint_t BytePerPixelC[__DML_NUM_PLANES__];
+ dml_float_t BytePerPixelInDETY[__DML_NUM_PLANES__];
+ dml_float_t BytePerPixelInDETC[__DML_NUM_PLANES__];
+
+ dml_uint_t Read256BlockHeightY[__DML_NUM_PLANES__];
+ dml_uint_t Read256BlockWidthY[__DML_NUM_PLANES__];
+ dml_uint_t Read256BlockHeightC[__DML_NUM_PLANES__];
+ dml_uint_t Read256BlockWidthC[__DML_NUM_PLANES__];
+ dml_uint_t MacroTileHeightY[__DML_NUM_PLANES__];
+ dml_uint_t MacroTileHeightC[__DML_NUM_PLANES__];
+ dml_uint_t MacroTileWidthY[__DML_NUM_PLANES__];
+ dml_uint_t MacroTileWidthC[__DML_NUM_PLANES__];
+ dml_float_t PSCL_FACTOR[__DML_NUM_PLANES__];
+ dml_float_t PSCL_FACTOR_CHROMA[__DML_NUM_PLANES__];
+ dml_float_t MaximumSwathWidthLuma[__DML_NUM_PLANES__];
+ dml_float_t MaximumSwathWidthChroma[__DML_NUM_PLANES__];
+ dml_float_t Tno_bw[__DML_NUM_PLANES__];
+ dml_float_t DestinationLinesToRequestVMInImmediateFlip[__DML_NUM_PLANES__];
+ dml_float_t DestinationLinesToRequestRowInImmediateFlip[__DML_NUM_PLANES__];
+ dml_float_t WritebackDelayTime[__DML_NUM_PLANES__];
+ dml_uint_t dpte_group_bytes[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_height[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_height_chroma[__DML_NUM_PLANES__];
+ dml_uint_t meta_row_height[__DML_NUM_PLANES__];
+ dml_uint_t meta_row_height_chroma[__DML_NUM_PLANES__];
+ dml_float_t UrgLatency;
+ dml_float_t UrgentBurstFactorCursor[2][__DML_NUM_PLANES__];
+ dml_float_t UrgentBurstFactorCursorPre[__DML_NUM_PLANES__];
+ dml_float_t UrgentBurstFactorLuma[2][__DML_NUM_PLANES__];
+ dml_float_t UrgentBurstFactorLumaPre[__DML_NUM_PLANES__];
+ dml_float_t UrgentBurstFactorChroma[2][__DML_NUM_PLANES__];
+ dml_float_t UrgentBurstFactorChromaPre[__DML_NUM_PLANES__];
+ dml_float_t MaximumSwathWidthInLineBufferLuma;
+ dml_float_t MaximumSwathWidthInLineBufferChroma;
+ dml_float_t ExtraLatency;
+
+ // Backend
+ dml_bool_t RequiresDSC[__DML_NUM_PLANES__];
+ dml_bool_t RequiresFEC[__DML_NUM_PLANES__];
+ dml_float_t OutputBppPerState[__DML_NUM_PLANES__];
+ dml_uint_t DSCDelayPerState[__DML_NUM_PLANES__];
+ enum dml_output_type_and_rate__type OutputTypePerState[__DML_NUM_PLANES__];
+ enum dml_output_type_and_rate__rate OutputRatePerState[__DML_NUM_PLANES__];
+
+ // Bandwidth Related Info
+ dml_float_t BandwidthAvailableForImmediateFlip;
+ dml_float_t ReadBandwidthLuma[__DML_NUM_PLANES__];
+ dml_float_t ReadBandwidthChroma[__DML_NUM_PLANES__];
+ dml_float_t WriteBandwidth[__DML_NUM_PLANES__];
+ dml_float_t RequiredPrefetchPixelDataBWLuma[__DML_NUM_PLANES__];
+ dml_float_t RequiredPrefetchPixelDataBWChroma[__DML_NUM_PLANES__];
+ dml_float_t cursor_bw[__DML_NUM_PLANES__];
+ dml_float_t cursor_bw_pre[__DML_NUM_PLANES__];
+ dml_float_t prefetch_vmrow_bw[__DML_NUM_PLANES__];
+ dml_float_t final_flip_bw[__DML_NUM_PLANES__];
+ dml_float_t meta_row_bandwidth_this_state[__DML_NUM_PLANES__];
+ dml_float_t dpte_row_bandwidth_this_state[__DML_NUM_PLANES__];
+ dml_float_t ReturnBWPerState[2];
+ dml_float_t ReturnDRAMBWPerState[2];
+ dml_float_t meta_row_bandwidth[2][__DML_NUM_PLANES__];
+ dml_float_t dpte_row_bandwidth[2][__DML_NUM_PLANES__];
+
+ // Values that should be fed back to the caller
+ enum dml_odm_mode ODMModePerState[__DML_NUM_PLANES__];
+ enum dml_odm_mode ODMModeThisState[__DML_NUM_PLANES__];
+ dml_uint_t SurfaceSizeInMALL[__DML_NUM_PLANES__];
+ dml_uint_t NoOfDPP[2][__DML_NUM_PLANES__];
+ dml_uint_t NoOfDPPThisState[__DML_NUM_PLANES__];
+ dml_bool_t MPCCombine[2][__DML_NUM_PLANES__];
+ dml_bool_t MPCCombineThisState[__DML_NUM_PLANES__];
+ dml_float_t ProjectedDCFCLKDeepSleep[2];
+ dml_float_t MinDPPCLKUsingSingleDPP[__DML_NUM_PLANES__];
+ dml_bool_t SingleDPPViewportSizeSupportPerSurface[__DML_NUM_PLANES__];
+ dml_bool_t ImmediateFlipSupportedForPipe[__DML_NUM_PLANES__];
+ dml_bool_t NotUrgentLatencyHiding[__DML_NUM_PLANES__];
+ dml_bool_t NotUrgentLatencyHidingPre[__DML_NUM_PLANES__];
+ dml_bool_t PTEBufferSizeNotExceededPerState[__DML_NUM_PLANES__];
+ dml_bool_t DCCMetaBufferSizeNotExceededPerState[__DML_NUM_PLANES__];
+ dml_uint_t PrefetchMode[__DML_NUM_PLANES__];
+ dml_uint_t TotalNumberOfActiveDPP[2];
+ dml_uint_t TotalNumberOfSingleDPPSurfaces[2];
+ dml_uint_t TotalNumberOfDCCActiveDPP[2];
+
+ dml_uint_t SubViewportLinesNeededInMALL[__DML_NUM_PLANES__]; // NOTE(review): a member with the same name and type also exists in the embedded support struct (support.SubViewportLinesNeededInMALL) - confirm which one consumers read
+
+}; // mode_support_st
+
+/// @brief A mega structure that houses various info for the mode programming step.
+struct mode_program_st { // Members are either scalars or arrays dimensioned by __DML_NUM_PLANES__.
+
+ //-------------
+ // Intermediate/Informational
+ //-------------
+ dml_float_t UrgentLatency;
+ dml_float_t UrgentLatencyWithUSRRetraining;
+ dml_uint_t VInitPreFillY[__DML_NUM_PLANES__];
+ dml_uint_t VInitPreFillC[__DML_NUM_PLANES__];
+ dml_uint_t MaxNumSwathY[__DML_NUM_PLANES__];
+ dml_uint_t MaxNumSwathC[__DML_NUM_PLANES__];
+
+ dml_float_t BytePerPixelDETY[__DML_NUM_PLANES__];
+ dml_float_t BytePerPixelDETC[__DML_NUM_PLANES__];
+ dml_uint_t BytePerPixelY[__DML_NUM_PLANES__];
+ dml_uint_t BytePerPixelC[__DML_NUM_PLANES__];
+ dml_uint_t SwathWidthY[__DML_NUM_PLANES__];
+ dml_uint_t SwathWidthC[__DML_NUM_PLANES__];
+ dml_uint_t SwathWidthSingleDPPY[__DML_NUM_PLANES__];
+ dml_uint_t SwathWidthSingleDPPC[__DML_NUM_PLANES__];
+ dml_float_t ReadBandwidthSurfaceLuma[__DML_NUM_PLANES__];
+ dml_float_t ReadBandwidthSurfaceChroma[__DML_NUM_PLANES__];
+
+ dml_uint_t PixelPTEBytesPerRow[__DML_NUM_PLANES__];
+ dml_uint_t PDEAndMetaPTEBytesFrame[__DML_NUM_PLANES__];
+ dml_uint_t MetaRowByte[__DML_NUM_PLANES__];
+ dml_uint_t PrefetchSourceLinesY[__DML_NUM_PLANES__];
+ dml_float_t RequiredPrefetchPixDataBWLuma[__DML_NUM_PLANES__];
+ dml_float_t RequiredPrefetchPixDataBWChroma[__DML_NUM_PLANES__];
+ dml_uint_t PrefetchSourceLinesC[__DML_NUM_PLANES__];
+ dml_float_t PSCL_THROUGHPUT[__DML_NUM_PLANES__];
+ dml_float_t PSCL_THROUGHPUT_CHROMA[__DML_NUM_PLANES__];
+ dml_uint_t DSCDelay[__DML_NUM_PLANES__];
+ dml_float_t DPPCLKUsingSingleDPP[__DML_NUM_PLANES__];
+
+ dml_uint_t MacroTileWidthY[__DML_NUM_PLANES__];
+ dml_uint_t MacroTileWidthC[__DML_NUM_PLANES__];
+ dml_uint_t BlockHeight256BytesY[__DML_NUM_PLANES__];
+ dml_uint_t BlockHeight256BytesC[__DML_NUM_PLANES__];
+ dml_uint_t BlockWidth256BytesY[__DML_NUM_PLANES__];
+ dml_uint_t BlockWidth256BytesC[__DML_NUM_PLANES__];
+
+ dml_uint_t BlockHeightY[__DML_NUM_PLANES__];
+ dml_uint_t BlockHeightC[__DML_NUM_PLANES__];
+ dml_uint_t BlockWidthY[__DML_NUM_PLANES__];
+ dml_uint_t BlockWidthC[__DML_NUM_PLANES__];
+
+ dml_uint_t SurfaceSizeInTheMALL[__DML_NUM_PLANES__];
+ dml_float_t VRatioPrefetchY[__DML_NUM_PLANES__];
+ dml_float_t VRatioPrefetchC[__DML_NUM_PLANES__];
+ dml_float_t Tno_bw[__DML_NUM_PLANES__];
+ dml_float_t final_flip_bw[__DML_NUM_PLANES__];
+ dml_float_t prefetch_vmrow_bw[__DML_NUM_PLANES__];
+ dml_float_t cursor_bw[__DML_NUM_PLANES__];
+ dml_float_t cursor_bw_pre[__DML_NUM_PLANES__];
+ dml_float_t WritebackDelay[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_height[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_height_linear[__DML_NUM_PLANES__];
+ dml_uint_t meta_req_width[__DML_NUM_PLANES__];
+ dml_uint_t meta_req_height[__DML_NUM_PLANES__];
+ dml_uint_t meta_row_width[__DML_NUM_PLANES__];
+ dml_uint_t meta_row_height[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_width_luma_ub[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_width_chroma_ub[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_height_chroma[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_height_linear_chroma[__DML_NUM_PLANES__];
+ dml_uint_t meta_req_width_chroma[__DML_NUM_PLANES__];
+ dml_uint_t meta_req_height_chroma[__DML_NUM_PLANES__];
+ dml_uint_t meta_row_width_chroma[__DML_NUM_PLANES__];
+ dml_uint_t meta_row_height_chroma[__DML_NUM_PLANES__];
+ dml_uint_t vm_group_bytes[__DML_NUM_PLANES__];
+ dml_uint_t dpte_group_bytes[__DML_NUM_PLANES__];
+ dml_float_t meta_row_bw[__DML_NUM_PLANES__];
+ dml_float_t dpte_row_bw[__DML_NUM_PLANES__];
+ dml_float_t UrgBurstFactorCursor[__DML_NUM_PLANES__];
+ dml_float_t UrgBurstFactorCursorPre[__DML_NUM_PLANES__];
+ dml_float_t UrgBurstFactorLuma[__DML_NUM_PLANES__];
+ dml_float_t UrgBurstFactorLumaPre[__DML_NUM_PLANES__];
+ dml_float_t UrgBurstFactorChroma[__DML_NUM_PLANES__];
+ dml_float_t UrgBurstFactorChromaPre[__DML_NUM_PLANES__];
+
+ dml_uint_t swath_width_luma_ub[__DML_NUM_PLANES__];
+ dml_uint_t swath_width_chroma_ub[__DML_NUM_PLANES__];
+ dml_uint_t PixelPTEReqWidthY[__DML_NUM_PLANES__];
+ dml_uint_t PixelPTEReqHeightY[__DML_NUM_PLANES__];
+ dml_uint_t PTERequestSizeY[__DML_NUM_PLANES__];
+ dml_uint_t PixelPTEReqWidthC[__DML_NUM_PLANES__];
+ dml_uint_t PixelPTEReqHeightC[__DML_NUM_PLANES__];
+ dml_uint_t PTERequestSizeC[__DML_NUM_PLANES__];
+
+ dml_float_t Tdmdl_vm[__DML_NUM_PLANES__];
+ dml_float_t Tdmdl[__DML_NUM_PLANES__];
+ dml_float_t TSetup[__DML_NUM_PLANES__];
+ dml_uint_t dpde0_bytes_per_frame_ub_l[__DML_NUM_PLANES__];
+ dml_uint_t meta_pte_bytes_per_frame_ub_l[__DML_NUM_PLANES__];
+ dml_uint_t dpde0_bytes_per_frame_ub_c[__DML_NUM_PLANES__];
+ dml_uint_t meta_pte_bytes_per_frame_ub_c[__DML_NUM_PLANES__];
+
+ dml_bool_t UnboundedRequestEnabled;
+ dml_uint_t compbuf_reserved_space_64b;
+ dml_uint_t compbuf_reserved_space_zs;
+ dml_uint_t CompressedBufferSizeInkByte;
+
+ dml_bool_t NoUrgentLatencyHiding[__DML_NUM_PLANES__]; // NOTE(review): spelled "No..." here but "Not..." (NotUrgentLatencyHiding) in mode_support_st
+ dml_bool_t NoUrgentLatencyHidingPre[__DML_NUM_PLANES__]; // NOTE(review): mode_support_st counterpart is NotUrgentLatencyHidingPre
+ dml_float_t UrgentExtraLatency;
+ dml_bool_t PrefetchAndImmediateFlipSupported;
+ dml_float_t TotalDataReadBandwidth;
+ dml_float_t BandwidthAvailableForImmediateFlip;
+ dml_bool_t NotEnoughTimeForDynamicMetadata[__DML_NUM_PLANES__];
+
+ dml_float_t ReadBandwidthLuma[__DML_NUM_PLANES__];
+ dml_float_t ReadBandwidthChroma[__DML_NUM_PLANES__];
+
+ dml_float_t total_dcn_read_bw_with_flip;
+ dml_float_t total_dcn_read_bw_with_flip_no_urgent_burst;
+ dml_float_t TotalDataReadBandwidthNotIncludingMALLPrefetch;
+ dml_float_t total_dcn_read_bw_with_flip_not_including_MALL_prefetch;
+ dml_float_t non_urgent_total_dcn_read_bw_with_flip;
+ dml_float_t non_urgent_total_dcn_read_bw_with_flip_not_including_MALL_prefetch;
+
+ dml_bool_t use_one_row_for_frame[__DML_NUM_PLANES__];
+ dml_bool_t use_one_row_for_frame_flip[__DML_NUM_PLANES__];
+
+ dml_float_t TCalc;
+ dml_uint_t TotImmediateFlipBytes;
+
+ // -------------------
+ // Output
+ // -------------------
+ dml_uint_t pipe_plane[__DML_NUM_PLANES__]; /// <brief Used mainly by dv to map the pipe inst to plane index within DML core; the plane idx of a pipe
+ dml_uint_t num_active_pipes;
+
+ dml_bool_t NoTimeToPrefetch[__DML_NUM_PLANES__]; /// <brief Prefetch schedule calculation result
+
+ // Support
+ dml_uint_t PrefetchMode[__DML_NUM_PLANES__]; /// <brief prefetch mode used for prefetch support check in mode programming step
+ dml_bool_t PrefetchModeSupported; /// <brief Is the prefetch mode (bandwidth and latency) supported
+ dml_bool_t ImmediateFlipSupported;
+ dml_bool_t ImmediateFlipSupportedForPipe[__DML_NUM_PLANES__];
+
+ // Clock
+ dml_float_t Dcfclk;
+ dml_float_t Dispclk; /// <brief dispclk being used in mode programming
+ dml_float_t Dppclk[__DML_NUM_PLANES__]; /// <brief dppclk being used in mode programming
+ dml_float_t WritebackDISPCLK;
+ dml_float_t GlobalDPPCLK;
+
+ /// @brief These "calculated" dispclk and dppclk clocks are calculated in the mode programming step.
+ // Depending on the dml_clk_cfg_st option, these calculated values may not be used in subsequent calculations.
+ // Possible DV usage: the calculated values are fetched by the test once after the mode_programming step, and the test
+ // may then use the values as a minimum and adjust the actual freq used for the 2nd pass.
+ dml_float_t Dispclk_calculated;
+ dml_float_t Dppclk_calculated[__DML_NUM_PLANES__];
+
+ dml_float_t DSCCLK_calculated[__DML_NUM_PLANES__]; /// <brief Required DSCCLK freq. Backend; not used in any subsequent calculations for now
+ dml_float_t DCFCLKDeepSleep;
+
+ // ARB reg
+ dml_bool_t DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
+ struct Watermarks Watermark;
+
+ // DCC compression control
+ dml_uint_t DCCYMaxUncompressedBlock[__DML_NUM_PLANES__];
+ dml_uint_t DCCYMaxCompressedBlock[__DML_NUM_PLANES__];
+ dml_uint_t DCCYIndependentBlock[__DML_NUM_PLANES__];
+ dml_uint_t DCCCMaxUncompressedBlock[__DML_NUM_PLANES__];
+ dml_uint_t DCCCMaxCompressedBlock[__DML_NUM_PLANES__];
+ dml_uint_t DCCCIndependentBlock[__DML_NUM_PLANES__];
+
+ // Stutter Efficiency
+ dml_float_t StutterEfficiency;
+ dml_float_t StutterEfficiencyNotIncludingVBlank;
+ dml_uint_t NumberOfStutterBurstsPerFrame;
+ dml_float_t Z8StutterEfficiency;
+ dml_uint_t Z8NumberOfStutterBurstsPerFrame;
+ dml_float_t Z8StutterEfficiencyNotIncludingVBlank;
+ dml_float_t StutterPeriod;
+ dml_float_t Z8StutterEfficiencyBestCase;
+ dml_uint_t Z8NumberOfStutterBurstsPerFrameBestCase;
+ dml_float_t Z8StutterEfficiencyNotIncludingVBlankBestCase;
+ dml_float_t StutterPeriodBestCase;
+
+ // DLG TTU reg
+ dml_float_t MIN_DST_Y_NEXT_START[__DML_NUM_PLANES__];
+ dml_bool_t VREADY_AT_OR_AFTER_VSYNC[__DML_NUM_PLANES__];
+ dml_uint_t DSTYAfterScaler[__DML_NUM_PLANES__];
+ dml_uint_t DSTXAfterScaler[__DML_NUM_PLANES__];
+ dml_float_t DestinationLinesForPrefetch[__DML_NUM_PLANES__];
+ dml_float_t DestinationLinesToRequestVMInVBlank[__DML_NUM_PLANES__];
+ dml_float_t DestinationLinesToRequestRowInVBlank[__DML_NUM_PLANES__];
+ dml_float_t DestinationLinesToRequestVMInImmediateFlip[__DML_NUM_PLANES__];
+ dml_float_t DestinationLinesToRequestRowInImmediateFlip[__DML_NUM_PLANES__];
+ dml_float_t MinTTUVBlank[__DML_NUM_PLANES__];
+ dml_float_t DisplayPipeLineDeliveryTimeLuma[__DML_NUM_PLANES__];
+ dml_float_t DisplayPipeLineDeliveryTimeChroma[__DML_NUM_PLANES__];
+ dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[__DML_NUM_PLANES__];
+ dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[__DML_NUM_PLANES__];
+ dml_float_t DisplayPipeRequestDeliveryTimeLuma[__DML_NUM_PLANES__];
+ dml_float_t DisplayPipeRequestDeliveryTimeChroma[__DML_NUM_PLANES__];
+ dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[__DML_NUM_PLANES__];
+ dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[__DML_NUM_PLANES__];
+ dml_float_t CursorRequestDeliveryTime[__DML_NUM_PLANES__];
+ dml_float_t CursorRequestDeliveryTimePrefetch[__DML_NUM_PLANES__];
+
+ dml_float_t DST_Y_PER_PTE_ROW_NOM_L[__DML_NUM_PLANES__];
+ dml_float_t DST_Y_PER_PTE_ROW_NOM_C[__DML_NUM_PLANES__];
+ dml_float_t DST_Y_PER_META_ROW_NOM_L[__DML_NUM_PLANES__];
+ dml_float_t DST_Y_PER_META_ROW_NOM_C[__DML_NUM_PLANES__];
+ dml_float_t TimePerMetaChunkNominal[__DML_NUM_PLANES__];
+ dml_float_t TimePerChromaMetaChunkNominal[__DML_NUM_PLANES__];
+ dml_float_t TimePerMetaChunkVBlank[__DML_NUM_PLANES__];
+ dml_float_t TimePerChromaMetaChunkVBlank[__DML_NUM_PLANES__];
+ dml_float_t TimePerMetaChunkFlip[__DML_NUM_PLANES__];
+ dml_float_t TimePerChromaMetaChunkFlip[__DML_NUM_PLANES__];
+ dml_float_t time_per_pte_group_nom_luma[__DML_NUM_PLANES__];
+ dml_float_t time_per_pte_group_nom_chroma[__DML_NUM_PLANES__];
+ dml_float_t time_per_pte_group_vblank_luma[__DML_NUM_PLANES__];
+ dml_float_t time_per_pte_group_vblank_chroma[__DML_NUM_PLANES__];
+ dml_float_t time_per_pte_group_flip_luma[__DML_NUM_PLANES__];
+ dml_float_t time_per_pte_group_flip_chroma[__DML_NUM_PLANES__];
+ dml_float_t TimePerVMGroupVBlank[__DML_NUM_PLANES__];
+ dml_float_t TimePerVMGroupFlip[__DML_NUM_PLANES__];
+ dml_float_t TimePerVMRequestVBlank[__DML_NUM_PLANES__];
+ dml_float_t TimePerVMRequestFlip[__DML_NUM_PLANES__];
+
+ dml_float_t FractionOfUrgentBandwidth;
+ dml_float_t FractionOfUrgentBandwidthImmediateFlip;
+
+ // RQ registers
+ dml_bool_t PTE_BUFFER_MODE[__DML_NUM_PLANES__];
+ dml_uint_t BIGK_FRAGMENT_SIZE[__DML_NUM_PLANES__];
+
+ dml_uint_t SubViewportLinesNeededInMALL[__DML_NUM_PLANES__];
+ dml_bool_t UsesMALLForStaticScreen[__DML_NUM_PLANES__];
+
+ // OTG
+ dml_uint_t VStartupMin[__DML_NUM_PLANES__]; /// <brief Minimum vstartup to meet the prefetch schedule (i.e. the prefetch solution can be found at this vstartup time); not the actual global sync vstartup pos.
+ dml_uint_t VStartup[__DML_NUM_PLANES__]; /// <brief The vstartup value for OTG programming (will set to max vstartup; but now bounded by min(vblank_nom, actual vblank))
+ dml_uint_t VUpdateOffsetPix[__DML_NUM_PLANES__];
+ dml_uint_t VUpdateWidthPix[__DML_NUM_PLANES__];
+ dml_uint_t VReadyOffsetPix[__DML_NUM_PLANES__];
+
+ // Latency and Support
+ dml_float_t MaxActiveFCLKChangeLatencySupported;
+ dml_bool_t USRRetrainingSupport;
+ enum dml_fclock_change_support FCLKChangeSupport;
+ enum dml_dram_clock_change_support DRAMClockChangeSupport;
+ dml_float_t MaxActiveDRAMClockChangeLatencySupported[__DML_NUM_PLANES__];
+ dml_float_t WritebackAllowFCLKChangeEndPosition[__DML_NUM_PLANES__];
+ dml_float_t WritebackAllowDRAMClockChangeEndPosition[__DML_NUM_PLANES__];
+
+ // buffer sizing
+ dml_uint_t DETBufferSizeInKByte[__DML_NUM_PLANES__]; /// <brief Recommended DET size configuration for this plane. All pipes under this plane should program the DET buffer size to the calculated value.
+ dml_uint_t DETBufferSizeY[__DML_NUM_PLANES__];
+ dml_uint_t DETBufferSizeC[__DML_NUM_PLANES__];
+ dml_uint_t SwathHeightY[__DML_NUM_PLANES__];
+ dml_uint_t SwathHeightC[__DML_NUM_PLANES__];
+}; // mode_program_st
+
+struct soc_states_st { // Table of SoC power states: an entry count plus a fixed-capacity array of per-state bounding boxes.
+ dml_uint_t num_states; /// <brief num of soc pwr states (presumably the number of valid entries in state_array; confirm)
+ struct soc_state_bounding_box_st state_array[__DML_MAX_STATE_ARRAY_SIZE__]; /// <brief fixed size array that holds states struct; capacity is __DML_MAX_STATE_ARRAY_SIZE__
+};
+
+struct UseMinimumDCFCLK_params_st { // Parameter bundle; presumably the arguments of a UseMinimumDCFCLK routine (not visible in this chunk) - TODO confirm. Mixes by-value scalars with pointers to caller-owned data.
+ enum dml_use_mall_for_pstate_change_mode *UseMALLForPStateChange;
+ dml_bool_t *DRRDisplay;
+ dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
+ dml_uint_t MaxInterDCNTileRepeaters;
+ dml_uint_t MaxPrefetchMode;
+ dml_float_t DRAMClockChangeLatencyFinal;
+ dml_float_t FCLKChangeLatency;
+ dml_float_t SREnterPlusExitTime;
+ dml_uint_t ReturnBusWidth;
+ dml_uint_t RoundTripPingLatencyCycles;
+ dml_uint_t ReorderingBytes;
+ dml_uint_t PixelChunkSizeInKByte;
+ dml_uint_t MetaChunkSize;
+ dml_bool_t GPUVMEnable;
+ dml_uint_t GPUVMMaxPageTableLevels;
+ dml_bool_t HostVMEnable;
+ dml_uint_t NumberOfActiveSurfaces;
+ dml_uint_t HostVMMinPageSize;
+ dml_uint_t HostVMMaxNonCachedPageTableLevels;
+ dml_bool_t DynamicMetadataVMEnabled;
+ dml_bool_t ImmediateFlipRequirement;
+ dml_bool_t ProgressiveToInterlaceUnitInOPP;
+ dml_float_t MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation;
+ dml_float_t PercentOfIdealSDPPortBWReceivedAfterUrgLatency;
+ dml_uint_t *VTotal;
+ dml_uint_t *VActive;
+ dml_uint_t *DynamicMetadataTransmittedBytes;
+ dml_uint_t *DynamicMetadataLinesBeforeActiveRequired;
+ dml_bool_t *Interlace;
+ dml_float_t (*RequiredDPPCLKPerSurface)[__DML_NUM_PLANES__]; // pointer to an array of __DML_NUM_PLANES__ elements (row pointer into a 2-D array), unlike the plain `T *` members
+ dml_float_t *RequiredDISPCLK;
+ dml_float_t UrgLatency;
+ dml_uint_t (*NoOfDPP)[__DML_NUM_PLANES__]; // type-compatible with mode_support_st's NoOfDPP[2][__DML_NUM_PLANES__]; presumably points at it - confirm
+ dml_float_t *ProjectedDCFCLKDeepSleep;
+ dml_uint_t (*MaximumVStartup)[__DML_NUM_PLANES__];
+ dml_uint_t *TotalNumberOfActiveDPP;
+ dml_uint_t *TotalNumberOfDCCActiveDPP;
+ dml_uint_t *dpte_group_bytes;
+ dml_uint_t (*PrefetchLinesY)[__DML_NUM_PLANES__];
+ dml_uint_t (*PrefetchLinesC)[__DML_NUM_PLANES__];
+ dml_uint_t (*swath_width_luma_ub_all_states)[__DML_NUM_PLANES__];
+ dml_uint_t (*swath_width_chroma_ub_all_states)[__DML_NUM_PLANES__];
+ dml_uint_t *BytePerPixelY;
+ dml_uint_t *BytePerPixelC;
+ dml_uint_t *HTotal;
+ dml_float_t *PixelClock;
+ dml_uint_t (*PDEAndMetaPTEBytesPerFrame)[__DML_NUM_PLANES__];
+ dml_uint_t (*DPTEBytesPerRow)[__DML_NUM_PLANES__];
+ dml_uint_t (*MetaRowBytes)[__DML_NUM_PLANES__];
+ dml_bool_t *DynamicMetadataEnable;
+ dml_float_t *ReadBandwidthLuma;
+ dml_float_t *ReadBandwidthChroma;
+ dml_float_t DCFCLKPerState;
+ dml_float_t *DCFCLKState; // NOTE(review): no `// Output` marker in this struct; presumably this pointer is the result slot - confirm against the consumer
+};
+
+struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st { // Parameter bundle; presumably the arguments of the same-named calculation routine (not visible in this chunk) - TODO confirm.
+ dml_bool_t USRRetrainingRequiredFinal;
+ enum dml_use_mall_for_pstate_change_mode *UseMALLForPStateChange;
+ dml_uint_t *PrefetchMode;
+ dml_uint_t NumberOfActiveSurfaces;
+ dml_uint_t MaxLineBufferLines;
+ dml_uint_t LineBufferSize;
+ dml_uint_t WritebackInterfaceBufferSize;
+ dml_float_t DCFCLK;
+ dml_float_t ReturnBW;
+ dml_bool_t SynchronizeTimingsFinal;
+ dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
+ dml_bool_t *DRRDisplay;
+ dml_uint_t *dpte_group_bytes;
+ dml_uint_t *meta_row_height;
+ dml_uint_t *meta_row_height_chroma;
+ struct SOCParametersList mmSOCParameters; // embedded by value (a full copy lives in this struct), not referenced by pointer
+ dml_uint_t WritebackChunkSize;
+ dml_float_t SOCCLK;
+ dml_float_t DCFClkDeepSleep;
+ dml_uint_t *DETBufferSizeY;
+ dml_uint_t *DETBufferSizeC;
+ dml_uint_t *SwathHeightY;
+ dml_uint_t *SwathHeightC;
+ dml_uint_t *LBBitPerPixel;
+ dml_uint_t *SwathWidthY;
+ dml_uint_t *SwathWidthC;
+ dml_float_t *HRatio;
+ dml_float_t *HRatioChroma;
+ dml_uint_t *VTaps;
+ dml_uint_t *VTapsChroma;
+ dml_float_t *VRatio;
+ dml_float_t *VRatioChroma;
+ dml_uint_t *HTotal;
+ dml_uint_t *VTotal;
+ dml_uint_t *VActive;
+ dml_float_t *PixelClock;
+ dml_uint_t *BlendingAndTiming;
+ dml_uint_t *DPPPerSurface;
+ dml_float_t *BytePerPixelDETY;
+ dml_float_t *BytePerPixelDETC;
+ dml_uint_t *DSTXAfterScaler;
+ dml_uint_t *DSTYAfterScaler;
+ dml_bool_t *WritebackEnable;
+ enum dml_source_format_class *WritebackPixelFormat;
+ dml_uint_t *WritebackDestinationWidth;
+ dml_uint_t *WritebackDestinationHeight;
+ dml_uint_t *WritebackSourceHeight;
+ dml_bool_t UnboundedRequestEnabled;
+ dml_uint_t CompressedBufferSizeInkByte;
+
+ // Output (every member below is a pointer to caller-provided storage)
+ struct Watermarks *Watermark;
+ enum dml_dram_clock_change_support *DRAMClockChangeSupport;
+ dml_float_t *MaxActiveDRAMClockChangeLatencySupported;
+ dml_uint_t *SubViewportLinesNeededInMALL;
+ enum dml_fclock_change_support *FCLKChangeSupport;
+ dml_float_t *MaxActiveFCLKChangeLatencySupported;
+ dml_bool_t *USRRetrainingSupport;
+ dml_float_t *ActiveDRAMClockChangeLatencyMargin;
+};
+
+struct CalculateVMRowAndSwath_params_st { // Parameter bundle; presumably the arguments of a CalculateVMRowAndSwath routine (not visible in this chunk) - TODO confirm. Inputs come first, outputs follow the `// Output` marker.
+ dml_uint_t NumberOfActiveSurfaces;
+ struct DmlPipe *myPipe;
+ dml_uint_t *SurfaceSizeInMALL;
+ dml_uint_t PTEBufferSizeInRequestsLuma;
+ dml_uint_t PTEBufferSizeInRequestsChroma;
+ dml_uint_t DCCMetaBufferSizeBytes;
+ enum dml_use_mall_for_static_screen_mode *UseMALLForStaticScreen;
+ enum dml_use_mall_for_pstate_change_mode *UseMALLForPStateChange;
+ dml_uint_t MALLAllocatedForDCN;
+ dml_uint_t *SwathWidthY;
+ dml_uint_t *SwathWidthC;
+ dml_bool_t GPUVMEnable;
+ dml_bool_t HostVMEnable;
+ dml_uint_t HostVMMaxNonCachedPageTableLevels;
+ dml_uint_t GPUVMMaxPageTableLevels;
+ dml_uint_t *GPUVMMinPageSizeKBytes;
+ dml_uint_t HostVMMinPageSize;
+ dml_bool_t *PTEBufferModeOverrideEn;
+ dml_bool_t *PTEBufferModeOverrideVal;
+
+ // Output (every member below is a pointer to caller-provided storage)
+ dml_bool_t *PTEBufferSizeNotExceeded;
+ dml_bool_t *DCCMetaBufferSizeNotExceeded;
+ dml_uint_t *dpte_row_width_luma_ub;
+ dml_uint_t *dpte_row_width_chroma_ub;
+ dml_uint_t *dpte_row_height_luma;
+ dml_uint_t *dpte_row_height_chroma;
+ dml_uint_t *dpte_row_height_linear_luma; // VBA_DELTA
+ dml_uint_t *dpte_row_height_linear_chroma; // VBA_DELTA
+ dml_uint_t *meta_req_width;
+ dml_uint_t *meta_req_width_chroma;
+ dml_uint_t *meta_req_height;
+ dml_uint_t *meta_req_height_chroma;
+ dml_uint_t *meta_row_width;
+ dml_uint_t *meta_row_width_chroma;
+ dml_uint_t *meta_row_height;
+ dml_uint_t *meta_row_height_chroma;
+ dml_uint_t *vm_group_bytes;
+ dml_uint_t *dpte_group_bytes;
+ dml_uint_t *PixelPTEReqWidthY;
+ dml_uint_t *PixelPTEReqHeightY;
+ dml_uint_t *PTERequestSizeY;
+ dml_uint_t *PixelPTEReqWidthC;
+ dml_uint_t *PixelPTEReqHeightC;
+ dml_uint_t *PTERequestSizeC;
+ dml_uint_t *dpde0_bytes_per_frame_ub_l;
+ dml_uint_t *meta_pte_bytes_per_frame_ub_l;
+ dml_uint_t *dpde0_bytes_per_frame_ub_c;
+ dml_uint_t *meta_pte_bytes_per_frame_ub_c;
+ dml_uint_t *PrefetchSourceLinesY;
+ dml_uint_t *PrefetchSourceLinesC;
+ dml_uint_t *VInitPreFillY;
+ dml_uint_t *VInitPreFillC;
+ dml_uint_t *MaxNumSwathY;
+ dml_uint_t *MaxNumSwathC;
+ dml_float_t *meta_row_bw;
+ dml_float_t *dpte_row_bw;
+ dml_uint_t *PixelPTEBytesPerRow;
+ dml_uint_t *PDEAndMetaPTEBytesFrame;
+ dml_uint_t *MetaRowByte;
+ dml_bool_t *use_one_row_for_frame;
+ dml_bool_t *use_one_row_for_frame_flip;
+ dml_bool_t *UsesMALLForStaticScreen;
+ dml_bool_t *PTE_BUFFER_MODE;
+ dml_uint_t *BIGK_FRAGMENT_SIZE;
+};
+
+struct CalculateSwathAndDETConfiguration_params_st { // Parameter bundle; presumably the arguments of a CalculateSwathAndDETConfiguration routine (not visible in this chunk) - TODO confirm. NOTE(review): no `// Output` marker here, so the input/output split must be read from the consumer.
+ dml_uint_t *DETSizeOverride;
+ enum dml_use_mall_for_pstate_change_mode *UseMALLForPStateChange;
+ dml_uint_t ConfigReturnBufferSizeInKByte;
+ dml_uint_t ROBBufferSizeInKByte;
+ dml_uint_t MaxTotalDETInKByte;
+ dml_uint_t MinCompressedBufferSizeInKByte;
+ dml_uint_t PixelChunkSizeInKByte;
+ dml_bool_t ForceSingleDPP;
+ dml_uint_t NumberOfActiveSurfaces;
+ dml_uint_t nomDETInKByte;
+ enum dml_unbounded_requesting_policy UseUnboundedRequestingFinal;
+ dml_uint_t ConfigReturnBufferSegmentSizeInkByte;
+ dml_uint_t CompressedBufferSegmentSizeInkByteFinal;
+ enum dml_output_encoder_class *Output;
+ dml_float_t *ReadBandwidthLuma;
+ dml_float_t *ReadBandwidthChroma;
+ dml_float_t *MaximumSwathWidthLuma;
+ dml_float_t *MaximumSwathWidthChroma;
+ enum dml_rotation_angle *SourceScan;
+ dml_bool_t *ViewportStationary;
+ enum dml_source_format_class *SourcePixelFormat;
+ enum dml_swizzle_mode *SurfaceTiling;
+ dml_uint_t *ViewportWidth;
+ dml_uint_t *ViewportHeight;
+ dml_uint_t *ViewportXStart;
+ dml_uint_t *ViewportYStart;
+ dml_uint_t *ViewportXStartC;
+ dml_uint_t *ViewportYStartC;
+ dml_uint_t *SurfaceWidthY;
+ dml_uint_t *SurfaceWidthC;
+ dml_uint_t *SurfaceHeightY;
+ dml_uint_t *SurfaceHeightC;
+ dml_uint_t *Read256BytesBlockHeightY;
+ dml_uint_t *Read256BytesBlockHeightC;
+ dml_uint_t *Read256BytesBlockWidthY;
+ dml_uint_t *Read256BytesBlockWidthC;
+ enum dml_odm_mode *ODMMode;
+ dml_uint_t *BlendingAndTiming;
+ dml_uint_t *BytePerPixY;
+ dml_uint_t *BytePerPixC;
+ dml_float_t *BytePerPixDETY;
+ dml_float_t *BytePerPixDETC;
+ dml_uint_t *HActive;
+ dml_float_t *HRatio;
+ dml_float_t *HRatioChroma;
+ dml_uint_t *DPPPerSurface;
+ dml_uint_t *swath_width_luma_ub;
+ dml_uint_t *swath_width_chroma_ub;
+ dml_uint_t *SwathWidth;
+ dml_uint_t *SwathWidthChroma;
+ dml_uint_t *SwathHeightY;
+ dml_uint_t *SwathHeightC;
+ dml_uint_t *DETBufferSizeInKByte;
+ dml_uint_t *DETBufferSizeY;
+ dml_uint_t *DETBufferSizeC;
+ dml_bool_t *UnboundedRequestEnabled; // pointer here, whereas mode_program_st stores UnboundedRequestEnabled by value; presumably written through by the routine - confirm
+ dml_uint_t *compbuf_reserved_space_64b;
+ dml_uint_t *compbuf_reserved_space_zs;
+ dml_uint_t *CompressedBufferSizeInkByte;
+ dml_bool_t *ViewportSizeSupportPerSurface;
+ dml_bool_t *ViewportSizeSupport;
+};
+
+struct CalculateStutterEfficiency_params_st { // Parameter bundle; presumably the arguments of a CalculateStutterEfficiency routine (not visible in this chunk) - TODO confirm.
+ dml_uint_t CompressedBufferSizeInkByte;
+ enum dml_use_mall_for_pstate_change_mode *UseMALLForPStateChange;
+ dml_bool_t UnboundedRequestEnabled;
+ dml_uint_t MetaFIFOSizeInKEntries;
+ dml_uint_t ZeroSizeBufferEntries;
+ dml_uint_t PixelChunkSizeInKByte;
+ dml_uint_t NumberOfActiveSurfaces;
+ dml_uint_t ROBBufferSizeInKByte;
+ dml_float_t TotalDataReadBandwidth;
+ dml_float_t DCFCLK;
+ dml_float_t ReturnBW;
+ dml_uint_t CompbufReservedSpace64B;
+ dml_uint_t CompbufReservedSpaceZs;
+ dml_float_t SRExitTime;
+ dml_float_t SRExitZ8Time;
+ dml_bool_t SynchronizeTimingsFinal;
+ dml_uint_t *BlendingAndTiming;
+ dml_float_t StutterEnterPlusExitWatermark;
+ dml_float_t Z8StutterEnterPlusExitWatermark;
+ dml_bool_t ProgressiveToInterlaceUnitInOPP;
+ dml_bool_t *Interlace;
+ dml_float_t *MinTTUVBlank;
+ dml_uint_t *DPPPerSurface;
+ dml_uint_t *DETBufferSizeY;
+ dml_uint_t *BytePerPixelY;
+ dml_float_t *BytePerPixelDETY;
+ dml_uint_t *SwathWidthY;
+ dml_uint_t *SwathHeightY;
+ dml_uint_t *SwathHeightC;
+ dml_float_t *NetDCCRateLuma;
+ dml_float_t *NetDCCRateChroma;
+ dml_float_t *DCCFractionOfZeroSizeRequestsLuma;
+ dml_float_t *DCCFractionOfZeroSizeRequestsChroma;
+ dml_uint_t *HTotal;
+ dml_uint_t *VTotal;
+ dml_float_t *PixelClock;
+ dml_float_t *VRatio;
+ enum dml_rotation_angle *SourceScan;
+ dml_uint_t *BlockHeight256BytesY;
+ dml_uint_t *BlockWidth256BytesY;
+ dml_uint_t *BlockHeight256BytesC;
+ dml_uint_t *BlockWidth256BytesC;
+ dml_uint_t *DCCYMaxUncompressedBlock;
+ dml_uint_t *DCCCMaxUncompressedBlock;
+ dml_uint_t *VActive;
+ dml_bool_t *DCCEnable;
+ dml_bool_t *WritebackEnable;
+ dml_float_t *ReadBandwidthSurfaceLuma;
+ dml_float_t *ReadBandwidthSurfaceChroma;
+ dml_float_t *meta_row_bw;
+ dml_float_t *dpte_row_bw;
+ dml_float_t *StutterEfficiencyNotIncludingVBlank; // NOTE(review): this and the remaining pointers share names with the scalar "Stutter Efficiency" / "ARB reg" members of mode_program_st - presumably outputs; confirm
+ dml_float_t *StutterEfficiency;
+ dml_uint_t *NumberOfStutterBurstsPerFrame;
+ dml_float_t *Z8StutterEfficiencyNotIncludingVBlank;
+ dml_float_t *Z8StutterEfficiency;
+ dml_uint_t *Z8NumberOfStutterBurstsPerFrame;
+ dml_float_t *StutterPeriod;
+ dml_bool_t *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
+};
+
+struct CalculatePrefetchSchedule_params_st { // Parameter bundle; presumably the arguments of a CalculatePrefetchSchedule routine (not visible in this chunk) - TODO confirm. Non-pointer members are scalars, not __DML_NUM_PLANES__ arrays.
+ dml_bool_t EnhancedPrefetchScheduleAccelerationFinal;
+ dml_float_t HostVMInefficiencyFactor;
+ struct DmlPipe *myPipe;
+ dml_uint_t DSCDelay;
+ dml_float_t DPPCLKDelaySubtotalPlusCNVCFormater;
+ dml_float_t DPPCLKDelaySCL;
+ dml_float_t DPPCLKDelaySCLLBOnly;
+ dml_float_t DPPCLKDelayCNVCCursor;
+ dml_float_t DISPCLKDelaySubtotal;
+ dml_uint_t DPP_RECOUT_WIDTH;
+ enum dml_output_format_class OutputFormat;
+ dml_uint_t MaxInterDCNTileRepeaters;
+ dml_uint_t VStartup;
+ dml_uint_t MaxVStartup;
+ dml_uint_t GPUVMPageTableLevels;
+ dml_bool_t GPUVMEnable;
+ dml_bool_t HostVMEnable;
+ dml_uint_t HostVMMaxNonCachedPageTableLevels;
+ dml_uint_t HostVMMinPageSize;
+ dml_bool_t DynamicMetadataEnable;
+ dml_bool_t DynamicMetadataVMEnabled;
+ int DynamicMetadataLinesBeforeActiveRequired; // NOTE(review): plain int here, while UseMinimumDCFCLK_params_st declares the same-named member as dml_uint_t * - confirm the signedness is intended
+ dml_uint_t DynamicMetadataTransmittedBytes;
+ dml_float_t UrgentLatency;
+ dml_float_t UrgentExtraLatency;
+ dml_float_t TCalc;
+ dml_uint_t PDEAndMetaPTEBytesFrame;
+ dml_uint_t MetaRowByte;
+ dml_uint_t PixelPTEBytesPerRow;
+ dml_float_t PrefetchSourceLinesY; // NOTE(review): float here, but dml_uint_t in mode_program_st and CalculateVMRowAndSwath_params_st - confirm
+ dml_uint_t VInitPreFillY;
+ dml_uint_t MaxNumSwathY;
+ dml_float_t PrefetchSourceLinesC; // NOTE(review): float here, but dml_uint_t in mode_program_st and CalculateVMRowAndSwath_params_st - confirm
+ dml_uint_t VInitPreFillC;
+ dml_uint_t MaxNumSwathC;
+ dml_uint_t swath_width_luma_ub;
+ dml_uint_t swath_width_chroma_ub;
+ dml_uint_t SwathHeightY;
+ dml_uint_t SwathHeightC;
+ dml_float_t TWait;
+ dml_uint_t *DSTXAfterScaler; // every member from here to the end of the struct is a pointer - presumably the outputs; confirm (no `// Output` marker)
+ dml_uint_t *DSTYAfterScaler;
+ dml_float_t *DestinationLinesForPrefetch;
+ dml_float_t *DestinationLinesToRequestVMInVBlank;
+ dml_float_t *DestinationLinesToRequestRowInVBlank;
+ dml_float_t *VRatioPrefetchY;
+ dml_float_t *VRatioPrefetchC;
+ dml_float_t *RequiredPrefetchPixDataBWLuma;
+ dml_float_t *RequiredPrefetchPixDataBWChroma;
+ dml_bool_t *NotEnoughTimeForDynamicMetadata;
+ dml_float_t *Tno_bw;
+ dml_float_t *prefetch_vmrow_bw;
+ dml_float_t *Tdmdl_vm;
+ dml_float_t *Tdmdl;
+ dml_float_t *TSetup;
+ dml_uint_t *VUpdateOffsetPix;
+ dml_uint_t *VUpdateWidthPix;
+ dml_uint_t *VReadyOffsetPix;
+};
+
+struct dml_core_mode_support_locals_st { // Scratch storage; presumably the local variables of the core mode-support routine gathered into one struct - TODO confirm against the consumer.
+ dml_bool_t dummy_boolean[2]; // dummy_* members: presumably throwaway slots for results the caller does not need - confirm
+ dml_uint_t dummy_integer[3];
+ dml_uint_t dummy_integer_array[22][__DML_NUM_PLANES__];
+ enum dml_odm_mode dummy_odm_mode[__DML_NUM_PLANES__];
+ dml_bool_t dummy_boolean_array[2][__DML_NUM_PLANES__];
+ dml_uint_t MaxVStartupAllPlanes[2];
+ dml_uint_t MaximumVStartup[2][__DML_NUM_PLANES__];
+ dml_uint_t DSTYAfterScaler[__DML_NUM_PLANES__];
+ dml_uint_t DSTXAfterScaler[__DML_NUM_PLANES__];
+ dml_uint_t NextPrefetchMode[__DML_NUM_PLANES__];
+ dml_uint_t MinPrefetchMode[__DML_NUM_PLANES__];
+ dml_uint_t MaxPrefetchMode[__DML_NUM_PLANES__];
+ dml_float_t dummy_single[3];
+ dml_float_t dummy_single_array[__DML_NUM_PLANES__];
+ struct Watermarks dummy_watermark;
+ struct SOCParametersList mSOCParameters; // NOTE(review): named mSOCParameters here but mmSOCParameters in dml_core_mode_programming_locals_st
+ struct DmlPipe myPipe;
+ struct DmlPipe SurfParameters[__DML_NUM_PLANES__];
+ dml_uint_t TotalNumberOfActiveWriteback;
+ dml_uint_t MaximumSwathWidthSupportLuma;
+ dml_uint_t MaximumSwathWidthSupportChroma;
+ dml_bool_t MPCCombineMethodAsNeededForPStateChangeAndVoltage;
+ dml_bool_t MPCCombineMethodAsPossible;
+ dml_bool_t TotalAvailablePipesSupportNoDSC;
+ dml_uint_t NumberOfDPPNoDSC;
+ enum dml_odm_mode ODMModeNoDSC;
+ dml_float_t RequiredDISPCLKPerSurfaceNoDSC;
+ dml_bool_t TotalAvailablePipesSupportDSC;
+ dml_uint_t NumberOfDPPDSC;
+ enum dml_odm_mode ODMModeDSC;
+ dml_float_t RequiredDISPCLKPerSurfaceDSC;
+ dml_bool_t NoChromaOrLinear;
+ dml_float_t BWOfNonCombinedSurfaceOfMaximumBandwidth;
+ dml_uint_t NumberOfNonCombinedSurfaceOfMaximumBandwidth;
+ dml_uint_t TotalNumberOfActiveOTG;
+ dml_uint_t TotalNumberOfActiveHDMIFRL;
+ dml_uint_t TotalNumberOfActiveDP2p0;
+ dml_uint_t TotalNumberOfActiveDP2p0Outputs;
+ dml_uint_t TotalSlots;
+ dml_uint_t DSCFormatFactor;
+ dml_uint_t TotalDSCUnitsRequired;
+ dml_uint_t ReorderingBytes;
+ dml_bool_t ImmediateFlipRequiredFinal;
+ dml_bool_t FullFrameMALLPStateMethod;
+ dml_bool_t SubViewportMALLPStateMethod;
+ dml_bool_t PhantomPipeMALLPStateMethod;
+ dml_bool_t SubViewportMALLRefreshGreaterThan120Hz;
+ dml_float_t MaxTotalVActiveRDBandwidth;
+ dml_float_t VMDataOnlyReturnBWPerState;
+ dml_float_t HostVMInefficiencyFactor;
+ dml_uint_t NextMaxVStartup;
+ dml_uint_t MaxVStartup;
+ dml_bool_t AllPrefetchModeTested;
+ dml_bool_t AnyLinesForVMOrRowTooLarge;
+ dml_bool_t is_max_pwr_state;
+ dml_bool_t is_max_dram_pwr_state;
+ dml_bool_t dram_clock_change_support;
+ dml_bool_t f_clock_change_support;
+};
+
+struct dml_core_mode_programming_locals_st { // Scratch storage; presumably the local variables of the core mode-programming routine gathered into one struct - TODO confirm against the consumer.
+ dml_uint_t DSCFormatFactor;
+ dml_uint_t dummy_integer_array[2][__DML_NUM_PLANES__]; // dummy_* members: presumably throwaway slots for results the caller does not need - confirm
+ enum dml_output_encoder_class dummy_output_encoder_array[__DML_NUM_PLANES__];
+ dml_float_t dummy_single_array[2][__DML_NUM_PLANES__];
+ dml_uint_t dummy_long_array[4][__DML_NUM_PLANES__];
+ dml_bool_t dummy_boolean_array[2][__DML_NUM_PLANES__];
+ dml_bool_t dummy_boolean[1];
+ struct DmlPipe SurfaceParameters[__DML_NUM_PLANES__];
+ dml_uint_t ReorderBytes;
+ dml_float_t VMDataOnlyReturnBW;
+ dml_float_t HostVMInefficiencyFactor;
+ dml_uint_t TotalDCCActiveDPP;
+ dml_uint_t TotalActiveDPP;
+ dml_uint_t VStartupLines;
+ dml_uint_t MaxVStartupLines[__DML_NUM_PLANES__]; /// <brief more like vblank for the plane's OTG
+ dml_uint_t MaxVStartupAllPlanes;
+ dml_bool_t ImmediateFlipRequirementFinal;
+ int iteration; // plain int rather than a dml_* typedef, unlike the other members
+ dml_float_t MaxTotalRDBandwidth;
+ dml_float_t MaxTotalRDBandwidthNoUrgentBurst;
+ dml_bool_t DestinationLineTimesForPrefetchLessThan2;
+ dml_bool_t VRatioPrefetchMoreThanMax;
+ dml_float_t MaxTotalRDBandwidthNotIncludingMALLPrefetch;
+ dml_uint_t NextPrefetchMode[__DML_NUM_PLANES__];
+ dml_uint_t MinPrefetchMode[__DML_NUM_PLANES__];
+ dml_uint_t MaxPrefetchMode[__DML_NUM_PLANES__];
+ dml_bool_t AllPrefetchModeTested;
+ dml_float_t dummy_unit_vector[__DML_NUM_PLANES__];
+ dml_float_t NonUrgentMaxTotalRDBandwidth;
+ dml_float_t NonUrgentMaxTotalRDBandwidthNotIncludingMALLPrefetch;
+ dml_float_t dummy_single[2];
+ struct SOCParametersList mmSOCParameters; // NOTE(review): named mmSOCParameters here but mSOCParameters in dml_core_mode_support_locals_st
+ dml_float_t Tvstartup_margin;
+ dml_float_t dlg_vblank_start;
+ dml_float_t LSetup;
+ dml_float_t blank_lines_remaining;
+ dml_float_t old_MIN_DST_Y_NEXT_START;
+ dml_float_t TotalWRBandwidth;
+ dml_float_t WRBandwidth;
+ struct Watermarks dummy_watermark;
+ struct DmlPipe myPipe;
+};
+
+struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals_st { // Scratch storage; presumably the local variables of the same-named calculation routine (not visible in this chunk) - TODO confirm.
+ dml_float_t ActiveDRAMClockChangeLatencyMargin[__DML_NUM_PLANES__];
+ dml_float_t ActiveFCLKChangeLatencyMargin[__DML_NUM_PLANES__];
+ dml_float_t USRRetrainingLatencyMargin[__DML_NUM_PLANES__];
+
+ dml_bool_t SynchronizedSurfaces[__DML_NUM_PLANES__][__DML_NUM_PLANES__]; // square __DML_NUM_PLANES__ x __DML_NUM_PLANES__ matrix; presumably a pairwise surface relation - confirm
+ dml_float_t EffectiveLBLatencyHidingY;
+ dml_float_t EffectiveLBLatencyHidingC;
+ dml_float_t LinesInDETY[__DML_NUM_PLANES__];
+ dml_float_t LinesInDETC[__DML_NUM_PLANES__];
+ dml_uint_t LinesInDETYRoundedDownToSwath[__DML_NUM_PLANES__];
+ dml_uint_t LinesInDETCRoundedDownToSwath[__DML_NUM_PLANES__];
+ dml_float_t FullDETBufferingTimeY;
+ dml_float_t FullDETBufferingTimeC;
+ dml_float_t WritebackDRAMClockChangeLatencyMargin;
+ dml_float_t WritebackFCLKChangeLatencyMargin;
+ dml_float_t WritebackLatencyHiding;
+
+ dml_uint_t TotalActiveWriteback;
+ dml_uint_t LBLatencyHidingSourceLinesY[__DML_NUM_PLANES__];
+ dml_uint_t LBLatencyHidingSourceLinesC[__DML_NUM_PLANES__];
+ dml_float_t TotalPixelBW;
+ dml_float_t EffectiveDETBufferSizeY;
+ dml_float_t ActiveClockChangeLatencyHidingY;
+ dml_float_t ActiveClockChangeLatencyHidingC;
+ dml_float_t ActiveClockChangeLatencyHiding;
+ dml_bool_t FoundCriticalSurface;
+ dml_uint_t LastSurfaceWithoutMargin;
+ dml_uint_t FCLKChangeSupportNumber;
+ dml_uint_t DRAMClockChangeMethod;
+ dml_uint_t DRAMClockChangeSupportNumber;
+ dml_uint_t dst_y_pstate;
+ dml_uint_t src_y_pstate_l;
+ dml_uint_t src_y_pstate_c;
+ dml_uint_t src_y_ahead_l;
+ dml_uint_t src_y_ahead_c;
+ dml_uint_t sub_vp_lines_l;
+ dml_uint_t sub_vp_lines_c;
+};
+
+struct CalculateVMRowAndSwath_locals_st { // Scratch storage; presumably the local variables of a CalculateVMRowAndSwath routine (not visible in this chunk) - TODO confirm.
+ dml_uint_t PTEBufferSizeInRequestsForLuma[__DML_NUM_PLANES__];
+ dml_uint_t PTEBufferSizeInRequestsForChroma[__DML_NUM_PLANES__];
+ dml_uint_t PDEAndMetaPTEBytesFrameY; // scalar, unlike the __DML_NUM_PLANES__ arrays around it
+ dml_uint_t PDEAndMetaPTEBytesFrameC; // scalar, unlike the __DML_NUM_PLANES__ arrays around it
+ dml_uint_t MetaRowByteY[__DML_NUM_PLANES__];
+ dml_uint_t MetaRowByteC[__DML_NUM_PLANES__];
+ dml_uint_t PixelPTEBytesPerRowY[__DML_NUM_PLANES__];
+ dml_uint_t PixelPTEBytesPerRowC[__DML_NUM_PLANES__];
+ dml_uint_t PixelPTEBytesPerRowStorageY[__DML_NUM_PLANES__];
+ dml_uint_t PixelPTEBytesPerRowStorageC[__DML_NUM_PLANES__];
+ dml_uint_t PixelPTEBytesPerRowY_one_row_per_frame[__DML_NUM_PLANES__];
+ dml_uint_t PixelPTEBytesPerRowC_one_row_per_frame[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_width_luma_ub_one_row_per_frame[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_height_luma_one_row_per_frame[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_width_chroma_ub_one_row_per_frame[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_height_chroma_one_row_per_frame[__DML_NUM_PLANES__];
+ dml_bool_t one_row_per_frame_fits_in_buffer[__DML_NUM_PLANES__];
+
+ dml_uint_t HostVMDynamicLevels;
+};
+
+struct UseMinimumDCFCLK_locals_st { // Scratch locals; instantiated as display_mode_lib_scratch_st.UseMinimumDCFCLK_locals
+ dml_uint_t dummy1; // NOTE(review): dummy1..dummy3 are presumably throwaway output slots - confirm against the user of this struct
+ dml_uint_t dummy2;
+ dml_uint_t dummy3;
+ dml_float_t NormalEfficiency;
+ dml_float_t TotalMaxPrefetchFlipDPTERowBandwidth[2]; // fixed two-entry array, unlike the per-plane arrays below
+ // Per-plane values (one entry per __DML_NUM_PLANES__)
+ dml_float_t PixelDCFCLKCyclesRequiredInPrefetch[__DML_NUM_PLANES__];
+ dml_float_t PrefetchPixelLinesTime[__DML_NUM_PLANES__];
+ dml_float_t DCFCLKRequiredForPeakBandwidthPerSurface[__DML_NUM_PLANES__];
+ dml_float_t DynamicMetadataVMExtraLatency[__DML_NUM_PLANES__];
+ dml_float_t MinimumTWait;
+ dml_float_t DPTEBandwidth;
+ dml_float_t DCFCLKRequiredForAverageBandwidth;
+ dml_uint_t ExtraLatencyBytes;
+ dml_float_t ExtraLatencyCycles;
+ dml_float_t DCFCLKRequiredForPeakBandwidth;
+ dml_uint_t NoOfDPPState[__DML_NUM_PLANES__];
+ dml_float_t MinimumTvmPlus2Tr0;
+};
+
+struct CalculatePrefetchSchedule_locals_st { // Scratch locals; instantiated as display_mode_lib_scratch_st.CalculatePrefetchSchedule_locals
+ dml_bool_t MyError;
+ dml_uint_t DPPCycles;
+ dml_uint_t DISPCLKCycles;
+ dml_float_t DSTTotalPixelsAfterScaler;
+ dml_float_t LineTime;
+ dml_float_t dst_y_prefetch_equ;
+ dml_float_t prefetch_bw_oto;
+ dml_float_t Tvm_oto;
+ dml_float_t Tr0_oto;
+ dml_float_t Tvm_oto_lines;
+ dml_float_t Tr0_oto_lines;
+ dml_float_t dst_y_prefetch_oto;
+ dml_float_t TimeForFetchingMetaPTE;
+ dml_float_t TimeForFetchingRowInVBlank;
+ dml_float_t LinesToRequestPrefetchPixelData;
+ dml_uint_t HostVMDynamicLevelsTrips;
+ dml_float_t trip_to_mem;
+ dml_float_t Tvm_trips;
+ dml_float_t Tr0_trips;
+ dml_float_t Tvm_trips_rounded;
+ dml_float_t Tr0_trips_rounded;
+ dml_float_t max_Tsw;
+ dml_float_t Lsw_oto;
+ dml_float_t Tpre_rounded;
+ dml_float_t prefetch_bw_equ;
+ dml_float_t Tvm_equ;
+ dml_float_t Tr0_equ;
+ dml_float_t Tdmbf;
+ dml_float_t Tdmec;
+ dml_float_t Tdmsks;
+ dml_float_t prefetch_sw_bytes;
+ dml_float_t prefetch_bw_pr;
+ dml_float_t bytes_pp;
+ dml_float_t dep_bytes;
+ dml_float_t min_Lsw_oto;
+ dml_float_t Tsw_est1;
+ dml_float_t Tsw_est3;
+ dml_float_t PrefetchBandwidth1;
+ dml_float_t PrefetchBandwidth2;
+ dml_float_t PrefetchBandwidth3;
+ dml_float_t PrefetchBandwidth4;
+};
+
+/// @brief To minimize stack usage, function locals are placed in this scratch structure instead; it is allocated per context
+struct display_mode_lib_scratch_st {
+ // Scratch space for function locals (one *_locals_st member per function)
+ struct dml_core_mode_support_locals_st dml_core_mode_support_locals;
+ struct dml_core_mode_programming_locals_st dml_core_mode_programming_locals;
+ struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals_st CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals;
+ struct CalculateVMRowAndSwath_locals_st CalculateVMRowAndSwath_locals;
+ struct UseMinimumDCFCLK_locals_st UseMinimumDCFCLK_locals;
+ struct CalculatePrefetchSchedule_locals_st CalculatePrefetchSchedule_locals;
+
+ // Scratch space for function parameters (one *_params_st member per function)
+ struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
+ struct CalculateVMRowAndSwath_params_st CalculateVMRowAndSwath_params;
+ struct UseMinimumDCFCLK_params_st UseMinimumDCFCLK_params;
+ struct CalculateSwathAndDETConfiguration_params_st CalculateSwathAndDETConfiguration_params;
+ struct CalculateStutterEfficiency_params_st CalculateStutterEfficiency_params;
+ struct CalculatePrefetchSchedule_params_st CalculatePrefetchSchedule_params;
+};
+
+/// @brief Represents the overall SoC/IP environment. It contains the data structures that describe the SoC/IP characteristics as well as the structures that hold calculation output
+struct display_mode_lib_st {
+ dml_uint_t project;
+
+ //@brief Mode evaluation and programming policy
+ struct dml_mode_eval_policy_st policy;
+
+ //@brief IP/SOC characteristics
+ struct ip_params_st ip;
+ struct soc_bounding_box_st soc;
+ struct soc_states_st states;
+
+ //@brief Mode support and mode programming structs
+ // Used to hold the input, intermediate and output values of the calculations
+ struct mode_support_st ms; // struct for mode support
+ struct mode_program_st mp; // struct for mode programming
+
+ struct display_mode_lib_scratch_st scratch; // embedded by value, so the scratch space lives inside every display_mode_lib_st
+};
+
+struct dml_mode_support_ex_params_st { // NOTE(review): field prefixes suggest in_* are inputs and out_* are results - confirm against the consumer
+ struct display_mode_lib_st *mode_lib;
+ const struct dml_display_cfg_st *in_display_cfg;
+ dml_uint_t in_start_state_idx;
+ dml_uint_t out_lowest_state_idx; // held by value, unlike out_evaluation_info which is a pointer
+ struct dml_mode_support_info_st *out_evaluation_info;
+};
+
+typedef struct _vcs_dpi_dml_display_rq_regs_st dml_display_rq_regs_st; // short aliases; the structs themselves are defined further down in this header
+typedef struct _vcs_dpi_dml_display_dlg_regs_st dml_display_dlg_regs_st;
+typedef struct _vcs_dpi_dml_display_ttu_regs_st dml_display_ttu_regs_st;
+typedef struct _vcs_dpi_dml_display_arb_params_st dml_display_arb_params_st;
+typedef struct _vcs_dpi_dml_display_plane_rq_regs_st dml_display_plane_rq_regs_st;
+
+struct _vcs_dpi_dml_display_dlg_regs_st { // aliased as dml_display_dlg_regs_st; dumped in hex by dml_print_dlg_regs_st()
+ dml_uint_t refcyc_h_blank_end;
+ dml_uint_t dlg_vblank_end;
+ dml_uint_t min_dst_y_next_start;
+ dml_uint_t refcyc_per_htotal;
+ dml_uint_t refcyc_x_after_scaler;
+ dml_uint_t dst_y_after_scaler;
+ dml_uint_t dst_y_prefetch;
+ dml_uint_t dst_y_per_vm_vblank;
+ dml_uint_t dst_y_per_row_vblank;
+ dml_uint_t dst_y_per_vm_flip;
+ dml_uint_t dst_y_per_row_flip;
+ dml_uint_t ref_freq_to_pix_freq;
+ dml_uint_t vratio_prefetch;
+ dml_uint_t vratio_prefetch_c;
+ dml_uint_t refcyc_per_pte_group_vblank_l;
+ dml_uint_t refcyc_per_pte_group_vblank_c;
+ dml_uint_t refcyc_per_meta_chunk_vblank_l;
+ dml_uint_t refcyc_per_meta_chunk_vblank_c;
+ dml_uint_t refcyc_per_pte_group_flip_l;
+ dml_uint_t refcyc_per_pte_group_flip_c;
+ dml_uint_t refcyc_per_meta_chunk_flip_l;
+ dml_uint_t refcyc_per_meta_chunk_flip_c;
+ dml_uint_t dst_y_per_pte_row_nom_l;
+ dml_uint_t dst_y_per_pte_row_nom_c;
+ dml_uint_t refcyc_per_pte_group_nom_l;
+ dml_uint_t refcyc_per_pte_group_nom_c;
+ dml_uint_t dst_y_per_meta_row_nom_l;
+ dml_uint_t dst_y_per_meta_row_nom_c;
+ dml_uint_t refcyc_per_meta_chunk_nom_l;
+ dml_uint_t refcyc_per_meta_chunk_nom_c;
+ dml_uint_t refcyc_per_line_delivery_pre_l;
+ dml_uint_t refcyc_per_line_delivery_pre_c;
+ dml_uint_t refcyc_per_line_delivery_l;
+ dml_uint_t refcyc_per_line_delivery_c;
+ dml_uint_t refcyc_per_vm_group_vblank;
+ dml_uint_t refcyc_per_vm_group_flip;
+ dml_uint_t refcyc_per_vm_req_vblank;
+ dml_uint_t refcyc_per_vm_req_flip;
+ dml_uint_t dst_y_offset_cur0; // NOTE(review): not included in the dml_print_dlg_regs_st() dump
+ dml_uint_t chunk_hdl_adjust_cur0;
+ dml_uint_t dst_y_offset_cur1;
+ dml_uint_t chunk_hdl_adjust_cur1;
+ dml_uint_t vready_after_vcount0;
+ dml_uint_t dst_y_delta_drq_limit;
+ dml_uint_t refcyc_per_vm_dmdata;
+ dml_uint_t dmdata_dl_delta; // NOTE(review): not included in the dml_print_dlg_regs_st() dump
+};
+
+struct _vcs_dpi_dml_display_ttu_regs_st { // aliased as dml_display_ttu_regs_st; every field is dumped in hex by dml_print_ttu_regs_st()
+ dml_uint_t qos_level_low_wm;
+ dml_uint_t qos_level_high_wm;
+ dml_uint_t min_ttu_vblank;
+ dml_uint_t qos_level_flip;
+ dml_uint_t refcyc_per_req_delivery_l;
+ dml_uint_t refcyc_per_req_delivery_c;
+ dml_uint_t refcyc_per_req_delivery_cur0;
+ dml_uint_t refcyc_per_req_delivery_cur1;
+ dml_uint_t refcyc_per_req_delivery_pre_l;
+ dml_uint_t refcyc_per_req_delivery_pre_c;
+ dml_uint_t refcyc_per_req_delivery_pre_cur0;
+ dml_uint_t refcyc_per_req_delivery_pre_cur1;
+ dml_uint_t qos_level_fixed_l;
+ dml_uint_t qos_level_fixed_c;
+ dml_uint_t qos_level_fixed_cur0;
+ dml_uint_t qos_level_fixed_cur1;
+ dml_uint_t qos_ramp_disable_l;
+ dml_uint_t qos_ramp_disable_c;
+ dml_uint_t qos_ramp_disable_cur0;
+ dml_uint_t qos_ramp_disable_cur1;
+};
+
+struct _vcs_dpi_dml_display_arb_params_st { // aliased as dml_display_arb_params_st
+ dml_uint_t max_req_outstanding;
+ dml_uint_t min_req_outstanding;
+ dml_uint_t sat_level_us; // NOTE(review): "_us" presumably means microseconds - confirm
+ dml_uint_t hvm_max_qos_commit_threshold;
+ dml_uint_t hvm_min_req_outstand_commit_threshold;
+ dml_uint_t compbuf_reserved_space_kbytes;
+};
+
+struct _vcs_dpi_dml_display_plane_rq_regs_st { // aliased as dml_display_plane_rq_regs_st; used once for luma and once for chroma in the rq_regs struct
+ dml_uint_t chunk_size;
+ dml_uint_t min_chunk_size;
+ dml_uint_t meta_chunk_size;
+ dml_uint_t min_meta_chunk_size;
+ dml_uint_t dpte_group_size;
+ dml_uint_t mpte_group_size;
+ dml_uint_t swath_height;
+ dml_uint_t pte_row_height_linear;
+};
+
+struct _vcs_dpi_dml_display_rq_regs_st { // aliased as dml_display_rq_regs_st
+ dml_display_plane_rq_regs_st rq_regs_l; // printed under the "<LUMA>" heading by dml_print_rq_regs_st()
+ dml_display_plane_rq_regs_st rq_regs_c; // printed under the "<CHROMA>" heading by dml_print_rq_regs_st()
+ dml_uint_t drq_expansion_mode;
+ dml_uint_t prq_expansion_mode;
+ dml_uint_t mrq_expansion_mode;
+ dml_uint_t crq_expansion_mode;
+ dml_uint_t plane1_base_address;
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_lib_defines.h b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_lib_defines.h
new file mode 100644
index 000000000000..e574c81edf5e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_lib_defines.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DISPLAY_MODE_LIB_DEFINES_H__
+#define __DISPLAY_MODE_LIB_DEFINES_H__
+
+#define DCN_DML__DML_STANDALONE 1 // Build configuration: most DCN_DML__<NAME> values below are paired with a DCN_DML__<NAME>__<value> flag defined to 1
+#define DCN_DML__DML_STANDALONE__1 1
+#define DCN_DML__PRESENT 1
+#define DCN_DML__PRESENT__1 1
+#define DCN_DML__NUM_PLANE 8 // becomes __DML_NUM_PLANES__
+#define DCN_DML__NUM_PLANE__8 1
+#define DCN_DML__NUM_CURSOR 1 // becomes __DML_NUM_CURSORS__
+#define DCN_DML__NUM_CURSOR__1 1
+#define DCN_DML__NUM_PWR_STATE 30 // becomes __DML_MAX_STATE_ARRAY_SIZE__
+#define DCN_DML__NUM_PWR_STATE__30 1
+#define DCN_DML__VM_PRESENT 1
+#define DCN_DML__VM_PRESENT__1 1
+#define DCN_DML__HOST_VM_PRESENT 1
+#define DCN_DML__HOST_VM_PRESENT__1 1
+#define DCN_DML__DWB 1 // becomes __DML_NUM_DMB__; has no companion __1 flag
+
+#include "dml_depedencies.h"
+
+#include "dml_logging.h"
+#include "dml_assert.h"
+
+// Define to enable a large number of debug messages
+#define __DML_VBA_DEBUG__
+#define __DML_VBA_ENABLE_INLINE_CHECK_ 0
+#define __DML_VBA_MIN_VSTARTUP__ 9 //<brief The vstartup value at which DML starts checking whether the mode can be supported
+#define __DML_ARB_TO_RET_DELAY__ (7 + 95) //<brief Delay in DCFCLK from ARB to DET (1st number is ARB to SDPIF, 2nd number is SDPIF to DET)
+#define __DML_MIN_DCFCLK_FACTOR__ 1.15 //<brief Fudge factor for the min dcfclk calculation
+#define __DML_MAX_VRATIO_PRE__ 4.0 //<brief Prefetch schedule max vratio
+#define __DML_MAX_VRATIO_PRE_OTO__ 4.0 //<brief Prefetch schedule max vratio for the one-to-one scheduling calculation for prefetch
+#define __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ 6.0 //<brief Prefetch schedule max vratio when enhanced prefetch schedule acceleration is enabled and vstartup is already the earliest possible
+#define __DML_NUM_PLANES__ DCN_DML__NUM_PLANE
+#define __DML_NUM_CURSORS__ DCN_DML__NUM_CURSOR
+#define __DML_DPP_INVALID__ 0
+#define __DML_NUM_DMB__ DCN_DML__DWB
+#define __DML_PIPE_NO_PLANE__ 99
+
+#define __DML_MAX_STATE_ARRAY_SIZE__ DCN_DML__NUM_PWR_STATE
+
+// Compilation define (expands to nothing in this build)
+#define __DML_DLL_EXPORT__
+
+typedef int dml_int_t; // int is 32-bit in C/C++, but the Integer datatype is 16-bit in VBA, so this should map to Long in VBA
+typedef unsigned int dml_uint_t;
+typedef double dml_float_t;
+
+// Note: bool is 8-bit in C/C++, but Boolean is 16-bit in VBA; use "short" in a C/C++ DLL so that the structs work when VBA uses the DLL.
+// Alternatively, if the VBA side uses "Byte" instead of Boolean, the C side can use bool.
+typedef bool dml_bool_t;
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.c b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.c
new file mode 100644
index 000000000000..89890c88fd66
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.c
@@ -0,0 +1,798 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "display_mode_util.h"
+
+static dml_float_t _log(float in) // approximates log2(in) from the float's bit pattern; assumes IEEE-754 binary32 and a 32-bit int
+{
+ int * const exp_ptr = (int *)(&in); // reinterpret the float's storage as an int (type punning through a pointer cast)
+ int x = *exp_ptr;
+ const int log_2 = ((x >> 23) & 255) - 128; // 8-bit biased exponent field minus 128, i.e. the unbiased exponent minus 1
+ // Overwrite the exponent field with 127 so that `in` becomes the mantissa scaled into [1, 2); the sign bit is left as-is
+ x &= ~(255 << 23);
+ x += 127 << 23;
+ *exp_ptr = x;
+ // Quadratic in the mantissa: yields 1 at in == 1 and 2 at in == 2, which cancels the "minus 1" folded into log_2
+ in = ((-1.0f / 3) * in + 2) * in - 2.0f / 3;
+ // NOTE(review): presumably only meaningful for positive, finite, normal inputs - confirm callers never pass 0 or negatives
+ return (in + log_2);
+}
+
+/*
+ * dml_util_is_420() - tell whether @source_format is one of the 4:2:0 classes.
+ *
+ * Returns true for dml_420_8 and dml_420_10, and false for the dml_444_* and
+ * dml_422_* enumerators listed below.
+ *
+ * Any enumerator without a case label trips ASSERT(0) and is then reported as
+ * "not 4:2:0" (false).
+ */
+dml_bool_t dml_util_is_420(enum dml_source_format_class source_format)
+{
+	dml_bool_t is_420;
+
+	switch (source_format) {
+	case dml_420_8:
+	case dml_420_10:
+		is_420 = true;
+		break;
+	case dml_444_16:
+	case dml_444_32:
+	case dml_444_64:
+	case dml_422_8:
+	case dml_422_10:
+		is_420 = false;
+		break;
+	default:
+		ASSERT(0);
+		is_420 = false;
+		break;
+	}
+	return is_420;
+}
+
+/*
+ * Raise @a to an integer power by recursive squaring. @exp is only ever used
+ * after truncation to int, so a fractional part has no effect on the result.
+ */
+static inline float dcn_bw_pow(float a, float exp)
+{
+	const int n = (int)exp;
+	float half;
+
+	if (n == 0)
+		return 1;
+	half = dcn_bw_pow(a, (int)(exp / 2));
+	if (n % 2 == 0)
+		return half * half;
+	return (n > 0) ? a * half * half : (half * half) / a;
+}
+
+static inline float dcn_bw_ceil2(const float arg, const float significance) // arg rounded up to a multiple of significance, via int truncation
+{
+ ASSERT(significance != 0);
+ // Adding 0.99999 before the (int) cast bumps the quotient up unless its fractional part is below about 0.00001; NOTE(review): the cast truncates toward zero, so negative quotients are not a true ceiling
+ return ((int)(arg / significance + 0.99999)) * significance;
+}
+
+static inline float dcn_bw_floor2(const float arg, const float significance) // arg rounded toward zero to a multiple of significance
+{
+ ASSERT(significance != 0);
+ // The (int) cast truncates toward zero, so this matches a mathematical floor only for non-negative quotients
+ return ((int)(arg / significance)) * significance;
+}
+
+dml_float_t dml_ceil(dml_float_t x, dml_float_t granularity) // x rounded up to a multiple of granularity; returns 0 when granularity is 0
+{
+ if (granularity == 0)
+ return 0;
+ // Delegates to the integer-truncation helper instead of libm ceil(); both arguments are narrowed to float by the call
+ return (dml_float_t)dcn_bw_ceil2(x, granularity);
+}
+
+dml_float_t dml_floor(dml_float_t x, dml_float_t granularity) // x rounded down to a multiple of granularity; returns 0 when granularity is 0
+{
+ if (granularity == 0)
+ return 0;
+ // Delegates to the integer-truncation helper instead of libm floor(); both arguments are narrowed to float by the call
+ return (dml_float_t)dcn_bw_floor2(x, granularity);
+}
+
+/* Return the smaller of @x and @y; a NaN operand (v != v) yields the other one. */
+dml_float_t dml_min(dml_float_t x, dml_float_t y)
+{
+	if (x != x) /* x is NaN */
+		return y;
+	if (y != y) /* y is NaN */
+		return x;
+
+	/* Equal operands fall through to @y, exactly as an if/else on x < y. */
+	return (x < y) ? x : y;
+}
+
+dml_float_t dml_min3(dml_float_t x, dml_float_t y, dml_float_t z) // minimum of three values, folded left to right through dml_min()
+{
+ return dml_min(dml_min(x, y), z);
+}
+
+dml_float_t dml_min4(dml_float_t x, dml_float_t y, dml_float_t z, dml_float_t w) // minimum of four values, folded left to right through dml_min()
+{
+ return dml_min(dml_min(dml_min(x, y), z), w);
+}
+
+/* Return the larger of @x and @y; a NaN operand (v != v) yields the other one. */
+dml_float_t dml_max(dml_float_t x, dml_float_t y)
+{
+	if (x != x) /* x is NaN */
+		return y;
+	if (y != y) /* y is NaN */
+		return x;
+
+	/* Equal operands fall through to @y, exactly as an if/else on x > y. */
+	return (x > y) ? x : y;
+}
+dml_float_t dml_max3(dml_float_t x, dml_float_t y, dml_float_t z) // maximum of three values, folded left to right through dml_max()
+{
+ return dml_max(dml_max(x, y), z);
+}
+dml_float_t dml_max4(dml_float_t a, dml_float_t b, dml_float_t c, dml_float_t d) // maximum of four values: max of the (a, b) and (c, d) pair maxima
+{
+ return dml_max(dml_max(a, b), dml_max(c, d));
+}
+dml_float_t dml_max5(dml_float_t a, dml_float_t b, dml_float_t c, dml_float_t d, dml_float_t e) // maximum of five values: dml_max4() of the first four, then e
+{
+ return dml_max(dml_max4(a, b, c, d), e);
+}
+dml_float_t dml_log(dml_float_t x, dml_float_t base) // logarithm of x in the given base, as a ratio of two _log() approximations
+{
+ return (dml_float_t) (_log(x) / _log(base)); // both arguments are narrowed to float on the way into _log()
+}
+
+dml_float_t dml_log2(dml_float_t x) // base-2 logarithm of x, computed with the _log() approximation
+{
+ return (dml_float_t) (_log(x) / _log(2)); // x is narrowed to float on the way into _log()
+}
+
+/*
+ * dml_round() - pick between dml_floor(val, 1) and dml_ceil(val, 1).
+ * The ceil value is returned once @val sits at least 0.5 above the floor
+ * value; otherwise the floor value is. @bankers_rounding is accepted but
+ * never consulted, so both settings behave identically.
+ */
+dml_float_t dml_round(dml_float_t val, dml_bool_t bankers_rounding)
+{
+	const double half = 0.5;
+	const double upper = dml_ceil(val, 1);
+	const double lower = dml_floor(val, 1);
+
+	if (val - lower >= half)
+		return upper;
+	return lower;
+}
+
+dml_float_t dml_pow(dml_float_t base, int exp) // base raised to an integer power via dcn_bw_pow()
+{
+ return (dml_float_t) dcn_bw_pow(base, exp); // base is narrowed to float and exp converted to float by the call
+}
+
+/*
+ * Snap @num to a multiple of @multiple: upward when @up is set, downward
+ * otherwise. @num comes back untouched if @multiple is 0 or already
+ * divides it.
+ */
+dml_uint_t dml_round_to_multiple(dml_uint_t num, dml_uint_t multiple, dml_bool_t up)
+{
+	/* Guard the modulo: a zero @multiple must never reach the % operator. */
+	const dml_uint_t excess = multiple ? num % multiple : 0;
+
+	if (excess == 0)
+		return num;
+
+	/* Unsigned arithmetic: the upward result wraps if it exceeds the type. */
+	return up ? (num + multiple - excess) : (num - excess);
+}
+
+void dml_print_data_rq_regs_st(const dml_display_plane_rq_regs_st *rq_regs) // dumps every field of *rq_regs in hex through dml_print()
+{
+ dml_print("DML: ===================================== \n");
+ dml_print("DML: DISPLAY_PLANE_RQ_REGS_ST\n");
+ dml_print("DML: chunk_size = 0x%x\n", rq_regs->chunk_size);
+ dml_print("DML: min_chunk_size = 0x%x\n", rq_regs->min_chunk_size);
+ dml_print("DML: meta_chunk_size = 0x%x\n", rq_regs->meta_chunk_size);
+ dml_print("DML: min_meta_chunk_size = 0x%x\n", rq_regs->min_meta_chunk_size);
+ dml_print("DML: dpte_group_size = 0x%x\n", rq_regs->dpte_group_size);
+ dml_print("DML: mpte_group_size = 0x%x\n", rq_regs->mpte_group_size);
+ dml_print("DML: swath_height = 0x%x\n", rq_regs->swath_height);
+ dml_print("DML: pte_row_height_linear = 0x%x\n", rq_regs->pte_row_height_linear);
+ dml_print("DML: ===================================== \n");
+}
+
+void dml_print_rq_regs_st(const dml_display_rq_regs_st *rq_regs) // dumps the luma and chroma plane registers, then the top-level fields, in hex
+{
+ dml_print("DML: ===================================== \n");
+ dml_print("DML: DISPLAY_RQ_REGS_ST\n");
+ dml_print("DML: <LUMA> \n");
+ dml_print_data_rq_regs_st(&rq_regs->rq_regs_l); // nested dump prints its own separator lines and header
+ dml_print("DML: <CHROMA> \n");
+ dml_print_data_rq_regs_st(&rq_regs->rq_regs_c);
+ dml_print("DML: drq_expansion_mode = 0x%x\n", rq_regs->drq_expansion_mode);
+ dml_print("DML: prq_expansion_mode = 0x%x\n", rq_regs->prq_expansion_mode);
+ dml_print("DML: mrq_expansion_mode = 0x%x\n", rq_regs->mrq_expansion_mode);
+ dml_print("DML: crq_expansion_mode = 0x%x\n", rq_regs->crq_expansion_mode);
+ dml_print("DML: plane1_base_address = 0x%x\n", rq_regs->plane1_base_address);
+ dml_print("DML: ===================================== \n");
+}
+
+void dml_print_dlg_regs_st(const dml_display_dlg_regs_st *dlg_regs) // dumps the fields of *dlg_regs in hex through dml_print(); two fields are not covered, see the notes below
+{
+ dml_print("DML: ===================================== \n");
+ dml_print("DML: DISPLAY_DLG_REGS_ST \n");
+ dml_print("DML: refcyc_h_blank_end = 0x%x\n", dlg_regs->refcyc_h_blank_end);
+ dml_print("DML: dlg_vblank_end = 0x%x\n", dlg_regs->dlg_vblank_end);
+ dml_print("DML: min_dst_y_next_start = 0x%x\n", dlg_regs->min_dst_y_next_start);
+ dml_print("DML: refcyc_per_htotal = 0x%x\n", dlg_regs->refcyc_per_htotal);
+ dml_print("DML: refcyc_x_after_scaler = 0x%x\n", dlg_regs->refcyc_x_after_scaler);
+ dml_print("DML: dst_y_after_scaler = 0x%x\n", dlg_regs->dst_y_after_scaler);
+ dml_print("DML: dst_y_prefetch = 0x%x\n", dlg_regs->dst_y_prefetch);
+ dml_print("DML: dst_y_per_vm_vblank = 0x%x\n", dlg_regs->dst_y_per_vm_vblank);
+ dml_print("DML: dst_y_per_row_vblank = 0x%x\n", dlg_regs->dst_y_per_row_vblank);
+ dml_print("DML: dst_y_per_vm_flip = 0x%x\n", dlg_regs->dst_y_per_vm_flip);
+ dml_print("DML: dst_y_per_row_flip = 0x%x\n", dlg_regs->dst_y_per_row_flip);
+ dml_print("DML: ref_freq_to_pix_freq = 0x%x\n", dlg_regs->ref_freq_to_pix_freq);
+ dml_print("DML: vratio_prefetch = 0x%x\n", dlg_regs->vratio_prefetch);
+ dml_print("DML: vratio_prefetch_c = 0x%x\n", dlg_regs->vratio_prefetch_c);
+ dml_print("DML: refcyc_per_pte_group_vblank_l = 0x%x\n", dlg_regs->refcyc_per_pte_group_vblank_l);
+ dml_print("DML: refcyc_per_pte_group_vblank_c = 0x%x\n", dlg_regs->refcyc_per_pte_group_vblank_c);
+ dml_print("DML: refcyc_per_meta_chunk_vblank_l = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_vblank_l);
+ dml_print("DML: refcyc_per_meta_chunk_vblank_c = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_vblank_c);
+ dml_print("DML: refcyc_per_pte_group_flip_l = 0x%x\n", dlg_regs->refcyc_per_pte_group_flip_l);
+ dml_print("DML: refcyc_per_pte_group_flip_c = 0x%x\n", dlg_regs->refcyc_per_pte_group_flip_c);
+ dml_print("DML: refcyc_per_meta_chunk_flip_l = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_flip_l);
+ dml_print("DML: refcyc_per_meta_chunk_flip_c = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_flip_c);
+ dml_print("DML: dst_y_per_pte_row_nom_l = 0x%x\n", dlg_regs->dst_y_per_pte_row_nom_l);
+ dml_print("DML: dst_y_per_pte_row_nom_c = 0x%x\n", dlg_regs->dst_y_per_pte_row_nom_c);
+ dml_print("DML: refcyc_per_pte_group_nom_l = 0x%x\n", dlg_regs->refcyc_per_pte_group_nom_l);
+ dml_print("DML: refcyc_per_pte_group_nom_c = 0x%x\n", dlg_regs->refcyc_per_pte_group_nom_c);
+ dml_print("DML: dst_y_per_meta_row_nom_l = 0x%x\n", dlg_regs->dst_y_per_meta_row_nom_l);
+ dml_print("DML: dst_y_per_meta_row_nom_c = 0x%x\n", dlg_regs->dst_y_per_meta_row_nom_c);
+ dml_print("DML: refcyc_per_meta_chunk_nom_l = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_nom_l);
+ dml_print("DML: refcyc_per_meta_chunk_nom_c = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_nom_c);
+ dml_print("DML: refcyc_per_line_delivery_pre_l = 0x%x\n", dlg_regs->refcyc_per_line_delivery_pre_l);
+ dml_print("DML: refcyc_per_line_delivery_pre_c = 0x%x\n", dlg_regs->refcyc_per_line_delivery_pre_c);
+ dml_print("DML: refcyc_per_line_delivery_l = 0x%x\n", dlg_regs->refcyc_per_line_delivery_l);
+ dml_print("DML: refcyc_per_line_delivery_c = 0x%x\n", dlg_regs->refcyc_per_line_delivery_c);
+ dml_print("DML: refcyc_per_vm_group_vblank = 0x%x\n", dlg_regs->refcyc_per_vm_group_vblank);
+ dml_print("DML: refcyc_per_vm_group_flip = 0x%x\n", dlg_regs->refcyc_per_vm_group_flip);
+ dml_print("DML: refcyc_per_vm_req_vblank = 0x%x\n", dlg_regs->refcyc_per_vm_req_vblank);
+ dml_print("DML: refcyc_per_vm_req_flip = 0x%x\n", dlg_regs->refcyc_per_vm_req_flip); // NOTE(review): dlg_regs->dst_y_offset_cur0 is never printed although its cur1 counterpart is
+ dml_print("DML: chunk_hdl_adjust_cur0 = 0x%x\n", dlg_regs->chunk_hdl_adjust_cur0);
+ dml_print("DML: dst_y_offset_cur1 = 0x%x\n", dlg_regs->dst_y_offset_cur1);
+ dml_print("DML: chunk_hdl_adjust_cur1 = 0x%x\n", dlg_regs->chunk_hdl_adjust_cur1);
+ dml_print("DML: vready_after_vcount0 = 0x%x\n", dlg_regs->vready_after_vcount0);
+ dml_print("DML: dst_y_delta_drq_limit = 0x%x\n", dlg_regs->dst_y_delta_drq_limit);
+ dml_print("DML: refcyc_per_vm_dmdata = 0x%x\n", dlg_regs->refcyc_per_vm_dmdata); // NOTE(review): dlg_regs->dmdata_dl_delta is never printed
+ dml_print("DML: ===================================== \n");
+}
+
+void dml_print_ttu_regs_st(const dml_display_ttu_regs_st *ttu_regs) // dumps every field of *ttu_regs in hex through dml_print(); print order differs from declaration order
+{
+ dml_print("DML: ===================================== \n");
+ dml_print("DML: DISPLAY_TTU_REGS_ST \n");
+ dml_print("DML: qos_level_low_wm = 0x%x\n", ttu_regs->qos_level_low_wm);
+ dml_print("DML: qos_level_high_wm = 0x%x\n", ttu_regs->qos_level_high_wm);
+ dml_print("DML: min_ttu_vblank = 0x%x\n", ttu_regs->min_ttu_vblank);
+ dml_print("DML: qos_level_flip = 0x%x\n", ttu_regs->qos_level_flip);
+ dml_print("DML: refcyc_per_req_delivery_pre_l = 0x%x\n", ttu_regs->refcyc_per_req_delivery_pre_l);
+ dml_print("DML: refcyc_per_req_delivery_l = 0x%x\n", ttu_regs->refcyc_per_req_delivery_l);
+ dml_print("DML: refcyc_per_req_delivery_pre_c = 0x%x\n", ttu_regs->refcyc_per_req_delivery_pre_c);
+ dml_print("DML: refcyc_per_req_delivery_c = 0x%x\n", ttu_regs->refcyc_per_req_delivery_c);
+ dml_print("DML: refcyc_per_req_delivery_cur0 = 0x%x\n", ttu_regs->refcyc_per_req_delivery_cur0);
+ dml_print("DML: refcyc_per_req_delivery_pre_cur0 = 0x%x\n", ttu_regs->refcyc_per_req_delivery_pre_cur0);
+ dml_print("DML: refcyc_per_req_delivery_cur1 = 0x%x\n", ttu_regs->refcyc_per_req_delivery_cur1);
+ dml_print("DML: refcyc_per_req_delivery_pre_cur1 = 0x%x\n", ttu_regs->refcyc_per_req_delivery_pre_cur1);
+ dml_print("DML: qos_level_fixed_l = 0x%x\n", ttu_regs->qos_level_fixed_l);
+ dml_print("DML: qos_ramp_disable_l = 0x%x\n", ttu_regs->qos_ramp_disable_l);
+ dml_print("DML: qos_level_fixed_c = 0x%x\n", ttu_regs->qos_level_fixed_c);
+ dml_print("DML: qos_ramp_disable_c = 0x%x\n", ttu_regs->qos_ramp_disable_c);
+ dml_print("DML: qos_level_fixed_cur0 = 0x%x\n", ttu_regs->qos_level_fixed_cur0);
+ dml_print("DML: qos_ramp_disable_cur0 = 0x%x\n", ttu_regs->qos_ramp_disable_cur0);
+ dml_print("DML: qos_level_fixed_cur1 = 0x%x\n", ttu_regs->qos_level_fixed_cur1);
+ dml_print("DML: qos_ramp_disable_cur1 = 0x%x\n", ttu_regs->qos_ramp_disable_cur1);
+ dml_print("DML: ===================================== \n");
+}
+
+void dml_print_dml_policy(const struct dml_mode_eval_policy_st *policy) // dumps the scalar policy fields, then the per-plane policy arrays, in hex
+{
+ dml_print("DML: ===================================== \n");
+ dml_print("DML: DML_MODE_EVAL_POLICY_ST\n");
+ dml_print("DML: Policy: UseUnboundedRequesting = 0x%x\n", policy->UseUnboundedRequesting);
+ dml_print("DML: Policy: UseMinimumRequiredDCFCLK = 0x%x\n", policy->UseMinimumRequiredDCFCLK);
+ dml_print("DML: Policy: DRAMClockChangeRequirementFinal = 0x%x\n", policy->DRAMClockChangeRequirementFinal);
+ dml_print("DML: Policy: FCLKChangeRequirementFinal = 0x%x\n", policy->FCLKChangeRequirementFinal);
+ dml_print("DML: Policy: USRRetrainingRequiredFinal = 0x%x\n", policy->USRRetrainingRequiredFinal);
+ dml_print("DML: Policy: EnhancedPrefetchScheduleAccelerationFinal = 0x%x\n", policy->EnhancedPrefetchScheduleAccelerationFinal);
+ dml_print("DML: Policy: NomDETInKByteOverrideEnable = 0x%x\n", policy->NomDETInKByteOverrideEnable);
+ dml_print("DML: Policy: NomDETInKByteOverrideValue = 0x%x\n", policy->NomDETInKByteOverrideValue);
+ dml_print("DML: Policy: DCCProgrammingAssumesScanDirectionUnknownFinal = 0x%x\n", policy->DCCProgrammingAssumesScanDirectionUnknownFinal);
+ dml_print("DML: Policy: SynchronizeTimingsFinal = 0x%x\n", policy->SynchronizeTimingsFinal);
+ dml_print("DML: Policy: SynchronizeDRRDisplaysForUCLKPStateChangeFinal = 0x%x\n", policy->SynchronizeDRRDisplaysForUCLKPStateChangeFinal);
+ dml_print("DML: Policy: AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported = 0x%x\n", policy->AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported);
+ dml_print("DML: Policy: AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported = 0x%x\n", policy->AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported);
+ // Per-plane settings: one group of lines for each of the DCN_DML__NUM_PLANE planes
+ for (dml_uint_t i = 0; i < DCN_DML__NUM_PLANE; i++) { // NOTE(review): i is an unsigned dml_uint_t but is formatted with the signed "%0d" conversion below
+ dml_print("DML: i=%0d, Policy: MPCCombineUse = 0x%x\n", i, policy->MPCCombineUse[i]);
+ dml_print("DML: i=%0d, Policy: ODMUse = 0x%x\n", i, policy->ODMUse[i]);
+ dml_print("DML: i=%0d, Policy: ImmediateFlipRequirement = 0x%x\n", i, policy->ImmediateFlipRequirement[i]);
+ dml_print("DML: i=%0d, Policy: AllowForPStateChangeOrStutterInVBlank = 0x%x\n", i, policy->AllowForPStateChangeOrStutterInVBlank[i]);
+ }
+ dml_print("DML: ===================================== \n");
+}
+
+void dml_print_mode_support(struct display_mode_lib_st *mode_lib, dml_uint_t j)
+{
+ dml_print("DML: MODE SUPPORT: ===============================================\n");
+ dml_print("DML: MODE SUPPORT: Voltage State %d\n", j);
+ dml_print("DML: MODE SUPPORT: Mode Supported : %s\n", mode_lib->ms.support.ModeSupport[j] == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: Scale Ratio And Taps : %s\n", mode_lib->ms.support.ScaleRatioAndTapsSupport == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: Source Format Pixel And Scan : %s\n", mode_lib->ms.support.SourceFormatPixelAndScanSupport == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: Viewport Size : %s\n", mode_lib->ms.support.ViewportSizeSupport[j] == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: Link Rate Does Not Match DP Version : %s\n", mode_lib->ms.support.LinkRateDoesNotMatchDPVersion == false ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: Link Rate For Multistream Not Indicated : %s\n", mode_lib->ms.support.LinkRateForMultistreamNotIndicated == false ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: BPP For Multi stream Not Indicated : %s\n", mode_lib->ms.support.BPPForMultistreamNotIndicated == false ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: Multistream With HDMI Or eDP : %s\n", mode_lib->ms.support.MultistreamWithHDMIOreDP == false ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: Exceeded Multistream Slots : %s\n", mode_lib->ms.support.ExceededMultistreamSlots == false ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: MSO Or ODM Split With Non DP Link : %s\n", mode_lib->ms.support.MSOOrODMSplitWithNonDPLink == false ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: Not Enough Lanes For MSO : %s\n", mode_lib->ms.support.NotEnoughLanesForMSO == false ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: LinkCapacitySupport : %s\n", mode_lib->ms.support.LinkCapacitySupport == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: P2IWith420 : %s\n", mode_lib->ms.support.P2IWith420 == false ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: DSCOnlyIfNecessaryWithBPP : %s\n", mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP == false ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: DSC422NativeNotSupported : %s\n", mode_lib->ms.support.DSC422NativeNotSupported == false ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: MPCCombineMethodIncompatible : %s\n", mode_lib->ms.support.MPCCombineMethodIncompatible == false ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: ODMCombineTwoToOneSupportCheckOK : %s\n", mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: ODMCombineFourToOneSupportCheckOK : %s\n", mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: NotEnoughDSCUnits : %s\n", mode_lib->ms.support.NotEnoughDSCUnits == false ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: NotEnoughDSCSlices : %s\n", mode_lib->ms.support.NotEnoughDSCSlices == false ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe : %s\n", mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == false ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: InvalidCombinationOfMALLUseForPStateAndStaticScreen : %s\n", mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen == false ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: DSCCLKRequiredMoreThanSupported : %s\n", mode_lib->ms.support.DSCCLKRequiredMoreThanSupported == false ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: PixelsPerLinePerDSCUnitSupport : %s\n", mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: DTBCLKRequiredMoreThanSupported : %s\n", mode_lib->ms.support.DTBCLKRequiredMoreThanSupported == false ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: InvalidCombinationOfMALLUseForPState : %s\n", mode_lib->ms.support.InvalidCombinationOfMALLUseForPState == false ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified : %s\n", mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified == false ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: ROB Support : %s\n", mode_lib->ms.support.ROBSupport[j] == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: DISPCLK DPPCLK Support : %s\n", mode_lib->ms.support.DISPCLK_DPPCLK_Support[j] == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: Total Available Pipes Support : %s\n", mode_lib->ms.support.TotalAvailablePipesSupport[j] == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: Number Of OTG Support : %s\n", mode_lib->ms.support.NumberOfOTGSupport == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: Number Of DP2p0 Support : %s\n", mode_lib->ms.support.NumberOfDP2p0Support == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: Writeback Latency Support : %s\n", mode_lib->ms.support.WritebackLatencySupport == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: Writeback Scale Ratio And Taps Support : %s\n", mode_lib->ms.support.WritebackScaleRatioAndTapsSupport == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: Cursor Support : %s\n", mode_lib->ms.support.CursorSupport == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: Pitch Support : %s\n", mode_lib->ms.support.PitchSupport == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: Viewport Exceeds Surface : %s\n", mode_lib->ms.support.ViewportExceedsSurface == false ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: Prefetch Supported : %s\n", mode_lib->ms.support.PrefetchSupported[j] == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: VActive Bandwith Support : %s\n", mode_lib->ms.support.VActiveBandwithSupport[j] == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: Dynamic Metadata Supported : %s\n", mode_lib->ms.support.DynamicMetadataSupported[j] == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: Total Vertical Active Bandwidth Support : %s\n", mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: VRatio In Prefetch Supported : %s\n", mode_lib->ms.support.VRatioInPrefetchSupported[j] == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: PTE Buffer Size Not Exceeded : %s\n", mode_lib->ms.support.PTEBufferSizeNotExceeded[j] == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: DCC Meta Buffer Size Not Exceeded : %s\n", mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: Non supported DSC Input BPC : %s\n", mode_lib->ms.support.NonsupportedDSCInputBPC == false ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: Exceeded MALL Size : %s\n", mode_lib->ms.support.ExceededMALLSize == false ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: Host VM or Immediate Flip Supported : %s\n", ((mode_lib->ms.cache_display_cfg.plane.HostVMEnable == false && !mode_lib->scratch.dml_core_mode_support_locals.ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j]) ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: dram clock change support : %s\n", mode_lib->scratch.dml_core_mode_support_locals.dram_clock_change_support == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: f_clock change support : %s\n", mode_lib->scratch.dml_core_mode_support_locals.f_clock_change_support == true ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: USR Retraining Support : %s\n", (!mode_lib->ms.policy.USRRetrainingRequiredFinal || &mode_lib->ms.support.USRRetrainingSupport[j]) ? "Supported" : "NOT Supported");
+ dml_print("DML: MODE SUPPORT: ===============================================\n");
+}
+
+void dml_print_dml_mode_support_info(const struct dml_mode_support_info_st *support, dml_bool_t fail_only)
+{ /* Log the fields of *support through dml_print(); with fail_only set, a field is logged only when it equals the value it is compared against below. */
+ dml_print("DML: ===================================== \n");
+ dml_print("DML: DML_MODE_SUPPORT_INFO_ST\n");
+ if (!fail_only || support->ModeIsSupported == 0) /* fail_only == 0 makes every condition in this function true, so all fields are logged */
+ dml_print("DML: support: ModeIsSupported = 0x%x\n", support->ModeIsSupported);
+ if (!fail_only || support->ImmediateFlipSupport == 0)
+ dml_print("DML: support: ImmediateFlipSupport = 0x%x\n", support->ImmediateFlipSupport);
+ if (!fail_only || support->WritebackLatencySupport == 0)
+ dml_print("DML: support: WritebackLatencySupport = 0x%x\n", support->WritebackLatencySupport);
+ if (!fail_only || support->ScaleRatioAndTapsSupport == 0)
+ dml_print("DML: support: ScaleRatioAndTapsSupport = 0x%x\n", support->ScaleRatioAndTapsSupport);
+ if (!fail_only || support->SourceFormatPixelAndScanSupport == 0)
+ dml_print("DML: support: SourceFormatPixelAndScanSupport = 0x%x\n", support->SourceFormatPixelAndScanSupport);
+ if (!fail_only || support->MPCCombineMethodIncompatible == 1) /* this field and several below are filtered on 1 rather than 0 */
+ dml_print("DML: support: MPCCombineMethodIncompatible = 0x%x\n", support->MPCCombineMethodIncompatible);
+ if (!fail_only || support->P2IWith420 == 1)
+ dml_print("DML: support: P2IWith420 = 0x%x\n", support->P2IWith420);
+ if (!fail_only || support->DSCOnlyIfNecessaryWithBPP == 1)
+ dml_print("DML: support: DSCOnlyIfNecessaryWithBPP = 0x%x\n", support->DSCOnlyIfNecessaryWithBPP);
+ if (!fail_only || support->DSC422NativeNotSupported == 1)
+ dml_print("DML: support: DSC422NativeNotSupported = 0x%x\n", support->DSC422NativeNotSupported);
+ if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1)
+ dml_print("DML: support: LinkRateDoesNotMatchDPVersion = 0x%x\n", support->LinkRateDoesNotMatchDPVersion);
+ if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1)
+ dml_print("DML: support: LinkRateForMultistreamNotIndicated = 0x%x\n", support->LinkRateForMultistreamNotIndicated);
+ if (!fail_only || support->BPPForMultistreamNotIndicated == 1)
+ dml_print("DML: support: BPPForMultistreamNotIndicated = 0x%x\n", support->BPPForMultistreamNotIndicated);
+ if (!fail_only || support->MultistreamWithHDMIOreDP == 1)
+ dml_print("DML: support: MultistreamWithHDMIOreDP = 0x%x\n", support->MultistreamWithHDMIOreDP);
+ if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1)
+ dml_print("DML: support: MSOOrODMSplitWithNonDPLink = 0x%x\n", support->MSOOrODMSplitWithNonDPLink);
+ if (!fail_only || support->NotEnoughLanesForMSO == 1)
+ dml_print("DML: support: NotEnoughLanesForMSO = 0x%x\n", support->NotEnoughLanesForMSO);
+ if (!fail_only || support->NumberOfOTGSupport == 0)
+ dml_print("DML: support: NumberOfOTGSupport = 0x%x\n", support->NumberOfOTGSupport);
+ if (!fail_only || support->NumberOfDP2p0Support == 0)
+ dml_print("DML: support: NumberOfDP2p0Support = 0x%x\n", support->NumberOfDP2p0Support);
+ if (!fail_only || support->NonsupportedDSCInputBPC == 1)
+ dml_print("DML: support: NonsupportedDSCInputBPC = 0x%x\n", support->NonsupportedDSCInputBPC);
+ if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0)
+ dml_print("DML: support: WritebackScaleRatioAndTapsSupport = 0x%x\n", support->WritebackScaleRatioAndTapsSupport);
+ if (!fail_only || support->CursorSupport == 0)
+ dml_print("DML: support: CursorSupport = 0x%x\n", support->CursorSupport);
+ if (!fail_only || support->PitchSupport == 0)
+ dml_print("DML: support: PitchSupport = 0x%x\n", support->PitchSupport);
+ if (!fail_only || support->ViewportExceedsSurface == 1)
+ dml_print("DML: support: ViewportExceedsSurface = 0x%x\n", support->ViewportExceedsSurface);
+ if (!fail_only || support->ExceededMALLSize == 1)
+ dml_print("DML: support: ExceededMALLSize = 0x%x\n", support->ExceededMALLSize);
+ if (!fail_only || support->EnoughWritebackUnits == 0)
+ dml_print("DML: support: EnoughWritebackUnits = 0x%x\n", support->EnoughWritebackUnits);
+ if (!fail_only || support->ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified == 1)
+ dml_print("DML: support: ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = 0x%x\n", support->ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified);
+ if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1)
+ dml_print("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = 0x%x\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe);
+ if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1)
+ dml_print("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = 0x%x\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen);
+ if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1)
+ dml_print("DML: support: InvalidCombinationOfMALLUseForPState = 0x%x\n", support->InvalidCombinationOfMALLUseForPState);
+
+ if (!fail_only || support->ExceededMultistreamSlots == 1)
+ dml_print("DML: support: ExceededMultistreamSlots = 0x%x\n", support->ExceededMultistreamSlots);
+ if (!fail_only || support->ODMCombineTwoToOneSupportCheckOK == 0)
+ dml_print("DML: support: ODMCombineTwoToOneSupportCheckOK = 0x%x\n", support->ODMCombineTwoToOneSupportCheckOK);
+ if (!fail_only || support->ODMCombineFourToOneSupportCheckOK == 0)
+ dml_print("DML: support: ODMCombineFourToOneSupportCheckOK = 0x%x\n", support->ODMCombineFourToOneSupportCheckOK);
+ if (!fail_only || support->NotEnoughDSCUnits == 1)
+ dml_print("DML: support: NotEnoughDSCUnits = 0x%x\n", support->NotEnoughDSCUnits);
+ if (!fail_only || support->NotEnoughDSCSlices == 1)
+ dml_print("DML: support: NotEnoughDSCSlices = 0x%x\n", support->NotEnoughDSCSlices);
+ if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0)
+ dml_print("DML: support: PixelsPerLinePerDSCUnitSupport = 0x%x\n", support->PixelsPerLinePerDSCUnitSupport);
+ if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1)
+ dml_print("DML: support: DSCCLKRequiredMoreThanSupported = 0x%x\n", support->DSCCLKRequiredMoreThanSupported);
+ if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1)
+ dml_print("DML: support: DTBCLKRequiredMoreThanSupported = 0x%x\n", support->DTBCLKRequiredMoreThanSupported);
+ if (!fail_only || support->LinkCapacitySupport == 0)
+ dml_print("DML: support: LinkCapacitySupport = 0x%x\n", support->LinkCapacitySupport);
+ /* Array-valued fields: entries 0 and 1 of each are visited, and the index is logged as "combine". */
+ for (dml_uint_t j = 0; j < 2; j++) {
+ if (!fail_only || support->DRAMClockChangeSupport[j] == dml_dram_clock_change_unsupported) /* the two clock-change fields are filtered on an *_unsupported enumerator, not on 0 */
+ dml_print("DML: support: combine=%d, DRAMClockChangeSupport = %d\n", j, support->DRAMClockChangeSupport[j]);
+ if (!fail_only || support->FCLKChangeSupport[j] == dml_fclock_change_unsupported)
+ dml_print("DML: support: combine=%d, FCLKChangeSupport = %d\n", j, support->FCLKChangeSupport[j]);
+ if (!fail_only || support->ROBSupport[j] == 0)
+ dml_print("DML: support: combine=%d, ROBSupport = %d\n", j, support->ROBSupport[j]);
+ if (!fail_only || support->PTEBufferSizeNotExceeded[j] == 0)
+ dml_print("DML: support: combine=%d, PTEBufferSizeNotExceeded = %d\n", j, support->PTEBufferSizeNotExceeded[j]);
+ if (!fail_only || support->DCCMetaBufferSizeNotExceeded[j] == 0)
+ dml_print("DML: support: combine=%d, DCCMetaBufferSizeNotExceeded = %d\n", j, support->DCCMetaBufferSizeNotExceeded[j]);
+ if (!fail_only || support->TotalVerticalActiveBandwidthSupport[j] == 0)
+ dml_print("DML: support: combine=%d, TotalVerticalActiveBandwidthSupport = %d\n", j, support->TotalVerticalActiveBandwidthSupport[j]);
+ if (!fail_only || support->USRRetrainingSupport[j] == 0)
+ dml_print("DML: support: combine=%d, USRRetrainingSupport = %d\n", j, support->USRRetrainingSupport[j]);
+ if (!fail_only || support->VActiveBandwithSupport[j] == 0)
+ dml_print("DML: support: combine=%d, VActiveBandwithSupport = %d\n", j, support->VActiveBandwithSupport[j]);
+ if (!fail_only || support->PrefetchSupported[j] == 0)
+ dml_print("DML: support: combine=%d, PrefetchSupported = %d\n", j, support->PrefetchSupported[j]);
+ if (!fail_only || support->DynamicMetadataSupported[j] == 0)
+ dml_print("DML: support: combine=%d, DynamicMetadataSupported = %d\n", j, support->DynamicMetadataSupported[j]);
+ if (!fail_only || support->VRatioInPrefetchSupported[j] == 0)
+ dml_print("DML: support: combine=%d, VRatioInPrefetchSupported = %d\n", j, support->VRatioInPrefetchSupported[j]);
+ if (!fail_only || support->DISPCLK_DPPCLK_Support[j] == 0)
+ dml_print("DML: support: combine=%d, DISPCLK_DPPCLK_Support = %d\n", j, support->DISPCLK_DPPCLK_Support[j]);
+ if (!fail_only || support->TotalAvailablePipesSupport[j] == 0)
+ dml_print("DML: support: combine=%d, TotalAvailablePipesSupport = %d\n", j, support->TotalAvailablePipesSupport[j]);
+ if (!fail_only || support->ModeSupport[j] == 0)
+ dml_print("DML: support: combine=%d, ModeSupport = %d\n", j, support->ModeSupport[j]);
+ if (!fail_only || support->ViewportSizeSupport[j] == 0)
+ dml_print("DML: support: combine=%d, ViewportSizeSupport = %d\n", j, support->ViewportSizeSupport[j]);
+ if (!fail_only || support->ImmediateFlipSupportedForState[j] == 0)
+ dml_print("DML: support: combine=%d, ImmediateFlipSupportedForState = %d\n", j, support->ImmediateFlipSupportedForState[j]);
+ }
+}
+
+void dml_print_dml_display_cfg_timing(const struct dml_timing_cfg_st *timing, dml_uint_t num_plane)
+{ /* Log the timing_cfg entries at indices 0 .. num_plane - 1, ten dml_print() lines per index. */
+	for (dml_uint_t k = 0; k < num_plane; ++k) {
+		dml_print("DML: timing_cfg: plane=%d, HTotal = %d\n", k, timing->HTotal[k]);
+		dml_print("DML: timing_cfg: plane=%d, VTotal = %d\n", k, timing->VTotal[k]);
+		dml_print("DML: timing_cfg: plane=%d, HActive = %d\n", k, timing->HActive[k]);
+		dml_print("DML: timing_cfg: plane=%d, VActive = %d\n", k, timing->VActive[k]);
+		dml_print("DML: timing_cfg: plane=%d, VFrontPorch = %d\n", k, timing->VFrontPorch[k]);
+		dml_print("DML: timing_cfg: plane=%d, VBlankNom = %d\n", k, timing->VBlankNom[k]);
+		dml_print("DML: timing_cfg: plane=%d, RefreshRate = %d\n", k, timing->RefreshRate[k]);
+		dml_print("DML: timing_cfg: plane=%d, PixelClock = %f\n", k, timing->PixelClock[k]);
+		dml_print("DML: timing_cfg: plane=%d, Interlace = %d\n", k, timing->Interlace[k]);
+		dml_print("DML: timing_cfg: plane=%d, DRRDisplay = %d\n", k, timing->DRRDisplay[k]);
+	}
+}
+
+void dml_print_dml_display_cfg_plane(const struct dml_plane_cfg_st *plane, dml_uint_t num_plane)
+{ /* Log num_plane and the four scalar VM fields once, then the indexed plane_cfg fields for indices 0 .. num_plane - 1. */
+	dml_print("DML: plane_cfg: num_plane = %d\n", num_plane);
+	dml_print("DML: plane_cfg: GPUVMEnable = %d\n", plane->GPUVMEnable);
+	dml_print("DML: plane_cfg: HostVMEnable = %d\n", plane->HostVMEnable);
+	dml_print("DML: plane_cfg: GPUVMMaxPageTableLevels = %d\n", plane->GPUVMMaxPageTableLevels);
+	dml_print("DML: plane_cfg: HostVMMaxPageTableLevels = %d\n", plane->HostVMMaxPageTableLevels);
+
+	for (dml_uint_t k = 0; k < num_plane; ++k) {
+		dml_print("DML: plane_cfg: plane=%d, GPUVMMinPageSizeKBytes = %d\n", k, plane->GPUVMMinPageSizeKBytes[k]);
+		dml_print("DML: plane_cfg: plane=%d, ForceOneRowForFrame = %d\n", k, plane->ForceOneRowForFrame[k]);
+		dml_print("DML: plane_cfg: plane=%d, PTEBufferModeOverrideEn = %d\n", k, plane->PTEBufferModeOverrideEn[k]);
+		dml_print("DML: plane_cfg: plane=%d, PTEBufferMode = %d\n", k, plane->PTEBufferMode[k]);
+		dml_print("DML: plane_cfg: plane=%d, DETSizeOverride = %d\n", k, plane->DETSizeOverride[k]);
+		dml_print("DML: plane_cfg: plane=%d, UseMALLForStaticScreen = %d\n", k, plane->UseMALLForStaticScreen[k]);
+		dml_print("DML: plane_cfg: plane=%d, UseMALLForPStateChange = %d\n", k, plane->UseMALLForPStateChange[k]);
+		dml_print("DML: plane_cfg: plane=%d, BlendingAndTiming = %d\n", k, plane->BlendingAndTiming[k]);
+		dml_print("DML: plane_cfg: plane=%d, ViewportWidth = %d\n", k, plane->ViewportWidth[k]);
+		dml_print("DML: plane_cfg: plane=%d, ViewportHeight = %d\n", k, plane->ViewportHeight[k]);
+		dml_print("DML: plane_cfg: plane=%d, ViewportWidthChroma = %d\n", k, plane->ViewportWidthChroma[k]);
+		dml_print("DML: plane_cfg: plane=%d, ViewportHeightChroma = %d\n", k, plane->ViewportHeightChroma[k]);
+		dml_print("DML: plane_cfg: plane=%d, ViewportXStart = %d\n", k, plane->ViewportXStart[k]);
+		dml_print("DML: plane_cfg: plane=%d, ViewportXStartC = %d\n", k, plane->ViewportXStartC[k]);
+		dml_print("DML: plane_cfg: plane=%d, ViewportYStart = %d\n", k, plane->ViewportYStart[k]);
+		dml_print("DML: plane_cfg: plane=%d, ViewportYStartC = %d\n", k, plane->ViewportYStartC[k]);
+		dml_print("DML: plane_cfg: plane=%d, ViewportStationary = %d\n", k, plane->ViewportStationary[k]);
+		dml_print("DML: plane_cfg: plane=%d, ScalerEnabled = %d\n", k, plane->ScalerEnabled[k]);
+		dml_print("DML: plane_cfg: plane=%d, HRatio = %3.2f\n", k, plane->HRatio[k]);
+		dml_print("DML: plane_cfg: plane=%d, VRatio = %3.2f\n", k, plane->VRatio[k]);
+		dml_print("DML: plane_cfg: plane=%d, HRatioChroma = %3.2f\n", k, plane->HRatioChroma[k]);
+		dml_print("DML: plane_cfg: plane=%d, VRatioChroma = %3.2f\n", k, plane->VRatioChroma[k]);
+		dml_print("DML: plane_cfg: plane=%d, HTaps = %d\n", k, plane->HTaps[k]);
+		dml_print("DML: plane_cfg: plane=%d, VTaps = %d\n", k, plane->VTaps[k]);
+		dml_print("DML: plane_cfg: plane=%d, HTapsChroma = %d\n", k, plane->HTapsChroma[k]);
+		dml_print("DML: plane_cfg: plane=%d, VTapsChroma = %d\n", k, plane->VTapsChroma[k]);
+		dml_print("DML: plane_cfg: plane=%d, LBBitPerPixel = %d\n", k, plane->LBBitPerPixel[k]);
+		dml_print("DML: plane_cfg: plane=%d, SourceScan = %d\n", k, plane->SourceScan[k]);
+		dml_print("DML: plane_cfg: plane=%d, ScalerRecoutWidth = %d\n", k, plane->ScalerRecoutWidth[k]);
+		dml_print("DML: plane_cfg: plane=%d, NumberOfCursors = %d\n", k, plane->NumberOfCursors[k]);
+		dml_print("DML: plane_cfg: plane=%d, CursorWidth = %d\n", k, plane->CursorWidth[k]);
+		dml_print("DML: plane_cfg: plane=%d, CursorBPP = %d\n", k, plane->CursorBPP[k]);
+
+		dml_print("DML: plane_cfg: plane=%d, DynamicMetadataEnable = %d\n", k, plane->DynamicMetadataEnable[k]);
+		dml_print("DML: plane_cfg: plane=%d, DynamicMetadataLinesBeforeActiveRequired = %d\n", k, plane->DynamicMetadataLinesBeforeActiveRequired[k]);
+		dml_print("DML: plane_cfg: plane=%d, DynamicMetadataTransmittedBytes = %d\n", k, plane->DynamicMetadataTransmittedBytes[k]);
+	}
+}
+
+void dml_print_dml_display_cfg_surface(const struct dml_surface_cfg_st *surface, dml_uint_t num_plane)
+{ /* Log the surface_cfg entries at indices 0 .. num_plane - 1, thirteen dml_print() lines per index. */
+	for (dml_uint_t k = 0; k < num_plane; ++k) {
+		dml_print("DML: surface_cfg: plane=%d, PitchY = %d\n", k, surface->PitchY[k]);
+		dml_print("DML: surface_cfg: plane=%d, SurfaceWidthY = %d\n", k, surface->SurfaceWidthY[k]);
+		dml_print("DML: surface_cfg: plane=%d, SurfaceHeightY = %d\n", k, surface->SurfaceHeightY[k]);
+		dml_print("DML: surface_cfg: plane=%d, PitchC = %d\n", k, surface->PitchC[k]);
+		dml_print("DML: surface_cfg: plane=%d, SurfaceWidthC = %d\n", k, surface->SurfaceWidthC[k]);
+		dml_print("DML: surface_cfg: plane=%d, SurfaceHeightC = %d\n", k, surface->SurfaceHeightC[k]);
+		dml_print("DML: surface_cfg: plane=%d, DCCEnable = %d\n", k, surface->DCCEnable[k]);
+		dml_print("DML: surface_cfg: plane=%d, DCCMetaPitchY = %d\n", k, surface->DCCMetaPitchY[k]);
+		dml_print("DML: surface_cfg: plane=%d, DCCMetaPitchC = %d\n", k, surface->DCCMetaPitchC[k]);
+		dml_print("DML: surface_cfg: plane=%d, DCCRateLuma = %f\n", k, surface->DCCRateLuma[k]);
+		dml_print("DML: surface_cfg: plane=%d, DCCRateChroma = %f\n", k, surface->DCCRateChroma[k]);
+		dml_print("DML: surface_cfg: plane=%d, DCCFractionOfZeroSizeRequestsLuma = %f\n", k, surface->DCCFractionOfZeroSizeRequestsLuma[k]);
+		dml_print("DML: surface_cfg: plane=%d, DCCFractionOfZeroSizeRequestsChroma= %f\n", k, surface->DCCFractionOfZeroSizeRequestsChroma[k]);
+	}
+}
+
+void dml_print_dml_display_cfg_hw_resource(const struct dml_hw_resource_st *hw, dml_uint_t num_plane)
+{ /* Log the indexed hw_resource fields for indices 0 .. num_plane - 1, then the scalar DLGRefClkFreqMHz. */
+	for (dml_uint_t k = 0; k < num_plane; ++k) {
+		dml_print("DML: hw_resource: plane=%d, ODMMode = %d\n", k, hw->ODMMode[k]);
+		dml_print("DML: hw_resource: plane=%d, DPPPerSurface = %d\n", k, hw->DPPPerSurface[k]);
+		dml_print("DML: hw_resource: plane=%d, DSCEnabled = %d\n", k, hw->DSCEnabled[k]);
+		dml_print("DML: hw_resource: plane=%d, NumberOfDSCSlices = %d\n", k, hw->NumberOfDSCSlices[k]);
+	}
+	dml_print("DML: hw_resource: DLGRefClkFreqMHz = %f\n", hw->DLGRefClkFreqMHz);
+}
+
+__DML_DLL_EXPORT__ void dml_print_soc_state_bounding_box(const struct soc_state_bounding_box_st *state)
+{ /* Log the fields of *state listed below through dml_print(), one line per field, in this fixed order. */
+ dml_print("DML: state_bbox: socclk_mhz = %f\n", state->socclk_mhz);
+ dml_print("DML: state_bbox: dscclk_mhz = %f\n", state->dscclk_mhz);
+ dml_print("DML: state_bbox: phyclk_mhz = %f\n", state->phyclk_mhz);
+ dml_print("DML: state_bbox: phyclk_d18_mhz = %f\n", state->phyclk_d18_mhz);
+ dml_print("DML: state_bbox: phyclk_d32_mhz = %f\n", state->phyclk_d32_mhz);
+ dml_print("DML: state_bbox: dtbclk_mhz = %f\n", state->dtbclk_mhz);
+ dml_print("DML: state_bbox: dispclk_mhz = %f\n", state->dispclk_mhz);
+ dml_print("DML: state_bbox: dppclk_mhz = %f\n", state->dppclk_mhz);
+ dml_print("DML: state_bbox: fabricclk_mhz = %f\n", state->fabricclk_mhz);
+ dml_print("DML: state_bbox: dcfclk_mhz = %f\n", state->dcfclk_mhz);
+ dml_print("DML: state_bbox: dram_speed_mts = %f\n", state->dram_speed_mts);
+ dml_print("DML: state_bbox: urgent_latency_pixel_data_only_us = %f\n", state->urgent_latency_pixel_data_only_us);
+ dml_print("DML: state_bbox: urgent_latency_pixel_mixed_with_vm_data_us = %f\n", state->urgent_latency_pixel_mixed_with_vm_data_us);
+ dml_print("DML: state_bbox: urgent_latency_vm_data_only_us = %f\n", state->urgent_latency_vm_data_only_us);
+ dml_print("DML: state_bbox: writeback_latency_us = %f\n", state->writeback_latency_us);
+ dml_print("DML: state_bbox: urgent_latency_adjustment_fabric_clock_component_us = %f\n", state->urgent_latency_adjustment_fabric_clock_component_us);
+ dml_print("DML: state_bbox: urgent_latency_adjustment_fabric_clock_reference_mhz= %f\n", state->urgent_latency_adjustment_fabric_clock_reference_mhz);
+ dml_print("DML: state_bbox: sr_exit_time_us = %f\n", state->sr_exit_time_us);
+ dml_print("DML: state_bbox: sr_enter_plus_exit_time_us = %f\n", state->sr_enter_plus_exit_time_us);
+ dml_print("DML: state_bbox: sr_exit_z8_time_us = %f\n", state->sr_exit_z8_time_us);
+ dml_print("DML: state_bbox: sr_enter_plus_exit_z8_time_us = %f\n", state->sr_enter_plus_exit_z8_time_us);
+ dml_print("DML: state_bbox: dram_clock_change_latency_us = %f\n", state->dram_clock_change_latency_us);
+ dml_print("DML: state_bbox: fclk_change_latency_us = %f\n", state->fclk_change_latency_us);
+ dml_print("DML: state_bbox: usr_retraining_latency_us = %f\n", state->usr_retraining_latency_us);
+ dml_print("DML: state_bbox: use_ideal_dram_bw_strobe = %d\n", state->use_ideal_dram_bw_strobe); /* the only field in this dump formatted with %d; all others use %f */
+}
+
+__DML_DLL_EXPORT__ void dml_print_soc_bounding_box(const struct soc_bounding_box_st *soc)
+{ /* Log the fields of *soc listed below through dml_print(), one line per field, in this fixed order. */
+ dml_print("DML: soc_bbox: dprefclk_mhz = %f\n", soc->dprefclk_mhz);
+ dml_print("DML: soc_bbox: xtalclk_mhz = %f\n", soc->xtalclk_mhz);
+ dml_print("DML: soc_bbox: pcierefclk_mhz = %f\n", soc->pcierefclk_mhz);
+ dml_print("DML: soc_bbox: refclk_mhz = %f\n", soc->refclk_mhz);
+ dml_print("DML: soc_bbox: amclk_mhz = %f\n", soc->amclk_mhz);
+
+ dml_print("DML: soc_bbox: max_outstanding_reqs = %f\n", soc->max_outstanding_reqs);
+ dml_print("DML: soc_bbox: pct_ideal_sdp_bw_after_urgent = %f\n", soc->pct_ideal_sdp_bw_after_urgent);
+ dml_print("DML: soc_bbox: pct_ideal_fabric_bw_after_urgent = %f\n", soc->pct_ideal_fabric_bw_after_urgent);
+ dml_print("DML: soc_bbox: pct_ideal_dram_bw_after_urgent_pixel_only = %f\n", soc->pct_ideal_dram_bw_after_urgent_pixel_only);
+ dml_print("DML: soc_bbox: pct_ideal_dram_bw_after_urgent_pixel_and_vm = %f\n", soc->pct_ideal_dram_bw_after_urgent_pixel_and_vm);
+ dml_print("DML: soc_bbox: pct_ideal_dram_bw_after_urgent_vm_only = %f\n", soc->pct_ideal_dram_bw_after_urgent_vm_only);
+ dml_print("DML: soc_bbox: pct_ideal_dram_bw_after_urgent_strobe = %f\n", soc->pct_ideal_dram_bw_after_urgent_strobe);
+ dml_print("DML: soc_bbox: max_avg_sdp_bw_use_normal_percent = %f\n", soc->max_avg_sdp_bw_use_normal_percent);
+ dml_print("DML: soc_bbox: max_avg_fabric_bw_use_normal_percent = %f\n", soc->max_avg_fabric_bw_use_normal_percent);
+ dml_print("DML: soc_bbox: max_avg_dram_bw_use_normal_percent = %f\n", soc->max_avg_dram_bw_use_normal_percent);
+ dml_print("DML: soc_bbox: max_avg_dram_bw_use_normal_strobe_percent = %f\n", soc->max_avg_dram_bw_use_normal_strobe_percent);
+ dml_print("DML: soc_bbox: round_trip_ping_latency_dcfclk_cycles = %d\n", soc->round_trip_ping_latency_dcfclk_cycles); /* from here on the dump mixes %d and %f fields */
+ dml_print("DML: soc_bbox: urgent_out_of_order_return_per_channel_pixel_only_bytes = %d\n", soc->urgent_out_of_order_return_per_channel_pixel_only_bytes);
+ dml_print("DML: soc_bbox: urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = %d\n", soc->urgent_out_of_order_return_per_channel_pixel_and_vm_bytes);
+ dml_print("DML: soc_bbox: urgent_out_of_order_return_per_channel_vm_only_bytes = %d\n", soc->urgent_out_of_order_return_per_channel_vm_only_bytes);
+ dml_print("DML: soc_bbox: num_chans = %d\n", soc->num_chans);
+ dml_print("DML: soc_bbox: return_bus_width_bytes = %d\n", soc->return_bus_width_bytes);
+ dml_print("DML: soc_bbox: dram_channel_width_bytes = %d\n", soc->dram_channel_width_bytes);
+ dml_print("DML: soc_bbox: fabric_datapath_to_dcn_data_return_bytes = %d\n", soc->fabric_datapath_to_dcn_data_return_bytes);
+ dml_print("DML: soc_bbox: hostvm_min_page_size_kbytes = %d\n", soc->hostvm_min_page_size_kbytes);
+ dml_print("DML: soc_bbox: gpuvm_min_page_size_kbytes = %d\n", soc->gpuvm_min_page_size_kbytes);
+ dml_print("DML: soc_bbox: phy_downspread_percent = %f\n", soc->phy_downspread_percent);
+ dml_print("DML: soc_bbox: dcn_downspread_percent = %f\n", soc->dcn_downspread_percent);
+ dml_print("DML: soc_bbox: smn_latency_us = %f\n", soc->smn_latency_us);
+ dml_print("DML: soc_bbox: mall_allocated_for_dcn_mbytes = %d\n", soc->mall_allocated_for_dcn_mbytes);
+ dml_print("DML: soc_bbox: dispclk_dppclk_vco_speed_mhz = %f\n", soc->dispclk_dppclk_vco_speed_mhz);
+ dml_print("DML: soc_bbox: do_urgent_latency_adjustment = %d\n", soc->do_urgent_latency_adjustment);
+}
+
+__DML_DLL_EXPORT__ void dml_print_clk_cfg(const struct dml_clk_cfg_st *clk_cfg)
+{ /* Log a legend for the option values, the scalar dcfclk/dispclk settings, then the dppclk settings of all DCN_DML__NUM_PLANE entries. */
+	dml_print("DML: clk_cfg: 0-use_required, 1-use pipe.clks_cfg, 2-use state bbox\n");
+	dml_print("DML: clk_cfg: dcfclk_option = %d\n", clk_cfg->dcfclk_option);
+	dml_print("DML: clk_cfg: dispclk_option = %d\n", clk_cfg->dispclk_option);
+
+	dml_print("DML: clk_cfg: dcfclk_mhz = %f\n", clk_cfg->dcfclk_mhz);
+	dml_print("DML: clk_cfg: dispclk_mhz = %f\n", clk_cfg->dispclk_mhz);
+
+	for (dml_uint_t k = 0; k < DCN_DML__NUM_PLANE; ++k) {
+		dml_print("DML: clk_cfg: i=%d, dppclk_option = %d\n", k, clk_cfg->dppclk_option[k]);
+		dml_print("DML: clk_cfg: i=%d, dppclk_mhz = %f\n", k, clk_cfg->dppclk_mhz[k]);
+	}
+}
+
+/* Return true when Scan is dml_rotation_90, _90m, _270 or _270m, and false for any other value. */
+dml_bool_t dml_is_vertical_rotation(enum dml_rotation_angle Scan)
+{
+	/* The four matching enumerators are tested in two pairs, one per angle. */
+	const dml_bool_t is_90 = (Scan == dml_rotation_90 || Scan == dml_rotation_90m);
+	const dml_bool_t is_270 = (Scan == dml_rotation_270 || Scan == dml_rotation_270m);
+
+	/* Either pair makes the rotation "vertical". */
+	return is_90 || is_270;
+} // dml_is_vertical_rotation
+
+/* Map a dml_cursor_bpp enumerator to 2, 32 or 64; any other value yields 0. */
+dml_uint_t dml_get_cursor_bit_per_pixel(enum dml_cursor_bpp ebpp)
+{
+	dml_uint_t bits = 0;
+
+	if (ebpp == dml_cur_2bit)
+		bits = 2;
+	else if (ebpp == dml_cur_32bit)
+		bits = 32;
+	else if (ebpp == dml_cur_64bit)
+		bits = 64;
+	return bits;
+}
+
+/// @brief Count the entries of display_cfg->plane.ViewportWidth[] (over __DML_NUM_PLANES__ slots) that are greater than zero
+dml_uint_t dml_get_num_active_planes(const struct dml_display_cfg_st *display_cfg)
+{
+	dml_uint_t num_active_planes = 0;
+
+	/* A slot is counted as soon as its viewport width is non-zero; no other field is consulted. */
+	for (dml_uint_t plane = 0; plane < __DML_NUM_PLANES__; ++plane)
+		num_active_planes += (display_cfg->plane.ViewportWidth[plane] > 0) ? 1 : 0;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: num_active_planes = %d\n", __func__, num_active_planes);
+#endif
+	return num_active_planes;
+}
+
+/// @brief Sum display_cfg->hw.DPPPerSurface[] over indices 0 .. dml_get_num_active_planes(display_cfg) - 1
+dml_uint_t dml_get_num_active_pipes(const struct dml_display_cfg_st *display_cfg)
+{
+	/* Loop-invariant: evaluate once instead of rescanning every plane (and re-logging under __DML_VBA_DEBUG__) per iteration. */
+	const dml_uint_t num_active_planes = dml_get_num_active_planes(display_cfg);
+	dml_uint_t num_active_pipes = 0;
+
+	for (dml_uint_t j = 0; j < num_active_planes; j++)
+		num_active_pipes += display_cfg->hw.DPPPerSurface[j]; /* NOTE(review): indexes the first N slots, so this presumes active planes are packed at the front - confirm */
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes);
+#endif
+	return num_active_pipes;
+}
+
+dml_uint_t dml_get_plane_idx(const struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx)
+{
+	/* Plain table lookup in mp.pipe_plane[]; pipe_idx is not range-checked here. */
+	return mode_lib->mp.pipe_plane[pipe_idx];
+}
+
+dml_uint_t dml_get_pipe_idx(const struct display_mode_lib_st *mode_lib, dml_uint_t plane_idx)
+{
+	/* Search mp.pipe_plane[] for the lowest index whose entry equals plane_idx. */
+	dml_uint_t match = 0;
+	dml_bool_t found = false;
+
+	ASSERT(plane_idx < __DML_NUM_PLANES__);
+
+	for (dml_uint_t pipe = 0; pipe < __DML_NUM_PLANES__ && !found; pipe++) {
+		if (mode_lib->mp.pipe_plane[pipe] == plane_idx) {
+			match = pipe;
+			found = true;
+		}
+	}
+	ASSERT(found != 0); /* nothing maps to plane_idx; the initial value 0 is what gets returned then */
+
+	return match;
+}
+
+/* Fill pipe_plane[]: consecutive entries receive plane 0 hw->DPPPerSurface[0] times, then plane 1, and so on; unused entries stay __DML_PIPE_NO_PLANE__. */
+void dml_calc_pipe_plane_mapping(const struct dml_hw_resource_st *hw, dml_uint_t *pipe_plane)
+{
+	dml_uint_t pipe_idx = 0;
+
+	for (dml_uint_t k = 0; k < __DML_NUM_PLANES__; ++k)
+		pipe_plane[k] = __DML_PIPE_NO_PLANE__; /* every entry starts unassigned */
+
+	/* Only __DML_NUM_PLANES__ entries are initialised above, so writes are held to that bound even if the DPPPerSurface[] total is larger. */
+	for (dml_uint_t plane_idx = 0; plane_idx < __DML_NUM_PLANES__; plane_idx++) {
+		for (dml_uint_t i = 0; i < hw->DPPPerSurface[plane_idx] && pipe_idx < __DML_NUM_PLANES__; i++)
+			pipe_plane[pipe_idx++] = plane_idx;
+	}
+	/* NOTE(review): an oversized total is truncated silently; callers are presumed to validate DPPPerSurface[] - confirm. */
+}
+
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.h b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.h
new file mode 100644
index 000000000000..a82b49cf7fb0
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DISPLAY_MODE_UTIL_H__
+#define __DISPLAY_MODE_UTIL_H__
+
+#include "display_mode_core_structs.h"
+#include "cmntypes.h"
+
+#include "dml_assert.h"
+#include "dml_logging.h"
+
+__DML_DLL_EXPORT__ dml_bool_t dml_util_is_420(enum dml_source_format_class source_format); /* scalar helpers: format query, rounding, min/max, log/pow */
+__DML_DLL_EXPORT__ dml_float_t dml_ceil(dml_float_t x, dml_float_t granularity);
+__DML_DLL_EXPORT__ dml_float_t dml_floor(dml_float_t x, dml_float_t granularity);
+__DML_DLL_EXPORT__ dml_float_t dml_min(dml_float_t x, dml_float_t y);
+__DML_DLL_EXPORT__ dml_float_t dml_min3(dml_float_t x, dml_float_t y, dml_float_t z);
+__DML_DLL_EXPORT__ dml_float_t dml_min4(dml_float_t x, dml_float_t y, dml_float_t z, dml_float_t w);
+__DML_DLL_EXPORT__ dml_float_t dml_max(dml_float_t x, dml_float_t y);
+__DML_DLL_EXPORT__ dml_float_t dml_max3(dml_float_t x, dml_float_t y, dml_float_t z);
+__DML_DLL_EXPORT__ dml_float_t dml_max4(dml_float_t a, dml_float_t b, dml_float_t c, dml_float_t d);
+__DML_DLL_EXPORT__ dml_float_t dml_max5(dml_float_t a, dml_float_t b, dml_float_t c, dml_float_t d, dml_float_t e);
+__DML_DLL_EXPORT__ dml_float_t dml_log(dml_float_t x, dml_float_t base);
+__DML_DLL_EXPORT__ dml_float_t dml_log2(dml_float_t x);
+__DML_DLL_EXPORT__ dml_float_t dml_round(dml_float_t val, dml_bool_t bankers_rounding);
+__DML_DLL_EXPORT__ dml_float_t dml_pow(dml_float_t base, int exp);
+__DML_DLL_EXPORT__ dml_uint_t dml_round_to_multiple(dml_uint_t num, dml_uint_t multiple, dml_bool_t up);
+__DML_DLL_EXPORT__ dml_bool_t dml_is_vertical_rotation(enum dml_rotation_angle scan);
+__DML_DLL_EXPORT__ dml_uint_t dml_get_cursor_bit_per_pixel(enum dml_cursor_bpp ebpp);
+__DML_DLL_EXPORT__ void dml_print_data_rq_regs_st(const dml_display_plane_rq_regs_st *data_rq_regs); /* dml_print_* helpers: each takes one structure to print */
+__DML_DLL_EXPORT__ void dml_print_rq_regs_st(const dml_display_rq_regs_st *rq_regs);
+__DML_DLL_EXPORT__ void dml_print_dlg_regs_st(const dml_display_dlg_regs_st *dlg_regs);
+__DML_DLL_EXPORT__ void dml_print_ttu_regs_st(const dml_display_ttu_regs_st *ttu_regs);
+__DML_DLL_EXPORT__ void dml_print_dml_policy(const struct dml_mode_eval_policy_st *policy);
+__DML_DLL_EXPORT__ void dml_print_mode_support(struct display_mode_lib_st *mode_lib, dml_uint_t j);
+__DML_DLL_EXPORT__ void dml_print_dml_mode_support_info(const struct dml_mode_support_info_st *support, dml_bool_t fail_only);
+__DML_DLL_EXPORT__ void dml_print_dml_display_cfg_timing(const struct dml_timing_cfg_st *timing, dml_uint_t num_plane);
+__DML_DLL_EXPORT__ void dml_print_dml_display_cfg_plane(const struct dml_plane_cfg_st *plane, dml_uint_t num_plane);
+__DML_DLL_EXPORT__ void dml_print_dml_display_cfg_surface(const struct dml_surface_cfg_st *surface, dml_uint_t num_plane);
+__DML_DLL_EXPORT__ void dml_print_dml_display_cfg_hw_resource(const struct dml_hw_resource_st *hw, dml_uint_t num_plane);
+__DML_DLL_EXPORT__ void dml_print_soc_state_bounding_box(const struct soc_state_bounding_box_st *state);
+__DML_DLL_EXPORT__ void dml_print_soc_bounding_box(const struct soc_bounding_box_st *soc);
+__DML_DLL_EXPORT__ void dml_print_clk_cfg(const struct dml_clk_cfg_st *clk_cfg);
+
+__DML_DLL_EXPORT__ dml_uint_t dml_get_num_active_planes(const struct dml_display_cfg_st *display_cfg); /* plane/pipe counting and index mapping */
+__DML_DLL_EXPORT__ dml_uint_t dml_get_num_active_pipes(const struct dml_display_cfg_st *display_cfg);
+__DML_DLL_EXPORT__ dml_uint_t dml_get_plane_idx(const struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx);
+__DML_DLL_EXPORT__ dml_uint_t dml_get_pipe_idx(const struct display_mode_lib_st *mode_lib, dml_uint_t plane_idx);
+__DML_DLL_EXPORT__ void dml_calc_pipe_plane_mapping(const struct dml_hw_resource_st *hw, dml_uint_t *pipe_plane);
+
+#endif /* __DISPLAY_MODE_UTIL_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.c
new file mode 100644
index 000000000000..bf5e7f4e0416
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.c
@@ -0,0 +1,929 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dml21_wrapper.h"
+#include "dml2_core_dcn4_calcs.h"
+#include "dml2_internal_shared_types.h"
+#include "dml2_internal_types.h"
+#include "dml21_utils.h"
+#include "dml21_translation_helper.h"
+#include "soc_and_ip_translator.h"
+
+/*
+ * Derive the PMO option flags handed to DML2.1 from the DC debug knobs and the
+ * DML2 configuration.
+ *
+ * Bits 1..4 of debug.dml21_disable_pstate_method_mask disable, in order, the
+ * vblank, SubVP, clamped-DRR and variable-DRR methods. SubVP and both DRR
+ * flavours are additionally disabled whenever FAMS2 is not enabled.
+ */
+static void dml21_populate_pmo_options(struct dml2_pmo_options *pmo_options,
+		const struct dc *in_dc,
+		const struct dml2_configuration_options *config)
+{
+	const bool fams2_off = !in_dc->debug.fams2_config.bits.enable;
+	const bool ingame_fams_off = in_dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE;
+	const bool ingame_clamped_only = in_dc->debug.disable_fams_gaming == INGAME_FAMS_MULTI_DISP_CLAMPED_ONLY;
+
+	/* ODM options */
+	pmo_options->disable_dyn_odm_for_multi_stream = true;
+	pmo_options->disable_dyn_odm_for_stream_with_svp = true;
+	pmo_options->disable_dyn_odm = !config->minimize_dispclk_using_odm;
+
+	/* Per-method disables taken from the debug mask */
+	pmo_options->disable_vblank = (in_dc->debug.dml21_disable_pstate_method_mask & (1 << 1)) != 0;
+
+	/* NOTE: DRR and SubVP Require FAMS2 */
+	pmo_options->disable_fams2 = fams2_off;
+	pmo_options->disable_svp = fams2_off ||
+			in_dc->debug.force_disable_subvp ||
+			(in_dc->debug.dml21_disable_pstate_method_mask & (1 << 2)) != 0;
+	pmo_options->disable_drr_clamped = fams2_off ||
+			(in_dc->debug.dml21_disable_pstate_method_mask & (1 << 3)) != 0;
+	pmo_options->disable_drr_var = fams2_off ||
+			(in_dc->debug.dml21_disable_pstate_method_mask & (1 << 4)) != 0;
+
+	/* Behaviour while variable refresh is active, driven by the in-game FAMS setting */
+	pmo_options->disable_drr_clamped_when_var_active = ingame_fams_off;
+	pmo_options->disable_drr_var_when_var_active = ingame_fams_off || ingame_clamped_only;
+}
+
+/*
+ * Map a DC hardware version onto the DML2 project id.
+ *
+ * Only DCN_VERSION_4_01 is recognised; any other version logs an error via
+ * DC_ERR and yields dml2_project_invalid.
+ */
+static enum dml2_project_id dml21_dcn_revision_to_dml2_project_id(enum dce_version dcn_version)
+{
+	switch (dcn_version) {
+	case DCN_VERSION_4_01:
+		return dml2_project_dcn4x_stage2_auto_drr_svp;
+	default:
+		DC_ERR("unsupported dcn version for DML21!");
+		return dml2_project_invalid;
+	}
+}
+
+/*
+ * Fill the DML2.1 initialisation parameters: project id, SoC bounding box,
+ * IP caps and PMO options.
+ *
+ * The SoC bounding box and IP caps are either produced by the DC
+ * soc_and_ip_translator callbacks (config->use_native_soc_bb_construction set)
+ * or copied from config->external_socbb_ip_params.
+ */
+void dml21_populate_dml_init_params(struct dml2_initialize_instance_in_out *dml_init,
+		const struct dml2_configuration_options *config,
+		const struct dc *in_dc)
+{
+	dml_init->options.project_id = dml21_dcn_revision_to_dml2_project_id(in_dc->ctx->dce_version);
+
+	if (!config->use_native_soc_bb_construction) {
+		/* Caller supplied the bounding box and IP parameters directly. */
+		dml_init->soc_bb = config->external_socbb_ip_params->soc_bb;
+		dml_init->ip_caps = config->external_socbb_ip_params->ip_params;
+	} else {
+		in_dc->soc_and_ip_translator->translator_funcs->get_soc_bb(&dml_init->soc_bb, in_dc, config);
+		in_dc->soc_and_ip_translator->translator_funcs->get_ip_caps(&dml_init->ip_caps);
+	}
+
+	dml21_populate_pmo_options(&dml_init->options.pmo_options, in_dc, config);
+}
+
+/*
+ * Largest v_total usable for this stream: caps.max_v_total, reduced by
+ * (v_front_porch + 1) when caps.vtotal_limited_by_fp2 is set.
+ */
+static unsigned int calc_max_hardware_v_total(const struct dc_stream_state *stream)
+{
+	const struct dc *dc = stream->ctx->dc;
+	unsigned int v_total_limit = dc->caps.max_v_total;
+
+	if (!dc->caps.vtotal_limited_by_fp2)
+		return v_total_limit;
+
+	return v_total_limit - (stream->timing.v_front_porch + 1);
+}
+
+/* Bits per component for the stream's display colour depth; 8 for unrecognised depths. */
+static unsigned int dml21_stream_bpc(const struct dc_stream_state *stream)
+{
+	switch (stream->timing.display_color_depth) {
+	case COLOR_DEPTH_666:
+		return 6;
+	case COLOR_DEPTH_999:
+		return 9;
+	case COLOR_DEPTH_101010:
+		return 10;
+	case COLOR_DEPTH_111111:
+		return 11;
+	case COLOR_DEPTH_121212:
+		return 12;
+	case COLOR_DEPTH_141414:
+		return 14;
+	case COLOR_DEPTH_161616:
+		return 16;
+	case COLOR_DEPTH_888:
+	default:
+		return 8;
+	}
+}
+
+/*
+ * Translate a DC stream's timing into the DML2.1 timing descriptor.
+ *
+ * @timing:   destination descriptor
+ * @stream:   DC stream providing the CRTC timing, DRR flags and DSC config
+ * @pipe_ctx: pipe whose dsc_padding_params adjust h_active, h_total and the
+ *            pixel clock when dsc_hactive_padding is non-zero
+ * @dml_ctx:  DML2 context; supplies the optional flickerless-vtotal callback
+ */
+static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cfg *timing,
+		struct dc_stream_state *stream,
+		struct pipe_ctx *pipe_ctx,
+		struct dml2_context *dml_ctx)
+{
+	const bool dsc_padded = pipe_ctx->dsc_padding_params.dsc_hactive_padding != 0;
+	unsigned int hblank_start, vblank_start, hw_min_refresh_uhz;
+	uint32_t pix_clk_100hz;
+
+	/* Horizontal / vertical geometry, including borders and any DSC padding */
+	timing->h_active = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right + pipe_ctx->dsc_padding_params.dsc_hactive_padding;
+	timing->v_active = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top;
+	timing->h_front_porch = stream->timing.h_front_porch;
+	timing->v_front_porch = stream->timing.v_front_porch;
+
+	/* Pixel clock: the DSC-padded clock replaces the stream clock; HW frame packing doubles it */
+	if (dsc_padded)
+		timing->pixel_clock_khz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz / 10;
+	else
+		timing->pixel_clock_khz = stream->timing.pix_clk_100hz / 10;
+	if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING)
+		timing->pixel_clock_khz *= 2;
+
+	timing->h_total = stream->timing.h_total + pipe_ctx->dsc_padding_params.dsc_htotal_padding;
+	timing->v_total = stream->timing.v_total;
+	timing->h_sync_width = stream->timing.h_sync_width;
+	timing->interlaced = stream->timing.flags.INTERLACE;
+
+	/* Blank end positions; h_blank_end is forced to 0 when blank start precedes the addressable width */
+	hblank_start = stream->timing.h_total - stream->timing.h_front_porch;
+	if (hblank_start < stream->timing.h_addressable)
+		timing->h_blank_end = 0;
+	else
+		timing->h_blank_end = hblank_start - stream->timing.h_addressable - pipe_ctx->dsc_padding_params.dsc_hactive_padding
+				- stream->timing.h_border_left - stream->timing.h_border_right;
+
+	vblank_start = stream->timing.v_total - stream->timing.v_front_porch;
+	timing->v_blank_end = vblank_start - stream->timing.v_addressable
+			- stream->timing.v_border_top - stream->timing.v_border_bottom;
+
+	/* DRR state */
+	timing->drr_config.enabled = stream->ignore_msa_timing_param;
+	timing->drr_config.drr_active_variable = stream->vrr_active_variable;
+	timing->drr_config.drr_active_fixed = stream->vrr_active_fixed;
+	timing->drr_config.disallowed = !stream->allow_freesync;
+
+	/* limit min refresh rate to DC cap */
+	hw_min_refresh_uhz = stream->timing.min_refresh_in_uhz;
+	if (stream->ctx->dc->caps.max_v_total != 0) {
+		pix_clk_100hz = dsc_padded ? pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz
+				: stream->timing.pix_clk_100hz;
+		/* refresh[uHz] = pixel clock / (h_total * largest programmable v_total) */
+		hw_min_refresh_uhz = div64_u64((pix_clk_100hz * 100000000ULL),
+				(timing->h_total * (long long)calc_max_hardware_v_total(stream)));
+	}
+
+	timing->drr_config.min_refresh_uhz = max(stream->timing.min_refresh_in_uhz, hw_min_refresh_uhz);
+
+	/* The flickerless vtotal delta is only queried when the callback exists and FPO flicker detection is on */
+	timing->drr_config.max_instant_vtotal_delta = 0;
+	if (dml_ctx->config.callbacks.get_max_flickerless_instant_vtotal_increase &&
+			stream->ctx->dc->config.enable_fpo_flicker_detection == 1)
+		timing->drr_config.max_instant_vtotal_delta = dml_ctx->config.callbacks.get_max_flickerless_instant_vtotal_increase(stream, false);
+
+	if (!stream->timing.flags.DSC) {
+		timing->dsc.enable = dml2_dsc_disable;
+	} else {
+		timing->dsc.enable = dml2_dsc_enable;
+		timing->dsc.overrides.num_slices = stream->timing.dsc_cfg.num_slices_h;
+		timing->dsc.dsc_compressed_bpp_x16 = stream->timing.dsc_cfg.bits_per_pixel;
+	}
+
+	timing->bpc = dml21_stream_bpc(stream);
+
+	timing->vblank_nom = timing->v_total - timing->v_active;
+}
+
+/*
+ * Translate a DC stream's output properties (signal type, pixel encoding,
+ * audio info) into the DML2.1 link output descriptor.
+ *
+ * @output: destination descriptor
+ * @stream: DC stream being described
+ * @pipe:   pipe passed to check_dp2p0_output_encoder() for DP signals
+ */
+static void populate_dml21_output_config_from_stream_state(struct dml2_link_output_cfg *output,
+		struct dc_stream_state *stream, const struct pipe_ctx *pipe)
+{
+	output->output_dp_lane_count = 4;
+
+	/* Encoder selection; unknown signal types are treated as plain DP */
+	switch (stream->signal) {
+	case SIGNAL_TYPE_EDP:
+		output->output_encoder = dml2_edp;
+		break;
+	case SIGNAL_TYPE_HDMI_TYPE_A:
+	case SIGNAL_TYPE_DVI_SINGLE_LINK:
+	case SIGNAL_TYPE_DVI_DUAL_LINK:
+		output->output_encoder = dml2_hdmi;
+		break;
+	case SIGNAL_TYPE_DISPLAY_PORT:
+	case SIGNAL_TYPE_DISPLAY_PORT_MST:
+		output->output_encoder = check_dp2p0_output_encoder(pipe) ? dml2_dp2p0 : dml2_dp;
+		break;
+	default:
+		output->output_encoder = dml2_dp;
+		break;
+	}
+
+	/* Output format; 4:2:2 is "native" only with DSC and without the simple-422 flag */
+	switch (stream->timing.pixel_encoding) {
+	case PIXEL_ENCODING_YCBCR420:
+		output->output_format = dml2_420;
+		break;
+	case PIXEL_ENCODING_YCBCR422:
+		if (!stream->timing.flags.DSC || stream->timing.dsc_cfg.ycbcr422_simple)
+			output->output_format = dml2_s422;
+		else
+			output->output_format = dml2_n422;
+		break;
+	case PIXEL_ENCODING_RGB:
+	case PIXEL_ENCODING_YCBCR444:
+	default:
+		output->output_format = dml2_444;
+		break;
+	}
+
+	/* Every signal type currently reports the DP link rate as not applicable */
+	output->output_dp_link_rate = dml2_dp_rate_na;
+
+	/* Audio parameters are taken from the first audio mode entry */
+	output->audio_sample_layout = stream->audio_info.modes->sample_size;
+	output->audio_sample_rate = stream->audio_info.modes->max_bit_rate;
+	output->output_disabled = true;
+
+	//TODO : New to DML2.1. How do we populate this ?
+	// output->validate_output
+}
+
+/*
+ * Apply per-stream debug overrides to the DML2.1 stream descriptor.
+ *
+ * stream->debug.force_odm_combine_segments selects the ODM mode (1 = bypass,
+ * 2/3/4 = N:1 combine, anything else = auto). Dynamic ODM is disabled when a
+ * segment count is forced or the single-display 2:1 ODM policy is off. SubVP is
+ * disabled when forced off in debug, when the stream requires a HW cursor, or
+ * when the stream status reports the SubVP cursor size limit.
+ */
+static void populate_dml21_stream_overrides_from_stream_state(
+		struct dml2_stream_parameters *stream_desc,
+		struct dc_stream_state *stream,
+		struct dc_stream_status *stream_status)
+{
+	const struct dc *dc = stream->ctx->dc;
+
+	switch (stream->debug.force_odm_combine_segments) {
+	case 1:
+		stream_desc->overrides.odm_mode = dml2_odm_mode_bypass;
+		break;
+	case 2:
+		stream_desc->overrides.odm_mode = dml2_odm_mode_combine_2to1;
+		break;
+	case 3:
+		stream_desc->overrides.odm_mode = dml2_odm_mode_combine_3to1;
+		break;
+	case 4:
+		stream_desc->overrides.odm_mode = dml2_odm_mode_combine_4to1;
+		break;
+	case 0:
+	default:
+		stream_desc->overrides.odm_mode = dml2_odm_mode_auto;
+		break;
+	}
+
+	if (stream->debug.force_odm_combine_segments > 0 ||
+			!dc->debug.enable_single_display_2to1_odm_policy)
+		stream_desc->overrides.disable_dynamic_odm = true;
+
+	stream_desc->overrides.disable_subvp = dc->debug.force_disable_subvp ||
+			stream->hw_cursor_req ||
+			stream_status->mall_stream_config.cursor_size_limit_subvp;
+}
+
+/*
+ * Convert an ADDR3 swizzle mode into the DML2 swizzle mode.
+ * Unknown modes trip an ASSERT and fall back to linear.
+ */
+static enum dml2_swizzle_mode gfx_addr3_to_dml2_swizzle_mode(enum swizzle_mode_addr3_values addr3_mode)
+{
+	switch (addr3_mode) {
+	case DC_ADDR3_SW_LINEAR:
+		return dml2_sw_linear;
+	case DC_ADDR3_SW_256B_2D:
+		return dml2_sw_256b_2d;
+	case DC_ADDR3_SW_4KB_2D:
+		return dml2_sw_4kb_2d;
+	case DC_ADDR3_SW_64KB_2D:
+		return dml2_sw_64kb_2d;
+	case DC_ADDR3_SW_256KB_2D:
+		return dml2_sw_256kb_2d;
+	default:
+		/* invalid swizzle mode for DML2.1 */
+		ASSERT(false);
+		return dml2_sw_linear;
+	}
+}
+
+/*
+ * Convert a GFX9-style swizzle mode into the DML2 swizzle mode.
+ * Modes without a DML2.1 equivalent silently map to dml2_sw_64kb_2d.
+ */
+static enum dml2_swizzle_mode gfx9_to_dml2_swizzle_mode(enum swizzle_mode_values gfx9_mode)
+{
+	switch (gfx9_mode) {
+	case DC_SW_LINEAR:
+		return dml2_sw_linear;
+	case DC_SW_256_D:
+	case DC_SW_256_R:
+		return dml2_sw_256b_2d;
+	case DC_SW_4KB_D:
+	case DC_SW_4KB_R:
+	case DC_SW_4KB_R_X:
+		return dml2_sw_4kb_2d;
+	case DC_SW_64KB_D:
+	case DC_SW_64KB_D_X:
+	case DC_SW_64KB_R:
+	case DC_SW_64KB_R_X:
+		return dml2_sw_64kb_2d;
+	case DC_SW_256B_S:
+	case DC_SW_4KB_S:
+	case DC_SW_64KB_S:
+	case DC_SW_VAR_S:
+	case DC_SW_VAR_D:
+	case DC_SW_VAR_R:
+	case DC_SW_64KB_S_T:
+	case DC_SW_64KB_D_T:
+	case DC_SW_4KB_S_X:
+	case DC_SW_4KB_D_X:
+	case DC_SW_64KB_S_X:
+	case DC_SW_VAR_S_X:
+	case DC_SW_VAR_D_X:
+	case DC_SW_VAR_R_X:
+	default:
+		/*
+		 * Invalid swizzle mode for DML2.1. This can happen because DML21
+		 * is not intended to be used by N-1 in production. Filtering out
+		 * unsupported swizzle modes properly would require fixing
+		 * capability reporting when DML2.1 is used for N-1 in dc, so that
+		 * DML only ever receives DML21-supported swizzle modes. That is
+		 * not implemented and has low value, since the supported swizzle
+		 * modes should already cover most N-1 test cases.
+		 */
+		return dml2_sw_64kb_2d;
+	}
+}
+
+/*
+ * Describe a placeholder surface for a stream that has no planes: both planes
+ * take the stream's addressable size, plane0 pitch is its width rounded up to a
+ * multiple of 128, DCC is off and tiling is 64KB 2D.
+ */
+static void populate_dml21_dummy_surface_cfg(struct dml2_surface_cfg *surface, const struct dc_stream_state *stream)
+{
+	/* plane 0 */
+	surface->plane0.width = stream->timing.h_addressable;
+	surface->plane0.height = stream->timing.v_addressable;
+	surface->plane0.pitch = (surface->plane0.width + 127) / 128 * 128;
+
+	/* plane 1 */
+	surface->plane1.width = stream->timing.h_addressable;
+	surface->plane1.height = stream->timing.v_addressable;
+	surface->plane1.pitch = 0;
+
+	surface->tiling = dml2_sw_64kb_2d;
+
+	/* DCC disabled, informative values neutral */
+	surface->dcc.enable = false;
+	surface->dcc.informative.dcc_rate_plane0 = 1.0;
+	surface->dcc.informative.dcc_rate_plane1 = 1.0;
+	surface->dcc.informative.fraction_of_zero_size_request_plane0 = 0;
+	surface->dcc.informative.fraction_of_zero_size_request_plane1 = 0;
+}
+
+/*
+ * Describe a placeholder plane for a stream that has no planes: an unscaled,
+ * unrotated 32bpp 4:4:4 viewport at the origin whose size is the stream's
+ * addressable size clamped to 3840x2160.
+ */
+static void populate_dml21_dummy_plane_cfg(struct dml2_plane_parameters *plane, const struct dc_stream_state *stream)
+{
+	/* 4K max */
+	const unsigned int vp_width = stream->timing.h_addressable > 3840 ? 3840 : stream->timing.h_addressable;
+	const unsigned int vp_height = stream->timing.v_addressable > 2160 ? 2160 : stream->timing.v_addressable;
+
+	plane->pixel_format = dml2_444_32;
+
+	plane->cursor.cursor_bpp = 32;
+	plane->cursor.cursor_width = 256;
+	plane->cursor.num_cursors = 1;
+
+	/* Viewport: plane0 covers the clamped size, plane1 is empty */
+	plane->composition.viewport.stationary = false;
+	plane->composition.viewport.plane0.x_start = 0;
+	plane->composition.viewport.plane0.y_start = 0;
+	plane->composition.viewport.plane0.width = vp_width;
+	plane->composition.viewport.plane0.height = vp_height;
+	plane->composition.viewport.plane1.x_start = 0;
+	plane->composition.viewport.plane1.y_start = 0;
+	plane->composition.viewport.plane1.width = 0;
+	plane->composition.viewport.plane1.height = 0;
+
+	/* Scaler: disabled, 1:1 ratios and a single tap on plane0 */
+	plane->composition.rotation_angle = dml2_rotation_0;
+	plane->composition.scaler_info.enabled = false;
+	plane->composition.scaler_info.rect_out_width = vp_width;
+	plane->composition.scaler_info.plane0.h_ratio = 1.0;
+	plane->composition.scaler_info.plane0.v_ratio = 1.0;
+	plane->composition.scaler_info.plane0.h_taps = 1;
+	plane->composition.scaler_info.plane0.v_taps = 1;
+	plane->composition.scaler_info.plane1.h_ratio = 0;
+	plane->composition.scaler_info.plane1.v_ratio = 0;
+	plane->composition.scaler_info.plane1.h_taps = 0;
+	plane->composition.scaler_info.plane1.v_taps = 0;
+
+	plane->dynamic_meta_data.enable = false;
+	plane->overrides.gpuvm_min_page_size_kbytes = 256;
+}
+
+/*
+ * Translate a DC plane state's surface layout (sizes, pitches, DCC, tiling)
+ * into the DML2.1 surface descriptor.
+ *
+ * surface->tiling is only written for GFX9/10/11 and ADDR3 tiling info; for any
+ * other gfx version it is left untouched. @in_dc is currently unused.
+ */
+static void populate_dml21_surface_config_from_plane_state(
+		const struct dc *in_dc,
+		struct dml2_surface_cfg *surface,
+		const struct dc_plane_state *plane_state)
+{
+	/* Luma / plane 0 */
+	surface->plane0.width = plane_state->plane_size.surface_size.width;
+	surface->plane0.height = plane_state->plane_size.surface_size.height;
+	surface->plane0.pitch = plane_state->plane_size.surface_pitch;
+
+	/* Chroma / plane 1 */
+	surface->plane1.width = plane_state->plane_size.chroma_size.width;
+	surface->plane1.height = plane_state->plane_size.chroma_size.height;
+	surface->plane1.pitch = plane_state->plane_size.chroma_pitch;
+
+	/* DCC */
+	surface->dcc.enable = plane_state->dcc.enable;
+	surface->dcc.plane0.pitch = plane_state->dcc.meta_pitch;
+	surface->dcc.plane1.pitch = plane_state->dcc.meta_pitch_c;
+	surface->dcc.informative.dcc_rate_plane0 = 1.0;
+	surface->dcc.informative.dcc_rate_plane1 = 1.0;
+	surface->dcc.informative.fraction_of_zero_size_request_plane0 = plane_state->dcc.independent_64b_blks;
+	surface->dcc.informative.fraction_of_zero_size_request_plane1 = plane_state->dcc.independent_64b_blks_c;
+
+	// Update swizzle / array mode based on the gfx_format
+	switch (plane_state->tiling_info.gfxversion) {
+	case DcGfxAddr3:
+		surface->tiling = gfx_addr3_to_dml2_swizzle_mode(plane_state->tiling_info.gfx_addr3.swizzle);
+		break;
+	case DcGfxVersion9:
+	case DcGfxVersion10:
+	case DcGfxVersion11:
+		surface->tiling = gfx9_to_dml2_swizzle_mode(plane_state->tiling_info.gfx9.swizzle);
+		break;
+	default:
+		/* DcGfxVersion7/8 and anything else: tiling is not updated */
+		break;
+	}
+}
+
+/*
+ * Build scaler data for @in using the scratch pipe in @dml_ctx.
+ *
+ * The first pipe in @context that carries @in and has no prev_odm_pipe is
+ * copied (stream, plane state, taps, stream resources, DSC padding) into the
+ * zeroed scratch pipe, and the build_scaling_params callback is run on it.
+ * If no pipe matches, an ASSERT fires and the zeroed scratch data is returned.
+ *
+ * The returned pointer refers to scratch storage inside @dml_ctx, which this
+ * function overwrites on every call.
+ */
+static const struct scaler_data *get_scaler_data_for_plane(
+		struct dml2_context *dml_ctx,
+		const struct dc_plane_state *in,
+		const struct dc_state *context)
+{
+	struct pipe_ctx *scratch_pipe = &dml_ctx->v21.scratch.temp_pipe;
+	int pipe_idx;
+
+	memset(scratch_pipe, 0, sizeof(struct pipe_ctx));
+
+	for (pipe_idx = 0; pipe_idx < MAX_PIPES; pipe_idx++) {
+		const struct pipe_ctx *src = &context->res_ctx.pipe_ctx[pipe_idx];
+
+		if (src->plane_state != in || src->prev_odm_pipe)
+			continue;
+
+		scratch_pipe->stream = src->stream;
+		scratch_pipe->plane_state = src->plane_state;
+		scratch_pipe->plane_res.scl_data.taps = src->plane_res.scl_data.taps;
+		scratch_pipe->stream_res = src->stream_res;
+		scratch_pipe->dsc_padding_params.dsc_hactive_padding = src->dsc_padding_params.dsc_hactive_padding;
+		scratch_pipe->dsc_padding_params.dsc_htotal_padding = src->dsc_padding_params.dsc_htotal_padding;
+		scratch_pipe->dsc_padding_params.dsc_pix_clk_100hz = src->dsc_padding_params.dsc_pix_clk_100hz;
+		dml_ctx->config.callbacks.build_scaling_params(scratch_pipe);
+		break;
+	}
+
+	/* pipe_idx == MAX_PIPES means no pipe carried this plane */
+	ASSERT(pipe_idx < MAX_PIPES);
+	return &scratch_pipe->plane_res.scl_data;
+}
+
+/*
+ * Translate a DC plane state into the DML2.1 plane descriptor.
+ *
+ * @dml_ctx:      DML2 context (scratch pipe, force_tdlut_enable)
+ * @plane:        destination descriptor
+ * @plane_state:  DC plane being described
+ * @context:      DC state used to locate the plane's pipe and stream
+ * @stream_index: index into context->streams; also stored in
+ *                plane->stream_index
+ *
+ * Viewport, ratios and taps come from get_scaler_data_for_plane().
+ */
+static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dml_ctx,
+		struct dml2_plane_parameters *plane, const struct dc_plane_state *plane_state,
+		const struct dc_state *context, unsigned int stream_index)
+{
+	const struct scaler_data *scaler_data = get_scaler_data_for_plane(dml_ctx, plane_state, context);
+	struct dc_stream_state *stream = context->streams[stream_index];
+	const struct dc *dc = plane_state->ctx->dc;
+	bool scaler_enabled;
+
+	plane->stream_index = stream_index;
+
+	plane->cursor.cursor_bpp = 32;
+	plane->cursor.cursor_width = 256;
+	plane->cursor.num_cursors = 1;
+
+	/* Source pixel format class; formats not listed are treated as 32bpp 4:4:4 */
+	switch (plane_state->format) {
+	case SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS:
+		plane->pixel_format = dml2_444_8;
+		break;
+	case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555:
+	case SURFACE_PIXEL_FORMAT_GRPH_RGB565:
+		plane->pixel_format = dml2_444_16;
+		break;
+	case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
+	case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
+	case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
+	case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
+		plane->pixel_format = dml2_444_64;
+		break;
+	case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr:
+	case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb:
+		plane->pixel_format = dml2_420_8;
+		break;
+	case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr:
+	case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb:
+		plane->pixel_format = dml2_420_10;
+		break;
+	case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA:
+		plane->pixel_format = dml2_rgbe_alpha;
+		break;
+	default:
+		plane->pixel_format = dml2_444_32;
+		break;
+	}
+
+	/* Viewports for plane0 and plane1 */
+	plane->composition.viewport.stationary = false;
+	plane->composition.viewport.plane0.x_start = scaler_data->viewport.x;
+	plane->composition.viewport.plane0.y_start = scaler_data->viewport.y;
+	plane->composition.viewport.plane0.width = scaler_data->viewport.width;
+	plane->composition.viewport.plane0.height = scaler_data->viewport.height;
+	plane->composition.viewport.plane1.x_start = scaler_data->viewport_c.x;
+	plane->composition.viewport.plane1.y_start = scaler_data->viewport_c.y;
+	plane->composition.viewport.plane1.width = scaler_data->viewport_c.width;
+	plane->composition.viewport.plane1.height = scaler_data->viewport_c.height;
+
+	/* Scaling is nominally needed whenever any ratio differs from 1.0 */
+	scaler_enabled = scaler_data->ratios.horz.value != dc_fixpt_one.value ||
+			scaler_data->ratios.horz_c.value != dc_fixpt_one.value ||
+			scaler_data->ratios.vert.value != dc_fixpt_one.value ||
+			scaler_data->ratios.vert_c.value != dc_fixpt_one.value;
+
+	if (!scaler_data->taps.h_taps) {
+		/* The ratio check above can ask for scaling even when there are no
+		 * taps in certain cases. Take corrective action and disable scaling.
+		 */
+		scaler_enabled = false;
+	} else if ((dc->config.use_spl == true) && !scaler_enabled) {
+		/* To enable the sharpener for 1:1, the scaler must be enabled. If
+		 * use_spl is set, allow the case where ratio is 1 but taps > 1.
+		 */
+		if ((scaler_data->taps.h_taps > 1) || (scaler_data->taps.v_taps > 1) ||
+				(scaler_data->taps.h_taps_c > 1) || (scaler_data->taps.v_taps_c > 1))
+			scaler_enabled = true;
+	}
+
+	/* always_scale is only used for debug purposes, not in production, but has
+	 * to be maintained for certain compliances.
+	 */
+	if (dc->debug.always_scale == true)
+		scaler_enabled = true;
+
+	plane->composition.scaler_info.enabled = scaler_enabled;
+
+	if (scaler_enabled) {
+		/* fixed-point ratio values are converted by dividing by 2^32 */
+		plane->composition.scaler_info.plane0.h_ratio = (double)scaler_data->ratios.horz.value / (1ULL << 32);
+		plane->composition.scaler_info.plane0.v_ratio = (double)scaler_data->ratios.vert.value / (1ULL << 32);
+		plane->composition.scaler_info.plane1.h_ratio = (double)scaler_data->ratios.horz_c.value / (1ULL << 32);
+		plane->composition.scaler_info.plane1.v_ratio = (double)scaler_data->ratios.vert_c.value / (1ULL << 32);
+	} else {
+		plane->composition.scaler_info.plane0.h_ratio = 1.0;
+		plane->composition.scaler_info.plane0.v_ratio = 1.0;
+		plane->composition.scaler_info.plane1.h_ratio = 1.0;
+		plane->composition.scaler_info.plane1.v_ratio = 1.0;
+	}
+
+	/* A zero luma tap count forces both planes of that direction to one tap */
+	if (scaler_data->taps.h_taps) {
+		plane->composition.scaler_info.plane0.h_taps = scaler_data->taps.h_taps;
+		plane->composition.scaler_info.plane1.h_taps = scaler_data->taps.h_taps_c;
+	} else {
+		plane->composition.scaler_info.plane0.h_taps = 1;
+		plane->composition.scaler_info.plane1.h_taps = 1;
+	}
+	if (scaler_data->taps.v_taps) {
+		plane->composition.scaler_info.plane0.v_taps = scaler_data->taps.v_taps;
+		plane->composition.scaler_info.plane1.v_taps = scaler_data->taps.v_taps_c;
+	} else {
+		plane->composition.scaler_info.plane0.v_taps = 1;
+		plane->composition.scaler_info.plane1.v_taps = 1;
+	}
+
+	/* 3D LUT fetched from video memory */
+	if (plane_state->mcm_luts.lut3d_data.lut3d_src == DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM) {
+		plane->tdlut.setup_for_tdlut = true;
+
+		switch (plane_state->mcm_luts.lut3d_data.gpu_mem_params.layout) {
+		case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_RGB:
+		case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_BGR:
+			plane->tdlut.tdlut_addressing_mode = dml2_tdlut_sw_linear;
+			break;
+		case DC_CM2_GPU_MEM_LAYOUT_1D_PACKED_LINEAR:
+			plane->tdlut.tdlut_addressing_mode = dml2_tdlut_simple_linear;
+			break;
+		}
+
+		switch (plane_state->mcm_luts.lut3d_data.gpu_mem_params.size) {
+		case DC_CM2_GPU_MEM_SIZE_171717:
+			plane->tdlut.tdlut_width_mode = dml2_tdlut_width_17_cube;
+			break;
+		case DC_CM2_GPU_MEM_SIZE_TRANSFORMED:
+		default:
+			//plane->tdlut.tdlut_width_mode = dml2_tdlut_width_flatten; // dml2_tdlut_width_flatten undefined
+			break;
+		}
+	}
+	plane->tdlut.setup_for_tdlut |= dml_ctx->config.force_tdlut_enable;
+
+	plane->dynamic_meta_data.enable = false;
+	plane->dynamic_meta_data.lines_before_active_required = 0;
+	plane->dynamic_meta_data.transmitted_bytes = 0;
+
+	plane->composition.scaler_info.rect_out_width = plane_state->dst_rect.width;
+	plane->composition.rotation_angle = (enum dml2_rotation_angle) plane_state->rotation;
+
+	plane->overrides.gpuvm_min_page_size_kbytes = 256;
+
+	plane->immediate_flip = plane_state->flip_immediate;
+
+	/* True when the plane's destination is at least as tall as the stream source
+	 * and the stream destination is at least as tall as the addressable lines.
+	 */
+	plane->composition.rect_out_height_spans_vactive =
+		plane_state->dst_rect.height >= stream->src.height &&
+		stream->dst.height >= stream->timing.v_addressable;
+}
+
+//TODO : Could be possibly moved to a common helper layer.
+/*
+ * Compute the wrapper's plane id for @plane on the stream with @stream_id.
+ *
+ * The id packs the stream's index in @context into the upper bits and the
+ * plane's index within that stream's status into the lower 16 bits:
+ * (stream index << 16) | plane index.
+ *
+ * Returns true and writes *plane_id when found; returns false when @plane_id
+ * is NULL or no matching stream/plane pair exists.
+ */
+static bool dml21_wrapper_get_plane_id(const struct dc_state *context, unsigned int stream_id, const struct dc_plane_state *plane, unsigned int *plane_id)
+{
+	const struct dc_stream_status *status;
+	int stream_idx, plane_idx;
+
+	if (!plane_id)
+		return false;
+
+	for (stream_idx = 0; stream_idx < context->stream_count; stream_idx++) {
+		if (context->streams[stream_idx]->stream_id != stream_id)
+			continue;
+
+		status = &context->stream_status[stream_idx];
+		for (plane_idx = 0; plane_idx < status->plane_count; plane_idx++) {
+			if (status->plane_states[plane_idx] != plane)
+				continue;
+
+			*plane_id = (stream_idx << 16) | plane_idx;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Find the display-config slot already associated with @stream.
+ *
+ * Returns the slot index, or -1 when no valid slot holds the stream's id.
+ * NOTE: the return type is unsigned, so -1 is delivered as UINT_MAX; callers
+ * store the result in a signed int and test for < 0.
+ */
+static unsigned int map_stream_to_dml21_display_cfg(const struct dml2_context *dml_ctx, const struct dc_stream_state *stream)
+{
+	const struct dml2_dml_to_dc_pipe_mapping *mapping = &dml_ctx->v21.dml_to_dc_pipe_mapping;
+	int slot;
+
+	for (slot = 0; slot < __DML2_WRAPPER_MAX_STREAMS_PLANES__; slot++) {
+		if (!mapping->disp_cfg_to_stream_id_valid[slot])
+			continue;
+		if (mapping->disp_cfg_to_stream_id[slot] == stream->stream_id)
+			return slot;
+	}
+
+	return -1;
+}
+
+/*
+ * Find the display-config slot already associated with @plane on the stream
+ * identified by @stream_id.
+ *
+ * Returns the slot index, or -1 when the plane id cannot be computed (which
+ * also trips an ASSERT) or when no valid slot holds it.
+ * NOTE: the return type is unsigned, so -1 is delivered as UINT_MAX; callers
+ * in this file store the result in a signed int and test for < 0.
+ */
+unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, unsigned int stream_id,
+		const struct dc_plane_state *plane, const struct dc_state *context)
+{
+	const struct dml2_dml_to_dc_pipe_mapping *mapping = &dml_ctx->v21.dml_to_dc_pipe_mapping;
+	unsigned int plane_id;
+	int slot;
+
+	if (!dml21_wrapper_get_plane_id(context, stream_id, plane, &plane_id)) {
+		ASSERT(false);
+		return -1;
+	}
+
+	for (slot = 0; slot < __DML2_WRAPPER_MAX_STREAMS_PLANES__; slot++) {
+		if (!mapping->disp_cfg_to_plane_id_valid[slot])
+			continue;
+		if (mapping->disp_cfg_to_plane_id[slot] == plane_id)
+			return slot;
+	}
+
+	return -1;
+}
+
+/*
+ * Convert a forced p-state method from the DML2 config into the matching
+ * UCLK p-state change strategy. Unknown methods map to the auto strategy.
+ */
+static enum dml2_uclk_pstate_change_strategy dml21_force_pstate_method_to_uclk_state_change_strategy(enum dml2_force_pstate_methods force_pstate_method)
+{
+	switch (force_pstate_method) {
+	case dml2_force_pstate_method_vactive:
+		return dml2_uclk_pstate_change_strategy_force_vactive;
+	case dml2_force_pstate_method_vblank:
+		return dml2_uclk_pstate_change_strategy_force_vblank;
+	case dml2_force_pstate_method_drr:
+		return dml2_uclk_pstate_change_strategy_force_drr;
+	case dml2_force_pstate_method_subvp:
+		return dml2_uclk_pstate_change_strategy_force_mall_svp;
+	case dml2_force_pstate_method_auto:
+	default:
+		return dml2_uclk_pstate_change_strategy_auto;
+	}
+}
+
+/*
+ * Populate the DML2.1 display configuration in @dml_ctx from a DC state.
+ *
+ * For every stream in @context a stream descriptor (timing, output, overrides)
+ * is filled in, and for every plane of that stream a plane descriptor. A stream
+ * without planes gets one dummy plane descriptor. The DML-to-DC mapping tables
+ * are cleared first and then rebuilt as slots are assigned. When no real plane
+ * was seen at all, overrides.all_streams_blanked is set.
+ *
+ * NOTE(review): num_streams / num_planes are only incremented here, so the
+ * caller is presumably expected to have reset display_config beforehand.
+ *
+ * Always returns true.
+ */
+bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx)
+{
+	int stream_index, plane_index;
+	int disp_cfg_stream_location, disp_cfg_plane_location;
+	struct dml2_display_cfg *dml_dispcfg = &dml_ctx->v21.display_config;
+	struct dml2_dml_to_dc_pipe_mapping *mapping = &dml_ctx->v21.dml_to_dc_pipe_mapping;
+	unsigned int plane_count = 0;
+
+	memset(mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping));
+
+	dml_dispcfg->gpuvm_enable = dml_ctx->config.gpuvm_enable;
+	dml_dispcfg->gpuvm_max_page_table_levels = 4;
+	dml_dispcfg->hostvm_enable = false;
+	dml_dispcfg->minimize_det_reallocation = true;
+	dml_dispcfg->overrides.enable_subvp_implicit_pmo = true;
+
+	if (in_dc->debug.disable_unbounded_requesting) {
+		dml_dispcfg->overrides.hw.force_unbounded_requesting.enable = true;
+		dml_dispcfg->overrides.hw.force_unbounded_requesting.value = false;
+	}
+
+	for (stream_index = 0; stream_index < context->stream_count; stream_index++) {
+		struct dc_stream_state *stream = context->streams[stream_index];
+		struct dc_stream_status *stream_status = &context->stream_status[stream_index];
+		/* the pipe at the stream's index supplies the DSC padding parameters */
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[stream_index];
+		struct dml2_stream_parameters *stream_desc;
+		struct dml2_plane_parameters *plane_desc;
+
+		/* The mapping was just cleared, so a lookup only hits for slots assigned in this call */
+		disp_cfg_stream_location = map_stream_to_dml21_display_cfg(dml_ctx, stream);
+
+		if (disp_cfg_stream_location < 0)
+			disp_cfg_stream_location = dml_dispcfg->num_streams++;
+
+		ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__);
+		stream_desc = &dml_dispcfg->stream_descriptors[disp_cfg_stream_location];
+
+		populate_dml21_timing_config_from_stream_state(&stream_desc->timing, stream, pipe, dml_ctx);
+		populate_dml21_output_config_from_stream_state(&stream_desc->output, stream, pipe);
+		populate_dml21_stream_overrides_from_stream_state(stream_desc, stream, stream_status);
+
+		stream_desc->overrides.hw.twait_budgeting.fclk_pstate = dml2_twait_budgeting_setting_if_needed;
+		stream_desc->overrides.hw.twait_budgeting.uclk_pstate = dml2_twait_budgeting_setting_if_needed;
+		stream_desc->overrides.hw.twait_budgeting.stutter_enter_exit = dml2_twait_budgeting_setting_if_needed;
+
+		mapping->disp_cfg_to_stream_id[disp_cfg_stream_location] = stream->stream_id;
+		mapping->disp_cfg_to_stream_id_valid[disp_cfg_stream_location] = true;
+
+		if (stream_status->plane_count == 0) {
+			/* No planes: describe a dummy plane so the stream is still modelled */
+			disp_cfg_plane_location = dml_dispcfg->num_planes++;
+
+			/* same bounds check as the stream and real-plane paths */
+			ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__);
+			plane_desc = &dml_dispcfg->plane_descriptors[disp_cfg_plane_location];
+
+			populate_dml21_dummy_surface_cfg(&plane_desc->surface, stream);
+			populate_dml21_dummy_plane_cfg(plane_desc, stream);
+			plane_desc->stream_index = disp_cfg_stream_location;
+		} else {
+			for (plane_index = 0; plane_index < stream_status->plane_count; plane_index++) {
+				const struct dc_plane_state *plane_state = stream_status->plane_states[plane_index];
+
+				disp_cfg_plane_location = map_plane_to_dml21_display_cfg(dml_ctx, stream->stream_id, plane_state, context);
+
+				if (disp_cfg_plane_location < 0)
+					disp_cfg_plane_location = dml_dispcfg->num_planes++;
+
+				ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__);
+				plane_desc = &dml_dispcfg->plane_descriptors[disp_cfg_plane_location];
+
+				populate_dml21_surface_config_from_plane_state(in_dc, &plane_desc->surface, plane_state);
+				populate_dml21_plane_config_from_plane_state(dml_ctx, plane_desc, plane_state, context, stream_index);
+				/* overwrite the DC stream index stored above with the DML stream slot */
+				plane_desc->stream_index = disp_cfg_stream_location;
+
+				if (dml21_wrapper_get_plane_id(context, stream->stream_id, plane_state, &mapping->disp_cfg_to_plane_id[disp_cfg_plane_location]))
+					mapping->disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true;
+
+				/* apply forced pstate policy */
+				if (dml_ctx->config.pmo.force_pstate_method_enable) {
+					plane_desc->overrides.uclk_pstate_change_strategy =
+						dml21_force_pstate_method_to_uclk_state_change_strategy(dml_ctx->config.pmo.force_pstate_method_values[stream_index]);
+				}
+
+				plane_count++;
+			}
+		}
+	}
+
+	/* Dummy planes are not counted, so this flags a state with no real planes */
+	if (plane_count == 0) {
+		dml_dispcfg->overrides.all_streams_blanked = true;
+	}
+
+	return true;
+}
+
+/*
+ * Copy the minimum clocks, p-state support flags and stutter efficiencies
+ * computed by DML2.1 mode programming into the DC state's DCN clock block.
+ */
+void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state *context)
+{
+	const struct dml2_display_cfg_programming *programming = in_ctx->v21.mode_programming.programming;
+
+	/* TODO these should be the max of active, svp prefetch and idle should be tracked separately */
+	context->bw_ctx.bw.dcn.clk.dispclk_khz = programming->min_clocks.dcn4x.dispclk_khz;
+	context->bw_ctx.bw.dcn.clk.dcfclk_khz = programming->min_clocks.dcn4x.active.dcfclk_khz;
+	context->bw_ctx.bw.dcn.clk.dramclk_khz = programming->min_clocks.dcn4x.active.uclk_khz;
+	context->bw_ctx.bw.dcn.clk.fclk_khz = programming->min_clocks.dcn4x.active.fclk_khz;
+	context->bw_ctx.bw.dcn.clk.idle_dramclk_khz = programming->min_clocks.dcn4x.idle.uclk_khz;
+	context->bw_ctx.bw.dcn.clk.idle_fclk_khz = programming->min_clocks.dcn4x.idle.fclk_khz;
+	context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz;
+	context->bw_ctx.bw.dcn.clk.socclk_khz = programming->min_clocks.dcn4x.socclk_khz;
+
+	/* SubVP prefetch clocks */
+	context->bw_ctx.bw.dcn.clk.subvp_prefetch_dramclk_khz = programming->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz;
+	context->bw_ctx.bw.dcn.clk.subvp_prefetch_fclk_khz = programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz;
+
+	/* DTBCLK is enabled whenever a non-zero reference clock is required */
+	context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = programming->min_clocks.dcn4x.dtbrefclk_khz;
+	context->bw_ctx.bw.dcn.clk.dtbclk_en = programming->min_clocks.dcn4x.dtbrefclk_khz > 0;
+
+	/* P-state support */
+	context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = programming->fclk_pstate_supported;
+	context->bw_ctx.bw.dcn.clk.p_state_change_support = programming->uclk_pstate_supported;
+
+	/* Stutter efficiency */
+	context->bw_ctx.bw.dcn.clk.stutter_efficiency.base_efficiency = programming->stutter.base_percent_efficiency;
+	context->bw_ctx.bw.dcn.clk.stutter_efficiency.low_power_efficiency = programming->stutter.low_power_percent_efficiency;
+}
+
+/*
+ * Translate a DML watermark set index into the corresponding dcn4x
+ * register set inside @watermarks. Returns NULL for any index that does
+ * not name one of the sets A-D.
+ */
+static struct dml2_dchub_watermark_regs *wm_set_index_to_dc_wm_set(union dcn_watermark_set *watermarks, const enum dml2_dchub_watermark_reg_set_index wm_index)
+{
+	switch (wm_index) {
+	case DML2_DCHUB_WATERMARK_SET_A:
+		return &watermarks->dcn4x.a;
+	case DML2_DCHUB_WATERMARK_SET_B:
+		return &watermarks->dcn4x.b;
+	case DML2_DCHUB_WATERMARK_SET_C:
+		return &watermarks->dcn4x.c;
+	case DML2_DCHUB_WATERMARK_SET_D:
+		return &watermarks->dcn4x.d;
+	case DML2_DCHUB_WATERMARK_SET_NUM:
+	default:
+		break;
+	}
+
+	/* invalid wm set index */
+	return NULL;
+}
+
+/*
+ * Copy each watermark register set reported by DML into the matching DC
+ * watermark set. Indices with no DC counterpart are skipped. @in_dc is
+ * not used by this function.
+ */
+void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx)
+{
+	const struct dml2_display_cfg_programming *programming = in_ctx->v21.mode_programming.programming;
+	struct dml2_dchub_watermark_regs *dst;
+	unsigned int set_idx;
+
+	for (set_idx = 0; set_idx < programming->global_regs.num_watermark_sets; set_idx++) {
+		dst = wm_set_index_to_dc_wm_set(watermarks, set_idx);
+		if (!dst)
+			continue;
+
+		/* copy watermark set from DML */
+		memcpy(dst, &programming->global_regs.wm_regs[set_idx], sizeof(*dst));
+	}
+}
+
+/*
+ * Seed the DML pipe index lookup tables from the display-config tables:
+ * every slot takes the stream/plane id recorded for the same display-config
+ * index and is flagged valid.
+ */
+void dml21_map_hw_resources(struct dml2_context *dml_ctx)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < __DML2_WRAPPER_MAX_STREAMS_PLANES__; idx++) {
+		/* ids first ... */
+		dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[idx] =
+			dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[idx];
+		dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[idx] =
+			dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[idx];
+
+		/* ... then mark both entries usable */
+		dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[idx] = true;
+		dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[idx] = true;
+	}
+}
+
+/*
+ * Fill @mcache_pipe_config from the pipe's scaler viewports: plane0 takes
+ * the main viewport, plane1 takes the viewport_c values, and plane1 is
+ * flagged enabled according to the plane descriptor's pixel format.
+ * @context is not used by this function.
+ */
+void dml21_get_pipe_mcache_config(
+	struct dc_state *context,
+	struct pipe_ctx *pipe_ctx,
+	struct dml2_per_plane_programming *pln_prog,
+	struct dml2_pipe_configuration_descriptor *mcache_pipe_config)
+{
+	mcache_pipe_config->plane1_enabled =
+		dml21_is_plane1_enabled(pln_prog->plane_descriptor->pixel_format);
+
+	/* horizontal extent of plane0 */
+	mcache_pipe_config->plane0.viewport_width = pipe_ctx->plane_res.scl_data.viewport.width;
+	mcache_pipe_config->plane0.viewport_x_start = pipe_ctx->plane_res.scl_data.viewport.x;
+
+	/* horizontal extent of plane1 */
+	mcache_pipe_config->plane1.viewport_width = pipe_ctx->plane_res.scl_data.viewport_c.width;
+	mcache_pipe_config->plane1.viewport_x_start = pipe_ctx->plane_res.scl_data.viewport_c.x;
+}
+
+/*
+ * Derive the pipe's DC p-state type from the UCLK p-state method DML chose
+ * for its stream. For the vblank and fw_drr methods the result also depends
+ * on @sub_vp_enabled. Unrecognized methods map to P_STATE_UNKNOWN.
+ */
+void dml21_set_dc_p_state_type(
+	struct pipe_ctx *pipe_ctx,
+	struct dml2_per_stream_programming *stream_programming,
+	bool sub_vp_enabled)
+{
+	switch (stream_programming->uclk_pstate_method) {
+	case dml2_pstate_method_vactive:
+	case dml2_pstate_method_fw_vactive_drr:
+		pipe_ctx->p_state_type = P_STATE_V_ACTIVE;
+		return;
+	case dml2_pstate_method_vblank:
+	case dml2_pstate_method_fw_vblank_drr:
+		pipe_ctx->p_state_type = sub_vp_enabled ? P_STATE_V_BLANK_SUB_VP : P_STATE_V_BLANK;
+		return;
+	case dml2_pstate_method_fw_svp:
+	case dml2_pstate_method_fw_svp_drr:
+		pipe_ctx->p_state_type = P_STATE_SUB_VP;
+		return;
+	case dml2_pstate_method_fw_drr:
+		pipe_ctx->p_state_type = sub_vp_enabled ? P_STATE_DRR_SUB_VP : P_STATE_FPO;
+		return;
+	default:
+		pipe_ctx->p_state_type = P_STATE_UNKNOWN;
+		return;
+	}
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.h
new file mode 100644
index 000000000000..9880d3e0398e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.h
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+
+#ifndef _DML21_TRANSLATION_HELPER_H_
+#define _DML21_TRANSLATION_HELPER_H_
+
+struct dc;
+struct dc_state;
+struct dcn_watermarks;
+union dcn_watermark_set;
+struct pipe_ctx;
+struct dc_plane_state;
+
+struct dml2_context;
+struct dml2_configuration_options;
+struct dml2_initialize_instance_in_out;
+
+void dml21_populate_dml_init_params(struct dml2_initialize_instance_in_out *dml_init, const struct dml2_configuration_options *config, const struct dc *in_dc);
+bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx);
+void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state *context);
+void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx);
+void dml21_map_hw_resources(struct dml2_context *dml_ctx);
+void dml21_get_pipe_mcache_config(struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, struct dml2_pipe_configuration_descriptor *mcache_pipe_config);
+void dml21_set_dc_p_state_type(struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming, bool sub_vp_enabled);
+unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, unsigned int stream_id, const struct dc_plane_state *plane, const struct dc_state *context);
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.c
new file mode 100644
index 000000000000..ee721606b883
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.c
@@ -0,0 +1,516 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+
+#include "dml2_internal_shared_types.h"
+#include "dml21_translation_helper.h"
+#include "dml2_internal_types.h"
+#include "dml21_utils.h"
+#include "dml2_dc_resource_mgmt.h"
+
+#include "dml2_core_dcn4_calcs.h"
+
+/*
+ * Return the first DML pipe index whose mapping entry is valid and holds
+ * @stream_id, or -1 when no such entry exists.
+ */
+int dml21_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id)
+{
+	int idx;
+
+	for (idx = 0; idx < __DML2_WRAPPER_MAX_STREAMS_PLANES__; idx++) {
+		if (!ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[idx])
+			continue;
+
+		if (ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[idx] == stream_id)
+			return idx;
+	}
+
+	return -1;
+}
+
+/*
+ * Return the first DML pipe index whose mapping entry is valid and holds
+ * @plane_id, or -1 when no such entry exists.
+ */
+int dml21_find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int plane_id)
+{
+	int idx;
+
+	for (idx = 0; idx < __DML2_WRAPPER_MAX_STREAMS_PLANES__; idx++) {
+		if (!ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[idx])
+			continue;
+
+		if (ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[idx] == plane_id)
+			return idx;
+	}
+
+	return -1;
+}
+
+/*
+ * Look @plane up in the per-stream plane lists of @state. On a hit, store
+ * an id built as (stream index << 16) | plane index in @plane_id and return
+ * true. Returns false when @plane_id is NULL or the plane is not found.
+ */
+bool dml21_get_plane_id(const struct dc_state *state, const struct dc_plane_state *plane, unsigned int *plane_id)
+{
+	int stream_idx, plane_idx;
+
+	if (plane_id == NULL)
+		return false;
+
+	for (stream_idx = 0; stream_idx < state->stream_count; stream_idx++) {
+		for (plane_idx = 0; plane_idx < state->stream_status[stream_idx].plane_count; plane_idx++) {
+			if (state->stream_status[stream_idx].plane_states[plane_idx] != plane)
+				continue;
+
+			*plane_id = (stream_idx << 16) | plane_idx;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/* Extract the plane index stored in the low 16 bits of @plane_id. */
+unsigned int dml21_get_dc_plane_idx_from_plane_id(unsigned int plane_id)
+{
+	const unsigned int plane_idx_mask = 0xffff;
+
+	return plane_id & plane_idx_mask;
+}
+
+/*
+ * Find the first DML plane programming entry whose plane descriptor belongs
+ * to @stream_index and store its index in @dml_pipe_idx.
+ *
+ * @dml_pipe_idx is left untouched when no entry matches, so callers should
+ * initialize it before the call.
+ */
+void find_valid_pipe_idx_for_stream_index(const struct dml2_context *dml_ctx, unsigned int *dml_pipe_idx, unsigned int stream_index)
+{
+	const struct dml2_display_cfg_programming *programming = dml_ctx->v21.mode_programming.programming;
+	unsigned int i;
+
+	for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
+		const struct dml2_per_plane_programming *pln_prog = &programming->plane_programming[i];
+
+		/*
+		 * The scan covers every slot, not only configured planes.
+		 * NOTE(review): slots past the configured plane count are
+		 * presumed to hold a NULL descriptor - confirm. Skip them
+		 * instead of dereferencing.
+		 */
+		if (!pln_prog->plane_descriptor)
+			continue;
+
+		if (pln_prog->plane_descriptor->stream_index == stream_index) {
+			*dml_pipe_idx = i;
+			return;
+		}
+	}
+}
+
+/*
+ * Compute the index of @pipe's register set: the ODM slice index of its
+ * OPP head, plus the pipe's MPC slice index when the pipe has a plane.
+ */
+void find_pipe_regs_idx(const struct dml2_context *dml_ctx,
+		struct pipe_ctx *pipe, unsigned int *pipe_regs_idx)
+{
+	struct pipe_ctx *opp_head_pipe;
+
+	opp_head_pipe = dml_ctx->config.callbacks.get_opp_head(pipe);
+	*pipe_regs_idx = dml_ctx->config.callbacks.get_odm_slice_index(opp_head_pipe);
+
+	if (!pipe->plane_state)
+		return;
+
+	*pipe_regs_idx += dml_ctx->config.callbacks.get_mpc_slice_index(pipe);
+}
+
+/*
+ * Collect the DC pipes that back DML plane @dml_plane_idx.
+ *
+ * Both output arrays are cleared first. @dc_main_pipes receives the pipes of
+ * the main plane (or the OPP heads of the stream when the stream has no
+ * plane at that index). @dc_phantom_pipes receives the pipes of the paired
+ * phantom plane, if one exists. Returns the number of main pipes found.
+ * @in_dc is not used by this function.
+ */
+int dml21_find_dc_pipes_for_plane(const struct dc *in_dc,
+		struct dc_state *context,
+		struct dml2_context *dml_ctx,
+		struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__],
+		struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__],
+		int dml_plane_idx)
+{
+	struct dc_stream_state *main_stream, *phantom_stream;
+	struct dc_stream_status *main_status, *phantom_status;
+	struct dc_plane_state *main_plane, *phantom_plane;
+	struct pipe_ctx *otg_master_pipe;
+	unsigned int dml_stream_index;
+	unsigned int main_stream_id;
+	unsigned int dc_plane_index;
+	int num_pipes = 0;
+
+	memset(dc_main_pipes, 0, sizeof(struct pipe_ctx *) * __DML2_WRAPPER_MAX_STREAMS_PLANES__);
+	memset(dc_phantom_pipes, 0, sizeof(struct pipe_ctx *) * __DML2_WRAPPER_MAX_STREAMS_PLANES__);
+
+	/* resolve the DC stream that owns this DML plane */
+	dml_stream_index = dml_ctx->v21.mode_programming.programming->plane_programming[dml_plane_idx].plane_descriptor->stream_index;
+	main_stream_id = dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[dml_stream_index];
+
+	main_stream = dml_ctx->config.callbacks.get_stream_from_id(context, main_stream_id);
+	main_status = dml_ctx->config.callbacks.get_stream_status(context, main_stream);
+	if (!main_status)
+		return num_pipes;
+
+	/* find main plane based on id */
+	dc_plane_index = dml21_get_dc_plane_idx_from_plane_id(dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[dml_plane_idx]);
+	main_plane = main_status->plane_states[dc_plane_index];
+
+	if (main_plane) {
+		num_pipes = dml_ctx->config.callbacks.get_dpp_pipes_for_plane(main_plane, &context->res_ctx, dc_main_pipes);
+	} else {
+		/* stream was configured with dummy plane, so get pipes from opp head */
+		otg_master_pipe = dml_ctx->config.callbacks.get_otg_master_for_stream(&context->res_ctx, main_stream);
+		if (otg_master_pipe != NULL)
+			num_pipes = dml_ctx->config.callbacks.get_opp_heads_for_otg_master(otg_master_pipe, &context->res_ctx, dc_main_pipes);
+	}
+
+	/* if phantom exists, find associated pipes */
+	phantom_stream = dml_ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, main_stream);
+	if (!phantom_stream || num_pipes <= 0)
+		return num_pipes;
+
+	phantom_status = dml_ctx->config.callbacks.get_stream_status(context, phantom_stream);
+	if (!phantom_status)
+		return num_pipes;
+
+	/* phantom plane will have same index as main */
+	phantom_plane = phantom_status->plane_states[dc_plane_index];
+
+	/* only care about phantom pipes if they contain the phantom plane */
+	if (phantom_plane)
+		dml_ctx->config.callbacks.get_dpp_pipes_for_plane(phantom_plane, &context->res_ctx, dc_phantom_pipes);
+
+	return num_pipes;
+}
+
+/*
+ * Copy the global sync programming for @pipe_ctx out of @stream_programming.
+ * Phantom pipes take the phantom stream's global sync; all other pipes take
+ * the main stream's.
+ */
+void dml21_pipe_populate_global_sync(struct dml2_context *dml_ctx,
+		struct dc_state *context,
+		struct pipe_ctx *pipe_ctx,
+		struct dml2_per_stream_programming *stream_programming)
+{
+	union dml2_global_sync_programming *src;
+	bool is_phantom;
+
+	is_phantom = dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM;
+
+	/* phantom has its own global sync */
+	src = is_phantom ? &stream_programming->phantom_stream.global_sync
+			 : &stream_programming->global_sync;
+
+	memcpy(&pipe_ctx->global_sync, src, sizeof(union dml2_global_sync_programming));
+}
+
+/*
+ * Record the MALL allocation size of @dc_pipe and add it to the running
+ * totals kept in @context.
+ *
+ * Only a pipe that has a stream and a plane, is not an ODM continuation and
+ * does not share its plane with its top pipe is counted, so each unique
+ * plane contributes once.
+ */
+void dml21_populate_mall_allocation_size(struct dc_state *context,
+		struct dml2_context *in_ctx,
+		struct dml2_per_plane_programming *pln_prog,
+		struct pipe_ctx *dc_pipe)
+{
+	/* Reuse MALL Allocation Sizes logic from dcn32_fpu.c */
+	/* Count from active, top pipes per plane only. Only add mall_ss_size_bytes for each unique plane. */
+	if (!dc_pipe->stream || !dc_pipe->plane_state)
+		return;
+
+	if (dc_pipe->top_pipe != NULL &&
+			dc_pipe->plane_state == dc_pipe->top_pipe->plane_state)
+		return;
+
+	if (dc_pipe->prev_odm_pipe != NULL)
+		return;
+
+	if (in_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, dc_pipe) == SUBVP_PHANTOM) {
+		/* SUBVP: phantom surfaces only stored in MALL */
+		dc_pipe->surface_size_in_mall_bytes = pln_prog->svp_size_mall_bytes;
+		context->bw_ctx.bw.dcn.mall_subvp_size_bytes += dc_pipe->surface_size_in_mall_bytes;
+	} else {
+		/* SS: all active surfaces stored in MALL */
+		dc_pipe->surface_size_in_mall_bytes = pln_prog->surface_size_mall_bytes;
+		context->bw_ctx.bw.dcn.mall_ss_size_bytes += dc_pipe->surface_size_in_mall_bytes;
+	}
+}
+
+/*
+ * Return true when the pipe has both an HPO DP stream encoder and an HPO DP
+ * link encoder and its stream carries a DP signal.
+ */
+bool check_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx)
+{
+	/* If this assert is hit then we have a link encoder dynamic management issue */
+	ASSERT(!pipe_ctx->stream_res.hpo_dp_stream_enc || pipe_ctx->link_res.hpo_dp_link_enc != NULL);
+
+	if (!pipe_ctx->stream_res.hpo_dp_stream_enc)
+		return false;
+
+	if (!pipe_ctx->link_res.hpo_dp_link_enc)
+		return false;
+
+	return dc_is_dp_signal(pipe_ctx->stream->signal);
+}
+
+
+/*
+ * Return true if any pipe in @context has a stream with a paired SubVP
+ * stream and is itself of type SUBVP_MAIN.
+ */
+static bool is_sub_vp_enabled(struct dc *dc, struct dc_state *context)
+{
+	struct pipe_ctx *pipe;
+	int pipe_idx;
+
+	for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) {
+		pipe = &context->res_ctx.pipe_ctx[pipe_idx];
+
+		if (!pipe->stream)
+			continue;
+
+		if (!dc_state_get_paired_subvp_stream(context, pipe->stream))
+			continue;
+
+		if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN)
+			return true;
+	}
+
+	return false;
+}
+
+
+/*
+ * Apply the DML programming of one plane/stream to @pipe_ctx: global sync,
+ * HUBP registers, unbounded request and DET size, DPP clock, MALL allocation
+ * size and p-state type. The context-wide dppclk is raised to this pipe's
+ * requirement when that is higher.
+ */
+void dml21_program_dc_pipe(struct dml2_context *dml_ctx, struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog,
+		struct dml2_per_stream_programming *stream_prog)
+{
+	unsigned int pipe_reg_index = 0;
+	bool sub_vp_enabled;
+
+	dml21_pipe_populate_global_sync(dml_ctx, context, pipe_ctx, stream_prog);
+	find_pipe_regs_idx(dml_ctx, pipe_ctx, &pipe_reg_index);
+
+	if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) {
+		const struct dml2_dchub_per_pipe_register_set *regs = pln_prog->pipe_regs[pipe_reg_index];
+
+		memcpy(&pipe_ctx->hubp_regs, regs, sizeof(struct dml2_dchub_per_pipe_register_set));
+		pipe_ctx->unbounded_req = regs->rq_regs.unbounded_request_enabled;
+		pipe_ctx->det_buffer_size_kb = regs->det_size * 64;
+	} else {
+		/* phantom pipes use the phantom register set, with no unbounded request and no DET */
+		memcpy(&pipe_ctx->hubp_regs, pln_prog->phantom_plane.pipe_regs[pipe_reg_index], sizeof(struct dml2_dchub_per_pipe_register_set));
+		pipe_ctx->unbounded_req = false;
+		pipe_ctx->det_buffer_size_kb = 0;
+	}
+
+	/* the context tracks the highest dppclk required by any pipe */
+	pipe_ctx->plane_res.bw.dppclk_khz = pln_prog->min_clocks.dcn4x.dppclk_khz;
+	if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipe_ctx->plane_res.bw.dppclk_khz)
+		context->bw_ctx.bw.dcn.clk.dppclk_khz = pipe_ctx->plane_res.bw.dppclk_khz;
+
+	dml21_populate_mall_allocation_size(context, dml_ctx, pln_prog, pipe_ctx);
+
+	sub_vp_enabled = is_sub_vp_enabled(pipe_ctx->stream->ctx->dc, context);
+	dml21_set_dc_p_state_type(pipe_ctx, stream_prog, sub_vp_enabled);
+}
+
+/*
+ * Create a phantom stream for @main_stream, shape it from the phantom
+ * stream descriptor in @stream_programming, and add it to @context.
+ * Returns the new phantom stream, or NULL if it could not be created.
+ */
+static struct dc_stream_state *dml21_add_phantom_stream(struct dml2_context *dml_ctx,
+		const struct dc *dc,
+		struct dc_state *context,
+		struct dc_stream_state *main_stream,
+		struct dml2_per_stream_programming *stream_programming)
+{
+	struct dml2_stream_parameters *descriptor = &stream_programming->phantom_stream.descriptor;
+	struct dc_stream_state *phantom;
+
+	phantom = dml_ctx->config.svp_pstate.callbacks.create_phantom_stream(dc, context, main_stream);
+	if (phantom == NULL)
+		return NULL;
+
+	/* copy details of phantom stream from main */
+	memcpy(&phantom->timing, &main_stream->timing, sizeof(phantom->timing));
+	memcpy(&phantom->src, &main_stream->src, sizeof(phantom->src));
+	memcpy(&phantom->dst, &main_stream->dst, sizeof(phantom->dst));
+
+	/* modify timing for phantom */
+	phantom->timing.v_front_porch = descriptor->timing.v_front_porch;
+	phantom->timing.v_addressable = descriptor->timing.v_active;
+	phantom->timing.v_total = descriptor->timing.v_total;
+	phantom->timing.flags.DSC = 0; // phantom always has DSC disabled
+
+	/* destination covers the phantom's active lines, starting at the top */
+	phantom->dst.y = 0;
+	phantom->dst.height = descriptor->timing.v_active;
+
+	/* source height is the phantom active height scaled by the main src/dst height ratio */
+	phantom->src.y = 0;
+	phantom->src.height = (double)descriptor->timing.v_active * (double)main_stream->src.height / (double)main_stream->dst.height;
+
+	phantom->use_dynamic_meta = false;
+
+	dml_ctx->config.svp_pstate.callbacks.add_phantom_stream(dc, context, phantom, main_stream);
+
+	return phantom;
+}
+
+/*
+ * Create a phantom plane mirroring @main_plane and attach it to
+ * @phantom_stream in @context.
+ *
+ * The phantom copies the main plane's format, rotation, visibility, address,
+ * scaling quality, rectangles, size, tiling and DCC settings. Its clip rect
+ * is then reduced to the phantom stream's source height.
+ *
+ * @plane_programming is currently unused; it is kept so the signature stays
+ * the same for the caller.
+ *
+ * Returns the new phantom plane, or NULL if it could not be created.
+ */
+static struct dc_plane_state *dml21_add_phantom_plane(struct dml2_context *dml_ctx,
+		const struct dc *dc,
+		struct dc_state *context,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state *main_plane,
+		struct dml2_per_plane_programming *plane_programming)
+{
+	struct dc_plane_state *phantom_plane;
+
+	phantom_plane = dml_ctx->config.svp_pstate.callbacks.create_phantom_plane(dc, context, main_plane);
+	if (!phantom_plane)
+		return NULL;
+
+	/* scalar properties; these were previously assigned twice */
+	phantom_plane->format = main_plane->format;
+	phantom_plane->rotation = main_plane->rotation;
+	phantom_plane->visible = main_plane->visible;
+
+	memcpy(&phantom_plane->address, &main_plane->address, sizeof(phantom_plane->address));
+	memcpy(&phantom_plane->scaling_quality, &main_plane->scaling_quality,
+			sizeof(phantom_plane->scaling_quality));
+	memcpy(&phantom_plane->src_rect, &main_plane->src_rect, sizeof(phantom_plane->src_rect));
+	memcpy(&phantom_plane->dst_rect, &main_plane->dst_rect, sizeof(phantom_plane->dst_rect));
+	memcpy(&phantom_plane->clip_rect, &main_plane->clip_rect, sizeof(phantom_plane->clip_rect));
+	memcpy(&phantom_plane->plane_size, &main_plane->plane_size,
+			sizeof(phantom_plane->plane_size));
+	memcpy(&phantom_plane->tiling_info, &main_plane->tiling_info,
+			sizeof(phantom_plane->tiling_info));
+	memcpy(&phantom_plane->dcc, &main_plane->dcc, sizeof(phantom_plane->dcc));
+
+	/* Shadow pipe has small viewport. */
+	phantom_plane->clip_rect.y = 0;
+	phantom_plane->clip_rect.height = phantom_stream->src.height;
+
+	dml_ctx->config.svp_pstate.callbacks.add_phantom_plane(dc, phantom_stream, phantom_plane, context);
+
+	return phantom_plane;
+}
+
+/*
+ * For every DML stream with an enabled phantom stream, create the phantom
+ * stream and one phantom plane per DML plane of that stream, adding them to
+ * @context. If any phantom plane was added, the DC pipes are remapped.
+ */
+void dml21_handle_phantom_streams_planes(const struct dc *dc, struct dc_state *context, struct dml2_context *dml_ctx)
+{
+	struct dml2_display_cfg_programming *programming = dml_ctx->v21.mode_programming.programming;
+	unsigned int stream_idx, plane_idx;
+	bool phantoms_added = false;
+
+	/* create phantom streams and planes and add to context */
+	for (stream_idx = 0; stream_idx < programming->display_config.num_streams; stream_idx++) {
+		struct dc_stream_state *main_stream;
+		struct dc_stream_status *main_stream_status;
+		struct dc_stream_state *phantom_stream;
+
+		/* iterate through DML streams looking for phantoms */
+		if (!programming->stream_programming[stream_idx].phantom_stream.enabled)
+			continue;
+
+		/* find associated dc stream */
+		main_stream = dml_ctx->config.callbacks.get_stream_from_id(context,
+				dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[stream_idx]);
+
+		main_stream_status = dml_ctx->config.callbacks.get_stream_status(context, main_stream);
+		if (!main_stream_status || main_stream_status->plane_count == 0)
+			continue;
+
+		/* create phantom stream for subvp enabled stream */
+		phantom_stream = dml21_add_phantom_stream(dml_ctx,
+				dc,
+				context,
+				main_stream,
+				&programming->stream_programming[stream_idx]);
+		if (!phantom_stream)
+			continue;
+
+		/* iterate through DML planes associated with this stream */
+		for (plane_idx = 0; plane_idx < programming->display_config.num_planes; plane_idx++) {
+			unsigned int dc_plane_index;
+			struct dc_plane_state *main_plane;
+
+			if (programming->plane_programming[plane_idx].plane_descriptor->stream_index != stream_idx)
+				continue;
+
+			/* find associated dc plane */
+			dc_plane_index = dml21_get_dc_plane_idx_from_plane_id(dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[plane_idx]);
+			main_plane = main_stream_status->plane_states[dc_plane_index];
+
+			/* create phantom planes for subvp enabled plane */
+			dml21_add_phantom_plane(dml_ctx,
+					dc,
+					context,
+					phantom_stream,
+					main_plane,
+					&programming->plane_programming[plane_idx]);
+
+			phantoms_added = true;
+		}
+	}
+
+	if (phantoms_added)
+		dml2_map_dc_pipes(dml_ctx, context, NULL, &dml_ctx->v21.dml_to_dc_pipe_mapping, dc->current_state);
+}
+
+/*
+ * Build the FAMS2 configuration stored in @context from the DML programming.
+ *
+ * All FAMS2 data in the context is cleared first. When DML reports that
+ * FAMS2 is required, one entry is produced per DC stream that has planes and
+ * is not a phantom stream: the static base/sub state is copied from DML and
+ * then completed with plane count, OTG instance and pipe masks taken from
+ * the context. Entries are packed; num_fams2_streams is the next free slot.
+ * The global config is copied only when at least one entry was produced.
+ */
+void dml21_build_fams2_programming(const struct dc *dc,
+ struct dc_state *context,
+ struct dml2_context *dml_ctx)
+{
+ int i, j, k;
+ unsigned int num_fams2_streams = 0;
+
+ /* reset fams2 data */
+ memset(&context->bw_ctx.bw.dcn.fams2_stream_base_params, 0, sizeof(union dmub_cmd_fams2_config) * DML2_MAX_PLANES);
+ memset(&context->bw_ctx.bw.dcn.fams2_stream_sub_params, 0, sizeof(union dmub_cmd_fams2_config) * DML2_MAX_PLANES);
+ memset(&context->bw_ctx.bw.dcn.fams2_stream_sub_params_v2, 0, sizeof(union dmub_fams2_stream_static_sub_state_v2) * DML2_MAX_PLANES);
+ memset(&context->bw_ctx.bw.dcn.fams2_global_config, 0, sizeof(struct dmub_cmd_fams2_global_config));
+
+ if (dml_ctx->v21.mode_programming.programming->fams2_required) {
+ for (i = 0; i < context->stream_count; i++) {
+ int dml_stream_idx;
+ struct dc_stream_state *phantom_stream;
+ struct dc_stream_status *phantom_status;
+ enum fams2_stream_type type = 0;
+
+ /* output slots for this stream; only consumed if the stream is not skipped below */
+ union dmub_cmd_fams2_config *static_base_state = &context->bw_ctx.bw.dcn.fams2_stream_base_params[num_fams2_streams];
+ union dmub_cmd_fams2_config *static_sub_state = &context->bw_ctx.bw.dcn.fams2_stream_sub_params[num_fams2_streams];
+
+ struct dc_stream_state *stream = context->streams[i];
+
+ if (context->stream_status[i].plane_count == 0 ||
+ dml_ctx->config.svp_pstate.callbacks.get_stream_subvp_type(context, stream) == SUBVP_PHANTOM) {
+ /* can ignore blanked or phantom streams */
+ continue;
+ }
+
+ /* a stream without a DML mapping is asserted on and skipped */
+ dml_stream_idx = dml21_helper_find_dml_pipe_idx_by_stream_id(dml_ctx, stream->stream_id);
+ if (dml_stream_idx < 0) {
+ ASSERT(dml_stream_idx >= 0);
+ continue;
+ }
+
+ /* copy static state from PMO */
+ memcpy(static_base_state,
+ &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_base_params,
+ sizeof(union dmub_cmd_fams2_config));
+
+ /* FAMS major version 3 uses the v2 sub state array; other versions use the original one */
+ if (dc->debug.fams_version.major == 3) {
+ memcpy(&context->bw_ctx.bw.dcn.fams2_stream_sub_params_v2[num_fams2_streams],
+ &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_sub_params_v2,
+ sizeof(union dmub_fams2_stream_static_sub_state_v2));
+ } else {
+ memcpy(static_sub_state,
+ &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_sub_params,
+ sizeof(union dmub_cmd_fams2_config));
+ }
+
+ /* case 1 falls into default, so every minor version takes the stream_v1 layout */
+ switch (dc->debug.fams_version.minor) {
+ case 1:
+ default:
+ type = static_base_state->stream_v1.base.type;
+
+ /* get information from context */
+ static_base_state->stream_v1.base.num_planes = context->stream_status[i].plane_count;
+ static_base_state->stream_v1.base.otg_inst = context->stream_status[i].primary_otg_inst;
+
+ /* populate pipe masks for planes */
+ for (j = 0; j < context->stream_status[i].plane_count; j++) {
+ for (k = 0; k < dc->res_pool->pipe_count; k++) {
+ if (context->res_ctx.pipe_ctx[k].stream &&
+ context->res_ctx.pipe_ctx[k].stream->stream_id == stream->stream_id &&
+ context->res_ctx.pipe_ctx[k].plane_state == context->stream_status[i].plane_states[j]) {
+ static_base_state->stream_v1.base.pipe_mask |= (1 << k);
+ static_base_state->stream_v1.base.plane_pipe_masks[j] |= (1 << k);
+ }
+ }
+ }
+ }
+
+
+ /* get per method programming */
+ switch (type) {
+ case FAMS2_STREAM_TYPE_VBLANK:
+ case FAMS2_STREAM_TYPE_VACTIVE:
+ case FAMS2_STREAM_TYPE_DRR:
+ break;
+ case FAMS2_STREAM_TYPE_SUBVP:
+ /* SubVP also needs the paired phantom stream's OTG instance and pipe masks */
+ phantom_stream = dml_ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, stream);
+ if (!phantom_stream)
+ break;
+
+ phantom_status = dml_ctx->config.callbacks.get_stream_status(context, phantom_stream);
+
+ /* phantom status should always be present */
+ ASSERT(phantom_status);
+ if (!phantom_status)
+ break;
+
+ switch (dc->debug.fams_version.minor) {
+ case 1:
+ default:
+ static_sub_state->stream_v1.sub_state.subvp.phantom_otg_inst = phantom_status->primary_otg_inst;
+
+ /* populate pipe masks for phantom planes */
+ for (j = 0; j < phantom_status->plane_count; j++) {
+ for (k = 0; k < dc->res_pool->pipe_count; k++) {
+ if (context->res_ctx.pipe_ctx[k].stream &&
+ context->res_ctx.pipe_ctx[k].stream->stream_id == phantom_stream->stream_id &&
+ context->res_ctx.pipe_ctx[k].plane_state == phantom_status->plane_states[j]) {
+ switch (dc->debug.fams_version.minor) {
+ case 1:
+ default:
+ static_sub_state->stream_v1.sub_state.subvp.phantom_pipe_mask |= (1 << k);
+ static_sub_state->stream_v1.sub_state.subvp.phantom_plane_pipe_masks[j] |= (1 << k);
+ }
+ }
+ }
+ }
+ }
+ break;
+ default:
+ ASSERT(false);
+ break;
+ }
+
+ /* the slot is now in use; the next accepted stream takes the following one */
+ num_fams2_streams++;
+ }
+ }
+
+ if (num_fams2_streams > 0) {
+ /* copy FAMS2 configuration */
+ memcpy(&context->bw_ctx.bw.dcn.fams2_global_config,
+ &dml_ctx->v21.mode_programming.programming->fams2_global_config,
+ sizeof(struct dmub_cmd_fams2_global_config));
+
+ context->bw_ctx.bw.dcn.fams2_global_config.num_streams = num_fams2_streams;
+ }
+
+ /* with no FAMS2 streams the global config stays zeroed, so this flag reads as disabled */
+ context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable;
+}
+
+/*
+ * Return true when @source_format lies in the inclusive enum range
+ * dml2_420_8 .. dml2_rgbe_alpha.
+ */
+bool dml21_is_plane1_enabled(enum dml2_source_format_class source_format)
+{
+	if (source_format < dml2_420_8)
+		return false;
+
+	return source_format <= dml2_rgbe_alpha;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.h
new file mode 100644
index 000000000000..4bff52eaaef8
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.h
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+
+#ifndef _DML21_UTILS_H_
+#define _DML21_UTILS_H_
+
+struct dc_state;
+struct dc_plane_state;
+struct pipe_ctx;
+
+struct dml2_context;
+struct dml2_display_rq_regs;
+struct dml2_display_dlg_regs;
+struct dml2_display_ttu_regs;
+
+int dml21_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id);
+int dml21_find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int plane_id);
+bool dml21_get_plane_id(const struct dc_state *state, const struct dc_plane_state *plane, unsigned int *plane_id);
+void dml21_pipe_populate_global_sync(struct dml2_context *dml_ctx,
+ struct dc_state *context,
+ struct pipe_ctx *pipe_ctx,
+ struct dml2_per_stream_programming *stream_programming);
+void dml21_populate_mall_allocation_size(struct dc_state *context,
+ struct dml2_context *in_ctx,
+ struct dml2_per_plane_programming *pln_prog,
+ struct pipe_ctx *dc_pipe);
+bool check_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx);
+void find_valid_pipe_idx_for_stream_index(const struct dml2_context *dml_ctx, unsigned int *dml_pipe_idx, unsigned int stream_index);
+void find_pipe_regs_idx(const struct dml2_context *dml_ctx,
+ struct pipe_ctx *pipe, unsigned int *pipe_regs_idx);
+int dml21_find_dc_pipes_for_plane(const struct dc *in_dc,
+ struct dc_state *context,
+ struct dml2_context *dml_ctx,
+ struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__],
+ struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__],
+ int dml_plane_idx);
+void dml21_program_dc_pipe(struct dml2_context *dml_ctx,
+ struct dc_state *context,
+ struct pipe_ctx *pipe_ctx,
+ struct dml2_per_plane_programming *pln_prog,
+ struct dml2_per_stream_programming *stream_prog);
+void dml21_handle_phantom_streams_planes(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx);
+unsigned int dml21_get_dc_plane_idx_from_plane_id(unsigned int plane_id);
+void dml21_build_fams2_programming(const struct dc *dc,
+ struct dc_state *context,
+ struct dml2_context *dml_ctx);
+bool dml21_is_plane1_enabled(enum dml2_source_format_class source_format);
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.c
new file mode 100644
index 000000000000..798abb2b2e67
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.c
@@ -0,0 +1,466 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dml2_internal_types.h"
+#include "dml_top.h"
+#include "dml2_core_dcn4_calcs.h"
+#include "dml2_internal_shared_types.h"
+#include "dml21_utils.h"
+#include "dml21_translation_helper.h"
+#include "dml2_dc_resource_mgmt.h"
+
+#define INVALID -1
+
+/*
+ * Allocate a zeroed dml2_context together with the two heap objects it owns
+ * (the DML2 instance and the display-config programming output) and wire up
+ * the internal references between them.
+ *
+ * @dml_ctx: out parameter receiving the new context.
+ *
+ * Return: true on success with *dml_ctx pointing at the new context. On any
+ * allocation failure everything allocated so far is released, *dml_ctx is set
+ * to NULL and false is returned, so nothing is leaked and the caller is never
+ * handed a partially constructed context.
+ */
+static bool dml21_allocate_memory(struct dml2_context **dml_ctx)
+{
+	struct dml2_context *ctx;
+
+	*dml_ctx = NULL;
+
+	ctx = vzalloc(sizeof(struct dml2_context));
+	if (!ctx)
+		return false;
+
+	ctx->v21.dml_init.dml2_instance = vzalloc(sizeof(struct dml2_instance));
+	if (!ctx->v21.dml_init.dml2_instance)
+		goto free_ctx;
+
+	ctx->v21.mode_programming.programming = vzalloc(sizeof(struct dml2_display_cfg_programming));
+	if (!ctx->v21.mode_programming.programming)
+		goto free_instance;
+
+	/* mode support and mode programming share the single DML2 instance */
+	ctx->v21.mode_support.dml2_instance = ctx->v21.dml_init.dml2_instance;
+	ctx->v21.mode_programming.dml2_instance = ctx->v21.dml_init.dml2_instance;
+
+	/* ... and both operate on the display config embedded in the context */
+	ctx->v21.mode_support.display_config = &ctx->v21.display_config;
+	ctx->v21.mode_programming.display_config = ctx->v21.mode_support.display_config;
+
+	*dml_ctx = ctx;
+	return true;
+
+free_instance:
+	vfree(ctx->v21.dml_init.dml2_instance);
+free_ctx:
+	vfree(ctx);
+	return false;
+}
+
+/*
+ * Copy the caller supplied configuration into the context and then apply the
+ * UCLK P-State debug override taken from in_dc->debug.
+ */
+static void dml21_populate_configuration_options(const struct dc *in_dc,
+		struct dml2_context *dml_ctx,
+		const struct dml2_configuration_options *config)
+{
+	int pipe;
+
+	dml_ctx->config = *config;
+
+	/* UCLK P-State options */
+	if (!in_dc->debug.dml21_force_pstate_method) {
+		dml_ctx->config.pmo.force_pstate_method_enable = false;
+		return;
+	}
+
+	/* debug override requested: enable it and take over the per-pipe methods */
+	dml_ctx->config.pmo.force_pstate_method_enable = true;
+	for (pipe = 0; pipe < MAX_PIPES; pipe++)
+		dml_ctx->config.pmo.force_pstate_method_values[pipe] =
+			in_dc->debug.dml21_force_pstate_method_values[pipe];
+}
+
+/*
+ * (Re)initialize an already allocated context: mark it as DML 2.1, take over
+ * the configuration options, build the init parameters and initialize the
+ * DML2 instance. The last two steps run inside a DC_FP_START/DC_FP_END
+ * section.
+ *
+ * NOTE(review): the return value of dml2_initialize_instance() is not checked
+ * here, so an initialization failure is not reported to the caller.
+ */
+static void dml21_init(const struct dc *in_dc, struct dml2_context *dml_ctx, const struct dml2_configuration_options *config)
+{
+	struct dml2_initialize_instance_in_out *init_params = &dml_ctx->v21.dml_init;
+
+	dml_ctx->architecture = dml2_architecture_21;
+	dml21_populate_configuration_options(in_dc, dml_ctx, config);
+
+	DC_FP_START();
+	dml21_populate_dml_init_params(init_params, &dml_ctx->config, in_dc);
+	dml2_initialize_instance(init_params);
+	DC_FP_END();
+}
+
+/*
+ * Allocate a DML21 context into *dml_ctx and initialize it from in_dc and
+ * config. Returns false, without initializing anything, when the allocation
+ * fails.
+ */
+bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config)
+{
+	bool allocated = dml21_allocate_memory(dml_ctx);
+
+	/* only initialize when there is memory backing the instance */
+	if (allocated)
+		dml21_init(in_dc, *dml_ctx, config);
+
+	return allocated;
+}
+
+/*
+ * Release the two heap objects owned by a DML21 context: the programming
+ * output and the DML2 instance. The context structure itself is not freed
+ * here.
+ */
+void dml21_destroy(struct dml2_context *dml2)
+{
+	vfree(dml2->v21.mode_programming.programming);
+	vfree(dml2->v21.dml_init.dml2_instance);
+}
+
+/*
+ * Copy the results of a successful dml2_build_mode_programming() call into
+ * the dc_state bandwidth context and program every DC pipe that backs a DML
+ * plane.
+ *
+ * @dc: display core, used to look up pipes and the MALL ways callback.
+ * @context: dc_state receiving arbiter regs, mcache allocations and clocks.
+ * @out_new_hw_state: not referenced by this function.
+ * @in_ctx: DML context holding the programming output and the SoC bounding box.
+ * @pipe_cnt: not referenced by this function.
+ */
+static void dml21_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state,
+		struct dml2_context *in_ctx, unsigned int pipe_cnt)
+{
+	struct dml2_display_cfg_programming *programming = in_ctx->v21.mode_programming.programming;
+	unsigned int dml_prog_idx = 0, dc_pipe_index = 0;
+	struct dml2_per_plane_programming *pln_prog = NULL;
+	struct dml2_per_stream_programming *stream_prog = NULL;
+	struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__];
+	struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0};
+	int num_pipes;
+	unsigned int dml_phantom_prog_idx;
+
+	/* reset before the pipes are programmed below */
+	context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
+
+	/* copy global DCHUBBUB arbiter registers */
+	memcpy(&context->bw_ctx.bw.dcn.arb_regs, &programming->global_regs.arb_regs, sizeof(struct dml2_display_arb_regs));
+
+	/* legacy only */
+	context->bw_ctx.bw.dcn.compbuf_size_kb = (int)programming->global_regs.arb_regs.compbuf_size * 64;
+
+	context->bw_ctx.bw.dcn.mall_ss_size_bytes = 0;
+	context->bw_ctx.bw.dcn.mall_ss_psr_active_size_bytes = 0;
+	context->bw_ctx.bw.dcn.mall_subvp_size_bytes = 0;
+
+	/* phantom planes are stored after the main planes */
+	dml_phantom_prog_idx = programming->display_config.num_planes;
+
+	for (dml_prog_idx = 0; dml_prog_idx < DML2_MAX_PLANES; dml_prog_idx++) {
+		pln_prog = &programming->plane_programming[dml_prog_idx];
+
+		/* skip unused slots and planes that need no DPP */
+		if (!pln_prog->plane_descriptor)
+			continue;
+
+		stream_prog = &programming->stream_programming[pln_prog->plane_descriptor->stream_index];
+
+		if (pln_prog->num_dpps_required == 0)
+			continue;
+
+		num_pipes = dml21_find_dc_pipes_for_plane(dc, context, in_ctx, dc_main_pipes, dc_phantom_pipes, dml_prog_idx);
+		if (num_pipes <= 0)
+			continue;
+
+		/* program each pipe, plus its phantom counterpart when present */
+		for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) {
+			dml21_program_dc_pipe(in_ctx, context, dc_main_pipes[dc_pipe_index], pln_prog, stream_prog);
+
+			if (pln_prog->phantom_plane.valid && dc_phantom_pipes[dc_pipe_index])
+				dml21_program_dc_pipe(in_ctx, context, dc_phantom_pipes[dc_pipe_index], pln_prog, stream_prog);
+		}
+
+		/* copy per plane mcache allocation */
+		memcpy(&context->bw_ctx.bw.dcn.mcache_allocations[dml_prog_idx], &pln_prog->mcache_allocation, sizeof(struct dml2_mcache_surface_allocation));
+		if (pln_prog->phantom_plane.valid) {
+			memcpy(&context->bw_ctx.bw.dcn.mcache_allocations[dml_phantom_prog_idx],
+					&pln_prog->phantom_plane.mcache_allocation,
+					sizeof(struct dml2_mcache_surface_allocation));
+
+			dml_phantom_prog_idx++;
+		}
+	}
+
+	/* assign global clocks */
+	context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz;
+	context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz;
+
+	/*
+	 * The highest populated clock table entry lives at num_clk_values - 1;
+	 * indexing with num_clk_values itself reads one past the last valid
+	 * entry (and past the array when the table is full).
+	 *
+	 * NOTE(review): the "* 1000" scaling of a *_khz table value into a
+	 * *_khz field is kept from the original code - confirm the units.
+	 */
+	if (in_ctx->v21.dml_init.soc_bb.clk_table.dispclk.num_clk_values > 1) {
+		context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz =
+			in_ctx->v21.dml_init.soc_bb.clk_table.dispclk.clk_values_khz[in_ctx->v21.dml_init.soc_bb.clk_table.dispclk.num_clk_values - 1] * 1000;
+	} else {
+		context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = in_ctx->v21.dml_init.soc_bb.clk_table.dispclk.clk_values_khz[0] * 1000;
+	}
+
+	if (in_ctx->v21.dml_init.soc_bb.clk_table.dppclk.num_clk_values > 1) {
+		context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz =
+			in_ctx->v21.dml_init.soc_bb.clk_table.dppclk.clk_values_khz[in_ctx->v21.dml_init.soc_bb.clk_table.dppclk.num_clk_values - 1] * 1000;
+	} else {
+		context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = in_ctx->v21.dml_init.soc_bb.clk_table.dppclk.clk_values_khz[0] * 1000;
+	}
+
+	/* get global mall allocation; 0 ways when the resource pool has no helper */
+	if (dc->res_pool->funcs->calculate_mall_ways_from_bytes) {
+		context->bw_ctx.bw.dcn.clk.num_ways = dc->res_pool->funcs->calculate_mall_ways_from_bytes(dc, context->bw_ctx.bw.dcn.mall_subvp_size_bytes);
+	} else {
+		context->bw_ctx.bw.dcn.clk.num_ways = 0;
+	}
+}
+
+/*
+ * Fill mcache_params[] with the mcache allocation DML computed for every
+ * plane of every stream in context. Entries are written densely in stream /
+ * plane iteration order; planes that cannot be mapped to a DML programming
+ * index are skipped and do not consume an entry.
+ */
+static void dml21_prepare_mcache_params(struct dml2_context *dml_ctx, struct dc_state *context, struct dc_mcache_params *mcache_params)
+{
+	const struct dml2_mcache_surface_allocation *alloc;
+	struct dc_mcache_params *out = mcache_params;
+	int stream_idx, plane_idx, dml_prog_idx;
+
+	for (stream_idx = 0; stream_idx < context->stream_count; stream_idx++) {
+		for (plane_idx = 0; plane_idx < context->stream_status[stream_idx].plane_count; plane_idx++) {
+			dml_prog_idx = map_plane_to_dml21_display_cfg(dml_ctx,
+					context->streams[stream_idx]->stream_id,
+					context->stream_status[stream_idx].plane_states[plane_idx],
+					context);
+			if (dml_prog_idx == INVALID)
+				continue;
+
+			alloc = &dml_ctx->v21.mode_programming.programming->plane_programming[dml_prog_idx].mcache_allocation;
+
+			out->valid = alloc->valid;
+			out->num_mcaches_plane0 = alloc->num_mcaches_plane0;
+			out->num_mcaches_plane1 = alloc->num_mcaches_plane1;
+			out->requires_dedicated_mall_mcache = alloc->requires_dedicated_mall_mcache;
+			out->last_slice_sharing.plane0_plane1 = alloc->last_slice_sharing.plane0_plane1;
+
+			/* the offset arrays hold DML2_MAX_MCACHES + 1 ints each */
+			memcpy(out->mcache_x_offsets_plane0,
+					alloc->mcache_x_offsets_plane0,
+					sizeof(int) * (DML2_MAX_MCACHES + 1));
+			memcpy(out->mcache_x_offsets_plane1,
+					alloc->mcache_x_offsets_plane1,
+					sizeof(int) * (DML2_MAX_MCACHES + 1));
+
+			/* advance to the next output entry */
+			out++;
+		}
+	}
+}
+
+/*
+ * Full validation path: map the dc_state into a DML display config, build the
+ * mode programming and, unless config.skip_hw_state_mapping is set, map the
+ * result onto DC pipes and copy clocks, watermarks and registers into context.
+ *
+ * Return: true when context is NULL, has no streams, or programming succeeded;
+ * false when the state could not be mapped or DML rejected the mode.
+ */
+static bool dml21_mode_check_and_programming(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx)
+{
+	struct dc_mcache_params mcache_params[MAX_PLANES] = {0};
+	struct dml2_build_mode_programming_in_out *prog_io = &dml_ctx->v21.mode_programming;
+	bool ok;
+
+	/* start from a clean display config, pipe mapping and scratch area */
+	memset(&dml_ctx->v21.display_config, 0, sizeof(struct dml2_display_cfg));
+	memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping));
+	memset(&prog_io->dml2_instance->scratch.build_mode_programming_locals.mode_programming_params, 0, sizeof(struct dml2_core_mode_programming_in_out));
+
+	if (!context)
+		return true;
+
+	/* nothing to validate without streams, but FAMS2 programming is still built */
+	if (context->stream_count == 0) {
+		dml21_build_fams2_programming(in_dc, context, dml_ctx);
+		return true;
+	}
+
+	/* scrub phantoms from current dc_state */
+	dml_ctx->config.svp_pstate.callbacks.remove_phantom_streams_and_planes(in_dc, context);
+	dml_ctx->config.svp_pstate.callbacks.release_phantom_streams_and_planes(in_dc, context);
+
+	/* Populate stream, plane mappings and other fields in display config. */
+	if (!dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx))
+		return false;
+
+	DC_FP_START();
+	ok = dml2_build_mode_programming(prog_io);
+	DC_FP_END();
+	if (!ok)
+		return false;
+
+	/* the mode is supported; stop here when HW state mapping is not wanted */
+	if (dml_ctx->config.skip_hw_state_mapping)
+		return true;
+
+	/* Check and map HW resources */
+	dml21_map_hw_resources(dml_ctx);
+	dml2_map_dc_pipes(dml_ctx, context, NULL, &dml_ctx->v21.dml_to_dc_pipe_mapping, in_dc->current_state);
+	/* if subvp phantoms are present, expand them into dc context */
+	dml21_handle_phantom_streams_planes(in_dc, context, dml_ctx);
+
+	if (in_dc->res_pool->funcs->program_mcache_pipe_config) {
+		/* Prepare mcache params for each plane based on mcache output from DML */
+		dml21_prepare_mcache_params(dml_ctx, context, mcache_params);
+
+		/* populate mcache regs to each pipe */
+		dml_ctx->config.callbacks.allocate_mcache(context, mcache_params);
+	}
+
+	/* Copy DML CLK, WM and REG outputs to bandwidth context */
+	dml21_calculate_rq_and_dlg_params(in_dc, context, &context->res_ctx, dml_ctx, in_dc->res_pool->pipe_count);
+	dml21_copy_clocks_to_dc_state(dml_ctx, context);
+	dml21_extract_watermark_sets(in_dc, &context->bw_ctx.bw.dcn.watermarks, dml_ctx);
+	dml21_build_fams2_programming(in_dc, context, dml_ctx);
+
+	return true;
+}
+
+/*
+ * Mode-support-only validation path: map the dc_state into a DML display
+ * config and ask DML whether the mode is supported, without producing any
+ * hardware programming in context.
+ *
+ * Return: true when context is NULL, has no streams, or DML reports the mode
+ * as supported; false when the state could not be mapped into a DML display
+ * config or DML rejects the mode.
+ */
+static bool dml21_check_mode_support(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx)
+{
+	bool is_supported = false;
+	struct dml2_initialize_instance_in_out *dml_init = &dml_ctx->v21.dml_init;
+	struct dml2_check_mode_supported_in_out *mode_support = &dml_ctx->v21.mode_support;
+
+	/* start from a clean display config, pipe mapping and scratch area */
+	memset(&dml_ctx->v21.display_config, 0, sizeof(struct dml2_display_cfg));
+	memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping));
+	memset(&dml_ctx->v21.mode_programming.dml2_instance->scratch.check_mode_supported_locals.mode_support_params, 0, sizeof(struct dml2_core_mode_support_in_out));
+
+	if (!context || context->stream_count == 0)
+		return true;
+
+	/* Scrub phantoms from current dc_state */
+	dml_ctx->config.svp_pstate.callbacks.remove_phantom_streams_and_planes(in_dc, context);
+	dml_ctx->config.svp_pstate.callbacks.release_phantom_streams_and_planes(in_dc, context);
+
+	mode_support->dml2_instance = dml_init->dml2_instance;
+
+	/*
+	 * Do not run the support check on a display config that failed to
+	 * map; this mirrors the handling in dml21_mode_check_and_programming().
+	 */
+	if (!dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx))
+		return false;
+
+	dml_ctx->v21.mode_programming.dml2_instance->scratch.build_mode_programming_locals.mode_programming_params.programming = dml_ctx->v21.mode_programming.programming;
+
+	DC_FP_START();
+	is_supported = dml2_check_mode_supported(mode_support);
+	DC_FP_END();
+
+	return is_supported;
+}
+
+/*
+ * Validation entry point. DC_VALIDATE_MODE_AND_PROGRAMMING runs the full
+ * check-and-program path; every other validate_mode only checks whether the
+ * mode is supported.
+ */
+bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx,
+		enum dc_validate_mode validate_mode)
+{
+	if (validate_mode == DC_VALIDATE_MODE_AND_PROGRAMMING)
+		return dml21_mode_check_and_programming(in_dc, context, dml_ctx);
+
+	/* DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX path */
+	return dml21_check_mode_support(in_dc, context, dml_ctx);
+}
+
+/*
+ * Prepare hubp mcache_regs for every main and phantom pipe of context.
+ *
+ * Works in three steps:
+ *  1. build one mcache configuration per DML plane (phantom planes are
+ *     appended after the main planes),
+ *  2. let dml2_build_mcache_programming() compute the per plane / per pipe
+ *     register values,
+ *  3. copy the resulting registers into the matching pipe_ctx.
+ *
+ * Does nothing when context has no streams.
+ */
+void dml21_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx)
+{
+	unsigned int dml_prog_idx, dml_phantom_prog_idx, dc_pipe_index;
+	int num_pipes;
+	struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__];
+	struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0};
+
+	struct dml2_display_cfg_programming *programming = dml_ctx->v21.mode_programming.programming;
+	struct dml2_per_plane_programming *pln_prog = NULL;
+	struct dml2_plane_mcache_configuration_descriptor *mcache_config = NULL;
+	struct prepare_mcache_programming_locals *l = &dml_ctx->v21.scratch.prepare_mcache_locals;
+
+	if (context->stream_count == 0)
+		return;
+
+	memset(&l->build_mcache_programming_params, 0, sizeof(struct dml2_build_mcache_programming_in_out));
+	l->build_mcache_programming_params.dml2_instance = dml_ctx->v21.dml_init.dml2_instance;
+
+	/* phantom planes are stored after the main planes */
+	dml_phantom_prog_idx = programming->display_config.num_planes;
+
+	/* Build mcache programming parameters per plane per pipe */
+	for (dml_prog_idx = 0; dml_prog_idx < programming->display_config.num_planes; dml_prog_idx++) {
+		pln_prog = &programming->plane_programming[dml_prog_idx];
+
+		mcache_config = &l->build_mcache_programming_params.mcache_configurations[dml_prog_idx];
+		memset(mcache_config, 0, sizeof(struct dml2_plane_mcache_configuration_descriptor));
+		mcache_config->plane_descriptor = pln_prog->plane_descriptor;
+		mcache_config->mcache_allocation = &context->bw_ctx.bw.dcn.mcache_allocations[dml_prog_idx];
+		mcache_config->num_pipes = pln_prog->num_dpps_required;
+		l->build_mcache_programming_params.num_configurations++;
+
+		if (pln_prog->num_dpps_required == 0)
+			continue;
+
+		num_pipes = dml21_find_dc_pipes_for_plane(in_dc, context, dml_ctx, dc_main_pipes, dc_phantom_pipes, dml_prog_idx);
+		if (num_pipes <= 0 || dc_main_pipes[0]->stream == NULL ||
+				dc_main_pipes[0]->plane_state == NULL)
+			continue;
+
+		/* get config for each pipe */
+		for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) {
+			ASSERT(dc_main_pipes[dc_pipe_index]);
+			dml21_get_pipe_mcache_config(context, dc_main_pipes[dc_pipe_index], pln_prog, &mcache_config->pipe_configurations[dc_pipe_index]);
+		}
+
+		/* get config for each phantom pipe */
+		if (pln_prog->phantom_plane.valid &&
+				dc_phantom_pipes[0] &&
+				dc_main_pipes[0]->stream &&
+				dc_phantom_pipes[0]->plane_state) {
+			mcache_config = &l->build_mcache_programming_params.mcache_configurations[dml_phantom_prog_idx];
+			memset(mcache_config, 0, sizeof(struct dml2_plane_mcache_configuration_descriptor));
+			mcache_config->plane_descriptor = pln_prog->plane_descriptor;
+			mcache_config->mcache_allocation = &context->bw_ctx.bw.dcn.mcache_allocations[dml_phantom_prog_idx];
+			mcache_config->num_pipes = pln_prog->num_dpps_required;
+			l->build_mcache_programming_params.num_configurations++;
+
+			for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) {
+				ASSERT(dc_phantom_pipes[dc_pipe_index]);
+				dml21_get_pipe_mcache_config(context, dc_phantom_pipes[dc_pipe_index], pln_prog, &mcache_config->pipe_configurations[dc_pipe_index]);
+			}
+
+			/* increment phantom index */
+			dml_phantom_prog_idx++;
+		}
+	}
+
+	/* Call to generate mcache programming per plane per pipe for the given display configuration */
+	dml2_build_mcache_programming(&l->build_mcache_programming_params);
+
+	/*
+	 * Rewind the phantom index: the read-back loop below must visit the
+	 * phantom slots in the same order, starting from the same base, as the
+	 * configuration loop above. Without this reset it would continue from
+	 * wherever the first loop stopped and index the wrong (possibly out of
+	 * range) per_plane_pipe_mcache_regs rows.
+	 */
+	dml_phantom_prog_idx = programming->display_config.num_planes;
+
+	/* get per plane per pipe mcache programming */
+	for (dml_prog_idx = 0; dml_prog_idx < programming->display_config.num_planes; dml_prog_idx++) {
+		pln_prog = &programming->plane_programming[dml_prog_idx];
+
+		num_pipes = dml21_find_dc_pipes_for_plane(in_dc, context, dml_ctx, dc_main_pipes, dc_phantom_pipes, dml_prog_idx);
+		if (num_pipes <= 0 || dc_main_pipes[0]->stream == NULL ||
+				dc_main_pipes[0]->plane_state == NULL)
+			continue;
+
+		/* copy registers for each pipe that DML produced output for */
+		for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) {
+			ASSERT(dc_main_pipes[dc_pipe_index]);
+			if (l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_prog_idx][dc_pipe_index]) {
+				memcpy(&dc_main_pipes[dc_pipe_index]->mcache_regs,
+						l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_prog_idx][dc_pipe_index],
+						sizeof(struct dml2_hubp_pipe_mcache_regs));
+			}
+		}
+
+		/* copy registers for each phantom pipe */
+		if (pln_prog->phantom_plane.valid &&
+				dc_phantom_pipes[0] &&
+				dc_main_pipes[0]->stream &&
+				dc_phantom_pipes[0]->plane_state) {
+			for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) {
+				ASSERT(dc_phantom_pipes[dc_pipe_index]);
+				if (l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_phantom_prog_idx][dc_pipe_index]) {
+					memcpy(&dc_phantom_pipes[dc_pipe_index]->mcache_regs,
+							l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_phantom_prog_idx][dc_pipe_index],
+							sizeof(struct dml2_hubp_pipe_mcache_regs));
+				}
+			}
+			/* increment phantom index */
+			dml_phantom_prog_idx++;
+		}
+	}
+}
+
+/*
+ * Deep-copy src_dml_ctx into the already allocated dst_dml_ctx.
+ *
+ * The destination keeps its own DML2 instance and programming buffers: their
+ * addresses are remembered, the context and both buffers are overwritten with
+ * the source contents, and the destination's internal references are then
+ * pointed back at its own buffers before the instance is re-initialized.
+ */
+void dml21_copy(struct dml2_context *dst_dml_ctx,
+		struct dml2_context *src_dml_ctx)
+{
+	/* remember the destination's own buffers; the context memcpy clobbers them */
+	struct dml2_instance *own_instance = dst_dml_ctx->v21.dml_init.dml2_instance;
+	struct dml2_display_cfg_programming *own_programming = dst_dml_ctx->v21.mode_programming.programming;
+
+	/* shallow copy of the context, then the contents of both buffers */
+	memcpy(dst_dml_ctx, src_dml_ctx, sizeof(struct dml2_context));
+	memcpy(own_instance, src_dml_ctx->v21.dml_init.dml2_instance, sizeof(struct dml2_instance));
+	memcpy(own_programming, src_dml_ctx->v21.mode_programming.programming, sizeof(struct dml2_display_cfg_programming));
+
+	/* re-target every internal reference at the destination's own storage */
+	dst_dml_ctx->v21.dml_init.dml2_instance = own_instance;
+	dst_dml_ctx->v21.mode_support.dml2_instance = own_instance;
+	dst_dml_ctx->v21.mode_programming.dml2_instance = own_instance;
+
+	dst_dml_ctx->v21.mode_support.display_config = &dst_dml_ctx->v21.display_config;
+	dst_dml_ctx->v21.mode_programming.display_config = dst_dml_ctx->v21.mode_support.display_config;
+
+	dst_dml_ctx->v21.mode_programming.programming = own_programming;
+
+	/* need to initialize copied instance for internal references to be correct */
+	DC_FP_START();
+	dml2_initialize_instance(&dst_dml_ctx->v21.dml_init);
+	DC_FP_END();
+}
+
+/*
+ * Allocate a new DML21 context into *dst_dml_ctx and make it a deep copy of
+ * src_dml_ctx. Returns false, without copying, when the allocation fails.
+ */
+bool dml21_create_copy(struct dml2_context **dst_dml_ctx,
+		struct dml2_context *src_dml_ctx)
+{
+	bool allocated = dml21_allocate_memory(dst_dml_ctx);
+
+	/* only copy when there is memory backing the new instance */
+	if (allocated)
+		dml21_copy(*dst_dml_ctx, src_dml_ctx);
+
+	return allocated;
+}
+
+/*
+ * Re-run initialization on an existing context with a new configuration.
+ * No memory is allocated or freed; this only forwards to dml21_init().
+ */
+void dml21_reinit(const struct dc *in_dc, struct dml2_context *dml_ctx, const struct dml2_configuration_options *config)
+{
+ dml21_init(in_dc, dml_ctx, config);
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.h
new file mode 100644
index 000000000000..15f92029d2e5
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.h
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+
+#ifndef _DML21_WRAPPER_H_
+#define _DML21_WRAPPER_H_
+
+#include "os_types.h"
+#include "dml_top_soc_parameter_types.h"
+#include "dml_top_display_cfg_types.h"
+
+struct dc;
+struct dc_state;
+struct dml2_configuration_options;
+struct dml2_context;
+enum dc_validate_mode;
+
+/**
+ * dml21_create - Creates dml21_context.
+ * @in_dc: dc.
+ * @dml_ctx: Created dml21 context.
+ * @config: dml21 configuration options.
+ *
+ * Creation of DML21 is done as part of dc_state creation.
+ * DML21 IP, SOC and STATES are initialized at
+ * creation time.
+ *
+ * Return: True if dml21 is successfully created, false otherwise.
+ */
+bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config);
+void dml21_destroy(struct dml2_context *dml2);
+void dml21_copy(struct dml2_context *dst_dml_ctx,
+ struct dml2_context *src_dml_ctx);
+bool dml21_create_copy(struct dml2_context **dst_dml_ctx,
+ struct dml2_context *src_dml_ctx);
+void dml21_reinit(const struct dc *in_dc, struct dml2_context *dml_ctx, const struct dml2_configuration_options *config);
+
+/**
+ * dml21_validate - Determines if a display configuration is supported or not.
+ * @in_dc: dc.
+ * @context: dc_state to be validated.
+ * @dml_ctx: dml21 context used for the validation.
+ * @validate_mode: DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX
+ * will not populate context.res_ctx.
+ *
+ * Based on the validate_mode option, this internally calls:
+ *
+ * -dml21_mode_check_and_programming - for DC_VALIDATE_MODE_AND_PROGRAMMING option
+ * Calculates if dc_state can be supported on the input display
+ * configuration. If supported, generates the necessary HW
+ * programming for the new dc_state.
+ *
+ * -dml21_check_mode_support - for DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX option
+ * Calculates if dc_state can be supported for the input display
+ * config.
+ *
+ * Context: Two threads may not invoke this function concurrently unless they reference
+ * separate dc_states for validation.
+ * Return: True if mode is supported, false otherwise.
+ */
+bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx,
+ enum dc_validate_mode validate_mode);
+
+/* Prepare hubp mcache_regs for hubp mcache ID and split coordinate programming */
+void dml21_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx);
+
+/* Structure for inputting external SOCBB and DCNIP values for tool based debugging. */
+struct socbb_ip_params_external {
+ /* externally supplied DCN IP capabilities */
+ struct dml2_ip_capabilities ip_params;
+ /* externally supplied SoC bounding box */
+ struct dml2_soc_bb soc_bb;
+};
+
+/* mcache parameters decided by dml */
+struct dc_mcache_params {
+ bool valid;
+ /*
+ * For iMALL, dedicated mall mcaches are required (sharing of last
+ * slice possible), for legacy phantom or phantom without return
+ * only the mall mcaches need to be valid.
+ */
+ bool requires_dedicated_mall_mcache;
+ unsigned int num_mcaches_plane0;
+ unsigned int num_mcaches_plane1;
+ /*
+ * Generally, plane0/1 slices must use a disjoint set of caches
+ * but in some cases the final segment of the two planes can
+ * use the same cache. If plane0_plane1 is set, then this is
+ * allowed.
+ *
+ * Similarly, the caches allocated to MALL prefetcher are generally
+ * disjoint, but if mall_prefetch is set, then the final segment
+ * between the main and the mall pixel requestor can use the same
+ * cache.
+ *
+ * Note that both bits may be set at the same time.
+ *
+ * NOTE(review): there is no field named mall_prefetch below; the text
+ * above presumably refers to mall_comb_mcache_p0/p1 - TODO confirm.
+ */
+ struct {
+ bool mall_comb_mcache_p0;
+ bool mall_comb_mcache_p1;
+ bool plane0_plane1;
+ } last_slice_sharing;
+ /*
+ * A plane is divided into vertical slices of mcaches,
+ * which wrap on the surface width.
+ *
+ * For example, if the surface width is 7680, and split into
+ * three slices of equal width, the boundary array would contain
+ * [2560, 5120, 7680]
+ *
+ * The assignments are
+ * 0 = [0 .. 2559]
+ * 1 = [2560 .. 5119]
+ * 2 = [5120 .. 7679]
+ * 0 = [7680 .. INF]
+ * The final element implicitly is the same as the first, and
+ * at first seems invalid since it is never referenced (since
+ * it is outside the surface). However, it is useful when shifting
+ * (see below).
+ *
+ * For any given valid mcache assignment, a shifted version, wrapped
+ * on the surface width boundary is also assumed to be valid.
+ *
+ * For example, shifting [2560, 5120, 7680] by -50 results in
+ * [2510, 5170, 7630].
+ *
+ * The assignments are now:
+ * 0 = [0 .. 2509]
+ * 1 = [2510 .. 5169]
+ * 2 = [5170 .. 7629]
+ * 0 = [7630 .. INF]
+ */
+ int mcache_x_offsets_plane0[DML2_MAX_MCACHES + 1];
+ int mcache_x_offsets_plane1[DML2_MAX_MCACHES + 1];
+};
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/bounding_boxes/dcn4_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/bounding_boxes/dcn4_soc_bb.h
new file mode 100644
index 000000000000..16a4f97bca4e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/bounding_boxes/dcn4_soc_bb.h
@@ -0,0 +1,372 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML_DML_DCN4_SOC_BB__
+#define __DML_DML_DCN4_SOC_BB__
+
+#include "dml_top_soc_parameter_types.h"
+
+/*
+ * QoS parameter set for the "variant A" DCN4 configuration: bandwidth derate
+ * percentages, writeback latency and dcn4x latency parameters. Only the first
+ * per_uclk_dpm_params entry is given explicitly; the remaining entries are
+ * left zero-initialized.
+ */
+static const struct dml2_soc_qos_parameters dml_dcn4_variant_a_soc_qos_params = {
+ .derate_table = {
+ .system_active_urgent = {
+ .dram_derate_percent_pixel = 22,
+ .dram_derate_percent_vm = 0,
+ .dram_derate_percent_pixel_and_vm = 0,
+ .fclk_derate_percent = 76,
+ .dcfclk_derate_percent = 100,
+ },
+ .system_active_average = {
+ .dram_derate_percent_pixel = 17,
+ .dram_derate_percent_vm = 0,
+ .dram_derate_percent_pixel_and_vm = 0,
+ .fclk_derate_percent = 57,
+ .dcfclk_derate_percent = 75,
+ },
+ .dcn_mall_prefetch_urgent = {
+ .dram_derate_percent_pixel = 40,
+ .dram_derate_percent_vm = 0,
+ .dram_derate_percent_pixel_and_vm = 0,
+ .fclk_derate_percent = 83,
+ .dcfclk_derate_percent = 100,
+ },
+ .dcn_mall_prefetch_average = {
+ .dram_derate_percent_pixel = 33,
+ .dram_derate_percent_vm = 0,
+ .dram_derate_percent_pixel_and_vm = 0,
+ .fclk_derate_percent = 62,
+ .dcfclk_derate_percent = 83,
+ },
+ .system_idle_average = {
+ .dram_derate_percent_pixel = 70,
+ .dram_derate_percent_vm = 0,
+ .dram_derate_percent_pixel_and_vm = 0,
+ .fclk_derate_percent = 83,
+ .dcfclk_derate_percent = 100,
+ },
+ },
+ .writeback = {
+ .base_latency_us = 12,
+ .scaling_factor_us = 0,
+ .scaling_factor_mhz = 0,
+ },
+ .qos_params = {
+ .dcn4x = {
+ .df_qos_response_time_fclk_cycles = 300,
+ .max_round_trip_to_furthest_cs_fclk_cycles = 350,
+ .mall_overhead_fclk_cycles = 50,
+ .meta_trip_adder_fclk_cycles = 36,
+ .average_transport_distance_fclk_cycles = 257,
+ .umc_urgent_ramp_latency_margin = 50,
+ .umc_max_latency_margin = 30,
+ .umc_average_latency_margin = 20,
+ .fabric_max_transport_latency_margin = 20,
+ .fabric_average_transport_latency_margin = 10,
+
+ .per_uclk_dpm_params = {
+ {
+ .minimum_uclk_khz = 97 * 1000,
+ .urgent_ramp_uclk_cycles = 472,
+ .trip_to_memory_uclk_cycles = 827,
+ .meta_trip_to_memory_uclk_cycles = 827,
+ .maximum_latency_when_urgent_uclk_cycles = 72,
+ .average_latency_when_urgent_uclk_cycles = 61,
+ .maximum_latency_when_non_urgent_uclk_cycles = 827,
+ .average_latency_when_non_urgent_uclk_cycles = 118,
+ },
+ },
+ },
+ },
+ .qos_type = dml2_qos_param_type_dcn4x,
+};
+
+/*
+ * Built-in SoC bounding box for dcn401: clock table, QoS parameters (eight
+ * per-UCLK DPM states), power management latencies and assorted SoC constants.
+ *
+ * NOTE(review): every per_uclk_dpm_params entry has minimum_uclk_khz = 0 and
+ * the uclk table holds a single value; presumably these are overridden with
+ * real values at runtime - TODO confirm.
+ */
+static const struct dml2_soc_bb dml2_socbb_dcn401 = {
+ .clk_table = {
+ .uclk = {
+ .clk_values_khz = {97000},
+ .num_clk_values = 1,
+ },
+ .fclk = {
+ .clk_values_khz = {300000, 2500000},
+ .num_clk_values = 2,
+ },
+ .dcfclk = {
+ .clk_values_khz = {200000, 1564000},
+ .num_clk_values = 2,
+ },
+ .dispclk = {
+ .clk_values_khz = {100000, 2000000},
+ .num_clk_values = 2,
+ },
+ .dppclk = {
+ .clk_values_khz = {100000, 2000000},
+ .num_clk_values = 2,
+ },
+ .dtbclk = {
+ .clk_values_khz = {100000, 1564000},
+ .num_clk_values = 2,
+ },
+ .phyclk = {
+ .clk_values_khz = {810000, 810000},
+ .num_clk_values = 2,
+ },
+ .socclk = {
+ .clk_values_khz = {300000, 1200000},
+ .num_clk_values = 2,
+ },
+ .dscclk = {
+ .clk_values_khz = {666667, 666667},
+ .num_clk_values = 2,
+ },
+ .phyclk_d18 = {
+ .clk_values_khz = {625000, 625000},
+ .num_clk_values = 2,
+ },
+ .phyclk_d32 = {
+ .clk_values_khz = {625000, 625000},
+ .num_clk_values = 2,
+ },
+ .dram_config = {
+ .channel_width_bytes = 2,
+ .channel_count = 16,
+ .transactions_per_clock = 16,
+ },
+ },
+
+ .qos_parameters = {
+ .derate_table = {
+ .system_active_urgent = {
+ .dram_derate_percent_pixel = 22,
+ .dram_derate_percent_vm = 0,
+ .dram_derate_percent_pixel_and_vm = 0,
+ .fclk_derate_percent = 76,
+ .dcfclk_derate_percent = 100,
+ },
+ .system_active_average = {
+ .dram_derate_percent_pixel = 15,
+ .dram_derate_percent_vm = 0,
+ .dram_derate_percent_pixel_and_vm = 0,
+ .fclk_derate_percent = 57,
+ .dcfclk_derate_percent = 75,
+ },
+ .dcn_mall_prefetch_urgent = {
+ .dram_derate_percent_pixel = 40,
+ .dram_derate_percent_vm = 0,
+ .dram_derate_percent_pixel_and_vm = 0,
+ .fclk_derate_percent = 83,
+ .dcfclk_derate_percent = 100,
+ },
+ .dcn_mall_prefetch_average = {
+ .dram_derate_percent_pixel = 30,
+ .dram_derate_percent_vm = 0,
+ .dram_derate_percent_pixel_and_vm = 0,
+ .fclk_derate_percent = 62,
+ .dcfclk_derate_percent = 83,
+ },
+ .system_idle_average = {
+ .dram_derate_percent_pixel = 70,
+ .dram_derate_percent_vm = 0,
+ .dram_derate_percent_pixel_and_vm = 0,
+ .fclk_derate_percent = 83,
+ .dcfclk_derate_percent = 100,
+ },
+ },
+ .writeback = {
+ .base_latency_us = 0,
+ .scaling_factor_us = 0,
+ .scaling_factor_mhz = 0,
+ },
+ .qos_params = {
+ .dcn4x = {
+ .df_qos_response_time_fclk_cycles = 300,
+ .max_round_trip_to_furthest_cs_fclk_cycles = 350,
+ .mall_overhead_fclk_cycles = 50,
+ .meta_trip_adder_fclk_cycles = 36,
+ .average_transport_distance_fclk_cycles = 260,
+ .umc_urgent_ramp_latency_margin = 50,
+ .umc_max_latency_margin = 30,
+ .umc_average_latency_margin = 20,
+ .fabric_max_transport_latency_margin = 20,
+ .fabric_average_transport_latency_margin = 10,
+
+ .per_uclk_dpm_params = {
+ {
+ // State 1
+ .minimum_uclk_khz = 0,
+ .urgent_ramp_uclk_cycles = 472,
+ .trip_to_memory_uclk_cycles = 827,
+ .meta_trip_to_memory_uclk_cycles = 827,
+ .maximum_latency_when_urgent_uclk_cycles = 72,
+ .average_latency_when_urgent_uclk_cycles = 72,
+ .maximum_latency_when_non_urgent_uclk_cycles = 827,
+ .average_latency_when_non_urgent_uclk_cycles = 117,
+ },
+ {
+ // State 2
+ .minimum_uclk_khz = 0,
+ .urgent_ramp_uclk_cycles = 546,
+ .trip_to_memory_uclk_cycles = 848,
+ .meta_trip_to_memory_uclk_cycles = 848,
+ .maximum_latency_when_urgent_uclk_cycles = 146,
+ .average_latency_when_urgent_uclk_cycles = 146,
+ .maximum_latency_when_non_urgent_uclk_cycles = 848,
+ .average_latency_when_non_urgent_uclk_cycles = 133,
+ },
+ {
+ // State 3
+ .minimum_uclk_khz = 0,
+ .urgent_ramp_uclk_cycles = 564,
+ .trip_to_memory_uclk_cycles = 853,
+ .meta_trip_to_memory_uclk_cycles = 853,
+ .maximum_latency_when_urgent_uclk_cycles = 164,
+ .average_latency_when_urgent_uclk_cycles = 164,
+ .maximum_latency_when_non_urgent_uclk_cycles = 853,
+ .average_latency_when_non_urgent_uclk_cycles = 136,
+ },
+ {
+ // State 4
+ .minimum_uclk_khz = 0,
+ .urgent_ramp_uclk_cycles = 613,
+ .trip_to_memory_uclk_cycles = 869,
+ .meta_trip_to_memory_uclk_cycles = 869,
+ .maximum_latency_when_urgent_uclk_cycles = 213,
+ .average_latency_when_urgent_uclk_cycles = 213,
+ .maximum_latency_when_non_urgent_uclk_cycles = 869,
+ .average_latency_when_non_urgent_uclk_cycles = 149,
+ },
+ {
+ // State 5
+ .minimum_uclk_khz = 0,
+ .urgent_ramp_uclk_cycles = 632,
+ .trip_to_memory_uclk_cycles = 874,
+ .meta_trip_to_memory_uclk_cycles = 874,
+ .maximum_latency_when_urgent_uclk_cycles = 232,
+ .average_latency_when_urgent_uclk_cycles = 232,
+ .maximum_latency_when_non_urgent_uclk_cycles = 874,
+ .average_latency_when_non_urgent_uclk_cycles = 153,
+ },
+ {
+ // State 6
+ .minimum_uclk_khz = 0,
+ .urgent_ramp_uclk_cycles = 665,
+ .trip_to_memory_uclk_cycles = 885,
+ .meta_trip_to_memory_uclk_cycles = 885,
+ .maximum_latency_when_urgent_uclk_cycles = 265,
+ .average_latency_when_urgent_uclk_cycles = 265,
+ .maximum_latency_when_non_urgent_uclk_cycles = 885,
+ .average_latency_when_non_urgent_uclk_cycles = 161,
+ },
+ {
+ // State 7
+ .minimum_uclk_khz = 0,
+ .urgent_ramp_uclk_cycles = 689,
+ .trip_to_memory_uclk_cycles = 895,
+ .meta_trip_to_memory_uclk_cycles = 895,
+ .maximum_latency_when_urgent_uclk_cycles = 289,
+ .average_latency_when_urgent_uclk_cycles = 289,
+ .maximum_latency_when_non_urgent_uclk_cycles = 895,
+ .average_latency_when_non_urgent_uclk_cycles = 167,
+ },
+ {
+ // State 8
+ .minimum_uclk_khz = 0,
+ .urgent_ramp_uclk_cycles = 716,
+ .trip_to_memory_uclk_cycles = 902,
+ .meta_trip_to_memory_uclk_cycles = 902,
+ .maximum_latency_when_urgent_uclk_cycles = 316,
+ .average_latency_when_urgent_uclk_cycles = 316,
+ .maximum_latency_when_non_urgent_uclk_cycles = 902,
+ .average_latency_when_non_urgent_uclk_cycles = 174,
+ },
+ },
+ },
+ },
+ .qos_type = dml2_qos_param_type_dcn4x,
+ },
+
+ .power_management_parameters = {
+ .dram_clk_change_blackout_us = 400,
+ .fclk_change_blackout_us = 0,
+ .g7_ppt_blackout_us = 0,
+ .stutter_enter_plus_exit_latency_us = 54,
+ .stutter_exit_latency_us = 41,
+ .z8_stutter_enter_plus_exit_latency_us = 0,
+ .z8_stutter_exit_latency_us = 0,
+ /*
+ .g6_temp_read_blackout_us = {
+ 23.00,
+ 10.00,
+ 10.00,
+ 8.00,
+ 8.00,
+ 5.00,
+ 5.00,
+ 5.00,
+ },
+ */
+ },
+
+ .vmin_limit = {
+ .dispclk_khz = 600 * 1000,
+ },
+
+ .dprefclk_mhz = 720,
+ .xtalclk_mhz = 100,
+ .pcie_refclk_mhz = 100,
+ .dchub_refclk_mhz = 50,
+ .mall_allocated_for_dcn_mbytes = 64,
+ .max_outstanding_reqs = 512,
+ .fabric_datapath_to_dcn_data_return_bytes = 64,
+ .return_bus_width_bytes = 64,
+ .hostvm_min_page_size_kbytes = 0,
+ .gpuvm_min_page_size_kbytes = 256,
+ .phy_downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.38,
+ .dispclk_dppclk_vco_speed_mhz = 4500,
+ .do_urgent_latency_adjustment = 0,
+ .mem_word_bytes = 32,
+ .num_dcc_mcaches = 8,
+ .mcache_size_bytes = 2048,
+ .mcache_line_size_bytes = 32,
+ .max_fclk_for_uclk_dpm_khz = 1250 * 1000,
+};
+
+/*
+ * IP capability table for dcn401: pipe/OTG/DSC counts, buffer sizes, flip and
+ * SubVP timing limits, plus the FAMS2 delay and timeout parameters.
+ * NOTE(review): the "max" in the name suggests these are upper bounds that
+ * may be reduced per configuration elsewhere - TODO confirm.
+ */
+static const struct dml2_ip_capabilities dml2_dcn401_max_ip_caps = {
+ .pipe_count = 4,
+ .otg_count = 4,
+ .num_dsc = 4,
+ .max_num_dp2p0_streams = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_dp2p0_outputs = 4,
+ .rob_buffer_size_kbytes = 192,
+ .config_return_buffer_size_in_kbytes = 1344,
+ .config_return_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 22,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .cursor_buffer_size = 24,
+ .max_flip_time_us = 80,
+ .max_flip_time_lines = 32,
+ .hostvm_mode = 0,
+ .subvp_drr_scheduling_margin_us = 100,
+ .subvp_prefetch_end_to_mall_start_us = 15,
+ .subvp_fw_processing_delay = 15,
+ .max_vactive_det_fill_delay_us = 400,
+
+ .fams2 = {
+ .max_allow_delay_us = 100 * 1000,
+ .scheduling_delay_us = 550,
+ .vertical_interrupt_ack_delay_us = 40,
+ .allow_programming_delay_us = 18,
+ .min_allow_width_us = 20,
+ .subvp_df_throttle_delay_us = 100,
+ .subvp_programming_delay_us = 200,
+ .subvp_prefetch_to_mall_delay_us = 18,
+ .drr_programming_delay_us = 35,
+
+ .lock_timeout_us = 5000,
+ .recovery_timeout_us = 5000,
+ .flip_programming_delay_us = 300,
+ },
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml2_external_lib_deps.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml2_external_lib_deps.h
new file mode 100644
index 000000000000..281d7ad230d8
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml2_external_lib_deps.h
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_EXTERNAL_LIB_DEPS__
+#define __DML2_EXTERNAL_LIB_DEPS__
+
+#include "os_types.h"
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top.h
new file mode 100644
index 000000000000..a64ec4dcf11a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top.h
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML_TOP_H__
+#define __DML_TOP_H__
+
+#include "dml_top_types.h"
+
+/*
+ * Top Level Interface for DML2
+ */
+
+/*
+ * Returns the size of the DML instance for the caller to allocate
+ */
+unsigned int dml2_get_instance_size_bytes(void);
+
+/*
+ * Initializes the DML instance (i.e. with configuration, soc BB, IP params, etc...)
+ */
+bool dml2_initialize_instance(struct dml2_initialize_instance_in_out *in_out);
+
+/*
+ * Determines if the input mode is supported (boolean) on the SoC at all. Does not return
+ * information on how mode should be programmed.
+ */
+bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out);
+
+/*
+ * Determines the full (optimized) programming for the input mode. Returns minimum
+ * clocks as well as dchub register programming values for all pipes, and additional
+ * metadata such as ODM or MPCC combine factors.
+ */
+bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out);
+
+/*
+ * Determines the correct per pipe mcache register programming for a valid mode.
+ * The mcache allocation must have been calculated (successfully) in a previous
+ * call to dml2_build_mode_programming.
+ * The hubp viewport dimensions must be what the actual registers will be
+ * programmed to (i.e. based on scaler setup).
+ */
+bool dml2_build_mcache_programming(struct dml2_build_mcache_programming_in_out *in_out);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_dchub_registers.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_dchub_registers.h
new file mode 100644
index 000000000000..bf57df42d1d9
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_dchub_registers.h
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __dml2_TOP_DCHUB_REGISTERS_H__
+#define __dml2_TOP_DCHUB_REGISTERS_H__
+
+#include "dml2_external_lib_deps.h"
+// These types are uint32_t as they represent actual calculated register values for HW
+
+struct dml2_display_dlg_regs {
+ uint32_t refcyc_h_blank_end;
+ uint32_t dlg_vblank_end;
+ uint32_t min_dst_y_next_start;
+ uint32_t refcyc_per_htotal;
+ uint32_t refcyc_x_after_scaler;
+ uint32_t dst_y_after_scaler;
+ uint32_t dst_y_prefetch;
+ uint32_t dst_y_per_vm_vblank;
+ uint32_t dst_y_per_row_vblank;
+ uint32_t dst_y_per_vm_flip;
+ uint32_t dst_y_per_row_flip;
+ uint32_t ref_freq_to_pix_freq;
+ uint32_t vratio_prefetch;
+ uint32_t vratio_prefetch_c;
+ uint32_t refcyc_per_tdlut_group;
+ uint32_t refcyc_per_pte_group_vblank_l;
+ uint32_t refcyc_per_pte_group_vblank_c;
+ uint32_t refcyc_per_pte_group_flip_l;
+ uint32_t refcyc_per_pte_group_flip_c;
+ uint32_t dst_y_per_pte_row_nom_l;
+ uint32_t dst_y_per_pte_row_nom_c;
+ uint32_t refcyc_per_pte_group_nom_l;
+ uint32_t refcyc_per_pte_group_nom_c;
+ uint32_t refcyc_per_line_delivery_pre_l;
+ uint32_t refcyc_per_line_delivery_pre_c;
+ uint32_t refcyc_per_line_delivery_l;
+ uint32_t refcyc_per_line_delivery_c;
+ uint32_t refcyc_per_vm_group_vblank;
+ uint32_t refcyc_per_vm_group_flip;
+ uint32_t refcyc_per_vm_req_vblank;
+ uint32_t refcyc_per_vm_req_flip;
+ uint32_t dst_y_offset_cur0;
+ uint32_t chunk_hdl_adjust_cur0;
+ uint32_t vready_after_vcount0;
+ uint32_t dst_y_delta_drq_limit;
+ uint32_t refcyc_per_vm_dmdata;
+ uint32_t dmdata_dl_delta;
+
+ // MRQ
+ uint32_t refcyc_per_meta_chunk_vblank_l;
+ uint32_t refcyc_per_meta_chunk_vblank_c;
+ uint32_t refcyc_per_meta_chunk_flip_l;
+ uint32_t refcyc_per_meta_chunk_flip_c;
+ uint32_t dst_y_per_meta_row_nom_l;
+ uint32_t dst_y_per_meta_row_nom_c;
+ uint32_t refcyc_per_meta_chunk_nom_l;
+ uint32_t refcyc_per_meta_chunk_nom_c;
+};
+
+struct dml2_display_ttu_regs {
+ uint32_t qos_level_low_wm;
+ uint32_t qos_level_high_wm;
+ uint32_t min_ttu_vblank;
+ uint32_t qos_level_flip;
+ uint32_t refcyc_per_req_delivery_l;
+ uint32_t refcyc_per_req_delivery_c;
+ uint32_t refcyc_per_req_delivery_cur0;
+ uint32_t refcyc_per_req_delivery_pre_l;
+ uint32_t refcyc_per_req_delivery_pre_c;
+ uint32_t refcyc_per_req_delivery_pre_cur0;
+ uint32_t qos_level_fixed_l;
+ uint32_t qos_level_fixed_c;
+ uint32_t qos_level_fixed_cur0;
+ uint32_t qos_ramp_disable_l;
+ uint32_t qos_ramp_disable_c;
+ uint32_t qos_ramp_disable_cur0;
+};
+
+struct dml2_display_arb_regs {
+ uint32_t max_req_outstanding;
+ uint32_t min_req_outstanding;
+ uint32_t sat_level_us;
+ uint32_t hvm_max_qos_commit_threshold;
+ uint32_t hvm_min_req_outstand_commit_threshold;
+ uint32_t compbuf_reserved_space_kbytes;
+ uint32_t compbuf_size;
+ uint32_t sdpif_request_rate_limit;
+ uint32_t allow_sdpif_rate_limit_when_cstate_req;
+ uint32_t dcfclk_deep_sleep_hysteresis;
+ uint32_t pstate_stall_threshold;
+};
+
+struct dml2_cursor_dlg_regs{
+ uint32_t dst_x_offset; // CURSOR0_DST_X_OFFSET
+ uint32_t dst_y_offset; // CURSOR0_DST_Y_OFFSET
+ uint32_t chunk_hdl_adjust; // CURSOR0_CHUNK_HDL_ADJUST
+
+ uint32_t qos_level_fixed;
+ uint32_t qos_ramp_disable;
+};
+
+struct dml2_display_plane_rq_regs {
+ uint32_t chunk_size;
+ uint32_t min_chunk_size;
+ uint32_t dpte_group_size;
+ uint32_t mpte_group_size;
+ uint32_t swath_height;
+ uint32_t pte_row_height_linear;
+
+ // MRQ
+ uint32_t meta_chunk_size;
+ uint32_t min_meta_chunk_size;
+};
+
+struct dml2_display_rq_regs {
+ struct dml2_display_plane_rq_regs rq_regs_l;
+ struct dml2_display_plane_rq_regs rq_regs_c;
+ uint32_t drq_expansion_mode;
+ uint32_t prq_expansion_mode;
+ uint32_t crq_expansion_mode;
+ uint32_t plane1_base_address;
+ uint32_t unbounded_request_enabled;
+ bool pte_buffer_mode;
+ bool force_one_row_for_frame;
+
+ // MRQ
+ uint32_t mrq_expansion_mode;
+};
+
+struct dml2_display_mcache_regs {
+ uint32_t mcache_id_first;
+ uint32_t mcache_id_second;
+ uint32_t split_location;
+};
+
+struct dml2_hubp_pipe_mcache_regs {
+ struct {
+ struct dml2_display_mcache_regs p0;
+ struct dml2_display_mcache_regs p1;
+ } main;
+ struct {
+ struct dml2_display_mcache_regs p0;
+ struct dml2_display_mcache_regs p1;
+ } mall;
+};
+
+struct dml2_dchub_per_pipe_register_set {
+ struct dml2_display_rq_regs rq_regs;
+ struct dml2_display_ttu_regs ttu_regs;
+ struct dml2_display_dlg_regs dlg_regs;
+
+ uint32_t det_size;
+};
+
+struct dml2_dchub_watermark_regs {
+ /* watermarks */
+ uint32_t urgent;
+ uint32_t sr_enter;
+ uint32_t sr_exit;
+ uint32_t sr_enter_z8;
+ uint32_t sr_exit_z8;
+ uint32_t sr_enter_low_power;
+ uint32_t sr_exit_low_power;
+ uint32_t uclk_pstate;
+ uint32_t fclk_pstate;
+ uint32_t temp_read_or_ppt;
+ uint32_t usr;
+ /* qos */
+ uint32_t refcyc_per_trip_to_mem;
+ uint32_t refcyc_per_meta_trip_to_mem;
+ uint32_t frac_urg_bw_flip;
+ uint32_t frac_urg_bw_nom;
+ uint32_t frac_urg_bw_mall;
+};
+
+enum dml2_dchub_watermark_reg_set_index {
+ DML2_DCHUB_WATERMARK_SET_A = 0,
+ DML2_DCHUB_WATERMARK_SET_B = 1,
+ DML2_DCHUB_WATERMARK_SET_C = 2,
+ DML2_DCHUB_WATERMARK_SET_D = 3,
+ DML2_DCHUB_WATERMARK_SET_NUM = 4,
+};
+
+struct dml2_dchub_global_register_set {
+ struct dml2_display_arb_regs arb_regs;
+ struct dml2_dchub_watermark_regs wm_regs[DML2_DCHUB_WATERMARK_SET_NUM];
+ unsigned int num_watermark_sets;
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_display_cfg_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_display_cfg_types.h
new file mode 100644
index 000000000000..35aa954248cd
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_display_cfg_types.h
@@ -0,0 +1,526 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML_TOP_DISPLAY_CFG_TYPES_H__
+#define __DML_TOP_DISPLAY_CFG_TYPES_H__
+
+#include "dml2_external_lib_deps.h"
+
+#define DML2_MAX_PLANES 8
+#define DML2_MAX_DCN_PIPES 8
+#define DML2_MAX_MCACHES 8 // assume plane is going to be supported by a max of 8 mcaches
+#define DML2_MAX_WRITEBACK 3
+
+enum dml2_swizzle_mode {
+ dml2_sw_linear, // SW_LINEAR accepts 256 byte aligned pitch and also 128 byte aligned pitch if DCC is not enabled
+ dml2_sw_256b_2d,
+ dml2_sw_4kb_2d,
+ dml2_sw_64kb_2d,
+ dml2_sw_256kb_2d,
+
+ dml2_gfx11_sw_linear,
+ dml2_gfx11_sw_64kb_d,
+ dml2_gfx11_sw_64kb_d_t,
+ dml2_gfx11_sw_64kb_d_x,
+ dml2_gfx11_sw_64kb_r_x,
+ dml2_gfx11_sw_256kb_d_x,
+ dml2_gfx11_sw_256kb_r_x,
+
+};
+
+enum dml2_source_format_class {
+ dml2_444_8 = 0,
+ dml2_444_16 = 1,
+ dml2_444_32 = 2,
+ dml2_444_64 = 3,
+ dml2_420_8 = 4,
+ dml2_420_10 = 5,
+ dml2_420_12 = 6,
+ dml2_rgbe_alpha = 9,
+ dml2_rgbe = 10,
+ dml2_mono_8 = 11,
+ dml2_mono_16 = 12,
+ dml2_422_planar_8 = 13,
+ dml2_422_planar_10 = 14,
+ dml2_422_planar_12 = 15,
+ dml2_422_packed_8 = 16,
+ dml2_422_packed_10 = 17,
+ dml2_422_packed_12 = 18
+};
+
+enum dml2_sample_positioning {
+ dml2_interstitial = 0,
+ dml2_cosited = 1
+};
+
+enum dml2_rotation_angle {
+ dml2_rotation_0 = 0,
+ dml2_rotation_90 = 1,
+ dml2_rotation_180 = 2,
+ dml2_rotation_270 = 3
+};
+
+enum dml2_output_format_class {
+ dml2_444 = 0,
+ dml2_s422 = 1,
+ dml2_n422 = 2,
+ dml2_420 = 3
+};
+
+enum dml2_output_encoder_class {
+ dml2_dp = 0,
+ dml2_edp = 1,
+ dml2_dp2p0 = 2,
+ dml2_hdmi = 3,
+ dml2_hdmifrl = 4,
+ dml2_none = 5
+};
+
+enum dml2_output_link_dp_rate {
+ dml2_dp_rate_na = 0,
+ dml2_dp_rate_hbr = 1,
+ dml2_dp_rate_hbr2 = 2,
+ dml2_dp_rate_hbr3 = 3,
+ dml2_dp_rate_uhbr10 = 4,
+ dml2_dp_rate_uhbr13p5 = 5,
+ dml2_dp_rate_uhbr20 = 6
+};
+
+enum dml2_pstate_type {
+ dml2_pstate_type_uclk = 0,
+ dml2_pstate_type_fclk = 1,
+ dml2_pstate_type_ppt = 2,
+ dml2_pstate_type_temp_read = 3,
+ dml2_pstate_type_dummy_pstate = 4,
+ dml2_pstate_type_count = 5
+};
+
+enum dml2_uclk_pstate_change_strategy {
+ dml2_uclk_pstate_change_strategy_auto = 0,
+ dml2_uclk_pstate_change_strategy_force_vactive = 1,
+ dml2_uclk_pstate_change_strategy_force_vblank = 2,
+ dml2_uclk_pstate_change_strategy_force_drr = 3,
+ dml2_uclk_pstate_change_strategy_force_mall_svp = 4,
+ dml2_uclk_pstate_change_strategy_force_mall_full_frame = 5,
+};
+
+enum dml2_svp_mode_override {
+ dml2_svp_mode_override_auto = 0,
+ dml2_svp_mode_override_main_pipe = 1,
+ dml2_svp_mode_override_phantom_pipe = 2, //does not need to be defined explicitly, main overrides result in implicit phantom additions
+ dml2_svp_mode_override_phantom_pipe_no_data_return = 3,
+ dml2_svp_mode_override_imall = 4
+};
+
+enum dml2_refresh_from_mall_mode_override {
+ dml2_refresh_from_mall_mode_override_auto = 0,
+ dml2_refresh_from_mall_mode_override_force_disable = 1,
+ dml2_refresh_from_mall_mode_override_force_enable = 2
+};
+
+enum dml2_odm_mode {
+ dml2_odm_mode_auto = 0,
+ dml2_odm_mode_bypass,
+ dml2_odm_mode_combine_2to1,
+ dml2_odm_mode_combine_3to1,
+ dml2_odm_mode_combine_4to1,
+ dml2_odm_mode_split_1to2,
+ dml2_odm_mode_mso_1to2,
+ dml2_odm_mode_mso_1to4
+};
+
+enum dml2_scaling_transform {
+ dml2_scaling_transform_explicit = 0,
+ dml2_scaling_transform_fullscreen,
+ dml2_scaling_transform_aspect_ratio,
+ dml2_scaling_transform_centered
+};
+
+enum dml2_dsc_enable_option {
+ dml2_dsc_disable = 0,
+ dml2_dsc_enable = 1,
+ dml2_dsc_enable_if_necessary = 2
+};
+
+enum dml2_tdlut_addressing_mode {
+ dml2_tdlut_sw_linear = 0,
+ dml2_tdlut_simple_linear = 1
+};
+
+enum dml2_tdlut_width_mode {
+ dml2_tdlut_width_17_cube = 0,
+ dml2_tdlut_width_33_cube = 1
+};
+
+enum dml2_twait_budgeting_setting {
+ dml2_twait_budgeting_setting_ignore = 0,// Ignore this budget in twait
+
+ dml2_twait_budgeting_setting_if_needed, // Budget for it only if needed
+ //(i.e. UCLK/FCLK DPM cannot be supported in active)
+
+ dml2_twait_budgeting_setting_try, // Budget for it as long as there is an SoC state that
+ // can support it
+};
+
+struct dml2_get_cursor_dlg_reg{
+ unsigned int cursor_x_position;
+ unsigned int cursor_hotspot_x;
+ unsigned int cursor_primary_offset;
+ unsigned int cursor_secondary_offset;
+ bool cursor_stereo_en;
+ bool cursor_2x_magnify;
+ double hratio;
+ double pixel_rate_mhz;
+ double dlg_refclk_mhz;
+};
+
+/// @brief Surface Parameters
+struct dml2_surface_cfg {
+ enum dml2_swizzle_mode tiling;
+
+ struct {
+ unsigned long pitch; // In elements, two pixels per element in 422 packed format
+ unsigned long width;
+ unsigned long height;
+ } plane0;
+
+
+ struct {
+ unsigned long pitch;
+ unsigned long width;
+ unsigned long height;
+ } plane1;
+
+ struct {
+ bool enable;
+ struct {
+ unsigned long pitch;
+ } plane0;
+ struct {
+ unsigned long pitch;
+ } plane1;
+
+ struct {
+ double dcc_rate_plane0;
+ double dcc_rate_plane1;
+ double fraction_of_zero_size_request_plane0;
+ double fraction_of_zero_size_request_plane1;
+ } informative;
+ } dcc;
+};
+
+
+struct dml2_composition_cfg {
+ enum dml2_rotation_angle rotation_angle;
+ bool mirrored;
+ enum dml2_scaling_transform scaling_transform;
+ bool rect_out_height_spans_vactive;
+
+ struct {
+ bool stationary;
+ struct {
+ unsigned long width;
+ unsigned long height;
+ unsigned long x_start;
+ unsigned long y_start;
+ } plane0;
+
+ struct {
+ unsigned long width;
+ unsigned long height;
+ unsigned long x_start;
+ unsigned long y_start;
+ } plane1;
+ } viewport;
+
+ struct {
+ bool enabled;
+ bool easf_enabled;
+ bool isharp_enabled;
+ bool upsp_enabled;
+ enum dml2_sample_positioning upsp_sample_positioning;
+ unsigned int upsp_vtaps;
+ struct {
+ double h_ratio;
+ double v_ratio;
+ unsigned int h_taps;
+ unsigned int v_taps;
+ } plane0;
+
+ struct {
+ double h_ratio;
+ double v_ratio;
+ unsigned int h_taps;
+ unsigned int v_taps;
+ } plane1;
+
+ unsigned long rect_out_width;
+ } scaler_info;
+};
+
+struct dml2_timing_cfg {
+ unsigned long h_total;
+ unsigned long v_total;
+ unsigned long h_blank_end;
+ unsigned long v_blank_end;
+ unsigned long h_front_porch;
+ unsigned long v_front_porch;
+ unsigned long h_sync_width;
+ unsigned long pixel_clock_khz;
+ unsigned long h_active;
+ unsigned long v_active;
+ unsigned int bpc; //FIXME: review with Jun
+ struct {
+ enum dml2_dsc_enable_option enable;
+ unsigned int dsc_compressed_bpp_x16;
+ struct {
+ // for dv to specify num dsc slices to use
+ unsigned int num_slices;
+ } overrides;
+ } dsc;
+ bool interlaced;
+ struct {
+ /* static */
+ bool enabled;
+ unsigned long min_refresh_uhz;
+ unsigned int max_instant_vtotal_delta;
+ /* dynamic */
+ bool disallowed;
+ bool drr_active_variable;
+ bool drr_active_fixed;
+ } drr_config;
+ unsigned long vblank_nom;
+};
+
+struct dml2_link_output_cfg {
+ enum dml2_output_format_class output_format;
+ enum dml2_output_encoder_class output_encoder;
+ unsigned int output_dp_lane_count;
+ enum dml2_output_link_dp_rate output_dp_link_rate;
+ unsigned long audio_sample_rate;
+ unsigned long audio_sample_layout;
+ bool output_disabled; // The stream does not go to a backend for output to a physical
+ //connector (e.g. writeback only, phantom pipe) goes to writeback
+ bool validate_output; // Do not validate the link configuration for this display stream. NOTE(review): wording contradicts the field name; confirm polarity against users of this flag.
+};
+
+struct dml2_writeback_info {
+ enum dml2_source_format_class pixel_format;
+ unsigned long input_width;
+ unsigned long input_height;
+ unsigned long output_width;
+ unsigned long output_height;
+ unsigned long v_taps;
+ unsigned long h_taps;
+ unsigned long v_taps_chroma;
+ unsigned long h_taps_chroma;
+ double h_ratio;
+ double v_ratio;
+};
+
+struct dml2_writeback_cfg {
+ unsigned int active_writebacks_per_stream;
+ struct dml2_writeback_info writeback_stream[DML2_MAX_WRITEBACK];
+};
+
+struct dml2_plane_parameters {
+ unsigned int stream_index; // Identifies which plane will be composed
+
+ enum dml2_source_format_class pixel_format;
+ /*
+ * The surface and composition structures use
+ * the terms plane0 and plane1. These planes
+ * are expected to hold the following data based
+ * on the pixel format.
+ *
+ * RGB or YUV Non-Planar Types:
+ * dml2_444_8
+ * dml2_444_16
+ * dml2_444_32
+ * dml2_444_64
+ * dml2_rgbe
+ *
+ * plane0 = argb or rgbe
+ * plane1 = not used
+ *
+ * YUV Planar-Types:
+ * dml2_420_8
+ * dml2_420_10
+ * dml2_420_12
+ *
+ * plane0 = luma
+ * plane1 = chroma
+ *
+ * RGB Planar Types:
+ * dml2_rgbe_alpha
+ *
+ * plane0 = rgbe
+ * plane1 = alpha
+ *
+ * Mono Non-Planar Types:
+ * dml2_mono_8
+ * dml2_mono_16
+ *
+ * plane0 = luma
+ * plane1 = not used
+ */
+
+ struct dml2_surface_cfg surface;
+ struct dml2_composition_cfg composition;
+
+ struct {
+ bool enable;
+ unsigned long lines_before_active_required;
+ unsigned long transmitted_bytes;
+ } dynamic_meta_data;
+
+ struct {
+ unsigned int num_cursors;
+ unsigned long cursor_width;
+ unsigned long cursor_bpp;
+ } cursor;
+
+ // For TDLUT, SW would assume TDLUT is set up and enabled all the time and
+ // budget for worst case addressing/width mode
+ struct {
+ bool setup_for_tdlut;
+ enum dml2_tdlut_addressing_mode tdlut_addressing_mode;
+ enum dml2_tdlut_width_mode tdlut_width_mode;
+ bool tdlut_mpc_width_flag;
+ } tdlut;
+
+ bool immediate_flip;
+
+ struct {
+ // Logical overrides to power management policies (usually)
+ enum dml2_uclk_pstate_change_strategy uclk_pstate_change_strategy;
+ enum dml2_refresh_from_mall_mode_override refresh_from_mall;
+ unsigned int det_size_override_kb;
+ unsigned int mpcc_combine_factor;
+
+ // reserved_vblank_time_ns is the minimum time to reserve in vblank for Twait
+ // The actual reserved vblank time used for the corresponding stream in mode_programming would be at least as much as this per-plane override.
+ long reserved_vblank_time_ns;
+ unsigned int max_vactive_det_fill_delay_us[dml2_pstate_type_count]; // 0 = no reserved time, +ve = explicit max delay
+ unsigned int gpuvm_min_page_size_kbytes;
+ unsigned int hostvm_min_page_size_kbytes;
+
+ enum dml2_svp_mode_override legacy_svp_config; //TODO remove in favor of svp_config
+
+ struct {
+ // HW specific overrides, there's almost no reason to mess with these
+ // generally used for debugging or simulation
+ bool force_one_row_for_frame;
+ struct {
+ bool enable;
+ bool value;
+ } force_pte_buffer_mode;
+ double dppclk_mhz;
+ } hw;
+ } overrides;
+};
+
+struct dml2_stream_parameters {
+ struct dml2_timing_cfg timing;
+ struct dml2_link_output_cfg output;
+ struct dml2_writeback_cfg writeback;
+
+ struct {
+ enum dml2_odm_mode odm_mode;
+ bool disable_dynamic_odm;
+ bool disable_subvp;
+ int minimum_vblank_idle_requirement_us;
+
+ struct {
+ struct {
+ enum dml2_twait_budgeting_setting uclk_pstate;
+ enum dml2_twait_budgeting_setting fclk_pstate;
+ enum dml2_twait_budgeting_setting stutter_enter_exit;
+ } twait_budgeting;
+ } hw;
+ } overrides;
+};
+
+struct dml2_display_cfg {
+ bool gpuvm_enable;
+ bool ffbm_enable;
+ bool hostvm_enable;
+
+ // Allocate DET proportionally between streams based on pixel rate
+ // and then allocate proportionally between planes.
+ bool minimize_det_reallocation;
+
+ unsigned int gpuvm_max_page_table_levels;
+ unsigned int hostvm_max_non_cached_page_table_levels;
+
+ struct dml2_plane_parameters plane_descriptors[DML2_MAX_PLANES];
+ struct dml2_stream_parameters stream_descriptors[DML2_MAX_PLANES];
+
+ unsigned int num_planes;
+ unsigned int num_streams;
+
+ struct {
+ struct {
+ // HW specific overrides, there's almost no reason to mess with these
+ // generally used for debugging or simulation
+ struct {
+ bool enable;
+ bool value;
+ } force_unbounded_requesting;
+
+ struct {
+ bool enable;
+ bool value;
+ } force_nom_det_size_kbytes;
+
+ bool mode_support_check_disable;
+ bool mcache_admissibility_check_disable;
+ bool surface_viewport_size_check_disable;
+ double dlg_ref_clk_mhz;
+ double dispclk_mhz;
+ double dcfclk_mhz;
+ bool optimize_tdlut_scheduling; // TBD: for DV, will set this to 1, to ensure tdlut schedule is calculated based on address/width mode
+ } hw;
+
+ struct {
+ bool uclk_pstate_change_disable;
+ bool fclk_pstate_change_disable;
+ bool g6_temp_read_pstate_disable;
+ bool g7_ppt_pstate_disable;
+ } power_management;
+
+ bool enhanced_prefetch_schedule_acceleration;
+ bool dcc_programming_assumes_scan_direction_unknown;
+ bool synchronize_timings;
+ bool synchronize_ddr_displays_for_uclk_pstate_change;
+ bool max_outstanding_when_urgent_expected_disable;
+ bool enable_subvp_implicit_pmo; //enables PMO to switch pipe uclk strategy to subvp, and generate phantom programming
+ bool all_streams_blanked;
+ } overrides;
+};
+
+struct dml2_pipe_configuration_descriptor {
+ struct {
+ unsigned int viewport_x_start;
+ unsigned int viewport_width;
+ } plane0;
+
+ struct {
+ unsigned int viewport_x_start;
+ unsigned int viewport_width;
+ } plane1;
+
+ bool plane1_enabled;
+ bool imall_enabled;
+};
+
+struct dml2_plane_mcache_configuration_descriptor {
+ const struct dml2_plane_parameters *plane_descriptor;
+ const struct dml2_mcache_surface_allocation *mcache_allocation;
+
+ struct dml2_pipe_configuration_descriptor pipe_configurations[DML2_MAX_DCN_PIPES];
+ char num_pipes;
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_policy_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_policy_types.h
new file mode 100644
index 000000000000..8f624a912e78
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_policy_types.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML_TOP_POLICY_TYPES_H__
+#define __DML_TOP_POLICY_TYPES_H__
+
+struct dml2_policy_parameters {
+ unsigned long odm_combine_dispclk_threshold_khz;
+ unsigned int max_immediate_flip_latency;
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_soc_parameter_types.h
new file mode 100644
index 000000000000..1fbc520c2540
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_soc_parameter_types.h
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML_TOP_SOC_PARAMETER_TYPES_H__
+#define __DML_TOP_SOC_PARAMETER_TYPES_H__
+
+#include "dml2_external_lib_deps.h"
+
+#define DML_MAX_CLK_TABLE_SIZE 20
+
+struct dml2_soc_derate_values {
+ unsigned int dram_derate_percent_pixel;
+ unsigned int dram_derate_percent_vm;
+ unsigned int dram_derate_percent_pixel_and_vm;
+
+ unsigned int fclk_derate_percent;
+ unsigned int dcfclk_derate_percent;
+};
+
+struct dml2_soc_derates {
+ struct dml2_soc_derate_values system_active_urgent;
+ struct dml2_soc_derate_values system_active_average;
+ struct dml2_soc_derate_values dcn_mall_prefetch_urgent;
+ struct dml2_soc_derate_values dcn_mall_prefetch_average;
+ struct dml2_soc_derate_values system_idle_average;
+};
+
+struct dml2_dcn32x_soc_qos_params {
+ struct {
+ unsigned int base_latency_us;
+ unsigned int base_latency_pixel_vm_us;
+ unsigned int base_latency_vm_us;
+ unsigned int scaling_factor_fclk_us;
+ unsigned int scaling_factor_mhz;
+ } urgent_latency_us;
+
+ unsigned int loaded_round_trip_latency_fclk_cycles;
+ unsigned int urgent_out_of_order_return_per_channel_pixel_only_bytes;
+ unsigned int urgent_out_of_order_return_per_channel_pixel_and_vm_bytes;
+ unsigned int urgent_out_of_order_return_per_channel_vm_only_bytes;
+};
+
+struct dml2_dcn4_uclk_dpm_dependent_qos_params {
+ unsigned long minimum_uclk_khz;
+ unsigned int urgent_ramp_uclk_cycles;
+ unsigned int trip_to_memory_uclk_cycles;
+ unsigned int meta_trip_to_memory_uclk_cycles;
+ unsigned int maximum_latency_when_urgent_uclk_cycles;
+ unsigned int average_latency_when_urgent_uclk_cycles;
+ unsigned int maximum_latency_when_non_urgent_uclk_cycles;
+ unsigned int average_latency_when_non_urgent_uclk_cycles;
+};
+
+struct dml2_dcn4x_soc_qos_params {
+ unsigned int df_qos_response_time_fclk_cycles;
+ unsigned int max_round_trip_to_furthest_cs_fclk_cycles;
+ unsigned int mall_overhead_fclk_cycles;
+ unsigned int meta_trip_adder_fclk_cycles;
+ unsigned int average_transport_distance_fclk_cycles;
+ double umc_urgent_ramp_latency_margin;
+ double umc_max_latency_margin;
+ double umc_average_latency_margin;
+ double fabric_max_transport_latency_margin;
+ double fabric_average_transport_latency_margin;
+ struct dml2_dcn4_uclk_dpm_dependent_qos_params per_uclk_dpm_params[DML_MAX_CLK_TABLE_SIZE];
+};
+
+enum dml2_qos_param_type {
+ dml2_qos_param_type_dcn3,
+ dml2_qos_param_type_dcn4x
+};
+
+struct dml2_soc_qos_parameters {
+ struct dml2_soc_derates derate_table;
+ struct {
+ unsigned int base_latency_us;
+ unsigned int scaling_factor_us;
+ unsigned int scaling_factor_mhz;
+ } writeback;
+
+ union {
+ struct dml2_dcn32x_soc_qos_params dcn32x;
+ struct dml2_dcn4x_soc_qos_params dcn4x;
+ } qos_params;
+
+ enum dml2_qos_param_type qos_type;
+};
+
+struct dml2_soc_power_management_parameters {
+ double dram_clk_change_blackout_us;
+ double dram_clk_change_read_only_us; // deprecated
+ double dram_clk_change_write_only_us; // deprecated
+ double fclk_change_blackout_us;
+ double g7_ppt_blackout_us;
+ double g7_temperature_read_blackout_us;
+ double stutter_enter_plus_exit_latency_us;
+ double stutter_exit_latency_us;
+ double low_power_stutter_enter_plus_exit_latency_us;
+ double low_power_stutter_exit_latency_us;
+ double z8_stutter_enter_plus_exit_latency_us;
+ double z8_stutter_exit_latency_us;
+ double z8_min_idle_time;
+ double g6_temp_read_blackout_us[DML_MAX_CLK_TABLE_SIZE];
+ double type_b_dram_clk_change_blackout_us;
+ double type_b_ppt_blackout_us;
+};
+
+struct dml2_clk_table {
+ unsigned long clk_values_khz[DML_MAX_CLK_TABLE_SIZE];
+ unsigned char num_clk_values;
+};
+
+struct dml2_dram_params {
+ unsigned int channel_width_bytes;
+ unsigned int channel_count;
+ unsigned int transactions_per_clock;
+};
+
+struct dml2_soc_state_table {
+ struct dml2_clk_table uclk;
+ struct dml2_clk_table fclk;
+ struct dml2_clk_table dcfclk;
+ struct dml2_clk_table dispclk;
+ struct dml2_clk_table dppclk;
+ struct dml2_clk_table dtbclk;
+ struct dml2_clk_table phyclk;
+ struct dml2_clk_table socclk;
+ struct dml2_clk_table dscclk;
+ struct dml2_clk_table phyclk_d18;
+ struct dml2_clk_table phyclk_d32;
+
+ struct dml2_dram_params dram_config;
+};
+
+struct dml2_soc_vmin_clock_limits {
+ unsigned long dispclk_khz;
+ unsigned long dcfclk_khz;
+};
+
+struct dml2_soc_bb {
+ struct dml2_soc_state_table clk_table;
+ struct dml2_soc_qos_parameters qos_parameters;
+ struct dml2_soc_power_management_parameters power_management_parameters;
+ struct dml2_soc_vmin_clock_limits vmin_limit;
+
+ double lower_bound_bandwidth_dchub;
+ double fraction_of_urgent_bandwidth_nominal_target;
+ double fraction_of_urgent_bandwidth_flip_target;
+ unsigned int dprefclk_mhz;
+ unsigned int xtalclk_mhz;
+ unsigned int pcie_refclk_mhz;
+ unsigned int dchub_refclk_mhz;
+ unsigned int mall_allocated_for_dcn_mbytes;
+ unsigned int max_outstanding_reqs;
+ unsigned long fabric_datapath_to_dcn_data_return_bytes;
+ unsigned long return_bus_width_bytes;
+ unsigned long hostvm_min_page_size_kbytes;
+ unsigned long gpuvm_min_page_size_kbytes;
+ double phy_downspread_percent;
+ double dcn_downspread_percent;
+ double dispclk_dppclk_vco_speed_mhz;
+ bool no_dfs;
+ bool do_urgent_latency_adjustment;
+ unsigned int mem_word_bytes;
+ unsigned int num_dcc_mcaches;
+ unsigned int mcache_size_bytes;
+ unsigned int mcache_line_size_bytes;
+ unsigned long max_fclk_for_uclk_dpm_khz;
+};
+
+struct dml2_ip_capabilities {
+ unsigned int pipe_count;
+ unsigned int otg_count;
+ unsigned int TDLUT_33cube_count;
+ unsigned int num_dsc;
+ unsigned int max_num_dp2p0_streams;
+ unsigned int max_num_hdmi_frl_outputs;
+ unsigned int max_num_dp2p0_outputs;
+ unsigned int max_num_wb;
+ unsigned int rob_buffer_size_kbytes;
+ unsigned int config_return_buffer_size_in_kbytes;
+ unsigned int config_return_buffer_segment_size_in_kbytes;
+ unsigned int meta_fifo_size_in_kentries;
+ unsigned int compressed_buffer_segment_size_in_kbytes;
+ unsigned int cursor_buffer_size;
+ unsigned int max_flip_time_us;
+ unsigned int max_flip_time_lines;
+ unsigned int hostvm_mode;
+ unsigned int subvp_drr_scheduling_margin_us;
+ unsigned int subvp_prefetch_end_to_mall_start_us;
+ unsigned int subvp_fw_processing_delay;
+ unsigned int max_vactive_det_fill_delay_us;
+ unsigned int ppt_max_allow_delay_us;
+ unsigned int temp_read_max_allow_delay_us;
+ unsigned int dummy_pstate_max_allow_delay_us;
+ /* FAMS2 delays */
+ struct {
+ unsigned int max_allow_delay_us;
+ unsigned int scheduling_delay_us;
+ unsigned int vertical_interrupt_ack_delay_us; // delay to acknowledge vline int
+ unsigned int allow_programming_delay_us; // time required to program allow
+ unsigned int min_allow_width_us;
+ unsigned int subvp_df_throttle_delay_us;
+ unsigned int subvp_programming_delay_us;
+ unsigned int subvp_prefetch_to_mall_delay_us;
+ unsigned int drr_programming_delay_us;
+
+ unsigned int lock_timeout_us;
+ unsigned int recovery_timeout_us;
+ unsigned int flip_programming_delay_us;
+ } fams2;
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_types.h
new file mode 100644
index 000000000000..452e4a2e72c0
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_types.h
@@ -0,0 +1,744 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML_TOP_TYPES_H__
+#define __DML_TOP_TYPES_H__
+
+#include "dml_top_display_cfg_types.h"
+#include "dml_top_soc_parameter_types.h"
+#include "dml_top_policy_types.h"
+#include "dml_top_dchub_registers.h"
+
+#include "dmub_cmd.h"
+
+struct dml2_instance;
+
+enum dml2_project_id {
+ dml2_project_invalid = 0,
+ dml2_project_dcn4x_stage1,
+ dml2_project_dcn4x_stage2,
+ dml2_project_dcn4x_stage2_auto_drr_svp,
+};
+
+enum dml2_pstate_change_support { // explicitly numbered; used e.g. as the element type of dml2_mode_support_info.FCLKChangeSupport[]
+ dml2_pstate_change_vactive = 0,
+ dml2_pstate_change_vblank = 1,
+ dml2_pstate_change_vblank_and_vactive = 2,
+ dml2_pstate_change_drr = 3,
+ dml2_pstate_change_mall_svp = 4,
+ dml2_pstate_change_mall_full_frame = 6, // value 5 is not assigned; NOTE(review): confirm the gap is intentional
+ dml2_pstate_change_unsupported = 7
+};
+
+enum dml2_output_type_and_rate__type {
+ dml2_output_type_unknown = 0,
+ dml2_output_type_dp = 1,
+ dml2_output_type_edp = 2,
+ dml2_output_type_dp2p0 = 3,
+ dml2_output_type_hdmi = 4,
+ dml2_output_type_hdmifrl = 5
+};
+
+enum dml2_output_type_and_rate__rate {
+ dml2_output_rate_unknown = 0,
+ dml2_output_rate_dp_rate_hbr = 1,
+ dml2_output_rate_dp_rate_hbr2 = 2,
+ dml2_output_rate_dp_rate_hbr3 = 3,
+ dml2_output_rate_dp_rate_uhbr10 = 4,
+ dml2_output_rate_dp_rate_uhbr13p5 = 5,
+ dml2_output_rate_dp_rate_uhbr20 = 6,
+ dml2_output_rate_hdmi_rate_3x3 = 7,
+ dml2_output_rate_hdmi_rate_6x3 = 8,
+ dml2_output_rate_hdmi_rate_6x4 = 9,
+ dml2_output_rate_hdmi_rate_8x4 = 10,
+ dml2_output_rate_hdmi_rate_10x4 = 11,
+ dml2_output_rate_hdmi_rate_12x4 = 12,
+ dml2_output_rate_hdmi_rate_16x4 = 13,
+ dml2_output_rate_hdmi_rate_20x4 = 14
+};
+
+struct dml2_pmo_options {
+ bool disable_vblank;
+ bool disable_svp;
+ bool disable_drr_var;
+ bool disable_drr_clamped;
+ bool disable_drr_var_when_var_active;
+ bool disable_drr_clamped_when_var_active;
+ bool disable_fams2;
+ bool disable_vactive_det_fill_bw_pad; /* dml2_project_dcn4x_stage2_auto_drr_svp and above only */
+ bool disable_dyn_odm;
+ bool disable_dyn_odm_for_multi_stream;
+ bool disable_dyn_odm_for_stream_with_svp;
+ struct dml2_pmo_pstate_strategy *override_strategy_lists[DML2_MAX_PLANES];
+ unsigned int num_override_strategies_per_list[DML2_MAX_PLANES];
+};
+
+struct dml2_options {
+ enum dml2_project_id project_id;
+ struct dml2_pmo_options pmo_options;
+};
+
+struct dml2_initialize_instance_in_out {
+ struct dml2_instance *dml2_instance;
+ struct dml2_options options;
+ struct dml2_soc_bb soc_bb;
+ struct dml2_ip_capabilities ip_caps;
+
+ struct {
+ void *explicit_ip_bb;
+ unsigned int explicit_ip_bb_size;
+ } overrides;
+};
+
+struct dml2_reset_instance_in_out {
+ struct dml2_instance *dml2_instance;
+};
+
+struct dml2_check_mode_supported_in_out {
+ /*
+ * Inputs
+ */
+ struct dml2_instance *dml2_instance;
+ const struct dml2_display_cfg *display_config;
+
+ /*
+ * Outputs
+ */
+ bool is_supported;
+};
+
+struct dml2_mcache_surface_allocation {
+ bool valid;
+ /*
+ * For iMALL, dedicated mall mcaches are required (sharing of last
+ * slice possible), for legacy phantom or phantom without return
+ * only the mall mcaches need to be valid.
+ */
+ bool requires_dedicated_mall_mcache;
+
+ unsigned int num_mcaches_plane0;
+ unsigned int num_mcaches_plane1;
+ /*
+ * A plane is divided into vertical slices of mcaches,
+ * which wrap on the surface width.
+ *
+ * For example, if the surface width is 7680, and split into
+ * three slices of equal width, the boundary array would contain
+ * [2560, 5120, 7680]
+ *
+ * The assignments are
+ * 0 = [0 .. 2559]
+ * 1 = [2560 .. 5119]
+ * 2 = [5120 .. 7679]
+ * 0 = [7680 .. INF]
+ * The final element implicitly is the same as the first, and
+ * at first seems invalid since it is never referenced (since
+ * it is outside the surface). However, it's useful when shifting
+ * (see below).
+ *
+ * For any given valid mcache assignment, a shifted version, wrapped
+ * on the surface width boundary is also assumed to be valid.
+ *
+ * For example, shifting [2560, 5120, 7680] by -50 results in
+ * [2510, 5170, 7630]. (NOTE(review): 5120 - 50 is 5070; the 5170 here and below looks like a typo -- confirm.)
+ *
+ * The assignments are now:
+ * 0 = [0 .. 2509]
+ * 1 = [2510 .. 5169]
+ * 2 = [5170 .. 7629]
+ * 0 = [7630 .. INF]
+ */
+ int mcache_x_offsets_plane0[DML2_MAX_MCACHES + 1];
+ int mcache_x_offsets_plane1[DML2_MAX_MCACHES + 1];
+
+ /*
+ * Shift granularity is not necessarily 1
+ */
+ struct {
+ int p0;
+ int p1;
+ } shift_granularity;
+
+ /*
+ * MCacheIDs have global scope in the SoC, and they are stored here.
+ * These IDs are generally not valid until all planes in a display
+ * configuration have had their mcache requirements calculated.
+ */
+ int global_mcache_ids_plane0[DML2_MAX_MCACHES + 1];
+ int global_mcache_ids_plane1[DML2_MAX_MCACHES + 1];
+ int global_mcache_ids_mall_plane0[DML2_MAX_MCACHES + 1];
+ int global_mcache_ids_mall_plane1[DML2_MAX_MCACHES + 1];
+
+ /*
+ * Generally, plane0/1 slices must use a disjoint set of caches
+ * but in some cases the final segment of the two planes can
+ * use the same cache. If plane0_plane1 is set, then this is
+ * allowed.
+ *
+ * Similarly, the caches allocated to MALL prefetcher are generally
+ * disjoint, but if mall_prefetch is set, then the final segment
+ * between the main and the mall pixel requestor can use the same
+ * cache. NOTE(review): no field named mall_prefetch exists below; presumably mall_comb_mcache_p0/p1 -- confirm.
+ *
+ * Note that both bits may be set at the same time.
+ */
+ struct {
+ bool mall_comb_mcache_p0;
+ bool mall_comb_mcache_p1;
+ bool plane0_plane1;
+ } last_slice_sharing;
+
+ struct {
+ int meta_row_bytes_plane0;
+ int meta_row_bytes_plane1;
+ } informative;
+};
+
+enum dml2_pstate_method { // explicitly numbered with gaps between groups (3-4, 7-9 and 14-19 are unassigned)
+ dml2_pstate_method_na = 0,
+ /* hw exclusive modes */
+ dml2_pstate_method_vactive = 1,
+ dml2_pstate_method_vblank = 2,
+ dml2_pstate_method_reserved_hw = 5, // NOTE(review): presumably marks the end of the hw range -- confirm
+ /* fw assisted exclusive modes */
+ dml2_pstate_method_fw_svp = 6,
+ dml2_pstate_method_reserved_fw = 10,
+ /* fw assisted modes requiring drr modulation */
+ dml2_pstate_method_fw_vactive_drr = 11,
+ dml2_pstate_method_fw_vblank_drr = 12,
+ dml2_pstate_method_fw_svp_drr = 13,
+ dml2_pstate_method_reserved_fw_drr_clamped = 20,
+ dml2_pstate_method_fw_drr = 21,
+ dml2_pstate_method_reserved_fw_drr_var = 22,
+ dml2_pstate_method_count // implicitly 23 (previous enumerator + 1); not the number of named methods because of the gaps
+};
+
+struct dml2_per_plane_programming {
+ const struct dml2_plane_parameters *plane_descriptor;
+
+ union {
+ struct {
+ unsigned long dppclk_khz;
+ } dcn4x;
+ } min_clocks;
+
+ struct dml2_mcache_surface_allocation mcache_allocation;
+
+ // If a stream is using automatic or forced odm combine
+ // and the stream for this plane has num_odms_required > 1
+ // num_dpps_required is always equal to num_odms_required for
+ // ALL planes of the stream
+
+ // If a stream is using odm split, then this value is always 1
+ unsigned int num_dpps_required;
+
+ enum dml2_pstate_method uclk_pstate_support_method;
+
+ // MALL size requirements for MALL SS and SubVP
+ unsigned int surface_size_mall_bytes;
+ unsigned int svp_size_mall_bytes;
+
+ struct dml2_dchub_per_pipe_register_set *pipe_regs[DML2_MAX_PLANES];
+
+ struct {
+ bool valid;
+ struct dml2_plane_parameters descriptor;
+ struct dml2_mcache_surface_allocation mcache_allocation;
+ struct dml2_dchub_per_pipe_register_set *pipe_regs[DML2_MAX_PLANES];
+ } phantom_plane;
+};
+
+union dml2_global_sync_programming {
+ struct {
+ unsigned int vstartup_lines;
+ unsigned int vupdate_offset_pixels;
+ unsigned int vupdate_vupdate_width_pixels;
+ unsigned int vready_offset_pixels;
+ unsigned int pstate_keepout_start_lines;
+ } dcn4x;
+};
+
+struct dml2_per_stream_programming {
+ const struct dml2_stream_parameters *stream_descriptor;
+
+ union {
+ struct {
+ unsigned long dscclk_khz;
+ unsigned long dtbclk_khz;
+ unsigned long phyclk_khz;
+ } dcn4x;
+ } min_clocks;
+
+ union dml2_global_sync_programming global_sync;
+
+ unsigned int num_odms_required;
+
+ enum dml2_pstate_method uclk_pstate_method;
+
+ struct {
+ bool enabled;
+ struct dml2_stream_parameters descriptor;
+ union dml2_global_sync_programming global_sync;
+ } phantom_stream;
+
+ union dmub_cmd_fams2_config fams2_base_params;
+ union {
+ union dmub_cmd_fams2_config fams2_sub_params;
+ union dmub_fams2_stream_static_sub_state_v2 fams2_sub_params_v2;
+ };
+};
+
+//-----------------
+// Mode Support Information
+//-----------------
+
+struct dml2_mode_support_info {
+ bool ModeIsSupported; //<brief Whether the mode is supported at any voltage and combine setting
+ bool ImmediateFlipSupport; //<brief Whether the mode supports immediate flip at the max combine setting; determined in mode support and used in mode programming
+ // Mode Support Reason. NOTE(review): names mix positive (...Support) and negative (NotEnough..., Exceeded...) senses -- check each flag's polarity at its use site
+ bool WritebackLatencySupport;
+ bool ScaleRatioAndTapsSupport;
+ bool SourceFormatPixelAndScanSupport;
+ bool P2IWith420;
+ bool DSCOnlyIfNecessaryWithBPP;
+ bool DSC422NativeNotSupported;
+ bool LinkRateDoesNotMatchDPVersion;
+ bool LinkRateForMultistreamNotIndicated;
+ bool BPPForMultistreamNotIndicated;
+ bool MultistreamWithHDMIOreDP;
+ bool MSOOrODMSplitWithNonDPLink;
+ bool NotEnoughLanesForMSO;
+ bool NumberOfOTGSupport;
+ bool NumberOfHDMIFRLSupport;
+ bool NumberOfDP2p0Support;
+ bool NumberOfTDLUT33cubeSupport;
+ bool WritebackScaleRatioAndTapsSupport;
+ bool CursorSupport;
+ bool PitchSupport;
+ bool ViewportExceedsSurface;
+ bool ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified;
+ bool ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe;
+ bool InvalidCombinationOfMALLUseForPStateAndStaticScreen;
+ bool InvalidCombinationOfMALLUseForPState;
+ bool ExceededMALLSize;
+ bool EnoughWritebackUnits;
+ bool ExceededMultistreamSlots;
+ bool NotEnoughDSCUnits;
+ bool NotEnoughDSCSlices;
+ bool PixelsPerLinePerDSCUnitSupport;
+ bool DSCCLKRequiredMoreThanSupported;
+ bool DTBCLKRequiredMoreThanSupported;
+ bool LinkCapacitySupport;
+ bool ROBSupport;
+ bool OutstandingRequestsSupport;
+ bool OutstandingRequestsUrgencyAvoidance;
+ bool PTEBufferSizeNotExceeded;
+ bool DCCMetaBufferSizeNotExceeded;
+ bool TotalVerticalActiveBandwidthSupport;
+ bool VActiveBandwidthSupport;
+ enum dml2_pstate_change_support FCLKChangeSupport[DML2_MAX_PLANES];
+ bool USRRetrainingSupport;
+ bool PrefetchSupported;
+ bool DynamicMetadataSupported;
+ bool VRatioInPrefetchSupported;
+ bool DISPCLK_DPPCLK_Support;
+ bool TotalAvailablePipesSupport;
+ bool ViewportSizeSupport;
+ bool ImmediateFlipSupportedForState;
+ double MaxTotalVerticalActiveAvailableBandwidth;
+ bool MPCCombineEnable[DML2_MAX_PLANES]; /// <brief Indicates whether MPC combine is enabled in the given state with the optimized MPC combine setting
+ enum dml2_odm_mode ODMMode[DML2_MAX_PLANES]; /// <brief ODM mode that is chosen in the mode check stage and will be used in the mode programming stage
+ unsigned int DPPPerSurface[DML2_MAX_PLANES]; /// <brief How many DPPs are needed to drive the surface to output. With MPCC or ODMC this could be 2 or 4.
+ bool DSCEnabled[DML2_MAX_PLANES]; /// <brief Indicates whether DSC is actually required; used in mode_programming
+ bool FECEnabled[DML2_MAX_PLANES]; /// <brief Indicates whether FEC is actually required
+ unsigned int NumberOfDSCSlices[DML2_MAX_PLANES]; /// <brief Indicates how many slices are needed to support the given mode
+ double OutputBpp[DML2_MAX_PLANES];
+ enum dml2_output_type_and_rate__type OutputType[DML2_MAX_PLANES];
+ enum dml2_output_type_and_rate__rate OutputRate[DML2_MAX_PLANES];
+ unsigned int AlignedYPitch[DML2_MAX_PLANES];
+ unsigned int AlignedCPitch[DML2_MAX_PLANES];
+ bool g6_temp_read_support;
+ bool temp_read_or_ppt_support;
+ bool qos_bandwidth_support;
+ bool dcfclk_support;
+}; // dml2_mode_support_info
+
+struct dml2_display_cfg_programming {
+ struct dml2_display_cfg display_config;
+
+ union {
+ struct {
+ unsigned long dcfclk_khz;
+ unsigned long fclk_khz;
+ unsigned long uclk_khz;
+ unsigned long socclk_khz;
+ unsigned long dispclk_khz;
+ unsigned long dcfclk_deepsleep_khz;
+ unsigned long dpp_ref_khz;
+ } dcn32x;
+ struct {
+ struct {
+ unsigned long uclk_khz;
+ unsigned long fclk_khz;
+ unsigned long dcfclk_khz;
+ } active;
+ struct {
+ unsigned long uclk_khz;
+ unsigned long fclk_khz;
+ unsigned long dcfclk_khz;
+ } idle;
+ struct {
+ unsigned long uclk_khz;
+ unsigned long fclk_khz;
+ unsigned long dcfclk_khz;
+ } svp_prefetch;
+ struct {
+ unsigned long uclk_khz;
+ unsigned long fclk_khz;
+ unsigned long dcfclk_khz;
+ } svp_prefetch_no_throttle;
+
+ unsigned long deepsleep_dcfclk_khz;
+ unsigned long dispclk_khz;
+ unsigned long dpprefclk_khz;
+ unsigned long dtbrefclk_khz;
+ unsigned long socclk_khz;
+
+ struct {
+ uint32_t dispclk_did;
+ uint32_t dpprefclk_did;
+ uint32_t dtbrefclk_did;
+ } divider_ids;
+ } dcn4x;
+ } min_clocks;
+
+ bool uclk_pstate_supported;
+ bool fclk_pstate_supported;
+
+ /* indicates this configuration requires FW to support */
+ bool fams2_required;
+ struct dmub_cmd_fams2_global_config fams2_global_config;
+
+ struct {
+ bool supported_in_blank; // Changing to configurations where this is false requires stutter to be disabled during the transition
+ uint8_t base_percent_efficiency; //LP1
+ uint8_t low_power_percent_efficiency; //LP2
+ } stutter;
+
+ struct {
+ bool meets_eco; // Stutter cycles will meet Z8 ECO criteria
+ bool supported_in_blank; // Changing to configurations where this is false requires Z8 to be disabled during the transition
+ } z8_stutter;
+
+ struct dml2_dchub_global_register_set global_regs;
+
+ struct dml2_per_plane_programming plane_programming[DML2_MAX_PLANES];
+ struct dml2_per_stream_programming stream_programming[DML2_MAX_PLANES];
+
+ // Don't access this structure directly, access it through plane_programming.pipe_regs
+ struct dml2_dchub_per_pipe_register_set pipe_regs[DML2_MAX_PLANES];
+
+ struct {
+ struct {
+ double urgent_us;
+ double writeback_urgent_us;
+ double writeback_pstate_us;
+ double writeback_fclk_pstate_us;
+ double cstate_exit_us;
+ double cstate_enter_plus_exit_us;
+ double z8_cstate_exit_us;
+ double z8_cstate_enter_plus_exit_us;
+ double pstate_change_us;
+ double fclk_pstate_change_us;
+ double usr_retraining_us;
+ double temp_read_or_ppt_watermark_us;
+ } watermarks;
+
+ struct {
+ unsigned int swath_width_plane0;
+ unsigned int swath_height_plane0;
+ unsigned int swath_height_plane1;
+ unsigned int dpte_row_height_plane0;
+ unsigned int dpte_row_height_plane1;
+ unsigned int meta_row_height_plane0;
+ unsigned int meta_row_height_plane1;
+ } plane_info[DML2_MAX_PLANES];
+
+ struct {
+ unsigned int total_num_dpps_required;
+ } dpp;
+
+ struct {
+ unsigned long long total_surface_size_in_mall_bytes;
+ unsigned int subviewport_lines_needed_in_mall[DML2_MAX_PLANES];
+ } mall;
+
+ struct {
+ double urgent_latency_us; // urgent ramp latency
+ double max_non_urgent_latency_us;
+ double max_urgent_latency_us;
+ double avg_non_urgent_latency_us;
+ double avg_urgent_latency_us;
+ double wm_memory_trip_us;
+ double meta_trip_memory_us;
+ double fraction_of_urgent_bandwidth; // nom
+ double fraction_of_urgent_bandwidth_immediate_flip;
+ double fraction_of_urgent_bandwidth_mall;
+ double max_active_fclk_change_latency_supported;
+ unsigned int min_return_latency_in_dcfclk;
+
+ struct {
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ double dram_vm_only_bw_mbps;
+ } svp_prefetch;
+
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ double dram_vm_only_bw_mbps;
+ } sys_active;
+ } urg_bw_available;
+
+ struct {
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } svp_prefetch;
+
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } sys_active;
+ } avg_bw_available;
+
+ struct {
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } svp_prefetch;
+
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } sys_active;
+ } non_urg_bw_required;
+
+ struct {
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } svp_prefetch;
+
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } sys_active;
+ } non_urg_bw_required_with_flip;
+
+ struct {
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } svp_prefetch;
+
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } sys_active;
+
+ } urg_bw_required;
+
+ struct {
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } svp_prefetch;
+
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } sys_active;
+ } urg_bw_required_with_flip;
+
+ struct {
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } svp_prefetch;
+
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } sys_active;
+ } avg_bw_required;
+ } qos;
+
+ struct {
+ unsigned long long det_size_in_kbytes[DML2_MAX_PLANES];
+ unsigned long long DETBufferSizeY[DML2_MAX_PLANES];
+ unsigned long long comp_buffer_size_kbytes;
+ bool UnboundedRequestEnabled;
+ unsigned int compbuf_reserved_space_64b;
+ } crb;
+
+ struct {
+ unsigned int max_uncompressed_block_plane0;
+ unsigned int max_compressed_block_plane0;
+ unsigned int independent_block_plane0;
+ unsigned int max_uncompressed_block_plane1;
+ unsigned int max_compressed_block_plane1;
+ unsigned int independent_block_plane1;
+ } dcc_control[DML2_MAX_PLANES];
+
+ struct {
+ double stutter_efficiency;
+ double stutter_efficiency_with_vblank;
+ double stutter_num_bursts;
+
+ struct {
+ double stutter_efficiency;
+ double stutter_efficiency_with_vblank;
+ double stutter_num_bursts;
+ double stutter_period;
+
+ struct {
+ double stutter_efficiency;
+ double stutter_num_bursts;
+ double stutter_period;
+ } bestcase;
+ } z8;
+ } power_management;
+
+ struct {
+ double min_ttu_vblank_us[DML2_MAX_PLANES];
+ bool vready_at_or_after_vsync[DML2_MAX_PLANES];
+ double min_dst_y_next_start[DML2_MAX_PLANES];
+ bool cstate_max_cap_mode;
+ bool hw_debug5;
+ unsigned int dcfclk_deep_sleep_hysteresis;
+ unsigned int dst_x_after_scaler[DML2_MAX_PLANES];
+ unsigned int dst_y_after_scaler[DML2_MAX_PLANES];
+ unsigned int prefetch_source_lines_plane0[DML2_MAX_PLANES];
+ unsigned int prefetch_source_lines_plane1[DML2_MAX_PLANES];
+ bool ImmediateFlipSupportedForPipe[DML2_MAX_PLANES];
+ bool UsesMALLForStaticScreen[DML2_MAX_PLANES];
+ unsigned int CursorDstXOffset[DML2_MAX_PLANES];
+ unsigned int CursorDstYOffset[DML2_MAX_PLANES];
+ unsigned int CursorChunkHDLAdjust[DML2_MAX_PLANES];
+ unsigned int dpte_group_bytes[DML2_MAX_PLANES];
+ unsigned int vm_group_bytes[DML2_MAX_PLANES];
+ double DisplayPipeRequestDeliveryTimeLuma[DML2_MAX_PLANES];
+ double DisplayPipeRequestDeliveryTimeChroma[DML2_MAX_PLANES];
+ double DisplayPipeRequestDeliveryTimeLumaPrefetch[DML2_MAX_PLANES];
+ double DisplayPipeRequestDeliveryTimeChromaPrefetch[DML2_MAX_PLANES];
+ double TimePerVMGroupVBlank[DML2_MAX_PLANES];
+ double TimePerVMGroupFlip[DML2_MAX_PLANES];
+ double TimePerVMRequestVBlank[DML2_MAX_PLANES];
+ double TimePerVMRequestFlip[DML2_MAX_PLANES];
+ double Tdmdl_vm[DML2_MAX_PLANES];
+ double Tdmdl[DML2_MAX_PLANES];
+ unsigned int VStartup[DML2_MAX_PLANES];
+ unsigned int VUpdateOffsetPix[DML2_MAX_PLANES];
+ unsigned int VUpdateWidthPix[DML2_MAX_PLANES];
+ unsigned int VReadyOffsetPix[DML2_MAX_PLANES];
+
+ double DST_Y_PER_PTE_ROW_NOM_L[DML2_MAX_PLANES];
+ double DST_Y_PER_PTE_ROW_NOM_C[DML2_MAX_PLANES];
+ double time_per_pte_group_nom_luma[DML2_MAX_PLANES];
+ double time_per_pte_group_nom_chroma[DML2_MAX_PLANES];
+ double time_per_pte_group_vblank_luma[DML2_MAX_PLANES];
+ double time_per_pte_group_vblank_chroma[DML2_MAX_PLANES];
+ double time_per_pte_group_flip_luma[DML2_MAX_PLANES];
+ double time_per_pte_group_flip_chroma[DML2_MAX_PLANES];
+ double VRatioPrefetchY[DML2_MAX_PLANES];
+ double VRatioPrefetchC[DML2_MAX_PLANES];
+ double DestinationLinesForPrefetch[DML2_MAX_PLANES];
+ double DestinationLinesToRequestVMInVBlank[DML2_MAX_PLANES];
+ double DestinationLinesToRequestRowInVBlank[DML2_MAX_PLANES];
+ double DestinationLinesToRequestVMInImmediateFlip[DML2_MAX_PLANES];
+ double DestinationLinesToRequestRowInImmediateFlip[DML2_MAX_PLANES];
+ double DisplayPipeLineDeliveryTimeLuma[DML2_MAX_PLANES];
+ double DisplayPipeLineDeliveryTimeChroma[DML2_MAX_PLANES];
+ double DisplayPipeLineDeliveryTimeLumaPrefetch[DML2_MAX_PLANES];
+ double DisplayPipeLineDeliveryTimeChromaPrefetch[DML2_MAX_PLANES];
+
+ double WritebackRequiredBandwidth;
+ double WritebackAllowDRAMClockChangeEndPosition[DML2_MAX_PLANES];
+ double WritebackAllowFCLKChangeEndPosition[DML2_MAX_PLANES];
+ double DSCCLK_calculated[DML2_MAX_PLANES];
+ unsigned int BIGK_FRAGMENT_SIZE[DML2_MAX_PLANES];
+ bool PTE_BUFFER_MODE[DML2_MAX_PLANES];
+ double DSCDelay[DML2_MAX_PLANES];
+ double MaxActiveDRAMClockChangeLatencySupported[DML2_MAX_PLANES];
+ unsigned int PrefetchMode[DML2_MAX_PLANES]; // LEGACY_ONLY
+ bool ROBUrgencyAvoidance;
+ double LowestPrefetchMargin;
+
+ unsigned int pstate_recout_reduction_lines[DML2_MAX_PLANES];
+ } misc;
+
+ struct dml2_mode_support_info mode_support_info;
+ unsigned int voltage_level; // LEGACY_ONLY
+
+ // For DV only
+ // This is what dml core calculated, only on the full_vp width and assume we have
+ // unlimited # of mcache
+ struct dml2_mcache_surface_allocation non_optimized_mcache_allocation[DML2_MAX_PLANES];
+
+ bool failed_prefetch;
+ bool failed_uclk_pstate;
+ bool failed_mcache_validation;
+ bool failed_dpmm;
+ bool failed_mode_programming;
+ bool failed_mode_programming_dcfclk;
+ bool failed_mode_programming_prefetch;
+ bool failed_mode_programming_flip;
+ bool failed_map_watermarks;
+ } informative;
+};
+
+struct dml2_build_mode_programming_in_out {
+ /*
+ * Inputs
+ */
+ struct dml2_instance *dml2_instance;
+ const struct dml2_display_cfg *display_config;
+
+ /*
+ * Outputs
+ */
+ struct dml2_display_cfg_programming *programming;
+};
+
+struct dml2_build_mcache_programming_in_out {
+ /*
+ * Inputs
+ */
+ struct dml2_instance *dml2_instance;
+
+ struct dml2_plane_mcache_configuration_descriptor mcache_configurations[DML2_MAX_PLANES];
+ char num_configurations;
+
+ /*
+ * Outputs
+ */
+ // per_plane_pipe_mcache_regs[i][j] refers to the proper programming for the j-th pipe of the
+ // i-th plane (from mcache_configurations)
+ struct dml2_hubp_pipe_mcache_regs *per_plane_pipe_mcache_regs[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES];
+
+ // It's not a good idea to reference this directly, better to use the pointer structure above instead
+ struct dml2_hubp_pipe_mcache_regs mcache_regs_set[DML2_MAX_DCN_PIPES];
+};
+
+struct dml2_unit_test_in_out {
+ /*
+ * Inputs
+ */
+ struct dml2_instance *dml2_instance;
+};
+
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.c
new file mode 100644
index 000000000000..eba948e187c1
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.c
@@ -0,0 +1,661 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dml2_internal_shared_types.h"
+#include "dml2_core_shared_types.h"
+#include "dml2_core_dcn4.h"
+#include "dml2_core_dcn4_calcs.h"
+#include "dml2_debug.h"
+#include "lib_float_math.h"
+
+struct dml2_core_ip_params core_dcn4_ip_caps_base = {
+ // Hardcoded default values; copied by core_dcn4_initialize() when no explicit IP block is supplied. NOTE(review): originally labelled "DCN3x" although this is the DCN4 table -- confirm.
+ .vblank_nom_default_us = 668,
+ .remote_iommu_outstanding_translations = 256,
+ .rob_buffer_size_kbytes = 128,
+ .config_return_buffer_size_in_kbytes = 1280,
+ .config_return_buffer_segment_size_in_kbytes = 64,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .dpte_buffer_size_in_pte_reqs_luma = 68,
+ .dpte_buffer_size_in_pte_reqs_chroma = 36,
+ .pixel_chunk_size_kbytes = 8,
+ .alpha_pixel_chunk_size_kbytes = 4,
+ .min_pixel_chunk_size_bytes = 1024,
+ .writeback_chunk_size_kbytes = 8,
+ .line_buffer_size_bits = 1171920,
+ .max_line_buffer_lines = 32,
+ .writeback_interface_buffer_size_kbytes = 90,
+ // Number of pipes after DCN pipe harvesting
+ .max_num_dpp = 4,
+ .max_num_opp = 4,
+ .max_num_otg = 4,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dispclk_ramp_margin_percent = 1,
+ .dppclk_delay_subtotal = 47,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 28,
+ .dppclk_delay_cnvc_cursor = 6,
+ .cursor_buffer_size = 24,
+ .cursor_chunk_size = 2,
+ .dispclk_delay_subtotal = 125,
+ .max_inter_dcn_tile_repeaters = 8,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .writeback_line_buffer_buffer_size = 0,
+ .num_dsc = 4,
+ .maximum_dsc_bits_per_component = 12,
+ .maximum_pixels_per_line_per_dsc_unit = 5760,
+ .dsc422_native_support = true,
+ .dcc_supported = true,
+ .ptoi_supported = false,
+
+ .cursor_64bpp_support = true,
+ .dynamic_metadata_vm_enabled = false,
+
+ .max_num_dp2p0_outputs = 4,
+ .max_num_dp2p0_streams = 4,
+ .imall_supported = 1, // core_dcn4_initialize() forces this to false after copying the table on the default path
+ .max_flip_time_us = 80,
+ .max_flip_time_lines = 32,
+ .words_per_channel = 16,
+
+ .subvp_fw_processing_delay_us = 15,
+ .subvp_pstate_allow_width_us = 20,
+ .subvp_swath_height_margin_lines = 16,
+};
+
+// Mirror an explicit dml2_core_ip_params block into ip_caps; the three subvp_* caps are hard-coded constants, not read from ip_params.
+static void patch_ip_caps_with_explicit_ip_params(struct dml2_ip_capabilities *ip_caps, const struct dml2_core_ip_params *ip_params)
+{
+ ip_caps->rob_buffer_size_kbytes = ip_params->rob_buffer_size_kbytes; // buffer and FIFO sizing
+ ip_caps->config_return_buffer_size_in_kbytes = ip_params->config_return_buffer_size_in_kbytes;
+ ip_caps->config_return_buffer_segment_size_in_kbytes = ip_params->config_return_buffer_segment_size_in_kbytes;
+ ip_caps->compressed_buffer_segment_size_in_kbytes = ip_params->compressed_buffer_segment_size_in_kbytes;
+ ip_caps->meta_fifo_size_in_kentries = ip_params->meta_fifo_size_in_kentries;
+ ip_caps->cursor_buffer_size = ip_params->cursor_buffer_size;
+ ip_caps->pipe_count = ip_params->max_num_dpp; // unit counts (ip_caps names the DPP count "pipe_count")
+ ip_caps->otg_count = ip_params->max_num_otg;
+ ip_caps->num_dsc = ip_params->num_dsc;
+ ip_caps->max_num_dp2p0_outputs = ip_params->max_num_dp2p0_outputs;
+ ip_caps->max_num_dp2p0_streams = ip_params->max_num_dp2p0_streams;
+ ip_caps->max_num_hdmi_frl_outputs = ip_params->max_num_hdmi_frl_outputs;
+ ip_caps->max_flip_time_lines = ip_params->max_flip_time_lines; // flip limits and host VM mode
+ ip_caps->max_flip_time_us = ip_params->max_flip_time_us;
+ ip_caps->hostvm_mode = ip_params->hostvm_mode;
+ // FIXME_STAGE2: cleanup after adding all dv override to ip_caps
+ ip_caps->subvp_fw_processing_delay = 16;
+ ip_caps->subvp_prefetch_end_to_mall_start_us = 15;
+ ip_caps->subvp_drr_scheduling_margin_us = 100;
+}
+
+// Inverse mapping: overwrite the ip_caps-backed fields of ip_params with the values held in ip_caps.
+static void patch_ip_params_with_ip_caps(struct dml2_core_ip_params *ip_params, const struct dml2_ip_capabilities *ip_caps)
+{
+ ip_params->rob_buffer_size_kbytes = ip_caps->rob_buffer_size_kbytes; // buffer and FIFO sizing
+ ip_params->config_return_buffer_size_in_kbytes = ip_caps->config_return_buffer_size_in_kbytes;
+ ip_params->config_return_buffer_segment_size_in_kbytes = ip_caps->config_return_buffer_segment_size_in_kbytes;
+ ip_params->compressed_buffer_segment_size_in_kbytes = ip_caps->compressed_buffer_segment_size_in_kbytes;
+ ip_params->meta_fifo_size_in_kentries = ip_caps->meta_fifo_size_in_kentries;
+ ip_params->cursor_buffer_size = ip_caps->cursor_buffer_size;
+ ip_params->max_num_dpp = ip_caps->pipe_count; // unit counts (ip_caps names the DPP count "pipe_count")
+ ip_params->max_num_otg = ip_caps->otg_count;
+ ip_params->num_dsc = ip_caps->num_dsc;
+ ip_params->max_num_dp2p0_outputs = ip_caps->max_num_dp2p0_outputs;
+ ip_params->max_num_dp2p0_streams = ip_caps->max_num_dp2p0_streams;
+ ip_params->max_num_hdmi_frl_outputs = ip_caps->max_num_hdmi_frl_outputs;
+ ip_params->max_flip_time_lines = ip_caps->max_flip_time_lines; // flip limits and host VM mode
+ ip_params->max_flip_time_us = ip_caps->max_flip_time_us;
+ ip_params->hostvm_mode = ip_caps->hostvm_mode;
+}
+
+// Initialize in_out->instance for DCN4. Returns false, leaving the core untouched, when no minimum clock table is supplied; true otherwise.
+bool core_dcn4_initialize(struct dml2_core_initialize_in_out *in_out)
+{
+ struct dml2_core_instance *core = in_out->instance;
+
+ if (!in_out->minimum_clock_table)
+ return false;
+ core->minimum_clock_table = in_out->minimum_clock_table;
+
+ if (in_out->explicit_ip_bb && in_out->explicit_ip_bb_size > 0) {
+ // NOTE(review): explicit_ip_bb_size is not checked against sizeof(mode_lib.ip) before this copy -- confirm callers bound it
+ memcpy(&core->clean_me_up.mode_lib.ip, in_out->explicit_ip_bb, in_out->explicit_ip_bb_size);
+ // FIXME_STAGE2:
+ // DV still uses stage1 ip_param_st for each variant, need to patch the ip_caps with ip_param info
+ // Should move DV to use ip_caps but need move more overrides to ip_caps
+ patch_ip_caps_with_explicit_ip_params(in_out->ip_caps, in_out->explicit_ip_bb);
+ // The subvp_* parameters always come from the built-in base table, each from its same-named entry
+ core->clean_me_up.mode_lib.ip.subvp_pstate_allow_width_us = core_dcn4_ip_caps_base.subvp_pstate_allow_width_us;
+ core->clean_me_up.mode_lib.ip.subvp_fw_processing_delay_us = core_dcn4_ip_caps_base.subvp_fw_processing_delay_us;
+ core->clean_me_up.mode_lib.ip.subvp_swath_height_margin_lines = core_dcn4_ip_caps_base.subvp_swath_height_margin_lines;
+ } else {
+ memcpy(&core->clean_me_up.mode_lib.ip, &core_dcn4_ip_caps_base, sizeof(struct dml2_core_ip_params));
+ patch_ip_params_with_ip_caps(&core->clean_me_up.mode_lib.ip, in_out->ip_caps);
+ core->clean_me_up.mode_lib.ip.imall_supported = false; // overrides the base table's imall_supported = 1 on this default path
+ }
+
+ memcpy(&core->clean_me_up.mode_lib.soc, in_out->soc_bb, sizeof(struct dml2_soc_bb));
+ memcpy(&core->clean_me_up.mode_lib.ip_caps, in_out->ip_caps, sizeof(struct dml2_ip_capabilities));
+ return true;
+}
+
+// Build the phantom stream descriptor: a byte copy of the main stream whose vertical timing comes from the implicit-SVP meta, with DRR disabled.
+static void create_phantom_stream_from_main_stream(struct dml2_stream_parameters *phantom, const struct dml2_stream_parameters *main, const struct dml2_implicit_svp_meta *meta)
+{
+ memcpy(phantom, main, sizeof(*phantom));
+
+ phantom->timing.drr_config.enabled = false;
+ phantom->timing.v_front_porch = meta->v_front_porch;
+ phantom->timing.v_active = meta->v_active;
+ phantom->timing.v_total = meta->v_total;
+ phantom->timing.vblank_nom = phantom->timing.v_total - phantom->timing.v_active; // derived from the values just written
+ phantom->timing.v_blank_end = phantom->timing.v_total - phantom->timing.v_front_porch - phantom->timing.v_active;
+}
+
+// Build the phantom plane from its main plane: bind it to the phantom stream, cap both viewport heights, and turn off flip/metadata/cursor/TDLUT.
+static void create_phantom_plane_from_main_plane(struct dml2_plane_parameters *phantom, const struct dml2_plane_parameters *main,
+ const struct dml2_stream_parameters *phantom_stream, int phantom_stream_index, const struct dml2_stream_parameters *main_stream)
+{
+ double scaled_lines; // v_ratio * phantom v_active after math_ceil2(..., 16.0); main_stream is accepted but not read here
+
+ memcpy(phantom, main, sizeof(*phantom));
+ phantom->stream_index = phantom_stream_index;
+ phantom->overrides.legacy_svp_config = dml2_svp_mode_override_phantom_pipe_no_data_return;
+ phantom->overrides.refresh_from_mall = dml2_refresh_from_mall_mode_override_force_disable;
+ scaled_lines = math_ceil2((double)main->composition.scaler_info.plane0.v_ratio * (double)phantom_stream->timing.v_active, 16.0);
+ phantom->composition.viewport.plane0.height = (unsigned long)math_min2(scaled_lines, (double)main->composition.viewport.plane0.height);
+ scaled_lines = math_ceil2((double)main->composition.scaler_info.plane1.v_ratio * (double)phantom_stream->timing.v_active, 16.0);
+ phantom->composition.viewport.plane1.height = (unsigned long)math_min2(scaled_lines, (double)main->composition.viewport.plane1.height);
+ phantom->tdlut.setup_for_tdlut = false;
+ phantom->cursor.cursor_width = 0;
+ phantom->cursor.num_cursors = 0;
+ phantom->dynamic_meta_data.enable = false;
+ phantom->immediate_flip = false;
+}
+
+ /*
+ * expand_implict_subvp() - build the "SVP expanded" copy of a display config.
+ *
+ * Copies display_cfg->display_config into svp_expanded_display_cfg and clears three
+ * of the index maps kept in scratch. When overrides.enable_subvp_implicit_pmo is set,
+ * a phantom stream and phantom plane(s) are appended after the main entries for every
+ * stream whose stage3.stream_svp_meta[] entry is valid, and the main<->phantom index
+ * maps in scratch are filled in. Nothing is returned; all results are written through
+ * svp_expanded_display_cfg and scratch.
+ *
+ * NOTE(review): no check is visible here that the appended entries fit in
+ * stream_descriptors[] / plane_descriptors[]; presumably capacity is guaranteed
+ * elsewhere - confirm.
+ */
+ static void expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg,
+ struct dml2_core_scratch *scratch)
+ {
+ unsigned int stream_index, plane_index;
+ const struct dml2_plane_parameters *main_plane;
+ const struct dml2_stream_parameters *main_stream;
+ const struct dml2_stream_parameters *phantom_stream;
+
+ memcpy(svp_expanded_display_cfg, &display_cfg->display_config, sizeof(struct dml2_display_cfg));
+ // phantom_plane_index_to_main_plane_index is not cleared here; only the entries for
+ // phantom planes appended below are ever written
+ memset(scratch->main_stream_index_from_svp_stream_index, 0, sizeof(int) * DML2_MAX_PLANES);
+ memset(scratch->svp_stream_index_from_main_stream_index, 0, sizeof(int) * DML2_MAX_PLANES);
+ memset(scratch->main_plane_index_to_phantom_plane_index, 0, sizeof(int) * DML2_MAX_PLANES);
+
+ // Without implicit SubVP the expanded config stays a plain copy and the maps stay zeroed
+ if (!display_cfg->display_config.overrides.enable_subvp_implicit_pmo)
+ return;
+
+ /* disable unbounded requesting for all planes until stage 3 has been performed */
+ if (!display_cfg->stage3.performed) {
+ svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.enable = true;
+ svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.value = false;
+ }
+ // Create the phantom streams
+ // (the loop bound is the unexpanded stream count, so appended phantom streams are not revisited)
+ for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) {
+ main_stream = &display_cfg->display_config.stream_descriptors[stream_index];
+ // Start from the identity mapping; overridden below when a phantom stream is created
+ scratch->main_stream_index_from_svp_stream_index[stream_index] = stream_index;
+ scratch->svp_stream_index_from_main_stream_index[stream_index] = stream_index;
+
+ if (display_cfg->stage3.stream_svp_meta[stream_index].valid) {
+ // Create the phantom stream
+ create_phantom_stream_from_main_stream(&svp_expanded_display_cfg->stream_descriptors[svp_expanded_display_cfg->num_streams],
+ main_stream, &display_cfg->stage3.stream_svp_meta[stream_index]);
+
+ // Associate this phantom stream to the main stream
+ scratch->main_stream_index_from_svp_stream_index[svp_expanded_display_cfg->num_streams] = stream_index;
+ scratch->svp_stream_index_from_main_stream_index[stream_index] = svp_expanded_display_cfg->num_streams;
+
+ // Increment num streams
+ svp_expanded_display_cfg->num_streams++;
+ }
+ }
+
+ // Create the phantom planes
+ // (one phantom plane is appended for each main plane whose stream has valid SVP meta)
+ for (plane_index = 0; plane_index < display_cfg->display_config.num_planes; plane_index++) {
+ main_plane = &display_cfg->display_config.plane_descriptors[plane_index];
+
+ if (display_cfg->stage3.stream_svp_meta[main_plane->stream_index].valid) {
+ main_stream = &display_cfg->display_config.stream_descriptors[main_plane->stream_index];
+ phantom_stream = &svp_expanded_display_cfg->stream_descriptors[scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index]];
+ create_phantom_plane_from_main_plane(&svp_expanded_display_cfg->plane_descriptors[svp_expanded_display_cfg->num_planes],
+ main_plane, phantom_stream, scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index], main_stream);
+
+ // Associate this phantom plane to the main plane
+ scratch->phantom_plane_index_to_main_plane_index[svp_expanded_display_cfg->num_planes] = plane_index;
+ scratch->main_plane_index_to_phantom_plane_index[plane_index] = svp_expanded_display_cfg->num_planes;
+
+ // Increment num planes
+ svp_expanded_display_cfg->num_planes++;
+
+ // Adjust the main plane settings
+ svp_expanded_display_cfg->plane_descriptors[plane_index].overrides.legacy_svp_config = dml2_svp_mode_override_main_pipe;
+ }
+ }
+ }
+
+ /*
+ * pack_mode_programming_params_with_implicit_subvp() - fill *programming for a config using implicit SubVP.
+ *
+ * The display config copied to the output is the unexpanded one (main streams and
+ * planes only). Phantom stream/plane data taken from svp_expanded_display_cfg is
+ * folded into the programming entry of the corresponding main stream/plane, using
+ * the index maps stored in scratch.
+ *
+ * dml_internal_pipe_index advances once per DPP across all main planes and then keeps
+ * counting across the phantom planes, and total_pipe_regs_copied indexes
+ * programming->pipe_regs[] the same way, so the phantom plane loop has to run after
+ * the main plane loop.
+ */
+ static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_instance *core, const struct display_configuation_with_meta *display_cfg,
+ const struct dml2_display_cfg *svp_expanded_display_cfg, struct dml2_display_cfg_programming *programming, struct dml2_core_scratch *scratch)
+ {
+ unsigned int stream_index, plane_index, pipe_offset, stream_already_populated_mask, main_plane_index, mcache_index;
+ unsigned int total_main_mcaches_required = 0;
+ int total_pipe_regs_copied = 0;
+ int dml_internal_pipe_index = 0;
+ const struct dml2_plane_parameters *main_plane;
+ const struct dml2_plane_parameters *phantom_plane;
+ const struct dml2_stream_parameters *main_stream;
+ const struct dml2_stream_parameters *phantom_stream;
+
+ // Copy the unexpanded display config to output
+ memcpy(&programming->display_config, &display_cfg->display_config, sizeof(struct dml2_display_cfg));
+
+ // Set the global register values
+ dml2_core_calcs_get_arb_params(&display_cfg->display_config, &core->clean_me_up.mode_lib, &programming->global_regs.arb_regs);
+ // Get watermarks uses display config for ref clock override, so it doesn't matter whether we pass the pre or post expansion
+ // display config
+ dml2_core_calcs_get_watermarks(&display_cfg->display_config, &core->clean_me_up.mode_lib, &programming->global_regs.wm_regs[0]);
+
+ // Check if FAMS2 is required
+ if (display_cfg->stage3.performed && display_cfg->stage3.success) {
+ programming->fams2_required = display_cfg->stage3.fams2_required;
+
+ dml2_core_calcs_get_global_fams2_programming(&core->clean_me_up.mode_lib, display_cfg, &programming->fams2_global_config);
+ }
+
+ // Only loop over all the main streams (the implicit svp streams will be packed as part of the main stream)
+ for (stream_index = 0; stream_index < programming->display_config.num_streams; stream_index++) {
+ main_stream = &svp_expanded_display_cfg->stream_descriptors[stream_index];
+ phantom_stream = &svp_expanded_display_cfg->stream_descriptors[scratch->svp_stream_index_from_main_stream_index[stream_index]];
+
+ // Set the descriptor
+ programming->stream_programming[stream_index].stream_descriptor = &programming->display_config.stream_descriptors[stream_index];
+
+ // Set the odm combine factor
+ programming->stream_programming[stream_index].num_odms_required = display_cfg->mode_support_result.cfg_support_info.stream_support_info[stream_index].odms_used;
+
+ // Check if the stream has implicit SVP enabled
+ // (the two pointers are equal exactly when the map entry is the stream's own index)
+ if (main_stream != phantom_stream) {
+ // If so, copy the phantom stream descriptor
+ programming->stream_programming[stream_index].phantom_stream.enabled = true;
+ memcpy(&programming->stream_programming[stream_index].phantom_stream.descriptor, phantom_stream, sizeof(struct dml2_stream_parameters));
+ } else {
+ programming->stream_programming[stream_index].phantom_stream.enabled = false;
+ }
+
+ // Due to the way DML indexes data internally, it's easier to populate the rest of the display
+ // stream programming in the next stage
+ }
+
+ dml_internal_pipe_index = 0;
+ total_pipe_regs_copied = 0;
+ stream_already_populated_mask = 0x0;
+
+ // Loop over all main planes
+ for (plane_index = 0; plane_index < programming->display_config.num_planes; plane_index++) {
+ main_plane = &svp_expanded_display_cfg->plane_descriptors[plane_index];
+
+ // Set the descriptor
+ programming->plane_programming[plane_index].plane_descriptor = &programming->display_config.plane_descriptors[plane_index];
+
+ // Set the mpc combine factor
+ programming->plane_programming[plane_index].num_dpps_required = core->clean_me_up.mode_lib.mp.NoOfDPP[plane_index];
+
+ // Setup the appropriate p-state strategy
+ if (display_cfg->stage3.performed && display_cfg->stage3.success) {
+ programming->plane_programming[plane_index].uclk_pstate_support_method = display_cfg->stage3.pstate_switch_modes[plane_index];
+ } else {
+ programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_na;
+ }
+
+ dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index);
+
+ memcpy(&programming->plane_programming[plane_index].mcache_allocation,
+ &display_cfg->stage2.mcache_allocations[plane_index],
+ sizeof(struct dml2_mcache_surface_allocation));
+ // Running total of mcaches used by main planes; one is subtracted when plane0 and plane1 share their last slice
+ total_main_mcaches_required += programming->plane_programming[plane_index].mcache_allocation.num_mcaches_plane0 +
+ programming->plane_programming[plane_index].mcache_allocation.num_mcaches_plane1 -
+ (programming->plane_programming[plane_index].mcache_allocation.last_slice_sharing.plane0_plane1 ? 1 : 0);
+
+ for (pipe_offset = 0; pipe_offset < programming->plane_programming[plane_index].num_dpps_required; pipe_offset++) {
+ // Assign storage for this pipe's register values
+ programming->plane_programming[plane_index].pipe_regs[pipe_offset] = &programming->pipe_regs[total_pipe_regs_copied];
+ memset(programming->plane_programming[plane_index].pipe_regs[pipe_offset], 0, sizeof(struct dml2_dchub_per_pipe_register_set));
+ total_pipe_regs_copied++;
+
+ // Populate the main plane regs
+ dml2_core_calcs_get_pipe_regs(svp_expanded_display_cfg, &core->clean_me_up.mode_lib, programming->plane_programming[plane_index].pipe_regs[pipe_offset], dml_internal_pipe_index);
+
+ // Multiple planes can refer to the same stream index, so it's only necessary to populate it once
+ if (!(stream_already_populated_mask & (0x1 << main_plane->stream_index))) {
+ dml2_core_calcs_get_stream_programming(&core->clean_me_up.mode_lib, &programming->stream_programming[main_plane->stream_index], dml_internal_pipe_index);
+
+ // The stream takes the p-state method of the first of its planes processed here
+ programming->stream_programming[main_plane->stream_index].uclk_pstate_method = programming->plane_programming[plane_index].uclk_pstate_support_method;
+
+ /* unconditionally populate fams2 params */
+ dml2_core_calcs_get_stream_fams2_programming(&core->clean_me_up.mode_lib,
+ display_cfg,
+ &programming->stream_programming[main_plane->stream_index].fams2_base_params,
+ &programming->stream_programming[main_plane->stream_index].fams2_sub_params,
+ programming->stream_programming[main_plane->stream_index].uclk_pstate_method,
+ plane_index);
+
+ stream_already_populated_mask |= (0x1 << main_plane->stream_index);
+ }
+ dml_internal_pipe_index++;
+ }
+ }
+
+ // Phantom planes occupy the indices after the main planes in the expanded config
+ for (plane_index = programming->display_config.num_planes; plane_index < svp_expanded_display_cfg->num_planes; plane_index++) {
+ phantom_plane = &svp_expanded_display_cfg->plane_descriptors[plane_index];
+ main_plane_index = scratch->phantom_plane_index_to_main_plane_index[plane_index];
+ main_plane = &svp_expanded_display_cfg->plane_descriptors[main_plane_index];
+
+ programming->plane_programming[main_plane_index].phantom_plane.valid = true;
+ memcpy(&programming->plane_programming[main_plane_index].phantom_plane.descriptor, phantom_plane, sizeof(struct dml2_plane_parameters));
+
+ dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &programming->plane_programming[main_plane_index].svp_size_mall_bytes, dml_internal_pipe_index);
+
+ /* generate mcache allocation, phantoms use identical mcache configuration, but in the MALL set and unique mcache ID's beginning after all main ID's */
+ memcpy(&programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation,
+ &programming->plane_programming[main_plane_index].mcache_allocation,
+ sizeof(struct dml2_mcache_surface_allocation));
+ for (mcache_index = 0; mcache_index < programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.num_mcaches_plane0; mcache_index++) {
+ programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane0[mcache_index] += total_main_mcaches_required;
+ programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_mall_plane0[mcache_index] =
+ programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane0[mcache_index];
+ }
+ for (mcache_index = 0; mcache_index < programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.num_mcaches_plane1; mcache_index++) {
+ programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane1[mcache_index] += total_main_mcaches_required;
+ programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_mall_plane1[mcache_index] =
+ programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane1[mcache_index];
+ }
+
+ // The phantom plane is given as many pipes as its main plane (num_dpps_required of the main plane)
+ for (pipe_offset = 0; pipe_offset < programming->plane_programming[main_plane_index].num_dpps_required; pipe_offset++) {
+ // Assign storage for this pipe's register values
+ programming->plane_programming[main_plane_index].phantom_plane.pipe_regs[pipe_offset] = &programming->pipe_regs[total_pipe_regs_copied];
+ memset(programming->plane_programming[main_plane_index].phantom_plane.pipe_regs[pipe_offset], 0, sizeof(struct dml2_dchub_per_pipe_register_set));
+ total_pipe_regs_copied++;
+
+ // Populate the phantom plane regs
+ dml2_core_calcs_get_pipe_regs(svp_expanded_display_cfg, &core->clean_me_up.mode_lib, programming->plane_programming[main_plane_index].phantom_plane.pipe_regs[pipe_offset], dml_internal_pipe_index);
+ // Populate the phantom stream specific programming
+ // (the mask is keyed by the phantom stream index, the result is stored in the main stream's entry)
+ if (!(stream_already_populated_mask & (0x1 << phantom_plane->stream_index))) {
+ dml2_core_calcs_get_global_sync_programming(&core->clean_me_up.mode_lib, &programming->stream_programming[main_plane->stream_index].phantom_stream.global_sync, dml_internal_pipe_index);
+
+ stream_already_populated_mask |= (0x1 << phantom_plane->stream_index);
+ }
+
+ dml_internal_pipe_index++;
+ }
+ }
+ }
+
+ /*
+  * core_dcn4_mode_support() - run the DCN4 mode support check and publish the results.
+  *
+  * Expands implicit SubVP into l->svp_expanded_display_cfg, runs
+  * dml2_core_calcs_mode_support_ex() on it and stores the verdict in
+  * in_out->mode_support_result.cfg_support_info.is_supported. When the mode is
+  * supported, the required clocks (scaled by 1000 into the *_khz fields), the
+  * bandwidths (*_kbps fields) and the per-plane / per-stream support info are
+  * written to in_out->mode_support_result as well.
+  *
+  * Returns the result of dml2_core_calcs_mode_support_ex().
+  */
+ bool core_dcn4_mode_support(struct dml2_core_mode_support_in_out *in_out)
+ {
+ 	struct dml2_core_instance *core = (struct dml2_core_instance *)in_out->instance;
+ 	struct dml2_core_mode_support_locals *l = &core->scratch.mode_support_locals;
+
+ 	bool result;
+ 	unsigned int i, stream_index, stream_bitmask;
+ 	unsigned int odm_count, num_odm_output_segments, dpp_count;
+
+ 	expand_implict_subvp(in_out->display_cfg, &l->svp_expanded_display_cfg, &core->scratch);
+
+ 	l->mode_support_ex_params.mode_lib = &core->clean_me_up.mode_lib;
+ 	l->mode_support_ex_params.in_display_cfg = &l->svp_expanded_display_cfg;
+ 	l->mode_support_ex_params.min_clk_table = in_out->min_clk_table;
+ 	l->mode_support_ex_params.min_clk_index = in_out->min_clk_index;
+ 	l->mode_support_ex_params.out_evaluation_info = &in_out->mode_support_result.cfg_support_info.clean_me_up.support_info;
+
+ 	result = dml2_core_calcs_mode_support_ex(&l->mode_support_ex_params);
+
+ 	in_out->mode_support_result.cfg_support_info.is_supported = result;
+
+ 	/* Nothing else is reported for an unsupported mode */
+ 	if (!result)
+ 		return result;
+
+ 	in_out->mode_support_result.global.dispclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.RequiredDISPCLK * 1000);
+ 	in_out->mode_support_result.global.dcfclk_deepsleep_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.dcfclk_deepsleep * 1000);
+ 	in_out->mode_support_result.global.socclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.SOCCLK * 1000);
+
+ 	in_out->mode_support_result.global.fclk_pstate_supported = l->mode_support_ex_params.out_evaluation_info->global_fclk_change_supported;
+ 	in_out->mode_support_result.global.uclk_pstate_supported = l->mode_support_ex_params.out_evaluation_info->global_dram_clock_change_supported;
+
+ 	in_out->mode_support_result.global.active.fclk_khz = (unsigned long)(core->clean_me_up.mode_lib.ms.FabricClock * 1000);
+ 	in_out->mode_support_result.global.active.dcfclk_khz = (unsigned long)(core->clean_me_up.mode_lib.ms.DCFCLK * 1000);
+
+ 	/*
+ 	 * Scale before converting to an integer, exactly as for the active clocks
+ 	 * above. Casting first would drop the fractional part of the clock before
+ 	 * the multiply and report a lower kHz value than the active state.
+ 	 */
+ 	in_out->mode_support_result.global.svp_prefetch.fclk_khz = (unsigned long)(core->clean_me_up.mode_lib.ms.FabricClock * 1000);
+ 	in_out->mode_support_result.global.svp_prefetch.dcfclk_khz = (unsigned long)(core->clean_me_up.mode_lib.ms.DCFCLK * 1000);
+
+ 	in_out->mode_support_result.global.active.average_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] * 1000), 1.0);
+ 	in_out->mode_support_result.global.active.urgent_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] * 1000), 1.0);
+ 	in_out->mode_support_result.global.svp_prefetch.average_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] * 1000), 1.0);
+ 	in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] * 1000), 1.0);
+
+ 	in_out->mode_support_result.global.active.average_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] * 1000), 1.0);
+ 	in_out->mode_support_result.global.active.urgent_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] * 1000), 1.0);
+ 	in_out->mode_support_result.global.svp_prefetch.average_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] * 1000), 1.0);
+ 	in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] * 1000), 1.0);
+ 	DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.active.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_sdp_kbps);
+ 	DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps);
+ 	DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.active.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_dram_kbps);
+ 	DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps);
+
+ 	/* Planes are indexed in the expanded config, so phantom planes are included */
+ 	for (i = 0; i < l->svp_expanded_display_cfg.num_planes; i++) {
+ 		in_out->mode_support_result.per_plane[i].dppclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.RequiredDPPCLK[i] * 1000);
+ 	}
+
+ 	/* One bit per stream whose stream-level info has already been filled in */
+ 	stream_bitmask = 0;
+ 	for (i = 0; i < l->svp_expanded_display_cfg.num_planes; i++) {
+ 		odm_count = 1;
+ 		dpp_count = l->mode_support_ex_params.out_evaluation_info->DPPPerSurface[i];
+ 		num_odm_output_segments = 1;
+
+ 		switch (l->mode_support_ex_params.out_evaluation_info->ODMMode[i]) {
+ 		case dml2_odm_mode_bypass:
+ 			odm_count = 1;
+ 			dpp_count = l->mode_support_ex_params.out_evaluation_info->DPPPerSurface[i];
+ 			break;
+ 		case dml2_odm_mode_combine_2to1:
+ 			odm_count = 2;
+ 			dpp_count = 2;
+ 			break;
+ 		case dml2_odm_mode_combine_3to1:
+ 			odm_count = 3;
+ 			dpp_count = 3;
+ 			break;
+ 		case dml2_odm_mode_combine_4to1:
+ 			odm_count = 4;
+ 			dpp_count = 4;
+ 			break;
+ 		case dml2_odm_mode_split_1to2:
+ 		case dml2_odm_mode_mso_1to2:
+ 			num_odm_output_segments = 2;
+ 			break;
+ 		case dml2_odm_mode_mso_1to4:
+ 			num_odm_output_segments = 4;
+ 			break;
+ 		case dml2_odm_mode_auto:
+ 		default:
+ 			odm_count = 1;
+ 			dpp_count = l->mode_support_ex_params.out_evaluation_info->DPPPerSurface[i];
+ 			break;
+ 		}
+
+ 		in_out->mode_support_result.cfg_support_info.plane_support_info[i].dpps_used = dpp_count;
+
+ 		dml2_core_calcs_get_plane_support_info(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->mode_support_result.cfg_support_info.plane_support_info[i], i);
+
+ 		stream_index = l->svp_expanded_display_cfg.plane_descriptors[i].stream_index;
+
+ 		/* Scale before the integer conversion so the fractional part of the clock is kept */
+ 		in_out->mode_support_result.per_stream[stream_index].dscclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.required_dscclk_freq_mhz[i] * 1000);
+ 		DML_LOG_VERBOSE("CORE_DCN4::%s: i=%u stream_index=%u, in_out->mode_support_result.per_stream[stream_index].dscclk_khz = %u\n", __func__, i, stream_index, in_out->mode_support_result.per_stream[stream_index].dscclk_khz);
+
+ 		/* Several planes can share a stream; only the first plane seen fills the stream-level info */
+ 		if (!((stream_bitmask >> stream_index) & 0x1)) {
+ 			in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].odms_used = odm_count;
+ 			in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].num_odm_output_segments = num_odm_output_segments;
+ 			in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].dsc_enable = l->mode_support_ex_params.out_evaluation_info->DSCEnabled[i];
+ 			in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].num_dsc_slices = l->mode_support_ex_params.out_evaluation_info->NumberOfDSCSlices[i];
+ 			dml2_core_calcs_get_stream_support_info(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index], i);
+ 			in_out->mode_support_result.per_stream[stream_index].dtbclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.RequiredDTBCLK[i] * 1000);
+ 			stream_bitmask |= 0x1 << stream_index;
+ 		}
+ 	}
+
+ 	return result;
+ }
+
+ /*
+  * Return the index of the entry in soc_bb->clk_table.uclk.clk_values_khz[] that is
+  * exactly equal to uclk_freq_khz. Index 0 is returned when no entry matches.
+  */
+ static int lookup_uclk_dpm_index_by_freq(unsigned long uclk_freq_khz, struct dml2_soc_bb *soc_bb)
+ {
+ 	int dpm_index = 0;
+
+ 	while (dpm_index < soc_bb->clk_table.uclk.num_clk_values) {
+ 		if (uclk_freq_khz == soc_bb->clk_table.uclk.clk_values_khz[dpm_index])
+ 			return dpm_index;
+ 		dpm_index++;
+ 	}
+
+ 	/* No exact match: fall back to the first table entry */
+ 	return 0;
+ }
+
+ /*
+  * core_dcn4_mode_programming() - run the DCN4 mode programming calcs and pack the result.
+  *
+  * Expands implicit SubVP into l->svp_expanded_display_cfg, looks up the DPM index
+  * for the requested active uclk and runs dml2_core_calcs_mode_programming_ex().
+  * On success in_out->programming is filled in: through
+  * pack_mode_programming_params_with_implicit_subvp() when
+  * overrides.enable_subvp_implicit_pmo is set, otherwise by the plane loop below.
+  *
+  * Returns the result of dml2_core_calcs_mode_programming_ex().
+  */
+ bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out)
+ {
+ 	struct dml2_core_instance *core = (struct dml2_core_instance *)in_out->instance;
+ 	struct dml2_core_mode_programming_locals *l = &core->scratch.mode_programming_locals;
+
+ 	bool result = false;
+ 	unsigned int pipe_offset;
+ 	int dml_internal_pipe_index;
+ 	int total_pipe_regs_copied = 0;
+ 	int stream_already_populated_mask = 0;
+
+ 	int main_stream_index;
+ 	unsigned int plane_index;
+
+ 	expand_implict_subvp(in_out->display_cfg, &l->svp_expanded_display_cfg, &core->scratch);
+
+ 	l->mode_programming_ex_params.mode_lib = &core->clean_me_up.mode_lib;
+ 	l->mode_programming_ex_params.in_display_cfg = &l->svp_expanded_display_cfg;
+ 	l->mode_programming_ex_params.min_clk_table = in_out->instance->minimum_clock_table;
+ 	l->mode_programming_ex_params.cfg_support_info = in_out->cfg_support_info;
+ 	l->mode_programming_ex_params.programming = in_out->programming;
+ 	l->mode_programming_ex_params.min_clk_index = lookup_uclk_dpm_index_by_freq(in_out->programming->min_clocks.dcn4x.active.uclk_khz,
+ 		&core->clean_me_up.mode_lib.soc);
+
+ 	result = dml2_core_calcs_mode_programming_ex(&l->mode_programming_ex_params);
+
+ 	/* Leave the programming output untouched when the calculation failed */
+ 	if (!result)
+ 		return result;
+
+ 	// If the input display configuration contains implicit SVP, we need to use a special packer
+ 	if (in_out->display_cfg->display_config.overrides.enable_subvp_implicit_pmo) {
+ 		pack_mode_programming_params_with_implicit_subvp(core, in_out->display_cfg, &l->svp_expanded_display_cfg, in_out->programming, &core->scratch);
+ 		return result;
+ 	}
+
+ 	/*
+ 	 * Copy from the embedded display_config member rather than from the start of
+ 	 * the wrapper struct, so the copy does not depend on display_config being
+ 	 * the first member of struct display_configuation_with_meta.
+ 	 */
+ 	memcpy(&in_out->programming->display_config, &in_out->display_cfg->display_config, sizeof(struct dml2_display_cfg));
+
+ 	dml2_core_calcs_get_arb_params(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->programming->global_regs.arb_regs);
+ 	dml2_core_calcs_get_watermarks(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->programming->global_regs.wm_regs[0]);
+
+ 	/* Advances once per DPP across all planes; it is the pipe index handed to the calcs getters */
+ 	dml_internal_pipe_index = 0;
+
+ 	for (plane_index = 0; plane_index < in_out->programming->display_config.num_planes; plane_index++) {
+ 		in_out->programming->plane_programming[plane_index].num_dpps_required = core->clean_me_up.mode_lib.mp.NoOfDPP[plane_index];
+
+ 		/* Any explicit legacy SubVP role (main or either phantom kind) selects the fw_svp method */
+ 		if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe ||
+ 		    in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe ||
+ 		    in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return)
+ 			in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_fw_svp;
+ 		else if (core->clean_me_up.mode_lib.mp.MaxActiveDRAMClockChangeLatencySupported[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us)
+ 			in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_vactive;
+ 		else if (core->clean_me_up.mode_lib.mp.TWait[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us)
+ 			in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_vblank;
+ 		else
+ 			in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_na;
+
+ 		dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &in_out->programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index);
+
+ 		memcpy(&in_out->programming->plane_programming[plane_index].mcache_allocation,
+ 			&in_out->display_cfg->stage2.mcache_allocations[plane_index],
+ 			sizeof(struct dml2_mcache_surface_allocation));
+
+ 		/* The stream index is the same for every pipe of this plane, so read it once */
+ 		main_stream_index = in_out->programming->display_config.plane_descriptors[plane_index].stream_index;
+
+ 		for (pipe_offset = 0; pipe_offset < in_out->programming->plane_programming[plane_index].num_dpps_required; pipe_offset++) {
+ 			in_out->programming->plane_programming[plane_index].plane_descriptor = &in_out->programming->display_config.plane_descriptors[plane_index];
+
+ 			// Assign storage for this pipe's register values
+ 			in_out->programming->plane_programming[plane_index].pipe_regs[pipe_offset] = &in_out->programming->pipe_regs[total_pipe_regs_copied];
+ 			memset(in_out->programming->plane_programming[plane_index].pipe_regs[pipe_offset], 0, sizeof(struct dml2_dchub_per_pipe_register_set));
+ 			total_pipe_regs_copied++;
+
+ 			// Populate
+ 			dml2_core_calcs_get_pipe_regs(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, in_out->programming->plane_programming[plane_index].pipe_regs[pipe_offset], dml_internal_pipe_index);
+
+ 			// Multiple planes can refer to the same stream index, so it's only necessary to populate it once
+ 			if (!(stream_already_populated_mask & (0x1 << main_stream_index))) {
+ 				in_out->programming->stream_programming[main_stream_index].stream_descriptor = &in_out->programming->display_config.stream_descriptors[main_stream_index];
+ 				in_out->programming->stream_programming[main_stream_index].num_odms_required = in_out->cfg_support_info->stream_support_info[main_stream_index].odms_used;
+ 				dml2_core_calcs_get_stream_programming(&core->clean_me_up.mode_lib, &in_out->programming->stream_programming[main_stream_index], dml_internal_pipe_index);
+
+ 				stream_already_populated_mask |= (0x1 << main_stream_index);
+ 			}
+ 			dml_internal_pipe_index++;
+ 		}
+ 	}
+
+ 	return result;
+ }
+
+ /*
+  * core_dcn4_populate_informative() - fill in_out->programming->informative.
+  *
+  * The reported voltage level is the min_clk_index saved in the mode programming
+  * scratch when in_out->mode_is_supported is set, otherwise the one saved in the
+  * mode support scratch. The remaining fields are filled by
+  * dml2_core_calcs_get_informative(). Always returns true.
+  */
+ bool core_dcn4_populate_informative(struct dml2_core_populate_informative_in_out *in_out)
+ {
+ 	if (!in_out->mode_is_supported)
+ 		in_out->programming->informative.voltage_level = in_out->instance->scratch.mode_support_locals.mode_support_ex_params.min_clk_index;
+ 	else
+ 		in_out->programming->informative.voltage_level = in_out->instance->scratch.mode_programming_locals.mode_programming_ex_params.min_clk_index;
+
+ 	dml2_core_calcs_get_informative(&in_out->instance->clean_me_up.mode_lib, in_out->programming);
+
+ 	return true;
+ }
+
+ /*
+  * core_dcn4_calculate_mcache_allocation() - compute the mcache allocation of one plane.
+  *
+  * Clears *in_out->mcache_allocation, lets dml2_core_calcs_get_mcache_allocation()
+  * fill it for in_out->plane_index, then overwrites the x offset of the last mcache
+  * of each plane with the corresponding surface width from in_out->plane_descriptor.
+  * requires_dedicated_mall_mcache is always reported as false. Always returns true.
+  */
+ bool core_dcn4_calculate_mcache_allocation(struct dml2_calculate_mcache_allocation_in_out *in_out)
+ {
+ 	struct dml2_mcache_surface_allocation *alloc = in_out->mcache_allocation;
+
+ 	memset(alloc, 0, sizeof(struct dml2_mcache_surface_allocation));
+
+ 	dml2_core_calcs_get_mcache_allocation(&in_out->instance->clean_me_up.mode_lib, alloc, in_out->plane_index);
+
+ 	/* The last mcache of plane0 is made to end at the plane0 surface width */
+ 	if (alloc->num_mcaches_plane0 > 0)
+ 		alloc->mcache_x_offsets_plane0[alloc->num_mcaches_plane0 - 1] = in_out->plane_descriptor->surface.plane0.width;
+
+ 	/* Same for plane1 */
+ 	if (alloc->num_mcaches_plane1 > 0)
+ 		alloc->mcache_x_offsets_plane1[alloc->num_mcaches_plane1 - 1] = in_out->plane_descriptor->surface.plane1.width;
+
+ 	alloc->requires_dedicated_mall_mcache = false;
+
+ 	return true;
+ }
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.h
new file mode 100644
index 000000000000..a68bb001a346
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.h
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+ #ifndef __DML2_CORE_DCN4_H__
+ #define __DML2_CORE_DCN4_H__
+ /* DCN4 core entry points; each one takes a single in/out parameter block. */
+ bool core_dcn4_initialize(struct dml2_core_initialize_in_out *in_out);
+ /* Returns whether the mode is supported; details are written to in_out->mode_support_result. */
+ bool core_dcn4_mode_support(struct dml2_core_mode_support_in_out *in_out);
+ /* Returns whether the mode programming calculation succeeded; on success in_out->programming is filled. */
+ bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out);
+ /* Fills in_out->programming->informative; always returns true. */
+ bool core_dcn4_populate_informative(struct dml2_core_populate_informative_in_out *in_out);
+ /* Fills *in_out->mcache_allocation for in_out->plane_index; always returns true. */
+ bool core_dcn4_calculate_mcache_allocation(struct dml2_calculate_mcache_allocation_in_out *in_out);
+ #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
new file mode 100644
index 000000000000..a02e9fd6b5ca
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
@@ -0,0 +1,13342 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+
+#include "dml2_internal_shared_types.h"
+#include "dml2_core_dcn4_calcs.h"
+#include "dml2_debug.h"
+#include "lib_float_math.h"
+#include "dml_top_types.h"
+
+#define DML2_MAX_FMT_420_BUFFER_WIDTH 4096
+#define DML_MAX_NUM_OF_SLICES_PER_DSC 4
+#define DML_MAX_COMPRESSION_RATIO 4
+//#define DML_MODE_SUPPORT_USE_DPM_DRAM_BW
+//#define DML_GLOBAL_PREFETCH_CHECK
+#define ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE
+#define DML_MAX_VSTARTUP_START 1023
+
+/*
+ * Return the enumerator name of @bw_type as a string literal.
+ * Values without a dedicated case yield "dml2_core_internal_bw_unknown".
+ */
+const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type)
+{
+	const char *name = "dml2_core_internal_bw_unknown";
+
+	switch (bw_type) {
+	case dml2_core_internal_bw_sdp:
+		name = "dml2_core_internal_bw_sdp";
+		break;
+	case dml2_core_internal_bw_dram:
+		name = "dml2_core_internal_bw_dram";
+		break;
+	case dml2_core_internal_bw_max:
+		name = "dml2_core_internal_bw_max";
+		break;
+	default:
+		break;
+	}
+
+	return name;
+}
+
+/*
+ * Return the enumerator name of the given SoC state as a string literal.
+ * dml2_core_internal_soc_state_max and any other value yield
+ * "dml2_core_internal_soc_state_unknown".
+ */
+const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type)
+{
+	const char *name = "dml2_core_internal_soc_state_unknown";
+
+	switch (dml2_core_internal_soc_state_type) {
+	case dml2_core_internal_soc_state_sys_idle:
+		name = "dml2_core_internal_soc_state_sys_idle";
+		break;
+	case dml2_core_internal_soc_state_sys_active:
+		name = "dml2_core_internal_soc_state_sys_active";
+		break;
+	case dml2_core_internal_soc_state_svp_prefetch:
+		name = "dml2_core_internal_soc_state_svp_prefetch";
+		break;
+	case dml2_core_internal_soc_state_max:
+	default:
+		/* Not a nameable state: keep the "unknown" label. */
+		break;
+	}
+
+	return name;
+}
+
+/*
+ * Divide @dividend by @divisor and return the floating point quotient.
+ *
+ * Despite its name, *@remainder does not receive an arithmetic remainder: it
+ * is set to 1 when the quotient minus its integer truncation is greater than
+ * zero, and to 0 otherwise.
+ */
+static double dml2_core_div_rem(double dividend, unsigned int divisor, unsigned int *remainder)
+{
+	const double quotient = dividend / divisor;
+
+	*remainder = (quotient - (int)quotient > 0);
+	return quotient;
+}
+
+/*
+ * Dump the individual checks recorded in @support through DML_LOG_VERBOSE.
+ *
+ * @support:   mode support result structure whose fields are printed.
+ * @fail_only: when false every field is printed. When true a field is printed
+ *             only if it holds the value tested below: fields phrased as a
+ *             capability (e.g. ROBSupport, PitchSupport) are printed when
+ *             they are 0, fields phrased as a violation (e.g.
+ *             ExceededMALLSize, NotEnoughDSCUnits) are printed when they are 1.
+ *
+ * The banner lines and the "DML_MODE_SUPPORT_INFO_ST" title are always printed.
+ */
+static void dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only)
+{
+ DML_LOG_VERBOSE("DML: ===================================== \n");
+ DML_LOG_VERBOSE("DML: DML_MODE_SUPPORT_INFO_ST\n");
+ if (!fail_only || support->ScaleRatioAndTapsSupport == 0)
+ DML_LOG_VERBOSE("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport);
+ if (!fail_only || support->SourceFormatPixelAndScanSupport == 0)
+ DML_LOG_VERBOSE("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport);
+ if (!fail_only || support->ViewportSizeSupport == 0)
+ DML_LOG_VERBOSE("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport);
+ if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1)
+ DML_LOG_VERBOSE("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion);
+ if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1)
+ DML_LOG_VERBOSE("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated);
+ if (!fail_only || support->BPPForMultistreamNotIndicated == 1)
+ DML_LOG_VERBOSE("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated);
+ if (!fail_only || support->MultistreamWithHDMIOreDP == 1)
+ DML_LOG_VERBOSE("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP);
+ if (!fail_only || support->ExceededMultistreamSlots == 1)
+ DML_LOG_VERBOSE("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots);
+ if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1)
+ DML_LOG_VERBOSE("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink);
+ if (!fail_only || support->NotEnoughLanesForMSO == 1)
+ DML_LOG_VERBOSE("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO);
+ if (!fail_only || support->P2IWith420 == 1)
+ DML_LOG_VERBOSE("DML: support: P2IWith420 = %d\n", support->P2IWith420);
+ if (!fail_only || support->DSC422NativeNotSupported == 1)
+ DML_LOG_VERBOSE("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported);
+ if (!fail_only || support->DSCSlicesODMModeSupported == 0)
+ DML_LOG_VERBOSE("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported);
+ if (!fail_only || support->NotEnoughDSCUnits == 1)
+ DML_LOG_VERBOSE("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits);
+ if (!fail_only || support->NotEnoughDSCSlices == 1)
+ DML_LOG_VERBOSE("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices);
+ if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1)
+ DML_LOG_VERBOSE("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe);
+ if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1)
+ DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen);
+ if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1)
+ DML_LOG_VERBOSE("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported);
+ if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0)
+ DML_LOG_VERBOSE("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport);
+ if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1)
+ DML_LOG_VERBOSE("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported);
+ if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1)
+ DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState);
+ if (!fail_only || support->ROBSupport == 0)
+ DML_LOG_VERBOSE("DML: support: ROBSupport = %d\n", support->ROBSupport);
+ if (!fail_only || support->OutstandingRequestsSupport == 0)
+ DML_LOG_VERBOSE("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport);
+ if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0)
+ DML_LOG_VERBOSE("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance);
+ if (!fail_only || support->DISPCLK_DPPCLK_Support == 0)
+ DML_LOG_VERBOSE("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support);
+ if (!fail_only || support->TotalAvailablePipesSupport == 0)
+ DML_LOG_VERBOSE("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport);
+ if (!fail_only || support->NumberOfOTGSupport == 0)
+ DML_LOG_VERBOSE("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport);
+ if (!fail_only || support->NumberOfHDMIFRLSupport == 0)
+ DML_LOG_VERBOSE("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport);
+ if (!fail_only || support->NumberOfDP2p0Support == 0)
+ DML_LOG_VERBOSE("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support);
+ if (!fail_only || support->EnoughWritebackUnits == 0)
+ DML_LOG_VERBOSE("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits);
+ if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0)
+ DML_LOG_VERBOSE("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport);
+ if (!fail_only || support->WritebackLatencySupport == 0)
+ DML_LOG_VERBOSE("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport);
+ if (!fail_only || support->CursorSupport == 0)
+ DML_LOG_VERBOSE("DML: support: CursorSupport = %d\n", support->CursorSupport);
+ if (!fail_only || support->PitchSupport == 0)
+ DML_LOG_VERBOSE("DML: support: PitchSupport = %d\n", support->PitchSupport);
+ if (!fail_only || support->ViewportExceedsSurface == 1)
+ DML_LOG_VERBOSE("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface);
+ if (!fail_only || support->PrefetchSupported == 0)
+ DML_LOG_VERBOSE("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported);
+ if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0)
+ DML_LOG_VERBOSE("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport);
+ if (!fail_only || support->AvgBandwidthSupport == 0)
+ DML_LOG_VERBOSE("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport);
+ if (!fail_only || support->DynamicMetadataSupported == 0)
+ DML_LOG_VERBOSE("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported);
+ if (!fail_only || support->VRatioInPrefetchSupported == 0)
+ DML_LOG_VERBOSE("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported);
+ if (!fail_only || support->PTEBufferSizeNotExceeded == 0)
+ DML_LOG_VERBOSE("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded);
+ if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0)
+ DML_LOG_VERBOSE("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded);
+ if (!fail_only || support->ExceededMALLSize == 1)
+ DML_LOG_VERBOSE("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize);
+ if (!fail_only || support->g6_temp_read_support == 0)
+ DML_LOG_VERBOSE("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support);
+ if (!fail_only || support->ImmediateFlipSupport == 0)
+ DML_LOG_VERBOSE("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport);
+ if (!fail_only || support->LinkCapacitySupport == 0)
+ DML_LOG_VERBOSE("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport);
+
+ /* Overall verdict, printed last. */
+ if (!fail_only || support->ModeSupport == 0)
+ DML_LOG_VERBOSE("DML: support: ModeSupport = %d\n", support->ModeSupport);
+ DML_LOG_VERBOSE("DML: ===================================== \n");
+}
+
+/*
+ * Compute the output bits per pixel for every plane of @display_cfg.
+ *
+ * @out_bpp:     output array, one entry is written per plane.
+ * @display_cfg: display configuration; each plane is resolved to its stream
+ *               through plane_descriptors[k].stream_index.
+ *
+ * With DSC disabled the result is the stream's bpc scaled by the output
+ * format (x3 for 4:4:4, x2 for both 4:2:2 variants, x1.5 for 4:2:0 and any
+ * other format). With DSC enabled it is dsc_compressed_bpp_x16 / 16. Any
+ * other DSC setting yields 0.
+ */
+static void get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg)
+{
+	for (unsigned int k = 0; k < display_cfg->num_planes; k++) {
+		double bpc = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.bpc;
+
+		switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable) {
+		case dml2_dsc_disable:
+			/* Uncompressed: bpp follows from the chroma subsampling. */
+			switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format) {
+			case dml2_444:
+				out_bpp[k] = bpc * 3;
+				break;
+			case dml2_s422:
+			case dml2_n422:
+				out_bpp[k] = bpc * 2;
+				break;
+			case dml2_420:
+			default:
+				out_bpp[k] = bpc * 1.5;
+				break;
+			}
+			break;
+		case dml2_dsc_enable:
+			/* Compressed: the target bpp is carried in 1/16 units. */
+			out_bpp[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.dsc_compressed_bpp_x16 / 16;
+			break;
+		default:
+			out_bpp[k] = 0;
+			break;
+		}
+
+		DML_LOG_VERBOSE("DML::%s: k=%d bpc=%f\n", __func__, k, bpc);
+		DML_LOG_VERBOSE("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable);
+		DML_LOG_VERBOSE("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]);
+	}
+}
+
+/*
+ * Round @num to a multiple of @multiple.
+ *
+ * @up selects rounding up (true) or down (false). @num is returned unchanged
+ * when @multiple is 0 or when @num is already a multiple.
+ */
+static unsigned int dml_round_to_multiple(unsigned int num, unsigned int multiple, bool up)
+{
+	unsigned int excess;
+	unsigned int rounded_down;
+
+	if (multiple == 0)
+		return num;
+
+	excess = num % multiple;
+	rounded_down = num - excess;
+
+	/* Only step to the next multiple when there is something to round. */
+	return (up && excess != 0) ? rounded_down + multiple : rounded_down;
+}
+
+/*
+ * Return the sum of dpps_used over the first @num_planes entries of
+ * @cfg_support_info->plane_support_info, and log the total.
+ */
+static unsigned int dml_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info)
+{
+	unsigned int total = 0;
+	unsigned int k;
+
+	for (k = 0; k < num_planes; k++)
+		total += (unsigned int)cfg_support_info->plane_support_info[k].dpps_used;
+
+	DML_LOG_VERBOSE("DML::%s: num_active_pipes = %d\n", __func__, total);
+	return total;
+}
+
+/*
+ * Build the pipe index -> plane index lookup table.
+ *
+ * @cfg_support_info: per-plane support info; plane_support_info[p].dpps_used
+ *                    gives how many consecutive pipes are assigned to plane p.
+ * @pipe_plane:       output table with (at least) DML2_MAX_PLANES entries.
+ *
+ * Every entry is first reset to __DML2_CALCS_PIPE_NO_PLANE__. Pipes are then
+ * handed out in plane order, dpps_used pipes per plane. Entries that are not
+ * handed out keep the "no plane" marker.
+ *
+ * Only DML2_MAX_PLANES entries are known to be valid (that is how many the
+ * reset loop writes), so assignment stops - with an assertion - once that many
+ * pipes have been mapped, instead of writing past the table.
+ */
+static void dml_calc_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane)
+{
+	unsigned int pipe_idx = 0;
+
+	for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k)
+		pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__;
+
+	for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) {
+		for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used; i++) {
+			if (pipe_idx >= DML2_MAX_PLANES) {
+				/* More pipes requested than the table can describe. */
+				DML_ASSERT(0);
+				return;
+			}
+			pipe_plane[pipe_idx] = plane_idx;
+			pipe_idx++;
+		}
+	}
+}
+
+/*
+ * Return true when @plane_cfg's legacy SVP override selects one of the two
+ * phantom pipe modes (with or without data return).
+ */
+static bool dml_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg)
+{
+	return plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe ||
+		plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return;
+}
+
+/*
+ * Pipe-indexed wrapper around dml_is_phantom_pipe().
+ *
+ * @pipe_idx is translated to a plane index through mode_lib->mp.pipe_plane[],
+ * the matching plane descriptor of @display_cfg is tested, and the result is
+ * logged and returned. No range check is performed on either index.
+ */
+static bool dml_get_is_phantom_pipe(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx)
+{
+	const unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx];
+	const struct dml2_plane_parameters *plane_cfg = &display_cfg->plane_descriptors[plane_idx];
+	const bool is_phantom = dml_is_phantom_pipe(plane_cfg);
+
+	DML_LOG_VERBOSE("DML::%s: pipe_idx=%d legacy_svp_config=%0d is_phantom=%d\n", __func__, pipe_idx, plane_cfg->overrides.legacy_svp_config, is_phantom);
+	return is_phantom;
+}
+
+/*
+ * Generate a static accessor named dml_get_<variable>(mode_lib, pipe_idx).
+ *
+ * The accessor translates pipe_idx to a plane index through
+ * mode_lib->mp.pipe_plane[] and returns interval_var[plane_idx] cast to
+ * <type>. Neither pipe_idx nor the looked-up plane index is range checked.
+ *
+ * NOTE(review): every instantiation below ends in ';', which leaves an empty
+ * declaration after the generated function body.
+ */
+#define dml_get_per_pipe_var_func(variable, type, interval_var) static type dml_get_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) \
+{ \
+unsigned int plane_idx; \
+plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; \
+return (type) interval_var[plane_idx]; \
+}
+
+/* Per-pipe accessors backed by per-plane arrays in mode_lib->mp. */
+dml_get_per_pipe_var_func(dpte_group_size_in_bytes, unsigned int, mode_lib->mp.dpte_group_bytes);
+dml_get_per_pipe_var_func(vm_group_size_in_bytes, unsigned int, mode_lib->mp.vm_group_bytes);
+dml_get_per_pipe_var_func(swath_height_l, unsigned int, mode_lib->mp.SwathHeightY);
+dml_get_per_pipe_var_func(swath_height_c, unsigned int, mode_lib->mp.SwathHeightC);
+dml_get_per_pipe_var_func(dpte_row_height_linear_l, unsigned int, mode_lib->mp.dpte_row_height_linear);
+dml_get_per_pipe_var_func(dpte_row_height_linear_c, unsigned int, mode_lib->mp.dpte_row_height_linear_chroma);
+
+dml_get_per_pipe_var_func(vstartup_calculated, unsigned int, mode_lib->mp.VStartup);
+dml_get_per_pipe_var_func(vupdate_offset, unsigned int, mode_lib->mp.VUpdateOffsetPix);
+dml_get_per_pipe_var_func(vupdate_width, unsigned int, mode_lib->mp.VUpdateWidthPix);
+dml_get_per_pipe_var_func(vready_offset, unsigned int, mode_lib->mp.VReadyOffsetPix);
+dml_get_per_pipe_var_func(pstate_keepout_dst_lines, unsigned int, mode_lib->mp.pstate_keepout_dst_lines);
+dml_get_per_pipe_var_func(det_stored_buffer_size_l_bytes, unsigned int, mode_lib->mp.DETBufferSizeY);
+dml_get_per_pipe_var_func(det_stored_buffer_size_c_bytes, unsigned int, mode_lib->mp.DETBufferSizeC);
+dml_get_per_pipe_var_func(det_buffer_size_kbytes, unsigned int, mode_lib->mp.DETBufferSizeInKByte);
+dml_get_per_pipe_var_func(surface_size_in_mall_bytes, unsigned int, mode_lib->mp.SurfaceSizeInTheMALL);
+
+/*
+ * Generate a static accessor named dml_get_plane_<variable>(mode_lib, plane_idx)
+ * that returns interval_var[plane_idx] cast to <type>. Unlike the per-pipe
+ * accessors, the index is used directly as a plane index; it is not range
+ * checked.
+ */
+#define dml_get_per_plane_var_func(variable, type, interval_var) static type dml_get_plane_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx) \
+{ \
+return (type) interval_var[plane_idx]; \
+}
+
+/* Per-plane accessors backed by arrays in mode_lib->ms. */
+dml_get_per_plane_var_func(num_mcaches_plane0, unsigned int, mode_lib->ms.num_mcaches_l);
+dml_get_per_plane_var_func(mcache_row_bytes_plane0, unsigned int, mode_lib->ms.mcache_row_bytes_l);
+dml_get_per_plane_var_func(mcache_shift_granularity_plane0, unsigned int, mode_lib->ms.mcache_shift_granularity_l);
+dml_get_per_plane_var_func(num_mcaches_plane1, unsigned int, mode_lib->ms.num_mcaches_c);
+dml_get_per_plane_var_func(mcache_row_bytes_plane1, unsigned int, mode_lib->ms.mcache_row_bytes_c);
+dml_get_per_plane_var_func(mcache_shift_granularity_plane1, unsigned int, mode_lib->ms.mcache_shift_granularity_c);
+dml_get_per_plane_var_func(mall_comb_mcache_l, unsigned int, mode_lib->ms.mall_comb_mcache_l);
+dml_get_per_plane_var_func(mall_comb_mcache_c, unsigned int, mode_lib->ms.mall_comb_mcache_c);
+dml_get_per_plane_var_func(lc_comb_mcache, unsigned int, mode_lib->ms.lc_comb_mcache);
+dml_get_per_plane_var_func(subviewport_lines_needed_in_mall, unsigned int, mode_lib->ms.SubViewportLinesNeededInMALL);
+dml_get_per_plane_var_func(max_vstartup_lines, unsigned int, mode_lib->ms.MaxVStartupLines);
+
+/*
+ * Generate a static accessor named
+ * dml_get_plane_array_<variable>(mode_lib, plane_idx, array_idx) that returns
+ * interval_var[plane_idx][array_idx] cast to <type>. Neither index is range
+ * checked.
+ */
+#define dml_get_per_plane_array_var_func(variable, type, interval_var) static type dml_get_plane_array_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx, unsigned int array_idx) \
+{ \
+return (type) interval_var[plane_idx][array_idx]; \
+}
+
+/* mcache offset tables, indexed by plane and then by entry within the plane. */
+dml_get_per_plane_array_var_func(mcache_offsets_plane0, unsigned int, mode_lib->ms.mcache_offsets_l);
+dml_get_per_plane_array_var_func(mcache_offsets_plane1, unsigned int, mode_lib->ms.mcache_offsets_c);
+
+/*
+ * Generate a static accessor named dml_get_<var>(mode_lib) that returns the
+ * expression internal_var cast to <type>. internal_var is evaluated inside the
+ * generated function, so it may refer to the mode_lib parameter.
+ *
+ * Notes on the instantiations below:
+ *  - wm_memory_trip and urgent_latency both read mode_lib->mp.UrgentLatency.
+ *  - global_dppclk_khz returns mode_lib->mp.GlobalDPPCLK multiplied by 1000.0
+ *    (presumably a MHz to kHz conversion - TODO confirm the stored unit).
+ *  - The *_required_sdp/dram averages come from mode_lib->ms.support, the
+ *    remaining bandwidth values from mode_lib->mp.
+ */
+#define dml_get_var_func(var, type, internal_var) static type dml_get_##var(const struct dml2_core_internal_display_mode_lib *mode_lib) \
+{ \
+return (type) internal_var; \
+}
+
+dml_get_var_func(wm_urgent, double, mode_lib->mp.Watermark.UrgentWatermark);
+dml_get_var_func(wm_stutter_exit, double, mode_lib->mp.Watermark.StutterExitWatermark);
+dml_get_var_func(wm_stutter_enter_exit, double, mode_lib->mp.Watermark.StutterEnterPlusExitWatermark);
+dml_get_var_func(wm_z8_stutter_exit, double, mode_lib->mp.Watermark.Z8StutterExitWatermark);
+dml_get_var_func(wm_z8_stutter_enter_exit, double, mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark);
+dml_get_var_func(wm_memory_trip, double, mode_lib->mp.UrgentLatency);
+dml_get_var_func(meta_trip_memory_us, double, mode_lib->mp.MetaTripToMemory);
+
+dml_get_var_func(wm_fclk_change, double, mode_lib->mp.Watermark.FCLKChangeWatermark);
+dml_get_var_func(wm_usr_retraining, double, mode_lib->mp.Watermark.USRRetrainingWatermark);
+dml_get_var_func(wm_temp_read_or_ppt, double, mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us);
+dml_get_var_func(wm_dram_clock_change, double, mode_lib->mp.Watermark.DRAMClockChangeWatermark);
+dml_get_var_func(fraction_of_urgent_bandwidth, double, mode_lib->mp.FractionOfUrgentBandwidth);
+dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, double, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip);
+dml_get_var_func(fraction_of_urgent_bandwidth_mall, double, mode_lib->mp.FractionOfUrgentBandwidthMALL);
+dml_get_var_func(wm_writeback_dram_clock_change, double, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
+dml_get_var_func(wm_writeback_fclk_change, double, mode_lib->mp.Watermark.WritebackFCLKChangeWatermark);
+dml_get_var_func(stutter_efficiency, double, mode_lib->mp.StutterEfficiency);
+dml_get_var_func(stutter_efficiency_no_vblank, double, mode_lib->mp.StutterEfficiencyNotIncludingVBlank);
+dml_get_var_func(stutter_num_bursts, double, mode_lib->mp.NumberOfStutterBurstsPerFrame);
+dml_get_var_func(stutter_efficiency_z8, double, mode_lib->mp.Z8StutterEfficiency);
+dml_get_var_func(stutter_num_bursts_z8, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrame);
+dml_get_var_func(stutter_period, double, mode_lib->mp.StutterPeriod);
+dml_get_var_func(stutter_efficiency_z8_bestcase, double, mode_lib->mp.Z8StutterEfficiencyBestCase);
+dml_get_var_func(stutter_num_bursts_z8_bestcase, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase);
+dml_get_var_func(stutter_period_bestcase, double, mode_lib->mp.StutterPeriodBestCase);
+dml_get_var_func(fclk_change_latency, double, mode_lib->mp.MaxActiveFCLKChangeLatencySupported);
+dml_get_var_func(global_dppclk_khz, double, mode_lib->mp.GlobalDPPCLK * 1000.0);
+
+dml_get_var_func(sys_active_avg_bw_required_sdp, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
+dml_get_var_func(sys_active_avg_bw_required_dram, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
+
+dml_get_var_func(svp_prefetch_avg_bw_required_sdp, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
+dml_get_var_func(svp_prefetch_avg_bw_required_dram, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
+
+dml_get_var_func(sys_active_avg_bw_available_sdp, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
+dml_get_var_func(sys_active_avg_bw_available_dram, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
+
+dml_get_var_func(svp_prefetch_avg_bw_available_sdp, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
+dml_get_var_func(svp_prefetch_avg_bw_available_dram, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
+
+dml_get_var_func(sys_active_urg_bw_available_sdp, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
+dml_get_var_func(sys_active_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
+dml_get_var_func(sys_active_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
+
+dml_get_var_func(svp_prefetch_urg_bw_available_sdp, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
+dml_get_var_func(svp_prefetch_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
+dml_get_var_func(svp_prefetch_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_svp_prefetch]);
+
+dml_get_var_func(urgent_latency, double, mode_lib->mp.UrgentLatency);
+dml_get_var_func(max_urgent_latency_us, double, mode_lib->ms.support.max_urgent_latency_us);
+dml_get_var_func(max_non_urgent_latency_us, double, mode_lib->ms.support.max_non_urgent_latency_us);
+dml_get_var_func(avg_non_urgent_latency_us, double, mode_lib->ms.support.avg_non_urgent_latency_us);
+dml_get_var_func(avg_urgent_latency_us, double, mode_lib->ms.support.avg_urgent_latency_us);
+
+dml_get_var_func(sys_active_urg_bw_required_sdp, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
+dml_get_var_func(sys_active_urg_bw_required_dram, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
+dml_get_var_func(svp_prefetch_urg_bw_required_sdp, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
+dml_get_var_func(svp_prefetch_urg_bw_required_dram, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
+
+dml_get_var_func(sys_active_non_urg_required_sdp, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
+dml_get_var_func(sys_active_non_urg_required_dram, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
+dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
+dml_get_var_func(svp_prefetch_non_urg_bw_required_dram, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
+
+dml_get_var_func(sys_active_urg_bw_required_sdp_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
+dml_get_var_func(sys_active_urg_bw_required_dram_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
+dml_get_var_func(svp_prefetch_urg_bw_required_sdp_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
+dml_get_var_func(svp_prefetch_urg_bw_required_dram_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
+
+dml_get_var_func(sys_active_non_urg_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
+dml_get_var_func(sys_active_non_urg_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
+dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
+dml_get_var_func(svp_prefetch_non_urg_bw_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
+
+dml_get_var_func(comp_buffer_size_kbytes, unsigned int, mode_lib->mp.CompressedBufferSizeInkByte);
+
+dml_get_var_func(unbounded_request_enabled, bool, mode_lib->mp.UnboundedRequestEnabled);
+dml_get_var_func(wm_writeback_urgent, double, mode_lib->mp.Watermark.WritebackUrgentWatermark);
+dml_get_var_func(cstate_max_cap_mode, bool, mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
+dml_get_var_func(compbuf_reserved_space_64b, unsigned int, mode_lib->mp.compbuf_reserved_space_64b);
+dml_get_var_func(hw_debug5, bool, mode_lib->mp.hw_debug5);
+dml_get_var_func(dcfclk_deep_sleep_hysteresis, unsigned int, mode_lib->mp.dcfclk_deep_sleep_hysteresis);
+
+/*
+ * Derive the DET budget and the minimum compressed buffer size from the
+ * return buffer configuration.
+ *
+ * Outputs:
+ *  *MaxTotalDETInKByte - with MRQ present: 4/5 of (return buffer + ROB),
+ *                        passed through math_ceil2(..., 64); otherwise the
+ *                        return buffer size minus one segment.
+ *  *nomDETInKByte      - MaxTotalDETInKByte / MaxNumDPP passed through
+ *                        math_floor2(..., segment size), or
+ *                        nomDETInKByteOverrideValue when
+ *                        nomDETInKByteOverrideEnable is non-zero.
+ *  *MinCompressedBufferSizeInKByte - return buffer size minus
+ *                        MaxTotalDETInKByte.
+ *
+ * The override is applied after the regular values have been logged and is
+ * logged separately.
+ */
+static void CalculateMaxDETAndMinCompressedBufferSize(
+	unsigned int ConfigReturnBufferSizeInKByte,
+	unsigned int ConfigReturnBufferSegmentSizeInKByte,
+	unsigned int ROBBufferSizeInKByte,
+	unsigned int MaxNumDPP,
+	unsigned int nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
+	unsigned int nomDETInKByteOverrideValue, // VBA_DELTA
+	bool is_mrq_present,
+
+	// Output
+	unsigned int *MaxTotalDETInKByte,
+	unsigned int *nomDETInKByte,
+	unsigned int *MinCompressedBufferSizeInKByte)
+{
+	*MaxTotalDETInKByte = is_mrq_present ?
+		(unsigned int) math_ceil2((double)(ConfigReturnBufferSizeInKByte + ROBBufferSizeInKByte)*4/5, 64) :
+		ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte;
+	*nomDETInKByte = (unsigned int)(math_floor2((double)*MaxTotalDETInKByte / (double)MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte));
+	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;
+
+	DML_LOG_VERBOSE("DML::%s: is_mrq_present = %u\n", __func__, is_mrq_present);
+	DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
+	DML_LOG_VERBOSE("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte);
+	DML_LOG_VERBOSE("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP);
+	DML_LOG_VERBOSE("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte);
+	DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte);
+	DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte);
+
+	if (!nomDETInKByteOverrideEnable)
+		return;
+
+	/* Caller supplied nominal DET size replaces the computed one. */
+	*nomDETInKByte = nomDETInKByteOverrideValue;
+	DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte);
+}
+
+/*
+ * Fill @PixelClockBackEnd with one entry per plane: the pixel clock of the
+ * plane's stream, converted from pixel_clock_khz by dividing by 1000.
+ *
+ * @ptoi_supported is only tested together with the stream's interlaced flag;
+ * the branch it guards currently performs no adjustment.
+ */
+static void PixelClockAdjustmentForProgressiveToInterlaceUnit(const struct dml2_display_cfg *display_cfg, bool ptoi_supported, double *PixelClockBackEnd)
+{
+	unsigned int k;
+
+	for (k = 0; k < display_cfg->num_planes; k++) {
+		PixelClockBackEnd[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000;
+
+		if (ptoi_supported && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1) {
+			/*
+			 * FIXME_STAGE2... can sw pass the pixel rate for interlaced directly.
+			 * Doubling timing.pixel_clock_khz for interlaced streams is
+			 * deliberately left disabled here.
+			 */
+		}
+	}
+}
+
+/*
+ * Return true for the 4:2:0 source formats (8, 10 and 12 bit) and false for
+ * every other known format. An unknown format triggers DML_ASSERT(0) and is
+ * reported as false.
+ */
+static bool dml_is_420(enum dml2_source_format_class source_format)
+{
+	switch (source_format) {
+	case dml2_420_8:
+	case dml2_420_10:
+	case dml2_420_12:
+		return true;
+	case dml2_444_8:
+	case dml2_444_16:
+	case dml2_444_32:
+	case dml2_444_64:
+	case dml2_422_planar_8:
+	case dml2_422_planar_10:
+	case dml2_422_planar_12:
+	case dml2_422_packed_8:
+	case dml2_422_packed_10:
+	case dml2_422_packed_12:
+	case dml2_rgbe_alpha:
+	case dml2_rgbe:
+	case dml2_mono_8:
+	case dml2_mono_16:
+		return false;
+	default:
+		DML_ASSERT(0);
+		return false;
+	}
+}
+
+/*
+ * Return the tile block size in bytes for @sw_mode: 256 for the linear and
+ * 256B modes, 4096 for 4KB, 65536 for the 64KB modes and 262144 for the 256KB
+ * modes. An unknown mode triggers DML_ASSERT(0) and yields 256.
+ */
+static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode)
+{
+	if (sw_mode == dml2_sw_linear ||
+		sw_mode == dml2_sw_256b_2d ||
+		sw_mode == dml2_gfx11_sw_linear)
+		return 256;
+
+	if (sw_mode == dml2_sw_4kb_2d)
+		return 4096;
+
+	if (sw_mode == dml2_sw_64kb_2d ||
+		sw_mode == dml2_gfx11_sw_64kb_d ||
+		sw_mode == dml2_gfx11_sw_64kb_d_t ||
+		sw_mode == dml2_gfx11_sw_64kb_d_x ||
+		sw_mode == dml2_gfx11_sw_64kb_r_x)
+		return 65536;
+
+	if (sw_mode == dml2_sw_256kb_2d ||
+		sw_mode == dml2_gfx11_sw_256kb_d_x ||
+		sw_mode == dml2_gfx11_sw_256kb_r_x)
+		return 262144;
+
+	/* Unknown swizzle mode: flag it and fall back to the smallest size. */
+	DML_ASSERT(0);
+	return 256;
+}
+
+/* Return true when @Scan is a 90 or 270 degree rotation, false otherwise. */
+static bool dml_is_vertical_rotation(enum dml2_rotation_angle Scan)
+{
+	return Scan == dml2_rotation_90 || Scan == dml2_rotation_270;
+}
+
+/*
+ * Map @sw_mode to a gfx version number: 12 for the dml2_sw_* modes, 11 for
+ * the dml2_gfx11_sw_* modes. Any other value is logged, triggers
+ * DML_ASSERT(0) and yields 0.
+ */
+static int unsigned dml_get_gfx_version(enum dml2_swizzle_mode sw_mode)
+{
+	if (sw_mode == dml2_sw_linear ||
+		sw_mode == dml2_sw_256b_2d ||
+		sw_mode == dml2_sw_4kb_2d ||
+		sw_mode == dml2_sw_64kb_2d ||
+		sw_mode == dml2_sw_256kb_2d)
+		return 12;
+
+	if (sw_mode == dml2_gfx11_sw_linear ||
+		sw_mode == dml2_gfx11_sw_64kb_d ||
+		sw_mode == dml2_gfx11_sw_64kb_d_t ||
+		sw_mode == dml2_gfx11_sw_64kb_d_x ||
+		sw_mode == dml2_gfx11_sw_64kb_r_x ||
+		sw_mode == dml2_gfx11_sw_256kb_d_x ||
+		sw_mode == dml2_gfx11_sw_256kb_r_x)
+		return 11;
+
+	DML_LOG_VERBOSE("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode);
+	DML_ASSERT(0);
+	return 0;
+}
+
+/*
+ * Derive per-plane byte sizes and request / tile-block geometry for a surface.
+ *
+ * Inputs: the source pixel format, the swizzle mode and the luma/chroma pitch.
+ * Outputs (all written through the pointers):
+ *  - BytePerPixelY/C and BytePerPixelDETY/C, from the format table below;
+ *  - BlockHeight256BytesY/C and BlockWidth256BytesY/C, chosen so that
+ *    width * height * bytes-per-pixel is 256;
+ *  - MacroTileHeightY/C and MacroTileWidthY/C, the same geometry scaled up to
+ *    the tile-block size of the swizzle mode;
+ *  - surf_linear128_l/c, true only for a dml2_sw_linear surface whose pitch in
+ *    bytes is not a multiple of 256 (chroma only for dual-plane formats).
+ *
+ * An unsupported SourcePixelFormat is logged and trips DML_ASSERT; the
+ * byte-per-pixel outputs then keep their initial values (1 and 0.0).
+ */
+static void CalculateBytePerPixelAndBlockSizes(
+	enum dml2_source_format_class SourcePixelFormat,
+	enum dml2_swizzle_mode SurfaceTiling,
+	unsigned int pitch_y,
+	unsigned int pitch_c,
+
+	// Output
+	unsigned int *BytePerPixelY,
+	unsigned int *BytePerPixelC,
+	double *BytePerPixelDETY,
+	double *BytePerPixelDETC,
+	unsigned int *BlockHeight256BytesY,
+	unsigned int *BlockHeight256BytesC,
+	unsigned int *BlockWidth256BytesY,
+	unsigned int *BlockWidth256BytesC,
+	unsigned int *MacroTileHeightY,
+	unsigned int *MacroTileHeightC,
+	unsigned int *MacroTileWidthY,
+	unsigned int *MacroTileWidthC,
+	bool *surf_linear128_l,
+	bool *surf_linear128_c)
+{
+	*BytePerPixelDETY = 0;
+	*BytePerPixelDETC = 0;
+	*BytePerPixelY = 1;
+	*BytePerPixelC = 1;
+
+	// Give both flags a defined value on every path. Only the linear case
+	// below can raise them, so without this a tiled (or single-plane linear)
+	// surface would hand back whatever the caller's storage held before.
+	*surf_linear128_l = false;
+	*surf_linear128_c = false;
+
+	if (SourcePixelFormat == dml2_444_64) {
+		*BytePerPixelDETY = 8;
+		*BytePerPixelDETC = 0;
+		*BytePerPixelY = 8;
+		*BytePerPixelC = 0;
+	} else if (SourcePixelFormat == dml2_444_32 || SourcePixelFormat == dml2_rgbe) {
+		*BytePerPixelDETY = 4;
+		*BytePerPixelDETC = 0;
+		*BytePerPixelY = 4;
+		*BytePerPixelC = 0;
+	} else if (SourcePixelFormat == dml2_444_16 || SourcePixelFormat == dml2_mono_16) {
+		*BytePerPixelDETY = 2;
+		*BytePerPixelDETC = 0;
+		*BytePerPixelY = 2;
+		*BytePerPixelC = 0;
+	} else if (SourcePixelFormat == dml2_444_8 || SourcePixelFormat == dml2_mono_8) {
+		*BytePerPixelDETY = 1;
+		*BytePerPixelDETC = 0;
+		*BytePerPixelY = 1;
+		*BytePerPixelC = 0;
+	} else if (SourcePixelFormat == dml2_rgbe_alpha) {
+		*BytePerPixelDETY = 4;
+		*BytePerPixelDETC = 1;
+		*BytePerPixelY = 4;
+		*BytePerPixelC = 1;
+	} else if (SourcePixelFormat == dml2_420_8) {
+		*BytePerPixelDETY = 1;
+		*BytePerPixelDETC = 2;
+		*BytePerPixelY = 1;
+		*BytePerPixelC = 2;
+	} else if (SourcePixelFormat == dml2_420_12) {
+		*BytePerPixelDETY = 2;
+		*BytePerPixelDETC = 4;
+		*BytePerPixelY = 2;
+		*BytePerPixelC = 4;
+	} else if (SourcePixelFormat == dml2_420_10) {
+		// The DET sizes are fractional here while the integer sizes are 2 and 4.
+		*BytePerPixelDETY = (double)(4.0 / 3);
+		*BytePerPixelDETC = (double)(8.0 / 3);
+		*BytePerPixelY = 2;
+		*BytePerPixelC = 4;
+	} else {
+		DML_LOG_VERBOSE("ERROR: DML::%s: SourcePixelFormat = %u not supported!\n", __func__, SourcePixelFormat);
+		DML_ASSERT(0);
+	}
+
+	DML_LOG_VERBOSE("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat);
+	DML_LOG_VERBOSE("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
+	DML_LOG_VERBOSE("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
+	DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY);
+	DML_LOG_VERBOSE("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC);
+	DML_LOG_VERBOSE("DML::%s: pitch_y = %u\n", __func__, pitch_y);
+	DML_LOG_VERBOSE("DML::%s: pitch_c = %u\n", __func__, pitch_c);
+
+	// dml_get_gfx_version() is still called first so an invalid swizzle mode
+	// is reported here exactly as before.
+	if (dml_get_gfx_version(SurfaceTiling) != 11 && SurfaceTiling == dml2_sw_linear) {
+		*surf_linear128_l = (((pitch_y * *BytePerPixelY) % 256) != 0);
+
+		if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)
+			*surf_linear128_c = (((pitch_c * *BytePerPixelC) % 256) != 0);
+	}
+
+	// Logged only once the flags hold the values this call computed.
+	DML_LOG_VERBOSE("DML::%s: surf_linear128_l = %u\n", __func__, *surf_linear128_l);
+	DML_LOG_VERBOSE("DML::%s: surf_linear128_c = %u\n", __func__, *surf_linear128_c);
+
+	if (!(dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)) {
+		// single plane: no chroma block
+		if (SurfaceTiling == dml2_sw_linear) {
+			*BlockHeight256BytesY = 1;
+		} else if (SourcePixelFormat == dml2_444_64) {
+			*BlockHeight256BytesY = 4;
+		} else if (SourcePixelFormat == dml2_444_8) {
+			*BlockHeight256BytesY = 16;
+		} else {
+			*BlockHeight256BytesY = 8;
+		}
+		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
+		*BlockHeight256BytesC = 0;
+		*BlockWidth256BytesC = 0;
+	} else { // dual plane
+		if (SurfaceTiling == dml2_sw_linear) {
+			*BlockHeight256BytesY = 1;
+			*BlockHeight256BytesC = 1;
+		} else if (SourcePixelFormat == dml2_rgbe_alpha) {
+			*BlockHeight256BytesY = 8;
+			*BlockHeight256BytesC = 16;
+		} else if (SourcePixelFormat == dml2_420_8) {
+			*BlockHeight256BytesY = 16;
+			*BlockHeight256BytesC = 8;
+		} else {
+			*BlockHeight256BytesY = 8;
+			*BlockHeight256BytesC = 8;
+		}
+		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
+		*BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
+	}
+	DML_LOG_VERBOSE("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY);
+	DML_LOG_VERBOSE("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY);
+	DML_LOG_VERBOSE("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC);
+	DML_LOG_VERBOSE("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC);
+
+	// In every branch below a chroma tile height of 0 (single-plane formats)
+	// yields a chroma tile width of 0 instead of dividing by zero.
+	if (dml_get_gfx_version(SurfaceTiling) == 11) {
+		if (SurfaceTiling == dml2_gfx11_sw_linear) {
+			*MacroTileHeightY = *BlockHeight256BytesY;
+			*MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
+			*MacroTileHeightC = *BlockHeight256BytesC;
+			if (*MacroTileHeightC == 0) {
+				*MacroTileWidthC = 0;
+			} else {
+				*MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
+			}
+		} else if (SurfaceTiling == dml2_gfx11_sw_64kb_d || SurfaceTiling == dml2_gfx11_sw_64kb_d_t || SurfaceTiling == dml2_gfx11_sw_64kb_d_x || SurfaceTiling == dml2_gfx11_sw_64kb_r_x) {
+			*MacroTileHeightY = 16 * *BlockHeight256BytesY;
+			*MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
+			*MacroTileHeightC = 16 * *BlockHeight256BytesC;
+			if (*MacroTileHeightC == 0) {
+				*MacroTileWidthC = 0;
+			} else {
+				*MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
+			}
+		} else {
+			// remaining gfx11 modes use a 65536 * 4 byte tile block
+			*MacroTileHeightY = 32 * *BlockHeight256BytesY;
+			*MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
+			*MacroTileHeightC = 32 * *BlockHeight256BytesC;
+			if (*MacroTileHeightC == 0) {
+				*MacroTileWidthC = 0;
+			} else {
+				*MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
+			}
+		}
+	} else {
+		unsigned int macro_tile_size_bytes = dml_get_tile_block_size_bytes(SurfaceTiling);
+		unsigned int macro_tile_scale = 1; // macro tile to 256B req scaling
+
+		if (SurfaceTiling == dml2_sw_linear) {
+			macro_tile_scale = 1;
+		} else if (SurfaceTiling == dml2_sw_4kb_2d) {
+			macro_tile_scale = 4;
+		} else if (SurfaceTiling == dml2_sw_64kb_2d) {
+			macro_tile_scale = 16;
+		} else if (SurfaceTiling == dml2_sw_256kb_2d) {
+			macro_tile_scale = 32;
+		} else {
+			DML_LOG_VERBOSE("ERROR: Invalid SurfaceTiling setting! val=%u\n", SurfaceTiling);
+			DML_ASSERT(0);
+		}
+
+		*MacroTileHeightY = macro_tile_scale * *BlockHeight256BytesY;
+		*MacroTileWidthY = macro_tile_size_bytes / *BytePerPixelY / *MacroTileHeightY;
+		*MacroTileHeightC = macro_tile_scale * *BlockHeight256BytesC;
+		if (*MacroTileHeightC == 0) {
+			*MacroTileWidthC = 0;
+		} else {
+			*MacroTileWidthC = macro_tile_size_bytes / *BytePerPixelC / *MacroTileHeightC;
+		}
+	}
+
+	DML_LOG_VERBOSE("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY);
+	DML_LOG_VERBOSE("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY);
+	DML_LOG_VERBOSE("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC);
+	DML_LOG_VERBOSE("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC);
+}
+
+/*
+ * Compute the scaler throughput of each plane and the DPP clock that one DPP
+ * would need to drive the surface on its own.
+ *
+ * PSCL_THROUGHPUT / PSCL_THROUGHPUT_CHROMA: the smaller of
+ * MaxDCHUBToPSCLThroughput and MaxPSCLToLBThroughput; when the horizontal
+ * ratio is above 1 the latter is first scaled by ratio / ceil(taps / 6).
+ * DPPCLKUsingSingleDPP: PixelClock times the largest of the vertical-tap term,
+ * the ratio / throughput term and 1, and at least 2 * PixelClock when more
+ * than 6 taps are used in either direction. The chroma plane contributes only
+ * when dml_is_420() is true or the format is dml2_rgbe_alpha; otherwise
+ * PSCL_THROUGHPUT_CHROMA is 0 and the luma clock is returned.
+ */
+static void CalculateSinglePipeDPPCLKAndSCLThroughput(
+	double HRatio,
+	double HRatioChroma,
+	double VRatio,
+	double VRatioChroma,
+	double MaxDCHUBToPSCLThroughput,
+	double MaxPSCLToLBThroughput,
+	double PixelClock,
+	enum dml2_source_format_class SourcePixelFormat,
+	unsigned int HTaps,
+	unsigned int HTapsChroma,
+	unsigned int VTaps,
+	unsigned int VTapsChroma,
+
+	// Output
+	double *PSCL_THROUGHPUT,
+	double *PSCL_THROUGHPUT_CHROMA,
+	double *DPPCLKUsingSingleDPP)
+{
+	double DPPCLKUsingSingleDPPLuma;
+	double DPPCLKUsingSingleDPPChroma;
+
+	if (HRatio > 1) {
+		*PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / math_ceil2((double)HTaps / 6.0, 1.0));
+	} else {
+		*PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
+	}
+
+	// The tap counts are unsigned integers: divide in floating point, as the
+	// horizontal taps above already do, so taps / 6 is not truncated.
+	DPPCLKUsingSingleDPPLuma = PixelClock * math_max3((double)VTaps / 6.0 * math_min2(1, HRatio), HRatio * VRatio / *PSCL_THROUGHPUT, 1);
+
+	if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
+		DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
+
+	if (!dml_is_420(SourcePixelFormat) && SourcePixelFormat != dml2_rgbe_alpha) {
+		// single plane: only luma drives the clock
+		*PSCL_THROUGHPUT_CHROMA = 0;
+		*DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
+	} else {
+		if (HRatioChroma > 1) {
+			*PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatioChroma / math_ceil2((double)HTapsChroma / 6.0, 1.0));
+		} else {
+			*PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
+		}
+		DPPCLKUsingSingleDPPChroma = PixelClock * math_max3((double)VTapsChroma / 6.0 * math_min2(1, HRatioChroma),
+			HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
+		if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
+			DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
+		// the more demanding plane sets the clock
+		*DPPCLKUsingSingleDPP = math_max2(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
+	}
+}
+
+/*
+ * Compute, for every active surface, the swath widths and the number of
+ * requests per swath.
+ *
+ * For surface k:
+ *  - SwathWidthSingleDPPY is the plane0 viewport width, or its height when the
+ *    rotation is 90/270; SwathWidthSingleDPPC is half of it when dml_is_420()
+ *    is true, otherwise equal to it.
+ *  - SwathWidthY is the per-pipe share: the full width when ForceSingleDPP is
+ *    set; for the ODM combine modes the smaller of the full width and
+ *    round(h_active / combine factor * h_ratio); half the width when
+ *    DPPPerSurface is 2. SwathWidthC follows the same chroma rule.
+ *  - MaximumSwathHeightY/C is the 256-byte block height, or the block width
+ *    when rotated 90/270.
+ *  - swath_width_luma_ub / swath_width_chroma_ub is the swath width rounded to
+ *    the request granularity and capped at the rounded-up surface size. A
+ *    stationary viewport on a single DPP is aligned from its actual start
+ *    coordinate; otherwise one extra request of slack is added.
+ *  - req_per_swath_ub_l/c is that width divided by the request granularity.
+ *
+ * The request width is halved for planes flagged in surf_linear128_l/c.
+ * BytePerPixY is not read; BytePerPixC only decides whether a chroma plane
+ * exists (non-zero).
+ */
+static void CalculateSwathWidth(
+	const struct dml2_display_cfg *display_cfg,
+	bool ForceSingleDPP,
+	unsigned int NumberOfActiveSurfaces,
+	enum dml2_odm_mode ODMMode[],
+	unsigned int BytePerPixY[],
+	unsigned int BytePerPixC[],
+	unsigned int Read256BytesBlockHeightY[],
+	unsigned int Read256BytesBlockHeightC[],
+	unsigned int Read256BytesBlockWidthY[],
+	unsigned int Read256BytesBlockWidthC[],
+	bool surf_linear128_l[],
+	bool surf_linear128_c[],
+	unsigned int DPPPerSurface[],
+
+	// Output
+	unsigned int req_per_swath_ub_l[],
+	unsigned int req_per_swath_ub_c[],
+	unsigned int SwathWidthSingleDPPY[], // post-rotated plane width
+	unsigned int SwathWidthSingleDPPC[],
+	unsigned int SwathWidthY[], // per-pipe
+	unsigned int SwathWidthC[], // per-pipe
+	unsigned int MaximumSwathHeightY[],
+	unsigned int MaximumSwathHeightC[],
+	unsigned int swath_width_luma_ub[], // per-pipe
+	unsigned int swath_width_chroma_ub[]) // per-pipe
+{
+	enum dml2_odm_mode MainSurfaceODMMode;
+	double odm_hactive_factor = 1.0;
+	unsigned int req_width_horz_y;
+	unsigned int req_width_horz_c;
+	unsigned int surface_width_ub_l;
+	unsigned int surface_height_ub_l;
+	unsigned int surface_width_ub_c;
+	unsigned int surface_height_ub_c;
+
+	DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
+	DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
+
+	for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+		// The swath runs along the viewport width, or along its height once
+		// the plane is rotated by 90/270.
+		if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
+			SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
+		} else {
+			SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
+		}
+
+		DML_LOG_VERBOSE("DML::%s: k=%u ViewportWidth=%lu\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width);
+		DML_LOG_VERBOSE("DML::%s: k=%u ViewportHeight=%lu\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height);
+		DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
+
+		MainSurfaceODMMode = ODMMode[k];
+
+		if (ForceSingleDPP) {
+			SwathWidthY[k] = SwathWidthSingleDPPY[k];
+		} else {
+			// odm_hactive_factor is only consumed in the combine branch
+			// below, where it has just been set for this surface.
+			if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1)
+				odm_hactive_factor = 4.0;
+			else if (MainSurfaceODMMode == dml2_odm_mode_combine_3to1)
+				odm_hactive_factor = 3.0;
+			else if (MainSurfaceODMMode == dml2_odm_mode_combine_2to1)
+				odm_hactive_factor = 2.0;
+
+			if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1 || MainSurfaceODMMode == dml2_odm_mode_combine_3to1 || MainSurfaceODMMode == dml2_odm_mode_combine_2to1) {
+				SwathWidthY[k] = (unsigned int)(math_min2((double)SwathWidthSingleDPPY[k], math_round((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active / odm_hactive_factor * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio)));
+			} else if (DPPPerSurface[k] == 2) {
+				SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
+			} else {
+				SwathWidthY[k] = SwathWidthSingleDPPY[k];
+			}
+		}
+
+		DML_LOG_VERBOSE("DML::%s: k=%u HActive=%lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active);
+		DML_LOG_VERBOSE("DML::%s: k=%u HRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
+		DML_LOG_VERBOSE("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode);
+		DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]);
+
+		if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
+			SwathWidthC[k] = SwathWidthY[k] / 2;
+			SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
+		} else {
+			SwathWidthC[k] = SwathWidthY[k];
+			SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
+		}
+
+		if (ForceSingleDPP == true) {
+			SwathWidthY[k] = SwathWidthSingleDPPY[k];
+			SwathWidthC[k] = SwathWidthSingleDPPC[k];
+		}
+
+		req_width_horz_y = Read256BytesBlockWidthY[k];
+		req_width_horz_c = Read256BytesBlockWidthC[k];
+
+		// planes flagged as linear128 use half the horizontal request width
+		if (surf_linear128_l[k])
+			req_width_horz_y = req_width_horz_y / 2;
+
+		if (surf_linear128_c[k])
+			req_width_horz_c = req_width_horz_c / 2;
+
+		surface_width_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.width, req_width_horz_y);
+		surface_height_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.height, Read256BytesBlockHeightY[k]);
+		surface_width_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.width, req_width_horz_c);
+		surface_height_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.height, Read256BytesBlockHeightC[k]);
+
+		DML_LOG_VERBOSE("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l);
+		DML_LOG_VERBOSE("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l);
+		DML_LOG_VERBOSE("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c);
+		DML_LOG_VERBOSE("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c);
+		DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y);
+		DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c);
+		DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y);
+		DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c);
+		DML_LOG_VERBOSE("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.stationary);
+		DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
+
+		req_per_swath_ub_l[k] = 0;
+		req_per_swath_ub_c[k] = 0;
+		if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
+			// Not rotated: the swath extends along x in units of the request width.
+			MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
+			MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
+			if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
+				swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start + SwathWidthY[k] + req_width_horz_y - 1, req_width_horz_y) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start, req_width_horz_y)));
+			} else {
+				swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_ceil2((double)SwathWidthY[k] - 1, req_width_horz_y) + req_width_horz_y));
+			}
+			req_per_swath_ub_l[k] = swath_width_luma_ub[k] / req_width_horz_y;
+
+			if (BytePerPixC[k] > 0) {
+				if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
+					// The horizontal extent is aligned from the chroma x start,
+					// mirroring the luma case above; y_start belongs to the
+					// rotated branch only.
+					swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start + SwathWidthC[k] + req_width_horz_c - 1, req_width_horz_c) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start, req_width_horz_c)));
+				} else {
+					swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_ceil2((double)SwathWidthC[k] - 1, req_width_horz_c) + req_width_horz_c));
+				}
+				req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / req_width_horz_c;
+			} else {
+				swath_width_chroma_ub[k] = 0;
+			}
+		} else {
+			// Rotated 90/270: the swath extends along y in units of the block
+			// height, and the block width becomes the swath height.
+			MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
+			MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
+
+			if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
+				swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, Read256BytesBlockHeightY[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start, Read256BytesBlockHeightY[k])));
+			} else {
+				swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_ceil2((double)SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]));
+			}
+			req_per_swath_ub_l[k] = swath_width_luma_ub[k] / Read256BytesBlockHeightY[k];
+			if (BytePerPixC[k] > 0) {
+				if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
+					swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + Read256BytesBlockHeightC[k] - 1, Read256BytesBlockHeightC[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, Read256BytesBlockHeightC[k])));
+				} else {
+					swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_ceil2((double)SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]));
+				}
+				req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / Read256BytesBlockHeightC[k];
+			} else {
+				swath_width_chroma_ub[k] = 0;
+			}
+		}
+
+		DML_LOG_VERBOSE("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u req_per_swath_ub_l=%u\n", __func__, k, req_per_swath_ub_l[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u req_per_swath_ub_c=%u\n", __func__, k, req_per_swath_ub_c[k]);
+	}
+}
+
+/*
+ * Decide whether unbounded requesting is enabled.
+ *
+ * It is permitted only with exactly one active DPP and NoChromaOrLinear set.
+ * Without a force, the result equals that permission; with unb_req_force_en,
+ * the forced value is honoured but can never enable what is not permitted.
+ */
+static bool UnboundedRequest(bool unb_req_force_en, bool unb_req_force_val, unsigned int TotalNumberOfActiveDPP, bool NoChromaOrLinear)
+{
+	const bool permitted = (TotalNumberOfActiveDPP == 1) && NoChromaOrLinear;
+	const bool enabled = unb_req_force_en ? (unb_req_force_val && permitted) : permitted;
+
+	DML_LOG_VERBOSE("DML::%s: unb_req_force_en = %u\n", __func__, unb_req_force_en);
+	DML_LOG_VERBOSE("DML::%s: unb_req_force_val = %u\n", __func__, unb_req_force_val);
+	DML_LOG_VERBOSE("DML::%s: unb_req_ok = %u\n", __func__, permitted);
+	DML_LOG_VERBOSE("DML::%s: unb_req_en = %u\n", __func__, enabled);
+
+	return enabled;
+}
+
+/*
+ * Decide how many KB of DET every surface receives and how large the
+ * compressed buffer is.
+ *
+ * With UnboundedRequestEnabled only surface 0 is sized: its override if one is
+ * given, otherwise room for two full swaths (at least 128, rounded up to the
+ * return-buffer segment size); the compressed buffer takes what is left of
+ * ConfigReturnBufferSizeInKByte.
+ *
+ * Otherwise each surface first gets a minimum allocation out of a pool of
+ * MaxTotalDETInKByte, and the rest of the pool is handed out either
+ *  - per stream by pixel clock and then per plane by read bandwidth, when
+ *    display_cfg->minimize_det_reallocation is set and every stream budget
+ *    covers the minimums of its planes, or
+ *  - across all surfaces in proportion to read bandwidth, smallest first.
+ * The compressed buffer is then MinCompressedBufferSizeInKByte.
+ *
+ * In both cases the compressed size is finally rescaled by
+ * CompressedBufferSegmentSizeInkByte / ConfigReturnBufferSegmentSizeInkByte.
+ * l is scratch storage and is zeroed on entry.
+ */
+static void CalculateDETBufferSize(
+	struct dml2_core_shared_CalculateDETBufferSize_locals *l,
+	const struct dml2_display_cfg *display_cfg,
+	bool ForceSingleDPP,
+	unsigned int NumberOfActiveSurfaces,
+	bool UnboundedRequestEnabled,
+	unsigned int nomDETInKByte,
+	unsigned int MaxTotalDETInKByte,
+	unsigned int ConfigReturnBufferSizeInKByte,
+	unsigned int MinCompressedBufferSizeInKByte,
+	unsigned int ConfigReturnBufferSegmentSizeInkByte,
+	unsigned int CompressedBufferSegmentSizeInkByte,
+	double ReadBandwidthLuma[],
+	double ReadBandwidthChroma[],
+	unsigned int full_swath_bytes_l[],
+	unsigned int full_swath_bytes_c[],
+	unsigned int DPPPerSurface[],
+	// Output
+	unsigned int DETBufferSizeInKByte[],
+	unsigned int *CompressedBufferSizeInkByte)
+{
+	bool det_piece_assigned[DML2_MAX_PLANES];
+	bool candidate_found;
+	bool per_stream_budget_ok = false;
+
+	memset(l, 0, sizeof(*l));
+
+	DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
+	DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
+	DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
+	DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled);
+	DML_LOG_VERBOSE("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte);
+	DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
+	DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte);
+	DML_LOG_VERBOSE("DML::%s: CompressedBufferSegmentSizeInkByte = %u\n", __func__, CompressedBufferSegmentSizeInkByte);
+
+	// The default DET size is kept whenever it already holds two swaths.
+	if (UnboundedRequestEnabled) {
+		if (display_cfg->plane_descriptors[0].overrides.det_size_override_kb > 0)
+			DETBufferSizeInKByte[0] = display_cfg->plane_descriptors[0].overrides.det_size_override_kb;
+		else
+			DETBufferSizeInKByte[0] = (unsigned int)math_max2(128.0, math_ceil2(2.0 * ((double)full_swath_bytes_l[0] + (double)full_swath_bytes_c[0]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte));
+
+		*CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
+	} else {
+		l->DETBufferSizePoolInKByte = MaxTotalDETInKByte;
+
+		/* Pass 1: give every surface its minimum and charge it to the pool. */
+		for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+			const unsigned int dpp_count = ForceSingleDPP ? 1 : DPPPerSurface[k];
+			const double two_swaths_kb = 2.0 * ((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0;
+
+			DETBufferSizeInKByte[k] = 0;
+			l->max_minDET = dml_is_420(display_cfg->plane_descriptors[k].pixel_format) ? nomDETInKByte - ConfigReturnBufferSegmentSizeInkByte : nomDETInKByte;
+			l->minDET = 128;
+			l->minDET_pipe = 0;
+
+			// grow one segment at a time until two full swaths fit
+			while (l->minDET <= l->max_minDET && l->minDET_pipe == 0) {
+				if (two_swaths_kb <= l->minDET)
+					l->minDET_pipe = l->minDET;
+				l->minDET += ConfigReturnBufferSegmentSizeInkByte;
+			}
+
+			DML_LOG_VERBOSE("DML::%s: k=%u minDET = %u\n", __func__, k, l->minDET);
+			DML_LOG_VERBOSE("DML::%s: k=%u max_minDET = %u\n", __func__, k, l->max_minDET);
+			DML_LOG_VERBOSE("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, l->minDET_pipe);
+			DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, full_swath_bytes_l[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, full_swath_bytes_c[k]);
+
+			// nothing up to max_minDET held two swaths: settle for one swath
+			if (l->minDET_pipe == 0) {
+				l->minDET_pipe = (unsigned int)(math_max2(128, math_ceil2(((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte)));
+				DML_LOG_VERBOSE("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, l->minDET_pipe);
+			}
+
+			if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
+				DETBufferSizeInKByte[k] = 0;
+			} else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0) {
+				DETBufferSizeInKByte[k] = display_cfg->plane_descriptors[k].overrides.det_size_override_kb;
+				l->DETBufferSizePoolInKByte -= dpp_count * display_cfg->plane_descriptors[k].overrides.det_size_override_kb;
+			} else if (dpp_count * l->minDET_pipe <= l->DETBufferSizePoolInKByte) {
+				DETBufferSizeInKByte[k] = l->minDET_pipe;
+				l->DETBufferSizePoolInKByte -= dpp_count * l->minDET_pipe;
+			}
+
+			DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.det_size_override_kb);
+			DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
+			DML_LOG_VERBOSE("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, l->DETBufferSizePoolInKByte);
+		}
+
+		if (display_cfg->minimize_det_reallocation) {
+			per_stream_budget_ok = true;
+			// To keep DET reallocation rare, DET is split across streams by pixel
+			// rate, and bandwidth only decides the split between the planes of one
+			// stream. Large redistributions then happen only when the stream count
+			// or a pixel rate changes, which is much less likely than a change in
+			// per-plane bandwidth.
+
+			// total pixel rate over all streams
+			for (unsigned int s = 0; s < display_cfg->num_streams; ++s)
+				l->TotalPixelRate += display_cfg->stream_descriptors[s].timing.pixel_clock_khz;
+
+			// DET budget of each stream, proportional to its pixel rate
+			for (unsigned int s = 0; s < display_cfg->num_streams; ++s) {
+				l->DETBudgetPerStream[s] = (unsigned int)((double) display_cfg->stream_descriptors[s].timing.pixel_clock_khz * MaxTotalDETInKByte / l->TotalPixelRate);
+				l->RemainingDETBudgetPerStream[s] = l->DETBudgetPerStream[s];
+			}
+
+			// total bandwidth of each stream, charging the minimums on the way
+			for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+				const unsigned int dpp_count = ForceSingleDPP ? 1 : DPPPerSurface[k];
+				const unsigned int stream = display_cfg->plane_descriptors[k].stream_index;
+
+				if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
+					continue;
+
+				l->TotalBandwidthPerStream[stream] += (unsigned int)(ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
+
+				// a stream whose budget cannot cover a minimum ends this strategy
+				if (l->RemainingDETBudgetPerStream[stream] < DETBufferSizeInKByte[k] * dpp_count) {
+					per_stream_budget_ok = false;
+					break;
+				}
+				l->RemainingDETBudgetPerStream[stream] -= DETBufferSizeInKByte[k] * dpp_count;
+			}
+
+			if (per_stream_budget_ok) {
+				// Every minimum fits its stream budget, so each stream's budget is
+				// simply shared out among its planes by bandwidth.
+				l->ResidualDETAfterRounding = MaxTotalDETInKByte;
+
+				for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+					const unsigned int dpp_count = ForceSingleDPP ? 1 : DPPPerSurface[k];
+					const unsigned int stream = display_cfg->plane_descriptors[k].stream_index;
+
+					if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
+						continue;
+
+					l->IdealDETBudget = (unsigned int)(((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidthPerStream[stream])
+						* l->DETBudgetPerStream[stream]);
+
+					if (l->IdealDETBudget > DETBufferSizeInKByte[k]) {
+						l->DeltaDETBudget = l->IdealDETBudget - DETBufferSizeInKByte[k];
+						if (l->DeltaDETBudget > l->RemainingDETBudgetPerStream[stream])
+							l->DeltaDETBudget = l->RemainingDETBudgetPerStream[stream];
+
+						/* the extra budget is split among the pipes of the plane */
+						DETBufferSizeInKByte[k] += (unsigned int)((double)l->DeltaDETBudget / dpp_count);
+						l->RemainingDETBudgetPerStream[stream] -= l->DeltaDETBudget;
+					}
+
+					// round down to a whole number of segments
+					DETBufferSizeInKByte[k] = (DETBufferSizeInKByte[k] / ConfigReturnBufferSegmentSizeInkByte) * ConfigReturnBufferSegmentSizeInkByte;
+
+					l->ResidualDETAfterRounding -= DETBufferSizeInKByte[k] * dpp_count;
+				}
+			}
+		}
+
+		if (!per_stream_budget_ok) {
+			/* Global strategy: share the pool by read bandwidth. */
+			l->TotalBandwidth = 0;
+			for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+				if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
+					continue;
+				l->TotalBandwidth = l->TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
+			}
+			DML_LOG_VERBOSE("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
+			for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k)
+				DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
+			DML_LOG_VERBOSE("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
+			DML_LOG_VERBOSE("DML::%s: TotalBandwidth = %f\n", __func__, l->TotalBandwidth);
+			l->BandwidthOfSurfacesNotAssignedDETPiece = l->TotalBandwidth;
+
+			// Surfaces that are phantom, overridden, or already hold at least
+			// their bandwidth share of the total DET are marked as done.
+			for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+				const unsigned int dpp_count = ForceSingleDPP ? 1 : DPPPerSurface[k];
+
+				if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
+					det_piece_assigned[k] = true;
+				} else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0 || (((double)dpp_count * (double)DETBufferSizeInKByte[k] / (double)MaxTotalDETInKByte) >= ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidth))) {
+					det_piece_assigned[k] = true;
+					l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
+				} else {
+					det_piece_assigned[k] = false;
+				}
+				DML_LOG_VERBOSE("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, det_piece_assigned[k]);
+				DML_LOG_VERBOSE("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, l->BandwidthOfSurfacesNotAssignedDETPiece);
+			}
+
+			// Each round serves the pending surface with the lowest bandwidth.
+			for (unsigned int j = 0; j < NumberOfActiveSurfaces; ++j) {
+				candidate_found = false;
+				l->NextSurfaceToAssignDETPiece = 0;
+
+				for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+					DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]);
+					DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]);
+					DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]);
+					DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
+					DML_LOG_VERBOSE("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, l->NextSurfaceToAssignDETPiece);
+					if (!det_piece_assigned[k] && (!candidate_found ||
+						ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece])) {
+						l->NextSurfaceToAssignDETPiece = k;
+						candidate_found = true;
+					}
+					DML_LOG_VERBOSE("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, det_piece_assigned[k]);
+					DML_LOG_VERBOSE("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, candidate_found);
+				}
+
+				if (candidate_found) {
+					const unsigned int next = l->NextSurfaceToAssignDETPiece;
+					const unsigned int next_dpp = ForceSingleDPP ? 1 : DPPPerSurface[next];
+					// allocation granule: one segment for every pipe of the surface
+					const unsigned int granule = next_dpp * ConfigReturnBufferSegmentSizeInkByte;
+
+					// bandwidth share of the pool in whole granules, capped at what
+					// the pool can still supply
+					l->NextDETBufferPieceInKByte = (unsigned int)(math_min2(
+						math_round((double)l->DETBufferSizePoolInKByte * (ReadBandwidthLuma[next] + ReadBandwidthChroma[next]) / l->BandwidthOfSurfacesNotAssignedDETPiece /
+							granule)
+						* next_dpp * ConfigReturnBufferSegmentSizeInkByte,
+						math_floor2((double)l->DETBufferSizePoolInKByte, granule)));
+
+					DML_LOG_VERBOSE("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, l->DETBufferSizePoolInKByte);
+					DML_LOG_VERBOSE("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, l->NextSurfaceToAssignDETPiece);
+					DML_LOG_VERBOSE("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthLuma[next]);
+					DML_LOG_VERBOSE("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthChroma[next]);
+					DML_LOG_VERBOSE("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, l->BandwidthOfSurfacesNotAssignedDETPiece);
+					DML_LOG_VERBOSE("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, l->NextDETBufferPieceInKByte);
+					DML_LOG_VERBOSE("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, l->NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[next]);
+
+					DETBufferSizeInKByte[next] += l->NextDETBufferPieceInKByte / next_dpp;
+					DML_LOG_VERBOSE("to %u\n", DETBufferSizeInKByte[next]);
+
+					l->DETBufferSizePoolInKByte -= l->NextDETBufferPieceInKByte;
+					det_piece_assigned[next] = true;
+					l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - (ReadBandwidthLuma[next] + ReadBandwidthChroma[next]);
+				}
+			}
+		}
+		*CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
+	}
+	// convert from return-buffer segments to compressed-buffer segments
+	*CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByte / ConfigReturnBufferSegmentSizeInkByte;
+
+	DML_LOG_VERBOSE("DML::%s: --- After bandwidth adjustment ---\n", __func__);
+	DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte);
+	for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k)
+		DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
+}
+
+/*
+ * Return the display clock needed for one stream.
+ *
+ * The pixel clock is divided by the ODM combine factor (4, 3 or 2 for the
+ * corresponding combine modes, otherwise left unchanged). When isTMDS420 is
+ * set, the result is never allowed below half of the pixel clock.
+ */
+static double CalculateRequiredDispclk(
+	enum dml2_odm_mode ODMMode,
+	double PixelClock,
+	bool isTMDS420)
+{
+	double required_clk;
+
+	switch (ODMMode) {
+	case dml2_odm_mode_combine_4to1:
+		required_clk = PixelClock / 4.0;
+		break;
+	case dml2_odm_mode_combine_3to1:
+		required_clk = PixelClock / 3.0;
+		break;
+	case dml2_odm_mode_combine_2to1:
+		required_clk = PixelClock / 2.0;
+		break;
+	default:
+		// every non-combine mode runs at the full pixel rate
+		required_clk = PixelClock;
+		break;
+	}
+
+	if (!isTMDS420)
+		return required_clk;
+
+	// TMDS 4:2:0 floor: half of the pixel clock
+	return math_max2(required_clk, PixelClock / 2.0);
+}
+
+/*
+ * Choose or validate the output bits-per-pixel for a link.
+ *
+ * With DesiredBPP == 0 the highest bpp the link can carry is returned:
+ * a 1/16-granular value clamped to the DSC range when DSCEnable is set,
+ * otherwise the largest of the three uncompressed candidates that fits.
+ * With a non-zero DesiredBPP the value is returned unchanged if it is legal
+ * for the format/output, and __DML2_CALCS_DPP_INVALID__ is returned otherwise.
+ *
+ * l, HTotal, HActive, DSCInputBitPerComponent, DSCSlices, AudioRate,
+ * AudioLayout and RequiredSlots are accepted but not used by this function.
+ */
+static double TruncToValidBPP(
+	struct dml2_core_shared_TruncToValidBPP_locals *l,
+	double LinkBitRate,
+	unsigned int Lanes,
+	unsigned int HTotal,
+	unsigned int HActive,
+	double PixelClock,
+	double DesiredBPP,
+	bool DSCEnable,
+	enum dml2_output_encoder_class Output,
+	enum dml2_output_format_class Format,
+	unsigned int DSCInputBitPerComponent,
+	unsigned int DSCSlices,
+	unsigned int AudioRate,
+	unsigned int AudioLayout,
+	enum dml2_odm_mode ODMModeNoDSC,
+	enum dml2_odm_mode ODMModeDSC,
+
+	// Output
+	unsigned int *RequiredSlots)
+{
+	// uncompressed bpp candidates, lowest first
+	unsigned int non_dsc_bpp[3];
+	unsigned int dsc_bpp_floor;
+	// the DSC upper bound is 16 for every format/output combination
+	double dsc_bpp_ceiling = 16;
+	double link_bpp_limit;
+	int idx;
+
+	if (Format == dml2_420) {
+		non_dsc_bpp[0] = 12;
+		non_dsc_bpp[1] = 15;
+		non_dsc_bpp[2] = 18;
+		dsc_bpp_floor = 6;
+	} else if (Format == dml2_444) {
+		non_dsc_bpp[0] = 24;
+		non_dsc_bpp[1] = 30;
+		non_dsc_bpp[2] = 36;
+		dsc_bpp_floor = 8;
+	} else {
+		if (Output == dml2_hdmi || Output == dml2_hdmifrl) {
+			// only one uncompressed depth is offered here
+			non_dsc_bpp[0] = 24;
+			non_dsc_bpp[1] = 24;
+			non_dsc_bpp[2] = 24;
+		} else {
+			non_dsc_bpp[0] = 16;
+			non_dsc_bpp[1] = 20;
+			non_dsc_bpp[2] = 24;
+		}
+		dsc_bpp_floor = (Format == dml2_n422 || Output == dml2_hdmifrl) ? 7 : 8;
+	}
+
+	// payload bits per pixel the link can carry after encoding overhead
+	if (Output == dml2_dp2p0)
+		link_bpp_limit = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 65540.0;
+	else if (DSCEnable && Output == dml2_dp)
+		link_bpp_limit = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100);
+	else
+		link_bpp_limit = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock;
+
+	if ((DSCEnable ? ODMModeDSC : ODMModeNoDSC) == dml2_odm_mode_split_1to2)
+		link_bpp_limit = 2 * link_bpp_limit;
+
+	if (DesiredBPP != 0) {
+		// caller asked for a specific bpp: only check that it is legal
+		if (DSCEnable) {
+			if (DesiredBPP >= dsc_bpp_floor && DesiredBPP <= dsc_bpp_ceiling)
+				return DesiredBPP;
+		} else if (DesiredBPP == non_dsc_bpp[2] || DesiredBPP == non_dsc_bpp[1] || DesiredBPP == non_dsc_bpp[0]) {
+			return DesiredBPP;
+		}
+		return __DML2_CALCS_DPP_INVALID__;
+	}
+
+	if (DSCEnable) {
+		if (link_bpp_limit < dsc_bpp_floor)
+			return __DML2_CALCS_DPP_INVALID__;
+		if (link_bpp_limit >= dsc_bpp_ceiling)
+			return dsc_bpp_ceiling;
+		// truncate to a multiple of 1/16 bpp
+		return math_floor2(16.0 * link_bpp_limit, 1.0) / 16.0;
+	}
+
+	// uncompressed: take the largest candidate the link can carry
+	for (idx = 2; idx >= 0; idx--) {
+		if (link_bpp_limit >= non_dsc_bpp[idx])
+			return non_dsc_bpp[idx];
+	}
+	return __DML2_CALCS_DPP_INVALID__;
+}
+
+/*
+ * updated for dcn4
+ *
+ * Compute the delay contributed by the DSC encoder and return it in the
+ * quantity the code calls "pixels": (group_delay - 1) * cycles_per_group + 3
+ * plus the fixed ssm/obsm pipeline delay.
+ *
+ * bpc         - source bits per component; selects the ssm priming delay
+ * BPP         - target bits per pixel; used as a floating-point divisor
+ * sliceWidth  - pixels per slice line; sliceWidth (or sliceWidth / 2) is used
+ *               as an integer divisor below, so it must not reduce to zero
+ * numSlices   - slices in the horizontal direction
+ * pixelFormat - output pixel format
+ * Output      - output encoder class; dml2_hdmifrl is special-cased
+ */
+static unsigned int dscceComputeDelay(
+ unsigned int bpc,
+ double BPP,
+ unsigned int sliceWidth,
+ unsigned int numSlices,
+ enum dml2_output_format_class pixelFormat,
+ enum dml2_output_encoder_class Output)
+{
+ // valid bpc = source bits per component in the set of {8, 10, 12}
+ // valid bpp = increments of 1/16 of a bit
+ // min = 6/7/8 in N420/N422/444, respectively
+ // max = such that compression is 1:1
+ // valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
+ // valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
+ // valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
+
+ // fixed value
+ unsigned int rcModelSize = 8192;
+
+ // N422/N420 operate at 2 pixels per clock
+ unsigned int pixelsPerClock, padding_pixels, ssm_group_priming_delay, ssm_pipeline_delay, obsm_pipeline_delay, slice_padded_pixels, ixd_plus_padding, ixd_plus_padding_groups, cycles_per_group, group_delay, pipeline_delay, pixels, additional_group_delay, lines_to_reach_ixd, groups_to_reach_ixd, slice_width_groups, initial_xmit_delay, number_of_lines_to_reach_ixd, slice_width_modified;
+
+ if (pixelFormat == dml2_420)
+ pixelsPerClock = 2;
+ // all other modes operate at 1 pixel per clock, except N422 and any non-420/444 format on HDMI FRL (2 pixels per clock)
+ else if (pixelFormat == dml2_444)
+ pixelsPerClock = 1;
+ else if (pixelFormat == dml2_n422 || Output == dml2_hdmifrl)
+ pixelsPerClock = 2;
+ else
+ pixelsPerClock = 1;
+
+ // initial transmit delay as per PPS
+ initial_xmit_delay = (unsigned int)(math_round(rcModelSize / 2.0 / BPP / pixelsPerClock));
+
+ // slice width as seen by dscc_bcl in pixels or pixel pairs (depending on number of pixels per pixel container based on pixel format)
+ slice_width_modified = (pixelFormat == dml2_444 || pixelFormat == dml2_420 || Output == dml2_hdmifrl) ? sliceWidth / 2 : sliceWidth;
+
+ padding_pixels = ((slice_width_modified % 3) != 0) ? (3 - (slice_width_modified % 3)) * (initial_xmit_delay / slice_width_modified) : 0;
+
+ if ((3.0 * pixelsPerClock * BPP) >= ((double)((initial_xmit_delay + 2) / 3) * (double)(3 + (pixelFormat == dml2_n422)))) {
+ if ((initial_xmit_delay + padding_pixels) % 3 == 1) {
+ initial_xmit_delay++;
+ }
+ }
+
+ // sub-stream multiplexer balance fifo priming delay in groups as per dsc standard
+ if (bpc == 8)
+ ssm_group_priming_delay = 83;
+ else if (bpc == 10)
+ ssm_group_priming_delay = 91;
+ else if (bpc == 12)
+ ssm_group_priming_delay = 115;
+ else if (bpc == 14)
+ ssm_group_priming_delay = 123;
+ else
+ ssm_group_priming_delay = 128;
+
+ // slice width in groups is rounded up to the nearest group as DSC adds padded pixels such that there are an integer number of groups per slice
+ slice_width_groups = (slice_width_modified + 2) / 3;
+
+ // determine number of padded pixels in the last group of a slice line, computed as 3 * groups - width
+ slice_padded_pixels = 3 * slice_width_groups - slice_width_modified;
+
+ // determine integer number of complete slice lines required to reach initial transmit delay without ssm delay considered
+ number_of_lines_to_reach_ixd = initial_xmit_delay / slice_width_modified;
+
+ // increase initial transmit delay by the number of padded pixels added to a slice line multiplied by the integer number of complete lines to reach initial transmit delay
+ // this step is necessary as each padded pixel added takes up a clock cycle and, therefore, adds to the overall delay
+ ixd_plus_padding = initial_xmit_delay + slice_padded_pixels * number_of_lines_to_reach_ixd;
+
+ // convert the padded initial transmit delay from pixels to groups by rounding up to the nearest group as DSC processes in groups of pixels
+ ixd_plus_padding_groups = (ixd_plus_padding + 2) / 3;
+
+ // number of groups required for a slice to reach initial transmit delay is the sum of the padded initial transmit delay plus the ssm group priming delay
+ groups_to_reach_ixd = ixd_plus_padding_groups + ssm_group_priming_delay;
+
+ // number of lines required to reach padded initial transmit delay in groups in slices to the left of the last horizontal slice
+ // needs to be rounded up as complete slice lines are buffered prior to initial transmit delay being reached in the last horizontal slice
+ lines_to_reach_ixd = (groups_to_reach_ixd + slice_width_groups - 1) / slice_width_groups; // round up lines to reach ixd to the next whole line
+
+ // determine if there are non-zero number of pixels reached in the group where initial transmit delay is reached
+ // an additional group time (i.e., 3 pixel times) is required before the first output if there are no additional pixels beyond initial transmit delay
+ additional_group_delay = ((initial_xmit_delay - number_of_lines_to_reach_ixd * slice_width_modified) % 3) == 0 ? 1 : 0;
+
+ // number of pipeline delay cycles in the ssm block (can be determined empirically or analytically by inspecting the ssm block)
+ ssm_pipeline_delay = 2;
+
+ // number of pipe delay cycles in the obsm block (can be determined empirically or analytically by inspecting the obsm block)
+ obsm_pipeline_delay = 1;
+
+ /* a group of pixels is worth 6 pixels in N422/N420 mode or 3 pixels in all other modes
+  * NOTE(review): the condition below also selects 6 for dml2_444 and for any dml2_hdmifrl output -- confirm which is intended
+  */
+ if (pixelFormat == dml2_420 || pixelFormat == dml2_444 || pixelFormat == dml2_n422 || Output == dml2_hdmifrl)
+ cycles_per_group = 6;
+ else
+ cycles_per_group = 3;
+ // delay of the bit stream construction layer in pixels is the sum of:
+ // 1. number of pixel containers in a slice line multiplied by the number of lines required to reach initial transmit delay multiplied by number of slices to the left of the last horizontal slice
+ // 2. number of pixel containers required to reach initial transmit delay (specifically, in the last horizontal slice)
+ // 3. additional group of delay if initial transmit delay is reached exactly in a group
+ // 4. ssm and obsm pipeline delay (i.e., clock cycles of delay)
+ group_delay = (lines_to_reach_ixd * slice_width_groups * (numSlices - 1)) + groups_to_reach_ixd + additional_group_delay;
+ pipeline_delay = ssm_pipeline_delay + obsm_pipeline_delay;
+
+ // pixel delay is group_delay (converted to pixels) + pipeline, however, first group is a special case since it is processed as soon as it arrives (i.e., in 3 cycles regardless of pixel format)
+ pixels = (group_delay - 1) * cycles_per_group + 3 + pipeline_delay;
+
+ DML_LOG_VERBOSE("DML::%s: bpc: %u\n", __func__, bpc);
+ DML_LOG_VERBOSE("DML::%s: BPP: %f\n", __func__, BPP);
+ DML_LOG_VERBOSE("DML::%s: sliceWidth: %u\n", __func__, sliceWidth);
+ DML_LOG_VERBOSE("DML::%s: numSlices: %u\n", __func__, numSlices);
+ DML_LOG_VERBOSE("DML::%s: pixelFormat: %u\n", __func__, pixelFormat);
+ DML_LOG_VERBOSE("DML::%s: Output: %u\n", __func__, Output);
+ DML_LOG_VERBOSE("DML::%s: pixels: %u\n", __func__, pixels);
+ return pixels;
+}
+
+/*
+ * updated in dcn4
+ *
+ * Add up the fixed delays of the DSC path (sfr, dscc top, optional dsccif,
+ * input deserializer, input/output cdc fifos, cdc uncertainty, sft) for the
+ * given pixel format and output encoder, and return the total.
+ */
+static unsigned int dscComputeDelay(enum dml2_output_format_class pixelFormat, enum dml2_output_encoder_class Output)
+{
+	// 4:2:0 takes priority, then the N422 / "FRL and not 4:4:4" path, then S422
+	const bool is_420 = (pixelFormat == dml2_420);
+	const bool is_n422_or_frl = !is_420 &&
+		(pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444));
+	const bool is_s422 = !is_420 && !is_n422_or_frl && (pixelFormat == dml2_s422);
+
+	// the 4:2:0 and N422/FRL paths use twice as many dispclk cycles per dscclk
+	const unsigned int clk_ratio = (is_420 || is_n422_or_frl) ? 3 * 2 : 3;
+	// dscc top delay for pixel compression layer, in dscclk cycles
+	const unsigned int top_layer_cycles = is_s422 ? 17 : 16;
+	// dscc - input deserializer
+	const unsigned int deserializer_delay = (is_420 || is_n422_or_frl) ? 5 : 3;
+	unsigned int total;
+
+	// sfr
+	total = 2;
+
+	total += top_layer_cycles * clk_ratio;
+
+	// dsccif (only on the N422 / FRL path)
+	if (is_n422_or_frl)
+		total += 1;
+
+	total += deserializer_delay;
+
+	// dscc - input cdc fifo
+	total += 1 + 4 * clk_ratio;
+
+	// dscc - output cdc fifo
+	total += 3 + 1 * clk_ratio;
+
+	// dscc - cdc uncertainty
+	total += 3 + 3 * clk_ratio;
+
+	// sft
+	total += 1;
+
+	DML_LOG_VERBOSE("DML::%s: pixelFormat = %u\n", __func__, pixelFormat);
+	DML_LOG_VERBOSE("DML::%s: Delay = %u\n", __func__, total);
+
+	return total;
+}
+
+/*
+ * Return the number of host-VM page table levels to account for.
+ *
+ * The result is 0 unless both GPUVMEnable and HostVMEnable are set. Otherwise
+ * it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 (never below
+ * zero) as HostVMMinPageSize crosses the 2048 and 1048576 thresholds.
+ */
+static unsigned int CalculateHostVMDynamicLevels(
+	bool GPUVMEnable,
+	bool HostVMEnable,
+	unsigned int HostVMMinPageSize,
+	unsigned int HostVMMaxNonCachedPageTableLevels)
+{
+	unsigned int levels_saved;
+
+	if (!GPUVMEnable || !HostVMEnable)
+		return 0;
+
+	// smallest pages: every non-cached level counts
+	if (HostVMMinPageSize < 2048)
+		return HostVMMaxNonCachedPageTableLevels;
+
+	levels_saved = (HostVMMinPageSize < 1048576) ? 1 : 2;
+
+	// subtract in floating point so the result clamps at zero instead of wrapping
+	return (unsigned int)math_max2(0, (double)HostVMMaxNonCachedPageTableLevels - levels_saved);
+}
+
+/*
+ * Compute the per-surface meta-row and page-table (PTE/PDE) fetch sizes.
+ *
+ * All inputs are read from *p and all results are written through the output
+ * pointers held in *p (meta request/row dimensions and bytes, meta PTE bytes
+ * per frame, dpte row height/width/bytes for both the normal and the
+ * one-row-per-frame case, vmpg dimensions, PTE request size and dimensions,
+ * dpde0 bytes per frame).
+ *
+ * Returns vm_bytes = meta_pte_bytes_per_frame_ub + extra_mpde_bytes +
+ * dpde0_bytes_per_frame_ub + extra_dpde_bytes. When GPUVMEnable is false the
+ * PTE/PDE outputs are zeroed and 0 is returned; the meta row outputs have
+ * already been written at that point.
+ */ static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_and_row_bytes_params *p)
+{
+ unsigned int extra_dpde_bytes;
+ unsigned int extra_mpde_bytes;
+ unsigned int MacroTileSizeBytes;
+ unsigned int vp_height_dpte_ub;
+
+ unsigned int meta_surface_bytes;
+ unsigned int vm_bytes;
+ unsigned int vp_height_meta_ub;
+ unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
+
+ *p->MetaRequestHeight = 8 * p->BlockHeight256Bytes;
+ *p->MetaRequestWidth = 8 * p->BlockWidth256Bytes;
+ if (p->SurfaceTiling == dml2_sw_linear) {
+ *p->meta_row_height = 32;
+ *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth));
+ *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0); // FIXME_DCN4SW missing in old code but no dcc for linear anyways?
+ } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
+ *p->meta_row_height = *p->MetaRequestHeight;
+ if (p->ViewportStationary && p->NumberOfDPPs == 1) {
+ *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth));
+ } else {
+ *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestWidth) + *p->MetaRequestWidth);
+ }
+ *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0);
+ } else { // vertical rotation: meta request width and height swap roles
+ *p->meta_row_height = *p->MetaRequestWidth;
+ if (p->ViewportStationary && p->NumberOfDPPs == 1) {
+ *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->MetaRequestHeight - 1, *p->MetaRequestHeight) - math_floor2(p->ViewportYStart, *p->MetaRequestHeight));
+ } else {
+ *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestHeight) + *p->MetaRequestHeight);
+ }
+ *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestWidth * p->BytePerPixel / 256.0);
+ }
+
+ if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) {
+ vp_height_meta_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + 64 * p->BlockHeight256Bytes - 1, 64 * p->BlockHeight256Bytes) - math_floor2(p->ViewportYStart, 64 * p->BlockHeight256Bytes));
+ } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
+ vp_height_meta_ub = (unsigned int)(math_ceil2(p->ViewportHeight - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes);
+ } else {
+ vp_height_meta_ub = (unsigned int)(math_ceil2(p->SwathWidth - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes);
+ }
+
+ meta_surface_bytes = (unsigned int)(p->DCCMetaPitch * vp_height_meta_ub * p->BytePerPixel / 256.0);
+ DML_LOG_VERBOSE("DML::%s: DCCMetaPitch = %u\n", __func__, p->DCCMetaPitch);
+ DML_LOG_VERBOSE("DML::%s: meta_surface_bytes = %u\n", __func__, meta_surface_bytes);
+ if (p->GPUVMEnable == true) {
+ double meta_vmpg_bytes = 4.0 * 1024.0;
+ *p->meta_pte_bytes_per_frame_ub = (unsigned int)((math_ceil2((double) (meta_surface_bytes - meta_vmpg_bytes) / (8 * meta_vmpg_bytes), 1) + 1) * 64);
+ extra_mpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 1);
+ } else {
+ *p->meta_pte_bytes_per_frame_ub = 0;
+ extra_mpde_bytes = 0;
+ }
+
+ if (!p->DCCEnable || !p->mrq_present) { // no meta traffic without DCC or without mrq_present: override the values computed above
+ *p->meta_pte_bytes_per_frame_ub = 0;
+ extra_mpde_bytes = 0;
+ *p->meta_row_bytes = 0;
+ }
+
+ if (!p->GPUVMEnable) { // no GPUVM: zero every PTE/PDE output and report 0 VM bytes
+ *p->PixelPTEBytesPerRow = 0;
+ *p->PixelPTEBytesPerRowStorage = 0;
+ *p->dpte_row_width_ub = 0;
+ *p->dpte_row_height = 0;
+ *p->dpte_row_height_linear = 0;
+ *p->PixelPTEBytesPerRow_one_row_per_frame = 0;
+ *p->dpte_row_width_ub_one_row_per_frame = 0;
+ *p->dpte_row_height_one_row_per_frame = 0;
+ *p->vmpg_width = 0;
+ *p->vmpg_height = 0;
+ *p->PixelPTEReqWidth = 0;
+ *p->PixelPTEReqHeight = 0;
+ *p->PTERequestSize = 0;
+ *p->dpde0_bytes_per_frame_ub = 0;
+ return 0;
+ }
+
+ MacroTileSizeBytes = p->MacroTileWidth * p->BytePerPixel * p->MacroTileHeight;
+
+ if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) {
+ vp_height_dpte_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + p->MacroTileHeight - 1, p->MacroTileHeight) - math_floor2(p->ViewportYStart, p->MacroTileHeight));
+ } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
+ vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->ViewportHeight - 1, p->MacroTileHeight) + p->MacroTileHeight);
+ } else {
+ vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->SwathWidth - 1, p->MacroTileHeight) + p->MacroTileHeight);
+ }
+
+ if (p->GPUVMEnable == true && p->GPUVMMaxPageTableLevels > 1) {
+ *p->dpde0_bytes_per_frame_ub = (unsigned int)(64 * (math_ceil2((double)(p->Pitch * vp_height_dpte_ub * p->BytePerPixel - MacroTileSizeBytes) / (double)(8 * 2097152), 1) + 1));
+ extra_dpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 2);
+ } else {
+ *p->dpde0_bytes_per_frame_ub = 0;
+ extra_dpde_bytes = 0;
+ }
+
+ vm_bytes = *p->meta_pte_bytes_per_frame_ub + extra_mpde_bytes + *p->dpde0_bytes_per_frame_ub + extra_dpde_bytes;
+
+ DML_LOG_VERBOSE("DML::%s: DCCEnable = %u\n", __func__, p->DCCEnable);
+ DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
+ DML_LOG_VERBOSE("DML::%s: SwModeLinear = %u\n", __func__, p->SurfaceTiling == dml2_sw_linear);
+ DML_LOG_VERBOSE("DML::%s: BytePerPixel = %u\n", __func__, p->BytePerPixel);
+ DML_LOG_VERBOSE("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, p->GPUVMMaxPageTableLevels);
+ DML_LOG_VERBOSE("DML::%s: BlockHeight256Bytes = %u\n", __func__, p->BlockHeight256Bytes);
+ DML_LOG_VERBOSE("DML::%s: BlockWidth256Bytes = %u\n", __func__, p->BlockWidth256Bytes);
+ DML_LOG_VERBOSE("DML::%s: MacroTileHeight = %u\n", __func__, p->MacroTileHeight);
+ DML_LOG_VERBOSE("DML::%s: MacroTileWidth = %u\n", __func__, p->MacroTileWidth);
+ DML_LOG_VERBOSE("DML::%s: meta_pte_bytes_per_frame_ub = %u\n", __func__, *p->meta_pte_bytes_per_frame_ub);
+ DML_LOG_VERBOSE("DML::%s: dpde0_bytes_per_frame_ub = %u\n", __func__, *p->dpde0_bytes_per_frame_ub);
+ DML_LOG_VERBOSE("DML::%s: extra_mpde_bytes = %u\n", __func__, extra_mpde_bytes);
+ DML_LOG_VERBOSE("DML::%s: extra_dpde_bytes = %u\n", __func__, extra_dpde_bytes);
+ DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, vm_bytes);
+ DML_LOG_VERBOSE("DML::%s: ViewportHeight = %u\n", __func__, p->ViewportHeight);
+ DML_LOG_VERBOSE("DML::%s: SwathWidth = %u\n", __func__, p->SwathWidth);
+ DML_LOG_VERBOSE("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub);
+
+ if (p->SurfaceTiling == dml2_sw_linear) {
+ *p->PixelPTEReqHeight = 1;
+ *p->PixelPTEReqWidth = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel;
+ PixelPTEReqWidth_linear = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel;
+ *p->PTERequestSize = 64;
+
+ *p->vmpg_height = 1;
+ *p->vmpg_width = p->GPUVMMinPageSizeKBytes * 1024 / p->BytePerPixel;
+ } else if (p->GPUVMMinPageSizeKBytes * 1024 >= dml_get_tile_block_size_bytes(p->SurfaceTiling)) { // 1 64B 8x1 PTE
+ *p->PixelPTEReqHeight = p->MacroTileHeight;
+ *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
+ *p->PTERequestSize = 64;
+
+ *p->vmpg_height = p->MacroTileHeight;
+ *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
+
+ } else if (p->GPUVMMinPageSizeKBytes == 4 && dml_get_tile_block_size_bytes(p->SurfaceTiling) == 65536) { // 2 64B PTE requests to get 16 PTEs to cover the 64K tile
+ // one 64KB tile is 16x16 256B requests
+ *p->PixelPTEReqHeight = 16 * p->BlockHeight256Bytes;
+ *p->PixelPTEReqWidth = 16 * p->BlockWidth256Bytes;
+ *p->PTERequestSize = 128;
+
+ *p->vmpg_height = *p->PixelPTEReqHeight;
+ *p->vmpg_width = *p->PixelPTEReqWidth;
+ } else {
+ // defaults so the rest of the calculation can go through; when VM is disabled the calculated PTE-related values should not be used anyway
+ *p->PixelPTEReqHeight = p->MacroTileHeight;
+ *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
+ *p->PTERequestSize = 64;
+
+ *p->vmpg_height = p->MacroTileHeight;
+ *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
+
+ if (p->GPUVMEnable == true) { // unsupported page-size / tiling combination: log it and assert
+ DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes=%u and sw_mode=%u (tile_size=%d) not supported!\n",
+ __func__, p->GPUVMMinPageSizeKBytes, p->SurfaceTiling, dml_get_tile_block_size_bytes(p->SurfaceTiling));
+ DML_ASSERT(0);
+ }
+ }
+
+ DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes);
+ DML_LOG_VERBOSE("DML::%s: PixelPTEReqHeight = %u\n", __func__, *p->PixelPTEReqHeight);
+ DML_LOG_VERBOSE("DML::%s: PixelPTEReqWidth = %u\n", __func__, *p->PixelPTEReqWidth);
+ DML_LOG_VERBOSE("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear);
+ DML_LOG_VERBOSE("DML::%s: PTERequestSize = %u\n", __func__, *p->PTERequestSize);
+ DML_LOG_VERBOSE("DML::%s: Pitch = %u\n", __func__, p->Pitch);
+ DML_LOG_VERBOSE("DML::%s: vmpg_width = %u\n", __func__, *p->vmpg_width);
+ DML_LOG_VERBOSE("DML::%s: vmpg_height = %u\n", __func__, *p->vmpg_height);
+
+ *p->dpte_row_height_one_row_per_frame = vp_height_dpte_ub; // one-row-per-frame case: a single dpte row spans the whole viewport height upper bound
+ *p->dpte_row_width_ub_one_row_per_frame = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height_one_row_per_frame / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * (double)*p->PixelPTEReqWidth);
+ *p->PixelPTEBytesPerRow_one_row_per_frame = (unsigned int)((double)*p->dpte_row_width_ub_one_row_per_frame / (double)*p->PixelPTEReqWidth * *p->PTERequestSize);
+ *p->dpte_row_height_linear = 0;
+
+ if (p->SurfaceTiling == dml2_sw_linear) {
+ *p->dpte_row_height = (unsigned int)(math_min2(128, (double)(1ULL << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * *p->PixelPTEReqWidth / p->Pitch), 2.0), 1)))); // power of two derived from PTEBufferSizeInRequests * PixelPTEReqWidth / Pitch, capped at 128
+ *p->dpte_row_width_ub = (unsigned int)(math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height - 1), (double)*p->PixelPTEReqWidth) + *p->PixelPTEReqWidth);
+ *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqWidth * *p->PTERequestSize);
+
+ // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
+ *p->dpte_row_height_linear = (unsigned int)1 << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * PixelPTEReqWidth_linear / p->Pitch), 2.0), 1);
+ if (*p->dpte_row_height_linear > 128)
+ *p->dpte_row_height_linear = 128;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *p->dpte_row_width_ub);
+#endif
+
+ } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
+ *p->dpte_row_height = *p->PixelPTEReqHeight;
+
+ if (p->GPUVMMinPageSizeKBytes > 64) {
+ *p->dpte_row_width_ub = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * *p->PixelPTEReqWidth);
+ } else if (p->ViewportStationary && (p->NumberOfDPPs == 1)) {
+ *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->PixelPTEReqWidth - 1, *p->PixelPTEReqWidth) - math_floor2(p->ViewportXStart, *p->PixelPTEReqWidth));
+ } else {
+ *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqWidth, 1) + 1.0) * *p->PixelPTEReqWidth);
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *p->dpte_row_width_ub);
+#endif
+
+ *p->PixelPTEBytesPerRow = *p->dpte_row_width_ub / *p->PixelPTEReqWidth * *p->PTERequestSize;
+ } else {
+ *p->dpte_row_height = (unsigned int)(math_min2(*p->PixelPTEReqWidth, p->MacroTileWidth));
+
+ if (p->ViewportStationary && (p->NumberOfDPPs == 1)) {
+ *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->PixelPTEReqHeight - 1, *p->PixelPTEReqHeight) - math_floor2(p->ViewportYStart, *p->PixelPTEReqHeight));
+ } else {
+ *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqHeight, 1) + 1) * *p->PixelPTEReqHeight);
+ }
+
+ *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqHeight * *p->PTERequestSize);
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *p->dpte_row_width_ub);
+#endif
+ }
+
+ if (p->GPUVMEnable != true) { // the !GPUVMEnable case has already returned earlier in this function
+ *p->PixelPTEBytesPerRow = 0;
+ *p->PixelPTEBytesPerRow_one_row_per_frame = 0;
+ }
+
+ *p->PixelPTEBytesPerRowStorage = *p->PixelPTEBytesPerRow;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes);
+ DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
+ DML_LOG_VERBOSE("DML::%s: meta_row_height = %u\n", __func__, *p->meta_row_height);
+ DML_LOG_VERBOSE("DML::%s: dpte_row_height = %u\n", __func__, *p->dpte_row_height);
+ DML_LOG_VERBOSE("DML::%s: dpte_row_height_linear = %u\n", __func__, *p->dpte_row_height_linear);
+ DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u\n", __func__, *p->dpte_row_width_ub);
+ DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *p->PixelPTEBytesPerRow);
+ DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *p->PixelPTEBytesPerRowStorage);
+ DML_LOG_VERBOSE("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, p->PTEBufferSizeInRequests);
+ DML_LOG_VERBOSE("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *p->dpte_row_height_one_row_per_frame);
+ DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *p->dpte_row_width_ub_one_row_per_frame);
+ DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *p->PixelPTEBytesPerRow_one_row_per_frame);
+#endif
+
+ return vm_bytes;
+} // CalculateVMAndRowBytes
+
+/*
+ * Compute how many source lines must be prefetched for one plane.
+ *
+ * Writes the initial vertical prefill line count to *VInitPreFill and the
+ * number of whole swaths to *MaxNumSwath, then returns
+ * *MaxNumSwath * SwathHeight plus the partial-swath line count.
+ * SwathHeight is used as a divisor and modulus and therefore must be non-zero.
+ */
+static unsigned int CalculatePrefetchSourceLines(
+	double VRatio,
+	unsigned int VTaps,
+	bool Interlace,
+	bool ProgressiveToInterlaceUnitInOPP,
+	unsigned int SwathHeight,
+	enum dml2_rotation_angle RotationAngle,
+	bool mirrored,
+	bool ViewportStationary,
+	unsigned int SwathWidth,
+	unsigned int ViewportHeight,
+	unsigned int ViewportXStart,
+	unsigned int ViewportYStart,
+
+	// Output
+	unsigned int *VInitPreFill,
+	unsigned int *MaxNumSwath)
+{
+	unsigned int rotated_vp_start = 0;
+	unsigned int lines_left_in_first_swath = 0;
+	unsigned int partial_swath_lines = 0;
+	unsigned int wrap_operand;
+	double prefetch_lines = 0;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio);
+	DML_LOG_VERBOSE("DML::%s: VTaps = %u\n", __func__, VTaps);
+	DML_LOG_VERBOSE("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart);
+	DML_LOG_VERBOSE("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart);
+	DML_LOG_VERBOSE("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary);
+	DML_LOG_VERBOSE("DML::%s: SwathHeight = %u\n", __func__, SwathHeight);
+#endif
+	if (!ProgressiveToInterlaceUnitInOPP) {
+		// interlaced content adds half a VRatio to the prefill
+		*VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1 + (Interlace ? 1 : 0) * 0.5 * VRatio) / 2.0, 1));
+	} else {
+		*VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1) / 2.0, 1));
+	}
+
+	if (!ViewportStationary) {
+		*MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - 1.0) / (double)SwathHeight, 1) + 1);
+
+		// add one swath height before subtracting when the prefill is 0 or 1
+		if (*VInitPreFill > 1)
+			wrap_operand = (unsigned int)(*VInitPreFill - 2);
+		else
+			wrap_operand = (unsigned int)(*VInitPreFill + SwathHeight - 2);
+		partial_swath_lines = (unsigned int)(math_max2(1, wrap_operand % SwathHeight));
+	} else {
+		// locate the viewport start along the scan direction after rotation/mirroring
+		if (RotationAngle == dml2_rotation_180) {
+			rotated_vp_start = SwathHeight - (((unsigned int)(ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
+		} else if (RotationAngle == dml2_rotation_270 || RotationAngle == dml2_rotation_90) {
+			// 270 unmirrored and 90 mirrored start from X; the other two start from the far edge
+			bool starts_at_x = (RotationAngle == dml2_rotation_270) ? !mirrored : mirrored;
+
+			if (starts_at_x)
+				rotated_vp_start = ViewportXStart;
+			else
+				rotated_vp_start = SwathHeight - (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
+		} else {
+			rotated_vp_start = ViewportYStart;
+		}
+
+		lines_left_in_first_swath = SwathHeight - (rotated_vp_start % SwathHeight);
+		if (lines_left_in_first_swath >= *VInitPreFill)
+			*MaxNumSwath = 1;
+		else
+			*MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - lines_left_in_first_swath) / (double)SwathHeight, 1) + 1);
+
+		partial_swath_lines = (unsigned int)(math_max2(1, (unsigned int)(rotated_vp_start + *VInitPreFill - 1) % SwathHeight));
+	}
+	prefetch_lines = *MaxNumSwath * SwathHeight + partial_swath_lines;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: vp_start_rot = %u\n", __func__, rotated_vp_start);
+	DML_LOG_VERBOSE("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill);
+	DML_LOG_VERBOSE("DML::%s: MaxPartialSwath = %u\n", __func__, partial_swath_lines);
+	DML_LOG_VERBOSE("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath);
+	DML_LOG_VERBOSE("DML::%s: Prefetch source lines = %3.2f\n", __func__, prefetch_lines);
+#endif
+	return (unsigned int)(prefetch_lines);
+
+}
+
+/*
+ * Compute the meta-row and dpte-row fetch bandwidths for one surface.
+ *
+ * Each bandwidth is VRatio * bytes-per-row / (row height * LineTime) for
+ * luma, plus the same term built from the chroma inputs when the source
+ * format is 4:2:0 or dml2_rgbe_alpha. *meta_row_bw is 0 unless both DCCEnable
+ * and mrq_present are set; *dpte_row_bw is 0 unless GPUVMEnable is set.
+ * use_one_row_for_frame is accepted but not used here.
+ */
+static void CalculateRowBandwidth(
+	bool GPUVMEnable,
+	bool use_one_row_for_frame,
+	enum dml2_source_format_class SourcePixelFormat,
+	double VRatio,
+	double VRatioChroma,
+	bool DCCEnable,
+	double LineTime,
+	unsigned int PixelPTEBytesPerRowLuma,
+	unsigned int PixelPTEBytesPerRowChroma,
+	unsigned int dpte_row_height_luma,
+	unsigned int dpte_row_height_chroma,
+
+	bool mrq_present,
+	unsigned int meta_row_bytes_per_row_ub_l,
+	unsigned int meta_row_bytes_per_row_ub_c,
+	unsigned int meta_row_height_luma,
+	unsigned int meta_row_height_chroma,
+
+	// Output
+	double *dpte_row_bw,
+	double *meta_row_bw)
+{
+	double meta_bw = 0;
+	double dpte_bw = 0;
+
+	if (DCCEnable && mrq_present) {
+		meta_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime);
+		// second plane contributes only for the two-plane formats
+		if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)
+			meta_bw = meta_bw + VRatioChroma * meta_row_bytes_per_row_ub_c / (meta_row_height_chroma * LineTime);
+	}
+	*meta_row_bw = meta_bw;
+
+	if (GPUVMEnable == true) {
+		dpte_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
+		if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)
+			dpte_bw = dpte_bw + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
+	}
+	*dpte_row_bw = dpte_bw;
+}
+
+/*
+ * CalculateMALLUseForStaticScreen - decide, per surface, whether it is kept in
+ * MALL for static screen.
+ *
+ * Step 1: every surface whose refresh_from_mall override is force_enable is
+ *         selected unconditionally and its size is accumulated.
+ * Step 2: repeatedly pick the smallest not-yet-selected surface that
+ *         - still fits in the remaining capacity,
+ *         - is not overridden to force_disable, and
+ *         - has one_row_per_frame_fits_in_buffer[] set,
+ *         until no further surface qualifies.
+ *
+ * The capacity is MALLAllocatedForDCN * 1024 * 1024 and is compared directly
+ * against SurfaceSizeInMALL[] (presumably MBytes vs. bytes - confirm with caller).
+ *
+ * Output: is_using_mall_for_ss[k] for k in [0, NumberOfActiveSurfaces).
+ */
+static void CalculateMALLUseForStaticScreen(
+	const struct dml2_display_cfg *display_cfg,
+	unsigned int NumberOfActiveSurfaces,
+	unsigned int MALLAllocatedForDCN,
+	unsigned int SurfaceSizeInMALL[],
+	bool one_row_per_frame_fits_in_buffer[],
+
+	// Output
+	bool is_using_mall_for_ss[])
+{
+	const unsigned int mall_capacity = MALLAllocatedForDCN * 1024 * 1024;
+	unsigned int used_size = 0;
+	unsigned int candidate = 0;
+	unsigned int k;
+
+	/* Step 1: forced surfaces. */
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		is_using_mall_for_ss[k] = (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable);
+		if (is_using_mall_for_ss[k])
+			used_size += SurfaceSizeInMALL[k];
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, is_using_mall_for_ss[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, used_size);
+#endif
+	}
+
+	/* Step 2: greedily add the smallest eligible surface, one per outer iteration. */
+	for (;;) {
+		bool found = false;
+
+		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+			if (used_size + SurfaceSizeInMALL[k] > mall_capacity)
+				continue;
+			if (is_using_mall_for_ss[k])
+				continue;
+			if (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_disable)
+				continue;
+			if (!one_row_per_frame_fits_in_buffer[k])
+				continue;
+			/* Only replace the current candidate with a strictly smaller surface. */
+			if (found && SurfaceSizeInMALL[k] >= SurfaceSizeInMALL[candidate])
+				continue;
+
+			found = true;
+			candidate = k;
+			DML_LOG_VERBOSE("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, display_cfg->plane_descriptors[k].overrides.refresh_from_mall);
+		}
+
+		if (!found)
+			break;
+
+		is_using_mall_for_ss[candidate] = true;
+		used_size += SurfaceSizeInMALL[candidate];
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: SurfaceToAddToMALL = %u\n", __func__, candidate);
+		DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, used_size);
+#endif
+	}
+}
+
+/*
+ * CalculateDCCConfiguration - choose the DCC request type for the luma and
+ * chroma planes and derive the matching block-size outputs.
+ *
+ * The function estimates the bytes of a full swath for horizontal and vertical
+ * scan, checks how many of them fit in the DET buffer (nomDETInKByte * 1024),
+ * and from that picks one of
+ *   dml2_core_internal_request_type_256_bytes,
+ *   dml2_core_internal_request_type_128_bytes_contiguous,
+ *   dml2_core_internal_request_type_128_bytes_non_contiguous
+ * for each plane. MaxUncompressedBlock, MaxCompressedBlock and
+ * IndependentBlock are then set from the request type, and forced to 0 when
+ * DCC is disabled (chroma also when BytePerPixelC == 0).
+ *
+ * SurfaceWidthChroma, SurfaceHeightChroma, TilingFormat, BytePerPixelDETY and
+ * BytePerPixelDETC are accepted but not read in this function.
+ */
+static void CalculateDCCConfiguration(
+ bool DCCEnabled,
+ bool DCCProgrammingAssumesScanDirectionUnknown,
+ enum dml2_source_format_class SourcePixelFormat,
+ unsigned int SurfaceWidthLuma,
+ unsigned int SurfaceWidthChroma,
+ unsigned int SurfaceHeightLuma,
+ unsigned int SurfaceHeightChroma,
+ unsigned int nomDETInKByte,
+ unsigned int RequestHeight256ByteLuma,
+ unsigned int RequestHeight256ByteChroma,
+ enum dml2_swizzle_mode TilingFormat,
+ unsigned int BytePerPixelY,
+ unsigned int BytePerPixelC,
+ double BytePerPixelDETY,
+ double BytePerPixelDETC,
+ enum dml2_rotation_angle RotationAngle,
+
+ // Output
+ enum dml2_core_internal_request_type *RequestLuma,
+ enum dml2_core_internal_request_type *RequestChroma,
+ unsigned int *MaxUncompressedBlockLuma,
+ unsigned int *MaxUncompressedBlockChroma,
+ unsigned int *MaxCompressedBlockLuma,
+ unsigned int *MaxCompressedBlockChroma,
+ unsigned int *IndependentBlockLuma,
+ unsigned int *IndependentBlockChroma)
+{
+ unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
+
+ unsigned int segment_order_horz_contiguous_luma;
+ unsigned int segment_order_horz_contiguous_chroma;
+ unsigned int segment_order_vert_contiguous_luma;
+ unsigned int segment_order_vert_contiguous_chroma;
+
+ unsigned int req128_horz_wc_l;
+ unsigned int req128_horz_wc_c;
+ unsigned int req128_vert_wc_l;
+ unsigned int req128_vert_wc_c;
+
+ unsigned int yuv420;
+ unsigned int horz_div_l;
+ unsigned int horz_div_c;
+ unsigned int vert_div_l;
+ unsigned int vert_div_c;
+
+ unsigned int swath_buf_size;
+ double detile_buf_vp_horz_limit;
+ double detile_buf_vp_vert_limit;
+
+ unsigned int MAS_vp_horz_limit;
+ unsigned int MAS_vp_vert_limit;
+ unsigned int max_vp_horz_width;
+ unsigned int max_vp_vert_height;
+ unsigned int eff_surf_width_l;
+ unsigned int eff_surf_width_c;
+ unsigned int eff_surf_height_l;
+ unsigned int eff_surf_height_c;
+
+ unsigned int full_swath_bytes_horz_wc_l;
+ unsigned int full_swath_bytes_horz_wc_c;
+ unsigned int full_swath_bytes_vert_wc_l;
+ unsigned int full_swath_bytes_vert_wc_c;
+
+ /* yuv420 is used as a 0/1 value: chroma dimensions are divided by (1 + yuv420) below. */
+ if (dml_is_420(SourcePixelFormat))
+ yuv420 = 1;
+ else
+ yuv420 = 0;
+ horz_div_l = 1;
+ horz_div_c = 1;
+ vert_div_l = 1;
+ vert_div_c = 1;
+
+ /* For 1 byte-per-pixel planes the vertical divisor (1 + vert_div) becomes 1 instead of 2. */
+ if (BytePerPixelY == 1)
+ vert_div_l = 0;
+ if (BytePerPixelC == 1)
+ vert_div_c = 0;
+
+ /*
+  * Viewport limits imposed by the detile buffer. Half of the DET is used,
+  * minus 2 * 256 bytes for a single-plane surface (BytePerPixelC == 0) or
+  * 2 * 2 * 256 bytes when a chroma plane is present.
+  */
+ if (BytePerPixelC == 0) {
+ swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
+ detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
+ detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
+ } else {
+ swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
+ detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (double)RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
+ detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
+ }
+
+ /* dml2_420_10: limits are scaled by 1.5 here; the swath byte counts are scaled by 2/3 further down. */
+ if (SourcePixelFormat == dml2_420_10) {
+ detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
+ detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
+ }
+
+ /* NOTE(review): math_floor2(x, 16) presumably rounds down to a multiple of 16 - confirm against the helper. */
+ detile_buf_vp_horz_limit = math_floor2(detile_buf_vp_horz_limit - 1, 16);
+ detile_buf_vp_vert_limit = math_floor2(detile_buf_vp_vert_limit - 1, 16);
+
+ /* The effective surface size is the real size clamped to the smaller of the fixed (MAS) and detile-buffer limits. */
+ MAS_vp_horz_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : 6144;
+ MAS_vp_vert_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
+ max_vp_horz_width = (unsigned int)(math_min2((double)MAS_vp_horz_limit, detile_buf_vp_horz_limit));
+ max_vp_vert_height = (unsigned int)(math_min2((double)MAS_vp_vert_limit, detile_buf_vp_vert_limit));
+ eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
+ eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
+ eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
+ eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
+
+ /* Bytes of one full swath for horizontal and vertical scan, per plane. */
+ full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
+ full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
+ if (BytePerPixelC > 0) {
+ full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
+ full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
+ } else {
+ full_swath_bytes_horz_wc_c = 0;
+ full_swath_bytes_vert_wc_c = 0;
+ }
+
+ if (SourcePixelFormat == dml2_420_10) {
+ full_swath_bytes_horz_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0));
+ full_swath_bytes_horz_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0));
+ full_swath_bytes_vert_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0));
+ full_swath_bytes_vert_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0));
+ }
+
+ /*
+  * Horizontal scan: req128_* == 0 keeps 256-byte requests. That is possible
+  * for both planes when two full swaths of each fit in the DET; otherwise
+  * one plane (selected by the 1.5x luma/chroma size comparison) or both
+  * planes fall back to 128-byte requests.
+  */
+ if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
+ req128_horz_wc_l = 0;
+ req128_horz_wc_c = 0;
+ } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
+ req128_horz_wc_l = 0;
+ req128_horz_wc_c = 1;
+ } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
+ req128_horz_wc_l = 1;
+ req128_horz_wc_c = 0;
+ } else {
+ req128_horz_wc_l = 1;
+ req128_horz_wc_c = 1;
+ }
+
+ /* Vertical scan: same decision using the vertical swath byte counts. */
+ if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
+ req128_vert_wc_l = 0;
+ req128_vert_wc_c = 0;
+ } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
+ req128_vert_wc_l = 0;
+ req128_vert_wc_c = 1;
+ } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
+ req128_vert_wc_l = 1;
+ req128_vert_wc_c = 0;
+ } else {
+ req128_vert_wc_l = 1;
+ req128_vert_wc_c = 1;
+ }
+
+ /* Segment order per plane: 2 bytes-per-pixel planes are contiguous vertically, all others horizontally. */
+ if (BytePerPixelY == 2) {
+ segment_order_horz_contiguous_luma = 0;
+ segment_order_vert_contiguous_luma = 1;
+ } else {
+ segment_order_horz_contiguous_luma = 1;
+ segment_order_vert_contiguous_luma = 0;
+ }
+
+ if (BytePerPixelC == 2) {
+ segment_order_horz_contiguous_chroma = 0;
+ segment_order_vert_contiguous_chroma = 1;
+ } else {
+ segment_order_horz_contiguous_chroma = 1;
+ segment_order_vert_contiguous_chroma = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled);
+ DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
+ DML_LOG_VERBOSE("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC);
+ DML_LOG_VERBOSE("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l);
+ DML_LOG_VERBOSE("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c);
+ DML_LOG_VERBOSE("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l);
+ DML_LOG_VERBOSE("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c);
+ DML_LOG_VERBOSE("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma);
+ DML_LOG_VERBOSE("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma);
+#endif
+ /*
+  * Pick the request type per plane:
+  *  - scan direction unknown: 256B only if neither direction needs 128B;
+  *    non-contiguous if any direction needing 128B is not contiguous;
+  *  - otherwise only the horizontal (non-vertical rotation) or vertical
+  *    (vertical rotation) result is considered.
+  */
+ if (DCCProgrammingAssumesScanDirectionUnknown == true) {
+ if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
+ *RequestLuma = dml2_core_internal_request_type_256_bytes;
+ } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
+ *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
+ } else {
+ *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
+ }
+ if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
+ *RequestChroma = dml2_core_internal_request_type_256_bytes;
+ } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
+ *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
+ } else {
+ *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
+ }
+ } else if (!dml_is_vertical_rotation(RotationAngle)) {
+ if (req128_horz_wc_l == 0) {
+ *RequestLuma = dml2_core_internal_request_type_256_bytes;
+ } else if (segment_order_horz_contiguous_luma == 0) {
+ *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
+ } else {
+ *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
+ }
+ if (req128_horz_wc_c == 0) {
+ *RequestChroma = dml2_core_internal_request_type_256_bytes;
+ } else if (segment_order_horz_contiguous_chroma == 0) {
+ *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
+ } else {
+ *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
+ }
+ } else {
+ if (req128_vert_wc_l == 0) {
+ *RequestLuma = dml2_core_internal_request_type_256_bytes;
+ } else if (segment_order_vert_contiguous_luma == 0) {
+ *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
+ } else {
+ *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
+ }
+ if (req128_vert_wc_c == 0) {
+ *RequestChroma = dml2_core_internal_request_type_256_bytes;
+ } else if (segment_order_vert_contiguous_chroma == 0) {
+ *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
+ } else {
+ *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
+ }
+ }
+
+ /* Block sizes by request type: 256B -> 256/256/0, 128B contiguous -> 256/128/128, otherwise 256/64/64. */
+ if (*RequestLuma == dml2_core_internal_request_type_256_bytes) {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 256;
+ *IndependentBlockLuma = 0;
+ } else if (*RequestLuma == dml2_core_internal_request_type_128_bytes_contiguous) {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 128;
+ *IndependentBlockLuma = 128;
+ } else {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 64;
+ *IndependentBlockLuma = 64;
+ }
+
+ if (*RequestChroma == dml2_core_internal_request_type_256_bytes) {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 256;
+ *IndependentBlockChroma = 0;
+ } else if (*RequestChroma == dml2_core_internal_request_type_128_bytes_contiguous) {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 128;
+ *IndependentBlockChroma = 128;
+ } else {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 64;
+ *IndependentBlockChroma = 64;
+ }
+
+ /* Without DCC, or without a chroma plane, the corresponding block sizes are reported as 0. */
+ if (DCCEnabled != true || BytePerPixelC == 0) {
+ *MaxUncompressedBlockChroma = 0;
+ *MaxCompressedBlockChroma = 0;
+ *IndependentBlockChroma = 0;
+ }
+
+ if (DCCEnabled != true) {
+ *MaxUncompressedBlockLuma = 0;
+ *MaxCompressedBlockLuma = 0;
+ *IndependentBlockLuma = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma);
+ DML_LOG_VERBOSE("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma);
+ DML_LOG_VERBOSE("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma);
+ DML_LOG_VERBOSE("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma);
+ DML_LOG_VERBOSE("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma);
+ DML_LOG_VERBOSE("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma);
+#endif
+
+}
+
+/*
+ * calculate_mcache_row_bytes - size the DCC meta data (mcache) needed for one
+ * plane's viewport row and derive the DCC DRAM bandwidth overhead factors.
+ *
+ * Inputs are read from *p; results are written through the pointer members of
+ * *p:
+ *   mvmpg_width / mvmpg_height         alignment unit (tile block, or vm page
+ *                                      when gpuvm is enabled and the page is at
+ *                                      least as large as the tile block)
+ *   full_vp_access_width_mvmpg_aligned viewport extent in the access direction,
+ *                                      aligned to the unit above
+ *   meta_row_width_ub                  upper bound of the meta row width
+ *   mcache_row_bytes(_per_channel)     meta bytes per row (total / per channel)
+ *   num_mcaches                        mcaches needed for one row
+ *   mvmpg_per_mcache_lb                units that fit in one mcache
+ *   dcc_dram_bw_nom/pref_overhead_factor
+ *
+ * A viewport with both width and height equal to 0 yields zero for
+ * num_mcaches and both mcache_row_bytes outputs; the remaining outputs other
+ * than mvmpg_width/height are left untouched in that case.
+ *
+ * @scratch is not used by this function.
+ */
+static void calculate_mcache_row_bytes(
+	struct dml2_core_internal_scratch *scratch,
+	struct dml2_core_calcs_calculate_mcache_row_bytes_params *p)
+{
+	unsigned int vmpg_bytes = 0;
+	unsigned int blk_bytes = 0;
+	float meta_per_mvmpg_per_channel = 0;
+	unsigned int est_blk_per_vmpg = 2;
+	unsigned int mvmpg_per_row_ub = 0;
+	unsigned int full_vp_width_mvmpg_aligned = 0;
+	unsigned int full_vp_height_mvmpg_aligned = 0;
+	unsigned int meta_per_mvmpg_per_channel_ub = 0;
+	unsigned int mvmpg_per_mcache;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: num_chans = %u\n", __func__, p->num_chans);
+	DML_LOG_VERBOSE("DML::%s: mem_word_bytes = %u\n", __func__, p->mem_word_bytes);
+	DML_LOG_VERBOSE("DML::%s: mcache_line_size_bytes = %u\n", __func__, p->mcache_line_size_bytes);
+	DML_LOG_VERBOSE("DML::%s: mcache_size_bytes = %u\n", __func__, p->mcache_size_bytes);
+	DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable);
+	DML_LOG_VERBOSE("DML::%s: gpuvm_page_size_kbytes = %u\n", __func__, p->gpuvm_page_size_kbytes);
+	DML_LOG_VERBOSE("DML::%s: vp_stationary = %u\n", __func__, p->vp_stationary);
+	DML_LOG_VERBOSE("DML::%s: tiling_mode = %u\n", __func__, p->tiling_mode);
+	DML_LOG_VERBOSE("DML::%s: vp_start_x = %u\n", __func__, p->vp_start_x);
+	DML_LOG_VERBOSE("DML::%s: vp_start_y = %u\n", __func__, p->vp_start_y);
+	DML_LOG_VERBOSE("DML::%s: full_vp_width = %u\n", __func__, p->full_vp_width);
+	DML_LOG_VERBOSE("DML::%s: full_vp_height = %u\n", __func__, p->full_vp_height);
+	DML_LOG_VERBOSE("DML::%s: blk_width = %u\n", __func__, p->blk_width);
+	DML_LOG_VERBOSE("DML::%s: blk_height = %u\n", __func__, p->blk_height);
+	DML_LOG_VERBOSE("DML::%s: vmpg_width = %u\n", __func__, p->vmpg_width);
+	DML_LOG_VERBOSE("DML::%s: vmpg_height = %u\n", __func__, p->vmpg_height);
+	DML_LOG_VERBOSE("DML::%s: full_swath_bytes = %u\n", __func__, p->full_swath_bytes);
+#endif
+	DML_ASSERT(p->mcache_line_size_bytes != 0);
+	DML_ASSERT(p->mcache_size_bytes != 0);
+
+	*p->mvmpg_width = 0;
+	*p->mvmpg_height = 0;
+
+	if (p->full_vp_height == 0 && p->full_vp_width == 0) {
+		/* Empty viewport: nothing to cache. */
+		*p->num_mcaches = 0;
+		*p->mcache_row_bytes = 0;
+		*p->mcache_row_bytes_per_channel = 0;
+	} else {
+		blk_bytes = dml_get_tile_block_size_bytes(p->tiling_mode);
+
+		// if gpuvm is not enabled, the alignment boundary should be in terms of tiling block size
+		vmpg_bytes = p->gpuvm_page_size_kbytes * 1024;
+
+		// With vmpg_bytes >= tile blk_bytes, the meta_row_width alignment equations are relative to the vmpg_width/height.
+		// But for 4KB page with 64KB tile block, we need the meta for all pages in the tile block.
+		// Therefore, the alignment is relative to the blk_width/height. The factor of 16 vmpg per 64KB tile block is applied at the end.
+		*p->mvmpg_width = p->blk_width;
+		*p->mvmpg_height = p->blk_height;
+		if (p->gpuvm_enable) {
+			if (vmpg_bytes >= blk_bytes) {
+				*p->mvmpg_width = p->vmpg_width;
+				*p->mvmpg_height = p->vmpg_height;
+			} else if (!((blk_bytes == 65536) && (vmpg_bytes == 4096))) {
+				DML_LOG_VERBOSE("ERROR: DML::%s: Tiling size and vm page size combination not supported\n", __func__);
+				DML_ASSERT(0);
+			}
+		}
+
+		// For plane0 & 1, first calculate full_vp_width/height_l/c aligned to vmpg_width/height_l/c
+		full_vp_width_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_x + p->full_vp_width) + *p->mvmpg_width - 1, *p->mvmpg_width) - math_floor2(p->vp_start_x, *p->mvmpg_width));
+		full_vp_height_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_y + p->full_vp_height) + *p->mvmpg_height - 1, *p->mvmpg_height) - math_floor2(p->vp_start_y, *p->mvmpg_height));
+
+		*p->full_vp_access_width_mvmpg_aligned = p->surf_vert ? full_vp_height_mvmpg_aligned : full_vp_width_mvmpg_aligned;
+
+		// Use the equation for the exact alignment when possible. Note that the exact alignment cannot be used for horizontal access if vmpg_bytes > blk_bytes.
+		if (!p->surf_vert) { // horizontal access
+			if (p->vp_stationary == 1 && vmpg_bytes <= blk_bytes)
+				*p->meta_row_width_ub = full_vp_width_mvmpg_aligned;
+			else
+				*p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_width - 1, *p->mvmpg_width) + *p->mvmpg_width;
+			mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_width;
+		} else { // vertical access
+			if (p->vp_stationary == 1)
+				*p->meta_row_width_ub = full_vp_height_mvmpg_aligned;
+			else
+				*p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_height - 1, *p->mvmpg_height) + *p->mvmpg_height;
+			mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_height;
+		}
+
+		if (p->gpuvm_enable) {
+			meta_per_mvmpg_per_channel = (float)vmpg_bytes / (float)256 / p->num_chans;
+
+			// but using the est_blk_per_vmpg between 2 and 4, to be not as pessimistic
+			if (p->surf_vert && vmpg_bytes > blk_bytes) {
+				meta_per_mvmpg_per_channel = (float)est_blk_per_vmpg * blk_bytes / (float)256 / p->num_chans;
+			}
+
+			*p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); // dcc_dr_oh_nom
+		} else {
+			meta_per_mvmpg_per_channel = (float) blk_bytes / (float)256 / p->num_chans;
+
+			if (!p->surf_vert)
+				*p->dcc_dram_bw_nom_overhead_factor = 1 + 1.0 / 256.0;
+			else
+				*p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel));
+		}
+
+		meta_per_mvmpg_per_channel_ub = (unsigned int)math_ceil2((double)meta_per_mvmpg_per_channel, p->mcache_line_size_bytes);
+
+		// but for 4KB vmpg with 64KB tile blk
+		if (p->gpuvm_enable && (blk_bytes == 65536) && (vmpg_bytes == 4096))
+			meta_per_mvmpg_per_channel_ub = 16 * meta_per_mvmpg_per_channel_ub;
+
+		// If this mcache_row_bytes for the full viewport of the surface is less than or equal to mcache_bytes,
+		// then one mcache can be used for this request stream. If not, it is useful to know the width of the viewport that can be supported in the mcache_bytes.
+		if (p->gpuvm_enable || p->surf_vert) {
+			*p->mcache_row_bytes_per_channel = mvmpg_per_row_ub * meta_per_mvmpg_per_channel_ub;
+			*p->mcache_row_bytes = *p->mcache_row_bytes_per_channel * p->num_chans;
+		} else { // horizontal and gpuvm disable
+			*p->mcache_row_bytes = *p->meta_row_width_ub * p->blk_height * p->bytes_per_pixel / 256;
+			if (p->mcache_line_size_bytes != 0)
+				*p->mcache_row_bytes_per_channel = (unsigned int)math_ceil2((double)*p->mcache_row_bytes / p->num_chans, p->mcache_line_size_bytes);
+		}
+
+		/*
+		 * dcc_dr_oh_pref: the ratio must be formed in floating point. Both
+		 * operands are unsigned integers, so an integer division would
+		 * truncate every ratio below 1 to 0 and silently reduce the factor
+		 * to its 1/256 floor. A zero swath size falls back to that floor
+		 * instead of dividing by zero.
+		 */
+		if (p->full_swath_bytes != 0)
+			*p->dcc_dram_bw_pref_overhead_factor = 1 + math_max2(1.0 / 256.0, (double)*p->mcache_row_bytes / (double)p->full_swath_bytes);
+		else
+			*p->dcc_dram_bw_pref_overhead_factor = 1 + 1.0 / 256.0;
+
+		if (p->mcache_size_bytes != 0)
+			*p->num_mcaches = (unsigned int)math_ceil2((double)*p->mcache_row_bytes_per_channel / p->mcache_size_bytes, 1);
+
+		mvmpg_per_mcache = p->mcache_size_bytes / meta_per_mvmpg_per_channel_ub;
+		*p->mvmpg_per_mcache_lb = (unsigned int)math_floor2(mvmpg_per_mcache, 1);
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable);
+		DML_LOG_VERBOSE("DML::%s: vmpg_bytes = %u\n", __func__, vmpg_bytes);
+		DML_LOG_VERBOSE("DML::%s: blk_bytes = %u\n", __func__, blk_bytes);
+		DML_LOG_VERBOSE("DML::%s: meta_per_mvmpg_per_channel = %f\n", __func__, meta_per_mvmpg_per_channel);
+		DML_LOG_VERBOSE("DML::%s: mvmpg_per_row_ub = %u\n", __func__, mvmpg_per_row_ub);
+		DML_LOG_VERBOSE("DML::%s: meta_row_width_ub = %u\n", __func__, *p->meta_row_width_ub);
+		DML_LOG_VERBOSE("DML::%s: mvmpg_width = %u\n", __func__, *p->mvmpg_width);
+		DML_LOG_VERBOSE("DML::%s: mvmpg_height = %u\n", __func__, *p->mvmpg_height);
+		DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_nom_overhead_factor);
+		DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_pref_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_pref_overhead_factor);
+#endif
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: mcache_row_bytes = %u\n", __func__, *p->mcache_row_bytes);
+	DML_LOG_VERBOSE("DML::%s: mcache_row_bytes_per_channel = %u\n", __func__, *p->mcache_row_bytes_per_channel);
+	DML_LOG_VERBOSE("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches);
+#endif
+	DML_ASSERT(*p->num_mcaches > 0);
+}
+
+static void calculate_mcache_setting(
+ struct dml2_core_internal_scratch *scratch,
+ struct dml2_core_calcs_calculate_mcache_setting_params *p)
+{
+ unsigned int n;
+
+ struct dml2_core_shared_calculate_mcache_setting_locals *l = &scratch->calculate_mcache_setting_locals;
+ memset(l, 0, sizeof(struct dml2_core_shared_calculate_mcache_setting_locals));
+
+ *p->num_mcaches_l = 0;
+ *p->mcache_row_bytes_l = 0;
+ *p->mcache_row_bytes_per_channel_l = 0;
+ *p->dcc_dram_bw_nom_overhead_factor_l = 1.0;
+ *p->dcc_dram_bw_pref_overhead_factor_l = 1.0;
+
+ *p->num_mcaches_c = 0;
+ *p->mcache_row_bytes_c = 0;
+ *p->mcache_row_bytes_per_channel_c = 0;
+ *p->dcc_dram_bw_nom_overhead_factor_c = 1.0;
+ *p->dcc_dram_bw_pref_overhead_factor_c = 1.0;
+
+ *p->mall_comb_mcache_l = 0;
+ *p->mall_comb_mcache_c = 0;
+ *p->lc_comb_mcache = 0;
+
+ if (!p->dcc_enable)
+ return;
+
+ l->is_dual_plane = dml_is_420(p->source_format) || p->source_format == dml2_rgbe_alpha;
+
+ l->l_p.num_chans = p->num_chans;
+ l->l_p.mem_word_bytes = p->mem_word_bytes;
+ l->l_p.mcache_size_bytes = p->mcache_size_bytes;
+ l->l_p.mcache_line_size_bytes = p->mcache_line_size_bytes;
+ l->l_p.gpuvm_enable = p->gpuvm_enable;
+ l->l_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes;
+ l->l_p.surf_vert = p->surf_vert;
+ l->l_p.vp_stationary = p->vp_stationary;
+ l->l_p.tiling_mode = p->tiling_mode;
+ l->l_p.vp_start_x = p->vp_start_x_l;
+ l->l_p.vp_start_y = p->vp_start_y_l;
+ l->l_p.full_vp_width = p->full_vp_width_l;
+ l->l_p.full_vp_height = p->full_vp_height_l;
+ l->l_p.blk_width = p->blk_width_l;
+ l->l_p.blk_height = p->blk_height_l;
+ l->l_p.vmpg_width = p->vmpg_width_l;
+ l->l_p.vmpg_height = p->vmpg_height_l;
+ l->l_p.full_swath_bytes = p->full_swath_bytes_l;
+ l->l_p.bytes_per_pixel = p->bytes_per_pixel_l;
+
+ // output
+ l->l_p.num_mcaches = p->num_mcaches_l;
+ l->l_p.mcache_row_bytes = p->mcache_row_bytes_l;
+ l->l_p.mcache_row_bytes_per_channel = p->mcache_row_bytes_per_channel_l;
+ l->l_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_l;
+ l->l_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_l;
+ l->l_p.mvmpg_width = &l->mvmpg_width_l;
+ l->l_p.mvmpg_height = &l->mvmpg_height_l;
+ l->l_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_l;
+ l->l_p.meta_row_width_ub = &l->meta_row_width_l;
+ l->l_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_l;
+
+ calculate_mcache_row_bytes(scratch, &l->l_p);
+ DML_ASSERT(*p->num_mcaches_l > 0);
+
+ if (l->is_dual_plane) {
+ l->c_p.num_chans = p->num_chans;
+ l->c_p.mem_word_bytes = p->mem_word_bytes;
+ l->c_p.mcache_size_bytes = p->mcache_size_bytes;
+ l->c_p.mcache_line_size_bytes = p->mcache_line_size_bytes;
+ l->c_p.gpuvm_enable = p->gpuvm_enable;
+ l->c_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes;
+ l->c_p.surf_vert = p->surf_vert;
+ l->c_p.vp_stationary = p->vp_stationary;
+ l->c_p.tiling_mode = p->tiling_mode;
+ l->c_p.vp_start_x = p->vp_start_x_c;
+ l->c_p.vp_start_y = p->vp_start_y_c;
+ l->c_p.full_vp_width = p->full_vp_width_c;
+ l->c_p.full_vp_height = p->full_vp_height_c;
+ l->c_p.blk_width = p->blk_width_c;
+ l->c_p.blk_height = p->blk_height_c;
+ l->c_p.vmpg_width = p->vmpg_width_c;
+ l->c_p.vmpg_height = p->vmpg_height_c;
+ l->c_p.full_swath_bytes = p->full_swath_bytes_c;
+ l->c_p.bytes_per_pixel = p->bytes_per_pixel_c;
+
+ // output
+ l->c_p.num_mcaches = p->num_mcaches_c;
+ l->c_p.mcache_row_bytes = p->mcache_row_bytes_c;
+ l->c_p.mcache_row_bytes_per_channel = p->mcache_row_bytes_per_channel_c;
+ l->c_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_c;
+ l->c_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_c;
+ l->c_p.mvmpg_width = &l->mvmpg_width_c;
+ l->c_p.mvmpg_height = &l->mvmpg_height_c;
+ l->c_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_c;
+ l->c_p.meta_row_width_ub = &l->meta_row_width_c;
+ l->c_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_c;
+
+ calculate_mcache_row_bytes(scratch, &l->c_p);
+ DML_ASSERT(*p->num_mcaches_c > 0);
+ }
+
+ // Sharing for iMALL access
+ l->mcache_remainder_l = *p->mcache_row_bytes_per_channel_l % p->mcache_size_bytes;
+ l->mcache_remainder_c = *p->mcache_row_bytes_per_channel_c % p->mcache_size_bytes;
+ l->mvmpg_access_width_l = p->surf_vert ? l->mvmpg_height_l : l->mvmpg_width_l;
+ l->mvmpg_access_width_c = p->surf_vert ? l->mvmpg_height_c : l->mvmpg_width_c;
+
+ if (p->imall_enable) {
+ *p->mall_comb_mcache_l = (2 * l->mcache_remainder_l <= p->mcache_size_bytes);
+
+ if (l->is_dual_plane)
+ *p->mall_comb_mcache_c = (2 * l->mcache_remainder_c <= p->mcache_size_bytes);
+ }
+
+ if (!p->surf_vert) // horizonatal access
+ l->luma_time_factor = (double)l->mvmpg_height_c / l->mvmpg_height_l * 2;
+ else // vertical access
+ l->luma_time_factor = (double)l->mvmpg_width_c / l->mvmpg_width_l * 2;
+
+ // The algorithm starts with computing a non-integer, avg_mcache_element_size_l/c:
+ if (*p->num_mcaches_l) {
+ l->avg_mcache_element_size_l = l->meta_row_width_l / *p->num_mcaches_l;
+ }
+ if (l->is_dual_plane) {
+ l->avg_mcache_element_size_c = l->meta_row_width_c / *p->num_mcaches_c;
+
+ /* if either remainder is 0, then mcache sharing is not needed or not possible due to full utilization */
+ if (l->mcache_remainder_l && l->mcache_remainder_c) {
+ if (!p->imall_enable || (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c)) {
+ l->lc_comb_last_mcache_size = (unsigned int)((l->mcache_remainder_l * (*p->mall_comb_mcache_l ? 2 : 1) * l->luma_time_factor) +
+ (l->mcache_remainder_c * (*p->mall_comb_mcache_c ? 2 : 1)));
+ }
+ *p->lc_comb_mcache = (l->lc_comb_last_mcache_size <= p->mcache_size_bytes) && (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c);
+ }
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: imall_enable = %u\n", __func__, p->imall_enable);
+ DML_LOG_VERBOSE("DML::%s: is_dual_plane = %u\n", __func__, l->is_dual_plane);
+ DML_LOG_VERBOSE("DML::%s: surf_vert = %u\n", __func__, p->surf_vert);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_width_l = %u\n", __func__, l->mvmpg_width_l);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_height_l = %u\n", __func__, l->mvmpg_height_l);
+ DML_LOG_VERBOSE("DML::%s: mcache_remainder_l = %f\n", __func__, l->mcache_remainder_l);
+ DML_LOG_VERBOSE("DML::%s: num_mcaches_l = %u\n", __func__, *p->num_mcaches_l);
+ DML_LOG_VERBOSE("DML::%s: avg_mcache_element_size_l = %u\n", __func__, l->avg_mcache_element_size_l);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_access_width_l = %u\n", __func__, l->mvmpg_access_width_l);
+ DML_LOG_VERBOSE("DML::%s: mall_comb_mcache_l = %u\n", __func__, *p->mall_comb_mcache_l);
+
+ if (l->is_dual_plane) {
+ DML_LOG_VERBOSE("DML::%s: mvmpg_width_c = %u\n", __func__, l->mvmpg_width_c);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_height_c = %u\n", __func__, l->mvmpg_height_c);
+ DML_LOG_VERBOSE("DML::%s: mcache_remainder_c = %f\n", __func__, l->mcache_remainder_c);
+ DML_LOG_VERBOSE("DML::%s: luma_time_factor = %f\n", __func__, l->luma_time_factor);
+ DML_LOG_VERBOSE("DML::%s: num_mcaches_c = %u\n", __func__, *p->num_mcaches_c);
+ DML_LOG_VERBOSE("DML::%s: avg_mcache_element_size_c = %u\n", __func__, l->avg_mcache_element_size_c);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_access_width_c = %u\n", __func__, l->mvmpg_access_width_c);
+ DML_LOG_VERBOSE("DML::%s: mall_comb_mcache_c = %u\n", __func__, *p->mall_comb_mcache_c);
+ DML_LOG_VERBOSE("DML::%s: lc_comb_last_mcache_size = %u\n", __func__, l->lc_comb_last_mcache_size);
+ DML_LOG_VERBOSE("DML::%s: lc_comb_mcache = %u\n", __func__, *p->lc_comb_mcache);
+ }
+#endif
+ // calculate split_coordinate
+ l->full_vp_access_width_l = p->surf_vert ? p->full_vp_height_l : p->full_vp_width_l;
+ l->full_vp_access_width_c = p->surf_vert ? p->full_vp_height_c : p->full_vp_width_c;
+
+ for (n = 0; n < *p->num_mcaches_l - 1; n++) {
+ p->mcache_offsets_l[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_l / l->mvmpg_access_width_l, 1)) * l->mvmpg_access_width_l;
+ }
+ p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l;
+
+ if (l->is_dual_plane) {
+ for (n = 0; n < *p->num_mcaches_c - 1; n++) {
+ p->mcache_offsets_c[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_c / l->mvmpg_access_width_c, 1)) * l->mvmpg_access_width_c;
+ }
+ p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c;
+ }
+#ifdef __DML_VBA_DEBUG__
+ for (n = 0; n < *p->num_mcaches_l; n++)
+ DML_LOG_VERBOSE("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]);
+
+ if (l->is_dual_plane) {
+ for (n = 0; n < *p->num_mcaches_c; n++)
+ DML_LOG_VERBOSE("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]);
+ }
+#endif
+
+ // Luma/Chroma combine in the last mcache
+ // In the case of Luma/Chroma combine-mCache (with lc_comb_mcache==1), all mCaches except the last segment are filled as much as possible while staying aligned to the mvmpg boundary
+ if (*p->lc_comb_mcache && l->is_dual_plane) {
+ for (n = 0; n < *p->num_mcaches_l - 1; n++)
+ p->mcache_offsets_l[n] = (n + 1) * l->mvmpg_per_mcache_lb_l * l->mvmpg_access_width_l;
+ p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l;
+
+ for (n = 0; n < *p->num_mcaches_c - 1; n++)
+ p->mcache_offsets_c[n] = (n + 1) * l->mvmpg_per_mcache_lb_c * l->mvmpg_access_width_c;
+ p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c;
+
+#ifdef __DML_VBA_DEBUG__
+ for (n = 0; n < *p->num_mcaches_l; n++)
+ DML_LOG_VERBOSE("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]);
+
+ for (n = 0; n < *p->num_mcaches_c; n++)
+ DML_LOG_VERBOSE("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]);
+#endif
+ }
+
+ *p->mcache_shift_granularity_l = l->mvmpg_access_width_l;
+ *p->mcache_shift_granularity_c = l->mvmpg_access_width_c;
+}
+
+/*
+ * Fill the per-plane MALL prefetch bandwidth overhead factors.
+ *
+ * Both factors default to 1.0 for every plane. A plane whose
+ * overrides.legacy_svp_config is dml2_svp_mode_override_imall gets an SDP
+ * factor of 1.25 and a DRAM factor of 2.0. A plane configured as
+ * dml2_svp_mode_override_phantom_pipe_no_data_return gets an SDP factor of
+ * 0.25 and keeps the default DRAM factor.
+ *
+ * Outputs: mall_prefetch_sdp_overhead_factor[], mall_prefetch_dram_overhead_factor[]
+ * (entries 0 .. num_active_planes - 1 are written).
+ */
+static void calculate_mall_bw_overhead_factor(
+	double mall_prefetch_sdp_overhead_factor[], //mall_sdp_oh_nom/pref
+	double mall_prefetch_dram_overhead_factor[], //mall_dram_oh_nom/pref
+
+	// input
+	const struct dml2_display_cfg *display_cfg,
+	unsigned int num_active_planes)
+{
+	unsigned int k;
+
+	for (k = 0; k < num_active_planes; ++k) {
+		double sdp_factor = 1.0;
+		double dram_factor = 1.0;
+
+		switch (display_cfg->plane_descriptors[k].overrides.legacy_svp_config) {
+		case dml2_svp_mode_override_imall:
+			// imall raises both the SDP (return side) and the DRAM overhead
+			sdp_factor = 1.25;
+			dram_factor = 2.0;
+			break;
+		case dml2_svp_mode_override_phantom_pipe_no_data_return:
+			// only the SDP (return side) factor is reduced; DRAM stays at 1.0
+			sdp_factor = 0.25;
+			break;
+		default:
+			break;
+		}
+
+		mall_prefetch_sdp_overhead_factor[k] = sdp_factor;
+		mall_prefetch_dram_overhead_factor[k] = dram_factor;
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u, mall_prefetch_sdp_overhead_factor = %f\n", __func__, k, mall_prefetch_sdp_overhead_factor[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, mall_prefetch_dram_overhead_factor = %f\n", __func__, k, mall_prefetch_dram_overhead_factor[k]);
+#endif
+	}
+}
+
+/*
+ * Return the derated bandwidth available for one SOC state / bandwidth type.
+ *
+ * The ideal SDP bandwidth is return_bus_width_bytes * dcfclk_mhz, the ideal
+ * fabric bandwidth is fclk_mhz * fabric_datapath_to_dcn_data_return_bytes and
+ * the ideal DRAM bandwidth is dram_bw_mbps as passed in. Each is scaled by a
+ * derate percentage taken from soc->qos_parameters.derate_table:
+ *   - is_avg_bw selects the "*_average" entries, otherwise the "*_urgent" ones;
+ *   - state_type == dml2_core_internal_soc_state_svp_prefetch selects the
+ *     dcn_mall_prefetch_* entries, any other state uses system_active_*;
+ *   - for urgent bandwidth only, the DRAM derate is the "vm" value when
+ *     is_hvm_en && is_hvm_only, the "pixel_and_vm" value when is_hvm_en alone,
+ *     and the "pixel" value otherwise. Average bandwidth always uses "pixel".
+ *
+ * For bw_type == dml2_core_internal_bw_sdp the result is the smaller of the
+ * derated SDP and fabric bandwidths; for any other bw_type it is the derated
+ * DRAM bandwidth.
+ */
+static double dml_get_return_bandwidth_available(
+	const struct dml2_soc_bb *soc,
+	enum dml2_core_internal_soc_state_type state_type,
+	enum dml2_core_internal_bw_type bw_type,
+	bool is_avg_bw,
+	bool is_hvm_en,
+	bool is_hvm_only,
+	double dcfclk_mhz,
+	double fclk_mhz,
+	double dram_bw_mbps)
+{
+	const bool is_svp_prefetch = (state_type == dml2_core_internal_soc_state_svp_prefetch);
+
+	// un-derated bandwidth of each path
+	double ideal_sdp_bandwidth = (double)soc->return_bus_width_bytes * dcfclk_mhz;
+	double ideal_fabric_bandwidth = fclk_mhz * (double)soc->fabric_datapath_to_dcn_data_return_bytes;
+	double ideal_dram_bandwidth = dram_bw_mbps; // supplied directly by the caller
+
+	double derate_sdp_factor;
+	double derate_fabric_factor;
+	double derate_dram_factor;
+
+	double derate_sdp_bandwidth;
+	double derate_fabric_bandwidth;
+	double derate_dram_bandwidth;
+
+	double return_bw_mbps;
+
+	if (is_avg_bw && is_svp_prefetch) {
+		derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dcfclk_derate_percent / 100.0;
+		derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.fclk_derate_percent / 100.0;
+		derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100.0;
+	} else if (is_avg_bw) { // just assume sys_active
+		derate_sdp_factor = soc->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100.0;
+		derate_fabric_factor = soc->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100.0;
+		derate_dram_factor = soc->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100.0;
+	} else if (is_svp_prefetch) { // urgent bw, svp prefetch
+		derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dcfclk_derate_percent / 100.0;
+		derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.fclk_derate_percent / 100.0;
+
+		if (!is_hvm_en)
+			derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0;
+		else if (is_hvm_only)
+			derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_vm / 100.0;
+		else
+			derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel_and_vm / 100.0;
+	} else { // urgent bw, just assume sys_active
+		derate_sdp_factor = soc->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0;
+		derate_fabric_factor = soc->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100.0;
+
+		if (!is_hvm_en)
+			derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100.0;
+		else if (is_hvm_only)
+			derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_vm / 100.0;
+		else
+			derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel_and_vm / 100.0;
+	}
+
+	derate_sdp_bandwidth = ideal_sdp_bandwidth * derate_sdp_factor;
+	derate_fabric_bandwidth = ideal_fabric_bandwidth * derate_fabric_factor;
+	derate_dram_bandwidth = ideal_dram_bandwidth * derate_dram_factor;
+
+	// SDP return is limited by the slower of the SDP and fabric paths; anything else is treated as DRAM
+	return_bw_mbps = (bw_type == dml2_core_internal_bw_sdp) ?
+		math_min2(derate_sdp_bandwidth, derate_fabric_bandwidth) :
+		derate_dram_bandwidth;
+
+	DML_LOG_VERBOSE("DML::%s: is_avg_bw = %u\n", __func__, is_avg_bw);
+	DML_LOG_VERBOSE("DML::%s: is_hvm_en = %u\n", __func__, is_hvm_en);
+	DML_LOG_VERBOSE("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only);
+	DML_LOG_VERBOSE("DML::%s: state_type = %s\n", __func__, dml2_core_internal_soc_state_type_str(state_type));
+	DML_LOG_VERBOSE("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type));
+	DML_LOG_VERBOSE("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz);
+	DML_LOG_VERBOSE("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz);
+	DML_LOG_VERBOSE("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth);
+	DML_LOG_VERBOSE("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth);
+	DML_LOG_VERBOSE("DML::%s: ideal_dram_bandwidth = %f\n", __func__, ideal_dram_bandwidth);
+	DML_LOG_VERBOSE("DML::%s: derate_sdp_bandwidth = %f (derate %f)\n", __func__, derate_sdp_bandwidth, derate_sdp_factor);
+	DML_LOG_VERBOSE("DML::%s: derate_fabric_bandwidth = %f (derate %f)\n", __func__, derate_fabric_bandwidth, derate_fabric_factor);
+	DML_LOG_VERBOSE("DML::%s: derate_dram_bandwidth = %f (derate %f)\n", __func__, derate_dram_bandwidth, derate_dram_factor);
+	DML_LOG_VERBOSE("DML::%s: return_bw_mbps = %f\n", __func__, return_bw_mbps);
+	return return_bw_mbps;
+}
+
+/*
+ * Populate the average and urgent return-bandwidth tables for every SOC state.
+ *
+ * For each [soc_state][bw_type] pair, dml_get_return_bandwidth_available() is
+ * called with is_avg_bw = 1 to fill avg_bandwidth_available and with
+ * is_avg_bw = 0 to fill urg_bandwidth_available. For the DRAM bandwidth type
+ * the urgent value is additionally evaluated with is_hvm_only = 1
+ * (urg_bandwidth_available_vm_only) and is_hvm_only = 0
+ * (urg_bandwidth_available_pixel_and_vm); both are indexed by SOC state.
+ * The *_min outputs hold, per SOC state, the smaller of the DRAM and SDP
+ * entries of the corresponding table.
+ */
+static noinline_for_stack void calculate_bandwidth_available(
+	double avg_bandwidth_available_min[dml2_core_internal_soc_state_max],
+	double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
+	double urg_bandwidth_available_min[dml2_core_internal_soc_state_max], // min between SDP and DRAM
+	double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
+	double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max],
+	double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max],
+
+	const struct dml2_soc_bb *soc,
+	bool HostVMEnable,
+	double dcfclk_mhz,
+	double fclk_mhz,
+	double dram_bw_mbps)
+{
+	unsigned int n, m;
+
+	DML_LOG_VERBOSE("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz);
+	DML_LOG_VERBOSE("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz);
+	DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, dram_bw_mbps);
+
+	// Calculate all the bandwidth available
+	for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
+		for (n = 0; n < dml2_core_internal_bw_max; n++) {
+			avg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc,
+				m, // soc_state
+				n, // bw_type
+				1, // avg_bw
+				HostVMEnable,
+				0, // hvm_only
+				dcfclk_mhz,
+				fclk_mhz,
+				dram_bw_mbps);
+
+			urg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
+
+
+#ifdef __DML_VBA_DEBUG__
+			DML_LOG_VERBOSE("DML::%s: avg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), avg_bandwidth_available[m][n]);
+			DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n]);
+#endif
+
+			// urg_bandwidth_available_vm_only and urg_bandwidth_available_pixel_and_vm are indexed by soc_state
+			if (n == dml2_core_internal_bw_dram) {
+				urg_bandwidth_available_vm_only[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 1, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
+				urg_bandwidth_available_pixel_and_vm[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
+			}
+		}
+
+		avg_bandwidth_available_min[m] = math_min2(avg_bandwidth_available[m][dml2_core_internal_bw_dram], avg_bandwidth_available[m][dml2_core_internal_bw_sdp]);
+		urg_bandwidth_available_min[m] = math_min2(urg_bandwidth_available[m][dml2_core_internal_bw_dram], urg_bandwidth_available[m][dml2_core_internal_bw_sdp]);
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: avg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), avg_bandwidth_available_min[m]);
+		DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_min[m]);
+		// Index by the SOC state m: after the inner loop n equals
+		// dml2_core_internal_bw_max and is not a SOC-state index.
+		DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_vm_only[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_vm_only[m]);
+#endif
+	}
+}
+
+/*
+ * Accumulate the average SDP and DRAM bandwidth required across all planes.
+ *
+ * Every avg_bandwidth_required[soc_state][bw_type] entry is first cleared.
+ * Each plane then contributes:
+ *   SDP:  mall_prefetch_sdp_overhead_factor * (luma + chroma read bw) + cursor_bw
+ *   DRAM: dcc p0 factor * mall dram factor * luma read bw
+ *       + dcc p1 factor * mall dram factor * chroma read bw + cursor_bw
+ * The contribution is always added to the svp_prefetch state and is added to
+ * the sys_active state only when the plane is not a phantom pipe.
+ */
+static void calculate_avg_bandwidth_required(
+	double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
+
+	// input
+	const struct dml2_display_cfg *display_cfg,
+	unsigned int num_active_planes,
+	double ReadBandwidthLuma[],
+	double ReadBandwidthChroma[],
+	double cursor_bw[],
+	double dcc_dram_bw_nom_overhead_factor_p0[],
+	double dcc_dram_bw_nom_overhead_factor_p1[],
+	double mall_prefetch_dram_overhead_factor[],
+	double mall_prefetch_sdp_overhead_factor[])
+{
+	unsigned int state, bw, k;
+
+	// Average BW support check: start every accumulator (sdp, dram) from zero
+	for (state = 0; state < dml2_core_internal_soc_state_max; state++)
+		for (bw = 0; bw < dml2_core_internal_bw_max; bw++)
+			avg_bandwidth_required[state][bw] = 0;
+
+	// SysActive and SVP Prefetch AVG bandwidth Check
+	for (k = 0; k < num_active_planes; ++k) {
+		double dram_factor_p0;
+		double dram_factor_p1;
+		double plane_sdp_bw;
+		double plane_dram_bw;
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: plane %0d\n", __func__, k);
+		DML_LOG_VERBOSE("DML::%s: ReadBandwidthLuma=%f\n", __func__, ReadBandwidthLuma[k]);
+		DML_LOG_VERBOSE("DML::%s: ReadBandwidthChroma=%f\n", __func__, ReadBandwidthChroma[k]);
+		DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor_p0=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p0[k]);
+		DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor_p1=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p1[k]);
+		DML_LOG_VERBOSE("DML::%s: mall_prefetch_dram_overhead_factor=%f\n", __func__, mall_prefetch_dram_overhead_factor[k]);
+		DML_LOG_VERBOSE("DML::%s: mall_prefetch_sdp_overhead_factor=%f\n", __func__, mall_prefetch_sdp_overhead_factor[k]);
+#endif
+
+		dram_factor_p0 = dcc_dram_bw_nom_overhead_factor_p0[k] * mall_prefetch_dram_overhead_factor[k];
+		dram_factor_p1 = dcc_dram_bw_nom_overhead_factor_p1[k] * mall_prefetch_dram_overhead_factor[k];
+
+		// FIXME_DCN4, was missing cursor_bw in here, but do I actually need that and tdlut bw for average bandwidth calculation?
+		plane_sdp_bw = mall_prefetch_sdp_overhead_factor[k] * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k];
+		plane_dram_bw = dram_factor_p0 * ReadBandwidthLuma[k] + dram_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k];
+
+		// svp_prefetch avg bw includes phantom pipes
+		avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] += plane_sdp_bw;
+		avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] += plane_dram_bw;
+
+		// active avg bw does not include phantom pipes
+		if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
+			avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] += plane_sdp_bw;
+			avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] += plane_dram_bw;
+		}
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
+		DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
+		DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
+		DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
+#endif
+	}
+}
+
+static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch,
+ struct dml2_core_calcs_CalculateVMRowAndSwath_params *p)
+{
+ struct dml2_core_calcs_CalculateVMRowAndSwath_locals *s = &scratch->CalculateVMRowAndSwath_locals;
+
+ s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->display_cfg->gpuvm_enable, p->display_cfg->hostvm_enable, p->HostVMMinPageSize, p->display_cfg->hostvm_max_non_cached_page_table_levels);
+
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->display_cfg->gpuvm_enable == true) {
+ p->vm_group_bytes[k] = 512;
+ p->dpte_group_bytes[k] = 512;
+ } else {
+ p->vm_group_bytes[k] = 0;
+ p->dpte_group_bytes[k] = 0;
+ }
+
+ if (dml_is_420(p->myPipe[k].SourcePixelFormat) || p->myPipe[k].SourcePixelFormat == dml2_rgbe_alpha) {
+ if ((p->myPipe[k].SourcePixelFormat == dml2_420_10 || p->myPipe[k].SourcePixelFormat == dml2_420_12) && !dml_is_vertical_rotation(p->myPipe[k].RotationAngle)) {
+ s->PTEBufferSizeInRequestsForLuma[k] = (p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma) / 2;
+ s->PTEBufferSizeInRequestsForChroma[k] = s->PTEBufferSizeInRequestsForLuma[k];
+ } else {
+ s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma;
+ s->PTEBufferSizeInRequestsForChroma[k] = p->PTEBufferSizeInRequestsChroma;
+ }
+
+ scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary;
+ scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable;
+ scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface;
+ scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesC;
+ scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesC;
+ scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat;
+ scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling;
+ scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelC;
+ scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle;
+ scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthC[k];
+ scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeightC;
+ scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStartC;
+ scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStartC;
+ scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable;
+ scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels;
+ scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
+ scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForChroma[k];
+ scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchC;
+ scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthC;
+ scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightC;
+ scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]);
+ scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchC;
+ scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present;
+
+ scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowC[k];
+ scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageC[k];
+ scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_chroma_ub[k];
+ scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_chroma[k];
+ scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_chroma[k];
+ scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowC_one_row_per_frame[k];
+ scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_chroma_ub_one_row_per_frame[k];
+ scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_chroma_one_row_per_frame[k];
+ scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_c[k];
+ scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_c[k];
+ scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthC[k];
+ scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightC[k];
+ scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeC[k];
+ scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_c[k];
+
+ scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_c[k];
+ scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_chroma[k];
+ scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_chroma[k];
+ scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_chroma[k];
+ scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_chroma[k];
+ scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_c[k];
+
+ s->vm_bytes_c = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params);
+
+ p->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
+ p->myPipe[k].VRatioChroma,
+ p->myPipe[k].VTapsChroma,
+ p->myPipe[k].InterlaceEnable,
+ p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
+ p->myPipe[k].SwathHeightC,
+ p->myPipe[k].RotationAngle,
+ p->myPipe[k].mirrored,
+ p->myPipe[k].ViewportStationary,
+ p->SwathWidthC[k],
+ p->myPipe[k].ViewportHeightC,
+ p->myPipe[k].ViewportXStartC,
+ p->myPipe[k].ViewportYStartC,
+
+ // Output
+ &p->VInitPreFillC[k],
+ &p->MaxNumSwathC[k]);
+ } else {
+ s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma;
+ s->PTEBufferSizeInRequestsForChroma[k] = 0;
+ s->PixelPTEBytesPerRowC[k] = 0;
+ s->PixelPTEBytesPerRowStorageC[k] = 0;
+ s->vm_bytes_c = 0;
+ p->MaxNumSwathC[k] = 0;
+ p->PrefetchSourceLinesC[k] = 0;
+ s->dpte_row_height_chroma_one_row_per_frame[k] = 0;
+ s->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
+ s->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
+ }
+
+ scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary;
+ scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable;
+ scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface;
+ scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesY;
+ scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesY;
+ scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat;
+ scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling;
+ scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelY;
+ scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle;
+ scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthY[k];
+ scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeight;
+ scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStart;
+ scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStart;
+ scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable;
+ scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels;
+ scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
+ scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForLuma[k];
+ scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchY;
+ scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthY;
+ scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightY;
+ scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]);
+ scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchY;
+ scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present;
+
+ scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowY[k];
+ scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageY[k];
+ scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_luma_ub[k];
+ scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_luma[k];
+ scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_luma[k];
+ scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowY_one_row_per_frame[k];
+ scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_luma_ub_one_row_per_frame[k];
+ scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_luma_one_row_per_frame[k];
+ scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_y[k];
+ scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_y[k];
+ scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthY[k];
+ scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightY[k];
+ scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeY[k];
+ scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_l[k];
+
+ scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_l[k];
+ scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_luma[k];
+ scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_luma[k];
+ scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_luma[k];
+ scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_luma[k];
+ scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_l[k];
+
+ s->vm_bytes_l = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params);
+
+ p->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
+ p->myPipe[k].VRatio,
+ p->myPipe[k].VTaps,
+ p->myPipe[k].InterlaceEnable,
+ p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
+ p->myPipe[k].SwathHeightY,
+ p->myPipe[k].RotationAngle,
+ p->myPipe[k].mirrored,
+ p->myPipe[k].ViewportStationary,
+ p->SwathWidthY[k],
+ p->myPipe[k].ViewportHeight,
+ p->myPipe[k].ViewportXStart,
+ p->myPipe[k].ViewportYStart,
+
+ // Output
+ &p->VInitPreFillY[k],
+ &p->MaxNumSwathY[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes_l = %u (before hvm level)\n", __func__, k, s->vm_bytes_l);
+ DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes_c = %u (before hvm level)\n", __func__, k, s->vm_bytes_c);
+ DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes_per_row_ub_l = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_l[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes_per_row_ub_c = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_c[k]);
+#endif
+ p->vm_bytes[k] = (s->vm_bytes_l + s->vm_bytes_c) * (1 + 8 * s->HostVMDynamicLevels);
+ p->meta_row_bytes[k] = s->meta_row_bytes_per_row_ub_l[k] + s->meta_row_bytes_per_row_ub_c[k];
+ p->meta_row_bytes_per_row_ub_l[k] = s->meta_row_bytes_per_row_ub_l[k];
+ p->meta_row_bytes_per_row_ub_c[k] = s->meta_row_bytes_per_row_ub_c[k];
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes = %u\n", __func__, k, p->meta_row_bytes[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes = %u (after hvm level)\n", __func__, k, p->vm_bytes[k]);
+#endif
+ if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) {
+ p->PTEBufferSizeNotExceeded[k] = true;
+ } else {
+ p->PTEBufferSizeNotExceeded[k] = false;
+ }
+
+ s->one_row_per_frame_fits_in_buffer[k] = (s->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForLuma[k] &&
+ s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]);
+#ifdef __DML_VBA_DEBUG__
+ if (p->PTEBufferSizeNotExceeded[k] == 0 || s->one_row_per_frame_fits_in_buffer[k] == 0) {
+ DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded (not one_row_per_frame) = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
+
+ DML_LOG_VERBOSE("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]);
+ }
+#endif
+ }
+
+ CalculateMALLUseForStaticScreen(
+ p->display_cfg,
+ p->NumberOfActiveSurfaces,
+ p->MALLAllocatedForDCN,
+ p->SurfaceSizeInMALL,
+ s->one_row_per_frame_fits_in_buffer,
+ // Output
+ p->is_using_mall_for_ss);
+
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->display_cfg->gpuvm_enable) {
+ if (p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.enable == 1) {
+ p->PTE_BUFFER_MODE[k] = p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.value;
+ }
+ p->PTE_BUFFER_MODE[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) ||
+ dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64);
+ p->BIGK_FRAGMENT_SIZE[k] = (unsigned int)(math_log((float)p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes * 1024, 2) - 12);
+ } else {
+ p->PTE_BUFFER_MODE[k] = 0;
+ p->BIGK_FRAGMENT_SIZE[k] = 0;
+ }
+ }
+
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ p->DCCMetaBufferSizeNotExceeded[k] = true;
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, p->is_using_mall_for_ss[k]);
+#endif
+ p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) ||
+ (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle));
+
+ p->use_one_row_for_frame_flip[k] = p->use_one_row_for_frame[k] && !(p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame);
+
+ if (p->use_one_row_for_frame[k]) {
+ p->dpte_row_height_luma[k] = s->dpte_row_height_luma_one_row_per_frame[k];
+ p->dpte_row_width_luma_ub[k] = s->dpte_row_width_luma_ub_one_row_per_frame[k];
+ s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY_one_row_per_frame[k];
+ p->dpte_row_height_chroma[k] = s->dpte_row_height_chroma_one_row_per_frame[k];
+ p->dpte_row_width_chroma_ub[k] = s->dpte_row_width_chroma_ub_one_row_per_frame[k];
+ s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC_one_row_per_frame[k];
+ p->PTEBufferSizeNotExceeded[k] = s->one_row_per_frame_fits_in_buffer[k];
+ }
+
+ if (p->meta_row_bytes[k] <= p->DCCMetaBufferSizeBytes) {
+ p->DCCMetaBufferSizeNotExceeded[k] = true;
+ } else {
+ p->DCCMetaBufferSizeNotExceeded[k] = false;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%d, meta_row_bytes = %d\n", __func__, k, p->meta_row_bytes[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, DCCMetaBufferSizeBytes = %d\n", __func__, k, p->DCCMetaBufferSizeBytes);
+ DML_LOG_VERBOSE("DML::%s: k=%d, DCCMetaBufferSizeNotExceeded = %d\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]);
+#endif
+ }
+
+ s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY[k] * (1 + 8 * s->HostVMDynamicLevels);
+ s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC[k] * (1 + 8 * s->HostVMDynamicLevels);
+ p->PixelPTEBytesPerRow[k] = s->PixelPTEBytesPerRowY[k] + s->PixelPTEBytesPerRowC[k];
+ p->dpte_row_bytes_per_row_l[k] = s->PixelPTEBytesPerRowY[k];
+ p->dpte_row_bytes_per_row_c[k] = s->PixelPTEBytesPerRowC[k];
+
+ // if one row of dPTEs is meant to span the entire frame, then for these calculations, we will pretend like that one big row is fetched in two halfs
+ if (p->use_one_row_for_frame[k])
+ p->PixelPTEBytesPerRow[k] = p->PixelPTEBytesPerRow[k] / 2;
+
+ CalculateRowBandwidth(
+ p->display_cfg->gpuvm_enable,
+ p->use_one_row_for_frame[k],
+ p->myPipe[k].SourcePixelFormat,
+ p->myPipe[k].VRatio,
+ p->myPipe[k].VRatioChroma,
+ p->myPipe[k].DCCEnable,
+ p->myPipe[k].HTotal / p->myPipe[k].PixelClock,
+ s->PixelPTEBytesPerRowY[k],
+ s->PixelPTEBytesPerRowC[k],
+ p->dpte_row_height_luma[k],
+ p->dpte_row_height_chroma[k],
+
+ p->mrq_present,
+ p->meta_row_bytes_per_row_ub_l[k],
+ p->meta_row_bytes_per_row_ub_c[k],
+ p->meta_row_height_luma[k],
+ p->meta_row_height_chroma[k],
+
+ // Output
+ &p->dpte_row_bw[k],
+ &p->meta_row_bw[k]);
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config);
+ DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, gpuvm_enable = %u\n", __func__, k, p->display_cfg->gpuvm_enable);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]);
+#endif
+ }
+}
+
+/*
+ * CalculateUrgentLatency - urgent latency for the active QoS model.
+ *
+ * dml2_qos_param_type_dcn4x: the sum of
+ *   - (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock,
+ *   - max_round_trip_to_furthest_cs_fclk_cycles / FabricClock, scaled up by
+ *     fabric_max_transport_latency_margin percent,
+ *   - urgent_ramp_uclk_cycles / uclk_freq_mhz, scaled up by
+ *     umc_urgent_ramp_latency_margin percent.
+ *
+ * Any other qos_type: the largest of the three UrgentLatency* inputs; when
+ * DoUrgentLatencyAdjustment is set,
+ * UrgentLatencyAdjustmentFabricClockComponent *
+ * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1) is added.
+ *
+ * NOTE(review): FabricClock and uclk_freq_mhz are divisors and are not
+ * checked against zero here -- presumably guaranteed by the caller.
+ *
+ * Returns the resulting latency.
+ */
+static double CalculateUrgentLatency(
+	double UrgentLatencyPixelDataOnly,
+	double UrgentLatencyPixelMixedWithVMData,
+	double UrgentLatencyVMDataOnly,
+	bool DoUrgentLatencyAdjustment,
+	double UrgentLatencyAdjustmentFabricClockComponent,
+	double UrgentLatencyAdjustmentFabricClockReference,
+	double FabricClock,
+	double uclk_freq_mhz,
+	enum dml2_qos_param_type qos_type,
+	unsigned int urgent_ramp_uclk_cycles,
+	unsigned int df_qos_response_time_fclk_cycles,
+	unsigned int max_round_trip_to_furthest_cs_fclk_cycles,
+	unsigned int mall_overhead_fclk_cycles,
+	double umc_urgent_ramp_latency_margin,
+	double fabric_max_transport_latency_margin)
+{
+	const bool is_dcn4x = (qos_type == dml2_qos_param_type_dcn4x);
+	double latency;
+
+	if (is_dcn4x) {
+		/* Each term is evaluated exactly as in the single-expression form. */
+		double fabric_response = (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock;
+		double fabric_transport = max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1 + fabric_max_transport_latency_margin / 100.0);
+		double umc_ramp = urgent_ramp_uclk_cycles / uclk_freq_mhz * (1 + umc_urgent_ramp_latency_margin / 100.0);
+
+		latency = fabric_response + fabric_transport + umc_ramp;
+	} else {
+		latency = math_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
+		if (DoUrgentLatencyAdjustment)
+			latency = latency + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	if (is_dcn4x) {
+		DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type);
+		DML_LOG_VERBOSE("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles);
+		DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
+		DML_LOG_VERBOSE("DML::%s: umc_urgent_ramp_latency_margin = %f\n", __func__, umc_urgent_ramp_latency_margin);
+	} else {
+		DML_LOG_VERBOSE("DML::%s: UrgentLatencyPixelDataOnly = %f\n", __func__, UrgentLatencyPixelDataOnly);
+		DML_LOG_VERBOSE("DML::%s: UrgentLatencyPixelMixedWithVMData = %f\n", __func__, UrgentLatencyPixelMixedWithVMData);
+		DML_LOG_VERBOSE("DML::%s: UrgentLatencyVMDataOnly = %f\n", __func__, UrgentLatencyVMDataOnly);
+		DML_LOG_VERBOSE("DML::%s: UrgentLatencyAdjustmentFabricClockComponent = %f\n", __func__, UrgentLatencyAdjustmentFabricClockComponent);
+		DML_LOG_VERBOSE("DML::%s: UrgentLatencyAdjustmentFabricClockReference = %f\n", __func__, UrgentLatencyAdjustmentFabricClockReference);
+	}
+	DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, FabricClock);
+	DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, latency);
+#endif
+	return latency;
+}
+
+/*
+ * CalculateTripToMemory - latency of a trip to memory for the active QoS model.
+ *
+ * dml2_qos_param_type_dcn4x: the sum of
+ *   - mall_overhead_fclk_cycles / FabricClock,
+ *   - max_round_trip_to_furthest_cs_fclk_cycles / FabricClock, scaled up by
+ *     fabric_max_transport_latency_margin percent,
+ *   - trip_to_memory_uclk_cycles / uclk_freq_mhz, scaled up by
+ *     umc_max_latency_margin percent.
+ *
+ * Any other qos_type: UrgLatency is returned unchanged.
+ */
+static double CalculateTripToMemory(
+	double UrgLatency,
+	double FabricClock,
+	double uclk_freq_mhz,
+	enum dml2_qos_param_type qos_type,
+	unsigned int trip_to_memory_uclk_cycles,
+	unsigned int max_round_trip_to_furthest_cs_fclk_cycles,
+	unsigned int mall_overhead_fclk_cycles,
+	double umc_max_latency_margin,
+	double fabric_max_transport_latency_margin)
+{
+	const bool is_dcn4x = (qos_type == dml2_qos_param_type_dcn4x);
+	double trip_us = UrgLatency;
+
+	if (is_dcn4x) {
+		double mall_overhead = mall_overhead_fclk_cycles / FabricClock;
+		double fabric_transport = max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0);
+		double umc_trip = trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0);
+
+		trip_us = mall_overhead + fabric_transport + umc_trip;
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	if (is_dcn4x) {
+		DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type);
+		DML_LOG_VERBOSE("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles);
+		DML_LOG_VERBOSE("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles);
+		DML_LOG_VERBOSE("DML::%s: trip_to_memory_uclk_cycles = %d\n", __func__, trip_to_memory_uclk_cycles);
+		DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
+		DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, FabricClock);
+		DML_LOG_VERBOSE("DML::%s: fabric_max_transport_latency_margin = %f\n", __func__, fabric_max_transport_latency_margin);
+		DML_LOG_VERBOSE("DML::%s: umc_max_latency_margin = %f\n", __func__, umc_max_latency_margin);
+	} else {
+		DML_LOG_VERBOSE("DML::%s: UrgLatency = %f\n", __func__, UrgLatency);
+	}
+	DML_LOG_VERBOSE("DML::%s: trip_to_memory_us = %f\n", __func__, trip_us);
+#endif
+
+	return trip_us;
+}
+
+/*
+ * CalculateMetaTripToMemory - latency of a meta-data trip to memory.
+ *
+ * dml2_qos_param_type_dcn4x: the sum of
+ *   - meta_trip_to_memory_fclk_cycles / FabricClock, scaled up by
+ *     fabric_max_transport_latency_margin percent,
+ *   - meta_trip_to_memory_uclk_cycles / uclk_freq_mhz, scaled up by
+ *     umc_max_latency_margin percent.
+ *
+ * Any other qos_type: UrgLatency is returned unchanged.
+ */
+static double CalculateMetaTripToMemory(
+	double UrgLatency,
+	double FabricClock,
+	double uclk_freq_mhz,
+	enum dml2_qos_param_type qos_type,
+	unsigned int meta_trip_to_memory_uclk_cycles,
+	unsigned int meta_trip_to_memory_fclk_cycles,
+	double umc_max_latency_margin,
+	double fabric_max_transport_latency_margin)
+{
+	const bool is_dcn4x = (qos_type == dml2_qos_param_type_dcn4x);
+	double meta_trip_us = UrgLatency;
+
+	if (is_dcn4x) {
+		double fabric_part = meta_trip_to_memory_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0);
+		double umc_part = meta_trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0);
+
+		meta_trip_us = fabric_part + umc_part;
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	if (is_dcn4x) {
+		DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type);
+		DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles);
+		DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles);
+		DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
+	} else {
+		DML_LOG_VERBOSE("DML::%s: UrgLatency = %f\n", __func__, UrgLatency);
+	}
+	DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_us = %f\n", __func__, meta_trip_us);
+#endif
+
+	return meta_trip_us;
+}
+
+/*
+ * calculate_cursor_req_attributes - derive cursor fetch geometry.
+ *
+ * Inputs:
+ *   cursor_width - cursor width in pixels (also used as the height, see below)
+ *   cursor_bpp   - bits per pixel; 2, 32 and 64 are the handled values
+ *
+ * Outputs (all four are always written):
+ *   cursor_lines_per_chunk - lines per cursor chunk, chosen from bpp/width;
+ *                            0 when cursor_bpp is not one of the handled values
+ *   cursor_bytes_per_line  - line size rounded up to the request size
+ *   cursor_bytes_per_chunk - cursor_bytes_per_line * cursor_lines_per_chunk
+ *   cursor_bytes           - cursor_bytes_per_line * cursor_width (square cursor)
+ *
+ * An unhandled cursor_bpp combined with a non-zero width is logged and trips
+ * DML_ASSERT.
+ */
+static void calculate_cursor_req_attributes(
+	unsigned int cursor_width,
+	unsigned int cursor_bpp,
+
+	// output
+	unsigned int *cursor_lines_per_chunk,
+	unsigned int *cursor_bytes_per_line,
+	unsigned int *cursor_bytes_per_chunk,
+	unsigned int *cursor_bytes)
+{
+	unsigned int cursor_bytes_per_req = 0;
+	unsigned int cursor_width_bytes = 0;
+	unsigned int cursor_height = 0;
+
+	//SW determines the cursor pitch to support the maximum cursor_width that will be used but the following restrictions apply.
+	//- For 2bpp, cursor_pitch = 256 pixels due to min cursor request size of 64B
+	//- For 32 or 64 bpp, cursor_pitch = 64, 128 or 256 pixels depending on the cursor width
+
+	//The cursor requestor uses a cursor request size of 64B, 128B, or 256B depending on the cursor_width and cursor_bpp as follows.
+
+	cursor_width_bytes = (unsigned int)math_ceil2((double)cursor_width * cursor_bpp / 8, 1);
+	if (cursor_width_bytes <= 64)
+		cursor_bytes_per_req = 64;
+	else if (cursor_width_bytes <= 128)
+		cursor_bytes_per_req = 128;
+	else
+		cursor_bytes_per_req = 256;
+
+	//If cursor_width_bytes is greater than 256B, then multiple 256B requests are issued to fetch the entire cursor line.
+	*cursor_bytes_per_line = (unsigned int)math_ceil2((double)cursor_width_bytes, cursor_bytes_per_req);
+
+	//Nominally, the cursor chunk is 1KB or 2KB but it is restricted to a power of 2 number of lines with a maximum of 16 lines.
+	if (cursor_bpp == 2) {
+		*cursor_lines_per_chunk = 16;
+	} else if (cursor_bpp == 32) {
+		if (cursor_width <= 32)
+			*cursor_lines_per_chunk = 16;
+		else if (cursor_width <= 64)
+			*cursor_lines_per_chunk = 8;
+		else if (cursor_width <= 128)
+			*cursor_lines_per_chunk = 4;
+		else
+			*cursor_lines_per_chunk = 2;
+	} else if (cursor_bpp == 64) {
+		if (cursor_width <= 16)
+			*cursor_lines_per_chunk = 16;
+		else if (cursor_width <= 32)
+			*cursor_lines_per_chunk = 8;
+		else if (cursor_width <= 64)
+			*cursor_lines_per_chunk = 4;
+		else if (cursor_width <= 128)
+			*cursor_lines_per_chunk = 2;
+		else
+			*cursor_lines_per_chunk = 1;
+	} else {
+		// Unhandled bpp (this includes the "no cursor" case of bpp == 0):
+		// give the output a defined value, because it is read right below
+		// to compute cursor_bytes_per_chunk and must not depend on whatever
+		// the caller's storage happened to contain.
+		*cursor_lines_per_chunk = 0;
+
+		if (cursor_width > 0) {
+			DML_LOG_VERBOSE("DML::%s: Invalid cursor_bpp = %d\n", __func__, cursor_bpp);
+			DML_ASSERT(0);
+		}
+	}
+
+	*cursor_bytes_per_chunk = *cursor_bytes_per_line * *cursor_lines_per_chunk;
+
+	// For the cursor implementation, all requested data is stored in the return buffer. Given this fact, the cursor_bytes can be directly compared with the CursorBufferSize.
+	// Only cursor_width is provided for worst case sizing so assume that the cursor is square
+	cursor_height = cursor_width;
+	*cursor_bytes = *cursor_bytes_per_line * cursor_height;
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: cursor_bpp = %d\n", __func__, cursor_bpp);
+	DML_LOG_VERBOSE("DML::%s: cursor_width = %d\n", __func__, cursor_width);
+	DML_LOG_VERBOSE("DML::%s: cursor_width_bytes = %d\n", __func__, cursor_width_bytes);
+	DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_req = %d\n", __func__, cursor_bytes_per_req);
+	DML_LOG_VERBOSE("DML::%s: cursor_lines_per_chunk = %d\n", __func__, *cursor_lines_per_chunk);
+	DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_line = %d\n", __func__, *cursor_bytes_per_line);
+	DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, *cursor_bytes_per_chunk);
+	DML_LOG_VERBOSE("DML::%s: cursor_bytes = %d\n", __func__, *cursor_bytes);
+	DML_LOG_VERBOSE("DML::%s: cursor_pitch = %d\n", __func__, cursor_bpp == 2 ? 256 : (unsigned int)1 << (unsigned int)math_ceil2(math_log((float)cursor_width, 2), 1));
+#endif
+}
+
+/*
+ * calculate_cursor_urgent_burst_factor - urgent burst factor for the cursor.
+ *
+ * When CursorWidth is zero nothing is computed and both outputs are left
+ * untouched.
+ *
+ * Otherwise the number of cursor lines that fit in the buffer
+ * (CursorBufferSize is multiplied by 1024 before use) is converted to time
+ * with LineTime. If that time does not exceed UrgentLatency,
+ * *NotEnoughUrgentLatencyHiding is set and the factor is 1; otherwise the
+ * flag is cleared and the factor is buffer_time / (buffer_time - UrgentLatency).
+ */
+static void calculate_cursor_urgent_burst_factor(
+	unsigned int CursorBufferSize,
+	unsigned int CursorWidth,
+	unsigned int cursor_bytes_per_chunk,
+	unsigned int cursor_lines_per_chunk,
+	double LineTime,
+	double UrgentLatency,
+
+	double *UrgentBurstFactorCursor,
+	bool *NotEnoughUrgentLatencyHiding)
+{
+	unsigned int buffered_lines;
+	double buffered_time;
+
+	/* No cursor: leave the outputs as the caller set them. */
+	if (!(CursorWidth > 0))
+		return;
+
+	buffered_lines = (unsigned int)math_floor2(CursorBufferSize * 1024.0 / (double)cursor_bytes_per_chunk, 1) * cursor_lines_per_chunk;
+	buffered_time = buffered_lines * LineTime;
+
+	/* Keep the "<= 0" form so the comparison outcome matches for every input. */
+	if (buffered_time - UrgentLatency <= 0) {
+		*NotEnoughUrgentLatencyHiding = 1;
+		*UrgentBurstFactorCursor = 1;
+	} else {
+		*NotEnoughUrgentLatencyHiding = 0;
+		*UrgentBurstFactorCursor = buffered_time / (buffered_time - UrgentLatency);
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: LinesInCursorBuffer = %u\n", __func__, buffered_lines);
+	DML_LOG_VERBOSE("DML::%s: CursorBufferSizeInTime = %f\n", __func__, buffered_time);
+	DML_LOG_VERBOSE("DML::%s: CursorBufferSize = %u (kbytes)\n", __func__, CursorBufferSize);
+	DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %u\n", __func__, cursor_bytes_per_chunk);
+	DML_LOG_VERBOSE("DML::%s: cursor_lines_per_chunk = %u\n", __func__, cursor_lines_per_chunk);
+	DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, *UrgentBurstFactorCursor);
+	DML_LOG_VERBOSE("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding);
+#endif
+}
+
+/*
+ * CalculateUrgentBurstFactor - urgent burst factors for luma and chroma.
+ *
+ * For each plane component the DET buffer size (1024 * 1024 is substituted
+ * when dml_is_phantom_pipe() reports a phantom pipe) is converted to a line
+ * count, rounded down to a multiple of the swath height, and turned into time
+ * via LineTime / VRatio. If that time does not exceed UrgentLatency the
+ * factor is 1 and *NotEnoughUrgentLatencyHiding is set; otherwise the factor
+ * is det_time / (det_time - UrgentLatency).
+ *
+ * Chroma is only evaluated when BytePerPixelInDETC > 0; otherwise
+ * *UrgentBurstFactorChroma stays 0. VRatio must be positive (asserted).
+ */
+static void CalculateUrgentBurstFactor(
+	const struct dml2_plane_parameters *plane_cfg,
+	unsigned int swath_width_luma_ub,
+	unsigned int swath_width_chroma_ub,
+	unsigned int SwathHeightY,
+	unsigned int SwathHeightC,
+	double LineTime,
+	double UrgentLatency,
+	double VRatio,
+	double VRatioC,
+	double BytePerPixelInDETY,
+	double BytePerPixelInDETC,
+	unsigned int DETBufferSizeY,
+	unsigned int DETBufferSizeC,
+	// Output
+	double *UrgentBurstFactorLuma,
+	double *UrgentBurstFactorChroma,
+	bool *NotEnoughUrgentLatencyHiding)
+{
+	unsigned int det_bytes;
+	double luma_lines;
+	double luma_time;
+	double chroma_lines;
+	double chroma_time;
+
+	/* Defaults: enough hiding, no burst factor computed yet. */
+	*UrgentBurstFactorChroma = 0;
+	*UrgentBurstFactorLuma = 0;
+	*NotEnoughUrgentLatencyHiding = 0;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio);
+	DML_LOG_VERBOSE("DML::%s: VRatioC = %f\n", __func__, VRatioC);
+	DML_LOG_VERBOSE("DML::%s: DETBufferSizeY = %d\n", __func__, DETBufferSizeY);
+	DML_LOG_VERBOSE("DML::%s: DETBufferSizeC = %d\n", __func__, DETBufferSizeC);
+	DML_LOG_VERBOSE("DML::%s: BytePerPixelInDETY = %f\n", __func__, BytePerPixelInDETY);
+	DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
+	DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, LineTime);
+#endif
+	DML_ASSERT(VRatio > 0);
+
+	/* Luma */
+	det_bytes = dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeY;
+	luma_lines = det_bytes / BytePerPixelInDETY / swath_width_luma_ub;
+	luma_time = math_floor2(luma_lines, SwathHeightY) * LineTime / VRatio;
+
+	if (luma_time - UrgentLatency <= 0) {
+		*NotEnoughUrgentLatencyHiding = 1;
+		*UrgentBurstFactorLuma = 1;
+	} else {
+		*UrgentBurstFactorLuma = luma_time / (luma_time - UrgentLatency);
+	}
+
+	/* Chroma, only for formats that carry a second plane */
+	if (BytePerPixelInDETC > 0) {
+		det_bytes = dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeC;
+		chroma_lines = det_bytes / BytePerPixelInDETC / swath_width_chroma_ub;
+		chroma_time = math_floor2(chroma_lines, SwathHeightC) * LineTime / VRatioC;
+
+		if (chroma_time - UrgentLatency <= 0) {
+			*NotEnoughUrgentLatencyHiding = 1;
+			*UrgentBurstFactorChroma = 1;
+		} else {
+			*UrgentBurstFactorChroma = chroma_time / (chroma_time - UrgentLatency);
+		}
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: LinesInDETLuma = %f\n", __func__, luma_lines);
+	DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
+	DML_LOG_VERBOSE("DML::%s: DETBufferSizeInTimeLuma = %f\n", __func__, luma_time);
+	DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, *UrgentBurstFactorLuma);
+	DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma);
+	DML_LOG_VERBOSE("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding);
+#endif
+}
+
+/*
+ * CalculateDCFCLKDeepSleepTdlut - minimum DCFCLK to use in deep sleep.
+ *
+ * For every active surface a per-surface requirement is derived from the
+ * swath width, bytes per pixel and the display pipe line delivery time
+ * (luma only, or the larger of luma and chroma), and floored at
+ * pixel_rate / 16. When the plane is set up for 3D LUT and has LUT bytes to
+ * deliver, the requirement is raised so the LUT can be delivered within
+ * prefetch_swath_time_us[k]; in that case it is also raised to at least
+ * dispclk / 4.
+ *
+ * *DCFClkDeepSleep is the largest of: 8.0, the total read bandwidth scaled by
+ * __DML2_CALCS_DCFCLK_FACTOR__ over ReturnBusWidth, and every per-surface
+ * requirement.
+ */
+static void CalculateDCFCLKDeepSleepTdlut(
+	const struct dml2_display_cfg *display_cfg,
+	unsigned int NumberOfActiveSurfaces,
+	unsigned int BytePerPixelY[],
+	unsigned int BytePerPixelC[],
+	unsigned int SwathWidthY[],
+	unsigned int SwathWidthC[],
+	unsigned int DPPPerSurface[],
+	double PSCL_THROUGHPUT[],
+	double PSCL_THROUGHPUT_CHROMA[],
+	double Dppclk[],
+	double ReadBandwidthLuma[],
+	double ReadBandwidthChroma[],
+	unsigned int ReturnBusWidth,
+
+	double dispclk,
+	unsigned int tdlut_bytes_to_deliver[],
+	double prefetch_swath_time_us[],
+
+	// Output
+	double *DCFClkDeepSleep)
+{
+	double per_surface_dcfclk[DML2_MAX_PLANES];
+	double total_read_bw = 0.0;
+	double deep_sleep_dcfclk;
+	unsigned int k;
+
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		const struct dml2_plane_parameters *plane = &display_cfg->plane_descriptors[k];
+		double pixel_rate_mhz = ((double)display_cfg->stream_descriptors[plane->stream_index].timing.pixel_clock_khz / 1000);
+		double luma_delivery_time;
+		double surface_dcfclk;
+
+		if (plane->composition.scaler_info.plane0.v_ratio <= 1)
+			luma_delivery_time = SwathWidthY[k] * DPPPerSurface[k] / plane->composition.scaler_info.plane0.h_ratio / pixel_rate_mhz;
+		else
+			luma_delivery_time = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+
+		if (BytePerPixelC[k] > 0) {
+			/* Two-plane format: the slower-to-feed plane sets the requirement. */
+			double chroma_delivery_time;
+
+			if (plane->composition.scaler_info.plane1.v_ratio <= 1)
+				chroma_delivery_time = SwathWidthC[k] * DPPPerSurface[k] / plane->composition.scaler_info.plane1.h_ratio / pixel_rate_mhz;
+			else
+				chroma_delivery_time = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+
+			surface_dcfclk = math_max2(__DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / luma_delivery_time,
+				__DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / chroma_delivery_time);
+		} else {
+			surface_dcfclk = __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / luma_delivery_time;
+		}
+		surface_dcfclk = math_max2(surface_dcfclk, pixel_rate_mhz / 16);
+
+		// adjust for 3dlut delivery time
+		if (plane->tdlut.setup_for_tdlut && tdlut_bytes_to_deliver[k] > 0) {
+			double tdlut_required_deepsleep_dcfclk = (double) tdlut_bytes_to_deliver[k] / 64.0 / prefetch_swath_time_us[k];
+
+			DML_LOG_VERBOSE("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, surface_dcfclk);
+			DML_LOG_VERBOSE("DML::%s: k=%d, tdlut_bytes_to_deliver = %d\n", __func__, k, tdlut_bytes_to_deliver[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_swath_time_us = %f\n", __func__, k, prefetch_swath_time_us[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%d, tdlut_required_deepsleep_dcfclk = %f\n", __func__, k, tdlut_required_deepsleep_dcfclk);
+
+			// increase the deepsleep dcfclk to match the original dispclk throughput rate
+			if (tdlut_required_deepsleep_dcfclk > surface_dcfclk) {
+				surface_dcfclk = math_max2(surface_dcfclk, tdlut_required_deepsleep_dcfclk);
+				surface_dcfclk = math_max2(surface_dcfclk, dispclk / 4.0);
+			}
+		}
+
+		per_surface_dcfclk[k] = surface_dcfclk;
+
+		/* Accumulated in ascending k, same order as a dedicated summing pass. */
+		total_read_bw = total_read_bw + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz);
+		DML_LOG_VERBOSE("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, per_surface_dcfclk[k]);
+#endif
+	}
+
+	deep_sleep_dcfclk = math_max2(8.0, __DML2_CALCS_DCFCLK_FACTOR__ * total_read_bw / (double)ReturnBusWidth);
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: __DML2_CALCS_DCFCLK_FACTOR__ = %f\n", __func__, __DML2_CALCS_DCFCLK_FACTOR__);
+	DML_LOG_VERBOSE("DML::%s: ReadBandwidth = %f\n", __func__, total_read_bw);
+	DML_LOG_VERBOSE("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth);
+	DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f\n", __func__, deep_sleep_dcfclk);
+#endif
+
+	for (k = 0; k < NumberOfActiveSurfaces; ++k)
+		deep_sleep_dcfclk = math_max2(deep_sleep_dcfclk, per_surface_dcfclk[k]);
+
+	*DCFClkDeepSleep = deep_sleep_dcfclk;
+
+	DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
+}
+
+/*
+ * CalculateDCFCLKDeepSleep - deep sleep DCFCLK without any 3D LUT adjustment.
+ *
+ * Forwards to CalculateDCFCLKDeepSleepTdlut() with a zero dispclk and
+ * all-zero tdlut_bytes_to_deliver / prefetch_swath_time_us arrays.
+ */
+static noinline_for_stack void CalculateDCFCLKDeepSleep(
+	const struct dml2_display_cfg *display_cfg,
+	unsigned int NumberOfActiveSurfaces,
+	unsigned int BytePerPixelY[],
+	unsigned int BytePerPixelC[],
+	unsigned int SwathWidthY[],
+	unsigned int SwathWidthC[],
+	unsigned int DPPPerSurface[],
+	double PSCL_THROUGHPUT[],
+	double PSCL_THROUGHPUT_CHROMA[],
+	double Dppclk[],
+	double ReadBandwidthLuma[],
+	double ReadBandwidthChroma[],
+	unsigned int ReturnBusWidth,
+
+	// Output
+	double *DCFClkDeepSleep)
+{
+	unsigned int no_tdlut_bytes[DML2_MAX_PLANES];
+	double no_prefetch_swath_time[DML2_MAX_PLANES];
+
+	memset(no_prefetch_swath_time, 0, sizeof(no_prefetch_swath_time));
+	memset(no_tdlut_bytes, 0, sizeof(no_tdlut_bytes));
+
+	CalculateDCFCLKDeepSleepTdlut(
+		display_cfg,
+		NumberOfActiveSurfaces,
+		BytePerPixelY,
+		BytePerPixelC,
+		SwathWidthY,
+		SwathWidthC,
+		DPPPerSurface,
+		PSCL_THROUGHPUT,
+		PSCL_THROUGHPUT_CHROMA,
+		Dppclk,
+		ReadBandwidthLuma,
+		ReadBandwidthChroma,
+		ReturnBusWidth,
+		0.0, // dispclk
+		no_tdlut_bytes, // tdlut_bytes_to_deliver
+		no_prefetch_swath_time, // prefetch_swath_time_us
+
+		// Output
+		DCFClkDeepSleep);
+}
+
+/*
+ * CalculateWriteBackDelay - write-back delay derived from the scaler setup.
+ *
+ * Returns 0 when the computed last (unclamped) output line is negative,
+ * otherwise last_line * line_length + (HTotal - WritebackDestinationWidth) + 80.
+ *
+ * WritebackPixelFormat and WritebackHRatio are accepted but not used here.
+ */
+static double CalculateWriteBackDelay(
+	enum dml2_source_format_class WritebackPixelFormat,
+	double WritebackHRatio,
+	double WritebackVRatio,
+	unsigned int WritebackVTaps,
+	unsigned int WritebackDestinationWidth,
+	unsigned int WritebackDestinationHeight,
+	unsigned int WritebackSourceHeight,
+	unsigned int HTotal)
+{
+	double v_init = (WritebackVRatio + WritebackVTaps + 1) / 2;
+	double line_length = math_max2((double)WritebackDestinationWidth, math_ceil2((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
+	double last_line_unclamped = WritebackDestinationHeight - 1 - math_ceil2(((double)WritebackSourceHeight - (double)v_init) / (double)WritebackVRatio, 1.0);
+
+	if (last_line_unclamped < 0)
+		return 0;
+
+	return last_line_unclamped * line_length + (HTotal - WritebackDestinationWidth) + 80;
+}
+
+/*
+ * CalculateMaxVStartup - upper bound, in lines, for VStartup.
+ *
+ * The usable vblank is the smaller of the actual vblank
+ * (v_total - v_active) and the nominal vblank: timing->vblank_nom, or
+ * vblank_nom_default_us converted to lines when vblank_nom is 0.
+ *
+ * Interlaced timing without ptoi support: floor((vblank - 1) / 2).
+ * Otherwise: vblank minus the lines consumed by write_back_delay_us
+ * (at least one line).
+ *
+ * Both subtractions are done on unsigned values; when the vblank is too
+ * small for them the result is clamped to 0 instead of wrapping around
+ * (a wrapped value would be clamped to DML_MAX_VSTARTUP_START below and
+ * reported as the largest possible VStartup).
+ *
+ * Returns the bound, never larger than DML_MAX_VSTARTUP_START.
+ */
+static unsigned int CalculateMaxVStartup(
+	bool ptoi_supported,
+	unsigned int vblank_nom_default_us,
+	const struct dml2_timing_cfg *timing,
+	double write_back_delay_us)
+{
+	unsigned int vblank_size = 0;
+	unsigned int max_vstartup_lines = 0;
+
+	double line_time_us = (double)timing->h_total / ((double)timing->pixel_clock_khz / 1000);
+	unsigned int vblank_actual = timing->v_total - timing->v_active;
+	unsigned int vblank_nom_default_in_line = (unsigned int)math_floor2((double)vblank_nom_default_us / line_time_us, 1.0);
+	unsigned int vblank_avail = (timing->vblank_nom == 0) ? vblank_nom_default_in_line : (unsigned int)timing->vblank_nom;
+
+	vblank_size = (unsigned int)math_min2(vblank_actual, vblank_avail);
+
+	if (timing->interlaced && !ptoi_supported) {
+		// vblank_size - 1 would wrap for an empty vblank
+		if (vblank_size > 0)
+			max_vstartup_lines = (unsigned int)(math_floor2((vblank_size - 1) / 2.0, 1.0));
+		else
+			max_vstartup_lines = 0;
+	} else {
+		unsigned int write_back_lines = (unsigned int)math_max2(1.0, math_ceil2(write_back_delay_us / line_time_us, 1.0));
+
+		// no room left once the write-back delay is accounted for -> 0, not a wrapped value
+		if (vblank_size > write_back_lines)
+			max_vstartup_lines = vblank_size - write_back_lines;
+		else
+			max_vstartup_lines = 0;
+	}
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: VBlankNom = %lu\n", __func__, timing->vblank_nom);
+	DML_LOG_VERBOSE("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us);
+	DML_LOG_VERBOSE("DML::%s: line_time_us = %f\n", __func__, line_time_us);
+	DML_LOG_VERBOSE("DML::%s: vblank_actual = %u\n", __func__, vblank_actual);
+	DML_LOG_VERBOSE("DML::%s: vblank_avail = %u\n", __func__, vblank_avail);
+	DML_LOG_VERBOSE("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines);
+#endif
+	max_vstartup_lines = (unsigned int)math_min2(max_vstartup_lines, DML_MAX_VSTARTUP_START);
+	return max_vstartup_lines;
+}
+
+static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *scratch,
+ struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *p)
+{
+ unsigned int MaximumSwathHeightY[DML2_MAX_PLANES] = { 0 };
+ unsigned int MaximumSwathHeightC[DML2_MAX_PLANES] = { 0 };
+ unsigned int RoundedUpSwathSizeBytesY[DML2_MAX_PLANES] = { 0 };
+ unsigned int RoundedUpSwathSizeBytesC[DML2_MAX_PLANES] = { 0 };
+ unsigned int SwathWidthSingleDPP[DML2_MAX_PLANES] = { 0 };
+ unsigned int SwathWidthSingleDPPChroma[DML2_MAX_PLANES] = { 0 };
+
+ unsigned int TotalActiveDPP = 0;
+ bool NoChromaOrLinear = true;
+ unsigned int SurfaceDoingUnboundedRequest = 0;
+ unsigned int DETBufferSizeInKByteForSwathCalculation;
+
+ const long TTUFIFODEPTH = 8;
+ const long MAXIMUMCOMPRESSION = 4;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP);
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ DML_LOG_VERBOSE("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]);
+ }
+#endif
+ CalculateSwathWidth(
+ p->display_cfg,
+ p->ForceSingleDPP,
+ p->NumberOfActiveSurfaces,
+ p->ODMMode,
+ p->BytePerPixY,
+ p->BytePerPixC,
+ p->Read256BytesBlockHeightY,
+ p->Read256BytesBlockHeightC,
+ p->Read256BytesBlockWidthY,
+ p->Read256BytesBlockWidthC,
+ p->surf_linear128_l,
+ p->surf_linear128_c,
+ p->DPPPerSurface,
+
+ // Output
+ p->req_per_swath_ub_l,
+ p->req_per_swath_ub_c,
+ SwathWidthSingleDPP,
+ SwathWidthSingleDPPChroma,
+ p->SwathWidth,
+ p->SwathWidthChroma,
+ MaximumSwathHeightY,
+ MaximumSwathHeightC,
+ p->swath_width_luma_ub,
+ p->swath_width_chroma_ub);
+
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ p->full_swath_bytes_l[k] = (unsigned int)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]);
+ p->full_swath_bytes_c[k] = (unsigned int)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]);
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]);
+#endif
+ if (p->display_cfg->plane_descriptors[k].pixel_format == dml2_420_10) {
+ p->full_swath_bytes_l[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_l[k], 256));
+ p->full_swath_bytes_c[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_c[k], 256));
+ }
+ }
+
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ TotalActiveDPP = TotalActiveDPP + (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]);
+ if (p->DPPPerSurface[k] > 0)
+ SurfaceDoingUnboundedRequest = k;
+ if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format) || p->display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha
+ || p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
+ NoChromaOrLinear = false;
+ }
+ }
+
+ *p->UnboundedRequestEnabled = UnboundedRequest(p->display_cfg->overrides.hw.force_unbounded_requesting.enable, p->display_cfg->overrides.hw.force_unbounded_requesting.value, TotalActiveDPP, NoChromaOrLinear);
+
+ CalculateDETBufferSize(
+ &scratch->CalculateDETBufferSize_locals,
+ p->display_cfg,
+ p->ForceSingleDPP,
+ p->NumberOfActiveSurfaces,
+ *p->UnboundedRequestEnabled,
+ p->nomDETInKByte,
+ p->MaxTotalDETInKByte,
+ p->ConfigReturnBufferSizeInKByte,
+ p->MinCompressedBufferSizeInKByte,
+ p->ConfigReturnBufferSegmentSizeInkByte,
+ p->CompressedBufferSegmentSizeInkByte,
+ p->ReadBandwidthLuma,
+ p->ReadBandwidthChroma,
+ p->full_swath_bytes_l,
+ p->full_swath_bytes_c,
+ p->DPPPerSurface,
+
+ // Output
+ p->DETBufferSizeInKByte, // per hubp pipe
+ p->CompressedBufferSizeInkByte);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP);
+ DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte);
+ DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte);
+ DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled);
+ DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte);
+#endif
+
+ *p->ViewportSizeSupport = true;
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+
+ DETBufferSizeInKByteForSwathCalculation = (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 1024 : p->DETBufferSizeInKByte[k]);
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation);
+#endif
+ if (p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
+ p->SwathHeightY[k] = MaximumSwathHeightY[k];
+ p->SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
+ RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];
+
+ if (p->surf_linear128_l[k])
+ p->request_size_bytes_luma[k] = 128;
+ else
+ p->request_size_bytes_luma[k] = 256;
+
+ if (p->surf_linear128_c[k])
+ p->request_size_bytes_chroma[k] = 128;
+ else
+ p->request_size_bytes_chroma[k] = 256;
+
+ } else if (p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ p->SwathHeightY[k] = MaximumSwathHeightY[k];
+ p->SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
+ RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];
+ p->request_size_bytes_luma[k] = 256;
+ p->request_size_bytes_chroma[k] = 256;
+
+ } else if (p->full_swath_bytes_l[k] >= 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+ p->SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2;
+ RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];
+ p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
+ p->request_size_bytes_chroma[k] = 256;
+
+ } else if (p->full_swath_bytes_l[k] < 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] / 2 <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ p->SwathHeightY[k] = MaximumSwathHeightY[k];
+ p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+ RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
+ RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2;
+ p->request_size_bytes_luma[k] = 256;
+ p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
+
+ } else {
+ p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+ p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+ RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2;
+ RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2;
+ p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
+ p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
+ }
+
+ if (p->SwathHeightC[k] == 0)
+ p->request_size_bytes_chroma[k] = 0;
+
+ if ((p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) ||
+ p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) {
+ *p->ViewportSizeSupport = false;
+ DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l=%u\n", __func__, k, p->full_swath_bytes_l[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c=%u\n", __func__, k, p->full_swath_bytes_c[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation=%u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation);
+ DML_LOG_VERBOSE("DML::%s: k=%u SwathWidth=%u\n", __func__, k, p->SwathWidth[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, p->MaximumSwathWidthLuma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthChroma=%d\n", __func__, k, p->SwathWidthChroma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, p->MaximumSwathWidthChroma[k]);
+ p->ViewportSizeSupportPerSurface[k] = false;
+ } else {
+ p->ViewportSizeSupportPerSurface[k] = true;
+ }
+
+ if (p->SwathHeightC[k] == 0) {
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, All DET will be used for plane0\n", __func__, k);
+#endif
+ p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024;
+ p->DETBufferSizeC[k] = 0;
+ } else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) {
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, Half DET will be used for plane0, and half for plane1\n", __func__, k);
+#endif
+ p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
+ p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
+ } else {
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, 2/3 DET will be used for plane0, and 1/3 for plane1\n", __func__, k);
+#endif
+ p->DETBufferSizeY[k] = (unsigned int)(math_floor2(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024));
+ p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k];
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]);
+#endif
+
+ }
+
+ *p->compbuf_reserved_space_64b = 2 * p->pixel_chunk_size_kbytes * 1024 / 64;
+ if (*p->UnboundedRequestEnabled) {
+ *p->compbuf_reserved_space_64b = (unsigned int)math_ceil2(math_max2(*p->compbuf_reserved_space_64b,
+ (double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / (p->mrq_present ? MAXIMUMCOMPRESSION : 1) / 64)), 1.0);
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]);
+ DML_LOG_VERBOSE("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes);
+#endif
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: compbuf_reserved_space_64b = %u\n", __func__, *p->compbuf_reserved_space_64b);
+#endif
+
+ *p->hw_debug5 = false;
+#ifdef ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE
+ if (p->NumberOfActiveSurfaces > 1)
+ *p->hw_debug5 = true;
+#else
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (!(p->mrq_present) && (!(*p->UnboundedRequestEnabled)) && (TotalActiveDPP == 1)
+ && p->display_cfg->plane_descriptors[k].surface.dcc.enable
+ && ((p->rob_buffer_size_kbytes * 1024 * (p->mrq_present ? MAXIMUMCOMPRESSION : 1)
+ + *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k])))
+ *p->hw_debug5 = true;
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled);
+ DML_LOG_VERBOSE("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION);
+ DML_LOG_VERBOSE("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH);
+ DML_LOG_VERBOSE("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte);
+ DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5);
+#endif
+ }
+#endif
+}
+
+/*
+ * DecideODMMode - pick an ODM mode automatically for one surface.
+ *
+ * Starts from bypass and only ever raises the mode so that it satisfies, in
+ * turn: the DISPCLK ceiling, the per-DSC-unit line width (only when UseDSC),
+ * the 4:2:0 FMT buffer width (only for dml2_420 output) and finally the DSC
+ * slice count (only when UseDSC).
+ *
+ * The "raise" steps compare enum values with '<', so they assume that
+ * bypass < combine_2to1 < combine_3to1 < combine_4to1 in enum dml2_odm_mode.
+ *
+ * SurfaceRequiredDISPCLKWithODMCombineFourToOne is accepted but never read:
+ * 4to1 is simply the fallback when no smaller mode fits.
+ *
+ * Returns the selected mode; no validation is performed here.
+ */
+static enum dml2_odm_mode DecideODMMode(unsigned int HActive,
+	double MaxDispclk,
+	unsigned int MaximumPixelsPerLinePerDSCUnit,
+	enum dml2_output_format_class OutFormat,
+	bool UseDSC,
+	unsigned int NumberOfDSCSlices,
+	double SurfaceRequiredDISPCLKWithoutODMCombine,
+	double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
+	double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
+	double SurfaceRequiredDISPCLKWithODMCombineFourToOne)
+{
+	enum dml2_odm_mode floor_mode;
+	enum dml2_odm_mode ODMMode = dml2_odm_mode_bypass;
+
+	/* Lowest mode whose required DISPCLK fits under MaxDispclk. */
+	if (SurfaceRequiredDISPCLKWithoutODMCombine <= MaxDispclk)
+		floor_mode = dml2_odm_mode_bypass;
+	else if (SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= MaxDispclk)
+		floor_mode = dml2_odm_mode_combine_2to1;
+	else if (SurfaceRequiredDISPCLKWithODMCombineThreeToOne <= MaxDispclk)
+		floor_mode = dml2_odm_mode_combine_3to1;
+	else
+		floor_mode = dml2_odm_mode_combine_4to1;
+
+	if (ODMMode < floor_mode)
+		ODMMode = floor_mode;
+
+	/* Lowest mode whose segments fit in one DSC unit's line width. */
+	if (UseDSC) {
+		if (HActive <= 1 * MaximumPixelsPerLinePerDSCUnit)
+			floor_mode = dml2_odm_mode_bypass;
+		else if (HActive <= 2 * MaximumPixelsPerLinePerDSCUnit)
+			floor_mode = dml2_odm_mode_combine_2to1;
+		else if (HActive <= 3 * MaximumPixelsPerLinePerDSCUnit)
+			floor_mode = dml2_odm_mode_combine_3to1;
+		else
+			floor_mode = dml2_odm_mode_combine_4to1;
+
+		if (ODMMode < floor_mode)
+			ODMMode = floor_mode;
+	}
+
+	/* Lowest mode whose segments fit in the 4:2:0 FMT buffer. */
+	if (OutFormat == dml2_420) {
+		if (HActive <= 1 * DML2_MAX_FMT_420_BUFFER_WIDTH)
+			floor_mode = dml2_odm_mode_bypass;
+		else if (HActive <= 2 * DML2_MAX_FMT_420_BUFFER_WIDTH)
+			floor_mode = dml2_odm_mode_combine_2to1;
+		else if (HActive <= 3 * DML2_MAX_FMT_420_BUFFER_WIDTH)
+			floor_mode = dml2_odm_mode_combine_3to1;
+		else
+			floor_mode = dml2_odm_mode_combine_4to1;
+
+		if (ODMMode < floor_mode)
+			ODMMode = floor_mode;
+	}
+
+	/*
+	 * Slice-count escalation. The three tests are sequential on purpose:
+	 * a mode bumped by one test is re-examined by the next one.
+	 */
+	if (UseDSC) {
+		if (NumberOfDSCSlices > 4 && ODMMode == dml2_odm_mode_bypass)
+			ODMMode = dml2_odm_mode_combine_2to1;
+		if (NumberOfDSCSlices > 8 && ODMMode == dml2_odm_mode_combine_2to1)
+			ODMMode = dml2_odm_mode_combine_3to1;
+		/* 3to1 is kept only for exactly 12 slices. */
+		if (NumberOfDSCSlices != 12 && ODMMode == dml2_odm_mode_combine_3to1)
+			ODMMode = dml2_odm_mode_combine_4to1;
+	}
+
+	return ODMMode;
+}
+
+/*
+ * CalculateODMConstraints - derive the per-mode limits for a given ODM mode.
+ *
+ * Selects the required DISPCLK and the DPP count that go with ODMUse
+ * (2, 3 or 4 for the combine modes, 1 for everything else), then scales the
+ * DSC line-width limit, the DSC slice limit and the 4:2:0 width limit by that
+ * DPP count.
+ *
+ * All five trailing pointer parameters are outputs and are always written.
+ */
+static void CalculateODMConstraints(
+	enum dml2_odm_mode ODMUse,
+	double SurfaceRequiredDISPCLKWithoutODMCombine,
+	double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
+	double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
+	double SurfaceRequiredDISPCLKWithODMCombineFourToOne,
+	unsigned int MaximumPixelsPerLinePerDSCUnit,
+	/* Output */
+	double *DISPCLKRequired,
+	unsigned int *NumberOfDPPRequired,
+	unsigned int *MaxHActiveForDSC,
+	unsigned int *MaxDSCSlices,
+	unsigned int *MaxHActiveFor420)
+{
+	if (ODMUse == dml2_odm_mode_combine_2to1) {
+		*DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
+		*NumberOfDPPRequired = 2;
+	} else if (ODMUse == dml2_odm_mode_combine_3to1) {
+		*DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineThreeToOne;
+		*NumberOfDPPRequired = 3;
+	} else if (ODMUse == dml2_odm_mode_combine_4to1) {
+		*DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
+		*NumberOfDPPRequired = 4;
+	} else {
+		/*
+		 * auto, split_1to2, mso_1to2, mso_1to4, bypass and any
+		 * unrecognised value are all treated as a single pipe.
+		 */
+		*DISPCLKRequired = SurfaceRequiredDISPCLKWithoutODMCombine;
+		*NumberOfDPPRequired = 1;
+	}
+
+	/* Each limit is the single-pipe limit times the pipe count. */
+	*MaxHActiveForDSC = *NumberOfDPPRequired * MaximumPixelsPerLinePerDSCUnit;
+	*MaxDSCSlices = *NumberOfDPPRequired * DML_MAX_NUM_OF_SLICES_PER_DSC;
+	*MaxHActiveFor420 = *NumberOfDPPRequired * DML2_MAX_FMT_420_BUFFER_WIDTH;
+}
+
+/*
+ * ValidateODMMode - check whether an ODM mode can actually be used.
+ *
+ * Returns false as soon as one of the following fails, in this order:
+ *  - DISPCLKRequired exceeds MaxDispclk;
+ *  - the extra pipes would exceed MaxNumDPP or MaxNumOPP;
+ *  - HActive is not a multiple of (pipes * pixels per clock), unless the mode
+ *    is 3to1 without DSC, where that check is skipped;
+ *  - HActive is not a multiple of the horizontal timing divider;
+ *  - with DSC: HActive or the slice count exceed their limits, HActive is
+ *    not a multiple of the slice count, the slice count is not a multiple of
+ *    the pipe count, or (3to1 only) the slice count is not exactly the
+ *    maximum;
+ *  - for 4:2:0 output: HActive exceeds MaxHActiveFor420.
+ *
+ * Returns true when every applicable check passes.
+ */
+static bool ValidateODMMode(enum dml2_odm_mode ODMMode,
+	double MaxDispclk,
+	unsigned int HActive,
+	enum dml2_output_format_class OutFormat,
+	bool UseDSC,
+	unsigned int NumberOfDSCSlices,
+	unsigned int TotalNumberOfActiveDPP,
+	unsigned int TotalNumberOfActiveOPP,
+	unsigned int MaxNumDPP,
+	unsigned int MaxNumOPP,
+	double DISPCLKRequired,
+	unsigned int NumberOfDPPRequired,
+	unsigned int MaxHActiveForDSC,
+	unsigned int MaxDSCSlices,
+	unsigned int MaxHActiveFor420)
+{
+	const bool is_3to1 = (ODMMode == dml2_odm_mode_combine_3to1);
+	/* 3to1 segments are only treated as symmetrical when DSC is in use. */
+	const bool symmetrical_segments = !is_3to1 || UseDSC;
+	unsigned int pixels_per_clk = 1;
+	unsigned int h_timing_div;
+
+	if (OutFormat == dml2_420 || OutFormat == dml2_n422)
+		pixels_per_clk = 2;
+
+	switch (ODMMode) {
+	case dml2_odm_mode_combine_4to1:
+	case dml2_odm_mode_combine_3to1:
+		h_timing_div = 4;
+		break;
+	case dml2_odm_mode_combine_2to1:
+		h_timing_div = 2;
+		break;
+	default:
+		h_timing_div = pixels_per_clk;
+		break;
+	}
+
+	if (DISPCLKRequired > MaxDispclk)
+		return false;
+
+	if ((TotalNumberOfActiveDPP + NumberOfDPPRequired) > MaxNumDPP)
+		return false;
+	if ((TotalNumberOfActiveOPP + NumberOfDPPRequired) > MaxNumOPP)
+		return false;
+
+	if (symmetrical_segments && (HActive % (NumberOfDPPRequired * pixels_per_clk)) != 0)
+		return false;
+
+	/*
+	 * TODO - OTG_H_TOTAL, OTG_H_BLANK_START/END and OTG_H_SYNC_A_START/END
+	 * all need to be divisible by the h timing div mode as well. Only
+	 * H active is checked here.
+	 */
+	if ((HActive % h_timing_div) != 0)
+		return false;
+
+	if (UseDSC) {
+		if (HActive > MaxHActiveForDSC)
+			return false;
+		if (NumberOfDSCSlices > MaxDSCSlices)
+			return false;
+		if ((HActive % NumberOfDSCSlices) != 0)
+			return false;
+		if ((NumberOfDSCSlices % NumberOfDPPRequired) != 0)
+			return false;
+		/* 3to1 additionally demands the maximum slice count. */
+		if (is_3to1 && NumberOfDSCSlices != MaxDSCSlices)
+			return false;
+	}
+
+	if (OutFormat == dml2_420 && HActive > MaxHActiveFor420)
+		return false;
+
+	return true;
+}
+
+/*
+ * CalculateODMMode - choose and validate the ODM mode for one surface.
+ *
+ * Computes the DISPCLK needed for bypass / 2to1 / 3to1 / 4to1, then either
+ * lets DecideODMMode() pick a mode (ODMUse == dml2_odm_mode_auto) or takes
+ * ODMUse as given, derives that mode's constraints and validates them.
+ *
+ * Outputs:
+ *  *ODMMode                    - the mode that was decided (even if invalid)
+ *  *TotalAvailablePipesSupport - result of ValidateODMMode()
+ *  *NumberOfDPP                - pipes required by the decided mode
+ *  *RequiredDISPCLKPerSurface  - required DISPCLK, or 0 when validation failed
+ *
+ * DSC is only considered in use when DSCEnable is set and there is at least
+ * one DSC slice.
+ */
+static noinline_for_stack void CalculateODMMode(
+	unsigned int MaximumPixelsPerLinePerDSCUnit,
+	unsigned int HActive,
+	enum dml2_output_format_class OutFormat,
+	enum dml2_output_encoder_class Output,
+	enum dml2_odm_mode ODMUse,
+	double MaxDispclk,
+	bool DSCEnable,
+	unsigned int TotalNumberOfActiveDPP,
+	unsigned int TotalNumberOfActiveOPP,
+	unsigned int MaxNumDPP,
+	unsigned int MaxNumOPP,
+	double PixelClock,
+	unsigned int NumberOfDSCSlices,
+
+	/* Output */
+	bool *TotalAvailablePipesSupport,
+	unsigned int *NumberOfDPP,
+	enum dml2_odm_mode *ODMMode,
+	double *RequiredDISPCLKPerSurface)
+{
+	const bool tmds_420 = (OutFormat == dml2_420 && Output == dml2_hdmi);
+	const bool dsc_in_use = DSCEnable && (NumberOfDSCSlices > 0);
+	double dispclk_bypass;
+	double dispclk_2to1;
+	double dispclk_3to1;
+	double dispclk_4to1;
+	double dispclk_needed;
+	unsigned int dpp_needed;
+	unsigned int dsc_hactive_limit;
+	unsigned int dsc_slice_limit;
+	unsigned int fmt420_hactive_limit;
+	enum dml2_odm_mode chosen_mode;
+	bool mode_ok;
+
+	dispclk_bypass = CalculateRequiredDispclk(dml2_odm_mode_bypass, PixelClock, tmds_420);
+	dispclk_2to1 = CalculateRequiredDispclk(dml2_odm_mode_combine_2to1, PixelClock, tmds_420);
+	dispclk_3to1 = CalculateRequiredDispclk(dml2_odm_mode_combine_3to1, PixelClock, tmds_420);
+	dispclk_4to1 = CalculateRequiredDispclk(dml2_odm_mode_combine_4to1, PixelClock, tmds_420);
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: ODMUse = %d\n", __func__, ODMUse);
+	DML_LOG_VERBOSE("DML::%s: Output = %d\n", __func__, Output);
+	DML_LOG_VERBOSE("DML::%s: DSCEnable = %d\n", __func__, DSCEnable);
+	DML_LOG_VERBOSE("DML::%s: MaxDispclk = %f\n", __func__, MaxDispclk);
+	DML_LOG_VERBOSE("DML::%s: MaximumPixelsPerLinePerDSCUnit = %d\n", __func__, MaximumPixelsPerLinePerDSCUnit);
+	DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithoutODMCombine = %f\n", __func__, dispclk_bypass);
+	DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineTwoToOne = %f\n", __func__, dispclk_2to1);
+	DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineThreeToOne = %f\n", __func__, dispclk_3to1);
+	DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineFourToOne = %f\n", __func__, dispclk_4to1);
+#endif
+
+	/* An explicit request is taken as is; only "auto" is decided here. */
+	chosen_mode = ODMUse;
+	if (ODMUse == dml2_odm_mode_auto) {
+		chosen_mode = DecideODMMode(HActive,
+				MaxDispclk,
+				MaximumPixelsPerLinePerDSCUnit,
+				OutFormat,
+				dsc_in_use,
+				NumberOfDSCSlices,
+				dispclk_bypass,
+				dispclk_2to1,
+				dispclk_3to1,
+				dispclk_4to1);
+	}
+
+	CalculateODMConstraints(chosen_mode,
+			dispclk_bypass,
+			dispclk_2to1,
+			dispclk_3to1,
+			dispclk_4to1,
+			MaximumPixelsPerLinePerDSCUnit,
+			&dispclk_needed,
+			&dpp_needed,
+			&dsc_hactive_limit,
+			&dsc_slice_limit,
+			&fmt420_hactive_limit);
+
+	mode_ok = ValidateODMMode(chosen_mode,
+			MaxDispclk,
+			HActive,
+			OutFormat,
+			dsc_in_use,
+			NumberOfDSCSlices,
+			TotalNumberOfActiveDPP,
+			TotalNumberOfActiveOPP,
+			MaxNumDPP,
+			MaxNumOPP,
+			dispclk_needed,
+			dpp_needed,
+			dsc_hactive_limit,
+			dsc_slice_limit,
+			fmt420_hactive_limit);
+
+	*ODMMode = chosen_mode;
+	*TotalAvailablePipesSupport = mode_ok;
+	*NumberOfDPP = dpp_needed;
+	/* A rejected mode reports no DISPCLK requirement. */
+	if (mode_ok)
+		*RequiredDISPCLKPerSurface = dispclk_needed;
+	else
+		*RequiredDISPCLKPerSurface = 0;
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: ODMMode = %d\n", __func__, *ODMMode);
+	DML_LOG_VERBOSE("DML::%s: NumberOfDPP = %d\n", __func__, *NumberOfDPP);
+	DML_LOG_VERBOSE("DML::%s: TotalAvailablePipesSupport = %d\n", __func__, *TotalAvailablePipesSupport);
+	DML_LOG_VERBOSE("DML::%s: RequiredDISPCLKPerSurface = %f\n", __func__, *RequiredDISPCLKPerSurface);
+#endif
+}
+
+/*
+ * CalculateOutputLink - work out the output bpp, link type/rate and whether
+ * DSC and FEC are needed for one output.
+ *
+ * Outputs are first reset (*RequiresDSC = *RequiresFEC = false, *OutBpp = 0,
+ * type and rate "unknown") and then filled in per encoder class:
+ *
+ *  - dml2_hdmi: never DSC/FEC; a single TruncToValidBPP() call.
+ *  - dml2_dp / dml2_dp2p0 / dml2_edp: link rates are tried in ascending order
+ *    (UHBR10, UHBR13.5, UHBR20 for dp2p0; HBR, HBR2, HBR3 otherwise). A rate
+ *    is attempted only if OutputLinkDPRate is dml2_dp_rate_na or names that
+ *    rate, no earlier rate already produced a non-zero *OutBpp, and the PHY
+ *    clock argument meets that rate's threshold.
+ *  - dml2_hdmifrl: rates 3x3, 6x3, 6x4, 8x4, 10x4, 12x4 are tried in
+ *    ascending order while *OutBpp is still 0.
+ *
+ * DSCEnable == dml2_dsc_enable forces DSC from the start. With
+ * dml2_dsc_enable_if_necessary and ForcedOutputLinkBPP == 0, a rate that
+ * yields *OutBpp == 0 is retried with DSC, but only at the highest rate the
+ * PHY clock allows (for HDMI FRL only at 10x4 and 12x4). Once set by such a
+ * retry, *RequiresDSC / LinkDSCEnable are not cleared again.
+ *
+ * *OutputType / *OutputRate describe the last rate that was attempted, and
+ * are written even when *OutBpp ends up 0. Callers presumably treat
+ * *OutBpp == 0 as "no valid configuration" - TODO confirm against callers.
+ *
+ * RequiredSlots is handed to TruncToValidBPP() only on the DP paths; the
+ * HDMI and HDMI FRL paths pass a local dummy instead, so this function does
+ * not touch *RequiredSlots there.
+ */
+static noinline_for_stack void CalculateOutputLink(
+ struct dml2_core_internal_scratch *s,
+ double PHYCLK,
+ double PHYCLKD18,
+ double PHYCLKD32,
+ double Downspreading,
+ enum dml2_output_encoder_class Output,
+ enum dml2_output_format_class OutputFormat,
+ unsigned int HTotal,
+ unsigned int HActive,
+ double PixelClockBackEnd,
+ double ForcedOutputLinkBPP,
+ unsigned int DSCInputBitPerComponent,
+ unsigned int NumberOfDSCSlices,
+ double AudioSampleRate,
+ unsigned int AudioSampleLayout,
+ enum dml2_odm_mode ODMModeNoDSC,
+ enum dml2_odm_mode ODMModeDSC,
+ enum dml2_dsc_enable_option DSCEnable,
+ unsigned int OutputLinkDPLanes,
+ enum dml2_output_link_dp_rate OutputLinkDPRate,
+
+ // Output
+ bool *RequiresDSC,
+ bool *RequiresFEC,
+ double *OutBpp,
+ enum dml2_core_internal_output_type *OutputType,
+ enum dml2_core_internal_output_type_rate *OutputRate,
+ unsigned int *RequiredSlots)
+{
+ bool LinkDSCEnable; /* assigned on the DP and HDMI FRL paths before use; not used for dml2_hdmi */
+ unsigned int dummy; /* takes TruncToValidBPP()'s last argument where RequiredSlots is not passed */
+ *RequiresDSC = false;
+ *RequiresFEC = false;
+ *OutBpp = 0;
+
+ *OutputType = dml2_core_internal_output_type_unknown;
+ *OutputRate = dml2_core_internal_output_rate_unknown;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: DSCEnable = %u (dis, en, en_if_necessary)\n", __func__, DSCEnable);
+ DML_LOG_VERBOSE("DML::%s: PHYCLK = %f\n", __func__, PHYCLK);
+ DML_LOG_VERBOSE("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd);
+ DML_LOG_VERBOSE("DML::%s: AudioSampleRate = %f\n", __func__, AudioSampleRate);
+ DML_LOG_VERBOSE("DML::%s: HActive = %u\n", __func__, HActive);
+ DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal);
+ DML_LOG_VERBOSE("DML::%s: ODMModeNoDSC = %u\n", __func__, ODMModeNoDSC);
+ DML_LOG_VERBOSE("DML::%s: ODMModeDSC = %u\n", __func__, ODMModeDSC);
+ DML_LOG_VERBOSE("DML::%s: ForcedOutputLinkBPP = %f\n", __func__, ForcedOutputLinkBPP);
+ DML_LOG_VERBOSE("DML::%s: Output (encoder) = %u\n", __func__, Output);
+ DML_LOG_VERBOSE("DML::%s: OutputLinkDPRate = %u\n", __func__, OutputLinkDPRate);
+#endif
+ {
+ if (Output == dml2_hdmi) { /* HDMI: DSC is passed as false; link rate argument is min(600, PHYCLK) * 10, 3 lanes */
+ *RequiresDSC = false;
+ *RequiresFEC = false;
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, math_min2(600, PHYCLK) * 10, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, false, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
+ //OutputTypeAndRate = "HDMI";
+ *OutputType = dml2_core_internal_output_type_hdmi;
+ } else if (Output == dml2_dp || Output == dml2_dp2p0 || Output == dml2_edp) {
+ if (DSCEnable == dml2_dsc_enable) { /* DSC forced on: FEC for dp and dp2p0, not for edp */
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dml2_dp || Output == dml2_dp2p0) {
+ *RequiresFEC = true;
+ } else {
+ *RequiresFEC = false;
+ }
+ } else { /* DSC not forced: FEC is still required for dp2p0 */
+ *RequiresDSC = false;
+ LinkDSCEnable = false;
+ if (Output == dml2_dp2p0) {
+ *RequiresFEC = true;
+ } else {
+ *RequiresFEC = false;
+ }
+ }
+ if (Output == dml2_dp2p0) { /* DP 2.0 ladder: thresholds are checked against PHYCLKD32 */
+ *OutBpp = 0;
+ if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr10) && PHYCLKD32 >= 10000.0 / 32) {
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && PHYCLKD32 < 13500.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { /* DSC retry only if UHBR13.5 is out of reach */
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR10";
+ *OutputType = dml2_core_internal_output_type_dp2p0;
+ *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr10;
+ }
+ if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr13p5) && *OutBpp == 0 && PHYCLKD32 >= 13500.0 / 32) {
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+
+ if (*OutBpp == 0 && PHYCLKD32 < 20000.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { /* DSC retry only if UHBR20 is out of reach */
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR13p5";
+ *OutputType = dml2_core_internal_output_type_dp2p0;
+ *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr13p5;
+ }
+ if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr20) && *OutBpp == 0 && PHYCLKD32 >= 20000.0 / 32) {
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { /* top rate: DSC retry has no PHY clock condition */
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR20";
+ *OutputType = dml2_core_internal_output_type_dp2p0;
+ *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr20;
+ }
+ } else { // output is dp or edp
+ *OutBpp = 0;
+ if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr) && PHYCLK >= 270) {
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && PHYCLK < 540 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { /* DSC retry only if HBR2 is out of reach */
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dml2_dp) { /* FEC accompanies DSC on dp, not on edp */
+ *RequiresFEC = true;
+ }
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR";
+ *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
+ *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr;
+ }
+ if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr2) && *OutBpp == 0 && PHYCLK >= 540) {
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+
+ if (*OutBpp == 0 && PHYCLK < 810 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { /* DSC retry only if HBR3 is out of reach */
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dml2_dp) {
+ *RequiresFEC = true;
+ }
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR2";
+ *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
+ *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr2;
+ }
+ if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr3) && *OutBpp == 0 && PHYCLK >= 810) { // VBA_ERROR, vba code doesn't have hbr3 check
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+
+ if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { /* top rate: DSC retry has no PHY clock condition */
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dml2_dp) {
+ *RequiresFEC = true;
+ }
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR3";
+ *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
+ *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr3;
+ }
+ }
+ } else if (Output == dml2_hdmifrl) { /* HDMI FRL ladder: thresholds are checked against PHYCLKD18 */
+ if (DSCEnable == dml2_dsc_enable) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *RequiresFEC = true;
+ } else {
+ *RequiresDSC = false;
+ LinkDSCEnable = false;
+ *RequiresFEC = false;
+ }
+ *OutBpp = 0;
+ if (PHYCLKD18 >= 3000.0 / 18) {
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 3000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
+ //OutputTypeAndRate = Output & "3x3";
+ *OutputType = dml2_core_internal_output_type_hdmifrl;
+ *OutputRate = dml2_core_internal_output_rate_hdmi_rate_3x3;
+ }
+ if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) {
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
+ //OutputTypeAndRate = Output & "6x3";
+ *OutputType = dml2_core_internal_output_type_hdmifrl;
+ *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x3;
+ }
+ if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) { /* same clock threshold as 6x3, but 4 lanes */
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
+ //OutputTypeAndRate = Output & "6x4";
+ *OutputType = dml2_core_internal_output_type_hdmifrl;
+ *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x4;
+ }
+ if (*OutBpp == 0 && PHYCLKD18 >= 8000.0 / 18) {
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 8000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
+ //OutputTypeAndRate = Output & "8x4";
+ *OutputType = dml2_core_internal_output_type_hdmifrl;
+ *OutputRate = dml2_core_internal_output_rate_hdmi_rate_8x4;
+ }
+ if (*OutBpp == 0 && PHYCLKD18 >= 10000.0 / 18) {
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
+ if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0 && PHYCLKD18 < 12000.0 / 18) { /* DSC retry only if 12x4 is out of reach */
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *RequiresFEC = true;
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
+ }
+ //OutputTypeAndRate = Output & "10x4";
+ *OutputType = dml2_core_internal_output_type_hdmifrl;
+ *OutputRate = dml2_core_internal_output_rate_hdmi_rate_10x4;
+ }
+ if (*OutBpp == 0 && PHYCLKD18 >= 12000.0 / 18) {
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
+ if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { /* top rate: DSC retry has no PHY clock condition */
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *RequiresFEC = true;
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
+ }
+ //OutputTypeAndRate = Output & "12x4";
+ *OutputType = dml2_core_internal_output_type_hdmifrl;
+ *OutputRate = dml2_core_internal_output_rate_hdmi_rate_12x4;
+ }
+ }
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: RequiresDSC = %u\n", __func__, *RequiresDSC);
+ DML_LOG_VERBOSE("DML::%s: RequiresFEC = %u\n", __func__, *RequiresFEC);
+ DML_LOG_VERBOSE("DML::%s: OutBpp = %f\n", __func__, *OutBpp);
+#endif
+}
+
+/*
+ * CalculateWriteBackDISPCLK - DISPCLK required by a writeback path.
+ *
+ * Evaluates three independent requirements (horizontal taps, vertical taps
+ * over a line, and a line-buffer term) and returns the largest.
+ *
+ * WritebackPixelFormat and WritebackVRatio are accepted but not read.
+ * WritebackHRatio, HTotal and WritebackSourceWidth are used as divisors and
+ * are not checked for zero here.
+ */
+static double CalculateWriteBackDISPCLK(
+	enum dml2_source_format_class WritebackPixelFormat,
+	double PixelClock,
+	double WritebackHRatio,
+	double WritebackVRatio,
+	unsigned int WritebackHTaps,
+	unsigned int WritebackVTaps,
+	unsigned int WritebackSourceWidth,
+	unsigned int WritebackDestinationWidth,
+	unsigned int HTotal,
+	unsigned int WritebackLineBufferSize)
+{
+	/* Horizontal taps are counted in groups of 8, rounded up. */
+	double h_tap_groups = math_ceil2((double)WritebackHTaps / 8.0, 1);
+	/* Destination pixels are counted in groups of 6, rounded up, plus 8. */
+	double v_cycles = WritebackVTaps * math_ceil2((double)WritebackDestinationWidth / 6.0, 1) + 8.0;
+	double hb_pixels = WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0;
+
+	double dispclk_h = PixelClock * h_tap_groups / WritebackHRatio;
+	double dispclk_v = PixelClock * v_cycles / (double)HTotal;
+	double dispclk_hb = PixelClock * WritebackVTaps * hb_pixels / 6.0 / (double)WritebackSourceWidth;
+
+	return math_max3(dispclk_h, dispclk_v, dispclk_hb);
+}
+
+/*
+ * Return the required DTBCLK for an output.
+ *
+ * Without DSC the result is PixelClock / 4 * OutputBpp / 24, floored at 25.
+ * With DSC it is the largest of three tribyte-rate style terms (each divided
+ * by 4) and 25, scaled by 1.002.
+ */
+static double RequiredDTBCLK(
+	bool DSCEnable,
+	double PixelClock,
+	enum dml2_output_format_class OutputFormat,
+	double OutputBpp,
+	unsigned int DSCSlices,
+	unsigned int HTotal,
+	unsigned int HActive,
+	unsigned int AudioRate,
+	unsigned int AudioLayout)
+{
+	double word_rate;
+	double hc_active;
+	double hc_blank;
+	double avg_tribyte_rate;
+	double active_tribyte_rate;
+
+	if (!DSCEnable)
+		return math_max2(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
+
+	/* 4:4:4 carries one pixel per word; every other format is treated as two. */
+	word_rate = PixelClock / (OutputFormat == dml2_444 ? 1 : 2);
+
+	/*
+	 * HActive / DSCSlices is an unsigned integer division, so it truncates
+	 * before math_ceil2() sees it; this matches the original expression.
+	 */
+	hc_active = math_ceil2(DSCSlices * math_ceil2(OutputBpp * math_ceil2(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
+
+	/* Blanking budget: fixed 64 plus 32 per audio packet, AudioLayout 1 weighted 1, others 0.25. */
+	hc_blank = 64 + 32 * math_ceil2(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
+
+	avg_tribyte_rate = word_rate * (hc_active + hc_blank) / HTotal;
+	active_tribyte_rate = word_rate * hc_active / HActive;
+
+	return math_max4(word_rate / 4.0, avg_tribyte_rate / 4.0, active_tribyte_rate / 4.0, 25.0) * 1.002;
+}
+
+/*
+ * Return the DSC delay for one output, or 0 when DSC is disabled or
+ * OutputBpp is 0.
+ *
+ * The raw delay from dscceComputeDelay() + dscComputeDelay() is multiplied by
+ * the ODM combine factor (1, 2, 3 or 4), extended by one horizontal blank per
+ * started active line of delay, and finally rescaled by
+ * PixelClock / PixelClockBackEnd.
+ */
+static unsigned int DSCDelayRequirement(
+	bool DSCEnabled,
+	enum dml2_odm_mode ODMMode,
+	unsigned int DSCInputBitPerComponent,
+	double OutputBpp,
+	unsigned int HActive,
+	unsigned int HTotal,
+	unsigned int NumberOfDSCSlices,
+	enum dml2_output_format_class OutputFormat,
+	enum dml2_output_encoder_class Output,
+	double PixelClock,
+	double PixelClockBackEnd)
+{
+	unsigned int delay = 0;
+
+	if (DSCEnabled == true && OutputBpp != 0) {
+		unsigned int slice_factor;
+		unsigned int slice_width;
+		unsigned int slices_per_segment;
+
+		switch (ODMMode) {
+		case dml2_odm_mode_combine_4to1:
+			slice_factor = 4;
+			break;
+		case dml2_odm_mode_combine_3to1:
+			slice_factor = 3;
+			break;
+		case dml2_odm_mode_combine_2to1:
+			slice_factor = 2;
+			break;
+		default:
+			slice_factor = 1;
+			break;
+		}
+
+		slice_width = (unsigned int)(math_ceil2((double)HActive / (double)NumberOfDSCSlices, 1.0));
+		slices_per_segment = (NumberOfDSCSlices / slice_factor);
+
+		delay = slice_factor * (dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, slice_width,
+				slices_per_segment, OutputFormat, Output) + dscComputeDelay(OutputFormat, Output));
+
+		/* Add one horizontal blank (HTotal - HActive) per started active line of delay. */
+		delay = (unsigned int)(delay + (HTotal - HActive) * math_ceil2((double)delay / (double)HActive, 1.0));
+
+		/* Rescale by the ratio of the two pixel clocks. */
+		delay = (unsigned int)(delay * PixelClock / PixelClockBackEnd);
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: DSCEnabled= %u\n", __func__, DSCEnabled);
+	DML_LOG_VERBOSE("DML::%s: ODMMode = %u\n", __func__, ODMMode);
+	DML_LOG_VERBOSE("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
+	DML_LOG_VERBOSE("DML::%s: HActive = %u\n", __func__, HActive);
+	DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal);
+	DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, PixelClock);
+	DML_LOG_VERBOSE("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd);
+	DML_LOG_VERBOSE("DML::%s: OutputFormat = %u\n", __func__, OutputFormat);
+	DML_LOG_VERBOSE("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent);
+	DML_LOG_VERBOSE("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices);
+	DML_LOG_VERBOSE("DML::%s: DSCDelayRequirement_val = %u\n", __func__, delay);
+#endif
+
+	return delay;
+}
+
+/*
+ * Compute the per-surface footprint SurfaceSizeInMALL[k] (in bytes) and report
+ * through *ExceededMALLSize whether the surfaces overflow the MALL budget.
+ *
+ * For a stationary viewport the footprint is the viewport extent snapped
+ * outward to read-block boundaries, capped by the block-aligned surface size.
+ * Otherwise it is the block-aligned minimum of the surface size and the
+ * viewport size padded by one read block minus one. The chroma plane is only
+ * added when ReadBlockWidthC[k] is non-zero.
+ *
+ * The budget is MALLAllocatedForDCN * 1024 * 1024. Phantom pipes and planes
+ * with refresh_from_mall forced on are summed separately and each sum is
+ * checked against the budget on its own.
+ *
+ * The four Read256BytesBlock* arrays are accepted but not read here.
+ */
+static void CalculateSurfaceSizeInMall(
+	const struct dml2_display_cfg *display_cfg,
+	unsigned int NumberOfActiveSurfaces,
+	unsigned int MALLAllocatedForDCN,
+	unsigned int BytesPerPixelY[],
+	unsigned int BytesPerPixelC[],
+	unsigned int Read256BytesBlockWidthY[],
+	unsigned int Read256BytesBlockWidthC[],
+	unsigned int Read256BytesBlockHeightY[],
+	unsigned int Read256BytesBlockHeightC[],
+	unsigned int ReadBlockWidthY[],
+	unsigned int ReadBlockWidthC[],
+	unsigned int ReadBlockHeightY[],
+	unsigned int ReadBlockHeightC[],
+
+	// Output
+	unsigned int SurfaceSizeInMALL[],
+	bool *ExceededMALLSize)
+{
+	unsigned int TotalSurfaceSizeInMALLForSS = 0;
+	unsigned int TotalSurfaceSizeInMALLForSubVP = 0;
+	unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024;
+
+	for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+		const struct dml2_composition_cfg *composition = &display_cfg->plane_descriptors[k].composition;
+		const struct dml2_surface_cfg *surface = &display_cfg->plane_descriptors[k].surface;
+
+		if (composition->viewport.stationary) {
+			/* Luma: block-aligned width (from x_start) times block-aligned height (from y_start). */
+			SurfaceSizeInMALL[k] = (unsigned int)(math_min2(math_ceil2((double)surface->plane0.width, ReadBlockWidthY[k]),
+				math_floor2(composition->viewport.plane0.x_start + composition->viewport.plane0.width + ReadBlockWidthY[k] - 1, ReadBlockWidthY[k]) -
+				math_floor2((double)composition->viewport.plane0.x_start, ReadBlockWidthY[k])) *
+				math_min2(math_ceil2((double)surface->plane0.height, ReadBlockHeightY[k]),
+					math_floor2((double)composition->viewport.plane0.y_start + composition->viewport.plane0.height + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
+					math_floor2((double)composition->viewport.plane0.y_start, ReadBlockHeightY[k])) * BytesPerPixelY[k]);
+
+			if (ReadBlockWidthC[k] > 0) {
+				/*
+				 * Chroma mirrors the luma formula. The horizontal extent
+				 * must be anchored at plane1.x_start; it previously used
+				 * plane1.y_start, which mis-sized the chroma footprint
+				 * whenever the two start coordinates differed.
+				 */
+				SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] +
+					math_min2(math_ceil2((double)surface->plane1.width, ReadBlockWidthC[k]),
+						math_floor2((double)composition->viewport.plane1.x_start + composition->viewport.plane1.width + ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
+						math_floor2((double)composition->viewport.plane1.x_start, ReadBlockWidthC[k])) *
+					math_min2(math_ceil2((double)surface->plane1.height, ReadBlockHeightC[k]),
+						math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.height + ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
+						math_floor2(composition->viewport.plane1.y_start, ReadBlockHeightC[k])) * BytesPerPixelC[k]);
+			}
+		} else {
+			/* Non-stationary viewport: pad the viewport by one read block minus one in each direction. */
+			SurfaceSizeInMALL[k] = (unsigned int)(math_ceil2(math_min2(surface->plane0.width, composition->viewport.plane0.width + ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
+				math_ceil2(math_min2(surface->plane0.height, composition->viewport.plane0.height + ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * BytesPerPixelY[k]);
+			if (ReadBlockWidthC[k] > 0) {
+				SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] +
+					math_ceil2(math_min2(surface->plane1.width, composition->viewport.plane1.width + ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
+					math_ceil2(math_min2(surface->plane1.height, composition->viewport.plane1.height + ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * BytesPerPixelC[k]);
+			}
+		}
+	}
+
+	for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+		/* SS and Subvp counted separate as they are never used at the same time */
+		if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
+			TotalSurfaceSizeInMALLForSubVP += SurfaceSizeInMALL[k];
+		else if (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable)
+			TotalSurfaceSizeInMALLForSS += SurfaceSizeInMALL[k];
+	}
+
+	*ExceededMALLSize = (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) ||
+		(TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes);
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: MALLAllocatedForDCN = %u\n", __func__, MALLAllocatedForDCN * 1024 * 1024);
+	DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALLForSubVP = %u\n", __func__, TotalSurfaceSizeInMALLForSubVP);
+	DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALLForSS = %u\n", __func__, TotalSurfaceSizeInMALLForSS);
+	DML_LOG_VERBOSE("DML::%s: ExceededMALLSize = %u\n", __func__, *ExceededMALLSize);
+#endif
+}
+
+/*
+ * Derive the 3D LUT (tdlut) fetch parameters for one plane and store them
+ * through the output pointers carried in *p.
+ *
+ * When p->setup_for_tdlut is false every output is zeroed and the function
+ * returns immediately. Otherwise the LUT width, pitch, footprint, page-table
+ * request count, per-frame/per-group byte counts and drain timing are
+ * computed from the addressing mode, width mode, gfx11 flag, GPUVM settings,
+ * DISPCLK and cursor buffer size.
+ *
+ * The scratch argument is not used by this function.
+ */
+static void calculate_tdlut_setting(
+	struct dml2_core_internal_scratch *scratch,
+	struct dml2_core_calcs_calculate_tdlut_setting_params *p)
+{
+	/* Multiplier turning a width in LUT elements into bytes. */
+	const unsigned int tdlut_bpe = 8;
+	unsigned int lut_width;
+	unsigned int pitch_bytes;
+	unsigned int footprint_bytes;
+	unsigned int page_bytes;
+	unsigned int pages_per_frame = 0;
+	unsigned int pte_reqs_per_frame = 0;
+	unsigned int line_bytes;
+	double drain_rate;
+	unsigned int mpc_width;
+	unsigned int simple_group_bytes;
+	bool is_simple_linear;
+	bool is_sw_linear;
+	bool is_17_cube;
+
+	if (!p->setup_for_tdlut) {
+		*p->tdlut_groups_per_2row_ub = 0;
+		*p->tdlut_opt_time = 0;
+		*p->tdlut_drain_time = 0;
+		*p->tdlut_bytes_to_deliver = 0;
+		*p->tdlut_bytes_per_group = 0;
+		*p->tdlut_pte_bytes_per_frame = 0;
+		*p->tdlut_bytes_per_frame = 0;
+		return;
+	}
+
+	is_simple_linear = (p->tdlut_addressing_mode == dml2_tdlut_simple_linear);
+	is_sw_linear = (p->tdlut_addressing_mode == dml2_tdlut_sw_linear);
+	is_17_cube = (p->tdlut_width_mode == dml2_tdlut_width_17_cube);
+
+	mpc_width = p->tdlut_mpc_width_flag ? 33 : 17;
+	simple_group_bytes = p->tdlut_mpc_width_flag ? 39*256 : 10*256;
+
+	page_bytes = p->gpuvm_page_size_kbytes * 1024;
+
+	/* Simple-linear tables are one long row; otherwise the width is the cube edge (17 or 33). */
+	if (is_simple_linear)
+		lut_width = is_17_cube ? 4916 : 35940;
+	else
+		lut_width = is_17_cube ? 17 : 33;
+
+	/* Pitch is aligned to 256B on gfx11 and to 128B otherwise. */
+	pitch_bytes = (unsigned int)math_ceil2(lut_width * tdlut_bpe, p->is_gfx11 ? 256 : 128);
+
+	footprint_bytes = is_sw_linear ? pitch_bytes * lut_width * lut_width : pitch_bytes;
+
+	/* Page and PTE request counts stay 0 when GPUVM is off. */
+	if (p->gpuvm_enable) {
+		pages_per_frame = (unsigned int)math_ceil2(footprint_bytes - 1, page_bytes) / page_bytes + 1;
+		pte_reqs_per_frame = (unsigned int)math_ceil2(pages_per_frame - 1, 8) / 8 + 1;
+	}
+
+	line_bytes = (unsigned int)math_ceil2(lut_width * tdlut_bpe, 64); //64b request
+	*p->tdlut_pte_bytes_per_frame = pte_reqs_per_frame * 64;
+
+	if (is_sw_linear) {
+		//the tdlut_width is either 17 or 33 but the 33x33x33 is subsampled every other line/slice
+		*p->tdlut_bytes_per_frame = line_bytes * mpc_width * mpc_width;
+		*p->tdlut_bytes_per_group = line_bytes * mpc_width;
+		//the delivery cycles is DispClk cycles per line * number of lines * number of slices
+		drain_rate = line_bytes * p->dispclk_mhz / math_ceil2(mpc_width/2.0, 1);
+	} else {
+		//tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements
+		*p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(lut_width * tdlut_bpe, 256);
+		*p->tdlut_bytes_per_group = simple_group_bytes;
+		drain_rate = 2 * tdlut_bpe * p->dispclk_mhz;
+	}
+
+	//the tdlut is fetched during the 2 row times of prefetch.
+	//setup_for_tdlut is known to be set here because of the early return above.
+	*p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1);
+	if (*p->tdlut_bytes_per_frame > p->cursor_buffer_size * 1024)
+		*p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / drain_rate;
+	else
+		*p->tdlut_opt_time = 0;
+	*p->tdlut_drain_time = p->cursor_buffer_size * 1024 / drain_rate;
+	*p->tdlut_bytes_to_deliver = (unsigned int) (p->cursor_buffer_size * 1024.0);
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %d\n", __func__, p->gpuvm_enable);
+	DML_LOG_VERBOSE("DML::%s: vmpg_bytes = %d\n", __func__, page_bytes);
+	DML_LOG_VERBOSE("DML::%s: tdlut_vmpg_per_frame = %d\n", __func__, pages_per_frame);
+	DML_LOG_VERBOSE("DML::%s: tdlut_pte_req_per_frame = %d\n", __func__, pte_reqs_per_frame);
+
+	DML_LOG_VERBOSE("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz);
+	DML_LOG_VERBOSE("DML::%s: tdlut_width = %u\n", __func__, lut_width);
+	DML_LOG_VERBOSE("DML::%s: tdlut_addressing_mode = %s\n", __func__, (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) ? "sw_linear" : "simple_linear");
+	DML_LOG_VERBOSE("DML::%s: tdlut_pitch_bytes = %u\n", __func__, pitch_bytes);
+	DML_LOG_VERBOSE("DML::%s: tdlut_footprint_bytes = %u\n", __func__, footprint_bytes);
+	DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame);
+	DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_line = %u\n", __func__, line_bytes);
+	DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_group = %u\n", __func__, *p->tdlut_bytes_per_group);
+	DML_LOG_VERBOSE("DML::%s: tdlut_drain_rate = %f\n", __func__, drain_rate);
+	DML_LOG_VERBOSE("DML::%s: tdlut_delivery_cycles = %u\n", __func__, p->tdlut_addressing_mode == dml2_tdlut_sw_linear ? (unsigned int)math_ceil2(mpc_width/2.0, 1) * mpc_width * mpc_width : (unsigned int)math_ceil2(lut_width/2.0, 1));
+	DML_LOG_VERBOSE("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time);
+	DML_LOG_VERBOSE("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time);
+	DML_LOG_VERBOSE("DML::%s: tdlut_bytes_to_deliver = %d\n", __func__, *p->tdlut_bytes_to_deliver);
+	DML_LOG_VERBOSE("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub);
+#endif
+}
+
+/*
+ * Compute the arbitration time for regular operation (*Tarb) and for
+ * prefetch (*Tarb_prefetch) as "extra bytes" divided by ReturnBW.
+ *
+ * The extra bytes are, per active surface: one pixel chunk per DPP, one meta
+ * chunk when DCC is enabled, and one tdlut group when the plane is set up for
+ * tdlut. With GPUVM enabled a page-table term is added on top, weighted by
+ * HostVMInefficiencyFactor for *Tarb and by HostVMInefficiencyFactorPrefetch
+ * for *Tarb_prefetch.
+ */
+static void CalculateTarb(
+	const struct dml2_display_cfg *display_cfg,
+	unsigned int PixelChunkSizeInKByte,
+	unsigned int NumberOfActiveSurfaces,
+	unsigned int NumberOfDPP[],
+	unsigned int dpte_group_bytes[],
+	unsigned int tdlut_bytes_per_group[],
+	double HostVMInefficiencyFactor,
+	double HostVMInefficiencyFactorPrefetch,
+	unsigned int HostVMMinPageSize,
+	double ReturnBW,
+	unsigned int MetaChunkSize,
+
+	// output
+	double *Tarb,
+	double *Tarb_prefetch)
+{
+	double bytes_nominal = 0;
+	double bytes_prefetch = 0;
+	double HostVMDynamicLevels = CalculateHostVMDynamicLevels(display_cfg->gpuvm_enable, display_cfg->hostvm_enable, HostVMMinPageSize, display_cfg->hostvm_max_non_cached_page_table_levels);
+	unsigned int k;
+
+	/* Part shared by both results: pixel, meta and tdlut chunks. */
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		bytes_nominal += (NumberOfDPP[k] * PixelChunkSizeInKByte * 1024);
+
+		if (display_cfg->plane_descriptors[k].surface.dcc.enable)
+			bytes_nominal += (MetaChunkSize * 1024);
+
+		if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
+			bytes_nominal += tdlut_bytes_per_group[k];
+	}
+
+	bytes_prefetch = bytes_nominal;
+
+	/* Page-table part: only differs in the inefficiency factor applied. */
+	if (display_cfg->gpuvm_enable == true) {
+		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+			const double pte_bytes = NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels);
+
+			bytes_nominal += pte_bytes * HostVMInefficiencyFactor;
+			bytes_prefetch += pte_bytes * HostVMInefficiencyFactorPrefetch;
+		}
+	}
+
+	*Tarb = bytes_nominal / ReturnBW;
+	*Tarb_prefetch = bytes_prefetch / ReturnBW;
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: PixelChunkSizeInKByte = %d\n", __func__, PixelChunkSizeInKByte);
+	DML_LOG_VERBOSE("DML::%s: MetaChunkSize = %d\n", __func__, MetaChunkSize);
+	DML_LOG_VERBOSE("DML::%s: extra_bytes = %f\n", __func__, bytes_nominal);
+	DML_LOG_VERBOSE("DML::%s: extra_bytes_prefetch = %f\n", __func__, bytes_prefetch);
+#endif
+}
+
+/*
+ * Return TWait: the larger of the reserved vblank time (given in ns and
+ * divided by 1000 here) and g6_temp_read_blackout_us, plus the larger of
+ * UrgentLatency and Ttrip.
+ */
+static double CalculateTWait(
+	long reserved_vblank_time_ns,
+	double UrgentLatency,
+	double Ttrip,
+	double g6_temp_read_blackout_us)
+{
+	const double urgent_or_trip = math_max2(UrgentLatency, Ttrip);
+	const double reserved_or_blackout = math_max2(reserved_vblank_time_ns/1000.0, g6_temp_read_blackout_us);
+	const double wait_time = reserved_or_blackout + urgent_or_trip;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: reserved_vblank_time_ns = %ld\n", __func__, reserved_vblank_time_ns);
+	DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
+	DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, Ttrip);
+	DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, wait_time);
+#endif
+	return wait_time;
+}
+
+
+/*
+ * Compute the VUpdate/VReady pixel offsets, the resulting setup time and the
+ * dynamic-metadata timing terms (Tdmbf, Tdmec, Tdmsks) for one timing.
+ *
+ * All results are written through the output pointers; nothing is returned.
+ * Tdmsks is halved when InterlaceEnable is 1 and
+ * ProgressiveToInterlaceUnitInOPP is false.
+ */
+static void CalculateVUpdateAndDynamicMetadataParameters(
+	unsigned int MaxInterDCNTileRepeaters,
+	double Dppclk,
+	double Dispclk,
+	double DCFClkDeepSleep,
+	double PixelClock,
+	unsigned int HTotal,
+	unsigned int VBlank,
+	unsigned int DynamicMetadataTransmittedBytes,
+	unsigned int DynamicMetadataLinesBeforeActiveRequired,
+	unsigned int InterlaceEnable,
+	bool ProgressiveToInterlaceUnitInOPP,
+
+	// Output
+	double *TSetup,
+	double *Tdmbf,
+	double *Tdmec,
+	double *Tdmsks,
+	unsigned int *VUpdateOffsetPix,
+	unsigned int *VUpdateWidthPix,
+	unsigned int *VReadyOffsetPix)
+{
+	/* Delay through the repeaters: 2 DPPCLK plus 3 DISPCLK cycles per repeater. */
+	const double repeater_delay = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
+	const unsigned int vupdate_width = (unsigned int)(math_ceil2((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + repeater_delay) * PixelClock, 1.0));
+	const unsigned int vready_offset = (unsigned int)(math_ceil2(math_max2(150.0 / Dppclk, repeater_delay + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0));
+	const unsigned int vupdate_offset = (unsigned int)(math_ceil2(HTotal / 4.0, 1.0));
+	double skew_time;
+
+	*VUpdateWidthPix = vupdate_width;
+	*VReadyOffsetPix = vready_offset;
+	*VUpdateOffsetPix = vupdate_offset;
+	*TSetup = (vupdate_offset + vupdate_width + vready_offset) / PixelClock;
+	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
+	*Tdmec = HTotal / PixelClock;
+
+	/* With no explicit line requirement, half of the vertical blank is used. */
+	if (DynamicMetadataLinesBeforeActiveRequired == 0)
+		skew_time = VBlank * HTotal / PixelClock / 2.0;
+	else
+		skew_time = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
+
+	if (InterlaceEnable == 1 && !ProgressiveToInterlaceUnitInOPP)
+		skew_time = skew_time / 2;
+
+	*Tdmsks = skew_time;
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired);
+	DML_LOG_VERBOSE("DML::%s: VBlank = %u\n", __func__, VBlank);
+	DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal);
+	DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, PixelClock);
+	DML_LOG_VERBOSE("DML::%s: Dppclk = %f\n", __func__, Dppclk);
+	DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep);
+	DML_LOG_VERBOSE("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters);
+	DML_LOG_VERBOSE("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, repeater_delay);
+
+	DML_LOG_VERBOSE("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix);
+	DML_LOG_VERBOSE("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix);
+	DML_LOG_VERBOSE("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix);
+
+	DML_LOG_VERBOSE("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
+#endif
+}
+
+/*
+ * Compute the urgent bandwidth required by every active surface and return
+ * the total.
+ *
+ * For each surface k the requirement is the maximum of five candidates
+ * (vm/row prefetch, flip + active, flip + prefetch, active + excess vactive
+ * fill, flip + max prefetch). The result is stored in surface_required_bw[k]
+ * and surface_peak_required_bw[k] is raised to it if it is larger. Phantom
+ * pipes contribute 0 unless state_type is the svp_prefetch state; in that
+ * excluded case surface_peak_required_bw[k] is left untouched.
+ *
+ * DCC and MALL overhead factors are only applied when bw_type is DRAM
+ * (MALL: DRAM or SDP factor, and only in the svp_prefetch state).
+ *
+ * @l is scratch storage; it is zeroed on entry and holds the per-surface
+ * intermediates of the last processed surface plus the running total on
+ * return.
+ *
+ * Returns the sum of surface_required_bw[] over all active surfaces.
+ */
+static double get_urgent_bandwidth_required(
+	struct dml2_core_shared_get_urgent_bandwidth_required_locals *l,
+	const struct dml2_display_cfg *display_cfg,
+	enum dml2_core_internal_soc_state_type state_type,
+	enum dml2_core_internal_bw_type bw_type,
+	bool inc_flip_bw, // including flip bw
+	bool use_qual_row_bw,
+	unsigned int NumberOfActiveSurfaces,
+	unsigned int NumberOfDPP[],
+	double dcc_dram_bw_nom_overhead_factor_p0[],
+	double dcc_dram_bw_nom_overhead_factor_p1[],
+	double dcc_dram_bw_pref_overhead_factor_p0[],
+	double dcc_dram_bw_pref_overhead_factor_p1[],
+	double mall_prefetch_sdp_overhead_factor[],
+	double mall_prefetch_dram_overhead_factor[],
+	double ReadBandwidthLuma[],
+	double ReadBandwidthChroma[],
+	double PrefetchBandwidthLuma[],
+	double PrefetchBandwidthChroma[],
+	double PrefetchBandwidthMax[],
+	double excess_vactive_fill_bw_l[],
+	double excess_vactive_fill_bw_c[],
+	double cursor_bw[],
+	double dpte_row_bw[],
+	double meta_row_bw[],
+	double prefetch_cursor_bw[],
+	double prefetch_vmrow_bw[],
+	double flip_bw[],
+	double UrgentBurstFactorLuma[],
+	double UrgentBurstFactorChroma[],
+	double UrgentBurstFactorCursor[],
+	double UrgentBurstFactorLumaPre[],
+	double UrgentBurstFactorChromaPre[],
+	double UrgentBurstFactorCursorPre[],
+	/* outputs */
+	double surface_required_bw[],
+	double surface_peak_required_bw[])
+{
+	// set inc_flip_bw = 0 for total_dchub_urgent_read_bw_noflip calculation, 1 for total_dchub_urgent_read_bw as described in the MAS
+	// set use_qual_row_bw = 1 to calculate using qualified row bandwidth, used for total_flip_bw calculation
+
+	memset(l, 0, sizeof(struct dml2_core_shared_get_urgent_bandwidth_required_locals));
+
+	for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+		l->mall_svp_prefetch_factor = (state_type == dml2_core_internal_soc_state_svp_prefetch) ? (bw_type == dml2_core_internal_bw_dram ? mall_prefetch_dram_overhead_factor[k] : mall_prefetch_sdp_overhead_factor[k]) : 1.0;
+		l->tmp_nom_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor;
+		l->tmp_nom_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor;
+		l->tmp_pref_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor;
+		l->tmp_pref_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor;
+
+		l->adj_factor_p0 = UrgentBurstFactorLuma[k] * l->tmp_nom_adj_factor_p0;
+		l->adj_factor_p1 = UrgentBurstFactorChroma[k] * l->tmp_nom_adj_factor_p1;
+		l->adj_factor_cur = UrgentBurstFactorCursor[k];
+		l->adj_factor_p0_pre = UrgentBurstFactorLumaPre[k] * l->tmp_pref_adj_factor_p0;
+		l->adj_factor_p1_pre = UrgentBurstFactorChromaPre[k] * l->tmp_pref_adj_factor_p1;
+		l->adj_factor_cur_pre = UrgentBurstFactorCursorPre[k];
+
+		bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]);
+		bool exclude_this_plane = false;
+
+		// Exclude phantom pipe in bw calculation for non svp prefetch state
+		if (state_type != dml2_core_internal_soc_state_svp_prefetch && is_phantom)
+			exclude_this_plane = true;
+
+		// The qualified row bandwidth, qual_row_bw, accounts for the regular non-flip row bandwidth when there is no possible immediate flip or HostVM invalidation flip.
+		// The qual_row_bw is zero if HostVM is possible and only non-zero and equal to row_bw(i) if immediate flip is not allowed for that pipe.
+		// When neither branch below assigns it, per_plane_flip_bw[k] keeps the 0 left by the memset above.
+		if (use_qual_row_bw) {
+			if (display_cfg->hostvm_enable)
+				l->per_plane_flip_bw[k] = 0; // qual_row_bw
+			else if (!display_cfg->plane_descriptors[k].immediate_flip)
+				l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]);
+		} else {
+			// the final_flip_bw includes the regular row_bw when immediate flip is disallowed (and no HostVM)
+			if ((!display_cfg->plane_descriptors[k].immediate_flip && !display_cfg->hostvm_enable) || !inc_flip_bw)
+				l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]);
+			else
+				l->per_plane_flip_bw[k] = NumberOfDPP[k] * flip_bw[k];
+		}
+
+		if (!exclude_this_plane) {
+			l->vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k];
+			l->flip_and_active_bw = l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur;
+			l->flip_and_prefetch_bw = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre;
+			l->flip_and_prefetch_bw_max = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthMax[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre;
+			l->active_and_excess_bw = (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k];
+			surface_required_bw[k] = math_max5(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw, l->flip_and_prefetch_bw_max);
+
+			/* export peak required bandwidth for the surface */
+			surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]);
+
+#ifdef __DML_VBA_DEBUG__
+			DML_LOG_VERBOSE("DML::%s: k=%d, max1: vm_row_bw=%f\n", __func__, k, l->vm_row_bw);
+			DML_LOG_VERBOSE("DML::%s: k=%d, max2: flip_and_active_bw=%f\n", __func__, k, l->flip_and_active_bw);
+			DML_LOG_VERBOSE("DML::%s: k=%d, max3: flip_and_prefetch_bw=%f\n", __func__, k, l->flip_and_prefetch_bw);
+			DML_LOG_VERBOSE("DML::%s: k=%d, max4: active_and_excess_bw=%f\n", __func__, k, l->active_and_excess_bw);
+			DML_LOG_VERBOSE("DML::%s: k=%d, surface_required_bw=%f\n", __func__, k, surface_required_bw[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%d, surface_peak_required_bw=%f\n", __func__, k, surface_peak_required_bw[k]);
+#endif
+		} else {
+			surface_required_bw[k] = 0.0;
+		}
+
+		l->required_bandwidth_mbps += surface_required_bw[k];
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, use_qual_row_bw=%d\n", __func__, k, use_qual_row_bw);
+		DML_LOG_VERBOSE("DML::%s: k=%d, immediate_flip=%d\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip);
+		DML_LOG_VERBOSE("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor);
+		DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0);
+		DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1);
+		DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_cur=%f\n", __func__, k, l->adj_factor_cur);
+
+		DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p0_pre=%f\n", __func__, k, l->adj_factor_p0_pre);
+		DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p1_pre=%f\n", __func__, k, l->adj_factor_p1_pre);
+		DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_cur_pre=%f\n", __func__, k, l->adj_factor_cur_pre);
+
+		DML_LOG_VERBOSE("DML::%s: k=%d, per_plane_flip_bw=%f\n", __func__, k, l->per_plane_flip_bw[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, excess_vactive_fill_bw_l=%f\n", __func__, k, excess_vactive_fill_bw_l[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, excess_vactive_fill_bw_c=%f\n", __func__, k, excess_vactive_fill_bw_c[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]);
+
+		DML_LOG_VERBOSE("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, dpte_row_bw=%f\n", __func__, k, dpte_row_bw[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, PrefetchBandwidthLuma=%f\n", __func__, k, PrefetchBandwidthLuma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, PrefetchBandwidthChroma=%f\n", __func__, k, PrefetchBandwidthChroma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_cursor_bw=%f\n", __func__, k, prefetch_cursor_bw[k]);
+		/* Single running-total line; it already carries soc_state, so no shorter duplicates are logged. */
+		DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), soc_state=%s, inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, dml2_core_internal_soc_state_type_str(state_type), inc_flip_bw, is_phantom, exclude_this_plane);
+#endif
+	}
+
+	return l->required_bandwidth_mbps;
+}
+
+/*
+ * Compute the extra latency terms for regular operation (*ExtraLatency),
+ * self-refresh (*ExtraLatency_sr) and prefetch (*ExtraLatencyPrefetch).
+ *
+ * A base latency is chosen according to qos_type, two trip times are added
+ * when HostVM is enabled with hostvm_mode == 1 (not to the self-refresh
+ * value), and finally the arbitration time from CalculateTarb() is added:
+ * Tarb for the regular and self-refresh values, Tarb_prefetch for the
+ * prefetch value.
+ */
+static void CalculateExtraLatency(
+	const struct dml2_display_cfg *display_cfg,
+	unsigned int ROBBufferSizeInKByte,
+	unsigned int RoundTripPingLatencyCycles,
+	unsigned int ReorderingBytes,
+	double DCFCLK,
+	double FabricClock,
+	unsigned int PixelChunkSizeInKByte,
+	double ReturnBW,
+	unsigned int NumberOfActiveSurfaces,
+	unsigned int NumberOfDPP[],
+	unsigned int dpte_group_bytes[],
+	unsigned int tdlut_bytes_per_group[],
+	double HostVMInefficiencyFactor,
+	double HostVMInefficiencyFactorPrefetch,
+	unsigned int HostVMMinPageSize,
+	enum dml2_qos_param_type qos_type,
+	bool max_outstanding_when_urgent_expected,
+	unsigned int max_outstanding_requests,
+	unsigned int request_size_bytes_luma[],
+	unsigned int request_size_bytes_chroma[],
+	unsigned int MetaChunkSize,
+	unsigned int dchub_arb_to_ret_delay,
+	double Ttrip,
+	unsigned int hostvm_mode,
+
+	// output
+	double *ExtraLatency, // Tex
+	double *ExtraLatency_sr, // Tex_sr
+	double *ExtraLatencyPrefetch)
+
+{
+	double Tarb;
+	double Tarb_prefetch;
+	double Tex_trips;
+	double latency;
+	double latency_sr;
+	unsigned int largest_request_bytes = 0;
+	unsigned int k;
+
+	CalculateTarb(
+		display_cfg,
+		PixelChunkSizeInKByte,
+		NumberOfActiveSurfaces,
+		NumberOfDPP,
+		dpte_group_bytes,
+		tdlut_bytes_per_group,
+		HostVMInefficiencyFactor,
+		HostVMInefficiencyFactorPrefetch,
+		HostVMMinPageSize,
+		ReturnBW,
+		MetaChunkSize,
+		// output
+		&Tarb,
+		&Tarb_prefetch);
+
+	if (display_cfg->hostvm_enable && hostvm_mode == 1)
+		Tex_trips = (2.0 * Ttrip);
+	else
+		Tex_trips = 0.0;
+
+	/* Largest luma or chroma request size over all active surfaces. */
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		if (request_size_bytes_luma[k] > largest_request_bytes)
+			largest_request_bytes = request_size_bytes_luma[k];
+		if (request_size_bytes_chroma[k] > largest_request_bytes)
+			largest_request_bytes = request_size_bytes_chroma[k];
+	}
+
+	if (qos_type == dml2_qos_param_type_dcn4x) {
+		latency_sr = dchub_arb_to_ret_delay / DCFCLK;
+		latency = latency_sr;
+		/* ROB bytes not covered by the outstanding requests, drained at ReturnBW. */
+		if (max_outstanding_when_urgent_expected)
+			latency = latency + (ROBBufferSizeInKByte * 1024 - max_outstanding_requests * largest_request_bytes) / ReturnBW;
+	} else {
+		latency_sr = dchub_arb_to_ret_delay / DCFCLK + RoundTripPingLatencyCycles / FabricClock + ReorderingBytes / ReturnBW;
+		latency = latency_sr;
+	}
+
+	/* The trip term applies to the regular and prefetch values only. */
+	latency = latency + Tex_trips;
+
+	*ExtraLatencyPrefetch = latency + Tarb_prefetch;
+	*ExtraLatency = latency + Tarb;
+	*ExtraLatency_sr = latency_sr + Tarb;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: qos_type=%u\n", __func__, qos_type);
+	DML_LOG_VERBOSE("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode);
+	DML_LOG_VERBOSE("DML::%s: Tex_trips=%f\n", __func__, Tex_trips);
+	DML_LOG_VERBOSE("DML::%s: max_outstanding_when_urgent_expected=%u\n", __func__, max_outstanding_when_urgent_expected);
+	DML_LOG_VERBOSE("DML::%s: FabricClock=%f\n", __func__, FabricClock);
+	DML_LOG_VERBOSE("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
+	DML_LOG_VERBOSE("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
+	DML_LOG_VERBOSE("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles);
+	DML_LOG_VERBOSE("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes);
+	DML_LOG_VERBOSE("DML::%s: Tarb=%f\n", __func__, Tarb);
+	DML_LOG_VERBOSE("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency);
+	DML_LOG_VERBOSE("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr);
+	DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch=%f\n", __func__, *ExtraLatencyPrefetch);
+#endif
+}
+
+static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CalculatePrefetchSchedule_params *p)
+{
+ struct dml2_core_calcs_CalculatePrefetchSchedule_locals *s = &scratch->CalculatePrefetchSchedule_locals;
+ bool dcc_mrq_enable;
+
+ unsigned int vm_bytes;
+ unsigned int extra_tdpe_bytes;
+ unsigned int tdlut_row_bytes;
+ unsigned int Lo;
+
+ s->NoTimeToPrefetch = false;
+ s->DPPCycles = 0;
+ s->DISPCLKCycles = 0;
+ s->DSTTotalPixelsAfterScaler = 0.0;
+ s->LineTime = 0.0;
+ s->dst_y_prefetch_equ = 0.0;
+ s->prefetch_bw_oto = 0.0;
+ s->Tvm_oto = 0.0;
+ s->Tr0_oto = 0.0;
+ s->Tvm_oto_lines = 0.0;
+ s->Tr0_oto_lines = 0.0;
+ s->dst_y_prefetch_oto = 0.0;
+ s->TimeForFetchingVM = 0.0;
+ s->TimeForFetchingRowInVBlank = 0.0;
+ s->LinesToRequestPrefetchPixelData = 0.0;
+ s->HostVMDynamicLevelsTrips = 0;
+ s->trip_to_mem = 0.0;
+ *p->Tvm_trips = 0.0;
+ *p->Tr0_trips = 0.0;
+ s->Tvm_trips_rounded = 0.0;
+ s->Tr0_trips_rounded = 0.0;
+ s->max_Tsw = 0.0;
+ s->Lsw_oto = 0.0;
+ *p->Tpre_rounded = 0.0;
+ s->prefetch_bw_equ = 0.0;
+ s->Tvm_equ = 0.0;
+ s->Tr0_equ = 0.0;
+ s->Tdmbf = 0.0;
+ s->Tdmec = 0.0;
+ s->Tdmsks = 0.0;
+ *p->prefetch_sw_bytes = 0.0;
+ s->prefetch_bw_pr = 0.0;
+ s->bytes_pp = 0.0;
+ s->dep_bytes = 0.0;
+ s->min_Lsw_oto = 0.0;
+ s->min_Lsw_equ = 0.0;
+ s->Tsw_est1 = 0.0;
+ s->Tsw_est2 = 0.0;
+ s->Tsw_est3 = 0.0;
+ s->cursor_prefetch_bytes = 0;
+ *p->prefetch_cursor_bw = 0;
+ *p->RequiredPrefetchBWMax = 0.0;
+
+ dcc_mrq_enable = (p->dcc_enable && p->mrq_present);
+
+ s->TWait_p = p->TWait - p->Ttrip; // TWait includes max(Turg, Ttrip) and Ttrip here is already max(Turg, Ttrip)
+
+ if (p->display_cfg->gpuvm_enable == true && p->display_cfg->hostvm_enable == true) {
+ s->HostVMDynamicLevelsTrips = p->display_cfg->hostvm_max_non_cached_page_table_levels;
+ } else {
+ s->HostVMDynamicLevelsTrips = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: dcc_enable = %u\n", __func__, p->dcc_enable);
+ DML_LOG_VERBOSE("DML::%s: mrq_present = %u\n", __func__, p->mrq_present);
+ DML_LOG_VERBOSE("DML::%s: dcc_mrq_enable = %u\n", __func__, dcc_mrq_enable);
+ DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->display_cfg->gpuvm_enable);
+ DML_LOG_VERBOSE("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels);
+ DML_LOG_VERBOSE("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable);
+ DML_LOG_VERBOSE("DML::%s: VStartup = %u\n", __func__, p->VStartup);
+ DML_LOG_VERBOSE("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable);
+ DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
+ DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, p->TWait);
+ DML_LOG_VERBOSE("DML::%s: TWait_p = %f\n", __func__, s->TWait_p);
+ DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, p->Ttrip);
+ DML_LOG_VERBOSE("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk);
+ DML_LOG_VERBOSE("DML::%s: myPipe->Dispclk = %f\n", __func__, p->myPipe->Dispclk);
+#endif
+ CalculateVUpdateAndDynamicMetadataParameters(
+ p->MaxInterDCNTileRepeaters,
+ p->myPipe->Dppclk,
+ p->myPipe->Dispclk,
+ p->myPipe->DCFClkDeepSleep,
+ p->myPipe->PixelClock,
+ p->myPipe->HTotal,
+ p->myPipe->VBlank,
+ p->DynamicMetadataTransmittedBytes,
+ p->DynamicMetadataLinesBeforeActiveRequired,
+ p->myPipe->InterlaceEnable,
+ p->myPipe->ProgressiveToInterlaceUnitInOPP,
+ p->TSetup,
+
+ // Output
+ &s->Tdmbf,
+ &s->Tdmec,
+ &s->Tdmsks,
+ p->VUpdateOffsetPix,
+ p->VUpdateWidthPix,
+ p->VReadyOffsetPix);
+
+ s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock;
+ s->trip_to_mem = p->Ttrip;
+ *p->Tvm_trips = p->ExtraLatencyPrefetch + math_max2(s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)), p->Turg);
+ if (dcc_mrq_enable)
+ *p->Tvm_trips_flip = *p->Tvm_trips;
+ else
+ *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem;
+
+ *p->Tr0_trips_flip = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1);
+ *p->Tr0_trips = math_max2(*p->Tr0_trips_flip, p->tdlut_opt_time / 2);
+
+ if (p->DynamicMetadataVMEnabled == true) {
+ *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips;
+ *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip;
+ } else {
+ *p->Tdmdl_vm = 0;
+ *p->Tdmdl = s->TWait_p + p->ExtraLatencyPrefetch + p->Ttrip; // Tex
+ }
+
+ if (p->DynamicMetadataEnable == true) {
+ if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) {
+ *p->NotEnoughTimeForDynamicMetadata = true;
+ DML_LOG_VERBOSE("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
+ DML_LOG_VERBOSE("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
+ DML_LOG_VERBOSE("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
+ DML_LOG_VERBOSE("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
+ DML_LOG_VERBOSE("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
+ } else {
+ *p->NotEnoughTimeForDynamicMetadata = false;
+ }
+ } else {
+ *p->NotEnoughTimeForDynamicMetadata = false;
+ }
+
+ if (p->myPipe->ScalerEnabled)
+ s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL);
+ else
+ s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly);
+
+ s->DPPCycles = (unsigned int)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor);
+
+ s->DISPCLKCycles = (unsigned int)p->DISPCLKDelaySubtotal;
+
+ if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0)
+ return true;
+
+ *p->DSTXAfterScaler = (unsigned int)math_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay);
+ *p->DSTXAfterScaler = (unsigned int)math_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml2_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH +
+ ((p->myPipe->ODMMode == dml2_odm_mode_split_1to2 || p->myPipe->ODMMode == dml2_odm_mode_mso_1to2) ? (double)p->myPipe->HActive / 2.0 : 0) +
+ ((p->myPipe->ODMMode == dml2_odm_mode_mso_1to4) ? (double)p->myPipe->HActive * 3.0 / 4.0 : 0));
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: DynamicMetadataVMEnabled = %u\n", __func__, p->DynamicMetadataVMEnabled);
+ DML_LOG_VERBOSE("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles);
+ DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock);
+ DML_LOG_VERBOSE("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk);
+ DML_LOG_VERBOSE("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles);
+ DML_LOG_VERBOSE("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk);
+ DML_LOG_VERBOSE("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay);
+ DML_LOG_VERBOSE("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode);
+ DML_LOG_VERBOSE("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH);
+ DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler);
+
+ DML_LOG_VERBOSE("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut);
+ DML_LOG_VERBOSE("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time);
+ DML_LOG_VERBOSE("DML::%s: tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame);
+ DML_LOG_VERBOSE("DML::%s: tdlut_drain_time = %f\n", __func__, p->tdlut_drain_time);
+#endif
+
+ if (p->OutputFormat == dml2_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP))
+ *p->DSTYAfterScaler = 1;
+ else
+ *p->DSTYAfterScaler = 0;
+
+ s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler;
+ *p->DSTYAfterScaler = (unsigned int)(math_floor2(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1));
+ *p->DSTXAfterScaler = (unsigned int)(s->DSTTotalPixelsAfterScaler - ((double)(*p->DSTYAfterScaler * p->myPipe->HTotal)));
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler);
+ DML_LOG_VERBOSE("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler);
+#endif
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips);
+ DML_LOG_VERBOSE("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips);
+ DML_LOG_VERBOSE("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
+ DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch);
+ DML_LOG_VERBOSE("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels);
+ DML_LOG_VERBOSE("DML::%s: HostVMDynamicLevelsTrips = %u\n", __func__, s->HostVMDynamicLevelsTrips);
+#endif
+ if (p->display_cfg->gpuvm_enable) {
+ s->Tvm_trips_rounded = math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
+ *p->Tvm_trips_flip_rounded = math_ceil2(4.0 * *p->Tvm_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime;
+ } else {
+ if (p->DynamicMetadataEnable || dcc_mrq_enable || p->setup_for_tdlut)
+ s->Tvm_trips_rounded = math_max2(s->LineTime * math_ceil2(4.0*math_max3(p->ExtraLatencyPrefetch, p->Turg, s->trip_to_mem)/s->LineTime, 1)/4, s->LineTime/4.0);
+ else
+ s->Tvm_trips_rounded = s->LineTime / 4.0;
+ *p->Tvm_trips_flip_rounded = s->LineTime / 4.0;
+ }
+
+ s->Tvm_trips_rounded = math_max2(s->Tvm_trips_rounded, s->LineTime / 4.0);
+ *p->Tvm_trips_flip_rounded = math_max2(*p->Tvm_trips_flip_rounded, s->LineTime / 4.0);
+
+ if (p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable) {
+ s->Tr0_trips_rounded = math_ceil2(4.0 * *p->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
+ *p->Tr0_trips_flip_rounded = math_ceil2(4.0 * *p->Tr0_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime;
+ } else {
+ s->Tr0_trips_rounded = s->LineTime / 4.0;
+ *p->Tr0_trips_flip_rounded = s->LineTime / 4.0;
+ }
+ s->Tr0_trips_rounded = math_max2(s->Tr0_trips_rounded, s->LineTime / 4.0);
+ *p->Tr0_trips_flip_rounded = math_max2(*p->Tr0_trips_flip_rounded, s->LineTime / 4.0);
+
+ if (p->display_cfg->gpuvm_enable == true) {
+ if (p->display_cfg->gpuvm_max_page_table_levels >= 3) {
+ *p->Tno_bw = p->ExtraLatencyPrefetch + s->trip_to_mem * (double)((p->display_cfg->gpuvm_max_page_table_levels - 2) * (s->HostVMDynamicLevelsTrips + 1));
+ } else if (p->display_cfg->gpuvm_max_page_table_levels == 1 && !dcc_mrq_enable && !p->setup_for_tdlut) {
+ *p->Tno_bw = p->ExtraLatencyPrefetch;
+ } else {
+ *p->Tno_bw = 0;
+ }
+ } else {
+ *p->Tno_bw = 0;
+ }
+
+ if (p->mrq_present || p->display_cfg->gpuvm_max_page_table_levels >= 3)
+ *p->Tno_bw_flip = *p->Tno_bw;
+ else
+ *p->Tno_bw_flip = 0; //because there is no 3DLUT for iFlip
+
+ if (dml_is_420(p->myPipe->SourcePixelFormat)) {
+ s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4.0;
+ } else {
+ s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC;
+ }
+
+ *p->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC;
+ *p->prefetch_sw_bytes = *p->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor;
+
+ vm_bytes = p->vm_bytes; // vm_bytes is dpde0_bytes_per_frame_ub_l + dpde0_bytes_per_frame_ub_c + 2*extra_dpde_bytes;
+ extra_tdpe_bytes = (unsigned int)math_max2(0, (p->display_cfg->gpuvm_max_page_table_levels - 1) * 128);
+
+ if (p->setup_for_tdlut)
+ vm_bytes = vm_bytes + p->tdlut_pte_bytes_per_frame + (p->display_cfg->gpuvm_enable ? extra_tdpe_bytes : 0);
+
+ tdlut_row_bytes = (unsigned long) math_ceil2(p->tdlut_bytes_per_frame/2.0, 1.0);
+
+ s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__;
+ s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime);
+ s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0);
+
+ // use vactive swath bw for prefetch oto and also cap prefetch_bw_oto to max_vratio_oto
+ // Note: in prefetch calculation, accounting is done mostly per-pipe.
+ // vactive swath bw represents the per-surface (aka per dml plane) bw to move vratio_l/c lines of bytes_l/c per line time
+ s->per_pipe_vactive_sw_bw = p->vactive_sw_bw_l / (double)p->myPipe->DPPPerSurface;
+
+ // one-to-one prefetch bw as one line of bytes per line time (as per vratio_pre_l/c = 1)
+ s->prefetch_bw_oto = (p->swath_width_luma_ub * p->myPipe->BytePerPixelY) / s->LineTime;
+
+ if (p->myPipe->BytePerPixelC > 0) {
+ s->per_pipe_vactive_sw_bw += p->vactive_sw_bw_c / (double)p->myPipe->DPPPerSurface;
+ s->prefetch_bw_oto += (p->swath_width_chroma_ub * p->myPipe->BytePerPixelC) / s->LineTime;
+ }
+
+ /* oto prefetch bw should always be less than total vactive bw */
+ //DML_ASSERT(s->prefetch_bw_oto < s->per_pipe_vactive_sw_bw * p->myPipe->DPPPerSurface);
+
+ s->prefetch_bw_oto = math_max2(s->per_pipe_vactive_sw_bw, s->prefetch_bw_oto) * p->mall_prefetch_sdp_overhead_factor;
+
+ s->prefetch_bw_oto = math_min2(s->prefetch_bw_oto, *p->prefetch_sw_bytes/(s->min_Lsw_oto*s->LineTime));
+
+ s->Lsw_oto = math_ceil2(4.0 * *p->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, 1.0) / 4.0;
+
+ s->prefetch_bw_oto = math_max3(s->prefetch_bw_oto,
+ p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
+ (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));
+
+ /* oto bw needs to be outputted even if the oto schedule isn't being used to avoid ms/mp mismatch.
+ * mp will fail if ms decides to use equ schedule and mp decides to use oto schedule
+ * and the required bandwidth increases when going from ms to mp
+ */
+ *p->RequiredPrefetchBWMax = s->prefetch_bw_oto;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l);
+ DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c);
+ DML_LOG_VERBOSE("DML::%s: per_pipe_vactive_sw_bw = %f\n", __func__, s->per_pipe_vactive_sw_bw);
+#endif
+
+ if (p->display_cfg->gpuvm_enable == true) {
+ s->Tvm_oto = math_max3(
+ *p->Tvm_trips,
+ *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto,
+ s->LineTime / 4.0);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips);
+ DML_LOG_VERBOSE("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto);
+ DML_LOG_VERBOSE("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0);
+#endif
+ } else {
+ s->Tvm_oto = s->Tvm_trips_rounded;
+ }
+
+ if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable)) {
+ s->Tr0_oto = math_max3(
+ *p->Tr0_trips,
+ (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto,
+ s->LineTime / 4.0);
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips);
+ DML_LOG_VERBOSE("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto);
+ DML_LOG_VERBOSE("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4);
+#endif
+ } else
+ s->Tr0_oto = s->LineTime / 4.0;
+
+ s->Tvm_oto_lines = math_ceil2(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0;
+ s->Tr0_oto_lines = math_ceil2(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0;
+ s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto;
+
+#ifdef DML_GLOBAL_PREFETCH_CHECK
+ DML_LOG_VERBOSE("DML::%s: impacted_Tpre = %f\n", __func__, p->impacted_dst_y_pre);
+ if (p->impacted_dst_y_pre > 0) {
+ DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
+ s->dst_y_prefetch_oto = math_max2(s->dst_y_prefetch_oto, p->impacted_dst_y_pre);
+ DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f (impacted)\n", __func__, s->dst_y_prefetch_oto);
+ }
+#endif
+ *p->Tpre_oto = s->dst_y_prefetch_oto * s->LineTime;
+
+ //To (time for delay after scaler) in line time
+ Lo = (unsigned int)(*p->DSTYAfterScaler + (double)*p->DSTXAfterScaler / (double)p->myPipe->HTotal);
+
+ s->min_Lsw_equ = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_EQU__;
+ s->min_Lsw_equ = math_max2(s->min_Lsw_equ, p->tdlut_drain_time / s->LineTime);
+ s->min_Lsw_equ = math_max2(s->min_Lsw_equ, 2.0);
+ //Tpre_equ in line time
+ if (p->DynamicMetadataVMEnabled && p->DynamicMetadataEnable)
+ s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, *p->Tvm_trips) + s->TWait_p) / s->LineTime - Lo;
+ else
+ s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, p->ExtraLatencyPrefetch) + s->TWait_p) / s->LineTime - Lo;
+
+#ifdef DML_GLOBAL_PREFETCH_CHECK
+ s->dst_y_prefetch_equ_impacted = math_max2(p->impacted_dst_y_pre, s->dst_y_prefetch_equ);
+
+ s->dst_y_prefetch_equ_impacted = math_min2(s->dst_y_prefetch_equ_impacted, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
+
+ if (s->dst_y_prefetch_equ_impacted > s->dst_y_prefetch_equ)
+ s->dst_y_prefetch_equ -= s->dst_y_prefetch_equ_impacted - s->dst_y_prefetch_equ;
+#endif
+
+ s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal);
+ DML_LOG_VERBOSE("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto);
+ DML_LOG_VERBOSE("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ);
+ DML_LOG_VERBOSE("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw);
+ DML_LOG_VERBOSE("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip);
+ DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch);
+ DML_LOG_VERBOSE("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
+ DML_LOG_VERBOSE("DML::%s: mall_prefetch_sdp_overhead_factor = %f\n", __func__, p->mall_prefetch_sdp_overhead_factor);
+ DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
+ DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
+ DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
+ DML_LOG_VERBOSE("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC);
+ DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
+ DML_LOG_VERBOSE("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub);
+ DML_LOG_VERBOSE("DML::%s: prefetch_sw_bytes = %f\n", __func__, *p->prefetch_sw_bytes);
+ DML_LOG_VERBOSE("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw);
+ DML_LOG_VERBOSE("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp);
+ DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, vm_bytes);
+ DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
+ DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
+ DML_LOG_VERBOSE("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips);
+ DML_LOG_VERBOSE("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips);
+ DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip = %f\n", __func__, *p->Tvm_trips_flip);
+ DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip = %f\n", __func__, *p->Tr0_trips_flip);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw_pr = %f\n", __func__, s->prefetch_bw_pr);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto);
+ DML_LOG_VERBOSE("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto);
+ DML_LOG_VERBOSE("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto);
+ DML_LOG_VERBOSE("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines);
+ DML_LOG_VERBOSE("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines);
+ DML_LOG_VERBOSE("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto);
+ DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
+ DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ);
+ DML_LOG_VERBOSE("DML::%s: tdlut_row_bytes = %d\n", __func__, tdlut_row_bytes);
+ DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %d\n", __func__, p->meta_row_bytes);
+#endif
+ s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0;
+ *p->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ);
+ DML_LOG_VERBOSE("DML::%s: LineTime: %f\n", __func__, s->LineTime);
+ DML_LOG_VERBOSE("DML::%s: VStartup: %u\n", __func__, p->VStartup);
+ DML_LOG_VERBOSE("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime);
+ DML_LOG_VERBOSE("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup);
+ DML_LOG_VERBOSE("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc);
+ DML_LOG_VERBOSE("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait);
+ DML_LOG_VERBOSE("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
+ DML_LOG_VERBOSE("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
+ DML_LOG_VERBOSE("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
+ DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, p->TWait);
+ DML_LOG_VERBOSE("DML::%s: TWait_p = %f\n", __func__, s->TWait_p);
+ DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, p->Ttrip);
+ DML_LOG_VERBOSE("DML::%s: Tex = %f\n", __func__, p->ExtraLatencyPrefetch);
+ DML_LOG_VERBOSE("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm);
+ DML_LOG_VERBOSE("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
+ DML_LOG_VERBOSE("DML::%s: TWait_p: %fus\n", __func__, s->TWait_p);
+ DML_LOG_VERBOSE("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip);
+ DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler);
+ DML_LOG_VERBOSE("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler);
+ DML_LOG_VERBOSE("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor);
+ DML_LOG_VERBOSE("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes);
+ DML_LOG_VERBOSE("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
+ DML_LOG_VERBOSE("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, (s->dst_y_prefetch_equ * s->LineTime), *p->Tpre_rounded, (*p->Tpre_rounded - (s->dst_y_prefetch_equ * s->LineTime)));
+ DML_LOG_VERBOSE("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips));
+#endif
+
+ *p->dst_y_per_vm_vblank = 0;
+ *p->dst_y_per_row_vblank = 0;
+ *p->VRatioPrefetchY = 0;
+ *p->VRatioPrefetchC = 0;
+ *p->RequiredPrefetchPixelDataBWLuma = 0;
+
+ // Derive bandwidth by finding how much data to move within the time constraint
+ // Tpre_rounded is Tpre rounding to 2-bit fraction
+ // Tvm_trips_rounded is Tvm_trips ceiling to 1/4 line time
+ // Tr0_trips_rounded is Tr0_trips ceiling to 1/4 line time
+ // So that means prefetch bw calculated can be higher since the total time available for prefetch is less
+ bool min_Lsw_equ_ok = *p->Tpre_rounded >= s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded + s->min_Lsw_equ*s->LineTime;
+ bool tpre_gt_req_latency = true;
+#if 0
+ // Check that Tpre_rounded is big enough if all of the stages of the prefetch are time constrained.
+ // The terms Tvm_trips_rounded and Tr0_trips_rounded represent the min time constraints for the VM and row stages.
+ // Normally, these terms cover the overall time constraint for Tpre >= (Tex + max{Ttrip, Turg}), but if these terms are at their minimum, an explicit check is necessary.
+ tpre_gt_req_latency = *p->Tpre_rounded > (math_max2(p->Turg, s->trip_to_mem) + p->ExtraLatencyPrefetch);
+#endif
+
+ if (s->dst_y_prefetch_equ > 1 && min_Lsw_equ_ok && tpre_gt_req_latency) {
+ s->prefetch_bw1 = 0.;
+ s->prefetch_bw2 = 0.;
+ s->prefetch_bw3 = 0.;
+ s->prefetch_bw4 = 0.;
+
+ // prefetch_bw1: VM + 2*R0 + SW
+ if (*p->Tpre_rounded - *p->Tno_bw > 0) {
+ s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor
+ + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)
+ + *p->prefetch_sw_bytes)
+ / (*p->Tpre_rounded - *p->Tno_bw);
+ s->Tsw_est1 = *p->prefetch_sw_bytes / s->prefetch_bw1;
+ } else
+ s->prefetch_bw1 = 0;
+
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1);
+ if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) {
+ s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) /
+ (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw);
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)));
+ DML_LOG_VERBOSE("DML::%s: Tpre_rounded = %f\n", __func__, *p->Tpre_rounded);
+ DML_LOG_VERBOSE("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw);
+ DML_LOG_VERBOSE("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ);
+ DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime);
+ DML_LOG_VERBOSE("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw);
+ DML_LOG_VERBOSE("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw));
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1);
+#endif
+ }
+
+ // prefetch_bw2: VM + SW
+ if (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded > 0) {
+ s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + *p->prefetch_sw_bytes) /
+ (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded);
+ s->Tsw_est2 = *p->prefetch_sw_bytes / s->prefetch_bw2;
+ } else
+ s->prefetch_bw2 = 0;
+
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2);
+ if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) {
+ s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2);
+ }
+
+ // prefetch_bw3: 2*R0 + SW
+ if (*p->Tpre_rounded - s->Tvm_trips_rounded > 0) {
+ s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + *p->prefetch_sw_bytes) /
+ (*p->Tpre_rounded - s->Tvm_trips_rounded);
+ s->Tsw_est3 = *p->prefetch_sw_bytes / s->prefetch_bw3;
+ } else
+ s->prefetch_bw3 = 0;
+
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3);
+ if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) {
+ s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3);
+ }
+
+ // prefetch_bw4: SW
+ if (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0)
+ s->prefetch_bw4 = *p->prefetch_sw_bytes / (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded);
+ else
+ s->prefetch_bw4 = 0;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
+ DML_LOG_VERBOSE("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, s->dst_y_prefetch_equ * s->LineTime, *p->Tpre_rounded, (*p->Tpre_rounded - (s->dst_y_prefetch_equ * s->LineTime)));
+ DML_LOG_VERBOSE("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips));
+ DML_LOG_VERBOSE("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips));
+ DML_LOG_VERBOSE("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1);
+ DML_LOG_VERBOSE("DML::%s: Tsw_est2: %f\n", __func__, s->Tsw_est2);
+ DML_LOG_VERBOSE("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f (final)\n", __func__, s->prefetch_bw3);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw4: %f (final)\n", __func__, s->prefetch_bw4);
+#endif
+ {
+ bool Case1OK = false;
+ bool Case2OK = false;
+ bool Case3OK = false;
+
+ // get "equalized" bw among all stages (vm, r0, sw), so the base case is that all 3 stages are just above the latency-based requirement
+ // so it does not disproportionately favor a particular stage; next is r0 more aggressive, then vm more aggressive, and the worst is all being aggressive
+ // vs the latency based number
+
+ // prefetch_bw1: VM + 2*R0 + SW
+ // so prefetch_bw1 will have enough bw to transfer the necessary data within Tpre_rounded - Tno_bw (Tpre is the worst-case latency based time to fetch the data)
+ // here is to make sure equ bw won't be more aggressive than the latency-based requirement.
+ // check vm time >= vm_trips
+ // check r0 time >= r0_trips
+
+ double total_row_bytes = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes);
+
+ DML_LOG_VERBOSE("DML::%s: Tvm_trips_rounded = %f\n", __func__, s->Tvm_trips_rounded);
+ DML_LOG_VERBOSE("DML::%s: Tr0_trips_rounded = %f\n", __func__, s->Tr0_trips_rounded);
+
+ if (s->prefetch_bw1 > 0) {
+ double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1;
+ double row_transfer_time = total_row_bytes / s->prefetch_bw1;
+ DML_LOG_VERBOSE("DML::%s: Case1: vm_transfer_time = %f\n", __func__, vm_transfer_time);
+ DML_LOG_VERBOSE("DML::%s: Case1: row_transfer_time = %f\n", __func__, row_transfer_time);
+ if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) {
+ Case1OK = true;
+ }
+ }
+
+ // prefetch_bw2: VM + SW
+ // prefetch_bw2 will be enough bw to transfer VM and SW data within (Tpre_rounded - 2*Tr0_trips_rounded - Tno_bw)
+ // check vm time >= vm_trips
+ // check r0 time < r0_trips
+ if (s->prefetch_bw2 > 0) {
+ double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2;
+ double row_transfer_time = total_row_bytes / s->prefetch_bw2;
+ DML_LOG_VERBOSE("DML::%s: Case2: vm_transfer_time = %f\n", __func__, vm_transfer_time);
+ DML_LOG_VERBOSE("DML::%s: Case2: row_transfer_time = %f\n", __func__, row_transfer_time);
+ if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time < s->Tr0_trips_rounded) {
+ Case2OK = true;
+ }
+ }
+
+ // prefetch_bw3: 2*R0 + SW
+ // check vm time < vm_trips
+ // check r0 time >= r0_trips
+ if (s->prefetch_bw3 > 0) {
+ double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3;
+ double row_transfer_time = total_row_bytes / s->prefetch_bw3;
+ DML_LOG_VERBOSE("DML::%s: Case3: vm_transfer_time = %f\n", __func__, vm_transfer_time);
+ DML_LOG_VERBOSE("DML::%s: Case3: row_transfer_time = %f\n", __func__, row_transfer_time);
+ if (vm_transfer_time < s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) {
+ Case3OK = true;
+ }
+ }
+
+ if (Case1OK) {
+ s->prefetch_bw_equ = s->prefetch_bw1;
+ } else if (Case2OK) {
+ s->prefetch_bw_equ = s->prefetch_bw2;
+ } else if (Case3OK) {
+ s->prefetch_bw_equ = s->prefetch_bw3;
+ } else {
+ s->prefetch_bw_equ = s->prefetch_bw4;
+ }
+
+ s->prefetch_bw_equ = math_max3(s->prefetch_bw_equ,
+ p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
+ (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Case1OK: %u\n", __func__, Case1OK);
+ DML_LOG_VERBOSE("DML::%s: Case2OK: %u\n", __func__, Case2OK);
+ DML_LOG_VERBOSE("DML::%s: Case3OK: %u\n", __func__, Case3OK);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ);
+#endif
+
+ if (s->prefetch_bw_equ > 0) {
+ if (p->display_cfg->gpuvm_enable == true) {
+ s->Tvm_equ = math_max3(*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_equ, *p->Tvm_trips, s->LineTime / 4);
+ } else {
+ s->Tvm_equ = s->LineTime / 4;
+ }
+
+ if (p->display_cfg->gpuvm_enable == true || dcc_mrq_enable || p->setup_for_tdlut) {
+ s->Tr0_equ = math_max3((p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_equ, // PixelPTEBytesPerRow is dpte_row_bytes
+ *p->Tr0_trips,
+ s->LineTime / 4);
+ } else {
+ s->Tr0_equ = s->LineTime / 4;
+ }
+ } else {
+ s->Tvm_equ = 0;
+ s->Tr0_equ = 0;
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw_equ equals 0!\n", __func__);
+ }
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ);
+ DML_LOG_VERBOSE("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ);
+#endif
+ // Use the more stressful prefetch schedule
+ if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) {
+ *p->dst_y_prefetch = s->dst_y_prefetch_oto;
+ s->TimeForFetchingVM = s->Tvm_oto;
+ s->TimeForFetchingRowInVBlank = s->Tr0_oto;
+
+ *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0;
+ *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Using oto scheduling for prefetch\n", __func__);
+#endif
+
+ } else {
+ *p->dst_y_prefetch = s->dst_y_prefetch_equ;
+
+ if (s->dst_y_prefetch_equ < s->dst_y_prefetch_equ_impacted)
+ *p->dst_y_prefetch = s->dst_y_prefetch_equ_impacted;
+
+ s->TimeForFetchingVM = s->Tvm_equ;
+ s->TimeForFetchingRowInVBlank = s->Tr0_equ;
+
+ *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0;
+ *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
+
+ /* equ bw should be propagated so a ceiling of the equ bw is accounted for prior to mode programming.
+ * Overall bandwidth may be lower when going from mode support to mode programming but final pixel data
+ * bandwidth may end up higher than what was calculated in mode support.
+ */
+ *p->RequiredPrefetchBWMax = math_max2(s->prefetch_bw_equ, *p->RequiredPrefetchBWMax);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Using equ bw scheduling for prefetch\n", __func__);
+#endif
+ }
+
+ // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank)
+ s->LinesToRequestPrefetchPixelData = *p->dst_y_prefetch - *p->dst_y_per_vm_vblank - 2 * *p->dst_y_per_row_vblank; // Lsw
+
+ s->cursor_prefetch_bytes = (unsigned int)math_max2(p->cursor_bytes_per_chunk, 4 * p->cursor_bytes_per_line);
+ *p->prefetch_cursor_bw = p->num_cursors * s->cursor_prefetch_bytes / (s->LinesToRequestPrefetchPixelData * s->LineTime);
+ *p->prefetch_swath_time_us = (s->LinesToRequestPrefetchPixelData * s->LineTime);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM);
+ DML_LOG_VERBOSE("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank);
+ DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime);
+ DML_LOG_VERBOSE("DML::%s: dst_y_prefetch = %f\n", __func__, *p->dst_y_prefetch);
+ DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank);
+ DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank);
+ DML_LOG_VERBOSE("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData);
+ DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
+ DML_LOG_VERBOSE("DML::%s: prefetch_swath_time_us = %f\n", __func__, *p->prefetch_swath_time_us);
+
+ DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk);
+ DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line);
+ DML_LOG_VERBOSE("DML::%s: cursor_prefetch_bytes = %d\n", __func__, s->cursor_prefetch_bytes);
+ DML_LOG_VERBOSE("DML::%s: prefetch_cursor_bw = %f\n", __func__, *p->prefetch_cursor_bw);
+#endif
+ DML_ASSERT(*p->dst_y_prefetch < 64);
+
+ unsigned int min_lsw_required = (unsigned int)math_max2(2, p->tdlut_drain_time / s->LineTime);
+ if (s->LinesToRequestPrefetchPixelData >= min_lsw_required && s->prefetch_bw_equ > 0) {
+ *p->VRatioPrefetchY = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData;
+ *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, 1.0);
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
+ DML_LOG_VERBOSE("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY);
+ DML_LOG_VERBOSE("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY);
+#endif
+ if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) {
+ if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) {
+ *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY,
+ (double)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0));
+ } else {
+ s->NoTimeToPrefetch = true;
+ DML_LOG_VERBOSE("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY);
+ *p->VRatioPrefetchY = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
+ DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
+ DML_LOG_VERBOSE("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY);
+#endif
+ }
+
+ *p->VRatioPrefetchC = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData;
+ *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, 1.0);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
+ DML_LOG_VERBOSE("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC);
+ DML_LOG_VERBOSE("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC);
+#endif
+ if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) {
+ if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) {
+ *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, (double)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0));
+ } else {
+ s->NoTimeToPrefetch = true;
+ DML_LOG_VERBOSE("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC);
+ *p->VRatioPrefetchC = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
+ DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
+ DML_LOG_VERBOSE("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC);
+#endif
+ }
+
+ *p->RequiredPrefetchPixelDataBWLuma = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelY * p->swath_width_luma_ub / s->LineTime;
+ *p->RequiredPrefetchPixelDataBWChroma = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelC * p->swath_width_chroma_ub / s->LineTime;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
+ DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
+ DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime);
+ DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWLuma);
+ DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWChroma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWChroma);
+#endif
+ } else {
+ s->NoTimeToPrefetch = true;
+ DML_LOG_VERBOSE("DML::%s: No time to prefetch!, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required);
+ DML_LOG_VERBOSE("DML::%s: No time to prefetch!, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ);
+ *p->VRatioPrefetchY = 0;
+ *p->VRatioPrefetchC = 0;
+ *p->RequiredPrefetchPixelDataBWLuma = 0;
+ *p->RequiredPrefetchPixelDataBWChroma = 0;
+ }
+ DML_LOG_VERBOSE("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM);
+ DML_LOG_VERBOSE("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM);
+ DML_LOG_VERBOSE("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank);
+ DML_LOG_VERBOSE("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime);
+ DML_LOG_VERBOSE("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime);
+ DML_LOG_VERBOSE("DML: Tvstartup - TSetup - Tcalc - TWait - Tpre - To > 0\n");
+ DML_LOG_VERBOSE("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingVM - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup);
+ DML_LOG_VERBOSE("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow);
+
+ } else {
+ DML_LOG_VERBOSE("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ);
+ DML_LOG_VERBOSE("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n",
+ __func__, min_Lsw_equ_ok, *p->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime);
+ s->NoTimeToPrefetch = true;
+ s->TimeForFetchingVM = 0;
+ s->TimeForFetchingRowInVBlank = 0;
+ *p->dst_y_per_vm_vblank = 0;
+ *p->dst_y_per_row_vblank = 0;
+ s->LinesToRequestPrefetchPixelData = 0;
+ *p->VRatioPrefetchY = 0;
+ *p->VRatioPrefetchC = 0;
+ *p->RequiredPrefetchPixelDataBWLuma = 0;
+ *p->RequiredPrefetchPixelDataBWChroma = 0;
+ }
+
+ {
+ double prefetch_vm_bw;
+ double prefetch_row_bw;
+
+ if (vm_bytes == 0) {
+ prefetch_vm_bw = 0;
+ } else if (*p->dst_y_per_vm_vblank > 0) {
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
+ DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank);
+ DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime);
+#endif
+ prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (*p->dst_y_per_vm_vblank * s->LineTime);
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
+#endif
+ } else {
+ prefetch_vm_bw = 0;
+ s->NoTimeToPrefetch = true;
+ DML_LOG_VERBOSE("DML::%s: No time to prefetch!. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank);
+ }
+
+ if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) {
+ prefetch_row_bw = 0;
+ } else if (*p->dst_y_per_row_vblank > 0) {
+ prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (*p->dst_y_per_row_vblank * s->LineTime);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
+ DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank);
+ DML_LOG_VERBOSE("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
+#endif
+ } else {
+ prefetch_row_bw = 0;
+ s->NoTimeToPrefetch = true;
+ DML_LOG_VERBOSE("DML::%s: No time to prefetch!. dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank);
+ }
+
+ *p->prefetch_vmrow_bw = math_max2(prefetch_vm_bw, prefetch_row_bw);
+ }
+
+ if (s->NoTimeToPrefetch) {
+ s->TimeForFetchingVM = 0;
+ s->TimeForFetchingRowInVBlank = 0;
+ *p->dst_y_per_vm_vblank = 0;
+ *p->dst_y_per_row_vblank = 0;
+ *p->dst_y_prefetch = 0;
+ s->LinesToRequestPrefetchPixelData = 0;
+ *p->VRatioPrefetchY = 0;
+ *p->VRatioPrefetchC = 0;
+ *p->RequiredPrefetchPixelDataBWLuma = 0;
+ *p->RequiredPrefetchPixelDataBWChroma = 0;
+ *p->prefetch_vmrow_bw = 0;
+ }
+
+ DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank);
+ DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank);
+ DML_LOG_VERBOSE("DML::%s: prefetch_vmrow_bw = %f (final)\n", __func__, *p->prefetch_vmrow_bw);
+ DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWLuma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWLuma);
+ DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWChroma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWChroma);
+ DML_LOG_VERBOSE("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch);
+
+ return s->NoTimeToPrefetch;
+}
+
+/*
+ * Number of source lines the line buffer can hold for one pipe.
+ *
+ * The per-pipe input width is vp_height / num_pipes when the rotation is
+ * vertical and vp_width / num_pipes otherwise (integer division). That width
+ * is scaled down by max(h_ratio, 1.0), and the result is the number of whole
+ * lines that fit in line_buffer_size_bits, capped at max_line_buffer_lines.
+ */
+ static unsigned int get_num_lb_source_lines(unsigned int max_line_buffer_lines,
+	unsigned int line_buffer_size_bits,
+	unsigned int num_pipes,
+	unsigned int vp_width,
+	unsigned int vp_height,
+	double h_ratio,
+	enum dml2_rotation_angle rotation_angle)
+{
+	// bits consumed per pixel held in the line buffer
+	const double bits_per_lb_pixel = 57.0;
+	// a vertical rotation swaps which viewport dimension is split across the pipes
+	const unsigned int split_extent = dml_is_vertical_rotation(rotation_angle) ? vp_height : vp_width;
+	const unsigned int recin_width = split_extent / num_pipes;
+	// horizontal downscaling (h_ratio > 1) shrinks the stored width
+	const double stored_width = recin_width / math_max2(h_ratio, 1.0);
+	const double lines_that_fit = math_floor2(line_buffer_size_bits / bits_per_lb_pixel / stored_width, 1.0);
+
+	return (unsigned int) math_min2((double) max_line_buffer_lines, lines_that_fit);
+}
+
+/*
+ * Return the index of the plane, other than this_plane_idx, that has the
+ * largest entry in Trpd_dcfclk_cycles[]. Ties keep the lowest index because
+ * the comparison is strict.
+ *
+ * When no other plane exists (e.g. num_planes <= 1) the search finds nothing;
+ * the assert flags that case and this_plane_idx is returned as a fallback.
+ */
+ static unsigned int find_max_impact_plane(unsigned int this_plane_idx, unsigned int num_planes, unsigned int Trpd_dcfclk_cycles[])
+{
+	int max_value = -1;
+	int max_idx = -1;
+
+	for (unsigned int i = 0; i < num_planes; i++) {
+		if (i != this_plane_idx && (int) Trpd_dcfclk_cycles[i] > max_value) {
+			max_value = Trpd_dcfclk_cycles[i];
+			max_idx = i;
+		}
+	}
+
+	// Index 0 is a valid result (plane 0 may be the worst offender), so only
+	// a negative index means "nothing found". Testing "<= 0" here would discard
+	// plane 0 and wrongly report the plane as impacted by itself.
+	if (max_idx < 0) {
+		DML_ASSERT(max_idx >= 0);
+		max_idx = this_plane_idx;
+	}
+
+	return max_idx;
+}
+
+/*
+ * Sum prefetch_swath_bytes[] over every plane except exclude_plane_idx and
+ * divide the total by bw_mbps.
+ */
+ static double calculate_impacted_Tsw(unsigned int exclude_plane_idx, unsigned int num_planes, double *prefetch_swath_bytes, double bw_mbps)
+{
+	double total_bytes = 0.;
+	unsigned int plane;
+
+	for (plane = 0; plane < num_planes; plane++) {
+		if (plane == exclude_plane_idx)
+			continue; // the excluded plane contributes nothing
+		total_bytes += prefetch_swath_bytes[plane];
+	}
+
+	return total_bytes / bw_mbps;
+}
+
+// Global check of the aggregate effect of the per-plane prefetch schedules.
+// Fills p->impacted_dst_y_pre[] for every active plane and returns
+// s->prefetch_global_check_passed; *p->recalc_prefetch_schedule is raised when
+// any plane's impacted prefetch exceeds its p->dst_y_prefetch[] entry.
+ static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core_internal_scratch *scratch,
+	struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *p)
+{
+	struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals *s = &scratch->CheckGlobalPrefetchAdmissibility_locals;
+	unsigned int plane, worst;
+
+	memset(s, 0, sizeof(*s));
+
+	// start optimistic: check passes and no recalculation requested
+	s->prefetch_global_check_passed = 1;
+	*p->recalc_prefetch_schedule = 0;
+
+	// upper bound on the return path delay: rob and cdb fully hogged, counted in 64B units
+	s->max_Trpd_dcfclk_cycles = (unsigned int) math_ceil2((p->rob_buffer_size_kbytes*1024 + p->compressed_buffer_size_kbytes*DML_MAX_COMPRESSION_RATIO*1024)/64.0, 1.0);
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: num_active_planes = %d\n", __func__, p->num_active_planes);
+	DML_LOG_VERBOSE("DML::%s: rob_buffer_size_kbytes = %d\n", __func__, p->rob_buffer_size_kbytes);
+	DML_LOG_VERBOSE("DML::%s: compressed_buffer_size_kbytes = %d\n", __func__, p->compressed_buffer_size_kbytes);
+	DML_LOG_VERBOSE("DML::%s: estimated_urg_bandwidth_required_mbps = %f\n", __func__, p->estimated_urg_bandwidth_required_mbps);
+	DML_LOG_VERBOSE("DML::%s: estimated_dcfclk_mhz = %f\n", __func__, p->estimated_dcfclk_mhz);
+	DML_LOG_VERBOSE("DML::%s: max_Trpd_dcfclk_cycles = %u\n", __func__, s->max_Trpd_dcfclk_cycles);
+#endif
+
+	// Pass 1: return path impact contributed by each plane (requests are 256B per dcfclk)
+	for (plane = 0; plane < p->num_active_planes; plane++) {
+		// seed the working sizes from the caller supplied values
+		s->src_swath_bytes_l[plane] = p->full_swath_bytes_l[plane];
+		s->src_swath_bytes_c[plane] = p->full_swath_bytes_c[plane];
+		s->src_detile_buf_size_bytes_l[plane] = p->detile_buffer_size_bytes_l[plane];
+		s->src_detile_buf_size_bytes_c[plane] = p->detile_buffer_size_bytes_c[plane];
+
+		// 4:2:0 10-bit surfaces get all four sizes scaled by 1.5
+		if (p->pixel_format[plane] == dml2_420_10) {
+			s->src_swath_bytes_l[plane] = (unsigned int) (s->src_swath_bytes_l[plane] * 1.5);
+			s->src_swath_bytes_c[plane] = (unsigned int) (s->src_swath_bytes_c[plane] * 1.5);
+			s->src_detile_buf_size_bytes_l[plane] = (unsigned int) (s->src_detile_buf_size_bytes_l[plane] * 1.5);
+			s->src_detile_buf_size_bytes_c[plane] = (unsigned int) (s->src_detile_buf_size_bytes_c[plane] * 1.5);
+		}
+
+		// luma: whole chunks that fit the detile buffer, plus the swaths covering the line buffer lines
+		s->burst_bytes_to_fill_det = (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_l[plane] / p->chunk_bytes_l, 1) * p->chunk_bytes_l);
+		s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_l[plane] / p->swath_height_l[plane], 1) * s->src_swath_bytes_l[plane]);
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: i=%u pixel_format = %d\n", __func__, plane, p->pixel_format[plane]);
+		DML_LOG_VERBOSE("DML::%s: i=%u chunk_bytes_l = %d\n", __func__, plane, p->chunk_bytes_l);
+		DML_LOG_VERBOSE("DML::%s: i=%u lb_source_lines_l = %d\n", __func__, plane, p->lb_source_lines_l[plane]);
+		DML_LOG_VERBOSE("DML::%s: i=%u src_detile_buf_size_bytes_l=%d\n", __func__, plane, s->src_detile_buf_size_bytes_l[plane]);
+		DML_LOG_VERBOSE("DML::%s: i=%u src_swath_bytes_l=%d\n", __func__, plane, s->src_swath_bytes_l[plane]);
+		DML_LOG_VERBOSE("DML::%s: i=%u burst_bytes_to_fill_det=%d (luma)\n", __func__, plane, s->burst_bytes_to_fill_det);
+#endif
+
+		// chroma only contributes for dual plane surfaces
+		if (s->src_swath_bytes_c[plane] > 0) {
+			s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_c[plane] / p->chunk_bytes_c, 1) * p->chunk_bytes_c);
+
+			// only the 4:2:2 planar formats also add the chroma line buffer swaths
+			switch (p->pixel_format[plane]) {
+			case dml2_422_planar_8:
+			case dml2_422_planar_10:
+			case dml2_422_planar_12:
+				s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_c[plane] / p->swath_height_c[plane], 1) * s->src_swath_bytes_c[plane]);
+				break;
+			default:
+				break;
+			}
+
+#ifdef __DML_VBA_DEBUG__
+			DML_LOG_VERBOSE("DML::%s: i=%u chunk_bytes_c = %d\n", __func__, plane, p->chunk_bytes_c);
+			DML_LOG_VERBOSE("DML::%s: i=%u lb_source_lines_c = %d\n", __func__, plane, p->lb_source_lines_c[plane]);
+			DML_LOG_VERBOSE("DML::%s: i=%u src_detile_buf_size_bytes_c=%d\n", __func__, plane, s->src_detile_buf_size_bytes_c[plane]);
+			DML_LOG_VERBOSE("DML::%s: i=%u src_swath_bytes_c=%d\n", __func__, plane, s->src_swath_bytes_c[plane]);
+#endif
+		}
+
+		// fill time assumes a full burst at the request rate
+		s->time_to_fill_det_us = (double) s->burst_bytes_to_fill_det / (256 * p->estimated_dcfclk_mhz);
+		// expressed in dcfclk cycles at 64B per DCFClk
+		s->accumulated_return_path_dcfclk_cycles[plane] = (unsigned int) math_ceil2(((DML_MAX_COMPRESSION_RATIO-1) * 64 * p->estimated_dcfclk_mhz) * s->time_to_fill_det_us / 64.0, 1.0);
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: i=%u burst_bytes_to_fill_det=%d\n", __func__, plane, s->burst_bytes_to_fill_det);
+		DML_LOG_VERBOSE("DML::%s: i=%u time_to_fill_det_us=%f\n", __func__, plane, s->time_to_fill_det_us);
+		DML_LOG_VERBOSE("DML::%s: i=%u accumulated_return_path_dcfclk_cycles=%u\n", __func__, plane, s->accumulated_return_path_dcfclk_cycles[plane]);
+#endif
+		// never exceed the worst case delay, i.e. the one that occupies the full rob+cdb
+		if (s->accumulated_return_path_dcfclk_cycles[plane] > s->max_Trpd_dcfclk_cycles)
+			s->accumulated_return_path_dcfclk_cycles[plane] = s->max_Trpd_dcfclk_cycles;
+	}
+
+	// Pass 2: impacted prefetch time of each plane.
+	// If impacted_Tpre is > equ bw Tpre, the prefetch schedule must fail because a higher state is needed to support the bw.
+	for (plane = 0; plane < p->num_active_planes; plane++) {
+		// "worst" is the plane that causes the most impact to this plane
+		worst = find_max_impact_plane(plane, p->num_active_planes, s->accumulated_return_path_dcfclk_cycles);
+		// the remaining planes (all except "worst") compete for bw
+		p->impacted_dst_y_pre[plane] = s->accumulated_return_path_dcfclk_cycles[worst]/p->estimated_dcfclk_mhz;
+		p->impacted_dst_y_pre[plane] += calculate_impacted_Tsw(worst, p->num_active_planes, p->prefetch_sw_bytes, p->estimated_urg_bandwidth_required_mbps);
+		// convert to lines, rounded up to a quarter line
+		p->impacted_dst_y_pre[plane] = math_ceil2(p->impacted_dst_y_pre[plane] / p->line_time[plane], 0.25);
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: i=%u impacted_Tpre=%f (k=%u)\n", __func__, plane, p->impacted_dst_y_pre[plane], worst);
+#endif
+	}
+
+	if (p->Tpre_rounded == NULL || p->Tpre_oto == NULL) {
+		// likely a mode programming call: assume support and no recalc - the result is not used anyway
+		s->prefetch_global_check_passed = 1;
+		*p->recalc_prefetch_schedule = 0;
+	} else {
+		for (plane = 0; plane < p->num_active_planes; plane++) {
+			// a single plane needing more prefetch than scheduled fails the whole check
+			if (p->impacted_dst_y_pre[plane] > p->dst_y_prefetch[plane]) {
+				s->prefetch_global_check_passed = 0;
+				*p->recalc_prefetch_schedule = 1;
+			}
+#ifdef __DML_VBA_DEBUG__
+			DML_LOG_VERBOSE("DML::%s: i=%u Tpre_rounded=%f\n", __func__, plane, p->Tpre_rounded[plane]);
+			DML_LOG_VERBOSE("DML::%s: i=%u Tpre_oto=%f\n", __func__, plane, p->Tpre_oto[plane]);
+#endif
+		}
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: prefetch_global_check_passed=%u\n", __func__, s->prefetch_global_check_passed);
+	DML_LOG_VERBOSE("DML::%s: recalc_prefetch_schedule=%u\n", __func__, *p->recalc_prefetch_schedule);
+#endif
+
+	return s->prefetch_global_check_passed;
+}
+
+/*
+ * For every soc state m and bandwidth type n, call
+ * get_urgent_bandwidth_required() five times with different input sets:
+ *
+ *  1. vactive-only inputs with zeroed prefetch/flip/burst-factor arrays; the
+ *     return value is discarded and the call is made for what it stores through
+ *     its last two arguments (p->surface_avg_vactive_required_bw[m][n] and
+ *     p->surface_peak_required_bw[m][n] - presumably per-surface outputs,
+ *     confirm against get_urgent_bandwidth_required()).
+ *  2. vactive with burst factors, no prefetch/flip -> p->urg_vactive_bandwidth_required[m][n]
+ *  3. full inputs, flip per p->inc_flip_bw        -> p->urg_bandwidth_required[m][n]
+ *  4. full inputs, no flip, use_qual_row_bw = 1   -> p->urg_bandwidth_required_qual[m][n]
+ *  5. full inputs with all burst factors at 1.0   -> p->non_urg_bandwidth_required[m][n]
+ *
+ * Asserts that the urgent requirement (3) is never below the non-urgent one (5).
+ */
+ static void calculate_peak_bandwidth_required(
+	struct dml2_core_internal_scratch *s,
+	struct dml2_core_calcs_calculate_peak_bandwidth_required_params *p)
+{
+	unsigned int n;
+	unsigned int m;
+
+	struct dml2_core_shared_calculate_peak_bandwidth_required_locals *l = &s->calculate_peak_bandwidth_required_locals;
+
+	memset(l, 0, sizeof(struct dml2_core_shared_calculate_peak_bandwidth_required_locals));
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: inc_flip_bw = %d\n", __func__, p->inc_flip_bw);
+	DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, p->num_active_planes);
+#endif
+
+	// constant per-plane arrays used as stand-ins for inputs a given call should ignore (0.0) or neutralize (1.0)
+	for (unsigned int k = 0; k < p->num_active_planes; ++k) {
+		l->unity_array[k] = 1.0;
+		l->zero_array[k] = 0.0;
+	}
+
+	for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
+		for (n = 0; n < dml2_core_internal_bw_max; n++) {
+			// call 1: return value intentionally unused, see the function header
+			get_urgent_bandwidth_required(
+				&s->get_urgent_bandwidth_required_locals,
+				p->display_cfg,
+				m,
+				n,
+				0, //inc_flip_bw,
+				0, //use_qual_row_bw
+				p->num_active_planes,
+				p->num_of_dpp,
+				p->dcc_dram_bw_nom_overhead_factor_p0,
+				p->dcc_dram_bw_nom_overhead_factor_p1,
+				p->dcc_dram_bw_pref_overhead_factor_p0,
+				p->dcc_dram_bw_pref_overhead_factor_p1,
+				p->mall_prefetch_sdp_overhead_factor,
+				p->mall_prefetch_dram_overhead_factor,
+				p->surface_read_bandwidth_l,
+				p->surface_read_bandwidth_c,
+				l->zero_array, //PrefetchBandwidthLuma,
+				l->zero_array, //PrefetchBandwidthChroma,
+				l->zero_array, //PrefetchBWMax
+				l->zero_array, // slot filled by excess_vactive_fill_bw_l in the calls below
+				l->zero_array, // slot filled by excess_vactive_fill_bw_c in the calls below
+				l->zero_array, // slot filled by cursor_bw in the calls below
+				p->dpte_row_bw,
+				p->meta_row_bw,
+				l->zero_array, //prefetch_cursor_bw,
+				l->zero_array, //prefetch_vmrow_bw,
+				l->zero_array, //flip_bw,
+				l->zero_array, // the six burst factor slots are zeroed for this call
+				l->zero_array,
+				l->zero_array,
+				l->zero_array,
+				l->zero_array,
+				l->zero_array,
+				p->surface_avg_vactive_required_bw[m][n],
+				p->surface_peak_required_bw[m][n]);
+
+			// call 2: vactive requirement, prefetch and flip contributions zeroed
+			p->urg_vactive_bandwidth_required[m][n] = get_urgent_bandwidth_required(
+				&s->get_urgent_bandwidth_required_locals,
+				p->display_cfg,
+				m,
+				n,
+				0, //inc_flip_bw,
+				0, //use_qual_row_bw
+				p->num_active_planes,
+				p->num_of_dpp,
+				p->dcc_dram_bw_nom_overhead_factor_p0,
+				p->dcc_dram_bw_nom_overhead_factor_p1,
+				p->dcc_dram_bw_pref_overhead_factor_p0,
+				p->dcc_dram_bw_pref_overhead_factor_p1,
+				p->mall_prefetch_sdp_overhead_factor,
+				p->mall_prefetch_dram_overhead_factor,
+				p->surface_read_bandwidth_l,
+				p->surface_read_bandwidth_c,
+				l->zero_array, //PrefetchBandwidthLuma,
+				l->zero_array, //PrefetchBandwidthChroma,
+				l->zero_array, //PrefetchBWMax
+				p->excess_vactive_fill_bw_l,
+				p->excess_vactive_fill_bw_c,
+				p->cursor_bw,
+				p->dpte_row_bw,
+				p->meta_row_bw,
+				l->zero_array, //prefetch_cursor_bw,
+				l->zero_array, //prefetch_vmrow_bw,
+				l->zero_array, //flip_bw,
+				p->urgent_burst_factor_l,
+				p->urgent_burst_factor_c,
+				p->urgent_burst_factor_cursor,
+				p->urgent_burst_factor_prefetch_l,
+				p->urgent_burst_factor_prefetch_c,
+				p->urgent_burst_factor_prefetch_cursor,
+				l->surface_dummy_bw,
+				p->surface_peak_required_bw[m][n]);
+
+			// call 3: full urgent requirement, flip included when the caller asks for it
+			p->urg_bandwidth_required[m][n] = get_urgent_bandwidth_required(
+				&s->get_urgent_bandwidth_required_locals,
+				p->display_cfg,
+				m,
+				n,
+				p->inc_flip_bw,
+				0, //use_qual_row_bw
+				p->num_active_planes,
+				p->num_of_dpp,
+				p->dcc_dram_bw_nom_overhead_factor_p0,
+				p->dcc_dram_bw_nom_overhead_factor_p1,
+				p->dcc_dram_bw_pref_overhead_factor_p0,
+				p->dcc_dram_bw_pref_overhead_factor_p1,
+				p->mall_prefetch_sdp_overhead_factor,
+				p->mall_prefetch_dram_overhead_factor,
+				p->surface_read_bandwidth_l,
+				p->surface_read_bandwidth_c,
+				p->prefetch_bandwidth_l,
+				p->prefetch_bandwidth_c,
+				p->prefetch_bandwidth_max, // to prevent ms/mp mismatches where mp prefetch bw > ms prefetch bw
+				p->excess_vactive_fill_bw_l,
+				p->excess_vactive_fill_bw_c,
+				p->cursor_bw,
+				p->dpte_row_bw,
+				p->meta_row_bw,
+				p->prefetch_cursor_bw,
+				p->prefetch_vmrow_bw,
+				p->flip_bw,
+				p->urgent_burst_factor_l,
+				p->urgent_burst_factor_c,
+				p->urgent_burst_factor_cursor,
+				p->urgent_burst_factor_prefetch_l,
+				p->urgent_burst_factor_prefetch_c,
+				p->urgent_burst_factor_prefetch_cursor,
+				l->surface_dummy_bw,
+				p->surface_peak_required_bw[m][n]);
+
+			// call 4: same as call 3 but never with flip and with use_qual_row_bw set
+			p->urg_bandwidth_required_qual[m][n] = get_urgent_bandwidth_required(
+				&s->get_urgent_bandwidth_required_locals,
+				p->display_cfg,
+				m,
+				n,
+				0, //inc_flip_bw
+				1, //use_qual_row_bw
+				p->num_active_planes,
+				p->num_of_dpp,
+				p->dcc_dram_bw_nom_overhead_factor_p0,
+				p->dcc_dram_bw_nom_overhead_factor_p1,
+				p->dcc_dram_bw_pref_overhead_factor_p0,
+				p->dcc_dram_bw_pref_overhead_factor_p1,
+				p->mall_prefetch_sdp_overhead_factor,
+				p->mall_prefetch_dram_overhead_factor,
+				p->surface_read_bandwidth_l,
+				p->surface_read_bandwidth_c,
+				p->prefetch_bandwidth_l,
+				p->prefetch_bandwidth_c,
+				p->prefetch_bandwidth_max, // to prevent ms/mp mismatch where mp prefetch bw > ms prefetch bw
+				p->excess_vactive_fill_bw_l,
+				p->excess_vactive_fill_bw_c,
+				p->cursor_bw,
+				p->dpte_row_bw,
+				p->meta_row_bw,
+				p->prefetch_cursor_bw,
+				p->prefetch_vmrow_bw,
+				p->flip_bw,
+				p->urgent_burst_factor_l,
+				p->urgent_burst_factor_c,
+				p->urgent_burst_factor_cursor,
+				p->urgent_burst_factor_prefetch_l,
+				p->urgent_burst_factor_prefetch_c,
+				p->urgent_burst_factor_prefetch_cursor,
+				l->surface_dummy_bw,
+				p->surface_peak_required_bw[m][n]);
+
+			// call 5: same as call 3 but with every burst factor forced to 1.0
+			p->non_urg_bandwidth_required[m][n] = get_urgent_bandwidth_required(
+				&s->get_urgent_bandwidth_required_locals,
+				p->display_cfg,
+				m,
+				n,
+				p->inc_flip_bw,
+				0, //use_qual_row_bw
+				p->num_active_planes,
+				p->num_of_dpp,
+				p->dcc_dram_bw_nom_overhead_factor_p0,
+				p->dcc_dram_bw_nom_overhead_factor_p1,
+				p->dcc_dram_bw_pref_overhead_factor_p0,
+				p->dcc_dram_bw_pref_overhead_factor_p1,
+				p->mall_prefetch_sdp_overhead_factor,
+				p->mall_prefetch_dram_overhead_factor,
+				p->surface_read_bandwidth_l,
+				p->surface_read_bandwidth_c,
+				p->prefetch_bandwidth_l,
+				p->prefetch_bandwidth_c,
+				p->prefetch_bandwidth_max, // to prevent ms/mp mismatches where mp prefetch bw > ms prefetch bw
+				p->excess_vactive_fill_bw_l,
+				p->excess_vactive_fill_bw_c,
+				p->cursor_bw,
+				p->dpte_row_bw,
+				p->meta_row_bw,
+				p->prefetch_cursor_bw,
+				p->prefetch_vmrow_bw,
+				p->flip_bw,
+				l->unity_array,
+				l->unity_array,
+				l->unity_array,
+				l->unity_array,
+				l->unity_array,
+				l->unity_array,
+				l->surface_dummy_bw,
+				p->surface_peak_required_bw[m][n]);
+
+#ifdef __DML_VBA_DEBUG__
+			DML_LOG_VERBOSE("DML::%s: urg_vactive_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_vactive_bandwidth_required[m][n]);
+			DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]);
+			// print the _qual value under the _qual label (not urg_bandwidth_required again)
+			DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_qual[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required_qual[m][n]);
+			DML_LOG_VERBOSE("DML::%s: non_urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->non_urg_bandwidth_required[m][n]);
+#endif
+			// urgent requirement (call 3) must not be below the non-urgent one (call 5)
+			DML_ASSERT(p->urg_bandwidth_required[m][n] >= p->non_urg_bandwidth_required[m][n]);
+		}
+	}
+}
+
+static void check_urgent_bandwidth_support(
+ double *frac_urg_bandwidth_nom,
+ double *frac_urg_bandwidth_mall,
+ bool *vactive_bandwidth_support_ok, // vactive ok
+ bool *bandwidth_support_ok,// max of vm, prefetch, vactive all ok
+
+ unsigned int mall_allocated_for_dcn_mbytes,
+ double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
+ double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
+ double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
+ double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
+{
+ double frac_urg_bandwidth_nom_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
+ double frac_urg_bandwidth_nom_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
+ double frac_urg_bandwidth_mall_sdp;
+ double frac_urg_bandwidth_mall_dram;
+ if (urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] > 0)
+ frac_urg_bandwidth_mall_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
+ else
+ frac_urg_bandwidth_mall_sdp = 0.0;
+ if (urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] > 0)
+ frac_urg_bandwidth_mall_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
+ else
+ frac_urg_bandwidth_mall_dram = 0.0;
+
+ *bandwidth_support_ok = 1;
+ *vactive_bandwidth_support_ok = 1;
+
+ // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp -> FractionOfUrgentBandwidth
+ // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram
+ // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp, svp_prefetch -> FractionOfUrgentBandwidthMALL
+ // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram, svp_prefetch
+
+ *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
+ *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
+
+ if (mall_allocated_for_dcn_mbytes > 0) {
+ *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
+ *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
+ }
+
+ *frac_urg_bandwidth_nom = math_max2(frac_urg_bandwidth_nom_sdp, frac_urg_bandwidth_nom_dram);
+ *frac_urg_bandwidth_mall = math_max2(frac_urg_bandwidth_mall_sdp, frac_urg_bandwidth_mall_dram);
+
+ *bandwidth_support_ok &= (*frac_urg_bandwidth_nom <= 1.0);
+
+ if (mall_allocated_for_dcn_mbytes > 0)
+ *bandwidth_support_ok &= (*frac_urg_bandwidth_mall <= 1.0);
+
+ *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
+ *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
+ if (mall_allocated_for_dcn_mbytes > 0) {
+ *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
+ *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom_sdp = %f\n", __func__, frac_urg_bandwidth_nom_sdp);
+ DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom_dram = %f\n", __func__, frac_urg_bandwidth_nom_dram);
+ DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom = %f\n", __func__, *frac_urg_bandwidth_nom);
+
+ DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall_sdp = %f\n", __func__, frac_urg_bandwidth_mall_sdp);
+ DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall_dram = %f\n", __func__, frac_urg_bandwidth_mall_dram);
+ DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall = %f\n", __func__, *frac_urg_bandwidth_mall);
+ DML_LOG_VERBOSE("DML::%s: bandwidth_support_ok = %d\n", __func__, *bandwidth_support_ok);
+
+ for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) {
+ for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) {
+ DML_LOG_VERBOSE("DML::%s: state:%s bw_type:%s urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n",
+ __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n),
+ urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required[m][n]) ? "<" : ">=", urg_bandwidth_required[m][n]);
+ }
+ }
+#endif
+}
+
+/*
+ * Report how much urgent bandwidth is left over for immediate flip in
+ * @eval_state: for both the SDP and the DRAM entry, subtract the required
+ * (no-flip) urgent bandwidth from the available urgent bandwidth and return
+ * the smaller of the two differences. The result is not clamped, so it is
+ * negative when a requirement exceeds what is available.
+ */
+static double get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state,
+	double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], // no flip
+	double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
+{
+	// Rows of the two tables that belong to the state under evaluation
+	const double *avail = urg_bandwidth_available[eval_state];
+	const double *needed = urg_bandwidth_required[eval_state];
+
+	const double sdp_headroom = avail[dml2_core_internal_bw_sdp] - needed[dml2_core_internal_bw_sdp];
+	const double dram_headroom = avail[dml2_core_internal_bw_dram] - needed[dml2_core_internal_bw_dram];
+	double headroom = dram_headroom;
+
+	// Strict '<' so that a tie resolves to the DRAM value
+	if (sdp_headroom < dram_headroom)
+		headroom = sdp_headroom;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state));
+	DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_sdp_mbps = %f\n", __func__, avail[dml2_core_internal_bw_sdp]);
+	DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_dram_mbps = %f\n", __func__, avail[dml2_core_internal_bw_dram]);
+	DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_sdp_mbps = %f\n", __func__, needed[dml2_core_internal_bw_sdp]);
+	DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_dram_mbps = %f\n", __func__, needed[dml2_core_internal_bw_dram]);
+	DML_LOG_VERBOSE("DML::%s: flip_bw_available_sdp_mbps = %f\n", __func__, sdp_headroom);
+	DML_LOG_VERBOSE("DML::%s: flip_bw_available_dram_mbps = %f\n", __func__, dram_headroom);
+	DML_LOG_VERBOSE("DML::%s: flip_bw_available_mbps = %f\n", __func__, headroom);
+#endif
+
+	return headroom;
+}
+
+/*
+ * Decide whether the urgent bandwidth available in @eval_state covers the
+ * bandwidth required once immediate flip is included.
+ *
+ * Outputs:
+ *   frac_urg_bandwidth_flip   - larger of the SDP and DRAM ratios of
+ *                               non-urgent flip bandwidth required to urgent
+ *                               bandwidth available.
+ *   flip_bandwidth_support_ok - true only when, for every bandwidth type, the
+ *                               available urgent bandwidth is >= the urgent
+ *                               flip requirement and the fraction above is
+ *                               <= 1.0.
+ *
+ * NOTE(review): the ratios divide by the available bandwidth without a zero
+ * check; confirm callers never pass a state whose available bandwidth is 0.
+ */
+static void calculate_immediate_flip_bandwidth_support(
+	// Output
+	double *frac_urg_bandwidth_flip,
+	bool *flip_bandwidth_support_ok,
+
+	// Input
+	enum dml2_core_internal_soc_state_type eval_state,
+	double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
+	double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
+	double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
+{
+	// Rows of the three tables that belong to the state under evaluation
+	const double *avail = urg_bandwidth_available[eval_state];
+	const double *urg_req = urg_bandwidth_required_flip[eval_state];
+	const double *non_urg_req = non_urg_bandwidth_required_flip[eval_state];
+
+	const double frac_urg_bw_flip_sdp = non_urg_req[dml2_core_internal_bw_sdp] / avail[dml2_core_internal_bw_sdp];
+	const double frac_urg_bw_flip_dram = non_urg_req[dml2_core_internal_bw_dram] / avail[dml2_core_internal_bw_dram];
+	unsigned int n;
+
+	*flip_bandwidth_support_ok = true;
+	for (n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram
+		// Negated '>=' so that an unordered (NaN) comparison also clears the flag
+		if (!(avail[n] >= urg_req[n]))
+			*flip_bandwidth_support_ok = false;
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: n = %s\n", __func__, dml2_core_internal_bw_type_str(n));
+		DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available = %f\n", __func__, avail[n]);
+		DML_LOG_VERBOSE("DML::%s: non_urg_bandwidth_required_flip = %f\n", __func__, non_urg_req[n]);
+		DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_flip = %f\n", __func__, urg_req[n]);
+		DML_LOG_VERBOSE("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok);
+#endif
+		DML_ASSERT(urg_bandwidth_required_flip[eval_state][n] >= non_urg_bandwidth_required_flip[eval_state][n]);
+	}
+
+	// Keep the larger fraction; the DRAM value wins unless SDP is strictly greater
+	*frac_urg_bandwidth_flip = frac_urg_bw_flip_dram;
+	if (frac_urg_bw_flip_sdp > frac_urg_bw_flip_dram)
+		*frac_urg_bandwidth_flip = frac_urg_bw_flip_sdp;
+
+	if (!(*frac_urg_bandwidth_flip <= 1.0))
+		*flip_bandwidth_support_ok = false;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state));
+	DML_LOG_VERBOSE("DML::%s: frac_urg_bw_flip_sdp = %f\n", __func__, frac_urg_bw_flip_sdp);
+	DML_LOG_VERBOSE("DML::%s: frac_urg_bw_flip_dram = %f\n", __func__, frac_urg_bw_flip_dram);
+	DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_flip = %f\n", __func__, *frac_urg_bandwidth_flip);
+	DML_LOG_VERBOSE("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok);
+
+	// Dump the whole available-vs-required table, not only the evaluated state
+	for (unsigned int state_idx = 0; state_idx < dml2_core_internal_soc_state_max; state_idx++) {
+		for (unsigned int bw_idx = 0; bw_idx < dml2_core_internal_bw_max; bw_idx++) {
+			const double table_avail = urg_bandwidth_available[state_idx][bw_idx];
+			const double table_req = urg_bandwidth_required_flip[state_idx][bw_idx];
+
+			DML_LOG_VERBOSE("DML::%s: state:%s bw_type:%s, urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n",
+				__func__, dml2_core_internal_soc_state_type_str(state_idx), dml2_core_internal_bw_type_str(bw_idx),
+				table_avail, (table_avail < table_req) ? "<" : ">=", table_req);
+		}
+	}
+#endif
+}
+
+/*
+ * CalculateFlipSchedule - work out the immediate-flip timing and bandwidth
+ * for one pipe. Intermediate values live in s->CalculateFlipSchedule_locals.
+ *
+ * Behaviour by case:
+ *  - TotImmediateFlipBytes == 0, or neither GPUVMEnable nor dcc_mrq_enable:
+ *    all numeric outputs are 0 and *ImmediateFlipSupportedForPipe is set to
+ *    iflip_enable.
+ *  - use_lb_flip_bw: *final_flip_bw is a lower bound on the flip bandwidth
+ *    (0 unless iflip_enable), the two dst_y outputs are set to 1 (marked
+ *    "not used" below), and support requires
+ *    min_row_time >= Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded.
+ *  - otherwise with iflip_enable: the pipe gets a share of
+ *    BandwidthAvailableForImmediateFlip proportional to
+ *    per_pipe_flip_bytes / TotImmediateFlipBytes; Tvm_flip / Tr0_flip are
+ *    derived from it, converted to quarter-line-granular dst_y values, and
+ *    support is refused when dst_y_per_vm_flip >= 32, dst_y_per_row_flip >= 16
+ *    or Tvm_flip + 2 * Tr0_flip exceeds min_row_time.
+ *  - otherwise with iflip disabled: all numeric outputs are 0 and support is
+ *    reported as iflip_enable (i.e. false).
+ *
+ * use_one_row_for_frame_flip is accepted but not referenced in this body.
+ */
+static void CalculateFlipSchedule(
+	struct dml2_core_internal_scratch *s,
+	bool iflip_enable,
+	bool use_lb_flip_bw,
+	double HostVMInefficiencyFactor,
+	double Tvm_trips_flip,
+	double Tr0_trips_flip,
+	double Tvm_trips_flip_rounded,
+	double Tr0_trips_flip_rounded,
+	bool GPUVMEnable,
+	double vm_bytes, // vm_bytes
+	double DPTEBytesPerRow, // dpte_row_bytes
+	double BandwidthAvailableForImmediateFlip,
+	unsigned int TotImmediateFlipBytes,
+	enum dml2_source_format_class SourcePixelFormat,
+	double LineTime,
+	double VRatio,
+	double VRatioChroma,
+	double Tno_bw_flip,
+	unsigned int dpte_row_height,
+	unsigned int dpte_row_height_chroma,
+	bool use_one_row_for_frame_flip,
+	unsigned int max_flip_time_us,
+	unsigned int max_flip_time_lines,
+	unsigned int per_pipe_flip_bytes,
+	unsigned int meta_row_bytes,
+	unsigned int meta_row_height,
+	unsigned int meta_row_height_chroma,
+	bool dcc_mrq_enable,
+
+	// Output
+	double *dst_y_per_vm_flip,
+	double *dst_y_per_row_flip,
+	double *final_flip_bw,
+	bool *ImmediateFlipSupportedForPipe)
+{
+	struct dml2_core_shared_CalculateFlipSchedule_locals *l = &s->CalculateFlipSchedule_locals;
+
+	// Formats with a second (chroma/alpha) plane also constrain the row time by that plane
+	l->dual_plane = dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha;
+	l->dpte_row_bytes = DPTEBytesPerRow;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
+	DML_LOG_VERBOSE("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us);
+	DML_LOG_VERBOSE("DML::%s: ip.max_flip_time_lines = %d\n", __func__, max_flip_time_lines);
+	DML_LOG_VERBOSE("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
+	DML_LOG_VERBOSE("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes);
+	DML_LOG_VERBOSE("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw);
+	DML_LOG_VERBOSE("DML::%s: iflip_enable = %u\n", __func__, iflip_enable);
+	DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
+	DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, LineTime);
+	DML_LOG_VERBOSE("DML::%s: Tno_bw_flip = %f\n", __func__, Tno_bw_flip);
+	DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip = %f\n", __func__, Tvm_trips_flip);
+	DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip = %f\n", __func__, Tr0_trips_flip);
+	DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip_rounded = %f\n", __func__, Tvm_trips_flip_rounded);
+	DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip_rounded = %f\n", __func__, Tr0_trips_flip_rounded);
+	DML_LOG_VERBOSE("DML::%s: vm_bytes = %f\n", __func__, vm_bytes);
+	DML_LOG_VERBOSE("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow);
+	DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %d\n", __func__, meta_row_bytes);
+	DML_LOG_VERBOSE("DML::%s: dpte_row_bytes = %f\n", __func__, l->dpte_row_bytes);
+	DML_LOG_VERBOSE("DML::%s: dpte_row_height = %d\n", __func__, dpte_row_height);
+	DML_LOG_VERBOSE("DML::%s: meta_row_height = %d\n", __func__, meta_row_height);
+	DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio);
+#endif
+
+	if (TotImmediateFlipBytes > 0 && (GPUVMEnable || dcc_mrq_enable)) {
+		// Pick the row height that bounds the flip: the smaller of the dpte and
+		// meta row heights when both are in use, otherwise whichever is enabled.
+		if (l->dual_plane) {
+			if (dcc_mrq_enable && GPUVMEnable) {
+				l->min_row_height = math_min2(dpte_row_height, meta_row_height);
+				l->min_row_height_chroma = math_min2(dpte_row_height_chroma, meta_row_height_chroma);
+			} else if (GPUVMEnable) {
+				l->min_row_height = dpte_row_height;
+				l->min_row_height_chroma = dpte_row_height_chroma;
+			} else {
+				l->min_row_height = meta_row_height;
+				l->min_row_height_chroma = meta_row_height_chroma;
+			}
+			l->min_row_time = math_min2(l->min_row_height * LineTime / VRatio, l->min_row_height_chroma * LineTime / VRatioChroma);
+		} else {
+			if (dcc_mrq_enable && GPUVMEnable)
+				l->min_row_height = math_min2(dpte_row_height, meta_row_height);
+			else if (GPUVMEnable)
+				l->min_row_height = dpte_row_height;
+			else
+				l->min_row_height = meta_row_height;
+
+			l->min_row_time = l->min_row_height * LineTime / VRatio;
+		}
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: min_row_time = %f\n", __func__, l->min_row_time);
+#endif
+		DML_ASSERT(l->min_row_time > 0);
+
+		if (use_lb_flip_bw) {
+			// For mode check, calculation the flip bw requirement with worst case flip time
+			l->max_flip_time = math_min2(math_min2(l->min_row_time, (double)max_flip_time_lines * LineTime / VRatio),
+				math_max2(Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded, (double)max_flip_time_us));
+
+			//The lower bound on flip bandwidth
+			// Note: The get_urgent_bandwidth_required already consider dpte_row_bw and meta_row_bw in bandwidth calculation, so leave final_flip_bw = 0 if iflip not required
+			l->lb_flip_bw = 0;
+
+			if (iflip_enable) {
+				l->hvm_scaled_vm_bytes = vm_bytes * HostVMInefficiencyFactor;
+				l->num_rows = 2;
+				l->hvm_scaled_row_bytes = (l->num_rows * l->dpte_row_bytes * HostVMInefficiencyFactor + l->num_rows * meta_row_bytes);
+				l->hvm_scaled_vm_row_bytes = l->hvm_scaled_vm_bytes + l->hvm_scaled_row_bytes;
+				// Largest of three rates: vm+row bytes over the whole window, vm bytes
+				// with the row trips excluded, row bytes with the vm trips excluded.
+				l->lb_flip_bw = math_max3(
+					l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip),
+					l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded),
+					l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded));
+#ifdef __DML_VBA_DEBUG__
+				DML_LOG_VERBOSE("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time);
+				DML_LOG_VERBOSE("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes);
+				DML_LOG_VERBOSE("DML::%s: total row bytes (%f row, hvm ineff scaled) = %f\n", __func__, l->num_rows, l->hvm_scaled_row_bytes);
+				DML_LOG_VERBOSE("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes);
+				DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip));
+				DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded));
+				DML_LOG_VERBOSE("DML::%s: lb_flip_bw for row = %f\n", __func__, l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded));
+
+				if (l->lb_flip_bw > 0) {
+					DML_LOG_VERBOSE("DML::%s: mode_support est Tvm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw);
+					DML_LOG_VERBOSE("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows);
+					// dst_y is the estimated Tvm_flip expressed in lines, so the whole
+					// sum (including Tno_bw_flip) is divided by LineTime.
+					DML_LOG_VERBOSE("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, (Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw) / LineTime);
+					DML_LOG_VERBOSE("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows);
+					DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip_rounded + 2*Tr0_trips_flip_rounded = %f\n", __func__, (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded));
+				}
+#endif
+				// NOTE(review): the vm term subtracts a time (Tno_bw_flip) from a
+				// bytes/time rate; possibly meant "/ (31 * LineTime - Tno_bw_flip)".
+				// Left unchanged - confirm against the hardware model before touching.
+				l->lb_flip_bw = math_max3(l->lb_flip_bw,
+					l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip,
+					(l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime));
+
+#ifdef __DML_VBA_DEBUG__
+				DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm reg limit = %f\n", __func__, l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip);
+				DML_LOG_VERBOSE("DML::%s: lb_flip_bw for row reg limit = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime));
+#endif
+			}
+
+			*final_flip_bw = l->lb_flip_bw;
+
+			*dst_y_per_vm_flip = 1; // not used
+			*dst_y_per_row_flip = 1; // not used
+			*ImmediateFlipSupportedForPipe = l->min_row_time >= (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded);
+		} else {
+			if (iflip_enable) {
+				// This pipe's share of the flip bandwidth, proportional to its flip bytes
+				l->ImmediateFlipBW = (double)per_pipe_flip_bytes * BandwidthAvailableForImmediateFlip / (double)TotImmediateFlipBytes; // flip_bw(i)
+
+#ifdef __DML_VBA_DEBUG__
+				DML_LOG_VERBOSE("DML::%s: per_pipe_flip_bytes = %d\n", __func__, per_pipe_flip_bytes);
+				DML_LOG_VERBOSE("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
+				DML_LOG_VERBOSE("DML::%s: ImmediateFlipBW = %f\n", __func__, l->ImmediateFlipBW);
+				DML_LOG_VERBOSE("DML::%s: portion of flip bw = %f\n", __func__, (double)per_pipe_flip_bytes / (double)TotImmediateFlipBytes);
+#endif
+				if (l->ImmediateFlipBW == 0) {
+					l->Tvm_flip = 0;
+					l->Tr0_flip = 0;
+				} else {
+					// Each time is the largest of the latency-based trip time, the
+					// bandwidth-based transfer time, and a quarter of a line.
+					l->Tvm_flip = math_max3(Tvm_trips_flip,
+						Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW,
+						LineTime / 4.0);
+
+					l->Tr0_flip = math_max3(Tr0_trips_flip,
+						(l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW,
+						LineTime / 4.0);
+				}
+#ifdef __DML_VBA_DEBUG__
+				DML_LOG_VERBOSE("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, vm_bytes * HostVMInefficiencyFactor);
+				DML_LOG_VERBOSE("DML::%s: total row bytes (hvm ineff scaled, one row) = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes));
+
+				DML_LOG_VERBOSE("DML::%s: Tvm_flip = %f (bw-based), Tvm_trips_flip = %f (latency-based)\n", __func__, Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, Tvm_trips_flip);
+				DML_LOG_VERBOSE("DML::%s: Tr0_flip = %f (bw-based), Tr0_trips_flip = %f (latency-based)\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, Tr0_trips_flip);
+#endif
+				// Express the flip times in lines, rounded up to a quarter line
+				*dst_y_per_vm_flip = math_ceil2(4.0 * (l->Tvm_flip / LineTime), 1.0) / 4.0;
+				*dst_y_per_row_flip = math_ceil2(4.0 * (l->Tr0_flip / LineTime), 1.0) / 4.0;
+
+				// NOTE(review): when ImmediateFlipBW == 0 both dst_y values are 0, so
+				// these divisions are by zero (inf, or NaN for 0/0) - confirm callers
+				// tolerate that result.
+				*final_flip_bw = math_max2(vm_bytes * HostVMInefficiencyFactor / (*dst_y_per_vm_flip * LineTime),
+					(l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (*dst_y_per_row_flip * LineTime));
+
+				if (*dst_y_per_vm_flip >= 32 || *dst_y_per_row_flip >= 16 || l->Tvm_flip + 2 * l->Tr0_flip > l->min_row_time) {
+					*ImmediateFlipSupportedForPipe = false;
+				} else {
+					*ImmediateFlipSupportedForPipe = iflip_enable;
+				}
+			} else {
+				l->Tvm_flip = 0;
+				l->Tr0_flip = 0;
+				*dst_y_per_vm_flip = 0;
+				*dst_y_per_row_flip = 0;
+				*final_flip_bw = 0;
+				*ImmediateFlipSupportedForPipe = iflip_enable;
+			}
+		}
+	} else {
+		// Nothing to fetch for a flip: no flip bytes, or neither GPUVM nor DCC MRQ in use
+		l->Tvm_flip = 0;
+		l->Tr0_flip = 0;
+		*dst_y_per_vm_flip = 0;
+		*dst_y_per_row_flip = 0;
+		*final_flip_bw = 0;
+		*ImmediateFlipSupportedForPipe = iflip_enable;
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	if (!use_lb_flip_bw) {
+		DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_flip = %f (should be < 32)\n", __func__, *dst_y_per_vm_flip);
+		DML_LOG_VERBOSE("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip);
+		DML_LOG_VERBOSE("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip);
+		DML_LOG_VERBOSE("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip);
+		DML_LOG_VERBOSE("DML::%s: Tvm_flip + 2*Tr0_flip = %f (should be <= min_row_time=%f)\n", __func__, l->Tvm_flip + 2 * l->Tr0_flip, l->min_row_time);
+	}
+	DML_LOG_VERBOSE("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw);
+	DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe);
+#endif
+}
+
+static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+ struct dml2_core_internal_scratch *scratch,
+ struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *p)
+{
+ struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals;
+
+ enum dml2_uclk_pstate_change_strategy uclk_pstate_change_strategy;
+ double reserved_vblank_time_us;
+ bool FoundCriticalSurface = false;
+
+ s->TotalActiveWriteback = 0;
+ p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark);
+#endif
+
+ p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency;
+ p->Watermark->DRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->Watermark->UrgentWatermark;
+ p->Watermark->FCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->Watermark->UrgentWatermark;
+ p->Watermark->StutterExitWatermark = p->mmSOCParameters.SRExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
+ p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
+ p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
+ p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
+ if (p->mmSOCParameters.qos_type == dml2_qos_param_type_dcn4x) {
+ p->Watermark->StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
+ p->Watermark->StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
+ p->Watermark->Z8StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
+ p->Watermark->Z8StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
+ }
+ p->Watermark->temp_read_or_ppt_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency);
+ DML_LOG_VERBOSE("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency);
+ DML_LOG_VERBOSE("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency);
+ DML_LOG_VERBOSE("DML::%s: SREnterPlusExitZ8Time = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitZ8Time);
+ DML_LOG_VERBOSE("DML::%s: SREnterPlusExitTime = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitTime);
+ DML_LOG_VERBOSE("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark);
+ DML_LOG_VERBOSE("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark);
+ DML_LOG_VERBOSE("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark);
+ DML_LOG_VERBOSE("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark);
+ DML_LOG_VERBOSE("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark);
+ DML_LOG_VERBOSE("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark);
+ DML_LOG_VERBOSE("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark);
+ DML_LOG_VERBOSE("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark);
+ DML_LOG_VERBOSE("DML::%s: temp_read_or_ppt_watermark_us = %f\n", __func__, p->Watermark->temp_read_or_ppt_watermark_us);
+#endif
+
+ s->TotalActiveWriteback = 0;
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+ s->TotalActiveWriteback = s->TotalActiveWriteback + 1;
+ }
+ }
+
+ if (s->TotalActiveWriteback <= 1) {
+ p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency;
+ } else {
+ p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
+ }
+ if (p->USRRetrainingRequired)
+ p->Watermark->WritebackUrgentWatermark = p->Watermark->WritebackUrgentWatermark + p->mmSOCParameters.USRRetrainingLatency;
+
+ if (s->TotalActiveWriteback <= 1) {
+ p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency;
+ p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency;
+ } else {
+ p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
+ p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024 / 32 / p->SOCCLK;
+ }
+
+ if (p->USRRetrainingRequired)
+ p->Watermark->WritebackDRAMClockChangeWatermark = p->Watermark->WritebackDRAMClockChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
+
+ if (p->USRRetrainingRequired)
+ p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark);
+ DML_LOG_VERBOSE("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark);
+ DML_LOG_VERBOSE("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark);
+ DML_LOG_VERBOSE("DML::%s: USRRetrainingRequired = %u\n", __func__, p->USRRetrainingRequired);
+ DML_LOG_VERBOSE("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency);
+#endif
+
+ s->TotalPixelBW = 0.0;
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
+ double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0;
+ double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+ double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+ s->TotalPixelBW = s->TotalPixelBW + p->DPPPerSurface[k]
+ * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio + p->SwathWidthC[k] * p->BytePerPixelDETC[k] * v_ratio_c) / (h_total / pixel_clock_mhz);
+ }
+
+ *p->global_fclk_change_supported = true;
+ *p->global_dram_clock_change_supported = true;
+
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
+ double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0;
+ double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+ double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+ double v_taps = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
+ double v_taps_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
+ double h_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio;
+ double h_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio;
+ double LBBitPerPixel = 57;
+
+ s->LBLatencyHidingSourceLinesY[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthY[k] / math_max2(h_ratio, 1.0)), 1)) - (v_taps - 1));
+ s->LBLatencyHidingSourceLinesC[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthC[k] / math_max2(h_ratio_c, 1.0)), 1)) - (v_taps_c - 1));
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines);
+ DML_LOG_VERBOSE("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize);
+ DML_LOG_VERBOSE("DML::%s: k=%u, LBBitPerPixel = %f\n", __func__, k, LBBitPerPixel);
+ DML_LOG_VERBOSE("DML::%s: k=%u, HRatio = %f\n", __func__, k, h_ratio);
+ DML_LOG_VERBOSE("DML::%s: k=%u, VTaps = %f\n", __func__, k, v_taps);
+#endif
+
+ s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / v_ratio * (h_total / pixel_clock_mhz);
+ s->EffectiveLBLatencyHidingC = s->LBLatencyHidingSourceLinesC[k] / v_ratio_c * (h_total / pixel_clock_mhz);
+
+ s->EffectiveDETBufferSizeY = p->DETBufferSizeY[k];
+ if (p->UnboundedRequestEnabled) {
+ s->EffectiveDETBufferSizeY = s->EffectiveDETBufferSizeY + p->CompressedBufferSizeInkByte * 1024 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio) / (h_total / pixel_clock_mhz) / s->TotalPixelBW;
+ }
+
+ s->LinesInDETY[k] = (double)s->EffectiveDETBufferSizeY / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
+ s->LinesInDETYRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETY[k], p->SwathHeightY[k]));
+ s->FullDETBufferingTimeY = s->LinesInDETYRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio;
+
+ s->ActiveClockChangeLatencyHidingY = s->EffectiveLBLatencyHidingY + s->FullDETBufferingTimeY - ((double)p->DSTXAfterScaler[k] / h_total + (double)p->DSTYAfterScaler[k]) * h_total / pixel_clock_mhz;
+
+ if (p->NumberOfActiveSurfaces > 1) {
+ s->ActiveClockChangeLatencyHidingY = s->ActiveClockChangeLatencyHidingY - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightY[k] * (double)h_total / pixel_clock_mhz / v_ratio;
+ }
+
+ if (p->BytePerPixelDETC[k] > 0) {
+ s->LinesInDETC[k] = p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k];
+ s->LinesInDETCRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETC[k], p->SwathHeightC[k]));
+ s->FullDETBufferingTimeC = s->LinesInDETCRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio_c;
+ s->ActiveClockChangeLatencyHidingC = s->EffectiveLBLatencyHidingC + s->FullDETBufferingTimeC - ((double)p->DSTXAfterScaler[k] / (double)h_total + (double)p->DSTYAfterScaler[k]) * (double)h_total / pixel_clock_mhz;
+ if (p->NumberOfActiveSurfaces > 1) {
+ s->ActiveClockChangeLatencyHidingC = s->ActiveClockChangeLatencyHidingC - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightC[k] * (double)h_total / pixel_clock_mhz / v_ratio_c;
+ }
+ s->ActiveClockChangeLatencyHiding = math_min2(s->ActiveClockChangeLatencyHidingY, s->ActiveClockChangeLatencyHidingC);
+ } else {
+ s->ActiveClockChangeLatencyHiding = s->ActiveClockChangeLatencyHidingY;
+ }
+
+ s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->DRAMClockChangeWatermark;
+ s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->FCLKChangeWatermark;
+ s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark;
+ s->g6_temp_read_latency_margin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->temp_read_or_ppt_watermark_us;
+
+ if (p->VActiveLatencyHidingMargin)
+ p->VActiveLatencyHidingMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k];
+
+ if (p->VActiveLatencyHidingUs)
+ p->VActiveLatencyHidingUs[k] = s->ActiveClockChangeLatencyHiding;
+
+ if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+ s->WritebackLatencyHiding = (double)p->WritebackInterfaceBufferSize * 1024.0
+ / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
+ * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
+ / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height * (double)h_total / pixel_clock_mhz) * 4.0);
+ if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) {
+ s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2;
+ }
+ s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark;
+
+ s->WritebackFCLKChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackFCLKChangeWatermark;
+
+ s->ActiveDRAMClockChangeLatencyMargin[k] = math_min2(s->ActiveDRAMClockChangeLatencyMargin[k], s->WritebackDRAMClockChangeLatencyMargin);
+ s->ActiveFCLKChangeLatencyMargin[k] = math_min2(s->ActiveFCLKChangeLatencyMargin[k], s->WritebackFCLKChangeLatencyMargin);
+ }
+ p->MaxActiveDRAMClockChangeLatencySupported[k] = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 0 : (s->ActiveDRAMClockChangeLatencyMargin[k] + p->mmSOCParameters.DRAMClockChangeLatency);
+
+ uclk_pstate_change_strategy = p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy;
+ reserved_vblank_time_us = (double)p->display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns / 1000;
+
+ p->FCLKChangeSupport[k] = dml2_pstate_change_unsupported;
+ if (s->ActiveFCLKChangeLatencyMargin[k] > 0)
+ p->FCLKChangeSupport[k] = dml2_pstate_change_vactive;
+ else if (reserved_vblank_time_us >= p->mmSOCParameters.FCLKChangeLatency)
+ p->FCLKChangeSupport[k] = dml2_pstate_change_vblank;
+
+ if (p->FCLKChangeSupport[k] == dml2_pstate_change_unsupported)
+ *p->global_fclk_change_supported = false;
+
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_unsupported;
+ if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_auto) {
+ if (p->display_cfg->overrides.all_streams_blanked ||
+ (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency))
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank_and_vactive;
+ else if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0)
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive;
+ else if (reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank;
+ } else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vactive && s->ActiveDRAMClockChangeLatencyMargin[k] > 0)
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive;
+ else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vblank && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank;
+ else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_drr)
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_drr;
+ else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_svp)
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_svp;
+ else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_full_frame;
+
+ if (p->DRAMClockChangeSupport[k] == dml2_pstate_change_unsupported)
+ *p->global_dram_clock_change_supported = false;
+
+ s->dst_y_pstate = (unsigned int)(math_ceil2((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (h_total / pixel_clock_mhz), 1));
+ s->src_y_pstate_l = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio, p->SwathHeightY[k]));
+ s->src_y_ahead_l = (unsigned int)(math_floor2(p->DETBufferSizeY[k] / p->BytePerPixelDETY[k] / p->SwathWidthY[k], p->SwathHeightY[k]) + s->LBLatencyHidingSourceLinesY[k]);
+ s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height_l[k];
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate);
+ DML_LOG_VERBOSE("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l);
+ DML_LOG_VERBOSE("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l);
+ DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_height_l = %u\n", __func__, k, p->meta_row_height_l[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l);
+#endif
+ p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l;
+
+ if (p->BytePerPixelDETC[k] > 0) {
+ s->src_y_pstate_c = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio_c, p->SwathHeightC[k]));
+ s->src_y_ahead_c = (unsigned int)(math_floor2(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]);
+ s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_c[k];
+
+ if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format))
+ p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, 2 * s->sub_vp_lines_c));
+ else
+ p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, s->sub_vp_lines_c));
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, k, p->meta_row_height_c[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c);
+ DML_LOG_VERBOSE("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c);
+ DML_LOG_VERBOSE("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c);
+#endif
+ }
+ }
+
+ *p->g6_temp_read_support = true;
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) &&
+ (s->g6_temp_read_latency_margin[k] < 0)) {
+ *p->g6_temp_read_support = false;
+ }
+ }
+
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) && ((!FoundCriticalSurface)
+ || ((s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency) < *p->MaxActiveFCLKChangeLatencySupported))) {
+ FoundCriticalSurface = true;
+ *p->MaxActiveFCLKChangeLatencySupported = s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency;
+ }
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->global_dram_clock_change_supported);
+ DML_LOG_VERBOSE("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->global_fclk_change_supported);
+ DML_LOG_VERBOSE("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported);
+ DML_LOG_VERBOSE("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport);
+#endif
+}
+
+/*
+ * For every non-phantom plane, work out how many luma and chroma bytes have to
+ * be fetched to cover p->latency_to_hide_us[0] and store the results in
+ * p->bytes_required_l[] and p->bytes_required_c[].
+ *
+ * The latency is converted to destination lines using the stream's line time,
+ * then to source lines through the plane's vertical scaling ratio (passed
+ * through math_ceil2() together with the swath height). Meta row bytes are
+ * added when DCC is enabled and p->mrq_present is set; DPTE row bytes are
+ * added when GPUVM is enabled. Entries of phantom planes are not written.
+ */
+static void calculate_bytes_to_fetch_required_to_hide_latency(
+		struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *p)
+{
+	unsigned int k;
+	unsigned int stream_index;
+	unsigned int dst_lines;
+	unsigned int src_lines_l;
+	unsigned int src_lines_c;
+	double line_time;
+
+	for (k = 0; k < p->num_active_planes; k++) {
+		if (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]))
+			continue;
+
+		stream_index = p->display_cfg->plane_descriptors[k].stream_index;
+
+		/* duration of one destination line: h_total over the pixel clock (kHz), scaled by 1000 */
+		line_time = (double)p->display_cfg->stream_descriptors[stream_index].timing.h_total /
+			(double)p->display_cfg->stream_descriptors[stream_index].timing.pixel_clock_khz * 1000.0;
+
+		dst_lines = (unsigned int)math_ceil(p->latency_to_hide_us[0] / line_time);
+
+		/* destination lines -> source lines for each of the two planes */
+		src_lines_l = (unsigned int)math_ceil2(p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio * dst_lines,
+			p->swath_height_l[k]);
+		src_lines_c = (unsigned int)math_ceil2(p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio * dst_lines,
+			p->swath_height_c[k]);
+
+		/* pixel data */
+		p->bytes_required_l[k] = src_lines_l * p->num_of_dpp[k] * p->swath_width_l[k] * p->byte_per_pix_l[k];
+		p->bytes_required_c[k] = src_lines_c * p->num_of_dpp[k] * p->swath_width_c[k] * p->byte_per_pix_c[k];
+
+		/* DCC meta rows; the chroma term is skipped when its meta row height is 0 */
+		if (p->display_cfg->plane_descriptors[k].surface.dcc.enable && p->mrq_present) {
+			p->bytes_required_l[k] += (unsigned int)math_ceil((double)src_lines_l / p->meta_row_height_l[k]) * p->meta_row_bytes_per_row_ub_l[k];
+			if (p->meta_row_height_c[k])
+				p->bytes_required_c[k] += (unsigned int)math_ceil((double)src_lines_c / p->meta_row_height_c[k]) * p->meta_row_bytes_per_row_ub_c[k];
+		}
+
+		/* page table rows; the chroma term is skipped when its DPTE row height is 0 */
+		if (p->display_cfg->gpuvm_enable == true) {
+			p->bytes_required_l[k] += (unsigned int)math_ceil((double)src_lines_l / p->dpte_row_height_l[k]) * p->dpte_bytes_per_row_l[k];
+			if (p->dpte_row_height_c[k])
+				p->bytes_required_c[k] += (unsigned int)math_ceil((double)src_lines_c / p->dpte_row_height_c[k]) * p->dpte_bytes_per_row_c[k];
+		}
+	}
+}
+
+/*
+ * For every non-phantom plane, compute the longest time needed to fetch
+ * bytes_required_l[] / bytes_required_c[] using only the bandwidth by which
+ * the peak requirement exceeds the average vactive requirement.
+ *
+ * The excess is evaluated for every SoC state and bandwidth type, split
+ * between luma and chroma in proportion to their surface read bandwidth, and
+ * for the DRAM bandwidth type divided by the plane's DCC overhead factor.
+ * Only strictly positive excess values contribute; the maximum of
+ * bytes / excess is written to vactive_det_fill_delay_us[] (0.0 if none).
+ * Entries of phantom planes are not written.
+ */
+static noinline_for_stack void calculate_vactive_det_fill_latency(
+		const struct dml2_display_cfg *display_cfg,
+		unsigned int num_active_planes,
+		unsigned int bytes_required_l[],
+		unsigned int bytes_required_c[],
+		double dcc_dram_bw_nom_overhead_factor_p0[],
+		double dcc_dram_bw_nom_overhead_factor_p1[],
+		double surface_read_bw_l[],
+		double surface_read_bw_c[],
+		double (*surface_avg_vactive_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],
+		double (*surface_peak_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],
+		/* output */
+		double vactive_det_fill_delay_us[])
+{
+	unsigned int k;
+	unsigned int state;
+	unsigned int type;
+	double excess_bw;
+	double excess_bw_l;
+	double excess_bw_c;
+	double total_read_bw;
+	double factor_l;
+	double factor_c;
+
+	for (k = 0; k < num_active_planes; k++) {
+		if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
+			continue;
+
+		vactive_det_fill_delay_us[k] = 0.0;
+		total_read_bw = surface_read_bw_l[k] + surface_read_bw_c[k];
+
+		for (state = 0; state < dml2_core_internal_soc_state_max; state++) {
+			for (type = 0; type < dml2_core_internal_bw_max; type++) {
+				excess_bw = (surface_peak_required_bw[state][type][k] - surface_avg_vactive_required_bw[state][type][k]);
+
+				/* the DCC overhead factors only apply to the DRAM bandwidth type */
+				if (type == dml2_core_internal_bw_dram) {
+					factor_l = dcc_dram_bw_nom_overhead_factor_p0[k];
+					factor_c = dcc_dram_bw_nom_overhead_factor_p1[k];
+				} else {
+					factor_l = 1.0;
+					factor_c = 1.0;
+				}
+
+				/* luma share of the excess bandwidth */
+				excess_bw_l = excess_bw * surface_read_bw_l[k] / total_read_bw / factor_l;
+				if (excess_bw_l > 0.0)
+					vactive_det_fill_delay_us[k] = math_max2(vactive_det_fill_delay_us[k], bytes_required_l[k] / excess_bw_l);
+
+				/* chroma share of the excess bandwidth */
+				excess_bw_c = excess_bw * surface_read_bw_c[k] / total_read_bw / factor_c;
+				if (excess_bw_c > 0.0)
+					vactive_det_fill_delay_us[k] = math_max2(vactive_det_fill_delay_us[k], bytes_required_c[k] / excess_bw_c);
+			}
+		}
+	}
+}
+
+/*
+ * For every non-phantom plane, derive the extra luma/chroma bandwidth needed
+ * to fetch bytes_required_l[] / bytes_required_c[] within the plane's
+ * max_vactive_det_fill_delay_us[dml2_pstate_type_uclk] override.
+ *
+ * Both outputs are 0.0 when the override is not greater than zero. Entries of
+ * phantom planes are not written.
+ */
+static void calculate_excess_vactive_bandwidth_required(
+		const struct dml2_display_cfg *display_cfg,
+		unsigned int num_active_planes,
+		unsigned int bytes_required_l[],
+		unsigned int bytes_required_c[],
+		/* outputs */
+		double excess_vactive_fill_bw_l[],
+		double excess_vactive_fill_bw_c[])
+{
+	unsigned int k;
+	double max_fill_delay;
+
+	for (k = 0; k < num_active_planes; k++) {
+		if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
+			continue;
+
+		max_fill_delay = (double)display_cfg->plane_descriptors[k].overrides.max_vactive_det_fill_delay_us[dml2_pstate_type_uclk];
+
+		if (max_fill_delay > 0.0) {
+			excess_vactive_fill_bw_l[k] = (double)bytes_required_l[k] / max_fill_delay;
+			excess_vactive_fill_bw_c[k] = (double)bytes_required_c[k] / max_fill_delay;
+		} else {
+			/* no fill-delay budget configured for this plane */
+			excess_vactive_fill_bw_l[k] = 0.0;
+			excess_vactive_fill_bw_c[k] = 0.0;
+		}
+	}
+}
+
+/*
+ * Convert a uclk frequency to a DRAM bandwidth: the frequency is multiplied
+ * by the channel count, channel width in bytes and transactions per clock of
+ * dram_config, then divided by 1000.
+ */
+static double uclk_khz_to_dram_bw_mbps(unsigned long uclk_khz, const struct dml2_dram_params *dram_config)
+{
+	return ((double)uclk_khz * dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0;
+}
+
+/*
+ * Convert a DRAM bandwidth to a uclk frequency: the bandwidth is divided by
+ * the product of channel count, channel width in bytes and transactions per
+ * clock of dram_config, then divided by 1000.
+ */
+static double dram_bw_kbps_to_uclk_mhz(unsigned long long bw_kbps, const struct dml2_dram_params *dram_config)
+{
+	return (double)bw_kbps / (dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0;
+}
+
+/*
+ * Pick the entry of per_uclk_dpm_params[] that applies to uclk_freq_khz.
+ *
+ * The table is scanned in index order and the scan stops at the first entry
+ * whose minimum_uclk_khz is 0 or greater than uclk_freq_khz (presumably the
+ * table is sorted ascending with unused entries zeroed - confirm against the
+ * table owner).
+ *
+ * Returns the index of the last entry that did not stop the scan, or 0 if the
+ * first entry already stops it. If no entry stops the scan, the last table
+ * index is returned.
+ */
+static unsigned int get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params)
+{
+	unsigned int i;
+	unsigned int index = 0;
+
+	for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) {
+		DML_LOG_VERBOSE("DML::%s: per_uclk_dpm_params[%u].minimum_uclk_khz = %ld\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz);
+
+		if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz ||
+			per_uclk_dpm_params[i].minimum_uclk_khz == 0) {
+			break;
+		}
+
+		/*
+		 * Entry i qualifies. Recording it here (rather than i - 1 at the
+		 * top of the iteration) also yields the last index when the scan
+		 * runs through the whole table without stopping.
+		 */
+		index = i;
+	}
+	DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %lu\n", __func__, uclk_freq_khz);
+	DML_LOG_VERBOSE("DML::%s: index = %u\n", __func__, index);
+	return index;
+}
+
+/*
+ * Find the index of the clk_table->uclk entry whose frequency is exactly
+ * uclk_freq_khz.
+ *
+ * Returns the matching index. If no entry matches, DML_ASSERT() is invoked
+ * and clk_table->uclk.num_clk_values is returned, which is not a valid index
+ * into clk_values_khz[]; callers must not rely on the result in that case.
+ */
+static unsigned int get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table)
+{
+	unsigned int i;
+	bool clk_entry_found = false;
+
+	for (i = 0; i < clk_table->uclk.num_clk_values; i++) {
+		DML_LOG_VERBOSE("DML::%s: clk_table.uclk.clk_values_khz[%u] = %ld\n", __func__, i, clk_table->uclk.clk_values_khz[i]);
+
+		if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) {
+			clk_entry_found = true;
+			break;
+		}
+	}
+
+	if (!clk_entry_found)
+		DML_ASSERT(clk_entry_found);
+#if defined(__DML_VBA_DEBUG__)
+	DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %lu\n", __func__, uclk_freq_khz);
+	DML_LOG_VERBOSE("DML::%s: index = %u\n", __func__, i);
+#endif
+	return i;
+}
+
+/*
+ * Sum the bytes a pipe fetches for a flip.
+ *
+ * The result is the sum of two separately truncated terms:
+ *   - vm_bytes scaled by hostvm_inefficiency_factor, plus two meta rows
+ *   - two DPTE rows scaled by hostvm_inefficiency_factor
+ */
+static unsigned int get_pipe_flip_bytes(
+		double hostvm_inefficiency_factor,
+		unsigned int vm_bytes,
+		unsigned int dpte_row_bytes,
+		unsigned int meta_row_bytes)
+{
+	/* each term is truncated to an integer on its own before the addition */
+	const unsigned int vm_and_meta_bytes = (unsigned int) ((vm_bytes * hostvm_inefficiency_factor) + 2*meta_row_bytes);
+	const unsigned int dpte_bytes = (unsigned int) (2*dpte_row_bytes * hostvm_inefficiency_factor);
+
+	return vm_and_meta_bytes + dpte_bytes;
+}
+
+/*
+ * Compute the HostVM inefficiency factors.
+ *
+ * Both outputs are 1 unless gpuvm_enable and hostvm_enable are both set. In
+ * that case the factor is the ratio of urg_bandwidth_avail_active_pixel_and_vm
+ * to urg_bandwidth_avail_active_vm_only. The prefetch variant takes the same
+ * value, except that it is set to 4 when the ratio is below 4 and
+ * remote_iommu_outstanding_translations is below max_outstanding_reqs.
+ */
+static void calculate_hostvm_inefficiency_factor(
+		double *HostVMInefficiencyFactor,
+		double *HostVMInefficiencyFactorPrefetch,
+		bool gpuvm_enable,
+		bool hostvm_enable,
+		unsigned int remote_iommu_outstanding_translations,
+		unsigned int max_outstanding_reqs,
+		double urg_bandwidth_avail_active_pixel_and_vm,
+		double urg_bandwidth_avail_active_vm_only)
+{
+	double ratio;
+
+	*HostVMInefficiencyFactor = 1;
+	*HostVMInefficiencyFactorPrefetch = 1;
+
+	/* the defaults stand unless both translation stages are in use */
+	if (!gpuvm_enable || !hostvm_enable)
+		return;
+
+	ratio = urg_bandwidth_avail_active_pixel_and_vm / urg_bandwidth_avail_active_vm_only;
+
+	*HostVMInefficiencyFactor = ratio;
+	if ((ratio < 4) && (remote_iommu_outstanding_translations < max_outstanding_reqs))
+		*HostVMInefficiencyFactorPrefetch = 4;
+	else
+		*HostVMInefficiencyFactorPrefetch = ratio;
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: urg_bandwidth_avail_active_pixel_and_vm = %f\n", __func__, urg_bandwidth_avail_active_pixel_and_vm);
+	DML_LOG_VERBOSE("DML::%s: urg_bandwidth_avail_active_vm_only = %f\n", __func__, urg_bandwidth_avail_active_vm_only);
+	DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, *HostVMInefficiencyFactor);
+	DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactorPrefetch = %f\n", __func__, *HostVMInefficiencyFactorPrefetch);
+#endif
+}
+
+struct dml2_core_internal_g6_temp_read_blackouts_table { /* lookup table type scanned by get_g6_temp_read_blackout_us() */
+ struct {
+ unsigned int uclk_khz; /* threshold compared with the requested uclk frequency (kHz per the field name); 0 ends the scan */
+ unsigned int blackout_us; /* value selected once the requested frequency is not below uclk_khz */
+ } entries[DML_MAX_CLK_TABLE_SIZE]; /* scanned in index order */
+};
+
+/*
+ * Built-in table consulted by get_g6_temp_read_blackout_us() when the SoC
+ * bounding box carries no override. Entries are listed by ascending uclk_khz;
+ * remaining entries are zero-initialized, which ends the lookup.
+ */
+struct dml2_core_internal_g6_temp_read_blackouts_table core_dcn4_g6_temp_read_blackout_table = {
+	.entries = {
+		{ .uclk_khz = 96000, .blackout_us = 23 },
+		{ .uclk_khz = 435000, .blackout_us = 10 },
+		{ .uclk_khz = 521000, .blackout_us = 10 },
+		{ .uclk_khz = 731000, .blackout_us = 8 },
+		{ .uclk_khz = 822000, .blackout_us = 8 },
+		{ .uclk_khz = 962000, .blackout_us = 5 },
+		{ .uclk_khz = 1069000, .blackout_us = 5 },
+		{ .uclk_khz = 1187000, .blackout_us = 5 },
+	},
+};
+
+/*
+ * Return the G6 temperature read blackout for the given uclk frequency.
+ *
+ * If the first g6_temp_read_blackout_us entry of the SoC power management
+ * parameters is greater than zero, the entry at min_clk_index of that array
+ * is returned. Otherwise core_dcn4_g6_temp_read_blackout_table is scanned in
+ * index order and the blackout of the last entry whose uclk_khz is non-zero
+ * and not above uclk_freq_khz is returned; entry 0 is used when no entry
+ * qualifies.
+ */
+static double get_g6_temp_read_blackout_us(
+		struct dml2_soc_bb *soc,
+		unsigned int uclk_freq_khz,
+		unsigned int min_clk_index)
+{
+	unsigned int entry;
+	unsigned int entry_uclk_khz;
+	unsigned int selected_us;
+
+	/* overrides present in the SoC BB take precedence over the internal table */
+	if (soc->power_management_parameters.g6_temp_read_blackout_us[0] > 0.0)
+		return soc->power_management_parameters.g6_temp_read_blackout_us[min_clk_index];
+
+	/* internal table: start from entry 0 and advance while entries still qualify */
+	selected_us = core_dcn4_g6_temp_read_blackout_table.entries[0].blackout_us;
+
+	for (entry = 0; entry < DML_MAX_CLK_TABLE_SIZE; entry++) {
+		entry_uclk_khz = core_dcn4_g6_temp_read_blackout_table.entries[entry].uclk_khz;
+
+		if (entry_uclk_khz == 0 || uclk_freq_khz < entry_uclk_khz)
+			break;
+
+		selected_us = core_dcn4_g6_temp_read_blackout_table.entries[entry].blackout_us;
+	}
+
+	return (double)selected_us;
+}
+
+/*
+ * Compute the maximum urgent latency as the sum of three terms:
+ *   - the per-DPM maximum_latency_when_urgent_uclk_cycles at min_clk_index
+ *     divided by uclk_freq_mhz, grown by umc_max_latency_margin percent
+ *   - mall_overhead_fclk_cycles divided by FabricClock
+ *   - max_round_trip_to_furthest_cs_fclk_cycles divided by FabricClock, grown
+ *     by fabric_max_transport_latency_margin percent
+ */
+static double get_max_urgent_latency_us(
+		struct dml2_dcn4x_soc_qos_params *dcn4x,
+		double uclk_freq_mhz,
+		double FabricClock,
+		unsigned int min_clk_index)
+{
+	double umc_latency;
+	double mall_overhead;
+	double fabric_round_trip;
+
+	umc_latency = dcn4x->per_uclk_dpm_params[min_clk_index].maximum_latency_when_urgent_uclk_cycles / uclk_freq_mhz
+		* (1 + dcn4x->umc_max_latency_margin / 100.0);
+	mall_overhead = dcn4x->mall_overhead_fclk_cycles / FabricClock;
+	fabric_round_trip = dcn4x->max_round_trip_to_furthest_cs_fclk_cycles / FabricClock
+		* (1 + dcn4x->fabric_max_transport_latency_margin / 100.0);
+
+	return umc_latency + mall_overhead + fabric_round_trip;
+}
+
+/*
+ * For every non-phantom plane, convert the DRAM clock change watermark into a
+ * number of destination lines (watermark divided by the stream's line time,
+ * rounded up by math_ceil()) and clamp it to v_total - 1 of the plane's
+ * stream. Entries of phantom planes are not written.
+ */
+static void calculate_pstate_keepout_dst_lines(
+		const struct dml2_display_cfg *display_cfg,
+		const struct dml2_core_internal_watermarks *watermarks,
+		unsigned int pstate_keepout_dst_lines[])
+{
+	const struct dml2_stream_parameters *stream;
+	unsigned int plane;
+	unsigned int keepout_lines;
+	double line_time;
+
+	for (plane = 0; plane < display_cfg->num_planes; plane++) {
+		if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane]))
+			continue;
+
+		stream = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane].stream_index];
+
+		/* duration of one line: h_total over the pixel clock (kHz), scaled by 1000 */
+		line_time = (double)stream->timing.h_total * 1000.0 / (double)stream->timing.pixel_clock_khz;
+
+		keepout_lines = (unsigned int)math_ceil(watermarks->DRAMClockChangeWatermark / line_time);
+
+		/* never report more lines than v_total - 1 */
+		if (keepout_lines > stream->timing.v_total - 1)
+			keepout_lines = stream->timing.v_total - 1;
+
+		pstate_keepout_dst_lines[plane] = keepout_lines;
+	}
+}
+
+static noinline_for_stack void dml_core_ms_prefetch_check(struct dml2_core_internal_display_mode_lib *mode_lib,
+ const struct dml2_display_cfg *display_cfg)
+{
+ struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals;
+ struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params;
+ struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
+ struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params;
+#ifdef DML_GLOBAL_PREFETCH_CHECK
+ struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params;
+#endif
+ struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
+
+ double min_return_bw_for_latency;
+ unsigned int k;
+
+ mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep;
+
+ calculate_hostvm_inefficiency_factor(
+ &s->HostVMInefficiencyFactor,
+ &s->HostVMInefficiencyFactorPrefetch,
+
+ display_cfg->gpuvm_enable,
+ display_cfg->hostvm_enable,
+ mode_lib->ip.remote_iommu_outstanding_translations,
+ mode_lib->soc.max_outstanding_reqs,
+ mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active],
+ mode_lib->ms.support.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
+
+ mode_lib->ms.Total3dlutActive = 0;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
+ mode_lib->ms.Total3dlutActive = mode_lib->ms.Total3dlutActive + 1;
+
+ // Calculate tdlut schedule related terms
+ calculate_tdlut_setting_params->dispclk_mhz = mode_lib->ms.RequiredDISPCLK;
+ calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
+ calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode;
+ calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode;
+ calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size;
+ calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
+ calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
+ calculate_tdlut_setting_params->tdlut_mpc_width_flag = display_cfg->plane_descriptors[k].tdlut.tdlut_mpc_width_flag;
+ calculate_tdlut_setting_params->is_gfx11 = dml_get_gfx_version(display_cfg->plane_descriptors[k].surface.tiling);
+
+ // output
+ calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k];
+ calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k];
+ calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
+ calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
+ calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
+ calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k];
+ calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
+
+ calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
+ }
+
+ min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active];
+
+ if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3)
+ s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes));
+
+ CalculateExtraLatency(
+ display_cfg,
+ mode_lib->ip.rob_buffer_size_kbytes,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles,
+ s->ReorderingBytes,
+ mode_lib->ms.DCFCLK,
+ mode_lib->ms.FabricClock,
+ mode_lib->ip.pixel_chunk_size_kbytes,
+ min_return_bw_for_latency,
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.NoOfDPP,
+ mode_lib->ms.dpte_group_bytes,
+ s->tdlut_bytes_per_group,
+ s->HostVMInefficiencyFactor,
+ s->HostVMInefficiencyFactorPrefetch,
+ mode_lib->soc.hostvm_min_page_size_kbytes,
+ mode_lib->soc.qos_parameters.qos_type,
+ !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable),
+ mode_lib->soc.max_outstanding_reqs,
+ mode_lib->ms.support.request_size_bytes_luma,
+ mode_lib->ms.support.request_size_bytes_chroma,
+ mode_lib->ip.meta_chunk_size_kbytes,
+ mode_lib->ip.dchub_arb_to_ret_delay,
+ mode_lib->ms.TripToMemory,
+ mode_lib->ip.hostvm_mode,
+
+ // output
+ &mode_lib->ms.ExtraLatency,
+ &mode_lib->ms.ExtraLatency_sr,
+ &mode_lib->ms.ExtraLatencyPrefetch);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++)
+ s->impacted_dst_y_pre[k] = 0;
+
+ s->recalc_prefetch_schedule = 0;
+ s->recalc_prefetch_done = 0;
+ do {
+ mode_lib->ms.support.PrefetchSupported = true;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+ s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format;
+
+ s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
+ mode_lib->ms.NoOfDPP[k],
+ display_cfg->plane_descriptors[k].composition.viewport.plane0.width,
+ display_cfg->plane_descriptors[k].composition.viewport.plane0.height,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
+ display_cfg->plane_descriptors[k].composition.rotation_angle);
+
+ s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
+ mode_lib->ms.NoOfDPP[k],
+ display_cfg->plane_descriptors[k].composition.viewport.plane1.width,
+ display_cfg->plane_descriptors[k].composition.viewport.plane1.height,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
+ display_cfg->plane_descriptors[k].composition.rotation_angle);
+
+ struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe;
+
+ mode_lib->ms.TWait[k] = CalculateTWait(
+ display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns,
+ mode_lib->ms.UrgLatency,
+ mode_lib->ms.TripToMemory,
+ !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ?
+ get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), mode_lib->ms.state_idx) : 0.0);
+
+ myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k];
+ myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK;
+ myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+ myPipe->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep;
+ myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[k];
+ myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled;
+ myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+ myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+ myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
+ myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
+ myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
+ myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored;
+ myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
+ myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
+ myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
+ myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
+ myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
+ myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors;
+ myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active;
+ myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
+ myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active;
+ myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
+ myPipe->ODMMode = mode_lib->ms.ODMMode[k];
+ myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
+ myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
+ myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
+ myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
+ DML_LOG_VERBOSE("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[k]);
+#endif
+ CalculatePrefetchSchedule_params->display_cfg = display_cfg;
+ CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch;
+ CalculatePrefetchSchedule_params->myPipe = myPipe;
+ CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelay[k];
+ CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter;
+ CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl;
+ CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only;
+ CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor;
+ CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal;
+ CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->ms.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
+ CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format;
+ CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters;
+ CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k];
+ CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
+ CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable;
+ CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled;
+ CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required;
+ CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes;
+ CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency;
+ CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->ms.ExtraLatencyPrefetch;
+ CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc;
+ CalculatePrefetchSchedule_params->vm_bytes = mode_lib->ms.vm_bytes[k];
+ CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[k];
+ CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[k];
+ CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k];
+ CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY[k];
+ CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[k];
+ CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k];
+ CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC[k];
+ CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub[k];
+ CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub[k];
+ CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightY[k];
+ CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightC[k];
+ CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait[k];
+ CalculatePrefetchSchedule_params->Ttrip = mode_lib->ms.TripToMemory;
+ CalculatePrefetchSchedule_params->Turg = mode_lib->ms.UrgLatency;
+ CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
+ CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k];
+ CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k];
+ CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k];
+ CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k];
+ CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0);
+ CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k];
+ CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k];
+ CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
+ CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+ CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k];
+ CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k];
+ CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k];
+ CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->ms.vactive_sw_bw_l[k];
+ CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->ms.vactive_sw_bw_c[k];
+
+ // output
+ CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k];
+ CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k];
+ CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->ms.dst_y_prefetch[k];
+ CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->ms.LinesForVM[k];
+ CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->ms.LinesForDPTERow[k];
+ CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[k];
+ CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k];
+ CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l
+ CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c
+ CalculatePrefetchSchedule_params->RequiredPrefetchBWMax = &mode_lib->ms.RequiredPrefetchBWMax[k];
+ CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k];
+ CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k];
+ CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k];
+ CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k];
+ CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0];
+ CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1];
+ CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2];
+ CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k];
+ CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k];
+ CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k];
+ CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k];
+ CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k];
+ CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k];
+ CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0];
+ CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1];
+ CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2];
+ CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k];
+ CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k];
+ CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k];
+ CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k];
+ CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->prefetch_swath_time_us[k];
+
+ mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
+
+ mode_lib->ms.support.PrefetchSupported &= !mode_lib->ms.NoTimeForPrefetch[k];
+ DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank);
+ DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank);
+ } // for k num_planes
+
+ CalculateDCFCLKDeepSleepTdlut(
+ display_cfg,
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.BytePerPixelY,
+ mode_lib->ms.BytePerPixelC,
+ mode_lib->ms.SwathWidthY,
+ mode_lib->ms.SwathWidthC,
+ mode_lib->ms.NoOfDPP,
+ mode_lib->ms.PSCL_FACTOR,
+ mode_lib->ms.PSCL_FACTOR_CHROMA,
+ mode_lib->ms.RequiredDPPCLK,
+ mode_lib->ms.vactive_sw_bw_l,
+ mode_lib->ms.vactive_sw_bw_c,
+ mode_lib->soc.return_bus_width_bytes,
+ mode_lib->ms.RequiredDISPCLK,
+ s->tdlut_bytes_to_deliver,
+ s->prefetch_swath_time_us,
+
+ /* Output */
+ &mode_lib->ms.dcfclk_deepsleep);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ if (mode_lib->ms.dst_y_prefetch[k] < 2.0
+ || mode_lib->ms.LinesForVM[k] >= 32.0
+ || mode_lib->ms.LinesForDPTERow[k] >= 16.0
+ || mode_lib->ms.NoTimeForPrefetch[k] == true
+ || s->DSTYAfterScaler[k] > 8) {
+ mode_lib->ms.support.PrefetchSupported = false;
+ DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]);
+ }
+ }
+
+ mode_lib->ms.support.DynamicMetadataSupported = true;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.NoTimeForDynamicMetadata[k] == true) {
+ mode_lib->ms.support.DynamicMetadataSupported = false;
+ }
+ }
+
+ mode_lib->ms.support.VRatioInPrefetchSupported = true;
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ ||
+ mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) {
+ mode_lib->ms.support.VRatioInPrefetchSupported = false;
+ DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__);
+ DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__);
+ DML_LOG_VERBOSE("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported);
+ }
+ }
+
+ mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.VRatioInPrefetchSupported;
+
+ // By default, do not recalc prefetch schedule
+ s->recalc_prefetch_schedule = 0;
+
+ // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok
+ if (mode_lib->ms.support.PrefetchSupported) {
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ // Calculate Urgent burst factor for prefetch
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k);
+ DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreY=%f\n", __func__, k, mode_lib->ms.VRatioPreY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreC=%f\n", __func__, k, mode_lib->ms.VRatioPreC[k]);
+#endif
+ CalculateUrgentBurstFactor(
+ &display_cfg->plane_descriptors[k],
+ mode_lib->ms.swath_width_luma_ub[k],
+ mode_lib->ms.swath_width_chroma_ub[k],
+ mode_lib->ms.SwathHeightY[k],
+ mode_lib->ms.SwathHeightC[k],
+ s->line_times[k],
+ mode_lib->ms.UrgLatency,
+ mode_lib->ms.VRatioPreY[k],
+ mode_lib->ms.VRatioPreC[k],
+ mode_lib->ms.BytePerPixelInDETY[k],
+ mode_lib->ms.BytePerPixelInDETC[k],
+ mode_lib->ms.DETBufferSizeY[k],
+ mode_lib->ms.DETBufferSizeC[k],
+ /* Output */
+ &mode_lib->ms.UrgentBurstFactorLumaPre[k],
+ &mode_lib->ms.UrgentBurstFactorChromaPre[k],
+ &mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]);
+ }
+
+ // Calculate urgent bandwidth required, both urg and non urg peak bandwidth
+ // assume flip bw is 0 at this point
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++)
+ mode_lib->ms.final_flip_bw[k] = 0;
+
+ calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->ms.support.urg_vactive_bandwidth_required;
+ calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required;
+ calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->ms.support.urg_bandwidth_required_qual;
+ calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required;
+ calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = mode_lib->ms.surface_avg_vactive_required_bw;
+ calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw;
+
+ calculate_peak_bandwidth_params->display_cfg = display_cfg;
+ calculate_peak_bandwidth_params->inc_flip_bw = 0;
+ calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes;
+ calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP;
+ calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0;
+ calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1;
+ calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0;
+ calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1;
+ calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor;
+ calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor;
+
+ calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l;
+ calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c;
+ calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
+ calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
+ calculate_peak_bandwidth_params->prefetch_bandwidth_max = mode_lib->ms.RequiredPrefetchBWMax;
+ calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
+ calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c;
+ calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw;
+ calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
+ calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw;
+ calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw;
+ calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw;
+ calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw;
+ calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma;
+ calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma;
+ calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor;
+ calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre;
+ calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre;
+ calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre;
+
+ calculate_peak_bandwidth_required(
+ &mode_lib->scratch,
+ calculate_peak_bandwidth_params);
+
+ // Check urg peak bandwidth against available urg bw
+ // check at SDP and DRAM, for all soc states (SVP prefetch and Sys Active)
+ check_urgent_bandwidth_support(
+ &s->dummy_single[0], // double* frac_urg_bandwidth
+ &s->dummy_single[1], // double* frac_urg_bandwidth_mall
+ &mode_lib->ms.support.UrgVactiveBandwidthSupport,
+ &mode_lib->ms.support.PrefetchBandwidthSupported,
+
+ mode_lib->soc.mall_allocated_for_dcn_mbytes,
+ mode_lib->ms.support.non_urg_bandwidth_required,
+ mode_lib->ms.support.urg_vactive_bandwidth_required,
+ mode_lib->ms.support.urg_bandwidth_required,
+ mode_lib->ms.support.urg_bandwidth_available);
+
+ mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.PrefetchBandwidthSupported;
+ DML_LOG_VERBOSE("DML::%s: PrefetchBandwidthSupported=%0d\n", __func__, mode_lib->ms.support.PrefetchBandwidthSupported);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ if (mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]) {
+ mode_lib->ms.support.PrefetchSupported = false;
+ DML_LOG_VERBOSE("DML::%s: k=%d, NotEnoughUrgentLatencyHidingPre=%d\n", __func__, k, mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]);
+ }
+ }
+
+#ifdef DML_GLOBAL_PREFETCH_CHECK
+ if (mode_lib->ms.support.PrefetchSupported && mode_lib->ms.num_active_planes > 1 && s->recalc_prefetch_done == 0) {
+ CheckGlobalPrefetchAdmissibility_params->num_active_planes = mode_lib->ms.num_active_planes;
+ CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format;
+ CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
+ CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
+ CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l;
+ CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c;
+ CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->ms.SwathHeightY;
+ CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->ms.SwathHeightC;
+ CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
+ CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->ms.CompressedBufferSizeInkByte;
+ CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->ms.DETBufferSizeY;
+ CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->ms.DETBufferSizeC;
+ CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l;
+ CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c;
+ CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes;
+ CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = s->Tpre_rounded;
+ CheckGlobalPrefetchAdmissibility_params->Tpre_oto = s->Tpre_oto;
+ CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->ms.support.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
+ CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times;
+ CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->ms.dst_y_prefetch;
+ if (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps < 10 * 1024)
+ CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = 10 * 1024;
+
+ CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps / (double) mode_lib->soc.return_bus_width_bytes) /
+ ((double)mode_lib->soc.qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0);
+
+ // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible
+ CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->recalc_prefetch_schedule;
+ CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre;
+ mode_lib->ms.support.PrefetchSupported = CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params);
+ s->recalc_prefetch_done = 1;
+ s->recalc_prefetch_schedule = 1;
+ }
+#endif
+ } // prefetch schedule ok, do urg bw and flip schedule
+ } while (s->recalc_prefetch_schedule);
+
+ // Flip Schedule
+ // Both prefetch schedule and BW okay
+ if (mode_lib->ms.support.PrefetchSupported == true) {
+ mode_lib->ms.BandwidthAvailableForImmediateFlip =
+ get_bandwidth_available_for_immediate_flip(
+ dml2_core_internal_soc_state_sys_active,
+ mode_lib->ms.support.urg_bandwidth_required_qual, // no flip
+ mode_lib->ms.support.urg_bandwidth_available);
+
+ mode_lib->ms.TotImmediateFlipBytes = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ if (display_cfg->plane_descriptors[k].immediate_flip) {
+ s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(
+ s->HostVMInefficiencyFactor,
+ mode_lib->ms.vm_bytes[k],
+ mode_lib->ms.DPTEBytesPerRow[k],
+ mode_lib->ms.meta_row_bytes[k]);
+ } else {
+ s->per_pipe_flip_bytes[k] = 0;
+ }
+ mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k];
+
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ CalculateFlipSchedule(
+ &mode_lib->scratch,
+ display_cfg->plane_descriptors[k].immediate_flip,
+ 1, // use_lb_flip_bw
+ s->HostVMInefficiencyFactor,
+ s->Tvm_trips_flip[k],
+ s->Tr0_trips_flip[k],
+ s->Tvm_trips_flip_rounded[k],
+ s->Tr0_trips_flip_rounded[k],
+ display_cfg->gpuvm_enable,
+ mode_lib->ms.vm_bytes[k],
+ mode_lib->ms.DPTEBytesPerRow[k],
+ mode_lib->ms.BandwidthAvailableForImmediateFlip,
+ mode_lib->ms.TotImmediateFlipBytes,
+ display_cfg->plane_descriptors[k].pixel_format,
+ (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)),
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
+ mode_lib->ms.Tno_bw_flip[k],
+ mode_lib->ms.dpte_row_height[k],
+ mode_lib->ms.dpte_row_height_chroma[k],
+ mode_lib->ms.use_one_row_for_frame_flip[k],
+ mode_lib->ip.max_flip_time_us,
+ mode_lib->ip.max_flip_time_lines,
+ s->per_pipe_flip_bytes[k],
+ mode_lib->ms.meta_row_bytes[k],
+ s->meta_row_height_luma[k],
+ s->meta_row_height_chroma[k],
+ mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable,
+
+ /* Output */
+ &mode_lib->ms.dst_y_per_vm_flip[k],
+ &mode_lib->ms.dst_y_per_row_flip[k],
+ &mode_lib->ms.final_flip_bw[k],
+ &mode_lib->ms.ImmediateFlipSupportedForPipe[k]);
+ }
+
+ calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw;
+ calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip;
+ calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw;
+ calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip;
+ calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
+ calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw;
+
+ calculate_peak_bandwidth_params->display_cfg = display_cfg;
+ calculate_peak_bandwidth_params->inc_flip_bw = 1;
+ calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes;
+ calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP;
+ calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0;
+ calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1;
+ calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0;
+ calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1;
+ calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor;
+ calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor;
+
+ calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l;
+ calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c;
+ calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
+ calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
+ calculate_peak_bandwidth_params->prefetch_bandwidth_max = mode_lib->ms.RequiredPrefetchBWMax;
+ calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
+ calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c;
+ calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw;
+ calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
+ calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw;
+ calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw;
+ calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw;
+ calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw;
+ calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma;
+ calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma;
+ calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor;
+ calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre;
+ calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre;
+ calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre;
+
+ calculate_peak_bandwidth_required(
+ &mode_lib->scratch,
+ calculate_peak_bandwidth_params);
+
+ calculate_immediate_flip_bandwidth_support(
+ &s->dummy_single[0], // double* frac_urg_bandwidth_flip
+ &mode_lib->ms.support.ImmediateFlipSupport,
+
+ dml2_core_internal_soc_state_sys_active,
+ mode_lib->ms.support.urg_bandwidth_required_flip,
+ mode_lib->ms.support.non_urg_bandwidth_required_flip,
+ mode_lib->ms.support.urg_bandwidth_available);
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false)
+ mode_lib->ms.support.ImmediateFlipSupport = false;
+ }
+
+ } else { // if prefetch not support, assume iflip is not supported too
+ mode_lib->ms.support.ImmediateFlipSupport = false;
+ }
+
+ s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency;
+ s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency;
+ s->mSOCParameters.ExtraLatency_sr = mode_lib->ms.ExtraLatency_sr;
+ s->mSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us;
+ s->mSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
+ s->mSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us;
+ s->mSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
+ s->mSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us;
+ s->mSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
+ s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us;
+ s->mSOCParameters.USRRetrainingLatency = 0;
+ s->mSOCParameters.SMNLatency = 0;
+ s->mSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), mode_lib->ms.state_idx);
+ s->mSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, mode_lib->ms.state_idx);
+ s->mSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock;
+ s->mSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type;
+
+ CalculateWatermarks_params->display_cfg = display_cfg;
+ CalculateWatermarks_params->USRRetrainingRequired = false;
+ CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines;
+ CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits;
+ CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes;
+ CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLK;
+ CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
+ CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change;
+ CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
+ CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters;
+ CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes;
+ CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK;
+ CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep;
+ CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
+ CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
+ CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY;
+ CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC;
+ CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY;
+ CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC;
+ CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP;
+ CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY;
+ CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC;
+ CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler;
+ CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler;
+ CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabled;
+ CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByte;
+ CalculateWatermarks_params->meta_row_height_l = s->meta_row_height_luma;
+ CalculateWatermarks_params->meta_row_height_c = s->meta_row_height_chroma;
+
+ // Output
+ CalculateWatermarks_params->Watermark = &mode_lib->ms.support.watermarks; // Watermarks *Watermark
+ CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->ms.support.DRAMClockChangeSupport;
+ CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->ms.support.global_dram_clock_change_supported;
+ CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // double *MaxActiveDRAMClockChangeLatencySupported[]
+ CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->ms.SubViewportLinesNeededInMALL; // unsigned int SubViewportLinesNeededInMALL[]
+ CalculateWatermarks_params->FCLKChangeSupport = mode_lib->ms.support.FCLKChangeSupport;
+ CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->ms.support.global_fclk_change_supported;
+ CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // double *MaxActiveFCLKChangeLatencySupported
+ CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport;
+ CalculateWatermarks_params->g6_temp_read_support = &mode_lib->ms.support.g6_temp_read_support;
+ CalculateWatermarks_params->VActiveLatencyHidingMargin = mode_lib->ms.VActiveLatencyHidingMargin;
+ CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs;
+
+ CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params);
+
+ calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]);
+ DML_LOG_VERBOSE("DML::%s: Done prefetch calculation\n", __func__);
+
+}
+
+
+static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params)
+{
+ struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib;
+ const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg;
+ const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table;
+
+ double outstanding_latency_us = 0;
+
+ struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals;
+ struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
+ struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
+ struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params;
+ struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params;
+ unsigned int k, m, n;
+
+ memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch));
+ memset(&mode_lib->ms, 0, sizeof(struct dml2_core_internal_mode_support));
+
+ mode_lib->ms.num_active_planes = display_cfg->num_planes;
+ get_stream_output_bpp(s->OutputBpp, display_cfg);
+
+ mode_lib->ms.state_idx = in_out_params->min_clk_index;
+ mode_lib->ms.SOCCLK = ((double)mode_lib->soc.clk_table.socclk.clk_values_khz[0] / 1000);
+ mode_lib->ms.DCFCLK = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_dcfclk_khz / 1000);
+ mode_lib->ms.FabricClock = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz / 1000);
+ mode_lib->ms.MaxDCFCLK = (double)min_clk_table->max_clocks_khz.dcfclk / 1000;
+ mode_lib->ms.MaxFabricClock = (double)min_clk_table->max_clocks_khz.fclk / 1000;
+ mode_lib->ms.max_dispclk_freq_mhz = (double)min_clk_table->max_ss_clocks_khz.dispclk / 1000;
+ mode_lib->ms.max_dscclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dscclk / 1000;
+ mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_ss_clocks_khz.dppclk / 1000;
+ mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config);
+ mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000);
+ mode_lib->ms.max_dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[min_clk_table->dram_bw_table.num_entries - 1].pre_derate_dram_bw_kbps / 1000);
+ mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params);
+ mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table);
+
+#if defined(__DML_VBA_DEBUG__)
+ DML_LOG_VERBOSE("DML::%s: --- START --- \n", __func__);
+ DML_LOG_VERBOSE("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes);
+ DML_LOG_VERBOSE("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index);
+ DML_LOG_VERBOSE("DML::%s: qos_param_index = %0d\n", __func__, mode_lib->ms.qos_param_index);
+ DML_LOG_VERBOSE("DML::%s: SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK);
+ DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->ms.dram_bw_mbps);
+ DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz);
+ DML_LOG_VERBOSE("DML::%s: DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK);
+ DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
+ DML_LOG_VERBOSE("DML::%s: MaxDCFCLK = %f\n", __func__, mode_lib->ms.MaxDCFCLK);
+ DML_LOG_VERBOSE("DML::%s: max_dispclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dispclk_freq_mhz);
+ DML_LOG_VERBOSE("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz);
+ DML_LOG_VERBOSE("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz);
+ DML_LOG_VERBOSE("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock);
+ DML_LOG_VERBOSE("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes);
+ DML_LOG_VERBOSE("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++)
+ DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
+#endif
+
+ CalculateMaxDETAndMinCompressedBufferSize(
+ mode_lib->ip.config_return_buffer_size_in_kbytes,
+ mode_lib->ip.config_return_buffer_segment_size_in_kbytes,
+ mode_lib->ip.rob_buffer_size_kbytes,
+ mode_lib->ip.max_num_dpp,
+ display_cfg->overrides.hw.force_nom_det_size_kbytes.enable,
+ display_cfg->overrides.hw.force_nom_det_size_kbytes.value,
+ mode_lib->ip.dcn_mrq_present,
+
+ /* Output */
+ &mode_lib->ms.MaxTotalDETInKByte,
+ &mode_lib->ms.NomDETInKByte,
+ &mode_lib->ms.MinCompressedBufferSizeInKByte);
+
+ PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd);
+
+ /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
+
+ /*Scale Ratio, taps Support Check*/
+ mode_lib->ms.support.ScaleRatioAndTapsSupport = true;
+ // Many core tests are still setting scaling parameters "incorrectly"
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (display_cfg->plane_descriptors[k].composition.scaler_info.enabled == false
+ && (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)
+ || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio != 1.0
+ || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps != 1.0
+ || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio != 1.0
+ || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps != 1.0)) {
+ mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
+ } else if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps > 8.0
+ || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 8.0
+ || (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 1.0 && (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps % 2) == 1)
+ || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > mode_lib->ip.max_hscl_ratio
+ || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > mode_lib->ip.max_vscl_ratio
+ || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps
+ || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps
+ || (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)
+ && (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps > 8 ||
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 8 ||
+ (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 1 && display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps % 2 == 1) ||
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > mode_lib->ip.max_hscl_ratio ||
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > mode_lib->ip.max_vscl_ratio ||
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps ||
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps))) {
+ mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
+ }
+ }
+
+ /*Source Format, Pixel Format and Scan Support Check*/
+ mode_lib->ms.support.SourceFormatPixelAndScanSupport = true;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear && dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
+ mode_lib->ms.support.SourceFormatPixelAndScanSupport = false;
+ }
+ }
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ CalculateBytePerPixelAndBlockSizes(
+ display_cfg->plane_descriptors[k].pixel_format,
+ display_cfg->plane_descriptors[k].surface.tiling,
+ display_cfg->plane_descriptors[k].surface.plane0.pitch,
+ display_cfg->plane_descriptors[k].surface.plane1.pitch,
+
+ /* Output */
+ &mode_lib->ms.BytePerPixelY[k],
+ &mode_lib->ms.BytePerPixelC[k],
+ &mode_lib->ms.BytePerPixelInDETY[k],
+ &mode_lib->ms.BytePerPixelInDETC[k],
+ &mode_lib->ms.Read256BlockHeightY[k],
+ &mode_lib->ms.Read256BlockHeightC[k],
+ &mode_lib->ms.Read256BlockWidthY[k],
+ &mode_lib->ms.Read256BlockWidthC[k],
+ &mode_lib->ms.MacroTileHeightY[k],
+ &mode_lib->ms.MacroTileHeightC[k],
+ &mode_lib->ms.MacroTileWidthY[k],
+ &mode_lib->ms.MacroTileWidthC[k],
+ &mode_lib->ms.surf_linear128_l[k],
+ &mode_lib->ms.surf_linear128_c[k]);
+ }
+
+ /*Bandwidth Support Check*/
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
+ mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
+ mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
+ } else {
+ mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
+ mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
+ }
+ }
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ mode_lib->ms.vactive_sw_bw_l[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+ mode_lib->ms.vactive_sw_bw_c[k] = mode_lib->ms.SwathWidthCSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+
+ mode_lib->ms.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width *
+ display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000));
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
+ DML_LOG_VERBOSE("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0);
+ DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_l[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_c[k]);
+#endif
+ }
+
+ // Writeback bandwidth
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) {
+ mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
+ * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
+ / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height
+ * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total
+ / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8.0;
+ } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+ mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
+ * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
+ / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height
+ * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total
+ / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4.0;
+ } else {
+ mode_lib->ms.WriteBandwidth[k][0] = 0.0;
+ }
+ }
+
+ /*Writeback Latency support check*/
+ mode_lib->ms.support.WritebackLatencySupport = true;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 &&
+ (mode_lib->ms.WriteBandwidth[k][0] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / ((double)mode_lib->soc.qos_parameters.writeback.base_latency_us))) {
+ mode_lib->ms.support.WritebackLatencySupport = false;
+ }
+ }
+
+
+ /* Writeback Scale Ratio and Taps Support Check */
+ mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > mode_lib->ip.writeback_max_hscl_ratio
+ || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > mode_lib->ip.writeback_max_vscl_ratio
+ || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio < mode_lib->ip.writeback_min_hscl_ratio
+ || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio < mode_lib->ip.writeback_min_vscl_ratio
+ || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > (unsigned int) mode_lib->ip.writeback_max_hscl_taps
+ || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps > (unsigned int) mode_lib->ip.writeback_max_vscl_taps
+ || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps
+ || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps
+ || (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > 2.0 && ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps % 2) == 1))) {
+ mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
+ }
+ if (2.0 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height * (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps - 1) * 57 > mode_lib->ip.writeback_line_buffer_buffer_size) {
+ mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
+ }
+ }
+ }
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ CalculateSinglePipeDPPCLKAndSCLThroughput(
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
+ mode_lib->ip.max_dchub_pscl_bw_pix_per_clk,
+ mode_lib->ip.max_pscl_lb_bw_pix_per_clk,
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+ display_cfg->plane_descriptors[k].pixel_format,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps,
+ /* Output */
+ &mode_lib->ms.PSCL_FACTOR[k],
+ &mode_lib->ms.PSCL_FACTOR_CHROMA[k],
+ &mode_lib->ms.MinDPPCLKUsingSingleDPP[k]);
+ }
+
+ // Max Viewport Size support
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
+ s->MaximumSwathWidthSupportLuma = 15360;
+ } else if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // horz video
+ s->MaximumSwathWidthSupportLuma = 7680 + 16;
+ } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // vert video
+ s->MaximumSwathWidthSupportLuma = 4320 + 16;
+ } else if (display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { // rgbe + alpha
+ s->MaximumSwathWidthSupportLuma = 5120 + 16;
+ } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelY[k] == 8 && display_cfg->plane_descriptors[k].surface.dcc.enable == true) { // vert 64bpp
+ s->MaximumSwathWidthSupportLuma = 3072 + 16;
+ } else {
+ s->MaximumSwathWidthSupportLuma = 6144 + 16;
+ }
+
+ if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
+ s->MaximumSwathWidthSupportChroma = (unsigned int)(s->MaximumSwathWidthSupportLuma / 2.0);
+ } else {
+ s->MaximumSwathWidthSupportChroma = s->MaximumSwathWidthSupportLuma;
+ }
+
+ unsigned lb_buffer_size_bits_luma = mode_lib->ip.line_buffer_size_bits;
+ unsigned lb_buffer_size_bits_chroma = mode_lib->ip.line_buffer_size_bits;
+
+/*
+#if defined(DV_BUILD)
+ // Assume a memory config setting of 3 in 420 mode or get a new ip parameter that reflects the programming.
+ if (mode_lib->ms.BytePerPixelC[k] != 0.0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) {
+ lb_buffer_size_bits_luma = 34620 * 57;
+ lb_buffer_size_bits_chroma = 13560 * 57;
+ }
+#endif
+*/
+ mode_lib->ms.MaximumSwathWidthInLineBufferLuma = lb_buffer_size_bits_luma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 /
+ (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 1.0) - 2, 0.0));
+ if (mode_lib->ms.BytePerPixelC[k] == 0.0) {
+ mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0;
+ } else {
+ mode_lib->ms.MaximumSwathWidthInLineBufferChroma = lb_buffer_size_bits_chroma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 /
+ (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 1.0) - 2, 0.0));
+ }
+
+ mode_lib->ms.MaximumSwathWidthLuma[k] = math_min2(s->MaximumSwathWidthSupportLuma, mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
+ mode_lib->ms.MaximumSwathWidthChroma[k] = math_min2(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
+
+ DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthLuma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthSupportLuma=%u\n", __func__, k, s->MaximumSwathWidthSupportLuma);
+ DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthInLineBufferLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
+
+ DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthChroma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthSupportChroma=%u\n", __func__, k, s->MaximumSwathWidthSupportChroma);
+ DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthInLineBufferChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
+ }
+
+ /* Cursor Support Check */
+ mode_lib->ms.support.CursorSupport = true;
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
+ if (display_cfg->plane_descriptors[k].cursor.cursor_bpp == 64 && mode_lib->ip.cursor_64bpp_support == false)
+ mode_lib->ms.support.CursorSupport = false;
+ }
+ }
+
+ /* Valid Pitch Check */
+ mode_lib->ms.support.PitchSupport = true;
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+
+ // data pitch
+ unsigned int alignment_l = mode_lib->ms.MacroTileWidthY[k];
+
+ if (mode_lib->ms.surf_linear128_l[k])
+ alignment_l = alignment_l / 2;
+
+ mode_lib->ms.support.AlignedYPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane0.pitch, display_cfg->plane_descriptors[k].surface.plane0.width), alignment_l);
+ if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
+ unsigned int alignment_c = mode_lib->ms.MacroTileWidthC[k];
+
+ if (mode_lib->ms.surf_linear128_c[k])
+ alignment_c = alignment_c / 2;
+ mode_lib->ms.support.AlignedCPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane1.pitch, display_cfg->plane_descriptors[k].surface.plane1.width), alignment_c);
+ } else {
+ mode_lib->ms.support.AlignedCPitch[k] = display_cfg->plane_descriptors[k].surface.plane1.pitch;
+ }
+
+ if (mode_lib->ms.support.AlignedYPitch[k] > display_cfg->plane_descriptors[k].surface.plane0.pitch ||
+ mode_lib->ms.support.AlignedCPitch[k] > display_cfg->plane_descriptors[k].surface.plane1.pitch) {
+ mode_lib->ms.support.PitchSupport = false;
+#if defined(__DML_VBA_DEBUG__)
+ DML_LOG_VERBOSE("DML::%s: k=%u AlignedYPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedYPitch[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u PitchY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.pitch);
+ DML_LOG_VERBOSE("DML::%s: k=%u AlignedCPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedCPitch[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u PitchC = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane1.pitch);
+ DML_LOG_VERBOSE("DML::%s: k=%u PitchSupport = %d\n", __func__, k, mode_lib->ms.support.PitchSupport);
+#endif
+ }
+
+ // meta pitch
+ if (mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable) {
+ mode_lib->ms.support.AlignedDCCMetaPitchY[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch,
+ display_cfg->plane_descriptors[k].surface.plane0.width), 64.0 * mode_lib->ms.Read256BlockWidthY[k]);
+
+ if (mode_lib->ms.support.AlignedDCCMetaPitchY[k] > display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch)
+ mode_lib->ms.support.PitchSupport = false;
+
+ if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
+ mode_lib->ms.support.AlignedDCCMetaPitchC[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch,
+ display_cfg->plane_descriptors[k].surface.plane1.width), 64.0 * mode_lib->ms.Read256BlockWidthC[k]);
+
+ if (mode_lib->ms.support.AlignedDCCMetaPitchC[k] > display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch)
+ mode_lib->ms.support.PitchSupport = false;
+ }
+ } else {
+ mode_lib->ms.support.AlignedDCCMetaPitchY[k] = 0;
+ mode_lib->ms.support.AlignedDCCMetaPitchC[k] = 0;
+ }
+ }
+
+ mode_lib->ms.support.ViewportExceedsSurface = false;
+ if (!display_cfg->overrides.hw.surface_viewport_size_check_disable) {
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width ||
+ display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) {
+ mode_lib->ms.support.ViewportExceedsSurface = true;
+#if defined(__DML_VBA_DEBUG__)
+ DML_LOG_VERBOSE("DML::%s: k=%u ViewportWidth = %ld\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width);
+ DML_LOG_VERBOSE("DML::%s: k=%u SurfaceWidthY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.width);
+ DML_LOG_VERBOSE("DML::%s: k=%u ViewportHeight = %ld\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height);
+ DML_LOG_VERBOSE("DML::%s: k=%u SurfaceHeightY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height);
+ DML_LOG_VERBOSE("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface);
+#endif
+ }
+ if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
+ if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width ||
+ display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) {
+ mode_lib->ms.support.ViewportExceedsSurface = true;
+ }
+ }
+ }
+ }
+
+ CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg;
+ CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
+ CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
+ CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->ForceSingleDPP = 1;
+ CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
+ CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.vactive_sw_bw_l;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.vactive_sw_bw_c;
+ CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma;
+ CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC;
+ CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->ms.surf_linear128_l;
+ CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->ms.surf_linear128_c;
+ CalculateSwathAndDETConfiguration_params->ODMMode = s->dummy_odm_mode;
+ CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY;
+ CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC;
+ CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY;
+ CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC;
+ CalculateSwathAndDETConfiguration_params->DPPPerSurface = s->dummy_integer_array[2];
+ CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+
+ // output
+ CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0];
+ CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1];
+ CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_integer_array[3];
+ CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_integer_array[4];
+ CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_integer_array[5];
+ CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_integer_array[6];
+ CalculateSwathAndDETConfiguration_params->SwathHeightY = s->dummy_integer_array[7];
+ CalculateSwathAndDETConfiguration_params->SwathHeightC = s->dummy_integer_array[8];
+ CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = s->dummy_integer_array[26];
+ CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = s->dummy_integer_array[27];
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = s->dummy_integer_array[9];
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeY = s->dummy_integer_array[10];
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeC = s->dummy_integer_array[11];
+ CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l;
+ CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c;
+ CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &s->dummy_boolean[0];
+ CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[1];
+ CalculateSwathAndDETConfiguration_params->hw_debug5 = &s->dummy_boolean[2];
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &s->dummy_integer[0];
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = mode_lib->ms.SingleDPPViewportSizeSupportPerSurface;
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[1];
+
+ // This call is just to find out if there is enough DET space to support full vp in 1 pipe.
+ CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
+
+ mode_lib->ms.TotalNumberOfActiveDPP = 0;
+ mode_lib->ms.TotalNumberOfActiveOPP = 0;
+ mode_lib->ms.support.TotalAvailablePipesSupport = true;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ /*Number Of DSC Slices*/
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) {
+
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices != 0)
+ mode_lib->ms.support.NumberOfDSCSlices[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices;
+ else {
+ if (s->PixelClockBackEnd[k] > 4800) {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = (unsigned int)(math_ceil2(s->PixelClockBackEnd[k] / 600, 4));
+ } else if (s->PixelClockBackEnd[k] > 2400) {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 8;
+ } else if (s->PixelClockBackEnd[k] > 1200) {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 4;
+ } else if (s->PixelClockBackEnd[k] > 340) {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 2;
+ } else {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 1;
+ }
+ }
+ } else {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 0;
+ }
+
+ CalculateODMMode(
+ mode_lib->ip.maximum_pixels_per_line_per_dsc_unit,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode,
+ mode_lib->ms.max_dispclk_freq_mhz,
+ false, // DSCEnable
+ mode_lib->ms.TotalNumberOfActiveDPP,
+ mode_lib->ms.TotalNumberOfActiveOPP,
+ mode_lib->ip.max_num_dpp,
+ mode_lib->ip.max_num_opp,
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+
+ /* Output */
+ &s->TotalAvailablePipesSupportNoDSC,
+ &s->NumberOfDPPNoDSC,
+ &s->ODMModeNoDSC,
+ &s->RequiredDISPCLKPerSurfaceNoDSC);
+
+ CalculateODMMode(
+ mode_lib->ip.maximum_pixels_per_line_per_dsc_unit,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode,
+ mode_lib->ms.max_dispclk_freq_mhz,
+ true, // DSCEnable
+ mode_lib->ms.TotalNumberOfActiveDPP,
+ mode_lib->ms.TotalNumberOfActiveOPP,
+ mode_lib->ip.max_num_dpp,
+ mode_lib->ip.max_num_opp,
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+
+ /* Output */
+ &s->TotalAvailablePipesSupportDSC,
+ &s->NumberOfDPPDSC,
+ &s->ODMModeDSC,
+ &s->RequiredDISPCLKPerSurfaceDSC);
+
+ CalculateOutputLink(
+ &mode_lib->scratch,
+ ((double)mode_lib->soc.clk_table.phyclk.clk_values_khz[0] / 1000),
+ ((double)mode_lib->soc.clk_table.phyclk_d18.clk_values_khz[0] / 1000),
+ ((double)mode_lib->soc.clk_table.phyclk_d32.clk_values_khz[0] / 1000),
+ mode_lib->soc.phy_downspread_percent,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
+ s->PixelClockBackEnd[k],
+ s->OutputBpp[k],
+ mode_lib->ip.maximum_dsc_bits_per_component,
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout,
+ s->ODMModeNoDSC,
+ s->ODMModeDSC,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate,
+
+ /* Output */
+ &mode_lib->ms.RequiresDSC[k],
+ &mode_lib->ms.RequiresFEC[k],
+ &mode_lib->ms.OutputBpp[k],
+ &mode_lib->ms.OutputType[k],
+ &mode_lib->ms.OutputRate[k],
+ &mode_lib->ms.RequiredSlots[k]);
+
+ if (s->OutputBpp[k] == 0.0) {
+ s->OutputBpp[k] = mode_lib->ms.OutputBpp[k];
+ }
+
+ if (mode_lib->ms.RequiresDSC[k] == false) {
+ mode_lib->ms.ODMMode[k] = s->ODMModeNoDSC;
+ mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceNoDSC;
+ if (!s->TotalAvailablePipesSupportNoDSC)
+ mode_lib->ms.support.TotalAvailablePipesSupport = false;
+ mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPNoDSC;
+ } else {
+ mode_lib->ms.ODMMode[k] = s->ODMModeDSC;
+ mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceDSC;
+ if (!s->TotalAvailablePipesSupportDSC)
+ mode_lib->ms.support.TotalAvailablePipesSupport = false;
+ mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPDSC;
+ }
+#if defined(__DML_VBA_DEBUG__)
+ DML_LOG_VERBOSE("DML::%s: k=%d RequiresDSC = %d\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]);
+#endif
+
+ // ensure the number of dsc slices is an integer multiple based on ODM mode
+ mode_lib->ms.support.DSCSlicesODMModeSupported = true;
+ if (mode_lib->ms.RequiresDSC[k]) {
+ // fail a ms check if the override num_slices doesn't align with odm mode setting
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices != 0) {
+ if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
+ mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 2) == 0);
+ else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
+ mode_lib->ms.support.DSCSlicesODMModeSupported = (mode_lib->ms.support.NumberOfDSCSlices[k] == 12);
+ else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
+ mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 4) == 0);
+#if defined(__DML_VBA_DEBUG__)
+ if (!mode_lib->ms.support.DSCSlicesODMModeSupported) {
+ DML_LOG_VERBOSE("DML::%s: k=%d Invalid dsc num_slices and ODM mode setting\n", __func__, k);
+ DML_LOG_VERBOSE("DML::%s: k=%d num_slices = %d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices);
+ DML_LOG_VERBOSE("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]);
+ }
+#endif
+ } else {
+ // safeguard to ensure the dml derived dsc slices and odm setting are compatible
+ if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 2 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 2.0, 1.0);
+ else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 12;
+ else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 4 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 4.0, 1.0);
+ }
+
+ } else {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 0;
+ }
+ }
+
+ mode_lib->ms.support.incorrect_imall_usage = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall)
+ mode_lib->ms.support.incorrect_imall_usage = 1;
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.MPCCombine[k] = false;
+ mode_lib->ms.NoOfDPP[k] = 1;
+ mode_lib->ms.NoOfOPP[k] = 1;
+
+ if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) {
+ mode_lib->ms.MPCCombine[k] = false;
+ mode_lib->ms.NoOfDPP[k] = 4;
+ mode_lib->ms.NoOfOPP[k] = 4;
+ } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) {
+ mode_lib->ms.MPCCombine[k] = false;
+ mode_lib->ms.NoOfDPP[k] = 3;
+ mode_lib->ms.NoOfOPP[k] = 3;
+ } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) {
+ mode_lib->ms.MPCCombine[k] = false;
+ mode_lib->ms.NoOfDPP[k] = 2;
+ mode_lib->ms.NoOfOPP[k] = 2;
+ } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 2) {
+ mode_lib->ms.MPCCombine[k] = true;
+ mode_lib->ms.NoOfDPP[k] = 2;
+ } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 1) {
+ mode_lib->ms.MPCCombine[k] = false;
+ mode_lib->ms.NoOfDPP[k] = 1;
+ if (!mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) {
+ DML_LOG_VERBOSE("WARNING: DML::%s: MPCC is override to disable but viewport is too large to be supported with single pipe!\n", __func__);
+ }
+ } else {
+ if ((mode_lib->ms.MinDPPCLKUsingSingleDPP[k] > mode_lib->ms.max_dppclk_freq_mhz) || !mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) {
+ mode_lib->ms.MPCCombine[k] = true;
+ mode_lib->ms.NoOfDPP[k] = 2;
+ }
+ }
+#if defined(__DML_VBA_DEBUG__)
+ DML_LOG_VERBOSE("DML::%s: k=%d, NoOfDPP = %d\n", __func__, k, mode_lib->ms.NoOfDPP[k]);
+#endif
+ }
+
+ mode_lib->ms.TotalNumberOfActiveDPP = 0;
+ mode_lib->ms.TotalNumberOfActiveOPP = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.TotalNumberOfActiveDPP += mode_lib->ms.NoOfDPP[k];
+ mode_lib->ms.TotalNumberOfActiveOPP += mode_lib->ms.NoOfOPP[k];
+ }
+ if (mode_lib->ms.TotalNumberOfActiveDPP > (unsigned int)mode_lib->ip.max_num_dpp)
+ mode_lib->ms.support.TotalAvailablePipesSupport = false;
+ if (mode_lib->ms.TotalNumberOfActiveOPP > (unsigned int)mode_lib->ip.max_num_opp)
+ mode_lib->ms.support.TotalAvailablePipesSupport = false;
+
+
+ mode_lib->ms.TotalNumberOfSingleDPPSurfaces = 0;
+ for (k = 0; k < (unsigned int)mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.NoOfDPP[k] == 1)
+ mode_lib->ms.TotalNumberOfSingleDPPSurfaces = mode_lib->ms.TotalNumberOfSingleDPPSurfaces + 1;
+ }
+
+ //DISPCLK/DPPCLK
+ mode_lib->ms.WritebackRequiredDISPCLK = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+ mode_lib->ms.WritebackRequiredDISPCLK = math_max2(mode_lib->ms.WritebackRequiredDISPCLK,
+ CalculateWriteBackDISPCLK(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_width,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
+ mode_lib->ip.writeback_line_buffer_buffer_size));
+ }
+ }
+
+ mode_lib->ms.RequiredDISPCLK = mode_lib->ms.WritebackRequiredDISPCLK;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.RequiredDISPCLK = math_max2(mode_lib->ms.RequiredDISPCLK, mode_lib->ms.RequiredDISPCLKPerSurface[k]);
+ }
+
+ mode_lib->ms.GlobalDPPCLK = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.RequiredDPPCLK[k] = mode_lib->ms.MinDPPCLKUsingSingleDPP[k] / mode_lib->ms.NoOfDPP[k];
+ mode_lib->ms.GlobalDPPCLK = math_max2(mode_lib->ms.GlobalDPPCLK, mode_lib->ms.RequiredDPPCLK[k]);
+ }
+
+ mode_lib->ms.support.DISPCLK_DPPCLK_Support = !((mode_lib->ms.RequiredDISPCLK > mode_lib->ms.max_dispclk_freq_mhz) || (mode_lib->ms.GlobalDPPCLK > mode_lib->ms.max_dppclk_freq_mhz));
+
+ /* Total Available OTG, Writeback, HDMIFRL, DP Support Check */
+ s->TotalNumberOfActiveOTG = 0;
+ s->TotalNumberOfActiveHDMIFRL = 0;
+ s->TotalNumberOfActiveDP2p0 = 0;
+ s->TotalNumberOfActiveDP2p0Outputs = 0;
+ s->TotalNumberOfActiveWriteback = 0;
+ memset(s->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
+ if (!s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) {
+ s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1;
+
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0)
+ s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1;
+
+ s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1;
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)
+ s->TotalNumberOfActiveHDMIFRL = s->TotalNumberOfActiveHDMIFRL + 1;
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0) {
+ s->TotalNumberOfActiveDP2p0 = s->TotalNumberOfActiveDP2p0 + 1;
+ // FIXME_STAGE2: SW not using backend related stuff, need mapping for mst setup
+ //if (display_cfg->output.OutputMultistreamId[k] == k || display_cfg->output.OutputMultistreamEn[k] == false) {
+ s->TotalNumberOfActiveDP2p0Outputs = s->TotalNumberOfActiveDP2p0Outputs + 1;
+ //}
+ }
+ }
+ }
+ }
+
+ /* Writeback Mode Support Check */
+ mode_lib->ms.support.EnoughWritebackUnits = 1;
+ if (s->TotalNumberOfActiveWriteback > (unsigned int)mode_lib->ip.max_num_wb) {
+ mode_lib->ms.support.EnoughWritebackUnits = false;
+ }
+ mode_lib->ms.support.NumberOfOTGSupport = (s->TotalNumberOfActiveOTG <= (unsigned int)mode_lib->ip.max_num_otg);
+ mode_lib->ms.support.NumberOfHDMIFRLSupport = (s->TotalNumberOfActiveHDMIFRL <= (unsigned int)mode_lib->ip.max_num_hdmi_frl_outputs);
+ mode_lib->ms.support.NumberOfDP2p0Support = (s->TotalNumberOfActiveDP2p0 <= (unsigned int)mode_lib->ip.max_num_dp2p0_streams && s->TotalNumberOfActiveDP2p0Outputs <= (unsigned int)mode_lib->ip.max_num_dp2p0_outputs);
+
+
+ mode_lib->ms.support.ExceededMultistreamSlots = false;
+ mode_lib->ms.support.LinkCapacitySupport = true;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_disabled == false &&
+ (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) && mode_lib->ms.OutputBpp[k] == 0) {
+ mode_lib->ms.support.LinkCapacitySupport = false;
+ }
+ }
+
+ mode_lib->ms.support.P2IWith420 = false;
+ mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = false;
+ mode_lib->ms.support.DSC422NativeNotSupported = false;
+ mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = false;
+ mode_lib->ms.support.LinkRateForMultistreamNotIndicated = false;
+ mode_lib->ms.support.BPPForMultistreamNotIndicated = false;
+ mode_lib->ms.support.MultistreamWithHDMIOreDP = false;
+ mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = false;
+ mode_lib->ms.support.NotEnoughLanesForMSO = false;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && mode_lib->ip.ptoi_supported == true)
+ mode_lib->ms.support.P2IWith420 = true;
+
+ if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 && !mode_lib->ip.dsc422_native_support)
+ mode_lib->ms.support.DSC422NativeNotSupported = true;
+
+ if (((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr2 ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr3) &&
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_edp) ||
+ ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr10 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr13p5 ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr20) &&
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp2p0))
+ mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = true;
+
+ // FIXME_STAGE2
+ //if (display_cfg->output.OutputMultistreamEn[k] == 1) {
+ // if (display_cfg->output.OutputMultistreamId[k] == k && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_na)
+ // mode_lib->ms.support.LinkRateForMultistreamNotIndicated = true;
+ // if (display_cfg->output.OutputMultistreamId[k] == k && s->OutputBpp[k] == 0)
+ // mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
+ // for (n = 0; n < mode_lib->ms.num_active_planes; ++n) {
+ // if (display_cfg->output.OutputMultistreamId[k] == n && s->OutputBpp[k] == 0)
+ // mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
+ // }
+ //}
+
+ if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)) {
+ // FIXME_STAGE2
+ //if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == k)
+ // mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
+ //for (n = 0; n < mode_lib->ms.num_active_planes; ++n) {
+ // if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == n)
+ // mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
+ //}
+ }
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_split_1to2 ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4))
+ mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = true;
+
+ if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 2) ||
+ (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 4))
+ mode_lib->ms.support.NotEnoughLanesForMSO = true;
+ }
+ }
+
+ mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl &&
+ !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
+ mode_lib->ms.RequiredDTBCLK[k] = RequiredDTBCLK(
+ mode_lib->ms.RequiresDSC[k],
+ s->PixelClockBackEnd[k],
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
+ mode_lib->ms.OutputBpp[k],
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout);
+
+ if (mode_lib->ms.RequiredDTBCLK[k] > ((double)min_clk_table->max_ss_clocks_khz.dtbclk / 1000)) {
+ mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true;
+ }
+ } else {
+ /* Phantom DTBCLK can be calculated different from main because phantom has no DSC and thus
+ * will have a different output BPP. Ignore phantom DTBCLK requirement and only consider
+ * non-phantom DTBCLK requirements. In map_mode_to_soc_dpm we choose the highest DTBCLK
+ * required - by setting phantom dtbclk to 0 we ignore it.
+ */
+ mode_lib->ms.RequiredDTBCLK[k] = 0;
+ }
+ }
+
+ mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = false;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420) {
+ s->DSCFormatFactor = 2;
+ } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_444) {
+ s->DSCFormatFactor = 1;
+ } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
+ s->DSCFormatFactor = 2;
+ } else {
+ s->DSCFormatFactor = 1;
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
+#endif
+ if (mode_lib->ms.RequiresDSC[k] == true) {
+ s->PixelClockBackEndFactor = 3.0;
+
+ if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
+ s->PixelClockBackEndFactor = 12.0;
+ else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
+ s->PixelClockBackEndFactor = 9.0;
+ else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
+ s->PixelClockBackEndFactor = 6.0;
+
+ mode_lib->ms.required_dscclk_freq_mhz[k] = s->PixelClockBackEnd[k] / s->PixelClockBackEndFactor / (double)s->DSCFormatFactor;
+ if (mode_lib->ms.required_dscclk_freq_mhz[k] > mode_lib->ms.max_dscclk_freq_mhz) {
+ mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, required_dscclk_freq_mhz = %f\n", __func__, k, mode_lib->ms.required_dscclk_freq_mhz[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor);
+ DML_LOG_VERBOSE("DML::%s: k=%u, DSCCLKRequiredMoreThanSupported = %u\n", __func__, k, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported);
+#endif
+ }
+ }
+ }
+
+ /* Check DSC Unit and Slices Support */
+ mode_lib->ms.support.NotEnoughDSCSlices = false;
+ s->TotalDSCUnitsRequired = 0;
+ mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = true;
+ memset(s->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.RequiresDSC[k] == true && !s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) {
+ s->NumDSCUnitRequired = 1;
+
+ if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
+ s->NumDSCUnitRequired = 4;
+ else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
+ s->NumDSCUnitRequired = 3;
+ else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
+ s->NumDSCUnitRequired = 2;
+
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active > s->NumDSCUnitRequired * (unsigned int)mode_lib->ip.maximum_pixels_per_line_per_dsc_unit)
+ mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
+ s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + s->NumDSCUnitRequired;
+
+ if (mode_lib->ms.support.NumberOfDSCSlices[k] > 4 * s->NumDSCUnitRequired)
+ mode_lib->ms.support.NotEnoughDSCSlices = true;
+ }
+ s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1;
+ }
+
+ mode_lib->ms.support.NotEnoughDSCUnits = false;
+ if (s->TotalDSCUnitsRequired > (unsigned int)mode_lib->ip.num_dsc) {
+ mode_lib->ms.support.NotEnoughDSCUnits = true;
+ }
+
+ /*DSC Delay per state*/
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k],
+ mode_lib->ms.ODMMode[k],
+ mode_lib->ip.maximum_dsc_bits_per_component,
+ s->OutputBpp[k],
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+ s->PixelClockBackEnd[k]);
+ }
+
+ // Figure out the swath and DET configuration after the num dpp per plane is figured out
+ CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
+ CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.ODMMode;
+ CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.NoOfDPP;
+
+ // output
+ CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0];
+ CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1];
+ CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub;
+ CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub;
+ CalculateSwathAndDETConfiguration_params->SwathWidth = mode_lib->ms.SwathWidthY;
+ CalculateSwathAndDETConfiguration_params->SwathWidthChroma = mode_lib->ms.SwathWidthC;
+ CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->ms.SwathHeightY;
+ CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->ms.SwathHeightC;
+ CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->ms.support.request_size_bytes_luma;
+ CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->ms.support.request_size_bytes_chroma;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->ms.DETBufferSizeInKByte; // FIXME: This is per pipe but the pipes in plane will use that
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
+ CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->ms.UnboundedRequestEnabled;
+ CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = s->dummy_integer_array[3];
+ CalculateSwathAndDETConfiguration_params->hw_debug5 = s->dummy_boolean_array[1];
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->ms.CompressedBufferSizeInkByte;
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = s->dummy_boolean_array[0];
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &mode_lib->ms.support.ViewportSizeSupport;
+
+ CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
+
+ if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) {
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++)
+ mode_lib->ms.SurfaceSizeInMALL[k] = 0;
+ mode_lib->ms.support.ExceededMALLSize = 0;
+ } else {
+ CalculateSurfaceSizeInMall(
+ display_cfg,
+ mode_lib->ms.num_active_planes,
+ mode_lib->soc.mall_allocated_for_dcn_mbytes,
+
+ mode_lib->ms.BytePerPixelY,
+ mode_lib->ms.BytePerPixelC,
+ mode_lib->ms.Read256BlockWidthY,
+ mode_lib->ms.Read256BlockWidthC,
+ mode_lib->ms.Read256BlockHeightY,
+ mode_lib->ms.Read256BlockHeightC,
+ mode_lib->ms.MacroTileWidthY,
+ mode_lib->ms.MacroTileWidthC,
+ mode_lib->ms.MacroTileHeightY,
+ mode_lib->ms.MacroTileHeightC,
+
+ /* Output */
+ mode_lib->ms.SurfaceSizeInMALL,
+ &mode_lib->ms.support.ExceededMALLSize);
+ }
+
+ mode_lib->ms.TotalNumberOfDCCActiveDPP = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (display_cfg->plane_descriptors[k].surface.dcc.enable == true) {
+ mode_lib->ms.TotalNumberOfDCCActiveDPP = mode_lib->ms.TotalNumberOfDCCActiveDPP + mode_lib->ms.NoOfDPP[k];
+ }
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ s->SurfParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+ s->SurfParameters[k].DPPPerSurface = mode_lib->ms.NoOfDPP[k];
+ s->SurfParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
+ s->SurfParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
+ s->SurfParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
+ s->SurfParameters[k].BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
+ s->SurfParameters[k].BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
+ s->SurfParameters[k].BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
+ s->SurfParameters[k].BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
+ s->SurfParameters[k].BlockWidthY = mode_lib->ms.MacroTileWidthY[k];
+ s->SurfParameters[k].BlockHeightY = mode_lib->ms.MacroTileHeightY[k];
+ s->SurfParameters[k].BlockWidthC = mode_lib->ms.MacroTileWidthC[k];
+ s->SurfParameters[k].BlockHeightC = mode_lib->ms.MacroTileHeightC[k];
+ s->SurfParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
+ s->SurfParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
+ s->SurfParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
+ s->SurfParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
+ s->SurfParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling;
+ s->SurfParameters[k].BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
+ s->SurfParameters[k].BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
+ s->SurfParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
+ s->SurfParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+ s->SurfParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+ s->SurfParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
+ s->SurfParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
+ s->SurfParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch;
+ s->SurfParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch;
+ s->SurfParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
+ s->SurfParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
+ s->SurfParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
+ s->SurfParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
+ s->SurfParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
+ s->SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame;
+ s->SurfParameters[k].SwathHeightY = mode_lib->ms.SwathHeightY[k];
+ s->SurfParameters[k].SwathHeightC = mode_lib->ms.SwathHeightC[k];
+
+ s->SurfParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch;
+ s->SurfParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch;
+ }
+
+ CalculateVMRowAndSwath_params->display_cfg = display_cfg;
+ CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ CalculateVMRowAndSwath_params->myPipe = s->SurfParameters;
+ CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->ms.SurfaceSizeInMALL;
+ CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
+ CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
+ CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes;
+ CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthY;
+ CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthC;
+ CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
+ CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes;
+ CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+
+ // output
+ CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceeded;
+ CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = s->dummy_integer_array[12];
+ CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = s->dummy_integer_array[13];
+ CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->ms.dpte_row_height;
+ CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->ms.dpte_row_height_chroma;
+ CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = s->dummy_integer_array[14]; // VBA_DELTA
+ CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = s->dummy_integer_array[15]; // VBA_DELTA
+ CalculateVMRowAndSwath_params->vm_group_bytes = s->dummy_integer_array[16];
+ CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
+ CalculateVMRowAndSwath_params->PixelPTEReqWidthY = s->dummy_integer_array[17];
+ CalculateVMRowAndSwath_params->PixelPTEReqHeightY = s->dummy_integer_array[18];
+ CalculateVMRowAndSwath_params->PTERequestSizeY = s->dummy_integer_array[19];
+ CalculateVMRowAndSwath_params->PixelPTEReqWidthC = s->dummy_integer_array[20];
+ CalculateVMRowAndSwath_params->PixelPTEReqHeightC = s->dummy_integer_array[21];
+ CalculateVMRowAndSwath_params->PTERequestSizeC = s->dummy_integer_array[22];
+ CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y;
+ CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y;
+ CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c;
+ CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c;
+ CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = s->dummy_integer_array[23];
+ CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = s->dummy_integer_array[24];
+ CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY;
+ CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC;
+ CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->ms.PrefillY;
+ CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->ms.PrefillC;
+ CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY;
+ CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC;
+ CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
+ CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow;
+ CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
+ CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
+ CalculateVMRowAndSwath_params->vm_bytes = mode_lib->ms.vm_bytes;
+ CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->ms.use_one_row_for_frame;
+ CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->ms.use_one_row_for_frame_flip;
+ CalculateVMRowAndSwath_params->is_using_mall_for_ss = s->dummy_boolean_array[0];
+ CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = s->dummy_boolean_array[1];
+ CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = s->dummy_integer_array[25];
+ CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = mode_lib->ms.DCCMetaBufferSizeNotExceeded;
+ CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->ms.meta_row_bw;
+ CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->ms.meta_row_bytes;
+ CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
+ CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
+ CalculateVMRowAndSwath_params->meta_req_width_luma = s->dummy_integer_array[26];
+ CalculateVMRowAndSwath_params->meta_req_height_luma = s->dummy_integer_array[27];
+ CalculateVMRowAndSwath_params->meta_row_width_luma = s->dummy_integer_array[28];
+ CalculateVMRowAndSwath_params->meta_row_height_luma = s->meta_row_height_luma;
+ CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = s->dummy_integer_array[29];
+ CalculateVMRowAndSwath_params->meta_req_width_chroma = s->dummy_integer_array[30];
+ CalculateVMRowAndSwath_params->meta_req_height_chroma = s->dummy_integer_array[31];
+ CalculateVMRowAndSwath_params->meta_row_width_chroma = s->dummy_integer_array[32];
+ CalculateVMRowAndSwath_params->meta_row_height_chroma = s->meta_row_height_chroma;
+ CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = s->dummy_integer_array[33];
+
+ CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params);
+
+ mode_lib->ms.support.PTEBufferSizeNotExceeded = true;
+ mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = true;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.PTEBufferSizeNotExceeded[k] == false)
+ mode_lib->ms.support.PTEBufferSizeNotExceeded = false;
+
+ if (mode_lib->ms.DCCMetaBufferSizeNotExceeded[k] == false)
+ mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = false;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.PTEBufferSizeNotExceeded[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.DCCMetaBufferSizeNotExceeded[k]);
+#endif
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: PTEBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.PTEBufferSizeNotExceeded);
+ DML_LOG_VERBOSE("DML::%s: DCCMetaBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.DCCMetaBufferSizeNotExceeded);
+#endif
+
+ /* VActive bytes to fetch for UCLK P-State */
+ calculate_bytes_to_fetch_required_to_hide_latency_params->display_cfg = display_cfg;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+
+ calculate_bytes_to_fetch_required_to_hide_latency_params->num_active_planes = mode_lib->ms.num_active_planes;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->num_of_dpp = mode_lib->ms.NoOfDPP;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_l = s->meta_row_height_luma;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_c = s->meta_row_height_chroma;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_l = mode_lib->ms.dpte_row_height;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_c = mode_lib->ms.dpte_row_height_chroma;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_l = mode_lib->ms.BytePerPixelY;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_c = mode_lib->ms.BytePerPixelC;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_l = mode_lib->ms.SwathWidthY;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->ms.SwathWidthC;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->ms.SwathHeightY;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->ms.SwathHeightC;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us[0] = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
+
+ /* outputs */
+ calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l[dml2_pstate_type_uclk];
+ calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c[dml2_pstate_type_uclk];
+
+ calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params);
+
+ /* Excess VActive bandwidth required to fill DET */
+ calculate_excess_vactive_bandwidth_required(
+ display_cfg,
+ mode_lib->ms.num_active_planes,
+ s->pstate_bytes_required_l[dml2_pstate_type_uclk],
+ s->pstate_bytes_required_c[dml2_pstate_type_uclk],
+ /* outputs */
+ mode_lib->ms.excess_vactive_fill_bw_l,
+ mode_lib->ms.excess_vactive_fill_bw_c);
+
+ mode_lib->ms.UrgLatency = CalculateUrgentLatency(
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us,
+ mode_lib->soc.do_urgent_latency_adjustment,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz,
+ mode_lib->ms.FabricClock,
+ mode_lib->ms.uclk_freq_mhz,
+ mode_lib->soc.qos_parameters.qos_type,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
+
+ mode_lib->ms.TripToMemory = CalculateTripToMemory(
+ mode_lib->ms.UrgLatency,
+ mode_lib->ms.FabricClock,
+ mode_lib->ms.uclk_freq_mhz,
+ mode_lib->soc.qos_parameters.qos_type,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
+
+ mode_lib->ms.TripToMemory = math_max2(mode_lib->ms.UrgLatency, mode_lib->ms.TripToMemory);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ double line_time_us = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+ bool cursor_not_enough_urgent_latency_hiding = false;
+
+ if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
+ calculate_cursor_req_attributes(
+ display_cfg->plane_descriptors[k].cursor.cursor_width,
+ display_cfg->plane_descriptors[k].cursor.cursor_bpp,
+
+ // output
+ &s->cursor_lines_per_chunk[k],
+ &s->cursor_bytes_per_line[k],
+ &s->cursor_bytes_per_chunk[k],
+ &s->cursor_bytes[k]);
+
+ calculate_cursor_urgent_burst_factor(
+ mode_lib->ip.cursor_buffer_size,
+ display_cfg->plane_descriptors[k].cursor.cursor_width,
+ s->cursor_bytes_per_chunk[k],
+ s->cursor_lines_per_chunk[k],
+ line_time_us,
+ mode_lib->ms.UrgLatency,
+
+ // output
+ &mode_lib->ms.UrgentBurstFactorCursor[k],
+ &cursor_not_enough_urgent_latency_hiding);
+ }
+
+ mode_lib->ms.UrgentBurstFactorCursorPre[k] = mode_lib->ms.UrgentBurstFactorCursor[k];
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor\n", __func__, k);
+ DML_LOG_VERBOSE("DML::%s: k=%d, VRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
+ DML_LOG_VERBOSE("DML::%s: k=%d, VRatioChroma=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio);
+#endif
+
+ CalculateUrgentBurstFactor(
+ &display_cfg->plane_descriptors[k],
+ mode_lib->ms.swath_width_luma_ub[k],
+ mode_lib->ms.swath_width_chroma_ub[k],
+ mode_lib->ms.SwathHeightY[k],
+ mode_lib->ms.SwathHeightC[k],
+ line_time_us,
+ mode_lib->ms.UrgLatency,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
+ mode_lib->ms.BytePerPixelInDETY[k],
+ mode_lib->ms.BytePerPixelInDETC[k],
+ mode_lib->ms.DETBufferSizeY[k],
+ mode_lib->ms.DETBufferSizeC[k],
+
+ // Output
+ &mode_lib->ms.UrgentBurstFactorLuma[k],
+ &mode_lib->ms.UrgentBurstFactorChroma[k],
+ &mode_lib->ms.NotEnoughUrgentLatencyHiding[k]);
+
+ mode_lib->ms.NotEnoughUrgentLatencyHiding[k] = mode_lib->ms.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding;
+ }
+
+ CalculateDCFCLKDeepSleep(
+ display_cfg,
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.BytePerPixelY,
+ mode_lib->ms.BytePerPixelC,
+ mode_lib->ms.SwathWidthY,
+ mode_lib->ms.SwathWidthC,
+ mode_lib->ms.NoOfDPP,
+ mode_lib->ms.PSCL_FACTOR,
+ mode_lib->ms.PSCL_FACTOR_CHROMA,
+ mode_lib->ms.RequiredDPPCLK,
+ mode_lib->ms.vactive_sw_bw_l,
+ mode_lib->ms.vactive_sw_bw_c,
+ mode_lib->soc.return_bus_width_bytes,
+
+ /* Output */
+ &mode_lib->ms.dcfclk_deepsleep);
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+ mode_lib->ms.WritebackDelayTime[k] = mode_lib->soc.qos_parameters.writeback.base_latency_us + CalculateWriteBackDelay(
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->ms.RequiredDISPCLK;
+ } else {
+ mode_lib->ms.WritebackDelayTime[k] = 0.0;
+ }
+ }
+
+ // MaximumVStartup is actually Tvstartup_min in DCN4 programming guide
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ bool isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported);
+ s->MaximumVStartup[k] = CalculateMaxVStartup(
+ mode_lib->ip.ptoi_supported,
+ mode_lib->ip.vblank_nom_default_us,
+ &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing,
+ mode_lib->ms.WritebackDelayTime[k]);
+ mode_lib->ms.MaxVStartupLines[k] = (isInterlaceTiming ? (2 * s->MaximumVStartup[k]) : s->MaximumVStartup[k]);
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, MaximumVStartup = %u\n", __func__, k, s->MaximumVStartup[k]);
+#endif
+
+ /* Immediate Flip and MALL parameters */
+ s->ImmediateFlipRequired = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ s->ImmediateFlipRequired = s->ImmediateFlipRequired || display_cfg->plane_descriptors[k].immediate_flip;
+ }
+
+ mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe =
+ mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe ||
+ ((display_cfg->hostvm_enable == true || display_cfg->plane_descriptors[k].immediate_flip == true) &&
+ (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame || dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])));
+ }
+
+ mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen ||
+ ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))) ||
+ ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_disable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame));
+ }
+
+ s->FullFrameMALLPStateMethod = false;
+ s->SubViewportMALLPStateMethod = false;
+ s->PhantomPipeMALLPStateMethod = false;
+ s->SubViewportMALLRefreshGreaterThan120Hz = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)
+ s->FullFrameMALLPStateMethod = true;
+ if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) {
+ s->SubViewportMALLPStateMethod = true;
+ if (!display_cfg->overrides.enable_subvp_implicit_pmo) {
+ // For dv, small frame tests will have very high refresh rate
+ unsigned long long refresh_rate = (unsigned long long) ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz * 1000 /
+ (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
+ (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total);
+ if (refresh_rate > 120)
+ s->SubViewportMALLRefreshGreaterThan120Hz = true;
+ }
+ }
+ if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
+ s->PhantomPipeMALLPStateMethod = true;
+ }
+ mode_lib->ms.support.InvalidCombinationOfMALLUseForPState = (s->SubViewportMALLPStateMethod != s->PhantomPipeMALLPStateMethod) ||
+ (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: SubViewportMALLPStateMethod = %u\n", __func__, s->SubViewportMALLPStateMethod);
+ DML_LOG_VERBOSE("DML::%s: PhantomPipeMALLPStateMethod = %u\n", __func__, s->PhantomPipeMALLPStateMethod);
+ DML_LOG_VERBOSE("DML::%s: FullFrameMALLPStateMethod = %u\n", __func__, s->FullFrameMALLPStateMethod);
+ DML_LOG_VERBOSE("DML::%s: SubViewportMALLRefreshGreaterThan120Hz = %u\n", __func__, s->SubViewportMALLRefreshGreaterThan120Hz);
+ DML_LOG_VERBOSE("DML::%s: InvalidCombinationOfMALLUseForPState = %u\n", __func__, mode_lib->ms.support.InvalidCombinationOfMALLUseForPState);
+ DML_LOG_VERBOSE("DML::%s: in_out_params->min_clk_index = %u\n", __func__, in_out_params->min_clk_index);
+ DML_LOG_VERBOSE("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK);
+ DML_LOG_VERBOSE("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
+ DML_LOG_VERBOSE("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz);
+#endif
+
+ mode_lib->ms.support.OutstandingRequestsSupport = true;
+ mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = true;
+
+ mode_lib->ms.support.avg_urgent_latency_us
+ = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
+ * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0)
+ + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
+ * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
+
+ mode_lib->ms.support.avg_non_urgent_latency_us
+ = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
+ * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0)
+ + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
+ * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
+
+ mode_lib->ms.support.max_non_urgent_latency_us
+ = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles
+ / mode_lib->ms.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0)
+ + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock
+ + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock
+ * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+
+ if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
+ outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_luma[k]
+ / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
+
+ if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) {
+ mode_lib->ms.support.OutstandingRequestsSupport = false;
+ }
+
+ if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) {
+ mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: avg_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_urgent_latency_us);
+ DML_LOG_VERBOSE("DML::%s: avg_non_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_non_urgent_latency_us);
+ DML_LOG_VERBOSE("DML::%s: k=%d, request_size_bytes_luma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_luma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, outstanding_latency_us = %f (luma)\n", __func__, k, outstanding_latency_us);
+#endif
+ }
+
+ if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x && mode_lib->ms.BytePerPixelC[k] > 0) {
+ outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_chroma[k]
+ / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
+
+ if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) {
+ mode_lib->ms.support.OutstandingRequestsSupport = false;
+ }
+
+ if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) {
+ mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false;
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%d, request_size_bytes_chroma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_chroma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, outstanding_latency_us = %f (chroma)\n", __func__, k, outstanding_latency_us);
+#endif
+ }
+ }
+
+ memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params));
+ if (mode_lib->soc.mcache_size_bytes == 0 || mode_lib->ip.dcn_mrq_present) {
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ mode_lib->ms.mall_prefetch_sdp_overhead_factor[k] = 1.0;
+ mode_lib->ms.mall_prefetch_dram_overhead_factor[k] = 1.0;
+ mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0;
+ mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0;
+ mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0;
+ mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0;
+ }
+ } else {
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
+ calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count;
+ calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes;
+ calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes;
+ calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes;
+ calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
+ calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
+
+ calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format;
+ calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle);
+ calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
+ calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling;
+ calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall;
+
+ calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
+ calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
+ calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
+ calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
+ calculate_mcache_setting_params->blk_width_l = mode_lib->ms.MacroTileWidthY[k];
+ calculate_mcache_setting_params->blk_height_l = mode_lib->ms.MacroTileHeightY[k];
+ calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k];
+ calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k];
+ calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k];
+ calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->ms.BytePerPixelY[k];
+
+ calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start;
+ calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
+ calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
+ calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
+ calculate_mcache_setting_params->blk_width_c = mode_lib->ms.MacroTileWidthC[k];
+ calculate_mcache_setting_params->blk_height_c = mode_lib->ms.MacroTileHeightC[k];
+ calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k];
+ calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k];
+ calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k];
+ calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->ms.BytePerPixelC[k];
+
+ // output
+ calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k];
+ calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k];
+ calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k];
+ calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k];
+
+ calculate_mcache_setting_params->num_mcaches_l = &mode_lib->ms.num_mcaches_l[k];
+ calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->ms.mcache_row_bytes_l[k];
+ calculate_mcache_setting_params->mcache_row_bytes_per_channel_l = &mode_lib->ms.mcache_row_bytes_per_channel_l[k];
+ calculate_mcache_setting_params->mcache_offsets_l = mode_lib->ms.mcache_offsets_l[k];
+ calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->ms.mcache_shift_granularity_l[k];
+
+ calculate_mcache_setting_params->num_mcaches_c = &mode_lib->ms.num_mcaches_c[k];
+ calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->ms.mcache_row_bytes_c[k];
+ calculate_mcache_setting_params->mcache_row_bytes_per_channel_c = &mode_lib->ms.mcache_row_bytes_per_channel_c[k];
+ calculate_mcache_setting_params->mcache_offsets_c = mode_lib->ms.mcache_offsets_c[k];
+ calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->ms.mcache_shift_granularity_c[k];
+
+ calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->ms.mall_comb_mcache_l[k];
+ calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->ms.mall_comb_mcache_c[k];
+ calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->ms.lc_comb_mcache[k];
+
+ calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params);
+ }
+
+ calculate_mall_bw_overhead_factor(
+ mode_lib->ms.mall_prefetch_sdp_overhead_factor,
+ mode_lib->ms.mall_prefetch_dram_overhead_factor,
+
+ // input
+ display_cfg,
+ mode_lib->ms.num_active_planes);
+ }
+
+ // Calculate all the bandwidth available
+ // Need another bw for latency evaluation
+ calculate_bandwidth_available(
+ mode_lib->ms.support.avg_bandwidth_available_min, // not used
+ mode_lib->ms.support.avg_bandwidth_available, // not used
+ mode_lib->ms.support.urg_bandwidth_available_min_latency,
+ mode_lib->ms.support.urg_bandwidth_available, // not used
+ mode_lib->ms.support.urg_bandwidth_available_vm_only, // not used
+ mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm, // not used
+
+ &mode_lib->soc,
+ display_cfg->hostvm_enable,
+ mode_lib->ms.DCFCLK,
+ mode_lib->ms.FabricClock,
+ mode_lib->ms.dram_bw_mbps);
+
+ calculate_bandwidth_available(
+ mode_lib->ms.support.avg_bandwidth_available_min,
+ mode_lib->ms.support.avg_bandwidth_available,
+ mode_lib->ms.support.urg_bandwidth_available_min,
+ mode_lib->ms.support.urg_bandwidth_available,
+ mode_lib->ms.support.urg_bandwidth_available_vm_only,
+ mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm,
+
+ &mode_lib->soc,
+ display_cfg->hostvm_enable,
+ mode_lib->ms.MaxDCFCLK,
+ mode_lib->ms.MaxFabricClock,
+#ifdef DML_MODE_SUPPORT_USE_DPM_DRAM_BW
+ mode_lib->ms.dram_bw_mbps);
+#else
+ mode_lib->ms.max_dram_bw_mbps);
+#endif
+
+ // Average BW support check
+ calculate_avg_bandwidth_required(
+ mode_lib->ms.support.avg_bandwidth_required,
+ // input
+ display_cfg,
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.vactive_sw_bw_l,
+ mode_lib->ms.vactive_sw_bw_c,
+ mode_lib->ms.cursor_bw,
+ mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
+ mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
+ mode_lib->ms.mall_prefetch_dram_overhead_factor,
+ mode_lib->ms.mall_prefetch_sdp_overhead_factor);
+
+ for (m = 0; m < dml2_core_internal_bw_max; m++) { // check sdp and dram
+ mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_idle][m] = 1;
+ mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_active][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][m]);
+ mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_svp_prefetch][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][m]);
+ }
+
+ mode_lib->ms.support.AvgBandwidthSupport = true;
+ mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = true;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.NotEnoughUrgentLatencyHiding[k]) {
+ mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = false;
+ DML_LOG_VERBOSE("DML::%s: k=%u NotEnoughUrgentLatencyHiding set\n", __func__, k);
+
+ }
+ }
+ for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
+ for (n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram
+ if (!mode_lib->ms.support.avg_bandwidth_support_ok[m][n] && (m == dml2_core_internal_soc_state_sys_active || mode_lib->soc.mall_allocated_for_dcn_mbytes > 0)) {
+ mode_lib->ms.support.AvgBandwidthSupport = false;
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: avg_bandwidth_support_ok[%s][%s] not ok\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n));
+#endif
+ }
+ }
+ }
+
+ dml_core_ms_prefetch_check(mode_lib, display_cfg);
+
+ mode_lib->ms.support.max_urgent_latency_us = s->mSOCParameters.max_urgent_latency_us;
+
+ //Re-ordering Buffer Support Check
+ if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
+ if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024
+ / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= s->mSOCParameters.max_urgent_latency_us) {
+ mode_lib->ms.support.ROBSupport = true;
+ } else {
+ mode_lib->ms.support.ROBSupport = false;
+ }
+ } else {
+ if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) {
+ mode_lib->ms.support.ROBSupport = true;
+ } else {
+ mode_lib->ms.support.ROBSupport = false;
+ }
+ }
+
+ /* VActive fill time calculations (informative) */
+ calculate_vactive_det_fill_latency(
+ display_cfg,
+ mode_lib->ms.num_active_planes,
+ s->pstate_bytes_required_l[dml2_pstate_type_uclk],
+ s->pstate_bytes_required_c[dml2_pstate_type_uclk],
+ mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
+ mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
+ mode_lib->ms.vactive_sw_bw_l,
+ mode_lib->ms.vactive_sw_bw_c,
+ mode_lib->ms.surface_avg_vactive_required_bw,
+ mode_lib->ms.surface_peak_required_bw,
+ /* outputs */
+ mode_lib->ms.pstate_vactive_det_fill_delay_us[dml2_pstate_type_uclk]);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us);
+ DML_LOG_VERBOSE("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport);
+#endif
+
+ /*Mode Support, Voltage State and SOC Configuration*/
+ {
+ if (mode_lib->ms.support.ScaleRatioAndTapsSupport
+ && mode_lib->ms.support.SourceFormatPixelAndScanSupport
+ && mode_lib->ms.support.ViewportSizeSupport
+ && !mode_lib->ms.support.LinkRateDoesNotMatchDPVersion
+ && !mode_lib->ms.support.LinkRateForMultistreamNotIndicated
+ && !mode_lib->ms.support.BPPForMultistreamNotIndicated
+ && !mode_lib->ms.support.MultistreamWithHDMIOreDP
+ && !mode_lib->ms.support.ExceededMultistreamSlots
+ && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink
+ && !mode_lib->ms.support.NotEnoughLanesForMSO
+ && !mode_lib->ms.support.P2IWith420
+ && !mode_lib->ms.support.DSC422NativeNotSupported
+ && mode_lib->ms.support.DSCSlicesODMModeSupported
+ && !mode_lib->ms.support.NotEnoughDSCUnits
+ && !mode_lib->ms.support.NotEnoughDSCSlices
+ && !mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe
+ && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen
+ && !mode_lib->ms.support.DSCCLKRequiredMoreThanSupported
+ && mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport
+ && !mode_lib->ms.support.DTBCLKRequiredMoreThanSupported
+ && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPState
+ && mode_lib->ms.support.ROBSupport
+ && mode_lib->ms.support.OutstandingRequestsSupport
+ && mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance
+ && mode_lib->ms.support.DISPCLK_DPPCLK_Support
+ && mode_lib->ms.support.TotalAvailablePipesSupport
+ && mode_lib->ms.support.NumberOfOTGSupport
+ && mode_lib->ms.support.NumberOfHDMIFRLSupport
+ && mode_lib->ms.support.NumberOfDP2p0Support
+ && mode_lib->ms.support.EnoughWritebackUnits
+ && mode_lib->ms.support.WritebackLatencySupport
+ && mode_lib->ms.support.WritebackScaleRatioAndTapsSupport
+ && mode_lib->ms.support.CursorSupport
+ && mode_lib->ms.support.PitchSupport
+ && !mode_lib->ms.support.ViewportExceedsSurface
+ && mode_lib->ms.support.PrefetchSupported
+ && mode_lib->ms.support.EnoughUrgentLatencyHidingSupport
+ && mode_lib->ms.support.AvgBandwidthSupport
+ && mode_lib->ms.support.DynamicMetadataSupported
+ && mode_lib->ms.support.VRatioInPrefetchSupported
+ && mode_lib->ms.support.PTEBufferSizeNotExceeded
+ && mode_lib->ms.support.DCCMetaBufferSizeNotExceeded
+ && !mode_lib->ms.support.ExceededMALLSize
+ && mode_lib->ms.support.g6_temp_read_support
+ && ((!display_cfg->hostvm_enable && !s->ImmediateFlipRequired) || mode_lib->ms.support.ImmediateFlipSupport)) {
+ DML_LOG_VERBOSE("DML::%s: mode is supported\n", __func__);
+ mode_lib->ms.support.ModeSupport = true;
+ } else {
+ DML_LOG_VERBOSE("DML::%s: mode is NOT supported\n", __func__);
+ mode_lib->ms.support.ModeSupport = false;
+ }
+ }
+
+ // Since mode_support now works on 1 particular power state, there is only 1 state idx (index 0).
+ DML_LOG_VERBOSE("DML::%s: ModeSupport = %u\n", __func__, mode_lib->ms.support.ModeSupport);
+ DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[k];
+ mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[k];
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMMode[k];
+ mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k];
+ mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k];
+ mode_lib->ms.support.OutputBpp[k] = mode_lib->ms.OutputBpp[k];
+ mode_lib->ms.support.OutputType[k] = mode_lib->ms.OutputType[k];
+ mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRate[k];
+
+#if defined(__DML_VBA_DEBUG__)
+ DML_LOG_VERBOSE("DML::%s: k=%d, ODMMode = %u\n", __func__, k, mode_lib->ms.support.ODMMode[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, DSCEnabled = %u\n", __func__, k, mode_lib->ms.support.DSCEnabled[k]);
+#endif
+ }
+
+#if defined(__DML_VBA_DEBUG__)
+ if (!mode_lib->ms.support.ModeSupport)
+ dml2_print_mode_support_info(&mode_lib->ms.support, true);
+
+ DML_LOG_VERBOSE("DML::%s: --- DONE --- \n", __func__);
+#endif
+
+ return mode_lib->ms.support.ModeSupport;
+}
+
+/*
+ * Entry point for the mode-support evaluation.
+ *
+ * Runs dml_core_mode_support() on @in_out_params. Only when that call reports
+ * a non-zero result is mode_lib->ms.support copied into *out_evaluation_info;
+ * on a zero result the output structure is left untouched.
+ *
+ * Returns the result of dml_core_mode_support().
+ */
+unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support_ex *in_out_params)
+{
+	unsigned int plane_idx;
+	unsigned int is_supported;
+
+	DML_LOG_VERBOSE("DML::%s: ------------- START ----------\n", __func__);
+
+	is_supported = dml_core_mode_support(in_out_params);
+	if (is_supported != 0) {
+		/* Publish the evaluation details only for a supported mode. */
+		*in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support;
+	}
+
+	DML_LOG_VERBOSE("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, is_supported, in_out_params->min_clk_index);
+
+	/* Dump the per-plane reserved vblank time override for debugging. */
+	for (plane_idx = 0; plane_idx < in_out_params->in_display_cfg->num_planes; plane_idx++) {
+		DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, plane_idx, in_out_params->in_display_cfg->plane_descriptors[plane_idx].overrides.reserved_vblank_time_ns);
+	}
+
+	DML_LOG_VERBOSE("DML::%s: ------------- DONE ----------\n", __func__);
+
+	return is_supported;
+}
+
+/*
+ * Compute, for every active surface, the time needed to deliver one swath
+ * line and one request, for luma and chroma, both for the active display
+ * period and for prefetch.
+ *
+ * Line delivery time:
+ *   - when the relevant vertical ratio (scaler v_ratio for the active period,
+ *     VRatioPrefetchY/C for prefetch) is <= 1, it is derived from the swath
+ *     width, the number of DPPs used, the horizontal ratio and the pixel clock;
+ *   - otherwise it is derived from the swath width, the PSCL throughput and
+ *     the DPP clock.
+ * Request delivery time is the line delivery time divided by the number of
+ * requests per swath.
+ *
+ * All chroma outputs are set to 0 when BytePerPixelC[k] is 0.
+ */
+static void CalculatePixelDeliveryTimes(
+	const struct dml2_display_cfg *display_cfg,
+	const struct core_display_cfg_support_info *cfg_support_info,
+	unsigned int NumberOfActiveSurfaces,
+	double VRatioPrefetchY[],
+	double VRatioPrefetchC[],
+	unsigned int swath_width_luma_ub[],
+	unsigned int swath_width_chroma_ub[],
+	double PSCL_THROUGHPUT[],
+	double PSCL_THROUGHPUT_CHROMA[],
+	double Dppclk[],
+	unsigned int BytePerPixelC[],
+	unsigned int req_per_swath_ub_l[],
+	unsigned int req_per_swath_ub_c[],
+
+	// Output
+	double DisplayPipeLineDeliveryTimeLuma[],
+	double DisplayPipeLineDeliveryTimeChroma[],
+	double DisplayPipeLineDeliveryTimeLumaPrefetch[],
+	double DisplayPipeLineDeliveryTimeChromaPrefetch[],
+	double DisplayPipeRequestDeliveryTimeLuma[],
+	double DisplayPipeRequestDeliveryTimeChroma[],
+	double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
+	double DisplayPipeRequestDeliveryTimeChromaPrefetch[])
+{
+	unsigned int plane;
+
+	/* Pass 1: per-line delivery times. */
+	for (plane = 0; plane < NumberOfActiveSurfaces; ++plane) {
+		double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane].stream_index].timing.pixel_clock_khz / 1000);
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u : HRatio = %f\n", __func__, plane, display_cfg->plane_descriptors[plane].composition.scaler_info.plane0.h_ratio);
+		DML_LOG_VERBOSE("DML::%s: k=%u : VRatio = %f\n", __func__, plane, display_cfg->plane_descriptors[plane].composition.scaler_info.plane0.v_ratio);
+		DML_LOG_VERBOSE("DML::%s: k=%u : HRatioChroma = %f\n", __func__, plane, display_cfg->plane_descriptors[plane].composition.scaler_info.plane1.h_ratio);
+		DML_LOG_VERBOSE("DML::%s: k=%u : VRatioChroma = %f\n", __func__, plane, display_cfg->plane_descriptors[plane].composition.scaler_info.plane1.v_ratio);
+		DML_LOG_VERBOSE("DML::%s: k=%u : VRatioPrefetchY = %f\n", __func__, plane, VRatioPrefetchY[plane]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : VRatioPrefetchC = %f\n", __func__, plane, VRatioPrefetchC[plane]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, plane, swath_width_luma_ub[plane]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, plane, swath_width_chroma_ub[plane]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, plane, PSCL_THROUGHPUT[plane]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, plane, PSCL_THROUGHPUT_CHROMA[plane]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, plane, cfg_support_info->plane_support_info[plane].dpps_used);
+		DML_LOG_VERBOSE("DML::%s: k=%u : pixel_clock_mhz = %f\n", __func__, plane, pixel_clock_mhz);
+		DML_LOG_VERBOSE("DML::%s: k=%u : Dppclk = %f\n", __func__, plane, Dppclk[plane]);
+#endif
+		/* Luma, active period: pixel-clock bound unless downscaling vertically. */
+		DisplayPipeLineDeliveryTimeLuma[plane] =
+			(display_cfg->plane_descriptors[plane].composition.scaler_info.plane0.v_ratio <= 1) ?
+			swath_width_luma_ub[plane] * cfg_support_info->plane_support_info[plane].dpps_used / display_cfg->plane_descriptors[plane].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz :
+			swath_width_luma_ub[plane] / PSCL_THROUGHPUT[plane] / Dppclk[plane];
+
+		/* Luma, prefetch: same formulas, selected by the prefetch ratio. */
+		DisplayPipeLineDeliveryTimeLumaPrefetch[plane] =
+			(VRatioPrefetchY[plane] <= 1) ?
+			swath_width_luma_ub[plane] * cfg_support_info->plane_support_info[plane].dpps_used / display_cfg->plane_descriptors[plane].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz :
+			swath_width_luma_ub[plane] / PSCL_THROUGHPUT[plane] / Dppclk[plane];
+
+		if (BytePerPixelC[plane] == 0) {
+			/* No chroma plane: nothing to deliver. */
+			DisplayPipeLineDeliveryTimeChroma[plane] = 0;
+			DisplayPipeLineDeliveryTimeChromaPrefetch[plane] = 0;
+		} else {
+			DisplayPipeLineDeliveryTimeChroma[plane] =
+				(display_cfg->plane_descriptors[plane].composition.scaler_info.plane1.v_ratio <= 1) ?
+				swath_width_chroma_ub[plane] * cfg_support_info->plane_support_info[plane].dpps_used / display_cfg->plane_descriptors[plane].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz :
+				swath_width_chroma_ub[plane] / PSCL_THROUGHPUT_CHROMA[plane] / Dppclk[plane];
+
+			DisplayPipeLineDeliveryTimeChromaPrefetch[plane] =
+				(VRatioPrefetchC[plane] <= 1) ?
+				swath_width_chroma_ub[plane] * cfg_support_info->plane_support_info[plane].dpps_used / display_cfg->plane_descriptors[plane].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz :
+				swath_width_chroma_ub[plane] / PSCL_THROUGHPUT_CHROMA[plane] / Dppclk[plane];
+		}
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, plane, DisplayPipeLineDeliveryTimeLuma[plane]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, plane, DisplayPipeLineDeliveryTimeLumaPrefetch[plane]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, plane, DisplayPipeLineDeliveryTimeChroma[plane]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, plane, DisplayPipeLineDeliveryTimeChromaPrefetch[plane]);
+#endif
+	}
+
+	/* Pass 2: per-request delivery times = line time / requests per swath. */
+	for (plane = 0; plane < NumberOfActiveSurfaces; ++plane) {
+		DisplayPipeRequestDeliveryTimeLuma[plane] = DisplayPipeLineDeliveryTimeLuma[plane] / req_per_swath_ub_l[plane];
+		DisplayPipeRequestDeliveryTimeLumaPrefetch[plane] = DisplayPipeLineDeliveryTimeLumaPrefetch[plane] / req_per_swath_ub_l[plane];
+
+		if (BytePerPixelC[plane] != 0) {
+			DisplayPipeRequestDeliveryTimeChroma[plane] = DisplayPipeLineDeliveryTimeChroma[plane] / req_per_swath_ub_c[plane];
+			DisplayPipeRequestDeliveryTimeChromaPrefetch[plane] = DisplayPipeLineDeliveryTimeChromaPrefetch[plane] / req_per_swath_ub_c[plane];
+		} else {
+			DisplayPipeRequestDeliveryTimeChroma[plane] = 0;
+			DisplayPipeRequestDeliveryTimeChromaPrefetch[plane] = 0;
+		}
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, plane, DisplayPipeRequestDeliveryTimeLuma[plane]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, plane, DisplayPipeRequestDeliveryTimeLumaPrefetch[plane]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : req_per_swath_ub_l = %d\n", __func__, plane, req_per_swath_ub_l[plane]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, plane, DisplayPipeRequestDeliveryTimeChroma[plane]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, plane, DisplayPipeRequestDeliveryTimeChromaPrefetch[plane]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : req_per_swath_ub_c = %d\n", __func__, plane, req_per_swath_ub_c[plane]);
+#endif
+	}
+}
+
+/*
+ * Compute per-plane timing for DCC meta chunks, TDLUT groups and page table
+ * entry (PTE) groups. All inputs and outputs are carried by @p.
+ *
+ * For each of the first p->NumberOfActiveSurfaces planes:
+ *   1. DST_Y_PER_{PTE,META}_ROW_NOM_{L,C}: row height divided by the plane's
+ *      vertical scaling ratio (chroma values are 0 when BytePerPixelC is 0).
+ *   2. TimePer[Chroma]MetaChunk{Nominal,VBlank,Flip}: computed only when DCC
+ *      is enabled on the plane and p->mrq_present is set, otherwise 0.
+ *   3. time_per_tdlut_group: computed only when the plane is set up for
+ *      TDLUT, otherwise 0.
+ *   4. time_per_pte_group_{nom,vblank,flip}_{luma,chroma}: computed only
+ *      when display_cfg->gpuvm_enable is set, otherwise 0.
+ *
+ * Every time is of the form <rows> * h_total / pixel_clock_mhz / <units per
+ * row>; presumably the results are in microseconds (assumes h_total is in
+ * pixels -- TODO confirm against the timing struct definition).
+ */
+static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTETimes_params *p)
+{
+	unsigned int meta_chunk_width;
+	unsigned int min_meta_chunk_width;
+	unsigned int meta_chunk_per_row_int;
+	unsigned int meta_row_remainder;
+	unsigned int meta_chunk_threshold;
+	unsigned int meta_chunks_per_row_ub;
+	unsigned int meta_chunk_width_chroma;
+	unsigned int min_meta_chunk_width_chroma;
+	unsigned int meta_chunk_per_row_int_chroma;
+	unsigned int meta_row_remainder_chroma;
+	unsigned int meta_chunk_threshold_chroma;
+	unsigned int meta_chunks_per_row_ub_chroma;
+	unsigned int dpte_group_width_luma;
+	unsigned int dpte_groups_per_row_luma_ub;
+	unsigned int dpte_group_width_chroma;
+	unsigned int dpte_groups_per_row_chroma_ub;
+	double pixel_clock_mhz;
+
+	/*
+	 * Nominal destination lines per PTE row and per meta row. These are
+	 * computed once here and reused by the PTE group loop further down.
+	 */
+	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+		if (p->BytePerPixelC[k] == 0) {
+			p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
+		} else {
+			p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+		}
+		p->DST_Y_PER_META_ROW_NOM_L[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+		if (p->BytePerPixelC[k] == 0) {
+			p->DST_Y_PER_META_ROW_NOM_C[k] = 0;
+		} else {
+			p->DST_Y_PER_META_ROW_NOM_C[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+		}
+	}
+
+	/* DCC meta chunk timing. */
+	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		/*
+		 * Use the pixel clock in MHz as a double, exactly as the PTE
+		 * group timing below does. Dividing the kHz field by 1000
+		 * without the cast would drop the fractional MHz if the field
+		 * is an integer type.
+		 */
+		pixel_clock_mhz = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+
+		if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true && p->mrq_present) {
+			meta_chunk_width = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelY[k] / p->meta_row_height[k];
+			min_meta_chunk_width = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelY[k] / p->meta_row_height[k];
+			meta_chunk_per_row_int = p->meta_row_width[k] / meta_chunk_width;
+			meta_row_remainder = p->meta_row_width[k] % meta_chunk_width;
+			/* The threshold uses the request width, or its height when rotated vertically. */
+			if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
+				meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_width[k];
+			} else {
+				meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_height[k];
+			}
+			/* Upper bound on chunks per row: one extra chunk, or two when the remainder exceeds the threshold. */
+			if (meta_row_remainder <= meta_chunk_threshold) {
+				meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
+			} else {
+				meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
+			}
+			p->TimePerMetaChunkNominal[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio *
+				p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
+				pixel_clock_mhz / meta_chunks_per_row_ub;
+			p->TimePerMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
+				pixel_clock_mhz / meta_chunks_per_row_ub;
+			p->TimePerMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
+				pixel_clock_mhz / meta_chunks_per_row_ub;
+			if (p->BytePerPixelC[k] == 0) {
+				p->TimePerChromaMetaChunkNominal[k] = 0;
+				p->TimePerChromaMetaChunkVBlank[k] = 0;
+				p->TimePerChromaMetaChunkFlip[k] = 0;
+			} else {
+				meta_chunk_width_chroma = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k];
+				min_meta_chunk_width_chroma = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k];
+				meta_chunk_per_row_int_chroma = (unsigned int)((double)p->meta_row_width_chroma[k] / meta_chunk_width_chroma);
+				meta_row_remainder_chroma = p->meta_row_width_chroma[k] % meta_chunk_width_chroma;
+				if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
+					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_width_chroma[k];
+				} else {
+					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_height_chroma[k];
+				}
+				if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
+					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
+				} else {
+					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
+				}
+				p->TimePerChromaMetaChunkNominal[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / meta_chunks_per_row_ub_chroma;
+				p->TimePerChromaMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / meta_chunks_per_row_ub_chroma;
+				p->TimePerChromaMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / meta_chunks_per_row_ub_chroma;
+			}
+		} else {
+			/* No DCC meta requests for this plane. */
+			p->TimePerMetaChunkNominal[k] = 0;
+			p->TimePerMetaChunkVBlank[k] = 0;
+			p->TimePerMetaChunkFlip[k] = 0;
+			p->TimePerChromaMetaChunkNominal[k] = 0;
+			p->TimePerChromaMetaChunkVBlank[k] = 0;
+			p->TimePerChromaMetaChunkFlip[k] = 0;
+		}
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_L[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_C[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkNominal = %f\n", __func__, k, p->TimePerMetaChunkNominal[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkVBlank = %f\n", __func__, k, p->TimePerMetaChunkVBlank[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkFlip = %f\n", __func__, k, p->TimePerMetaChunkFlip[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkNominal = %f\n", __func__, k, p->TimePerChromaMetaChunkNominal[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, p->TimePerChromaMetaChunkVBlank[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkFlip = %f\n", __func__, k, p->TimePerChromaMetaChunkFlip[k]);
+#endif
+	}
+
+	/* TDLUT group and PTE group timing. */
+	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		pixel_clock_mhz = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+
+		if (p->display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
+			p->time_per_tdlut_group[k] = 2 * p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / p->tdlut_groups_per_2row_ub[k];
+		else
+			p->time_per_tdlut_group[k] = 0;
+
+		DML_LOG_VERBOSE("DML::%s: k=%u, time_per_tdlut_group = %f\n", __func__, k, p->time_per_tdlut_group[k]);
+
+		if (p->display_cfg->gpuvm_enable == true) {
+			/* Width covered by one PTE group; uses the request height when rotated vertically. */
+			if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
+				dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqWidthY[k]);
+			} else {
+				dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqHeightY[k]);
+			}
+			/* With one row per frame the group count is halved before rounding up. */
+			if (p->use_one_row_for_frame[k]) {
+				dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma / 2.0, 1.0));
+			} else {
+				dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma, 1.0));
+			}
+			/* Counts of 2 or fewer are padded by one group. */
+			if (dpte_groups_per_row_luma_ub <= 2) {
+				dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1;
+			}
+			DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, p->PixelPTEReqWidthY[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, p->PixelPTEReqHeightY[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma);
+			DML_LOG_VERBOSE("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub);
+
+			p->time_per_pte_group_nom_luma[k] = p->DST_Y_PER_PTE_ROW_NOM_L[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
+			p->time_per_pte_group_vblank_luma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
+			p->time_per_pte_group_flip_luma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
+			if (p->BytePerPixelC[k] == 0) {
+				p->time_per_pte_group_nom_chroma[k] = 0;
+				p->time_per_pte_group_vblank_chroma[k] = 0;
+				p->time_per_pte_group_flip_chroma[k] = 0;
+			} else {
+				if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
+					dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqWidthC[k]);
+				} else {
+					dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqHeightC[k]);
+				}
+
+				if (p->use_one_row_for_frame[k]) {
+					dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma / 2.0, 1.0));
+				} else {
+					dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma, 1.0));
+				}
+				if (dpte_groups_per_row_chroma_ub <= 2) {
+					dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1;
+				}
+				DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]);
+				DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma);
+				DML_LOG_VERBOSE("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub);
+
+				p->time_per_pte_group_nom_chroma[k] = p->DST_Y_PER_PTE_ROW_NOM_C[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
+				p->time_per_pte_group_vblank_chroma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
+				p->time_per_pte_group_flip_chroma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
+			}
+		} else {
+			/* GPUVM disabled: no PTE fetches to time. */
+			p->time_per_pte_group_nom_luma[k] = 0;
+			p->time_per_pte_group_vblank_luma[k] = 0;
+			p->time_per_pte_group_flip_luma[k] = 0;
+			p->time_per_pte_group_nom_chroma[k] = 0;
+			p->time_per_pte_group_vblank_chroma[k] = 0;
+			p->time_per_pte_group_flip_chroma[k] = 0;
+		}
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_row_vblank = %f\n", __func__, k, p->dst_y_per_row_vblank[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_row_flip = %f\n", __func__, k, p->dst_y_per_row_flip[k]);
+
+		DML_LOG_VERBOSE("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_L[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_C[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, p->time_per_pte_group_nom_luma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, p->time_per_pte_group_vblank_luma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, p->time_per_pte_group_flip_luma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, p->time_per_pte_group_nom_chroma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, p->time_per_pte_group_vblank_chroma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, p->time_per_pte_group_flip_chroma[k]);
+#endif
+	}
+} // CalculateMetaAndPTETimes
+
+/*
+ * CalculateVMGroupAndRequestTimes() - for every active surface, compute the time
+ * budget per VM group and per VM request, once for the vblank (prefetch) case
+ * and once for the flip case.
+ *
+ * All array parameters are indexed by surface k, 0 <= k < NumberOfActiveSurfaces.
+ *
+ * @display_cfg:                  display configuration. Reads gpuvm_enable,
+ *                                gpuvm_max_page_table_levels, the per-plane
+ *                                surface.dcc.enable and tdlut.setup_for_tdlut flags,
+ *                                and the h_total / pixel_clock_khz of the plane's stream.
+ * @NumberOfActiveSurfaces:       number of surfaces to process.
+ * @BytePerPixelC:                chroma bytes per pixel; > 0 enables the chroma terms.
+ * @dst_y_per_vm_vblank:          lines available for VM fetch in vblank.
+ * @dst_y_per_vm_flip:            lines available for VM fetch on flip.
+ * @dpte_row_width_luma_ub:       not referenced by this function.
+ * @dpte_row_width_chroma_ub:     not referenced by this function.
+ * @vm_group_bytes:               divisor used to turn byte counts into group counts.
+ * @dpde0_bytes_per_frame_ub_l/c: dpde0 bytes per frame (luma / chroma); only used
+ *                                when gpuvm_max_page_table_levels >= 2.
+ * @tdlut_pte_bytes_per_frame:    tdlut PTE bytes; only added to the vblank
+ *                                ("pref") counts when the plane is set up for tdlut.
+ * @meta_pte_bytes_per_frame_ub_l/c: meta PTE bytes (luma / chroma); only used when
+ *                                the plane has DCC enabled and @mrq_present is true.
+ * @mrq_present:                  gates the DCC meta terms together with dcc.enable.
+ *
+ * Outputs (one entry written per surface):
+ * @TimePerVMGroupVBlank, @TimePerVMGroupFlip,
+ * @TimePerVMRequestVBlank, @TimePerVMRequestFlip:
+ *     dst_y_per_vm_* * line_time / count, or 0 when the count is 0 or when
+ *     gpuvm_enable is false. All four are halved when
+ *     gpuvm_max_page_table_levels > 2.
+ *
+ * Byte counts are converted to request counts by dividing by 64
+ * (presumably the request size in bytes -- TODO confirm).
+ */
+static void CalculateVMGroupAndRequestTimes(
+	const struct dml2_display_cfg *display_cfg,
+	unsigned int NumberOfActiveSurfaces,
+	unsigned int BytePerPixelC[],
+	double dst_y_per_vm_vblank[],
+	double dst_y_per_vm_flip[],
+	unsigned int dpte_row_width_luma_ub[],
+	unsigned int dpte_row_width_chroma_ub[],
+	unsigned int vm_group_bytes[],
+	unsigned int dpde0_bytes_per_frame_ub_l[],
+	unsigned int dpde0_bytes_per_frame_ub_c[],
+	unsigned int tdlut_pte_bytes_per_frame[],
+	unsigned int meta_pte_bytes_per_frame_ub_l[],
+	unsigned int meta_pte_bytes_per_frame_ub_c[],
+	bool mrq_present,
+
+	// Output
+	double TimePerVMGroupVBlank[],
+	double TimePerVMGroupFlip[],
+	double TimePerVMRequestVBlank[],
+	double TimePerVMRequestFlip[])
+{
+	// NOTE(review): these two running totals are initialised once, outside the
+	// surface loop, so the counts used for surface k also contain the
+	// contributions of surfaces 0..k-1. Behaviour is kept as-is here; confirm
+	// whether a per-surface reset was intended.
+	unsigned int num_group_per_lower_vm_stage = 0;
+	unsigned int num_req_per_lower_vm_stage = 0;
+	unsigned int num_group_per_lower_vm_stage_flip;
+	unsigned int num_group_per_lower_vm_stage_pref;
+	unsigned int num_req_per_lower_vm_stage_flip;
+	unsigned int num_req_per_lower_vm_stage_pref;
+	double line_time;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
+#endif
+	for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+		double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+		// Meta (DCC) page-table terms only apply when DCC is on and mrq_present is set.
+		bool dcc_mrq_enable = display_cfg->plane_descriptors[k].surface.dcc.enable && mrq_present;
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u, dcc_mrq_enable = %u\n", __func__, k, dcc_mrq_enable);
+		DML_LOG_VERBOSE("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_l = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_c = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
+#endif
+
+		if (display_cfg->gpuvm_enable) {
+			/* ---- group counts ---- */
+			if (display_cfg->gpuvm_max_page_table_levels >= 2) {
+				num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
+
+				if (BytePerPixelC[k] > 0)
+					num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
+			}
+
+			if (dcc_mrq_enable) {
+				if (BytePerPixelC[k] > 0) {
+					num_group_per_lower_vm_stage += (unsigned int)(2.0 /*for each mpde0 group*/ + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) +
+						math_ceil2((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1));
+				} else {
+					num_group_per_lower_vm_stage += (unsigned int)(1.0 + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1));
+				}
+			}
+
+			// Flip and prefetch start from the same base; only prefetch gets the tdlut extras.
+			num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage;
+			num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage;
+
+			if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) {
+				// Divide in floating point so math_ceil2() really rounds a partial
+				// group up, matching the other group-count terms above (an unsigned
+				// integer division would truncate before the ceiling is applied).
+				num_group_per_lower_vm_stage_pref += (unsigned int) math_ceil2((double) (tdlut_pte_bytes_per_frame[k]) / (double) (vm_group_bytes[k]), 1);
+				if (display_cfg->gpuvm_max_page_table_levels >= 2)
+					num_group_per_lower_vm_stage_pref += 1; // tdpe0 group
+			}
+
+			/* ---- request counts ---- */
+			if (display_cfg->gpuvm_max_page_table_levels >= 2) {
+				num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_l[k] / 64;
+				// Chroma bytes must be converted to requests exactly like the
+				// luma and meta terms (bytes / 64); adding raw bytes inflated the count.
+				if (BytePerPixelC[k] > 0)
+					num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_c[k] / 64;
+			}
+
+			if (dcc_mrq_enable) {
+				num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_l[k] / 64;
+				if (BytePerPixelC[k] > 0)
+					num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_c[k] / 64;
+			}
+
+			num_req_per_lower_vm_stage_flip = num_req_per_lower_vm_stage;
+			num_req_per_lower_vm_stage_pref = num_req_per_lower_vm_stage;
+
+			if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) {
+				num_req_per_lower_vm_stage_pref += tdlut_pte_bytes_per_frame[k] / 64;
+			}
+
+			// Duration of one line of the plane's stream: h_total / pixel clock (MHz).
+			line_time = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz;
+
+			// Spread the available lines evenly over the groups / requests;
+			// a zero count yields a zero time instead of a division by zero.
+			if (num_group_per_lower_vm_stage_pref > 0)
+				TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref;
+			else
+				TimePerVMGroupVBlank[k] = 0;
+
+			if (num_group_per_lower_vm_stage_flip > 0)
+				TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip;
+			else
+				TimePerVMGroupFlip[k] = 0;
+
+			if (num_req_per_lower_vm_stage_pref > 0)
+				TimePerVMRequestVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_req_per_lower_vm_stage_pref;
+			else
+				TimePerVMRequestVBlank[k] = 0.0;
+			if (num_req_per_lower_vm_stage_flip > 0)
+				TimePerVMRequestFlip[k] = dst_y_per_vm_flip[k] * line_time / num_req_per_lower_vm_stage_flip;
+			else
+				TimePerVMRequestFlip[k] = 0.0;
+
+			DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_vm_vblank = %f\n", __func__, k, dst_y_per_vm_vblank[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_vm_flip = %f\n", __func__, k, dst_y_per_vm_flip[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%u, line_time = %f\n", __func__, k, line_time);
+			DML_LOG_VERBOSE("DML::%s: k=%u, num_group_per_lower_vm_stage_pref = %d\n", __func__, k, num_group_per_lower_vm_stage_pref);
+			DML_LOG_VERBOSE("DML::%s: k=%u, num_group_per_lower_vm_stage_flip = %d\n", __func__, k, num_group_per_lower_vm_stage_flip);
+			DML_LOG_VERBOSE("DML::%s: k=%u, num_req_per_lower_vm_stage_pref = %d\n", __func__, k, num_req_per_lower_vm_stage_pref);
+			DML_LOG_VERBOSE("DML::%s: k=%u, num_req_per_lower_vm_stage_flip = %d\n", __func__, k, num_req_per_lower_vm_stage_flip);
+
+			// With more than two page-table levels every time budget is halved.
+			if (display_cfg->gpuvm_max_page_table_levels > 2) {
+				TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
+				TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
+				TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
+				TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
+			}
+
+		} else {
+			// GPUVM disabled: there is no VM traffic to budget for.
+			TimePerVMGroupVBlank[k] = 0;
+			TimePerVMGroupFlip[k] = 0;
+			TimePerVMRequestVBlank[k] = 0;
+			TimePerVMRequestFlip[k] = 0;
+		}
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
+#endif
+	}
+}
+
+/*
+ * CalculateStutterEfficiency() - estimate stutter period, stutter efficiency
+ * (regular and Z8) and the number of stutter bursts per frame.
+ *
+ * @scratch: scratch storage; CalculateStutterEfficiency_locals is zeroed and
+ *           used for all intermediate values (referred to as `l` below).
+ * @p:       inputs and output pointers.
+ *
+ * Steps, in order:
+ *  1. Over all non-phantom surfaces, accumulate the compressed read bandwidth,
+ *     the zero-size-request bandwidth and the row (meta + dpte) bandwidth.
+ *  2. Derive the average DCC compression rate / zero-size fraction and from
+ *     them the effective compressed buffer size (three cases, depending on
+ *     the zero-size fraction).
+ *  3. Over all non-phantom surfaces, find the one with the smallest DET
+ *     buffering time (the "critical surface"); that time becomes *StutterPeriod.
+ *  4. Compute the stutter burst time from the data that fits in
+ *     ROB + compressed buffer, the remainder, and the row bandwidth.
+ *  5. Visit each stream once to count streams with active writeback and to
+ *     check whether all streams share pixel clock, h_total and v_total.
+ *  6. Write the outputs: *StutterEfficiencyNotIncludingVBlank,
+ *     *Z8StutterEfficiencyNotIncludingVBlank, *NumberOfStutterBurstsPerFrame,
+ *     *Z8NumberOfStutterBurstsPerFrame, *StutterEfficiency,
+ *     *Z8StutterEfficiency and *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE.
+ *     Any active writeback forces all efficiencies and burst counts to 0.
+ */
+static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratch,
+	struct dml2_core_calcs_CalculateStutterEfficiency_params *p)
+{
+	struct dml2_core_calcs_CalculateStutterEfficiency_locals *l = &scratch->CalculateStutterEfficiency_locals;
+
+	unsigned int TotalNumberOfActiveOTG = 0;
+	double SinglePixelClock = 0;
+	unsigned int SingleHTotal = 0;
+	unsigned int SingleVTotal = 0;
+	bool SameTiming = true;
+	bool FoundCriticalSurface = false;
+
+	memset(l, 0, sizeof(struct dml2_core_calcs_CalculateStutterEfficiency_locals));
+
+	/* Step 1: accumulate bandwidth totals over the non-phantom surfaces. */
+	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
+			if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true) {
+				// Effective compression is capped at 2 when the 256B block dimension
+				// (width for vertical rotation, height otherwise) exceeds the swath
+				// height, or the max uncompressed block is below 256; otherwise 4.
+				if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesY[k] > p->SwathHeightY[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesY[k] > p->SwathHeightY[k]) || p->DCCYMaxUncompressedBlock[k] < 256) {
+					l->MaximumEffectiveCompressionLuma = 2;
+				} else {
+					l->MaximumEffectiveCompressionLuma = 4;
+				}
+				l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0, l->MaximumEffectiveCompressionLuma);
+#ifdef __DML_VBA_DEBUG__
+				DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
+				DML_LOG_VERBOSE("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0);
+				DML_LOG_VERBOSE("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, l->MaximumEffectiveCompressionLuma);
+#endif
+				l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0;
+				l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0 / l->MaximumEffectiveCompressionLuma;
+
+				// Same treatment for the chroma plane, when it carries bandwidth.
+				if (p->ReadBandwidthSurfaceChroma[k] > 0) {
+					if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesC[k] > p->SwathHeightC[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesC[k] > p->SwathHeightC[k]) || p->DCCCMaxUncompressedBlock[k] < 256) {
+						l->MaximumEffectiveCompressionChroma = 2;
+					} else {
+						l->MaximumEffectiveCompressionChroma = 4;
+					}
+					l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1, l->MaximumEffectiveCompressionChroma);
+#ifdef __DML_VBA_DEBUG__
+					DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]);
+					DML_LOG_VERBOSE("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1);
+					DML_LOG_VERBOSE("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, l->MaximumEffectiveCompressionChroma);
+#endif
+					l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1;
+					l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1 / l->MaximumEffectiveCompressionChroma;
+				}
+			} else {
+				// No DCC: the surface is read uncompressed.
+				l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] + p->ReadBandwidthSurfaceChroma[k];
+			}
+			l->TotalRowReadBandwidth = l->TotalRowReadBandwidth + p->DPPPerSurface[k] * (p->meta_row_bw[k] + p->dpte_row_bw[k]);
+		}
+	}
+
+	/* Step 2: average rates and effective compressed buffer size. */
+	l->AverageDCCCompressionRate = p->TotalDataReadBandwidth / l->TotalCompressedReadBandwidth;
+	l->AverageDCCZeroSizeFraction = l->TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled);
+	DML_LOG_VERBOSE("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, l->TotalCompressedReadBandwidth);
+	DML_LOG_VERBOSE("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, l->TotalZeroSizeRequestReadBandwidth);
+	DML_LOG_VERBOSE("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, l->TotalZeroSizeCompressedReadBandwidth);
+	DML_LOG_VERBOSE("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, l->MaximumEffectiveCompressionLuma);
+	DML_LOG_VERBOSE("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, l->MaximumEffectiveCompressionChroma);
+	DML_LOG_VERBOSE("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate);
+	DML_LOG_VERBOSE("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, l->AverageDCCZeroSizeFraction);
+
+	DML_LOG_VERBOSE("DML::%s: CompbufReservedSpace64B = %u (%f kbytes)\n", __func__, p->CompbufReservedSpace64B, p->CompbufReservedSpace64B * 64 / 1024.0);
+	DML_LOG_VERBOSE("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs);
+	DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u kbytes\n", __func__, p->CompressedBufferSizeInkByte);
+	DML_LOG_VERBOSE("DML::%s: ROBBufferSizeInKByte = %u kbytes\n", __func__, p->ROBBufferSizeInKByte);
+#endif
+	if (l->AverageDCCZeroSizeFraction == 1) {
+		// Every request is zero-size.
+		l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth;
+		l->EffectiveCompressedBufferSize = (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageZeroSizeCompressionRate + ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 * l->AverageZeroSizeCompressionRate;
+
+
+	} else if (l->AverageDCCZeroSizeFraction > 0) {
+		// Mix of zero-size and regular requests.
+		l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth;
+		l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate,
+			(double)p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)) +
+			(p->rob_alloc_compressed ? math_min2(((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * l->AverageDCCCompressionRate,
+				((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate))
+				: ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64));
+
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate);
+		DML_LOG_VERBOSE("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate));
+		DML_LOG_VERBOSE("DML::%s: min 3 = %d\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64));
+		DML_LOG_VERBOSE("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate));
+#endif
+	} else {
+		// No zero-size requests at all.
+		l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate,
+			(double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate) +
+			((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * (p->rob_alloc_compressed ? l->AverageDCCCompressionRate : 1.0);
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate);
+		DML_LOG_VERBOSE("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate);
+#endif
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries);
+	DML_LOG_VERBOSE("DML::%s: ZeroSizeBufferEntries = %u\n", __func__, p->ZeroSizeBufferEntries);
+	DML_LOG_VERBOSE("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, l->AverageZeroSizeCompressionRate);
+	DML_LOG_VERBOSE("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0);
+#endif
+
+	/* Step 3: the stutter period is the smallest DET buffering time of any non-phantom surface. */
+	*p->StutterPeriod = 0;
+
+	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
+			const unsigned int stream_index = p->display_cfg->plane_descriptors[k].stream_index;
+
+			// With unbounded requests, this surface's bandwidth share of the
+			// effective compressed buffer is added to its DET capacity.
+			l->LinesInDETY = ((double)p->DETBufferSizeY[k] + (p->UnboundedRequestEnabled == true ? l->EffectiveCompressedBufferSize : 0) * p->ReadBandwidthSurfaceLuma[k] / p->TotalDataReadBandwidth) / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
+			l->LinesInDETYRoundedDownToSwath = math_floor2(l->LinesInDETY, p->SwathHeightY[k]);
+			l->DETBufferingTimeY = l->LinesInDETYRoundedDownToSwath * ((double)p->display_cfg->stream_descriptors[stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[stream_index].timing.pixel_clock_khz / 1000)) / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+#ifdef __DML_VBA_DEBUG__
+			DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferSizeY = %u (%u kbytes)\n", __func__, k, p->DETBufferSizeY[k], p->DETBufferSizeY[k] / 1024);
+			DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth);
+			DML_LOG_VERBOSE("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, l->LinesInDETY);
+			DML_LOG_VERBOSE("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, l->LinesInDETYRoundedDownToSwath);
+			DML_LOG_VERBOSE("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
+			DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, l->DETBufferingTimeY);
+#endif
+
+			// First candidate, or a shorter buffering time than the current one:
+			// this surface becomes the critical surface.
+			if (!FoundCriticalSurface || l->DETBufferingTimeY < *p->StutterPeriod) {
+				bool isInterlaceTiming = p->display_cfg->stream_descriptors[stream_index].timing.interlaced && !p->ProgressiveToInterlaceUnitInOPP;
+
+				FoundCriticalSurface = true;
+				*p->StutterPeriod = l->DETBufferingTimeY;
+				// For interlaced timing the line counts are halved (rounded down).
+				l->FrameTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[stream_index].timing.v_total / 2.0, 1.0) : p->display_cfg->stream_descriptors[stream_index].timing.v_total) * (double)p->display_cfg->stream_descriptors[stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[stream_index].timing.pixel_clock_khz / 1000);
+				l->VActiveTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[stream_index].timing.v_active / 2.0, 1.0) : p->display_cfg->stream_descriptors[stream_index].timing.v_active) * (double)p->display_cfg->stream_descriptors[stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[stream_index].timing.pixel_clock_khz / 1000);
+				l->BytePerPixelYCriticalSurface = p->BytePerPixelY[k];
+				l->SwathWidthYCriticalSurface = p->SwathWidthY[k];
+				l->SwathHeightYCriticalSurface = p->SwathHeightY[k];
+				l->BlockWidth256BytesYCriticalSurface = p->BlockWidth256BytesY[k];
+				l->DETBufferSizeYCriticalSurface = p->DETBufferSizeY[k];
+				l->MinTTUVBlankCriticalSurface = p->MinTTUVBlank[k];
+				l->SinglePlaneCriticalSurface = (p->ReadBandwidthSurfaceChroma[k] == 0);
+				l->SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1);
+
+#ifdef __DML_VBA_DEBUG__
+				DML_LOG_VERBOSE("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod);
+				DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, l->MinTTUVBlankCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, FrameTimeCriticalSurface= %f\n", __func__, k, l->FrameTimeCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, l->VActiveTimeCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, l->BytePerPixelYCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, l->SwathWidthYCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, l->SwathHeightYCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, l->BlockWidth256BytesYCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, l->SinglePlaneCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, l->SinglePipeCriticalSurface);
+#endif
+			}
+		}
+	}
+
+	/* Step 4: stutter burst time. */
+	// For bounded requests, the stutter period is calculated based only on the DET size, but during the burst some data can be returned inside the ROB/compressed buffer:
+	//   if (cdb + rob >= det) the stutter burst will be absorbed by the cdb + rob, which sits before decompression
+	//   else
+	//     the cdb + rob part will be returned at the compressed rate with urg bw (idea bw)
+	//     the det part will be returned at the uncompressed rate with 64B/dcfclk
+	//
+	// For unbounded requests, the stutter period should be calculated as the total of CDB+ROB+DET, so the term "PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer"
+	// should be == EffectiveCompressedBufferSize, which is returned at the compressed rate; the rest of the stutter period comes from the DET and is returned at the uncompressed rate with 64B/dcfclk
+
+	l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = math_min2(*p->StutterPeriod * p->TotalDataReadBandwidth, l->EffectiveCompressedBufferSize);
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate);
+	DML_LOG_VERBOSE("DML::%s: StutterPeriod*TotalDataReadBandwidth = %f (%f kbytes)\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth, (*p->StutterPeriod * p->TotalDataReadBandwidth) / 1024.0);
+	DML_LOG_VERBOSE("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0);
+	DML_LOG_VERBOSE("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f (%f kbytes)\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 1024);
+	DML_LOG_VERBOSE("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW);
+	DML_LOG_VERBOSE("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth);
+	DML_LOG_VERBOSE("DML::%s: TotalRowReadBandwidth = %f\n", __func__, l->TotalRowReadBandwidth);
+	DML_LOG_VERBOSE("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK);
+#endif
+
+	// Sum of three parts: data that fits in ROB + compressed buffer, the
+	// remaining data, and the row (meta + dpte) traffic. hw_debug5 replaces the
+	// average compression rate with 1 in the first two parts.
+	l->StutterBurstTime = l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer
+		/ (p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) +
+		(*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer)
+		/ math_min2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) +
+		*p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW;
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate));
+	DML_LOG_VERBOSE("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64));
+	DML_LOG_VERBOSE("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW);
+	DML_LOG_VERBOSE("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime);
+#endif
+	/* Step 5: per-stream pass (each stream visited once via stream_visited[]). */
+	l->TotalActiveWriteback = 0;
+	memset(l->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
+
+	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
+			const unsigned int stream_index = p->display_cfg->plane_descriptors[k].stream_index;
+
+			if (!l->stream_visited[stream_index]) {
+
+				// stream_descriptors[] is indexed by stream, not by plane: look up
+				// the writeback state of this plane's stream (indexing with the
+				// plane index k would read an unrelated stream whenever the
+				// plane and stream indices differ).
+				if (p->display_cfg->stream_descriptors[stream_index].writeback.active_writebacks_per_stream > 0)
+					l->TotalActiveWriteback = l->TotalActiveWriteback + 1;
+
+				if (TotalNumberOfActiveOTG == 0) { // first otg
+					SinglePixelClock = ((double)p->display_cfg->stream_descriptors[stream_index].timing.pixel_clock_khz / 1000);
+					SingleHTotal = p->display_cfg->stream_descriptors[stream_index].timing.h_total;
+					SingleVTotal = p->display_cfg->stream_descriptors[stream_index].timing.v_total;
+				} else if (SinglePixelClock != ((double)p->display_cfg->stream_descriptors[stream_index].timing.pixel_clock_khz / 1000) ||
+					SingleHTotal != p->display_cfg->stream_descriptors[stream_index].timing.h_total ||
+					SingleVTotal != p->display_cfg->stream_descriptors[stream_index].timing.v_total) {
+					// Any later stream that differs from the first breaks "same timing".
+					SameTiming = false;
+				}
+				TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
+				l->stream_visited[stream_index] = 1;
+			}
+		}
+	}
+
+	/* Step 6: outputs. */
+	if (l->TotalActiveWriteback == 0) {
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime);
+		DML_LOG_VERBOSE("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time);
+		DML_LOG_VERBOSE("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
+#endif
+		*p->StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitTime + l->StutterBurstTime) / *p->StutterPeriod) * 100;
+		*p->Z8StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitZ8Time + l->StutterBurstTime) / *p->StutterPeriod) * 100;
+		*p->NumberOfStutterBurstsPerFrame = (*p->StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
+		*p->Z8NumberOfStutterBurstsPerFrame = (*p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
+	} else {
+		// Any active writeback: report no stutter at all.
+		*p->StutterEfficiencyNotIncludingVBlank = 0.;
+		*p->Z8StutterEfficiencyNotIncludingVBlank = 0.;
+		*p->NumberOfStutterBurstsPerFrame = 0;
+		*p->Z8NumberOfStutterBurstsPerFrame = 0;
+	}
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, l->VActiveTimeCriticalSurface);
+	DML_LOG_VERBOSE("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
+	DML_LOG_VERBOSE("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank);
+	DML_LOG_VERBOSE("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame);
+	DML_LOG_VERBOSE("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
+#endif
+
+	// The whole-frame formula is only used when timings are identical and
+	// either synchronized or there is a single OTG; otherwise the
+	// "not including vblank" figure is reported unchanged.
+	if (*p->StutterEfficiencyNotIncludingVBlank > 0) {
+		if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) {
+			*p->StutterEfficiency = *p->StutterEfficiencyNotIncludingVBlank;
+		} else {
+			*p->StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitTime + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100;
+		}
+	} else {
+		*p->StutterEfficiency = 0;
+		*p->NumberOfStutterBurstsPerFrame = 0;
+	}
+
+	if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) {
+		//LastZ8StutterPeriod = l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod;
+		if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) {
+			*p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank;
+		} else {
+			*p->Z8StutterEfficiency = (1 - (*p->Z8NumberOfStutterBurstsPerFrame * p->SRExitZ8Time + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100;
+		}
+	} else {
+		*p->Z8StutterEfficiency = 0.;
+		*p->Z8NumberOfStutterBurstsPerFrame = 0;
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: TotalNumberOfActiveOTG = %u\n", __func__, TotalNumberOfActiveOTG);
+	DML_LOG_VERBOSE("DML::%s: SameTiming = %u\n", __func__, SameTiming);
+	DML_LOG_VERBOSE("DML::%s: SynchronizeTimings = %u\n", __func__, p->SynchronizeTimings);
+	DML_LOG_VERBOSE("DML::%s: LastZ8StutterPeriod = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod : 0);
+	DML_LOG_VERBOSE("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark);
+	DML_LOG_VERBOSE("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime);
+	DML_LOG_VERBOSE("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
+	DML_LOG_VERBOSE("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency);
+	DML_LOG_VERBOSE("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency);
+	DML_LOG_VERBOSE("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
+	DML_LOG_VERBOSE("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
+#endif
+
+	// Cleared only for the bounded-request, single-surface, single-plane,
+	// single-pipe case; set in every other configuration.
+	*p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface);
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface);
+	DML_LOG_VERBOSE("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte);
+	DML_LOG_VERBOSE("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
+#endif
+}
+
+static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex *in_out_params)
+{
+ const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg;
+ const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table;
+ const struct core_display_cfg_support_info *cfg_support_info = in_out_params->cfg_support_info;
+ struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib;
+ struct dml2_display_cfg_programming *programming = in_out_params->programming;
+
+ struct dml2_core_calcs_mode_programming_locals *s = &mode_lib->scratch.dml_core_mode_programming_locals;
+ struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
+ struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
+ struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
+ struct dml2_core_calcs_CalculateStutterEfficiency_params *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params;
+ struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
+ struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params;
+ struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params;
+ struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params;
+ struct dml2_core_shared_CalculateMetaAndPTETimes_params *CalculateMetaAndPTETimes_params = &mode_lib->scratch.CalculateMetaAndPTETimes_params;
+ struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params;
+ struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params;
+
+ unsigned int k;
+ bool must_support_iflip;
+ const long min_return_uclk_cycles = 83;
+ const long min_return_fclk_cycles = 75;
+ const double max_fclk_mhz = min_clk_table->max_clocks_khz.fclk / 1000.0;
+ double hard_minimum_dcfclk_mhz = (double)min_clk_table->dram_bw_table.entries[0].min_dcfclk_khz / 1000.0;
+ double max_uclk_mhz = 0;
+ double min_return_latency_in_DCFCLK_cycles = 0;
+
+ DML_LOG_VERBOSE("DML::%s: --- START --- \n", __func__);
+
+ memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch));
+ memset(&mode_lib->mp, 0, sizeof(struct dml2_core_internal_mode_program));
+
+ s->num_active_planes = display_cfg->num_planes;
+ get_stream_output_bpp(s->OutputBpp, display_cfg);
+
+ mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg->num_planes, cfg_support_info);
+ dml_calc_pipe_plane_mapping(cfg_support_info, mode_lib->mp.pipe_plane);
+
+ mode_lib->mp.Dcfclk = programming->min_clocks.dcn4x.active.dcfclk_khz / 1000.0;
+ mode_lib->mp.FabricClock = programming->min_clocks.dcn4x.active.fclk_khz / 1000.0;
+ mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table.dram_config);
+ mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4x.active.uclk_khz / 1000.0;
+ mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4x.dpprefclk_khz / 1000.0;
+ s->SOCCLK = (double)programming->min_clocks.dcn4x.socclk_khz / 1000;
+ mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4x.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params);
+ mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table);
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index;
+ DML_ASSERT(cfg_support_info->stream_support_info[stream_index].odms_used <= 4);
+ DML_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4 ||
+ cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2 ||
+ cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1);
+
+ if (cfg_support_info->stream_support_info[stream_index].odms_used > 1)
+ DML_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1);
+
+ switch (cfg_support_info->stream_support_info[stream_index].odms_used) {
+ case (4):
+ mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_4to1;
+ break;
+ case (3):
+ mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_3to1;
+ break;
+ case (2):
+ mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_2to1;
+ break;
+ default:
+ if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4)
+ mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to4;
+ else if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2)
+ mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to2;
+ else
+ mode_lib->mp.ODMMode[k] = dml2_odm_mode_bypass;
+ break;
+ }
+ }
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used;
+ mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0;
+ DML_ASSERT(mode_lib->mp.Dppclk[k] > 0);
+ }
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index;
+ mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0;
+ DML_LOG_VERBOSE("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]);
+ }
+
+ mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0;
+ mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0;
+
+ DML_ASSERT(mode_lib->mp.Dcfclk > 0);
+ DML_ASSERT(mode_lib->mp.FabricClock > 0);
+ DML_ASSERT(mode_lib->mp.dram_bw_mbps > 0);
+ DML_ASSERT(mode_lib->mp.uclk_freq_mhz > 0);
+ DML_ASSERT(mode_lib->mp.GlobalDPPCLK > 0);
+ DML_ASSERT(mode_lib->mp.Dispclk > 0);
+ DML_ASSERT(mode_lib->mp.DCFCLKDeepSleep > 0);
+ DML_ASSERT(s->SOCCLK > 0);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes);
+ DML_LOG_VERBOSE("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes);
+ DML_LOG_VERBOSE("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk);
+ DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, mode_lib->mp.FabricClock);
+ DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->mp.dram_bw_mbps);
+ DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->mp.uclk_freq_mhz);
+ DML_LOG_VERBOSE("DML::%s: Dispclk = %f\n", __func__, mode_lib->mp.Dispclk);
+ for (k = 0; k < s->num_active_planes; ++k) {
+ DML_LOG_VERBOSE("DML::%s: Dppclk[%0d] = %f\n", __func__, k, mode_lib->mp.Dppclk[k]);
+ }
+ DML_LOG_VERBOSE("DML::%s: GlobalDPPCLK = %f\n", __func__, mode_lib->mp.GlobalDPPCLK);
+ DML_LOG_VERBOSE("DML::%s: DCFCLKDeepSleep = %f\n", __func__, mode_lib->mp.DCFCLKDeepSleep);
+ DML_LOG_VERBOSE("DML::%s: SOCCLK = %f\n", __func__, s->SOCCLK);
+ DML_LOG_VERBOSE("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index);
+ DML_LOG_VERBOSE("DML::%s: min_clk_table min_fclk_khz = %ld\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz);
+ DML_LOG_VERBOSE("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config));
+ for (k = 0; k < mode_lib->mp.num_active_pipes; ++k) {
+ DML_LOG_VERBOSE("DML::%s: pipe=%d is in plane=%d\n", __func__, k, mode_lib->mp.pipe_plane[k]);
+ DML_LOG_VERBOSE("DML::%s: Per-plane DPPPerSurface[%0d] = %d\n", __func__, k, mode_lib->mp.NoOfDPP[k]);
+ }
+
+ for (k = 0; k < s->num_active_planes; k++)
+ DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
+#endif
+
+ CalculateMaxDETAndMinCompressedBufferSize(
+ mode_lib->ip.config_return_buffer_size_in_kbytes,
+ mode_lib->ip.config_return_buffer_segment_size_in_kbytes,
+ mode_lib->ip.rob_buffer_size_kbytes,
+ mode_lib->ip.max_num_dpp,
+ display_cfg->overrides.hw.force_nom_det_size_kbytes.enable,
+ display_cfg->overrides.hw.force_nom_det_size_kbytes.value,
+ mode_lib->ip.dcn_mrq_present,
+
+ /* Output */
+ &s->MaxTotalDETInKByte,
+ &s->NomDETInKByte,
+ &s->MinCompressedBufferSizeInKByte);
+
+
+ PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd);
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ CalculateSinglePipeDPPCLKAndSCLThroughput(
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
+ mode_lib->ip.max_dchub_pscl_bw_pix_per_clk,
+ mode_lib->ip.max_pscl_lb_bw_pix_per_clk,
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+ display_cfg->plane_descriptors[k].pixel_format,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps,
+
+ /* Output */
+ &mode_lib->mp.PSCL_THROUGHPUT[k],
+ &mode_lib->mp.PSCL_THROUGHPUT_CHROMA[k],
+ &mode_lib->mp.DPPCLKUsingSingleDPP[k]);
+ }
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ CalculateBytePerPixelAndBlockSizes(
+ display_cfg->plane_descriptors[k].pixel_format,
+ display_cfg->plane_descriptors[k].surface.tiling,
+ display_cfg->plane_descriptors[k].surface.plane0.pitch,
+ display_cfg->plane_descriptors[k].surface.plane1.pitch,
+
+ // Output
+ &mode_lib->mp.BytePerPixelY[k],
+ &mode_lib->mp.BytePerPixelC[k],
+ &mode_lib->mp.BytePerPixelInDETY[k],
+ &mode_lib->mp.BytePerPixelInDETC[k],
+ &mode_lib->mp.Read256BlockHeightY[k],
+ &mode_lib->mp.Read256BlockHeightC[k],
+ &mode_lib->mp.Read256BlockWidthY[k],
+ &mode_lib->mp.Read256BlockWidthC[k],
+ &mode_lib->mp.MacroTileHeightY[k],
+ &mode_lib->mp.MacroTileHeightC[k],
+ &mode_lib->mp.MacroTileWidthY[k],
+ &mode_lib->mp.MacroTileWidthC[k],
+ &mode_lib->mp.surf_linear128_l[k],
+ &mode_lib->mp.surf_linear128_c[k]);
+ }
+
+ CalculateSwathWidth(
+ display_cfg,
+ false, // ForceSingleDPP
+ s->num_active_planes,
+ mode_lib->mp.ODMMode,
+ mode_lib->mp.BytePerPixelY,
+ mode_lib->mp.BytePerPixelC,
+ mode_lib->mp.Read256BlockHeightY,
+ mode_lib->mp.Read256BlockHeightC,
+ mode_lib->mp.Read256BlockWidthY,
+ mode_lib->mp.Read256BlockWidthC,
+ mode_lib->mp.surf_linear128_l,
+ mode_lib->mp.surf_linear128_c,
+ mode_lib->mp.NoOfDPP,
+
+ /* Output */
+ mode_lib->mp.req_per_swath_ub_l,
+ mode_lib->mp.req_per_swath_ub_c,
+ mode_lib->mp.SwathWidthSingleDPPY,
+ mode_lib->mp.SwathWidthSingleDPPC,
+ mode_lib->mp.SwathWidthY,
+ mode_lib->mp.SwathWidthC,
+ s->dummy_integer_array[0], // unsigned int MaximumSwathHeightY[]
+ s->dummy_integer_array[1], // unsigned int MaximumSwathHeightC[]
+ mode_lib->mp.swath_width_luma_ub,
+ mode_lib->mp.swath_width_chroma_ub);
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ mode_lib->mp.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 /
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000));
+ mode_lib->mp.vactive_sw_bw_l[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+ mode_lib->mp.vactive_sw_bw_c[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+ DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]);
+ DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_c[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]);
+ }
+
+ CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg;
+ CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = s->MaxTotalDETInKByte;
+ CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = s->MinCompressedBufferSizeInKByte;
+ CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
+ CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = s->num_active_planes;
+ CalculateSwathAndDETConfiguration_params->nomDETInKByte = s->NomDETInKByte;
+ CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->mp.vactive_sw_bw_l;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->mp.vactive_sw_bw_c;
+ CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0];
+ CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1];
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->mp.Read256BlockHeightY;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->mp.Read256BlockHeightC;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->mp.Read256BlockWidthY;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->mp.Read256BlockWidthC;
+ CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->mp.surf_linear128_l;
+ CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->mp.surf_linear128_c;
+ CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->mp.ODMMode;
+ CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->mp.NoOfDPP;
+ CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->mp.BytePerPixelY;
+ CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->mp.BytePerPixelC;
+ CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->mp.BytePerPixelInDETY;
+ CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->mp.BytePerPixelInDETC;
+ CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+
+ // output
+ CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = mode_lib->mp.req_per_swath_ub_l;
+ CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = mode_lib->mp.req_per_swath_ub_c;
+ CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_long_array[0];
+ CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_long_array[1];
+ CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_long_array[2];
+ CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_long_array[3];
+ CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->mp.SwathHeightY;
+ CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->mp.SwathHeightC;
+ CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->mp.request_size_bytes_luma;
+ CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->mp.request_size_bytes_chroma;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->mp.DETBufferSizeInKByte;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC;
+ CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l;
+ CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c;
+ CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->mp.UnboundedRequestEnabled;
+ CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &mode_lib->mp.compbuf_reserved_space_64b;
+ CalculateSwathAndDETConfiguration_params->hw_debug5 = &mode_lib->mp.hw_debug5;
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->mp.CompressedBufferSizeInkByte;
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = &s->dummy_boolean_array[0][0];
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[0];
+
+ // Calculate DET size, swath height here.
+ CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
+
+ // DSC Delay
+ for (k = 0; k < s->num_active_planes; ++k) {
+ mode_lib->mp.DSCDelay[k] = DSCDelayRequirement(cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].dsc_enable,
+ mode_lib->mp.ODMMode[k],
+ mode_lib->ip.maximum_dsc_bits_per_component,
+ s->OutputBpp[k],
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
+ cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].num_dsc_slices,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+ s->PixelClockBackEnd[k]);
+ }
+
+ // Prefetch
+ if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) {
+ for (k = 0; k < s->num_active_planes; ++k)
+ mode_lib->mp.SurfaceSizeInTheMALL[k] = 0;
+ } else {
+ CalculateSurfaceSizeInMall(
+ display_cfg,
+ s->num_active_planes,
+ mode_lib->soc.mall_allocated_for_dcn_mbytes,
+ mode_lib->mp.BytePerPixelY,
+ mode_lib->mp.BytePerPixelC,
+ mode_lib->mp.Read256BlockWidthY,
+ mode_lib->mp.Read256BlockWidthC,
+ mode_lib->mp.Read256BlockHeightY,
+ mode_lib->mp.Read256BlockHeightC,
+ mode_lib->mp.MacroTileWidthY,
+ mode_lib->mp.MacroTileWidthC,
+ mode_lib->mp.MacroTileHeightY,
+ mode_lib->mp.MacroTileHeightC,
+
+ /* Output */
+ mode_lib->mp.SurfaceSizeInTheMALL,
+ &s->dummy_boolean[0]); /* bool *ExceededMALLSize */
+ }
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ s->SurfaceParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+ s->SurfaceParameters[k].DPPPerSurface = mode_lib->mp.NoOfDPP[k];
+ s->SurfaceParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
+ s->SurfaceParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
+ s->SurfaceParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
+ s->SurfaceParameters[k].BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k];
+ s->SurfaceParameters[k].BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k];
+ s->SurfaceParameters[k].BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k];
+ s->SurfaceParameters[k].BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k];
+ s->SurfaceParameters[k].BlockWidthY = mode_lib->mp.MacroTileWidthY[k];
+ s->SurfaceParameters[k].BlockHeightY = mode_lib->mp.MacroTileHeightY[k];
+ s->SurfaceParameters[k].BlockWidthC = mode_lib->mp.MacroTileWidthC[k];
+ s->SurfaceParameters[k].BlockHeightC = mode_lib->mp.MacroTileHeightC[k];
+ s->SurfaceParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
+ s->SurfaceParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
+ s->SurfaceParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
+ s->SurfaceParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
+ s->SurfaceParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling;
+ s->SurfaceParameters[k].BytePerPixelY = mode_lib->mp.BytePerPixelY[k];
+ s->SurfaceParameters[k].BytePerPixelC = mode_lib->mp.BytePerPixelC[k];
+ s->SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
+ s->SurfaceParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+ s->SurfaceParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+ s->SurfaceParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
+ s->SurfaceParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
+ s->SurfaceParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch;
+ s->SurfaceParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch;
+ s->SurfaceParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
+ s->SurfaceParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
+ s->SurfaceParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
+ s->SurfaceParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
+ s->SurfaceParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
+ s->SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame;
+ s->SurfaceParameters[k].SwathHeightY = mode_lib->mp.SwathHeightY[k];
+ s->SurfaceParameters[k].SwathHeightC = mode_lib->mp.SwathHeightC[k];
+ s->SurfaceParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch;
+ s->SurfaceParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch;
+ }
+
+ CalculateVMRowAndSwath_params->display_cfg = display_cfg;
+ CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = s->num_active_planes;
+ CalculateVMRowAndSwath_params->myPipe = s->SurfaceParameters;
+ CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->mp.SurfaceSizeInTheMALL;
+ CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
+ CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
+ CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes;
+ CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->mp.SwathWidthY;
+ CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->mp.SwathWidthC;
+ CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
+ CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes;
+ CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+
+ // output
+ CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0];
+ CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub;
+ CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub;
+ CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->mp.dpte_row_height;
+ CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma;
+ CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = mode_lib->mp.dpte_row_height_linear;
+ CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = mode_lib->mp.dpte_row_height_linear_chroma;
+ CalculateVMRowAndSwath_params->vm_group_bytes = mode_lib->mp.vm_group_bytes;
+ CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
+ CalculateVMRowAndSwath_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY;
+ CalculateVMRowAndSwath_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY;
+ CalculateVMRowAndSwath_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY;
+ CalculateVMRowAndSwath_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC;
+ CalculateVMRowAndSwath_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC;
+ CalculateVMRowAndSwath_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC;
+ CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y;
+ CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y;
+ CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c;
+ CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c;
+ CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = mode_lib->mp.dpde0_bytes_per_frame_ub_l;
+ CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = mode_lib->mp.dpde0_bytes_per_frame_ub_c;
+ CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY;
+ CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC;
+ CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->mp.VInitPreFillY;
+ CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->mp.VInitPreFillC;
+ CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY;
+ CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC;
+ CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
+ CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow;
+ CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
+ CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
+ CalculateVMRowAndSwath_params->vm_bytes = mode_lib->mp.vm_bytes;
+ CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame;
+ CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->mp.use_one_row_for_frame_flip;
+ CalculateVMRowAndSwath_params->is_using_mall_for_ss = mode_lib->mp.is_using_mall_for_ss;
+ CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = mode_lib->mp.PTE_BUFFER_MODE;
+ CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = mode_lib->mp.BIGK_FRAGMENT_SIZE;
+ CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = s->dummy_boolean_array[1];
+ CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->mp.meta_row_bw;
+ CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->mp.meta_row_bytes;
+ CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
+ CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
+ CalculateVMRowAndSwath_params->meta_req_width_luma = mode_lib->mp.meta_req_width;
+ CalculateVMRowAndSwath_params->meta_req_height_luma = mode_lib->mp.meta_req_height;
+ CalculateVMRowAndSwath_params->meta_row_width_luma = mode_lib->mp.meta_row_width;
+ CalculateVMRowAndSwath_params->meta_row_height_luma = mode_lib->mp.meta_row_height;
+ CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = mode_lib->mp.meta_pte_bytes_per_frame_ub_l;
+ CalculateVMRowAndSwath_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma;
+ CalculateVMRowAndSwath_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma;
+ CalculateVMRowAndSwath_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma;
+ CalculateVMRowAndSwath_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma;
+ CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = mode_lib->mp.meta_pte_bytes_per_frame_ub_c;
+
+ CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params);
+
+ memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params));
+ if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0 || mode_lib->ip.dcn_mrq_present) {
+ for (k = 0; k < s->num_active_planes; k++) {
+ mode_lib->mp.mall_prefetch_sdp_overhead_factor[k] = 1.0;
+ mode_lib->mp.mall_prefetch_dram_overhead_factor[k] = 1.0;
+ mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0;
+ mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0;
+ mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0;
+ mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0;
+ }
+ } else {
+ for (k = 0; k < s->num_active_planes; k++) {
+ calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
+ calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count;
+ calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes;
+ calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes;
+ calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes;
+ calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
+ calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
+
+ calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format;
+ calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle);
+ calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
+ calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling;
+ calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall;
+
+ calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
+ calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
+ calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
+ calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
+ calculate_mcache_setting_params->blk_width_l = mode_lib->mp.MacroTileWidthY[k];
+ calculate_mcache_setting_params->blk_height_l = mode_lib->mp.MacroTileHeightY[k];
+ calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k];
+ calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k];
+ calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k];
+ calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->mp.BytePerPixelY[k];
+
+ calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
+ calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
+ calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
+ calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
+ calculate_mcache_setting_params->blk_width_c = mode_lib->mp.MacroTileWidthC[k];
+ calculate_mcache_setting_params->blk_height_c = mode_lib->mp.MacroTileHeightC[k];
+ calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k];
+ calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k];
+ calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k];
+ calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->mp.BytePerPixelC[k];
+
+ // output
+ calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k];
+ calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k];
+ calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k];
+ calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k];
+
+ calculate_mcache_setting_params->num_mcaches_l = &mode_lib->mp.num_mcaches_l[k];
+ calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->mp.mcache_row_bytes_l[k];
+ calculate_mcache_setting_params->mcache_row_bytes_per_channel_l = &mode_lib->mp.mcache_row_bytes_per_channel_l[k];
+ calculate_mcache_setting_params->mcache_offsets_l = mode_lib->mp.mcache_offsets_l[k];
+ calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->mp.mcache_shift_granularity_l[k];
+
+ calculate_mcache_setting_params->num_mcaches_c = &mode_lib->mp.num_mcaches_c[k];
+ calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->mp.mcache_row_bytes_c[k];
+ calculate_mcache_setting_params->mcache_row_bytes_per_channel_c = &mode_lib->mp.mcache_row_bytes_per_channel_c[k];
+ calculate_mcache_setting_params->mcache_offsets_c = mode_lib->mp.mcache_offsets_c[k];
+ calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->mp.mcache_shift_granularity_c[k];
+
+ calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->mp.mall_comb_mcache_l[k];
+ calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->mp.mall_comb_mcache_c[k];
+ calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->mp.lc_comb_mcache[k];
+ calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params);
+ }
+
+ calculate_mall_bw_overhead_factor(
+ mode_lib->mp.mall_prefetch_sdp_overhead_factor,
+ mode_lib->mp.mall_prefetch_dram_overhead_factor,
+
+ // input
+ display_cfg,
+ s->num_active_planes);
+ }
+
+ // Calculate all the bandwidth available
+ calculate_bandwidth_available(
+ mode_lib->mp.avg_bandwidth_available_min,
+ mode_lib->mp.avg_bandwidth_available,
+ mode_lib->mp.urg_bandwidth_available_min,
+ mode_lib->mp.urg_bandwidth_available,
+ mode_lib->mp.urg_bandwidth_available_vm_only,
+ mode_lib->mp.urg_bandwidth_available_pixel_and_vm,
+
+ &mode_lib->soc,
+ display_cfg->hostvm_enable,
+ mode_lib->mp.Dcfclk,
+ mode_lib->mp.FabricClock,
+ mode_lib->mp.dram_bw_mbps);
+
+
+ calculate_hostvm_inefficiency_factor(
+ &s->HostVMInefficiencyFactor,
+ &s->HostVMInefficiencyFactorPrefetch,
+
+ display_cfg->gpuvm_enable,
+ display_cfg->hostvm_enable,
+ mode_lib->ip.remote_iommu_outstanding_translations,
+ mode_lib->soc.max_outstanding_reqs,
+ mode_lib->mp.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active],
+ mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
+
+ s->TotalDCCActiveDPP = 0;
+ s->TotalActiveDPP = 0;
+ for (k = 0; k < s->num_active_planes; ++k) {
+ s->TotalActiveDPP = s->TotalActiveDPP + mode_lib->mp.NoOfDPP[k];
+ if (display_cfg->plane_descriptors[k].surface.dcc.enable)
+ s->TotalDCCActiveDPP = s->TotalDCCActiveDPP + mode_lib->mp.NoOfDPP[k];
+ }
+ // Calculate tdlut schedule related terms
+ for (k = 0; k <= s->num_active_planes - 1; k++) {
+ calculate_tdlut_setting_params->dispclk_mhz = mode_lib->mp.Dispclk;
+ calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
+ calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode;
+ calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode;
+ calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size;
+ calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
+ calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
+
+ // output
+ calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k];
+ calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k];
+ calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
+ calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
+ calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
+ calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k];
+ calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
+ calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
+ }
+
+ if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3)
+ s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes));
+
+ CalculateExtraLatency(
+ display_cfg,
+ mode_lib->ip.rob_buffer_size_kbytes,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles,
+ s->ReorderingBytes,
+ mode_lib->mp.Dcfclk,
+ mode_lib->mp.FabricClock,
+ mode_lib->ip.pixel_chunk_size_kbytes,
+ mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active],
+ s->num_active_planes,
+ mode_lib->mp.NoOfDPP,
+ mode_lib->mp.dpte_group_bytes,
+ s->tdlut_bytes_per_group,
+ s->HostVMInefficiencyFactor,
+ s->HostVMInefficiencyFactorPrefetch,
+ mode_lib->soc.hostvm_min_page_size_kbytes,
+ mode_lib->soc.qos_parameters.qos_type,
+ !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable),
+ mode_lib->soc.max_outstanding_reqs,
+ mode_lib->mp.request_size_bytes_luma,
+ mode_lib->mp.request_size_bytes_chroma,
+ mode_lib->ip.meta_chunk_size_kbytes,
+ mode_lib->ip.dchub_arb_to_ret_delay,
+ mode_lib->mp.TripToMemory,
+ mode_lib->ip.hostvm_mode,
+
+ // output
+ &mode_lib->mp.ExtraLatency,
+ &mode_lib->mp.ExtraLatency_sr,
+ &mode_lib->mp.ExtraLatencyPrefetch);
+
+ mode_lib->mp.TCalc = 24.0 / mode_lib->mp.DCFCLKDeepSleep;
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+ mode_lib->mp.WritebackDelay[k] =
+ mode_lib->soc.qos_parameters.writeback.base_latency_us
+ + CalculateWriteBackDelay(
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->mp.Dispclk;
+ } else
+ mode_lib->mp.WritebackDelay[k] = 0;
+ }
+
+ /* VActive bytes to fetch for UCLK P-State */
+ calculate_bytes_to_fetch_required_to_hide_latency_params->display_cfg = display_cfg;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+
+ calculate_bytes_to_fetch_required_to_hide_latency_params->num_active_planes = s->num_active_planes;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->num_of_dpp = mode_lib->mp.NoOfDPP;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_l = mode_lib->mp.meta_row_height;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_l = mode_lib->mp.dpte_row_height;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_c = mode_lib->mp.dpte_row_height_chroma;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_l = mode_lib->mp.BytePerPixelY;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_c = mode_lib->mp.BytePerPixelC;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_l = mode_lib->mp.SwathWidthY;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->mp.SwathWidthC;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->mp.SwathHeightY;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->mp.SwathHeightC;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us[0] = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
+
+ /* outputs */
+ calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l[dml2_pstate_type_uclk];
+ calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c[dml2_pstate_type_uclk];
+
+ calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params);
+
+ /* Excess VActive bandwidth required to fill DET */
+ calculate_excess_vactive_bandwidth_required(
+ display_cfg,
+ s->num_active_planes,
+ s->pstate_bytes_required_l[dml2_pstate_type_uclk],
+ s->pstate_bytes_required_c[dml2_pstate_type_uclk],
+ /* outputs */
+ mode_lib->mp.excess_vactive_fill_bw_l,
+ mode_lib->mp.excess_vactive_fill_bw_c);
+
+ mode_lib->mp.UrgentLatency = CalculateUrgentLatency(
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us,
+ mode_lib->soc.do_urgent_latency_adjustment,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz,
+ mode_lib->mp.FabricClock,
+ mode_lib->mp.uclk_freq_mhz,
+ mode_lib->soc.qos_parameters.qos_type,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
+
+ mode_lib->mp.TripToMemory = CalculateTripToMemory(
+ mode_lib->mp.UrgentLatency,
+ mode_lib->mp.FabricClock,
+ mode_lib->mp.uclk_freq_mhz,
+ mode_lib->soc.qos_parameters.qos_type,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
+
+ mode_lib->mp.TripToMemory = math_max2(mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory);
+
+ mode_lib->mp.MetaTripToMemory = CalculateMetaTripToMemory(
+ mode_lib->mp.UrgentLatency,
+ mode_lib->mp.FabricClock,
+ mode_lib->mp.uclk_freq_mhz,
+ mode_lib->soc.qos_parameters.qos_type,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.meta_trip_adder_fclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ bool cursor_not_enough_urgent_latency_hiding = false;
+ s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+
+ s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format;
+
+ s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
+ mode_lib->mp.NoOfDPP[k],
+ display_cfg->plane_descriptors[k].composition.viewport.plane0.width,
+ display_cfg->plane_descriptors[k].composition.viewport.plane0.height,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
+ display_cfg->plane_descriptors[k].composition.rotation_angle);
+
+ s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
+ mode_lib->mp.NoOfDPP[k],
+ display_cfg->plane_descriptors[k].composition.viewport.plane1.width,
+ display_cfg->plane_descriptors[k].composition.viewport.plane1.height,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
+ display_cfg->plane_descriptors[k].composition.rotation_angle);
+
+ if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
+ calculate_cursor_req_attributes(
+ display_cfg->plane_descriptors[k].cursor.cursor_width,
+ display_cfg->plane_descriptors[k].cursor.cursor_bpp,
+
+ // output
+ &s->cursor_lines_per_chunk[k],
+ &s->cursor_bytes_per_line[k],
+ &s->cursor_bytes_per_chunk[k],
+ &s->cursor_bytes[k]);
+
+ calculate_cursor_urgent_burst_factor(
+ mode_lib->ip.cursor_buffer_size,
+ display_cfg->plane_descriptors[k].cursor.cursor_width,
+ s->cursor_bytes_per_chunk[k],
+ s->cursor_lines_per_chunk[k],
+ s->line_times[k],
+ mode_lib->mp.UrgentLatency,
+
+ // output
+